diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 000000000..dea1f85a4 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,18 @@ +language: perl +perl: + - "5.30" + - "5.28" + - "5.26" + +before_install: + - git clone https://github.com/travis-perl/helpers ~/travis-perl-helpers # https: GitHub no longer serves the unauthenticated git:// protocol + - source ~/travis-perl-helpers/init --auto + - sudo apt-get -y install libdb-dev + - sudo apt-get -y install r-base + - cpanm --notest Devel::Cover::Report::Coveralls + +script: + - perl Makefile.PL && make test TEST_VERBOSE=1 + +after_success: + - cover -test -report coveralls diff --git a/MANIFEST.SKIP b/MANIFEST.SKIP new file mode 100644 index 000000000..0dd44c2e2 --- /dev/null +++ b/MANIFEST.SKIP @@ -0,0 +1,49 @@ +# Avoid version control files +\B\.git\b +\B\.gitignore$ +\bRCS\b +\bCVS\b +,v$ +\B\.svn\b +\B\.cvsignore$ + +# Avoid Module::Build generated and utility files +\bBuild$ +\bBuild\.bat$ +\b_build + +# Avoid Makemaker generated and utility files +\bMakefile$ +\bblib +\bMakeMaker-\d +\bpm_to_blib$ +\bblibdirs$ +^MANIFEST\.SKIP$ + +# Avoid Devel::Cover generated files +\bcover_db + +# Avoid temp and backup files +~$ +\.tmp$ +\.old$ +\.bak$ +\#$ +\.# +\.rej$ +\.index$ + +# Avoid OS-specific files/dirs +# Mac OSX metadata +\B\.DS_Store +# Mac OSX SMB mount metadata files +\B\._ + +# Avoid archives of this distribution +\bGAAS-v[\d\.\_]+ +^MYMETA\.yml$ +^MYMETA\.json$ + +# other +\B\.travis\.yml$ +^tmp diff --git a/Makefile b/Makefile deleted file mode 100644 index f1c29e7c3..000000000 --- a/Makefile +++ /dev/null @@ -1,38 +0,0 @@ -check: - @printf "Checking for tool dependencies\n" - @printf "Ruby is installed ... " - @if command -v ruby >/dev/null 2>&1 ; then printf "\033[0;32myes\033[0m : "; ruby -v; else printf "\033[0;31mno\033[0m\n"; fi - @printf "Groovy is installed ... " - @if command -v groovy >/dev/null 2>&1 ; then printf "\033[0;32myes\033[0m : "; groovy -v; else printf "\033[0;31mno\033[0m\n"; fi - @printf "Perl is installed ... 
" - @if command -v perl >/dev/null 2>&1 ; then printf "\033[0;32myes\033[0m : "; perl -v; else printf "\033[0;31mno\033[0m\n"; fi - @printf "Perl lib Clone is installed ... " - @if perl -MClone -e 1 2>/dev/null; then printf "\033[0;32myes\033[0m \n"; else printf "\033[0;31mno\033[0m\n"; fi - @printf "Perl lib Moose is installed ... " - @if perl -MMoose -e 1 2>/dev/null; then printf "\033[0;32myes\033[0m \n"; else printf "\033[0;31mno\033[0m\n"; fi - @printf "Perl lib Graph::Directed is installed ..." - @if perl -MGraph::Directed -e 1 2>/dev/null; then printf "\033[0;32myes\033[0m \n"; else printf "\033[0;31mno\033[0m\n"; fi - @printf "Perl lib LWP::UserAgent is installed ..." - @if perl -MLWP::UserAgent -e 1 2>/dev/null; then printf "\033[0;32myes\033[0m \n"; else printf "\033[0;31mno\033[0m\n"; fi - @printf "Perl lib Statistics::R is installed ... " - @if perl -MStatistics::R -e 1 2>/dev/null; then printf "\033[0;32myes\033[0m \n"; else printf "\033[0;31mno\033[0m\n"; fi - @printf "Perl lib JSON is installed ... " - @if perl -MJSON -e 1 2>/dev/null; then printf "\033[0;32myes\033[0m \n"; else printf "\033[0;31mno\033[0m\n"; fi - @printf "Perl lib Sort::Naturally is installed ... " - @if perl -MSort::Naturally -e 1 2>/dev/null; then printf "\033[0;32myes\033[0m \n"; else printf "\033[0;31mno\033[0m\n"; fi - @printf "Rscript is installed ... 
" - @if command -v Rscript >/dev/null 2>&1 ; then printf "\033[0;32myes\033[0m : "; Rscript --version; else printf "\033[0;31mno\033[0m\n"; fi - -install: update_profiles - -update_profiles: - sed -i.orig -e "s#VIRTUAL_ENV=\"\$$HOME/GAAS\"#VIRTUAL_ENV=\"${PWD}\"#" profiles/activate_nbis_env - sed -i.orig -e "s#VIRTUAL_ENV=\"\$$HOME/GAAS\"#VIRTUAL_ENV=\"${PWD}\"#" profiles/activate_rackham_env - sed -i.orig -e "s#VIRTUAL_ENV=\"\$$HOME/GAAS\"#VIRTUAL_ENV=\"${PWD}\"#" profiles/activate_env -clean: - mv profiles/activate_nbis_env{.orig,} - mv profiles/activate_rackham_env{.orig,} - mv profiles/activate_local_env{.orig,} - -get_groovy: - curl -s "https://get.sdkman.io" | bash && source "${HOME}/.sdkman/bin/sdkman-init.sh" && sdk install groovy diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 000000000..dcef05b01 --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,121 @@ +use 5.006; +use strict; +use warnings FATAL => 'all'; +use ExtUtils::MakeMaker; +use File::ShareDir::Install; + +install_share dist => 'share'; + +# ------------------- CREATE LIST OF EXE FILE ---------------------------------- + +# define function to retrieve list of exe files +sub get_list_exe{ + my @list_exe; + + opendir (DIR, "bin") or die $!; + while (my $file = readdir(DIR)) { + # Use a regular expression to ignore files beginning with a period + next if ($file =~ m/^\./); + + #add exe file + push @list_exe, "bin/".$file; + } + closedir(DIR); + + return \@list_exe; +} +my $ref_list_exe = get_list_exe(); + + +# ------------------- CREATE PREREQ_PM Hash ----------------------------------- +# As it is used twice, I factorze it here, to avoid de-synchronization + +my %prereq_hash = ( "Bio::DB::Fasta" => 0, + "Bio::DB::Taxonomy" => 0, + "Bio::DB::EUtilities" => 0, + "Bio::FeatureIO" => 0, + "Bio::Seq" => 0, + "Bio::SeqIO" => 0, + "Bio::Tools::CodonTable" => 0, + "Bio::Tools::GFF" => 0, + "Carp" => 0, + "Clone" => 0, + "Cwd" => 0, + "Exporter" => 0, + "File::Basename" => 0, + "File::Copy" => 0, 
+ "File::Share" => 0, + "Getopt::Long" => 0, + "IO::File" => 0, + "IPC::Open2" => 0, + "IPC::Cmd" => 0, + "JSON" => 0, + "LWP::UserAgent" => 0, + "List::MoreUtils" => 0, + "POSIX" => 0, + "Pod::Usage" => 0, + "Scalar::Util" => 0, + "Sort::Naturally" => 0, + "Statistics::R" => 0, + "Time::Piece" => 0, + "Time::Seconds" => 0, + "Try::Tiny" => 0, + "URI::Escape" => 0, + "strict" => 0, + "warnings" => 0 + ); + +# ------------------- CREATE the WriteMakefile hash ---------------------------- +# will be used to create the WriteMakefile object + +my %WriteMakefileArgs = ( + NAME => 'GAAS', + AUTHOR => 'Jacques Dainat ', + VERSION_FROM => 'lib/GAAS/GAAS.pm', + ABSTRACT => 'Genome Assembly and Annotation Service toolkit at NBISweden', + LICENSE => 'gpl_3', + PREREQ_PM => \%prereq_hash, # give a ref to the hash + MIN_PERL_VERSION => '5.006', + BUILD_REQUIRES => {"ExtUtils::MakeMaker" => 6.31, + "File::ShareDir::Install" => 0 + }, + TEST_REQUIRES => {"Test::More" => 0 + }, + EXE_FILES => $ref_list_exe, + META_MERGE => { + 'meta-spec' => { version => 2 }, + resources => { + repository => { + type => 'git', + url => 'https://github.com/NBISweden/GAAS.git', + web => 'https://github.com/NBISweden/GAAS', + }, + bugtracker => {web => 'https://github.com/NBISweden/GAAS/issues'}, + homepage => 'https://nbis.se', + }, + }, + dist => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', }, + clean => { FILES => 'GAAS-*' }, +); + +my %FallbackPrereqs = %prereq_hash; + + +unless ( eval { ExtUtils::MakeMaker->VERSION(6.63_03) } ) { + delete $WriteMakefileArgs{TEST_REQUIRES}; + delete $WriteMakefileArgs{BUILD_REQUIRES}; + $WriteMakefileArgs{PREREQ_PM} = \%FallbackPrereqs; +} + +delete $WriteMakefileArgs{CONFIGURE_REQUIRES} + unless eval { ExtUtils::MakeMaker->VERSION(6.52) }; + +if ( $^O eq 'MSWin32' ) { + $WriteMakefileArgs{PREREQ_PM}{'Win32'} = $FallbackPrereqs{'Win32'} = '0'; +} + +WriteMakefile(%WriteMakefileArgs); + +#Mandatory otherwise the data files from the share folder will not be copied. 
+package MY; +use File::ShareDir::Install 'postamble'; diff --git a/README.md b/README.md index 1c3474d4f..d9388e11c 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,150 @@ - -[](https://nbis.se) +[![Build Status](https://travis-ci.org/NBISweden/GAAS.svg?branch=master)](https://travis-ci.org/NBISweden/GAAS) +GAAS +=========================================

Genome Assembly Annotation Service (GAAS)

-Contains development done in the GAA (Genome Assembly Annotation) Service. +Suite of tools related to Genome Assembly Annotation Service tasks. + +[](https://nbis.se) + +--------------------------- + +## Table of Contents + + * [What can GAAS do for you?](#what-can-gaas-do-for-you) + * [Installation](#installation) + * [Using conda](#using-conda) + * [Install](#install) + * [Update](#update) + * [Uninstall](#uninstall) + * [Old school](#old-school) + * [Prerequisites](#prerequisites) + * [Install](#install-1) + * [Update](#update-1) + * [Uninstall](#uninstall-1) + * [Change to a specific version](#change-to-a-specific-version) + * [Usage](#usage) + * [Repository structure](#repository-structure) --------------------------- +## What can GAAS do for you? + +The repository mainly contains tools and knowledge related to bioinformatics, most often to annotation. To access and install the tools, please follow the installation procedures below. For the knowledge, you are invited to visit the [knowledge](annotation/knowledge) part of the repo or, if you are looking specifically for genome assembly knowledge, [The Genome Assembly Workshop Knowledge Base](https://github.com/NBISweden/workshop-genome_assembly/wiki). 
+ +## Installation + +### Using conda + +#### Install + + ``` + conda install -c bioconda gaas + ``` + +#### Update + + ``` + conda update gaas + ``` + +#### Uninstall + ``` + conda uninstall gaas + ``` + +### Old school + +#### Prerequisites + * R + * Perl + Perl >= 5.8, and a list of perl modules that can be installed using cpan, cpanm or conda: + + * Install perl modules with cpanm + ``` + cpanm bioperl + cpanm Clone + cpanm Graph::Directed + cpanm LWP::UserAgent + cpanm Statistics::R + cpanm Sort::Naturally + cpanm File::Share + cpanm Moose + cpanm File::ShareDir::Install + cpanm Bio::DB::EUtilities + ``` + * Install perl modules with conda + + ``` + conda env create -f conda_environment_GAAS.yml + conda activate gaas + ``` + +#### Install + + ``` + git clone https://github.com/NBISweden/GAAS.git # Clone GAAS + cd GAAS # move into GAAS folder + perl Makefile.PL # Check all the dependencies* + make # Compile + make test # Test + make install # Install + ``` + +*If dependencies are missing you can install them using cpan/cpanm or use conda and load the environment conda_environment_GAAS.yml + +**Remark**: On MS Windows, instead of make you'd probably have to use dmake or nmake depending on the toolchain you have. + +#### Update +From the folder where the repository is located. + + ``` + git pull # Update to the latest GAAS + perl Makefile.PL # Check all the dependencies* + make # Compile + make test # Test + make install # Install + ``` + +#### Change to a specific version +From the folder where the repository is located. 
+ + ``` + git pull # Update the code + git checkout v0.1.1 # use version v0.1.1 (See releases tab for a list of available versions) + perl Makefile.PL # Check all the dependencies* + make # Compile + make test # Test + make install # Install + ``` + +#### Uninstall + + ``` + perl uninstall_GAAS + ``` + +## Usage + + ``` + script_name.pl -h + ``` + +--------------------------- + +## Repository structure + ## [__annotation__](annotation) -Annotation directory contains development related to annotation side of the service. +Annotation directory contains everything related to annotation side of the service. #### Shorcuts: - - [Cheat Sheets](annotation/CheatSheet) + - [knowledge](annotation/knowledge) - [Genome annotation workshop](https://nbisweden.github.io/workshop-genome_annotation/) - - [Tools](annotation/Tools/bin/) + - [Tools](annotation/tools) => All gff related work has been transplanted into [AGAT](https://github.com/NBISweden/AGAT) (11/2019) - - [Pipelines](https://github.com/NBISweden/pipelines/tree/master/bpipe) + - [Pipelines](https://github.com/NBISweden/pipelines-nextflow) ## [__assembly__](assembly) Assembly directory contains development related to assembly side of the service. @@ -25,107 +153,3 @@ Assembly directory contains development related to assembly side of the service. 
- [Genome assembly workshop](https://nbisweden.github.io/workshop-genome_assembly/) - [The Genome Assembly Workshop Knowledge Base](https://github.com/NBISweden/workshop-genome_assembly/wiki) ---------------------------- - -## Installation - - * Clone and install GAAS - -To use the tools in this repository, clone the directory and run `make install` to update some paths: -``` -git clone https://github.com/NBISweden/GAAS.git -cd GAAS -``` - -#### A) For the use through ***env*** - - * A.1) Updates paths in the environment profiles to point to the correct GAAS repository location - ``` - make install - ``` - - * A.2) Dependencies - * You might check that all dependencies are filled up. Depending the scripts you want to use, all dependencies are not required. - ``` - make check - ``` - * Install the missing perl dependencies - - * With cpanm - ``` - cpanm bioperl - cpanm Clone - cpanm Moose - cpanm Graph::Directed - cpanm LWP::UserAgent - cpanm Statistics::R - cpanm JSON - cpanm Sort::Naturally - cpanm Bio::DB::EUtilities - ``` - - * With conda - ``` - conda env create -f gaas_environment.yml - ``` - - * A.3) Load the correct profiles (add NBIS libs and tools to the PATH) - Three profiles are available to setup the necessary environment variables to use the scripts: - - * By default: - ``` - source profiles/activate_env - ``` - - * If you are on Rackham: - ``` - source profiles/activate_rackham_env - ``` - - * If you are on NBIS's servers: - ``` - source profiles/activate_nbis_env - ``` - * A.4) To get out of the nbis environment and restore your previous environment type - - ``` - deactivate - ``` - -#### B) For a permanent use - - * B.1) Dependencies - - * You might check that all dependencies are filled up. Depending the scripts you want to use, all dependencies are not required. 
- ``` - make check - ``` - - * Install the missing perl dependencies - - * With cpanm - ``` - cpanm bioperl - cpanm Clone - cpanm Moose - cpanm Graph::Directed - cpanm LWP::UserAgent - cpanm Statistics::R - cpanm JSON - cpanm Sort::Naturally - cpanm Bio::DB::EUtilities - ``` - - * With conda - ``` - conda env create -f conda_env.yml - ``` - - - * B.2) Add the path to the NBIS perl library as well as the bin folder containing all tools. You can add in you *~/.bashrc* or *~/.profile* file. - ``` - export PERL5LIB=$PERL5LIB:/pathTo/GAAS/annotation - export PATH=${PATH}:/pathTo/GAAS/annotation/Tools/bin - ``` - - diff --git a/annotation/LICENSE b/annotation/LICENSE deleted file mode 100644 index 9cecc1d46..000000000 --- a/annotation/LICENSE +++ /dev/null @@ -1,674 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. 
Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. 
If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. 
Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. 
- - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. 
Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. 
- - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. 
- - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. 
If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. 
- - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. - - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. 
- - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the 
material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. 
- - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. 
If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. 
- - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. - - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. 
The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
- - {one line to give the program's name and a brief idea of what it does.} - Copyright (C) {year} {name of author} - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - {project} Copyright (C) {year} {fullname} - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -<http://www.gnu.org/licenses/>. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -<http://www.gnu.org/philosophy/why-not-lgpl.html>. 
diff --git a/annotation/NBIS/GFF3/Feature_levels/features_level1.json b/annotation/NBIS/GFF3/Feature_levels/features_level1.json deleted file mode 100644 index f841ade1f..000000000 --- a/annotation/NBIS/GFF3/Feature_levels/features_level1.json +++ /dev/null @@ -1,21 +0,0 @@ -{ "_comment": "level1 are features without parent", - "EST_match":"1", - "cDNA_match":"2", - "gene":"3", - "lincrna_gene":"4", - "match":"5", - "mirna_gene":"6", - "ncrna_gene":"7", - "nucleotide_motif":"8", - "nucleotide_to_protein_match":"9", - "pirna_gene":"10", - "polypeptide":"11", - "protein_match":"12", - "pseudogene":"13", - "rrna_gene":"14", - "sirna_gene":"15", - "snorna_gene":"16", - "snrna_gene":"17", - "sts":"18", - "translated_nucleotide_match":"19" -} diff --git a/annotation/NBIS/GFF3/Feature_levels/features_level2.json b/annotation/NBIS/GFF3/Feature_levels/features_level2.json deleted file mode 100644 index e6c84ece1..000000000 --- a/annotation/NBIS/GFF3/Feature_levels/features_level2.json +++ /dev/null @@ -1,33 +0,0 @@ -{ "_comment": "level2 are features with parent and potentially a child (no child as example for match)", - "aberrant_processed_transcript":"gene", - "lcrna":"lincrna_gene", - "lincrna":"lincrna_gene", - "lnc_rna":"gene", - "match_part":"match", - "mirna":"mirna_gene", - "misc_rna":"gene", - "mrna":"gene", - "nc_primary_transcript":"gene", - "ncrna":"ncrna_gene", - "nmd_transcript_variant":"gene", - "piRNA":"pirna_gene", - "pre_mirna":"gene", - "processed_pseudogene":"pseudogene", - "processed_transcript":"gene", - "pseudogenic_transcript":"pseudogene", - "rna":"gene", - "rnase_mrp_rna":"gene", - "rnase_p_rna":"gene", - "rrna":"rrna_gene", - "similarity":"match", - "sirna_gene":"sirna_gene", - "snorna":"snorna_gene", - "snrna":"snrna_gene", - "srp_rna":"gene", - "tmrna":"gene", - "transcript":"gene", - "trna":"gene", - "trna_pseudogene":"pseudogene", - "unitary_pseudogene":"pseudogene", - "vaultrna":"gene" -} diff --git 
a/annotation/NBIS/GFF3/Feature_levels/features_level3.json b/annotation/NBIS/GFF3/Feature_levels/features_level3.json deleted file mode 100644 index 06739cc32..000000000 --- a/annotation/NBIS/GFF3/Feature_levels/features_level3.json +++ /dev/null @@ -1,16 +0,0 @@ -{ "_comment": "level3 features have parents, but no child. ( cds => exon mean that cds is included into an exon)", - "cds":"exon", - "exon":"1", - "five_prime_utr":"exon", - "intron":"1", - "non_canonical_five_prime_splice_site":"1", - "non_canonical_three_prime_splice_site":"1", - "selenocysteine":"1", - "sig_peptide":"exon", - "start_codon":"exon", - "stop_codon":"exon", - "stop_codon_read_through":"exon", - "three_prime_utr":"exon", - "tss":"exon", - "tts":"exon" -} diff --git a/annotation/NBIS/GFF3/Feature_levels/features_spread.json b/annotation/NBIS/GFF3/Feature_levels/features_spread.json deleted file mode 100644 index a07ee16dd..000000000 --- a/annotation/NBIS/GFF3/Feature_levels/features_spread.json +++ /dev/null @@ -1,6 +0,0 @@ -{ "_comment": "Here are described features that can be split over different locations", - "cds":"1", - "three_prime_utr":"1", - "five_prime_utr":"1", - "utr":"1" -} \ No newline at end of file diff --git a/annotation/NBIS/GFF3/Omniscient.pm b/annotation/NBIS/GFF3/Omniscient.pm deleted file mode 100644 index ddb71c95c..000000000 --- a/annotation/NBIS/GFF3/Omniscient.pm +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/perl -w - -package NBIS::GFF3::Omniscient; - -use strict; -use warnings; - -use NBIS::GFF3::Omniscient::OmniscientI; -use NBIS::GFF3::Omniscient::OmniscientO; -use NBIS::GFF3::Omniscient::OmniscientTools; -use NBIS::GFF3::Omniscient::Statistics; - -1; diff --git a/annotation/NBIS/GFF3/Omniscient/OmniscientI.pm b/annotation/NBIS/GFF3/Omniscient/OmniscientI.pm deleted file mode 100644 index aac9ddb0c..000000000 --- a/annotation/NBIS/GFF3/Omniscient/OmniscientI.pm +++ /dev/null @@ -1,3208 +0,0 @@ -#!/usr/bin/perl -w - -package NBIS::GFF3::Omniscient::OmniscientI; - 
-use strict; -use warnings; -use Try::Tiny; -use JSON; -use Cwd qw(cwd); -use Bio::Tools::GFF; -use File::Basename; -use File::Copy; -use Sort::Naturally; -use LWP::UserAgent; -use Bio::OntologyIO::obo; -use Clone 'clone'; -use Exporter; -use NBIS::GFF3::Omniscient::OmniscientTools; - -our @ISA = qw(Exporter); -our @EXPORT = qw(get_level select_gff_format check_mrna_positions - modelate_utr_and_cds_features_from_exon_features_and_cds_start_stop - slurp_gff3_file_JD _check_all_level1_positions _check_all_level2_positions - load_levels); -sub import { - NBIS::GFF3::Omniscient::OmniscientI->export_to_level(1, @_); # export to the direct caller: makes the EXPORT functions available when NBIS::GFF3::Omniscient::OmniscientI is loaded directly (normal case) - NBIS::GFF3::Omniscient::OmniscientI->export_to_level(2, @_); # export one level further up the call stack: makes the EXPORT functions available when loading through NBIS::GFF3::Omniscient -} - -=head1 SYNOPSIS - - -=head1 DESCRIPTION - - A library to handle any kind of GFF file and store it in memory in GFF3 "full" format. - Full format means that exons with several parents are expanded, and that ID and Parent attributes are added everywhere (even though a level3 ID is not mandatory). - -=head1 VERSION - - Perl library last edited May-2019. - -=head1 CONTACT - jacques.dainat@nbis.se (Jacques Dainat) - -=cut - -#===== TO DO ===== -# When creating a parent check its type from the value in the constant hash - -########################## -# DEFINE CONSTANT # -use constant PREFIXL2 => "nbis_noL2id"; - -##################################### -# DEFINE file scope variable # -my $createL3forL2orphan = 1; -my $fh_error = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -my $LEVEL1; # level1 are features without parent -my $LEVEL2; # level2 are features with a parent and potentially a child (e.g. no child for match) -my $LEVEL3; # level3 features have parents, but no child. 
( cds => "exon" mean that cds is included into an exon) -my $SPREADFEATURE; # feature that can be split over different locations - -# Comon_tag is used in old gff format and in gtf (with gene_id) to group features together. Priority to comonTag compare to sequential read -# COMONTAG is accessible from the whole file. if a tag has been specified by a user, it is added to this list when slurp_gff3_file_JD is called -my @COMONTAG = ('locus_tag','gene_id'); - -# ====== PURPOSE =======: -# Save in omniscient hash (sorted in a specific way (3 levels)) a whole gff3 file -# Parser phylosophy: Parse by Parent/child ELSE -# Parse by comon_tag ELSE -# Parse by sequential (mean group features in a bucket, and the bucket change at each level2 feature, and bucket are join in a comon tag at each new L1 feature) -# So if only level3 feature (i.e rast or some prokka files, sequential will not work. A comon_tag must be provided) -# Priority Parent > locus_tag > sequential -# ====== INPUT =======: -# $file => string (file) / list / hash -# $locus_tag => tag to consider for gathering features (in top of the default one) -# $gff_version => Int (if is used, force the parser to use this gff parser instead of guessing) -# $verbose =>define the deepth of verbosity -# $no_check is to deactivate sanity check. We can tune the deactivation of check steps using no_check_skip. -# $no_check_skip [list] is to avoid the deactivation of all check with the $no_check parameter. Check steps listed here will not be deactivated. -sub slurp_gff3_file_JD { - - my $start_run = time(); - my $previous_time = undef; - -# +-----------------------------------------+ -# | HANDLE ARGUMENTS | -# +-----------------------------------------+ - my ($args) = @_ ; - - # Check we receive a hash as ref - if(ref($args) ne 'HASH'){ print "Hash Arguments expected for slurp_gff3_file_JD. 
Please check the call.\n";exit; } - - # Declare all variables and fill them - my ($file, $gff_version, $locus_tag, $verbose, $no_check, $merge_loci, $no_check_skip, $expose_feature_levels); - #first define verbosity - if( ! defined($args->{verbose}) ) {$verbose = 0;} else{ $verbose = $args->{verbose}; } - print "=> parse option and metadata:\n" if ($verbose > 0); - #Secondly check if expose_feature_levels option - if( ! defined($args->{expose_feature_levels})) {$expose_feature_levels = undef;} - else{ $expose_feature_levels = $args->{expose_feature_levels}; - print " expose json feature level files\n" if ($verbose > 0);} # list of check to skip - load_levels($verbose, $expose_feature_levels); # HANDLE feature level - if( defined($args->{input})) {$file = $args->{input};} else{ print "Input data --input is mandatory when using slurp_gff3_file_JD!"; exit;} - if( ! defined($args->{gff_version})) {$gff_version = undef;} else{ $gff_version = $args->{gff_version}; } # force using gff parser version - if( ! defined($args->{locus_tag})) {$locus_tag = undef;} else{ push @COMONTAG, $args->{locus_tag}; } #add a new comon tag to the list if provided.} - if( ! defined($args->{no_check})) {$no_check = undef;} else{ $no_check = $args->{no_check}; print " no_check option activated\n" if ($verbose > 0); } # skip checks - if( ! defined($args->{no_check_skip})) {$no_check_skip = [];} else{ $no_check_skip = $args->{no_check_skip}; } # list of check to skip - if( ! 
defined($args->{merge_loci})) { $merge_loci = undef; print " merge_locus option deactivated\n" if ($verbose > 0);} - else{ $merge_loci = $args->{merge_loci}; print " merge_locus option activated\n" if ($verbose > 0);} # activat merge locus option - -# +-----------------------------------------+ -# | HANDLE GFF HEADER | -# +-----------------------------------------+ - my $gff3headerInfo = _check_header($file); - -# +-----------------------------------------+ -# | HANDLE SOFA (feature-ontology) | -# +-----------------------------------------+ - my $ontology = {}; - my $ontology_obj = _handle_ontology($gff3headerInfo, $verbose); - if($ontology_obj){ - $ontology = create_term_and_id_hash($ontology_obj); - } - if(! keys %{$ontology} ){ #hash is empty - print " No data retrieved among the feature-ontology.\n" if ($verbose > 0); - } - -# +-----------------------------------------+ -# | HANDLE WARNING | -# +-----------------------------------------+ - my %WARNS; - my %globalWARNS; - my $nbWarnLimit=10; # Handle to not print to much warning - local $SIG{__WARN__} = sub { - my $message = shift; - my @thematic=split /@/,$message ; - - if($thematic[0] eq "GLOBAL"){ #extract global warning - push @{$globalWARNS{$thematic[1]}}, $thematic[2]; - } - else{ - $WARNS{$thematic[0]}++; - if($verbose > 0){ - if ($WARNS{$thematic[0]} <= $nbWarnLimit){ - print $message; - } - if($WARNS{$thematic[0]} == $nbWarnLimit){ - print "$thematic[0] ************** Too much WARNING message we skip the next **************\n"; - } - } - } - }; - -# +-------------------------------------------------------------------------+ -# | HANDLE FEATUTRES PARSING ACCORDING TO TYPE OF INPUTS | -# +-------------------------------------------------------------------------+ - my %mRNAGeneLink; #Hast that keep track about link between l2 and l1 - my %omniscient; #Hast where all the feature will be saved - my %duplicate;# Hash to store duplicated feature info - my %miscCount;# Hash to store any counter. 
Will be use to create a new uniq ID - my %uniqID;# Hash to follow up with an uniq identifier every feature - my %uniqIDtoType; # Hash to follow up with an uniq identifier every feature type - my %locusTAG; # Hash to follow up the locus tag met - my %infoSequential;# Hash to store sequential features - my $locusTAGvalue=undef; - my $last_l1_f=undef; - my $last_l2_f=undef; - my $last_l3_f=undef; - my $last_f=undef;# last feature handled - my $lastL1_new =undef; # Bolean to check if last l1 feature is a newly created one. Important to deal with strict sequential - - if($verbose > 0) { print "=> start parsing:\n";} - - # ============================> ARRAY CASE <============================ - if(ref($file) eq 'ARRAY'){ - foreach my $feature (@{$file}) { - ($locusTAGvalue, $last_l1_f, $last_l2_f, $last_l3_f, $last_f, $lastL1_new) = - manage_one_feature($ontology, $feature, \%omniscient, \%mRNAGeneLink, \%duplicate, \%miscCount, \%uniqID, \%uniqIDtoType, \%locusTAG, \%infoSequential, $locusTAGvalue, $last_l1_f, $last_l2_f, $last_l3_f, $last_f, $lastL1_new, $verbose); - } - } - # ============================> HASH CASE <============================ - elsif(ref($file) eq 'HASH'){ - - foreach my $level (keys %{$file}){ - if ( ref($file->{$level}) eq 'HASH'){ #level1,level2,#level3 - foreach my $tag (keys %{$file->{$level}}){ - foreach my $id (keys %{$file->{$level}{$tag}}){ - if ( ref($file->{$level}{$tag}{$id}) eq 'ARRAY'){ #level2,#level3 - foreach my $feature ( @{$file->{$level}{$tag}{$id} }){ - ($locusTAGvalue, $last_l1_f, $last_l2_f, $last_l3_f, $last_f, $lastL1_new) = - manage_one_feature($ontology, $feature, \%omniscient, \%mRNAGeneLink, \%duplicate, \%miscCount, \%uniqID, \%uniqIDtoType, \%locusTAG, \%infoSequential, $locusTAGvalue, $last_l1_f, $last_l2_f, $last_l3_f, $last_f, $lastL1_new, $verbose); - } - } - else{ #level1 - my $feature = $file->{$level}{$tag}{$id}; - ($locusTAGvalue, $last_l1_f, $last_l2_f, $last_l3_f, $last_f, $lastL1_new) = - 
manage_one_feature($ontology, $feature, \%omniscient, \%mRNAGeneLink, \%duplicate, \%miscCount, \%uniqID, \%uniqIDtoType, \%locusTAG, \%infoSequential, $locusTAGvalue, $last_l1_f, $last_l2_f, $last_l3_f, $last_f, $lastL1_new, $verbose); - } - } - } - } - else{ #extra list of feature - foreach my $feature ( @{$file->{$level}} ) { - ($locusTAGvalue, $last_l1_f, $last_l2_f, $last_l3_f, $last_f, $lastL1_new) = - manage_one_feature($ontology, $feature, \%omniscient, \%mRNAGeneLink, \%duplicate, \%miscCount, \%uniqID, \%uniqIDtoType, \%locusTAG, \%infoSequential, $locusTAGvalue, $last_l1_f, $last_l2_f, $last_l3_f, $last_f, $lastL1_new, $verbose); - } - } - } - } - # ============================> FILE CASE <============================ - else{ - - #GFF format used for parser - my $format; - if($gff_version){$format = $gff_version;} - else{ $format = select_gff_format($file);} - - print " GFF version parser used: $format\n" if ($verbose > 0) ; - my $gffio = Bio::Tools::GFF->new(-file => $file, -gff_version => $format); - - #read every lines - while( my $feature = $gffio->next_feature()) { - if($format eq "1"){_gff1_corrector($feature, $verbose);} # case where gff1 has been used to parse.... 
we have to do some attribute manipulations - ($locusTAGvalue, $last_l1_f, $last_l2_f, $last_l3_f, $last_f, $lastL1_new) = - manage_one_feature($ontology, $feature, \%omniscient, \%mRNAGeneLink, \%duplicate, \%miscCount, \%uniqID, \%uniqIDtoType, \%locusTAG, \%infoSequential, $locusTAGvalue, $last_l1_f, $last_l2_f, $last_l3_f, $last_f, $lastL1_new, $verbose); - } - - #close the file - $gffio->close(); - } - - #------- Inform user about warnings encountered during parsing --------------- - foreach my $thematic (keys %WARNS){ - my $nbW = $WARNS{$thematic}; - if($nbW > $nbWarnLimit){ - print "$nbW warning messages: $thematic\n"; - } - } - _handle_globalWARNS(\%globalWARNS, $ontology); - delete $globalWARNS{$_} for (keys %globalWARNS); # re-initialize the hash - delete $WARNS{$_} for (keys %WARNS); # re-initialize the hash - - # Parsing time - if($verbose > 0) { print " done in ", time() - $start_run," seconds\n"; $previous_time = time(); } - - #report detected duplicates - print "=> report duplicates:\n" if ($verbose > 0) ; - _check_duplicates(\%duplicate, \%omniscient, $verbose); - if( $verbose > 0 ) {print " done in ",time() - $previous_time," seconds\n\n" ; $previous_time = time(); } - -# +-----------------------------------------+ -# | CHECK OMNISCIENT | -# +-----------------------------------------+ - if(! $no_check ){ - _printSurrounded("- Start extra check -",50,"*","\n") if ($verbose > 0) ; - } - - if(! $no_check or grep( /^_check_sequential/, $no_check_skip ) ) { - #Check sequential if we can fix cases. Hash to be done first, else is risky that we remove orphan L1 feature ... 
that are not yet linked to a sequential bucket - _printSurrounded("Check1: _check_sequential",30,"*") if ($verbose > 0) ; - if( keys %infoSequential ){ #hash is not empty - _check_sequential(\%infoSequential, \%omniscient, \%miscCount, \%uniqID, \%uniqIDtoType, \%locusTAG, \%mRNAGeneLink, $verbose); - undef %infoSequential; - } - else{ print " Nothing to check as sequential !\n" if($verbose > 0) } - if($verbose > 0) {print " done in ",time() - $previous_time," seconds\n\n"; $previous_time = time();} - } - - if(! $no_check or grep( /^_check_l2_linked_to_l3/, $no_check_skip ) ) { - #Check relationship between l3 and l2 - _printSurrounded("Check2: _check_l2_linked_to_l3",30,"*") if($verbose > 0 ) ; - _check_l2_linked_to_l3(\%omniscient, \%mRNAGeneLink, \%miscCount, \%uniqID, \%uniqIDtoType, $verbose); # When creating L2 missing we create as well L1 if missing too - if($verbose > 0) {print " done in ",time() - $previous_time," seconds\n\n" ; $previous_time = time();} - } - - if(! $no_check or grep( /^_check_l1_linked_to_l2/, $no_check_skip ) ) { - #Check relationship between mRNA and gene. / gene position are checked! If No Level1 we create it ! - _printSurrounded("Check3: _check_l1_linked_to_l2",30,"*") if ($verbose > 0 ) ; - _check_l1_linked_to_l2(\%omniscient, $verbose); - if($verbose > 0) {print " done in ",time() - $previous_time," seconds\n\n" ; $previous_time = time();} - } - - if(! $no_check or grep( /^_remove_orphan_l1$/, $no_check_skip ) ) { - #check level1 has subfeature else we remove it - _printSurrounded("Check4: _remove_orphan_l1",30,"*") if ($verbose > 0 ) ; - _remove_orphan_l1(\%omniscient, \%miscCount, \%uniqID, \%uniqIDtoType, \%mRNAGeneLink, $verbose); #or fix if level2 is missing (refseq case) - if($verbose > 0) {print " done in ",time() - $previous_time," seconds\n\n" ; $previous_time = time();} - } - - if(! $no_check or grep( /^_check_exons/, $no_check_skip ) ) { - #Check relationship L3 feature, exons have to be defined... 
/ mRNA position are checked! - _printSurrounded("Check5: _check_exons",30,"*") if ($verbose > 0 ) ; - _check_exons(\%omniscient, \%mRNAGeneLink, \%miscCount, \%uniqID, \%uniqIDtoType, $verbose); - if($verbose > 0) {print " done in ",time() - $previous_time," seconds\n\n"; $previous_time = time();} - } - - if(! $no_check or grep( /^_check_utrs/, $no_check_skip ) ) { - #Check relationship L3 feature, exons have to be defined... / mRNA position are checked! - _printSurrounded("Check6: _check_utrs",30,"*") if ($verbose > 0 ) ; - _check_utrs(\%omniscient, \%mRNAGeneLink, \%miscCount, \%uniqID, \%uniqIDtoType, $verbose); - if($verbose > 0) {print " done in ",time() - $previous_time," seconds\n\n"; $previous_time = time();} - } - - if(! $no_check or grep( /^_check_all_level2_positions/, $no_check_skip ) ) { - # Check rna positions compared to its l2 features - _printSurrounded("Check7: _check_all_level2_positions",30,"*") if ($verbose > 0 ) ; - _check_all_level2_positions(\%omniscient, $verbose); - if($verbose > 0) {print " done in ",time() - $previous_time," seconds\n\n" ; $previous_time = time();} - } - - if(! $no_check or grep( /^_check_all_level1_positions/, $no_check_skip ) ) { - # Check gene positions compared to its l2 features - _printSurrounded("Check8: _check_all_level1_positions",30,"*") if ($verbose > 0 ) ; - _check_all_level1_positions(\%omniscient, $verbose); - if($verbose > 0) {print " done in ",time() - $previous_time," seconds\n\n" ; $previous_time = time();} - } - - #check loci names (when overlap should be the same if type is the same) - if ( $merge_loci ){ - _printSurrounded("merge overlaping features into \nsame locus",30,"-") if ($verbose > 0 ) ; # ancien check9. Better probably to keep it before check 10 anyway - _merge_overlap_features(\%omniscient, \%mRNAGeneLink, $verbose); - if($verbose > 0) {print " done in ",time() - $previous_time," seconds\n\n" ; $previous_time = time();} - } - - if(! 
$no_check or grep( /^_check_identical_isoforms/, $no_check_skip ) ) { - #check identical isoforms - _printSurrounded("Check10: _check_identical_isoforms",30,"*") if ($verbose > 0 ) ; - _check_identical_isoforms(\%omniscient, \%mRNAGeneLink, $verbose); - if($verbose > 0) {print " done in ",time() - $previous_time," seconds\n\n" ; $previous_time = time();} - } - - #------- Inform user about warnings encountered during checking --------------- - foreach my $thematic (keys %WARNS){ - my $nbW = $WARNS{$thematic}; - if($nbW > $nbWarnLimit){ - print "$nbW warning messages: $thematic\n"; - } - } - _handle_globalWARNS(\%globalWARNS, $ontology); - - if(! $no_check ){ - _printSurrounded("- End extra check -\ndone in ".(time() - $previous_time)." seconds",50,"*","\n") if ($verbose > 0); - } - - print "=> OmniscientI total time: ",(time() - $start_run)," seconds\n" if ($verbose > 0); - - #return - return \%omniscient, \%mRNAGeneLink ; -} - -##============================================================================== -##============================================================================== - -# ====== PURPOSE =======: -# The method read a gff3 feature, Check for the sanity according to what will has been read before, and the whole features that will be read. -# Designed to be used within a loop going through a big amout of feature -# ====== INPUT =======: -# $feature => gff feature object -# example: scaffold625 maker exon 341518 341628 . + . 
ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717 -# $omniscient => hash to store all the gff feature in 3 levels structures -# example at level1: $omniscient->{"level1"}{$primary_tag}{$id}=$feature; -# example at other levels: $omniscient->{"levelX"}{$primary_tag}{$parent}= [$feature]; -# $mRNAGeneLink => hash to keep track link from L2 to l1 (avoid to go through the whole omniscient to retrieve this information) -# example: $mRNAGeneLink->{lc($id)}=$parent; -# $duplicate => hash to keep track of duplicates found -# example: duplicate->{$level}{$primary_tag}{$id} = [$feature]; -# $miscCount => hash that contains a counter for each feature type. It is used to create uniq ID -# example: $miscCount->{$primary_tag}++; -# $uniqID => hash of uniqID (UniqID link to the original ID) -# example: $uniqID->{$uID}=$id; -# $uniqIDtoType => hash to keep track about the feature type linked to the uniqID (useful for handling SPREADFEATURES) -# example: $uniqIDtoType->{$uID}=$primary_tag; -# $locusTAG_uniq => hash of comon tag found when reading the grouped features sequentialy -# example: $locusTAG_uniq->{'level1'}{$id}=$id; -# $infoSequential => hash that contains features grouped together in a sequential order. (Useful as example when no Parent tag and/or locus tag missing) -# structure at level1: $infoSequential->{$id}{'level1'}=$id; -# structure at other levels: $infoSequential->{$locusTAGvalue}{lc($l2_id)}{'level3'}}, [$feature1,$feature2] ; -# $last_locusTAGvalue => String: Last locus tag that has been met when parsing the file (If no locus tag found it will be the last feature L1 ID) -# example: CLUHARG00000008717 -# $last_l1_f => String: Last l1 feature that has been met when parsing the file -# example: scaffold625 maker gene 341518 341628 . + . ID=CLUHARG00000008717 -# $last_l2_f => String: Last L2 feature that has been met when parsing the file -# example: scaffold625 maker mRNA 341518 341628 . + . 
ID=CLUHART00000008717;Parent=CLUHARG00000008717 -# $last_l3_f => String: Last L3 feature that has been met when parsing the file -# example: scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717 -# $verbose => INT: Verbose level. Bigger the value is, deeper the information sould be. 0 = quiet -# example: 2 -# ====== OUTPUT======= : Omniscient Hash -sub manage_one_feature{ - - my ($ontology, $feature, $omniscient, $mRNAGeneLink, $duplicate, $miscCount, $uniqID, $uniqIDtoType, $locusTAG_uniq, $infoSequential, $last_locusTAGvalue, $last_l1_f, $last_l2_f, $last_l3_f, $last_f, $lastL1_new, $verbose)=@_; - - my $seq_id = $feature->seq_id; #col1 - my $source_tag = lc($feature->source_tag); #col2 - my $primary_tag = lc($feature->primary_tag); #col3 - my $start = $feature->start; #col4 - my $end = $feature->end; #col5 - my $score = $feature->score; #col6 - my $strand = $feature->strand; #col7 - my $frame = $feature->frame; #col8 - #Attribute-value #col9 - my $id= undef; - my $parent= undef; - my $locusTAGvalue=undef; - -# +-------------------------------+ -# | CKECK SEQUENCE ONTOLOGY | -# +-------------------------------+ - if(! exists($ontology->{$primary_tag}) ) { - warn "GLOBAL@"."ontology1@".$primary_tag."@"; - } - -# +----------------------------------------------------------------------------+ -# MANAGE LEVEL1 => feature WITHOUT parent -# +----------------------------------------------------------------------------+ - if( get_level($feature) eq 'level1' ) { - - ########## - # get ID # - $id = lc(_check_uniq_id($omniscient, $miscCount, $uniqID, $uniqIDtoType, $feature)); - _save_common_tag_value_top_feature($feature, $locusTAG_uniq, 'level1'); - - ##################### - # Ckeck duplication # - if(! 
_it_is_duplication($duplicate, $omniscient, $uniqID, $feature)){ - - ################ - # Save feature # - $last_l1_f = $feature; - print "::::::::::Push-L1-omniscient level1 || $primary_tag || $id = ".$feature->gff_string()."\n" if ($verbose > 1); - $omniscient->{"level1"}{$primary_tag}{$id}=$feature; - $locusTAG_uniq->{'level1'}{$id}=$id; - - ############# - # COMON TAG # - $locusTAGvalue =_get_comon_tag_value($feature, $locusTAG_uniq, 'level1'); - - if($locusTAGvalue){ - print "::::::::::Push-L1-sequential $locusTAGvalue || level1 == $id\n" if ($verbose > 1); - $locusTAG_uniq->{'level1'}{$locusTAGvalue}=$id; - $infoSequential->{$id}{'level1'}=$id; - } - else{ - $locusTAGvalue=$id; - } - - ################# - #reinitialization - $last_l2_f=undef; #opposite to I have a comon tag - $last_l3_f=undef; - } - return $id, $last_l1_f, $last_l2_f, $last_l3_f, $last_l1_f, $lastL1_new; - } - -# +----------------------------------------------------------------------------+ -# MANAGE LEVEL2 => feature WITH child and WITH parent -# +----------------------------------------------------------------------------+ - elsif ( get_level($feature) eq 'level2' ) { - - #reinitialization - $last_l3_f=undef; - - ########## - # get ID # - $id = _check_uniq_id($omniscient, $miscCount, $uniqID, $uniqIDtoType, $feature); - - ############## - # get Parent # - #GFF case - if($feature->has_tag('Parent')){ - $parent = lc($feature->_tag_value('Parent')); - $locusTAGvalue=$parent; - _save_common_tag_value_top_feature($feature, $locusTAG_uniq, 'level2'); - } - - #GTF case - elsif($feature->has_tag('gene_id') ){ - $parent = lc($feature->_tag_value('gene_id')); - create_or_replace_tag($feature,'Parent',$feature->_tag_value('gene_id')); #modify Parent To keep only one - $locusTAGvalue=$parent; - _save_common_tag_value_top_feature($feature, $locusTAG_uniq, 'level2'); - } - else{ - warn "WARNING gff3 reader level2 : No Parent attribute found @ for the feature: ".$feature->gff_string()."\n"; - - 
################# - # COMON TAG PART1 - $locusTAGvalue =_get_comon_tag_value( $feature, $locusTAG_uniq, 'level2'); - - - ###################### - # NEED THE LEVEL1 ID # - my $l1_ID=""; - # If I don't have a last_l1_f I create one. The Id can be used as comonTag. - # The feature can also be used later if comon_tag was existing, but mising for one of the feature. - # If we have comon tag, we check we are changing from the previous one before to create a new level1 feature. - # It's to deal with potential level2 (like mRNA isoforms). - if(! $last_l1_f or ($locusTAGvalue and ($locusTAGvalue ne $last_locusTAGvalue) ) ){ - print "create L1 feature\n" if ($verbose > 2); - $l1_ID = _create_ID($miscCount, $uniqID, $uniqIDtoType, $primary_tag, $id, "nbis_noL1id"); - $last_l1_f = clone($feature); - create_or_replace_tag($last_l1_f,'ID',$l1_ID); #modify Parent To keep only one - $last_l1_f->primary_tag('gene'); - $lastL1_new = 1; - } - else{ # case where previous level1 exists - # Stricly sequential at level2 feature. 
We create a new L1 at every L2 met except if two L2 are in a row - if ( ($lastL1_new and not exists($LEVEL2->{$last_f->primary_tag}) ) and (!$locusTAGvalue or ($locusTAGvalue ne $last_locusTAGvalue) ) ){ # if previous L1 newly created and last feature is not f2 (if several f2 in a row we attach them to the same newly created l1 feature) - print "create L1 feature stritcly\n" if ($verbose > 2); - $l1_ID = _create_ID($miscCount, $uniqID, $uniqIDtoType, $primary_tag, $id, "nbis_noL1id"); - $last_l1_f = clone($feature); - create_or_replace_tag($last_l1_f,'ID',$l1_ID); #modify Parent To keep only one - $last_l1_f->primary_tag('gene'); - $lastL1_new = 1; - $lastL1_new = 1; - } - else{ - print "take last L1 feature\n" if ($verbose > 2); - $l1_ID=$last_l1_f->_tag_value('ID'); - $lastL1_new = undef; - } - } - create_or_replace_tag($feature,'Parent',$l1_ID); #modify Parent To keep only one - - ################# - # COMON TAG PART2 - if($locusTAGvalue){ #Previous Level up feature had a comon tag - print "::::::::::Push-L2-Sequential-1 $locusTAGvalue || ".lc($id)." || level2 == ".$feature->gff_string."\n" if ($verbose > 1); - $infoSequential->{$locusTAGvalue}{lc($id)}{'level2'} = $feature ; - - return $locusTAGvalue, $last_l1_f, $feature, $last_l3_f, $feature, $lastL1_new; #### STOP HERE AND RETURN - } - else{ - - print "::::::::::Push-L2-omniscient-2: level2 || ".$primary_tag." || ".lc($l1_ID)." == ".$feature->gff_string."\n" if ($verbose > 1); - push (@{$omniscient->{"level2"}{$primary_tag}{lc($l1_ID)}}, $feature); - - # keep track of link between level2->leve1 # - if (! exists ($mRNAGeneLink->{lc($id)})){ - $mRNAGeneLink->{lc($id)}=$l1_ID; - } - - return lc($l1_ID) , $last_l1_f, $feature, $last_l3_f, $feature, $lastL1_new; #### STOP HERE AND RETURN - } - } - - ##################### - # Ckeck duplication # - if(! 
_it_is_duplication($duplicate, $omniscient, $uniqID, $feature)){ - - ############################################ - # keep track of link between level2->leve1 # - if (! exists ($mRNAGeneLink->{lc($id)})){ - $mRNAGeneLink->{lc($id)}=$parent; - } - - #################### - # SAVE THE FEATURE # - print "::::::::::Push-L2-omniscient-3 level2 || $primary_tag || $parent == feature\n" if ($verbose > 1); - push (@{$omniscient->{"level2"}{$primary_tag}{lc($parent)}}, $feature); - } - return $last_locusTAGvalue, $last_l1_f, $feature, $last_l3_f, $feature, $lastL1_new; - } - -# +----------------------------------------------------------------------------+ -# MANAGE LEVEL3 => feature WITHOUT child -# +----------------------------------------------------------------------------+ - elsif ( get_level($feature) eq 'level3' ){ - - # get ID # - $id = _check_uniq_id($omniscient, $miscCount, $uniqID, $uniqIDtoType, $feature); - - # +-------------------------------+ - # | GET PARENT L3 | - # +-------------------------------+ - my @parentList; - - ################### - # GFF case # - if($feature->has_tag('Parent')){ - @parentList = $feature->get_tag_values('Parent'); - $locusTAGvalue=$last_locusTAGvalue; - _save_common_tag_value_top_feature($feature, $locusTAG_uniq, 'level3'); - } - - ################### - # GTF case # - elsif($feature->has_tag('transcript_id') ){ - @parentList = $feature->get_tag_values('transcript_id'); - create_or_replace_tag($feature,'Parent',$feature->_tag_value('transcript_id')); #modify Parent To keep only one - $locusTAGvalue=$last_locusTAGvalue; - _save_common_tag_value_top_feature($feature, $locusTAG_uniq, 'level3'); - } - - ################### In that case we create a uniq parentID to create a proper omniscient structure. - # No parent case # But the feature itself stay intact without parentID. 
- else{ - my $play_this_game=1; - warn "WARNING gff3 reader level3: No Parent attribute found @ for the feature: ".$feature->gff_string()."\n"; - - ################# - # COMON TAG PART1 - $locusTAGvalue = _get_comon_tag_value( $feature, $locusTAG_uniq, 'level3'); - - ###################### - # NEED THE LEVEL2 ID # - my $l2_id=""; - - #To keep track of locus tag that has been spread over the file, and a piece is found later - my $skip_last_l2=undef; - if($last_l2_f and $locusTAGvalue){ - - if(exists_keys ($locusTAG_uniq, ('linkl2l1', lc($last_l2_f->_tag_value('ID') ) ) ) ){ - if (lc($locusTAG_uniq->{'linkl2l1'}{lc( $last_l2_f->_tag_value('ID') )}) ne lc($locusTAGvalue)){ - $skip_last_l2=1; - print "skip last l2\n" if ($verbose > 0); - } - } - } - - # Just to avoid to have parent undef in case there is no parent feature define for the last_l2_f - my $parent_of_last_l2 = "@@@@"; - if($last_l2_f and $last_l2_f->has_tag('Parent')){ $parent_of_last_l2 = lc($last_l2_f->_tag_value('Parent')); } - - # case where No level2 feature defined yet - I will need a bucketL2 - # OR common tag changed (= level1/level2 different) so we have to create a new level2 tag - # but only if the last_comon tag is different as the parent of the last_l2_f - # (In that case we can use the last L2 feature. It was missing the comon tag in it). - if(! $last_l2_f or - ($locusTAGvalue and ($locusTAGvalue ne $last_locusTAGvalue) - and $last_locusTAGvalue ne $parent_of_last_l2 or $skip_last_l2) ){ - print "Come in the complex case L3!!!\n" if ($verbose > 3); - ####################### - # Change referentiel => based on the last L2 link to this locus - ####################### - #use Data::Dumper; print Dumper($locusTAG_uniq);print "$locusTAGvalue"; - # case were locus already met before (feature are spread within the file), we link the L3 to the last l2 of this locus. 
- if( exists_keys($locusTAG_uniq, ('level2', $locusTAGvalue) ) ){ - print "Complex case L3 1 !!!\n"if ($verbose > 3); - $last_l2_f = @{$locusTAG_uniq->{'level2'}{$locusTAGvalue}}[$#{$locusTAG_uniq->{'level2'}{$locusTAGvalue}}]; - $l2_id = $last_l2_f->_tag_value('ID'); - foreach my $tag_l1 (keys %{$omniscient->{'level1'}}){ - if( exists_keys($omniscient,('level1', $tag_l1, $locusTAG_uniq->{'linkl2l1'}{lc($l2_id)}))){ - $last_l1_f = $omniscient->{'level1'}{$tag_l1}{$locusTAG_uniq->{'linkl2l1'}{lc($l2_id)}}; - } - } - } - else{ - print "Complex case L3 2 !!!\n" if ($verbose > 3); - # case we can catch parent from previous feature L3 - if($last_l3_f and $last_l3_f->has_tag('Parent')){ # Need to not be the first feature because we need a previous feature - - print "Complex case 2.1 !!!\n" if ($verbose > 3); - my $previousL3 =_get_comon_tag_value( $last_l3_f, $locusTAG_uniq, 'level3'); - - if ($locusTAGvalue and $previousL3 and ($locusTAGvalue eq $previousL3)){ - - print "Complex case 2.1.1 !!!\n" if ($verbose > 3); - $l2_id = $last_l3_f->_tag_value("Parent"); - create_or_replace_tag($feature,'Parent', $l2_id); - push @parentList, $l2_id; - $play_this_game=undef; # Only place where we skip this game - } - } - if ($play_this_game){ - print "Complex case 2.2 !!!\n" if ($verbose > 3); - $l2_id = _create_ID($miscCount, $uniqID, $uniqIDtoType, $primary_tag, $id, PREFIXL2); - $last_l2_f = clone($feature); - create_or_replace_tag($last_l2_f,'ID',$l2_id); #modify Parent To keep only one - $last_l2_f->primary_tag('RNA'); - } - } - } - # case where previous level2 exists - else{ - print "case where previous level2 exists\n" if ($verbose > 3); - $l2_id=$last_l2_f->_tag_value('ID'); - } - - # Let's play the no parent case. 
We will return from that part of the code - # We don't play this game only if we decided finally to take the same parent - # value as previous feature - if ($play_this_game){ - - create_or_replace_tag($feature,'Parent',$l2_id); #modify Parent To keep only one - - ############# - # COMON TAG Part2 - if($locusTAGvalue){ #Previous Level up feature had a comon tag - print "::::::::::Push-L3-sequential-1 $locusTAGvalue || ".lc($l2_id)." || level3 == ".$feature->gff_string."\n" if ($verbose > 1); - #if($feature->_tag_value("Parent") eq ""){exit;} - ### TAKE LAST L2 of the locus tag iF exist ! - push( @{$infoSequential->{$locusTAGvalue}{lc($l2_id)}{'level3'}}, $feature ); - return $locusTAGvalue, $last_l1_f, $last_l2_f, $feature, $feature, $lastL1_new; #### STOP HERE AND RETURN - } - else{# No comon tag found - ###################### - # NEED THE LEVEL1 ID # - if(!$last_l1_f and $last_l3_f){ #particular case : Two l3 that follow each other, but first one has locus_tag but not the second - print "::::::::::Push-L3-sequential-2 $last_locusTAGvalue || ".lc($l2_id)." || level3 == ".$feature->gff_string."\n" if ($verbose > 1); - push( @{$infoSequential->{$last_locusTAGvalue}{lc($l2_id)}{'level3'}}, $feature ); - return $last_locusTAGvalue, $last_l1_f, $last_l2_f, $feature, $feature, $lastL1_new; - } - else{ - my $l1_id=""; - if($last_l1_f){ # case where previous level1 exists - $l1_id=$last_l1_f->_tag_value('ID'); - } - else{ # case where No level1 feature defined yet - I will need a bucketL1 - $l1_id = _create_ID($miscCount, $uniqID, $uniqIDtoType, $primary_tag, $id, "nbis_noL1id"); - $last_l1_f = clone($feature); - create_or_replace_tag($last_l1_f,'ID',$l1_id); #modify Parent To keep only one - $last_l1_f->primary_tag('gene'); - } - - #push( @{$infoSequential->{lc($l1_id)}{lc($l2_id)}{'level3'}}, $feature ); - print "::::::::::Push-L3-omiscient-3: level3 ".$primary_tag." || ".lc($l2_id)." 
== ".$feature->gff_string."\n" if ($verbose > 1); - push (@{$omniscient->{"level3"}{$primary_tag}{lc($l2_id)}}, $feature); - return $l2_id, $last_l1_f, $last_l2_f, $feature, $feature, $lastL1_new; #### STOP HERE AND RETURN - } - } - } - } - # END No parent case # - ####################### - - # +--------------------------------------+ - # | HANDLE PARENT(S) L3 | - # +--------------------------------------+ - # Save feature and check duplicates - # (treat also cases where there is multiple parent. => In that case we expand to create a uniq feature for each) - my $cptParent=0; # to check if it is a multiple parent case - my $allParent = scalar @parentList; - foreach my $parent (@parentList){ # first feature level3 with this primary_tag linked to the level2 feature - $cptParent++; - - #Level3 key doesn't exist - if(! exists_keys($omniscient,('level3',$primary_tag,lc($parent)))){ - - # It is a multiple parent case - if($allParent > 1){ - - # Not the first parent, we have to clone the feature !! - if($cptParent > 1){ - - my $feature_clone=clone($feature); - create_or_replace_tag($feature_clone,'Parent',$parent); #modify Parent To keep only one - _check_uniq_id($omniscient, $miscCount, $uniqID, $uniqIDtoType, $feature_clone); #Will change the ID if needed - - print "::::::::::Push-L3-omniscient-4 level3 || $primary_tag || ".lc($parent)." == ".$feature->gff_string."\n" if ($verbose > 1); - push (@{$omniscient->{"level3"}{$primary_tag}{lc($parent)}}, $feature_clone); - } - - # It is the first parent. Do not clone the feature - else{ - create_or_replace_tag($feature,'Parent',$parent); #modify Parent To keep only one - print "::::::::::Push-L3-omniscient-5 level3 || $primary_tag || ".lc($parent)." == ".$feature->gff_string."\n" if ($verbose > 1); - push (@{$omniscient->{"level3"}{$primary_tag}{lc($parent)}}, $feature); - } - } - else{ #the simpliest case. One parent only - print "::::::::::Push-L3-omniscient-6 level3 || $primary_tag || ".lc($parent)." 
== ".$feature->gff_string."\n" if ($verbose > 1); - push (@{$omniscient->{"level3"}{$primary_tag}{lc($parent)}}, $feature); - } - } - - #Level3 key exists - else{ - - # It is a multiple parent case # Not the first parent, we have to clone the feature !! - if($cptParent > 1){ #several parent, and we are not looking the first one - - my $feature_clone=clone($feature); - create_or_replace_tag($feature_clone,'Parent',$parent); #modify Parent To keep only one - _check_uniq_id($omniscient, $miscCount, $uniqID, $uniqIDtoType, $feature_clone); #Will change the ID if needed - - if( ! _it_is_duplication($duplicate, $omniscient, $uniqID, $feature_clone) ){ - print "::::::::::Push-L3-omniscient-8 level3 || $primary_tag || ".lc($parent)." == ".$feature_clone->gff_string."\n" if ($verbose > 1); - push (@{$omniscient->{"level3"}{$primary_tag}{lc($parent)}}, $feature_clone); - } - - } - elsif($allParent > 1){ # It is a multiple parent case #several parent, but we are looking the first one - - # It is the first parent. Do not clone the feature - create_or_replace_tag($feature,'Parent',$parent); #modify Parent To keep only one - if( ! _it_is_duplication($duplicate, $omniscient, $uniqID, $feature) ){ - print "::::::::::Push-L3-omniscient-9 level3 || $primary_tag || ".lc($parent)." == ".$feature->gff_string."\n" if ($verbose > 1); - push (@{$omniscient->{"level3"}{$primary_tag}{lc($parent)}}, $feature); - } - - } - #It is not a duplicated feature => save it in omniscient - elsif( ! _it_is_duplication($duplicate, $omniscient, $uniqID, $feature) ){ - #the simpliest case. One parent only - print "::::::::::Push-L3-omniscient-10 level3 || $primary_tag || ".lc($parent)." 
== ".$feature->gff_string."\n" if ($verbose > 1); - push (@{$omniscient->{"level3"}{$primary_tag}{lc($parent)}}, $feature); - } - } - } - return $last_locusTAGvalue, $last_l1_f, $last_l2_f, $feature, $feature, $lastL1_new; - } - - -# +----------------------------------------------------------------------------+ -# | MANAGE THE REST => feature UNKNOWN | # FEATURE NOT DEFINE IN ANY OF THE 3 LEVELS YET -# +----------------------------------------------------------------------------+ - else{ - warn "gff3 reader warning: primary_tag error @ ".$primary_tag." still not taken in account ! Please modify the code to define on of the three level of this feature.\n"; - warn "GLOBAL@"."parser1@".$primary_tag."@"; - return $locusTAGvalue, $last_l1_f, $last_l2_f, $last_l3_f, $feature, $lastL1_new; - } - - print "Congratulation ! Read this line is not normal !! Please contact the developer !!!\n";exit; -} - -##============================================================================== -##============================================================================== - -# /!\ $feature must have a parent if not level1 -# Keep track to recover from sequential locus tag share whith feature saved in Omniscient -sub _save_common_tag_value_top_feature{ - my ($feature, $locusTAG_uniq, $level)=@_; - - my $locusName=undef; - - foreach my $tag (@COMONTAG){ - - #check if we have the tag - if($feature->has_tag($tag)){ - $locusName=lc($feature->_tag_value($tag)); #get the value - - if ( !( exists_keys ( $locusTAG_uniq, ('topfeature', $locusName, 'level1') ) ) and ($level eq 'level2') ) { - $locusTAG_uniq->{'topfeature'}{$locusName}{'level1'}{'ID'} = lc($feature->_tag_value('ID')); - last; - } - - if ( !( exists_keys ( $locusTAG_uniq, ('topfeature', $locusName, 'level2') ) ) and ($level eq 'level2') ) { - $locusTAG_uniq->{'topfeature'}{$locusName}{'level2'} = [lc($feature->_tag_value('ID')), lc($feature->_tag_value('Parent'))]; - last; - } - - if ( !( exists_keys ( $locusTAG_uniq, 
('topfeature', $locusName, 'level3') ) ) and ($level eq 'level3') ) { - $locusTAG_uniq->{'topfeature'}{$locusName}{'level3'} = [lc($feature->_tag_value('ID')), lc($feature->_tag_value('Parent'))]; - last; - } - - } - } -} - -#check if the comom tag is present among the attributes -# return lower case value -sub _get_comon_tag_value{ - my ( $feature, $locusTAG_uniq, $level)=@_; - - my $locusName=undef; - foreach my $tag (@COMONTAG){ - - #check if we have the tag - if($feature->has_tag($tag)){ - $locusName=lc($feature->_tag_value($tag)); #get the value - - if(exists_keys ($locusTAG_uniq, ('level1',$locusName) ) ){ - $locusName = $locusTAG_uniq->{'level1'}{$locusName}; - last; - } - else{ - $locusTAG_uniq->{'level1'}{$locusName} = $locusName; #save it - last; - } - } - } - - if($level eq 'level2' and $locusName){ - if(! exists_keys ($locusTAG_uniq, ('level2',$locusName, lc($feature->_tag_value('ID'))) ) ){ - push @{$locusTAG_uniq->{'level2'}{$locusName}}, $feature; - $locusTAG_uniq->{'linkl2l1'}{lc($feature->_tag_value('ID'))} = $locusName; - } - } - - # In case where no parent, no comon tag, and no sequential, we cannot deal at all with it !!!! - if(! $locusName and $level ne 'level1'){ - warn "WARNING gff3 reader: Hmmm, be aware that your feature doesn't contain any Parent and locus tag. No worries, we will handle it by considering it as striclty sequential. If you are not ok with that, provide an ID or a comon tag by locus. @ the feature is:\n".$feature->gff_string()."\n"; - } - - return $locusName; -} - -#feature is not yet saved in omniscient ! -sub _it_is_duplication{ - my ($duplicate, $omniscient, $uniqID, $feature)=@_; - - my $is_dupli=undef; - my $potentialList=undef; - - my $level = get_level($feature); - my $primary_tag = lc($feature->primary_tag); - - my $id = $uniqID->{$feature->_tag_value('ID')}; # check the original ID - - if($level eq "level1"){ - if(! 
exists_keys($omniscient,($level, $primary_tag, lc($id) ))){ - return $is_dupli; #return is not a dupli - } - else{ - $potentialList=$omniscient->{$level}{$primary_tag}{lc($id)}; #push the feature L1 in potentialList - } - } - else{ #feature l2 or l3 - - my @parent = $feature->get_tag_values('Parent'); - foreach my $one_parent_ID (@parent){ - - my $one_parent_uID = $one_parent_ID; # In case where the parent have not yet been processed, we cannot have his uID, we will check the current ID - if ( exists_keys($uniqID, ($one_parent_ID)) ){ - $one_parent_uID = lc($uniqID->{$one_parent_ID}); # check the original ID - } - - foreach my $primary_tag ( keys %{$omniscient->{$level}} ){ - if (exists_keys($omniscient,($level, $primary_tag, $one_parent_uID))){ - push @{$potentialList}, @{$omniscient->{$level}{$primary_tag}{$one_parent_uID}}; - } - } - } - if(! $potentialList){ #potential list empty - return $is_dupli; #return is not a dupli - } - } - - - #check if list is a feature or a reference of a list of feature - my @list_feature; # will be a list of feature. - - if(ref($potentialList) eq 'ARRAY'){ - @list_feature=@$potentialList; #it's a reference of a list of feature - } - else{push (@list_feature, $potentialList);} # it's a feature - - #### PREPARE THE SENTENCE TO CHECK - my $current_string=_create_comparison_string($uniqID, $feature); - - #Check all the level2 list element - foreach my $feature_in_omniscient ( @list_feature ){ - - my $string=_create_comparison_string($uniqID, $feature_in_omniscient); - if($current_string eq $string){ - $is_dupli=1; - push (@{$duplicate->{$level}{$primary_tag}{$id}}, $feature); - delete $uniqID->{$feature->_tag_value('ID')}; # delete uniq ID that has been created for nothing - last; - } - } - - if(! $is_dupli and $level eq "level1" and $omniscient->{"level1"}{$primary_tag}{$id}){ - warn "WARNING gff3 reader level1 : This feature level1 is not a duplicate (different than others feature level1) but has an ID already used. 
We cannot deal with that. @ the feature is: ".$feature->gff_string()."\n"; #Indeed If we change the ID we do not know wich sub-feature parent attribute value to modify (It could occur that we link sub feature not related) - } - return $is_dupli; -} - -# find the level of the feature tested -sub get_level{ - my ($feature)=@_; - - my $source_tag = lc($feature->source_tag); - my $primary_tag = lc($feature->primary_tag); - - my $level=undef; - - ######################################### - ## PECULIARITIES FROM HAVANA / ENSEMBL ## - if ($source_tag eq "ensembl" ){ - if ( $primary_tag eq "rna" ) {return 'level1';} #particularity ENSEMBL - } - if ( ($source_tag =~ "havana" or $source_tag =~ "ensembl") and ($primary_tag eq "processed_transcript" or $primary_tag eq "pseudogene" ) ){ #By default processed_transcript is l2 and pseudogene is l1 - if ($feature->has_tag('Parent')){return "level2" ;} - else{return "level1" ;} - } - ## PECULIARITIES FROM HAVANA / ENSEMBL ## - ######################################### - - if (exists($LEVEL1->{$primary_tag}) ){ - return 'level1'; - } - elsif(exists($LEVEL2->{$primary_tag}) ){ - return 'level2'; - } - elsif(exists($LEVEL3->{$primary_tag}) ){ - return 'level3'; - } -} - -# create string that should be uniq by feature -# will be used to detect duplicated features -sub _create_comparison_string{ - my ($uniqID, $feature)=@_; - - my $string=$feature->seq_id().$feature->primary_tag().$feature->start().$feature->end(); - my $primary_tag = lc($feature->primary_tag); - - # get the ID - my $ID=undef; - if(exists_keys($uniqID,($feature->_tag_value('ID')))){ - $ID = $uniqID->{$feature->_tag_value('ID')}; - } - else{ - $ID = $feature->_tag_value('ID') - } - - # If we are checking a level1 feature no need to go further - if ( exists($LEVEL1->{$primary_tag}) ){ - $string .= $ID; # compare with original ID - return $string; - } - - # If we are not checking a level1 feature, let's take the parent info too - my $Parent=undef; - - 
if(exists_keys($uniqID,($feature->_tag_value('Parent')))){ - $Parent = $uniqID->{$feature->_tag_value('Parent')}; - } - else{ - $Parent = $feature->_tag_value('Parent') - } - - if ( exists($LEVEL2->{$primary_tag}) ){ - $string .= $ID; # compare with original ID - $string .= $Parent; # compare with original Parent - } - if ( exists($LEVEL3->{$primary_tag}) ){ - $string .= $Parent; # compare with original Parent - } - - return $string; -} - -# create an ID uniq. Don't give multi-parent feature ! -# If we have to create new ID for SPREADFEATURES they will not have a shared ID. -sub _check_uniq_id{ - my ($omniscient, $miscCount, $uniqID, $uniqIDtoType, $feature)=@_; - - my $uID=undef; - my $primary_tag = lc($feature->primary_tag); - - my $id=undef; - if($feature->has_tag('ID')){ #has the tag - $id = $feature->_tag_value('ID'); - } - elsif($feature->has_tag($primary_tag."_id") ){ - $id = $feature->_tag_value($primary_tag."_id"); - create_or_replace_tag($feature, 'ID', $id); - } - elsif( get_level($feature) eq 'level1' and $feature->has_tag("gene_id") ){ - $id = $feature->_tag_value("gene_id"); - create_or_replace_tag($feature, 'ID', $id); - } - elsif( get_level($feature) eq 'level2' and $feature->has_tag("transcript_id") ){ - $id = $feature->_tag_value("transcript_id"); - create_or_replace_tag($feature, 'ID', $id); - } - - # CHECK THE ID TO SEE IF IT's uniq, otherwise we have to create a new uniq ID - if($id){ - # In case of non-spreadfeature (avoid CDS and UTR that can share identical IDs) - if(! exists_keys($SPREADFEATURE,($primary_tag) ) ){ - $uID = _create_ID($miscCount, $uniqID, $uniqIDtoType, $primary_tag, $id, 'nbis_NEW'); #method will push the uID - if( $id ne $uID ){ #push the new ID if there is one - create_or_replace_tag($feature, 'ID', $uID); - } - } - # In case of spreadfeature ( CDS and UTR that can share identical IDs) - else{ - # First time we see this ID => No problem; - if(! 
exists($uniqID->{$id})){ - #push the uID - $uID = $id; - $uniqID->{$uID}=$id; - $uniqIDtoType->{$id}=$primary_tag; - } - # NOT the first time we have this ID - # check if it's the same type (To not mix a same ID between UTR and CDS); - elsif( $uniqIDtoType->{$id} eq $primary_tag ){ # Same type, so we can keep this ID, let's continue - $uID = $id; - } - else{ # The spreadfeature type is different - # Let's check if one of the same type is already in omniscient (THE ID could be linked to a non-spreadfeature), in that case we keep the ID already given. - if( $feature->has_tag('Parent') ){ - if ( exists_keys( $omniscient, ('level3', $primary_tag, lc($feature->_tag_value('Parent')) ) ) ){ - $uID = @{ $omniscient->{'level3'}{$primary_tag}{lc($feature->_tag_value('Parent'))} }[0]->_tag_value('ID'); - } - } - if(! $uID){ #ID already taken by another feature type, and we do not have ID already existing of this feature type within omniscient, let's create a new ID - $uID = _create_ID($miscCount, $uniqID, $uniqIDtoType, $primary_tag, $id, 'nbis_NEW'); #method will push the uID - } - if( $id ne $uID ){ #push the new ID if there is one - create_or_replace_tag($feature, 'ID', $uID); - } - } - } - } - else{ #tag absent - my $level = get_level($feature); - if($level ne 'level3'){ - warn "gff3 reader error ".$level .": No ID attribute found @ for the feature: ".$feature->gff_string()."\n"; - } - $miscCount->{$primary_tag}++; - $id = $primary_tag."-".$miscCount->{$primary_tag}; # create an ID and then check if not already taken - $uID = _create_ID($miscCount, $uniqID, $uniqIDtoType, $primary_tag, $id, 'nbis_NEW'); #method will push the uID - create_or_replace_tag($feature, 'ID', $uID); - } - - return $uID; -} - -# create the ID and add it to the feature. -sub _create_ID{ - my ($miscCount, $uniqID, $uniqIDtoType, $primary_tag, $id, $prefix)=@_; - - my $key; - - if($prefix){ - $key=$prefix."-".$primary_tag; - } - else{ - $key=$primary_tag; - } - - my $uID= $id ? 
$id : $key."-1"; - - while( exists_keys($uniqID, ($uID) )){ #loop until we found an uniq tag - $miscCount->{$key}++; - $uID = $key."-".$miscCount->{$key}; - } - - #push the new ID - $uniqID->{$uID}=$id; - $uniqIDtoType->{$uID}=$primary_tag; - - return $uID; -} - -# check if mRNA have is Parental gene existing. If not we create it. -sub _check_l1_linked_to_l2{ - my ($hash_omniscient, $verbose)=@_; - my $resume_case=undef; - - foreach my $primary_tag_l2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - foreach my $id_l1 (keys %{$hash_omniscient->{'level2'}{$primary_tag_l2}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - my $l1_exist=undef; - foreach my $primary_tag_l1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... - if(exists_keys ($hash_omniscient, ('level1', $primary_tag_l1, $id_l1))){ - $l1_exist=1; - last; - } - } - if(! $l1_exist){ - $resume_case++; - print "WARNING gff3 reader level2 : No Parent feature found with the ID @ ".$id_l1.". We will create one.\n" if ($verbose >= 2); - my $gene_feature=clone($hash_omniscient->{'level2'}{$primary_tag_l2}{$id_l1}[0]);#create a copy of the first mRNA feature; - my $new_ID = $gene_feature->_tag_value('Parent'); - print "Here is the new ID created $new_ID.\n" if ($verbose >= 2); - create_or_replace_tag($gene_feature,'ID', $new_ID); #modify ID to replace by parent value - $gene_feature->remove_tag('Parent'); # remove parent ID because, none. 
- check_level1_positions($hash_omniscient, $gene_feature); # check start stop if isoforms exists - - #Deal case where we reconstruct other thing than a gene - my $primary_tag_l1=undef; - if(lc($gene_feature->primary_tag) =~ /match/){ $primary_tag_l1="match"; } - else{ $primary_tag_l1="gene"; } - - $gene_feature->primary_tag($primary_tag_l1); # change primary tag - $hash_omniscient->{"level1"}{$primary_tag_l1}{lc($new_ID)}=$gene_feature; # now save it in omniscient - } - } - } - - print "We create $resume_case level1 features that were missing\n" if($verbose >= 1 and $resume_case); -} - -# @Purpose: Remove the level1 feature that havn't subfeature linked to it. Before to remove it check if L3 is linked to it. In that case it is a format error that we will fix ! -# @input: 1 => hash(omniscient hash) -# @output: none -sub _remove_orphan_l1{ - my ($hash_omniscient, $miscCount, $uniqID, $uniqIDtoType, $mRNAGeneLink, $verbose)=@_; - my $resume_case=undef; - - foreach my $tag_l1 (keys %{$hash_omniscient->{'level1'}}){ - foreach my $id_l1 (keys %{$hash_omniscient->{'level1'}{$tag_l1}}){ - my $neverfound="yes"; - foreach my $tag_l2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - if ( exists_keys ( $hash_omniscient,('level2',$tag_l2,$id_l1) ) ){ - $neverfound=undef;last - } - } - if($neverfound){ - $resume_case++; - print "removing ".$hash_omniscient->{'level1'}{$tag_l1}{$id_l1}->gff_string."\n" if ($verbose >= 3); - delete $hash_omniscient->{'level1'}{$tag_l1}{$id_l1}; # delete level1 // In case of refseq the thin has been cloned and modified, it is why we nevertheless remove it - } - } - } - print "We removed $resume_case level1 features that had no subfeature linked to it.\n" if($verbose >= 1 and $resume_case); -} - -# @Purpose: Check relationship betwwen L3 and L2. If L2 is missing we create it. When creating L2 missing we create as well L1 if missing too. 
-# @input: 4 => hash(omniscient hash), hash(mRNAGeneLink hash), hash(miscCount hash), hash(uniqID hash) -# @output: none -sub _check_l2_linked_to_l3{ - my ($hash_omniscient, $mRNAGeneLink, $miscCount, $uniqID, $uniqIDtoType, $verbose)=@_; - my $resume_case=undef; - - foreach my $tag_l3 (sort {$a cmp $b} keys %{$hash_omniscient->{'level3'}}){ - - foreach my $id_l2 (sort {$a cmp $b} keys %{$hash_omniscient->{'level3'}{$tag_l3}}){ - - #check if L2 exits - if (! exists($mRNAGeneLink->{ $id_l2 }) ) { - $resume_case++; - - #L3 linked directly to L1 - foreach my $tag_l1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... - my $has_l1_feature = undef; - my $id_l2_to_replace = undef; - - if(exists_keys ($hash_omniscient, ('level1', $tag_l1, $id_l2))){ - # case where it's linked by parent/ID attribute - $has_l1_feature = $hash_omniscient->{"level1"}{$tag_l1}{$id_l2}; - } - else{ - # Check if one as a common tag value == to L1 common tag value (then when creating l2 in check3 add parent for L2 of the L1 Id) - foreach my $id_l1 (keys %{$hash_omniscient->{'level1'}{$tag_l1}}){ - my $l1_feature = $hash_omniscient->{"level1"}{$tag_l1}{$id_l1}; - foreach my $tag (@COMONTAG){ - #check if we have the tag - if($l1_feature->has_tag($tag)){ - my $l1_ct_value=lc($l1_feature->_tag_value($tag)); #get the value - foreach my $l3_feature (@{$hash_omniscient->{'level3'}{$tag_l3}{$id_l2}}){ - if($l3_feature->has_tag($tag) and lc($l3_feature->_tag_value($tag)) eq $l1_ct_value){ - $has_l1_feature = $l1_feature; - $id_l2_to_replace = $l3_feature->_tag_value('ID'); - # case where it's linked by comon_tag attribute - last; - } - } - } - last if ($has_l1_feature); - } - last if ($has_l1_feature); - } - } - - if ($has_l1_feature){ - - my $l1_ID = $has_l1_feature->_tag_value('ID'); - my $l2_feature = clone($has_l1_feature);#create a copy of the first mRNA feature; - - if (exists_keys($hash_omniscient,("level3",'cds', $id_l2) ) ){ - $l2_feature->primary_tag('mRNA'); 
- } - else{ #we cannot guess - $l2_feature->primary_tag('RNA'); - } - - #modify Level1: - $l1_ID = _create_ID($miscCount, $uniqID, $uniqIDtoType, lc($has_l1_feature->primary_tag), $l1_ID, 'nbis_NEW'); - create_or_replace_tag($has_l1_feature,'ID', $l1_ID); #modify ID to replace by parent value - delete $hash_omniscient->{'level1'}{$tag_l1}{$id_l2}; # delete level1 // In case of refseq the thin has been cloned and modified, it is why we nevertheless remove it - $hash_omniscient->{"level1"}{lc($has_l1_feature->primary_tag)}{lc($l1_ID)} = $has_l1_feature; - - #Modify parent L2 - create_or_replace_tag($l2_feature,'Parent', $l1_ID); #modify ID to replace by parent value - create_or_replace_tag($l2_feature,'ID', $id_l2) if ($id_l2_to_replace); #modify ID to replace by parent value - push(@{$hash_omniscient->{"level2"}{lc($l2_feature->primary_tag)}{lc($l1_ID)}}, $l2_feature); - - #fill the $mRNAGeneLink hash - $mRNAGeneLink->{ $id_l2 } = $l1_ID; # Always need to keep track about l2->l1, else the method _check_l2_linked_to_l3 will recreate a l1 thinking this relationship is not fill - print "L3 was directly linked to L1. Corrected by creating the intermediate L2 feature from L1 feature\n" if($verbose >= 2); - last - } - } - - if (! exists($mRNAGeneLink->{ $id_l2 }) ) { # it was not previous case (L3 linked directly to L1) - - #start fill L2 - my $l2_feature=clone($hash_omniscient->{'level3'}{$tag_l3}{$id_l2}[0]);#create a copy of the first mRNA feature; - $l2_feature->frame(".") if ($l2_feature->frame ne "."); # If we clone a CDS there will be a frame information to remove. 
- my $new_ID = $l2_feature->_tag_value('Parent'); - create_or_replace_tag($l2_feature,'ID', $new_ID); #modify ID to replace by parent value - my $primary_tag_l2; - if( exists_keys($hash_omniscient,('level3', 'cds', $id_l2)) ) { - $primary_tag_l2="mRNA"; - } - else{ - $primary_tag_l2="RNA"; - } - $l2_feature->primary_tag($primary_tag_l2); # change primary tag - check_level2_positions($hash_omniscient, $l2_feature); # check start stop if isoforms exists - - #fill L1 - my $l1_feature=clone($hash_omniscient->{'level3'}{$tag_l3}{$id_l2}[0]);#create a copy of the first mRNA feature; - $l1_feature->frame(".") if ($l1_feature->frame ne "."); # If we clone a CDS there will be a frame information to remove. - $l1_feature->remove_tag('Parent'); # remove parent ID because, none. - - #Deal case where we reconstruct other thing than a gene - my $primary_tag_l1=undef; - if(lc($l1_feature->primary_tag) =~ /match/){ $primary_tag_l1="match"; } - else{ $primary_tag_l1="gene"; } - $l1_feature->primary_tag($primary_tag_l1); # change primary tag - - my $new_ID_l1 = _check_uniq_id($hash_omniscient, $miscCount, $uniqID, $uniqIDtoType, $l1_feature); - create_or_replace_tag($l1_feature,'ID', $new_ID_l1); #modify ID to replace by parent value - - #finish fill Level2 - create_or_replace_tag($l2_feature, 'Parent', $new_ID_l1); # remove parent ID because, none. - #save new feature L2 - push (@{$hash_omniscient->{"level2"}{lc($primary_tag_l2)}{lc($new_ID_l1)}}, $l2_feature); - - #finish fill Level1 - check_level1_positions($hash_omniscient, $l1_feature); # check start stop if isoforms exists - #save new feature L1 - $hash_omniscient->{"level1"}{lc($primary_tag_l1)}{lc($new_ID_l1)} = $l1_feature; # now save it in omniscient - $mRNAGeneLink->{lc($id_l2)} = $new_ID_l1; - - print "L1 and L2 created, \n" if($verbose >= 1); - } - } - } - } - print "We fixed $resume_case cases where L2 and L1 features were missing\n" if($verbose >= 1 and $resume_case); -} - -# @Purpose: Check L3 features. 
If exon are missing we create them. We go through all features of level3 and check them by type, if two should be merged, we do it (CDS 1-50 and 51-100, must be CDS 1-100). -# @input: 3 => hash(omniscient hash), hash(miscCount hash), hash(uniqID hash) -# @output: none -sub _check_exons{ - my ($hash_omniscient, $mRNAGeneLink, $miscCount, $uniqID, $uniqIDtoType, $verbose)=@_; - my $resume_case=undef; my $resume_case2=undef; my $resume_case3=undef; - - my %checked; - foreach my $tag_l3 ( sort {$a cmp $b} keys %{$hash_omniscient->{'level3'}}){ - if ($tag_l3 ne "exon"){ - foreach my $id_l2 ( sort { (($a =~ /(\d+)$/)[0] || 0) <=> (($b =~ /(\d+)$/)[0] || 0) } keys %{$hash_omniscient->{'level3'}{$tag_l3}}){ - - if( ! exists_keys(\%checked,($id_l2)) ){ #l2 already checked - print "Check: ".$id_l2."\n" if ($verbose >= 3); - my $feature_example=undef; # will be used to create the exon features - my $list_location_Exon=undef; - my $list_location_NoExon=undef; - my $list_location_NoExon_overlap=undef; - my %createIT; # will be usefull to list the feature to create -# +----------------------------------------------------- -# | Go through l3 and save info needed | -# +----------------------------------------------------- - - foreach my $tag_l3 ( sort {$a cmp $b} keys %{$hash_omniscient->{'level3'}}){ - - # LIST NON-EXON LOCATIONS THAT NEED TO BE IN AN EXON LOCATION - if ($tag_l3 ne "exon" and $LEVEL3->{$tag_l3} eq "exon" ){ - - if( exists_keys($hash_omniscient,('level3',$tag_l3, $id_l2)) ){ - - my $list_location_l3=[]; - foreach my $l3_feature (@{$hash_omniscient->{'level3'}{$tag_l3}{$id_l2}}){ - - if(! 
$feature_example){ - $feature_example=$l3_feature; - } - - my $locationRefList=[[[$l3_feature->_tag_value('ID')] ,int($l3_feature->start), int($l3_feature->end)]]; - $list_location_l3 = _manage_location($locationRefList, $list_location_l3, 'adjacent', 0); # we check first in overlap mode to check if badly define features exists - } - - #Rare case when a feature of the same type is badly defined - if($#{$list_location_l3} < $#{$hash_omniscient->{'level3'}{$tag_l3}{$id_l2}}){ - my $message = "Peculiar rare case, we found ".$#{$list_location_l3}." ".$tag_l3." while ".$#{$hash_omniscient->{'level3'}{$tag_l3}{$id_l2}}." expected.\n"; - $message .= "Either some are supernumerary or some have been merged they overlap or are adjacent while they are not suppose to.\n"; - $message .= "In case you were using gtf file as input (no parent/id attributes), check you provide the attribute (i.e comon_tag) used to group features together (e.g. locus_tag, gene_id, etc.).\n"; - $message .= "(In case your file contains only CDS features, and your organism is prokaryote (e.g rast file), using ID as comon_tag might be the solution.)\n"; - warn $message; - } - push @{$list_location_NoExon_overlap}, @{$list_location_l3}; #list of all feature that has been checked in overlap mode - } - } - - # LIST EXON LOCATIONS - elsif($tag_l3 eq "exon"){ - - if( exists_keys($hash_omniscient,('level3',$tag_l3, $id_l2)) ){ - - foreach my $l3_feature (@{$hash_omniscient->{'level3'}{$tag_l3}{$id_l2}}){ - - if(! 
$feature_example){ - $feature_example=$l3_feature; - } - my $locationRefList=[[[$l3_feature->_tag_value('ID')] ,int($l3_feature->start), int($l3_feature->end)]]; - $list_location_Exon = _manage_location($locationRefList, $list_location_Exon , 'adjacent', 0); - } - - #Rare case when a features are badly defined - # This approch works for exon because they have uniq ID - if($#{$list_location_Exon} < $#{$hash_omniscient->{'level3'}{$tag_l3}{$id_l2}}){ - warn "Peculiar rare case, we have to remove existing exon which are supernumerary. Parent is $id_l2\n"; - #remove the non needed features (they where wrong, and are unecessary) - my @id_list2=(); - foreach my $locations (@{$list_location_Exon}){ - # If several value in the ID list, we have to avoid the first value (which is the one to keep), and remove all the other ones which are the Id to remove - if(@{$locations->[0]} > 1){ - my $correct_ID = shift @{$locations->[0]}; - push @id_list2, @{$locations->[0]}; - @{$locations->[0]} = $correct_ID; - foreach my $l3_feature (@{$hash_omniscient->{'level3'}{'exon'}{$id_l2} } ){ - if($l3_feature->_tag_value('ID') eq $correct_ID){ - $l3_feature->start($locations->[1]); - $l3_feature->end($locations->[2]); - } - } - } - } - my @tag_list = ('all'); - my @id_list=($id_l2); - $resume_case3 += $#id_list2; - print "We remove the supernumerary @id_list2 exon(s)\n" if($verbose >= 2); - remove_element_from_omniscient(\@id_list, \@id_list2, $hash_omniscient, 'level3', 'false', \@tag_list); - } - - } - } - } - - ## check all NOn-exon in adjacent mater to have the correct list (allows to merge UTR and CDS to recreate exon ) - foreach my $location (@{$list_location_NoExon_overlap}){ - $list_location_NoExon = _manage_location([$location], $list_location_NoExon, 'adjacent', 0); - } - - #print "list_location_Exon: ".Dumper($list_location_Exon) if ($verbose >= 3); - #print "list_location_NOEXON: ".Dumper($list_location_NoExon) if ($verbose >= 3); - -# 
+--------------------------------------------------------------------------------------------------------+ -# | COMPARE EXONS POSITION TO THOSE DESCRIBED BY NON-EXON FEATURES | -# +--------------------------------------------------------------------------------------------------------+ - - #Case where exon feature exists, we have to check them - if( exists_keys($hash_omniscient,('level3','exon', $id_l2)) ){ #When thre are l3 features but no exon among them... we need to recreate them. - - if ($list_location_NoExon){ #We have features like UTR,CDS,etc allowing to check the exon locations. - #create string to comapre the 2 lists. - my $list_location_Exon_joined=""; - foreach my $location (sort {$a->[1] <=> $b->[1] } @{$list_location_Exon}){ - $list_location_Exon_joined .= $location->[1].$location->[2]; - } - my $list_location_NoExon_joined=""; - foreach my $location (sort {$a->[1] <=> $b->[1] } @{$list_location_NoExon}){ - $list_location_NoExon_joined .= $location->[1].$location->[2]; - } - #If two lists are different we have to check/fix the difference - # If no overlap we create the exon: - # If overlap: Redefine internal exon ; Redfine external exon only if too short. - if($list_location_Exon_joined ne $list_location_NoExon_joined ){ - print "_check_exons EXON problem for $id_l2! coordinates of exons found and coordinate of exons expected according to other feature (i.e. CDS and/or UTR, and/or ...). Let's check that (We will create exon, or modify coordinates, depending of cases. If creation of UTR is needed it will be done in a next step) !! 
\n" if ($verbose >= 2); - - my $location_cpt=0; - foreach my $location (sort {$a->[1] <=> $b->[1] } @{$list_location_NoExon}){ - $location_cpt++; - - my $create_exon=1; - my $new_location; - my $overlap; - - foreach my $exon_location (sort {$a->[1] <=> $b->[1] } @{$list_location_Exon}){ - - ($new_location, $overlap) = _manage_location_lowLevel_adjacent($location, $exon_location); #there is an overlap if $new_location != $exon_location. If it's the same, we should check $overlap to be sure - - if($new_location->[1] < $exon_location->[1] or $new_location->[2] > $exon_location->[2] ){ #The exon_location has been modified by location... We have to remodelate the exon (only if fit some conditions) location to take the modification into account - $create_exon=undef; # We must avoid to create exon because there is an overlap. - - my $redefine_left=undef; - my $redefine_right=undef; - #first location => check left - if($location_cpt == 1){ - if($new_location->[1] < $exon_location->[1]){ $redefine_left = $new_location->[1];} # Modify only if it's more left - } - #=> check left and right - if($location_cpt != 1 and $location_cpt != @$location){ - if($new_location->[1] < $exon_location->[1]){ $redefine_left = $new_location->[1];} # Modify only if it's more left - if($new_location->[2] > $exon_location->[2]){ $redefine_right = $new_location->[2];} # Modify only if it's more right - } - #last location => check right - if($location_cpt == @$location){ - if($new_location->[2] > $exon_location->[2]){ $redefine_right = $new_location->[2];} # Modify only if it's more right - } - - foreach my $l3_feature (@{$hash_omniscient->{'level3'}{'exon'}{$id_l2} } ){ - if($l3_feature->_tag_value('ID') eq $exon_location->[0][0]){ - - if($redefine_left){ - $l3_feature->start($new_location->[1]); - }else{$redefine_left = $exon_location->[1];} - - if($redefine_right){ - $l3_feature->end($new_location->[2]); - }else{$redefine_right = $exon_location->[2];} - if($redefine_left or 
$redefine_right){$resume_case2++;} - print "We modify the location of the existing exon !! ".$exon_location->[0][0]." ".$exon_location->[1]." ".$exon_location->[2]." to ".$redefine_left." ".$redefine_right."\n" if ($verbose >= 2); - last; - } - } - } - elsif($overlap){ #location not modified but no overlap, so it means the exon is not defined ! <= ?? Not sure this comment is good 27th Nov 2018 - $create_exon=undef; - } - } - - if($create_exon){ - push @{$createIT{'exon'}}, $location; - } - } - } - } - else{print "No other feature to check the exon locations (e.g CDS, UTR, etc...). We can trust them then.\n" if ($verbose >= 3);} - } - else{ $createIT{'exon'}=$list_location_NoExon;} # no exon exists, we have to create all of them - - # NOW CREATE EXON IF NECESSARY - if(keys %createIT){ - foreach my $tag (keys %createIT){ - foreach my $location (@{$createIT{$tag}}){ - $resume_case++; - my $feature_exon = clone($feature_example);#create a copy of a random feature l3; - $feature_exon->start($location->[1]); - $feature_exon->end($location->[2]); - $feature_exon->frame("."); - $feature_exon->primary_tag($tag); - my $uID = _check_uniq_id($hash_omniscient, $miscCount, $uniqID, $uniqIDtoType, $feature_exon); - create_or_replace_tag($feature_exon, 'ID', $uID); # remove parent ID because, none. 
- #save new feature L2 - print "_check_exons Create one Exon : ".$feature_exon->gff_string."\n" if ($verbose >= 2); - push (@{$hash_omniscient->{"level3"}{$tag}{$id_l2}}, $feature_exon); - } - } - } - - #Check extremities of exons (If exon is shorter we adapt it to the mRNA size, else we adapt the L2 size to the exon size) - my $id_l1 = lc($mRNAGeneLink->{lc($id_l2)}); - my $getout=undef; - foreach my $tag_l2 ( %{$hash_omniscient->{'level2'}} ){ - if( exists_keys($hash_omniscient,('level2', $tag_l2, $id_l1)) ){ - foreach my $l2_feature ( @{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}} ){ - if( lc($l2_feature->_tag_value('ID')) eq $id_l2 ){ - if( exists_keys ($hash_omniscient, ('level3', 'exon', $id_l2))) { # If no exon it could be a case whre no L3 feature need an exon like non_canonical_three_prime_splice_site (they are out of exon). So the list of exon does not exist. - my $myLeftExtremity=$l2_feature->start(); - my $myRightExtremity=$l2_feature->end(); - - my @list_exon = sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{'exon'}{$id_l2}}; - - if( int($list_exon[0]->start) > int($myLeftExtremity) ){ - print "_check_exons We modified the exon LEFT extremity from $id_l2! ".$list_exon[0]->start." ".$myLeftExtremity."\n" if($verbose >= 1);; - $list_exon[0]->start($myLeftExtremity); - } - if($list_exon[0]->start < $myLeftExtremity){ #modify L2 - $l2_feature->start($list_exon[0]->start); - print "_check_exons We modified the L2 LEFT extremity !\n" if($verbose >= 1); - } - - if($list_exon[$#list_exon]->end < $myRightExtremity){ - print "_check_exons We modified the exon RIGHT extremity from $id_l2!".$list_exon[$#list_exon]->end." 
to ".$myRightExtremity."\n" if($verbose >= 1); - $list_exon[$#list_exon]->end($myRightExtremity); - } - elsif($list_exon[$#list_exon]->end > $myRightExtremity){ #modify L2 - $l2_feature->end($list_exon[$#list_exon]->end); - print "_check_exons We modified the L2 RIGHT extremity !\n" if($verbose >= 1); - } - - $getout=1; - last; - } - } - } - if($getout){ - last; - } - } - } - - #keep track of l2 checked (as we loop over L3, we meet several time the same l2) - $checked{$id_l2}++; - } - } - } - } - print "We create $resume_case exons that were missing\n" if($verbose >= 1 and $resume_case); - print "We modified $resume_case2 exons positions that were wrong\n" if($verbose >= 1 and $resume_case2); - print "We have removed $resume_case3 existing exons which were supernumerary\n" if($verbose >= 1 and $resume_case3); -} - -# @Purpose: Check L3 features. If UTRS are missing we create them. -# @input: 3 => hash(omniscient hash), hash(miscCount hash), hash(uniqID hash) -# @output: none -sub _check_utrs{ - my ($hash_omniscient, $mRNAGeneLink, $miscCount, $uniqID, $uniqIDtoType, $verbose)=@_; - my $resume_case=undef;my $resume_case2=undef; - - my %checked; - foreach my $tag_l3 (sort {$a cmp $b} keys %{$hash_omniscient->{'level3'}}){ - if ($tag_l3 ne "exon"){ - foreach my $id_l2 (sort { (($a =~ /(\d+)$/)[0] || 0) <=> (($b =~ /(\d+)$/)[0] || 0) } keys %{$hash_omniscient->{'level3'}{$tag_l3}}){ - - if( ! 
exists_keys(\%checked,($id_l2)) ){ #l2 already checked - - my $feature_example=undef; # will be used to create the exon features - my $list_location_Exon=undef; - my $list_location_CDS=undef; - my $list_location_UTR=undef; - -# +----------------------------------------------------- -# | Go through l3 and save info needed | -# +----------------------------------------------------- - - foreach my $tag_l3 (keys %{$hash_omniscient->{'level3'}}){ - - # LIST CDS LOCATIONS - if ($tag_l3 eq "cds"){ - if( exists_keys($hash_omniscient,('level3',$tag_l3, $id_l2)) ){ - - foreach my $l3_feature (@{$hash_omniscient->{'level3'}{$tag_l3}{$id_l2}}){ - - my $locationRefList=[[[$l3_feature->_tag_value('ID')] ,int($l3_feature->start), int($l3_feature->end)]]; - $list_location_CDS = _manage_location($locationRefList, $list_location_CDS, 'adjacent', $verbose); - } - } - } - - # LIST UTR LOCATIONS - if ($tag_l3 =~ "utr"){ - if( exists_keys($hash_omniscient,('level3',$tag_l3, $id_l2)) ){ - - foreach my $l3_feature (@{$hash_omniscient->{'level3'}{$tag_l3}{$id_l2}}){ - - my $locationRefList=[[[$l3_feature->_tag_value('ID')] ,int($l3_feature->start), int($l3_feature->end)]]; - $list_location_UTR = _manage_location($locationRefList, $list_location_UTR, 'adjacent', $verbose); - } - } - } - - # LIST EXON LOCATIONS - elsif($tag_l3 eq "exon"){ - if( exists_keys($hash_omniscient,('level3',$tag_l3, $id_l2)) ){ - - foreach my $l3_feature (@{$hash_omniscient->{'level3'}{$tag_l3}{$id_l2}}){ - - if(! 
$feature_example){ - $feature_example=$l3_feature; - } - #print "exonFeature= ".$l3_feature->gff_string."\n"; - push @{$list_location_Exon}, [ [$l3_feature->_tag_value('ID')], int($l3_feature->start), int($l3_feature->end)] ; - } - } - } - } - -# +----------------------------------------------------- -# | HANDLE UTRs | -# +----------------------------------------------------- - - if( exists_keys($hash_omniscient,('level3','cds', $id_l2)) ){ #Check UTR only if CDS exists - - # Create list of UTR expected: - my $list_location_UTR_expected=undef; - my $expected_utr=1; - - foreach my $exon_location (sort {$a->[1] <=> $b->[1] } @{$list_location_Exon}){ - - my $new_location; - my $overlap; - my $never_overlap="yes"; - foreach my $location_cds (sort {$a->[1] <=> $b->[1] } @{$list_location_CDS}){ - - if( $location_cds->[1] > $exon_location->[2]){last;} - if( $location_cds->[2] < $exon_location->[1]){next;} - - ($new_location, $overlap) = _manage_location_lowLevel_inversed($location_cds, $exon_location, $verbose); - - if($overlap eq "perfect"){ $never_overlap=undef; $expected_utr=undef;last;} - - if($new_location->[1] != $exon_location->[1] and $new_location->[2] != $exon_location->[2] ){ #two UTR expected ========================= exon - print "creation utr push1\n" if($verbose >= 3); - push @{$list_location_UTR_expected}, [undef, $exon_location->[1], $location_cds->[1]-1]; # ======= CDS - push @{$list_location_UTR_expected}, [undef, $location_cds->[2]+1, $exon_location->[2]]; - $never_overlap=undef; - last; - } - elsif($new_location->[1] != $exon_location->[1] or $new_location->[2] != $exon_location->[2] ){ #two UTR expected { - #print "creation utr push2\n".Dumper($new_location)."\n" if($verbose >= 3); - push @{$list_location_UTR_expected}, $new_location; - $never_overlap=undef; - last; - } - } - if($never_overlap){ #case of UTR that match++ fully the exon - push @{$list_location_UTR_expected}, $exon_location; - } - } - - #print "list_location_UTR_expected: 
".Dumper($list_location_UTR_expected) if ($verbose >= 3); - #print "list_location_UTR: ".Dumper($list_location_UTR) if ($verbose >= 3); - - # Compare UTR Present and UTR expected - my $list_utr_to_create=undef; - - if($list_location_UTR){ #List UTR not empty - if($list_location_UTR_expected){ #List UTR not empty - foreach my $UTRexp_location (sort {$a->[1] <=> $b->[1] } @{$list_location_UTR_expected} ){ - - my $create_utr=1; - my $new_location; - my $overlap; - foreach my $UTR_location (sort {$a->[1] <=> $b->[1] } @{$list_location_UTR}){ - - ($new_location, $overlap) = _manage_location_lowLevel_inversed($UTR_location, $UTRexp_location, $verbose); #just to check that it overlaps - - if($overlap and ( $UTR_location->[1] != $UTRexp_location->[1] or $UTR_location->[2] != $UTRexp_location->[2] ) ){ #It overlaps and at least one location is different. We have to re-modelate the utr location to take the modification into account - print "We modify the location of the existing utr: ".$UTR_location->[0][0]." ".$UTR_location->[1]." ".$UTR_location->[2]." to ".$UTRexp_location->[1]." ".$UTRexp_location->[2]."\n" if ($verbose >= 3); - $resume_case2++; - $create_utr=undef; - - foreach my $tag_l3 (keys %{$hash_omniscient->{'level3'} } ){ - if($tag_l3 =~"utr"){ - if( exists_keys($hash_omniscient,('level3', $tag_l3, $id_l2)) ){ - foreach my $l3_feature (@{$hash_omniscient->{'level3'}{$tag_l3}{$id_l2} } ){ - if($l3_feature->_tag_value('ID') eq $UTR_location->[0][0] and $l3_feature->start eq $UTR_location->[1] and $l3_feature->end eq $UTR_location->[2]){ # we have to check position to be sure we modify the correct one, because UTR could share the same ID - print "UTR location modified: = ".$l3_feature->gff_string."\nnew location:".$UTRexp_location->[1]." 
".$UTRexp_location->[2]."\n" if ($verbose >= 3); - $l3_feature->start($UTRexp_location->[1]); - $l3_feature->end($UTRexp_location->[2]); - last; - } - } - } - } - } - } - elsif($overlap and $overlap eq "perfect"){ #An UTR that match perfectly already exists ! - $create_utr=undef; - } - } - - if($create_utr){ - push @{$list_utr_to_create}, $new_location; - } - } - } - else{print "UTR check step. How is it possible ? We have an UTR in the file but none is expected according to the described exons.\nLevel2 studied:".$id_l2."\n";exit;} - } - else{ - if($list_location_UTR_expected){ - $list_utr_to_create=$list_location_UTR_expected;# no UTR exists, we have to create all of them - } - } - #print "list_utr_to_create: ".Dumper($list_utr_to_create) if ($verbose >= 3); - - # NOW CREATE UTR IF NECESSARY - my @cds_sorted = sort {$a->[1] <=> $b->[1]} @{$list_location_CDS}; - - my $extremLeftCDS = $cds_sorted[0]->[1]; - my $extremRightCDS = $cds_sorted[$#cds_sorted]->[2]; - - if($list_utr_to_create){ - - foreach my $location (@{$list_utr_to_create}){ - $resume_case++; - print "_check_utrs Create one UTR !\n" if ($verbose >= 2); - - my $feature_utr = clone($feature_example);#create a copy of a random feature l3; - $feature_utr->start($location->[1]); - $feature_utr->end($location->[2]); - $feature_utr->frame("."); - - #HANDLE primary tag - my $primary_tag = "UTR"; - if($location->[2] < $extremLeftCDS){ - if($feature_utr->strand == 1){ - $primary_tag = "five_prime_UTR"; - } - else{ - $primary_tag = "three_prime_UTR"; - } - } - elsif($location->[1] > $extremRightCDS){ - if($feature_utr->strand == 1){ - $primary_tag = "three_prime_UTR"; - } - else{ - $primary_tag = "five_prime_UTR"; - } - } - - $feature_utr->primary_tag($primary_tag); - - - my $uID = _check_uniq_id($hash_omniscient, $miscCount, $uniqID, $uniqIDtoType, $feature_utr); - create_or_replace_tag($feature_utr, 'ID', $uID); # remove parent ID because, none. 
- #save new feature L2 - push (@{$hash_omniscient->{"level3"}{lc($primary_tag)}{$id_l2}}, $feature_utr); - } - } - } - - #keep track of l2 checked (as we loop over L3, we meet several time the same l2) - $checked{$id_l2}++; - } - } - } - } - print "We created $resume_case UTRs that were missing\n" if($verbose >= 1 and $resume_case); - print "We modified $resume_case2 UTRs positions that were wrong\n" if($verbose >= 1 and $resume_case2); -} - -# @Purpose: Will merge a list of "location" (tuple of integer), and another list of location. If two location overlap or are adjacent, only one location will be kept that represent the most extrem values -# @input: 3 => list of 3 values([[S,X,Y][S,Z,W]] or [[[S],X,Y]]), list of integer tuple, verbose option for debug -# @output: list of list -sub _manage_location{ - my ($locationRefList, $locationTargetList, $method, $verbose) = @_; - - my @new_location_list; #new location list that will be returned once filled - - _printSurrounded("Enter",25,"+","\n\n") if ($verbose >= 4); - print "Enter Ref: ".Dumper($locationRefList)."\nEnter Target: ".Dumper($locationTargetList) if ($verbose >= 4); - - if ($locationTargetList and @$locationTargetList >= 1){ #check number of location -> List not empty - - my @locations = (@{$locationRefList},@{$locationTargetList}); - my @locations_sorted = sort {$a->[1] <=> $b->[1]} @locations; - - my $location_modified = undef; - my $overlap = undef; - my $location1=undef; - my $location2 = undef; - - foreach my $i (0 .. 
$#locations_sorted-1){ - - if($location_modified){ - $location1 = $location_modified; - $location_modified = undef; - } - else{ - $location1 = $locations_sorted[$i]; - } - $location2 = $locations_sorted[$i+1]; - - if ($location2->[1] > $location1->[2]+1 and $i+1 != $#locations_sorted){ #locations do not overlap and not before last of the list - push @new_location_list, [@$location1]; - } - else{ - my $location_back = undef; - if($method eq 'adjacent'){ - ($location_back, $overlap) = _manage_location_lowLevel_adjacent($location1, $location2); - if($overlap){$location_modified=$location_back;} - } - else{ - ($location_back, $overlap) = _manage_location_lowLevel_overlap($location1, $location2); - if($overlap){$location_modified=$location_back;} - } - } - } - #if last round was not overlaping we should add the last value in the list - if( $overlap ){ - push @new_location_list, [@$location_modified]; - } - else{ - push @new_location_list, [@$location1]; - push @new_location_list, [@$location2]; - } - } - else{#check number of location -> none - _printSurrounded("Return",25,"-","\n\n") if ($verbose >= 4); - if($verbose >= 4){print "returnA: ".Dumper($locationRefList)."\n\n\n";} - return \@{$locationRefList}; - } - _printSurrounded("Return",25,"-","\n\n") if ($verbose >= 4); - if($verbose >= 4){print "returnB: ".Dumper(\@new_location_list)."\n\n\n";} - return \@new_location_list; -} - -# ===================== location1 -# ===================== location2 -# ========================= <= New location2 returned -# @Purpose: Modify the location2 if it overlap the location1 by keeping the extrem values. Return the location2 intact if no overlap. 
/!\ The locations are merged if they are contigu -# @input: 2 => integer tuple [[ID],X,Y], list of integer tuple -# @output: 2 => ref of a list of 2 element, boolean -sub _manage_location_lowLevel_adjacent{ - my ($location, $location2) = @_; - - my $new_location = [@{$location2}]; - my $overlap=undef; - - if ( ($location2->[1] <= $location->[2]+1) and ($location2->[2]+1 >= $location->[1]) ){ #it overlaps or are consecutive - - #Manage Id to avoid same IDs - my %params = map { $_ => 1 } @{$new_location->[0]}; - foreach my $id ( @{$location->[0]}){ - if(! exists($params{$id})){ - push @{$new_location->[0]}, $id ; #append from the end the list of ID - } - } - $overlap=1; - - if($location2->[1] > $location->[1]){ - $new_location->[1]=$location->[1]; - } - if($location->[2] > $location2->[2]){ - $new_location->[2]=$location->[2]; - } - } - return $new_location, $overlap; -} - -# ===================== location1 -# ===================== location2 -# ========================= <= New location2 returned -# @Purpose: Modify the location2 if it overlap the location1 by keeping the extrem values. Return the location2 intact if no overlap. /!\ We append the ID list by the end (as push) when there is an overlap -# @input: 2 => integer tuple [[ID],X,Y], list of integer tuple -# @output: 2 => ref of a list of 2 element, boolean -sub _manage_location_lowLevel_overlap{ - my ($location, $location2) = @_; - - my $new_location = [@{$location2}]; - my $overlap=undef; - - if ( ($location2->[1] <= $location->[2]) and ($location2->[2] >= $location->[1]) ){ #it overlaps or are consecutive - - #Manage Id to avoid same IDs - my %params = map { $_ => 1 } @{$new_location->[0]}; - foreach my $id ( @{$location->[0]}){ - if(! 
exists($params{$id})){ - push @{$new_location->[0]}, $id ; #append from the end the list of ID - } - } - - $overlap=1; - - if($location2->[1] > $location->[1]){ - $new_location->[1]=$location->[1]; - } - if($location->[2] > $location2->[2]){ - $new_location->[2]=$location->[2]; - } - } - - return $new_location, $overlap; -} - - -# ================= location1 (cds) -# ===================== location2 (exon) -# ======== <= New location2 returned -sub _manage_location_lowLevel_inversed{ - my ($location, $location2, $verbose) = @_; - - print "_manage_location_lowLevel_inversed\n" if($verbose >= 3); - - my $new_location = [@{$location2}]; - my $overlap=undef; - - if ( ($location2->[1] == $location->[1]) and ($location2->[2] == $location->[2]) ){ #it overlaps perfectly - return $new_location, "perfect"; - } - - if ( ($location2->[1] <= $location->[2]) and ($location2->[2] >= $location->[1]) ){ #it overlaps - - $overlap=1; - - if($location2->[1] < $location->[1]){ - $new_location->[2] = $location->[1]-1; - print "case1\n" if($verbose >= 3); - } - if($location->[2] < $location2->[2]){ - $new_location->[1] = $location->[2]+1; - print "case2\n" if($verbose >= 3); - } - } - return $new_location, $overlap; -} - -#============================================================================================================ -#Explanation: Case where part of the locus BBBBBB has been seen before to meet the its Parent feature (see below) = a parent feature ID has been created on the fly during the parsing. -# We now need to remove the wrong Parent ID and link them to the correct one. -#seq1 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;locus_tag=AAAAA -#seq1 maker UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;locus_tag=BBBBBB -#seq1 maker UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;locus_tag=BBBBBB -#seq1 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;locus_tag=AAAAA -#... -#seq1 maker gene 337818 343277 . + . 
ID=CLUHARG00000005458;locus_tag=BBBBBB -# -# HIS<=>Hash InfoSequential -# -# => Improve something ?????? -sub _cleanSequentialIncase{ - my ($infoSequential, $locusTAGuniq, $verbose) = @_; - my $resume_case=undef; - - foreach my $locusNameHIS (keys %{$infoSequential} ){ - - if(exists_keys($locusTAGuniq,('level1', $locusNameHIS))){ - - my $locusNameUniq = $locusTAGuniq->{'level1'}{$locusNameHIS}; - if($locusNameHIS ne $locusNameUniq ){ - $resume_case++; - - # The locusNameUniq already exists, we have to fill it with the part of inforamtion missing that is contained in$infoSequential->{$locusNameHIS} - if(exists_keys ($infoSequential,($locusNameUniq) ) ){ - - foreach my $bucket (keys %{$infoSequential->{$locusNameHIS}} ){ - if ($bucket eq 'level1'){next;} - - my $prefix= lc(PREFIXL2); #when a l2 start with this prefix it means we created the l2 on the fly (the real l2 if exists, had not been met yet) - if($bucket =~ /^$prefix/i){ - my $idok=undef; - foreach my $feature ( @{$locusTAGuniq->{'level2'}{ $locusNameUniq}}){ - if(lc($feature->_tag_value('ID')) !~ /^$prefix/i){ - $idok = lc( $feature->_tag_value('ID') ); # @{$locusTAGuniq->{'level2'}{ $locusNameUniq }}[$cpt] is the first l2 feature that has been realy met - last; # We make the assumption that the pieces of the locus that were lost before to describe its real l2 is part of the first real l2 met. - # ==================================================================================================================================== - } - } - - if(exists_keys ($infoSequential,($locusNameUniq, $idok) ) ){ - - foreach my $level (keys %{$infoSequential->{$locusNameHIS}{$bucket}} ){ - push @{$infoSequential->{$locusNameUniq}{$idok}{$level}}, @{$infoSequential->{$locusNameHIS}{$bucket}{$level}}; - } - delete $infoSequential->{$locusNameHIS}{$bucket}; - if(! 
%{$infoSequential->{$locusNameHIS}}){delete $infoSequential->{$locusNameHIS};} # remove because nothing linked to it anymore - } - else{ - $infoSequential->{$locusNameUniq}{$idok} = delete $infoSequential->{$locusNameHIS}{$bucket}; #delete the lod key but transfer the data to a new key - } - } - } - } - else{ # The locusNameUniq didn't exists, we can directly shift the old locusNameHIS by the locusNameUniq - $infoSequential->{$locusNameUniq} = delete $infoSequential->{$locusNameHIS}; # link to the first l2 #delete the lod key but transfer the data to a new key - } - } - } - } - print "We found $resume_case cases where part of the locus data where defined earlier in the file.\n" if($verbose >= 1 and $resume_case); -} - -# -# -# All Level1 feature are for sure in omniscient, we have only the ID in sequential, other level feature are in sequential only if no parent has been found -sub _check_sequential{ # Goes through from L3 to l1 - my ($infoSequential, $omniscient, $miscCount, $uniqID, $uniqIDtoType, $locusTAGuniq, $mRNAGeneLink, $verbose) = @_; - my $resume_case=undef; - - _cleanSequentialIncase($infoSequential, $locusTAGuniq, $verbose); # PART OF LOCUS LOST BEFORE TO MEET IT L2 or L1 ... we catch them and re-link everythong as it should be - - foreach my $locusNameHIS ( sort { ncmp($a,$b) } keys %{$infoSequential} ){ #comon tag was l1 id when no real comon tag present - - foreach my $bucket (sort { ncmp($a,$b) } keys %{$infoSequential->{$locusNameHIS} } ){ #bucket = level1 or Id L2 - print "\nlocusNameHIS $locusNameHIS bucket $bucket\n\n" if ($verbose >= 3); - - if ($bucket eq 'level1'){next;} #skip case level1 - structure of the hash different - - my $must_create_l2=undef; - my $feature_l2 = undef; - - #Bucket is an uniq ID created during the reading process. So it can be used as uniq ID. - if(! 
exists_keys($infoSequential,($locusNameHIS, $bucket, 'level3') ) ){ - - # Link the l2 to the L1 feature - $feature_l2=$infoSequential->{$locusNameHIS}{$bucket}{'level2'}; - print "level2 in sequential doenst have L3 feature associated in sequential - $locusNameHIS $bucket! ".$feature_l2->gff_string."\n" if ($verbose >= 3); - - if(! exists($mRNAGeneLink->{lc($bucket)}) ){ - # We add it to omniscient and to mRNAGeneLink - print "level2 does not exits in mRNAGeneLink(omniscient) !".$feature_l2->gff_string."\n" if ($verbose >= 3); - push (@{$omniscient->{"level2"}{lc($feature_l2->primary_tag)}{lc($feature_l2->_tag_value('Parent'))} }, $feature_l2); - $mRNAGeneLink->{lc($feature_l2->_tag_value('ID'))} = $feature_l2->_tag_value('Parent'); - } - if( ! exists_keys($omniscient,('level3', "exon", lc($feature_l2->_tag_value("ID")))) ){ #check if an exon exist in the omniscient - if ($createL3forL2orphan){ # create the exon missing if option agreed - print "create single level3 exon feature !\n" if($verbose >= 2); - my $feature_l3 = clone($feature_l2);#create a copy of the l2 feature; - $feature_l3->primary_tag('exon'); - create_or_replace_tag($feature_l3,'Parent', $feature_l3->_tag_value('ID')); # change parentID - $feature_l3->remove_tag('ID'); - # create ID - my $id = _create_ID($miscCount, $uniqID, $uniqIDtoType, 'exon', undef, 'nbis_NEW'); - create_or_replace_tag($feature_l3,'ID', $id); # change ID - #my $id = _check_uniq_id($omniscient, $miscCount, $uniqID, $uniqIDtoType, $feature_l3); - push (@{$omniscient->{"level3"}{lc($feature_l3->primary_tag)}{lc($feature_l3->_tag_value('Parent'))} }, $feature_l3); - } - } - #warn "Not normal, we have feature L2 without L3 feature associated.\n"; #We cannot guess the structure except if it is prokaryote or single exon in eucaryote... should we improve that ? - } - - else{ - foreach my $feature_L3 (@{$infoSequential->{$locusNameHIS}{$bucket}{'level3'}} ){ - - if(! 
exists_keys($infoSequential,($locusNameHIS, $bucket,'level2')) ){ - print "_check_sequential level2 does not exits in sequential !\n" if($verbose >= 2); - my $common_tag = _get_comon_tag_value($feature_L3, $locusTAGuniq, 'level1'); # check presence of common_tag, maybe we will play a different game - - #take L2 from omniscient if already exits - if(exists($mRNAGeneLink->{lc($bucket)}) ){ - - my $l1_id = $mRNAGeneLink->{lc($bucket)}; - foreach my $tag_l2 (keys %{$omniscient->{'level2'}}){ - if(exists_keys($omniscient, ('level2', $tag_l2, lc($l1_id) ) ) ){ - foreach my $featureL2 (@{$omniscient->{'level2'}{$tag_l2}{lc($l1_id)}}){ - if(lc($featureL2->_tag_value('ID')) eq $bucket){ - print "_check_sequential level2 exits in omniscient !\n" if($verbose >= 2); - $feature_l2 = $featureL2; - last; - } - } - if($feature_l2){last;} - } - } - $infoSequential->{$locusNameHIS}{$bucket}{'level2'} = $feature_l2; - } - #If locus_tag check from omniscient if feature has same locus tag - elsif ( $common_tag and ( exists_keys($locusTAGuniq,('topfeature', $common_tag) ) ) and (! exists_keys($locusTAGuniq,('topfeature', $common_tag,'level1') ) ) ) { - my $id_level2 = undef; - if ( exists_keys($locusTAGuniq,('topfeature', $common_tag, 'level2') ) ){ - $id_level2 = $locusTAGuniq->{'topfeature'}{$common_tag}{'level2'}[0]; - } - elsif ( exists_keys($locusTAGuniq,('topfeature', $common_tag, 'level3') ) ){ - $id_level2 = $locusTAGuniq->{'topfeature'}{$common_tag}{'level3'}[1]; - } - print "FeatureA has the common tag value shared with a featureX from omniscient. 
We use same parent ID as featureX, and inject FeatureA in omniscient: $common_tag\n" if ($verbose > 2); - create_or_replace_tag($feature_L3, 'Parent', $id_level2); - push (@{$omniscient->{"level3"} {lc($feature_L3->primary_tag)} {$id_level2} }, $feature_L3); - next; - } - else{#create l2 - print "create level2 !\n" if($verbose >= 2); - $must_create_l2=1; - $feature_l2 = clone($infoSequential->{$locusNameHIS}{$bucket}{'level3'}[0]);#create a copy of the first mRNA feature; - - #manage primary tag - my $primary_tag_l2='RNA'; - foreach my $feature_L3 (@{$infoSequential->{$locusNameHIS}{$bucket}{'level3'}} ){ - - if ( lc($feature_L3->primary_tag) eq 'cds'){ - $primary_tag_l2 ='mRNA'; - last; - } - } - $feature_l2->primary_tag($primary_tag_l2); - - #Manage ID - create_or_replace_tag($feature_l2,'ID', $bucket); #modify ID to replace by parent value - print "level2 ID created: $bucket !\n" if($verbose >= 2); - #Manage Parent - my $parentID = undef; - if( exists_keys($infoSequential,($locusNameHIS,'level1')) ){ # parent ID exists in infoSequential - $parentID = lc($infoSequential->{$locusNameHIS}{'level1'}); # PArentID it correct case ??? - print "_check_sequential Parent IDtaken from infoSequential\n" if ($verbose >= 3); - } - else{ - my $IDgoodCast = _id_exists_in_l1_omniscient($omniscient, $locusNameHIS); - if($IDgoodCast){ - $parentID = $IDgoodCast; - print "_check_sequential Parent IDtaken from omniscient\n" if ($verbose >= 3); - } - - if( ! $parentID ){ #In that case level1 feature doesn't exists in $infoSequential and in $omniscient. 
I will be created by the method check_gene_link_to_mrna - #my ($miscCount, $uniqID, $primary_tag, $id, $prefix)=@_; - $parentID = _create_ID($miscCount, $uniqID, $uniqIDtoType, 'gene', undef, 'nbis_NEW'); - print "_check_sequential Parent IDtaken created\n" if ($verbose >= 3); - $infoSequential->{$locusNameHIS}{'level1'}=$parentID; - } - } - print "_check_sequential Parent ID created for level2 = $parentID\n" if ($verbose >= 2); - create_or_replace_tag($feature_l2,'Parent', $parentID ); # change parentID - - print "push-omniscient: level2 || ".lc($primary_tag_l2)." || ".lc($parentID)." == ".$feature_l2->gff_string."\n" if ($verbose >= 2); - push (@{$omniscient->{"level2"}{lc($primary_tag_l2)}{lc($parentID)}}, $feature_l2); - $mRNAGeneLink->{lc($bucket)} = $parentID; # Always need to keep track about l2->l1, else the method check_l3_link_to_l2 will recreate a l1 thinking this relationship is not fill - $infoSequential->{$locusNameHIS}{$bucket}{'level2'} = $feature_l2; - } - } - else{ - - #MUST push L2 in omniscient if absent ! - $feature_l2=$infoSequential->{$locusNameHIS}{$bucket}{'level2'}; - print "level2 exits in sequential - $locusNameHIS $bucket! ".$feature_l2->gff_string."\n" if ($verbose >= 3); - - if(! exists($mRNAGeneLink->{$bucket}) ){ - print "level2 does not exits in mRNAGeneLink(omniscient) !".$feature_l2->gff_string."\n" if ($verbose >= 3); - push (@{$omniscient->{"level2"}{lc($feature_l2->primary_tag)}{lc($feature_l2->_tag_value('Parent'))} }, $feature_l2); - $mRNAGeneLink->{lc($feature_l2->_tag_value('ID'))} = $feature_l2->_tag_value('Parent'); - } - } - - my $primary_tag_L3 = lc($feature_L3->primary_tag); - create_or_replace_tag($feature_L3,'Parent', $feature_l2->_tag_value('ID')); #modify ID to replace by parent value - - print "push-omniscient: level3 || ".$primary_tag_L3." || ".$bucket." 
== ".$feature_L3->gff_string."\n" if ($verbose >= 2); - push (@{$omniscient->{"level3"}{$primary_tag_L3}{$bucket}}, $feature_L3); - } - $resume_case++; - } - - if($must_create_l2){ - check_level2_positions($omniscient, $feature_l2); - } - } - #LEVEL 1 IS taking care later - } - print "We found $resume_case sequential cases\n" if($verbose >= 1 and $resume_case); -} - -#print return ID if exists original cast -sub _id_exists_in_l1_omniscient{ - my ($omniscient, $id)=@_; - - my $id_good_cast=undef; - foreach my $tag_l1 (keys %{$omniscient->{'level1'}} ){ - if(exists_keys($omniscient, ('level1',$tag_l1, $id))){ - $id_good_cast = $omniscient->{'level1'}{$tag_l1}{$id}->_tag_value('ID'); - return $id_good_cast; - } - } - return $id_good_cast; -} - - -# Check the start and end of level1 feature based on all features level2; -sub _check_all_level1_positions { - my ($hash_omniscient, $verbose)=@_; - my $resume_case=undef; - - foreach my $tag_l1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... - foreach my $id_l1 ( keys %{$hash_omniscient->{'level1'}{$tag_l1}} ) { #sort by position - - my $level1_feature = $hash_omniscient->{'level1'}{$tag_l1}{$id_l1}; - - $resume_case++ if(check_level1_positions($hash_omniscient, $level1_feature, $verbose)); - } - } - print "We fixed $resume_case wrong level1 position cases\n" if($verbose >= 1 and $resume_case); -} - -# Purpose: review all the feature L2 to adjust their start and stop according to the extrem start and stop from L3 sub features. -sub _check_all_level2_positions{ - my ($hash_omniscient, $verbose)=@_; - my $resume_case=undef; - - foreach my $tag_l1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... - foreach my $id_l1 ( keys %{$hash_omniscient->{'level1'}{$tag_l1}} ) { #sort by position - - foreach my $tag_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... 
- if ( exists_keys ($hash_omniscient, ('level2', $tag_level2, $id_l1) ) ){ - - foreach my $mRNA_feature ( @{$hash_omniscient->{'level2'}{$tag_level2}{$id_l1}}){ - my $level2_ID = lc($mRNA_feature->_tag_value('ID')); - my @feature_list=(); - foreach my $primary_tag_l3 ( keys %{$hash_omniscient->{'level3'}}){ # primary_tag_l3 = cds or exon or start_codon or utr etc... - - if ( exists_keys( $hash_omniscient, ('level3', $primary_tag_l3, $level2_ID) ) ){ - push @feature_list, @{$hash_omniscient->{'level3'}{$primary_tag_l3}{$level2_ID}}; - } - } - if(scalar(@feature_list) > 0){ #could be emtpy like in match match_part features - $resume_case++ if(check_mrna_positions($mRNA_feature, \@feature_list, $verbose)); - } - } - } - } - } - } - - print "We fixed $resume_case wrong level2 position cases\n" if($verbose >= 1 and $resume_case); -} - -# Check the start and end of mRNA based a list of feature like list of exon; -sub check_mrna_positions{ - my ($mRNA_feature, $exon_list, $verbose)=@_; - if(! $verbose){$verbose=0;} - my $result=undef; - - my @exon_list_sorted = sort {$a->start <=> $b->start} @{$exon_list}; - my $exonStart=$exon_list_sorted[0]->start; - - @exon_list_sorted = sort {$a->end <=> $b->end} @exon_list_sorted; - my $exonEnd=$exon_list_sorted[$#exon_list_sorted]->end; - - #check start - if ($mRNA_feature->start != $exonStart){ - print "We modified the L2 LEFT extremity for the sanity the biological data!\n" if($verbose >= 3 ); - $mRNA_feature->start($exonStart); - $result=1; - } - #check stop - if($mRNA_feature->end != $exonEnd){ - print "We modified the L2 RIGHT extremity for the sanity the biological data!\n" if($verbose >= 3); - $mRNA_feature->end($exonEnd); - $result=1; - } - - return $result; -} - -# L1: LocusID->level->typeFeature->ID->[ID,start,end] -# LocusID->level->typeFeature->Parent->[ID,start,end] -# @Purpose: When two feature overlap at level3, and are the same type level 2 they have to be merged under the same level 1 feature. 
-# @input: 2 => hash, integer for verbosity -# @output: 0 -sub _merge_overlap_features{ - my ($omniscient, $mRNAGeneLink, $verbose) = @_; - my $resume_case=undef; - - my $sortBySeq = _gather_and_sort_l1_by_seq_id_and_strand($omniscient); - - foreach my $locusID ( keys %{$sortBySeq}){ # tag_l1 = gene or repeat etc... - foreach my $tag_l1 ( keys %{$sortBySeq->{$locusID}} ) { - - #create list to keep track of l1 - my %to_check; - foreach my $feature_l1 ( @{$sortBySeq->{$locusID}{$tag_l1}} ) { - my $id_l1 = lc($feature_l1->_tag_value('ID')); - $to_check{$id_l1}++; - } - - # Go through location from left to right ### - while ( @{$sortBySeq->{$locusID}{$tag_l1}} ){ - - my $feature_l1 = shift @{$sortBySeq->{$locusID}{$tag_l1}}; - my $id_l1 = lc($feature_l1->_tag_value('ID')); - my @location = ($id_l1, int($feature_l1->start()), int($feature_l1->end())); # This location will be updated on the fly - - # Go through location from left to right ### !! - foreach my $l1_feature2 ( @{$sortBySeq->{$locusID}{$tag_l1}} ) { - my $id2_l1 = lc($l1_feature2->_tag_value('ID')); - my @location_to_check = ($id2_l1, int($l1_feature2->start()), int($l1_feature2->end())); - - #If location_to_check start if over the end of the reference location, we stop - if($location_to_check[1] > $location[2]) {last;} - - my ($notneeded, $overlap) = location_overlap_update(\@location, \@location_to_check); # location is updated on the fly, and the newly modified location is the one that will be used at the next loop - - # Let's check at Gene LEVEL - if($overlap){ - - #let's check at CDS level - if(check_gene_overlap_at_CDSthenEXON($omniscient, $omniscient , $id_l1, $id2_l1)){ #If contains CDS it has to overlap at CDS level to be merged, otherwise any type of feature level3 overlaping is sufficient to decide to merge the level1 together - #they overlap in the CDS we should give them the same name - $resume_case++; - - print "$id_l1 and $id2_l1 same locus. We merge them together. 
Below the corresponding feature groups in their whole.\n" if ($verbose >= 3); - print_omniscient_from_level1_id_list($omniscient, [$id_l1,$id2_l1], $fh_error ) if ($verbose >= 3); - # remove the level1 of the ovelaping one - delete $omniscient->{'level1'}{$tag_l1}{$id2_l1}; - # remove the level2 to level1 link stored into the mRNAGeneLink hash. The new links will be added just later after the check to see if we keep the level2 feature or not (we remove it when identical) - foreach my $l2_type (%{$omniscient->{'level2'}}){ - if(exists_keys($omniscient,('level2', $l2_type, $id2_l1))){ - foreach my $feature_l2 (@{$omniscient->{'level2'}{$l2_type}{$id2_l1}}){ - delete $mRNAGeneLink->{lc($feature_l2->_tag_value('ID'))}; - } - } - } - - # Let's change the parent of all the L2 features - foreach my $l2_type (%{$omniscient->{'level2'}} ){ - - if(exists_keys($omniscient,('level2', $l2_type, $id2_l1))){ - ############################### - # REMOVE THE IDENTICAL ISOFORMS - - # first list uniqs - my $list_of_uniqs = keep_only_uniq_from_list2($omniscient, $omniscient->{'level2'}{$l2_type}{$id_l1}, $omniscient->{'level2'}{$l2_type}{$id2_l1}, $verbose); # remove if identical l2 exists - - - #Now manage the rest - foreach my $feature_l2 (@{$list_of_uniqs}){ - - create_or_replace_tag($feature_l2,'Parent', $feature_l1->_tag_value('ID')); #change the parent - # Add the corrected feature to its new L2 bucket - push (@{$omniscient->{'level2'}{$l2_type}{$id_l1}}, $feature_l2); - - # Attach the new parent into the mRNAGeneLink hash - $mRNAGeneLink->{lc($feature_l2->_tag_value('ID'))}=$feature_l2->_tag_value('Parent'); - - } - # remove the old l2 key - delete $omniscient->{'level2'}{$l2_type}{$id2_l1}; - } - } - check_level1_positions($omniscient, $omniscient->{'level1'}{$tag_l1}{$id_l1}, 0); - } - } - } - } - } - } - if($verbose >= 1 and $resume_case){ - print "We fixed $resume_case case where feature has been merged within the same locus\n"; - } - elsif($verbose >= 1){ - print "None 
found\n" ; - } -} - - -# @Purpose: When too feature l2 isoform are identical, we remove one -# @input: 2 => hash, integer for verbosity -# @output: 0 -sub _check_identical_isoforms{ - my ($omniscient, $mRNAGeneLink, $verbose) = @_; - my $resume_case=undef; - - # Go through oall l2 feature - foreach my $l2_type (keys %{$omniscient->{'level2'}}){ - foreach my $id2_l1 (keys %{$omniscient->{'level2'}{$l2_type}}){ - # If more than 1 related to level1 - - if(exists_keys($omniscient,('level2', $l2_type, $id2_l1)) and scalar @{$omniscient->{'level2'}{$l2_type}{$id2_l1}} > 1){ # more than one l2 feature of that type - - my @L2_list_to_remove; - my %checked; - foreach my $feature2 (sort {$b->_tag_value('ID') cmp $a->_tag_value('ID')} @{$omniscient->{'level2'}{$l2_type}{$id2_l1}}){ - $checked{lc($feature2->_tag_value('ID'))}{lc($feature2->_tag_value('ID'))}++; - - my $keep = 1; - foreach my $feature1 (sort {$b cmp $a} @{$omniscient->{'level2'}{$l2_type}{$id2_l1}}){ - - # If not itself and not already checked (A -> B is the same as B -> A), and A or B already removed and must now be skiped (skipme key) - if( (! exists_keys(\%checked, (lc($feature2->_tag_value('ID')), "skipme"))) and (! exists_keys(\%checked, (lc($feature1->_tag_value('ID')), "skipme"))) and ! exists_keys(\%checked, ( lc($feature2->_tag_value('ID')), lc($feature1->_tag_value('ID')) ) ) ){ # - $checked{lc($feature2->_tag_value('ID'))}{lc($feature1->_tag_value('ID'))}++; - $checked{lc($feature1->_tag_value('ID'))}{lc($feature2->_tag_value('ID'))}++; - - #check their position are identical - if($feature1->start().$feature1->end() eq $feature2->start().$feature2->end()){ - - #Check their subfeature are identicals - if(l2_identical($omniscient, $feature1, $feature2, $verbose )){ - $keep = undef; - last; - } - } - } - } - # We dont keep the l2 feature so we have to remove all related features and itself - if(! 
$keep){ - $resume_case++; - print "Lets remove isoform ".$feature2->_tag_value('ID')."\n" if ($verbose >= 2); - $checked{lc($feature2->_tag_value('ID'))}{"skipme"}++;# will be removed later do not check anymore this one - - foreach my $tag (keys %{$omniscient->{'level3'}}){ - if(exists_keys($omniscient, ('level3', $tag, lc($feature2->_tag_value('ID'))))){ - delete $omniscient->{'level3'}{$tag}{lc($feature2->_tag_value('ID'))}; - } - } - #Has to be removed once we finished to go through the l2 list - my $ID_to_remove = lc($feature2->_tag_value('ID')); - push(@L2_list_to_remove,$ID_to_remove); - delete $mRNAGeneLink->{$ID_to_remove}; - } - } - - #L2 has to be removed from List - my @newL2List; - foreach my $feature ( @{$omniscient->{'level2'}{$l2_type}{$id2_l1}} ){ - my $keep = 1; - foreach my $id_l2 (@L2_list_to_remove){ - if( lc($feature->_tag_value('ID')) eq lc($id_l2) ){ - $keep = undef; - } - } - if($keep){ - push (@newL2List,$feature) - } - } - @{$omniscient->{'level2'}{$l2_type}{$id2_l1}}=@newL2List; - - } - } - } - print "We removed $resume_case cases where gene where identical.\n" if($verbose >= 1 and $resume_case); -} - -# Sort by locusID and strand -# LocusID_strand->typeFeature = [feature, feature, feature] -# return a hash. Key is position,tag and value is list of feature l1. 
The list is sorted -sub _gather_and_sort_l1_by_seq_id_and_strand{ - my ($omniscient) = @_; - - my %hash_sortBySeq; - foreach my $tag_level1 (keys %{$omniscient->{'level1'}}){ - foreach my $level1_id (keys %{$omniscient->{'level1'}{$tag_level1}}){ - my $level1_feature = $omniscient->{'level1'}{$tag_level1}{$level1_id}; - my $position_l1=$level1_feature->seq_id.$level1_feature->strand; - push (@{$hash_sortBySeq{$position_l1}{$tag_level1}}, $level1_feature); - } - foreach my $position_l1 (keys %hash_sortBySeq){ - @{$hash_sortBySeq{$position_l1}{$tag_level1}} = sort { ncmp ($a->start.$a->end.$a->_tag_value('ID'), $b->start.$b->end.$b->_tag_value('ID') ) } @{$hash_sortBySeq{$position_l1}{$tag_level1}}; - } - } - return \%hash_sortBySeq; -} - -# -sub modelate_utr_and_cds_features_from_exon_features_and_cds_start_stop{ - - my ($exon_features, $ORFstart, $ORFend)=@_; - - my @cds_features; - my @utr3_features; - my @utr5_features; - my $strand = $exon_features->[0]->strand; - my @exon_features_sorted = sort {$a->start <=> $b->start} @{$exon_features}; # be sure that exon list is sorted - - my $cds_counter=1; - my $utr3_counter=1; - my $utr5_counter=1; - foreach my $exon_feature (@exon_features_sorted){ - - # exon overlap fully a CDS - if( ($exon_feature->end >= $ORFend) and ($exon_feature->start <= $ORFstart) ){ - - my $cds_feature=clone($exon_feature);#create a copy of the feature exon ==================================== - $cds_feature->start($ORFstart); #modify start cds ============================ - $cds_feature->end($ORFend); #modify end - $cds_feature->primary_tag('CDS'); - #get old name - my $ID = $cds_feature->_tag_value('ID'); - create_or_replace_tag($cds_feature,'ID',$ID.'-cds-'.$cds_counter); #modify name - push(@cds_features, $cds_feature);#save that cds - $cds_counter++; - if($exon_feature->start < $ORFstart){ - my $utr_feature=clone($exon_feature);#create a copy of the feature - $utr_feature->end($ORFstart-1); #modify start - if ( ($strand == -1) or ($strand 
eq "-") ) { - $utr_feature->primary_tag('three_prime_UTR'); - create_or_replace_tag($utr_feature,'ID',$ID.'-utr3-'.$utr3_counter); #modify name - push(@utr3_features, $utr_feature);#save that cds - $utr3_counter++; - }else{ - $utr_feature->primary_tag('five_prime_UTR'); - create_or_replace_tag($utr_feature,'ID',$ID.'-utr5-'.$utr5_counter); #modify name - push(@utr5_features, $utr_feature);#save that cds - $utr5_counter++; - } - } - if($exon_feature->end > $ORFend){ - my $utr_feature=clone($exon_feature);#create a copy of the feature - $utr_feature->start($ORFend+1); #modify start - if ( ($strand == -1) or ($strand eq "-") ) { - $utr_feature->primary_tag('five_prime_UTR'); - create_or_replace_tag($utr_feature,'ID',$ID.'-utr5-'.$utr5_counter); #modify name - push(@utr5_features, $utr_feature);#save that cds - $utr5_counter++; - }else{ - $utr_feature->primary_tag('three_prime_UTR'); - create_or_replace_tag($utr_feature,'ID',$ID.'-utr3-'.$utr3_counter); #modify name - push(@utr3_features, $utr_feature);#save that cds - $utr3_counter++; - } - } - } - # cds overlap fully an exon - elsif( ($exon_feature->end <= $ORFend) and ($exon_feature->start >= $ORFstart) ){ - my $cds_feature=clone($exon_feature);#create a copy of the feature exon ======================== - $cds_feature->primary_tag('CDS'); - #get old name cds =============================== - my $ID = $cds_feature->_tag_value('ID'); - create_or_replace_tag($cds_feature,'ID',$ID.'-cds-'.$cds_counter); #modify name - push(@cds_features, $cds_feature);#save that cds - $cds_counter++; - } - # cds overp partially an exon - elsif( ($exon_feature->end >= $ORFstart) and ($exon_feature->start <= $ORFend) ){ #they overlap - - if($exon_feature->start >= $ORFstart){ # cds overlap start of exon exon =============================== - #Manage CDS - my $cds_feature=clone($exon_feature);#create a copy of the feature cds =============================== - $cds_feature->end($ORFend); #modify end - $cds_feature->primary_tag('CDS'); - 
#get old name - my $ID = $cds_feature->_tag_value('ID'); - create_or_replace_tag($cds_feature,'ID',$ID.'-cds-'.$cds_counter); #modify name - push(@cds_features, $cds_feature);#save that cds - $cds_counter++; - #manage UTR - my $utr_feature=clone($exon_feature);#create a copy of the feature - $utr_feature->start($ORFend+1); #modify end - $ID = $utr_feature->_tag_value('ID'); - if ( ($strand == -1) or ($strand eq "-") ) { - $utr_feature->primary_tag('five_prime_UTR'); - create_or_replace_tag($utr_feature,'ID',$ID.'-utr5-'.$utr5_counter); #modify name - push(@utr5_features, $utr_feature);#save that cds - $utr5_counter++; - - }else{ - $utr_feature->primary_tag('three_prime_UTR'); - create_or_replace_tag($utr_feature,'ID',$ID.'-utr3-'.$utr3_counter); #modify name - push(@utr3_features, $utr_feature);#save that cds - $utr3_counter++; - } - - } - else{ #cds overlap start end exon - #Manage CDS - my $cds_feature=clone($exon_feature);#create a copy of the feature - $cds_feature->start($ORFstart); #modify start exon =============================== - $cds_feature->primary_tag('CDS'); - #get old name cds ===================================== - my $ID = $cds_feature->_tag_value('ID'); - create_or_replace_tag($cds_feature,'ID',$ID.'-cds-'.$cds_counter); #modify name - push(@cds_features, $cds_feature);#save that cds - $cds_counter++; - #Manage UTR - my $utr_feature=clone($exon_feature);#create a copy of the feature - $utr_feature->end($ORFstart-1); #modify start - $ID = $utr_feature->_tag_value('ID'); - if ( ($strand == -1) or ($strand eq "-") ) { - $utr_feature->primary_tag('three_prime_UTR'); - create_or_replace_tag($utr_feature,'ID',$ID.'-utr3-'.$utr3_counter); #modify name - push(@utr3_features, $utr_feature);#save that cds - $utr3_counter++; - }else{ - $utr_feature->primary_tag('five_prime_UTR'); - create_or_replace_tag($utr_feature,'ID',$ID.'-utr5-'.$utr5_counter); #modify name - push(@utr5_features, $utr_feature);#save that cds - $utr5_counter++; - } - } - }###### Only 
UTR part - else{ #Does not overlap - if($exon_feature->end < $ORFstart){ #UTR5 in + strand - my $utr_feature=clone($exon_feature);#create a copy of the feature exon =============================== - #get old name cds =============================== - my $ID = $utr_feature->_tag_value('ID'); - if ( ($strand == -1) or ($strand eq "-") ) { - $utr_feature->primary_tag('three_prime_UTR'); - create_or_replace_tag($utr_feature,'ID',$ID.'-utr3-'.$utr3_counter); #modify name - push(@utr3_features, $utr_feature);#save that cds - $utr3_counter++; - }else{ - $utr_feature->primary_tag('five_prime_UTR'); - create_or_replace_tag($utr_feature,'ID',$ID.'-utr5-'.$utr5_counter); #modify name - push(@utr5_features, $utr_feature);#save that cds - $utr5_counter++; - } - - } - else{ #UTR3 in + strand - my $utr_feature=clone($exon_feature);#create a copy of the feature exon =============================== - #get old name - my $ID = $utr_feature->_tag_value('ID'); #cds =============================== - if ( ($strand == -1) or ($strand eq "-") ) { - $utr_feature->primary_tag('five_prime_UTR'); - create_or_replace_tag($utr_feature,'ID',$ID.'-utr5-'.$utr5_counter); #modify name - push(@utr5_features, $utr_feature);#save that cds - $utr5_counter++; - }else{ - $utr_feature->primary_tag('three_prime_UTR'); - create_or_replace_tag($utr_feature,'ID',$ID.'-utr3-'.$utr3_counter); #modify name - push(@utr3_features, $utr_feature);#save that cds - $utr3_counter++; - } - } - } - } -my @utr5_features_sorted=sort {$a->start <=> $b->start} @utr5_features; -my @cds_features_sorted=sort {$a->start <=> $b->start} @cds_features; -my @utr3_features_sorted=sort {$a->start <=> $b->start} @utr3_features; -return \@utr5_features_sorted, \@cds_features_sorted, \@utr3_features_sorted; #really utr5 and utr3 that are return -} - - -# Actually the duplicates have been collected during the parsing process here we just print them. 
-sub _check_duplicates{ - my ($duplicate, $omniscient, $verbose) = @_ ; - - my $keyExist = keys %{$duplicate}; - if($keyExist){#print result - _printSurrounded("Achthung /\\ Attention /\\ Be carefull => Duplicates removed !\n(Same chr/contig/scaffold, same position, same ID)",75, "#"); - - my $gffout= Bio::Tools::GFF->new( -fh => \*STDOUT ); - my $info = _print_duplicates($duplicate, $omniscient, $gffout, $verbose); - print "$info\n" if($verbose > 0); - } - else{ - print " none found.\n" if($verbose > 0); - } -} - -# print duplicate hash -sub _print_duplicates { - my ($duplicate_omniscient, $hash_omniscient, $gffout, $verbose) = @_ ; - - my $string=""; - foreach my $level (keys %{$duplicate_omniscient}){ # primary_tag_key_level1 = gene or repeat etc... - foreach my $primary_tag (keys %{$duplicate_omniscient->{$level}}){ - my $nb_by_pt=0; - my $nb_feat_pt=0; - foreach my $id (keys %{$duplicate_omniscient->{$level}{$primary_tag}}){ - $nb_feat_pt++; - foreach my $feature (@{$duplicate_omniscient->{$level}{$primary_tag}{$id}}){ - $nb_by_pt++; - $gffout->write_feature($feature) if($verbose >= 2); # print feature - } - } - $string .= "There were $nb_feat_pt duplicated $primary_tag feature for a total of $nb_by_pt duplicates."; - } - } - - return $string; -} - - -# allows to add a frame to a string to print -sub _printSurrounded{ - my ($term,$size,$char,$extra) = @_; - - my $frame=$char x ($size+4); - $frame.="\n"; - - my $result = $frame; - - my @lines = split(/\n/,$term); - - foreach my $line (@lines){ - $result .="$char "; - - my $sizeTerm=length($line); - if ($sizeTerm > $size ){ - $result .= substr($line, 0,($size));# - } - else{ - my $nbBlancBefore=int(($size-$sizeTerm) / 2); - my $nbBlancAfter = ($size-$sizeTerm) - $nbBlancBefore; - $result .= " " x $nbBlancBefore; - $result .= $line; - $result .= " " x $nbBlancAfter; - } - $result .= " $char\n"; - } - $result .= "$frame"; - if($extra){$result .= "$extra";} - print $result; -} - -#GFF format guess -# Input: 
filename -# Output: Integer (1,2 or 3) -sub select_gff_format{ - my ($file) = @_; - - #HANDLE format - my %format; - my $problem3=undef; - my $nbLineChecked=100; #number line to use to check the formnat - my $cpt=0; - - open(my $fh, '<', $file) or die "cannot open file $file"; - { - while(<$fh>){ - - if($_ =~ /^#/){next;} #if it is a commented line starting by # we skip it. - - $cpt++; - if($cpt > $nbLineChecked){ - last; - } - if($_ =~ /^.*\t.*\t.*\t.*\t.*\t.*\t.*\t.*\t(.*)/){ - if(length($1) < 1){next;} - - my $Ninethcolum = $1; - if($Ninethcolum =~ /=/ and $Ninethcolum =~ /;/ ){ $format{3}++;}; - - if($Ninethcolum !~ /=/ and $Ninethcolum !~ /;/ ){ - $format{1}++; - } - elsif($Ninethcolum !~ /=/ and $Ninethcolum =~ /;/ ){ - $format{2}++; - } - my $c = () = $Ninethcolum =~ /=/g; - my $d = () = $Ninethcolum =~ /\ /g; - if($c > 1 and $d > 1 and $Ninethcolum !~ /;/ ){ - $problem3=1; - } - } - } - } - close($fh); - - if($problem3){ - _printSurrounded("There is a problem with your GFF format.\nThis format is wrong: tag=value tag=value.\nYou should have: tag=value;tag=value or tag value ; tag value\nThe best parser (gff1) we can use will keep only the first attribute.",100,"!"); - $format{1}++; - } - - if (%format){ - my $number_of_format = scalar keys %format; - if ($number_of_format > 1){ - print ("There is a problem we found several formats in this file:"); - my $var = join ",", keys %format; - print "$var\n"; - print "Let's see what we can do...\n"; - } - } - else{ - _printSurrounded("Doesn't look like a GFF file\nLet's see what the Bioperl parser can do with that...(using gff3 parser)",100,"!"); - $format{3}++; - } - if($format{3}){return 3;} - if($format{2}){return 2;} - if($format{1}){return 1;} -} - -# We modify the attributes: group=gene_id "e_gw1.5.2.1" protein_id 335805 exonNumber 1 -# in order to get : gene_id=e_gw1.5.2.1;protein_id=335805;exonNumber=1 -sub _gff1_corrector{ - my ($feat, $verbose)=@_; - - if($feat->has_tag('group')){ - - my @attribs = 
$feat->get_tag_values('group'); - my $attribs = join ' ', @attribs; - my @parsed; - my $flag = 0; # this could be changed to a bit and just be twiddled - - # run through each character one at a time and check it - my $previousChar=undef; - my $string=""; - foreach my $a ( split //, $attribs ) { - $string.=$a; - - # flag up on entering quoted text, down on leaving it - if( $a eq '"') { $flag = ( $flag == 0 ) ? 1:0 ;} #active deactive the flag - - if ($previousChar and $previousChar eq '"' and $flag == 0){ # case we have to strip the " characters - chop $string; - chop $string; - $string = reverse($string); - chop($string); - $string= reverse($string); - push @parsed, $string; - $string=""; - } - elsif( ( $a eq " " and $flag == 0) and !($string =~ /^ *$/) ){ - chop $string; - push @parsed, $string; - $string=""; - } - $previousChar = $a; - } - # ---- Check now last string ---- - # If it was quoted - if ($previousChar and $previousChar eq '"' and $flag == 0){ # case we have to strip the " characters - chop $string; - $string = reverse($string); - chop($string); - $string= reverse($string); - push @parsed, $string; - }# If it not empty or not only space and not quoted - elsif( ($string ne "") and !($string =~ /^ *$/) ){ - if($previousChar eq " "){ - chop $string; - } - push @parsed, $string; - } - - while (@parsed){ - my $value = pop @parsed; - my $tag = pop @parsed; - $feat->add_tag_value($tag, $value); - } - #remove old group attribute - $feat->remove_tag('group'); - } -} - -# @Purpose: Create a hash containing all the name and identifier of an ontology. -# @input: 1 => Object Bio::Ontology -# @output: 1 => hash containing all the name and identifier -sub create_term_and_id_hash{ - my ($self) = @_; - - my %hash_term_id; - - foreach my $term ($self->get_all_terms) { - $hash_term_id{lc($term->name)} = lc($term->identifier); - $hash_term_id{lc($term->identifier)} = lc($term->name); - #print $term->name." 
<=> ".$term->identifier."\n"; - } - return \%hash_term_id; -} - -#Look for gff3 specific header -#@INPUT: 1 => string (a file) -#@OUPUT: 1 => hash of the different header and their values -sub _check_header{ - my ($file) = @_; - - #HANDLE format - my %headerInfo; - - #check it is a file - if(-f $file){ - open(my $fh, '<', $file) or die "cannot open file $file"; - { - while(<$fh>){ - if($_ !~ /^##[^#]/) { - last; - } - else{ - my @data = split /\s/, $_ ; - my $type = shift @data; - - if($type eq /^##gff-version/){ - $headerInfo{$type}=$data[0]; #1 element - } - if($type eq "##sequence-region"){ - $headerInfo{$type}=@data; # 3 elements - } - if($type eq "##feature-ontology"){ - $headerInfo{$type}=$data[0] #1 element - } - if($type eq "##attribute-ontology"){ - $headerInfo{$type}=$data[0]; #1 element - } - if($type eq "##species"){ - $headerInfo{$type}=$data[0]; #1 element - } - if($type eq "##genome-build"){ - $headerInfo{$type}=@data; #2 elements - } - } - } - } - close($fh); - } - - return \%headerInfo; -} - -# @Purpose: Read a file from URL -# @input: 2 => String URL, String target (Target is not mandatory) -# @output: none -sub fetcher_JD { - my ($url, $target) = @_; - my $ua = LWP::UserAgent->new; - $ua->timeout(10); - $ua->env_proxy; - - my $response = $ua->get($url); - if ($response->is_success) { - if($target){ - open my $OUT, '>', $target or die "File error: $! 
:: $?"; - print $OUT $response->decoded_content; # or whatever - } - else{ - my $string = $response->decoded_content; - return $string ; - } - } - else { - die $response->status_line; - } -} - -# @Purpose: retrieve the feature_ontology -# @input: 3 => String file, Hash, Int -# @output: 1 => Object Ontology -# @Remark: Do not deal if multiple ontologies (we will use the first one meet) -sub _handle_ontology{ - my ($gff3headerInfo, $verbose) = @_ ; - - my $ontology_obj=undef; - my $internalO=1; - - if(exists_keys($gff3headerInfo, ("##feature-ontology"))){ - - print "feature-ontology URI defined within the file: ".$gff3headerInfo->{'##feature-ontology'}."\n" if $verbose; - #retrieve the data from URI and save it in a string - my $stringFILE=undef; - try{ - $stringFILE = fetcher_JD($gff3headerInfo->{"##feature-ontology"}); - } - catch{ - print "The URI provided (".$gff3headerInfo->{'##feature-ontology'}.") doesn't work.\n" if $verbose; - print "error: $_\n" if ( $verbose >= 1); - }; - - if($stringFILE){ - #create a filehandler from a string - open( my $fh_uriOnto, '<', \$stringFILE) || die "Cannot read the string: $! 
:: $?"; - - #parse the ontology saved - my $parser = undef; - try{ - $parser = Bio::OntologyIO->new(-format => "obo", - -fh => $fh_uriOnto); - $ontology_obj = $parser->parse(); - close $fh_uriOnto; - } - catch{ - print "The URI provided doesn't point to obo ontology format data.\n" if $verbose; - print "error: $_\n" if ( $verbose >= 1); - $parser = undef; - }; - - if($parser){ #We got ontology at the URI location, no need to use the internal one - $internalO=undef; - print "feature-ontology parsed correctly\n" if $verbose; - } - } - } - - if($internalO){ #No URI provided for the feature-ontology(file case), or doesn't exist (hash / table case) let's use the interal one - - try{ - my $full_path = `perldoc -lm NBIS::GFF3::Omniscient`; - my $index = index($full_path, "NBIS/GFF3/"); - $index+=10; #To not shrinck NBIS/GFF3/ part of the path - my $path_begin = substr $full_path, 0, $index;; - my $correct_path = $path_begin."Ontology/"; - opendir (DIR, $correct_path) or die $!; - my @list_file; - - # list all the sofa file available - while (my $file = readdir(DIR)) { - next if($file eq "." 
or $file eq ".."); - push(@list_file, $file); - } - - #get the most recent file - my @sorted_list = sort { $a cmp $b } @list_file; - my $recent_file = pop @sorted_list; - my $sofa_file_path = $correct_path."/".$recent_file; - print " We will use the most recent SOFA feature-ontology we have localy: $recent_file\n" if $verbose; - - #parse the ontology - my $parser = Bio::OntologyIO->new(-format => "obo", - -file => $sofa_file_path); - $ontology_obj = $parser->parse(); - if($verbose) { - my $nbroot_terms =0; - foreach my $term ($ontology_obj->get_root_terms) { - $nbroot_terms++; - } - my $nbterms =0; - foreach my $term ($ontology_obj->get_all_terms) { - $nbterms++; - } - my $nbleaf_terms =0; - foreach my $term ($ontology_obj->get_leaf_terms) { - $nbleaf_terms++; - } - print " read ontology $recent_file with ", - "$nbroot_terms root terms, and ", - "$nbterms total terms, and ", - "$nbleaf_terms leaf terms\n"; - } - } - catch{ - print "error: $_\n" if ( $verbose >= 1); - print "Let's continue without feature-ontology information.\n" if( $verbose > 0); - }; - } - return $ontology_obj; -} - -# @Purpose: Handle global warnings to provide momre information to the user according to problems encountered -# @input: 3 => hash, -# @output: 1 => none (because it will just display infromation) -# @Remark: none -sub _handle_globalWARNS{ - my ($globalWARNS, $ontology) = @_; - - if( keys %{$globalWARNS} ){ - if(exists($globalWARNS->{"parser1"}) ) { - my %hash = map { $_, 1 } @{$globalWARNS->{parser1}}; - my @unique = keys %hash; - my $string = "Primary tag values (3rd column) not expected => @unique\n". - "Those primary tag are not yet taken into account by the parser!\n". - "If you wish to use it/them, pleast update the parameter feature json files accordingly (features_level1, features_level2 or features_level3).\n". - "To resume:\n". - "- it must be a level1 feature if it has no parent.\n". - "- it must be a level2 feature if it has a parent and this parent is from level1.\n". 
- "- it must be a level3 feature if it has a parent and this parent has also a parent.\n\n". - "Currently the tool just ignore them, So if they where Level1,level2, a gene or RNA feature will be created accordingly."; - _printSurrounded($string,150,"-") ; - } - if(exists($globalWARNS->{"ontology1"}) ) { - if( keys %{$ontology} ){ - my %hash = map { $_, 1 } @{$globalWARNS->{ontology1}}; - my @unique = keys %hash; - my $string = "Primary tag values (3rd column) not expected => @unique\n". - "In theory these values are not compatible with gff3 format because they are not part of the Sequence Ontology.\n". - "If you want to follow rigourously the gff3 format, please visit this website:\n". - "https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md\n". - "They provide tools to check the gff3 format.\n". - "Even if you have this warning, you should be able to use the gff3 output in most of gff3 tools."; - _printSurrounded($string,150,"-") ; - } - else{ - my $string = "No feature-ontology was available, we haven't checked that the feature types (3rd column) correspond to the gff3 specifactions."; - _printSurrounded($string,150,"-") ; - } - } - } -} - -# @Purpose: set path to look at the json feature level files (If present locally we take them otherwise look at standard path). 
If expose option is activated, we copy the json files localy and exit -# @input: 2 => string (path), integer -# @output: 3 => hash, hash, hash -# @Remark: none -sub load_levels{ - my ($verbose, $expose_feature_levels) = @_ ; - - #set run directory - my $run_dir = cwd; - - #set original path to json files - my $original_path = `perldoc -lm NBIS::GFF3::Omniscient`; - my $index = index($original_path, "NBIS/GFF3/"); - $index+=10; #To not shrinck NBIS/Handler/ part of the path - my $json_path = substr $original_path, 0, $index; - $json_path = $json_path."Feature_levels/"; - - # Check if it is asked to copy the json files locally - if ($expose_feature_levels){ - my @files = glob("$json_path/*.json"); - - foreach my $file (@files) { - copy($file, $run_dir) or die "Copy failed: $!"; - } - print " All json feature level files copied in your working directory\n" if ($verbose); - exit; - } - # Load the json files - else{ - #check first if exist locally - if (-e $run_dir."/features_level1.json") { - print " Using local json level feature files\n" if($verbose > 0); - ($LEVEL1, $LEVEL2, $LEVEL3) = load_levels_from_json_local($run_dir."/", $verbose); - } - else{ #otherwise use the standard location ones - print " Using standard json level feature files\n" if($verbose > 0); - ($LEVEL1, $LEVEL2, $LEVEL3) = load_levels_from_json_local($json_path, $verbose); - } - } -} - -# @Purpose: load all parameter (about level of the features i.e. 
gene = level1, mRNA=level2, exon=level3 (and if they are spread or not like cds,utr) stored in json files -# @input: 2 => string (path), integer -# @output: 3 => hash, hash, hash -# @Remark: none -sub load_levels_from_json_local{ - - my ($json_path, $verbose) = @_ ; - my $level1 = undef; - my $level2 = undef; - my $level3 = undef; - - try{ - # --Deal with feature L1-- - my $correct_path_level = $json_path."features_level1.json"; - $level1 = load_json($correct_path_level); - # --Deal with feature L2-- - $correct_path_level = $json_path."features_level2.json"; - $level2 = load_json($correct_path_level); - # --Deal with feature L3-- - $correct_path_level = $json_path."features_level3.json"; - $level3 = load_json($correct_path_level); - # --Deal feature spread-- - $correct_path_level = $json_path."features_spread.json"; - $SPREADFEATURE = load_json($correct_path_level); - } - catch{ - print "error: Feature levels not found we cannot continue.\n"; - exit; - }; - return $level1,$level2,$level3; -} - -# @Purpose: load json data into variable -# @input: 3 => String path to the json file -# @output: 1 => hash reference with data -# @Remark: none -sub load_json{ - - my ($file_path) = @_; - - my $result = undef; - my $json_text = do { - open(my $json_fh, "<:encoding(UTF-8)", $file_path) - or die("Can't open \$file_path\": $!\n"); - local $/; - <$json_fh> - }; - - my $json = JSON->new; - try{ - $result = $json->decode($json_text); - } - catch{ - print "error while parsing $file_path. 
Please verify the sanity of your json file.\n"; - }; - - return $result; -} -1; diff --git a/annotation/NBIS/GFF3/Omniscient/OmniscientO.pm b/annotation/NBIS/GFF3/Omniscient/OmniscientO.pm deleted file mode 100644 index b4037dbbd..000000000 --- a/annotation/NBIS/GFF3/Omniscient/OmniscientO.pm +++ /dev/null @@ -1,768 +0,0 @@ -#!/usr/bin/perl -w - -package NBIS::GFF3::Omniscient::OmniscientO; - -use strict; -use warnings; -use Sort::Naturally; -use Bio::Tools::GFF; -use URI::Escape; -use NBIS::GFF3::Omniscient::OmniscientTools; -use Exporter; - -our @ISA = qw(Exporter); -our @EXPORT = qw(print_ref_list_feature print_omniscient print_omniscient_as_match print_omniscient_from_level1_id_list webapollo_compliant embl_compliant convert_omniscient_to_ensembl_style ); -sub import { - NBIS::GFF3::Omniscient::OmniscientO->export_to_level(1, @_); # to be able to load the EXPORT functions by calling NBIS::GFF3::Omniscient::OmniscientI; (normal case) - NBIS::GFF3::Omniscient::OmniscientO->export_to_level(2, @_); # to be able to load the EXPORT functions by calling NBIS::GFF3::Omniscient; -} - -=head1 SYNOPSIS - - -=head1 DESCRIPTION - - A library to convert handle any kind of gff file and save it in memory as GFF3 "full" format. - Full format means, we expand exon with several parent, we add ID everywhere (even if level3 ID is not mandatory), and Parent everywhere. 
- Inherits from - -=head1 CONTACT - jacques.dainat@nbis.se (Jacques Dainat) - -=cut - -# +------------------------------------------------------+ -# |+----------------------------------------------------+| -# || Print Methods || -# |+----------------------------------------------------+| -# +------------------------------------------------------+ - -# 1) Original print -################### -sub print_ref_list_feature { - - my ($list, $gffout) = @_ ; - - foreach my $feature (@$list) { - $gffout->write_feature($feature); - } -} - -# omniscient is a hash containing a whole gXf file in memory sorted in a specific way (3 levels) -sub print_omniscient{ - - my ($hash_omniscient, $gffout) = @_ ; - - #uri_decode_omniscient($hash_omniscient); - -### OLD FASHION GOING TRHOUGH LEVEL1 - #foreach my $primary_tag_l1 ( sort {$a <=> $b or $a cmp $b} keys %{$hash_omniscient->{'level1'}}){ # primary_tag_l1 = gene or repeat etc... - # foreach my $id_tag_key_level1 ( sort { $hash_omniscient->{'level1'}{$primary_tag_l1}{$a}->start <=> $hash_omniscient->{'level1'}{$primary_tag_l1}{$b}->start } keys %{$hash_omniscient->{'level1'}{$primary_tag_l1}} ) { #sort by position - -### NEW FASHION GOING TRHOUGH LEVEL1 - Have to first create a hash of seq_id -> level1_feature , then we can go through in alphanumerical order. 
- # sort by seq id - my $hash_sortBySeq = gather_and_sort_l1_by_seq_id($hash_omniscient); - - # Read by seqId to sort properly the output by seq ID - # sort { (($a =~ /(\d+)$/)[0] || 0) <=> (($b =~ /(\d+)$/)[0] || 0) will provide sorting liek that: contig contig1 contig2 contig3 contig10 contig11 contig22 contig100 contig101 - foreach my $seqid (sort { (($a =~ /(\d+)$/)[0] || 0) <=> (($b =~ /(\d+)$/)[0] || 0) } keys %{$hash_sortBySeq}){ # loop over all the feature level1 - - foreach my $primary_tag_l1 (sort {$a cmp $b} keys %{$hash_sortBySeq->{$seqid}}){ - - foreach my $feature_l1 ( @{$hash_sortBySeq->{$seqid}{$primary_tag_l1}} ){ - my $id_tag_key_level1 = lc($feature_l1->_tag_value('ID')); - $gffout->write_feature($hash_omniscient->{'level1'}{$primary_tag_l1}{$id_tag_key_level1}); # print feature - - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_l2 (sort {$a cmp $b} keys %{$hash_omniscient->{'level2'}}){ # primary_tag_l2 = mrna or mirna or ncrna or trna etc... 
- - if ( exists_keys( $hash_omniscient, ('level2', $primary_tag_l2, $id_tag_key_level1) ) ){ - foreach my $feature_level2 ( sort { ncmp ($a->start.$a->end.$a->_tag_value('ID'), $b->start.$b->end.$b->_tag_value('ID') ) } @{$hash_omniscient->{'level2'}{$primary_tag_l2}{$id_tag_key_level1}}) { - $gffout->write_feature($feature_level2); - - ################# - # == LEVEL 3 == # - ################# - my $level2_ID = lc($feature_level2->_tag_value('ID')); - - ###### - # FIRST EXON - if ( exists_keys( $hash_omniscient, ('level3', 'exon', $level2_ID) ) ){ - foreach my $feature_level3 ( sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{'exon'}{$level2_ID}}) { - $gffout->write_feature($feature_level3); - } - } - ########### - # SECOND CDS - if ( exists_keys( $hash_omniscient, ('level3', 'cds', $level2_ID) ) ){ - foreach my $feature_level3 ( sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{'cds'}{$level2_ID}}) { - $gffout->write_feature($feature_level3); - } - } - - ############ - # THEN ALL THE REST - foreach my $primary_tag_l3 (sort {$a cmp $b} keys %{$hash_omniscient->{'level3'}}){ # primary_tag_l3 = cds or exon or start_codon or utr etc... 
- if (($primary_tag_l3 ne 'cds') and ($primary_tag_l3 ne 'exon')) { - if ( exists_keys( $hash_omniscient, ('level3', $primary_tag_l3, $level2_ID) ) ){ - foreach my $feature_level3 ( sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{$primary_tag_l3}{$level2_ID}}) { - $gffout->write_feature($feature_level3); - } - } - } - } - } - } - } - } - } - } -} - -# omniscient is a hash containing a whole gXf file in memory sorted in a specific way (3 levels) -sub print_omniscient_as_match{ - - my ($hash_omniscient, $gffout) = @_ ; - - #uri_decode_omniscient($hash_omniscient); - - # sort by seq id - my $hash_sortBySeq = gather_and_sort_l1_by_seq_id($hash_omniscient); - - #Read by seqId to sort properly the output by seq ID - foreach my $seqid ( sort { (($a =~ /(\d+)$/)[0] || 0) <=> (($b =~ /(\d+)$/)[0] || 0) } keys %{$hash_sortBySeq}){ # loop over all the feature level1 - foreach my $primary_tag_l1 (sort {$a cmp $b} keys %{$hash_sortBySeq->{$seqid}}){ - - ################# - # == LEVEL 1 == # - ################# - foreach my $feature_l1 ( @{$hash_sortBySeq->{$seqid}{$primary_tag_l1}} ){ - my $id_tag_key_level1 = lc($feature_l1->_tag_value('ID')); - - if($primary_tag_l1 =~ "match"){ - $gffout->write_feature($hash_omniscient->{'level1'}{$primary_tag_l1}{$id_tag_key_level1}); # print feature - } - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_l2 (sort {$a cmp $b} keys %{$hash_omniscient->{'level2'}}){ # primary_tag_l2 = mrna or mirna or ncrna or trna etc... 
- - if ( exists_keys( $hash_omniscient, ('level2', $primary_tag_l2, $id_tag_key_level1) ) ){ - foreach my $feature_level2 ( sort {$a->start <=> $b->start} @{$hash_omniscient->{'level2'}{$primary_tag_l2}{$id_tag_key_level1}}) { - - if($primary_tag_l2 =~ "match"){ - $gffout->write_feature($feature_level2); - } - else{ - $feature_level2->primary_tag('match'); - if( $feature_level2->has_tag('Parent')){ - $feature_level2->remove_tag('Parent'); - } - - $gffout->write_feature($feature_level2); - - ################# - # == LEVEL 3 == # - ################# - my $level2_ID = $feature_level2->_tag_value('ID'); - - ###### - # EXON - if ( exists_keys( $hash_omniscient, ('level3', 'exon', lc($level2_ID)) ) ){ - my $current_start=0; - foreach my $feature_level3 ( sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{'exon'}{lc($level2_ID)}}) { - - $current_start++; - my $end=($feature_level3->end - $feature_level3->start)+$current_start; - $feature_level3->primary_tag('match_part'); - - if(! $feature_level3->has_tag('Target')){ - my @target=(); - create_or_replace_tag($feature_level3, "Target", "$level2_ID $current_start $end +"); # Target has value has to be a list correctly formated - } - $current_start=$end; - - $gffout->write_feature($feature_level3); - } - } - } - } - } - } - } - } - } -} - -# omniscient is a hash containing a whole gXf file in memory sorted in a specific way (3 levels) -sub print_omniscient_from_level1_id_list { - - my ($hash_omniscient, $level_id_list, $gffout) = @_ ; - - ################# - # == LEVEL 1 == # - ################# - foreach my $primary_tag_key_level1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... 
- - foreach my $id_tag_key_level1_raw (@$level_id_list){ - my $id_tag_key_level1 = lc($id_tag_key_level1_raw); - if(exists ($hash_omniscient->{'level1'}{$primary_tag_key_level1}{$id_tag_key_level1})){ - - #_uri_encode_one_feature($hash_omniscient->{'level1'}{$primary_tag_key_level1}{$id_tag_key_level1}); - - $gffout->write_feature($hash_omniscient->{'level1'}{$primary_tag_key_level1}{$id_tag_key_level1}); # print feature - - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_key_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - - if ( exists ($hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1} ) ){ - foreach my $feature_level2 ( @{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1}}) { - - #_uri_encode_one_feature($feature_level2); - - $gffout->write_feature($feature_level2); - - ################# - # == LEVEL 3 == # - ################# - my $level2_ID ; - if($feature_level2->has_tag('ID')){ - $level2_ID = lc($feature_level2->_tag_value('ID')); - } - elsif($feature_level2->has_tag('transcript_id')){ - $level2_ID = lc( $feature_level2->_tag_value('transcript_id')); - } - else{ - warn "Cannot retrieve the parent feature of the following feature: ".gff_string($feature_level2); - } - - ########### - # Before tss - if ( exists_keys($hash_omniscient,('level3','tss',$level2_ID)) ){ - foreach my $feature_level3 ( @{$hash_omniscient->{'level3'}{'tss'}{$level2_ID}}) { - - #_uri_encode_one_feature($feature_level3); - - $gffout->write_feature($feature_level3); - } - } - - ###### - # FIRST EXON - if ( exists_keys($hash_omniscient,('level3','exon',$level2_ID)) ){ - foreach my $feature_level3 ( @{$hash_omniscient->{'level3'}{'exon'}{$level2_ID}}) { - - #_uri_encode_one_feature($feature_level3); - - $gffout->write_feature($feature_level3); - } - } - ########### - # SECOND CDS - if ( exists_keys($hash_omniscient,('level3','cds',$level2_ID)) ){ 
- foreach my $feature_level3 ( @{$hash_omniscient->{'level3'}{'cds'}{$level2_ID}}) { - - #_uri_encode_one_feature($feature_level3); - - $gffout->write_feature($feature_level3); - } - } - - ########### - # Last tts - if ( exists_keys($hash_omniscient,('level3','tts',$level2_ID)) ){ - foreach my $feature_level3 ( @{$hash_omniscient->{'level3'}{'tts'}{$level2_ID}}) { - - #_uri_encode_one_feature($feature_level3); - - $gffout->write_feature($feature_level3); - } - } - - ########### - # The rest - foreach my $primary_tag_key_level3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... - if( ($primary_tag_key_level3 ne 'cds') and ($primary_tag_key_level3 ne 'exon') and ($primary_tag_key_level3 ne 'tss') and ($primary_tag_key_level3 ne 'tts')){ - if ( exists ($hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID} ) ){ - foreach my $feature_level3 ( @{$hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID}}) { - - #_uri_encode_one_feature($feature_level3); - - $gffout->write_feature($feature_level3); - } - } - } - } - } - } - } - } - } - - } -} - -# +------------------------------------------------------+ -# |+----------------------------------------------------+| -# || webapollo compliant || -# |+----------------------------------------------------+| -# +------------------------------------------------------+ - -############################### -# METHOD RELATED TO WEBAPOLLO # -############################### -use constant CWA_skip_feature => { "non_canonical_three_prime_splice_site" => 1 , "non_canonical_five_prime_splice_site" => 2}; -#Transform omniscient data to be Webapollo compliant -sub webapollo_compliant { - my ($hash_omniscient) = @_ ; - - ################# - # == LEVEL 1 == # - ################# - foreach my $primary_tag_l1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_l1 = gene or repeat etc... 
- if(exists (CWA_skip_feature->{$primary_tag_l1})){delete $hash_omniscient->{'level1'}{$primary_tag_l1}; next;} - foreach my $id_l1 (keys %{$hash_omniscient->{'level1'}{$primary_tag_l1}}){ - webapollo_rendering_l1($hash_omniscient->{'level1'}{$primary_tag_l1}{$id_l1}); - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_l2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_l2 = mrna or mirna or ncrna or trna etc... - if(exists (CWA_skip_feature->{$primary_tag_l2})){delete $hash_omniscient->{'level2'}{$primary_tag_l2}; next;} - if ( exists ($hash_omniscient->{'level2'}{$primary_tag_l2}{$id_l1} ) ){ - foreach my $feature_level2 ( @{$hash_omniscient->{'level2'}{$primary_tag_l2}{$id_l1}}) { - webapollo_rendering_l2($feature_level2); - - ################# - # == LEVEL 3 == # - ################# - my $level2_ID = lc($feature_level2->_tag_value('ID')); - - foreach my $primary_tag_l3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_l3 = cds or exon or start_codon or utr etc... - if(exists (CWA_skip_feature->{$primary_tag_l3})){delete $hash_omniscient->{'level3'}{$primary_tag_l3}; next;} - if ( exists ($hash_omniscient->{'level3'}{$primary_tag_l3}{$level2_ID} ) ){ - foreach my $feature_level3 ( @{$hash_omniscient->{'level3'}{$primary_tag_l3}{$level2_ID}}) { - webapollo_rendering_l3($feature_level3); - } - } - } - } - } - } - } - } -} - -#follow webapollo description for a correct visualisation of data -sub webapollo_rendering_l1 { - my ($feature)=@_; - - my @tags = $feature->get_all_tags(); - - ## check Name attribute - my @f = grep /^\Qname\E$/i, @tags; #look at tag that match the whole string to NAME (case insensitive) - my $name_tag = $f[0]; - if(! 
$name_tag){ - my $value = $feature->_tag_value('ID'); - $feature->add_tag_value('Name', $value); - } -} - - my ($feature)=@_; - -#follow webapollo description for a correct visualisation of data -sub webapollo_rendering_l2 { - my ($feature)=@_; - - ## check primary tag - my $primary_tag = lc($feature->primary_tag); - - my %corrections = ( - mrna => 'mRNA', - ); - if ( exists $corrections{$primary_tag}) { - $feature->primary_tag( $corrections{$primary_tag}); - } - - my @tags = $feature->get_all_tags(); - - ## check product/description attribute - # if($feature->has_tag('product')){ - my @f = grep /\Qproduct\E/i, @tags; - my $product_tag = $f[0]; - if($product_tag){ - my @values = $feature->get_tag_values($product_tag); - $feature->add_tag_value('description', @values); - $feature->remove_tag($product_tag); - } -} - -#follow webapollo description for a correct visualisation of data -sub webapollo_rendering_l3 { - my ($feature)=@_; - - ## check primary tag - my $primary_tag = lc($feature->primary_tag); - - my %corrections = ( - cds => 'CDS', - exon => 'exon', - three_prime_utr => 'three_prime_UTR', - five_prime_utr => 'five_prime_UTR', - utr => 'UTR', - ); - if ( exists $corrections{$primary_tag}) { - $feature->primary_tag( $corrections{$primary_tag}); - } - - my @tags = $feature->get_all_tags(); - - foreach my $tag (@tags){ - if(lc($tag) ne 'id' and lc($tag) ne 'parent'){ - $feature->remove_tag($tag); - } - } -} - -# +------------------------------------------------------+ -# |+----------------------------------------------------+| -# || EMBL compliant || -# |+----------------------------------------------------+| -# +------------------------------------------------------+ - -#Transform omniscient data to be embl compliant -sub embl_compliant { - my ($hash_omniscient) = @_ ; - - ################# - # == LEVEL 1 == # - ################# - foreach my $primary_tag_key_level1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... 
- foreach my $id_tag_key_level1 (keys %{$hash_omniscient->{'level1'}{$primary_tag_key_level1}}){ - _embl_rendering($hash_omniscient->{'level1'}{$primary_tag_key_level1}{$id_tag_key_level1}); - - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_key_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - - if ( exists ($hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1} ) ){ - foreach my $feature_level2 ( @{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1}}) { - _embl_rendering($feature_level2); - - ################# - # == LEVEL 3 == # - ################# - my $level2_ID = lc($feature_level2->_tag_value('ID')); - - foreach my $primary_tag_key_level3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... - - if ( exists ($hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID} ) ){ - foreach my $feature_level3 ( @{$hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID}}) { - _embl_rendering($feature_level3); - } - } - } - } - } - } - } - } -} - -sub _embl_rendering { - - my ($feature)=@_; - - ## check primary tag - my $primary_tag = lc($feature->primary_tag); - - my @feature_list=["assembly_gap","C_region","CDS","centromere","D-loop","D_segment","exon","gap","gene","iDNA","intron","J_segment","LTR","mat_peptide","misc_binding","misc_difference","misc_feature","misc_recomb","misc_RNA","misc_structure","mobile_element","modified_base","mRNA","ncRNA","N_region","old_sequence","operon","oriT","polyA_site","precursor_RNA","prim_transcript","primer_bind","protein_bind","regulatory","repeat_region","rep_origin","rRNA","S_region","sig_peptide","source","stem_loop","STS","telomere","tmRNA","transit_peptide","tRNA","unsure","V_region","V_segment","variation","3'UTR","5'UTR"]; - - foreach my $element (@feature_list){ - if(lc($element) =~ /$primary_tag/){ - 
$feature->$primary_tag = $element; - } - else{ - #repeat exception rule - if( $primary_tag =~ /repeat/ ){ - $feature->$primary_tag = "repeat_region"; - } - #utr5 exception rule - elsif($primary_tag =~ /utr/ and ($primary_tag =~ /3/ or $primary_tag =~ /three/) ){ - $feature->$primary_tag = "3'UTR"; - } - #utr5 exception rule - elsif($primary_tag =~ /utr/ and ($primary_tag =~ /5/ or $primary_tag =~ /five/) ){ - $feature->$primary_tag = "5'UTR"; - } - print "WARNING: this primary tag ".$primary_tag." is not recognized among those expected to be EMBL compliant. Please check it or create an exception rule.\n"; - } - } -} - -# +------------------------------------------------------+ -# |+----------------------------------------------------+| -# || ENSEMBL compliant || -# |+----------------------------------------------------+| -# +------------------------------------------------------+ - -# @Purpose: Convert the omniscient to be constistent with the ensembl gff format -# @input: 1 => omniscient Hash reference -# @output 1 => omniscient Hash reference -sub convert_omniscient_to_ensembl_style{ - my ($omniscient) = @_; - - _convert_3th_column($omniscient, "mRNA", "transcript"); - _add_exon_id($omniscient, "ID"); - _add_transcript_id($omniscient, "ID"); - _add_gene_id($omniscient, "ID"); -} - -# @Purpose: Convert the 3th column (feature type) from old value to new value -# @input: 3 => omniscient Hash reference, String = featurey type original, String = new feature type -# @output none => The hash itself is modified -sub _convert_3th_column{ - my ($omniscient, $original, $new) = @_; - - foreach my $primary_tag_l1 (keys %{$omniscient->{'level1'}}){ # primary_tag_l1 = gene or repeat etc... 
- foreach my $id_l1 (keys %{$omniscient->{'level1'}{$primary_tag_l1}}){ - - if( lc($omniscient->{'level1'}{$primary_tag_l1}{$id_l1}->primary_tag) eq lc($original) ){ - $omniscient->{'level1'}{$primary_tag_l1}{$id_l1}->primary_tag($new); - } - - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_l2 (keys %{$omniscient->{'level2'}}){ # primary_tag_l2 = mrna or mirna or ncrna or trna etc... - if ( exists ($omniscient->{'level2'}{$primary_tag_l2}{$id_l1} ) ){ - foreach my $feature_level2 ( @{$omniscient->{'level2'}{$primary_tag_l2}{$id_l1}}) { - - if(lc($feature_level2->primary_tag) eq lc($original) ){ - $feature_level2->primary_tag($new); - } - ################# - # == LEVEL 3 == # - ################# - my $level2_ID = lc($feature_level2->_tag_value('ID')); - - foreach my $primary_tag_l3 (keys %{$omniscient->{'level3'}}){ # primary_tag_l3 = cds or exon or start_codon or utr etc... - if ( exists ($omniscient->{'level3'}{$primary_tag_l3}{$level2_ID} ) ){ - foreach my $feature_level3 ( @{$omniscient->{'level3'}{$primary_tag_l3}{$level2_ID}}) { - if(lc($feature_level3->primary_tag) eq lc($original) ){ - $feature_level3->primary_tag($new); - } - } - } - } - } - } - } - } - } -} - -# @Purpose: add exon_id to exon feature if does not exist. -# @input: 3 => omniscient Hash reference, String = attribute to use to create exon_id -# @output none => The hash itself is modified -sub _add_exon_id{ - my ($omniscient, $original_attribute) = @_; - - foreach my $primary_tag_l1 (keys %{$omniscient->{'level1'}}){ # primary_tag_l1 = gene or repeat etc... - foreach my $id_l1 (keys %{$omniscient->{'level1'}{$primary_tag_l1}}){ - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_l2 (keys %{$omniscient->{'level2'}}){ # primary_tag_l2 = mrna or mirna or ncrna or trna etc... 
- if ( exists ($omniscient->{'level2'}{$primary_tag_l2}{$id_l1} ) ){ - foreach my $feature_l2 ( @{$omniscient->{'level2'}{$primary_tag_l2}{$id_l1}}) { - ################# - # == LEVEL 3 == # - ################# - my $level2_ID = lc($feature_l2->_tag_value('ID')); - foreach my $primary_tag_l3 (keys %{$omniscient->{'level3'}}){ # primary_tag_l3 = cds or exon or start_codon or utr etc... - if ( exists ($omniscient->{'level3'}{$primary_tag_l3}{$level2_ID} ) ){ - foreach my $feature_l3 ( @{$omniscient->{'level3'}{$primary_tag_l3}{$level2_ID}}) { - - if( $feature_l3->has_tag($original_attribute) and ! $feature_l3->has_tag('exon_id') ){ - create_or_replace_tag($feature_l3, "exon_id", $feature_l3->_tag_value($original_attribute) ); - } - } - } - } - } - } - } - } - } -} - -# @Purpose: add transcript_id to all feature l2 if does not exist. -# @input: 3 => omniscient Hash reference, String = attribute to use to create transcript_id -# @output none => The hash itself is modified -sub _add_transcript_id{ - my ($omniscient, $original_attribute) = @_; - - foreach my $primary_tag_l1 (keys %{$omniscient->{'level1'}}){ # primary_tag_l1 = gene or repeat etc... - foreach my $id_l1 (keys %{$omniscient->{'level1'}{$primary_tag_l1}}){ - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_l2 (keys %{$omniscient->{'level2'}}){ # primary_tag_l2 = mrna or mirna or ncrna or trna etc... - if ( exists ($omniscient->{'level2'}{$primary_tag_l2}{$id_l1} ) ){ - foreach my $feature_l2 ( @{$omniscient->{'level2'}{$primary_tag_l2}{$id_l1}}) { - my $level2_ID = lc($feature_l2->_tag_value('ID')); - - if( $feature_l2->has_tag($original_attribute) and ! $feature_l2->has_tag('transcript_id') ){ - create_or_replace_tag($feature_l2, "transcript_id", $feature_l2->_tag_value($original_attribute) ); - } - } - } - } - } - } -} - -# @Purpose: add gene_id to all feature l1 if does not exist. 
-# @input: 3 => omniscient Hash reference, String = attribute to use to create gene_id -# @output none => The hash itself is modified -sub _add_gene_id{ - my ($omniscient, $original_attribute) = @_; - - foreach my $primary_tag_l1 (keys %{$omniscient->{'level1'}}){ # primary_tag_l1 = gene or repeat etc... - foreach my $id_l1 (keys %{$omniscient->{'level1'}{$primary_tag_l1}}){ - my $feature_l1 = $omniscient->{'level1'}{$primary_tag_l1}{$id_l1}; - - if( $feature_l1->has_tag($original_attribute) and ! $feature_l1->has_tag('gene_id') ){ - create_or_replace_tag($feature_l1, "gene_id", $feature_l1->_tag_value($original_attribute) ); - } - } - } -} - -# +------------------------------------------------------+ -# |+----------------------------------------------------+| -# || Deal with URI || -# |+----------------------------------------------------+| -# +------------------------------------------------------+ - -# check all the attribute to URI encode the values -sub uri_encode_omniscient { - - my ($hash_omniscient) = @_ ; - - ################# - # == LEVEL 1 == # - ################# - foreach my $primary_tag_key_level1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... - foreach my $id_tag_key_level1 (keys %{$hash_omniscient->{'level1'}{$primary_tag_key_level1}}){ - _uri_encode_one_feature($hash_omniscient->{'level1'}{$primary_tag_key_level1}{$id_tag_key_level1}); - - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_key_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... 
- - if ( exists ($hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1} ) ){ - foreach my $feature_level2 ( @{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1}}) { - _uri_encode_one_feature($feature_level2); - - ################# - # == LEVEL 3 == # - ################# - my $level2_ID = lc($feature_level2->_tag_value('ID')); - - foreach my $primary_tag_key_level3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... - - if ( exists ($hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID} ) ){ - foreach my $feature_level3 ( @{$hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID}}) { - _uri_encode_one_feature($feature_level3); - } - } - } - } - } - } - } - } -} - -sub uri_decode_omniscient { - - my ($hash_omniscient) = @_ ; - - ################# - # == LEVEL 1 == # - ################# - foreach my $primary_tag_key_level1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... - foreach my $id_tag_key_level1 (keys %{$hash_omniscient->{'level1'}{$primary_tag_key_level1}}){ - _uri_decode_one_feature($hash_omniscient->{'level1'}{$primary_tag_key_level1}{$id_tag_key_level1}); - - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_key_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - - if ( exists ($hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1} ) ){ - foreach my $feature_level2 ( @{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1}}) { - _uri_decode_one_feature($feature_level2); - - ################# - # == LEVEL 3 == # - ################# - my $level2_ID = lc($feature_level2->_tag_value('ID')); - - foreach my $primary_tag_key_level3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... 
- - if ( exists ($hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID} ) ){ - foreach my $feature_level3 ( @{$hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID}}) { - _uri_decode_one_feature($feature_level3); - } - } - } - } - } - } - } - } -} - -# reencode in uri all value of all attributes of a feature -sub _uri_encode_one_feature { - - my ($feature)=@_; - - my @list_tag = $feature->get_all_tags; - - foreach my $tag (@list_tag){ - my @values = $feature->get_tag_values($tag); - $feature->remove_tag($tag); - foreach my $val (@values){ - my $val_checked = uri_unescape($val); - my $new_val = uri_escape($val_checked ); - $feature->add_tag_value($tag, $new_val); - } - } -} - -sub _uri_decode_one_feature { - - my ($feature)=@_; - - my @list_tag = $feature->get_all_tags; - - foreach my $tag (@list_tag){ - my @values = $feature->get_tag_values($tag); - $feature->remove_tag($tag); - foreach my $val (@values){ - my $new_val = uri_unescape($val); - $feature->add_tag_value($tag, $new_val); - } - } -} - -1; diff --git a/annotation/NBIS/GFF3/Omniscient/OmniscientTools.pm b/annotation/NBIS/GFF3/Omniscient/OmniscientTools.pm deleted file mode 100644 index acb50686d..000000000 --- a/annotation/NBIS/GFF3/Omniscient/OmniscientTools.pm +++ /dev/null @@ -1,2452 +0,0 @@ -#!/usr/bin/perl -w - -package NBIS::GFF3::Omniscient::OmniscientTools; - -use strict; -use warnings; -use Bio::Tools::GFF; -use Bio::Seq; -use Sort::Naturally; -use Exporter; - - -our @ISA = qw(Exporter); -our @EXPORT = qw(is_single_exon_gene get_most_right_left_cds_positions l2_has_cds l1_has_l3_type check_record_positions remove_l2_related_feature l2_identical group_l1IDs_from_omniscient complement_omniscients rename_ID_existing_in_omniscient keep_only_uniq_from_list2 check_gene_overlap_at_CDSthenEXON location_overlap_update location_overlap nb_feature_level1 check_gene_positions gather_and_sort_l1_location_by_seq_id gather_and_sort_l1_location_by_seq_id_and_strand 
gather_and_sort_l1_by_seq_id gather_and_sort_l1_by_seq_id_and_strand extract_cds_sequence group_l1features_from_omniscient create_omniscient_from_idlevel2list get_feature_l2_from_id_l2_l1 remove_omniscient_elements_from_level2_feature_list remove_omniscient_elements_from_level2_ID_list featuresList_identik group_features_from_omniscient featuresList_overlap check_level1_positions check_level2_positions info_omniscient fil_cds_frame exists_keys remove_element_from_omniscient append_omniscient merge_omniscients remove_omniscient_elements_from_level1_id_list fill_omniscient_from_other_omniscient_level1_id subsample_omniscient_from_level1_id_list check_if_feature_overlap remove_tuple_from_omniscient create_or_replace_tag remove_element_from_omniscient_attributeValueBased get_longest_cds_level2); -sub import { - NBIS::GFF3::Omniscient::OmniscientTools->export_to_level(1, @_); # to be able to load the EXPORT functions by calling NBIS::GFF3::Omniscient::OmniscientI; (normal case) - NBIS::GFF3::Omniscient::OmniscientTools->export_to_level(2, @_); # to be able to load the EXPORT functions by calling NBIS::GFF3::Omniscient; -} - - -# for my $sub_name ( @methods ){ -# push @EXPORT, $sub_name; -# } -#} - -=head1 SYNOPSIS - - - -=head1 DESCRIPTION - - A library to convert - Inherits from - -=head1 CONTACT - jacques.dainat@nbis.se (Jacques Dainat) - -=cut - -# +------------------------------------------------------+ -# |+----------------------------------------------------+| -# || HANDLE OMNISCIENT => Fill / Modify || -# |+----------------------------------------------------+| -# +------------------------------------------------------+ - - - -# omniscient is a hash containing a whole gXf file in memory sorted in a specific way (3 levels) -# If a feature/record already exists in omniscient_to_append, it will be replaced by the new one (If the new one content less features, the surnumerary ones are actually erased/removed) -sub fill_omniscient_from_other_omniscient_level1_id { - - 
my ($level_id_list, $hash_omniscient, $omniscient_to_append)=@_; - - ################# - # == LEVEL 1 == # - ################# - foreach my $primary_tag_key_level1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... - foreach my $id_tag_key_level1 (@$level_id_list){ #select Id of the list - if( exists_keys($hash_omniscient, ('level1', $primary_tag_key_level1, $id_tag_key_level1)) ){ - $omniscient_to_append->{'level1'}{$primary_tag_key_level1}{$id_tag_key_level1} = $hash_omniscient->{'level1'}{$primary_tag_key_level1}{$id_tag_key_level1}; # print feature - - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_key_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - if( exists_keys($hash_omniscient, ('level2', $primary_tag_key_level2, $id_tag_key_level1)) ){ - @{$omniscient_to_append->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1}} = @{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1}}; - - foreach my $feature_level2 ( @{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1}}) { - - ################# - # == LEVEL 3 == # - ################# - my $level2_ID = lc($feature_level2->_tag_value('ID')); - - # remove feature from omniscient_to_append related to level2_ID. Like that those that are not anymore in hash_omniscient will not appear anymore at the end of the next step (#Now add the new L3 feature of level2_ID) - foreach my $tag_l3 (keys %{$omniscient_to_append->{'level3'}}){ - if( exists_keys($omniscient_to_append, ('level3', $tag_l3, $level2_ID)) ){ - delete $omniscient_to_append->{'level3'}{$tag_l3}{$level2_ID} ; - } - } - #Now add the new L3 feature of level2_ID - foreach my $primary_tag_key_level3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... 
- if( exists_keys($hash_omniscient, ('level3', $primary_tag_key_level3, $level2_ID)) ){ - @{$omniscient_to_append->{'level3'}{$primary_tag_key_level3}{$level2_ID}} = @{$hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID}}; - } - } - } - } - } - } - } - } -} - -# @Purpose: Append hash1 with gene and subfeature of hash2 if: -# -> Have CDS and do not overlap at CDS level (if overlap at UTR it's fine) and CDS size is over threshold. -# -> Do not have CDS and do not overlap at exon level -# @input: 4 => omniscient1 hash, omniscient2 hash, int, int -# @output 1 => omniscient1 -sub complement_omniscients { - my ($omniscient1, $omniscient2, $size_min, $verbose)=@_; - - my %add_omniscient; - - $size_min=0 if (! $size_min); - - if(! $verbose){$verbose=0;} - my $omniscient1_sorted = gather_and_sort_l1_location_by_seq_id_and_strand($omniscient1); - my $omniscient2_sorted = gather_and_sort_l1_location_by_seq_id_and_strand($omniscient2); - - foreach my $locusID ( keys %{$omniscient2_sorted}){ # tag_l1 = gene or repeat etc... - - foreach my $tag_l1 ( keys %{$omniscient2_sorted->{$locusID}} ) { - - # Go through location from left to right ### !! 
- foreach my $location ( @{$omniscient2_sorted->{$locusID}{$tag_l1}} ) { - my $id1_l1 = lc($location->[0]); - print "\nlets look at $id1_l1.\n" if ($verbose >= 3); - my $take_it=1; - - if( exists_keys($omniscient1_sorted, ($locusID,$tag_l1) ) ) { - - foreach my $location2 ( @{$omniscient1_sorted->{$locusID}{$tag_l1}} ) { - my $id2_l1 = lc($location2->[0]); - - #If location_to_check start if over the end of the reference location, we stop - if($location2->[1] > $location->[2]) {last;} - print "location_to_check start if over the end of the reference location.\n" if ($verbose >= 3); - - #If location_to_check end if inferior to the start of the reference location, we continue next - if($location2->[2] < $location->[1]) {next;} - print "location_to_check start if inferior to the start of the reference location.\n" if ($verbose >= 3); - - # Let's check at Gene LEVEL - if( location_overlap($location, $location2) ){ #location overlap at gene level check now level3 - #let's check at CDS level (/!\ id1_l1 is corresponding to id from $omniscient2) - if(check_gene_overlap_at_CDSthenEXON($omniscient2, $omniscient1, $id1_l1, $id2_l1)){ #If contains CDS it has to overlap at CDS level, otherwise any type of feature level3 overlaping is sufficient to decide that they overlap - print "$id2_l1 overlaps $id1_l1, we skip it.\n" if ($verbose >= 3); - $take_it=undef; last; - } - print "$id2_l1 overlaps $id1_l1 overlap but not at CDS level.\n" if ($verbose >= 3); - } - else{ - print "$id2_l1 DO NOT OVERLAP $id1_l1.\n" if ($verbose >= 3); - } - } - } - - # We keep it because is not overlaping - if($take_it){ - print "We take it : $id1_l1\n" if ($verbose >= 3); - - #look at size - my $still_take_it=undef; - foreach my $tag_l2 (keys %{$omniscient2->{'level2'}} ){ - if(exists_keys($omniscient2,('level2', $tag_l2, $id1_l1))){ - foreach my $feature_l2 ( @{$omniscient2->{'level2'}{$tag_l2}{$id1_l1}} ){ - my $id_l2 = $feature_l2->_tag_value('ID'); - - if(exists_keys($omniscient2,('level3', 
'cds', lc($id_l2)))){ - my $cds_size=0; - foreach my $feature_l3 ( @{$omniscient2->{'level3'}{'cds'}{lc($id_l2)}} ){ - my $size=$feature_l3->end - $feature_l3->start +1; - $cds_size += $size; - } - if($cds_size >= $size_min){ - $still_take_it=1; - last; - } - } - else{ - $still_take_it=1; - } - - last if $still_take_it; - } - last if $still_take_it; - } - } - # We keep it because has size over threshold - if ($still_take_it){ - #save level1 - $add_omniscient{'level1'}{$tag_l1}{$id1_l1} = $omniscient2->{'level1'}{$tag_l1}{$id1_l1}; - #save level2 - foreach my $tag_l2 (keys %{$omniscient2->{'level2'}} ){ - if(exists_keys($omniscient2,('level2', $tag_l2, $id1_l1))){ - # Add the level2 list data - $add_omniscient{'level2'}{$tag_l2}{$id1_l1} = $omniscient2->{'level2'}{$tag_l2}{$id1_l1}; - # for each level2 get the level3 subfeatures - foreach my $feature_l2 ( @{$omniscient2->{'level2'}{$tag_l2}{$id1_l1}} ){ - my $id_l2 = $feature_l2->_tag_value('ID'); - #save level3 - foreach my $tag_l3 (keys %{$omniscient2->{'level3'}} ){ - if(exists_keys($omniscient2,('level3', $tag_l3, lc($id_l2)))){ - $add_omniscient{'level3'}{$tag_l3}{lc($id_l2)} = $omniscient2->{'level3'}{$tag_l3}{lc($id_l2)}; - } - } - } - } - } - } - } - } - } - } - - #Now populate hash1 with data from hash2 - merge_omniscients($omniscient1, \%add_omniscient); - - undef %add_omniscient; - - return $omniscient1; -} - -# omniscient is a hash containing a whole gXf file in memory sorted in a specific way (3 levels) -# rename ID in hash_omniscient2 that already exist in hash_omniscient1 -sub rename_ID_existing_in_omniscient { - - my ($hash_omniscient1, $hash_omniscient2, $verbose)=@_; - - if(! 
$verbose){$verbose=1;} - - my $hash_whole_IDs = get_all_IDs($hash_omniscient1); - my $hash2_whole_IDs = get_all_IDs($hash_omniscient2); - - my %hash_miscCount; - my $miscCount = \%hash_miscCount; - my $resume_case=undef; - - ################# - # == LEVEL 1 == # - ################# - foreach my $tag_l1 (keys %{$hash_omniscient2->{'level1'}}){ # tag_l1 = gene or repeat etc... - foreach my $id_l1 (keys %{$hash_omniscient2->{'level1'}{$tag_l1}}){ - my $new_parent=undef; - my $uID = $hash_omniscient2->{'level1'}{$tag_l1}{$id_l1}->_tag_value('ID'); - - if ( exists ( $hash_whole_IDs->{$id_l1} ) ){ - $resume_case++; - my $feature = $hash_omniscient2->{'level1'}{$tag_l1}{$id_l1}; - $uID = replace_by_uniq_ID( $feature, $hash_whole_IDs, $hash2_whole_IDs, $miscCount); - $hash_omniscient2->{'level1'}{$tag_l1}{lc($uID)} = delete $hash_omniscient2->{'level1'}{$tag_l1}{$id_l1}; # save feature - $new_parent=1; - } - ################# - # == LEVEL 2 == # - ################# - foreach my $tag_l2 (keys %{$hash_omniscient2->{'level2'}}){ # tag_l2 = mrna or mirna or ncrna or trna etc... 
- - if (exists_keys ($hash_omniscient2, ('level2', $tag_l2, $id_l1) ) ){ #Non present in hash2, we create a list with one element - - foreach my $feature_l2 ( @{$hash_omniscient2->{'level2'}{$tag_l2}{$id_l1}}) { - - my $new_parent_l2=undef; - - if($new_parent){ - create_or_replace_tag($feature_l2, 'Parent', $uID); - } - - my $uID_l2 = $feature_l2->_tag_value('ID'); - my $id_l2 = lc($uID_l2); - - if ( exists ( $hash_whole_IDs->{$id_l2} ) ){ - - $resume_case++; - $uID_l2 = replace_by_uniq_ID($feature_l2, $hash_whole_IDs, $hash2_whole_IDs, $miscCount); - $new_parent_l2=1; - } - - ################# - # == LEVEL 3 == # - ################# - foreach my $tag_l3 (keys %{$hash_omniscient2->{'level3'}}){ - - if (exists_keys ($hash_omniscient2, ('level3', $tag_l3, $id_l2) ) ){ - - foreach my $feature_l3 ( @{$hash_omniscient2->{'level3'}{$tag_l3}{$id_l2}}) { - - if($new_parent_l2){ - create_or_replace_tag($feature_l3, 'Parent', $uID_l2); - } - - my $uID_l3 = $feature_l3->_tag_value('ID'); - my $id_l3 = lc($uID_l3); - - if ( exists ( $hash_whole_IDs->{$id_l2} ) ){ - $resume_case++; - $uID_l3 = replace_by_uniq_ID($feature_l3, $hash_whole_IDs, $hash2_whole_IDs, $miscCount); - - } - } - #save list feature level3 - if($new_parent_l2){ - $hash_omniscient2->{'level3'}{$tag_l3}{lc($uID_l2)} = delete $hash_omniscient2->{'level3'}{$tag_l3}{lc($id_l2)} ; - } - } - } - } - #save list feature level2 - if($new_parent){ - $hash_omniscient2->{'level2'}{$tag_l2}{lc($uID)} = delete $hash_omniscient2->{'level2'}{$tag_l2}{lc($id_l1)}; - } - } - } - } - } - print "we renamed $resume_case cases\n" if($verbose and $resume_case); - - return $hash_omniscient2; -} - -# put data from hash_omniscient2 in hash_omniscient1 -# Features are added even if they are identical. If they have similar name, new name will be given too. -sub merge_omniscients { - # $hash_omniscient1 = omniscient to append !!! - my ($hash_omniscient1, $hash_omniscient2, $hash_whole_IDs)=@_; - - if (! 
$hash_whole_IDs){ - $hash_whole_IDs = get_all_IDs($hash_omniscient1); - } - my $hash2_whole_IDs = get_all_IDs($hash_omniscient2); - - my %hash_miscCount; - my $miscCount = \%hash_miscCount; - - ################# - # == LEVEL 1 == # - ################# - foreach my $tag_l1 (keys %{$hash_omniscient2->{'level1'}}){ # tag_l1 = gene or repeat etc... - foreach my $id_l1 (keys %{$hash_omniscient2->{'level1'}{$tag_l1}}){ - - my $new_parent=undef; - my $uID = $hash_omniscient2->{'level1'}{$tag_l1}{$id_l1}->_tag_value('ID'); - - if ( ! exists ( $hash_whole_IDs->{$id_l1} ) ){ - $hash_omniscient1->{'level1'}{$tag_l1}{$id_l1} = $hash_omniscient2->{'level1'}{$tag_l1}{$id_l1}; # save feature level1 - $hash_whole_IDs->{$id_l1}++; - } - else{ - #print "INFO level1: Parent $id_l1 already exist. We generate a new one to avoid collision !\n"; - my $feature = $hash_omniscient2->{'level1'}{$tag_l1}{$id_l1}; - $uID = replace_by_uniq_ID( $feature, $hash_whole_IDs, $hash2_whole_IDs, $miscCount); - $hash_omniscient1->{'level1'}{$tag_l1}{lc($uID)} = $hash_omniscient2->{'level1'}{$tag_l1}{$id_l1}; # save feature level1 - $new_parent=1; - } - - ################# - # == LEVEL 2 == # - ################# - foreach my $tag_l2 (keys %{$hash_omniscient2->{'level2'}}){ # tag_l2 = mrna or mirna or ncrna or trna etc... - - if (exists_keys ($hash_omniscient2, ('level2', $tag_l2, $id_l1) ) ){ #Non present in hash2, we create a list with one element - - foreach my $feature_l2 ( @{$hash_omniscient2->{'level2'}{$tag_l2}{$id_l1}}) { - - my $new_parent_l2=undef; - if($new_parent){ - create_or_replace_tag($feature_l2, 'Parent', $hash_omniscient1->{'level1'}{$tag_l1}{lc($uID)}->_tag_value('ID')); - } - - my $uID_l2 = $feature_l2->_tag_value('ID'); - my $id_l2 = lc($uID_l2); - - if ( exists ( $hash_whole_IDs->{$id_l2} ) ){ - - #print "INFO level2: Parent $id_l2 already exist. 
We generate a new one to avoid collision !\n"; - $uID_l2 = replace_by_uniq_ID($feature_l2, $hash_whole_IDs, $hash2_whole_IDs, $miscCount); - $new_parent_l2=1; - } - else{$hash_whole_IDs->{$id_l2}++;} - - ################# - # == LEVEL 3 == # - ################# - foreach my $tag_l3 (keys %{$hash_omniscient2->{'level3'}}){ - - if (exists_keys ($hash_omniscient2, ('level3', $tag_l3, $id_l2) ) ){ - - foreach my $feature_l3 ( @{$hash_omniscient2->{'level3'}{$tag_l3}{$id_l2}}) { - - if($new_parent_l2){ - create_or_replace_tag($feature_l3, 'Parent', $uID_l2); - } - - my $uID_l3 = $feature_l3->_tag_value('ID'); - my $id_l3 = lc($uID_l3); - - if ( exists ( $hash_whole_IDs->{$id_l3} ) ){ - # print "INFO level3: Parent $id_l3 already exist. We generate a new one to avoid collision !\n"; - $uID_l3 = replace_by_uniq_ID($feature_l3, $hash_whole_IDs, $hash2_whole_IDs, $miscCount); - } - else{$hash_whole_IDs->{$id_l3}++;} - } - #save list feature level3 - @{$hash_omniscient1->{'level3'}{$tag_l3}{lc($uID_l2)} } = @{ $hash_omniscient2->{'level3'}{$tag_l3}{$id_l2} }; - } - } - } - #save list feature level2 - @{$hash_omniscient1->{'level2'}{$tag_l2}{lc($uID)}} = @{ $hash_omniscient2->{'level2'}{$tag_l2}{$id_l1} }; - } - } - } - } - return $hash_omniscient1, $hash_whole_IDs; -} - -# The hash of reference will be hash_omniscient2. We will keep name from this one. -# When an overlap is found, the ID/parent are fixed -# Check id are not used twice -# sub merge_omniscients_properly { -# # $hash_omniscient1 = omniscient to append !!! 
-# my ($hash_omniscient1, $hash_omniscient2)=@_; - -# my $hash_whole_IDs1 = get_all_IDs($hash_omniscient1); -# my $hash_sortBySeq1 = gather_and_sort_l1_location_by_seq_id_and_strand($hash_omniscient1); - -# my $hash_whole_IDs2 = get_all_IDs($hash_omniscient2); -# my $hash_sortBySeq2 = gather_and_sort_l1_location_by_seq_id_and_strand($hash_omniscient2); - -# my %hash_miscCount; -# my $miscCount = \%hash_miscCount; - -# ################# -# # == LEVEL 1 == # -# ################# -# foreach my $position1 ( keys %{$hash_sortBySeq1} ){ -# foreach my $tag1 (keys %{$hash_sortBySeq1->{$position1}}){ -# if (! exists_keys($hash_sortBySeq2, ($position1, $tag1))){ -# foreach my $location1 @{$hash_sortBySeq1->{$position1}{tag1}}{ -# my $l1_id1 = $location1->[0]; -# my $l1_id_to_use = check_record_ids($l1_id1, $hash_omniscient1, $hash_whole_IDs1, $hash_whole_IDs2, $miscCount); -# my @level_id_list = ($l1_id_to_use); -# fill_omniscient_from_other_omniscient_level1_id(\@level_id_list, $hash_omniscient1, $hash_omniscient2); -# } -# } -# else{ #check if overlap -# # Go through location from left to right ### !! 
-# for (my $i = 0; $i < scalar @{$sortBySeq->{$locusID}{$tag_l1}}; $i++) { -# my $location = shift @{$sortBySeq->{$locusID}{$tag_l1}};# This location will be updated on the fly -# my $id_l1 = $location->[0]; - - -# foreach my $location1 @{$hash_sortBySeq1->{$position1}{tag1}}{ -# if(check_gene_overlap_at_CDSthenEXON($hash_omniscient1, $hash_omniscient2 , lc($l1_feature1->_tag_value('ID')), lc($id2_l1) )){ #OVERLAP - -# } -# else{ # feature do not overlap -# my $l1_id1 = $l1_feature1->_tag_value('ID'); -# my $l1_id_to_use = check_record_ids($l1_id1, $hash_omniscient1, $hash_whole_IDs1, $hash_whole_IDs2, $miscCount); -# my @level_id_list = ($l1_id_to_use); -# fill_omniscient_from_other_omniscient_level1_id(\@level_id_list, $hash_omniscient1, $hash_omniscient2); -# } - -# } - -# } - - -# return $hash_omniscient2; -# } - -sub append_omniscient { - - my ($omniscient, $level1,$level2,$level3)=@_; - - foreach my $feature (@$level1){ - my $primaryTag = lc($feature->primary_tag); - my $id = lc($feature->_tag_value('ID')); - - if( ! exists_keys($omniscient, ('level1', $primaryTag, $id)) ){ - $omniscient->{"level1"}{$primaryTag}{$id}=$feature; - } - } - foreach my $feature (@$level2){ # if exist, try to append the list - my $primaryTag = lc($feature->primary_tag); - my $parent_id = lc($feature->_tag_value('Parent')); - - if( ! exists_keys($omniscient, ('level2', $primaryTag, $parent_id)) ){ - push(@{$omniscient->{"level2"}{$primaryTag}{$parent_id}}, $feature);### - } - else{ # append element in the list if not existing - my $exist_in_list="no"; - my $id = lc($feature->_tag_value('ID')); - - foreach my $feature_original (@{$omniscient->{"level2"}{$primaryTag}{$parent_id}}){ - my $original_id = lc($feature_original->_tag_value('ID')); - if ($original_id eq $id){ - $exist_in_list="yes"; last; - } - } - if($exist_in_list eq "no"){ # feature doesnt exist in the feature list already present. So, we append it. 
- push(@{$omniscient->{"level2"}{$primaryTag}{$parent_id}}, $feature) - } - } - } - foreach my $feature (@$level3){ - my $primaryTag = lc($feature->primary_tag); - my $parent_id = lc($feature->_tag_value('Parent')); - - if( ! exists_keys($omniscient, ('level3', $primaryTag, $parent_id)) ){ - push(@{$omniscient->{"level3"}{$primaryTag}{$parent_id}}, $feature); - } - else{ # append element in the list if not existing - my $exist_in_list="no"; - my $id = lc($feature->_tag_value('ID')); - - foreach my $feature_original (@{$omniscient->{"level3"}{$primaryTag}{$parent_id}}){ - - my $original_id = lc($feature_original->get_tag_values('ID')); - - if ($original_id eq $id){ - $exist_in_list="yes"; last; - } - } - if($exist_in_list eq "no"){ # feature doesnt exist in the feature list already present. So, we append it. - push(@{$omniscient->{"level3"}{$primaryTag}{$parent_id}}, $feature) - } - } - } -} - -# +------------------------------------------------------+ -# |+----------------------------------------------------+| -# || HANDLE OMNISCIENT => Remove || -# |+----------------------------------------------------+| -# +------------------------------------------------------+ - -# omniscient is a hash containing a whole gXf file in memory sorted in a specific way (3 levels) -# Input: list of level1 id -# omniscient -# -sub remove_omniscient_elements_from_level1_id_list { - - my ($hash_omniscient, $level_id_list) = @_ ; - - ################# - # == LEVEL 1 == # - ################# - foreach my $primary_tag_key_level1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... 
- foreach my $id_tag_key_level1 (keys %{$hash_omniscient->{'level1'}{$primary_tag_key_level1}}){ - foreach my $level_id (@$level_id_list){ - if($id_tag_key_level1 eq lc($level_id)){ - - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_key_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - - if ( exists ($hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1} ) ){ - foreach my $feature_level2 ( @{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1}}) { - - ################# - # == LEVEL 3 == # - ################# - my $level2_ID = lc($feature_level2->_tag_value('ID')); - - foreach my $primary_tag_key_level3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... - if ( exists ($hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID} ) ){ - delete $hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID} # delete level3 - } - } - } - delete $hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1} # delete level2 - } - } - delete $hash_omniscient->{'level1'}{$primary_tag_key_level1}{$id_tag_key_level1}; # delete level1 - } - } - } - } -} - -# /!\XXX Has to be improved, we should loop over the feature list and extract the id_tag_key_level1 before to loop over the hash -# Input: list of level2 id -# omniscient -# -sub remove_omniscient_elements_from_level2_feature_list { - - my ($hash_omniscient, $feature_list) = @_ ; - - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_key_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... 
- foreach my $id_tag_key_level1 (keys %{$hash_omniscient->{'level2'}{$primary_tag_key_level2}}){ - if( exists_keys($hash_omniscient, ('level2', $primary_tag_key_level2, $id_tag_key_level1)) ){ - foreach my $feature_level2 ( @{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1}}) { - my $level2_ID= lc($feature_level2->_tag_value('ID')); - - foreach my $feature (@$feature_list){ - my $feature_ID = lc($feature->_tag_value('ID')); - my $feature_Parent_ID = lc($feature->_tag_value('Parent')); - - if($level2_ID eq $feature_ID){ - - ################# - # == LEVEL 3 == # - ################# - foreach my $primary_tag_key_level3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... - if( exists_keys($hash_omniscient, ('level3', $primary_tag_key_level3, $level2_ID)) ){ - delete $hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID} # delete level3 - } - } - my @id_concern_list=($feature_Parent_ID); - my @id_list_to_remove=($feature_ID); - my @list_tag_key=('all'); - remove_element_from_omniscient(\@id_concern_list, \@id_list_to_remove, $hash_omniscient, 'level2','false', \@list_tag_key); - - if( ! 
exists_keys($hash_omniscient, ('level2', $primary_tag_key_level2, $id_tag_key_level1)) ){ - #New new list was empty so l2 has been removed, we can now remove l1 - foreach my $primary_tag_key_level1 (keys %{$hash_omniscient->{'level1'}}){ - if( exists_keys($hash_omniscient, ('level1', $primary_tag_key_level1, $feature_Parent_ID)) ){ - delete $hash_omniscient->{'level1'}{$primary_tag_key_level1}{$feature_Parent_ID} - } - } - } - } - } - } - } - } - } -} - -# After cleaning if nothing left attache to level 2 we removed it, and the same for level1 -sub remove_omniscient_elements_from_level2_ID_list { - - my ($hash_omniscient, $ID_l2_list) = @_ ; - - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_key_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - foreach my $id_tag_key_level1 (keys %{$hash_omniscient->{'level2'}{$primary_tag_key_level2}}){ - if( exists_keys($hash_omniscient, ('level2', $primary_tag_key_level2, $id_tag_key_level1)) ){ - foreach my $feature_level2 ( @{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1}}) { - my $level2_ID= lc($feature_level2->_tag_value('ID')); - my $feature_Parent_ID = lc($feature_level2->_tag_value('Parent')); - - foreach my $feature_ID (@$ID_l2_list){ - $feature_ID = lc($feature_ID); - - if($level2_ID eq $feature_ID){ - - ################# - # == LEVEL 3 == # - ################# - foreach my $primary_tag_key_level3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... 
- if( exists_keys($hash_omniscient, ('level3', $primary_tag_key_level3, $level2_ID)) ){ - delete $hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID} # delete level3 - } - } - - my @id_concern_list=($feature_Parent_ID); - my @id_list_to_remove=($feature_ID); - my @list_tag_key=('all'); - remove_element_from_omniscient(\@id_concern_list, \@id_list_to_remove, $hash_omniscient, 'level2','false', \@list_tag_key); - - if( ! exists_keys($hash_omniscient, ('level2', $primary_tag_key_level2, $id_tag_key_level1)) ){ - #New list was empty so l2 has been removed, we can now remove l1 - foreach my $primary_tag_key_level1 (keys %{$hash_omniscient->{'level1'}}){ - if( exists_keys($hash_omniscient, ('level1', $primary_tag_key_level1, $feature_Parent_ID)) ){ - delete $hash_omniscient->{'level1'}{$primary_tag_key_level1}{$feature_Parent_ID} - } - } - } - } - } - } - } - } - } -} - -# remove value of hash from omniscient in level $level which have the tag incriminated -sub remove_tuple_from_omniscient { - - my ($id_list_to_remove, $hash_omniscient, $level, $bolean, $list_tag_key)=@_; - - # bolean true => we remove if in list_tag_key - # bolean false => we remove if ti is not in list_tag_key - my $remove; - foreach my $tag_key (keys %{$hash_omniscient->{$level}}){ - if($bolean eq 'true'){ - $remove="no"; - }else{$remove="yes";} - foreach my $tag_key_to_match (@$list_tag_key){ - if ((lc($tag_key) eq lc($tag_key_to_match)) and ($bolean eq 'true')){ - $remove="yes"; - } - if ((lc($tag_key) eq lc($tag_key_to_match)) and ($bolean eq 'false')){ - $remove="no";last; - } - } - if ($remove eq 'yes'){ - foreach my $id_key (keys %{$hash_omniscient->{$level}{$tag_key}}){ - foreach my $id_to_remove (@$id_list_to_remove){ - if(lc($id_to_remove) eq lc($id_key)){ - delete $hash_omniscient->{lc($level)}{lc($tag_key)}{lc($id_to_remove)}; #REMOVE THAT KEY-VALUE pair - } - } - } - } - } -} - -# from omniscient: remove feature from "feature list" of level2 or level3 with id present in 
$id_list_to_remove -# $id_concern = ID of parent we will check -sub remove_element_from_omniscient { - - my ($id_concern_list, $id_list_to_remove, $hash_omniscient, $level, $bolean, $list_tag_key)=@_; - - # bolean true => we remove if in list_tag_key - # bolean false => we remove if is not in list_tag_key - my $remove; - #Check level and tag - foreach my $tag_key (keys %{$hash_omniscient->{$level}}){ - if($bolean eq 'true'){ - $remove="no"; - }else{$remove="yes";} - foreach my $tag_key_to_match (@$list_tag_key){ - - if ((lc($tag_key) eq lc($tag_key_to_match)) and ($bolean eq 'true')){ - $remove="yes"; - } - if ((lc($tag_key) eq lc($tag_key_to_match)) and ($bolean eq 'false')){ - $remove="no";last; - } - } - #Check feature id from list - if ($remove eq 'yes'){ - foreach my $id_concern (@$id_concern_list){ - my $mustModifyList=undef; - my @listok; - - if(exists_keys($hash_omniscient, ($level,$tag_key,lc($id_concern)))){ - foreach my $feature (@{$hash_omniscient->{$level}{$tag_key}{lc($id_concern)}}){ - my $id = lc($feature->_tag_value('ID')); - my $shouldremoveit=undef; - - foreach my $id_to_remove (@$id_list_to_remove){ - - if(lc ($id_to_remove) eq $id){ # These feature is in list to remove - $mustModifyList="yes"; $shouldremoveit="yes"; last; - } - } - if(! $shouldremoveit){ - push(@listok, $feature); - } # Feature not present in id_to_remove, we keep it in list. - } - if($mustModifyList){ # at least one feature has been removed from list. Save the new list - if(@listok){ - @{$hash_omniscient->{$level}{$tag_key}{$id_concern}}=@listok; - } - else{ # The list is empty we could remove the key (otherwise we would have saved a emplty list) - delete $hash_omniscient->{$level}{$tag_key}{$id_concern}; - } - } - } - } - } - } -} - -# $id_concern = ID of parent we will check -# Go trhough all the element (L1 or L2 list) and check if we find one with the specied tag attribute and value attribute. 
In that case we remove it from the list -sub remove_element_from_omniscient_attributeValueBased { - - my ($id_concern_list, $attributeValue, $attributeTag, $hash_omniscient, $level, $bolean, $list_tag_key)=@_; - - # bolean true => we remove if in list_tag_key - # bolean false => we remove if is not in list_tag_key - my $remove; - #Check level and tag - foreach my $tag_key (keys %{$hash_omniscient->{$level}}){ - if($bolean eq 'true'){ - $remove="no"; - }else{$remove="yes";} - foreach my $tag_key_to_match (@$list_tag_key){ - - if ((lc($tag_key) eq lc($tag_key_to_match)) and ($bolean eq 'true')){ - $remove="yes"; - } - if ((lc($tag_key) eq lc($tag_key_to_match)) and ($bolean eq 'false')){ - $remove="no";last; - } - } - #Check feature id from list - if ($remove eq 'yes'){ - foreach my $id_concern (@{$id_concern_list}){ - my $mustModifyList=undef; - my @listok; - - if(exists_keys($hash_omniscient, ($level,$tag_key,lc($id_concern)))){ - foreach my $feature (@{$hash_omniscient->{$level}{$tag_key}{lc($id_concern)}}){ - my $id = lc($feature->_tag_value('ID')); - my $shouldremoveit=undef; - - if($feature->has_tag($attributeTag)){ - if( lc($feature->_tag_value($attributeTag)) eq lc($attributeValue) ){ - $mustModifyList="yes"; $shouldremoveit="yes"; - } - else{push(@listok, $feature);} - } - else{push(@listok, $feature);} - } - if($mustModifyList){ # at least one feature has been removed from list. 
Save the new list - @{$hash_omniscient->{$level}{$tag_key}{$id_concern}}=@listok; - } - } - } - } - } -} - -# +------------------------------------------------------+ -# |+----------------------------------------------------+| -# || HANDLE OMNISCIENT => CREATE || -# |+----------------------------------------------------+| -# +------------------------------------------------------+ - - -# @Purpose: Create an omniscient from list of feature L1,L2 and L3 -# @input: 3 => list L1, List L2, List L3 -# @output 1 => omniscient hash reference -sub create_omniscient { - - my ($level1,$level2,$level3)=@_; - - my $omniscient; - - foreach my $feature (@$level1){ - my $id = lc($feature->_tag_value('ID')); - $omniscient->{"level1"}{lc($feature->primary_tag)}{$id}=$feature; - } - foreach my $feature (@$level2){ - my $id = lc($feature->_tag_value('Parent')); - push(@{$omniscient->{"level2"}{lc($feature->primary_tag)}{$id}}, $feature);### - } - foreach my $feature (@$level3){ - my @parentList = lc( $feature->get_tag_values('Parent')); - foreach my $id (@parentList){ - push(@{$omniscient->{"level3"}{lc($feature->primary_tag)}{$id}}, $feature); - } - } - return $omniscient; -} - -# This method will create a new omniscient from an omniscient of reference and a list of id level2 -#$list_id_l2 has to be lower case -sub create_omniscient_from_idlevel2list{ - - my ($omniscientref, $hash_mRNAGeneLink, $list_id_l2)=@_; - - my %omniscient_new; - - foreach my $id_l2 (@$list_id_l2){ - my $id_l1 = lc($hash_mRNAGeneLink->{$id_l2}); - - # ADD LEVEL1 - foreach my $tag_l1 (keys %{$omniscientref->{'level1'}}){ - if( exists_keys($omniscientref,('level1',$tag_l1,$id_l1) ) ){ - $omniscient_new{'level1'}{$tag_l1}{$id_l1}=$omniscientref->{'level1'}{$tag_l1}{$id_l1}; - last; - } - } - # ADD LEVEL2 - foreach my $tag_l2 (keys %{$omniscientref->{'level2'}}){ - if( exists_keys($omniscientref,('level2',$tag_l2,$id_l1) ) ){ - foreach my $feature_l2 ( @{$omniscientref->{'level2'}{$tag_l2}{$id_l1}}){ - 
if(lc($feature_l2->_tag_value('ID')) eq $id_l2 ){ - push (@{$omniscient_new{'level2'}{$tag_l2}{$id_l1}}, $feature_l2); - last; - } - } - } - } - # ADD LEVEL3 - foreach my $tag_l3 (keys %{$omniscientref->{'level3'}}){ - if( exists_keys($omniscientref,('level3',$tag_l3,$id_l2) ) ){ - foreach my $feature_l3 ( @{$omniscientref->{'level3'}{$tag_l3}{$id_l2}}){ - push (@{$omniscient_new{'level3'}{$tag_l3}{$id_l2}}, $feature_l3); - } - } - } - } - return \%omniscient_new; -} - -# @Purpose: filter an omniscient to return a new omnicient containing only data related by the list of level1 IDs -# @input: 1 => omniscient hash reference -# @output 1 => omniscient hash reference -sub subsample_omniscient_from_level1_id_list { - - my ($hash_omniscient, $level_id_list) = @_ ; - - my %new_hash; - - ################# - # == LEVEL 1 == # - ################# - foreach my $primary_tag_key_level1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... - - foreach my $id_tag_key_level1_raw (@$level_id_list){ - my $id_tag_key_level1 = lc($id_tag_key_level1_raw); - if(exists ($hash_omniscient->{'level1'}{$primary_tag_key_level1}{$id_tag_key_level1})){ - - $new_hash{'level1'}{$primary_tag_key_level1}{$id_tag_key_level1} = delete $hash_omniscient->{'level1'}{$primary_tag_key_level1}{$id_tag_key_level1}; - - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_key_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... 
- if ( exists ($hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1} ) ){ - foreach my $feature_level2 ( @{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1}}) { - my $level2_ID = lc($feature_level2->_tag_value('ID')); - - ################# - # == LEVEL 3 == # - ################# - foreach my $primary_tag_key_level3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... - if ( exists ($hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID} ) ){ - $new_hash{'level3'}{$primary_tag_key_level3}{$level2_ID} = delete $hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID}; - } - } - } - $new_hash{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1} = delete $hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1}; - } - } - } - } - } - return \%new_hash; -} - -# +------------------------------------------------------+ -# |+----------------------------------------------------+| -# || Miscenaleous || -# |+----------------------------------------------------+| -# +------------------------------------------------------+ - - -# INPUT: feature object, String tag, String or Array ref; -# Output: None -sub create_or_replace_tag{ - - my ($feature, $tag, $value)=@_; - - if ($feature->has_tag($tag) ) { - $feature->remove_tag($tag); - if(ref($value) eq "ARRAY"){ - $feature->add_tag_value($tag,@{$value}); - } - else{ - $feature->add_tag_value($tag,$value); - } - } - else{ - if(ref($value) eq "ARRAY"){ - $feature->add_tag_value($tag,@{$value}); - } - else{ - $feature->add_tag_value($tag,$value); - } - } -} - -# frame explanation -#0 indicates that the feature begins with a whole codon at the 5' most base. 1 means that there is one extra base (the third base of a codon) before the first whole codon -#and 2 means that there are two extra bases (the second and third bases of the codon) before the first codon. 
-sub fil_cds_frame { - - my ($hash_omniscient, $db, $verbose)=@_; - - foreach my $primary_tag_key_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - - foreach my $id_tag_key_level1 (keys %{$hash_omniscient->{'level2'}{$primary_tag_key_level2}}) { - - foreach my $feature_level2 ( @{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1}}) { - - my @temp = $feature_level2->get_tag_values('ID'); - my $level2_ID = lc(shift @temp); - - # == LEVEL 3 == # - if ( exists_keys($hash_omniscient,('level3','cds',$level2_ID) ) ){ - my $strand=$feature_level2->strand; - my @cds_list; - if(($feature_level2->strand eq "+") or ($feature_level2->strand eq "1")){ - @cds_list=sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{'cds'}{$level2_ID}}; - }else{ - @cds_list=sort {$b->start <=> $a->start} @{$hash_omniscient->{'level3'}{'cds'}{$level2_ID}}; - } - - my $phase = _get_cds_start_phase( $db, $hash_omniscient->{'level3'}{'cds'}{$level2_ID} ); - if ($phase ) { #If no phase found we keep original, otherwise we loop over CDS features to set the correct phase - foreach my $cds_feature ( @cds_list) { - my $original_phase = $cds_feature->frame; - if ($original_phase != $phase ){ - print "Original phase $original_phase replaced by $phase for ".$cds_feature->_tag_value("ID")."\n" if $verbose; - $cds_feature->frame($phase); - } - my $cds_length=$cds_feature->end-$cds_feature->start +1; - $phase=(3-(($cds_length-$phase)%3))%3; #second modulo allows to avoid the frame with 3. Instead we have 0. - } - } - } - } - } - } -} - -# @Purpose: get phase of the start of a CDS -# @input: 1 => $db of the fasta genome, list of CDS features, codon table -# @output 1 => integer (0,1,2) or undef -sub _get_cds_start_phase { - my ($db, $cds_list, $codonTableId) = @_; - - if(! 
$codonTableId){$codonTableId = 0;} - - my $cds_dna_seq = undef; - my @cds_list_sorted=sort {$a->start <=> $b->start} @{$cds_list}; - foreach my $cds_feature ( @cds_list_sorted) { - $cds_dna_seq .= $db->seq( $cds_feature->seq_id, $cds_feature->start, $cds_feature->end ); - } - my $cds_obj = Bio::Seq->new(-seq => $cds_dna_seq, -alphabet => 'dna' ); - #Reverse the object depending on strand - if ($cds_list->[0]->strand == -1 or $cds_list->[0]->strand eq "-"){ - $cds_obj = $cds_obj->revcom(); - } - my $codonTable = Bio::Tools::CodonTable->new( -id => $codonTableId); - # case we have start codon => phase 0 - if ($codonTable->is_start_codon(substr($cds_obj->seq, 0, 3)) ) { - return 0; - }# No start codon we have to check the phase - else{ - #try wihtout offset - my $protein_seq_obj = $cds_obj->translate(); - my $lastChar = substr $protein_seq_obj->seq(),-1,1; - my $count = () = $protein_seq_obj->seq() =~ /\*/g; - if ($lastChar eq "*"){ # if last char is a stop we remove it - - if ($count == 1){ - #print "Missing start codon, phase 0, stop present\n"; - return 0; - } - } - else{ - if ($count == 0){ - #print "Missing start codon, phase 0, missing stop codon\n"; - return 0; - } - } - - #try wiht offset (+2 nucleotide) - $protein_seq_obj = $cds_obj->translate(-offset => 3); #remove 2 nucleotide at the beginning - $lastChar = substr $protein_seq_obj->seq(),-1,1; - $count = () = $protein_seq_obj->seq() =~ /\*/g; - if ($lastChar eq "*"){ # if last char is a stop we remove it - if ($count == 1){ - #print "Missing start codon, phase +2, stop present\n"; - return 2; - } - } - else{ - if ($count == 0){ - #print "Missing start codon, phase +2, missing stop codon\n"; - return 2; - } - } - - #try wiht offset (+1 nucleotide) - $protein_seq_obj = $cds_obj->translate(-offset => 2); #remove 2 nucleotide at the beginning - $lastChar = substr $protein_seq_obj->seq(),-1,1; - $count = () = $protein_seq_obj->seq() =~ /\*/g; - if ($lastChar eq "*"){ # if last char is a stop we remove it - if 
($count == 1){ - #print "Missing start codon, phase +1, stop present\n"; - return 1; - } - } - else{ - if ($count == 0){ - #print "Missing start codon, phase +1, missing stop codon\n"; - return 1; - } - } - - # always stop codon in the middle of the sequence... cannot determine correct phase, keep original phase and trow a warning ! - warn "WARNING OmniscientTools _get_cds_start_phase: No phase found for the CDS. ". - "All frames contain an internal stop codon, thus we cannot determine the correct phase. We will keep original stored phase information.\n"; - return undef; - } -} - -sub info_omniscient { - - my ($hash_omniscient)=@_; - - my %resu; - - foreach my $tag (keys %{$hash_omniscient->{'level1'}}){ - my $nb=keys %{$hash_omniscient->{'level1'}{$tag}}; - $resu{$tag}=$nb; - } - - foreach my $level (keys %{$hash_omniscient}){ - if ($level ne 'level1'){ - foreach my $tag (keys %{$hash_omniscient->{$level}}){ - foreach my $id (keys %{$hash_omniscient->{$level}{$tag}}){ - my $nb=$#{$hash_omniscient->{$level}{$tag}{$id}}+1; - if(exists_keys(\%resu,($tag))){ - $resu{$tag}=$resu{$tag}+$nb; - } - else{ - $resu{$tag}=$nb; - } - } - } - } - } - foreach my $tag (keys %resu){ - print "There is $resu{$tag} $tag\n"; - } -} - -#check if reference exists in hash. Deep infinite : hash{a} or hash{a}{b} or hash{a}{b}{c}, etc. -# usage example: exists_keys($hash_omniscient,('level3','cds',$level2_ID) -sub exists_keys { - my ($hash, @keys) = @_; - - for my $key (@keys) { - if (ref $hash ne 'HASH' or ! exists $hash->{$key}) { - return ''; - } - $hash = $hash->{$key}; - } - return 1; -} - -# omniscient is a hash containing a whole gXf file in memory sorted in a specific way (3 levels) -# This method group all features of a seq_id together. 
-sub group_features_from_omniscient { - - my ($hash_omniscient) = @_ ; - - my %group; - ################# - # == LEVEL 1 == # - ################# - foreach my $primary_tag_key_level1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... - my $key; - foreach my $id_tag_key_level1 (keys %{$hash_omniscient->{'level1'}{$primary_tag_key_level1}}){ - my $feature_l1=$hash_omniscient->{'level1'}{$primary_tag_key_level1}{$id_tag_key_level1}; - my $seq_id=$feature_l1->seq_id; - $key="$primary_tag_key_level1$id_tag_key_level1"; - push(@{$group{$seq_id}{$key}}, $hash_omniscient->{'level1'}{$primary_tag_key_level1}{$id_tag_key_level1}); - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_key_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - - if ( exists ($hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1} ) ){ - foreach my $feature_level2 ( @{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1}}) { - push(@{$group{$seq_id}{$key}}, $feature_level2); - ################# - # == LEVEL 3 == # - ################# - my $level2_ID = lc($feature_level2->_tag_value('ID')); - - ############ - # THEN ALL THE REST - foreach my $primary_tag_key_level3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... - - if ( exists ($hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID} ) ){ - foreach my $feature_level3 ( @{$hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID}}) { - push(@{$group{$seq_id}{$key}}, $feature_level3); - } - } - } - } - } - } - } - } - return \%group; -} - -# omniscient is a hash containing a whole gXf file in memory sorted in a specific way (3 levels) -# This method group all level1 features of a seq_id together. -# hash{seq_id} = @(feature1, feature2 ...) 
-sub group_l1features_from_omniscient { - - my ($hash_omniscient) = @_ ; - - my %group; - ################# - # == LEVEL 1 == # - ################# - foreach my $primary_tag_key_level1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... - foreach my $id_tag_key_level1 (keys %{$hash_omniscient->{'level1'}{$primary_tag_key_level1}}){ - my $feature_l1=$hash_omniscient->{'level1'}{$primary_tag_key_level1}{$id_tag_key_level1}; - my $seq_id=$feature_l1->seq_id; - push(@{$group{$seq_id}}, $feature_l1); - - } - } - return \%group; -} - -# omniscient is a hash containing a whole gXf file in memory sorted in a specific way (3 levels) -# This method group all level1 features of a seq_id together. -# hash{seq_id} = @(id1, id2 ...) -sub group_l1IDs_from_omniscient { - - my ($hash_omniscient) = @_ ; - - my %group; - ################# - # == LEVEL 1 == # - ################# - foreach my $primary_tag_key_level1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... 
- foreach my $id_tag_key_level1 (keys %{$hash_omniscient->{'level1'}{$primary_tag_key_level1}}){ - my $feature_l1=$hash_omniscient->{'level1'}{$primary_tag_key_level1}{$id_tag_key_level1}; - my $seq_id=$feature_l1->seq_id; - push(@{$group{$seq_id}}, lc($feature_l1->_tag_value('ID'))); - - } - } - return \%group; -} - -sub get_feature_l2_from_id_l2_l1 { - my ($hash_omniscient, $id_l2, $id_l1) = @_ ; - foreach my $tag_l2 (keys %{$hash_omniscient->{'level2'}}){ - if(exists ($hash_omniscient->{'level2'}{$tag_l2}{$id_l1})){ - foreach my $feature (@{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}}){ - if ( lc($feature->_tag_value('ID')) eq lc($id_l2) ) { - return $feature - } - } - } - else{print "element level2 $tag_l2 $id_l1 doesnt exists in omniscient\n";} - } -} - -#extract sequences form list of cds features in a fasta db -# return a Bio::Seq object -sub extract_cds_sequence { - my ($feature_list, $db)=@_; - - my $sequence=""; - foreach my $feature (sort {$a->start <=> $b->start} @$feature_list){ - $sequence .= $db->subseq($feature->seq_id,$feature->start,$feature->end); - } - my $seq = Bio::Seq->new( '-format' => 'fasta' , -seq => $sequence); - if($feature_list->[0]->strand eq "-1" or $feature_list->[0]->strand eq "-"){ - $seq=$seq->revcom; - } - return $seq ; -} - -# @Purpose: from a omniscient and a gene_id, will get back the extrem value for start and end -# @input: 2 => hash(omniscient), string(gene identifier) -# @output: 2 => integer(extrem start position), integer(extrem end position) -sub get_longest_cds_start_end { - my ($hash_omniscient,$gene_id)=@_; - my $resu_start=100000000000; - my $resu_end=0; - - #check full CDS for each mRNA - foreach my $mrna_feature (@{$hash_omniscient->{'level2'}{'mrna'}{lc($gene_id)}}){ - my $mrna_id = lc($mrna_feature->_tag_value('ID')); - my $extrem_start=100000000000; - my $extrem_end=0; - - #check all cds pieces - foreach my $cds_feature (@{$hash_omniscient->{'level3'}{'cds'}{$mrna_id}}){ - if ($cds_feature->start < 
$extrem_start){ - $extrem_start=$cds_feature->start; - } - if($cds_feature->end > $extrem_end){ - $extrem_end=$cds_feature->end ; - } - } - - if($extrem_start < $resu_start){ - $resu_start=$extrem_start; - } - if($extrem_end > $resu_end){ - $resu_end=$extrem_end; - } - } - return $resu_start,$resu_end; -} - -# @Purpose: Filter the mRNA to keep only the one containing the longest CDS per gene -# @input: 1 => hash(omniscient hash) -# @output: list of id level2 -sub get_longest_cds_level2{ - my ($hash_omniscient)= @_; - - my @list_id_l2; - - ################# - # == LEVEL 1 == # - ################# - foreach my $primary_tag_l1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... - foreach my $id_tag_l1 (keys %{$hash_omniscient->{'level1'}{$primary_tag_l1}}){ - - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_l2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - if ( exists ($hash_omniscient->{'level2'}{$primary_tag_l2}{$id_tag_l1} ) ){ - - #check if there is isoforms - ########################### - - #take only le longest - if ($#{$hash_omniscient->{'level2'}{$primary_tag_l2}{$id_tag_l1}} > 0){ - my $longestL2 =""; - my $longestCDSsize = 0; - my $longestEXONsize = 0; - - foreach my $feature_level2 ( @{$hash_omniscient->{'level2'}{$primary_tag_l2}{$id_tag_l1}}) { - - my $level2_ID = lc($feature_level2->_tag_value('ID') ) ; - if ( exists_keys( $hash_omniscient, ('level3','cds',$level2_ID ) ) ) { - my $cdsSize=0; - foreach my $cds ( @{$hash_omniscient->{'level3'}{'cds'}{$level2_ID}} ) { # primary_tag_key_level3 = cds or exon or start_codon or utr etc... 
- $cdsSize += ( $cds->end - $cds->start + 1 ); - } - - if($cdsSize > $longestCDSsize ){ - $longestL2 = $level2_ID; - $longestCDSsize = $cdsSize; - } - } - elsif ( exists_keys( $hash_omniscient, ('level3','exon',$level2_ID ) ) ) { - my $exonSize=0; - foreach my $exon ( @{$hash_omniscient->{'level3'}{'exon'}{$level2_ID}} ) { # primary_tag_key_level3 = cds or exon or start_codon or utr etc... - $exonSize += ( $exon->end - $exon->start + 1 ); - } - - if($exonSize > $longestEXONsize ){ - $longestL2 = $level2_ID; - $longestEXONsize = $exonSize; - } - } - else{ - warn "WARNING get_longest_cds_level2: NO exon or cds to select the longest l2 for $id_tag_l1 l1 ! We will take one randomly ! @\n"; - $longestL2 = $level2_ID; - } - } - push @list_id_l2,$longestL2; # push id of the longest - } - else{ #take it only of cds exits - my $level2_ID = lc(@{$hash_omniscient->{'level2'}{$primary_tag_l2}{$id_tag_l1}}[0]->_tag_value('ID')) ; - if (exists_keys( $hash_omniscient, ('level3','cds', $level2_ID ) ) ){ - push @list_id_l2, $level2_ID; # push the only one existing - } - } - } - } - } - } - - return \@list_id_l2; -} - -# @Purpose: Counter the number of feature level in an omniscient -# @input: 1 => hash(omniscient hash) -# @output: integer -sub nb_feature_level1 { - - my ($omniscient)=@_; - my $resu=0; - foreach my $tag_level1 (keys %{$omniscient->{'level1'}}){ - $resu += (keys %{$omniscient->{'level1'}{$tag_level1}}) - } - return $resu; -} - -# @Purpose: get all the ID present in an omniscient -# @input: 1 => hash(omniscient hash) -# @output: hash of the whole IDs -sub get_all_IDs{ - my ($omniscient)=@_; - - my %whole_IDs; - - ################# - # == LEVEL 1 == # - ################# - foreach my $primary_tag_l1 (keys %{$omniscient->{'level1'}}){ # primary_tag_l1 = gene or repeat etc... 
- foreach my $id_l1 (keys %{$omniscient->{'level1'}{$primary_tag_l1}}){ - $whole_IDs{$id_l1}++; - } - } - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_l2 (keys %{$omniscient->{'level2'}}){ # primary_tag_l2 = mrna or mirna or ncrna or trna etc... - foreach my $id_l1 ( keys %{$omniscient->{'level2'}{$primary_tag_l2}}) { - foreach my $feature_level2 ( @{$omniscient->{'level2'}{$primary_tag_l2}{$id_l1}}) { - my $level2_ID = lc($feature_level2->_tag_value('ID')); - $whole_IDs{$level2_ID}++; - } - } - } - ################# - # == LEVEL 3 == # - ################# - foreach my $primary_tag_l3 (keys %{$omniscient->{'level3'}}){ # primary_tag_l3 = cds or exon or start_codon or utr etc... - foreach my $level2_ID ( keys %{$omniscient->{'level3'}{$primary_tag_l3}}) { - foreach my $feature_level3 ( @{$omniscient->{'level3'}{$primary_tag_l3}{$level2_ID}}) { - my $level3_ID = lc($feature_level3->_tag_value('ID')); - $whole_IDs{$level3_ID}++; - } - } - } - return \%whole_IDs; -} - -# @Purpose: Replace ID by Uniq ID and modify all parent attribute of child feature to stay in line with the modification -# @input: 4 => feature objetc, hash of ids, hash of ids, hash of feature counted to give more rapidly a name -# @output: uniq ID -sub replace_by_uniq_ID{ - my ($feature, $hash_whole_IDs, $hash2_whole_IDs, $miscCount) = @_; - - my $id = $feature->_tag_value('ID'); - my $prefix = "IDmodified"; - my $key; - - if($prefix){ - $key=$prefix."-".lc($feature->primary_tag); - } - else{ - $key=lc($feature->primary_tag); - } - - my $uID=$id; - while( exists_keys($hash_whole_IDs, (lc($uID)) ) or exists_keys($hash2_whole_IDs, (lc($uID)) ) ){ #loop until we found an uniq tag - $miscCount->{$key}++; - $uID = $key."-".$miscCount->{$key}; - } - - #push the new ID - $hash_whole_IDs->{lc($uID)}=$id; - - # modify the feature ID with the correct one chosen - create_or_replace_tag($feature,'ID', $uID); #modify ID to replace by parent value - - #Now repercute this 
modification to the subfeatures - return $uID; -} - -# +------------------------------------------------------+ -# |+----------------------------------------------------+| -# || MANIPULATION AT OMNISCIENT LEVEL1/2/3 || -# |+----------------------------------------------------+| -# +------------------------------------------------------+ - -# @Purpose: Check 2 lists of feature L2 and remove the identical ones from the second list. -# @input: 4 => omniscient Hash reference, list1 reference of L2 features, list2 reference of L2 features, verbose option for debug -# @output: list2 minus all the feature identical to one of the list1 feature -sub keep_only_uniq_from_list2{ - my ($omniscient, $list1_l2, $list2_l2, $verbose)= @_; - - my @new_list2; - my $keep = 1; - - foreach my $feature2 ( @{$list2_l2} ){ - foreach my $feature1 ( @{$list1_l2} ){ - if(l2_identical($omniscient, $feature1, $feature2, $verbose )){ - $keep = undef; last; - } - } - if($keep){ - push(@new_list2, $feature2); - } - else{ # We dont keep the l2 feature so we have to remove all related features - remove_l2_related_feature($omniscient, $feature2, $verbose); - } - } - return \@new_list2; -} - -# check if l2 are identical. So look recursively at the level under. 
-# return 1 if identical -sub l2_identical{ - my ($omniscient, $feature1_l2, $feature2_l2, $verbose)= @_; - my $result=1; - - my $id1_l2 = lc($feature1_l2->_tag_value('ID') ); - my $id2_l2 = lc($feature2_l2->_tag_value('ID') ); - - foreach my $l3_type (keys %{$omniscient->{'level3'}} ){ - if(exists_keys($omniscient,('level3', $l3_type, $id1_l2))){ - if(exists_keys($omniscient,('level3', $l3_type, $id2_l2))){ - - if(scalar @{$omniscient->{'level3'}{$l3_type}{$id1_l2}} == scalar @{$omniscient->{'level3'}{$l3_type}{$id2_l2}}){ - - foreach my $feature1_level3 ( sort {$a->start <=> $b->start} @{$omniscient->{'level3'}{$l3_type}{$id1_l2}}) { - - my $identik = undef; - foreach my $feature2_level3 ( sort {$a->start <=> $b->start} @{$omniscient->{'level3'}{$l3_type}{$id2_l2}}) { - - if( ($feature1_level3->start == $feature2_level3->start) and ($feature1_level3->end == $feature2_level3->end) ){ - $identik=1; - } - } - if(! $identik){ - return undef; - } - } - } - else{return undef;} # Not same number of features. Cannot be identical - } - else{return undef;} - } - else{ - if(exists_keys($omniscient,('level3', $l3_type, $id2_l2))){ # $id1_l2 do not have l3 but $id2_l2 has ! - return undef; - } - } - } - print "The isoforms $id1_l2 and $id2_l2 are identical\n" if ($verbose >= 2 and $result); - return $result; -} - -# -# -#Remove everything related to l2. But not itself ... why ? 
-sub remove_l2_related_feature{ - my ($omniscient, $feature2, $verbose) = @_; - - my $l1_id = lc($feature2->_tag_value('Parent')); - my $l2_id = lc($feature2->_tag_value('ID')); - - #remove level 1 feature - foreach my $tag (keys %{$omniscient->{'level1'}}){ - if(exists_keys($omniscient, ('level1', $tag, $l1_id))){ - delete $omniscient->{'level1'}{$tag}{$l1_id}; - last; - } - } - foreach my $tag (keys %{$omniscient->{'level3'}}){ - if(exists_keys($omniscient, ('level3', $tag, $l2_id))){ - delete $omniscient->{'level3'}{$tag}{$l2_id}; - } - } -} - - - - -# @Purpose: Copy past an attribute and change its tag -# @input: 3 => omniscient Hash reference, String = attribute tag original, String = attribute tag new -# @output none => The hash itself is modified -sub create_attribute_from_existing_attribute{ - my ($omniscient, $original_attribute, $new_attribute) = @_; - - foreach my $primary_tag_l1 (keys %{$omniscient->{'level1'}}){ # primary_tag_l1 = gene or repeat etc... - foreach my $id_l1 (keys %{$omniscient->{'level1'}{$primary_tag_l1}}){ - my $feature_l1 = $omniscient->{'level1'}{$primary_tag_l1}{$id_l1}; - - if( $feature_l1->has_tag($original_attribute) and ! $feature_l1->has_tag($new_attribute) ) { - create_or_replace_tag($feature_l1,$new_attribute, $feature_l1->get_tag_values($original_attribute)); - } - - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_l2 (keys %{$omniscient->{'level2'}}){ # primary_tag_l2 = mrna or mirna or ncrna or trna etc... - if ( exists ($omniscient->{'level2'}{$primary_tag_l2}{$id_l1} ) ){ - foreach my $feature_l2 ( @{$omniscient->{'level2'}{$primary_tag_l2}{$id_l1}}) { - - if( $feature_l2->has_tag($original_attribute) and ! 
$feature_l2->has_tag($new_attribute) ){ - create_or_replace_tag($feature_l2,$new_attribute, $feature_l2->get_tag_values($original_attribute)); - } - ################# - # == LEVEL 3 == # - ################# - my $level2_ID = lc($feature_l2->_tag_value('ID')); - - foreach my $primary_tag_l3 (keys %{$omniscient->{'level3'}}){ # primary_tag_l3 = cds or exon or start_codon or utr etc... - if ( exists ($omniscient->{'level3'}{$primary_tag_l3}{$level2_ID} ) ){ - foreach my $feature_l3 ( @{$omniscient->{'level3'}{$primary_tag_l3}{$level2_ID}}) { - if( $feature_l3->has_tag($original_attribute) and ! $feature_l3->has_tag($new_attribute) ){ - create_or_replace_tag($feature_l3, $new_attribute, $feature_l3->get_tag_values($original_attribute)); - } - } - } - } - } - } - } - } - } -} - -# @Purpose: Create a locus tag for all feature L1 and share it with all children features -# @input: 3 => omniscient Hash reference, String = attribute tag original to use as locus tag, String = locus_tag attribute tag -# @output none => The hash itself is modified -sub create_locus_tag{ - my ($omniscient, $original_attribute, $locus_tag) = @_; - - foreach my $primary_tag_l1 (keys %{$omniscient->{'level1'}}){ # primary_tag_l1 = gene or repeat etc... - foreach my $id_l1 (keys %{$omniscient->{'level1'}{$primary_tag_l1}}){ - my $feature_l1 = $omniscient->{'level1'}{$primary_tag_l1}{$id_l1}; - - my $locus_tag_value=undef; - if( $feature_l1->has_tag($original_attribute) and ! $feature_l1->has_tag($locus_tag) ) { - $locus_tag_value = $feature_l1->_tag_value($original_attribute); - create_or_replace_tag($feature_l1, $locus_tag, $locus_tag_value); - } - else{ - $locus_tag_value = $feature_l1->_tag_value($locus_tag); - } - - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_l2 (keys %{$omniscient->{'level2'}}){ # primary_tag_l2 = mrna or mirna or ncrna or trna etc... 
- if ( exists ($omniscient->{'level2'}{$primary_tag_l2}{$id_l1} ) ){ - foreach my $feature_l2 ( @{$omniscient->{'level2'}{$primary_tag_l2}{$id_l1}}) { - - if( $feature_l2->has_tag($original_attribute) and ! $feature_l2->has_tag($locus_tag) ){ - create_or_replace_tag($feature_l2,$locus_tag, $locus_tag_value); - } - ################# - # == LEVEL 3 == # - ################# - my $level2_ID = lc($feature_l2->_tag_value('ID')); - - foreach my $primary_tag_l3 (keys %{$omniscient->{'level3'}}){ # primary_tag_l3 = cds or exon or start_codon or utr etc... - if ( exists ($omniscient->{'level3'}{$primary_tag_l3}{$level2_ID} ) ){ - foreach my $feature_l3 ( @{$omniscient->{'level3'}{$primary_tag_l3}{$level2_ID}}) { - if( $feature_l3->has_tag($original_attribute) and ! $feature_l3->has_tag($locus_tag) ){ - create_or_replace_tag($feature_l3, $locus_tag, $locus_tag_value); - } - } - } - } - } - } - } - } - } -} - - - - -# +------------------------------------------------------+ -# |+----------------------------------------------------+| -# || FEATURES LOCATION || -# |+----------------------------------------------------+| -# +------------------------------------------------------+ - -# looking the end and the start, the method check if two location overlap. -#A location os [Id, position1, position2] -# return t1 is location overlap -sub location_overlap{ - my($location1, $location2)=@_; - my $overlap = undef; - - if (($location1->[1] <= $location2->[2]) and ($location1->[2] >= $location2->[1])){ - $overlap = 1; - } - - return $overlap; -} - -# looking the end and the start, the method check if two location overlap. 
-#A location os [Id, position1, position2] -# return the intersect of locations -sub location_overlap_update{ - my($location1, $location2)=@_; - my $location = $location1; - my $overlap = undef; - - if (($location1->[1] <= $location2->[2]) and ($location1->[2] >= $location2->[1])){ - $overlap = 1; - if($location2->[1] < $location1->[1]){ - $location->[1] = $location2->[1] - } - if($location2->[2] > $location1->[2]){ - $location->[2] = $location2->[2] - } - } - - return $location, $overlap; -} - -# Check if two genes have at least one L2 isoform which overlap at cds level. -# if no CDS we check if overlap at any other l3 feature. -sub check_gene_overlap_at_CDSthenEXON{ - my ($hash_omniscient, $hash_omniscient2, $gene_id, $gene_id2)=@_; - my $resu=undef; - - foreach my $l2_type (keys %{$hash_omniscient->{'level2'}} ){ - - #check full CDS for each mRNA - if(exists_keys($hash_omniscient,('level2', $l2_type, lc($gene_id)))){ - foreach my $mrna_feature (@{$hash_omniscient->{'level2'}{$l2_type}{lc($gene_id)}}){ - my $mrna_id1 = $mrna_feature->_tag_value('ID'); - - if(exists_keys($hash_omniscient2,('level2', $l2_type, lc($gene_id2)))){ - - foreach my $mrna_feature2 (@{$hash_omniscient2->{'level2'}{$l2_type}{lc($gene_id2)}}){ # from here bothe feature level2 are the same type - my $mrna_id2 = $mrna_feature2->_tag_value('ID'); - - #check all cds pieces - if(exists_keys($hash_omniscient,('level3', 'cds', lc($mrna_id1)))){ - if(exists_keys($hash_omniscient2,('level3', 'cds', lc($mrna_id2)))){ - foreach my $cds_feature1 (@{$hash_omniscient->{'level3'}{'cds'}{lc($mrna_id1)}}){ - foreach my $cds_feature2 (@{$hash_omniscient2->{'level3'}{'cds'}{lc($mrna_id2)}}){ - - if(($cds_feature2->start <= $cds_feature1->end) and ($cds_feature2->end >= $cds_feature1->start )){ # they overlap - $resu="yes";last; - } - } - if($resu){last;} - } - - if($resu){last;} - } - } - elsif(! 
exists_keys($hash_omniscient2,('level3', 'cds', lc($mrna_id2)))){ # No CDS at all, check at exon / match level and if same level2 type - - foreach my $tag_l3 (keys %{$hash_omniscient->{'level3'}}){ - - if(exists_keys($hash_omniscient,('level3', $tag_l3, lc($mrna_id1)))){ - foreach my $feature1 (@{$hash_omniscient->{'level3'}{$tag_l3}{lc($mrna_id1)}}){ - - if(exists_keys($hash_omniscient2,('level3', $tag_l3, lc($mrna_id2)))){ - foreach my $feature2 (@{$hash_omniscient2->{'level3'}{$tag_l3}{lc($mrna_id2)}}){ - - if(($feature2->start <= $feature1->end) and ($feature2->end >= $feature1->start )){ # they overlap - $resu="yes";last; - } - } - if($resu){last;} - } - } - if($resu){last;} - } - } - - if($resu){last;} - } - } - - if($resu){last;} - } - } - - if($resu){last;} - } - } - return $resu; -} - -# @Purpose: Check the start and end of gene feature based on its mRNAs and eventualy fix it. -# @input: 2 => hash(omniscient hash), string(gene identifier) -# @output: none -sub check_gene_positions { - - my ($hash_omniscient, $gene_id)=@_; - - ##### - #Modify gene start-end (have to check size of each mRNA) - my $geneExtremStart=1000000000000; - my $geneExtremEnd=0; - foreach my $primary_tag_l2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... 
- if (exists_keys($hash_omniscient, ('level2', $primary_tag_l2, lc($gene_id) ) ) ){ # check if they have mRNA avoiding autovivifcation - foreach my $mrna_feature ( @{$hash_omniscient->{'level2'}{$primary_tag_l2}{lc($gene_id)}}) { - my $start=$mrna_feature->start(); - my $end=$mrna_feature->end(); - - if ($start < $geneExtremStart){ - $geneExtremStart=$start; - } - if($end > $geneExtremEnd){ - $geneExtremEnd=$end; - } - } - } - } - my $gene_feature=$hash_omniscient->{'level1'}{'gene'}{lc($gene_id)}; - if ($gene_feature->start != $geneExtremStart){ - $gene_feature->start($geneExtremStart); - } - if($gene_feature->end != $geneExtremEnd){ - $gene_feature->end($geneExtremEnd); - } -} - -# Check the start and end of level1 feature based on all features level2; -#return 1 if something modified -sub check_level1_positions { - my ($hash_omniscient, $feature_l1, $verbose) = @_; - my $result=undef; - if(! $verbose){$verbose=0;} - - my $extrem_start=1000000000000; - my $extrem_end=0; - my $check_existence_feature_l2=undef; - my $id_l1 = lc($feature_l1->_tag_value('ID')); - - foreach my $tag_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - if ( exists_keys ($hash_omniscient, ('level2', $tag_level2, $id_l1) ) ){ - $check_existence_feature_l2=1; - - my $extrem_start_A=1000000000000; - my $extrem_end_A=0; - foreach my $feature ( @{$hash_omniscient->{'level2'}{$tag_level2}{$id_l1}}) { - my $start=$feature->start(); - my $end=$feature->end(); - if ($start < $extrem_start_A){ - $extrem_start_A=$start; - } - if($end > $extrem_end_A){ - $extrem_end_A=$end; - } - } - - if ($extrem_start_A < $extrem_start){ - $extrem_start=$extrem_start_A; - } - if($extrem_end_A > $extrem_end){ - $extrem_end=$extrem_end_A; - } - } - } - if(! $check_existence_feature_l2){ - warn "WARNING check_level1_positions: NO level2 feature to check positions of the level1 feature ! 
@\n"; - } - else{ - # modify START if needed - if($feature_l1->start != $extrem_start){ - $feature_l1->start($extrem_start); - $result=1; - print "We modified the L1 LEFT extremity for the sanity the biological data!\n" if($verbose >= 3); - } - - # modify END if needed - if($feature_l1->end != $extrem_end){ - $feature_l1->end($extrem_end); - $result=1; - print "We modified the L1 RIGHT extremity for the sanity the biological data!\n" if($verbose >= 3); - } - } - return $result; -} - -# Check the start and end of level2 feature based on all features level3; -sub check_level2_positions { - my ($hash_omniscient, $level2_feature)=@_; - - my @values = $level2_feature->get_tag_values('ID'); - my $level2_feature_name = lc(shift @values) ; - - my $extrem_start=1000000000000; - my $extrem_end=0; - foreach my $tag_level3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - my $extrem_start_A=1000000000000; - my $extrem_end_A=0; - if( exists_keys ($hash_omniscient, ('level3', $tag_level3, $level2_feature_name) ) ){ - foreach my $feature ( @{$hash_omniscient->{'level3'}{$tag_level3}{$level2_feature_name}}) { - my $start=$feature->start(); - my $end=$feature->end(); - if ($start < $extrem_start_A){ - $extrem_start_A=$start; - } - if($end > $extrem_end_A){ - $extrem_end_A=$end; - } - } - } - if ($extrem_start_A < $extrem_start){ - $extrem_start=$extrem_start_A; - } - if($extrem_end_A > $extrem_end){ - $extrem_end=$extrem_end_A; - } - } - - # modify START if needed - if($level2_feature->start != $extrem_start){ - $level2_feature->start($extrem_start); - } - - # modify END if needed - if($level2_feature->end != $extrem_end){ - $level2_feature->end($extrem_end); - } -} - -#calcul the overlaping percentage betwwen 2 CDS list or 2 exon list etc... -# /!\ Be careful if you test the output, a overlaping gene can have a percentage overlap to 0. And if you test "if(featuresList_overlap)" and you have a 0, it will fail. 
So you have to check defined(featuresList_overlap) -sub featuresList_overlap { - - my ($listCDS1ref, $listCDS2ref)=@_; - my $resu; - - ### - # sort the list - my @listCDS1 = sort {$a->start <=> $b->start} @{$listCDS1ref}; - my @listCDS2 = sort {$a->start <=> $b->start} @{$listCDS2ref}; - # foreach my $t (@listCDS1){ - # print "list1: ".$t->start." ".$t->end."\n"; - # } - # foreach my $t (@listCDS2){ - # print "list2: ".$t->start." ".$t->end."\n"; - # } - my $size_overlap=0; - my $cds1_size=0; - foreach my $cds1 (@listCDS1){ - - $cds1_size=$cds1_size+($cds1->end - $cds1->start)+1; - my $starto; - my $endo; - - foreach my $cds2 (@listCDS2){ - if($cds2->start > $cds1->end){ #we are after of the investigated cds. - last; - } - elsif($cds2->end < $cds1->start){ # we are before investigated cds. - next; - } - else{ #we are overlaping - #check start - if($cds1->start >= $cds2->start){ - $starto=$cds1->start; - } - else{$starto=$cds2->start;} - #check end - if($cds1->end >= $cds2->end){ - $endo=$cds2->end; - } - else{$endo=$cds1->end;} - - #calcul overlap; - $size_overlap=$size_overlap+($endo - $starto + 1); - } - } - } - - #Now calcul percentage overlap - if($size_overlap != 0){ - $resu=($size_overlap*100)/$cds1_size; - $resu = sprintf('%.0f', $resu); - return $resu; - } - else{return undef;} -} - - -sub featuresList_identik { - my ($list1, $list2)=@_; - - my @slist1 = sort {$a->start <=> $b->start} @{$list1}; - my @slist2 = sort {$a->start <=> $b->start} @{$list2}; - my $identik="true"; - - if($#slist1 == $#slist2){ - my $cpt=0; - - while ($cpt <= $#slist1){ - - my $feature1=$slist1[$cpt]; - my $feature2=$slist2[$cpt]; - #print $feature1->start." != ".$feature2->start." or ".$feature1->end." != ".$feature2->end." or ".$feature1->strand." ne ".$feature2->strand." or ".$feature1->seq_id." 
ne ".$feature2->seq_id."\n"; - if( ($feature1->start != $feature2->start) or ($feature1->end != $feature2->end) or ($feature1->strand ne $feature2->strand) or ($feature1->seq_id ne $feature2->seq_id)){ - $identik=undef;last; - } - $cpt++; - } - } - else{$identik=undef;} - return $identik; -} - -# @Purpose: Check the start and end of l1 l2 features from l3 features. -# @input: 2 => hash(omniscient hash), string(gene identifier) -# @output: none -sub check_record_positions { - my ($hash_omniscient, $gene_id_raw)=@_; - - my $gene_id = lc($gene_id_raw); - my $ExtremStart=1000000000000; - my $ExtremEnd=0; - - foreach my $primary_tag_l2 (keys %{$hash_omniscient->{'level2'}} ){ - if (exists_keys($hash_omniscient, ('level2', $primary_tag_l2, $gene_id ) ) ){ - foreach my $mrna_feature ( @{$hash_omniscient->{'level2'}{$primary_tag_l2}{$gene_id}} ) { - my $l2_id = lc($mrna_feature->_tag_value('ID')); - my $l2_ExtremStart=1000000000000; - my $l2_ExtremEnd=0; - foreach my $tag_l3 ( keys %{$hash_omniscient->{'level3'}} ) { - if ( exists_keys ( $hash_omniscient, ('level3', $tag_l3, $l2_id ) ) ){ - foreach my $feature_l3 ( @{$hash_omniscient->{'level3'}{$tag_l3}{$l2_id}} ) { - - if ($feature_l3->start() < $l2_ExtremStart){ - $l2_ExtremStart = $feature_l3->start(); - } - if($feature_l3->end() > $l2_ExtremEnd){ - $l2_ExtremEnd = $feature_l3->end(); - } - } - } - } - if ($mrna_feature->start != $l2_ExtremStart and $l2_ExtremStart != 1000000000000){ - $mrna_feature->start($l2_ExtremStart); - } - if($mrna_feature->end != $l2_ExtremEnd and $l2_ExtremEnd != 0){ - $mrna_feature->end($l2_ExtremEnd); - } - if ( $l2_ExtremStart < $ExtremStart ){ - $ExtremStart = $l2_ExtremStart; - } - if ($l2_ExtremEnd > $ExtremEnd ){ - $ExtremEnd = $l2_ExtremEnd; - } - } - } - - - my $gene_feature=$hash_omniscient->{'level1'}{'gene'}{$gene_id}; - if ($gene_feature->start != $ExtremStart and $ExtremStart != 1000000000000){ - $gene_feature->start($ExtremStart); - } - if($gene_feature->end != $ExtremEnd and 
$ExtremEnd != 0){ - $gene_feature->end($ExtremEnd); - } - } -} - -# Sort by locusID -# LocusID->typeFeature = [feature, feature, feature] -#return a hash. Key is position,tag and value is list of feature l1. The list is sorted -sub gather_and_sort_l1_by_seq_id{ - my ($omniscient) = @_; - - my %hash_sortBySeq; - foreach my $tag_level1 ( keys %{$omniscient->{'level1'}}){ - foreach my $level1_id ( keys %{$omniscient->{'level1'}{$tag_level1}}){ - my $position=$omniscient->{'level1'}{$tag_level1}{$level1_id}->seq_id; - push (@{$hash_sortBySeq{$position}{$tag_level1}}, $omniscient->{'level1'}{$tag_level1}{$level1_id}); - } - foreach my $position_l1 (keys %hash_sortBySeq){ - @{$hash_sortBySeq{$position_l1}{$tag_level1}} = sort { ncmp ($a->start.$a->end.$a->_tag_value('ID'), $b->start.$b->end.$b->_tag_value('ID') ) } @{$hash_sortBySeq{$position_l1}{$tag_level1}}; - } - } - return \%hash_sortBySeq; -} - - -# Sort by locusID and strand -# LocusID_strand->typeFeature = [feature, feature, feature] -# return a hash. Key is position,tag and value is list of feature l1. The list is sorted -sub gather_and_sort_l1_by_seq_id_and_strand{ - my ($omniscient) = @_; - - my %hash_sortBySeq; - foreach my $tag_level1 (keys %{$omniscient->{'level1'}}){ - foreach my $level1_id (keys %{$omniscient->{'level1'}{$tag_level1}}){ - my $level1_feature = $omniscient->{'level1'}{$tag_level1}{$level1_id}; - my $position_l1=$level1_feature->seq_id.$level1_feature->strand; - push (@{$hash_sortBySeq{$position_l1}{$tag_level1}}, $level1_feature); - } - foreach my $position_l1 (keys %hash_sortBySeq){ - @{$hash_sortBySeq{$position_l1}{$tag_level1}} = sort { ncmp ($a->start.$a->end.$a->_tag_value('ID'), $b->start.$b->end.$b->_tag_value('ID') ) } @{$hash_sortBySeq{$position_l1}{$tag_level1}}; - } - } - return \%hash_sortBySeq; -} - -# @Purpose: Create a hash of level1 location (location = [level1ID,start,end]) sorted by feature type and localisation. 
A localisation is the sequence_id appended by the strand -# @input: 1 => hash omniscient -# @output: 1 => hash => LocusID->typeFeature =[ID,start,end] -sub gather_and_sort_l1_location_by_seq_id_and_strand{ - my ($omniscient) = @_; - - my %hash_sortBySeq; - - foreach my $tag_level1 (keys %{$omniscient->{'level1'}}){ - foreach my $level1_id (keys %{$omniscient->{'level1'}{$tag_level1}}){ - my $level1_feature = $omniscient->{'level1'}{$tag_level1}{$level1_id}; - my $ID = $level1_feature->_tag_value('ID'); - my $strand="+"; - if($level1_feature->strand != 1){$strand = "-";} - my $position_l1=$level1_feature->seq_id."".$strand; - push ( @{$hash_sortBySeq{$position_l1}{$tag_level1}}, [$ID, int($level1_feature->start), int($level1_feature->end)] ); - } - - foreach my $position_l1 (keys %hash_sortBySeq){ - @{$hash_sortBySeq{$position_l1}{$tag_level1}} = sort { ncmp ( $a->[1], $b->[1] ) } @{$hash_sortBySeq{$position_l1}{$tag_level1}}; - } - } - return \%hash_sortBySeq; -} - -# @Purpose: Create a hash of level1 location (location = [level1ID,start,end]) sorted by feature type and localisation. 
A localisation is the sequence_id -# @input: 1 => hash omniscient -# @output: 1 => hash => LocusID->typeFeature =[ID,start,end] -sub gather_and_sort_l1_location_by_seq_id{ - my ($omniscient) = @_; - - my %hash_sortBySeq; - - foreach my $tag_level1 (keys %{$omniscient->{'level1'}}){ - foreach my $level1_id (keys %{$omniscient->{'level1'}{$tag_level1}}){ - my $level1_feature = $omniscient->{'level1'}{$tag_level1}{$level1_id}; - my $ID = $level1_feature->_tag_value('ID'); - my $position_l1=$level1_feature->seq_id; - push ( @{$hash_sortBySeq{$position_l1}{$tag_level1}}, [$ID, int($level1_feature->start), int($level1_feature->end)] ); - } - - foreach my $position_l1 (keys %hash_sortBySeq){ - @{$hash_sortBySeq{$position_l1}{$tag_level1}} = sort { ncmp ( $a->[1], $b->[1] ) } @{$hash_sortBySeq{$position_l1}{$tag_level1}}; - } - } - return \%hash_sortBySeq; -} - -# @Purpose: get position of the most left and right cds positions -# @input: 2 => hash(omniscient hash), [l1 feature /or/ l1 id] -# @output: (integer, integer) -sub get_most_right_left_cds_positions { - my ($omniscient, $l1_feature) = @_; - - my $cds_start = undef; - my $cds_end = undef; - my $gene_id=undef; - if (ref($l1_feature) =~ "::"){ - $gene_id = lc($l1_feature->_tag_value('ID')); - } - else{ - $gene_id=lc($l1_feature); - } - - foreach my $tag_l1 (keys %{$omniscient->{'level1'}}){ - if (exists_keys ($omniscient, ('level1', $tag_l1, $gene_id) ) ){ - - # == LEVEL 2 == # - foreach my $tag_l2 (keys %{$omniscient->{'level2'}}){ - if (exists_keys ($omniscient, ('level2', $tag_l2, $gene_id) ) ){ - foreach my $feature_l2 ( @{$omniscient->{'level2'}{$tag_l2}{$gene_id}}) { - # == LEVEL 3 == # - my $l2_id = lc($feature_l2->_tag_value('ID') ); - if (exists_keys ($omniscient, ('level3', 'cds', $l2_id ) ) ){ - - my @sorted_cds = sort {$a->start <=> $b->start} @{$omniscient->{'level3'}{'cds'}{$l2_id}}; - my $local_cds_start = $omniscient->{'level3'}{'cds'}{lc($l2_id)}[0]->start; #first element of the array - my 
$local_cds_end = $omniscient->{'level3'}{'cds'}{$l2_id}[$#{$omniscient->{'level3'}{'cds'}{$l2_id}}]->end; #last element of the array - - if ( ! $cds_start){ - $cds_start = $local_cds_start; - } - elsif( $local_cds_start < $cds_start){ - $cds_start = $local_cds_start; - } - if ( ! $cds_end){ - $cds_end = $local_cds_end; - } - elsif( $local_cds_end > $cds_end){ - $cds_end = $local_cds_end; - } - } - } - } - } - } - } - return $cds_start, $cds_end; -} - -# +------------------------------------------------------+ -# |+----------------------------------------------------+| -# || MANIPULATION from record || -# |+----------------------------------------------------+| -# +------------------------------------------------------+ - -# @Purpose: Check the ID of a record and fix it if ducplicate. -# @input: 3 => id from level1 hash1, hash1 omnicient, hash1 of whole_IDs -# @output: l1_id -# /!\ Not tested yest -sub check_record_ids { - my ($l1_id1, $hash_omniscient, $hash_whole_IDs, $hash_whole_IDs2, $miscCount)=@_; - - my $l1_id_final = $l1_id1; - ################# - # == LEVEL 1 == # - ################# - my $id_l1 = lc($l1_id1); - foreach my $tag_l1 (keys %{$hash_omniscient->{'level1'}}){ # tag_l1 = gene or repeat etc... - if (exists_keys ($hash_omniscient, ('level1', $tag_l1, $id_l1) ) ){ - - my $new_parent=undef; - my $uID = $hash_omniscient->{'level1'}{$tag_l1}{$id_l1}->_tag_value('ID'); - - if ( exists ( $hash_whole_IDs->{$id_l1} ) ){ - my $feature = $hash_omniscient->{'level1'}{$tag_l1}{$id_l1}; - $uID = replace_by_uniq_ID( $feature, $hash_whole_IDs2, $hash_whole_IDs, $miscCount); - $hash_omniscient->{'level1'}{$tag_l1}{lc($uID)} = delete $hash_omniscient->{'level1'}{$tag_l1}{$id_l1}; # save feature level1 - $new_parent=1; - $l1_id_final = $uID ; - } - else{ - $hash_whole_IDs2->{lc($uID)}=$uID; - } - ################# - # == LEVEL 2 == # - ################# - foreach my $tag_l2 (keys %{$hash_omniscient->{'level2'}}){ # tag_l2 = mrna or mirna or ncrna or trna etc... 
- - if (exists_keys ($hash_omniscient, ('level2', $tag_l2, $id_l1) ) ){ #Non present in hash2, we create a list with one element - - foreach my $feature_l2 ( @{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}}) { - - my $new_parent_l2=undef; - if($new_parent){ - create_or_replace_tag($feature_l2, 'Parent', $uID); - } - - my $id_l2 = $feature_l2->_tag_value('ID'); - my $uID_l2 = undef; - if ( exists ( $hash_whole_IDs->{lc($id_l2)} ) ){ - $uID_l2 = replace_by_uniq_ID($feature_l2, $hash_whole_IDs2, $hash_whole_IDs, $miscCount); - $new_parent_l2=1; - } - else{$hash_whole_IDs2->{lc($id_l2)} = $id_l2;} - - ################# - # == LEVEL 3 == # - ################# - foreach my $tag_l3 (keys %{$hash_omniscient->{'level3'}}){ - - if (exists_keys ($hash_omniscient, ('level3', $tag_l3, lc($id_l2) ) ) ){ - - foreach my $feature_l3 ( @{$hash_omniscient->{'level3'}{$tag_l3}{lc($id_l2)}}) { - - if($new_parent_l2){ - create_or_replace_tag($feature_l3, 'Parent', $uID_l2); - } - - my $id_l3 = $feature_l3->_tag_value('ID'); - my $uID_l3 = undef; - if ( exists ( $hash_whole_IDs->{lc($id_l3)} ) ){ - $uID_l3 = replace_by_uniq_ID($feature_l3, $hash_whole_IDs2, $hash_whole_IDs, $miscCount); - } - else{$hash_whole_IDs2->{lc($id_l3)} = $id_l3;} - } - if($new_parent_l2){ - $hash_omniscient->{'level3'}{$tag_l3}{lc($uID_l2)} = delete $hash_omniscient->{'level3'}{$tag_l3}{lc($id_l2)}; # save feature level1 - } - } - } - } - if($new_parent){ - $hash_omniscient->{'level2'}{$tag_l2}{lc($uID)} = delete $hash_omniscient->{'level2'}{$tag_l2}{$id_l1}; # save feature level1 - } - } - } - } - } - return $l1_id_final; -} - -# +------------------------------------------------------+ -# |+----------------------------------------------------+| -# || MANIPULATION on feature || -# |+----------------------------------------------------+| -# +------------------------------------------------------+ - -# @Purpose: Check if has a l3 subfeature type. 
-# @input: 2 => hash(omniscient hash), [l1 feature /or/ l1 id] -# @output: bolean -sub l1_has_l3_type { - my ($omniscient, $l1_feature, $type, $part_match) = @_; - - my $full_match=1; - if($part_match){ - $full_match=undef; - } - - - my $gene_id=undef; - if (ref($l1_feature) =~ "::"){ - $gene_id = lc($l1_feature->_tag_value('ID')); - } - else{ - $gene_id=lc($l1_feature); - } - - foreach my $tag_l1 (keys %{$omniscient->{'level1'}}){ - if (exists_keys ($omniscient, ('level1', $tag_l1, $gene_id) ) ){ - - # == LEVEL 2 == # - foreach my $tag_l2 (keys %{$omniscient->{'level2'}}){ - - if (exists_keys ($omniscient, ('level2', $tag_l2, $gene_id) ) ){ - foreach my $feature_l2 ( @{$omniscient->{'level2'}{$tag_l2}{$gene_id}}) { - - # == LEVEL 3 == # - if($full_match){ - if (exists_keys ($omniscient, ('level3', $type, lc($feature_l2->_tag_value('ID') ) ) ) ){ - return 1 - } - } - else{ - my $level2_ID = lc($feature_l2->_tag_value('ID')); - foreach my $ptag_l3 (keys %{$omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... - if (lc($ptag_l3) =~ lc($type)){ - if( exists_keys($omniscient, ('level3', $ptag_l3, $level2_ID)) ){ - return 1; - } - } - } - } - } - } - } - } - } - return 0; -} - -# @Purpose: Check if has a cds l3 subfeature. -# @input: 2 => hash(omniscient hash), l2 feature -# @output: bolean -sub l2_has_cds { - my ($omniscient, $l2_feature) = @_; - - my $gene_id = lc($l2_feature->_tag_value('Parent')); - - # == LEVEL 2 == # - foreach my $tag_l2 (keys %{$omniscient->{'level2'}}){ - if (exists_keys ($omniscient, ('level2', $tag_l2, $gene_id) ) ){ - foreach my $feature_l2 ( @{$omniscient->{'level2'}{$tag_l2}{$gene_id}}) { - # == LEVEL 3 == # - if (exists_keys ($omniscient, ('level3', 'cds', lc($feature_l2->_tag_value('ID') ) ) ) ){ - return 1 - } - } - } - } - return 0; -} - -# @Purpose: Check if has a cds l3 subfeature. 
-# @input: 2 => hash(omniscient hash), l2 feature -# @output: bolean -sub get_cds_from_l2 { - my ($omniscient, $l2_feature) = @_; - - my $gene_id = lc($l2_feature->_tag_value('Parent')); - - # == LEVEL 2 == # - foreach my $tag_l2 (keys %{$omniscient->{'level2'}}){ - if (exists_keys ($omniscient, ('level2', $tag_l2, $gene_id) ) ){ - foreach my $feature_l2 ( @{$omniscient->{'level2'}{$tag_l2}{$gene_id}}) { - # == LEVEL 3 == # - my $l2_id = lc($feature_l2->_tag_value('ID') ); - if (exists_keys ($omniscient, ('level3', 'cds', $l2_id) ) ){ - my @sorted_cds = sort {$a->start <=> $b->start} @{$omniscient->{'level3'}{'cds'}{$l2_id}}; - return \@sorted_cds; - } - } - } - } - return undef; -} - -# @Purpose: check if two features overlap. -# @input: 2 => l1 feature, l2 feature -# @output: bolean -sub check_if_feature_overlap{ - my($feature1, $feature2)=@_; - my $result=undef; - if (($feature1->start <= $feature2->end) and ($feature1->end >= $feature2->start)){ - $result="true"; - } - -return $result -} - -# +------------------------------------------------------+ -# |+----------------------------------------------------+| -# || MANIPULATION on feature List || -# |+----------------------------------------------------+| -# +------------------------------------------------------+ - - -# +------------------------------------------------------+ -# |+----------------------------------------------------+| -# || MANIPULATION from id || -# |+----------------------------------------------------+| -# +------------------------------------------------------+ - -# +------------------------------------------------------+ -# |+----------------------------------------------------+| -# || Info from id/feature || -# |+----------------------------------------------------+| -# +------------------------------------------------------+ - -# @Purpose: check if it is a single exon gene -# @input: 2 => hash(omniscient hash), [l1 feature /or/ l1 id] -# @output: bolean -sub is_single_exon_gene { - my 
($omniscient, $l1_feature) = @_; - - my $gene_id = undef; - if (ref($l1_feature) =~ "::"){ - $gene_id = lc($l1_feature->_tag_value('ID')); - } - else{ - $gene_id=lc($l1_feature); - } - - foreach my $tag_l1 (keys %{$omniscient->{'level1'}}){ - if (exists_keys ($omniscient, ('level1', $tag_l1, $gene_id) ) ){ - - # == LEVEL 2 == # - foreach my $tag_l2 (keys %{$omniscient->{'level2'}}){ - if (exists_keys ($omniscient, ('level2', $tag_l2, $gene_id) ) ){ - foreach my $feature_l2 ( @{$omniscient->{'level2'}{$tag_l2}{$gene_id}}) { - # == LEVEL 3 == # - my $l2_id = lc($feature_l2->_tag_value('ID') ); - if (exists_keys ($omniscient, ('level3', 'exon', $l2_id ) ) ){ - if (scalar @{$omniscient->{'level3'}{'exon'}{$l2_id}} == 1){ - return 1; - } - else{return 0;} - } - else{ - warn "WARNING No exon available to check if it is a single exon gene\n"; - } - } - } - } - } - } -} - -1; diff --git a/annotation/NBIS/GFF3/Omniscient/Statistics.pm b/annotation/NBIS/GFF3/Omniscient/Statistics.pm deleted file mode 100644 index 257b29ae7..000000000 --- a/annotation/NBIS/GFF3/Omniscient/Statistics.pm +++ /dev/null @@ -1,699 +0,0 @@ -#!/usr/bin/perl -w - -package NBIS::GFF3::Omniscient::Statistics; - -use strict; -use warnings; -use Bio::Tools::GFF; -use Bio::SeqIO;; -use NBIS::GFF3::Omniscient::OmniscientTools; -use Exporter; -our @ISA = qw(Exporter); -our @EXPORT = qw( gff3_statistics ); - -sub import { - NBIS::GFF3::Omniscient::Statistics->export_to_level(1, @_); # to be able to load the EXPORT functions by calling NBIS::GFF3::Omniscient::OmniscientI; (normal case) - NBIS::GFF3::Omniscient::Statistics->export_to_level(2, @_); # to be able to load the EXPORT functions by calling NBIS::GFF3::Omniscient; -} - -=head1 SYNOPSIS - - - -=head1 DESCRIPTION - - A library to get statistics from an omniscient hash of a gff3 file - - - We create a complex hash of hash containing all information needeed. - The data are scaned from level 2 to level 1 and 3. 
We do that because different type of feature from level 2 can have same type of feature of level1. (e.g: mRNA => gene and tRNA => gene). - So the structure of the hash created is the following: - {type_feature_level2}{'level'}{type_feature_level}{'flag'}='value'; - 'level' can be level1, level2 or level accordingly, allow to go all over the data for printing by driving the data form level1 to level3. - 'flag' correspond to the type of information that has been saved in 'value' - -=cut - -# Calculate information necessary going through the omniscient only once -# return a lisf of sub_list - Sub list contain all inforamtion level1,2,3 of all feature linked to a type of feature of level 2. -# (eg: Gene(l1),mRNA(l2),cds(l3),exon(l3), where the type of level1 and level3 feature are only those linked to mRNA.) -sub gff3_statistics { - - my ($hash_omniscient, $genome) = @_ ; - - my @result_list; - my %distribution; - #my $out = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); - - #check genome size - my $genomeSize=undef; - if($genome){ - if( $genome =~ /^[0-9]+$/){ #check if it's a number - $genomeSize=$genome; - } - elsif($genome){ - my $seqio = Bio::SeqIO->new(-file => $genome, '-format' => 'Fasta'); - while(my $seq = $seqio->next_seq) { - my $string = $seq->seq; - $genomeSize += length($string); - } - } - printf("%-45s%d%s", "Total sequence length", $genomeSize,"\n"); - } - - # get nb of each feature in omniscient; - my %all_info; - my %extra_info; #For info not sorted by Level. 
- foreach my $tag_l2 (keys %{$hash_omniscient->{'level2'}}){ - foreach my $id_l1 (keys %{$hash_omniscient->{'level2'}{$tag_l2}}){ - my $one_f2 = $hash_omniscient->{'level2'}{$tag_l2}{$id_l1}[0]; - - -# +------------------------------------------------------+ -# |+----------------------------------------------------+| -# || FEATURE LEVEL1 || -# |+----------------------------------------------------+| -# +------------------------------------------------------+ - - my $feature_l1=undef; - - # retrieve the l1 tag - my $tag_l1; - foreach my $tag_level1 (keys %{$hash_omniscient->{'level1'}}){ - if (exists ($hash_omniscient->{'level1'}{$tag_level1}{$id_l1})){ - $feature_l1=$hash_omniscient->{'level1'}{$tag_level1}{$id_l1}; - $tag_l1=$tag_level1; - last; - } - } - if(! $feature_l1){print "Problem ! We didnt retrieve the level1 feature with id $id_l1\n";exit;} - - #count number of feature - $all_info{$tag_l2}{'level1'}{$tag_l1}{'nb_feat'}++; - - #compute feature size - my $sizeFeature=($feature_l1->end-$feature_l1->start)+1; - $all_info{$tag_l2}{'level1'}{$tag_l1}{'size_feat'}+=$sizeFeature; - - #create distribution list - push @{$all_info{$tag_l2}{'level1'}{$tag_l1}{'distribution'}}, $sizeFeature; - - # grab longest - if ((! $all_info{$tag_l2}{'level1'}{$tag_l1}{'longest'}) or ($all_info{$tag_l2}{'level1'}{$tag_l1}{'longest'} < $sizeFeature)){ - $all_info{$tag_l2}{'level1'}{$tag_l1}{'longest'}=$sizeFeature; - } - - # grab shorter - if ((! 
$all_info{$tag_l2}{'level1'}{$tag_l1}{'shortest'}) or ($all_info{$tag_l2}{'level1'}{$tag_l1}{'shortest'} > $sizeFeature)){ - $all_info{$tag_l2}{'level1'}{$tag_l1}{'shortest'}=$sizeFeature; - } - -# +------------------------------------------------------+ -# |+----------------------------------------------------+| -# || FEATURE LEVEL2 || -# |+----------------------------------------------------+| -# +------------------------------------------------------+ - my $counterL2_match=-1; - my $All_l2_single=1; - foreach my $feature_l2 ( @{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}} ){ - #print $feature_l2->gff_string()."\n"; - #count number of feature - $all_info{$tag_l2}{'level2'}{$tag_l2}{'nb_feat'}++; - - #compute feature size - my $sizeFeature=($feature_l2->end-$feature_l2->start)+1; - $all_info{$tag_l2}{'level2'}{$tag_l2}{'size_feat'}+=$sizeFeature; - - #create distribution list - push @{$all_info{$tag_l2}{'level2'}{$tag_l2}{'distribution'}}, $sizeFeature; - - # grab longest - if ((! $all_info{$tag_l2}{'level2'}{$tag_l2}{'longest'}) or ($all_info{$tag_l2}{'level2'}{$tag_l2}{'longest'} < $sizeFeature)){ - $all_info{$tag_l2}{'level2'}{$tag_l2}{'longest'}=$sizeFeature; - } - # grab shorter - if ((! $all_info{$tag_l2}{'level2'}{$tag_l2}{'shortest'}) or ($all_info{$tag_l2}{'level2'}{$tag_l2}{'shortest'} > $sizeFeature)){ - $all_info{$tag_l2}{'level2'}{$tag_l2}{'shortest'}=$sizeFeature; - } - - ######################################################## - # Special case match match_part => calcul the introns - ######################################################## - if($tag_l2 =~ "match"){ - my @sortedList = sort {$a->start <=> $b->start} @{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}}; - #if(! exists ($all_info{$tag_l2}{'level2'}{'intron'}{'nb_feat'})) {$all_info{$tag_l2}{'level2'}{'intron'}{'nb_feat'}=0;} - #if(! 
exists ($all_info{$tag_l2}{'level2'}{'intron'}{'size_feat'})) {$all_info{$tag_l2}{'level2'}{'intron'}{'size_feat'}=0;} - my $indexLastL2 = $#{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}}; - $counterL2_match++; - - if($counterL2_match > 0 and $counterL2_match <= $indexLastL2){ - my $intronSize= $sortedList[$counterL2_match]->start - $sortedList[$counterL2_match-1]->end; - - #compute feature size - $all_info{$tag_l2}{'level2'}{'intron'}{'size_feat'}+=$intronSize; - - #create distribution list - push @{$all_info{$tag_l2}{'level2'}{'intron'}{'distribution'}}, $sizeFeature; - - # grab longest - if ((! $all_info{$tag_l2}{'level2'}{'intron'}{'longest'}) or ($all_info{$tag_l2}{'level2'}{'intron'}{'longest'} < $intronSize)){ - $all_info{$tag_l2}{'level2'}{'intron'}{'longest'}=$intronSize; - } - # grab shorter - if ((! $all_info{$tag_l2}{'level2'}{'intron'}{'shortest'}) or ($all_info{$tag_l2}{'level2'}{'intron'}{'shortest'} > $intronSize)){ - $all_info{$tag_l2}{'level2'}{'intron'}{'shortest'}=$intronSize; - } - #Count number - $all_info{$tag_l2}{'level2'}{'intron'}{'nb_feat'}+=1; - - } - - } - -# +------------------------------------------------------+ -# |+----------------------------------------------------+| -# || FEATURE LEVEL3 || -# |+----------------------------------------------------+| -# +------------------------------------------------------+ - my $utr3 = undef; - my $utr5 = undef; - my $id_l2=lc($feature_l2->_tag_value('ID')); - foreach my $tag_l3 (keys %{$hash_omniscient->{'level3'}}){ - - if(exists ($hash_omniscient->{'level3'}{$tag_l3}{$id_l2})){ - my $sizeMultiFeat=0; - my $counterL3=-1; - my $indexLast = $#{$hash_omniscient->{'level3'}{$tag_l3}{$id_l2}}; - - my @sortedList = sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{$tag_l3}{$id_l2}}; - foreach my $feature_l3 ( @sortedList ){ - - #count number feature of tag_l3 type - $counterL3++; - - #------------------------------------------------- - # Manage Introns - 
#------------------------------------------------- - # from the second intron to the last (from index 1 to last index of the table sortedList) - # We go inside this loop only if we have more than 1 feature. - if($counterL3 > 0 and $counterL3 <= $indexLast){ - my $intronSize = $sortedList[$counterL3]->start - $sortedList[$counterL3-1]->end; - - #compute feature size - $all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'size_feat'}+=$intronSize; - - #create distribution list - push @{$all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'distribution'}}, $sizeFeature; - - # grab longest - if ((! $all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'longest'}) or ($all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'longest'} < $intronSize)){ - $all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'longest'}=$intronSize; - } - - # grab shorter - if ((! $all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'shortest'}) or ($all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'shortest'} > $intronSize)){ - $all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'shortest'}=$intronSize; - } - - #Count number - $all_info{$tag_l2}{'level3'}{$tag_l3}{'intron'}{'nb_feat'}+=1; - } - - #compute cumulative feature size - my $sizeFeature=($feature_l3->end-$feature_l3->start)+1; - $all_info{$tag_l2}{'level3'}{$tag_l3}{'size_feat'}+=$sizeFeature; - - #------------------------------------------------- - # MANAGE SPREAD FEATURES (multi exon features) - #------------------------------------------------- - if(($tag_l3 =~ /cds/) or ($tag_l3 =~ /utr/)){ - $sizeMultiFeat+=$sizeFeature; - $all_info{$tag_l2}{'level3'}{$tag_l3}{'exon'}{'nb_feat'}++; - - #### MANAGE piece of multi exon features (spread features) - - #create distribution list of multifeature piece - push @{$all_info{$tag_l2}{'level3'}{$tag_l3}{'piece'}{'distribution'}}, $sizeFeature; - - # grab longest - if ((! 
$all_info{$tag_l2}{'level3'}{$tag_l3}{'piece'}{'longest'}) or ($all_info{$tag_l2}{'level3'}{$tag_l3}{'piece'}{'longest'} < $sizeFeature)){ - $all_info{$tag_l2}{'level3'}{$tag_l3}{'piece'}{'longest'}=$sizeFeature; - } - # grab shorter - if ((! $all_info{$tag_l2}{'level3'}{$tag_l3}{'piece'}{'shortest'}) or ($all_info{$tag_l2}{'level3'}{$tag_l3}{'piece'}{'shortest'} > $sizeFeature)){ - $all_info{$tag_l2}{'level3'}{$tag_l3}{'piece'}{'shortest'}=$sizeFeature; - } - } - #------------------------------------------------- - # MANAGE single FEATURES (multi exon features) - #------------------------------------------------- - else{ - #count number of feature - $all_info{$tag_l2}{'level3'}{$tag_l3}{'nb_feat'}++; - - #create distribution list of multifeature piece - push @{$all_info{$tag_l2}{'level3'}{$tag_l3}{'distribution'}}, $sizeFeature; - - # grab longest - if ((! $all_info{$tag_l2}{'level3'}{$tag_l3}{'longest'}) or ($all_info{$tag_l2}{'level3'}{$tag_l3}{'longest'} < $sizeFeature)){ - $all_info{$tag_l2}{'level3'}{$tag_l3}{'longest'}=$sizeFeature; - } - # grab shorter - if ((! $all_info{$tag_l2}{'level3'}{$tag_l3}{'shortest'}) or ($all_info{$tag_l2}{'level3'}{$tag_l3}{'shortest'} > $sizeFeature)){ - $all_info{$tag_l2}{'level3'}{$tag_l3}{'shortest'}=$sizeFeature; - } - } - #################### - #mange utr per mRNA - if ($tag_l3 =~ /three_prime_utr/){ - $utr3=1; - } - if ($tag_l3 =~ /five_prime_utr/){ - $utr5=1; - } - }# END FOREACH L3 - - #---------------------------------------- - # NOW TAKE CARE OF MULTIFEATURE AND L2 - #in that case the feature was split in several peaces that have been glue together - if (($tag_l3 =~ /utr/) or ($tag_l3 =~ /cds/)){ - #count number of feature - $all_info{$tag_l2}{'level3'}{$tag_l3}{'nb_feat'}++; - - #create distribution list - push @{$all_info{$tag_l2}{'level3'}{$tag_l3}{'distribution'}}, $sizeMultiFeat; - - # grab longest - if ((! 
$all_info{$tag_l2}{'level3'}{$tag_l3}{'longest'}) or ($all_info{$tag_l2}{'level3'}{$tag_l3}{'longest'} < $sizeMultiFeat)){ - $all_info{$tag_l2}{'level3'}{$tag_l3}{'longest'}=$sizeMultiFeat; - } - - # grab shorter - if ((! $all_info{$tag_l2}{'level3'}{$tag_l3}{'shortest'}) or ($all_info{$tag_l2}{'level3'}{$tag_l3}{'shortest'} > $sizeMultiFeat)){ - $all_info{$tag_l2}{'level3'}{$tag_l3}{'shortest'}=$sizeMultiFeat; - } - } - - if ($tag_l3 =~ /exon/){ - if ($indexLast == 0) { - # body... - $extra_info{$tag_l2}{'level2'}{$tag_l2}{'single'}++; - } - else{ - $All_l2_single=undef; - } - } - } - }# END all feature level 3 - - # 1) Manage UTR both side - if ($utr3 and $utr5){ - $all_info{$tag_l2}{'level2'}{$tag_l2}{'utr_both_side'}++; - $all_info{$tag_l2}{'level2'}{$tag_l2}{'utr_at_least_one_side'}++; - } # 2) Manage UTR at least one side - elsif ($utr3 or $utr5){ - $all_info{$tag_l2}{'level2'}{$tag_l2}{'utr_at_least_one_side'}++; - } - }# END all feature level 2 - if($All_l2_single){ - $extra_info{$tag_l2}{'level1'}{$tag_l1}{'single'}++; - #print "avant: ".$feature_l1->gff_string()."\n"; - #print $extra_info{$tag_l2}{'level1'}{$tag_l1}{'single'}." 
".$extra_info{$tag_l2}{'level2'}{$tag_l2}{'single'}; - #if ($extra_info{$tag_l2}{'level1'}{$tag_l1}{'single'} != $extra_info{$tag_l2}{'level2'}{$tag_l2}{'single'} ){ - # print $feature_l1->gff_string()."\n";exit; - #} - } - } - } - - # create the list of sentences that resume the results - foreach my $type (keys %all_info){ - my $hashType = $all_info{$type}; - my @result; - - my $info_number = _info_number($hashType); - push @result, @$info_number; - - if(exists ($extra_info{$type})){ - my $info_single = _info_single($extra_info{$type}); - push @result, @$info_single; - } - - my $info_mean_per = _info_mean_per($hashType); - push @result, @$info_mean_per; - - my $info_length = _info_length($hashType); - push @result, @$info_length; - - my $info_mean_length = _info_mean_length($hashType); - push @result, @$info_mean_length; - - if($genome){ - my $info_coverage = _info_coverage($hashType, $genomeSize); - push @result, @$info_coverage; - } - - my $info_longest = _info_longest($hashType); - push @result, @$info_longest; - - my $info_shortest = _info_shortest($hashType); - push @result, @$info_shortest; - - push @result_list, \@result; - - #extract distribution values - - foreach my $level (keys %{$all_info{$type}} ) { - - foreach my $tag ( keys %{$all_info{$type}{$level}} ) { - - if( exists_keys (\%all_info,($type, $level, $tag, 'distribution')) ){ - $distribution{$type}{$level}{$tag}{'whole'} = delete $all_info{$type}{$level}{$tag}{'distribution'}; - } - if( exists_keys (\%all_info,($type, $level, $tag, 'piece', 'distribution') ) ){ - $distribution{$type}{$level}{$tag}{'piece'} = delete $all_info{$type}{$level}{$tag}{'piece'}{'distribution'}; - } - } - } - } - -return \@result_list, \%distribution; -} - -##### -# Give info about single exon gene and mRNA -sub _info_single{ - - my ($all_info) = @_ ; - my @resu; - - #print level1 - foreach my $tag_l1 (sort keys %{$all_info->{'level1'}}){ - push @resu, sprintf("%-45s%d%s", "Number of single exon $tag_l1",, 
$all_info->{'level1'}{$tag_l1}{'single'},"\n"); - } - - #print level2 - foreach my $tag_l2 (sort keys %{$all_info->{'level2'}}){ - push @resu, sprintf("%-45s%d%s", "Number of single exon $tag_l2", $all_info->{'level2'}{$tag_l2}{'single'},"\n"); - } - - return \@resu; -} - -##### -# Give info about number of feature of each type -sub _info_number { - - my ($all_info) = @_ ; - my @resu; - my $there_is_utr=undef; - - #print level1 - foreach my $tag_l1 (sort keys %{$all_info->{'level1'}}){ - push @resu, sprintf("%-45s%d%s", "Number of $tag_l1"."s", $all_info->{'level1'}{$tag_l1}{'nb_feat'},"\n"); - } - - #print level2 - foreach my $tag_l2 (sort keys %{$all_info->{'level2'}}){ - push @resu, sprintf("%-45s%d%s", "Number of $tag_l2"."s", $all_info->{'level2'}{$tag_l2}{'nb_feat'},"\n"); - #manage utr both side - if(exists ($all_info->{'level2'}{$tag_l2}{'utr_both_side'})){ - push @resu, sprintf("%-45s%d%s", "Number of mrnas with utr both sides", $all_info->{'level2'}{$tag_l2}{'utr_both_side'},"\n"); - } - #manage utr both side - if(exists ($all_info->{'level2'}{$tag_l2}{'utr_at_least_one_side'})){ - push @resu, sprintf("%-45s%d%s", "Number of mrnas with at least one utr", $all_info->{'level2'}{$tag_l2}{'utr_at_least_one_side'},"\n"); - } - } - - #print level3 - - foreach my $tag_l3 (sort keys %{$all_info->{'level3'}}){ - push @resu, sprintf("%-45s%d%s", "Number of $tag_l3"."s", $all_info->{'level3'}{$tag_l3}{'nb_feat'},"\n"); - } - - #print level3 - exon case - foreach my $tag_l3 (sort keys %{$all_info->{'level3'}}){ - if( exists ($all_info->{'level3'}{$tag_l3}{'exon'} )) { - push @resu, sprintf("%-45s%d%s", "Number of exon in $tag_l3", $all_info->{'level3'}{$tag_l3}{'exon'}{'nb_feat'},"\n"); - } - } - #print level3 - intron case - foreach my $tag_l3 (sort keys %{$all_info->{'level3'}}){ - if( exists ($all_info->{'level3'}{$tag_l3}{'intron'} )) { - push @resu, sprintf("%-45s%d%s", "Number of intron in $tag_l3", $all_info->{'level3'}{$tag_l3}{'intron'}{'nb_feat'},"\n"); - } 
- } - - return \@resu; -} - -############# -# Give info about shortest feature of each type -sub _info_shortest { - - my ($all_info) = @_ ; - my @resu; - - #print level1 - foreach my $tag_l1 (sort keys %{$all_info->{'level1'}}){ - push @resu, sprintf("%-45s%d%s", "Shortest $tag_l1"."s", $all_info->{'level1'}{$tag_l1}{'shortest'},"\n"); - } - - #print level2 - foreach my $tag_l2 (sort keys %{$all_info->{'level2'}}){ - push @resu, sprintf("%-45s%d%s", "Shortest $tag_l2"."s", $all_info->{'level2'}{$tag_l2}{'shortest'},"\n"); - } - - #print level3 - foreach my $tag_l3 (sort keys %{$all_info->{'level3'}}){ - if( ! exists($all_info->{'level3'}{$tag_l3}{'shortest'}) or $all_info->{'level3'}{$tag_l3}{'shortest'} == 0 ) { - print "No shortest for $tag_l3\n"; - } - else{ - push @resu, sprintf("%-45s%d%s", "Shortest $tag_l3"."s", $all_info->{'level3'}{$tag_l3}{'shortest'},"\n"); - } - } - - #print level3 - spread feature cases - foreach my $tag_l3 (sort keys %{$all_info->{'level3'}}){ - if( exists ($all_info->{'level3'}{$tag_l3}{'piece'} )) { - push @resu, sprintf("%-45s%d%s", "Shortest $tag_l3 piece", $all_info->{'level3'}{$tag_l3}{'piece'}{'shortest'},"\n"); - } - } - - #print level3 - intron - foreach my $tag_l3 (sort keys %{$all_info->{'level3'}}){ - if(exists_keys($all_info, ('level3',$tag_l3,'intron'))){ - push @resu, sprintf("%-45s%d%s", "Shortest intron into $tag_l3 part", $all_info->{'level3'}{$tag_l3}{'intron'}{'shortest'},"\n"); - } - } - - return \@resu; -} - -############# -# Give info about longest feature of each type -sub _info_longest { - - my ($all_info) = @_ ; - my @resu; - - #print level1 - foreach my $tag_l1 (sort keys %{$all_info->{'level1'}}){ - push @resu, sprintf("%-45s%d%s", "Longest $tag_l1"."s", $all_info->{'level1'}{$tag_l1}{'longest'},"\n"); - } - - #print level2 - foreach my $tag_l2 (sort keys %{$all_info->{'level2'}}){ - push @resu, sprintf("%-45s%d%s", "Longest $tag_l2"."s", $all_info->{'level2'}{$tag_l2}{'longest'},"\n"); - } - - #print 
level3 - foreach my $tag_l3 (sort keys %{$all_info->{'level3'}}){ - if( ! exists($all_info->{'level3'}{$tag_l3}{'longest'}) or $all_info->{'level3'}{$tag_l3}{'longest'} == 0 ) { - print "No longest for $tag_l3\n"; - } - else{ - push @resu, sprintf("%-45s%d%s", "Longest $tag_l3"."s", $all_info->{'level3'}{$tag_l3}{'longest'},"\n"); - } - } - - #print level3 - spread feature cases - foreach my $tag_l3 (sort keys %{$all_info->{'level3'}}){ - if( exists ($all_info->{'level3'}{$tag_l3}{'piece'} )) { - push @resu, sprintf("%-45s%d%s", "Longest $tag_l3 piece", $all_info->{'level3'}{$tag_l3}{'piece'}{'longest'},"\n"); - } - } - - #print level3 - intron - foreach my $tag_l3 (sort keys %{$all_info->{'level3'}}){ - if(exists_keys($all_info, ('level3',$tag_l3,'intron'))){ - push @resu, sprintf("%-45s%d%s", "Longest intron into $tag_l3 part", $all_info->{'level3'}{$tag_l3}{'intron'}{'longest'},"\n"); - } - } - - return \@resu; -} - -############# -# Give info about number mean of feature of each type per Parent type (mRNA per gene / cds per mRNA / etc) -sub _info_mean_per { - my ($all_info) = @_ ; - my @resu; - - #print level2 - foreach my $tag_l2 (sort keys %{$all_info->{'level2'}}){ - foreach my $tag_l1 (sort keys %{$all_info->{'level1'}}){ - my $mean= $all_info->{'level2'}{$tag_l2}{'nb_feat'}/$all_info->{'level1'}{$tag_l1}{'nb_feat'}; - push @resu, sprintf("%-45s%.1f%s", "mean $tag_l2"."s per $tag_l1", $mean,"\n"); - } - } - #print level3 - foreach my $tag_l3 (sort keys %{$all_info->{'level3'}}){ - foreach my $tag_l2 (sort keys %{$all_info->{'level2'}}){ - my $mean= $all_info->{'level3'}{$tag_l3}{'nb_feat'}/$all_info->{'level2'}{$tag_l2}{'nb_feat'}; - push @resu, sprintf("%-45s%.1f%s", "mean $tag_l3"."s per $tag_l2", $mean,"\n"); - } - } - #print level3 - spread feature cases - foreach my $tag_l3 (sort keys %{$all_info->{'level3'}}){ - if( exists ($all_info->{'level3'}{$tag_l3}{'exon'} )) { - my $mean= 
$all_info->{'level3'}{$tag_l3}{'exon'}{'nb_feat'}/$all_info->{'level3'}{$tag_l3}{'nb_feat'}; - push @resu, sprintf("%-45s%.1f%s", "mean exons per $tag_l3", $mean,"\n"); - } - } - #print introns - foreach my $tag_l3 (sort keys %{$all_info->{'level3'}}){ - foreach my $tag_l2 (sort keys %{$all_info->{'level2'}}){ - if(exists_keys($all_info, ('level3',$tag_l3,'intron'))){ - my $mean= $all_info->{'level3'}{$tag_l3}{'intron'}{'nb_feat'}/$all_info->{'level2'}{$tag_l2}{'nb_feat'}; - push @resu, sprintf("%-45s%.1f%s", "mean introns in $tag_l3"."s per $tag_l2", $mean,"\n"); - } - } - } - return \@resu; -} - -############# -# Give info about lenght of the total of features by type -sub _info_length { - my ($all_info) = @_ ; - my @resu; - - #print level1 - foreach my $tag_l1 (sort keys %{$all_info->{'level1'}}){ - push @resu, sprintf("%-45s%d%s", "Total $tag_l1 length", $all_info->{'level1'}{$tag_l1}{'size_feat'},"\n"); - } - - #print level2 - foreach my $tag_l2 (sort keys %{$all_info->{'level2'}}){ - push @resu, sprintf("%-45s%d%s", "Total $tag_l2 length", $all_info->{'level2'}{$tag_l2}{'size_feat'},"\n"); - } - - #print level3 - foreach my $tag_l3 (sort keys %{$all_info->{'level3'}}){ - push @resu, sprintf("%-45s%d%s", "Total $tag_l3 length", $all_info->{'level3'}{$tag_l3}{'size_feat'},"\n"); - } - - #print introns - foreach my $tag_l3 (sort keys %{$all_info->{'level3'}}){ - if(exists_keys($all_info, ('level3',$tag_l3,'intron'))){ - push @resu, sprintf("%-45s%d%s", "Total intron length per $tag_l3", $all_info->{'level3'}{$tag_l3}{'intron'}{'size_feat'},"\n"); - } - } - - return \@resu; -} - -############# -# Give info about mean lenght of features by type -sub _info_mean_length { - my ($all_info) = @_ ; - my @resu; - - - #print level1 - foreach my $tag_l1 (sort keys %{$all_info->{'level1'}}){ - my $meanl= $all_info->{'level1'}{$tag_l1}{'size_feat'}/$all_info->{'level1'}{$tag_l1}{'nb_feat'}; - push @resu, sprintf("%-45s%d%s", "mean $tag_l1 length", $meanl,"\n"); - } - - 
#print level2 - foreach my $tag_l2 (sort keys %{$all_info->{'level2'}}){ - my $size_feat = $all_info->{'level2'}{$tag_l2}{'size_feat'}; - my $nb_feat = $all_info->{'level2'}{$tag_l2}{'nb_feat'}; - - if($size_feat !=0 and $nb_feat != 0){ - my $meanl= $all_info->{'level2'}{$tag_l2}{'size_feat'}/$all_info->{'level2'}{$tag_l2}{'nb_feat'}; - push @resu, sprintf("%-45s%d%s", "mean $tag_l2 length", $meanl,"\n"); - } - else{warn "Problem in the calcul of level2 - $tag_l2 - size_feat";} - } - - #print level3 - foreach my $tag_l3 (sort keys %{$all_info->{'level3'}}){ - if( ($all_info->{'level3'}{$tag_l3}{'size_feat'} == 0) or ($all_info->{'level3'}{$tag_l3}{'nb_feat'} == 0) ) { - print "No size_feat for $tag_l3\n"; - } - else{ - my $meanl= $all_info->{'level3'}{$tag_l3}{'size_feat'}/$all_info->{'level3'}{$tag_l3}{'nb_feat'}; - push @resu, sprintf("%-45s%d%s", "mean $tag_l3 length", $meanl,"\n"); - } - } - - #print level3 - multifeature cases - foreach my $tag_l3 (sort keys %{$all_info->{'level3'}}){ - if( exists ($all_info->{'level3'}{$tag_l3}{'exon'} )) { - my $meanl= $all_info->{'level3'}{$tag_l3}{'size_feat'}/$all_info->{'level3'}{$tag_l3}{'exon'}{'nb_feat'}; - push @resu, sprintf("%-45s%d%s", "mean $tag_l3 piece length", $meanl,"\n"); - } - } - - #print introns - foreach my $tag_l3 (sort keys %{$all_info->{'level3'}}){ - if(exists_keys($all_info, ('level3',$tag_l3,'intron'))){ - my $meanl= $all_info->{'level3'}{$tag_l3}{'intron'}{'size_feat'}/$all_info->{'level3'}{$tag_l3}{'intron'}{'nb_feat'}; - push @resu, sprintf("%-45s%d%s", "mean intron in $tag_l3 length", $meanl,"\n"); - } - } - - return \@resu; -} - -############# -# Give info about the features' coverage (by types) within/among the genome -sub _info_coverage { - my ($all_info, $genomeSize) = @_ ; - my @resu; - - #print level1 - foreach my $tag_l1 (sort keys %{$all_info->{'level1'}}){ - my $perc= ($all_info->{'level1'}{$tag_l1}{'size_feat'}*100)/$genomeSize; - push @resu, sprintf("%-45s%.1f%s", "% of genome 
covered by $tag_l1", $perc,"\n"); - } - - #print level2 - foreach my $tag_l2 (sort keys %{$all_info->{'level2'}}){ - my $perc= ($all_info->{'level2'}{$tag_l2}{'size_feat'}*100)/$genomeSize; - push @resu, sprintf("%-45s%.1f%s", "% of genome covered by $tag_l2", $perc,"\n"); - } - - #print level3 - foreach my $tag_l3 (sort keys %{$all_info->{'level3'}}){ - my $perc= ($all_info->{'level3'}{$tag_l3}{'size_feat'}*100)/$genomeSize; - push @resu, sprintf("%-45s%.1f%s", "% of genome covered by $tag_l3", $perc,"\n"); - } - - #print level3 - foreach my $tag_l3 (sort keys %{$all_info->{'level3'}}){ - if(exists_keys($all_info, ('level3',$tag_l3,'intron'))){ - my $perc= ($all_info->{'level3'}{$tag_l3}{'intron'}{'size_feat'}*100)/$genomeSize; - push @resu, sprintf("%-45s%.1f%s", "% of genome covered by intron from $tag_l3", $perc,"\n"); - } - } - - return \@resu; -} - - -1; diff --git a/annotation/NBIS/GFF3/Ontology/sofa_2_5_3.obo b/annotation/NBIS/GFF3/Ontology/sofa_2_5_3.obo deleted file mode 100644 index 5cf5c6525..000000000 --- a/annotation/NBIS/GFF3/Ontology/sofa_2_5_3.obo +++ /dev/null @@ -1,3057 +0,0 @@ -format-version: 1.2 -subsetdef: biosapiens "biosapiens protein feature ontology" -subsetdef: DBVAR "database of genomic structural variation" -subsetdef: SOFA "SO feature annotation" -synonymtypedef: aa1 "amino acid 1 letter code" -synonymtypedef: aa3 "amino acid 3 letter code" -synonymtypedef: AAMOD "amino acid modification" -synonymtypedef: BS "biosapiens" -synonymtypedef: dbsnp "dbsnp variant terms" -synonymtypedef: dbvar "DBVAR" -synonymtypedef: ebi_variants "ensembl variant terms" -synonymtypedef: RNAMOD "RNA modification" EXACT -synonymtypedef: VAR "variant annotation term" -ontology: so-xp/subsets/SOFA -default-namespace: sequence - -[Term] -id: SO:0000000 -name: Sequence_Ontology -namespace: sequence -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000001 -name: region -namespace: sequence -def: "A sequence_feature with an extent greater than zero. 
A nucleotide region is composed of bases and a polypeptide region is composed of amino acids." [SO:ke] -subset: SOFA -synonym: "sequence" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000699 ! junction - -[Term] -id: SO:0000004 -name: interior_coding_exon -namespace: sequence -subset: SOFA -synonym: "interior coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000005 -name: satellite_DNA -namespace: sequence -def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "satellite DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Satellite_DNA "wiki" -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000006 -name: PCR_product -namespace: sequence -def: "A region amplified by a PCR reaction." [SO:ke] -comment: This term is mapped to MGED. This term is now located in OBI, with the following ID OBI_0000406. -subset: SOFA -synonym: "amplicon" RELATED [] -synonym: "PCR product" EXACT [] -xref: http://en.wikipedia.org/wiki/RAPD "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000007 -name: read_pair -namespace: sequence -def: "One of a pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -synonym: "mate pair" EXACT [] -synonym: "read-pair" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000149 ! contig -relationship: part_of SO:0001790 ! paired_end_fragment - -[Term] -id: SO:0000013 -name: scRNA -namespace: sequence -def: "A small non coding RNA sequence, present in the cytoplasm." [SO:ke] -subset: SOFA -synonym: "small cytoplasmic RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! 
nc_primary_transcript - -[Term] -id: SO:0000038 -name: match_set -namespace: sequence -def: "A collection of match parts." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000039 -name: match_part -namespace: sequence -def: "A part of a match, for example an hsp from blast is a match_part." [SO:ke] -subset: SOFA -synonym: "match part" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: part_of SO:0000343 ! match - -[Term] -id: SO:0000050 -name: gene_part -namespace: sequence -def: "A part of a gene, that has no other route in the ontology back to region. This concept is necessary for logical inference as these parts must have the properties of region. It also allows us to associate all the parts of genes with a gene." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000057 -name: operator -namespace: sequence -def: "A regulatory element of an operon to which activators or repressors bind thereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -synonym: "operator segment" EXACT [] -xref: http://en.wikipedia.org/wiki/Operator_(biology)#Operator "wiki" -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000059 -name: nuclease_binding_site -namespace: sequence -def: "A binding site that, of a nucleotide molecule, that interacts selectively and non-covalently with polypeptide residues of a nuclease." [SO:cb] -subset: SOFA -synonym: "nuclease binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site - -[Term] -id: SO:0000101 -name: transposable_element -namespace: sequence -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -synonym: "transposable element" EXACT [] -synonym: "transposon" EXACT [] -xref: http://en.wikipedia.org/wiki/Transposable_element "wiki" -is_a: SO:0001039 ! 
integrated_mobile_genetic_element - -[Term] -id: SO:0000102 -name: expressed_sequence_match -namespace: sequence -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -synonym: "expressed sequence match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -namespace: sequence -def: "The end of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert end" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000104 -name: polypeptide -namespace: sequence -alt_id: SO:0000358 -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The term 'protein' was merged with 'polypeptide'. Although 'protein' was a sequence_attribute and therefore meant to describe the quality rather than an actual feature, it was being used erroneously. It is replaced by 'peptidyl' as the polymer attribute. -subset: SOFA -synonym: "protein" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypeptide "wiki" -is_a: SO:0001411 ! biological_region -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000109 -name: sequence_variant_obs -namespace: sequence -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000110 -name: sequence_feature -namespace: sequence -def: "Any extent of continuous biological sequence." [LAMHDI:mb, SO:ke] -subset: SOFA -synonym: "located sequence feature" RELATED [] -synonym: "located_sequence_feature" EXACT [] -synonym: "sequence feature" EXACT [] - -[Term] -id: SO:0000112 -name: primer -namespace: sequence -def: "An oligo to which new deoxyribonucleotides can be added by DNA polymerase." 
[SO:ke] -subset: SOFA -synonym: "DNA primer" EXACT [] -synonym: "primer oligonucleotide" EXACT [] -synonym: "primer polynucleotide" EXACT [] -synonym: "primer sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Primer_(molecular_biology) "wiki" -is_a: SO:0000441 ! ss_oligo - -[Term] -id: SO:0000113 -name: proviral_region -namespace: sequence -def: "A viral sequence which has integrated into a host genome." [SO:ke] -subset: SOFA -synonym: "proviral region" EXACT [] -synonym: "proviral sequence" RELATED [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000114 -name: methylated_cytosine -namespace: sequence -def: "A methylated deoxy-cytosine." [SO:ke] -subset: SOFA -synonym: "methylated C" EXACT [] -synonym: "methylated cytosine" EXACT [] -synonym: "methylated cytosine base" EXACT [] -synonym: "methylated cytosine residue" EXACT [] -synonym: "methylated_C" EXACT [] -is_a: SO:0000306 ! methylated_DNA_base_feature - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -namespace: sequence -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns. -subset: SOFA -synonym: "pre mRNA" RELATED [] -synonym: "protein coding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000139 -name: ribosome_entry_site -namespace: sequence -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000140 -name: attenuator -namespace: sequence -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000234 ! 
mRNA - -[Term] -id: SO:0000141 -name: terminator -namespace: sequence -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000143 -name: assembly_component -namespace: sequence -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000147 -name: exon -namespace: sequence -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -namespace: sequence -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -namespace: sequence -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unavailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -namespace: sequence -def: "A sequence obtained from a single sequencing experiment. 
Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -namespace: sequence -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000159 -name: deletion -namespace: sequence -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -xref: loinc:LA6692-3 "Deletion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000161 -name: methylated_adenine -namespace: sequence -def: "A modified base in which adenine has been methylated." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -synonym: "methylated_A" EXACT [] -is_a: SO:0000306 ! methylated_DNA_base_feature - -[Term] -id: SO:0000162 -name: splice_site -namespace: sequence -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinery. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! 
primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -namespace: sequence -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -namespace: sequence -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -namespace: sequence -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a place holder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! 
CRM - -[Term] -id: SO:0000167 -name: promoter -namespace: sequence -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000177 -name: cross_genome_match -namespace: sequence -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -namespace: sequence -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -namespace: sequence -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -namespace: sequence -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000183 -name: non_transcribed_region -namespace: sequence -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! 
gene_component_region - -[Term] -id: SO:0000185 -name: primary_transcript -namespace: sequence -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000187 -name: repeat_family -namespace: sequence -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -namespace: sequence -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000193 -name: RFLP_fragment -namespace: sequence -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." [GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000195 -name: coding_exon -namespace: sequence -def: "An exon whereby at least one base is part of a codon (here, 'codon' is inclusive of the stop_codon)." [SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -namespace: sequence -def: "The sequence of the five_prime_coding_exon that codes for protein." 
[SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -namespace: sequence -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000195 ! coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -namespace: sequence -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -namespace: sequence -def: "The 5' most coding exon." [SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -namespace: sequence -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -namespace: sequence -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -namespace: sequence -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -namespace: sequence -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -subset: SOFA -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -namespace: sequence -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -namespace: sequence -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! 
mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -namespace: sequence -def: "A region of a nucleotide molecule that binds a Transcription Factor or Transcription Factor complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000236 -name: ORF -namespace: sequence -def: "The in-frame interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000239 -name: flanking_region -namespace: sequence -def: "The sequences extending on either side of a specific region." [SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000252 -name: rRNA -namespace: sequence -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -namespace: sequence -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. 
Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -namespace: sequence -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -namespace: sequence -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. 
They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -namespace: sequence -alt_id: SO:0000649 -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene (or the product of other non coding RNA genes. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (usually via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:11081512, PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -synonym: "small temporal RNA" EXACT [] -synonym: "stRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: part_of SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000289 -name: microsatellite -namespace: sequence -def: "A repeat_region containing repeat_units of 2 to 10 bp repeated in tandem." [http://www.informatics.jax.org/silver/glossary.shtml, NCBI:th] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! 
satellite_DNA - -[Term] -id: SO:0000294 -name: inverted_repeat -namespace: sequence -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000296 -name: origin_of_replication -namespace: sequence -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! replicon - -[Term] -id: SO:0000303 -name: clip -namespace: sequence -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000305 -name: modified_DNA_base -namespace: sequence -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001236 ! base -is_a: SO:0001720 ! epigenetically_modified_region - -[Term] -id: SO:0000306 -name: methylated_DNA_base_feature -namespace: sequence -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! 
modified_DNA_base - -[Term] -id: SO:0000307 -name: CpG_island -namespace: sequence -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000314 -name: direct_repeat -namespace: sequence -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: TSS -namespace: sequence -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -synonym: "transcription_start_site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -namespace: sequence -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -synonym: "coding_sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000318 -name: start_codon -namespace: sequence -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -namespace: sequence -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! 
codon - -[Term] -id: SO:0000324 -name: tag -namespace: sequence -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -namespace: sequence -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "35S rRNA primary transcript" EXACT [] -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -namespace: sequence -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000330 -name: conserved_region -namespace: sequence -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000331 -name: STS -namespace: sequence -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -namespace: sequence -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -namespace: sequence -def: "The boundary between two exons in a processed transcript." 
[SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -namespace: sequence -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000336 -name: pseudogene -namespace: sequence -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0001411 ! biological_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -namespace: sequence -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0000340 -name: chromosome -namespace: sequence -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! 
replicon - -[Term] -id: SO:0000341 -name: chromosome_band -namespace: sequence -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000343 -name: match -namespace: sequence -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -namespace: sequence -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0000165 ! enhancer -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -namespace: sequence -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000347 -name: nucleotide_match -namespace: sequence -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000349 -name: protein_match -namespace: sequence -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000353 -name: sequence_assembly -namespace: sequence -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." 
[SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000360 -name: codon -namespace: sequence -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000366 -name: insertion_site -namespace: sequence -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -namespace: sequence -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -namespace: sequence -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000372 -name: enzymatic_RNA -namespace: sequence -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000374 -name: ribozyme -namespace: sequence -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -is_a: SO:0000372 ! 
enzymatic_RNA - -[Term] -id: SO:0000375 -name: rRNA_5_8S -namespace: sequence -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -namespace: sequence -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [PMID:2436805] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -namespace: sequence -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -namespace: sequence -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterized activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -namespace: sequence -def: "The RNA component of telomerase, a reverse transcriptase that synthesizes telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -namespace: sequence -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. 
There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -namespace: sequence -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -namespace: sequence -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -namespace: sequence -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -namespace: sequence -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -namespace: sequence -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -namespace: sequence -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -namespace: sequence -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -namespace: sequence -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000403 -name: U14_snoRNA -namespace: sequence -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0000404 -name: vault_RNA -namespace: sequence -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -namespace: sequence -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000407 -name: rRNA_18S -namespace: sequence -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000409 -name: binding_site -namespace: sequence -alt_id: BS:00033 -def: "A biological_region of sequence that, in the molecule, interacts selectively and non-covalently with other molecules. A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: See GO:0005488 : binding. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -namespace: sequence -def: "A binding site that, in the molecule, interacts selectively and non-covalently with polypeptide molecules." [SO:ke] -comment: See GO:0042277 : peptide binding. -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000412 -name: restriction_fragment -namespace: sequence -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! 
assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -namespace: sequence -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000418 -name: signal_peptide -namespace: sequence -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0001527 ! peptide_localization_signal -relationship: part_of SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000419 -name: mature_protein_region -namespace: sequence -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! 
immature_peptide_region - -[Term] -id: SO:0000436 -name: ARS -namespace: sequence -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000441 -name: ss_oligo -namespace: sequence -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -namespace: sequence -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000454 -name: rasiRNA -namespace: sequence -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000462 -name: pseudogenic_region -namespace: sequence -def: "A non-functional descendant of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000464 -name: decayed_exon -namespace: sequence -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! 
pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000468 -name: golden_path_fragment -namespace: sequence -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000472 -name: tiling_path -namespace: sequence -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000474 -name: tiling_path_fragment -namespace: sequence -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000483 -name: nc_primary_transcript -namespace: sequence -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -namespace: sequence -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000195 ! coding_exon - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -namespace: sequence -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! 
five_prime_coding_exon - -[Term] -id: SO:0000499 -name: virtual_sequence -namespace: sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000502 -name: transcribed_region -namespace: sequence -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -namespace: sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -synonym: "polyadenylation termination signal" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000553 -name: polyA_site -namespace: sequence -alt_id: SO:0001430 -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation. The boundary between the UTR and the polyA sequence." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA cleavage site" EXACT [] -synonym: "polyA junction" EXACT [] -synonym: "polyA site" EXACT [] -synonym: "polyA_junction" EXACT [] -synonym: "polyadenylation site" RELATED [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000577 -name: centromere -namespace: sequence -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000581 -name: cap -namespace: sequence -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000587 -name: group_I_intron -namespace: sequence -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyze their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! 
autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -namespace: sequence -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000590 -name: SRP_RNA -namespace: sequence -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! 
nc_primary_transcript - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -namespace: sequence -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0000602 -name: guide_RNA -namespace: sequence -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -namespace: sequence -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. 
The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000605 -name: intergenic_region -namespace: sequence -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000610 -name: polyA_sequence -namespace: sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -namespace: sequence -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! 
spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -namespace: sequence -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000616 -name: transcription_end_site -namespace: sequence -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000624 -name: telomere -namespace: sequence -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -namespace: sequence -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000627 -name: insulator -namespace: sequence -def: "A transcriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -namespace: sequence -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! 
chromosome_part - -[Term] -id: SO:0000643 -name: minisatellite -namespace: sequence -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -namespace: sequence -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -namespace: sequence -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -namespace: sequence -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -namespace: sequence -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! 
rRNA - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -namespace: sequence -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -namespace: sequence -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilizes 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -namespace: sequence -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000655 -name: ncRNA -namespace: sequence -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." 
[SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000657 -name: repeat_region -namespace: sequence -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: has_part SO:0001411 ! biological_region - -[Term] -id: SO:0000658 -name: dispersed_repeat -namespace: sequence -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000662 -name: spliceosomal_intron -namespace: sequence -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000667 -name: insertion -namespace: sequence -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: DBVAR -subset: SOFA -synonym: "insertion" EXACT dbvar [http://www.ncbi.nlm.nih.gov/dbvar/] -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -xref: loinc:LA6687-3 "Insertion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -namespace: sequence -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! 
expressed_sequence_match - -[Term] -id: SO:0000673 -name: transcript -namespace: sequence -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -namespace: sequence -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000687 -name: deletion_junction -namespace: sequence -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -namespace: sequence -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -namespace: sequence -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000694 -name: SNP -namespace: sequence -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:0000695 -name: reagent -namespace: sequence -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! 
biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -namespace: sequence -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000699 -name: junction -namespace: sequence -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -synonym: "breakpoint" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000700 -name: remark -namespace: sequence -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -namespace: sequence -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -namespace: sequence -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -namespace: sequence -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -namespace: sequence -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -namespace: sequence -def: "Two or more adjacent copies of a region (of length greater than 1)." [SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -namespace: sequence -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000714 -name: nucleotide_motif -namespace: sequence -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001683 ! sequence_motif - -[Term] -id: SO:0000715 -name: RNA_motif -namespace: sequence -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000717 -name: reading_frame -namespace: sequence -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000719 -name: ultracontig -namespace: sequence -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -synonym: "superscaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000724 -name: oriT -namespace: sequence -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -namespace: sequence -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the EMBL, DDBJ, GenBank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal - -[Term] -id: SO:0000727 -name: CRM -namespace: sequence -def: "A regulatory region where transcription factor binding sites clustered to regulate various aspects of transcription activities. 
(CRMs can be located a few kb to hundred kb upstream of the basal promoter, in the coding sequence, within introns, or in the downstream 3'UTR sequences, as well as on different chromosome). A single gene can be regulated by multiple CRMs to give precise control of its spatial and temporal expression. CRMs function as nodes in large, intertwined regulatory network." [PMID:19660565, SO:SG] -comment: Requested by Stephen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -synonym: "transcription factor module" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: has_part SO:0000235 ! TF_binding_site -relationship: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000730 -name: gap -namespace: sequence -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -namespace: sequence -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -namespace: sequence -def: "The region of sequence that has been inserted and is being propagated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -namespace: sequence -def: "A non functional descendant of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! 
pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -namespace: sequence -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000830 -name: chromosome_part -namespace: sequence -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -namespace: sequence -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000833 -name: transcript_region -namespace: sequence -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -namespace: sequence -def: "A region of a mature transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -namespace: sequence -def: "A part of a primary transcript." 
[SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -namespace: sequence -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -namespace: sequence -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000839 -name: polypeptide_region -namespace: sequence -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -namespace: sequence -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! 
spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -namespace: sequence -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000851 -name: CDS_region -namespace: sequence -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -namespace: sequence -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0001000 -name: rRNA_16S -namespace: sequence -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -namespace: sequence -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -namespace: sequence -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! 
large_subunit_rRNA - -[Term] -id: SO:0001019 -name: copy_number_variation -namespace: sequence -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -subset: SOFA -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0001037 -name: mobile_genetic_element -namespace: sequence -def: "A nucleotide region with either intra-genome or intracellular mobility, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -namespace: sequence -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -namespace: sequence -def: "A regulatory_region that modulates the transcription of a gene or genes." [PMID:9679020, SO:regcreative] -subset: SOFA -synonym: "transcription-control region" EXACT [] -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -namespace: sequence -def: "A regulatory_region that modulates splicing." [SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0001679 ! 
transcription_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -namespace: sequence -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. -subset: SOFA -synonym: "partially characterised change in DNA sequence" NARROW [] -synonym: "partially_characterised_change_in_DNA_sequence" NARROW [] -synonym: "sequence alteration" EXACT [] -synonym: "sequence variation" RELATED [] -synonym: "uncharacterised_change_in_nucleotide_sequence" NARROW [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001063 -name: immature_peptide_region -namespace: sequence -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -namespace: sequence -def: "The maximal intersection of exon and UTR." [SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with a stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -namespace: sequence -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001235 -name: replicon -namespace: sequence -def: "A region containing at least one unique origin of replication and a unique termination site." 
[ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -namespace: sequence -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001248 -name: assembly -namespace: sequence -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001409 -name: biomaterial_region -namespace: sequence -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001410 -name: experimental_feature -namespace: sequence -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -namespace: sequence -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -namespace: sequence -def: "A region that is defined according to its relations with other regions within the same sequence." [SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0001419 -name: cis_splice_site -namespace: sequence -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -namespace: sequence -def: "Primary transcript region bordering trans-splice junction." [SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001483 -name: SNV -namespace: sequence -def: "SNVs are single nucleotide positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0001527 -name: peptide_localization_signal -namespace: sequence -def: "A region of peptide sequence used to target the polypeptide molecule to a specific organelle." [SO:ke] -subset: SOFA -synonym: "localization signal" RELATED [] -synonym: "peptide localization signal" EXACT [] -is_a: SO:0000839 ! polypeptide_region -created_by: kareneilbeck -creation_date: 2010-03-11T02:15:05Z - -[Term] -id: SO:0001647 -name: kozak_sequence -namespace: sequence -def: "A kind of ribosome entry site, specific to Eukaryotic organisms that overlaps part of both 5' UTR and CDS sequence." [SO:ke] -subset: SOFA -synonym: "kozak consensus" EXACT [] -synonym: "kozak consensus sequence" EXACT [] -synonym: "kozak sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Kozak_consensus_sequence "wikipedia" -is_a: SO:0000139 ! ribosome_entry_site -created_by: kareneilbeck -creation_date: 2010-06-07T03:12:20Z - -[Term] -id: SO:0001654 -name: nucleotide_to_protein_binding_site -namespace: sequence -def: "A binding site that, in the nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." 
[SO:ke] -subset: SOFA -synonym: "nucleotide to protein binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:26:05Z - -[Term] -id: SO:0001679 -name: transcription_regulatory_region -namespace: sequence -def: "A regulatory region that is involved in the control of the process of transcription." [SO:ke] -subset: SOFA -synonym: "transcription regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:49:35Z - -[Term] -id: SO:0001683 -name: sequence_motif -namespace: sequence -def: "A sequence motif is a nucleotide or amino-acid sequence pattern that may have biological significance." [http://en.wikipedia.org/wiki/Sequence_motif] -subset: SOFA -synonym: "sequence motif" RELATED [] -xref: http://en.wikipedia.org/wiki/Sequence_motif "wikipedia" -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-10-14T04:13:22Z - -[Term] -id: SO:0001720 -name: epigenetically_modified_region -namespace: sequence -def: "A biological region implicated in inherited changes caused by mechanisms other than changes in the underlying DNA sequence." [http://en.wikipedia.org/wiki/Epigenetics, SO:ke] -subset: SOFA -synonym: "epigenetically modified region" RELATED [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-03-27T12:02:29Z - -[Term] -id: SO:0001790 -name: paired_end_fragment -namespace: sequence -def: "An assembly region that has been sequenced from both ends resulting in a read_pair (mate_pair)." [SO:ke] -subset: SOFA -synonym: "paired end fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -created_by: kareneilbeck -creation_date: 2011-04-14T01:48:20Z - -[Term] -id: SO:0005836 -name: regulatory_region -namespace: sequence -def: "A region of sequence that is involved in the control of a biological process." 
[SO:ke] -subset: SOFA -synonym: "regulatory region" EXACT [] -xref: http://en.wikipedia.org/wiki/Regulatory_region "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0005855 -name: gene_group -namespace: sequence -def: "A collection of related genes." [SO:ma] -subset: SOFA -synonym: "gene group" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0100011 -name: cleaved_peptide_region -namespace: sequence -def: "The cleaved_peptide_region is the region of a peptide sequence that is cleaved during maturation." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "cleaved peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:1000002 -name: substitution -namespace: sequence -def: "A sequence alteration where the length of the change in the variant is the same as that of the reference." [SO:ke] -subset: SOFA -xref: loinc:LA6690-7 "Substitution" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1000005 -name: complex_substitution -namespace: sequence -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." [EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -synonym: "complex substitution" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000008 -name: point_mutation -namespace: sequence -def: "A single nucleotide change which has occurred at the same position of a corresponding nucleotide in a reference sequence." [SO:immuno_workshop] -subset: SOFA -synonym: "point mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Point_mutation "wiki" -is_a: SO:0001483 ! SNV - -[Term] -id: SO:1000036 -name: inversion -namespace: sequence -def: "A continuous nucleotide sequence is inverted in the same position." 
[EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: DBVAR -subset: SOFA -synonym: "inversion" EXACT dbvar [http://www.ncbi.nlm.nih.gov/dbvar/] -xref: loinc:LA6689-9 "Inversion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1001284 -name: regulon -namespace: sequence -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." [ISBN:0198506732] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Regulon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:2000061 -name: databank_entry -namespace: sequence -def: "The sequence referred to by an entry in a databank such as GenBank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -synonym: "databank entry" EXACT [] -is_a: SO:0000695 ! reagent - -[Typedef] -id: adjacent_to -name: adjacent_to -namespace: sequence -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence. X adjacent_to Y iff X and Y share a boundary but do not overlap." [PMID:20226267, SO:ke] -subset: SOFA - -[Typedef] -id: associated_with -name: associated_with -namespace: sequence -comment: This relationship is vague and up for discussion. - -[Typedef] -id: complete_evidence_for_feature -name: complete_evidence_for_feature -namespace: sequence -def: "B is complete_evidence_for_feature A if the extent (5' and 3' boundaries) and internal boundaries of B fully support the extent and internal boundaries of A." [SO:ke] -comment: If A is a feature with multiple regions such as a multi exon transcript, the supporting EST evidence is complete if each of the regions is supported by an equivalent region in B. Also there must be no extra regions in B that are not represented in A. This relationship was requested by jeltje on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. 
-is_transitive: true -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: connects_on -name: connects_on -namespace: sequence -def: "X connects_on Y, Z, R iff whenever Z is on a R, X is adjacent to a Y and adjacent to a Z." [PMID:20226267] -comment: Example: A splice_junction connects_on exon, exon, mature_transcript. -created_by: kareneilbeck -creation_date: 2010-10-14T01:38:51Z - -[Typedef] -id: contained_by -name: contained_by -namespace: sequence -def: "X contained_by Y iff X starts after start of Y and X ends before end of Y." [PMID:20226267] -comment: The inverse is contains. Example: intein contained_by immature_peptide_region. -is_transitive: true -created_by: kareneilbeck -creation_date: 2010-10-14T01:26:16Z - -[Typedef] -id: contains -name: contains -namespace: sequence -def: "The inverse of contained_by." [PMID:20226267] -comment: Example: pre_miRNA contains miRNA_loop. -is_transitive: true -created_by: kareneilbeck -creation_date: 2010-10-14T01:32:15Z - -[Typedef] -id: derives_from -name: derives_from -namespace: sequence -subset: SOFA -is_transitive: true - -[Typedef] -id: disconnected_from -name: disconnected_from -namespace: sequence -def: "X is disconnected_from Y iff it is not the case that X overlaps Y." [PMID:20226267] -created_by: kareneilbeck -creation_date: 2010-10-14T01:42:10Z - -[Typedef] -id: edited_from -name: edited_from -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:45Z - -[Typedef] -id: edited_to -name: edited_to -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:11Z - -[Typedef] -id: evidence_for_feature -name: evidence_for_feature -namespace: sequence -def: "B is evidence_for_feature A, if an instance of B supports the existence of A." [SO:ke] -comment: This relationship was requested by nlw on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. 
-is_transitive: true - -[Typedef] -id: exemplar_of -name: exemplar_of -namespace: sequence -def: "X is exemplar of Y if X is the best evidence for Y." [SO:ke] -comment: Tracker id: 2594157. - -[Typedef] -id: finished_by -name: finished_by -namespace: sequence -def: "Xy is finished_by Y if Y part of X, and X and Y share a 3' boundary." [PMID:20226267] -comment: Example CDS finished_by stop_codon. -created_by: kareneilbeck -creation_date: 2010-10-14T01:45:45Z - -[Typedef] -id: finishes -name: finishes -namespace: sequence -def: "X finishes Y if X is part_of Y and X and Y share a 3' or C terminal boundary." [PMID:20226267] -comment: Example: stop_codon finishes CDS. -created_by: kareneilbeck -creation_date: 2010-10-14T02:17:53Z - -[Typedef] -id: gained -name: gained -namespace: sequence -def: "X gained Y if X is a variant_of X' and Y part of X but not X'." [SO:ke] -comment: A relation with which to annotate the changes in a variant sequence with respect to a reference.\nFor example a variant transcript may gain a stop codon not present in the reference sequence. -created_by: kareneilbeck -creation_date: 2011-06-28T12:51:10Z - -[Typedef] -id: genome_of -name: genome_of -namespace: sequence - -[Typedef] -id: guided_by -name: guided_by -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:04Z - -[Typedef] -id: guides -name: guides -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:24Z - -[Typedef] -id: has_integral_part -name: has_integral_part -namespace: sequence -def: "X has_integral_part Y if and only if: X has_part Y and Y part_of X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: mRNA has_integral_part CDS. -is_a: has_part ! has_part -created_by: kareneilbeck -creation_date: 2009-08-19T12:01:46Z - -[Typedef] -id: has_origin -name: has_origin -namespace: sequence - -[Typedef] -id: has_part -name: has_part -namespace: sequence -def: "Inverse of part_of." 
[http://precedings.nature.com/documents/3495/version/1] -comment: Example: operon has_part gene. - -[Typedef] -id: has_quality -name: has_quality -namespace: sequence -comment: The relationship between a feature and an attribute. - -[Typedef] -id: homologous_to -name: homologous_to -namespace: sequence -subset: SOFA -is_symmetric: true -is_a: similar_to ! similar_to - -[Typedef] -id: integral_part_of -name: integral_part_of -namespace: sequence -def: "X integral_part_of Y if and only if: X part_of Y and Y has_part X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: exon integral_part_of transcript. -is_a: part_of ! part_of -created_by: kareneilbeck -creation_date: 2009-08-19T12:03:28Z - -[Typedef] -id: is_consecutive_sequence_of -name: is_consecutive_sequence_of -namespace: sequence -def: "R is_consecutive_sequence_of R iff every instance of R is equivalent to a collection of instances of U:u1, u2, un, such that no pair of ux uy is overlapping and for all ux, it is adjacent to ux-1 and ux+1, with the exception of the initial and terminal u1,and un (which may be identical)." [PMID:20226267] -comment: Example: region is consecutive_sequence of base. -created_by: kareneilbeck -creation_date: 2010-10-14T02:19:48Z - -[Typedef] -id: lost -name: lost -namespace: sequence -def: "X lost Y if X is a variant_of X' and Y part of X' but not X." [SO:ke] -comment: A relation with which to annotate the changes in a variant sequence with respect to a reference.\nFor example a variant transcript may have lost a stop codon present in the reference sequence. -created_by: kareneilbeck -creation_date: 2011-06-28T12:53:16Z - -[Typedef] -id: maximally_overlaps -name: maximally_overlaps -namespace: sequence -def: "A maximally_overlaps X iff all parts of A (including A itself) overlap both A and Y." [PMID:20226267] -comment: Example: non_coding_region_of_exon maximally_overlaps the intersections of exon and UTR. 
-created_by: kareneilbeck -creation_date: 2010-10-14T01:34:48Z - -[Typedef] -id: member_of -name: member_of -namespace: sequence -comment: A subtype of part_of. Inverse is collection_of. Winston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. -subset: SOFA -is_transitive: true -is_a: part_of ! part_of - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -namespace: sequence -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -subset: SOFA -is_a: homologous_to ! homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -namespace: sequence -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: overlaps -name: overlaps -namespace: sequence -def: "X overlaps Y iff there exists some Z such that Z contained_by X and Z contained_by Y." [PMID:20226267] -comment: Example: coding_exon overlaps CDS. -created_by: kareneilbeck -creation_date: 2010-10-14T01:33:15Z - -[Typedef] -id: paralogous_to -name: paralogous_to -namespace: sequence -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: part_of -name: part_of -namespace: sequence -def: "X part_of Y if X is a subregion of Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: amino_acid part_of polypeptide. -subset: SOFA -is_transitive: true - -[Typedef] -id: partial_evidence_for_feature -name: partial_evidence_for_feature -namespace: sequence -def: "B is partial_evidence_for_feature A if the extent of B supports part_of but not all of A." [SO:ke] -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: position_of -name: position_of -namespace: sequence - -[Typedef] -id: processed_from -name: processed_from -namespace: sequence -def: "Inverse of processed_into." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA processed_from miRNA_primary_transcript. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:14:00Z - -[Typedef] -id: processed_into -name: processed_into -namespace: sequence -def: "X is processed_into Y if a region X is modified to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA_primary_transcript processed into miRNA. -created_by: kareneilbeck -creation_date: 2009-08-19T12:15:02Z - -[Typedef] -id: recombined_from -name: recombined_from -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:21:03Z - -[Typedef] -id: recombined_to -name: recombined_to -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:20:07Z - -[Typedef] -id: sequence_of -name: sequence_of -namespace: sequence - -[Typedef] -id: similar_to -name: similar_to -namespace: sequence -subset: SOFA -is_symmetric: true - -[Typedef] -id: started_by -name: started_by -namespace: sequence -def: "X is strted_by Y if Y is part_of X and X and Y share a 5' boundary." [PMID:20226267] -comment: Example: CDS started_by start_codon. -created_by: kareneilbeck -creation_date: 2010-10-14T01:43:55Z - -[Typedef] -id: starts -name: starts -namespace: sequence -def: "X starts Y if X is part of Y, and A and Y share a 5' or N-terminal boundary." [PMID:20226267] -comment: Example: start_codon starts CDS. -created_by: kareneilbeck -creation_date: 2010-10-14T01:47:53Z - -[Typedef] -id: trans_spliced_from -name: trans_spliced_from -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:14Z - -[Typedef] -id: trans_spliced_to -name: trans_spliced_to -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:00Z - -[Typedef] -id: transcribed_from -name: transcribed_from -namespace: sequence -def: "X is transcribed_from Y if X is synthesized from template Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: primary_transcript transcribed_from gene. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:05:39Z - -[Typedef] -id: transcribed_to -name: transcribed_to -namespace: sequence -def: "Inverse of transcribed_from." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: gene transcribed_to primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:08:24Z - -[Typedef] -id: translates_to -name: translates_to -namespace: sequence -def: "Inverse of translation _of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: codon translates_to amino_acid. -created_by: kareneilbeck -creation_date: 2009-08-19T12:11:53Z - -[Typedef] -id: translation_of -name: translation_of -namespace: sequence -def: "X is translation of Y if Y is translated by ribosome to create X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: Polypeptide translation_of CDS. -created_by: kareneilbeck -creation_date: 2009-08-19T12:09:59Z - -[Typedef] -id: variant_of -name: variant_of -namespace: sequence -def: "A' is a variant (mutation) of A = definition every instance of A' is either an immediate mutation of some instance of A, or there is a chain of immediate mutation processes linking A' to some instance of A." [SO:immuno_workshop] -comment: Added to SO during the immunology workshop, June 2007. This relationship was approved by Barry Smith. - diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/0_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/0_correct_output.gff deleted file mode 100644 index fafe86ed5..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/0_correct_output.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2 -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458 -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717 -scaffold625 maker exon 340733 340841 . + . 
ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717 -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717 -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717 -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717 -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717 -scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146 -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker five_prime_UTR 558184 558190 . + . 
ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147 -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/0_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/0_test.gff deleted file mode 100644 index fafe86ed5..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/0_test.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2 -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458 -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717 -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717 -scaffold625 maker exon 341518 341628 . + . 
ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717 -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717 -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717 -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717 -scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146 -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker three_prime_UTR 564589 564780 . + . 
ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147 -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/10_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/10_correct_output.gff deleted file mode 100644 index 53e26fe56..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/10_correct_output.gff +++ /dev/null @@ -1,32 +0,0 @@ -##gff-version 3 -NC_003070.9 RefSeq gene 138513 139568 . + . ID=gene-2;db_xref=TAIR:AT1G01355,GeneID:6241268;locus_tag=AT1G01355 -NC_003070.9 RefSeq mRNA 138513 139568 . + . ID=nbis_nol2id-exon-2;Parent=gene-2;db_xref=GI:186478012,TAIR:AT1G01355,GeneID:6241268;exon_number=1;gbkey=mRNA;insd_transcript_id=NM_001123734.1;locus_tag=AT1G01355;product=Putative endonuclease or glycosyl hydrolase -NC_003070.9 RefSeq exon 138513 138541 . + . 
ID=exon-6;Parent=nbis_nol2id-exon-2;db_xref=GI:186478012,TAIR:AT1G01355,GeneID:6241268;exon_number=1;gbkey=mRNA;insd_transcript_id=NM_001123734.1;locus_tag=AT1G01355;product=Putative endonuclease or glycosyl hydrolase -NC_003070.9 RefSeq exon 138597 138778 . + . ID=exon-7;Parent=nbis_nol2id-exon-2;db_xref=GI:186478012,TAIR:AT1G01355,GeneID:6241268;exon_number=2;gbkey=mRNA;insd_transcript_id=NM_001123734.1;locus_tag=AT1G01355;product=Putative endonuclease or glycosyl hydrolase -NC_003070.9 RefSeq exon 138863 139114 . + . ID=exon-8;Parent=nbis_nol2id-exon-2;db_xref=GI:186478012,TAIR:AT1G01355,GeneID:6241268;exon_number=3;gbkey=mRNA;insd_transcript_id=NM_001123734.1;locus_tag=AT1G01355;product=Putative endonuclease or glycosyl hydrolase -NC_003070.9 RefSeq exon 139203 139345 . + . ID=exon-9;Parent=nbis_nol2id-exon-2;db_xref=GI:186478012,TAIR:AT1G01355,GeneID:6241268;exon_number=4;gbkey=mRNA;insd_transcript_id=NM_001123734.1;locus_tag=AT1G01355;product=Putative endonuclease or glycosyl hydrolase -NC_003070.9 RefSeq exon 139488 139568 . + . ID=exon-10;Parent=nbis_nol2id-exon-2;db_xref=GI:186478012,TAIR:AT1G01355,GeneID:6241268;exon_number=5;gbkey=mRNA;insd_transcript_id=NM_001123734.1;locus_tag=AT1G01355;product=Putative endonuclease or glycosyl hydrolase -NC_003070.9 RefSeq CDS 138513 138541 . 
+ 0 ID=cds-5;Parent=nbis_nol2id-exon-2;db_xref=GI:186478013,TAIR:AT1G01355,GeneID:6241268;exon_number=1;locus_tag=AT1G01355;note=Putative endonuclease or glycosyl hydrolase%3B FUNCTIONS IN: zinc ion binding%3B INVOLVED IN: biological_process unknown%3B LOCATED IN: intracellular%3B CONTAINS InterPro DOMAIN/s: Zinc finger%2C C2H2-type (InterPro:IPR007087)%3B BEST Arabidopsis thaliana protein match is: Putative endonuclease or glycosyl hydrolase (TAIR:AT5G35640.1)%3B Has 30201 Blast hits to 17322 proteins in 780 species: Archae - 12%3B Bacteria - 1396%3B Metazoa - 17338%3B Fungi - 3422%3B Plants - 5037%3B Viruses - 0%3B Other Eukaryotes - 2996 (source: NCBI BLink).;product=Putative endonuclease or glycosyl hydrolase;protein_id=NP_001117206.1 -NC_003070.9 RefSeq CDS 138597 138778 . + 1 ID=cds-6;Parent=nbis_nol2id-exon-2;db_xref=GI:186478013,TAIR:AT1G01355,GeneID:6241268;exon_number=2;locus_tag=AT1G01355;note=Putative endonuclease or glycosyl hydrolase%3B FUNCTIONS IN: zinc ion binding%3B INVOLVED IN: biological_process unknown%3B LOCATED IN: intracellular%3B CONTAINS InterPro DOMAIN/s: Zinc finger%2C C2H2-type (InterPro:IPR007087)%3B BEST Arabidopsis thaliana protein match is: Putative endonuclease or glycosyl hydrolase (TAIR:AT5G35640.1)%3B Has 30201 Blast hits to 17322 proteins in 780 species: Archae - 12%3B Bacteria - 1396%3B Metazoa - 17338%3B Fungi - 3422%3B Plants - 5037%3B Viruses - 0%3B Other Eukaryotes - 2996 (source: NCBI BLink).;product=Putative endonuclease or glycosyl hydrolase;protein_id=NP_001117206.1 -NC_003070.9 RefSeq CDS 138863 139114 . 
+ 2 ID=cds-7;Parent=nbis_nol2id-exon-2;db_xref=GI:186478013,TAIR:AT1G01355,GeneID:6241268;exon_number=3;locus_tag=AT1G01355;note=Putative endonuclease or glycosyl hydrolase%3B FUNCTIONS IN: zinc ion binding%3B INVOLVED IN: biological_process unknown%3B LOCATED IN: intracellular%3B CONTAINS InterPro DOMAIN/s: Zinc finger%2C C2H2-type (InterPro:IPR007087)%3B BEST Arabidopsis thaliana protein match is: Putative endonuclease or glycosyl hydrolase (TAIR:AT5G35640.1)%3B Has 30201 Blast hits to 17322 proteins in 780 species: Archae - 12%3B Bacteria - 1396%3B Metazoa - 17338%3B Fungi - 3422%3B Plants - 5037%3B Viruses - 0%3B Other Eukaryotes - 2996 (source: NCBI BLink).;product=Putative endonuclease or glycosyl hydrolase;protein_id=NP_001117206.1 -NC_003070.9 RefSeq CDS 139203 139345 . + 2 ID=cds-8;Parent=nbis_nol2id-exon-2;db_xref=GI:186478013,TAIR:AT1G01355,GeneID:6241268;exon_number=4;locus_tag=AT1G01355;note=Putative endonuclease or glycosyl hydrolase%3B FUNCTIONS IN: zinc ion binding%3B INVOLVED IN: biological_process unknown%3B LOCATED IN: intracellular%3B CONTAINS InterPro DOMAIN/s: Zinc finger%2C C2H2-type (InterPro:IPR007087)%3B BEST Arabidopsis thaliana protein match is: Putative endonuclease or glycosyl hydrolase (TAIR:AT5G35640.1)%3B Has 30201 Blast hits to 17322 proteins in 780 species: Archae - 12%3B Bacteria - 1396%3B Metazoa - 17338%3B Fungi - 3422%3B Plants - 5037%3B Viruses - 0%3B Other Eukaryotes - 2996 (source: NCBI BLink).;product=Putative endonuclease or glycosyl hydrolase;protein_id=NP_001117206.1 -NC_003070.9 RefSeq CDS 139488 139565 . 
+ 0 ID=cds-9;Parent=nbis_nol2id-exon-2;db_xref=GI:186478013,TAIR:AT1G01355,GeneID:6241268;exon_number=5;locus_tag=AT1G01355;note=Putative endonuclease or glycosyl hydrolase%3B FUNCTIONS IN: zinc ion binding%3B INVOLVED IN: biological_process unknown%3B LOCATED IN: intracellular%3B CONTAINS InterPro DOMAIN/s: Zinc finger%2C C2H2-type (InterPro:IPR007087)%3B BEST Arabidopsis thaliana protein match is: Putative endonuclease or glycosyl hydrolase (TAIR:AT5G35640.1)%3B Has 30201 Blast hits to 17322 proteins in 780 species: Archae - 12%3B Bacteria - 1396%3B Metazoa - 17338%3B Fungi - 3422%3B Plants - 5037%3B Viruses - 0%3B Other Eukaryotes - 2996 (source: NCBI BLink).;product=Putative endonuclease or glycosyl hydrolase;protein_id=NP_001117206.1 -NC_003070.9 RefSeq start_codon 138513 138515 . + 0 ID=start_codon-2;Parent=nbis_nol2id-exon-2;db_xref=GI:186478013,TAIR:AT1G01355,GeneID:6241268;exon_number=1;locus_tag=AT1G01355;note=Putative endonuclease or glycosyl hydrolase%3B FUNCTIONS IN: zinc ion binding%3B INVOLVED IN: biological_process unknown%3B LOCATED IN: intracellular%3B CONTAINS InterPro DOMAIN/s: Zinc finger%2C C2H2-type (InterPro:IPR007087)%3B BEST Arabidopsis thaliana protein match is: Putative endonuclease or glycosyl hydrolase (TAIR:AT5G35640.1)%3B Has 30201 Blast hits to 17322 proteins in 780 species: Archae - 12%3B Bacteria - 1396%3B Metazoa - 17338%3B Fungi - 3422%3B Plants - 5037%3B Viruses - 0%3B Other Eukaryotes - 2996 (source: NCBI BLink).;product=Putative endonuclease or glycosyl hydrolase;protein_id=NP_001117206.1 -NC_003070.9 RefSeq stop_codon 139566 139568 . 
+ 0 ID=stop_codon-2;Parent=nbis_nol2id-exon-2;db_xref=GI:186478013,TAIR:AT1G01355,GeneID:6241268;exon_number=1;locus_tag=AT1G01355;note=Putative endonuclease or glycosyl hydrolase%3B FUNCTIONS IN: zinc ion binding%3B INVOLVED IN: biological_process unknown%3B LOCATED IN: intracellular%3B CONTAINS InterPro DOMAIN/s: Zinc finger%2C C2H2-type (InterPro:IPR007087)%3B BEST Arabidopsis thaliana protein match is: Putative endonuclease or glycosyl hydrolase (TAIR:AT5G35640.1)%3B Has 30201 Blast hits to 17322 proteins in 780 species: Archae - 12%3B Bacteria - 1396%3B Metazoa - 17338%3B Fungi - 3422%3B Plants - 5037%3B Viruses - 0%3B Other Eukaryotes - 2996 (source: NCBI BLink).;product=Putative endonuclease or glycosyl hydrolase;protein_id=NP_001117206.1 -NC_003070.9 RefSeq three_prime_UTR 139566 139568 . + . ID=nbis_NEW-three_prime_utr-2;Parent=nbis_nol2id-exon-2;db_xref=GI:186478012,TAIR:AT1G01355,GeneID:6241268;exon_number=1;gbkey=mRNA;insd_transcript_id=NM_001123734.1;locus_tag=AT1G01355;product=Putative endonuclease or glycosyl hydrolase -NC_003070.9 RefSeq gene 159856 162572 . - . ID=gene-1;db_xref=TAIR:AT1G01440,GeneID:837155;gene_synonym=F6F3.24%3B F6F3_24;locus_tag=AT1G01440 -NC_003070.9 RefSeq mRNA 159856 162572 . - . ID=nbis_nol2id-exon-1;Parent=gene-1;db_xref=GI:145334948,TAIR:AT1G01440,GeneID:837155;exon_number=1;gbkey=mRNA;gene_synonym=F6F3.24%3B F6F3_24;inference=Similar to DNA sequence:INSD:BT010599.1,Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;insd_transcript_id=NM_100026.2;locus_tag=AT1G01440;product=uncharacterized protein -NC_003070.9 RefSeq exon 159856 159992 . - . 
ID=exon-5;Parent=nbis_nol2id-exon-1;db_xref=GI:145334948,TAIR:AT1G01440,GeneID:837155;exon_number=5;gbkey=mRNA;gene_synonym=F6F3.24%3B F6F3_24;inference=Similar to DNA sequence:INSD:BT010599.1,Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;insd_transcript_id=NM_100026.2;locus_tag=AT1G01440;product=uncharacterized protein -NC_003070.9 RefSeq exon 160079 160302 . - . ID=exon-4;Parent=nbis_nol2id-exon-1;db_xref=GI:145334948,TAIR:AT1G01440,GeneID:837155;exon_number=4;gbkey=mRNA;gene_synonym=F6F3.24%3B F6F3_24;inference=Similar to DNA sequence:INSD:BT010599.1,Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;insd_transcript_id=NM_100026.2;locus_tag=AT1G01440;product=uncharacterized protein -NC_003070.9 RefSeq exon 160418 160478 . - . ID=exon-3;Parent=nbis_nol2id-exon-1;db_xref=GI:145334948,TAIR:AT1G01440,GeneID:837155;exon_number=3;gbkey=mRNA;gene_synonym=F6F3.24%3B F6F3_24;inference=Similar to DNA sequence:INSD:BT010599.1,Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;insd_transcript_id=NM_100026.2;locus_tag=AT1G01440;product=uncharacterized protein -NC_003070.9 RefSeq exon 160568 162280 . - . 
ID=exon-2;Parent=nbis_nol2id-exon-1;db_xref=GI:145334948,TAIR:AT1G01440,GeneID:837155;exon_number=2;gbkey=mRNA;gene_synonym=F6F3.24%3B F6F3_24;inference=Similar to DNA sequence:INSD:BT010599.1,Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;insd_transcript_id=NM_100026.2;locus_tag=AT1G01440;product=uncharacterized protein -NC_003070.9 RefSeq exon 162487 162572 . - . ID=exon-1;Parent=nbis_nol2id-exon-1;db_xref=GI:145334948,TAIR:AT1G01440,GeneID:837155;exon_number=1;gbkey=mRNA;gene_synonym=F6F3.24%3B F6F3_24;inference=Similar to DNA sequence:INSD:BT010599.1,Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;insd_transcript_id=NM_100026.2;locus_tag=AT1G01440;product=uncharacterized protein -NC_003070.9 RefSeq CDS 159938 159992 . - 1 ID=cds-4;Parent=nbis_nol2id-exon-1;db_xref=GI:15223406,TAIR:AT1G01440,GeneID:837155;exon_number=4;gene_synonym=F6F3.24%3B F6F3_24;inference=Similar to DNA sequence:INSD:BT010599.1,Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;locus_tag=AT1G01440;note=Protein of unknown function (DUF3133)%3B CONTAINS InterPro DOMAIN/s: Protein of unknown function DUF3133 (InterPro:IPR021480)%3B BEST Arabidopsis thaliana protein match is: Protein of unknown function (DUF3133) (TAIR:AT4G01090.1)%3B Has 702 Blast hits to 662 proteins in 107 species: Archae - 0%3B Bacteria - 15%3B Metazoa - 235%3B Fungi - 59%3B Plants - 328%3B Viruses - 0%3B Other Eukaryotes - 65 (source: NCBI BLink).;product=hypothetical protein;protein_id=NP_171651.1 -NC_003070.9 RefSeq CDS 160079 160302 . 
- 0 ID=cds-3;Parent=nbis_nol2id-exon-1;db_xref=GI:15223406,TAIR:AT1G01440,GeneID:837155;exon_number=3;gene_synonym=F6F3.24%3B F6F3_24;inference=Similar to DNA sequence:INSD:BT010599.1,Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;locus_tag=AT1G01440;note=Protein of unknown function (DUF3133)%3B CONTAINS InterPro DOMAIN/s: Protein of unknown function DUF3133 (InterPro:IPR021480)%3B BEST Arabidopsis thaliana protein match is: Protein of unknown function (DUF3133) (TAIR:AT4G01090.1)%3B Has 702 Blast hits to 662 proteins in 107 species: Archae - 0%3B Bacteria - 15%3B Metazoa - 235%3B Fungi - 59%3B Plants - 328%3B Viruses - 0%3B Other Eukaryotes - 65 (source: NCBI BLink).;product=hypothetical protein;protein_id=NP_171651.1 -NC_003070.9 RefSeq CDS 160418 160478 . - 1 ID=cds-2;Parent=nbis_nol2id-exon-1;db_xref=GI:15223406,TAIR:AT1G01440,GeneID:837155;exon_number=2;gene_synonym=F6F3.24%3B F6F3_24;inference=Similar to DNA sequence:INSD:BT010599.1,Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;locus_tag=AT1G01440;note=Protein of unknown function (DUF3133)%3B CONTAINS InterPro DOMAIN/s: Protein of unknown function DUF3133 (InterPro:IPR021480)%3B BEST Arabidopsis thaliana protein match is: Protein of unknown function (DUF3133) (TAIR:AT4G01090.1)%3B Has 702 Blast hits to 662 proteins in 107 species: Archae - 0%3B Bacteria - 15%3B Metazoa - 235%3B Fungi - 59%3B Plants - 328%3B Viruses - 0%3B Other Eukaryotes - 65 (source: NCBI BLink).;product=hypothetical protein;protein_id=NP_171651.1 -NC_003070.9 RefSeq CDS 160568 162219 . 
- 0 ID=cds-1;Parent=nbis_nol2id-exon-1;db_xref=GI:15223406,TAIR:AT1G01440,GeneID:837155;exon_number=1;gene_synonym=F6F3.24%3B F6F3_24;inference=Similar to DNA sequence:INSD:BT010599.1,Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;locus_tag=AT1G01440;note=Protein of unknown function (DUF3133)%3B CONTAINS InterPro DOMAIN/s: Protein of unknown function DUF3133 (InterPro:IPR021480)%3B BEST Arabidopsis thaliana protein match is: Protein of unknown function (DUF3133) (TAIR:AT4G01090.1)%3B Has 702 Blast hits to 662 proteins in 107 species: Archae - 0%3B Bacteria - 15%3B Metazoa - 235%3B Fungi - 59%3B Plants - 328%3B Viruses - 0%3B Other Eukaryotes - 65 (source: NCBI BLink).;product=hypothetical protein;protein_id=NP_171651.1 -NC_003070.9 RefSeq five_prime_UTR 162220 162280 . - . ID=nbis_NEW-five_prime_utr-1;Parent=nbis_nol2id-exon-1;db_xref=GI:145334948,TAIR:AT1G01440,GeneID:837155;exon_number=1;gbkey=mRNA;gene_synonym=F6F3.24%3B F6F3_24;inference=Similar to DNA sequence:INSD:BT010599.1,Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;insd_transcript_id=NM_100026.2;locus_tag=AT1G01440;product=uncharacterized protein -NC_003070.9 RefSeq five_prime_UTR 162487 162572 . - . 
ID=nbis_NEW-five_prime_utr-1;Parent=nbis_nol2id-exon-1;db_xref=GI:145334948,TAIR:AT1G01440,GeneID:837155;exon_number=1;gbkey=mRNA;gene_synonym=F6F3.24%3B F6F3_24;inference=Similar to DNA sequence:INSD:BT010599.1,Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;insd_transcript_id=NM_100026.2;locus_tag=AT1G01440;product=uncharacterized protein -NC_003070.9 RefSeq start_codon 162217 162219 . - 0 ID=start_codon-1;Parent=nbis_nol2id-exon-1;db_xref=GI:15223406,TAIR:AT1G01440,GeneID:837155;exon_number=1;gene_synonym=F6F3.24%3B F6F3_24;inference=Similar to DNA sequence:INSD:BT010599.1,Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;locus_tag=AT1G01440;note=Protein of unknown function (DUF3133)%3B CONTAINS InterPro DOMAIN/s: Protein of unknown function DUF3133 (InterPro:IPR021480)%3B BEST Arabidopsis thaliana protein match is: Protein of unknown function (DUF3133) (TAIR:AT4G01090.1)%3B Has 702 Blast hits to 662 proteins in 107 species: Archae - 0%3B Bacteria - 15%3B Metazoa - 235%3B Fungi - 59%3B Plants - 328%3B Viruses - 0%3B Other Eukaryotes - 65 (source: NCBI BLink).;product=hypothetical protein;protein_id=NP_171651.1 -NC_003070.9 RefSeq stop_codon 159935 159937 . 
- 0 ID=stop_codon-1;Parent=nbis_nol2id-exon-1;db_xref=GI:15223406,TAIR:AT1G01440,GeneID:837155;exon_number=1;gene_synonym=F6F3.24%3B F6F3_24;inference=Similar to DNA sequence:INSD:BT010599.1,Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;locus_tag=AT1G01440;note=Protein of unknown function (DUF3133)%3B CONTAINS InterPro DOMAIN/s: Protein of unknown function DUF3133 (InterPro:IPR021480)%3B BEST Arabidopsis thaliana protein match is: Protein of unknown function (DUF3133) (TAIR:AT4G01090.1)%3B Has 702 Blast hits to 662 proteins in 107 species: Archae - 0%3B Bacteria - 15%3B Metazoa - 235%3B Fungi - 59%3B Plants - 328%3B Viruses - 0%3B Other Eukaryotes - 65 (source: NCBI BLink).;product=hypothetical protein;protein_id=NP_171651.1 -NC_003070.9 RefSeq three_prime_UTR 159856 159937 . - . ID=nbis_NEW-three_prime_utr-1;Parent=nbis_nol2id-exon-1;db_xref=GI:145334948,TAIR:AT1G01440,GeneID:837155;exon_number=1;gbkey=mRNA;gene_synonym=F6F3.24%3B F6F3_24;inference=Similar to DNA sequence:INSD:BT010599.1,Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;insd_transcript_id=NM_100026.2;locus_tag=AT1G01440;product=uncharacterized protein diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/10_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/10_test.gff deleted file mode 100644 index ee9d2951b..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/10_test.gff +++ /dev/null @@ -1,29 +0,0 @@ -##gff-version 3 -#!gff-spec-version 1.14 -#!source-version NCBI C++ formatter 0.2 -##Type DNA NC_003070.9 -NC_003070.9 RefSeq gene 159856 162572 . - . 
locus_tag=AT1G01440;gene_synonym=F6F3.24%3B F6F3_24;db_xref=TAIR:AT1G01440;db_xref=GeneID:837155 -NC_003070.9 RefSeq exon 162487 162572 . - . gbkey=mRNA;locus_tag=AT1G01440;gene_synonym=F6F3.24%3B F6F3_24;product=uncharacterized protein;inference=Similar to DNA sequence:INSD:BT010599.1;inference=Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;insd_transcript_id=NM_100026.2;db_xref=GI:145334948;db_xref=TAIR:AT1G01440;db_xref=GeneID:837155;exon_number=1 -NC_003070.9 RefSeq exon 160568 162280 . - . gbkey=mRNA;locus_tag=AT1G01440;gene_synonym=F6F3.24%3B F6F3_24;product=uncharacterized protein;inference=Similar to DNA sequence:INSD:BT010599.1;inference=Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;insd_transcript_id=NM_100026.2;db_xref=GI:145334948;db_xref=TAIR:AT1G01440;db_xref=GeneID:837155;exon_number=2 -NC_003070.9 RefSeq exon 160418 160478 . - . gbkey=mRNA;locus_tag=AT1G01440;gene_synonym=F6F3.24%3B F6F3_24;product=uncharacterized protein;inference=Similar to DNA sequence:INSD:BT010599.1;inference=Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;insd_transcript_id=NM_100026.2;db_xref=GI:145334948;db_xref=TAIR:AT1G01440;db_xref=GeneID:837155;exon_number=3 -NC_003070.9 RefSeq exon 160079 160302 . - . 
gbkey=mRNA;locus_tag=AT1G01440;gene_synonym=F6F3.24%3B F6F3_24;product=uncharacterized protein;inference=Similar to DNA sequence:INSD:BT010599.1;inference=Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;insd_transcript_id=NM_100026.2;db_xref=GI:145334948;db_xref=TAIR:AT1G01440;db_xref=GeneID:837155;exon_number=4 -NC_003070.9 RefSeq exon 159856 159992 . - . gbkey=mRNA;locus_tag=AT1G01440;gene_synonym=F6F3.24%3B F6F3_24;product=uncharacterized protein;inference=Similar to DNA sequence:INSD:BT010599.1;inference=Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;insd_transcript_id=NM_100026.2;db_xref=GI:145334948;db_xref=TAIR:AT1G01440;db_xref=GeneID:837155;exon_number=5 -NC_003070.9 RefSeq CDS 160568 162219 . - 0 locus_tag=AT1G01440;gene_synonym=F6F3.24%3B F6F3_24;inference=Similar to DNA sequence:INSD:BT010599.1;inference=Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;note=Protein of unknown function %28DUF3133%29%3B CONTAINS InterPro DOMAIN%2Fs: Protein of unknown function DUF3133 %28InterPro:IPR021480%29%3B BEST Arabidopsis thaliana protein match is: Protein of unknown function %28DUF3133%29 %28TAIR:AT4G01090.1%29%3B Has 702 Blast hits to 662 proteins in 107 species: Archae - 0%3B Bacteria - 15%3B Metazoa - 235%3B Fungi - 59%3B Plants - 328%3B Viruses - 0%3B Other Eukaryotes - 65 %28source: NCBI BLink%29.;product=hypothetical protein;protein_id=NP_171651.1;db_xref=GI:15223406;db_xref=TAIR:AT1G01440;db_xref=GeneID:837155;exon_number=1 -NC_003070.9 RefSeq CDS 160418 160478 . 
- 1 locus_tag=AT1G01440;gene_synonym=F6F3.24%3B F6F3_24;inference=Similar to DNA sequence:INSD:BT010599.1;inference=Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;note=Protein of unknown function %28DUF3133%29%3B CONTAINS InterPro DOMAIN%2Fs: Protein of unknown function DUF3133 %28InterPro:IPR021480%29%3B BEST Arabidopsis thaliana protein match is: Protein of unknown function %28DUF3133%29 %28TAIR:AT4G01090.1%29%3B Has 702 Blast hits to 662 proteins in 107 species: Archae - 0%3B Bacteria - 15%3B Metazoa - 235%3B Fungi - 59%3B Plants - 328%3B Viruses - 0%3B Other Eukaryotes - 65 %28source: NCBI BLink%29.;product=hypothetical protein;protein_id=NP_171651.1;db_xref=GI:15223406;db_xref=TAIR:AT1G01440;db_xref=GeneID:837155;exon_number=2 -NC_003070.9 RefSeq CDS 160079 160302 . - 0 locus_tag=AT1G01440;gene_synonym=F6F3.24%3B F6F3_24;inference=Similar to DNA sequence:INSD:BT010599.1;inference=Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;note=Protein of unknown function %28DUF3133%29%3B CONTAINS InterPro DOMAIN%2Fs: Protein of unknown function DUF3133 %28InterPro:IPR021480%29%3B BEST Arabidopsis thaliana protein match is: Protein of unknown function %28DUF3133%29 %28TAIR:AT4G01090.1%29%3B Has 702 Blast hits to 662 proteins in 107 species: Archae - 0%3B Bacteria - 15%3B Metazoa - 235%3B Fungi - 59%3B Plants - 328%3B Viruses - 0%3B Other Eukaryotes - 65 %28source: NCBI BLink%29.;product=hypothetical protein;protein_id=NP_171651.1;db_xref=GI:15223406;db_xref=TAIR:AT1G01440;db_xref=GeneID:837155;exon_number=3 -NC_003070.9 RefSeq CDS 159938 159992 . 
- 1 locus_tag=AT1G01440;gene_synonym=F6F3.24%3B F6F3_24;inference=Similar to DNA sequence:INSD:BT010599.1;inference=Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;note=Protein of unknown function %28DUF3133%29%3B CONTAINS InterPro DOMAIN%2Fs: Protein of unknown function DUF3133 %28InterPro:IPR021480%29%3B BEST Arabidopsis thaliana protein match is: Protein of unknown function %28DUF3133%29 %28TAIR:AT4G01090.1%29%3B Has 702 Blast hits to 662 proteins in 107 species: Archae - 0%3B Bacteria - 15%3B Metazoa - 235%3B Fungi - 59%3B Plants - 328%3B Viruses - 0%3B Other Eukaryotes - 65 %28source: NCBI BLink%29.;product=hypothetical protein;protein_id=NP_171651.1;db_xref=GI:15223406;db_xref=TAIR:AT1G01440;db_xref=GeneID:837155;exon_number=4 -NC_003070.9 RefSeq start_codon 162217 162219 . - 0 locus_tag=AT1G01440;gene_synonym=F6F3.24%3B F6F3_24;inference=Similar to DNA sequence:INSD:BT010599.1;inference=Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;note=Protein of unknown function %28DUF3133%29%3B CONTAINS InterPro DOMAIN%2Fs: Protein of unknown function DUF3133 %28InterPro:IPR021480%29%3B BEST Arabidopsis thaliana protein match is: Protein of unknown function %28DUF3133%29 %28TAIR:AT4G01090.1%29%3B Has 702 Blast hits to 662 proteins in 107 species: Archae - 0%3B Bacteria - 15%3B Metazoa - 235%3B Fungi - 59%3B Plants - 328%3B Viruses - 0%3B Other Eukaryotes - 65 %28source: NCBI BLink%29.;product=hypothetical protein;protein_id=NP_171651.1;db_xref=GI:15223406;db_xref=TAIR:AT1G01440;db_xref=GeneID:837155;exon_number=1 -NC_003070.9 RefSeq stop_codon 159935 159937 . 
- 0 locus_tag=AT1G01440;gene_synonym=F6F3.24%3B F6F3_24;inference=Similar to DNA sequence:INSD:BT010599.1;inference=Similar to RNA sequence%2C EST:INSD:EL114090.1%2CINSD:DR364210.1%2CINSD:EL143259.1%2CINSD:EH915792.1%2CINSD:EL008688.1%2CINSD:DR383577.1%2CINSD:ES010491.1%2CINSD:EL054910.1%2CINSD:ES062605.1%2CINSD:EL301702.1;note=Protein of unknown function %28DUF3133%29%3B CONTAINS InterPro DOMAIN%2Fs: Protein of unknown function DUF3133 %28InterPro:IPR021480%29%3B BEST Arabidopsis thaliana protein match is: Protein of unknown function %28DUF3133%29 %28TAIR:AT4G01090.1%29%3B Has 702 Blast hits to 662 proteins in 107 species: Archae - 0%3B Bacteria - 15%3B Metazoa - 235%3B Fungi - 59%3B Plants - 328%3B Viruses - 0%3B Other Eukaryotes - 65 %28source: NCBI BLink%29.;product=hypothetical protein;protein_id=NP_171651.1;db_xref=GI:15223406;db_xref=TAIR:AT1G01440;db_xref=GeneID:837155;exon_number=1 -NC_003070.9 RefSeq gene 138513 139568 . + . locus_tag=AT1G01355;db_xref=TAIR:AT1G01355;db_xref=GeneID:6241268 -NC_003070.9 RefSeq exon 138513 138541 . + . gbkey=mRNA;locus_tag=AT1G01355;product=Putative endonuclease or glycosyl hydrolase;insd_transcript_id=NM_001123734.1;db_xref=GI:186478012;db_xref=TAIR:AT1G01355;db_xref=GeneID:6241268;exon_number=1 -NC_003070.9 RefSeq exon 138597 138778 . + . gbkey=mRNA;locus_tag=AT1G01355;product=Putative endonuclease or glycosyl hydrolase;insd_transcript_id=NM_001123734.1;db_xref=GI:186478012;db_xref=TAIR:AT1G01355;db_xref=GeneID:6241268;exon_number=2 -NC_003070.9 RefSeq exon 138863 139114 . + . gbkey=mRNA;locus_tag=AT1G01355;product=Putative endonuclease or glycosyl hydrolase;insd_transcript_id=NM_001123734.1;db_xref=GI:186478012;db_xref=TAIR:AT1G01355;db_xref=GeneID:6241268;exon_number=3 -NC_003070.9 RefSeq exon 139203 139345 . + . 
gbkey=mRNA;locus_tag=AT1G01355;product=Putative endonuclease or glycosyl hydrolase;insd_transcript_id=NM_001123734.1;db_xref=GI:186478012;db_xref=TAIR:AT1G01355;db_xref=GeneID:6241268;exon_number=4 -NC_003070.9 RefSeq exon 139488 139568 . + . gbkey=mRNA;locus_tag=AT1G01355;product=Putative endonuclease or glycosyl hydrolase;insd_transcript_id=NM_001123734.1;db_xref=GI:186478012;db_xref=TAIR:AT1G01355;db_xref=GeneID:6241268;exon_number=5 -NC_003070.9 RefSeq CDS 138513 138541 . + 0 locus_tag=AT1G01355;note=Putative endonuclease or glycosyl hydrolase%3B FUNCTIONS IN: zinc ion binding%3B INVOLVED IN: biological_process unknown%3B LOCATED IN: intracellular%3B CONTAINS InterPro DOMAIN%2Fs: Zinc finger%2C C2H2-type %28InterPro:IPR007087%29%3B BEST Arabidopsis thaliana protein match is: Putative endonuclease or glycosyl hydrolase %28TAIR:AT5G35640.1%29%3B Has 30201 Blast hits to 17322 proteins in 780 species: Archae - 12%3B Bacteria - 1396%3B Metazoa - 17338%3B Fungi - 3422%3B Plants - 5037%3B Viruses - 0%3B Other Eukaryotes - 2996 %28source: NCBI BLink%29.;product=Putative endonuclease or glycosyl hydrolase;protein_id=NP_001117206.1;db_xref=GI:186478013;db_xref=TAIR:AT1G01355;db_xref=GeneID:6241268;exon_number=1 -NC_003070.9 RefSeq CDS 138597 138778 . 
+ 1 locus_tag=AT1G01355;note=Putative endonuclease or glycosyl hydrolase%3B FUNCTIONS IN: zinc ion binding%3B INVOLVED IN: biological_process unknown%3B LOCATED IN: intracellular%3B CONTAINS InterPro DOMAIN%2Fs: Zinc finger%2C C2H2-type %28InterPro:IPR007087%29%3B BEST Arabidopsis thaliana protein match is: Putative endonuclease or glycosyl hydrolase %28TAIR:AT5G35640.1%29%3B Has 30201 Blast hits to 17322 proteins in 780 species: Archae - 12%3B Bacteria - 1396%3B Metazoa - 17338%3B Fungi - 3422%3B Plants - 5037%3B Viruses - 0%3B Other Eukaryotes - 2996 %28source: NCBI BLink%29.;product=Putative endonuclease or glycosyl hydrolase;protein_id=NP_001117206.1;db_xref=GI:186478013;db_xref=TAIR:AT1G01355;db_xref=GeneID:6241268;exon_number=2 -NC_003070.9 RefSeq CDS 138863 139114 . + 2 locus_tag=AT1G01355;note=Putative endonuclease or glycosyl hydrolase%3B FUNCTIONS IN: zinc ion binding%3B INVOLVED IN: biological_process unknown%3B LOCATED IN: intracellular%3B CONTAINS InterPro DOMAIN%2Fs: Zinc finger%2C C2H2-type %28InterPro:IPR007087%29%3B BEST Arabidopsis thaliana protein match is: Putative endonuclease or glycosyl hydrolase %28TAIR:AT5G35640.1%29%3B Has 30201 Blast hits to 17322 proteins in 780 species: Archae - 12%3B Bacteria - 1396%3B Metazoa - 17338%3B Fungi - 3422%3B Plants - 5037%3B Viruses - 0%3B Other Eukaryotes - 2996 %28source: NCBI BLink%29.;product=Putative endonuclease or glycosyl hydrolase;protein_id=NP_001117206.1;db_xref=GI:186478013;db_xref=TAIR:AT1G01355;db_xref=GeneID:6241268;exon_number=3 -NC_003070.9 RefSeq CDS 139203 139345 . 
+ 2 locus_tag=AT1G01355;note=Putative endonuclease or glycosyl hydrolase%3B FUNCTIONS IN: zinc ion binding%3B INVOLVED IN: biological_process unknown%3B LOCATED IN: intracellular%3B CONTAINS InterPro DOMAIN%2Fs: Zinc finger%2C C2H2-type %28InterPro:IPR007087%29%3B BEST Arabidopsis thaliana protein match is: Putative endonuclease or glycosyl hydrolase %28TAIR:AT5G35640.1%29%3B Has 30201 Blast hits to 17322 proteins in 780 species: Archae - 12%3B Bacteria - 1396%3B Metazoa - 17338%3B Fungi - 3422%3B Plants - 5037%3B Viruses - 0%3B Other Eukaryotes - 2996 %28source: NCBI BLink%29.;product=Putative endonuclease or glycosyl hydrolase;protein_id=NP_001117206.1;db_xref=GI:186478013;db_xref=TAIR:AT1G01355;db_xref=GeneID:6241268;exon_number=4 -NC_003070.9 RefSeq CDS 139488 139565 . + 0 locus_tag=AT1G01355;note=Putative endonuclease or glycosyl hydrolase%3B FUNCTIONS IN: zinc ion binding%3B INVOLVED IN: biological_process unknown%3B LOCATED IN: intracellular%3B CONTAINS InterPro DOMAIN%2Fs: Zinc finger%2C C2H2-type %28InterPro:IPR007087%29%3B BEST Arabidopsis thaliana protein match is: Putative endonuclease or glycosyl hydrolase %28TAIR:AT5G35640.1%29%3B Has 30201 Blast hits to 17322 proteins in 780 species: Archae - 12%3B Bacteria - 1396%3B Metazoa - 17338%3B Fungi - 3422%3B Plants - 5037%3B Viruses - 0%3B Other Eukaryotes - 2996 %28source: NCBI BLink%29.;product=Putative endonuclease or glycosyl hydrolase;protein_id=NP_001117206.1;db_xref=GI:186478013;db_xref=TAIR:AT1G01355;db_xref=GeneID:6241268;exon_number=5 -NC_003070.9 RefSeq start_codon 138513 138515 . 
+ 0 locus_tag=AT1G01355;note=Putative endonuclease or glycosyl hydrolase%3B FUNCTIONS IN: zinc ion binding%3B INVOLVED IN: biological_process unknown%3B LOCATED IN: intracellular%3B CONTAINS InterPro DOMAIN%2Fs: Zinc finger%2C C2H2-type %28InterPro:IPR007087%29%3B BEST Arabidopsis thaliana protein match is: Putative endonuclease or glycosyl hydrolase %28TAIR:AT5G35640.1%29%3B Has 30201 Blast hits to 17322 proteins in 780 species: Archae - 12%3B Bacteria - 1396%3B Metazoa - 17338%3B Fungi - 3422%3B Plants - 5037%3B Viruses - 0%3B Other Eukaryotes - 2996 %28source: NCBI BLink%29.;product=Putative endonuclease or glycosyl hydrolase;protein_id=NP_001117206.1;db_xref=GI:186478013;db_xref=TAIR:AT1G01355;db_xref=GeneID:6241268;exon_number=1 -NC_003070.9 RefSeq stop_codon 139566 139568 . + 0 locus_tag=AT1G01355;note=Putative endonuclease or glycosyl hydrolase%3B FUNCTIONS IN: zinc ion binding%3B INVOLVED IN: biological_process unknown%3B LOCATED IN: intracellular%3B CONTAINS InterPro DOMAIN%2Fs: Zinc finger%2C C2H2-type %28InterPro:IPR007087%29%3B BEST Arabidopsis thaliana protein match is: Putative endonuclease or glycosyl hydrolase %28TAIR:AT5G35640.1%29%3B Has 30201 Blast hits to 17322 proteins in 780 species: Archae - 12%3B Bacteria - 1396%3B Metazoa - 17338%3B Fungi - 3422%3B Plants - 5037%3B Viruses - 0%3B Other Eukaryotes - 2996 %28source: NCBI BLink%29.;product=Putative endonuclease or glycosyl hydrolase;protein_id=NP_001117206.1;db_xref=GI:186478013;db_xref=TAIR:AT1G01355;db_xref=GeneID:6241268;exon_number=1 \ No newline at end of file diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/11_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/11_correct_output.gff deleted file mode 100644 index 939e8e4c0..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/11_correct_output.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . 
ID=CLUHARG00000005458;Name=TUBB3_2;locus_tag=BBBBBB -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458;locus_tag=BBBBBB -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold789 maker gene 558184 564780 . + . ID=nbis_NEW-gene-1;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . ID=nbis_nol2id-exon-1;Parent=nbis_NEW-gene-1;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=nbis_nol2id-exon-1;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=nbis_nol2id-exon-1;locus_tag=AAAAA -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=nbis_nol2id-exon-1;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . 
ID=CLUHART00000006146:exon:998;Parent=nbis_nol2id-exon-1;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=nbis_nol2id-exon-1;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=nbis_nol2id-exon-1;locus_tag=AAAAA -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=nbis_nol2id-exon-1;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=nbis_nol2id-exon-1;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=nbis_nol2id-exon-1;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=nbis_nol2id-exon-1;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . ID=nbis_nol2id-exon-2;Parent=nbis_new-gene-1;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=nbis_nol2id-exon-2;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=nbis_nol2id-exon-2;locus_tag=AAAAA -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=nbis_nol2id-exon-2;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=nbis_nol2id-exon-2;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;Parent=nbis_nol2id-exon-2;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=nbis_nol2id-exon-2;locus_tag=AAAAA -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=nbis_nol2id-exon-2;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=nbis_nol2id-exon-2;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=nbis_nol2id-exon-2;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . 
ID=CLUHART00000006147:three_prime_utr;Parent=nbis_nol2id-exon-2;locus_tag=AAAAA diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/11_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/11_test.gff deleted file mode 100644 index 3bbb334c3..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/11_test.gff +++ /dev/null @@ -1,33 +0,0 @@ -##gff-version 3 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;locus_tag=AAAAA -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;locus_tag=AAAAA -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2;locus_tag=BBBBBB -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;locus_tag=BBBBBB -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;locus_tag=BBBBBB -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;locus_tag=BBBBBB -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;locus_tag=BBBBBB -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker CDS 340733 340841 . 
+ 0 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;locus_tag=BBBBBB -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;locus_tag=BBBBBB -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;locus_tag=AAAAA -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;locus_tag=AAAAA diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/12_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/12_correct_output.gff deleted file mode 100644 index 077b3cfe4..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/12_correct_output.gff +++ /dev/null @@ -1,35 +0,0 @@ -##gff-version 3 -1 ensembl_havana pseudogene 11869 14412 . + . ID=gene:ENSG00000223972;Name=DDX11L1;biotype=pseudogene;description=DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1 [Source:HGNC Symbol%3BAcc:37102];gene_id=ENSG00000223972;logic_name=ensembl_havana_gene;version=4 -1 ensembl_havana processed_transcript 11869 14409 . + . 
ID=transcript:ENST00000456328;Parent=gene:ENSG00000223972;Name=DDX11L1-002;biotype=processed_transcript;havana_transcript=OTTHUMT00000362751;havana_version=1;tag=basic;transcript_id=ENST00000456328;version=2 -1 havana exon 11869 12227 . + . ID=ENSE00002234944;Parent=transcript:ENST00000456328;Name=ENSE00002234944;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00002234944;rank=1;version=1 -1 havana exon 12613 12721 . + . ID=ENSE00003582793;Parent=transcript:ENST00000456328;Name=ENSE00003582793;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003582793;rank=2;version=1 -1 havana exon 13221 14409 . + . ID=ENSE00002312635;Parent=transcript:ENST00000456328;Name=ENSE00002312635;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00002312635;rank=3;version=1 -1 ensembl_havana pseudogenic_transcript 11872 14412 . + . ID=transcript:ENST00000515242;Parent=gene:ENSG00000223972;Name=DDX11L1-201;biotype=transcribed_unprocessed_pseudogene;transcript_id=ENST00000515242;version=2 -1 ensembl exon 11872 12227 . + . ID=ENSE00002234632;Parent=transcript:ENST00000515242;Name=ENSE00002234632;constitutive=0;ensembl_end_phase=2;ensembl_phase=-1;exon_id=ENSE00002234632;rank=1;version=1 -1 ensembl exon 12613 12721 . + . ID=ENSE00003608237;Parent=transcript:ENST00000515242;Name=ENSE00003608237;constitutive=0;ensembl_end_phase=0;ensembl_phase=2;exon_id=ENSE00003608237;rank=2;version=1 -1 ensembl exon 13225 14412 . + . ID=ENSE00002306041;Parent=transcript:ENST00000515242;Name=ENSE00002306041;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=ENSE00002306041;rank=3;version=1 -1 ensembl_havana pseudogenic_transcript 11874 14409 . + . ID=transcript:ENST00000518655;Parent=gene:ENSG00000223972;Name=DDX11L1-202;biotype=transcribed_unprocessed_pseudogene;transcript_id=ENST00000518655;version=2 -1 ensembl exon 11874 12227 . + . 
ID=ENSE00002269724;Parent=transcript:ENST00000518655;Name=ENSE00002269724;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00002269724;rank=1;version=1 -1 ensembl exon 12595 12721 . + . ID=ENSE00002270865;Parent=transcript:ENST00000518655;Name=ENSE00002270865;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00002270865;rank=2;version=1 -1 ensembl exon 13403 13655 . + . ID=ENSE00002216795;Parent=transcript:ENST00000518655;Name=ENSE00002216795;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=ENSE00002216795;rank=3;version=1 -1 ensembl exon 13661 14409 . + . ID=ENSE00002303382;Parent=transcript:ENST00000518655;Name=ENSE00002303382;constitutive=0;ensembl_end_phase=0;ensembl_phase=0;exon_id=ENSE00002303382;rank=4;version=1 -1 ensembl_havana pseudogenic_transcript 12010 13670 . + . ID=transcript:ENST00000450305;Parent=gene:ENSG00000223972;Name=DDX11L1-001;biotype=transcribed_unprocessed_pseudogene;havana_transcript=OTTHUMT00000002844;havana_version=2;transcript_id=ENST00000450305;version=2 -1 havana exon 12010 12057 . + . ID=ENSE00001948541;Parent=transcript:ENST00000450305;Name=ENSE00001948541;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001948541;rank=1;version=1 -1 havana exon 12179 12227 . + . ID=ENSE00001671638;Parent=transcript:ENST00000450305;Name=ENSE00001671638;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001671638;rank=2;version=2 -1 havana exon 12613 12697 . + . ID=ENSE00001758273;Parent=transcript:ENST00000450305;Name=ENSE00001758273;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001758273;rank=3;version=2 -1 havana exon 12975 13052 . + . ID=ENSE00001799933;Parent=transcript:ENST00000450305;Name=ENSE00001799933;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001799933;rank=4;version=2 -1 havana exon 13221 13374 . + . 
ID=ENSE00001746346;Parent=transcript:ENST00000450305;Name=ENSE00001746346;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001746346;rank=5;version=2 -1 havana exon 13453 13670 . + . ID=ENSE00001863096;Parent=transcript:ENST00000450305;Name=ENSE00001863096;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001863096;rank=6;version=1 -1 havana pseudogene 13411551 13414482 . + . ID=gene:ENSG00000237700;Name=RP11-219C24.6;biotype=pseudogene;gene_id=ENSG00000237700;logic_name=havana;version=1 -1 havana pseudogene 13411551 13414482 . + . ID=transcript:ENST00000437300;Parent=gene:ENSG00000237700;Name=RP11-219C24.6-001;biotype=unitary_pseudogene;havana_transcript=OTTHUMT00000022042;havana_version=1;tag=basic;transcript_id=ENST00000437300;version=1 -1 havana exon 13411551 13411837 . + . ID=ENSE00001677077;Parent=transcript:ENST00000437300;Name=ENSE00001677077;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001677077;rank=1;version=1 -1 havana exon 13412234 13412812 . + . ID=ENSE00001715540;Parent=transcript:ENST00000437300;Name=ENSE00001715540;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001715540;rank=2;version=1 -1 havana exon 13413924 13414482 . + . ID=ENSE00001784031;Parent=transcript:ENST00000437300;Name=ENSE00001784031;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001784031;rank=3;version=1 -1 havana pseudogene 176241619 176242538 . + . ID=gene:ENSG00000227815;Name=RP11-195C7.3;biotype=pseudogene;gene_id=ENSG00000227815;logic_name=havana;version=2 -1 havana pseudogene 176241619 176242538 . + . ID=transcript:ENST00000440296;Parent=gene:ENSG00000227815;Name=RP11-195C7.3-001;biotype=unprocessed_pseudogene;havana_transcript=OTTHUMT00000084685;havana_version=2;tag=basic;transcript_id=ENST00000440296;version=2 -1 havana exon 176241619 176241675 . + . 
ID=ENSE00001660785;Parent=transcript:ENST00000440296;Name=ENSE00001660785;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001660785;rank=1;version=2 -1 havana exon 176241743 176242168 . + . ID=ENSE00001739151;Parent=transcript:ENST00000440296;Name=ENSE00001739151;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001739151;rank=2;version=2 -1 havana exon 176242227 176242538 . + . ID=ENSE00001773509;Parent=transcript:ENST00000440296;Name=ENSE00001773509;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001773509;rank=3;version=2 -1 ensembl RNA 1340841 1341132 . - . ID=gene:ENSG00000264293;Name=RN7SL657P;biotype=misc_RNA;description=RNA%2C 7SL%2C cytoplasmic 657%2C pseudogene [Source:HGNC Symbol%3BAcc:46673];gene_id=ENSG00000264293;logic_name=ncrna;version=1 -1 ensembl transcript 1340841 1341132 . - . ID=transcript:ENST00000582431;Parent=gene:ENSG00000264293;Name=RN7SL657P-201;biotype=misc_RNA;tag=basic;transcript_id=ENST00000582431;version=1 -1 ensembl exon 1340841 1341132 . - . 
ID=ENSE00002720632;Parent=transcript:ENST00000582431;Name=ENSE00002720632;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00002720632;rank=1;version=1 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/12_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/12_test.gff deleted file mode 100644 index c208c0bc8..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/12_test.gff +++ /dev/null @@ -1,138 +0,0 @@ -##gff-version 3 -##sequence-region 1 1 249250621 -##sequence-region 10 1 135534747 -##sequence-region 11 1 135006516 -##sequence-region 12 1 133851895 -##sequence-region 13 1 115169878 -##sequence-region 14 1 107349540 -##sequence-region 15 1 102531392 -##sequence-region 16 1 90354753 -##sequence-region 17 1 81195210 -##sequence-region 18 1 78077248 -##sequence-region 19 1 59128983 -##sequence-region 2 1 243199373 -##sequence-region 20 1 63025520 -##sequence-region 21 1 48129895 -##sequence-region 22 1 51304566 -##sequence-region 3 1 198022430 -##sequence-region 4 1 191154276 -##sequence-region 5 1 180915260 -##sequence-region 6 1 171115067 -##sequence-region 7 1 159138663 -##sequence-region 8 1 146364022 -##sequence-region 9 1 141213431 -##sequence-region GL000191.1 1 106433 -##sequence-region GL000192.1 1 547496 -##sequence-region GL000193.1 1 189789 -##sequence-region GL000194.1 1 191469 -##sequence-region GL000195.1 1 182896 -##sequence-region GL000196.1 1 38914 -##sequence-region GL000197.1 1 37175 -##sequence-region GL000198.1 1 90085 -##sequence-region GL000199.1 1 169874 -##sequence-region GL000200.1 1 187035 -##sequence-region GL000201.1 1 36148 -##sequence-region GL000202.1 1 40103 -##sequence-region GL000203.1 1 37498 -##sequence-region GL000204.1 1 81310 -##sequence-region GL000205.1 1 174588 -##sequence-region GL000206.1 1 41001 -##sequence-region GL000207.1 1 4262 -##sequence-region GL000208.1 1 92689 -##sequence-region GL000209.1 1 159169 -##sequence-region GL000210.1 1 27682 -##sequence-region GL000211.1 1 166566 
-##sequence-region GL000212.1 1 186858 -##sequence-region GL000213.1 1 164239 -##sequence-region GL000214.1 1 137718 -##sequence-region GL000215.1 1 172545 -##sequence-region GL000216.1 1 172294 -##sequence-region GL000217.1 1 172149 -##sequence-region GL000218.1 1 161147 -##sequence-region GL000219.1 1 179198 -##sequence-region GL000220.1 1 161802 -##sequence-region GL000221.1 1 155397 -##sequence-region GL000222.1 1 186861 -##sequence-region GL000223.1 1 180455 -##sequence-region GL000224.1 1 179693 -##sequence-region GL000225.1 1 211173 -##sequence-region GL000226.1 1 15008 -##sequence-region GL000227.1 1 128374 -##sequence-region GL000228.1 1 129120 -##sequence-region GL000229.1 1 19913 -##sequence-region GL000230.1 1 43691 -##sequence-region GL000231.1 1 27386 -##sequence-region GL000232.1 1 40652 -##sequence-region GL000233.1 1 45941 -##sequence-region GL000234.1 1 40531 -##sequence-region GL000235.1 1 34474 -##sequence-region GL000236.1 1 41934 -##sequence-region GL000237.1 1 45867 -##sequence-region GL000238.1 1 39939 -##sequence-region GL000239.1 1 33824 -##sequence-region GL000240.1 1 41933 -##sequence-region GL000241.1 1 42152 -##sequence-region GL000242.1 1 43523 -##sequence-region GL000243.1 1 43341 -##sequence-region GL000244.1 1 39929 -##sequence-region GL000245.1 1 36651 -##sequence-region GL000246.1 1 38154 -##sequence-region GL000247.1 1 36422 -##sequence-region GL000248.1 1 39786 -##sequence-region GL000249.1 1 38502 -##sequence-region MT 1 16569 -##sequence-region X 1 155270560 -##sequence-region Y 2649521 59034049 -#!genome-build GRCh37.p13 -#!genome-version GRCh37 -#!genome-date 2009-02 -#!genome-build-accession NCBI:GCA_000001405.14 -#!genebuild-last-updated 2013-09 -1 GRCh37 chromosome 1 249250621 . . . ID=chromosome:1;Alias=CM000663.1,NC_000001.10 -### -1 ensembl_havana pseudogene 11869 14412 . + . 
ID=gene:ENSG00000223972;Name=DDX11L1;biotype=pseudogene;description=DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1 [Source:HGNC Symbol%3BAcc:37102];gene_id=ENSG00000223972;logic_name=ensembl_havana_gene;version=4 -1 ensembl_havana processed_transcript 11869 14409 . + . ID=transcript:ENST00000456328;Parent=gene:ENSG00000223972;Name=DDX11L1-002;biotype=processed_transcript;havana_transcript=OTTHUMT00000362751;havana_version=1;tag=basic;transcript_id=ENST00000456328;version=2 -1 havana exon 11869 12227 . + . Parent=transcript:ENST00000456328;Name=ENSE00002234944;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00002234944;rank=1;version=1 -1 havana exon 12613 12721 . + . Parent=transcript:ENST00000456328;Name=ENSE00003582793;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003582793;rank=2;version=1 -1 havana exon 13221 14409 . + . Parent=transcript:ENST00000456328;Name=ENSE00002312635;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00002312635;rank=3;version=1 -1 ensembl_havana pseudogenic_transcript 11872 14412 . + . ID=transcript:ENST00000515242;Parent=gene:ENSG00000223972;Name=DDX11L1-201;biotype=transcribed_unprocessed_pseudogene;transcript_id=ENST00000515242;version=2 -1 ensembl exon 11872 12227 . + . Parent=transcript:ENST00000515242;Name=ENSE00002234632;constitutive=0;ensembl_end_phase=2;ensembl_phase=-1;exon_id=ENSE00002234632;rank=1;version=1 -1 ensembl exon 12613 12721 . + . Parent=transcript:ENST00000515242;Name=ENSE00003608237;constitutive=0;ensembl_end_phase=0;ensembl_phase=2;exon_id=ENSE00003608237;rank=2;version=1 -1 ensembl exon 13225 14412 . + . Parent=transcript:ENST00000515242;Name=ENSE00002306041;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=ENSE00002306041;rank=3;version=1 -1 ensembl_havana pseudogenic_transcript 11874 14409 . + . 
ID=transcript:ENST00000518655;Parent=gene:ENSG00000223972;Name=DDX11L1-202;biotype=transcribed_unprocessed_pseudogene;transcript_id=ENST00000518655;version=2 -1 ensembl exon 11874 12227 . + . Parent=transcript:ENST00000518655;Name=ENSE00002269724;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00002269724;rank=1;version=1 -1 ensembl exon 12595 12721 . + . Parent=transcript:ENST00000518655;Name=ENSE00002270865;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00002270865;rank=2;version=1 -1 ensembl exon 13403 13655 . + . Parent=transcript:ENST00000518655;Name=ENSE00002216795;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=ENSE00002216795;rank=3;version=1 -1 ensembl exon 13661 14409 . + . Parent=transcript:ENST00000518655;Name=ENSE00002303382;constitutive=0;ensembl_end_phase=0;ensembl_phase=0;exon_id=ENSE00002303382;rank=4;version=1 -1 ensembl_havana pseudogenic_transcript 12010 13670 . + . ID=transcript:ENST00000450305;Parent=gene:ENSG00000223972;Name=DDX11L1-001;biotype=transcribed_unprocessed_pseudogene;havana_transcript=OTTHUMT00000002844;havana_version=2;transcript_id=ENST00000450305;version=2 -1 havana exon 12010 12057 . + . Parent=transcript:ENST00000450305;Name=ENSE00001948541;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001948541;rank=1;version=1 -1 havana exon 12179 12227 . + . Parent=transcript:ENST00000450305;Name=ENSE00001671638;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001671638;rank=2;version=2 -1 havana exon 12613 12697 . + . Parent=transcript:ENST00000450305;Name=ENSE00001758273;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001758273;rank=3;version=2 -1 havana exon 12975 13052 . + . Parent=transcript:ENST00000450305;Name=ENSE00001799933;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001799933;rank=4;version=2 -1 havana exon 13221 13374 . + . 
Parent=transcript:ENST00000450305;Name=ENSE00001746346;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001746346;rank=5;version=2 -1 havana exon 13453 13670 . + . Parent=transcript:ENST00000450305;Name=ENSE00001863096;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001863096;rank=6;version=1 -### -1 havana pseudogene 176241619 176242538 . + . ID=gene:ENSG00000227815;Name=RP11-195C7.3;biotype=pseudogene;gene_id=ENSG00000227815;logic_name=havana;version=2 -1 havana pseudogene 176241619 176242538 . + . ID=transcript:ENST00000440296;Parent=gene:ENSG00000227815;Name=RP11-195C7.3-001;biotype=unprocessed_pseudogene;havana_transcript=OTTHUMT00000084685;havana_version=2;tag=basic;transcript_id=ENST00000440296;version=2 -1 havana exon 176241619 176241675 . + . Parent=transcript:ENST00000440296;Name=ENSE00001660785;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001660785;rank=1;version=2 -1 havana exon 176241743 176242168 . + . Parent=transcript:ENST00000440296;Name=ENSE00001739151;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001739151;rank=2;version=2 -1 havana exon 176242227 176242538 . + . Parent=transcript:ENST00000440296;Name=ENSE00001773509;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001773509;rank=3;version=2 -### -1 havana pseudogene 13411551 13414482 . + . ID=gene:ENSG00000237700;Name=RP11-219C24.6;biotype=pseudogene;gene_id=ENSG00000237700;logic_name=havana;version=1 -1 havana pseudogene 13411551 13414482 . + . ID=transcript:ENST00000437300;Parent=gene:ENSG00000237700;Name=RP11-219C24.6-001;biotype=unitary_pseudogene;havana_transcript=OTTHUMT00000022042;havana_version=1;tag=basic;transcript_id=ENST00000437300;version=1 -1 havana exon 13411551 13411837 . + . Parent=transcript:ENST00000437300;Name=ENSE00001677077;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001677077;rank=1;version=1 -1 havana exon 13412234 13412812 . + . 
Parent=transcript:ENST00000437300;Name=ENSE00001715540;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001715540;rank=2;version=1 -1 havana exon 13413924 13414482 . + . Parent=transcript:ENST00000437300;Name=ENSE00001784031;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001784031;rank=3;version=1 -### -1 ensembl_havana pseudogene 11869 14412 . + . ID=gene:ENSG00000223972;Name=DDX11L1;biotype=pseudogene;description=DEAD/H (Asp-Glu-Ala-Asp/His) box helicase 11 like 1 [Source:HGNC Symbol%3BAcc:37102];gene_id=ENSG00000223972;logic_name=ensembl_havana_gene;version=4 -1 ensembl_havana processed_transcript 11869 14409 . + . ID=transcript:ENST00000456328;Parent=gene:ENSG00000223972;Name=DDX11L1-002;biotype=processed_transcript;havana_transcript=OTTHUMT00000362751;havana_version=1;tag=basic;transcript_id=ENST00000456328;version=2 -1 havana exon 11869 12227 . + . Parent=transcript:ENST00000456328;Name=ENSE00002234944;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00002234944;rank=1;version=1 -1 havana exon 12613 12721 . + . Parent=transcript:ENST00000456328;Name=ENSE00003582793;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003582793;rank=2;version=1 -1 havana exon 13221 14409 . + . Parent=transcript:ENST00000456328;Name=ENSE00002312635;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00002312635;rank=3;version=1 -1 ensembl_havana pseudogenic_transcript 11872 14412 . + . ID=transcript:ENST00000515242;Parent=gene:ENSG00000223972;Name=DDX11L1-201;biotype=transcribed_unprocessed_pseudogene;transcript_id=ENST00000515242;version=2 -1 ensembl exon 11872 12227 . + . Parent=transcript:ENST00000515242;Name=ENSE00002234632;constitutive=0;ensembl_end_phase=2;ensembl_phase=-1;exon_id=ENSE00002234632;rank=1;version=1 -### -1 ensembl RNA 1340841 1341132 . - . 
ID=gene:ENSG00000264293;Name=RN7SL657P;biotype=misc_RNA;description=RNA%2C 7SL%2C cytoplasmic 657%2C pseudogene [Source:HGNC Symbol%3BAcc:46673];gene_id=ENSG00000264293;logic_name=ncrna;version=1 -1 ensembl transcript 1340841 1341132 . - . ID=transcript:ENST00000582431;Parent=gene:ENSG00000264293;Name=RN7SL657P-201;biotype=misc_RNA;tag=basic;transcript_id=ENST00000582431;version=1 -1 ensembl exon 1340841 1341132 . - . Parent=transcript:ENST00000582431;Name=ENSE00002720632;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00002720632;rank=1;version=1 -### \ No newline at end of file diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/13_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/13_correct_output.gff deleted file mode 100644 index 96a089420..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/13_correct_output.gff +++ /dev/null @@ -1,25 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2;locus_tag=BBBBBB -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458;locus_tag=BBBBBB -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 341964 343033 . 
+ 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold789 maker gene 558184 564780 . + . ID=nbis_NEW-gene-1;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . ID=nbis_nol2id-exon-1;Parent=nbis_NEW-gene-1;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=nbis_nol2id-exon-1;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=nbis_nol2id-exon-1;locus_tag=AAAAA -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=nbis_nol2id-exon-1;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=nbis_nol2id-exon-1;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=nbis_nol2id-exon-1;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=nbis_nol2id-exon-1;locus_tag=AAAAA -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=nbis_nol2id-exon-1;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=nbis_nol2id-exon-1;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=nbis_nol2id-exon-1;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . 
ID=CLUHART00000006146:three_prime_utr;Parent=nbis_nol2id-exon-1;locus_tag=AAAAA diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/13_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/13_test.gff deleted file mode 100644 index c6f692784..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/13_test.gff +++ /dev/null @@ -1,33 +0,0 @@ -##gff-version 3 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;locus_tag=AAAAA -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;locus_tag=AAAAA -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2;locus_tag=BBBBBB -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;locus_tag=BBBBBB -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;locus_tag=BBBBBB -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;locus_tag=BBBBBB -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;locus_tag=BBBBBB -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker CDS 340733 340841 . 
+ 0 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;locus_tag=BBBBBB -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;locus_tag=BBBBBB -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;locus_tag=AAAAA -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006147:exon:999;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;locus_tag=AAAAA diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/14_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/14_correct_output.gff deleted file mode 100644 index ea05ba5ed..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/14_correct_output.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2 -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458 -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717 -scaffold625 maker exon 340733 340841 . + . 
ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717 -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717 -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717 -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=nbis_NEW-five_prime_utr-3;Parent=CLUHART00000008717 -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=nbis_NEW-three_prime_utr-3;Parent=CLUHART00000008717 -scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146 -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=nbis_NEW-five_prime_utr-1;Parent=CLUHART00000006146 -scaffold789 maker three_prime_UTR 564589 564780 . + . 
ID=nbis_NEW-three_prime_utr-1;Parent=CLUHART00000006146 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147 -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=nbis_NEW-five_prime_utr-2;Parent=CLUHART00000006147 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=nbis_NEW-three_prime_utr-2;Parent=CLUHART00000006147 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/14_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/14_test.gff deleted file mode 100644 index e3469d784..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/14_test.gff +++ /dev/null @@ -1,30 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2 -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458 -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717 -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717 -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717 -scaffold625 maker exon 341964 343277 . + . 
ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717 -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146 -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147 -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147 -scaffold789 maker CDS 558191 560123 . 
+ 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/15_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/15_correct_output.gff deleted file mode 100644 index 88ed3eced..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/15_correct_output.gff +++ /dev/null @@ -1,16 +0,0 @@ -##gff-version 3 -scaffold625 maker match 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458 -scaffold625 maker match_part 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717;Target=CLUHART00000008717 1 154 -scaffold625 maker match_part 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717;Target=CLUHART00000008717 155 263 -scaffold625 maker match_part 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717;Target=CLUHART00000008717 264 374 -scaffold625 maker match_part 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717;Target=CLUHART00000008717 375 1688 -scaffold789 maker match 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852 -scaffold789 maker match_part 558184 560123 . + . ID=nbis_NEW-exon-4;Parent=CLUHART00000006146;Target=CLUHART00000006146 1 1940 -scaffold789 maker match_part 561401 561519 . + . ID=nbis_NEW-exon-5;Parent=CLUHART00000006146;Target=CLUHART00000006146 1941 2059 -scaffold789 maker match_part 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146;Target=CLUHART00000006146 2060 2124 -scaffold789 maker match_part 564372 564780 . + . ID=nbis_NEW-exon-6;Parent=CLUHART00000006146;Target=CLUHART00000006146 2125 2533 -scaffold789 maker match 558184 564780 . + . 
ID=CLUHART00000006147;Parent=CLUHARG00000003852 -scaffold789 maker match_part 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006147;Target=CLUHART00000006147 1 1940 -scaffold789 maker match_part 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006147;Target=CLUHART00000006147 1941 2059 -scaffold789 maker match_part 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147;Target=CLUHART00000006147 2060 2124 -scaffold789 maker match_part 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006147;Target=CLUHART00000006147 2125 2533 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/15_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/15_test.gff deleted file mode 100644 index 66a15ef0f..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/15_test.gff +++ /dev/null @@ -1,16 +0,0 @@ -##gff-version 3 -scaffold625 maker match 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458 -scaffold625 maker match_part 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717;Target=CLUHART00000008717 1 154 -scaffold625 maker match_part 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717;Target=CLUHART00000008717 155 263 -scaffold625 maker match_part 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717;Target=CLUHART00000008717 264 374 -scaffold625 maker match_part 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717;Target=CLUHART00000008717 375 1688 -scaffold789 maker match 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852 -scaffold789 maker match_part 558184 560123 . + . ID=nbis_NEW-exon-4;Parent=CLUHART00000006146;Target=CLUHART00000006146 1 1940 -scaffold789 maker match_part 561401 561519 . + . ID=nbis_NEW-exon-5;Parent=CLUHART00000006146;Target=CLUHART00000006146 1941 2059 -scaffold789 maker match_part 564171 564235 . + . 
ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146;Target=CLUHART00000006146 2060 2124 -scaffold789 maker match_part 564372 564780 . + . ID=nbis_NEW-exon-6;Parent=CLUHART00000006146;Target=CLUHART00000006146 2125 2533 -scaffold789 maker match 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852 -scaffold789 maker match_part 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006147;Target=CLUHART00000006147 1 1940 -scaffold789 maker match_part 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006147;Target=CLUHART00000006147 1941 2059 -scaffold789 maker match_part 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147;Target=CLUHART00000006147 2060 2124 -scaffold789 maker match_part 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006147;Target=CLUHART00000006147,2125,2533 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/16_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/16_correct_output.gff deleted file mode 100644 index c8d2eeafd..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/16_correct_output.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2 -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458 -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717 -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717 -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717 -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717 -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 340733 340841 . 
+ 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717 -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717 -scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146 -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147 -scaffold789 maker exon 561401 561519 . + . 
ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147 -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 564372 564688 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147 -scaffold789 maker three_prime_UTR 564689 564780 . + . ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/16_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/16_test.gff deleted file mode 100644 index d7e9537a3..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/16_test.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2 -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458 -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717 -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717 -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717 -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717 -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341518 341628 . 
+ 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717 -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717 -scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146 -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147 -scaffold789 maker exon 562057 562121 . + . 
ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 564372 564688 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/17_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/17_correct_output.gff deleted file mode 100644 index 23b548f74..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/17_correct_output.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2 -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458 -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717 -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717 -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717 -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717 -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341964 343033 . 
+ 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717 -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717 -scaffold789 maker gene 558184 564800 . + . ID=CLUHARG00000003852;Name=PF11_0240 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146 -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker mRNA 558184 564800 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147 -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147 -scaffold789 maker exon 564372 564800 . + . 
ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147 -scaffold789 maker three_prime_UTR 564589 564800 . + . ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/17_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/17_test.gff deleted file mode 100644 index 14d1980b8..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/17_test.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2 -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458 -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717 -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717 -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717 -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717 -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker five_prime_UTR 337818 337914 . + . 
ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717 -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717 -scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146 -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147 -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147 -scaffold789 maker CDS 558191 560123 . 
+ 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147 -scaffold789 maker three_prime_UTR 564589 564800 . + . ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/18_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/18_correct_output.gff deleted file mode 100644 index fafe86ed5..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/18_correct_output.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2 -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458 -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717 -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717 -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717 -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717 -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker five_prime_UTR 337818 337914 . + . 
ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717 -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717 -scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146 -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147 -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147 -scaffold789 maker CDS 558191 560123 . 
+ 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/18_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/18_test.gff deleted file mode 100644 index fea67b1d4..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/18_test.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2 -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458 -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717 -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852 -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717 -scaffold625 maker three_prime_UTR 343034 343277 . + . 
ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717 -scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146 -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717 -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717 -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147 -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 562057 562121 . 
+ 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/19_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/19_correct_output.gff deleted file mode 100644 index f968af002..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/19_correct_output.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2;locus_tag=BBBBBB -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458;locus_tag=BBBBBB -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717 -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717 -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717 -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717 -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker three_prime_UTR 343034 343277 . + . 
ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146;lParent=CLUHART00000006146 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . 
ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147;locus_tag=AAAAA diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/19_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/19_test.gff deleted file mode 100644 index 88e40c140..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/19_test.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2;locus_tag=BBBBBB -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717 -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717 -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717 -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717 -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker CDS 341964 343033 . 
+ 2 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;locus_tag=BBBBBB -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;locus_tag=BBBBBB -scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;locus_tag=AAAAA -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;lParent=CLUHART00000006146 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;locus_tag=AAAAA -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . 
+ 2 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;locus_tag=AAAAA diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/1_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/1_correct_output.gff deleted file mode 100644 index 918ef9af2..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/1_correct_output.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2 -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458 -scaffold625 maker exon 337818 337971 . + . ID=nbis_NEW-exon-5;Parent=CLUHART00000008717 -scaffold625 maker exon 340733 340841 . + . ID=nbis_NEW-exon-6;Parent=CLUHART00000008717 -scaffold625 maker exon 341518 341628 . + . ID=nbis_NEW-exon-7;Parent=CLUHART00000008717 -scaffold625 maker exon 341964 343277 . + . ID=nbis_NEW-exon-8;Parent=CLUHART00000008717 -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717 -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717 -scaffold789 maker gene 558184 564780 . + . 
ID=CLUHARG00000003852;Name=PF11_0240 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146 -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147 -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 564372 564588 . 
+ 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/1_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/1_test.gff deleted file mode 100644 index 78f6bf1bb..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/1_test.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2 -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458 -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717 -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717 -scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146 -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146 -scaffold789 maker CDS 558191 560123 . 
+ 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147 -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147 -scaffold789 maker exon 562057 562121 . + . 
ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147 \ No newline at end of file diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/20_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/20_correct_output.gff deleted file mode 100644 index 439ebf127..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/20_correct_output.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2 -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458 -scaffold625 maker exon 337818 337971 . + . ID=nbis_NEW-exon-1;Parent=CLUHART00000008717 -scaffold625 maker exon 340733 340841 . + . ID=nbis_NEW-exon-2;Parent=CLUHART00000008717 -scaffold625 maker exon 341518 341628 . + . ID=nbis_NEW-exon-3;Parent=CLUHART00000008717 -scaffold625 maker exon 341964 343277 . + . ID=nbis_NEW-exon-4;Parent=CLUHART00000008717 -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717 -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717 -scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146 -scaffold789 maker exon 561401 561519 . + . 
ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146 -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147 -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147 -scaffold789 maker three_prime_UTR 564589 564780 . + . 
ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/20_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/20_test.gff deleted file mode 100644 index 4d2368c9a..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/20_test.gff +++ /dev/null @@ -1,32 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2 -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458 -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717 -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717 -scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146 -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564171 564235 . 
+ 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147 -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/21_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/21_correct_output.gff deleted file mode 100644 index b65b9715a..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/21_correct_output.gff +++ /dev/null @@ -1,13 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2 -scaffold625 maker mRNA 337818 343277 . + . 
ID=CLUHART00000008717;Parent=CLUHARG00000005458 -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717 -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717 -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717 -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717 -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717 -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/21_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/21_test.gff deleted file mode 100644 index 9cda07e1a..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/21_test.gff +++ /dev/null @@ -1,14 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2 -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458 -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717 -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717 -scaffold625 maker exon 341518 341618 . + . ID=CLUHART00000008717:exon:XXXX;Parent=CLUHART00000008717 -scaffold625 maker exon 341600 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717 -scaffold625 maker exon 341964 343277 . + . 
ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717 -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717 -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/22_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/22_correct_output.gff deleted file mode 100644 index 74ec0510e..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/22_correct_output.gff +++ /dev/null @@ -1,9 +0,0 @@ -##gff-version 3 -unitig_0|quiver maker gene 2098663 2104039 1000 + . ID=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0;Name=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0;cov=15.156588;fPKM=3.157668;gene_id=4_160615_BC9HJBANXX_P3969_201.1307;tPM=4.228132;transcript_id=4_160615_BC9HJBANXX_P3969_201.1307.2 -unitig_0|quiver maker mRNA 2098663 2104039 1000 + . ID=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1;Parent=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0;Name=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1;_AED=0.00;_QI=22|1|1|1|0|0|2|2676|698;_eAED=0.00 -unitig_0|quiver maker exon 2098663 2102863 . + . ID=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1:exon:84;Parent=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1 -unitig_0|quiver maker exon 2103446 2104039 . + . ID=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1:exon:85;Parent=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1 -unitig_0|quiver maker CDS 2098685 2100781 . 
+ 0 ID=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1:cds;Parent=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1 -unitig_0|quiver maker five_prime_UTR 2098663 2098684 . + . ID=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1:five_prime_utr;Parent=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1 -unitig_0|quiver maker three_prime_UTR 2100782 2102863 . + . ID=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1:three_prime_utr;Parent=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1 -unitig_0|quiver maker three_prime_UTR 2103446 2104039 . + . ID=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1:three_prime_utr;Parent=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/22_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/22_test.gff deleted file mode 100644 index ac5d170f2..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/22_test.gff +++ /dev/null @@ -1,8 +0,0 @@ -unitig_0|quiver maker gene 2098663 2104039 . + . ID=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0;Name=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0;cov=15.156588;fPKM=3.157668;gene_id=4_160615_BC9HJBANXX_P3969_201.1307;score=1000;tPM=4.228132;transcript_id=4_160615_BC9HJBANXX_P3969_201.1307.2 -unitig_0|quiver maker mRNA 2098663 2104039 1000 + . ID=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1;Parent=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0;Name=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1;_AED=0.00;_eAED=0.00;_QI=22|1|1|1|0|0|2|2676|698 -unitig_0|quiver maker exon 2098663 2102863 . + . ID=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1:exon:84;Parent=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1 -unitig_0|quiver maker exon 2103446 2104039 . + . 
ID=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1:exon:85;Parent=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1 -unitig_0|quiver maker five_prime_UTR 2098663 2098684 . + . ID=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1:five_prime_utr;Parent=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1 -unitig_0|quiver maker CDS 2098685 2100781 . + 0 ID=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1:cds;Parent=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1 -unitig_0|quiver maker three_prime_UTR 2100782 2102863 . + . ID=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1:three_prime_utr;Parent=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1 -unitig_0|quiver maker three_prime_UTR 2103446 2104039 . + . ID=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1:three_prime_utr;Parent=maker-unitig_0|quiver-est_gff_StringTie-gene-21.0-mRNA-1 \ No newline at end of file diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/23_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/23_correct_output.gff deleted file mode 100644 index 346f2a0e6..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/23_correct_output.gff +++ /dev/null @@ -1,18 +0,0 @@ -##gff-version 3 -unitig_10|quiver maker gene 293192 296995 1000 - . ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0;Name=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0;cov=112.403648;fPKM=21.595270;gene_id=4_160615_BC9HJBANXX_P3969_209.645;tPM=29.167316;transcript_id=4_160615_BC9HJBANXX_P3969_209.645.1 -unitig_10|quiver maker mRNA 293192 296995 3669 - . ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0;Name=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1;_AED=0.22;_QI=81|1|1|1|0|0|3|846|913;_eAED=0.22 -unitig_10|quiver maker exon 293192 294553 . - . 
ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1:exon:5;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1 -unitig_10|quiver maker exon 294618 296824 . - . ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1:exon:4;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1 -unitig_10|quiver maker exon 296896 296995 . - . ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1:exon:3;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1 -unitig_10|quiver maker CDS 294038 294553 . - 0 ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1:cds;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1 -unitig_10|quiver maker CDS 294618 296824 . - 2 ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1:cds;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1 -unitig_10|quiver maker CDS 296896 296914 . - 0 ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1:cds;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1 -unitig_10|quiver maker five_prime_UTR 296915 296995 . - . ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1:five_prime_utr;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1 -unitig_10|quiver maker three_prime_UTR 293192 294037 . - . ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1:three_prime_utr;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1 -unitig_10|quiver maker mRNA 293192 296995 3733 - . ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0;Name=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2;_AED=0.22;_QI=81|1|1|1|0|0|2|1411|746;_eAED=0.22 -unitig_10|quiver maker exon 293192 296824 . - . ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2:exon:6;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2 -unitig_10|quiver maker exon 296896 296995 . - . 
ID=nbis_NEW-exon-1;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2 -unitig_10|quiver maker CDS 294603 296824 . - 2 ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2:cds;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2 -unitig_10|quiver maker CDS 296896 296914 . - 0 ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2:cds;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2 -unitig_10|quiver maker five_prime_UTR 296915 296995 . - . ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2:five_prime_utr;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2 -unitig_10|quiver maker three_prime_UTR 293192 294602 . - . ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2:three_prime_utr;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/23_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/23_test.gff deleted file mode 100644 index 8ea679df7..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/23_test.gff +++ /dev/null @@ -1,16 +0,0 @@ -unitig_10|quiver maker gene 293192 296995 . - . ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0;Name=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0;cov=112.403648;fPKM=21.595270;gene_id=4_160615_BC9HJBANXX_P3969_209.645;score=1000;tPM=29.167316;transcript_id=4_160615_BC9HJBANXX_P3969_209.645.1 -unitig_10|quiver maker mRNA 293192 296995 3669 - . ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0;Name=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1;_AED=0.22;_eAED=0.22;_QI=81|1|1|1|0|0|3|846|913 -unitig_10|quiver maker mRNA 293192 296995 3733 - . 
ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0;Name=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2;_AED=0.22;_eAED=0.22;_QI=81|1|1|1|0|0|2|1411|746 -unitig_10|quiver maker exon 293192 296824 . - . ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2:exon:6;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2 -unitig_10|quiver maker exon 293192 294553 . - . ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1:exon:5;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1 -unitig_10|quiver maker exon 294618 296824 . - . ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1:exon:4;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1 -unitig_10|quiver maker exon 296896 296995 . - . ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1:exon:3;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1,maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2 -unitig_10|quiver maker five_prime_UTR 296915 296995 . - . ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1:five_prime_utr;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1 -unitig_10|quiver maker CDS 296896 296914 . - 0 ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1:cds;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1 -unitig_10|quiver maker CDS 294618 296824 . - 2 ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1:cds;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1 -unitig_10|quiver maker CDS 294038 294553 . - 0 ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1:cds;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1 -unitig_10|quiver maker three_prime_UTR 293192 294037 . - . ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1:three_prime_utr;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-1 -unitig_10|quiver maker five_prime_UTR 296915 296995 . - . 
ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2:five_prime_utr;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2 -unitig_10|quiver maker CDS 296896 296914 . - 0 ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2:cds;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2 -unitig_10|quiver maker CDS 294603 296824 . - 2 ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2:cds;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2 -unitig_10|quiver maker three_prime_UTR 293192 294602 . - . ID=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2:three_prime_utr;Parent=maker-unitig_10|quiver-est_gff_StringTie-gene-3.0-mRNA-2 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/24_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/24_test.gff deleted file mode 100644 index 6b7f8ed25..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/24_test.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2 -scaffold625 maker bibou 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458 -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717 -scaffold625 maker lula 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717 -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717 -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717 -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker five_prime_UTR 337818 337914 . + . 
ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717 -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717 -scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146 -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147 -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147 -scaffold789 maker CDS 558191 560123 . 
+ 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/25_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/25_correct_output.gff deleted file mode 100644 index 328d5d55a..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/25_correct_output.gff +++ /dev/null @@ -1,34 +0,0 @@ -##gff-version 3 -scaffold1 StringTie gene 2551 2965 1000.00 . . ID=nbis_noL1id-transcript-1;geneID=MSTRG.1 -scaffold1 StringTie transcript 2551 2965 1000.00 . . ID=MSTRG.1.1;Parent=nbis_noL1id-transcript-1;geneID=MSTRG.1 -scaffold1 StringTie exon 2551 2965 1000.00 . . ID=exon-1;Parent=MSTRG.1.1;cov=68.607231 -scaffold1 StringTie gene 8147 13353 1000.00 - . ID=nbis_noL1id-transcript-2;geneID=MSTRG.6 -scaffold1 StringTie transcript 8147 13353 1000.00 - . ID=MSTRG.6.1;Parent=nbis_noL1id-transcript-2;geneID=MSTRG.6 -scaffold1 StringTie exon 8147 8981 1000.00 - . ID=exon-2;Parent=MSTRG.6.1;cov=529.868042 -scaffold1 StringTie exon 9082 9171 1000.00 - . ID=exon-3;Parent=MSTRG.6.1;cov=451.066681 -scaffold1 StringTie exon 9328 9433 1000.00 - . ID=exon-4;Parent=MSTRG.6.1;cov=548.889893 -scaffold1 StringTie exon 9682 9875 1000.00 - . ID=exon-5;Parent=MSTRG.6.1;cov=416.032471 -scaffold1 StringTie exon 10018 10228 1000.00 - . ID=exon-6;Parent=MSTRG.6.1;cov=268.398773 -scaffold1 StringTie exon 10436 10511 1000.00 - . ID=exon-7;Parent=MSTRG.6.1;cov=263.012329 -scaffold1 StringTie exon 10665 10744 1000.00 - . 
ID=exon-8;Parent=MSTRG.6.1;cov=262.177094 -scaffold1 StringTie exon 10901 10996 1000.00 - . ID=exon-9;Parent=MSTRG.6.1;cov=285.484375 -scaffold1 StringTie exon 11277 11348 1000.00 - . ID=exon-10;Parent=MSTRG.6.1;cov=272.513885 -scaffold1 StringTie exon 11521 11718 1000.00 - . ID=exon-11;Parent=MSTRG.6.1;cov=323.955170 -scaffold1 StringTie exon 11802 12004 1000.00 - . ID=exon-12;Parent=MSTRG.6.1;cov=258.021729 -scaffold1 StringTie exon 12106 13353 1000.00 - . ID=exon-13;Parent=MSTRG.6.1;cov=192.039612 -scaffold1 StringTie gene 21499 23178 1000.00 . . ID=nbis_noL1id-transcript-3;geneID=MSTRG.7 -scaffold1 StringTie transcript 21499 23178 1000.00 . . ID=MSTRG.7.1;Parent=nbis_noL1id-transcript-3;geneID=MSTRG.7 -scaffold1 StringTie exon 21499 23178 1000.00 . . ID=exon-14;Parent=MSTRG.7.1;cov=207.398804 -scaffold1 StringTie gene 44218 47964 1000.00 - . ID=nbis_noL1id-transcript-4;geneID=MSTRG.11 -scaffold1 StringTie transcript 44218 47964 1000.00 - . ID=MSTRG.11.1;Parent=nbis_noL1id-transcript-4;geneID=MSTRG.11 -scaffold1 StringTie exon 44218 45365 1000.00 - . ID=exon-15;Parent=MSTRG.11.1;cov=3001.629883 -scaffold1 StringTie exon 47660 47706 1000.00 - . ID=exon-16;Parent=MSTRG.11.1;cov=4399.870117 -scaffold1 StringTie exon 47827 47964 1000.00 - . ID=exon-17;Parent=MSTRG.11.1;cov=2103.559082 -scaffold1 StringTie transcript 44218 47964 1000.00 - . ID=MSTRG.11.2;Parent=nbis_noL1id-transcript-4;geneID=MSTRG.11 -scaffold1 StringTie exon 44218 45365 1000.00 - . ID=exon-18;Parent=MSTRG.11.2;cov=487.085846 -scaffold1 StringTie exon 47660 47718 1000.00 - . ID=exon-19;Parent=MSTRG.11.2;cov=557.812744 -scaffold1 StringTie exon 47824 47964 1000.00 - . ID=exon-20;Parent=MSTRG.11.2;cov=242.265823 -scaffold1 StringTie transcript 44427 47958 1000.00 - . ID=MSTRG.11.3;Parent=nbis_noL1id-transcript-4;geneID=MSTRG.11 -scaffold1 StringTie exon 44427 45365 1000.00 - . ID=exon-21;Parent=MSTRG.11.3;cov=2892.249023 -scaffold1 StringTie exon 47660 47723 1000.00 - . 
ID=exon-22;Parent=MSTRG.11.3;cov=2083.479492 -scaffold1 StringTie exon 47827 47958 1000.00 - . ID=exon-23;Parent=MSTRG.11.3;cov=734.545044 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/25_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/25_test.gff deleted file mode 100644 index c321576e7..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/25_test.gff +++ /dev/null @@ -1,32 +0,0 @@ -# gffread all_merged.stringtie.gtf -E -F -o - -# gffread v0.9.9 -##gff-version 3 -scaffold1 StringTie transcript 2551 2965 1000.00 . . ID=MSTRG.1.1;geneID=MSTRG.1 -scaffold1 StringTie exon 2551 2965 1000.00 . . Parent=MSTRG.1.1;cov=68.607231 -scaffold1 StringTie transcript 8147 13353 1000.00 - . ID=MSTRG.6.1;geneID=MSTRG.6 -scaffold1 StringTie exon 8147 8981 1000.00 - . Parent=MSTRG.6.1;cov=529.868042 -scaffold1 StringTie exon 9082 9171 1000.00 - . Parent=MSTRG.6.1;cov=451.066681 -scaffold1 StringTie exon 9328 9433 1000.00 - . Parent=MSTRG.6.1;cov=548.889893 -scaffold1 StringTie exon 9682 9875 1000.00 - . Parent=MSTRG.6.1;cov=416.032471 -scaffold1 StringTie exon 10018 10228 1000.00 - . Parent=MSTRG.6.1;cov=268.398773 -scaffold1 StringTie exon 10436 10511 1000.00 - . Parent=MSTRG.6.1;cov=263.012329 -scaffold1 StringTie exon 10665 10744 1000.00 - . Parent=MSTRG.6.1;cov=262.177094 -scaffold1 StringTie exon 10901 10996 1000.00 - . Parent=MSTRG.6.1;cov=285.484375 -scaffold1 StringTie exon 11277 11348 1000.00 - . Parent=MSTRG.6.1;cov=272.513885 -scaffold1 StringTie exon 11521 11718 1000.00 - . Parent=MSTRG.6.1;cov=323.955170 -scaffold1 StringTie exon 11802 12004 1000.00 - . Parent=MSTRG.6.1;cov=258.021729 -scaffold1 StringTie exon 12106 13353 1000.00 - . Parent=MSTRG.6.1;cov=192.039612 -scaffold1 StringTie transcript 21499 23178 1000.00 . . ID=MSTRG.7.1;geneID=MSTRG.7 -scaffold1 StringTie exon 21499 23178 1000.00 . . Parent=MSTRG.7.1;cov=207.398804 -scaffold1 StringTie transcript 44218 47964 1000.00 - . 
ID=MSTRG.11.1;geneID=MSTRG.11 -scaffold1 StringTie exon 44218 45365 1000.00 - . Parent=MSTRG.11.1;cov=3001.629883 -scaffold1 StringTie exon 47660 47706 1000.00 - . Parent=MSTRG.11.1;cov=4399.870117 -scaffold1 StringTie exon 47827 47964 1000.00 - . Parent=MSTRG.11.1;cov=2103.559082 -scaffold1 StringTie transcript 44218 47964 1000.00 - . ID=MSTRG.11.2;geneID=MSTRG.11 -scaffold1 StringTie exon 44218 45365 1000.00 - . Parent=MSTRG.11.2;cov=487.085846 -scaffold1 StringTie exon 47660 47718 1000.00 - . Parent=MSTRG.11.2;cov=557.812744 -scaffold1 StringTie exon 47824 47964 1000.00 - . Parent=MSTRG.11.2;cov=242.265823 -scaffold1 StringTie transcript 44427 47958 1000.00 - . ID=MSTRG.11.3;geneID=MSTRG.11 -scaffold1 StringTie exon 44427 45365 1000.00 - . Parent=MSTRG.11.3;cov=2892.249023 -scaffold1 StringTie exon 47660 47723 1000.00 - . Parent=MSTRG.11.3;cov=2083.479492 -scaffold1 StringTie exon 47827 47958 1000.00 - . Parent=MSTRG.11.3;cov=734.545044 \ No newline at end of file diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/26_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/26_test.gff deleted file mode 100644 index 2a40664d5..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/26_test.gff +++ /dev/null @@ -1,5 +0,0 @@ -Tob1_contig1 Prodigal:2.60 CDS 29190 32849 . + 0 ID=Tob1_00021;eC_number=3.6.3.-;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q99T13 -Tob1_contig1 Prodigal:2.60 CDS 32953 33366 . + 0 ID=Tob1_00022;inference=ab initio prediction:Prodigal:2.60;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 33411 34181 . + 0 ID=Tob1_00023;inference=ab initio prediction:Prodigal:2.60;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 34266 35222 . + 0 ID=Tob1_00024;inference=ab initio prediction:Prodigal:2.60;product=hypothetical protein -Tob1_contig1 SignalP:4.1 sig_peptide 34266 34298 . 
+ 0 inference=ab initio prediction:SignalP:4.1;note=predicted cleavage at residue 33;product=putative signal peptide \ No newline at end of file diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/27_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/27_correct_output.gff deleted file mode 100644 index 92b89d8fa..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/27_correct_output.gff +++ /dev/null @@ -1,24 +0,0 @@ -##gff-version 3 -scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146 -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147 -scaffold789 maker exon 562057 562121 . + . 
ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/27_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/27_test.gff deleted file mode 100644 index 74a0e0099..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/27_test.gff +++ /dev/null @@ -1,35 +0,0 @@ -##gff-version 3 -scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146 -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564372 564588 . 
+ 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147 -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146d;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146d:exon:995;Parent=CLUHART00000006146d -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146d:exon:996;Parent=CLUHART00000006146d -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146d:exon:997;Parent=CLUHART00000006146d -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146d:exon:998;Parent=CLUHART00000006146d -scaffold789 maker CDS 558191 560123 . 
+ 0 ID=CLUHART00000006146d:cds;Parent=CLUHART00000006146d -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146d:cds;Parent=CLUHART00000006146d -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146d:cds;Parent=CLUHART00000006146d -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146d:cds;Parent=CLUHART00000006146d -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146d:five_prime_utr;Parent=CLUHART00000006146d -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146d:three_prime_utr;Parent=CLUHART00000006146d \ No newline at end of file diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/28_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/28_correct_output.gff deleted file mode 100644 index b05515394..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/28_correct_output.gff +++ /dev/null @@ -1,73 +0,0 @@ -##gff-version 3 -Contig_mito mitfi gene 15 82 8.431e-12 + . ID=nbis_noL1id-trna-1;Name=trnF(ttc) -Contig_mito mitfi tRNA 15 82 8.431e-12 + . ID=trna-1;Parent=nbis_noL1id-trna-1;Name=trnF(ttc) -Contig_mito mitfi exon 15 82 8.431e-12 + . ID=nbis_NEW-exon-7;Parent=trna-1;Name=trnF(ttc) -Contig_mito mitfi gene 82 1063 6.464e-56 + . ID=nbis_noL1id-rrna-1;Name=rrnS -Contig_mito mitfi rRNA 82 1063 6.464e-56 + . ID=rrna-1;Parent=nbis_noL1id-rrna-1;Name=rrnS -Contig_mito mitfi exon 82 1063 6.464e-56 + . ID=nbis_NEW-exon-2;Parent=rrna-1;Name=rrnS -Contig_mito mitfi gene 1063 1132 1.403e-11 + . ID=nbis_noL1id-trna-2;Name=trnV(gta) -Contig_mito mitfi tRNA 1063 1132 1.403e-11 + . ID=trna-2;Parent=nbis_noL1id-trna-2;Name=trnV(gta) -Contig_mito mitfi exon 1063 1132 1.403e-11 + . ID=nbis_NEW-exon-22;Parent=trna-2;Name=trnV(gta) -Contig_mito mitfi gene 1133 2739 1.625e-37 + . ID=nbis_noL1id-rrna-2;Name=rrnL -Contig_mito mitfi rRNA 1133 2739 1.625e-37 + . ID=rrna-2;Parent=nbis_noL1id-rrna-2;Name=rrnL -Contig_mito mitfi exon 1133 2739 1.625e-37 + . 
ID=nbis_NEW-exon-1;Parent=rrna-2;Name=rrnL -Contig_mito mitfi gene 2740 2814 3.113e-07 + . ID=nbis_noL1id-trna-3;Name=trnL2(tta) -Contig_mito mitfi tRNA 2740 2814 3.113e-07 + . ID=trna-3;Parent=nbis_noL1id-trna-3;Name=trnL2(tta) -Contig_mito mitfi exon 2740 2814 3.113e-07 + . ID=nbis_NEW-exon-13;Parent=trna-3;Name=trnL2(tta) -Contig_mito mitfi gene 3821 3893 2.468e-10 + . ID=nbis_noL1id-trna-4;Name=trnI(atc) -Contig_mito mitfi tRNA 3821 3893 2.468e-10 + . ID=trna-4;Parent=nbis_noL1id-trna-4;Name=trnI(atc) -Contig_mito mitfi exon 3821 3893 2.468e-10 + . ID=nbis_NEW-exon-10;Parent=trna-4;Name=trnI(atc) -Contig_mito mitfi gene 3900 3970 6.295e-10 - . ID=nbis_noL1id-trna-5;Name=trnQ(caa) -Contig_mito mitfi tRNA 3900 3970 6.295e-10 - . ID=trna-5;Parent=nbis_noL1id-trna-5;Name=trnQ(caa) -Contig_mito mitfi exon 3900 3970 6.295e-10 - . ID=nbis_NEW-exon-17;Parent=trna-5;Name=trnQ(caa) -Contig_mito mitfi gene 3970 4038 1.766e-07 + . ID=nbis_noL1id-trna-6;Name=trnM(atg) -Contig_mito mitfi tRNA 3970 4038 1.766e-07 + . ID=trna-6;Parent=nbis_noL1id-trna-6;Name=trnM(atg) -Contig_mito mitfi exon 3970 4038 1.766e-07 + . ID=nbis_NEW-exon-14;Parent=trna-6;Name=trnM(atg) -Contig_mito mitfi gene 5080 5149 4.659e-12 + . ID=nbis_noL1id-trna-7;Name=trnW(tga) -Contig_mito mitfi tRNA 5080 5149 4.659e-12 + . ID=trna-7;Parent=nbis_noL1id-trna-7;Name=trnW(tga) -Contig_mito mitfi exon 5080 5149 4.659e-12 + . ID=nbis_NEW-exon-23;Parent=trna-7;Name=trnW(tga) -Contig_mito mitfi gene 5151 5219 4.413e-10 - . ID=nbis_noL1id-trna-8;Name=trnA(gca) -Contig_mito mitfi tRNA 5151 5219 4.413e-10 - . ID=trna-8;Parent=nbis_noL1id-trna-8;Name=trnA(gca) -Contig_mito mitfi exon 5151 5219 4.413e-10 - . ID=nbis_NEW-exon-3;Parent=trna-8;Name=trnA(gca) -Contig_mito mitfi gene 5220 5292 2.559e-09 - . ID=nbis_noL1id-trna-9;Name=trnN(aac) -Contig_mito mitfi tRNA 5220 5292 2.559e-09 - . ID=trna-9;Parent=nbis_noL1id-trna-9;Name=trnN(aac) -Contig_mito mitfi exon 5220 5292 2.559e-09 - . 
ID=nbis_NEW-exon-15;Parent=trna-9;Name=trnN(aac) -Contig_mito mitfi gene 5293 5359 2.134e-08 - . ID=nbis_noL1id-trna-10;Name=trnC(tgc) -Contig_mito mitfi tRNA 5293 5359 2.134e-08 - . ID=trna-10;Parent=nbis_noL1id-trna-10;Name=trnC(tgc) -Contig_mito mitfi exon 5293 5359 2.134e-08 - . ID=nbis_NEW-exon-4;Parent=trna-10;Name=trnC(tgc) -Contig_mito mitfi gene 5360 5429 8.91e-08 - . ID=nbis_noL1id-trna-11;Name=trnY(tac) -Contig_mito mitfi tRNA 5360 5429 8.91e-08 - . ID=trna-11;Parent=nbis_noL1id-trna-11;Name=trnY(tac) -Contig_mito mitfi exon 5360 5429 8.91e-08 - . ID=nbis_NEW-exon-24;Parent=trna-11;Name=trnY(tac) -Contig_mito mitfi gene 6973 7045 4.328e-10 - . ID=nbis_noL1id-trna-12;Name=trnS2(tca) -Contig_mito mitfi tRNA 6973 7045 4.328e-10 - . ID=trna-12;Parent=nbis_noL1id-trna-12;Name=trnS2(tca) -Contig_mito mitfi exon 6973 7045 4.328e-10 - . ID=nbis_NEW-exon-20;Parent=trna-12;Name=trnS2(tca) -Contig_mito mitfi gene 7051 7119 4.892e-10 + . ID=nbis_noL1id-trna-13;Name=trnD(gac) -Contig_mito mitfi tRNA 7051 7119 4.892e-10 + . ID=trna-13;Parent=nbis_noL1id-trna-13;Name=trnD(gac) -Contig_mito mitfi exon 7051 7119 4.892e-10 + . ID=nbis_NEW-exon-5;Parent=trna-13;Name=trnD(gac) -Contig_mito mitfi gene 7815 7884 2.271e-09 + . ID=nbis_noL1id-trna-14;Name=trnK(aaa) -Contig_mito mitfi tRNA 7815 7884 2.271e-09 + . ID=trna-14;Parent=nbis_noL1id-trna-14;Name=trnK(aaa) -Contig_mito mitfi exon 7815 7884 2.271e-09 + . ID=nbis_NEW-exon-11;Parent=trna-14;Name=trnK(aaa) -Contig_mito mitfi gene 9521 9589 1.016e-09 + . ID=nbis_noL1id-trna-15;Name=trnG(gga) -Contig_mito mitfi tRNA 9521 9589 1.016e-09 + . ID=trna-15;Parent=nbis_noL1id-trna-15;Name=trnG(gga) -Contig_mito mitfi exon 9521 9589 1.016e-09 + . ID=nbis_NEW-exon-8;Parent=trna-15;Name=trnG(gga) -Contig_mito mitfi gene 9942 10011 8.947e-09 + . ID=nbis_noL1id-trna-16;Name=trnR(cga) -Contig_mito mitfi tRNA 9942 10011 8.947e-09 + . ID=trna-16;Parent=nbis_noL1id-trna-16;Name=trnR(cga) -Contig_mito mitfi exon 9942 10011 8.947e-09 + . 
ID=nbis_NEW-exon-18;Parent=trna-16;Name=trnR(cga) -Contig_mito mitfi gene 11680 11749 1.428e-06 + . ID=nbis_noL1id-trna-17;Name=trnH(cac) -Contig_mito mitfi tRNA 11680 11749 1.428e-06 + . ID=trna-17;Parent=nbis_noL1id-trna-17;Name=trnH(cac) -Contig_mito mitfi exon 11680 11749 1.428e-06 + . ID=nbis_NEW-exon-9;Parent=trna-17;Name=trnH(cac) -Contig_mito mitfi gene 11750 11816 3.713e-08 + . ID=nbis_noL1id-trna-18;Name=trnS1(agc) -Contig_mito mitfi tRNA 11750 11816 3.713e-08 + . ID=trna-18;Parent=nbis_noL1id-trna-18;Name=trnS1(agc) -Contig_mito mitfi exon 11750 11816 3.713e-08 + . ID=nbis_NEW-exon-19;Parent=trna-18;Name=trnS1(agc) -Contig_mito mitfi gene 11816 11886 2.937e-17 + . ID=nbis_noL1id-trna-19;Name=trnL1(cta) -Contig_mito mitfi tRNA 11816 11886 2.937e-17 + . ID=trna-19;Parent=nbis_noL1id-trna-19;Name=trnL1(cta) -Contig_mito mitfi exon 11816 11886 2.937e-17 + . ID=nbis_NEW-exon-12;Parent=trna-19;Name=trnL1(cta) -Contig_mito mitfi gene 14839 14907 1.034e-10 + . ID=nbis_noL1id-trna-20;Name=trnT(aca) -Contig_mito mitfi tRNA 14839 14907 1.034e-10 + . ID=trna-20;Parent=nbis_noL1id-trna-20;Name=trnT(aca) -Contig_mito mitfi exon 14839 14907 1.034e-10 + . ID=nbis_NEW-exon-21;Parent=trna-20;Name=trnT(aca) -Contig_mito mitfi gene 15983 16052 4.471e-11 - . ID=nbis_noL1id-trna-21;Name=trnP(cca) -Contig_mito mitfi tRNA 15983 16052 4.471e-11 - . ID=trna-21;Parent=nbis_noL1id-trna-21;Name=trnP(cca) -Contig_mito mitfi exon 15983 16052 4.471e-11 - . ID=nbis_NEW-exon-16;Parent=trna-21;Name=trnP(cca) -Contig_mito mitfi gene 16579 16636 0.03944 - . ID=nbis_noL1id-trna-22;Name=trnE(gaa) -Contig_mito mitfi tRNA 16579 16636 0.03944 - . ID=trna-22;Parent=nbis_noL1id-trna-22;Name=trnE(gaa) -Contig_mito mitfi exon 16579 16636 0.03944 - . 
ID=nbis_NEW-exon-6;Parent=trna-22;Name=trnE(gaa) diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/28_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/28_test.gff deleted file mode 100644 index 166aaadb9..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/28_test.gff +++ /dev/null @@ -1,43 +0,0 @@ -Contig_mito mitfi tRNA 15 82 8.431e-12 + . Name=trnF(ttc) -Contig_mito mitfi rRNA 82 1063 6.464e-56 + . Name=rrnS -Contig_mito mitfi tRNA 1063 1132 1.403e-11 + . Name=trnV(gta) -Contig_mito mitfi rRNA 1133 2739 1.625e-37 + . Name=rrnL -Contig_mito mitfi tRNA 2740 2814 3.113e-07 + . Name=trnL2(tta) -Contig_mito mitos gene 2834 3802 219299273.8 + . Name=nad1 -Contig_mito mitfi tRNA 3821 3893 2.468e-10 + . Name=trnI(atc) -Contig_mito mitfi tRNA 3900 3970 6.295e-10 - . Name=trnQ(caa) -Contig_mito mitfi tRNA 3970 4038 1.766e-07 + . Name=trnM(atg) -Contig_mito mitos gene 4039 5064 174456090.0 + . Name=nad2 -Contig_mito mitfi tRNA 5080 5149 4.659e-12 + . Name=trnW(tga) -Contig_mito mitfi tRNA 5151 5219 4.413e-10 - . Name=trnA(gca) -Contig_mito mitfi tRNA 5220 5292 2.559e-09 - . Name=trnN(aac) -Contig_mito mitfi tRNA 5293 5359 2.134e-08 - . Name=trnC(tgc) -Contig_mito mitfi tRNA 5360 5429 8.91e-08 - . Name=trnY(tac) -Contig_mito mitos gene 5431 6972 288891731.6 + . Name=cox1 -Contig_mito mitfi tRNA 6973 7045 4.328e-10 - . Name=trnS2(tca) -Contig_mito mitfi tRNA 7051 7119 4.892e-10 + . Name=trnD(gac) -Contig_mito mitos gene 7130 7804 122513375.6 + . Name=cox2 -Contig_mito mitfi tRNA 7815 7884 2.271e-09 + . Name=trnK(aaa) -Contig_mito mitos gene 7886 8050 2217987.4 + . Name=atp8 -Contig_mito mitos gene 8047 8727 94993414.9 + . Name=atp6 -Contig_mito mitos gene 8737 9519 194612208.7 + . Name=cox3 -Contig_mito mitfi tRNA 9521 9589 1.016e-09 + . Name=trnG(gga) -Contig_mito mitos gene 9590 9937 26642647.3 + . Name=nad3 -Contig_mito mitfi tRNA 9942 10011 8.947e-09 + . Name=trnR(cga) -Contig_mito mitos gene 10013 10306 13686341.2 + . 
Name=nad4l -Contig_mito mitos gene 10303 11652 337911190.4 + . Name=nad4 -Contig_mito mitfi tRNA 11680 11749 1.428e-06 + . Name=trnH(cac) -Contig_mito mitfi tRNA 11750 11816 3.713e-08 + . Name=trnS1(agc) -Contig_mito mitfi tRNA 11816 11886 2.937e-17 + . Name=trnL1(cta) -Contig_mito mitos gene 11887 12171 53475528.5 + . Name=nad5_a -Contig_mito mitos gene 12171 12302 27946882.3 + . Name=nad5_b -Contig_mito mitos gene 12308 12655 79467040.6 + . Name=nad5_c -Contig_mito mitos gene 12729 12932 44320449.4 + . Name=nad5_d -Contig_mito mitos gene 12955 13686 162354806.2 + . Name=nad5_e -Contig_mito mitos gene 13707 14003 25369884.3 + . Name=cob_a -Contig_mito mitos gene 14005 14829 163150210.1 + . Name=cob_b -Contig_mito mitfi tRNA 14839 14907 1.034e-10 + . Name=trnT(aca) -Contig_mito mitfi tRNA 15983 16052 4.471e-11 - . Name=trnP(cca) -Contig_mito mitos gene 16064 16258 5474409.0 - . Name=nad6_b -Contig_mito mitos gene 16263 16577 11725720.5 - . Name=nad6_a -Contig_mito mitfi tRNA 16579 16636 0.03944 - . Name=trnE(gaa) diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/29_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/29_correct_output.gff deleted file mode 100644 index a17bf5772..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/29_correct_output.gff +++ /dev/null @@ -1,39 +0,0 @@ -##gff-version 3 -NC_033801.1 Gnomon gene 126084215 126096241 . - . ID=gene26786;Dbxref=GeneID:109851368;Name=LOC109851368;gbkey=Gene;gene=LOC109851368;gene_biotype=protein_coding -NC_033801.1 Gnomon mRNA 126084215 126090315 . - . 
ID=rna38267;Parent=gene26786;Dbxref=GeneID:109851368,Genbank:XM_020421455.1;Name=XM_020421455.1;gbkey=mRNA;gene=LOC109851368;model_evidence=Supporting evidence includes similarity to: 9 Proteins%2C and 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 4 samples with support for all annotated introns;product=uncharacterized LOC109851368%2C transcript variant X1;transcript_id=XM_020421455.1 -NC_033801.1 Gnomon exon 126084215 126084777 . - . ID=id248429;Parent=rna38267;Dbxref=GeneID:109851368,Genbank:XM_020421455.1;gbkey=mRNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X1;transcript_id=XM_020421455.1 -NC_033801.1 Gnomon exon 126089094 126090315 . - . ID=id248428;Parent=rna38267;Dbxref=GeneID:109851368,Genbank:XM_020421455.1;gbkey=mRNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X1;transcript_id=XM_020421455.1 -NC_033801.1 Gnomon CDS 126084268 126084777 . - 0 ID=cds30771;Parent=rna38267;Dbxref=GeneID:109851368,Genbank:XP_020277044.1;Name=XP_020277044.1;gbkey=CDS;gene=LOC109851368;product=uncharacterized protein LOC109851368 isoform X1;protein_id=XP_020277044.1 -NC_033801.1 Gnomon CDS 126089094 126089480 . - 0 ID=cds30771;Parent=rna38267;Dbxref=GeneID:109851368,Genbank:XP_020277044.1;Name=XP_020277044.1;gbkey=CDS;gene=LOC109851368;product=uncharacterized protein LOC109851368 isoform X1;protein_id=XP_020277044.1 -NC_033801.1 Gnomon five_prime_UTR 126089481 126090315 . - . ID=nbis_NEW-five_prime_utr-2;Parent=rna38267;Dbxref=GeneID:109851368,Genbank:XM_020421455.1;gbkey=mRNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X1;transcript_id=XM_020421455.1 -NC_033801.1 Gnomon intron 126084778 126089093 . - . ID=intron-1;Parent=rna38267 -NC_033801.1 Gnomon three_prime_UTR 126084215 126084267 . - . 
ID=nbis_NEW-three_prime_utr-2;Parent=rna38267;Dbxref=GeneID:109851368,Genbank:XM_020421455.1;gbkey=mRNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X1;transcript_id=XM_020421455.1 -NC_033801.1 Gnomon mRNA 126084215 126096241 . - . ID=rna38266;Parent=gene26786;Dbxref=GeneID:109851368,Genbank:XM_020421456.1;Name=XM_020421456.1;gbkey=mRNA;gene=LOC109851368;model_evidence=Supporting evidence includes similarity to: 6 Proteins%2C and 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 11 samples with support for all annotated introns;product=uncharacterized LOC109851368%2C transcript variant X4;transcript_id=XM_020421456.1 -NC_033801.1 Gnomon exon 126084215 126084777 . - . ID=id248427;Parent=rna38266;Dbxref=GeneID:109851368,Genbank:XM_020421456.1;gbkey=mRNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X4;transcript_id=XM_020421456.1 -NC_033801.1 Gnomon exon 126092383 126092458 . - . ID=id248426;Parent=rna38266;Dbxref=GeneID:109851368,Genbank:XM_020421456.1;gbkey=mRNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X4;transcript_id=XM_020421456.1 -NC_033801.1 Gnomon exon 126093366 126093703 . - . ID=id248425;Parent=rna38266;Dbxref=GeneID:109851368,Genbank:XM_020421456.1;gbkey=mRNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X4;transcript_id=XM_020421456.1 -NC_033801.1 Gnomon exon 126093798 126095180 . - . ID=id248424;Parent=rna38266;Dbxref=GeneID:109851368,Genbank:XM_020421456.1;gbkey=mRNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X4;transcript_id=XM_020421456.1 -NC_033801.1 Gnomon exon 126096082 126096241 . - . ID=id248423;Parent=rna38266;Dbxref=GeneID:109851368,Genbank:XM_020421456.1;gbkey=mRNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X4;transcript_id=XM_020421456.1 -NC_033801.1 Gnomon CDS 126084268 126084777 . 
- 0 ID=cds30770;Parent=rna38266;Dbxref=GeneID:109851368,Genbank:XP_020277045.1;Name=XP_020277045.1;gbkey=CDS;gene=LOC109851368;product=uncharacterized protein LOC109851368 isoform X2;protein_id=XP_020277045.1 -NC_033801.1 Gnomon CDS 126092383 126092458 . - 1 ID=cds30770;Parent=rna38266;Dbxref=GeneID:109851368,Genbank:XP_020277045.1;Name=XP_020277045.1;gbkey=CDS;gene=LOC109851368;product=uncharacterized protein LOC109851368 isoform X2;protein_id=XP_020277045.1 -NC_033801.1 Gnomon CDS 126093366 126093394 . - 0 ID=cds30770;Parent=rna38266;Dbxref=GeneID:109851368,Genbank:XP_020277045.1;Name=XP_020277045.1;gbkey=CDS;gene=LOC109851368;product=uncharacterized protein LOC109851368 isoform X2;protein_id=XP_020277045.1 -NC_033801.1 Gnomon five_prime_UTR 126093395 126093703 . - . ID=nbis_NEW-five_prime_utr-1;Parent=rna38266;Dbxref=GeneID:109851368,Genbank:XM_020421456.1;gbkey=mRNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X4;transcript_id=XM_020421456.1 -NC_033801.1 Gnomon five_prime_UTR 126093798 126095180 . - . ID=nbis_NEW-five_prime_utr-1;Parent=rna38266;Dbxref=GeneID:109851368,Genbank:XM_020421456.1;gbkey=mRNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X4;transcript_id=XM_020421456.1 -NC_033801.1 Gnomon five_prime_UTR 126096082 126096241 . - . ID=nbis_NEW-five_prime_utr-1;Parent=rna38266;Dbxref=GeneID:109851368,Genbank:XM_020421456.1;gbkey=mRNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X4;transcript_id=XM_020421456.1 -NC_033801.1 Gnomon intron 126084778 126092382 . - . ID=intron-2;Parent=rna38266 -NC_033801.1 Gnomon intron 126092459 126093365 . - . ID=intron-3;Parent=rna38266 -NC_033801.1 Gnomon intron 126093704 126093797 . - . ID=intron-4;Parent=rna38266 -NC_033801.1 Gnomon intron 126095181 126096081 . - . ID=intron-5;Parent=rna38266 -NC_033801.1 Gnomon three_prime_UTR 126084215 126084267 . - . 
ID=nbis_NEW-three_prime_utr-1;Parent=rna38266;Dbxref=GeneID:109851368,Genbank:XM_020421456.1;gbkey=mRNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X4;transcript_id=XM_020421456.1 -NC_033801.1 Gnomon transcript 126093500 126096241 . - . ID=rna38268;Parent=gene26786;Dbxref=GeneID:109851368,Genbank:XR_002249778.1;Name=XR_002249778.1;gbkey=misc_RNA;gene=LOC109851368;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 13 samples with support for all annotated introns;product=uncharacterized LOC109851368%2C transcript variant X3;transcript_id=XR_002249778.1 -NC_033801.1 Gnomon exon 126093500 126093703 . - . ID=id248432;Parent=rna38268;Dbxref=GeneID:109851368,Genbank:XR_002249778.1;gbkey=misc_RNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X3;transcript_id=XR_002249778.1 -NC_033801.1 Gnomon exon 126093798 126095074 . - . ID=id248431;Parent=rna38268;Dbxref=GeneID:109851368,Genbank:XR_002249778.1;gbkey=misc_RNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X3;transcript_id=XR_002249778.1 -NC_033801.1 Gnomon exon 126096082 126096241 . - . ID=id248430;Parent=rna38268;Dbxref=GeneID:109851368,Genbank:XR_002249778.1;gbkey=misc_RNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X3;transcript_id=XR_002249778.1 -NC_033801.1 Gnomon intron 126093704 126093797 . - . ID=intron-6;Parent=rna38268 -NC_033801.1 Gnomon intron 126095075 126096081 . - . ID=intron-7;Parent=rna38268 -NC_033801.1 Gnomon transcript 126093500 126096241 . - . 
ID=rna38269;Parent=gene26786;Dbxref=GeneID:109851368,Genbank:XR_002249777.1;Name=XR_002249777.1;gbkey=misc_RNA;gene=LOC109851368;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 18 samples with support for all annotated introns;product=uncharacterized LOC109851368%2C transcript variant X2;transcript_id=XR_002249777.1 -NC_033801.1 Gnomon exon 126093500 126093703 . - . ID=id248435;Parent=rna38269;Dbxref=GeneID:109851368,Genbank:XR_002249777.1;gbkey=misc_RNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X2;transcript_id=XR_002249777.1 -NC_033801.1 Gnomon exon 126093798 126095180 . - . ID=id248434;Parent=rna38269;Dbxref=GeneID:109851368,Genbank:XR_002249777.1;gbkey=misc_RNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X2;transcript_id=XR_002249777.1 -NC_033801.1 Gnomon exon 126096082 126096241 . - . ID=id248433;Parent=rna38269;Dbxref=GeneID:109851368,Genbank:XR_002249777.1;gbkey=misc_RNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X2;transcript_id=XR_002249777.1 -NC_033801.1 Gnomon intron 126093704 126093797 . - . ID=intron-8;Parent=rna38269 -NC_033801.1 Gnomon intron 126095181 126096081 . - . ID=intron-9;Parent=rna38269 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/29_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/29_test.gff deleted file mode 100644 index a19739fef..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/29_test.gff +++ /dev/null @@ -1,32 +0,0 @@ -NC_033801.1 Gnomon gene 126084215 126096241 . - . ID=gene26786;Dbxref=GeneID:109851368;Name=LOC109851368;gbkey=Gene;gene=LOC109851368;gene_biotype=protein_coding -NC_033801.1 Gnomon mRNA 126084215 126090315 . - . 
ID=rna38267;Parent=gene26786;Dbxref=GeneID:109851368,Genbank:XM_020421455.1;Name=XM_020421455.1;gbkey=mRNA;gene=LOC109851368;model_evidence=Supporting evidence includes similarity to: 9 Proteins%2C and 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 4 samples with support for all annotated introns;product=uncharacterized LOC109851368%2C transcript variant X1;transcript_id=XM_020421455.1 -NC_033801.1 Gnomon exon 126084215 126084777 . - . ID=id248429;Parent=rna38267;Dbxref=GeneID:109851368,Genbank:XM_020421455.1;gbkey=mRNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X1;transcript_id=XM_020421455.1 -NC_033801.1 Gnomon CDS 126084268 126084777 . - 0 ID=cds30771;Parent=rna38267;Dbxref=GeneID:109851368,Genbank:XP_020277044.1;Name=XP_020277044.1;gbkey=CDS;gene=LOC109851368;product=uncharacterized protein LOC109851368 isoform X1;protein_id=XP_020277044.1 -NC_033801.1 Gnomon intron 126084778 126089093 . - . Parent=rna38267 -NC_033801.1 Gnomon CDS 126089094 126089480 . - 0 ID=cds30771;Parent=rna38267;Dbxref=GeneID:109851368,Genbank:XP_020277044.1;Name=XP_020277044.1;gbkey=CDS;gene=LOC109851368;product=uncharacterized protein LOC109851368 isoform X1;protein_id=XP_020277044.1 -NC_033801.1 Gnomon exon 126089094 126090315 . - . ID=id248428;Parent=rna38267;Dbxref=GeneID:109851368,Genbank:XM_020421455.1;gbkey=mRNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X1;transcript_id=XM_020421455.1 -NC_033801.1 Gnomon mRNA 126084215 126096241 . - . 
ID=rna38266;Parent=gene26786;Dbxref=GeneID:109851368,Genbank:XM_020421456.1;Name=XM_020421456.1;gbkey=mRNA;gene=LOC109851368;model_evidence=Supporting evidence includes similarity to: 6 Proteins%2C and 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 11 samples with support for all annotated introns;product=uncharacterized LOC109851368%2C transcript variant X4;transcript_id=XM_020421456.1 -NC_033801.1 Gnomon exon 126084215 126084777 . - . ID=id248427;Parent=rna38266;Dbxref=GeneID:109851368,Genbank:XM_020421456.1;gbkey=mRNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X4;transcript_id=XM_020421456.1 -NC_033801.1 Gnomon CDS 126084268 126084777 . - 0 ID=cds30770;Parent=rna38266;Dbxref=GeneID:109851368,Genbank:XP_020277045.1;Name=XP_020277045.1;gbkey=CDS;gene=LOC109851368;product=uncharacterized protein LOC109851368 isoform X2;protein_id=XP_020277045.1 -NC_033801.1 Gnomon intron 126084778 126092382 . - . Parent=rna38266 -NC_033801.1 Gnomon exon 126092383 126092458 . - . ID=id248426;Parent=rna38266;Dbxref=GeneID:109851368,Genbank:XM_020421456.1;gbkey=mRNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X4;transcript_id=XM_020421456.1 -NC_033801.1 Gnomon CDS 126092383 126092458 . - 1 ID=cds30770;Parent=rna38266;Dbxref=GeneID:109851368,Genbank:XP_020277045.1;Name=XP_020277045.1;gbkey=CDS;gene=LOC109851368;product=uncharacterized protein LOC109851368 isoform X2;protein_id=XP_020277045.1 -NC_033801.1 Gnomon intron 126092459 126093365 . - . Parent=rna38266 -NC_033801.1 Gnomon CDS 126093366 126093394 . - 0 ID=cds30770;Parent=rna38266;Dbxref=GeneID:109851368,Genbank:XP_020277045.1;Name=XP_020277045.1;gbkey=CDS;gene=LOC109851368;product=uncharacterized protein LOC109851368 isoform X2;protein_id=XP_020277045.1 -NC_033801.1 Gnomon exon 126093366 126093703 . - . 
ID=id248425;Parent=rna38266;Dbxref=GeneID:109851368,Genbank:XM_020421456.1;gbkey=mRNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X4;transcript_id=XM_020421456.1 -NC_033801.1 Gnomon intron 126093704 126093797 . - . Parent=rna38266 -NC_033801.1 Gnomon exon 126093798 126095180 . - . ID=id248424;Parent=rna38266;Dbxref=GeneID:109851368,Genbank:XM_020421456.1;gbkey=mRNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X4;transcript_id=XM_020421456.1 -NC_033801.1 Gnomon intron 126095181 126096081 . - . Parent=rna38266 -NC_033801.1 Gnomon exon 126096082 126096241 . - . ID=id248423;Parent=rna38266;Dbxref=GeneID:109851368,Genbank:XM_020421456.1;gbkey=mRNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X4;transcript_id=XM_020421456.1 -NC_033801.1 Gnomon transcript 126093500 126096241 . - . ID=rna38268;Parent=gene26786;Dbxref=GeneID:109851368,Genbank:XR_002249778.1;Name=XR_002249778.1;gbkey=misc_RNA;gene=LOC109851368;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 13 samples with support for all annotated introns;product=uncharacterized LOC109851368%2C transcript variant X3;transcript_id=XR_002249778.1 -NC_033801.1 Gnomon exon 126093500 126093703 . - . ID=id248432;Parent=rna38268;Dbxref=GeneID:109851368,Genbank:XR_002249778.1;gbkey=misc_RNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X3;transcript_id=XR_002249778.1 -NC_033801.1 Gnomon intron 126093704 126093797 . - . Parent=rna38268 -NC_033801.1 Gnomon exon 126093798 126095074 . - . ID=id248431;Parent=rna38268;Dbxref=GeneID:109851368,Genbank:XR_002249778.1;gbkey=misc_RNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X3;transcript_id=XR_002249778.1 -NC_033801.1 Gnomon intron 126095075 126096081 . - . Parent=rna38268 -NC_033801.1 Gnomon exon 126096082 126096241 . - . 
ID=id248430;Parent=rna38268;Dbxref=GeneID:109851368,Genbank:XR_002249778.1;gbkey=misc_RNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X3;transcript_id=XR_002249778.1 -NC_033801.1 Gnomon transcript 126093500 126096241 . - . ID=rna38269;Parent=gene26786;Dbxref=GeneID:109851368,Genbank:XR_002249777.1;Name=XR_002249777.1;gbkey=misc_RNA;gene=LOC109851368;model_evidence=Supporting evidence includes similarity to: 100%25 coverage of the annotated genomic feature by RNAseq alignments%2C including 18 samples with support for all annotated introns;product=uncharacterized LOC109851368%2C transcript variant X2;transcript_id=XR_002249777.1 -NC_033801.1 Gnomon exon 126093500 126093703 . - . ID=id248435;Parent=rna38269;Dbxref=GeneID:109851368,Genbank:XR_002249777.1;gbkey=misc_RNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X2;transcript_id=XR_002249777.1 -NC_033801.1 Gnomon intron 126093704 126093797 . - . Parent=rna38269 -NC_033801.1 Gnomon exon 126093798 126095180 . - . ID=id248434;Parent=rna38269;Dbxref=GeneID:109851368,Genbank:XR_002249777.1;gbkey=misc_RNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X2;transcript_id=XR_002249777.1 -NC_033801.1 Gnomon intron 126095181 126096081 . - . Parent=rna38269 -NC_033801.1 Gnomon exon 126096082 126096241 . - . ID=id248433;Parent=rna38269;Dbxref=GeneID:109851368,Genbank:XR_002249777.1;gbkey=misc_RNA;gene=LOC109851368;product=uncharacterized LOC109851368%2C transcript variant X2;transcript_id=XR_002249777.1 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/2_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/2_correct_output.gff deleted file mode 100644 index fafe86ed5..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/2_correct_output.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2 -scaffold625 maker mRNA 337818 343277 . + . 
ID=CLUHART00000008717;Parent=CLUHARG00000005458 -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717 -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717 -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717 -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717 -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717 -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717 -scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146 -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker CDS 564372 564588 . 
+ 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147 -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147 -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/2_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/2_test.gff deleted file mode 100644 index 2b83f42c9..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/2_test.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2 -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717 -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404 -scaffold625 maker exon 340733 340841 . + . 
ID=CLUHART00000008717:exon:1405 -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406 -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407 -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr -scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996 -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147 -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998 -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000 -scaffold789 maker CDS 558191 560123 . 
+ 0 ID=CLUHART00000006147:cds -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/30_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/30_correct_output.gff deleted file mode 100644 index 0683c68c2..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/30_correct_output.gff +++ /dev/null @@ -1,10 +0,0 @@ -##gff-version 3 -000000F|arrow StringTie gene 898 1804 1000 + . ID=P12103_109_S2_L002.1;gene_id=P12103_109_S2_L002.1;transcript_id=P12103_109_S2_L002.1.1 -000000F|arrow StringTie transcript 898 1804 1000 + . ID=P12103_109_S2_L002.1.1;Parent=P12103_109_S2_L002.1;gene_id=P12103_109_S2_L002.1;transcript_id=P12103_109_S2_L002.1.1 -000000F|arrow StringTie exon 898 1804 1000 + . ID=exon-1;Parent=P12103_109_S2_L002.1.1;gene_id=P12103_109_S2_L002.1;transcript_id=P12103_109_S2_L002.1.1 -000000F|arrow StringTie gene 6990 7449 1000 - . ID=P12103_109_S2_L002.4;gene_id=P12103_109_S2_L002.4;transcript_id=P12103_109_S2_L002.4.1 -000000F|arrow StringTie transcript 6990 7449 1000 - . ID=P12103_109_S2_L002.4.1;Parent=P12103_109_S2_L002.4;gene_id=P12103_109_S2_L002.4;transcript_id=P12103_109_S2_L002.4.1 -000000F|arrow StringTie exon 6990 7449 1000 - . ID=exon-3;Parent=P12103_109_S2_L002.4.1;gene_id=P12103_109_S2_L002.4;transcript_id=P12103_109_S2_L002.4.1 -000000F|arrow StringTie pseudogene 1147 3802 1000 - . ID=nbis_NEW-pseudogene-1;gene_id=P12103_109_S2_L002.2;transcript_id=P12103_109_S2_L002.2.1 -000000F|arrow StringTie RNA 1147 3802 1000 - . 
ID=p12103_109_s2_l002.2.1;Parent=nbis_NEW-pseudogene-1;gene_id=P12103_109_S2_L002.2;transcript_id=P12103_109_S2_L002.2.1 -000000F|arrow StringTie exon 1147 3802 1000 - . ID=exon-2;Parent=P12103_109_S2_L002.2.1;gene_id=P12103_109_S2_L002.2;transcript_id=P12103_109_S2_L002.2.1 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/30_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/30_test.gff deleted file mode 100644 index 4927a687e..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/30_test.gff +++ /dev/null @@ -1,6 +0,0 @@ -000000F|arrow StringTie transcript 898 1804 1000 + . gene_id "P12103_109_S2_L002.1"; transcript_id "P12103_109_S2_L002.1.1"; -000000F|arrow StringTie exon 898 1804 1000 + . gene_id "P12103_109_S2_L002.1"; transcript_id "P12103_109_S2_L002.1.1"; -000000F|arrow StringTie pseudogene 1147 3802 1000 - . gene_id "P12103_109_S2_L002.2"; transcript_id "P12103_109_S2_L002.2.1"; -000000F|arrow StringTie exon 1147 3802 1000 - . gene_id "P12103_109_S2_L002.2"; transcript_id "P12103_109_S2_L002.2.1"; -000000F|arrow StringTie transcript 6990 7449 1000 - . gene_id "P12103_109_S2_L002.4"; transcript_id "P12103_109_S2_L002.4.1"; -000000F|arrow StringTie exon 6990 7449 1000 - . gene_id "P12103_109_S2_L002.4"; transcript_id "P12103_109_S2_L002.4.1"; \ No newline at end of file diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/31_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/31_correct_output.gff deleted file mode 100644 index 6fe1e5c78..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/31_correct_output.gff +++ /dev/null @@ -1,21 +0,0 @@ -##gff-version 3 -scaffold_5 JGI gene 442603 445653 . + . ID=nbis_NEW-gene-2;exonNumber=1;gene_id=e_gw1.5.2.1;protein_id=335805 -scaffold_5 JGI mRNA 442603 445653 . + . ID=335995;Parent=nbis_NEW-gene-2;exonNumber=1;gene_id=e_gw1.5.2.1;protein_id=335805 -scaffold_5 JGI exon 442603 443344 . 
+ 0 ID=exon-1;Parent=335995;gene_id=e_gw1.5.2.1;transcript_id=335995 -scaffold_5 JGI exon 443530 443531 . + 0 ID=exon-2;Parent=335995;gene_id=e_gw1.5.2.1;transcript_id=335995 -scaffold_5 JGI exon 443569 445653 . + 0 ID=exon-3;Parent=335995;gene_id=e_gw1.5.2.1;transcript_id=335995 -scaffold_5 JGI CDS 442603 443344 . + 0 ID=cds-1;Parent=335995;exonNumber=1;gene_id=e_gw1.5.2.1;protein_id=335805 -scaffold_5 JGI CDS 443530 443531 . + 2 ID=cds-2;Parent=335995;exonNumber=2;gene_id=e_gw1.5.2.1;protein_id=335805 -scaffold_5 JGI CDS 443569 445653 . + 0 ID=cds-3;Parent=335995;exonNumber=3;gene_id=e_gw1.5.2.1;protein_id=m335805 -scaffold_5 JGI start_codon 442603 442605 . + 0 ID=start_codon-1;Parent=335995;exonNumber=7;gene_id=e_gw1.5.2.1 -scaffold_5 JGI stop_codon 445651 445653 . + 0 ID=stop_codon-1;Parent=335995;gene_id=e_gw1.5.2.1 -scaffold_5 JGI gene 542603 545653 . + . ID=nbis_NEW-gene-1;exonNumber=1;gene_id=e_gw2;protein_id=335805 -scaffold_5 JGI mRNA 542603 545653 . + . ID=2;Parent=nbis_NEW-gene-1;exonNumber=1;gene_id=e_gw2;protein_id=335805 -scaffold_5 JGI exon 542603 543344 . + 0 ID=exon-4;Parent=2;gene_id=e_gw2;transcript_id=2 -scaffold_5 JGI exon 543530 543531 . + 0 ID=exon-5;Parent=2;gene_id=e_gw2;transcript_id=2 -scaffold_5 JGI exon 543569 545653 . + 0 ID=exon-6;Parent=2;gene_id=e_gw2;transcript_id=2 -scaffold_5 JGI CDS 542603 543344 . + 0 ID=cds-4;Parent=2;exonNumber=1;gene_id=e_gw2;protein_id=335805 -scaffold_5 JGI CDS 543530 543531 . + 2 ID=cds-5;Parent=2;exonNumber=2;gene_id=e_gw2;protein_id=335805 -scaffold_5 JGI CDS 543569 545653 . + 0 ID=cds-6;Parent=2;exonNumber=3;gene_id=e_gw2;protein_id=m335805 -scaffold_5 JGI start_codon 542603 542605 . + 0 ID=start_codon-2;Parent=2;exonNumber=7;gene_id=e_gw2 -scaffold_5 JGI stop_codon 545651 545653 . 
+ 0 ID=stop_codon-2;Parent=2;gene_id=e_gw2 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/31_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/31_test.gff deleted file mode 100644 index 378ac7bbe..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/31_test.gff +++ /dev/null @@ -1,17 +0,0 @@ -#GFF1 with attribute "tag value tag value" -scaffold_5 JGI exon 442603 443344 . + . gene_id "e_gw1.5.2.1" transcript_id 335995 -scaffold_5 JGI CDS 442603 443344 . + 0 gene_id "e_gw1.5.2.1" protein_id 335805 exonNumber 1 -scaffold_5 JGI start_codon 442603 442605 . + 0 gene_id "e_gw1.5.2.1" exonNumber 7 -scaffold_5 JGI exon 443530 443531 . + . gene_id "e_gw1.5.2.1" transcript_id 335995 -scaffold_5 JGI CDS 443530 443531 . + 2 gene_id "e_gw1.5.2.1" protein_id 335805 exonNumber 2 -scaffold_5 JGI exon 443569 445653 . + . gene_id "e_gw1.5.2.1" transcript_id 335995 -scaffold_5 JGI CDS 443569 445653 . + 0 gene_id "e_gw1.5.2.1" protein_id m335805 exonNumber 3 -scaffold_5 JGI stop_codon 445651 445653 . + 0 gene_id "e_gw1.5.2.1" -scaffold_5 JGI exon 542603 543344 . + . gene_id "e_gw2" transcript_id 2 -scaffold_5 JGI CDS 542603 543344 . + 0 gene_id "e_gw2" protein_id 335805 exonNumber 1 -scaffold_5 JGI start_codon 542603 542605 . + 0 gene_id "e_gw2" exonNumber 7 -scaffold_5 JGI exon 543530 543531 . + . gene_id "e_gw2" transcript_id 2 -scaffold_5 JGI CDS 543530 543531 . + 2 gene_id "e_gw2" protein_id 335805 exonNumber 2 -scaffold_5 JGI exon 543569 545653 . + . gene_id "e_gw2" transcript_id 2 -scaffold_5 JGI CDS 543569 545653 . + 0 gene_id "e_gw2" protein_id m335805 exonNumber 3 -scaffold_5 JGI stop_codon 545651 545653 . 
+ 0 gene_id "e_gw2" diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/32_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/32_correct_output.gff deleted file mode 100644 index 6fe1e5c78..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/32_correct_output.gff +++ /dev/null @@ -1,21 +0,0 @@ -##gff-version 3 -scaffold_5 JGI gene 442603 445653 . + . ID=nbis_NEW-gene-2;exonNumber=1;gene_id=e_gw1.5.2.1;protein_id=335805 -scaffold_5 JGI mRNA 442603 445653 . + . ID=335995;Parent=nbis_NEW-gene-2;exonNumber=1;gene_id=e_gw1.5.2.1;protein_id=335805 -scaffold_5 JGI exon 442603 443344 . + 0 ID=exon-1;Parent=335995;gene_id=e_gw1.5.2.1;transcript_id=335995 -scaffold_5 JGI exon 443530 443531 . + 0 ID=exon-2;Parent=335995;gene_id=e_gw1.5.2.1;transcript_id=335995 -scaffold_5 JGI exon 443569 445653 . + 0 ID=exon-3;Parent=335995;gene_id=e_gw1.5.2.1;transcript_id=335995 -scaffold_5 JGI CDS 442603 443344 . + 0 ID=cds-1;Parent=335995;exonNumber=1;gene_id=e_gw1.5.2.1;protein_id=335805 -scaffold_5 JGI CDS 443530 443531 . + 2 ID=cds-2;Parent=335995;exonNumber=2;gene_id=e_gw1.5.2.1;protein_id=335805 -scaffold_5 JGI CDS 443569 445653 . + 0 ID=cds-3;Parent=335995;exonNumber=3;gene_id=e_gw1.5.2.1;protein_id=m335805 -scaffold_5 JGI start_codon 442603 442605 . + 0 ID=start_codon-1;Parent=335995;exonNumber=7;gene_id=e_gw1.5.2.1 -scaffold_5 JGI stop_codon 445651 445653 . + 0 ID=stop_codon-1;Parent=335995;gene_id=e_gw1.5.2.1 -scaffold_5 JGI gene 542603 545653 . + . ID=nbis_NEW-gene-1;exonNumber=1;gene_id=e_gw2;protein_id=335805 -scaffold_5 JGI mRNA 542603 545653 . + . ID=2;Parent=nbis_NEW-gene-1;exonNumber=1;gene_id=e_gw2;protein_id=335805 -scaffold_5 JGI exon 542603 543344 . + 0 ID=exon-4;Parent=2;gene_id=e_gw2;transcript_id=2 -scaffold_5 JGI exon 543530 543531 . + 0 ID=exon-5;Parent=2;gene_id=e_gw2;transcript_id=2 -scaffold_5 JGI exon 543569 545653 . + 0 ID=exon-6;Parent=2;gene_id=e_gw2;transcript_id=2 -scaffold_5 JGI CDS 542603 543344 . 
+ 0 ID=cds-4;Parent=2;exonNumber=1;gene_id=e_gw2;protein_id=335805 -scaffold_5 JGI CDS 543530 543531 . + 2 ID=cds-5;Parent=2;exonNumber=2;gene_id=e_gw2;protein_id=335805 -scaffold_5 JGI CDS 543569 545653 . + 0 ID=cds-6;Parent=2;exonNumber=3;gene_id=e_gw2;protein_id=m335805 -scaffold_5 JGI start_codon 542603 542605 . + 0 ID=start_codon-2;Parent=2;exonNumber=7;gene_id=e_gw2 -scaffold_5 JGI stop_codon 545651 545653 . + 0 ID=stop_codon-2;Parent=2;gene_id=e_gw2 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/32_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/32_test.gff deleted file mode 100644 index 42b851fe9..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/32_test.gff +++ /dev/null @@ -1,17 +0,0 @@ -#GFF1 with attribute "tag value tag value" -scaffold_5 JGI CDS 442603 443344 . + 0 gene_id "e_gw1.5.2.1" protein_id 335805 exonNumber 1 -scaffold_5 JGI exon 442603 443344 . + . gene_id "e_gw1.5.2.1" transcript_id 335995 -scaffold_5 JGI start_codon 442603 442605 . + 0 gene_id "e_gw1.5.2.1" exonNumber 7 -scaffold_5 JGI exon 443530 443531 . + . gene_id "e_gw1.5.2.1" transcript_id 335995 -scaffold_5 JGI CDS 443530 443531 . + 2 gene_id "e_gw1.5.2.1" protein_id 335805 exonNumber 2 -scaffold_5 JGI exon 443569 445653 . + . gene_id "e_gw1.5.2.1" transcript_id 335995 -scaffold_5 JGI CDS 443569 445653 . + 0 gene_id "e_gw1.5.2.1" protein_id m335805 exonNumber 3 -scaffold_5 JGI stop_codon 445651 445653 . + 0 gene_id "e_gw1.5.2.1" -scaffold_5 JGI CDS 542603 543344 . + 0 gene_id "e_gw2" protein_id 335805 exonNumber 1 -scaffold_5 JGI exon 542603 543344 . + . gene_id "e_gw2" transcript_id 2 -scaffold_5 JGI start_codon 542603 542605 . + 0 gene_id "e_gw2" exonNumber 7 -scaffold_5 JGI exon 543530 543531 . + . gene_id "e_gw2" transcript_id 2 -scaffold_5 JGI CDS 543530 543531 . + 2 gene_id "e_gw2" protein_id 335805 exonNumber 2 -scaffold_5 JGI exon 543569 545653 . + . gene_id "e_gw2" transcript_id 2 -scaffold_5 JGI CDS 543569 545653 . 
+ 0 gene_id "e_gw2" protein_id m335805 exonNumber 3 -scaffold_5 JGI stop_codon 545651 545653 . + 0 gene_id "e_gw2" diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/3_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/3_correct_output.gff deleted file mode 100644 index 069e65fc0..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/3_correct_output.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2;locus_tag=BBBBBB -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458;locus_tag=BBBBBB -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . 
ID=CLUHART00000006146;Parent=CLUHARG00000003852;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . 
+ 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147;locus_tag=AAAAA diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/3_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/3_test.gff deleted file mode 100644 index af4a8cdb8..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/3_test.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2;locus_tag=BBBBBB -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;locus_tag=BBBBBB -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;locus_tag=BBBBBB -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;locus_tag=BBBBBB -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;locus_tag=BBBBBB -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;locus_tag=BBBBBB -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;locus_tag=BBBBBB -scaffold789 maker gene 558184 564780 . + . 
ID=CLUHARG00000003852;Name=PF11_0240;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;locus_tag=AAAAA -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;locus_tag=AAAAA -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . 
+ . ID=CLUHART00000006147:three_prime_utr;locus_tag=AAAAA diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/4_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/4_correct_output.gff deleted file mode 100644 index 069e65fc0..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/4_correct_output.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2;locus_tag=BBBBBB -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458;locus_tag=BBBBBB -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . 
ID=CLUHART00000006146;Parent=CLUHARG00000003852;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . 
+ 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147;locus_tag=AAAAA diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/4_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/4_test.gff deleted file mode 100644 index 992872075..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/4_test.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2;locus_tag=BBBBBB -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;locus_tag=BBBBBB -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;locus_tag=BBBBBB -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;locus_tag=BBBBBB -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;locus_tag=BBBBBB -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;locus_tag=AAAAA -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker CDS 341964 343033 . 
+ 2 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;locus_tag=BBBBBB -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;locus_tag=BBBBBB -scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;locus_tag=AAAAA -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;locus_tag=AAAAA -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . 
ID=CLUHART00000006147:five_prime_utr;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;locus_tag=AAAAA diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/5_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/5_correct_output.gff deleted file mode 100644 index 069e65fc0..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/5_correct_output.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2;locus_tag=BBBBBB -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458;locus_tag=BBBBBB -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold789 maker gene 558184 564780 . + . 
ID=CLUHARG00000003852;Name=PF11_0240;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . 
+ 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147;locus_tag=AAAAA diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/5_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/5_test.gff deleted file mode 100644 index 72c8cfa86..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/5_test.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2;locus_tag=BBBBBB -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;locus_tag=BBBBBB -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;locus_tag=BBBBBB -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;locus_tag=BBBBBB -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;locus_tag=BBBBBB -scaffold789 maker gene 558184 564780 . + . 
ID=CLUHARG00000003852;Name=PF11_0240;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;locus_tag=AAAAA -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;locus_tag=AAAAA -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;locus_tag=BBBBBB -scaffold625 maker exon 341964 343277 . + . 
ID=CLUHART00000008717:exon:1407;locus_tag=BBBBBB -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;locus_tag=AAAAA diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/6_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/6_correct_output.gff deleted file mode 100644 index ddcbe6074..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/6_correct_output.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2;locus_tag=BBBBBB -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458;locus_tag=BBBBBB -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717 -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717 -scaffold789 maker gene 558184 564780 . + . 
ID=nbis_noL1id-mrna-1;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=nbis_noL1id-mrna-1;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=nbis_noL1id-mrna-1;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . 
+ 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147;locus_tag=AAAAA diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/6_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/6_test.gff deleted file mode 100644 index 737813508..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/6_test.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2;locus_tag=BBBBBB -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;locus_tag=BBBBBB -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;locus_tag=BBBBBB -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;locus_tag=BBBBBB -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;locus_tag=BBBBBB -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr -scaffold625 maker three_prime_UTR 343034 343277 . + . 
ID=CLUHART00000008717:three_prime_utr -scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240 -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;locus_tag=AAAAA -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;locus_tag=AAAAA -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . 
ID=CLUHART00000006147:five_prime_utr;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;locus_tag=AAAAA diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/7_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/7_correct_output.gff deleted file mode 100644 index 069e65fc0..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/7_correct_output.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2;locus_tag=BBBBBB -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458;locus_tag=BBBBBB -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717;locus_tag=BBBBBB -scaffold789 maker gene 558184 564780 . + . 
ID=CLUHARG00000003852;Name=PF11_0240;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . 
+ 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147;locus_tag=AAAAA diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/7_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/7_test.gff deleted file mode 100644 index 87eeebde5..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/7_test.gff +++ /dev/null @@ -1,36 +0,0 @@ -##gff-version 3 -scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2;locus_tag=BBBBBB -scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;locus_tag=BBBBBB -scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;locus_tag=BBBBBB -scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;locus_tag=BBBBBB -scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;locus_tag=BBBBBB -scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;locus_tag=BBBBBB -scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;locus_tag=BBBBBB -scaffold625 maker five_prime_UTR 337818 337914 . + . 
ID=CLUHART00000008717:five_prime_utr;locus_tag=BBBBBB -scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;locus_tag=BBBBBB -scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;locus_tag=AAAAA;Parent=CLUHART00000006146 -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;locus_tag=AAAAA -scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;locus_tag=AAAAA;Parent=CLUHART00000006146 -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;locus_tag=AAAAA;Parent=CLUHART00000006146 -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;locus_tag=AAAAA;Parent=CLUHART00000006146 -scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;locus_tag=AAAAA -scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;locus_tag=AAAAA -scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;locus_tag=AAAAA -scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;locus_tag=AAAAA -scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;locus_tag=AAAAA -scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;locus_tag=AAAAA -scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker CDS 561401 561519 . 
+ 2 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;locus_tag=AAAAA -scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;locus_tag=AAAAA -scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;locus_tag=AAAAA diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/8_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/8_correct_output.gff deleted file mode 100644 index 38f2e5bef..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/8_correct_output.gff +++ /dev/null @@ -1,107 +0,0 @@ -##gff-version 3 -Tob1_contig1 Prodigal:2.60 gene 476 670 . - 0 ID=nbis_NEW-gene-1;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00001;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 mRNA 476 670 . - 0 ID=nbis_nol2id-cds-1;Parent=nbis_NEW-gene-1;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00001;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 exon 476 670 . - . ID=nbis_NEW-exon-1;Parent=nbis_nol2id-cds-1;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00001;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 476 670 . - 0 ID=Tob1_00001;Parent=nbis_nol2id-cds-1;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00001;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 gene 1018 1191 . - 0 ID=nbis_NEW-gene-2;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00002;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 mRNA 1018 1191 . - 0 ID=nbis_nol2id-cds-2;Parent=nbis_NEW-gene-2;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00002;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 exon 1018 1191 . - . 
ID=nbis_NEW-exon-2;Parent=nbis_nol2id-cds-2;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00002;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 1018 1191 . - 0 ID=Tob1_00002;Parent=nbis_nol2id-cds-2;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00002;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 gene 1175 1591 . - 0 ID=nbis_NEW-gene-3;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00003;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 mRNA 1175 1591 . - 0 ID=nbis_nol2id-cds-3;Parent=nbis_NEW-gene-3;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00003;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 exon 1175 1591 . - . ID=nbis_NEW-exon-3;Parent=nbis_nol2id-cds-3;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00003;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 1175 1591 . - 0 ID=Tob1_00003;Parent=nbis_nol2id-cds-3;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00003;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 gene 1617 2186 . - 0 ID=nbis_NEW-gene-4;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00004;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 mRNA 1617 2186 . - 0 ID=nbis_nol2id-cds-4;Parent=nbis_NEW-gene-4;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00004;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 exon 1617 2186 . - . ID=nbis_NEW-exon-4;Parent=nbis_nol2id-cds-4;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00004;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 1617 2186 . - 0 ID=Tob1_00004;Parent=nbis_nol2id-cds-4;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00004;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 gene 2318 3862 . - 0 ID=nbis_NEW-gene-5;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00005;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 mRNA 2318 3862 . 
- 0 ID=nbis_nol2id-cds-5;Parent=nbis_NEW-gene-5;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00005;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 exon 2318 3862 . - . ID=nbis_NEW-exon-5;Parent=nbis_nol2id-cds-5;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00005;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 2318 3862 . - 0 ID=Tob1_00005;Parent=nbis_nol2id-cds-5;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00005;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 gene 3983 4375 . - 0 ID=nbis_NEW-gene-6;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00006;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 mRNA 3983 4375 . - 0 ID=nbis_nol2id-cds-6;Parent=nbis_NEW-gene-6;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00006;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 exon 3983 4375 . - . ID=nbis_NEW-exon-6;Parent=nbis_nol2id-cds-6;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00006;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 3983 4375 . - 0 ID=Tob1_00006;Parent=nbis_nol2id-cds-6;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00006;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 gene 4465 5121 . - 0 ID=nbis_NEW-gene-7;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00007;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 mRNA 4465 5121 . - 0 ID=nbis_nol2id-cds-7;Parent=nbis_NEW-gene-7;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00007;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 exon 4465 5121 . - . ID=nbis_NEW-exon-7;Parent=nbis_nol2id-cds-7;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00007;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 4465 5121 . - 0 ID=Tob1_00007;Parent=nbis_nol2id-cds-7;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00007;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 gene 5303 6295 . 
- 0 ID=nbis_NEW-gene-8;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00008;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 mRNA 5303 6295 . - 0 ID=nbis_nol2id-cds-8;Parent=nbis_NEW-gene-8;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00008;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 exon 5303 6295 . - . ID=nbis_NEW-exon-8;Parent=nbis_nol2id-cds-8;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00008;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 5303 6295 . - 0 ID=Tob1_00008;Parent=nbis_nol2id-cds-8;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00008;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 gene 6413 20059 . - 0 ID=nbis_NEW-gene-9;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00009;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 mRNA 6413 20059 . - 0 ID=nbis_nol2id-cds-9;Parent=nbis_NEW-gene-9;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00009;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 exon 6413 20059 . - . ID=nbis_NEW-exon-9;Parent=nbis_nol2id-cds-9;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00009;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 6413 20059 . - 0 ID=Tob1_00009;Parent=nbis_nol2id-cds-9;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00009;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 gene 20337 21584 . - 0 ID=nbis_NEW-gene-10;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00010;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 mRNA 20337 21584 . - 0 ID=nbis_nol2id-cds-10;Parent=nbis_NEW-gene-10;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00010;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 exon 20337 21584 . - . ID=nbis_NEW-exon-10;Parent=nbis_nol2id-cds-10;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00010;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 20337 21584 . 
- 0 ID=Tob1_00010;Parent=nbis_nol2id-cds-10;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00010;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 gene 21644 23791 . - 0 ID=nbis_NEW-gene-11;gene=apxIB_1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P26760;locus_tag=Tob1_00011 -Tob1_contig1 Prodigal:2.60 mRNA 21644 23791 . - 0 ID=nbis_nol2id-cds-11;Parent=nbis_NEW-gene-11;gene=apxIB_1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P26760;locus_tag=Tob1_00011 -Tob1_contig1 Prodigal:2.60 exon 21644 23791 . - . ID=nbis_NEW-exon-11;Parent=nbis_nol2id-cds-11;gene=apxIB_1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P26760;locus_tag=Tob1_00011 -Tob1_contig1 Prodigal:2.60 CDS 21644 23791 . - 0 ID=Tob1_00011;Parent=nbis_nol2id-cds-11;gene=apxIB_1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P26760;locus_tag=Tob1_00011 -Tob1_contig1 Prodigal:2.60 gene 23788 24972 . - 0 ID=nbis_NEW-gene-12;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00012;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 mRNA 23788 24972 . - 0 ID=nbis_nol2id-cds-12;Parent=nbis_NEW-gene-12;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00012;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 exon 23788 24972 . - . ID=nbis_NEW-exon-12;Parent=nbis_nol2id-cds-12;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00012;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 23788 24972 . - 0 ID=Tob1_00012;Parent=nbis_nol2id-cds-12;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00012;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 gene 25114 25416 . - 0 ID=nbis_NEW-gene-13;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00013;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 mRNA 25114 25416 . 
- 0 ID=nbis_nol2id-cds-13;Parent=nbis_NEW-gene-13;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00013;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 exon 25114 25416 . - . ID=nbis_NEW-exon-13;Parent=nbis_nol2id-cds-13;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00013;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 25114 25416 . - 0 ID=Tob1_00013;Parent=nbis_nol2id-cds-13;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00013;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 gene 25781 25876 . - 0 ID=nbis_NEW-gene-14;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00014;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 mRNA 25781 25876 . - 0 ID=nbis_nol2id-cds-14;Parent=nbis_NEW-gene-14;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00014;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 exon 25781 25876 . - . ID=nbis_NEW-exon-14;Parent=nbis_nol2id-cds-14;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00014;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 25781 25876 . - 0 ID=Tob1_00014;Parent=nbis_nol2id-cds-14;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00014;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 gene 25961 26089 . + 0 ID=nbis_NEW-gene-15;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00015;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 mRNA 25961 26089 . + 0 ID=nbis_nol2id-cds-15;Parent=nbis_NEW-gene-15;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00015;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 exon 25961 26089 . + . ID=nbis_NEW-exon-15;Parent=nbis_nol2id-cds-15;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00015;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 25961 26089 . 
+ 0 ID=Tob1_00015;Parent=nbis_nol2id-cds-15;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00015;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 gene 26156 26260 . + 0 ID=nbis_NEW-gene-16;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00016;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 mRNA 26156 26260 . + 0 ID=nbis_nol2id-cds-16;Parent=nbis_NEW-gene-16;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00016;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 exon 26156 26260 . + . ID=nbis_NEW-exon-16;Parent=nbis_nol2id-cds-16;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00016;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 26156 26260 . + 0 ID=Tob1_00016;Parent=nbis_nol2id-cds-16;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00016;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 gene 26257 26694 . + 0 ID=nbis_NEW-gene-17;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00017;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 mRNA 26257 26694 . + 0 ID=nbis_nol2id-cds-17;Parent=nbis_NEW-gene-17;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00017;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 exon 26257 26694 . + . ID=nbis_NEW-exon-17;Parent=nbis_nol2id-cds-17;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00017;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 26257 26694 . + 0 ID=Tob1_00017;Parent=nbis_nol2id-cds-17;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00017;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 gene 26694 27134 . + 0 ID=nbis_NEW-gene-18;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00018;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 mRNA 26694 27134 . 
+ 0 ID=nbis_nol2id-cds-18;Parent=nbis_NEW-gene-18;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00018;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 exon 26694 27134 . + . ID=nbis_NEW-exon-18;Parent=nbis_nol2id-cds-18;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00018;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 26694 27134 . + 0 ID=Tob1_00018;Parent=nbis_nol2id-cds-18;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00018;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 gene 27145 27324 . - 0 ID=nbis_NEW-gene-19;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00019;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 mRNA 27145 27324 . - 0 ID=nbis_nol2id-cds-19;Parent=nbis_NEW-gene-19;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00019;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 exon 27145 27324 . - . ID=nbis_NEW-exon-19;Parent=nbis_nol2id-cds-19;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00019;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 27145 27324 . - 0 ID=Tob1_00019;Parent=nbis_nol2id-cds-19;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00019;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 gene 27935 29050 . + 0 ID=nbis_NEW-gene-20;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00020;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 mRNA 27935 29050 . + 0 ID=nbis_nol2id-cds-20;Parent=nbis_NEW-gene-20;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00020;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 exon 27935 29050 . + . ID=nbis_NEW-exon-20;Parent=nbis_nol2id-cds-20;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00020;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 27935 29050 . 
+ 0 ID=Tob1_00020;Parent=nbis_nol2id-cds-20;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00020;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 gene 29190 32849 . + 0 ID=nbis_NEW-gene-21;eC_number=3.6.3.-;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q99T13;locus_tag=Tob1_00021 -Tob1_contig1 Prodigal:2.60 mRNA 29190 32849 . + 0 ID=nbis_nol2id-cds-21;Parent=nbis_NEW-gene-21;eC_number=3.6.3.-;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q99T13;locus_tag=Tob1_00021 -Tob1_contig1 Prodigal:2.60 exon 29190 32849 . + . ID=nbis_NEW-exon-21;Parent=nbis_nol2id-cds-21;eC_number=3.6.3.-;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q99T13;locus_tag=Tob1_00021 -Tob1_contig1 Prodigal:2.60 CDS 29190 32849 . + 0 ID=Tob1_00021;Parent=nbis_nol2id-cds-21;eC_number=3.6.3.-;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q99T13;locus_tag=Tob1_00021 -Tob1_contig1 Prodigal:2.60 gene 32953 33366 . + 0 ID=nbis_NEW-gene-22;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00022;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 mRNA 32953 33366 . + 0 ID=nbis_nol2id-cds-22;Parent=nbis_NEW-gene-22;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00022;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 exon 32953 33366 . + . ID=nbis_NEW-exon-22;Parent=nbis_nol2id-cds-22;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00022;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 32953 33366 . + 0 ID=Tob1_00022;Parent=nbis_nol2id-cds-22;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00022;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 gene 33411 34181 . + 0 ID=nbis_NEW-gene-23;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00023;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 mRNA 33411 34181 . 
+ 0 ID=nbis_nol2id-cds-23;Parent=nbis_NEW-gene-23;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00023;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 exon 33411 34181 . + . ID=nbis_NEW-exon-23;Parent=nbis_nol2id-cds-23;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00023;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 33411 34181 . + 0 ID=Tob1_00023;Parent=nbis_nol2id-cds-23;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00023;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 gene 34266 35222 . + 0 ID=nbis_NEW-gene-24;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00024;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 mRNA 34266 35222 . + 0 ID=nbis_nol2id-cds-24;Parent=nbis_NEW-gene-24;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00024;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 exon 34266 35222 . + . ID=nbis_NEW-exon-24;Parent=nbis_nol2id-cds-24;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00024;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 34266 35222 . + 0 ID=Tob1_00024;Parent=nbis_nol2id-cds-24;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00024;product=hypothetical protein -Tob1_contig1 SignalP:4.1 sig_peptide 34266 34298 . + 0 ID=sig_peptide-1;Parent=nbis_nol2id-cds-24;inference=ab initio prediction:SignalP:4.1;note=predicted cleavage at residue 33;product=putative signal peptide -Tob1_contig1 Prodigal:2.60 gene 35267 37444 . - 0 ID=nbis_NEW-gene-25;locus_tag=Tob1_00025 -Tob1_contig1 Prodigal:2.60 mRNA 35267 37444 . - 0 ID=nbis_nol2id-cds-25;Parent=nbis_NEW-gene-25;locus_tag=Tob1_00025 -Tob1_contig1 Prodigal:2.60 exon 35267 37444 . - . ID=nbis_NEW-exon-25;Parent=nbis_nol2id-cds-25;locus_tag=Tob1_00025 -Tob1_contig1 Prodigal:2.60 CDS 35267 37444 . - 0 ID=Tob1_00025;Parent=nbis_nol2id-cds-25;locus_tag=Tob1_00025 -Tob1_contig1 SignalP:4.1 sig_peptide 37420 37444 . 
- 0 ID=sig_peptide-2;Parent=nbis_nol2id-cds-25;inference=ab initio prediction:SignalP:4.1;note=predicted cleavage at residue 25;product=putative signal peptide -Tob1_contig1 Prodigal:2.60 gene 38304 39338 . - 0 ID=nbis_NEW-gene-26;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:uniprot.escherichia_coli_k12_newNames.fasta:sp|P00887|AROH_ECOLI;locus_tag=Tob1_00026 -Tob1_contig1 Prodigal:2.60 mRNA 38304 39338 . - 0 ID=nbis_nol2id-cds-26;Parent=nbis_NEW-gene-26;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:uniprot.escherichia_coli_k12_newNames.fasta:sp|P00887|AROH_ECOLI;locus_tag=Tob1_00026 -Tob1_contig1 Prodigal:2.60 exon 38304 39338 . - . ID=nbis_NEW-exon-26;Parent=nbis_nol2id-cds-26;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:uniprot.escherichia_coli_k12_newNames.fasta:sp|P00887|AROH_ECOLI;locus_tag=Tob1_00026 -Tob1_contig1 Prodigal:2.60 CDS 38304 39338 . - 0 ID=Tob1_00026;Parent=nbis_nol2id-cds-26;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:uniprot.escherichia_coli_k12_newNames.fasta:sp|P00887|AROH_ECOLI;locus_tag=Tob1_00026 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/8_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/8_test.gff deleted file mode 100644 index cd7796cbe..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/8_test.gff +++ /dev/null @@ -1,30 +0,0 @@ -##gff-version 3 -Tob1_contig1 Prodigal:2.60 CDS 476 670 . - 0 ID=Tob1_00001;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00001;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 1018 1191 . - 0 ID=Tob1_00002;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00002;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 1175 1591 . - 0 ID=Tob1_00003;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00003;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 1617 2186 . 
- 0 ID=Tob1_00004;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00004;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 2318 3862 . - 0 ID=Tob1_00005;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00005;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 3983 4375 . - 0 ID=Tob1_00006;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00006;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 4465 5121 . - 0 ID=Tob1_00007;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00007;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 5303 6295 . - 0 ID=Tob1_00008;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00008;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 6413 20059 . - 0 ID=Tob1_00009;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00009;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 20337 21584 . - 0 ID=Tob1_00010;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00010;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 21644 23791 . - 0 ID=Tob1_00011;gene=apxIB_1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P26760;locus_tag=Tob1_00011 -Tob1_contig1 Prodigal:2.60 CDS 23788 24972 . - 0 ID=Tob1_00012;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00012;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 25114 25416 . - 0 ID=Tob1_00013;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00013;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 25781 25876 . - 0 ID=Tob1_00014;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00014;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 25961 26089 . + 0 ID=Tob1_00015;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00015;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 26156 26260 . 
+ 0 ID=Tob1_00016;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00016;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 26257 26694 . + 0 ID=Tob1_00017;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00017;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 26694 27134 . + 0 ID=Tob1_00018;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00018;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 27145 27324 . - 0 ID=Tob1_00019;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00019;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 27935 29050 . + 0 ID=Tob1_00020;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00020;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 29190 32849 . + 0 ID=Tob1_00021;eC_number=3.6.3.-;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q99T13;locus_tag=Tob1_00021 -Tob1_contig1 Prodigal:2.60 CDS 32953 33366 . + 0 ID=Tob1_00022;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00022;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 33411 34181 . + 0 ID=Tob1_00023;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00023;product=hypothetical protein -Tob1_contig1 Prodigal:2.60 CDS 34266 35222 . + 0 ID=Tob1_00024;inference=ab initio prediction:Prodigal:2.60;locus_tag=Tob1_00024;product=hypothetical protein -Tob1_contig1 SignalP:4.1 sig_peptide 34266 34298 . + 0 inference=ab initio prediction:SignalP:4.1;note=predicted cleavage at residue 33;product=putative signal peptide -Tob1_contig1 Prodigal:2.60 CDS 35267 37444 . - 0 ID=Tob1_00025;locus_tag=Tob1_00025; -Tob1_contig1 SignalP:4.1 sig_peptide 37420 37444 . - 0 inference=ab initio prediction:SignalP:4.1;note=predicted cleavage at residue 25;product=putative signal peptide -Tob1_contig1 Prodigal:2.60 CDS 38304 39338 . 
- 0 ID=Tob1_00026;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:uniprot.escherichia_coli_k12_newNames.fasta:sp|P00887|AROH_ECOLI;locus_tag=Tob1_00026; - diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/9_correct_output.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/9_correct_output.gff deleted file mode 100644 index 448b4a60c..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/9_correct_output.gff +++ /dev/null @@ -1,19 +0,0 @@ -##gff-version 3 -NC_003070.9 RefSeq gene 3631 5899 . + . ID=nbis_NEW-gene-1;db_xref=TAIR:AT1G01010,GeneID:839580;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;locus_tag=AT1G01010 -NC_003070.9 RefSeq mRNA 3631 5899 . + . ID=NC_003070.9:NAC001;Parent=nbis_NEW-gene-1;db_xref=TAIR:AT1G01010,GeneID:839580;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;locus_tag=AT1G01010 -NC_003070.9 RefSeq exon 3631 3913 . + . ID=NM_099983.2;Parent=NC_003070.9:NAC001;db_xref=GI:30677865,TAIR:AT1G01010,GeneID:839580;exon_number=1;gbkey=mRNA;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1,Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;insd_transcript_id=NM_099983.2;locus_tag=AT1G01010;product=NAC domain-containing protein 1 -NC_003070.9 RefSeq exon 3996 4276 . + . 
ID=nbis_NEW-exon-1;Parent=NC_003070.9:NAC001;db_xref=GI:30677865,TAIR:AT1G01010,GeneID:839580;exon_number=2;gbkey=mRNA;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1,Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;insd_transcript_id=NM_099983.2;locus_tag=AT1G01010;product=NAC domain-containing protein 1 -NC_003070.9 RefSeq exon 4486 4605 . + . ID=nbis_NEW-exon-2;Parent=NC_003070.9:NAC001;db_xref=GI:30677865,TAIR:AT1G01010,GeneID:839580;exon_number=3;gbkey=mRNA;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1,Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;insd_transcript_id=NM_099983.2;locus_tag=AT1G01010;product=NAC domain-containing protein 1 -NC_003070.9 RefSeq exon 4706 5095 . + . ID=nbis_NEW-exon-3;Parent=NC_003070.9:NAC001;db_xref=GI:30677865,TAIR:AT1G01010,GeneID:839580;exon_number=4;gbkey=mRNA;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1,Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;insd_transcript_id=NM_099983.2;locus_tag=AT1G01010;product=NAC domain-containing protein 1 -NC_003070.9 RefSeq exon 5174 5326 . + . 
ID=nbis_NEW-exon-4;Parent=NC_003070.9:NAC001;db_xref=GI:30677865,TAIR:AT1G01010,GeneID:839580;exon_number=5;gbkey=mRNA;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1,Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;insd_transcript_id=NM_099983.2;locus_tag=AT1G01010;product=NAC domain-containing protein 1 -NC_003070.9 RefSeq exon 5439 5899 . + . ID=nbis_NEW-exon-5;Parent=NC_003070.9:NAC001;db_xref=GI:30677865,TAIR:AT1G01010,GeneID:839580;exon_number=6;gbkey=mRNA;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1,Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;insd_transcript_id=NM_099983.2;locus_tag=AT1G01010;product=NAC domain-containing protein 1 -NC_003070.9 RefSeq CDS 3760 3913 . 
+ 0 ID=nbis_NEW-cds-1;Parent=NC_003070.9:NAC001;db_xref=GI:15223276,TAIR:AT1G01010,GeneID:839580;exon_number=1;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1,Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;locus_tag=AT1G01010;note=NAC domain containing protein 1 (NAC001)%3B FUNCTIONS IN: sequence-specific DNA binding transcription factor activity%3B INVOLVED IN: multicellular organismal development%2C regulation of transcription%3B LOCATED IN: cellular_component unknown%3B EXPRESSED IN: 7 plant structures%3B EXPRESSED DURING: 4 anthesis%2C C globular stage%2C petal differentiation and expansion stage%3B CONTAINS InterPro DOMAIN/s: No apical meristem (NAM) protein (InterPro:IPR003441)%3B BEST Arabidopsis thaliana protein match is: NAC domain containing protein 69 (TAIR:AT4G01550.1)%3B Has 2503 Blast hits to 2496 proteins in 69 species: Archae - 0%3B Bacteria - 0%3B Metazoa - 0%3B Fungi - 0%3B Plants - 2502%3B Viruses - 0%3B Other Eukaryotes - 1 (source: NCBI BLink).;product=NAC domain-containing protein 1;protein_id=NP_171609.1 -NC_003070.9 RefSeq CDS 3996 4276 . 
+ 2 ID=nbis_NEW-cds-1;Parent=NC_003070.9:NAC001;db_xref=GI:15223276,TAIR:AT1G01010,GeneID:839580;exon_number=2;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1,Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;locus_tag=AT1G01010;note=NAC domain containing protein 1 (NAC001)%3B FUNCTIONS IN: sequence-specific DNA binding transcription factor activity%3B INVOLVED IN: multicellular organismal development%2C regulation of transcription%3B LOCATED IN: cellular_component unknown%3B EXPRESSED IN: 7 plant structures%3B EXPRESSED DURING: 4 anthesis%2C C globular stage%2C petal differentiation and expansion stage%3B CONTAINS InterPro DOMAIN/s: No apical meristem (NAM) protein (InterPro:IPR003441)%3B BEST Arabidopsis thaliana protein match is: NAC domain containing protein 69 (TAIR:AT4G01550.1)%3B Has 2503 Blast hits to 2496 proteins in 69 species: Archae - 0%3B Bacteria - 0%3B Metazoa - 0%3B Fungi - 0%3B Plants - 2502%3B Viruses - 0%3B Other Eukaryotes - 1 (source: NCBI BLink).;product=NAC domain-containing protein 1;protein_id=NP_171609.1 -NC_003070.9 RefSeq CDS 4486 4605 . 
+ 0 ID=nbis_NEW-cds-1;Parent=NC_003070.9:NAC001;db_xref=GI:15223276,TAIR:AT1G01010,GeneID:839580;exon_number=3;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1,Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;locus_tag=AT1G01010;note=NAC domain containing protein 1 (NAC001)%3B FUNCTIONS IN: sequence-specific DNA binding transcription factor activity%3B INVOLVED IN: multicellular organismal development%2C regulation of transcription%3B LOCATED IN: cellular_component unknown%3B EXPRESSED IN: 7 plant structures%3B EXPRESSED DURING: 4 anthesis%2C C globular stage%2C petal differentiation and expansion stage%3B CONTAINS InterPro DOMAIN/s: No apical meristem (NAM) protein (InterPro:IPR003441)%3B BEST Arabidopsis thaliana protein match is: NAC domain containing protein 69 (TAIR:AT4G01550.1)%3B Has 2503 Blast hits to 2496 proteins in 69 species: Archae - 0%3B Bacteria - 0%3B Metazoa - 0%3B Fungi - 0%3B Plants - 2502%3B Viruses - 0%3B Other Eukaryotes - 1 (source: NCBI BLink).;product=NAC domain-containing protein 1;protein_id=NP_171609.1 -NC_003070.9 RefSeq CDS 4706 5095 . 
+ 0 ID=nbis_NEW-cds-1;Parent=NC_003070.9:NAC001;db_xref=GI:15223276,TAIR:AT1G01010,GeneID:839580;exon_number=4;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1,Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;locus_tag=AT1G01010;note=NAC domain containing protein 1 (NAC001)%3B FUNCTIONS IN: sequence-specific DNA binding transcription factor activity%3B INVOLVED IN: multicellular organismal development%2C regulation of transcription%3B LOCATED IN: cellular_component unknown%3B EXPRESSED IN: 7 plant structures%3B EXPRESSED DURING: 4 anthesis%2C C globular stage%2C petal differentiation and expansion stage%3B CONTAINS InterPro DOMAIN/s: No apical meristem (NAM) protein (InterPro:IPR003441)%3B BEST Arabidopsis thaliana protein match is: NAC domain containing protein 69 (TAIR:AT4G01550.1)%3B Has 2503 Blast hits to 2496 proteins in 69 species: Archae - 0%3B Bacteria - 0%3B Metazoa - 0%3B Fungi - 0%3B Plants - 2502%3B Viruses - 0%3B Other Eukaryotes - 1 (source: NCBI BLink).;product=NAC domain-containing protein 1;protein_id=NP_171609.1 -NC_003070.9 RefSeq CDS 5174 5326 . 
+ 0 ID=nbis_NEW-cds-1;Parent=NC_003070.9:NAC001;db_xref=GI:15223276,TAIR:AT1G01010,GeneID:839580;exon_number=5;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1,Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;locus_tag=AT1G01010;note=NAC domain containing protein 1 (NAC001)%3B FUNCTIONS IN: sequence-specific DNA binding transcription factor activity%3B INVOLVED IN: multicellular organismal development%2C regulation of transcription%3B LOCATED IN: cellular_component unknown%3B EXPRESSED IN: 7 plant structures%3B EXPRESSED DURING: 4 anthesis%2C C globular stage%2C petal differentiation and expansion stage%3B CONTAINS InterPro DOMAIN/s: No apical meristem (NAM) protein (InterPro:IPR003441)%3B BEST Arabidopsis thaliana protein match is: NAC domain containing protein 69 (TAIR:AT4G01550.1)%3B Has 2503 Blast hits to 2496 proteins in 69 species: Archae - 0%3B Bacteria - 0%3B Metazoa - 0%3B Fungi - 0%3B Plants - 2502%3B Viruses - 0%3B Other Eukaryotes - 1 (source: NCBI BLink).;product=NAC domain-containing protein 1;protein_id=NP_171609.1 -NC_003070.9 RefSeq CDS 5439 5627 . 
+ 0 ID=nbis_NEW-cds-1;Parent=NC_003070.9:NAC001;db_xref=GI:15223276,TAIR:AT1G01010,GeneID:839580;exon_number=6;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1,Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;locus_tag=AT1G01010;note=NAC domain containing protein 1 (NAC001)%3B FUNCTIONS IN: sequence-specific DNA binding transcription factor activity%3B INVOLVED IN: multicellular organismal development%2C regulation of transcription%3B LOCATED IN: cellular_component unknown%3B EXPRESSED IN: 7 plant structures%3B EXPRESSED DURING: 4 anthesis%2C C globular stage%2C petal differentiation and expansion stage%3B CONTAINS InterPro DOMAIN/s: No apical meristem (NAM) protein (InterPro:IPR003441)%3B BEST Arabidopsis thaliana protein match is: NAC domain containing protein 69 (TAIR:AT4G01550.1)%3B Has 2503 Blast hits to 2496 proteins in 69 species: Archae - 0%3B Bacteria - 0%3B Metazoa - 0%3B Fungi - 0%3B Plants - 2502%3B Viruses - 0%3B Other Eukaryotes - 1 (source: NCBI BLink).;product=NAC domain-containing protein 1;protein_id=NP_171609.1 -NC_003070.9 RefSeq five_prime_UTR 3631 3759 . + . ID=nbis_NEW-five_prime_utr-1;Parent=NC_003070.9:NAC001;db_xref=GI:30677865,TAIR:AT1G01010,GeneID:839580;exon_number=1;gbkey=mRNA;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1,Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;insd_transcript_id=NM_099983.2;locus_tag=AT1G01010;product=NAC domain-containing protein 1 -NC_003070.9 RefSeq start_codon 3760 3762 . 
+ 0 ID=nbis_NEW-start_codon-1;Parent=NC_003070.9:NAC001;db_xref=GI:15223276,TAIR:AT1G01010,GeneID:839580;exon_number=1;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1,Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;locus_tag=AT1G01010;note=NAC domain containing protein 1 (NAC001)%3B FUNCTIONS IN: sequence-specific DNA binding transcription factor activity%3B INVOLVED IN: multicellular organismal development%2C regulation of transcription%3B LOCATED IN: cellular_component unknown%3B EXPRESSED IN: 7 plant structures%3B EXPRESSED DURING: 4 anthesis%2C C globular stage%2C petal differentiation and expansion stage%3B CONTAINS InterPro DOMAIN/s: No apical meristem (NAM) protein (InterPro:IPR003441)%3B BEST Arabidopsis thaliana protein match is: NAC domain containing protein 69 (TAIR:AT4G01550.1)%3B Has 2503 Blast hits to 2496 proteins in 69 species: Archae - 0%3B Bacteria - 0%3B Metazoa - 0%3B Fungi - 0%3B Plants - 2502%3B Viruses - 0%3B Other Eukaryotes - 1 (source: NCBI BLink).;product=NAC domain-containing protein 1;protein_id=NP_171609.1 -NC_003070.9 RefSeq stop_codon 5628 5630 . 
+ 0 ID=nbis_NEW-stop_codon-1;Parent=NC_003070.9:NAC001;db_xref=GI:15223276,TAIR:AT1G01010,GeneID:839580;exon_number=1;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1,Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;locus_tag=AT1G01010;note=NAC domain containing protein 1 (NAC001)%3B FUNCTIONS IN: sequence-specific DNA binding transcription factor activity%3B INVOLVED IN: multicellular organismal development%2C regulation of transcription%3B LOCATED IN: cellular_component unknown%3B EXPRESSED IN: 7 plant structures%3B EXPRESSED DURING: 4 anthesis%2C C globular stage%2C petal differentiation and expansion stage%3B CONTAINS InterPro DOMAIN/s: No apical meristem (NAM) protein (InterPro:IPR003441)%3B BEST Arabidopsis thaliana protein match is: NAC domain containing protein 69 (TAIR:AT4G01550.1)%3B Has 2503 Blast hits to 2496 proteins in 69 species: Archae - 0%3B Bacteria - 0%3B Metazoa - 0%3B Fungi - 0%3B Plants - 2502%3B Viruses - 0%3B Other Eukaryotes - 1 (source: NCBI BLink).;product=NAC domain-containing protein 1;protein_id=NP_171609.1 -NC_003070.9 RefSeq three_prime_UTR 5628 5899 . + . 
ID=nbis_NEW-three_prime_utr-1;Parent=NC_003070.9:NAC001;db_xref=GI:30677865,TAIR:AT1G01010,GeneID:839580;exon_number=1;gbkey=mRNA;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1,Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;insd_transcript_id=NM_099983.2;locus_tag=AT1G01010;product=NAC domain-containing protein 1 diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/9_test.gff b/annotation/NBIS/GFF3/test/test_gff_syntax/9_test.gff deleted file mode 100644 index 84e0d1127..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/9_test.gff +++ /dev/null @@ -1,20 +0,0 @@ -##gff-version 3 -#!gff-spec-version 1.14 -#!source-version NCBI C++ formatter 0.2 -##Type DNA NC_003070.9 -NC_003070.9 RefSeq source 1 30427671 . + . organism=Arabidopsis thaliana;mol_type=genomic DNA;db_xref=taxon:3702;chromosome=1;ecotype=Columbia -NC_003070.9 RefSeq gene 3631 5899 . + . ID=NC_003070.9:NAC001;locus_tag=AT1G01010;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;db_xref=TAIR:AT1G01010;db_xref=GeneID:839580 -NC_003070.9 RefSeq exon 3631 3913 . + . ID=NM_099983.2;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;product=NAC domain-containing protein 1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1;inference=Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;insd_transcript_id=NM_099983.2;db_xref=GI:30677865;db_xref=TAIR:AT1G01010;db_xref=GeneID:839580;exon_number=1 -NC_003070.9 RefSeq exon 3996 4276 . + . 
ID=NM_099983.2;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;product=NAC domain-containing protein 1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1;inference=Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;insd_transcript_id=NM_099983.2;db_xref=GI:30677865;db_xref=TAIR:AT1G01010;db_xref=GeneID:839580;exon_number=2 -NC_003070.9 RefSeq exon 4486 4605 . + . ID=NM_099983.2;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;product=NAC domain-containing protein 1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1;inference=Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;insd_transcript_id=NM_099983.2;db_xref=GI:30677865;db_xref=TAIR:AT1G01010;db_xref=GeneID:839580;exon_number=3 -NC_003070.9 RefSeq exon 4706 5095 . + . ID=NM_099983.2;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;product=NAC domain-containing protein 1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1;inference=Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;insd_transcript_id=NM_099983.2;db_xref=GI:30677865;db_xref=TAIR:AT1G01010;db_xref=GeneID:839580;exon_number=4 -NC_003070.9 RefSeq exon 5174 5326 . + . 
ID=NM_099983.2;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;product=NAC domain-containing protein 1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1;inference=Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;insd_transcript_id=NM_099983.2;db_xref=GI:30677865;db_xref=TAIR:AT1G01010;db_xref=GeneID:839580;exon_number=5 -NC_003070.9 RefSeq exon 5439 5899 . + . ID=NM_099983.2;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;product=NAC domain-containing protein 1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1;inference=Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;insd_transcript_id=NM_099983.2;db_xref=GI:30677865;db_xref=TAIR:AT1G01010;db_xref=GeneID:839580;exon_number=6 -NC_003070.9 RefSeq CDS 3760 3913 . 
+ 0 ID=NM_099983.2;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1;inference=Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;note=NAC domain containing protein 1 %28NAC001%29%3B FUNCTIONS IN: sequence-specific DNA binding transcription factor activity%3B INVOLVED IN: multicellular organismal development%2C regulation of transcription%3B LOCATED IN: cellular_component unknown%3B EXPRESSED IN: 7 plant structures%3B EXPRESSED DURING: 4 anthesis%2C C globular stage%2C petal differentiation and expansion stage%3B CONTAINS InterPro DOMAIN%2Fs: No apical meristem %28NAM%29 protein %28InterPro:IPR003441%29%3B BEST Arabidopsis thaliana protein match is: NAC domain containing protein 69 %28TAIR:AT4G01550.1%29%3B Has 2503 Blast hits to 2496 proteins in 69 species: Archae - 0%3B Bacteria - 0%3B Metazoa - 0%3B Fungi - 0%3B Plants - 2502%3B Viruses - 0%3B Other Eukaryotes - 1 %28source: NCBI BLink%29.;product=NAC domain-containing protein 1;protein_id=NP_171609.1;db_xref=GI:15223276;db_xref=TAIR:AT1G01010;db_xref=GeneID:839580;exon_number=1 -NC_003070.9 RefSeq CDS 3996 4276 . 
+ 2 ID=NM_099983.2;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1;inference=Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;note=NAC domain containing protein 1 %28NAC001%29%3B FUNCTIONS IN: sequence-specific DNA binding transcription factor activity%3B INVOLVED IN: multicellular organismal development%2C regulation of transcription%3B LOCATED IN: cellular_component unknown%3B EXPRESSED IN: 7 plant structures%3B EXPRESSED DURING: 4 anthesis%2C C globular stage%2C petal differentiation and expansion stage%3B CONTAINS InterPro DOMAIN%2Fs: No apical meristem %28NAM%29 protein %28InterPro:IPR003441%29%3B BEST Arabidopsis thaliana protein match is: NAC domain containing protein 69 %28TAIR:AT4G01550.1%29%3B Has 2503 Blast hits to 2496 proteins in 69 species: Archae - 0%3B Bacteria - 0%3B Metazoa - 0%3B Fungi - 0%3B Plants - 2502%3B Viruses - 0%3B Other Eukaryotes - 1 %28source: NCBI BLink%29.;product=NAC domain-containing protein 1;protein_id=NP_171609.1;db_xref=GI:15223276;db_xref=TAIR:AT1G01010;db_xref=GeneID:839580;exon_number=2 -NC_003070.9 RefSeq CDS 4486 4605 . 
+ 0 ID=NM_099983.2;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1;inference=Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;note=NAC domain containing protein 1 %28NAC001%29%3B FUNCTIONS IN: sequence-specific DNA binding transcription factor activity%3B INVOLVED IN: multicellular organismal development%2C regulation of transcription%3B LOCATED IN: cellular_component unknown%3B EXPRESSED IN: 7 plant structures%3B EXPRESSED DURING: 4 anthesis%2C C globular stage%2C petal differentiation and expansion stage%3B CONTAINS InterPro DOMAIN%2Fs: No apical meristem %28NAM%29 protein %28InterPro:IPR003441%29%3B BEST Arabidopsis thaliana protein match is: NAC domain containing protein 69 %28TAIR:AT4G01550.1%29%3B Has 2503 Blast hits to 2496 proteins in 69 species: Archae - 0%3B Bacteria - 0%3B Metazoa - 0%3B Fungi - 0%3B Plants - 2502%3B Viruses - 0%3B Other Eukaryotes - 1 %28source: NCBI BLink%29.;product=NAC domain-containing protein 1;protein_id=NP_171609.1;db_xref=GI:15223276;db_xref=TAIR:AT1G01010;db_xref=GeneID:839580;exon_number=3 -NC_003070.9 RefSeq CDS 4706 5095 . 
+ 0 ID=NM_099983.2;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1;inference=Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;note=NAC domain containing protein 1 %28NAC001%29%3B FUNCTIONS IN: sequence-specific DNA binding transcription factor activity%3B INVOLVED IN: multicellular organismal development%2C regulation of transcription%3B LOCATED IN: cellular_component unknown%3B EXPRESSED IN: 7 plant structures%3B EXPRESSED DURING: 4 anthesis%2C C globular stage%2C petal differentiation and expansion stage%3B CONTAINS InterPro DOMAIN%2Fs: No apical meristem %28NAM%29 protein %28InterPro:IPR003441%29%3B BEST Arabidopsis thaliana protein match is: NAC domain containing protein 69 %28TAIR:AT4G01550.1%29%3B Has 2503 Blast hits to 2496 proteins in 69 species: Archae - 0%3B Bacteria - 0%3B Metazoa - 0%3B Fungi - 0%3B Plants - 2502%3B Viruses - 0%3B Other Eukaryotes - 1 %28source: NCBI BLink%29.;product=NAC domain-containing protein 1;protein_id=NP_171609.1;db_xref=GI:15223276;db_xref=TAIR:AT1G01010;db_xref=GeneID:839580;exon_number=4 -NC_003070.9 RefSeq CDS 5174 5326 . 
+ 0 ID=NM_099983.2;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1;inference=Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;note=NAC domain containing protein 1 %28NAC001%29%3B FUNCTIONS IN: sequence-specific DNA binding transcription factor activity%3B INVOLVED IN: multicellular organismal development%2C regulation of transcription%3B LOCATED IN: cellular_component unknown%3B EXPRESSED IN: 7 plant structures%3B EXPRESSED DURING: 4 anthesis%2C C globular stage%2C petal differentiation and expansion stage%3B CONTAINS InterPro DOMAIN%2Fs: No apical meristem %28NAM%29 protein %28InterPro:IPR003441%29%3B BEST Arabidopsis thaliana protein match is: NAC domain containing protein 69 %28TAIR:AT4G01550.1%29%3B Has 2503 Blast hits to 2496 proteins in 69 species: Archae - 0%3B Bacteria - 0%3B Metazoa - 0%3B Fungi - 0%3B Plants - 2502%3B Viruses - 0%3B Other Eukaryotes - 1 %28source: NCBI BLink%29.;product=NAC domain-containing protein 1;protein_id=NP_171609.1;db_xref=GI:15223276;db_xref=TAIR:AT1G01010;db_xref=GeneID:839580;exon_number=5 -NC_003070.9 RefSeq CDS 5439 5627 . 
+ 0 ID=NM_099983.2;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1;inference=Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;note=NAC domain containing protein 1 %28NAC001%29%3B FUNCTIONS IN: sequence-specific DNA binding transcription factor activity%3B INVOLVED IN: multicellular organismal development%2C regulation of transcription%3B LOCATED IN: cellular_component unknown%3B EXPRESSED IN: 7 plant structures%3B EXPRESSED DURING: 4 anthesis%2C C globular stage%2C petal differentiation and expansion stage%3B CONTAINS InterPro DOMAIN%2Fs: No apical meristem %28NAM%29 protein %28InterPro:IPR003441%29%3B BEST Arabidopsis thaliana protein match is: NAC domain containing protein 69 %28TAIR:AT4G01550.1%29%3B Has 2503 Blast hits to 2496 proteins in 69 species: Archae - 0%3B Bacteria - 0%3B Metazoa - 0%3B Fungi - 0%3B Plants - 2502%3B Viruses - 0%3B Other Eukaryotes - 1 %28source: NCBI BLink%29.;product=NAC domain-containing protein 1;protein_id=NP_171609.1;db_xref=GI:15223276;db_xref=TAIR:AT1G01010;db_xref=GeneID:839580;exon_number=6 -NC_003070.9 RefSeq start_codon 3760 3762 . 
+ 0 ID=NM_099983.2;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1;inference=Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;note=NAC domain containing protein 1 %28NAC001%29%3B FUNCTIONS IN: sequence-specific DNA binding transcription factor activity%3B INVOLVED IN: multicellular organismal development%2C regulation of transcription%3B LOCATED IN: cellular_component unknown%3B EXPRESSED IN: 7 plant structures%3B EXPRESSED DURING: 4 anthesis%2C C globular stage%2C petal differentiation and expansion stage%3B CONTAINS InterPro DOMAIN%2Fs: No apical meristem %28NAM%29 protein %28InterPro:IPR003441%29%3B BEST Arabidopsis thaliana protein match is: NAC domain containing protein 69 %28TAIR:AT4G01550.1%29%3B Has 2503 Blast hits to 2496 proteins in 69 species: Archae - 0%3B Bacteria - 0%3B Metazoa - 0%3B Fungi - 0%3B Plants - 2502%3B Viruses - 0%3B Other Eukaryotes - 1 %28source: NCBI BLink%29.;product=NAC domain-containing protein 1;protein_id=NP_171609.1;db_xref=GI:15223276;db_xref=TAIR:AT1G01010;db_xref=GeneID:839580;exon_number=1 -NC_003070.9 RefSeq stop_codon 5628 5630 . 
+ 0 ID=NM_099983.2;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010;gene_synonym=ANAC001%3B NAC domain containing protein 1%3B NAC001%3B T25K16.1%3B T25K16_1;inference=Similar to DNA sequence:INSD:BT001115.1%2CINSD:AF439834.1%2CINSD:AK226863.1;inference=Similar to RNA sequence%2C EST:INSD:BP621258.1%2CINSD:BP779869.1%2CINSD:AV826635.1%2CINSD:EL124732.1%2CINSD:AV810144.1%2CINSD:BP788378.1%2CINSD:BP562468.2%2CINSD:AV795918.1;note=NAC domain containing protein 1 %28NAC001%29%3B FUNCTIONS IN: sequence-specific DNA binding transcription factor activity%3B INVOLVED IN: multicellular organismal development%2C regulation of transcription%3B LOCATED IN: cellular_component unknown%3B EXPRESSED IN: 7 plant structures%3B EXPRESSED DURING: 4 anthesis%2C C globular stage%2C petal differentiation and expansion stage%3B CONTAINS InterPro DOMAIN%2Fs: No apical meristem %28NAM%29 protein %28InterPro:IPR003441%29%3B BEST Arabidopsis thaliana protein match is: NAC domain containing protein 69 %28TAIR:AT4G01550.1%29%3B Has 2503 Blast hits to 2496 proteins in 69 species: Archae - 0%3B Bacteria - 0%3B Metazoa - 0%3B Fungi - 0%3B Plants - 2502%3B Viruses - 0%3B Other Eukaryotes - 1 %28source: NCBI BLink%29.;product=NAC domain-containing protein 1;protein_id=NP_171609.1;db_xref=GI:15223276;db_xref=TAIR:AT1G01010;db_xref=GeneID:839580;exon_number=1 \ No newline at end of file diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/README b/annotation/NBIS/GFF3/test/test_gff_syntax/README deleted file mode 100644 index aefedc65f..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/README +++ /dev/null @@ -1,48 +0,0 @@ -This is an explanations of the different test files used to check the GFF3 parser. -Launch the tester.sh script to check the parser and all these files. - - -00: Correct gff3 => Must stay as it is. 
-01: 4 Exon duplicated + 4 exon missing -02: No Parent attribute, but sorted correctly -03: Same as 2 but with common tag -04: Same as 3 but but 2 features with common tag spread within the file (locus AAAA). Actually the 2 features occurs too early in the file. -05: Same as 3 but but 2 features with common tag spread within the file (locus BBBB). Actually the 2 features occurs too late in the file. -06: Same as 3 but some common tag are missing -07: Same as 3 but some have also parent feature -08: Prokka output => only CDS defined, and locus_tag is defined -09: Refseq1: Level1 and level3 linked by ID/Parent ... but level2 missing ... and two UTRs missing -10: Refseq2: level2 missing, the whole is linked by a common attribute (here locus_tag); No ID attribute ... and 3 UTRs missing -11: Two isoforms of the same locus are not defined one after each other (so, spread...), and the level1 and level2 of those are missing. The two isoforms have to be linked to the same gene -12: Ensembl peculiarities case -13: same as 11 but isoforms are identical (one has to be removed). Two mRNA identical in locus tag mode. -14: 6 UTRs missing -15: Match / Match_part case (alignment GFF). Clean case. -16: One UTR is wrong -17: One exon is badly defined (UTR show that one is a bit longer) -18: features spread -19: some have locus tag, some have Parent -20: 4 Exon are missing -21: Two exon badly defined, they are overlapping. One has to have its location modified and the other one ahas to be completely removed -22: case where UTR both side of an exon... nothing to do, just the check has to not perform anything... -23: Multi-parent for the exon feature -24: Not correct SOFA feature type => It's normal that there is no correct output to check against. -25: No L1 feature. L2 and l3 feature without common locus tag. Has to be handle strictly sequential. (/!\ if there are isoforms of the same gene they cannot be gather under the same L1. 
There is no way to do the assumption) -26: No L1 feature, No L2 feature, No Locus tag. => It's normal that there is no correct output to check against. -27: One L2 is an identical isoform but not same ID. It has to be removed. -28: Mitos output. Only one line by record, no ID, no locus tag. We have to use the option '-c Name' to specify that we will use the Name attribute as a locus_tag as they are uniq identifiers. -29: Gnomom output. Two isoforms do not have CDS described but only exons. (was raising error at some point. See issue #9, fixed now) -30: Level1 and level3 linked by common tag ... but level2 missing ... -31: interleaved feature with parent( here transcript_id) and other with locus only Level3 features. (locus shared with feature having parent attribute) -32: Same as 31 except, the first feature of a record do not have parent attribute (it will be saved into the sequential before to be fish out and put with the features in omniscient sharing the same locus tag) - -/!\ If only level3 features are defined, and no locus tag present (see test 26), the tool cannot deal with it. I will create by default one umbrella level1, or if you on attribute as uniq locus ID, It will create a l1 for each feature => If only exon or only CDS features so the result will be fine, but if there are two different features that has to be linked together (two CDS or a CDS and a signal peptide as in the test case 26) , the tool will not perform properly. 
- -The philosophy of the parser is to -- Parse by Parent/child ELSE -- Parse by common_tag ELSE -- Parse sequentially - -Definitions: -feature = 1 line -record = bench of features linked together to describe a genomic element (i.e gebe feature + mRNA feature + exon feature + CDS feature + UTR feature) diff --git a/annotation/NBIS/GFF3/test/test_gff_syntax/tester.sh b/annotation/NBIS/GFF3/test/test_gff_syntax/tester.sh deleted file mode 100755 index 12843b345..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_syntax/tester.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -#This script -cleanIntermediateFile="yes" - -if [[ $1 == "no" ]];then - cleanIntermediateFile="no" -fi - -for i in {0..50}_*;do - - if [[ -f $i ]];then - #go through only the test files (not the correct output) - if [[ ! $i =~ ^[[:digit:]]+_correct ]];then - echo -e "\nTest of $i"; - testperfect="no" - - #case with comon locus needed and prokaryote!! - nb=${i%"_test.gff"} - if (( $nb == 28 ));then - ~/git/NBIS/GAAS/annotation/Tools/Converter/gxf_to_gff3.pl --gff $i -o test.gff3 -c Name &> /dev/null - #case with prokaryote mode needed - elif (( $nb == 8 ));then - ~/git/NBIS/GAAS/annotation/Tools/Converter/gxf_to_gff3.pl --gff $i -o test.gff3 &> /dev/null - #others - else - ~/git/NBIS/GAAS/annotation/Tools/Converter/gxf_to_gff3.pl --gff $i -o test.gff3 --merge_loci &> /dev/null - fi - - #get the expected name of the correct output file we will have to check against - pref=$(echo $i | cut -d'_' -f1) - fileok=${pref}_correct_output.gff - - # Check against the correct output - if [ ! -f $fileok ];then - echo "We didnt find any correct output to check against for $i ( $fileok ) " - else - resu=$(diff test.gff3 $fileok) - if [[ $resu != "" ]];then - echo -e "There is differences between the correct reference output and the current output:\n$resu" - else - echo "test1 ok !" - testperfect="yes" - fi - fi - - #echo "check against itself" - #case with prokaryote!! 
- nb=${i%"_test.gff"} - if (( $nb == 8 || $nb == 28 ));then - ~/git/NBIS/GAAS/annotation/Tools/Converter/gxf_to_gff3.pl --gff test.gff3 -o test2.gff3 &> /dev/null - else - ~/git/NBIS/GAAS/annotation/Tools/Converter/gxf_to_gff3.pl --gff test.gff3 -o test2.gff3 --merge_loci &> /dev/null - fi - resu=$(diff test2.gff3 $fileok) - if [[ $resu != "" ]];then - echo -e "There is differences between the original current output and the output of this file processed again:\n$resu" - else - echo "test2 ok !" - if [[ $testperfect == "yes" ]];then - echo "All test perfect !" - fi - fi - - fi - fi -done - -#Intermediate file cleaned -if [[ $cleanIntermediateFile == "yes" ]];then - rm test.gff3 - rm test2.gff3 -fi diff --git a/annotation/NBIS/GFF3/test/test_gff_version/1_test.gff b/annotation/NBIS/GFF3/test/test_gff_version/1_test.gff deleted file mode 100644 index 946903f0d..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_version/1_test.gff +++ /dev/null @@ -1,9 +0,0 @@ -#GFF1 with attribute "tag value tag value" -scaffold_5 JGI exon 442603 443344 . + . gene_id "e_gw1.5.2.1" transcript_id 335995 -scaffold_5 JGI CDS 442603 443344 . + 0 gene_id "e_gw1.5.2.1" protein_id 335805 exonNumber 1 -scaffold_5 JGI start_codon 442603 442605 . + 0 gene_id "e_gw1.5.2.1" -scaffold_5 JGI exon 443530 443531 . + . gene_id "e_gw1.5.2.1" transcript_id 335995 -scaffold_5 JGI CDS 443530 443531 . + 2 gene_id "e_gw1.5.2.1" protein_id 335805 exonNumber 2 -scaffold_5 JGI exon 443569 445653 . + . gene_id "e_gw1.5.2.1" transcript_id 335995 -scaffold_5 JGI CDS 443569 445653 . + 0 gene_id "e_gw1.5.2.1" protein_id m335805 exonNumber 3 -scaffold_5 JGI stop_codon 445651 445653 . 
+ 0 gene_id "e_gw1.5.2.1" diff --git a/annotation/NBIS/GFF3/test/test_gff_version/1a_test.gff b/annotation/NBIS/GFF3/test/test_gff_version/1a_test.gff deleted file mode 100644 index 607c3ab8b..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_version/1a_test.gff +++ /dev/null @@ -1,9 +0,0 @@ -# GFF1 only one value as attribute -scaffold_5 JGI exon 442603 443344 . + . geneA -scaffold_5 JGI CDS 442603 443344 . + 0 geneA -scaffold_5 JGI start_codon 442603 442605 . + 0 geneA -scaffold_5 JGI exon 443530 443531 . + . geneA -scaffold_5 JGI CDS 443530 443531 . + 2 geneA -scaffold_5 JGI exon 443569 445653 . + . geneA -scaffold_5 JGI CDS 443569 445653 . + 0 geneA -scaffold_5 JGI stop_codon 445651 445653 . + 0 geneA diff --git a/annotation/NBIS/GFF3/test/test_gff_version/2_test.gff b/annotation/NBIS/GFF3/test/test_gff_version/2_test.gff deleted file mode 100644 index 2fca952d4..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_version/2_test.gff +++ /dev/null @@ -1,9 +0,0 @@ -# GFF2 -scaffold_5 JGI exon 442603 443344 . + . gene_id "e_gw1.5.2.1"; transcript_id 335995 -scaffold_5 JGI CDS 442603 443344 . + 0 gene_id "e_gw1.5.2.1"; protein_id 335805; exonNumber 1 -scaffold_5 JGI start_codon 442603 442605 . + 0 gene_id "e_gw1.5.2.1" -scaffold_5 JGI exon 443530 443531 . + . gene_id "e_gw1.5.2.1"; transcript_id 335995 -scaffold_5 JGI CDS 443530 443531 . + 2 gene_id "e_gw1.5.2.1"; protein_id 335805; exonNumber 2 -scaffold_5 JGI exon 443569 445653 . + . gene_id "e_gw1.5.2.1"; transcript_id 335995 -scaffold_5 JGI CDS 443569 445653 . + 0 gene_id "e_gw1.5.2.1"; protein_id 335805; exonNumber 3 -scaffold_5 JGI stop_codon 445651 445653 . 
+ 0 gene_id "e_gw1.5.2.1" diff --git a/annotation/NBIS/GFF3/test/test_gff_version/2a_test.gff b/annotation/NBIS/GFF3/test/test_gff_version/2a_test.gff deleted file mode 100644 index d08acd8bc..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_version/2a_test.gff +++ /dev/null @@ -1,9 +0,0 @@ -#GFF2 as we see often with space before semicolon -scaffold_5 JGI exon 442603 443344 . + . gene_id "e_gw1.5.2.1" ; transcript_id 335995 -scaffold_5 JGI CDS 442603 443344 . + 0 gene_id "e_gw1.5.2.1" ; protein_id 335805; exonNumber 1 -scaffold_5 JGI start_codon 442603 442605 . + 0 gene_id "e_gw1.5.2.1" -scaffold_5 JGI exon 443530 443531 . + . gene_id "e_gw1.5.2.1" ; transcript_id 335995 -scaffold_5 JGI CDS 443530 443531 . + 2 gene_id "e_gw1.5.2.1" ; protein_id 335805; exonNumber 2 -scaffold_5 JGI exon 443569 445653 . + . gene_id "e_gw1.5.2.1" ; transcript_id 335995 -scaffold_5 JGI CDS 443569 445653 . + 0 gene_id "e_gw1.5.2.1" ; protein_id 335805; exonNumber 3 -scaffold_5 JGI stop_codon 445651 445653 . + 0 gene_id "e_gw1.5.2.1" diff --git a/annotation/NBIS/GFF3/test/test_gff_version/2b_test.gff b/annotation/NBIS/GFF3/test/test_gff_version/2b_test.gff deleted file mode 100644 index 79e20127d..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_version/2b_test.gff +++ /dev/null @@ -1,9 +0,0 @@ -#GTF2 GFF2.5 as GFF2 but with semicolon at the end -scaffold_5 JGI exon 442603 443344 . + . gene_id "e_gw1.5.2.1"; transcript_id 335995; -scaffold_5 JGI CDS 442603 443344 . + 0 gene_id "e_gw1.5.2.1"; protein_id 335805; exonNumber 1; -scaffold_5 JGI start_codon 442603 442605 . + 0 gene_id "e_gw1.5.2.1"; -scaffold_5 JGI exon 443530 443531 . + . gene_id "e_gw1.5.2.1"; transcript_id 335995; -scaffold_5 JGI CDS 443530 443531 . + 2 gene_id "e_gw1.5.2.1"; protein_id 335805; exonNumber 2; -scaffold_5 JGI exon 443569 445653 . + . gene_id "e_gw1.5.2.1"; transcript_id 335995; -scaffold_5 JGI CDS 443569 445653 . 
+ 0 gene_id "e_gw1.5.2.1"; protein_id 335805; exonNumber 3; -scaffold_5 JGI stop_codon 445651 445653 . + 0 gene_id "e_gw1.5.2.1"; diff --git a/annotation/NBIS/GFF3/test/test_gff_version/3_test.gff b/annotation/NBIS/GFF3/test/test_gff_version/3_test.gff deleted file mode 100644 index 7ffc254da..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_version/3_test.gff +++ /dev/null @@ -1,8 +0,0 @@ -scaffold_5 JGI exon 442603 443344 . + . gene_id="e_gw1.5.2.1"; transcript_id=335995 -scaffold_5 JGI CDS 442603 443344 . + 0 gene_id="e_gw1.5.2.1"; protein_id=335805; exonNumber=1 -scaffold_5 JGI start_codon 442603 442605 . + 0 gene_id="e_gw1.5.2.1" -scaffold_5 JGI exon 443530 443531 . + . gene_id="e_gw1.5.2.1"; transcript_id=335995 -scaffold_5 JGI CDS 443530 443531 . + 2 gene_id="e_gw1.5.2.1"; protein_id=335805; exonNumber=2 -scaffold_5 JGI exon 443569 445653 . + . gene_id="e_gw1.5.2.1"; transcript_id 335995 -scaffold_5 JGI CDS 443569 445653 . + 0 gene_id="e_gw1.5.2.1"; protein_id=335805; exonNumber=3 -scaffold_5 JGI stop_codon 445651 445653 . 
+ 0 gene_id="e_gw1.5.2.1" diff --git a/annotation/NBIS/GFF3/test/test_gff_version/README b/annotation/NBIS/GFF3/test/test_gff_version/README deleted file mode 100644 index f4664338b..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_version/README +++ /dev/null @@ -1,7 +0,0 @@ -#specificities 9th column -gff1 - 1a: Only one value per line (to group feature together) -gff1 - 1b: Only spaces between tag value and beetween attributes (quoted when space in the value) -gff2 - 2: space + semicolom between attributes, space between tag and value -gff2.5 - 2.5: space + semicolom between attributes, space between tag and value, semicolon end of line -gff3 - 3: = between tag and value -gff3 - 4 : ; between attributes and = between tag and value diff --git a/annotation/NBIS/GFF3/test/test_gff_version/x_test.gff b/annotation/NBIS/GFF3/test/test_gff_version/x_test.gff deleted file mode 100644 index 7e533fa32..000000000 --- a/annotation/NBIS/GFF3/test/test_gff_version/x_test.gff +++ /dev/null @@ -1,8 +0,0 @@ -scaffold_5 JGI exon 442603 443344 . + . gene_id="e_gw1.5.2.1" transcript_id=335995 -scaffold_5 JGI CDS 442603 443344 . + 0 gene_id="e_gw1.5.2.1" protein_id=335805 exonNumber=1 -scaffold_5 JGI start_codon 442603 442605 . + 0 gene_id="e_gw1.5.2.1" -scaffold_5 JGI exon 443530 443531 . + . gene_id="e_gw1.5.2.1" transcript_id=335995 -scaffold_5 JGI CDS 443530 443531 . + 2 gene_id="e_gw1.5.2.1" protein_id=335805 exonNumber=2 -scaffold_5 JGI exon 443569 445653 . + . gene_id="e_gw1.5.2.1" transcript_id 335995 -scaffold_5 JGI CDS 443569 445653 . + 0 gene_id="e_gw1.5.2.1" protein_id=335805 exonNumber=3 -scaffold_5 JGI stop_codon 445651 445653 . 
+ 0 gene_id="e_gw1.5.2.1" diff --git a/annotation/NBIS/Ontology/SO/so_1.obo b/annotation/NBIS/Ontology/SO/so_1.obo deleted file mode 100644 index 2ace844d8..000000000 --- a/annotation/NBIS/Ontology/SO/so_1.obo +++ /dev/null @@ -1,6423 +0,0 @@ -format-version: 1.2 -date: 13:02:2006 15:53 -saved-by: kareneilbeck -auto-generated-by: OBO-Edit 1.000-beta15 -subsetdef: SOFA "SO feature annotation" -default-namespace: sequence - -[Term] -id: SO:0000000 -name: Sequence_Ontology -subset: SOFA - -[Term] -id: SO:0000001 -name: region -def: "Continuous sequence." [SO:ke] -subset: SOFA -synonym: "sequence" RELATED [] -is_a: SO:0000110 ! located_sequence_feature - -[Term] -id: SO:0000002 -name: sequence_secondary_structure -def: "A folded sequence." [SO:ke] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000003 -name: G_quartet -def: "G-quartets are unusual nucelic acid structures consisting of a planar arrangement where each guanine is hydrogen bonded by hoogsteen pairing to another guanine in the quartet." [http://www.library.csi.cuny.edu/ ~ davis/molbiol/lecture_notes/post-transcriptional_processes/RNACapping.pdf] -synonym: "G-quartet" RELATED [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000004 -name: interior_coding_exon -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000005 -name: satellite_DNA -def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000006 -name: PCR_product -def: "A region amplified by a PCR reaction." [SO:ke] -subset: SOFA -synonym: "amplicon" RELATED [] -is_a: SO:0000695 ! 
reagent - -[Term] -id: SO:0000007 -name: read_pair -def: "A pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000008 -name: gene_sensu_your_favorite_organism -is_a: SO:0000009 ! gene_class - -[Term] -id: SO:0000009 -name: gene_class -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000010 -name: protein_coding_gene -is_a: SO:0000009 ! gene_class - -[Term] -id: SO:0000011 -name: non_protein_coding_gene -is_a: SO:0000009 ! gene_class - -[Term] -id: SO:0000012 -name: scRNA_primary_transcript -def: "The primary transcript of any one of several small cytoplasmic RNA moleculespresent in the cytoplasm and sometimes nucleus of a eukaryote." [http:www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -synonym: "small_cytoplasmic_RNA" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000013 -name: scRNA -def: "Any one of several small cytoplasmic RNA moleculespresent in the cytoplasm and sometimes nucleus of a eukaryote." [http:www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000014 -name: INR_motif -def: "A sequence element characteristic of some RNA polymerase II promoters required for the correct positioning of the polymerase for the start of transcription. Overlaps the TSS. The mammalian consensus sequence is YYAN(T|A)YY; the Drosophila consensus sequence is TCA(G|T)t(T|C). In each the A is at position +1 with respect to the TSS. Functionally similar to the TATA box element." [PMID:12651739] -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000015 -name: DPE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters; always found with the INR_motif. Positioned from +28 to +32 with respect to the TSS (+1). Consensus sequence (A|G)G(A|T)(C|T)(G|A|C). 
Required for TFIID binding to TATA-less promoters." [PMID:12651739] -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000016 -name: BRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements at -37 to -32 with respect to the TSS (+1). Consensus sequence is (G|C)(G|C)(G|A)CGCC. Binds TFIIB." [PMID:12651739] -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000017 -name: PSE_motif -def: "A sequence element characteristic of the promoters of snRNA genes transcribed by RNA polymerase II or by RNA polymerase III. Located between -45 and -60 relative to the TSS. The human PSE_motif consensus sequence is TCACCNTNA(C|G)TNAAAAG(T|G)." [PMID:12651739] -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000018 -name: linkage_group -def: "A group of loci that can be grouped in a linear order representing the different degrees of linkage among the genes concerned." [ISBN:038752046] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000019 -name: RNA_hairpin_loop -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000020 -name: RNA_internal_loop -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000021 -name: asymmetric_RNA_internal_loop -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000022 -name: A_minor_RNA_motif -is_a: SO:0000021 ! asymmetric_RNA_internal_loop - -[Term] -id: SO:0000023 -name: K_turn_RNA_motif -is_a: SO:0000021 ! asymmetric_RNA_internal_loop - -[Term] -id: SO:0000024 -name: Sarcin_like_RNA_motif -is_a: SO:0000021 ! asymmetric_RNA_internal_loop - -[Term] -id: SO:0000025 -name: symmetric_RNA_internal_loop -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000026 -name: RNA_junction_loop -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000027 -name: RNA_hook_turn -is_a: SO:0000026 ! RNA_junction_loop - -[Term] -id: SO:0000028 -name: base_pair -is_a: SO:0000002 ! 
sequence_secondary_structure - -[Term] -id: SO:0000029 -name: WC_base_pair -def: "The canonical base pair, where two bases interact via WC edges, with glycosidic bonds oriented cis relative to the axis of orientation." [PMID:12177293] -synonym: "Watson_Crick_based_pair" RELATED [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000030 -name: sugar_edge_base_pair -def: "A type of non-canonical base-pairing." [PMID:12177293] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000031 -name: aptamer -def: "DNA or RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http:http://aptamer.icmb.utexas.edu] -is_a: SO:0000351 ! synthetic_sequence -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000032 -name: DNA_aptamer -def: "DNA molecules that have been selected from random pools based on their ability to bind other molecules." [http:aptamer.icmb.utexas.edu] -is_a: SO:0000031 ! aptamer - -[Term] -id: SO:0000033 -name: RNA_aptamer -def: "RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http:aptamer.icmb.utexas.edu] -is_a: SO:0000031 ! aptamer - -[Term] -id: SO:0000034 -name: morpholino -def: "Morpholino oligos are synthesized from four different Morpholino subunits, each of which contains one of the four genetic bases (A, C, G, T) linked to a 6-membered morpholine ring. Eighteen to 25 subunits of these four subunit types are joined in a specific order by non-ionic phosphorodiamidate intersubunit linkages to give a Morpholino." [http:www.gene-tools.com/Morpholinos/morpholinos.HTML] -is_a: SO:0000351 ! synthetic_sequence -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000035 -name: riboswitch -def: "Riboswitches are mRNAs that can act as direct sensors of small molecules to control their own expression. A riboswitch contains a cis element within mRNA, that can act as a direct sensor of metabolites without a protein intermediate." [PMID:2820954] -is_a: SO:0000234 ! 
mRNA - -[Term] -id: SO:0000036 -name: matrix_attachment_site -def: "A DNA region that is required for the binding of chromatin to the nuclear matrix." [SO:ma] -synonym: "MAR" RELATED [] -synonym: "SMAR" RELATED [] -synonym: "scaffold_attachment_site" RELATED [] -is_a: SO:0000626 ! chromosomal_regulatory_element - -[Term] -id: SO:0000037 -name: locus_control_region -def: "A DNA region that includes DNAse hypersensitive sites located 5' to a gene that confers the high-level, position-independent, and copy number-dependent expression to that gene." [SO:ma] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000038 -name: match_set -def: "A collection of match parts" [SO:ke] -subset: SOFA -is_a: SO:0000343 ! match - -[Term] -id: SO:0000039 -name: match_part -def: "A part of a match, for example an hsp from blast isa match_part." [SO:ke] -subset: SOFA -is_a: SO:0000343 ! match -relationship: part_of SO:0000038 ! match_set - -[Term] -id: SO:0000040 -name: genomic_clone -def: "A clone of a DNA region of a genome." [SO:ma] -is_a: SO:0000151 ! clone - -[Term] -id: SO:0000041 -name: variation_operation -def: "An operation that can be applied to a sequence, that results in a chnage." [SO:ke] -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0000042 -name: pseudogene_attribute -def: "An attribute of a pseudogene (SO:0000336)." [SO:ma] -is_a: SO:0000009 ! gene_class - -[Term] -id: SO:0000043 -name: processed_pseudogene -synonym: "pseudogene_by_reverse_transcription" RELATED [] -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0000044 -name: pseudogene_by_unequal_crossing_over -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0000045 -name: delete -def: "To remove a subsection of sequence." [SO:ke] -is_a: SO:0000041 ! variation_operation - -[Term] -id: SO:0000046 -name: insert -def: "To insert a subsection of sequence." [SO:ke] -is_a: SO:0000041 ! variation_operation - -[Term] -id: SO:0000047 -name: invert -def: "To invert a subsection of sequence." 
[SO:ke] -is_a: SO:0000041 ! variation_operation - -[Term] -id: SO:0000048 -name: substitute -def: "To substitute a subsection of sequence for another." [SO:ke] -is_a: SO:0000041 ! variation_operation - -[Term] -id: SO:0000049 -name: translocate -def: "To translocate a subsection of sequence." [SO:ke] -is_a: SO:0000041 ! variation_operation - -[Term] -id: SO:0000050 -name: gene_part -def: "A part of a gene, that has no other route in the ontology back to region. This concept is necessary for logical inference as these parts must have the properties of region. It is also allows us to associate all the parts of genes with a gene." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000051 -name: probe -def: "A DNA sequence used experimentally to detect the presence or absence of a complementary nucleic acid." [SO:ma] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000052 -name: assortment_derived_deficiency -synonym: "assortment-derived_deficiency" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000053 -name: mutation_affecting_regulatory_region -is_a: SO:1000132 ! consequences_of_mutation - -[Term] -id: SO:0000054 -name: aneuploid -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0000055 -name: hyperploid -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000056 -name: hypoploid -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000057 -name: operator -def: "A regulatory element of an operon to which activators or repressors bind hereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000058 -name: assortment_derived_aneuploid -synonym: "assortment-derived_aneuploid" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000059 -name: nuclease_binding_site -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0000060 -name: compound_chromosome_arm -is_a: SO:1000042 ! 
compound_chromosome - -[Term] -id: SO:0000061 -name: restriction_enzyme_binding_site -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000062 -name: deficient_intrachromosomal_transposition -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:0000063 -name: deficient_interchromosomal_transposition -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:0000064 -name: gene_by_transcript_attribute -comment: This classes of attributes was added by MA to allow the broad description of genes based on qualities of the transcript(s). A product of SO meeting 2004. -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000065 -name: free_chromosome_arm -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000066 -name: gene_by_polyadenylation_attribute -is_a: SO:0000064 ! gene_by_transcript_attribute - -[Term] -id: SO:0000067 -name: gene_to_gene_feature -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000068 -name: overlapping_gene -is_a: SO:0000067 ! gene_to_gene_feature - -[Term] -id: SO:0000069 -name: gene_included_within_intron -is_a: SO:0000068 ! overlapping_gene - -[Term] -id: SO:0000070 -name: gene_included_within_intron_antiparallel -is_a: SO:0000069 ! gene_included_within_intron - -[Term] -id: SO:0000071 -name: gene_included_within_intron_parallel -is_a: SO:0000069 ! gene_included_within_intron - -[Term] -id: SO:0000072 -name: end_overlapping_gene -is_a: SO:0000068 ! overlapping_gene - -[Term] -id: SO:0000073 -name: end_overlapping_gene_five_primethree_prime_overlap -is_a: SO:0000072 ! end_overlapping_gene - -[Term] -id: SO:0000074 -name: end_overlapping_gene_five_primefive_prime_overlap -is_a: SO:0000072 ! end_overlapping_gene - -[Term] -id: SO:0000075 -name: end_overlapping_gene_three_primethree_prime_overlap -is_a: SO:0000072 ! end_overlapping_gene - -[Term] -id: SO:0000076 -name: end_overlapping_gene_three_primefive_prime_overlap -is_a: SO:0000072 ! 
end_overlapping_gene - -[Term] -id: SO:0000077 -name: antisense_gene -is_a: SO:0000068 ! overlapping_gene - -[Term] -id: SO:0000078 -name: polycistronic_transcript -is_a: SO:0000115 ! transcript_feature - -[Term] -id: SO:0000079 -name: dicistronic_transcript -is_a: SO:0000078 ! polycistronic_transcript - -[Term] -id: SO:0000080 -name: member_of_operon -is_a: SO:0000081 ! member_gene_array - -[Term] -id: SO:0000081 -name: member_gene_array -is_a: SO:0000067 ! gene_to_gene_feature - -[Term] -id: SO:0000082 -name: processed_transcript_attribute -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000083 -name: macronuclear_sequence_feature -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000084 -name: micronuclear_sequence_feature -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000085 -name: gene_by_genome_location -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000086 -name: gene_by_organelle_of_genome -is_a: SO:0000085 ! gene_by_genome_location - -[Term] -id: SO:0000087 -name: nuclear_gene -is_a: SO:0000086 ! gene_by_organelle_of_genome - -[Term] -id: SO:0000088 -name: mt_gene -synonym: "mitochondrial_gene" RELATED [] -is_a: SO:0000086 ! gene_by_organelle_of_genome - -[Term] -id: SO:0000089 -name: kinetoplast_gene -is_a: SO:0000088 ! mt_gene - -[Term] -id: SO:0000090 -name: plastid_gene -is_a: SO:0000086 ! gene_by_organelle_of_genome - -[Term] -id: SO:0000091 -name: apicoplast_gene -is_a: SO:0000090 ! plastid_gene - -[Term] -id: SO:0000092 -name: ct_gene -synonym: "chloroplast_gene" RELATED [] -is_a: SO:0000090 ! plastid_gene - -[Term] -id: SO:0000093 -name: chromoplast_gene -is_a: SO:0000090 ! plastid_gene - -[Term] -id: SO:0000094 -name: cyanelle_gene -is_a: SO:0000090 ! plastid_gene - -[Term] -id: SO:0000095 -name: leucoplast_gene -is_a: SO:0000090 ! plastid_gene - -[Term] -id: SO:0000096 -name: proplastid_gene -is_a: SO:0000090 ! plastid_gene - -[Term] -id: SO:0000097 -name: nucleomorph_gene -is_a: SO:0000086 ! 
gene_by_organelle_of_genome - -[Term] -id: SO:0000098 -name: plasmid_gene -is_a: SO:0000085 ! gene_by_genome_location - -[Term] -id: SO:0000099 -name: proviral_gene -is_a: SO:0000085 ! gene_by_genome_location - -[Term] -id: SO:0000100 -name: endogenous_retroviral_gene -is_a: SO:0000099 ! proviral_gene - -[Term] -id: SO:0000101 -name: transposable_element -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/ ~ smaloy/Glossary/T.html] -subset: SOFA -is_a: SO:0000187 ! repeat_family -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:0000102 -name: expressed_sequence_match -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -def: "The end of the clone insert." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000104 -name: polypeptide -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversable denaturation." [SO:ma] -subset: SOFA -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000105 -name: chromosome_arm -def: "A region of the chromosome between the centromere and the telomere. Human chromosomes have two arms, the p arm (short) and the q arm (long) which are separated from each other by the centromere." [http://www.exactsciences.com/cic/glossary/_index.htm] -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000106 -name: non_capped_primary_transcript -is_a: SO:0000146 ! primary_transcript_by_cap_class - -[Term] -id: SO:0000107 -name: sequencing_primer -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000108 -name: mRNA_with_frameshift -is_a: SO:0000082 ! processed_transcript_attribute - -[Term] -id: SO:0000109 -name: sequence_variant -def: "A region of sequence where variation has been observed." 
[SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_a: SO:0000110 ! located_sequence_feature - -[Term] -id: SO:0000110 -name: located_sequence_feature -def: "A biological feature that can be attributed to a region of biological sequence." [SO:ke] -subset: SOFA -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0000111 -name: transposable_element_gene -def: "A gene encoded within a transposable element. For example gag, int, env and pol are the transpable element genes of the TY element in yeast." [SO:ke] -is_a: SO:0000009 ! gene_class - -[Term] -id: SO:0000112 -name: primer -def: "A short preexisting polynucleotide chain to which new deoxyribonucleotides can be added by DNA polymerase." [http://www.ornl.gov/TechResources/Human_Genome/publicat/primer2001/glossary.html] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000113 -name: integrated_virus -def: "A viral sequence which has integrated into the host genome." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000114 -name: methylated_C -def: "A methylated deoxy-cytosine." [SO:ke] -subset: SOFA -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000115 -name: transcript_feature -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000116 -name: edited_transcript -def: "A gene whose transcript is edited." [http://www.rna.ucla.edu/] -is_a: SO:0000115 ! transcript_feature - -[Term] -id: SO:0000117 -name: transcript_with_readthrough_stop_codon -is_obsolete: true - -[Term] -id: SO:0000118 -name: transcript_with_translational_frameshift -is_a: SO:1001261 ! recoded_mRNA - -[Term] -id: SO:0000119 -name: gene_by_class_of_regulation -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns -subset: SOFA -synonym: "pre-mRNA" RELATED [] -is_a: SO:0000185 ! 
primary_transcript - -[Term] -id: SO:0000121 -name: forward_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000122 -name: RNA_sequence_secondary_structure -def: "A folded RNA sequence." [SO:ke] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000123 -name: transcriptionally_regulated -def: "." [SO:ma] -comment: by\: -is_a: SO:0000119 ! gene_by_class_of_regulation - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000127 -name: silenced_gene -is_a: SO:0000126 ! transcriptionally_repressed - -[Term] -id: SO:0000128 -name: gene_silenced_by_DNA_modification -is_a: SO:0000127 ! silenced_gene - -[Term] -id: SO:0000129 -name: gene_silenced_by_DNA_methylation -is_a: SO:0000128 ! gene_silenced_by_DNA_modification - -[Term] -id: SO:0000130 -name: post_translationally_regulated -synonym: "post-translationally_regulated" RELATED [] -is_a: SO:0000119 ! gene_by_class_of_regulation - -[Term] -id: SO:0000131 -name: translationally_regulated -is_a: SO:0000119 ! gene_by_class_of_regulation - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000133 -name: gene_by_epigenetic_modification -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000134 -name: imprinted -is_a: SO:0000119 ! gene_by_class_of_regulation -is_a: SO:0000133 ! gene_by_epigenetic_modification - -[Term] -id: SO:0000135 -name: maternally_imprinted -is_a: SO:0000134 ! 
imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -is_a: SO:0000133 ! gene_by_epigenetic_modification - -[Term] -id: SO:0000138 -name: gene_rearranged_at_DNA_level -is_a: SO:0000133 ! gene_by_epigenetic_modification - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -comment: gene\: -subset: SOFA -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located between the promoter and a structural gene that causes partial termination of transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of sequence which may be used to manufacture a longer assembled, sequence." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000145 -name: recoded_codon -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000146 -name: primary_transcript_by_cap_class -is_a: SO:0000144 ! 
primary_transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region of the genome that codes for portion of spliced messenger RNA (SO:0000234); may contain 5'-untranslated region (SO:0000204), all open reading frames (SO:0000236) and 3'-untranslated region (SO:0000205)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unvailable bases." [SO:ls] -subset: SOFA -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in E. coli or some other organism." [http://www.geospiza.com/community/support/glossary/] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -is_a: SO:0000440 ! vector -relationship: part_of SO:0000760 ! 
YAC_clone - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." [SO:ma] -is_a: SO:0000440 ! vector -relationship: part_of SO:0000764 ! BAC_clone - -[Term] -id: SO:0000154 -name: PAC -def: "P1 Artificial Chromosome. These vectors can hold large inserts, typically 80-200 kb, and propagate in E. coli as a single copy episome." [http://www.ncbi.nlm.nih.gov/genome/guide/mouse/glossary.htm] -synonym: "P1" RELATED [] -is_a: SO:0000440 ! vector -relationship: part_of SO:0000762 ! PAC_clone - -[Term] -id: SO:0000155 -name: plasmid -def: "A self-replicating circular DNA molecule that is distinct from a chromosome in the organism." [SO:ma] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as aplasmids or packaged as a phage,since they retain the lambda cos sites." [SO:ma] -comment: vans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1)\:9-20. -is_a: SO:0000440 ! vector -relationship: part_of SO:0000765 ! cosmid_clone - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma] -is_a: SO:0000440 ! vector -relationship: part_of SO:0000761 ! phagemid_clone - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilises the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource\: mapping and analysis of 96 clones. Genomics 1996; -is_a: SO:0000440 ! vector -relationship: part_of SO:0000763 ! fosmid_clone - -[Term] -id: SO:0000159 -name: deletion -def: "The sequence that is deleted." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0000109 ! 
sequence_variant -relationship: sequence_of SO:0000045 ! delete - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_a: SO:0000151 ! clone - -[Term] -id: SO:0000161 -name: methylated_A -def: "A methylated adenine." [SO:ke] -subset: SOFA -is_a: SO:0000250 ! modified_RNA_base_feature -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "The position where intron is excised." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000163 -name: splice_donor_site -def: "The junction between the 3 prime end of an exon and the following intron." [http://www.ucl.ac.uk/ ~ ucbhjow/b241/glossary.html] -subset: SOFA -synonym: "donor" RELATED [] -synonym: "donor_splice_site" RELATED [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000164 -name: splice_acceptor_site -def: "The junction between the 3 prime end of an intron and the following exon." [http://www.ucl.ac.uk/ ~ ucbhjow/b241/glossary.html] -subset: SOFA -synonym: "acceptor" RELATED [] -synonym: "acceptor_splice_site" RELATED [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000166 -name: enhancer_by_bound_factor -is_a: SO:0000402 ! enhancer_attribute - -[Term] -id: SO:0000167 -name: promoter -def: "The region on a DNA molecule involved in RNA polymerase binding to initiate transcription." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." [SO:ke] -synonym: "RNA_polymerase_A_promoter" RELATED [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke] -synonym: "RNA_polymerase_B_promoter" RELATED [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." [SO:ke] -synonym: "RNA_polymerase_C_promoter" RELATED [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000173 -name: GC_rich_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -synonym: "GC-rich_region" RELATED [] -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -relationship: part_of SO:0000170 ! RNApol_II_promoter -relationship: part_of SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -synonym: "-10_signal" RELATED [] -synonym: "Pribnow_box" RELATED [] -relationship: part_of SO:0000613 ! bacterial_RNApol_promoter - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -synonym: "-35_signal" RELATED [] -relationship: part_of SO:0000613 ! bacterial_RNApol_promoter - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -subset: SOFA -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! 
clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." [http://www.genpromag.com/scripts/glossary.asp?LETTER=R] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." [SO:ke] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "The primary (initial, unprocessed) transcript; includes five_prime_clip (SO:0000555), five_prime_untranslated_region (SO:0000204), open reading frames (SO:0000236), introns (SO:0000188) and three_prime_ untranslated_region (three_prime_UTR), and three_prime_clip (SO:0000557)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -synonym: "precursor_RNA" RELATED [] -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." [SO:ke] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_a: SO:0000657 ! 
repeat_region - -[Term] -id: SO:0000188 -name: intron -def: "A segment of DNA that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000191 -name: interior_intron -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A polymorphism detectable by the size differences in DNA fragments generated by a restriction enzyme." [PMID:6247908] -subset: SOFA -synonym: "restriction_fragment_length_polymorphism" RELATED [] -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." [http:www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "Long interspersed element" RELATED [] -synonym: "Long interspersed nuclear element" RELATED [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon, including the stop_codon." [SO:ke] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_exon_coding_region -def: "The sequence of the 5' exon that encodes for protein." [SO:ke] -is_a: SO:0000195 ! coding_exon -relationship: part_of SO:0000200 ! 
five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_exon_coding_region -def: "The sequence of the 3' exon that encodes for protein." [SO:ke] -is_a: SO:0000195 ! coding_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -synonym: "noncoding_exon" RELATED [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke] -relationship: sequence_of SO:0000049 ! translocate - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000201 -name: interior_exon -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The exon that is most 3-prime on a given transcript." [SO:ma] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime and three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated_region" RELATED [] -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -synonym: "five_prime_untranslated_region" RELATED [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -synonym: "three_prime_untranslated_region" RELATED [] -is_a: SO:0000203 ! 
UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." [SO:ke] -synonym: "Short interspersed element" RELATED [] -synonym: "Short interspersed nuclear element" RELATED [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_polymorphism -is_a: SO:0000248 ! sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." [http:www.genetics.org/cgi/reprint/156/4/1983.pdf] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -synonym: "ribosomal_RNA_primary_transcript" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253.)" [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." [SO:ke] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." 
[SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear mRNA (SO:0000274)." [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: processed_transcript -def: "A transcript which has undergone processing to remove parts such as introns and transcribed_spacer_regions." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: mRNA does not contain introns as it is a processd_transcript.nThe equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. -subset: SOFA -synonym: "messenger_RNA" RELATED [] -is_a: SO:0000233 ! 
processed_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds to a transcription factor." [SO:ke] -subset: SOFA -synonym: "transcription_factor_binding_site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The inframe interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER" [SO:ma, SO:rb] -comment: The definition was modified by Rama. This terms now basically is the same as a CDS. This must be revised. -subset: SOFA -synonym: "open_reading_frame" RELATED [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterised by large modular imperfect long inverted repeats" [http:www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "LVR element" RELATED [] -synonym: "long inverted repeat element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The DNA sequences extending on either side of a specific locus." [http://biotech.icmb.utexas.edu/search/dict-search.mhtml] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000240 -name: chromosome_variation -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0000241 -name: internal_UTR -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polyicistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke] -synonym: "IRES" RELATED [] -is_a: SO:0000139 ! 
ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_a: SO:0000082 ! processed_transcript_attribute - -[Term] -id: SO:0000246 -name: mRNA_polyadenylated -is_a: SO:0000245 ! mRNA_by_polyadenylation_status - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_a: SO:0000245 ! mRNA_by_polyadenylation_status - -[Term] -id: SO:0000248 -name: sequence_length_variation -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types, ISBN:0198506732] -subset: SOFA -synonym: "ribsomal_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. 
tRNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). tRNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -subset: SOFA -synonym: "transfer_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000254 -name: alanyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000261 -name: glycyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000262 -name: histidyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000264 -name: leucyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000265 -name: lysyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000266 -name: methionyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000268 -name: prolyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000269 -name: seryl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000270 -name: threonyl_tRNA -is_a: SO:0000253 ! 
tRNA - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000273 -name: valyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000274 -name: snRNA -def: "Small non-coding RNA in the nucleoplasm. A small nuclear RNA molecule involved in pre-mRNA splicing and processing" [ems:WB, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types, PMID:11733745] -subset: SOFA -synonym: "small_nuclear_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000275 -name: snoRNA -def: "Small nucleolar RNAs (snoRNAs) are involved in the processing and modification of rRNA in the nucleolus. There are two main classes of snoRNAs: the box C/D class, and the box H/ACA class. U3 snoRNA is a member of the box C/D class. Indeed, the box C/D element is a subset of the six short sequence elements found in all U3 snoRNAs, namely boxes A, A', B, C, C', and D. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -subset: SOFA -synonym: "small_nucleolar_RNA" RELATED [] -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. miRNAs are produced from precursor molecules (SO:0000647) that can form local hairpin strcutures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpinprecursor molecule. miRNAs may trigger the cleavage of their target molecules oract as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro_RNA" RELATED [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000277 -name: transcript_by_bound_factor -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000278 -name: transcript_by_bound_nucleic_acid -is_a: SO:0000277 ! transcript_by_bound_factor - -[Term] -id: SO:0000279 -name: transcript_by_bound_protein -is_a: SO:0000277 ! transcript_by_bound_factor - -[Term] -id: SO:0000280 -name: engineered_gene -is_a: SO:0000009 ! gene_class - -[Term] -id: SO:0000281 -name: engineered_foreign_gene -is_a: SO:0000280 ! engineered_gene -is_a: SO:0000285 ! foreign_gene - -[Term] -id: SO:0000282 -name: mRNA_with_minus_1_frameshift -is_a: SO:0000108 ! mRNA_with_frameshift - -[Term] -id: SO:0000283 -name: engineered_foreign_transposable_element_gene -is_a: SO:0000111 ! transposable_element_gene -is_a: SO:0000280 ! engineered_gene - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartate and interupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000285 -name: foreign_gene -is_a: SO:0000452 ! transgene - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -synonym: "LTR" RELATED [] -synonym: "direct_terminal _repeat" RELATED [] -is_a: SO:0000657 ! 
repeat_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000287 -name: fusion_gene -is_a: SO:0000009 ! gene_class - -[Term] -id: SO:0000288 -name: engineered_fusion_gene -is_a: SO:0000280 ! engineered_gene -is_a: SO:0000287 ! fusion_gene - -[Term] -id: SO:0000289 -name: microsatellite -def: "A very short unit sequence of DNA (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000293 -name: engineered_foreign_repetitive_element -is_a: SO:0000280 ! engineered_gene -is_a: SO:0000292 ! repetitive_element - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. Example: GCTGA-----TCAGC." [SO:ke] -subset: SOFA -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -is_a: SO:0000001 ! region - -[Term] -id: SO:0000299 -name: specific_recombination_site -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000301 -name: recombination_feature_of_vertebrate_immune_system_gene -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-RS] -synonym: "J-RS" RELATED [] -is_a: SO:0000301 ! recombination_feature_of_vertebrate_immune_system_gene - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interupted palidrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_base_site -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G or (in RNA) U." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -comment: modified base\: -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -is_a: SO:0000305 ! modified_base_site - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG_island" RELATED [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000309 -name: computed_feature -is_a: SO:0000308 ! sequence_feature_locating_method - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_a: SO:0000309 ! computed_feature - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to\: -is_a: SO:0000309 ! computed_feature - -[Term] -id: SO:0000312 -name: experimentally_determined_feature -is_a: SO:0000308 ! sequence_feature_locating_method - -[Term] -id: SO:0000313 -name: stem_loop -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: transcription_start_site -def: "The site where transcription begins." [SO:ke] -subset: SOFA -synonym: "TSS" RELATED [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000185 ! 
primary_transcript - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding_sequence" RELATED [] -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -is_a: SO:0000151 ! clone - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -synonym: "initiation codon" RELATED [] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke] -is_a: SO:0000344 ! splice_enhancer -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000321 -name: mRNA_with_plus_1_frameshift -is_a: SO:0000108 ! mRNA_with_frameshift - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "translation_start" RELATED [] -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -is_a: SO:0000209 ! 
rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke] -synonym: "translation_end" RELATED [] -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray_oligonucleotide" RELATED [] -is_a: SO:0000051 ! probe -is_a: SO:0000324 ! tag -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000329 -name: mRNA_with_plus_2_frameshift -is_a: SO:0000108 ! mRNA_with_frameshift - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence_tag_site" RELATED [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! processed_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -synonym: "noncoding_conserved_region" RELATED [] -is_a: SO:0000330 ! 
conserved_region - -[Term] -id: SO:0000335 -name: mRNA_with_minus_2_frameshift -is_a: SO:0000108 ! mRNA_with_frameshift - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/ ~ ucbhjow/b241/glossary.html] -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -is_a: SO:0000695 ! reagent -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITES do not encode proteins." [http:www.pnas.org/cgi/content/full/97/18/10083] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome whioch promotes recombination." [SO:rd] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of long DNA molecule." [http://biotech.icmb.utexas.edu/search/dict-search.mhtml] -subset: SOFA -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -synonym: "cytological_band" RELATED [] -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "Expressed Sequence Tag: The sequence of a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [http://genomics.phrma.org/lexicon/e.html] -subset: SOFA -synonym: "expressed_sequence_tag" RELATED [] -is_a: SO:0000695 ! reagent -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000346 -name: Cre_recombination_target_region -synonym: "lox_site" RELATED [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -is_a: SO:0000343 ! match - -[Term] -id: SO:0000348 -name: nucleic_acid -is_a: SO:0000443 ! polymer_type - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -is_a: SO:0000343 ! match - -[Term] -id: SO:0000350 -name: FLP_recombination_target_region -synonym: "FRT_site" RELATED [] -is_a: SO:0000342 ! 
site_specific_recombination_target_region - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "A sequence of nucleotides or amino acids that has been designed by an experimentor and which may, or may not, correspond with any natural sequence." [SO:ma] -is_a: SO:0000443 ! polymer_type - -[Term] -id: SO:0000352 -name: DNA -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000353 -name: assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which in which markers are co-inherited as the result of the lack of historic recombination between them due to their close proximity." [SO:ma] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000356 -name: RNA -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: sequence_by_flanking_target_attribute -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000358 -name: protein -def: "One or more polypeptides which may, or may not, be covalently bonded, and which assume a native secondary and tertiary structure." [SO:ma] -comment: This definition no longer matches the meaning of the concept! Term should probably be proteinacious or something... KEn -is_a: SO:0000443 ! polymer_type - -[Term] -id: SO:0000359 -name: floxed_sequence -is_a: SO:0000357 ! sequence_by_flanking_target_attribute -is_a: SO:0000452 ! transgene - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together signify a unique amino acid or the termination of translation." [http://genomics.phrma.org/lexicon/c.html] -subset: SOFA -relationship: part_of SO:0000234 ! 
mRNA - -[Term] -id: SO:0000361 -name: FRT_flanked_sequence -is_a: SO:0000357 ! sequence_by_flanking_target_attribute - -[Term] -id: SO:0000362 -name: chimeric_cDNA_clone -def: "A cDNA clone constructed from more than one mRNA. Usually an experimental artifact." [SO:ma] -is_a: SO:0000317 ! cDNA_clone - -[Term] -id: SO:0000363 -name: floxed_gene -is_a: SO:0000359 ! floxed_sequence - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposible element." [SO:ke] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "DNA elements capable of mobilizing individual gene cassettes into bacterial chromosomes by site- specific recombination." [http://www.genomicglossaries.com/content/DNA.asp] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -is_a: SO:0000109 ! sequence_variant -is_a: SO:0000699 ! junction -relationship: position_of SO:0000046 ! insert - -[Term] -id: SO:0000367 -name: attI_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/ ~ smaloy/Glossary/C.html] -is_a: SO:0000182 ! 
DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000373 -name: recombinationally_inverted -is_a: SO:0000456 ! recombinationally_rearranged_gene - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -is_a: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000375 -name: rRNA_5.8S -def: "5.8S ribosomal RNA (5.8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5.8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S_rRNA" RELATED [] -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 6S RNA represses expression from a sigma70-dependent promoter during stationary phase." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S_RNA" RELATED [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. 
In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB-RsmB_RNA" RELATED [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -is_a: SO:0000378 ! DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." 
[http:rnaworld.bio.ukans.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA -is_a: SO:0000374 ! ribozyme - -[Term] -id: SO:0000381 -name: group_IIA_intron -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -is_a: SO:0000644 ! antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -is_a: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. 
Its best characterised activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -is_a: SO:0000374 ! ribozyme - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. 
Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesises telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -is_a: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). 
Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA -An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http:http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=1 2409455&dopt=Abstract] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0000401 -name: gene_attribute -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000403 -name: U14_snRNA -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonuceoprotein complex. 
The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron." [PMID:1899376] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "18S_rRNA -A large polynucleotide which functions as a part of the small subunit of the ribosome" [SO:ke] -subset: SOFA -synonym: "16S_rRNA" RELATED [] -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -def: "A region on the surface of a molecule that may interact with another molecule." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A region of a molecule that binds to a protein." [SO:ke] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000411 -name: rescue_fragment -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "Any of the individual polynucleotide sequences produced by digestion of DNA with a restriction endonuclease." 
[http://www.agron.missouri.edu/cgi-bin/sybgw_mdb/mdb3/Term/119] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequences differs from that of a specified sequence." [SO:ke] -subset: SOFA -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000414 -name: genomically_contaminated_cDNA_clone -is_a: SO:0000317 ! cDNA_clone - -[Term] -id: SO:0000415 -name: genomic_polyA_primed_cDNA_clone -is_a: SO:0000317 ! cDNA_clone - -[Term] -id: SO:0000416 -name: partially_unprocessed_cDNA_clone -is_a: SO:0000317 ! cDNA_clone - -[Term] -id: SO:0000417 -name: polypeptide_domain -def: "A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains." [http:www.molbiol.bbsrc.ac.uk/new_protein/domains.html] -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000418 -name: signal_peptide -def: "The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence." [http:www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "signal peptide coding sequence" RELATED [] -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000419 -name: mature_peptide -def: "The coding sequence for the mature or final peptide or protein product following post-translational modification." [http:www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -relationship: part_of SO:0000286 ! 
long_terminal_repeat - -[Term] -id: SO:0000423 -name: R_LTR_region -relationship: part_of SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000424 -name: U3_LTR_region -relationship: part_of SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000425 -name: five_prime_LTR -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -is_a: SO:0000423 ! R_LTR_region -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -is_a: SO:0000422 ! U5_LTR_region -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -is_a: SO:0000424 ! U3_LTR_region -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: transposable_element_target_site_duplication -def: "A sequence of DNA that is duplicated when a transposable element inserts; usually found at each end the insertion." [http:www.koko.gov.my/CocoaBioTech/Glossaryt.html] -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." [SO:ke] -synonym: "LTR_retrotransposon_poly_purine_tract" RELATED [] -is_a: SO:0000186 ! 
LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously_replicating_sequence" RELATED [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_a: SO:0000066 ! gene_by_polyadenylation_attribute - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! ring_chromosome - -[Term] -id: SO:0000440 -name: vector -def: "A DNA molecule that can be used to transfer DNA molecules between organisms." [SO:ma] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -synonym: "single stranded oligonucleotide.new synonym" RELATED [] -synonym: "ss_oligonucleotide" RELATED [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -synonym: "double stranded oligonucleotide" RELATED [] -synonym: "ds_oligonucleotide" RELATED [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000443 -name: polymer_type -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "five_prime_noncoding_exon" RELATED [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." [SO:ke] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." [SO:ke] -is_a: SO:0000446 ! 
UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequencyof these components." [SO:ma] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -is_a: SO:0000341 ! chromosome_band - -[Term] -id: SO:0000451 -name: gene_polyadenylated -is_a: SO:0000066 ! gene_by_polyadenylation_attribute - -[Term] -id: SO:0000452 -name: transgene -is_a: SO:0000009 ! gene_class - -[Term] -id: SO:0000453 -name: transposition -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A small, 17-28-nt, small interfering RNA derived from transcripts ofrepetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000455 -name: gene_with_mRNA_with_frameshift -is_a: SO:0000064 ! gene_by_transcript_attribute - -[Term] -id: SO:0000456 -name: recombinationally_rearranged_gene -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-GENE] -synonym: "D-GENE" RELATED [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_gene -relationship: part_of SO:0000504 ! D_DJ_C_cluster -relationship: part_of SO:0000505 ! D_DJ_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000508 ! 
D_DJ_J_cluster -relationship: part_of SO:0000509 ! D_J_C_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000528 ! V_D_DJ_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000532 ! V_D_J_cluster -relationship: part_of SO:0000559 ! D_cluster -relationship: part_of SO:0000560 ! D_J_cluster - -[Term] -id: SO:0000459 -name: gene_with_trans_spliced_transcript -is_a: SO:0000064 ! gene_by_transcript_attribute - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_gene -synonym: "vertebrate_immunoglobulin/T-cell_receptor_gene" RELATED [] -is_a: SO:0000456 ! recombinationally_rearranged_gene - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entitity." [SO:cjm] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000463 -name: gene_with_alternately_spliced_transcript -is_a: SO:0000064 ! gene_by_transcript_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendent of an exon." [SO:ke] -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." [FB:km] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! 
intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-GENE] -synonym: "V_GENE" RELATED [] -synonym: "variable_gene" RELATED [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_gene -relationship: part_of SO:0000518 ! V_DJ_cluster -relationship: part_of SO:0000519 ! V_DJ_J_cluster -relationship: part_of SO:0000520 ! V_VDJ_C_cluster -relationship: part_of SO:0000521 ! V_VDJ_cluster -relationship: part_of SO:0000522 ! V_VDJ_J_cluster -relationship: part_of SO:0000523 ! V_VJ_C_cluster -relationship: part_of SO:0000524 ! V_VJ_cluster -relationship: part_of SO:0000525 ! V_VJ_J_cluster -relationship: part_of SO:0000526 ! V_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000528 ! V_D_DJ_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000532 ! V_D_J_cluster -relationship: part_of SO:0000534 ! V_J_cluster -relationship: part_of SO:0000535 ! V_J_C_cluster -relationship: part_of SO:0000542 ! V_DJ_C_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster -relationship: part_of SO:0000566 ! V_VJ_J_C_cluster - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -synonym: "post-translationally_regulated_by_protein_stability" RELATED [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! 
golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -synonym: "post-translationally_regulated_by_protein_modification" RELATED [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-GENE] -synonym: "J-GENE" RELATED [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_gene -relationship: part_of SO:0000485 ! DJ_J_cluster -relationship: part_of SO:0000487 ! VDJ_J_C_cluster -relationship: part_of SO:0000488 ! VDJ_J_cluster -relationship: part_of SO:0000490 ! VJ_J_C_cluster -relationship: part_of SO:0000491 ! VJ_J_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000508 ! D_DJ_J_cluster -relationship: part_of SO:0000509 ! D_J_C_cluster -relationship: part_of SO:0000511 ! J_C_cluster -relationship: part_of SO:0000513 ! J_cluster -relationship: part_of SO:0000519 ! V_DJ_J_cluster -relationship: part_of SO:0000522 ! V_VDJ_J_cluster -relationship: part_of SO:0000525 ! V_VJ_J_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000532 ! V_D_J_cluster -relationship: part_of SO:0000534 ! V_J_cluster -relationship: part_of SO:0000535 ! V_J_C_cluster -relationship: part_of SO:0000540 ! DJ_J_C_cluster -relationship: part_of SO:0000560 ! D_J_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster -relationship: part_of SO:0000566 ! V_VJ_J_C_cluster - -[Term] -id: SO:0000471 -name: autoregulated -is_a: SO:0000123 ! 
transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [CJM:SO] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path.SO:0000472." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." [SO:ke] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -is_a: SO:0000081 ! member_gene_array - -[Term] -id: SO:0000478 -name: C_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#C-GENE] -synonym: "C_GENE" RELATED [] -synonym: "constant_gene" RELATED [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_gene -relationship: part_of SO:0000487 ! VDJ_J_C_cluster -relationship: part_of SO:0000489 ! VJ_C_cluster -relationship: part_of SO:0000490 ! VJ_J_C_cluster -relationship: part_of SO:0000504 ! D_DJ_C_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000509 ! D_J_C_cluster -relationship: part_of SO:0000511 ! J_C_cluster -relationship: part_of SO:0000520 ! V_VDJ_C_cluster -relationship: part_of SO:0000523 ! V_VJ_C_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000535 ! 
V_J_C_cluster -relationship: part_of SO:0000539 ! DJ_C_cluster -relationship: part_of SO:0000540 ! DJ_J_C_cluster -relationship: part_of SO:0000541 ! VDJ_C_cluster -relationship: part_of SO:0000542 ! V_DJ_C_cluster -relationship: part_of SO:0000558 ! C_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster -relationship: part_of SO:0000566 ! V_VJ_J_C_cluster - -[Term] -id: SO:0000479 -name: trans_spliced_transcript -synonym: "trans-spliced_transcript" RELATED [] -is_a: SO:0000082 ! processed_transcript_attribute - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly.A minimal_tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly attempting to minimize the overlap between adjacent clones. (LS)" [SO:ke] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occuring at the termini of a DNA transposon." [SO:ke] -synonym: "TIR" RELATED [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate_immunoglobulin/T-cell_receptor_gene-cluster" RELATED [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_gene - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "noncoding_primary_transcript" RELATED [] -is_a: SO:0000185 ! 
primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -synonym: "three_prime_exon_noncoding_region" RELATED [] -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(DJ)-J-CLUSTER] -synonym: "(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000486 -name: five_prime_exon_noncoding_region -def: "The sequence of the 5' exon preceeding the start codon." [SO:ke] -synonym: "five_prime_exon_noncoding_region" RELATED [] -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(VDJ)-J-C-CLUSTER] -synonym: "(VDJ)-J-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(VDJ)-J-CLUSTER] -synonym: "(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(VJ)-C-CLUSTER] -synonym: "(VJ)-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(VJ)-J-C-CLUSTER] -synonym: "(VJ)-J-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(VJ)-J-CLUSTER] -synonym: "(VJ)-J-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -is_a: SO:0000301 ! recombination_feature_of_vertebrate_immune_system_gene - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#3'D-HEPTAMER] -synonym: "3'D-HEPTAMER" RELATED [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#3'D-NONAMER] -synonym: "3'D-NOMAMER" RELATED [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7#3'D-SPACER] -synonym: "3'D-SPACER" RELATED [] -is_a: SO:0000563 ! spacer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#5'D-HEPTAMER] -synonym: "5'D-HEPTAMER" RELATED [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#5'D-NONAMER] -synonym: "5'D-NONAMER" RELATED [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#5'D-SPACER] -synonym: "5'-SPACER" RELATED [] -synonym: "five_prime_D-spacer" RELATED [] -is_a: SO:0000563 ! spacer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continous piece of sequence similar to the 'virtual contig' concept of ensembl." [SO:ke] -subset: SOFA -is_a: SO:0000353 ! 
assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." [SO:ke] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-(DJ)-C-CLUSTER] -synonym: "D-(DJ)-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-(DJ)-CLUSTER] -synonym: "D-(DJ)-CLUSTER" RELATED [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-(DJ)-J-C-CLUSTER] -synonym: "D-(DJ)-J-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000507 -name: pseudogenic_exon -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon -relationship: part_of SO:0000516 ! pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-(DJ)-J-CLUSTER] -synonym: "D-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-J-C-CLUSTER] -synonym: "D-J-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000510 -name: VD_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-GENE] -synonym: "V_D_GENE" RELATED [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_gene - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-C-CLUSTER] -synonym: "J-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-CLUSTER] -synonym: "J-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-NONAMER] -synonym: "J-NONAMER" RELATED [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-HEPTAMER] -synonym: "J-HEPTAMER" RELATED [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! 
pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-SPACER] -synonym: "J-SPACER" RELATED [] -is_a: SO:0000563 ! spacer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(DJ)-CLUSTER] -synonym: "V-(DJ)-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(DJ)-J-CLUSTER] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VDJ)-C-CLUSTER] -synonym: "V-(VDJ)-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VDJ)-CLUSTER] -synonym: "V-(VDJ)-CLUSTER" RELATED [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VDJ)-J-CLUSTER] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VJ)-C-CLUSTER] -synonym: "V-(VJ)-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VJ)-CLUSTER] -synonym: "V-(VJ)-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VJ)-J-CLUSTER] -synonym: "V-(VJ)-J-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-CLUSTER] -synonym: "V-CLUSTER" RELATED [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-(DJ)-C-CLUSTER] -synonym: "V-D-(DJ)-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-(DJ)-CLUSTER] -synonym: "V-D-(DJ)-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-(DJ)-J-C-CLUSTER] -synonym: "V-D-(DJ)-J-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-(DJ)-J-CLUSTER] -synonym: "V-D-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-J-C-CLUSTER] -synonym: "V-D-J-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-J-CLUSTER] -synonym: "V-D-J-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-HEPTAMER] -synonym: "V-HEPTAMER" RELATED [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-J-CLUSTER] -synonym: "V-J-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-J-C-CLUSTER] -synonym: "V-J-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-NONAMER] -synonym: "V-NONAMER" RELATED [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! 
V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-SPACER] -synonym: "V-SPACER" RELATED [] -is_a: SO:0000563 ! spacer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-RS] -synonym: "V-RS" RELATED [] -is_a: SO:0000301 ! recombination_feature_of_vertebrate_immune_system_gene - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(DJ)-C-CLUSTER] -synonym: "(DJ)-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(DJ)-J-C-CLUSTER] -synonym: "(DJ)-J-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(VDJ)-C-CLUSTER] -synonym: "(VDJ)-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(DJ)-C-CLUSTER] -synonym: "V-(DJ)-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous Helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." [http://www.pnas.org/cgi/content/full/100/11/6569] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." [FB:km] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000548 -name: gene_with_edited_transcript -is_a: SO:0000064 ! gene_by_transcript_attribute - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." 
[FB:km] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "Region in 5' UTR where ribosome assembles on mRNA." [SO:ke] -synonym: "RBS" RELATED [] -synonym: "Shine-Dalgarno_sequence" RELATED [] -synonym: "five_prime_ribosome_binding_site" RELATED [] -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! processed_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -synonym: "five_prime_-clip" RELATED [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7#5'D-RS] -synonym: "5'RS" RELATED [] -synonym: "five_prime_D-recombination_signal_sequence" RELATED [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -synonym: "3'-clip" RELATED [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#C-CLUSTER] -synonym: "C-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-CLUSTER] -synonym: "D-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-J-CLUSTER] -synonym: "D-J-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: " 7 nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or J-gene recombination feature of an immunoglobulin/T-cell receptor gene" [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#HEPTAMER] -synonym: "HEPTAMER" RELATED [] -is_a: SO:0000301 ! recombination_feature_of_vertebrate_immune_system_gene - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -is_a: SO:0000301 ! 
recombination_feature_of_vertebrate_immune_system_gene - -[Term] -id: SO:0000563 -name: spacer_of_recombination_feature_of_vertebrate_immune_system_gene -is_a: SO:0000301 ! recombination_feature_of_vertebrate_immune_system_gene - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(DJ)-J-C-CLUSTER] -synonym: "V-(DJ)-J-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VDJ)-J-C-CLUSTER] -synonym: "V-(VDJ)-J-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VJ)-J-C-CLUSTER] -synonym: "V-(VJ)-J-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inverversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promotor -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000569 -name: retrotransposed_protein_coding_gene -alt_id: SO:0100042 -synonym: "captured_pseudogene" RELATED [] -is_a: SO:0000010 ! protein_coding_gene -is_a: SO:0000042 ! 
pseudogene_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#3'D-RS] -synonym: "3'D-RS" RELATED [] -synonym: "three_prime_D-recombination_signal_sequence" RELATED [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_gene -is_a: SO:0000011 ! non_protein_coding_gene - -[Term] -id: SO:0000572 -name: DJ_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-J-GENE] -synonym: "D_J_GENE" RELATED [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_gene -relationship: part_of SO:0000485 ! DJ_J_cluster -relationship: part_of SO:0000504 ! D_DJ_C_cluster -relationship: part_of SO:0000505 ! D_DJ_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000508 ! D_DJ_J_cluster -relationship: part_of SO:0000518 ! V_DJ_cluster -relationship: part_of SO:0000519 ! V_DJ_J_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000528 ! V_D_DJ_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000539 ! DJ_C_cluster -relationship: part_of SO:0000540 ! DJ_J_C_cluster -relationship: part_of SO:0000542 ! V_DJ_C_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster - -[Term] -id: SO:0000573 -name: rRNA_gene -is_a: SO:0000011 ! 
non_protein_coding_gene - -[Term] -id: SO:0000574 -name: VDJ_gene -def: " Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)" [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-J-GENE] -synonym: "V-D-J-GENE" RELATED [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_gene -relationship: part_of SO:0000487 ! VDJ_J_C_cluster -relationship: part_of SO:0000488 ! VDJ_J_cluster -relationship: part_of SO:0000520 ! V_VDJ_C_cluster -relationship: part_of SO:0000521 ! V_VDJ_cluster -relationship: part_of SO:0000522 ! V_VDJ_J_cluster -relationship: part_of SO:0000541 ! VDJ_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster - -[Term] -id: SO:0000575 -name: scRNA_gene -is_a: SO:0000011 ! non_protein_coding_gene - -[Term] -id: SO:0000576 -name: VJ_gene -def: " Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)" [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-J-GENE] -synonym: "V-J-GENE" RELATED [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_gene -relationship: part_of SO:0000489 ! VJ_C_cluster -relationship: part_of SO:0000490 ! VJ_J_C_cluster -relationship: part_of SO:0000491 ! VJ_J_cluster -relationship: part_of SO:0000523 ! V_VJ_C_cluster -relationship: part_of SO:0000524 ! V_VJ_cluster -relationship: part_of SO:0000525 ! V_VJ_J_cluster -relationship: part_of SO:0000566 ! V_VJ_J_C_cluster - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_gene -is_a: SO:0000011 ! non_protein_coding_gene - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." 
[SO:ma] -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://www.rna.ucla.edu] -synonym: "pre-edited_region" RELATED [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. tmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and eukaryote nuclear genomes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa_RNA" RELATED [] -synonym: "ssrA" RELATED [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_gene -is_a: SO:0000578 ! snoRNA_gene - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." 
[SO:ke] -synonym: "10Sa_RNA_primary_transcript" RELATED [] -synonym: "ssrA_RNA_primary_transcript" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyse their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -subset: SOFA -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -is_a: SO:0000188 ! intron -is_a: SO:0000374 ! ribozyme - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. 
Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal_recognition_particle_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A stem-loop RNA structure where nucleotides in the loop participate in complementary interactions with a region of RNA downstream of the stem-loop." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12519954&dopt=Abstract] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov 80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "H-pseudoknot" RELATED [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. 
Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "C/D_box_snoRNA" RELATED [] -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "H/ACA_box_snoRNA" RELATED [] -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." [http://www.rna.ucla.edu/index.html] -is_a: SO:0000116 ! 
edited_transcript - -[Term] -id: SO:0000598 -name: transcript_edited_by_C_insertion_and_dinucleotide_insertion -def: "The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs." [http://nsm1.utdallas.edu/bio/miller/physarum/overview.htm] -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_a: SO:0000116 ! edited_transcript - -[Term] -id: SO:0000599 -name: transcript_edited_by_C_to_U_substitution -is_a: SO:0000116 ! edited_transcript - -[Term] -id: SO:0000600 -name: transcript_edited_by_A_to_I_substitution -is_a: SO:0000116 ! edited_transcript - -[Term] -id: SO:0000601 -name: transcript_edited_by_G_addition -is_a: SO:0000116 ! edited_transcript - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a perfect duplex (except for the oligoU tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. 
Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -subset: SOFA -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." [http://www.rna.ucla/] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "The region between two known genes." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://www.rna.ucla/] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." [http://www.rna.ucla.edu/] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_gene -is_a: SO:0000578 ! snoRNA_gene - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -relationship: adjacent_to SO:0000602 ! guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -relationship: adjacent_to SO:0000234 ! 
mRNA - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch_point" RELATED [] -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -is_a: SO:0000167 ! promoter -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The site where transcription ends." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "tRNA_promoter" RELATED [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000620 -name: B_box -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -is_a: SO:0000171 ! 
RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000623 -name: snRNA_gene -is_a: SO:0000011 ! non_protein_coding_gene - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenence of the end," [SO:ma] -subset: SOFA -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "Combination of short DNA sequence elements which suppress the transcription of an adjacent gene or genes." [http://www.brunel.ac.uk/depts/bio/project/old_hmg/gloss3.htm#s] -subset: SOFA -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000627 -name: insulator -subset: SOFA -synonym: "insulator_element" RELATED [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000631 -name: polycistronic_primary_transcript -def: "A primary transcript encoding for more than one protein product." [SO:ke] -is_a: SO:0000078 ! polycistronic_transcript - -[Term] -id: SO:0000632 -name: monocistronic_primary_transcript -def: "A primary transcript encoding for more than one protein product." [SO:ke] -is_a: SO:0000665 ! monocistronic_transcript - -[Term] -id: SO:0000633 -name: monocistronic_mRNA -def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd] -synonym: "monocistronic_processed_transcript" RELATED [] -is_a: SO:0000665 ! 
monocistronic_transcript - -[Term] -id: SO:0000634 -name: polycistronic_mRNA -def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd] -synonym: "polycistronic_processed_transcript" RELATED [] -is_a: SO:0000078 ! polycistronic_transcript - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "The 3' site of a mini-exon which is trans-spliced on to the 5'end of a mature mRNA." [SO:ke] -synonym: "mini-exon_donor_RNA" RELATED [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -relationship: part_of SO:0000635 ! mini_exon_donor_RNA - -[Term] -id: SO:0000637 -name: engineered_plasmid -synonym: "engineered_plasmid_gene" RELATED [] -is_a: SO:0000098 ! plasmid_gene -is_a: SO:0000280 ! engineered_gene - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_gene -is_a: SO:0000011 ! non_protein_coding_gene - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repetitive sequence spanning 500 to 20,000 base pairs (a repeat unit is 5 - 30 base pairs)." 
[http://www.rerf.or.jp/eigo/glossary/minisate.htm] -subset: SOFA -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "Small RNA molecule that is the product of a longerexogenous or endogenous dsRNA, which is either a bimolecular duplexe or very longhairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulatefrom both strands of the dsRNA. sRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small_interfering_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro_RNA_primary_transcript" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000648 -name: stRNA_primary_transcript -def: "A primary transcript encoding a small temporal mRNA (SO:0000649)." [SO:ke] -synonym: "small_temporal_RNA_primary_transcript" RELATED [] -is_a: SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small_temporal_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. 
In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilises 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "23S_rRNA" RELATED [] -synonym: "28S_rRNA" RELATED [] -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000654 -name: maxi_circle_gene -synonym: "maxi-circle" RELATED [] -is_a: SO:0000088 ! mt_gene - -[Term] -id: SO:0000655 -name: ncRNA -def: "An mRNA sequence that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: ncRNA is a processed_transcript so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding_RNA" RELATED [] -is_a: SO:0000233 ! processed_transcript - -[Term] -id: SO:0000656 -name: stRNA_gene -is_a: SO:0000011 ! non_protein_coding_gene - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "interspersed_repeat" RELATED [] -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_gene -is_a: SO:0000011 ! non_protein_coding_gene - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_a: SO:0000342 ! 
site_specific_recombination_target_region - -[Term] -id: SO:0000661 -name: intron_attribute -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -subset: SOFA -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000663 -name: tRNA_gene -is_a: SO:0000011 ! non_protein_coding_gene - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000665 -name: monocistronic_transcript -is_a: SO:0000115 ! transcript_feature - -[Term] -id: SO:0000666 -name: mobile_intron -is_a: SO:0000661 ! intron_attribute - -[Term] -id: SO:0000667 -name: insertion -def: "A region of sequence identified as having been inserted." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0000109 ! sequence_variant -relationship: sequence_of SO:0000046 ! insert - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." [SO:ma] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -is_a: SO:0000669 ! 
sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non-canonical_splice_site" RELATED [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." [SO:ke] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -is_a: SO:0000164 ! splice_acceptor_site -is_a: SO:0000675 ! canonical_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." [SO:ke] -is_a: SO:0000163 ! splice_donor_site -is_a: SO:0000675 ! canonical_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." [SO:ke] -synonym: "non-canonical_three_prime_splice_site" RELATED [] -is_a: SO:0000164 ! splice_acceptor_site -is_a: SO:0000674 ! non_canonical_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non-canonical-five_prime_splice_site" RELATED [] -is_a: SO:0000163 ! splice_donor_site -is_a: SO:0000674 ! non_canonical_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non-canonical_start_codon" RELATED [] -synonym: "non_ATG_start_codon" RELATED [] -is_a: SO:0000318 ! 
start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -is_a: SO:0000233 ! processed_transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." [http://www.ncbi.nlm.nih.gov 80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -is_a: SO:0000344 ! splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeting by a nuclease enzyme." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000685 -name: DNAaseI_hypersensitive_site -is_a: SO:0000322 ! nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "For some translocations, particularly but not exclusively, reciprocal translocations, the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements." [SO:ma] -relationship: part_of SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occured." [SO:ke] -subset: SOFA -is_a: SO:0000109 ! sequence_variant -is_a: SO:0000699 ! junction -relationship: position_of SO:0000045 ! delete - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -is_a: SO:0000102 ! 
expressed_sequence_match - -[Term] -id: SO:0000690 -name: gene_with_polycistronic_transcript -is_a: SO:0000064 ! gene_by_transcript_attribute - -[Term] -id: SO:0000691 -name: translocation_site -def: "The space between two bases in a sequence which marks the position where a translocation has occurred." [SO:ke] -relationship: position_of SO:0000049 ! translocate - -[Term] -id: SO:0000692 -name: gene_with_dicistronic_transcript -is_a: SO:0000690 ! gene_with_polycistronic_transcript - -[Term] -id: SO:0000693 -name: gene_with_recoded_mRNA -is_a: SO:0000064 ! gene_by_transcript_attribute - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives (alleles) exist in normal individuals in some population(s), wherein the least frequent allele has an abundance of 1% or greater." [http://www.cgr.ki.se/cgb/groups/brookes/Articles/essence_of_snps_article.pdf] -subset: SOFA -synonym: "single_nucleotide_polymorphism" RELATED [] -is_a: SO:1000008 ! point_mutation - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" RELATED [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000697 -name: gene_with_stop_codon_read_through -is_a: SO:0000693 ! gene_with_recoded_mRNA - -[Term] -id: SO:0000698 -name: gene_with_stop_codon_redefined_as_pyrrolysine -is_a: SO:0000697 ! gene_with_stop_codon_read_through - -[Term] -id: SO:0000699 -name: junction -def: "A junction refers to an interbase location of zero in a sequence." [SO:ke] -subset: SOFA -synonym: "boundary" RELATED [] -is_a: SO:0000110 ! located_sequence_feature - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A locatable region of genomic sequence, corresponding to a unit of inheritance, which is associated with regulatory regions, transcribed regions and/or other functional sequence regions" [SO:rd] -subset: SOFA -is_a: SO:0000001 ! region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjacent copies of a DNA sequence." [http://www.sci.sdsu.edu/ ~ smaloy/Glossary/T.html] -subset: SOFA -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The process that produces mature transcripts by combining exons of independent pre-mRNA molecules. The acceptor site lies on the 3' of these molecules." [SO:ke] -subset: SOFA -is_a: SO:0000164 ! splice_acceptor_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The site at which trans-splicing occurs." [SO:ke] -synonym: "trans-splice_donor_site" RELATED [] -is_a: SO:0000163 ! splice_donor_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000710 -name: gene_with_stop_codon_redefined_as_selenocysteine -is_a: SO:0000697 ! 
gene_with_stop_codon_read_through - -[Term] -id: SO:0000711 -name: gene_with_mRNA_recoded_by_translational_bypass -is_a: SO:0000693 ! gene_with_recoded_mRNA - -[Term] -id: SO:0000712 -name: gene_with_transcript_with_translational_frameshift -is_a: SO:0000693 ! gene_with_recoded_mRNA - -[Term] -id: SO:0000713 -name: DNA_motif -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000715 -name: RNA_motif -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000716 -name: dicistronic_mRNA -synonym: "dicistronic_processed_transcript" RELATED [] -is_a: SO:0000079 ! dicistronic_transcript - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It does not contain the start or stop codon." [SO:rb] -comment: This term was added after a request by SGD.nAgust 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interupted by one or more stop codons; usually identified through intergenomic sequence comparisons." [SO:rb] -comment: Term requested by Rama from SGD -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000720 -name: foreign_transposable_element -comment: requested by Michael on 19 Nov 2004 -is_a: SO:0000101 ! 
transposable_element - -[Term] -id: SO:0000721 -name: gene_with_dicistronic_primary_transcript -comment: Requested by Michael, 19 nov 2004 -is_a: SO:0000692 ! gene_with_dicistronic_transcript - -[Term] -id: SO:0000722 -name: gene_with_dicistronic_mRNA -comment: Requested by MA nov 19 2004 -synonym: "gene_with_dicistronic_processed_transcript" RELATED [] -is_a: SO:0000692 ! gene_with_dicistronic_transcript - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." [SO:ma] -synonym: "intervening DNA" RELATED [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: origin_of_transfer -def: "A region of a DNA molecule whre transfer is initiated during the process of conjugation or mobilization." [http:http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -synonym: "oriT" RELATED [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000725 -name: transit_peptide -def: "The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein: this domain is involved in post translational import of the protein into the organelle." [http:http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -comment: Added to bring SO inline with the embl ddbj genbank feature table. -subset: SOFA -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000726 -name: repeat_unit -def: "A single repeat element." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -comment: Added to comply with the feature table. -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000727 -name: TF_module -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active" [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -synonym: "CRM" RELATED [] -synonym: "cis_regulatory_module" RELATED [] -is_a: SO:0005836 ! 
regulatory_region - -[Term] -id: SO:0000728 -name: intein -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000729 -name: intein_containing_protein_coding_gene -is_a: SO:0000010 ! protein_coding_gene - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unkown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! assembly - -[Term] -id: SO:0000731 -name: fragment -comment: added because of request by MO people. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000732 -name: predicted -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000733 -name: feature_attribute -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000734 -name: exemplar_mRNA -def: "An exemplar is a representative cDNA sequence for each gene. The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." [http:mged.sourceforge.net/ontologies/MGEDontology.php#exemplar_mRNA] -comment: Added for the MO people. -is_a: SO:0000082 ! processed_transcript_attribute - -[Term] -id: SO:0000735 -name: sequence_location -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_location -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -is_a: SO:0000736 ! organelle_location - -[Term] -id: SO:0000738 -name: nuclear_sequence -is_a: SO:0000736 ! organelle_location - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -is_a: SO:0000736 ! organelle_location - -[Term] -id: SO:0000740 -name: plastid_sequence -is_a: SO:0000736 ! organelle_location - -[Term] -id: SO:0000741 -name: kinetoplast_sequence -is_a: SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000742 -name: maxicircle_sequence -is_a: SO:0000737 ! 
mitochondrial_sequence - -[Term] -id: SO:0000743 -name: apicoplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_sequence -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_sequence -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -is_a: SO:0005836 ! regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000754 -name: lambda_vector -is_a: SO:0000440 ! vector -relationship: part_of SO:0000160 ! lambda_clone - -[Term] -id: SO:0000755 -name: plasmid_vector -is_a: SO:0000440 ! vector -relationship: part_of SO:0000759 ! plasmid_clone - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template" [SO:ma] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000317 ! cDNA_clone - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_a: SO:0000151 ! clone - -[Term] -id: SO:0000760 -name: YAC_clone -is_a: SO:0000151 ! clone - -[Term] -id: SO:0000761 -name: phagemid_clone -is_a: SO:0000151 ! 
clone - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_a: SO:0000151 ! clone - -[Term] -id: SO:0000763 -name: fosmid_clone -is_a: SO:0000151 ! clone - -[Term] -id: SO:0000764 -name: BAC_clone -is_a: SO:0000151 ! clone - -[Term] -id: SO:0000765 -name: cosmid_clone -is_a: SO:0000151 ! clone - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome. " [SO:ma] -is_a: SO:0000155 ! plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [Indiana:kw] -comment: Added in response to comment from Kelly Williams from Indiana.nhttp://nar.oxfordjournals.org/cgi/content/full/32/15/4531n10 Nov, 2005 -relationship: part_of SO:0000584 ! tmRNA - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [Indiana:kw] -comment: Added in response to Kelly Williams from Indiananhttp://nar.oxfordjournals.org/cgi/content/full/32/15/4531n10 nov 2005 -relationship: part_of SO:0000584 ! tmRNA - -[Term] -id: SO:0000771 -name: QTL -def: "Quantitative Trait Locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. 
Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." [http:rgd.cbi.pku.edu.cn/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005 -is_a: SO:0000001 ! region - -[Term] -id: SO:0000772 -name: genomic_island -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -is_a: SO:0000001 ! region - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038/nrmicro884 nGENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMSnUlrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible_element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands.nNature Reviews Microbiology 2, 414-424 (2004); doi:10.1038/nrmicro884 nGENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMSnUlrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible_element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." 
[SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands.nnEvolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA genenJohn T. Sullivan and Clive W. RonsonnPNAS 1998 Apr 28 95 (9) 5145-5149n -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0001044 -name: nuclear_mt_pseudogene -synonym: "NUMT" RELATED [] -synonym: "nuclear_mitochondrial_pseudogene" RELATED [] -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0005836 -name: regulatory_region -def: "A DNA sequence that controls the expression of a gene." [http://www.genpromag.com/scripts/glossary.asp?LETTER=R] -subset: SOFA -is_a: SO:0000001 ! region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0005837 -name: snRNA_4.5S_primary_transcript -def: "A primary transcript encoding a 4.5S snRNA." [SO:ke] -synonym: "4.5S_snRNA_primary_transcript" RELATED [] -is_a: SO:0000231 ! snRNA_primary_transcript - -[Term] -id: SO:0005839 -name: snRNA_4.5S -synonym: "4.5S_snRNA" RELATED [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0005841 -name: methylation_guide_snoRNA -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0005843 -name: rRNA_cleavage_snoRNA -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0005845 -name: single_exon -is_a: SO:0000147 ! exon - -[Term] -id: SO:0005847 -name: member_of_gene_cassette_array -is_a: SO:0005848 ! member_of_gene_cassette - -[Term] -id: SO:0005848 -name: member_of_gene_cassette -is_a: SO:0000081 ! member_gene_array - -[Term] -id: SO:0005849 -name: member_of_gene_subarray -is_a: SO:0000081 ! 
member_gene_array - -[Term] -id: SO:0005850 -name: primer_binding_site -def: "Non-covalent primer binding site for initiation of replication, transcription, or reverse transcription." [http:www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0005851 -name: gene_array -def: "An array includes two or more genes, or two or more gene subarrays, contiguously arranged where the individual genes, or subarrays, are either identical in sequence, or essentially so." [SO:ma] -comment: This would include\, for example\, a cluster of genes each encoding the major ribosomal RNAs and a cluster of histone gene subarrays. -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0005852 -name: gene_subarray -def: "A subarray is, by defintition, a member of a gene array (SO:0005851); the members of a subarray may differ substantially in sequence, but are closely related in function." [SO:ma] -comment: This would include\, for example\, a cluster of genes encoding different histones. -is_a: SO:0005851 ! gene_array - -[Term] -id: SO:0005853 -name: gene_cassette -def: "A non-functional gene that, when captured by recombination forms a functional gene." [SO:ma] -comment: This would include\, for example\, the mating type gene cassettes of S. cerevisiae. -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0005854 -name: gene_cassette_array -def: "An array of non-functional genes whose members, when captured by recombination form functional genes." [SO:ma] -comment: This would include\, for example\, the arrays of non-functional VSG genes of Trypanosomes. -is_a: SO:0005853 ! gene_cassette - -[Term] -id: SO:0005855 -name: gene_group -def: "A collection of related genes." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0005856 -name: selenocysteine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0005857 -name: selenocysteinyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0005858 -name: syntenic_region -def: "A region in which two or more pairs of homologous markers occur on the same chromosome in two or more species." [http://tbase.jax.org/docs/glossary.html] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:1000002 -name: substitution -def: "Any change in genomic DNA caused by a single event." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0000109 ! sequence_variant -relationship: sequence_of SO:0000048 ! substitute - -[Term] -id: SO:1000004 -name: partially_characterised_change_in_DNA_sequence -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000007 ! uncharacterised_change_in_nucleotide_sequence - -[Term] -id: SO:1000005 -name: complex_substitution -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000007 -name: uncharacterised_change_in_nucleotide_sequence -def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000008 -name: point_mutation -def: "A mutation event where a single DNA nucleotide changes into another nucleotide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:1000002 ! 
substitution - -[Term] -id: SO:1000009 -name: transition -def: "Change of a pyrimidine nucleotide, C or T, into an other pyrimidine nucleotide, or change of a purine nucleotide, A or G, into an other purine nucleotide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000008 ! point_mutation - -[Term] -id: SO:1000010 -name: pyrimidine_transition -def: "A substitution of a pyrimidine, C or T, for another pyrimidine." [SO:ke] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000011 -name: C_to_T_transition -def: "A transition of a cytidine to a thymine." [SO:ke] -is_a: SO:1000010 ! pyrimidine_transition - -[Term] -id: SO:1000012 -name: C_to_T_transition_at_pCpG_site -def: "The transition of cytidine to thymine occurring at a pCpG site as a consequence of the spontaneous deamination of 5'-methylcytidine." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000011 ! C_to_T_transition - -[Term] -id: SO:1000013 -name: T_to_C_transition -is_a: SO:1000010 ! pyrimidine_transition - -[Term] -id: SO:1000014 -name: purine_transition -def: "A substitution of a purine, A or G, for another purine." [SO:ke] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000015 -name: A_to_G_transition -def: "A transition of an adenine to a guanine." [SO:ke] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000016 -name: G_to_A_transition -def: "A transition of a guanine to an adenine." [SO:ke] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000017 -name: transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G, or vice versa." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000008 ! point_mutation - -[Term] -id: SO:1000018 -name: pyrimidine_to_purine_transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G." [SO:ke] -is_a: SO:1000017 ! 
transversion - -[Term] -id: SO:1000019 -name: C_to_A_transversion -def: "A transversion from cytidine to adenine." [SO:ke] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000020 -name: C_to_G_transversion -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000021 -name: T_to_A_transversion -def: "A transversion from T to A." [SO:ke] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000022 -name: T_to_G_transversion -def: "A transversion from T to G." [SO:ke] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000023 -name: purine_to_pyrimidine_transversion -def: "Change of a purine nucleotide, A or G , into a pyrimidine nucleotide C or T." [SO:ke] -is_a: SO:1000017 ! transversion - -[Term] -id: SO:1000024 -name: A_to_C_transversion -def: "A transversion from adenine to cytidine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000025 -name: A_to_T_transversion -def: "A transversion from adenine to thymine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000026 -name: G_to_C_transversion -def: "A transversion from guanine to cytidine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000027 -name: G_to_T_transversion -def: "A transversion from guanine to thymine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000028 -name: intrachromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000029 -name: chromosomal_deletion -synonym: "(Drosophila)Df" RELATED [] -synonym: "(bacteria)&Dgr;" RELATED [] -synonym: "(fungi)D" RELATED [] -is_a: SO:0000550 ! aneuploid_chromosome -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000030 -name: chromosomal_inversion -synonym: "(Drosophila)In" RELATED [] -synonym: "(bacteria)IN" RELATED [] -synonym: "(fungi)In" RELATED [] -is_a: SO:1000028 ! 
intrachromosomal_mutation - -[Term] -id: SO:1000031 -name: interchromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000032 -name: indel -def: "A hybrid term (insertion/deletion) to describe sequence length change when the direction of the change is unspecified." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:0000109 ! sequence_variant - -[Term] -id: SO:1000033 -name: nucleotide_deletion -def: "One or more continuous nucleotides are excised from the sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000032 ! indel - -[Term] -id: SO:1000034 -name: nucleotide_insertion -def: "One or more nucleotides are added between two adjacent nucleotides in the sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000032 ! indel - -[Term] -id: SO:1000035 -name: nucleotide_duplication -def: "One or more nucleotides are added between two adjacent nucleotides in the sequence; the inserted sequence derives from, or is identical in sequence to, nucleotides adjacent to insertion point." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000034 ! nucleotide_insertion - -[Term] -id: SO:1000036 -name: inversion -def: "A continuous nucleotide sequence is inverted in the same position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0000109 ! sequence_variant -relationship: sequence_of SO:0000047 ! invert - -[Term] -id: SO:1000037 -name: chromosomal_duplication -synonym: "(Drosophila)Dp" RELATED [] -synonym: "(fungi)Dp" RELATED [] -is_a: SO:0000550 ! aneuploid_chromosome -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000038 -name: intrachromosomal_duplication -is_a: SO:1000028 ! intrachromosomal_mutation -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000039 -name: direct_tandem_duplication -is_a: SO:1000173 ! 
tandem_duplication - -[Term] -id: SO:1000040 -name: inverted_tandem_duplication -is_a: SO:1000173 ! tandem_duplication - -[Term] -id: SO:1000041 -name: intrachromosomal_transposition -synonym: "(Drosophila)Tp" RELATED [] -is_a: SO:0000453 ! transposition -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:1000042 -name: compound_chromosome -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000043 -name: Robertsonian_fusion -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000044 -name: chromosomal_translocation -synonym: "(Drosophila)T" RELATED [] -synonym: "(fungi)T" RELATED [] -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000045 -name: ring_chromosome -synonym: "(Drosophila)R" RELATED [] -synonym: "(fungi)C" RELATED [] -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000046 -name: pericentric_inversion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000047 -name: paracentric_inversion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000048 -name: reciprocal_chromosomal_translocation -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000049 -name: mutation_affecting_transcript -def: "Any change in mature, spliced and processed, RNA that results from a change in the corresponding DNA sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000132 ! consequences_of_mutation - -[Term] -id: SO:1000050 -name: no_change_in_transcript -def: "No effect on the state of the RNA." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000052 -name: complex_change_in_transcript -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000054 -name: mutation_affecting_coding_sequence -def: "Any of the amino acid coding triplets of a gene are affected by the DNA mutation." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000055 -name: initiator_codon_change_in_transcript -def: "The DNA mutation changes, usually destroys, the first coding triplet of a gene. Usually prevents translation although another initiator codon may be used." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000056 ! amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000056 -name: amino_acid_coding_codon_change_in_transcript -def: "The DNA mutation affects the amino acid coding sequence of a gene; this region includes both the initiator and terminator codons." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000054 ! mutation_affecting_coding_sequence - -[Term] -id: SO:1000057 -name: synonymous_codon_change_in_transcript -def: "The changed codon has the same translation product as the original codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000056 ! amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000058 -name: non_synonymous_codon_change_in_transcript -def: "A DNA point mutation that causes a substitution of an amino acid by an other." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "non-synonymous_codon_change_in_transcript" RELATED [] -is_a: SO:1000056 ! amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000059 -name: missense_codon_change_in_transcript -def: "The nucleotide change in the codon leads to a new codon coding for a new amino acid." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000058 ! non_synonymous_codon_change_in_transcript - -[Term] -id: SO:1000060 -name: conservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change does not change the gross properties (size, charge, hydrophobicity) of the amino acid at that position." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated\, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -is_a: SO:1000059 ! missense_codon_change_in_transcript - -[Term] -id: SO:1000061 -name: nonconservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change changes the gross properties (size, charge, hydrophobicity) of the amino acid in that position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated\, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -is_a: SO:1000059 ! missense_codon_change_in_transcript - -[Term] -id: SO:1000062 -name: nonsense_codon_change_in_transcript -def: "The nucleotide change in the codon triplet creates a terminator codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000056 ! amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000063 -name: terminator_codon_change_in_transcript -is_a: SO:1000054 ! mutation_affecting_coding_sequence - -[Term] -id: SO:1000064 -name: mutation_affecting_reading_frame -def: "An umbrella term for terms describing an effect of a mutation on the frame of translation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000054 ! mutation_affecting_coding_sequence - -[Term] -id: SO:1000065 -name: frameshift_mutation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000064 ! mutation_affecting_reading_frame - -[Term] -id: SO:1000066 -name: plus_1_frameshift_mutation -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000067 -name: minus_1_frameshift_mutation -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000068 -name: plus_2_frameshift_mutation -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000069 -name: minus_2_frameshift_mutation -is_a: SO:1000065 ! 
frameshift_mutation - -[Term] -id: SO:1000070 -name: mutation_affecting_transcript_processing -def: "Mutation affects the way in which the primary transcriptional product is processed to form the mature transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000071 -name: mutation_affecting_splicing -def: "Mutation affects the way in which the primary transcriptional product is processed to form the mature transcript, specifically by the removal (splicing) of intron sequences." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000132 ! consequences_of_mutation - -[Term] -id: SO:1000072 -name: splice_donor_mutation -is_a: SO:1000071 ! mutation_affecting_splicing -is_a: SO:1000074 ! cryptic_splice_activator_mutation - -[Term] -id: SO:1000073 -name: splice_acceptor_mutation -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000074 -name: cryptic_splice_activator_mutation -def: "Mutation creates a new (functional) splice site." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000075 -name: mutation_affecting_editing -def: "Mutation affects the editing of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000070 ! mutation_affecting_transcript_processing - -[Term] -id: SO:1000076 -name: mutation_affecting_transcription -def: "Mutation affects the process of transcription, its initiation, progression or termination." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000078 -name: mutation_decreasing_rate_of_transcription -is_a: SO:1000081 ! mutation_affecting_rate_of_transcription - -[Term] -id: SO:1000079 -name: mutation_affecting_transcript_sequence -is_a: SO:1000049 ! 
mutation_affecting_transcript - -[Term] -id: SO:1000080 -name: mutation_increasing_rate_of_transcription -is_a: SO:1000081 ! mutation_affecting_rate_of_transcription - -[Term] -id: SO:1000081 -name: mutation_affecting_rate_of_transcription -is_a: SO:1000076 ! mutation_affecting_transcription - -[Term] -id: SO:1000082 -name: mutation_affecting_transcript_stability -def: "Mutation affects the stability of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000083 -name: mutation_increasing_transcript_stability -def: "Mutation increases the stability (half-life) of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000082 ! mutation_affecting_transcript_stability - -[Term] -id: SO:1000084 -name: mutation_decreasing_transcript_stability -def: "Mutation decreases the stability (half-life) of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000082 ! mutation_affecting_transcript_stability - -[Term] -id: SO:1000085 -name: mutation_affecting_level_of_transcript -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000086 -name: mutation_decreasing_level_of_transcript -is_a: SO:1000085 ! mutation_affecting_level_of_transcript - -[Term] -id: SO:1000087 -name: mutation_increasing_level_of_transcript -is_a: SO:1000085 ! mutation_affecting_level_of_transcript - -[Term] -id: SO:1000088 -name: mutation_affecting_translational_product -def: "Mutation causes a change in primary translation product of a transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000132 ! consequences_of_mutation - -[Term] -id: SO:1000089 -name: no_change_of_translational_product -def: "The change at RNA level does not lead to any change in polypeptide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000088 ! 
mutation_affecting_translational_product - -[Term] -id: SO:1000090 -name: uncharacterised_change_of_translational_product -def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000091 -name: partially_characterised_change_of_translational_product -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000090 ! uncharacterised_change_of_translational_product - -[Term] -id: SO:1000092 -name: complex_change_of_translational_product -def: "Any mutation effect that is known at nucleotide level but can not be explained by using other key terms." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000093 -name: amino_acid_substitution -def: "The replacement of a single amino acid by an other." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000094 -name: conservative_amino_acid_substitution -is_a: SO:1000093 ! amino_acid_substitution - -[Term] -id: SO:1000095 -name: nonconservative_amino_acid_substitution -is_a: SO:1000093 ! amino_acid_substitution - -[Term] -id: SO:1000096 -name: amino_acid_insertion -def: "The insertion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000097 -name: amino_acid_deletion -def: "The deletion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! 
mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000098 -name: polypeptide_truncation -def: "The translational product is truncated at its C-terminus, usually a result of a nonsense codon change in transcript (SO:1000062)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000099 -name: polypeptide_elongation -def: "The extension of the translational product at either (or both) the N-terminus and/or the C-terminus." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000100 -name: polypeptide_N_terminal_elongation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "polypeptide_N-terminal_elongation" RELATED [] -is_a: SO:1000099 ! polypeptide_elongation - -[Term] -id: SO:1000101 -name: polypeptide_C_terminal_elongation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "polypeptide_C-terminal_elongation" RELATED [] -is_a: SO:1000099 ! polypeptide_elongation - -[Term] -id: SO:1000102 -name: mutation_affecting_level_of_translational_product -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000103 -name: mutation_decreasing_level_of_translation_product -is_a: SO:1000102 ! mutation_affecting_level_of_translational_product - -[Term] -id: SO:1000104 -name: mutation_increasing_level_of_translation_product -is_a: SO:1000102 ! mutation_affecting_level_of_translational_product - -[Term] -id: SO:1000105 -name: mutation_affecting_polypeptide_amino_acid_sequence -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000106 -name: inframe_polypeptide_N_terminal_elongation -synonym: "inframe_polypeptide_N-terminal_elongation" RELATED [] -is_a: SO:1000100 ! 
polypeptide_N_terminal_elongation - -[Term] -id: SO:1000107 -name: out_of_frame_polypeptide_N_terminal_elongation -synonym: "out_of_frame_polypeptide_N-terminal_elongation" RELATED [] -is_a: SO:1000100 ! polypeptide_N_terminal_elongation - -[Term] -id: SO:1000108 -name: inframe_polypeptide_C_terminal_elongation -synonym: "inframe_polypeptide_C-terminal_elongation" RELATED [] -is_a: SO:1000101 ! polypeptide_C_terminal_elongation - -[Term] -id: SO:1000109 -name: out_of_frame_polypeptide_C_terminal_elongation -synonym: "out_of_frame_polypeptide_C-terminal_elongation" RELATED [] -is_a: SO:1000101 ! polypeptide_C_terminal_elongation - -[Term] -id: SO:1000110 -name: frame_restoring_mutation -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000111 -name: mutation_affecting_3D_structure_of_polypeptide -synonym: "mutation_affecting_3D-structure_of_polypeptide" RELATED [] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000112 -name: no_3D_structural_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000113 -name: uncharacterised_3D_structural_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000114 -name: partially_characterised_3D_structural_change -is_a: SO:1000113 ! uncharacterised_3D_structural_change - -[Term] -id: SO:1000115 -name: complex_3D_structural_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000116 -name: conformational_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000117 -name: mutation_affecting_polypeptide_function -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000118 -name: loss_of_function_of_polypeptide -synonym: "loss-of-function_of_polypeptide" RELATED [] -is_a: SO:1000117 ! mutation_affecting_polypeptide_function - -[Term] -id: SO:1000119 -name: inactive_ligand_binding_site -is_a: SO:1000118 ! 
loss_of_function_of_polypeptide - -[Term] -id: SO:1000120 -name: inactive_catalytic_site -is_a: SO:1000119 ! inactive_ligand_binding_site - -[Term] -id: SO:1000121 -name: polypeptide_localization_affected -is_a: SO:1000117 ! mutation_affecting_polypeptide_function - -[Term] -id: SO:1000122 -name: polypeptide_post_translational_processing_affected -synonym: "polypeptide_post-translational_processing_affected" RELATED [] -is_a: SO:1000117 ! mutation_affecting_polypeptide_function -is_a: SO:1000118 ! loss_of_function_of_polypeptide - -[Term] -id: SO:1000123 -name: polypeptide_post_translational_processing_affected -synonym: "polypeptide_post-translational_processing_affected" RELATED [] -is_obsolete: true - -[Term] -id: SO:1000124 -name: partial_loss_of_function_of_polypeptide -synonym: "partial_loss-of-function_of_polypeptide" RELATED [] -is_a: SO:1000118 ! loss_of_function_of_polypeptide - -[Term] -id: SO:1000125 -name: gain_of_function_of_polypeptide -synonym: "gain-of-function_of_polypeptide" RELATED [] -is_a: SO:1000117 ! mutation_affecting_polypeptide_function - -[Term] -id: SO:1000126 -name: mutation_affecting_transcript_secondary_structure -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000127 -name: compensatory_transcript_secondary_structure_mutation -is_a: SO:1000126 ! mutation_affecting_transcript_secondary_structure - -[Term] -id: SO:1000132 -name: consequences_of_mutation -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:1000134 -name: polypeptide_fusion -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000136 -name: autosynaptic_chromosome -synonym: "(Drosophila)A" RELATED [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000138 -name: homo_compound_chromosome -synonym: "homo-compound_chromosome" RELATED [] -is_a: SO:1000042 ! 
compound_chromosome - -[Term] -id: SO:1000140 -name: hetero_compound_chromosome -synonym: "hetero-compound_chromosome" RELATED [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:1000141 -name: chromosome_fission -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000142 -name: dexstrosynaptic_chromosome -is_a: SO:1000136 ! autosynaptic_chromosome - -[Term] -id: SO:1000143 -name: laevosynaptic_chromosome -is_a: SO:1000136 ! autosynaptic_chromosome - -[Term] -id: SO:1000144 -name: free_duplication -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000145 -name: free_ring_duplication -synonym: "(Drosophila)R" RELATED [] -is_a: SO:1000045 ! ring_chromosome -is_a: SO:1000144 ! free_duplication - -[Term] -id: SO:1000146 -name: complex_chromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000147 -name: deficient_translocation -def: "A translocation in which one of the four broken ends loses a segment before re-joining." [fb:reference_manual] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfT" RELATED [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000148 -name: inversion_cum_translocation -def: "The first two breaks are in the same chromosome, and the region between them is rejoined in inverted order to the other side of the first break, such that both sides of break one are present on the same chromosome. The remaining free ends are joined as a translocation with those resulting from the third break." [fb:reference_manual] -synonym: "(Drosophila)InT" RELATED [] -synonym: "(Drosophila)T" RELATED [] -synonym: "inversion-cum-translocation" RELATED [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000044 ! 
chromosomal_translocation - -[Term] -id: SO:1000149 -name: bipartite_duplication -def: "The (large) region between the first two breaks listed is lost, and the two flanking segments (one of them centric) are joined as a translocation to the free ends resulting from the third break." [fb:reference_manual] -synonym: "(Drosophila)bDp" RELATED [] -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000150 -name: cyclic_translocation -def: "Three breaks in three different chromosomes. The centric segment resulting from the first break listed is joined to the acentric segment resulting from the second, rather than the third." [fb:reference_manual] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000151 -name: bipartite_inversion -def: "Three breaks in the same chromosome; both central segments are inverted in place (i.e., they are not transposed)." [fb:reference_manual] -synonym: "(Drosophila)bIn" RELATED [] -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000152 -name: uninverted_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)eDp" RELATED [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000153 -name: inverted_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)iDp" RELATED [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000154 -name: insertional_duplication -synonym: "(Drosophila)Dpp" RELATED [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000155 -name: interchromosomal_transposition -synonym: "(Drosophila)Tp" RELATED [] -is_a: SO:0000453 ! transposition -is_a: SO:1000031 ! 
interchromosomal_mutation - -[Term] -id: SO:1000156 -name: inverted_interchromosomal_transposition -synonym: "(Drosophila)iTp" RELATED [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000157 -name: uninverted_interchromosomal_transposition -synonym: "(Drosophila)eTp" RELATED [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000158 -name: inverted_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)iTp" RELATED [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000159 -name: uninverted_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)eTp" RELATED [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000160 -name: unoriented_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [fb:reference_manual] -synonym: "(Drosophila)uDp" RELATED [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000161 -name: unorientated_interchromosomal_transposition -synonym: "(Drosophila)uTp" RELATED [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000162 -name: unorientated_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." 
[fb:reference_manual] -synonym: "(Drosophila)uTp" RELATED [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000170 -name: uncharacterised_chromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000171 -name: deficient_inversion -def: "Three breaks in the same chromosome; one central region lost, the other inverted." [fb:reference_manual] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfIn" RELATED [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000173 -name: tandem_duplication -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:1000175 -name: partially_characterised_chromosomal_mutation -is_a: SO:1000170 ! uncharacterised_chromosomal_mutation - -[Term] -id: SO:1000177 -name: uncharacterised_change_in_transcript -def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000179 -name: partially_characterised_change_in_transcript -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000177 ! uncharacterised_change_in_transcript - -[Term] -id: SO:1000180 -name: mutation_affecting_gene_structure -is_a: SO:1000132 ! consequences_of_mutation - -[Term] -id: SO:1000181 -name: gene_fusion -is_a: SO:1000180 ! mutation_affecting_gene_structure - -[Term] -id: SO:1000182 -name: chromosome_number_variation -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:1000183 -name: chromosome_structure_variation -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:1000184 -name: mutation_causes_exon_loss -is_a: SO:1000071 ! 
mutation_affecting_splicing - -[Term] -id: SO:1000185 -name: mutation_causes_intron_gain -def: "Mutation causes an intron to be gained by the processed transcript; usually a result of a donor acceptor mutation (SO:1000072)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000186 -name: cryptic_splice_donor_activation -is_a: SO:1000074 ! cryptic_splice_activator_mutation - -[Term] -id: SO:1001186 -name: cryptic_splice_acceptor_activation -is_a: SO:1000074 ! cryptic_splice_activator_mutation - -[Term] -id: SO:1001187 -name: alternatively_spliced_transcript -is_a: SO:0000115 ! transcript_feature - -[Term] -id: SO:1001188 -name: alternatively_spliced_transcript_encoding_1_polypeptide -is_a: SO:1001187 ! alternatively_spliced_transcript - -[Term] -id: SO:1001189 -name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide -is_a: SO:1001187 ! alternatively_spliced_transcript - -[Term] -id: SO:1001190 -name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_same_start_codon_different_stop_codon -is_a: SO:1001189 ! alternatively_spliced_transcript_encoding_greater_than_1_polypeptide -is_a: SO:1001194 ! alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_coding_regions_overlapping - -[Term] -id: SO:1001191 -name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_same_stop_codon -is_a: SO:1001189 ! alternatively_spliced_transcript_encoding_greater_than_1_polypeptide -is_a: SO:1001194 ! alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_coding_regions_overlapping - -[Term] -id: SO:1001192 -name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon -is_a: SO:1001189 ! 
alternatively_spliced_transcript_encoding_greater_than_1_polypeptide - -[Term] -id: SO:1001193 -name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_overlapping -is_a: SO:1001192 ! alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon -is_a: SO:1001194 ! alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_coding_regions_overlapping - -[Term] -id: SO:1001194 -name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_coding_regions_overlapping -is_a: SO:1001189 ! alternatively_spliced_transcript_encoding_greater_than_1_polypeptide - -[Term] -id: SO:1001195 -name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_coding_regions_not_overlapping -is_a: SO:1001189 ! alternatively_spliced_transcript_encoding_greater_than_1_polypeptide - -[Term] -id: SO:1001196 -name: cryptogene -is_a: SO:0000011 ! non_protein_coding_gene - -[Term] -id: SO:1001197 -name: dicistronic_primary_transcript -is_a: SO:0000079 ! dicistronic_transcript - -[Term] -id: SO:1001217 -name: member_of_regulon -is_a: SO:0000081 ! member_gene_array - -[Term] -id: SO:1001244 -name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non_overlapping -synonym: "alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non-overlapping" RELATED [] -is_a: SO:1001192 ! alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon -is_a: SO:1001195 ! alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_coding_regions_not_overlapping - -[Term] -id: SO:1001246 -name: CDS_independently_known -is_a: SO:1001255 ! 
status_of_coding_sequence - -[Term] -id: SO:1001247 -name: orphan_CDS -def: "A CDS whose predicted amino acid sequence is unsupported by any experimental evidence or by any match with any other known sequence." [MA:SO] -is_a: SO:1001254 ! CDS_predicted - -[Term] -id: SO:1001249 -name: CDS_supported_by_domain_match_data -is_a: SO:1001254 ! CDS_predicted - -[Term] -id: SO:1001251 -name: CDS_supported_by_sequence_similarity_data -is_a: SO:1001254 ! CDS_predicted - -[Term] -id: SO:1001254 -name: CDS_predicted -is_a: SO:1001255 ! status_of_coding_sequence - -[Term] -id: SO:1001255 -name: status_of_coding_sequence -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:1001259 -name: CDS_supported_by_EST_or_cDNA_data -is_a: SO:1001254 ! CDS_predicted - -[Term] -id: SO:1001260 -name: internal_Shine_Dalgarno_sequence -def: "A Shine Delgarno sequence that is upstream of a non-5' CDS in a polycistronic mRNA." [SO:ke] -is_a: SO:0000243 ! internal_ribosome_entry_site -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001261 -name: recoded_mRNA -def: "A gene coding an mRNA which is recoded before translation, usually by special cis-acting signals." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract] -is_a: SO:0000115 ! transcript_feature - -[Term] -id: SO:1001262 -name: minus_1_translational_frameshift -is_a: SO:0000118 ! transcript_with_translational_frameshift - -[Term] -id: SO:1001263 -name: plus_1_translational_frameshift -is_a: SO:0000118 ! transcript_with_translational_frameshift - -[Term] -id: SO:1001264 -name: mRNA_recoded_by_translational_bypass -def: "A gene whose mRNA is translated by ribosomes that suspend translation at a particular codon and resume translation at a particular non-overlapping downstream codon." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract] -is_a: SO:1001261 ! 
recoded_mRNA - -[Term] -id: SO:1001265 -name: mRNA_recoded_by_codon_redefinition -def: "A gene whose mRNA is recoded by an alteration of codon meaning." [SO:ma] -is_a: SO:1001261 ! recoded_mRNA - -[Term] -id: SO:1001266 -name: stop_codon_redefinition_as_selenocysteine -is_a: SO:1001267 ! stop_codon_readthrough - -[Term] -id: SO:1001267 -name: stop_codon_readthrough -is_a: SO:1001265 ! mRNA_recoded_by_codon_redefinition - -[Term] -id: SO:1001268 -name: recoding_stimulatory_region -def: "A site in an mRNA sequence that stimulates the recoding of the same mRNA." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12519954&dopt=Abstract] -synonym: "recoding_stimulatory_signal" RELATED [] -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:1001269 -name: four_bp_start_codon -def: "A non-canonical start codon with 4 pase pairs." [SO:ke] -synonym: "4bp_start_codon" RELATED [] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001270 -name: stop_codon_redefinition_as_pyrrolysine -is_a: SO:1001267 ! stop_codon_readthrough - -[Term] -id: SO:1001271 -name: archeal_intron -def: "Intron characteristic of tRNA genes; splices by an endonuclease-ligase mediated mechanism." [SO:ma] -is_a: SO:0000661 ! intron_attribute - -[Term] -id: SO:1001272 -name: tRNA_intron -is_a: SO:0000661 ! intron_attribute - -[Term] -id: SO:1001273 -name: CTG_start_codon -def: "A non-canonical start codon of sequence CTG." [SO:ke] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001274 -name: SECIS_element -def: "The incorporation of selenocysteine into a protein sequence is directed by an in-frame UGA codon (usually a stop codon) within the coding region of the mRNA. Selenoprotein mRNAs contain a conserved secondary structure in the 3' UTR that is required for the distinction of UGA stop from UGA selenocysteine. 
The selenocysteine insertion sequence (SECIS) is around 60 nt in length and adopts a hairpin structure which is sufficiently well-defined and conserved to act as a computational screen for selenoprotein genes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00031] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001275 -name: retron -def: "Sequence coding for a short, single-stranded, DNA sequence via a retrotransposed RNA intermediate; characteristic of some microbial genomes." [SO:ma] -is_a: SO:0000009 ! gene_class - -[Term] -id: SO:1001277 -name: three_prime_recoding_site -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001279 -name: three_prime_stem_loop_structure -def: "The stem-loop secondary structural element downstream of the redefined region." [SO:ke] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001280 -name: five_prime_recoding_site -def: "The recoding signal found 5' of the redefined codon." [SO:ke] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001281 -name: flanking_three_prime_quadruplet_recoding_signal -def: "Four base pair sequence immediately downstream of the redefined region. The redefined region is a frameshift site. The quadruplet is 2 overlapping codons." [SO:ke] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001282 -name: UAG_stop_codon_signal -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001283 -name: UAA_stop_codon_signal -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001284 -name: regulon -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." [ISBN:0198506732] -subset: SOFA -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:1001285 -name: UGA_stop_codon_signal -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001286 -name: three_prime_repeat_recoding_signal -def: "It is a downstream sequence important for recoding that contains repetitive elements." 
[SO:ke] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001287 -name: distant_three_prime_recoding_signal -def: "A recoding signal that is found many hundreds of nucleotides 3' of a redefined stop codon." [http://www.ncbi.nlm.nih.gov 80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8709208&dopt=Abstract] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001288 -name: stop_codon_signal -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:2000061 -name: databank_entry -def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -is_a: SO:0000695 ! reagent - -[Typedef] -id: adjacent_to -name: adjacent_to -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence." [SO:ke] -subset: SOFA -domain: SO:0000110 ! located_sequence_feature -range: SO:0000110 ! located_sequence_feature -is_symmetric: true - -[Typedef] -id: associated_with -name: associated_with -comment: This relationship is vague and up for discussion. -is_symmetric: true - -[Typedef] -id: derives_from -name: derives_from -subset: SOFA -is_transitive: true - -[Typedef] -id: genome_of -name: genome_of - -[Typedef] -id: has_genome_location -name: has_genome_location -domain: SO:0000085 ! gene_by_genome_location -range: SO:0000704 ! gene -is_obsolete: true - -[Typedef] -id: homologous_to -name: homologous_to -is_symmetric: true -is_a: similar_to ! similar_to - -[Typedef] -id: member_of -name: member_of -comment: A subtype of part_of.ninverse is collection_of.nWinston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. -subset: SOFA -is_a: part_of ! part_of - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -is_a: homologous_to ! 
homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: paralogous_to -name: paralogous_to -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: part_of -name: part_of -subset: SOFA -is_transitive: true - -[Typedef] -id: position_of -name: position_of - -[Typedef] -id: regulated_by -name: regulated_by -is_obsolete: true - -[Typedef] -id: sequence_of -name: sequence_of - -[Typedef] -id: similar_to -name: similar_to -is_symmetric: true - diff --git a/annotation/NBIS/Ontology/SO/so_2_1.obo b/annotation/NBIS/Ontology/SO/so_2_1.obo deleted file mode 100644 index ef333c66b..000000000 --- a/annotation/NBIS/Ontology/SO/so_2_1.obo +++ /dev/null @@ -1,6704 +0,0 @@ -format-version: 1.2 -date: 14:08:2006 16:22 -saved-by: kareneilbeck -auto-generated-by: OBO-Edit 1.002-beta3 -subsetdef: SOFA "SO feature annotation" -default-namespace: sequence -remark: autogenerated-by\: DAG-Edit version 1.417\nsaved-by\: eilbeck\ndate\: Tue May 11 15\:18\:44 PDT 2004\nversion\: $Revision\: 1.45 $ - -[Term] -id: SO:0000000 -name: Sequence_Ontology -subset: SOFA - -[Term] -id: SO:0000001 -name: region -def: "Continuous sequence." [SO:ke] -subset: SOFA -synonym: "sequence" RELATED [] -is_a: SO:0000110 ! located_sequence_feature - -[Term] -id: SO:0000002 -name: sequence_secondary_structure -def: "A folded sequence." [SO:ke] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000003 -name: G_quartet -def: "G-quartets are unusual nucelic acid structures consisting of a planar arrangement where each guanine is hydrogen bonded by hoogsteen pairing to another guanine in the quartet." [http://www.library.csi.cuny.edu/ ~ davis/molbiol/lecture_notes/post-transcriptional_processes/RNACapping.pdf] -synonym: "G-quartet" RELATED [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000004 -name: interior_coding_exon -is_a: SO:0000195 ! 
coding_exon - -[Term] -id: SO:0000005 -name: satellite_DNA -def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000006 -name: PCR_product -def: "A region amplified by a PCR reaction." [SO:ke] -subset: SOFA -synonym: "amplicon" RELATED [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000007 -name: read_pair -def: "A pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000008 -name: gene_sensu_your_favorite_organism -is_obsolete: true - -[Term] -id: SO:0000009 -name: gene_class -is_obsolete: true - -[Term] -id: SO:0000010 -name: protein_coding -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000011 -name: non_protein_coding -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000012 -name: scRNA_primary_transcript -def: "The primary transcript of any one of several small cytoplasmic RNA moleculespresent in the cytoplasm and sometimes nucleus of a eukaryote." [http:www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -synonym: "small_cytoplasmic_RNA" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000013 -name: scRNA -def: "Any one of several small cytoplasmic RNA moleculespresent in the cytoplasm and sometimes nucleus of a eukaryote." [http:www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000014 -name: INR_motif -def: "A sequence element characteristic of some RNA polymerase II promoters required for the correct positioning of the polymerase for the start of transcription. 
Overlaps the TSS. The mammalian consensus sequence is YYAN(T|A)YY; the Drosophila consensus sequence is TCA(G|T)t(T|C). In each the A is at position +1 with respect to the TSS. Functionally similar to the TATA box element." [PMID:12651739] -is_a: SO:0000844 ! RNA_II_promotor_region - -[Term] -id: SO:0000015 -name: DPE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters; always found with the INR_motif. Positioned from +28 to +32 with respect to the TSS (+1). Consensus sequence (A|G)G(A|T)(C|T)(G|A|C). Required for TFIID binding to TATA-less promoters." [PMID:12651739] -is_a: SO:0000844 ! RNA_II_promotor_region - -[Term] -id: SO:0000016 -name: BRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements at -37 to -32 with respect to the TSS (+1). Consensus sequence is (G|C)(G|C)(G|A)CGCC. Binds TFIIB." [PMID:12651739] -is_a: SO:0000844 ! RNA_II_promotor_region - -[Term] -id: SO:0000017 -name: PSE_motif -def: "A sequence element characteristic of the promoters of snRNA genes transcribed by RNA polymerase II or by RNA polymerase III. Located between -45 and -60 relative to the TSS. The human PSE_motif consensus sequence is TCACCNTNA(C|G)TNAAAAG(T|G)." [PMID:12651739] -is_a: SO:0000844 ! RNA_II_promotor_region - -[Term] -id: SO:0000018 -name: linkage_group -def: "A group of loci that can be grouped in a linear order representing the different degrees of linkage among the genes concerned." [ISBN:038752046] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000019 -name: RNA_hairpin_loop -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000020 -name: RNA_internal_loop -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000021 -name: asymmetric_RNA_internal_loop -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000022 -name: A_minor_RNA_motif -is_a: SO:0000021 ! asymmetric_RNA_internal_loop - -[Term] -id: SO:0000023 -name: K_turn_RNA_motif -is_a: SO:0000021 ! 
asymmetric_RNA_internal_loop - -[Term] -id: SO:0000024 -name: Sarcin_like_RNA_motif -is_a: SO:0000021 ! asymmetric_RNA_internal_loop - -[Term] -id: SO:0000025 -name: symmetric_RNA_internal_loop -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000026 -name: RNA_junction_loop -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000027 -name: RNA_hook_turn -is_a: SO:0000026 ! RNA_junction_loop - -[Term] -id: SO:0000028 -name: base_pair -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000029 -name: WC_base_pair -def: "The canonical base pair, where two bases interact via WC edges, with glycosidic bonds oriented cis relative to the axis of orientation." [PMID:12177293] -synonym: "Watson_Crick_based_pair" RELATED [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000030 -name: sugar_edge_base_pair -def: "A type of non-canonical base-pairing." [PMID:12177293] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000031 -name: aptamer -def: "DNA or RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http:http\://aptamer.icmb.utexas.edu] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000032 -name: DNA_aptamer -def: "DNA molecules that have been selected from random pools based on their ability to bind other molecules." [http:aptamer.icmb.utexas.edu] -is_a: SO:0000031 ! aptamer - -[Term] -id: SO:0000033 -name: RNA_aptamer -def: "RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http:aptamer.icmb.utexas.edu] -is_a: SO:0000031 ! aptamer - -[Term] -id: SO:0000034 -name: morpholino -def: "Morpholino oligos are synthesized from four different Morpholino subunits, each of which contains one of the four genetic bases (A, C, G, T) linked to a 6-membered morpholine ring. Eighteen to 25 subunits of these four subunit types are joined in a specific order by non-ionic phosphorodiamidate intersubunit linkages to give a Morpholino." 
[http:www.gene-tools.com/Morpholinos/morpholinos.HTML] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000035 -name: riboswitch -def: "Riboswitches are mRNAs that can act as direct sensors of small molecules to control their own expression. A riboswitch contains a cis element within mRNA, that can act as a direct sensor of metabolites without a protein intermediate." [PMID:2820954] -is_a: SO:0000234 ! mRNA - -[Term] -id: SO:0000036 -name: matrix_attachment_site -def: "A DNA region that is required for the binding of chromatin to the nuclear matrix." [SO:ma] -synonym: "MAR" RELATED [] -synonym: "scaffold_attachment_site" RELATED [] -synonym: "SMAR" RELATED [] -is_a: SO:0000626 ! chromosomal_regulatory_element - -[Term] -id: SO:0000037 -name: locus_control_region -def: "A DNA region that includes DNAse hypersensitive sites located 5' to a gene that confers the high-level, position-independent, and copy number-dependent expression to that gene." [SO:ma] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000038 -name: match_set -def: "A collection of match parts" [SO:ke] -subset: SOFA -is_a: SO:0000343 ! match - -[Term] -id: SO:0000039 -name: match_part -def: "A part of a match, for example an hsp from blast isa match_part." [SO:ke] -subset: SOFA -is_a: SO:0000343 ! match -relationship: part_of SO:0000038 ! match_set - -[Term] -id: SO:0000040 -name: genomic_clone -def: "A clone of a DNA region of a genome." [SO:ma] -is_a: SO:0000151 ! clone - -[Term] -id: SO:0000041 -name: operation -def: "An operation that can be applied to a sequence, that results in a chnage." [SO:ke] -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0000042 -name: pseudogene_attribute -def: "An attribute of a pseudogene (SO:0000336)." [SO:ma] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000045 -name: delete -def: "To remove a subsection of sequence." [SO:ke] -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000046 -name: insert -def: "To insert a subsection of sequence." 
[SO:ke] -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000047 -name: invert -def: "To invert a subsection of sequence." [SO:ke] -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000048 -name: substitute -def: "To substitute a subsection of sequence for another." [SO:ke] -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000049 -name: translocate -def: "To translocate a subsection of sequence." [SO:ke] -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000050 -name: gene_part -def: "A part of a gene, that has no other route in the ontology back to region. This concept is necessary for logical inference as these parts must have the properties of region. It is also allows us to associate all the parts of genes with a gene." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000051 -name: probe -def: "A DNA sequence used experimentally to detect the presence or absence of a complementary nucleic acid." [SO:ma] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000052 -name: assortment_derived_deficiency -synonym: "assortment-derived_deficiency" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000053 -name: mutation_affecting_regulatory_region -is_a: SO:1000132 ! consequences_of_mutation - -[Term] -id: SO:0000054 -name: aneuploid -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0000055 -name: hyperploid -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000056 -name: hypoploid -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000057 -name: operator -def: "A regulatory element of an operon to which activators or repressors bind hereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000058 -name: assortment_derived_aneuploid -synonym: "assortment-derived_aneuploid" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000059 -name: nuclease_binding_site -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0000060 -name: compound_chromosome_arm -is_a: SO:1000042 ! 
compound_chromosome - -[Term] -id: SO:0000061 -name: restriction_enzyme_binding_site -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000062 -name: deficient_intrachromosomal_transposition -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:0000063 -name: deficient_interchromosomal_transposition -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:0000064 -name: gene_by_transcript_attribute -comment: This classes of attributes was added by MA to allow the broad description of genes based on qualities of the transcript(s). A product of SO meeting 2004. -is_obsolete: true - -[Term] -id: SO:0000065 -name: free_chromosome_arm -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000066 -name: gene_by_polyadenylation_attribute -is_obsolete: true - -[Term] -id: SO:0000067 -name: gene_to_gene_feature -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000068 -name: overlapping -def: "An attribute describing a gene that has sequence that overlaps the sequence of another gene." [SO:ke] -is_a: SO:0000067 ! gene_to_gene_feature - -[Term] -id: SO:0000069 -name: inside_intron -def: "An attribute to describe a gene when it is located within the intron of another gene. " [SO:ke] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000070 -name: inside_intron_antiparallel -is_a: SO:0000069 ! inside_intron - -[Term] -id: SO:0000071 -name: inside_intron_parallel -is_a: SO:0000069 ! inside_intron - -[Term] -id: SO:0000072 -name: end_overlapping_gene -is_obsolete: true - -[Term] -id: SO:0000073 -name: five_prime_three_prime_overlap -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000074 -name: five_prime_five_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's five prime region. " [SO:ke] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000075 -name: three_prime_three_prime_overlap -is_a: SO:0000068 ! 
overlapping - -[Term] -id: SO:0000076 -name: three_prime_five_prime_overlap -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000077 -name: antisense -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000080 -name: operon_member -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0000081 -name: gene_array_member -synonym: "gene_array_member" RELATED [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000082 -name: processed_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000083 -name: macronuclear_sequence -is_a: SO:0000736 ! organelle_location - -[Term] -id: SO:0000084 -name: micronuclear_sequence -is_a: SO:0000736 ! organelle_location - -[Term] -id: SO:0000085 -name: gene_by_genome_location -is_obsolete: true - -[Term] -id: SO:0000086 -name: gene_by_organelle_of_genome -is_obsolete: true - -[Term] -id: SO:0000101 -name: transposable_element -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/ ~ smaloy/Glossary/T.html] -subset: SOFA -is_a: SO:0000187 ! repeat_family - -[Term] -id: SO:0000102 -name: expressed_sequence_match -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -def: "The end of the clone insert." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000104 -name: polypeptide -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversable denaturation." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000105 -name: chromosome_arm -def: "A region of the chromosome between the centromere and the telomere. Human chromosomes have two arms, the p arm (short) and the q arm (long) which are separated from each other by the centromere." 
[http://www.exactsciences.com/cic/glossary/_index.htm] -is_a: SO:0000830 ! chromosome_region - -[Term] -id: SO:0000106 -name: non_capped_primary_transcript -is_obsolete: true - -[Term] -id: SO:0000107 -name: sequencing_primer -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000109 -name: sequence_variant -def: "A region of sequence where variation has been observed." [SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_a: SO:0000110 ! located_sequence_feature - -[Term] -id: SO:0000110 -name: located_sequence_feature -def: "A biological feature that can be attributed to a region of biological sequence." [SO:ke] -subset: SOFA -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0000112 -name: primer -def: "A short preexisting polynucleotide chain to which new deoxyribonucleotides can be added by DNA polymerase." [http://www.ornl.gov/TechResources/Human_Genome/publicat/primer2001/glossary.html] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000113 -name: integrated_virus -def: "A viral sequence which has integrated into the host genome." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000114 -name: methylated_C -def: "A methylated deoxy-cytosine." [SO:ke] -subset: SOFA -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000115 -name: transcript_feature -is_obsolete: true - -[Term] -id: SO:0000116 -name: edited -def: "An attribute describing a sequence that is modified by editing " [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000117 -name: transcript_with_readthrough_stop_codon -is_obsolete: true - -[Term] -id: SO:0000119 -name: regulated -def: "An attribute to describe a sequence that is regulated." [SO:ke] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns -subset: SOFA -synonym: "pre-mRNA" RELATED [] -is_a: SO:0000185 ! 
primary_transcript - -[Term] -id: SO:0000121 -name: forward_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000122 -name: RNA_sequence_secondary_structure -def: "A folded RNA sequence." [SO:ke] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000123 -name: transcriptionally_regulated -def: "." [SO:ma] -comment: by: -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000130 -name: post_translationally_regulated -def: "An attribute describing a gene that is regulated after it has been translated." [SO:ke] -synonym: "post-translationally_regulated" RELATED [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000131 -name: translationally_regulated -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000133 -name: epigenetically_modified -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000134 -name: imprinted -is_a: SO:0000119 ! regulated -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000135 -name: maternally_imprinted -def: "An attribute to describe genes that are regulated by maternal imprinting." [SO:ke] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -is_a: SO:0000133 ! 
epigenetically_modified - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -comment: gene: -subset: SOFA -is_a: SO:0000837 ! UTR_region - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located between the promoter and a structural gene that causes partial termination of transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of sequence which may be used to manufacture a longer assembled, sequence." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000145 -name: recoded_codon -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000146 -name: capped -def: "An attribute describing when a sequence, usually an mRNA is capped by the addition of a modified guanine nucleotide at the 5' end." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region of the genome that codes for portion of spliced messenger RNA (SO:0000234); may contain 5'-untranslated region (SO:0000204), all open reading frames (SO:0000236) and 3'-untranslated region (SO:0000205)." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unvailable bases." [SO:ls] -subset: SOFA -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in E. coli or some other organism." [http://www.geospiza.com/community/support/glossary/] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." [SO:ma] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000154 -name: PAC -def: "P1 Artificial Chromosome. These vectors can hold large inserts, typically 80-200 kb, and propagate in E. coli as a single copy episome." 
[http://www.ncbi.nlm.nih.gov/genome/guide/mouse/glossary.htm] -synonym: "P1" RELATED [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000155 -name: plasmid -def: "A self-replicating circular DNA molecule that is distinct from a chromosome in the organism." [SO:ma] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as aplasmids or packaged as a phage,since they retain the lambda cos sites." [SO:ma] -comment: vans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1):9-20. -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilises the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource: mapping and analysis of 96 clones. Genomics 1996; -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000159 -name: deletion -def: "The sequence that is deleted." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0000109 ! sequence_variant -relationship: sequence_of SO:0000045 ! delete - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_obsolete: true - -[Term] -id: SO:0000161 -name: methylated_A -def: "A methylated adenine." [SO:ke] -subset: SOFA -is_a: SO:0000250 ! modified_RNA_base_feature -is_a: SO:0000306 ! 
methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "The position where intron is excised." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_splice_site -def: "The junction between the 3 prime end of an exon and the following intron." [http://www.ucl.ac.uk/ ~ ucbhjow/b241/glossary.html] -subset: SOFA -synonym: "donor" RELATED [] -synonym: "donor_splice_site" RELATED [] -synonym: "splice_donor_site" RELATED [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000164 -name: three_prime_splice_site -def: "The junction between the 3 prime end of an intron and the following exon." [http://www.ucl.ac.uk/ ~ ucbhjow/b241/glossary.html] -subset: SOFA -synonym: "acceptor" RELATED [] -synonym: "acceptor_splice_site" RELATED [] -synonym: "splice_acceptor_site" RELATED [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000167 -name: promoter -def: "The region on a DNA molecule involved in RNA polymerase binding to initiate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." [SO:ke] -synonym: "RNA_polymerase_A_promoter" RELATED [] -is_a: SO:0000167 ! 
promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke] -synonym: "RNA_polymerase_B_promoter" RELATED [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." [SO:ke] -synonym: "RNA_polymerase_C_promoter" RELATED [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -is_a: SO:0000844 ! RNA_II_promotor_region - -[Term] -id: SO:0000173 -name: GC_rich_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -synonym: "GC-rich_region" RELATED [] -is_a: SO:0000844 ! RNA_II_promotor_region - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -is_a: SO:0000832 ! promotor_region -relationship: part_of SO:0000170 ! RNApol_II_promoter -relationship: part_of SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -synonym: "-10_signal" RELATED [] -synonym: "Pribnow_box" RELATED [] -is_a: SO:0000843 ! bacterial_RNApol_promotor_region - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -synonym: "-35_signal" RELATED [] -is_a: SO:0000843 ! bacterial_RNApol_promotor_region - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -subset: SOFA -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." [http://www.genpromag.com/scripts/glossary.asp?LETTER=R] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." [SO:ke] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -is_a: SO:0000842 ! 
gene_component_region - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "The primary (initial, unprocessed) transcript; includes five_prime_clip (SO:0000555), five_prime_untranslated_region (SO:0000204), open reading frames (SO:0000236), introns (SO:0000188) and three_prime_ untranslated_region (three_prime_UTR), and three_prime_clip (SO:0000557)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -synonym: "precursor_RNA" RELATED [] -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." [SO:ke] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000188 -name: intron -def: "A segment of DNA that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000191 -name: interior_intron -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -is_a: SO:0000188 ! 
intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A polymorphism detectable by the size differences in DNA fragments generated by a restriction enzyme." [PMID:6247908] -subset: SOFA -synonym: "restriction_fragment_length_polymorphism" RELATED [] -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." [http:www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "Long interspersed element" RELATED [] -synonym: "Long interspersed nuclear element" RELATED [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon, including the stop_codon." [SO:ke] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_exon_coding_region -def: "The sequence of the 5' exon that encodes for protein." [SO:ke] -is_a: SO:0000195 ! coding_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_exon_coding_region -def: "The sequence of the 3' exon that encodes for protein." [SO:ke] -is_a: SO:0000195 ! coding_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -synonym: "noncoding_exon" RELATED [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke] -is_a: SO:0000109 ! sequence_variant -relationship: sequence_of SO:0000049 ! translocate - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000201 -name: interior_exon -is_a: SO:0000147 ! 
exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The exon that is most 3-prime on a given transcript." [SO:ma] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime and three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated_region" RELATED [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -synonym: "five_prime_untranslated_region" RELATED [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -synonym: "three_prime_untranslated_region" RELATED [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." [SO:ke] -synonym: "Short interspersed element" RELATED [] -synonym: "Short interspersed nuclear element" RELATED [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_polymorphism -is_a: SO:0000248 ! sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." [http:www.genetics.org/cgi/reprint/156/4/1983.pdf] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." 
[SO:ke] -synonym: "ribosomal_RNA_primary_transcript" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253.)" [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." [SO:ke] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." 
[SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear mRNA (SO:0000274)." [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: processed_transcript -def: "A transcript which has undergone processing to remove parts such as introns and transcribed_spacer_regions." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: mRNA does not contain introns as it is a processd_transcript.nThe equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. -subset: SOFA -synonym: "messenger_RNA" RELATED [] -is_a: SO:0000233 ! processed_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds to a transcription factor." [SO:ke] -subset: SOFA -synonym: "transcription_factor_binding_site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The inframe interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER" [SO:ma, SO:rb] -comment: The definition was modified by Rama. This terms now basically is the same as a CDS. This must be revised. -subset: SOFA -synonym: "open_reading_frame" RELATED [] -is_a: SO:0000717 ! 
reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterised by large modular imperfect long inverted repeats" [http:www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "long inverted repeat element" RELATED [] -synonym: "LVR element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The DNA sequences extending on either side of a specific locus." [http://biotech.icmb.utexas.edu/search/dict-search.mhtml] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000240 -name: chromosome_variation -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0000241 -name: internal_UTR -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polyicistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke] -synonym: "IRES" RELATED [] -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_obsolete: true - -[Term] -id: SO:0000246 -name: polyadenylated -def: "A attribute describing the additon of a poly A tail to the 3' end of a mRNA molecule." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000248 -name: sequence_length_variation -is_a: SO:1000002 ! 
substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types, ISBN:0198506732] -subset: SOFA -synonym: "ribsomal_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. tRNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). tRNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -subset: SOFA -synonym: "transfer_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000254 -name: alanyl_tRNA -is_a: SO:0000253 ! 
tRNA - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000261 -name: glycyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000262 -name: histidyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000264 -name: leucyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000265 -name: lysyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000266 -name: methionyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000268 -name: prolyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000269 -name: seryl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000270 -name: threonyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000273 -name: valyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000274 -name: snRNA -def: "Small non-coding RNA in the nucleoplasm. A small nuclear RNA molecule involved in pre-mRNA splicing and processing" [ems:WB, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types, PMID:11733745] -subset: SOFA -synonym: "small_nuclear_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000275 -name: snoRNA -def: "Small nucleolar RNAs (snoRNAs) are involved in the processing and modification of rRNA in the nucleolus. 
There are two main classes of snoRNAs: the box C/D class, and the box H/ACA class. U3 snoRNA is a member of the box C/D class. Indeed, the box C/D element is a subset of the six short sequence elements found in all U3 snoRNAs, namely boxes A, A', B, C, C', and D. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -subset: SOFA -synonym: "small_nucleolar_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. miRNAs are produced from precursor molecules (SO:0000647) that can form local hairpin strcutures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpinprecursor molecule. miRNAs may trigger the cleavage of their target molecules oract as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro_RNA" RELATED [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000277 -name: bound_by_factor -def: "An attribute describing a sequence that is bound by another molecule." 
[SO:ke] -comment: formally called transcript_by_bound_factor -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartate and interupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -synonym: "direct_terminal _repeat" RELATED [] -synonym: "LTR" RELATED [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000289 -name: microsatellite -def: "A very short unit sequence of DNA (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_obsolete: true - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. Example: GCTGA-----TCAGC." [SO:ke] -subset: SOFA -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -is_a: SO:0000001 ! region - -[Term] -id: SO:0000299 -name: specific_recombination_site -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000301 -name: vertebrate_immune_system_gene_recombination_feature -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-RS] -synonym: "J-RS" RELATED [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interupted palidrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_base_site -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. 
G or (in RNA) U." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -comment: modified base: -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -is_a: SO:0000305 ! modified_base_site - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG_island" RELATED [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_obsolete: true - -[Term] -id: SO:0000309 -name: computed_feature -is_obsolete: true - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_obsolete: true - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to: -is_obsolete: true - -[Term] -id: SO:0000312 -name: experimentally_determined -def: "Attribute to describe a feature that has been experiemntally verified." [SO:ke] -is_a: SO:0000789 ! validated - -[Term] -id: SO:0000313 -name: stem_loop -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: transcription_start_site -def: "The site where transcription begins." [SO:ke] -subset: SOFA -synonym: "TSS" RELATED [] -is_a: SO:0000699 ! junction -is_a: SO:0000835 ! 
primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding_sequence" RELATED [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -is_a: SO:0000151 ! clone - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -synonym: "initiation codon" RELATED [] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke] -is_a: SO:0000344 ! splice_enhancer -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "translation_start" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." 
[http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke] -synonym: "translation_end" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray_oligonucleotide" RELATED [] -is_a: SO:0000051 ! probe -is_a: SO:0000324 ! tag -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence_tag_site" RELATED [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! processed_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "noncoding_conserved_region" RELATED [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). 
In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/ ~ ucbhjow/b241/glossary.html] -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITES do not encode proteins." [http:www.pnas.org/cgi/content/full/97/18/10083] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome whioch promotes recombination." [SO:rd] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of long DNA molecule." [http://biotech.icmb.utexas.edu/search/dict-search.mhtml] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -synonym: "cytological_band" RELATED [] -is_a: SO:0000830 ! chromosome_region - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." 
[SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "Expressed Sequence Tag: The sequence of a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [http://genomics.phrma.org/lexicon/e.html] -subset: SOFA -synonym: "expressed_sequence_tag" RELATED [] -is_a: SO:0000695 ! reagent -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000346 -name: Cre_recombination_target_region -synonym: "lox_site" RELATED [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -is_a: SO:0000343 ! match - -[Term] -id: SO:0000348 -name: nucleic_acid -is_a: SO:0000443 ! polymer_type - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -is_a: SO:0000343 ! match - -[Term] -id: SO:0000350 -name: FLP_recombination_target_region -synonym: "FRT_site" RELATED [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "A sequence of nucleotides or amino acids that has been designed by an experimentor and which may, or may not, correspond with any natural sequence." [SO:ma] -is_a: SO:0000443 ! polymer_type - -[Term] -id: SO:0000352 -name: DNA -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000353 -name: assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -is_a: SO:0000684 ! 
nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which in which markers are co-inherited as the result of the lack of historic recombination between them due to their close proximity." [SO:ma] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000356 -name: RNA -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: flanked -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000358 -name: protein -comment: This definition no longer matches the meaning of the concept -is_a: SO:0000443 ! polymer_type - -[Term] -id: SO:0000359 -name: floxed -def: "An attribute describing sequence that is flanked by Lox-P sites." [SO:ke] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together signify a unique amino acid or the termination of translation." [http://genomics.phrma.org/lexicon/c.html] -subset: SOFA -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000361 -name: FRT_flanked -def: "An attribute to describe sequence that is flanked by the FLP recombinase recognition site, FRT." [SO:ke] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000362 -name: invalidated_by_chimeric_cDNA -def: "A cDNA clone constructed from more than one mRNA. Usually an experimental artifact." [SO:ma] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposible element." [SO:ke] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "DNA elements capable of mobilizing individual gene cassettes into bacterial chromosomes by site- specific recombination." [http://www.genomicglossaries.com/content/DNA.asp] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -is_a: SO:0000109 ! 
sequence_variant -is_a: SO:0000699 ! junction -relationship: position_of SO:0000046 ! insert - -[Term] -id: SO:0000367 -name: attI_site -is_a: SO:0000669 ! sequence_rearrangement_feature -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -is_a: SO:0000669 ! sequence_rearrangement_feature -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/ ~ smaloy/Glossary/C.html] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -is_a: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000375 -name: rRNA_5.8S -def: "5.8S ribosomal RNA (5.8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5.8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S_rRNA" RELATED [] -is_a: SO:0000252 ! 
rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 6S RNA represses expression from a sigma70-dependent promoter during stationary phase." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S_RNA" RELATED [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB-RsmB_RNA" RELATED [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. 
The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -is_a: SO:0000378 ! DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http:rnaworld.bio.ukans.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA -is_a: SO:0000374 ! ribozyme - -[Term] -id: SO:0000381 -name: group_IIA_intron -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -is_a: SO:0000644 ! antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -is_a: SO:0000370 ! 
small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -is_a: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterised activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -is_a: SO:0000374 ! ribozyme - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. 
Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesises telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -is_a: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. 
There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. 
The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA -An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http:http\://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=1 2409455&dopt=Abstract] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -def: "An attribute describes a quality of sequence." [SO:ke] -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0000401 -name: gene_attribute -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_obsolete: true - -[Term] -id: SO:0000403 -name: U14_snRNA -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonuceoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron." [PMID:1899376] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "18S_rRNA -A large polynucleotide which functions as a part of the small subunit of the ribosome" [SO:ke] -subset: SOFA -synonym: "16S_rRNA" RELATED [] -is_a: SO:0000252 ! 
rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -def: "A region on the surface of a molecule that may interact with another molecule." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A region of a molecule that binds to a protein." [SO:ke] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "Any of the individual polynucleotide sequences produced by digestion of DNA with a restriction endonuclease." [http://www.agron.missouri.edu/cgi-bin/sybgw_mdb/mdb3/Term/119] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequences differs from that of a specified sequence." [SO:ke] -subset: SOFA -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000414 -name: invalidated_by_genomic_contamination -def: "An attribue to describe a feature that is invalidated due to genomic contamination. " [SO:ke] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000415 -name: invalidated_by_genomic_polyA_primed_cDNA -def: "An attribue to describe a feature that is invalidated due to polyA priming. " [SO:ke] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000416 -name: invalidated_by_partial_processing -def: "An attribue to describe a feature that is invalidated due to partial processing. " [SO:ke] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000417 -name: polypeptide_domain -def: "A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains." [http:www.molbiol.bbsrc.ac.uk/new_protein/domains.html] -is_a: SO:0000839 ! 
polypeptide_region - -[Term] -id: SO:0000418 -name: signal_peptide -def: "The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence." [http:www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "signal peptide coding sequence" RELATED [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0000419 -name: mature_peptide -def: "The coding sequence for the mature or final peptide or protein product following post-translational modification." [http:www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000423 -name: R_LTR_region -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000424 -name: U3_LTR_region -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000425 -name: five_prime_LTR -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -is_a: SO:0000423 ! R_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -is_a: SO:0000422 ! U5_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -is_a: SO:0000424 ! U3_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -is_a: SO:0000849 ! 
three_prime_LTR_component - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: transposable_element_target_site_duplication -def: "A sequence of DNA that is duplicated when a transposable element inserts; usually found at each end the insertion." [http:www.koko.gov.my/CocoaBioTech/Glossaryt.html] -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." [SO:ke] -synonym: "LTR_retrotransposon_poly_purine_tract" RELATED [] -is_a: SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously_replicating_sequence" RELATED [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! ring_chromosome - -[Term] -id: SO:0000440 -name: vector -def: "A DNA molecule that can be used to transfer DNA molecules between organisms." [SO:ma] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -synonym: "single stranded oligonucleotide.new synonym" RELATED [] -synonym: "ss_oligonucleotide" RELATED [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." 
[SO:ke] -synonym: "double stranded oligonucleotide" RELATED [] -synonym: "ds_oligonucleotide" RELATED [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000443 -name: polymer_type -def: "An attribute to describe the kind of biological sequence." [SO:ke] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "five_prime_noncoding_exon" RELATED [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." [SO:ke] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." [SO:ke] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequencyof these components." [SO:ma] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -is_a: SO:0000341 ! chromosome_band - -[Term] -id: SO:0000452 -name: transgene_attribute -is_obsolete: true - -[Term] -id: SO:0000453 -name: transposition -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A small, 17-28-nt, small interfering RNA derived from transcripts ofrepetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" RELATED [] -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-GENE] -synonym: "D-GENE" RELATED [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment -relationship: part_of SO:0000504 ! D_DJ_C_cluster -relationship: part_of SO:0000505 ! D_DJ_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000508 ! D_DJ_J_cluster -relationship: part_of SO:0000509 ! D_J_C_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000528 ! V_D_DJ_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000532 ! V_D_J_cluster -relationship: part_of SO:0000559 ! D_cluster -relationship: part_of SO:0000560 ! D_J_cluster - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_segment -comment: I am using the term segment instead of gene her to avoid confusion with the region 'gene'. -synonym: "vertebrate_immunoglobulin/T-cell_receptor_gene" RELATED [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entitity." [SO:cjm] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000463 -name: encodes_alternately_spliced_transcripts -is_a: SO:0000401 ! 
gene_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendent of an exon." [SO:ke] -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." [FB:km] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-GENE] -synonym: "V_GENE" RELATED [] -synonym: "variable_gene" RELATED [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment -relationship: part_of SO:0000518 ! V_DJ_cluster -relationship: part_of SO:0000519 ! V_DJ_J_cluster -relationship: part_of SO:0000520 ! V_VDJ_C_cluster -relationship: part_of SO:0000521 ! V_VDJ_cluster -relationship: part_of SO:0000522 ! V_VDJ_J_cluster -relationship: part_of SO:0000523 ! V_VJ_C_cluster -relationship: part_of SO:0000524 ! V_VJ_cluster -relationship: part_of SO:0000525 ! V_VJ_J_cluster -relationship: part_of SO:0000526 ! V_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000528 ! V_D_DJ_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000532 ! V_D_J_cluster -relationship: part_of SO:0000534 ! V_J_cluster -relationship: part_of SO:0000535 ! V_J_C_cluster -relationship: part_of SO:0000542 ! V_DJ_C_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster -relationship: part_of SO:0000566 ! 
V_VJ_J_C_cluster - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -synonym: "post-translationally_regulated_by_protein_stability" RELATED [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -def: "An attribute describing a gene sequence where the resulting protein is modified to regulate it." [SO:ke] -synonym: "post-translationally_regulated_by_protein_modification" RELATED [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-GENE] -synonym: "J-GENE" RELATED [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment -relationship: part_of SO:0000485 ! DJ_J_cluster -relationship: part_of SO:0000487 ! VDJ_J_C_cluster -relationship: part_of SO:0000488 ! VDJ_J_cluster -relationship: part_of SO:0000490 ! VJ_J_C_cluster -relationship: part_of SO:0000491 ! VJ_J_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000508 ! D_DJ_J_cluster -relationship: part_of SO:0000509 ! D_J_C_cluster -relationship: part_of SO:0000511 ! J_C_cluster -relationship: part_of SO:0000513 ! J_cluster -relationship: part_of SO:0000519 ! V_DJ_J_cluster -relationship: part_of SO:0000522 ! V_VDJ_J_cluster -relationship: part_of SO:0000525 ! V_VJ_J_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000532 ! 
V_D_J_cluster -relationship: part_of SO:0000534 ! V_J_cluster -relationship: part_of SO:0000535 ! V_J_C_cluster -relationship: part_of SO:0000540 ! DJ_J_C_cluster -relationship: part_of SO:0000560 ! D_J_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster -relationship: part_of SO:0000566 ! V_VJ_J_C_cluster - -[Term] -id: SO:0000471 -name: autoregulated -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [CJM:SO] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path.SO:0000472." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." [SO:ke] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -def: "A gene that is polycistronic" [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000478 -name: C_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#C-GENE] -synonym: "C_GENE" RELATED [] -synonym: "constant_gene" RELATED [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment -relationship: part_of SO:0000487 ! VDJ_J_C_cluster -relationship: part_of SO:0000489 ! VJ_C_cluster -relationship: part_of SO:0000490 ! VJ_J_C_cluster -relationship: part_of SO:0000504 ! 
D_DJ_C_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000509 ! D_J_C_cluster -relationship: part_of SO:0000511 ! J_C_cluster -relationship: part_of SO:0000520 ! V_VDJ_C_cluster -relationship: part_of SO:0000523 ! V_VJ_C_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000535 ! V_J_C_cluster -relationship: part_of SO:0000539 ! DJ_C_cluster -relationship: part_of SO:0000540 ! DJ_J_C_cluster -relationship: part_of SO:0000541 ! VDJ_C_cluster -relationship: part_of SO:0000542 ! V_DJ_C_cluster -relationship: part_of SO:0000558 ! C_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster -relationship: part_of SO:0000566 ! V_VJ_J_C_cluster - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly." [SO:ke] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occuring at the termini of a DNA transposon." [SO:ke] -synonym: "TIR" RELATED [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate_immunoglobulin/T-cell_receptor_gene-cluster" RELATED [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "noncoding_primary_transcript" RELATED [] -is_a: SO:0000185 ! 
primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -synonym: "three_prime_exon_noncoding_region" RELATED [] -is_a: SO:0000852 ! exon_region -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(DJ)-J-CLUSTER] -synonym: "(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000486 -name: five_prime_exon_noncoding_region -def: "The sequence of the 5' exon preceeding the start codon." [SO:ke] -synonym: "five_prime_exon_noncoding_region" RELATED [] -is_a: SO:0000852 ! exon_region -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(VDJ)-J-C-CLUSTER] -synonym: "(VDJ)-J-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(VDJ)-J-CLUSTER] -synonym: "(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(VJ)-C-CLUSTER] -synonym: "(VJ)-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(VJ)-J-C-CLUSTER] -synonym: "(VJ)-J-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(VJ)-J-CLUSTER] -synonym: "(VJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#3'D-HEPTAMER] -synonym: "3'D-HEPTAMER" RELATED [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#3'D-NONAMER] -synonym: "3'D-NOMAMER" RELATED [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! 
three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#3'D-SPACER] -synonym: "3'D-SPACER" RELATED [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#5'D-HEPTAMER] -synonym: "5'D-HEPTAMER" RELATED [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#5'D-NONAMER] -synonym: "5'D-NONAMER" RELATED [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#5'D-SPACER] -synonym: "5'-SPACER" RELATED [] -synonym: "five_prime_D-spacer" RELATED [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000556 ! 
five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continous piece of sequence similar to the 'virtual contig' concept of ensembl." [SO:ke] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." [SO:ke] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-(DJ)-C-CLUSTER] -synonym: "D-(DJ)-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-(DJ)-CLUSTER] -synonym: "D-(DJ)-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-(DJ)-J-C-CLUSTER] -synonym: "D-(DJ)-J-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000507 -name: pseudogenic_exon -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon -relationship: part_of SO:0000516 ! pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-(DJ)-J-CLUSTER] -synonym: "D-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-J-C-CLUSTER] -synonym: "D-J-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000510 -name: VD_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-GENE] -synonym: "V_D_GENE" RELATED [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-C-CLUSTER] -synonym: "J-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-CLUSTER] -synonym: "J-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-NONAMER] -synonym: "J-NONAMER" RELATED [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. 
CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-HEPTAMER] -synonym: "J-HEPTAMER" RELATED [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-SPACER] -synonym: "J-SPACER" RELATED [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(DJ)-CLUSTER] -synonym: "V-(DJ)-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(DJ)-J-CLUSTER] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VDJ)-C-CLUSTER] -synonym: "V-(VDJ)-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VDJ)-CLUSTER] -synonym: "V-(VDJ)-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VDJ)-J-CLUSTER] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VJ)-C-CLUSTER] -synonym: "V-(VJ)-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VJ)-CLUSTER] -synonym: "V-(VJ)-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VJ)-J-CLUSTER] -synonym: "V-(VJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-CLUSTER] -synonym: "V-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-(DJ)-C-CLUSTER] -synonym: "V-D-(DJ)-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-(DJ)-CLUSTER] -synonym: "V-D-(DJ)-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-(DJ)-J-C-CLUSTER] -synonym: "V-D-(DJ)-J-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-(DJ)-J-CLUSTER] -synonym: "V-D-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-J-C-CLUSTER] -synonym: "V-D-J-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-J-CLUSTER] -synonym: "V-D-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-HEPTAMER] -synonym: "V-HEPTAMER" RELATED [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-J-CLUSTER] -synonym: "V-J-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-J-C-CLUSTER] -synonym: "V-J-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-NONAMER] -synonym: "V-NONAMER" RELATED [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-SPACER] -synonym: "V-SPACER" RELATED [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-RS] -synonym: "V-RS" RELATED [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(DJ)-C-CLUSTER] -synonym: "(DJ)-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(DJ)-J-C-CLUSTER] -synonym: "(DJ)-J-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(VDJ)-C-CLUSTER] -synonym: "(VDJ)-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(DJ)-C-CLUSTER] -synonym: "V-(DJ)-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous Helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." [http://www.pnas.org/cgi/content/full/100/11/6569] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -is_a: SO:0000351 ! 
synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." [FB:km] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "Region in 5' UTR where ribosome assembles on mRNA." [SO:ke] -synonym: "five_prime_ribosome_binding_site" RELATED [] -synonym: "RBS" RELATED [] -synonym: "Shine-Dalgarno_sequence" RELATED [] -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! processed_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -synonym: "five_prime_-clip" RELATED [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#5'D-RS] -synonym: "5'RS" RELATED [] -synonym: "five_prime_D-recombination_signal_sequence" RELATED [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -synonym: "3'-clip" RELATED [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#C-CLUSTER] -synonym: "C-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-CLUSTER] -synonym: "D-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-J-CLUSTER] -synonym: "D-J-CLUSTER" RELATED [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: " 7 nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or J-gene recombination feature of an immunoglobulin/T-cell receptor gene" [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#HEPTAMER] -synonym: "HEPTAMER" RELATED [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000563 -name: vertebrate_immune_system_gene_recombination_spacer -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(DJ)-J-C-CLUSTER] -synonym: "V-(DJ)-J-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VDJ)-J-C-CLUSTER] -synonym: "V-(VDJ)-J-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VJ)-J-C-CLUSTER] -synonym: "V-(VJ)-J-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inverversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promotor -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000569 -name: retrotransposed -alt_id: SO:0100042 -synonym: "captured_pseudogene" RELATED [] -is_a: SO:0000010 ! protein_coding -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#3'D-RS] -synonym: "3'D-RS" RELATED [] -synonym: "three_prime_D-recombination_signal_sequence" RELATED [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000572 -name: DJ_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-J-GENE] -synonym: "D_J_GENE" RELATED [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -relationship: part_of SO:0000485 ! DJ_J_cluster -relationship: part_of SO:0000504 ! D_DJ_C_cluster -relationship: part_of SO:0000505 ! D_DJ_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000508 ! D_DJ_J_cluster -relationship: part_of SO:0000518 ! V_DJ_cluster -relationship: part_of SO:0000519 ! V_DJ_J_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000528 ! 
V_D_DJ_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000539 ! DJ_C_cluster -relationship: part_of SO:0000540 ! DJ_J_C_cluster -relationship: part_of SO:0000542 ! V_DJ_C_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster - -[Term] -id: SO:0000573 -name: rRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000574 -name: VDJ_gene -def: " Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)" [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-J-GENE] -synonym: "V-D-J-GENE" RELATED [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -relationship: part_of SO:0000487 ! VDJ_J_C_cluster -relationship: part_of SO:0000488 ! VDJ_J_cluster -relationship: part_of SO:0000520 ! V_VDJ_C_cluster -relationship: part_of SO:0000521 ! V_VDJ_cluster -relationship: part_of SO:0000522 ! V_VDJ_J_cluster -relationship: part_of SO:0000541 ! VDJ_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster - -[Term] -id: SO:0000575 -name: scRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000576 -name: VJ_gene -def: " Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)" [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-J-GENE] -synonym: "V-J-GENE" RELATED [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -relationship: part_of SO:0000489 ! VJ_C_cluster -relationship: part_of SO:0000490 ! VJ_J_C_cluster -relationship: part_of SO:0000491 ! VJ_J_cluster -relationship: part_of SO:0000523 ! V_VJ_C_cluster -relationship: part_of SO:0000524 ! V_VJ_cluster -relationship: part_of SO:0000525 ! V_VJ_J_cluster -relationship: part_of SO:0000566 ! 
V_VJ_J_C_cluster - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." [SO:ma] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://www.rna.ucla.edu] -synonym: "pre-edited_region" RELATED [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. 
tmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and eukaryote nuclear genomes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa_RNA" RELATED [] -synonym: "ssrA" RELATED [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_encoding -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." [SO:ke] -synonym: "10Sa_RNA_primary_transcript" RELATED [] -synonym: "ssrA_RNA_primary_transcript" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyse their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -subset: SOFA -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -is_a: SO:0000188 ! intron -is_a: SO:0000374 ! ribozyme - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. 
Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal_recognition_particle_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A stem-loop RNA structure where nucleotides in the loop participate in complementary interactions with a region of RNA downstream of the stem-loop." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12519954&dopt=Abstract] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov 80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "H-pseudoknot" RELATED [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. 
Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "C/D_box_snoRNA" RELATED [] -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "H/ACA_box_snoRNA" RELATED [] -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -is_a: SO:0000232 ! 
snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." [http://www.rna.ucla.edu/index.html] -is_obsolete: true - -[Term] -id: SO:0000598 -name: edited_by_C_insertion_and_dinucleotide_insertion -def: "The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs." [http://nsm1.utdallas.edu/bio/miller/physarum/overview.htm] -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000599 -name: edited_by_C_to_U_substitution -is_obsolete: true - -[Term] -id: SO:0000600 -name: edited_by_A_to_I_substitution -is_obsolete: true - -[Term] -id: SO:0000601 -name: edited_by_G_addition -is_obsolete: true - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a perfect duplex (except for the oligoU tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. 
They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -subset: SOFA -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." [http://www.rna.ucla/] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "The region between two known genes." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://www.rna.ucla/] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." [http://www.rna.ucla.edu/] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_encoding -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -is_a: SO:0000001 ! 
region -relationship: adjacent_to SO:0000602 ! guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch_point" RELATED [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -is_a: SO:0000167 ! promoter -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The site where transcription ends." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "tRNA_promoter" RELATED [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -is_a: SO:0000846 ! 
RNApol_III_promotor_type_2_region - -[Term] -id: SO:0000620 -name: B_box -is_a: SO:0000846 ! RNApol_III_promotor_type_2_region - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -is_a: SO:0000845 ! RNApol_III_promotor_type_1_region - -[Term] -id: SO:0000623 -name: snRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenence of the end," [SO:ma] -subset: SOFA -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "Combination of short DNA sequence elements which suppress the transcription of an adjacent gene or genes." [http://www.brunel.ac.uk/depts/bio/project/old_hmg/gloss3.htm#s] -subset: SOFA -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -is_a: SO:0000830 ! chromosome_region - -[Term] -id: SO:0000627 -name: insulator -subset: SOFA -synonym: "insulator_element" RELATED [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -is_a: SO:0000830 ! chromosome_region - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -is_a: SO:0000837 ! UTR_region - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "A primary transcript that donates the spliced leader to other mRNA" [SO:ke] -synonym: "mini-exon_donor_RNA" RELATED [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000635 ! 
mini_exon_donor_RNA - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -is_a: SO:0000838 ! rRNA_primary_transcript_region - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repetitive sequence spanning 500 to 20,000 base pairs (a repeat unit is 5 - 30 base pairs)." [http://www.rerf.or.jp/eigo/glossary/minisate.htm] -subset: SOFA -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "Small RNA molecule that is the product of a longerexogenous or endogenous dsRNA, which is either a bimolecular duplexe or very longhairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulatefrom both strands of the dsRNA. sRNAs trigger the cleavage of their target molecules." 
[PMID:12592000] -subset: SOFA -synonym: "small_interfering_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro_RNA_primary_transcript" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000648 -name: stRNA_primary_transcript -def: "A primary transcript encoding a small temporal mRNA (SO:0000649)." [SO:ke] -synonym: "small_temporal_RNA_primary_transcript" RELATED [] -is_a: SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small_temporal_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilises 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "23S_rRNA" RELATED [] -synonym: "28S_rRNA" RELATED [] -is_a: SO:0000252 ! 
rRNA - -[Term] -id: SO:0000655 -name: ncRNA -def: "An mRNA sequence that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: ncRNA is a processed_transcript so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding_RNA" RELATED [] -is_a: SO:0000233 ! processed_transcript - -[Term] -id: SO:0000656 -name: stRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "interspersed_repeat" RELATED [] -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000661 -name: intron_attribute -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -subset: SOFA -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000663 -name: tRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -is_a: SO:0000830 ! chromosome_region - -[Term] -id: SO:0000666 -name: mobile_intron -is_a: SO:0000661 ! intron_attribute - -[Term] -id: SO:0000667 -name: insertion -def: "A region of sequence identified as having been inserted." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0000109 ! sequence_variant -relationship: sequence_of SO:0000046 ! 
insert - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." [SO:ma] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non-canonical_splice_site" RELATED [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." [SO:ke] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -is_a: SO:0000164 ! three_prime_splice_site -is_a: SO:0000675 ! canonical_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." [SO:ke] -is_a: SO:0000163 ! 
five_prime_splice_site -is_a: SO:0000675 ! canonical_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." [SO:ke] -synonym: "non-canonical_three_prime_splice_site" RELATED [] -is_a: SO:0000164 ! three_prime_splice_site -is_a: SO:0000674 ! non_canonical_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non-canonical-five_prime_splice_site" RELATED [] -is_a: SO:0000163 ! five_prime_splice_site -is_a: SO:0000674 ! non_canonical_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non-canonical_start_codon" RELATED [] -synonym: "non_ATG_start_codon" RELATED [] -is_a: SO:0000318 ! start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -is_a: SO:0000233 ! processed_transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." [http://www.ncbi.nlm.nih.gov 80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -is_a: SO:0000344 ! splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeting by a nuclease enzyme." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000685 -name: DNAaseI_hypersensitive_site -is_a: SO:0000322 ! 
nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "For some translocations, particularly but not exclusively, reciprocal translocations, the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements." [SO:ma] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occured." [SO:ke] -subset: SOFA -is_a: SO:0000109 ! sequence_variant -is_a: SO:0000699 ! junction -relationship: position_of SO:0000045 ! delete - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000691 -name: translocation_site -def: "The space between two bases in a sequence which marks the position where a translocation has occurred." [SO:ke] -is_a: SO:0000109 ! sequence_variant -relationship: position_of SO:0000049 ! translocate - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives (alleles) exist in normal individuals in some population(s), wherein the least frequent allele has an abundance of 1% or greater." [http://www.cgr.ki.se/cgb/groups/brookes/Articles/essence_of_snps_article.pdf] -subset: SOFA -synonym: "single_nucleotide_polymorphism" RELATED [] -is_a: SO:1000008 ! point_mutation - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006 -subset: SOFA -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" RELATED [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000699 -name: junction -def: "A junction refers to an interbase location of zero in a sequence." [SO:ke] -subset: SOFA -synonym: "boundary" RELATED [] -is_a: SO:0000110 ! located_sequence_feature - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A locatable region of genomic sequence, corresponding to a unit of inheritance, which is associated with regulatory regions, transcribed regions and/or other functional sequence regions" [SO:rd] -subset: SOFA -is_a: SO:0000001 ! region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjacent copies of a DNA sequence." [http://www.sci.sdsu.edu/ ~ smaloy/Glossary/T.html] -subset: SOFA -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The process that produces mature transcripts by combining exons of independent pre-mRNA molecules. The acceptor site lies on the 3' of these molecules." 
[SO:ke] -subset: SOFA -is_a: SO:0000164 ! three_prime_splice_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The site at which trans-splicing occurs." [SO:ke] -synonym: "trans-splice_donor_site" RELATED [] -is_a: SO:0000163 ! five_prime_splice_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000713 -name: DNA_motif -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000715 -name: RNA_motif -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It does not contain the start or stop codon." [SO:rb] -comment: This term was added after a request by SGD.nAgust 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interupted by one or more stop codons; usually identified through intergenomic sequence comparisons." [SO:rb] -comment: Term requested by Rama from SGD -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -is_a: SO:0000353 ! 
assembly - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." [SO:ma] -synonym: "intervening DNA" RELATED [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: origin_of_transfer -def: "A region of a DNA molecule whre transfer is initiated during the process of conjugation or mobilization." [http:http\://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -synonym: "oriT" RELATED [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000725 -name: transit_peptide -def: "The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein: this domain is involved in post translational import of the protein into the organelle." [http:http\://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -comment: Added to bring SO inline with the embl ddbj genbank feature table. -subset: SOFA -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0000726 -name: repeat_unit -def: "A single repeat element." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -comment: Added to comply with the feature table. -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000727 -name: TF_module -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active" [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -synonym: "cis_regulatory_module" RELATED [] -synonym: "CRM" RELATED [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000728 -name: intein -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0000729 -name: intein_containing -is_a: SO:0000010 ! protein_coding - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unkown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! 
assembly - -[Term] -id: SO:0000731 -name: fragment -def: "An attribute to describe a feature that is incomplete." [SO:ke] -comment: added because of request by MO people. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000732 -name: predicted -def: "An attribute describing an unverified region" [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000733 -name: feature_attribute -def: "An attribute describing a located_sequence_feature." [SO:ke] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000735 -name: sequence_location -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_location -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -is_a: SO:0000736 ! organelle_location - -[Term] -id: SO:0000738 -name: nuclear_sequence -is_a: SO:0000736 ! organelle_location - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -is_a: SO:0000736 ! organelle_location - -[Term] -id: SO:0000740 -name: plastid_sequence -is_a: SO:0000736 ! organelle_location - -[Term] -id: SO:0000741 -name: kinetoplast_sequence -is_a: SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000742 -name: maxicircle_sequence -is_a: SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000743 -name: apicoplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_sequence -is_a: SO:0000735 ! 
sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_sequence -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -is_a: SO:0005836 ! regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000754 -name: lambda_vector -def: "The lambda bacteriophage is the vector for the linear lambda clone. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000755 -name: plasmid_vector -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template" [SO:ma] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_obsolete: true - -[Term] -id: SO:0000760 -name: YAC_clone -is_obsolete: true - -[Term] -id: SO:0000761 -name: phagemid_clone -is_obsolete: true - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000763 -name: fosmid_clone -is_obsolete: true - -[Term] -id: SO:0000764 -name: BAC_clone -is_obsolete: true - -[Term] -id: SO:0000765 -name: cosmid_clone -is_obsolete: true - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome. 
" [SO:ma] -is_a: SO:0000155 ! plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [Indiana:kw] -comment: Added in response to comment from Kelly Williams from Indiana.nhttp://nar.oxfordjournals.org/cgi/content/full/32/15/4531n10 Nov, 2005 -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [Indiana:kw] -comment: Added in response to Kelly Williams from Indiananhttp://nar.oxfordjournals.org/cgi/content/full/32/15/4531n10 nov 2005 -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000771 -name: QTL -def: "Quantitative Trait Locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." [http: http\://rgd.mcw.edu/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005 -is_a: SO:0000001 ! region - -[Term] -id: SO:0000772 -name: genomic_island -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -is_a: SO:0000001 ! region - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. 
They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038/nrmicro884 nGENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMSnUlrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible_element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands. Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 rmicro884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible_element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands.nnEvolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA genenJohn T. Sullivan and Clive W. RonsonnPNAS 1998 Apr 28 95 (9) 5145-5149n -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. -subset: SOFA -is_a: SO:0000462 ! 
pseudogenic_region - -[Term] -id: SO:0000780 -name: transposable_element_attribute -comment: Added by KE Jan 2006 to capture the kinds of attributes of TEs -is_obsolete: true - -[Term] -id: SO:0000781 -name: transgenic -def: "Attribute describing sequence that has been integrated with foreign sequence." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000782 -name: natural -def: "An attribute describing a feature that occurs in nature." [SO:ke] -synonym: "natural_transposable_element" RELATED [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000783 -name: engineered -def: "An attribute to describe a region that was modified in vitro." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000784 -name: foreign -def: "An atrribute to describe a region from another species." [SO:ke] -synonym: "foreign_transposable_element" RELATED [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000785 -name: cloned_region -comment: Added in response to Lynn Crosby. A clone insert may be composed of many cloned regions. -synonym: "cloned segment" RELATED [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000786 -name: reagent_attribute -comment: Added jan 2006 by KE. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000787 -name: clone_attribute -is_obsolete: true - -[Term] -id: SO:0000788 -name: cloned -is_a: SO:0000786 ! reagent_attribute - -[Term] -id: SO:0000789 -name: validated -def: "An attribute to describe a feature that has been proven. " [SO:ke] -synonym: "validated_clone" RELATED [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000790 -name: invalidated -def: "An attribute describing a feature that is invalidated." [SO:ke] -synonym: "invalidated_clone" RELATED [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000791 -name: cloned_genomic -is_a: SO:0000788 ! cloned - -[Term] -id: SO:0000792 -name: cloned_cDNA -is_a: SO:0000788 ! 
cloned - -[Term] -id: SO:0000793 -name: engineered_DNA -is_obsolete: true - -[Term] -id: SO:0000800 -name: assortment_derived_duplication -def: "A multi-chromosome duplication aberration generated by reassortment of other aberration components." [FB:gm] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000801 -name: assortment_derived_deficiency_plus_duplication -def: " A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency and a duplication. " [FB:gm] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000802 -name: assortment_derived_deficiency -def: "A multi-chromosome deficiency aberration generated by reassortment of other aberration components." [FB:gm] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000803 -name: assortment_derived_aneuploid -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency or a duplication." [fb:gm] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000806 -name: fusion -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000814 -name: rescue -def: "An attribute describing a regions ability, when introduced to a mutant organism, to re-establish (rescue) a phenotype." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000815 -name: mini_gene -def: "By definition, minigenes are short open-reading frames\n(ORF), usually encoding approximately 9 to 20 amino\nacids, which are expressed in vivo (as distinct from being\nsynthesized as peptide or protein ex vivo and subsequently\ninjected). The in vivo synthesis confers a distinct\nadvantage: the expressed sequences can enter both\nantigen presentation pathways, MHC I (inducing CD8+ T-\ncells, which are usually cytotoxic T-lymphocytes (CTL))\nand MHC II (inducing CD4+ T-cells, usually 'T-helpers'\n(Th)); and can encounter B-cells, inducing antibody\nresponses. 
Three main vector approaches have been used\nto deliver minigenes: viral vectors, bacterial vectors and\nplasmid DNA." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=Abstract&list_uids=15992153&query_hl=2&itool=pubmed_docsum] -is_a: SO:0000236 ! ORF - -[Term] -id: SO:0000817 -name: wild_type -def: "An attribute describing sequence with the genotype found in nature/or standard laboratory stock" [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000830 -name: chromosome_region -def: "A region of a chromosome" [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000832 -name: promotor_region -def: "A region of sequence which is part of a promotor." [SO:ke] -comment: This is a manufactured term to allow the parts of promotor to have an is_a path back to the root. -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: processed_transcript_region -def: "A region of a processed transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a processed transcript and give them an is_a path to the root. -subset: SOFA -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A region of a primary transcript." 
[SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root -subset: SOFA -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -subset: SOFA -is_a: SO:0000834 ! processed_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region og UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000838 -name: rRNA_primary_transcript_region -def: "A region of an rRNA primary transcript" [SO:ke] -comment: To allow transcribed_spacer_region to have a path to the root. -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000839 -name: polypeptide_region -def: "A region of sequence that can be translated into polypeptide sequence. This sequence can be reprsenseted as nucleotide or aminoacid. This sequence must be part of an mRNA sequence." [SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000840 -name: repeat_component -def: "A region of a repeated sequence. " [SO:ke] -comment: A manufactured to to group the parts of repeats, to give them an is_a path back to the root. -is_a: SO:0000001 ! region -relationship: part_of SO:0000657 ! repeat_region - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! 
spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000843 -name: bacterial_RNApol_promotor_region -is_a: SO:0000832 ! promotor_region -relationship: part_of SO:0000613 ! bacterial_RNApol_promoter - -[Term] -id: SO:0000844 -name: RNA_II_promotor_region -is_a: SO:0000832 ! promotor_region -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000845 -name: RNApol_III_promotor_type_1_region -is_a: SO:0000832 ! promotor_region -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000846 -name: RNApol_III_promotor_type_2_region -is_a: SO:0000832 ! promotor_region -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000847 -name: tmRNA_region -is_a: SO:0000834 ! processed_transcript_region -relationship: part_of SO:0000584 ! tmRNA - -[Term] -id: SO:0000848 -name: LTR_component -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000849 -name: three_prime_LTR_component -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000850 -name: five_prime_LTR_component -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000851 -name: CDS_region -subset: SOFA -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000856 -name: conserved -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000857 -name: homologous -def: "Similarity due to common ancestry." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000858 -name: orthologous -def: "A kind of homology where divergence occured after a speciation event." [SO:ke] -is_a: SO:0000857 ! 
homologous - -[Term] -id: SO:0000859 -name: paralogous -def: "A kind of homology where divergence occured after a duplication event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000860 -name: syntenic -def: "Attribute describing sequence regions occurring in same order on chromosome of different species." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000863 -name: mRNA_attribute -def: "An attribute describing an mRNA feature." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000864 -name: exemplar -def: "An attribute describing a sequence is reprsentative of a class of similar sequences." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000865 -name: frameshift -def: "An attribute describing a sequence that contains a mutation involving the deletion or insertion of one or more bases, where this number is not divisible by 3.." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000866 -name: minus_1_frameshift -def: "A frameshift caused by deleting one base" [SO:ke] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000867 -name: minus_2_frameshift -def: "A frameshift caused by deleting two bases" [SO:ke] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000868 -name: plus_1_frameshift -def: "A frameshift caused by inserting one base" [SO:ke] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000869 -name: plus_2_framshift -def: "A frameshift caused by inserting two bases" [SO:ke] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000870 -name: trans_spliced -def: "An attribute describing transcript sequence that is created by splicing exons from diferent genes." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000875 -name: bound_by_protein -def: "An attribute describing a sequence that is bound by a protein." [SO:ke] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000876 -name: bound_by_nucleic_acid -def: "An attribute describing a sequence that is bound by a nucleic acid." 
[SO:ke] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000877 -name: alternatively_spliced -def: "An attribute describing a situation where a gene may encode for more than 1 transcript." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000878 -name: monocistronic -def: "An attribute describing a sequence that contains the code for one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000879 -name: dicistronic -def: "An attribute describing a sequence that contains the code for two gene products." [SO:ke] -is_a: SO:0000880 ! polycistronic - -[Term] -id: SO:0000880 -name: polycistronic -def: "An attribute describing a sequence that contains the code for more than one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000881 -name: recoded -def: "An atribute describing am mRNA sequences that has been reprogrammed at translation, causing localized alterations." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000882 -name: codon_redefined -def: "An attribute describing the alteration of codon meaning." [SO:ke] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000883 -name: stop_codon_read_through -def: "A stop codon redefined to be a new amino acid" [SO:ke] -synonym: "stop codon readthrough" RELATED [] -is_a: SO:0000882 ! codon_redefined - -[Term] -id: SO:0000884 -name: stop_codon_redefined_as_pyrrolysine -def: "A stop codon redefined to be the new amino acid, pyrrolysine." [SO:ke] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000885 -name: stop_codon_redefined_as_selenocysteine -def: "A stop codon redefined to be the new amino acid, selenocysteine." [SO:ke] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000886 -name: recoded_by_translational_bypass -def: "Rcoding mRNA where a block of nucleotides is not translated. " [SO:ke] -is_a: SO:0000881 ! 
recoded - -[Term] -id: SO:0000887 -name: translational_frameshift -def: "Recoding by frameshifting a particular site." [SO:ke] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000893 -name: silenced -is_a: SO:0000126 ! transcriptionally_repressed - -[Term] -id: SO:0000894 -name: silenced_by_DNA_modification -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0000895 -name: silenced_by_DNA_methylation -is_a: SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000899 -name: nuclear_mitochondrial -def: "An attribute describing a nuclear pseudogene of a mitochndrial gene." [SO:ke] -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0000900 -name: processed -def: "An attribute describing a pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed bak inot the genome, lacking introns and promotors, but often including a polyA tail." [SO:ke] -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0000901 -name: unequally_crossed_over -def: "An attribute describing a pseudogene that was created by tandem duplication and unequal crossing over during recombination." [SO:ke] -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0000903 -name: endogenous_retroviral_sequence -is_a: SO:0000751 ! proviral_sequence - -[Term] -id: SO:0000904 -name: rearranged_at_DNA_level -def: "An attribute to describe the sequence of a feature, where the DNA is rearranged." [SO:ke] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000905 -name: status -def: "An attribute describing the status of a feature, based on the available evidence." [SO:ke] -comment: This term is the hypernym of attributes and should not be annotated to. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000906 -name: independently_known -def: "Attribute to describe a feature that is independently_known - not predicted." [SO:ke] -is_a: SO:0000905 ! 
status - -[Term] -id: SO:0000907 -name: supported_by_sequence_similarity -def: "An attribute to describe a feature that has been predicted using sequence smilarity techniques." [SO:ke] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000908 -name: supported_by_domain_match -def: "An attribute to describe a feature that has been predicted using sequence smilarity of a known domain." [SO:ke] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000909 -name: supported_by_EST_or_cDNA -def: "An attribute to describe a feature that has been predicted using sequence smilarity to EST or cDNA data." [SO:ke] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000910 -name: orphan -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000911 -name: predicted_by_ab_initio_computation -def: "An attribute describng a feaure that is predticted by a computer program that did not rely on sequence similarity. " [SO:ke] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000912 -name: -is_obsolete: true - -[Term] -id: SO:0000916 -name: edit_operation -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000917 -name: insert_U -def: "An edit to insert a U." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000918 -name: delete_U -def: "An edit to delete a uridine." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000919 -name: substitute_A_to_I -def: "An edit to substitute an I for an A." [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000920 -name: insert_C -def: "An edit to insert a cytidine." [SO:ke] -is_a: SO:0000916 ! 
edit_operation - -[Term] -id: SO:0000921 -name: insert_dinucleotide -def: "An edit to insert a dinucleotide." [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000922 -name: substitute_C_to_U -def: "An edit to substitute an U for a C" [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000923 -name: insert_G -def: "An edit to insert a G. " [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000924 -name: insert_GC -def: "An edit to insert a GC dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000925 -name: insert_GU -def: "An edit to insert a GU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! 
insert_dinucleotide - -[Term] -id: SO:0000926 -name: insert_CU -def: "An edit to insert a CU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000927 -name: insert_AU -def: "An edit to insert a AU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000928 -name: insert_AA -def: "An edit to insert a AA dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. 
The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000930 -name: guide_RNA_region -is_a: SO:0000834 ! processed_transcript_region -relationship: part_of SO:0000602 ! guide_RNA - -[Term] -id: SO:0000931 -name: anchor_region -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000932 -name: pre_edited_mRNA -is_a: SO:0000120 ! protein_coding_primary_transcript - -[Term] -id: SO:0000933 -name: intermediate -def: "An attribute to describe a feature between stages of processing." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000936 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000937 -name: vertebrate_immune_system_feature -is_obsolete: true - -[Term] -id: SO:0000938 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000939 -name: vertebrate_immune_system_gene_recombination_signal_feature -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000940 -name: recombinationally_rearranged -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0005836 -name: regulatory_region -def: "A DNA sequence that controls the expression of a gene." [http://www.genpromag.com/scripts/glossary.asp?LETTER=R] -subset: SOFA -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0005837 -name: snRNA_4.5S_primary_transcript -def: "A primary transcript encoding a 4.5S snRNA." 
[SO:ke] -synonym: "4.5S_snRNA_primary_transcript" RELATED [] -is_a: SO:0000231 ! snRNA_primary_transcript - -[Term] -id: SO:0005839 -name: snRNA_4.5S -synonym: "4.5S_snRNA" RELATED [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0005841 -name: methylation_guide_snoRNA -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0005843 -name: rRNA_cleavage_snoRNA -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0005845 -name: single_exon -is_a: SO:0000147 ! exon - -[Term] -id: SO:0005847 -name: cassette_array_member -is_a: SO:0005848 ! gene_cassette_member - -[Term] -id: SO:0005848 -name: gene_cassette_member -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0005849 -name: gene_subarray_member -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0005850 -name: primer_binding_site -def: "Non-covalent primer binding site for initiation of replication, transcription, or reverse transcription." [http:www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -is_a: SO:0000409 ! binding_site -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0005851 -name: gene_array -def: "An array includes two or more genes, or two or more gene subarrays, contiguously arranged where the individual genes, or subarrays, are either identical in sequence, or essentially so." [SO:ma] -comment: This would include, for example, a cluster of genes each encoding the major ribosomal RNAs and a cluster of histone gene subarrays. -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0005852 -name: gene_subarray -def: "A subarray is, by defintition, a member of a gene array (SO:0005851); the members of a subarray may differ substantially in sequence, but are closely related in function." [SO:ma] -comment: This would include, for example, a cluster of genes encoding different histones. -is_a: SO:0005851 ! gene_array - -[Term] -id: SO:0005853 -name: gene_cassette -def: "A non-functional gene that, when captured by recombination forms a functional gene." 
[SO:ma] -comment: This would include, for example, the mating type gene cassettes of S. cerevisiae. -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0005854 -name: gene_cassette_array -def: "An array of non-functional genes whose members, when captured by recombination form functional genes." [SO:ma] -comment: This would include, for example, the arrays of non-functional VSG genes of Trypanosomes. -is_a: SO:0005853 ! gene_cassette - -[Term] -id: SO:0005855 -name: gene_group -def: "A collection of related genes." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0005856 -name: selenocysteine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0005857 -name: selenocysteinyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:1000002 -name: substitution -def: "Any change in genomic DNA caused by a single event." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0000109 ! sequence_variant -relationship: sequence_of SO:0000048 ! substitute - -[Term] -id: SO:1000004 -name: partially_characterised_change_in_DNA_sequence -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000007 ! uncharacterised_change_in_nucleotide_sequence - -[Term] -id: SO:1000005 -name: complex_substitution -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000007 -name: uncharacterised_change_in_nucleotide_sequence -def: "The nature of the mutation event is either uncharacterised or only partially characterised." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000008 -name: point_mutation -def: "A mutation event where a single DNA nucleotide changes into another nucleotide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000009 -name: transition -def: "Change of a pyrimidine nucleotide, C or T, into an other pyrimidine nucleotide, or change of a purine nucleotide, A or G, into an other purine nucleotide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000008 ! point_mutation - -[Term] -id: SO:1000010 -name: pyrimidine_transition -def: "A substitution of a pyrimidine, C or T, for another pyrimidine." [SO:ke] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000011 -name: C_to_T_transition -def: "A transition of a cytidine to a thymine." [SO:ke] -is_a: SO:1000010 ! pyrimidine_transition - -[Term] -id: SO:1000012 -name: C_to_T_transition_at_pCpG_site -def: "The transition of cytidine to thymine occurring at a pCpG site as a consequence of the spontaneous deamination of 5'-methylcytidine." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000011 ! C_to_T_transition - -[Term] -id: SO:1000013 -name: T_to_C_transition -is_a: SO:1000010 ! pyrimidine_transition - -[Term] -id: SO:1000014 -name: purine_transition -def: "A substitution of a purine, A or G, for another purine." [SO:ke] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000015 -name: A_to_G_transition -def: "A transition of an adenine to a guanine." [SO:ke] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000016 -name: G_to_A_transition -def: "A transition of a guanine to an adenine." [SO:ke] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000017 -name: transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G, or vice versa." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000008 ! point_mutation - -[Term] -id: SO:1000018 -name: pyrimidine_to_purine_transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G." [SO:ke] -is_a: SO:1000017 ! transversion - -[Term] -id: SO:1000019 -name: C_to_A_transversion -def: "A transversion from cytidine to adenine." [SO:ke] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000020 -name: C_to_G_transversion -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000021 -name: T_to_A_transversion -def: "A transversion from T to A." [SO:ke] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000022 -name: T_to_G_transversion -def: "A transversion from T to G." [SO:ke] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000023 -name: purine_to_pyrimidine_transversion -def: "Change of a purine nucleotide, A or G , into a pyrimidine nucleotide C or T." [SO:ke] -is_a: SO:1000017 ! transversion - -[Term] -id: SO:1000024 -name: A_to_C_transversion -def: "A transversion from adenine to cytidine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000025 -name: A_to_T_transversion -def: "A transversion from adenine to thymine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000026 -name: G_to_C_transversion -def: "A transversion from guanine to cytidine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000027 -name: G_to_T_transversion -def: "A transversion from guanine to thymine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000028 -name: intrachromosomal_mutation -is_a: SO:1000183 ! 
chromosome_structure_variation - -[Term] -id: SO:1000029 -name: chromosomal_deletion -synonym: "(bacteria)&Dgr;" RELATED [] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(fungi)D" RELATED [] -is_a: SO:0000550 ! aneuploid_chromosome -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000030 -name: chromosomal_inversion -synonym: "(bacteria)IN" RELATED [] -synonym: "(Drosophila)In" RELATED [] -synonym: "(fungi)In" RELATED [] -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000031 -name: interchromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000032 -name: indel -def: "A hybrid term (insertion/deletion) to describe sequence length change when the direction of the change is unspecified." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:0000109 ! sequence_variant - -[Term] -id: SO:1000033 -name: nucleotide_deletion -def: "One or more continuous nucleotides are excised from the sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000032 ! indel - -[Term] -id: SO:1000034 -name: nucleotide_insertion -def: "One or more nucleotides are added between two adjacent nucleotides in the sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000032 ! indel - -[Term] -id: SO:1000035 -name: nucleotide_duplication -def: "One or more nucleotides are added between two adjacent nucleotides in the sequence; the inserted sequence derives from, or is identical in sequence to, nucleotides adjacent to insertion point." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000034 ! nucleotide_insertion - -[Term] -id: SO:1000036 -name: inversion -def: "A continuous nucleotide sequence is inverted in the same position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0000109 ! sequence_variant -relationship: sequence_of SO:0000047 ! 
invert - -[Term] -id: SO:1000037 -name: chromosomal_duplication -synonym: "(Drosophila)Dp" RELATED [] -synonym: "(fungi)Dp" RELATED [] -is_a: SO:0000550 ! aneuploid_chromosome -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000038 -name: intrachromosomal_duplication -is_a: SO:1000028 ! intrachromosomal_mutation -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000039 -name: direct_tandem_duplication -is_a: SO:1000173 ! tandem_duplication - -[Term] -id: SO:1000040 -name: inverted_tandem_duplication -is_a: SO:1000173 ! tandem_duplication - -[Term] -id: SO:1000041 -name: intrachromosomal_transposition -synonym: "(Drosophila)Tp" RELATED [] -is_a: SO:0000453 ! transposition -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:1000042 -name: compound_chromosome -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000043 -name: Robertsonian_fusion -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000044 -name: chromosomal_translocation -synonym: "(Drosophila)T" RELATED [] -synonym: "(fungi)T" RELATED [] -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000045 -name: ring_chromosome -synonym: "(Drosophila)R" RELATED [] -synonym: "(fungi)C" RELATED [] -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000046 -name: pericentric_inversion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000047 -name: paracentric_inversion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000048 -name: reciprocal_chromosomal_translocation -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000049 -name: mutation_affecting_transcript -def: "Any change in mature, spliced and processed, RNA that results from a change in the corresponding DNA sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000132 ! 
consequences_of_mutation - -[Term] -id: SO:1000050 -name: no_change_in_transcript -def: "No effect on the state of the RNA." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000052 -name: complex_change_in_transcript -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000054 -name: mutation_affecting_coding_sequence -def: "Any of the amino acid coding triplets of a gene are affected by the DNA mutation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000055 -name: initiator_codon_change_in_transcript -def: "The DNA mutation changes, usually destroys, the first coding triplet of a gene. Usually prevents translation although another initiator codon may be used." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000056 ! amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000056 -name: amino_acid_coding_codon_change_in_transcript -def: "The DNA mutation affects the amino acid coding sequence of a gene; this region includes both the initiator and terminator codons." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000054 ! mutation_affecting_coding_sequence - -[Term] -id: SO:1000057 -name: synonymous_codon_change_in_transcript -def: "The changed codon has the same translation product as the original codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000056 ! amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000058 -name: non_synonymous_codon_change_in_transcript -def: "A DNA point mutation that causes a substitution of an amino acid by an other." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "non-synonymous_codon_change_in_transcript" RELATED [] -is_a: SO:1000056 ! 
amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000059 -name: missense_codon_change_in_transcript -def: "The nucleotide change in the codon leads to a new codon coding for a new amino acid." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000058 ! non_synonymous_codon_change_in_transcript - -[Term] -id: SO:1000060 -name: conservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change does not change the gross properties (size, charge, hydrophobicity) of the amino acid at that position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -is_a: SO:1000059 ! missense_codon_change_in_transcript - -[Term] -id: SO:1000061 -name: nonconservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change changes the gross properties (size, charge, hydrophobicity) of the amino acid in that position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -is_a: SO:1000059 ! missense_codon_change_in_transcript - -[Term] -id: SO:1000062 -name: nonsense_codon_change_in_transcript -def: "The nucleotide change in the codon triplet creates a terminator codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000056 ! amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000063 -name: terminator_codon_change_in_transcript -is_a: SO:1000054 ! mutation_affecting_coding_sequence - -[Term] -id: SO:1000064 -name: mutation_affecting_reading_frame -def: "An umbrella term for terms describing an effect of a mutation on the frame of translation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000054 ! 
mutation_affecting_coding_sequence - -[Term] -id: SO:1000065 -name: frameshift_mutation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000064 ! mutation_affecting_reading_frame - -[Term] -id: SO:1000066 -name: plus_1_frameshift_mutation -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000067 -name: minus_1_frameshift_mutation -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000068 -name: plus_2_frameshift_mutation -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000069 -name: minus_2_frameshift_mutation -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000070 -name: mutation_affecting_transcript_processing -def: "Mutation affects the way in which the primary transcriptional product is processed to form the mature transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000071 -name: mutation_affecting_splicing -def: "Mutation affects the way in which the primary transcriptional product is processed to form the mature transcript, specifically by the removal (splicing) of intron sequences." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000132 ! consequences_of_mutation - -[Term] -id: SO:1000072 -name: splice_donor_mutation -is_a: SO:1000071 ! mutation_affecting_splicing -is_a: SO:1000074 ! cryptic_splice_activator_mutation - -[Term] -id: SO:1000073 -name: splice_acceptor_mutation -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000074 -name: cryptic_splice_activator_mutation -def: "Mutation creates a new (functional) splice site." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000075 -name: mutation_affecting_editing -def: "Mutation affects the editing of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000070 ! 
mutation_affecting_transcript_processing - -[Term] -id: SO:1000076 -name: mutation_affecting_transcription -def: "Mutation affects the process of transcription, its initiation, progression or termination." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000078 -name: mutation_decreasing_rate_of_transcription -is_a: SO:1000081 ! mutation_affecting_rate_of_transcription - -[Term] -id: SO:1000079 -name: mutation_affecting_transcript_sequence -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000080 -name: mutation_increasing_rate_of_transcription -is_a: SO:1000081 ! mutation_affecting_rate_of_transcription - -[Term] -id: SO:1000081 -name: mutation_affecting_rate_of_transcription -is_a: SO:1000076 ! mutation_affecting_transcription - -[Term] -id: SO:1000082 -name: mutation_affecting_transcript_stability -def: "Mutation affects the stability of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000083 -name: mutation_increasing_transcript_stability -def: "Mutation increases the stability (half-life) of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000082 ! mutation_affecting_transcript_stability - -[Term] -id: SO:1000084 -name: mutation_decreasing_transcript_stability -def: "Mutation decreases the stability (half-life) of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000082 ! mutation_affecting_transcript_stability - -[Term] -id: SO:1000085 -name: mutation_affecting_level_of_transcript -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000086 -name: mutation_decreasing_level_of_transcript -is_a: SO:1000085 ! mutation_affecting_level_of_transcript - -[Term] -id: SO:1000087 -name: mutation_increasing_level_of_transcript -is_a: SO:1000085 ! 
mutation_affecting_level_of_transcript - -[Term] -id: SO:1000088 -name: mutation_affecting_translational_product -def: "Mutation causes a change in primary translation product of a transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000132 ! consequences_of_mutation - -[Term] -id: SO:1000089 -name: no_change_of_translational_product -def: "The change at RNA level does not lead to any change in polypeptide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000090 -name: uncharacterised_change_of_translational_product -def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000091 -name: partially_characterised_change_of_translational_product -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000090 ! uncharacterised_change_of_translational_product - -[Term] -id: SO:1000092 -name: complex_change_of_translational_product -def: "Any mutation effect that is known at nucleotide level but can not be explained by using other key terms." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000093 -name: amino_acid_substitution -def: "The replacement of a single amino acid by an other." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000094 -name: conservative_amino_acid_substitution -is_a: SO:1000093 ! amino_acid_substitution - -[Term] -id: SO:1000095 -name: nonconservative_amino_acid_substitution -is_a: SO:1000093 ! 
amino_acid_substitution - -[Term] -id: SO:1000096 -name: amino_acid_insertion -def: "The insertion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000097 -name: amino_acid_deletion -def: "The deletion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000098 -name: polypeptide_truncation -def: "The translational product is truncated at its C-terminus, usually a result of a nonsense codon change in transcript (SO:1000062)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000099 -name: polypeptide_elongation -def: "The extension of the translational product at either (or both) the N-terminus and/or the C-terminus." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000100 -name: polypeptide_N_terminal_elongation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "polypeptide_N-terminal_elongation" RELATED [] -is_a: SO:1000099 ! polypeptide_elongation - -[Term] -id: SO:1000101 -name: polypeptide_C_terminal_elongation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "polypeptide_C-terminal_elongation" RELATED [] -is_a: SO:1000099 ! polypeptide_elongation - -[Term] -id: SO:1000102 -name: mutation_affecting_level_of_translational_product -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000103 -name: mutation_decreasing_level_of_translation_product -is_a: SO:1000102 ! 
mutation_affecting_level_of_translational_product - -[Term] -id: SO:1000104 -name: mutation_increasing_level_of_translation_product -is_a: SO:1000102 ! mutation_affecting_level_of_translational_product - -[Term] -id: SO:1000105 -name: mutation_affecting_polypeptide_amino_acid_sequence -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000106 -name: inframe_polypeptide_N_terminal_elongation -synonym: "inframe_polypeptide_N-terminal_elongation" RELATED [] -is_a: SO:1000100 ! polypeptide_N_terminal_elongation - -[Term] -id: SO:1000107 -name: out_of_frame_polypeptide_N_terminal_elongation -synonym: "out_of_frame_polypeptide_N-terminal_elongation" RELATED [] -is_a: SO:1000100 ! polypeptide_N_terminal_elongation - -[Term] -id: SO:1000108 -name: inframe_polypeptide_C_terminal_elongation -synonym: "inframe_polypeptide_C-terminal_elongation" RELATED [] -is_a: SO:1000101 ! polypeptide_C_terminal_elongation - -[Term] -id: SO:1000109 -name: out_of_frame_polypeptide_C_terminal_elongation -synonym: "out_of_frame_polypeptide_C-terminal_elongation" RELATED [] -is_a: SO:1000101 ! polypeptide_C_terminal_elongation - -[Term] -id: SO:1000110 -name: frame_restoring_mutation -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000111 -name: mutation_affecting_3D_structure_of_polypeptide -synonym: "mutation_affecting_3D-structure_of_polypeptide" RELATED [] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000112 -name: no_3D_structural_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000113 -name: uncharacterised_3D_structural_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000114 -name: partially_characterised_3D_structural_change -is_a: SO:1000113 ! uncharacterised_3D_structural_change - -[Term] -id: SO:1000115 -name: complex_3D_structural_change -is_a: SO:1000111 ! 
mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000116 -name: conformational_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000117 -name: mutation_affecting_polypeptide_function -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000118 -name: loss_of_function_of_polypeptide -synonym: "loss-of-function_of_polypeptide" RELATED [] -is_a: SO:1000117 ! mutation_affecting_polypeptide_function - -[Term] -id: SO:1000119 -name: inactive_ligand_binding_site -is_a: SO:1000118 ! loss_of_function_of_polypeptide - -[Term] -id: SO:1000120 -name: inactive_catalytic_site -is_a: SO:1000119 ! inactive_ligand_binding_site - -[Term] -id: SO:1000121 -name: polypeptide_localization_affected -is_a: SO:1000117 ! mutation_affecting_polypeptide_function - -[Term] -id: SO:1000122 -name: polypeptide_post_translational_processing_affected -synonym: "polypeptide_post-translational_processing_affected" RELATED [] -is_a: SO:1000117 ! mutation_affecting_polypeptide_function -is_a: SO:1000118 ! loss_of_function_of_polypeptide - -[Term] -id: SO:1000123 -name: polypeptide_post_translational_processing_affected -synonym: "polypeptide_post-translational_processing_affected" RELATED [] -is_obsolete: true - -[Term] -id: SO:1000124 -name: partial_loss_of_function_of_polypeptide -synonym: "partial_loss-of-function_of_polypeptide" RELATED [] -is_a: SO:1000118 ! loss_of_function_of_polypeptide - -[Term] -id: SO:1000125 -name: gain_of_function_of_polypeptide -synonym: "gain-of-function_of_polypeptide" RELATED [] -is_a: SO:1000117 ! mutation_affecting_polypeptide_function - -[Term] -id: SO:1000126 -name: mutation_affecting_transcript_secondary_structure -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000127 -name: compensatory_transcript_secondary_structure_mutation -is_a: SO:1000126 ! 
mutation_affecting_transcript_secondary_structure - -[Term] -id: SO:1000132 -name: consequences_of_mutation -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:1000134 -name: polypeptide_fusion -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000136 -name: autosynaptic_chromosome -synonym: "(Drosophila)A" RELATED [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000138 -name: homo_compound_chromosome -synonym: "homo-compound_chromosome" RELATED [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:1000140 -name: hetero_compound_chromosome -synonym: "hetero-compound_chromosome" RELATED [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:1000141 -name: chromosome_fission -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000142 -name: dexstrosynaptic_chromosome -is_a: SO:1000136 ! autosynaptic_chromosome - -[Term] -id: SO:1000143 -name: laevosynaptic_chromosome -is_a: SO:1000136 ! autosynaptic_chromosome - -[Term] -id: SO:1000144 -name: free_duplication -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000145 -name: free_ring_duplication -synonym: "(Drosophila)R" RELATED [] -is_a: SO:1000045 ! ring_chromosome -is_a: SO:1000144 ! free_duplication - -[Term] -id: SO:1000146 -name: complex_chromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000147 -name: deficient_translocation -def: "A translocation in which one of the four broken ends loses a segment before re-joining." [fb:reference_manual] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfT" RELATED [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000044 ! 
chromosomal_translocation - -[Term] -id: SO:1000148 -name: inversion_cum_translocation -def: "The first two breaks are in the same chromosome, and the region between them is rejoined in inverted order to the other side of the first break, such that both sides of break one are present on the same chromosome. The remaining free ends are joined as a translocation with those resulting from the third break." [fb:reference_manual] -synonym: "(Drosophila)InT" RELATED [] -synonym: "(Drosophila)T" RELATED [] -synonym: "inversion-cum-translocation" RELATED [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000149 -name: bipartite_duplication -def: "The (large) region between the first two breaks listed is lost, and the two flanking segments (one of them centric) are joined as a translocation to the free ends resulting from the third break." [fb:reference_manual] -synonym: "(Drosophila)bDp" RELATED [] -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000150 -name: cyclic_translocation -def: "Three breaks in three different chromosomes. The centric segment resulting from the first break listed is joined to the acentric segment resulting from the second, rather than the third." [fb:reference_manual] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000151 -name: bipartite_inversion -def: "Three breaks in the same chromosome; both central segments are inverted in place (i.e., they are not transposed)." [fb:reference_manual] -synonym: "(Drosophila)bIn" RELATED [] -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000152 -name: uninverted_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)eDp" RELATED [] -is_a: SO:1000154 ! 
insertional_duplication - -[Term] -id: SO:1000153 -name: inverted_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)iDp" RELATED [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000154 -name: insertional_duplication -synonym: "(Drosophila)Dpp" RELATED [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000155 -name: interchromosomal_transposition -synonym: "(Drosophila)Tp" RELATED [] -is_a: SO:0000453 ! transposition -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000156 -name: inverted_interchromosomal_transposition -synonym: "(Drosophila)iTp" RELATED [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000157 -name: uninverted_interchromosomal_transposition -synonym: "(Drosophila)eTp" RELATED [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000158 -name: inverted_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)iTp" RELATED [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000159 -name: uninverted_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)eTp" RELATED [] -is_a: SO:1000041 ! 
intrachromosomal_transposition - -[Term] -id: SO:1000160 -name: unoriented_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [fb:reference_manual] -synonym: "(Drosophila)uDp" RELATED [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000161 -name: unorientated_interchromosomal_transposition -synonym: "(Drosophila)uTp" RELATED [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000162 -name: unorientated_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [fb:reference_manual] -synonym: "(Drosophila)uTp" RELATED [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000170 -name: uncharacterised_chromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000171 -name: deficient_inversion -def: "Three breaks in the same chromosome; one central region lost, the other inverted." [fb:reference_manual] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfIn" RELATED [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000173 -name: tandem_duplication -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:1000175 -name: partially_characterised_chromosomal_mutation -is_a: SO:1000170 ! uncharacterised_chromosomal_mutation - -[Term] -id: SO:1000177 -name: uncharacterised_change_in_transcript -def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000049 ! 
mutation_affecting_transcript - -[Term] -id: SO:1000179 -name: partially_characterised_change_in_transcript -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000177 ! uncharacterised_change_in_transcript - -[Term] -id: SO:1000180 -name: mutation_affecting_gene_structure -is_a: SO:1000132 ! consequences_of_mutation - -[Term] -id: SO:1000181 -name: gene_fusion -is_a: SO:1000180 ! mutation_affecting_gene_structure - -[Term] -id: SO:1000182 -name: chromosome_number_variation -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:1000183 -name: chromosome_structure_variation -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:1000184 -name: mutation_causes_exon_loss -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000185 -name: mutation_causes_intron_gain -def: "Mutation causes an intron to be gained by the processed transcript; usually a result of a donor acceptor mutation (SO:1000072)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000186 -name: cryptic_splice_donor_activation -is_a: SO:1000074 ! cryptic_splice_activator_mutation - -[Term] -id: SO:1001186 -name: cryptic_splice_acceptor_activation -is_a: SO:1000074 ! cryptic_splice_activator_mutation - -[Term] -id: SO:1001188 -name: encodes_1_polypeptide -is_a: SO:0000463 ! encodes_alternately_spliced_transcripts - -[Term] -id: SO:1001189 -name: encodes_greater_than_1_polypeptide -is_a: SO:0000463 ! encodes_alternately_spliced_transcripts - -[Term] -id: SO:1001190 -name: encodes_different_polypeptides_different_stop -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001191 -name: encodes_overlapping_peptides_different_start -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001192 -name: encodes_disjoint_polypeptides -is_a: SO:1001189 ! 
encodes_greater_than_1_polypeptide - -[Term] -id: SO:1001193 -name: encodes_overlapping_polypeptides_different_start_and_stop -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001194 -name: alternatively_spliced_gene_encoding_greater_than_1_polypeptide_coding_regions_overlapping -is_obsolete: true - -[Term] -id: SO:1001195 -name: encodes_overlapping_peptides -is_a: SO:1001189 ! encodes_greater_than_1_polypeptide - -[Term] -id: SO:1001196 -name: cryptogene -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:1001217 -name: member_of_regulon -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:1001244 -name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non_overlapping -synonym: "alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non-overlapping" RELATED [] -is_obsolete: true - -[Term] -id: SO:1001255 -name: status_of_coding_sequence -is_obsolete: true - -[Term] -id: SO:1001260 -name: internal_Shine_Dalgarno_sequence -def: "A Shine Delgarno sequence that is upstream of a non-5' CDS in a polycistronic mRNA." [SO:ke] -is_a: SO:0000243 ! internal_ribosome_entry_site -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001262 -name: minus_1_translational_frameshift -def: "An attribute describing a translational frameshift of -1. " [SO:ke] -is_a: SO:0000887 ! translational_frameshift - -[Term] -id: SO:1001263 -name: plus_1_translational_frameshift -def: "An attribute describing a translational frameshift of +1. " [SO:ke] -is_a: SO:0000887 ! translational_frameshift - -[Term] -id: SO:1001266 -name: stop_codon_redefinition_as_selenocysteine -is_obsolete: true - -[Term] -id: SO:1001267 -name: stop_codon_readthrough -is_obsolete: true - -[Term] -id: SO:1001268 -name: recoding_stimulatory_region -def: "A site in an mRNA sequence that stimulates the recoding of the same mRNA." 
[http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12519954&dopt=Abstract] -synonym: "recoding_stimulatory_signal" RELATED [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:1001269 -name: four_bp_start_codon -def: "A non-canonical start codon with 4 pase pairs." [SO:ke] -synonym: "4bp_start_codon" RELATED [] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001270 -name: stop_codon_redefinition_as_pyrrolysine -is_obsolete: true - -[Term] -id: SO:1001271 -name: archeal_intron -def: "Intron characteristic of tRNA genes; splices by an endonuclease-ligase mediated mechanism." [SO:ma] -is_a: SO:0000661 ! intron_attribute - -[Term] -id: SO:1001272 -name: tRNA_intron -is_a: SO:0000661 ! intron_attribute - -[Term] -id: SO:1001273 -name: CTG_start_codon -def: "A non-canonical start codon of sequence CTG." [SO:ke] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001274 -name: SECIS_element -def: "The incorporation of selenocysteine into a protein sequence is directed by an in-frame UGA codon (usually a stop codon) within the coding region of the mRNA. Selenoprotein mRNAs contain a conserved secondary structure in the 3' UTR that is required for the distinction of UGA stop from UGA selenocysteine. The selenocysteine insertion sequence (SECIS) is around 60 nt in length and adopts a hairpin structure which is sufficiently well-defined and conserved to act as a computational screen for selenoprotein genes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00031] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001275 -name: retron -def: "Sequence coding for a short, single-stranded, DNA sequence via a retrotransposed RNA intermediate; characteristic of some microbial genomes." [SO:ma] -is_a: SO:0000001 ! region - -[Term] -id: SO:1001277 -name: three_prime_recoding_site -is_a: SO:1001268 ! 
recoding_stimulatory_region - -[Term] -id: SO:1001279 -name: three_prime_stem_loop_structure -def: "The stem-loop secondary structural element downstream of the redefined region." [SO:ke] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001280 -name: five_prime_recoding_site -def: "The recoding signal found 5' of the redefined codon." [SO:ke] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001281 -name: flanking_three_prime_quadruplet_recoding_signal -def: "Four base pair sequence immediately downstream of the redefined region. The redefined region is a frameshift site. The quadruplet is 2 overlapping codons." [SO:ke] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001282 -name: UAG_stop_codon_signal -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001283 -name: UAA_stop_codon_signal -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001284 -name: regulon -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." [ISBN:0198506732] -subset: SOFA -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:1001285 -name: UGA_stop_codon_signal -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001286 -name: three_prime_repeat_recoding_signal -def: "It is a downstream sequence important for recoding that contains repetitive elements." [SO:ke] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001287 -name: distant_three_prime_recoding_signal -def: "A recoding signal that is found many hundreds of nucleotides 3' of a redefined stop codon." [http://www.ncbi.nlm.nih.gov 80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8709208&dopt=Abstract] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001288 -name: stop_codon_signal -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:2000061 -name: databank_entry -def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." 
[SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -is_a: SO:0000695 ! reagent - -[Typedef] -id: adjacent_to -name: adjacent_to -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence." [SO:ke] -subset: SOFA -domain: SO:0000110 ! located_sequence_feature -range: SO:0000110 ! located_sequence_feature -is_symmetric: true - -[Typedef] -id: associated_with -name: associated_with -comment: This relationship is vague and up for discussion. -is_symmetric: true - -[Typedef] -id: derives_from -name: derives_from -subset: SOFA -is_transitive: true - -[Typedef] -id: genome_of -name: genome_of - -[Typedef] -id: has_genome_location -name: has_genome_location -domain: SO:0000085 ! gene_by_genome_location -range: SO:0000704 ! gene -is_obsolete: true - -[Typedef] -id: has_origin -name: has_origin - -[Typedef] -id: has_quality -name: has_quality -comment: The relationship between a feature and an atrribute. - -[Typedef] -id: homologous_to -name: homologous_to -subset: SOFA -is_symmetric: true -is_a: similar_to ! similar_to - -[Typedef] -id: member_of -name: member_of -comment: A subtype of part_of.ninverse is collection_of.nWinston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. -subset: SOFA -is_a: part_of ! part_of - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -subset: SOFA -is_a: homologous_to ! homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: paralogous_to -name: paralogous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! 
homologous_to - -[Typedef] -id: part_of -name: part_of -subset: SOFA -is_transitive: true - -[Typedef] -id: position_of -name: position_of - -[Typedef] -id: regulated_by -name: regulated_by -is_obsolete: true - -[Typedef] -id: sequence_of -name: sequence_of - -[Typedef] -id: similar_to -name: similar_to -subset: SOFA -is_symmetric: true - diff --git a/annotation/NBIS/Ontology/SO/so_2_2.obo b/annotation/NBIS/Ontology/SO/so_2_2.obo deleted file mode 100644 index 8775cdb88..000000000 --- a/annotation/NBIS/Ontology/SO/so_2_2.obo +++ /dev/null @@ -1,10208 +0,0 @@ -format-version: 1.2 -date: 30:08:2007 13:40 -saved-by: kareneilbeck -auto-generated-by: OBO-Edit 1.101 -subsetdef: biosapiens "biosapiens protein feature ontology" -subsetdef: SOFA "SO feature annotation" -default-namespace: sequence -remark: autogenerated-by\: DAG-Edit version 1.417\nsaved-by\: eilbeck\ndate\: Tue May 11 15\:18\:44 PDT 2004\nversion\: $Revision\: 1.45 $ - -[Term] -id: SO:0000000 -name: Sequence_Ontology -subset: SOFA - -[Term] -id: SO:0000001 -name: region -def: "A sequence_feature with an extent greater than zero." [SO:ke] -subset: SOFA -synonym: "sequence" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000002 -name: sequence_secondary_structure -def: "A folded sequence." [SO:ke] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000003 -name: G_quartet -def: "G-quartets are unusual nucleic acid structures consisting of a planar arrangement where each guanine is hydrogen bonded by hoogsteen pairing to another guanine in the quartet." [http://www.library.csi.cuny.edu/~davis/molbiol/lecture_notes/post-transcriptional_processes/RNACapping.pdf] -synonym: "G-quartet" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000004 -name: interior_coding_exon -is_a: SO:0000195 ! 
coding_exon - -[Term] -id: SO:0000005 -name: satellite_DNA -def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000006 -name: PCR_product -def: "A region amplified by a PCR reaction." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "amplicon" RELATED [] -synonym: "PCR product" EXACT [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000007 -name: read_pair -def: "A pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000008 -name: gene_sensu_your_favorite_organism -is_obsolete: true - -[Term] -id: SO:0000009 -name: gene_class -is_obsolete: true - -[Term] -id: SO:0000010 -name: protein_coding -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000011 -name: non_protein_coding -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000012 -name: scRNA_primary_transcript -def: "The primary transcript of any one of several small cytoplasmic RNA molecule spresent in the cytoplasm and sometimes nucleus of a eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -synonym: "scRNA transcript" EXACT [] -synonym: "small cytoplasmic RNA" RELATED [] -synonym: "small cytoplasmic RNA transcript" EXACT [] -synonym: "small_cytoplasmic_RNA" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000013 -name: scRNA -def: "Any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a eukaryote." 
[http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -subset: SOFA -synonym: " small cytoplasmic RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000014 -name: INR_motif -def: "A sequence element characteristic of some RNA polymerase II promoters required for the correct positioning of the polymerase for the start of transcription. Overlaps the TSS. The mammalian consensus sequence is YYAN(T|A)YY; the Drosophila consensus sequence is TCA(G|T)t(T|C). In each the A is at position +1 with respect to the TSS. Functionally similar to the TATA box element." [PMID:12651739] -synonym: "initiator" EXACT [] -synonym: "initiator motif" EXACT [] -is_a: SO:0000844 ! RNA_II_promoter_region - -[Term] -id: SO:0000015 -name: DPE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters; always found with the INR_motif. Positioned from +28 to +32 with respect to the TSS (+1). Consensus sequence (A|G)G(A|T)(C|T)(G|A|C). Required for TFIID binding to TATA-less promoters." [PMID:12651739] -synonym: "downstream core promoter element" EXACT [] -is_a: SO:0000844 ! RNA_II_promoter_region - -[Term] -id: SO:0000016 -name: BRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements at -37 to -32 with respect to the TSS (+1). Consensus sequence is (G|C)(G|C)(G|A)CGCC. Binds TFIIB." [PMID:12651739] -synonym: " transcription factor B-recognition element" EXACT [] -synonym: "B-recognition element" EXACT [] -synonym: "TFIIB recognition element" RELATED [] -is_a: SO:0000844 ! RNA_II_promoter_region - -[Term] -id: SO:0000017 -name: PSE_motif -def: "A sequence element characteristic of the promoters of snRNA genes transcribed by RNA polymerase II or by RNA polymerase III. Located between -45 and -60 relative to the TSS. The human PSE_motif consensus sequence is TCACCNTNA(C|G)TNAAAAG(T|G)." [PMID:12651739] -synonym: "proximal sequence element" EXACT [] -is_a: SO:0000844 ! 
RNA_II_promoter_region - -[Term] -id: SO:0000018 -name: linkage_group -def: "A group of loci that can be grouped in a linear order representing the different degrees of linkage among the genes concerned." [ISBN:038752046] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000019 -name: RNA_hairpin_loop -def: "A region of single stranded RNA where the 3 dimensional structure folds back upon and base pairing occurs. The structure when drawn in 2D resembles a hairpin." [SO:ke] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000020 -name: RNA_internal_loop -def: "A region of double stranded RNA where the bases do not conform to WC base pairing. The loop is closed on both sides by canonical base pairing. If the interuption to base pairing occurs on one strand only, it is known as a bulge." [SO:ke] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000021 -name: asymmetric_RNA_internal_loop -def: "An internal RNA loop where one of the strands includes more bases than the corresponding region on the other strand." [SO:ke] -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000022 -name: A_minor_RNA_motif -def: "A region forming a motif, composed of adenines, where the minor groove edges are inserted into the minor groove of another helix." [SO:ke] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000023 -name: K_turn_RNA_motif -def: "The kink turn (K-turn) is an RNA structural motif that creates a sharp (~120 degree) bend between two continuous helices." [SO:ke] -synonym: "K-turn" EXACT [] -synonym: "kink turn" EXACT [] -synonym: "kink-turn motif" EXACT [] -is_a: SO:0000021 ! asymmetric_RNA_internal_loop - -[Term] -id: SO:0000024 -name: Sarcin_like_RNA_motif -def: "A loop in ribosomal RNA containing the sites of attack for ricin and sarcin." [http://proton.chem.yale.edu/pdf/7897662.pdf] -synonym: " sarcin/ricin RNA domain" EXACT [] -synonym: "sarcin/ricin domain" EXACT [] -synonym: "sarcin/ricin loop" EXACT [] -is_a: SO:0000021 ! 
asymmetric_RNA_internal_loop - -[Term] -id: SO:0000025 -name: symmetric_RNA_internal_loop -def: "An internal RNA loop where the extent of the loop on both stands is the same size." [SO:ke] -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000026 -name: RNA_junction_loop -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000027 -name: RNA_hook_turn -synonym: "hook turn" RELATED [] -synonym: "hook-turn motif" EXACT [] -is_a: SO:0000026 ! RNA_junction_loop - -[Term] -id: SO:0000028 -name: base_pair -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000029 -name: WC_base_pair -def: "The canonical base pair, where two bases interact via WC edges, with glycosidic bonds oriented cis relative to the axis of orientation." [PMID:12177293] -synonym: "canonical base pair" EXACT [] -synonym: "Watson Crick base pair" EXACT [] -synonym: "Watson-Crick base pair" RELATED [] -synonym: "Watson-Crick pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000030 -name: sugar_edge_base_pair -def: "A type of non-canonical base-pairing." [PMID:12177293] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000031 -name: aptamer -def: "DNA or RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http://aptamer.icmb.utexas.edu] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000032 -name: DNA_aptamer -def: "DNA molecules that have been selected from random pools based on their ability to bind other molecules." [http:aptamer.icmb.utexas.edu] -is_a: SO:0000031 ! aptamer - -[Term] -id: SO:0000033 -name: RNA_aptamer -def: "RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http://aptamer.icmb.utexas.edu] -is_a: SO:0000031 ! aptamer - -[Term] -id: SO:0000034 -name: morpholino -def: "Morpholino oligos are synthesized from four different Morpholino subunits, each of which contains one of the four genetic bases (A, C, G, T) linked to a 6-membered morpholine ring. 
Eighteen to 25 subunits of these four subunit types are joined in a specific order by non-ionic phosphorodiamidate intersubunit linkages to give a Morpholino." [http://www.gene-tools.com/Morpholinos/morpholinos.HTML] -synonym: "morpholino oligo" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000035 -name: riboswitch -def: "A riboswitch is a part of an mRNA that can act as a direct sensor of small molecules to control their own expression. A riboswitch is a cis element in the 5' end of an mRNA, that acts as a direct sensor of metabolites." [PMID:2820954] -synonym: "riboswitch RNA" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000036 -name: matrix_attachment_site -def: "A DNA region that is required for the binding of chromatin to the nuclear matrix." [SO:ma] -synonym: "" RELATED [] -synonym: "MAR" EXACT [] -synonym: "matrix association region" EXACT [] -synonym: "matrix attachment region" EXACT [] -synonym: "nuclear matrix association region" EXACT [] -synonym: "nuclear matrix attachment site" EXACT [] -synonym: "S/MAR" EXACT [] -synonym: "S/MAR element" RELATED [] -synonym: "scaffold attachment site" EXACT [] -synonym: "scaffold matrix attachment region" EXACT [] -synonym: "SMAR" EXACT [] -is_a: SO:0000626 ! chromosomal_regulatory_element - -[Term] -id: SO:0000037 -name: locus_control_region -def: "A DNA region that includes DNAse hypersensitive sites located 5' to a gene that confers the high-level, position-independent, and copy number-dependent expression to that gene." [SO:ma] -synonym: "LCR" EXACT [] -synonym: "locus control element" RELATED [] -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000038 -name: match_set -def: "A collection of match parts." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000039 -name: match_part -def: "A part of a match, for example an hsp from blast isa match_part." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000343 ! 
match - -[Term] -id: SO:0000040 -name: genomic_clone -def: "A clone of a DNA region of a genome." [SO:ma] -is_a: SO:0000151 ! clone - -[Term] -id: SO:0000041 -name: operation -def: "An operation that can be applied to a sequence, that results in a chnage." [SO:ke] -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0000042 -name: pseudogene_attribute -def: "An attribute of a pseudogene (SO:0000336)." [SO:ma] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000043 -name: processed_pseudogene -def: "A pseudogene that is processed." [SO:xp] -synonym: "pseudogene by reverse transcription" RELATED [] -is_a: SO:0000336 ! implied link automatically realized ! pseudogene -intersection_of: SO:0000336 ! pseudogene -intersection_of: has_quality SO:0000900 ! processed -relationship: has_quality SO:0000900 ! implied link automatically realized ! processed - -[Term] -id: SO:0000044 -name: pseudogene_by_unequal_crossing_over -def: "A pseudogene caused by unequal crossing over at recombination." [SO:ke] -is_a: SO:0000336 ! implied link automatically realized ! pseudogene -intersection_of: SO:0000336 ! pseudogene -intersection_of: has_quality SO:0000901 ! unequally_crossed_over -relationship: has_quality SO:0000901 ! implied link automatically realized ! unequally_crossed_over - -[Term] -id: SO:0000045 -name: delete -def: "To remove a subsection of sequence." [SO:ke] -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000046 -name: insert -def: "To insert a subsection of sequence." [SO:ke] -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000047 -name: invert -def: "To invert a subsection of sequence." [SO:ke] -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000048 -name: substitute -def: "To substitute a subsection of sequence for another." [SO:ke] -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000049 -name: translocate -def: "To translocate a subsection of sequence." [SO:ke] -is_a: SO:0000041 ! 
operation - -[Term] -id: SO:0000050 -name: gene_part -def: "A part of a gene, that has no other route in the ontology back to region. This concept is necessary for logical inference as these parts must have the properties of region. It is also allows us to associate all the parts of genes with a gene." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000051 -name: probe -def: "A DNA sequence used experimentally to detect the presence or absence of a complementary nucleic acid." [SO:ma] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000052 -name: assortment_derived_deficiency -synonym: "assortment-derived_deficiency" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000053 -name: mutation_affecting_regulatory_region -def: "A kind of mutation that affects a regulatory region of a gene." [SO:ke] -is_a: SO:1000132 ! mutation - -[Term] -id: SO:0000054 -name: aneuploid -def: "A kind of chromosome variation where the chromosome complement that is not an exact multiple of the haploid number." [SO:ke] -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0000055 -name: hyperploid -def: "A kind of chromosome variation where the chromosome complement that is not an exact multiple of the haploid number as extra chromosomes are present." [SO:ke] -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000056 -name: hypoploid -def: "A kind of chromosome variation where the chromosome complement that is not an exact multiple of the haploid number as some chromosomes are missing." [SO:ke] -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000057 -name: operator -def: "A regulatory element of an operon to which activators or repressors bind hereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -synonym: "operator segment" EXACT [] -is_a: SO:0000752 ! 
gene_group_regulatory_region - -[Term] -id: SO:0000058 -name: assortment_derived_aneuploid -synonym: "assortment-derived_aneuploid" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000059 -name: nuclease_binding_site -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0000060 -name: compound_chromosome_arm -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:0000061 -name: restriction_enzyme_binding_site -synonym: "restriction endonuclease binding site" EXACT [] -synonym: "restriction endonuclease recognition site" RELATED [] -synonym: "restriction enzyme recognition site" RELATED [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000062 -name: deficient_intrachromosomal_transposition -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:0000063 -name: deficient_interchromosomal_transposition -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:0000064 -name: gene_by_transcript_attribute -comment: This classes of attributes was added by MA to allow the broad description of genes based on qualities of the transcript(s). A product of SO meeting 2004. -is_obsolete: true - -[Term] -id: SO:0000065 -name: free_chromosome_arm -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000066 -name: gene_by_polyadenylation_attribute -is_obsolete: true - -[Term] -id: SO:0000067 -name: gene_to_gene_feature -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000068 -name: overlapping -def: "An attribute describing a gene that has sequence that overlaps the sequence of another gene." [SO:ke] -is_a: SO:0000067 ! gene_to_gene_feature - -[Term] -id: SO:0000069 -name: inside_intron -def: "An attribute to describe a gene when it is located within the intron of another gene." [SO:ke] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000070 -name: inside_intron_antiparallel -def: "An attribute to describe a gene when it is located within the intron of another gene and on the opposite strand." 
[SO:ke] -is_a: SO:0000069 ! inside_intron - -[Term] -id: SO:0000071 -name: inside_intron_parallel -def: "An attribute to describe a gene when it is located within the intron of another gene and on the same strand." [SO:ke] -is_a: SO:0000069 ! inside_intron - -[Term] -id: SO:0000072 -name: end_overlapping_gene -is_obsolete: true - -[Term] -id: SO:0000073 -name: five_prime_three_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's 3' region." [SO:ke] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000074 -name: five_prime_five_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's five prime region." [SO:ke] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000075 -name: three_prime_three_prime_overlap -def: "An attribute to describe a gene when the 3' region overlaps with another gene's 3' region." [SO:ke] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000076 -name: three_prime_five_prime_overlap -def: "An attribute to describe a gene when the 3' region overlaps with another gene's 5' region." [SO:ke] -synonym: "5' 3' overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000077 -name: antisense -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000078 -name: polycistronic_transcript -def: "A transcript that is polycistronic." [SO:xp] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000880 ! polycistronic -relationship: has_quality SO:0000880 ! implied link automatically realized ! polycistronic - -[Term] -id: SO:0000079 -name: dicistronic_transcript -def: "A transcript that is dicistronic." [SO:ke] -is_a: SO:0000078 ! implied link automatically realized ! polycistronic_transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000879 ! dicistronic -relationship: has_quality SO:0000879 ! 
implied link automatically realized ! dicistronic - -[Term] -id: SO:0000080 -name: operon_member -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0000081 -name: gene_array_member -synonym: "gene array member" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000082 -name: processed_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000083 -name: macronuclear_sequence -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000084 -name: micronuclear_sequence -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000085 -name: gene_by_genome_location -is_obsolete: true - -[Term] -id: SO:0000086 -name: gene_by_organelle_of_genome -is_obsolete: true - -[Term] -id: SO:0000087 -name: nuclear_gene -def: "A gene from nuclear sequence." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000738 ! nuclear_sequence -relationship: has_origin SO:0000738 ! implied link automatically realized ! nuclear_sequence - -[Term] -id: SO:0000088 -name: mt_gene -def: "A gene located in mitochondrial sequence." [SO:xp] -synonym: "mitochondrial gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence -relationship: has_origin SO:0000737 ! implied link automatically realized ! mitochondrial_sequence - -[Term] -id: SO:0000089 -name: kinetoplast_gene -def: "A gene located in kinetoplast sequence." [SO:xp] -is_a: SO:0000088 ! implied link automatically realized ! mt_gene -intersection_of: SO:0000088 ! mt_gene -intersection_of: has_origin SO:0000741 ! kinetoplast_sequence -relationship: has_origin SO:0000741 ! implied link automatically realized ! kinetoplast_sequence - -[Term] -id: SO:0000090 -name: plastid_gene -def: "A gene from plastid sequence." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! 
gene -intersection_of: has_origin SO:0000740 ! plastid_sequence -relationship: has_origin SO:0000740 ! implied link automatically realized ! plastid_sequence - -[Term] -id: SO:0000091 -name: apicoplast_gene -def: "A gene from apicoplast sequence." [SO:xp] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000743 ! apicoplast_sequence -relationship: has_origin SO:0000743 ! implied link automatically realized ! apicoplast_sequence - -[Term] -id: SO:0000092 -name: ct_gene -def: "A gene from chloroplast sequence." [SO:xp] -synonym: "chloroplast gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000745 ! chloroplast_sequence -relationship: has_origin SO:0000745 ! implied link automatically realized ! chloroplast_sequence - -[Term] -id: SO:0000093 -name: chromoplast_gene -def: "A gene from chromoplast_sequence." [SO:xp] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000744 ! chromoplast_sequence -relationship: has_origin SO:0000744 ! implied link automatically realized ! chromoplast_sequence - -[Term] -id: SO:0000094 -name: cyanelle_gene -def: "A gene from cyanelle sequence." [SO:xp] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000746 ! cyanelle_sequence -relationship: has_origin SO:0000746 ! implied link automatically realized ! cyanelle_sequence - -[Term] -id: SO:0000095 -name: leucoplast_gene -def: "A plastid gene from leucoplast sequence." [SO:xp] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000747 ! leucoplast_sequence -relationship: has_origin SO:0000747 ! 
implied link automatically realized ! leucoplast_sequence - -[Term] -id: SO:0000096 -name: proplastid_gene -def: "A gene from proplastid sequence." [SO:ke] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000748 ! proplastid_sequence -relationship: has_origin SO:0000748 ! implied link automatically realized ! proplastid_sequence - -[Term] -id: SO:0000097 -name: nucleomorph_gene -def: "A gene from nucleomorph sequence." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence -relationship: has_origin SO:0000739 ! implied link automatically realized ! nucleomorphic_sequence - -[Term] -id: SO:0000098 -name: plasmid_gene -def: "A gene from plasmid sequence." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000749 ! plasmid_location -relationship: has_origin SO:0000749 ! implied link automatically realized ! plasmid_location - -[Term] -id: SO:0000099 -name: proviral_gene -def: "A gene from proviral sequence." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000751 ! proviral_location -relationship: has_origin SO:0000751 ! implied link automatically realized ! proviral_location - -[Term] -id: SO:0000100 -name: endogenous_retroviral_gene -def: "A proviral gene with origin endogenous retrovirus." [SO:xp] -is_a: SO:0000099 ! implied link automatically realized ! proviral_gene -intersection_of: SO:0000099 ! proviral_gene -intersection_of: has_origin SO:0000903 ! endogenous_retroviral_sequence -relationship: has_origin SO:0000903 ! implied link automatically realized ! 
endogenous_retroviral_sequence - -[Term] -id: SO:0000101 -name: transposable_element -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -synonym: " transposon" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000102 -name: expressed_sequence_match -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -def: "The end of the clone insert." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000104 -name: polypeptide -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000001 ! region -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000105 -name: chromosome_arm -def: "A region of the chromosome between the centromere and the telomere. Human chromosomes have two arms, the p arm (short) and the q arm (long) which are separated from each other by the centromere." [http://www.exactsciences.com/cic/glossary/_index.htm] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000106 -name: non_capped_primary_transcript -is_obsolete: true - -[Term] -id: SO:0000107 -name: sequencing_primer -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000108 -name: mRNA_with_frameshift -def: "An mRNA with a frameshift." [SO:xp] -synonym: "frameshifted mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000865 ! frameshift -relationship: has_quality SO:0000865 ! implied link automatically realized ! 
frameshift - -[Term] -id: SO:0000109 -name: sequence_variant_obs -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000110 -name: sequence_feature -def: "An extent of biological sequence." [SO:ke] -subset: SOFA -synonym: "located sequence feature" RELATED [] -synonym: "located_sequence_feature" EXACT [] -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0000111 -name: transposable_element_gene -def: "A gene encoded within a transposable element. For example gag, int, env and pol are the transposable element genes of the TY element in yeast." [SO:ke] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: part_of SO:0000101 ! transposable_element -relationship: part_of SO:0000101 ! implied link automatically realized ! transposable_element - -[Term] -id: SO:0000112 -name: primer -def: "A short preexisting polynucleotide chain to which new deoxyribonucleotides can be added by DNA polymerase." [http://www.ornl.gov/TechResources/Human_Genome/publicat/primer2001/glossary.html] -subset: SOFA -synonym: "primer oligonucleotide" EXACT [] -synonym: "primer polynucleotide" EXACT [] -synonym: "primer sequence" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000113 -name: proviral_region -def: "A viral sequence which has integrated into a host genome." [SO:ke] -subset: SOFA -synonym: "proviral sequence" RELATED [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000114 -name: methylated_C -def: "A methylated deoxy-cytosine." [SO:ke] -subset: SOFA -synonym: "methylated C" EXACT [] -synonym: "methylated cytosine" EXACT [] -synonym: "methylated cytosine base" EXACT [] -synonym: "methylated cytosine residue" EXACT [] -is_a: SO:0000306 ! 
methylated_base_feature - -[Term] -id: SO:0000115 -name: transcript_feature -is_obsolete: true - -[Term] -id: SO:0000116 -name: edited -def: "An attribute describing a sequence that is modified by editing." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000117 -name: transcript_with_readthrough_stop_codon -is_obsolete: true - -[Term] -id: SO:0000118 -name: transcript_with_translational_frameshift -def: "A transcript with a translational frameshift." [SO:xp] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000887 ! translational_frameshift -relationship: has_quality SO:0000887 ! implied link automatically realized ! translational_frameshift - -[Term] -id: SO:0000119 -name: regulated -def: "An attribute to describe a sequence that is regulated." [SO:ke] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns. -subset: SOFA -synonym: "pre mRNA" RELATED [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000121 -name: forward_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "forward primer oligonucleotide" EXACT [] -synonym: "forward primer polynucleotide" EXACT [] -synonym: "forward primer sequence" EXACT [] -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000122 -name: RNA_sequence_secondary_structure -def: "A folded RNA sequence." [SO:ke] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000123 -name: transcriptionally_regulated -def: "An attribute describing a gene that is regulated at transcription." [SO:ma] -comment: By:. -is_a: SO:0000119 ! 
regulated - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -def: "Expressed in relatively constants amounts with out regard to cellular environmental conditions such as the concentration of a particular substrate." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -def: "An inducer molecule is required for transcription to occur." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -def: "An repressor molecule is required for transcription to stop." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000127 -name: silenced_gene -def: "A gene that is silenced." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000893 ! silenced -relationship: has_quality SO:0000893 ! implied link automatically realized ! silenced - -[Term] -id: SO:0000128 -name: gene_silenced_by_DNA_modification -def: "A silenced gene by DNA modification." [SO:xp] -is_a: SO:0000127 ! implied link automatically realized ! silenced_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000894 ! silenced_by_DNA_modification -relationship: has_quality SO:0000894 ! implied link automatically realized ! silenced_by_DNA_modification - -[Term] -id: SO:0000129 -name: gene_silenced_by_DNA_methylation -def: "A silenced gene silenced DNA methylation." [SO:xp] -synonym: "methylation-silenced gene" EXACT [] -is_a: SO:0000128 ! implied link automatically realized ! gene_silenced_by_DNA_modification -intersection_of: SO:0000128 ! gene_silenced_by_DNA_modification -intersection_of: has_quality SO:0000895 ! silenced_by_DNA_methylation -relationship: has_quality SO:0000895 ! implied link automatically realized ! 
silenced_by_DNA_methylation - -[Term] -id: SO:0000130 -name: post_translationally_regulated -def: "An attribute describing a gene that is regulated after it has been translated." [SO:ke] -synonym: "post-translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000131 -name: translationally_regulated -def: "An attribute describing a gene that is regulated as it is translated." [SO:ke] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000133 -name: epigenetically_modified -def: "This attribute describes a gene where heritable changes other than those in the DNA sequence occur. These changes include: modification to the DNA (such as DNA methylation, the covalent modification of cytosine), and post-translational modification of histones." [SO:ke] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000134 -name: imprinted -def: "Imprinted genes are epigenetically modified genes that are expressed monoallelically according to their parent of origin." [SO:ke] -is_a: SO:0000119 ! regulated -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000135 -name: maternally_imprinted -def: "The maternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -def: "The paternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -def: "Allelic exclusion is a process occuring in diploid organisms, where a gene is inactivated and not expressed in that cell." [SO:ke] -comment: Exapmles are x-innactivation and immunoglobulin formation. 
-is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000138 -name: gene_rearranged_at_DNA_level -def: "An epigenetically modified gene, rearranged at the DNA level." [SO:xp] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000904 ! rearranged_at_DNA_level -relationship: has_quality SO:0000904 ! implied link automatically realized ! rearranged_at_DNA_level - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -comment: Gene:. -subset: SOFA -is_a: SO:0000837 ! UTR_region - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of sequence which may be used to manufacture a longer assembled, sequence." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000145 -name: recoded_codon -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000146 -name: capped -def: "An attribute describing when a sequence, usually an mRNA is capped by the addition of a modified guanine nucleotide at the 5' end." 
[SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region that codes for portion of spliced messenger RNA (SO:0000234); may contain 5'-untranslated region (SO:0000204), all open reading frames (SO:0000236) and 3'-untranslated region (SO:0000205)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unvailable bases." [SO:ls] -subset: SOFA -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in E. coli or some other organism." [http://www.geospiza.com/community/support/glossary/] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. -synonym: "yeast artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "bacterial artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000154 -name: PAC -def: "P1 Artificial Chromosome. These vectors can hold large inserts, typically 80-200 kb, and propagate in E. coli as a single copy episome." [http://www.ncbi.nlm.nih.gov/genome/guide/mouse/glossary.htm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "P1" EXACT [] -synonym: "P1 artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000155 -name: plasmid -def: "A self-replicating circular DNA molecule that is distinct from a chromosome in the organism." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "plasmid sequence" EXACT [] -is_a: SO:0000695 ! reagent -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as a plasmid or packaged as a phage,since they retain the lambda cos sites." [SO:ma] -comment: Paper: vans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1):9-20. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cosmid vector" EXACT [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. 
When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma] -synonym: "phagemid vector" RELATED [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilises the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource: mapping and analysis of 96 clones. Genomics 1996. -synonym: "fosmid vector" RELATED [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000159 -name: deletion -def: "The point at which a deletion occured." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -is_a: SO:0000001 ! region -is_a: SO:0001059 ! sequence_alteration -relationship: sequence_of SO:0000045 ! delete - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_obsolete: true - -[Term] -id: SO:0000161 -name: methylated_A -def: "A methylated adenine." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "The position where intron is excised." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_splice_site -def: "The junction between the 3 prime end of an exon and the following intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0000162 ! 
splice_site - -[Term] -id: SO:0000164 -name: three_prime_splice_site -def: "The junction between the 3 prime end of an intron and the following exon." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -is_a: SO:0000727 ! CRM -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000166 -name: enhancer_bound_by_factor -def: "An enhancer bound by a factor." [SO:xp] -is_a: SO:0000165 ! implied link automatically realized ! enhancer -intersection_of: SO:0000165 ! enhancer -intersection_of: has_quality SO:0000277 ! bound_by_factor -relationship: has_quality SO:0000277 ! implied link automatically realized ! bound_by_factor - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology.\nThe region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." 
[SO:ke] -synonym: "pol I promoter" EXACT [] -synonym: "polymerase I promoter" EXACT [] -synonym: "RNA polymerase A promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke] -synonym: "pol II promoter" RELATED [] -synonym: "polymerase II promoter" EXACT [] -synonym: "RNA polymerase B promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." [SO:ke] -synonym: "pol III promoter" EXACT [] -synonym: "polymerase III promoter" EXACT [] -synonym: "RNA polymerase C promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "CAAT-box" EXACT [] -is_a: SO:0000844 ! RNA_II_promoter_region - -[Term] -id: SO:0000173 -name: GC_rich_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "GC-rich region" EXACT [] -is_a: SO:0000844 ! RNA_II_promoter_region - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "Goldstein-Hogness box" EXACT [] -is_a: SO:0000832 ! 
promoter_region -relationship: part_of SO:0000170 ! RNApol_II_promoter -relationship: part_of SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-10 signal" EXACT [] -synonym: "minus;10" RELATED [] -synonym: "Pribnow box" EXACT [] -synonym: "Pribnow Schaller box" EXACT [] -synonym: "Pribnow-Schaller box" EXACT [] -is_a: SO:0000843 ! bacterial_RNApol_promoter_region - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-35 signal" EXACT [] -synonym: "minus;35" EXACT [] -is_a: SO:0000843 ! bacterial_RNApol_promoter_region - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." 
[http://www.genpromag.com/scripts/glossary.asp?LETTER=R] -synonym: "class I" RELATED [] -synonym: "class I transposon" EXACT [] -synonym: "retrotransposon element" EXACT [] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." [SO:ke] -synonym: "class II" RELATED [] -synonym: "class II transposon" EXACT [] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." [SO:ke] -synonym: "long terminal repeat retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." 
[SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A segment of DNA that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -synonym: "5' intron" EXACT [] -synonym: "5' intron sequence" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000191 -name: interior_intron -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -synonym: "3' intron" EXACT [] -synonym: "3' intron sequence" RELATED [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A polymorphism detectable by the size differences in DNA fragments generated by a restriction enzyme." [PMID:6247908] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "LINE" EXACT [] -synonym: "Long interspersed element" EXACT [] -synonym: "Long interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon, including the stop_codon." [SO:ke] -is_a: SO:0000147 ! 
exon - -[Term] -id: SO:0000196 -name: five_prime_exon_coding_region -def: "The sequence of the 5' exon that encodes for protein." [SO:ke] -is_a: SO:0000195 ! coding_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_exon_coding_region -def: "The sequence of the 3' exon that encodes for protein." [SO:ke] -is_a: SO:0000195 ! coding_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke] -synonym: "translocated sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration -relationship: sequence_of SO:0000049 ! translocate - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -synonym: "5' coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000201 -name: interior_exon -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The exon that is most 3-prime on a given transcript." [SO:ma] -synonym: "3' coding exon" RELATED [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime and three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -is_a: SO:0000203 ! 
UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." [SO:ke] -synonym: "Short interspersed element" EXACT [] -synonym: "Short interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_variation -synonym: "simple sequence lenght variation" EXACT [] -synonym: "simple sequence length polymorphism" RELATED [] -is_a: SO:0000248 ! sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "terminal inverted repeat" EXACT [] -synonym: "TIR element" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -synonym: "ribosomal RNA primary transcript" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253)." [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -def: "A primary transcript encoding alanyl tRNA." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." 
[SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear mRNA (SO:0000274)." [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -is_a: SO:0000483 ! 
nc_primary_transcript - -[Term] -id: SO:0000233 -name: processed_transcript -def: "A transcript which has undergone the necessary modifications for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processd_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -is_a: SO:0000233 ! processed_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds a TF complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The inframe interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SO:ma, SO:rb] -comment: The definition was modified by Rama. This terms now basically is the same as a CDS. This must be revised. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterised by large modular imperfect long inverted repeats." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "long inverted repeat element" RELATED [] -synonym: "LVR element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The DNA sequences extending on either side of a specific locus." [http://biotech.icmb.utexas.edu/search/dict-search.mhtml] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000240 -name: chromosome_variation -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0000241 -name: internal_UTR -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polyicistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke] -synonym: "internal ribosomal entry sequence" EXACT [] -synonym: "internal ribosomal entry site" EXACT [] -synonym: "internal ribosome entry sequence" RELATED [] -synonym: "IRES" EXACT [] -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_obsolete: true - -[Term] -id: SO:0000246 -name: polyadenylated -def: "A attribute describing the addition of a poly A tail to the 3' end of a mRNA molecule." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000248 -name: sequence_length_variation -is_a: SO:1000002 ! 
substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: " ribosomal ribonucleic acid" EXACT [] -synonym: "ribsomal RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
-subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000254 -name: alanyl_tRNA -def: "A tRNA sequence that has an alanine anticodon, and a 3' binding region." [SO:ke] -synonym: "alanyl-transfer ribonucleic acid" EXACT [] -synonym: "alanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -def: "A tRNA sequence that has an asparagine anticodon, and a 3' binding region." [SO:ke] -synonym: "asparaginyl-transfer ribonucleic acid" EXACT [] -synonym: "asparaginyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -def: "A tRNA sequence that has an asaprtic acid anticodon, and a 3' binding region." [SO:ke] -synonym: "aspartyl-transfer ribonucleic acid" EXACT [] -synonym: "aspartyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -def: "A tRNA sequence that has a cysteine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "cysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "cysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -def: "A tRNA sequence that has a glutamine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "glutaminyl-transfer ribonucleic acid" EXACT [] -synonym: "glutaminyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -def: "A tRNA sequence that has a glutamic acid anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "glutamyl-transfer ribonucleic acid" EXACT [] -synonym: "glutamyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! 
tRNA - -[Term] -id: SO:0000261 -name: glycyl_tRNA -def: "A tRNA sequence that has a glycine acid anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "glycyl-transfer ribonucleic acid" RELATED [] -synonym: "glycyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000262 -name: histidyl_tRNA -def: "A tRNA sequence that has a histidine acid anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "histidyl-transfer ribonucleic acid" EXACT [] -synonym: "histidyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -def: "A tRNA sequence that has an isoleucine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "isoleucyl-transfer ribonucleic acid" EXACT [] -synonym: "isoleucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000264 -name: leucyl_tRNA -def: "A tRNA sequence that has a leucine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "leucyl-transfer ribonucleic acid" EXACT [] -synonym: "leucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000265 -name: lysyl_tRNA -def: "A tRNA sequence that has a lysine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "lysyl-transfer ribonucleic acid" EXACT [] -synonym: "lysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000266 -name: methionyl_tRNA -def: "A tRNA sequence that has a methionine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "methionyl-transfer ribonucleic acid" EXACT [] -synonym: "methionyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -def: "A tRNA sequence that has a phenyle alanine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "phenylalanyl-transfer ribonucleic acid" EXACT [] -synonym: "phenylalanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! 
tRNA - -[Term] -id: SO:0000268 -name: prolyl_tRNA -def: "A tRNA sequence that has a proline anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "prolyl-transfer ribonucleic acid" EXACT [] -synonym: "prolyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000269 -name: seryl_tRNA -def: "A tRNA sequence that has a serine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "seryl-transfer ribonucleic acid" RELATED [] -synonym: "seryl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000270 -name: threonyl_tRNA -def: "A tRNA sequence that has a threonine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "threonyl-transfer ribonucleic acid" EXACT [] -synonym: "threonyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -def: "A tRNA sequence that has a tryptophan anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "tryptophanyl-transfer ribonucleic acid" EXACT [] -synonym: "tryptophanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -def: "A tRNA sequence that has a tyrosine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "tyrosyl-transfer ribonucleic acid" EXACT [] -synonym: "tyrosyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000273 -name: valyl_tRNA -def: "A tRNA sequence that has a valine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "valyl-transfer ribonucleic acid" EXACT [] -synonym: "valyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000274 -name: snRNA -def: "Small non-coding RNA in the nucleoplasm. A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [ems:WB, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
-subset: SOFA -synonym: "small nuclear RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000275 -name: snoRNA -def: "Small nucleolar RNAs (snoRNAs) are involved in the processing and modification of rRNA in the nucleolus. There are two main classes of snoRNAs: the box C/D class, and the box H/ACA class. U3 snoRNA is a member of the box C/D class. Indeed, the box C/D element is a subset of the six short sequence elements found in all U3 snoRNAs, namely boxes A, A', B, C, C', and D. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpinprecursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." 
[PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000277 -name: bound_by_factor -def: "An attribute describing a sequence that is bound by another molecule." [SO:ke] -comment: Formerly called transcript_by_bound_factor. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000278 -name: transcript_bound_by_nucleic_acid -def: "A transcript that is bound by a nucleic acid." [SO:xp] -comment: Formerly called transcript_by_bound_nucleic_acid. -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000876 ! bound_by_nucleic_acid -relationship: has_quality SO:0000876 ! implied link automatically realized ! bound_by_nucleic_acid - -[Term] -id: SO:0000279 -name: transcript_bound_by_protein -def: "A transcript that is bound by a protein." [SO:xp] -comment: Formerly called transcript_by_bound_protein. -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000875 ! bound_by_protein -relationship: has_quality SO:0000875 ! implied link automatically realized ! bound_by_protein - -[Term] -id: SO:0000280 -name: engineered_gene -def: "A gene that is engineered." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000281 -name: engineered_foreign_gene -def: "A gene that is engineered and foreign." [SO:xp] -is_a: SO:0000280 ! implied link automatically realized ! engineered_gene -is_a: SO:0000285 ! implied link automatically realized ! foreign_gene -is_a: SO:0000805 ! implied link automatically realized ! engineered_foreign_region -intersection_of: SO:0000280 ! engineered_gene -intersection_of: has_quality SO:0000783 ! 
engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000282 -name: mRNA_with_minus_1_frameshift -def: "An mRNA with a minus 1 frameshift." [SO:xp] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000866 ! minus_1_frameshift -relationship: has_quality SO:0000866 ! implied link automatically realized ! minus_1_frameshift - -[Term] -id: SO:0000283 -name: engineered_foreign_transposable_element_gene -def: "A transposible_element that is engineered and foreign." [SO:xp] -is_a: SO:0000111 ! implied link automatically realized ! transposable_element_gene -is_a: SO:0000281 ! implied link automatically realized ! engineered_foreign_gene -intersection_of: SO:0000111 ! transposable_element_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartate and interupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000285 -name: foreign_gene -def: "A gene that is foreign." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000784 ! implied link automatically realized ! foreign - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "direct terminal repeat" RELATED [] -synonym: "long terminal repeat" EXACT [] -synonym: "LTR" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000287 -name: fusion_gene -def: "A gene that is a fusion." [SO:xp] -is_a: SO:0000704 ! 
implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000806 ! fusion -relationship: has_quality SO:0000806 ! implied link automatically realized ! fusion - -[Term] -id: SO:0000288 -name: engineered_fusion_gene -def: "A fusion gene that is engineered." [SO:xp] -is_a: SO:0000280 ! implied link automatically realized ! engineered_gene -is_a: SO:0000287 ! implied link automatically realized ! fusion_gene -intersection_of: SO:0000287 ! fusion_gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000289 -name: microsatellite -def: "A very short unit sequence of DNA (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite" EXACT [] -synonym: "dinucleotide repeat microsatellite locus" EXACT [] -synonym: "dinucleotide repeat microsatellite marker" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite marker" RELATED [] -synonym: "rinucleotide repeat microsatellite" EXACT [] -synonym: "trinucleotide repeat microsatellite locus" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_obsolete: true - -[Term] -id: SO:0000293 -name: engineered_foreign_repetitive_element -def: "A repetitive element that is engineered and foreign." [SO:xp] -is_a: SO:0000657 ! implied link automatically realized ! repeat_region -is_a: SO:0000805 ! implied link automatically realized ! engineered_foreign_region -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000783 ! 
engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -synonym: "U12-dependent intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: " ori" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "displacement loop" RELATED [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -is_a: SO:0000001 ! region - -[Term] -id: SO:0000299 -name: specific_recombination_site -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -is_a: SO:0000299 ! 
specific_recombination_site - -[Term] -id: SO:0000301 -name: vertebrate_immune_system_gene_recombination_feature -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interupted palidrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_base_site -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G or (in RNA) U." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -is_a: SO:0000305 ! modified_base_site - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_obsolete: true - -[Term] -id: SO:0000309 -name: computed_feature -is_obsolete: true - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_obsolete: true - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to: -is_obsolete: true - -[Term] -id: SO:0000312 -name: experimentally_determined -def: "Attribute to describe a feature that has been experiemntally verified." [SO:ke] -is_a: SO:0000789 ! validated - -[Term] -id: SO:0000313 -name: stem_loop -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "stem-loop" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: transcription_start_site -def: "The site where transcription begins." [SO:ke] -subset: SOFA -synonym: "TSS" EXACT [] -is_a: SO:0000699 ! junction -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000151 ! clone - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." 
[SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke] -is_a: SO:0000344 ! splice_enhancer -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000321 -name: mRNA_with_plus_1_frameshift -def: "An mRNA with a plus 1 frameshift." [SO:ke] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000868 ! plus_1_frameshift -relationship: has_quality SO:0000868 ! implied link automatically realized ! plus_1_frameshift - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "translation start" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -is_a: SO:0000324 ! 
tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke] -synonym: "translation_end" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray oligonucleotide" EXACT [] -is_a: SO:0000051 ! probe -is_a: SO:0000324 ! tag -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000329 -name: mRNA_with_plus_2_frameshift -def: "An mRNA with a plus 2 frameshift." [SO:xp] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000869 ! plus_2_framshift -relationship: has_quality SO:0000869 ! implied link automatically realized ! plus_2_framshift - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! processed_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! 
conserved_region - -[Term] -id: SO:0000335 -name: mRNA_with_minus_2_frameshift -def: "A mRNA with a minus 2 frameshift." [SO:ke] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000867 ! minus_2_frameshift -relationship: has_quality SO:0000867 ! implied link automatically realized ! minus_2_frameshift - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITES do not encode proteins." [http:www.pnas.org/cgi/content/full/97/18/10083] -synonym: "miniature inverted repeat transposable element" EXACT [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome whioch promotes recombination." [SO:rd] -is_a: SO:0000298 ! 
recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "Expressed Sequence Tag: The sequence of a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [http://genomics.phrma.org/lexicon/e.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000695 ! reagent -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000346 -name: loxP_site -synonym: "Cre-recombination target region" RELATED [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -is_a: SO:0000343 ! 
match - -[Term] -id: SO:0000348 -name: nucleic_acid -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -is_a: SO:0000343 ! match - -[Term] -id: SO:0000350 -name: FRT_site -def: "An inversion site found on the Saccharomyces cerevisiae 2 micron plasmid." [SO:ma] -synonym: "FLP recombination target region" EXACT [] -is_a: SO:0000948 ! inversion_site - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "A sequence of nucleotides or amino acids that has been designed by an experimentor and which may, or may not, correspond with any natural sequence." [SO:ma] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000352 -name: DNA -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000353 -name: assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which is co-inherited as the result of the lack of historic recombination within it." [SO:ma] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000356 -name: RNA -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: flanked -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000358 -name: protein -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000359 -name: floxed -def: "An attribute describing sequence that is flanked by Lox-P sites." [SO:ke] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together signify a unique amino acid or the termination of translation." 
[http://genomics.phrma.org/lexicon/c.html] -subset: SOFA -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000361 -name: FRT_flanked -def: "An attribute to describe sequence that is flanked by the FLP recombinase recognition site, FRT." [SO:ke] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000362 -name: invalidated_by_chimeric_cDNA -def: "A cDNA clone constructed from more than one mRNA. Usually an experimental artifact." [SO:ma] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000363 -name: floxed_gene -def: "A transgene that is floxed." [SO:xp] -is_a: SO:0000902 ! implied link automatically realized ! transgene -intersection_of: SO:0000902 ! transgene -intersection_of: has_quality SO:0000359 ! floxed -relationship: has_quality SO:0000359 ! implied link automatically realized ! floxed - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposible element." [SO:ke] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "A region encoding an integrase which acts at a site adjacent to it (attI_site) to insert DNA which must include but is not limited to an attC_site." [SO:as] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: position_of SO:0000046 ! insert - -[Term] -id: SO:0000367 -name: attI_site -def: "A region within an integron, adjacent to an integrase, at which site specific recombination involving an attC_site takes place." [SO:as] -synonym: "" RELATED [] -synonym: "attI site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -is_a: SO:0000366 ! 
insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -is_obsolete: true - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/~smaloy/Glossary/C.html] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000373 -name: recombinationally_inverted_gene -def: "A recombinationally rearranged gene by inversion." [SO:xp] -is_a: SO:0000456 ! implied link automatically realized ! recombinationally_rearranged_gene -intersection_of: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: associated_with SO:0000047 ! invert -relationship: associated_with SO:0000047 ! implied link automatically realized ! invert - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -is_a: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000375 -name: rRNA_5.8S -def: "5. 8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5. 8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5. 8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 
6S RNA associates with RNA polymerase in a highly specific manner. 6S RNA represses expression from a sigma70-dependent promoter during stationary phase." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB-RsmB RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -is_a: SO:0000378 ! DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http://rnaworld.bio.ukans.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA -is_a: SO:0000374 ! ribozyme - -[Term] -id: SO:0000381 -name: group_IIA_intron -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -is_a: SO:0000644 ! antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. 
In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -is_a: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterised activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -is_a: SO:0000374 ! ribozyme - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. 
Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesises telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -is_a: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA -An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -def: "An attribute describes a quality of sequence." [SO:ke] -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0000401 -name: gene_attribute -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_obsolete: true - -[Term] -id: SO:0000403 -name: U14_snRNA -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. 
A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonuceoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron." [PMID:1899376] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide which functions as a part of the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S rRNA" RELATED [] -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. 
It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: Discrete. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A region of a molecule that binds to a protein." [SO:ke] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000411 -name: rescue_region -def: "A region that rescues." [SO:xp] -synonym: "rescue fragment" EXACT [] -synonym: "rescue segment" RELATED [] -is_a: SO:0000695 ! implied link automatically realized ! reagent -intersection_of: SO:0000695 ! reagent -intersection_of: has_quality SO:0000814 ! rescue -relationship: has_quality SO:0000814 ! implied link automatically realized ! rescue - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "Any of the individual polynucleotide sequences produced by digestion of DNA with a restriction endonuclease." [http://www.agron.missouri.edu/cgi-bin/sybgw_mdb/mdb3/Term/119] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequences differs from that of a specified sequence." [SO:ke] -subset: SOFA -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000414 -name: invalidated_by_genomic_contamination -def: "An attribue to describe a feature that is invalidated due to genomic contamination." [SO:ke] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000415 -name: invalidated_by_genomic_polyA_primed_cDNA -def: "An attribue to describe a feature that is invalidated due to polyA priming." [SO:ke] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000416 -name: invalidated_by_partial_processing -def: "An attribue to describe a feature that is invalidated due to partial processing." [SO:ke] -is_a: SO:0000790 ! 
invalidated - -[Term] -id: SO:0000417 -name: polypeptide_domain -alt_id: BS:00012 -def: "A structurally or functionally defined protein region. In proteins with multiple domains, the combination of the domains determines the function of the protein. Region which has been shown to recur throughout evolution." [EBIBS:GAR, http://www.molbiol.bbsrc.ac.uk/new_protein/domains.html] -comment: Range. Old definition from before biosapiens: A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains. -subset: biosapiens -synonym: "ca_bind" NARROW [] -synonym: "DNA_bind" NARROW [] -synonym: "domain" RELATED [] -synonym: "np_bind" NARROW [] -synonym: "polypeptide_domain" EXACT [] -synonym: "zn_fing" NARROW [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminal that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "signal peptide coding sequence" EXACT [] -synonym: "signal_peptide" EXACT [] -is_a: SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The extent of a polypeptide chain in the mature protein." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. 
-subset: biosapiens -subset: SOFA -synonym: "chain" EXACT [] -synonym: "mature peptide" RELATED [] -synonym: "mature_protein_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -synonym: "5' TIR" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -synonym: "3' TIR" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -synonym: "U5 long terminal repeat region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000423 -name: R_LTR_region -synonym: "R long terminal repeat region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000424 -name: U3_LTR_region -synonym: "U3 long terminal repeat region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000425 -name: five_prime_LTR -synonym: "5' long terminal repeat" EXACT [] -synonym: "5' LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -synonym: "3' long terminal repeat" EXACT [] -synonym: "3' LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -synonym: "R 5' long term repeat region" EXACT [] -is_a: SO:0000423 ! R_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -synonym: "U5 5' long terminal repeat region" EXACT [] -is_a: SO:0000422 ! U5_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -synonym: "U3 5' long term repeat region" EXACT [] -is_a: SO:0000424 ! U3_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -synonym: "R 3' long terminal repeat region" EXACT [] -is_a: SO:0000849 ! 
three_prime_LTR_component - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -synonym: "U3 3' long terminal repeat region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -synonym: "U5 3' long terminal repeat region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: target_site_duplication -def: "A sequence of the target DNA that is duplicated when a transposable element or phage inserts; usually found at each end the insertion." [http://www.koko.gov.my/CocoaBioTech/Glossaryt.html] -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." [SO:ke] -synonym: "LTR retrotransposon poly purine tract" RELATED [] -is_a: SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! ring_chromosome - -[Term] -id: SO:0000440 -name: vector -def: "A DNA molecule that can be used to transfer DNA molecules between organisms." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! 
clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000443 -name: polymer_attribute -def: "An attribute to describe the kind of biological sequence." [SO:ke] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "5' nc exon" EXACT [] -synonym: "5' non coding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." [SO:ke] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." [SO:ke] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequencyof these components." [SO:ma] -is_a: SO:0000351 ! 
synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -is_a: SO:0000341 ! chromosome_band - -[Term] -id: SO:0000451 -name: gene_with_polyadenylated_mRNA -def: "A gene that encodes a polyadenylated mRNA." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000871 ! polyadenylated_mRNA -relationship: associated_with SO:0000871 ! implied link automatically realized ! polyadenylated_mRNA - -[Term] -id: SO:0000452 -name: transgene_attribute -is_obsolete: true - -[Term] -id: SO:0000453 -name: transposition -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A small, 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000455 -name: gene_with_mRNA_with_frameshift -def: "A gene that encodes an mRNA with a frameshift." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000865 ! frameshift -relationship: associated_with SO:0000865 ! implied link automatically realized ! frameshift - -[Term] -id: SO:0000456 -name: recombinationally_rearranged_gene -def: "A gene that is recombinationally rearranged." [SO:ke] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000940 ! recombinationally_rearranged -relationship: has_quality SO:0000940 ! implied link automatically realized ! recombinationally_rearranged - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -def: "A chromosome dulication involving an insertion from another chromosome." 
[SO:ke] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment -relationship: part_of SO:0000504 ! D_DJ_C_cluster -relationship: part_of SO:0000505 ! D_DJ_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000508 ! D_DJ_J_cluster -relationship: part_of SO:0000509 ! D_J_C_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000528 ! V_D_DJ_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000532 ! V_D_J_cluster -relationship: part_of SO:0000559 ! D_cluster -relationship: part_of SO:0000560 ! D_J_cluster - -[Term] -id: SO:0000459 -name: gene_with_trans_spliced_transcript -def: "A gene with a transcript that is trans-spliced." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000479 ! trans_spliced_transcript -relationship: associated_with SO:0000479 ! implied link automatically realized ! trans_spliced_transcript - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_segment -comment: I am using the term segment instead of gene her to avoid confusion with the region 'gene'. -synonym: "vertebrate_immunoglobulin/T-cell receptor gene" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -is_a: SO:1000029 ! 
chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000463 -name: encodes_alternately_spliced_transcripts -def: "A gene that encdoes more than one transcript." [SO:ke] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendent of an exon." [SO:ke] -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." [FB:km] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-GENE" EXACT [] -synonym: "variable_gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment -relationship: part_of SO:0000518 ! V_DJ_cluster -relationship: part_of SO:0000519 ! V_DJ_J_cluster -relationship: part_of SO:0000520 ! V_VDJ_C_cluster -relationship: part_of SO:0000521 ! V_VDJ_cluster -relationship: part_of SO:0000522 ! V_VDJ_J_cluster -relationship: part_of SO:0000523 ! V_VJ_C_cluster -relationship: part_of SO:0000524 ! V_VJ_cluster -relationship: part_of SO:0000525 ! V_VJ_J_cluster -relationship: part_of SO:0000526 ! V_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000528 ! V_D_DJ_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000532 ! 
V_D_J_cluster -relationship: part_of SO:0000534 ! V_J_cluster -relationship: part_of SO:0000535 ! V_J_C_cluster -relationship: part_of SO:0000542 ! V_DJ_C_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster -relationship: part_of SO:0000566 ! V_VJ_J_C_cluster - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -def: "An attribute describing a gene sequence where the resulting proteinregulated by the stability of the resulting protein." [SO:ke] -synonym: "post-translationally regulated by protein stability" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -def: "An attribute describing a gene sequence where the resulting protein is modified to regulate it." [SO:ke] -synonym: "post-translationally regulated by protein modification" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment -relationship: part_of SO:0000485 ! DJ_J_cluster -relationship: part_of SO:0000487 ! VDJ_J_C_cluster -relationship: part_of SO:0000488 ! VDJ_J_cluster -relationship: part_of SO:0000490 ! VJ_J_C_cluster -relationship: part_of SO:0000491 ! VJ_J_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000508 ! D_DJ_J_cluster -relationship: part_of SO:0000509 ! 
D_J_C_cluster -relationship: part_of SO:0000511 ! J_C_cluster -relationship: part_of SO:0000513 ! J_cluster -relationship: part_of SO:0000519 ! V_DJ_J_cluster -relationship: part_of SO:0000522 ! V_VDJ_J_cluster -relationship: part_of SO:0000525 ! V_VJ_J_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000532 ! V_D_J_cluster -relationship: part_of SO:0000534 ! V_J_cluster -relationship: part_of SO:0000535 ! V_J_C_cluster -relationship: part_of SO:0000540 ! DJ_J_C_cluster -relationship: part_of SO:0000560 ! D_J_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster -relationship: part_of SO:0000566 ! V_VJ_J_C_cluster - -[Term] -id: SO:0000471 -name: autoregulated -def: "The gene product is involved in its own transcriptional regulation." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [CJM:SO] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -def: "The gene product is involved in its own transcriptional regulation where it decreases transcription." [SO:ke] -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -def: "The gene product is involved in its own transcriptional regulation, where it increases transcription." [SO:ke] -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! 
autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." [SO:ke] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -def: "A gene that is polycistronic." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000478 -name: C_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "C_GENE" EXACT [] -synonym: "constant gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment -relationship: part_of SO:0000487 ! VDJ_J_C_cluster -relationship: part_of SO:0000489 ! VJ_C_cluster -relationship: part_of SO:0000490 ! VJ_J_C_cluster -relationship: part_of SO:0000504 ! D_DJ_C_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000509 ! D_J_C_cluster -relationship: part_of SO:0000511 ! J_C_cluster -relationship: part_of SO:0000520 ! V_VDJ_C_cluster -relationship: part_of SO:0000523 ! V_VJ_C_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000535 ! V_J_C_cluster -relationship: part_of SO:0000539 ! DJ_C_cluster -relationship: part_of SO:0000540 ! DJ_J_C_cluster -relationship: part_of SO:0000541 ! VDJ_C_cluster -relationship: part_of SO:0000542 ! V_DJ_C_cluster -relationship: part_of SO:0000558 ! C_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster -relationship: part_of SO:0000566 ! V_VJ_J_C_cluster - -[Term] -id: SO:0000479 -name: trans_spliced_transcript -def: "A transcript that is trans-spliced." [SO:xp] -synonym: "trans-spliced transcript" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! 
transcript -intersection_of: has_quality SO:0000870 ! trans_spliced -relationship: has_quality SO:0000870 ! implied link automatically realized ! trans_spliced - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly." [SO:ke] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occuring at the termini of a DNA transposon." [SO:ke] -synonym: "TIR" EXACT [] -is_a: SO:0000294 ! inverted_repeat -relationship: part_of SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate_immunoglobulin/T-cell receptor gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -synonym: "three prime exon noncoding region" RELATED [] -is_a: SO:0000852 ! exon_region -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000486 -name: five_prime_exon_noncoding_region -def: "The sequence of the 5' exon preceeding the start codon." [SO:ke] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0000852 ! exon_region -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(VDJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(VJ)-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "3'D-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "3'D-NOMAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "3'D-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "5'D-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! 
five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "5'D-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "5'-SPACER" EXACT [] -synonym: "five prime D-spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." [SO:ke] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. 
A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000507 -name: pseudogenic_exon -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon -relationship: part_of SO:0000516 ! pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000510 -name: VD_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V_D_GENE" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. 
GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VDJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." [http://www.pnas.org/cgi/content/full/100/11/6569] -synonym: "ISCR" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! 
recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." [FB:km] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000548 -name: gene_with_edited_transcript -def: "A gene that encodes a transcript that is edited." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000873 ! edited_transcript -relationship: associated_with SO:0000873 ! implied link automatically realized ! edited_transcript - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "Region in 5' UTR where ribosome assembles on mRNA." [SO:ke] -synonym: "five prime ribosome binding site" EXACT [] -synonym: "RBS" RELATED [] -synonym: "Shine Dalgarno sequence" EXACT [] -is_a: SO:0000139 ! 
ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! processed_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "5' clip" RELATED [] -synonym: "five prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "5'RS" EXACT [] -synonym: "five prime D-recombination signal sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "3'-clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://imgt.cines.fr/ligmb/LIGMlect?query=7] -synonym: "C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: "Seven nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or\nJ-gene recombination feature of an immunoglobulin or T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "HEPTAMER" RELATED [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000563 -name: vertebrate_immune_system_gene_recombination_spacer -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inverversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promoter -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000569 -name: retrotransposed -alt_id: SO:0100042 -def: "An attribute of protein-coding genes where the protein product has been retrotransposed." [SO:ke] -is_a: SO:0000010 ! protein_coding -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "3'D-RS" EXACT [] -synonym: "three_prime_D-recombination_signal_sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000572 -name: DJ_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-J-GENE" EXACT [] -is_a: SO:0000936 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -relationship: part_of SO:0000485 ! DJ_J_cluster -relationship: part_of SO:0000504 ! D_DJ_C_cluster -relationship: part_of SO:0000505 ! D_DJ_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000508 ! D_DJ_J_cluster -relationship: part_of SO:0000518 ! V_DJ_cluster -relationship: part_of SO:0000519 ! V_DJ_J_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000528 ! V_D_DJ_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000539 ! DJ_C_cluster -relationship: part_of SO:0000540 ! DJ_J_C_cluster -relationship: part_of SO:0000542 ! V_DJ_C_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster - -[Term] -id: SO:0000573 -name: rRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000574 -name: VDJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-J-GENE" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -relationship: part_of SO:0000487 ! VDJ_J_C_cluster -relationship: part_of SO:0000488 ! VDJ_J_cluster -relationship: part_of SO:0000520 ! V_VDJ_C_cluster -relationship: part_of SO:0000521 ! V_VDJ_cluster -relationship: part_of SO:0000522 ! V_VDJ_J_cluster -relationship: part_of SO:0000541 ! VDJ_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster - -[Term] -id: SO:0000575 -name: scRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000576 -name: VJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-J-GENE" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -relationship: part_of SO:0000489 ! VJ_C_cluster -relationship: part_of SO:0000490 ! VJ_J_C_cluster -relationship: part_of SO:0000491 ! VJ_J_cluster -relationship: part_of SO:0000523 ! V_VJ_C_cluster -relationship: part_of SO:0000524 ! V_VJ_cluster -relationship: part_of SO:0000525 ! V_VJ_J_cluster -relationship: part_of SO:0000566 ! V_VJ_J_C_cluster - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." [SO:ma] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://www.rna.ucla.edu] -synonym: "pre-edited region" EXACT [] -is_a: SO:0000579 ! 
edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "A tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. TmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and eukaryote nuclear genomes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa RNA" RELATED [] -synonym: "ssrA" RELATED [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_encoding -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." [SO:ke] -synonym: "10Sa RNA primary transcript" RELATED [] -synonym: "ssrA RNA primary transcript" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyse their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -subset: SOFA -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -is_a: SO:0000188 ! intron -is_a: SO:0000374 ! 
ribozyme - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A stem-loop RNA structure where nucleotides in the loop participate in complementary interactions with a region of RNA downstream of the stem-loop." 
[http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12519954&dopt=Abstract] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "H-pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." 
[http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "H/ACA box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." [http://www.rna.ucla.edu/index.html] -is_obsolete: true - -[Term] -id: SO:0000598 -name: edited_by_C_insertion_and_dinucleotide_insertion -def: "The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs." 
[http://nsm1.utdallas.edu/bio/miller/physarum/overview.htm] -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000599 -name: edited_by_C_to_U_substitution -is_obsolete: true - -[Term] -id: SO:0000600 -name: edited_by_A_to_I_substitution -is_obsolete: true - -[Term] -id: SO:0000601 -name: edited_by_G_addition -is_obsolete: true - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added) oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -subset: SOFA -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." 
[http://www.rna.ucla/] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "The region between two known genes." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://www.rna.ucla/] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." [http://www.rna.ucla.edu/] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_encoding -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -is_a: SO:0000001 ! region -relationship: adjacent_to SO:0000602 ! guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -is_a: SO:0000841 ! 
spliceosomal_intron_region - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -is_a: SO:0000167 ! promoter -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -is_a: SO:0000752 ! gene_group_regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -is_a: SO:0000951 ! eukaryotic_terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The site where transcription ends." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "tRNA promoter" RELATED [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -def: "A variably distant linear promoter region recognised by TFIIIC, with consensus sequence TGGCnnAGTGG." [SO:ke] -is_a: SO:0000846 ! RNApol_III_promoter_type_2_region - -[Term] -id: SO:0000620 -name: B_box -def: "A variably distant linear promoter region recognised by TFIIIC, with consensus sequence AGGTTCCAnnCC." [SO:ke] -is_a: SO:0000846 ! RNApol_III_promoter_type_2_region - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -def: "An RNA polymerase II type 1 promoter , with consensus sequence CAnnCCn." [SO:ke] -is_a: SO:0000845 ! RNApol_III_promoter_type_1_region - -[Term] -id: SO:0000623 -name: snRNA_encoding -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "Combination of short DNA sequence elements which suppress the transcription of an adjacent gene or genes." [http://www.brunel.ac.uk/depts/bio/project/old_hmg/gloss3.htm] -subset: SOFA -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000627 -name: insulator -def: "A trancriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -def: "A start codon upstream of the ORF." [SO:ke] -is_a: SO:0000837 ! UTR_region - -[Term] -id: SO:0000631 -name: polycistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." [SO:ke] -is_a: SO:0000078 ! implied link automatically realized ! polycistronic_transcript -is_a: SO:0000185 ! implied link automatically realized ! primary_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000632 -name: monocistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." [SO:ke] -is_a: SO:0000185 ! implied link automatically realized ! primary_transcript -is_a: SO:0000665 ! 
implied link automatically realized ! monocistronic_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000633 -name: monocistronic_mRNA -def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd] -synonym: "monocistronic processed transcript" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -is_a: SO:0000665 ! implied link automatically realized ! monocistronic_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000634 -name: polycistronic_mRNA -def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd] -synonym: "polycistronic processed transcript" RELATED [] -is_a: SO:0000078 ! implied link automatically realized ! polycistronic_transcript -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "A primary transcript that donates the spliced leader to other mRNA." [SO:ke] -synonym: "mini-exon donor RNA" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000635 ! mini_exon_donor_RNA - -[Term] -id: SO:0000637 -name: engineered_plasmid -def: "A plasmid that is engineered." [SO:xp] -synonym: "engineered plasmid gene" RELATED [] -is_a: SO:0000155 ! implied link automatically realized ! plasmid -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000783 ! 
engineered - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -is_a: SO:0000838 ! rRNA_primary_transcript_region - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repetitive sequence spanning 500 to 20,000 base pairs (a repeat unit is 5 - 30 base pairs)." [http://www.rerf.or.jp/eigo/glossary/minisate.htm] -subset: SOFA -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." 
[PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro RNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000648 -name: stRNA_primary_transcript -def: "A primary transcript encoding a small temporal mRNA (SO:0000649)." [SO:ke] -synonym: "small temporal RNA primary transcript" EXACT [] -is_a: SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -subset: SOFA -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -subset: SOFA -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilises 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S rRNA" EXACT [] -is_a: SO:0000651 ! 
large_subunit_rRNA - -[Term] -id: SO:0000654 -name: maxicircle_gene -def: "A mitochondrial gene located in a maxicircle." [SO:xp] -synonym: "maxi-circle gene" EXACT [] -is_a: SO:0000089 ! implied link automatically realized ! kinetoplast_gene -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: has_origin SO:0000742 ! maxicircle_sequence -relationship: has_origin SO:0000742 ! implied link automatically realized ! maxicircle_sequence - -[Term] -id: SO:0000655 -name: ncRNA -def: "An mRNA sequence that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -is_a: SO:0000233 ! processed_transcript - -[Term] -id: SO:0000656 -name: stRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "interspersed repeat" EXACT [] -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_obsolete: true - -[Term] -id: SO:0000661 -name: intron_attribute -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -subset: SOFA -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000663 -name: tRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -is_a: SO:0000830 ! 
chromosome_part - -[Term] -id: SO:0000665 -name: monocistronic_transcript -def: "A transcript that is monocistronic." [SO:xp] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000878 ! monocistronic -relationship: has_quality SO:0000878 ! implied link automatically realized ! monocistronic - -[Term] -id: SO:0000666 -name: mobile_intron -is_a: SO:0000661 ! intron_attribute - -[Term] -id: SO:0000667 -name: insertion -def: "A region of sequence that has been inserted." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0001059 ! sequence_alteration -relationship: sequence_of SO:0000046 ! insert - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." [SO:ma] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -is_a: SO:0000831 ! 
gene_member_region - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non-canonical splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." [SO:ke] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -is_a: SO:0000164 ! three_prime_splice_site -is_a: SO:0000675 ! canonical_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." [SO:ke] -is_a: SO:0000163 ! five_prime_splice_site -is_a: SO:0000675 ! canonical_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." [SO:ke] -synonym: "non-canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_splice_site -is_a: SO:0000674 ! non_canonical_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non-canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_splice_site -is_a: SO:0000674 ! non_canonical_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non ATG start codon" EXACT [] -synonym: "non-canonical start codon" EXACT [] -is_a: SO:0000318 ! start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -is_a: SO:0000233 ! 
processed_transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -is_a: SO:0000344 ! splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeting by a nuclease enzyme." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000685 -name: DNAaseI_hypersensitive_site -is_a: SO:0000322 ! nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "For some translocations, particularly but not exclusively, reciprocal translocations, the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements." [SO:ma] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occured." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: position_of SO:0000045 ! delete - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000690 -name: gene_with_polycistronic_transcript -def: "A gene that encodes a polycistronic transcript." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000078 ! 
polycistronic_transcript -relationship: associated_with SO:0000078 ! implied link automatically realized ! polycistronic_transcript - -[Term] -id: SO:0000691 -name: cleaved_initiator_methionine -namespace: BS -alt_id: BS:00067 -def: "Indicates when the initator methionine has been cleaved from the mature sequence." [EBIBS:GAR] -subset: biosapiens -synonym: "cleaved_initiator_methionine" EXACT [] -synonym: "init_met" RELATED [] -synonym: "initator methioninie" EXACT [] -is_a: SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000692 -name: gene_with_dicistronic_transcript -def: "A gene that encodes a dicistronic transcript." [SO:xp] -is_a: SO:0000690 ! implied link automatically realized ! gene_with_polycistronic_transcript -intersection_of: SO:0000690 ! gene_with_polycistronic_transcript -intersection_of: associated_with SO:0000079 ! dicistronic_transcript -relationship: associated_with SO:0000079 ! implied link automatically realized ! dicistronic_transcript - -[Term] -id: SO:0000693 -name: gene_with_recoded_mRNA -def: "A gene that encodes an mRNA that is recoded." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:1001261 ! recoded_mRNA -relationship: associated_with SO:1001261 ! implied link automatically realized ! recoded_mRNA - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives (alleles) exist in normal individuals in some population(s), wherein the least frequent allele has an abundance of 1% or greater." [http://www.cgr.ki.se/cgb/groups/brookes/Articles/essence_of_snps_article.pdf] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000697 -name: gene_with_stop_codon_read_through -def: "A gene that encodes a transcript with stop codon readthrough." [SO:xp] -is_a: SO:0000693 ! implied link automatically realized ! gene_with_recoded_mRNA -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: associated_with SO:0000883 ! stop_codon_read_through -relationship: associated_with SO:0000883 ! implied link automatically realized ! stop_codon_read_through - -[Term] -id: SO:0000698 -name: gene_with_stop_codon_redefined_as_pyrrolysine -def: "A gene encoding an mRNA that has the stop codon redefined as pyrrolysine." [SO:xp] -is_a: SO:0000697 ! implied link automatically realized ! gene_with_stop_codon_read_through -intersection_of: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: associated_with SO:0000884 ! stop_codon_redefined_as_pyrrolysine -relationship: associated_with SO:0000884 ! implied link automatically realized ! stop_codon_redefined_as_pyrrolysine - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." 
[SO:ke] -subset: SOFA -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology.\nA gene may be considered as a unit of inheritance. -subset: SOFA -is_a: SO:0000001 ! region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjacent copies of a DNA sequence." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The process that produces mature transcripts by combining exons of independent pre-mRNA molecules. The acceptor site lies on the 3' of these molecules." [SO:ke] -subset: SOFA -is_a: SO:0000164 ! three_prime_splice_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The site at which trans-splicing occurs." [SO:ke] -synonym: "trans-splice donor site" EXACT [] -is_a: SO:0000163 ! five_prime_splice_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000710 -name: gene_with_stop_codon_redefined_as_selenocysteine -def: "A gene encoding an mRNA that has the stop codon redefined as selenocysteine." [SO:xp] -is_a: SO:0000697 ! implied link automatically realized ! gene_with_stop_codon_read_through -intersection_of: SO:0000697 ! 
gene_with_stop_codon_read_through -intersection_of: associated_with SO:0000885 ! stop_codon_redefined_as_selenocysteine -relationship: associated_with SO:0000885 ! implied link automatically realized ! stop_codon_redefined_as_selenocysteine - -[Term] -id: SO:0000711 -name: gene_with_mRNA_recoded_by_translational_bypass -def: "A gene with mRNA recoded by translational bypass." [SO:xp] -is_a: SO:0000693 ! implied link automatically realized ! gene_with_recoded_mRNA -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: associated_with SO:0000886 ! recoded_by_translational_bypass -relationship: associated_with SO:0000886 ! implied link automatically realized ! recoded_by_translational_bypass - -[Term] -id: SO:0000712 -name: gene_with_transcript_with_translational_frameshift -def: "A gene encoding a transcript that has a translational frameshift." [SO:xp] -is_a: SO:0000693 ! implied link automatically realized ! gene_with_recoded_mRNA -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: associated_with SO:0000887 ! translational_frameshift -relationship: associated_with SO:0000887 ! implied link automatically realized ! translational_frameshift - -[Term] -id: SO:0000713 -name: DNA_motif -def: "A motif that is active in the DNA form of the sequence." [SO:ke] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000716 -name: dicistronic_mRNA -def: "An mRNA that has the quality dicistronic." [SO:ke] -synonym: "dicistronic processed transcript" RELATED [] -is_a: SO:0000079 ! implied link automatically realized ! dicistronic_transcript -is_a: SO:0000634 ! implied link automatically realized ! 
polycistronic_mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SO:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interrupted by one or more stop codons; usually identified through intergenomic sequence comparisons." [SO:rb] -comment: Term requested by Rama from SGD. -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000720 -name: foreign_transposable_element -def: "A transposable element that is foreign." [SO:ke] -comment: requested by Michael on 19 Nov 2004. -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000784 ! implied link automatically realized ! foreign - -[Term] -id: SO:0000721 -name: gene_with_dicistronic_primary_transcript -def: "A gene that encodes a dicistronic primary transcript." [SO:xp] -comment: Requested by Michael, 19 nov 2004. -is_a: SO:0000692 ! implied link automatically realized ! gene_with_dicistronic_transcript -intersection_of: SO:0000692 ! 
gene_with_dicistronic_transcript -intersection_of: associated_with SO:1001197 ! dicistronic_primary_transcript -relationship: associated_with SO:1001197 ! implied link automatically realized ! dicistronic_primary_transcript - -[Term] -id: SO:0000722 -name: gene_with_dicistronic_mRNA -def: "A gene that encodes a polycistronic mRNA." [SO:xp] -comment: Requested by MA nov 19 2004. -synonym: "gene with dicistronic processed transcript" EXACT [] -is_a: SO:0000692 ! implied link automatically realized ! gene_with_dicistronic_transcript -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: associated_with SO:0000716 ! dicistronic_mRNA -relationship: associated_with SO:0000716 ! implied link automatically realized ! dicistronic_mRNA - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." [SO:ma] -synonym: "intervening DNA" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminal of the peptide that directs the protein to an organelle (chloroplast, mitochonrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. 
-subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit_peptide" EXACT [] -is_a: SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000726 -name: repeat_unit -def: "A single repeat element." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to comply with the feature table. -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000728 -name: intein -def: "A region of a peptide that A is able to excise itself and rejoin the remaining portions with a peptide bond." [SO:ke] -comment: Intein-mediated protein splicing occurs after mRNA has been translated into a protein. -synonym: "protein intron" RELATED [] -is_a: SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000729 -name: intein_containing -def: "An attribute of protein-coding genes where the initial protein product contains an intein." [SO:ke] -is_a: SO:0000010 ! protein_coding - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unkown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! assembly - -[Term] -id: SO:0000731 -name: fragment -def: "An attribute to describe a feature that is incomplete." [SO:ke] -comment: Term added because of request by MO people. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000732 -name: predicted -def: "An attribute describing an unverified region." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000733 -name: feature_attribute -def: "An attribute describing a located_sequence_feature." [SO:ke] -is_a: SO:0000400 ! 
sequence_attribute - -[Term] -id: SO:0000734 -name: exemplar_mRNA -def: "An exemplar is a representative cDNA sequence for each gene. The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: Added for the MO people. -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000864 ! exemplar -relationship: has_quality SO:0000864 ! implied link automatically realized ! exemplar - -[Term] -id: SO:0000735 -name: sequence_location -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_sequence -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000738 -name: nuclear_sequence -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000740 -name: plastid_sequence -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000741 -name: kinetoplast_sequence -is_a: SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000742 -name: maxicircle_sequence -is_a: SO:0000741 ! kinetoplast_sequence - -[Term] -id: SO:0000743 -name: apicoplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -is_a: SO:0000740 ! 
plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_location -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_location -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -is_a: SO:0005836 ! regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000754 -name: lambda_vector -def: "The lambda bacteriophage is the vector for the linear lambda clone. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000755 -name: plasmid_vector -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template." [SO:ma] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -synonym: "single strand cDNA" EXACT [] -synonym: "single-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -synonym: "double strand cDNA" RELATED [] -synonym: "double-strand cDNA" RELATED [] -is_a: SO:0000756 ! 
cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_obsolete: true - -[Term] -id: SO:0000760 -name: YAC_clone -is_obsolete: true - -[Term] -id: SO:0000761 -name: phagemid_clone -is_obsolete: true - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000763 -name: fosmid_clone -is_obsolete: true - -[Term] -id: SO:0000764 -name: BAC_clone -is_obsolete: true - -[Term] -id: SO:0000765 -name: cosmid_clone -is_obsolete: true - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -def: "A tRNA sequence that has a pyrrolysine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "pyrrolysyl-transfer ribonucleic acid" EXACT [] -synonym: "pyrrolysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome." [SO:ma] -is_a: SO:0000155 ! plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria. Processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [http://nar.oxfordjournals.org/cgi/content/full/32/15/4531n10, Indiana:kw] -comment: Added in response to comment from Kelly Williams from Indiana. Nov 2005. -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [http://nar.oxfordjournals.org/cgi/content/full/32/15/4531n10, Indiana:kw] -comment: Added in response to Kelly Williams from Indiana. Date: Nov 2005. 
-is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000771 -name: QTL -def: "Quantitative Trait Locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." [http://rgd.mcw.edu/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005. -synonym: "quantitative trait locus" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000772 -name: genomic_island -def: "A genomic island is an integrated mobile genetic element, characterized by size (over 10 Kb). It that has features that suggest a foreign origin. These can include nucleotide distribution (oligonucleotides signature, CG content etc.) that differs from the bulck of the chromosome and/or genes suggesting DNA mobility." [Phigo:at, SO:ke] -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands. 
-is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -def: "An adaptive island is a genomic island that provides an adaptive advantage to the host." [SO:ke] -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands. Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands. Evolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA gene. John T. Sullivan and Clive W. Ronso PNAS 1998 Apr 28 95 (9) 5145-5149. -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000779 -name: engineered_episome -def: "An episome that is engineered." [SO:xp] -comment: Requested by Lynn Crosby Jan 2006. -is_a: SO:0000637 ! implied link automatically realized ! engineered_plasmid -is_a: SO:0000768 ! implied link automatically realized ! episome -intersection_of: SO:0000768 ! episome -intersection_of: has_quality SO:0000783 ! 
engineered - -[Term] -id: SO:0000780 -name: transposable_element_attribute -comment: Added by KE Jan 2006 to capture the kinds of attributes of TEs -is_obsolete: true - -[Term] -id: SO:0000781 -name: transgenic -def: "Attribute describing sequence that has been integrated with foreign sequence." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000782 -name: natural -def: "An attribute describing a feature that occurs in nature." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000783 -name: engineered -def: "An attribute to describe a region that was modified in vitro." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000784 -name: foreign -def: "An atrribute to describe a region from another species." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000785 -name: cloned_region -comment: Added in response to Lynn Crosby. A clone insert may be composed of many cloned regions. -synonym: "cloned segment" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000786 -name: reagent_attribute -comment: Added jan 2006 by KE. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000787 -name: clone_attribute -is_obsolete: true - -[Term] -id: SO:0000788 -name: cloned -is_obsolete: true - -[Term] -id: SO:0000789 -name: validated -def: "An attribute to describe a feature that has been proven." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000790 -name: invalidated -def: "An attribute describing a feature that is invalidated." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000791 -name: cloned_genomic -is_obsolete: true - -[Term] -id: SO:0000792 -name: cloned_cDNA -is_obsolete: true - -[Term] -id: SO:0000793 -name: engineered_DNA -is_obsolete: true - -[Term] -id: SO:0000794 -name: engineered_rescue_region -def: "A rescue region that is engineered." 
[SO:xp] -synonym: "engineered rescue fragment" EXACT [] -synonym: "engineered rescue segment" EXACT [] -is_a: SO:0000411 ! implied link automatically realized ! rescue_region -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000411 ! rescue_region -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000795 -name: rescue_mini_gene -def: "A mini_gene that rescues." [SO:xp] -synonym: "rescue mini-gene" EXACT [] -is_a: SO:0000411 ! implied link automatically realized ! rescue_region -is_a: SO:0000815 ! implied link automatically realized ! mini_gene -intersection_of: SO:0000815 ! mini_gene -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000796 -name: transgenic_transposable_element -def: "TE that has been modified in vitro, including insertion of DNA derived from a source other than the originating TE." [SO:mc] -comment: Modified as requested by Lynn - FB. May 2007. -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: derives_from SO:0000151 ! clone -intersection_of: has_quality SO:0000781 ! transgenic -relationship: derives_from SO:0000151 ! implied link automatically realized ! clone -relationship: has_quality SO:0000781 ! implied link automatically realized ! transgenic - -[Term] -id: SO:0000797 -name: natural_transposable_element -def: "TE that exists (or existed) in nature." [SO:mc] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000782 ! natural -relationship: has_quality SO:0000782 ! implied link automatically realized ! natural - -[Term] -id: SO:0000798 -name: engineered_transposable_element -def: "TE that has been modified by manipulations in vitro." [SO:mc] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -is_a: SO:0000804 ! 
implied link automatically realized ! engineered_region -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000799 -name: engineered_foreign_transposable_element -def: "A transposable_element that is engineered and foreign." [SO:mc] -is_a: SO:0000720 ! implied link automatically realized ! foreign_transposable_element -is_a: SO:0000798 ! implied link automatically realized ! engineered_transposable_element -is_a: SO:0000805 ! implied link automatically realized ! engineered_foreign_region -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000800 -name: assortment_derived_duplication -def: "A multi-chromosome duplication aberration generated by reassortment of other aberration components." [FB:gm] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000801 -name: assortment_derived_deficiency_plus_duplication -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency and a duplication." [FB:gm] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000802 -name: assortment_derived_deficiency -def: "A multi-chromosome deficiency aberration generated by reassortment of other aberration components." [FB:gm] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000803 -name: assortment_derived_aneuploid -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency or a duplication." [fb:gm] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000804 -name: engineered_region -def: "A region that is engineered." [SO:xp] -synonym: "engineered sequence" EXACT [] -is_a: SO:0000001 ! implied link automatically realized ! region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! 
engineered -relationship: has_quality SO:0000783 ! implied link automatically realized ! engineered - -[Term] -id: SO:0000805 -name: engineered_foreign_region -def: "A region that is engineered and foreign." [SO:xp] -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000784 ! implied link automatically realized ! foreign - -[Term] -id: SO:0000806 -name: fusion -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000807 -name: engineered_tag -def: "A tag that is engineered." [SO:xp] -is_a: SO:0000324 ! implied link automatically realized ! tag -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000324 ! tag -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000808 -name: validated_cDNA_clone -def: "A cDNA clone that has been validated." [SO:xp] -is_a: SO:0000151 ! implied link automatically realized ! clone -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000789 ! validated -relationship: has_quality SO:0000789 ! implied link automatically realized ! validated - -[Term] -id: SO:0000809 -name: invalidated_cDNA_clone -def: "A cDNA clone that is invalid." [SO:xp] -is_a: SO:0000317 ! implied link automatically realized ! cDNA_clone -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000790 ! invalidated -relationship: has_quality SO:0000790 ! implied link automatically realized ! invalidated - -[Term] -id: SO:0000810 -name: chimeric_cDNA_clone -def: "A cDNA clone invalidated because it is chimeric." [SO:xp] -is_a: SO:0000809 ! implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA -relationship: has_quality SO:0000362 ! 
implied link automatically realized ! invalidated_by_chimeric_cDNA - -[Term] -id: SO:0000811 -name: genomically_contaminated_cDNA_clone -def: "A cDNA clone invalidated by genomic contamination." [SO:xp] -is_a: SO:0000809 ! implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000414 ! invalidated_by_genomic_contamination -relationship: has_quality SO:0000414 ! implied link automatically realized ! invalidated_by_genomic_contamination - -[Term] -id: SO:0000812 -name: polyA_primed_cDNA_clone -def: "A cDNA clone invalidated by polyA priming." [SO:xp] -is_a: SO:0000809 ! implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000415 ! invalidated_by_genomic_polyA_primed_cDNA -relationship: has_quality SO:0000415 ! implied link automatically realized ! invalidated_by_genomic_polyA_primed_cDNA - -[Term] -id: SO:0000813 -name: partially_processed_cDNA_clone -def: "A cDNA invalidated clone by partial processing." [SO:xp] -is_a: SO:0000809 ! implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000416 ! invalidated_by_partial_processing -relationship: has_quality SO:0000416 ! implied link automatically realized ! invalidated_by_partial_processing - -[Term] -id: SO:0000814 -name: rescue -def: "An attribute describing a regions ability, when introduced to a mutant organism, to re-establish (rescue) a phenotype." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000815 -name: mini_gene -def: "By definition, minigenes are short open-reading frames\n(ORF), usually encoding approximately 9 to 20 amino\nacids, which are expressed in vivo (as distinct from being\nsynthesized as peptide or protein ex vivo and subsequently\ninjected). 
The in vivo synthesis confers a distinct\nadvantage: the expressed sequences can enter both\nantigen presentation pathways, MHC I (inducing CD8+ T-\ncells, which are usually cytotoxic T-lymphocytes (CTL))\nand MHC II (inducing CD4+ T-cells, usually 'T-helpers'\n(Th)); and can encounter B-cells, inducing antibody\nresponses. Three main vector approaches have been used\nto deliver minigenes: viral vectors, bacterial vectors and\nplasmid DNA." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=Abstract&list_uids=15992153&query_hl=2&itool=pubmed_docsum] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000816 -name: rescue_gene -def: "A gene that rescues." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000814 ! rescue -relationship: has_quality SO:0000814 ! implied link automatically realized ! rescue - -[Term] -id: SO:0000817 -name: wild_type -def: "An attribute describing sequence with the genotype found in nature/or standard laboratory stock." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000818 -name: wild_type_rescue_gene -def: "A gene that rescues." [SO:xp] -is_a: SO:0000816 ! rescue_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000817 ! wild_type -relationship: has_quality SO:0000817 ! implied link automatically realized ! wild_type - -[Term] -id: SO:0000819 -name: mitochondrial_chromosome -def: "A chromosome originating in a mitochondria." [SO:xp] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence -relationship: has_origin SO:0000737 ! implied link automatically realized ! mitochondrial_sequence - -[Term] -id: SO:0000820 -name: chloroplast_chromosome -def: "A chromosome originating in a chloroplast." [SO:xp] -is_a: SO:0000340 ! implied link automatically realized ! 
chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000745 ! chloroplast_sequence -relationship: has_origin SO:0000745 ! implied link automatically realized ! chloroplast_sequence - -[Term] -id: SO:0000821 -name: chromoplast_chromosome -def: "A chromosome originating in a chromoplast." [SO:xp] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000744 ! chromoplast_sequence -relationship: has_origin SO:0000744 ! implied link automatically realized ! chromoplast_sequence - -[Term] -id: SO:0000822 -name: cyanelle_chromosome -def: "A chromosome originating in a cyanelle." [SO:xp] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000746 ! cyanelle_sequence -relationship: has_origin SO:0000746 ! implied link automatically realized ! cyanelle_sequence - -[Term] -id: SO:0000823 -name: leucoplast_chromosome -def: "A chromosome with origin in a leucoplast." [SO:xp] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000747 ! leucoplast_sequence -relationship: has_origin SO:0000747 ! implied link automatically realized ! leucoplast_sequence - -[Term] -id: SO:0000824 -name: macronuclear_chromosome -def: "A chromosome originating in a macronucleus." [SO:xp] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000083 ! macronuclear_sequence -relationship: has_origin SO:0000083 ! implied link automatically realized ! macronuclear_sequence - -[Term] -id: SO:0000825 -name: micronuclear_chromosome -def: "A chromosome originating in a micronucleus." [SO:xp] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! 
chromosome -intersection_of: has_origin SO:0000084 ! micronuclear_sequence -relationship: has_origin SO:0000084 ! implied link automatically realized ! micronuclear_sequence - -[Term] -id: SO:0000826 -name: kinetoplast_chromosome -def: "A chromosome with origin in the kinetoplast." [SO:xp] -is_a: SO:0000819 ! implied link automatically realized ! mitochondrial_chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000741 ! kinetoplast_sequence -relationship: has_origin SO:0000741 ! implied link automatically realized ! kinetoplast_sequence - -[Term] -id: SO:0000827 -name: maxicircle_chromosome -def: "A chromosome originating in a maxi-circle." [SO:xp] -is_a: SO:0000826 ! implied link automatically realized ! kinetoplast_chromosome -intersection_of: SO:0000826 ! kinetoplast_chromosome -intersection_of: has_origin SO:0000742 ! maxicircle_sequence -relationship: has_origin SO:0000742 ! implied link automatically realized ! maxicircle_sequence - -[Term] -id: SO:0000828 -name: nuclear_chromosome -def: "A chromosome originating in a nucleus." [SO:xp] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000738 ! nuclear_sequence -relationship: has_origin SO:0000738 ! implied link automatically realized ! nuclear_sequence - -[Term] -id: SO:0000829 -name: nucleomorphic_chromosome -def: "A chromosome originating in a nucleomorph." [SO:xp] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence -relationship: has_origin SO:0000739 ! implied link automatically realized ! nucleomorphic_sequence - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. 
-subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000832 -name: promoter_region -def: "A region of sequence which is part of a promotor." [SO:ke] -comment: This is a manufactured term to allow the parts of promotor to have an is_a path back to the root. -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000167 ! promoter - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: processed_transcript_region -def: "A region of a processed transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a processed transcript and give them an is_a path to the root. -subset: SOFA -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A region of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -is_a: SO:0000834 ! processed_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." 
[SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000838 -name: rRNA_primary_transcript_region -def: "A region of an rRNA primary transcript." [SO:ke] -comment: To allow transcribed_spacer_region to have a path to the root. -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a protein." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" EXACT [] -synonym: "positional polypeptide feature" EXACT [] -synonym: "region or site annotation" EXACT [] -is_a: SO:0000001 ! region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000840 -name: repeat_component -def: "A region of a repeated sequence." [SO:ke] -comment: A manufactured to group the parts of repeats, to give them an is_a path back to the root. -is_a: SO:0000001 ! region -relationship: part_of SO:0000657 ! repeat_region - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000843 -name: bacterial_RNApol_promoter_region -def: "A region which is part of a bacterial RNA polymerase promoter." 
[SO:ke] -comment: This is a manufactured term to allow the parts of bacterial_RNApol_promoter to have an is_a path back to the root. -is_a: SO:0000832 ! promoter_region -relationship: part_of SO:0000613 ! bacterial_RNApol_promoter - -[Term] -id: SO:0000844 -name: RNA_II_promoter_region -def: "A region of sequence which is a promoter for RNA polymerase II." [SO:ke] -comment: This is a manufactured term to allow the parts of RNA_II_promoter to have an is_a path back to the root. -is_a: SO:0000832 ! promoter_region -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000845 -name: RNApol_III_promoter_type_1_region -def: "A region of sequence which is a promoter for RNA polymerase III type 1." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_1 to have an is_a path back to the root. -is_a: SO:0000832 ! promoter_region -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000846 -name: RNApol_III_promoter_type_2_region -def: "A region of sequence which is a promoter for RNA polymerase III type 2." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_2 to have an is_a path back to the root. -is_a: SO:0000832 ! promoter_region -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000847 -name: tmRNA_region -comment: This term was added to provide a grouping term for the region parts of tmRNA, thus giving them an is_a path back to the root. -is_a: SO:0000834 ! processed_transcript_region -relationship: part_of SO:0000584 ! tmRNA - -[Term] -id: SO:0000848 -name: LTR_component -synonym: "long term repeat component" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000849 -name: three_prime_LTR_component -synonym: "3' long terminal repeat component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000426 ! 
three_prime_LTR - -[Term] -id: SO:0000850 -name: five_prime_LTR_component -synonym: "5' long term repeat component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000851 -name: CDS_region -subset: SOFA -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0000853 -name: homologous_region -def: "A region that is homologous to another region." [SO:ke] -is_a: SO:0000330 ! implied link automatically realized ! conserved_region -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000856 ! conserved -relationship: has_quality SO:0000856 ! implied link automatically realized ! conserved - -[Term] -id: SO:0000854 -name: paralogous_region -def: "A homologous_region that is paralogous to another region." [SO:ke] -comment: A term to be used in conjunction witht he paralogous_to relationship. -is_a: SO:0000853 ! implied link automatically realized ! homologous_region -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000859 ! paralogous -relationship: has_quality SO:0000859 ! implied link automatically realized ! paralogous - -[Term] -id: SO:0000855 -name: orthologous_region -def: "A homologous_region that is orthologous to another region." [SO:ke] -comment: This term should be used in conjuction with the similarity relationships defined in SO. -is_a: SO:0000853 ! implied link automatically realized ! homologous_region -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000858 ! orthologous -relationship: has_quality SO:0000858 ! implied link automatically realized ! orthologous - -[Term] -id: SO:0000856 -name: conserved -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000857 -name: homologous -def: "Similarity due to common ancestry." 
[SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000858 -name: orthologous -def: "A kind of homology where divergence occured after a speciation event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000859 -name: paralogous -def: "A kind of homology where divergence occured after a duplication event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000860 -name: syntenic -def: "Attribute describing sequence regions occurring in same order on chromosome of different species." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000861 -name: capped_primary_transcript -def: "A primary transcript that is capped." [SO:xp] -is_a: SO:0000185 ! implied link automatically realized ! primary_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped -relationship: adjacent_to SO:0000581 ! implied link automatically realized ! cap -relationship: has_quality SO:0000146 ! implied link automatically realized ! capped - -[Term] -id: SO:0000862 -name: capped_mRNA -def: "An mRNA that is capped." [SO:xp] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped -relationship: adjacent_to SO:0000581 ! implied link automatically realized ! cap -relationship: has_quality SO:0000146 ! implied link automatically realized ! capped - -[Term] -id: SO:0000863 -name: mRNA_attribute -def: "An attribute describing an mRNA feature." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000864 -name: exemplar -def: "An attribute describing a sequence is representative of a class of similar sequences." [SO:ke] -is_a: SO:0000863 ! 
mRNA_attribute - -[Term] -id: SO:0000865 -name: frameshift -def: "An attribute describing a sequence that contains a mutation involving the deletion or insertion of one or more bases, where this number is not divisible by 3." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000866 -name: minus_1_frameshift -def: "A frameshift caused by deleting one base." [SO:ke] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000867 -name: minus_2_frameshift -def: "A frameshift caused by deleting two bases." [SO:ke] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000868 -name: plus_1_frameshift -def: "A frameshift caused by inserting one base." [SO:ke] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000869 -name: plus_2_framshift -def: "A frameshift caused by inserting two bases." [SO:ke] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000870 -name: trans_spliced -def: "An attribute describing transcript sequence that is created by splicing exons from diferent genes." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000871 -name: polyadenylated_mRNA -def: "An mRNA that is polyadenylated." [SO:xp] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000610 ! polyA_sequence -intersection_of: has_quality SO:0000246 ! polyadenylated -relationship: adjacent_to SO:0000610 ! implied link automatically realized ! polyA_sequence -relationship: has_quality SO:0000246 ! implied link automatically realized ! polyadenylated - -[Term] -id: SO:0000872 -name: trans_spliced_mRNA -def: "An mRNA that is trans-spliced." [SO:xp] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -is_a: SO:0000479 ! implied link automatically realized ! trans_spliced_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000636 ! spliced_leader_RNA -intersection_of: has_quality SO:0000870 ! trans_spliced -relationship: adjacent_to SO:0000636 ! 
implied link automatically realized ! spliced_leader_RNA - -[Term] -id: SO:0000873 -name: edited_transcript -def: "A transcript that is edited." [SO:ke] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: associated_with SO:0000602 ! guide_RNA -intersection_of: associated_with SO:0000977 ! anchor_binding_site -intersection_of: has_quality SO:0000116 ! edited -relationship: associated_with SO:0000602 ! implied link automatically realized ! guide_RNA -relationship: associated_with SO:0000977 ! implied link automatically realized ! anchor_binding_site -relationship: has_quality SO:0000116 ! implied link automatically realized ! edited - -[Term] -id: SO:0000874 -name: edited_transcript_by A_to_I_substitution -def: "A transcript that has been edited by A to I substitution." [SO:ke] -is_a: SO:0000929 ! implied link automatically realized ! edited_mRNA -intersection_of: SO:0000873 ! edited_transcript -intersection_of: associated_with SO:0000919 ! substitute_A_to_I -relationship: associated_with SO:0000919 ! implied link automatically realized ! substitute_A_to_I - -[Term] -id: SO:0000875 -name: bound_by_protein -def: "An attribute describing a sequence that is bound by a protein." [SO:ke] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000876 -name: bound_by_nucleic_acid -def: "An attribute describing a sequence that is bound by a nucleic acid." [SO:ke] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000877 -name: alternatively_spliced -def: "An attribute describing a situation where a gene may encode for more than 1 transcript." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000878 -name: monocistronic -def: "An attribute describing a sequence that contains the code for one gene product." [SO:ke] -is_a: SO:0000237 ! 
transcript_attribute - -[Term] -id: SO:0000879 -name: dicistronic -def: "An attribute describing a sequence that contains the code for two gene products." [SO:ke] -is_a: SO:0000880 ! polycistronic - -[Term] -id: SO:0000880 -name: polycistronic -def: "An attribute describing a sequence that contains the code for more than one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000881 -name: recoded -def: "An attribute describing am mRNA sequences that has been reprogrammed at translation, causing localized alterations." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000882 -name: codon_redefined -def: "An attribute describing the alteration of codon meaning." [SO:ke] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000883 -name: stop_codon_read_through -def: "A stop codon redefined to be a new amino acid." [SO:ke] -synonym: "stop codon readthrough" RELATED [] -is_a: SO:0000882 ! codon_redefined - -[Term] -id: SO:0000884 -name: stop_codon_redefined_as_pyrrolysine -def: "A stop codon redefined to be the new amino acid, pyrrolysine." [SO:ke] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000885 -name: stop_codon_redefined_as_selenocysteine -def: "A stop codon redefined to be the new amino acid, selenocysteine." [SO:ke] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000886 -name: recoded_by_translational_bypass -def: "Recoded mRNA where a block of nucleotides is not translated." [SO:ke] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000887 -name: translational_frameshift -def: "Recoding by frameshifting a particular site." [SO:ke] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000888 -name: maternally_imprinted_gene -def: "A gene that is maternally_imprinted." [SO:xp] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000135 ! maternally_imprinted -relationship: has_quality SO:0000135 ! 
implied link automatically realized ! maternally_imprinted - -[Term] -id: SO:0000889 -name: paternally_imprinted_gene -def: "A gene that is paternally imprinted." [SO:xp] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000136 ! paternally_imprinted -relationship: has_quality SO:0000136 ! implied link automatically realized ! paternally_imprinted - -[Term] -id: SO:0000890 -name: post_translationally_regulated_gene -def: "A gene that is post translationally regulated." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000130 ! post_translationally_regulated -relationship: has_quality SO:0000130 ! implied link automatically realized ! post_translationally_regulated - -[Term] -id: SO:0000891 -name: negatively_autoregulated_gene -def: "A gene that is negatively autoreguated." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000473 ! negatively_autoregulated -relationship: has_quality SO:0000473 ! implied link automatically realized ! negatively_autoregulated - -[Term] -id: SO:0000892 -name: positively_autoregulated_gene -def: "A gene that is positively autoregulated." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000475 ! positively_autoregulated -relationship: has_quality SO:0000475 ! implied link automatically realized ! positively_autoregulated - -[Term] -id: SO:0000893 -name: silenced -def: "An epigenetic process where a gene is innactivated at transcriptional or translational level." [SO:ke] -is_a: SO:0000126 ! 
transcriptionally_repressed - -[Term] -id: SO:0000894 -name: silenced_by_DNA_modification -def: "An epigenetic process where a gene is innactivated by DNA modifications, resulting in repression of transcription." [SO:ke] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0000895 -name: silenced_by_DNA_methylation -def: "An epigenetic process where a gene is innactivated by DNA methylation, resulting in repression of transcription." [SO:ke] -is_a: SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000896 -name: translationally_regulated_gene -def: "A gene that is translationally regulated." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000131 ! translationally_regulated -relationship: has_quality SO:0000131 ! implied link automatically realized ! translationally_regulated - -[Term] -id: SO:0000897 -name: allelically_excluded_gene -def: "A gene that is allelically_excluded." [SO:xp] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000137 ! allelically_excluded -relationship: has_quality SO:0000137 ! implied link automatically realized ! allelically_excluded - -[Term] -id: SO:0000898 -name: epigenetically_modified_gene -def: "A gene that is epigenetically modified." [SO:ke] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000133 ! epigenetically_modified -relationship: has_quality SO:0000133 ! implied link automatically realized ! epigenetically_modified - -[Term] -id: SO:0000899 -name: nuclear_mitochondrial -def: "An attribute describing a nuclear pseudogene of a mitochndrial gene." [SO:ke] -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0000900 -name: processed -def: "An attribute describing a pseudogene where by an mRNA was retrotransposed. 
The mRNA sequence is transcribed bak inot the genome, lacking introns and promotors, but often including a polyA tail." [SO:ke] -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0000901 -name: unequally_crossed_over -def: "An attribute describing a pseudogene that was created by tandem duplication and unequal crossing over during recombination." [SO:ke] -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0000902 -name: transgene -def: "A gene that is transgenic." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000781 ! transgenic -relationship: has_quality SO:0000781 ! implied link automatically realized ! transgenic - -[Term] -id: SO:0000903 -name: endogenous_retroviral_sequence -is_a: SO:0000751 ! proviral_location - -[Term] -id: SO:0000904 -name: rearranged_at_DNA_level -def: "An attribute to describe the sequence of a feature, where the DNA is rearranged." [SO:ke] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000905 -name: status -def: "An attribute describing the status of a feature, based on the available evidence." [SO:ke] -comment: This term is the hypernym of attributes and should not be annotated to. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000906 -name: independently_known -def: "Attribute to describe a feature that is independently_known - not predicted." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000907 -name: supported_by_sequence_similarity -def: "An attribute to describe a feature that has been predicted using sequence smilarity techniques." [SO:ke] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000908 -name: supported_by_domain_match -def: "An attribute to describe a feature that has been predicted using sequence smilarity of a known domain." [SO:ke] -is_a: SO:0000907 ! 
supported_by_sequence_similarity - -[Term] -id: SO:0000909 -name: supported_by_EST_or_cDNA -def: "An attribute to describe a feature that has been predicted using sequence smilarity to EST or cDNA data." [SO:ke] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000910 -name: orphan -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000911 -name: predicted_by_ab_initio_computation -def: "An attribute describng a feaure that is predticted by a computer program that did not rely on sequence similarity." [SO:ke] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000912 -name: asx_turn -alt_id: BS:00203 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Aspartate or Asparagine (Asx), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx_turn" EXACT [] -is_a: SO:0001128 ! turn - -[Term] -id: SO:0000913 -name: cloned_cDNA_insert -def: "A clone insert made from cDNA." [SO:xp] -is_a: SO:0000753 ! implied link automatically realized ! clone_insert -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000756 ! cDNA -relationship: has_quality SO:0000756 ! implied link automatically realized ! cDNA - -[Term] -id: SO:0000914 -name: cloned_genomic_insert -def: "A clone insert made from genomic DNA." [SO:xp] -is_a: SO:0000753 ! implied link automatically realized ! clone_insert -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000991 ! genomic_DNA -relationship: has_quality SO:0000991 ! implied link automatically realized ! genomic_DNA - -[Term] -id: SO:0000915 -name: engineered_insert -def: "A clone insert that is engineered." [SO:xp] -is_a: SO:0000753 ! implied link automatically realized ! clone_insert -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000783 ! 
engineered - -[Term] -id: SO:0000916 -name: edit_operation -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000917 -name: insert_U -def: "An edit to insert a U." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000918 -name: delete_U -def: "An edit to delete a uridine." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000919 -name: substitute_A_to_I -def: "An edit to substitute an I for an A." [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000920 -name: insert_C -def: "An edit to insert a cytidine." [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000921 -name: insert_dinucleotide -def: "An edit to insert a dinucleotide." [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000922 -name: substitute_C_to_U -def: "An edit to substitute an U for a C." [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000923 -name: insert_G -def: "An edit to insert a G." [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000924 -name: insert_GC -def: "An edit to insert a GC dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. 
Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000925 -name: insert_GU -def: "An edit to insert a GU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000926 -name: insert_CU -def: "An edit to insert a CU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000927 -name: insert_AU -def: "An edit to insert a AU dinucleotide." 
[SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000928 -name: insert_AA -def: "An edit to insert a AA dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000929 -name: edited_mRNA -def: "An mRNA that is edited." [SO:xp] -is_a: SO:0000873 ! implied link automatically realized ! edited_transcript -intersection_of: SO:0000873 ! edited_transcript -intersection_of: associated_with SO:0000916 ! edit_operation -intersection_of: has_quality SO:0000116 ! edited -relationship: associated_with SO:0000916 ! implied link automatically realized ! edit_operation - -[Term] -id: SO:0000930 -name: guide_RNA_region -def: "A region of guide RNA." 
[SO:ma] -is_a: SO:0000834 ! processed_transcript_region -relationship: part_of SO:0000602 ! guide_RNA - -[Term] -id: SO:0000931 -name: anchor_region -def: "A region of a guide_RNA that base-pairs to a target mRNA." [SO:jk] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000932 -name: pre_edited_mRNA -is_a: SO:0000120 ! protein_coding_primary_transcript - -[Term] -id: SO:0000933 -name: intermediate -def: "An attribute to describe a feature between stages of processing." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000934 -name: miRNA_target_site -def: "A miRNA target site is a binding site where the molecule is a micro RNA." [FB:cds] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000935 -name: edited_CDS -def: "A CDS that is edited." [SO:xp] -is_a: SO:0000316 ! implied link automatically realized ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: associated_with SO:0000916 ! edit_operation -intersection_of: has_quality SO:0000116 ! edited -relationship: associated_with SO:0000916 ! implied link automatically realized ! edit_operation -relationship: has_quality SO:0000116 ! implied link automatically realized ! edited - -[Term] -id: SO:0000936 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000937 -name: vertebrate_immune_system_feature -is_obsolete: true - -[Term] -id: SO:0000938 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000939 -name: vertebrate_immune_system_gene_recombination_signal_feature -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000940 -name: recombinationally_rearranged -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000941 -name: recombinationally_rearranged_vertebrate_immune_system_gene -def: "A recombinationally rearranged gene of the vertebrate immune system." [SO:xp] -is_a: SO:0000456 ! implied link automatically realized ! recombinationally_rearranged_gene -intersection_of: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: associated_with SO:0000301 ! vertebrate_immune_system_gene_recombination_feature -relationship: associated_with SO:0000301 ! implied link automatically realized ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000942 -name: attP_site -def: "An integration/excision site of a phage chromosome at which a recombinase acts to insert the phage DNA at a cognate integration/excision site on a bacterial chromosome." [SO:as] -synonym: "attP site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0001042 ! phage - -[Term] -id: SO:0000943 -name: attB_site -def: "An integration/excision site of a bacterial chromosome at which a recombinase acts to insert foreign DNA containing a cognate integration/excision site." [SO:as] -synonym: "attB site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000944 -name: attL_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attB_site and the 3' portion of attP_site." [SO:as] -synonym: "attBP'" RELATED [] -synonym: "attL site" RELATED [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000945 -name: attR_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attP_site and the 3' portion of attB_site." [SO:as] -synonym: "attPB'" RELATED [] -synonym: "attR site" EXACT [] -is_a: SO:0000946 ! 
integration_excision_site - -[Term] -id: SO:0000946 -name: integration_excision_site -def: "A region specifically recognised by a recombinase, which inserts or removes another region marked by a distinct cognate integration/excision site." [SO:as] -synonym: "attachment site" RELATED [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000947 -name: resolution_site -def: "A region specifically recognised by a recombinase, which separates a physically contiguous circle of DNA into two physically separate circles." [SO:as] -synonym: "res site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000948 -name: inversion_site -def: "A region specifically recognised by a recombinase, which inverts the region flanked by a pair of sites." [SO:ma] -comment: A target region for site-specific inversion of a DNA region and which carries binding sites for a site-specific recombinase and accessory proteins as well as the site for specific cleavage by the recombinase. -synonym: "inversion site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000949 -name: dif_site -def: "A site at which replicated bacterial circular chromosomes are decatenated by site specific resolvase." [SO:as] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000950 -name: attC_site -def: "An attC site is a sequence required for the integration of a DNA of an integron." [SO:as] -synonym: "attC site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000951 -name: eukaryotic_terminator -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000952 -name: oriV -def: "An origin of vegetative replication in plasmids and phages." [SO:as] -synonym: "origin of vegetative replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000953 -name: oriC -def: "An origin of bacterial chromosome replication." 
[SO:as] -synonym: "origin of bacterial chromosome replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000954 -name: DNA_chromosome -def: "Structural unit composed of a self-replicating, DNA molecule." [SO:ma] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000352 ! DNA -relationship: has_quality SO:0000352 ! implied link automatically realized ! DNA - -[Term] -id: SO:0000955 -name: double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded DNA molecule." [SO:ma] -is_a: SO:0000954 ! implied link automatically realized ! DNA_chromosome -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000985 ! double -relationship: has_quality SO:0000985 ! implied link automatically realized ! double - -[Term] -id: SO:0000956 -name: single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded DNA molecule." [SO:ma] -is_a: SO:0000954 ! implied link automatically realized ! DNA_chromosome -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000984 ! single -relationship: has_quality SO:0000984 ! implied link automatically realized ! single - -[Term] -id: SO:0000957 -name: linear_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear DNA molecule." [SO:ma] -is_a: SO:0000955 ! implied link automatically realized ! double_stranded_DNA_chromosome -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! implied link automatically realized ! linear - -[Term] -id: SO:0000958 -name: circular_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular DNA molecule." [SO:ma] -is_a: SO:0000955 ! implied link automatically realized ! 
double_stranded_DNA_chromosome -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! implied link automatically realized ! circular - -[Term] -id: SO:0000959 -name: linear_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear DNA molecule." [SO:ma] -is_a: SO:0000956 ! implied link automatically realized ! single_stranded_DNA_chromosome -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! implied link automatically realized ! linear - -[Term] -id: SO:0000960 -name: circular_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -is_a: SO:0000956 ! implied link automatically realized ! single_stranded_DNA_chromosome -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! implied link automatically realized ! circular - -[Term] -id: SO:0000961 -name: RNA_chromosome -def: "Structural unit composed of a self-replicating, RNA molecule." [SO:ma] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000356 ! RNA -relationship: has_quality SO:0000356 ! implied link automatically realized ! RNA - -[Term] -id: SO:0000962 -name: single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded RNA molecule." [SO:ma] -is_a: SO:0000961 ! implied link automatically realized ! RNA_chromosome -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000984 ! single -relationship: has_quality SO:0000984 ! implied link automatically realized ! 
single - -[Term] -id: SO:0000963 -name: linear_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear RNA molecule." [SO:ma] -is_a: SO:0000962 ! implied link automatically realized ! single_stranded_RNA_chromosome -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! implied link automatically realized ! linear - -[Term] -id: SO:0000964 -name: linear_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear RNA molecule." [SO:ma] -is_a: SO:0000965 ! implied link automatically realized ! double_stranded_RNA_chromosome -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! implied link automatically realized ! linear - -[Term] -id: SO:0000965 -name: double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded RNA molecule." [SO:ma] -is_a: SO:0000961 ! implied link automatically realized ! RNA_chromosome -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000985 ! double -relationship: has_quality SO:0000985 ! implied link automatically realized ! double - -[Term] -id: SO:0000966 -name: circular_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -is_a: SO:0000962 ! implied link automatically realized ! single_stranded_RNA_chromosome -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! implied link automatically realized ! circular - -[Term] -id: SO:0000967 -name: circular_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular RNA molecule." 
[SO:ma] -is_a: SO:0000965 ! implied link automatically realized ! double_stranded_RNA_chromosome -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! implied link automatically realized ! circular - -[Term] -id: SO:0000968 -name: replication_mode -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0000969 -name: rolling_circle -is_a: SO:0000971 ! DNA_replication_mode - -[Term] -id: SO:0000970 -name: theta_replication -is_a: SO:0000971 ! DNA_replication_mode - -[Term] -id: SO:0000971 -name: DNA_replication_mode -is_a: SO:0000968 ! replication_mode - -[Term] -id: SO:0000972 -name: RNA_replication_mode -is_a: SO:0000968 ! replication_mode - -[Term] -id: SO:0000973 -name: insertion_sequence -def: "A terminal_inverted_repeat_element that is bacterial and only encodes the functions required for its transposition between these inverted repeats." [SO:as] -synonym: "IS" RELATED [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000974 -name: minicircle_chromosome -is_a: SO:0000826 ! implied link automatically realized ! kinetoplast_chromosome -intersection_of: SO:0000826 ! kinetoplast_chromosome -intersection_of: has_origin SO:0000980 ! minicircle_sequence -relationship: has_origin SO:0000980 ! implied link automatically realized ! minicircle_sequence - -[Term] -id: SO:0000975 -name: minicircle_gene -is_a: SO:0000089 ! implied link automatically realized ! kinetoplast_gene -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: has_origin SO:0000980 ! minicircle_sequence -relationship: has_origin SO:0000980 ! implied link automatically realized ! minicircle_sequence - -[Term] -id: SO:0000976 -name: cryptic -is_a: SO:0000116 ! edited - -[Term] -id: SO:0000977 -name: anchor_binding_site -comment: Part of an edited transcript only. -is_a: SO:0000833 ! 
transcript_region - -[Term] -id: SO:0000978 -name: template_region -def: "A region of a guide_RNA that specifies the insertions and deletions of bases in the editing of a target mRNA." [SO:jk] -synonym: "information region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000979 -name: gRNA_encoding -def: "A non-protein_coding gene that encodes a guide_RNA." [SO:ma] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000980 -name: minicircle_sequence -is_a: SO:0000741 ! kinetoplast_sequence - -[Term] -id: SO:0000981 -name: rho_dependent_bacterial_terminator -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000982 -name: rho_independent_bacterial_terminator -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000983 -name: strand_attribute -comment: Attributes added to describe the diferent kinds of replicon. SO workshop, September 2006. -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000984 -name: single -comment: Attributes added to describe the diferent kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000985 -name: double -comment: Attributes added to describe the diferent kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000986 -name: topology_attribute -comment: Attributes added to describe the diferent kinds of replicon. SO workshop, September 2006. -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000987 -name: linear -comment: Attributes added to describe the diferent kinds of replicon. SO workshop, September 2006. -is_a: SO:0000986 ! topology_attribute - -[Term] -id: SO:0000988 -name: circular -comment: Attributes added to describe the diferent kinds of replicon. SO workshop, September 2006. -is_a: SO:0000986 ! 
topology_attribute - -[Term] -id: SO:0000989 -name: class_II_RNA -def: "Small non-coding RNA (59-60 nt long) containing 5' and 3' ends that are predicted to come together to form a stem structure. Identified in the social amoeba\nDictyostelium discoideum and localized in the cytoplasm." [http://nar.oxfordjournals.org/cgi/content/full/32/15/4646] -synonym: "class II RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000990 -name: class_I_RNA -def: "Small non-coding RNA (55-65 nt long) containing highly conserved 5' and 3' ends (16 and 8 nt, respectively) that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [http://nar.oxfordjournals.org/cgi/content/full/32/15/4646] -comment: Requested by Karen Pilcher - Dictybase. song-Term Tracker-1574577. -synonym: "class I RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000991 -name: genomic_DNA -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000992 -name: BAC_cloned_genomic_insert -comment: Requested by Andy Schroder - Flybase Harvard, Nov 2006. -is_a: SO:0000914 ! implied link automatically realized ! cloned_genomic_insert -intersection_of: SO:0000914 ! cloned_genomic_insert -intersection_of: derives_from SO:0000153 ! BAC -relationship: derives_from SO:0000153 ! implied link automatically realized ! BAC - -[Term] -id: SO:0000993 -name: consensus -comment: Term added Dec 06 to comply with mapping to MGED terms.\nIt should be used to generate consensus regions. The specific cross product terms they require are consensus_region and consensus_mRNA. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000994 -name: consensus_region -comment: DO not obsolete without considering MGED mapping. -is_a: SO:0000001 ! implied link automatically realized ! region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000993 ! 
consensus -relationship: has_quality SO:0000993 ! implied link automatically realized ! consensus - -[Term] -id: SO:0000995 -name: consensus_mRNA -comment: DO not obsolete without considering MGED mapping. -is_a: SO:0000234 ! implied link automatically realized ! mRNA -is_a: SO:0000994 ! implied link automatically realized ! consensus_region -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000996 -name: predicted_gene -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000732 ! predicted -relationship: has_quality SO:0000732 ! implied link automatically realized ! predicted - -[Term] -id: SO:0000997 -name: gene_fragment -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000842 ! implied link automatically realized ! gene_component_region -intersection_of: SO:0000842 ! gene_component_region -intersection_of: has_quality SO:0000731 ! fragment -relationship: has_quality SO:0000731 ! implied link automatically realized ! fragment - -[Term] -id: SO:0000998 -name: recursive_splice_site -def: "Recursive splicing is a mechanism that splices large introns by sub dividing the intron at non exonic elements and alternate exons." [http://www.genetics.org/cgi/content/full/170/2/661] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000999 -name: BAC_end -def: "A region of sequence from the end of a BAC clone that may provide a highly specific marker." [SO:ke] -comment: Requested by Keith Boroevich December, 2006. -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000153 ! BAC - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide which functions as a part of the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S rRNA" RELATED [] -is_a: SO:0000650 ! 
small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "23S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -subset: SOFA -synonym: "25S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001003 -name: solo_LTR -def: "A recombination product between the 2 LTR of the same element." [SO:ke] -comment: Requested by Hadi Quesneville January 2007. -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0001004 -name: low_complexity -is_a: SO:0000905 ! status - -[Term] -id: SO:0001005 -name: low_complexity_region -is_a: SO:0000001 ! implied link automatically realized ! region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001004 ! low_complexity -relationship: has_quality SO:0001004 ! implied link automatically realized ! low_complexity - -[Term] -id: SO:0001006 -name: prophage -def: "A phage genome after it has established in the host genome in a latent/immune state either as a plasmid or as an integrated \"island\"." [GO:jl] -is_a: SO:0000113 ! proviral_region - -[Term] -id: SO:0001007 -name: cryptic_prophage -def: "A remnant of an integrated prophage in the host genome or an \"island\" in the host genome that includes phage like-genes." [GO:jl] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001008 -name: tetraloop -def: "A base-paired stem with loop of 4 non-hydrogen bonded nucleotides." [SO:ke] -is_a: SO:0000313 ! stem_loop - -[Term] -id: SO:0001009 -name: DNA_constraint_sequence -def: "A double-stranded DNA used to control macromolecular structure and function." [http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au\]&dispmax=50] -synonym: "DNA constraint" EXACT [] -is_a: SO:0000442 ! 
ds_oligo - -[Term] -id: SO:0001010 -name: i_motif -def: "A cytosine rich domain whereby strands associate both inter- and intramolecularly at moderately acidic pH." [http://nar.oxfordjournals.org/cgi/content/abstract/26/20/4696] -synonym: "short intercalated motif" EXACT [] -is_a: SO:0000142 ! DNA_sequence_secondary_structure - -[Term] -id: SO:0001011 -name: PNA -def: "Peptide nucleic acid, is a chemical not known to occur naturally but is artificially synthesized and used in some biological research and medical treatments.\nThe PNA backbone is composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [SO:ke] -synonym: "peptide nucleic acid" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0001012 -name: DNAzyme -def: "A DNA sequence with catalytic activity." [SO:cb] -comment: Added by request from Colin Batchelor. -synonym: "catalytic DNA" EXACT [] -synonym: "deoxyribozyme" RELATED [] -synonym: "DNA enzyme" EXACT [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0001013 -name: MNP -def: "A multiple nucleotide polymorphism with alleles of common\nlength > 1, for example AAA/TTT." [http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?rs=rs2067431] -synonym: "multiple nucleotide polymorphism" RELATED [] -is_a: SO:1000005 ! complex_substitution - -[Term] -id: SO:0001014 -name: intron_domain -comment: Requested by Colin Batchelor, Feb 2007. -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000188 ! intron - -[Term] -id: SO:0001015 -name: wobble_base_pair -def: "A type of non-canonical base pairing, most commonly between G\nand U, which is important for the secondary structure of RNAs. It has\nsimilar thermodynamic stability to the Watson-Crick pairing. Wobble base\npairs only have two hydrogen bonds. Other wobble base pair possibilities\nare I-A, I-U and I-C." [PMID:11256617] -synonym: "wobble pair" EXACT [] -is_a: SO:0000028 ! 
base_pair - -[Term] -id: SO:0001016 -name: internal_guide_sequence -def: "A purine-rich sequence in the group I introns which determines the\nlocations of the splice sites in group I intron splicing and has catalytic\nactivity." [SO:cb] -synonym: "IGS" EXACT [] -is_a: SO:0001014 ! intron_domain -relationship: part_of SO:0000587 ! group_I_intron - -[Term] -id: SO:0001017 -name: silent_mutation -comment: Added in March 2007 in after meeting with pharmgkb. -is_a: SO:1000132 ! mutation - -[Term] -id: SO:0001018 -name: epitope -def: "A region of a macromolecule that is recognized by the immune system." [http://en.wikipedia.org/wiki/Epitope] -comment: Requested by Trish Whetzel. -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -is_a: SO:0001060 ! sequence_variant - -[Term] -id: SO:0001020 -name: mutation_affecting_copy_number -is_a: SO:1000132 ! mutation - -[Term] -id: SO:0001021 -name: chromosome_breakpoint -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0001022 -name: inversion_breakpoint -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001023 -name: allele -def: "An allele is one of a set of coexisting sequence variants of a gene." [SO:immuno_workshop] -is_a: SO:0001060 ! sequence_variant -relationship: variant_of SO:0000704 ! gene - -[Term] -id: SO:0001024 -name: haplotype -def: "A haplotype is one of a set of coexisting sequence variants of a haplotype block." [SO:immuno_workshop] -is_a: SO:0001060 ! sequence_variant -relationship: variant_of SO:0000355 ! haplotype_block - -[Term] -id: SO:0001025 -name: polymorphic_sequence_variant -def: "A sequence variant that is segregating in one or more natural populations of a species." [SO:immuno_workshop] -is_a: SO:0001060 ! 
sequence_variant - -[Term] -id: SO:0001026 -name: genome -def: "A genome is the sum of genetic material within a cell or virion." [SO:immuno_workshop] -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0001027 -name: genotype -def: "A genotype is a variant genome, complete or incomplete." [SO:immuno_workshop] -is_a: SO:0001060 ! sequence_variant -relationship: variant_of SO:0001026 ! genome - -[Term] -id: SO:0001028 -name: diplotype -def: "A diplotype is a pair of haplotypes from a given individual. It is a genotype where the phase is known." [SO:immuno_workshop] -is_a: SO:0001027 ! genotype - -[Term] -id: SO:0001029 -name: direction_attribute -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0001030 -name: forward -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001031 -name: reverse -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001032 -name: mitochondrial_DNA -comment: This terms is used by MO. -is_a: SO:0000737 ! implied link automatically realized ! mitochondrial_sequence -intersection_of: SO:0000737 ! mitochondrial_sequence -intersection_of: has_quality SO:0000352 ! DNA -relationship: has_quality SO:0000352 ! implied link automatically realized ! DNA - -[Term] -id: SO:0001033 -name: chloroplast_DNA -comment: This term is used by MO. -is_a: SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0001034 -name: mitron -def: "An intron from whose debranched product an miRNA is derived." [SO:ma] -comment: Ruby et al. Nature 448:83 desribe a new class of miRNAs that are derived from debranched introns. -is_a: SO:0000276 ! implied link automatically realized ! miRNA -intersection_of: SO:0000276 ! miRNA -intersection_of: derives_from SO:0000188 ! intron -relationship: derives_from SO:0000188 ! implied link automatically realized ! intron - -[Term] -id: SO:0001035 -name: piRNA -def: "A small non coding RNA , part of a silencing system that prevents the spreading of selfish genetic elements." 
[SO:ke] -synonym: "piwi-associated RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001036 -name: arginyl_tRNA -def: "A tRNA sequence that has an arginine anticodon, and a 3' alanine binding region." [SO:ke] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -synonym: "MGE" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001038 -name: extrachromosomal_mobile_genetic_element -def: "An MGE that is not integrated into the host chromosome." [SO:ke] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001040 -name: integrated_plasmid -def: "A plasmid sequence that is integrated within the host chromosome." [SO:ke] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0001041 -name: virus -def: "The region of nucleotide sequence of a virus, a submicroscopic particle that replicates by infecting a host cell." [SO:ke] -synonym: "viral sequence" EXACT [] -synonym: "virus sequence" EXACT [] -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element - -[Term] -id: SO:0001042 -name: phage -def: "The nucleotide sequence of a virus that infects bacteria." [SO:ke] -synonym: "bacteriophage" EXACT [] -synonym: "phage sequence" EXACT [] -is_a: SO:0001041 ! virus - -[Term] -id: SO:0001043 -name: attCtn_site -def: "An attachment site located on a conjugative transposon and used for site-specific integration of a conjugative transposon." [Phigo:at] -synonym: "attCtn site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000371 ! 
conjugative_transposon - -[Term] -id: SO:0001044 -name: nuclear_mt_pseudogene -def: "A pseudogene that is nuclear/mitochondrial." [SO:xp] -synonym: "nuclear mitochondrial pseudogene" EXACT [] -synonym: "NUMT" EXACT [] -is_a: SO:0000336 ! implied link automatically realized ! pseudogene -intersection_of: SO:0000336 ! pseudogene -intersection_of: has_quality SO:0000899 ! nuclear_mitochondrial -relationship: has_quality SO:0000899 ! implied link automatically realized ! nuclear_mitochondrial - -[Term] -id: SO:0001045 -name: cointegrated_replicon -def: "A MGE region consisting of two fused replicons/plasmids resulting from a replicative transposition event." [Phigo:at] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0001046 -name: IRLinv_site -def: "Component of the inversion site located at the left of a region susceptible to site-specific inversion." [Phigo:at] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001047 -name: IRRinv_site -def: "Component of the inversion site located at the right of a region susceptible to site-specific inversion." [Phigo:at] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001048 -name: inversion_site_part -def: "A region located within an inversion site." [SO:ke] -comment: A term created to allow the parts of an inversion site have an is_a path back to the root. -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0001049 -name: defective_conjugative_transposon -def: "An island that contains genes for integration/excision and the gene and site for the initiation of intercellular transfer by conjugation. It can be complemented for transfer by a conjugative transposon." [Phigo:ariane] -synonym: "defective conjugative transposon" EXACT [] -is_a: SO:0000772 ! 
genomic_island - -[Term] -id: SO:0001050 -name: repeat_fragment -def: "A portion of a repeat, interupted by the insertion of another element." [SO:ke] -comment: Requested by Chris Smith, and others at Flybase to help annotate nested repeats. -is_a: SO:0000657 ! implied link automatically realized ! repeat_region -is_a: SO:0000840 ! repeat_component -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000731 ! fragment -relationship: has_quality SO:0000731 ! implied link automatically realized ! fragment - -[Term] -id: SO:0001051 -name: nested_region -is_a: SO:0000001 ! region - -[Term] -id: SO:0001052 -name: nested_repeat -is_a: SO:0001051 ! implied link automatically realized ! nested_region -intersection_of: SO:0001051 ! nested_region -intersection_of: derives_from SO:0000657 ! repeat_region -intersection_of: has_part SO:0000001 ! region -intersection_of: has_part SO:0001050 ! repeat_fragment -relationship: derives_from SO:0000657 ! implied link automatically realized ! repeat_region -relationship: has_part SO:0001050 ! implied link automatically realized ! repeat_fragment - -[Term] -id: SO:0001053 -name: nested_transposon -is_a: SO:0001051 ! implied link automatically realized ! nested_region -intersection_of: SO:0001051 ! nested_region -intersection_of: derives_from SO:0000101 ! transposable_element -intersection_of: has_part SO:0000001 ! region -intersection_of: has_part SO:0001054 ! transposon_fragment -relationship: derives_from SO:0000101 ! implied link automatically realized ! transposable_element -relationship: has_part SO:0001054 ! implied link automatically realized ! transposon_fragment - -[Term] -id: SO:0001054 -name: transposon_fragment -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000731 ! fragment -relationship: has_quality SO:0000731 ! implied link automatically realized ! 
fragment - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." [SO:ke] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001057 -name: enhanceosome -is_a: SO:0000165 ! enhancer - -[Term] -id: SO:0001058 -name: promoter_targeting_sequence -def: "A transcriptional_cis_regulatory_region that restricts the activity of a CRM to a single promoter and which functions only when both itself and an insulator are located between the CRM and the promoter." [SO:regcreative] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001060 -name: sequence_variant -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0001061 -name: propeptide_cleavage_site -namespace: BS -alt_id: BS:00063 -def: "The propeptide_cleavage_site is the arginine/lysine boundary on a propeptide where cleavage occurs." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "propeptide_cleavage_site" EXACT [] -is_a: SO:0001062 ! propeptide - -[Term] -id: SO:0001062 -name: propeptide -namespace: BS -alt_id: BS:00077 -def: "Describes part of a peptide chain which is cleaved off during the formation of the mature protein." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "propep" RELATED [] -synonym: "propeptide" EXACT [] -is_a: SO:0001063 ! 
immature_peptide_region - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the a region of peptide sequence that is cleaved during maturation." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "immature_peptide_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001064 -name: active_peptide -alt_id: BS:00076 -def: "Active peptides are proteins which are biologically active, released from a precursor molecule. Hormones, neuropeptides, antimicrobial peptides, are active peptides. They are typically short (<40 amino acids) in length." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "active_peptide" EXACT [] -synonym: "peptide" EXACT [] -is_a: SO:0000419 ! mature_protein_region - -[Term] -id: SO:0001066 -name: compositionally_biased_region -alt_id: BS:00068 -def: "Extent of a compositionally biased region. Used for homopolymeric stretches of residues and also for regions which are rich in a particular amino acid. Not used for a run of less than 4 residues." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "compbias" EXACT [] -synonym: "compositional bias" EXACT [] -synonym: "compositionally biased" EXACT [] -synonym: "compositionally_biased_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001067 -name: polypeptide_motif -alt_id: BS:00032 -def: "A sequence motif is a short (up to 20 amino acids) region of biological interest. Such motifs, although they are too short to constitute functional domains, share sequence similarities and are conserved in different proteins. They display a common function (protein-binding, subcellular location etc.)." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "motif" RELATED [] -synonym: "polypeptide_motif" EXACT [] -is_a: SO:0000417 ! 
polypeptide_domain - -[Term] -id: SO:0001068 -name: polypeptide_repeat -alt_id: BS:00070 -def: "A polypeptide_repeat is a single copy of an internal sequence repetition." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "polypeptide_repeat" EXACT [] -synonym: "repeat" RELATED [] -is_a: SO:0000417 ! polypeptide_domain - -[Term] -id: SO:0001069 -name: polypeptide_structural_domain -alt_id: BS:00134 -def: "A polypeptide domain is a structural domain that is self-stabilizing and folds independently of the rest of the protein chain." [EBIBS:GAR, PMID:7020376] -subset: biosapiens -synonym: "polypeptide_structural_domain" EXACT [] -synonym: "structural domain" EXACT [] -is_a: SO:0000417 ! polypeptide_domain -is_a: SO:0001070 ! structural_region - -[Term] -id: SO:0001070 -name: structural_region -alt_id: BS:00337 -def: "Backbone conformation of the polypeptide." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "structural_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001071 -name: membrane_structure -alt_id: BS:00128 -def: "Arrangement of the polypeptide with respect to the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "membrane_structure" EXACT [] -is_a: SO:0001070 ! structural_region - -[Term] -id: SO:0001072 -name: extramembrane -alt_id: BS:00154 -def: "Extent of the region not transversing the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "extramembrane" EXACT [] -synonym: "topo_dom" EXACT [] -is_a: SO:0001071 ! membrane_structure - -[Term] -id: SO:0001073 -name: cytoplasm_location -alt_id: BS:00145 -def: "Region of the peptide in the cytoplasm." [EBIBS:GAR] -subset: biosapiens -synonym: "cytoplasm_location" EXACT [] -synonym: "inside" RELATED [] -is_a: SO:0001072 ! extramembrane - -[Term] -id: SO:0001074 -name: non_cytoplasm_location -alt_id: BS:00144 -def: "Region of peptide not in the cytoplasm. N.B. This could be inside an organelle within the cell." 
[EBIBS:GAR] -subset: biosapiens -synonym: "non_cytoplasm_location" EXACT [] -synonym: "outside" RELATED [] -is_a: SO:0001072 ! extramembrane - -[Term] -id: SO:0001075 -name: intramembrane -alt_id: BS:00156 -def: "Extent of the region present in the lipid bilayer." [EBIBS:GAR] -subset: biosapiens -synonym: "intramembrane" EXACT [] -is_a: SO:0001071 ! membrane_structure - -[Term] -id: SO:0001076 -name: membrane_loop -alt_id: BS:00155 -def: "Extent of region which enters the membrane bilayer but emerges on the same side which it entered." [EBIBS:GAR] -subset: biosapiens -synonym: "membrane_loop" EXACT [] -is_a: SO:0001075 ! intramembrane - -[Term] -id: SO:0001077 -name: transmembrane -alt_id: BS:00158 -def: "Extent of region transversing the lipid bilayer." [EBIBS:GAR, UniProt:curator_manual] -subset: biosapiens -synonym: "transmem" RELATED [] -synonym: "transmembrane" EXACT [] -is_a: SO:0001075 ! intramembrane - -[Term] -id: SO:0001078 -name: polypeptide_secondary_structure -alt_id: BS:00003 -def: "A region of peptide with secondary structure has hydrogen bonding along the peptide chain that causes a defined conformation of the chain." [EBIBS:GAR] -comment: Biosapien term was secondary_structure. -subset: biosapiens -synonym: "2nary structure" EXACT [] -synonym: "secondary structure" EXACT [] -synonym: "secondary structure region" RELATED [] -synonym: "secondary_structure" EXACT [] -is_a: SO:0001070 ! structural_region - -[Term] -id: SO:0001079 -name: polypeptide_structural_motif -alt_id: BS:0000338 -def: "Motif is a three-dimensional structural element within the chain, which appears also in a variety of other molecules. Unlike a domain, a motif does not need to form a stable globular unit." [EBIBS:GAR] -subset: biosapiens -synonym: "structural_motif" EXACT [] -is_a: SO:0001070 ! 
structural_region - -[Term] -id: SO:0001080 -name: coiled_coil -alt_id: BS:00041 -def: "A coiled coil is a structural motif in proteins, in which alpha-helices are coiled together like the strands of a rope." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "coiled" RELATED [] -synonym: "coiled_coil" EXACT [] -is_a: SO:0001079 ! polypeptide_structural_motif - -[Term] -id: SO:0001081 -name: helix_turn_helix -alt_id: BS:00147 -def: "A motif comprising two helices separated by a turn." [EBIBS:GAR] -subset: biosapiens -synonym: "DNA binding motif" RELATED [] -synonym: "helix_turn_helix" EXACT [] -synonym: "HTH" EXACT [] -is_a: SO:0001079 ! polypeptide_structural_motif - -[Term] -id: SO:0001082 -name: polypeptide_sequencing_information -alt_id: BS:00125 -def: "Incompatibility in the sequence due to some experimental problem." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "sequencing_information" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0001083 -name: non_adjacent_residues -alt_id: BS:00182 -def: "Indicates that two consecutive residues in a fragment sequence are not consecutive in the full-length protein and that there are a number of unsequenced residues between them." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "non consecutive" EXACT [] -synonym: "non_adjacent_residues" EXACT [] -synonym: "non_cons" EXACT [] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001084 -name: non_terminal_residue -alt_id: BS:00072 -def: "The residue at an extremity of the sequence is not the terminal residue." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "" RELATED [] -synonym: "non terminal" EXACT [] -synonym: "non_ter" EXACT [] -synonym: "non_terminal_residue" EXACT [] -is_a: SO:0001082 ! 
polypeptide_sequencing_information - -[Term] -id: SO:0001085 -name: sequence_conflict -alt_id: BS:00069 -def: "Different sources report differing sequences." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "conflict" EXACT [] -synonym: "sequence_conflict" EXACT [] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001086 -name: sequence_uncertainty -alt_id: BS:00181 -def: "Describes the positions in a sequence where the authors are unsure about the sequence assignment." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "sequence_uncertainty" EXACT [] -synonym: "unsure" RELATED [] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001087 -name: cross_link -namespace: BS -alt_id: BS:00178 -def: "Posttranslationally formed amino acid bonds." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "cross_link" EXACT [] -synonym: "crosslink" RELATED [] -is_a: SO:0001090 ! covalent_binding_site - -[Term] -id: SO:0001088 -name: disulfide_bond -namespace: BS -alt_id: BS:00028 -def: "The covalent bond between sulfur atoms that binds two peptide chains or different parts of one peptide chain and is a structural determinant in many protein molecules." [EBIBS:GAR, UniProt:curation_manual] -comment: 2 discreet & joined. -subset: biosapiens -synonym: "disulfid" RELATED [] -synonym: "disulfide" RELATED [] -synonym: "disulfide bond" RELATED [] -synonym: "disulfide_bond" EXACT [] -synonym: "disulphide" EXACT [] -synonym: "disulphide bond" RELATED [] -is_a: SO:0001090 ! covalent_binding_site - -[Term] -id: SO:0001089 -name: post_translational_modification -namespace: BS -alt_id: BS:00052 -def: "A transformation that occurs in a protein after it has been synthesized, which may regulate, stabilize, crosslink, or introduce new chemical functionalities in the protein." [EBIBS:GAM, UniProt:curation_manual] -comment: Discrete. 
-subset: biosapiens -synonym: "mod_res" RELATED [] -synonym: "modified residue" EXACT [] -synonym: "post_translational_modification" EXACT [] -is_a: SO:0001090 ! covalent_binding_site - -[Term] -id: SO:0001090 -name: covalent_binding_site -namespace: BS -alt_id: BS:00246 -def: "Binding involving a covalent bond." [EBIBS:GAR] -subset: biosapiens -synonym: "covalent_binding_site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001091 -name: non_covalent_binding_site -namespace: BS -alt_id: BS:00029 -def: "Binding site for any chemical group (co-enzyme, prosthetic group, etc.)." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "binding" RELATED [uniprot:curation] -synonym: "binding site" RELATED [] -synonym: "non_covalent_binding_site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001092 -name: metal_binding -namespace: BS -alt_id: BS:00027 -def: "Residue is part of a binding site for a metal ion." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "metal" RELATED [] -synonym: "metal_binding" EXACT [] -is_a: SO:0001091 ! non_covalent_binding_site - -[Term] -id: SO:0001093 -name: protein_protein_interaction -namespace: BS -alt_id: BS:00131 -def: "Residues involved in protein-protein interactions." [EBIBS:GAR, UniProt:Curation_manual] -subset: biosapiens -synonym: "protein_protein_interaction" EXACT [] -is_a: SO:0001091 ! non_covalent_binding_site - -[Term] -id: SO:0001094 -name: Ca_contact_site -namespace: BS -alt_id: BS:00186 -def: "Residue involved in contact with calcium." [EBIBS:GAR] -subset: biosapiens -synonym: "Ca_contact_site" EXACT [] -is_a: SO:0001092 ! metal_binding - -[Term] -id: SO:0001095 -name: Co_contact_site -namespace: BS -alt_id: BS:00136 -def: "Residue involved in contact with cobalt." [EBIBS:GAR] -subset: biosapiens -synonym: "Co_contact_site" EXACT [] -is_a: SO:0001092 ! 
metal_binding - -[Term] -id: SO:0001096 -name: Cu_contact_site -namespace: BS -alt_id: BS:00146 -def: "Residue involved in contact with copper." [EBIBS:GAR] -subset: biosapiens -synonym: "ca_bind" RELATED [uniprot:curation] -synonym: "Cu_contact_site" EXACT [] -is_a: SO:0001092 ! metal_binding - -[Term] -id: SO:0001097 -name: Fe_contact_site -namespace: BS -alt_id: BS:00137 -def: "Residue involved in contact with iron." [EBIBS:GAR] -subset: biosapiens -synonym: "ca_bind" RELATED [uniprot:curation] -synonym: "Fe_contact_site" EXACT [] -is_a: SO:0001092 ! metal_binding - -[Term] -id: SO:0001098 -name: Mg_contact_site -namespace: BS -alt_id: BS:00187 -def: "Residue involved in contact with magnesium." [EBIBS:GAR] -subset: biosapiens -synonym: "Mg_contact_site" EXACT [] -is_a: SO:0001092 ! metal_binding - -[Term] -id: SO:0001099 -name: Mn_contact_site -namespace: BS -alt_id: BS:00140 -def: "Residue involved in contact with manganese." [EBIBS:GAR] -subset: biosapiens -synonym: "Mn_contact_site" EXACT [] -is_a: SO:0001092 ! metal_binding - -[Term] -id: SO:0001100 -name: Mo_contact_site -namespace: BS -alt_id: BS:00141 -def: "Residue involved in contact with molybdenum." [EBIBS:GAR] -subset: biosapiens -synonym: "Mo_contact_site" EXACT [] -is_a: SO:0001092 ! metal_binding - -[Term] -id: SO:0001101 -name: Ni_contact_site -namespace: BS -alt_id: BS:00142 -def: "Residue involved in contact with nickel." [EBIBS:GAR] -subset: biosapiens -synonym: "Ni_contact_site" EXACT [] -is_a: SO:0001092 ! metal_binding - -[Term] -id: SO:0001102 -name: W_contact_site -namespace: BS -alt_id: BS:00143 -def: "Residue involved in contact with tungsten." [EBIBS:GAR] -subset: biosapiens -synonym: "W_contact_site" EXACT [] -is_a: SO:0001092 ! metal_binding - -[Term] -id: SO:0001103 -name: Zn_contact_site -namespace: BS -alt_id: BS:00185 -def: "Residue involved in contact with zinc." [EBIBS:GAR] -subset: biosapiens -synonym: "Zn_contact_site" EXACT [] -is_a: SO:0001092 ! 
metal_binding - -[Term] -id: SO:0001104 -name: active_site_residue -namespace: BS -alt_id: BS:00026 -def: "Amino acid involved in the activity of an enzyme." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "act_site" RELATED [] -synonym: "active site residue" EXACT [] -synonym: "catalitic residues" RELATED [] -synonym: "site" BROAD [] -is_a: SO:0001105 ! protein_ligand_interaction - -[Term] -id: SO:0001105 -name: protein_ligand_interaction -namespace: BS -alt_id: BS:00157 -def: "Residues which interact with a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "protein-ligand interaction" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001106 -name: asx_motif -alt_id: BS:00202 -def: "A motif of five consecutive residues and two H-bonds in which: Residue(i) is Aspartate or Asparagine (Asx), side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3), main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "asx_motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001107 -name: beta_bulge -alt_id: BS:00208 -def: "A motif of three residues within a beta-sheet in which the main chains of two consecutive residues are H-bonded to that of the third, and in which the dihedral angles are as follows: Residue(i): -140 degrees < phi(l) -20 degrees , -90 degrees < psi(l) < 40 degrees. Residue (i+1): -180 degrees < phi < -25 degrees or +120 degrees < phi < +180 degrees, +40 degrees < psi < +180 degrees or -180 degrees < psi < -120 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "beta_bulge" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001108 -name: beta_bulge_loop -alt_id: BS:00209 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds. 
Beta bulge loops often occur at the loop ends of beta-hairpins." [EBIBS:GAR, Http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "beta_bulge_loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001109 -name: beta_bulge_loop_five -alt_id: BS:00210 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+4), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+3), these loops have an RL nest at residues i+2 and i+3." [EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "beta_bulge_loop_five" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001110 -name: beta_bulge_loop_six -alt_id: BS:00211 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+5), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+4), these loops have an RL nest at residues i+3 and i+4." [EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "beta_bulge_loop_six" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001111 -name: beta_strand -alt_id: BS:00042 -def: "A beta strand describes a single length of polypeptide chain that forms part of a beta sheet. A single continuous stretch of amino acids adopting an extended conformation of hydrogen bonds between the N-O and the C=O of another part of the peptide. This forms a secondary protein structure in which two or more extended polypeptide regions are hydrogen-bonded to one another in a planar array." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "beta_strand" EXACT [] -synonym: "strand" RELATED [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001112 -name: antiparallel_beta_strand -alt_id: BS:0000341 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (one running N-terminal to C-terminal and one running C-terminal to N-terminal). Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i) and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they form two mutual backbone hydrogen bonds to each other's flanking peptide groups; this is known as a close pair of hydrogen bonds. The peptide backbone dihedral angles (phi, psi) are about (-140 degrees, 135 degrees) in antiparallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "antiparallel_beta_strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001113 -name: parallel_beta_strand -alt_id: BS:00151 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (both running N-terminal to C-terminal). This orientation is slightly less stable because it introduces nonplanarity in the inter-strand hydrogen bonding pattern. Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i)and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they do not hydrogen bond to each other; rather, one residue forms hydrogen bonds to the residues that flank the other (but not vice versa). For example, residue i may form hydrogen bonds to residues j - 1 and j + 1; this is known as a wide pair of hydrogen bonds. By contrast, residue j may hydrogen-bond to different residues altogether, or to none at all. The dihedral angles (phi, psi) are about (-120 degrees, 115 degrees) in parallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. 
-subset: biosapiens -synonym: "parallel_beta_strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001114 -name: helix -alt_id: BS:00152 -def: "A helix is a secondary_structure conformation where the peptide backbone forms a coil." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "helix" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001115 -name: left_handed_helix -alt_id: BS:00222 -def: "A left handed helix is a region of peptide where the coiled conformation turns in an anticlockwise, left handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix-l" RELATED [] -synonym: "left handed helix" EXACT [] -is_a: SO:0001114 ! helix - -[Term] -id: SO:0001116 -name: right_handed_helix -alt_id: BS:0000339 -def: "A right handed helix is a region of peptide where the coiled conformation turns in a clockwise, right handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix" RELATED [] -synonym: "right handed helix" EXACT [] -is_a: SO:0001114 ! helix - -[Term] -id: SO:0001117 -name: alpha_helix -alt_id: BS:00040 -def: "The helix has 3.6 residues per turn which corersponds to a translation of 1.5 angstroms (= 0.15 nm) along the helical axis. Every backbone N-H group donates a hydrogen bond to the backbone C=O group of the amino acid four residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "a-helix" EXACT [] -synonym: "alpha_helix" EXACT [] -synonym: "helix" RELATED [] -is_a: SO:0001116 ! right_handed_helix - -[Term] -id: SO:0001118 -name: pi_helix -alt_id: BS:00153 -def: "The pi helix has 4.1 residues per turn and a translation of 1.15 (=0.115 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid five residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "pi_helix" EXACT [] -is_a: SO:0001116 ! 
right_handed_helix - -[Term] -id: SO:0001119 -name: three_ten_helix -alt_id: BS:0000340 -def: "The 3-10 helix has 3 residues per turn with a translation of 2.0 angstroms (=0.2 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid three residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "3(10) helix" EXACT [] -synonym: "3-10 helix" EXACT [] -synonym: "310 helix" EXACT [] -synonym: "three_ten_helix" EXACT [] -is_a: SO:0001116 ! right_handed_helix - -[Term] -id: SO:0001120 -name: nest -alt_id: BS:00223 -def: "A motif of two consecutive residues with dihedral angles. Nest should not have Proline as any residue. Nests frequently occur as parts of other motifs such as Schellman loops." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest" EXACT [] -synonym: "nest_motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001121 -name: nest_left_right -alt_id: BS:00224 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_left_right" EXACT [] -synonym: "nest_lr" EXACT [] -is_a: SO:0001120 ! nest - -[Term] -id: SO:0001122 -name: nest_right_left -alt_id: BS:00225 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_right_left" EXACT [] -synonym: "nest_rl" EXACT [] -is_a: SO:0001120 ! 
nest - -[Term] -id: SO:0001123 -name: schellmann_loop -alt_id: BS:00226 -def: "A motif of six or seven consecutive residues that contains two H-bonds." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "paperclip" RELATED [] -synonym: "paperclip loop" RELATED [] -synonym: "schellmann_loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001124 -name: schellmann_loop_seven -alt_id: BS:00228 -def: "Wild type: A motif of seven consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+6), the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+5)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann_loop_seven" EXACT [] -synonym: "seven-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001125 -name: schellmann_loop_six -alt_id: BS:00227 -def: "Common Type: A motif of six consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+5) the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann_loop_six" EXACT [] -synonym: "six-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001126 -name: st_motif -alt_id: BS:00229 -def: "A motif of five consecutive residues and two hydrogen bonds in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3) , the main-chain CO group of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_motif" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001127 -name: st_staple -alt_id: BS:00230 -def: "A motif of four or five consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain OH of residue(i) is H-bonded to the main-chain CO of residue(i3) or (i4), Phi angles of residues(i1), (i2) and (i3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_staple" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001128 -name: turn -alt_id: BS:00148 -def: "A reversal in the direction of the backbone of a protein that is stabilized by hydrogen bond between backbone NH and CO groups, involving no more than 4 amino acid residues." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "turn" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001129 -name: asx_turn_left_handed_type_one -namespace: BS -alt_id: BS:00206 -def: "Left handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx_turn_il" RELATED [] -synonym: "asx_turn_left_handed_type_one" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001130 -name: asx_turn_left_handed_type_two -namespace: BS -alt_id: BS:00204 -def: "Left handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx_turn_iil" EXACT [] -synonym: "asx_turn_left_handed_type_two" EXACT [] -is_a: SO:0000912 ! 
asx_turn - -[Term] -id: SO:0001131 -name: asx_turn_right_handed_type_two -namespace: BS -alt_id: BS:00205 -def: "Right handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx_turn_iir" EXACT [] -synonym: "asx_turn_right_handed_type_two" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001132 -name: asx_turn_type_right_handed_type_one -namespace: BS -alt_id: BS:00207 -def: "Right handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAM, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx_turn_ir" EXACT [] -synonym: "asx_turn_type_right_handed_type_one" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001133 -name: beta_turn -namespace: BS -alt_id: BS:00212 -def: "A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles of the second and third residues, which are the basis for sub-categorization." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta_turn" EXACT [] -is_a: SO:0001128 ! turn - -[Term] -id: SO:0001134 -name: beta_turn_left_handed_type_one -namespace: BS -alt_id: BS:00215 -def: "Left handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles:- Residue(i+1): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees. 
Residue(i+2): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta_turn_il" EXACT [] -synonym: "beta_turn_left_handed_type_one" EXACT [] -synonym: "Type I' beta turn" EXACT [] -synonym: "Type I' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001135 -name: beta_turn_left_handed_type_two -namespace: BS -alt_id: BS:00213 -def: "Left handed type II: A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees > phi > -20 degrees, +80 degrees > psi > +180 degrees. Residue(i+2): +20 degrees > phi > +140 degrees, -40 degrees > psi > +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta_turn_iil" EXACT [] -synonym: "beta_turn_left_handed_type_two" EXACT [] -synonym: "Type II' beta turn" EXACT [] -synonym: "Type II' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001136 -name: beta_turn_right_handed_type_one -namespace: BS -alt_id: BS:00216 -def: "Right handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+2): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta_turn_ir" EXACT [] -synonym: "beta_turn_right_handed_type_one" EXACT [] -synonym: "Type I beta turn" EXACT [] -synonym: "Type I turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001137 -name: beta_turn_right_handed_type_two -namespace: BS -alt_id: BS:00214 -def: "Right handed type II:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, +80 degrees < psi < +180 degrees. Residue(i+2): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta_turn_iir" EXACT [] -synonym: "beta_turn_right_handed_type_two" EXACT [] -synonym: "Type II beta turn" EXACT [] -synonym: "Type II turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001138 -name: gamma_turn -alt_id: BS:00219 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma_turn" EXACT [] -is_a: SO:0001128 ! turn - -[Term] -id: SO:0001139 -name: gamma_turn_classic -alt_id: BS:00220 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=75.0 - psi(i+1)=-64.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "classic gamma turn" EXACT [] -synonym: "gamma_turn_classic" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001140 -name: gamma_turn_inverse -alt_id: BS:00221 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=-79.0 - psi(i+1)=69.0." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma_turn_inverse" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001141 -name: st_turn -alt_id: BS:00231 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [EBIBS:GAM, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_turn" EXACT [] -is_a: SO:0001128 ! turn - -[Term] -id: SO:0001142 -name: st_turn_left_handed_type_one -alt_id: BS:00234 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_turn_il" EXACT [] -synonym: "st_turn_left_handed_type_one" EXACT [] -is_a: SO:0001141 ! st_turn - -[Term] -id: SO:0001143 -name: st_turn_left_handed_type_two -alt_id: BS:00232 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_turn_iil" EXACT [] -synonym: "st_turn_left_handed_type_two" EXACT [] -is_a: SO:0001141 ! st_turn - -[Term] -id: SO:0001144 -name: st_turn_right_handed_type_one -alt_id: BS:00235 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_turn_ir" EXACT [] -synonym: "st_turn_right_handed_type_one" EXACT [] -is_a: SO:0001141 ! st_turn - -[Term] -id: SO:0001145 -name: st_turn_right_handed_type_two -alt_id: BS:00233 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_turn_iir" EXACT [] -synonym: "st_turn_right_handed_type_two" EXACT [] -is_a: SO:0001141 ! st_turn - -[Term] -id: SO:0001146 -name: sequence_variation_attribute -alt_id: BS:00336 -def: "An attribute to catgorize the different kinds of sequence variation (alteration). Alternative sequence due to naturally occuring events such as polymorphisms and altermatve splicing or experimental methods such as site directed mutagenesis." [EBIBS:GAR, SO:ke] -comment: For example, was a substitution natural or mutated as part of an experiment? This term is added to merge the biosapiens term sequence_variations. -subset: biosapiens -synonym: "sequence_variations" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001147 -name: natural_variant_site -alt_id: BS:00071 -def: "Describes the natural sequence variants due to polymorphisms, disease-associated mutations, RNA editing and variations between strains, isolates or cultivars." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "natural_variant" EXACT [] -synonym: "sequence variation" RELATED [] -synonym: "variant" RELATED [] -is_a: SO:0001146 ! sequence_variation_attribute - -[Term] -id: SO:0001148 -name: mutated_variant_site -alt_id: BS:00036 -def: "Site which has been experimentally altered." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. 
-subset: biosapiens -synonym: "mutagen" RELATED [] -synonym: "mutagenisis" RELATED [] -synonym: "mutated_site" EXACT [] -is_a: SO:0001146 ! sequence_variation_attribute - -[Term] -id: SO:0001149 -name: alternate_sequence_site -alt_id: BS:00073 -alt_id: SO:0001065 -def: "Description of sequence variants produced by alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "alternative_sequence" EXACT [] -synonym: "isoform" NARROW [] -synonym: "sequence variation" RELATED [] -synonym: "var_seq" RELATED [] -synonym: "varsplic" NARROW [] -is_a: SO:0001146 ! sequence_variation_attribute - -[Term] -id: SO:0001150 -name: beta_turn_type_six -namespace: BS -subset: biosapiens -synonym: "Type VI beta turn" EXACT [] -synonym: "Type VI turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001151 -name: beta_turn_type_six_a -namespace: BS -subset: biosapiens -synonym: "Type VI a beta turn" EXACT [] -synonym: "Type VI a turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001152 -name: beta_turn_type_six_a_one -namespace: BS -subset: biosapiens -synonym: "Type VI a1 beta turn" EXACT [] -synonym: "Type VI a1 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001153 -name: beta_turn_type_six_a_two -namespace: BS -subset: biosapiens -synonym: "Type VI a2 beta turn" EXACT [] -synonym: "Type VI a2 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001154 -name: beta_turn_type_six_b -namespace: BS -subset: biosapiens -synonym: "Type VI b beta turn" EXACT [] -synonym: "Type VI b turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001155 -name: beta_turn_type_eight -namespace: BS -subset: biosapiens -synonym: "Type VIII beta turn" EXACT [] -synonym: "Type VIII turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0005836 -name: regulatory_region -def: "A DNA sequence that controls the expression of a gene." [http://www.genpromag.com/scripts/glossary.asp?LETTER=R] -subset: SOFA -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0005837 -name: snRNA_4.5S_primary_transcript -def: "A primary transcript encoding a 4.5S snRNA." [SO:ke] -synonym: "4.5S snRNA primary transcript" EXACT [] -is_a: SO:0000231 ! snRNA_primary_transcript - -[Term] -id: SO:0005839 -name: snRNA_4.5S -synonym: "4.5S snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0005841 -name: methylation_guide_snoRNA -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0005843 -name: rRNA_cleavage_snoRNA -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0005845 -name: single_exon -is_a: SO:0000147 ! exon - -[Term] -id: SO:0005847 -name: cassette_array_member -is_a: SO:0005848 ! gene_cassette_member - -[Term] -id: SO:0005848 -name: gene_cassette_member -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0005849 -name: gene_subarray_member -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0005850 -name: primer_binding_site -def: "Non-covalent primer binding site for initiation of replication, transcription, or reverse transcription." [http:www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -is_a: SO:0000409 ! binding_site -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0005851 -name: gene_array -def: "An array includes two or more genes, or two or more gene subarrays, contiguously arranged where the individual genes, or subarrays, are either identical in sequence, or essentially so." [SO:ma] -comment: This would include, for example, a cluster of genes each encoding the major ribosomal RNAs and a cluster of histone gene subarrays. -is_a: SO:0005855 ! 
gene_group - -[Term] -id: SO:0005852 -name: gene_subarray -def: "A subarray is, by defintition, a member of a gene array (SO:0005851); the members of a subarray may differ substantially in sequence, but are closely related in function." [SO:ma] -comment: This would include, for example, a cluster of genes encoding different histones. -is_a: SO:0005851 ! gene_array - -[Term] -id: SO:0005853 -name: gene_cassette -def: "A non-functional gene that, when captured by recombination forms a functional gene." [SO:ma] -comment: This would include, for example, the mating type gene cassettes of S. cerevisiae. -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0005854 -name: gene_cassette_array -def: "An array of non-functional genes whose members, when captured by recombination form functional genes." [SO:ma] -comment: This would include, for example, the arrays of non-functional VSG genes of Trypanosomes. -is_a: SO:0005853 ! gene_cassette - -[Term] -id: SO:0005855 -name: gene_group -def: "A collection of related genes." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0005856 -name: selenocysteine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0005857 -name: selenocysteinyl_tRNA -def: "A tRNA sequence that has a selenocysteine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "selenocysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "selenocysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0005858 -name: syntenic_region -def: "A region in which two or more pairs of homologous markers occur on the same chromosome in two or more species." [http://tbase.jax.org/docs/glossary.html] -is_a: SO:0000853 ! implied link automatically realized ! homologous_region -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000860 ! syntenic -relationship: has_quality SO:0000860 ! 
implied link automatically realized ! syntenic - -[Term] -id: SO:1000002 -name: substitution -def: "Any change in genomic DNA caused by a single event." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0001059 ! sequence_alteration -relationship: sequence_of SO:0000048 ! substitute - -[Term] -id: SO:1000004 -name: partially_characterised_change_in_DNA_sequence -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000007 ! uncharacterised_change_in_nucleotide_sequence - -[Term] -id: SO:1000005 -name: complex_substitution -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000007 -name: uncharacterised_change_in_nucleotide_sequence -def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000008 -name: point_mutation -def: "A single nucleotide change which has occurred at the same position of a corresponding nucleotide in a reference sequence." [SO:immuno_workshop] -subset: SOFA -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000009 -name: transition -def: "Change of a pyrimidine nucleotide, C or T, into an other pyrimidine nucleotide, or change of a purine nucleotide, A or G, into an other purine nucleotide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000008 ! point_mutation - -[Term] -id: SO:1000010 -name: pyrimidine_transition -def: "A substitution of a pyrimidine, C or T, for another pyrimidine." [SO:ke] -is_a: SO:1000009 ! 
transition - -[Term] -id: SO:1000011 -name: C_to_T_transition -def: "A transition of a cytidine to a thymine." [SO:ke] -is_a: SO:1000010 ! pyrimidine_transition - -[Term] -id: SO:1000012 -name: C_to_T_transition_at_pCpG_site -def: "The transition of cytidine to thymine occurring at a pCpG site as a consequence of the spontaneous deamination of 5'-methylcytidine." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000011 ! C_to_T_transition - -[Term] -id: SO:1000013 -name: T_to_C_transition -is_a: SO:1000010 ! pyrimidine_transition - -[Term] -id: SO:1000014 -name: purine_transition -def: "A substitution of a purine, A or G, for another purine." [SO:ke] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000015 -name: A_to_G_transition -def: "A transition of an adenine to a guanine." [SO:ke] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000016 -name: G_to_A_transition -def: "A transition of a guanine to an adenine." [SO:ke] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000017 -name: transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G, or vice versa." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000008 ! point_mutation - -[Term] -id: SO:1000018 -name: pyrimidine_to_purine_transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G." [SO:ke] -is_a: SO:1000017 ! transversion - -[Term] -id: SO:1000019 -name: C_to_A_transversion -def: "A transversion from cytidine to adenine." [SO:ke] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000020 -name: C_to_G_transversion -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000021 -name: T_to_A_transversion -def: "A transversion from T to A." [SO:ke] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000022 -name: T_to_G_transversion -def: "A transversion from T to G." [SO:ke] -is_a: SO:1000018 ! 
pyrimidine_to_purine_transversion - -[Term] -id: SO:1000023 -name: purine_to_pyrimidine_transversion -def: "Change of a purine nucleotide, A or G , into a pyrimidine nucleotide C or T." [SO:ke] -is_a: SO:1000017 ! transversion - -[Term] -id: SO:1000024 -name: A_to_C_transversion -def: "A transversion from adenine to cytidine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000025 -name: A_to_T_transversion -def: "A transversion from adenine to thymine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000026 -name: G_to_C_transversion -def: "A transversion from guanine to cytidine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000027 -name: G_to_T_transversion -def: "A transversion from guanine to thymine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000028 -name: intrachromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000029 -name: chromosomal_deletion -def: "An incomplete chromosome." [SO:ke] -synonym: "(bacteria)&Dgr;" RELATED [] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(fungi)D" RELATED [] -is_a: SO:0000550 ! aneuploid_chromosome -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000030 -name: chromosomal_inversion -synonym: "(bacteria)IN" RELATED [] -synonym: "(Drosophila)In" RELATED [] -synonym: "(fungi)In" RELATED [] -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000031 -name: interchromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000032 -name: indel -def: "A hybrid term (insertion/deletion) to describe sequence length change when the direction of the change is unspecified." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:1000033 -name: nucleotide_deletion -def: "One or more continuous nucleotides are excised from the sequence." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000032 ! indel - -[Term] -id: SO:1000034 -name: nucleotide_insertion -def: "One or more nucleotides are added between two adjacent nucleotides in the sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000032 ! indel - -[Term] -id: SO:1000035 -name: nucleotide_duplication -def: "One or more nucleotides are added between two adjacent nucleotides in the sequence; the inserted sequence derives from, or is identical in sequence to, nucleotides adjacent to insertion point." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000034 ! nucleotide_insertion - -[Term] -id: SO:1000036 -name: inversion -def: "A continuous nucleotide sequence is inverted in the same position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0001059 ! sequence_alteration -relationship: sequence_of SO:0000047 ! invert - -[Term] -id: SO:1000037 -name: chromosomal_duplication -def: "An extra chromosome." [SO:ke] -synonym: "(Drosophila)Dp" RELATED [] -synonym: "(fungi)Dp" RELATED [] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:1000038 -name: intrachromosomal_duplication -is_a: SO:1000028 ! intrachromosomal_mutation -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000039 -name: direct_tandem_duplication -is_a: SO:1000173 ! tandem_duplication - -[Term] -id: SO:1000040 -name: inverted_tandem_duplication -is_a: SO:1000173 ! tandem_duplication - -[Term] -id: SO:1000041 -name: intrachromosomal_transposition -synonym: "(Drosophila)Tp" RELATED [] -is_a: SO:0000453 ! transposition -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:1000042 -name: compound_chromosome -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000043 -name: Robertsonian_fusion -is_a: SO:1000044 ! 
chromosomal_translocation - -[Term] -id: SO:1000044 -name: chromosomal_translocation -synonym: "(Drosophila)T" RELATED [] -synonym: "(fungi)T" RELATED [] -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000045 -name: ring_chromosome -synonym: "(Drosophila)R" RELATED [] -synonym: "(fungi)C" RELATED [] -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000046 -name: pericentric_inversion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000047 -name: paracentric_inversion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000048 -name: reciprocal_chromosomal_translocation -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000049 -name: mutation_affecting_transcript -def: "Any change in mature, spliced and processed, RNA that results from a change in the corresponding DNA sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000132 ! mutation - -[Term] -id: SO:1000050 -name: mutation_causing_no_change_in_transcript -def: "No effect on the state of the RNA." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000052 -name: mutation_affecting_complex_change_in_transcript -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000054 -name: mutation_affecting_coding_sequence -def: "Any of the amino acid coding triplets of a gene are affected by the DNA mutation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000055 -name: mutation_causing_initiator_codon_change_in_transcript -def: "The DNA mutation changes, usually destroys, the first coding triplet of a gene. Usually prevents translation although another initiator codon may be used." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000056 ! 
mutation_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000056 -name: mutation_causing_amino_acid_coding_codon_change_in_transcript -def: "The DNA mutation affects the amino acid coding sequence of a gene; this region includes both the initiator and terminator codons." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000054 ! mutation_affecting_coding_sequence - -[Term] -id: SO:1000057 -name: mutation_causing_synonymous_codon_change_in_transcript -def: "The changed codon has the same translation product as the original codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000056 ! mutation_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000058 -name: mutation_causing_non_synonymous_codon_change_in_transcript -def: "A DNA point mutation that causes a substitution of an amino acid by an other." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "non-synonymous codon change in transcript" EXACT [] -is_a: SO:1000056 ! mutation_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000059 -name: mutation_causing_missense_codon_change_in_transcript -def: "The nucleotide change in the codon leads to a new codon coding for a new amino acid." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000058 ! mutation_causing_non_synonymous_codon_change_in_transcript - -[Term] -id: SO:1000060 -name: mutation_causing_conservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change does not change the gross properties (size, charge, hydrophobicity) of the amino acid at that position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -is_a: SO:1000059 ! 
mutation_causing_missense_codon_change_in_transcript - -[Term] -id: SO:1000061 -name: mutation_causing_nonconservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change changes the gross properties (size, charge, hydrophobicity) of the amino acid in that position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -is_a: SO:1000059 ! mutation_causing_missense_codon_change_in_transcript - -[Term] -id: SO:1000062 -name: mutation_causing_nonsense_codon_change_in_transcript -def: "The nucleotide change in the codon triplet creates a terminator codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000056 ! mutation_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000063 -name: mutation_causing_terminator_codon_change_in_transcript -def: "The nucleotide change in the codon triplet changes the stop codon, causing an elongated transcript sequence." [SO:ke] -is_a: SO:1000054 ! mutation_affecting_coding_sequence - -[Term] -id: SO:1000064 -name: mutation_affecting_reading_frame -def: "An umbrella term for terms describing an effect of a mutation on the frame of translation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000054 ! mutation_affecting_coding_sequence - -[Term] -id: SO:1000065 -name: frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three." [SO:ke] -synonym: "out of frame mutation" RELATED [] -is_a: SO:1000064 ! mutation_affecting_reading_frame - -[Term] -id: SO:1000066 -name: plus_1_frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, due to the insertion of a nucleotide." [SO:ke] -is_a: SO:1000065 ! 
frameshift_mutation - -[Term] -id: SO:1000067 -name: minus_1_frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, due to the deletion of a nucleotide." [SO:ke] -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000068 -name: plus_2_frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, due to the insertion of two nucleotides." [SO:ke] -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000069 -name: minus_2_frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, due to the deletion of two nucleotides." [SO:ke] -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000070 -name: mutation_affecting_transcript_processing -def: "Mutation affects the way in which the primary transcriptional product is processed to form the mature transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000071 -name: mutation_affecting_splicing -def: "A mutation that affects the way in which the primary transcriptional product is processed to form the mature transcript, specifically by the removal (splicing) of intron sequences." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000132 ! mutation - -[Term] -id: SO:1000072 -name: splice_donor_mutation -def: "A mutation that affects the splice donor sequence." [SO:ke] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000073 -name: splice_acceptor_mutation -def: "A mutation that affects the splice acceptor sequence." [SO:ke] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000074 -name: cryptic_splice_activator_mutation -def: "A kind of mutation that creates a new (functional) splice site." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000071 ! 
mutation_affecting_splicing - -[Term] -id: SO:1000075 -name: mutation_affecting_editing -def: "Mutation affects the editing of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000070 ! mutation_affecting_transcript_processing - -[Term] -id: SO:1000076 -name: mutation_affecting_transcription -def: "Mutation affects the process of transcription, its initiation, progression or termination." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000078 -name: mutation_decreasing_rate_of_transcription -def: "A mutation that decreases the rate a which transcription of the sequence occurs." [SO:ke] -is_a: SO:1000081 ! mutation_affecting_rate_of_transcription - -[Term] -id: SO:1000079 -name: mutation_affecting_transcript_sequence -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000080 -name: mutation_increasing_rate_of_transcription -is_a: SO:1000081 ! mutation_affecting_rate_of_transcription - -[Term] -id: SO:1000081 -name: mutation_affecting_rate_of_transcription -def: "A mutation that alters the rate a which transcription of the sequence occurs." [SO:ke] -is_a: SO:1000076 ! mutation_affecting_transcription - -[Term] -id: SO:1000082 -name: mutation_affecting_transcript_stability -def: "Mutation affects the stability of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000083 -name: mutation_increasing_transcript_stability -def: "Mutation increases the stability (half-life) of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000082 ! mutation_affecting_transcript_stability - -[Term] -id: SO:1000084 -name: mutation_decreasing_transcript_stability -def: "Mutation decreases the stability (half-life) of the transcript." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000082 ! mutation_affecting_transcript_stability - -[Term] -id: SO:1000085 -name: mutation_affecting_level_of_transcript -def: "A mutation that causes a change in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000086 -name: mutation_decreasing_level_of_transcript -def: "A mutation that causes a decrease in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -is_a: SO:1000085 ! mutation_affecting_level_of_transcript - -[Term] -id: SO:1000087 -name: mutation_increasing_level_of_transcript -def: "A mutation that causes an increase in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -is_a: SO:1000085 ! mutation_affecting_level_of_transcript - -[Term] -id: SO:1000088 -name: mutation_affecting_translational_product -def: "Mutation causes a change in primary translation product of a transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000132 ! mutation - -[Term] -id: SO:1000089 -name: mutation_causing_no_change_of_translational_product -def: "The change at RNA level does not lead to any change in polypeptide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000090 -name: mutation_causing_uncharacterised_change_of_translational_product -def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000088 ! 
mutation_affecting_translational_product - -[Term] -id: SO:1000091 -name: mutation_causing_partially_characterised_change_of_translational_product -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000090 ! mutation_causing_uncharacterised_change_of_translational_product - -[Term] -id: SO:1000092 -name: mutation_causing_complex_change_of_translational_product -def: "Any mutation effect that is known at nucleotide level but can not be explained by using other key terms." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000093 -name: mutation_causing_amino_acid_substitution -def: "The replacement of a single amino acid by an other." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000094 -name: mutation_causing_conservative_amino_acid_substitution -is_a: SO:1000093 ! mutation_causing_amino_acid_substitution - -[Term] -id: SO:1000095 -name: mutation_causing_nonconservative_amino_acid_substitution -is_a: SO:1000093 ! mutation_causing_amino_acid_substitution - -[Term] -id: SO:1000096 -name: mutation_causing_amino_acid_insertion -def: "The insertion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000097 -name: mutation_causing_amino_acid_deletion -def: "The deletion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! 
mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000098 -name: mutation_causing_polypeptide_truncation -def: "The translational product is truncated at its C-terminus, usually a result of a nonsense codon change in transcript (SO:1000062)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000099 -name: mutation_causing_polypeptide_elongation -def: "The extension of the translational product at either (or both) the N-terminus and/or the C-terminus." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000100 -name: mutation_causing_polypeptide_N_terminal_elongation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "polypeptide N-terminal elongation" EXACT [] -is_a: SO:1000099 ! mutation_causing_polypeptide_elongation - -[Term] -id: SO:1000101 -name: mutation_causing_polypeptide_C_terminal_elongation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "polypeptide C-terminal elongation" EXACT [] -is_a: SO:1000099 ! mutation_causing_polypeptide_elongation - -[Term] -id: SO:1000102 -name: mutation_affecting_level_of_translational_product -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000103 -name: mutation_decreasing_level_of_translation_product -is_a: SO:1000102 ! mutation_affecting_level_of_translational_product - -[Term] -id: SO:1000104 -name: mutation_increasing_level_of_translation_product -is_a: SO:1000102 ! mutation_affecting_level_of_translational_product - -[Term] -id: SO:1000105 -name: mutation_affecting_polypeptide_amino_acid_sequence -is_a: SO:1000088 ! 
mutation_affecting_translational_product - -[Term] -id: SO:1000106 -name: mutation_causing_inframe_polypeptide_N_terminal_elongation -synonym: "inframe polypeptide N-terminal elongation" EXACT [] -is_a: SO:1000100 ! mutation_causing_polypeptide_N_terminal_elongation - -[Term] -id: SO:1000107 -name: mutation_causing_out_of_frame_polypeptide_N_terminal_elongation -synonym: "out of frame polypeptide N-terminal elongation" EXACT [] -is_a: SO:1000100 ! mutation_causing_polypeptide_N_terminal_elongation - -[Term] -id: SO:1000108 -name: mutaton_causing_inframe_polypeptide_C_terminal_elongation -synonym: "inframe_polypeptide C-terminal elongation" EXACT [] -is_a: SO:1000101 ! mutation_causing_polypeptide_C_terminal_elongation - -[Term] -id: SO:1000109 -name: mutation_causing_out_of_frame_polypeptide_C_terminal_elongation -synonym: "out of frame polypeptide C-terminal elongation" EXACT [] -is_a: SO:1000101 ! mutation_causing_polypeptide_C_terminal_elongation - -[Term] -id: SO:1000110 -name: frame_restoring_mutation -def: "A mutation that reverts the sequence of a previous frameshift mutation back to the initial frame." [SO:ke] -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000111 -name: mutation_affecting_3D_structure_of_polypeptide -def: "A mutation that changes the amino acid sequence of the peptide in such a way that it changes the 3D structure of the molecule." [SO:ke] -synonym: "mutation affecting 3D-structure of polypeptide" EXACT [] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000112 -name: mutation_causing_no_3D_structural_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000113 -name: mutation_causing_uncharacterised_3D_structural_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000114 -name: mutation_causing_partially_characterised_3D_structural_change -is_a: SO:1000113 ! 
mutation_causing_uncharacterised_3D_structural_change - -[Term] -id: SO:1000115 -name: mutation_causing_complex_3D_structural_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000116 -name: mutation_causing_conformational_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000117 -name: mutation_affecting_polypeptide_function -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000118 -name: mutation_causing_loss_of_function_of_polypeptide -synonym: "loss of function of polypeptide" RELATED [] -is_a: SO:1000117 ! mutation_affecting_polypeptide_function - -[Term] -id: SO:1000119 -name: mutation_causing_inactive_ligand_binding_site -is_a: SO:1000118 ! mutation_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000120 -name: mutation_causing_inactive_catalytic_site -is_a: SO:1000119 ! mutation_causing_inactive_ligand_binding_site - -[Term] -id: SO:1000121 -name: mutation_causing_polypeptide_localization_change -is_a: SO:1000117 ! mutation_affecting_polypeptide_function - -[Term] -id: SO:1000122 -name: mutation_causing_polypeptide_post_translational_processing_change -synonym: "polypeptide post-translational processing affected" EXACT [] -is_a: SO:1000117 ! mutation_affecting_polypeptide_function -is_a: SO:1000118 ! mutation_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000123 -name: polypeptide_post_translational_processing_affected -synonym: "polypeptide_post-translational_processing_affected" RELATED [] -is_obsolete: true - -[Term] -id: SO:1000124 -name: mutation_causing_partial_loss_of_function_of_polypeptide -synonym: "partial loss of function of polypeptide" EXACT [] -is_a: SO:1000118 ! mutation_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000125 -name: mutation_causing_gain_of_function_of_polypeptide -synonym: "gain of function of polypeptide" EXACT [] -is_a: SO:1000117 ! 
mutation_affecting_polypeptide_function - -[Term] -id: SO:1000126 -name: mutation_affecting_transcript_secondary_structure -def: "A mutation that affects the secondary structure (folding) of the RNA transcript molecule." [SO:ke] -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000127 -name: mutation_causing_compensatory_transcript_secondary_structure_mutation -is_a: SO:1000126 ! mutation_affecting_transcript_secondary_structure - -[Term] -id: SO:1000132 -name: mutation -def: "An event that changes nucleotide sequence." [SO:ke] -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:1000134 -name: mutation_causing_polypeptide_fusion -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000136 -name: autosynaptic_chromosome -synonym: "(Drosophila)A" RELATED [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000138 -name: homo_compound_chromosome -synonym: "homo-compound chromosome" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:1000140 -name: hetero_compound_chromosome -synonym: "hetero-compound chromosome" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:1000141 -name: chromosome_fission -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000142 -name: dexstrosynaptic_chromosome -is_a: SO:1000136 ! autosynaptic_chromosome - -[Term] -id: SO:1000143 -name: laevosynaptic_chromosome -is_a: SO:1000136 ! autosynaptic_chromosome - -[Term] -id: SO:1000144 -name: free_duplication -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000145 -name: free_ring_duplication -synonym: "(Drosophila)R" RELATED [] -is_a: SO:1000045 ! ring_chromosome -is_a: SO:1000144 ! free_duplication - -[Term] -id: SO:1000146 -name: complex_chromosomal_mutation -is_a: SO:1000183 ! 
chromosome_structure_variation - -[Term] -id: SO:1000147 -name: deficient_translocation -def: "A translocation in which one of the four broken ends loses a segment before re-joining." [fb:reference_manual] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfT" RELATED [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000148 -name: inversion_cum_translocation -def: "The first two breaks are in the same chromosome, and the region between them is rejoined in inverted order to the other side of the first break, such that both sides of break one are present on the same chromosome. The remaining free ends are joined as a translocation with those resulting from the third break." [fb:reference_manual] -synonym: "(Drosophila)InT" RELATED [] -synonym: "(Drosophila)T" RELATED [] -synonym: "inversion cum translocation" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000149 -name: bipartite_duplication -def: "The (large) region between the first two breaks listed is lost, and the two flanking segments (one of them centric) are joined as a translocation to the free ends resulting from the third break." [fb:reference_manual] -synonym: "(Drosophila)bDp" RELATED [] -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000150 -name: cyclic_translocation -def: "Three breaks in three different chromosomes. The centric segment resulting from the first break listed is joined to the acentric segment resulting from the second, rather than the third." [fb:reference_manual] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000151 -name: bipartite_inversion -def: "Three breaks in the same chromosome; both central segments are inverted in place (i.e., they are not transposed)." [fb:reference_manual] -synonym: "(Drosophila)bIn" RELATED [] -is_a: SO:1000030 ! 
chromosomal_inversion - -[Term] -id: SO:1000152 -name: uninverted_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)eDp" RELATED [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000153 -name: inverted_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)iDp" RELATED [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000154 -name: insertional_duplication -def: "A chromosome duplication involving the insertion of a duplicated region." [SO:ke] -synonym: "(Drosophila)Dpp" RELATED [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000155 -name: interchromosomal_transposition -synonym: "(Drosophila)Tp" RELATED [] -is_a: SO:0000453 ! transposition -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000156 -name: inverted_interchromosomal_transposition -synonym: "(Drosophila)iTp" RELATED [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000157 -name: uninverted_interchromosomal_transposition -synonym: "(Drosophila)eTp" RELATED [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000158 -name: inverted_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)iTp" RELATED [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000041 ! 
intrachromosomal_transposition - -[Term] -id: SO:1000159 -name: uninverted_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)eTp" RELATED [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000160 -name: unoriented_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [fb:reference_manual] -synonym: "(Drosophila)uDp" RELATED [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000161 -name: unorientated_interchromosomal_transposition -synonym: "(Drosophila)uTp" RELATED [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000162 -name: unorientated_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [fb:reference_manual] -synonym: "(Drosophila)uTp" RELATED [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000170 -name: uncharacterised_chromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000171 -name: deficient_inversion -def: "Three breaks in the same chromosome; one central region lost, the other inverted." [fb:reference_manual] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfIn" RELATED [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000173 -name: tandem_duplication -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:1000175 -name: partially_characterised_chromosomal_mutation -is_a: SO:1000170 ! 
uncharacterised_chromosomal_mutation - -[Term] -id: SO:1000177 -name: mutation_causing_uncharacterised_change_in_transcript -def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000179 -name: mutation_causing_partially_characterised_change_in_transcript -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000177 ! mutation_causing_uncharacterised_change_in_transcript - -[Term] -id: SO:1000180 -name: mutation_affecting_gene_structure -def: "A kind of mutation that affects the structure of a gene." [SO:ke] -is_a: SO:1000132 ! mutation - -[Term] -id: SO:1000181 -name: mutation_causing_gene_fusion -def: "A kind of mutation that affects the structure of a gene by causing a fusion to another gene." [SO:ke] -is_a: SO:1000180 ! mutation_affecting_gene_structure - -[Term] -id: SO:1000182 -name: chromosome_number_variation -def: "A kind of chromosome variation where the chromosome complement that is not an exact multiple of the haploid number." [SO:ke] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:1000183 -name: chromosome_structure_variation -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:1000184 -name: mutation_causes_exon_loss -def: "A mutation that affects splicing and causes an exon loss." [SO:ke] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000185 -name: mutation_causes_intron_gain -def: "Mutation causes an intron to be gained by the processed transcript; usually a result of a donor acceptor mutation (SO:1000072)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000186 -name: cryptic_splice_donor_activation -is_a: SO:1000074 ! 
cryptic_splice_activator_mutation - -[Term] -id: SO:1001186 -name: cryptic_splice_acceptor_activation -is_a: SO:1000074 ! cryptic_splice_activator_mutation - -[Term] -id: SO:1001187 -name: alternatively_spliced_transcript -def: "A transcript that is alternatively spliced." [SO:xp] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000877 ! alternatively_spliced -relationship: has_quality SO:0000877 ! implied link automatically realized ! alternatively_spliced - -[Term] -id: SO:1001188 -name: encodes_1_polypeptide -def: "A gene that is alternately spliced, but encodes only one polypeptide." [SO:ke] -is_a: SO:0000463 ! encodes_alternately_spliced_transcripts - -[Term] -id: SO:1001189 -name: encodes_greater_than_1_polypeptide -def: "A gene that is alternately spliced, and encodes more than one polypeptide." [SO:ke] -is_a: SO:0000463 ! encodes_alternately_spliced_transcripts - -[Term] -id: SO:1001190 -name: encodes_different_polypeptides_different_stop -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that do overlap peptide sequence, but use different stop codon." [SO:ke] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001191 -name: encodes_overlapping_peptides_different_start -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that do overlap peptide sequence, but use different start codon." [SO:ke] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001192 -name: encodes_disjoint_polypeptides -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that do not overlap peptide sequence." [SO:ke] -is_a: SO:1001189 ! 
encodes_greater_than_1_polypeptide - -[Term] -id: SO:1001193 -name: encodes_overlapping_polypeptides_different_start_and_stop -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that do overlap peptide sequence, but use different start and stop codons." [SO:ke] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001194 -name: alternatively_spliced_gene_encoding_greater_than_1_polypeptide_coding_regions_overlapping -is_obsolete: true - -[Term] -id: SO:1001195 -name: encodes_overlapping_peptides -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that do overlap peptide sequence." [SO:ke] -is_a: SO:1001189 ! encodes_greater_than_1_polypeptide - -[Term] -id: SO:1001196 -name: cryptogene -def: "A maxicircle gene so extensively edited that it cannot be matched to its edited mRNA sequence." [SO:ma] -is_a: SO:0000654 ! implied link automatically realized ! maxicircle_gene -intersection_of: SO:0000654 ! maxicircle_gene -intersection_of: has_quality SO:0000976 ! cryptic -relationship: has_quality SO:0000976 ! implied link automatically realized ! cryptic - -[Term] -id: SO:1001197 -name: dicistronic_primary_transcript -def: "A primary transcript that has the quality dicistronic." [SO:xp] -is_a: SO:0000079 ! implied link automatically realized ! dicistronic_transcript -is_a: SO:0000631 ! implied link automatically realized ! polycistronic_primary_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:1001217 -name: member_of_regulon -is_a: SO:0000081 ! 
gene_array_member - -[Term] -id: SO:1001244 -name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non_overlapping -synonym: "alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non-overlapping" RELATED [] -is_obsolete: true - -[Term] -id: SO:1001246 -name: CDS_independently_known -def: "A CDS with the evidence status of being independently known." [SO:xp] -is_a: SO:0000316 ! implied link automatically realized ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000906 ! independently_known -relationship: has_quality SO:0000906 ! implied link automatically realized ! independently_known - -[Term] -id: SO:1001247 -name: orphan_CDS -def: "A CDS whose predicted amino acid sequence is unsupported by any experimental evidence or by any match with any other known sequence." [SO:ma] -is_a: SO:1001254 ! implied link automatically realized ! CDS_predicted -intersection_of: SO:1001254 ! CDS_predicted -intersection_of: has_origin SO:0000910 ! orphan -relationship: has_origin SO:0000910 ! implied link automatically realized ! orphan - -[Term] -id: SO:1001249 -name: CDS_supported_by_domain_match_data -def: "A CDS that is supported by domain similarity." [SO:xp] -is_a: SO:1001251 ! implied link automatically realized ! CDS_supported_by_sequence_similarity_data -intersection_of: SO:1001251 ! CDS_supported_by_sequence_similarity_data -intersection_of: has_quality SO:0000908 ! supported_by_domain_match -relationship: has_quality SO:0000908 ! implied link automatically realized ! supported_by_domain_match - -[Term] -id: SO:1001251 -name: CDS_supported_by_sequence_similarity_data -def: "A CDS that is supported by sequence similarity data." [SO:xp] -is_a: SO:1001254 ! implied link automatically realized ! CDS_predicted -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000907 ! 
supported_by_sequence_similarity -relationship: has_quality SO:0000907 ! implied link automatically realized ! supported_by_sequence_similarity - -[Term] -id: SO:1001254 -name: CDS_predicted -def: "A CDS that is predicted." [SO:ke] -is_a: SO:0000316 ! implied link automatically realized ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000732 ! predicted -relationship: has_quality SO:0000732 ! implied link automatically realized ! predicted - -[Term] -id: SO:1001255 -name: status_of_coding_sequence -is_obsolete: true - -[Term] -id: SO:1001259 -name: CDS_supported_by_EST_or_cDNA_data -def: "A CDS that is supported by similarity to EST or cDNA data." [SO:xp] -is_a: SO:1001251 ! implied link automatically realized ! CDS_supported_by_sequence_similarity_data -intersection_of: SO:1001251 ! CDS_supported_by_sequence_similarity_data -intersection_of: has_quality SO:0000909 ! supported_by_EST_or_cDNA -relationship: has_quality SO:0000909 ! implied link automatically realized ! supported_by_EST_or_cDNA - -[Term] -id: SO:1001260 -name: internal_Shine_Dalgarno_sequence -def: "A Shine-Delgarno sequence that stimulates recoding through interactions with the anti-Shine-Delgarno in the RNA of small ribosomal subunits of translating ribosomes. The signal is only operative in Bacteria." [PMID:12519954, SO:ke] -is_a: SO:0000243 ! internal_ribosome_entry_site -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001261 -name: recoded_mRNA -def: "A gene coding an mRNA which is recoded before translation, usually by special cis-acting signals." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000881 ! recoded -relationship: has_quality SO:0000881 ! implied link automatically realized ! 
recoded - -[Term] -id: SO:1001262 -name: minus_1_translational_frameshift -def: "An attribute describing a translational frameshift of -1." [SO:ke] -is_a: SO:0000887 ! translational_frameshift - -[Term] -id: SO:1001263 -name: plus_1_translational_frameshift -def: "An attribute describing a translational frameshift of +1." [SO:ke] -is_a: SO:0000887 ! translational_frameshift - -[Term] -id: SO:1001264 -name: mRNA_recoded_by_translational_bypass -def: "A mRNA is translated by ribosomes that suspend translation at a particular codon and resume translation at a particular non-overlapping downstream codon." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract] -is_a: SO:1001261 ! implied link automatically realized ! recoded_mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000886 ! recoded_by_translational_bypass -relationship: has_quality SO:0000886 ! implied link automatically realized ! recoded_by_translational_bypass - -[Term] -id: SO:1001265 -name: mRNA_recoded_by_codon_redefinition -def: "A gene whose mRNA is recoded by an alteration of codon meaning." [SO:ma] -is_a: SO:1001261 ! implied link automatically realized ! recoded_mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000882 ! codon_redefined -relationship: has_quality SO:0000882 ! implied link automatically realized ! codon_redefined - -[Term] -id: SO:1001266 -name: stop_codon_redefinition_as_selenocysteine -is_obsolete: true - -[Term] -id: SO:1001267 -name: stop_codon_readthrough -is_obsolete: true - -[Term] -id: SO:1001268 -name: recoding_stimulatory_region -def: "A site in an mRNA sequence that stimulates the recoding of a region in the same mRNA." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12519954&dopt=Abstract] -synonym: "recoding stimulatory signal" EXACT [] -is_a: SO:0000836 ! 
mRNA_region - -[Term] -id: SO:1001269 -name: four_bp_start_codon -def: "A non-canonical start codon with 4 pase pairs." [SO:ke] -synonym: "4bp start codon" EXACT [] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001270 -name: stop_codon_redefinition_as_pyrrolysine -is_obsolete: true - -[Term] -id: SO:1001271 -name: archeal_intron -def: "Intron characteristic of tRNA genes; splices by an endonuclease-ligase mediated mechanism." [SO:ma] -is_a: SO:0000661 ! intron_attribute - -[Term] -id: SO:1001272 -name: tRNA_intron -is_a: SO:0000661 ! intron_attribute - -[Term] -id: SO:1001273 -name: CTG_start_codon -def: "A non-canonical start codon of sequence CTG." [SO:ke] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001274 -name: SECIS_element -def: "The incorporation of selenocysteine into a protein sequence is directed by an in-frame UGA codon (usually a stop codon) within the coding region of the mRNA. Selenoprotein mRNAs contain a conserved secondary structure in the 3' UTR that is required for the distinction of UGA stop from UGA selenocysteine. The selenocysteine insertion sequence (SECIS) is around 60 nt in length and adopts a hairpin structure which is sufficiently well-defined and conserved to act as a computational screen for selenoprotein genes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00031] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001275 -name: retron -def: "Sequence coding for a short, single-stranded, DNA sequence via a retrotransposed RNA intermediate; characteristic of some microbial genomes." [SO:ma] -is_a: SO:0000001 ! region - -[Term] -id: SO:1001277 -name: three_prime_recoding_site -def: "The recoding stimulatory signal located downstream of the recoding site." [SO:ke] -is_a: SO:1001268 ! 
recoding_stimulatory_region - -[Term] -id: SO:1001279 -name: three_prime_stem_loop_structure -def: "A recoding stimulatory region, the stem-loop secondary structural element is downstream of the redefined region." [PMID:12519954, SO:ke] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001280 -name: five_prime_recoding_site -def: "The recoding stimulatory signal located upstream of the recoding site." [SO:ke] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001281 -name: flanking_three_prime_quadruplet_recoding_signal -def: "Four base pair sequence immediately downstream of the redefined region. The redefined region is a frameshift site. The quadruplet is 2 overlapping codons." [PMID:12519954, SO:ke] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001282 -name: UAG_stop_codon_signal -def: "A stop codon signal for a UAG stop codon redefinition." [SO:ke] -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001283 -name: UAA_stop_codon_signal -def: "A stop codon signal for a UAA stop codon redefinition." [SO:ke] -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001284 -name: regulon -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." [ISBN:0198506732] -subset: SOFA -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:1001285 -name: UGA_stop_codon_signal -def: "A stop codon signal for a UGA stop codon redefinition." [SO:ke] -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001286 -name: three_prime_repeat_recoding_signal -def: "A recoding stimulatory signal, downstream sequence important for recoding that contains repetitive elements." [PMID:12519954, SO:ke] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001287 -name: distant_three_prime_recoding_signal -def: "A recoding signal that is found many hundreds of nucleotides 3' of a redefined stop codon." 
[http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8709208&dopt=Abstract] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001288 -name: stop_codon_signal -def: "A recoding stimulatory signal that is a stop codon and has effect on efficiency of recoding." [PMID:12519954, SO:ke] -comment: This term does not include the stop codons that are redefined. An example would be a stop codon that partially overlapped a frame shifting site would be an example stimulatory signal. -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:2000061 -name: databank_entry -def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -is_a: SO:0000695 ! reagent - -[Typedef] -id: adjacent_to -name: adjacent_to -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence." [SO:ke] -subset: SOFA -domain: SO:0000110 ! sequence_feature -range: SO:0000110 ! sequence_feature - -[Typedef] -id: associated_with -name: associated_with -comment: This relationship is vague and up for discussion. -is_symmetric: true - -[Typedef] -id: derives_from -name: derives_from -subset: SOFA -is_transitive: true - -[Typedef] -id: genome_of -name: genome_of - -[Typedef] -id: has_genome_location -name: has_genome_location -is_obsolete: true - -[Typedef] -id: has_origin -name: has_origin - -[Typedef] -id: has_part -name: has_part - -[Typedef] -id: has_quality -name: has_quality -comment: The relationship between a feature and an atrribute. - -[Typedef] -id: homologous_to -name: homologous_to -subset: SOFA -is_symmetric: true -is_a: similar_to ! similar_to - -[Typedef] -id: member_of -name: member_of -comment: A subtype of part_of. Inverse is collection_of. Winston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. 
-subset: SOFA -is_transitive: true - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -subset: SOFA -is_a: homologous_to ! homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: paralogous_to -name: paralogous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: part_of -name: part_of -namespace: BS -subset: SOFA -is_transitive: true - -[Typedef] -id: position_of -name: position_of - -[Typedef] -id: regulated_by -name: regulated_by -is_obsolete: true - -[Typedef] -id: sequence_of -name: sequence_of - -[Typedef] -id: similar_to -name: similar_to -subset: SOFA -is_symmetric: true - -[Typedef] -id: variant_of -name: variant_of -def: "A' is a variant (mutation) of A = definition every instance of A' is either an immediate mutation of s=ome instance of A, or there is a chain of immediate mutation processes linking A' to some instance of A." [SO:immuno_workshop] -comment: Added to SO during the immunology workshop, June 2007. This relationship was approved by Barry Smith. 
- diff --git a/annotation/NBIS/Ontology/SO/so_2_3.obo b/annotation/NBIS/Ontology/SO/so_2_3.obo deleted file mode 100644 index 0dbcab498..000000000 --- a/annotation/NBIS/Ontology/SO/so_2_3.obo +++ /dev/null @@ -1,10948 +0,0 @@ -format-version: 1.2 -date: 30:01:2008 17:03 -saved-by: kareneilbeck -auto-generated-by: OBO-Edit 1.101 -subsetdef: biosapiens "biosapiens protein feature ontology" -subsetdef: SOFA "SO feature annotation" -default-namespace: sequence -remark: autogenerated-by\: DAG-Edit version 1.417\nsaved-by\: eilbeck\ndate\: Tue May 11 15\:18\:44 PDT 2004\nversion\: $Revision\: 1.45 $ - -[Term] -id: SO:0000000 -name: Sequence_Ontology -subset: SOFA - -[Term] -id: SO:0000001 -name: region -def: "A sequence_feature with an extent greater than zero." [SO:ke] -subset: SOFA -synonym: "sequence" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000002 -name: sequence_secondary_structure -def: "A folded sequence." [SO:ke] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000003 -name: G_quartet -def: "G-quartets are unusual nucleic acid structures consisting of a planar arrangement where each guanine is hydrogen bonded by hoogsteen pairing to another guanine in the quartet." [http://www.library.csi.cuny.edu/~davis/molbiol/lecture_notes/post-transcriptional_processes/RNACapping.pdf] -synonym: "G-quadruplex" EXACT [] -synonym: "G-quartet" EXACT [] -synonym: "G_quadruplex" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000004 -name: interior_coding_exon -subset: SOFA -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000005 -name: satellite_DNA -def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -is_a: SO:0000657 ! 
repeat_region - -[Term] -id: SO:0000006 -name: PCR_product -def: "A region amplified by a PCR reaction." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "amplicon" RELATED [] -synonym: "PCR product" EXACT [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000007 -name: read_pair -def: "A pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000008 -name: gene_sensu_your_favorite_organism -is_obsolete: true - -[Term] -id: SO:0000009 -name: gene_class -is_obsolete: true - -[Term] -id: SO:0000010 -name: protein_coding -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000011 -name: non_protein_coding -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000012 -name: scRNA_primary_transcript -def: "The primary transcript of any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -synonym: "scRNA transcript" EXACT [] -synonym: "small cytoplasmic RNA" RELATED [] -synonym: "small cytoplasmic RNA transcript" EXACT [] -synonym: "small_cytoplasmic_RNA" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000013 -name: scRNA -def: "Any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -subset: SOFA -synonym: " small cytoplasmic RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000012 ! scRNA_primary_transcript - -[Term] -id: SO:0000014 -name: INR_motif -def: "A sequence element characteristic of some RNA polymerase II promoters required for the correct positioning of the polymerase for the start of transcription. Overlaps the TSS. 
The mammalian consensus sequence is YYAN(T|A)YY; the Drosophila consensus sequence is TCA(G|T)t(T|C). In each the A is at position +1 with respect to the TSS. Functionally similar to the TATA box element." [PMID:12651739] -synonym: "DMp2" RELATED [] -synonym: "initiator" EXACT [] -synonym: "initiator motif" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0000015 -name: DPE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters; Positioned from +28 to +32 with respect to the TSS (+1). Experimental results suggest that the DPE acts in conjunction with the INR_motif to provide a binding site for TFIID in the absence of a TATA box to mediate transcription of TATA-less promoters. Consensus sequence (A|G)G(A|T)(C|T)(G|A|C)." [PMID:12651739\:12537576] -synonym: "CRWMGCGWKCGCTTS" NARROW [] -synonym: "downstream core promoter element" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0000016 -name: BRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements at -37 to -32 with respect to the TSS (+1). Consensus sequence is (G|C)(G|C)(G|A)CGCC. Binds TFIIB." [PMID:12651739] -synonym: " transcription factor B-recognition element" EXACT [] -synonym: "B-recognition element" EXACT [] -synonym: "TFIIB recognition element" RELATED [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0000017 -name: PSE_motif -def: "A sequence element characteristic of the promoters of snRNA genes transcribed by RNA polymerase II or by RNA polymerase III. Located between -45 and -60 relative to the TSS. The human PSE_motif consensus sequence is TCACCNTNA(C|G)TNAAAAG(T|G)." [PMID:12651739] -synonym: "proximal sequence element" EXACT [] -is_a: SO:0000844 ! 
RNApol_II_promoter_region - -[Term] -id: SO:0000018 -name: linkage_group -def: "A group of loci that can be grouped in a linear order representing the different degrees of linkage among the genes concerned." [ISBN:038752046] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000020 -name: RNA_internal_loop -def: "A region of double stranded RNA where the bases do not conform to WC base pairing. The loop is closed on both sides by canonical base pairing. If the interruption to base pairing occurs on one strand only, it is known as a bulge." [SO:ke] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000021 -name: asymmetric_RNA_internal_loop -def: "An internal RNA loop where one of the strands includes more bases than the corresponding region on the other strand." [SO:ke] -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000022 -name: A_minor_RNA_motif -def: "A region forming a motif, composed of adenines, where the minor groove edges are inserted into the minor groove of another helix." [SO:ke] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000023 -name: K_turn_RNA_motif -def: "The kink turn (K-turn) is an RNA structural motif that creates a sharp (~120 degree) bend between two continuous helices." [SO:ke] -synonym: "K-turn" EXACT [] -synonym: "kink turn" EXACT [] -synonym: "kink-turn motif" EXACT [] -is_a: SO:0000021 ! asymmetric_RNA_internal_loop - -[Term] -id: SO:0000024 -name: Sarcin_like_RNA_motif -def: "A loop in ribosomal RNA containing the sites of attack for ricin and sarcin." [http://proton.chem.yale.edu/pdf/7897662.pdf] -synonym: " sarcin/ricin RNA domain" EXACT [] -synonym: "sarcin/ricin domain" EXACT [] -synonym: "sarcin/ricin loop" EXACT [] -is_a: SO:0000021 ! asymmetric_RNA_internal_loop - -[Term] -id: SO:0000025 -name: symmetric_RNA_internal_loop -def: "An internal RNA loop where the extent of the loop on both stands is the same size." [SO:ke] -is_a: SO:0000020 ! 
RNA_internal_loop - -[Term] -id: SO:0000026 -name: RNA_junction_loop -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000027 -name: RNA_hook_turn -synonym: "hook turn" RELATED [] -synonym: "hook-turn motif" EXACT [] -is_a: SO:0000026 ! RNA_junction_loop - -[Term] -id: SO:0000028 -name: base_pair -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000029 -name: WC_base_pair -def: "The canonical base pair, where two bases interact via WC edges, with glycosidic bonds oriented cis relative to the axis of orientation." [PMID:12177293] -synonym: "canonical base pair" EXACT [] -synonym: "Watson Crick base pair" EXACT [] -synonym: "Watson-Crick base pair" RELATED [] -synonym: "Watson-Crick pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000030 -name: sugar_edge_base_pair -def: "A type of non-canonical base-pairing." [PMID:12177293] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000031 -name: aptamer -def: "DNA or RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http://aptamer.icmb.utexas.edu] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000032 -name: DNA_aptamer -def: "DNA molecules that have been selected from random pools based on their ability to bind other molecules." [http:aptamer.icmb.utexas.edu] -is_a: SO:0000031 ! aptamer - -[Term] -id: SO:0000033 -name: RNA_aptamer -def: "RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http://aptamer.icmb.utexas.edu] -is_a: SO:0000031 ! aptamer - -[Term] -id: SO:0000034 -name: morpholino_oligo -def: "Morpholino oligos are synthesized from four different Morpholino subunits, each of which contains one of the four genetic bases (A, C, G, T) linked to a 6-membered morpholine ring. Eighteen to 25 subunits of these four subunit types are joined in a specific order by non-ionic phosphorodiamidate intersubunit linkages to give a Morpholino." 
[http://www.gene-tools.com/Morpholinos/morpholinos.HTML] -synonym: "morpholino oligo" EXACT [] -is_a: SO:0000696 ! implied link automatically realized ! oligo -intersection_of: SO:0000696 ! oligo -intersection_of: has_quality SO:0001183 ! morpholino -relationship: has_quality SO:0001183 ! implied link automatically realized ! morpholino - -[Term] -id: SO:0000035 -name: riboswitch -def: "A riboswitch is a part of an mRNA that can act as a direct sensor of small molecules to control their own expression. A riboswitch is a cis element in the 5' end of an mRNA, that acts as a direct sensor of metabolites." [PMID:2820954] -synonym: "riboswitch RNA" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000036 -name: matrix_attachment_site -def: "A DNA region that is required for the binding of chromatin to the nuclear matrix." [SO:ma] -synonym: "MAR" EXACT [] -synonym: "matrix association region" EXACT [] -synonym: "matrix attachment region" EXACT [] -synonym: "nuclear matrix association region" EXACT [] -synonym: "nuclear matrix attachment site" EXACT [] -synonym: "S/MAR" EXACT [] -synonym: "S/MAR element" RELATED [] -synonym: "scaffold attachment site" EXACT [] -synonym: "scaffold matrix attachment region" EXACT [] -synonym: "SMAR" EXACT [] -is_a: SO:0000626 ! chromosomal_regulatory_element - -[Term] -id: SO:0000037 -name: locus_control_region -def: "A DNA region that includes DNAse hypersensitive sites located 5' to a gene that confers the high-level, position-independent, and copy number-dependent expression to that gene." [SO:ma] -synonym: "LCR" EXACT [] -synonym: "locus control element" RELATED [] -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000038 -name: match_set -def: "A collection of match parts." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000039 -name: match_part -def: "A part of a match, for example an hsp from blast isa match_part." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! 
region -relationship: part_of SO:0000343 ! match - -[Term] -id: SO:0000040 -name: genomic_clone -def: "A clone of a DNA region of a genome." [SO:ma] -is_a: SO:0000151 ! implied link automatically realized ! clone -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000991 ! genomic_DNA -relationship: has_quality SO:0000991 ! implied link automatically realized ! genomic_DNA - -[Term] -id: SO:0000041 -name: operation -def: "An operation that can be applied to a sequence, that results in a change." [SO:ke] -is_a: SO:0000000 ! Sequence_Ontology -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:0000400 ! sequence_attribute -disjoint_from: SO:0000968 ! replication_mode -disjoint_from: SO:0001026 ! genome -disjoint_from: SO:0001060 ! sequence_variant -disjoint_from: SO:1000132 ! mutation - -[Term] -id: SO:0000042 -name: pseudogene_attribute -def: "An attribute of a pseudogene (SO:0000336)." [SO:ma] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000043 -name: processed_pseudogene -def: "A pseudogene that is processed." [SO:xp] -comment: Please not the synonym R psi M uses the spelled out form of the greek letter. -synonym: "pseudogene by reverse transcription" RELATED [] -synonym: "R psi G" RELATED [] -synonym: "retropseudogene" EXACT [] -is_a: SO:0000336 ! implied link automatically realized ! pseudogene -intersection_of: SO:0000336 ! pseudogene -intersection_of: has_quality SO:0000900 ! processed -relationship: has_quality SO:0000900 ! implied link automatically realized ! processed - -[Term] -id: SO:0000044 -name: pseudogene_by_unequal_crossing_over -def: "A pseudogene caused by unequal crossing over at recombination." [SO:ke] -is_a: SO:0000336 ! implied link automatically realized ! pseudogene -intersection_of: SO:0000336 ! pseudogene -intersection_of: has_quality SO:0000901 ! unequally_crossed_over -relationship: has_quality SO:0000901 ! implied link automatically realized ! 
unequally_crossed_over - -[Term] -id: SO:0000045 -name: delete -def: "To remove a subsection of sequence." [SO:ke] -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000046 -name: insert -def: "To insert a subsection of sequence." [SO:ke] -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000047 -name: invert -def: "To invert a subsection of sequence." [SO:ke] -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000048 -name: substitute -def: "To substitute a subsection of sequence for another." [SO:ke] -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000049 -name: translocate -def: "To translocate a subsection of sequence." [SO:ke] -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000050 -name: gene_part -def: "A part of a gene, that has no other route in the ontology back to region. This concept is necessary for logical inference as these parts must have the properties of region. It also allows us to associate all the parts of genes with a gene." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000051 -name: probe -def: "A DNA sequence used experimentally to detect the presence or absence of a complementary nucleic acid." [SO:ma] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000052 -name: assortment_derived_deficiency -synonym: "assortment-derived_deficiency" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000053 -name: mutation_affecting_regulatory_region -def: "A kind of mutation that affects a regulatory region of a gene." [SO:ke] -is_a: SO:1000132 ! mutation - -[Term] -id: SO:0000054 -name: aneuploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number." [SO:ke] -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0000055 -name: hyperploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number as extra chromosomes are present." [SO:ke] -is_a: SO:0000054 ! 
aneuploid - -[Term] -id: SO:0000056 -name: hypoploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number as some chromosomes are missing." [SO:ke] -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000057 -name: operator -def: "A regulatory element of an operon to which activators or repressors bind thereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -synonym: "operator segment" EXACT [] -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000058 -name: assortment_derived_aneuploid -synonym: "assortment-derived_aneuploid" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000059 -name: nuclease_binding_site -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0000060 -name: compound_chromosome_arm -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:0000061 -name: restriction_enzyme_binding_site -synonym: "restriction endonuclease binding site" EXACT [] -synonym: "restriction endonuclease recognition site" RELATED [] -synonym: "restriction enzyme recognition site" RELATED [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000062 -name: deficient_intrachromosomal_transposition -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:0000063 -name: deficient_interchromosomal_transposition -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:0000064 -name: gene_by_transcript_attribute -comment: This classes of attributes was added by MA to allow the broad description of genes based on qualities of the transcript(s). A product of SO meeting 2004. -is_obsolete: true - -[Term] -id: SO:0000065 -name: free_chromosome_arm -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000066 -name: gene_by_polyadenylation_attribute -is_obsolete: true - -[Term] -id: SO:0000067 -name: gene_to_gene_feature -is_a: SO:0000401 ! 
gene_attribute - -[Term] -id: SO:0000068 -name: overlapping -def: "An attribute describing a gene that has a sequence that overlaps the sequence of another gene." [SO:ke] -is_a: SO:0000067 ! gene_to_gene_feature - -[Term] -id: SO:0000069 -name: inside_intron -def: "An attribute to describe a gene when it is located within the intron of another gene." [SO:ke] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000070 -name: inside_intron_antiparallel -def: "An attribute to describe a gene when it is located within the intron of another gene and on the opposite strand." [SO:ke] -is_a: SO:0000069 ! inside_intron - -[Term] -id: SO:0000071 -name: inside_intron_parallel -def: "An attribute to describe a gene when it is located within the intron of another gene and on the same strand." [SO:ke] -is_a: SO:0000069 ! inside_intron - -[Term] -id: SO:0000072 -name: end_overlapping_gene -is_obsolete: true - -[Term] -id: SO:0000073 -name: five_prime_three_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's 3' region." [SO:ke] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000074 -name: five_prime_five_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's five prime region." [SO:ke] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000075 -name: three_prime_three_prime_overlap -def: "An attribute to describe a gene when the 3' region overlaps with another gene's 3' region." [SO:ke] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000076 -name: three_prime_five_prime_overlap -def: "An attribute to describe a gene when the 3' region overlaps with another gene's 5' region." [SO:ke] -synonym: "5' 3' overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000077 -name: antisense -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000078 -name: polycistronic_transcript -def: "A transcript that is polycistronic." [SO:xp] -is_a: SO:0000673 ! 
implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000880 ! polycistronic -relationship: has_quality SO:0000880 ! implied link automatically realized ! polycistronic - -[Term] -id: SO:0000079 -name: dicistronic_transcript -def: "A transcript that is dicistronic." [SO:ke] -is_a: SO:0000078 ! implied link automatically realized ! polycistronic_transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000879 ! dicistronic -relationship: has_quality SO:0000879 ! implied link automatically realized ! dicistronic - -[Term] -id: SO:0000080 -name: operon_member -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0000081 -name: gene_array_member -synonym: "gene array member" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000082 -name: processed_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000083 -name: macronuclear_sequence -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000084 -name: micronuclear_sequence -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000085 -name: gene_by_genome_location -is_obsolete: true - -[Term] -id: SO:0000086 -name: gene_by_organelle_of_genome -is_obsolete: true - -[Term] -id: SO:0000087 -name: nuclear_gene -def: "A gene from nuclear sequence." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000738 ! nuclear_sequence -relationship: has_origin SO:0000738 ! implied link automatically realized ! nuclear_sequence - -[Term] -id: SO:0000088 -name: mt_gene -def: "A gene located in mitochondrial sequence." [SO:xp] -synonym: "mitochondrial gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence -relationship: has_origin SO:0000737 ! implied link automatically realized ! 
mitochondrial_sequence - -[Term] -id: SO:0000089 -name: kinetoplast_gene -def: "A gene located in kinetoplast sequence." [SO:xp] -is_a: SO:0000088 ! implied link automatically realized ! mt_gene -intersection_of: SO:0000088 ! mt_gene -intersection_of: has_origin SO:0000741 ! kinetoplast_sequence -relationship: has_origin SO:0000741 ! implied link automatically realized ! kinetoplast_sequence - -[Term] -id: SO:0000090 -name: plastid_gene -def: "A gene from plastid sequence." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000740 ! plastid_sequence -relationship: has_origin SO:0000740 ! implied link automatically realized ! plastid_sequence - -[Term] -id: SO:0000091 -name: apicoplast_gene -def: "A gene from apicoplast sequence." [SO:xp] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000743 ! apicoplast_sequence -relationship: has_origin SO:0000743 ! implied link automatically realized ! apicoplast_sequence - -[Term] -id: SO:0000092 -name: ct_gene -def: "A gene from chloroplast sequence." [SO:xp] -synonym: "chloroplast gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000745 ! chloroplast_sequence -relationship: has_origin SO:0000745 ! implied link automatically realized ! chloroplast_sequence - -[Term] -id: SO:0000093 -name: chromoplast_gene -def: "A gene from chromoplast_sequence." [SO:xp] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000744 ! chromoplast_sequence -relationship: has_origin SO:0000744 ! implied link automatically realized ! chromoplast_sequence - -[Term] -id: SO:0000094 -name: cyanelle_gene -def: "A gene from cyanelle sequence." 
[SO:xp] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000746 ! cyanelle_sequence -relationship: has_origin SO:0000746 ! implied link automatically realized ! cyanelle_sequence - -[Term] -id: SO:0000095 -name: leucoplast_gene -def: "A plastid gene from leucoplast sequence." [SO:xp] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000747 ! leucoplast_sequence -relationship: has_origin SO:0000747 ! implied link automatically realized ! leucoplast_sequence - -[Term] -id: SO:0000096 -name: proplastid_gene -def: "A gene from proplastid sequence." [SO:ke] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000748 ! proplastid_sequence -relationship: has_origin SO:0000748 ! implied link automatically realized ! proplastid_sequence - -[Term] -id: SO:0000097 -name: nucleomorph_gene -def: "A gene from nucleomorph sequence." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence -relationship: has_origin SO:0000739 ! implied link automatically realized ! nucleomorphic_sequence - -[Term] -id: SO:0000098 -name: plasmid_gene -def: "A gene from plasmid sequence." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000749 ! plasmid_location -relationship: has_origin SO:0000749 ! implied link automatically realized ! plasmid_location - -[Term] -id: SO:0000099 -name: proviral_gene -def: "A gene from proviral sequence." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000751 ! 
proviral_location -relationship: has_origin SO:0000751 ! implied link automatically realized ! proviral_location - -[Term] -id: SO:0000100 -name: endogenous_retroviral_gene -def: "A proviral gene with origin endogenous retrovirus." [SO:xp] -is_a: SO:0000099 ! implied link automatically realized ! proviral_gene -intersection_of: SO:0000099 ! proviral_gene -intersection_of: has_origin SO:0000903 ! endogenous_retroviral_sequence -relationship: has_origin SO:0000903 ! implied link automatically realized ! endogenous_retroviral_sequence - -[Term] -id: SO:0000101 -name: transposable_element -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -synonym: " transposon" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000102 -name: expressed_sequence_match -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -def: "The end of the clone insert." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000104 -name: polypeptide -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000001 ! region -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000105 -name: chromosome_arm -def: "A region of the chromosome between the centromere and the telomere. Human chromosomes have two arms, the p arm (short) and the q arm (long) which are separated from each other by the centromere." [http://www.exactsciences.com/cic/glossary/_index.htm] -is_a: SO:0000830 ! 
chromosome_part - -[Term] -id: SO:0000106 -name: non_capped_primary_transcript -is_obsolete: true - -[Term] -id: SO:0000107 -name: sequencing_primer -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000108 -name: mRNA_with_frameshift -def: "An mRNA with a frameshift." [SO:xp] -synonym: "frameshifted mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000865 ! frameshift -relationship: has_quality SO:0000865 ! implied link automatically realized ! frameshift - -[Term] -id: SO:0000109 -name: sequence_variant_obs -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000110 -name: sequence_feature -def: "An extent of biological sequence." [SO:ke] -subset: SOFA -synonym: "located sequence feature" RELATED [] -synonym: "located_sequence_feature" EXACT [] -is_a: SO:0000000 ! Sequence_Ontology -disjoint_from: SO:0000041 ! operation -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:0000400 ! sequence_attribute -disjoint_from: SO:0000968 ! replication_mode -disjoint_from: SO:0001026 ! genome -disjoint_from: SO:0001060 ! sequence_variant -disjoint_from: SO:1000132 ! mutation - -[Term] -id: SO:0000111 -name: transposable_element_gene -def: "A gene encoded within a transposable element. For example gag, int, env and pol are the transposable element genes of the TY element in yeast." [SO:ke] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: part_of SO:0000101 ! transposable_element -relationship: part_of SO:0000101 ! implied link automatically realized ! transposable_element - -[Term] -id: SO:0000112 -name: primer -def: "A short preexisting polynucleotide chain to which new deoxyribonucleotides can be added by DNA polymerase." 
[http://www.ornl.gov/TechResources/Human_Genome/publicat/primer2001/glossary.html] -subset: SOFA -synonym: "primer oligonucleotide" EXACT [] -synonym: "primer polynucleotide" EXACT [] -synonym: "primer sequence" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000113 -name: proviral_region -def: "A viral sequence which has integrated into a host genome." [SO:ke] -subset: SOFA -synonym: "proviral sequence" RELATED [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000114 -name: methylated_C -def: "A methylated deoxy-cytosine." [SO:ke] -subset: SOFA -synonym: "methylated C" EXACT [] -synonym: "methylated cytosine" EXACT [] -synonym: "methylated cytosine base" EXACT [] -synonym: "methylated cytosine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000115 -name: transcript_feature -is_obsolete: true - -[Term] -id: SO:0000116 -name: edited -def: "An attribute describing a sequence that is modified by editing." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000117 -name: transcript_with_readthrough_stop_codon -is_obsolete: true - -[Term] -id: SO:0000118 -name: transcript_with_translational_frameshift -def: "A transcript with a translational frameshift." [SO:xp] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000887 ! translationally_frameshifted -relationship: has_quality SO:0000887 ! implied link automatically realized ! translationally_frameshifted - -[Term] -id: SO:0000119 -name: regulated -def: "An attribute to describe a sequence that is regulated." [SO:ke] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns. -subset: SOFA -synonym: "pre mRNA" RELATED [] -is_a: SO:0000185 ! 
primary_transcript - -[Term] -id: SO:0000121 -name: forward_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "forward primer oligonucleotide" EXACT [] -synonym: "forward primer polynucleotide" EXACT [] -synonym: "forward primer sequence" EXACT [] -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000122 -name: RNA_sequence_secondary_structure -def: "A folded RNA sequence." [SO:ke] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000123 -name: transcriptionally_regulated -def: "An attribute describing a gene that is regulated at transcription." [SO:ma] -comment: By:. -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -def: "Expressed in relatively constant amounts without regard to cellular environmental conditions such as the concentration of a particular substrate." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -def: "An inducer molecule is required for transcription to occur." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -def: "A repressor molecule is required for transcription to stop." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000127 -name: silenced_gene -def: "A gene that is silenced." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000893 ! silenced -relationship: has_quality SO:0000893 ! implied link automatically realized ! silenced - -[Term] -id: SO:0000128 -name: gene_silenced_by_DNA_modification -def: "A gene that is silenced by DNA modification." [SO:xp] -is_a: SO:0000127 ! implied link automatically realized ! silenced_gene -intersection_of: SO:0000704 ! 
gene -intersection_of: has_quality SO:0000894 ! silenced_by_DNA_modification -relationship: has_quality SO:0000894 ! implied link automatically realized ! silenced_by_DNA_modification - -[Term] -id: SO:0000129 -name: gene_silenced_by_DNA_methylation -def: "A gene that is silenced by DNA methylation." [SO:xp] -synonym: "methylation-silenced gene" EXACT [] -is_a: SO:0000128 ! implied link automatically realized ! gene_silenced_by_DNA_modification -intersection_of: SO:0000128 ! gene_silenced_by_DNA_modification -intersection_of: has_quality SO:0000895 ! silenced_by_DNA_methylation -relationship: has_quality SO:0000895 ! implied link automatically realized ! silenced_by_DNA_methylation - -[Term] -id: SO:0000130 -name: post_translationally_regulated -def: "An attribute describing a gene that is regulated after it has been translated." [SO:ke] -synonym: "post-translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000131 -name: translationally_regulated -def: "An attribute describing a gene that is regulated as it is translated." [SO:ke] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000133 -name: epigenetically_modified -def: "This attribute describes a gene where heritable changes other than those in the DNA sequence occur. These changes include: modification to the DNA (such as DNA methylation, the covalent modification of cytosine), and post-translational modification of histones." [SO:ke] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000134 -name: imprinted -def: "Imprinted genes are epigenetically modified genes that are expressed monoallelically according to their parent of origin." [SO:ke] -is_a: SO:0000119 ! regulated -is_a: SO:0000133 ! 
epigenetically_modified - -[Term] -id: SO:0000135 -name: maternally_imprinted -def: "The maternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -def: "The paternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -def: "Allelic exclusion is a process occuring in diploid organisms, where a gene is inactivated and not expressed in that cell." [SO:ke] -comment: Exapmles are x-innactivation and immunoglobulin formation. -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000138 -name: gene_rearranged_at_DNA_level -def: "An epigenetically modified gene, rearranged at the DNA level." [SO:xp] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000904 ! rearranged_at_DNA_level -relationship: has_quality SO:0000904 ! implied link automatically realized ! rearranged_at_DNA_level - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -comment: Gene:. -subset: SOFA -is_a: SO:0000837 ! UTR_region - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -is_a: SO:0005836 ! 
regulatory_region - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of sequence which may be used to manufacture a longer assembled, sequence." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000145 -name: recoded_codon -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000146 -name: capped -def: "An attribute describing when a sequence, usually an mRNA is capped by the addition of a modified guanine nucleotide at the 5' end." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unvailable bases." [SO:ls] -subset: SOFA -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! 
assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in E. coli or some other organism." [http://www.geospiza.com/community/support/glossary/] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "yeast artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "bacterial artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000154 -name: PAC -def: "P1 Artificial Chromosome. These vectors can hold large inserts, typically 80-200 kb, and propagate in E. coli as a single copy episome." [http://www.ncbi.nlm.nih.gov/genome/guide/mouse/glossary.htm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "P1" EXACT [] -synonym: "P1 artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000155 -name: plasmid -def: "A self-replicating circular DNA molecule that is distinct from a chromosome in the organism." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "plasmid sequence" EXACT [] -is_a: SO:0000695 ! reagent -is_a: SO:0001038 ! 
extrachromosomal_mobile_genetic_element - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as a plasmid or packaged as a phage,since they retain the lambda cos sites." [SO:ma] -comment: Paper: vans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1):9-20. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cosmid vector" EXACT [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma] -synonym: "phagemid vector" RELATED [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilises the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource: mapping and analysis of 96 clones. Genomics 1996. -synonym: "fosmid vector" RELATED [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000159 -name: deletion -def: "The point at which a deletion occured." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -is_a: SO:0000001 ! region -is_a: SO:0001059 ! sequence_alteration -relationship: sequence_of SO:0000045 ! delete - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_obsolete: true - -[Term] -id: SO:0000161 -name: methylated_A -def: "A methylated adenine." 
[SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "The position where intron is excised." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_splice_site -def: "The junction between the 3 prime end of an exon and the following intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000164 -name: three_prime_splice_site -def: "The junction between the 3 prime end of an intron and the following exon." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -is_a: SO:0000727 ! CRM -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000166 -name: enhancer_bound_by_factor -def: "An enhancer bound by a factor." [SO:xp] -is_a: SO:0000165 ! implied link automatically realized ! enhancer -intersection_of: SO:0000165 ! enhancer -intersection_of: has_quality SO:0000277 ! bound_by_factor -relationship: has_quality SO:0000277 ! implied link automatically realized ! 
bound_by_factor - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology.\nThe region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." [SO:ke] -synonym: "pol I promoter" EXACT [] -synonym: "polymerase I promoter" EXACT [] -synonym: "RNA polymerase A promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke] -synonym: "pol II promoter" RELATED [] -synonym: "polymerase II promoter" EXACT [] -synonym: "RNA polymerase B promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." [SO:ke] -synonym: "pol III promoter" EXACT [] -synonym: "polymerase III promoter" EXACT [] -synonym: "RNA polymerase C promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "CAAT box" EXACT [] -synonym: "CAAT-box" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0000173 -name: GC_rich_promoter_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "GC-rich region" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "Goldstein-Hogness box" EXACT [] -is_a: SO:0000832 ! promoter_region -relationship: part_of SO:0000170 ! RNApol_II_promoter -relationship: part_of SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-10 signal" EXACT [] -synonym: "Pribnow box" EXACT [] -synonym: "Pribnow Schaller box" EXACT [] -synonym: "Pribnow-Schaller box" EXACT [] -is_a: SO:0000843 ! bacterial_RNApol_promoter_region - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-35 signal" EXACT [] -is_a: SO:0000843 ! 
bacterial_RNApol_promoter_region - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." [http://www.genpromag.com/scripts/glossary.asp?LETTER=R] -synonym: "class I" RELATED [] -synonym: "class I transposon" EXACT [] -synonym: "retrotransposon element" EXACT [] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." [SO:ke] -synonym: "class II" RELATED [] -synonym: "class II transposon" EXACT [] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." 
[PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." [SO:ke] -synonym: "long terminal repeat retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A segment of DNA that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -synonym: "5' intron" EXACT [] -synonym: "5' intron sequence" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000191 -name: interior_intron -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -synonym: "3' intron" EXACT [] -synonym: "3' intron sequence" RELATED [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A polymorphism detectable by the size differences in DNA fragments generated by a restriction enzyme." [PMID:6247908] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -is_a: SO:0000412 ! 
restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "LINE" EXACT [] -synonym: "Long interspersed element" EXACT [] -synonym: "Long interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon, including the stop_codon." [SO:ke] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_exon_coding_region -def: "The sequence of the 5' exon that encodes for protein." [SO:ke] -subset: SOFA -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_exon_coding_region -def: "The sequence of the 3' exon that encodes for protein." [SO:ke] -subset: SOFA -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon -relationship: part_of SO:0000655 ! ncRNA - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke] -synonym: "translocated sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration -relationship: sequence_of SO:0000049 ! translocate - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -synonym: "5' coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000201 -name: interior_exon -def: "An exon that is bounded by 5' and 3' splice sites." [PMID:10373547] -is_a: SO:0000147 ! 
exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The exon that is most 3-prime on a given transcript." [SO:ma] -synonym: "3' coding exon" RELATED [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime and three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." [SO:ke] -synonym: "Short interspersed element" EXACT [] -synonym: "Short interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_variation -synonym: "simple sequence length polymorphism" RELATED [] -synonym: "simple sequence length variation" EXACT [] -is_a: SO:0000248 ! sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." 
[http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "terminal inverted repeat" EXACT [] -synonym: "TIR element" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -synonym: "ribosomal RNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253)." [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -def: "A primary transcript encoding alanyl tRNA." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." 
[SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear RNA (SO:0000274)." [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: processed_transcript -def: "A transcript which has undergone the necessary modifications for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -is_a: SO:0000233 ! processed_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds a TF complex [GO:0005667]." 
[SO:ke] -subset: SOFA -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The inframe interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SO:ma, SO:rb] -comment: The definition was modified by Rama. This term is now basically the same as a CDS. This must be revised. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterised by large modular imperfect long inverted repeats." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "long inverted repeat element" RELATED [] -synonym: "LVR element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The DNA sequences extending on either side of a specific locus." [http://biotech.icmb.utexas.edu/search/dict-search.mhtml] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000240 -name: chromosome_variation -is_a: SO:0000000 ! Sequence_Ontology -disjoint_from: SO:0000041 ! operation -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000400 ! sequence_attribute -disjoint_from: SO:0000968 ! replication_mode -disjoint_from: SO:0001026 ! genome -disjoint_from: SO:0001060 ! sequence_variant -disjoint_from: SO:1000132 ! mutation - -[Term] -id: SO:0000241 -name: internal_UTR -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polycistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -is_a: SO:0000203 ! 
UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke] -synonym: "internal ribosomal entry sequence" EXACT [] -synonym: "internal ribosomal entry site" EXACT [] -synonym: "internal ribosome entry sequence" RELATED [] -synonym: "IRES" EXACT [] -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_obsolete: true - -[Term] -id: SO:0000246 -name: polyadenylated -def: "A attribute describing the addition of a poly A tail to the 3' end of a mRNA molecule." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000248 -name: sequence_length_variation -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! 
rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000254 -name: alanyl_tRNA -def: "A tRNA sequence that has an alanine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "alanyl-transfer ribonucleic acid" EXACT [] -synonym: "alanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000211 ! alanine_tRNA_primary_transcript - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -def: "A tRNA sequence that has an asparagine anticodon, and a 3' asparagine binding region." 
[SO:ke] -synonym: "asparaginyl-transfer ribonucleic acid" EXACT [] -synonym: "asparaginyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000213 ! asparagine_tRNA_primary_transcript - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -def: "A tRNA sequence that has an aspartic acid anticodon, and a 3' aspartic acid binding region." [SO:ke] -synonym: "aspartyl-transfer ribonucleic acid" EXACT [] -synonym: "aspartyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000214 ! aspartic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -def: "A tRNA sequence that has a cysteine anticodon, and a 3' cysteine binding region." [SO:ke] -synonym: "cysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "cysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000215 ! cysteine_tRNA_primary_transcript - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -def: "A tRNA sequence that has a glutamine anticodon, and a 3' glutamine binding region." [SO:ke] -synonym: "glutaminyl-transfer ribonucleic acid" EXACT [] -synonym: "glutaminyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000216 ! glutamic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -def: "A tRNA sequence that has a glutamic acid anticodon, and a 3' glutamic acid binding region." [SO:ke] -synonym: "glutamyl-transfer ribonucleic acid" EXACT [] -synonym: "glutamyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000217 ! glutamine_tRNA_primary_transcript - -[Term] -id: SO:0000261 -name: glycyl_tRNA -def: "A tRNA sequence that has a glycine anticodon, and a 3' glycine binding region." [SO:ke] -synonym: "glycyl-transfer ribonucleic acid" RELATED [] -synonym: "glycyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000218 ! 
glycine_tRNA_primary_transcript - -[Term] -id: SO:0000262 -name: histidyl_tRNA -def: "A tRNA sequence that has a histidine anticodon, and a 3' histidine binding region." [SO:ke] -synonym: "histidyl-transfer ribonucleic acid" EXACT [] -synonym: "histidyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000219 ! histidine_tRNA_primary_transcript - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -def: "A tRNA sequence that has an isoleucine anticodon, and a 3' isoleucine binding region." [SO:ke] -synonym: "isoleucyl-transfer ribonucleic acid" EXACT [] -synonym: "isoleucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000220 ! isoleucine_tRNA_primary_transcript - -[Term] -id: SO:0000264 -name: leucyl_tRNA -def: "A tRNA sequence that has a leucine anticodon, and a 3' leucine binding region." [SO:ke] -synonym: "leucyl-transfer ribonucleic acid" EXACT [] -synonym: "leucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000221 ! leucine_tRNA_primary_transcript - -[Term] -id: SO:0000265 -name: lysyl_tRNA -def: "A tRNA sequence that has a lysine anticodon, and a 3' lysine binding region." [SO:ke] -synonym: "lysyl-transfer ribonucleic acid" EXACT [] -synonym: "lysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000222 ! lysine_tRNA_primary_transcript - -[Term] -id: SO:0000266 -name: methionyl_tRNA -def: "A tRNA sequence that has a methionine anticodon, and a 3' methionine binding region." [SO:ke] -synonym: "methionyl-transfer ribonucleic acid" EXACT [] -synonym: "methionyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000223 ! methionine_tRNA_primary_transcript - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -def: "A tRNA sequence that has a phenylalanine anticodon, and a 3' phenylalanine binding region." 
[SO:ke] -synonym: "phenylalanyl-transfer ribonucleic acid" EXACT [] -synonym: "phenylalanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000224 ! phenylalanine_tRNA_primary_transcript - -[Term] -id: SO:0000268 -name: prolyl_tRNA -def: "A tRNA sequence that has a proline anticodon, and a 3' proline binding region." [SO:ke] -synonym: "prolyl-transfer ribonucleic acid" EXACT [] -synonym: "prolyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000225 ! proline_tRNA_primary_transcript - -[Term] -id: SO:0000269 -name: seryl_tRNA -def: "A tRNA sequence that has a serine anticodon, and a 3' serine binding region." [SO:ke] -synonym: "seryl-transfer ribonucleic acid" RELATED [] -synonym: "seryl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000226 ! serine_tRNA_primary_transcript - -[Term] -id: SO:0000270 -name: threonyl_tRNA -def: "A tRNA sequence that has a threonine anticodon, and a 3' threonine binding region." [SO:ke] -synonym: "threonyl-transfer ribonucleic acid" EXACT [] -synonym: "threonyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000227 ! threonine_tRNA_primary_transcript - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -def: "A tRNA sequence that has a tryptophan anticodon, and a 3' tryptophan binding region." [SO:ke] -synonym: "tryptophanyl-transfer ribonucleic acid" EXACT [] -synonym: "tryptophanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000228 ! tryptophan_tRNA_primary_transcript - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -def: "A tRNA sequence that has a tyrosine anticodon, and a 3' tyrosine binding region." [SO:ke] -synonym: "tyrosyl-transfer ribonucleic acid" EXACT [] -synonym: "tyrosyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000229 ! 
tyrosine_tRNA_primary_transcript - -[Term] -id: SO:0000273 -name: valyl_tRNA -def: "A tRNA sequence that has a valine anticodon, and a 3' valine binding region." [SO:ke] -synonym: "valyl-transfer ribonucleic acid" EXACT [] -synonym: "valyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000230 ! valine_tRNA_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [ems:WB, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000231 ! snRNA_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." 
[PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: derives_from SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000277 -name: bound_by_factor -def: "An attribute describing a sequence that is bound by another molecule." [SO:ke] -comment: Formerly called transcript_by_bound_factor. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000278 -name: transcript_bound_by_nucleic_acid -def: "A transcript that is bound by a nucleic acid." [SO:xp] -comment: Formerly called transcript_by_bound_nucleic_acid. -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000876 ! bound_by_nucleic_acid -relationship: has_quality SO:0000876 ! implied link automatically realized ! bound_by_nucleic_acid - -[Term] -id: SO:0000279 -name: transcript_bound_by_protein -def: "A transcript that is bound by a protein." [SO:xp] -comment: Formerly called transcript_by_bound_protein. -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000875 ! bound_by_protein -relationship: has_quality SO:0000875 ! implied link automatically realized ! bound_by_protein - -[Term] -id: SO:0000280 -name: engineered_gene -def: "A gene that is engineered." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000281 -name: engineered_foreign_gene -def: "A gene that is engineered and foreign." [SO:xp] -is_a: SO:0000280 ! implied link automatically realized ! engineered_gene -is_a: SO:0000285 ! implied link automatically realized ! foreign_gene -is_a: SO:0000805 ! implied link automatically realized ! engineered_foreign_region -intersection_of: SO:0000280 ! 
engineered_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000282 -name: mRNA_with_minus_1_frameshift -def: "An mRNA with a minus 1 frameshift." [SO:xp] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000866 ! minus_1_frameshift -relationship: has_quality SO:0000866 ! implied link automatically realized ! minus_1_frameshift - -[Term] -id: SO:0000283 -name: engineered_foreign_transposable_element_gene -def: "A transposible_element that is engineered and foreign." [SO:xp] -is_a: SO:0000111 ! implied link automatically realized ! transposable_element_gene -is_a: SO:0000281 ! implied link automatically realized ! engineered_foreign_gene -intersection_of: SO:0000111 ! transposable_element_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartite and interrupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000285 -name: foreign_gene -def: "A gene that is foreign." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000784 ! implied link automatically realized ! foreign - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "direct terminal repeat" RELATED [] -synonym: "long terminal repeat" EXACT [] -synonym: "LTR" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000186 ! 
LTR_retrotransposon - -[Term] -id: SO:0000287 -name: fusion_gene -def: "A gene that is a fusion." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000806 ! fusion -relationship: has_quality SO:0000806 ! implied link automatically realized ! fusion - -[Term] -id: SO:0000288 -name: engineered_fusion_gene -def: "A fusion gene that is engineered." [SO:xp] -is_a: SO:0000280 ! implied link automatically realized ! engineered_gene -is_a: SO:0000287 ! implied link automatically realized ! fusion_gene -intersection_of: SO:0000287 ! fusion_gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000289 -name: microsatellite -def: "A very short unit sequence of DNA (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite" EXACT [] -synonym: "dinucleotide repeat microsatellite locus" EXACT [] -synonym: "dinucleotide repeat microsatellite marker" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite marker" RELATED [] -synonym: "rinucleotide repeat microsatellite" EXACT [] -synonym: "trinucleotide repeat microsatellite locus" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_obsolete: true - -[Term] -id: SO:0000293 -name: engineered_foreign_repetitive_element -def: "A repetitive element that is engineered and foreign." [SO:xp] -is_a: SO:0000657 ! implied link automatically realized ! repeat_region -is_a: SO:0000805 ! implied link automatically realized ! 
engineered_foreign_region -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -synonym: "U12-dependent intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: " ori" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "displacement loop" RELATED [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -is_a: SO:0000001 ! region - -[Term] -id: SO:0000299 -name: specific_recombination_site -is_a: SO:0000669 ! 
sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000301 -name: vertebrate_immune_system_gene_recombination_feature -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interrupted palindrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_base_site -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G or (in RNA) U." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -is_a: SO:0000305 ! modified_base_site - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_obsolete: true - -[Term] -id: SO:0000309 -name: computed_feature -is_obsolete: true - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_obsolete: true - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to: -is_obsolete: true - -[Term] -id: SO:0000312 -name: experimentally_determined -def: "Attribute to describe a feature that has been experiemntally verified." [SO:ke] -is_a: SO:0000789 ! validated - -[Term] -id: SO:0000313 -name: stem_loop -alt_id: SO:0000019 -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "RNA_hairpin_loop" EXACT [] -synonym: "stem-loop" EXACT [] -is_a: SO:0000122 ! RNA_sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: transcription_start_site -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "TSS" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000151 ! 
implied link automatically realized ! clone -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000756 ! cDNA -relationship: has_quality SO:0000756 ! implied link automatically realized ! cDNA - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke] -is_a: SO:0000344 ! splice_enhancer -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000321 -name: mRNA_with_plus_1_frameshift -def: "An mRNA with a plus 1 frameshift." [SO:ke] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000868 ! plus_1_frameshift -relationship: has_quality SO:0000868 ! implied link automatically realized ! plus_1_frameshift - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "translation start" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -is_a: SO:0000209 ! 
rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke] -synonym: "translation_end" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray oligonucleotide" EXACT [] -is_a: SO:0000051 ! probe -is_a: SO:0000324 ! tag -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000329 -name: mRNA_with_plus_2_frameshift -def: "An mRNA with a plus 2 frameshift." [SO:xp] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000869 ! plus_2_framshift -relationship: has_quality SO:0000869 ! implied link automatically realized ! plus_2_framshift - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! 
junction -relationship: part_of SO:0000233 ! processed_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000335 -name: mRNA_with_minus_2_frameshift -def: "A mRNA with a minus 2 frameshift." [SO:ke] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000867 ! minus_2_frameshift -relationship: has_quality SO:0000867 ! implied link automatically realized ! minus_2_frameshift - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITEs do not encode proteins." 
[http:www.pnas.org/cgi/content/full/97/18/10083] -synonym: "miniature inverted repeat transposable element" EXACT [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome which promotes recombination." [SO:rd] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "Expressed Sequence Tag: The sequence of a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [http://genomics.phrma.org/lexicon/e.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000695 ! reagent -relationship: derives_from SO:0000234 ! 
mRNA - -[Term] -id: SO:0000346 -name: loxP_site -synonym: "Cre-recombination target region" RELATED [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -is_a: SO:0000343 ! match - -[Term] -id: SO:0000348 -name: nucleic_acid -def: "An attribute describing a sequence consisting of nucleobases bound to repeating units. The forms found in nature are deoxyribonucleic acid (DNA), where the repeating units are 2-deoxy-D-ribose rings connected to a phosphate backbone, and ribonucleic acid (RNA), where the repeating units are D-ribose rings connected to a phosphate backbone." [CHEBI:33696, RSC:cb] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -is_a: SO:0000343 ! match - -[Term] -id: SO:0000350 -name: FRT_site -def: "An inversion site found on the Saccharomyces cerevisiae 2 micron plasmid." [SO:ma] -synonym: "FLP recombination target region" EXACT [] -is_a: SO:0000948 ! inversion_site - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "An attribute to decide a sequence of nucleotides, nucleotide analogs, or amino acids that has been designed by an experimenter and which may, or may not, correspond with any natural sequence." [SO:ma] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000352 -name: DNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a 2-deoxy-D-ribose ring connected to a phosphate backbone." [RSC:cb] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000353 -name: assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -is_a: SO:0000684 ! 
nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which is co-inherited as the result of the lack of historic recombination within it." [SO:ma] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000356 -name: RNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a D-ribose ring connected to a phosphate backbone." [RSC:cb] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: flanked -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000358 -name: protein -def: "An attribute describing a sequence composed of amino acid residues joined by peptide bonds." [SO:ke] -comment: Do not use this for feature annotation. Use polypeptide (SO:0000104) instead. -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000359 -name: floxed -def: "An attribute describing sequence that is flanked by Lox-P sites." [SO:ke] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together signify a unique amino acid or the termination of translation." [http://genomics.phrma.org/lexicon/c.html] -subset: SOFA -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000361 -name: FRT_flanked -def: "An attribute to describe sequence that is flanked by the FLP recombinase recognition site, FRT." [SO:ke] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000362 -name: invalidated_by_chimeric_cDNA -def: "A cDNA clone constructed from more than one mRNA. Usually an experimental artifact." [SO:ma] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000363 -name: floxed_gene -def: "A transgene that is floxed." [SO:xp] -is_a: SO:0000902 ! implied link automatically realized ! transgene -intersection_of: SO:0000902 ! transgene -intersection_of: has_quality SO:0000359 ! 
floxed -relationship: has_quality SO:0000359 ! implied link automatically realized ! floxed - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposible element." [SO:ke] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "A region encoding an integrase which acts at a site adjacent to it (attI_site) to insert DNA which must include but is not limited to an attC_site." [SO:as] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: position_of SO:0000046 ! insert - -[Term] -id: SO:0000367 -name: attI_site -def: "A region within an integron, adjacent to an integrase, at which site specific recombination involving an attC_site takes place." [SO:as] -synonym: "attI site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -is_obsolete: true - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/~smaloy/Glossary/C.html] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." 
[RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0001185 ! enzymatic -relationship: has_quality SO:0001185 ! implied link automatically realized ! enzymatic - -[Term] -id: SO:0000373 -name: recombinationally_inverted_gene -def: "A recombinationally rearranged gene by inversion." [SO:xp] -is_a: SO:0000456 ! implied link automatically realized ! recombinationally_rearranged_gene -intersection_of: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: associated_with SO:0000047 ! invert -relationship: associated_with SO:0000047 ! implied link automatically realized ! invert - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -is_a: SO:0000372 ! implied link automatically realized ! enzymatic_RNA -intersection_of: SO:0000372 ! enzymatic_RNA -intersection_of: has_quality SO:0001186 ! ribozymic -relationship: has_quality SO:0001186 ! implied link automatically realized ! ribozymic - -[Term] -id: SO:0000375 -name: rRNA_5.8S -def: "5.8S ribosomal RNA (5.8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5.8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. 
coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 6S RNA represses expression from a sigma70-dependent promoter during stationary phase." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB-RsmB RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. 
The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -is_a: SO:0000378 ! DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http://rnaworld.bio.ukans.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA -is_a: SO:0000715 ! implied link automatically realized ! RNA_motif -intersection_of: SO:0000715 ! RNA_motif -intersection_of: has_quality SO:0001186 ! ribozymic -relationship: has_quality SO:0001186 ! implied link automatically realized ! ribozymic - -[Term] -id: SO:0000381 -name: group_IIA_intron -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -is_a: SO:0000644 ! antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. 
Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterised activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesises telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." 
[http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -def: "An attribute describes a quality of sequence." [SO:ke] -is_a: SO:0000000 ! Sequence_Ontology -disjoint_from: SO:0000041 ! operation -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:0000968 ! replication_mode -disjoint_from: SO:0001026 ! genome -disjoint_from: SO:0001060 ! sequence_variant -disjoint_from: SO:1000132 ! mutation - -[Term] -id: SO:0000401 -name: gene_attribute -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_obsolete: true - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. 
In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0005837 ! U14_snoRNA_primary_transcript - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron. Twintrons are group II or III introns, into which another group II or III intron has been transposed." [PMID:1899376, PMID:7823908] -is_a: SO:0000188 ! 
intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: Discrete. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A region of a molecule that binds to a protein." [SO:ke] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000411 -name: rescue_region -def: "A region that rescues." [SO:xp] -synonym: "rescue fragment" EXACT [] -synonym: "rescue segment" RELATED [] -is_a: SO:0000695 ! implied link automatically realized ! reagent -intersection_of: SO:0000695 ! reagent -intersection_of: has_quality SO:0000814 ! rescue -relationship: has_quality SO:0000814 ! implied link automatically realized ! rescue - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "Any of the individual polynucleotide sequences produced by digestion of DNA with a restriction endonuclease." [http://www.agron.missouri.edu/cgi-bin/sybgw_mdb/mdb3/Term/119] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -is_a: SO:0000700 ! 
remark - -[Term] -id: SO:0000414 -name: invalidated_by_genomic_contamination -def: "An attribute to describe a feature that is invalidated due to genomic contamination." [SO:ke] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000415 -name: invalidated_by_genomic_polyA_primed_cDNA -def: "An attribute to describe a feature that is invalidated due to polyA priming." [SO:ke] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000416 -name: invalidated_by_partial_processing -def: "An attribute to describe a feature that is invalidated due to partial processing." [SO:ke] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000417 -name: polypeptide_domain -alt_id: BS:00012 -def: "A structurally or functionally defined protein region. In proteins with multiple domains, the combination of the domains determines the function of the protein. Region which has been shown to recur throughout evolution." [EBIBS:GAR, http://www.molbiol.bbsrc.ac.uk/new_protein/domains.html] -comment: Range. Old definition from before biosapiens: A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains. -subset: biosapiens -synonym: "ca_bind" NARROW [] -synonym: "DNA_bind" NARROW [] -synonym: "domain" RELATED [] -synonym: "np_bind" NARROW [] -synonym: "polypeptide_domain" EXACT [] -synonym: "zn_fing" NARROW [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. 
-subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "signal peptide coding sequence" EXACT [] -synonym: "signal_peptide" EXACT [] -is_a: SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The extent of a polypeptide chain in the mature protein." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" EXACT [] -synonym: "mature peptide" RELATED [] -synonym: "mature_protein_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -synonym: "5' TIR" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -synonym: "3' TIR" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -synonym: "U5 long terminal repeat region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000423 -name: R_LTR_region -synonym: "R long terminal repeat region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000424 -name: U3_LTR_region -synonym: "U3 long terminal repeat region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000425 -name: five_prime_LTR -synonym: "5' long terminal repeat" EXACT [] -synonym: "5' LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -synonym: "3' long terminal repeat" EXACT [] -synonym: "3' LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -synonym: "R 5' long term repeat region" EXACT [] -is_a: SO:0000423 ! 
R_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -synonym: "U5 5' long terminal repeat region" EXACT [] -is_a: SO:0000422 ! U5_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -synonym: "U3 5' long term repeat region" EXACT [] -is_a: SO:0000424 ! U3_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -synonym: "R 3' long terminal repeat region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -synonym: "U3 3' long terminal repeat region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -synonym: "U5 3' long terminal repeat region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: target_site_duplication -def: "A sequence of the target DNA that is duplicated when a transposable element or phage inserts; usually found at each end the insertion." [http://www.koko.gov.my/CocoaBioTech/Glossaryt.html] -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." [SO:ke] -synonym: "LTR retrotransposon poly purine tract" RELATED [] -is_a: SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! 
origin_of_replication - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! ring_chromosome - -[Term] -id: SO:0000440 -name: vector -def: "A DNA molecule that can be used to transfer DNA molecules between organisms." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000443 -name: polymer_attribute -def: "An attribute to describe the kind of biological sequence." [SO:ke] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "5' nc exon" EXACT [] -synonym: "5' non coding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." [SO:ke] -is_a: SO:0000188 ! 
intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." [SO:ke] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequency of these components." [SO:ma] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -is_a: SO:0000341 ! chromosome_band - -[Term] -id: SO:0000451 -name: gene_with_polyadenylated_mRNA -def: "A gene that encodes a polyadenylated mRNA." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000871 ! polyadenylated_mRNA -relationship: associated_with SO:0000871 ! implied link automatically realized ! polyadenylated_mRNA - -[Term] -id: SO:0000452 -name: transgene_attribute -is_obsolete: true - -[Term] -id: SO:0000453 -name: transposition -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A small, 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000455 -name: gene_with_mRNA_with_frameshift -def: "A gene that encodes an mRNA with a frameshift." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000865 ! frameshift -relationship: associated_with SO:0000865 ! implied link automatically realized ! 
frameshift - -[Term] -id: SO:0000456 -name: recombinationally_rearranged_gene -def: "A gene that is recombinationally rearranged." [SO:ke] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000940 ! recombinationally_rearranged -relationship: has_quality SO:0000940 ! implied link automatically realized ! recombinationally_rearranged - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -def: "A chromosome duplication involving an insertion from another chromosome." [SO:ke] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment -relationship: part_of SO:0000504 ! D_DJ_C_cluster -relationship: part_of SO:0000505 ! D_DJ_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000508 ! D_DJ_J_cluster -relationship: part_of SO:0000509 ! D_J_C_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000528 ! V_D_DJ_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000532 ! V_D_J_cluster -relationship: part_of SO:0000559 ! D_cluster -relationship: part_of SO:0000560 ! D_J_cluster - -[Term] -id: SO:0000459 -name: gene_with_trans_spliced_transcript -def: "A gene with a transcript that is trans-spliced." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000479 ! trans_spliced_transcript -relationship: associated_with SO:0000479 ! implied link automatically realized ! 
trans_spliced_transcript - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_segment -comment: I am using the term segment instead of gene here to avoid confusion with the region 'gene'. -synonym: "vertebrate_immunoglobulin/T-cell receptor gene" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000463 -name: encodes_alternately_spliced_transcripts -def: "A gene that encodes more than one transcript." [SO:ke] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." [FB:km] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-GENE" EXACT [] -synonym: "variable_gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment -relationship: part_of SO:0000518 ! V_DJ_cluster -relationship: part_of SO:0000519 ! V_DJ_J_cluster -relationship: part_of SO:0000520 ! V_VDJ_C_cluster -relationship: part_of SO:0000521 ! 
V_VDJ_cluster -relationship: part_of SO:0000522 ! V_VDJ_J_cluster -relationship: part_of SO:0000523 ! V_VJ_C_cluster -relationship: part_of SO:0000524 ! V_VJ_cluster -relationship: part_of SO:0000525 ! V_VJ_J_cluster -relationship: part_of SO:0000526 ! V_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000528 ! V_D_DJ_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000532 ! V_D_J_cluster -relationship: part_of SO:0000534 ! V_J_cluster -relationship: part_of SO:0000535 ! V_J_C_cluster -relationship: part_of SO:0000542 ! V_DJ_C_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster -relationship: part_of SO:0000566 ! V_VJ_J_C_cluster - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -def: "An attribute describing a gene sequence where the resulting protein is regulated by the stability of the resulting protein." [SO:ke] -synonym: "post-translationally regulated by protein stability" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -def: "An attribute describing a gene sequence where the resulting protein is modified to regulate it." [SO:ke] -synonym: "post-translationally regulated by protein modification" EXACT [] -is_a: SO:0000130 ! 
post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment -relationship: part_of SO:0000485 ! DJ_J_cluster -relationship: part_of SO:0000487 ! VDJ_J_C_cluster -relationship: part_of SO:0000488 ! VDJ_J_cluster -relationship: part_of SO:0000490 ! VJ_J_C_cluster -relationship: part_of SO:0000491 ! VJ_J_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000508 ! D_DJ_J_cluster -relationship: part_of SO:0000509 ! D_J_C_cluster -relationship: part_of SO:0000511 ! J_C_cluster -relationship: part_of SO:0000513 ! J_cluster -relationship: part_of SO:0000519 ! V_DJ_J_cluster -relationship: part_of SO:0000522 ! V_VDJ_J_cluster -relationship: part_of SO:0000525 ! V_VJ_J_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000532 ! V_D_J_cluster -relationship: part_of SO:0000534 ! V_J_cluster -relationship: part_of SO:0000535 ! V_J_C_cluster -relationship: part_of SO:0000540 ! DJ_J_C_cluster -relationship: part_of SO:0000560 ! D_J_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster -relationship: part_of SO:0000566 ! V_VJ_J_C_cluster - -[Term] -id: SO:0000471 -name: autoregulated -def: "The gene product is involved in its own transcriptional regulation." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [CJM:SO] -subset: SOFA -is_a: SO:0000353 ! 
assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -def: "The gene product is involved in its own transcriptional regulation where it decreases transcription." [SO:ke] -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -def: "The gene product is involved in its own transcriptional regulation, where it increases transcription." [SO:ke] -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." [SO:ke] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -def: "A gene that is polycistronic." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000478 -name: C_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "C_GENE" EXACT [] -synonym: "constant gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment -relationship: part_of SO:0000487 ! VDJ_J_C_cluster -relationship: part_of SO:0000489 ! VJ_C_cluster -relationship: part_of SO:0000490 ! VJ_J_C_cluster -relationship: part_of SO:0000504 ! D_DJ_C_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000509 ! D_J_C_cluster -relationship: part_of SO:0000511 ! J_C_cluster -relationship: part_of SO:0000520 ! V_VDJ_C_cluster -relationship: part_of SO:0000523 ! V_VJ_C_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000531 ! 
V_D_J_C_cluster -relationship: part_of SO:0000535 ! V_J_C_cluster -relationship: part_of SO:0000539 ! DJ_C_cluster -relationship: part_of SO:0000540 ! DJ_J_C_cluster -relationship: part_of SO:0000541 ! VDJ_C_cluster -relationship: part_of SO:0000542 ! V_DJ_C_cluster -relationship: part_of SO:0000558 ! C_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster -relationship: part_of SO:0000566 ! V_VJ_J_C_cluster - -[Term] -id: SO:0000479 -name: trans_spliced_transcript -def: "A transcript that is trans-spliced." [SO:xp] -synonym: "trans-spliced transcript" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000870 ! trans_spliced -relationship: has_quality SO:0000870 ! implied link automatically realized ! trans_spliced - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly." [SO:ke] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occuring at the termini of a DNA transposon." [SO:ke] -synonym: "TIR" EXACT [] -is_a: SO:0000294 ! inverted_repeat -relationship: part_of SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate_immunoglobulin/T-cell receptor gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! 
primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(VDJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(VJ)-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "3'D-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "3'D-NOMAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! 
three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "3'D-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "5'D-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "5'D-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "5'-SPACER" EXACT [] -synonym: "five prime D-spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000556 ! 
five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." [SO:ke] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000507 -name: pseudogenic_exon -def: "A non functional descendant of an exon, part of a pseudogene." [SO:ke] -comment: This is the analog of the exon of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutatations. -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon -relationship: part_of SO:0000516 ! pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000510 -name: VD_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V_D_GENE" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! 
J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -def: "A non functional descendant of a transcript, part of a pseudogene." [SO:ke] -comment: This is the analog of the transcript of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutatations. -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VDJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." [http://www.pnas.org/cgi/content/full/100/11/6569] -synonym: "ISCR" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! 
recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." [FB:km] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000548 -name: gene_with_edited_transcript -def: "A gene that encodes a transcript that is edited." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000873 ! edited_transcript -relationship: associated_with SO:0000873 ! implied link automatically realized ! edited_transcript - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "Region in 5' UTR where ribosome assembles on mRNA." [SO:ke] -synonym: "five prime ribosome binding site" EXACT [] -synonym: "RBS" RELATED [] -synonym: "Shine Dalgarno sequence" EXACT [] -synonym: "Shine-Dalgarno sequence" EXACT [] -is_a: SO:0000139 ! 
ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! processed_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "5' clip" RELATED [] -synonym: "five prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "5'RS" EXACT [] -synonym: "five prime D-recombination signal sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "3'-clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://imgt.cines.fr/ligmb/LIGMlect?query=7] -synonym: "C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: "Seven nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or\nJ-gene recombination feature of an immunoglobulin or T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "HEPTAMER" RELATED [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000563 -name: vertebrate_immune_system_gene_recombination_spacer -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inverversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promoter -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000569 -name: retrotransposed -alt_id: SO:0100042 -def: "An attribute of a feature that occured as the product of a reverse transcriptase mediated event." [SO:ke] -comment: GO:0003964 RNA-directed DNA polymerase activity. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "3'D-RS" EXACT [] -synonym: "three_prime_D-recombination_signal_sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000572 -name: DJ_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-J-GENE" EXACT [] -is_a: SO:0000936 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -relationship: part_of SO:0000485 ! DJ_J_cluster -relationship: part_of SO:0000504 ! D_DJ_C_cluster -relationship: part_of SO:0000505 ! D_DJ_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000508 ! D_DJ_J_cluster -relationship: part_of SO:0000518 ! V_DJ_cluster -relationship: part_of SO:0000519 ! V_DJ_J_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000528 ! V_D_DJ_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000539 ! DJ_C_cluster -relationship: part_of SO:0000540 ! DJ_J_C_cluster -relationship: part_of SO:0000542 ! V_DJ_C_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster - -[Term] -id: SO:0000573 -name: rRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000574 -name: VDJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-J-GENE" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -relationship: part_of SO:0000487 ! VDJ_J_C_cluster -relationship: part_of SO:0000488 ! VDJ_J_cluster -relationship: part_of SO:0000520 ! V_VDJ_C_cluster -relationship: part_of SO:0000521 ! V_VDJ_cluster -relationship: part_of SO:0000522 ! V_VDJ_J_cluster -relationship: part_of SO:0000541 ! VDJ_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster - -[Term] -id: SO:0000575 -name: scRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000576 -name: VJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-J-GENE" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -relationship: part_of SO:0000489 ! VJ_C_cluster -relationship: part_of SO:0000490 ! VJ_J_C_cluster -relationship: part_of SO:0000491 ! VJ_J_cluster -relationship: part_of SO:0000523 ! V_VJ_C_cluster -relationship: part_of SO:0000524 ! V_VJ_cluster -relationship: part_of SO:0000525 ! V_VJ_J_cluster -relationship: part_of SO:0000566 ! V_VJ_J_C_cluster - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." [SO:ma] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://www.rna.ucla.edu] -synonym: "pre-edited region" EXACT [] -is_a: SO:0000579 ! 
edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "A tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. TmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and eukaryote nuclear genomes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa RNA" RELATED [] -synonym: "ssrA" RELATED [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_encoding -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." [SO:ke] -synonym: "10Sa RNA primary transcript" RELATED [] -synonym: "ssrA RNA primary transcript" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyse their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -is_a: SO:0000188 ! implied link automatically realized ! 
intron -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001186 ! ribozymic -relationship: has_quality SO:0001186 ! implied link automatically realized ! ribozymic - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000589 ! 
SRP_RNA_primary_transcript - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A tertiary structure in RNA where nucleotides in a loop form base pairs with a region of RNA downstream of the loop." [RSC:cb] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "classical pseudoknot" EXACT [] -synonym: "H-pseudoknot" EXACT [] -synonym: "H-type pseudoknot" EXACT [] -synonym: "hairpin-type pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "box C/D snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000595 ! C_D_box_snoRNA_primary_transcript - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. 
A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "box H/ACA snoRNA" EXACT [] -synonym: "H/ACA box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000596 ! H_ACA_box_snoRNA_primary_transcript - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." [http://www.rna.ucla.edu/index.html] -is_obsolete: true - -[Term] -id: SO:0000598 -name: edited_by_C_insertion_and_dinucleotide_insertion -def: "The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. 
Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs." [http://nsm1.utdallas.edu/bio/miller/physarum/overview.htm] -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000599 -name: edited_by_C_to_U_substitution -is_obsolete: true - -[Term] -id: SO:0000600 -name: edited_by_A_to_I_substitution -is_obsolete: true - -[Term] -id: SO:0000601 -name: edited_by_G_addition -is_obsolete: true - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." 
[http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." [http://www.rna.ucla/] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "The region between two known genes." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://www.rna.ucla/] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." [http://www.rna.ucla.edu/] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_encoding -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -is_a: SO:0000001 ! region -relationship: adjacent_to SO:0000602 ! guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! 
spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -is_a: SO:0000752 ! gene_group_regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -is_a: SO:0000951 ! eukaryotic_terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "tRNA promoter" RELATED [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -def: "A variably distant linear promoter region recognised by TFIIIC, with consensus sequence TGGCnnAGTGG." [SO:ke] -is_a: SO:0000846 ! RNApol_III_promoter_type_2_region - -[Term] -id: SO:0000620 -name: B_box -def: "A variably distant linear promoter region recognised by TFIIIC, with consensus sequence AGGTTCCAnnCC." [SO:ke] -is_a: SO:0000846 ! RNApol_III_promoter_type_2_region - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -is_a: SO:0000171 ! 
RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -def: "An RNA polymerase II type 1 promoter with consensus sequence CAnnCCn." [SO:ke] -is_a: SO:0000845 ! RNApol_III_promoter_type_1_region - -[Term] -id: SO:0000623 -name: snRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "Combination of short DNA sequence elements which suppress the transcription of an adjacent gene or genes." [http://www.brunel.ac.uk/depts/bio/project/old_hmg/gloss3.htm] -subset: SOFA -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000627 -name: insulator -def: "A trancriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -def: "A start codon upstream of the ORF." [SO:ke] -is_a: SO:0000837 ! UTR_region - -[Term] -id: SO:0000631 -name: polycistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." [SO:ke] -is_a: SO:0000078 ! implied link automatically realized ! polycistronic_transcript -is_a: SO:0000185 ! implied link automatically realized ! primary_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000880 ! 
polycistronic - -[Term] -id: SO:0000632 -name: monocistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." [SO:ke] -is_a: SO:0000185 ! implied link automatically realized ! primary_transcript -is_a: SO:0000665 ! implied link automatically realized ! monocistronic_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000633 -name: monocistronic_mRNA -def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd] -synonym: "monocistronic processed transcript" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -is_a: SO:0000665 ! implied link automatically realized ! monocistronic_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000634 -name: polycistronic_mRNA -def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd] -synonym: "polycistronic processed transcript" RELATED [] -is_a: SO:0000078 ! implied link automatically realized ! polycistronic_transcript -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "A primary transcript that donates the spliced leader to other mRNA." [SO:ke] -synonym: "mini-exon donor RNA" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000635 ! mini_exon_donor_RNA - -[Term] -id: SO:0000637 -name: engineered_plasmid -def: "A plasmid that is engineered." [SO:xp] -synonym: "engineered plasmid gene" RELATED [] -is_a: SO:0000155 ! implied link automatically realized ! plasmid -is_a: SO:0000804 ! 
implied link automatically realized ! engineered_region -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -is_a: SO:0000838 ! rRNA_primary_transcript_region - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repetitive sequence spanning 500 to 20,000 base pairs (a repeat unit is 5 - 30 base pairs)." [http://www.rerf.or.jp/eigo/glossary/minisate.htm] -subset: SOFA -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -is_a: SO:0000185 ! 
primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro RNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000648 -name: stRNA_primary_transcript -def: "A primary transcript encoding a small temporal mRNA (SO:0000649)." [SO:ke] -synonym: "small temporal RNA primary transcript" EXACT [] -is_a: SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000255 ! rRNA_small_subunit_primary_transcript - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! 
rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilises 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000654 -name: maxicircle_gene -def: "A mitochondrial gene located in a maxicircle." [SO:xp] -synonym: "maxi-circle gene" EXACT [] -is_a: SO:0000089 ! implied link automatically realized ! kinetoplast_gene -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: has_origin SO:0000742 ! maxicircle_sequence -relationship: has_origin SO:0000742 ! implied link automatically realized ! maxicircle_sequence - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -is_a: SO:0000233 ! 
processed_transcript - -[Term] -id: SO:0000656 -name: stRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "interspersed repeat" EXACT [] -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_obsolete: true - -[Term] -id: SO:0000661 -name: intron_attribute -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000663 -name: tRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000665 -name: monocistronic_transcript -def: "A transcript that is monocistronic." [SO:xp] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000878 ! monocistronic -relationship: has_quality SO:0000878 ! implied link automatically realized ! monocistronic - -[Term] -id: SO:0000666 -name: mobile_intron -is_a: SO:0000661 ! intron_attribute - -[Term] -id: SO:0000667 -name: insertion -def: "A region of sequence that has been inserted." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0001059 ! sequence_alteration -relationship: sequence_of SO:0000046 ! insert - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -is_a: SO:0000102 ! 
expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." [SO:ma] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non-canonical splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." [SO:ke] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -is_a: SO:0000164 ! three_prime_splice_site -is_a: SO:0000675 ! canonical_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." [SO:ke] -is_a: SO:0000163 ! five_prime_splice_site -is_a: SO:0000675 ! 
canonical_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." [SO:ke] -synonym: "non-canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_splice_site -is_a: SO:0000674 ! non_canonical_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non-canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_splice_site -is_a: SO:0000674 ! non_canonical_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non ATG start codon" EXACT [] -synonym: "non-canonical start codon" EXACT [] -is_a: SO:0000318 ! start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -is_a: SO:0000233 ! processed_transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -is_a: SO:0000344 ! splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeting by a nuclease enzyme." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000685 -name: DNAaseI_hypersensitive_site -is_a: SO:0000322 ! 
nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "For some translocations, particularly but not exclusively, reciprocal translocations, the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements." [SO:ma] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: position_of SO:0000045 ! delete - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000690 -name: gene_with_polycistronic_transcript -def: "A gene that encodes a polycistronic transcript." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000078 ! polycistronic_transcript -relationship: associated_with SO:0000078 ! implied link automatically realized ! polycistronic_transcript - -[Term] -id: SO:0000691 -name: cleaved_initiator_methionine -alt_id: BS:00067 -def: "Indicates when the initator methionine has been cleaved from the mature sequence." [EBIBS:GAR] -subset: biosapiens -synonym: "cleaved_initiator_methionine" EXACT [] -synonym: "init_met" RELATED [] -synonym: "initator methionine" EXACT [] -is_a: SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000692 -name: gene_with_dicistronic_transcript -def: "A gene that encodes a dicistronic transcript." [SO:xp] -is_a: SO:0000690 ! implied link automatically realized ! 
gene_with_polycistronic_transcript -intersection_of: SO:0000690 ! gene_with_polycistronic_transcript -intersection_of: associated_with SO:0000079 ! dicistronic_transcript -relationship: associated_with SO:0000079 ! implied link automatically realized ! dicistronic_transcript - -[Term] -id: SO:0000693 -name: gene_with_recoded_mRNA -def: "A gene that encodes an mRNA that is recoded." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:1001261 ! recoded_mRNA -relationship: associated_with SO:1001261 ! implied link automatically realized ! recoded_mRNA - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives (alleles) exist in normal individuals in some population(s), wherein the least frequent allele has an abundance of 1% or greater." [http://www.cgr.ki.se/cgb/groups/brookes/Articles/essence_of_snps_article.pdf] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000697 -name: gene_with_stop_codon_read_through -def: "A gene that encodes a transcript with stop codon readthrough." [SO:xp] -is_a: SO:0000693 ! implied link automatically realized ! gene_with_recoded_mRNA -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: associated_with SO:0000883 ! stop_codon_read_through -relationship: associated_with SO:0000883 ! implied link automatically realized ! 
stop_codon_read_through - -[Term] -id: SO:0000698 -name: gene_with_stop_codon_redefined_as_pyrrolysine -def: "A gene encoding an mRNA that has the stop codon redefined as pyrrolysine." [SO:xp] -is_a: SO:0000697 ! implied link automatically realized ! gene_with_stop_codon_read_through -intersection_of: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: associated_with SO:0000884 ! stop_codon_redefined_as_pyrrolysine -relationship: associated_with SO:0000884 ! implied link automatically realized ! stop_codon_redefined_as_pyrrolysine - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000001 ! region - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology.\nA gene may be considered as a unit of inheritance. 
-subset: SOFA -is_a: SO:0000001 ! region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjacent copies of a DNA sequence." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The process that produces mature transcripts by combining exons of independent pre-mRNA molecules. The acceptor site lies on the 3' of these molecules." [SO:ke] -subset: SOFA -is_a: SO:0000164 ! three_prime_splice_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The site at which trans-splicing occurs." [SO:ke] -synonym: "trans-splice donor site" EXACT [] -is_a: SO:0000163 ! five_prime_splice_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000710 -name: gene_with_stop_codon_redefined_as_selenocysteine -def: "A gene encoding an mRNA that has the stop codon redefined as selenocysteine." [SO:xp] -is_a: SO:0000697 ! implied link automatically realized ! gene_with_stop_codon_read_through -intersection_of: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: associated_with SO:0000885 ! stop_codon_redefined_as_selenocysteine -relationship: associated_with SO:0000885 ! implied link automatically realized ! stop_codon_redefined_as_selenocysteine - -[Term] -id: SO:0000711 -name: gene_with_mRNA_recoded_by_translational_bypass -def: "A gene with mRNA recoded by translational bypass." [SO:xp] -is_a: SO:0000693 ! implied link automatically realized ! gene_with_recoded_mRNA -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: associated_with SO:0000886 ! recoded_by_translational_bypass -relationship: associated_with SO:0000886 ! 
implied link automatically realized ! recoded_by_translational_bypass - -[Term] -id: SO:0000712 -name: gene_with_transcript_with_translational_frameshift -def: "A gene encoding a transcript that has a translational frameshift." [SO:xp] -is_a: SO:0000693 ! implied link automatically realized ! gene_with_recoded_mRNA -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: associated_with SO:0000887 ! translationally_frameshifted -relationship: associated_with SO:0000887 ! implied link automatically realized ! translationally_frameshifted - -[Term] -id: SO:0000713 -name: DNA_motif -def: "A motif that is active in the DNA form of the sequence." [SO:ke] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000716 -name: dicistronic_mRNA -def: "An mRNA that has the quality dicistronic." [SO:ke] -synonym: "dicistronic processed transcript" RELATED [] -is_a: SO:0000079 ! implied link automatically realized ! dicistronic_transcript -is_a: SO:0000634 ! implied link automatically realized ! polycistronic_mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SO:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interrupted by one or more stop codons; usually identified through intergenomic sequence comparisons." [SO:rb] -comment: Term requested by Rama from SGD. -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000720 -name: foreign_transposable_element -def: "A transposable element that is foreign." [SO:ke] -comment: requested by Michael on 19 Nov 2004. -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000784 ! implied link automatically realized ! foreign - -[Term] -id: SO:0000721 -name: gene_with_dicistronic_primary_transcript -def: "A gene that encodes a dicistronic primary transcript." [SO:xp] -comment: Requested by Michael, 19 nov 2004. -is_a: SO:0000692 ! implied link automatically realized ! gene_with_dicistronic_transcript -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: associated_with SO:1001197 ! dicistronic_primary_transcript -relationship: associated_with SO:1001197 ! implied link automatically realized ! dicistronic_primary_transcript - -[Term] -id: SO:0000722 -name: gene_with_dicistronic_mRNA -def: "A gene that encodes a polycistronic mRNA." [SO:xp] -comment: Requested by MA nov 19 2004. -synonym: "gene with dicistronic processed transcript" EXACT [] -is_a: SO:0000692 ! implied link automatically realized ! gene_with_dicistronic_transcript -intersection_of: SO:0000692 ! 
gene_with_dicistronic_transcript -intersection_of: associated_with SO:0000716 ! dicistronic_mRNA -relationship: associated_with SO:0000716 ! implied link automatically realized ! dicistronic_mRNA - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." [SO:ma] -synonym: "intervening DNA" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit_peptide" EXACT [] -is_a: SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000726 -name: repeat_unit -def: "A single repeat element." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to comply with the feature table. -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. 
-synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000728 -name: intein -def: "A region of a peptide that is able to excise itself and rejoin the remaining portions with a peptide bond." [SO:ke] -comment: Intein-mediated protein splicing occurs after mRNA has been translated into a protein. -synonym: "protein intron" RELATED [] -is_a: SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000729 -name: intein_containing -def: "An attribute of protein-coding genes where the initial protein product contains an intein." [SO:ke] -is_a: SO:0000010 ! protein_coding - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! assembly - -[Term] -id: SO:0000731 -name: fragment -def: "An attribute to describe a feature that is incomplete." [SO:ke] -comment: Term added because of request by MO people. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000732 -name: predicted -def: "An attribute describing an unverified region." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000733 -name: feature_attribute -def: "An attribute describing a located_sequence_feature." [SO:ke] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000734 -name: exemplar_mRNA -def: "An exemplar is a representative cDNA sequence for each gene. The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: Added for the MO people. -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000864 ! exemplar -relationship: has_quality SO:0000864 ! implied link automatically realized ! 
exemplar - -[Term] -id: SO:0000735 -name: sequence_location -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_sequence -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000738 -name: nuclear_sequence -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000740 -name: plastid_sequence -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000741 -name: kinetoplast_sequence -is_a: SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000742 -name: maxicircle_sequence -is_a: SO:0000741 ! kinetoplast_sequence - -[Term] -id: SO:0000743 -name: apicoplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_location -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_location -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -is_a: SO:0005836 ! regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -is_a: SO:0000695 ! 
reagent - -[Term] -id: SO:0000754 -name: lambda_vector -def: "The lambda bacteriophage is the vector for the linear lambda clone. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000755 -name: plasmid_vector -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template." [SO:ma] -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -synonym: "single strand cDNA" EXACT [] -synonym: "single-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -synonym: "double strand cDNA" RELATED [] -synonym: "double-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_obsolete: true - -[Term] -id: SO:0000760 -name: YAC_clone -is_obsolete: true - -[Term] -id: SO:0000761 -name: phagemid_clone -is_obsolete: true - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000763 -name: fosmid_clone -is_obsolete: true - -[Term] -id: SO:0000764 -name: BAC_clone -is_obsolete: true - -[Term] -id: SO:0000765 -name: cosmid_clone -is_obsolete: true - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -def: "A tRNA sequence that has a pyrrolysine anticodon, and a 3' pyrrolysine binding region." [SO:ke] -synonym: "pyrrolysyl-transfer ribonucleic acid" EXACT [] -synonym: "pyrrolysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0001178 ! pyrrolysine_tRNA_primary_transcript - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome." [SO:ma] -is_a: SO:0000155 ! 
plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria. Processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [http://nar.oxfordjournals.org/cgi/content/full/32/15/4531n10, Indiana:kw] -comment: Added in response to comment from Kelly Williams from Indiana. Nov 2005. -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [http://nar.oxfordjournals.org/cgi/content/full/32/15/4531n10, Indiana:kw] -comment: Added in response to Kelly Williams from Indiana. Date: Nov 2005. -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000771 -name: QTL -def: "A quantitative trait locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." [http://rgd.mcw.edu/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005. -synonym: "quantitative trait locus" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000772 -name: genomic_island -def: "A genomic island is an integrated mobile genetic element, characterized by size (over 10 Kb). It that has features that suggest a foreign origin. These can include nucleotide distribution (oligonucleotides signature, CG content etc.) 
that differs from the bulck of the chromosome and/or genes suggesting DNA mobility." [Phigo:at, SO:ke] -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands. -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -def: "An adaptive island is a genomic island that provides an adaptive advantage to the host." [SO:ke] -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands. Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands. Evolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA gene. John T. Sullivan and Clive W. 
Ronso PNAS 1998 Apr 28 95 (9) 5145-5149. -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000779 -name: engineered_episome -def: "An episome that is engineered." [SO:xp] -comment: Requested by Lynn Crosby Jan 2006. -is_a: SO:0000637 ! implied link automatically realized ! engineered_plasmid -is_a: SO:0000768 ! implied link automatically realized ! episome -intersection_of: SO:0000768 ! episome -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000780 -name: transposable_element_attribute -comment: Added by KE Jan 2006 to capture the kinds of attributes of TEs -is_obsolete: true - -[Term] -id: SO:0000781 -name: transgenic -def: "Attribute describing sequence that has been integrated with foreign sequence." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000782 -name: natural -def: "An attribute describing a feature that occurs in nature." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000783 -name: engineered -def: "An attribute to describe a region that was modified in vitro." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000784 -name: foreign -def: "An atrribute to describe a region from another species." [SO:ke] -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000785 -name: cloned_region -comment: Added in response to Lynn Crosby. A clone insert may be composed of many cloned regions. -synonym: "cloned segment" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000786 -name: reagent_attribute -comment: Added jan 2006 by KE. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000787 -name: clone_attribute -is_obsolete: true - -[Term] -id: SO:0000788 -name: cloned -is_obsolete: true - -[Term] -id: SO:0000789 -name: validated -def: "An attribute to describe a feature that has been proven." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000790 -name: invalidated -def: "An attribute describing a feature that is invalidated." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000791 -name: cloned_genomic -is_obsolete: true - -[Term] -id: SO:0000792 -name: cloned_cDNA -is_obsolete: true - -[Term] -id: SO:0000793 -name: engineered_DNA -is_obsolete: true - -[Term] -id: SO:0000794 -name: engineered_rescue_region -def: "A rescue region that is engineered." [SO:xp] -synonym: "engineered rescue fragment" EXACT [] -synonym: "engineered rescue segment" EXACT [] -is_a: SO:0000411 ! implied link automatically realized ! rescue_region -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000411 ! rescue_region -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000795 -name: rescue_mini_gene -def: "A mini_gene that rescues." [SO:xp] -synonym: "rescue mini-gene" EXACT [] -is_a: SO:0000411 ! implied link automatically realized ! rescue_region -is_a: SO:0000815 ! implied link automatically realized ! mini_gene -intersection_of: SO:0000815 ! mini_gene -intersection_of: has_quality SO:0000814 ! 
rescue - -[Term] -id: SO:0000796 -name: transgenic_transposable_element -def: "TE that has been modified in vitro, including insertion of DNA derived from a source other than the originating TE." [SO:mc] -comment: Modified as requested by Lynn - FB. May 2007. -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: derives_from SO:0000151 ! clone -intersection_of: has_quality SO:0000781 ! transgenic -relationship: derives_from SO:0000151 ! implied link automatically realized ! clone -relationship: has_quality SO:0000781 ! implied link automatically realized ! transgenic - -[Term] -id: SO:0000797 -name: natural_transposable_element -def: "TE that exists (or existed) in nature." [SO:mc] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000782 ! natural -relationship: has_quality SO:0000782 ! implied link automatically realized ! natural - -[Term] -id: SO:0000798 -name: engineered_transposable_element -def: "TE that has been modified by manipulations in vitro." [SO:mc] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000799 -name: engineered_foreign_transposable_element -def: "A transposable_element that is engineered and foreign." [SO:mc] -is_a: SO:0000720 ! implied link automatically realized ! foreign_transposable_element -is_a: SO:0000798 ! implied link automatically realized ! engineered_transposable_element -is_a: SO:0000805 ! implied link automatically realized ! engineered_foreign_region -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! 
engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000800 -name: assortment_derived_duplication -def: "A multi-chromosome duplication aberration generated by reassortment of other aberration components." [FB:gm] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000801 -name: assortment_derived_deficiency_plus_duplication -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency and a duplication." [FB:gm] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000802 -name: assortment_derived_deficiency -def: "A multi-chromosome deficiency aberration generated by reassortment of other aberration components." [FB:gm] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000803 -name: assortment_derived_aneuploid -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency or a duplication." [fb:gm] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000804 -name: engineered_region -def: "A region that is engineered." [SO:xp] -synonym: "engineered sequence" EXACT [] -is_a: SO:0000001 ! implied link automatically realized ! region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! implied link automatically realized ! engineered - -[Term] -id: SO:0000805 -name: engineered_foreign_region -def: "A region that is engineered and foreign." [SO:xp] -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000784 ! implied link automatically realized ! foreign - -[Term] -id: SO:0000806 -name: fusion -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000807 -name: engineered_tag -def: "A tag that is engineered." 
[SO:xp] -is_a: SO:0000324 ! implied link automatically realized ! tag -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000324 ! tag -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000808 -name: validated_cDNA_clone -def: "A cDNA clone that has been validated." [SO:xp] -is_a: SO:0000317 ! implied link automatically realized ! cDNA_clone -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000789 ! validated -relationship: has_quality SO:0000789 ! implied link automatically realized ! validated - -[Term] -id: SO:0000809 -name: invalidated_cDNA_clone -def: "A cDNA clone that is invalid." [SO:xp] -is_a: SO:0000317 ! implied link automatically realized ! cDNA_clone -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000790 ! invalidated -relationship: has_quality SO:0000790 ! implied link automatically realized ! invalidated - -[Term] -id: SO:0000810 -name: chimeric_cDNA_clone -def: "A cDNA clone invalidated because it is chimeric." [SO:xp] -is_a: SO:0000809 ! implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA -relationship: has_quality SO:0000362 ! implied link automatically realized ! invalidated_by_chimeric_cDNA - -[Term] -id: SO:0000811 -name: genomically_contaminated_cDNA_clone -def: "A cDNA clone invalidated by genomic contamination." [SO:xp] -is_a: SO:0000809 ! implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000414 ! invalidated_by_genomic_contamination -relationship: has_quality SO:0000414 ! implied link automatically realized ! invalidated_by_genomic_contamination - -[Term] -id: SO:0000812 -name: polyA_primed_cDNA_clone -def: "A cDNA clone invalidated by polyA priming." [SO:xp] -is_a: SO:0000809 ! 
implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000415 ! invalidated_by_genomic_polyA_primed_cDNA -relationship: has_quality SO:0000415 ! implied link automatically realized ! invalidated_by_genomic_polyA_primed_cDNA - -[Term] -id: SO:0000813 -name: partially_processed_cDNA_clone -def: "A cDNA invalidated clone by partial processing." [SO:xp] -is_a: SO:0000809 ! implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000416 ! invalidated_by_partial_processing -relationship: has_quality SO:0000416 ! implied link automatically realized ! invalidated_by_partial_processing - -[Term] -id: SO:0000814 -name: rescue -def: "An attribute describing a region's ability, when introduced to a mutant organism, to re-establish (rescue) a phenotype." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000815 -name: mini_gene -def: "By definition, minigenes are short open-reading frames (ORF), usually encoding approximately 9 to 20 amino acids, which are expressed in vivo (as distinct from being synthesized as peptide or protein ex vivo and subsequently injected). The in vivo synthesis confers a distinct advantage: the expressed sequences can enter both antigen presentation pathways, MHC I (inducing CD8+ T- cells, which are usually cytotoxic T-lymphocytes (CTL))\nand MHC II (inducing CD4+ T-cells, usually 'T-helpers' (Th)); and can encounter B-cells, inducing antibody responses. Three main vector approaches have been used to deliver minigenes: viral vectors, bacterial vectors and plasmid DNA." [PMID:15992143] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000816 -name: rescue_gene -def: "A gene that rescues." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000814 ! 
rescue -relationship: has_quality SO:0000814 ! implied link automatically realized ! rescue - -[Term] -id: SO:0000817 -name: wild_type -def: "An attribute describing sequence with the genotype found in nature and/or standard laboratory stock." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000818 -name: wild_type_rescue_gene -def: "A gene that rescues." [SO:xp] -is_a: SO:0000816 ! rescue_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000817 ! wild_type -relationship: has_quality SO:0000817 ! implied link automatically realized ! wild_type - -[Term] -id: SO:0000819 -name: mitochondrial_chromosome -def: "A chromosome originating in a mitochondria." [SO:xp] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence -relationship: has_origin SO:0000737 ! implied link automatically realized ! mitochondrial_sequence - -[Term] -id: SO:0000820 -name: chloroplast_chromosome -def: "A chromosome originating in a chloroplast." [SO:xp] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000745 ! chloroplast_sequence -relationship: has_origin SO:0000745 ! implied link automatically realized ! chloroplast_sequence - -[Term] -id: SO:0000821 -name: chromoplast_chromosome -def: "A chromosome originating in a chromoplast." [SO:xp] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000744 ! chromoplast_sequence -relationship: has_origin SO:0000744 ! implied link automatically realized ! chromoplast_sequence - -[Term] -id: SO:0000822 -name: cyanelle_chromosome -def: "A chromosome originating in a cyanelle." [SO:xp] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! 
chromosome -intersection_of: has_origin SO:0000746 ! cyanelle_sequence -relationship: has_origin SO:0000746 ! implied link automatically realized ! cyanelle_sequence - -[Term] -id: SO:0000823 -name: leucoplast_chromosome -def: "A chromosome with origin in a leucoplast." [SO:xp] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000747 ! leucoplast_sequence -relationship: has_origin SO:0000747 ! implied link automatically realized ! leucoplast_sequence - -[Term] -id: SO:0000824 -name: macronuclear_chromosome -def: "A chromosome originating in a macronucleus." [SO:xp] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000083 ! macronuclear_sequence -relationship: has_origin SO:0000083 ! implied link automatically realized ! macronuclear_sequence - -[Term] -id: SO:0000825 -name: micronuclear_chromosome -def: "A chromosome originating in a micronucleus." [SO:xp] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000084 ! micronuclear_sequence -relationship: has_origin SO:0000084 ! implied link automatically realized ! micronuclear_sequence - -[Term] -id: SO:0000826 -name: kinetoplast_chromosome -def: "A chromosome with origin in the kinetoplast." [SO:xp] -is_a: SO:0000819 ! implied link automatically realized ! mitochondrial_chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000741 ! kinetoplast_sequence -relationship: has_origin SO:0000741 ! implied link automatically realized ! kinetoplast_sequence - -[Term] -id: SO:0000827 -name: maxicircle_chromosome -def: "A chromosome originating in a maxi-circle." [SO:xp] -is_a: SO:0000826 ! implied link automatically realized ! kinetoplast_chromosome -intersection_of: SO:0000826 ! 
kinetoplast_chromosome -intersection_of: has_origin SO:0000742 ! maxicircle_sequence -relationship: has_origin SO:0000742 ! implied link automatically realized ! maxicircle_sequence - -[Term] -id: SO:0000828 -name: nuclear_chromosome -def: "A chromosome originating in a nucleus." [SO:xp] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000738 ! nuclear_sequence -relationship: has_origin SO:0000738 ! implied link automatically realized ! nuclear_sequence - -[Term] -id: SO:0000829 -name: nucleomorphic_chromosome -def: "A chromosome originating in a nucleomorph." [SO:xp] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence -relationship: has_origin SO:0000739 ! implied link automatically realized ! nucleomorphic_sequence - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000832 -name: promoter_region -def: "A region of sequence which is part of a promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of promoter to have an is_a path back to the root. -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000167 ! promoter - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." 
[SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: processed_transcript_region -def: "A region of a processed transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a processed transcript and give them an is_a path to the root. -subset: SOFA -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A region of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -is_a: SO:0000834 ! processed_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000838 -name: rRNA_primary_transcript_region -def: "A region of an rRNA primary transcript." [SO:ke] -comment: To allow transcribed_spacer_region to have a path to the root. -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a protein." 
[SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" EXACT [] -synonym: "positional polypeptide feature" EXACT [] -synonym: "region or site annotation" EXACT [] -is_a: SO:0000001 ! region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000840 -name: repeat_component -def: "A region of a repeated sequence." [SO:ke] -comment: A manufactured to group the parts of repeats, to give them an is_a path back to the root. -is_a: SO:0000001 ! region -relationship: part_of SO:0000657 ! repeat_region - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000843 -name: bacterial_RNApol_promoter_region -def: "A region which is part of a bacterial RNA polymerase promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of bacterial_RNApol_promoter to have an is_a path back to the root. -is_a: SO:0000832 ! promoter_region -relationship: part_of SO:0000613 ! bacterial_RNApol_promoter - -[Term] -id: SO:0000844 -name: RNApol_II_promoter_region -def: "A region of sequence which is a promoter for RNA polymerase II." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_II_promoter to have an is_a path back to the root. -is_a: SO:0000832 ! promoter_region -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000845 -name: RNApol_III_promoter_type_1_region -def: "A region of sequence which is a promoter for RNA polymerase III type 1." 
[SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_1 to have an is_a path back to the root. -is_a: SO:0000832 ! promoter_region -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000846 -name: RNApol_III_promoter_type_2_region -def: "A region of sequence which is a promoter for RNA polymerase III type 2." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_2 to have an is_a path back to the root. -is_a: SO:0000832 ! promoter_region -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000847 -name: tmRNA_region -comment: This term was added to provide a grouping term for the region parts of tmRNA, thus giving them an is_a path back to the root. -is_a: SO:0000834 ! processed_transcript_region -relationship: part_of SO:0000584 ! tmRNA - -[Term] -id: SO:0000848 -name: LTR_component -synonym: "long term repeat component" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000849 -name: three_prime_LTR_component -synonym: "3' long terminal repeat component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000850 -name: five_prime_LTR_component -synonym: "5' long term repeat component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000851 -name: CDS_region -subset: SOFA -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0000853 -name: homologous_region -def: "A region that is homologous to another region." [SO:ke] -synonym: "homolog" EXACT [] -synonym: "homologue" EXACT [] -is_a: SO:0000330 ! implied link automatically realized ! 
conserved_region -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000857 ! homologous -relationship: has_quality SO:0000857 ! implied link automatically realized ! homologous - -[Term] -id: SO:0000854 -name: paralogous_region -def: "A homologous_region that is paralogous to another region." [SO:ke] -comment: A term to be used in conjunction with the paralogous_to relationship. -synonym: "paralog" EXACT [] -synonym: "paralogue" EXACT [] -is_a: SO:0000853 ! implied link automatically realized ! homologous_region -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000859 ! paralogous -relationship: has_quality SO:0000859 ! implied link automatically realized ! paralogous - -[Term] -id: SO:0000855 -name: orthologous_region -def: "A homologous_region that is orthologous to another region." [SO:ke] -comment: This term should be used in conjunction with the similarity relationships defined in SO. -synonym: "ortholog" EXACT [] -synonym: "orthologue" EXACT [] -is_a: SO:0000853 ! implied link automatically realized ! homologous_region -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000858 ! orthologous -relationship: has_quality SO:0000858 ! implied link automatically realized ! orthologous - -[Term] -id: SO:0000856 -name: conserved -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000857 -name: homologous -def: "Similarity due to common ancestry." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000858 -name: orthologous -def: "An attribute describing a kind of homology where divergence occured after a speciation event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000859 -name: paralogous -def: "An attribute describing a kind of homology where divergence occured after a duplication event." [SO:ke] -is_a: SO:0000857 ! 
homologous - -[Term] -id: SO:0000860 -name: syntenic -def: "Attribute describing sequence regions occurring in same order on chromosome of different species." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000861 -name: capped_primary_transcript -def: "A primary transcript that is capped." [SO:xp] -is_a: SO:0000185 ! implied link automatically realized ! primary_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped -relationship: adjacent_to SO:0000581 ! implied link automatically realized ! cap -relationship: has_quality SO:0000146 ! implied link automatically realized ! capped - -[Term] -id: SO:0000862 -name: capped_mRNA -def: "An mRNA that is capped." [SO:xp] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped -relationship: adjacent_to SO:0000581 ! implied link automatically realized ! cap -relationship: has_quality SO:0000146 ! implied link automatically realized ! capped - -[Term] -id: SO:0000863 -name: mRNA_attribute -def: "An attribute describing an mRNA feature." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000864 -name: exemplar -def: "An attribute describing a sequence is representative of a class of similar sequences." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000865 -name: frameshift -def: "An attribute describing a sequence that contains a mutation involving the deletion or insertion of one or more bases, where this number is not divisible by 3." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000866 -name: minus_1_frameshift -def: "A frameshift caused by deleting one base." [SO:ke] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000867 -name: minus_2_frameshift -def: "A frameshift caused by deleting two bases." [SO:ke] -is_a: SO:0000865 ! 
frameshift - -[Term] -id: SO:0000868 -name: plus_1_frameshift -def: "A frameshift caused by inserting one base." [SO:ke] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000869 -name: plus_2_framshift -def: "A frameshift caused by inserting two bases." [SO:ke] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000870 -name: trans_spliced -def: "An attribute describing transcript sequence that is created by splicing exons from diferent genes." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000871 -name: polyadenylated_mRNA -def: "An mRNA that is polyadenylated." [SO:xp] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000610 ! polyA_sequence -intersection_of: has_quality SO:0000246 ! polyadenylated -relationship: adjacent_to SO:0000610 ! implied link automatically realized ! polyA_sequence -relationship: has_quality SO:0000246 ! implied link automatically realized ! polyadenylated - -[Term] -id: SO:0000872 -name: trans_spliced_mRNA -def: "An mRNA that is trans-spliced." [SO:xp] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -is_a: SO:0000479 ! implied link automatically realized ! trans_spliced_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000636 ! spliced_leader_RNA -intersection_of: has_quality SO:0000870 ! trans_spliced -relationship: adjacent_to SO:0000636 ! implied link automatically realized ! spliced_leader_RNA - -[Term] -id: SO:0000873 -name: edited_transcript -def: "A transcript that is edited." [SO:ke] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: associated_with SO:0000602 ! guide_RNA -intersection_of: associated_with SO:0000977 ! anchor_binding_site -intersection_of: has_quality SO:0000116 ! edited -relationship: associated_with SO:0000602 ! implied link automatically realized ! 
guide_RNA -relationship: associated_with SO:0000977 ! implied link automatically realized ! anchor_binding_site -relationship: has_quality SO:0000116 ! implied link automatically realized ! edited - -[Term] -id: SO:0000874 -name: edited_transcript_by A_to_I_substitution -def: "A transcript that has been edited by A to I substitution." [SO:ke] -is_a: SO:0000929 ! implied link automatically realized ! edited_mRNA -intersection_of: SO:0000873 ! edited_transcript -intersection_of: associated_with SO:0000919 ! substitute_A_to_I -relationship: associated_with SO:0000919 ! implied link automatically realized ! substitute_A_to_I - -[Term] -id: SO:0000875 -name: bound_by_protein -def: "An attribute describing a sequence that is bound by a protein." [SO:ke] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000876 -name: bound_by_nucleic_acid -def: "An attribute describing a sequence that is bound by a nucleic acid." [SO:ke] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000877 -name: alternatively_spliced -def: "An attribute describing a situation where a gene may encode for more than 1 transcript." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000878 -name: monocistronic -def: "An attribute describing a sequence that contains the code for one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000879 -name: dicistronic -def: "An attribute describing a sequence that contains the code for two gene products." [SO:ke] -is_a: SO:0000880 ! polycistronic - -[Term] -id: SO:0000880 -name: polycistronic -def: "An attribute describing a sequence that contains the code for more than one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000881 -name: recoded -def: "An attribute describing am mRNA sequences that has been reprogrammed at translation, causing localized alterations." [SO:ke] -is_a: SO:0000863 ! 
mRNA_attribute - -[Term] -id: SO:0000882 -name: codon_redefined -def: "An attribute describing the alteration of codon meaning." [SO:ke] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000883 -name: stop_codon_read_through -def: "A stop codon redefined to be a new amino acid." [SO:ke] -synonym: "stop codon readthrough" RELATED [] -is_a: SO:0000882 ! codon_redefined - -[Term] -id: SO:0000884 -name: stop_codon_redefined_as_pyrrolysine -def: "A stop codon redefined to be the new amino acid, pyrrolysine." [SO:ke] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000885 -name: stop_codon_redefined_as_selenocysteine -def: "A stop codon redefined to be the new amino acid, selenocysteine." [SO:ke] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000886 -name: recoded_by_translational_bypass -def: "Recoded mRNA where a block of nucleotides is not translated." [SO:ke] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000887 -name: translationally_frameshifted -def: "Recoding by frameshifting a particular site." [SO:ke] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000888 -name: maternally_imprinted_gene -def: "A gene that is maternally_imprinted." [SO:xp] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000135 ! maternally_imprinted -relationship: has_quality SO:0000135 ! implied link automatically realized ! maternally_imprinted - -[Term] -id: SO:0000889 -name: paternally_imprinted_gene -def: "A gene that is paternally imprinted." [SO:xp] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000136 ! paternally_imprinted -relationship: has_quality SO:0000136 ! implied link automatically realized ! paternally_imprinted - -[Term] -id: SO:0000890 -name: post_translationally_regulated_gene -def: "A gene that is post translationally regulated." 
[SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000130 ! post_translationally_regulated -relationship: has_quality SO:0000130 ! implied link automatically realized ! post_translationally_regulated - -[Term] -id: SO:0000891 -name: negatively_autoregulated_gene -def: "A gene that is negatively autoreguated." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000473 ! negatively_autoregulated -relationship: has_quality SO:0000473 ! implied link automatically realized ! negatively_autoregulated - -[Term] -id: SO:0000892 -name: positively_autoregulated_gene -def: "A gene that is positively autoregulated." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000475 ! positively_autoregulated -relationship: has_quality SO:0000475 ! implied link automatically realized ! positively_autoregulated - -[Term] -id: SO:0000893 -name: silenced -def: "An epigenetic process where a gene is innactivated at transcriptional or translational level." [SO:ke] -is_a: SO:0000126 ! transcriptionally_repressed - -[Term] -id: SO:0000894 -name: silenced_by_DNA_modification -def: "An epigenetic process where a gene is innactivated by DNA modifications, resulting in repression of transcription." [SO:ke] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0000895 -name: silenced_by_DNA_methylation -def: "An epigenetic process where a gene is innactivated by DNA methylation, resulting in repression of transcription." [SO:ke] -is_a: SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000896 -name: translationally_regulated_gene -def: "A gene that is translationally regulated." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000131 ! 
translationally_regulated -relationship: has_quality SO:0000131 ! implied link automatically realized ! translationally_regulated - -[Term] -id: SO:0000897 -name: allelically_excluded_gene -def: "A gene that is allelically_excluded." [SO:xp] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000137 ! allelically_excluded -relationship: has_quality SO:0000137 ! implied link automatically realized ! allelically_excluded - -[Term] -id: SO:0000898 -name: epigenetically_modified_gene -def: "A gene that is epigenetically modified." [SO:ke] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000133 ! epigenetically_modified -relationship: has_quality SO:0000133 ! implied link automatically realized ! epigenetically_modified - -[Term] -id: SO:0000899 -name: nuclear_mitochondrial -def: "An attribute describing a nuclear pseudogene of a mitochndrial gene." [SO:ke] -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0000900 -name: processed -def: "An attribute describing a pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promotors, but often including a polyA tail." [SO:ke] -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0000901 -name: unequally_crossed_over -def: "An attribute describing a pseudogene that was created by tandem duplication and unequal crossing over during recombination." [SO:ke] -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0000902 -name: transgene -def: "A gene that is transgenic." [SO:xp] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000781 ! transgenic -relationship: has_quality SO:0000781 ! implied link automatically realized ! 
transgenic - -[Term] -id: SO:0000903 -name: endogenous_retroviral_sequence -is_a: SO:0000751 ! proviral_location - -[Term] -id: SO:0000904 -name: rearranged_at_DNA_level -def: "An attribute to describe the sequence of a feature, where the DNA is rearranged." [SO:ke] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000905 -name: status -def: "An attribute describing the status of a feature, based on the available evidence." [SO:ke] -comment: This term is the hypernym of attributes and should not be annotated to. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000906 -name: independently_known -def: "Attribute to describe a feature that is independently known - not predicted." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000907 -name: supported_by_sequence_similarity -def: "An attribute to describe a feature that has been predicted using sequence similarity techniques." [SO:ke] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000908 -name: supported_by_domain_match -def: "An attribute to describe a feature that has been predicted using sequence similarity of a known domain." [SO:ke] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000909 -name: supported_by_EST_or_cDNA -def: "An attribute to describe a feature that has been predicted using sequence similarity to EST or cDNA data." [SO:ke] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000910 -name: orphan -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000911 -name: predicted_by_ab_initio_computation -def: "An attribute describing a feature that is predicted by a computer program that did not rely on sequence similarity." [SO:ke] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000912 -name: asx_turn -alt_id: BS:00203 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Aspartate or Asparagine (Asx), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." 
[http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx_turn" EXACT [] -is_a: SO:0001128 ! turn - -[Term] -id: SO:0000913 -name: cloned_cDNA_insert -def: "A clone insert made from cDNA." [SO:xp] -is_a: SO:0000753 ! implied link automatically realized ! clone_insert -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000756 ! cDNA -relationship: has_quality SO:0000756 ! implied link automatically realized ! cDNA - -[Term] -id: SO:0000914 -name: cloned_genomic_insert -def: "A clone insert made from genomic DNA." [SO:xp] -is_a: SO:0000753 ! implied link automatically realized ! clone_insert -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000991 ! genomic_DNA -relationship: has_quality SO:0000991 ! implied link automatically realized ! genomic_DNA - -[Term] -id: SO:0000915 -name: engineered_insert -def: "A clone insert that is engineered." [SO:xp] -is_a: SO:0000753 ! implied link automatically realized ! clone_insert -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000916 -name: edit_operation -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000917 -name: insert_U -def: "An edit to insert a U." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000918 -name: delete_U -def: "An edit to delete a uridine." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000919 -name: substitute_A_to_I -def: "An edit to substitute an I for an A." [SO:ke] -is_a: SO:0000916 ! 
edit_operation - -[Term] -id: SO:0000920 -name: insert_C -def: "An edit to insert a cytidine." [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000921 -name: insert_dinucleotide -def: "An edit to insert a dinucleotide." [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000922 -name: substitute_C_to_U -def: "An edit to substitute an U for a C." [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000923 -name: insert_G -def: "An edit to insert a G." [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000924 -name: insert_GC -def: "An edit to insert a GC dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000925 -name: insert_GU -def: "An edit to insert a GU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. 
Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000926 -name: insert_CU -def: "An edit to insert a CU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000927 -name: insert_AU -def: "An edit to insert a AU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000928 -name: insert_AA -def: "An edit to insert a AA dinucleotide." 
[SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000929 -name: edited_mRNA -def: "An mRNA that is edited." [SO:xp] -is_a: SO:0000873 ! implied link automatically realized ! edited_transcript -intersection_of: SO:0000873 ! edited_transcript -intersection_of: associated_with SO:0000916 ! edit_operation -intersection_of: has_quality SO:0000116 ! edited -relationship: associated_with SO:0000916 ! implied link automatically realized ! edit_operation - -[Term] -id: SO:0000930 -name: guide_RNA_region -def: "A region of guide RNA." [SO:ma] -is_a: SO:0000834 ! processed_transcript_region -relationship: part_of SO:0000602 ! guide_RNA - -[Term] -id: SO:0000931 -name: anchor_region -def: "A region of a guide_RNA that base-pairs to a target mRNA." [SO:jk] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000932 -name: pre_edited_mRNA -is_a: SO:0000120 ! protein_coding_primary_transcript - -[Term] -id: SO:0000933 -name: intermediate -def: "An attribute to describe a feature between stages of processing." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000934 -name: miRNA_target_site -def: "A miRNA target site is a binding site where the molecule is a micro RNA." [FB:cds] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000935 -name: edited_CDS -def: "A CDS that is edited." 
[SO:xp] -is_a: SO:0000316 ! implied link automatically realized ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: associated_with SO:0000916 ! edit_operation -intersection_of: has_quality SO:0000116 ! edited -relationship: associated_with SO:0000916 ! implied link automatically realized ! edit_operation -relationship: has_quality SO:0000116 ! implied link automatically realized ! edited - -[Term] -id: SO:0000936 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000937 -name: vertebrate_immune_system_feature -is_obsolete: true - -[Term] -id: SO:0000938 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000939 -name: vertebrate_immune_system_gene_recombination_signal_feature -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000940 -name: recombinationally_rearranged -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000941 -name: recombinationally_rearranged_vertebrate_immune_system_gene -def: "A recombinationally rearranged gene of the vertebrate immune system." [SO:xp] -is_a: SO:0000456 ! implied link automatically realized ! recombinationally_rearranged_gene -intersection_of: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: associated_with SO:0000301 ! vertebrate_immune_system_gene_recombination_feature -relationship: associated_with SO:0000301 ! implied link automatically realized ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000942 -name: attP_site -def: "An integration/excision site of a phage chromosome at which a recombinase acts to insert the phage DNA at a cognate integration/excision site on a bacterial chromosome." [SO:as] -synonym: "attP site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0001042 ! 
phage_sequence - -[Term] -id: SO:0000943 -name: attB_site -def: "An integration/excision site of a bacterial chromosome at which a recombinase acts to insert foreign DNA containing a cognate integration/excision site." [SO:as] -synonym: "attB site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000944 -name: attL_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attB_site and the 3' portion of attP_site." [SO:as] -synonym: "attBP'" RELATED [] -synonym: "attL site" RELATED [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000945 -name: attR_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attP_site and the 3' portion of attB_site." [SO:as] -synonym: "attPB'" RELATED [] -synonym: "attR site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000946 -name: integration_excision_site -def: "A region specifically recognised by a recombinase, which inserts or removes another region marked by a distinct cognate integration/excision site." [SO:as] -synonym: "attachment site" RELATED [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000947 -name: resolution_site -def: "A region specifically recognised by a recombinase, which separates a physically contiguous circle of DNA into two physically separate circles." [SO:as] -synonym: "res site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000948 -name: inversion_site -def: "A region specifically recognised by a recombinase, which inverts the region flanked by a pair of sites." [SO:ma] -comment: A target region for site-specific inversion of a DNA region and which carries binding sites for a site-specific recombinase and accessory proteins as well as the site for specific cleavage by the recombinase. -synonym: "inversion site" EXACT [] -is_a: SO:0000342 ! 
site_specific_recombination_target_region - -[Term] -id: SO:0000949 -name: dif_site -def: "A site at which replicated bacterial circular chromosomes are decatenated by site specific resolvase." [SO:as] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000950 -name: attC_site -def: "An attC site is a sequence required for the integration of a DNA of an integron." [SO:as] -synonym: "attC site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000951 -name: eukaryotic_terminator -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000952 -name: oriV -def: "An origin of vegetative replication in plasmids and phages." [SO:as] -synonym: "origin of vegetative replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000953 -name: oriC -def: "An origin of bacterial chromosome replication." [SO:as] -synonym: "origin of bacterial chromosome replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000954 -name: DNA_chromosome -def: "Structural unit composed of a self-replicating, DNA molecule." [SO:ma] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000352 ! DNA -relationship: has_quality SO:0000352 ! implied link automatically realized ! DNA - -[Term] -id: SO:0000955 -name: double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded DNA molecule." [SO:ma] -is_a: SO:0000954 ! implied link automatically realized ! DNA_chromosome -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000985 ! double -relationship: has_quality SO:0000985 ! implied link automatically realized ! double - -[Term] -id: SO:0000956 -name: single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded DNA molecule." [SO:ma] -is_a: SO:0000954 ! implied link automatically realized ! 
DNA_chromosome -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000984 ! single -relationship: has_quality SO:0000984 ! implied link automatically realized ! single - -[Term] -id: SO:0000957 -name: linear_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear DNA molecule." [SO:ma] -is_a: SO:0000955 ! implied link automatically realized ! double_stranded_DNA_chromosome -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! implied link automatically realized ! linear - -[Term] -id: SO:0000958 -name: circular_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular DNA molecule." [SO:ma] -is_a: SO:0000955 ! implied link automatically realized ! double_stranded_DNA_chromosome -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! implied link automatically realized ! circular - -[Term] -id: SO:0000959 -name: linear_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear DNA molecule." [SO:ma] -is_a: SO:0000956 ! implied link automatically realized ! single_stranded_DNA_chromosome -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! implied link automatically realized ! linear - -[Term] -id: SO:0000960 -name: circular_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -is_a: SO:0000956 ! implied link automatically realized ! single_stranded_DNA_chromosome -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! 
circular -relationship: has_quality SO:0000988 ! implied link automatically realized ! circular - -[Term] -id: SO:0000961 -name: RNA_chromosome -def: "Structural unit composed of a self-replicating, RNA molecule." [SO:ma] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000356 ! RNA -relationship: has_quality SO:0000356 ! implied link automatically realized ! RNA - -[Term] -id: SO:0000962 -name: single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded RNA molecule." [SO:ma] -is_a: SO:0000961 ! implied link automatically realized ! RNA_chromosome -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000984 ! single -relationship: has_quality SO:0000984 ! implied link automatically realized ! single - -[Term] -id: SO:0000963 -name: linear_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear RNA molecule." [SO:ma] -is_a: SO:0000962 ! implied link automatically realized ! single_stranded_RNA_chromosome -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! implied link automatically realized ! linear - -[Term] -id: SO:0000964 -name: linear_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear RNA molecule." [SO:ma] -is_a: SO:0000965 ! implied link automatically realized ! double_stranded_RNA_chromosome -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! implied link automatically realized ! linear - -[Term] -id: SO:0000965 -name: double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded RNA molecule." [SO:ma] -is_a: SO:0000961 ! 
implied link automatically realized ! RNA_chromosome -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000985 ! double -relationship: has_quality SO:0000985 ! implied link automatically realized ! double - -[Term] -id: SO:0000966 -name: circular_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -is_a: SO:0000962 ! implied link automatically realized ! single_stranded_RNA_chromosome -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! implied link automatically realized ! circular - -[Term] -id: SO:0000967 -name: circular_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular RNA molecule." [SO:ma] -is_a: SO:0000965 ! implied link automatically realized ! double_stranded_RNA_chromosome -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! implied link automatically realized ! circular - -[Term] -id: SO:0000968 -name: replication_mode -is_a: SO:0000000 ! Sequence_Ontology -disjoint_from: SO:0001060 ! sequence_variant - -[Term] -id: SO:0000969 -name: rolling_circle -is_a: SO:0000971 ! DNA_replication_mode - -[Term] -id: SO:0000970 -name: theta_replication -is_a: SO:0000971 ! DNA_replication_mode - -[Term] -id: SO:0000971 -name: DNA_replication_mode -is_a: SO:0000968 ! replication_mode - -[Term] -id: SO:0000972 -name: RNA_replication_mode -is_a: SO:0000968 ! replication_mode - -[Term] -id: SO:0000973 -name: insertion_sequence -def: "A terminal_inverted_repeat_element that is bacterial and only encodes the functions required for its transposition between these inverted repeats." [SO:as] -synonym: "IS" RELATED [] -is_a: SO:0000208 ! 
terminal_inverted_repeat_element - -[Term] -id: SO:0000974 -name: minicircle_chromosome -is_a: SO:0000826 ! implied link automatically realized ! kinetoplast_chromosome -intersection_of: SO:0000826 ! kinetoplast_chromosome -intersection_of: has_origin SO:0000980 ! minicircle_sequence -relationship: has_origin SO:0000980 ! implied link automatically realized ! minicircle_sequence - -[Term] -id: SO:0000975 -name: minicircle_gene -is_a: SO:0000089 ! implied link automatically realized ! kinetoplast_gene -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: has_origin SO:0000980 ! minicircle_sequence -relationship: has_origin SO:0000980 ! implied link automatically realized ! minicircle_sequence - -[Term] -id: SO:0000976 -name: cryptic -is_a: SO:0000116 ! edited - -[Term] -id: SO:0000977 -name: anchor_binding_site -comment: Part of an edited transcript only. -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000978 -name: template_region -def: "A region of a guide_RNA that specifies the insertions and deletions of bases in the editing of a target mRNA." [SO:jk] -synonym: "information region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000979 -name: gRNA_encoding -def: "A non-protein_coding gene that encodes a guide_RNA." [SO:ma] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000980 -name: minicircle_sequence -is_a: SO:0000741 ! kinetoplast_sequence - -[Term] -id: SO:0000981 -name: rho_dependent_bacterial_terminator -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000982 -name: rho_independent_bacterial_terminator -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000983 -name: strand_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000984 -name: single -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! 
strand_attribute - -[Term] -id: SO:0000985 -name: double -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000986 -name: topology_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000987 -name: linear -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000988 ! circular - -[Term] -id: SO:0000988 -name: circular -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000987 ! linear - -[Term] -id: SO:0000989 -name: class_II_RNA -def: "Small non-coding RNA (59-60 nt long) containing 5' and 3' ends that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -synonym: "class II RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000990 -name: class_I_RNA -def: "Small non-coding RNA (55-65 nt long) containing highly conserved 5' and 3' ends (16 and 8 nt, respectively) that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -comment: Requested by Karen Pilcher - Dictybase. song-Term Tracker-1574577. -synonym: "class I RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000991 -name: genomic_DNA -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000992 -name: BAC_cloned_genomic_insert -comment: Requested by Andy Schroder - Flybase Harvard, Nov 2006. -is_a: SO:0000914 ! implied link automatically realized ! 
cloned_genomic_insert -intersection_of: SO:0000914 ! cloned_genomic_insert -intersection_of: derives_from SO:0000153 ! BAC -relationship: derives_from SO:0000153 ! implied link automatically realized ! BAC - -[Term] -id: SO:0000993 -name: consensus -comment: Term added Dec 06 to comply with mapping to MGED terms.\nIt should be used to generate consensus regions. The specific cross product terms they require are consensus_region and consensus_mRNA. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000994 -name: consensus_region -comment: DO not obsolete without considering MGED mapping. -is_a: SO:0000001 ! implied link automatically realized ! region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000993 ! consensus -relationship: has_quality SO:0000993 ! implied link automatically realized ! consensus - -[Term] -id: SO:0000995 -name: consensus_mRNA -comment: DO not obsolete without considering MGED mapping. -is_a: SO:0000234 ! implied link automatically realized ! mRNA -is_a: SO:0000994 ! implied link automatically realized ! consensus_region -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000996 -name: predicted_gene -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000732 ! predicted -relationship: has_quality SO:0000732 ! implied link automatically realized ! predicted - -[Term] -id: SO:0000997 -name: gene_fragment -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000842 ! implied link automatically realized ! gene_component_region -intersection_of: SO:0000842 ! gene_component_region -intersection_of: has_quality SO:0000731 ! fragment -relationship: has_quality SO:0000731 ! implied link automatically realized ! 
fragment - -[Term] -id: SO:0000998 -name: recursive_splice_site -def: "A recursive splice site is a splice site which subdivides a large intron. Recursive splicing is a mechanism that splices large introns by sub dividing the intron at non exonic elements and alternate exons." [http://www.genetics.org/cgi/content/full/170/2/661] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000999 -name: BAC_end -def: "A region of sequence from the end of a BAC clone that may provide a highly specific marker." [SO:ke] -comment: Requested by Keith Boroevich December, 2006. -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000153 ! BAC - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as\npart of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001003 -name: solo_LTR -def: "A recombination product between the 2 LTR of the same element." [SO:ke] -comment: Requested by Hadi Quesneville January 2007. -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0001004 -name: low_complexity -is_a: SO:0000905 ! status - -[Term] -id: SO:0001005 -name: low_complexity_region -is_a: SO:0000001 ! implied link automatically realized ! 
region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001004 ! low_complexity -relationship: has_quality SO:0001004 ! implied link automatically realized ! low_complexity - -[Term] -id: SO:0001006 -name: prophage -def: "A phage genome after it has established in the host genome in a latent/immune state either as a plasmid or as an integrated \"island\"." [GO:jl] -is_a: SO:0000113 ! proviral_region - -[Term] -id: SO:0001007 -name: cryptic_prophage -def: "A remnant of an integrated prophage in the host genome or an \"island\" in the host genome that includes phage like-genes." [GO:jl] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001008 -name: tetraloop -def: "A base-paired stem with loop of 4 non-hydrogen bonded nucleotides." [SO:ke] -is_a: SO:0000313 ! stem_loop - -[Term] -id: SO:0001009 -name: DNA_constraint_sequence -def: "A double-stranded DNA used to control macromolecular structure and function." [http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au\]&dispmax=50] -synonym: "DNA constraint" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0001010 -name: i_motif -def: "A cytosine rich domain whereby strands associate both inter- and intramolecularly at moderately acidic pH." [PMID:9753739] -synonym: "short intercalated motif" EXACT [] -is_a: SO:0000142 ! DNA_sequence_secondary_structure - -[Term] -id: SO:0001011 -name: PNA_oligo -def: "Peptide nucleic acid, is a chemical not known to occur naturally but is artificially synthesized and used in some biological research and medical treatments. The PNA backbone is composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [SO:ke] -synonym: "peptide nucleic acid" EXACT [] -is_a: SO:0000696 ! implied link automatically realized ! oligo -intersection_of: SO:0000696 ! oligo -intersection_of: has_quality SO:0001184 ! 
PNA -relationship: has_quality SO:0001184 ! implied link automatically realized ! PNA - -[Term] -id: SO:0001012 -name: DNAzyme -def: "A DNA sequence with catalytic activity." [SO:cb] -comment: Added by request from Colin Batchelor. -synonym: "catalytic DNA" EXACT [] -synonym: "deoxyribozyme" RELATED [] -synonym: "DNA enzyme" EXACT [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0001013 -name: MNP -def: "A multiple nucleotide polymorphism with alleles of common length > 1, for example AAA/TTT." [http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?rs=rs2067431] -synonym: "multiple nucleotide polymorphism" RELATED [] -is_a: SO:1000005 ! complex_substitution - -[Term] -id: SO:0001014 -name: intron_domain -comment: Requested by Colin Batchelor, Feb 2007. -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000188 ! intron - -[Term] -id: SO:0001015 -name: wobble_base_pair -def: "A type of non-canonical base pairing, most commonly between G and U, which is important for the secondary structure of RNAs. It has similar thermodynamic stability to the Watson-Crick pairing. Wobble base pairs only have two hydrogen bonds. Other wobble base pair possibilities are I-A, I-U and I-C." [PMID:11256617] -synonym: "wobble pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0001016 -name: internal_guide_sequence -def: "A purine-rich sequence in the group I introns which determines the locations of the splice sites in group I intron splicing and has catalytic activity." [SO:cb] -synonym: "IGS" EXACT [] -is_a: SO:0001014 ! intron_domain -relationship: part_of SO:0000587 ! group_I_intron - -[Term] -id: SO:0001017 -name: silent_mutation -comment: Added in March 2007 in after meeting with pharmgkb. -is_a: SO:1000132 ! mutation - -[Term] -id: SO:0001018 -name: epitope -def: "A region of a macromolecule that is recognized by the immune system." [http://en.wikipedia.org/wiki/Epitope] -comment: Requested by Trish Whetzel. -is_a: SO:0000409 ! 
binding_site - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -is_a: SO:0001060 ! sequence_variant - -[Term] -id: SO:0001020 -name: mutation_affecting_copy_number -is_a: SO:1000132 ! mutation - -[Term] -id: SO:0001021 -name: chromosome_breakpoint -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0001022 -name: inversion_breakpoint -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001023 -name: allele -def: "An allele is one of a set of coexisting sequence variants of a gene." [SO:immuno_workshop] -is_a: SO:0001060 ! sequence_variant -relationship: variant_of SO:0000704 ! gene - -[Term] -id: SO:0001024 -name: haplotype -def: "A haplotype is one of a set of coexisting sequence variants of a haplotype block." [SO:immuno_workshop] -is_a: SO:0001060 ! sequence_variant -relationship: variant_of SO:0000355 ! haplotype_block - -[Term] -id: SO:0001025 -name: polymorphic_sequence_variant -def: "A sequence variant that is segregating in one or more natural populations of a species." [SO:immuno_workshop] -is_a: SO:0001060 ! sequence_variant - -[Term] -id: SO:0001026 -name: genome -def: "A genome is the sum of genetic material within a cell or virion." [SO:immuno_workshop] -is_a: SO:0000000 ! Sequence_Ontology -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:0000400 ! sequence_attribute -disjoint_from: SO:0000968 ! replication_mode -disjoint_from: SO:0001060 ! sequence_variant -disjoint_from: SO:1000132 ! mutation - -[Term] -id: SO:0001027 -name: genotype -def: "A genotype is a variant genome, complete or incomplete." [SO:immuno_workshop] -is_a: SO:0001060 ! sequence_variant -relationship: variant_of SO:0001026 ! 
genome - -[Term] -id: SO:0001028 -name: diplotype -def: "A diplotype is a pair of haplotypes from a given individual. It is a genotype where the phase is known." [SO:immuno_workshop] -is_a: SO:0001027 ! genotype - -[Term] -id: SO:0001029 -name: direction_attribute -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001030 -name: forward -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001031 -name: reverse -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001032 -name: mitochondrial_DNA -comment: This terms is used by MO. -is_a: SO:0000737 ! implied link automatically realized ! mitochondrial_sequence -intersection_of: SO:0000737 ! mitochondrial_sequence -intersection_of: has_quality SO:0000352 ! DNA -relationship: has_quality SO:0000352 ! implied link automatically realized ! DNA - -[Term] -id: SO:0001033 -name: chloroplast_DNA -comment: This term is used by MO. -is_a: SO:0000745 ! implied link automatically realized ! chloroplast_sequence -intersection_of: SO:0000745 ! chloroplast_sequence -intersection_of: has_quality SO:0000352 ! DNA -relationship: has_quality SO:0000352 ! implied link automatically realized ! DNA - -[Term] -id: SO:0001034 -name: mirtron -def: "A debranched intron which mimics the structure of pre-miRNA and enters the miRNA processing pathway without Drosha mediated cleavage." [PMID:17589500, SO:ma] -comment: Ruby et al. Nature 448:83 desribe a new class of miRNAs that are derived from debranched introns. -is_a: SO:0001014 ! intron_domain - -[Term] -id: SO:0001035 -name: piRNA -def: "A small non coding RNA, part of a silencing system that prevents the spreading of selfish genetic elements." [SO:ke] -synonym: "piwi-associated RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001036 -name: arginyl_tRNA -def: "A tRNA sequence that has an arginine anticodon, and a 3' arginine binding region." [SO:ke] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000212 ! 
arginine_tRNA_primary_transcript - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -synonym: "MGE" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001038 -name: extrachromosomal_mobile_genetic_element -def: "An MGE that is not integrated into the host chromosome." [SO:ke] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001040 -name: integrated_plasmid -def: "A plasmid sequence that is integrated within the host chromosome." [SO:ke] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0001041 -name: viral_sequence -def: "The region of nucleotide sequence of a virus, a submicroscopic particle that replicates by infecting a host cell." [SO:ke] -comment: The definitions of the children of this term were revised Decemeber 2007 after discussion on song-devel. The resulting definitions are slightly unweildy but hopefully more logically correct. -synonym: "viral sequence" EXACT [] -synonym: "virus sequence" EXACT [] -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element - -[Term] -id: SO:0001042 -name: phage_sequence -def: "The nucleotide sequence of a virus that infects bacteria." [SO:ke] -synonym: "bacteriophage" EXACT [] -synonym: "phage" EXACT [] -synonym: "phage sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001043 -name: attCtn_site -def: "An attachment site located on a conjugative transposon and used for site-specific integration of a conjugative transposon." [Phigo:at] -synonym: "attCtn site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000371 ! 
conjugative_transposon - -[Term] -id: SO:0001044 -name: nuclear_mt_pseudogene -def: "A pseudogene that is nuclear/mitochondrial." [SO:xp] -synonym: "nuclear mitochondrial pseudogene" EXACT [] -synonym: "NUMT" EXACT [] -is_a: SO:0000336 ! implied link automatically realized ! pseudogene -intersection_of: SO:0000336 ! pseudogene -intersection_of: has_quality SO:0000899 ! nuclear_mitochondrial -relationship: has_quality SO:0000899 ! implied link automatically realized ! nuclear_mitochondrial - -[Term] -id: SO:0001045 -name: cointegrated_replicon -def: "A MGE region consisting of two fused replicons/plasmids resulting from a replicative transposition event." [Phigo:at] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0001046 -name: IRLinv_site -def: "Component of the inversion site located at the left of a region susceptible to site-specific inversion." [Phigo:at] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001047 -name: IRRinv_site -def: "Component of the inversion site located at the right of a region susceptible to site-specific inversion." [Phigo:at] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001048 -name: inversion_site_part -def: "A region located within an inversion site." [SO:ke] -comment: A term created to allow the parts of an inversion site have an is_a path back to the root. -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0001049 -name: defective_conjugative_transposon -def: "An island that contains genes for integration/excision and the gene and site for the initiation of intercellular transfer by conjugation. It can be complemented for transfer by a conjugative transposon." [Phigo:ariane] -synonym: "defective conjugative transposon" EXACT [] -is_a: SO:0000772 ! 
genomic_island - -[Term] -id: SO:0001050 -name: repeat_fragment -def: "A portion of a repeat, interrupted by the insertion of another element." [SO:ke] -comment: Requested by Chris Smith, and others at Flybase to help annotate nested repeats. -is_a: SO:0000657 ! implied link automatically realized ! repeat_region -is_a: SO:0000840 ! repeat_component -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000731 ! fragment -relationship: has_quality SO:0000731 ! implied link automatically realized ! fragment - -[Term] -id: SO:0001051 -name: nested_region -is_a: SO:0000001 ! region - -[Term] -id: SO:0001052 -name: nested_repeat -is_a: SO:0001051 ! implied link automatically realized ! nested_region -intersection_of: SO:0001051 ! nested_region -intersection_of: derives_from SO:0000657 ! repeat_region -intersection_of: has_part SO:0000001 ! region -intersection_of: has_part SO:0001050 ! repeat_fragment -relationship: derives_from SO:0000657 ! implied link automatically realized ! repeat_region -relationship: has_part SO:0001050 ! implied link automatically realized ! repeat_fragment - -[Term] -id: SO:0001053 -name: nested_transposon -is_a: SO:0001051 ! implied link automatically realized ! nested_region -intersection_of: SO:0001051 ! nested_region -intersection_of: derives_from SO:0000101 ! transposable_element -intersection_of: has_part SO:0000001 ! region -intersection_of: has_part SO:0001054 ! transposon_fragment -relationship: derives_from SO:0000101 ! implied link automatically realized ! transposable_element -relationship: has_part SO:0001054 ! implied link automatically realized ! transposon_fragment - -[Term] -id: SO:0001054 -name: transposon_fragment -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000731 ! fragment -relationship: has_quality SO:0000731 ! implied link automatically realized ! 
fragment - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." [SO:ke] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001057 -name: enhanceosome -is_a: SO:0000165 ! enhancer - -[Term] -id: SO:0001058 -name: promoter_targeting_sequence -def: "A transcriptional_cis_regulatory_region that restricts the activity of a CRM to a single promoter and which functions only when both itself and an insulator are located between the CRM and the promoter." [SO:regcreative] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001060 -name: sequence_variant -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0001061 -name: propeptide_cleavage_site -alt_id: BS:00063 -def: "The propeptide_cleavage_site is the arginine/lysine boundary on a propeptide where cleavage occurs." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "propeptide_cleavage_site" EXACT [] -is_a: SO:0001063 ! immature_peptide_region -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0001062 -name: propeptide -alt_id: BS:00077 -def: "Describes part of a peptide chain which is cleaved off during the formation of the mature protein." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "propep" RELATED [] -synonym: "propeptide" EXACT [] -is_a: SO:0001063 ! 
immature_peptide_region - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the a region of peptide sequence that is cleaved during maturation." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "immature_peptide_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001064 -name: active_peptide -alt_id: BS:00076 -def: "Active peptides are proteins which are biologically active, released from a precursor molecule. Hormones, neuropeptides, antimicrobial peptides, are active peptides. They are typically short (<40 amino acids) in length." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "active_peptide" EXACT [] -synonym: "peptide" EXACT [] -is_a: SO:0000419 ! mature_protein_region - -[Term] -id: SO:0001066 -name: compositionally_biased_region -alt_id: BS:00068 -def: "Extent of a compositionally biased region. Used for homopolymeric stretches of residues and also for regions which are rich in a particular amino acid. Not used for a run of less than 4 residues." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "compbias" EXACT [] -synonym: "compositional bias" EXACT [] -synonym: "compositionally biased" EXACT [] -synonym: "compositionally_biased_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001067 -name: polypeptide_motif -alt_id: BS:00032 -def: "A sequence motif is a short (up to 20 amino acids) region of biological interest. Such motifs, although they are too short to constitute functional domains, share sequence similarities and are conserved in different proteins. They display a common function (protein-binding, subcellular location etc.)." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "motif" RELATED [] -synonym: "polypeptide_motif" EXACT [] -is_a: SO:0000417 ! 
polypeptide_domain - -[Term] -id: SO:0001068 -name: polypeptide_repeat -alt_id: BS:00070 -def: "A polypeptide_repeat is a single copy of an internal sequence repetition." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "polypeptide_repeat" EXACT [] -synonym: "repeat" RELATED [] -is_a: SO:0000417 ! polypeptide_domain - -[Term] -id: SO:0001069 -name: polypeptide_structural_domain -alt_id: BS:00134 -def: "A polypeptide domain is a structural domain that is self-stabilizing and folds independently of the rest of the protein chain." [EBIBS:GAR, PMID:7020376] -subset: biosapiens -synonym: "polypeptide_structural_domain" EXACT [] -synonym: "structural domain" EXACT [] -is_a: SO:0000417 ! polypeptide_domain -is_a: SO:0001070 ! structural_region - -[Term] -id: SO:0001070 -name: structural_region -alt_id: BS:00337 -def: "Backbone conformation of the polypeptide." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "structural_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001071 -name: membrane_structure -alt_id: BS:00128 -def: "Arrangement of the polypeptide with respect to the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "membrane_structure" EXACT [] -is_a: SO:0001070 ! structural_region - -[Term] -id: SO:0001072 -name: extramembrane_region -alt_id: BS:00154 -def: "Extent of the region not transversing the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "extramembrane" EXACT [] -synonym: "topo_dom" EXACT [] -is_a: SO:0001070 ! structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001073 -name: cytoplasmic_region -alt_id: BS:00145 -def: "Region of the peptide in the cytoplasm." [EBIBS:GAR] -subset: biosapiens -synonym: "cytoplasm_location" EXACT [] -synonym: "inside" RELATED [] -is_a: SO:0001072 ! 
extramembrane_region - -[Term] -id: SO:0001074 -name: non_cytoplasmic_region -alt_id: BS:00144 -def: "Region of peptide not in the cytoplasm. N.B. This could be inside an organelle within the cell." [EBIBS:GAR] -subset: biosapiens -synonym: "non_cytoplasm_location" EXACT [] -synonym: "outside" RELATED [] -is_a: SO:0001072 ! extramembrane_region - -[Term] -id: SO:0001075 -name: intramembrane_region -alt_id: BS:00156 -def: "Extent of the region present in the lipid bilayer." [EBIBS:GAR] -subset: biosapiens -synonym: "intramembrane" EXACT [] -is_a: SO:0001070 ! structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001076 -name: membrane_loop -alt_id: BS:00155 -def: "Extent of region which enters the membrane bilayer but emerges on the same side which it entered." [EBIBS:GAR] -subset: biosapiens -synonym: "membrane_loop" EXACT [] -is_a: SO:0001075 ! intramembrane_region - -[Term] -id: SO:0001077 -name: transmembrane_region -alt_id: BS:00158 -def: "Extent of region transversing the lipid bilayer." [EBIBS:GAR, UniProt:curator_manual] -subset: biosapiens -synonym: "transmem" RELATED [] -synonym: "transmembrane" EXACT [] -is_a: SO:0001075 ! intramembrane_region - -[Term] -id: SO:0001078 -name: polypeptide_secondary_structure -alt_id: BS:00003 -def: "A region of peptide with secondary structure has hydrogen bonding along the peptide chain that causes a defined conformation of the chain." [EBIBS:GAR] -comment: Biosapien term was secondary_structure. -subset: biosapiens -synonym: "2nary structure" EXACT [] -synonym: "secondary structure" EXACT [] -synonym: "secondary structure region" RELATED [] -synonym: "secondary_structure" EXACT [] -is_a: SO:0001070 ! structural_region - -[Term] -id: SO:0001079 -name: polypeptide_structural_motif -alt_id: BS:0000338 -def: "Motif is a three-dimensional structural element within the chain, which appears also in a variety of other molecules. 
Unlike a domain, a motif does not need to form a stable globular unit." [EBIBS:GAR] -subset: biosapiens -synonym: "structural_motif" EXACT [] -is_a: SO:0001070 ! structural_region - -[Term] -id: SO:0001080 -name: coiled_coil -alt_id: BS:00041 -def: "A coiled coil is a structural motif in proteins, in which alpha-helices are coiled together like the strands of a rope." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "coiled" RELATED [] -synonym: "coiled_coil" EXACT [] -is_a: SO:0001079 ! polypeptide_structural_motif - -[Term] -id: SO:0001081 -name: helix_turn_helix -alt_id: BS:00147 -def: "A motif comprising two helices separated by a turn." [EBIBS:GAR] -subset: biosapiens -synonym: "DNA binding motif" RELATED [] -synonym: "helix_turn_helix" EXACT [] -synonym: "HTH" EXACT [] -is_a: SO:0001079 ! polypeptide_structural_motif - -[Term] -id: SO:0001082 -name: polypeptide_sequencing_information -alt_id: BS:00125 -def: "Incompatibility in the sequence due to some experimental problem." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "sequencing_information" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0001083 -name: non_adjacent_residues -alt_id: BS:00182 -def: "Indicates that two consecutive residues in a fragment sequence are not consecutive in the full-length protein and that there are a number of unsequenced residues between them." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "non consecutive" EXACT [] -synonym: "non_adjacent_residues" EXACT [] -synonym: "non_cons" EXACT [] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001084 -name: non_terminal_residue -alt_id: BS:00072 -def: "The residue at an extremity of the sequence is not the terminal residue." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "non terminal" EXACT [] -synonym: "non_ter" EXACT [] -synonym: "non_terminal_residue" EXACT [] -is_a: SO:0001082 ! 
polypeptide_sequencing_information - -[Term] -id: SO:0001085 -name: sequence_conflict -alt_id: BS:00069 -def: "Different sources report differing sequences." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "conflict" EXACT [] -synonym: "sequence_conflict" EXACT [] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001086 -name: sequence_uncertainty -alt_id: BS:00181 -def: "Describes the positions in a sequence where the authors are unsure about the sequence assignment." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "sequence_uncertainty" EXACT [] -synonym: "unsure" RELATED [] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001087 -name: cross_link -alt_id: BS:00178 -def: "Posttranslationally formed amino acid bonds." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "cross_link" EXACT [] -synonym: "crosslink" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001088 -name: disulfide_bond -alt_id: BS:00028 -def: "The covalent bond between sulfur atoms that binds two peptide chains or different parts of one peptide chain and is a structural determinant in many protein molecules." [EBIBS:GAR, UniProt:curation_manual] -comment: 2 discreet & joined. -subset: biosapiens -synonym: "disulfid" RELATED [] -synonym: "disulfide" RELATED [] -synonym: "disulfide bond" RELATED [] -synonym: "disulfide_bond" EXACT [] -synonym: "disulphide" EXACT [] -synonym: "disulphide bond" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001089 -name: post_translationally_modified_region -alt_id: BS:00052 -def: "A region where a transformation occurs in a protein after it has been synthesized. This which may regulate, stabilize, crosslink or introduce new chemical functionalities in the protein." [EBIBS:GAM, UniProt:curation_manual] -comment: Discrete. 
-subset: biosapiens -synonym: "mod_res" RELATED [] -synonym: "modified residue" EXACT [] -synonym: "post_translational_modification" EXACT [] -is_a: SO:0100001 ! biochemical_region - -[Term] -id: SO:0001090 -name: covalent_binding_site -alt_id: BS:00246 -def: "Binding involving a covalent bond." [EBIBS:GAR] -subset: biosapiens -synonym: "covalent_binding_site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001091 -name: non_covalent_binding_site -alt_id: BS:00029 -def: "Binding site for any chemical group (co-enzyme, prosthetic group, etc.)." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "binding" RELATED [uniprot:curation] -synonym: "binding site" RELATED [] -synonym: "non_covalent_binding_site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001092 -name: metal_contact -alt_id: BS:00027 -def: "Residue is part of a binding site for a metal ion." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "metal" RELATED [] -synonym: "metal_binding" EXACT [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001093 -name: protein_protein_contact -alt_id: BS:00131 -def: "Residues involved in protein-protein interactions." [EBIBS:GAR, UniProt:Curation_manual] -subset: biosapiens -synonym: "protein_protein_interaction" EXACT [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001094 -name: Ca_contact_site -alt_id: BS:00186 -def: "Residue involved in contact with calcium." [EBIBS:GAR] -subset: biosapiens -synonym: "ca bind" EXACT [] -synonym: "Ca_contact_site" EXACT [] -is_a: SO:0001092 ! metal_contact - -[Term] -id: SO:0001095 -name: Co_contact_site -alt_id: BS:00136 -def: "Residue involved in contact with cobalt." [EBIBS:GAR] -subset: biosapiens -synonym: "Co_contact_site" EXACT [] -is_a: SO:0001092 ! metal_contact - -[Term] -id: SO:0001096 -name: Cu_contact_site -alt_id: BS:00146 -def: "Residue involved in contact with copper." 
[EBIBS:GAR] -subset: biosapiens -synonym: "Cu_contact_site" EXACT [] -is_a: SO:0001092 ! metal_contact - -[Term] -id: SO:0001097 -name: Fe_contact_site -alt_id: BS:00137 -def: "Residue involved in contact with iron." [EBIBS:GAR] -subset: biosapiens -synonym: "Fe_contact_site" EXACT [] -is_a: SO:0001092 ! metal_contact - -[Term] -id: SO:0001098 -name: Mg_contact_site -alt_id: BS:00187 -def: "Residue involved in contact with magnesium." [EBIBS:GAR] -subset: biosapiens -synonym: "Mg_contact_site" EXACT [] -is_a: SO:0001092 ! metal_contact - -[Term] -id: SO:0001099 -name: Mn_contact_site -alt_id: BS:00140 -def: "Residue involved in contact with manganese." [EBIBS:GAR] -subset: biosapiens -synonym: "Mn_contact_site" EXACT [] -is_a: SO:0001092 ! metal_contact - -[Term] -id: SO:0001100 -name: Mo_contact_site -alt_id: BS:00141 -def: "Residue involved in contact with molybdenum." [EBIBS:GAR] -subset: biosapiens -synonym: "Mo_contact_site" EXACT [] -is_a: SO:0001092 ! metal_contact - -[Term] -id: SO:0001101 -name: Ni_contact_site -alt_id: BS:00142 -def: "Residue involved in contact with nickel." [EBIBS:GAR] -subset: biosapiens -synonym: "Ni_contact_site" EXACT [] -is_a: SO:0001092 ! metal_contact - -[Term] -id: SO:0001102 -name: W_contact_site -alt_id: BS:00143 -def: "Residue involved in contact with tungsten." [EBIBS:GAR] -subset: biosapiens -synonym: "W_contact_site" EXACT [] -is_a: SO:0001092 ! metal_contact - -[Term] -id: SO:0001103 -name: Zn_contact_site -alt_id: BS:00185 -def: "Residue involved in contact with zinc." [EBIBS:GAR] -subset: biosapiens -synonym: "Zn_contact_site" EXACT [] -is_a: SO:0001092 ! metal_contact - -[Term] -id: SO:0001104 -name: catalytic_residue -alt_id: BS:00026 -def: "Amino acid involved in the activity of an enzyme." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "act_site" RELATED [] -synonym: "active site residue" EXACT [] -synonym: "site" BROAD [] -is_a: SO:0100001 ! 
biochemical_region - -[Term] -id: SO:0001105 -name: protein_ligand_contact -alt_id: BS:00157 -def: "Residues which interact with a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "protein-ligand interaction" EXACT [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001106 -name: asx_motif -alt_id: BS:00202 -def: "A motif of five consecutive residues and two H-bonds in which: Residue(i) is Aspartate or Asparagine (Asx), side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3), main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "asx_motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001107 -name: beta_bulge -alt_id: BS:00208 -def: "A motif of three residues within a beta-sheet in which the main chains of two consecutive residues are H-bonded to that of the third, and in which the dihedral angles are as follows: Residue(i): -140 degrees < phi(l) -20 degrees , -90 degrees < psi(l) < 40 degrees. Residue (i+1): -180 degrees < phi < -25 degrees or +120 degrees < phi < +180 degrees, +40 degrees < psi < +180 degrees or -180 degrees < psi < -120 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "beta_bulge" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001108 -name: beta_bulge_loop -alt_id: BS:00209 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds. Beta bulge loops often occur at the loop ends of beta-hairpins." [EBIBS:GAR, Http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "beta_bulge_loop" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001109 -name: beta_bulge_loop_five -alt_id: BS:00210 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+4), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+3), these loops have an RL nest at residues i+2 and i+3." [EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "beta_bulge_loop_five" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001110 -name: beta_bulge_loop_six -alt_id: BS:00211 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+5), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+4), these loops have an RL nest at residues i+3 and i+4." [EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "beta_bulge_loop_six" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001111 -name: beta_strand -alt_id: BS:00042 -def: "A beta strand describes a single length of polypeptide chain that forms part of a beta sheet. A single continuous stretch of amino acids adopting an extended conformation of hydrogen bonds between the N-O and the C=O of another part of the peptide. This forms a secondary protein structure in which two or more extended polypeptide regions are hydrogen-bonded to one another in a planar array." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "beta_strand" EXACT [] -synonym: "strand" RELATED [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001112 -name: antiparallel_beta_strand -alt_id: BS:0000341 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (one running N-terminal to C-terminal and one running C-terminal to N-terminal). Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i) and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they form two mutual backbone hydrogen bonds to each other's flanking peptide groups; this is known as a close pair of hydrogen bonds. The peptide backbone dihedral angles (phi, psi) are about (-140 degrees, 135 degrees) in antiparallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "antiparallel_beta_strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001113 -name: parallel_beta_strand -alt_id: BS:00151 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (both running N-terminal to C-terminal). This orientation is slightly less stable because it introduces nonplanarity in the inter-strand hydrogen bonding pattern. Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i)and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they do not hydrogen bond to each other; rather, one residue forms hydrogen bonds to the residues that flank the other (but not vice versa). For example, residue i may form hydrogen bonds to residues j - 1 and j + 1; this is known as a wide pair of hydrogen bonds. By contrast, residue j may hydrogen-bond to different residues altogether, or to none at all. The dihedral angles (phi, psi) are about (-120 degrees, 115 degrees) in parallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. 
-subset: biosapiens -synonym: "parallel_beta_strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001114 -name: helix -alt_id: BS:00152 -def: "A helix is a secondary_structure conformation where the peptide backbone forms a coil." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "helix" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001115 -name: left_handed_helix -alt_id: BS:00222 -def: "A left handed helix is a region of peptide where the coiled conformation turns in an anticlockwise, left handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix-l" RELATED [] -synonym: "left handed helix" EXACT [] -is_a: SO:0001114 ! helix - -[Term] -id: SO:0001116 -name: right_handed_helix -alt_id: BS:0000339 -def: "A right handed helix is a region of peptide where the coiled conformation turns in a clockwise, right handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix" RELATED [] -synonym: "right handed helix" EXACT [] -is_a: SO:0001114 ! helix - -[Term] -id: SO:0001117 -name: alpha_helix -alt_id: BS:00040 -def: "The helix has 3.6 residues per turn which corersponds to a translation of 1.5 angstroms (= 0.15 nm) along the helical axis. Every backbone N-H group donates a hydrogen bond to the backbone C=O group of the amino acid four residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "a-helix" EXACT [] -synonym: "alpha_helix" EXACT [] -synonym: "helix" RELATED [] -is_a: SO:0001116 ! right_handed_helix - -[Term] -id: SO:0001118 -name: pi_helix -alt_id: BS:00153 -def: "The pi helix has 4.1 residues per turn and a translation of 1.15 (=0.115 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid five residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "pi_helix" EXACT [] -is_a: SO:0001116 ! 
right_handed_helix - -[Term] -id: SO:0001119 -name: three_ten_helix -alt_id: BS:0000340 -def: "The 3-10 helix has 3 residues per turn with a translation of 2.0 angstroms (=0.2 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid three residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "3(10) helix" EXACT [] -synonym: "3-10 helix" EXACT [] -synonym: "310 helix" EXACT [] -synonym: "three_ten_helix" EXACT [] -is_a: SO:0001116 ! right_handed_helix - -[Term] -id: SO:0001120 -name: nest -alt_id: BS:00223 -def: "A motif of two consecutive residues with dihedral angles. Nest should not have Proline as any residue. Nests frequently occur as parts of other motifs such as Schellman loops." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest" EXACT [] -synonym: "nest_motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001121 -name: nest_left_right -alt_id: BS:00224 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_left_right" EXACT [] -synonym: "nest_lr" EXACT [] -is_a: SO:0001120 ! nest - -[Term] -id: SO:0001122 -name: nest_right_left -alt_id: BS:00225 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_right_left" EXACT [] -synonym: "nest_rl" EXACT [] -is_a: SO:0001120 ! 
nest - -[Term] -id: SO:0001123 -name: schellmann_loop -alt_id: BS:00226 -def: "A motif of six or seven consecutive residues that contains two H-bonds." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "paperclip" RELATED [] -synonym: "paperclip loop" RELATED [] -synonym: "schellmann_loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001124 -name: schellmann_loop_seven -alt_id: BS:00228 -def: "Wild type: A motif of seven consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+6), the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+5)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann_loop_seven" EXACT [] -synonym: "seven-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001125 -name: schellmann_loop_six -alt_id: BS:00227 -def: "Common Type: A motif of six consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+5) the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann_loop_six" EXACT [] -synonym: "six-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001126 -name: st_motif -alt_id: BS:00229 -def: "A motif of five consecutive residues and two hydrogen bonds in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3) , the main-chain CO group of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_motif" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001127 -name: st_staple -alt_id: BS:00230 -def: "A motif of four or five consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain OH of residue(i) is H-bonded to the main-chain CO of residue(i3) or (i4), Phi angles of residues(i1), (i2) and (i3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_staple" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001128 -name: turn -alt_id: BS:00148 -def: "A reversal in the direction of the backbone of a protein that is stabilized by hydrogen bond between backbone NH and CO groups, involving no more than 4 amino acid residues." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "turn" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001129 -name: asx_turn_left_handed_type_one -alt_id: BS:00206 -def: "Left handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx_turn_il" RELATED [] -synonym: "asx_turn_left_handed_type_one" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001130 -name: asx_turn_left_handed_type_two -alt_id: BS:00204 -def: "Left handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx_turn_iil" EXACT [] -synonym: "asx_turn_left_handed_type_two" EXACT [] -is_a: SO:0000912 ! 
asx_turn - -[Term] -id: SO:0001131 -name: asx_turn_right_handed_type_two -alt_id: BS:00205 -def: "Right handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx_turn_iir" EXACT [] -synonym: "asx_turn_right_handed_type_two" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001132 -name: asx_turn_right_handed_type_one -alt_id: BS:00207 -def: "Right handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAM, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx_turn_ir" EXACT [] -synonym: "asx_turn_type_right_handed_type_one" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001133 -name: beta_turn -alt_id: BS:00212 -def: "A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles of the second and third residues, which are the basis for sub-categorization." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta_turn" EXACT [] -is_a: SO:0001128 ! turn - -[Term] -id: SO:0001134 -name: beta_turn_left_handed_type_one -alt_id: BS:00215 -def: "Left handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles:- Residue(i+1): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees. 
Residue(i+2): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta_turn_il" EXACT [] -synonym: "beta_turn_left_handed_type_one" EXACT [] -synonym: "Type I' beta turn" EXACT [] -synonym: "Type I' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001135 -name: beta_turn_left_handed_type_two -alt_id: BS:00213 -def: "Left handed type II: A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees > phi > -20 degrees, +80 degrees > psi > +180 degrees. Residue(i+2): +20 degrees > phi > +140 degrees, -40 degrees > psi > +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta_turn_iil" EXACT [] -synonym: "beta_turn_left_handed_type_two" EXACT [] -synonym: "Type II' beta turn" EXACT [] -synonym: "Type II' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001136 -name: beta_turn_right_handed_type_one -alt_id: BS:00216 -def: "Right handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+2): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta_turn_ir" EXACT [] -synonym: "beta_turn_right_handed_type_one" EXACT [] -synonym: "Type I beta turn" EXACT [] -synonym: "Type I turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001137 -name: beta_turn_right_handed_type_two -alt_id: BS:00214 -def: "Right handed type II:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, +80 degrees < psi < +180 degrees. Residue(i+2): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta_turn_iir" EXACT [] -synonym: "beta_turn_right_handed_type_two" EXACT [] -synonym: "Type II beta turn" EXACT [] -synonym: "Type II turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001138 -name: gamma_turn -alt_id: BS:00219 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma_turn" EXACT [] -is_a: SO:0001128 ! turn - -[Term] -id: SO:0001139 -name: gamma_turn_classic -alt_id: BS:00220 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=75.0 - psi(i+1)=-64.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "classic gamma turn" EXACT [] -synonym: "gamma_turn_classic" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001140 -name: gamma_turn_inverse -alt_id: BS:00221 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=-79.0 - psi(i+1)=69.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma_turn_inverse" EXACT [] -is_a: SO:0001138 ! 
gamma_turn - -[Term] -id: SO:0001141 -name: st_turn -alt_id: BS:00231 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [EBIBS:GAM, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_turn" EXACT [] -is_a: SO:0001128 ! turn - -[Term] -id: SO:0001142 -name: st_turn_left_handed_type_one -alt_id: BS:00234 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_turn_il" EXACT [] -synonym: "st_turn_left_handed_type_one" EXACT [] -is_a: SO:0001141 ! st_turn - -[Term] -id: SO:0001143 -name: st_turn_left_handed_type_two -alt_id: BS:00232 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_turn_iil" EXACT [] -synonym: "st_turn_left_handed_type_two" EXACT [] -is_a: SO:0001141 ! st_turn - -[Term] -id: SO:0001144 -name: st_turn_right_handed_type_one -alt_id: BS:00235 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_turn_ir" EXACT [] -synonym: "st_turn_right_handed_type_one" EXACT [] -is_a: SO:0001141 ! st_turn - -[Term] -id: SO:0001145 -name: st_turn_right_handed_type_two -alt_id: BS:00233 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_turn_iir" EXACT [] -synonym: "st_turn_right_handed_type_two" EXACT [] -is_a: SO:0001141 ! st_turn - -[Term] -id: SO:0001146 -name: polypeptide_variation_site -alt_id: BS:00336 -def: "A site of sequence variation (alteration). Alternative sequence due to naturally occuring events such as polymorphisms and altermatve splicing or experimental methods such as site directed mutagenesis." [EBIBS:GAR, SO:ke] -comment: For example, was a substitution natural or mutated as part of an experiment? This term is added to merge the biosapiens term sequence_variations. -subset: biosapiens -synonym: "sequence_variations" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001147 -name: natural_variant_site -alt_id: BS:00071 -def: "Describes the natural sequence variants due to polymorphisms, disease-associated mutations, RNA editing and variations between strains, isolates or cultivars." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "natural_variant" EXACT [] -synonym: "sequence variation" RELATED [] -synonym: "variant" RELATED [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001148 -name: mutated_variant_site -alt_id: BS:00036 -def: "Site which has been experimentally altered." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. 
-subset: biosapiens -synonym: "mutagen" RELATED [] -synonym: "mutagenesis" RELATED [] -synonym: "mutated_site" EXACT [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001149 -name: alternate_sequence_site -alt_id: BS:00073 -alt_id: SO:0001065 -def: "Description of sequence variants produced by alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "alternative_sequence" EXACT [] -synonym: "isoform" NARROW [] -synonym: "sequence variation" RELATED [] -synonym: "var_seq" RELATED [] -synonym: "varsplic" NARROW [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001150 -name: beta_turn_type_six -subset: biosapiens -synonym: "Type VI beta turn" EXACT [] -synonym: "Type VI turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001151 -name: beta_turn_type_six_a -subset: biosapiens -synonym: "Type VI a beta turn" EXACT [] -synonym: "Type VI a turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001152 -name: beta_turn_type_six_a_one -subset: biosapiens -synonym: "Type VI a1 beta turn" EXACT [] -synonym: "Type VI a1 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001153 -name: beta_turn_type_six_a_two -subset: biosapiens -synonym: "Type VI a2 beta turn" EXACT [] -synonym: "Type VI a2 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001154 -name: beta_turn_type_six_b -subset: biosapiens -synonym: "Type VI b beta turn" EXACT [] -synonym: "Type VI b turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001155 -name: beta_turn_type_eight -subset: biosapiens -synonym: "Type VIII beta turn" EXACT [] -synonym: "Type VIII turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001156 -name: DRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -10 and -60 relative to the TSS. Consensus sequence is WATCGATW." [PMID:12537576] -comment: This consensus sequence was identified computationally using the MEME\nalgorithm within core promoter sequences from -60 to +40, with an E value\nof 1.7e-183. Tends to co-occur with Motif 7. Tends to not occur with DPE motif (SO:0000015) or motif 10. -synonym: "NDM4" EXACT [] -synonym: "WATCGATW_motif" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0001157 -name: DMv4_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements with respect to the TSS (+1). Consensus sequence is YGGTCACACTR. Marked spatial preference within core promoter; tend to occur near the TSS, although not as tightly as INR (SO:0000014)." [PMID:16827941\:12537576] -synonym: "directional motif v4" EXACT [] -synonym: "DMv4" EXACT [] -synonym: "motif 1 element" EXACT [] -synonym: "promoter motif 1" EXACT [] -synonym: "YGGTCACATR" NARROW [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0001158 -name: E_box_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and +1 relative to the TSS. Consensus sequence is AWCAGCTGWT. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015)." [PMID:12537576\:16827941] -synonym: "AWCAGCTGWT" NARROW [] -synonym: "E box motif" EXACT [] -synonym: "generic E box motif" EXACT [] -synonym: "NDM5" RELATED [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0001159 -name: DMv5_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -50 and -10 relative to the TSS. Consensus sequence is KTYRGTATWTTT. Tends to co-occur with DMv4 (SO:0001157) . 
Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v5" EXACT [] -synonym: "DMv5" EXACT [] -synonym: "KTYRGTATWTTT" NARROW [] -synonym: "promoter motif 6" RELATED [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0001160 -name: DMv3_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -30 and +15 relative to the TSS. Consensus sequence is KNNCAKCNCTRNY. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015) or MTE (0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v3" EXACT [] -synonym: "DMv3" EXACT [] -synonym: "KNNCAKCNCTRNY" NARROW [] -synonym: "promoter motif 7" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0001161 -name: DMv2_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and -45 relative to the TSS. Consensus sequence is MKSYGGCARCGSYSS. Tends to co-occur with DMv3 (SO:0001160). Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v2" EXACT [] -synonym: "DMv2" EXACT [] -synonym: "MKSYGGCARCGSYSS" NARROW [] -synonym: "promoter motif 8" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0001162 -name: MTE -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between +20 and +30 relative to the TSS. Consensus sequence is CSARCSSAACGS. Tends to co-occur with INR motif (SO:0000014). Tends to not occur with DPE motif (SO:0000015) or DMv5 (SO:0001159)." [PMID:12537576\:15231738] -synonym: "CSARCSSAACGS" NARROW [] -synonym: "motif ten element" EXACT [] -synonym: "motif_ten_element" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0001163 -name: INR1_motif -def: "A promoter motif with consensus sequence TCATTCG." 
[PMID:16827941] -synonym: "directional motif p3" EXACT [] -synonym: "directional promoter motif 3" EXACT [] -synonym: "DMp3" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0001164 -name: DPE1_motif -def: "A promoter motif with consensus sequence CGGACGT." [PMID:16827941] -synonym: "directional motif 5" EXACT [] -synonym: "directional promoter motif 5" RELATED [] -synonym: "DMp5" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0001165 -name: DMv1_motif -def: "A promoter motif with consensus sequence CARCCCT." [PMID:16827941] -synonym: "directional promoter motif v1" RELATED [] -synonym: "DMv1" RELATED [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0001166 -name: GAGA_motif -def: "A non directional promoter motif with consensus sequence GAGAGCG." [PMID:16827941] -synonym: "GAGA" EXACT [] -synonym: "NDM1" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0001167 -name: NDM2_motif -def: "A non directional promoter motif with consensus CGMYGYCR." [PMID:16827941] -synonym: "NDM2" EXACT [] -synonym: "non directional promoter motif 2" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0001168 -name: NDM3_motif -def: "A non directional promoter motif with consensus sequence GAAAGCT." [PMID:16827941] -synonym: "NDM3" EXACT [] -synonym: "non directional motif 3" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0001169 -name: ds_RNA_viral_sequence -def: "A ds_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded RNA." [SO:ke] -synonym: "double stranded RNA virus sequence" EXACT [] -is_a: SO:0001041 ! 
viral_sequence - -[Term] -id: SO:0001170 -name: polinton -def: "A kind of DNA transposon that populates the genomes of protists, fungi, and animals, characterized by a unique set of proteins necessary for their transposition, including a protein-primed DNA polymerase B, retroviral integrase, cysteine protease, and ATPase. Polintons are characterized by 6-bp target site duplications, terminal-inverted repeats that are several hundred nucleotides long, and 5'-AG and TC-3' termini. Polintons exist as autonomous and nonautonomous elements." [PMID:16537396] -synonym: "maverick element" RELATED [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0001171 -name: rRNA_21S -def: "A component of the large ribosomal subunit in mitochondrial rRNA." [RSC:cb] -synonym: "21S LSU rRNA" EXACT [] -synonym: "21S ribosomal RNA" EXACT [] -synonym: "21S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001172 -name: tRNA_region -def: "A region of a tRNA." [RSC:cb] -is_a: SO:0000834 ! processed_transcript_region -relationship: part_of SO:0000253 ! tRNA - -[Term] -id: SO:0001173 -name: anticodon_loop -def: "A sequence of seven nucleotide bases in tRNA which contains the anticodon. It has the sequence 5'-pyrimidine-purine-anticodon-modified purine-any base-3." [ISBN:0716719207] -synonym: "anti-codon loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001174 -name: anticodon -def: "A sequence of three nucleotide bases in tRNA which recognizes a codon in mRNA." [RSC:cb] -synonym: "anti-codon" EXACT [] -is_a: SO:0001172 ! tRNA_region -relationship: part_of SO:0001173 ! anticodon_loop - -[Term] -id: SO:0001175 -name: CCA_tail -def: "Base sequence at the 3' end of a tRNA. The 3'-hydroxyl group on the terminal adenosine is the attachment point for the amino acid." [ISBN:0716719207] -synonym: "CCA sequence" EXACT [] -is_a: SO:0001172 ! 
tRNA_region - -[Term] -id: SO:0001176 -name: DHU_loop -def: "Non-base-paired sequence of nucleotide bases in tRNA. It contains several dihydrouracil residues." [ISBN:071671920] -synonym: "D loop" RELATED [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001177 -name: T_loop -def: "Non-base-paired sequence of three nucleotide bases in tRNA. It has sequence T-Psi-C." [ISBN:0716719207] -synonym: "TpsiC loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001178 -name: pyrrolysine_tRNA_primary_transcript -def: "A primary transcript encoding pyrrolysyl tRNA (SO:0000766)." [RSC:cb] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0001179 -name: U3_snoRNA -def: "U3 snoRNA is a member of the box C/D class of small nucleolar RNAs. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -comment: The definition is most of the old definition for snoRNA (SO:0000275). -synonym: "small nucleolar RNA U3" EXACT [] -synonym: "snoRNA U3" EXACT [] -synonym: "U3 small nucleolar RNA" EXACT [] -is_a: SO:0000593 ! 
C_D_box_snoRNA - -[Term] -id: SO:0001180 -name: AU_rich_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is rich in AUUUA pentamers. Messenger RNAs bearing multiple AU-rich elements are often unstable." [PMID:7892223] -synonym: "ARE" RELATED [] -synonym: "AU-rich element" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001181 -name: Bruno_response_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is bound by the Drosophila Bruno protein and its homologs." [PMID:10893231] -comment: Not to be confused with BRE_motif (SO:0000016), which binds transcription factor II B. -synonym: "BRE" RELATED [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001182 -name: iron_responsive_element -def: "A regulatory sequence found in the 5' and 3' UTRs of many mRNAs which encode iron-binding proteins. It has a hairpin structure and is recognized by trans-acting proteins known as iron-regulatory proteins." [PMID:3198610, PMID:8710843] -synonym: "IRE" EXACT [] -is_a: SO:0000837 ! UTR_region - -[Term] -id: SO:0001183 -name: morpholino -def: "An attribute describing a sequence composed of nucleobases bound to a morpholino backbone. A morpholino backbone consists of morpholine (CHEBI:34856) rings connected by phosphorodiamidate linkages." [RSC:cb] -comment: Do not use this for feature annotation. Use morpholino_oligo (SO:0000034) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001184 -name: PNA -def: "An attribute describing a sequence composed of peptide nucleic acid (CHEBI:48021), a chemical consisting of nucleobases bound to a backbone composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [RSC:cb] -comment: Do not use this term for feature annotation. Use PNA_oligo (SO:0001011) instead. 
-is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001185 -name: enzymatic -def: "An attribute describing the sequence of a transcript that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use enzymatic_RNA (SO:0000372) instead. -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0001186 -name: ribozymic -def: "An attribute describing the sequence of a transcript that has catalytic activity even without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use ribozyme (SO:0000374) instead. -is_a: SO:0001185 ! enzymatic - -[Term] -id: SO:0001187 -name: pseudouridylation_guide_snoRNA -def: "A snoRNA that specifies the site of pseudouridylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA pseudouridylation guide activity (GO:0030558). -is_a: SO:0000594 ! H_ACA_box_snoRNA - -[Term] -id: SO:0001188 -name: LNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of 'locked' deoxyribose rings connected to a phosphate backbone. The deoxyribose unit's conformation is 'locked' by a 2'-C,4'-C-oxymethylene link." [CHEBI:48010] -comment: Do not use this term for feature annotation. Use LNA_oligo (SO:0001189) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001189 -name: LNA_oligo -def: "An oligo composed of LNA residues." [RSC:cb] -synonym: "locked nucleic acid" EXACT [] -is_a: SO:0000696 ! implied link automatically realized ! oligo -intersection_of: SO:0000696 ! oligo -intersection_of: has_quality SO:0001188 ! LNA -relationship: has_quality SO:0001188 ! implied link automatically realized ! LNA - -[Term] -id: SO:0001190 -name: TNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of threose rings connected to a phosphate backbone." 
[CHEBI:48019] -comment: Do not use this term for feature annotation. Use TNA_oligo (SO:0001191) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001191 -name: TNA_oligo -def: "An oligo composed of TNA residues." [RSC:cb] -synonym: "threose nucleic acid" EXACT [] -is_a: SO:0000696 ! implied link automatically realized ! oligo -intersection_of: SO:0000696 ! oligo -intersection_of: has_quality SO:0001190 ! TNA -relationship: has_quality SO:0001190 ! implied link automatically realized ! TNA - -[Term] -id: SO:0001192 -name: GNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of an acyclic three-carbon propylene glycol connected to a phosphate backbone. It has two enantiomeric forms, (R)-GNA and (S)-GNA." [CHEBI:48015] -comment: Do not use this term for feature annotation. Use GNA_oligo (SO:0001193) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001193 -name: GNA_oligo -def: "An oligo composed of GNA residues." [RSC:cb] -synonym: "glycerol nucleic acid" EXACT [] -synonym: "glycol nucleic acid" EXACT [] -is_a: SO:0000696 ! implied link automatically realized ! oligo -intersection_of: SO:0000696 ! oligo -intersection_of: has_quality SO:0001192 ! GNA -relationship: has_quality SO:0001192 ! implied link automatically realized ! GNA - -[Term] -id: SO:0001194 -name: R_GNA -def: "An attribute describing a GNA sequence in the (R)-GNA enantiomer." [CHEBI:48016] -comment: Do not use this term for feature annotation. Use R_GNA_oligo (SO:0001195) instead. -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001195 -name: R_GNA_oligo -def: "An oligo composed of (R)-GNA residues." [RSC:cb] -synonym: "(R)-glycerol nucleic acid" EXACT [] -synonym: "(R)-glycol nucleic acid" EXACT [] -is_a: SO:0001193 ! implied link automatically realized ! GNA_oligo -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001194 ! R_GNA -relationship: has_quality SO:0001194 ! implied link automatically realized ! 
R_GNA - -[Term] -id: SO:0001196 -name: S_GNA -def: "An attribute describing a GNA sequence in the (S)-GNA enantiomer." [CHEBI:48017] -comment: Do not use this term for feature annotation. Use S_GNA_oligo (SO:0001197) instead. -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001197 -name: S_GNA_oligo -def: "An oligo composed of (S)-GNA residues." [RSC:cb] -synonym: "(S)-glycerol nucleic acid" EXACT [] -synonym: "(S)-glycol nucleic acid" EXACT [] -is_a: SO:0001193 ! implied link automatically realized ! GNA_oligo -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001196 ! S_GNA -relationship: has_quality SO:0001196 ! implied link automatically realized ! S_GNA - -[Term] -id: SO:0001198 -name: ds_DNA_viral_sequence -def: "A ds_DNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded DNA." [SO:ke] -synonym: "double stranded DNA virus" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001199 -name: ss_RNA_viral_sequence -def: "A ss_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as single stranded RNA." [SO:ke] -synonym: "single strand RNA virus" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001200 -name: negative_sense_ssRNA_viral_sequence -def: "A negative_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that is complementary to mRNA and must be converted to positive sense RNA by RNA polymerase before translation." [SO:ke] -synonym: "negative sense single stranded RNA virus" RELATED [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001201 -name: positive_sense_ssRNA_viral_sequence -def: "A positive_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that can be immediately translated by the host." [SO:ke] -synonym: "positive sense single stranded RNA virus" RELATED [] -is_a: SO:0001199 ! 
ss_RNA_viral_sequence - -[Term] -id: SO:0001202 -name: ambisense_ssRNA_viral_sequence -def: "An ambisense_RNA_virus is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus with both messenger and anti messenger polarity." [SO:ke] -synonym: "ambisense single stranded RNA virus" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001203 -name: RNA_polymerase_promoter -def: "A region (DNA) to which RNA polymerase binds, to begin transcription." [xenbase:jb] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0001204 -name: Phage_RNA_Polymerase_Promoter -def: "A region (DNA) to which Bacteriophage RNA polymerase binds, to begin transcription." [xenbase:jb] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0001205 -name: SP6_RNA_Polymerase_Promoter -def: "A region (DNA) to which the SP6 RNA polymerase binds, to begin transcription." [xenbase:jb] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001206 -name: T3_RNA_Polymerase_Promoter -def: "A DNA sequence to which the T3 RNA polymerase binds, to begin transcription." [xenbase:jb] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001207 -name: T7_RNA_Polymerase_Promoter -def: "A region (DNA) to which the T7 RNA polymerase binds, to begin transcription." [xenbase:jb] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001208 -name: five_prime_EST -def: "An EST read from the 5' end of a transcript that usually codes for a protein. These regions tend to be conserved across species and do not change much within a gene family." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "5' EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001209 -name: three_prime_EST -def: "An EST read from the 3' end of a transcript. They are more likely to fall within non-coding, or untranslated regions (UTRs)." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "3' EST" EXACT [] -is_a: SO:0000345 ! 
EST - -[Term] -id: SO:0001210 -name: translational_frameshift -def: "The region of mRNA (not divisible by 3 bases) that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "ribosomal frameshift" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0001211 -name: plus_1_translational_frameshift -def: "The region of mRNA 1 base long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 1 ribosomal frameshift" EXACT [] -synonym: "plus 1 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001212 -name: plus_2_translational_frameshift -def: "The region of mRNA 2 bases long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 2 ribosomal frameshift" EXACT [] -synonym: "plus 2 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001213 -name: group_III_intron -def: "Group III introns are introns found in the mRNA of the plastids of euglenoid protists. They are spliced by a two step transesterification with bulged adenosine as initiating nucleophile." [PMID:11377794] -comment: GO:0000374. -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -is_a: SO:0000852 ! 
exon_region - -[Term] -id: SO:0001216 -name: endonuclease_spliced_intron -def: "An intron that is spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0001217 -name: protein_coding_gene -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000010 ! protein_coding -relationship: has_quality SO:0000010 ! implied link automatically realized ! protein_coding - -[Term] -id: SO:0001218 -name: transgenic_insertion -is_a: SO:0000667 ! implied link automatically realized ! insertion -intersection_of: SO:0000667 ! insertion -intersection_of: has_quality SO:0000781 ! transgenic -relationship: has_quality SO:0000781 ! implied link automatically realized ! transgenic - -[Term] -id: SO:0001219 -name: retrogene -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000569 ! retrotransposed -relationship: has_quality SO:0000569 ! implied link automatically realized ! retrotransposed - -[Term] -id: SO:0005836 -name: regulatory_region -def: "A DNA sequence that controls the expression of a gene." [http://www.genpromag.com/scripts/glossary.asp?LETTER=R] -subset: SOFA -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0005837 -name: U14_snoRNA_primary_transcript -def: "The primary transcript of an evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA." [PMID:2251119] -synonym: "4.5S snRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0005841 -name: methylation_guide_snoRNA -def: "A snoRNA that specifies the site of 2'-O-ribose methylation in an RNA molecule by base pairing with a short sequence around the target residue." 
[GOC:mah, PMID:12457565] -comment: Has RNA 2'-O-ribose methylation guide activity (GO:0030561). -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0000580 ! methylation_guide_snoRNA_primary_transcript - -[Term] -id: SO:0005843 -name: rRNA_cleavage_RNA -def: "An ncRNA that is part of a ribonucleoprotein that cleaves the primary pre-rRNA transcript in the process of producing mature rRNA molecules." [GOC:kgc] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000582 ! rRNA_cleavage_snoRNA_primary_transcript - -[Term] -id: SO:0005845 -name: exon_of_single_exon_gene -def: "An exon that is the only exon in a gene." [RSC:cb] -synonym: "single_exon" RELATED [] -synonym: "singleton exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0005847 -name: cassette_array_member -is_a: SO:0005848 ! gene_cassette_member - -[Term] -id: SO:0005848 -name: gene_cassette_member -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0005849 -name: gene_subarray_member -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0005850 -name: primer_binding_site -def: "Non-covalent primer binding site for initiation of replication, transcription, or reverse transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -is_a: SO:0000409 ! binding_site -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0005851 -name: gene_array -def: "An array includes two or more genes, or two or more gene subarrays, contiguously arranged where the individual genes, or subarrays, are either identical in sequence, or essentially so." [SO:ma] -comment: This would include, for example, a cluster of genes each encoding the major ribosomal RNAs and a cluster of histone gene subarrays. -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0005852 -name: gene_subarray -def: "A subarray is, by definition, a member of a gene array (SO:0005851); the members of a subarray may differ substantially in sequence, but are closely related in function." 
[SO:ma] -comment: This would include, for example, a cluster of genes encoding different histones. -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0005853 -name: gene_cassette -def: "A gene that can be substituted for a related gene at a different site in the genome." [SGD:se] -comment: This would include, for example, the mating type gene cassettes of S. cerevisiae. Gene cassettes usually exist as linear sequences as part of a larger DNA molecule, such as a chromosome or plasmid. -is_a: SO:0000704 ! gene - -[Term] -id: SO:0005854 -name: gene_cassette_array -def: "An array of non-functional genes whose members, when captured by recombination, form functional genes." [SO:ma] -comment: This would include, for example, the arrays of non-functional VSG genes of Trypanosomes. -is_a: SO:0005855 ! gene_group -relationship: has_part SO:0005853 ! gene_cassette - -[Term] -id: SO:0005855 -name: gene_group -def: "A collection of related genes." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0005856 -name: selenocysteine_tRNA_primary_transcript -def: "A primary transcript encoding selenocysteinyl tRNA (SO:0005857)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0005857 -name: selenocysteinyl_tRNA -def: "A tRNA sequence that has a selenocysteine anticodon, and a 3' selenocysteine binding region." [SO:ke] -synonym: "selenocysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "selenocysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0005856 ! selenocysteine_tRNA_primary_transcript - -[Term] -id: SO:0005858 -name: syntenic_region -def: "A region in which two or more pairs of homologous markers occur on the same chromosome in two or more species." [http://tbase.jax.org/docs/glossary.html] -is_a: SO:0000330 ! implied link automatically realized ! conserved_region -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000860 ! syntenic -relationship: has_quality SO:0000860 ! 
implied link automatically realized ! syntenic - -[Term] -id: SO:0100001 -name: biochemical_region -def: "A region that is involved in a biochemical function." [EBIBS:GAR] -comment: Range. -subset: biosapiens -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0100002 -name: molecular_contact_region -def: "A region that is involved in a contact with another molecule." [EBIBS:GAR] -comment: Range. -subset: biosapiens -is_a: SO:0100001 ! biochemical_region - -[Term] -id: SO:0100003 -name: intrinsically_unstructured_region -def: "A region of polypeptide chain with high conformational flexibility." [EBIBS:GAR] -subset: biosapiens -synonym: "disordered region" EXACT [] -is_a: SO:0001070 ! structural_region - -[Term] -id: SO:0100004 -name: catmat_left_handed_three -def: "A motif of 3 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -75 bounds -100 to -50, res i+1: psi 140 bounds 110 to 170. An extra restriction of the length of the O to O distance would be useful, that it be less than 5 Angstrom. More precisely these two oxygens are the main chain carbonyl oxygen atoms of residues i-1 and i+1." [EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "catmat-3l" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100005 -name: catmat_left_handed_four -def: "A motif of 4 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -90 bounds -120 to -60, res i+1: psi -10 bounds -50 to 30, res i+2: phi -75 bounds -100 to -50, res i+2: psi 140 bounds 110 to 170. The extra restriction of the length of the O to O distance is similar, that it be less than 5 Angstrom. In this case these two Oxygen atoms are the main chain carbonyl oxygen atoms of residues i-1 and i+2." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "catmat-4l" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100006 -name: catmat_right_handed_three -def: "A motif of 3 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -75 bounds -100 to -50, res i+1: psi 140 bounds 110 to 170. An extra restriction of the length of the O to O distance would be useful, that it be less than 5 Angstrom. More precisely these two oxygens are the main chain carbonyl oxygen atoms of residues i-1 and i+1." [EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "catmat-3r" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100007 -name: catmat_right_handed_four -def: "A motif of 4 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -90 bounds -120 to -60, res i+1: psi -10 bounds -50 to 30, res i+2: phi -75 bounds -100 to -50, res i+2: psi 140 bounds 110 to 170. The extra restriction of the length of the O to O distance is similar, that it be less than 5 Angstrom. In this case these two Oxygen atoms are the main chain carbonyl oxygen atoms of residues i-1 and i+2." [EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "catmat-4r" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100008 -name: alpha_beta_motif -def: "A motif of five consecutive residues and two H-bonds in which: H-bond between CO of residue(i) and NH of residue(i+4), H-bond between CO of residue(i) and NH of residue(i+3),Phi angles of residues(i+1), (i+2) and (i+3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0100009 -name: lipoprotein_signal_peptide -def: "A signal for both membrane translocation and lipid attachment in prokaryotes." [EBIBS:GAR] -subset: biosapiens -synonym: "Prokaryotic membrane lipoprotein lipid attachment site" EXACT [] -is_a: SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0100010 -name: no_output -def: "An experimental region where an analysis has been run and not produced any annotation." [EBIBS:GAR] -subset: biosapiens -is_a: SO:0000703 ! experimental_result_region -isa: SO:0000703 - -[Term] -id: SO:1000002 -name: substitution -def: "Any change in genomic DNA caused by a single event." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0001059 ! sequence_alteration -relationship: sequence_of SO:0000048 ! substitute - -[Term] -id: SO:1000004 -name: partially_characterised_change_in_DNA_sequence -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000007 ! uncharacterised_change_in_nucleotide_sequence - -[Term] -id: SO:1000005 -name: complex_substitution -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000007 -name: uncharacterised_change_in_nucleotide_sequence -def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000008 -name: point_mutation -def: "A single nucleotide change which has occurred at the same position of a corresponding nucleotide in a reference sequence." 
[SO:immuno_workshop] -subset: SOFA -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000009 -name: transition -def: "Change of a pyrimidine nucleotide, C or T, into an other pyrimidine nucleotide, or change of a purine nucleotide, A or G, into an other purine nucleotide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000008 ! point_mutation - -[Term] -id: SO:1000010 -name: pyrimidine_transition -def: "A substitution of a pyrimidine, C or T, for another pyrimidine." [SO:ke] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000011 -name: C_to_T_transition -def: "A transition of a cytidine to a thymine." [SO:ke] -is_a: SO:1000010 ! pyrimidine_transition - -[Term] -id: SO:1000012 -name: C_to_T_transition_at_pCpG_site -def: "The transition of cytidine to thymine occurring at a pCpG site as a consequence of the spontaneous deamination of 5'-methylcytidine." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000011 ! C_to_T_transition - -[Term] -id: SO:1000013 -name: T_to_C_transition -is_a: SO:1000010 ! pyrimidine_transition - -[Term] -id: SO:1000014 -name: purine_transition -def: "A substitution of a purine, A or G, for another purine." [SO:ke] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000015 -name: A_to_G_transition -def: "A transition of an adenine to a guanine." [SO:ke] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000016 -name: G_to_A_transition -def: "A transition of a guanine to an adenine." [SO:ke] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000017 -name: transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G, or vice versa." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000008 ! point_mutation - -[Term] -id: SO:1000018 -name: pyrimidine_to_purine_transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G." [SO:ke] -is_a: SO:1000017 ! 
transversion - -[Term] -id: SO:1000019 -name: C_to_A_transversion -def: "A transversion from cytidine to adenine." [SO:ke] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000020 -name: C_to_G_transversion -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000021 -name: T_to_A_transversion -def: "A transversion from T to A." [SO:ke] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000022 -name: T_to_G_transversion -def: "A transversion from T to G." [SO:ke] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000023 -name: purine_to_pyrimidine_transversion -def: "Change of a purine nucleotide, A or G , into a pyrimidine nucleotide C or T." [SO:ke] -is_a: SO:1000017 ! transversion - -[Term] -id: SO:1000024 -name: A_to_C_transversion -def: "A transversion from adenine to cytidine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000025 -name: A_to_T_transversion -def: "A transversion from adenine to thymine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000026 -name: G_to_C_transversion -def: "A transversion from guanine to cytidine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000027 -name: G_to_T_transversion -def: "A transversion from guanine to thymine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000028 -name: intrachromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000029 -name: chromosomal_deletion -def: "An incomplete chromosome." [SO:ke] -synonym: "(bacteria)&Dgr;" RELATED [] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(fungi)D" RELATED [] -is_a: SO:0000550 ! aneuploid_chromosome -is_a: SO:1000028 ! 
intrachromosomal_mutation - -[Term] -id: SO:1000030 -name: chromosomal_inversion -synonym: "(bacteria)IN" RELATED [] -synonym: "(Drosophila)In" RELATED [] -synonym: "(fungi)In" RELATED [] -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000031 -name: interchromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000032 -name: indel -def: "A hybrid term (insertion/deletion) to describe sequence length change when the direction of the change is unspecified." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:1000033 -name: nucleotide_deletion -def: "One or more continuous nucleotides are excised from the sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000032 ! indel - -[Term] -id: SO:1000034 -name: nucleotide_insertion -def: "One or more nucleotides are added between two adjacent nucleotides in the sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000032 ! indel - -[Term] -id: SO:1000035 -name: nucleotide_duplication -def: "One or more nucleotides are added between two adjacent nucleotides in the sequence; the inserted sequence derives from, or is identical in sequence to, nucleotides adjacent to insertion point." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000034 ! nucleotide_insertion - -[Term] -id: SO:1000036 -name: inversion -def: "A continuous nucleotide sequence is inverted in the same position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0001059 ! sequence_alteration -relationship: sequence_of SO:0000047 ! invert - -[Term] -id: SO:1000037 -name: chromosomal_duplication -def: "An extra chromosome." [SO:ke] -synonym: "(Drosophila)Dp" RELATED [] -synonym: "(fungi)Dp" RELATED [] -is_a: SO:0000550 ! 
aneuploid_chromosome - -[Term] -id: SO:1000038 -name: intrachromosomal_duplication -is_a: SO:1000028 ! intrachromosomal_mutation -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000039 -name: direct_tandem_duplication -is_a: SO:1000173 ! tandem_duplication - -[Term] -id: SO:1000040 -name: inverted_tandem_duplication -is_a: SO:1000173 ! tandem_duplication - -[Term] -id: SO:1000041 -name: intrachromosomal_transposition -synonym: "(Drosophila)Tp" RELATED [] -is_a: SO:0000453 ! transposition -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:1000042 -name: compound_chromosome -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000043 -name: Robertsonian_fusion -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000044 -name: chromosomal_translocation -synonym: "(Drosophila)T" RELATED [] -synonym: "(fungi)T" RELATED [] -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000045 -name: ring_chromosome -synonym: "(Drosophila)R" RELATED [] -synonym: "(fungi)C" RELATED [] -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000046 -name: pericentric_inversion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000047 -name: paracentric_inversion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000048 -name: reciprocal_chromosomal_translocation -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000049 -name: mutation_affecting_transcript -def: "Any change in mature, spliced and processed, RNA that results from a change in the corresponding DNA sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000132 ! mutation - -[Term] -id: SO:1000050 -name: mutation_causing_no_change_in_transcript -def: "No effect on the state of the RNA." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000049 ! 
mutation_affecting_transcript - -[Term] -id: SO:1000052 -name: mutation_affecting_complex_change_in_transcript -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000054 -name: mutation_affecting_coding_sequence -def: "Any of the amino acid coding triplets of a gene are affected by the DNA mutation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000055 -name: mutation_causing_initiator_codon_change_in_transcript -def: "The DNA mutation changes, usually destroys, the first coding triplet of a gene. Usually prevents translation although another initiator codon may be used." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000056 ! mutation_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000056 -name: mutation_causing_amino_acid_coding_codon_change_in_transcript -def: "The DNA mutation affects the amino acid coding sequence of a gene; this region includes both the initiator and terminator codons." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000054 ! mutation_affecting_coding_sequence - -[Term] -id: SO:1000057 -name: mutation_causing_synonymous_codon_change_in_transcript -def: "The changed codon has the same translation product as the original codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000056 ! mutation_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000058 -name: mutation_causing_non_synonymous_codon_change_in_transcript -def: "A DNA point mutation that causes a substitution of an amino acid by an other." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "non-synonymous codon change in transcript" EXACT [] -is_a: SO:1000056 ! 
mutation_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000059 -name: mutation_causing_missense_codon_change_in_transcript -def: "The nucleotide change in the codon leads to a new codon coding for a new amino acid." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000058 ! mutation_causing_non_synonymous_codon_change_in_transcript - -[Term] -id: SO:1000060 -name: mutation_causing_conservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change does not change the gross properties (size, charge, hydrophobicity) of the amino acid at that position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -is_a: SO:1000059 ! mutation_causing_missense_codon_change_in_transcript - -[Term] -id: SO:1000061 -name: mutation_causing_nonconservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change changes the gross properties (size, charge, hydrophobicity) of the amino acid in that position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -is_a: SO:1000059 ! mutation_causing_missense_codon_change_in_transcript - -[Term] -id: SO:1000062 -name: mutation_causing_nonsense_codon_change_in_transcript -def: "The nucleotide change in the codon triplet creates a terminator codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000056 ! mutation_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000063 -name: mutation_causing_terminator_codon_change_in_transcript -def: "The nucleotide change in the codon triplet changes the stop codon, causing an elongated transcript sequence." [SO:ke] -is_a: SO:1000054 ! 
mutation_affecting_coding_sequence - -[Term] -id: SO:1000064 -name: mutation_affecting_reading_frame -def: "An umbrella term for terms describing an effect of a mutation on the frame of translation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000054 ! mutation_affecting_coding_sequence - -[Term] -id: SO:1000065 -name: frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three." [SO:ke] -synonym: "out of frame mutation" RELATED [] -is_a: SO:1000064 ! mutation_affecting_reading_frame - -[Term] -id: SO:1000066 -name: plus_1_frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, due to the insertion of a nucleotide." [SO:ke] -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000067 -name: minus_1_frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, due to the deletion of a nucleotide." [SO:ke] -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000068 -name: plus_2_frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, due to the insertion of two nucleotides." [SO:ke] -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000069 -name: minus_2_frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, due to the deletion of two nucleotides." [SO:ke] -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000070 -name: mutation_affecting_transcript_processing -def: "Mutation affects the way in which the primary transcriptional product is processed to form the mature transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000079 ! 
mutation_affecting_transcript_sequence - -[Term] -id: SO:1000071 -name: mutation_affecting_splicing -def: "A mutation that affects the way in which the primary transcriptional product is processed to form the mature transcript, specifically by the removal (splicing) of intron sequences." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000132 ! mutation - -[Term] -id: SO:1000072 -name: splice_donor_mutation -def: "A mutation that affects the splice donor sequence." [SO:ke] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000073 -name: splice_acceptor_mutation -def: "A mutation that affects the splice acceptor sequence." [SO:ke] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000074 -name: cryptic_splice_activator_mutation -def: "A kind of mutation that creates a new (functional) splice site." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000075 -name: mutation_affecting_editing -def: "Mutation affects the editing of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000070 ! mutation_affecting_transcript_processing - -[Term] -id: SO:1000076 -name: mutation_affecting_transcription -def: "Mutation affects the process of transcription, its initiation, progression or termination." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000078 -name: mutation_decreasing_rate_of_transcription -def: "A mutation that decreases the rate a which transcription of the sequence occurs." [SO:ke] -is_a: SO:1000081 ! mutation_affecting_rate_of_transcription - -[Term] -id: SO:1000079 -name: mutation_affecting_transcript_sequence -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000080 -name: mutation_increasing_rate_of_transcription -is_a: SO:1000081 ! 
mutation_affecting_rate_of_transcription - -[Term] -id: SO:1000081 -name: mutation_affecting_rate_of_transcription -def: "A mutation that alters the rate a which transcription of the sequence occurs." [SO:ke] -is_a: SO:1000076 ! mutation_affecting_transcription - -[Term] -id: SO:1000082 -name: mutation_affecting_transcript_stability -def: "Mutation affects the stability of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000083 -name: mutation_increasing_transcript_stability -def: "Mutation increases the stability (half-life) of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000082 ! mutation_affecting_transcript_stability - -[Term] -id: SO:1000084 -name: mutation_decreasing_transcript_stability -def: "Mutation decreases the stability (half-life) of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000082 ! mutation_affecting_transcript_stability - -[Term] -id: SO:1000085 -name: mutation_affecting_level_of_transcript -def: "A mutation that causes a change in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000086 -name: mutation_decreasing_level_of_transcript -def: "A mutation that causes a decrease in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -is_a: SO:1000085 ! mutation_affecting_level_of_transcript - -[Term] -id: SO:1000087 -name: mutation_increasing_level_of_transcript -def: "A mutation that causes an increase in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -is_a: SO:1000085 ! 
mutation_affecting_level_of_transcript - -[Term] -id: SO:1000088 -name: mutation_affecting_translational_product -def: "Mutation causes a change in primary translation product of a transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000132 ! mutation - -[Term] -id: SO:1000089 -name: mutation_causing_no_change_of_translational_product -def: "The change at RNA level does not lead to any change in polypeptide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000090 -name: mutation_causing_uncharacterised_change_of_translational_product -def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000091 -name: mutation_causing_partially_characterised_change_of_translational_product -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000090 ! mutation_causing_uncharacterised_change_of_translational_product - -[Term] -id: SO:1000092 -name: mutation_causing_complex_change_of_translational_product -def: "Any mutation effect that is known at nucleotide level but cannot be explained by using other key terms." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000093 -name: mutation_causing_amino_acid_substitution -def: "The replacement of a single amino acid by another." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000094 -name: mutation_causing_conservative_amino_acid_substitution -is_a: SO:1000093 ! 
mutation_causing_amino_acid_substitution - -[Term] -id: SO:1000095 -name: mutation_causing_nonconservative_amino_acid_substitution -is_a: SO:1000093 ! mutation_causing_amino_acid_substitution - -[Term] -id: SO:1000096 -name: mutation_causing_amino_acid_insertion -def: "The insertion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000097 -name: mutation_causing_amino_acid_deletion -def: "The deletion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000098 -name: mutation_causing_polypeptide_truncation -def: "The translational product is truncated at its C-terminus, usually a result of a nonsense codon change in transcript (SO:1000062)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000099 -name: mutation_causing_polypeptide_elongation -def: "The extension of the translational product at either (or both) the N-terminus and/or the C-terminus." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000100 -name: mutation_causing_polypeptide_N_terminal_elongation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "polypeptide N-terminal elongation" EXACT [] -is_a: SO:1000099 ! mutation_causing_polypeptide_elongation - -[Term] -id: SO:1000101 -name: mutation_causing_polypeptide_C_terminal_elongation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "polypeptide C-terminal elongation" EXACT [] -is_a: SO:1000099 ! 
mutation_causing_polypeptide_elongation - -[Term] -id: SO:1000102 -name: mutation_affecting_level_of_translational_product -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000103 -name: mutation_decreasing_level_of_translation_product -is_a: SO:1000102 ! mutation_affecting_level_of_translational_product - -[Term] -id: SO:1000104 -name: mutation_increasing_level_of_translation_product -is_a: SO:1000102 ! mutation_affecting_level_of_translational_product - -[Term] -id: SO:1000105 -name: mutation_affecting_polypeptide_amino_acid_sequence -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000106 -name: mutation_causing_inframe_polypeptide_N_terminal_elongation -synonym: "inframe polypeptide N-terminal elongation" EXACT [] -is_a: SO:1000100 ! mutation_causing_polypeptide_N_terminal_elongation - -[Term] -id: SO:1000107 -name: mutation_causing_out_of_frame_polypeptide_N_terminal_elongation -synonym: "out of frame polypeptide N-terminal elongation" EXACT [] -is_a: SO:1000100 ! mutation_causing_polypeptide_N_terminal_elongation - -[Term] -id: SO:1000108 -name: mutaton_causing_inframe_polypeptide_C_terminal_elongation -synonym: "inframe_polypeptide C-terminal elongation" EXACT [] -is_a: SO:1000101 ! mutation_causing_polypeptide_C_terminal_elongation - -[Term] -id: SO:1000109 -name: mutation_causing_out_of_frame_polypeptide_C_terminal_elongation -synonym: "out of frame polypeptide C-terminal elongation" EXACT [] -is_a: SO:1000101 ! mutation_causing_polypeptide_C_terminal_elongation - -[Term] -id: SO:1000110 -name: frame_restoring_mutation -def: "A mutation that reverts the sequence of a previous frameshift mutation back to the initial frame." [SO:ke] -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000111 -name: mutation_affecting_3D_structure_of_polypeptide -def: "A mutation that changes the amino acid sequence of the peptide in such a way that it changes the 3D structure of the molecule." 
[SO:ke] -synonym: "mutation affecting 3D-structure of polypeptide" EXACT [] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000112 -name: mutation_causing_no_3D_structural_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000113 -name: mutation_causing_uncharacterised_3D_structural_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000114 -name: mutation_causing_partially_characterised_3D_structural_change -is_a: SO:1000113 ! mutation_causing_uncharacterised_3D_structural_change - -[Term] -id: SO:1000115 -name: mutation_causing_complex_3D_structural_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000116 -name: mutation_causing_conformational_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000117 -name: mutation_affecting_polypeptide_function -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000118 -name: mutation_causing_loss_of_function_of_polypeptide -synonym: "loss of function of polypeptide" RELATED [] -is_a: SO:1000117 ! mutation_affecting_polypeptide_function - -[Term] -id: SO:1000119 -name: mutation_causing_inactive_ligand_binding_site -is_a: SO:1000118 ! mutation_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000120 -name: mutation_causing_inactive_catalytic_site -is_a: SO:1000119 ! mutation_causing_inactive_ligand_binding_site - -[Term] -id: SO:1000121 -name: mutation_causing_polypeptide_localization_change -is_a: SO:1000117 ! mutation_affecting_polypeptide_function - -[Term] -id: SO:1000122 -name: mutation_causing_polypeptide_post_translational_processing_change -synonym: "polypeptide post-translational processing affected" EXACT [] -is_a: SO:1000117 ! mutation_affecting_polypeptide_function -is_a: SO:1000118 ! 
mutation_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000123 -name: polypeptide_post_translational_processing_affected -synonym: "polypeptide_post-translational_processing_affected" RELATED [] -is_obsolete: true - -[Term] -id: SO:1000124 -name: mutation_causing_partial_loss_of_function_of_polypeptide -synonym: "partial loss of function of polypeptide" EXACT [] -is_a: SO:1000118 ! mutation_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000125 -name: mutation_causing_gain_of_function_of_polypeptide -synonym: "gain of function of polypeptide" EXACT [] -is_a: SO:1000117 ! mutation_affecting_polypeptide_function - -[Term] -id: SO:1000126 -name: mutation_affecting_transcript_secondary_structure -def: "A mutation that affects the secondary structure (folding) of the RNA transcript molecule." [SO:ke] -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000127 -name: mutation_causing_compensatory_transcript_secondary_structure_mutation -is_a: SO:1000126 ! mutation_affecting_transcript_secondary_structure - -[Term] -id: SO:1000132 -name: mutation -def: "An event that changes nucleotide sequence." [SO:ke] -is_a: SO:0000000 ! Sequence_Ontology -disjoint_from: SO:0000041 ! operation -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:0000400 ! sequence_attribute -disjoint_from: SO:0000968 ! replication_mode -disjoint_from: SO:0001060 ! sequence_variant - -[Term] -id: SO:1000134 -name: mutation_causing_polypeptide_fusion -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000136 -name: autosynaptic_chromosome -synonym: "(Drosophila)A" RELATED [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000138 -name: homo_compound_chromosome -synonym: "homo-compound chromosome" EXACT [] -is_a: SO:1000042 ! 
compound_chromosome - -[Term] -id: SO:1000140 -name: hetero_compound_chromosome -synonym: "hetero-compound chromosome" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:1000141 -name: chromosome_fission -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000142 -name: dexstrosynaptic_chromosome -is_a: SO:1000136 ! autosynaptic_chromosome - -[Term] -id: SO:1000143 -name: laevosynaptic_chromosome -is_a: SO:1000136 ! autosynaptic_chromosome - -[Term] -id: SO:1000144 -name: free_duplication -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000145 -name: free_ring_duplication -synonym: "(Drosophila)R" RELATED [] -is_a: SO:1000045 ! ring_chromosome -is_a: SO:1000144 ! free_duplication - -[Term] -id: SO:1000146 -name: complex_chromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000147 -name: deficient_translocation -def: "A translocation in which one of the four broken ends loses a segment before re-joining." [fb:reference_manual] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfT" RELATED [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000148 -name: inversion_cum_translocation -def: "The first two breaks are in the same chromosome, and the region between them is rejoined in inverted order to the other side of the first break, such that both sides of break one are present on the same chromosome. The remaining free ends are joined as a translocation with those resulting from the third break." [fb:reference_manual] -synonym: "(Drosophila)InT" RELATED [] -synonym: "(Drosophila)T" RELATED [] -synonym: "inversion cum translocation" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000044 ! 
chromosomal_translocation - -[Term] -id: SO:1000149 -name: bipartite_duplication -def: "The (large) region between the first two breaks listed is lost, and the two flanking segments (one of them centric) are joined as a translocation to the free ends resulting from the third break." [fb:reference_manual] -synonym: "(Drosophila)bDp" RELATED [] -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000150 -name: cyclic_translocation -def: "Three breaks in three different chromosomes. The centric segment resulting from the first break listed is joined to the acentric segment resulting from the second, rather than the third." [fb:reference_manual] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000151 -name: bipartite_inversion -def: "Three breaks in the same chromosome; both central segments are inverted in place (i.e., they are not transposed)." [fb:reference_manual] -synonym: "(Drosophila)bIn" RELATED [] -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000152 -name: uninverted_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)eDp" RELATED [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000153 -name: inverted_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)iDp" RELATED [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000154 -name: insertional_duplication -def: "A chromosome duplication involving the insertion of a duplicated region." [SO:ke] -synonym: "(Drosophila)Dpp" RELATED [] -is_a: SO:1000037 ! 
chromosomal_duplication - -[Term] -id: SO:1000155 -name: interchromosomal_transposition -synonym: "(Drosophila)Tp" RELATED [] -is_a: SO:0000453 ! transposition -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000156 -name: inverted_interchromosomal_transposition -synonym: "(Drosophila)iTp" RELATED [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000157 -name: uninverted_interchromosomal_transposition -synonym: "(Drosophila)eTp" RELATED [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000158 -name: inverted_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)iTp" RELATED [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000159 -name: uninverted_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)eTp" RELATED [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000160 -name: unoriented_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [fb:reference_manual] -synonym: "(Drosophila)uDp" RELATED [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000161 -name: unorientated_interchromosomal_transposition -synonym: "(Drosophila)uTp" RELATED [] -is_a: SO:1000155 ! 
interchromosomal_transposition - -[Term] -id: SO:1000162 -name: unorientated_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [fb:reference_manual] -synonym: "(Drosophila)uTp" RELATED [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000170 -name: uncharacterised_chromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000171 -name: deficient_inversion -def: "Three breaks in the same chromosome; one central region lost, the other inverted." [fb:reference_manual] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfIn" RELATED [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000173 -name: tandem_duplication -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:1000175 -name: partially_characterised_chromosomal_mutation -is_a: SO:1000170 ! uncharacterised_chromosomal_mutation - -[Term] -id: SO:1000177 -name: mutation_causing_uncharacterised_change_in_transcript -def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000179 -name: mutation_causing_partially_characterised_change_in_transcript -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000177 ! mutation_causing_uncharacterised_change_in_transcript - -[Term] -id: SO:1000180 -name: mutation_affecting_gene_structure -def: "A kind of mutation that affects the structure of a gene." [SO:ke] -is_a: SO:1000132 ! 
mutation - -[Term] -id: SO:1000181 -name: mutation_causing_gene_fusion -def: "A kind of mutation that affects the structure of a gene by causing a fusion to another gene." [SO:ke] -is_a: SO:1000180 ! mutation_affecting_gene_structure - -[Term] -id: SO:1000182 -name: chromosome_number_variation -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number." [SO:ke] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:1000183 -name: chromosome_structure_variation -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:1000184 -name: mutation_causes_exon_loss -def: "A mutation that affects splicing and causes an exon loss." [SO:ke] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000185 -name: mutation_causes_intron_gain -def: "Mutation causes an intron to be gained by the processed transcript; usually a result of a donor acceptor mutation (SO:1000072)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000186 -name: cryptic_splice_donor_activation -is_a: SO:1000074 ! cryptic_splice_activator_mutation - -[Term] -id: SO:1001186 -name: cryptic_splice_acceptor_activation -is_a: SO:1000074 ! cryptic_splice_activator_mutation - -[Term] -id: SO:1001187 -name: alternatively_spliced_transcript -def: "A transcript that is alternatively spliced." [SO:xp] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000877 ! alternatively_spliced -relationship: has_quality SO:0000877 ! implied link automatically realized ! alternatively_spliced - -[Term] -id: SO:1001188 -name: encodes_1_polypeptide -def: "A gene that is alternately spliced, but encodes only one polypeptide." [SO:ke] -is_a: SO:0000463 ! 
encodes_alternately_spliced_transcripts - -[Term] -id: SO:1001189 -name: encodes_greater_than_1_polypeptide -def: "A gene that is alternately spliced, and encodes more than one polypeptide." [SO:ke] -is_a: SO:0000463 ! encodes_alternately_spliced_transcripts - -[Term] -id: SO:1001190 -name: encodes_different_polypeptides_different_stop -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences, but use different stop codons." [SO:ke] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001191 -name: encodes_overlapping_peptides_different_start -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences, but use different start codons." [SO:ke] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001192 -name: encodes_disjoint_polypeptides -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that do not have overlapping peptide sequences." [SO:ke] -is_a: SO:1001189 ! encodes_greater_than_1_polypeptide - -[Term] -id: SO:1001193 -name: encodes_overlapping_polypeptides_different_start_and_stop -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences, but use different start and stop codons." [SO:ke] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001194 -name: alternatively_spliced_gene_encoding_greater_than_1_polypeptide_coding_regions_overlapping -is_obsolete: true - -[Term] -id: SO:1001195 -name: encodes_overlapping_peptides -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences." [SO:ke] -is_a: SO:1001189 ! encodes_greater_than_1_polypeptide - -[Term] -id: SO:1001196 -name: cryptogene -def: "A maxicircle gene so extensively edited that it cannot be matched to its edited mRNA sequence." [SO:ma] -is_a: SO:0000654 ! 
implied link automatically realized ! maxicircle_gene -intersection_of: SO:0000654 ! maxicircle_gene -intersection_of: has_quality SO:0000976 ! cryptic -relationship: has_quality SO:0000976 ! implied link automatically realized ! cryptic - -[Term] -id: SO:1001197 -name: dicistronic_primary_transcript -def: "A primary transcript that has the quality dicistronic." [SO:xp] -is_a: SO:0000079 ! implied link automatically realized ! dicistronic_transcript -is_a: SO:0000631 ! implied link automatically realized ! polycistronic_primary_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:1001217 -name: member_of_regulon -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:1001244 -name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non_overlapping -synonym: "alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non-overlapping" RELATED [] -is_obsolete: true - -[Term] -id: SO:1001246 -name: CDS_independently_known -def: "A CDS with the evidence status of being independently known." [SO:xp] -is_a: SO:0000316 ! implied link automatically realized ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000906 ! independently_known -relationship: has_quality SO:0000906 ! implied link automatically realized ! independently_known - -[Term] -id: SO:1001247 -name: orphan_CDS -def: "A CDS whose predicted amino acid sequence is unsupported by any experimental evidence or by any match with any other known sequence." [SO:ma] -is_a: SO:1001254 ! implied link automatically realized ! CDS_predicted -intersection_of: SO:1001254 ! CDS_predicted -intersection_of: has_origin SO:0000910 ! orphan -relationship: has_origin SO:0000910 ! implied link automatically realized ! 
orphan - -[Term] -id: SO:1001249 -name: CDS_supported_by_domain_match_data -def: "A CDS that is supported by domain similarity." [SO:xp] -is_a: SO:1001251 ! implied link automatically realized ! CDS_supported_by_sequence_similarity_data -intersection_of: SO:1001251 ! CDS_supported_by_sequence_similarity_data -intersection_of: has_quality SO:0000908 ! supported_by_domain_match -relationship: has_quality SO:0000908 ! implied link automatically realized ! supported_by_domain_match - -[Term] -id: SO:1001251 -name: CDS_supported_by_sequence_similarity_data -def: "A CDS that is supported by sequence similarity data." [SO:xp] -is_a: SO:1001254 ! implied link automatically realized ! CDS_predicted -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000907 ! supported_by_sequence_similarity -relationship: has_quality SO:0000907 ! implied link automatically realized ! supported_by_sequence_similarity - -[Term] -id: SO:1001254 -name: CDS_predicted -def: "A CDS that is predicted." [SO:ke] -is_a: SO:0000316 ! implied link automatically realized ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000732 ! predicted -relationship: has_quality SO:0000732 ! implied link automatically realized ! predicted - -[Term] -id: SO:1001255 -name: status_of_coding_sequence -is_obsolete: true - -[Term] -id: SO:1001259 -name: CDS_supported_by_EST_or_cDNA_data -def: "A CDS that is supported by similarity to EST or cDNA data." [SO:xp] -is_a: SO:1001251 ! implied link automatically realized ! CDS_supported_by_sequence_similarity_data -intersection_of: SO:1001251 ! CDS_supported_by_sequence_similarity_data -intersection_of: has_quality SO:0000909 ! supported_by_EST_or_cDNA -relationship: has_quality SO:0000909 ! implied link automatically realized ! 
supported_by_EST_or_cDNA - -[Term] -id: SO:1001260 -name: internal_Shine_Dalgarno_sequence -def: "A Shine-Dalgarno sequence that stimulates recoding through interactions with the anti-Shine-Dalgarno in the RNA of small ribosomal subunits of translating ribosomes. The signal is only operative in Bacteria." [PMID:12519954, SO:ke] -synonym: "internal Shine-Dalgarno sequence" EXACT [] -is_a: SO:0000243 ! internal_ribosome_entry_site -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001261 -name: recoded_mRNA -def: "A gene coding an mRNA which is recoded before translation, usually by special cis-acting signals." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000881 ! recoded -relationship: has_quality SO:0000881 ! implied link automatically realized ! recoded - -[Term] -id: SO:1001262 -name: minus_1_translationally_frameshifted -def: "An attribute describing a translational frameshift of -1." [SO:ke] -is_a: SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:1001263 -name: plus_1_translationally_frameshifted -def: "An attribute describing a translational frameshift of +1." [SO:ke] -is_a: SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:1001264 -name: mRNA_recoded_by_translational_bypass -def: "A mRNA is translated by ribosomes that suspend translation at a particular codon and resume translation at a particular non-overlapping downstream codon." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract] -is_a: SO:1001261 ! implied link automatically realized ! recoded_mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000886 ! recoded_by_translational_bypass -relationship: has_quality SO:0000886 ! implied link automatically realized ! 
recoded_by_translational_bypass - -[Term] -id: SO:1001265 -name: mRNA_recoded_by_codon_redefinition -def: "A gene whose mRNA is recoded by an alteration of codon meaning." [SO:ma] -is_a: SO:1001261 ! implied link automatically realized ! recoded_mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000882 ! codon_redefined -relationship: has_quality SO:0000882 ! implied link automatically realized ! codon_redefined - -[Term] -id: SO:1001266 -name: stop_codon_redefinition_as_selenocysteine -is_obsolete: true - -[Term] -id: SO:1001267 -name: stop_codon_readthrough -is_obsolete: true - -[Term] -id: SO:1001268 -name: recoding_stimulatory_region -def: "A site in an mRNA sequence that stimulates the recoding of a region in the same mRNA." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12519954&dopt=Abstract] -synonym: "recoding stimulatory signal" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:1001269 -name: four_bp_start_codon -def: "A non-canonical start codon with 4 base pairs." [SO:ke] -synonym: "4bp start codon" EXACT [] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001270 -name: stop_codon_redefinition_as_pyrrolysine -is_obsolete: true - -[Term] -id: SO:1001271 -name: archaeal_intron -def: "An intron characteristic of Archaeal tRNA and rRNA genes, where intron transcript generates a bulge-helix-bulge motif that is recognised by a splicing endoribonuclease." [PMID:9301331, SO:ma] -comment: Intron characteristic of tRNA genes; splices by an endonuclease-ligase mediated mechanism. -is_a: SO:0001216 ! endonuclease_spliced_intron - -[Term] -id: SO:1001272 -name: tRNA_intron -def: "An intron found in tRNA that is spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -comment: Could be a cross product with Gene ontology, GO:0006388. -synonym: "pre-tRNA intron" EXACT [] -is_a: SO:0001216 ! 
endonuclease_spliced_intron - -[Term] -id: SO:1001273 -name: CTG_start_codon -def: "A non-canonical start codon of sequence CTG." [SO:ke] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001274 -name: SECIS_element -def: "The incorporation of selenocysteine into a protein sequence is directed by an in-frame UGA codon (usually a stop codon) within the coding region of the mRNA. Selenoprotein mRNAs contain a conserved secondary structure in the 3' UTR that is required for the distinction of UGA stop from UGA selenocysteine. The selenocysteine insertion sequence (SECIS) is around 60 nt in length and adopts a hairpin structure which is sufficiently well-defined and conserved to act as a computational screen for selenoprotein genes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00031] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001275 -name: retron -def: "Sequence coding for a short, single-stranded, DNA sequence via a retrotransposed RNA intermediate; characteristic of some microbial genomes." [SO:ma] -is_a: SO:0000001 ! region - -[Term] -id: SO:1001277 -name: three_prime_recoding_site -def: "The recoding stimulatory signal located downstream of the recoding site." [SO:ke] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001279 -name: three_prime_stem_loop_structure -def: "A recoding stimulatory region, the stem-loop secondary structural element is downstream of the redefined region." [PMID:12519954, SO:ke] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001280 -name: five_prime_recoding_site -def: "The recoding stimulatory signal located upstream of the recoding site." [SO:ke] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001281 -name: flanking_three_prime_quadruplet_recoding_signal -def: "Four base pair sequence immediately downstream of the redefined region. The redefined region is a frameshift site. The quadruplet is 2 overlapping codons." 
[PMID:12519954, SO:ke] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001282 -name: UAG_stop_codon_signal -def: "A stop codon signal for a UAG stop codon redefinition." [SO:ke] -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001283 -name: UAA_stop_codon_signal -def: "A stop codon signal for a UAA stop codon redefinition." [SO:ke] -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001284 -name: regulon -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." [ISBN:0198506732] -subset: SOFA -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:1001285 -name: UGA_stop_codon_signal -def: "A stop codon signal for a UGA stop codon redefinition." [SO:ke] -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001286 -name: three_prime_repeat_recoding_signal -def: "A recoding stimulatory signal, downstream sequence important for recoding that contains repetitive elements." [PMID:12519954, SO:ke] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001287 -name: distant_three_prime_recoding_signal -def: "A recoding signal that is found many hundreds of nucleotides 3' of a redefined stop codon." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8709208&dopt=Abstract] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001288 -name: stop_codon_signal -def: "A recoding stimulatory signal that is a stop codon and has effect on efficiency of recoding." [PMID:12519954, SO:ke] -comment: This term does not include the stop codons that are redefined. An example would be a stop codon that partially overlapped a frame shifting site would be an example stimulatory signal. -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:2000061 -name: databank_entry -def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -is_a: SO:0000695 ! 
reagent - -[Typedef] -id: adjacent_to -name: adjacent_to -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence." [SO:ke] -subset: SOFA - -[Typedef] -id: associated_with -name: associated_with -comment: This relationship is vague and up for discussion. -is_symmetric: true - -[Typedef] -id: derives_from -name: derives_from -subset: SOFA -is_transitive: true - -[Typedef] -id: genome_of -name: genome_of - -[Typedef] -id: has_genome_location -name: has_genome_location -is_obsolete: true - -[Typedef] -id: has_origin -name: has_origin - -[Typedef] -id: has_part -name: has_part - -[Typedef] -id: has_quality -name: has_quality -comment: The relationship between a feature and an atrribute. - -[Typedef] -id: homologous_to -name: homologous_to -subset: SOFA -is_symmetric: true -is_a: similar_to ! similar_to - -[Typedef] -id: member_of -name: member_of -comment: A subtype of part_of. Inverse is collection_of. Winston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. -subset: SOFA -is_transitive: true - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -subset: SOFA -is_a: homologous_to ! homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: paralogous_to -name: paralogous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! 
homologous_to - -[Typedef] -id: part_of -name: part_of -subset: SOFA -is_transitive: true - -[Typedef] -id: position_of -name: position_of - -[Typedef] -id: regulated_by -name: regulated_by -is_obsolete: true - -[Typedef] -id: sequence_of -name: sequence_of - -[Typedef] -id: similar_to -name: similar_to -subset: SOFA -is_symmetric: true - -[Typedef] -id: variant_of -name: variant_of -def: "A' is a variant (mutation) of A = definition every instance of A' is either an immediate mutation of some instance of A, or there is a chain of immediate mutation processes linking A' to some instance of A." [SO:immuno_workshop] -comment: Added to SO during the immunology workshop, June 2007. This relationship was approved by Barry Smith. - diff --git a/annotation/NBIS/Ontology/SO/so_2_4.obo b/annotation/NBIS/Ontology/SO/so_2_4.obo deleted file mode 100644 index 312b6b999..000000000 --- a/annotation/NBIS/Ontology/SO/so_2_4.obo +++ /dev/null @@ -1,14931 +0,0 @@ -format-version: 1.2 -date: 01:10:2009 12:37 -auto-generated-by: OBO-Edit 2.000-beta41 -subsetdef: biosapiens "biosapiens protein feature ontology" -subsetdef: SOFA "SO feature annotation" -synonymtypedef: aa1 "amino acid 1 letter code" -synonymtypedef: aa3 "amino acid 3 letter code" -synonymtypedef: AAMOD "amino acid modification" -synonymtypedef: BS "biosapiens" -synonymtypedef: RNAMOD "RNA modification" EXACT -default-namespace: sequence -remark: autogenerated-by\: DAG-Edit version 1.417\nsaved-by\: eilbeck\ndate\: Tue May 11 15\:18\:44 PDT 2004\nversion\: $Revision\: 1.45 $ - -[Term] -id: SO:0000000 -name: Sequence_Ontology -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000001 -name: region -def: "A sequence_feature with an extent greater than zero. A nucleotide region is composed of bases and a polypeptide region is composed of amino acids." [SO:ke] -subset: SOFA -synonym: "sequence" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000699 ! implied link automatically realized ! 
junction - -[Term] -id: SO:0000002 -name: sequence_secondary_structure -def: "A folded sequence." [SO:ke] -synonym: "sequence secondary structure" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000003 -name: G_quartet -def: "G-quartets are unusual nucleic acid structures consisting of a planar arrangement where each guanine is hydrogen bonded by hoogsteen pairing to another guanine in the quartet." [http://www.ncbi.nlm.nih.gov/pubmed/7919797?dopt=Abstract] -synonym: "G quartet" EXACT [] -synonym: "G tetrad" EXACT [] -synonym: "G-quadruplex" EXACT [] -synonym: "G-quartet" EXACT [] -synonym: "G-tetrad" EXACT [] -synonym: "G_quadruplex" EXACT [] -synonym: "guanine tetrad" EXACT [] -xref: http://en.wikipedia.org/wiki/G-quadruplex "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000004 -name: interior_coding_exon -subset: SOFA -synonym: "interior coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000005 -name: satellite_DNA -def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "satellite DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Satellite_DNA "wiki" -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000006 -name: PCR_product -def: "A region amplified by a PCR reaction." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "amplicon" RELATED [] -synonym: "PCR product" EXACT [] -xref: http://en.wikipedia.org/wiki/RAPD "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000007 -name: read_pair -def: "A pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." 
[SO:ls] -subset: SOFA -synonym: "read-pair" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000008 -name: gene_sensu_your_favorite_organism -is_obsolete: true - -[Term] -id: SO:0000009 -name: gene_class -is_obsolete: true - -[Term] -id: SO:0000010 -name: protein_coding -synonym: "protein-coding" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000011 -name: non_protein_coding -synonym: "non protein-coding" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000012 -name: scRNA_primary_transcript -def: "The primary transcript of any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -synonym: "scRNA primary transcript" EXACT [] -synonym: "scRNA transcript" EXACT [] -synonym: "small cytoplasmic RNA" RELATED [] -synonym: "small cytoplasmic RNA transcript" EXACT [] -synonym: "small_cytoplasmic_RNA" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000013 -name: scRNA -def: "Any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -subset: SOFA -synonym: "small cytoplasmic RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000012 ! scRNA_primary_transcript - -[Term] -id: SO:0000014 -name: INR_motif -def: "A sequence element characteristic of some RNA polymerase II promoters required for the correct positioning of the polymerase for the start of transcription. Overlaps the TSS. The mammalian consensus sequence is YYAN(T|A)YY; the Drosophila consensus sequence is TCA(G|T)t(T|C). In each the A is at position +1 with respect to the TSS. Functionally similar to the TATA box element." 
[PMID:12651739] -synonym: "DMp2" RELATED [] -synonym: "initiator" EXACT [] -synonym: "initiator motif" EXACT [] -synonym: "INR motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000015 -name: DPE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters; Positioned from +28 to +32 with respect to the TSS (+1). Experimental results suggest that the DPE acts in conjunction with the INR_motif to provide a binding site for TFIID in the absence of a TATA box to mediate transcription of TATA-less promoters. Consensus sequence (A|G)G(A|T)(C|T)(G|A|C)." [PMID:12651739\:12537576] -synonym: "CRWMGCGWKCGCTTS" NARROW [] -synonym: "downstream core promoter element" EXACT [] -synonym: "DPE motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000016 -name: BRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements at -37 to -32 with respect to the TSS (+1). Consensus sequence is (G|C)(G|C)(G|A)CGCC. Binds TFIIB." [PMID:12651739] -synonym: "B-recognition element" EXACT [] -synonym: "BRE motif" EXACT [] -synonym: "TFIIB recognition element" RELATED [] -synonym: "transcription factor B-recognition element" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000017 -name: PSE_motif -def: "A sequence element characteristic of the promoters of snRNA genes transcribed by RNA polymerase II or by RNA polymerase III. Located between -45 and -60 relative to the TSS. The human PSE_motif consensus sequence is TCACCNTNA(C|G)TNAAAAG(T|G)." [PMID:12651739] -synonym: "proximal sequence element" EXACT [] -synonym: "PSE motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000167 ! 
promoter - -[Term] -id: SO:0000018 -name: linkage_group -def: "A group of loci that can be grouped in a linear order representing the different degrees of linkage among the genes concerned." [ISBN:038752046] -synonym: "linkage group" EXACT [] -xref: http://en.wikipedia.org/wiki/Linkage_group "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000020 -name: RNA_internal_loop -def: "A region of double stranded RNA where the bases do not conform to WC base pairing. The loop is closed on both sides by canonical base pairing. If the interruption to base pairing occurs on one strand only, it is known as a bulge." [SO:ke] -synonym: "RNA internal loop" EXACT [] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000021 -name: asymmetric_RNA_internal_loop -def: "An internal RNA loop where one of the strands includes more bases than the corresponding region on the other strand." [SO:ke] -synonym: "asymmetric RNA internal loop" EXACT [] -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000022 -name: A_minor_RNA_motif -def: "A region forming a motif, composed of adenines, where the minor groove edges are inserted into the minor groove of another helix." [SO:ke] -synonym: "A minor RNA motif" EXACT [] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000023 -name: K_turn_RNA_motif -def: "The kink turn (K-turn) is an RNA structural motif that creates a sharp (~120 degree) bend between two continuous helices." [SO:ke] -synonym: "K turn RNA motif" EXACT [] -synonym: "K-turn" EXACT [] -synonym: "kink turn" EXACT [] -synonym: "kink-turn motif" EXACT [] -xref: http://en.wikipedia.org/wiki/K-turn "wiki" -is_a: SO:0000021 ! asymmetric_RNA_internal_loop - -[Term] -id: SO:0000024 -name: sarcin_like_RNA_motif -def: "A loop in ribosomal RNA containing the sites of attack for ricin and sarcin." 
[http://proton.chem.yale.edu/pdf/7897662.pdf] -synonym: "sarcin like RNA motif" EXACT [] -synonym: "sarcin/ricin domain" EXACT [] -synonym: "sarcin/ricin loop" EXACT [] -synonym: "sarcin/ricin RNA domain" EXACT [] -is_a: SO:0000021 ! asymmetric_RNA_internal_loop - -[Term] -id: SO:0000025 -name: symmetric_RNA_internal_loop -def: "An internal RNA loop where the extent of the loop on both stands is the same size." [SO:ke] -synonym: "A-minor RNA motif" EXACT [] -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000026 -name: RNA_junction_loop -synonym: "RNA junction loop" EXACT [] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000027 -name: RNA_hook_turn -synonym: "hook turn" RELATED [] -synonym: "hook-turn motif" EXACT [] -synonym: "RNA hook turn" EXACT [] -is_a: SO:0000026 ! RNA_junction_loop - -[Term] -id: SO:0000028 -name: base_pair -synonym: "base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Base_pair "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000029 -name: WC_base_pair -def: "The canonical base pair, where two bases interact via WC edges, with glycosidic bonds oriented cis relative to the axis of orientation." [PMID:12177293] -synonym: "canonical base pair" EXACT [] -synonym: "Watson Crick base pair" EXACT [] -synonym: "Watson-Crick base pair" RELATED [] -synonym: "Watson-Crick pair" EXACT [] -synonym: "WC base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000030 -name: sugar_edge_base_pair -def: "A type of non-canonical base-pairing." [PMID:12177293] -synonym: "sugar edge base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000031 -name: aptamer -def: "DNA or RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http://aptamer.icmb.utexas.edu] -xref: http://en.wikipedia.org/wiki/Aptamer "wiki" -is_a: SO:0000696 ! 
oligo - -[Term] -id: SO:0000032 -name: DNA_aptamer -def: "DNA molecules that have been selected from random pools based on their ability to bind other molecules." [http:aptamer.icmb.utexas.edu] -synonym: "DNA aptamer" EXACT [] -is_a: SO:0000031 ! aptamer - -[Term] -id: SO:0000033 -name: RNA_aptamer -def: "RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http://aptamer.icmb.utexas.edu] -synonym: "RNA aptamer" EXACT [] -is_a: SO:0000031 ! aptamer - -[Term] -id: SO:0000034 -name: morpholino_oligo -def: "Morpholino oligos are synthesized from four different Morpholino subunits, each of which contains one of the four genetic bases (A, C, G, T) linked to a 6-membered morpholine ring. Eighteen to 25 subunits of these four subunit types are joined in a specific order by non-ionic phosphorodiamidate intersubunit linkages to give a Morpholino." [http://www.gene-tools.com/] -synonym: "morpholino oligo" EXACT [] -is_a: SO:0001247 ! implied link automatically realized ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001183 ! morpholino -relationship: has_quality SO:0001183 ! implied link automatically realized ! morpholino - -[Term] -id: SO:0000035 -name: riboswitch -def: "A riboswitch is a part of an mRNA that can act as a direct sensor of small molecules to control their own expression. A riboswitch is a cis element in the 5' end of an mRNA, that acts as a direct sensor of metabolites." [PMID:2820954] -synonym: "riboswitch RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Riboswitch "wiki" -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000036 -name: matrix_attachment_site -def: "A DNA region that is required for the binding of chromatin to the nuclear matrix." 
[SO:ma] -synonym: "MAR" EXACT [] -synonym: "matrix association region" EXACT [] -synonym: "matrix attachment region" EXACT [] -synonym: "matrix attachment site" EXACT [] -synonym: "nuclear matrix association region" EXACT [] -synonym: "nuclear matrix attachment site" EXACT [] -synonym: "S/MAR" EXACT [] -synonym: "S/MAR element" RELATED [] -synonym: "scaffold attachment site" EXACT [] -synonym: "scaffold matrix attachment region" EXACT [] -synonym: "SMAR" EXACT [] -xref: http://en.wikipedia.org/wiki/Matrix_attachment_site "wiki" -is_a: SO:0000626 ! chromosomal_regulatory_element - -[Term] -id: SO:0000037 -name: locus_control_region -def: "A DNA region that includes DNAse hypersensitive sites located 5' to a gene that confers the high-level, position-independent, and copy number-dependent expression to that gene." [SO:ma] -synonym: "LCR" EXACT [] -synonym: "locus control element" RELATED [] -synonym: "locus control region" EXACT [] -xref: http://en.wikipedia.org/wiki/Locus_control_region "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000038 -name: match_set -def: "A collection of match parts." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000039 -name: match_part -def: "A part of a match, for example an hsp from blast is a match_part." [SO:ke] -subset: SOFA -synonym: "match part" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: part_of SO:0000343 ! match - -[Term] -id: SO:0000040 -name: genomic_clone -def: "A clone of a DNA region of a genome." [SO:ma] -synonym: "genomic clone" EXACT [] -is_a: SO:0000151 ! implied link automatically realized ! clone -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000991 ! genomic_DNA -relationship: has_quality SO:0000991 ! implied link automatically realized ! genomic_DNA - -[Term] -id: SO:0000041 -name: sequence_operation -def: "An operation that can be applied to a sequence, that results in a change." 
[SO:ke] -synonym: "sequence operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000042 -name: pseudogene_attribute -def: "An attribute of a pseudogene (SO:0000336)." [SO:ma] -synonym: "pseudogene attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000043 -name: processed_pseudogene -def: "A pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promotors, but often including a polyA tail." [SO:xp] -comment: Please not the synonym R psi M uses the spelled out form of the greek letter. -synonym: "processed pseudogene" EXACT [] -synonym: "pseudogene by reverse transcription" RELATED [] -synonym: "R psi G" RELATED [] -synonym: "retropseudogene" EXACT [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0000044 -name: pseudogene_by_unequal_crossing_over -def: "A pseudogene caused by unequal crossing over at recombination." [SO:ke] -synonym: "pseudogene by unequal crossing over" EXACT [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0000045 -name: delete -def: "To remove a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000046 -name: insert -def: "To insert a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000047 -name: invert -def: "To invert a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000048 -name: substitute -def: "To substitute a subsection of sequence for another." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000049 -name: translocate -def: "To translocate a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000050 -name: gene_part -def: "A part of a gene, that has no other route in the ontology back to region. This concept is necessary for logical inference as these parts must have the properties of region. It also allows us to associate all the parts of genes with a gene." 
[SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000051 -name: probe -def: "A DNA sequence used experimentally to detect the presence or absence of a complementary nucleic acid." [SO:ma] -xref: http://en.wikipedia.org/wiki/Hybridization_probe "wiki" -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000052 -name: assortment_derived_deficiency -synonym: "assortment-derived_deficiency" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000053 -name: sequence_variant_affecting_regulatory_region -def: "A sequence_variant_effect which changes the regulatory region of a gene." [SO:ke] -synonym: "mutation affecting regulatory region" RELATED [] -synonym: "sequence variant affecting regulatory region" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0000054 -name: aneuploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Aneuploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0000055 -name: hyperploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number as extra chromosomes are present." [SO:ke] -xref: http://en.wikipedia.org/wiki/Hyperploid "wiki" -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000056 -name: hypoploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number as some chromosomes are missing." [SO:ke] -xref: http://en.wikipedia.org/wiki/Hypoploid "wiki" -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000057 -name: operator -def: "A regulatory element of an operon to which activators or repressors bind thereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -synonym: "operator segment" EXACT [] -xref: http://en.wikipedia.org/wiki/Operator_(biology)#Operator "wiki" -is_a: SO:0000752 ! 
gene_group_regulatory_region - -[Term] -id: SO:0000058 -name: assortment_derived_aneuploid -synonym: "assortment-derived_aneuploid" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000059 -name: nuclease_binding_site -def: "A region of a molecule that binds to a nuclease." [SO:cb] -synonym: "nuclease binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0000060 -name: compound_chromosome_arm -synonym: "compound chromosome arm" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:0000061 -name: restriction_enzyme_binding_site -def: "A region of a molecule that binds to a restriction enzyme." [SO:cb] -synonym: "restriction endonuclease binding site" EXACT [] -synonym: "restriction endonuclease recognition site" RELATED [] -synonym: "restriction enzyme binding site" EXACT [] -synonym: "restriction enzyme recognition site" RELATED [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000062 -name: deficient_intrachromosomal_transposition -synonym: "deficient intrachromosomal transposition" EXACT [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:0000063 -name: deficient_interchromosomal_transposition -synonym: "deficient interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:0000064 -name: gene_by_transcript_attribute -comment: This classes of attributes was added by MA to allow the broad description of genes based on qualities of the transcript(s). A product of SO meeting 2004. -is_obsolete: true - -[Term] -id: SO:0000065 -name: free_chromosome_arm -synonym: "free chromosome arm" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000066 -name: gene_by_polyadenylation_attribute -is_obsolete: true - -[Term] -id: SO:0000067 -name: gene_to_gene_feature -synonym: "gene to gene feature" EXACT [] -is_a: SO:0000401 ! 
gene_attribute - -[Term] -id: SO:0000068 -name: overlapping -def: "An attribute describing a gene that has a sequence that overlaps the sequence of another gene." [SO:ke] -is_a: SO:0000067 ! gene_to_gene_feature - -[Term] -id: SO:0000069 -name: inside_intron -def: "An attribute to describe a gene when it is located within the intron of another gene." [SO:ke] -synonym: "inside intron" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000070 -name: inside_intron_antiparallel -def: "An attribute to describe a gene when it is located within the intron of another gene and on the opposite strand." [SO:ke] -synonym: "inside intron antiparallel" EXACT [] -is_a: SO:0000069 ! inside_intron - -[Term] -id: SO:0000071 -name: inside_intron_parallel -def: "An attribute to describe a gene when it is located within the intron of another gene and on the same strand." [SO:ke] -synonym: "inside intron parallel" EXACT [] -is_a: SO:0000069 ! inside_intron - -[Term] -id: SO:0000072 -name: end_overlapping_gene -is_obsolete: true - -[Term] -id: SO:0000073 -name: five_prime_three_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's 3' region." [SO:ke] -synonym: "five prime-three prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000074 -name: five_prime_five_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's five prime region." [SO:ke] -synonym: "five prime-five prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000075 -name: three_prime_three_prime_overlap -def: "An attribute to describe a gene when the 3' region overlaps with another gene's 3' region." [SO:ke] -synonym: "three prime-three prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000076 -name: three_prime_five_prime_overlap -def: "An attribute to describe a gene when the 3' region overlaps with another gene's 5' region." 
[SO:ke] -synonym: "5' 3' overlap" EXACT [] -synonym: "three prime five prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000077 -name: antisense -def: "A region sequence that is complementary to a sequence of messenger RNA." [SO:ke] -xref: http://en.wikipedia.org/wiki/Antisense "wiki" -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000078 -name: polycistronic_transcript -def: "A transcript that is polycistronic." [SO:xp] -synonym: "polycistronic transcript" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000880 ! polycistronic -relationship: has_quality SO:0000880 ! implied link automatically realized ! polycistronic - -[Term] -id: SO:0000079 -name: dicistronic_transcript -def: "A transcript that is dicistronic." [SO:ke] -synonym: "dicistronic transcript" EXACT [] -is_a: SO:0000078 ! implied link automatically realized ! polycistronic_transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000879 ! dicistronic -relationship: has_quality SO:0000879 ! implied link automatically realized ! dicistronic - -[Term] -id: SO:0000080 -name: operon_member -synonym: "operon member" EXACT [] -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0000081 -name: gene_array_member -synonym: "gene array member" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000082 -name: processed_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000083 -name: macronuclear_sequence -synonym: "macronuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000084 -name: micronuclear_sequence -synonym: "micronuclear sequence" EXACT [] -is_a: SO:0000736 ! 
organelle_sequence - -[Term] -id: SO:0000085 -name: gene_by_genome_location -is_obsolete: true - -[Term] -id: SO:0000086 -name: gene_by_organelle_of_genome -is_obsolete: true - -[Term] -id: SO:0000087 -name: nuclear_gene -def: "A gene from nuclear sequence." [SO:xp] -synonym: "nuclear gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_gene "wiki" -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000738 ! nuclear_sequence -relationship: has_origin SO:0000738 ! implied link automatically realized ! nuclear_sequence - -[Term] -id: SO:0000088 -name: mt_gene -def: "A gene located in mitochondrial sequence." [SO:xp] -synonym: "mitochondrial gene" EXACT [] -synonym: "mt gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_gene "wiki" -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence -relationship: has_origin SO:0000737 ! implied link automatically realized ! mitochondrial_sequence - -[Term] -id: SO:0000089 -name: kinetoplast_gene -def: "A gene located in kinetoplast sequence." [SO:xp] -synonym: "kinetoplast gene" EXACT [] -is_a: SO:0000088 ! implied link automatically realized ! mt_gene -intersection_of: SO:0000088 ! mt_gene -intersection_of: has_origin SO:0000741 ! kinetoplast -relationship: has_origin SO:0000741 ! implied link automatically realized ! kinetoplast - -[Term] -id: SO:0000090 -name: plastid_gene -def: "A gene from plastid sequence." [SO:xp] -synonym: "plastid gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000740 ! plastid_sequence -relationship: has_origin SO:0000740 ! implied link automatically realized ! plastid_sequence - -[Term] -id: SO:0000091 -name: apicoplast_gene -def: "A gene from apicoplast sequence." 
[SO:xp] -synonym: "apicoplast gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000743 ! apicoplast_sequence -relationship: has_origin SO:0000743 ! implied link automatically realized ! apicoplast_sequence - -[Term] -id: SO:0000092 -name: ct_gene -def: "A gene from chloroplast sequence." [SO:xp] -synonym: "chloroplast gene" EXACT [] -synonym: "ct gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000745 ! chloroplast_sequence -relationship: has_origin SO:0000745 ! implied link automatically realized ! chloroplast_sequence - -[Term] -id: SO:0000093 -name: chromoplast_gene -def: "A gene from chromoplast_sequence." [SO:xp] -synonym: "chromoplast gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000744 ! chromoplast_sequence -relationship: has_origin SO:0000744 ! implied link automatically realized ! chromoplast_sequence - -[Term] -id: SO:0000094 -name: cyanelle_gene -def: "A gene from cyanelle sequence." [SO:xp] -synonym: "cyanelle gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000746 ! cyanelle_sequence -relationship: has_origin SO:0000746 ! implied link automatically realized ! cyanelle_sequence - -[Term] -id: SO:0000095 -name: leucoplast_gene -def: "A plastid gene from leucoplast sequence." [SO:xp] -synonym: "leucoplast gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000747 ! leucoplast_sequence -relationship: has_origin SO:0000747 ! implied link automatically realized ! 
leucoplast_sequence - -[Term] -id: SO:0000096 -name: proplastid_gene -def: "A gene from proplastid sequence." [SO:ke] -synonym: "proplastid gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000748 ! proplastid_sequence -relationship: has_origin SO:0000748 ! implied link automatically realized ! proplastid_sequence - -[Term] -id: SO:0000097 -name: nucleomorph_gene -def: "A gene from nucleomorph sequence." [SO:xp] -synonym: "nucleomorph gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence -relationship: has_origin SO:0000739 ! implied link automatically realized ! nucleomorphic_sequence - -[Term] -id: SO:0000098 -name: plasmid_gene -def: "A gene from plasmid sequence." [SO:xp] -synonym: "plasmid gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000749 ! plasmid_location -relationship: has_origin SO:0000749 ! implied link automatically realized ! plasmid_location - -[Term] -id: SO:0000099 -name: proviral_gene -def: "A gene from proviral sequence." [SO:xp] -synonym: "proviral gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000751 ! proviral_location -relationship: has_origin SO:0000751 ! implied link automatically realized ! proviral_location - -[Term] -id: SO:0000100 -name: endogenous_retroviral_gene -def: "A proviral gene with origin endogenous retrovirus." [SO:xp] -synonym: "endogenous retroviral gene" EXACT [] -is_a: SO:0000099 ! implied link automatically realized ! proviral_gene -intersection_of: SO:0000099 ! proviral_gene -intersection_of: has_origin SO:0000903 ! endogenous_retroviral_sequence -relationship: has_origin SO:0000903 ! 
implied link automatically realized ! endogenous_retroviral_sequence - -[Term] -id: SO:0000101 -name: transposable_element -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -synonym: "transposable element" EXACT [] -synonym: "transposon" EXACT [] -xref: http://en.wikipedia.org/wiki/Transposable_element "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000102 -name: expressed_sequence_match -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -synonym: "expressed sequence match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -def: "The end of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert end" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000104 -name: polypeptide -alt_id: SO:0000358 -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The term 'protein' was merged with 'polypeptide'. Although 'protein' was a sequenece_attribute and therefore meant to describe the quality rather than an actual feature, it was being used erroneously. It is replaced by 'peptidyl' as the polymer attribute. -subset: SOFA -synonym: "protein" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypeptide "wiki" -is_a: SO:0001411 ! biological_region -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000105 -name: chromosome_arm -def: "A region of the chromosome between the centromere and the telomere. Human chromosomes have two arms, the p arm (short) and the q arm (long) which are separated from each other by the centromere." 
[http://www.medterms.com/script/main/art.asp?articlekey=5152] -synonym: "chromosome arm" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000106 -name: non_capped_primary_transcript -is_obsolete: true - -[Term] -id: SO:0000107 -name: sequencing_primer -synonym: "sequencing primer" EXACT [] -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000108 -name: mRNA_with_frameshift -def: "An mRNA with a frameshift." [SO:xp] -synonym: "frameshifted mRNA" EXACT [] -synonym: "mRNA with frameshift" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000865 ! frameshift -relationship: has_quality SO:0000865 ! implied link automatically realized ! frameshift - -[Term] -id: SO:0000109 -name: sequence_variant_obs -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000110 -name: sequence_feature -def: "An extent of biological sequence." [SO:ke] -subset: SOFA -synonym: "located sequence feature" RELATED [] -synonym: "located_sequence_feature" EXACT [] -synonym: "sequence feature" EXACT [] -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000111 -name: transposable_element_gene -def: "A gene encoded within a transposable element. For example gag, int, env and pol are the transposable element genes of the TY element in yeast." [SO:ke] -synonym: "transposable element gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: part_of SO:0000101 ! transposable_element -relationship: part_of SO:0000101 ! implied link automatically realized ! 
transposable_element - -[Term] -id: SO:0000112 -name: primer -def: "A short preexisting polynucleotide chain to which new deoxyribonucleotides can be added by DNA polymerase." [http://www.ornl.gov/TechResources/Human_Genome/publicat/primer2001/glossary.html] -subset: SOFA -synonym: "DNA primer" EXACT [] -synonym: "primer oligonucleotide" EXACT [] -synonym: "primer polynucleotide" EXACT [] -synonym: "primer sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Primer_(molecular_biology) "wiki" -is_a: SO:0000441 ! ss_oligo - -[Term] -id: SO:0000113 -name: proviral_region -def: "A viral sequence which has integrated into a host genome." [SO:ke] -subset: SOFA -synonym: "proviral region" EXACT [] -synonym: "proviral sequence" RELATED [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000114 -name: methylated_C -def: "A methylated deoxy-cytosine." [SO:ke] -subset: SOFA -synonym: "methylated C" EXACT [] -synonym: "methylated cytosine" EXACT [] -synonym: "methylated cytosine base" EXACT [] -synonym: "methylated cytosine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000115 -name: transcript_feature -is_obsolete: true - -[Term] -id: SO:0000116 -name: edited -def: "An attribute describing a sequence that is modified by editing." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000117 -name: transcript_with_readthrough_stop_codon -is_obsolete: true - -[Term] -id: SO:0000118 -name: transcript_with_translational_frameshift -def: "A transcript with a translational frameshift." [SO:xp] -synonym: "transcript with translational frameshift" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000887 ! translationally_frameshifted -relationship: has_quality SO:0000887 ! implied link automatically realized ! 
translationally_frameshifted - -[Term] -id: SO:0000119 -name: regulated -def: "An attribute to describe a sequence that is regulated." [SO:ke] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns. -subset: SOFA -synonym: "pre mRNA" RELATED [] -synonym: "protein coding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000121 -name: forward_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "DNA forward primer" EXACT [] -synonym: "forward DNA primer" EXACT [] -synonym: "forward primer" EXACT [] -synonym: "forward primer oligo" EXACT [] -synonym: "forward primer oligonucleotide" EXACT [] -synonym: "forward primer polynucleotide" EXACT [] -synonym: "forward primer sequence" EXACT [] -is_a: SO:0000112 ! implied link automatically realized ! primer -intersection_of: SO:0000112 ! primer -intersection_of: has_quality SO:0001030 ! forward -relationship: has_quality SO:0001030 ! implied link automatically realized ! forward - -[Term] -id: SO:0000122 -name: RNA_sequence_secondary_structure -def: "A folded RNA sequence." [SO:ke] -synonym: "RNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000123 -name: transcriptionally_regulated -def: "An attribute describing a gene that is regulated at transcription." [SO:ma] -comment: By:. -synonym: "transcriptionally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -def: "Expressed in relatively constant amounts without regard to cellular environmental conditions such as the concentration of a particular substrate." 
[SO:ke] -synonym: "transcriptionally constitutive" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -def: "An inducer molecule is required for transcription to occur." [SO:ke] -synonym: "transcriptionally induced" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -def: "A repressor molecule is required for transcription to stop." [SO:ke] -synonym: "transcriptionally repressed" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000127 -name: silenced_gene -def: "A gene that is silenced." [SO:xp] -synonym: "silenced gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000893 ! silenced -relationship: has_quality SO:0000893 ! implied link automatically realized ! silenced - -[Term] -id: SO:0000128 -name: gene_silenced_by_DNA_modification -def: "A gene that is silenced by DNA modification." [SO:xp] -synonym: "gene silenced by DNA modification" EXACT [] -is_a: SO:0000127 ! implied link automatically realized ! silenced_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000894 ! silenced_by_DNA_modification -relationship: has_quality SO:0000894 ! implied link automatically realized ! silenced_by_DNA_modification - -[Term] -id: SO:0000129 -name: gene_silenced_by_DNA_methylation -def: "A gene that is silenced by DNA methylation." [SO:xp] -synonym: "gene silenced by DNA methylation" EXACT [] -synonym: "methylation-silenced gene" EXACT [] -is_a: SO:0000128 ! implied link automatically realized ! gene_silenced_by_DNA_modification -intersection_of: SO:0000128 ! gene_silenced_by_DNA_modification -intersection_of: has_quality SO:0000895 ! silenced_by_DNA_methylation -relationship: has_quality SO:0000895 ! implied link automatically realized ! 
silenced_by_DNA_methylation - -[Term] -id: SO:0000130 -name: post_translationally_regulated -def: "An attribute describing a gene that is regulated after it has been translated." [SO:ke] -synonym: "post translationally regulated" EXACT [] -synonym: "post-translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000131 -name: translationally_regulated -def: "An attribute describing a gene that is regulated as it is translated." [SO:ke] -synonym: "translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "DNA reverse primer" EXACT [] -synonym: "reverse DNA primer" EXACT [] -synonym: "reverse primer" EXACT [] -synonym: "reverse primer oligo" EXACT [] -synonym: "reverse primer oligonucleotide" EXACT [] -synonym: "reverse primer sequence" EXACT [] -is_a: SO:0000112 ! implied link automatically realized ! primer -intersection_of: SO:0000112 ! primer -intersection_of: has_quality SO:0001031 ! reverse -relationship: has_quality SO:0001031 ! implied link automatically realized ! reverse - -[Term] -id: SO:0000133 -name: epigenetically_modified -def: "This attribute describes a gene where heritable changes other than those in the DNA sequence occur. These changes include: modification to the DNA (such as DNA methylation, the covalent modification of cytosine), and post-translational modification of histones." [SO:ke] -synonym: "epigenetically modified" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000134 -name: imprinted -def: "Imprinted genes are epigenetically modified genes that are expressed monoallelically according to their parent of origin." [SO:ke] -xref: http:http\://en.wikipedia.org/wiki/Genomic_imprinting "wiki" -is_a: SO:0000119 ! 
regulated -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000135 -name: maternally_imprinted -def: "The maternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "maternally imprinted" EXACT [] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -def: "The paternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "paternally imprinted" EXACT [] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -def: "Allelic exclusion is a process occuring in diploid organisms, where a gene is inactivated and not expressed in that cell." [SO:ke] -comment: Exapmles are x-innactivation and immunoglobulin formation. -synonym: "allelically excluded" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000138 -name: gene_rearranged_at_DNA_level -def: "An epigenetically modified gene, rearranged at the DNA level." [SO:xp] -synonym: "gene rearranged at DNA level" EXACT [] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000904 ! rearranged_at_DNA_level -relationship: has_quality SO:0000904 ! implied link automatically realized ! rearranged_at_DNA_level - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000234 ! 
mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -synonym: "DNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000145 -name: recoded_codon -def: "A codon that has been redefined at translation. The redefinition may be as a result of translational bypass, translational frameshifting or stop codon readthrough." [SO:xp] -synonym: "recoded codon" EXACT [] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000146 -name: capped -def: "An attribute describing when a sequence, usually an mRNA is capped by the addition of a modified guanine nucleotide at the 5' end." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. 
Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unvailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http\://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "yeast artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "bacterial artificial chromosome" EXACT [] -is_a: SO:0000440 ! 
vector_replicon - -[Term] -id: SO:0000154 -name: PAC -def: "The P1-derived artificial chromosome are DNA constructs that are derived from the DNA of P1 bacteriophage. They can carry large amounts (about 100-300 kilobases) of other sequences for a variety of bioengineering purposes. It is one type of vector used to clone DNA fragments (100- to 300-kb insert size; average, 150 kb) in Escherichia coli cells." [http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. Drosophila melanogaster PACs carry an average insert size of 80 kb. The library represents a 6-fold coverage of the genome. -synonym: "P1" EXACT [] -synonym: "P1 artificial chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000155 -name: plasmid -def: "A self replicating, using the hosts cellular machinery, often circular nucleic acid molecule that is distinct from a chromosome in the organism." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "plasmid sequence" EXACT [] -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as a plasmid or packaged as a phage,since they retain the lambda cos sites." [SO:ma] -comment: Paper: vans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1):9-20. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cosmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Cosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. 
When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma] -synonym: "phagemid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Phagemid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilises the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource: mapping and analysis of 96 clones. Genomics 1996. -synonym: "fosmid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Fosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000159 -name: deletion -def: "The point at which a deletion occured." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_obsolete: true - -[Term] -id: SO:0000161 -name: methylated_A -def: "A modified RNA base in which adenine has been methylated." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinary. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! 
primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a placeholder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000166 -name: enhancer_bound_by_factor -def: "An enhancer bound by a factor." 
[SO:xp] -synonym: "enhancer bound by factor" EXACT [] -is_a: SO:0000165 ! implied link automatically realized ! enhancer -intersection_of: SO:0000165 ! enhancer -intersection_of: has_quality SO:0000277 ! bound_by_factor -relationship: has_quality SO:0000277 ! implied link automatically realized ! bound_by_factor - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." [SO:ke] -synonym: "pol I promoter" EXACT [] -synonym: "polymerase I promoter" EXACT [] -synonym: "RNA polymerase A promoter" EXACT [] -synonym: "RNApol I promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke] -synonym: "pol II promoter" RELATED [] -synonym: "polymerase II promoter" EXACT [] -synonym: "RNA polymerase B promoter" EXACT [] -synonym: "RNApol II promoter" EXACT [] -is_a: SO:0000727 ! implied link automatically realized ! CRM -is_a: SO:0001203 ! RNA_polymerase_promoter -relationship: has_part SO:0000174 ! 
TATA_box - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." [SO:ke] -synonym: "pol III promoter" EXACT [] -synonym: "polymerase III promoter" EXACT [] -synonym: "RNA polymerase C promoter" EXACT [] -synonym: "RNApol III promoter" EXACT [] -is_a: SO:0000727 ! implied link automatically realized ! CRM -is_a: SO:0001203 ! RNA_polymerase_promoter -relationship: has_part SO:0000174 ! TATA_box - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "CAAT box" EXACT [] -synonym: "CAAT signal" EXACT [] -synonym: "CAAT-box" EXACT [] -xref: http://en.wikipedia.org/wiki/CAAT_box "wiki" -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000173 -name: GC_rich_promoter_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "GC rich promoter region" EXACT [] -synonym: "GC-rich region" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "Goldstein-Hogness box" EXACT [] -synonym: "TATA box" EXACT [] -xref: http://en.wikipedia.org/wiki/TATA_box "wiki" -is_a: SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-10 signal" EXACT [] -synonym: "minus 10 signal" EXACT [] -synonym: "Pribnow box" EXACT [] -synonym: "Pribnow Schaller box" EXACT [] -synonym: "Pribnow-Schaller box" EXACT [] -xref: http://en.wikipedia.org/wiki/Pribnow_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000613 ! bacterial_RNApol_promoter - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-35 signal" EXACT [] -synonym: "minus 35 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000613 ! bacterial_RNApol_promoter - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." 
[SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." [http://www.dddmag.com/Glossary.aspx#r] -synonym: "class I" RELATED [] -synonym: "class I transposon" EXACT [] -synonym: "retrotransposon element" EXACT [] -xref: http://en.wikipedia.org/wiki/Retrotransposon "wiki" -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." [SO:ke] -synonym: "class II" RELATED [] -synonym: "class II transposon" EXACT [] -synonym: "DNA transposon" EXACT [] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -synonym: "U2 intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." 
[SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." [SO:ke] -synonym: "long terminal repeat retrotransposon" EXACT [] -synonym: "LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A segment of DNA that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -synonym: "non LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -synonym: "5' intron" EXACT [] -synonym: "5' intron sequence" EXACT [] -synonym: "five prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000191 -name: interior_intron -synonym: "interior intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -synonym: "3' intron" EXACT [] -synonym: "3' intron sequence" RELATED [] -synonym: "three prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." 
[GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "LINE" EXACT [] -synonym: "LINE element" EXACT [] -synonym: "Long interspersed element" EXACT [] -synonym: "Long interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon (here, 'codon'is inclusive of the stop_codon)." [SO:ke] -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding exon_coding_region -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." 
[SO:ke] -synonym: "translocated sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000201 -name: interior_exon -def: "An exon that is bounded by 5' and 3' splice sites." [PMID:10373547] -synonym: "interior exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The coding exon that is most 3-prime on a given transcript." [SO:ma] -synonym: "3' coding exon" RELATED [] -synonym: "three prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! 
UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." [SO:ke] -synonym: "Short interspersed element" EXACT [] -synonym: "Short interspersed nuclear element" EXACT [] -synonym: "SINE element" EXACT [] -xref: http://en.wikipedia.org/wiki/Short_interspersed_nuclear_element "wiki" -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_variation -synonym: "simple sequence length polymorphism" RELATED [] -synonym: "simple sequence length variation" EXACT [] -synonym: "SSLP" RELATED [] -is_a: SO:0000248 ! sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "terminal inverted repeat element" EXACT [] -synonym: "TIR element" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253)." [SO:ke] -synonym: "tRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -def: "A primary transcript encoding alanyl tRNA." [SO:ke] -synonym: "alanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." 
[SO:ke] -synonym: "arginine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -synonym: "asparagine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -synonym: "aspartic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." [SO:ke] -synonym: "cysteine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." [SO:ke] -synonym: "glycine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." [SO:ke] -synonym: "histidine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." 
[SO:ke] -synonym: "isoleucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -synonym: "leucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -synonym: "lysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -synonym: "methionine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke] -synonym: "phenylalanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -synonym: "proline tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -synonym: "serine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -synonym: "threonine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -synonym: "tryptophan tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." [SO:ke] -synonym: "tyrosine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." [SO:ke] -synonym: "valine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear RNA (SO:0000274)." [SO:ke] -synonym: "snRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -synonym: "snoRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. 
This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds a TF complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The inframe interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -synonym: "transcript attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterised by large modular imperfect long inverted repeats." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "foldback element" EXACT [] -synonym: "long inverted repeat element" RELATED [] -synonym: "LVR element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The sequences extending on either side of a specific region." [SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000240 -name: chromosome_variation -synonym: "chromosome variation" EXACT [] -disjoint_from: SO:0000110 ! 
sequence_feature -disjoint_from: SO:0000400 ! sequence_attribute -disjoint_from: SO:0001060 ! sequence_variant -disjoint_from: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0000241 -name: internal_UTR -def: "A UTR bordered by the terminal and initial codons of two CDSs in a polycistronic transcript. Every UTR is either 5', 3' or internal." [SO:cjm] -synonym: "internal UTR" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polycistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -synonym: "untranslated region polycistronic mRNA" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke] -synonym: "internal ribosomal entry sequence" EXACT [] -synonym: "internal ribosomal entry site" EXACT [] -synonym: "internal ribosome entry sequence" RELATED [] -synonym: "internal ribosome entry site" EXACT [] -synonym: "IRES" EXACT [] -xref: http://en.wikipedia.org/wiki/Internal_ribosome_entry_site "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_obsolete: true - -[Term] -id: SO:0000246 -name: polyadenylated -def: "A attribute describing the addition of a poly A tail to the 3' end of a mRNA molecule." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000248 -name: sequence_length_variation -synonym: "sequence length variation" EXACT [] -is_a: SO:1000002 ! 
substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -synonym: "modified RNA base feature" EXACT [] -is_a: SO:0001236 ! base - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000254 -name: alanyl_tRNA -def: "A tRNA sequence that has an alanine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "alanyl tRNA" EXACT [] -synonym: "alanyl-transfer ribonucleic acid" EXACT [] -synonym: "alanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000211 ! alanine_tRNA_primary_transcript - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -synonym: "rRNA small subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -def: "A tRNA sequence that has an asparagine anticodon, and a 3' asparagine binding region." [SO:ke] -synonym: "asparaginyl tRNA" EXACT [] -synonym: "asparaginyl-transfer ribonucleic acid" EXACT [] -synonym: "asparaginyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000213 ! asparagine_tRNA_primary_transcript - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -def: "A tRNA sequence that has an aspartic acid anticodon, and a 3' aspartic acid binding region." [SO:ke] -synonym: "aspartyl tRNA" EXACT [] -synonym: "aspartyl-transfer ribonucleic acid" EXACT [] -synonym: "aspartyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000214 ! aspartic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -def: "A tRNA sequence that has a cysteine anticodon, and a 3' cysteine binding region." [SO:ke] -synonym: "cysteinyl tRNA" EXACT [] -synonym: "cysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "cysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! 
tRNA -relationship: derives_from SO:0000215 ! cysteine_tRNA_primary_transcript - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -def: "A tRNA sequence that has a glutamine anticodon, and a 3' glutamine binding region." [SO:ke] -synonym: "glutaminyl tRNA" EXACT [] -synonym: "glutaminyl-transfer ribonucleic acid" EXACT [] -synonym: "glutaminyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000216 ! glutamic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -def: "A tRNA sequence that has a glutamic acid anticodon, and a 3' glutamic acid binding region." [SO:ke] -synonym: "glutamyl tRNA" EXACT [] -synonym: "glutamyl-transfer ribonucleic acid" EXACT [] -synonym: "glutamyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000217 ! glutamine_tRNA_primary_transcript - -[Term] -id: SO:0000261 -name: glycyl_tRNA -def: "A tRNA sequence that has a glycine anticodon, and a 3' glycine binding region." [SO:ke] -synonym: "glycyl tRNA" EXACT [] -synonym: "glycyl-transfer ribonucleic acid" RELATED [] -synonym: "glycyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000218 ! glycine_tRNA_primary_transcript - -[Term] -id: SO:0000262 -name: histidyl_tRNA -def: "A tRNA sequence that has a histidine anticodon, and a 3' histidine binding region." [SO:ke] -synonym: "histidyl tRNA" EXACT [] -synonym: "histidyl-transfer ribonucleic acid" EXACT [] -synonym: "histidyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000219 ! histidine_tRNA_primary_transcript - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -def: "A tRNA sequence that has an isoleucine anticodon, and a 3' isoleucine binding region." [SO:ke] -synonym: "isoleucyl tRNA" EXACT [] -synonym: "isoleucyl-transfer ribonucleic acid" EXACT [] -synonym: "isoleucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000220 ! 
isoleucine_tRNA_primary_transcript - -[Term] -id: SO:0000264 -name: leucyl_tRNA -def: "A tRNA sequence that has a leucine anticodon, and a 3' leucine binding region." [SO:ke] -synonym: "leucyl tRNA" EXACT [] -synonym: "leucyl-transfer ribonucleic acid" EXACT [] -synonym: "leucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000221 ! leucine_tRNA_primary_transcript - -[Term] -id: SO:0000265 -name: lysyl_tRNA -def: "A tRNA sequence that has a lysine anticodon, and a 3' lysine binding region." [SO:ke] -synonym: "lysyl tRNA" EXACT [] -synonym: "lysyl-transfer ribonucleic acid" EXACT [] -synonym: "lysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000222 ! lysine_tRNA_primary_transcript - -[Term] -id: SO:0000266 -name: methionyl_tRNA -def: "A tRNA sequence that has a methionine anticodon, and a 3' methionine binding region." [SO:ke] -synonym: "methionyl tRNA" EXACT [] -synonym: "methionyl-transfer ribonucleic acid" EXACT [] -synonym: "methionyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000223 ! methionine_tRNA_primary_transcript - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -def: "A tRNA sequence that has a phenylalanine anticodon, and a 3' phenylalanine binding region." [SO:ke] -synonym: "phenylalanyl tRNA" EXACT [] -synonym: "phenylalanyl-transfer ribonucleic acid" EXACT [] -synonym: "phenylalanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000224 ! phenylalanine_tRNA_primary_transcript - -[Term] -id: SO:0000268 -name: prolyl_tRNA -def: "A tRNA sequence that has a proline anticodon, and a 3' proline binding region." [SO:ke] -synonym: "prolyl tRNA" EXACT [] -synonym: "prolyl-transfer ribonucleic acid" EXACT [] -synonym: "prolyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000225 ! 
proline_tRNA_primary_transcript - -[Term] -id: SO:0000269 -name: seryl_tRNA -def: "A tRNA sequence that has a serine anticodon, and a 3' serine binding region." [SO:ke] -synonym: "seryl tRNA" EXACT [] -synonym: "seryl-transfer ribonucleic acid" RELATED [] -synonym: "seryl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000226 ! serine_tRNA_primary_transcript - -[Term] -id: SO:0000270 -name: threonyl_tRNA -def: "A tRNA sequence that has a threonine anticodon, and a 3' threonine binding region." [SO:ke] -synonym: "threonyl tRNA" EXACT [] -synonym: "threonyl-transfer ribonucleic acid" EXACT [] -synonym: "threonyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000227 ! threonine_tRNA_primary_transcript - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -def: "A tRNA sequence that has a tryptophan anticodon, and a 3' tryptophan binding region." [SO:ke] -synonym: "tryptophanyl tRNA" EXACT [] -synonym: "tryptophanyl-transfer ribonucleic acid" EXACT [] -synonym: "tryptophanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000228 ! tryptophan_tRNA_primary_transcript - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -def: "A tRNA sequence that has a tyrosine anticodon, and a 3' tyrosine binding region." [SO:ke] -synonym: "tyrosyl tRNA" EXACT [] -synonym: "tyrosyl-transfer ribonucleic acid" EXACT [] -synonym: "tyrosyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000229 ! tyrosine_tRNA_primary_transcript - -[Term] -id: SO:0000273 -name: valyl_tRNA -def: "A tRNA sequence that has a valine anticodon, and a 3' valine binding region." [SO:ke] -synonym: "valyl tRNA" EXACT [] -synonym: "valyl-transfer ribonucleic acid" EXACT [] -synonym: "valyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000230 ! 
valine_tRNA_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000231 ! snRNA_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: derives_from SO:0000647 ! 
miRNA_primary_transcript - -[Term] -id: SO:0000277 -name: bound_by_factor -def: "An attribute describing a sequence that is bound by another molecule." [SO:ke] -comment: Formerly called transcript_by_bound_factor. -synonym: "bound by factor" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000278 -name: transcript_bound_by_nucleic_acid -def: "A transcript that is bound by a nucleic acid." [SO:xp] -comment: Formerly called transcript_by_bound_nucleic_acid. -synonym: "transcript bound by nucleic acid" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000876 ! bound_by_nucleic_acid -relationship: has_quality SO:0000876 ! implied link automatically realized ! bound_by_nucleic_acid - -[Term] -id: SO:0000279 -name: transcript_bound_by_protein -def: "A transcript that is bound by a protein." [SO:xp] -comment: Formerly called transcript_by_bound_protein. -synonym: "transcript bound by protein" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000875 ! bound_by_protein -relationship: has_quality SO:0000875 ! implied link automatically realized ! bound_by_protein - -[Term] -id: SO:0000280 -name: engineered_gene -def: "A gene that is engineered." [SO:xp] -synonym: "engineered gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000281 -name: engineered_foreign_gene -def: "A gene that is engineered and foreign." [SO:xp] -synonym: "engineered foreign gene" EXACT [] -is_a: SO:0000280 ! implied link automatically realized ! engineered_gene -is_a: SO:0000285 ! implied link automatically realized ! foreign_gene -is_a: SO:0000805 ! 
implied link automatically realized ! engineered_foreign_region -intersection_of: SO:0000280 ! engineered_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000282 -name: mRNA_with_minus_1_frameshift -def: "An mRNA with a minus 1 frameshift." [SO:xp] -synonym: "mRNA with minus 1 frameshift" EXACT [] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000866 ! minus_1_frameshift -relationship: has_quality SO:0000866 ! implied link automatically realized ! minus_1_frameshift - -[Term] -id: SO:0000283 -name: engineered_foreign_transposable_element_gene -def: "A transposible_element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign transposable element gene" EXACT [] -is_a: SO:0000111 ! implied link automatically realized ! transposable_element_gene -is_a: SO:0000281 ! implied link automatically realized ! engineered_foreign_gene -intersection_of: SO:0000111 ! transposable_element_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartite and interrupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000285 -name: foreign_gene -def: "A gene that is foreign." [SO:xp] -synonym: "foreign gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000784 ! implied link automatically realized ! foreign - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "direct terminal repeat" RELATED [] -synonym: "long terminal repeat" EXACT [] -synonym: "LTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Long_terminal_repeat "wiki" -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000287 -name: fusion_gene -def: "A gene that is a fusion." [SO:xp] -synonym: "fusion gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Fusion_gene "wiki" -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000806 ! fusion -relationship: has_quality SO:0000806 ! implied link automatically realized ! fusion - -[Term] -id: SO:0000288 -name: engineered_fusion_gene -def: "A fusion gene that is engineered." [SO:xp] -synonym: "engineered fusion gene" EXACT [] -is_a: SO:0000280 ! implied link automatically realized ! engineered_gene -is_a: SO:0000287 ! implied link automatically realized ! fusion_gene -intersection_of: SO:0000287 ! fusion_gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000289 -name: microsatellite -def: "A repeat_region containing repeat_units (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite" EXACT [] -synonym: "dinucleotide repeat microsatellite feature" EXACT [] -synonym: "dinucleotide repeat microsatellite locus" EXACT [] -synonym: "dinucleotide repeat microsatellite marker" EXACT [] -is_a: SO:0000289 ! 
microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite marker" RELATED [] -synonym: "rinucleotide repeat microsatellite" EXACT [] -synonym: "trinucleotide repeat microsatellite feature" EXACT [] -synonym: "trinucleotide repeat microsatellite locus" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_obsolete: true - -[Term] -id: SO:0000293 -name: engineered_foreign_repetitive_element -def: "A repetitive element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign repetitive element" EXACT [] -is_a: SO:0000657 ! implied link automatically realized ! repeat_region -is_a: SO:0000805 ! implied link automatically realized ! engineered_foreign_region -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -synonym: "U12 intron" EXACT [] -synonym: "U12-dependent intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! replicon - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "D-loop" EXACT [] -synonym: "displacement loop" RELATED [] -xref: http://en.wikipedia.org/wiki/D_loop "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -synonym: "recombination feature" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000299 -name: specific_recombination_site -synonym: "specific recombination site" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -synonym: "recombination feature of rearranged gene" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000301 -name: vertebrate_immune_system_gene_recombination_feature -synonym: "vertebrate immune system gene recombination feature" EXACT [] -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene recombination feature" EXACT [] -synonym: "J-RS" EXACT [] -is_a: SO:0000939 ! 
vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interrupted palindrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_base_site -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G or (in RNA) U." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001236 ! base - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_base_site - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_obsolete: true - -[Term] -id: SO:0000309 -name: computed_feature -is_obsolete: true - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_obsolete: true - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to: -is_obsolete: true - -[Term] -id: SO:0000312 -name: experimentally_determined -def: "Attribute to describe a feature that has been experiemntally verified." 
[SO:ke] -synonym: "experimentally determined" EXACT [] -is_a: SO:0000789 ! validated - -[Term] -id: SO:0000313 -name: stem_loop -alt_id: SO:0000019 -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "RNA_hairpin_loop" EXACT [] -synonym: "stem loop" EXACT [] -synonym: "stem-loop" EXACT [] -xref: http://en.wikipedia.org/wiki/Stem_loop "wiki" -is_a: SO:0000122 ! RNA_sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: TSS -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cDNA clone" EXACT [] -is_a: SO:0000151 ! implied link automatically realized ! clone -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000756 ! cDNA -relationship: has_quality SO:0000756 ! implied link automatically realized ! 
cDNA - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke] -synonym: "intronic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000321 -name: mRNA_with_plus_1_frameshift -def: "An mRNA with a plus 1 frameshift." [SO:ke] -synonym: "mRNA with plus 1 frameshift" EXACT [] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000868 ! plus_1_frameshift -relationship: has_quality SO:0000868 ! implied link automatically realized ! plus_1_frameshift - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -synonym: "nuclease hypersensitive site" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "coding start" EXACT [] -synonym: "translation initiation site" EXACT [] -synonym: "translation start" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! 
oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke] -synonym: "coding end" EXACT [] -synonym: "translation termination site" EXACT [] -synonym: "translation_end" EXACT [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray oligo" EXACT [] -synonym: "microarray oligonucleotide" EXACT [] -is_a: SO:0000051 ! probe - -[Term] -id: SO:0000329 -name: mRNA_with_plus_2_frameshift -def: "An mRNA with a plus 2 frameshift." [SO:xp] -synonym: "mRNA with plus 2 frameshift" EXACT [] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000869 ! plus_2_framshift -relationship: has_quality SO:0000869 ! implied link automatically realized ! plus_2_framshift - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000335 -name: mRNA_with_minus_2_frameshift -def: "A mRNA with a minus 2 frameshift." [SO:ke] -synonym: "mRNA with minus 2 frameshift" EXACT [] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000867 ! minus_2_frameshift -relationship: has_quality SO:0000867 ! implied link automatically realized ! minus_2_frameshift - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). 
In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITEs do not encode proteins." [http://www.pnas.org/cgi/content/full/97/18/10083] -synonym: "miniature inverted repeat transposable element" EXACT [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome which promotes recombination." [SO:rd] -synonym: "recombination hotspot" EXACT [] -xref: http://en.wikipedia.org/wiki/Recombination_hotspot "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! 
replicon - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -synonym: "site specific recombination target region" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000346 -name: loxP_site -synonym: "Cre-recombination target region" RELATED [] -synonym: "loxP site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000348 -name: nucleic_acid -def: "An attribute describing a sequence consisting of nucleobases bound to repeating units. 
The forms found in nature are deoxyribonucleic acid (DNA), where the repeating units are 2-deoxy-D-ribose rings connected to a phosphate backbone, and ribonucleic acid (RNA), where the repeating units are D-ribose rings connected to a phosphate backbone." [CHEBI:33696, RSC:cb] -synonym: "nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleic_acid "wiki" -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000350 -name: FRT_site -def: "An inversion site found on the Saccharomyces cerevisiae 2 micron plasmid." [SO:ma] -synonym: "FLP recombination target region" EXACT [] -synonym: "FRT site" EXACT [] -is_a: SO:0000948 ! inversion_site - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "An attribute to decide a sequence of nucleotides, nucleotide analogs, or amino acids that has been designed by an experimenter and which may, or may not, correspond with any natural sequence." [SO:ma] -synonym: "synthetic sequence" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000352 -name: DNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a 2-deoxy-D-ribose ring connected to a phosphate backbone." [RSC:cb] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000353 -name: sequence_assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -synonym: "group 1 intron homing endonuclease target region" EXACT [] -is_a: SO:0000684 ! 
nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which is co-inherited as the result of the lack of historic recombination within it." [SO:ma] -synonym: "haplotype block" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000356 -name: RNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a D-ribose ring connected to a phosphate backbone." [RSC:cb] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: flanked -def: "An attribute describing a region that is bounded either side by a paricular kind of region." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000359 -name: floxed -def: "An attribute describing sequence that is flanked by Lox-P sites." [SO:ke] -xref: http://en.wikipedia.org/wiki/Floxed "wiki" -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000361 -name: FRT_flanked -def: "An attribute to describe sequence that is flanked by the FLP recombinase recognition site, FRT." [SO:ke] -synonym: "FRT flanked" EXACT [] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000362 -name: invalidated_by_chimeric_cDNA -def: "A cDNA clone constructed from more than one mRNA. Usually an experimental artifact." [SO:ma] -synonym: "invalidated by chimeric cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000363 -name: floxed_gene -def: "A transgene that is floxed." [SO:xp] -synonym: "floxed gene" EXACT [] -is_a: SO:0000902 ! 
implied link automatically realized ! transgene -intersection_of: SO:0000902 ! transgene -intersection_of: has_quality SO:0000359 ! floxed -relationship: has_quality SO:0000359 ! implied link automatically realized ! floxed - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposible element." [SO:ke] -synonym: "transposable element flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "A region encoding an integrase which acts at a site adjacent to it (attI_site) to insert DNA which must include but is not limited to an attC_site." [SO:as] -xref: http://en.wikipedia.org/wiki/Integron "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000367 -name: attI_site -def: "A region within an integron, adjacent to an integrase, at which site specific recombination involving an attC_site takes place." [SO:as] -synonym: "attI site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -is_obsolete: true - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." 
[http://www.sci.sdsu.edu/~smaloy/Glossary/C.html] -synonym: "conjugative transposon" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0001185 ! enzymatic -relationship: has_quality SO:0001185 ! implied link automatically realized ! enzymatic - -[Term] -id: SO:0000373 -name: recombinationally_inverted_gene -def: "A recombinationally rearranged gene by inversion." [SO:xp] -synonym: "recombinationally inverted gene" EXACT [] -is_a: SO:0000456 ! implied link automatically realized ! recombinationally_rearranged_gene -intersection_of: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: has_quality SO:1000036 ! inversion -relationship: has_quality SO:1000036 ! implied link automatically realized ! inversion - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -is_a: SO:0000372 ! implied link automatically realized ! enzymatic_RNA -intersection_of: SO:0000372 ! enzymatic_RNA -intersection_of: has_quality SO:0001186 ! ribozymic -relationship: has_quality SO:0001186 ! implied link automatically realized ! ribozymic - -[Term] -id: SO:0000375 -name: rRNA_5_8S -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. 
It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 6S RNA represses expression from a sigma70-dependent promoter during stationary phase." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S RNA" EXACT [] -synonym: "RNA 6S" EXACT [] -xref: http://en.wikipedia.org/wiki/6S_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB RsmB RNA" EXACT [] -synonym: "CsrB-RsmB RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. 
These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -synonym: "DsrA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/DsrA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -synonym: "GcvB RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/GcvB_RNA "wiki" -is_a: SO:0000378 ! DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http://rnaworld.bio.ku.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -is_a: SO:0000715 ! implied link automatically realized ! RNA_motif -intersection_of: SO:0000715 ! RNA_motif -intersection_of: has_quality SO:0001186 ! ribozymic -relationship: has_quality SO:0001186 ! implied link automatically realized ! 
ribozymic - -[Term] -id: SO:0000381 -name: group_IIA_intron -synonym: "group IIA intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -synonym: "group IIB intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -synonym: "MicF RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MicF_RNA "wiki" -is_a: SO:0000644 ! antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -synonym: "OxyS RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/OxyS_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterised activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -synonym: "RprA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RprA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -synonym: "RRE RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -synonym: "spot-42 RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Spot_42_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesises telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). 
Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." 
[http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -def: "An attribute describes a quality of sequence." [SO:ke] -synonym: "sequence attribute" EXACT [] -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:0001060 ! sequence_variant -disjoint_from: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0000401 -name: gene_attribute -synonym: "gene attribute" EXACT [] -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_obsolete: true - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0005837 ! U14_snoRNA_primary_transcript - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. 
The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron. Twintrons are group II or III introns, into which another group II or III intron has been transposed." [PMID:1899376, PMID:7823908] -xref: http://en.wikipedia.org/wiki/Twintron "wiki" -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: Discrete. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A region of a molecule that binds to a protein." [SO:ke] -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000411 -name: rescue_region -def: "A region that rescues." 
[SO:xp] -synonym: "rescue fragment" EXACT [] -synonym: "rescue region" EXACT [] -synonym: "rescue segment" RELATED [] -is_a: SO:0000695 ! implied link automatically realized ! reagent -intersection_of: SO:0000695 ! reagent -intersection_of: has_quality SO:0000814 ! rescue -relationship: has_quality SO:0000814 ! implied link automatically realized ! rescue - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000414 -name: invalidated_by_genomic_contamination -def: "An attribute to describe a feature that is invalidated due to genomic contamination." [SO:ke] -synonym: "invalidated by genomic contamination" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000415 -name: invalidated_by_genomic_polyA_primed_cDNA -def: "An attribute to describe a feature that is invalidated due to polyA priming." [SO:ke] -synonym: "invalidated by genomic polyA primed cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000416 -name: invalidated_by_partial_processing -def: "An attribute to describe a feature that is invalidated due to partial processing." [SO:ke] -synonym: "invalidated by partial processing" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000417 -name: polypeptide_domain -alt_id: BS:00012 -alt_id: BS:00134 -alt_id: SO:0001069 -def: "A structurally or functionally defined protein region. In proteins with multiple domains, the combination of the domains determines the function of the protein. 
A region which has been shown to recur throughout evolution." [EBIBS:GAR] -comment: Range. Old definition from before biosapiens: A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains. -subset: biosapiens -synonym: "ca_bind" RELATED BS [uniprot:feature_type] -synonym: "DNA_bind" RELATED BS [uniprot:feature_type] -synonym: "domain" BROAD BS [uniprot:feature_type] -synonym: "np_bind" RELATED BS [uniprot:feature_type] -synonym: "polypeptide domain" EXACT [] -synonym: "polypeptide_structural_domain" EXACT BS [] -synonym: "structural domain" BROAD BS [] -synonym: "zn_fing" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. 
Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -synonym: "5' TIR" EXACT [] -synonym: "five prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -synonym: "3' TIR" EXACT [] -synonym: "three prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -synonym: "U5 long terminal repeat region" EXACT [] -synonym: "U5 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000423 -name: R_LTR_region -synonym: "R long terminal repeat region" EXACT [] -synonym: "R LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000424 -name: U3_LTR_region -synonym: "U3 long terminal repeat region" EXACT [] -synonym: "U3 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000425 -name: five_prime_LTR -synonym: "5' long terminal repeat" EXACT [] -synonym: "5' LTR" EXACT [] -synonym: "five prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -synonym: "3' long terminal repeat" EXACT [] -synonym: "3' LTR" EXACT [] -synonym: "three prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -synonym: "R 5' long term repeat region" EXACT [] -synonym: "R five prime LTR region" EXACT [] -is_a: SO:0000423 ! R_LTR_region -is_a: SO:0000850 ! 
five_prime_LTR_component - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -synonym: "U5 5' long terminal repeat region" EXACT [] -synonym: "U5 five prime LTR region" EXACT [] -is_a: SO:0000422 ! U5_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -synonym: "U3 5' long term repeat region" EXACT [] -synonym: "U3 five prime LTR region" EXACT [] -is_a: SO:0000424 ! U3_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -synonym: "R 3' long terminal repeat region" EXACT [] -synonym: "R three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -synonym: "U3 3' long terminal repeat region" EXACT [] -synonym: "U3 three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -synonym: "U5 3' long terminal repeat region" EXACT [] -synonym: "U5 three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -synonym: "non LTR retrotransposon polymeric tract" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: target_site_duplication -def: "A sequence of the target DNA that is duplicated when a transposable element or phage inserts; usually found at each end the insertion." [http://www.koko.gov.my/CocoaBioTech/Glossaryt.html] -synonym: "target site duplication" EXACT [] -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." 
[SO:ke] -synonym: "LTR retrotransposon poly purine tract" RELATED [] -synonym: "RR tract" EXACT [] -is_a: SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -synonym: "inverted ring chromosome" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! ring_chromosome - -[Term] -id: SO:0000440 -name: vector_replicon -def: "A replicon that has been modified to act as a vector for foreign sequence." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "vector" EXACT [] -synonym: "vector replicon" EXACT [] -xref: http://en.wikipedia.org/wiki/Vector_(molecular_biology) "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! 
oligo - -[Term] -id: SO:0000443 -name: polymer_attribute -def: "An attribute to describe the kind of biological sequence." [SO:ke] -synonym: "polymer attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -synonym: "three prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "5' nc exon" EXACT [] -synonym: "5' non coding exon" EXACT [] -synonym: "five prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." [SO:ke] -synonym: "UTR intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." [SO:ke] -synonym: "five prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -synonym: "three prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequency of these components." [SO:ma] -synonym: "random sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -synonym: "chromosome interband" RELATED [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000451 -name: gene_with_polyadenylated_mRNA -def: "A gene that encodes a polyadenylated mRNA." [SO:xp] -synonym: "gene with polyadenylated mRNA" EXACT [] -is_a: SO:0001217 ! implied link automatically realized ! protein_coding_gene -intersection_of: SO:0001217 ! 
protein_coding_gene -intersection_of: transcribed_to SO:0000871 ! polyadenylated_mRNA -relationship: transcribed_to SO:0000871 ! implied link automatically realized ! polyadenylated_mRNA - -[Term] -id: SO:0000452 -name: transgene_attribute -is_obsolete: true - -[Term] -id: SO:0000453 -name: transposition -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A small, 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000455 -name: gene_with_mRNA_with_frameshift -def: "A gene that encodes an mRNA with a frameshift." [SO:xp] -synonym: "gene with mRNA with frameshift" EXACT [] -is_a: SO:0001217 ! implied link automatically realized ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000865 ! frameshift -relationship: has_quality SO:0000865 ! implied link automatically realized ! frameshift - -[Term] -id: SO:0000456 -name: recombinationally_rearranged_gene -def: "A gene that is recombinationally rearranged." [SO:ke] -synonym: "recombinationally rearranged gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000940 ! recombinationally_rearranged -relationship: has_quality SO:0000940 ! implied link automatically realized ! recombinationally_rearranged - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -def: "A chromosome duplication involving an insertion from another chromosome." [SO:ke] -synonym: "interchromosomal duplication" EXACT [] -is_a: SO:1000037 ! 
chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D gene" EXACT [] -synonym: "D-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000459 -name: gene_with_trans_spliced_transcript -def: "A gene with a transcript that is trans-spliced." [SO:xp] -synonym: "gene with trans spliced transcript" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000479 ! trans_spliced_transcript -relationship: transcribed_to SO:0000479 ! implied link automatically realized ! trans_spliced_transcript - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_segment -comment: I am using the term segment instead of gene here to avoid confusion with the region 'gene'. -synonym: "vertebrate immunoglobulin T cell receptor segment" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite deficiency" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000463 -name: encodes_alternately_spliced_transcripts -def: "A gene that encodes more than one transcript." [SO:ke] -synonym: "encodes alternately spliced transcripts" EXACT [] -is_a: SO:0000401 ! 
gene_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus duplication" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene" EXACT [] -synonym: "V-GENE" EXACT [] -synonym: "variable_gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -def: "An attribute describing a gene sequence where the resulting protein is regulated by the stability of the resulting protein." [SO:ke] -synonym: "post translationally regulated by protein stability" EXACT [] -synonym: "post-translationally regulated by protein stability" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -def: "An attribute describing a gene sequence where the resulting protein is modified to regulate it." 
[SO:ke] -synonym: "post translationally regulated by protein modification" EXACT [] -synonym: "post-translationally regulated by protein modification" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene" EXACT [] -synonym: "J-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000471 -name: autoregulated -def: "The gene product is involved in its own transcriptional regulation." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -def: "The gene product is involved in its own transcriptional regulation where it decreases transcription." [SO:ke] -synonym: "negatively autoregulated" EXACT [] -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -def: "The gene product is involved in its own transcriptional regulation, where it increases transcription." [SO:ke] -synonym: "positively autoregulated" EXACT [] -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." 
[SO:ke] -synonym: "contig read" EXACT [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -def: "A gene that is polycistronic." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000478 -name: C_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C gene" EXACT [] -synonym: "C_GENE" EXACT [] -synonym: "constant gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000479 -name: trans_spliced_transcript -def: "A transcript that is trans-spliced." [SO:xp] -synonym: "trans spliced transcript" EXACT [] -synonym: "trans-spliced transcript" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000870 ! trans_spliced -relationship: has_quality SO:0000870 ! implied link automatically realized ! trans_spliced - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly." [SO:ke] -synonym: "tiling path clone" EXACT [] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occuring at the termini of a DNA transposon." [SO:ke] -synonym: "terminal inverted repeat" EXACT [] -synonym: "TIR" EXACT [] -is_a: SO:0000294 ! inverted_repeat -relationship: part_of SO:0000208 ! 
terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor gene cluster" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-CLUSTER" EXACT [] -synonym: "DJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! 
five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-C-CLUSTER" EXACT [] -synonym: "VDJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-CLUSTER" EXACT [] -synonym: "VDJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-C-CLUSTER" RELATED [] -synonym: "VJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-C-CLUSTER" EXACT [] -synonym: "VJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! 
J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-CLUSTER" EXACT [] -synonym: "VJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -synonym: "D gene recombination feature" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-HEPTAMER" EXACT [] -synonym: "three prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-NOMAMER" EXACT [] -synonym: "three prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." 
[http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-SPACER" EXACT [] -synonym: "three prime D spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-HEPTAMER" EXACT [] -synonym: "five prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-NONAMER" EXACT [] -synonym: "five prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'-SPACER" EXACT [] -synonym: "five prime D spacer" EXACT [] -synonym: "five prime D-spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000556 ! 
five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -synonym: "Hoogsteen base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Hoogsteen_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." [SO:ke] -synonym: "reverse Hoogsteen base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." 
[http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ C cluster" EXACT [] -synonym: "D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ cluster" EXACT [] -synonym: "D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J C cluster" EXACT [] -synonym: "D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000507 -name: pseudogenic_exon -def: "A non functional descendant of an exon, part of a pseudogene." [SO:ke] -comment: This is the analog of the exon of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutatations. -synonym: "pseudogenic exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! 
exon -relationship: part_of SO:0000516 ! pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J cluster" EXACT [] -synonym: "D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J C cluster" EXACT [] -synonym: "D-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000510 -name: VD_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V_D_GENE" EXACT [] -synonym: "VD gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J C cluster" EXACT [] -synonym: "J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! 
C_gene - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus aneuploid" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J cluster" EXACT [] -synonym: "J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J nonamer" EXACT [] -synonym: "J-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J heptamer" EXACT [] -synonym: "J-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -def: "A non functional descendant of a transcript, part of a pseudogene." [SO:ke] -comment: This is the analog of the transcript of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. 
Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutatations. -synonym: "pseudogenic transcript" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J spacer" EXACT [] -synonym: "J-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ cluster" EXACT [] -synonym: "V-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J cluster" EXACT [] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." 
[http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ C cluster" EXACT [] -synonym: "V-(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ cluster" EXACT [] -synonym: "V-(VDJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J cluster" EXACT [] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ C cluster" EXACT [] -synonym: "V-(VJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! 
VJ_gene - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ cluster" EXACT [] -synonym: "V-(VJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J cluster" EXACT [] -synonym: "V-(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V cluster" EXACT [] -synonym: "V-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ C cluster" EXACT [] -synonym: "V-D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! 
C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ cluster" EXACT [] -synonym: "V-D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J C cluster" EXACT [] -synonym: "V-D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J cluster" EXACT [] -synonym: "V-D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! 
DJ_gene - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J C cluster" EXACT [] -synonym: "V-D-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J cluster" EXACT [] -synonym: "V-D-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V heptamer" EXACT [] -synonym: "V-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J cluster" EXACT [] -synonym: "V-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! 
V_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J C cluster" EXACT [] -synonym: "V-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V nonamer" EXACT [] -synonym: "V-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V spacer" EXACT [] -synonym: "V-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene recombination feature" EXACT [] -synonym: "V-RS" EXACT [] -is_a: SO:0000939 ! 
vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-C-CLUSTER" EXACT [] -synonym: "DJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-C-CLUSTER" EXACT [] -synonym: "DJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-C-CLUSTER" EXACT [] -synonym: "VDJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ C cluster" EXACT [] -synonym: "V-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! 
V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." [http://www.pnas.org/cgi/content/full/100/11/6569] -synonym: "ISCR" RELATED [] -xref: http://en.wikipedia.org/wiki/Helitron "wiki" -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -synonym: "recoding pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -synonym: "designed sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite duplication" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000548 -name: gene_with_edited_transcript -def: "A gene that encodes a transcript that is edited." [SO:xp] -synonym: "gene with edited transcript" EXACT [] -is_a: SO:0001217 ! implied link automatically realized ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000873 ! edited_transcript -relationship: transcribed_to SO:0000873 ! implied link automatically realized ! 
edited_transcript - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived duplication plus aneuploid" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -synonym: "aneuploid chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "Region in 5' UTR where ribosome assembles on mRNA." [SO:ke] -synonym: "five prime ribosome binding site" EXACT [] -synonym: "RBS" RELATED [] -synonym: "Shine Dalgarno sequence" EXACT [] -synonym: "Shine-Dalgarno sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Shine-Dalgarno_sequence "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA site" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! 
mature_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "5' clip" RELATED [] -synonym: "five prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'RS" EXACT [] -synonym: "five prime D recombination signal sequence" EXACT [] -synonym: "five prime D-recombination signal sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "3'-clip" EXACT [] -synonym: "three prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://imgt.cines.fr/ligmb/LIGMlect?query=7] -synonym: "C cluster" EXACT [] -synonym: "C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D cluster" EXACT [] -synonym: "D-CLUSTER" EXACT [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J cluster" EXACT [] -synonym: "D-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: "Seven nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or J-gene recombination feature of an immunoglobulin or T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "HEPTAMER" RELATED [] -synonym: "heptamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -synonym: "nonamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000563 -name: vertebrate_immune_system_gene_recombination_spacer -synonym: "vertebrate immune system gene recombination spacer" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J C cluster" EXACT [] -synonym: "V-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J C cluster" EXACT [] -synonym: "V-(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J C cluster" EXACT [] -synonym: "V-(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inverversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -synonym: "inversion derived aneuploid chromosome" EXACT [] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promoter -synonym: "bidirectional promoter" EXACT [] -is_a: SO:0000167 ! 
promoter - -[Term] -id: SO:0000569 -name: retrotransposed -alt_id: SO:0100042 -def: "An attribute of a feature that occured as the product of a reverse transcriptase mediated event." [SO:ke] -comment: GO:0003964 RNA-directed DNA polymerase activity. -xref: http://en.wikipedia.org/wiki/Retrotransposed "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-RS" EXACT [] -synonym: "three prime D recombination signal sequence" EXACT [] -synonym: "three_prime_D-recombination_signal_sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_encoding -synonym: "miRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000572 -name: DJ_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D-J-GENE" EXACT [] -synonym: "DJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000573 -name: rRNA_encoding -synonym: "rRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000574 -name: VDJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-D-J-GENE" EXACT [] -synonym: "VDJ gene" EXACT [] -is_a: SO:0000936 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000575 -name: scRNA_encoding -synonym: "scRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000576 -name: VJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-J-GENE" EXACT [] -synonym: "VJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_encoding -synonym: "snoRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." [SO:ma] -synonym: "edited transcript feature" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -synonym: "methylation guide snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." 
[SO:ke] -synonym: "rRNA cleavage snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "pre edited region" EXACT [] -synonym: "pre-edited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "A tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. TmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and eukaryote nuclear genomes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa RNA" RELATED [] -synonym: "ssrA" RELATED [] -xref: http://en.wikipedia.org/wiki/TmRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_encoding -synonym: "C/D box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." [SO:ke] -synonym: "10Sa RNA primary transcript" RELATED [] -synonym: "ssrA RNA primary transcript" RELATED [] -synonym: "tmRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyse their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). 
These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -is_a: SO:0000188 ! implied link automatically realized ! intron -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001186 ! ribozymic -relationship: has_quality SO:0001186 ! implied link automatically realized ! ribozymic - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -synonym: "SRP RNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. 
Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000589 ! SRP_RNA_primary_transcript - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A tertiary structure in RNA where nucleotides in a loop form base pairs with a region of RNA downstream of the loop." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Pseudoknot "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "classical pseudoknot" EXACT [] -synonym: "H pseudoknot" EXACT [] -synonym: "H-pseudoknot" EXACT [] -synonym: "H-type pseudoknot" EXACT [] -synonym: "hairpin-type pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. 
This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000595 ! C_D_box_snoRNA_primary_transcript - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "box H/ACA snoRNA" EXACT [] -synonym: "H ACA box snoRNA" EXACT [] -synonym: "H/ACA box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000596 ! H_ACA_box_snoRNA_primary_transcript - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." [SO:ke] -synonym: "C/D box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! 
snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -synonym: "H ACA box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." [http://www.rna.ucla.edu/index.html] -is_obsolete: true - -[Term] -id: SO:0000598 -name: edited_by_C_insertion_and_dinucleotide_insertion -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000599 -name: edited_by_C_to_U_substitution -is_obsolete: true - -[Term] -id: SO:0000600 -name: edited_by_A_to_I_substitution -is_obsolete: true - -[Term] -id: SO:0000601 -name: edited_by_G_addition -is_obsolete: true - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. 
The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing block" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing domain" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "unedited region" EXACT [] -is_a: SO:0000579 ! 
edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_encoding -synonym: "H ACA box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -synonym: "oligo U tail" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000602 ! guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -synonym: "bacterial RNApol promoter" EXACT [] -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -synonym: "bacterial terminator" EXACT [] -is_a: SO:0000141 ! 
terminator -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -synonym: "terminator of type 2 RNApol III promoter" EXACT [] -is_a: SO:0000951 ! eukaryotic_terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -synonym: "RNApol III promoter type 1" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "RNApol III promoter type 2" EXACT [] -synonym: "tRNA promoter" RELATED [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -def: "A variably distant linear promoter region recognised by TFIIIC, with consensus sequence TGGCnnAGTGG." [SO:ke] -synonym: "A-box" EXACT [] -xref: http://en.wikipedia.org/wiki/A-box "wiki" -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000620 -name: B_box -def: "A variably distant linear promoter region recognised by TFIIIC, with consensus sequence AGGTTCCAnnCC." [SO:ke] -synonym: "B-box" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -synonym: "RNApol III promoter type 3" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -def: "An RNA polymerase III type 1 promoter with consensus sequence CAnnCCn." [SO:ke] -synonym: "C-box" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000623 -name: snRNA_encoding -synonym: "snRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -synonym: "chromosomal regulatory element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000627 -name: insulator -def: "A trancriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -synonym: "five prime open reading frame" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -def: "A start codon upstream of the ORF." [SO:ke] -synonym: "upstream AUG codon" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000631 -name: polycistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." [SO:ke] -synonym: "polycistronic primary transcript" EXACT [] -is_a: SO:0000078 ! 
implied link automatically realized ! polycistronic_transcript -is_a: SO:0000185 ! implied link automatically realized ! primary_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000632 -name: monocistronic_primary_transcript -def: "A primary transcript encoding for one gene product." [SO:ke] -synonym: "monocistronic primary transcript" EXACT [] -is_a: SO:0000185 ! implied link automatically realized ! primary_transcript -is_a: SO:0000665 ! implied link automatically realized ! monocistronic_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000633 -name: monocistronic_mRNA -def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd] -synonym: "monocistronic mRNA" EXACT [] -synonym: "monocistronic processed transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Monocistronic_mRNA "wiki" -is_a: SO:0000234 ! implied link automatically realized ! mRNA -is_a: SO:0000665 ! implied link automatically realized ! monocistronic_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000634 -name: polycistronic_mRNA -def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd] -synonym: "polycistronic mRNA" EXACT [] -synonym: "polycistronic processed transcript" RELATED [] -xref: http://en.wikipedia.org/wiki/Polycistronic_mRNA "wiki" -is_a: SO:0000078 ! implied link automatically realized ! polycistronic_transcript -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "A primary transcript that donates the spliced leader to other mRNA." 
[SO:ke] -synonym: "mini exon donor RNA" EXACT [] -synonym: "mini-exon donor RNA" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -synonym: "spliced leader RNA" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000635 ! mini_exon_donor_RNA - -[Term] -id: SO:0000637 -name: engineered_plasmid -def: "A plasmid that is engineered." [SO:xp] -synonym: "engineered plasmid" EXACT [] -synonym: "engineered plasmid gene" RELATED [] -is_a: SO:0000155 ! implied link automatically realized ! plasmid -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -synonym: "transcribed spacer region" EXACT [] -is_a: SO:0000838 ! rRNA_primary_transcript_region - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -synonym: "internal transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -synonym: "external transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -synonym: "tetranucleotide repeat microsatellite feature" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_encoding -synonym: "SRP RNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro RNA primary transcript" EXACT [] -synonym: "miRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000648 -name: stRNA_primary_transcript -def: "A primary transcript encoding a small temporal mRNA (SO:0000649)." [SO:ke] -synonym: "small temporal RNA primary transcript" EXACT [] -synonym: "stRNA primary transcript" EXACT [] -is_a: SO:0000647 ! 
miRNA_primary_transcript - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000255 ! rRNA_small_subunit_primary_transcript - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilises 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! 
large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000654 -name: maxicircle_gene -def: "A mitochondrial gene located in a maxicircle." [SO:xp] -synonym: "maxi-circle gene" EXACT [] -synonym: "maxicircle gene" EXACT [] -is_a: SO:0000089 ! implied link automatically realized ! kinetoplast_gene -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000742 ! maxicircle -relationship: part_of SO:0000742 ! implied link automatically realized ! maxicircle - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000656 -name: stRNA_encoding -synonym: "stRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region -relationship: has_part SO:0000726 ! repeat_unit - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." 
[SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_encoding -synonym: "tmRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_obsolete: true - -[Term] -id: SO:0000661 -name: intron_attribute -is_obsolete: true - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000663 -name: tRNA_encoding -synonym: "tRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -synonym: "introgressed chromosome region" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000665 -name: monocistronic_transcript -def: "A transcript that is monocistronic." [SO:xp] -synonym: "monocistronic transcript" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000878 ! monocistronic -relationship: has_quality SO:0000878 ! implied link automatically realized ! monocistronic - -[Term] -id: SO:0000666 -name: mobile_intron -def: "An intron (mitochondrial, chloroplast, nuclear or prokaryotic) that encodes a double strand sequence specific endonuclease allowing for mobility." [SO:ke] -synonym: "mobile intron" EXACT [] -is_a: SO:0000188 ! implied link automatically realized ! intron -is_a: SO:0001037 ! implied link automatically realized ! mobile_genetic_element -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0000667 -name: insertion -def: "A region of sequence that has been inserted." [SO:ke] -subset: SOFA -is_a: SO:0001059 ! 
sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -synonym: "sequence rearrangement feature" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." [SO:ma] -synonym: "chromosome breakage sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -synonym: "internal eliminated sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -synonym: "macronucleus destined segment" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non canonical splice site" EXACT [] -synonym: "non-canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000678 -consider: SO:0000679 - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." 
[SO:ke] -synonym: "canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000676 -consider: SO:0000677 - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -synonym: "canonical 3' splice site" EXACT [] -synonym: "canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." [SO:ke] -synonym: "canonical 5' splice site" EXACT [] -synonym: "canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." [SO:ke] -synonym: "non canonical 3' splice site" RELATED [] -synonym: "non canonical three prime splice site" EXACT [] -synonym: "non-canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non canonical 5' splice site" EXACT [] -synonym: "non canonical five prime splice site" EXACT [] -synonym: "non-canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non ATG start codon" EXACT [] -synonym: "non canonical start codon" EXACT [] -synonym: "non-canonical start codon" EXACT [] -is_a: SO:0000318 ! start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -synonym: "aberrant processed transcript" EXACT [] -is_a: SO:0000673 ! 
transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -synonym: "exonic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000685 -name: DNAseI_hypersensitive_site -synonym: "DHS" EXACT [] -synonym: "DNAseI hypersensitive site" EXACT [] -is_a: SO:0000322 ! nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "For some translocations, particularly but not exclusively, reciprocal translocations, the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements." [SO:ma] -synonym: "translocation element" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! 
expressed_sequence_match - -[Term] -id: SO:0000690 -name: gene_with_polycistronic_transcript -def: "A gene that encodes a polycistronic transcript." [SO:xp] -synonym: "gene with polycistronic transcript" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000078 ! polycistronic_transcript -relationship: transcribed_to SO:0000078 ! implied link automatically realized ! polycistronic_transcript - -[Term] -id: SO:0000691 -name: cleaved_initiator_methionine -alt_id: BS:00067 -def: "The initiator methionine that has been cleaved from a mature polypeptide sequence." [EBIBS:GAR] -subset: biosapiens -synonym: "cleaved initiator methionine" EXACT [] -synonym: "init_met" RELATED [uniprot:feature_type] -synonym: "initiator methionine" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000692 -name: gene_with_dicistronic_transcript -def: "A gene that encodes a dicistronic transcript." [SO:xp] -synonym: "gene with dicistronic transcript" EXACT [] -is_a: SO:0000690 ! implied link automatically realized ! gene_with_polycistronic_transcript -intersection_of: SO:0000690 ! gene_with_polycistronic_transcript -intersection_of: transcribed_to SO:0000079 ! dicistronic_transcript -relationship: transcribed_to SO:0000079 ! implied link automatically realized ! dicistronic_transcript - -[Term] -id: SO:0000693 -name: gene_with_recoded_mRNA -def: "A gene that encodes an mRNA that is recoded." [SO:xp] -synonym: "gene with recoded mRNA" EXACT [] -is_a: SO:0001217 ! implied link automatically realized ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000881 ! recoded -relationship: has_quality SO:0000881 ! implied link automatically realized ! 
recoded - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives (alleles) exist in normal individuals in some population(s), wherein the least frequent allele has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000697 -name: gene_with_stop_codon_read_through -def: "A gene that encodes a transcript with stop codon readthrough." [SO:xp] -synonym: "gene with stop codon read through" EXACT [] -is_a: SO:0000693 ! implied link automatically realized ! gene_with_recoded_mRNA -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000883 ! stop_codon_read_through -relationship: has_part SO:0000883 ! implied link automatically realized ! stop_codon_read_through - -[Term] -id: SO:0000698 -name: gene_with_stop_codon_redefined_as_pyrrolysine -def: "A gene encoding an mRNA that has the stop codon redefined as pyrrolysine." [SO:xp] -synonym: "gene with stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000697 ! implied link automatically realized ! gene_with_stop_codon_read_through -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000884 ! stop_codon_redefined_as_pyrrolysine -relationship: has_part SO:0000884 ! implied link automatically realized ! 
stop_codon_redefined_as_pyrrolysine - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000001 ! region - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjcent copies of a region (of length greater than 1)." 
[SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The 5' five prime splice site region of the donor RNA." [SO:ke] -comment: SL RNA contains a donor site. -synonym: "5 prime trans splice site" RELATED [] -synonym: "trans splice donor site" EXACT [] -synonym: "trans-splice donor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -synonym: "SL1 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -synonym: "SL2 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000710 -name: gene_with_stop_codon_redefined_as_selenocysteine -def: "A gene encoding an mRNA that has the stop codon redefined as selenocysteine." [SO:xp] -synonym: "gene with stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000697 ! implied link automatically realized ! gene_with_stop_codon_read_through -intersection_of: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: has_part SO:0000885 ! stop_codon_redefined_as_selenocysteine -relationship: has_part SO:0000885 ! implied link automatically realized ! stop_codon_redefined_as_selenocysteine - -[Term] -id: SO:0000711 -name: gene_with_mRNA_recoded_by_translational_bypass -def: "A gene with mRNA recoded by translational bypass." 
[SO:xp] -synonym: "gene with mRNA recoded by translational bypass" EXACT [] -is_a: SO:0000693 ! implied link automatically realized ! gene_with_recoded_mRNA -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000886 ! recoded_by_translational_bypass -relationship: has_quality SO:0000886 ! implied link automatically realized ! recoded_by_translational_bypass - -[Term] -id: SO:0000712 -name: gene_with_transcript_with_translational_frameshift -def: "A gene encoding a transcript that has a translational frameshift." [SO:xp] -synonym: "gene with transcript with translational frameshift" EXACT [] -is_a: SO:0000693 ! implied link automatically realized ! gene_with_recoded_mRNA -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_quality SO:0000887 ! translationally_frameshifted -relationship: has_quality SO:0000887 ! implied link automatically realized ! translationally_frameshifted - -[Term] -id: SO:0000713 -name: DNA_motif -def: "A motif that is active in the DNA form of the sequence." [SO:ke] -synonym: "DNA motif" EXACT [] -xref: http://en.wikipedia.org/wiki/DNA_motif "wiki" -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000716 -name: dicistronic_mRNA -def: "An mRNA that has the quality dicistronic." [SO:ke] -synonym: "dicistronic mRNA" EXACT [] -synonym: "dicistronic processed transcript" RELATED [] -is_a: SO:0000079 ! implied link automatically realized ! dicistronic_transcript -is_a: SO:0000634 ! implied link automatically realized ! polycistronic_mRNA -intersection_of: SO:0000234 ! 
mRNA -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interrupted by one or more stop codons; usually identified through intergenomic sequence comparisons." [SGD:rb] -comment: Term requested by Rama from SGD. -synonym: "blocked reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000720 -name: foreign_transposable_element -def: "A transposable element that is foreign." [SO:ke] -comment: requested by Michael on 19 Nov 2004. -synonym: "foreign transposable element" EXACT [] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000784 ! implied link automatically realized ! foreign - -[Term] -id: SO:0000721 -name: gene_with_dicistronic_primary_transcript -def: "A gene that encodes a dicistronic primary transcript." [SO:xp] -comment: Requested by Michael, 19 nov 2004. 
-synonym: "gene with dicistronic primary transcript" EXACT [] -is_a: SO:0000692 ! implied link automatically realized ! gene_with_dicistronic_transcript -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:1001197 ! dicistronic_primary_transcript -relationship: transcribed_to SO:1001197 ! implied link automatically realized ! dicistronic_primary_transcript - -[Term] -id: SO:0000722 -name: gene_with_dicistronic_mRNA -def: "A gene that encodes a polycistronic mRNA." [SO:xp] -comment: Requested by MA nov 19 2004. -synonym: "gene with dicistronic mRNA" EXACT [] -synonym: "gene with dicistronic processed transcript" EXACT [] -is_a: SO:0000692 ! implied link automatically realized ! gene_with_dicistronic_transcript -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:0000716 ! dicistronic_mRNA -relationship: transcribed_to SO:0000716 ! implied link automatically realized ! dicistronic_mRNA - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." [SO:ma] -synonym: "intervening DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/IDNA "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000726 -name: repeat_unit -def: "The simplest repeated component of a repeat region. A single repeat." [SO:ke] -comment: Added to comply with the feature table. A single repeat. -synonym: "repeat unit" EXACT [] -xref: http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -is_a: SO:0001055 ! implied link automatically realized ! transcriptional_cis_regulatory_region -intersection_of: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: has_part SO:0000235 ! TF_binding_site -relationship: has_part SO:0000235 ! implied link automatically realized ! TF_binding_site - -[Term] -id: SO:0000728 -name: intein -def: "A region of a peptide that is able to excise itself and rejoin the remaining portions with a peptide bond." [SO:ke] -comment: Intein-mediated protein splicing occurs after mRNA has been translated into a protein. -synonym: "protein intron" RELATED [] -xref: http://en.wikipedia.org/wiki/Intein "wiki" -is_a: SO:0100011 ! 
cleaved_peptide_region - -[Term] -id: SO:0000729 -name: intein_containing -def: "An attribute of protein-coding genes where the initial protein product contains an intein." [SO:ke] -synonym: "intein containing" EXACT [] -is_a: SO:0000010 ! protein_coding - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000731 -name: fragmentary -def: "An attribute to describe a feature that is incomplete." [SO:ke] -comment: Term added because of request by MO people. -synonym: "fragment" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000732 -name: predicted -def: "An attribute describing an unverified region." [SO:ke] -xref: http://en.wikipedia.org/wiki/Predicted "wiki" -is_a: SO:0000905 ! status - -[Term] -id: SO:0000733 -name: feature_attribute -def: "An attribute describing a located_sequence_feature." [SO:ke] -synonym: "feature attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000734 -name: exemplar_mRNA -def: "An exemplar is a representative cDNA sequence for each gene. The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: Added for the MO people. -synonym: "exemplar mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000864 ! exemplar -relationship: has_quality SO:0000864 ! implied link automatically realized ! exemplar - -[Term] -id: SO:0000735 -name: sequence_location -synonym: "sequence location" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_sequence -synonym: "organelle sequence" EXACT [] -is_a: SO:0000735 ! 
sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "mitochondrial sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000738 -name: nuclear_sequence -synonym: "nuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -synonym: "nucleomorphic sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000740 -name: plastid_sequence -synonym: "plastid sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000741 -name: kinetoplast -alt_id: SO:0000826 -def: "A kinetoplast is an interlocked network of thousands of minicircles and tens of maxi circles, located near the base of the flagellum of some protozoan species." [PMID:8395055] -synonym: "kinetoplast_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Kinetoplast "wiki" -is_a: SO:0001260 ! sequence_collection - -[Term] -id: SO:0000742 -name: maxicircle -alt_id: SO:0000827 -def: "A maxicircle is a replicon, part of a kinetoplast, that contains open reading frames and replicates via a rolling circle method." [PMID:8395055] -synonym: "maxicircle_chromosome" EXACT [] -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000741 ! kinetoplast - -[Term] -id: SO:0000743 -name: apicoplast_sequence -synonym: "apicoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -synonym: "chromoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -synonym: "chloroplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -synonym: "cyanelle sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -synonym: "leucoplast sequence" EXACT [] -is_a: SO:0000740 ! 
plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -synonym: "proplastid sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_location -synonym: "plasmid location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -synonym: "amplification origin" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_location -synonym: "proviral location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -def: "The region of sequence that has been inserted and is being propogated by the clone." [SO:ke] -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000754 -name: lambda_vector -def: "The lambda bacteriophage is the vector for the linear lambda clone. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -synonym: "lambda vector" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000755 -name: plasmid_vector -synonym: "plasmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Plasmid_vector#Vectors "wiki" -is_a: SO:0000440 ! vector_replicon -intersection_of: SO:0001235 ! replicon -intersection_of: derives_from SO:0000155 ! plasmid -relationship: derives_from SO:0000155 ! implied link automatically realized ! plasmid - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template." 
[SO:ma] -xref: http://en.wikipedia.org/wiki/CDNA "wiki" -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -synonym: "single strand cDNA" EXACT [] -synonym: "single stranded cDNA" EXACT [] -synonym: "single-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -synonym: "double strand cDNA" RELATED [] -synonym: "double stranded cDNA" EXACT [] -synonym: "double-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_obsolete: true - -[Term] -id: SO:0000760 -name: YAC_clone -is_obsolete: true - -[Term] -id: SO:0000761 -name: phagemid_clone -is_obsolete: true - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000763 -name: fosmid_clone -is_obsolete: true - -[Term] -id: SO:0000764 -name: BAC_clone -is_obsolete: true - -[Term] -id: SO:0000765 -name: cosmid_clone -is_obsolete: true - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -def: "A tRNA sequence that has a pyrrolysine anticodon, and a 3' pyrrolysine binding region." [SO:ke] -synonym: "pyrrolysyl tRNA" EXACT [] -synonym: "pyrrolysyl-transfer ribonucleic acid" EXACT [] -synonym: "pyrrolysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0001178 ! pyrrolysine_tRNA_primary_transcript - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome." [SO:ma] -is_a: SO:0000155 ! plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria. Processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." 
[doi:10.1093/nar/gkh795, Indiana:kw, issn:1362-4962] -comment: Added in response to comment from Kelly Williams from Indiana. Nov 2005. -synonym: "tmRNA coding piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw] -comment: Added in response to Kelly Williams from Indiana. Date: Nov 2005. -synonym: "tmRNA acceptor piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000771 -name: QTL -def: "A quantitative trait locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." [http://rgd.mcw.edu/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005. -synonym: "quantitative trait locus" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000772 -name: genomic_island -def: "A genomic island is an integrated mobile genetic element, characterized by size (over 10 Kb). It that has features that suggest a foreign origin. These can include nucleotide distribution (oligonucleotides signature, CG content etc.) that differs from the bulk of the chromosome and/or genes suggesting DNA mobility." [Phigo:at, SO:ke] -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -synonym: "genomic island" EXACT [] -xref: http://en.wikipedia.org/wiki/Genomic_island "wiki" -is_a: SO:0001039 ! 
integrated_mobile_genetic_element - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "pathogenic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands. -synonym: "metabolic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -def: "An adaptive island is a genomic island that provides an adaptive advantage to the host." [SO:ke] -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands. Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "adaptive island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands. Evolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA gene. John T. Sullivan and Clive W. Ronso PNAS 1998 Apr 28 95 (9) 5145-5149. -synonym: "symbiosis island" EXACT [] -is_a: SO:0000772 ! 
genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000779 -name: engineered_episome -def: "An episome that is engineered." [SO:xp] -comment: Requested by Lynn Crosby Jan 2006. -synonym: "engineered episome" EXACT [] -is_a: SO:0000637 ! implied link automatically realized ! engineered_plasmid -is_a: SO:0000768 ! implied link automatically realized ! episome -intersection_of: SO:0000768 ! episome -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000780 -name: transposable_element_attribute -comment: Added by KE Jan 2006 to capture the kinds of attributes of TEs -is_obsolete: true - -[Term] -id: SO:0000781 -name: transgenic -def: "Attribute describing sequence that has been integrated with foreign sequence." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000782 -name: natural -def: "An attribute describing a feature that occurs in nature." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000783 -name: engineered -def: "An attribute to describe a region that was modified in vitro." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000784 -name: foreign -def: "An attribute to describe a region from another species." [SO:ke] -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000785 -name: cloned_region -comment: Added in response to Lynn Crosby. A clone insert may be composed of many cloned regions. -synonym: "cloned region" EXACT [] -synonym: "cloned segment" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000786 -name: reagent_attribute -comment: Added jan 2006 by KE. -synonym: "reagent attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000787 -name: clone_attribute -is_obsolete: true - -[Term] -id: SO:0000788 -name: cloned -is_obsolete: true - -[Term] -id: SO:0000789 -name: validated -def: "An attribute to describe a feature that has been proven." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000790 -name: invalidated -def: "An attribute describing a feature that is invalidated." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000791 -name: cloned_genomic -is_obsolete: true - -[Term] -id: SO:0000792 -name: cloned_cDNA -is_obsolete: true - -[Term] -id: SO:0000793 -name: engineered_DNA -is_obsolete: true - -[Term] -id: SO:0000794 -name: engineered_rescue_region -def: "A rescue region that is engineered." [SO:xp] -synonym: "engineered rescue fragment" EXACT [] -synonym: "engineered rescue region" EXACT [] -synonym: "engineered rescue segment" EXACT [] -is_a: SO:0000411 ! implied link automatically realized ! rescue_region -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000411 ! rescue_region -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000795 -name: rescue_mini_gene -def: "A mini_gene that rescues." [SO:xp] -synonym: "rescue mini gene" EXACT [] -synonym: "rescue mini-gene" EXACT [] -is_a: SO:0000815 ! implied link automatically realized ! mini_gene -intersection_of: SO:0000815 ! mini_gene -intersection_of: has_quality SO:0000814 ! rescue -relationship: has_quality SO:0000814 ! implied link automatically realized ! 
rescue - -[Term] -id: SO:0000796 -name: transgenic_transposable_element -def: "TE that has been modified in vitro, including insertion of DNA derived from a source other than the originating TE." [FB:mc] -comment: Modified as requested by Lynn - FB. May 2007. -synonym: "transgenic transposable element" EXACT [] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: derives_from SO:0000151 ! clone -intersection_of: has_quality SO:0000781 ! transgenic -relationship: derives_from SO:0000151 ! implied link automatically realized ! clone -relationship: has_quality SO:0000781 ! implied link automatically realized ! transgenic - -[Term] -id: SO:0000797 -name: natural_transposable_element -def: "TE that exists (or existed) in nature." [FB:mc] -synonym: "natural transposable element" EXACT [] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -is_a: SO:0001476 ! implied link automatically realized ! natural_plasmid -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000782 ! natural - -[Term] -id: SO:0000798 -name: engineered_transposable_element -def: "TE that has been modified by manipulations in vitro." [FB:mc] -synonym: "engineered transposable element" EXACT [] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000799 -name: engineered_foreign_transposable_element -def: "A transposable_element that is engineered and foreign." [FB:mc] -synonym: "engineered foreign transposable element" EXACT [] -is_a: SO:0000720 ! implied link automatically realized ! foreign_transposable_element -is_a: SO:0000798 ! implied link automatically realized ! engineered_transposable_element -is_a: SO:0000805 ! 
implied link automatically realized ! engineered_foreign_region -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000800 -name: assortment_derived_duplication -def: "A multi-chromosome duplication aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment derived duplication" EXACT [] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000801 -name: assortment_derived_deficiency_plus_duplication -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency and a duplication." [FB:gm] -synonym: "assortment derived deficiency plus duplication" EXACT [] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000802 -name: assortment_derived_deficiency -def: "A multi-chromosome deficiency aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment-derived deficiency" EXACT [] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000803 -name: assortment_derived_aneuploid -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency or a duplication." [FB:gm] -synonym: "assortment derived aneuploid" EXACT [] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000804 -name: engineered_region -def: "A region that is engineered." [SO:xp] -synonym: "engineered region" EXACT [] -synonym: "engineered sequence" EXACT [] -is_a: SO:0001409 ! biomaterial_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! implied link automatically realized ! engineered - -[Term] -id: SO:0000805 -name: engineered_foreign_region -def: "A region that is engineered and foreign." [SO:xp] -synonym: "engineered foreign region" EXACT [] -is_a: SO:0000804 ! 
implied link automatically realized ! engineered_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000784 ! implied link automatically realized ! foreign - -[Term] -id: SO:0000806 -name: fusion -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000807 -name: engineered_tag -def: "A tag that is engineered." [SO:xp] -synonym: "engineered tag" EXACT [] -is_a: SO:0000324 ! implied link automatically realized ! tag -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000324 ! tag -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000808 -name: validated_cDNA_clone -def: "A cDNA clone that has been validated." [SO:xp] -synonym: "validated cDNA clone" EXACT [] -is_a: SO:0000317 ! implied link automatically realized ! cDNA_clone -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000789 ! validated -relationship: has_quality SO:0000789 ! implied link automatically realized ! validated - -[Term] -id: SO:0000809 -name: invalidated_cDNA_clone -def: "A cDNA clone that is invalid." [SO:xp] -synonym: "invalidated cDNA clone" EXACT [] -is_a: SO:0000317 ! implied link automatically realized ! cDNA_clone -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000790 ! invalidated -relationship: has_quality SO:0000790 ! implied link automatically realized ! invalidated - -[Term] -id: SO:0000810 -name: chimeric_cDNA_clone -def: "A cDNA clone invalidated because it is chimeric." [SO:xp] -synonym: "chimeric cDNA clone" EXACT [] -is_a: SO:0000809 ! implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA -relationship: has_quality SO:0000362 ! implied link automatically realized ! 
invalidated_by_chimeric_cDNA - -[Term] -id: SO:0000811 -name: genomically_contaminated_cDNA_clone -def: "A cDNA clone invalidated by genomic contamination." [SO:xp] -synonym: "genomically contaminated cDNA clone" EXACT [] -is_a: SO:0000809 ! implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000414 ! invalidated_by_genomic_contamination -relationship: has_quality SO:0000414 ! implied link automatically realized ! invalidated_by_genomic_contamination - -[Term] -id: SO:0000812 -name: polyA_primed_cDNA_clone -def: "A cDNA clone invalidated by polyA priming." [SO:xp] -synonym: "polyA primed cDNA clone" EXACT [] -is_a: SO:0000809 ! implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000415 ! invalidated_by_genomic_polyA_primed_cDNA -relationship: has_quality SO:0000415 ! implied link automatically realized ! invalidated_by_genomic_polyA_primed_cDNA - -[Term] -id: SO:0000813 -name: partially_processed_cDNA_clone -def: "A cDNA invalidated clone by partial processing." [SO:xp] -synonym: "partially processed cDNA clone" EXACT [] -is_a: SO:0000809 ! implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000416 ! invalidated_by_partial_processing -relationship: has_quality SO:0000416 ! implied link automatically realized ! invalidated_by_partial_processing - -[Term] -id: SO:0000814 -name: rescue -def: "An attribute describing a region's ability, when introduced to a mutant organism, to re-establish (rescue) a phenotype." [SO:ke] -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000815 -name: mini_gene -def: "By definition, minigenes are short open-reading frames (ORF), usually encoding approximately 9 to 20 amino acids, which are expressed in vivo (as distinct from being synthesized as peptide or protein ex vivo and subsequently injected). The in vivo synthesis confers a distinct advantage: the expressed sequences can enter both antigen presentation pathways, MHC I (inducing CD8+ T- cells, which are usually cytotoxic T-lymphocytes (CTL)) and MHC II (inducing CD4+ T-cells, usually 'T-helpers' (Th)); and can encounter B-cells, inducing antibody responses. Three main vector approaches have been used to deliver minigenes: viral vectors, bacterial vectors and plasmid DNA." [PMID:15992143] -synonym: "mini gene" EXACT [] -is_a: SO:0000236 ! ORF - -[Term] -id: SO:0000816 -name: rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "rescue gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000814 ! rescue -relationship: has_quality SO:0000814 ! implied link automatically realized ! rescue - -[Term] -id: SO:0000817 -name: wild_type -def: "An attribute describing sequence with the genotype found in nature and/or standard laboratory stock." [SO:ke] -synonym: "wild type" EXACT [] -xref: http://en.wikipedia.org/wiki/Wild_type "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000818 -name: wild_type_rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "wild type rescue gene" EXACT [] -is_a: SO:0000816 ! rescue_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000817 ! wild_type -relationship: has_quality SO:0000817 ! implied link automatically realized ! wild_type - -[Term] -id: SO:0000819 -name: mitochondrial_chromosome -def: "A chromosome originating in a mitochondria." [SO:xp] -synonym: "mitochondrial chromosome" EXACT [] -is_a: SO:0000340 ! 
implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence -relationship: has_origin SO:0000737 ! implied link automatically realized ! mitochondrial_sequence - -[Term] -id: SO:0000820 -name: chloroplast_chromosome -def: "A chromosome originating in a chloroplast." [SO:xp] -synonym: "chloroplast chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000745 ! chloroplast_sequence -relationship: has_origin SO:0000745 ! implied link automatically realized ! chloroplast_sequence - -[Term] -id: SO:0000821 -name: chromoplast_chromosome -def: "A chromosome originating in a chromoplast." [SO:xp] -synonym: "chromoplast chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000744 ! chromoplast_sequence -relationship: has_origin SO:0000744 ! implied link automatically realized ! chromoplast_sequence - -[Term] -id: SO:0000822 -name: cyanelle_chromosome -def: "A chromosome originating in a cyanelle." [SO:xp] -synonym: "cyanelle chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000746 ! cyanelle_sequence -relationship: has_origin SO:0000746 ! implied link automatically realized ! cyanelle_sequence - -[Term] -id: SO:0000823 -name: leucoplast_chromosome -def: "A chromosome with origin in a leucoplast." [SO:xp] -synonym: "leucoplast chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000747 ! leucoplast_sequence -relationship: has_origin SO:0000747 ! implied link automatically realized ! 
leucoplast_sequence - -[Term] -id: SO:0000824 -name: macronuclear_chromosome -def: "A chromosome originating in a macronucleus." [SO:xp] -synonym: "macronuclear chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000083 ! macronuclear_sequence -relationship: has_origin SO:0000083 ! implied link automatically realized ! macronuclear_sequence - -[Term] -id: SO:0000825 -name: micronuclear_chromosome -def: "A chromosome originating in a micronucleus." [SO:xp] -synonym: "micronuclear chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000084 ! micronuclear_sequence -relationship: has_origin SO:0000084 ! implied link automatically realized ! micronuclear_sequence - -[Term] -id: SO:0000828 -name: nuclear_chromosome -def: "A chromosome originating in a nucleus." [SO:xp] -synonym: "nuclear chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000738 ! nuclear_sequence -relationship: has_origin SO:0000738 ! implied link automatically realized ! nuclear_sequence - -[Term] -id: SO:0000829 -name: nucleomorphic_chromosome -def: "A chromosome originating in a nucleomorph." [SO:xp] -synonym: "nucleomorphic chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence -relationship: has_origin SO:0000739 ! implied link automatically realized ! nucleomorphic_sequence - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. 
-subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000832 -name: promoter_region -def: "A region of sequence which is part of a promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -def: "A region of a mature transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -def: "A region of an mRNA." 
[SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000838 -name: rRNA_primary_transcript_region -def: "A region of an rRNA primary transcript." [SO:ke] -comment: To allow transcribed_spacer_region to have a path to the root. -synonym: "rRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000840 -name: repeat_component -def: "A region of a repeated sequence." [SO:ke] -comment: A manufactured to group the parts of repeats, to give them an is_a path back to the root. -synonym: "repeat component" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." 
[SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000843 -name: bacterial_RNApol_promoter_region -def: "A region which is part of a bacterial RNA polymerase promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of bacterial_RNApol_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000844 -name: RNApol_II_promoter_region -def: "A region of sequence which is a promoter for RNA polymerase II." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_II_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000845 -name: RNApol_III_promoter_type_1_region -def: "A region of sequence which is a promoter for RNA polymerase III type 1." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_1 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000846 -name: RNApol_III_promoter_type_2_region -def: "A region of sequence which is a promoter for RNA polymerase III type 2." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_2 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000847 -name: tmRNA_region -def: "A region of a tmRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of tmRNA, thus giving them an is_a path back to the root. -synonym: "tmRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000584 ! 
tmRNA - -[Term] -id: SO:0000848 -name: LTR_component -synonym: "long term repeat component" EXACT [] -synonym: "LTR component" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000849 -name: three_prime_LTR_component -synonym: "3' long terminal repeat component" EXACT [] -synonym: "three prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000850 -name: five_prime_LTR_component -synonym: "5' long term repeat component" EXACT [] -synonym: "five prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000851 -name: CDS_region -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0000853 -name: homologous_region -def: "A region that is homologous to another region." [SO:ke] -synonym: "homolog" EXACT [] -synonym: "homologous region" EXACT [] -synonym: "homologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Homology_(biology) "wiki" -is_a: SO:0000330 ! implied link automatically realized ! conserved_region -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000857 ! homologous -relationship: has_quality SO:0000857 ! implied link automatically realized ! homologous - -[Term] -id: SO:0000854 -name: paralogous_region -def: "A homologous_region that is paralogous to another region." [SO:ke] -comment: A term to be used in conjunction with the paralogous_to relationship. 
-synonym: "paralog" EXACT [] -synonym: "paralogous region" EXACT [] -synonym: "paralogue" EXACT [] -xref: http://en.wikipedia.org/wiki/Paralog#Paralogy "wiki" -is_a: SO:0000853 ! implied link automatically realized ! homologous_region -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000859 ! paralogous -relationship: has_quality SO:0000859 ! implied link automatically realized ! paralogous - -[Term] -id: SO:0000855 -name: orthologous_region -def: "A homologous_region that is orthologous to another region." [SO:ke] -comment: This term should be used in conjunction with the similarity relationships defined in SO. -synonym: "ortholog" EXACT [] -synonym: "orthologous region" EXACT [] -synonym: "orthologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Ortholog#Orthology "wiki" -is_a: SO:0000853 ! implied link automatically realized ! homologous_region -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000858 ! orthologous -relationship: has_quality SO:0000858 ! implied link automatically realized ! orthologous - -[Term] -id: SO:0000856 -name: conserved -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000857 -name: homologous -def: "Similarity due to common ancestry." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000858 -name: orthologous -def: "An attribute describing a kind of homology where divergence occured after a speciation event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000859 -name: paralogous -def: "An attribute describing a kind of homology where divergence occurred after a duplication event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000860 -name: syntenic -def: "Attribute describing sequence regions occurring in same order on chromosome of different species." [SO:ke] -xref: http://en.wikipedia.org/wiki/Syntenic "wiki" -is_a: SO:0000856 ! 
conserved - -[Term] -id: SO:0000861 -name: capped_primary_transcript -def: "A primary transcript that is capped." [SO:xp] -synonym: "capped primary transcript" EXACT [] -is_a: SO:0000185 ! implied link automatically realized ! primary_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped -relationship: adjacent_to SO:0000581 ! implied link automatically realized ! cap -relationship: has_quality SO:0000146 ! implied link automatically realized ! capped - -[Term] -id: SO:0000862 -name: capped_mRNA -def: "An mRNA that is capped." [SO:xp] -synonym: "capped mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped -relationship: adjacent_to SO:0000581 ! implied link automatically realized ! cap -relationship: has_quality SO:0000146 ! implied link automatically realized ! capped - -[Term] -id: SO:0000863 -name: mRNA_attribute -def: "An attribute describing an mRNA feature." [SO:ke] -synonym: "mRNA attribute" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000864 -name: exemplar -def: "An attribute describing a sequence is representative of a class of similar sequences." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000865 -name: frameshift -def: "An attribute describing a sequence that contains a mutation involving the deletion or insertion of one or more bases, where this number is not divisible by 3." [SO:ke] -xref: http://en.wikipedia.org/wiki/Frameshift "wiki" -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000866 -name: minus_1_frameshift -def: "A frameshift caused by deleting one base." [SO:ke] -synonym: "minus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000867 -name: minus_2_frameshift -def: "A frameshift caused by deleting two bases." 
[SO:ke] -synonym: "minus 2 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000868 -name: plus_1_frameshift -def: "A frameshift caused by inserting one base." [SO:ke] -synonym: "plus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000869 -name: plus_2_framshift -def: "A frameshift caused by inserting two bases." [SO:ke] -synonym: "plus 2 framshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000870 -name: trans_spliced -def: "An attribute describing transcript sequence that is created by splicing exons from diferent genes." [SO:ke] -synonym: "trans-spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000871 -name: polyadenylated_mRNA -def: "An mRNA that is polyadenylated." [SO:xp] -synonym: "polyadenylated mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000610 ! polyA_sequence -intersection_of: has_quality SO:0000246 ! polyadenylated -relationship: adjacent_to SO:0000610 ! implied link automatically realized ! polyA_sequence -relationship: has_quality SO:0000246 ! implied link automatically realized ! polyadenylated - -[Term] -id: SO:0000872 -name: trans_spliced_mRNA -def: "An mRNA that is trans-spliced." [SO:xp] -synonym: "trans-spliced mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -is_a: SO:0000479 ! implied link automatically realized ! trans_spliced_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000636 ! spliced_leader_RNA -intersection_of: has_quality SO:0000870 ! trans_spliced -relationship: adjacent_to SO:0000636 ! implied link automatically realized ! spliced_leader_RNA - -[Term] -id: SO:0000873 -name: edited_transcript -def: "A transcript that is edited." [SO:ke] -synonym: "edited transcript" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! 
transcript -intersection_of: guided_by SO:0000602 ! guide_RNA -intersection_of: has_part SO:0000977 ! anchor_binding_site -intersection_of: has_quality SO:0000116 ! edited -relationship: guided_by SO:0000602 ! implied link automatically realized ! guide_RNA -relationship: has_part SO:0000977 ! implied link automatically realized ! anchor_binding_site -relationship: has_quality SO:0000116 ! implied link automatically realized ! edited - -[Term] -id: SO:0000874 -name: edited_transcript_by_A_to_I_substitution -def: "A transcript that has been edited by A to I substitution." [SO:ke] -synonym: "edited transcript by A to I substitution" EXACT [] -is_a: SO:0000873 ! edited_transcript -is_a: SO:0000929 ! implied link automatically realized ! edited_mRNA - -[Term] -id: SO:0000875 -name: bound_by_protein -def: "An attribute describing a sequence that is bound by a protein." [SO:ke] -synonym: "bound by protein" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000876 -name: bound_by_nucleic_acid -def: "An attribute describing a sequence that is bound by a nucleic acid." [SO:ke] -synonym: "bound by nucleic acid" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000877 -name: alternatively_spliced -def: "An attribute describing a situation where a gene may encode for more than 1 transcript." [SO:ke] -synonym: "alternatively spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000878 -name: monocistronic -def: "An attribute describing a sequence that contains the code for one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000879 -name: dicistronic -def: "An attribute describing a sequence that contains the code for two gene products." [SO:ke] -is_a: SO:0000880 ! polycistronic - -[Term] -id: SO:0000880 -name: polycistronic -def: "An attribute describing a sequence that contains the code for more than one gene product." [SO:ke] -is_a: SO:0000237 ! 
transcript_attribute - -[Term] -id: SO:0000881 -name: recoded -def: "An attribute describing an mRNA sequence that has been reprogrammed at translation, causing localized alterations." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000882 -name: codon_redefined -def: "An attribute describing the alteration of codon meaning." [SO:ke] -synonym: "codon redefined" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000883 -name: stop_codon_read_through -def: "A stop codon redefined to be a new amino acid." [SO:ke] -synonym: "stop codon read through" EXACT [] -synonym: "stop codon readthrough" RELATED [] -is_a: SO:0000145 ! recoded_codon - -[Term] -id: SO:0000884 -name: stop_codon_redefined_as_pyrrolysine -def: "A stop codon redefined to be the new amino acid, pyrrolysine." [SO:ke] -synonym: "stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000885 -name: stop_codon_redefined_as_selenocysteine -def: "A stop codon redefined to be the new amino acid, selenocysteine." [SO:ke] -synonym: "stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000886 -name: recoded_by_translational_bypass -def: "Recoded mRNA where a block of nucleotides is not translated." [SO:ke] -synonym: "recoded by translational bypass" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000887 -name: translationally_frameshifted -def: "Recoding by frameshifting a particular site." [SO:ke] -synonym: "translationally frameshifted" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000888 -name: maternally_imprinted_gene -def: "A gene that is maternally_imprinted." [SO:xp] -synonym: "maternally imprinted gene" EXACT [] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000135 ! maternally_imprinted -relationship: has_quality SO:0000135 ! 
implied link automatically realized ! maternally_imprinted - -[Term] -id: SO:0000889 -name: paternally_imprinted_gene -def: "A gene that is paternally imprinted." [SO:xp] -synonym: "paternally imprinted gene" EXACT [] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000136 ! paternally_imprinted -relationship: has_quality SO:0000136 ! implied link automatically realized ! paternally_imprinted - -[Term] -id: SO:0000890 -name: post_translationally_regulated_gene -def: "A gene that is post translationally regulated." [SO:xp] -synonym: "post translationally regulated gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000130 ! post_translationally_regulated -relationship: has_quality SO:0000130 ! implied link automatically realized ! post_translationally_regulated - -[Term] -id: SO:0000891 -name: negatively_autoregulated_gene -def: "A gene that is negatively autoreguated." [SO:xp] -synonym: "negatively autoregulated gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000473 ! negatively_autoregulated -relationship: has_quality SO:0000473 ! implied link automatically realized ! negatively_autoregulated - -[Term] -id: SO:0000892 -name: positively_autoregulated_gene -def: "A gene that is positively autoregulated." [SO:xp] -synonym: "positively autoregulated gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000475 ! positively_autoregulated -relationship: has_quality SO:0000475 ! implied link automatically realized ! 
positively_autoregulated - -[Term] -id: SO:0000893 -name: silenced -def: "An attribute describing an epigenetic process where a gene is inactivated at transcriptional or translational level." [SO:ke] -xref: http://en.wikipedia.org/wiki/Silenced "wiki" -is_a: SO:0000126 ! transcriptionally_repressed - -[Term] -id: SO:0000894 -name: silenced_by_DNA_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA modifications, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0000895 -name: silenced_by_DNA_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA methylation, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA methylation" EXACT [] -is_a: SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000896 -name: translationally_regulated_gene -def: "A gene that is translationally regulated." [SO:xp] -synonym: "translationally regulated gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000131 ! translationally_regulated -relationship: has_quality SO:0000131 ! implied link automatically realized ! translationally_regulated - -[Term] -id: SO:0000897 -name: allelically_excluded_gene -def: "A gene that is allelically_excluded." [SO:xp] -synonym: "allelically excluded gene" EXACT [] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000137 ! allelically_excluded -relationship: has_quality SO:0000137 ! implied link automatically realized ! allelically_excluded - -[Term] -id: SO:0000898 -name: epigenetically_modified_gene -def: "A gene that is epigenetically modified." 
[SO:ke] -synonym: "epigenetically modified gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000133 ! epigenetically_modified -relationship: has_quality SO:0000133 ! implied link automatically realized ! epigenetically_modified - -[Term] -id: SO:0000899 -name: nuclear_mitochondrial -def: "An attribute describing a nuclear pseudogene of a mitochndrial gene." [SO:ke] -synonym: "nuclear mitochondrial" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000900 -name: processed -def: "An attribute describing a pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promotors, but often including a polyA tail." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000901 -name: unequally_crossed_over -def: "An attribute describing a pseudogene that was created by tandem duplication and unequal crossing over during recombination." [SO:ke] -synonym: "unequally crossed over" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000902 -name: transgene -def: "A gene that is transgenic." [SO:xp] -xref: http://en.wikipedia.org/wiki/Transgene "wiki" -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000781 ! transgenic -relationship: has_quality SO:0000781 ! implied link automatically realized ! transgenic - -[Term] -id: SO:0000903 -name: endogenous_retroviral_sequence -synonym: "endogenous retroviral sequence" EXACT [] -is_a: SO:0000751 ! proviral_location - -[Term] -id: SO:0000904 -name: rearranged_at_DNA_level -def: "An attribute to describe the sequence of a feature, where the DNA is rearranged." [SO:ke] -synonym: "rearranged at DNA level" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000905 -name: status -def: "An attribute describing the status of a feature, based on the available evidence." 
[SO:ke] -comment: This term is the hypernym of attributes and should not be annotated to. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000906 -name: independently_known -def: "Attribute to describe a feature that is independently known - not predicted." [SO:ke] -synonym: "independently known" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000907 -name: supported_by_sequence_similarity -def: "An attribute to describe a feature that has been predicted using sequence similarity techniques." [SO:ke] -synonym: "supported by sequence similarity" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000908 -name: supported_by_domain_match -def: "An attribute to describe a feature that has been predicted using sequence similarity of a known domain." [SO:ke] -synonym: "supported by domain match" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000909 -name: supported_by_EST_or_cDNA -def: "An attribute to describe a feature that has been predicted using sequence similarity to EST or cDNA data." [SO:ke] -synonym: "supported by EST or cDNA" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000910 -name: orphan -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000911 -name: predicted_by_ab_initio_computation -def: "An attribute describing a feature that is predicted by a computer program that did not rely on sequence similarity." [SO:ke] -synonym: "predicted by ab initio computation" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000912 -name: asx_turn -alt_id: BS:00203 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Aspartate or Asparagine (Asx), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn" EXACT [] -is_a: SO:0001128 ! 
polypeptide_turn_motif - -[Term] -id: SO:0000913 -name: cloned_cDNA_insert -def: "A clone insert made from cDNA." [SO:xp] -synonym: "cloned cDNA insert" EXACT [] -is_a: SO:0000753 ! implied link automatically realized ! clone_insert -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000756 ! cDNA -relationship: has_quality SO:0000756 ! implied link automatically realized ! cDNA - -[Term] -id: SO:0000914 -name: cloned_genomic_insert -def: "A clone insert made from genomic DNA." [SO:xp] -synonym: "cloned genomic insert" EXACT [] -is_a: SO:0000753 ! implied link automatically realized ! clone_insert -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000991 ! genomic_DNA -relationship: has_quality SO:0000991 ! implied link automatically realized ! genomic_DNA - -[Term] -id: SO:0000915 -name: engineered_insert -def: "A clone insert that is engineered." [SO:xp] -synonym: "engineered insert" EXACT [] -is_a: SO:0000753 ! implied link automatically realized ! clone_insert -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000916 -name: edit_operation -synonym: "edit operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000917 -name: insert_U -def: "An edit to insert a U." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "insert U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000918 -name: delete_U -def: "An edit to delete a uridine." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. 
-synonym: "delete U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000919 -name: substitute_A_to_I -def: "An edit to substitute an I for an A." [SO:ke] -synonym: "substitute A to I" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000920 -name: insert_C -def: "An edit to insert a cytidine." [SO:ke] -synonym: "insert C" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000921 -name: insert_dinucleotide -def: "An edit to insert a dinucleotide." [SO:ke] -synonym: "insert dinucleotide" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000922 -name: substitute_C_to_U -def: "An edit to substitute an U for a C." [SO:ke] -synonym: "substitute C to U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000923 -name: insert_G -def: "An edit to insert a G." [SO:ke] -synonym: "insert G" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000924 -name: insert_GC -def: "An edit to insert a GC dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GC" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000925 -name: insert_GU -def: "An edit to insert a GU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. 
The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000926 -name: insert_CU -def: "An edit to insert a CU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert CU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000927 -name: insert_AU -def: "An edit to insert a AU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. 
Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000928 -name: insert_AA -def: "An edit to insert a AA dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AA" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000929 -name: edited_mRNA -def: "An mRNA that is edited." [SO:xp] -synonym: "edited mRNA" EXACT [] -is_a: SO:0000873 ! implied link automatically realized ! edited_transcript -intersection_of: SO:0000873 ! edited_transcript -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000930 -name: guide_RNA_region -def: "A region of guide RNA." [SO:ma] -synonym: "guide RNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000602 ! guide_RNA - -[Term] -id: SO:0000931 -name: anchor_region -def: "A region of a guide_RNA that base-pairs to a target mRNA." [SO:jk] -synonym: "anchor region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000932 -name: pre_edited_mRNA -synonym: "pre-edited mRNA" EXACT [] -is_a: SO:0000120 ! protein_coding_primary_transcript - -[Term] -id: SO:0000933 -name: intermediate -def: "An attribute to describe a feature between stages of processing." [SO:ke] -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000934 -name: miRNA_target_site -def: "A miRNA target site is a binding site where the molecule is a micro RNA." [FB:cds] -synonym: "miRNA target site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000935 -name: edited_CDS -def: "A CDS that is edited." [SO:xp] -synonym: "edited CDS" EXACT [] -is_a: SO:0000316 ! implied link automatically realized ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000116 ! edited -relationship: has_quality SO:0000116 ! implied link automatically realized ! edited - -[Term] -id: SO:0000936 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -synonym: "vertebrate immunoglobulin T cell receptor rearranged segment" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000937 -name: vertebrate_immune_system_feature -is_obsolete: true - -[Term] -id: SO:0000938 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor rearranged gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000939 -name: vertebrate_immune_system_gene_recombination_signal_feature -synonym: "vertebrate immune system gene recombination signal feature" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000940 -name: recombinationally_rearranged -synonym: "recombinationally rearranged" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000941 -name: recombinationally_rearranged_vertebrate_immune_system_gene -def: "A recombinationally rearranged gene of the vertebrate immune system." [SO:xp] -synonym: "recombinationally rearranged vertebrate immune system gene" EXACT [] -is_a: SO:0000456 ! 
recombinationally_rearranged_gene - -[Term] -id: SO:0000942 -name: attP_site -def: "An integration/excision site of a phage chromosome at which a recombinase acts to insert the phage DNA at a cognate integration/excision site on a bacterial chromosome." [SO:as] -synonym: "attP site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0001042 ! phage_sequence - -[Term] -id: SO:0000943 -name: attB_site -def: "An integration/excision site of a bacterial chromosome at which a recombinase acts to insert foreign DNA containing a cognate integration/excision site." [SO:as] -synonym: "attB site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000944 -name: attL_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attB_site and the 3' portion of attP_site." [SO:as] -synonym: "attBP'" RELATED [] -synonym: "attL site" RELATED [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000945 -name: attR_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attP_site and the 3' portion of attB_site." [SO:as] -synonym: "attPB'" RELATED [] -synonym: "attR site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000946 -name: integration_excision_site -def: "A region specifically recognised by a recombinase, which inserts or removes another region marked by a distinct cognate integration/excision site." [SO:as] -synonym: "attachment site" RELATED [] -synonym: "integration excision site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000947 -name: resolution_site -def: "A region specifically recognised by a recombinase, which separates a physically contiguous circle of DNA into two physically separate circles." [SO:as] -synonym: "res site" EXACT [] -synonym: "resolution site" EXACT [] -is_a: SO:0000342 ! 
site_specific_recombination_target_region - -[Term] -id: SO:0000948 -name: inversion_site -def: "A region specifically recognised by a recombinase, which inverts the region flanked by a pair of sites." [SO:ma] -comment: A target region for site-specific inversion of a DNA region and which carries binding sites for a site-specific recombinase and accessory proteins as well as the site for specific cleavage by the recombinase. -synonym: "inversion site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000949 -name: dif_site -def: "A site at which replicated bacterial circular chromosomes are decatenated by site specific resolvase." [SO:as] -synonym: "dif site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000950 -name: attC_site -def: "An attC site is a sequence required for the integration of a DNA of an integron." [SO:as] -synonym: "attC site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000951 -name: eukaryotic_terminator -synonym: "eukaryotic terminator" EXACT [] -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000952 -name: oriV -def: "An origin of vegetative replication in plasmids and phages." [SO:as] -synonym: "origin of vegetative replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000953 -name: oriC -def: "An origin of bacterial chromosome replication." [SO:as] -synonym: "origin of bacterial chromosome replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000954 -name: DNA_chromosome -def: "Structural unit composed of a self-replicating, DNA molecule." [SO:ma] -synonym: "DNA chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000352 ! DNA -relationship: has_quality SO:0000352 ! implied link automatically realized ! 
DNA - -[Term] -id: SO:0000955 -name: double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded DNA molecule." [SO:ma] -synonym: "double stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! implied link automatically realized ! DNA_chromosome -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000985 ! double -relationship: has_quality SO:0000985 ! implied link automatically realized ! double - -[Term] -id: SO:0000956 -name: single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded DNA molecule." [SO:ma] -synonym: "single stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! implied link automatically realized ! DNA_chromosome -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000984 ! single -relationship: has_quality SO:0000984 ! implied link automatically realized ! single - -[Term] -id: SO:0000957 -name: linear_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear DNA molecule." [SO:ma] -synonym: "linear double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! implied link automatically realized ! double_stranded_DNA_chromosome -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! implied link automatically realized ! linear - -[Term] -id: SO:0000958 -name: circular_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular DNA molecule." [SO:ma] -synonym: "circular double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! implied link automatically realized ! double_stranded_DNA_chromosome -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! implied link automatically realized ! 
circular - -[Term] -id: SO:0000959 -name: linear_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear DNA molecule." [SO:ma] -synonym: "linear single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! implied link automatically realized ! single_stranded_DNA_chromosome -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! implied link automatically realized ! linear - -[Term] -id: SO:0000960 -name: circular_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! implied link automatically realized ! single_stranded_DNA_chromosome -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! implied link automatically realized ! circular - -[Term] -id: SO:0000961 -name: RNA_chromosome -def: "Structural unit composed of a self-replicating, RNA molecule." [SO:ma] -synonym: "RNA chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000356 ! RNA -relationship: has_quality SO:0000356 ! implied link automatically realized ! RNA - -[Term] -id: SO:0000962 -name: single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded RNA molecule." [SO:ma] -synonym: "single stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! implied link automatically realized ! RNA_chromosome -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000984 ! single -relationship: has_quality SO:0000984 ! implied link automatically realized ! 
single - -[Term] -id: SO:0000963 -name: linear_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear RNA molecule." [SO:ma] -synonym: "linear single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! implied link automatically realized ! single_stranded_RNA_chromosome -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! implied link automatically realized ! linear - -[Term] -id: SO:0000964 -name: linear_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear RNA molecule." [SO:ma] -synonym: "linear double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! implied link automatically realized ! double_stranded_RNA_chromosome -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! implied link automatically realized ! linear - -[Term] -id: SO:0000965 -name: double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded RNA molecule." [SO:ma] -synonym: "double stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! implied link automatically realized ! RNA_chromosome -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000985 ! double -relationship: has_quality SO:0000985 ! implied link automatically realized ! double - -[Term] -id: SO:0000966 -name: circular_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! implied link automatically realized ! single_stranded_RNA_chromosome -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! 
implied link automatically realized ! circular - -[Term] -id: SO:0000967 -name: circular_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular RNA molecule." [SO:ma] -synonym: "circular double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! implied link automatically realized ! double_stranded_RNA_chromosome -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! implied link automatically realized ! circular - -[Term] -id: SO:0000968 -name: sequence_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "sequence replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000969 -name: rolling_circle -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070581. -synonym: "rolling circle" EXACT [] -xref: http://en.wikipedia.org/wiki/Rolling_circle "wiki" -is_obsolete: true - -[Term] -id: SO:0000970 -name: theta_replication -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070582 -synonym: "theta replication" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000971 -name: DNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0006260. -synonym: "DNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000972 -name: RNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "RNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000973 -name: insertion_sequence -def: "A terminal_inverted_repeat_element that is bacterial and only encodes the functions required for its transposition between these inverted repeats." [SO:as] -synonym: "insertion sequence" EXACT [] -synonym: "IS" RELATED [] -xref: http://en.wikipedia.org/wiki/Insertion_sequence "wiki" -is_a: SO:0000208 ! 
terminal_inverted_repeat_element - -[Term] -id: SO:0000975 -name: minicircle_gene -synonym: "minicircle gene" EXACT [] -is_a: SO:0000089 ! implied link automatically realized ! kinetoplast_gene -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000980 ! minicircle -relationship: part_of SO:0000980 ! implied link automatically realized ! minicircle - -[Term] -id: SO:0000976 -name: cryptic -def: "A feature_attribute describing a feature that is not manifest under normal conditions." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000977 -name: anchor_binding_site -comment: Part of an edited transcript only. -synonym: "anchor binding site" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000978 -name: template_region -def: "A region of a guide_RNA that specifies the insertions and deletions of bases in the editing of a target mRNA." [SO:jk] -synonym: "information region" EXACT [] -synonym: "template region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000979 -name: gRNA_encoding -def: "A non-protein_coding gene that encodes a guide_RNA." [SO:ma] -synonym: "gRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000980 -name: minicircle -alt_id: SO:0000974 -def: "A minicircle is a replicon, part of a kinetoplast, that encodes for guide RNAs." [PMID:8395055] -synonym: "minicircle_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Minicircle "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000741 ! kinetoplast - -[Term] -id: SO:0000981 -name: rho_dependent_bacterial_terminator -synonym: "rho dependent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000982 -name: rho_independent_bacterial_terminator -synonym: "rho independent bacterial terminator" EXACT [] -is_a: SO:0000614 ! 
bacterial_terminator - -[Term] -id: SO:0000983 -name: strand_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "strand attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000984 -name: single -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000985 -name: double -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000986 -name: topology_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "topology attribute" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000987 -name: linear -def: "A quality of a nucleotide polymer that has a 3'-terminal residue and a 5'-terminal residue." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "two-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000988 ! circular - -[Term] -id: SO:0000988 -name: circular -def: "A quality of a nucleotide polymer that has no terminal nucleotide residues." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "zero-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000987 ! linear - -[Term] -id: SO:0000989 -name: class_II_RNA -def: "Small non-coding RNA (59-60 nt long) containing 5' and 3' ends that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -synonym: "class II RNA" EXACT [] -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000990 -name: class_I_RNA -def: "Small non-coding RNA (55-65 nt long) containing highly conserved 5' and 3' ends (16 and 8 nt, respectively) that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -comment: Requested by Karen Pilcher - Dictybase. song-Term Tracker-1574577. -synonym: "class I RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000991 -name: genomic_DNA -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "genomic DNA" EXACT [] -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000992 -name: BAC_cloned_genomic_insert -comment: Requested by Andy Schroder - Flybase Harvard, Nov 2006. -synonym: "BAC cloned genomic insert" EXACT [] -is_a: SO:0000914 ! implied link automatically realized ! cloned_genomic_insert -intersection_of: SO:0000914 ! cloned_genomic_insert -intersection_of: derives_from SO:0000153 ! BAC -relationship: derives_from SO:0000153 ! implied link automatically realized ! BAC - -[Term] -id: SO:0000993 -name: consensus -comment: Term added Dec 06 to comply with mapping to MGED terms.It should be used to generate consensus regions. The specific cross product terms they require are consensus_region and consensus_mRNA. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000994 -name: consensus_region -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus region" EXACT [] -is_a: SO:0001410 ! experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000993 ! consensus -relationship: has_quality SO:0000993 ! implied link automatically realized ! consensus - -[Term] -id: SO:0000995 -name: consensus_mRNA -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -is_a: SO:0000994 ! 
implied link automatically realized ! consensus_region -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000996 -name: predicted_gene -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "predicted gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000732 ! predicted -relationship: has_quality SO:0000732 ! implied link automatically realized ! predicted - -[Term] -id: SO:0000997 -name: gene_fragment -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "gene fragment" EXACT [] -is_a: SO:0000842 ! implied link automatically realized ! gene_component_region -intersection_of: SO:0000842 ! gene_component_region -intersection_of: has_quality SO:0000731 ! fragmentary -relationship: has_quality SO:0000731 ! implied link automatically realized ! fragmentary - -[Term] -id: SO:0000998 -name: recursive_splice_site -def: "A recursive splice site is a splice site which subdivides a large intron. Recursive splicing is a mechanism that splices large introns by sub dividing the intron at non exonic elements and alternate exons." [http://www.genetics.org/cgi/content/full/170/2/661] -synonym: "recursive splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000999 -name: BAC_end -def: "A region of sequence from the end of a BAC clone that may provide a highly specific marker." [SO:ke] -comment: Requested by Keith Boroevich December, 2006. -synonym: "BAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000153 ! BAC - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." 
[SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001003 -name: solo_LTR -def: "A recombination product between the 2 LTR of the same element." [SO:ke] -comment: Requested by Hadi Quesneville January 2007. -synonym: "solo LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0001004 -name: low_complexity -synonym: "low complexity" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0001005 -name: low_complexity_region -synonym: "low complexity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001004 ! low_complexity -relationship: has_quality SO:0001004 ! implied link automatically realized ! low_complexity - -[Term] -id: SO:0001006 -name: prophage -def: "A phage genome after it has established in the host genome in a latent/immune state either as a plasmid or as an integrated \"island\"." [GOC:jl] -xref: http://en.wikipedia.org/wiki/Prophage "wiki" -is_a: SO:0000113 ! 
proviral_region - -[Term] -id: SO:0001007 -name: cryptic_prophage -def: "A remnant of an integrated prophage in the host genome or an \"island\" in the host genome that includes phage like-genes." [GOC:jl] -comment: This is not cryptic in the same sense as a cryptic gene or cryptic splice site. -synonym: "cryptic prophage" EXACT [] -xref: http://ecoliwiki.net/colipedia/index.php/Category\:Cryptic_Prophage.w dbxref -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001008 -name: tetraloop -def: "A base-paired stem with loop of 4 non-hydrogen bonded nucleotides." [SO:ke] -xref: http://en.wikipedia.org/wiki/Tetraloop "wiki" -is_a: SO:0000313 ! stem_loop - -[Term] -id: SO:0001009 -name: DNA_constraint_sequence -def: "A double-stranded DNA used to control macromolecular structure and function." [http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au\]&dispmax=50] -synonym: "DNA constraint" EXACT [] -synonym: "DNA constraint sequence" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0001010 -name: i_motif -def: "A cytosine rich domain whereby strands associate both inter- and intramolecularly at moderately acidic pH." [PMID:9753739] -synonym: "i motif" EXACT [] -synonym: "short intercalated motif" EXACT [] -is_a: SO:0000142 ! DNA_sequence_secondary_structure - -[Term] -id: SO:0001011 -name: PNA_oligo -def: "Peptide nucleic acid, is a chemical not known to occur naturally but is artificially synthesized and used in some biological research and medical treatments. The PNA backbone is composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [SO:ke] -synonym: "peptide nucleic acid" EXACT [] -synonym: "PNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Peptide_nucleic_acid "wiki" -is_a: SO:0001247 ! implied link automatically realized ! synthetic_oligo -intersection_of: SO:0001247 ! 
synthetic_oligo -intersection_of: has_quality SO:0001184 ! PNA -relationship: has_quality SO:0001184 ! implied link automatically realized ! PNA - -[Term] -id: SO:0001012 -name: DNAzyme -def: "A DNA sequence with catalytic activity." [SO:cb] -comment: Added by request from Colin Batchelor. -synonym: "catalytic DNA" EXACT [] -synonym: "deoxyribozyme" RELATED [] -synonym: "DNA enzyme" EXACT [] -is_a: SO:0000696 ! implied link automatically realized ! oligo -intersection_of: SO:0000696 ! oligo -intersection_of: has_quality SO:0001185 ! enzymatic -relationship: has_quality SO:0001185 ! implied link automatically realized ! enzymatic - -[Term] -id: SO:0001013 -name: MNP -def: "A multiple nucleotide polymorphism with alleles of common length > 1, for example AAA/TTT." [http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?rs=rs2067431] -synonym: "multiple nucleotide polymorphism" RELATED [] -is_a: SO:1000005 ! complex_substitution - -[Term] -id: SO:0001014 -name: intron_domain -comment: Requested by Colin Batchelor, Feb 2007. -synonym: "intron domain" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000188 ! intron - -[Term] -id: SO:0001015 -name: wobble_base_pair -def: "A type of non-canonical base pairing, most commonly between G and U, which is important for the secondary structure of RNAs. It has similar thermodynamic stability to the Watson-Crick pairing. Wobble base pairs only have two hydrogen bonds. Other wobble base pair possibilities are I-A, I-U and I-C." [PMID:11256617] -synonym: "wobble base pair" EXACT [] -synonym: "wobble pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Wobble_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0001016 -name: internal_guide_sequence -def: "A purine-rich sequence in the group I introns which determines the locations of the splice sites in group I intron splicing and has catalytic activity." [SO:cb] -synonym: "IGS" EXACT [] -synonym: "internal guide sequence" EXACT [] -is_a: SO:0001014 ! 
intron_domain -relationship: part_of SO:0000587 ! group_I_intron - -[Term] -id: SO:0001017 -name: silent_mutation -comment: Added in March 2007 in after meeting with pharmgkb. -synonym: "silent mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Silent_mutation "wiki" -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0001018 -name: epitope -def: "A region of a macromolecule that is recognized by the immune system." [http://en.wikipedia.org/wiki/Epitope] -comment: Requested by Trish Whetzel. -xref: http://en.wikipedia.org/wiki/Epitope "wiki" -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001060 ! sequence_variant - -[Term] -id: SO:0001020 -name: sequence_variant_affecting_copy_number -synonym: "mutation affecting copy number" EXACT [] -synonym: "sequence variant affecting copy number" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0001021 -name: chromosome_breakpoint -alt_id: SO:0001242 -synonym: "aberration breakpoint" EXACT [] -synonym: "aberration_junction" EXACT [] -synonym: "chromosome breakpoint" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0001022 -name: inversion_breakpoint -def: "The point within a chromosome where an inversion begins or ends." [SO:cb] -synonym: "inversion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001023 -name: allele -def: "An allele is one of a set of coexisting sequence variants of a gene." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Allele "wiki" -is_a: SO:0001060 ! sequence_variant -relationship: variant_of SO:0000704 ! 
gene - -[Term] -id: SO:0001024 -name: haplotype -def: "A haplotype is one of a set of coexisting sequence variants of a haplotype block." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Haplotype "wiki" -is_a: SO:0001060 ! sequence_variant -relationship: variant_of SO:0000355 ! haplotype_block - -[Term] -id: SO:0001025 -name: polymorphic_sequence_variant -def: "A sequence variant that is segregating in one or more natural populations of a species." [SO:immuno_workshop] -synonym: "polymorphic sequence variant" EXACT [] -is_a: SO:0001060 ! sequence_variant - -[Term] -id: SO:0001026 -name: genome -def: "A genome is the sum of genetic material within a cell or virion." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genome "wiki" -is_a: SO:0001260 ! sequence_collection - -[Term] -id: SO:0001027 -name: genotype -def: "A genotype is a variant genome, complete or incomplete." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genotype "wiki" -is_a: SO:0001060 ! sequence_variant -relationship: variant_of SO:0001026 ! genome - -[Term] -id: SO:0001028 -name: diplotype -def: "A diplotype is a pair of haplotypes from a given individual. It is a genotype where the phase is known." [SO:immuno_workshop] -is_a: SO:0001027 ! genotype - -[Term] -id: SO:0001029 -name: direction_attribute -synonym: "direction attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001030 -name: forward -def: "Forward is an attribute of the feature, where the feature is in the 5' to 3' direction." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001031 -name: reverse -def: "Reverse is an attribute of the feature, where the feature is in the 3' to 5' direction. Again could be applied to primer." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001032 -name: mitochondrial_DNA -comment: This terms is used by MO. 
-synonym: "mitochondrial DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_DNA "wiki" -is_a: SO:0000737 ! implied link automatically realized ! mitochondrial_sequence -intersection_of: SO:0000737 ! mitochondrial_sequence -intersection_of: has_quality SO:0000352 ! DNA -relationship: has_quality SO:0000352 ! implied link automatically realized ! DNA - -[Term] -id: SO:0001033 -name: chloroplast_DNA -comment: This term is used by MO. -synonym: "chloroplast DNA" EXACT [] -is_a: SO:0000745 ! implied link automatically realized ! chloroplast_sequence -intersection_of: SO:0000745 ! chloroplast_sequence -intersection_of: has_quality SO:0000352 ! DNA -relationship: has_quality SO:0000352 ! implied link automatically realized ! DNA - -[Term] -id: SO:0001034 -name: mirtron -def: "A debranched intron which mimics the structure of pre-miRNA and enters the miRNA processing pathway without Drosha mediated cleavage." [PMID:17589500, SO:ma] -comment: Ruby et al. Nature 448:83 desribe a new class of miRNAs that are derived from debranched introns. -is_a: SO:0001014 ! intron_domain - -[Term] -id: SO:0001035 -name: piRNA -def: "A small non coding RNA, part of a silencing system that prevents the spreading of selfish genetic elements." [SO:ke] -synonym: "piwi-associated RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/PiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001036 -name: arginyl_tRNA -def: "A tRNA sequence that has an arginine anticodon, and a 3' arginine binding region." [SO:ke] -synonym: "arginyl tRNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000212 ! arginine_tRNA_primary_transcript - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." 
[PMID:14681355] -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001234 ! mobile -relationship: has_quality SO:0001234 ! implied link automatically realized ! mobile - -[Term] -id: SO:0001038 -name: extrachromosomal_mobile_genetic_element -def: "An MGE that is not integrated into the host chromosome." [SO:ke] -synonym: "extrachromosomal mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001040 -name: integrated_plasmid -def: "A plasmid sequence that is integrated within the host chromosome." [SO:ke] -synonym: "integrated plasmid" EXACT [] -is_a: SO:0001039 ! implied link automatically realized ! integrated_mobile_genetic_element -intersection_of: SO:0001039 ! integrated_mobile_genetic_element -intersection_of: derives_from SO:0000155 ! plasmid -relationship: derives_from SO:0000155 ! implied link automatically realized ! plasmid - -[Term] -id: SO:0001041 -name: viral_sequence -def: "The region of nucleotide sequence of a virus, a submicroscopic particle that replicates by infecting a host cell." [SO:ke] -comment: The definitions of the children of this term were revised Decemeber 2007 after discussion on song-devel. The resulting definitions are slightly unweildy but hopefully more logically correct. -synonym: "viral sequence" EXACT [] -synonym: "virus sequence" EXACT [] -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0001042 -name: phage_sequence -def: "The nucleotide sequence of a virus that infects bacteria." 
[SO:ke] -synonym: "bacteriophage" EXACT [] -synonym: "phage" EXACT [] -synonym: "phage sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Bacteriophage "wiki" -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001043 -name: attCtn_site -def: "An attachment site located on a conjugative transposon and used for site-specific integration of a conjugative transposon." [Phigo:at] -synonym: "attCtn site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000371 ! conjugative_transposon - -[Term] -id: SO:0001044 -name: nuclear_mt_pseudogene -def: "A nuclear pseudogene of a mitochndrial gene." [SO:xp] -synonym: "nuclear mitochondrial pseudogene" EXACT [] -synonym: "nuclear mt pseudogene" EXACT [] -synonym: "NUMT" EXACT [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0001045 -name: cointegrated_plasmid -def: "A MGE region consisting of two fused plasmids resulting from a replicative transposition event." [phigo:at] -synonym: "cointegrated plasmid" EXACT [] -synonym: "cointegrated replicon" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0001046 -name: IRLinv_site -def: "Component of the inversion site located at the left of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRLinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001047 -name: IRRinv_site -def: "Component of the inversion site located at the right of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRRinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001048 -name: inversion_site_part -def: "A region located within an inversion site." [SO:ke] -comment: A term created to allow the parts of an inversion site have an is_a path back to the root. -synonym: "inversion site part" EXACT [] -is_a: SO:0000342 ! 
site_specific_recombination_target_region - -[Term] -id: SO:0001049 -name: defective_conjugative_transposon -def: "An island that contains genes for integration/excision and the gene and site for the initiation of intercellular transfer by conjugation. It can be complemented for transfer by a conjugative transposon." [Phigo:ariane] -synonym: "defective conjugative transposon" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001050 -name: repeat_fragment -def: "A portion of a repeat, interrupted by the insertion of another element." [SO:ke] -comment: Requested by Chris Smith, and others at Flybase to help annotate nested repeats. -synonym: "repeat fragment" EXACT [] -is_a: SO:0000657 ! implied link automatically realized ! repeat_region -is_a: SO:0000840 ! repeat_component -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000731 ! fragmentary -relationship: has_quality SO:0000731 ! implied link automatically realized ! fragmentary - -[Term] -id: SO:0001051 -name: nested_region -is_obsolete: true - -[Term] -id: SO:0001052 -name: nested_repeat -is_obsolete: true - -[Term] -id: SO:0001053 -name: nested_transposon -is_obsolete: true - -[Term] -id: SO:0001054 -name: transposon_fragment -synonym: "transposon fragment" EXACT [] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000731 ! fragmentary -relationship: has_quality SO:0000731 ! implied link automatically realized ! fragmentary - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." 
[SO:ke] -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001057 -name: enhanceosome -is_obsolete: true - -[Term] -id: SO:0001058 -name: promoter_targeting_sequence -def: "A transcriptional_cis_regulatory_region that restricts the activity of a CRM to a single promoter and which functions only when both itself and an insulator are located between the CRM and the promoter." [SO:regcreative] -synonym: "promoter targeting sequence" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -synonym: "sequence alteration" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001060 -name: sequence_variant -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -synonym: "sequence variant" EXACT [] -disjoint_from: SO:0000240 ! implied link automatically realized ! chromosome_variation -disjoint_from: SO:0000400 ! implied link automatically realized ! sequence_attribute - -[Term] -id: SO:0001061 -name: propeptide_cleavage_site -alt_id: BS:00063 -def: "The propeptide_cleavage_site is the arginine/lysine boundary on a propeptide where cleavage occurs." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "propeptide cleavage site" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0001062 -name: propeptide -alt_id: BS:00077 -def: "Part of a peptide chain which is cleaved off during the formation of the mature protein." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "propep" RELATED [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Propeptide "wiki" -is_a: SO:0100011 ! 
cleaved_peptide_region - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "immature_peptide_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001064 -name: active_peptide -alt_id: BS:00076 -def: "Active peptides are proteins which are biologically active, released from a precursor molecule." [EBIBS:GAR, UniProt:curation_manual] -comment: Hormones, neuropeptides, antimicrobial peptides, are active peptides. They are typically short (<40 amino acids) in length. -subset: biosapiens -synonym: "active peptide" EXACT [] -synonym: "peptide" BROAD [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Peptide "wiki" -is_a: SO:0000419 ! mature_protein_region - -[Term] -id: SO:0001066 -name: compositionally_biased_region_of_peptide -alt_id: BS:00068 -def: "Polypeptide region that is rich in a particular amino acid or homopolymeric and greater than three residues in length." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "compbias" RELATED [uniprot:feature_type] -synonym: "compositional bias" RELATED [] -synonym: "compositionally biased" RELATED [] -synonym: "compositionally biased region of peptide" RELATED [] -synonym: "compositionally_biased_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001067 -name: polypeptide_motif -alt_id: BS:00032 -def: "A sequence motif is a short (up to 20 amino acids) region of biological interest. Such motifs, although they are too short to constitute functional domains, share sequence similarities and are conserved in different proteins. They display a common function (protein-binding, subcellular location etc.)." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. 
-subset: biosapiens -synonym: "motif" BROAD [uniprot:feature_type] -synonym: "polypeptide motif" EXACT [] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001068 -name: polypeptide_repeat -alt_id: BS:00070 -def: "A polypeptide_repeat is a single copy of an internal sequence repetition." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "polypeptide repeat" EXACT [] -synonym: "repeat" RELATED [uniprot:feature_type] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001070 -name: polypeptide_structural_region -alt_id: BS:00337 -def: "Region of polypeptide with a given structural property." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "polypeptide structural region" EXACT [] -synonym: "structural_region" RELATED [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001071 -name: membrane_structure -alt_id: BS:00128 -def: "Arrangement of the polypeptide with respect to the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "membrane_structure" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001072 -name: extramembrane_polypeptide_region -alt_id: BS:00154 -def: "Polypeptide region that is localized outside of a lipid bilayer." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "extramembrane" RELATED BS [] -synonym: "extramembrane polypeptide region" EXACT [] -synonym: "extramembrane_region" RELATED BS [] -synonym: "topo_dom" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001073 -name: cytoplasmic_polypeptide_region -alt_id: BS:00145 -def: "Polypeptide region that is localized inside the cytoplasm." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "cytoplasm_location" EXACT BS [] -synonym: "cytoplasmic polypeptide region" EXACT [] -synonym: "inside" RELATED BS [] -is_a: SO:0001072 ! 
extramembrane_polypeptide_region - -[Term] -id: SO:0001074 -name: non_cytoplasmic_polypeptide_region -alt_id: BS:00144 -def: "Polypeptide region that is localized outside of a lipid bilayer and outside of the cytoplasm." [EBIBS:GAR, SO:cb] -comment: This could be inside an organelle within the cell. -subset: biosapiens -synonym: "non cytoplasmic polypeptide region" EXACT [] -synonym: "non_cytoplasm_location" EXACT BS [] -synonym: "outside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001075 -name: intramembrane_polypeptide_region -alt_id: BS:00156 -def: "Polypeptide region present in the lipid bilayer." [EBIBS:GAR] -subset: biosapiens -synonym: "intramembrane" RELATED BS [] -synonym: "intramembrane polypeptide region" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001076 -name: membrane_peptide_loop -alt_id: BS:00155 -def: "Polypeptide region localized within the lipid bilayer where both ends traverse the same membrane." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "membrane peptide loop" EXACT [] -synonym: "membrane_loop" RELATED BS [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001077 -name: transmembrane_polypeptide_region -alt_id: BS:00158 -def: "Polypeptide region traversing the lipid bilayer." [EBIBS:GAR, UniProt:curator_manual] -subset: biosapiens -synonym: "transmem" RELATED BS [uniprot:feature_type] -synonym: "transmembrane" RELATED BS [] -synonym: "transmembrane polypeptide region" EXACT [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001078 -name: polypeptide_secondary_structure -alt_id: BS:00003 -def: "A region of peptide with secondary structure has hydrogen bonding along the peptide chain that causes a defined conformation of the chain." [EBIBS:GAR] -comment: Biosapien term was secondary_structure. 
-subset: biosapiens -synonym: "2nary structure" RELATED BS [] -synonym: "polypeptide secondary structure" EXACT [] -synonym: "secondary structure" RELATED BS [] -synonym: "secondary structure region" RELATED BS [] -synonym: "secondary_structure" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Secondary_structure "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001079 -name: polypeptide_structural_motif -alt_id: BS:0000338 -def: "Motif is a three-dimensional structural element within the chain, which appears also in a variety of other molecules. Unlike a domain, a motif does not need to form a stable globular unit." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide structural motif" RELATED [] -synonym: "structural_motif" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Structural_motif "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001080 -name: coiled_coil -alt_id: BS:00041 -def: "A coiled coil is a structural motif in proteins, in which alpha-helices are coiled together like the strands of a rope." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "coiled" RELATED BS [uniprot:feature_type] -synonym: "coiled coil" EXACT [] -xref: http://en.wikipedia.org/wiki/Coiled_coil "wiki" -is_a: SO:0001079 ! polypeptide_structural_motif - -[Term] -id: SO:0001081 -name: helix_turn_helix -alt_id: BS:00147 -def: "A motif comprising two helices separated by a turn." [EBIBS:GAR] -subset: biosapiens -synonym: "helix turn helix" EXACT [] -synonym: "helix-turn-helix" EXACT [] -synonym: "HTH" RELATED BS [] -is_a: SO:0001079 ! polypeptide_structural_motif -relationship: has_part SO:0001114 ! peptide_helix -relationship: has_part SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001082 -name: polypeptide_sequencing_information -alt_id: BS:00125 -def: "Incompatibility in the sequence due to some experimental problem." [EBIBS:GAR] -comment: Range. 
-subset: biosapiens -synonym: "sequencing_information" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0001083 -name: non_adjacent_residues -alt_id: BS:00182 -def: "Indicates that two consecutive residues in a fragment sequence are not consecutive in the full-length protein and that there are a number of unsequenced residues between them." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "non consecutive" EXACT [] -synonym: "non_cons" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001084 -name: non_terminal_residue -alt_id: BS:00072 -def: "The residue at an extremity of the sequence is not the terminal residue." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "non terminal" EXACT [] -synonym: "non_ter" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001085 -name: sequence_conflict -alt_id: BS:00069 -def: "Different sources report differing sequences." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "conflict" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001086 -name: sequence_uncertainty -alt_id: BS:00181 -def: "Describes the positions in a sequence where the authors are unsure about the sequence assignment." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "unsure" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001087 -name: cross_link -alt_id: BS:00178 -def: "Posttranslationally formed amino acid bonds." 
[EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "cross_link" EXACT [] -synonym: "crosslink" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001088 -name: disulfide_bond -alt_id: BS:00028 -def: "The covalent bond between sulfur atoms that binds two peptide chains or different parts of one peptide chain and is a structural determinant in many protein molecules." [EBIBS:GAR, UniProt:curation_manual] -comment: 2 discreet & joined. -subset: biosapiens -synonym: "disulfid" RELATED [] -synonym: "disulfide" RELATED [] -synonym: "disulfide bond" RELATED [] -synonym: "disulfide_bond" EXACT [] -synonym: "disulphide" EXACT [] -synonym: "disulphide bond" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001089 -name: post_translationally_modified_region -alt_id: BS:00052 -def: "A region where a transformation occurs in a protein after it has been synthesized. This which may regulate, stabilize, crosslink or introduce new chemical functionalities in the protein." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mod_res" EXACT [uniprot:feature_type] -synonym: "modified residue" EXACT [] -synonym: "post_translational_modification" EXACT [] -xref: http://en.wikipedia.org/wiki/Post_translational_modification "wiki" -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001090 -name: covalent_binding_site -alt_id: BS:00246 -def: "Binding involving a covalent bond." [EBIBS:GAR] -subset: biosapiens -synonym: "covalent_binding_site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001091 -name: non_covalent_binding_site -alt_id: BS:00029 -def: "Binding site for any chemical group (co-enzyme, prosthetic group, etc.)." [EBIBS:GAR] -comment: Discrete. 
-subset: biosapiens -synonym: "binding" RELATED [uniprot:curation] -synonym: "binding site" RELATED [] -synonym: "non_covalent_binding_site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001092 -name: polypeptide_metal_contact -alt_id: BS:00027 -def: "Residue is part of a binding site for a metal ion." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "metal_binding" RELATED [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001093 -name: protein_protein_contact -alt_id: BS:00131 -def: "Residues involved in protein-protein interactions." [EBIBS:GAR, UniProt:Curation_manual] -subset: biosapiens -synonym: "protein protein contact" EXACT [] -synonym: "protein protein contact site" EXACT [] -synonym: "protein_protein_interaction" RELATED [] -xref: http://en.wikipedia.org/wiki/Protein_protein_interaction "wiki" -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001094 -name: polypeptide_calcium_ion_contact_site -alt_id: BS:00186 -def: "Residue involved in contact with calcium." [EBIBS:GAR] -subset: biosapiens -synonym: "ca bind" RELATED [] -synonym: "Ca_contact_site" EXACT [] -synonym: "polypeptide calcium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001095 -name: polypeptide_cobalt_ion_contact_site -alt_id: BS:00136 -def: "Residue involved in contact with cobalt." [EBIBS:GAR] -subset: biosapiens -synonym: "Co_contact_site" EXACT [] -synonym: "polypeptide cobalt ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001096 -name: polypeptide_copper_ion_contact_site -alt_id: BS:00146 -def: "Residue involved in contact with copper." [EBIBS:GAR] -subset: biosapiens -synonym: "Cu_contact_site" EXACT [] -synonym: "polypeptide copper ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001097 -name: polypeptide_iron_ion_contact_site -alt_id: BS:00137 -def: "Residue involved in contact with iron." [EBIBS:GAR] -subset: biosapiens -synonym: "Fe_contact_site" EXACT [] -synonym: "polypeptide iron ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001098 -name: polypeptide_magnesium_ion_contact_site -alt_id: BS:00187 -def: "Residue involved in contact with magnesium." [EBIBS:GAR] -subset: biosapiens -synonym: "Mg_contact_site" EXACT [] -synonym: "polypeptide magnesium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001099 -name: polypeptide_manganese_ion_contact_site -alt_id: BS:00140 -def: "Residue involved in contact with manganese." [EBIBS:GAR] -subset: biosapiens -synonym: "Mn_contact_site" EXACT [] -synonym: "polypeptide manganese ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001100 -name: polypeptide_molybdenum_ion_contact_site -alt_id: BS:00141 -def: "Residue involved in contact with molybdenum." [EBIBS:GAR] -subset: biosapiens -synonym: "Mo_contact_site" EXACT [] -synonym: "polypeptide molybdenum ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001101 -name: polypeptide_nickel_ion_contact_site -alt_id: BS:00142 -def: "Residue involved in contact with nickel." [EBIBS:GAR] -subset: biosapiens -synonym: "Ni_contact_site" EXACT [] -synonym: "polypeptide nickel ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001102 -name: polypeptide_tungsten_ion_contact_site -alt_id: BS:00143 -def: "Residue involved in contact with tungsten." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide tungsten ion contact site" EXACT [] -synonym: "W_contact_site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001103 -name: polypeptide_zinc_ion_contact_site -alt_id: BS:00185 -def: "Residue involved in contact with zinc." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide zinc ion contact site" EXACT [] -synonym: "Zn_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001104 -name: catalytic_residue -alt_id: BS:00026 -def: "Amino acid involved in the activity of an enzyme." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "act_site" RELATED [uniprot:feature_type] -synonym: "active site residue" EXACT [] -synonym: "catalytic residue" EXACT [] -is_a: SO:0001237 ! amino_acid -relationship: part_of SO:0100019 ! polypeptide_catalytic_motif - -[Term] -id: SO:0001105 -name: polypeptide_ligand_contact -alt_id: BS:00157 -def: "Residues which interact with a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide ligand contact" EXACT [] -synonym: "protein-ligand interaction" RELATED [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001106 -name: asx_motif -alt_id: BS:00202 -def: "A motif of five consecutive residues and two H-bonds in which: Residue(i) is Aspartate or Asparagine (Asx), side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3), main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001107 -name: beta_bulge -alt_id: BS:00208 -def: "A motif of three residues within a beta-sheet in which the main chains of two consecutive residues are H-bonded to that of the third, and in which the dihedral angles are as follows: Residue(i): -140 degrees < phi(l) -20 degrees , -90 degrees < psi(l) < 40 degrees. 
Residue (i+1): -180 degrees < phi < -25 degrees or +120 degrees < phi < +180 degrees, +40 degrees < psi < +180 degrees or -180 degrees < psi < -120 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge" EXACT [] -xref: http://en.wikipedia.org/wiki/Beta_bulge "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001108 -name: beta_bulge_loop -alt_id: BS:00209 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds. Beta bulge loops often occur at the loop ends of beta-hairpins." [EBIBS:GAR, Http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001109 -name: beta_bulge_loop_five -alt_id: BS:00210 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+4), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+3), these loops have an RL nest at residues i+2 and i+3." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop five" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001110 -name: beta_bulge_loop_six -alt_id: BS:00211 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+5), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+4), these loops have an RL nest at residues i+3 and i+4." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop six" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001111 -name: beta_strand -alt_id: BS:00042 -def: "A beta strand describes a single length of polypeptide chain that forms part of a beta sheet. 
A single continuous stretch of amino acids adopting an extended conformation of hydrogen bonds between the N-O and the C=O of another part of the peptide. This forms a secondary protein structure in which two or more extended polypeptide regions are hydrogen-bonded to one another in a planar array." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "strand" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Beta_sheet "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001112 -name: antiparallel_beta_strand -alt_id: BS:0000341 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (one running N-terminal to C-terminal and one running C-terminal to N-terminal). Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i) and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they form two mutual backbone hydrogen bonds to each other's flanking peptide groups; this is known as a close pair of hydrogen bonds. The peptide backbone dihedral angles (phi, psi) are about (-140 degrees, 135 degrees) in antiparallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "antiparallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001113 -name: parallel_beta_strand -alt_id: BS:00151 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (both running N-terminal to C-terminal). This orientation is slightly less stable because it introduces nonplanarity in the inter-strand hydrogen bonding pattern. Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. 
In this case, if two atoms C-alpha (i)and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they do not hydrogen bond to each other; rather, one residue forms hydrogen bonds to the residues that flank the other (but not vice versa). For example, residue i may form hydrogen bonds to residues j - 1 and j + 1; this is known as a wide pair of hydrogen bonds. By contrast, residue j may hydrogen-bond to different residues altogether, or to none at all. The dihedral angles (phi, psi) are about (-120 degrees, 115 degrees) in parallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "parallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001114 -name: peptide_helix -alt_id: BS:00152 -def: "A helix is a secondary_structure conformation where the peptide backbone forms a coil." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "helix" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001115 -name: left_handed_peptide_helix -alt_id: BS:00222 -def: "A left handed helix is a region of peptide where the coiled conformation turns in an anticlockwise, left handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix-l" RELATED [] -synonym: "left handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001116 -name: right_handed_peptide_helix -alt_id: BS:0000339 -def: "A right handed helix is a region of peptide where the coiled conformation turns in a clockwise, right handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix" RELATED BS [] -synonym: "right handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001117 -name: alpha_helix -alt_id: BS:00040 -def: "The helix has 3.6 residues per turn which corersponds to a translation of 1.5 angstroms (= 0.15 nm) along the helical axis. Every backbone N-H group donates a hydrogen bond to the backbone C=O group of the amino acid four residues earlier." 
[EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "a-helix" RELATED BS [] -synonym: "helix" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Alpha_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001118 -name: pi_helix -alt_id: BS:00153 -def: "The pi helix has 4.1 residues per turn and a translation of 1.15 (=0.115 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid five residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "pi helix" EXACT [] -xref: http://en.wikipedia.org/wiki/Pi_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001119 -name: three_ten_helix -alt_id: BS:0000340 -def: "The 3-10 helix has 3 residues per turn with a translation of 2.0 angstroms (=0.2 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid three residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "3(10) helix" EXACT [] -synonym: "3-10 helix" EXACT [] -synonym: "310 helix" EXACT [] -synonym: "three_ten_helix" EXACT [] -xref: http://en.wikipedia.org/wiki/310_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001120 -name: polypeptide_nest_motif -alt_id: BS:00223 -def: "A motif of two consecutive residues with dihedral angles. Nest should not have Proline as any residue. Nests frequently occur as parts of other motifs such as Schellman loops." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest" RELATED BS [] -synonym: "nest_motif" EXACT [] -synonym: "polypeptide nest motif" RELATED [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001121 -name: polypeptide_nest_left_right_motif -alt_id: BS:00224 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees. 
Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_left_right" EXACT [] -synonym: "nest_lr" EXACT [] -synonym: "polypeptide nest left right motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001122 -name: polypeptide_nest_right_left_motif -alt_id: BS:00225 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_right_left" EXACT [] -synonym: "nest_rl" EXACT [] -synonym: "polypeptide nest right left motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001123 -name: schellmann_loop -alt_id: BS:00226 -def: "A motif of six or seven consecutive residues that contains two H-bonds." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "paperclip" RELATED BS [] -synonym: "paperclip loop" RELATED [] -synonym: "schellmann_loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001124 -name: schellmann_loop_seven -alt_id: BS:00228 -def: "Wild type: A motif of seven consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+6), the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+5)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop seven" EXACT [] -synonym: "seven-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! 
schellmann_loop - -[Term] -id: SO:0001125 -name: schellmann_loop_six -alt_id: BS:00227 -def: "Common Type: A motif of six consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+5) the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop six" EXACT [] -synonym: "six-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001126 -name: serine_threonine_motif -alt_id: BS:00229 -def: "A motif of five consecutive residues and two hydrogen bonds in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3) , the main-chain CO group of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine motif" EXACT [] -synonym: "st motif" EXACT [] -synonym: "st_motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001127 -name: serine_threonine_staple_motif -alt_id: BS:00230 -def: "A motif of four or five consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain OH of residue(i) is H-bonded to the main-chain CO of residue(i3) or (i4), Phi angles of residues(i1), (i2) and (i3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine threonine staple motif" EXACT [] -synonym: "st_staple" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001128 -name: polypeptide_turn_motif -alt_id: BS:00148 -def: "A reversal in the direction of the backbone of a protein that is stabilized by hydrogen bond between backbone NH and CO groups, involving no more than 4 amino acid residues." 
[EBIBS:GAR, uniprot:feature_type] -comment: Range. -subset: biosapiens -synonym: "turn" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001129 -name: asx_turn_left_handed_type_one -alt_id: BS:00206 -def: "Left handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type one" EXACT [] -synonym: "asx_turn_il" RELATED [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001130 -name: asx_turn_left_handed_type_two -alt_id: BS:00204 -def: "Left handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type two" EXACT [] -synonym: "asx_turn_iil" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001131 -name: asx_turn_right_handed_type_two -alt_id: BS:00205 -def: "Right handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn right handed type two" EXACT [] -synonym: "asx_turn_iir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001132 -name: asx_turn_right_handed_type_one -alt_id: BS:00207 -def: "Right handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn type right handed type one" EXACT [] -synonym: "asx_turn_ir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001133 -name: beta_turn -alt_id: BS:00212 -def: "A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles of the second and third residues, which are the basis for sub-categorization." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001134 -name: beta_turn_left_handed_type_one -alt_id: BS:00215 -def: "Left handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles:- Residue(i+1): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees. Residue(i+2): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type one" EXACT [] -synonym: "beta_turn_il" EXACT [] -synonym: "type I' beta turn" EXACT [] -synonym: "type I' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001135 -name: beta_turn_left_handed_type_two -alt_id: BS:00213 -def: "Left handed type II: A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees > phi > -20 degrees, +80 degrees > psi > +180 degrees. Residue(i+2): +20 degrees > phi > +140 degrees, -40 degrees > psi > +90 degrees." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type two" EXACT [] -synonym: "beta_turn_iil" EXACT [] -synonym: "type II' beta turn" EXACT [] -synonym: "type II' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001136 -name: beta_turn_right_handed_type_one -alt_id: BS:00216 -def: "Right handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+2): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type one" EXACT [] -synonym: "beta_turn_ir" EXACT [] -synonym: "type I beta turn" EXACT [] -synonym: "type I turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001137 -name: beta_turn_right_handed_type_two -alt_id: BS:00214 -def: "Right handed type II:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, +80 degrees < psi < +180 degrees. Residue(i+2): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type two" EXACT [] -synonym: "beta_turn_iir" EXACT [] -synonym: "type II beta turn" EXACT [] -synonym: "type II turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001138 -name: gamma_turn -alt_id: BS:00219 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001139 -name: gamma_turn_classic -alt_id: BS:00220 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=75.0 - psi(i+1)=-64.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "classic gamma turn" EXACT [] -synonym: "gamma turn classic" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001140 -name: gamma_turn_inverse -alt_id: BS:00221 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=-79.0 - psi(i+1)=69.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn inverse" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001141 -name: serine_threonine_turn -alt_id: BS:00231 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine turn" EXACT [] -synonym: "st_turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001142 -name: st_turn_left_handed_type_one -alt_id: BS:00234 -def: "The peptide twists in an anticlockwise, left handed manner. 
The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type one" EXACT [] -synonym: "st_turn_il" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001143 -name: st_turn_left_handed_type_two -alt_id: BS:00232 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type two" EXACT [] -synonym: "st_turn_iil" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001144 -name: st_turn_right_handed_type_one -alt_id: BS:00235 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type one" EXACT [] -synonym: "st_turn_ir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001145 -name: st_turn_right_handed_type_two -alt_id: BS:00233 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type two" EXACT [] -synonym: "st_turn_iir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001146 -name: polypeptide_variation_site -alt_id: BS:00336 -def: "A site of sequence variation (alteration). Alternative sequence due to naturally occuring events such as polymorphisms and altermatve splicing or experimental methods such as site directed mutagenesis." [EBIBS:GAR, SO:ke] -comment: For example, was a substitution natural or mutated as part of an experiment? This term is added to merge the biosapiens term sequence_variations. -subset: biosapiens -synonym: "sequence_variations" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001147 -name: natural_variant_site -alt_id: BS:00071 -def: "Describes the natural sequence variants due to polymorphisms, disease-associated mutations, RNA editing and variations between strains, isolates or cultivars." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "natural_variant" BROAD [] -synonym: "sequence variation" BROAD [] -synonym: "variant" BROAD [uniprot:feature_type] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001148 -name: mutated_variant_site -alt_id: BS:00036 -def: "Site which has been experimentally altered." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mutagen" EXACT BS [uniprot:feature_type] -synonym: "mutagenesis" EXACT [] -synonym: "mutated_site" EXACT [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001149 -name: alternate_sequence_site -alt_id: BS:00073 -alt_id: SO:0001065 -def: "Description of sequence variants produced by alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. 
-subset: biosapiens -synonym: "alternative_sequence" EXACT [] -synonym: "isoform" NARROW [] -synonym: "sequence variation" NARROW [] -synonym: "var_seq" EXACT [uniprot:feature_type] -synonym: "varsplic" NARROW [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001150 -name: beta_turn_type_six -def: "A motif of four consecutive peptide resides of type VIa or type VIb and where the i+2 residue is cis-proline." [SO:cb] -subset: biosapiens -synonym: "beta turn type six" EXACT [] -synonym: "cis-proline loop" EXACT [] -synonym: "type VI beta turn" EXACT [] -synonym: "type VI turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001151 -name: beta_turn_type_six_a -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -90 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six a" EXACT [] -synonym: "type VIa beta turn" EXACT [] -synonym: "type VIa turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001152 -name: beta_turn_type_six_a_one -subset: biosapiens -synonym: "beta turn type six a one" EXACT [] -synonym: "type VIa1 beta turn" EXACT [] -synonym: "type VIa1 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001153 -name: beta_turn_type_six_a_two -subset: biosapiens -synonym: "beta turn type six a two" EXACT [] -synonym: "type VIa2 beta turn" EXACT [] -synonym: "type VIa2 turn" EXACT [] -is_a: SO:0001151 ! 
beta_turn_type_six_a - -[Term] -id: SO:0001154 -name: beta_turn_type_six_b -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -120 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -60 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six b" EXACT [] -synonym: "type VIb beta turn" EXACT [] -synonym: "type VIb turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001155 -name: beta_turn_type_eight -def: "A motif of four consecutive peptide residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ -30 degrees. Residue(i+2): phi ~ -120 degrees, psi ~ 120 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type eight" EXACT [] -synonym: "type VIII beta turn" EXACT [] -synonym: "type VIII turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001156 -name: DRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -10 and -60 relative to the TSS. Consensus sequence is WATCGATW." [PMID:12537576] -comment: This consensus sequence was identified computationally using the MEME algorithm within core promoter sequences from -60 to +40, with an E value of 1.7e-183. Tends to co-occur with Motif 7. Tends to not occur with DPE motif (SO:0000015) or motif 10. -synonym: "DRE motif" EXACT [] -synonym: "NDM4" EXACT [] -synonym: "WATCGATW_motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001157 -name: DMv4_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements with respect to the TSS (+1). Consensus sequence is YGGTCACACTR. Marked spatial preference within core promoter; tend to occur near the TSS, although not as tightly as INR (SO:0000014)." [PMID:16827941\:12537576] -synonym: "directional motif v4" EXACT [] -synonym: "DMv4" EXACT [] -synonym: "DMv4 motif" EXACT [] -synonym: "motif 1 element" EXACT [] -synonym: "promoter motif 1" EXACT [] -synonym: "YGGTCACATR" NARROW [] -is_a: SO:0000713 ! DNA_motif - -[Term] -id: SO:0001158 -name: E_box_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and +1 relative to the TSS. Consensus sequence is AWCAGCTGWT. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015)." [PMID:12537576\:16827941] -synonym: "AWCAGCTGWT" NARROW [] -synonym: "E box motif" EXACT [] -synonym: "generic E box motif" EXACT [] -synonym: "NDM5" RELATED [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001159 -name: DMv5_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -50 and -10 relative to the TSS. Consensus sequence is KTYRGTATWTTT. Tends to co-occur with DMv4 (SO:0001157) . Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v5" EXACT [] -synonym: "DMv5" EXACT [] -synonym: "DMv5 motif" EXACT [] -synonym: "KTYRGTATWTTT" NARROW [] -synonym: "promoter motif 6" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001160 -name: DMv3_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -30 and +15 relative to the TSS. 
Consensus sequence is KNNCAKCNCTRNY. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015) or MTE (0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v3" EXACT [] -synonym: "DMv3" EXACT [] -synonym: "DMv3 motif" EXACT [] -synonym: "KNNCAKCNCTRNY" NARROW [] -synonym: "promoter motif 7" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001161 -name: DMv2_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and -45 relative to the TSS. Consensus sequence is MKSYGGCARCGSYSS. Tends to co-occur with DMv3 (SO:0001160). Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v2" EXACT [] -synonym: "DMv2" EXACT [] -synonym: "DMv2 motif" EXACT [] -synonym: "MKSYGGCARCGSYSS" NARROW [] -synonym: "promoter motif 8" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001162 -name: MTE -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between +20 and +30 relative to the TSS. Consensus sequence is CSARCSSAACGS. Tends to co-occur with INR motif (SO:0000014). Tends to not occur with DPE motif (SO:0000015) or DMv5 (SO:0001159)." [PMID:12537576\:15231738] -synonym: "CSARCSSAACGS" NARROW [] -synonym: "motif ten element" EXACT [] -synonym: "motif_ten_element" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001163 -name: INR1_motif -def: "A promoter motif with consensus sequence TCATTCG." [PMID:16827941] -synonym: "directional motif p3" EXACT [] -synonym: "directional promoter motif 3" EXACT [] -synonym: "DMp3" EXACT [] -synonym: "INR1 motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001164 -name: DPE1_motif -def: "A promoter motif with consensus sequence CGGACGT." [PMID:16827941] -synonym: "directional motif 5" EXACT [] -synonym: "directional promoter motif 5" RELATED [] -synonym: "DMp5" EXACT [] -synonym: "DPE1 motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001165 -name: DMv1_motif -def: "A promoter motif with consensus sequence CARCCCT." [PMID:16827941] -synonym: "directional promoter motif v1" RELATED [] -synonym: "DMv1" RELATED [] -synonym: "DMv1 motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001166 -name: GAGA_motif -def: "A non directional promoter motif with consensus sequence GAGAGCG." [PMID:16827941] -synonym: "GAGA" EXACT [] -synonym: "GAGA motif" EXACT [] -synonym: "NDM1" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001167 -name: NDM2_motif -def: "A non directional promoter motif with consensus CGMYGYCR." [PMID:16827941] -synonym: "NDM2" EXACT [] -synonym: "NDM2 motif" EXACT [] -synonym: "non directional promoter motif 2" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001168 -name: NDM3_motif -def: "A non directional promoter motif with consensus sequence GAAAGCT." [PMID:16827941] -synonym: "NDM3" EXACT [] -synonym: "NDM3 motif" EXACT [] -synonym: "non directional motif 3" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001169 -name: ds_RNA_viral_sequence -def: "A ds_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded RNA." [SO:ke] -synonym: "double stranded RNA virus sequence" EXACT [] -synonym: "ds RNA viral sequence" EXACT [] -is_a: SO:0001041 ! 
viral_sequence - -[Term] -id: SO:0001170 -name: polinton -def: "A kind of DNA transposon that populates the genomes of protists, fungi, and animals, characterized by a unique set of proteins necessary for their transposition, including a protein-primed DNA polymerase B, retroviral integrase, cysteine protease, and ATPase. Polintons are characterized by 6-bp target site duplications, terminal-inverted repeats that are several hundred nucleotides long, and 5'-AG and TC-3' termini. Polintons exist as autonomous and nonautonomous elements." [PMID:16537396] -synonym: "maverick element" RELATED [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0001171 -name: rRNA_21S -def: "A component of the large ribosomal subunit in mitochondrial rRNA." [RSC:cb] -synonym: "21S LSU rRNA" EXACT [] -synonym: "21S ribosomal RNA" EXACT [] -synonym: "21S rRNA" EXACT [] -synonym: "rRNA 21S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001172 -name: tRNA_region -def: "A region of a tRNA." [RSC:cb] -synonym: "tRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000253 ! tRNA - -[Term] -id: SO:0001173 -name: anticodon_loop -def: "A sequence of seven nucleotide bases in tRNA which contains the anticodon. It has the sequence 5'-pyrimidine-purine-anticodon-modified purine-any base-3." [ISBN:0716719207] -synonym: "anti-codon loop" EXACT [] -synonym: "anticodon loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001174 -name: anticodon -def: "A sequence of three nucleotide bases in tRNA which recognizes a codon in mRNA." [RSC:cb] -synonym: "anti-codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Anticodon "wiki" -is_a: SO:0001172 ! tRNA_region -relationship: part_of SO:0001173 ! anticodon_loop - -[Term] -id: SO:0001175 -name: CCA_tail -def: "Base sequence at the 3' end of a tRNA. The 3'-hydroxyl group on the terminal adenosine is the attachment point for the amino acid." 
[ISBN:0716719207] -synonym: "CCA sequence" EXACT [] -synonym: "CCA tail" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001176 -name: DHU_loop -def: "Non-base-paired sequence of nucleotide bases in tRNA. It contains several dihydrouracil residues." [ISBN:071671920] -synonym: "D loop" RELATED [] -synonym: "DHU loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001177 -name: T_loop -def: "Non-base-paired sequence of three nucleotide bases in tRNA. It has sequence T-Psi-C." [ISBN:0716719207] -synonym: "T loop" EXACT [] -synonym: "TpsiC loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001178 -name: pyrrolysine_tRNA_primary_transcript -def: "A primary transcript encoding pyrrolysyl tRNA (SO:0000766)." [RSC:cb] -synonym: "pyrrolysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0001179 -name: U3_snoRNA -def: "U3 snoRNA is a member of the box C/D class of small nucleolar RNAs. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -comment: The definition is most of the old definition for snoRNA (SO:0000275). 
-synonym: "small nucleolar RNA U3" EXACT [] -synonym: "snoRNA U3" EXACT [] -synonym: "U3 small nucleolar RNA" EXACT [] -synonym: "U3 snoRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Small_nucleolar_RNA_U3 "wiki" -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0001180 -name: AU_rich_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is rich in AUUUA pentamers. Messenger RNAs bearing multiple AU-rich elements are often unstable." [PMID:7892223] -synonym: "ARE" RELATED [] -synonym: "AU rich element" EXACT [] -synonym: "AU-rich element" EXACT [] -xref: http://en.wikipedia.org/wiki/AU-rich_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001181 -name: Bruno_response_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is bound by the Drosophila Bruno protein and its homologs." [PMID:10893231] -comment: Not to be confused with BRE_motif (SO:0000016), which binds transcription factor II B. -synonym: "BRE" RELATED [] -synonym: "Bruno response element" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001182 -name: iron_responsive_element -def: "A regulatory sequence found in the 5' and 3' UTRs of many mRNAs which encode iron-binding proteins. It has a hairpin structure and is recognized by trans-acting proteins known as iron-regulatory proteins." [PMID:3198610, PMID:8710843] -synonym: "IRE" EXACT [] -synonym: "iron responsive element" EXACT [] -xref: http://en.wikipedia.org/wiki/Iron_responsive_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0001183 -name: morpholino -def: "An attribute describing a sequence composed of nucleobases bound to a morpholino backbone. A morpholino backbone consists of morpholine (CHEBI:34856) rings connected by phosphorodiamidate linkages." [RSC:cb] -comment: Do not use this for feature annotation. 
Use morpholino_oligo (SO:0000034) instead. -xref: http://en.wikipedia.org/wiki/Morpholino "wiki" -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001184 -name: PNA -def: "An attribute describing a sequence composed of peptide nucleic acid (CHEBI:48021), a chemical consisting of nucleobases bound to a backbone composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [RSC:cb] -comment: Do not use this term for feature annotation. Use PNA_oligo (SO:0001011) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001185 -name: enzymatic -def: "An attribute describing the sequence of a transcript that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use enzymatic_RNA (SO:0000372) instead. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001186 -name: ribozymic -def: "An attribute describing the sequence of a transcript that has catalytic activity even without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use ribozyme (SO:0000374) instead. -is_a: SO:0001185 ! enzymatic - -[Term] -id: SO:0001187 -name: pseudouridylation_guide_snoRNA -def: "A snoRNA that specifies the site of pseudouridylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA pseudouridylation guide activity (GO:0030558). -synonym: "pseudouridylation guide snoRNA" EXACT [] -is_a: SO:0000594 ! H_ACA_box_snoRNA - -[Term] -id: SO:0001188 -name: LNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of 'locked' deoxyribose rings connected to a phosphate backbone. The deoxyribose unit's conformation is 'locked' by a 2'-C,4'-C-oxymethylene link." [CHEBI:48010] -comment: Do not use this term for feature annotation. 
Use LNA_oligo (SO:0001189) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001189 -name: LNA_oligo -def: "An oligo composed of LNA residues." [RSC:cb] -synonym: "LNA oligo" EXACT [] -synonym: "locked nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Locked_nucleic_acid "wiki" -is_a: SO:0001247 ! implied link automatically realized ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001188 ! LNA -relationship: has_quality SO:0001188 ! implied link automatically realized ! LNA - -[Term] -id: SO:0001190 -name: TNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of threose rings connected to a phosphate backbone." [CHEBI:48019] -comment: Do not use this term for feature annotation. Use TNA_oligo (SO:0001191) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001191 -name: TNA_oligo -def: "An oligo composed of TNA residues." [RSC:cb] -synonym: "threose nucleic acid" EXACT [] -synonym: "TNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Threose_nucleic_acid "wiki" -is_a: SO:0001247 ! implied link automatically realized ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001190 ! TNA -relationship: has_quality SO:0001190 ! implied link automatically realized ! TNA - -[Term] -id: SO:0001192 -name: GNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of an acyclic three-carbon propylene glycol connected to a phosphate backbone. It has two enantiomeric forms, (R)-GNA and (S)-GNA." [CHEBI:48015] -comment: Do not use this term for feature annotation. Use GNA_oligo (SO:0001192) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001193 -name: GNA_oligo -def: "An oligo composed of GNA residues." 
[RSC:cb] -synonym: "glycerol nucleic acid" EXACT [] -synonym: "glycol nucleic acid" EXACT [] -synonym: "GNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Glycerol_nucleic_acid "wiki" -is_a: SO:0001247 ! implied link automatically realized ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001192 ! GNA -relationship: has_quality SO:0001192 ! implied link automatically realized ! GNA - -[Term] -id: SO:0001194 -name: R_GNA -def: "An attribute describing a GNA sequence in the (R)-GNA enantiomer." [CHEBI:48016] -comment: Do not use this term for feature annotation. Use R_GNA_oligo (SO:0001195) instead. -synonym: "R GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001195 -name: R_GNA_oligo -def: "An oligo composed of (R)-GNA residues." [RSC:cb] -synonym: "(R)-glycerol nucleic acid" EXACT [] -synonym: "(R)-glycol nucleic acid" EXACT [] -synonym: "R GNA oligo" EXACT [] -is_a: SO:0001193 ! implied link automatically realized ! GNA_oligo -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001194 ! R_GNA -relationship: has_quality SO:0001194 ! implied link automatically realized ! R_GNA - -[Term] -id: SO:0001196 -name: S_GNA -def: "An attribute describing a GNA sequence in the (S)-GNA enantiomer." [CHEBI:48017] -comment: Do not use this term for feature annotation. Use S_GNA_oligo (SO:0001197) instead. -synonym: "S GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001197 -name: S_GNA_oligo -def: "An oligo composed of (S)-GNA residues." [RSC:cb] -synonym: "(S)-glycerol nucleic acid" EXACT [] -synonym: "(S)-glycol nucleic acid" EXACT [] -synonym: "S GNA oligo" EXACT [] -is_a: SO:0001193 ! implied link automatically realized ! GNA_oligo -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001196 ! S_GNA -relationship: has_quality SO:0001196 ! implied link automatically realized ! 
S_GNA - -[Term] -id: SO:0001198 -name: ds_DNA_viral_sequence -def: "A ds_DNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded DNA." [SO:ke] -synonym: "double stranded DNA virus" EXACT [] -synonym: "ds DNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001199 -name: ss_RNA_viral_sequence -def: "A ss_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as single stranded RNA." [SO:ke] -synonym: "single strand RNA virus" EXACT [] -synonym: "ss RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001200 -name: negative_sense_ssRNA_viral_sequence -def: "A negative_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that is complementary to mRNA and must be converted to positive sense RNA by RNA polymerase before translation." [SO:ke] -synonym: "negative sense single stranded RNA virus" RELATED [] -synonym: "negative sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001201 -name: positive_sense_ssRNA_viral_sequence -def: "A positive_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that can be immediately translated by the host." [SO:ke] -synonym: "positive sense single stranded RNA virus" RELATED [] -synonym: "positive sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001202 -name: ambisense_ssRNA_viral_sequence -def: "A ambisense_RNA_virus is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus with both messenger and anti messenger polarity." [SO:ke] -synonym: "ambisense single stranded RNA virus" EXACT [] -synonym: "ambisense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! 
ss_RNA_viral_sequence - -[Term] -id: SO:0001203 -name: RNA_polymerase_promoter -def: "A region (DNA) to which RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "RNA polymerase promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0001204 -name: Phage_RNA_Polymerase_Promoter -def: "A region (DNA) to which Bacteriophage RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "Phage RNA Polymerase Promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0001205 -name: SP6_RNA_Polymerase_Promoter -def: "A region (DNA) to which the SP6 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "SP6 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001206 -name: T3_RNA_Polymerase_Promoter -def: "A DNA sequence to which the T3 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T3 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001207 -name: T7_RNA_Polymerase_Promoter -def: "A region (DNA) to which the T7 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T7 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001208 -name: five_prime_EST -def: "An EST read from the 5' end of a transcript that usually codes for a protein. These regions tend to be conserved across species and do not change much within a gene family." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "5' EST" EXACT [] -synonym: "five prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001209 -name: three_prime_EST -def: "An EST read from the 3' end of a transcript. They are more likely to fall within non-coding, or untranslated regions(UTRs)." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "3' EST" EXACT [] -synonym: "three prime EST" EXACT [] -is_a: SO:0000345 ! 
EST - -[Term] -id: SO:0001210 -name: translational_frameshift -def: "The region of mRNA (not divisible by 3 bases) that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "ribosomal frameshift" EXACT [] -synonym: "translational frameshift" EXACT [] -xref: http://en.wikipedia.org/wiki/Translational_frameshift "wiki" -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0001211 -name: plus_1_translational_frameshift -def: "The region of mRNA 1 base long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 1 ribosomal frameshift" EXACT [] -synonym: "plus 1 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001212 -name: plus_2_translational_frameshift -def: "The region of mRNA 2 bases long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 2 ribosomal frameshift" EXACT [] -synonym: "plus 2 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001213 -name: group_III_intron -def: "Group III introns are introns found in the mRNA of the plastids of euglenoid protists. They are spliced by a two step transesterification with bulged adenosine as initiating nucleophile." [PMID:11377794] -comment: GO:0000374. -synonym: "group III intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_III_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -def: "The maximal intersection of exon and UTR." [SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with astop codon will be partially coding and partially non coding. -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! 
exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001216 -name: endonuclease_spliced_intron -def: "An intron that spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -synonym: "endonuclease spliced intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0001217 -name: protein_coding_gene -synonym: "protein coding gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000010 ! protein_coding -relationship: has_quality SO:0000010 ! implied link automatically realized ! protein_coding - -[Term] -id: SO:0001218 -name: transgenic_insertion -synonym: "transgenic insertion" EXACT [] -is_a: SO:0000667 ! implied link automatically realized ! insertion -intersection_of: SO:0000667 ! insertion -intersection_of: has_quality SO:0000781 ! transgenic -relationship: has_quality SO:0000781 ! implied link automatically realized ! transgenic - -[Term] -id: SO:0001219 -name: retrogene -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000569 ! retrotransposed -relationship: has_quality SO:0000569 ! implied link automatically realized ! retrotransposed - -[Term] -id: SO:0001220 -name: silenced_by_RNA_interference -def: "An attribute describing an epigenetic process where a gene is inactivated by RNA interference." [RSC:cb] -comment: RNA interference is GO:0016246. -synonym: "silenced by RNA interference" EXACT [] -is_a: SO:0000893 ! 
silenced - -[Term] -id: SO:0001221 -name: silenced_by_histone_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by histone modification." [RSC:cb] -comment: Histone modification is GO:0016570. -synonym: "silenced by histone modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001222 -name: silenced_by_histone_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone methylation." [RSC:cb] -comment: Histone methylation is GO:0016571. -synonym: "silenced by histone methylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001223 -name: silenced_by_histone_deacetylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone deacetylation." [RSC:cb] -comment: Histone deacetylation is GO:0016573. -synonym: "silenced by histone deacetylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001224 -name: gene_silenced_by_RNA_interference -def: "A gene that is silenced by RNA interference." [SO:xp] -synonym: "gene silenced by RNA interference" EXACT [] -synonym: "RNA interference silenced gene" EXACT [] -synonym: "RNAi silenced gene" EXACT [] -is_a: SO:0000127 ! implied link automatically realized ! silenced_gene -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001220 ! silenced_by_RNA_interference -relationship: has_quality SO:0001220 ! implied link automatically realized ! silenced_by_RNA_interference - -[Term] -id: SO:0001225 -name: gene_silenced_by_histone_modification -def: "A gene that is silenced by histone modification." [SO:xp] -synonym: "gene silenced by histone modification" EXACT [] -is_a: SO:0000127 ! implied link automatically realized ! silenced_gene -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001221 ! silenced_by_histone_modification -relationship: has_quality SO:0001221 ! 
implied link automatically realized ! silenced_by_histone_modification - -[Term] -id: SO:0001226 -name: gene_silenced_by_histone_methylation -def: "A gene that is silenced by histone methylation." [SO:xp] -synonym: "gene silenced by histone methylation" EXACT [] -is_a: SO:0001225 ! implied link automatically realized ! gene_silenced_by_histone_modification -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001222 ! silenced_by_histone_methylation -relationship: has_quality SO:0001222 ! implied link automatically realized ! silenced_by_histone_methylation - -[Term] -id: SO:0001227 -name: gene_silenced_by_histone_deacetylation -def: "A gene that is silenced by histone deacetylation." [SO:xp] -synonym: "gene silenced by histone deacetylation" EXACT [] -is_a: SO:0001225 ! implied link automatically realized ! gene_silenced_by_histone_modification -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001223 ! silenced_by_histone_deacetylation -relationship: has_quality SO:0001223 ! implied link automatically realized ! silenced_by_histone_deacetylation - -[Term] -id: SO:0001228 -name: dihydrouridine -def: "A modified RNA base in which the 5,6-dihydrouracil is bound to the ribose ring." [RSC:cb] -synonym: "D" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Dihydrouridine "wiki" -xref: RNAMOD:051 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001229 -name: pseudouridine -def: "A modified RNA base in which the 5- position of the uracil is bound to the ribose ring instead of the 4- position." [RSC:cb] -comment: The free molecule is CHEBI:17802. -synonym: "Y" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Pseudouridine "wiki" -xref: RNAMOD:050 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001230 -name: inosine -def: "A modified RNA base in which hypoxanthine is bound to the ribose ring." 
[http://library.med.utah.edu/RNAmods/, RSC:cb] -comment: The free molecule is CHEBI:17596. -synonym: "I" RELATED [] -synonym: "RNAMOD:017" RELATED [] -xref: http://en.wikipedia.org/wiki/Inosine "wiki" -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001231 -name: seven_methylguanine -def: "A modified RNA base in which guanine is methylated at the 7- position." [RSC:cb] -comment: The free molecule is CHEBI:2274. -synonym: "7-methylguanine" EXACT [] -synonym: "seven methylguanine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001232 -name: ribothymidine -def: "A modified RNA base in which thymine is bound to the ribose ring." [RSC:cb] -comment: The free molecule is CHEBI:30832. -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001233 -name: methylinosine -def: "A modified RNA base in which methylhypoxanthine is bound to the ribose ring." [RSC:cb] -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001234 -name: mobile -def: "An attribute describing a feature that has either intra-genome or intracellular mobility." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Mobile "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001235 -name: replicon -def: "A region containing at least one unique origin of replication and a unique termination site." [ISBN:0716719207] -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000001 ! region - -[Term] -id: SO:0001237 -name: amino_acid -def: "A sequence feature that corresponds to a single amino acid residue in a polypeptide." [RSC:cb] -comment: Probably in the future this will cross reference to Chebi. 
-synonym: "amino acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Amino_acid "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0001238 -name: major_TSS -synonym: "major transcription start site" EXACT [] -synonym: "major TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001239 -name: minor_TSS -synonym: "minor TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001240 -name: TSS_region -def: "The region of a gene from the 5' most TSS to the 3' TSS." [BBOP:nw] -synonym: "TSS region" EXACT [] -is_a: SO:0000842 ! gene_component_region -relationship: has_part SO:0000315 ! TSS - -[Term] -id: SO:0001241 -name: encodes_alternate_transcription_start_sites -synonym: "encodes alternate transcription start sites" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0001243 -name: miRNA_primary_transcript_region -def: "A part of an miRNA primary_transcript." [SO:ke] -synonym: "miRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0001244 -name: pre_miRNA -def: "The 60-70 nucleotide region remain after Drosha processing of the primary transcript, that folds back upon itself to form a hairpin sructure." [SO:ke] -synonym: "pre-miRNA" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0001245 -name: miRNA_stem -def: "The stem of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA stem" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001246 -name: miRNA_loop -def: "The loop of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA loop" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! 
pre_miRNA - -[Term] -id: SO:0001247 -name: synthetic_oligo -def: "An oligo composed of synthetic nucleotides." [SO:ke] -synonym: "synthetic oligo" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0001248 -name: assembly -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001249 -name: fragment_assembly -def: "A fragment assembly is a genome assembly that orders overlapping fragments of the genome based on landmark sequences. The base pair distance between the landmarks is known allowing additivity of lengths." [SO:ke] -synonym: "fragment assembly" EXACT [] -synonym: "physical map" EXACT [] -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0001250 -name: fingerprint_map -def: "A fingerprint_map is a physical map composed of restriction fragments." [SO:ke] -synonym: "BACmap" EXACT [] -synonym: "fingerprint map" EXACT [] -synonym: "FPC" EXACT [] -synonym: "FPCmap" EXACT [] -synonym: "restriction map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000412 ! restriction_fragment - -[Term] -id: SO:0001251 -name: STS_map -def: "An STS map is a physical map organized by the unique STS landmarks." [SO:ke] -synonym: "STS map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001252 -name: RH_map -def: "A radiation hybrid map is a physical map." [SO:ke] -synonym: "radiation hybrid map" EXACT [] -synonym: "RH map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001253 -name: sonicate_fragment -def: "A DNA fragment generated by sonication. Sonication is a technique used to sheer DNA into smaller fragments." [SO:ke] -synonym: "sonicate fragment" EXACT [] -is_a: SO:0000143 ! 
assembly_component - -[Term] -id: SO:0001254 -name: polyploid -def: "A kind of chromosome variation where the chromosome complement is an exact multiple of the haploid number and is greater than the diploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Polyploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0001255 -name: autopolyploid -def: "A polyploid where the multiple chromosome set was derived from the same organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Autopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001256 -name: allopolyploid -def: "A polyploid where the multiple chromosome set was derived from a different organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Allopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001257 -name: homing_endonuclease_binding_site -def: "The binding site (recognition site) of a homing endonuclease. The binding site is typically large." [SO:ke] -synonym: "homing endonuclease binding site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0001258 -name: octamer_motif -def: "A sequence element characteristic of some RNA polymerase II promoters with sequence ATTGCAT that binds Pou-domain transcription factors." [GOC:dh, PMID:3095662] -comment: Nature. 1986 Oct 16-22;323(6089):640-3. -synonym: "octamer motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001259 -name: apicoplast_chromosome -def: "A chromosome originating in an apicoplast." [SO:xp] -synonym: "apicoplast chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000743 ! apicoplast_sequence -relationship: has_origin SO:0000743 ! implied link automatically realized ! apicoplast_sequence - -[Term] -id: SO:0001260 -name: sequence_collection -def: "A collection of discontinuous sequences." 
[SO:ke] -synonym: "sequence collection" EXACT [] - -[Term] -id: SO:0001261 -name: overlapping_feature_set -def: "A continuous region of sequence composed of the overlapping of multiple sequence_features, which ultimately provides evidence for another sequence_feature." [SO:ke] -comment: This feature was requested by nicole, tracker id 1911479. It is required to gather evidense together for annotation. An example would be overlapping ESTs that support an mRNA. -synonym: "overlapping feature set" EXACT [] -is_a: SO:0000703 ! experimental_result_region - -[Term] -id: SO:0001262 -name: overlapping_EST_set -def: "A continous experimental result region extending the length of multiple overlapping EST's." [SO:ke] -synonym: "overlapping EST set" EXACT [] -is_a: SO:0001261 ! overlapping_feature_set -relationship: has_part SO:0000345 ! EST - -[Term] -id: SO:0001263 -name: ncRNA_gene -synonym: "ncRNA gen" EXACT [] -synonym: "ncRNA gene" EXACT [] -synonym: "non-coding RNA gene" RELATED [] -is_a: SO:0000704 ! gene - -[Term] -id: SO:0001264 -name: gRNA_gene -synonym: "gRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000979 ! gRNA_encoding -relationship: has_quality SO:0000979 ! implied link automatically realized ! gRNA_encoding - -[Term] -id: SO:0001265 -name: miRNA_gene -synonym: "miRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000571 ! miRNA_encoding -relationship: has_quality SO:0000571 ! implied link automatically realized ! miRNA_encoding - -[Term] -id: SO:0001266 -name: scRNA_gene -synonym: "scRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000575 ! scRNA_encoding -relationship: has_quality SO:0000575 ! 
implied link automatically realized ! scRNA_encoding - -[Term] -id: SO:0001267 -name: snoRNA_gene -synonym: "snoRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000578 ! snoRNA_encoding -relationship: has_quality SO:0000578 ! implied link automatically realized ! snoRNA_encoding - -[Term] -id: SO:0001268 -name: snRNA_gene -synonym: "snRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0001263 ! implied link automatically realized ! ncRNA_gene - -[Term] -id: SO:0001269 -name: SRP_RNA_gene -synonym: "SRP RNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000642 ! SRP_RNA_encoding -relationship: has_quality SO:0000642 ! implied link automatically realized ! SRP_RNA_encoding - -[Term] -id: SO:0001270 -name: stRNA_gene -synonym: "stRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000656 ! stRNA_encoding -relationship: has_quality SO:0000656 ! implied link automatically realized ! stRNA_encoding - -[Term] -id: SO:0001271 -name: tmRNA_gene -synonym: "tmRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000659 ! tmRNA_encoding -relationship: has_quality SO:0000659 ! implied link automatically realized ! tmRNA_encoding - -[Term] -id: SO:0001272 -name: tRNA_gene -synonym: "tRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000663 ! 
tRNA_encoding -relationship: has_quality SO:0000663 ! implied link automatically realized ! tRNA_encoding - -[Term] -id: SO:0001273 -name: modified_adenosine -def: "A modified adenine is an adenine base feature that has been altered." [SO:ke] -synonym: "modified adenosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001274 -name: modified_inosine -def: "A modified inosine is an inosine base feature that has been altered." [SO:ke] -synonym: "modified inosine" EXACT [] -is_a: SO:0001230 ! inosine - -[Term] -id: SO:0001275 -name: modified_cytidine -def: "A modified cytidine is a cytidine base feature which has been altered." [SO:ke] -synonym: "modified cytidine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001276 -name: modified_guanosine -synonym: "modified guanosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001277 -name: modified_uridine -synonym: "modified uridine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001278 -name: one_methylinosine -def: "1-methylinosine is a modified insosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylinosine" EXACT [] -synonym: "m1I" EXACT RNAMOD [] -synonym: "one methylinosine" EXACT [] -xref: RNAMOD:018 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001279 -name: one_two_prime_O_dimethylinosine -def: "1,2'-O-dimethylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylinosine" EXACT [] -synonym: "m'Im" EXACT RNAMOD [] -synonym: "one two prime O dimethylinosine" EXACT [] -xref: RNAMOD:019 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001280 -name: two_prime_O_methylinosine -def: "2'-O-methylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylinosine" EXACT [] -synonym: "Im" EXACT RNAMOD [] -synonym: "two prime O methylinosine" EXACT [] -xref: RNAMOD:081 -is_a: SO:0001274 ! 
modified_inosine - -[Term] -id: SO:0001281 -name: three_methylcytidine -def: "3-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylcytidine" EXACT [] -synonym: "m3C" EXACT RNAMOD [] -synonym: "three methylcytidine" EXACT [] -xref: RNAMOD:020 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001282 -name: five_methylcytidine -def: "5-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylcytidine" EXACT [] -synonym: "five methylcytidine" EXACT [] -synonym: "m5C" EXACT RNAMOD [] -xref: RNAMOD:021 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001283 -name: two_prime_O_methylcytidine -def: "2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylcytidine" EXACT [] -synonym: "Cm" EXACT RNAMOD [] -synonym: "two prime O methylcytidine" EXACT [] -xref: RNAMOD:022 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001284 -name: two_thiocytidine -def: "2-thiocytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiocytidine" EXACT [] -synonym: "s2C" EXACT RNAMOD [] -synonym: "two thiocytidine" EXACT [] -xref: RNAMOD:023 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001285 -name: N4_acetylcytidine -def: "N4-acetylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4C" EXACT RNAMOD [] -synonym: "N4 acetylcytidine" EXACT [] -synonym: "N4-acetylcytidine" EXACT [] -xref: RNAMOD:024 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001286 -name: five_formylcytidine -def: "5-formylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formylcytidine" EXACT [] -synonym: "f5C" EXACT RNAMOD [] -synonym: "five formylcytidine" EXACT [] -xref: RNAMOD:025 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001287 -name: five_two_prime_O_dimethylcytidine -def: "5,2'-O-dimethylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethylcytidine" EXACT [] -synonym: "five two prime O dimethylcytidine" EXACT [] -synonym: "m5Cm" EXACT RNAMOD [] -xref: RNAMOD:026 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001288 -name: N4_acetyl_2_prime_O_methylcytidine -def: "N4-acetyl-2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4Cm" EXACT RNAMOD [] -synonym: "N4 acetyl 2 prime O methylcytidine" EXACT [] -synonym: "N4-acetyl-2'-O-methylcytidine" EXACT [] -xref: RNAMOD:027 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001289 -name: lysidine -def: "Lysidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "k2C" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Lysidine "wiki" -xref: RNAMOD:028 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001290 -name: N4_methylcytidine -def: "N4-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4C" EXACT RNAMOD [] -synonym: "N4 methylcytidine" EXACT [] -synonym: "N4-methylcytidine" EXACT [] -xref: RNAMOD:082 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001291 -name: N4_2_prime_O_dimethylcytidine -def: "N4,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4Cm" EXACT RNAMOD [] -synonym: "N4 2 prime O dimethylcytidine" EXACT [] -synonym: "N4,2'-O-dimethylcytidine" EXACT [] -xref: RNAMOD:083 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001292 -name: five_hydroxymethylcytidine -def: "5-hydroxymethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxymethylcytidine" EXACT [] -synonym: "five hydroxymethylcytidine" EXACT [] -synonym: "hm5C" EXACT RNAMOD [] -xref: RNAMOD:084 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001293 -name: five_formyl_two_prime_O_methylcytidine -def: "5-formyl-2'-O-methylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-formyl-2'-O-methylcytidine" EXACT [] -synonym: "f5Cm" EXACT RNAMOD [] -synonym: "five formyl two prime O methylcytidine" EXACT [] -xref: RNAMOD:095 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001294 -name: N4_N4_2_prime_O_trimethylcytidine -def: "N4_N4_2_prime_O_trimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m42Cm" EXACT RNAMOD [] -synonym: "N4,N4,2'-O-trimethylcytidine" EXACT [] -xref: RNAMOD:107 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001295 -name: one_methyladenosine -def: "1_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyladenosine" EXACT [] -synonym: "m1A" EXACT RNAMOD [] -synonym: "one methyladenosine" EXACT [] -xref: RNAMOD:001 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001296 -name: two_methyladenosine -def: "2_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methyladenosine" EXACT [] -synonym: "m2A" EXACT RNAMOD [] -synonym: "two methyladenosine" EXACT [] -xref: RNAMOD:002 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001297 -name: N6_methyladenosine -def: "N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6A" EXACT RNAMOD [] -synonym: "N6 methyladenosine" EXACT [] -synonym: "N6-methyladenosine" EXACT [] -xref: RNAMOD:003 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001298 -name: two_prime_O_methyladenosine -def: "2prime_O_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyladenosine" EXACT [] -synonym: "Am" EXACT RNAMOD [] -synonym: "two prime O methyladenosine" EXACT [] -xref: RNAMOD:004 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001299 -name: two_methylthio_N6_methyladenosine -def: "2_methylthio_N6_methyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-methyladenosine" EXACT [] -synonym: "ms2m6A" EXACT RNAMOD [] -synonym: "two methylthio N6 methyladenosine" EXACT [] -xref: RNAMOD:005 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001300 -name: N6_isopentenyladenosine -def: "N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "i6A" EXACT RNAMOD [] -synonym: "N6 isopentenyladenosine" EXACT [] -synonym: "N6-isopentenyladenosine" EXACT [] -xref: RNAMOD:006 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001301 -name: two_methylthio_N6_isopentenyladenosine -def: "2_methylthio_N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-isopentenyladenosine" EXACT [] -synonym: "ms2i6A" EXACT RNAMOD [] -synonym: "two methylthio N6 isopentenyladenosine" EXACT [] -xref: RNAMOD:007 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001302 -name: N6_cis_hydroxyisopentenyl_adenosine -def: "N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "io6A" EXACT RNAMOD [] -synonym: "N6 cis hydroxyisopentenyl adenosine" EXACT [] -synonym: "N6-(cis-hydroxyisopentenyl)adenosine" EXACT [] -xref: RNAMOD:008 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001303 -name: two_methylthio_N6_cis_hydroxyisopentenyl_adenosine -def: "2_methylthio_N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-(cis-hydroxyisopentenyl) adenosine" EXACT [] -synonym: "ms2io6A" EXACT RNAMOD [] -synonym: "two methylthio N6 cis hydroxyisopentenyl adenosine" EXACT [] -xref: RNAMOD:009 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001304 -name: N6_glycinylcarbamoyladenosine -def: "N6_glycinylcarbamoyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "g6A" EXACT RNAMOD [] -synonym: "N6 glycinylcarbamoyladenosine" EXACT [] -synonym: "N6-glycinylcarbamoyladenosine" EXACT [] -xref: RNAMOD:010 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001305 -name: N6_threonylcarbamoyladenosine -def: "N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-threonylcarbamoyladenosine" EXACT [] -synonym: "t6A" EXACT RNAMOD [] -xref: RNAMOD:011 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001306 -name: two_methylthio_N6_threonyl_carbamoyladenosine -def: "2_methylthio_N6_threonyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-threonyl carbamoyladenosine" EXACT [] -synonym: "ms2t6A" EXACT RNAMOD [] -synonym: "two methylthio N6 threonyl carbamoyladenosine" EXACT [] -xref: RNAMOD:012 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001307 -name: N6_methyl_N6_threonylcarbamoyladenosine -def: "N6_methyl_N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6t6A" EXACT RNAMOD [] -synonym: "N6 methyl N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-methyl-N6-threonylcarbamoyladenosine" EXACT [] -xref: RNAMOD:013 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001308 -name: N6_hydroxynorvalylcarbamoyladenosine -def: "N6_hydroxynorvalylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "hn6A" EXACT RNAMOD [] -synonym: "N6 hydroxynorvalylcarbamoyladenosine" EXACT [] -synonym: "N6-hydroxynorvalylcarbamoyladenosine" EXACT [] -xref: RNAMOD:014 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001309 -name: two_methylthio_N6_hydroxynorvalyl_carbamoyladenosine -def: "2_methylthio_N6_hydroxynorvalyl_carbamoyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-hydroxynorvalyl carbamoyladenosine" EXACT [] -synonym: "ms2hn6A" EXACT RNAMOD [] -synonym: "two methylthio N6 hydroxynorvalyl carbamoyladenosine" EXACT [] -xref: RNAMOD:015 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001310 -name: two_prime_O_ribosyladenosine_phosphate -def: "2prime_O_ribosyladenosine_phosphate is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosyladenosine (phosphate)" EXACT [] -synonym: "Ar(p)" EXACT RNAMOD [] -synonym: "two prime O ribosyladenosine phosphate" EXACT [] -xref: RNAMOD:016 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001311 -name: N6_N6_dimethyladenosine -def: "N6_N6_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62A" EXACT RNAMOD [] -synonym: "N6,N6-dimethyladenosine" EXACT [] -xref: RNAMOD:080 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001312 -name: N6_2_prime_O_dimethyladenosine -def: "N6_2prime_O_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6Am" EXACT RNAMOD [] -synonym: "N6 2 prime O dimethyladenosine" EXACT [] -synonym: "N6,2'-O-dimethyladenosine" EXACT [] -xref: RNAMOD:088 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001313 -name: N6_N6_2_prime_O_trimethyladenosine -def: "N6_N6_2prime_O_trimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62Am" EXACT RNAMOD [] -synonym: "N6,N6,2'-O-trimethyladenosine" EXACT [] -xref: RNAMOD:089 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001314 -name: one_two_prime_O_dimethyladenosine -def: "1,2'-O-dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethyladenosine" EXACT [] -synonym: "m1Am" EXACT RNAMOD [] -synonym: "one two prime O dimethyladenosine" EXACT [] -xref: RNAMOD:097 -is_a: SO:0001273 ! 
modified_adenosine - -[Term] -id: SO:0001315 -name: N6_acetyladenosine -def: "N6_acetyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac6A" EXACT RNAMOD [] -synonym: "N6 acetyladenosine" EXACT [] -synonym: "N6-acetyladenosine" EXACT [] -xref: RNAMOD:102 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001316 -name: seven_deazaguanosine -def: "7-deazaguanosine is a moddified guanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-deazaguanosine" RELATED [] -synonym: "seven deazaguanosine" EXACT [] -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001317 -name: queuosine -def: "Queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "Q" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Queuosine "wiki" -xref: RNAMOD:043 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001318 -name: epoxyqueuosine -def: "Epoxyqueuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "eQ" EXACT RNAMOD [] -xref: RNAMOD:044 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001319 -name: galactosyl_queuosine -def: "Galactosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "galactosyl queuosine" EXACT [] -synonym: "galactosyl-queuosine" EXACT [] -synonym: "galQ" EXACT RNAMOD [] -xref: RNAMOD:045 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001320 -name: mannosyl_queuosine -def: "Mannosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "mannosyl queuosine" EXACT [] -synonym: "mannosyl-queuosine" EXACT [] -synonym: "manQ" EXACT RNAMOD [] -xref: RNAMOD:046 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001321 -name: seven_cyano_seven_deazaguanosine -def: "7_cyano_7_deazaguanosine is a modified 7-deazoguanosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "7-cyano-7-deazaguanosine" EXACT [] -synonym: "preQ0" EXACT RNAMOD [] -synonym: "seven cyano seven deazaguanosine" EXACT [] -xref: RNAMOD:047 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001322 -name: seven_aminomethyl_seven_deazaguanosine -def: "7_aminomethyl_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-aminomethyl-7-deazaguanosine" EXACT [] -synonym: "preQ1" EXACT RNAMOD [] -synonym: "seven aminomethyl seven deazaguanosine" EXACT [] -xref: RNAMOD:048 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001323 -name: archaeosine -def: "Archaeosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "G+" EXACT RNAMOD [] -xref: RNAMOD:049 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001324 -name: one_methylguanosine -def: "1_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylguanosine" EXACT [] -synonym: "m1G" EXACT RNAMOD [] -synonym: "one methylguanosine" EXACT [] -xref: RNAMOD:029 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001325 -name: N2_methylguanosine -def: "N2_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2G" EXACT RNAMOD [] -synonym: "N2 methylguanosine" EXACT [] -synonym: "N2-methylguanosine" EXACT [] -xref: RNAMOD:030 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001326 -name: seven_methylguanosine -def: "7_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "7-methylguanosine" EXACT [] -synonym: "m7G" EXACT RNAMOD [] -synonym: "seven methylguanosine" EXACT [] -xref: RNAMOD:031 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001327 -name: two_prime_O_methylguanosine -def: "2prime_O_methylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylguanosine" EXACT [] -synonym: "Gm" EXACT RNAMOD [] -synonym: "two prime O methylguanosine" EXACT [] -xref: RNAMOD:032 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001328 -name: N2_N2_dimethylguanosine -def: "N2_N2_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22G" EXACT RNAMOD [] -synonym: "N2,N2-dimethylguanosine" EXACT [] -xref: RNAMOD:033 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001329 -name: N2_2_prime_O_dimethylguanosine -def: "N2_2prime_O_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2Gm" EXACT RNAMOD [] -synonym: "N2 2 prime O dimethylguanosine" EXACT [] -synonym: "N2,2'-O-dimethylguanosine" EXACT [] -xref: RNAMOD:034 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001330 -name: N2_N2_2_prime_O_trimethylguanosine -def: "N2_N2_2prime_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22Gmv" EXACT RNAMOD [] -synonym: "N2,N2,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:035 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001331 -name: two_prime_O_ribosylguanosine_phosphate -def: "2prime_O_ribosylguanosine_phosphate is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosylguanosine (phosphate)" EXACT [] -synonym: "Gr(p)" EXACT RNAMOD [] -synonym: "two prime O ribosylguanosine phosphate" EXACT [] -xref: RNAMOD:036 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001332 -name: wybutosine -def: "Wybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "yW" EXACT RNAMOD [] -xref: RNAMOD:037 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001333 -name: peroxywybutosine -def: "Peroxywybutosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "o2yW" EXACT RNAMOD [] -xref: RNAMOD:038 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001334 -name: hydroxywybutosine -def: "Hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW" EXACT RNAMOD [] -xref: RNAMOD:039 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001335 -name: undermodified_hydroxywybutosine -def: "Undermodified_hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW*" EXACT RNAMOD [] -synonym: "undermodified hydroxywybutosine" EXACT [] -xref: RNAMOD:040 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001336 -name: wyosine -def: "Wyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "IMG" EXACT RNAMOD [] -xref: RNAMOD:041 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001337 -name: methylwyosine -def: "Methylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mimG" EXACT RNAMOD [] -xref: RNAMOD:042 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001338 -name: N2_7_dimethylguanosine -def: "N2_7_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7G" EXACT RNAMOD [] -synonym: "N2 7 dimethylguanosine" EXACT [] -synonym: "N2,7-dimethylguanosine" EXACT [] -xref: RNAMOD:090 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001339 -name: N2_N2_7_trimethylguanosine -def: "N2_N2_7_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,2,7G" EXACT RNAMOD [] -synonym: "N2,N2,7-trimethylguanosine" EXACT [] -xref: RNAMOD:091 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001340 -name: one_two_prime_O_dimethylguanosine -def: "1_2prime_O_dimethylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylguanosine" EXACT [] -synonym: "m1Gm" EXACT RNAMOD [] -synonym: "one two prime O dimethylguanosine" EXACT [] -xref: RNAMOD:096 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001341 -name: four_demethylwyosine -def: "4_demethylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-demethylwyosine" EXACT [] -synonym: "four demethylwyosine" EXACT [] -synonym: "imG-14" EXACT RNAMOD [] -xref: RNAMOD:100 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001342 -name: isowyosine -def: "Isowyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "imG2" EXACT RNAMOD [] -xref: RNAMOD:101 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001343 -name: N2_7_2prirme_O_trimethylguanosine -def: "N2_7_2prirme_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7Gm" EXACT RNAMOD [] -synonym: "N2 7 2prirme O trimethylguanosine" EXACT [] -synonym: "N2,7,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:106 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001344 -name: five_methyluridine -def: "5_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methyluridine" EXACT [] -synonym: "five methyluridine" EXACT [] -synonym: "m5U" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/5-methyluridine "wiki" -xref: RNAMOD:052 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001345 -name: two_prime_O_methyluridine -def: "2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyluridine" EXACT [] -synonym: "two prime O methyluridine" EXACT [] -synonym: "Um" EXACT RNAMOD [] -xref: RNAMOD:053 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001346 -name: five_two_prime_O_dimethyluridine -def: "5_2_prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethyluridine" EXACT [] -synonym: "five two prime O dimethyluridine" EXACT [] -synonym: "m5Um" EXACT RNAMOD [] -xref: RNAMOD:054 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001347 -name: one_methylpseudouridine -def: "1_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylpseudouridine" EXACT [] -synonym: "m1Y" EXACT RNAMOD [] -synonym: "one methylpseudouridine" EXACT [] -xref: RNAMOD:055 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001348 -name: two_prime_O_methylpseudouridine -def: "2prime_O_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylpseudouridine" EXACT [] -synonym: "two prime O methylpseudouridine" EXACT [] -synonym: "Ym" EXACT RNAMOD [] -xref: RNAMOD:056 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001349 -name: two_thiouridine -def: "2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiouridine" EXACT [] -synonym: "s2U" EXACT RNAMOD [] -synonym: "two thiouridine" EXACT [] -xref: RNAMOD:057 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001350 -name: four_thiouridine -def: "4_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-thiouridine" EXACT [] -synonym: "four thiouridine" EXACT [] -synonym: "s4U" EXACT RNAMOD [] -xref: RNAMOD:058 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001351 -name: five_methyl_2_thiouridine -def: "5_methyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyl-2-thiouridine" EXACT [] -synonym: "five methyl 2 thiouridine" EXACT [] -synonym: "m5s2U" EXACT RNAMOD [] -xref: RNAMOD:059 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001352 -name: two_thio_two_prime_O_methyluridine -def: "2_thio_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thio-2'-O-methyluridine" EXACT [] -synonym: "s2Um" EXACT RNAMOD [] -synonym: "two thio two prime O methyluridine" EXACT [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001353 -name: three_three_amino_three_carboxypropyl_uridine -def: "3_3_amino_3_carboxypropyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-(3-amino-3-carboxypropyl)uridine" EXACT [] -synonym: "acp3U" EXACT RNAMOD [] -xref: RNAMOD:061 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001354 -name: five_hydroxyuridine -def: "5_hydroxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxyuridine" EXACT [] -synonym: "five hydroxyuridine" EXACT [] -synonym: "ho5U" EXACT RNAMOD [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001355 -name: five_methoxyuridine -def: "5_methoxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxyuridine" EXACT [] -synonym: "five methoxyuridine" EXACT [] -synonym: "mo5U" EXACT RNAMOD [] -xref: RNAMOD:063 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001356 -name: uridine_five_oxyacetic_acid -def: "Uridine_5_oxyacetic_acid is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "cmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid" EXACT [] -synonym: "uridine five oxyacetic acid" EXACT [] -xref: RNAMOD:064 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001357 -name: uridine_five_oxyacetic_acid_methyl_ester -def: "Uridine_5_oxyacetic_acid_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mcmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid methyl ester" EXACT [] -synonym: "uridine five oxyacetic acid methyl ester" EXACT [] -xref: RNAMOD:065 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001358 -name: five_carboxyhydroxymethyl_uridine -def: "5_carboxyhydroxymethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine" EXACT [] -synonym: "chm5U" EXACT RNAMOD [] -synonym: "five carboxyhydroxymethyl uridine" EXACT [] -xref: RNAMOD:066 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001359 -name: five_carboxyhydroxymethyl_uridine_methyl_ester -def: "5_carboxyhydroxymethyl_uridine_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine methyl ester" EXACT [] -synonym: "five carboxyhydroxymethyl uridine methyl ester" EXACT [] -synonym: "mchm5U" EXACT RNAMOD [] -xref: RNAMOD:067 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001360 -name: five_methoxycarbonylmethyluridine -def: "Five_methoxycarbonylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyluridine" EXACT [] -synonym: "five methoxycarbonylmethyluridine" EXACT [] -synonym: "mcm5U" EXACT RNAMOD [] -xref: RNAMOD:068 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001361 -name: five_methoxycarbonylmethyl_two_prime_O_methyluridine -def: "Five_methoxycarbonylmethyl_2_prime_O_methyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five methoxycarbonylmethyl two prime O methyluridine" EXACT [] -synonym: "mcm5Um" EXACT RNAMOD [] -xref: RNAMOD:069 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001362 -name: five_methoxycarbonylmethyl_two_thiouridine -def: "5_methoxycarbonylmethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2-thiouridine" EXACT [] -synonym: "five methoxycarbonylmethyl two thiouridine" EXACT [] -synonym: "mcm5s2U" EXACT RNAMOD [] -xref: RNAMOD:070 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001363 -name: five_aminomethyl_two_thiouridine -def: "5_aminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-aminomethyl-2-thiouridine" EXACT [] -synonym: "five aminomethyl two thiouridine" EXACT [] -synonym: "nm5s2U" EXACT RNAMOD [] -xref: RNAMOD:071 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001364 -name: five_methylaminomethyluridine -def: "5_methylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyluridine" EXACT [] -synonym: "five methylaminomethyluridine" EXACT [] -synonym: "mnm5U" EXACT RNAMOD [] -xref: RNAMOD:072 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001365 -name: five_methylaminomethyl_two_thiouridine -def: "5_methylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-thiouridine" EXACT [] -synonym: "five methylaminomethyl two thiouridine" EXACT [] -synonym: "mnm5s2U" EXACT RNAMOD [] -xref: RNAMOD:073 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001366 -name: five_methylaminomethyl_two_selenouridine -def: "5_methylaminomethyl_2_selenouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-selenouridine" EXACT [] -synonym: "five methylaminomethyl two selenouridine" EXACT [] -synonym: "mnm5se2U" EXACT RNAMOD [] -xref: RNAMOD:074 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001367 -name: five_carbamoylmethyluridine -def: "5_carbamoylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyluridine" EXACT [] -synonym: "five carbamoylmethyluridine" EXACT [] -synonym: "ncm5U" EXACT RNAMOD [] -xref: RNAMOD:075 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001368 -name: five_carbamoylmethyl_two_prime_O_methyluridine -def: "5_carbamoylmethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five carbamoylmethyl two prime O methyluridine" EXACT [] -synonym: "ncm5Um" EXACT RNAMOD [] -xref: RNAMOD:076 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001369 -name: five_carboxymethylaminomethyluridine -def: "5_carboxymethylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyluridine" EXACT [] -synonym: "cmnm5U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyluridine" EXACT [] -xref: RNAMOD:077 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001370 -name: five_carboxymethylaminomethyl_two_prime_O_methyluridine -def: "5_carboxymethylaminomethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl- 2'-O-methyluridine" EXACT [] -synonym: "cmnm5Um" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two prime O methyluridine" EXACT [] -xref: RNAMOD:078 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001371 -name: five_carboxymethylaminomethyl_two_thiouridine -def: "5_carboxymethylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl-2-thiouridine" EXACT [] -synonym: "cmnm5s2U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two thiouridine" EXACT [] -xref: RNAMOD:079 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001372 -name: three_methyluridine -def: "3_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methyluridine" EXACT [] -synonym: "m3U" EXACT RNAMOD [] -synonym: "three methyluridine" EXACT [] -xref: RNAMOD:085 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001373 -name: one_methyl_three_three_amino_three_carboxypropyl_pseudouridine -def: "1_methyl_3_3_amino_3_carboxypropyl_pseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyl-3-(3-amino-3-carboxypropyl) pseudouridine" EXACT [] -synonym: "m1acp3Y" EXACT RNAMOD [] -xref: RNAMOD:086 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001374 -name: five_carboxymethyluridine -def: "5_carboxymethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethyluridine" EXACT [] -synonym: "cm5U" EXACT RNAMOD [] -synonym: "five carboxymethyluridine" EXACT [] -xref: RNAMOD:087 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001375 -name: three_two_prime_O_dimethyluridine -def: "3_2prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3,2'-O-dimethyluridine" EXACT [] -synonym: "m3Um" EXACT RNAMOD [] -synonym: "three two prime O dimethyluridine" EXACT [] -xref: RNAMOD:092 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001376 -name: five_methyldihydrouridine -def: "5_methyldihydrouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyldihydrouridine" EXACT [] -synonym: "five methyldihydrouridine" EXACT [] -synonym: "m5D" EXACT RNAMOD [] -xref: RNAMOD:093 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001377 -name: three_methylpseudouridine -def: "3_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylpseudouridine" EXACT [] -synonym: "m3Y" EXACT RNAMOD [] -synonym: "three methylpseudouridine" EXACT [] -xref: RNAMOD:094 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001378 -name: five_taurinomethyluridine -def: "5_taurinomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyluridine" EXACT [] -synonym: "five taurinomethyluridine" EXACT [] -synonym: "tm5U" EXACT RNAMOD [] -xref: RNAMOD:098 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001379 -name: five_taurinomethyl_two_thiouridine -def: "5_taurinomethyl_2_thiouridineis a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyl-2-thiouridine" EXACT [] -synonym: "five taurinomethyl two thiouridine" EXACT [] -synonym: "tm5s2U" EXACT RNAMOD [] -xref: RNAMOD:099 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001380 -name: five_isopentenylaminomethyl_uridine -def: "5_isopentenylaminomethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)uridine" EXACT [] -synonym: "five isopentenylaminomethyl uridine" EXACT [] -synonym: "inm5U" EXACT RNAMOD [] -xref: RNAMOD:103 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001381 -name: five_isopentenylaminomethyl_two_thiouridine -def: "5_isopentenylaminomethyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2-thiouridine" EXACT [] -synonym: "five isopentenylaminomethyl two thiouridine" EXACT [] -synonym: "inm5s2U" EXACT RNAMOD [] -xref: RNAMOD:104 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001382 -name: five_isopentenylaminomethyl_two_prime_O_methyluridine -def: "5_isopentenylaminomethyl_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2'-O-methyluridine" EXACT [] -synonym: "five isopentenylaminomethyl two prime O methyluridine" EXACT [] -synonym: "inm5Um" EXACT RNAMOD [] -xref: RNAMOD:105 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001383 -name: histone_binding_site -def: "A region of a DNA molecule that is bound by a histone." [SO:ke] -synonym: "histone binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0001384 -name: CDS_fragment -synonym: "CDS fragment" EXACT [] -synonym: "incomplete CDS" EXACT [] -is_a: SO:0000316 ! implied link automatically realized ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000731 ! fragmentary -relationship: has_quality SO:0000731 ! implied link automatically realized ! fragmentary - -[Term] -id: SO:0001385 -name: modified_amino_acid_feature -def: "A post translationally modified amino acid feature." [SO:ke] -synonym: "modified amino acid feature" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001386 -name: modified_glycine -def: "A post translationally modified glycine amino acid feature." [SO:ke] -synonym: "ModGly" EXACT AAMOD [] -synonym: "modified glycine" EXACT [] -xref: MOD:00908 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001387 -name: modified_L_alanine -def: "A post translationally modified alanine amino acid feature." 
[SO:ke] -synonym: "ModAla" EXACT AAMOD [] -synonym: "modified L alanine" EXACT [] -synonym: "modified L-alanine" EXACT [] -xref: MOD:00901 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001388 -name: modified_L_asparagine -def: "A post translationally modified asparagine amino acid feature." [SO:ke] -synonym: "ModAsn" EXACT AAMOD [] -synonym: "modified L asparagine" EXACT [] -synonym: "modified L-asparagine" EXACT [] -xref: MOD:00903 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001389 -name: modified_L_aspartic_acid -def: "A post translationally modified aspartic acid amino acid feature." [SO:ke] -synonym: "ModAsp" EXACT AAMOD [] -synonym: "modified L aspartic acid" EXACT [] -synonym: "modified L-aspartic acid" EXACT [] -xref: MOD:00904 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001390 -name: modified_L_cysteine -def: "A post translationally modified cysteine amino acid feature." [SO:ke] -synonym: "ModCys" EXACT AAMOD [] -synonym: "modified L cysteine" EXACT [] -synonym: "modified L-cysteine" EXACT [] -xref: MOD:00905 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001391 -name: modified_L_glutamic_acid -synonym: "ModGlu" EXACT AAMOD [] -synonym: "modified L glutamic acid" EXACT [] -synonym: "modified L-glutamic acid" EXACT [] -xref: MOD:00906 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001392 -name: modified_L_threonine -def: "A post translationally modified threonine amino acid feature." [SO:ke] -synonym: "modified L threonine" EXACT [] -synonym: "modified L-threonine" EXACT [] -synonym: "ModThr" EXACT AAMOD [] -xref: MOD:00917 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001393 -name: modified_L_tryptophan -def: "A post translationally modified tryptophan amino acid feature." 
[SO:ke] -synonym: "modified L tryptophan" EXACT [] -synonym: "modified L-tryptophan" EXACT [] -synonym: "ModTrp" EXACT AAMOD [] -xref: MOD:00918 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001394 -name: modified_L_glutamine -def: "A post translationally modified glutamine amino acid feature." [SO:ke] -synonym: "ModGln" EXACT [] -synonym: "modified L glutamine" EXACT [] -synonym: "modified L-glutamine" EXACT [] -xref: MOD:00907 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001395 -name: modified_L_methionine -def: "A post translationally modified methionine amino acid feature." [SO:ke] -synonym: "modified L methionine" EXACT [] -synonym: "modified L-methionine" EXACT [] -synonym: "ModMet" EXACT AAMOD [] -xref: MOD:00913 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001396 -name: modified_L_isoleucine -def: "A post translationally modified isoleucine amino acid feature." [SO:ke] -synonym: "modified L isoleucine" EXACT [] -synonym: "modified L-isoleucine" EXACT [] -synonym: "ModIle" EXACT AAMOD [] -xref: MOD:00910 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001397 -name: modified_L_phenylalanine -def: "A post translationally modified phenylalanine amino acid feature." [SO:ke] -synonym: "modified L phenylalanine" EXACT [] -synonym: "modified L-phenylalanine" EXACT [] -synonym: "ModPhe" EXACT AAMOD [] -xref: MOD:00914 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001398 -name: modified_L_histidine -def: "A post translationally modified histidie amino acid feature." [SO:ke] -synonym: "ModHis" EXACT [] -synonym: "modified L histidine" EXACT [] -synonym: "modified L-histidine" EXACT [] -xref: MOD:00909 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001399 -name: modified_L_serine -def: "A post translationally modified serine amino acid feature." 
[SO:ke] -synonym: "modified L serine" EXACT [] -synonym: "modified L-serine" EXACT [] -synonym: "MosSer" EXACT AAMOD [] -xref: MOD:00916 "http://www.psidev.info/index.php?q=node/104" -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001400 -name: modified_L_lysine -def: "A post translationally modified lysine amino acid feature." [SO:ke] -synonym: "modified L lysine" EXACT [] -synonym: "modified L-lysine" EXACT [] -synonym: "ModLys" EXACT AAMOD [] -xref: MOD:00912 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001401 -name: modified_L_leucine -def: "A post translationally modified leucine amino acid feature." [SO:ke] -synonym: "modified L leucine" EXACT [] -synonym: "modified L-leucine " EXACT [] -synonym: "ModLeu" EXACT AAMOD [] -xref: MOD:00911 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001402 -name: modified_L_selenocysteine -def: "A post translationally modified selenocysteine amino acid feature." [SO:ke] -synonym: "modified L selenocysteine" EXACT [] -synonym: "modified L-selenocysteine" EXACT [] -xref: MOD:01158 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001403 -name: modified_L_valine -def: "A post translationally modified valine amino acid feature." [SO:ke] -synonym: "modified L valine" EXACT [] -synonym: "modified L-valine" EXACT [] -synonym: "ModVal" EXACT AAMOD [] -xref: MOD:00920 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001404 -name: modified_L_proline -def: "A post translationally modified proline amino acid feature." [SO:ke] -synonym: "modified L proline" EXACT [] -synonym: "modified L-proline " EXACT [] -synonym: "ModPro" EXACT AAMOD [] -xref: MOD:00915 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001405 -name: modified_L_tyrosine -def: "A post translationally modified tyrosine amino acid feature." 
[SO:ke] -synonym: "modified L tyrosine" EXACT [] -synonym: "modified L-tyrosine" EXACT [] -synonym: "ModTry" EXACT AAMOD [] -xref: MOD:00919 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001406 -name: modified_L_arginine -def: "A post translationally modified arginine amino acid feature." [SO:ke] -synonym: "ModArg" EXACT AAMOD [] -synonym: "modified L arginine" EXACT [] -synonym: "modified L-arginine" EXACT [] -xref: MOD:00902 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001407 -name: peptidyl -def: "An attribute describing the nature of a proteinaceous polymer, where by the amino acid units are joined by peptide bonds." [SO:ke] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0001408 -name: cleaved_for_gpi_anchor_region -def: "The C-terminal residues of a polypeptide which are exchanged for a GPI-anchor." [EBI:rh] -synonym: "cleaved for gpi anchor region" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001409 -name: biomaterial_region -def: "A region which is intended for use in an experiment." [SO:cb] -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001410 -name: experimental_feature -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -def: "A region that is defined according to its relations with other regions within the same sequence." 
[SO:cb] -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001413 -name: translocation_breakpoint -def: "The point within a chromosome where a translocation begins or ends." [SO:cb] -synonym: "translocation breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001414 -name: insertion_breakpoint -def: "The point within a chromosome where a insertion begins or ends." [SO:cb] -synonym: "insertion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001415 -name: deletion_breakpoint -def: "The point within a chromosome where a deletion begins or ends." [SO:cb] -synonym: "deletion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001416 -name: five_prime_flanking_region -def: "A flanking region located five prime of a specific region." [SO:chado] -synonym: "5' flanking region" RELATED [] -synonym: "five prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001417 -name: three_prime_flanking_region -def: "A flanking region located three prime of a specific region." [SO:chado] -synonym: "3' flanking region" RELATED [] -synonym: "three prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001418 -name: transcribed_fragment -def: "An experimental region, defined by a tiling array experiment to be transcribed at some level." [SO:ke] -comment: Term requested by the MODencode group. -synonym: "transcribed fragment" EXACT [] -synonym: "transfrag" RELATED [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001419 -name: cis_splice_site -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -def: "Primary transcript region bordering trans-splice junction." 
[SO:ke] -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001421 -name: splice_junction -def: "The boundary between an intron and an exon." [SO:ke] -synonym: "splice boundary" EXACT [] -synonym: "splice junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001422 -name: conformational_switch -def: "A region of a polypeptide, involved in the transition from one conformational state to another." [SO:ke] -comment: MM Young, K Kirshenbaum, KA Dill & S Highsmith. Predicting conformational switches in proteins. Protein Science, 1999, 8, 1752-64. K. Kirshenbaum, M.M. Young and S. Highsmith. Predicting Allosteric Switches in Myosins. Protein Science 8(9):1806-1815. 1999. -synonym: "polypeptide conformational switch" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001423 -name: dye_terminator_read -def: "A read produced by the dye terminator method of sequencing." [SO:ke] -synonym: "dye terminator read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001424 -name: pyrosequenced_read -def: "A read produced by pyrosequencing technology." [SO:ke] -comment: An example is a read produced by Roche 454 technology. -synonym: "pyorsequenced read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001425 -name: ligation_based_read -def: "A read produced by ligation based sequencing technologies." [SO:ke] -comment: An example of this kind of read is one produced by ABI SOLiD. -synonym: "ligation based read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001426 -name: polymerase_synthesis_read -def: "A read produced by the polymerase based sequence by synthesis method." [SO:ke] -comment: An example is a read produced by Illumina technology. -synonym: "polymerase synthesis read" RELATED [] -is_a: SO:0000150 ! 
read - -[Term] -id: SO:0001427 -name: cis_regulatory_frameshift_element -def: "A structural region in an RNA molecule which promotes ribosomal frameshifting of cis coding sequence." [RFAM:jd] -synonym: "cis regulatory frameshift element" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001428 -name: expressed_sequence_assembly -def: "A sequence assembly derived from expressed sequences." [SO:ke] -comment: From tracker [ 2372385 ] expressed_sequence_assembly. -synonym: "expressed sequence assembly" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0001429 -name: DNA_binding_site -def: "A region of a molecule that binds to DNA." [SO:ke] -synonym: "DNA binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001430 -name: polyA_junction -def: "The boundary between the UTR and the polyA sequence." [SO:ke] -synonym: "polyA junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001431 -name: cryptic_gene -def: "A gene that is not transcribed under normal conditions and is not critical to normal cellular functioning." [SO:ke] -synonym: "cryptic gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000976 ! cryptic -relationship: has_quality SO:0000976 ! implied link automatically realized ! cryptic - -[Term] -id: SO:0001432 -name: sequence_variant_affecting_polyadenylation -synonym: "mutation affecting polyadenylation" RELATED [] -synonym: "sequence variant affecting polyadenylation" EXACT [] -is_a: SO:1000070 ! sequence_variant_affecting_transcript_processing - -[Term] -id: SO:0001433 -name: three_prime_RACE_clone -def: "A three prime RACE (Rapid Amplification of cDNA Ends) clone is a cDNA clone copied from the 3' end of an mRNA (using a poly-dT primer to capture the polyA tail and a gene-specific or randomly primed 5' primer), and spliced into a vector for propagation in a suitable host." 
[modENCODE:nlw] -synonym: "3' RACE clone" RELATED [] -is_a: SO:0000317 ! cDNA_clone - -[Term] -id: SO:0001434 -name: cassette_pseudogene -def: "A cassette pseudogene is a kind of gene in an innactive form which may recombine at a telomeric locus to form a functional copy." [SO:ke] -comment: Requested by the trypanosome community. -synonym: "cassette pseudogene" EXACT [] -synonym: "cassette type psedogene" RELATED [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0001435 -name: alanine -comment: A place holder for a cross product with chebi. -synonym: "A" EXACT aa1 [] -synonym: "Ala" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001436 -name: valine -comment: A place holder for a cross product with chebi. -synonym: "V" EXACT aa1 [] -synonym: "Val" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001437 -name: leucine -comment: A place holder for a cross product with chebi. -synonym: "L" EXACT aa1 [] -synonym: "Leu" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001438 -name: isoleucine -comment: A place holder for a cross product with chebi. -synonym: "I" EXACT aa1 [] -synonym: "Ile" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001439 -name: proline -comment: A place holder for a cross product with chebi. -synonym: "P" EXACT aa1 [] -synonym: "Pro" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001440 -name: tryptophan -comment: A place holder for a cross product with chebi. -synonym: "Trp" EXACT aa3 [] -synonym: "W" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001441 -name: phenylalanine -comment: A place holder for a cross product with chebi. -synonym: "F" EXACT aa1 [] -synonym: "Phe" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001442 -name: methionine -comment: A place holder for a cross product with chebi. -synonym: "M" EXACT aa1 [] -synonym: "Met" EXACT aa3 [] -is_a: SO:0001237 ! 
amino_acid - -[Term] -id: SO:0001443 -name: glycine -comment: A place holder for a cross product with chebi. -synonym: "G" EXACT aa1 [] -synonym: "Gly" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001444 -name: serine -comment: A place holder for a cross product with chebi. -synonym: "S" EXACT aa1 [] -synonym: "Ser" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001445 -name: threonine -comment: A place holder for a cross product with chebi. -synonym: "T" EXACT aa1 [] -synonym: "Thr" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001446 -name: tyrosine -comment: A place holder for a cross product with chebi. -synonym: "Tyr" EXACT aa3 [] -synonym: "Y" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001447 -name: cysteine -comment: A place holder for a cross product with chebi. -synonym: "C" EXACT aa1 [] -synonym: "Cys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001448 -name: glutamine -comment: A place holder for a cross product with chebi. -synonym: "Gln" EXACT aa3 [] -synonym: "Q" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001449 -name: asparagine -comment: A place holder for a cross product with chebi. -synonym: "Asn" EXACT aa3 [] -synonym: "N" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001450 -name: lysine -comment: A place holder for a cross product with chebi. -synonym: "K" EXACT aa1 [] -synonym: "Lys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001451 -name: argenine -comment: A place holder for a cross product with chebi. -synonym: "Arg" EXACT aa3 [] -synonym: "R" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001452 -name: histidine -comment: A place holder for a cross product with chebi. -synonym: "H" EXACT aa1 [] -synonym: "His" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001453 -name: aspartic_acid -comment: A place holder for a cross product with chebi. 
-synonym: "Asp" EXACT aa3 [] -synonym: "aspartic acid" EXACT [] -synonym: "D" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001454 -name: glutamic_acid -comment: A place holder for a cross product with chebi. -synonym: "E" EXACT aa1 [] -synonym: "Glu" EXACT aa3 [] -synonym: "glutamic acid" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001455 -name: selenocysteine -comment: A place holder for a cross product with chebi. -synonym: "Sec" EXACT aa3 [] -synonym: "U" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001456 -name: pyrrolysine -comment: A place holder for a cross product with chebi. -synonym: "O" EXACT aa1 [] -synonym: "Pyl" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001457 -name: transcribed_cluster -def: "A region defined by a set of transcribed sequences from the same gene or expressed pseudogene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "transcribed cluster" EXACT [] -synonym: "unigene cluster" RELATED [] -is_a: SO:0001410 ! experimental_feature -relationship: has_part SO:0000695 ! reagent - -[Term] -id: SO:0001458 -name: unigene_cluster -def: "A kind of transcribed_cluster defined by a set of transcribed sequences from the a unique gene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "unigene cluster" RELATED [] -is_a: SO:0001457 ! transcribed_cluster - -[Term] -id: SO:0001459 -name: CRISPR -def: "Clustered Palindromic Repeats interspersed with bacteriophage derived spacer sequences." [RFAM:jd] -synonym: "Clustered_Regularly_Interspaced_Short_Palindromic_Repeat" EXACT [] -synonym: "CRISPR element" EXACT [] -xref: http:en.wikipedia.org/wiki/CRISPR -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0001460 -name: insulator_binding_site -def: "A protein_binding_site located within an insulator." [SO:ke] -comment: See tracker ID 2060908. 
-synonym: "insulator binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -relationship: part_of SO:0000627 ! insulator - -[Term] -id: SO:0001461 -name: enhancer_binding_site -def: "A protein_binding_site located within an enhancer." [SO:ke] -synonym: "enhancer binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -relationship: part_of SO:0000165 ! enhancer - -[Term] -id: SO:0001462 -name: contig_collection -def: "A collection of contigs." [SO:ke] -comment: See tracker ID: 2138359. -synonym: "contig collection" EXACT [] -is_a: SO:0001260 ! sequence_collection - -[Term] -id: SO:0001463 -name: lincRNA -def: "A multiexonic non-coding RNA transcribed by RNA polymerase II." [PMID:19182780, SO:ke] -synonym: "large intervening non-coding RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001464 -name: UST -def: "An EST spanning part or all of the untranslated regions of a protein-coding transcript." [SO:nlw] -synonym: "UTR sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001465 -name: three_prime_UST -def: "A UST located in the 3'UTR of a protein-coding transcript." [SO:nlw] -synonym: "3' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001466 -name: five_prime_UST -def: "An UST located in the 5'UTR of a protein-coding transcript." [SO:nlw] -synonym: "5' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001467 -name: RST -def: "A tag produced from a single sequencing read from a RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "RACE sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001468 -name: three_prime_RST -def: "A tag produced from a single sequencing read from a 3'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "3' RST" EXACT [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001469 -name: five_prime_RST -def: "A tag produced from a single sequencing read from a 5'-RACE product; typically a few hundred base pairs long." 
[SO:nlw] -synonym: "5' RST" RELATED [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001470 -name: UST_match -def: "A match against an UST sequence." [SO:nlw] -synonym: "UST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001471 -name: RST_match -def: "A match against an RST sequence." [SO:nlw] -synonym: "RST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001472 -name: primer_match -def: "A nucleotide match to a primer sequence." [SO:nlw] -synonym: "primer match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0001473 -name: miRNA_antiguide -def: "A region of the pri miRNA that basepairs with the guide to form the hairpin." [SO:ke] -synonym: "miRNA antiguide " EXACT [] -synonym: "miRNA passenger strand" EXACT [] -synonym: "miRNA star" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-05-27T03:35:43Z - -[Term] -id: SO:0001474 -name: trans_splice_junction -def: "The boundary between the spliced leader and the first exon of the mRNA." [SO:ke] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2009-07-13T04:50:49Z - -[Term] -id: SO:0001475 -name: outron -def: "A region of a primary transcript, that is removed via trans splicing." [PMID:16401417, SO:ke] -is_a: SO:0000835 ! primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-07-14T11:36:08Z - -[Term] -id: SO:0001476 -name: natural_plasmid -def: "A plasmid that occurs naturally." [SO:xp] -synonym: "natural plasmid" EXACT [] -is_a: SO:0000155 ! plasmid -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -intersection_of: has_quality SO:0000782 ! natural -relationship: has_quality SO:0000782 ! implied link automatically realized ! 
natural -created_by: kareneilbeck -creation_date: 2009-09-01T03:43:06Z - -[Term] -id: SO:0001477 -name: gene_trap_construct -def: "A gene trap construct is a type of engineered plasmid which is designed to integrate into a genome and produce a fusion transcript between exons of the gene into which it inserts and a reporter element in the construct. Gene traps contain a splice acceptor, do not contain promoter elements for the reporter, and are mutagenic. Gene traps may be bicistronic with the second cassette containing a promoter driving an a selectable marker." [ZFIN:dh] -synonym: "gene trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:49:09Z - -[Term] -id: SO:0001478 -name: promoter_trap_construct -def: "A promoter trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when inserted in close proximity to a promoter element. Promoter traps typically do not contain promoter elements and are mutagenic." [ZFIN:dh] -synonym: "promoter trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:52:01Z - -[Term] -id: SO:0001479 -name: enhancer_trap_construct -def: "An enhancer trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when the expression from a basic minimal promoter is enhanced by genomic enhancer elements. Enhancer traps contain promoter elements and are not usually mutagenic." [ZFIN:dh] -synonym: "enhancer trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:53:26Z - -[Term] -id: SO:0001480 -name: PAC_end -def: "A region of sequence from the end of a PAC clone that may provide a highly specific marker." [ZFIN:mh] -synonym: "PAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000154 ! 
PAC -created_by: kareneilbeck -creation_date: 2009-09-09T05:18:12Z - -[Term] -id: SO:0001481 -name: RAPD -def: "RAPD is a 'PCR product' where a sequence variant is identified through the use of PCR with random primers." [ZFIN:mh] -synonym: "Random Amplification Polymorphic DNA" EXACT [] -is_a: SO:0000006 ! PCR_product -created_by: kareneilbeck -creation_date: 2009-09-09T05:26:10Z - -[Term] -id: SO:0001482 -name: shadow_enhancer -is_a: SO:0000165 ! enhancer -created_by: kareneilbeck -creation_date: 2009-09-09T05:29:29Z - -[Term] -id: SO:0005836 -name: regulatory_region -def: "A DNA sequence that controls the expression of a gene." [SO:ke] -subset: SOFA -synonym: "regulatory region" EXACT [] -xref: http://en.wikipedia.org/wiki/Regulatory_region "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0005837 -name: U14_snoRNA_primary_transcript -def: "The primary transcript of an evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA." [PMID:2251119] -synonym: "4.5S snRNA primary transcript" EXACT [] -synonym: "U14 snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0005841 -name: methylation_guide_snoRNA -def: "A snoRNA that specifies the site of 2'-O-ribose methylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA 2'-O-ribose methylation guide activity (GO:0030561). -synonym: "methylation guide snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0000580 ! methylation_guide_snoRNA_primary_transcript - -[Term] -id: SO:0005843 -name: rRNA_cleavage_RNA -def: "An ncRNA that is part of a ribonucleoprotein that cleaves the primary pre-rRNA transcript in the process of producing mature rRNA molecules." [GOC:kgc] -synonym: "rRNA cleavage RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000582 ! 
rRNA_cleavage_snoRNA_primary_transcript - -[Term] -id: SO:0005845 -name: exon_of_single_exon_gene -def: "An exon that is the only exon in a gene." [RSC:cb] -synonym: "exon of single exon gene" EXACT [] -synonym: "single_exon" RELATED [] -synonym: "singleton exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0005847 -name: cassette_array_member -synonym: "cassette array member" EXACT [] -is_a: SO:0005848 ! gene_cassette_member - -[Term] -id: SO:0005848 -name: gene_cassette_member -synonym: "gene cassette member" EXACT [] -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0005849 -name: gene_subarray_member -synonym: "gene subarray member" EXACT [] -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0005850 -name: primer_binding_site -def: "Non-covalent primer binding site for initiation of replication, transcription, or reverse transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "primer binding site" EXACT [] -xref: http://en.wikipedia.org/wiki/Primer_binding_site "wiki" -is_a: SO:0000409 ! binding_site -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0005851 -name: gene_array -def: "An array includes two or more genes, or two or more gene subarrays, contiguously arranged where the individual genes, or subarrays, are either identical in sequence, or essentially so." [SO:ma] -comment: This would include, for example, a cluster of genes each encoding the major ribosomal RNAs and a cluster of histone gene subarrays. -synonym: "gene array" EXACT [] -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0005852 -name: gene_subarray -def: "A subarray is, by defintition, a member of a gene array (SO:0005851); the members of a subarray may differ substantially in sequence, but are closely related in function." [SO:ma] -comment: This would include, for example, a cluster of genes encoding different histones. -synonym: "gene subarray" EXACT [] -is_a: SO:0005855 ! 
gene_group - -[Term] -id: SO:0005853 -name: gene_cassette -def: "A gene that can be substituted for a related gene at a different site in the genome." [SGD:se] -comment: This would include, for example, the mating type gene cassettes of S. cerevisiae. Gene cassettes usually exist as linear sequences as part of a larger DNA molecule, such as a chromosome or plasmid. -synonym: "gene cassette" EXACT [] -xref: http://en.wikipedia.org/wiki/Gene_cassette "wiki" -is_a: SO:0000704 ! gene - -[Term] -id: SO:0005854 -name: gene_cassette_array -def: "An array of non-functional genes whose members, when captured by recombination form functional genes." [SO:ma] -comment: This would include, for example, the arrays of non-functional VSG genes of Trypanosomes. -synonym: "gene cassette array" EXACT [] -is_a: SO:0005855 ! gene_group -relationship: has_part SO:0005853 ! gene_cassette - -[Term] -id: SO:0005855 -name: gene_group -def: "A collection of related genes." [SO:ma] -subset: SOFA -synonym: "gene group" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0005856 -name: selenocysteine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -synonym: "selenocysteine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0005857 -name: selenocysteinyl_tRNA -def: "A tRNA sequence that has a selenocysteine anticodon, and a 3' selenocysteine binding region." [SO:ke] -synonym: "selenocysteinyl tRNA" EXACT [] -synonym: "selenocysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "selenocysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0005856 ! selenocysteine_tRNA_primary_transcript - -[Term] -id: SO:0005858 -name: syntenic_region -def: "A region in which two or more pairs of homologous markers occur on the same chromosome in two or more species." 
[http://www.informatics.jax.org/silverbook/glossary.shtml] -synonym: "syntenic region" EXACT [] -is_a: SO:0000330 ! implied link automatically realized ! conserved_region -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000860 ! syntenic -relationship: has_quality SO:0000860 ! implied link automatically realized ! syntenic - -[Term] -id: SO:0100001 -name: biochemical_region_of_peptide -def: "A region of a peptide that is involved in a biochemical function." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "biochemical motif" EXACT [] -synonym: "biochemical region of peptide" EXACT [] -synonym: "biochemical_region" RELATED [] -is_a: SO:0001067 ! polypeptide_motif - -[Term] -id: SO:0100002 -name: molecular_contact_region -def: "A region that is involved a contact with another molecule." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "molecular contact region" RELATED [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0100003 -name: intrinsically_unstructured_polypeptide_region -def: "A region of polypeptide chain with high conformational flexibility." [EBIBS:GAR] -subset: biosapiens -synonym: "disordered region" RELATED BS [] -synonym: "intrinsically unstructured polypeptide region" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0100004 -name: catmat_left_handed_three -def: "A motif of 3 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -75 bounds -100 to -50, res i+1: psi 140 bounds 110 to 170. An extra restriction of the length of the O to O distance would be useful, that it be less than 5 Angstrom. More precisely these two oxygens are the main chain carbonyl oxygen atoms of residues i-1 and i+1." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "catmat-3l" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0100005 -name: catmat_left_handed_four -def: "A motif of 4 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i psi -10 bounds -50 to 30, res i+1: phi -90 bounds -120 to -60, res i+1: psi -10 bounds -50 to 30, res i+2: phi -75 bounds -100 to -50, res i+2: psi 140 bounds 110 to 170. The extra restriction of the length of the O to O distance is similar, that it be less than 5 Angstrom. In this case these two Oxygen atoms are the main chain carbonyl oxygen atoms of residues i-1 and i+2." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "catmat-4l" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100006 -name: catmat_right_handed_three -def: "A motif of 3 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -75 bounds -100 to -50, res i+1: psi 140 bounds 110 to 170. An extra restriction of the length of the O to O distance would be useful, that it be less than 5 Angstrom. More precisely these two oxygens are the main chain carbonyl oxygen atoms of residues i-1 and i+1." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "catmat-3r" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100007 -name: catmat_right_handed_four -def: "A motif of 4 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -90 bounds -120 to -60, res i+1: psi -10 bounds -50 to 30, res i+2: phi -75 bounds -100 to -50, res i+2: psi 140 bounds 110 to 170. The extra restriction of the length of the O to O distance is similar, that it be less than 5 Angstrom. In this case these two Oxygen atoms are the main chain carbonyl oxygen atoms of residues i-1 and i+2." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "catmat-4r" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100008 -name: alpha_beta_motif -def: "A motif of five consecutive residues and two H-bonds in which: H-bond between CO of residue(i) and NH of residue(i+4), H-bond between CO of residue(i) and NH of residue(i+3),Phi angles of residues(i+1), (i+2) and (i+3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "alpha beta motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100009 -name: lipoprotein_signal_peptide -def: "A peptide that acts as a signal for both membrane translocation and lipid attachment in prokaryotes." [EBIBS:GAR] -subset: biosapiens -synonym: "lipoprotein signal peptide" EXACT [] -synonym: "prokaryotic membrane lipoprotein lipid attachment site" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0100010 -name: no_output -def: "An experimental region wherean analysis has been run and not produced any annotation." [EBIBS:GAR] -subset: biosapiens -synonym: "no output" EXACT BS [] -is_a: SO:0000703 ! experimental_result_region - -[Term] -id: SO:0100011 -name: cleaved_peptide_region -def: "The cleaved_peptide_regon is the a region of peptide sequence that is cleaved during maturation." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "cleaved peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0100012 -name: peptide_coil -def: "Irregular, unstructured regions of a protein's backbone, as distinct from the regular region (namely alpha helix and beta strand - characterised by specific patterns of main-chain hydrogen bonds)." [EBIBS:GAR] -subset: biosapiens -synonym: "coil" RELATED BS [] -synonym: "peptide coil" EXACT [] -synonym: "random coil" RELATED BS [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0100013 -name: hydrophobic_region_of_peptide -def: "Hydrophobic regions are regions with a low affinity for water." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "hydropathic" RELATED [] -synonym: "hydrophobic region of peptide" RELATED [] -synonym: "hydrophobic_region" EXACT [] -synonym: "hydrophobicity" RELATED [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0100014 -name: n_terminal_region -def: "The amino-terminal positively-charged region of a signal peptide (approx 1-5 aa)." [EBIBS:GAR] -subset: biosapiens -synonym: "N-region" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0000418 ! signal_peptide - -[Term] -id: SO:0100015 -name: c_terminal_region -def: "The more polar, carboxy-terminal region of the signal peptide (approx 3-7 aa)." [EBIBS:GAR] -subset: biosapiens -synonym: "C-region" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0000418 ! signal_peptide - -[Term] -id: SO:0100016 -name: central_hydrophobic_region_of_signal_peptide -def: "The central, hydrophobic region of the signal peptide (approx 7-15 aa)." [EBIBS:GAR] -subset: biosapiens -synonym: "central hydrophobic region of signal peptide" EXACT [] -synonym: "central_hydrophobic_region" RELATED [] -synonym: "H-region" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0000418 ! signal_peptide - -[Term] -id: SO:0100017 -name: polypeptide_conserved_motif -def: "A conserved motif is a short (up to 20 amino acids) region of biological interest that is conserved in different proteins. They may or may not have functional or structural significance within the proteins in which they are found." [EBIBS:GAR] -subset: biosapiens -synonym: "motif" RELATED [] -is_a: SO:0001067 ! 
polypeptide_motif - -[Term] -id: SO:0100018 -name: polypeptide_binding_motif -def: "A polypeptide binding motif is a short (up to 20 amino acids) polypeptide region of biological interest that contains one or more amino acids experimentally shown to bind to a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "binding" RELATED [uniprot:feature_type] -synonym: "polypeptide binding motif" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0100019 -name: polypeptide_catalytic_motif -def: "A polypeptide catalytic motif is a short (up to 20 amino acids) polypeptide region that contains one or more active site residues." [EBIBS:GAR] -subset: biosapiens -synonym: "catalytic_motif" RELATED [] -synonym: "polypeptide catalytic motif" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0100020 -name: polypeptide_DNA_contact -def: "Residues involved in interactions with DNA." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide DNA contact" EXACT [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0100021 -name: polypeptide_conserved_region -def: "A subsection of sequence with biological interest that is conserved in different proteins. They may or may not have functional or structural significance within the proteins in which they are found." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide conserved region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:1000002 -name: substitution -def: "Any change in genomic DNA caused by a single event." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1000004 -name: partially_characterised_change_in_DNA_sequence -def: "The nature of the mutation event is only partially characterised." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "partially characterised change in DNA sequence" EXACT [] -is_a: SO:1000007 ! uncharacterised_change_in_nucleotide_sequence - -[Term] -id: SO:1000005 -name: complex_substitution -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -synonym: "complex substitution" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000007 -name: uncharacterised_change_in_nucleotide_sequence -def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "uncharacterised change in nucleotide sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:1000008 -name: point_mutation -def: "A single nucleotide change which has occurred at the same position of a corresponding nucleotide in a reference sequence." [SO:immuno_workshop] -subset: SOFA -synonym: "point mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Point_mutation "wiki" -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000009 -name: transition -def: "Change of a pyrimidine nucleotide, C or T, into an other pyrimidine nucleotide, or change of a purine nucleotide, A or G, into an other purine nucleotide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000008 ! point_mutation - -[Term] -id: SO:1000010 -name: pyrimidine_transition -def: "A substitution of a pyrimidine, C or T, for another pyrimidine." [SO:ke] -synonym: "pyrimidine transition" EXACT [] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000011 -name: C_to_T_transition -def: "A transition of a cytidine to a thymine." [SO:ke] -synonym: "C to T transition" EXACT [] -is_a: SO:1000010 ! 
pyrimidine_transition - -[Term] -id: SO:1000012 -name: C_to_T_transition_at_pCpG_site -def: "The transition of cytidine to thymine occurring at a pCpG site as a consequence of the spontaneous deamination of 5'-methylcytidine." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "C to T transition at pCpG site" EXACT [] -is_a: SO:1000011 ! C_to_T_transition - -[Term] -id: SO:1000013 -name: T_to_C_transition -synonym: "T to C transition" EXACT [] -is_a: SO:1000010 ! pyrimidine_transition - -[Term] -id: SO:1000014 -name: purine_transition -def: "A substitution of a purine, A or G, for another purine." [SO:ke] -synonym: "purine transition" EXACT [] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000015 -name: A_to_G_transition -def: "A transition of an adenine to a guanine." [SO:ke] -synonym: "A to G transition" EXACT [] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000016 -name: G_to_A_transition -def: "A transition of a guanine to an adenine." [SO:ke] -synonym: "G to A transition" EXACT [] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000017 -name: transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G, or vice versa." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -xref: http://en.wikipedia.org/wiki/Transversion "wiki" -is_a: SO:1000008 ! point_mutation - -[Term] -id: SO:1000018 -name: pyrimidine_to_purine_transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G." [SO:ke] -synonym: "pyrimidine to purine transversion" EXACT [] -is_a: SO:1000017 ! transversion - -[Term] -id: SO:1000019 -name: C_to_A_transversion -def: "A transversion from cytidine to adenine." [SO:ke] -synonym: "C to A transversion" EXACT [] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000020 -name: C_to_G_transversion -synonym: "C to G transversion" EXACT [] -is_a: SO:1000018 ! 
pyrimidine_to_purine_transversion - -[Term] -id: SO:1000021 -name: T_to_A_transversion -def: "A transversion from T to A." [SO:ke] -synonym: "T to A transversion" EXACT [] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000022 -name: T_to_G_transversion -def: "A transversion from T to G." [SO:ke] -synonym: "T to G transversion" EXACT [] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000023 -name: purine_to_pyrimidine_transversion -def: "Change of a purine nucleotide, A or G , into a pyrimidine nucleotide C or T." [SO:ke] -synonym: "purine to pyrimidine transversion" EXACT [] -is_a: SO:1000017 ! transversion - -[Term] -id: SO:1000024 -name: A_to_C_transversion -def: "A transversion from adenine to cytidine." [SO:ke] -synonym: "A to C transversion" EXACT [] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000025 -name: A_to_T_transversion -def: "A transversion from adenine to thymine." [SO:ke] -synonym: "A to T transversion" EXACT [] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000026 -name: G_to_C_transversion -def: "A transversion from guanine to cytidine." [SO:ke] -synonym: "G to C transversion" EXACT [] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000027 -name: G_to_T_transversion -def: "A transversion from guanine to thymine." [SO:ke] -synonym: "G to T transversion" EXACT [] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000028 -name: intrachromosomal_mutation -synonym: "intrachromosomal mutation" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000029 -name: chromosomal_deletion -def: "An incomplete chromosome." [SO:ke] -synonym: "(bacteria)&Dgr;" RELATED [] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(fungi)D" RELATED [] -synonym: "chromosomal deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Chromosomal_deletion "wiki" -is_a: SO:0000550 ! 
aneuploid_chromosome -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000030 -name: chromosomal_inversion -synonym: "(bacteria)IN" RELATED [] -synonym: "(Drosophila)In" RELATED [] -synonym: "(fungi)In" RELATED [] -synonym: "chromosomal inversion" EXACT [] -xref: http://en.wikipedia.org/wiki/Chromosomal_inversion "wiki" -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000031 -name: interchromosomal_mutation -synonym: "interchromosomal mutation" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000032 -name: indel -def: "A hybrid term (insertion/deletion) to describe sequence length change when the direction of the change is unspecified." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -xref: http://en.wikipedia.org/wiki/Indel "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:1000033 -name: nucleotide_deletion -def: "One or more continuous nucleotides are excised from the sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "nucleotide deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -is_a: SO:1000032 ! indel - -[Term] -id: SO:1000034 -name: nucleotide_insertion -def: "One or more nucleotides are added between two adjacent nucleotides in the sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "nucleotide insertion" EXACT [] -is_a: SO:1000032 ! indel - -[Term] -id: SO:1000035 -name: nucleotide_duplication -def: "One or more nucleotides are added between two adjacent nucleotides in the sequence; the inserted sequence derives from, or is identical in sequence to, nucleotides adjacent to insertion point." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "nucleotide duplication" EXACT [] -is_a: SO:1000034 ! nucleotide_insertion - -[Term] -id: SO:1000036 -name: inversion -def: "A continuous nucleotide sequence is inverted in the same position." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1000037 -name: chromosomal_duplication -def: "An extra chromosome." [SO:ke] -synonym: "(Drosophila)Dp" RELATED [] -synonym: "(fungi)Dp" RELATED [] -synonym: "chromosomal duplication" EXACT [] -xref: http://en.wikipedia.org/wiki/Chromosomal_duplication "wiki" -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:1000038 -name: intrachromosomal_duplication -synonym: "intrachromosomal duplication" EXACT [] -is_a: SO:1000028 ! intrachromosomal_mutation -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000039 -name: direct_tandem_duplication -synonym: "direct tandem duplication" EXACT [] -is_a: SO:1000173 ! tandem_duplication - -[Term] -id: SO:1000040 -name: inverted_tandem_duplication -synonym: "inverted tandem duplication" EXACT [] -is_a: SO:1000173 ! tandem_duplication - -[Term] -id: SO:1000041 -name: intrachromosomal_transposition -synonym: "(Drosophila)Tp" RELATED [] -synonym: "intrachromosomal transposition" EXACT [] -is_a: SO:0000453 ! transposition -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:1000042 -name: compound_chromosome -synonym: "compound chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000043 -name: Robertsonian_fusion -synonym: "Robertsonian fusion" EXACT [] -xref: http://en.wikipedia.org/wiki/Robertsonian_fusion "wiki" -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000044 -name: chromosomal_translocation -synonym: "(Drosophila)T" RELATED [] -synonym: "(fungi)T" RELATED [] -synonym: "chromosomal translocation" EXACT [] -xref: http://en.wikipedia.org/wiki/Chromosomal_translocation "wiki" -is_a: SO:1000031 ! 
interchromosomal_mutation - -[Term] -id: SO:1000045 -name: ring_chromosome -synonym: "(Drosophila)R" RELATED [] -synonym: "(fungi)C" RELATED [] -synonym: "ring chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Ring_chromosome "wiki" -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000046 -name: pericentric_inversion -synonym: "pericentric inversion" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000047 -name: paracentric_inversion -synonym: "paracentric inversion" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000048 -name: reciprocal_chromosomal_translocation -synonym: "reciprocal chromosomal translocation" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000049 -name: sequence_variation_affecting_transcript -def: "Any change in mature, spliced and processed, RNA that results from a change in the corresponding DNA sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting transcript" EXACT [] -synonym: "sequence variation affecting transcript" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:1000050 -name: sequence_variant_causing_no_change_in_transcript -def: "No effect on the state of the RNA." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing no change in transcript" RELATED [] -synonym: "sequence variant causing no change in transcript" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000052 -name: sequence_variation_affecting_complex_change_in_transcript -synonym: "mutation affecting complex change in transcript" EXACT [] -synonym: "sequence variation affecting complex change in transcript" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000054 -name: sequence_variation_affecting_coding_sequence -def: "Any of the amino acid coding triplets of a gene are affected by the DNA mutation." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting coding sequence" EXACT [] -synonym: "sequence variation affecting coding sequence" RELATED [] -is_a: SO:1000079 ! sequence_variation_affecting_transcript_sequence - -[Term] -id: SO:1000055 -name: sequence_variant_causing_initiator_codon_change_in_transcript -def: "The DNA mutation changes, usually destroys, the first coding triplet of a gene. Usually prevents translation although another initiator codon may be used." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing initiator codon change in transcript" RELATED [] -synonym: "sequence variant causing initiator codon change in transcript" EXACT [] -is_a: SO:1000056 ! sequence_variant_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000056 -name: sequence_variant_causing_amino_acid_coding_codon_change_in_transcript -def: "The DNA mutation affects the amino acid coding sequence of a gene; this region includes both the initiator and terminator codons." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutaton causing amino acid coding codon change in transcript" RELATED [] -synonym: "sequence variant causing amino acid coding codon change in transcript" EXACT [] -is_a: SO:1000054 ! sequence_variation_affecting_coding_sequence - -[Term] -id: SO:1000057 -name: sequence_variant_causing_synonymous_codon_change_in_transcript -def: "The changed codon has the same translation product as the original codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing synonymous codon change in transcript" RELATED [] -synonym: "sequence variant causing synonymous codon change in transcript" EXACT [] -is_a: SO:1000056 ! 
sequence_variant_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000058 -name: sequence_variant_causing_non_synonymous_codon_change_in_transcript -def: "A DNA point mutation that causes a substitution of an amino acid by an other." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing non synonymous codon change in transcript" RELATED [] -synonym: "non-synonymous codon change in transcript" EXACT [] -synonym: "sequence variant causing non synonymous codon change in transcript" EXACT [] -is_a: SO:1000056 ! sequence_variant_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000059 -name: sequence_variant_causing_missense_codon_change_in_transcript -def: "The nucleotide change in the codon leads to a new codon coding for a new amino acid." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing missense codon change in transcript" RELATED [] -synonym: "sequence variant causing missense codon change in transcript" EXACT [] -is_a: SO:1000058 ! sequence_variant_causing_non_synonymous_codon_change_in_transcript - -[Term] -id: SO:1000060 -name: sequence_variant_causing_conservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change does not change the gross properties (size, charge, hydrophobicity) of the amino acid at that position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -synonym: "mutation causing conservative missense codon change in transcript" RELATED [] -synonym: "sequence variant causing conservative missense codon change in transcript" EXACT [] -is_a: SO:1000059 ! 
sequence_variant_causing_missense_codon_change_in_transcript - -[Term] -id: SO:1000061 -name: sequence_variant_causing_nonconservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change changes the gross properties (size, charge, hydrophobicity) of the amino acid in that position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -synonym: "mutation causing nonconservative missense codon change in transcript" RELATED [] -synonym: "sequence variant causing nonconservative missense codon change in transcript" EXACT [] -is_a: SO:1000059 ! sequence_variant_causing_missense_codon_change_in_transcript - -[Term] -id: SO:1000062 -name: sequence_variant_causing_nonsense_codon_change_in_transcript -def: "The nucleotide change in the codon triplet creates a terminator codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing nonsense codon change in transcript" RELATED [] -synonym: "sequence variant causing nonsense codon change in transcript" EXACT [] -is_a: SO:1000056 ! sequence_variant_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000063 -name: sequence_variant_causing_terminator_codon_change_in_transcript -def: "The nucleotide change in the codon triplet changes the stop codon, causing an elongated transcript sequence." [SO:ke] -synonym: "mutation causing terminator codon change in transcript" RELATED [] -synonym: "sequence variant causing terminator codon change in transcript" EXACT [] -is_a: SO:1000054 ! sequence_variation_affecting_coding_sequence - -[Term] -id: SO:1000064 -name: sequence_variation_affecting_reading_frame -def: "An umbrella term for terms describing an effect of a sequence variation on the frame of translation." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting reading frame" EXACT [] -synonym: "sequence variation affecting reading frame" RELATED [] -is_a: SO:1000054 ! sequence_variation_affecting_coding_sequence - -[Term] -id: SO:1000065 -name: frameshift_sequence_variation -def: "A mutation causing a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three." [SO:ke] -synonym: "frameshift mutation" EXACT [] -synonym: "frameshift sequence variation" RELATED [] -synonym: "out of frame mutation" RELATED [] -xref: http://en.wikipedia.org/wiki/Frameshift_mutation "wiki" -is_a: SO:1000064 ! sequence_variation_affecting_reading_frame - -[Term] -id: SO:1000066 -name: sequence_variant_causing_plus_1_frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, due to the insertion of a nucleotide." [SO:ke] -synonym: "plus 1 frameshift mutation" EXACT [] -synonym: "sequence variant causing plus 1 frameshift mutation" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000067 -name: sequence_variant_causing_minus_1_frameshift -def: "A mutation causing a disruption of the translational reading frame, due to the deletion of a nucleotide." [SO:ke] -synonym: "minus 1 frameshift mutation" EXACT [] -synonym: "sequence variant causing minus 1 frameshift" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000068 -name: sequence_variant_causing_plus_2_frameshift -def: "A mutation causing a disruption of the translational reading frame, due to the insertion of two nucleotides." [SO:ke] -synonym: "plus 2 frameshift mutation" EXACT [] -synonym: "sequence variant causing plus 2 frameshift" EXACT [] -is_a: SO:1000065 ! 
frameshift_sequence_variation - -[Term] -id: SO:1000069 -name: sequence_variant_causing_minus_2_frameshift -def: "A mutation causing a disruption of the translational reading frame, due to the deletion of two nucleotides." [SO:ke] -synonym: "minus 2 frameshift mutation" EXACT [] -synonym: "sequence variant causing minus 2 frameshift" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000070 -name: sequence_variant_affecting_transcript_processing -def: "Sequence variant affects the way in which the primary transcriptional product is processed to form the mature transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting transcript processing" RELATED [] -synonym: "sequence variant affecting transcript processing" EXACT [] -is_a: SO:1000079 ! sequence_variation_affecting_transcript_sequence - -[Term] -id: SO:1000071 -name: sequence_variant_affecting_splicing -def: "A sequence_variant_effect where the way in which the primary transcriptional product is processed to form the mature transcript, specifically by the removal (splicing) of intron sequences is changed." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting splicing" RELATED [] -synonym: "sequence variant affecting splicing" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:1000072 -name: sequence_variant_affecting_splice_donor -def: "A sequence_variant_effect that changes the splice donor sequence." [SO:ke] -synonym: "mutation affecting splice donor" RELATED [] -synonym: "sequence variant affecting splice donor" RELATED [] -synonym: "splice donor mutation" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000073 -name: sequence_variant_affecting_splice_acceptor -def: "A sequence_variant_effect that changes the splice acceptor sequence." 
[SO:ke] -synonym: "mutation affecting splicing" RELATED [] -synonym: "sequence variant affecting splice acceptor" RELATED [] -synonym: "splice acceptor mutation" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000074 -name: sequence_variant_causing_cryptic_splice_activation -def: "A sequence variant causing a new (functional) splice site." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: A cryptic splice site is only used when the natural splice site has been disrupted by a sequence alteration. -synonym: "cryptic splice activator sequence variant" EXACT [] -synonym: "mutation causing cryptic splice activator" RELATED [] -synonym: "sequence variant causing cryptic splice activator" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000075 -name: sequence_variant_affecting_editing -def: "Sequence variant affects the editing of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting editing" RELATED [] -synonym: "sequence variant affecting editing" EXACT [] -is_a: SO:1000070 ! sequence_variant_affecting_transcript_processing - -[Term] -id: SO:1000076 -name: sequence_variant_affecting_transcription -def: "Mutation affects the process of transcription, its initiation, progression or termination." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting transcription" RELATED [] -synonym: "sequence variant affecting transcription" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000078 -name: sequence_variant_decreasing_rate_of_transcription -def: "A sequence variation that decreases the rate a which transcription of the sequence occurs." [SO:ke] -synonym: "mutation decreasing rate of transcription" RELATED [] -synonym: "sequence variation decreasing rate of transcription" EXACT [] -is_a: SO:1000081 ! 
sequence_variant_affecting_rate_of_transcription - -[Term] -id: SO:1000079 -name: sequence_variation_affecting_transcript_sequence -synonym: "mutation affecting transcript sequence" EXACT [] -synonym: "sequence variation affecting transcript sequence" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000080 -name: sequence_variant_increasing_rate_of_transcription -synonym: "mutation increasing rate of transcription" RELATED [] -synonym: "sequence variation increasing rate of transcription" EXACT [] -is_a: SO:1000081 ! sequence_variant_affecting_rate_of_transcription - -[Term] -id: SO:1000081 -name: sequence_variant_affecting_rate_of_transcription -def: "A mutation that alters the rate a which transcription of the sequence occurs." [SO:ke] -synonym: "mutation affecting rate of transcription" RELATED [] -synonym: "sequence variant affecting rate of transcription" EXACT [] -is_a: SO:1000076 ! sequence_variant_affecting_transcription - -[Term] -id: SO:1000082 -name: sequence variant_affecting_transcript_stability -def: "Sequence variant affects the stability of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting transcript stability" RELATED [] -synonym: "sequence variant affecting transcript stability" EXACT [] -is_a: SO:1000079 ! sequence_variation_affecting_transcript_sequence - -[Term] -id: SO:1000083 -name: sequence_variant_increasing_transcript_stability -def: "Sequence variant increases the stability (half-life) of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation increasing transcript stability" RELATED [] -synonym: "sequence variant increasing transcript stability" EXACT [] -is_a: SO:1000082 ! sequence variant_affecting_transcript_stability - -[Term] -id: SO:1000084 -name: sequence_variant_decreasing_transcript_stability -def: "Sequence variant decreases the stability (half-life) of the transcript." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation decreasing transcript stability" RELATED [] -synonym: "sequence variant decreasing transcript stability" EXACT [] -is_a: SO:1000082 ! sequence variant_affecting_transcript_stability - -[Term] -id: SO:1000085 -name: sequence_variation_affecting_level_of_transcript -def: "A sequence variation that causes a change in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -synonym: "mutation affecting level of transcript" RELATED [] -synonym: "sequence variation affecting level of transcript" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000086 -name: sequence_variation_decreasing_level_of_transcript -def: "A sequence variation that causes a decrease in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -synonym: "mutation decreasing level of transcript" EXACT [] -synonym: "sequence variation decreasing level of transcript" RELATED [] -is_a: SO:1000085 ! sequence_variation_affecting_level_of_transcript - -[Term] -id: SO:1000087 -name: sequence_variation_increasing_level_of_transcript -def: "A sequence_variation that causes an increase in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -synonym: "mutation increasing level of transcript" EXACT [] -synonym: "sequence variation increasing level of transcript" EXACT [] -is_a: SO:1000085 ! sequence_variation_affecting_level_of_transcript - -[Term] -id: SO:1000088 -name: sequence_variant_affecting_translational_product -def: "Mutation causes a change in primary translation product of a transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting translational product" RELATED [] -synonym: "sequence variant affecting translational product" EXACT [] -is_a: SO:1000132 ! 
sequence_variant_effect - -[Term] -id: SO:1000089 -name: sequence_variant_causing_no_change_of_translational_product -def: "The sequence variant at RNA level does not lead to any change in polypeptide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing no change of translational product" RELATED [] -synonym: "sequence variant causing no change of translational product" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000090 -name: sequence_variant_causing_uncharacterised_change_of_translational_product -def: "A sequence variant causing an uncharacterized change of translational product." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing uncharacterised change of translational product" RELATED [] -synonym: "sequence variant causing uncharacterised change of translational product" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000091 -name: sequence_variant_causing_partially_characterised_change_of_translational_product -def: "A sequence variant causing a partially uncharacterised change in translational product." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The nature of the mutation event is only partially characterised. -synonym: "mutation causing partially characterised change of translational product" RELATED [] -synonym: "sequence variant causing partially characterised change of translational product" EXACT [] -is_a: SO:1000090 ! sequence_variant_causing_uncharacterised_change_of_translational_product - -[Term] -id: SO:1000092 -name: sequence_variant_causing_complex_change_of_translational_product -def: "Any sequence variant effect that is known at nucleotide level but cannot be explained by using other key terms." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing complex change of translational product" RELATED [] -synonym: "sequence variant causing complex change of translational product" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000093 -name: sequence_variant_causing_amino_acid_substitution -def: "The replacement of a single amino acid by another." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing amino acid substitution" RELATED [] -synonym: "sequence variant causing amino acid substitution" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000094 -name: sequence_variant_causing_conservative_amino_acid_substitution -synonym: "mutation causing conservative amino acid substitution" RELATED [] -synonym: "sequence variant causing conservative amino acid substitution" EXACT [] -is_a: SO:1000093 ! sequence_variant_causing_amino_acid_substitution - -[Term] -id: SO:1000095 -name: sequence_variant_causing_nonconservative_amino_acid_substitution -synonym: "mutation causing nonconservative amino acid substitution" RELATED [] -synonym: "sequence variant causing nonconservative amino acid substitution" EXACT [] -is_a: SO:1000093 ! sequence_variant_causing_amino_acid_substitution - -[Term] -id: SO:1000096 -name: sequence_variant_causing_amino_acid_insertion -def: "The insertion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing amino acid insertion" RELATED [] -synonym: "sequence variant causing amino acid insertion" EXACT [] -is_a: SO:1000105 ! 
sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000097 -name: sequence_variant_causing_amino_acid_deletion -def: "The deletion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing amino acid deletion" RELATED [] -synonym: "sequence variant causing amino acid deletion" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000098 -name: sequence_variant_causing_polypeptide_truncation -def: "The translational product is truncated at its C-terminus, usually a result of a nonsense codon change in transcript (SO:1000062)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing polypeptide truncation" RELATED [] -synonym: "sequence variant causing polypeptide truncation" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000099 -name: sequence_variant_causing_polypeptide_elongation -def: "The extension of the translational product at either (or both) the N-terminus and/or the C-terminus." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing polypeptide elongation" RELATED [] -synonym: "sequence variant causing polypeptide elongation" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000100 -name: mutation_causing_polypeptide_N_terminal_elongation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing polypeptide N terminal elongation" EXACT [] -synonym: "polypeptide N-terminal elongation" EXACT [] -is_a: SO:1000099 ! sequence_variant_causing_polypeptide_elongation - -[Term] -id: SO:1000101 -name: mutation_causing_polypeptide_C_terminal_elongation -def: "." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing polypeptide C terminal elongation" EXACT [] -synonym: "polypeptide C-terminal elongation" EXACT [] -is_a: SO:1000099 ! sequence_variant_causing_polypeptide_elongation - -[Term] -id: SO:1000102 -name: sequence_variant_affecting_level_of_translational_product -synonym: "mutation affecting level of translational product" RELATED [] -synonym: "sequence variant affecting level of translational product" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000103 -name: sequence_variant_decreasing_level_of_translation_product -synonym: "mutationdecreasing level of translation product" RELATED [] -synonym: "sequence variant decreasing level of translation product" EXACT [] -is_a: SO:1000102 ! sequence_variant_affecting_level_of_translational_product - -[Term] -id: SO:1000104 -name: sequence_variant_increasing_level_of_translation_product -synonym: "mutationt increasing level of translation product" RELATED [] -synonym: "sequence variant increasing level of translation product" EXACT [] -is_a: SO:1000102 ! sequence_variant_affecting_level_of_translational_product - -[Term] -id: SO:1000105 -name: sequence_variant_affecting_polypeptide_amino_acid_sequence -synonym: "mutation affecting polypeptide amino acid sequence" RELATED [] -synonym: "sequence variant affecting polypeptide amino acid sequence" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000106 -name: mutation_causing_inframe_polypeptide_N_terminal_elongation -synonym: "inframe polypeptide N-terminal elongation" EXACT [] -synonym: "mutation causing inframe polypeptide N terminal elongation" EXACT [] -is_a: SO:1000100 ! 
mutation_causing_polypeptide_N_terminal_elongation - -[Term] -id: SO:1000107 -name: mutation_causing_out_of_frame_polypeptide_N_terminal_elongation -synonym: "mutation causing out of frame polypeptide N terminal elongation" EXACT [] -synonym: "out of frame polypeptide N-terminal elongation" EXACT [] -is_a: SO:1000100 ! mutation_causing_polypeptide_N_terminal_elongation - -[Term] -id: SO:1000108 -name: mutaton_causing_inframe_polypeptide_C_terminal_elongation -synonym: "inframe_polypeptide C-terminal elongation" EXACT [] -synonym: "mutaton causing inframe polypeptide C terminal elongation" EXACT [] -is_a: SO:1000101 ! mutation_causing_polypeptide_C_terminal_elongation - -[Term] -id: SO:1000109 -name: mutation_causing_out_of_frame_polypeptide_C_terminal_elongation -synonym: "mutation causing out of frame polypeptide C terminal elongation" EXACT [] -synonym: "out of frame polypeptide C-terminal elongation" EXACT [] -is_a: SO:1000101 ! mutation_causing_polypeptide_C_terminal_elongation - -[Term] -id: SO:1000110 -name: frame_restoring_sequence_variant -def: "A mutation that reverts the sequence of a previous frameshift mutation back to the initial frame." [SO:ke] -synonym: "frame restoring mutation" EXACT [] -synonym: "frame restoring sequence variant" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000111 -name: sequence_variant_affecting_3D_structure_of_polypeptide -def: "A mutation that changes the amino acid sequence of the peptide in such a way that it changes the 3D structure of the molecule." [SO:ke] -synonym: "mutation affecting 3D structure of polypeptide" RELATED [] -synonym: "sequence variant affecting 3D structure of polypeptide" EXACT [] -synonym: "sequence variant affecting 3D-structure of polypeptide" EXACT [] -is_a: SO:1000088 ! 
sequence_variant_affecting_translational_product - -[Term] -id: SO:1000112 -name: sequence_variant_causing_no_3D_structural_change -synonym: "mutation causing no 3D structural change" RELATED [] -synonym: "sequence variant causing no 3D structural change" EXACT [] -is_a: SO:1000111 ! sequence_variant_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000113 -name: sequence_variant_causing_uncharacterised_3D_structural_change -synonym: "mutation causing uncharacterised 3D structural change" RELATED [] -synonym: "sequence variant causing uncharacterised 3D structural change" EXACT [] -is_a: SO:1000111 ! sequence_variant_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000114 -name: sequence_variant_causing_partially_characterised_3D_structural_change -synonym: "mutation causing partially characterised 3D structural change" RELATED [] -synonym: "sequence variant causing partially characterised 3D structural change" EXACT [] -is_a: SO:1000113 ! sequence_variant_causing_uncharacterised_3D_structural_change - -[Term] -id: SO:1000115 -name: sequence_variant_causing_complex_3D_structural_change -synonym: "mutation causing complex 3D structural change" RELATED [] -synonym: "sequence variant causing complex 3D structural change" EXACT [] -is_a: SO:1000111 ! sequence_variant_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000116 -name: sequence_variant_causing_conformational_change -synonym: "mutation causing conformational change" RELATED [] -synonym: "sequence variant causing conformational change" EXACT [] -is_a: SO:1000111 ! sequence_variant_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000117 -name: sequence_variant_affecting_polypeptide_function -synonym: "mutation affecting polypeptide function" RELATED [] -synonym: "sequence variant affecting polypeptide function" EXACT [] -is_a: SO:1000088 ! 
sequence_variant_affecting_translational_product - -[Term] -id: SO:1000118 -name: sequence_variant_causing_loss_of_function_of_polypeptide -synonym: "loss of function of polypeptide" RELATED [] -synonym: "mutation causing loss of function of polypeptide" RELATED [] -synonym: "sequence variant causing loss of function of polypeptide" EXACT [] -is_a: SO:1000117 ! sequence_variant_affecting_polypeptide_function - -[Term] -id: SO:1000119 -name: sequence_variant_causing_inactive_ligand_binding_site -synonym: "mutation causing inactive ligand binding site" RELATED [] -synonym: "sequence variant causing inactive ligand binding site" EXACT [] -is_a: SO:1000118 ! sequence_variant_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000120 -name: sequence_variant_causing_inactive_catalytic_site -synonym: "mutation causing inactive catalytic site" RELATED [] -synonym: "sequence variant causing inactive catalytic site" EXACT [] -is_a: SO:1000119 ! sequence_variant_causing_inactive_ligand_binding_site - -[Term] -id: SO:1000121 -name: sequence_variant_causing_polypeptide_localization_change -synonym: "mutation causing polypeptide localization change" RELATED [] -synonym: "sequence variant causing polypeptide localization change" EXACT [] -is_a: SO:1000117 ! sequence_variant_affecting_polypeptide_function - -[Term] -id: SO:1000122 -name: sequence_variant_causing_polypeptide_post_translational_processing_change -synonym: "mutation causing polypeptide post translational processing change" RELATED [] -synonym: "polypeptide post-translational processing affected" EXACT [] -synonym: "sequence variant causing polypeptide post translational processing change" EXACT [] -is_a: SO:1000117 ! sequence_variant_affecting_polypeptide_function -is_a: SO:1000118 ! 
sequence_variant_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000123 -name: polypeptide_post_translational_processing_affected -synonym: "polypeptide_post-translational_processing_affected" RELATED [] -is_obsolete: true - -[Term] -id: SO:1000124 -name: sequence_variant_causing_partial_loss_of_function_of_polypeptide -synonym: "mutation causing partial loss of function of polypeptide" RELATED [] -synonym: "partial loss of function of polypeptide" EXACT [] -synonym: "sequence variant causing partial loss of function of polypeptide" EXACT [] -is_a: SO:1000118 ! sequence_variant_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000125 -name: sequence_variant_causing_gain_of_function_of_polypeptide -synonym: "gain of function of polypeptide" EXACT [] -synonym: "mutation causing gain of function of polypeptide" RELATED [] -synonym: "sequence variant causing gain of function of polypeptide" EXACT [] -is_a: SO:1000117 ! sequence_variant_affecting_polypeptide_function - -[Term] -id: SO:1000126 -name: sequence_variant_affecting_transcript_secondary_structure -def: "A sequence variant that affects the secondary structure (folding) of the RNA transcript molecule." [SO:ke] -synonym: "mutation affecting transcript secondary structure" RELATED [] -synonym: "sequence variant affecting transcript secondary structure" EXACT [] -is_a: SO:1000079 ! sequence_variation_affecting_transcript_sequence - -[Term] -id: SO:1000127 -name: sequence_variant_causing_compensatory_transcript_secondary_structure_mutation -synonym: "mutation causing compensatory transcript secondary structure mutation" RELATED [] -synonym: "sequence variant causing compensatory transcript secondary structure mutation" EXACT [] -is_a: SO:1000126 ! sequence_variant_affecting_transcript_secondary_structure - -[Term] -id: SO:1000132 -name: sequence_variant_effect -def: "The effect of a change in nucleotide sequence." [SO:ke] -comment: Updated after discussion with Peter Taschner - Feb 09. 
-synonym: "sequence variant effect" RELATED [] -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:0000400 ! sequence_attribute - -[Term] -id: SO:1000134 -name: sequence_variant_causing_polypeptide_fusion -synonym: "mutation causing polypeptide fusion" RELATED [] -synonym: "sequence variant causing polypeptide fusion" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000136 -name: autosynaptic_chromosome -synonym: "(Drosophila)A" RELATED [] -synonym: "autosynaptic chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000138 -name: homo_compound_chromosome -synonym: "homo compound chromosome" EXACT [] -synonym: "homo-compound chromosome" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:1000140 -name: hetero_compound_chromosome -synonym: "hetero compound chromosome" EXACT [] -synonym: "hetero-compound chromosome" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:1000141 -name: chromosome_fission -synonym: "chromosome fission" EXACT [] -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000142 -name: dexstrosynaptic_chromosome -synonym: "dexstrosynaptic chromosome" EXACT [] -is_a: SO:1000136 ! autosynaptic_chromosome - -[Term] -id: SO:1000143 -name: laevosynaptic_chromosome -synonym: "laevosynaptic chromosome" EXACT [] -is_a: SO:1000136 ! autosynaptic_chromosome - -[Term] -id: SO:1000144 -name: free_duplication -synonym: "free duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000145 -name: free_ring_duplication -synonym: "(Drosophila)R" RELATED [] -synonym: "free ring duplication" EXACT [] -is_a: SO:1000045 ! ring_chromosome -is_a: SO:1000144 ! free_duplication - -[Term] -id: SO:1000146 -name: complex_chromosomal_mutation -synonym: "complex chromosomal mutation" EXACT [] -is_a: SO:1000183 ! 
chromosome_structure_variation - -[Term] -id: SO:1000147 -name: deficient_translocation -def: "A translocation in which one of the four broken ends loses a segment before re-joining." [FB:reference_manual] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfT" RELATED [] -synonym: "deficient translocation" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000148 -name: inversion_cum_translocation -def: "The first two breaks are in the same chromosome, and the region between them is rejoined in inverted order to the other side of the first break, such that both sides of break one are present on the same chromosome. The remaining free ends are joined as a translocation with those resulting from the third break." [FB:reference_manual] -synonym: "(Drosophila)InT" RELATED [] -synonym: "(Drosophila)T" RELATED [] -synonym: "inversion cum translocation" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000149 -name: bipartite_duplication -def: "The (large) region between the first two breaks listed is lost, and the two flanking segments (one of them centric) are joined as a translocation to the free ends resulting from the third break." [FB:reference_manual] -synonym: "(Drosophila)bDp" RELATED [] -synonym: "bipartite duplication" EXACT [] -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000150 -name: cyclic_translocation -def: "Three breaks in three different chromosomes. The centric segment resulting from the first break listed is joined to the acentric segment resulting from the second, rather than the third." [FB:reference_manual] -synonym: "cyclic translocation" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000151 -name: bipartite_inversion -def: "Three breaks in the same chromosome; both central segments are inverted in place (i.e., they are not transposed)." 
[FB:reference_manual] -synonym: "(Drosophila)bIn" RELATED [] -synonym: "bipartite inversion" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000152 -name: uninverted_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)eDp" RELATED [] -synonym: "uninverted insertional duplication" EXACT [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000153 -name: inverted_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)iDp" RELATED [] -synonym: "inverted insertional duplication" EXACT [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000154 -name: insertional_duplication -def: "A chromosome duplication involving the insertion of a duplicated region." [SO:ke] -synonym: "(Drosophila)Dpp" RELATED [] -synonym: "insertional duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000155 -name: interchromosomal_transposition -synonym: "(Drosophila)Tp" RELATED [] -synonym: "interchromosomal transposition" EXACT [] -is_a: SO:0000453 ! transposition -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000156 -name: inverted_interchromosomal_transposition -synonym: "(Drosophila)iTp" RELATED [] -synonym: "inverted interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000157 -name: uninverted_interchromosomal_transposition -synonym: "(Drosophila)eTp" RELATED [] -synonym: "uninverted interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! 
interchromosomal_transposition - -[Term] -id: SO:1000158 -name: inverted_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)iTp" RELATED [] -synonym: "inverted intrachromosomal transposition" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000159 -name: uninverted_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)eTp" RELATED [] -synonym: "uninverted intrachromosomal transposition" EXACT [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000160 -name: unoriented_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [FB:reference_manual] -synonym: "(Drosophila)uDp" RELATED [] -synonym: "unoriented insertional duplication" EXACT [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000161 -name: unorientated_interchromosomal_transposition -synonym: "(Drosophila)uTp" RELATED [] -synonym: "unorientated interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000162 -name: unorientated_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." 
[FB:reference_manual] -synonym: "(Drosophila)uTp" RELATED [] -synonym: "unorientated intrachromosomal transposition" EXACT [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000170 -name: uncharacterised_chromosomal_mutation -synonym: "uncharacterised chromosomal mutation" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000171 -name: deficient_inversion -def: "Three breaks in the same chromosome; one central region lost, the other inverted." [FB:reference_manual] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfIn" RELATED [] -synonym: "deficient inversion" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000173 -name: tandem_duplication -synonym: "tandem duplication" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:1000175 -name: partially_characterised_chromosomal_mutation -synonym: "partially characterised chromosomal mutation" EXACT [] -is_a: SO:1000170 ! uncharacterised_chromosomal_mutation - -[Term] -id: SO:1000177 -name: sequence_variant_causing_uncharacterised_change_in_transcript -def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing uncharacterised change in transcript" RELATED [] -synonym: "sequence variant causing uncharacterised change in transcript" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000179 -name: sequence_variant_causing_partially_characterised_change_in_transcript -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing partially characterised change in transcript" RELATED [] -synonym: "sequence variant causing partially characterised change in transcript" EXACT [] -is_a: SO:1000177 ! 
sequence_variant_causing_uncharacterised_change_in_transcript - -[Term] -id: SO:1000180 -name: sequence_variant_affecting_gene_structure -def: "A sequence_variant_effect that changes the gene structure." [SO:ke] -synonym: "mutation affecting gene structure" RELATED [] -synonym: "sequence variant affecting gene structure" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:1000181 -name: sequence_variant_causing_gene_fusion -def: "A sequence_variant_effect that changes the gene structure by causing a fusion to another gene." [SO:ke] -synonym: "mutation causing gene fusion" RELATED [] -synonym: "sequence variant causing gene fusion" EXACT [] -is_a: SO:1000180 ! sequence_variant_affecting_gene_structure - -[Term] -id: SO:1000182 -name: chromosome_number_variation -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number." [SO:ke] -synonym: "chromosome number variation" EXACT [] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:1000183 -name: chromosome_structure_variation -synonym: "chromosome structure variation" EXACT [] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:1000184 -name: sequence_variant_causes_exon_loss -def: "A sequence variant affecting splicing and causes an exon loss." [SO:ke] -synonym: "mutation causes exon loss" RELATED [] -synonym: "sequence variant causes exon loss" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000185 -name: sequence_variant_causes_intron_gain -def: "A sequence variant effect, causing an intron to be gained by the processed transcript; usually a result of a donor acceptor mutation (SO:1000072)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causes intron gain" RELATED [] -synonym: "sequence variant causes intron gain" EXACT [] -is_a: SO:1000071 ! 
sequence_variant_affecting_splicing - -[Term] -id: SO:1000186 -name: sequence_variant_causing_cryptic_splice_donor_activation -synonym: "sequence variant causing cryptic splice donor activation" EXACT [] -is_a: SO:1000074 ! sequence_variant_causing_cryptic_splice_activation - -[Term] -id: SO:1001186 -name: sequence_variant_causing_cryptic_splice_acceptor_activation -synonym: "sequence variant causing cryptic splice acceptor activation" EXACT [] -is_a: SO:1000074 ! sequence_variant_causing_cryptic_splice_activation - -[Term] -id: SO:1001187 -name: alternatively_spliced_transcript -def: "A transcript that is alternatively spliced." [SO:xp] -synonym: "alternatively spliced transcript" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000877 ! alternatively_spliced -relationship: has_quality SO:0000877 ! implied link automatically realized ! alternatively_spliced - -[Term] -id: SO:1001188 -name: encodes_1_polypeptide -def: "A gene that is alternately spliced, but encodes only one polypeptide." [SO:ke] -synonym: "encodes 1 polypeptide" EXACT [] -is_a: SO:0000463 ! encodes_alternately_spliced_transcripts - -[Term] -id: SO:1001189 -name: encodes_greater_than_1_polypeptide -def: "A gene that is alternately spliced, and encodes more than one polypeptide." [SO:ke] -synonym: "encodes greater than 1 polypeptide" EXACT [] -is_a: SO:0000463 ! encodes_alternately_spliced_transcripts - -[Term] -id: SO:1001190 -name: encodes_different_polypeptides_different_stop -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences, but use different stop codons." [SO:ke] -synonym: "encodes different polypeptides different stop" EXACT [] -is_a: SO:1001195 ! 
encodes_overlapping_peptides - -[Term] -id: SO:1001191 -name: encodes_overlapping_peptides_different_start -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences, but use different start codons." [SO:ke] -synonym: "encodes overlapping peptides different start" EXACT [] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001192 -name: encodes_disjoint_polypeptides -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that do not have overlapping peptide sequences." [SO:ke] -synonym: "encodes disjoint polypeptides" EXACT [] -is_a: SO:1001189 ! encodes_greater_than_1_polypeptide - -[Term] -id: SO:1001193 -name: encodes_overlapping_polypeptides_different_start_and_stop -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences, but use different start and stop codons." [SO:ke] -synonym: "encodes overlapping polypeptides different start and stop" EXACT [] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001194 -name: alternatively_spliced_gene_encoding_greater_than_1_polypeptide_coding_regions_overlapping -is_obsolete: true - -[Term] -id: SO:1001195 -name: encodes_overlapping_peptides -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences." [SO:ke] -synonym: "encodes overlapping peptides" EXACT [] -is_a: SO:1001189 ! encodes_greater_than_1_polypeptide - -[Term] -id: SO:1001196 -name: cryptogene -def: "A maxicircle gene so extensively edited that it cannot be matched to its edited mRNA sequence." [SO:ma] -is_a: SO:0000654 ! implied link automatically realized ! maxicircle_gene -is_a: SO:0001431 ! implied link automatically realized ! cryptic_gene -intersection_of: SO:0000654 ! maxicircle_gene -intersection_of: has_quality SO:0000976 ! 
cryptic - -[Term] -id: SO:1001197 -name: dicistronic_primary_transcript -def: "A primary transcript that has the quality dicistronic." [SO:xp] -synonym: "dicistronic primary transcript" EXACT [] -is_a: SO:0000079 ! implied link automatically realized ! dicistronic_transcript -is_a: SO:0000631 ! implied link automatically realized ! polycistronic_primary_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:1001217 -name: member_of_regulon -synonym: "member of regulon" EXACT [] -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:1001244 -name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non_overlapping -synonym: "alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non-overlapping" RELATED [] -is_obsolete: true - -[Term] -id: SO:1001246 -name: CDS_independently_known -def: "A CDS with the evidence status of being independently known." [SO:xp] -synonym: "CDS independently known" EXACT [] -is_a: SO:0000316 ! implied link automatically realized ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000906 ! independently_known -relationship: has_quality SO:0000906 ! implied link automatically realized ! independently_known - -[Term] -id: SO:1001247 -name: orphan_CDS -def: "A CDS whose predicted amino acid sequence is unsupported by any experimental evidence or by any match with any other known sequence." [SO:ma] -synonym: "orphan CDS" EXACT [] -is_a: SO:1001254 ! implied link automatically realized ! CDS_predicted -intersection_of: SO:1001254 ! CDS_predicted -intersection_of: has_origin SO:0000910 ! orphan -relationship: has_origin SO:0000910 ! implied link automatically realized ! orphan - -[Term] -id: SO:1001249 -name: CDS_supported_by_domain_match_data -def: "A CDS that is supported by domain similarity." 
[SO:xp] -synonym: "CDS supported by domain match data" EXACT [] -is_a: SO:1001251 ! implied link automatically realized ! CDS_supported_by_sequence_similarity_data -intersection_of: SO:1001251 ! CDS_supported_by_sequence_similarity_data -intersection_of: has_quality SO:0000908 ! supported_by_domain_match -relationship: has_quality SO:0000908 ! implied link automatically realized ! supported_by_domain_match - -[Term] -id: SO:1001251 -name: CDS_supported_by_sequence_similarity_data -def: "A CDS that is supported by sequence similarity data." [SO:xp] -synonym: "CDS supported by sequence similarity data" EXACT [] -is_a: SO:1001254 ! implied link automatically realized ! CDS_predicted -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000907 ! supported_by_sequence_similarity -relationship: has_quality SO:0000907 ! implied link automatically realized ! supported_by_sequence_similarity - -[Term] -id: SO:1001254 -name: CDS_predicted -def: "A CDS that is predicted." [SO:ke] -synonym: "CDS predicted" EXACT [] -is_a: SO:0000316 ! implied link automatically realized ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000732 ! predicted -relationship: has_quality SO:0000732 ! implied link automatically realized ! predicted - -[Term] -id: SO:1001255 -name: status_of_coding_sequence -is_obsolete: true - -[Term] -id: SO:1001259 -name: CDS_supported_by_EST_or_cDNA_data -def: "A CDS that is supported by similarity to EST or cDNA data." [SO:xp] -synonym: "CDS supported by EST or cDNA data" EXACT [] -is_a: SO:1001251 ! implied link automatically realized ! CDS_supported_by_sequence_similarity_data -intersection_of: SO:1001251 ! CDS_supported_by_sequence_similarity_data -intersection_of: has_quality SO:0000909 ! supported_by_EST_or_cDNA -relationship: has_quality SO:0000909 ! implied link automatically realized ! 
supported_by_EST_or_cDNA - -[Term] -id: SO:1001260 -name: internal_Shine_Dalgarno_sequence -def: "A Shine-Dalgarno sequence that stimulates recoding through interactions with the anti-Shine-Dalgarno in the RNA of small ribosomal subunits of translating ribosomes. The signal is only operative in Bacteria." [PMID:12519954, SO:ke] -synonym: "internal Shine Dalgarno sequence" EXACT [] -synonym: "internal Shine-Dalgarno sequence" EXACT [] -is_a: SO:0000243 ! internal_ribosome_entry_site -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001261 -name: recoded_mRNA -def: "The sequence of a mature mRNA transcript, modified before translation or during translation, usually by special cis-acting signals." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract] -synonym: "recoded mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000881 ! recoded -relationship: has_quality SO:0000881 ! implied link automatically realized ! recoded - -[Term] -id: SO:1001262 -name: minus_1_translationally_frameshifted -def: "An attribute describing a translational frameshift of -1." [SO:ke] -synonym: "minus 1 translationally frameshifted" EXACT [] -is_a: SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:1001263 -name: plus_1_translationally_frameshifted -def: "An attribute describing a translational frameshift of +1." [SO:ke] -synonym: "plus 1 translationally frameshifted" EXACT [] -is_a: SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:1001264 -name: mRNA_recoded_by_translational_bypass -def: "A recoded_mRNA where translation was suspended at a particular codon and resumed at a particular non-overlapping downstream codon." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract] -synonym: "mRNA recoded by translational bypass" EXACT [] -is_a: SO:1001261 ! 
implied link automatically realized ! recoded_mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000886 ! recoded_by_translational_bypass -relationship: has_quality SO:0000886 ! implied link automatically realized ! recoded_by_translational_bypass - -[Term] -id: SO:1001265 -name: mRNA_recoded_by_codon_redefinition -def: "A recoded_mRNA that was modified by an alteration of codon meaning." [SO:ma] -synonym: "mRNA recoded by codon redefinition" EXACT [] -is_a: SO:1001261 ! implied link automatically realized ! recoded_mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000882 ! codon_redefined -relationship: has_quality SO:0000882 ! implied link automatically realized ! codon_redefined - -[Term] -id: SO:1001266 -name: stop_codon_redefinition_as_selenocysteine -is_obsolete: true - -[Term] -id: SO:1001267 -name: stop_codon_readthrough -is_obsolete: true - -[Term] -id: SO:1001268 -name: recoding_stimulatory_region -def: "A site in an mRNA sequence that stimulates the recoding of a region in the same mRNA." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12519954&dopt=Abstract] -synonym: "recoding stimulatory region" EXACT [] -synonym: "recoding stimulatory signal" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:1001269 -name: four_bp_start_codon -def: "A non-canonical start codon with 4 base pairs." [SO:ke] -synonym: "4bp start codon" EXACT [] -synonym: "four bp start codon" EXACT [] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001270 -name: stop_codon_redefinition_as_pyrrolysine -is_obsolete: true - -[Term] -id: SO:1001271 -name: archaeal_intron -def: "An intron characteristic of Archaeal tRNA and rRNA genes, where intron transcript generates a bulge-helix-bulge motif that is recognised by a splicing endoribonuclease." [PMID:9301331, SO:ma] -comment: Intron characteristic of tRNA genes; splices by an endonuclease-ligase mediated mechanism. 
-synonym: "archaeal intron" EXACT [] -is_a: SO:0001216 ! endonuclease_spliced_intron - -[Term] -id: SO:1001272 -name: tRNA_intron -def: "An intron found in tRNA that is spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -comment: Could be a cross product with Gene ontology, GO:0006388. -synonym: "pre-tRNA intron" EXACT [] -synonym: "tRNA intron" EXACT [] -is_a: SO:0001216 ! endonuclease_spliced_intron - -[Term] -id: SO:1001273 -name: CTG_start_codon -def: "A non-canonical start codon of sequence CTG." [SO:ke] -synonym: "CTG start codon" EXACT [] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001274 -name: SECIS_element -def: "The incorporation of selenocysteine into a protein sequence is directed by an in-frame UGA codon (usually a stop codon) within the coding region of the mRNA. Selenoprotein mRNAs contain a conserved secondary structure in the 3' UTR that is required for the distinction of UGA stop from UGA selenocysteine. The selenocysteine insertion sequence (SECIS) is around 60 nt in length and adopts a hairpin structure which is sufficiently well-defined and conserved to act as a computational screen for selenoprotein genes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00031] -synonym: "SECIS element" EXACT [] -xref: http://en.wikipedia.org/wiki/SECIS_element "wiki" -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001275 -name: retron -def: "Sequence coding for a short, single-stranded, DNA sequence via a retrotransposed RNA intermediate; characteristic of some microbial genomes." [SO:ma] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1001277 -name: three_prime_recoding_site -def: "The recoding stimulatory signal located downstream of the recoding site." [SO:ke] -synonym: "three prime recoding site" EXACT [] -is_a: SO:1001268 ! 
recoding_stimulatory_region - -[Term] -id: SO:1001279 -name: three_prime_stem_loop_structure -def: "A recoding stimulatory region, the stem-loop secondary structural element is downstream of the redefined region." [PMID:12519954, SO:ke] -synonym: "three prime stem loop structure" EXACT [] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001280 -name: five_prime_recoding_site -def: "The recoding stimulatory signal located upstream of the recoding site." [SO:ke] -synonym: "five prime recoding site" EXACT [] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001281 -name: flanking_three_prime_quadruplet_recoding_signal -def: "Four base pair sequence immediately downstream of the redefined region. The redefined region is a frameshift site. The quadruplet is 2 overlapping codons." [PMID:12519954, SO:ke] -synonym: "flanking three prime quadruplet recoding signal" EXACT [] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001282 -name: UAG_stop_codon_signal -def: "A stop codon signal for a UAG stop codon redefinition." [SO:ke] -synonym: "UAG stop codon signal" EXACT [] -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001283 -name: UAA_stop_codon_signal -def: "A stop codon signal for a UAA stop codon redefinition." [SO:ke] -synonym: "UAA stop codon signal" EXACT [] -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001284 -name: regulon -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." [ISBN:0198506732] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Regulon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:1001285 -name: UGA_stop_codon_signal -def: "A stop codon signal for a UGA stop codon redefinition." [SO:ke] -synonym: "UGA stop codon signal" EXACT [] -is_a: SO:1001288 ! 
stop_codon_signal - -[Term] -id: SO:1001286 -name: three_prime_repeat_recoding_signal -def: "A recoding stimulatory signal, downstream sequence important for recoding that contains repetitive elements." [PMID:12519954, SO:ke] -synonym: "three prime repeat recoding signal" EXACT [] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001287 -name: distant_three_prime_recoding_signal -def: "A recoding signal that is found many hundreds of nucleotides 3' of a redefined stop codon." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8709208&dopt=Abstract] -synonym: "distant three prime recoding signal" EXACT [] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001288 -name: stop_codon_signal -def: "A recoding stimulatory signal that is a stop codon and has effect on efficiency of recoding." [PMID:12519954, SO:ke] -comment: This term does not include the stop codons that are redefined. An example would be a stop codon that partially overlapped a frame shifting site would be an example stimulatory signal. -synonym: "stop codon signal" EXACT [] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:2000061 -name: databank_entry -def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -synonym: "databank entry" EXACT [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:3000000 -name: gene_segment -def: "A gene component region which acts as a recombinational unit of a gene whose functional form is generated through somatic recombination." [GOC:add] -comment: Requested by tracker 2021594, July 2008, by Alex. -synonym: "gene segment" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Typedef] -id: adjacent_to -name: adjacent_to -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence." 
[SO:ke] -subset: SOFA - -[Typedef] -id: associated_with -name: associated_with -comment: This relationship is vague and up for discussion. - -[Typedef] -id: complete_evidence_for_feature -name: complete_evidence_for_feature -def: "B is complete_evidence_for_feature A if the extent (5' and 3' boundaries) and internal boundaries of B fully support the extent and internal boundaries of A." [SO:ke] -comment: If A is a feature with multiple regions such as a multi exon transcript, the supporting EST evidence is complete if each of the regions is supported by an equivalent region in B. Also there must be no extra regions in B that are not represented in A. This relationship was requested by jeltje on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: derives_from -name: derives_from -subset: SOFA -is_transitive: true - -[Typedef] -id: edited_from -name: edited_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:45Z - -[Typedef] -id: edited_to -name: edited_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:11Z - -[Typedef] -id: evidence_for_feature -name: evidence_for_feature -def: "B is evidence_for_feature A, if an instance of B supports the existence of A." [SO:ke] -comment: This relationship was requested by nlw on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true - -[Typedef] -id: exemplar_of -name: exemplar_of -def: "X is exemplar of Y if X is the best evidence for Y." [SO:ke] -comment: Tracker id: 2594157. 
- -[Typedef] -id: genome_of -name: genome_of - -[Typedef] -id: guided_by -name: guided_by -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:04Z - -[Typedef] -id: guides -name: guides -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:24Z - -[Typedef] -id: has_genome_location -name: has_genome_location -is_obsolete: true - -[Typedef] -id: has_intergral_part -name: has_integral_part -def: "X has_integral_part Y if and only if: X has_part Y and Y part_of X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: mRNA has_integral_part CDS. -created_by: kareneilbeck -creation_date: 2009-08-19T12:01:46Z - -[Typedef] -id: has_origin -name: has_origin - -[Typedef] -id: has_part -name: has_part -def: "Inverse of part_of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: operon has_part gene. - -[Typedef] -id: has_quality -name: has_quality -comment: The relationship between a feature and an attribute. - -[Typedef] -id: homologous_to -name: homologous_to -subset: SOFA -is_symmetric: true -is_a: similar_to ! similar_to - -[Typedef] -id: integral_part_of -name: integral_part_of -def: "X integral_part_of Y if and only if: X part_of Y and Y has_part X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: exon integral_part_of transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:03:28Z - -[Typedef] -id: member_of -name: member_of -comment: A subtype of part_of. Inverse is collection_of. Winston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. -subset: SOFA -is_transitive: true - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -subset: SOFA -is_a: homologous_to ! homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! 
homologous_to - -[Typedef] -id: paralogous_to -name: paralogous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: part_of -name: part_of -def: "X part_of Y if X is a subregion of Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: amino_acid part_of polypeptide. -subset: SOFA -is_transitive: true - -[Typedef] -id: partial_evidence_for_feature -name: partial_evidence_for_feature -def: "B is partial_evidence_for_feature A if the extent of B supports part_of but not all of A." [SO:ke] -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: position_of -name: position_of - -[Typedef] -id: processed_from -name: processed_from -def: "Inverse of processed_into." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA processed_from miRNA_primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:14:00Z - -[Typedef] -id: processed_into -name: processed_into -def: "X is processed_into Y if a region X is modified to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA_primary_transcript processed into miRNA. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:15:02Z - -[Typedef] -id: recombined_from -name: recombined_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:21:03Z - -[Typedef] -id: recombined_to -name: recombined_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:20:07Z - -[Typedef] -id: regulated_by -name: regulated_by -is_obsolete: true - -[Typedef] -id: sequence_of -name: sequence_of - -[Typedef] -id: similar_to -name: similar_to -subset: SOFA -is_symmetric: true - -[Typedef] -id: trans_spliced_from -name: trans_spliced_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:14Z - -[Typedef] -id: trans_spliced_to -name: trans_spliced_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:00Z - -[Typedef] -id: transcribed_from -name: transcribed_from -def: "X is transcribed_from Y if X is synthesized from template Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: primary_transcript transcribed_from gene. -created_by: kareneilbeck -creation_date: 2009-08-19T12:05:39Z - -[Typedef] -id: transcribed_to -name: transcribed_to -def: "Inverse of transcribed_from." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: gene transcribed_to primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:08:24Z - -[Typedef] -id: translates_to -name: translates_to -def: "Inverse of translation _of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: codon translates_to amino_acid. -created_by: kareneilbeck -creation_date: 2009-08-19T12:11:53Z - -[Typedef] -id: translation_of -name: translation_of -def: "X is translation of Y if X is translated by ribosome to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: Polypeptide translation_of CDS. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:09:59Z - -[Typedef] -id: variant_of -name: variant_of -def: "A' is a variant (mutation) of A = definition every instance of A' is either an immediate mutation of some instance of A, or there is a chain of immediate mutation processes linking A' to some instance of A." [SO:immuno_workshop] -comment: Added to SO during the immunology workshop, June 2007. This relationship was approved by Barry Smith. - diff --git a/annotation/NBIS/Ontology/SO/so_2_4_1.obo b/annotation/NBIS/Ontology/SO/so_2_4_1.obo deleted file mode 100644 index 511203ec5..000000000 --- a/annotation/NBIS/Ontology/SO/so_2_4_1.obo +++ /dev/null @@ -1,14946 +0,0 @@ -format-version: 1.2 -date: 02:12:2009 09:48 -saved-by: kareneilbeck -auto-generated-by: OBO-Edit 2.1-beta1 -subsetdef: biosapiens "biosapiens protein feature ontology" -subsetdef: SOFA "SO feature annotation" -synonymtypedef: aa1 "amino acid 1 letter code" -synonymtypedef: aa3 "amino acid 3 letter code" -synonymtypedef: AAMOD "amino acid modification" -synonymtypedef: BS "biosapiens" -synonymtypedef: RNAMOD "RNA modification" EXACT -default-namespace: sequence -remark: autogenerated-by\: DAG-Edit version 1.417\nsaved-by\: eilbeck\ndate\: Tue May 11 15\:18\:44 PDT 2004\nversion\: $Revision\: 1.45 $ - -[Term] -id: SO:0000000 -name: Sequence_Ontology -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000001 -name: region -def: "A sequence_feature with an extent greater than zero. A nucleotide region is composed of bases and a polypeptide region is composed of amino acids." [SO:ke] -subset: SOFA -synonym: "sequence" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000002 -name: sequence_secondary_structure -def: "A folded sequence." [SO:ke] -synonym: "sequence secondary structure" EXACT [] -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000003 -name: G_quartet -def: "G-quartets are unusual nucleic acid structures consisting of a planar arrangement where each guanine is hydrogen bonded by hoogsteen pairing to another guanine in the quartet." [http://www.ncbi.nlm.nih.gov/pubmed/7919797?dopt=Abstract] -synonym: "G quartet" EXACT [] -synonym: "G tetrad" EXACT [] -synonym: "G-quadruplex" EXACT [] -synonym: "G-quartet" EXACT [] -synonym: "G-tetrad" EXACT [] -synonym: "G_quadruplex" EXACT [] -synonym: "guanine tetrad" EXACT [] -xref: http://en.wikipedia.org/wiki/G-quadruplex "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000004 -name: interior_coding_exon -subset: SOFA -synonym: "interior coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000005 -name: satellite_DNA -def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "satellite DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Satellite_DNA "wiki" -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000006 -name: PCR_product -def: "A region amplified by a PCR reaction." [SO:ke] -comment: This term is mapped to MGED. This term is now located in OBI, with the following ID OBI_0000406. -subset: SOFA -synonym: "amplicon" RELATED [] -synonym: "PCR product" EXACT [] -xref: http://en.wikipedia.org/wiki/RAPD "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000007 -name: read_pair -def: "A pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -synonym: "read-pair" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! 
contig - -[Term] -id: SO:0000008 -name: gene_sensu_your_favorite_organism -is_obsolete: true - -[Term] -id: SO:0000009 -name: gene_class -is_obsolete: true - -[Term] -id: SO:0000010 -name: protein_coding -synonym: "protein-coding" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000011 -name: non_protein_coding -synonym: "non protein-coding" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000012 -name: scRNA_primary_transcript -def: "The primary transcript of any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -synonym: "scRNA primary transcript" EXACT [] -synonym: "scRNA transcript" EXACT [] -synonym: "small cytoplasmic RNA" RELATED [] -synonym: "small cytoplasmic RNA transcript" EXACT [] -synonym: "small_cytoplasmic_RNA" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000013 -name: scRNA -def: "Any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -subset: SOFA -synonym: "small cytoplasmic RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000012 ! scRNA_primary_transcript - -[Term] -id: SO:0000014 -name: INR_motif -def: "A sequence element characteristic of some RNA polymerase II promoters required for the correct positioning of the polymerase for the start of transcription. Overlaps the TSS. The mammalian consensus sequence is YYAN(T|A)YY; the Drosophila consensus sequence is TCA(G|T)t(T|C). In each the A is at position +1 with respect to the TSS. Functionally similar to the TATA box element." [PMID:12651739] -synonym: "DMp2" RELATED [] -synonym: "initiator" EXACT [] -synonym: "initiator motif" EXACT [] -synonym: "INR motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0000015 -name: DPE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters; Positioned from +28 to +32 with respect to the TSS (+1). Experimental results suggest that the DPE acts in conjunction with the INR_motif to provide a binding site for TFIID in the absence of a TATA box to mediate transcription of TATA-less promoters. Consensus sequence (A|G)G(A|T)(C|T)(G|A|C)." [PMID:12651739\:12537576] -synonym: "CRWMGCGWKCGCTTS" NARROW [] -synonym: "downstream core promoter element" EXACT [] -synonym: "DPE motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000016 -name: BRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements at -37 to -32 with respect to the TSS (+1). Consensus sequence is (G|C)(G|C)(G|A)CGCC. Binds TFIIB." [PMID:12651739] -synonym: "B-recognition element" EXACT [] -synonym: "BRE motif" EXACT [] -synonym: "TFIIB recognition element" RELATED [] -synonym: "transcription factor B-recognition element" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000017 -name: PSE_motif -def: "A sequence element characteristic of the promoters of snRNA genes transcribed by RNA polymerase II or by RNA polymerase III. Located between -45 and -60 relative to the TSS. The human PSE_motif consensus sequence is TCACCNTNA(C|G)TNAAAAG(T|G)." [PMID:12651739] -synonym: "proximal sequence element" EXACT [] -synonym: "PSE motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000167 ! promoter - -[Term] -id: SO:0000018 -name: linkage_group -def: "A group of loci that can be grouped in a linear order representing the different degrees of linkage among the genes concerned." 
[ISBN:038752046] -synonym: "linkage group" EXACT [] -xref: http://en.wikipedia.org/wiki/Linkage_group "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000020 -name: RNA_internal_loop -def: "A region of double stranded RNA where the bases do not conform to WC base pairing. The loop is closed on both sides by canonical base pairing. If the interruption to base pairing occurs on one strand only, it is known as a bulge." [SO:ke] -synonym: "RNA internal loop" EXACT [] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000021 -name: asymmetric_RNA_internal_loop -def: "An internal RNA loop where one of the strands includes more bases than the corresponding region on the other strand." [SO:ke] -synonym: "asymmetric RNA internal loop" EXACT [] -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000022 -name: A_minor_RNA_motif -def: "A region forming a motif, composed of adenines, where the minor groove edges are inserted into the minor groove of another helix." [SO:ke] -synonym: "A minor RNA motif" EXACT [] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000023 -name: K_turn_RNA_motif -def: "The kink turn (K-turn) is an RNA structural motif that creates a sharp (~120 degree) bend between two continuous helices." [SO:ke] -synonym: "K turn RNA motif" EXACT [] -synonym: "K-turn" EXACT [] -synonym: "kink turn" EXACT [] -synonym: "kink-turn motif" EXACT [] -xref: http://en.wikipedia.org/wiki/K-turn "wiki" -is_a: SO:0000021 ! asymmetric_RNA_internal_loop - -[Term] -id: SO:0000024 -name: sarcin_like_RNA_motif -def: "A loop in ribosomal RNA containing the sites of attack for ricin and sarcin." [http://proton.chem.yale.edu/pdf/7897662.pdf] -synonym: "sarcin like RNA motif" EXACT [] -synonym: "sarcin/ricin domain" EXACT [] -synonym: "sarcin/ricin loop" EXACT [] -synonym: "sarcin/ricin RNA domain" EXACT [] -is_a: SO:0000021 ! 
asymmetric_RNA_internal_loop - -[Term] -id: SO:0000025 -name: symmetric_RNA_internal_loop -def: "An internal RNA loop where the extent of the loop on both stands is the same size." [SO:ke] -synonym: "A-minor RNA motif" EXACT [] -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000026 -name: RNA_junction_loop -synonym: "RNA junction loop" EXACT [] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000027 -name: RNA_hook_turn -synonym: "hook turn" RELATED [] -synonym: "hook-turn motif" EXACT [] -synonym: "RNA hook turn" EXACT [] -is_a: SO:0000026 ! RNA_junction_loop - -[Term] -id: SO:0000028 -name: base_pair -synonym: "base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Base_pair "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000029 -name: WC_base_pair -def: "The canonical base pair, where two bases interact via WC edges, with glycosidic bonds oriented cis relative to the axis of orientation." [PMID:12177293] -synonym: "canonical base pair" EXACT [] -synonym: "Watson Crick base pair" EXACT [] -synonym: "Watson-Crick base pair" RELATED [] -synonym: "Watson-Crick pair" EXACT [] -synonym: "WC base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000030 -name: sugar_edge_base_pair -def: "A type of non-canonical base-pairing." [PMID:12177293] -synonym: "sugar edge base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000031 -name: aptamer -def: "DNA or RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http://aptamer.icmb.utexas.edu] -xref: http://en.wikipedia.org/wiki/Aptamer "wiki" -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000032 -name: DNA_aptamer -def: "DNA molecules that have been selected from random pools based on their ability to bind other molecules." [http:aptamer.icmb.utexas.edu] -synonym: "DNA aptamer" EXACT [] -is_a: SO:0000031 ! 
aptamer - -[Term] -id: SO:0000033 -name: RNA_aptamer -def: "RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http://aptamer.icmb.utexas.edu] -synonym: "RNA aptamer" EXACT [] -is_a: SO:0000031 ! aptamer - -[Term] -id: SO:0000034 -name: morpholino_oligo -def: "Morpholino oligos are synthesized from four different Morpholino subunits, each of which contains one of the four genetic bases (A, C, G, T) linked to a 6-membered morpholine ring. Eighteen to 25 subunits of these four subunit types are joined in a specific order by non-ionic phosphorodiamidate intersubunit linkages to give a Morpholino." [http://www.gene-tools.com/] -synonym: "morpholino oligo" EXACT [] -is_a: SO:0001247 ! implied link automatically realized ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001183 ! morpholino - -[Term] -id: SO:0000035 -name: riboswitch -def: "A riboswitch is a part of an mRNA that can act as a direct sensor of small molecules to control their own expression. A riboswitch is a cis element in the 5' end of an mRNA, that acts as a direct sensor of metabolites." [PMID:2820954] -synonym: "riboswitch RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Riboswitch "wiki" -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000036 -name: matrix_attachment_site -def: "A DNA region that is required for the binding of chromatin to the nuclear matrix." 
[SO:ma] -synonym: "MAR" EXACT [] -synonym: "matrix association region" EXACT [] -synonym: "matrix attachment region" EXACT [] -synonym: "matrix attachment site" EXACT [] -synonym: "nuclear matrix association region" EXACT [] -synonym: "nuclear matrix attachment site" EXACT [] -synonym: "S/MAR" EXACT [] -synonym: "S/MAR element" RELATED [] -synonym: "scaffold attachment site" EXACT [] -synonym: "scaffold matrix attachment region" EXACT [] -synonym: "SMAR" EXACT [] -xref: http://en.wikipedia.org/wiki/Matrix_attachment_site "wiki" -is_a: SO:0000626 ! chromosomal_regulatory_element - -[Term] -id: SO:0000037 -name: locus_control_region -def: "A DNA region that includes DNAse hypersensitive sites located 5' to a gene that confers the high-level, position-independent, and copy number-dependent expression to that gene." [SO:ma] -synonym: "LCR" EXACT [] -synonym: "locus control element" RELATED [] -synonym: "locus control region" EXACT [] -xref: http://en.wikipedia.org/wiki/Locus_control_region "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000038 -name: match_set -def: "A collection of match parts." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000039 -name: match_part -def: "A part of a match, for example an hsp from blast is a match_part." [SO:ke] -subset: SOFA -synonym: "match part" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: part_of SO:0000343 ! match - -[Term] -id: SO:0000040 -name: genomic_clone -def: "A clone of a DNA region of a genome." [SO:ma] -synonym: "genomic clone" EXACT [] -is_a: SO:0000151 ! implied link automatically realized ! clone -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000041 -name: sequence_operation -def: "An operation that can be applied to a sequence, that results in a change." 
[SO:ke] -synonym: "sequence operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000042 -name: pseudogene_attribute -def: "An attribute of a pseudogene (SO:0000336)." [SO:ma] -synonym: "pseudogene attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000043 -name: processed_pseudogene -def: "A pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promotors, but often including a polyA tail." [SO:xp] -comment: Please not the synonym R psi M uses the spelled out form of the greek letter. -synonym: "processed pseudogene" EXACT [] -synonym: "pseudogene by reverse transcription" RELATED [] -synonym: "R psi G" RELATED [] -synonym: "retropseudogene" EXACT [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0000044 -name: pseudogene_by_unequal_crossing_over -def: "A pseudogene caused by unequal crossing over at recombination." [SO:ke] -synonym: "pseudogene by unequal crossing over" EXACT [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0000045 -name: delete -def: "To remove a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000046 -name: insert -def: "To insert a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000047 -name: invert -def: "To invert a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000048 -name: substitute -def: "To substitute a subsection of sequence for another." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000049 -name: translocate -def: "To translocate a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000050 -name: gene_part -def: "A part of a gene, that has no other route in the ontology back to region. This concept is necessary for logical inference as these parts must have the properties of region. It also allows us to associate all the parts of genes with a gene." 
[SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000051 -name: probe -def: "A DNA sequence used experimentally to detect the presence or absence of a complementary nucleic acid." [SO:ma] -xref: http://en.wikipedia.org/wiki/Hybridization_probe "wiki" -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000052 -name: assortment_derived_deficiency -synonym: "assortment-derived_deficiency" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000053 -name: sequence_variant_affecting_regulatory_region -def: "A sequence_variant_effect which changes the regulatory region of a gene." [SO:ke] -synonym: "mutation affecting regulatory region" RELATED [] -synonym: "sequence variant affecting regulatory region" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0000054 -name: aneuploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Aneuploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0000055 -name: hyperploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number as extra chromosomes are present." [SO:ke] -xref: http://en.wikipedia.org/wiki/Hyperploid "wiki" -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000056 -name: hypoploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number as some chromosomes are missing." [SO:ke] -xref: http://en.wikipedia.org/wiki/Hypoploid "wiki" -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000057 -name: operator -def: "A regulatory element of an operon to which activators or repressors bind thereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -synonym: "operator segment" EXACT [] -xref: http://en.wikipedia.org/wiki/Operator_(biology)#Operator "wiki" -is_a: SO:0000752 ! 
gene_group_regulatory_region - -[Term] -id: SO:0000058 -name: assortment_derived_aneuploid -synonym: "assortment-derived_aneuploid" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000059 -name: nuclease_binding_site -def: "A region of a molecule that binds to a nuclease." [SO:cb] -subset: SOFA -synonym: "nuclease binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0000060 -name: compound_chromosome_arm -synonym: "compound chromosome arm" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:0000061 -name: restriction_enzyme_binding_site -def: "A region of a molecule that binds to a restriction enzyme." [SO:cb] -synonym: "restriction endonuclease binding site" EXACT [] -synonym: "restriction endonuclease recognition site" RELATED [] -synonym: "restriction enzyme binding site" EXACT [] -synonym: "restriction enzyme recognition site" RELATED [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000062 -name: deficient_intrachromosomal_transposition -synonym: "deficient intrachromosomal transposition" EXACT [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:0000063 -name: deficient_interchromosomal_transposition -synonym: "deficient interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:0000064 -name: gene_by_transcript_attribute -comment: This classes of attributes was added by MA to allow the broad description of genes based on qualities of the transcript(s). A product of SO meeting 2004. -is_obsolete: true - -[Term] -id: SO:0000065 -name: free_chromosome_arm -synonym: "free chromosome arm" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000066 -name: gene_by_polyadenylation_attribute -is_obsolete: true - -[Term] -id: SO:0000067 -name: gene_to_gene_feature -synonym: "gene to gene feature" EXACT [] -is_a: SO:0000401 ! 
gene_attribute - -[Term] -id: SO:0000068 -name: overlapping -def: "An attribute describing a gene that has a sequence that overlaps the sequence of another gene." [SO:ke] -is_a: SO:0000067 ! gene_to_gene_feature - -[Term] -id: SO:0000069 -name: inside_intron -def: "An attribute to describe a gene when it is located within the intron of another gene." [SO:ke] -synonym: "inside intron" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000070 -name: inside_intron_antiparallel -def: "An attribute to describe a gene when it is located within the intron of another gene and on the opposite strand." [SO:ke] -synonym: "inside intron antiparallel" EXACT [] -is_a: SO:0000069 ! inside_intron - -[Term] -id: SO:0000071 -name: inside_intron_parallel -def: "An attribute to describe a gene when it is located within the intron of another gene and on the same strand." [SO:ke] -synonym: "inside intron parallel" EXACT [] -is_a: SO:0000069 ! inside_intron - -[Term] -id: SO:0000072 -name: end_overlapping_gene -is_obsolete: true - -[Term] -id: SO:0000073 -name: five_prime_three_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's 3' region." [SO:ke] -synonym: "five prime-three prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000074 -name: five_prime_five_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's five prime region." [SO:ke] -synonym: "five prime-five prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000075 -name: three_prime_three_prime_overlap -def: "An attribute to describe a gene when the 3' region overlaps with another gene's 3' region." [SO:ke] -synonym: "three prime-three prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000076 -name: three_prime_five_prime_overlap -def: "An attribute to describe a gene when the 3' region overlaps with another gene's 5' region." 
[SO:ke] -synonym: "5' 3' overlap" EXACT [] -synonym: "three prime five prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000077 -name: antisense -def: "A region sequence that is complementary to a sequence of messenger RNA." [SO:ke] -xref: http://en.wikipedia.org/wiki/Antisense "wiki" -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000078 -name: polycistronic_transcript -def: "A transcript that is polycistronic." [SO:xp] -synonym: "polycistronic transcript" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000079 -name: dicistronic_transcript -def: "A transcript that is dicistronic." [SO:ke] -synonym: "dicistronic transcript" EXACT [] -is_a: SO:0000078 ! implied link automatically realized ! polycistronic_transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000080 -name: operon_member -synonym: "operon member" EXACT [] -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0000081 -name: gene_array_member -synonym: "gene array member" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000082 -name: processed_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000083 -name: macronuclear_sequence -synonym: "macronuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000084 -name: micronuclear_sequence -synonym: "micronuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000085 -name: gene_by_genome_location -is_obsolete: true - -[Term] -id: SO:0000086 -name: gene_by_organelle_of_genome -is_obsolete: true - -[Term] -id: SO:0000087 -name: nuclear_gene -def: "A gene from nuclear sequence." [SO:xp] -synonym: "nuclear gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_gene "wiki" -is_a: SO:0000704 ! implied link automatically realized ! 
gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000088 -name: mt_gene -def: "A gene located in mitochondrial sequence." [SO:xp] -synonym: "mitochondrial gene" EXACT [] -synonym: "mt gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_gene "wiki" -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000089 -name: kinetoplast_gene -def: "A gene located in kinetoplast sequence." [SO:xp] -synonym: "kinetoplast gene" EXACT [] -is_a: SO:0000088 ! implied link automatically realized ! mt_gene -intersection_of: SO:0000088 ! mt_gene -intersection_of: has_origin SO:0000741 ! kinetoplast - -[Term] -id: SO:0000090 -name: plastid_gene -def: "A gene from plastid sequence." [SO:xp] -synonym: "plastid gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000091 -name: apicoplast_gene -def: "A gene from apicoplast sequence." [SO:xp] -synonym: "apicoplast gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0000092 -name: ct_gene -def: "A gene from chloroplast sequence." [SO:xp] -synonym: "chloroplast gene" EXACT [] -synonym: "ct gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000093 -name: chromoplast_gene -def: "A gene from chromoplast_sequence." [SO:xp] -synonym: "chromoplast gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! 
plastid_gene -intersection_of: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000094 -name: cyanelle_gene -def: "A gene from cyanelle sequence." [SO:xp] -synonym: "cyanelle gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000095 -name: leucoplast_gene -def: "A plastid gene from leucoplast sequence." [SO:xp] -synonym: "leucoplast gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000096 -name: proplastid_gene -def: "A gene from proplastid sequence." [SO:ke] -synonym: "proplastid gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000748 ! proplastid_sequence - -[Term] -id: SO:0000097 -name: nucleomorph_gene -def: "A gene from nucleomorph sequence." [SO:xp] -synonym: "nucleomorph gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000098 -name: plasmid_gene -def: "A gene from plasmid sequence." [SO:xp] -synonym: "plasmid gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000749 ! plasmid_location - -[Term] -id: SO:0000099 -name: proviral_gene -def: "A gene from proviral sequence." [SO:xp] -synonym: "proviral gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000751 ! 
proviral_location - -[Term] -id: SO:0000100 -name: endogenous_retroviral_gene -def: "A proviral gene with origin endogenous retrovirus." [SO:xp] -synonym: "endogenous retroviral gene" EXACT [] -is_a: SO:0000099 ! implied link automatically realized ! proviral_gene -intersection_of: SO:0000099 ! proviral_gene -intersection_of: has_origin SO:0000903 ! endogenous_retroviral_sequence - -[Term] -id: SO:0000101 -name: transposable_element -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -synonym: "transposable element" EXACT [] -synonym: "transposon" EXACT [] -xref: http://en.wikipedia.org/wiki/Transposable_element "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000102 -name: expressed_sequence_match -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -synonym: "expressed sequence match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -def: "The end of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert end" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000104 -name: polypeptide -alt_id: SO:0000358 -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The term 'protein' was merged with 'polypeptide'. Although 'protein' was a sequenece_attribute and therefore meant to describe the quality rather than an actual feature, it was being used erroneously. It is replaced by 'peptidyl' as the polymer attribute. -subset: SOFA -synonym: "protein" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypeptide "wiki" -is_a: SO:0001411 ! biological_region -relationship: derives_from SO:0000316 ! 
CDS - -[Term] -id: SO:0000105 -name: chromosome_arm -def: "A region of the chromosome between the centromere and the telomere. Human chromosomes have two arms, the p arm (short) and the q arm (long) which are separated from each other by the centromere." [http://www.medterms.com/script/main/art.asp?articlekey=5152] -synonym: "chromosome arm" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000106 -name: non_capped_primary_transcript -is_obsolete: true - -[Term] -id: SO:0000107 -name: sequencing_primer -synonym: "sequencing primer" EXACT [] -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000108 -name: mRNA_with_frameshift -def: "An mRNA with a frameshift." [SO:xp] -synonym: "frameshifted mRNA" EXACT [] -synonym: "mRNA with frameshift" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000109 -name: sequence_variant_obs -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000110 -name: sequence_feature -def: "An extent of biological sequence." [SO:ke] -subset: SOFA -synonym: "located sequence feature" RELATED [] -synonym: "located_sequence_feature" EXACT [] -synonym: "sequence feature" EXACT [] -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000111 -name: transposable_element_gene -def: "A gene encoded within a transposable element. For example gag, int, env and pol are the transposable element genes of the TY element in yeast." [SO:ke] -synonym: "transposable element gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: part_of SO:0000101 ! 
transposable_element - -[Term] -id: SO:0000112 -name: primer -def: "A short preexisting polynucleotide chain to which new deoxyribonucleotides can be added by DNA polymerase." [http://www.ornl.gov/TechResources/Human_Genome/publicat/primer2001/glossary.html] -subset: SOFA -synonym: "DNA primer" EXACT [] -synonym: "primer oligonucleotide" EXACT [] -synonym: "primer polynucleotide" EXACT [] -synonym: "primer sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Primer_(molecular_biology) "wiki" -is_a: SO:0000441 ! ss_oligo - -[Term] -id: SO:0000113 -name: proviral_region -def: "A viral sequence which has integrated into a host genome." [SO:ke] -subset: SOFA -synonym: "proviral region" EXACT [] -synonym: "proviral sequence" RELATED [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000114 -name: methylated_C -def: "A methylated deoxy-cytosine." [SO:ke] -subset: SOFA -synonym: "methylated C" EXACT [] -synonym: "methylated cytosine" EXACT [] -synonym: "methylated cytosine base" EXACT [] -synonym: "methylated cytosine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000115 -name: transcript_feature -is_obsolete: true - -[Term] -id: SO:0000116 -name: edited -def: "An attribute describing a sequence that is modified by editing." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000117 -name: transcript_with_readthrough_stop_codon -is_obsolete: true - -[Term] -id: SO:0000118 -name: transcript_with_translational_frameshift -def: "A transcript with a translational frameshift." [SO:xp] -synonym: "transcript with translational frameshift" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000119 -name: regulated -def: "An attribute to describe a sequence that is regulated." [SO:ke] -is_a: SO:0000401 ! 
gene_attribute - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns. -subset: SOFA -synonym: "pre mRNA" RELATED [] -synonym: "protein coding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000121 -name: forward_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "DNA forward primer" EXACT [] -synonym: "forward DNA primer" EXACT [] -synonym: "forward primer" EXACT [] -synonym: "forward primer oligo" EXACT [] -synonym: "forward primer oligonucleotide" EXACT [] -synonym: "forward primer polynucleotide" EXACT [] -synonym: "forward primer sequence" EXACT [] -is_a: SO:0000112 ! implied link automatically realized ! primer -intersection_of: SO:0000112 ! primer -intersection_of: has_quality SO:0001030 ! forward - -[Term] -id: SO:0000122 -name: RNA_sequence_secondary_structure -def: "A folded RNA sequence." [SO:ke] -synonym: "RNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000123 -name: transcriptionally_regulated -def: "An attribute describing a gene that is regulated at transcription." [SO:ma] -comment: By:. -synonym: "transcriptionally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -def: "Expressed in relatively constant amounts without regard to cellular environmental conditions such as the concentration of a particular substrate." [SO:ke] -synonym: "transcriptionally constitutive" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -def: "An inducer molecule is required for transcription to occur." 
[SO:ke] -synonym: "transcriptionally induced" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -def: "A repressor molecule is required for transcription to stop." [SO:ke] -synonym: "transcriptionally repressed" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000127 -name: silenced_gene -def: "A gene that is silenced." [SO:xp] -synonym: "silenced gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000893 ! silenced - -[Term] -id: SO:0000128 -name: gene_silenced_by_DNA_modification -def: "A gene that is silenced by DNA modification." [SO:xp] -synonym: "gene silenced by DNA modification" EXACT [] -is_a: SO:0000127 ! implied link automatically realized ! silenced_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000129 -name: gene_silenced_by_DNA_methylation -def: "A gene that is silenced by DNA methylation." [SO:xp] -synonym: "gene silenced by DNA methylation" EXACT [] -synonym: "methylation-silenced gene" EXACT [] -is_a: SO:0000128 ! implied link automatically realized ! gene_silenced_by_DNA_modification -intersection_of: SO:0000128 ! gene_silenced_by_DNA_modification -intersection_of: has_quality SO:0000895 ! silenced_by_DNA_methylation - -[Term] -id: SO:0000130 -name: post_translationally_regulated -def: "An attribute describing a gene that is regulated after it has been translated." [SO:ke] -synonym: "post translationally regulated" EXACT [] -synonym: "post-translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000131 -name: translationally_regulated -def: "An attribute describing a gene that is regulated as it is translated." [SO:ke] -synonym: "translationally regulated" EXACT [] -is_a: SO:0000119 ! 
regulated - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "DNA reverse primer" EXACT [] -synonym: "reverse DNA primer" EXACT [] -synonym: "reverse primer" EXACT [] -synonym: "reverse primer oligo" EXACT [] -synonym: "reverse primer oligonucleotide" EXACT [] -synonym: "reverse primer sequence" EXACT [] -is_a: SO:0000112 ! implied link automatically realized ! primer -intersection_of: SO:0000112 ! primer -intersection_of: has_quality SO:0001031 ! reverse - -[Term] -id: SO:0000133 -name: epigenetically_modified -def: "This attribute describes a gene where heritable changes other than those in the DNA sequence occur. These changes include: modification to the DNA (such as DNA methylation, the covalent modification of cytosine), and post-translational modification of histones." [SO:ke] -synonym: "epigenetically modified" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000134 -name: imprinted -def: "Imprinted genes are epigenetically modified genes that are expressed monoallelically according to their parent of origin." [SO:ke] -xref: http:http\://en.wikipedia.org/wiki/Genomic_imprinting "wiki" -is_a: SO:0000119 ! regulated -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000135 -name: maternally_imprinted -def: "The maternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "maternally imprinted" EXACT [] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -def: "The paternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "paternally imprinted" EXACT [] -is_a: SO:0000134 ! 
imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -def: "Allelic exclusion is a process occuring in diploid organisms, where a gene is inactivated and not expressed in that cell." [SO:ke] -comment: Exapmles are x-innactivation and immunoglobulin formation. -synonym: "allelically excluded" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000138 -name: gene_rearranged_at_DNA_level -def: "An epigenetically modified gene, rearranged at the DNA level." [SO:xp] -synonym: "gene rearranged at DNA level" EXACT [] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000904 ! rearranged_at_DNA_level - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -synonym: "DNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! 
sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000145 -name: recoded_codon -def: "A codon that has been redefined at translation. The redefinition may be as a result of translational bypass, translational frameshifting or stop codon readthrough." [SO:xp] -synonym: "recoded codon" EXACT [] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000146 -name: capped -def: "An attribute describing when a sequence, usually an mRNA is capped by the addition of a modified guanine nucleotide at the 5' end." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unvailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! 
supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http\://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "yeast artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "bacterial artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000154 -name: PAC -def: "The P1-derived artificial chromosome are DNA constructs that are derived from the DNA of P1 bacteriophage. They can carry large amounts (about 100-300 kilobases) of other sequences for a variety of bioengineering purposes. It is one type of vector used to clone DNA fragments (100- to 300-kb insert size; average, 150 kb) in Escherichia coli cells." [http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. Drosophila melanogaster PACs carry an average insert size of 80 kb. 
The library represents a 6-fold coverage of the genome. -synonym: "P1" EXACT [] -synonym: "P1 artificial chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000155 -name: plasmid -def: "A self replicating, using the hosts cellular machinery, often circular nucleic acid molecule that is distinct from a chromosome in the organism." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "plasmid sequence" EXACT [] -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as a plasmid or packaged as a phage,since they retain the lambda cos sites." [SO:ma] -comment: Paper: vans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1):9-20. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cosmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Cosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma] -synonym: "phagemid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Phagemid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilises the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource: mapping and analysis of 96 clones. Genomics 1996. -synonym: "fosmid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Fosmid "wiki" -is_a: SO:0000440 ! 
vector_replicon - -[Term] -id: SO:0000159 -name: deletion -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_obsolete: true - -[Term] -id: SO:0000161 -name: methylated_A -def: "A modified RNA base in which adenine has been methylated." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinary. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." 
[http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a placeholder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000166 -name: enhancer_bound_by_factor -def: "An enhancer bound by a factor." [SO:xp] -synonym: "enhancer bound by factor" EXACT [] -is_a: SO:0000165 ! implied link automatically realized ! enhancer -intersection_of: SO:0000165 ! enhancer -intersection_of: has_quality SO:0000277 ! 
bound_by_factor - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." [SO:ke] -synonym: "pol I promoter" EXACT [] -synonym: "polymerase I promoter" EXACT [] -synonym: "RNA polymerase A promoter" EXACT [] -synonym: "RNApol I promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke] -synonym: "pol II promoter" RELATED [] -synonym: "polymerase II promoter" EXACT [] -synonym: "RNA polymerase B promoter" EXACT [] -synonym: "RNApol II promoter" EXACT [] -is_a: SO:0000727 ! implied link automatically realized ! CRM -is_a: SO:0001203 ! RNA_polymerase_promoter -relationship: has_part SO:0000174 ! TATA_box - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." [SO:ke] -synonym: "pol III promoter" EXACT [] -synonym: "polymerase III promoter" EXACT [] -synonym: "RNA polymerase C promoter" EXACT [] -synonym: "RNApol III promoter" EXACT [] -is_a: SO:0000727 ! implied link automatically realized ! CRM -is_a: SO:0001203 ! 
RNA_polymerase_promoter -relationship: has_part SO:0000174 ! TATA_box - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "CAAT box" EXACT [] -synonym: "CAAT signal" EXACT [] -synonym: "CAAT-box" EXACT [] -xref: http://en.wikipedia.org/wiki/CAAT_box "wiki" -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000173 -name: GC_rich_promoter_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "GC rich promoter region" EXACT [] -synonym: "GC-rich region" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "Goldstein-Hogness box" EXACT [] -synonym: "TATA box" EXACT [] -xref: http://en.wikipedia.org/wiki/TATA_box "wiki" -is_a: SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-10 signal" EXACT [] -synonym: "minus 10 signal" EXACT [] -synonym: "Pribnow box" EXACT [] -synonym: "Pribnow Schaller box" EXACT [] -synonym: "Pribnow-Schaller box" EXACT [] -xref: http://en.wikipedia.org/wiki/Pribnow_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000613 ! bacterial_RNApol_promoter - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-35 signal" EXACT [] -synonym: "minus 35 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000613 ! bacterial_RNApol_promoter - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." 
[http://www.dddmag.com/Glossary.aspx#r] -synonym: "class I" RELATED [] -synonym: "class I transposon" EXACT [] -synonym: "retrotransposon element" EXACT [] -xref: http://en.wikipedia.org/wiki/Retrotransposon "wiki" -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." [SO:ke] -synonym: "class II" RELATED [] -synonym: "class II transposon" EXACT [] -synonym: "DNA transposon" EXACT [] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -synonym: "U2 intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." 
[SO:ke] -synonym: "long terminal repeat retrotransposon" EXACT [] -synonym: "LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -synonym: "non LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -synonym: "5' intron" EXACT [] -synonym: "5' intron sequence" EXACT [] -synonym: "five prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000191 -name: interior_intron -synonym: "interior intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -synonym: "3' intron" EXACT [] -synonym: "3' intron sequence" RELATED [] -synonym: "three prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." [GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! 
restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "LINE" EXACT [] -synonym: "LINE element" EXACT [] -synonym: "Long interspersed element" EXACT [] -synonym: "Long interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon (here, 'codon'is inclusive of the stop_codon)." [SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke] -synonym: "translocated sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." 
[SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000201 -name: interior_exon -def: "An exon that is bounded by 5' and 3' splice sites." [PMID:10373547] -synonym: "interior exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The coding exon that is most 3-prime on a given transcript." [SO:ma] -synonym: "3' coding exon" RELATED [] -synonym: "three prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." 
[SO:ke] -synonym: "Short interspersed element" EXACT [] -synonym: "Short interspersed nuclear element" EXACT [] -synonym: "SINE element" EXACT [] -xref: http://en.wikipedia.org/wiki/Short_interspersed_nuclear_element "wiki" -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_variation -synonym: "simple sequence length polymorphism" RELATED [] -synonym: "simple sequence length variation" EXACT [] -synonym: "SSLP" RELATED [] -is_a: SO:0000248 ! sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "terminal inverted repeat element" EXACT [] -synonym: "TIR element" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -subset: SOFA -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253)." [SO:ke] -synonym: "tRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -def: "A primary transcript encoding alanyl tRNA." [SO:ke] -synonym: "alanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." [SO:ke] -synonym: "arginine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -synonym: "asparagine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -synonym: "aspartic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." [SO:ke] -synonym: "cysteine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." [SO:ke] -synonym: "glycine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." [SO:ke] -synonym: "histidine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke] -synonym: "isoleucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -synonym: "leucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -synonym: "lysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -synonym: "methionine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke] -synonym: "phenylalanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -synonym: "proline tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -synonym: "serine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -synonym: "threonine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -synonym: "tryptophan tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." [SO:ke] -synonym: "tyrosine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." [SO:ke] -synonym: "valine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear RNA (SO:0000274)." [SO:ke] -synonym: "snRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -synonym: "snoRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. 
This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds a TF complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The inframe interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -synonym: "transcript attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterised by large modular imperfect long inverted repeats." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "foldback element" EXACT [] -synonym: "long inverted repeat element" RELATED [] -synonym: "LVR element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The sequences extending on either side of a specific region." [SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000240 -name: chromosome_variation -synonym: "chromosome variation" EXACT [] -disjoint_from: SO:0000110 ! 
sequence_feature -disjoint_from: SO:0000400 ! sequence_attribute -disjoint_from: SO:0001060 ! sequence_variant -disjoint_from: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0000241 -name: internal_UTR -def: "A UTR bordered by the terminal and initial codons of two CDSs in a polycistronic transcript. Every UTR is either 5', 3' or internal." [SO:cjm] -synonym: "internal UTR" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polycistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -synonym: "untranslated region polycistronic mRNA" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke] -synonym: "internal ribosomal entry sequence" EXACT [] -synonym: "internal ribosomal entry site" EXACT [] -synonym: "internal ribosome entry sequence" RELATED [] -synonym: "internal ribosome entry site" EXACT [] -synonym: "IRES" EXACT [] -xref: http://en.wikipedia.org/wiki/Internal_ribosome_entry_site "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_obsolete: true - -[Term] -id: SO:0000246 -name: polyadenylated -def: "A attribute describing the addition of a poly A tail to the 3' end of a mRNA molecule." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000248 -name: sequence_length_variation -synonym: "sequence length variation" EXACT [] -is_a: SO:1000002 ! 
substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -synonym: "modified RNA base feature" EXACT [] -is_a: SO:0001236 ! base - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000254 -name: alanyl_tRNA -def: "A tRNA sequence that has an alanine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "alanyl tRNA" EXACT [] -synonym: "alanyl-transfer ribonucleic acid" EXACT [] -synonym: "alanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000211 ! alanine_tRNA_primary_transcript - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -synonym: "rRNA small subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -def: "A tRNA sequence that has an asparagine anticodon, and a 3' asparagine binding region." [SO:ke] -synonym: "asparaginyl tRNA" EXACT [] -synonym: "asparaginyl-transfer ribonucleic acid" EXACT [] -synonym: "asparaginyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000213 ! asparagine_tRNA_primary_transcript - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -def: "A tRNA sequence that has an aspartic acid anticodon, and a 3' aspartic acid binding region." [SO:ke] -synonym: "aspartyl tRNA" EXACT [] -synonym: "aspartyl-transfer ribonucleic acid" EXACT [] -synonym: "aspartyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000214 ! aspartic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -def: "A tRNA sequence that has a cysteine anticodon, and a 3' cysteine binding region." [SO:ke] -synonym: "cysteinyl tRNA" EXACT [] -synonym: "cysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "cysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! 
tRNA -relationship: derives_from SO:0000215 ! cysteine_tRNA_primary_transcript - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -def: "A tRNA sequence that has a glutamine anticodon, and a 3' glutamine binding region." [SO:ke] -synonym: "glutaminyl tRNA" EXACT [] -synonym: "glutaminyl-transfer ribonucleic acid" EXACT [] -synonym: "glutaminyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000216 ! glutamic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -def: "A tRNA sequence that has a glutamic acid anticodon, and a 3' glutamic acid binding region." [SO:ke] -synonym: "glutamyl tRNA" EXACT [] -synonym: "glutamyl-transfer ribonucleic acid" EXACT [] -synonym: "glutamyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000217 ! glutamine_tRNA_primary_transcript - -[Term] -id: SO:0000261 -name: glycyl_tRNA -def: "A tRNA sequence that has a glycine anticodon, and a 3' glycine binding region." [SO:ke] -synonym: "glycyl tRNA" EXACT [] -synonym: "glycyl-transfer ribonucleic acid" RELATED [] -synonym: "glycyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000218 ! glycine_tRNA_primary_transcript - -[Term] -id: SO:0000262 -name: histidyl_tRNA -def: "A tRNA sequence that has a histidine anticodon, and a 3' histidine binding region." [SO:ke] -synonym: "histidyl tRNA" EXACT [] -synonym: "histidyl-transfer ribonucleic acid" EXACT [] -synonym: "histidyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000219 ! histidine_tRNA_primary_transcript - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -def: "A tRNA sequence that has an isoleucine anticodon, and a 3' isoleucine binding region." [SO:ke] -synonym: "isoleucyl tRNA" EXACT [] -synonym: "isoleucyl-transfer ribonucleic acid" EXACT [] -synonym: "isoleucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000220 ! 
isoleucine_tRNA_primary_transcript - -[Term] -id: SO:0000264 -name: leucyl_tRNA -def: "A tRNA sequence that has a leucine anticodon, and a 3' leucine binding region." [SO:ke] -synonym: "leucyl tRNA" EXACT [] -synonym: "leucyl-transfer ribonucleic acid" EXACT [] -synonym: "leucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000221 ! leucine_tRNA_primary_transcript - -[Term] -id: SO:0000265 -name: lysyl_tRNA -def: "A tRNA sequence that has a lysine anticodon, and a 3' lysine binding region." [SO:ke] -synonym: "lysyl tRNA" EXACT [] -synonym: "lysyl-transfer ribonucleic acid" EXACT [] -synonym: "lysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000222 ! lysine_tRNA_primary_transcript - -[Term] -id: SO:0000266 -name: methionyl_tRNA -def: "A tRNA sequence that has a methionine anticodon, and a 3' methionine binding region." [SO:ke] -synonym: "methionyl tRNA" EXACT [] -synonym: "methionyl-transfer ribonucleic acid" EXACT [] -synonym: "methionyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000223 ! methionine_tRNA_primary_transcript - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -def: "A tRNA sequence that has a phenylalanine anticodon, and a 3' phenylalanine binding region." [SO:ke] -synonym: "phenylalanyl tRNA" EXACT [] -synonym: "phenylalanyl-transfer ribonucleic acid" EXACT [] -synonym: "phenylalanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000224 ! phenylalanine_tRNA_primary_transcript - -[Term] -id: SO:0000268 -name: prolyl_tRNA -def: "A tRNA sequence that has a proline anticodon, and a 3' proline binding region." [SO:ke] -synonym: "prolyl tRNA" EXACT [] -synonym: "prolyl-transfer ribonucleic acid" EXACT [] -synonym: "prolyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000225 ! 
proline_tRNA_primary_transcript - -[Term] -id: SO:0000269 -name: seryl_tRNA -def: "A tRNA sequence that has a serine anticodon, and a 3' serine binding region." [SO:ke] -synonym: "seryl tRNA" EXACT [] -synonym: "seryl-transfer ribonucleic acid" RELATED [] -synonym: "seryl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000226 ! serine_tRNA_primary_transcript - -[Term] -id: SO:0000270 -name: threonyl_tRNA -def: "A tRNA sequence that has a threonine anticodon, and a 3' threonine binding region." [SO:ke] -synonym: "threonyl tRNA" EXACT [] -synonym: "threonyl-transfer ribonucleic acid" EXACT [] -synonym: "threonyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000227 ! threonine_tRNA_primary_transcript - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -def: "A tRNA sequence that has a tryptophan anticodon, and a 3' tryptophan binding region." [SO:ke] -synonym: "tryptophanyl tRNA" EXACT [] -synonym: "tryptophanyl-transfer ribonucleic acid" EXACT [] -synonym: "tryptophanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000228 ! tryptophan_tRNA_primary_transcript - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -def: "A tRNA sequence that has a tyrosine anticodon, and a 3' tyrosine binding region." [SO:ke] -synonym: "tyrosyl tRNA" EXACT [] -synonym: "tyrosyl-transfer ribonucleic acid" EXACT [] -synonym: "tyrosyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000229 ! tyrosine_tRNA_primary_transcript - -[Term] -id: SO:0000273 -name: valyl_tRNA -def: "A tRNA sequence that has a valine anticodon, and a 3' valine binding region." [SO:ke] -synonym: "valyl tRNA" EXACT [] -synonym: "valyl-transfer ribonucleic acid" EXACT [] -synonym: "valyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000230 ! 
valine_tRNA_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000231 ! snRNA_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: derives_from SO:0000647 ! 
miRNA_primary_transcript - -[Term] -id: SO:0000277 -name: bound_by_factor -def: "An attribute describing a sequence that is bound by another molecule." [SO:ke] -comment: Formerly called transcript_by_bound_factor. -synonym: "bound by factor" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000278 -name: transcript_bound_by_nucleic_acid -def: "A transcript that is bound by a nucleic acid." [SO:xp] -comment: Formerly called transcript_by_bound_nucleic_acid. -synonym: "transcript bound by nucleic acid" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000876 ! bound_by_nucleic_acid - -[Term] -id: SO:0000279 -name: transcript_bound_by_protein -def: "A transcript that is bound by a protein." [SO:xp] -comment: Formerly called transcript_by_bound_protein. -synonym: "transcript bound by protein" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000875 ! bound_by_protein - -[Term] -id: SO:0000280 -name: engineered_gene -def: "A gene that is engineered." [SO:xp] -synonym: "engineered gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000281 -name: engineered_foreign_gene -def: "A gene that is engineered and foreign." [SO:xp] -synonym: "engineered foreign gene" EXACT [] -is_a: SO:0000280 ! implied link automatically realized ! engineered_gene -is_a: SO:0000285 ! implied link automatically realized ! foreign_gene -is_a: SO:0000805 ! implied link automatically realized ! engineered_foreign_region -intersection_of: SO:0000280 ! engineered_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! 
foreign - -[Term] -id: SO:0000282 -name: mRNA_with_minus_1_frameshift -def: "An mRNA with a minus 1 frameshift." [SO:xp] -synonym: "mRNA with minus 1 frameshift" EXACT [] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000866 ! minus_1_frameshift - -[Term] -id: SO:0000283 -name: engineered_foreign_transposable_element_gene -def: "A transposible_element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign transposable element gene" EXACT [] -is_a: SO:0000111 ! implied link automatically realized ! transposable_element_gene -is_a: SO:0000281 ! implied link automatically realized ! engineered_foreign_gene -intersection_of: SO:0000111 ! transposable_element_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartite and interrupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000285 -name: foreign_gene -def: "A gene that is foreign." [SO:xp] -synonym: "foreign gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "direct terminal repeat" RELATED [] -synonym: "long terminal repeat" EXACT [] -synonym: "LTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Long_terminal_repeat "wiki" -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000287 -name: fusion_gene -def: "A gene that is a fusion." 
[SO:xp] -synonym: "fusion gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Fusion_gene "wiki" -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000806 ! fusion - -[Term] -id: SO:0000288 -name: engineered_fusion_gene -def: "A fusion gene that is engineered." [SO:xp] -synonym: "engineered fusion gene" EXACT [] -is_a: SO:0000280 ! implied link automatically realized ! engineered_gene -is_a: SO:0000287 ! implied link automatically realized ! fusion_gene -intersection_of: SO:0000287 ! fusion_gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000289 -name: microsatellite -def: "A repeat_region containing repeat_units (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite" EXACT [] -synonym: "dinucleotide repeat microsatellite feature" EXACT [] -synonym: "dinucleotide repeat microsatellite locus" EXACT [] -synonym: "dinucleotide repeat microsatellite marker" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite marker" RELATED [] -synonym: "rinucleotide repeat microsatellite" EXACT [] -synonym: "trinucleotide repeat microsatellite feature" EXACT [] -synonym: "trinucleotide repeat microsatellite locus" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_obsolete: true - -[Term] -id: SO:0000293 -name: engineered_foreign_repetitive_element -def: "A repetitive element that is engineered and foreign." 
[SO:xp] -synonym: "engineered foreign repetitive element" EXACT [] -is_a: SO:0000657 ! implied link automatically realized ! repeat_region -is_a: SO:0000805 ! implied link automatically realized ! engineered_foreign_region -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -synonym: "U12 intron" EXACT [] -synonym: "U12-dependent intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! 
replicon - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "D-loop" EXACT [] -synonym: "displacement loop" RELATED [] -xref: http://en.wikipedia.org/wiki/D_loop "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -synonym: "recombination feature" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000299 -name: specific_recombination_site -synonym: "specific recombination site" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -synonym: "recombination feature of rearranged gene" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000301 -name: vertebrate_immune_system_gene_recombination_feature -synonym: "vertebrate immune system gene recombination feature" EXACT [] -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene recombination feature" EXACT [] -synonym: "J-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! 
primary_transcript_region - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interrupted palindrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_base_site -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G or (in RNA) U." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001236 ! base - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_base_site - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_obsolete: true - -[Term] -id: SO:0000309 -name: computed_feature -is_obsolete: true - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_obsolete: true - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to: -is_obsolete: true - -[Term] -id: SO:0000312 -name: experimentally_determined -def: "Attribute to describe a feature that has been experiemntally verified." [SO:ke] -synonym: "experimentally determined" EXACT [] -is_a: SO:0000789 ! validated - -[Term] -id: SO:0000313 -name: stem_loop -alt_id: SO:0000019 -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "RNA_hairpin_loop" EXACT [] -synonym: "stem loop" EXACT [] -synonym: "stem-loop" EXACT [] -xref: http://en.wikipedia.org/wiki/Stem_loop "wiki" -is_a: SO:0000122 ! RNA_sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: TSS -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cDNA clone" EXACT [] -is_a: SO:0000151 ! implied link automatically realized ! clone -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." 
[SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke] -synonym: "intronic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000321 -name: mRNA_with_plus_1_frameshift -def: "An mRNA with a plus 1 frameshift." [SO:ke] -synonym: "mRNA with plus 1 frameshift" EXACT [] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000868 ! plus_1_frameshift - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -synonym: "nuclease hypersensitive site" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "coding start" EXACT [] -synonym: "translation initiation site" EXACT [] -synonym: "translation start" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! 
tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke] -synonym: "coding end" EXACT [] -synonym: "translation termination site" EXACT [] -synonym: "translation_end" EXACT [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray oligo" EXACT [] -synonym: "microarray oligonucleotide" EXACT [] -is_a: SO:0000051 ! probe - -[Term] -id: SO:0000329 -name: mRNA_with_plus_2_frameshift -def: "An mRNA with a plus 2 frameshift." [SO:xp] -synonym: "mRNA with plus 2 frameshift" EXACT [] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000869 ! plus_2_framshift - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! 
mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000335 -name: mRNA_with_minus_2_frameshift -def: "A mRNA with a minus 2 frameshift." [SO:ke] -synonym: "mRNA with minus 2 frameshift" EXACT [] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000867 ! minus_2_frameshift - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITEs do not encode proteins." 
[http://www.pnas.org/cgi/content/full/97/18/10083] -synonym: "miniature inverted repeat transposable element" EXACT [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome which promotes recombination." [SO:rd] -synonym: "recombination hotspot" EXACT [] -xref: http://en.wikipedia.org/wiki/Recombination_hotspot "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -synonym: "site specific recombination target region" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0001056 ! 
splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000346 -name: loxP_site -synonym: "Cre-recombination target region" RELATED [] -synonym: "loxP site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000348 -name: nucleic_acid -def: "An attribute describing a sequence consisting of nucleobases bound to repeating units. The forms found in nature are deoxyribonucleic acid (DNA), where the repeating units are 2-deoxy-D-ribose rings connected to a phosphate backbone, and ribonucleic acid (RNA), where the repeating units are D-ribose rings connected to a phosphate backbone." [CHEBI:33696, RSC:cb] -synonym: "nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleic_acid "wiki" -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000350 -name: FRT_site -def: "An inversion site found on the Saccharomyces cerevisiae 2 micron plasmid." [SO:ma] -synonym: "FLP recombination target region" EXACT [] -synonym: "FRT site" EXACT [] -is_a: SO:0000948 ! inversion_site - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "An attribute to decide a sequence of nucleotides, nucleotide analogs, or amino acids that has been designed by an experimenter and which may, or may not, correspond with any natural sequence." 
[SO:ma] -synonym: "synthetic sequence" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000352 -name: DNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a 2-deoxy-D-ribose ring connected to a phosphate backbone." [RSC:cb] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000353 -name: sequence_assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -synonym: "group 1 intron homing endonuclease target region" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which is co-inherited as the result of the lack of historic recombination within it." [SO:ma] -synonym: "haplotype block" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000356 -name: RNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a D-ribose ring connected to a phosphate backbone." [RSC:cb] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: flanked -def: "An attribute describing a region that is bounded either side by a paricular kind of region." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000359 -name: floxed -def: "An attribute describing sequence that is flanked by Lox-P sites." [SO:ke] -xref: http://en.wikipedia.org/wiki/Floxed "wiki" -is_a: SO:0000357 ! 
flanked - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000361 -name: FRT_flanked -def: "An attribute to describe sequence that is flanked by the FLP recombinase recognition site, FRT." [SO:ke] -synonym: "FRT flanked" EXACT [] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000362 -name: invalidated_by_chimeric_cDNA -def: "A cDNA clone constructed from more than one mRNA. Usually an experimental artifact." [SO:ma] -synonym: "invalidated by chimeric cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000363 -name: floxed_gene -def: "A transgene that is floxed." [SO:xp] -synonym: "floxed gene" EXACT [] -is_a: SO:0000902 ! implied link automatically realized ! transgene -intersection_of: SO:0000902 ! transgene -intersection_of: has_quality SO:0000359 ! floxed - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposible element." [SO:ke] -synonym: "transposable element flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "A region encoding an integrase which acts at a site adjacent to it (attI_site) to insert DNA which must include but is not limited to an attC_site." [SO:as] -xref: http://en.wikipedia.org/wiki/Integron "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! 
junction - -[Term] -id: SO:0000367 -name: attI_site -def: "A region within an integron, adjacent to an integrase, at which site specific recombination involving an attC_site takes place." [SO:as] -synonym: "attI site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -is_obsolete: true - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/~smaloy/Glossary/C.html] -synonym: "conjugative transposon" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0000373 -name: recombinationally_inverted_gene -def: "A recombinationally rearranged gene by inversion." [SO:xp] -synonym: "recombinationally inverted gene" EXACT [] -is_a: SO:0000456 ! implied link automatically realized ! 
recombinationally_rearranged_gene -intersection_of: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: has_quality SO:1000036 ! inversion - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -is_a: SO:0000372 ! implied link automatically realized ! enzymatic_RNA -intersection_of: SO:0000372 ! enzymatic_RNA -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000375 -name: rRNA_5_8S -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 6S RNA represses expression from a sigma70-dependent promoter during stationary phase." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S RNA" EXACT [] -synonym: "RNA 6S" EXACT [] -xref: http://en.wikipedia.org/wiki/6S_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. 
The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB RsmB RNA" EXACT [] -synonym: "CsrB-RsmB RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -synonym: "DsrA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/DsrA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -synonym: "GcvB RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/GcvB_RNA "wiki" -is_a: SO:0000378 ! 
DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http://rnaworld.bio.ku.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -is_a: SO:0000715 ! implied link automatically realized ! RNA_motif -intersection_of: SO:0000715 ! RNA_motif -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000381 -name: group_IIA_intron -synonym: "group IIA intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -synonym: "group IIB intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -synonym: "MicF RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MicF_RNA "wiki" -is_a: SO:0000644 ! antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -synonym: "OxyS RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/OxyS_RNA "wiki" -is_a: SO:0000370 ! 
small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterised activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -synonym: "RprA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RprA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -synonym: "RRE RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -synonym: "spot-42 RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Spot_42_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesises telomeric DNA." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. 
The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -def: "An attribute describes a quality of sequence." [SO:ke] -synonym: "sequence attribute" EXACT [] -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:0001060 ! sequence_variant -disjoint_from: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0000401 -name: gene_attribute -synonym: "gene attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_obsolete: true - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0005837 ! U14_snoRNA_primary_transcript - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. 
The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron. Twintrons are group II or III introns, into which another group II or III intron has been transposed." [PMID:1899376, PMID:7823908] -xref: http://en.wikipedia.org/wiki/Twintron "wiki" -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." 
[SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: Discrete. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A region of a molecule that binds to a protein." [SO:ke] -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000411 -name: rescue_region -def: "A region that rescues." [SO:xp] -synonym: "rescue fragment" EXACT [] -synonym: "rescue region" EXACT [] -synonym: "rescue segment" RELATED [] -is_a: SO:0000695 ! implied link automatically realized ! reagent -intersection_of: SO:0000695 ! reagent -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000414 -name: invalidated_by_genomic_contamination -def: "An attribute to describe a feature that is invalidated due to genomic contamination." [SO:ke] -synonym: "invalidated by genomic contamination" EXACT [] -is_a: SO:0000790 ! 
invalidated - -[Term] -id: SO:0000415 -name: invalidated_by_genomic_polyA_primed_cDNA -def: "An attribute to describe a feature that is invalidated due to polyA priming." [SO:ke] -synonym: "invalidated by genomic polyA primed cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000416 -name: invalidated_by_partial_processing -def: "An attribute to describe a feature that is invalidated due to partial processing." [SO:ke] -synonym: "invalidated by partial processing" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000417 -name: polypeptide_domain -alt_id: BS:00012 -alt_id: BS:00134 -alt_id: SO:0001069 -def: "A structurally or functionally defined protein region. In proteins with multiple domains, the combination of the domains determines the function of the protein. A region which has been shown to recur throughout evolution." [EBIBS:GAR] -comment: Range. Old definition from before biosapiens: A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains. -subset: biosapiens -synonym: "ca_bind" RELATED BS [uniprot:feature_type] -synonym: "DNA_bind" RELATED BS [uniprot:feature_type] -synonym: "domain" BROAD BS [uniprot:feature_type] -synonym: "np_bind" RELATED BS [uniprot:feature_type] -synonym: "polypeptide domain" EXACT [] -synonym: "polypeptide_structural_domain" EXACT BS [] -synonym: "structural domain" BROAD BS [] -synonym: "zn_fing" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -synonym: "5' TIR" EXACT [] -synonym: "five prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -synonym: "3' TIR" EXACT [] -synonym: "three prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -synonym: "U5 long terminal repeat region" EXACT [] -synonym: "U5 LTR region" EXACT [] -is_a: SO:0000848 ! 
LTR_component - -[Term] -id: SO:0000423 -name: R_LTR_region -synonym: "R long terminal repeat region" EXACT [] -synonym: "R LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000424 -name: U3_LTR_region -synonym: "U3 long terminal repeat region" EXACT [] -synonym: "U3 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000425 -name: five_prime_LTR -synonym: "5' long terminal repeat" EXACT [] -synonym: "5' LTR" EXACT [] -synonym: "five prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -synonym: "3' long terminal repeat" EXACT [] -synonym: "3' LTR" EXACT [] -synonym: "three prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -synonym: "R 5' long term repeat region" EXACT [] -synonym: "R five prime LTR region" EXACT [] -is_a: SO:0000423 ! R_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -synonym: "U5 5' long terminal repeat region" EXACT [] -synonym: "U5 five prime LTR region" EXACT [] -is_a: SO:0000422 ! U5_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -synonym: "U3 5' long term repeat region" EXACT [] -synonym: "U3 five prime LTR region" EXACT [] -is_a: SO:0000424 ! U3_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -synonym: "R 3' long terminal repeat region" EXACT [] -synonym: "R three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -synonym: "U3 3' long terminal repeat region" EXACT [] -synonym: "U3 three prime LTR region" EXACT [] -is_a: SO:0000849 ! 
three_prime_LTR_component - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -synonym: "U5 3' long terminal repeat region" EXACT [] -synonym: "U5 three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -synonym: "non LTR retrotransposon polymeric tract" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: target_site_duplication -def: "A sequence of the target DNA that is duplicated when a transposable element or phage inserts; usually found at each end the insertion." [http://www.koko.gov.my/CocoaBioTech/Glossaryt.html] -synonym: "target site duplication" EXACT [] -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." [SO:ke] -synonym: "LTR retrotransposon poly purine tract" RELATED [] -synonym: "RR tract" EXACT [] -is_a: SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -synonym: "inverted ring chromosome" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! ring_chromosome - -[Term] -id: SO:0000440 -name: vector_replicon -def: "A replicon that has been modified to act as a vector for foreign sequence." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
-synonym: "vector" EXACT [] -synonym: "vector replicon" EXACT [] -xref: http://en.wikipedia.org/wiki/Vector_(molecular_biology) "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000443 -name: polymer_attribute -def: "An attribute to describe the kind of biological sequence." [SO:ke] -synonym: "polymer attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -synonym: "three prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "5' nc exon" EXACT [] -synonym: "5' non coding exon" EXACT [] -synonym: "five prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." [SO:ke] -synonym: "UTR intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." [SO:ke] -synonym: "five prime UTR intron" EXACT [] -is_a: SO:0000446 ! 
UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -synonym: "three prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequency of these components." [SO:ma] -synonym: "random sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -synonym: "chromosome interband" RELATED [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000451 -name: gene_with_polyadenylated_mRNA -def: "A gene that encodes a polyadenylated mRNA." [SO:xp] -synonym: "gene with polyadenylated mRNA" EXACT [] -is_a: SO:0001217 ! implied link automatically realized ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000871 ! polyadenylated_mRNA - -[Term] -id: SO:0000452 -name: transgene_attribute -is_obsolete: true - -[Term] -id: SO:0000453 -name: transposition -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000455 -name: gene_with_mRNA_with_frameshift -def: "A gene that encodes an mRNA with a frameshift." [SO:xp] -synonym: "gene with mRNA with frameshift" EXACT [] -is_a: SO:0001217 ! implied link automatically realized ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000865 ! 
frameshift - -[Term] -id: SO:0000456 -name: recombinationally_rearranged_gene -def: "A gene that is recombinationally rearranged." [SO:ke] -synonym: "recombinationally rearranged gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000940 ! recombinationally_rearranged - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -def: "A chromosome duplication involving an insertion from another chromosome." [SO:ke] -synonym: "interchromosomal duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D gene" EXACT [] -synonym: "D-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000459 -name: gene_with_trans_spliced_transcript -def: "A gene with a transcript that is trans-spliced." [SO:xp] -synonym: "gene with trans spliced transcript" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000479 ! trans_spliced_transcript - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_segment -comment: I am using the term segment instead of gene here to avoid confusion with the region 'gene'. -synonym: "vertebrate immunoglobulin T cell receptor segment" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite deficiency" EXACT [] -is_a: SO:1000029 ! 
chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000463 -name: encodes_alternately_spliced_transcripts -def: "A gene that encodes more than one transcript." [SO:ke] -synonym: "encodes alternately spliced transcripts" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus duplication" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene" EXACT [] -synonym: "V-GENE" EXACT [] -synonym: "variable_gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -def: "An attribute describing a gene sequence where the resulting protein is regulated by the stability of the resulting protein." [SO:ke] -synonym: "post translationally regulated by protein stability" EXACT [] -synonym: "post-translationally regulated by protein stability" EXACT [] -is_a: SO:0000130 ! 
post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -def: "An attribute describing a gene sequence where the resulting protein is modified to regulate it." [SO:ke] -synonym: "post translationally regulated by protein modification" EXACT [] -synonym: "post-translationally regulated by protein modification" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene" EXACT [] -synonym: "J-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000471 -name: autoregulated -def: "The gene product is involved in its own transcriptional regulation." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -def: "The gene product is involved in its own transcriptional regulation where it decreases transcription." [SO:ke] -synonym: "negatively autoregulated" EXACT [] -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." 
[SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -def: "The gene product is involved in its own transcriptional regulation, where it increases transcription." [SO:ke] -synonym: "positively autoregulated" EXACT [] -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." [SO:ke] -synonym: "contig read" EXACT [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -def: "A gene that is polycistronic." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000478 -name: C_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C gene" EXACT [] -synonym: "C_GENE" EXACT [] -synonym: "constant gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000479 -name: trans_spliced_transcript -def: "A transcript that is trans-spliced." [SO:xp] -synonym: "trans spliced transcript" EXACT [] -synonym: "trans-spliced transcript" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly." [SO:ke] -synonym: "tiling path clone" EXACT [] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! 
tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occuring at the termini of a DNA transposon." [SO:ke] -synonym: "terminal inverted repeat" EXACT [] -synonym: "TIR" EXACT [] -is_a: SO:0000294 ! inverted_repeat -relationship: part_of SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor gene cluster" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-CLUSTER" EXACT [] -synonym: "DJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." 
[SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-C-CLUSTER" EXACT [] -synonym: "VDJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-CLUSTER" EXACT [] -synonym: "VDJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-C-CLUSTER" RELATED [] -synonym: "VJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-C-CLUSTER" EXACT [] -synonym: "VJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-CLUSTER" EXACT [] -synonym: "VJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -synonym: "D gene recombination feature" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-HEPTAMER" EXACT [] -synonym: "three prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-NOMAMER" EXACT [] -synonym: "three prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! 
three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-SPACER" EXACT [] -synonym: "three prime D spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-HEPTAMER" EXACT [] -synonym: "five prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-NONAMER" EXACT [] -synonym: "five prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'-SPACER" EXACT [] -synonym: "five prime D spacer" EXACT [] -synonym: "five prime D-spacer" EXACT [] -is_a: SO:0000563 ! 
vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -synonym: "Hoogsteen base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Hoogsteen_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." [SO:ke] -synonym: "reverse Hoogsteen base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ C cluster" EXACT [] -synonym: "D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ cluster" EXACT [] -synonym: "D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J C cluster" EXACT [] -synonym: "D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000507 -name: pseudogenic_exon -def: "A non functional descendant of an exon, part of a pseudogene." [SO:ke] -comment: This is the analog of the exon of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutatations. -synonym: "pseudogenic exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon -relationship: part_of SO:0000516 ! 
pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J cluster" EXACT [] -synonym: "D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J C cluster" EXACT [] -synonym: "D-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000510 -name: VD_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V_D_GENE" EXACT [] -synonym: "VD gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J C cluster" EXACT [] -synonym: "J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! 
C_gene - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus aneuploid" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J cluster" EXACT [] -synonym: "J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J nonamer" EXACT [] -synonym: "J-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J heptamer" EXACT [] -synonym: "J-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -def: "A non functional descendant of a transcript, part of a pseudogene." [SO:ke] -comment: This is the analog of the transcript of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. 
Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutatations. -synonym: "pseudogenic transcript" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J spacer" EXACT [] -synonym: "J-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ cluster" EXACT [] -synonym: "V-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J cluster" EXACT [] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ C cluster" EXACT [] -synonym: "V-(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ cluster" EXACT [] -synonym: "V-(VDJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J cluster" EXACT [] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ C cluster" EXACT [] -synonym: "V-(VJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! 
VJ_gene - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ cluster" EXACT [] -synonym: "V-(VJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J cluster" EXACT [] -synonym: "V-(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V cluster" EXACT [] -synonym: "V-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ C cluster" EXACT [] -synonym: "V-D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! 
C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ cluster" EXACT [] -synonym: "V-D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J C cluster" EXACT [] -synonym: "V-D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J cluster" EXACT [] -synonym: "V-D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! 
DJ_gene - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J C cluster" EXACT [] -synonym: "V-D-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J cluster" EXACT [] -synonym: "V-D-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V heptamer" EXACT [] -synonym: "V-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J cluster" EXACT [] -synonym: "V-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! 
V_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J C cluster" EXACT [] -synonym: "V-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V nonamer" EXACT [] -synonym: "V-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V spacer" EXACT [] -synonym: "V-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene recombination feature" EXACT [] -synonym: "V-RS" EXACT [] -is_a: SO:0000939 ! 
vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-C-CLUSTER" EXACT [] -synonym: "DJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-C-CLUSTER" EXACT [] -synonym: "DJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-C-CLUSTER" EXACT [] -synonym: "VDJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ C cluster" EXACT [] -synonym: "V-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! 
V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." [http://www.pnas.org/cgi/content/full/100/11/6569] -synonym: "ISCR" RELATED [] -xref: http://en.wikipedia.org/wiki/Helitron "wiki" -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -synonym: "recoding pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -synonym: "designed sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite duplication" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000548 -name: gene_with_edited_transcript -def: "A gene that encodes a transcript that is edited." [SO:xp] -synonym: "gene with edited transcript" EXACT [] -is_a: SO:0001217 ! implied link automatically realized ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000873 ! 
edited_transcript - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived duplication plus aneuploid" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -synonym: "aneuploid chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "Region in 5' UTR where ribosome assembles on mRNA." [SO:ke] -synonym: "five prime ribosome binding site" EXACT [] -synonym: "RBS" RELATED [] -synonym: "Shine Dalgarno sequence" EXACT [] -synonym: "Shine-Dalgarno sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Shine-Dalgarno_sequence "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA site" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! 
mature_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "5' clip" RELATED [] -synonym: "five prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'RS" EXACT [] -synonym: "five prime D recombination signal sequence" EXACT [] -synonym: "five prime D-recombination signal sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "3'-clip" EXACT [] -synonym: "three prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://www.imgt.org/ligmb/LIGMlect?query=7] -synonym: "C cluster" EXACT [] -synonym: "C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D cluster" EXACT [] -synonym: "D-CLUSTER" EXACT [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J cluster" EXACT [] -synonym: "D-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: "Seven nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or J-gene recombination feature of an immunoglobulin or T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "HEPTAMER" RELATED [] -synonym: "heptamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -synonym: "nonamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000563 -name: vertebrate_immune_system_gene_recombination_spacer -synonym: "vertebrate immune system gene recombination spacer" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J C cluster" EXACT [] -synonym: "V-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J C cluster" EXACT [] -synonym: "V-(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J C cluster" EXACT [] -synonym: "V-(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inverversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -synonym: "inversion derived aneuploid chromosome" EXACT [] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promoter -synonym: "bidirectional promoter" EXACT [] -is_a: SO:0000167 ! 
promoter - -[Term] -id: SO:0000569 -name: retrotransposed -alt_id: SO:0100042 -def: "An attribute of a feature that occured as the product of a reverse transcriptase mediated event." [SO:ke] -comment: GO:0003964 RNA-directed DNA polymerase activity. -xref: http://en.wikipedia.org/wiki/Retrotransposed "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-RS" EXACT [] -synonym: "three prime D recombination signal sequence" EXACT [] -synonym: "three_prime_D-recombination_signal_sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_encoding -synonym: "miRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000572 -name: DJ_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D-J-GENE" EXACT [] -synonym: "DJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000573 -name: rRNA_encoding -synonym: "rRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000574 -name: VDJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-D-J-GENE" EXACT [] -synonym: "VDJ gene" EXACT [] -is_a: SO:0000936 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000575 -name: scRNA_encoding -synonym: "scRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000576 -name: VJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-J-GENE" EXACT [] -synonym: "VJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_encoding -synonym: "snoRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." [SO:ma] -synonym: "edited transcript feature" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -synonym: "methylation guide snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." 
[SO:ke] -synonym: "rRNA cleavage snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "pre edited region" EXACT [] -synonym: "pre-edited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "A tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. TmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and eukaryote nuclear genomes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa RNA" RELATED [] -synonym: "ssrA" RELATED [] -xref: http://en.wikipedia.org/wiki/TmRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_encoding -synonym: "C/D box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." [SO:ke] -synonym: "10Sa RNA primary transcript" RELATED [] -synonym: "ssrA RNA primary transcript" RELATED [] -synonym: "tmRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyse their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). 
These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -is_a: SO:0000188 ! implied link automatically realized ! intron -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -synonym: "SRP RNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. 
Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000589 ! SRP_RNA_primary_transcript - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A tertiary structure in RNA where nucleotides in a loop form base pairs with a region of RNA downstream of the loop." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Pseudoknot "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "classical pseudoknot" EXACT [] -synonym: "H pseudoknot" EXACT [] -synonym: "H-pseudoknot" EXACT [] -synonym: "H-type pseudoknot" EXACT [] -synonym: "hairpin-type pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. 
Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000595 ! C_D_box_snoRNA_primary_transcript - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "box H/ACA snoRNA" EXACT [] -synonym: "H ACA box snoRNA" EXACT [] -synonym: "H/ACA box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000596 ! H_ACA_box_snoRNA_primary_transcript - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." [SO:ke] -synonym: "C/D box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -synonym: "H ACA box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! 
snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." [http://www.rna.ucla.edu/index.html] -is_obsolete: true - -[Term] -id: SO:0000598 -name: edited_by_C_insertion_and_dinucleotide_insertion -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000599 -name: edited_by_C_to_U_substitution -is_obsolete: true - -[Term] -id: SO:0000600 -name: edited_by_A_to_I_substitution -is_obsolete: true - -[Term] -id: SO:0000601 -name: edited_by_G_addition -is_obsolete: true - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. 
Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing block" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing domain" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "unedited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_encoding -synonym: "H ACA box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! 
snoRNA_encoding - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -synonym: "oligo U tail" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000602 ! guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -synonym: "bacterial RNApol promoter" EXACT [] -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -synonym: "bacterial terminator" EXACT [] -is_a: SO:0000141 ! terminator -is_a: SO:0000752 ! 
gene_group_regulatory_region - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -synonym: "terminator of type 2 RNApol III promoter" EXACT [] -is_a: SO:0000951 ! eukaryotic_terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -synonym: "RNApol III promoter type 1" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "RNApol III promoter type 2" EXACT [] -synonym: "tRNA promoter" RELATED [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -def: "A variably distant linear promoter region recognised by TFIIIC, with consensus sequence TGGCnnAGTGG." [SO:ke] -synonym: "A-box" EXACT [] -xref: http://en.wikipedia.org/wiki/A-box "wiki" -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000620 -name: B_box -def: "A variably distant linear promoter region recognised by TFIIIC, with consensus sequence AGGTTCCAnnCC." [SO:ke] -synonym: "B-box" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -synonym: "RNApol III promoter type 3" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -def: "An RNA polymerase III type 1 promoter with consensus sequence CAnnCCn." [SO:ke] -synonym: "C-box" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000623 -name: snRNA_encoding -synonym: "snRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -synonym: "chromosomal regulatory element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000627 -name: insulator -def: "A trancriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -synonym: "five prime open reading frame" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -def: "A start codon upstream of the ORF." [SO:ke] -synonym: "upstream AUG codon" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000631 -name: polycistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." [SO:ke] -synonym: "polycistronic primary transcript" EXACT [] -is_a: SO:0000078 ! 
implied link automatically realized ! polycistronic_transcript -is_a: SO:0000185 ! implied link automatically realized ! primary_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000632 -name: monocistronic_primary_transcript -def: "A primary transcript encoding for one gene product." [SO:ke] -synonym: "monocistronic primary transcript" EXACT [] -is_a: SO:0000185 ! implied link automatically realized ! primary_transcript -is_a: SO:0000665 ! implied link automatically realized ! monocistronic_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000633 -name: monocistronic_mRNA -def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd] -synonym: "monocistronic mRNA" EXACT [] -synonym: "monocistronic processed transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Monocistronic_mRNA "wiki" -is_a: SO:0000234 ! implied link automatically realized ! mRNA -is_a: SO:0000665 ! implied link automatically realized ! monocistronic_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000634 -name: polycistronic_mRNA -def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd] -synonym: "polycistronic mRNA" EXACT [] -synonym: "polycistronic processed transcript" RELATED [] -xref: http://en.wikipedia.org/wiki/Polycistronic_mRNA "wiki" -is_a: SO:0000078 ! implied link automatically realized ! polycistronic_transcript -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "A primary transcript that donates the spliced leader to other mRNA." 
[SO:ke] -synonym: "mini exon donor RNA" EXACT [] -synonym: "mini-exon donor RNA" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -synonym: "spliced leader RNA" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000635 ! mini_exon_donor_RNA - -[Term] -id: SO:0000637 -name: engineered_plasmid -def: "A plasmid that is engineered." [SO:xp] -synonym: "engineered plasmid" EXACT [] -synonym: "engineered plasmid gene" RELATED [] -is_a: SO:0000155 ! implied link automatically realized ! plasmid -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -synonym: "transcribed spacer region" EXACT [] -is_a: SO:0000838 ! rRNA_primary_transcript_region - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -synonym: "internal transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -synonym: "external transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -synonym: "tetranucleotide repeat microsatellite feature" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_encoding -synonym: "SRP RNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro RNA primary transcript" EXACT [] -synonym: "miRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000648 -name: stRNA_primary_transcript -def: "A primary transcript encoding a small temporal mRNA (SO:0000649)." [SO:ke] -synonym: "small temporal RNA primary transcript" EXACT [] -synonym: "stRNA primary transcript" EXACT [] -is_a: SO:0000647 ! 
miRNA_primary_transcript - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000255 ! rRNA_small_subunit_primary_transcript - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilises 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! 
large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000654 -name: maxicircle_gene -def: "A mitochondrial gene located in a maxicircle." [SO:xp] -synonym: "maxi-circle gene" EXACT [] -synonym: "maxicircle gene" EXACT [] -is_a: SO:0000089 ! implied link automatically realized ! kinetoplast_gene -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000742 ! maxicircle - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000656 -name: stRNA_encoding -synonym: "stRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region -relationship: has_part SO:0000726 ! repeat_unit - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! 
repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_encoding -synonym: "tmRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_obsolete: true - -[Term] -id: SO:0000661 -name: intron_attribute -is_obsolete: true - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000663 -name: tRNA_encoding -synonym: "tRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -synonym: "introgressed chromosome region" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000665 -name: monocistronic_transcript -def: "A transcript that is monocistronic." [SO:xp] -synonym: "monocistronic transcript" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000666 -name: mobile_intron -def: "An intron (mitochondrial, chloroplast, nuclear or prokaryotic) that encodes a double strand sequence specific endonuclease allowing for mobility." [SO:ke] -synonym: "mobile intron" EXACT [] -is_a: SO:0000188 ! implied link automatically realized ! intron -is_a: SO:0001037 ! implied link automatically realized ! mobile_genetic_element -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0000667 -name: insertion -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -synonym: "sequence rearrangement feature" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." [SO:ma] -synonym: "chromosome breakage sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -synonym: "internal eliminated sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -synonym: "macronucleus destined segment" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non canonical splice site" EXACT [] -synonym: "non-canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000678 -consider: SO:0000679 - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." 
[SO:ke] -synonym: "canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000676 -consider: SO:0000677 - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -synonym: "canonical 3' splice site" EXACT [] -synonym: "canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." [SO:ke] -synonym: "canonical 5' splice site" EXACT [] -synonym: "canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." [SO:ke] -synonym: "non canonical 3' splice site" RELATED [] -synonym: "non canonical three prime splice site" EXACT [] -synonym: "non-canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non canonical 5' splice site" EXACT [] -synonym: "non canonical five prime splice site" EXACT [] -synonym: "non-canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non ATG start codon" EXACT [] -synonym: "non canonical start codon" EXACT [] -synonym: "non-canonical start codon" EXACT [] -is_a: SO:0000318 ! start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -synonym: "aberrant processed transcript" EXACT [] -is_a: SO:0000673 ! 
transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -synonym: "exonic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000685 -name: DNAseI_hypersensitive_site -synonym: "DHS" EXACT [] -synonym: "DNAseI hypersensitive site" EXACT [] -is_a: SO:0000322 ! nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "For some translocations, particularly but not exclusively, reciprocal translocations, the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements." [SO:ma] -synonym: "translocation element" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! 
expressed_sequence_match - -[Term] -id: SO:0000690 -name: gene_with_polycistronic_transcript -def: "A gene that encodes a polycistronic transcript." [SO:xp] -synonym: "gene with polycistronic transcript" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000078 ! polycistronic_transcript - -[Term] -id: SO:0000691 -name: cleaved_initiator_methionine -alt_id: BS:00067 -def: "The initiator methionine that has been cleaved from a mature polypeptide sequence." [EBIBS:GAR] -subset: biosapiens -synonym: "cleaved initiator methionine" EXACT [] -synonym: "init_met" RELATED [uniprot:feature_type] -synonym: "initiator methionine" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000692 -name: gene_with_dicistronic_transcript -def: "A gene that encodes a dicistronic transcript." [SO:xp] -synonym: "gene with dicistronic transcript" EXACT [] -is_a: SO:0000690 ! implied link automatically realized ! gene_with_polycistronic_transcript -intersection_of: SO:0000690 ! gene_with_polycistronic_transcript -intersection_of: transcribed_to SO:0000079 ! dicistronic_transcript - -[Term] -id: SO:0000693 -name: gene_with_recoded_mRNA -def: "A gene that encodes an mRNA that is recoded." [SO:xp] -synonym: "gene with recoded mRNA" EXACT [] -is_a: SO:0001217 ! implied link automatically realized ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000881 ! recoded - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." 
[SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000697 -name: gene_with_stop_codon_read_through -def: "A gene that encodes a transcript with stop codon readthrough." [SO:xp] -synonym: "gene with stop codon read through" EXACT [] -is_a: SO:0000693 ! implied link automatically realized ! gene_with_recoded_mRNA -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000698 -name: gene_with_stop_codon_redefined_as_pyrrolysine -def: "A gene encoding an mRNA that has the stop codon redefined as pyrrolysine." [SO:xp] -synonym: "gene with stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000697 ! implied link automatically realized ! gene_with_stop_codon_read_through -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000884 ! stop_codon_redefined_as_pyrrolysine - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000001 ! region - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! 
sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjcent copies of a region (of length greater than 1)." [SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The 5' five prime splice site region of the donor RNA." [SO:ke] -comment: SL RNA contains a donor site. 
-synonym: "5 prime trans splice site" RELATED [] -synonym: "trans splice donor site" EXACT [] -synonym: "trans-splice donor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -synonym: "SL1 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -synonym: "SL2 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000710 -name: gene_with_stop_codon_redefined_as_selenocysteine -def: "A gene encoding an mRNA that has the stop codon redefined as selenocysteine." [SO:xp] -synonym: "gene with stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000697 ! implied link automatically realized ! gene_with_stop_codon_read_through -intersection_of: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: has_part SO:0000885 ! stop_codon_redefined_as_selenocysteine - -[Term] -id: SO:0000711 -name: gene_with_mRNA_recoded_by_translational_bypass -def: "A gene with mRNA recoded by translational bypass." [SO:xp] -synonym: "gene with mRNA recoded by translational bypass" EXACT [] -is_a: SO:0000693 ! implied link automatically realized ! gene_with_recoded_mRNA -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:0000712 -name: gene_with_transcript_with_translational_frameshift -def: "A gene encoding a transcript that has a translational frameshift." [SO:xp] -synonym: "gene with transcript with translational frameshift" EXACT [] -is_a: SO:0000693 ! implied link automatically realized ! gene_with_recoded_mRNA -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000713 -name: DNA_motif -def: "A motif that is active in the DNA form of the sequence." 
[SO:ke] -synonym: "DNA motif" EXACT [] -xref: http://en.wikipedia.org/wiki/DNA_motif "wiki" -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000716 -name: dicistronic_mRNA -def: "An mRNA that has the quality dicistronic." [SO:ke] -synonym: "dicistronic mRNA" EXACT [] -synonym: "dicistronic processed transcript" RELATED [] -is_a: SO:0000079 ! implied link automatically realized ! dicistronic_transcript -is_a: SO:0000634 ! implied link automatically realized ! polycistronic_mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interrupted by one or more stop codons; usually identified through intergenomic sequence comparisons." [SGD:rb] -comment: Term requested by Rama from SGD. -synonym: "blocked reading frame" EXACT [] -is_a: SO:0000717 ! 
reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000720 -name: foreign_transposable_element -def: "A transposable element that is foreign." [SO:ke] -comment: requested by Michael on 19 Nov 2004. -synonym: "foreign transposable element" EXACT [] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000721 -name: gene_with_dicistronic_primary_transcript -def: "A gene that encodes a dicistronic primary transcript." [SO:xp] -comment: Requested by Michael, 19 nov 2004. -synonym: "gene with dicistronic primary transcript" EXACT [] -is_a: SO:0000692 ! implied link automatically realized ! gene_with_dicistronic_transcript -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:1001197 ! dicistronic_primary_transcript - -[Term] -id: SO:0000722 -name: gene_with_dicistronic_mRNA -def: "A gene that encodes a polycistronic mRNA." [SO:xp] -comment: Requested by MA nov 19 2004. -synonym: "gene with dicistronic mRNA" EXACT [] -synonym: "gene with dicistronic processed transcript" EXACT [] -is_a: SO:0000692 ! implied link automatically realized ! gene_with_dicistronic_transcript -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:0000716 ! dicistronic_mRNA - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." 
[SO:ma] -synonym: "intervening DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/IDNA "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000726 -name: repeat_unit -def: "The simplest repeated component of a repeat region. A single repeat." [SO:ke] -comment: Added to comply with the feature table. A single repeat. -synonym: "repeat unit" EXACT [] -xref: http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -is_a: SO:0001055 ! 
implied link automatically realized ! transcriptional_cis_regulatory_region -intersection_of: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000728 -name: intein -def: "A region of a peptide that is able to excise itself and rejoin the remaining portions with a peptide bond." [SO:ke] -comment: Intein-mediated protein splicing occurs after mRNA has been translated into a protein. -synonym: "protein intron" RELATED [] -xref: http://en.wikipedia.org/wiki/Intein "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000729 -name: intein_containing -def: "An attribute of protein-coding genes where the initial protein product contains an intein." [SO:ke] -synonym: "intein containing" EXACT [] -is_a: SO:0000010 ! protein_coding - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000731 -name: fragmentary -def: "An attribute to describe a feature that is incomplete." [SO:ke] -comment: Term added because of request by MO people. -synonym: "fragment" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000732 -name: predicted -def: "An attribute describing an unverified region." [SO:ke] -xref: http://en.wikipedia.org/wiki/Predicted "wiki" -is_a: SO:0000905 ! status - -[Term] -id: SO:0000733 -name: feature_attribute -def: "An attribute describing a located_sequence_feature." [SO:ke] -synonym: "feature attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000734 -name: exemplar_mRNA -def: "An exemplar is a representative cDNA sequence for each gene. The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." 
[http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: Added for the MO people. -synonym: "exemplar mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000864 ! exemplar - -[Term] -id: SO:0000735 -name: sequence_location -synonym: "sequence location" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_sequence -synonym: "organelle sequence" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "mitochondrial sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000738 -name: nuclear_sequence -synonym: "nuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -synonym: "nucleomorphic sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000740 -name: plastid_sequence -synonym: "plastid sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000741 -name: kinetoplast -alt_id: SO:0000826 -def: "A kinetoplast is an interlocked network of thousands of minicircles and tens of maxi circles, located near the base of the flagellum of some protozoan species." [PMID:8395055] -synonym: "kinetoplast_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Kinetoplast "wiki" -is_a: SO:0001260 ! sequence_collection - -[Term] -id: SO:0000742 -name: maxicircle -alt_id: SO:0000827 -def: "A maxicircle is a replicon, part of a kinetoplast, that contains open reading frames and replicates via a rolling circle method." [PMID:8395055] -synonym: "maxicircle_chromosome" EXACT [] -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000741 ! kinetoplast - -[Term] -id: SO:0000743 -name: apicoplast_sequence -synonym: "apicoplast sequence" EXACT [] -is_a: SO:0000740 ! 
plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -synonym: "chromoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -synonym: "chloroplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -synonym: "cyanelle sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -synonym: "leucoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -synonym: "proplastid sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_location -synonym: "plasmid location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -synonym: "amplification origin" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_location -synonym: "proviral location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -def: "The region of sequence that has been inserted and is being propogated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000754 -name: lambda_vector -def: "The lambda bacteriophage is the vector for the linear lambda clone. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." 
[ISBN:0-1767-2380-8] -synonym: "lambda vector" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000755 -name: plasmid_vector -synonym: "plasmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Plasmid_vector#Vectors "wiki" -is_a: SO:0000440 ! vector_replicon -intersection_of: SO:0001235 ! replicon -intersection_of: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template." [SO:ma] -xref: http://en.wikipedia.org/wiki/CDNA "wiki" -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -synonym: "single strand cDNA" EXACT [] -synonym: "single stranded cDNA" EXACT [] -synonym: "single-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -synonym: "double strand cDNA" RELATED [] -synonym: "double stranded cDNA" EXACT [] -synonym: "double-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_obsolete: true - -[Term] -id: SO:0000760 -name: YAC_clone -is_obsolete: true - -[Term] -id: SO:0000761 -name: phagemid_clone -is_obsolete: true - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000763 -name: fosmid_clone -is_obsolete: true - -[Term] -id: SO:0000764 -name: BAC_clone -is_obsolete: true - -[Term] -id: SO:0000765 -name: cosmid_clone -is_obsolete: true - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -def: "A tRNA sequence that has a pyrrolysine anticodon, and a 3' pyrrolysine binding region." [SO:ke] -synonym: "pyrrolysyl tRNA" EXACT [] -synonym: "pyrrolysyl-transfer ribonucleic acid" EXACT [] -synonym: "pyrrolysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0001178 ! 
pyrrolysine_tRNA_primary_transcript - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome." [SO:ma] -is_a: SO:0000155 ! plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria. Processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw, issn:1362-4962] -comment: Added in response to comment from Kelly Williams from Indiana. Nov 2005. -synonym: "tmRNA coding piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw] -comment: Added in response to Kelly Williams from Indiana. Date: Nov 2005. -synonym: "tmRNA acceptor piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000771 -name: QTL -def: "A quantitative trait locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." [http://rgd.mcw.edu/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005. -synonym: "quantitative trait locus" EXACT [] -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000772 -name: genomic_island -def: "A genomic island is an integrated mobile genetic element, characterized by size (over 10 Kb). It that has features that suggest a foreign origin. These can include nucleotide distribution (oligonucleotides signature, CG content etc.) that differs from the bulk of the chromosome and/or genes suggesting DNA mobility." [Phigo:at, SO:ke] -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -synonym: "genomic island" EXACT [] -xref: http://en.wikipedia.org/wiki/Genomic_island "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "pathogenic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands. -synonym: "metabolic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -def: "An adaptive island is a genomic island that provides an adaptive advantage to the host." [SO:ke] -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands. 
Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "adaptive island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands. Evolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA gene. John T. Sullivan and Clive W. Ronso PNAS 1998 Apr 28 95 (9) 5145-5149. -synonym: "symbiosis island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000779 -name: engineered_episome -def: "An episome that is engineered." [SO:xp] -comment: Requested by Lynn Crosby Jan 2006. -synonym: "engineered episome" EXACT [] -is_a: SO:0000637 ! implied link automatically realized ! engineered_plasmid -is_a: SO:0000768 ! implied link automatically realized ! episome -intersection_of: SO:0000768 ! episome -intersection_of: has_quality SO:0000783 ! 
engineered - -[Term] -id: SO:0000780 -name: transposable_element_attribute -comment: Added by KE Jan 2006 to capture the kinds of attributes of TEs -is_obsolete: true - -[Term] -id: SO:0000781 -name: transgenic -def: "Attribute describing sequence that has been integrated with foreign sequence." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000782 -name: natural -def: "An attribute describing a feature that occurs in nature." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000783 -name: engineered -def: "An attribute to describe a region that was modified in vitro." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000784 -name: foreign -def: "An attribute to describe a region from another species." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000785 -name: cloned_region -comment: Added in response to Lynn Crosby. A clone insert may be composed of many cloned regions. -synonym: "cloned region" EXACT [] -synonym: "cloned segment" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000786 -name: reagent_attribute -comment: Added jan 2006 by KE. -synonym: "reagent attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000787 -name: clone_attribute -is_obsolete: true - -[Term] -id: SO:0000788 -name: cloned -is_obsolete: true - -[Term] -id: SO:0000789 -name: validated -def: "An attribute to describe a feature that has been proven." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000790 -name: invalidated -def: "An attribute describing a feature that is invalidated." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000791 -name: cloned_genomic -is_obsolete: true - -[Term] -id: SO:0000792 -name: cloned_cDNA -is_obsolete: true - -[Term] -id: SO:0000793 -name: engineered_DNA -is_obsolete: true - -[Term] -id: SO:0000794 -name: engineered_rescue_region -def: "A rescue region that is engineered." 
[SO:xp] -synonym: "engineered rescue fragment" EXACT [] -synonym: "engineered rescue region" EXACT [] -synonym: "engineered rescue segment" EXACT [] -is_a: SO:0000411 ! implied link automatically realized ! rescue_region -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000411 ! rescue_region -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000795 -name: rescue_mini_gene -def: "A mini_gene that rescues." [SO:xp] -synonym: "rescue mini gene" EXACT [] -synonym: "rescue mini-gene" EXACT [] -is_a: SO:0000815 ! implied link automatically realized ! mini_gene -intersection_of: SO:0000815 ! mini_gene -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000796 -name: transgenic_transposable_element -def: "TE that has been modified in vitro, including insertion of DNA derived from a source other than the originating TE." [FB:mc] -comment: Modified as requested by Lynn - FB. May 2007. -synonym: "transgenic transposable element" EXACT [] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: derives_from SO:0000151 ! clone -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000797 -name: natural_transposable_element -def: "TE that exists (or existed) in nature." [FB:mc] -synonym: "natural transposable element" EXACT [] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -is_a: SO:0001476 ! implied link automatically realized ! natural_plasmid -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000782 ! natural - -[Term] -id: SO:0000798 -name: engineered_transposable_element -def: "TE that has been modified by manipulations in vitro." [FB:mc] -synonym: "engineered transposable element" EXACT [] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -is_a: SO:0000804 ! 
implied link automatically realized ! engineered_region -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000799 -name: engineered_foreign_transposable_element -def: "A transposable_element that is engineered and foreign." [FB:mc] -synonym: "engineered foreign transposable element" EXACT [] -is_a: SO:0000720 ! implied link automatically realized ! foreign_transposable_element -is_a: SO:0000798 ! implied link automatically realized ! engineered_transposable_element -is_a: SO:0000805 ! implied link automatically realized ! engineered_foreign_region -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000800 -name: assortment_derived_duplication -def: "A multi-chromosome duplication aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment derived duplication" EXACT [] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000801 -name: assortment_derived_deficiency_plus_duplication -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency and a duplication." [FB:gm] -synonym: "assortment derived deficiency plus duplication" EXACT [] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000802 -name: assortment_derived_deficiency -def: "A multi-chromosome deficiency aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment-derived deficiency" EXACT [] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000803 -name: assortment_derived_aneuploid -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency or a duplication." [FB:gm] -synonym: "assortment derived aneuploid" EXACT [] -is_a: SO:0000240 ! 
chromosome_variation - -[Term] -id: SO:0000804 -name: engineered_region -def: "A region that is engineered." [SO:xp] -synonym: "engineered region" EXACT [] -synonym: "engineered sequence" EXACT [] -is_a: SO:0001409 ! biomaterial_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000805 -name: engineered_foreign_region -def: "A region that is engineered and foreign." [SO:xp] -synonym: "engineered foreign region" EXACT [] -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000806 -name: fusion -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000807 -name: engineered_tag -def: "A tag that is engineered." [SO:xp] -synonym: "engineered tag" EXACT [] -is_a: SO:0000324 ! implied link automatically realized ! tag -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000324 ! tag -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000808 -name: validated_cDNA_clone -def: "A cDNA clone that has been validated." [SO:xp] -synonym: "validated cDNA clone" EXACT [] -is_a: SO:0000317 ! implied link automatically realized ! cDNA_clone -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000789 ! validated - -[Term] -id: SO:0000809 -name: invalidated_cDNA_clone -def: "A cDNA clone that is invalid." [SO:xp] -synonym: "invalidated cDNA clone" EXACT [] -is_a: SO:0000317 ! implied link automatically realized ! cDNA_clone -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000790 ! invalidated - -[Term] -id: SO:0000810 -name: chimeric_cDNA_clone -def: "A cDNA clone invalidated because it is chimeric." [SO:xp] -synonym: "chimeric cDNA clone" EXACT [] -is_a: SO:0000809 ! implied link automatically realized ! 
invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA - -[Term] -id: SO:0000811 -name: genomically_contaminated_cDNA_clone -def: "A cDNA clone invalidated by genomic contamination." [SO:xp] -synonym: "genomically contaminated cDNA clone" EXACT [] -is_a: SO:0000809 ! implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000414 ! invalidated_by_genomic_contamination - -[Term] -id: SO:0000812 -name: polyA_primed_cDNA_clone -def: "A cDNA clone invalidated by polyA priming." [SO:xp] -synonym: "polyA primed cDNA clone" EXACT [] -is_a: SO:0000809 ! implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000415 ! invalidated_by_genomic_polyA_primed_cDNA - -[Term] -id: SO:0000813 -name: partially_processed_cDNA_clone -def: "A cDNA invalidated clone by partial processing." [SO:xp] -synonym: "partially processed cDNA clone" EXACT [] -is_a: SO:0000809 ! implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000416 ! invalidated_by_partial_processing - -[Term] -id: SO:0000814 -name: rescue -def: "An attribute describing a region's ability, when introduced to a mutant organism, to re-establish (rescue) a phenotype." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000815 -name: mini_gene -def: "By definition, minigenes are short open-reading frames (ORF), usually encoding approximately 9 to 20 amino acids, which are expressed in vivo (as distinct from being synthesized as peptide or protein ex vivo and subsequently injected). 
The in vivo synthesis confers a distinct advantage: the expressed sequences can enter both antigen presentation pathways, MHC I (inducing CD8+ T- cells, which are usually cytotoxic T-lymphocytes (CTL)) and MHC II (inducing CD4+ T-cells, usually 'T-helpers' (Th)); and can encounter B-cells, inducing antibody responses. Three main vector approaches have been used to deliver minigenes: viral vectors, bacterial vectors and plasmid DNA." [PMID:15992143] -synonym: "mini gene" EXACT [] -is_a: SO:0000236 ! ORF - -[Term] -id: SO:0000816 -name: rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "rescue gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000817 -name: wild_type -def: "An attribute describing sequence with the genotype found in nature and/or standard laboratory stock." [SO:ke] -synonym: "wild type" EXACT [] -xref: http://en.wikipedia.org/wiki/Wild_type "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000818 -name: wild_type_rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "wild type rescue gene" EXACT [] -is_a: SO:0000816 ! rescue_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000817 ! wild_type - -[Term] -id: SO:0000819 -name: mitochondrial_chromosome -def: "A chromosome originating in a mitochondria." [SO:xp] -synonym: "mitochondrial chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000820 -name: chloroplast_chromosome -def: "A chromosome originating in a chloroplast." [SO:xp] -synonym: "chloroplast chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000745 ! 
chloroplast_sequence - -[Term] -id: SO:0000821 -name: chromoplast_chromosome -def: "A chromosome originating in a chromoplast." [SO:xp] -synonym: "chromoplast chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000822 -name: cyanelle_chromosome -def: "A chromosome originating in a cyanelle." [SO:xp] -synonym: "cyanelle chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000823 -name: leucoplast_chromosome -def: "A chromosome with origin in a leucoplast." [SO:xp] -synonym: "leucoplast chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000824 -name: macronuclear_chromosome -def: "A chromosome originating in a macronucleus." [SO:xp] -synonym: "macronuclear chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000083 ! macronuclear_sequence - -[Term] -id: SO:0000825 -name: micronuclear_chromosome -def: "A chromosome originating in a micronucleus." [SO:xp] -synonym: "micronuclear chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000084 ! micronuclear_sequence - -[Term] -id: SO:0000828 -name: nuclear_chromosome -def: "A chromosome originating in a nucleus." [SO:xp] -synonym: "nuclear chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000738 ! 
nuclear_sequence - -[Term] -id: SO:0000829 -name: nucleomorphic_chromosome -def: "A chromosome originating in a nucleomorph." [SO:xp] -synonym: "nucleomorphic chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000832 -name: promoter_region -def: "A region of sequence which is part of a promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -def: "A region of a mature transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! 
transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000838 -name: rRNA_primary_transcript_region -def: "A region of an rRNA primary transcript." [SO:ke] -comment: To allow transcribed_spacer_region to have a path to the root. -synonym: "rRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! 
biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000840 -name: repeat_component -def: "A region of a repeated sequence." [SO:ke] -comment: A manufactured to group the parts of repeats, to give them an is_a path back to the root. -synonym: "repeat component" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000843 -name: bacterial_RNApol_promoter_region -def: "A region which is part of a bacterial RNA polymerase promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of bacterial_RNApol_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000844 -name: RNApol_II_promoter_region -def: "A region of sequence which is a promoter for RNA polymerase II." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_II_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000845 -name: RNApol_III_promoter_type_1_region -def: "A region of sequence which is a promoter for RNA polymerase III type 1." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_1 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000846 -name: RNApol_III_promoter_type_2_region -def: "A region of sequence which is a promoter for RNA polymerase III type 2." 
[SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_2 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000847 -name: tmRNA_region -def: "A region of a tmRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of tmRNA, thus giving them an is_a path back to the root. -synonym: "tmRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000584 ! tmRNA - -[Term] -id: SO:0000848 -name: LTR_component -synonym: "long term repeat component" EXACT [] -synonym: "LTR component" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000849 -name: three_prime_LTR_component -synonym: "3' long terminal repeat component" EXACT [] -synonym: "three prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000850 -name: five_prime_LTR_component -synonym: "5' long term repeat component" EXACT [] -synonym: "five prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000851 -name: CDS_region -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0000853 -name: homologous_region -def: "A region that is homologous to another region." [SO:ke] -synonym: "homolog" EXACT [] -synonym: "homologous region" EXACT [] -synonym: "homologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Homology_(biology) "wiki" -is_a: SO:0000330 ! implied link automatically realized ! conserved_region -intersection_of: SO:0000330 ! 
conserved_region -intersection_of: has_quality SO:0000857 ! homologous - -[Term] -id: SO:0000854 -name: paralogous_region -def: "A homologous_region that is paralogous to another region." [SO:ke] -comment: A term to be used in conjunction with the paralogous_to relationship. -synonym: "paralog" EXACT [] -synonym: "paralogous region" EXACT [] -synonym: "paralogue" EXACT [] -xref: http://en.wikipedia.org/wiki/Paralog#Paralogy "wiki" -is_a: SO:0000853 ! implied link automatically realized ! homologous_region -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000859 ! paralogous - -[Term] -id: SO:0000855 -name: orthologous_region -def: "A homologous_region that is orthologous to another region." [SO:ke] -comment: This term should be used in conjunction with the similarity relationships defined in SO. -synonym: "ortholog" EXACT [] -synonym: "orthologous region" EXACT [] -synonym: "orthologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Ortholog#Orthology "wiki" -is_a: SO:0000853 ! implied link automatically realized ! homologous_region -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000858 ! orthologous - -[Term] -id: SO:0000856 -name: conserved -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000857 -name: homologous -def: "Similarity due to common ancestry." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000858 -name: orthologous -def: "An attribute describing a kind of homology where divergence occured after a speciation event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000859 -name: paralogous -def: "An attribute describing a kind of homology where divergence occurred after a duplication event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000860 -name: syntenic -def: "Attribute describing sequence regions occurring in same order on chromosome of different species." [SO:ke] -xref: http://en.wikipedia.org/wiki/Syntenic "wiki" -is_a: SO:0000856 ! 
conserved - -[Term] -id: SO:0000861 -name: capped_primary_transcript -def: "A primary transcript that is capped." [SO:xp] -synonym: "capped primary transcript" EXACT [] -is_a: SO:0000185 ! implied link automatically realized ! primary_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000862 -name: capped_mRNA -def: "An mRNA that is capped." [SO:xp] -synonym: "capped mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000863 -name: mRNA_attribute -def: "An attribute describing an mRNA feature." [SO:ke] -synonym: "mRNA attribute" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000864 -name: exemplar -def: "An attribute describing a sequence is representative of a class of similar sequences." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000865 -name: frameshift -def: "An attribute describing a sequence that contains a mutation involving the deletion or insertion of one or more bases, where this number is not divisible by 3." [SO:ke] -xref: http://en.wikipedia.org/wiki/Frameshift "wiki" -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000866 -name: minus_1_frameshift -def: "A frameshift caused by deleting one base." [SO:ke] -synonym: "minus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000867 -name: minus_2_frameshift -def: "A frameshift caused by deleting two bases." [SO:ke] -synonym: "minus 2 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000868 -name: plus_1_frameshift -def: "A frameshift caused by inserting one base." [SO:ke] -synonym: "plus 1 frameshift" EXACT [] -is_a: SO:0000865 ! 
frameshift - -[Term] -id: SO:0000869 -name: plus_2_framshift -def: "A frameshift caused by inserting two bases." [SO:ke] -synonym: "plus 2 framshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000870 -name: trans_spliced -def: "An attribute describing transcript sequence that is created by splicing exons from diferent genes." [SO:ke] -synonym: "trans-spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000871 -name: polyadenylated_mRNA -def: "An mRNA that is polyadenylated." [SO:xp] -synonym: "polyadenylated mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000610 ! polyA_sequence -intersection_of: has_quality SO:0000246 ! polyadenylated - -[Term] -id: SO:0000872 -name: trans_spliced_mRNA -def: "An mRNA that is trans-spliced." [SO:xp] -synonym: "trans-spliced mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -is_a: SO:0000479 ! implied link automatically realized ! trans_spliced_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000636 ! spliced_leader_RNA -intersection_of: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000873 -name: edited_transcript -def: "A transcript that is edited." [SO:ke] -synonym: "edited transcript" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: guided_by SO:0000602 ! guide_RNA -intersection_of: has_part SO:0000977 ! anchor_binding_site -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000874 -name: edited_transcript_by_A_to_I_substitution -def: "A transcript that has been edited by A to I substitution." [SO:ke] -synonym: "edited transcript by A to I substitution" EXACT [] -is_a: SO:0000873 ! edited_transcript -is_a: SO:0000929 ! implied link automatically realized ! 
edited_mRNA - -[Term] -id: SO:0000875 -name: bound_by_protein -def: "An attribute describing a sequence that is bound by a protein." [SO:ke] -synonym: "bound by protein" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000876 -name: bound_by_nucleic_acid -def: "An attribute describing a sequence that is bound by a nucleic acid." [SO:ke] -synonym: "bound by nucleic acid" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000877 -name: alternatively_spliced -def: "An attribute describing a situation where a gene may encode for more than 1 transcript." [SO:ke] -synonym: "alternatively spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000878 -name: monocistronic -def: "An attribute describing a sequence that contains the code for one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000879 -name: dicistronic -def: "An attribute describing a sequence that contains the code for two gene products." [SO:ke] -is_a: SO:0000880 ! polycistronic - -[Term] -id: SO:0000880 -name: polycistronic -def: "An attribute describing a sequence that contains the code for more than one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000881 -name: recoded -def: "An attribute describing an mRNA sequence that has been reprogrammed at translation, causing localized alterations." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000882 -name: codon_redefined -def: "An attribute describing the alteration of codon meaning." [SO:ke] -synonym: "codon redefined" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000883 -name: stop_codon_read_through -def: "A stop codon redefined to be a new amino acid." [SO:ke] -synonym: "stop codon read through" EXACT [] -synonym: "stop codon readthrough" RELATED [] -is_a: SO:0000145 ! 
recoded_codon - -[Term] -id: SO:0000884 -name: stop_codon_redefined_as_pyrrolysine -def: "A stop codon redefined to be the new amino acid, pyrrolysine." [SO:ke] -synonym: "stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000885 -name: stop_codon_redefined_as_selenocysteine -def: "A stop codon redefined to be the new amino acid, selenocysteine." [SO:ke] -synonym: "stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000886 -name: recoded_by_translational_bypass -def: "Recoded mRNA where a block of nucleotides is not translated." [SO:ke] -synonym: "recoded by translational bypass" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000887 -name: translationally_frameshifted -def: "Recoding by frameshifting a particular site." [SO:ke] -synonym: "translationally frameshifted" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000888 -name: maternally_imprinted_gene -def: "A gene that is maternally_imprinted." [SO:xp] -synonym: "maternally imprinted gene" EXACT [] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000135 ! maternally_imprinted - -[Term] -id: SO:0000889 -name: paternally_imprinted_gene -def: "A gene that is paternally imprinted." [SO:xp] -synonym: "paternally imprinted gene" EXACT [] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000136 ! paternally_imprinted - -[Term] -id: SO:0000890 -name: post_translationally_regulated_gene -def: "A gene that is post translationally regulated." [SO:xp] -synonym: "post translationally regulated gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000130 ! 
post_translationally_regulated - -[Term] -id: SO:0000891 -name: negatively_autoregulated_gene -def: "A gene that is negatively autoreguated." [SO:xp] -synonym: "negatively autoregulated gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000473 ! negatively_autoregulated - -[Term] -id: SO:0000892 -name: positively_autoregulated_gene -def: "A gene that is positively autoregulated." [SO:xp] -synonym: "positively autoregulated gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000475 ! positively_autoregulated - -[Term] -id: SO:0000893 -name: silenced -def: "An attribute describing an epigenetic process where a gene is inactivated at transcriptional or translational level." [SO:ke] -xref: http://en.wikipedia.org/wiki/Silenced "wiki" -is_a: SO:0000126 ! transcriptionally_repressed - -[Term] -id: SO:0000894 -name: silenced_by_DNA_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA modifications, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0000895 -name: silenced_by_DNA_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA methylation, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA methylation" EXACT [] -is_a: SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000896 -name: translationally_regulated_gene -def: "A gene that is translationally regulated." [SO:xp] -synonym: "translationally regulated gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000131 ! 
translationally_regulated - -[Term] -id: SO:0000897 -name: allelically_excluded_gene -def: "A gene that is allelically_excluded." [SO:xp] -synonym: "allelically excluded gene" EXACT [] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000137 ! allelically_excluded - -[Term] -id: SO:0000898 -name: epigenetically_modified_gene -def: "A gene that is epigenetically modified." [SO:ke] -synonym: "epigenetically modified gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000899 -name: nuclear_mitochondrial -def: "An attribute describing a nuclear pseudogene of a mitochndrial gene." [SO:ke] -synonym: "nuclear mitochondrial" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000900 -name: processed -def: "An attribute describing a pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promotors, but often including a polyA tail." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000901 -name: unequally_crossed_over -def: "An attribute describing a pseudogene that was created by tandem duplication and unequal crossing over during recombination." [SO:ke] -synonym: "unequally crossed over" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000902 -name: transgene -def: "A gene that is transgenic." [SO:xp] -xref: http://en.wikipedia.org/wiki/Transgene "wiki" -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000903 -name: endogenous_retroviral_sequence -synonym: "endogenous retroviral sequence" EXACT [] -is_a: SO:0000751 ! 
proviral_location - -[Term] -id: SO:0000904 -name: rearranged_at_DNA_level -def: "An attribute to describe the sequence of a feature, where the DNA is rearranged." [SO:ke] -synonym: "rearranged at DNA level" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000905 -name: status -def: "An attribute describing the status of a feature, based on the available evidence." [SO:ke] -comment: This term is the hypernym of attributes and should not be annotated to. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000906 -name: independently_known -def: "Attribute to describe a feature that is independently known - not predicted." [SO:ke] -synonym: "independently known" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000907 -name: supported_by_sequence_similarity -def: "An attribute to describe a feature that has been predicted using sequence similarity techniques." [SO:ke] -synonym: "supported by sequence similarity" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000908 -name: supported_by_domain_match -def: "An attribute to describe a feature that has been predicted using sequence similarity of a known domain." [SO:ke] -synonym: "supported by domain match" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000909 -name: supported_by_EST_or_cDNA -def: "An attribute to describe a feature that has been predicted using sequence similarity to EST or cDNA data." [SO:ke] -synonym: "supported by EST or cDNA" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000910 -name: orphan -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000911 -name: predicted_by_ab_initio_computation -def: "An attribute describing a feature that is predicted by a computer program that did not rely on sequence similarity." [SO:ke] -synonym: "predicted by ab initio computation" EXACT [] -is_a: SO:0000732 ! 
predicted - -[Term] -id: SO:0000912 -name: asx_turn -alt_id: BS:00203 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Aspartate or Asparagine (Asx), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0000913 -name: cloned_cDNA_insert -def: "A clone insert made from cDNA." [SO:xp] -synonym: "cloned cDNA insert" EXACT [] -is_a: SO:0000753 ! implied link automatically realized ! clone_insert -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000914 -name: cloned_genomic_insert -def: "A clone insert made from genomic DNA." [SO:xp] -synonym: "cloned genomic insert" EXACT [] -is_a: SO:0000753 ! implied link automatically realized ! clone_insert -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000915 -name: engineered_insert -def: "A clone insert that is engineered." [SO:xp] -synonym: "engineered insert" EXACT [] -is_a: SO:0000753 ! implied link automatically realized ! clone_insert -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000916 -name: edit_operation -synonym: "edit operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000917 -name: insert_U -def: "An edit to insert a U." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "insert U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000918 -name: delete_U -def: "An edit to delete a uridine." 
[SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "delete U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000919 -name: substitute_A_to_I -def: "An edit to substitute an I for an A." [SO:ke] -synonym: "substitute A to I" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000920 -name: insert_C -def: "An edit to insert a cytidine." [SO:ke] -synonym: "insert C" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000921 -name: insert_dinucleotide -def: "An edit to insert a dinucleotide." [SO:ke] -synonym: "insert dinucleotide" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000922 -name: substitute_C_to_U -def: "An edit to substitute an U for a C." [SO:ke] -synonym: "substitute C to U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000923 -name: insert_G -def: "An edit to insert a G." [SO:ke] -synonym: "insert G" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000924 -name: insert_GC -def: "An edit to insert a GC dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GC" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000925 -name: insert_GU -def: "An edit to insert a GU dinucleotide." 
[SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000926 -name: insert_CU -def: "An edit to insert a CU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert CU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000927 -name: insert_AU -def: "An edit to insert a AU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. 
Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000928 -name: insert_AA -def: "An edit to insert a AA dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AA" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000929 -name: edited_mRNA -def: "An mRNA that is edited." [SO:xp] -synonym: "edited mRNA" EXACT [] -is_a: SO:0000873 ! implied link automatically realized ! edited_transcript -intersection_of: SO:0000873 ! edited_transcript -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000930 -name: guide_RNA_region -def: "A region of guide RNA." [SO:ma] -synonym: "guide RNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000602 ! guide_RNA - -[Term] -id: SO:0000931 -name: anchor_region -def: "A region of a guide_RNA that base-pairs to a target mRNA." [SO:jk] -synonym: "anchor region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000932 -name: pre_edited_mRNA -synonym: "pre-edited mRNA" EXACT [] -is_a: SO:0000120 ! 
protein_coding_primary_transcript - -[Term] -id: SO:0000933 -name: intermediate -def: "An attribute to describe a feature between stages of processing." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000934 -name: miRNA_target_site -def: "A miRNA target site is a binding site where the molecule is a micro RNA." [FB:cds] -synonym: "miRNA target site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000935 -name: edited_CDS -def: "A CDS that is edited." [SO:xp] -synonym: "edited CDS" EXACT [] -is_a: SO:0000316 ! implied link automatically realized ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000936 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -synonym: "vertebrate immunoglobulin T cell receptor rearranged segment" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000937 -name: vertebrate_immune_system_feature -is_obsolete: true - -[Term] -id: SO:0000938 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor rearranged gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000939 -name: vertebrate_immune_system_gene_recombination_signal_feature -synonym: "vertebrate immune system gene recombination signal feature" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000940 -name: recombinationally_rearranged -synonym: "recombinationally rearranged" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000941 -name: recombinationally_rearranged_vertebrate_immune_system_gene -def: "A recombinationally rearranged gene of the vertebrate immune system." [SO:xp] -synonym: "recombinationally rearranged vertebrate immune system gene" EXACT [] -is_a: SO:0000456 ! 
recombinationally_rearranged_gene - -[Term] -id: SO:0000942 -name: attP_site -def: "An integration/excision site of a phage chromosome at which a recombinase acts to insert the phage DNA at a cognate integration/excision site on a bacterial chromosome." [SO:as] -synonym: "attP site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0001042 ! phage_sequence - -[Term] -id: SO:0000943 -name: attB_site -def: "An integration/excision site of a bacterial chromosome at which a recombinase acts to insert foreign DNA containing a cognate integration/excision site." [SO:as] -synonym: "attB site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000944 -name: attL_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attB_site and the 3' portion of attP_site." [SO:as] -synonym: "attBP'" RELATED [] -synonym: "attL site" RELATED [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000945 -name: attR_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attP_site and the 3' portion of attB_site." [SO:as] -synonym: "attPB'" RELATED [] -synonym: "attR site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000946 -name: integration_excision_site -def: "A region specifically recognised by a recombinase, which inserts or removes another region marked by a distinct cognate integration/excision site." [SO:as] -synonym: "attachment site" RELATED [] -synonym: "integration excision site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000947 -name: resolution_site -def: "A region specifically recognised by a recombinase, which separates a physically contiguous circle of DNA into two physically separate circles." [SO:as] -synonym: "res site" EXACT [] -synonym: "resolution site" EXACT [] -is_a: SO:0000342 ! 
site_specific_recombination_target_region - -[Term] -id: SO:0000948 -name: inversion_site -def: "A region specifically recognised by a recombinase, which inverts the region flanked by a pair of sites." [SO:ma] -comment: A target region for site-specific inversion of a DNA region and which carries binding sites for a site-specific recombinase and accessory proteins as well as the site for specific cleavage by the recombinase. -synonym: "inversion site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000949 -name: dif_site -def: "A site at which replicated bacterial circular chromosomes are decatenated by site specific resolvase." [SO:as] -synonym: "dif site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000950 -name: attC_site -def: "An attC site is a sequence required for the integration of a DNA of an integron." [SO:as] -synonym: "attC site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000951 -name: eukaryotic_terminator -synonym: "eukaryotic terminator" EXACT [] -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000952 -name: oriV -def: "An origin of vegetative replication in plasmids and phages." [SO:as] -synonym: "origin of vegetative replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000953 -name: oriC -def: "An origin of bacterial chromosome replication." [SO:as] -synonym: "origin of bacterial chromosome replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000954 -name: DNA_chromosome -def: "Structural unit composed of a self-replicating, DNA molecule." [SO:ma] -synonym: "DNA chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000352 ! 
DNA - -[Term] -id: SO:0000955 -name: double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded DNA molecule." [SO:ma] -synonym: "double stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! implied link automatically realized ! DNA_chromosome -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000985 ! double - -[Term] -id: SO:0000956 -name: single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded DNA molecule." [SO:ma] -synonym: "single stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! implied link automatically realized ! DNA_chromosome -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000984 ! single - -[Term] -id: SO:0000957 -name: linear_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear DNA molecule." [SO:ma] -synonym: "linear double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! implied link automatically realized ! double_stranded_DNA_chromosome -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000958 -name: circular_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular DNA molecule." [SO:ma] -synonym: "circular double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! implied link automatically realized ! double_stranded_DNA_chromosome -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000959 -name: linear_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear DNA molecule." [SO:ma] -synonym: "linear single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! implied link automatically realized ! single_stranded_DNA_chromosome -intersection_of: SO:0000956 ! 
single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000960 -name: circular_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! implied link automatically realized ! single_stranded_DNA_chromosome -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000961 -name: RNA_chromosome -def: "Structural unit composed of a self-replicating, RNA molecule." [SO:ma] -synonym: "RNA chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000356 ! RNA - -[Term] -id: SO:0000962 -name: single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded RNA molecule." [SO:ma] -synonym: "single stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! implied link automatically realized ! RNA_chromosome -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000984 ! single - -[Term] -id: SO:0000963 -name: linear_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear RNA molecule." [SO:ma] -synonym: "linear single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! implied link automatically realized ! single_stranded_RNA_chromosome -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000964 -name: linear_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear RNA molecule." [SO:ma] -synonym: "linear double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! implied link automatically realized ! 
double_stranded_RNA_chromosome -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000965 -name: double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded RNA molecule." [SO:ma] -synonym: "double stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! implied link automatically realized ! RNA_chromosome -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000985 ! double - -[Term] -id: SO:0000966 -name: circular_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! implied link automatically realized ! single_stranded_RNA_chromosome -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000967 -name: circular_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular RNA molecule." [SO:ma] -synonym: "circular double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! implied link automatically realized ! double_stranded_RNA_chromosome -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000968 -name: sequence_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "sequence replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000969 -name: rolling_circle -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070581. -synonym: "rolling circle" EXACT [] -xref: http://en.wikipedia.org/wiki/Rolling_circle "wiki" -is_obsolete: true - -[Term] -id: SO:0000970 -name: theta_replication -comment: This has been obsoleted as it represents a process. 
replaced_by: GO:0070582 -synonym: "theta replication" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000971 -name: DNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0006260. -synonym: "DNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000972 -name: RNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "RNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000973 -name: insertion_sequence -def: "A terminal_inverted_repeat_element that is bacterial and only encodes the functions required for its transposition between these inverted repeats." [SO:as] -synonym: "insertion sequence" EXACT [] -synonym: "IS" RELATED [] -xref: http://en.wikipedia.org/wiki/Insertion_sequence "wiki" -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000975 -name: minicircle_gene -synonym: "minicircle gene" EXACT [] -is_a: SO:0000089 ! implied link automatically realized ! kinetoplast_gene -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000980 ! minicircle - -[Term] -id: SO:0000976 -name: cryptic -def: "A feature_attribute describing a feature that is not manifest under normal conditions." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000977 -name: anchor_binding_site -comment: Part of an edited transcript only. -synonym: "anchor binding site" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000978 -name: template_region -def: "A region of a guide_RNA that specifies the insertions and deletions of bases in the editing of a target mRNA." [SO:jk] -synonym: "information region" EXACT [] -synonym: "template region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000979 -name: gRNA_encoding -def: "A non-protein_coding gene that encodes a guide_RNA." [SO:ma] -synonym: "gRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000980 -name: minicircle -alt_id: SO:0000974 -def: "A minicircle is a replicon, part of a kinetoplast, that encodes for guide RNAs." [PMID:8395055] -synonym: "minicircle_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Minicircle "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000741 ! kinetoplast - -[Term] -id: SO:0000981 -name: rho_dependent_bacterial_terminator -synonym: "rho dependent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000982 -name: rho_independent_bacterial_terminator -synonym: "rho independent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000983 -name: strand_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "strand attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000984 -name: single -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000985 -name: double -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000986 -name: topology_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "topology attribute" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000987 -name: linear -def: "A quality of a nucleotide polymer that has a 3'-terminal residue and a 5'-terminal residue." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "two-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000988 ! circular - -[Term] -id: SO:0000988 -name: circular -def: "A quality of a nucleotide polymer that has no terminal nucleotide residues." 
[SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "zero-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000987 ! linear - -[Term] -id: SO:0000989 -name: class_II_RNA -def: "Small non-coding RNA (59-60 nt long) containing 5' and 3' ends that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -synonym: "class II RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000990 -name: class_I_RNA -def: "Small non-coding RNA (55-65 nt long) containing highly conserved 5' and 3' ends (16 and 8 nt, respectively) that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -comment: Requested by Karen Pilcher - Dictybase. song-Term Tracker-1574577. -synonym: "class I RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000991 -name: genomic_DNA -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "genomic DNA" EXACT [] -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000992 -name: BAC_cloned_genomic_insert -comment: Requested by Andy Schroder - Flybase Harvard, Nov 2006. -synonym: "BAC cloned genomic insert" EXACT [] -is_a: SO:0000914 ! implied link automatically realized ! cloned_genomic_insert -intersection_of: SO:0000914 ! cloned_genomic_insert -intersection_of: derives_from SO:0000153 ! BAC - -[Term] -id: SO:0000993 -name: consensus -comment: Term added Dec 06 to comply with mapping to MGED terms.It should be used to generate consensus regions. The specific cross product terms they require are consensus_region and consensus_mRNA. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000994 -name: consensus_region -comment: DO not obsolete without considering MGED mapping. 
-synonym: "consensus region" EXACT [] -is_a: SO:0001410 ! experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000995 -name: consensus_mRNA -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -is_a: SO:0000994 ! implied link automatically realized ! consensus_region -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000996 -name: predicted_gene -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "predicted gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000732 ! predicted - -[Term] -id: SO:0000997 -name: gene_fragment -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "gene fragment" EXACT [] -is_a: SO:0000842 ! implied link automatically realized ! gene_component_region -intersection_of: SO:0000842 ! gene_component_region -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0000998 -name: recursive_splice_site -def: "A recursive splice site is a splice site which subdivides a large intron. Recursive splicing is a mechanism that splices large introns by sub dividing the intron at non exonic elements and alternate exons." [http://www.genetics.org/cgi/content/full/170/2/661] -synonym: "recursive splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000999 -name: BAC_end -def: "A region of sequence from the end of a BAC clone that may provide a highly specific marker." [SO:ke] -comment: Requested by Keith Boroevich December, 2006. -synonym: "BAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000153 ! 
BAC - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001003 -name: solo_LTR -def: "A recombination product between the 2 LTR of the same element." [SO:ke] -comment: Requested by Hadi Quesneville January 2007. -synonym: "solo LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0001004 -name: low_complexity -synonym: "low complexity" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0001005 -name: low_complexity_region -synonym: "low complexity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001004 ! low_complexity - -[Term] -id: SO:0001006 -name: prophage -def: "A phage genome after it has established in the host genome in a latent/immune state either as a plasmid or as an integrated \"island\"." [GOC:jl] -xref: http://en.wikipedia.org/wiki/Prophage "wiki" -is_a: SO:0000113 ! 
proviral_region - -[Term] -id: SO:0001007 -name: cryptic_prophage -def: "A remnant of an integrated prophage in the host genome or an \"island\" in the host genome that includes phage like-genes." [GOC:jl] -comment: This is not cryptic in the same sense as a cryptic gene or cryptic splice site. -synonym: "cryptic prophage" EXACT [] -xref: http://ecoliwiki.net/colipedia/index.php/Category\:Cryptic_Prophage.w dbxref -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001008 -name: tetraloop -def: "A base-paired stem with loop of 4 non-hydrogen bonded nucleotides." [SO:ke] -xref: http://en.wikipedia.org/wiki/Tetraloop "wiki" -is_a: SO:0000313 ! stem_loop - -[Term] -id: SO:0001009 -name: DNA_constraint_sequence -def: "A double-stranded DNA used to control macromolecular structure and function." [http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au\]&dispmax=50] -synonym: "DNA constraint" EXACT [] -synonym: "DNA constraint sequence" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0001010 -name: i_motif -def: "A cytosine rich domain whereby strands associate both inter- and intramolecularly at moderately acidic pH." [PMID:9753739] -synonym: "i motif" EXACT [] -synonym: "short intercalated motif" EXACT [] -is_a: SO:0000142 ! DNA_sequence_secondary_structure - -[Term] -id: SO:0001011 -name: PNA_oligo -def: "Peptide nucleic acid, is a chemical not known to occur naturally but is artificially synthesized and used in some biological research and medical treatments. The PNA backbone is composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [SO:ke] -synonym: "peptide nucleic acid" EXACT [] -synonym: "PNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Peptide_nucleic_acid "wiki" -is_a: SO:0001247 ! implied link automatically realized ! synthetic_oligo -intersection_of: SO:0001247 ! 
synthetic_oligo -intersection_of: has_quality SO:0001184 ! PNA - -[Term] -id: SO:0001012 -name: DNAzyme -def: "A DNA sequence with catalytic activity." [SO:cb] -comment: Added by request from Colin Batchelor. -synonym: "catalytic DNA" EXACT [] -synonym: "deoxyribozyme" RELATED [] -synonym: "DNA enzyme" EXACT [] -is_a: SO:0000696 ! implied link automatically realized ! oligo -intersection_of: SO:0000696 ! oligo -intersection_of: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0001013 -name: MNP -def: "A multiple nucleotide polymorphism with alleles of common length > 1, for example AAA/TTT." [http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?rs=rs2067431] -synonym: "multiple nucleotide polymorphism" RELATED [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0001014 -name: intron_domain -comment: Requested by Colin Batchelor, Feb 2007. -synonym: "intron domain" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000188 ! intron - -[Term] -id: SO:0001015 -name: wobble_base_pair -def: "A type of non-canonical base pairing, most commonly between G and U, which is important for the secondary structure of RNAs. It has similar thermodynamic stability to the Watson-Crick pairing. Wobble base pairs only have two hydrogen bonds. Other wobble base pair possibilities are I-A, I-U and I-C." [PMID:11256617] -synonym: "wobble base pair" EXACT [] -synonym: "wobble pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Wobble_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0001016 -name: internal_guide_sequence -def: "A purine-rich sequence in the group I introns which determines the locations of the splice sites in group I intron splicing and has catalytic activity." [SO:cb] -synonym: "IGS" EXACT [] -synonym: "internal guide sequence" EXACT [] -is_a: SO:0001014 ! intron_domain -relationship: part_of SO:0000587 ! group_I_intron - -[Term] -id: SO:0001017 -name: silent_mutation -comment: Added in March 2007 in after meeting with pharmgkb. 
-synonym: "silent mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Silent_mutation "wiki" -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0001018 -name: epitope -def: "A region of a macromolecule that is recognized by the immune system." [http://en.wikipedia.org/wiki/Epitope] -comment: Requested by Trish Whetzel. -xref: http://en.wikipedia.org/wiki/Epitope "wiki" -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001060 ! sequence_variant - -[Term] -id: SO:0001020 -name: sequence_variant_affecting_copy_number -synonym: "mutation affecting copy number" EXACT [] -synonym: "sequence variant affecting copy number" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0001021 -name: chromosome_breakpoint -alt_id: SO:0001242 -synonym: "aberration breakpoint" EXACT [] -synonym: "aberration_junction" EXACT [] -synonym: "chromosome breakpoint" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0001022 -name: inversion_breakpoint -def: "The point within a chromosome where an inversion begins or ends." [SO:cb] -synonym: "inversion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001023 -name: allele -def: "An allele is one of a set of coexisting sequence variants of a gene." [SO:immuno_workshop] -synonym: "allelomorph" EXACT [] -xref: http://en.wikipedia.org/wiki/Allele "wiki" -is_a: SO:0001060 ! sequence_variant -relationship: variant_of SO:0000704 ! gene - -[Term] -id: SO:0001024 -name: haplotype -def: "A haplotype is one of a set of coexisting sequence variants of a haplotype block." 
[SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Haplotype "wiki" -is_a: SO:0001060 ! sequence_variant -relationship: variant_of SO:0000355 ! haplotype_block - -[Term] -id: SO:0001025 -name: polymorphic_sequence_variant -def: "A sequence variant that is segregating in one or more natural populations of a species." [SO:immuno_workshop] -synonym: "polymorphic sequence variant" EXACT [] -is_a: SO:0001060 ! sequence_variant - -[Term] -id: SO:0001026 -name: genome -def: "A genome is the sum of genetic material within a cell or virion." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genome "wiki" -is_a: SO:0001260 ! sequence_collection - -[Term] -id: SO:0001027 -name: genotype -def: "A genotype is a variant genome, complete or incomplete." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genotype "wiki" -is_a: SO:0001060 ! sequence_variant -relationship: variant_of SO:0001026 ! genome - -[Term] -id: SO:0001028 -name: diplotype -def: "A diplotype is a pair of haplotypes from a given individual. It is a genotype where the phase is known." [SO:immuno_workshop] -is_a: SO:0001060 ! sequence_variant - -[Term] -id: SO:0001029 -name: direction_attribute -synonym: "direction attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001030 -name: forward -def: "Forward is an attribute of the feature, where the feature is in the 5' to 3' direction." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001031 -name: reverse -def: "Reverse is an attribute of the feature, where the feature is in the 3' to 5' direction. Again could be applied to primer." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001032 -name: mitochondrial_DNA -comment: This terms is used by MO. -synonym: "mitochondrial DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_DNA "wiki" -is_a: SO:0000737 ! implied link automatically realized ! mitochondrial_sequence -intersection_of: SO:0000737 ! 
mitochondrial_sequence -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001033 -name: chloroplast_DNA -comment: This term is used by MO. -synonym: "chloroplast DNA" EXACT [] -is_a: SO:0000745 ! implied link automatically realized ! chloroplast_sequence -intersection_of: SO:0000745 ! chloroplast_sequence -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001034 -name: mirtron -def: "A debranched intron which mimics the structure of pre-miRNA and enters the miRNA processing pathway without Drosha mediated cleavage." [PMID:17589500, SO:ma] -comment: Ruby et al. Nature 448:83 desribe a new class of miRNAs that are derived from debranched introns. -is_a: SO:0001014 ! intron_domain - -[Term] -id: SO:0001035 -name: piRNA -def: "A small non coding RNA, part of a silencing system that prevents the spreading of selfish genetic elements." [SO:ke] -synonym: "piwi-associated RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/PiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001036 -name: arginyl_tRNA -def: "A tRNA sequence that has an arginine anticodon, and a 3' arginine binding region." [SO:ke] -synonym: "arginyl tRNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000212 ! arginine_tRNA_primary_transcript - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0001038 -name: extrachromosomal_mobile_genetic_element -def: "An MGE that is not integrated into the host chromosome." 
[SO:ke] -synonym: "extrachromosomal mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001040 -name: integrated_plasmid -def: "A plasmid sequence that is integrated within the host chromosome." [SO:ke] -synonym: "integrated plasmid" EXACT [] -is_a: SO:0001039 ! implied link automatically realized ! integrated_mobile_genetic_element -intersection_of: SO:0001039 ! integrated_mobile_genetic_element -intersection_of: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0001041 -name: viral_sequence -def: "The region of nucleotide sequence of a virus, a submicroscopic particle that replicates by infecting a host cell." [SO:ke] -comment: The definitions of the children of this term were revised Decemeber 2007 after discussion on song-devel. The resulting definitions are slightly unweildy but hopefully more logically correct. -synonym: "viral sequence" EXACT [] -synonym: "virus sequence" EXACT [] -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0001042 -name: phage_sequence -def: "The nucleotide sequence of a virus that infects bacteria." [SO:ke] -synonym: "bacteriophage" EXACT [] -synonym: "phage" EXACT [] -synonym: "phage sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Bacteriophage "wiki" -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001043 -name: attCtn_site -def: "An attachment site located on a conjugative transposon and used for site-specific integration of a conjugative transposon." [Phigo:at] -synonym: "attCtn site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000371 ! 
conjugative_transposon - -[Term] -id: SO:0001044 -name: nuclear_mt_pseudogene -def: "A nuclear pseudogene of a mitochndrial gene." [SO:xp] -synonym: "nuclear mitochondrial pseudogene" EXACT [] -synonym: "nuclear mt pseudogene" EXACT [] -synonym: "NUMT" EXACT [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0001045 -name: cointegrated_plasmid -def: "A MGE region consisting of two fused plasmids resulting from a replicative transposition event." [phigo:at] -synonym: "cointegrated plasmid" EXACT [] -synonym: "cointegrated replicon" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0001046 -name: IRLinv_site -def: "Component of the inversion site located at the left of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRLinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001047 -name: IRRinv_site -def: "Component of the inversion site located at the right of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRRinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001048 -name: inversion_site_part -def: "A region located within an inversion site." [SO:ke] -comment: A term created to allow the parts of an inversion site have an is_a path back to the root. -synonym: "inversion site part" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0001049 -name: defective_conjugative_transposon -def: "An island that contains genes for integration/excision and the gene and site for the initiation of intercellular transfer by conjugation. It can be complemented for transfer by a conjugative transposon." [Phigo:ariane] -synonym: "defective conjugative transposon" EXACT [] -is_a: SO:0000772 ! 
genomic_island - -[Term] -id: SO:0001050 -name: repeat_fragment -def: "A portion of a repeat, interrupted by the insertion of another element." [SO:ke] -comment: Requested by Chris Smith, and others at Flybase to help annotate nested repeats. -synonym: "repeat fragment" EXACT [] -is_a: SO:0000657 ! implied link automatically realized ! repeat_region -is_a: SO:0000840 ! repeat_component -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001051 -name: nested_region -is_obsolete: true - -[Term] -id: SO:0001052 -name: nested_repeat -is_obsolete: true - -[Term] -id: SO:0001053 -name: nested_transposon -is_obsolete: true - -[Term] -id: SO:0001054 -name: transposon_fragment -synonym: "transposon fragment" EXACT [] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -subset: SOFA -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." [SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001057 -name: enhanceosome -is_obsolete: true - -[Term] -id: SO:0001058 -name: promoter_targeting_sequence -def: "A transcriptional_cis_regulatory_region that restricts the activity of a CRM to a single promoter and which functions only when both itself and an insulator are located between the CRM and the promoter." [SO:regcreative] -synonym: "promoter targeting sequence" EXACT [] -is_a: SO:0001055 ! 
transcriptional_cis_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. -subset: SOFA -synonym: "partially characterised change in DNA sequence" EXACT [] -synonym: "partially_characterised_change_in_DNA_sequence" EXACT [] -synonym: "sequence alteration" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001060 -name: sequence_variant -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -synonym: "sequence variant" EXACT [] - -[Term] -id: SO:0001061 -name: propeptide_cleavage_site -alt_id: BS:00063 -def: "The propeptide_cleavage_site is the arginine/lysine boundary on a propeptide where cleavage occurs." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "propeptide cleavage site" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0001062 -name: propeptide -alt_id: BS:00077 -def: "Part of a peptide chain which is cleaved off during the formation of the mature protein." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "propep" RELATED [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Propeptide "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature_peptide_region" EXACT [] -is_a: SO:0000839 ! 
polypeptide_region - -[Term] -id: SO:0001064 -name: active_peptide -alt_id: BS:00076 -def: "Active peptides are proteins which are biologically active, released from a precursor molecule." [EBIBS:GAR, UniProt:curation_manual] -comment: Hormones, neuropeptides, antimicrobial peptides, are active peptides. They are typically short (<40 amino acids) in length. -subset: biosapiens -synonym: "active peptide" EXACT [] -synonym: "peptide" BROAD [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Peptide "wiki" -is_a: SO:0000419 ! mature_protein_region - -[Term] -id: SO:0001066 -name: compositionally_biased_region_of_peptide -alt_id: BS:00068 -def: "Polypeptide region that is rich in a particular amino acid or homopolymeric and greater than three residues in length." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "compbias" RELATED [uniprot:feature_type] -synonym: "compositional bias" RELATED [] -synonym: "compositionally biased" RELATED [] -synonym: "compositionally biased region of peptide" RELATED [] -synonym: "compositionally_biased_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001067 -name: polypeptide_motif -alt_id: BS:00032 -def: "A sequence motif is a short (up to 20 amino acids) region of biological interest. Such motifs, although they are too short to constitute functional domains, share sequence similarities and are conserved in different proteins. They display a common function (protein-binding, subcellular location etc.)." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "motif" BROAD [uniprot:feature_type] -synonym: "polypeptide motif" EXACT [] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001068 -name: polypeptide_repeat -alt_id: BS:00070 -def: "A polypeptide_repeat is a single copy of an internal sequence repetition." [EBIBS:GAR] -comment: Range. 
-subset: biosapiens -synonym: "polypeptide repeat" EXACT [] -synonym: "repeat" RELATED [uniprot:feature_type] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001070 -name: polypeptide_structural_region -alt_id: BS:00337 -def: "Region of polypeptide with a given structural property." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "polypeptide structural region" EXACT [] -synonym: "structural_region" RELATED [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001071 -name: membrane_structure -alt_id: BS:00128 -def: "Arrangement of the polypeptide with respect to the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "membrane_structure" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001072 -name: extramembrane_polypeptide_region -alt_id: BS:00154 -def: "Polypeptide region that is localized outside of a lipid bilayer." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "extramembrane" RELATED BS [] -synonym: "extramembrane polypeptide region" EXACT [] -synonym: "extramembrane_region" RELATED BS [] -synonym: "topo_dom" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001073 -name: cytoplasmic_polypeptide_region -alt_id: BS:00145 -def: "Polypeptide region that is localized inside the cytoplasm." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "cytoplasm_location" EXACT BS [] -synonym: "cytoplasmic polypeptide region" EXACT [] -synonym: "inside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001074 -name: non_cytoplasmic_polypeptide_region -alt_id: BS:00144 -def: "Polypeptide region that is localized outside of a lipid bilayer and outside of the cytoplasm." [EBIBS:GAR, SO:cb] -comment: This could be inside an organelle within the cell. 
-subset: biosapiens -synonym: "non cytoplasmic polypeptide region" EXACT [] -synonym: "non_cytoplasm_location" EXACT BS [] -synonym: "outside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001075 -name: intramembrane_polypeptide_region -alt_id: BS:00156 -def: "Polypeptide region present in the lipid bilayer." [EBIBS:GAR] -subset: biosapiens -synonym: "intramembrane" RELATED BS [] -synonym: "intramembrane polypeptide region" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001076 -name: membrane_peptide_loop -alt_id: BS:00155 -def: "Polypeptide region localized within the lipid bilayer where both ends traverse the same membrane." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "membrane peptide loop" EXACT [] -synonym: "membrane_loop" RELATED BS [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001077 -name: transmembrane_polypeptide_region -alt_id: BS:00158 -def: "Polypeptide region traversing the lipid bilayer." [EBIBS:GAR, UniProt:curator_manual] -subset: biosapiens -synonym: "transmem" RELATED BS [uniprot:feature_type] -synonym: "transmembrane" RELATED BS [] -synonym: "transmembrane polypeptide region" EXACT [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001078 -name: polypeptide_secondary_structure -alt_id: BS:00003 -def: "A region of peptide with secondary structure has hydrogen bonding along the peptide chain that causes a defined conformation of the chain." [EBIBS:GAR] -comment: Biosapien term was secondary_structure. -subset: biosapiens -synonym: "2nary structure" RELATED BS [] -synonym: "polypeptide secondary structure" EXACT [] -synonym: "secondary structure" RELATED BS [] -synonym: "secondary structure region" RELATED BS [] -synonym: "secondary_structure" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Secondary_structure "wiki" -is_a: SO:0001070 ! 
polypeptide_structural_region - -[Term] -id: SO:0001079 -name: polypeptide_structural_motif -alt_id: BS:0000338 -def: "Motif is a three-dimensional structural element within the chain, which appears also in a variety of other molecules. Unlike a domain, a motif does not need to form a stable globular unit." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide structural motif" RELATED [] -synonym: "structural_motif" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Structural_motif "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001080 -name: coiled_coil -alt_id: BS:00041 -def: "A coiled coil is a structural motif in proteins, in which alpha-helices are coiled together like the strands of a rope." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "coiled" RELATED BS [uniprot:feature_type] -synonym: "coiled coil" EXACT [] -xref: http://en.wikipedia.org/wiki/Coiled_coil "wiki" -is_a: SO:0001079 ! polypeptide_structural_motif - -[Term] -id: SO:0001081 -name: helix_turn_helix -alt_id: BS:00147 -def: "A motif comprising two helices separated by a turn." [EBIBS:GAR] -subset: biosapiens -synonym: "helix turn helix" EXACT [] -synonym: "helix-turn-helix" EXACT [] -synonym: "HTH" RELATED BS [] -is_a: SO:0001079 ! polypeptide_structural_motif -relationship: has_part SO:0001114 ! peptide_helix -relationship: has_part SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001082 -name: polypeptide_sequencing_information -alt_id: BS:00125 -def: "Incompatibility in the sequence due to some experimental problem." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "sequencing_information" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0001083 -name: non_adjacent_residues -alt_id: BS:00182 -def: "Indicates that two consecutive residues in a fragment sequence are not consecutive in the full-length protein and that there are a number of unsequenced residues between them." 
[EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "non consecutive" EXACT [] -synonym: "non_cons" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001084 -name: non_terminal_residue -alt_id: BS:00072 -def: "The residue at an extremity of the sequence is not the terminal residue." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "non terminal" EXACT [] -synonym: "non_ter" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001085 -name: sequence_conflict -alt_id: BS:00069 -def: "Different sources report differing sequences." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "conflict" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001086 -name: sequence_uncertainty -alt_id: BS:00181 -def: "Describes the positions in a sequence where the authors are unsure about the sequence assignment." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "unsure" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001087 -name: cross_link -alt_id: BS:00178 -def: "Posttranslationally formed amino acid bonds." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "cross_link" EXACT [] -synonym: "crosslink" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001088 -name: disulfide_bond -alt_id: BS:00028 -def: "The covalent bond between sulfur atoms that binds two peptide chains or different parts of one peptide chain and is a structural determinant in many protein molecules." [EBIBS:GAR, UniProt:curation_manual] -comment: 2 discreet & joined. 
-subset: biosapiens -synonym: "disulfid" RELATED [] -synonym: "disulfide" RELATED [] -synonym: "disulfide bond" RELATED [] -synonym: "disulfide_bond" EXACT [] -synonym: "disulphide" EXACT [] -synonym: "disulphide bond" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001089 -name: post_translationally_modified_region -alt_id: BS:00052 -def: "A region where a transformation occurs in a protein after it has been synthesized. This which may regulate, stabilize, crosslink or introduce new chemical functionalities in the protein." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mod_res" EXACT [uniprot:feature_type] -synonym: "modified residue" EXACT [] -synonym: "post_translational_modification" EXACT [] -xref: http://en.wikipedia.org/wiki/Post_translational_modification "wiki" -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001090 -name: covalent_binding_site -alt_id: BS:00246 -def: "Binding involving a covalent bond." [EBIBS:GAR] -subset: biosapiens -synonym: "covalent_binding_site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001091 -name: non_covalent_binding_site -alt_id: BS:00029 -def: "Binding site for any chemical group (co-enzyme, prosthetic group, etc.)." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "binding" RELATED [uniprot:curation] -synonym: "binding site" RELATED [] -synonym: "non_covalent_binding_site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001092 -name: polypeptide_metal_contact -alt_id: BS:00027 -def: "Residue is part of a binding site for a metal ion." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "metal_binding" RELATED [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001093 -name: protein_protein_contact -alt_id: BS:00131 -def: "Residues involved in protein-protein interactions." 
[EBIBS:GAR, UniProt:Curation_manual] -subset: biosapiens -synonym: "protein protein contact" EXACT [] -synonym: "protein protein contact site" EXACT [] -synonym: "protein_protein_interaction" RELATED [] -xref: http://en.wikipedia.org/wiki/Protein_protein_interaction "wiki" -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001094 -name: polypeptide_calcium_ion_contact_site -alt_id: BS:00186 -def: "Residue involved in contact with calcium." [EBIBS:GAR] -subset: biosapiens -synonym: "ca bind" RELATED [] -synonym: "Ca_contact_site" EXACT [] -synonym: "polypeptide calcium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001095 -name: polypeptide_cobalt_ion_contact_site -alt_id: BS:00136 -def: "Residue involved in contact with cobalt." [EBIBS:GAR] -subset: biosapiens -synonym: "Co_contact_site" EXACT [] -synonym: "polypeptide cobalt ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001096 -name: polypeptide_copper_ion_contact_site -alt_id: BS:00146 -def: "Residue involved in contact with copper." [EBIBS:GAR] -subset: biosapiens -synonym: "Cu_contact_site" EXACT [] -synonym: "polypeptide copper ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001097 -name: polypeptide_iron_ion_contact_site -alt_id: BS:00137 -def: "Residue involved in contact with iron." [EBIBS:GAR] -subset: biosapiens -synonym: "Fe_contact_site" EXACT [] -synonym: "polypeptide iron ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001098 -name: polypeptide_magnesium_ion_contact_site -alt_id: BS:00187 -def: "Residue involved in contact with magnesium." [EBIBS:GAR] -subset: biosapiens -synonym: "Mg_contact_site" EXACT [] -synonym: "polypeptide magnesium ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001099 -name: polypeptide_manganese_ion_contact_site -alt_id: BS:00140 -def: "Residue involved in contact with manganese." [EBIBS:GAR] -subset: biosapiens -synonym: "Mn_contact_site" EXACT [] -synonym: "polypeptide manganese ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001100 -name: polypeptide_molybdenum_ion_contact_site -alt_id: BS:00141 -def: "Residue involved in contact with molybdenum." [EBIBS:GAR] -subset: biosapiens -synonym: "Mo_contact_site" EXACT [] -synonym: "polypeptide molybdenum ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001101 -name: polypeptide_nickel_ion_contact_site -alt_id: BS:00142 -def: "Residue involved in contact with nickel." [EBIBS:GAR] -subset: biosapiens -synonym: "Ni_contact_site" EXACT [] -synonym: "polypeptide nickel ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001102 -name: polypeptide_tungsten_ion_contact_site -alt_id: BS:00143 -def: "Residue involved in contact with tungsten." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide tungsten ion contact site" EXACT [] -synonym: "W_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001103 -name: polypeptide_zinc_ion_contact_site -alt_id: BS:00185 -def: "Residue involved in contact with zinc." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide zinc ion contact site" EXACT [] -synonym: "Zn_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001104 -name: catalytic_residue -alt_id: BS:00026 -def: "Amino acid involved in the activity of an enzyme." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "act_site" RELATED [uniprot:feature_type] -synonym: "active site residue" EXACT [] -synonym: "catalytic residue" EXACT [] -is_a: SO:0001237 ! amino_acid -relationship: part_of SO:0100019 ! 
polypeptide_catalytic_motif - -[Term] -id: SO:0001105 -name: polypeptide_ligand_contact -alt_id: BS:00157 -def: "Residues which interact with a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide ligand contact" EXACT [] -synonym: "protein-ligand interaction" RELATED [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001106 -name: asx_motif -alt_id: BS:00202 -def: "A motif of five consecutive residues and two H-bonds in which: Residue(i) is Aspartate or Asparagine (Asx), side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3), main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001107 -name: beta_bulge -alt_id: BS:00208 -def: "A motif of three residues within a beta-sheet in which the main chains of two consecutive residues are H-bonded to that of the third, and in which the dihedral angles are as follows: Residue(i): -140 degrees < phi(l) -20 degrees , -90 degrees < psi(l) < 40 degrees. Residue (i+1): -180 degrees < phi < -25 degrees or +120 degrees < phi < +180 degrees, +40 degrees < psi < +180 degrees or -180 degrees < psi < -120 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge" EXACT [] -xref: http://en.wikipedia.org/wiki/Beta_bulge "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001108 -name: beta_bulge_loop -alt_id: BS:00209 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds. Beta bulge loops often occur at the loop ends of beta-hairpins." [EBIBS:GAR, Http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001109 -name: beta_bulge_loop_five -alt_id: BS:00210 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+4), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+3), these loops have an RL nest at residues i+2 and i+3." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop five" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001110 -name: beta_bulge_loop_six -alt_id: BS:00211 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+5), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+4), these loops have an RL nest at residues i+3 and i+4." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop six" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001111 -name: beta_strand -alt_id: BS:00042 -def: "A beta strand describes a single length of polypeptide chain that forms part of a beta sheet. A single continuous stretch of amino acids adopting an extended conformation of hydrogen bonds between the N-O and the C=O of another part of the peptide. This forms a secondary protein structure in which two or more extended polypeptide regions are hydrogen-bonded to one another in a planar array." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "strand" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Beta_sheet "wiki" -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001112 -name: antiparallel_beta_strand -alt_id: BS:0000341 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (one running N-terminal to C-terminal and one running C-terminal to N-terminal). Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i) and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they form two mutual backbone hydrogen bonds to each other's flanking peptide groups; this is known as a close pair of hydrogen bonds. The peptide backbone dihedral angles (phi, psi) are about (-140 degrees, 135 degrees) in antiparallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "antiparallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001113 -name: parallel_beta_strand -alt_id: BS:00151 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (both running N-terminal to C-terminal). This orientation is slightly less stable because it introduces nonplanarity in the inter-strand hydrogen bonding pattern. Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i)and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they do not hydrogen bond to each other; rather, one residue forms hydrogen bonds to the residues that flank the other (but not vice versa). For example, residue i may form hydrogen bonds to residues j - 1 and j + 1; this is known as a wide pair of hydrogen bonds. By contrast, residue j may hydrogen-bond to different residues altogether, or to none at all. The dihedral angles (phi, psi) are about (-120 degrees, 115 degrees) in parallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. 
-subset: biosapiens -synonym: "parallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001114 -name: peptide_helix -alt_id: BS:00152 -def: "A helix is a secondary_structure conformation where the peptide backbone forms a coil." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "helix" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001115 -name: left_handed_peptide_helix -alt_id: BS:00222 -def: "A left handed helix is a region of peptide where the coiled conformation turns in an anticlockwise, left handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix-l" RELATED [] -synonym: "left handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001116 -name: right_handed_peptide_helix -alt_id: BS:0000339 -def: "A right handed helix is a region of peptide where the coiled conformation turns in a clockwise, right handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix" RELATED BS [] -synonym: "right handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001117 -name: alpha_helix -alt_id: BS:00040 -def: "The helix has 3.6 residues per turn which corersponds to a translation of 1.5 angstroms (= 0.15 nm) along the helical axis. Every backbone N-H group donates a hydrogen bond to the backbone C=O group of the amino acid four residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "a-helix" RELATED BS [] -synonym: "helix" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Alpha_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001118 -name: pi_helix -alt_id: BS:00153 -def: "The pi helix has 4.1 residues per turn and a translation of 1.15 (=0.115 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid five residues earlier." [EBIBS:GAR] -comment: Range. 
-subset: biosapiens -synonym: "pi helix" EXACT [] -xref: http://en.wikipedia.org/wiki/Pi_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001119 -name: three_ten_helix -alt_id: BS:0000340 -def: "The 3-10 helix has 3 residues per turn with a translation of 2.0 angstroms (=0.2 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid three residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "3(10) helix" EXACT [] -synonym: "3-10 helix" EXACT [] -synonym: "310 helix" EXACT [] -synonym: "three_ten_helix" EXACT [] -xref: http://en.wikipedia.org/wiki/310_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001120 -name: polypeptide_nest_motif -alt_id: BS:00223 -def: "A motif of two consecutive residues with dihedral angles. Nest should not have Proline as any residue. Nests frequently occur as parts of other motifs such as Schellman loops." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest" RELATED BS [] -synonym: "nest_motif" EXACT [] -synonym: "polypeptide nest motif" RELATED [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001121 -name: polypeptide_nest_left_right_motif -alt_id: BS:00224 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_left_right" EXACT [] -synonym: "nest_lr" EXACT [] -synonym: "polypeptide nest left right motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001122 -name: polypeptide_nest_right_left_motif -alt_id: BS:00225 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. 
Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_right_left" EXACT [] -synonym: "nest_rl" EXACT [] -synonym: "polypeptide nest right left motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001123 -name: schellmann_loop -alt_id: BS:00226 -def: "A motif of six or seven consecutive residues that contains two H-bonds." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "paperclip" RELATED BS [] -synonym: "paperclip loop" RELATED [] -synonym: "schellmann_loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001124 -name: schellmann_loop_seven -alt_id: BS:00228 -def: "Wild type: A motif of seven consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+6), the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+5)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop seven" EXACT [] -synonym: "seven-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001125 -name: schellmann_loop_six -alt_id: BS:00227 -def: "Common Type: A motif of six consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+5) the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop six" EXACT [] -synonym: "six-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! 
schellmann_loop - -[Term] -id: SO:0001126 -name: serine_threonine_motif -alt_id: BS:00229 -def: "A motif of five consecutive residues and two hydrogen bonds in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3) , the main-chain CO group of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine motif" EXACT [] -synonym: "st motif" EXACT [] -synonym: "st_motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001127 -name: serine_threonine_staple_motif -alt_id: BS:00230 -def: "A motif of four or five consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain OH of residue(i) is H-bonded to the main-chain CO of residue(i3) or (i4), Phi angles of residues(i1), (i2) and (i3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine threonine staple motif" EXACT [] -synonym: "st_staple" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001128 -name: polypeptide_turn_motif -alt_id: BS:00148 -def: "A reversal in the direction of the backbone of a protein that is stabilized by hydrogen bond between backbone NH and CO groups, involving no more than 4 amino acid residues." [EBIBS:GAR, uniprot:feature_type] -comment: Range. -subset: biosapiens -synonym: "turn" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001129 -name: asx_turn_left_handed_type_one -alt_id: BS:00206 -def: "Left handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type one" EXACT [] -synonym: "asx_turn_il" RELATED [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001130 -name: asx_turn_left_handed_type_two -alt_id: BS:00204 -def: "Left handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type two" EXACT [] -synonym: "asx_turn_iil" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001131 -name: asx_turn_right_handed_type_two -alt_id: BS:00205 -def: "Right handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn right handed type two" EXACT [] -synonym: "asx_turn_iir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001132 -name: asx_turn_right_handed_type_one -alt_id: BS:00207 -def: "Right handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn type right handed type one" EXACT [] -synonym: "asx_turn_ir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001133 -name: beta_turn -alt_id: BS:00212 -def: "A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. 
It is characterized by the dihedral angles of the second and third residues, which are the basis for sub-categorization." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001134 -name: beta_turn_left_handed_type_one -alt_id: BS:00215 -def: "Left handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles:- Residue(i+1): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees. Residue(i+2): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type one" EXACT [] -synonym: "beta_turn_il" EXACT [] -synonym: "type I' beta turn" EXACT [] -synonym: "type I' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001135 -name: beta_turn_left_handed_type_two -alt_id: BS:00213 -def: "Left handed type II: A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees > phi > -20 degrees, +80 degrees > psi > +180 degrees. Residue(i+2): +20 degrees > phi > +140 degrees, -40 degrees > psi > +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type two" EXACT [] -synonym: "beta_turn_iil" EXACT [] -synonym: "type II' beta turn" EXACT [] -synonym: "type II' turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001136 -name: beta_turn_right_handed_type_one -alt_id: BS:00216 -def: "Right handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+2): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type one" EXACT [] -synonym: "beta_turn_ir" EXACT [] -synonym: "type I beta turn" EXACT [] -synonym: "type I turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001137 -name: beta_turn_right_handed_type_two -alt_id: BS:00214 -def: "Right handed type II:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, +80 degrees < psi < +180 degrees. Residue(i+2): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type two" EXACT [] -synonym: "beta_turn_iir" EXACT [] -synonym: "type II beta turn" EXACT [] -synonym: "type II turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001138 -name: gamma_turn -alt_id: BS:00219 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn" EXACT [] -is_a: SO:0001128 ! 
polypeptide_turn_motif - -[Term] -id: SO:0001139 -name: gamma_turn_classic -alt_id: BS:00220 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=75.0 - psi(i+1)=-64.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "classic gamma turn" EXACT [] -synonym: "gamma turn classic" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001140 -name: gamma_turn_inverse -alt_id: BS:00221 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=-79.0 - psi(i+1)=69.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn inverse" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001141 -name: serine_threonine_turn -alt_id: BS:00231 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine turn" EXACT [] -synonym: "st_turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001142 -name: st_turn_left_handed_type_one -alt_id: BS:00234 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type one" EXACT [] -synonym: "st_turn_il" EXACT [] -is_a: SO:0001141 ! 
serine_threonine_turn - -[Term] -id: SO:0001143 -name: st_turn_left_handed_type_two -alt_id: BS:00232 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type two" EXACT [] -synonym: "st_turn_iil" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001144 -name: st_turn_right_handed_type_one -alt_id: BS:00235 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type one" EXACT [] -synonym: "st_turn_ir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001145 -name: st_turn_right_handed_type_two -alt_id: BS:00233 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type two" EXACT [] -synonym: "st_turn_iir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001146 -name: polypeptide_variation_site -alt_id: BS:00336 -def: "A site of sequence variation (alteration). Alternative sequence due to naturally occuring events such as polymorphisms and altermatve splicing or experimental methods such as site directed mutagenesis." 
[EBIBS:GAR, SO:ke] -comment: For example, was a substitution natural or mutated as part of an experiment? This term is added to merge the biosapiens term sequence_variations. -subset: biosapiens -synonym: "sequence_variations" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001147 -name: natural_variant_site -alt_id: BS:00071 -def: "Describes the natural sequence variants due to polymorphisms, disease-associated mutations, RNA editing and variations between strains, isolates or cultivars." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "natural_variant" BROAD [] -synonym: "sequence variation" BROAD [] -synonym: "variant" BROAD [uniprot:feature_type] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001148 -name: mutated_variant_site -alt_id: BS:00036 -def: "Site which has been experimentally altered." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mutagen" EXACT BS [uniprot:feature_type] -synonym: "mutagenesis" EXACT [] -synonym: "mutated_site" EXACT [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001149 -name: alternate_sequence_site -alt_id: BS:00073 -alt_id: SO:0001065 -def: "Description of sequence variants produced by alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "alternative_sequence" EXACT [] -synonym: "isoform" NARROW [] -synonym: "sequence variation" NARROW [] -synonym: "var_seq" EXACT [uniprot:feature_type] -synonym: "varsplic" NARROW [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001150 -name: beta_turn_type_six -def: "A motif of four consecutive peptide resides of type VIa or type VIb and where the i+2 residue is cis-proline." 
[SO:cb] -subset: biosapiens -synonym: "beta turn type six" EXACT [] -synonym: "cis-proline loop" EXACT [] -synonym: "type VI beta turn" EXACT [] -synonym: "type VI turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001151 -name: beta_turn_type_six_a -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -90 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six a" EXACT [] -synonym: "type VIa beta turn" EXACT [] -synonym: "type VIa turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001152 -name: beta_turn_type_six_a_one -subset: biosapiens -synonym: "beta turn type six a one" EXACT [] -synonym: "type VIa1 beta turn" EXACT [] -synonym: "type VIa1 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001153 -name: beta_turn_type_six_a_two -subset: biosapiens -synonym: "beta turn type six a two" EXACT [] -synonym: "type VIa2 beta turn" EXACT [] -synonym: "type VIa2 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001154 -name: beta_turn_type_six_b -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -120 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -60 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six b" EXACT [] -synonym: "type VIb beta turn" EXACT [] -synonym: "type VIb turn" EXACT [] -is_a: SO:0001150 ! 
beta_turn_type_six - -[Term] -id: SO:0001155 -name: beta_turn_type_eight -def: "A motif of four consecutive peptide residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ -30 degrees. Residue(i+2): phi ~ -120 degrees, psi ~ 120 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type eight" EXACT [] -synonym: "type VIII beta turn" EXACT [] -synonym: "type VIII turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001156 -name: DRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -10 and -60 relative to the TSS. Consensus sequence is WATCGATW." [PMID:12537576] -comment: This consensus sequence was identified computationally using the MEME algorithm within core promoter sequences from -60 to +40, with an E value of 1.7e-183. Tends to co-occur with Motif 7. Tends to not occur with DPE motif (SO:0000015) or motif 10. -synonym: "DRE motif" EXACT [] -synonym: "NDM4" EXACT [] -synonym: "WATCGATW_motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001157 -name: DMv4_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements with respect to the TSS (+1). Consensus sequence is YGGTCACACTR. Marked spatial preference within core promoter; tend to occur near the TSS, although not as tightly as INR (SO:0000014)." [PMID:16827941\:12537576] -synonym: "directional motif v4" EXACT [] -synonym: "DMv4" EXACT [] -synonym: "DMv4 motif" EXACT [] -synonym: "motif 1 element" EXACT [] -synonym: "promoter motif 1" EXACT [] -synonym: "YGGTCACATR" NARROW [] -is_a: SO:0000713 ! 
DNA_motif - -[Term] -id: SO:0001158 -name: E_box_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and +1 relative to the TSS. Consensus sequence is AWCAGCTGWT. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015)." [PMID:12537576\:16827941] -synonym: "AWCAGCTGWT" NARROW [] -synonym: "E box motif" EXACT [] -synonym: "generic E box motif" EXACT [] -synonym: "NDM5" RELATED [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001159 -name: DMv5_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -50 and -10 relative to the TSS. Consensus sequence is KTYRGTATWTTT. Tends to co-occur with DMv4 (SO:0001157) . Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v5" EXACT [] -synonym: "DMv5" EXACT [] -synonym: "DMv5 motif" EXACT [] -synonym: "KTYRGTATWTTT" NARROW [] -synonym: "promoter motif 6" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001160 -name: DMv3_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -30 and +15 relative to the TSS. Consensus sequence is KNNCAKCNCTRNY. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015) or MTE (0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v3" EXACT [] -synonym: "DMv3" EXACT [] -synonym: "DMv3 motif" EXACT [] -synonym: "KNNCAKCNCTRNY" NARROW [] -synonym: "promoter motif 7" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001161 -name: DMv2_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and -45 relative to the TSS. 
Consensus sequence is MKSYGGCARCGSYSS. Tends to co-occur with DMv3 (SO:0001160). Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v2" EXACT [] -synonym: "DMv2" EXACT [] -synonym: "DMv2 motif" EXACT [] -synonym: "MKSYGGCARCGSYSS" NARROW [] -synonym: "promoter motif 8" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001162 -name: MTE -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between +20 and +30 relative to the TSS. Consensus sequence is CSARCSSAACGS. Tends to co-occur with INR motif (SO:0000014). Tends to not occur with DPE motif (SO:0000015) or DMv5 (SO:0001159)." [PMID:12537576\:15231738] -synonym: "CSARCSSAACGS" NARROW [] -synonym: "motif ten element" EXACT [] -synonym: "motif_ten_element" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001163 -name: INR1_motif -def: "A promoter motif with consensus sequence TCATTCG." [PMID:16827941] -synonym: "directional motif p3" EXACT [] -synonym: "directional promoter motif 3" EXACT [] -synonym: "DMp3" EXACT [] -synonym: "INR1 motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001164 -name: DPE1_motif -def: "A promoter motif with consensus sequence CGGACGT." [PMID:16827941] -synonym: "directional motif 5" EXACT [] -synonym: "directional promoter motif 5" RELATED [] -synonym: "DMp5" EXACT [] -synonym: "DPE1 motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001165 -name: DMv1_motif -def: "A promoter motif with consensus sequence CARCCCT." [PMID:16827941] -synonym: "directional promoter motif v1" RELATED [] -synonym: "DMv1" RELATED [] -synonym: "DMv1 motif" EXACT [] -is_a: SO:0000713 ! 
DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001166 -name: GAGA_motif -def: "A non directional promoter motif with consensus sequence GAGAGCG." [PMID:16827941] -synonym: "GAGA" EXACT [] -synonym: "GAGA motif" EXACT [] -synonym: "NDM1" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001167 -name: NDM2_motif -def: "A non directional promoter motif with consensus CGMYGYCR." [PMID:16827941] -synonym: "NDM2" EXACT [] -synonym: "NDM2 motif" EXACT [] -synonym: "non directional promoter motif 2" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001168 -name: NDM3_motif -def: "A non directional promoter motif with consensus sequence GAAAGCT." [PMID:16827941] -synonym: "NDM3" EXACT [] -synonym: "NDM3 motif" EXACT [] -synonym: "non directional motif 3" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001169 -name: ds_RNA_viral_sequence -def: "A ds_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded RNA." [SO:ke] -synonym: "double stranded RNA virus sequence" EXACT [] -synonym: "ds RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001170 -name: polinton -def: "A kind of DNA transposon that populates the genomes of protists, fungi, and animals, characterized by a unique set of proteins necessary for their transposition, including a protein-primed DNA polymerase B, retroviral integrase, cysteine protease, and ATPase. Polintons are characterized by 6-bp target site duplications, terminal-inverted repeats that are several hundred nucleotides long, and 5'-AG and TC-3' termini. Polintons exist as autonomous and nonautonomous elements." [PMID:16537396] -synonym: "maverick element" RELATED [] -is_a: SO:0000208 ! 
terminal_inverted_repeat_element - -[Term] -id: SO:0001171 -name: rRNA_21S -def: "A component of the large ribosomal subunit in mitochondrial rRNA." [RSC:cb] -synonym: "21S LSU rRNA" EXACT [] -synonym: "21S ribosomal RNA" EXACT [] -synonym: "21S rRNA" EXACT [] -synonym: "rRNA 21S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001172 -name: tRNA_region -def: "A region of a tRNA." [RSC:cb] -synonym: "tRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000253 ! tRNA - -[Term] -id: SO:0001173 -name: anticodon_loop -def: "A sequence of seven nucleotide bases in tRNA which contains the anticodon. It has the sequence 5'-pyrimidine-purine-anticodon-modified purine-any base-3." [ISBN:0716719207] -synonym: "anti-codon loop" EXACT [] -synonym: "anticodon loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001174 -name: anticodon -def: "A sequence of three nucleotide bases in tRNA which recognizes a codon in mRNA." [RSC:cb] -synonym: "anti-codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Anticodon "wiki" -is_a: SO:0001172 ! tRNA_region -relationship: part_of SO:0001173 ! anticodon_loop - -[Term] -id: SO:0001175 -name: CCA_tail -def: "Base sequence at the 3' end of a tRNA. The 3'-hydroxyl group on the terminal adenosine is the attachment point for the amino acid." [ISBN:0716719207] -synonym: "CCA sequence" EXACT [] -synonym: "CCA tail" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001176 -name: DHU_loop -def: "Non-base-paired sequence of nucleotide bases in tRNA. It contains several dihydrouracil residues." [ISBN:071671920] -synonym: "D loop" RELATED [] -synonym: "DHU loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001177 -name: T_loop -def: "Non-base-paired sequence of three nucleotide bases in tRNA. It has sequence T-Psi-C." [ISBN:0716719207] -synonym: "T loop" EXACT [] -synonym: "TpsiC loop" EXACT [] -is_a: SO:0001172 ! 
tRNA_region - -[Term] -id: SO:0001178 -name: pyrrolysine_tRNA_primary_transcript -def: "A primary transcript encoding pyrrolysyl tRNA (SO:0000766)." [RSC:cb] -synonym: "pyrrolysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0001179 -name: U3_snoRNA -def: "U3 snoRNA is a member of the box C/D class of small nucleolar RNAs. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -comment: The definition is most of the old definition for snoRNA (SO:0000275). -synonym: "small nucleolar RNA U3" EXACT [] -synonym: "snoRNA U3" EXACT [] -synonym: "U3 small nucleolar RNA" EXACT [] -synonym: "U3 snoRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Small_nucleolar_RNA_U3 "wiki" -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0001180 -name: AU_rich_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is rich in AUUUA pentamers. Messenger RNAs bearing multiple AU-rich elements are often unstable." 
[PMID:7892223] -synonym: "ARE" RELATED [] -synonym: "AU rich element" EXACT [] -synonym: "AU-rich element" EXACT [] -xref: http://en.wikipedia.org/wiki/AU-rich_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001181 -name: Bruno_response_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is bound by the Drosophila Bruno protein and its homologs." [PMID:10893231] -comment: Not to be confused with BRE_motif (SO:0000016), which binds transcription factor II B. -synonym: "BRE" RELATED [] -synonym: "Bruno response element" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001182 -name: iron_responsive_element -def: "A regulatory sequence found in the 5' and 3' UTRs of many mRNAs which encode iron-binding proteins. It has a hairpin structure and is recognized by trans-acting proteins known as iron-regulatory proteins." [PMID:3198610, PMID:8710843] -synonym: "IRE" EXACT [] -synonym: "iron responsive element" EXACT [] -xref: http://en.wikipedia.org/wiki/Iron_responsive_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0001183 -name: morpholino -def: "An attribute describing a sequence composed of nucleobases bound to a morpholino backbone. A morpholino backbone consists of morpholine (CHEBI:34856) rings connected by phosphorodiamidate linkages." [RSC:cb] -comment: Do not use this for feature annotation. Use morpholino_oligo (SO:0000034) instead. -xref: http://en.wikipedia.org/wiki/Morpholino "wiki" -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001184 -name: PNA -def: "An attribute describing a sequence composed of peptide nucleic acid (CHEBI:48021), a chemical consisting of nucleobases bound to a backbone composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. 
The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [RSC:cb] -comment: Do not use this term for feature annotation. Use PNA_oligo (SO:0001011) instead. -synonym: "peptide nucleic acid" RELATED [] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001185 -name: enzymatic -def: "An attribute describing the sequence of a transcript that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use enzymatic_RNA (SO:0000372) instead. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001186 -name: ribozymic -def: "An attribute describing the sequence of a transcript that has catalytic activity even without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use ribozyme (SO:0000374) instead. -is_a: SO:0001185 ! enzymatic - -[Term] -id: SO:0001187 -name: pseudouridylation_guide_snoRNA -def: "A snoRNA that specifies the site of pseudouridylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA pseudouridylation guide activity (GO:0030558). -synonym: "pseudouridylation guide snoRNA" EXACT [] -is_a: SO:0000594 ! H_ACA_box_snoRNA - -[Term] -id: SO:0001188 -name: LNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of 'locked' deoxyribose rings connected to a phosphate backbone. The deoxyribose unit's conformation is 'locked' by a 2'-C,4'-C-oxymethylene link." [CHEBI:48010] -comment: Do not use this term for feature annotation. Use LNA_oligo (SO:0001189) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001189 -name: LNA_oligo -def: "An oligo composed of LNA residues." [RSC:cb] -synonym: "LNA oligo" EXACT [] -synonym: "locked nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Locked_nucleic_acid "wiki" -is_a: SO:0001247 ! implied link automatically realized ! 
synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001188 ! LNA - -[Term] -id: SO:0001190 -name: TNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of threose rings connected to a phosphate backbone." [CHEBI:48019] -comment: Do not use this term for feature annotation. Use TNA_oligo (SO:0001191) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001191 -name: TNA_oligo -def: "An oligo composed of TNA residues." [RSC:cb] -synonym: "threose nucleic acid" EXACT [] -synonym: "TNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Threose_nucleic_acid "wiki" -is_a: SO:0001247 ! implied link automatically realized ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001190 ! TNA - -[Term] -id: SO:0001192 -name: GNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of an acyclic three-carbon propylene glycol connected to a phosphate backbone. It has two enantiomeric forms, (R)-GNA and (S)-GNA." [CHEBI:48015] -comment: Do not use this term for feature annotation. Use GNA_oligo (SO:0001192) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001193 -name: GNA_oligo -def: "An oligo composed of GNA residues." [RSC:cb] -synonym: "glycerol nucleic acid" EXACT [] -synonym: "glycol nucleic acid" EXACT [] -synonym: "GNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Glycerol_nucleic_acid "wiki" -is_a: SO:0001247 ! implied link automatically realized ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001192 ! GNA - -[Term] -id: SO:0001194 -name: R_GNA -def: "An attribute describing a GNA sequence in the (R)-GNA enantiomer." [CHEBI:48016] -comment: Do not use this term for feature annotation. Use R_GNA_oligo (SO:0001195) instead. -synonym: "R GNA" EXACT [] -is_a: SO:0001192 ! 
GNA - -[Term] -id: SO:0001195 -name: R_GNA_oligo -def: "An oligo composed of (R)-GNA residues." [RSC:cb] -synonym: "(R)-glycerol nucleic acid" EXACT [] -synonym: "(R)-glycol nucleic acid" EXACT [] -synonym: "R GNA oligo" EXACT [] -is_a: SO:0001193 ! implied link automatically realized ! GNA_oligo -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001194 ! R_GNA - -[Term] -id: SO:0001196 -name: S_GNA -def: "An attribute describing a GNA sequence in the (S)-GNA enantiomer." [CHEBI:48017] -comment: Do not use this term for feature annotation. Use S_GNA_oligo (SO:0001197) instead. -synonym: "S GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001197 -name: S_GNA_oligo -def: "An oligo composed of (S)-GNA residues." [RSC:cb] -synonym: "(S)-glycerol nucleic acid" EXACT [] -synonym: "(S)-glycol nucleic acid" EXACT [] -synonym: "S GNA oligo" EXACT [] -is_a: SO:0001193 ! implied link automatically realized ! GNA_oligo -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001196 ! S_GNA - -[Term] -id: SO:0001198 -name: ds_DNA_viral_sequence -def: "A ds_DNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded DNA." [SO:ke] -synonym: "double stranded DNA virus" EXACT [] -synonym: "ds DNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001199 -name: ss_RNA_viral_sequence -def: "A ss_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as single stranded RNA." [SO:ke] -synonym: "single strand RNA virus" EXACT [] -synonym: "ss RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001200 -name: negative_sense_ssRNA_viral_sequence -def: "A negative_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that is complementary to mRNA and must be converted to positive sense RNA by RNA polymerase before translation." 
[SO:ke] -synonym: "negative sense single stranded RNA virus" RELATED [] -synonym: "negative sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001201 -name: positive_sense_ssRNA_viral_sequence -def: "A positive_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that can be immediately translated by the host." [SO:ke] -synonym: "positive sense single stranded RNA virus" RELATED [] -synonym: "positive sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001202 -name: ambisense_ssRNA_viral_sequence -def: "A ambisense_RNA_virus is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus with both messenger and anti messenger polarity." [SO:ke] -synonym: "ambisense single stranded RNA virus" EXACT [] -synonym: "ambisense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001203 -name: RNA_polymerase_promoter -def: "A region (DNA) to which RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "RNA polymerase promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0001204 -name: Phage_RNA_Polymerase_Promoter -def: "A region (DNA) to which Bacteriophage RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "Phage RNA Polymerase Promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0001205 -name: SP6_RNA_Polymerase_Promoter -def: "A region (DNA) to which the SP6 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "SP6 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001206 -name: T3_RNA_Polymerase_Promoter -def: "A DNA sequence to which the T3 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T3 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! 
Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001207 -name: T7_RNA_Polymerase_Promoter -def: "A region (DNA) to which the T7 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T7 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001208 -name: five_prime_EST -def: "An EST read from the 5' end of a transcript that usually codes for a protein. These regions tend to be conserved across species and do not change much within a gene family." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "5' EST" EXACT [] -synonym: "five prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001209 -name: three_prime_EST -def: "An EST read from the 3' end of a transcript. They are more likely to fall within non-coding, or untranslated regions(UTRs)." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "3' EST" EXACT [] -synonym: "three prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001210 -name: translational_frameshift -def: "The region of mRNA (not divisible by 3 bases) that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "ribosomal frameshift" EXACT [] -synonym: "translational frameshift" EXACT [] -xref: http://en.wikipedia.org/wiki/Translational_frameshift "wiki" -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0001211 -name: plus_1_translational_frameshift -def: "The region of mRNA 1 base long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 1 ribosomal frameshift" EXACT [] -synonym: "plus 1 translational frameshift" EXACT [] -is_a: SO:0001210 ! 
translational_frameshift - -[Term] -id: SO:0001212 -name: plus_2_translational_frameshift -def: "The region of mRNA 2 bases long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 2 ribosomal frameshift" EXACT [] -synonym: "plus 2 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001213 -name: group_III_intron -def: "Group III introns are introns found in the mRNA of the plastids of euglenoid protists. They are spliced by a two step transesterification with bulged adenosine as initiating nucleophile." [PMID:11377794] -comment: GO:0000374. -synonym: "group III intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_III_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -def: "The maximal intersection of exon and UTR." [SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with astop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001216 -name: endonuclease_spliced_intron -def: "An intron that spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -synonym: "endonuclease spliced intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0001217 -name: protein_coding_gene -synonym: "protein coding gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! 
gene -intersection_of: has_quality SO:0000010 ! protein_coding - -[Term] -id: SO:0001218 -name: transgenic_insertion -def: "An insertion that derives from another organism, via the use of recombinant DNA technology." [SO:bm] -synonym: "transgenic insertion" EXACT [] -is_a: SO:0000667 ! implied link automatically realized ! insertion -intersection_of: SO:0000667 ! insertion -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0001219 -name: retrogene -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000569 ! retrotransposed - -[Term] -id: SO:0001220 -name: silenced_by_RNA_interference -def: "An attribute describing an epigenetic process where a gene is inactivated by RNA interference." [RSC:cb] -comment: RNA interference is GO:0016246. -synonym: "silenced by RNA interference" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001221 -name: silenced_by_histone_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by histone modification." [RSC:cb] -comment: Histone modification is GO:0016570. -synonym: "silenced by histone modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001222 -name: silenced_by_histone_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone methylation." [RSC:cb] -comment: Histone methylation is GO:0016571. -synonym: "silenced by histone methylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001223 -name: silenced_by_histone_deacetylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone deacetylation." [RSC:cb] -comment: Histone deacetylation is GO:0016573. -synonym: "silenced by histone deacetylation" EXACT [] -is_a: SO:0001221 ! 
silenced_by_histone_modification - -[Term] -id: SO:0001224 -name: gene_silenced_by_RNA_interference -def: "A gene that is silenced by RNA interference." [SO:xp] -synonym: "gene silenced by RNA interference" EXACT [] -synonym: "RNA interference silenced gene" EXACT [] -synonym: "RNAi silenced gene" EXACT [] -is_a: SO:0000127 ! implied link automatically realized ! silenced_gene -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001220 ! silenced_by_RNA_interference - -[Term] -id: SO:0001225 -name: gene_silenced_by_histone_modification -def: "A gene that is silenced by histone modification." [SO:xp] -synonym: "gene silenced by histone modification" EXACT [] -is_a: SO:0000127 ! implied link automatically realized ! silenced_gene -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001226 -name: gene_silenced_by_histone_methylation -def: "A gene that is silenced by histone methylation." [SO:xp] -synonym: "gene silenced by histone methylation" EXACT [] -is_a: SO:0001225 ! implied link automatically realized ! gene_silenced_by_histone_modification -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001222 ! silenced_by_histone_methylation - -[Term] -id: SO:0001227 -name: gene_silenced_by_histone_deacetylation -def: "A gene that is silenced by histone deacetylation." [SO:xp] -synonym: "gene silenced by histone deacetylation" EXACT [] -is_a: SO:0001225 ! implied link automatically realized ! gene_silenced_by_histone_modification -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001223 ! silenced_by_histone_deacetylation - -[Term] -id: SO:0001228 -name: dihydrouridine -def: "A modified RNA base in which the 5,6-dihydrouracil is bound to the ribose ring." 
[RSC:cb] -synonym: "D" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Dihydrouridine "wiki" -xref: RNAMOD:051 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001229 -name: pseudouridine -def: "A modified RNA base in which the 5- position of the uracil is bound to the ribose ring instead of the 4- position." [RSC:cb] -comment: The free molecule is CHEBI:17802. -synonym: "Y" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Pseudouridine "wiki" -xref: RNAMOD:050 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001230 -name: inosine -def: "A modified RNA base in which hypoxanthine is bound to the ribose ring." [http://library.med.utah.edu/RNAmods/, RSC:cb] -comment: The free molecule is CHEBI:17596. -synonym: "I" RELATED [] -synonym: "RNAMOD:017" RELATED [] -xref: http://en.wikipedia.org/wiki/Inosine "wiki" -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001231 -name: seven_methylguanine -def: "A modified RNA base in which guanine is methylated at the 7- position." [RSC:cb] -comment: The free molecule is CHEBI:2274. -synonym: "7-methylguanine" EXACT [] -synonym: "seven methylguanine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001232 -name: ribothymidine -def: "A modified RNA base in which thymine is bound to the ribose ring." [RSC:cb] -comment: The free molecule is CHEBI:30832. -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001233 -name: methylinosine -def: "A modified RNA base in which methylhypoxanthine is bound to the ribose ring." [RSC:cb] -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001234 -name: mobile -def: "An attribute describing a feature that has either intra-genome or intracellular mobility." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Mobile "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001235 -name: replicon -def: "A region containing at least one unique origin of replication and a unique termination site." 
[ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000001 ! region - -[Term] -id: SO:0001237 -name: amino_acid -def: "A sequence feature that corresponds to a single amino acid residue in a polypeptide." [RSC:cb] -comment: Probably in the future this will cross reference to Chebi. -synonym: "amino acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Amino_acid "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0001238 -name: major_TSS -synonym: "major transcription start site" EXACT [] -synonym: "major TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001239 -name: minor_TSS -synonym: "minor TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001240 -name: TSS_region -def: "The region of a gene from the 5' most TSS to the 3' TSS." [BBOP:nw] -synonym: "TSS region" EXACT [] -is_a: SO:0000842 ! gene_component_region -relationship: has_part SO:0000315 ! TSS - -[Term] -id: SO:0001241 -name: encodes_alternate_transcription_start_sites -synonym: "encodes alternate transcription start sites" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0001243 -name: miRNA_primary_transcript_region -def: "A part of an miRNA primary_transcript." [SO:ke] -synonym: "miRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0001244 -name: pre_miRNA -def: "The 60-70 nucleotide region remain after Drosha processing of the primary transcript, that folds back upon itself to form a hairpin sructure." [SO:ke] -synonym: "pre-miRNA" EXACT [] -is_a: SO:0001243 ! 
miRNA_primary_transcript_region -relationship: part_of SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0001245 -name: miRNA_stem -def: "The stem of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA stem" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001246 -name: miRNA_loop -def: "The loop of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA loop" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001247 -name: synthetic_oligo -def: "An oligo composed of synthetic nucleotides." [SO:ke] -synonym: "synthetic oligo" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0001248 -name: assembly -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001249 -name: fragment_assembly -def: "A fragment assembly is a genome assembly that orders overlapping fragments of the genome based on landmark sequences. The base pair distance between the landmarks is known allowing additivity of lengths." [SO:ke] -synonym: "fragment assembly" EXACT [] -synonym: "physical map" EXACT [] -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0001250 -name: fingerprint_map -def: "A fingerprint_map is a physical map composed of restriction fragments." [SO:ke] -synonym: "BACmap" EXACT [] -synonym: "fingerprint map" EXACT [] -synonym: "FPC" EXACT [] -synonym: "FPCmap" EXACT [] -synonym: "restriction map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000412 ! restriction_fragment - -[Term] -id: SO:0001251 -name: STS_map -def: "An STS map is a physical map organized by the unique STS landmarks." 
[SO:ke] -synonym: "STS map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001252 -name: RH_map -def: "A radiation hybrid map is a physical map." [SO:ke] -synonym: "radiation hybrid map" EXACT [] -synonym: "RH map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001253 -name: sonicate_fragment -def: "A DNA fragment generated by sonication. Sonication is a technique used to sheer DNA into smaller fragments." [SO:ke] -synonym: "sonicate fragment" EXACT [] -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0001254 -name: polyploid -def: "A kind of chromosome variation where the chromosome complement is an exact multiple of the haploid number and is greater than the diploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Polyploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0001255 -name: autopolyploid -def: "A polyploid where the multiple chromosome set was derived from the same organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Autopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001256 -name: allopolyploid -def: "A polyploid where the multiple chromosome set was derived from a different organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Allopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001257 -name: homing_endonuclease_binding_site -def: "The binding site (recognition site) of a homing endonuclease. The binding site is typically large." [SO:ke] -synonym: "homing endonuclease binding site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0001258 -name: octamer_motif -def: "A sequence element characteristic of some RNA polymerase II promoters with sequence ATTGCAT that binds Pou-domain transcription factors." [GOC:dh, PMID:3095662] -comment: Nature. 1986 Oct 16-22;323(6089):640-3. -synonym: "octamer motif" EXACT [] -is_a: SO:0000235 ! 
TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001259 -name: apicoplast_chromosome -def: "A chromosome originating in an apicoplast." [SO:xp] -synonym: "apicoplast chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0001260 -name: sequence_collection -def: "A collection of discontinuous sequences." [SO:ke] -synonym: "sequence collection" EXACT [] - -[Term] -id: SO:0001261 -name: overlapping_feature_set -def: "A continuous region of sequence composed of the overlapping of multiple sequence_features, which ultimately provides evidence for another sequence_feature." [SO:ke] -comment: This feature was requested by nicole, tracker id 1911479. It is required to gather evidense together for annotation. An example would be overlapping ESTs that support an mRNA. -synonym: "overlapping feature set" EXACT [] -is_a: SO:0000703 ! experimental_result_region - -[Term] -id: SO:0001262 -name: overlapping_EST_set -def: "A continous experimental result region extending the length of multiple overlapping EST's." [SO:ke] -synonym: "overlapping EST set" EXACT [] -is_a: SO:0001261 ! overlapping_feature_set -relationship: has_part SO:0000345 ! EST - -[Term] -id: SO:0001263 -name: ncRNA_gene -synonym: "ncRNA gen" EXACT [] -synonym: "ncRNA gene" EXACT [] -synonym: "non-coding RNA gene" RELATED [] -is_a: SO:0000704 ! gene - -[Term] -id: SO:0001264 -name: gRNA_gene -synonym: "gRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000979 ! gRNA_encoding - -[Term] -id: SO:0001265 -name: miRNA_gene -synonym: "miRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000571 ! 
miRNA_encoding - -[Term] -id: SO:0001266 -name: scRNA_gene -synonym: "scRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000575 ! scRNA_encoding - -[Term] -id: SO:0001267 -name: snoRNA_gene -synonym: "snoRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0001268 -name: snRNA_gene -synonym: "snRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0001263 ! ncRNA_gene - -[Term] -id: SO:0001269 -name: SRP_RNA_gene -synonym: "SRP RNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000642 ! SRP_RNA_encoding - -[Term] -id: SO:0001270 -name: stRNA_gene -synonym: "stRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000656 ! stRNA_encoding - -[Term] -id: SO:0001271 -name: tmRNA_gene -synonym: "tmRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000659 ! tmRNA_encoding - -[Term] -id: SO:0001272 -name: tRNA_gene -synonym: "tRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000663 ! tRNA_encoding - -[Term] -id: SO:0001273 -name: modified_adenosine -def: "A modified adenine is an adenine base feature that has been altered." [SO:ke] -synonym: "modified adenosine" EXACT [] -is_a: SO:0000250 ! 
modified_RNA_base_feature - -[Term] -id: SO:0001274 -name: modified_inosine -def: "A modified inosine is an inosine base feature that has been altered." [SO:ke] -synonym: "modified inosine" EXACT [] -is_a: SO:0001230 ! inosine - -[Term] -id: SO:0001275 -name: modified_cytidine -def: "A modified cytidine is a cytidine base feature which has been altered." [SO:ke] -synonym: "modified cytidine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001276 -name: modified_guanosine -synonym: "modified guanosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001277 -name: modified_uridine -synonym: "modified uridine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001278 -name: one_methylinosine -def: "1-methylinosine is a modified insosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylinosine" EXACT [] -synonym: "m1I" EXACT RNAMOD [] -synonym: "one methylinosine" EXACT [] -xref: RNAMOD:018 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001279 -name: one_two_prime_O_dimethylinosine -def: "1,2'-O-dimethylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylinosine" EXACT [] -synonym: "m'Im" EXACT RNAMOD [] -synonym: "one two prime O dimethylinosine" EXACT [] -xref: RNAMOD:019 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001280 -name: two_prime_O_methylinosine -def: "2'-O-methylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylinosine" EXACT [] -synonym: "Im" EXACT RNAMOD [] -synonym: "two prime O methylinosine" EXACT [] -xref: RNAMOD:081 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001281 -name: three_methylcytidine -def: "3-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylcytidine" EXACT [] -synonym: "m3C" EXACT RNAMOD [] -synonym: "three methylcytidine" EXACT [] -xref: RNAMOD:020 -is_a: SO:0001275 ! 
modified_cytidine - -[Term] -id: SO:0001282 -name: five_methylcytidine -def: "5-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylcytidine" EXACT [] -synonym: "five methylcytidine" EXACT [] -synonym: "m5C" EXACT RNAMOD [] -xref: RNAMOD:021 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001283 -name: two_prime_O_methylcytidine -def: "2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylcytidine" EXACT [] -synonym: "Cm" EXACT RNAMOD [] -synonym: "two prime O methylcytidine" EXACT [] -xref: RNAMOD:022 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001284 -name: two_thiocytidine -def: "2-thiocytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiocytidine" EXACT [] -synonym: "s2C" EXACT RNAMOD [] -synonym: "two thiocytidine" EXACT [] -xref: RNAMOD:023 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001285 -name: N4_acetylcytidine -def: "N4-acetylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4C" EXACT RNAMOD [] -synonym: "N4 acetylcytidine" EXACT [] -synonym: "N4-acetylcytidine" EXACT [] -xref: RNAMOD:024 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001286 -name: five_formylcytidine -def: "5-formylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formylcytidine" EXACT [] -synonym: "f5C" EXACT RNAMOD [] -synonym: "five formylcytidine" EXACT [] -xref: RNAMOD:025 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001287 -name: five_two_prime_O_dimethylcytidine -def: "5,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethylcytidine" EXACT [] -synonym: "five two prime O dimethylcytidine" EXACT [] -synonym: "m5Cm" EXACT RNAMOD [] -xref: RNAMOD:026 -is_a: SO:0001275 ! 
modified_cytidine - -[Term] -id: SO:0001288 -name: N4_acetyl_2_prime_O_methylcytidine -def: "N4-acetyl-2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4Cm" EXACT RNAMOD [] -synonym: "N4 acetyl 2 prime O methylcytidine" EXACT [] -synonym: "N4-acetyl-2'-O-methylcytidine" EXACT [] -xref: RNAMOD:027 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001289 -name: lysidine -def: "Lysidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "k2C" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Lysidine "wiki" -xref: RNAMOD:028 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001290 -name: N4_methylcytidine -def: "N4-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4C" EXACT RNAMOD [] -synonym: "N4 methylcytidine" EXACT [] -synonym: "N4-methylcytidine" EXACT [] -xref: RNAMOD:082 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001291 -name: N4_2_prime_O_dimethylcytidine -def: "N4,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4Cm" EXACT RNAMOD [] -synonym: "N4 2 prime O dimethylcytidine" EXACT [] -synonym: "N4,2'-O-dimethylcytidine" EXACT [] -xref: RNAMOD:083 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001292 -name: five_hydroxymethylcytidine -def: "5-hydroxymethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxymethylcytidine" EXACT [] -synonym: "five hydroxymethylcytidine" EXACT [] -synonym: "hm5C" EXACT RNAMOD [] -xref: RNAMOD:084 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001293 -name: five_formyl_two_prime_O_methylcytidine -def: "5-formyl-2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formyl-2'-O-methylcytidine" EXACT [] -synonym: "f5Cm" EXACT RNAMOD [] -synonym: "five formyl two prime O methylcytidine" EXACT [] -xref: RNAMOD:095 -is_a: SO:0001275 ! 
modified_cytidine - -[Term] -id: SO:0001294 -name: N4_N4_2_prime_O_trimethylcytidine -def: "N4_N4_2_prime_O_trimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m42Cm" EXACT RNAMOD [] -synonym: "N4,N4,2'-O-trimethylcytidine" EXACT [] -xref: RNAMOD:107 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001295 -name: one_methyladenosine -def: "1_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyladenosine" EXACT [] -synonym: "m1A" EXACT RNAMOD [] -synonym: "one methyladenosine" EXACT [] -xref: RNAMOD:001 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001296 -name: two_methyladenosine -def: "2_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methyladenosine" EXACT [] -synonym: "m2A" EXACT RNAMOD [] -synonym: "two methyladenosine" EXACT [] -xref: RNAMOD:002 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001297 -name: N6_methyladenosine -def: "N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6A" EXACT RNAMOD [] -synonym: "N6 methyladenosine" EXACT [] -synonym: "N6-methyladenosine" EXACT [] -xref: RNAMOD:003 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001298 -name: two_prime_O_methyladenosine -def: "2prime_O_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyladenosine" EXACT [] -synonym: "Am" EXACT RNAMOD [] -synonym: "two prime O methyladenosine" EXACT [] -xref: RNAMOD:004 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001299 -name: two_methylthio_N6_methyladenosine -def: "2_methylthio_N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-methyladenosine" EXACT [] -synonym: "ms2m6A" EXACT RNAMOD [] -synonym: "two methylthio N6 methyladenosine" EXACT [] -xref: RNAMOD:005 -is_a: SO:0001273 ! 
modified_adenosine - -[Term] -id: SO:0001300 -name: N6_isopentenyladenosine -def: "N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "i6A" EXACT RNAMOD [] -synonym: "N6 isopentenyladenosine" EXACT [] -synonym: "N6-isopentenyladenosine" EXACT [] -xref: RNAMOD:006 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001301 -name: two_methylthio_N6_isopentenyladenosine -def: "2_methylthio_N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-isopentenyladenosine" EXACT [] -synonym: "ms2i6A" EXACT RNAMOD [] -synonym: "two methylthio N6 isopentenyladenosine" EXACT [] -xref: RNAMOD:007 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001302 -name: N6_cis_hydroxyisopentenyl_adenosine -def: "N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "io6A" EXACT RNAMOD [] -synonym: "N6 cis hydroxyisopentenyl adenosine" EXACT [] -synonym: "N6-(cis-hydroxyisopentenyl)adenosine" EXACT [] -xref: RNAMOD:008 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001303 -name: two_methylthio_N6_cis_hydroxyisopentenyl_adenosine -def: "2_methylthio_N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-(cis-hydroxyisopentenyl) adenosine" EXACT [] -synonym: "ms2io6A" EXACT RNAMOD [] -synonym: "two methylthio N6 cis hydroxyisopentenyl adenosine" EXACT [] -xref: RNAMOD:009 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001304 -name: N6_glycinylcarbamoyladenosine -def: "N6_glycinylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "g6A" EXACT RNAMOD [] -synonym: "N6 glycinylcarbamoyladenosine" EXACT [] -synonym: "N6-glycinylcarbamoyladenosine" EXACT [] -xref: RNAMOD:010 -is_a: SO:0001273 ! 
modified_adenosine - -[Term] -id: SO:0001305 -name: N6_threonylcarbamoyladenosine -def: "N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-threonylcarbamoyladenosine" EXACT [] -synonym: "t6A" EXACT RNAMOD [] -xref: RNAMOD:011 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001306 -name: two_methylthio_N6_threonyl_carbamoyladenosine -def: "2_methylthio_N6_threonyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-threonyl carbamoyladenosine" EXACT [] -synonym: "ms2t6A" EXACT RNAMOD [] -synonym: "two methylthio N6 threonyl carbamoyladenosine" EXACT [] -xref: RNAMOD:012 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001307 -name: N6_methyl_N6_threonylcarbamoyladenosine -def: "N6_methyl_N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6t6A" EXACT RNAMOD [] -synonym: "N6 methyl N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-methyl-N6-threonylcarbamoyladenosine" EXACT [] -xref: RNAMOD:013 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001308 -name: N6_hydroxynorvalylcarbamoyladenosine -def: "N6_hydroxynorvalylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "hn6A" EXACT RNAMOD [] -synonym: "N6 hydroxynorvalylcarbamoyladenosine" EXACT [] -synonym: "N6-hydroxynorvalylcarbamoyladenosine" EXACT [] -xref: RNAMOD:014 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001309 -name: two_methylthio_N6_hydroxynorvalyl_carbamoyladenosine -def: "2_methylthio_N6_hydroxynorvalyl_carbamoyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-hydroxynorvalyl carbamoyladenosine" EXACT [] -synonym: "ms2hn6A" EXACT RNAMOD [] -synonym: "two methylthio N6 hydroxynorvalyl carbamoyladenosine" EXACT [] -xref: RNAMOD:015 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001310 -name: two_prime_O_ribosyladenosine_phosphate -def: "2prime_O_ribosyladenosine_phosphate is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosyladenosine (phosphate)" EXACT [] -synonym: "Ar(p)" EXACT RNAMOD [] -synonym: "two prime O ribosyladenosine phosphate" EXACT [] -xref: RNAMOD:016 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001311 -name: N6_N6_dimethyladenosine -def: "N6_N6_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62A" EXACT RNAMOD [] -synonym: "N6,N6-dimethyladenosine" EXACT [] -xref: RNAMOD:080 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001312 -name: N6_2_prime_O_dimethyladenosine -def: "N6_2prime_O_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6Am" EXACT RNAMOD [] -synonym: "N6 2 prime O dimethyladenosine" EXACT [] -synonym: "N6,2'-O-dimethyladenosine" EXACT [] -xref: RNAMOD:088 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001313 -name: N6_N6_2_prime_O_trimethyladenosine -def: "N6_N6_2prime_O_trimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62Am" EXACT RNAMOD [] -synonym: "N6,N6,2'-O-trimethyladenosine" EXACT [] -xref: RNAMOD:089 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001314 -name: one_two_prime_O_dimethyladenosine -def: "1,2'-O-dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethyladenosine" EXACT [] -synonym: "m1Am" EXACT RNAMOD [] -synonym: "one two prime O dimethyladenosine" EXACT [] -xref: RNAMOD:097 -is_a: SO:0001273 ! 
modified_adenosine - -[Term] -id: SO:0001315 -name: N6_acetyladenosine -def: "N6_acetyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac6A" EXACT RNAMOD [] -synonym: "N6 acetyladenosine" EXACT [] -synonym: "N6-acetyladenosine" EXACT [] -xref: RNAMOD:102 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001316 -name: seven_deazaguanosine -def: "7-deazaguanosine is a moddified guanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-deazaguanosine" RELATED [] -synonym: "seven deazaguanosine" EXACT [] -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001317 -name: queuosine -def: "Queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "Q" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Queuosine "wiki" -xref: RNAMOD:043 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001318 -name: epoxyqueuosine -def: "Epoxyqueuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "eQ" EXACT RNAMOD [] -xref: RNAMOD:044 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001319 -name: galactosyl_queuosine -def: "Galactosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "galactosyl queuosine" EXACT [] -synonym: "galactosyl-queuosine" EXACT [] -synonym: "galQ" EXACT RNAMOD [] -xref: RNAMOD:045 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001320 -name: mannosyl_queuosine -def: "Mannosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "mannosyl queuosine" EXACT [] -synonym: "mannosyl-queuosine" EXACT [] -synonym: "manQ" EXACT RNAMOD [] -xref: RNAMOD:046 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001321 -name: seven_cyano_seven_deazaguanosine -def: "7_cyano_7_deazaguanosine is a modified 7-deazoguanosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "7-cyano-7-deazaguanosine" EXACT [] -synonym: "preQ0" EXACT RNAMOD [] -synonym: "seven cyano seven deazaguanosine" EXACT [] -xref: RNAMOD:047 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001322 -name: seven_aminomethyl_seven_deazaguanosine -def: "7_aminomethyl_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-aminomethyl-7-deazaguanosine" EXACT [] -synonym: "preQ1" EXACT RNAMOD [] -synonym: "seven aminomethyl seven deazaguanosine" EXACT [] -xref: RNAMOD:048 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001323 -name: archaeosine -def: "Archaeosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "G+" EXACT RNAMOD [] -xref: RNAMOD:049 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001324 -name: one_methylguanosine -def: "1_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylguanosine" EXACT [] -synonym: "m1G" EXACT RNAMOD [] -synonym: "one methylguanosine" EXACT [] -xref: RNAMOD:029 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001325 -name: N2_methylguanosine -def: "N2_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2G" EXACT RNAMOD [] -synonym: "N2 methylguanosine" EXACT [] -synonym: "N2-methylguanosine" EXACT [] -xref: RNAMOD:030 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001326 -name: seven_methylguanosine -def: "7_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "7-methylguanosine" EXACT [] -synonym: "m7G" EXACT RNAMOD [] -synonym: "seven methylguanosine" EXACT [] -xref: RNAMOD:031 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001327 -name: two_prime_O_methylguanosine -def: "2prime_O_methylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylguanosine" EXACT [] -synonym: "Gm" EXACT RNAMOD [] -synonym: "two prime O methylguanosine" EXACT [] -xref: RNAMOD:032 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001328 -name: N2_N2_dimethylguanosine -def: "N2_N2_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22G" EXACT RNAMOD [] -synonym: "N2,N2-dimethylguanosine" EXACT [] -xref: RNAMOD:033 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001329 -name: N2_2_prime_O_dimethylguanosine -def: "N2_2prime_O_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2Gm" EXACT RNAMOD [] -synonym: "N2 2 prime O dimethylguanosine" EXACT [] -synonym: "N2,2'-O-dimethylguanosine" EXACT [] -xref: RNAMOD:034 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001330 -name: N2_N2_2_prime_O_trimethylguanosine -def: "N2_N2_2prime_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22Gmv" EXACT RNAMOD [] -synonym: "N2,N2,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:035 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001331 -name: two_prime_O_ribosylguanosine_phosphate -def: "2prime_O_ribosylguanosine_phosphate is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosylguanosine (phosphate)" EXACT [] -synonym: "Gr(p)" EXACT RNAMOD [] -synonym: "two prime O ribosylguanosine phosphate" EXACT [] -xref: RNAMOD:036 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001332 -name: wybutosine -def: "Wybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "yW" EXACT RNAMOD [] -xref: RNAMOD:037 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001333 -name: peroxywybutosine -def: "Peroxywybutosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "o2yW" EXACT RNAMOD [] -xref: RNAMOD:038 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001334 -name: hydroxywybutosine -def: "Hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW" EXACT RNAMOD [] -xref: RNAMOD:039 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001335 -name: undermodified_hydroxywybutosine -def: "Undermodified_hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW*" EXACT RNAMOD [] -synonym: "undermodified hydroxywybutosine" EXACT [] -xref: RNAMOD:040 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001336 -name: wyosine -def: "Wyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "IMG" EXACT RNAMOD [] -xref: RNAMOD:041 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001337 -name: methylwyosine -def: "Methylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mimG" EXACT RNAMOD [] -xref: RNAMOD:042 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001338 -name: N2_7_dimethylguanosine -def: "N2_7_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7G" EXACT RNAMOD [] -synonym: "N2 7 dimethylguanosine" EXACT [] -synonym: "N2,7-dimethylguanosine" EXACT [] -xref: RNAMOD:090 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001339 -name: N2_N2_7_trimethylguanosine -def: "N2_N2_7_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,2,7G" EXACT RNAMOD [] -synonym: "N2,N2,7-trimethylguanosine" EXACT [] -xref: RNAMOD:091 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001340 -name: one_two_prime_O_dimethylguanosine -def: "1_2prime_O_dimethylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylguanosine" EXACT [] -synonym: "m1Gm" EXACT RNAMOD [] -synonym: "one two prime O dimethylguanosine" EXACT [] -xref: RNAMOD:096 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001341 -name: four_demethylwyosine -def: "4_demethylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-demethylwyosine" EXACT [] -synonym: "four demethylwyosine" EXACT [] -synonym: "imG-14" EXACT RNAMOD [] -xref: RNAMOD:100 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001342 -name: isowyosine -def: "Isowyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "imG2" EXACT RNAMOD [] -xref: RNAMOD:101 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001343 -name: N2_7_2prirme_O_trimethylguanosine -def: "N2_7_2prirme_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7Gm" EXACT RNAMOD [] -synonym: "N2 7 2prirme O trimethylguanosine" EXACT [] -synonym: "N2,7,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:106 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001344 -name: five_methyluridine -def: "5_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methyluridine" EXACT [] -synonym: "five methyluridine" EXACT [] -synonym: "m5U" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/5-methyluridine "wiki" -xref: RNAMOD:052 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001345 -name: two_prime_O_methyluridine -def: "2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyluridine" EXACT [] -synonym: "two prime O methyluridine" EXACT [] -synonym: "Um" EXACT RNAMOD [] -xref: RNAMOD:053 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001346 -name: five_two_prime_O_dimethyluridine -def: "5_2_prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethyluridine" EXACT [] -synonym: "five two prime O dimethyluridine" EXACT [] -synonym: "m5Um" EXACT RNAMOD [] -xref: RNAMOD:054 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001347 -name: one_methylpseudouridine -def: "1_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylpseudouridine" EXACT [] -synonym: "m1Y" EXACT RNAMOD [] -synonym: "one methylpseudouridine" EXACT [] -xref: RNAMOD:055 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001348 -name: two_prime_O_methylpseudouridine -def: "2prime_O_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylpseudouridine" EXACT [] -synonym: "two prime O methylpseudouridine" EXACT [] -synonym: "Ym" EXACT RNAMOD [] -xref: RNAMOD:056 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001349 -name: two_thiouridine -def: "2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiouridine" EXACT [] -synonym: "s2U" EXACT RNAMOD [] -synonym: "two thiouridine" EXACT [] -xref: RNAMOD:057 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001350 -name: four_thiouridine -def: "4_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-thiouridine" EXACT [] -synonym: "four thiouridine" EXACT [] -synonym: "s4U" EXACT RNAMOD [] -xref: RNAMOD:058 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001351 -name: five_methyl_2_thiouridine -def: "5_methyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyl-2-thiouridine" EXACT [] -synonym: "five methyl 2 thiouridine" EXACT [] -synonym: "m5s2U" EXACT RNAMOD [] -xref: RNAMOD:059 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001352 -name: two_thio_two_prime_O_methyluridine -def: "2_thio_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thio-2'-O-methyluridine" EXACT [] -synonym: "s2Um" EXACT RNAMOD [] -synonym: "two thio two prime O methyluridine" EXACT [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001353 -name: three_three_amino_three_carboxypropyl_uridine -def: "3_3_amino_3_carboxypropyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-(3-amino-3-carboxypropyl)uridine" EXACT [] -synonym: "acp3U" EXACT RNAMOD [] -xref: RNAMOD:061 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001354 -name: five_hydroxyuridine -def: "5_hydroxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxyuridine" EXACT [] -synonym: "five hydroxyuridine" EXACT [] -synonym: "ho5U" EXACT RNAMOD [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001355 -name: five_methoxyuridine -def: "5_methoxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxyuridine" EXACT [] -synonym: "five methoxyuridine" EXACT [] -synonym: "mo5U" EXACT RNAMOD [] -xref: RNAMOD:063 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001356 -name: uridine_five_oxyacetic_acid -def: "Uridine_5_oxyacetic_acid is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "cmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid" EXACT [] -synonym: "uridine five oxyacetic acid" EXACT [] -xref: RNAMOD:064 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001357 -name: uridine_five_oxyacetic_acid_methyl_ester -def: "Uridine_5_oxyacetic_acid_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mcmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid methyl ester" EXACT [] -synonym: "uridine five oxyacetic acid methyl ester" EXACT [] -xref: RNAMOD:065 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001358 -name: five_carboxyhydroxymethyl_uridine -def: "5_carboxyhydroxymethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine" EXACT [] -synonym: "chm5U" EXACT RNAMOD [] -synonym: "five carboxyhydroxymethyl uridine" EXACT [] -xref: RNAMOD:066 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001359 -name: five_carboxyhydroxymethyl_uridine_methyl_ester -def: "5_carboxyhydroxymethyl_uridine_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine methyl ester" EXACT [] -synonym: "five carboxyhydroxymethyl uridine methyl ester" EXACT [] -synonym: "mchm5U" EXACT RNAMOD [] -xref: RNAMOD:067 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001360 -name: five_methoxycarbonylmethyluridine -def: "Five_methoxycarbonylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyluridine" EXACT [] -synonym: "five methoxycarbonylmethyluridine" EXACT [] -synonym: "mcm5U" EXACT RNAMOD [] -xref: RNAMOD:068 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001361 -name: five_methoxycarbonylmethyl_two_prime_O_methyluridine -def: "Five_methoxycarbonylmethyl_2_prime_O_methyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five methoxycarbonylmethyl two prime O methyluridine" EXACT [] -synonym: "mcm5Um" EXACT RNAMOD [] -xref: RNAMOD:069 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001362 -name: five_methoxycarbonylmethyl_two_thiouridine -def: "5_methoxycarbonylmethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2-thiouridine" EXACT [] -synonym: "five methoxycarbonylmethyl two thiouridine" EXACT [] -synonym: "mcm5s2U" EXACT RNAMOD [] -xref: RNAMOD:070 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001363 -name: five_aminomethyl_two_thiouridine -def: "5_aminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-aminomethyl-2-thiouridine" EXACT [] -synonym: "five aminomethyl two thiouridine" EXACT [] -synonym: "nm5s2U" EXACT RNAMOD [] -xref: RNAMOD:071 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001364 -name: five_methylaminomethyluridine -def: "5_methylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyluridine" EXACT [] -synonym: "five methylaminomethyluridine" EXACT [] -synonym: "mnm5U" EXACT RNAMOD [] -xref: RNAMOD:072 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001365 -name: five_methylaminomethyl_two_thiouridine -def: "5_methylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-thiouridine" EXACT [] -synonym: "five methylaminomethyl two thiouridine" EXACT [] -synonym: "mnm5s2U" EXACT RNAMOD [] -xref: RNAMOD:073 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001366 -name: five_methylaminomethyl_two_selenouridine -def: "5_methylaminomethyl_2_selenouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-selenouridine" EXACT [] -synonym: "five methylaminomethyl two selenouridine" EXACT [] -synonym: "mnm5se2U" EXACT RNAMOD [] -xref: RNAMOD:074 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001367 -name: five_carbamoylmethyluridine -def: "5_carbamoylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyluridine" EXACT [] -synonym: "five carbamoylmethyluridine" EXACT [] -synonym: "ncm5U" EXACT RNAMOD [] -xref: RNAMOD:075 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001368 -name: five_carbamoylmethyl_two_prime_O_methyluridine -def: "5_carbamoylmethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five carbamoylmethyl two prime O methyluridine" EXACT [] -synonym: "ncm5Um" EXACT RNAMOD [] -xref: RNAMOD:076 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001369 -name: five_carboxymethylaminomethyluridine -def: "5_carboxymethylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyluridine" EXACT [] -synonym: "cmnm5U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyluridine" EXACT [] -xref: RNAMOD:077 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001370 -name: five_carboxymethylaminomethyl_two_prime_O_methyluridine -def: "5_carboxymethylaminomethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl- 2'-O-methyluridine" EXACT [] -synonym: "cmnm5Um" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two prime O methyluridine" EXACT [] -xref: RNAMOD:078 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001371 -name: five_carboxymethylaminomethyl_two_thiouridine -def: "5_carboxymethylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl-2-thiouridine" EXACT [] -synonym: "cmnm5s2U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two thiouridine" EXACT [] -xref: RNAMOD:079 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001372 -name: three_methyluridine -def: "3_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methyluridine" EXACT [] -synonym: "m3U" EXACT RNAMOD [] -synonym: "three methyluridine" EXACT [] -xref: RNAMOD:085 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001373 -name: one_methyl_three_three_amino_three_carboxypropyl_pseudouridine -def: "1_methyl_3_3_amino_3_carboxypropyl_pseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyl-3-(3-amino-3-carboxypropyl) pseudouridine" EXACT [] -synonym: "m1acp3Y" EXACT RNAMOD [] -xref: RNAMOD:086 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001374 -name: five_carboxymethyluridine -def: "5_carboxymethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethyluridine" EXACT [] -synonym: "cm5U" EXACT RNAMOD [] -synonym: "five carboxymethyluridine" EXACT [] -xref: RNAMOD:087 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001375 -name: three_two_prime_O_dimethyluridine -def: "3_2prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3,2'-O-dimethyluridine" EXACT [] -synonym: "m3Um" EXACT RNAMOD [] -synonym: "three two prime O dimethyluridine" EXACT [] -xref: RNAMOD:092 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001376 -name: five_methyldihydrouridine -def: "5_methyldihydrouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyldihydrouridine" EXACT [] -synonym: "five methyldihydrouridine" EXACT [] -synonym: "m5D" EXACT RNAMOD [] -xref: RNAMOD:093 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001377 -name: three_methylpseudouridine -def: "3_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylpseudouridine" EXACT [] -synonym: "m3Y" EXACT RNAMOD [] -synonym: "three methylpseudouridine" EXACT [] -xref: RNAMOD:094 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001378 -name: five_taurinomethyluridine -def: "5_taurinomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyluridine" EXACT [] -synonym: "five taurinomethyluridine" EXACT [] -synonym: "tm5U" EXACT RNAMOD [] -xref: RNAMOD:098 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001379 -name: five_taurinomethyl_two_thiouridine -def: "5_taurinomethyl_2_thiouridineis a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyl-2-thiouridine" EXACT [] -synonym: "five taurinomethyl two thiouridine" EXACT [] -synonym: "tm5s2U" EXACT RNAMOD [] -xref: RNAMOD:099 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001380 -name: five_isopentenylaminomethyl_uridine -def: "5_isopentenylaminomethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)uridine" EXACT [] -synonym: "five isopentenylaminomethyl uridine" EXACT [] -synonym: "inm5U" EXACT RNAMOD [] -xref: RNAMOD:103 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001381 -name: five_isopentenylaminomethyl_two_thiouridine -def: "5_isopentenylaminomethyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2-thiouridine" EXACT [] -synonym: "five isopentenylaminomethyl two thiouridine" EXACT [] -synonym: "inm5s2U" EXACT RNAMOD [] -xref: RNAMOD:104 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001382 -name: five_isopentenylaminomethyl_two_prime_O_methyluridine -def: "5_isopentenylaminomethyl_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2'-O-methyluridine" EXACT [] -synonym: "five isopentenylaminomethyl two prime O methyluridine" EXACT [] -synonym: "inm5Um" EXACT RNAMOD [] -xref: RNAMOD:105 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001383 -name: histone_binding_site -def: "A region of a DNA molecule that is bound by a histone." [SO:ke] -synonym: "histone binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0001384 -name: CDS_fragment -synonym: "CDS fragment" EXACT [] -synonym: "incomplete CDS" EXACT [] -is_a: SO:0000316 ! implied link automatically realized ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001385 -name: modified_amino_acid_feature -def: "A post translationally modified amino acid feature." [SO:ke] -synonym: "modified amino acid feature" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001386 -name: modified_glycine -def: "A post translationally modified glycine amino acid feature." [SO:ke] -synonym: "ModGly" EXACT AAMOD [] -synonym: "modified glycine" EXACT [] -xref: MOD:00908 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001387 -name: modified_L_alanine -def: "A post translationally modified alanine amino acid feature." [SO:ke] -synonym: "ModAla" EXACT AAMOD [] -synonym: "modified L alanine" EXACT [] -synonym: "modified L-alanine" EXACT [] -xref: MOD:00901 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001388 -name: modified_L_asparagine -def: "A post translationally modified asparagine amino acid feature." [SO:ke] -synonym: "ModAsn" EXACT AAMOD [] -synonym: "modified L asparagine" EXACT [] -synonym: "modified L-asparagine" EXACT [] -xref: MOD:00903 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001389 -name: modified_L_aspartic_acid -def: "A post translationally modified aspartic acid amino acid feature." [SO:ke] -synonym: "ModAsp" EXACT AAMOD [] -synonym: "modified L aspartic acid" EXACT [] -synonym: "modified L-aspartic acid" EXACT [] -xref: MOD:00904 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001390 -name: modified_L_cysteine -def: "A post translationally modified cysteine amino acid feature." [SO:ke] -synonym: "ModCys" EXACT AAMOD [] -synonym: "modified L cysteine" EXACT [] -synonym: "modified L-cysteine" EXACT [] -xref: MOD:00905 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001391 -name: modified_L_glutamic_acid -synonym: "ModGlu" EXACT AAMOD [] -synonym: "modified L glutamic acid" EXACT [] -synonym: "modified L-glutamic acid" EXACT [] -xref: MOD:00906 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001392 -name: modified_L_threonine -def: "A post translationally modified threonine amino acid feature." [SO:ke] -synonym: "modified L threonine" EXACT [] -synonym: "modified L-threonine" EXACT [] -synonym: "ModThr" EXACT AAMOD [] -xref: MOD:00917 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001393 -name: modified_L_tryptophan -def: "A post translationally modified tryptophan amino acid feature." [SO:ke] -synonym: "modified L tryptophan" EXACT [] -synonym: "modified L-tryptophan" EXACT [] -synonym: "ModTrp" EXACT AAMOD [] -xref: MOD:00918 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001394 -name: modified_L_glutamine -def: "A post translationally modified glutamine amino acid feature." 
[SO:ke] -synonym: "ModGln" EXACT [] -synonym: "modified L glutamine" EXACT [] -synonym: "modified L-glutamine" EXACT [] -xref: MOD:00907 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001395 -name: modified_L_methionine -def: "A post translationally modified methionine amino acid feature." [SO:ke] -synonym: "modified L methionine" EXACT [] -synonym: "modified L-methionine" EXACT [] -synonym: "ModMet" EXACT AAMOD [] -xref: MOD:00913 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001396 -name: modified_L_isoleucine -def: "A post translationally modified isoleucine amino acid feature." [SO:ke] -synonym: "modified L isoleucine" EXACT [] -synonym: "modified L-isoleucine" EXACT [] -synonym: "ModIle" EXACT AAMOD [] -xref: MOD:00910 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001397 -name: modified_L_phenylalanine -def: "A post translationally modified phenylalanine amino acid feature." [SO:ke] -synonym: "modified L phenylalanine" EXACT [] -synonym: "modified L-phenylalanine" EXACT [] -synonym: "ModPhe" EXACT AAMOD [] -xref: MOD:00914 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001398 -name: modified_L_histidine -def: "A post translationally modified histidie amino acid feature." [SO:ke] -synonym: "ModHis" EXACT [] -synonym: "modified L histidine" EXACT [] -synonym: "modified L-histidine" EXACT [] -xref: MOD:00909 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001399 -name: modified_L_serine -def: "A post translationally modified serine amino acid feature." [SO:ke] -synonym: "modified L serine" EXACT [] -synonym: "modified L-serine" EXACT [] -synonym: "MosSer" EXACT AAMOD [] -xref: MOD:00916 "http://www.psidev.info/index.php?q=node/104" -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001400 -name: modified_L_lysine -def: "A post translationally modified lysine amino acid feature." 
[SO:ke] -synonym: "modified L lysine" EXACT [] -synonym: "modified L-lysine" EXACT [] -synonym: "ModLys" EXACT AAMOD [] -xref: MOD:00912 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001401 -name: modified_L_leucine -def: "A post translationally modified leucine amino acid feature." [SO:ke] -synonym: "modified L leucine" EXACT [] -synonym: "modified L-leucine " EXACT [] -synonym: "ModLeu" EXACT AAMOD [] -xref: MOD:00911 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001402 -name: modified_L_selenocysteine -def: "A post translationally modified selenocysteine amino acid feature." [SO:ke] -synonym: "modified L selenocysteine" EXACT [] -synonym: "modified L-selenocysteine" EXACT [] -xref: MOD:01158 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001403 -name: modified_L_valine -def: "A post translationally modified valine amino acid feature." [SO:ke] -synonym: "modified L valine" EXACT [] -synonym: "modified L-valine" EXACT [] -synonym: "ModVal" EXACT AAMOD [] -xref: MOD:00920 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001404 -name: modified_L_proline -def: "A post translationally modified proline amino acid feature." [SO:ke] -synonym: "modified L proline" EXACT [] -synonym: "modified L-proline " EXACT [] -synonym: "ModPro" EXACT AAMOD [] -xref: MOD:00915 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001405 -name: modified_L_tyrosine -def: "A post translationally modified tyrosine amino acid feature." [SO:ke] -synonym: "modified L tyrosine" EXACT [] -synonym: "modified L-tyrosine" EXACT [] -synonym: "ModTry" EXACT AAMOD [] -xref: MOD:00919 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001406 -name: modified_L_arginine -def: "A post translationally modified arginine amino acid feature." [SO:ke] -synonym: "ModArg" EXACT AAMOD [] -synonym: "modified L arginine" EXACT [] -synonym: "modified L-arginine" EXACT [] -xref: MOD:00902 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001407 -name: peptidyl -def: "An attribute describing the nature of a proteinaceous polymer, where by the amino acid units are joined by peptide bonds." [SO:ke] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0001408 -name: cleaved_for_gpi_anchor_region -def: "The C-terminal residues of a polypeptide which are exchanged for a GPI-anchor." [EBI:rh] -synonym: "cleaved for gpi anchor region" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001409 -name: biomaterial_region -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001410 -name: experimental_feature -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -def: "A region that is defined according to its relations with other regions within the same sequence." [SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001413 -name: translocation_breakpoint -def: "The point within a chromosome where a translocation begins or ends." [SO:cb] -synonym: "translocation breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001414 -name: insertion_breakpoint -def: "The point within a chromosome where a insertion begins or ends." 
[SO:cb] -synonym: "insertion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001415 -name: deletion_breakpoint -def: "The point within a chromosome where a deletion begins or ends." [SO:cb] -synonym: "deletion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001416 -name: five_prime_flanking_region -def: "A flanking region located five prime of a specific region." [SO:chado] -synonym: "5' flanking region" RELATED [] -synonym: "five prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001417 -name: three_prime_flanking_region -def: "A flanking region located three prime of a specific region." [SO:chado] -synonym: "3' flanking region" RELATED [] -synonym: "three prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001418 -name: transcribed_fragment -def: "An experimental region, defined by a tiling array experiment to be transcribed at some level." [SO:ke] -comment: Term requested by the MODencode group. -synonym: "transcribed fragment" EXACT [] -synonym: "transfrag" RELATED [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001419 -name: cis_splice_site -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -def: "Primary transcript region bordering trans-splice junction." [SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001421 -name: splice_junction -def: "The boundary between an intron and an exon." [SO:ke] -synonym: "splice boundary" EXACT [] -synonym: "splice junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001422 -name: conformational_switch -def: "A region of a polypeptide, involved in the transition from one conformational state to another." 
[SO:ke] -comment: MM Young, K Kirshenbaum, KA Dill & S Highsmith. Predicting conformational switches in proteins. Protein Science, 1999, 8, 1752-64. K. Kirshenbaum, M.M. Young and S. Highsmith. Predicting Allosteric Switches in Myosins. Protein Science 8(9):1806-1815. 1999. -synonym: "polypeptide conformational switch" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001423 -name: dye_terminator_read -def: "A read produced by the dye terminator method of sequencing." [SO:ke] -synonym: "dye terminator read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001424 -name: pyrosequenced_read -def: "A read produced by pyrosequencing technology." [SO:ke] -comment: An example is a read produced by Roche 454 technology. -synonym: "pyorsequenced read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001425 -name: ligation_based_read -def: "A read produced by ligation based sequencing technologies." [SO:ke] -comment: An example of this kind of read is one produced by ABI SOLiD. -synonym: "ligation based read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001426 -name: polymerase_synthesis_read -def: "A read produced by the polymerase based sequence by synthesis method." [SO:ke] -comment: An example is a read produced by Illumina technology. -synonym: "polymerase synthesis read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001427 -name: cis_regulatory_frameshift_element -def: "A structural region in an RNA molecule which promotes ribosomal frameshifting of cis coding sequence." [RFAM:jd] -synonym: "cis regulatory frameshift element" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001428 -name: expressed_sequence_assembly -def: "A sequence assembly derived from expressed sequences." [SO:ke] -comment: From tracker [ 2372385 ] expressed_sequence_assembly. -synonym: "expressed sequence assembly" EXACT [] -is_a: SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0001429 -name: DNA_binding_site -def: "A region of a molecule that binds to DNA." [SO:ke] -synonym: "DNA binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001430 -name: polyA_junction -def: "The boundary between the UTR and the polyA sequence." [SO:ke] -synonym: "polyA junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001431 -name: cryptic_gene -def: "A gene that is not transcribed under normal conditions and is not critical to normal cellular functioning." [SO:ke] -synonym: "cryptic gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000976 ! cryptic - -[Term] -id: SO:0001432 -name: sequence_variant_affecting_polyadenylation -synonym: "mutation affecting polyadenylation" RELATED [] -synonym: "sequence variant affecting polyadenylation" EXACT [] -is_a: SO:1000070 ! sequence_variant_affecting_transcript_processing - -[Term] -id: SO:0001433 -name: three_prime_RACE_clone -def: "A three prime RACE (Rapid Amplification of cDNA Ends) clone is a cDNA clone copied from the 3' end of an mRNA (using a poly-dT primer to capture the polyA tail and a gene-specific or randomly primed 5' primer), and spliced into a vector for propagation in a suitable host." [modENCODE:nlw] -synonym: "3' RACE clone" RELATED [] -is_a: SO:0000317 ! cDNA_clone - -[Term] -id: SO:0001434 -name: cassette_pseudogene -def: "A cassette pseudogene is a kind of gene in an innactive form which may recombine at a telomeric locus to form a functional copy." [SO:ke] -comment: Requested by the trypanosome community. -synonym: "cassette pseudogene" EXACT [] -synonym: "cassette type psedogene" RELATED [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0001435 -name: alanine -comment: A place holder for a cross product with chebi. -synonym: "A" EXACT aa1 [] -synonym: "Ala" EXACT aa3 [] -is_a: SO:0001237 ! 
amino_acid - -[Term] -id: SO:0001436 -name: valine -comment: A place holder for a cross product with chebi. -synonym: "V" EXACT aa1 [] -synonym: "Val" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001437 -name: leucine -comment: A place holder for a cross product with chebi. -synonym: "L" EXACT aa1 [] -synonym: "Leu" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001438 -name: isoleucine -comment: A place holder for a cross product with chebi. -synonym: "I" EXACT aa1 [] -synonym: "Ile" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001439 -name: proline -comment: A place holder for a cross product with chebi. -synonym: "P" EXACT aa1 [] -synonym: "Pro" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001440 -name: tryptophan -comment: A place holder for a cross product with chebi. -synonym: "Trp" EXACT aa3 [] -synonym: "W" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001441 -name: phenylalanine -comment: A place holder for a cross product with chebi. -synonym: "F" EXACT aa1 [] -synonym: "Phe" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001442 -name: methionine -comment: A place holder for a cross product with chebi. -synonym: "M" EXACT aa1 [] -synonym: "Met" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001443 -name: glycine -comment: A place holder for a cross product with chebi. -synonym: "G" EXACT aa1 [] -synonym: "Gly" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001444 -name: serine -comment: A place holder for a cross product with chebi. -synonym: "S" EXACT aa1 [] -synonym: "Ser" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001445 -name: threonine -comment: A place holder for a cross product with chebi. -synonym: "T" EXACT aa1 [] -synonym: "Thr" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001446 -name: tyrosine -comment: A place holder for a cross product with chebi. 
-synonym: "Tyr" EXACT aa3 [] -synonym: "Y" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001447 -name: cysteine -comment: A place holder for a cross product with chebi. -synonym: "C" EXACT aa1 [] -synonym: "Cys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001448 -name: glutamine -comment: A place holder for a cross product with chebi. -synonym: "Gln" EXACT aa3 [] -synonym: "Q" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001449 -name: asparagine -comment: A place holder for a cross product with chebi. -synonym: "Asn" EXACT aa3 [] -synonym: "N" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001450 -name: lysine -comment: A place holder for a cross product with chebi. -synonym: "K" EXACT aa1 [] -synonym: "Lys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001451 -name: argenine -comment: A place holder for a cross product with chebi. -synonym: "Arg" EXACT aa3 [] -synonym: "R" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001452 -name: histidine -comment: A place holder for a cross product with chebi. -synonym: "H" EXACT aa1 [] -synonym: "His" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001453 -name: aspartic_acid -comment: A place holder for a cross product with chebi. -synonym: "Asp" EXACT aa3 [] -synonym: "aspartic acid" EXACT [] -synonym: "D" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001454 -name: glutamic_acid -comment: A place holder for a cross product with chebi. -synonym: "E" EXACT aa1 [] -synonym: "Glu" EXACT aa3 [] -synonym: "glutamic acid" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001455 -name: selenocysteine -comment: A place holder for a cross product with chebi. -synonym: "Sec" EXACT aa3 [] -synonym: "U" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001456 -name: pyrrolysine -comment: A place holder for a cross product with chebi. 
-synonym: "O" EXACT aa1 [] -synonym: "Pyl" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001457 -name: transcribed_cluster -def: "A region defined by a set of transcribed sequences from the same gene or expressed pseudogene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "transcribed cluster" EXACT [] -synonym: "unigene cluster" RELATED [] -is_a: SO:0001410 ! experimental_feature -relationship: has_part SO:0000695 ! reagent - -[Term] -id: SO:0001458 -name: unigene_cluster -def: "A kind of transcribed_cluster defined by a set of transcribed sequences from the a unique gene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "unigene cluster" RELATED [] -is_a: SO:0001457 ! transcribed_cluster - -[Term] -id: SO:0001459 -name: CRISPR -def: "Clustered Palindromic Repeats interspersed with bacteriophage derived spacer sequences." [RFAM:jd] -synonym: "Clustered_Regularly_Interspaced_Short_Palindromic_Repeat" EXACT [] -synonym: "CRISPR element" EXACT [] -xref: http:en.wikipedia.org/wiki/CRISPR -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0001460 -name: insulator_binding_site -def: "A protein_binding_site located within an insulator." [SO:ke] -comment: See tracker ID 2060908. -synonym: "insulator binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -relationship: part_of SO:0000627 ! insulator - -[Term] -id: SO:0001461 -name: enhancer_binding_site -def: "A protein_binding_site located within an enhancer." [SO:ke] -synonym: "enhancer binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -relationship: part_of SO:0000165 ! enhancer - -[Term] -id: SO:0001462 -name: contig_collection -def: "A collection of contigs." [SO:ke] -comment: See tracker ID: 2138359. -synonym: "contig collection" EXACT [] -is_a: SO:0001260 ! 
sequence_collection - -[Term] -id: SO:0001463 -name: lincRNA -def: "A multiexonic non-coding RNA transcribed by RNA polymerase II." [PMID:19182780, SO:ke] -synonym: "large intervening non-coding RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001464 -name: UST -def: "An EST spanning part or all of the untranslated regions of a protein-coding transcript." [SO:nlw] -synonym: "UTR sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001465 -name: three_prime_UST -def: "A UST located in the 3'UTR of a protein-coding transcript." [SO:nlw] -synonym: "3' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001466 -name: five_prime_UST -def: "An UST located in the 5'UTR of a protein-coding transcript." [SO:nlw] -synonym: "5' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001467 -name: RST -def: "A tag produced from a single sequencing read from a RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "RACE sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001468 -name: three_prime_RST -def: "A tag produced from a single sequencing read from a 3'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "3' RST" EXACT [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001469 -name: five_prime_RST -def: "A tag produced from a single sequencing read from a 5'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "5' RST" RELATED [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001470 -name: UST_match -def: "A match against an UST sequence." [SO:nlw] -synonym: "UST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001471 -name: RST_match -def: "A match against an RST sequence." [SO:nlw] -synonym: "RST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001472 -name: primer_match -def: "A nucleotide match to a primer sequence." [SO:nlw] -synonym: "primer match" EXACT [] -is_a: SO:0000347 ! 
nucleotide_match - -[Term] -id: SO:0001473 -name: miRNA_antiguide -def: "A region of the pri miRNA that basepairs with the guide to form the hairpin." [SO:ke] -synonym: "miRNA antiguide " EXACT [] -synonym: "miRNA passenger strand" EXACT [] -synonym: "miRNA star" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-05-27T03:35:43Z - -[Term] -id: SO:0001474 -name: trans_splice_junction -def: "The boundary between the spliced leader and the first exon of the mRNA." [SO:ke] -synonym: "trans-splice junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2009-07-13T04:50:49Z - -[Term] -id: SO:0001475 -name: outron -def: "A region of a primary transcript, that is removed via trans splicing." [PMID:16401417, SO:ke] -is_a: SO:0000835 ! primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-07-14T11:36:08Z - -[Term] -id: SO:0001476 -name: natural_plasmid -def: "A plasmid that occurs naturally." [SO:xp] -synonym: "natural plasmid" EXACT [] -is_a: SO:0000155 ! plasmid -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -intersection_of: has_quality SO:0000782 ! natural -created_by: kareneilbeck -creation_date: 2009-09-01T03:43:06Z - -[Term] -id: SO:0001477 -name: gene_trap_construct -def: "A gene trap construct is a type of engineered plasmid which is designed to integrate into a genome and produce a fusion transcript between exons of the gene into which it inserts and a reporter element in the construct. Gene traps contain a splice acceptor, do not contain promoter elements for the reporter, and are mutagenic. Gene traps may be bicistronic with the second cassette containing a promoter driving an a selectable marker." [ZFIN:dh] -synonym: "gene trap construct" EXACT [] -is_a: SO:0000637 ! 
engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:49:09Z - -[Term] -id: SO:0001478 -name: promoter_trap_construct -def: "A promoter trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when inserted in close proximity to a promoter element. Promoter traps typically do not contain promoter elements and are mutagenic." [ZFIN:dh] -synonym: "promoter trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:52:01Z - -[Term] -id: SO:0001479 -name: enhancer_trap_construct -def: "An enhancer trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when the expression from a basic minimal promoter is enhanced by genomic enhancer elements. Enhancer traps contain promoter elements and are not usually mutagenic." [ZFIN:dh] -synonym: "enhancer trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:53:26Z - -[Term] -id: SO:0001480 -name: PAC_end -def: "A region of sequence from the end of a PAC clone that may provide a highly specific marker." [ZFIN:mh] -synonym: "PAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000154 ! PAC -created_by: kareneilbeck -creation_date: 2009-09-09T05:18:12Z - -[Term] -id: SO:0001481 -name: RAPD -def: "RAPD is a 'PCR product' where a sequence variant is identified through the use of PCR with random primers." [ZFIN:mh] -synonym: "Random Amplification Polymorphic DNA" EXACT [] -is_a: SO:0000006 ! PCR_product -created_by: kareneilbeck -creation_date: 2009-09-09T05:26:10Z - -[Term] -id: SO:0001482 -name: shadow_enhancer -synonym: "shadow enhancer" EXACT [] -is_a: SO:0000165 ! 
enhancer -created_by: kareneilbeck -creation_date: 2009-09-09T05:29:29Z - -[Term] -id: SO:0001483 -name: SNV -def: "SNVs are single base pair positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0001484 -name: X_element_combinatorial_repeat -def: "An X element combinatorial repeat is a repeat region located between the X element and the telomere or adjacent Y' element." [http://www.yeastgenome.org/help/glossary.html] -comment: X element combinatorial repeats contain Tbf1p binding sites,\nand possible functions include a role in telomerase-independent telomere\nmaintenance via recombination or as a barrier against transcriptional\nsilencing. These are usually present as a combination of one or more of\nseveral types of smaller elements (designated A, B, C, or D). -synonym: "X element combinatorial repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T11:03:37Z - -[Term] -id: SO:0001485 -name: Y_prime_ element -def: "A Y' element is a repeat region (SO:0000657) located adjacent to telomeric repeats or X element combinatorial repeats, either as a single copy or tandem repeat of two to four copies." [http:http\://www.yeastgenome.org/help/glossary.html] -synonym: "Y' element" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T12:08:57Z - -[Term] -id: SO:0001486 -name: standard_draft -def: "The status of a whole genome sequence, where the data is minimally filtered or unfiltered, from any number of sequencing platforms, and is assembled into contigs. Genome sequence of this quality may harbor regions of poor quality and can be relatively incomplete." 
[DOI:10.1126] -synonym: "standard draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:48:32Z - -[Term] -id: SO:0001487 -name: high_quality_draft -def: "The status of a whole genome sequence, where overall coverage represents at least 90 percent of the genome." [DOI:10.1126] -synonym: "high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:52:36Z - -[Term] -id: SO:0001488 -name: improved_high_quality_draft -def: "The status of a whole genome sequence, where additional work has been performed, using either manual or automated methods, such as gap resolution." [DOI:10.1126] -synonym: "improved high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:54:35Z - -[Term] -id: SO:0001489 -name: annotation_directed_improved_draft -def: "The status of a whole genome sequence,where annotation, and verification of coding regions has occurred." [DOI:10.1126] -synonym: "annotation directed improvement" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:57:10Z - -[Term] -id: SO:0001490 -name: noncontiguous_finished -def: "The status of a whole genome sequence, where the assembly is high quality, closure approaches have been successful for most gaps, misassemblies and low quality regions." [DOI:10.1126] -synonym: "non contiguous finished" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:01:07Z - -[Term] -id: SO:0001491 -name: finished_genome -def: "The status of a whole genome sequence, with less than 1 error per 100,000 base pairs." [DOI:10.1126] -synonym: "finished" EXACT [] -synonym: "finished genome" EXACT [] -is_a: SO:0001499 ! 
whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:04:43Z - -[Term] -id: SO:0001492 -name: intronic_regulatory_region -def: "A regulatory region that is part of an intron." [SO:ke] -synonym: "intronic regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000188 ! intron -created_by: kareneilbeck -creation_date: 2009-11-08T02:48:02Z - -[Term] -id: SO:0001493 -name: centromere_DNA_Element_I -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region\ncomposed of 8-11bp which enables binding by the centromere binding factor 1(Cbf1p)." [PMID:11222754] -synonym: "CDEI" EXACT [] -synonym: "Centromere DNA Element I" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000577 ! centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:47:23Z - -[Term] -id: SO:0001494 -name: centromere_DNA_Element_II -def: "A centromere DNA Element II (CDEII) is part a conserved region of the centromere, consisting of a consensus region that is AT-rich and ~ 75-100 bp in length." [PMID:11222754] -synonym: "CDEII" EXACT [] -synonym: "centromere DNA Element II" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000577 ! centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:51:26Z - -[Term] -id: SO:0001495 -name: centromere DNA Element III -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region that consists of a 25-bp which enables binding by the centromere DNA binding factor 3 (CBF3) complex." [PMID:11222754] -synonym: "CDEIII" EXACT [] -synonym: "centromere DNA Element III" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000577 ! 
centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:54:47Z - -[Term] -id: SO:0001496 -name: telomeric_repeat -def: "The telomeric repeat is a repeat region, part of the chromosome, which in yeast, is a G-rich terminal sequence of the form (TG(1-3))n or more precisely ((TG)(1-6)TG(2-3))n." [PMID:8720065] -comment: The repeats are maintained by telomerase and there is generally 300 (+/-) 75 bp of TG(1-3) at a given end. Telomeric repeats function in completing chromosome replication and protecting the ends from degradation and end-to-end fusions. -synonym: "telomeric repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-09T06:00:42Z - -[Term] -id: SO:0001497 -name: X_element -def: "The X element is a conserved region, of the telomere, of ~475 bp that contains an ARS sequence and in most cases an Abf1p binding site." [http://www.yeastgenome.org/help/glossary.html#xelemcoresequence] -comment: Possible functions include roles in chromosomal segregation,\nmaintenance of chromosome stability, recombinational sequestering, or as a\nbarrier to transcriptional silencing. -synonym: "X element" RELATED [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T10:56:54Z - -[Term] -id: SO:0001498 -name: YAC_end -def: "A region of sequence from the end of a YAC clone that may provide a highly specific marker." [SO:ke] -synonym: "YAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000152 ! YAC -created_by: kareneilbeck -creation_date: 2009-11-19T11:07:18Z - -[Term] -id: SO:0001499 -name: whole_genome_sequence_status -def: "The status of whole genome sequence." [DOI:10.1126] -comment: This terms and children were added to SO in resonse to tracker request by patrick chain. The pape:r Genome Project Standards in a New Era of Sequencing. 
Science October 9th 2009, addresses these terms. -synonym: "whole genome sequence status" EXACT [] -is_a: SO:0000905 ! status -created_by: kareneilbeck -creation_date: 2009-10-23T12:47:47Z - -[Term] -id: SO:0005836 -name: regulatory_region -def: "A DNA sequence that controls the expression of a gene." [SO:ke] -subset: SOFA -synonym: "regulatory region" EXACT [] -xref: http://en.wikipedia.org/wiki/Regulatory_region "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0005837 -name: U14_snoRNA_primary_transcript -def: "The primary transcript of an evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA." [PMID:2251119] -synonym: "4.5S snRNA primary transcript" EXACT [] -synonym: "U14 snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0005841 -name: methylation_guide_snoRNA -def: "A snoRNA that specifies the site of 2'-O-ribose methylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA 2'-O-ribose methylation guide activity (GO:0030561). -synonym: "methylation guide snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0000580 ! methylation_guide_snoRNA_primary_transcript - -[Term] -id: SO:0005843 -name: rRNA_cleavage_RNA -def: "An ncRNA that is part of a ribonucleoprotein that cleaves the primary pre-rRNA transcript in the process of producing mature rRNA molecules." [GOC:kgc] -synonym: "rRNA cleavage RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000582 ! rRNA_cleavage_snoRNA_primary_transcript - -[Term] -id: SO:0005845 -name: exon_of_single_exon_gene -def: "An exon that is the only exon in a gene." [RSC:cb] -synonym: "exon of single exon gene" EXACT [] -synonym: "single_exon" RELATED [] -synonym: "singleton exon" EXACT [] -is_a: SO:0000147 ! 
exon - -[Term] -id: SO:0005847 -name: cassette_array_member -synonym: "cassette array member" EXACT [] -is_a: SO:0005848 ! gene_cassette_member - -[Term] -id: SO:0005848 -name: gene_cassette_member -synonym: "gene cassette member" EXACT [] -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0005849 -name: gene_subarray_member -synonym: "gene subarray member" EXACT [] -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0005850 -name: primer_binding_site -def: "Non-covalent primer binding site for initiation of replication, transcription, or reverse transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "primer binding site" EXACT [] -xref: http://en.wikipedia.org/wiki/Primer_binding_site "wiki" -is_a: SO:0000409 ! binding_site -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0005851 -name: gene_array -def: "An array includes two or more genes, or two or more gene subarrays, contiguously arranged where the individual genes, or subarrays, are either identical in sequence, or essentially so." [SO:ma] -comment: This would include, for example, a cluster of genes each encoding the major ribosomal RNAs and a cluster of histone gene subarrays. -synonym: "gene array" EXACT [] -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0005852 -name: gene_subarray -def: "A subarray is, by defintition, a member of a gene array (SO:0005851); the members of a subarray may differ substantially in sequence, but are closely related in function." [SO:ma] -comment: This would include, for example, a cluster of genes encoding different histones. -synonym: "gene subarray" EXACT [] -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0005853 -name: gene_cassette -def: "A gene that can be substituted for a related gene at a different site in the genome." [SGD:se] -comment: This would include, for example, the mating type gene cassettes of S. cerevisiae. 
Gene cassettes usually exist as linear sequences as part of a larger DNA molecule, such as a chromosome or plasmid. -synonym: "gene cassette" EXACT [] -xref: http://en.wikipedia.org/wiki/Gene_cassette "wiki" -is_a: SO:0000704 ! gene - -[Term] -id: SO:0005854 -name: gene_cassette_array -def: "An array of non-functional genes whose members, when captured by recombination form functional genes." [SO:ma] -comment: This would include, for example, the arrays of non-functional VSG genes of Trypanosomes. -synonym: "gene cassette array" EXACT [] -is_a: SO:0005855 ! gene_group -relationship: has_part SO:0005853 ! gene_cassette - -[Term] -id: SO:0005855 -name: gene_group -def: "A collection of related genes." [SO:ma] -subset: SOFA -synonym: "gene group" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0005856 -name: selenocysteine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -synonym: "selenocysteine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0005857 -name: selenocysteinyl_tRNA -def: "A tRNA sequence that has a selenocysteine anticodon, and a 3' selenocysteine binding region." [SO:ke] -synonym: "selenocysteinyl tRNA" EXACT [] -synonym: "selenocysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "selenocysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0005856 ! selenocysteine_tRNA_primary_transcript - -[Term] -id: SO:0005858 -name: syntenic_region -def: "A region in which two or more pairs of homologous markers occur on the same chromosome in two or more species." [http://www.informatics.jax.org/silverbook/glossary.shtml] -synonym: "syntenic region" EXACT [] -is_a: SO:0000330 ! implied link automatically realized ! conserved_region -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000860 ! 
syntenic - -[Term] -id: SO:0100001 -name: biochemical_region_of_peptide -def: "A region of a peptide that is involved in a biochemical function." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "biochemical motif" EXACT [] -synonym: "biochemical region of peptide" EXACT [] -synonym: "biochemical_region" RELATED [] -is_a: SO:0001067 ! polypeptide_motif - -[Term] -id: SO:0100002 -name: molecular_contact_region -def: "A region that is involved a contact with another molecule." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "molecular contact region" RELATED [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0100003 -name: intrinsically_unstructured_polypeptide_region -def: "A region of polypeptide chain with high conformational flexibility." [EBIBS:GAR] -subset: biosapiens -synonym: "disordered region" RELATED BS [] -synonym: "intrinsically unstructured polypeptide region" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0100004 -name: catmat_left_handed_three -def: "A motif of 3 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -75 bounds -100 to -50, res i+1: psi 140 bounds 110 to 170. An extra restriction of the length of the O to O distance would be useful, that it be less than 5 Angstrom. More precisely these two oxygens are the main chain carbonyl oxygen atoms of residues i-1 and i+1." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "catmat-3l" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100005 -name: catmat_left_handed_four -def: "A motif of 4 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i psi -10 bounds -50 to 30, res i+1: phi -90 bounds -120 to -60, res i+1: psi -10 bounds -50 to 30, res i+2: phi -75 bounds -100 to -50, res i+2: psi 140 bounds 110 to 170. 
The extra restriction of the length of the O to O distance is similar, that it be less than 5 Angstrom. In this case these two Oxygen atoms are the main chain carbonyl oxygen atoms of residues i-1 and i+2." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "catmat-4l" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100006 -name: catmat_right_handed_three -def: "A motif of 3 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -75 bounds -100 to -50, res i+1: psi 140 bounds 110 to 170. An extra restriction of the length of the O to O distance would be useful, that it be less than 5 Angstrom. More precisely these two oxygens are the main chain carbonyl oxygen atoms of residues i-1 and i+1." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "catmat-3r" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100007 -name: catmat_right_handed_four -def: "A motif of 4 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -90 bounds -120 to -60, res i+1: psi -10 bounds -50 to 30, res i+2: phi -75 bounds -100 to -50, res i+2: psi 140 bounds 110 to 170. The extra restriction of the length of the O to O distance is similar, that it be less than 5 Angstrom. In this case these two Oxygen atoms are the main chain carbonyl oxygen atoms of residues i-1 and i+2." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "catmat-4r" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100008 -name: alpha_beta_motif -def: "A motif of five consecutive residues and two H-bonds in which: H-bond between CO of residue(i) and NH of residue(i+4), H-bond between CO of residue(i) and NH of residue(i+3),Phi angles of residues(i+1), (i+2) and (i+3) are negative." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "alpha beta motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100009 -name: lipoprotein_signal_peptide -def: "A peptide that acts as a signal for both membrane translocation and lipid attachment in prokaryotes." [EBIBS:GAR] -subset: biosapiens -synonym: "lipoprotein signal peptide" EXACT [] -synonym: "prokaryotic membrane lipoprotein lipid attachment site" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0100010 -name: no_output -def: "An experimental region wherean analysis has been run and not produced any annotation." [EBIBS:GAR] -subset: biosapiens -synonym: "no output" EXACT BS [] -is_a: SO:0000703 ! experimental_result_region - -[Term] -id: SO:0100011 -name: cleaved_peptide_region -def: "The cleaved_peptide_regon is the a region of peptide sequence that is cleaved during maturation." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "cleaved peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0100012 -name: peptide_coil -def: "Irregular, unstructured regions of a protein's backbone, as distinct from the regular region (namely alpha helix and beta strand - characterised by specific patterns of main-chain hydrogen bonds)." [EBIBS:GAR] -subset: biosapiens -synonym: "coil" RELATED BS [] -synonym: "peptide coil" EXACT [] -synonym: "random coil" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100013 -name: hydrophobic_region_of_peptide -def: "Hydrophobic regions are regions with a low affinity for water." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "hydropathic" RELATED [] -synonym: "hydrophobic region of peptide" RELATED [] -synonym: "hydrophobic_region" EXACT [] -synonym: "hydrophobicity" RELATED [] -is_a: SO:0000839 ! 
polypeptide_region - -[Term] -id: SO:0100014 -name: n_terminal_region -def: "The amino-terminal positively-charged region of a signal peptide (approx 1-5 aa)." [EBIBS:GAR] -subset: biosapiens -synonym: "N-region" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0000418 ! signal_peptide - -[Term] -id: SO:0100015 -name: c_terminal_region -def: "The more polar, carboxy-terminal region of the signal peptide (approx 3-7 aa)." [EBIBS:GAR] -subset: biosapiens -synonym: "C-region" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0000418 ! signal_peptide - -[Term] -id: SO:0100016 -name: central_hydrophobic_region_of_signal_peptide -def: "The central, hydrophobic region of the signal peptide (approx 7-15 aa)." [EBIBS:GAR] -subset: biosapiens -synonym: "central hydrophobic region of signal peptide" EXACT [] -synonym: "central_hydrophobic_region" RELATED [] -synonym: "H-region" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0000418 ! signal_peptide - -[Term] -id: SO:0100017 -name: polypeptide_conserved_motif -def: "A conserved motif is a short (up to 20 amino acids) region of biological interest that is conserved in different proteins. They may or may not have functional or structural significance within the proteins in which they are found." [EBIBS:GAR] -subset: biosapiens -synonym: "motif" RELATED [] -is_a: SO:0001067 ! polypeptide_motif - -[Term] -id: SO:0100018 -name: polypeptide_binding_motif -def: "A polypeptide binding motif is a short (up to 20 amino acids) polypeptide region of biological interest that contains one or more amino acids experimentally shown to bind to a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "binding" RELATED [uniprot:feature_type] -synonym: "polypeptide binding motif" EXACT [] -is_a: SO:0100001 ! 
biochemical_region_of_peptide - -[Term] -id: SO:0100019 -name: polypeptide_catalytic_motif -def: "A polypeptide catalytic motif is a short (up to 20 amino acids) polypeptide region that contains one or more active site residues." [EBIBS:GAR] -subset: biosapiens -synonym: "catalytic_motif" RELATED [] -synonym: "polypeptide catalytic motif" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0100020 -name: polypeptide_DNA_contact -def: "Residues involved in interactions with DNA." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide DNA contact" EXACT [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0100021 -name: polypeptide_conserved_region -def: "A subsection of sequence with biological interest that is conserved in different proteins. They may or may not have functional or structural significance within the proteins in which they are found." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide conserved region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:1000002 -name: substitution -def: "Any change in genomic DNA caused by a single event." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1000005 -name: complex_substitution -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -synonym: "complex substitution" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000008 -name: point_mutation -def: "A single nucleotide change which has occurred at the same position of a corresponding nucleotide in a reference sequence." 
[SO:immuno_workshop] -subset: SOFA -synonym: "point mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Point_mutation "wiki" -is_a: SO:0001483 ! SNV - -[Term] -id: SO:1000009 -name: transition -def: "Change of a pyrimidine nucleotide, C or T, into an other pyrimidine nucleotide, or change of a purine nucleotide, A or G, into an other purine nucleotide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:1000010 -name: pyrimidine_transition -def: "A substitution of a pyrimidine, C or T, for another pyrimidine." [SO:ke] -synonym: "pyrimidine transition" EXACT [] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000011 -name: C_to_T_transition -def: "A transition of a cytidine to a thymine." [SO:ke] -synonym: "C to T transition" EXACT [] -is_a: SO:1000010 ! pyrimidine_transition - -[Term] -id: SO:1000012 -name: C_to_T_transition_at_pCpG_site -def: "The transition of cytidine to thymine occurring at a pCpG site as a consequence of the spontaneous deamination of 5'-methylcytidine." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "C to T transition at pCpG site" EXACT [] -is_a: SO:1000011 ! C_to_T_transition - -[Term] -id: SO:1000013 -name: T_to_C_transition -synonym: "T to C transition" EXACT [] -is_a: SO:1000010 ! pyrimidine_transition - -[Term] -id: SO:1000014 -name: purine_transition -def: "A substitution of a purine, A or G, for another purine." [SO:ke] -synonym: "purine transition" EXACT [] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000015 -name: A_to_G_transition -def: "A transition of an adenine to a guanine." [SO:ke] -synonym: "A to G transition" EXACT [] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000016 -name: G_to_A_transition -def: "A transition of a guanine to an adenine." [SO:ke] -synonym: "G to A transition" EXACT [] -is_a: SO:1000014 ! 
purine_transition - -[Term] -id: SO:1000017 -name: transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G, or vice versa." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -xref: http://en.wikipedia.org/wiki/Transversion "wiki" -is_a: SO:0001483 ! SNV - -[Term] -id: SO:1000018 -name: pyrimidine_to_purine_transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G." [SO:ke] -synonym: "pyrimidine to purine transversion" EXACT [] -is_a: SO:1000017 ! transversion - -[Term] -id: SO:1000019 -name: C_to_A_transversion -def: "A transversion from cytidine to adenine." [SO:ke] -synonym: "C to A transversion" EXACT [] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000020 -name: C_to_G_transversion -synonym: "C to G transversion" EXACT [] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000021 -name: T_to_A_transversion -def: "A transversion from T to A." [SO:ke] -synonym: "T to A transversion" EXACT [] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000022 -name: T_to_G_transversion -def: "A transversion from T to G." [SO:ke] -synonym: "T to G transversion" EXACT [] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000023 -name: purine_to_pyrimidine_transversion -def: "Change of a purine nucleotide, A or G , into a pyrimidine nucleotide C or T." [SO:ke] -synonym: "purine to pyrimidine transversion" EXACT [] -is_a: SO:1000017 ! transversion - -[Term] -id: SO:1000024 -name: A_to_C_transversion -def: "A transversion from adenine to cytidine." [SO:ke] -synonym: "A to C transversion" EXACT [] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000025 -name: A_to_T_transversion -def: "A transversion from adenine to thymine." [SO:ke] -synonym: "A to T transversion" EXACT [] -is_a: SO:1000023 ! 
purine_to_pyrimidine_transversion - -[Term] -id: SO:1000026 -name: G_to_C_transversion -def: "A transversion from guanine to cytidine." [SO:ke] -synonym: "G to C transversion" EXACT [] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000027 -name: G_to_T_transversion -def: "A transversion from guanine to thymine." [SO:ke] -synonym: "G to T transversion" EXACT [] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000028 -name: intrachromosomal_mutation -synonym: "intrachromosomal mutation" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000029 -name: chromosomal_deletion -def: "An incomplete chromosome." [SO:ke] -synonym: "(bacteria)&Dgr;" RELATED [] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(fungi)D" RELATED [] -synonym: "chromosomal deletion" EXACT [] -synonym: "deficiency" EXACT [] -xref: http://en.wikipedia.org/wiki/Chromosomal_deletion "wiki" -is_a: SO:0000550 ! aneuploid_chromosome -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000030 -name: chromosomal_inversion -synonym: "(bacteria)IN" RELATED [] -synonym: "(Drosophila)In" RELATED [] -synonym: "(fungi)In" RELATED [] -synonym: "chromosomal inversion" EXACT [] -xref: http://en.wikipedia.org/wiki/Chromosomal_inversion "wiki" -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000031 -name: interchromosomal_mutation -synonym: "interchromosomal mutation" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000032 -name: indel -def: "A sequence alteration which included an insertion and a deletion, affecting 2 or more bases." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html, http:http\://www.hgvs.org/mutnomen/recs-DNA.html#indel] -comment: Indels can have a different number of bases than the corresponding reference sequence. -xref: http://en.wikipedia.org/wiki/Indel "wiki" -is_a: SO:0001059 ! 
sequence_alteration - -[Term] -id: SO:1000035 -name: duplication -def: "One or more nucleotides are added between two adjacent nucleotides in the sequence; the inserted sequence derives from, or is identical in sequence to, nucleotides adjacent to insertion point." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "nucleotide duplication" EXACT [] -synonym: "nucleotide_duplication" RELATED [] -is_a: SO:0000667 ! insertion - -[Term] -id: SO:1000036 -name: inversion -def: "A continuous nucleotide sequence is inverted in the same position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1000037 -name: chromosomal_duplication -def: "An extra chromosome." [SO:ke] -synonym: "(Drosophila)Dp" RELATED [] -synonym: "(fungi)Dp" RELATED [] -synonym: "chromosomal duplication" EXACT [] -xref: http://en.wikipedia.org/wiki/Chromosomal_duplication "wiki" -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:1000038 -name: intrachromosomal_duplication -synonym: "intrachromosomal duplication" EXACT [] -is_a: SO:1000028 ! intrachromosomal_mutation -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000039 -name: direct_tandem_duplication -synonym: "direct tandem duplication" EXACT [] -is_a: SO:1000173 ! tandem_duplication - -[Term] -id: SO:1000040 -name: inverted_tandem_duplication -synonym: "inverted tandem duplication" EXACT [] -is_a: SO:1000173 ! tandem_duplication - -[Term] -id: SO:1000041 -name: intrachromosomal_transposition -synonym: "(Drosophila)Tp" RELATED [] -synonym: "intrachromosomal transposition" EXACT [] -is_a: SO:0000453 ! transposition -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:1000042 -name: compound_chromosome -synonym: "compound chromosome" EXACT [] -is_a: SO:1000183 ! 
chromosome_structure_variation - -[Term] -id: SO:1000043 -name: Robertsonian_fusion -synonym: "Robertsonian fusion" EXACT [] -xref: http://en.wikipedia.org/wiki/Robertsonian_fusion "wiki" -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000044 -name: chromosomal_translocation -synonym: "(Drosophila)T" RELATED [] -synonym: "(fungi)T" RELATED [] -synonym: "chromosomal translocation" EXACT [] -xref: http://en.wikipedia.org/wiki/Chromosomal_translocation "wiki" -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000045 -name: ring_chromosome -synonym: "(Drosophila)R" RELATED [] -synonym: "(fungi)C" RELATED [] -synonym: "ring chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Ring_chromosome "wiki" -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000046 -name: pericentric_inversion -synonym: "pericentric inversion" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000047 -name: paracentric_inversion -synonym: "paracentric inversion" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000048 -name: reciprocal_chromosomal_translocation -synonym: "reciprocal chromosomal translocation" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000049 -name: sequence_variation_affecting_transcript -def: "Any change in mature, spliced and processed, RNA that results from a change in the corresponding DNA sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting transcript" EXACT [] -synonym: "sequence variation affecting transcript" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:1000050 -name: sequence_variant_causing_no_change_in_transcript -def: "No effect on the state of the RNA." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing no change in transcript" RELATED [] -synonym: "sequence variant causing no change in transcript" EXACT [] -is_a: SO:1000049 ! 
sequence_variation_affecting_transcript - -[Term] -id: SO:1000052 -name: sequence_variation_affecting_complex_change_in_transcript -synonym: "mutation affecting complex change in transcript" EXACT [] -synonym: "sequence variation affecting complex change in transcript" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000054 -name: sequence_variation_affecting_coding_sequence -def: "Any of the amino acid coding triplets of a gene are affected by the DNA mutation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting coding sequence" EXACT [] -synonym: "sequence variation affecting coding sequence" RELATED [] -is_a: SO:1000079 ! sequence_variation_affecting_transcript_sequence - -[Term] -id: SO:1000055 -name: sequence_variant_causing_initiator_codon_change_in_transcript -def: "The DNA mutation changes, usually destroys, the first coding triplet of a gene. Usually prevents translation although another initiator codon may be used." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing initiator codon change in transcript" RELATED [] -synonym: "sequence variant causing initiator codon change in transcript" EXACT [] -is_a: SO:1000056 ! sequence_variant_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000056 -name: sequence_variant_causing_amino_acid_coding_codon_change_in_transcript -def: "The DNA mutation affects the amino acid coding sequence of a gene; this region includes both the initiator and terminator codons." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutaton causing amino acid coding codon change in transcript" RELATED [] -synonym: "sequence variant causing amino acid coding codon change in transcript" EXACT [] -is_a: SO:1000054 ! 
sequence_variation_affecting_coding_sequence - -[Term] -id: SO:1000057 -name: sequence_variant_causing_synonymous_codon_change_in_transcript -def: "The changed codon has the same translation product as the original codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing synonymous codon change in transcript" RELATED [] -synonym: "sequence variant causing synonymous codon change in transcript" EXACT [] -is_a: SO:1000056 ! sequence_variant_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000058 -name: sequence_variant_causing_non_synonymous_codon_change_in_transcript -def: "A DNA point mutation that causes a substitution of an amino acid by an other." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing non synonymous codon change in transcript" RELATED [] -synonym: "non-synonymous codon change in transcript" EXACT [] -synonym: "sequence variant causing non synonymous codon change in transcript" EXACT [] -is_a: SO:1000056 ! sequence_variant_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000059 -name: sequence_variant_causing_missense_codon_change_in_transcript -def: "The nucleotide change in the codon leads to a new codon coding for a new amino acid." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing missense codon change in transcript" RELATED [] -synonym: "sequence variant causing missense codon change in transcript" EXACT [] -is_a: SO:1000058 ! sequence_variant_causing_non_synonymous_codon_change_in_transcript - -[Term] -id: SO:1000060 -name: sequence_variant_causing_conservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change does not change the gross properties (size, charge, hydrophobicity) of the amino acid at that position." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -synonym: "mutation causing conservative missense codon change in transcript" RELATED [] -synonym: "sequence variant causing conservative missense codon change in transcript" EXACT [] -is_a: SO:1000059 ! sequence_variant_causing_missense_codon_change_in_transcript - -[Term] -id: SO:1000061 -name: sequence_variant_causing_nonconservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change changes the gross properties (size, charge, hydrophobicity) of the amino acid in that position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -synonym: "mutation causing nonconservative missense codon change in transcript" RELATED [] -synonym: "sequence variant causing nonconservative missense codon change in transcript" EXACT [] -is_a: SO:1000059 ! sequence_variant_causing_missense_codon_change_in_transcript - -[Term] -id: SO:1000062 -name: sequence_variant_causing_nonsense_codon_change_in_transcript -def: "The nucleotide change in the codon triplet creates a terminator codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing nonsense codon change in transcript" RELATED [] -synonym: "sequence variant causing nonsense codon change in transcript" EXACT [] -is_a: SO:1000056 ! sequence_variant_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000063 -name: sequence_variant_causing_terminator_codon_change_in_transcript -def: "The nucleotide change in the codon triplet changes the stop codon, causing an elongated transcript sequence." 
[SO:ke] -synonym: "mutation causing terminator codon change in transcript" RELATED [] -synonym: "sequence variant causing terminator codon change in transcript" EXACT [] -is_a: SO:1000054 ! sequence_variation_affecting_coding_sequence - -[Term] -id: SO:1000064 -name: sequence_variation_affecting_reading_frame -def: "An umbrella term for terms describing an effect of a sequence variation on the frame of translation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting reading frame" EXACT [] -synonym: "sequence variation affecting reading frame" RELATED [] -is_a: SO:1000054 ! sequence_variation_affecting_coding_sequence - -[Term] -id: SO:1000065 -name: frameshift_sequence_variation -def: "A mutation causing a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three." [SO:ke] -synonym: "frameshift mutation" EXACT [] -synonym: "frameshift sequence variation" RELATED [] -synonym: "out of frame mutation" RELATED [] -xref: http://en.wikipedia.org/wiki/Frameshift_mutation "wiki" -is_a: SO:1000064 ! sequence_variation_affecting_reading_frame - -[Term] -id: SO:1000066 -name: sequence_variant_causing_plus_1_frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, due to the insertion of a nucleotide." [SO:ke] -synonym: "plus 1 frameshift mutation" EXACT [] -synonym: "sequence variant causing plus 1 frameshift mutation" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000067 -name: sequence_variant_causing_minus_1_frameshift -def: "A mutation causing a disruption of the translational reading frame, due to the deletion of a nucleotide." [SO:ke] -synonym: "minus 1 frameshift mutation" EXACT [] -synonym: "sequence variant causing minus 1 frameshift" EXACT [] -is_a: SO:1000065 ! 
frameshift_sequence_variation - -[Term] -id: SO:1000068 -name: sequence_variant_causing_plus_2_frameshift -def: "A mutation causing a disruption of the translational reading frame, due to the insertion of two nucleotides." [SO:ke] -synonym: "plus 2 frameshift mutation" EXACT [] -synonym: "sequence variant causing plus 2 frameshift" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000069 -name: sequence_variant_causing_minus_2_frameshift -def: "A mutation causing a disruption of the translational reading frame, due to the deletion of two nucleotides." [SO:ke] -synonym: "minus 2 frameshift mutation" EXACT [] -synonym: "sequence variant causing minus 2 frameshift" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000070 -name: sequence_variant_affecting_transcript_processing -def: "Sequence variant affects the way in which the primary transcriptional product is processed to form the mature transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting transcript processing" RELATED [] -synonym: "sequence variant affecting transcript processing" EXACT [] -is_a: SO:1000079 ! sequence_variation_affecting_transcript_sequence - -[Term] -id: SO:1000071 -name: sequence_variant_affecting_splicing -def: "A sequence_variant_effect where the way in which the primary transcriptional product is processed to form the mature transcript, specifically by the removal (splicing) of intron sequences is changed." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting splicing" RELATED [] -synonym: "sequence variant affecting splicing" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:1000072 -name: sequence_variant_affecting_splice_donor -def: "A sequence_variant_effect that changes the splice donor sequence." 
[SO:ke] -synonym: "mutation affecting splice donor" RELATED [] -synonym: "sequence variant affecting splice donor" RELATED [] -synonym: "splice donor mutation" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000073 -name: sequence_variant_affecting_splice_acceptor -def: "A sequence_variant_effect that changes the splice acceptor sequence." [SO:ke] -synonym: "mutation affecting splicing" RELATED [] -synonym: "sequence variant affecting splice acceptor" RELATED [] -synonym: "splice acceptor mutation" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000074 -name: sequence_variant_causing_cryptic_splice_activation -def: "A sequence variant causing a new (functional) splice site." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: A cryptic splice site is only used when the natural splice site has been disrupted by a sequence alteration. -synonym: "cryptic splice activator sequence variant" EXACT [] -synonym: "mutation causing cryptic splice activator" RELATED [] -synonym: "sequence variant causing cryptic splice activator" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000075 -name: sequence_variant_affecting_editing -def: "Sequence variant affects the editing of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting editing" RELATED [] -synonym: "sequence variant affecting editing" EXACT [] -is_a: SO:1000070 ! sequence_variant_affecting_transcript_processing - -[Term] -id: SO:1000076 -name: sequence_variant_affecting_transcription -def: "Mutation affects the process of transcription, its initiation, progression or termination." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting transcription" RELATED [] -synonym: "sequence variant affecting transcription" EXACT [] -is_a: SO:1000049 ! 
sequence_variation_affecting_transcript - -[Term] -id: SO:1000078 -name: sequence_variant_decreasing_rate_of_transcription -def: "A sequence variation that decreases the rate a which transcription of the sequence occurs." [SO:ke] -synonym: "mutation decreasing rate of transcription" RELATED [] -synonym: "sequence variation decreasing rate of transcription" EXACT [] -is_a: SO:1000081 ! sequence_variant_affecting_rate_of_transcription - -[Term] -id: SO:1000079 -name: sequence_variation_affecting_transcript_sequence -synonym: "mutation affecting transcript sequence" EXACT [] -synonym: "sequence variation affecting transcript sequence" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000080 -name: sequence_variant_increasing_rate_of_transcription -synonym: "mutation increasing rate of transcription" RELATED [] -synonym: "sequence variation increasing rate of transcription" EXACT [] -is_a: SO:1000081 ! sequence_variant_affecting_rate_of_transcription - -[Term] -id: SO:1000081 -name: sequence_variant_affecting_rate_of_transcription -def: "A mutation that alters the rate a which transcription of the sequence occurs." [SO:ke] -synonym: "mutation affecting rate of transcription" RELATED [] -synonym: "sequence variant affecting rate of transcription" EXACT [] -is_a: SO:1000076 ! sequence_variant_affecting_transcription - -[Term] -id: SO:1000082 -name: sequence variant_affecting_transcript_stability -def: "Sequence variant affects the stability of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting transcript stability" RELATED [] -synonym: "sequence variant affecting transcript stability" EXACT [] -is_a: SO:1000079 ! sequence_variation_affecting_transcript_sequence - -[Term] -id: SO:1000083 -name: sequence_variant_increasing_transcript_stability -def: "Sequence variant increases the stability (half-life) of the transcript." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation increasing transcript stability" RELATED [] -synonym: "sequence variant increasing transcript stability" EXACT [] -is_a: SO:1000082 ! sequence variant_affecting_transcript_stability - -[Term] -id: SO:1000084 -name: sequence_variant_decreasing_transcript_stability -def: "Sequence variant decreases the stability (half-life) of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation decreasing transcript stability" RELATED [] -synonym: "sequence variant decreasing transcript stability" EXACT [] -is_a: SO:1000082 ! sequence variant_affecting_transcript_stability - -[Term] -id: SO:1000085 -name: sequence_variation_affecting_level_of_transcript -def: "A sequence variation that causes a change in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -synonym: "mutation affecting level of transcript" RELATED [] -synonym: "sequence variation affecting level of transcript" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000086 -name: sequence_variation_decreasing_level_of_transcript -def: "A sequence variation that causes a decrease in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -synonym: "mutation decreasing level of transcript" EXACT [] -synonym: "sequence variation decreasing level of transcript" RELATED [] -is_a: SO:1000085 ! sequence_variation_affecting_level_of_transcript - -[Term] -id: SO:1000087 -name: sequence_variation_increasing_level_of_transcript -def: "A sequence_variation that causes an increase in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -synonym: "mutation increasing level of transcript" EXACT [] -synonym: "sequence variation increasing level of transcript" EXACT [] -is_a: SO:1000085 ! 
sequence_variation_affecting_level_of_transcript - -[Term] -id: SO:1000088 -name: sequence_variant_affecting_translational_product -def: "Mutation causes a change in primary translation product of a transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting translational product" RELATED [] -synonym: "sequence variant affecting translational product" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:1000089 -name: sequence_variant_causing_no_change_of_translational_product -def: "The sequence variant at RNA level does not lead to any change in polypeptide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing no change of translational product" RELATED [] -synonym: "sequence variant causing no change of translational product" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000090 -name: sequence_variant_causing_uncharacterised_change_of_translational_product -def: "A sequence variant causing an uncharacterized change of translational product." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing uncharacterised change of translational product" RELATED [] -synonym: "sequence variant causing uncharacterised change of translational product" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000091 -name: sequence_variant_causing_partially_characterised_change_of_translational_product -def: "A sequence variant causing a partially uncharacterised change in translational product." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The nature of the mutation event is only partially characterised. -synonym: "mutation causing partially characterised change of translational product" RELATED [] -synonym: "sequence variant causing partially characterised change of translational product" EXACT [] -is_a: SO:1000090 ! 
sequence_variant_causing_uncharacterised_change_of_translational_product - -[Term] -id: SO:1000092 -name: sequence_variant_causing_complex_change_of_translational_product -def: "Any sequence variant effect that is known at nucleotide level but cannot be explained by using other key terms." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing complex change of translational product" RELATED [] -synonym: "sequence variant causing complex change of translational product" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000093 -name: sequence_variant_causing_amino_acid_substitution -def: "The replacement of a single amino acid by another." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing amino acid substitution" RELATED [] -synonym: "sequence variant causing amino acid substitution" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000094 -name: sequence_variant_causing_conservative_amino_acid_substitution -synonym: "mutation causing conservative amino acid substitution" RELATED [] -synonym: "sequence variant causing conservative amino acid substitution" EXACT [] -is_a: SO:1000093 ! sequence_variant_causing_amino_acid_substitution - -[Term] -id: SO:1000095 -name: sequence_variant_causing_nonconservative_amino_acid_substitution -synonym: "mutation causing nonconservative amino acid substitution" RELATED [] -synonym: "sequence variant causing nonconservative amino acid substitution" EXACT [] -is_a: SO:1000093 ! sequence_variant_causing_amino_acid_substitution - -[Term] -id: SO:1000096 -name: sequence_variant_causing_amino_acid_insertion -def: "The insertion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing amino acid insertion" RELATED [] -synonym: "sequence variant causing amino acid insertion" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000097 -name: sequence_variant_causing_amino_acid_deletion -def: "The deletion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing amino acid deletion" RELATED [] -synonym: "sequence variant causing amino acid deletion" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000098 -name: sequence_variant_causing_polypeptide_truncation -def: "The translational product is truncated at its C-terminus, usually a result of a nonsense codon change in transcript (SO:1000062)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing polypeptide truncation" RELATED [] -synonym: "sequence variant causing polypeptide truncation" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000099 -name: sequence_variant_causing_polypeptide_elongation -def: "The extension of the translational product at either (or both) the N-terminus and/or the C-terminus." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing polypeptide elongation" RELATED [] -synonym: "sequence variant causing polypeptide elongation" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000100 -name: mutation_causing_polypeptide_N_terminal_elongation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing polypeptide N terminal elongation" EXACT [] -synonym: "polypeptide N-terminal elongation" EXACT [] -is_a: SO:1000099 ! 
sequence_variant_causing_polypeptide_elongation - -[Term] -id: SO:1000101 -name: mutation_causing_polypeptide_C_terminal_elongation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing polypeptide C terminal elongation" EXACT [] -synonym: "polypeptide C-terminal elongation" EXACT [] -is_a: SO:1000099 ! sequence_variant_causing_polypeptide_elongation - -[Term] -id: SO:1000102 -name: sequence_variant_affecting_level_of_translational_product -synonym: "mutation affecting level of translational product" RELATED [] -synonym: "sequence variant affecting level of translational product" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000103 -name: sequence_variant_decreasing_level_of_translation_product -synonym: "mutationdecreasing level of translation product" RELATED [] -synonym: "sequence variant decreasing level of translation product" EXACT [] -is_a: SO:1000102 ! sequence_variant_affecting_level_of_translational_product - -[Term] -id: SO:1000104 -name: sequence_variant_increasing_level_of_translation_product -synonym: "mutationt increasing level of translation product" RELATED [] -synonym: "sequence variant increasing level of translation product" EXACT [] -is_a: SO:1000102 ! sequence_variant_affecting_level_of_translational_product - -[Term] -id: SO:1000105 -name: sequence_variant_affecting_polypeptide_amino_acid_sequence -synonym: "mutation affecting polypeptide amino acid sequence" RELATED [] -synonym: "sequence variant affecting polypeptide amino acid sequence" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000106 -name: mutation_causing_inframe_polypeptide_N_terminal_elongation -synonym: "inframe polypeptide N-terminal elongation" EXACT [] -synonym: "mutation causing inframe polypeptide N terminal elongation" EXACT [] -is_a: SO:1000100 ! 
mutation_causing_polypeptide_N_terminal_elongation - -[Term] -id: SO:1000107 -name: mutation_causing_out_of_frame_polypeptide_N_terminal_elongation -synonym: "mutation causing out of frame polypeptide N terminal elongation" EXACT [] -synonym: "out of frame polypeptide N-terminal elongation" EXACT [] -is_a: SO:1000100 ! mutation_causing_polypeptide_N_terminal_elongation - -[Term] -id: SO:1000108 -name: mutaton_causing_inframe_polypeptide_C_terminal_elongation -synonym: "inframe_polypeptide C-terminal elongation" EXACT [] -synonym: "mutaton causing inframe polypeptide C terminal elongation" EXACT [] -is_a: SO:1000101 ! mutation_causing_polypeptide_C_terminal_elongation - -[Term] -id: SO:1000109 -name: mutation_causing_out_of_frame_polypeptide_C_terminal_elongation -synonym: "mutation causing out of frame polypeptide C terminal elongation" EXACT [] -synonym: "out of frame polypeptide C-terminal elongation" EXACT [] -is_a: SO:1000101 ! mutation_causing_polypeptide_C_terminal_elongation - -[Term] -id: SO:1000110 -name: frame_restoring_sequence_variant -def: "A mutation that reverts the sequence of a previous frameshift mutation back to the initial frame." [SO:ke] -synonym: "frame restoring mutation" EXACT [] -synonym: "frame restoring sequence variant" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000111 -name: sequence_variant_affecting_3D_structure_of_polypeptide -def: "A mutation that changes the amino acid sequence of the peptide in such a way that it changes the 3D structure of the molecule." [SO:ke] -synonym: "mutation affecting 3D structure of polypeptide" RELATED [] -synonym: "sequence variant affecting 3D structure of polypeptide" EXACT [] -synonym: "sequence variant affecting 3D-structure of polypeptide" EXACT [] -is_a: SO:1000088 ! 
sequence_variant_affecting_translational_product - -[Term] -id: SO:1000112 -name: sequence_variant_causing_no_3D_structural_change -synonym: "mutation causing no 3D structural change" RELATED [] -synonym: "sequence variant causing no 3D structural change" EXACT [] -is_a: SO:1000111 ! sequence_variant_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000113 -name: sequence_variant_causing_uncharacterised_3D_structural_change -synonym: "mutation causing uncharacterised 3D structural change" RELATED [] -synonym: "sequence variant causing uncharacterised 3D structural change" EXACT [] -is_a: SO:1000111 ! sequence_variant_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000114 -name: sequence_variant_causing_partially_characterised_3D_structural_change -synonym: "mutation causing partially characterised 3D structural change" RELATED [] -synonym: "sequence variant causing partially characterised 3D structural change" EXACT [] -is_a: SO:1000113 ! sequence_variant_causing_uncharacterised_3D_structural_change - -[Term] -id: SO:1000115 -name: sequence_variant_causing_complex_3D_structural_change -synonym: "mutation causing complex 3D structural change" RELATED [] -synonym: "sequence variant causing complex 3D structural change" EXACT [] -is_a: SO:1000111 ! sequence_variant_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000116 -name: sequence_variant_causing_conformational_change -synonym: "mutation causing conformational change" RELATED [] -synonym: "sequence variant causing conformational change" EXACT [] -is_a: SO:1000111 ! sequence_variant_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000117 -name: sequence_variant_affecting_polypeptide_function -synonym: "mutation affecting polypeptide function" RELATED [] -synonym: "sequence variant affecting polypeptide function" EXACT [] -is_a: SO:1000088 ! 
sequence_variant_affecting_translational_product - -[Term] -id: SO:1000118 -name: sequence_variant_causing_loss_of_function_of_polypeptide -synonym: "loss of function of polypeptide" RELATED [] -synonym: "mutation causing loss of function of polypeptide" RELATED [] -synonym: "sequence variant causing loss of function of polypeptide" EXACT [] -is_a: SO:1000117 ! sequence_variant_affecting_polypeptide_function - -[Term] -id: SO:1000119 -name: sequence_variant_causing_inactive_ligand_binding_site -synonym: "mutation causing inactive ligand binding site" RELATED [] -synonym: "sequence variant causing inactive ligand binding site" EXACT [] -is_a: SO:1000118 ! sequence_variant_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000120 -name: sequence_variant_causing_inactive_catalytic_site -synonym: "mutation causing inactive catalytic site" RELATED [] -synonym: "sequence variant causing inactive catalytic site" EXACT [] -is_a: SO:1000119 ! sequence_variant_causing_inactive_ligand_binding_site - -[Term] -id: SO:1000121 -name: sequence_variant_causing_polypeptide_localization_change -synonym: "mutation causing polypeptide localization change" RELATED [] -synonym: "sequence variant causing polypeptide localization change" EXACT [] -is_a: SO:1000117 ! sequence_variant_affecting_polypeptide_function - -[Term] -id: SO:1000122 -name: sequence_variant_causing_polypeptide_post_translational_processing_change -synonym: "mutation causing polypeptide post translational processing change" RELATED [] -synonym: "polypeptide post-translational processing affected" EXACT [] -synonym: "sequence variant causing polypeptide post translational processing change" EXACT [] -is_a: SO:1000117 ! sequence_variant_affecting_polypeptide_function -is_a: SO:1000118 ! 
sequence_variant_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000123 -name: polypeptide_post_translational_processing_affected -synonym: "polypeptide_post-translational_processing_affected" RELATED [] -is_obsolete: true - -[Term] -id: SO:1000124 -name: sequence_variant_causing_partial_loss_of_function_of_polypeptide -synonym: "mutation causing partial loss of function of polypeptide" RELATED [] -synonym: "partial loss of function of polypeptide" EXACT [] -synonym: "sequence variant causing partial loss of function of polypeptide" EXACT [] -is_a: SO:1000118 ! sequence_variant_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000125 -name: sequence_variant_causing_gain_of_function_of_polypeptide -synonym: "gain of function of polypeptide" EXACT [] -synonym: "mutation causing gain of function of polypeptide" RELATED [] -synonym: "sequence variant causing gain of function of polypeptide" EXACT [] -is_a: SO:1000117 ! sequence_variant_affecting_polypeptide_function - -[Term] -id: SO:1000126 -name: sequence_variant_affecting_transcript_secondary_structure -def: "A sequence variant that affects the secondary structure (folding) of the RNA transcript molecule." [SO:ke] -synonym: "mutation affecting transcript secondary structure" RELATED [] -synonym: "sequence variant affecting transcript secondary structure" EXACT [] -is_a: SO:1000079 ! sequence_variation_affecting_transcript_sequence - -[Term] -id: SO:1000127 -name: sequence_variant_causing_compensatory_transcript_secondary_structure_mutation -synonym: "mutation causing compensatory transcript secondary structure mutation" RELATED [] -synonym: "sequence variant causing compensatory transcript secondary structure mutation" EXACT [] -is_a: SO:1000126 ! sequence_variant_affecting_transcript_secondary_structure - -[Term] -id: SO:1000132 -name: sequence_variant_effect -def: "The effect of a change in nucleotide sequence." [SO:ke] -comment: Updated after discussion with Peter Taschner - Feb 09. 
-synonym: "sequence variant effect" RELATED [] -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:0000400 ! sequence_attribute - -[Term] -id: SO:1000134 -name: sequence_variant_causing_polypeptide_fusion -synonym: "mutation causing polypeptide fusion" RELATED [] -synonym: "sequence variant causing polypeptide fusion" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000136 -name: autosynaptic_chromosome -synonym: "(Drosophila)A" RELATED [] -synonym: "autosynaptic chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000138 -name: homo_compound_chromosome -synonym: "homo compound chromosome" EXACT [] -synonym: "homo-compound chromosome" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:1000140 -name: hetero_compound_chromosome -synonym: "hetero compound chromosome" EXACT [] -synonym: "hetero-compound chromosome" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:1000141 -name: chromosome_fission -synonym: "chromosome fission" EXACT [] -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000142 -name: dexstrosynaptic_chromosome -synonym: "dexstrosynaptic chromosome" EXACT [] -is_a: SO:1000136 ! autosynaptic_chromosome - -[Term] -id: SO:1000143 -name: laevosynaptic_chromosome -synonym: "laevosynaptic chromosome" EXACT [] -is_a: SO:1000136 ! autosynaptic_chromosome - -[Term] -id: SO:1000144 -name: free_duplication -synonym: "free duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000145 -name: free_ring_duplication -synonym: "(Drosophila)R" RELATED [] -synonym: "free ring duplication" EXACT [] -is_a: SO:1000045 ! ring_chromosome -is_a: SO:1000144 ! free_duplication - -[Term] -id: SO:1000146 -name: complex_chromosomal_mutation -synonym: "complex chromosomal mutation" EXACT [] -is_a: SO:1000183 ! 
chromosome_structure_variation - -[Term] -id: SO:1000147 -name: deficient_translocation -def: "A translocation in which one of the four broken ends loses a segment before re-joining." [FB:reference_manual] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfT" RELATED [] -synonym: "deficient translocation" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000148 -name: inversion_cum_translocation -def: "The first two breaks are in the same chromosome, and the region between them is rejoined in inverted order to the other side of the first break, such that both sides of break one are present on the same chromosome. The remaining free ends are joined as a translocation with those resulting from the third break." [FB:reference_manual] -synonym: "(Drosophila)InT" RELATED [] -synonym: "(Drosophila)T" RELATED [] -synonym: "inversion cum translocation" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000149 -name: bipartite_duplication -def: "The (large) region between the first two breaks listed is lost, and the two flanking segments (one of them centric) are joined as a translocation to the free ends resulting from the third break." [FB:reference_manual] -synonym: "(Drosophila)bDp" RELATED [] -synonym: "bipartite duplication" EXACT [] -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000150 -name: cyclic_translocation -def: "Three breaks in three different chromosomes. The centric segment resulting from the first break listed is joined to the acentric segment resulting from the second, rather than the third." [FB:reference_manual] -synonym: "cyclic translocation" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000151 -name: bipartite_inversion -def: "Three breaks in the same chromosome; both central segments are inverted in place (i.e., they are not transposed)." 
[FB:reference_manual] -synonym: "(Drosophila)bIn" RELATED [] -synonym: "bipartite inversion" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000152 -name: uninverted_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)eDp" RELATED [] -synonym: "uninverted insertional duplication" EXACT [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000153 -name: inverted_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)iDp" RELATED [] -synonym: "inverted insertional duplication" EXACT [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000154 -name: insertional_duplication -def: "A chromosome duplication involving the insertion of a duplicated region." [SO:ke] -synonym: "(Drosophila)Dpp" RELATED [] -synonym: "insertional duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000155 -name: interchromosomal_transposition -synonym: "(Drosophila)Tp" RELATED [] -synonym: "interchromosomal transposition" EXACT [] -is_a: SO:0000453 ! transposition -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000156 -name: inverted_interchromosomal_transposition -synonym: "(Drosophila)iTp" RELATED [] -synonym: "inverted interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000157 -name: uninverted_interchromosomal_transposition -synonym: "(Drosophila)eTp" RELATED [] -synonym: "uninverted interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! 
interchromosomal_transposition - -[Term] -id: SO:1000158 -name: inverted_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)iTp" RELATED [] -synonym: "inverted intrachromosomal transposition" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000159 -name: uninverted_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)eTp" RELATED [] -synonym: "uninverted intrachromosomal transposition" EXACT [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000160 -name: unoriented_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [FB:reference_manual] -synonym: "(Drosophila)uDp" RELATED [] -synonym: "unoriented insertional duplication" EXACT [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000161 -name: unorientated_interchromosomal_transposition -synonym: "(Drosophila)uTp" RELATED [] -synonym: "unorientated interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000162 -name: unorientated_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." 
[FB:reference_manual] -synonym: "(Drosophila)uTp" RELATED [] -synonym: "unorientated intrachromosomal transposition" EXACT [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000170 -name: uncharacterised_chromosomal_mutation -synonym: "uncharacterised chromosomal mutation" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000171 -name: deficient_inversion -def: "Three breaks in the same chromosome; one central region lost, the other inverted." [FB:reference_manual] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfIn" RELATED [] -synonym: "deficient inversion" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000173 -name: tandem_duplication -synonym: "tandem duplication" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:1000175 -name: partially_characterised_chromosomal_mutation -synonym: "partially characterised chromosomal mutation" EXACT [] -is_a: SO:1000170 ! uncharacterised_chromosomal_mutation - -[Term] -id: SO:1000177 -name: sequence_variant_causing_uncharacterised_change_in_transcript -def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing uncharacterised change in transcript" RELATED [] -synonym: "sequence variant causing uncharacterised change in transcript" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000179 -name: sequence_variant_causing_partially_characterised_change_in_transcript -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing partially characterised change in transcript" RELATED [] -synonym: "sequence variant causing partially characterised change in transcript" EXACT [] -is_a: SO:1000177 ! 
sequence_variant_causing_uncharacterised_change_in_transcript - -[Term] -id: SO:1000180 -name: sequence_variant_affecting_gene_structure -def: "A sequence_variant_effect that changes the gene structure." [SO:ke] -synonym: "mutation affecting gene structure" RELATED [] -synonym: "sequence variant affecting gene structure" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:1000181 -name: sequence_variant_causing_gene_fusion -def: "A sequence_variant_effect that changes the gene structure by causing a fusion to another gene." [SO:ke] -synonym: "mutation causing gene fusion" RELATED [] -synonym: "sequence variant causing gene fusion" EXACT [] -is_a: SO:1000180 ! sequence_variant_affecting_gene_structure - -[Term] -id: SO:1000182 -name: chromosome_number_variation -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number." [SO:ke] -synonym: "chromosome number variation" EXACT [] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:1000183 -name: chromosome_structure_variation -synonym: "chromosome structure variation" EXACT [] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:1000184 -name: sequence_variant_causes_exon_loss -def: "A sequence variant affecting splicing and causes an exon loss." [SO:ke] -synonym: "mutation causes exon loss" RELATED [] -synonym: "sequence variant causes exon loss" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000185 -name: sequence_variant_causes_intron_gain -def: "A sequence variant effect, causing an intron to be gained by the processed transcript; usually a result of a donor acceptor mutation (SO:1000072)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causes intron gain" RELATED [] -synonym: "sequence variant causes intron gain" EXACT [] -is_a: SO:1000071 ! 
sequence_variant_affecting_splicing - -[Term] -id: SO:1000186 -name: sequence_variant_causing_cryptic_splice_donor_activation -synonym: "sequence variant causing cryptic splice donor activation" EXACT [] -is_a: SO:1000074 ! sequence_variant_causing_cryptic_splice_activation - -[Term] -id: SO:1001186 -name: sequence_variant_causing_cryptic_splice_acceptor_activation -synonym: "sequence variant causing cryptic splice acceptor activation" EXACT [] -is_a: SO:1000074 ! sequence_variant_causing_cryptic_splice_activation - -[Term] -id: SO:1001187 -name: alternatively_spliced_transcript -def: "A transcript that is alternatively spliced." [SO:xp] -synonym: "alternatively spliced transcript" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000877 ! alternatively_spliced - -[Term] -id: SO:1001188 -name: encodes_1_polypeptide -def: "A gene that is alternately spliced, but encodes only one polypeptide." [SO:ke] -synonym: "encodes 1 polypeptide" EXACT [] -is_a: SO:0000463 ! encodes_alternately_spliced_transcripts - -[Term] -id: SO:1001189 -name: encodes_greater_than_1_polypeptide -def: "A gene that is alternately spliced, and encodes more than one polypeptide." [SO:ke] -synonym: "encodes greater than 1 polypeptide" EXACT [] -is_a: SO:0000463 ! encodes_alternately_spliced_transcripts - -[Term] -id: SO:1001190 -name: encodes_different_polypeptides_different_stop -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences, but use different stop codons." [SO:ke] -synonym: "encodes different polypeptides different stop" EXACT [] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001191 -name: encodes_overlapping_peptides_different_start -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences, but use different start codons." 
[SO:ke] -synonym: "encodes overlapping peptides different start" EXACT [] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001192 -name: encodes_disjoint_polypeptides -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that do not have overlapping peptide sequences." [SO:ke] -synonym: "encodes disjoint polypeptides" EXACT [] -is_a: SO:1001189 ! encodes_greater_than_1_polypeptide - -[Term] -id: SO:1001193 -name: encodes_overlapping_polypeptides_different_start_and_stop -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences, but use different start and stop codons." [SO:ke] -synonym: "encodes overlapping polypeptides different start and stop" EXACT [] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001194 -name: alternatively_spliced_gene_encoding_greater_than_1_polypeptide_coding_regions_overlapping -is_obsolete: true - -[Term] -id: SO:1001195 -name: encodes_overlapping_peptides -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences." [SO:ke] -synonym: "encodes overlapping peptides" EXACT [] -is_a: SO:1001189 ! encodes_greater_than_1_polypeptide - -[Term] -id: SO:1001196 -name: cryptogene -def: "A maxicircle gene so extensively edited that it cannot be matched to its edited mRNA sequence." [SO:ma] -is_a: SO:0000654 ! implied link automatically realized ! maxicircle_gene -is_a: SO:0001431 ! implied link automatically realized ! cryptic_gene -intersection_of: SO:0000654 ! maxicircle_gene -intersection_of: has_quality SO:0000976 ! cryptic - -[Term] -id: SO:1001197 -name: dicistronic_primary_transcript -def: "A primary transcript that has the quality dicistronic." [SO:xp] -synonym: "dicistronic primary transcript" EXACT [] -is_a: SO:0000079 ! implied link automatically realized ! dicistronic_transcript -is_a: SO:0000631 ! implied link automatically realized ! 
polycistronic_primary_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:1001217 -name: member_of_regulon -synonym: "member of regulon" EXACT [] -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:1001244 -name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non_overlapping -synonym: "alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non-overlapping" RELATED [] -is_obsolete: true - -[Term] -id: SO:1001246 -name: CDS_independently_known -def: "A CDS with the evidence status of being independently known." [SO:xp] -synonym: "CDS independently known" EXACT [] -is_a: SO:0000316 ! implied link automatically realized ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000906 ! independently_known - -[Term] -id: SO:1001247 -name: orphan_CDS -def: "A CDS whose predicted amino acid sequence is unsupported by any experimental evidence or by any match with any other known sequence." [SO:ma] -synonym: "orphan CDS" EXACT [] -is_a: SO:1001254 ! implied link automatically realized ! CDS_predicted -intersection_of: SO:1001254 ! CDS_predicted -intersection_of: has_origin SO:0000910 ! orphan - -[Term] -id: SO:1001249 -name: CDS_supported_by_domain_match_data -def: "A CDS that is supported by domain similarity." [SO:xp] -synonym: "CDS supported by domain match data" EXACT [] -is_a: SO:1001251 ! implied link automatically realized ! CDS_supported_by_sequence_similarity_data -intersection_of: SO:1001251 ! CDS_supported_by_sequence_similarity_data -intersection_of: has_quality SO:0000908 ! supported_by_domain_match - -[Term] -id: SO:1001251 -name: CDS_supported_by_sequence_similarity_data -def: "A CDS that is supported by sequence similarity data." 
[SO:xp] -synonym: "CDS supported by sequence similarity data" EXACT [] -is_a: SO:1001254 ! implied link automatically realized ! CDS_predicted -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:1001254 -name: CDS_predicted -def: "A CDS that is predicted." [SO:ke] -synonym: "CDS predicted" EXACT [] -is_a: SO:0000316 ! implied link automatically realized ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000732 ! predicted - -[Term] -id: SO:1001255 -name: status_of_coding_sequence -is_obsolete: true - -[Term] -id: SO:1001259 -name: CDS_supported_by_EST_or_cDNA_data -def: "A CDS that is supported by similarity to EST or cDNA data." [SO:xp] -synonym: "CDS supported by EST or cDNA data" EXACT [] -is_a: SO:1001251 ! implied link automatically realized ! CDS_supported_by_sequence_similarity_data -intersection_of: SO:1001251 ! CDS_supported_by_sequence_similarity_data -intersection_of: has_quality SO:0000909 ! supported_by_EST_or_cDNA - -[Term] -id: SO:1001260 -name: internal_Shine_Dalgarno_sequence -def: "A Shine-Dalgarno sequence that stimulates recoding through interactions with the anti-Shine-Dalgarno in the RNA of small ribosomal subunits of translating ribosomes. The signal is only operative in Bacteria." [PMID:12519954, SO:ke] -synonym: "internal Shine Dalgarno sequence" EXACT [] -synonym: "internal Shine-Dalgarno sequence" EXACT [] -is_a: SO:0000243 ! internal_ribosome_entry_site -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001261 -name: recoded_mRNA -def: "The sequence of a mature mRNA transcript, modified before translation or during translation, usually by special cis-acting signals." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract] -synonym: "recoded mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! 
mRNA -intersection_of: has_quality SO:0000881 ! recoded - -[Term] -id: SO:1001262 -name: minus_1_translationally_frameshifted -def: "An attribute describing a translational frameshift of -1." [SO:ke] -synonym: "minus 1 translationally frameshifted" EXACT [] -is_a: SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:1001263 -name: plus_1_translationally_frameshifted -def: "An attribute describing a translational frameshift of +1." [SO:ke] -synonym: "plus 1 translationally frameshifted" EXACT [] -is_a: SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:1001264 -name: mRNA_recoded_by_translational_bypass -def: "A recoded_mRNA where translation was suspended at a particular codon and resumed at a particular non-overlapping downstream codon." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract] -synonym: "mRNA recoded by translational bypass" EXACT [] -is_a: SO:1001261 ! implied link automatically realized ! recoded_mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:1001265 -name: mRNA_recoded_by_codon_redefinition -def: "A recoded_mRNA that was modified by an alteration of codon meaning." [SO:ma] -synonym: "mRNA recoded by codon redefinition" EXACT [] -is_a: SO:1001261 ! implied link automatically realized ! recoded_mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000882 ! codon_redefined - -[Term] -id: SO:1001266 -name: stop_codon_redefinition_as_selenocysteine -is_obsolete: true - -[Term] -id: SO:1001267 -name: stop_codon_readthrough -is_obsolete: true - -[Term] -id: SO:1001268 -name: recoding_stimulatory_region -def: "A site in an mRNA sequence that stimulates the recoding of a region in the same mRNA." 
[http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12519954&dopt=Abstract] -synonym: "recoding stimulatory region" EXACT [] -synonym: "recoding stimulatory signal" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:1001269 -name: four_bp_start_codon -def: "A non-canonical start codon with 4 base pairs." [SO:ke] -synonym: "4bp start codon" EXACT [] -synonym: "four bp start codon" EXACT [] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001270 -name: stop_codon_redefinition_as_pyrrolysine -is_obsolete: true - -[Term] -id: SO:1001271 -name: archaeal_intron -def: "An intron characteristic of Archaeal tRNA and rRNA genes, where intron transcript generates a bulge-helix-bulge motif that is recognised by a splicing endoribonuclease." [PMID:9301331, SO:ma] -comment: Intron characteristic of tRNA genes; splices by an endonuclease-ligase mediated mechanism. -synonym: "archaeal intron" EXACT [] -is_a: SO:0001216 ! endonuclease_spliced_intron - -[Term] -id: SO:1001272 -name: tRNA_intron -def: "An intron found in tRNA that is spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -comment: Could be a cross product with Gene ontology, GO:0006388. -synonym: "pre-tRNA intron" EXACT [] -synonym: "tRNA intron" EXACT [] -is_a: SO:0001216 ! endonuclease_spliced_intron - -[Term] -id: SO:1001273 -name: CTG_start_codon -def: "A non-canonical start codon of sequence CTG." [SO:ke] -synonym: "CTG start codon" EXACT [] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001274 -name: SECIS_element -def: "The incorporation of selenocysteine into a protein sequence is directed by an in-frame UGA codon (usually a stop codon) within the coding region of the mRNA. Selenoprotein mRNAs contain a conserved secondary structure in the 3' UTR that is required for the distinction of UGA stop from UGA selenocysteine. 
The selenocysteine insertion sequence (SECIS) is around 60 nt in length and adopts a hairpin structure which is sufficiently well-defined and conserved to act as a computational screen for selenoprotein genes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00031] -synonym: "SECIS element" EXACT [] -xref: http://en.wikipedia.org/wiki/SECIS_element "wiki" -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001275 -name: retron -def: "Sequence coding for a short, single-stranded, DNA sequence via a retrotransposed RNA intermediate; characteristic of some microbial genomes." [SO:ma] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1001277 -name: three_prime_recoding_site -def: "The recoding stimulatory signal located downstream of the recoding site." [SO:ke] -synonym: "three prime recoding site" EXACT [] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001279 -name: three_prime_stem_loop_structure -def: "A recoding stimulatory region, the stem-loop secondary structural element is downstream of the redefined region." [PMID:12519954, SO:ke] -synonym: "three prime stem loop structure" EXACT [] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001280 -name: five_prime_recoding_site -def: "The recoding stimulatory signal located upstream of the recoding site." [SO:ke] -synonym: "five prime recoding site" EXACT [] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001281 -name: flanking_three_prime_quadruplet_recoding_signal -def: "Four base pair sequence immediately downstream of the redefined region. The redefined region is a frameshift site. The quadruplet is 2 overlapping codons." [PMID:12519954, SO:ke] -synonym: "flanking three prime quadruplet recoding signal" EXACT [] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001282 -name: UAG_stop_codon_signal -def: "A stop codon signal for a UAG stop codon redefinition." 
[SO:ke] -synonym: "UAG stop codon signal" EXACT [] -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001283 -name: UAA_stop_codon_signal -def: "A stop codon signal for a UAA stop codon redefinition." [SO:ke] -synonym: "UAA stop codon signal" EXACT [] -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001284 -name: regulon -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." [ISBN:0198506732] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Regulon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:1001285 -name: UGA_stop_codon_signal -def: "A stop codon signal for a UGA stop codon redefinition." [SO:ke] -synonym: "UGA stop codon signal" EXACT [] -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001286 -name: three_prime_repeat_recoding_signal -def: "A recoding stimulatory signal, downstream sequence important for recoding that contains repetitive elements." [PMID:12519954, SO:ke] -synonym: "three prime repeat recoding signal" EXACT [] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001287 -name: distant_three_prime_recoding_signal -def: "A recoding signal that is found many hundreds of nucleotides 3' of a redefined stop codon." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8709208&dopt=Abstract] -synonym: "distant three prime recoding signal" EXACT [] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001288 -name: stop_codon_signal -def: "A recoding stimulatory signal that is a stop codon and has effect on efficiency of recoding." [PMID:12519954, SO:ke] -comment: This term does not include the stop codons that are redefined. An example would be a stop codon that partially overlapped a frame shifting site would be an example stimulatory signal. -synonym: "stop codon signal" EXACT [] -is_a: SO:1001268 ! 
recoding_stimulatory_region - -[Term] -id: SO:2000061 -name: databank_entry -def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -synonym: "databank entry" EXACT [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:3000000 -name: gene_segment -def: "A gene component region which acts as a recombinational unit of a gene whose functional form is generated through somatic recombination." [GOC:add] -comment: Requested by tracker 2021594, July 2008, by Alex. -synonym: "gene segment" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Typedef] -id: adjacent_to -name: adjacent_to -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence." [SO:ke] -subset: SOFA - -[Typedef] -id: associated_with -name: associated_with -comment: This relationship is vague and up for discussion. - -[Typedef] -id: complete_evidence_for_feature -name: complete_evidence_for_feature -def: "B is complete_evidence_for_feature A if the extent (5' and 3' boundaries) and internal boundaries of B fully support the extent and internal boundaries of A." [SO:ke] -comment: If A is a feature with multiple regions such as a multi exon transcript, the supporting EST evidence is complete if each of the regions is supported by an equivalent region in B. Also there must be no extra regions in B that are not represented in A. This relationship was requested by jeltje on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true -is_a: evidence_for_feature ! 
evidence_for_feature - -[Typedef] -id: derives_from -name: derives_from -subset: SOFA -is_transitive: true - -[Typedef] -id: edited_from -name: edited_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:45Z - -[Typedef] -id: edited_to -name: edited_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:11Z - -[Typedef] -id: evidence_for_feature -name: evidence_for_feature -def: "B is evidence_for_feature A, if an instance of B supports the existence of A." [SO:ke] -comment: This relationship was requested by nlw on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true - -[Typedef] -id: exemplar_of -name: exemplar_of -def: "X is exemplar of Y if X is the best evidence for Y." [SO:ke] -comment: Tracker id: 2594157. - -[Typedef] -id: genome_of -name: genome_of - -[Typedef] -id: guided_by -name: guided_by -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:04Z - -[Typedef] -id: guides -name: guides -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:24Z - -[Typedef] -id: has_genome_location -name: has_genome_location -is_obsolete: true - -[Typedef] -id: has_intergral_part -name: has_integral_part -def: "X has_integral_part Y if and only if: X has_part Y and Y part_of X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: mRNA has_integral_part CDS. -created_by: kareneilbeck -creation_date: 2009-08-19T12:01:46Z - -[Typedef] -id: has_origin -name: has_origin - -[Typedef] -id: has_part -name: has_part -def: "Inverse of part_of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: operon has_part gene. - -[Typedef] -id: has_quality -name: has_quality -comment: The relationship between a feature and an attribute. - -[Typedef] -id: homologous_to -name: homologous_to -subset: SOFA -is_symmetric: true -is_a: similar_to ! 
similar_to - -[Typedef] -id: integral_part_of -name: integral_part_of -def: "X integral_part_of Y if and only if: X part_of Y and Y has_part X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: exon integral_part_of transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:03:28Z - -[Typedef] -id: member_of -name: member_of -comment: A subtype of part_of. Inverse is collection_of. Winston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. -subset: SOFA -is_transitive: true - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -subset: SOFA -is_a: homologous_to ! homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: paralogous_to -name: paralogous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: part_of -name: part_of -def: "X part_of Y if X is a subregion of Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: amino_acid part_of polypeptide. -subset: SOFA -is_transitive: true - -[Typedef] -id: partial_evidence_for_feature -name: partial_evidence_for_feature -def: "B is partial_evidence_for_feature A if the extent of B supports part_of but not all of A." [SO:ke] -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: position_of -name: position_of - -[Typedef] -id: processed_from -name: processed_from -def: "Inverse of processed_into." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA processed_from miRNA_primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:14:00Z - -[Typedef] -id: processed_into -name: processed_into -def: "X is processed_into Y if a region X is modified to create Y." 
[http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA_primary_transcript processed into miRNA. -created_by: kareneilbeck -creation_date: 2009-08-19T12:15:02Z - -[Typedef] -id: recombined_from -name: recombined_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:21:03Z - -[Typedef] -id: recombined_to -name: recombined_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:20:07Z - -[Typedef] -id: regulated_by -name: regulated_by -is_obsolete: true - -[Typedef] -id: sequence_of -name: sequence_of - -[Typedef] -id: similar_to -name: similar_to -subset: SOFA -is_symmetric: true - -[Typedef] -id: trans_spliced_from -name: trans_spliced_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:14Z - -[Typedef] -id: trans_spliced_to -name: trans_spliced_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:00Z - -[Typedef] -id: transcribed_from -name: transcribed_from -def: "X is transcribed_from Y if X is synthesized from template Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: primary_transcript transcribed_from gene. -created_by: kareneilbeck -creation_date: 2009-08-19T12:05:39Z - -[Typedef] -id: transcribed_to -name: transcribed_to -def: "Inverse of transcribed_from." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: gene transcribed_to primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:08:24Z - -[Typedef] -id: translates_to -name: translates_to -def: "Inverse of translation _of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: codon translates_to amino_acid. -created_by: kareneilbeck -creation_date: 2009-08-19T12:11:53Z - -[Typedef] -id: translation_of -name: translation_of -def: "X is translation of Y if X is translated by ribosome to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: Polypeptide translation_of CDS. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:09:59Z - -[Typedef] -id: variant_of -name: variant_of -def: "A' is a variant (mutation) of A = definition every instance of A' is either an immediate mutation of some instance of A, or there is a chain of immediate mutation processes linking A' to some instance of A." [SO:immuno_workshop] -comment: Added to SO during the immunology workshop, June 2007. This relationship was approved by Barry Smith. - diff --git a/annotation/NBIS/Ontology/SO/so_2_4_2.obo b/annotation/NBIS/Ontology/SO/so_2_4_2.obo deleted file mode 100644 index 5ea567df9..000000000 --- a/annotation/NBIS/Ontology/SO/so_2_4_2.obo +++ /dev/null @@ -1,15509 +0,0 @@ -format-version: 1.2 -date: 08:04:2010 11:18 -saved-by: kareneilbeck -auto-generated-by: OBO-Edit 2.1-beta3 -subsetdef: biosapiens "biosapiens protein feature ontology" -subsetdef: SOFA "SO feature annotation" -synonymtypedef: aa1 "amino acid 1 letter code" -synonymtypedef: aa3 "amino acid 3 letter code" -synonymtypedef: AAMOD "amino acid modification" -synonymtypedef: BS "biosapiens" -synonymtypedef: RNAMOD "RNA modification" EXACT -default-namespace: sequence -remark: autogenerated-by\: DAG-Edit version 1.417\nsaved-by\: eilbeck\ndate\: Tue May 11 15\:18\:44 PDT 2004\nversion\: $Revision\: 1.45 $ - -[Term] -id: SO:0000000 -name: Sequence_Ontology -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000001 -name: region -def: "A sequence_feature with an extent greater than zero. A nucleotide region is composed of bases and a polypeptide region is composed of amino acids." [SO:ke] -subset: SOFA -synonym: "sequence" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000002 -name: sequence_secondary_structure -def: "A folded sequence." [SO:ke] -synonym: "sequence secondary structure" EXACT [] -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000003 -name: G_quartet -def: "G-quartets are unusual nucleic acid structures consisting of a planar arrangement where each guanine is hydrogen bonded by hoogsteen pairing to another guanine in the quartet." [http://www.ncbi.nlm.nih.gov/pubmed/7919797?dopt=Abstract] -synonym: "G quartet" EXACT [] -synonym: "G tetrad" EXACT [] -synonym: "G-quadruplex" EXACT [] -synonym: "G-quartet" EXACT [] -synonym: "G-tetrad" EXACT [] -synonym: "G_quadruplex" EXACT [] -synonym: "guanine tetrad" EXACT [] -xref: http://en.wikipedia.org/wiki/G-quadruplex "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000004 -name: interior_coding_exon -subset: SOFA -synonym: "interior coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000005 -name: satellite_DNA -def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "satellite DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Satellite_DNA "wiki" -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000006 -name: PCR_product -def: "A region amplified by a PCR reaction." [SO:ke] -comment: This term is mapped to MGED. This term is now located in OBI, with the following ID OBI_0000406. -subset: SOFA -synonym: "amplicon" RELATED [] -synonym: "PCR product" EXACT [] -xref: http://en.wikipedia.org/wiki/RAPD "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000007 -name: read_pair -def: "A pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -synonym: "read-pair" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! 
contig - -[Term] -id: SO:0000008 -name: gene_sensu_your_favorite_organism -is_obsolete: true - -[Term] -id: SO:0000009 -name: gene_class -is_obsolete: true - -[Term] -id: SO:0000010 -name: protein_coding -synonym: "protein-coding" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000011 -name: non_protein_coding -synonym: "non protein-coding" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000012 -name: scRNA_primary_transcript -def: "The primary transcript of any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -synonym: "scRNA primary transcript" EXACT [] -synonym: "scRNA transcript" EXACT [] -synonym: "small cytoplasmic RNA" RELATED [] -synonym: "small cytoplasmic RNA transcript" EXACT [] -synonym: "small_cytoplasmic_RNA" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000013 -name: scRNA -def: "Any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a Eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -subset: SOFA -synonym: "small cytoplasmic RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000012 ! scRNA_primary_transcript - -[Term] -id: SO:0000014 -name: INR_motif -def: "A sequence element characteristic of some RNA polymerase II promoters required for the correct positioning of the polymerase for the start of transcription. Overlaps the TSS. The mammalian consensus sequence is YYAN(T|A)YY; the Drosophila consensus sequence is TCA(G|T)t(T|C). In each the A is at position +1 with respect to the TSS. Functionally similar to the TATA box element." [PMID:12651739] -synonym: "DMp2" RELATED [] -synonym: "initiator" EXACT [] -synonym: "initiator motif" EXACT [] -synonym: "INR motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0000015 -name: DPE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters; Positioned from +28 to +32 with respect to the TSS (+1). Experimental results suggest that the DPE acts in conjunction with the INR_motif to provide a binding site for TFIID in the absence of a TATA box to mediate transcription of TATA-less promoters. Consensus sequence (A|G)G(A|T)(C|T)(G|A|C)." [PMID:12651739\:12537576] -synonym: "CRWMGCGWKCGCTTS" NARROW [] -synonym: "downstream core promoter element" EXACT [] -synonym: "DPE motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000016 -name: BRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements at -37 to -32 with respect to the TSS (+1). Consensus sequence is (G|C)(G|C)(G|A)CGCC. Binds TFIIB." [PMID:12651739] -synonym: "B-recognition element" EXACT [] -synonym: "BRE motif" EXACT [] -synonym: "TFIIB recognition element" RELATED [] -synonym: "transcription factor B-recognition element" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000017 -name: PSE_motif -def: "A sequence element characteristic of the promoters of snRNA genes transcribed by RNA polymerase II or by RNA polymerase III. Located between -45 and -60 relative to the TSS. The human PSE_motif consensus sequence is TCACCNTNA(C|G)TNAAAAG(T|G)." [PMID:12651739] -synonym: "proximal sequence element" EXACT [] -synonym: "PSE motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000167 ! promoter - -[Term] -id: SO:0000018 -name: linkage_group -def: "A group of loci that can be grouped in a linear order representing the different degrees of linkage among the genes concerned." 
[ISBN:038752046] -synonym: "linkage group" EXACT [] -xref: http://en.wikipedia.org/wiki/Linkage_group "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000020 -name: RNA_internal_loop -def: "A region of double stranded RNA where the bases do not conform to WC base pairing. The loop is closed on both sides by canonical base pairing. If the interruption to base pairing occurs on one strand only, it is known as a bulge." [SO:ke] -synonym: "RNA internal loop" EXACT [] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000021 -name: asymmetric_RNA_internal_loop -def: "An internal RNA loop where one of the strands includes more bases than the corresponding region on the other strand." [SO:ke] -synonym: "asymmetric RNA internal loop" EXACT [] -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000022 -name: A_minor_RNA_motif -def: "A region forming a motif, composed of adenines, where the minor groove edges are inserted into the minor groove of another helix." [SO:ke] -synonym: "A minor RNA motif" EXACT [] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000023 -name: K_turn_RNA_motif -def: "The kink turn (K-turn) is an RNA structural motif that creates a sharp (~120 degree) bend between two continuous helices." [SO:ke] -synonym: "K turn RNA motif" EXACT [] -synonym: "K-turn" EXACT [] -synonym: "kink turn" EXACT [] -synonym: "kink-turn motif" EXACT [] -xref: http://en.wikipedia.org/wiki/K-turn "wiki" -is_a: SO:0000021 ! asymmetric_RNA_internal_loop - -[Term] -id: SO:0000024 -name: sarcin_like_RNA_motif -def: "A loop in ribosomal RNA containing the sites of attack for ricin and sarcin." [http://www.ncbi.nlm.nih.gov/pubmed/7897662] -synonym: "sarcin like RNA motif" EXACT [] -synonym: "sarcin/ricin domain" EXACT [] -synonym: "sarcin/ricin loop" EXACT [] -synonym: "sarcin/ricin RNA domain" EXACT [] -is_a: SO:0000021 ! 
asymmetric_RNA_internal_loop - -[Term] -id: SO:0000025 -name: symmetric_RNA_internal_loop -def: "An internal RNA loop where the extent of the loop on both stands is the same size." [SO:ke] -synonym: "A-minor RNA motif" EXACT [] -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000026 -name: RNA_junction_loop -synonym: "RNA junction loop" EXACT [] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000027 -name: RNA_hook_turn -synonym: "hook turn" RELATED [] -synonym: "hook-turn motif" EXACT [] -synonym: "RNA hook turn" EXACT [] -is_a: SO:0000026 ! RNA_junction_loop - -[Term] -id: SO:0000028 -name: base_pair -synonym: "base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Base_pair "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000029 -name: WC_base_pair -def: "The canonical base pair, where two bases interact via WC edges, with glycosidic bonds oriented cis relative to the axis of orientation." [PMID:12177293] -synonym: "canonical base pair" EXACT [] -synonym: "Watson Crick base pair" EXACT [] -synonym: "Watson-Crick base pair" RELATED [] -synonym: "Watson-Crick pair" EXACT [] -synonym: "WC base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000030 -name: sugar_edge_base_pair -def: "A type of non-canonical base-pairing." [PMID:12177293] -synonym: "sugar edge base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000031 -name: aptamer -def: "DNA or RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http://aptamer.icmb.utexas.edu] -xref: http://en.wikipedia.org/wiki/Aptamer "wiki" -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000032 -name: DNA_aptamer -def: "DNA molecules that have been selected from random pools based on their ability to bind other molecules." [http:aptamer.icmb.utexas.edu] -synonym: "DNA aptamer" EXACT [] -is_a: SO:0000031 ! 
aptamer - -[Term] -id: SO:0000033 -name: RNA_aptamer -def: "RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http://aptamer.icmb.utexas.edu] -synonym: "RNA aptamer" EXACT [] -is_a: SO:0000031 ! aptamer - -[Term] -id: SO:0000034 -name: morpholino_oligo -def: "Morpholino oligos are synthesized from four different Morpholino subunits, each of which contains one of the four genetic bases (A, C, G, T) linked to a 6-membered morpholine ring. Eighteen to 25 subunits of these four subunit types are joined in a specific order by non-ionic phosphorodiamidate intersubunit linkages to give a Morpholino." [http://www.gene-tools.com/] -synonym: "morpholino oligo" EXACT [] -is_a: SO:0001247 ! implied link automatically realized ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001183 ! morpholino - -[Term] -id: SO:0000035 -name: riboswitch -def: "A riboswitch is a part of an mRNA that can act as a direct sensor of small molecules to control their own expression. A riboswitch is a cis element in the 5' end of an mRNA, that acts as a direct sensor of metabolites." [PMID:2820954] -synonym: "riboswitch RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Riboswitch "wiki" -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000036 -name: matrix_attachment_site -def: "A DNA region that is required for the binding of chromatin to the nuclear matrix." 
[SO:ma] -synonym: "MAR" EXACT [] -synonym: "matrix association region" EXACT [] -synonym: "matrix attachment region" EXACT [] -synonym: "matrix attachment site" EXACT [] -synonym: "nuclear matrix association region" EXACT [] -synonym: "nuclear matrix attachment site" EXACT [] -synonym: "S/MAR" EXACT [] -synonym: "S/MAR element" RELATED [] -synonym: "scaffold attachment site" EXACT [] -synonym: "scaffold matrix attachment region" EXACT [] -synonym: "SMAR" EXACT [] -xref: http://en.wikipedia.org/wiki/Matrix_attachment_site "wiki" -is_a: SO:0000626 ! chromosomal_regulatory_element - -[Term] -id: SO:0000037 -name: locus_control_region -def: "A DNA region that includes DNAse hypersensitive sites located 5' to a gene that confers the high-level, position-independent, and copy number-dependent expression to that gene." [SO:ma] -synonym: "LCR" EXACT [] -synonym: "locus control element" RELATED [] -synonym: "locus control region" EXACT [] -xref: http://en.wikipedia.org/wiki/Locus_control_region "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000038 -name: match_set -def: "A collection of match parts." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000039 -name: match_part -def: "A part of a match, for example an hsp from blast is a match_part." [SO:ke] -subset: SOFA -synonym: "match part" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: part_of SO:0000343 ! match - -[Term] -id: SO:0000040 -name: genomic_clone -def: "A clone of a DNA region of a genome." [SO:ma] -synonym: "genomic clone" EXACT [] -is_a: SO:0000151 ! implied link automatically realized ! clone -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000041 -name: sequence_operation -def: "An operation that can be applied to a sequence, that results in a change." 
[SO:ke] -synonym: "sequence operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000042 -name: pseudogene_attribute -def: "An attribute of a pseudogene (SO:0000336)." [SO:ma] -synonym: "pseudogene attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000043 -name: processed_pseudogene -def: "A pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promoters, but often including a polyA tail." [SO:xp] -comment: Please not the synonym R psi M uses the spelled out form of the greek letter. -synonym: "processed pseudogene" EXACT [] -synonym: "pseudogene by reverse transcription" RELATED [] -synonym: "R psi G" RELATED [] -synonym: "retropseudogene" EXACT [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0000044 -name: pseudogene_by_unequal_crossing_over -def: "A pseudogene caused by unequal crossing over at recombination." [SO:ke] -synonym: "pseudogene by unequal crossing over" EXACT [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0000045 -name: delete -def: "To remove a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000046 -name: insert -def: "To insert a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000047 -name: invert -def: "To invert a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000048 -name: substitute -def: "To substitute a subsection of sequence for another." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000049 -name: translocate -def: "To translocate a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000050 -name: gene_part -def: "A part of a gene, that has no other route in the ontology back to region. This concept is necessary for logical inference as these parts must have the properties of region. It also allows us to associate all the parts of genes with a gene." 
[SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000051 -name: probe -def: "A DNA sequence used experimentally to detect the presence or absence of a complementary nucleic acid." [SO:ma] -xref: http://en.wikipedia.org/wiki/Hybridization_probe "wiki" -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000052 -name: assortment_derived_deficiency -synonym: "assortment-derived_deficiency" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000053 -name: sequence_variant_affecting_regulatory_region -def: "A sequence_variant_effect which changes the regulatory region of a gene." [SO:ke] -synonym: "mutation affecting regulatory region" RELATED [] -synonym: "sequence variant affecting regulatory region" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0000054 -name: aneuploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Aneuploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0000055 -name: hyperploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number as extra chromosomes are present." [SO:ke] -xref: http://en.wikipedia.org/wiki/Hyperploid "wiki" -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000056 -name: hypoploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number as some chromosomes are missing." [SO:ke] -xref: http://en.wikipedia.org/wiki/Hypoploid "wiki" -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000057 -name: operator -def: "A regulatory element of an operon to which activators or repressors bind thereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -synonym: "operator segment" EXACT [] -xref: http://en.wikipedia.org/wiki/Operator_(biology)#Operator "wiki" -is_a: SO:0000752 ! 
gene_group_regulatory_region - -[Term] -id: SO:0000058 -name: assortment_derived_aneuploid -synonym: "assortment-derived_aneuploid" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000059 -name: nuclease_binding_site -def: "A region of a molecule that binds to a nuclease." [SO:cb] -subset: SOFA -synonym: "nuclease binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0000060 -name: compound_chromosome_arm -comment: FLAG - this term is should probably be a part of rather than an is_a. -synonym: "compound chromosome arm" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:0000061 -name: restriction_enzyme_binding_site -def: "A region of a molecule that binds to a restriction enzyme." [SO:cb] -synonym: "restriction endonuclease binding site" EXACT [] -synonym: "restriction endonuclease recognition site" RELATED [] -synonym: "restriction enzyme binding site" EXACT [] -synonym: "restriction enzyme recognition site" RELATED [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000062 -name: deficient_intrachromosomal_transposition -def: "An intrachromosomal transposition whereby a translocation in which one of the four broken ends loses a segment before re-joining." [FB:reference_manual] -synonym: "deficient intrachromosomal transposition" EXACT [] -is_a: SO:1000029 ! implied link automatically realized ! chromosomal_deletion -is_a: SO:1000041 ! intrachromosomal_transposition -intersection_of: SO:1000041 ! intrachromosomal_transposition -intersection_of: has_part SO:0000159 ! deletion - -[Term] -id: SO:0000063 -name: deficient_interchromosomal_transposition -def: "An interchromosomal transposition whereby a translocation in which one of the four broken ends loses a segment before re-joining." [SO:ke] -synonym: "deficient interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! 
interchromosomal_transposition - -[Term] -id: SO:0000064 -name: gene_by_transcript_attribute -comment: This classes of attributes was added by MA to allow the broad description of genes based on qualities of the transcript(s). A product of SO meeting 2004. -is_obsolete: true - -[Term] -id: SO:0000065 -name: free_chromosome_arm -def: "A chromosome structure variation whereby an arm exists as an individual chromosome element." [SO:ke] -synonym: "free chromosome arm" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000066 -name: gene_by_polyadenylation_attribute -is_obsolete: true - -[Term] -id: SO:0000067 -name: gene_to_gene_feature -synonym: "gene to gene feature" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000068 -name: overlapping -def: "An attribute describing a gene that has a sequence that overlaps the sequence of another gene." [SO:ke] -is_a: SO:0000067 ! gene_to_gene_feature - -[Term] -id: SO:0000069 -name: inside_intron -def: "An attribute to describe a gene when it is located within the intron of another gene." [SO:ke] -synonym: "inside intron" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000070 -name: inside_intron_antiparallel -def: "An attribute to describe a gene when it is located within the intron of another gene and on the opposite strand." [SO:ke] -synonym: "inside intron antiparallel" EXACT [] -is_a: SO:0000069 ! inside_intron - -[Term] -id: SO:0000071 -name: inside_intron_parallel -def: "An attribute to describe a gene when it is located within the intron of another gene and on the same strand." [SO:ke] -synonym: "inside intron parallel" EXACT [] -is_a: SO:0000069 ! inside_intron - -[Term] -id: SO:0000072 -name: end_overlapping_gene -is_obsolete: true - -[Term] -id: SO:0000073 -name: five_prime_three_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's 3' region." 
[SO:ke] -synonym: "five prime-three prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000074 -name: five_prime_five_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's five prime region." [SO:ke] -synonym: "five prime-five prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000075 -name: three_prime_three_prime_overlap -def: "An attribute to describe a gene when the 3' region overlaps with another gene's 3' region." [SO:ke] -synonym: "three prime-three prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000076 -name: three_prime_five_prime_overlap -def: "An attribute to describe a gene when the 3' region overlaps with another gene's 5' region." [SO:ke] -synonym: "5' 3' overlap" EXACT [] -synonym: "three prime five prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000077 -name: antisense -def: "A region sequence that is complementary to a sequence of messenger RNA." [SO:ke] -xref: http://en.wikipedia.org/wiki/Antisense "wiki" -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000078 -name: polycistronic_transcript -def: "A transcript that is polycistronic." [SO:xp] -synonym: "polycistronic transcript" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000079 -name: dicistronic_transcript -def: "A transcript that is dicistronic." [SO:ke] -synonym: "dicistronic transcript" EXACT [] -is_a: SO:0000078 ! implied link automatically realized ! polycistronic_transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000080 -name: operon_member -synonym: "operon member" EXACT [] -is_a: SO:0000081 ! 
gene_array_member - -[Term] -id: SO:0000081 -name: gene_array_member -synonym: "gene array member" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000082 -name: processed_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000083 -name: macronuclear_sequence -synonym: "macronuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000084 -name: micronuclear_sequence -synonym: "micronuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000085 -name: gene_by_genome_location -is_obsolete: true - -[Term] -id: SO:0000086 -name: gene_by_organelle_of_genome -is_obsolete: true - -[Term] -id: SO:0000087 -name: nuclear_gene -def: "A gene from nuclear sequence." [SO:xp] -synonym: "nuclear gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_gene "wiki" -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000088 -name: mt_gene -def: "A gene located in mitochondrial sequence." [SO:xp] -synonym: "mitochondrial gene" EXACT [] -synonym: "mt gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_gene "wiki" -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000089 -name: kinetoplast_gene -def: "A gene located in kinetoplast sequence." [SO:xp] -synonym: "kinetoplast gene" EXACT [] -is_a: SO:0000088 ! implied link automatically realized ! mt_gene -intersection_of: SO:0000088 ! mt_gene -intersection_of: has_origin SO:0000741 ! kinetoplast - -[Term] -id: SO:0000090 -name: plastid_gene -def: "A gene from plastid sequence." [SO:xp] -synonym: "plastid gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000740 ! 
plastid_sequence - -[Term] -id: SO:0000091 -name: apicoplast_gene -def: "A gene from apicoplast sequence." [SO:xp] -synonym: "apicoplast gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0000092 -name: ct_gene -def: "A gene from chloroplast sequence." [SO:xp] -synonym: "chloroplast gene" EXACT [] -synonym: "ct gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000093 -name: chromoplast_gene -def: "A gene from chromoplast_sequence." [SO:xp] -synonym: "chromoplast gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000094 -name: cyanelle_gene -def: "A gene from cyanelle sequence." [SO:xp] -synonym: "cyanelle gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000095 -name: leucoplast_gene -def: "A plastid gene from leucoplast sequence." [SO:xp] -synonym: "leucoplast gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000096 -name: proplastid_gene -def: "A gene from proplastid sequence." [SO:ke] -synonym: "proplastid gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000748 ! 
proplastid_sequence - -[Term] -id: SO:0000097 -name: nucleomorph_gene -def: "A gene from nucleomorph sequence." [SO:xp] -synonym: "nucleomorph gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000098 -name: plasmid_gene -def: "A gene from plasmid sequence." [SO:xp] -synonym: "plasmid gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000749 ! plasmid_location - -[Term] -id: SO:0000099 -name: proviral_gene -def: "A gene from proviral sequence." [SO:xp] -synonym: "proviral gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000751 ! proviral_location - -[Term] -id: SO:0000100 -name: endogenous_retroviral_gene -def: "A proviral gene with origin endogenous retrovirus." [SO:xp] -synonym: "endogenous retroviral gene" EXACT [] -is_a: SO:0000099 ! implied link automatically realized ! proviral_gene -intersection_of: SO:0000099 ! proviral_gene -intersection_of: has_origin SO:0000903 ! endogenous_retroviral_sequence - -[Term] -id: SO:0000101 -name: transposable_element -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -synonym: "transposable element" EXACT [] -synonym: "transposon" EXACT [] -xref: http://en.wikipedia.org/wiki/Transposable_element "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000102 -name: expressed_sequence_match -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -synonym: "expressed sequence match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -def: "The end of the clone insert." 
[SO:ke] -subset: SOFA -synonym: "clone insert end" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000104 -name: polypeptide -alt_id: SO:0000358 -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The term 'protein' was merged with 'polypeptide'. Although 'protein' was a sequence_attribute and therefore meant to describe the quality rather than an actual feature, it was being used erroneously. It is replaced by 'peptidyl' as the polymer attribute. -subset: SOFA -synonym: "protein" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypeptide "wiki" -is_a: SO:0001411 ! biological_region -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000105 -name: chromosome_arm -def: "A region of the chromosome between the centromere and the telomere. Human chromosomes have two arms, the p arm (short) and the q arm (long) which are separated from each other by the centromere." [http://www.medterms.com/script/main/art.asp?articlekey=5152] -synonym: "chromosome arm" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000106 -name: non_capped_primary_transcript -is_obsolete: true - -[Term] -id: SO:0000107 -name: sequencing_primer -synonym: "sequencing primer" EXACT [] -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000108 -name: mRNA_with_frameshift -def: "An mRNA with a frameshift." [SO:xp] -synonym: "frameshifted mRNA" EXACT [] -synonym: "mRNA with frameshift" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000109 -name: sequence_variant_obs -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." 
[SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000110 -name: sequence_feature -def: "An extent of biological sequence." [SO:ke] -subset: SOFA -synonym: "located sequence feature" RELATED [] -synonym: "located_sequence_feature" EXACT [] -synonym: "sequence feature" EXACT [] -disjoint_from: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000111 -name: transposable_element_gene -def: "A gene encoded within a transposable element. For example gag, int, env and pol are the transposable element genes of the TY element in yeast." [SO:ke] -synonym: "transposable element gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: part_of SO:0000101 ! transposable_element -relationship: part_of SO:0000101 ! transposable_element - -[Term] -id: SO:0000112 -name: primer -def: "A short preexisting polynucleotide chain to which new deoxyribonucleotides can be added by DNA polymerase." [http://www.ornl.gov/TechResources/Human_Genome/publicat/primer2001/glossary.html] -subset: SOFA -synonym: "DNA primer" EXACT [] -synonym: "primer oligonucleotide" EXACT [] -synonym: "primer polynucleotide" EXACT [] -synonym: "primer sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Primer_(molecular_biology) "wiki" -is_a: SO:0000441 ! ss_oligo - -[Term] -id: SO:0000113 -name: proviral_region -def: "A viral sequence which has integrated into a host genome." [SO:ke] -subset: SOFA -synonym: "proviral region" EXACT [] -synonym: "proviral sequence" RELATED [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000114 -name: methylated_C -def: "A methylated deoxy-cytosine." [SO:ke] -subset: SOFA -synonym: "methylated C" EXACT [] -synonym: "methylated cytosine" EXACT [] -synonym: "methylated cytosine base" EXACT [] -synonym: "methylated cytosine residue" EXACT [] -is_a: SO:0000306 ! 
methylated_base_feature - -[Term] -id: SO:0000115 -name: transcript_feature -is_obsolete: true - -[Term] -id: SO:0000116 -name: edited -def: "An attribute describing a sequence that is modified by editing." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000117 -name: transcript_with_readthrough_stop_codon -is_obsolete: true - -[Term] -id: SO:0000118 -name: transcript_with_translational_frameshift -def: "A transcript with a translational frameshift." [SO:xp] -synonym: "transcript with translational frameshift" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000119 -name: regulated -def: "An attribute to describe a sequence that is regulated." [SO:ke] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns. -subset: SOFA -synonym: "pre mRNA" RELATED [] -synonym: "protein coding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000121 -name: forward_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "DNA forward primer" EXACT [] -synonym: "forward DNA primer" EXACT [] -synonym: "forward primer" EXACT [] -synonym: "forward primer oligo" EXACT [] -synonym: "forward primer oligonucleotide" EXACT [] -synonym: "forward primer polynucleotide" EXACT [] -synonym: "forward primer sequence" EXACT [] -is_a: SO:0000112 ! implied link automatically realized ! primer -intersection_of: SO:0000112 ! primer -intersection_of: has_quality SO:0001030 ! 
forward - -[Term] -id: SO:0000122 -name: RNA_sequence_secondary_structure -def: "A folded RNA sequence." [SO:ke] -synonym: "RNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000123 -name: transcriptionally_regulated -def: "An attribute describing a gene that is regulated at transcription." [SO:ma] -comment: By:. -synonym: "transcriptionally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -def: "Expressed in relatively constant amounts without regard to cellular environmental conditions such as the concentration of a particular substrate." [SO:ke] -synonym: "transcriptionally constitutive" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -def: "An inducer molecule is required for transcription to occur." [SO:ke] -synonym: "transcriptionally induced" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -def: "A repressor molecule is required for transcription to stop." [SO:ke] -synonym: "transcriptionally repressed" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000127 -name: silenced_gene -def: "A gene that is silenced." [SO:xp] -synonym: "silenced gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000893 ! silenced - -[Term] -id: SO:0000128 -name: gene_silenced_by_DNA_modification -def: "A gene that is silenced by DNA modification." [SO:xp] -synonym: "gene silenced by DNA modification" EXACT [] -is_a: SO:0000127 ! implied link automatically realized ! silenced_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000129 -name: gene_silenced_by_DNA_methylation -def: "A gene that is silenced by DNA methylation." 
[SO:xp] -synonym: "gene silenced by DNA methylation" EXACT [] -synonym: "methylation-silenced gene" EXACT [] -is_a: SO:0000128 ! implied link automatically realized ! gene_silenced_by_DNA_modification -intersection_of: SO:0000128 ! gene_silenced_by_DNA_modification -intersection_of: has_quality SO:0000895 ! silenced_by_DNA_methylation - -[Term] -id: SO:0000130 -name: post_translationally_regulated -def: "An attribute describing a gene that is regulated after it has been translated." [SO:ke] -synonym: "post translationally regulated" EXACT [] -synonym: "post-translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000131 -name: translationally_regulated -def: "An attribute describing a gene that is regulated as it is translated." [SO:ke] -synonym: "translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "DNA reverse primer" EXACT [] -synonym: "reverse DNA primer" EXACT [] -synonym: "reverse primer" EXACT [] -synonym: "reverse primer oligo" EXACT [] -synonym: "reverse primer oligonucleotide" EXACT [] -synonym: "reverse primer sequence" EXACT [] -is_a: SO:0000112 ! implied link automatically realized ! primer -intersection_of: SO:0000112 ! primer -intersection_of: has_quality SO:0001031 ! reverse - -[Term] -id: SO:0000133 -name: epigenetically_modified -def: "This attribute describes a gene where heritable changes other than those in the DNA sequence occur. These changes include: modification to the DNA (such as DNA methylation, the covalent modification of cytosine), and post-translational modification of histones." [SO:ke] -synonym: "epigenetically modified" EXACT [] -is_a: SO:0000401 ! 
gene_attribute - -[Term] -id: SO:0000134 -name: imprinted -def: "Imprinted genes are epigenetically modified genes that are expressed monoallelically according to their parent of origin." [SO:ke] -xref: http:http\://en.wikipedia.org/wiki/Genomic_imprinting "wiki" -is_a: SO:0000119 ! regulated -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000135 -name: maternally_imprinted -def: "The maternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "maternally imprinted" EXACT [] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -def: "The paternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "paternally imprinted" EXACT [] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -def: "Allelic exclusion is a process occurring in diploid organisms, where a gene is inactivated and not expressed in that cell." [SO:ke] -comment: Examples are x-inactivation and immunoglobulin formation. -synonym: "allelically excluded" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000138 -name: gene_rearranged_at_DNA_level -def: "An epigenetically modified gene, rearranged at the DNA level." [SO:xp] -synonym: "gene rearranged at DNA level" EXACT [] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000904 ! rearranged_at_DNA_level - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." 
[SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -synonym: "DNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000145 -name: recoded_codon -def: "A codon that has been redefined at translation. The redefinition may be as a result of translational bypass, translational frameshifting or stop codon readthrough." [SO:xp] -synonym: "recoded codon" EXACT [] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000146 -name: capped -def: "An attribute describing when a sequence, usually an mRNA is capped by the addition of a modified guanine nucleotide at the 5' end." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
-subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unavailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http\://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "yeast artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." 
[SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "bacterial artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000154 -name: PAC -def: "The P1-derived artificial chromosome are DNA constructs that are derived from the DNA of P1 bacteriophage. They can carry large amounts (about 100-300 kilobases) of other sequences for a variety of bioengineering purposes. It is one type of vector used to clone DNA fragments (100- to 300-kb insert size; average, 150 kb) in Escherichia coli cells." [http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. Drosophila melanogaster PACs carry an average insert size of 80 kb. The library represents a 6-fold coverage of the genome. -synonym: "P1" EXACT [] -synonym: "P1 artificial chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000155 -name: plasmid -def: "A self replicating, using the hosts cellular machinery, often circular nucleic acid molecule that is distinct from a chromosome in the organism." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "plasmid sequence" EXACT [] -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as a plasmid or packaged as a phage,since they retain the lambda cos sites." [SO:ma] -comment: Paper: vans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1):9-20. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cosmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Cosmid "wiki" -is_a: SO:0000440 ! 
vector_replicon - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma] -synonym: "phagemid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Phagemid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilizes the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource: mapping and analysis of 96 clones. Genomics 1996. -synonym: "fosmid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Fosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000159 -name: deletion -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_obsolete: true - -[Term] -id: SO:0000161 -name: methylated_A -def: "A modified RNA base in which adenine has been methylated." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "Consensus region of primary transcript bordering junction of splicing. 
A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinery. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. 
This comment is a place holder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000166 -name: enhancer_bound_by_factor -def: "An enhancer bound by a factor." [SO:xp] -synonym: "enhancer bound by factor" EXACT [] -is_a: SO:0000165 ! implied link automatically realized ! enhancer -intersection_of: SO:0000165 ! enhancer -intersection_of: has_quality SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." [SO:ke] -synonym: "pol I promoter" EXACT [] -synonym: "polymerase I promoter" EXACT [] -synonym: "RNA polymerase A promoter" EXACT [] -synonym: "RNApol I promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke] -synonym: "pol II promoter" RELATED [] -synonym: "polymerase II promoter" EXACT [] -synonym: "RNA polymerase B promoter" EXACT [] -synonym: "RNApol II promoter" EXACT [] -is_a: SO:0000727 ! implied link automatically realized ! CRM -is_a: SO:0001203 ! 
RNA_polymerase_promoter -relationship: has_part SO:0000174 ! TATA_box - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." [SO:ke] -synonym: "pol III promoter" EXACT [] -synonym: "polymerase III promoter" EXACT [] -synonym: "RNA polymerase C promoter" EXACT [] -synonym: "RNApol III promoter" EXACT [] -is_a: SO:0000727 ! implied link automatically realized ! CRM -is_a: SO:0001203 ! RNA_polymerase_promoter -relationship: has_part SO:0000174 ! TATA_box - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "CAAT box" EXACT [] -synonym: "CAAT signal" EXACT [] -synonym: "CAAT-box" EXACT [] -xref: http://en.wikipedia.org/wiki/CAAT_box "wiki" -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000173 -name: GC_rich_promoter_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "GC rich promoter region" EXACT [] -synonym: "GC-rich region" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "Goldstein-Hogness box" EXACT [] -synonym: "TATA box" EXACT [] -xref: http://en.wikipedia.org/wiki/TATA_box "wiki" -is_a: SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-10 signal" EXACT [] -synonym: "minus 10 signal" EXACT [] -synonym: "Pribnow box" EXACT [] -synonym: "Pribnow Schaller box" EXACT [] -synonym: "Pribnow-Schaller box" EXACT [] -xref: http://en.wikipedia.org/wiki/Pribnow_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000613 ! bacterial_RNApol_promoter - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-35 signal" EXACT [] -synonym: "minus 35 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000613 ! bacterial_RNApol_promoter - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." 
[SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." [http://www.dddmag.com/Glossary.aspx#r] -synonym: "class I" RELATED [] -synonym: "class I transposon" EXACT [] -synonym: "retrotransposon element" EXACT [] -xref: http://en.wikipedia.org/wiki/Retrotransposon "wiki" -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." [SO:ke] -synonym: "class II" RELATED [] -synonym: "class II transposon" EXACT [] -synonym: "DNA transposon" EXACT [] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -synonym: "U2 intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." 
[SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." [SO:ke] -synonym: "long terminal repeat retrotransposon" EXACT [] -synonym: "LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -synonym: "non LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -synonym: "5' intron" EXACT [] -synonym: "5' intron sequence" EXACT [] -synonym: "five prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000191 -name: interior_intron -synonym: "interior intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -synonym: "3' intron" EXACT [] -synonym: "3' intron sequence" RELATED [] -synonym: "three prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." 
[GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "LINE" EXACT [] -synonym: "LINE element" EXACT [] -synonym: "Long interspersed element" EXACT [] -synonym: "Long interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon (here, 'codon' is inclusive of the stop_codon)." [SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! 
exon - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke] -synonym: "translocated sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000201 -name: interior_exon -def: "An exon that is bounded by 5' and 3' splice sites." [PMID:10373547] -synonym: "interior exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The coding exon that is most 3-prime on a given transcript." [SO:ma] -synonym: "3' coding exon" RELATED [] -synonym: "three prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." [SO:ke] -synonym: "Short interspersed element" EXACT [] -synonym: "Short interspersed nuclear element" EXACT [] -synonym: "SINE element" EXACT [] -xref: http://en.wikipedia.org/wiki/Short_interspersed_nuclear_element "wiki" -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_variation -synonym: "simple sequence length polymorphism" RELATED [] -synonym: "simple sequence length variation" EXACT [] -synonym: "SSLP" RELATED [] -is_a: SO:0000248 ! sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "terminal inverted repeat element" EXACT [] -synonym: "TIR element" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -subset: SOFA -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253)." [SO:ke] -synonym: "tRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -def: "A primary transcript encoding alanyl tRNA." 
[SO:ke] -synonym: "alanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." [SO:ke] -synonym: "arginine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -synonym: "asparagine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -synonym: "aspartic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." [SO:ke] -synonym: "cysteine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." [SO:ke] -synonym: "glycine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." 
[SO:ke] -synonym: "histidine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke] -synonym: "isoleucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -synonym: "leucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -synonym: "lysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -synonym: "methionine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke] -synonym: "phenylalanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -synonym: "proline tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -synonym: "serine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -synonym: "threonine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -synonym: "tryptophan tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." [SO:ke] -synonym: "tyrosine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." [SO:ke] -synonym: "valine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear RNA (SO:0000274)." [SO:ke] -synonym: "snRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -synonym: "snoRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. 
It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds a TF complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The in-frame interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -synonym: "transcript attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterized by large modular imperfect long inverted repeats." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "foldback element" EXACT [] -synonym: "long inverted repeat element" RELATED [] -synonym: "LVR element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The sequences extending on either side of a specific region." 
[SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000240 -name: chromosome_variation -synonym: "chromosome variation" EXACT [] -is_a: SO:0001507 ! variant_collection -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000400 ! sequence_attribute -disjoint_from: SO:1000132 ! sequence_variant_effect -relationship: part_of SO:0001524 ! chromosomally_aberrant_genome - -[Term] -id: SO:0000241 -name: internal_UTR -def: "A UTR bordered by the terminal and initial codons of two CDSs in a polycistronic transcript. Every UTR is either 5', 3' or internal." [SO:cjm] -synonym: "internal UTR" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polycistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -synonym: "untranslated region polycistronic mRNA" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke] -synonym: "internal ribosomal entry sequence" EXACT [] -synonym: "internal ribosomal entry site" EXACT [] -synonym: "internal ribosome entry sequence" RELATED [] -synonym: "internal ribosome entry site" EXACT [] -synonym: "IRES" EXACT [] -xref: http://en.wikipedia.org/wiki/Internal_ribosome_entry_site "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_obsolete: true - -[Term] -id: SO:0000246 -name: polyadenylated -def: "A attribute describing the addition of a poly A tail to the 3' end of a mRNA molecule." [SO:ke] -is_a: SO:0000863 ! 
mRNA_attribute - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000248 -name: sequence_length_variation -synonym: "sequence length variation" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -synonym: "modified RNA base feature" EXACT [] -is_a: SO:0001236 ! base - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). 
Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000254 -name: alanyl_tRNA -def: "A tRNA sequence that has an alanine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "alanyl tRNA" EXACT [] -synonym: "alanyl-transfer ribonucleic acid" EXACT [] -synonym: "alanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000211 ! alanine_tRNA_primary_transcript - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -synonym: "rRNA small subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -def: "A tRNA sequence that has an asparagine anticodon, and a 3' asparagine binding region." [SO:ke] -synonym: "asparaginyl tRNA" EXACT [] -synonym: "asparaginyl-transfer ribonucleic acid" EXACT [] -synonym: "asparaginyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000213 ! asparagine_tRNA_primary_transcript - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -def: "A tRNA sequence that has an aspartic acid anticodon, and a 3' aspartic acid binding region." [SO:ke] -synonym: "aspartyl tRNA" EXACT [] -synonym: "aspartyl-transfer ribonucleic acid" EXACT [] -synonym: "aspartyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! 
tRNA -relationship: derives_from SO:0000214 ! aspartic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -def: "A tRNA sequence that has a cysteine anticodon, and a 3' cysteine binding region." [SO:ke] -synonym: "cysteinyl tRNA" EXACT [] -synonym: "cysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "cysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000215 ! cysteine_tRNA_primary_transcript - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -def: "A tRNA sequence that has a glutamine anticodon, and a 3' glutamine binding region." [SO:ke] -synonym: "glutaminyl tRNA" EXACT [] -synonym: "glutaminyl-transfer ribonucleic acid" EXACT [] -synonym: "glutaminyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000216 ! glutamic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -def: "A tRNA sequence that has a glutamic acid anticodon, and a 3' glutamic acid binding region." [SO:ke] -synonym: "glutamyl tRNA" EXACT [] -synonym: "glutamyl-transfer ribonucleic acid" EXACT [] -synonym: "glutamyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000217 ! glutamine_tRNA_primary_transcript - -[Term] -id: SO:0000261 -name: glycyl_tRNA -def: "A tRNA sequence that has a glycine anticodon, and a 3' glycine binding region." [SO:ke] -synonym: "glycyl tRNA" EXACT [] -synonym: "glycyl-transfer ribonucleic acid" RELATED [] -synonym: "glycyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000218 ! glycine_tRNA_primary_transcript - -[Term] -id: SO:0000262 -name: histidyl_tRNA -def: "A tRNA sequence that has a histidine anticodon, and a 3' histidine binding region." [SO:ke] -synonym: "histidyl tRNA" EXACT [] -synonym: "histidyl-transfer ribonucleic acid" EXACT [] -synonym: "histidyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000219 ! 
histidine_tRNA_primary_transcript - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -def: "A tRNA sequence that has an isoleucine anticodon, and a 3' isoleucine binding region." [SO:ke] -synonym: "isoleucyl tRNA" EXACT [] -synonym: "isoleucyl-transfer ribonucleic acid" EXACT [] -synonym: "isoleucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000220 ! isoleucine_tRNA_primary_transcript - -[Term] -id: SO:0000264 -name: leucyl_tRNA -def: "A tRNA sequence that has a leucine anticodon, and a 3' leucine binding region." [SO:ke] -synonym: "leucyl tRNA" EXACT [] -synonym: "leucyl-transfer ribonucleic acid" EXACT [] -synonym: "leucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000221 ! leucine_tRNA_primary_transcript - -[Term] -id: SO:0000265 -name: lysyl_tRNA -def: "A tRNA sequence that has a lysine anticodon, and a 3' lysine binding region." [SO:ke] -synonym: "lysyl tRNA" EXACT [] -synonym: "lysyl-transfer ribonucleic acid" EXACT [] -synonym: "lysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000222 ! lysine_tRNA_primary_transcript - -[Term] -id: SO:0000266 -name: methionyl_tRNA -def: "A tRNA sequence that has a methionine anticodon, and a 3' methionine binding region." [SO:ke] -synonym: "methionyl tRNA" EXACT [] -synonym: "methionyl-transfer ribonucleic acid" EXACT [] -synonym: "methionyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000223 ! methionine_tRNA_primary_transcript - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -def: "A tRNA sequence that has a phenylalanine anticodon, and a 3' phenylalanine binding region." [SO:ke] -synonym: "phenylalanyl tRNA" EXACT [] -synonym: "phenylalanyl-transfer ribonucleic acid" EXACT [] -synonym: "phenylalanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000224 ! 
phenylalanine_tRNA_primary_transcript - -[Term] -id: SO:0000268 -name: prolyl_tRNA -def: "A tRNA sequence that has a proline anticodon, and a 3' proline binding region." [SO:ke] -synonym: "prolyl tRNA" EXACT [] -synonym: "prolyl-transfer ribonucleic acid" EXACT [] -synonym: "prolyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000225 ! proline_tRNA_primary_transcript - -[Term] -id: SO:0000269 -name: seryl_tRNA -def: "A tRNA sequence that has a serine anticodon, and a 3' serine binding region." [SO:ke] -synonym: "seryl tRNA" EXACT [] -synonym: "seryl-transfer ribonucleic acid" RELATED [] -synonym: "seryl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000226 ! serine_tRNA_primary_transcript - -[Term] -id: SO:0000270 -name: threonyl_tRNA -def: "A tRNA sequence that has a threonine anticodon, and a 3' threonine binding region." [SO:ke] -synonym: "threonyl tRNA" EXACT [] -synonym: "threonyl-transfer ribonucleic acid" EXACT [] -synonym: "threonyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000227 ! threonine_tRNA_primary_transcript - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -def: "A tRNA sequence that has a tryptophan anticodon, and a 3' tryptophan binding region." [SO:ke] -synonym: "tryptophanyl tRNA" EXACT [] -synonym: "tryptophanyl-transfer ribonucleic acid" EXACT [] -synonym: "tryptophanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000228 ! tryptophan_tRNA_primary_transcript - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -def: "A tRNA sequence that has a tyrosine anticodon, and a 3' tyrosine binding region." [SO:ke] -synonym: "tyrosyl tRNA" EXACT [] -synonym: "tyrosyl-transfer ribonucleic acid" EXACT [] -synonym: "tyrosyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000229 ! 
tyrosine_tRNA_primary_transcript - -[Term] -id: SO:0000273 -name: valyl_tRNA -def: "A tRNA sequence that has a valine anticodon, and a 3' valine binding region." [SO:ke] -synonym: "valyl tRNA" EXACT [] -synonym: "valyl-transfer ribonucleic acid" EXACT [] -synonym: "valyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000230 ! valine_tRNA_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000231 ! snRNA_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. 
Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: derives_from SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000277 -name: bound_by_factor -def: "An attribute describing a sequence that is bound by another molecule." [SO:ke] -comment: Formerly called transcript_by_bound_factor. -synonym: "bound by factor" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000278 -name: transcript_bound_by_nucleic_acid -def: "A transcript that is bound by a nucleic acid." [SO:xp] -comment: Formerly called transcript_by_bound_nucleic_acid. -synonym: "transcript bound by nucleic acid" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000876 ! bound_by_nucleic_acid - -[Term] -id: SO:0000279 -name: transcript_bound_by_protein -def: "A transcript that is bound by a protein." [SO:xp] -comment: Formerly called transcript_by_bound_protein. -synonym: "transcript bound by protein" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000875 ! bound_by_protein - -[Term] -id: SO:0000280 -name: engineered_gene -def: "A gene that is engineered." [SO:xp] -synonym: "engineered gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000281 -name: engineered_foreign_gene -def: "A gene that is engineered and foreign." [SO:xp] -synonym: "engineered foreign gene" EXACT [] -is_a: SO:0000280 ! implied link automatically realized ! 
engineered_gene -is_a: SO:0000285 ! implied link automatically realized ! foreign_gene -is_a: SO:0000805 ! implied link automatically realized ! engineered_foreign_region -intersection_of: SO:0000280 ! engineered_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000282 -name: mRNA_with_minus_1_frameshift -def: "An mRNA with a minus 1 frameshift." [SO:xp] -synonym: "mRNA with minus 1 frameshift" EXACT [] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000866 ! minus_1_frameshift - -[Term] -id: SO:0000283 -name: engineered_foreign_transposable_element_gene -def: "A transposable_element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign transposable element gene" EXACT [] -is_a: SO:0000111 ! implied link automatically realized ! transposable_element_gene -is_a: SO:0000281 ! implied link automatically realized ! engineered_foreign_gene -intersection_of: SO:0000111 ! transposable_element_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartite and interrupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000285 -name: foreign_gene -def: "A gene that is foreign." [SO:xp] -synonym: "foreign gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "direct terminal repeat" RELATED [] -synonym: "long terminal repeat" EXACT [] -synonym: "LTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Long_terminal_repeat "wiki" -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000287 -name: fusion_gene -def: "A gene that is a fusion." [SO:xp] -synonym: "fusion gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Fusion_gene "wiki" -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000806 ! fusion - -[Term] -id: SO:0000288 -name: engineered_fusion_gene -def: "A fusion gene that is engineered." [SO:xp] -synonym: "engineered fusion gene" EXACT [] -is_a: SO:0000280 ! implied link automatically realized ! engineered_gene -is_a: SO:0000287 ! implied link automatically realized ! fusion_gene -intersection_of: SO:0000287 ! fusion_gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000289 -name: microsatellite -def: "A repeat_region containing repeat_units (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite" EXACT [] -synonym: "dinucleotide repeat microsatellite feature" EXACT [] -synonym: "dinucleotide repeat microsatellite locus" EXACT [] -synonym: "dinucleotide repeat microsatellite marker" EXACT [] -is_a: SO:0000289 ! 
microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite marker" RELATED [] -synonym: "rinucleotide repeat microsatellite" EXACT [] -synonym: "trinucleotide repeat microsatellite feature" EXACT [] -synonym: "trinucleotide repeat microsatellite locus" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_obsolete: true - -[Term] -id: SO:0000293 -name: engineered_foreign_repetitive_element -def: "A repetitive element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign repetitive element" EXACT [] -is_a: SO:0000657 ! implied link automatically realized ! repeat_region -is_a: SO:0000805 ! implied link automatically realized ! engineered_foreign_region -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -synonym: "U12 intron" EXACT [] -synonym: "U12-dependent intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! replicon - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "D-loop" EXACT [] -synonym: "displacement loop" RELATED [] -xref: http://en.wikipedia.org/wiki/D_loop "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -synonym: "recombination feature" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000299 -name: specific_recombination_site -synonym: "specific recombination site" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -synonym: "recombination feature of rearranged gene" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000301 -name: vertebrate_immune_system_gene_recombination_feature -synonym: "vertebrate immune system gene recombination feature" EXACT [] -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene recombination feature" EXACT [] -synonym: "J-RS" EXACT [] -is_a: SO:0000939 ! 
vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interrupted palindrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_base -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001720 ! epigenetically_modified_region - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_base - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_obsolete: true - -[Term] -id: SO:0000309 -name: computed_feature -is_obsolete: true - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_obsolete: true - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to: -is_obsolete: true - -[Term] -id: SO:0000312 -name: experimentally_determined -def: "Attribute to describe a feature that has been experimentally verified." 
[SO:ke] -synonym: "experimentally determined" EXACT [] -is_a: SO:0000789 ! validated - -[Term] -id: SO:0000313 -name: stem_loop -alt_id: SO:0000019 -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "RNA_hairpin_loop" EXACT [] -synonym: "stem loop" EXACT [] -synonym: "stem-loop" EXACT [] -xref: http://en.wikipedia.org/wiki/Stem_loop "wiki" -is_a: SO:0000122 ! RNA_sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: TSS -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cDNA clone" EXACT [] -is_a: SO:0000151 ! implied link automatically realized ! clone -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." 
[SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke] -synonym: "intronic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000321 -name: mRNA_with_plus_1_frameshift -def: "An mRNA with a plus 1 frameshift." [SO:ke] -synonym: "mRNA with plus 1 frameshift" EXACT [] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000868 ! plus_1_frameshift - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -synonym: "nuclease hypersensitive site" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "coding start" EXACT [] -synonym: "translation initiation site" EXACT [] -synonym: "translation start" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! 
rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke] -synonym: "coding end" EXACT [] -synonym: "translation termination site" EXACT [] -synonym: "translation_end" EXACT [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray oligo" EXACT [] -synonym: "microarray oligonucleotide" EXACT [] -is_a: SO:0000051 ! probe - -[Term] -id: SO:0000329 -name: mRNA_with_plus_2_frameshift -def: "An mRNA with a plus 2 frameshift." [SO:xp] -synonym: "mRNA with plus 2 frameshift" EXACT [] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000869 ! plus_2_framshift - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." 
[SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000335 -name: mRNA_with_minus_2_frameshift -def: "A mRNA with a minus 2 frameshift." [SO:ke] -synonym: "mRNA with minus 2 frameshift" EXACT [] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000867 ! minus_2_frameshift - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." 
[SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITEs do not encode proteins." [http://www.pnas.org/cgi/content/full/97/18/10083] -synonym: "miniature inverted repeat transposable element" EXACT [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome which promotes recombination." [SO:rd] -synonym: "recombination hotspot" EXACT [] -xref: http://en.wikipedia.org/wiki/Recombination_hotspot "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -synonym: "site specific recombination target region" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000346 -name: loxP_site -synonym: "Cre-recombination target region" RELATED [] -synonym: "loxP site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000348 -name: nucleic_acid -def: "An attribute describing a sequence consisting of nucleobases bound to repeating units. The forms found in nature are deoxyribonucleic acid (DNA), where the repeating units are 2-deoxy-D-ribose rings connected to a phosphate backbone, and ribonucleic acid (RNA), where the repeating units are D-ribose rings connected to a phosphate backbone." [CHEBI:33696, RSC:cb] -synonym: "nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleic_acid "wiki" -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000350 -name: FRT_site -def: "An inversion site found on the Saccharomyces cerevisiae 2 micron plasmid." [SO:ma] -synonym: "FLP recombination target region" EXACT [] -synonym: "FRT site" EXACT [] -is_a: SO:0000948 ! 
inversion_site - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "An attribute to decide a sequence of nucleotides, nucleotide analogs, or amino acids that has been designed by an experimenter and which may, or may not, correspond with any natural sequence." [SO:ma] -synonym: "synthetic sequence" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000352 -name: DNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a 2-deoxy-D-ribose ring connected to a phosphate backbone." [RSC:cb] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000353 -name: sequence_assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -synonym: "group 1 intron homing endonuclease target region" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which is co-inherited as the result of the lack of historic recombination within it." [SO:ma] -synonym: "haplotype block" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000356 -name: RNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a D-ribose ring connected to a phosphate backbone." [RSC:cb] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: flanked -def: "An attribute describing a region that is bounded either side by a particular kind of region." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000359 -name: floxed -def: "An attribute describing sequence that is flanked by Lox-P sites." 
[SO:ke] -xref: http://en.wikipedia.org/wiki/Floxed "wiki" -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000361 -name: FRT_flanked -def: "An attribute to describe sequence that is flanked by the FLP recombinase recognition site, FRT." [SO:ke] -synonym: "FRT flanked" EXACT [] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000362 -name: invalidated_by_chimeric_cDNA -def: "A cDNA clone constructed from more than one mRNA. Usually an experimental artifact." [SO:ma] -synonym: "invalidated by chimeric cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000363 -name: floxed_gene -def: "A transgene that is floxed." [SO:xp] -synonym: "floxed gene" EXACT [] -is_a: SO:0000902 ! implied link automatically realized ! transgene -intersection_of: SO:0000902 ! transgene -intersection_of: has_quality SO:0000359 ! floxed - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposable element." [SO:ke] -synonym: "transposable element flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "A region encoding an integrase which acts at a site adjacent to it (attI_site) to insert DNA which must include but is not limited to an attC_site." [SO:as] -xref: http://en.wikipedia.org/wiki/Integron "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! 
junction - -[Term] -id: SO:0000367 -name: attI_site -def: "A region within an integron, adjacent to an integrase, at which site specific recombination involving an attC_site takes place." [SO:as] -synonym: "attI site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -is_obsolete: true - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/~smaloy/Glossary/C.html] -synonym: "conjugative transposon" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0000373 -name: recombinationally_inverted_gene -def: "A recombinationally rearranged gene by inversion." [SO:xp] -synonym: "recombinationally inverted gene" EXACT [] -is_a: SO:0000456 ! implied link automatically realized ! 
recombinationally_rearranged_gene -intersection_of: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: has_quality SO:1000036 ! inversion - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -is_a: SO:0000372 ! implied link automatically realized ! enzymatic_RNA -intersection_of: SO:0000372 ! enzymatic_RNA -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000375 -name: rRNA_5_8S -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 6S RNA represses expression from a sigma70-dependent promoter during stationary phase." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S RNA" EXACT [] -synonym: "RNA 6S" EXACT [] -xref: http://en.wikipedia.org/wiki/6S_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. 
The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB RsmB RNA" EXACT [] -synonym: "CsrB-RsmB RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -synonym: "DsrA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/DsrA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -synonym: "GcvB RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/GcvB_RNA "wiki" -is_a: SO:0000378 ! 
DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http://rnaworld.bio.ku.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -is_a: SO:0000715 ! implied link automatically realized ! RNA_motif -intersection_of: SO:0000715 ! RNA_motif -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000381 -name: group_IIA_intron -synonym: "group IIA intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -synonym: "group IIB intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -synonym: "MicF RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MicF_RNA "wiki" -is_a: SO:0000644 ! antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -synonym: "OxyS RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/OxyS_RNA "wiki" -is_a: SO:0000370 ! 
small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterized activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -synonym: "RprA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RprA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -synonym: "RRE RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -synonym: "spot-42 RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Spot_42_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesizes telomeric DNA." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. 
The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -def: "An attribute describes a quality of sequence." [SO:ke] -synonym: "sequence attribute" EXACT [] -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0000401 -name: gene_attribute -synonym: "gene attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_obsolete: true - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0005837 ! U14_snoRNA_primary_transcript - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. 
It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron. Twintrons are group II or III introns, into which another group II or III intron has been transposed." [PMID:1899376, PMID:7823908] -xref: http://en.wikipedia.org/wiki/Twintron "wiki" -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. 
It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: Discrete. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A region of a molecule that binds to a protein." [SO:ke] -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000411 -name: rescue_region -def: "A region that rescues." [SO:xp] -synonym: "rescue fragment" EXACT [] -synonym: "rescue region" EXACT [] -synonym: "rescue segment" RELATED [] -is_a: SO:0000695 ! implied link automatically realized ! reagent -intersection_of: SO:0000695 ! reagent -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000414 -name: invalidated_by_genomic_contamination -def: "An attribute to describe a feature that is invalidated due to genomic contamination." [SO:ke] -synonym: "invalidated by genomic contamination" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000415 -name: invalidated_by_genomic_polyA_primed_cDNA -def: "An attribute to describe a feature that is invalidated due to polyA priming." [SO:ke] -synonym: "invalidated by genomic polyA primed cDNA" EXACT [] -is_a: SO:0000790 ! 
invalidated - -[Term] -id: SO:0000416 -name: invalidated_by_partial_processing -def: "An attribute to describe a feature that is invalidated due to partial processing." [SO:ke] -synonym: "invalidated by partial processing" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000417 -name: polypeptide_domain -alt_id: BS:00012 -alt_id: BS:00134 -alt_id: SO:0001069 -def: "A structurally or functionally defined protein region. In proteins with multiple domains, the combination of the domains determines the function of the protein. A region which has been shown to recur throughout evolution." [EBIBS:GAR] -comment: Range. Old definition from before biosapiens: A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains. -subset: biosapiens -synonym: "ca_bind" RELATED BS [uniprot:feature_type] -synonym: "DNA_bind" RELATED BS [uniprot:feature_type] -synonym: "domain" BROAD BS [uniprot:feature_type] -synonym: "np_bind" RELATED BS [uniprot:feature_type] -synonym: "polypeptide domain" EXACT [] -synonym: "polypeptide_structural_domain" EXACT BS [] -synonym: "structural domain" BROAD BS [] -synonym: "zn_fing" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. 
-subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -synonym: "5' TIR" EXACT [] -synonym: "five prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -synonym: "3' TIR" EXACT [] -synonym: "three prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -synonym: "U5 long terminal repeat region" EXACT [] -synonym: "U5 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000423 -name: R_LTR_region -synonym: "R long terminal repeat region" EXACT [] -synonym: "R LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000424 -name: U3_LTR_region -synonym: "U3 long terminal repeat region" EXACT [] -synonym: "U3 LTR region" EXACT [] -is_a: SO:0000848 ! 
LTR_component - -[Term] -id: SO:0000425 -name: five_prime_LTR -synonym: "5' long terminal repeat" EXACT [] -synonym: "5' LTR" EXACT [] -synonym: "five prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -synonym: "3' long terminal repeat" EXACT [] -synonym: "3' LTR" EXACT [] -synonym: "three prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -synonym: "R 5' long term repeat region" EXACT [] -synonym: "R five prime LTR region" EXACT [] -is_a: SO:0000423 ! R_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -synonym: "U5 5' long terminal repeat region" EXACT [] -synonym: "U5 five prime LTR region" EXACT [] -is_a: SO:0000422 ! U5_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -synonym: "U3 5' long term repeat region" EXACT [] -synonym: "U3 five prime LTR region" EXACT [] -is_a: SO:0000424 ! U3_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -synonym: "R 3' long terminal repeat region" EXACT [] -synonym: "R three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -synonym: "U3 3' long terminal repeat region" EXACT [] -synonym: "U3 three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -synonym: "U5 3' long terminal repeat region" EXACT [] -synonym: "U5 three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -synonym: "non LTR retrotransposon polymeric tract" EXACT [] -is_a: SO:0000657 ! 
repeat_region -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: target_site_duplication -def: "A sequence of the target DNA that is duplicated when a transposable element or phage inserts; usually found at each end the insertion." [http://www.koko.gov.my/CocoaBioTech/Glossaryt.html] -synonym: "target site duplication" EXACT [] -is_a: SO:0000314 ! direct_repeat -intersection_of: SO:0000314 ! direct_repeat -intersection_of: derives_from SO:0000101 ! transposable_element -relationship: derives_from SO:0000101 ! transposable_element - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." [SO:ke] -synonym: "LTR retrotransposon poly purine tract" RELATED [] -synonym: "RR tract" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -synonym: "inverted ring chromosome" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! ring_chromosome - -[Term] -id: SO:0000440 -name: vector_replicon -def: "A replicon that has been modified to act as a vector for foreign sequence." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "vector" EXACT [] -synonym: "vector replicon" EXACT [] -xref: http://en.wikipedia.org/wiki/Vector_(molecular_biology) "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000151 ! 
clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000443 -name: polymer_attribute -def: "An attribute to describe the kind of biological sequence." [SO:ke] -synonym: "polymer attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -synonym: "three prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "5' nc exon" EXACT [] -synonym: "5' non coding exon" EXACT [] -synonym: "five prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." [SO:ke] -synonym: "UTR intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." [SO:ke] -synonym: "five prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -synonym: "three prime UTR intron" EXACT [] -is_a: SO:0000446 ! 
UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequency of these components." [SO:ma] -synonym: "random sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -synonym: "chromosome interband" RELATED [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000451 -name: gene_with_polyadenylated_mRNA -def: "A gene that encodes a polyadenylated mRNA." [SO:xp] -synonym: "gene with polyadenylated mRNA" EXACT [] -is_a: SO:0001217 ! implied link automatically realized ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000871 ! polyadenylated_mRNA - -[Term] -id: SO:0000452 -name: transgene_attribute -is_obsolete: true - -[Term] -id: SO:0000453 -name: chromosomal_transposition -def: "A chromosome structure variant whereby a region of a chromosome has been transferred to another position. Among interchromosomal rearrangements, the term transposition is reserved for that class in which the telomeres of the chromosomes involved are coupled (that is to say, form the two ends of a single DNA molecule) as in wild-type." [FB:reference_manual, SO:ke] -synonym: "chromosomal transposition" EXACT [] -synonym: "transposition" NARROW [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000455 -name: gene_with_mRNA_with_frameshift -def: "A gene that encodes an mRNA with a frameshift." [SO:xp] -synonym: "gene with mRNA with frameshift" EXACT [] -is_a: SO:0001217 ! implied link automatically realized ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000456 -name: recombinationally_rearranged_gene -def: "A gene that is recombinationally rearranged." [SO:ke] -synonym: "recombinationally rearranged gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000940 ! recombinationally_rearranged - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -def: "A chromosome duplication involving an insertion from another chromosome." [SO:ke] -synonym: "interchromosomal duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D gene" EXACT [] -synonym: "D-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000459 -name: gene_with_trans_spliced_transcript -def: "A gene with a transcript that is trans-spliced." [SO:xp] -synonym: "gene with trans spliced transcript" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000479 ! trans_spliced_transcript - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_segment -comment: I am using the term segment instead of gene here to avoid confusion with the region 'gene'. -synonym: "vertebrate immunoglobulin T cell receptor segment" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene" EXACT [] -is_a: SO:0000301 ! 
vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite deficiency" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000463 -name: encodes_alternately_spliced_transcripts -def: "A gene that encodes more than one transcript." [SO:ke] -synonym: "encodes alternately spliced transcripts" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome deletion whereby a chromosome is generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus duplication" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene" EXACT [] -synonym: "V-GENE" EXACT [] -synonym: "variable_gene" EXACT [] -is_a: SO:0000460 ! 
vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -def: "An attribute describing a gene sequence where the resulting protein is regulated by the stability of the resulting protein." [SO:ke] -synonym: "post translationally regulated by protein stability" EXACT [] -synonym: "post-translationally regulated by protein stability" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -def: "An attribute describing a gene sequence where the resulting protein is modified to regulate it." [SO:ke] -synonym: "post translationally regulated by protein modification" EXACT [] -synonym: "post-translationally regulated by protein modification" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene" EXACT [] -synonym: "J-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000471 -name: autoregulated -def: "The gene product is involved in its own transcriptional regulation." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -def: "The gene product is involved in its own transcriptional regulation where it decreases transcription." [SO:ke] -synonym: "negatively autoregulated" EXACT [] -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -def: "The gene product is involved in its own transcriptional regulation, where it increases transcription." [SO:ke] -synonym: "positively autoregulated" EXACT [] -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." [SO:ke] -synonym: "contig read" EXACT [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -def: "A gene that is polycistronic." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000478 -name: C_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C gene" EXACT [] -synonym: "C_GENE" EXACT [] -synonym: "constant gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000479 -name: trans_spliced_transcript -def: "A transcript that is trans-spliced." [SO:xp] -synonym: "trans spliced transcript" EXACT [] -synonym: "trans-spliced transcript" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000870 ! 
trans_spliced - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly." [SO:ke] -synonym: "tiling path clone" EXACT [] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occurring at the termini of a DNA transposon." [SO:ke] -synonym: "terminal inverted repeat" EXACT [] -synonym: "TIR" EXACT [] -is_a: SO:0000294 ! inverted_repeat -relationship: part_of SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor gene cluster" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-CLUSTER" EXACT [] -synonym: "DJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-C-CLUSTER" EXACT [] -synonym: "VDJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-CLUSTER" EXACT [] -synonym: "VDJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-C-CLUSTER" RELATED [] -synonym: "VJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-C-CLUSTER" EXACT [] -synonym: "VJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-CLUSTER" EXACT [] -synonym: "VJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -synonym: "D gene recombination feature" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-HEPTAMER" EXACT [] -synonym: "three prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! 
three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-NOMAMER" EXACT [] -synonym: "three prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-SPACER" EXACT [] -synonym: "three prime D spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-HEPTAMER" EXACT [] -synonym: "five prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-NONAMER" EXACT [] -synonym: "five prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! 
five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'-SPACER" EXACT [] -synonym: "five prime D spacer" EXACT [] -synonym: "five prime D-spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -synonym: "Hoogsteen base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Hoogsteen_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." [SO:ke] -synonym: "reverse Hoogsteen base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." 
[SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ C cluster" EXACT [] -synonym: "D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ cluster" EXACT [] -synonym: "D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J C cluster" EXACT [] -synonym: "D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! 
DJ_gene - -[Term] -id: SO:0000507 -name: pseudogenic_exon -def: "A non functional descendant of an exon, part of a pseudogene." [SO:ke] -comment: This is the analog of the exon of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon -relationship: part_of SO:0000516 ! pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J cluster" EXACT [] -synonym: "D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J C cluster" EXACT [] -synonym: "D-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000510 -name: VD_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V_D_GENE" EXACT [] -synonym: "VD gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J C cluster" EXACT [] -synonym: "J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus aneuploid" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J cluster" EXACT [] -synonym: "J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J nonamer" EXACT [] -synonym: "J-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. 
CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J heptamer" EXACT [] -synonym: "J-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -def: "A non functional descendant of a transcript, part of a pseudogene." [SO:ke] -comment: This is the analog of the transcript of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic transcript" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J spacer" EXACT [] -synonym: "J-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ cluster" EXACT [] -synonym: "V-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000572 ! 
DJ_gene - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J cluster" EXACT [] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ C cluster" EXACT [] -synonym: "V-(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ cluster" EXACT [] -synonym: "V-(VDJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J cluster" EXACT [] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ C cluster" EXACT [] -synonym: "V-(VJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ cluster" EXACT [] -synonym: "V-(VJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J cluster" EXACT [] -synonym: "V-(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V cluster" EXACT [] -synonym: "V-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ C cluster" EXACT [] -synonym: "V-D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ cluster" EXACT [] -synonym: "V-D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J C cluster" EXACT [] -synonym: "V-D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! 
C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J cluster" EXACT [] -synonym: "V-D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J C cluster" EXACT [] -synonym: "V-D-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J cluster" EXACT [] -synonym: "V-D-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V heptamer" EXACT [] -synonym: "V-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J cluster" EXACT [] -synonym: "V-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J C cluster" EXACT [] -synonym: "V-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V nonamer" EXACT [] -synonym: "V-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V spacer" EXACT [] -synonym: "V-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene recombination feature" EXACT [] -synonym: "V-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-C-CLUSTER" EXACT [] -synonym: "DJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-C-CLUSTER" EXACT [] -synonym: "DJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-C-CLUSTER" EXACT [] -synonym: "VDJ C cluster" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ C cluster" EXACT [] -synonym: "V-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." [http://www.pnas.org/cgi/content/full/100/11/6569] -synonym: "ISCR" RELATED [] -xref: http://en.wikipedia.org/wiki/Helitron "wiki" -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -synonym: "recoding pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -synonym: "designed sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." 
[FB:km] -synonym: "inversion derived bipartite duplication" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000548 -name: gene_with_edited_transcript -def: "A gene that encodes a transcript that is edited." [SO:xp] -synonym: "gene with edited transcript" EXACT [] -is_a: SO:0001217 ! implied link automatically realized ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000873 ! edited_transcript - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived duplication plus aneuploid" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -def: "A chromosome structural variation whereby either a chromosome exists in addition to the normal chromosome complement or is lacking." [SO:ke] -comment: Examples are Nullo-4, Haplo-4 and triplo-4 in Drosophila. -synonym: "aneuploid chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -synonym: "polyadenylation termination signal" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "Region in 5' UTR where ribosome assembles on mRNA." 
[SO:ke] -synonym: "five prime ribosome binding site" EXACT [] -synonym: "RBS" RELATED [] -synonym: "Shine Dalgarno sequence" EXACT [] -synonym: "Shine-Dalgarno sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Shine-Dalgarno_sequence "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA cleavage site" EXACT [] -synonym: "polyA site" EXACT [] -synonym: "polyadenylation site" RELATED [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "5' clip" RELATED [] -synonym: "five prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'RS" EXACT [] -synonym: "five prime D recombination signal sequence" EXACT [] -synonym: "five prime D-recombination signal sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "3'-clip" EXACT [] -synonym: "three prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C cluster" EXACT [] -synonym: "C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D cluster" EXACT [] -synonym: "D-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J cluster" EXACT [] -synonym: "D-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: "Seven nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or J-gene recombination feature of an immunoglobulin or T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "HEPTAMER" RELATED [] -synonym: "heptamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! 
vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -synonym: "nonamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000563 -name: vertebrate_immune_system_gene_recombination_spacer -synonym: "vertebrate immune system gene recombination spacer" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J C cluster" EXACT [] -synonym: "V-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J C cluster" EXACT [] -synonym: "V-(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J C cluster" EXACT [] -synonym: "V-(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -synonym: "inversion derived aneuploid chromosome" EXACT [] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promoter -synonym: "bidirectional promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000569 -name: retrotransposed -alt_id: SO:0100042 -def: "An attribute of a feature that occurred as the product of a reverse transcriptase mediated event." [SO:ke] -comment: GO:0003964 RNA-directed DNA polymerase activity. -xref: http://en.wikipedia.org/wiki/Retrotransposed "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-RS" EXACT [] -synonym: "three prime D recombination signal sequence" EXACT [] -synonym: "three_prime_D-recombination_signal_sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_encoding -synonym: "miRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000572 -name: DJ_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D-J-GENE" EXACT [] -synonym: "DJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000573 -name: rRNA_encoding -synonym: "rRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000574 -name: VDJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-D-J-GENE" EXACT [] -synonym: "VDJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000575 -name: scRNA_encoding -synonym: "scRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000576 -name: VJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-J-GENE" EXACT [] -synonym: "VJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_encoding -synonym: "snoRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." 
[SO:ma] -synonym: "edited transcript feature" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -synonym: "methylation guide snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." [SO:ke] -synonym: "rRNA cleavage snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "pre edited region" EXACT [] -synonym: "pre-edited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "A tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. TmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and Eukaryote nuclear genomes." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa RNA" RELATED [] -synonym: "ssrA" RELATED [] -xref: http://en.wikipedia.org/wiki/TmRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_encoding -synonym: "C/D box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." [SO:ke] -synonym: "10Sa RNA primary transcript" RELATED [] -synonym: "ssrA RNA primary transcript" RELATED [] -synonym: "tmRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyze their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -is_a: SO:0000188 ! implied link automatically realized ! intron -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -synonym: "SRP RNA primary transcript" EXACT [] -is_a: SO:0000483 ! 
nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000589 ! SRP_RNA_primary_transcript - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A tertiary structure in RNA where nucleotides in a loop form base pairs with a region of RNA downstream of the loop." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Pseudoknot "wiki" -is_a: SO:0000002 ! 
sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "classical pseudoknot" EXACT [] -synonym: "H pseudoknot" EXACT [] -synonym: "H-pseudoknot" EXACT [] -synonym: "H-type pseudoknot" EXACT [] -synonym: "hairpin-type pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000595 ! C_D_box_snoRNA_primary_transcript - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. 
Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "box H/ACA snoRNA" EXACT [] -synonym: "H ACA box snoRNA" EXACT [] -synonym: "H/ACA box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000596 ! H_ACA_box_snoRNA_primary_transcript - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." [SO:ke] -synonym: "C/D box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -synonym: "H ACA box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." [http://www.rna.ucla.edu/index.html] -is_obsolete: true - -[Term] -id: SO:0000598 -name: edited_by_C_insertion_and_dinucleotide_insertion -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000599 -name: edited_by_C_to_U_substitution -is_obsolete: true - -[Term] -id: SO:0000600 -name: edited_by_A_to_I_substitution -is_obsolete: true - -[Term] -id: SO:0000601 -name: edited_by_G_addition -is_obsolete: true - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." 
[http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing block" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing domain" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "unedited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_encoding -synonym: "H ACA box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -synonym: "oligo U tail" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000602 ! guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." 
[http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -synonym: "bacterial RNApol promoter" EXACT [] -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -synonym: "bacterial terminator" EXACT [] -is_a: SO:0000141 ! terminator -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -synonym: "terminator of type 2 RNApol III promoter" EXACT [] -is_a: SO:0000951 ! eukaryotic_terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -synonym: "RNApol III promoter type 1" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "RNApol III promoter type 2" EXACT [] -synonym: "tRNA promoter" RELATED [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence TGGCnnAGTGG." [SO:ke] -synonym: "A-box" EXACT [] -xref: http://en.wikipedia.org/wiki/A-box "wiki" -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000618 ! 
RNApol_III_promoter_type_2 - -[Term] -id: SO:0000620 -name: B_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence AGGTTCCAnnCC." [SO:ke] -synonym: "B-box" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -synonym: "RNApol III promoter type 3" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -def: "An RNA polymerase III type 1 promoter with consensus sequence CAnnCCn." [SO:ke] -synonym: "C-box" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000623 -name: snRNA_encoding -synonym: "snRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -synonym: "chromosomal regulatory element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000627 -name: insulator -def: "A transcriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! 
transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -synonym: "five prime open reading frame" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -def: "A start codon upstream of the ORF." [SO:ke] -synonym: "upstream AUG codon" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000631 -name: polycistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." [SO:ke] -synonym: "polycistronic primary transcript" EXACT [] -is_a: SO:0000078 ! implied link automatically realized ! polycistronic_transcript -is_a: SO:0000185 ! implied link automatically realized ! primary_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000632 -name: monocistronic_primary_transcript -def: "A primary transcript encoding for one gene product." [SO:ke] -synonym: "monocistronic primary transcript" EXACT [] -is_a: SO:0000185 ! implied link automatically realized ! primary_transcript -is_a: SO:0000665 ! implied link automatically realized ! monocistronic_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000633 -name: monocistronic_mRNA -def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd] -synonym: "monocistronic mRNA" EXACT [] -synonym: "monocistronic processed transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Monocistronic_mRNA "wiki" -is_a: SO:0000234 ! implied link automatically realized ! mRNA -is_a: SO:0000665 ! implied link automatically realized ! 
monocistronic_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000634 -name: polycistronic_mRNA -def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd] -synonym: "polycistronic mRNA" EXACT [] -synonym: "polycistronic processed transcript" RELATED [] -xref: http://en.wikipedia.org/wiki/Polycistronic_mRNA "wiki" -is_a: SO:0000078 ! implied link automatically realized ! polycistronic_transcript -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "A primary transcript that donates the spliced leader to other mRNA." [SO:ke] -synonym: "mini exon donor RNA" EXACT [] -synonym: "mini-exon donor RNA" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -synonym: "spliced leader RNA" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000635 ! mini_exon_donor_RNA - -[Term] -id: SO:0000637 -name: engineered_plasmid -def: "A plasmid that is engineered." [SO:xp] -synonym: "engineered plasmid" EXACT [] -synonym: "engineered plasmid gene" RELATED [] -is_a: SO:0000155 ! implied link automatically realized ! plasmid -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -synonym: "transcribed spacer region" EXACT [] -is_a: SO:0000838 ! 
rRNA_primary_transcript_region - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -synonym: "internal transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -synonym: "external transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -synonym: "tetranucleotide repeat microsatellite feature" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_encoding -synonym: "SRP RNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! 
primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro RNA primary transcript" EXACT [] -synonym: "miRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000648 -name: stRNA_primary_transcript -def: "A primary transcript encoding a small temporal mRNA (SO:0000649)." [SO:ke] -synonym: "small temporal RNA primary transcript" EXACT [] -synonym: "stRNA primary transcript" EXACT [] -is_a: SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000255 ! rRNA_small_subunit_primary_transcript - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." 
[SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilizes 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000654 -name: maxicircle_gene -def: "A mitochondrial gene located in a maxicircle." [SO:xp] -synonym: "maxi-circle gene" EXACT [] -synonym: "maxicircle gene" EXACT [] -is_a: SO:0000089 ! implied link automatically realized ! kinetoplast_gene -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000742 ! maxicircle - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." 
[SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000656 -name: stRNA_encoding -synonym: "stRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region -relationship: has_part SO:0000726 ! repeat_unit - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_encoding -synonym: "tmRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_obsolete: true - -[Term] -id: SO:0000661 -name: intron_attribute -is_obsolete: true - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000663 -name: tRNA_encoding -synonym: "tRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -synonym: "introgressed chromosome region" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000665 -name: monocistronic_transcript -def: "A transcript that is monocistronic." 
[SO:xp] -synonym: "monocistronic transcript" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000666 -name: mobile_intron -def: "An intron (mitochondrial, chloroplast, nuclear or prokaryotic) that encodes a double strand sequence specific endonuclease allowing for mobility." [SO:ke] -synonym: "mobile intron" EXACT [] -is_a: SO:0000188 ! implied link automatically realized ! intron -is_a: SO:0001037 ! implied link automatically realized ! mobile_genetic_element -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0000667 -name: insertion -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -synonym: "sequence rearrangement feature" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." [SO:ma] -synonym: "chromosome breakage sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -synonym: "internal eliminated sequence" EXACT [] -is_a: SO:0000669 ! 
sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -synonym: "macronucleus destined segment" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non canonical splice site" EXACT [] -synonym: "non-canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000678 -consider: SO:0000679 - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." [SO:ke] -synonym: "canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000676 -consider: SO:0000677 - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -synonym: "canonical 3' splice site" EXACT [] -synonym: "canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." [SO:ke] -synonym: "canonical 5' splice site" EXACT [] -synonym: "canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." 
[SO:ke] -synonym: "non canonical 3' splice site" RELATED [] -synonym: "non canonical three prime splice site" EXACT [] -synonym: "non-canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non canonical 5' splice site" EXACT [] -synonym: "non canonical five prime splice site" EXACT [] -synonym: "non-canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non ATG start codon" EXACT [] -synonym: "non canonical start codon" EXACT [] -synonym: "non-canonical start codon" EXACT [] -is_a: SO:0000318 ! start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -synonym: "aberrant processed transcript" EXACT [] -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -synonym: "exonic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000685 -name: DNAseI_hypersensitive_site -synonym: "DHS" EXACT [] -synonym: "DNAseI hypersensitive site" EXACT [] -is_a: SO:0000322 ! 
nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "A chromosomal translocation whereby the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements. This occurs for some translocations, particularly but not exclusively, reciprocal translocations." [SO:ma] -synonym: "translocation element" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000690 -name: gene_with_polycistronic_transcript -def: "A gene that encodes a polycistronic transcript." [SO:xp] -synonym: "gene with polycistronic transcript" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000078 ! polycistronic_transcript - -[Term] -id: SO:0000691 -name: cleaved_initiator_methionine -alt_id: BS:00067 -def: "The initiator methionine that has been cleaved from a mature polypeptide sequence." [EBIBS:GAR] -subset: biosapiens -synonym: "cleaved initiator methionine" EXACT [] -synonym: "init_met" RELATED [uniprot:feature_type] -synonym: "initiator methionine" RELATED [] -is_a: SO:0100011 ! 
cleaved_peptide_region - -[Term] -id: SO:0000692 -name: gene_with_dicistronic_transcript -def: "A gene that encodes a dicistronic transcript." [SO:xp] -synonym: "gene with dicistronic transcript" EXACT [] -is_a: SO:0000690 ! implied link automatically realized ! gene_with_polycistronic_transcript -intersection_of: SO:0000690 ! gene_with_polycistronic_transcript -intersection_of: transcribed_to SO:0000079 ! dicistronic_transcript - -[Term] -id: SO:0000693 -name: gene_with_recoded_mRNA -def: "A gene that encodes an mRNA that is recoded." [SO:xp] -synonym: "gene with recoded mRNA" EXACT [] -is_a: SO:0001217 ! implied link automatically realized ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000881 ! recoded - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000697 -name: gene_with_stop_codon_read_through -def: "A gene that encodes a transcript with stop codon readthrough." [SO:xp] -synonym: "gene with stop codon read through" EXACT [] -is_a: SO:0000693 ! implied link automatically realized ! gene_with_recoded_mRNA -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000883 ! 
stop_codon_read_through - -[Term] -id: SO:0000698 -name: gene_with_stop_codon_redefined_as_pyrrolysine -def: "A gene encoding an mRNA that has the stop codon redefined as pyrrolysine." [SO:xp] -synonym: "gene with stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000697 ! implied link automatically realized ! gene_with_stop_codon_read_through -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000884 ! stop_codon_redefined_as_pyrrolysine - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -synonym: "breakpoint" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000001 ! region - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjcent copies of a region (of length greater than 1)." [SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The 5' five prime splice site region of the donor RNA." [SO:ke] -comment: SL RNA contains a donor site. -synonym: "5 prime trans splice site" RELATED [] -synonym: "trans splice donor site" EXACT [] -synonym: "trans-splice donor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -synonym: "SL1 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -synonym: "SL2 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000710 -name: gene_with_stop_codon_redefined_as_selenocysteine -def: "A gene encoding an mRNA that has the stop codon redefined as selenocysteine." [SO:xp] -synonym: "gene with stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000697 ! implied link automatically realized ! gene_with_stop_codon_read_through -intersection_of: SO:0000697 ! 
gene_with_stop_codon_read_through -intersection_of: has_part SO:0000885 ! stop_codon_redefined_as_selenocysteine - -[Term] -id: SO:0000711 -name: gene_with_mRNA_recoded_by_translational_bypass -def: "A gene with mRNA recoded by translational bypass." [SO:xp] -synonym: "gene with mRNA recoded by translational bypass" EXACT [] -is_a: SO:0000693 ! implied link automatically realized ! gene_with_recoded_mRNA -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:0000712 -name: gene_with_transcript_with_translational_frameshift -def: "A gene encoding a transcript that has a translational frameshift." [SO:xp] -synonym: "gene with transcript with translational frameshift" EXACT [] -is_a: SO:0000693 ! implied link automatically realized ! gene_with_recoded_mRNA -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000713 -name: DNA_motif -def: "A motif that is active in the DNA form of the sequence." [SO:ke] -synonym: "DNA motif" EXACT [] -xref: http://en.wikipedia.org/wiki/DNA_motif "wiki" -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000716 -name: dicistronic_mRNA -def: "An mRNA that has the quality dicistronic." [SO:ke] -synonym: "dicistronic mRNA" EXACT [] -synonym: "dicistronic processed transcript" RELATED [] -is_a: SO:0000079 ! implied link automatically realized ! dicistronic_transcript -is_a: SO:0000634 ! implied link automatically realized ! 
polycistronic_mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interrupted by one or more stop codons; usually identified through intergenomic sequence comparisons." [SGD:rb] -comment: Term requested by Rama from SGD. -synonym: "blocked reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000720 -name: foreign_transposable_element -def: "A transposable element that is foreign." [SO:ke] -comment: requested by Michael on 19 Nov 2004. -synonym: "foreign transposable element" EXACT [] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000721 -name: gene_with_dicistronic_primary_transcript -def: "A gene that encodes a dicistronic primary transcript." [SO:xp] -comment: Requested by Michael, 19 nov 2004. 
-synonym: "gene with dicistronic primary transcript" EXACT [] -is_a: SO:0000692 ! implied link automatically realized ! gene_with_dicistronic_transcript -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:1001197 ! dicistronic_primary_transcript - -[Term] -id: SO:0000722 -name: gene_with_dicistronic_mRNA -def: "A gene that encodes a polycistronic mRNA." [SO:xp] -comment: Requested by MA nov 19 2004. -synonym: "gene with dicistronic mRNA" EXACT [] -synonym: "gene with dicistronic processed transcript" EXACT [] -is_a: SO:0000692 ! implied link automatically realized ! gene_with_dicistronic_transcript -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:0000716 ! dicistronic_mRNA - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." [SO:ma] -synonym: "intervening DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/IDNA "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. 
This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000726 -name: repeat_unit -def: "The simplest repeated component of a repeat region. A single repeat." [SO:ke] -comment: Added to comply with the feature table. A single repeat. -synonym: "repeat unit" EXACT [] -xref: http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -is_a: SO:0001055 ! implied link automatically realized ! transcriptional_cis_regulatory_region -intersection_of: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000728 -name: intein -def: "A region of a peptide that is able to excise itself and rejoin the remaining portions with a peptide bond." [SO:ke] -comment: Intein-mediated protein splicing occurs after mRNA has been translated into a protein. -synonym: "protein intron" RELATED [] -xref: http://en.wikipedia.org/wiki/Intein "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000729 -name: intein_containing -def: "An attribute of protein-coding genes where the initial protein product contains an intein." [SO:ke] -synonym: "intein containing" EXACT [] -is_a: SO:0000010 ! protein_coding - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0000731 -name: fragmentary -def: "An attribute to describe a feature that is incomplete." [SO:ke] -comment: Term added because of request by MO people. -synonym: "fragment" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000732 -name: predicted -def: "An attribute describing an unverified region." [SO:ke] -xref: http://en.wikipedia.org/wiki/Predicted "wiki" -is_a: SO:0000905 ! status - -[Term] -id: SO:0000733 -name: feature_attribute -def: "An attribute describing a located_sequence_feature." [SO:ke] -synonym: "feature attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000734 -name: exemplar_mRNA -def: "An exemplar is a representative cDNA sequence for each gene. The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: Added for the MO people. -synonym: "exemplar mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000864 ! exemplar - -[Term] -id: SO:0000735 -name: sequence_location -synonym: "sequence location" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_sequence -synonym: "organelle sequence" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "mitochondrial sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000738 -name: nuclear_sequence -synonym: "nuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -synonym: "nucleomorphic sequence" EXACT [] -is_a: SO:0000736 ! 
organelle_sequence - -[Term] -id: SO:0000740 -name: plastid_sequence -synonym: "plastid sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000741 -name: kinetoplast -alt_id: SO:0000826 -def: "A kinetoplast is an interlocked network of thousands of minicircles and tens of maxi circles, located near the base of the flagellum of some protozoan species." [PMID:8395055] -synonym: "kinetoplast_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Kinetoplast "wiki" -is_a: SO:0001026 ! implied link automatically realized ! genome -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0000742 ! maxicircle -intersection_of: has_part SO:0000980 ! minicircle - -[Term] -id: SO:0000742 -name: maxicircle -alt_id: SO:0000827 -def: "A maxicircle is a replicon, part of a kinetoplast, that contains open reading frames and replicates via a rolling circle method." [PMID:8395055] -synonym: "maxicircle_chromosome" EXACT [] -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000741 ! kinetoplast - -[Term] -id: SO:0000743 -name: apicoplast_sequence -synonym: "apicoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -synonym: "chromoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -synonym: "chloroplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -synonym: "cyanelle sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -synonym: "leucoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -synonym: "proplastid sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_location -synonym: "plasmid location" EXACT [] -is_a: SO:0000735 ! 
sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -synonym: "amplification origin" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_location -synonym: "proviral location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -def: "The region of sequence that has been inserted and is being propogated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000754 -name: lambda_vector -def: "The lambda bacteriophage is the vector for the linear lambda clone. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -synonym: "lambda vector" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000755 -name: plasmid_vector -synonym: "plasmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Plasmid_vector#Vectors "wiki" -is_a: SO:0000440 ! vector_replicon -intersection_of: SO:0001235 ! replicon -intersection_of: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template." [SO:ma] -xref: http://en.wikipedia.org/wiki/CDNA "wiki" -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -synonym: "single strand cDNA" EXACT [] -synonym: "single stranded cDNA" EXACT [] -synonym: "single-strand cDNA" RELATED [] -is_a: SO:0000756 ! 
cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -synonym: "double strand cDNA" RELATED [] -synonym: "double stranded cDNA" EXACT [] -synonym: "double-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_obsolete: true - -[Term] -id: SO:0000760 -name: YAC_clone -is_obsolete: true - -[Term] -id: SO:0000761 -name: phagemid_clone -is_obsolete: true - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000763 -name: fosmid_clone -is_obsolete: true - -[Term] -id: SO:0000764 -name: BAC_clone -is_obsolete: true - -[Term] -id: SO:0000765 -name: cosmid_clone -is_obsolete: true - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -def: "A tRNA sequence that has a pyrrolysine anticodon, and a 3' pyrrolysine binding region." [SO:ke] -synonym: "pyrrolysyl tRNA" EXACT [] -synonym: "pyrrolysyl-transfer ribonucleic acid" EXACT [] -synonym: "pyrrolysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0001178 ! pyrrolysine_tRNA_primary_transcript - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome." [SO:ma] -is_a: SO:0000155 ! plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria. Processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw, issn:1362-4962] -comment: Added in response to comment from Kelly Williams from Indiana. Nov 2005. -synonym: "tmRNA coding piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. 
The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw] -comment: Added in response to Kelly Williams from Indiana. Date: Nov 2005. -synonym: "tmRNA acceptor piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000771 -name: QTL -def: "A quantitative trait locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." [http://rgd.mcw.edu/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005. -synonym: "quantitative trait locus" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000772 -name: genomic_island -def: "A genomic island is an integrated mobile genetic element, characterized by size (over 10 Kb). It that has features that suggest a foreign origin. These can include nucleotide distribution (oligonucleotides signature, CG content etc.) that differs from the bulk of the chromosome and/or genes suggesting DNA mobility." [Phigo:at, SO:ke] -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -synonym: "genomic island" EXACT [] -xref: http://en.wikipedia.org/wiki/Genomic_island "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." 
[SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "pathogenic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands. -synonym: "metabolic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -def: "An adaptive island is a genomic island that provides an adaptive advantage to the host." [SO:ke] -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands. Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "adaptive island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands. Evolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA gene. John T. Sullivan and Clive W. Ronso PNAS 1998 Apr 28 95 (9) 5145-5149. -synonym: "symbiosis island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. 
-subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000779 -name: engineered_episome -def: "An episome that is engineered." [SO:xp] -comment: Requested by Lynn Crosby Jan 2006. -synonym: "engineered episome" EXACT [] -is_a: SO:0000637 ! implied link automatically realized ! engineered_plasmid -is_a: SO:0000768 ! implied link automatically realized ! episome -intersection_of: SO:0000768 ! episome -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000780 -name: transposable_element_attribute -comment: Added by KE Jan 2006 to capture the kinds of attributes of TEs -is_obsolete: true - -[Term] -id: SO:0000781 -name: transgenic -def: "Attribute describing sequence that has been integrated with foreign sequence." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000782 -name: natural -def: "An attribute describing a feature that occurs in nature." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000783 -name: engineered -def: "An attribute to describe a region that was modified in vitro." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000784 -name: foreign -def: "An attribute to describe a region from another species." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000785 -name: cloned_region -comment: Added in response to Lynn Crosby. A clone insert may be composed of many cloned regions. -synonym: "cloned region" EXACT [] -synonym: "cloned segment" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000753 ! 
clone_insert - -[Term] -id: SO:0000786 -name: reagent_attribute -comment: Added jan 2006 by KE. -synonym: "reagent attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000787 -name: clone_attribute -is_obsolete: true - -[Term] -id: SO:0000788 -name: cloned -is_obsolete: true - -[Term] -id: SO:0000789 -name: validated -def: "An attribute to describe a feature that has been proven." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000790 -name: invalidated -def: "An attribute describing a feature that is invalidated." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000791 -name: cloned_genomic -is_obsolete: true - -[Term] -id: SO:0000792 -name: cloned_cDNA -is_obsolete: true - -[Term] -id: SO:0000793 -name: engineered_DNA -is_obsolete: true - -[Term] -id: SO:0000794 -name: engineered_rescue_region -def: "A rescue region that is engineered." [SO:xp] -synonym: "engineered rescue fragment" EXACT [] -synonym: "engineered rescue region" EXACT [] -synonym: "engineered rescue segment" EXACT [] -is_a: SO:0000411 ! implied link automatically realized ! rescue_region -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000411 ! rescue_region -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000795 -name: rescue_mini_gene -def: "A mini_gene that rescues." [SO:xp] -synonym: "rescue mini gene" EXACT [] -synonym: "rescue mini-gene" EXACT [] -is_a: SO:0000815 ! implied link automatically realized ! mini_gene -intersection_of: SO:0000815 ! mini_gene -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000796 -name: transgenic_transposable_element -def: "TE that has been modified in vitro, including insertion of DNA derived from a source other than the originating TE." [FB:mc] -comment: Modified as requested by Lynn - FB. May 2007. -synonym: "transgenic transposable element" EXACT [] -is_a: SO:0000101 ! implied link automatically realized ! 
transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: derives_from SO:0000151 ! clone -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000797 -name: natural_transposable_element -def: "TE that exists (or existed) in nature." [FB:mc] -synonym: "natural transposable element" EXACT [] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000782 ! natural - -[Term] -id: SO:0000798 -name: engineered_transposable_element -def: "TE that has been modified by manipulations in vitro." [FB:mc] -synonym: "engineered transposable element" EXACT [] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000799 -name: engineered_foreign_transposable_element -def: "A transposable_element that is engineered and foreign." [FB:mc] -synonym: "engineered foreign transposable element" EXACT [] -is_a: SO:0000720 ! implied link automatically realized ! foreign_transposable_element -is_a: SO:0000798 ! implied link automatically realized ! engineered_transposable_element -is_a: SO:0000805 ! implied link automatically realized ! engineered_foreign_region -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000800 -name: assortment_derived_duplication -def: "A multi-chromosome duplication aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment derived duplication" EXACT [] -is_a: SO:0001504 ! 
assortment_derived_variation - -[Term] -id: SO:0000801 -name: assortment_derived_deficiency_plus_duplication -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency and a duplication." [FB:gm] -synonym: "assortment derived deficiency plus duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000802 -name: assortment_derived_deficiency -def: "A multi-chromosome deficiency aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment-derived deficiency" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000803 -name: assortment_derived_aneuploid -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency or a duplication." [FB:gm] -synonym: "assortment derived aneuploid" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000804 -name: engineered_region -def: "A region that is engineered." [SO:xp] -synonym: "construct" EXACT [] -synonym: "engineered region" EXACT [] -synonym: "engineered sequence" EXACT [] -is_a: SO:0001409 ! biomaterial_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000805 -name: engineered_foreign_region -def: "A region that is engineered and foreign." [SO:xp] -synonym: "engineered foreign region" EXACT [] -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000806 -name: fusion -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000807 -name: engineered_tag -def: "A tag that is engineered." [SO:xp] -synonym: "engineered tag" EXACT [] -is_a: SO:0000324 ! implied link automatically realized ! tag -is_a: SO:0000804 ! 
implied link automatically realized ! engineered_region -intersection_of: SO:0000324 ! tag -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000808 -name: validated_cDNA_clone -def: "A cDNA clone that has been validated." [SO:xp] -synonym: "validated cDNA clone" EXACT [] -is_a: SO:0000317 ! implied link automatically realized ! cDNA_clone -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000789 ! validated - -[Term] -id: SO:0000809 -name: invalidated_cDNA_clone -def: "A cDNA clone that is invalid." [SO:xp] -synonym: "invalidated cDNA clone" EXACT [] -is_a: SO:0000317 ! implied link automatically realized ! cDNA_clone -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000790 ! invalidated - -[Term] -id: SO:0000810 -name: chimeric_cDNA_clone -def: "A cDNA clone invalidated because it is chimeric." [SO:xp] -synonym: "chimeric cDNA clone" EXACT [] -is_a: SO:0000809 ! implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA - -[Term] -id: SO:0000811 -name: genomically_contaminated_cDNA_clone -def: "A cDNA clone invalidated by genomic contamination." [SO:xp] -synonym: "genomically contaminated cDNA clone" EXACT [] -is_a: SO:0000809 ! implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000414 ! invalidated_by_genomic_contamination - -[Term] -id: SO:0000812 -name: polyA_primed_cDNA_clone -def: "A cDNA clone invalidated by polyA priming." [SO:xp] -synonym: "polyA primed cDNA clone" EXACT [] -is_a: SO:0000809 ! implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000415 ! 
invalidated_by_genomic_polyA_primed_cDNA - -[Term] -id: SO:0000813 -name: partially_processed_cDNA_clone -def: "A cDNA invalidated clone by partial processing." [SO:xp] -synonym: "partially processed cDNA clone" EXACT [] -is_a: SO:0000809 ! implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000416 ! invalidated_by_partial_processing - -[Term] -id: SO:0000814 -name: rescue -def: "An attribute describing a region's ability, when introduced to a mutant organism, to re-establish (rescue) a phenotype." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000815 -name: mini_gene -def: "By definition, minigenes are short open-reading frames (ORF), usually encoding approximately 9 to 20 amino acids, which are expressed in vivo (as distinct from being synthesized as peptide or protein ex vivo and subsequently injected). The in vivo synthesis confers a distinct advantage: the expressed sequences can enter both antigen presentation pathways, MHC I (inducing CD8+ T- cells, which are usually cytotoxic T-lymphocytes (CTL)) and MHC II (inducing CD4+ T-cells, usually 'T-helpers' (Th)); and can encounter B-cells, inducing antibody responses. Three main vector approaches have been used to deliver minigenes: viral vectors, bacterial vectors and plasmid DNA." [PMID:15992143] -synonym: "mini gene" EXACT [] -is_a: SO:0000236 ! ORF - -[Term] -id: SO:0000816 -name: rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "rescue gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000817 -name: wild_type -def: "An attribute describing sequence with the genotype found in nature and/or standard laboratory stock." [SO:ke] -synonym: "wild type" EXACT [] -xref: http://en.wikipedia.org/wiki/Wild_type "wiki" -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000818 -name: wild_type_rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "wild type rescue gene" EXACT [] -is_a: SO:0000816 ! rescue_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000817 ! wild_type - -[Term] -id: SO:0000819 -name: mitochondrial_chromosome -def: "A chromosome originating in a mitochondria." [SO:xp] -synonym: "mitochondrial chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000820 -name: chloroplast_chromosome -def: "A chromosome originating in a chloroplast." [SO:xp] -synonym: "chloroplast chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000821 -name: chromoplast_chromosome -def: "A chromosome originating in a chromoplast." [SO:xp] -synonym: "chromoplast chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000822 -name: cyanelle_chromosome -def: "A chromosome originating in a cyanelle." [SO:xp] -synonym: "cyanelle chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000823 -name: leucoplast_chromosome -def: "A chromosome with origin in a leucoplast." [SO:xp] -synonym: "leucoplast chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000747 ! 
leucoplast_sequence - -[Term] -id: SO:0000824 -name: macronuclear_chromosome -def: "A chromosome originating in a macronucleus." [SO:xp] -synonym: "macronuclear chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000083 ! macronuclear_sequence - -[Term] -id: SO:0000825 -name: micronuclear_chromosome -def: "A chromosome originating in a micronucleus." [SO:xp] -synonym: "micronuclear chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000084 ! micronuclear_sequence - -[Term] -id: SO:0000828 -name: nuclear_chromosome -def: "A chromosome originating in a nucleus." [SO:xp] -synonym: "nuclear chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000829 -name: nucleomorphic_chromosome -def: "A chromosome originating in a nucleomorph." [SO:xp] -synonym: "nucleomorphic chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! 
biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000832 -name: promoter_region -def: "A region of sequence which is part of a promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -def: "A region of a mature transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. 
-subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000838 -name: rRNA_primary_transcript_region -def: "A region of an rRNA primary transcript." [SO:ke] -comment: To allow transcribed_spacer_region to have a path to the root. -synonym: "rRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000840 -name: repeat_component -def: "A region of a repeated sequence." [SO:ke] -comment: A manufactured to group the parts of repeats, to give them an is_a path back to the root. -synonym: "repeat component" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! 
gene - -[Term] -id: SO:0000843 -name: bacterial_RNApol_promoter_region -def: "A region which is part of a bacterial RNA polymerase promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of bacterial_RNApol_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000844 -name: RNApol_II_promoter_region -def: "A region of sequence which is a promoter for RNA polymerase II." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_II_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000845 -name: RNApol_III_promoter_type_1_region -def: "A region of sequence which is a promoter for RNA polymerase III type 1." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_1 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000846 -name: RNApol_III_promoter_type_2_region -def: "A region of sequence which is a promoter for RNA polymerase III type 2." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_2 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000847 -name: tmRNA_region -def: "A region of a tmRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of tmRNA, thus giving them an is_a path back to the root. -synonym: "tmRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000584 ! tmRNA - -[Term] -id: SO:0000848 -name: LTR_component -synonym: "long term repeat component" EXACT [] -synonym: "LTR component" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000849 -name: three_prime_LTR_component -synonym: "3' long terminal repeat component" EXACT [] -synonym: "three prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000426 ! 
three_prime_LTR - -[Term] -id: SO:0000850 -name: five_prime_LTR_component -synonym: "5' long term repeat component" EXACT [] -synonym: "five prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000851 -name: CDS_region -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0000853 -name: homologous_region -def: "A region that is homologous to another region." [SO:ke] -synonym: "homolog" EXACT [] -synonym: "homologous region" EXACT [] -synonym: "homologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Homology_(biology) "wiki" -is_a: SO:0000330 ! implied link automatically realized ! conserved_region -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000857 ! homologous - -[Term] -id: SO:0000854 -name: paralogous_region -def: "A homologous_region that is paralogous to another region." [SO:ke] -comment: A term to be used in conjunction with the paralogous_to relationship. -synonym: "paralog" EXACT [] -synonym: "paralogous region" EXACT [] -synonym: "paralogue" EXACT [] -xref: http://en.wikipedia.org/wiki/Paralog#Paralogy "wiki" -is_a: SO:0000853 ! implied link automatically realized ! homologous_region -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000859 ! paralogous - -[Term] -id: SO:0000855 -name: orthologous_region -def: "A homologous_region that is orthologous to another region." [SO:ke] -comment: This term should be used in conjunction with the similarity relationships defined in SO. 
-synonym: "ortholog" EXACT [] -synonym: "orthologous region" EXACT [] -synonym: "orthologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Ortholog#Orthology "wiki" -is_a: SO:0000853 ! implied link automatically realized ! homologous_region -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000858 ! orthologous - -[Term] -id: SO:0000856 -name: conserved -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000857 -name: homologous -def: "Similarity due to common ancestry." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000858 -name: orthologous -def: "An attribute describing a kind of homology where divergence occured after a speciation event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000859 -name: paralogous -def: "An attribute describing a kind of homology where divergence occurred after a duplication event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000860 -name: syntenic -def: "Attribute describing sequence regions occurring in same order on chromosome of different species." [SO:ke] -xref: http://en.wikipedia.org/wiki/Syntenic "wiki" -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000861 -name: capped_primary_transcript -def: "A primary transcript that is capped." [SO:xp] -synonym: "capped primary transcript" EXACT [] -is_a: SO:0000185 ! implied link automatically realized ! primary_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000862 -name: capped_mRNA -def: "An mRNA that is capped." [SO:xp] -synonym: "capped mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000863 -name: mRNA_attribute -def: "An attribute describing an mRNA feature." 
[SO:ke] -synonym: "mRNA attribute" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000864 -name: exemplar -def: "An attribute describing a sequence is representative of a class of similar sequences." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000865 -name: frameshift -def: "An attribute describing a sequence that contains a mutation involving the deletion or insertion of one or more bases, where this number is not divisible by 3." [SO:ke] -xref: http://en.wikipedia.org/wiki/Frameshift "wiki" -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000866 -name: minus_1_frameshift -def: "A frameshift caused by deleting one base." [SO:ke] -synonym: "minus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000867 -name: minus_2_frameshift -def: "A frameshift caused by deleting two bases." [SO:ke] -synonym: "minus 2 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000868 -name: plus_1_frameshift -def: "A frameshift caused by inserting one base." [SO:ke] -synonym: "plus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000869 -name: plus_2_framshift -def: "A frameshift caused by inserting two bases." [SO:ke] -synonym: "plus 2 framshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000870 -name: trans_spliced -def: "An attribute describing transcript sequence that is created by splicing exons from diferent genes." [SO:ke] -synonym: "trans-spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000871 -name: polyadenylated_mRNA -def: "An mRNA that is polyadenylated." [SO:xp] -synonym: "polyadenylated mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000610 ! polyA_sequence -intersection_of: has_quality SO:0000246 ! polyadenylated - -[Term] -id: SO:0000872 -name: trans_spliced_mRNA -def: "An mRNA that is trans-spliced." 
[SO:xp] -synonym: "trans-spliced mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -is_a: SO:0000479 ! implied link automatically realized ! trans_spliced_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000636 ! spliced_leader_RNA -intersection_of: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000873 -name: edited_transcript -def: "A transcript that is edited." [SO:ke] -synonym: "edited transcript" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: guided_by SO:0000602 ! guide_RNA -intersection_of: has_part SO:0000977 ! anchor_binding_site -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000874 -name: edited_transcript_by_A_to_I_substitution -def: "A transcript that has been edited by A to I substitution." [SO:ke] -synonym: "edited transcript by A to I substitution" EXACT [] -is_a: SO:0000873 ! edited_transcript -is_a: SO:0000929 ! implied link automatically realized ! edited_mRNA - -[Term] -id: SO:0000875 -name: bound_by_protein -def: "An attribute describing a sequence that is bound by a protein." [SO:ke] -synonym: "bound by protein" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000876 -name: bound_by_nucleic_acid -def: "An attribute describing a sequence that is bound by a nucleic acid." [SO:ke] -synonym: "bound by nucleic acid" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000877 -name: alternatively_spliced -def: "An attribute describing a situation where a gene may encode for more than 1 transcript." [SO:ke] -synonym: "alternatively spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000878 -name: monocistronic -def: "An attribute describing a sequence that contains the code for one gene product." [SO:ke] -is_a: SO:0000237 ! 
transcript_attribute - -[Term] -id: SO:0000879 -name: dicistronic -def: "An attribute describing a sequence that contains the code for two gene products." [SO:ke] -is_a: SO:0000880 ! polycistronic - -[Term] -id: SO:0000880 -name: polycistronic -def: "An attribute describing a sequence that contains the code for more than one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000881 -name: recoded -def: "An attribute describing an mRNA sequence that has been reprogrammed at translation, causing localized alterations." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000882 -name: codon_redefined -def: "An attribute describing the alteration of codon meaning." [SO:ke] -synonym: "codon redefined" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000883 -name: stop_codon_read_through -def: "A stop codon redefined to be a new amino acid." [SO:ke] -synonym: "stop codon read through" EXACT [] -synonym: "stop codon readthrough" RELATED [] -is_a: SO:0000145 ! recoded_codon - -[Term] -id: SO:0000884 -name: stop_codon_redefined_as_pyrrolysine -def: "A stop codon redefined to be the new amino acid, pyrrolysine." [SO:ke] -synonym: "stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000885 -name: stop_codon_redefined_as_selenocysteine -def: "A stop codon redefined to be the new amino acid, selenocysteine." [SO:ke] -synonym: "stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000886 -name: recoded_by_translational_bypass -def: "Recoded mRNA where a block of nucleotides is not translated." [SO:ke] -synonym: "recoded by translational bypass" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000887 -name: translationally_frameshifted -def: "Recoding by frameshifting a particular site." [SO:ke] -synonym: "translationally frameshifted" EXACT [] -is_a: SO:0000881 ! 
recoded - -[Term] -id: SO:0000888 -name: maternally_imprinted_gene -def: "A gene that is maternally_imprinted." [SO:xp] -synonym: "maternally imprinted gene" EXACT [] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000135 ! maternally_imprinted - -[Term] -id: SO:0000889 -name: paternally_imprinted_gene -def: "A gene that is paternally imprinted." [SO:xp] -synonym: "paternally imprinted gene" EXACT [] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000136 ! paternally_imprinted - -[Term] -id: SO:0000890 -name: post_translationally_regulated_gene -def: "A gene that is post translationally regulated." [SO:xp] -synonym: "post translationally regulated gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000891 -name: negatively_autoregulated_gene -def: "A gene that is negatively autoreguated." [SO:xp] -synonym: "negatively autoregulated gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000473 ! negatively_autoregulated - -[Term] -id: SO:0000892 -name: positively_autoregulated_gene -def: "A gene that is positively autoregulated." [SO:xp] -synonym: "positively autoregulated gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000475 ! positively_autoregulated - -[Term] -id: SO:0000893 -name: silenced -def: "An attribute describing an epigenetic process where a gene is inactivated at transcriptional or translational level." [SO:ke] -xref: http://en.wikipedia.org/wiki/Silenced "wiki" -is_a: SO:0000126 ! 
transcriptionally_repressed - -[Term] -id: SO:0000894 -name: silenced_by_DNA_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA modifications, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0000895 -name: silenced_by_DNA_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA methylation, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA methylation" EXACT [] -is_a: SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000896 -name: translationally_regulated_gene -def: "A gene that is translationally regulated." [SO:xp] -synonym: "translationally regulated gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000131 ! translationally_regulated - -[Term] -id: SO:0000897 -name: allelically_excluded_gene -def: "A gene that is allelically_excluded." [SO:xp] -synonym: "allelically excluded gene" EXACT [] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000137 ! allelically_excluded - -[Term] -id: SO:0000898 -name: epigenetically_modified_gene -def: "A gene that is epigenetically modified." [SO:ke] -synonym: "epigenetically modified gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -is_a: SO:0001720 ! implied link automatically realized ! epigenetically_modified_region -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000899 -name: nuclear_mitochondrial -def: "An attribute describing a nuclear pseudogene of a mitochndrial gene." 
[SO:ke] -synonym: "nuclear mitochondrial" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000900 -name: processed -def: "An attribute describing a pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promotors, but often including a polyA tail." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000901 -name: unequally_crossed_over -def: "An attribute describing a pseudogene that was created by tandem duplication and unequal crossing over during recombination." [SO:ke] -synonym: "unequally crossed over" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000902 -name: transgene -def: "A transgene is a gene that has been transferred naturally or by any of a number of genetic engineering techniques from one organism to another." [SO:xp] -xref: http://en.wikipedia.org/wiki/Transgene "wiki" -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000903 -name: endogenous_retroviral_sequence -synonym: "endogenous retroviral sequence" EXACT [] -is_a: SO:0000751 ! proviral_location - -[Term] -id: SO:0000904 -name: rearranged_at_DNA_level -def: "An attribute to describe the sequence of a feature, where the DNA is rearranged." [SO:ke] -synonym: "rearranged at DNA level" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000905 -name: status -def: "An attribute describing the status of a feature, based on the available evidence." [SO:ke] -comment: This term is the hypernym of attributes and should not be annotated to. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000906 -name: independently_known -def: "Attribute to describe a feature that is independently known - not predicted." [SO:ke] -synonym: "independently known" EXACT [] -is_a: SO:0000905 ! 
status - -[Term] -id: SO:0000907 -name: supported_by_sequence_similarity -def: "An attribute to describe a feature that has been predicted using sequence similarity techniques." [SO:ke] -synonym: "supported by sequence similarity" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000908 -name: supported_by_domain_match -def: "An attribute to describe a feature that has been predicted using sequence similarity of a known domain." [SO:ke] -synonym: "supported by domain match" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000909 -name: supported_by_EST_or_cDNA -def: "An attribute to describe a feature that has been predicted using sequence similarity to EST or cDNA data." [SO:ke] -synonym: "supported by EST or cDNA" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000910 -name: orphan -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000911 -name: predicted_by_ab_initio_computation -def: "An attribute describing a feature that is predicted by a computer program that did not rely on sequence similarity." [SO:ke] -synonym: "predicted by ab initio computation" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000912 -name: asx_turn -alt_id: BS:00203 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Aspartate or Asparagine (Asx), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0000913 -name: cloned_cDNA_insert -def: "A clone insert made from cDNA." [SO:xp] -synonym: "cloned cDNA insert" EXACT [] -is_a: SO:0000753 ! implied link automatically realized ! clone_insert -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000914 -name: cloned_genomic_insert -def: "A clone insert made from genomic DNA." 
[SO:xp] -synonym: "cloned genomic insert" EXACT [] -is_a: SO:0000753 ! implied link automatically realized ! clone_insert -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000915 -name: engineered_insert -def: "A clone insert that is engineered." [SO:xp] -synonym: "engineered insert" EXACT [] -is_a: SO:0000753 ! implied link automatically realized ! clone_insert -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000916 -name: edit_operation -synonym: "edit operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000917 -name: insert_U -def: "An edit to insert a U." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "insert U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000918 -name: delete_U -def: "An edit to delete a uridine." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "delete U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000919 -name: substitute_A_to_I -def: "An edit to substitute an I for an A." [SO:ke] -synonym: "substitute A to I" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000920 -name: insert_C -def: "An edit to insert a cytidine." [SO:ke] -synonym: "insert C" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000921 -name: insert_dinucleotide -def: "An edit to insert a dinucleotide." [SO:ke] -synonym: "insert dinucleotide" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000922 -name: substitute_C_to_U -def: "An edit to substitute an U for a C." 
[SO:ke] -synonym: "substitute C to U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000923 -name: insert_G -def: "An edit to insert a G." [SO:ke] -synonym: "insert G" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000924 -name: insert_GC -def: "An edit to insert a GC dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GC" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000925 -name: insert_GU -def: "An edit to insert a GU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000926 -name: insert_CU -def: "An edit to insert a CU dinucleotide." 
[SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert CU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000927 -name: insert_AU -def: "An edit to insert a AU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000928 -name: insert_AA -def: "An edit to insert a AA dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. 
Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AA" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000929 -name: edited_mRNA -def: "An mRNA that is edited." [SO:xp] -synonym: "edited mRNA" EXACT [] -is_a: SO:0000873 ! implied link automatically realized ! edited_transcript -intersection_of: SO:0000873 ! edited_transcript -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000930 -name: guide_RNA_region -def: "A region of guide RNA." [SO:ma] -synonym: "guide RNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000602 ! guide_RNA - -[Term] -id: SO:0000931 -name: anchor_region -def: "A region of a guide_RNA that base-pairs to a target mRNA." [SO:jk] -synonym: "anchor region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000932 -name: pre_edited_mRNA -synonym: "pre-edited mRNA" EXACT [] -is_a: SO:0000120 ! protein_coding_primary_transcript - -[Term] -id: SO:0000933 -name: intermediate -def: "An attribute to describe a feature between stages of processing." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000934 -name: miRNA_target_site -def: "A miRNA target site is a binding site where the molecule is a micro RNA." [FB:cds] -synonym: "miRNA target site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000935 -name: edited_CDS -def: "A CDS that is edited." [SO:xp] -synonym: "edited CDS" EXACT [] -is_a: SO:0000316 ! implied link automatically realized ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000116 ! 
edited - -[Term] -id: SO:0000936 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -synonym: "vertebrate immunoglobulin T cell receptor rearranged segment" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000937 -name: vertebrate_immune_system_feature -is_obsolete: true - -[Term] -id: SO:0000938 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor rearranged gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000939 -name: vertebrate_immune_system_gene_recombination_signal_feature -synonym: "vertebrate immune system gene recombination signal feature" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000940 -name: recombinationally_rearranged -synonym: "recombinationally rearranged" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000941 -name: recombinationally_rearranged_vertebrate_immune_system_gene -def: "A recombinationally rearranged gene of the vertebrate immune system." [SO:xp] -synonym: "recombinationally rearranged vertebrate immune system gene" EXACT [] -is_a: SO:0000456 ! recombinationally_rearranged_gene - -[Term] -id: SO:0000942 -name: attP_site -def: "An integration/excision site of a phage chromosome at which a recombinase acts to insert the phage DNA at a cognate integration/excision site on a bacterial chromosome." [SO:as] -synonym: "attP site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0001042 ! phage_sequence - -[Term] -id: SO:0000943 -name: attB_site -def: "An integration/excision site of a bacterial chromosome at which a recombinase acts to insert foreign DNA containing a cognate integration/excision site." [SO:as] -synonym: "attB site" EXACT [] -is_a: SO:0000946 ! 
integration_excision_site - -[Term] -id: SO:0000944 -name: attL_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attB_site and the 3' portion of attP_site." [SO:as] -synonym: "attBP'" RELATED [] -synonym: "attL site" RELATED [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000945 -name: attR_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attP_site and the 3' portion of attB_site." [SO:as] -synonym: "attPB'" RELATED [] -synonym: "attR site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000946 -name: integration_excision_site -def: "A region specifically recognised by a recombinase, which inserts or removes another region marked by a distinct cognate integration/excision site." [SO:as] -synonym: "attachment site" RELATED [] -synonym: "integration excision site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000947 -name: resolution_site -def: "A region specifically recognised by a recombinase, which separates a physically contiguous circle of DNA into two physically separate circles." [SO:as] -synonym: "res site" EXACT [] -synonym: "resolution site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000948 -name: inversion_site -def: "A region specifically recognised by a recombinase, which inverts the region flanked by a pair of sites." [SO:ma] -comment: A target region for site-specific inversion of a DNA region and which carries binding sites for a site-specific recombinase and accessory proteins as well as the site for specific cleavage by the recombinase. -synonym: "inversion site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000949 -name: dif_site -def: "A site at which replicated bacterial circular chromosomes are decatenated by site specific resolvase." 
[SO:as] -synonym: "dif site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000950 -name: attC_site -def: "An attC site is a sequence required for the integration of a DNA of an integron." [SO:as] -synonym: "attC site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000951 -name: eukaryotic_terminator -synonym: "eukaryotic terminator" EXACT [] -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000952 -name: oriV -def: "An origin of vegetative replication in plasmids and phages." [SO:as] -synonym: "origin of vegetative replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000953 -name: oriC -def: "An origin of bacterial chromosome replication." [SO:as] -synonym: "origin of bacterial chromosome replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000954 -name: DNA_chromosome -def: "Structural unit composed of a self-replicating, DNA molecule." [SO:ma] -synonym: "DNA chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0000955 -name: double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded DNA molecule." [SO:ma] -synonym: "double stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! implied link automatically realized ! DNA_chromosome -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000985 ! double - -[Term] -id: SO:0000956 -name: single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded DNA molecule." [SO:ma] -synonym: "single stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! implied link automatically realized ! DNA_chromosome -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000984 ! 
single - -[Term] -id: SO:0000957 -name: linear_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear DNA molecule." [SO:ma] -synonym: "linear double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! implied link automatically realized ! double_stranded_DNA_chromosome -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000958 -name: circular_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular DNA molecule." [SO:ma] -synonym: "circular double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! implied link automatically realized ! double_stranded_DNA_chromosome -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000959 -name: linear_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear DNA molecule." [SO:ma] -synonym: "linear single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! implied link automatically realized ! single_stranded_DNA_chromosome -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000960 -name: circular_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! implied link automatically realized ! single_stranded_DNA_chromosome -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000961 -name: RNA_chromosome -def: "Structural unit composed of a self-replicating, RNA molecule." [SO:ma] -synonym: "RNA chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! 
chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000356 ! RNA - -[Term] -id: SO:0000962 -name: single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded RNA molecule." [SO:ma] -synonym: "single stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! implied link automatically realized ! RNA_chromosome -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000984 ! single - -[Term] -id: SO:0000963 -name: linear_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear RNA molecule." [SO:ma] -synonym: "linear single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! implied link automatically realized ! single_stranded_RNA_chromosome -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000964 -name: linear_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear RNA molecule." [SO:ma] -synonym: "linear double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! implied link automatically realized ! double_stranded_RNA_chromosome -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000965 -name: double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded RNA molecule." [SO:ma] -synonym: "double stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! implied link automatically realized ! RNA_chromosome -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000985 ! double - -[Term] -id: SO:0000966 -name: circular_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! 
implied link automatically realized ! single_stranded_RNA_chromosome -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000967 -name: circular_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular RNA molecule." [SO:ma] -synonym: "circular double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! implied link automatically realized ! double_stranded_RNA_chromosome -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000968 -name: sequence_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "sequence replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000969 -name: rolling_circle -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070581. -synonym: "rolling circle" EXACT [] -xref: http://en.wikipedia.org/wiki/Rolling_circle "wiki" -is_obsolete: true - -[Term] -id: SO:0000970 -name: theta_replication -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070582 -synonym: "theta replication" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000971 -name: DNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0006260. -synonym: "DNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000972 -name: RNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "RNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000973 -name: insertion_sequence -def: "A terminal_inverted_repeat_element that is bacterial and only encodes the functions required for its transposition between these inverted repeats." 
[SO:as] -synonym: "insertion sequence" EXACT [] -synonym: "IS" RELATED [] -xref: http://en.wikipedia.org/wiki/Insertion_sequence "wiki" -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000975 -name: minicircle_gene -synonym: "minicircle gene" EXACT [] -is_a: SO:0000089 ! implied link automatically realized ! kinetoplast_gene -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000980 ! minicircle - -[Term] -id: SO:0000976 -name: cryptic -def: "A feature_attribute describing a feature that is not manifest under normal conditions." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000977 -name: anchor_binding_site -comment: Part of an edited transcript only. -synonym: "anchor binding site" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000978 -name: template_region -def: "A region of a guide_RNA that specifies the insertions and deletions of bases in the editing of a target mRNA." [SO:jk] -synonym: "information region" EXACT [] -synonym: "template region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000979 -name: gRNA_encoding -def: "A non-protein_coding gene that encodes a guide_RNA." [SO:ma] -synonym: "gRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000980 -name: minicircle -alt_id: SO:0000974 -def: "A minicircle is a replicon, part of a kinetoplast, that encodes for guide RNAs." [PMID:8395055] -synonym: "minicircle_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Minicircle "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000741 ! kinetoplast - -[Term] -id: SO:0000981 -name: rho_dependent_bacterial_terminator -synonym: "rho dependent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000982 -name: rho_independent_bacterial_terminator -synonym: "rho independent bacterial terminator" EXACT [] -is_a: SO:0000614 ! 
bacterial_terminator - -[Term] -id: SO:0000983 -name: strand_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "strand attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000984 -name: single -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000985 -name: double -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000986 -name: topology_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "topology attribute" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000987 -name: linear -def: "A quality of a nucleotide polymer that has a 3'-terminal residue and a 5'-terminal residue." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "two-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000988 ! circular - -[Term] -id: SO:0000988 -name: circular -def: "A quality of a nucleotide polymer that has no terminal nucleotide residues." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "zero-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000987 ! linear - -[Term] -id: SO:0000989 -name: class_II_RNA -def: "Small non-coding RNA (59-60 nt long) containing 5' and 3' ends that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -synonym: "class II RNA" EXACT [] -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000990 -name: class_I_RNA -def: "Small non-coding RNA (55-65 nt long) containing highly conserved 5' and 3' ends (16 and 8 nt, respectively) that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -comment: Requested by Karen Pilcher - Dictybase. song-Term Tracker-1574577. -synonym: "class I RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000991 -name: genomic_DNA -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "genomic DNA" EXACT [] -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000992 -name: BAC_cloned_genomic_insert -comment: Requested by Andy Schroder - Flybase Harvard, Nov 2006. -synonym: "BAC cloned genomic insert" EXACT [] -is_a: SO:0000914 ! implied link automatically realized ! cloned_genomic_insert -intersection_of: SO:0000914 ! cloned_genomic_insert -intersection_of: derives_from SO:0000153 ! BAC - -[Term] -id: SO:0000993 -name: consensus -comment: Term added Dec 06 to comply with mapping to MGED terms. It should be used to generate consensus regions. The specific cross product terms they require are consensus_region and consensus_mRNA. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000994 -name: consensus_region -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus region" EXACT [] -is_a: SO:0001410 ! experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000995 -name: consensus_mRNA -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -is_a: SO:0000994 ! implied link automatically realized ! consensus_region -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000993 ! 
consensus - -[Term] -id: SO:0000996 -name: predicted_gene -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "predicted gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000732 ! predicted - -[Term] -id: SO:0000997 -name: gene_fragment -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "gene fragment" EXACT [] -is_a: SO:0000842 ! implied link automatically realized ! gene_component_region -intersection_of: SO:0000842 ! gene_component_region -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0000998 -name: recursive_splice_site -def: "A recursive splice site is a splice site which subdivides a large intron. Recursive splicing is a mechanism that splices large introns by sub dividing the intron at non exonic elements and alternate exons." [http://www.genetics.org/cgi/content/full/170/2/661] -synonym: "recursive splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000999 -name: BAC_end -def: "A region of sequence from the end of a BAC clone that may provide a highly specific marker." [SO:ke] -comment: Requested by Keith Boroevich December, 2006. -synonym: "BAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000153 ! BAC - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." 
[SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001003 -name: solo_LTR -def: "A recombination product between the 2 LTR of the same element." [SO:ke] -comment: Requested by Hadi Quesneville January 2007. -synonym: "solo LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0001004 -name: low_complexity -synonym: "low complexity" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0001005 -name: low_complexity_region -synonym: "low complexity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001004 ! low_complexity - -[Term] -id: SO:0001006 -name: prophage -def: "A phage genome after it has established in the host genome in a latent/immune state either as a plasmid or as an integrated \"island\"." [GOC:jl] -xref: http://en.wikipedia.org/wiki/Prophage "wiki" -is_a: SO:0000113 ! proviral_region - -[Term] -id: SO:0001007 -name: cryptic_prophage -def: "A remnant of an integrated prophage in the host genome or an \"island\" in the host genome that includes phage like-genes." [GOC:jl] -comment: This is not cryptic in the same sense as a cryptic gene or cryptic splice site. -synonym: "cryptic prophage" EXACT [] -xref: http://ecoliwiki.net/colipedia/index.php/Category\:Cryptic_Prophage.w dbxref -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001008 -name: tetraloop -def: "A base-paired stem with loop of 4 non-hydrogen bonded nucleotides." 
[SO:ke] -xref: http://en.wikipedia.org/wiki/Tetraloop "wiki" -is_a: SO:0000313 ! stem_loop - -[Term] -id: SO:0001009 -name: DNA_constraint_sequence -def: "A double-stranded DNA used to control macromolecular structure and function." [http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au\]&dispmax=50] -synonym: "DNA constraint" EXACT [] -synonym: "DNA constraint sequence" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0001010 -name: i_motif -def: "A cytosine rich domain whereby strands associate both inter- and intramolecularly at moderately acidic pH." [PMID:9753739] -synonym: "i motif" EXACT [] -synonym: "short intercalated motif" EXACT [] -is_a: SO:0000142 ! DNA_sequence_secondary_structure - -[Term] -id: SO:0001011 -name: PNA_oligo -def: "Peptide nucleic acid, is a chemical not known to occur naturally but is artificially synthesized and used in some biological research and medical treatments. The PNA backbone is composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [SO:ke] -synonym: "peptide nucleic acid" EXACT [] -synonym: "PNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Peptide_nucleic_acid "wiki" -is_a: SO:0001247 ! implied link automatically realized ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001184 ! PNA - -[Term] -id: SO:0001012 -name: DNAzyme -def: "A DNA sequence with catalytic activity." [SO:cb] -comment: Added by request from Colin Batchelor. -synonym: "catalytic DNA" EXACT [] -synonym: "deoxyribozyme" RELATED [] -synonym: "DNA enzyme" EXACT [] -is_a: SO:0000696 ! implied link automatically realized ! oligo -intersection_of: SO:0000696 ! oligo -intersection_of: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0001013 -name: MNP -def: "A multiple nucleotide polymorphism with alleles of common length > 1, for example AAA/TTT." 
[http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?rs=rs2067431] -synonym: "multiple nucleotide polymorphism" RELATED [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0001014 -name: intron_domain -comment: Requested by Colin Batchelor, Feb 2007. -synonym: "intron domain" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000188 ! intron - -[Term] -id: SO:0001015 -name: wobble_base_pair -def: "A type of non-canonical base pairing, most commonly between G and U, which is important for the secondary structure of RNAs. It has similar thermodynamic stability to the Watson-Crick pairing. Wobble base pairs only have two hydrogen bonds. Other wobble base pair possibilities are I-A, I-U and I-C." [PMID:11256617] -synonym: "wobble base pair" EXACT [] -synonym: "wobble pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Wobble_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0001016 -name: internal_guide_sequence -def: "A purine-rich sequence in the group I introns which determines the locations of the splice sites in group I intron splicing and has catalytic activity." [SO:cb] -synonym: "IGS" EXACT [] -synonym: "internal guide sequence" EXACT [] -is_a: SO:0001014 ! intron_domain -relationship: part_of SO:0000587 ! group_I_intron - -[Term] -id: SO:0001017 -name: silent_mutation -comment: Added in March 2007 in after meeting with pharmgkb. -synonym: "silent mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Silent_mutation "wiki" -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0001018 -name: epitope -def: "A region of a macromolecule that is recognized by the immune system." [http://en.wikipedia.org/wiki/Epitope] -comment: Requested by Trish Whetzel. -xref: http://en.wikipedia.org/wiki/Epitope "wiki" -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." 
[SO:ke] -subset: SOFA -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0001020 -name: sequence_variant_affecting_copy_number -synonym: "mutation affecting copy number" EXACT [] -synonym: "sequence variant affecting copy number" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0001021 -name: chromosome_breakpoint -alt_id: SO:0001242 -synonym: "aberration breakpoint" EXACT [] -synonym: "aberration_junction" EXACT [] -synonym: "chromosome breakpoint" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0001022 -name: inversion_breakpoint -def: "The point within a chromosome where an inversion begins or ends." [SO:cb] -synonym: "inversion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001023 -name: allele -def: "An allele is one of a set of coexisting sequence variants of a gene." [SO:immuno_workshop] -synonym: "allelomorph" EXACT [] -xref: http://en.wikipedia.org/wiki/Allele "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000704 ! gene - -[Term] -id: SO:0001024 -name: haplotype -def: "A haplotype is one of a set of coexisting sequence variants of a haplotype block." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Haplotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000355 ! haplotype_block - -[Term] -id: SO:0001025 -name: polymorphic_sequence_variant -def: "A sequence variant that is segregating in one or more natural populations of a species." [SO:immuno_workshop] -synonym: "polymorphic sequence variant" EXACT [] -is_a: SO:0001023 ! allele - -[Term] -id: SO:0001026 -name: genome -def: "A genome is the sum of genetic material within a cell or virion." 
[SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genome "wiki" -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0001235 ! replicon - -[Term] -id: SO:0001027 -name: genotype -def: "A genotype is a variant genome, complete or incomplete." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0001026 ! genome - -[Term] -id: SO:0001028 -name: diplotype -def: "A diplotype is a pair of haplotypes from a given individual. It is a genotype where the phase is known." [SO:immuno_workshop] -is_a: SO:0001507 ! variant_collection - -[Term] -id: SO:0001029 -name: direction_attribute -synonym: "direction attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001030 -name: forward -def: "Forward is an attribute of the feature, where the feature is in the 5' to 3' direction." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001031 -name: reverse -def: "Reverse is an attribute of the feature, where the feature is in the 3' to 5' direction. Again could be applied to primer." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001032 -name: mitochondrial_DNA -comment: This terms is used by MO. -synonym: "mitochondrial DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_DNA "wiki" -is_a: SO:0000737 ! implied link automatically realized ! mitochondrial_sequence -intersection_of: SO:0000737 ! mitochondrial_sequence -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001033 -name: chloroplast_DNA -comment: This term is used by MO. -synonym: "chloroplast DNA" EXACT [] -is_a: SO:0000745 ! implied link automatically realized ! chloroplast_sequence -intersection_of: SO:0000745 ! chloroplast_sequence -intersection_of: has_quality SO:0000352 ! 
DNA - -[Term] -id: SO:0001034 -name: mirtron -def: "A de-branched intron which mimics the structure of pre-miRNA and enters the miRNA processing pathway without Drosha mediated cleavage." [PMID:17589500, SO:ma] -comment: Ruby et al. Nature 448:83 describe a new class of miRNAs that are derived from de-branched introns. -is_a: SO:0001014 ! intron_domain - -[Term] -id: SO:0001035 -name: piRNA -def: "A small non coding RNA, part of a silencing system that prevents the spreading of selfish genetic elements." [SO:ke] -synonym: "piwi-associated RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/PiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001036 -name: arginyl_tRNA -def: "A tRNA sequence that has an arginine anticodon, and a 3' arginine binding region." [SO:ke] -synonym: "arginyl tRNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000212 ! arginine_tRNA_primary_transcript - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0001038 -name: extrachromosomal_mobile_genetic_element -def: "An MGE that is not integrated into the host chromosome." [SO:ke] -synonym: "extrachromosomal mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! 
mobile_genetic_element - -[Term] -id: SO:0001040 -name: integrated_plasmid -def: "A plasmid sequence that is integrated within the host chromosome." [SO:ke] -synonym: "integrated plasmid" EXACT [] -is_a: SO:0001039 ! implied link automatically realized ! integrated_mobile_genetic_element -intersection_of: SO:0001039 ! integrated_mobile_genetic_element -intersection_of: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0001041 -name: viral_sequence -def: "The region of nucleotide sequence of a virus, a submicroscopic particle that replicates by infecting a host cell." [SO:ke] -comment: The definitions of the children of this term were revised Decemeber 2007 after discussion on song-devel. The resulting definitions are slightly unweildy but hopefully more logically correct. -synonym: "viral sequence" EXACT [] -synonym: "virus sequence" EXACT [] -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0001042 -name: phage_sequence -def: "The nucleotide sequence of a virus that infects bacteria." [SO:ke] -synonym: "bacteriophage" EXACT [] -synonym: "phage" EXACT [] -synonym: "phage sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Bacteriophage "wiki" -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001043 -name: attCtn_site -def: "An attachment site located on a conjugative transposon and used for site-specific integration of a conjugative transposon." [Phigo:at] -synonym: "attCtn site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000371 ! conjugative_transposon - -[Term] -id: SO:0001044 -name: nuclear_mt_pseudogene -def: "A nuclear pseudogene of a mitochndrial gene." [SO:xp] -synonym: "nuclear mitochondrial pseudogene" EXACT [] -synonym: "nuclear mt pseudogene" EXACT [] -synonym: "NUMT" EXACT [] -is_a: SO:0000336 ! 
pseudogene - -[Term] -id: SO:0001045 -name: cointegrated_plasmid -def: "A MGE region consisting of two fused plasmids resulting from a replicative transposition event." [phigo:at] -synonym: "cointegrated plasmid" EXACT [] -synonym: "cointegrated replicon" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0001046 -name: IRLinv_site -def: "Component of the inversion site located at the left of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRLinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001047 -name: IRRinv_site -def: "Component of the inversion site located at the right of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRRinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001048 -name: inversion_site_part -def: "A region located within an inversion site." [SO:ke] -comment: A term created to allow the parts of an inversion site have an is_a path back to the root. -synonym: "inversion site part" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0001049 -name: defective_conjugative_transposon -def: "An island that contains genes for integration/excision and the gene and site for the initiation of intercellular transfer by conjugation. It can be complemented for transfer by a conjugative transposon." [Phigo:ariane] -synonym: "defective conjugative transposon" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001050 -name: repeat_fragment -def: "A portion of a repeat, interrupted by the insertion of another element." [SO:ke] -comment: Requested by Chris Smith, and others at Flybase to help annotate nested repeats. -synonym: "repeat fragment" EXACT [] -is_a: SO:0000657 ! implied link automatically realized ! repeat_region -is_a: SO:0000840 ! 
repeat_component -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001051 -name: nested_region -is_obsolete: true - -[Term] -id: SO:0001052 -name: nested_repeat -is_obsolete: true - -[Term] -id: SO:0001053 -name: nested_transposon -is_obsolete: true - -[Term] -id: SO:0001054 -name: transposon_fragment -synonym: "transposon fragment" EXACT [] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -subset: SOFA -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." [SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001057 -name: enhanceosome -is_obsolete: true - -[Term] -id: SO:0001058 -name: promoter_targeting_sequence -def: "A transcriptional_cis_regulatory_region that restricts the activity of a CRM to a single promoter and which functions only when both itself and an insulator are located between the CRM and the promoter." [SO:regcreative] -synonym: "promoter targeting sequence" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. 
-subset: SOFA -synonym: "partially characterised change in DNA sequence" EXACT [] -synonym: "partially_characterised_change_in_DNA_sequence" EXACT [] -synonym: "sequence alteration" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001060 -name: sequence_variant -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -synonym: "sequence variant" EXACT [] - -[Term] -id: SO:0001061 -name: propeptide_cleavage_site -alt_id: BS:00063 -def: "The propeptide_cleavage_site is the arginine/lysine boundary on a propeptide where cleavage occurs." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "propeptide cleavage site" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0001062 -name: propeptide -alt_id: BS:00077 -def: "Part of a peptide chain which is cleaved off during the formation of the mature protein." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "propep" RELATED [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Propeptide "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature_peptide_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001064 -name: active_peptide -alt_id: BS:00076 -def: "Active peptides are proteins which are biologically active, released from a precursor molecule." [EBIBS:GAR, UniProt:curation_manual] -comment: Hormones, neuropeptides, antimicrobial peptides, are active peptides. They are typically short (<40 amino acids) in length. 
-subset: biosapiens -synonym: "active peptide" EXACT [] -synonym: "peptide" BROAD [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Peptide "wiki" -is_a: SO:0000419 ! mature_protein_region - -[Term] -id: SO:0001066 -name: compositionally_biased_region_of_peptide -alt_id: BS:00068 -def: "Polypeptide region that is rich in a particular amino acid or homopolymeric and greater than three residues in length." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "compbias" RELATED [uniprot:feature_type] -synonym: "compositional bias" RELATED [] -synonym: "compositionally biased" RELATED [] -synonym: "compositionally biased region of peptide" RELATED [] -synonym: "compositionally_biased_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001067 -name: polypeptide_motif -alt_id: BS:00032 -def: "A sequence motif is a short (up to 20 amino acids) region of biological interest. Such motifs, although they are too short to constitute functional domains, share sequence similarities and are conserved in different proteins. They display a common function (protein-binding, subcellular location etc.)." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "motif" BROAD [uniprot:feature_type] -synonym: "polypeptide motif" EXACT [] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001068 -name: polypeptide_repeat -alt_id: BS:00070 -def: "A polypeptide_repeat is a single copy of an internal sequence repetition." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "polypeptide repeat" EXACT [] -synonym: "repeat" RELATED [uniprot:feature_type] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001070 -name: polypeptide_structural_region -alt_id: BS:00337 -def: "Region of polypeptide with a given structural property." [EBIBS:GAR, SO:cb] -comment: Range. 
-subset: biosapiens -synonym: "polypeptide structural region" EXACT [] -synonym: "structural_region" RELATED [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001071 -name: membrane_structure -alt_id: BS:00128 -def: "Arrangement of the polypeptide with respect to the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "membrane_structure" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001072 -name: extramembrane_polypeptide_region -alt_id: BS:00154 -def: "Polypeptide region that is localized outside of a lipid bilayer." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "extramembrane" RELATED BS [] -synonym: "extramembrane polypeptide region" EXACT [] -synonym: "extramembrane_region" RELATED BS [] -synonym: "topo_dom" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001073 -name: cytoplasmic_polypeptide_region -alt_id: BS:00145 -def: "Polypeptide region that is localized inside the cytoplasm." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "cytoplasm_location" EXACT BS [] -synonym: "cytoplasmic polypeptide region" EXACT [] -synonym: "inside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001074 -name: non_cytoplasmic_polypeptide_region -alt_id: BS:00144 -def: "Polypeptide region that is localized outside of a lipid bilayer and outside of the cytoplasm." [EBIBS:GAR, SO:cb] -comment: This could be inside an organelle within the cell. -subset: biosapiens -synonym: "non cytoplasmic polypeptide region" EXACT [] -synonym: "non_cytoplasm_location" EXACT BS [] -synonym: "outside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001075 -name: intramembrane_polypeptide_region -alt_id: BS:00156 -def: "Polypeptide region present in the lipid bilayer." 
[EBIBS:GAR] -subset: biosapiens -synonym: "intramembrane" RELATED BS [] -synonym: "intramembrane polypeptide region" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001076 -name: membrane_peptide_loop -alt_id: BS:00155 -def: "Polypeptide region localized within the lipid bilayer where both ends traverse the same membrane." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "membrane peptide loop" EXACT [] -synonym: "membrane_loop" RELATED BS [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001077 -name: transmembrane_polypeptide_region -alt_id: BS:00158 -def: "Polypeptide region traversing the lipid bilayer." [EBIBS:GAR, UniProt:curator_manual] -subset: biosapiens -synonym: "transmem" RELATED BS [uniprot:feature_type] -synonym: "transmembrane" RELATED BS [] -synonym: "transmembrane polypeptide region" EXACT [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001078 -name: polypeptide_secondary_structure -alt_id: BS:00003 -def: "A region of peptide with secondary structure has hydrogen bonding along the peptide chain that causes a defined conformation of the chain." [EBIBS:GAR] -comment: Biosapien term was secondary_structure. -subset: biosapiens -synonym: "2nary structure" RELATED BS [] -synonym: "polypeptide secondary structure" EXACT [] -synonym: "secondary structure" RELATED BS [] -synonym: "secondary structure region" RELATED BS [] -synonym: "secondary_structure" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Secondary_structure "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001079 -name: polypeptide_structural_motif -alt_id: BS:0000338 -def: "Motif is a three-dimensional structural element within the chain, which appears also in a variety of other molecules. Unlike a domain, a motif does not need to form a stable globular unit." 
[EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide structural motif" RELATED [] -synonym: "structural_motif" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Structural_motif "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001080 -name: coiled_coil -alt_id: BS:00041 -def: "A coiled coil is a structural motif in proteins, in which alpha-helices are coiled together like the strands of a rope." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "coiled" RELATED BS [uniprot:feature_type] -synonym: "coiled coil" EXACT [] -xref: http://en.wikipedia.org/wiki/Coiled_coil "wiki" -is_a: SO:0001079 ! polypeptide_structural_motif - -[Term] -id: SO:0001081 -name: helix_turn_helix -alt_id: BS:00147 -def: "A motif comprising two helices separated by a turn." [EBIBS:GAR] -subset: biosapiens -synonym: "helix turn helix" EXACT [] -synonym: "helix-turn-helix" EXACT [] -synonym: "HTH" RELATED BS [] -is_a: SO:0001079 ! polypeptide_structural_motif -relationship: has_part SO:0001114 ! peptide_helix -relationship: has_part SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001082 -name: polypeptide_sequencing_information -alt_id: BS:00125 -def: "Incompatibility in the sequence due to some experimental problem." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "sequencing_information" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0001083 -name: non_adjacent_residues -alt_id: BS:00182 -def: "Indicates that two consecutive residues in a fragment sequence are not consecutive in the full-length protein and that there are a number of unsequenced residues between them." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "non consecutive" EXACT [] -synonym: "non_cons" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! 
polypeptide_sequencing_information - -[Term] -id: SO:0001084 -name: non_terminal_residue -alt_id: BS:00072 -def: "The residue at an extremity of the sequence is not the terminal residue." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "non terminal" EXACT [] -synonym: "non_ter" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001085 -name: sequence_conflict -alt_id: BS:00069 -def: "Different sources report differing sequences." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "conflict" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001086 -name: sequence_uncertainty -alt_id: BS:00181 -def: "Describes the positions in a sequence where the authors are unsure about the sequence assignment." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "unsure" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001087 -name: cross_link -alt_id: BS:00178 -def: "Posttranslationally formed amino acid bonds." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "cross_link" EXACT [] -synonym: "crosslink" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001088 -name: disulfide_bond -alt_id: BS:00028 -def: "The covalent bond between sulfur atoms that binds two peptide chains or different parts of one peptide chain and is a structural determinant in many protein molecules." [EBIBS:GAR, UniProt:curation_manual] -comment: 2 discreet & joined. 
-subset: biosapiens -synonym: "disulfid" RELATED [] -synonym: "disulfide" RELATED [] -synonym: "disulfide bond" RELATED [] -synonym: "disulfide_bond" EXACT [] -synonym: "disulphide" EXACT [] -synonym: "disulphide bond" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001089 -name: post_translationally_modified_region -alt_id: BS:00052 -def: "A region where a transformation occurs in a protein after it has been synthesized. This which may regulate, stabilize, crosslink or introduce new chemical functionalities in the protein." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mod_res" EXACT [uniprot:feature_type] -synonym: "modified residue" EXACT [] -synonym: "post_translational_modification" EXACT [] -xref: http://en.wikipedia.org/wiki/Post_translational_modification "wiki" -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001090 -name: covalent_binding_site -alt_id: BS:00246 -def: "Binding involving a covalent bond." [EBIBS:GAR] -subset: biosapiens -synonym: "covalent_binding_site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001091 -name: non_covalent_binding_site -alt_id: BS:00029 -def: "Binding site for any chemical group (co-enzyme, prosthetic group, etc.)." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "binding" RELATED [uniprot:curation] -synonym: "binding site" RELATED [] -synonym: "non_covalent_binding_site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001092 -name: polypeptide_metal_contact -alt_id: BS:00027 -def: "Residue is part of a binding site for a metal ion." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "metal_binding" RELATED [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001093 -name: protein_protein_contact -alt_id: BS:00131 -def: "Residues involved in protein-protein interactions." 
[EBIBS:GAR, UniProt:Curation_manual] -subset: biosapiens -synonym: "protein protein contact" EXACT [] -synonym: "protein protein contact site" EXACT [] -synonym: "protein_protein_interaction" RELATED [] -xref: http://en.wikipedia.org/wiki/Protein_protein_interaction "wiki" -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001094 -name: polypeptide_calcium_ion_contact_site -alt_id: BS:00186 -def: "Residue involved in contact with calcium." [EBIBS:GAR] -subset: biosapiens -synonym: "ca bind" RELATED [] -synonym: "Ca_contact_site" EXACT [] -synonym: "polypeptide calcium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001095 -name: polypeptide_cobalt_ion_contact_site -alt_id: BS:00136 -def: "Residue involved in contact with cobalt." [EBIBS:GAR] -subset: biosapiens -synonym: "Co_contact_site" EXACT [] -synonym: "polypeptide cobalt ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001096 -name: polypeptide_copper_ion_contact_site -alt_id: BS:00146 -def: "Residue involved in contact with copper." [EBIBS:GAR] -subset: biosapiens -synonym: "Cu_contact_site" EXACT [] -synonym: "polypeptide copper ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001097 -name: polypeptide_iron_ion_contact_site -alt_id: BS:00137 -def: "Residue involved in contact with iron." [EBIBS:GAR] -subset: biosapiens -synonym: "Fe_contact_site" EXACT [] -synonym: "polypeptide iron ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001098 -name: polypeptide_magnesium_ion_contact_site -alt_id: BS:00187 -def: "Residue involved in contact with magnesium." [EBIBS:GAR] -subset: biosapiens -synonym: "Mg_contact_site" EXACT [] -synonym: "polypeptide magnesium ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001099 -name: polypeptide_manganese_ion_contact_site -alt_id: BS:00140 -def: "Residue involved in contact with manganese." [EBIBS:GAR] -subset: biosapiens -synonym: "Mn_contact_site" EXACT [] -synonym: "polypeptide manganese ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001100 -name: polypeptide_molybdenum_ion_contact_site -alt_id: BS:00141 -def: "Residue involved in contact with molybdenum." [EBIBS:GAR] -subset: biosapiens -synonym: "Mo_contact_site" EXACT [] -synonym: "polypeptide molybdenum ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001101 -name: polypeptide_nickel_ion_contact_site -alt_id: BS:00142 -def: "Residue involved in contact with nickel." [EBIBS:GAR] -subset: biosapiens -synonym: "Ni_contact_site" EXACT [] -synonym: "polypeptide nickel ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001102 -name: polypeptide_tungsten_ion_contact_site -alt_id: BS:00143 -def: "Residue involved in contact with tungsten." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide tungsten ion contact site" EXACT [] -synonym: "W_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001103 -name: polypeptide_zinc_ion_contact_site -alt_id: BS:00185 -def: "Residue involved in contact with zinc." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide zinc ion contact site" EXACT [] -synonym: "Zn_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001104 -name: catalytic_residue -alt_id: BS:00026 -def: "Amino acid involved in the activity of an enzyme." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "act_site" RELATED [uniprot:feature_type] -synonym: "active site residue" EXACT [] -synonym: "catalytic residue" EXACT [] -is_a: SO:0001237 ! amino_acid -relationship: part_of SO:0100019 ! 
polypeptide_catalytic_motif - -[Term] -id: SO:0001105 -name: polypeptide_ligand_contact -alt_id: BS:00157 -def: "Residues which interact with a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide ligand contact" EXACT [] -synonym: "protein-ligand interaction" RELATED [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001106 -name: asx_motif -alt_id: BS:00202 -def: "A motif of five consecutive residues and two H-bonds in which: Residue(i) is Aspartate or Asparagine (Asx), side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3), main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001107 -name: beta_bulge -alt_id: BS:00208 -def: "A motif of three residues within a beta-sheet in which the main chains of two consecutive residues are H-bonded to that of the third, and in which the dihedral angles are as follows: Residue(i): -140 degrees < phi(l) -20 degrees , -90 degrees < psi(l) < 40 degrees. Residue (i+1): -180 degrees < phi < -25 degrees or +120 degrees < phi < +180 degrees, +40 degrees < psi < +180 degrees or -180 degrees < psi < -120 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge" EXACT [] -xref: http://en.wikipedia.org/wiki/Beta_bulge "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001108 -name: beta_bulge_loop -alt_id: BS:00209 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds. Beta bulge loops often occur at the loop ends of beta-hairpins." [EBIBS:GAR, Http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001109 -name: beta_bulge_loop_five -alt_id: BS:00210 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+4), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+3), these loops have an RL nest at residues i+2 and i+3." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop five" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001110 -name: beta_bulge_loop_six -alt_id: BS:00211 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+5), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+4), these loops have an RL nest at residues i+3 and i+4." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop six" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001111 -name: beta_strand -alt_id: BS:00042 -def: "A beta strand describes a single length of polypeptide chain that forms part of a beta sheet. A single continuous stretch of amino acids adopting an extended conformation of hydrogen bonds between the N-O and the C=O of another part of the peptide. This forms a secondary protein structure in which two or more extended polypeptide regions are hydrogen-bonded to one another in a planar array." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "strand" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Beta_sheet "wiki" -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001112 -name: antiparallel_beta_strand -alt_id: BS:0000341 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (one running N-terminal to C-terminal and one running C-terminal to N-terminal). Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i) and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they form two mutual backbone hydrogen bonds to each other's flanking peptide groups; this is known as a close pair of hydrogen bonds. The peptide backbone dihedral angles (phi, psi) are about (-140 degrees, 135 degrees) in antiparallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "antiparallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001113 -name: parallel_beta_strand -alt_id: BS:00151 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (both running N-terminal to C-terminal). This orientation is slightly less stable because it introduces nonplanarity in the inter-strand hydrogen bonding pattern. Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i)and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they do not hydrogen bond to each other; rather, one residue forms hydrogen bonds to the residues that flank the other (but not vice versa). For example, residue i may form hydrogen bonds to residues j - 1 and j + 1; this is known as a wide pair of hydrogen bonds. By contrast, residue j may hydrogen-bond to different residues altogether, or to none at all. The dihedral angles (phi, psi) are about (-120 degrees, 115 degrees) in parallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. 
-subset: biosapiens -synonym: "parallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001114 -name: peptide_helix -alt_id: BS:00152 -def: "A helix is a secondary_structure conformation where the peptide backbone forms a coil." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "helix" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001115 -name: left_handed_peptide_helix -alt_id: BS:00222 -def: "A left handed helix is a region of peptide where the coiled conformation turns in an anticlockwise, left handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix-l" RELATED [] -synonym: "left handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001116 -name: right_handed_peptide_helix -alt_id: BS:0000339 -def: "A right handed helix is a region of peptide where the coiled conformation turns in a clockwise, right handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix" RELATED BS [] -synonym: "right handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001117 -name: alpha_helix -alt_id: BS:00040 -def: "The helix has 3.6 residues per turn which corersponds to a translation of 1.5 angstroms (= 0.15 nm) along the helical axis. Every backbone N-H group donates a hydrogen bond to the backbone C=O group of the amino acid four residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "a-helix" RELATED BS [] -synonym: "helix" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Alpha_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001118 -name: pi_helix -alt_id: BS:00153 -def: "The pi helix has 4.1 residues per turn and a translation of 1.15 (=0.115 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid five residues earlier." [EBIBS:GAR] -comment: Range. 
-subset: biosapiens -synonym: "pi helix" EXACT [] -xref: http://en.wikipedia.org/wiki/Pi_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001119 -name: three_ten_helix -alt_id: BS:0000340 -def: "The 3-10 helix has 3 residues per turn with a translation of 2.0 angstroms (=0.2 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid three residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "3(10) helix" EXACT [] -synonym: "3-10 helix" EXACT [] -synonym: "310 helix" EXACT [] -synonym: "three_ten_helix" EXACT [] -xref: http://en.wikipedia.org/wiki/310_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001120 -name: polypeptide_nest_motif -alt_id: BS:00223 -def: "A motif of two consecutive residues with dihedral angles. Nest should not have Proline as any residue. Nests frequently occur as parts of other motifs such as Schellman loops." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest" RELATED BS [] -synonym: "nest_motif" EXACT [] -synonym: "polypeptide nest motif" RELATED [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001121 -name: polypeptide_nest_left_right_motif -alt_id: BS:00224 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_left_right" EXACT [] -synonym: "nest_lr" EXACT [] -synonym: "polypeptide nest left right motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001122 -name: polypeptide_nest_right_left_motif -alt_id: BS:00225 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. 
Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_right_left" EXACT [] -synonym: "nest_rl" EXACT [] -synonym: "polypeptide nest right left motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001123 -name: schellmann_loop -alt_id: BS:00226 -def: "A motif of six or seven consecutive residues that contains two H-bonds." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "paperclip" RELATED BS [] -synonym: "paperclip loop" RELATED [] -synonym: "schellmann_loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001124 -name: schellmann_loop_seven -alt_id: BS:00228 -def: "Wild type: A motif of seven consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+6), the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+5)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop seven" EXACT [] -synonym: "seven-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001125 -name: schellmann_loop_six -alt_id: BS:00227 -def: "Common Type: A motif of six consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+5) the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop six" EXACT [] -synonym: "six-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! 
schellmann_loop - -[Term] -id: SO:0001126 -name: serine_threonine_motif -alt_id: BS:00229 -def: "A motif of five consecutive residues and two hydrogen bonds in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3) , the main-chain CO group of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine motif" EXACT [] -synonym: "st motif" EXACT [] -synonym: "st_motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001127 -name: serine_threonine_staple_motif -alt_id: BS:00230 -def: "A motif of four or five consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain OH of residue(i) is H-bonded to the main-chain CO of residue(i3) or (i4), Phi angles of residues(i1), (i2) and (i3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine threonine staple motif" EXACT [] -synonym: "st_staple" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001128 -name: polypeptide_turn_motif -alt_id: BS:00148 -def: "A reversal in the direction of the backbone of a protein that is stabilized by hydrogen bond between backbone NH and CO groups, involving no more than 4 amino acid residues." [EBIBS:GAR, uniprot:feature_type] -comment: Range. -subset: biosapiens -synonym: "turn" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001129 -name: asx_turn_left_handed_type_one -alt_id: BS:00206 -def: "Left handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type one" EXACT [] -synonym: "asx_turn_il" RELATED [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001130 -name: asx_turn_left_handed_type_two -alt_id: BS:00204 -def: "Left handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type two" EXACT [] -synonym: "asx_turn_iil" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001131 -name: asx_turn_right_handed_type_two -alt_id: BS:00205 -def: "Right handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn right handed type two" EXACT [] -synonym: "asx_turn_iir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001132 -name: asx_turn_right_handed_type_one -alt_id: BS:00207 -def: "Right handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn type right handed type one" EXACT [] -synonym: "asx_turn_ir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001133 -name: beta_turn -alt_id: BS:00212 -def: "A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. 
It is characterized by the dihedral angles of the second and third residues, which are the basis for sub-categorization." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001134 -name: beta_turn_left_handed_type_one -alt_id: BS:00215 -def: "Left handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles:- Residue(i+1): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees. Residue(i+2): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type one" EXACT [] -synonym: "beta_turn_il" EXACT [] -synonym: "type I' beta turn" EXACT [] -synonym: "type I' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001135 -name: beta_turn_left_handed_type_two -alt_id: BS:00213 -def: "Left handed type II: A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees > phi > -20 degrees, +80 degrees > psi > +180 degrees. Residue(i+2): +20 degrees > phi > +140 degrees, -40 degrees > psi > +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type two" EXACT [] -synonym: "beta_turn_iil" EXACT [] -synonym: "type II' beta turn" EXACT [] -synonym: "type II' turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001136 -name: beta_turn_right_handed_type_one -alt_id: BS:00216 -def: "Right handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+2): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type one" EXACT [] -synonym: "beta_turn_ir" EXACT [] -synonym: "type I beta turn" EXACT [] -synonym: "type I turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001137 -name: beta_turn_right_handed_type_two -alt_id: BS:00214 -def: "Right handed type II:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, +80 degrees < psi < +180 degrees. Residue(i+2): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type two" EXACT [] -synonym: "beta_turn_iir" EXACT [] -synonym: "type II beta turn" EXACT [] -synonym: "type II turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001138 -name: gamma_turn -alt_id: BS:00219 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn" EXACT [] -is_a: SO:0001128 ! 
polypeptide_turn_motif - -[Term] -id: SO:0001139 -name: gamma_turn_classic -alt_id: BS:00220 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=75.0 - psi(i+1)=-64.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "classic gamma turn" EXACT [] -synonym: "gamma turn classic" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001140 -name: gamma_turn_inverse -alt_id: BS:00221 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=-79.0 - psi(i+1)=69.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn inverse" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001141 -name: serine_threonine_turn -alt_id: BS:00231 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine turn" EXACT [] -synonym: "st_turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001142 -name: st_turn_left_handed_type_one -alt_id: BS:00234 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type one" EXACT [] -synonym: "st_turn_il" EXACT [] -is_a: SO:0001141 ! 
serine_threonine_turn - -[Term] -id: SO:0001143 -name: st_turn_left_handed_type_two -alt_id: BS:00232 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type two" EXACT [] -synonym: "st_turn_iil" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001144 -name: st_turn_right_handed_type_one -alt_id: BS:00235 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type one" EXACT [] -synonym: "st_turn_ir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001145 -name: st_turn_right_handed_type_two -alt_id: BS:00233 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type two" EXACT [] -synonym: "st_turn_iir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001146 -name: polypeptide_variation_site -alt_id: BS:00336 -def: "A site of sequence variation (alteration). Alternative sequence due to naturally occuring events such as polymorphisms and altermatve splicing or experimental methods such as site directed mutagenesis." 
[EBIBS:GAR, SO:ke] -comment: For example, was a substitution natural or mutated as part of an experiment? This term is added to merge the biosapiens term sequence_variations. -subset: biosapiens -synonym: "sequence_variations" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001147 -name: natural_variant_site -alt_id: BS:00071 -def: "Describes the natural sequence variants due to polymorphisms, disease-associated mutations, RNA editing and variations between strains, isolates or cultivars." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "natural_variant" BROAD [] -synonym: "sequence variation" BROAD [] -synonym: "variant" BROAD [uniprot:feature_type] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001148 -name: mutated_variant_site -alt_id: BS:00036 -def: "Site which has been experimentally altered." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mutagen" EXACT BS [uniprot:feature_type] -synonym: "mutagenesis" EXACT [] -synonym: "mutated_site" EXACT [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001149 -name: alternate_sequence_site -alt_id: BS:00073 -alt_id: SO:0001065 -def: "Description of sequence variants produced by alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "alternative_sequence" EXACT [] -synonym: "isoform" NARROW [] -synonym: "sequence variation" NARROW [] -synonym: "var_seq" EXACT [uniprot:feature_type] -synonym: "varsplic" NARROW [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001150 -name: beta_turn_type_six -def: "A motif of four consecutive peptide resides of type VIa or type VIb and where the i+2 residue is cis-proline." 
[SO:cb] -subset: biosapiens -synonym: "beta turn type six" EXACT [] -synonym: "cis-proline loop" EXACT [] -synonym: "type VI beta turn" EXACT [] -synonym: "type VI turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001151 -name: beta_turn_type_six_a -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -90 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six a" EXACT [] -synonym: "type VIa beta turn" EXACT [] -synonym: "type VIa turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001152 -name: beta_turn_type_six_a_one -subset: biosapiens -synonym: "beta turn type six a one" EXACT [] -synonym: "type VIa1 beta turn" EXACT [] -synonym: "type VIa1 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001153 -name: beta_turn_type_six_a_two -subset: biosapiens -synonym: "beta turn type six a two" EXACT [] -synonym: "type VIa2 beta turn" EXACT [] -synonym: "type VIa2 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001154 -name: beta_turn_type_six_b -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -120 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -60 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six b" EXACT [] -synonym: "type VIb beta turn" EXACT [] -synonym: "type VIb turn" EXACT [] -is_a: SO:0001150 ! 
beta_turn_type_six - -[Term] -id: SO:0001155 -name: beta_turn_type_eight -def: "A motif of four consecutive peptide residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ -30 degrees. Residue(i+2): phi ~ -120 degrees, psi ~ 120 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type eight" EXACT [] -synonym: "type VIII beta turn" EXACT [] -synonym: "type VIII turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001156 -name: DRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -10 and -60 relative to the TSS. Consensus sequence is WATCGATW." [PMID:12537576] -comment: This consensus sequence was identified computationally using the MEME algorithm within core promoter sequences from -60 to +40, with an E value of 1.7e-183. Tends to co-occur with Motif 7. Tends to not occur with DPE motif (SO:0000015) or motif 10. -synonym: "DRE motif" EXACT [] -synonym: "NDM4" EXACT [] -synonym: "WATCGATW_motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001157 -name: DMv4_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements with respect to the TSS (+1). Consensus sequence is YGGTCACACTR. Marked spatial preference within core promoter; tend to occur near the TSS, although not as tightly as INR (SO:0000014)." [PMID:16827941\:12537576] -synonym: "directional motif v4" EXACT [] -synonym: "DMv4" EXACT [] -synonym: "DMv4 motif" EXACT [] -synonym: "motif 1 element" EXACT [] -synonym: "promoter motif 1" EXACT [] -synonym: "YGGTCACATR" NARROW [] -is_a: SO:0000713 ! 
DNA_motif - -[Term] -id: SO:0001158 -name: E_box_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and +1 relative to the TSS. Consensus sequence is AWCAGCTGWT. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015)." [PMID:12537576\:16827941] -synonym: "AWCAGCTGWT" NARROW [] -synonym: "E box motif" EXACT [] -synonym: "generic E box motif" EXACT [] -synonym: "NDM5" RELATED [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001159 -name: DMv5_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -50 and -10 relative to the TSS. Consensus sequence is KTYRGTATWTTT. Tends to co-occur with DMv4 (SO:0001157) . Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v5" EXACT [] -synonym: "DMv5" EXACT [] -synonym: "DMv5 motif" EXACT [] -synonym: "KTYRGTATWTTT" NARROW [] -synonym: "promoter motif 6" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001160 -name: DMv3_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -30 and +15 relative to the TSS. Consensus sequence is KNNCAKCNCTRNY. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015) or MTE (0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v3" EXACT [] -synonym: "DMv3" EXACT [] -synonym: "DMv3 motif" EXACT [] -synonym: "KNNCAKCNCTRNY" NARROW [] -synonym: "promoter motif 7" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001161 -name: DMv2_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and -45 relative to the TSS. 
Consensus sequence is MKSYGGCARCGSYSS. Tends to co-occur with DMv3 (SO:0001160). Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v2" EXACT [] -synonym: "DMv2" EXACT [] -synonym: "DMv2 motif" EXACT [] -synonym: "MKSYGGCARCGSYSS" NARROW [] -synonym: "promoter motif 8" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001162 -name: MTE -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between +20 and +30 relative to the TSS. Consensus sequence is CSARCSSAACGS. Tends to co-occur with INR motif (SO:0000014). Tends to not occur with DPE motif (SO:0000015) or DMv5 (SO:0001159)." [PMID:12537576\:15231738] -synonym: "CSARCSSAACGS" NARROW [] -synonym: "motif ten element" EXACT [] -synonym: "motif_ten_element" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001163 -name: INR1_motif -def: "A promoter motif with consensus sequence TCATTCG." [PMID:16827941] -synonym: "directional motif p3" EXACT [] -synonym: "directional promoter motif 3" EXACT [] -synonym: "DMp3" EXACT [] -synonym: "INR1 motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001164 -name: DPE1_motif -def: "A promoter motif with consensus sequence CGGACGT." [PMID:16827941] -synonym: "directional motif 5" EXACT [] -synonym: "directional promoter motif 5" RELATED [] -synonym: "DMp5" EXACT [] -synonym: "DPE1 motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001165 -name: DMv1_motif -def: "A promoter motif with consensus sequence CARCCCT." [PMID:16827941] -synonym: "directional promoter motif v1" RELATED [] -synonym: "DMv1" RELATED [] -synonym: "DMv1 motif" EXACT [] -is_a: SO:0000713 ! 
DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001166 -name: GAGA_motif -def: "A non directional promoter motif with consensus sequence GAGAGCG." [PMID:16827941] -synonym: "GAGA" EXACT [] -synonym: "GAGA motif" EXACT [] -synonym: "NDM1" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001167 -name: NDM2_motif -def: "A non directional promoter motif with consensus CGMYGYCR." [PMID:16827941] -synonym: "NDM2" EXACT [] -synonym: "NDM2 motif" EXACT [] -synonym: "non directional promoter motif 2" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001168 -name: NDM3_motif -def: "A non directional promoter motif with consensus sequence GAAAGCT." [PMID:16827941] -synonym: "NDM3" EXACT [] -synonym: "NDM3 motif" EXACT [] -synonym: "non directional motif 3" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001169 -name: ds_RNA_viral_sequence -def: "A ds_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded RNA." [SO:ke] -synonym: "double stranded RNA virus sequence" EXACT [] -synonym: "ds RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001170 -name: polinton -def: "A kind of DNA transposon that populates the genomes of protists, fungi, and animals, characterized by a unique set of proteins necessary for their transposition, including a protein-primed DNA polymerase B, retroviral integrase, cysteine protease, and ATPase. Polintons are characterized by 6-bp target site duplications, terminal-inverted repeats that are several hundred nucleotides long, and 5'-AG and TC-3' termini. Polintons exist as autonomous and nonautonomous elements." [PMID:16537396] -synonym: "maverick element" RELATED [] -is_a: SO:0000208 ! 
terminal_inverted_repeat_element - -[Term] -id: SO:0001171 -name: rRNA_21S -def: "A component of the large ribosomal subunit in mitochondrial rRNA." [RSC:cb] -synonym: "21S LSU rRNA" EXACT [] -synonym: "21S ribosomal RNA" EXACT [] -synonym: "21S rRNA" EXACT [] -synonym: "rRNA 21S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001172 -name: tRNA_region -def: "A region of a tRNA." [RSC:cb] -synonym: "tRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000253 ! tRNA - -[Term] -id: SO:0001173 -name: anticodon_loop -def: "A sequence of seven nucleotide bases in tRNA which contains the anticodon. It has the sequence 5'-pyrimidine-purine-anticodon-modified purine-any base-3." [ISBN:0716719207] -synonym: "anti-codon loop" EXACT [] -synonym: "anticodon loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001174 -name: anticodon -def: "A sequence of three nucleotide bases in tRNA which recognizes a codon in mRNA." [RSC:cb] -synonym: "anti-codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Anticodon "wiki" -is_a: SO:0001172 ! tRNA_region -relationship: part_of SO:0001173 ! anticodon_loop - -[Term] -id: SO:0001175 -name: CCA_tail -def: "Base sequence at the 3' end of a tRNA. The 3'-hydroxyl group on the terminal adenosine is the attachment point for the amino acid." [ISBN:0716719207] -synonym: "CCA sequence" EXACT [] -synonym: "CCA tail" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001176 -name: DHU_loop -def: "Non-base-paired sequence of nucleotide bases in tRNA. It contains several dihydrouracil residues." [ISBN:071671920] -synonym: "D loop" RELATED [] -synonym: "DHU loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001177 -name: T_loop -def: "Non-base-paired sequence of three nucleotide bases in tRNA. It has sequence T-Psi-C." [ISBN:0716719207] -synonym: "T loop" EXACT [] -synonym: "TpsiC loop" EXACT [] -is_a: SO:0001172 ! 
tRNA_region - -[Term] -id: SO:0001178 -name: pyrrolysine_tRNA_primary_transcript -def: "A primary transcript encoding pyrrolysyl tRNA (SO:0000766)." [RSC:cb] -synonym: "pyrrolysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0001179 -name: U3_snoRNA -def: "U3 snoRNA is a member of the box C/D class of small nucleolar RNAs. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -comment: The definition is most of the old definition for snoRNA (SO:0000275). -synonym: "small nucleolar RNA U3" EXACT [] -synonym: "snoRNA U3" EXACT [] -synonym: "U3 small nucleolar RNA" EXACT [] -synonym: "U3 snoRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Small_nucleolar_RNA_U3 "wiki" -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0001180 -name: AU_rich_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is rich in AUUUA pentamers. Messenger RNAs bearing multiple AU-rich elements are often unstable." 
[PMID:7892223] -synonym: "ARE" RELATED [] -synonym: "AU rich element" EXACT [] -synonym: "AU-rich element" EXACT [] -xref: http://en.wikipedia.org/wiki/AU-rich_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001181 -name: Bruno_response_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is bound by the Drosophila Bruno protein and its homologs." [PMID:10893231] -comment: Not to be confused with BRE_motif (SO:0000016), which binds transcription factor II B. -synonym: "BRE" RELATED [] -synonym: "Bruno response element" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001182 -name: iron_responsive_element -def: "A regulatory sequence found in the 5' and 3' UTRs of many mRNAs which encode iron-binding proteins. It has a hairpin structure and is recognized by trans-acting proteins known as iron-regulatory proteins." [PMID:3198610, PMID:8710843] -synonym: "IRE" EXACT [] -synonym: "iron responsive element" EXACT [] -xref: http://en.wikipedia.org/wiki/Iron_responsive_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0001183 -name: morpholino -def: "An attribute describing a sequence composed of nucleobases bound to a morpholino backbone. A morpholino backbone consists of morpholine (CHEBI:34856) rings connected by phosphorodiamidate linkages." [RSC:cb] -comment: Do not use this for feature annotation. Use morpholino_oligo (SO:0000034) instead. -xref: http://en.wikipedia.org/wiki/Morpholino "wiki" -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001184 -name: PNA -def: "An attribute describing a sequence composed of peptide nucleic acid (CHEBI:48021), a chemical consisting of nucleobases bound to a backbone composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. 
The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [RSC:cb] -comment: Do not use this term for feature annotation. Use PNA_oligo (SO:0001011) instead. -synonym: "peptide nucleic acid" RELATED [] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001185 -name: enzymatic -def: "An attribute describing the sequence of a transcript that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use enzymatic_RNA (SO:0000372) instead. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001186 -name: ribozymic -def: "An attribute describing the sequence of a transcript that has catalytic activity even without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use ribozyme (SO:0000374) instead. -is_a: SO:0001185 ! enzymatic - -[Term] -id: SO:0001187 -name: pseudouridylation_guide_snoRNA -def: "A snoRNA that specifies the site of pseudouridylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA pseudouridylation guide activity (GO:0030558). -synonym: "pseudouridylation guide snoRNA" EXACT [] -is_a: SO:0000594 ! H_ACA_box_snoRNA - -[Term] -id: SO:0001188 -name: LNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of 'locked' deoxyribose rings connected to a phosphate backbone. The deoxyribose unit's conformation is 'locked' by a 2'-C,4'-C-oxymethylene link." [CHEBI:48010] -comment: Do not use this term for feature annotation. Use LNA_oligo (SO:0001189) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001189 -name: LNA_oligo -def: "An oligo composed of LNA residues." [RSC:cb] -synonym: "LNA oligo" EXACT [] -synonym: "locked nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Locked_nucleic_acid "wiki" -is_a: SO:0001247 ! implied link automatically realized ! 
synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001188 ! LNA - -[Term] -id: SO:0001190 -name: TNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of threose rings connected to a phosphate backbone." [CHEBI:48019] -comment: Do not use this term for feature annotation. Use TNA_oligo (SO:0001191) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001191 -name: TNA_oligo -def: "An oligo composed of TNA residues." [RSC:cb] -synonym: "threose nucleic acid" EXACT [] -synonym: "TNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Threose_nucleic_acid "wiki" -is_a: SO:0001247 ! implied link automatically realized ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001190 ! TNA - -[Term] -id: SO:0001192 -name: GNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of an acyclic three-carbon propylene glycol connected to a phosphate backbone. It has two enantiomeric forms, (R)-GNA and (S)-GNA." [CHEBI:48015] -comment: Do not use this term for feature annotation. Use GNA_oligo (SO:0001192) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001193 -name: GNA_oligo -def: "An oligo composed of GNA residues." [RSC:cb] -synonym: "glycerol nucleic acid" EXACT [] -synonym: "glycol nucleic acid" EXACT [] -synonym: "GNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Glycerol_nucleic_acid "wiki" -is_a: SO:0001247 ! implied link automatically realized ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001192 ! GNA - -[Term] -id: SO:0001194 -name: R_GNA -def: "An attribute describing a GNA sequence in the (R)-GNA enantiomer." [CHEBI:48016] -comment: Do not use this term for feature annotation. Use R_GNA_oligo (SO:0001195) instead. -synonym: "R GNA" EXACT [] -is_a: SO:0001192 ! 
GNA - -[Term] -id: SO:0001195 -name: R_GNA_oligo -def: "An oligo composed of (R)-GNA residues." [RSC:cb] -synonym: "(R)-glycerol nucleic acid" EXACT [] -synonym: "(R)-glycol nucleic acid" EXACT [] -synonym: "R GNA oligo" EXACT [] -is_a: SO:0001193 ! implied link automatically realized ! GNA_oligo -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001194 ! R_GNA - -[Term] -id: SO:0001196 -name: S_GNA -def: "An attribute describing a GNA sequence in the (S)-GNA enantiomer." [CHEBI:48017] -comment: Do not use this term for feature annotation. Use S_GNA_oligo (SO:0001197) instead. -synonym: "S GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001197 -name: S_GNA_oligo -def: "An oligo composed of (S)-GNA residues." [RSC:cb] -synonym: "(S)-glycerol nucleic acid" EXACT [] -synonym: "(S)-glycol nucleic acid" EXACT [] -synonym: "S GNA oligo" EXACT [] -is_a: SO:0001193 ! implied link automatically realized ! GNA_oligo -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001196 ! S_GNA - -[Term] -id: SO:0001198 -name: ds_DNA_viral_sequence -def: "A ds_DNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded DNA." [SO:ke] -synonym: "double stranded DNA virus" EXACT [] -synonym: "ds DNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001199 -name: ss_RNA_viral_sequence -def: "A ss_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as single stranded RNA." [SO:ke] -synonym: "single strand RNA virus" EXACT [] -synonym: "ss RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001200 -name: negative_sense_ssRNA_viral_sequence -def: "A negative_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that is complementary to mRNA and must be converted to positive sense RNA by RNA polymerase before translation." 
[SO:ke] -synonym: "negative sense single stranded RNA virus" RELATED [] -synonym: "negative sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001201 -name: positive_sense_ssRNA_viral_sequence -def: "A positive_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that can be immediately translated by the host." [SO:ke] -synonym: "positive sense single stranded RNA virus" RELATED [] -synonym: "positive sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001202 -name: ambisense_ssRNA_viral_sequence -def: "A ambisense_RNA_virus is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus with both messenger and anti messenger polarity." [SO:ke] -synonym: "ambisense single stranded RNA virus" EXACT [] -synonym: "ambisense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001203 -name: RNA_polymerase_promoter -def: "A region (DNA) to which RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "RNA polymerase promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0001204 -name: Phage_RNA_Polymerase_Promoter -def: "A region (DNA) to which Bacteriophage RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "Phage RNA Polymerase Promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0001205 -name: SP6_RNA_Polymerase_Promoter -def: "A region (DNA) to which the SP6 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "SP6 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001206 -name: T3_RNA_Polymerase_Promoter -def: "A DNA sequence to which the T3 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T3 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! 
Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001207 -name: T7_RNA_Polymerase_Promoter -def: "A region (DNA) to which the T7 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T7 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001208 -name: five_prime_EST -def: "An EST read from the 5' end of a transcript that usually codes for a protein. These regions tend to be conserved across species and do not change much within a gene family." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "5' EST" EXACT [] -synonym: "five prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001209 -name: three_prime_EST -def: "An EST read from the 3' end of a transcript. They are more likely to fall within non-coding, or untranslated regions(UTRs)." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "3' EST" EXACT [] -synonym: "three prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001210 -name: translational_frameshift -def: "The region of mRNA (not divisible by 3 bases) that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "ribosomal frameshift" EXACT [] -synonym: "translational frameshift" EXACT [] -xref: http://en.wikipedia.org/wiki/Translational_frameshift "wiki" -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0001211 -name: plus_1_translational_frameshift -def: "The region of mRNA 1 base long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 1 ribosomal frameshift" EXACT [] -synonym: "plus 1 translational frameshift" EXACT [] -is_a: SO:0001210 ! 
translational_frameshift - -[Term] -id: SO:0001212 -name: plus_2_translational_frameshift -def: "The region of mRNA 2 bases long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 2 ribosomal frameshift" EXACT [] -synonym: "plus 2 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001213 -name: group_III_intron -def: "Group III introns are introns found in the mRNA of the plastids of euglenoid protists. They are spliced by a two step transesterification with bulged adenosine as initiating nucleophile." [PMID:11377794] -comment: GO:0000374. -synonym: "group III intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_III_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -def: "The maximal intersection of exon and UTR." [SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with a stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001216 -name: endonuclease_spliced_intron -def: "An intron that spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -synonym: "endonuclease spliced intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0001217 -name: protein_coding_gene -synonym: "protein coding gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! 
gene -intersection_of: has_quality SO:0000010 ! protein_coding - -[Term] -id: SO:0001218 -name: transgenic_insertion -def: "An insertion that derives from another organism, via the use of recombinant DNA technology." [SO:bm] -synonym: "transgenic insertion" EXACT [] -is_a: SO:0000667 ! implied link automatically realized ! insertion -intersection_of: SO:0000667 ! insertion -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0001219 -name: retrogene -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000569 ! retrotransposed - -[Term] -id: SO:0001220 -name: silenced_by_RNA_interference -def: "An attribute describing an epigenetic process where a gene is inactivated by RNA interference." [RSC:cb] -comment: RNA interference is GO:0016246. -synonym: "silenced by RNA interference" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001221 -name: silenced_by_histone_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by histone modification." [RSC:cb] -comment: Histone modification is GO:0016570. -synonym: "silenced by histone modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001222 -name: silenced_by_histone_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone methylation." [RSC:cb] -comment: Histone methylation is GO:0016571. -synonym: "silenced by histone methylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001223 -name: silenced_by_histone_deacetylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone deacetylation." [RSC:cb] -comment: Histone deacetylation is GO:0016573. -synonym: "silenced by histone deacetylation" EXACT [] -is_a: SO:0001221 ! 
silenced_by_histone_modification - -[Term] -id: SO:0001224 -name: gene_silenced_by_RNA_interference -def: "A gene that is silenced by RNA interference." [SO:xp] -synonym: "gene silenced by RNA interference" EXACT [] -synonym: "RNA interference silenced gene" EXACT [] -synonym: "RNAi silenced gene" EXACT [] -is_a: SO:0000127 ! implied link automatically realized ! silenced_gene -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001220 ! silenced_by_RNA_interference - -[Term] -id: SO:0001225 -name: gene_silenced_by_histone_modification -def: "A gene that is silenced by histone modification." [SO:xp] -synonym: "gene silenced by histone modification" EXACT [] -is_a: SO:0000127 ! implied link automatically realized ! silenced_gene -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001226 -name: gene_silenced_by_histone_methylation -def: "A gene that is silenced by histone methylation." [SO:xp] -synonym: "gene silenced by histone methylation" EXACT [] -is_a: SO:0001225 ! implied link automatically realized ! gene_silenced_by_histone_modification -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001222 ! silenced_by_histone_methylation - -[Term] -id: SO:0001227 -name: gene_silenced_by_histone_deacetylation -def: "A gene that is silenced by histone deacetylation." [SO:xp] -synonym: "gene silenced by histone deacetylation" EXACT [] -is_a: SO:0001225 ! implied link automatically realized ! gene_silenced_by_histone_modification -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001223 ! silenced_by_histone_deacetylation - -[Term] -id: SO:0001228 -name: dihydrouridine -def: "A modified RNA base in which the 5,6-dihydrouracil is bound to the ribose ring." 
[RSC:cb] -synonym: "D" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Dihydrouridine "wiki" -xref: RNAMOD:051 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001229 -name: pseudouridine -def: "A modified RNA base in which the 5- position of the uracil is bound to the ribose ring instead of the 4- position." [RSC:cb] -comment: The free molecule is CHEBI:17802. -synonym: "Y" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Pseudouridine "wiki" -xref: RNAMOD:050 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001230 -name: inosine -def: "A modified RNA base in which hypoxanthine is bound to the ribose ring." [http://library.med.utah.edu/RNAmods/, RSC:cb] -comment: The free molecule is CHEBI:17596. -synonym: "I" RELATED [] -synonym: "RNAMOD:017" RELATED [] -xref: http://en.wikipedia.org/wiki/Inosine "wiki" -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001231 -name: seven_methylguanine -def: "A modified RNA base in which guanine is methylated at the 7- position." [RSC:cb] -comment: The free molecule is CHEBI:2274. -synonym: "7-methylguanine" EXACT [] -synonym: "seven methylguanine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001232 -name: ribothymidine -def: "A modified RNA base in which thymine is bound to the ribose ring." [RSC:cb] -comment: The free molecule is CHEBI:30832. -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001233 -name: methylinosine -def: "A modified RNA base in which methylhypoxanthine is bound to the ribose ring." [RSC:cb] -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001234 -name: mobile -def: "An attribute describing a feature that has either intra-genome or intracellular mobility." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Mobile "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001235 -name: replicon -def: "A region containing at least one unique origin of replication and a unique termination site." 
[ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001237 -name: amino_acid -def: "A sequence feature that corresponds to a single amino acid residue in a polypeptide." [RSC:cb] -comment: Probably in the future this will cross reference to Chebi. -synonym: "amino acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Amino_acid "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0001238 -name: major_TSS -synonym: "major transcription start site" EXACT [] -synonym: "major TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001239 -name: minor_TSS -synonym: "minor TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001240 -name: TSS_region -def: "The region of a gene from the 5' most TSS to the 3' TSS." [BBOP:nw] -synonym: "TSS region" EXACT [] -is_a: SO:0000842 ! gene_component_region -relationship: has_part SO:0000315 ! TSS - -[Term] -id: SO:0001241 -name: encodes_alternate_transcription_start_sites -synonym: "encodes alternate transcription start sites" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0001243 -name: miRNA_primary_transcript_region -def: "A part of an miRNA primary_transcript." [SO:ke] -synonym: "miRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0001244 -name: pre_miRNA -def: "The 60-70 nucleotide region remain after Drosha processing of the primary transcript, that folds back upon itself to form a hairpin sructure." [SO:ke] -synonym: "pre-miRNA" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0000647 ! 
miRNA_primary_transcript - -[Term] -id: SO:0001245 -name: miRNA_stem -def: "The stem of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA stem" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001246 -name: miRNA_loop -def: "The loop of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA loop" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001247 -name: synthetic_oligo -def: "An oligo composed of synthetic nucleotides." [SO:ke] -synonym: "synthetic oligo" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0001248 -name: assembly -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001249 -name: fragment_assembly -def: "A fragment assembly is a genome assembly that orders overlapping fragments of the genome based on landmark sequences. The base pair distance between the landmarks is known allowing additivity of lengths." [SO:ke] -synonym: "fragment assembly" EXACT [] -synonym: "physical map" EXACT [] -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0001250 -name: fingerprint_map -def: "A fingerprint_map is a physical map composed of restriction fragments." [SO:ke] -synonym: "BACmap" EXACT [] -synonym: "fingerprint map" EXACT [] -synonym: "FPC" EXACT [] -synonym: "FPCmap" EXACT [] -synonym: "restriction map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000412 ! restriction_fragment - -[Term] -id: SO:0001251 -name: STS_map -def: "An STS map is a physical map organized by the unique STS landmarks." [SO:ke] -synonym: "STS map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! 
STS - -[Term] -id: SO:0001252 -name: RH_map -def: "A radiation hybrid map is a physical map." [SO:ke] -synonym: "radiation hybrid map" EXACT [] -synonym: "RH map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001253 -name: sonicate_fragment -def: "A DNA fragment generated by sonication. Sonication is a technique used to sheer DNA into smaller fragments." [SO:ke] -synonym: "sonicate fragment" EXACT [] -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0001254 -name: polyploid -def: "A kind of chromosome variation where the chromosome complement is an exact multiple of the haploid number and is greater than the diploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Polyploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0001255 -name: autopolyploid -def: "A polyploid where the multiple chromosome set was derived from the same organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Autopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001256 -name: allopolyploid -def: "A polyploid where the multiple chromosome set was derived from a different organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Allopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001257 -name: homing_endonuclease_binding_site -def: "The binding site (recognition site) of a homing endonuclease. The binding site is typically large." [SO:ke] -synonym: "homing endonuclease binding site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0001258 -name: octamer_motif -def: "A sequence element characteristic of some RNA polymerase II promoters with sequence ATTGCAT that binds Pou-domain transcription factors." [GOC:dh, PMID:3095662] -comment: Nature. 1986 Oct 16-22;323(6089):640-3. -synonym: "octamer motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001259 -name: apicoplast_chromosome -def: "A chromosome originating in an apicoplast." [SO:xp] -synonym: "apicoplast chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0001260 -name: sequence_collection -def: "A collection of discontinuous sequences." [SO:ke] -synonym: "sequence collection" EXACT [] - -[Term] -id: SO:0001261 -name: overlapping_feature_set -def: "A continuous region of sequence composed of the overlapping of multiple sequence_features, which ultimately provides evidence for another sequence_feature." [SO:ke] -comment: This feature was requested by Nicole, tracker id 1911479. It is required to gather evidence together for annotation. An example would be overlapping ESTs that support an mRNA. -synonym: "overlapping feature set" EXACT [] -is_a: SO:0000703 ! experimental_result_region - -[Term] -id: SO:0001262 -name: overlapping_EST_set -def: "A continous experimental result region extending the length of multiple overlapping EST's." [SO:ke] -synonym: "overlapping EST set" EXACT [] -is_a: SO:0001261 ! overlapping_feature_set -relationship: has_part SO:0000345 ! EST - -[Term] -id: SO:0001263 -name: ncRNA_gene -synonym: "ncRNA gen" EXACT [] -synonym: "ncRNA gene" EXACT [] -synonym: "non-coding RNA gene" RELATED [] -is_a: SO:0000704 ! gene - -[Term] -id: SO:0001264 -name: gRNA_gene -synonym: "gRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000979 ! gRNA_encoding - -[Term] -id: SO:0001265 -name: miRNA_gene -synonym: "miRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000571 ! 
miRNA_encoding - -[Term] -id: SO:0001266 -name: scRNA_gene -synonym: "scRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000575 ! scRNA_encoding - -[Term] -id: SO:0001267 -name: snoRNA_gene -synonym: "snoRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0001268 -name: snRNA_gene -synonym: "snRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0001263 ! ncRNA_gene - -[Term] -id: SO:0001269 -name: SRP_RNA_gene -synonym: "SRP RNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000642 ! SRP_RNA_encoding - -[Term] -id: SO:0001270 -name: stRNA_gene -synonym: "stRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000656 ! stRNA_encoding - -[Term] -id: SO:0001271 -name: tmRNA_gene -synonym: "tmRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000659 ! tmRNA_encoding - -[Term] -id: SO:0001272 -name: tRNA_gene -synonym: "tRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000663 ! tRNA_encoding - -[Term] -id: SO:0001273 -name: modified_adenosine -def: "A modified adenine is an adenine base feature that has been altered." [SO:ke] -synonym: "modified adenosine" EXACT [] -is_a: SO:0000250 ! 
modified_RNA_base_feature - -[Term] -id: SO:0001274 -name: modified_inosine -def: "A modified inosine is an inosine base feature that has been altered." [SO:ke] -synonym: "modified inosine" EXACT [] -is_a: SO:0001230 ! inosine - -[Term] -id: SO:0001275 -name: modified_cytidine -def: "A modified cytidine is a cytidine base feature which has been altered." [SO:ke] -synonym: "modified cytidine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001276 -name: modified_guanosine -synonym: "modified guanosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001277 -name: modified_uridine -synonym: "modified uridine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001278 -name: one_methylinosine -def: "1-methylinosine is a modified insosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylinosine" EXACT [] -synonym: "m1I" EXACT RNAMOD [] -synonym: "one methylinosine" EXACT [] -xref: RNAMOD:018 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001279 -name: one_two_prime_O_dimethylinosine -def: "1,2'-O-dimethylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylinosine" EXACT [] -synonym: "m'Im" EXACT RNAMOD [] -synonym: "one two prime O dimethylinosine" EXACT [] -xref: RNAMOD:019 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001280 -name: two_prime_O_methylinosine -def: "2'-O-methylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylinosine" EXACT [] -synonym: "Im" EXACT RNAMOD [] -synonym: "two prime O methylinosine" EXACT [] -xref: RNAMOD:081 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001281 -name: three_methylcytidine -def: "3-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylcytidine" EXACT [] -synonym: "m3C" EXACT RNAMOD [] -synonym: "three methylcytidine" EXACT [] -xref: RNAMOD:020 -is_a: SO:0001275 ! 
modified_cytidine - -[Term] -id: SO:0001282 -name: five_methylcytidine -def: "5-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylcytidine" EXACT [] -synonym: "five methylcytidine" EXACT [] -synonym: "m5C" EXACT RNAMOD [] -xref: RNAMOD:021 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001283 -name: two_prime_O_methylcytidine -def: "2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylcytidine" EXACT [] -synonym: "Cm" EXACT RNAMOD [] -synonym: "two prime O methylcytidine" EXACT [] -xref: RNAMOD:022 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001284 -name: two_thiocytidine -def: "2-thiocytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiocytidine" EXACT [] -synonym: "s2C" EXACT RNAMOD [] -synonym: "two thiocytidine" EXACT [] -xref: RNAMOD:023 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001285 -name: N4_acetylcytidine -def: "N4-acetylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4C" EXACT RNAMOD [] -synonym: "N4 acetylcytidine" EXACT [] -synonym: "N4-acetylcytidine" EXACT [] -xref: RNAMOD:024 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001286 -name: five_formylcytidine -def: "5-formylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formylcytidine" EXACT [] -synonym: "f5C" EXACT RNAMOD [] -synonym: "five formylcytidine" EXACT [] -xref: RNAMOD:025 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001287 -name: five_two_prime_O_dimethylcytidine -def: "5,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethylcytidine" EXACT [] -synonym: "five two prime O dimethylcytidine" EXACT [] -synonym: "m5Cm" EXACT RNAMOD [] -xref: RNAMOD:026 -is_a: SO:0001275 ! 
modified_cytidine - -[Term] -id: SO:0001288 -name: N4_acetyl_2_prime_O_methylcytidine -def: "N4-acetyl-2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4Cm" EXACT RNAMOD [] -synonym: "N4 acetyl 2 prime O methylcytidine" EXACT [] -synonym: "N4-acetyl-2'-O-methylcytidine" EXACT [] -xref: RNAMOD:027 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001289 -name: lysidine -def: "Lysidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "k2C" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Lysidine "wiki" -xref: RNAMOD:028 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001290 -name: N4_methylcytidine -def: "N4-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4C" EXACT RNAMOD [] -synonym: "N4 methylcytidine" EXACT [] -synonym: "N4-methylcytidine" EXACT [] -xref: RNAMOD:082 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001291 -name: N4_2_prime_O_dimethylcytidine -def: "N4,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4Cm" EXACT RNAMOD [] -synonym: "N4 2 prime O dimethylcytidine" EXACT [] -synonym: "N4,2'-O-dimethylcytidine" EXACT [] -xref: RNAMOD:083 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001292 -name: five_hydroxymethylcytidine -def: "5-hydroxymethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxymethylcytidine" EXACT [] -synonym: "five hydroxymethylcytidine" EXACT [] -synonym: "hm5C" EXACT RNAMOD [] -xref: RNAMOD:084 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001293 -name: five_formyl_two_prime_O_methylcytidine -def: "5-formyl-2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formyl-2'-O-methylcytidine" EXACT [] -synonym: "f5Cm" EXACT RNAMOD [] -synonym: "five formyl two prime O methylcytidine" EXACT [] -xref: RNAMOD:095 -is_a: SO:0001275 ! 
modified_cytidine - -[Term] -id: SO:0001294 -name: N4_N4_2_prime_O_trimethylcytidine -def: "N4_N4_2_prime_O_trimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m42Cm" EXACT RNAMOD [] -synonym: "N4,N4,2'-O-trimethylcytidine" EXACT [] -xref: RNAMOD:107 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001295 -name: one_methyladenosine -def: "1_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyladenosine" EXACT [] -synonym: "m1A" EXACT RNAMOD [] -synonym: "one methyladenosine" EXACT [] -xref: RNAMOD:001 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001296 -name: two_methyladenosine -def: "2_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methyladenosine" EXACT [] -synonym: "m2A" EXACT RNAMOD [] -synonym: "two methyladenosine" EXACT [] -xref: RNAMOD:002 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001297 -name: N6_methyladenosine -def: "N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6A" EXACT RNAMOD [] -synonym: "N6 methyladenosine" EXACT [] -synonym: "N6-methyladenosine" EXACT [] -xref: RNAMOD:003 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001298 -name: two_prime_O_methyladenosine -def: "2prime_O_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyladenosine" EXACT [] -synonym: "Am" EXACT RNAMOD [] -synonym: "two prime O methyladenosine" EXACT [] -xref: RNAMOD:004 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001299 -name: two_methylthio_N6_methyladenosine -def: "2_methylthio_N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-methyladenosine" EXACT [] -synonym: "ms2m6A" EXACT RNAMOD [] -synonym: "two methylthio N6 methyladenosine" EXACT [] -xref: RNAMOD:005 -is_a: SO:0001273 ! 
modified_adenosine - -[Term] -id: SO:0001300 -name: N6_isopentenyladenosine -def: "N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "i6A" EXACT RNAMOD [] -synonym: "N6 isopentenyladenosine" EXACT [] -synonym: "N6-isopentenyladenosine" EXACT [] -xref: RNAMOD:006 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001301 -name: two_methylthio_N6_isopentenyladenosine -def: "2_methylthio_N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-isopentenyladenosine" EXACT [] -synonym: "ms2i6A" EXACT RNAMOD [] -synonym: "two methylthio N6 isopentenyladenosine" EXACT [] -xref: RNAMOD:007 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001302 -name: N6_cis_hydroxyisopentenyl_adenosine -def: "N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "io6A" EXACT RNAMOD [] -synonym: "N6 cis hydroxyisopentenyl adenosine" EXACT [] -synonym: "N6-(cis-hydroxyisopentenyl)adenosine" EXACT [] -xref: RNAMOD:008 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001303 -name: two_methylthio_N6_cis_hydroxyisopentenyl_adenosine -def: "2_methylthio_N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-(cis-hydroxyisopentenyl) adenosine" EXACT [] -synonym: "ms2io6A" EXACT RNAMOD [] -synonym: "two methylthio N6 cis hydroxyisopentenyl adenosine" EXACT [] -xref: RNAMOD:009 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001304 -name: N6_glycinylcarbamoyladenosine -def: "N6_glycinylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "g6A" EXACT RNAMOD [] -synonym: "N6 glycinylcarbamoyladenosine" EXACT [] -synonym: "N6-glycinylcarbamoyladenosine" EXACT [] -xref: RNAMOD:010 -is_a: SO:0001273 ! 
modified_adenosine - -[Term] -id: SO:0001305 -name: N6_threonylcarbamoyladenosine -def: "N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-threonylcarbamoyladenosine" EXACT [] -synonym: "t6A" EXACT RNAMOD [] -xref: RNAMOD:011 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001306 -name: two_methylthio_N6_threonyl_carbamoyladenosine -def: "2_methylthio_N6_threonyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-threonyl carbamoyladenosine" EXACT [] -synonym: "ms2t6A" EXACT RNAMOD [] -synonym: "two methylthio N6 threonyl carbamoyladenosine" EXACT [] -xref: RNAMOD:012 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001307 -name: N6_methyl_N6_threonylcarbamoyladenosine -def: "N6_methyl_N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6t6A" EXACT RNAMOD [] -synonym: "N6 methyl N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-methyl-N6-threonylcarbamoyladenosine" EXACT [] -xref: RNAMOD:013 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001308 -name: N6_hydroxynorvalylcarbamoyladenosine -def: "N6_hydroxynorvalylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "hn6A" EXACT RNAMOD [] -synonym: "N6 hydroxynorvalylcarbamoyladenosine" EXACT [] -synonym: "N6-hydroxynorvalylcarbamoyladenosine" EXACT [] -xref: RNAMOD:014 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001309 -name: two_methylthio_N6_hydroxynorvalyl_carbamoyladenosine -def: "2_methylthio_N6_hydroxynorvalyl_carbamoyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-hydroxynorvalyl carbamoyladenosine" EXACT [] -synonym: "ms2hn6A" EXACT RNAMOD [] -synonym: "two methylthio N6 hydroxynorvalyl carbamoyladenosine" EXACT [] -xref: RNAMOD:015 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001310 -name: two_prime_O_ribosyladenosine_phosphate -def: "2prime_O_ribosyladenosine_phosphate is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosyladenosine (phosphate)" EXACT [] -synonym: "Ar(p)" EXACT RNAMOD [] -synonym: "two prime O ribosyladenosine phosphate" EXACT [] -xref: RNAMOD:016 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001311 -name: N6_N6_dimethyladenosine -def: "N6_N6_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62A" EXACT RNAMOD [] -synonym: "N6,N6-dimethyladenosine" EXACT [] -xref: RNAMOD:080 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001312 -name: N6_2_prime_O_dimethyladenosine -def: "N6_2prime_O_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6Am" EXACT RNAMOD [] -synonym: "N6 2 prime O dimethyladenosine" EXACT [] -synonym: "N6,2'-O-dimethyladenosine" EXACT [] -xref: RNAMOD:088 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001313 -name: N6_N6_2_prime_O_trimethyladenosine -def: "N6_N6_2prime_O_trimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62Am" EXACT RNAMOD [] -synonym: "N6,N6,2'-O-trimethyladenosine" EXACT [] -xref: RNAMOD:089 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001314 -name: one_two_prime_O_dimethyladenosine -def: "1,2'-O-dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethyladenosine" EXACT [] -synonym: "m1Am" EXACT RNAMOD [] -synonym: "one two prime O dimethyladenosine" EXACT [] -xref: RNAMOD:097 -is_a: SO:0001273 ! 
modified_adenosine - -[Term] -id: SO:0001315 -name: N6_acetyladenosine -def: "N6_acetyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac6A" EXACT RNAMOD [] -synonym: "N6 acetyladenosine" EXACT [] -synonym: "N6-acetyladenosine" EXACT [] -xref: RNAMOD:102 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001316 -name: seven_deazaguanosine -def: "7-deazaguanosine is a moddified guanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-deazaguanosine" RELATED [] -synonym: "seven deazaguanosine" EXACT [] -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001317 -name: queuosine -def: "Queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "Q" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Queuosine "wiki" -xref: RNAMOD:043 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001318 -name: epoxyqueuosine -def: "Epoxyqueuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "eQ" EXACT RNAMOD [] -xref: RNAMOD:044 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001319 -name: galactosyl_queuosine -def: "Galactosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "galactosyl queuosine" EXACT [] -synonym: "galactosyl-queuosine" EXACT [] -synonym: "galQ" EXACT RNAMOD [] -xref: RNAMOD:045 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001320 -name: mannosyl_queuosine -def: "Mannosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "mannosyl queuosine" EXACT [] -synonym: "mannosyl-queuosine" EXACT [] -synonym: "manQ" EXACT RNAMOD [] -xref: RNAMOD:046 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001321 -name: seven_cyano_seven_deazaguanosine -def: "7_cyano_7_deazaguanosine is a modified 7-deazoguanosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "7-cyano-7-deazaguanosine" EXACT [] -synonym: "preQ0" EXACT RNAMOD [] -synonym: "seven cyano seven deazaguanosine" EXACT [] -xref: RNAMOD:047 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001322 -name: seven_aminomethyl_seven_deazaguanosine -def: "7_aminomethyl_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-aminomethyl-7-deazaguanosine" EXACT [] -synonym: "preQ1" EXACT RNAMOD [] -synonym: "seven aminomethyl seven deazaguanosine" EXACT [] -xref: RNAMOD:048 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001323 -name: archaeosine -def: "Archaeosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "G+" EXACT RNAMOD [] -xref: RNAMOD:049 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001324 -name: one_methylguanosine -def: "1_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylguanosine" EXACT [] -synonym: "m1G" EXACT RNAMOD [] -synonym: "one methylguanosine" EXACT [] -xref: RNAMOD:029 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001325 -name: N2_methylguanosine -def: "N2_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2G" EXACT RNAMOD [] -synonym: "N2 methylguanosine" EXACT [] -synonym: "N2-methylguanosine" EXACT [] -xref: RNAMOD:030 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001326 -name: seven_methylguanosine -def: "7_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "7-methylguanosine" EXACT [] -synonym: "m7G" EXACT RNAMOD [] -synonym: "seven methylguanosine" EXACT [] -xref: RNAMOD:031 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001327 -name: two_prime_O_methylguanosine -def: "2prime_O_methylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylguanosine" EXACT [] -synonym: "Gm" EXACT RNAMOD [] -synonym: "two prime O methylguanosine" EXACT [] -xref: RNAMOD:032 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001328 -name: N2_N2_dimethylguanosine -def: "N2_N2_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22G" EXACT RNAMOD [] -synonym: "N2,N2-dimethylguanosine" EXACT [] -xref: RNAMOD:033 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001329 -name: N2_2_prime_O_dimethylguanosine -def: "N2_2prime_O_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2Gm" EXACT RNAMOD [] -synonym: "N2 2 prime O dimethylguanosine" EXACT [] -synonym: "N2,2'-O-dimethylguanosine" EXACT [] -xref: RNAMOD:034 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001330 -name: N2_N2_2_prime_O_trimethylguanosine -def: "N2_N2_2prime_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22Gmv" EXACT RNAMOD [] -synonym: "N2,N2,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:035 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001331 -name: two_prime_O_ribosylguanosine_phosphate -def: "2prime_O_ribosylguanosine_phosphate is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosylguanosine (phosphate)" EXACT [] -synonym: "Gr(p)" EXACT RNAMOD [] -synonym: "two prime O ribosylguanosine phosphate" EXACT [] -xref: RNAMOD:036 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001332 -name: wybutosine -def: "Wybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "yW" EXACT RNAMOD [] -xref: RNAMOD:037 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001333 -name: peroxywybutosine -def: "Peroxywybutosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "o2yW" EXACT RNAMOD [] -xref: RNAMOD:038 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001334 -name: hydroxywybutosine -def: "Hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW" EXACT RNAMOD [] -xref: RNAMOD:039 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001335 -name: undermodified_hydroxywybutosine -def: "Undermodified_hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW*" EXACT RNAMOD [] -synonym: "undermodified hydroxywybutosine" EXACT [] -xref: RNAMOD:040 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001336 -name: wyosine -def: "Wyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "IMG" EXACT RNAMOD [] -xref: RNAMOD:041 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001337 -name: methylwyosine -def: "Methylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mimG" EXACT RNAMOD [] -xref: RNAMOD:042 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001338 -name: N2_7_dimethylguanosine -def: "N2_7_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7G" EXACT RNAMOD [] -synonym: "N2 7 dimethylguanosine" EXACT [] -synonym: "N2,7-dimethylguanosine" EXACT [] -xref: RNAMOD:090 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001339 -name: N2_N2_7_trimethylguanosine -def: "N2_N2_7_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,2,7G" EXACT RNAMOD [] -synonym: "N2,N2,7-trimethylguanosine" EXACT [] -xref: RNAMOD:091 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001340 -name: one_two_prime_O_dimethylguanosine -def: "1_2prime_O_dimethylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylguanosine" EXACT [] -synonym: "m1Gm" EXACT RNAMOD [] -synonym: "one two prime O dimethylguanosine" EXACT [] -xref: RNAMOD:096 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001341 -name: four_demethylwyosine -def: "4_demethylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-demethylwyosine" EXACT [] -synonym: "four demethylwyosine" EXACT [] -synonym: "imG-14" EXACT RNAMOD [] -xref: RNAMOD:100 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001342 -name: isowyosine -def: "Isowyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "imG2" EXACT RNAMOD [] -xref: RNAMOD:101 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001343 -name: N2_7_2prirme_O_trimethylguanosine -def: "N2_7_2prirme_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7Gm" EXACT RNAMOD [] -synonym: "N2 7 2prirme O trimethylguanosine" EXACT [] -synonym: "N2,7,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:106 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001344 -name: five_methyluridine -def: "5_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methyluridine" EXACT [] -synonym: "five methyluridine" EXACT [] -synonym: "m5U" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/5-methyluridine "wiki" -xref: RNAMOD:052 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001345 -name: two_prime_O_methyluridine -def: "2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyluridine" EXACT [] -synonym: "two prime O methyluridine" EXACT [] -synonym: "Um" EXACT RNAMOD [] -xref: RNAMOD:053 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001346 -name: five_two_prime_O_dimethyluridine -def: "5_2_prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethyluridine" EXACT [] -synonym: "five two prime O dimethyluridine" EXACT [] -synonym: "m5Um" EXACT RNAMOD [] -xref: RNAMOD:054 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001347 -name: one_methylpseudouridine -def: "1_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylpseudouridine" EXACT [] -synonym: "m1Y" EXACT RNAMOD [] -synonym: "one methylpseudouridine" EXACT [] -xref: RNAMOD:055 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001348 -name: two_prime_O_methylpseudouridine -def: "2prime_O_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylpseudouridine" EXACT [] -synonym: "two prime O methylpseudouridine" EXACT [] -synonym: "Ym" EXACT RNAMOD [] -xref: RNAMOD:056 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001349 -name: two_thiouridine -def: "2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiouridine" EXACT [] -synonym: "s2U" EXACT RNAMOD [] -synonym: "two thiouridine" EXACT [] -xref: RNAMOD:057 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001350 -name: four_thiouridine -def: "4_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-thiouridine" EXACT [] -synonym: "four thiouridine" EXACT [] -synonym: "s4U" EXACT RNAMOD [] -xref: RNAMOD:058 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001351 -name: five_methyl_2_thiouridine -def: "5_methyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyl-2-thiouridine" EXACT [] -synonym: "five methyl 2 thiouridine" EXACT [] -synonym: "m5s2U" EXACT RNAMOD [] -xref: RNAMOD:059 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001352 -name: two_thio_two_prime_O_methyluridine -def: "2_thio_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thio-2'-O-methyluridine" EXACT [] -synonym: "s2Um" EXACT RNAMOD [] -synonym: "two thio two prime O methyluridine" EXACT [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001353 -name: three_three_amino_three_carboxypropyl_uridine -def: "3_3_amino_3_carboxypropyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-(3-amino-3-carboxypropyl)uridine" EXACT [] -synonym: "acp3U" EXACT RNAMOD [] -xref: RNAMOD:061 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001354 -name: five_hydroxyuridine -def: "5_hydroxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxyuridine" EXACT [] -synonym: "five hydroxyuridine" EXACT [] -synonym: "ho5U" EXACT RNAMOD [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001355 -name: five_methoxyuridine -def: "5_methoxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxyuridine" EXACT [] -synonym: "five methoxyuridine" EXACT [] -synonym: "mo5U" EXACT RNAMOD [] -xref: RNAMOD:063 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001356 -name: uridine_five_oxyacetic_acid -def: "Uridine_5_oxyacetic_acid is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "cmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid" EXACT [] -synonym: "uridine five oxyacetic acid" EXACT [] -xref: RNAMOD:064 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001357 -name: uridine_five_oxyacetic_acid_methyl_ester -def: "Uridine_5_oxyacetic_acid_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mcmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid methyl ester" EXACT [] -synonym: "uridine five oxyacetic acid methyl ester" EXACT [] -xref: RNAMOD:065 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001358 -name: five_carboxyhydroxymethyl_uridine -def: "5_carboxyhydroxymethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine" EXACT [] -synonym: "chm5U" EXACT RNAMOD [] -synonym: "five carboxyhydroxymethyl uridine" EXACT [] -xref: RNAMOD:066 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001359 -name: five_carboxyhydroxymethyl_uridine_methyl_ester -def: "5_carboxyhydroxymethyl_uridine_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine methyl ester" EXACT [] -synonym: "five carboxyhydroxymethyl uridine methyl ester" EXACT [] -synonym: "mchm5U" EXACT RNAMOD [] -xref: RNAMOD:067 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001360 -name: five_methoxycarbonylmethyluridine -def: "Five_methoxycarbonylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyluridine" EXACT [] -synonym: "five methoxycarbonylmethyluridine" EXACT [] -synonym: "mcm5U" EXACT RNAMOD [] -xref: RNAMOD:068 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001361 -name: five_methoxycarbonylmethyl_two_prime_O_methyluridine -def: "Five_methoxycarbonylmethyl_2_prime_O_methyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five methoxycarbonylmethyl two prime O methyluridine" EXACT [] -synonym: "mcm5Um" EXACT RNAMOD [] -xref: RNAMOD:069 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001362 -name: five_methoxycarbonylmethyl_two_thiouridine -def: "5_methoxycarbonylmethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2-thiouridine" EXACT [] -synonym: "five methoxycarbonylmethyl two thiouridine" EXACT [] -synonym: "mcm5s2U" EXACT RNAMOD [] -xref: RNAMOD:070 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001363 -name: five_aminomethyl_two_thiouridine -def: "5_aminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-aminomethyl-2-thiouridine" EXACT [] -synonym: "five aminomethyl two thiouridine" EXACT [] -synonym: "nm5s2U" EXACT RNAMOD [] -xref: RNAMOD:071 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001364 -name: five_methylaminomethyluridine -def: "5_methylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyluridine" EXACT [] -synonym: "five methylaminomethyluridine" EXACT [] -synonym: "mnm5U" EXACT RNAMOD [] -xref: RNAMOD:072 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001365 -name: five_methylaminomethyl_two_thiouridine -def: "5_methylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-thiouridine" EXACT [] -synonym: "five methylaminomethyl two thiouridine" EXACT [] -synonym: "mnm5s2U" EXACT RNAMOD [] -xref: RNAMOD:073 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001366 -name: five_methylaminomethyl_two_selenouridine -def: "5_methylaminomethyl_2_selenouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-selenouridine" EXACT [] -synonym: "five methylaminomethyl two selenouridine" EXACT [] -synonym: "mnm5se2U" EXACT RNAMOD [] -xref: RNAMOD:074 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001367 -name: five_carbamoylmethyluridine -def: "5_carbamoylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyluridine" EXACT [] -synonym: "five carbamoylmethyluridine" EXACT [] -synonym: "ncm5U" EXACT RNAMOD [] -xref: RNAMOD:075 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001368 -name: five_carbamoylmethyl_two_prime_O_methyluridine -def: "5_carbamoylmethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five carbamoylmethyl two prime O methyluridine" EXACT [] -synonym: "ncm5Um" EXACT RNAMOD [] -xref: RNAMOD:076 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001369 -name: five_carboxymethylaminomethyluridine -def: "5_carboxymethylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyluridine" EXACT [] -synonym: "cmnm5U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyluridine" EXACT [] -xref: RNAMOD:077 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001370 -name: five_carboxymethylaminomethyl_two_prime_O_methyluridine -def: "5_carboxymethylaminomethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl- 2'-O-methyluridine" EXACT [] -synonym: "cmnm5Um" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two prime O methyluridine" EXACT [] -xref: RNAMOD:078 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001371 -name: five_carboxymethylaminomethyl_two_thiouridine -def: "5_carboxymethylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl-2-thiouridine" EXACT [] -synonym: "cmnm5s2U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two thiouridine" EXACT [] -xref: RNAMOD:079 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001372 -name: three_methyluridine -def: "3_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methyluridine" EXACT [] -synonym: "m3U" EXACT RNAMOD [] -synonym: "three methyluridine" EXACT [] -xref: RNAMOD:085 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001373 -name: one_methyl_three_three_amino_three_carboxypropyl_pseudouridine -def: "1_methyl_3_3_amino_3_carboxypropyl_pseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyl-3-(3-amino-3-carboxypropyl) pseudouridine" EXACT [] -synonym: "m1acp3Y" EXACT RNAMOD [] -xref: RNAMOD:086 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001374 -name: five_carboxymethyluridine -def: "5_carboxymethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethyluridine" EXACT [] -synonym: "cm5U" EXACT RNAMOD [] -synonym: "five carboxymethyluridine" EXACT [] -xref: RNAMOD:087 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001375 -name: three_two_prime_O_dimethyluridine -def: "3_2prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3,2'-O-dimethyluridine" EXACT [] -synonym: "m3Um" EXACT RNAMOD [] -synonym: "three two prime O dimethyluridine" EXACT [] -xref: RNAMOD:092 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001376 -name: five_methyldihydrouridine -def: "5_methyldihydrouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyldihydrouridine" EXACT [] -synonym: "five methyldihydrouridine" EXACT [] -synonym: "m5D" EXACT RNAMOD [] -xref: RNAMOD:093 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001377 -name: three_methylpseudouridine -def: "3_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylpseudouridine" EXACT [] -synonym: "m3Y" EXACT RNAMOD [] -synonym: "three methylpseudouridine" EXACT [] -xref: RNAMOD:094 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001378 -name: five_taurinomethyluridine -def: "5_taurinomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyluridine" EXACT [] -synonym: "five taurinomethyluridine" EXACT [] -synonym: "tm5U" EXACT RNAMOD [] -xref: RNAMOD:098 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001379 -name: five_taurinomethyl_two_thiouridine -def: "5_taurinomethyl_2_thiouridineis a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyl-2-thiouridine" EXACT [] -synonym: "five taurinomethyl two thiouridine" EXACT [] -synonym: "tm5s2U" EXACT RNAMOD [] -xref: RNAMOD:099 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001380 -name: five_isopentenylaminomethyl_uridine -def: "5_isopentenylaminomethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)uridine" EXACT [] -synonym: "five isopentenylaminomethyl uridine" EXACT [] -synonym: "inm5U" EXACT RNAMOD [] -xref: RNAMOD:103 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001381 -name: five_isopentenylaminomethyl_two_thiouridine -def: "5_isopentenylaminomethyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2-thiouridine" EXACT [] -synonym: "five isopentenylaminomethyl two thiouridine" EXACT [] -synonym: "inm5s2U" EXACT RNAMOD [] -xref: RNAMOD:104 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001382 -name: five_isopentenylaminomethyl_two_prime_O_methyluridine -def: "5_isopentenylaminomethyl_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2'-O-methyluridine" EXACT [] -synonym: "five isopentenylaminomethyl two prime O methyluridine" EXACT [] -synonym: "inm5Um" EXACT RNAMOD [] -xref: RNAMOD:105 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001383 -name: histone_binding_site -def: "A region of a DNA molecule that is bound by a histone." [SO:ke] -synonym: "histone binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0001384 -name: CDS_fragment -synonym: "CDS fragment" EXACT [] -synonym: "incomplete CDS" EXACT [] -is_a: SO:0000316 ! implied link automatically realized ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001385 -name: modified_amino_acid_feature -def: "A post translationally modified amino acid feature." [SO:ke] -synonym: "modified amino acid feature" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001386 -name: modified_glycine -def: "A post translationally modified glycine amino acid feature." [SO:ke] -synonym: "ModGly" EXACT AAMOD [] -synonym: "modified glycine" EXACT [] -xref: MOD:00908 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001387 -name: modified_L_alanine -def: "A post translationally modified alanine amino acid feature." [SO:ke] -synonym: "ModAla" EXACT AAMOD [] -synonym: "modified L alanine" EXACT [] -synonym: "modified L-alanine" EXACT [] -xref: MOD:00901 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001388 -name: modified_L_asparagine -def: "A post translationally modified asparagine amino acid feature." [SO:ke] -synonym: "ModAsn" EXACT AAMOD [] -synonym: "modified L asparagine" EXACT [] -synonym: "modified L-asparagine" EXACT [] -xref: MOD:00903 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001389 -name: modified_L_aspartic_acid -def: "A post translationally modified aspartic acid amino acid feature." [SO:ke] -synonym: "ModAsp" EXACT AAMOD [] -synonym: "modified L aspartic acid" EXACT [] -synonym: "modified L-aspartic acid" EXACT [] -xref: MOD:00904 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001390 -name: modified_L_cysteine -def: "A post translationally modified cysteine amino acid feature." [SO:ke] -synonym: "ModCys" EXACT AAMOD [] -synonym: "modified L cysteine" EXACT [] -synonym: "modified L-cysteine" EXACT [] -xref: MOD:00905 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001391 -name: modified_L_glutamic_acid -synonym: "ModGlu" EXACT AAMOD [] -synonym: "modified L glutamic acid" EXACT [] -synonym: "modified L-glutamic acid" EXACT [] -xref: MOD:00906 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001392 -name: modified_L_threonine -def: "A post translationally modified threonine amino acid feature." [SO:ke] -synonym: "modified L threonine" EXACT [] -synonym: "modified L-threonine" EXACT [] -synonym: "ModThr" EXACT AAMOD [] -xref: MOD:00917 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001393 -name: modified_L_tryptophan -def: "A post translationally modified tryptophan amino acid feature." [SO:ke] -synonym: "modified L tryptophan" EXACT [] -synonym: "modified L-tryptophan" EXACT [] -synonym: "ModTrp" EXACT AAMOD [] -xref: MOD:00918 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001394 -name: modified_L_glutamine -def: "A post translationally modified glutamine amino acid feature." 
[SO:ke] -synonym: "ModGln" EXACT [] -synonym: "modified L glutamine" EXACT [] -synonym: "modified L-glutamine" EXACT [] -xref: MOD:00907 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001395 -name: modified_L_methionine -def: "A post translationally modified methionine amino acid feature." [SO:ke] -synonym: "modified L methionine" EXACT [] -synonym: "modified L-methionine" EXACT [] -synonym: "ModMet" EXACT AAMOD [] -xref: MOD:00913 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001396 -name: modified_L_isoleucine -def: "A post translationally modified isoleucine amino acid feature." [SO:ke] -synonym: "modified L isoleucine" EXACT [] -synonym: "modified L-isoleucine" EXACT [] -synonym: "ModIle" EXACT AAMOD [] -xref: MOD:00910 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001397 -name: modified_L_phenylalanine -def: "A post translationally modified phenylalanine amino acid feature." [SO:ke] -synonym: "modified L phenylalanine" EXACT [] -synonym: "modified L-phenylalanine" EXACT [] -synonym: "ModPhe" EXACT AAMOD [] -xref: MOD:00914 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001398 -name: modified_L_histidine -def: "A post translationally modified histidie amino acid feature." [SO:ke] -synonym: "ModHis" EXACT [] -synonym: "modified L histidine" EXACT [] -synonym: "modified L-histidine" EXACT [] -xref: MOD:00909 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001399 -name: modified_L_serine -def: "A post translationally modified serine amino acid feature." [SO:ke] -synonym: "modified L serine" EXACT [] -synonym: "modified L-serine" EXACT [] -synonym: "MosSer" EXACT AAMOD [] -xref: MOD:00916 "http://www.psidev.info/index.php?q=node/104" -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001400 -name: modified_L_lysine -def: "A post translationally modified lysine amino acid feature." 
[SO:ke] -synonym: "modified L lysine" EXACT [] -synonym: "modified L-lysine" EXACT [] -synonym: "ModLys" EXACT AAMOD [] -xref: MOD:00912 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001401 -name: modified_L_leucine -def: "A post translationally modified leucine amino acid feature." [SO:ke] -synonym: "modified L leucine" EXACT [] -synonym: "modified L-leucine " EXACT [] -synonym: "ModLeu" EXACT AAMOD [] -xref: MOD:00911 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001402 -name: modified_L_selenocysteine -def: "A post translationally modified selenocysteine amino acid feature." [SO:ke] -synonym: "modified L selenocysteine" EXACT [] -synonym: "modified L-selenocysteine" EXACT [] -xref: MOD:01158 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001403 -name: modified_L_valine -def: "A post translationally modified valine amino acid feature." [SO:ke] -synonym: "modified L valine" EXACT [] -synonym: "modified L-valine" EXACT [] -synonym: "ModVal" EXACT AAMOD [] -xref: MOD:00920 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001404 -name: modified_L_proline -def: "A post translationally modified proline amino acid feature." [SO:ke] -synonym: "modified L proline" EXACT [] -synonym: "modified L-proline " EXACT [] -synonym: "ModPro" EXACT AAMOD [] -xref: MOD:00915 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001405 -name: modified_L_tyrosine -def: "A post translationally modified tyrosine amino acid feature." [SO:ke] -synonym: "modified L tyrosine" EXACT [] -synonym: "modified L-tyrosine" EXACT [] -synonym: "ModTry" EXACT AAMOD [] -xref: MOD:00919 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001406 -name: modified_L_arginine -def: "A post translationally modified arginine amino acid feature." [SO:ke] -synonym: "ModArg" EXACT AAMOD [] -synonym: "modified L arginine" EXACT [] -synonym: "modified L-arginine" EXACT [] -xref: MOD:00902 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001407 -name: peptidyl -def: "An attribute describing the nature of a proteinaceous polymer, where by the amino acid units are joined by peptide bonds." [SO:ke] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0001408 -name: cleaved_for_gpi_anchor_region -def: "The C-terminal residues of a polypeptide which are exchanged for a GPI-anchor." [EBI:rh] -synonym: "cleaved for gpi anchor region" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001409 -name: biomaterial_region -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001410 -name: experimental_feature -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -def: "A region that is defined according to its relations with other regions within the same sequence." [SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001413 -name: translocation_breakpoint -def: "The point within a chromosome where a translocation begins or ends." [SO:cb] -synonym: "translocation breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001414 -name: insertion_breakpoint -def: "The point within a chromosome where a insertion begins or ends." 
[SO:cb] -synonym: "insertion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001415 -name: deletion_breakpoint -def: "The point within a chromosome where a deletion begins or ends." [SO:cb] -synonym: "deletion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001416 -name: five_prime_flanking_region -def: "A flanking region located five prime of a specific region." [SO:chado] -synonym: "5' flanking region" RELATED [] -synonym: "five prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001417 -name: three_prime_flanking_region -def: "A flanking region located three prime of a specific region." [SO:chado] -synonym: "3' flanking region" RELATED [] -synonym: "three prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001418 -name: transcribed_fragment -def: "An experimental region, defined by a tiling array experiment to be transcribed at some level." [SO:ke] -comment: Term requested by the MODencode group. -synonym: "transcribed fragment" EXACT [] -synonym: "transfrag" RELATED [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001419 -name: cis_splice_site -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -def: "Primary transcript region bordering trans-splice junction." [SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001421 -name: splice_junction -def: "The boundary between an intron and an exon." [SO:ke] -synonym: "splice boundary" EXACT [] -synonym: "splice junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001422 -name: conformational_switch -def: "A region of a polypeptide, involved in the transition from one conformational state to another." 
[SO:ke] -comment: MM Young, K Kirshenbaum, KA Dill & S Highsmith. Predicting conformational switches in proteins. Protein Science, 1999, 8, 1752-64. K. Kirshenbaum, M.M. Young and S. Highsmith. Predicting Allosteric Switches in Myosins. Protein Science 8(9):1806-1815. 1999. -synonym: "polypeptide conformational switch" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001423 -name: dye_terminator_read -def: "A read produced by the dye terminator method of sequencing." [SO:ke] -synonym: "dye terminator read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001424 -name: pyrosequenced_read -def: "A read produced by pyrosequencing technology." [SO:ke] -comment: An example is a read produced by Roche 454 technology. -synonym: "pyorsequenced read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001425 -name: ligation_based_read -def: "A read produced by ligation based sequencing technologies." [SO:ke] -comment: An example of this kind of read is one produced by ABI SOLiD. -synonym: "ligation based read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001426 -name: polymerase_synthesis_read -def: "A read produced by the polymerase based sequence by synthesis method." [SO:ke] -comment: An example is a read produced by Illumina technology. -synonym: "polymerase synthesis read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001427 -name: cis_regulatory_frameshift_element -def: "A structural region in an RNA molecule which promotes ribosomal frameshifting of cis coding sequence." [RFAM:jd] -synonym: "cis regulatory frameshift element" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001428 -name: expressed_sequence_assembly -def: "A sequence assembly derived from expressed sequences." [SO:ke] -comment: From tracker [ 2372385 ] expressed_sequence_assembly. -synonym: "expressed sequence assembly" EXACT [] -is_a: SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0001429 -name: DNA_binding_site -def: "A region of a molecule that binds to DNA." [SO:ke] -synonym: "DNA binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001430 -name: polyA_junction -def: "The boundary between the UTR and the polyA sequence." [SO:ke] -synonym: "polyA junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001431 -name: cryptic_gene -def: "A gene that is not transcribed under normal conditions and is not critical to normal cellular functioning." [SO:ke] -synonym: "cryptic gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000976 ! cryptic - -[Term] -id: SO:0001432 -name: sequence_variant_affecting_polyadenylation -synonym: "mutation affecting polyadenylation" RELATED [] -synonym: "sequence variant affecting polyadenylation" EXACT [] -is_a: SO:1000070 ! sequence_variant_affecting_transcript_processing - -[Term] -id: SO:0001433 -name: three_prime_RACE_clone -def: "A three prime RACE (Rapid Amplification of cDNA Ends) clone is a cDNA clone copied from the 3' end of an mRNA (using a poly-dT primer to capture the polyA tail and a gene-specific or randomly primed 5' primer), and spliced into a vector for propagation in a suitable host." [modENCODE:nlw] -synonym: "3' RACE clone" RELATED [] -is_a: SO:0000317 ! cDNA_clone - -[Term] -id: SO:0001434 -name: cassette_pseudogene -def: "A cassette pseudogene is a kind of gene in an inactive form which may recombine at a telomeric locus to form a functional copy." [SO:ke] -comment: Requested by the Trypanosome community. -synonym: "cassette pseudogene" EXACT [] -synonym: "cassette type psedogene" RELATED [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0001435 -name: alanine -comment: A place holder for a cross product with chebi. -synonym: "A" EXACT aa1 [] -synonym: "Ala" EXACT aa3 [] -is_a: SO:0001237 ! 
amino_acid - -[Term] -id: SO:0001436 -name: valine -comment: A place holder for a cross product with chebi. -synonym: "V" EXACT aa1 [] -synonym: "Val" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001437 -name: leucine -comment: A place holder for a cross product with chebi. -synonym: "L" EXACT aa1 [] -synonym: "Leu" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001438 -name: isoleucine -comment: A place holder for a cross product with chebi. -synonym: "I" EXACT aa1 [] -synonym: "Ile" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001439 -name: proline -comment: A place holder for a cross product with chebi. -synonym: "P" EXACT aa1 [] -synonym: "Pro" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001440 -name: tryptophan -comment: A place holder for a cross product with chebi. -synonym: "Trp" EXACT aa3 [] -synonym: "W" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001441 -name: phenylalanine -comment: A place holder for a cross product with chebi. -synonym: "F" EXACT aa1 [] -synonym: "Phe" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001442 -name: methionine -comment: A place holder for a cross product with chebi. -synonym: "M" EXACT aa1 [] -synonym: "Met" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001443 -name: glycine -comment: A place holder for a cross product with chebi. -synonym: "G" EXACT aa1 [] -synonym: "Gly" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001444 -name: serine -comment: A place holder for a cross product with chebi. -synonym: "S" EXACT aa1 [] -synonym: "Ser" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001445 -name: threonine -comment: A place holder for a cross product with chebi. -synonym: "T" EXACT aa1 [] -synonym: "Thr" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001446 -name: tyrosine -comment: A place holder for a cross product with chebi. 
-synonym: "Tyr" EXACT aa3 [] -synonym: "Y" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001447 -name: cysteine -comment: A place holder for a cross product with chebi. -synonym: "C" EXACT aa1 [] -synonym: "Cys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001448 -name: glutamine -comment: A place holder for a cross product with chebi. -synonym: "Gln" EXACT aa3 [] -synonym: "Q" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001449 -name: asparagine -comment: A place holder for a cross product with chebi. -synonym: "Asn" EXACT aa3 [] -synonym: "N" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001450 -name: lysine -comment: A place holder for a cross product with chebi. -synonym: "K" EXACT aa1 [] -synonym: "Lys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001451 -name: argenine -comment: A place holder for a cross product with chebi. -synonym: "Arg" EXACT aa3 [] -synonym: "R" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001452 -name: histidine -comment: A place holder for a cross product with chebi. -synonym: "H" EXACT aa1 [] -synonym: "His" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001453 -name: aspartic_acid -comment: A place holder for a cross product with chebi. -synonym: "Asp" EXACT aa3 [] -synonym: "aspartic acid" EXACT [] -synonym: "D" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001454 -name: glutamic_acid -comment: A place holder for a cross product with chebi. -synonym: "E" EXACT aa1 [] -synonym: "Glu" EXACT aa3 [] -synonym: "glutamic acid" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001455 -name: selenocysteine -comment: A place holder for a cross product with chebi. -synonym: "Sec" EXACT aa3 [] -synonym: "U" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001456 -name: pyrrolysine -comment: A place holder for a cross product with chebi. 
-synonym: "O" EXACT aa1 [] -synonym: "Pyl" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001457 -name: transcribed_cluster -def: "A region defined by a set of transcribed sequences from the same gene or expressed pseudogene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "transcribed cluster" EXACT [] -synonym: "unigene cluster" RELATED [] -is_a: SO:0001410 ! experimental_feature -relationship: has_part SO:0000695 ! reagent - -[Term] -id: SO:0001458 -name: unigene_cluster -def: "A kind of transcribed_cluster defined by a set of transcribed sequences from the a unique gene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "unigene cluster" RELATED [] -is_a: SO:0001457 ! transcribed_cluster - -[Term] -id: SO:0001459 -name: CRISPR -def: "Clustered Palindromic Repeats interspersed with bacteriophage derived spacer sequences." [RFAM:jd] -synonym: "Clustered_Regularly_Interspaced_Short_Palindromic_Repeat" EXACT [] -synonym: "CRISPR element" EXACT [] -xref: http:en.wikipedia.org/wiki/CRISPR -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0001460 -name: insulator_binding_site -def: "A protein_binding_site located within an insulator." [SO:ke] -comment: See tracker ID 2060908. -synonym: "insulator binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -relationship: part_of SO:0000627 ! insulator - -[Term] -id: SO:0001461 -name: enhancer_binding_site -def: "A protein_binding_site located within an enhancer." [SO:ke] -synonym: "enhancer binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -relationship: part_of SO:0000165 ! enhancer - -[Term] -id: SO:0001462 -name: contig_collection -def: "A collection of contigs." [SO:ke] -comment: See tracker ID: 2138359. -synonym: "contig collection" EXACT [] -is_a: SO:0001085 ! implied link automatically realized ! sequence_conflict -is_a: SO:0001260 ! 
sequence_collection -intersection_of: SO:0001085 ! sequence_conflict -intersection_of: has_part SO:0000149 ! contig - -[Term] -id: SO:0001463 -name: lincRNA -def: "A multiexonic non-coding RNA transcribed by RNA polymerase II." [PMID:19182780, SO:ke] -synonym: "large intervening non-coding RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001464 -name: UST -def: "An EST spanning part or all of the untranslated regions of a protein-coding transcript." [SO:nlw] -synonym: "UTR sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001465 -name: three_prime_UST -def: "A UST located in the 3'UTR of a protein-coding transcript." [SO:nlw] -synonym: "3' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001466 -name: five_prime_UST -def: "An UST located in the 5'UTR of a protein-coding transcript." [SO:nlw] -synonym: "5' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001467 -name: RST -def: "A tag produced from a single sequencing read from a RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "RACE sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001468 -name: three_prime_RST -def: "A tag produced from a single sequencing read from a 3'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "3' RST" EXACT [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001469 -name: five_prime_RST -def: "A tag produced from a single sequencing read from a 5'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "5' RST" RELATED [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001470 -name: UST_match -def: "A match against an UST sequence." [SO:nlw] -synonym: "UST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001471 -name: RST_match -def: "A match against an RST sequence." [SO:nlw] -synonym: "RST match" EXACT [] -is_a: SO:0000102 ! 
expressed_sequence_match - -[Term] -id: SO:0001472 -name: primer_match -def: "A nucleotide match to a primer sequence." [SO:nlw] -synonym: "primer match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0001473 -name: miRNA_antiguide -def: "A region of the pri miRNA that basepairs with the guide to form the hairpin." [SO:ke] -synonym: "miRNA antiguide " EXACT [] -synonym: "miRNA passenger strand" EXACT [] -synonym: "miRNA star" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-05-27T03:35:43Z - -[Term] -id: SO:0001474 -name: trans_splice_junction -def: "The boundary between the spliced leader and the first exon of the mRNA." [SO:ke] -synonym: "trans-splice junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2009-07-13T04:50:49Z - -[Term] -id: SO:0001475 -name: outron -def: "A region of a primary transcript, that is removed via trans splicing." [PMID:16401417, SO:ke] -is_a: SO:0000835 ! primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-07-14T11:36:08Z - -[Term] -id: SO:0001476 -name: natural_plasmid -def: "A plasmid that occurs naturally." [SO:xp] -synonym: "natural plasmid" EXACT [] -is_a: SO:0000155 ! plasmid -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000782 ! natural -created_by: kareneilbeck -creation_date: 2009-09-01T03:43:06Z - -[Term] -id: SO:0001477 -name: gene_trap_construct -def: "A gene trap construct is a type of engineered plasmid which is designed to integrate into a genome and produce a fusion transcript between exons of the gene into which it inserts and a reporter element in the construct. Gene traps contain a splice acceptor, do not contain promoter elements for the reporter, and are mutagenic. Gene traps may be bicistronic with the second cassette containing a promoter driving an a selectable marker." 
[ZFIN:dh] -synonym: "gene trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:49:09Z - -[Term] -id: SO:0001478 -name: promoter_trap_construct -def: "A promoter trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when inserted in close proximity to a promoter element. Promoter traps typically do not contain promoter elements and are mutagenic." [ZFIN:dh] -synonym: "promoter trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:52:01Z - -[Term] -id: SO:0001479 -name: enhancer_trap_construct -def: "An enhancer trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when the expression from a basic minimal promoter is enhanced by genomic enhancer elements. Enhancer traps contain promoter elements and are not usually mutagenic." [ZFIN:dh] -synonym: "enhancer trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:53:26Z - -[Term] -id: SO:0001480 -name: PAC_end -def: "A region of sequence from the end of a PAC clone that may provide a highly specific marker." [ZFIN:mh] -synonym: "PAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000154 ! PAC -created_by: kareneilbeck -creation_date: 2009-09-09T05:18:12Z - -[Term] -id: SO:0001481 -name: RAPD -def: "RAPD is a 'PCR product' where a sequence variant is identified through the use of PCR with random primers." [ZFIN:mh] -synonym: "Random Amplification Polymorphic DNA" EXACT [] -is_a: SO:0000006 ! PCR_product -created_by: kareneilbeck -creation_date: 2009-09-09T05:26:10Z - -[Term] -id: SO:0001482 -name: shadow_enhancer -synonym: "shadow enhancer" EXACT [] -is_a: SO:0000165 ! 
enhancer -created_by: kareneilbeck -creation_date: 2009-09-09T05:29:29Z - -[Term] -id: SO:0001483 -name: SNV -def: "SNVs are single base pair positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0001484 -name: X_element_combinatorial_repeat -def: "An X element combinatorial repeat is a repeat region located between the X element and the telomere or adjacent Y' element." [http://www.yeastgenome.org/help/glossary.html] -comment: X element combinatorial repeats contain Tbf1p binding sites,\nand possible functions include a role in telomerase-independent telomere\nmaintenance via recombination or as a barrier against transcriptional\nsilencing. These are usually present as a combination of one or more of\nseveral types of smaller elements (designated A, B, C, or D). This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "X element combinatorial repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T11:03:37Z - -[Term] -id: SO:0001485 -name: Y_prime_ element -def: "A Y' element is a repeat region (SO:0000657) located adjacent to telomeric repeats or X element combinatorial repeats, either as a single copy or tandem repeat of two to four copies." [http:http\://www.yeastgenome.org/help/glossary.html] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "Y' element" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! 
telomere -created_by: kareneilbeck -creation_date: 2009-11-10T12:08:57Z - -[Term] -id: SO:0001486 -name: standard_draft -def: "The status of a whole genome sequence, where the data is minimally filtered or unfiltered, from any number of sequencing platforms, and is assembled into contigs. Genome sequence of this quality may harbor regions of poor quality and can be relatively incomplete." [DOI:10.1126] -synonym: "standard draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:48:32Z - -[Term] -id: SO:0001487 -name: high_quality_draft -def: "The status of a whole genome sequence, where overall coverage represents at least 90 percent of the genome." [DOI:10.1126] -synonym: "high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:52:36Z - -[Term] -id: SO:0001488 -name: improved_high_quality_draft -def: "The status of a whole genome sequence, where additional work has been performed, using either manual or automated methods, such as gap resolution." [DOI:10.1126] -synonym: "improved high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:54:35Z - -[Term] -id: SO:0001489 -name: annotation_directed_improved_draft -def: "The status of a whole genome sequence,where annotation, and verification of coding regions has occurred." [DOI:10.1126] -synonym: "annotation directed improvement" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:57:10Z - -[Term] -id: SO:0001490 -name: noncontiguous_finished -def: "The status of a whole genome sequence, where the assembly is high quality, closure approaches have been successful for most gaps, misassemblies and low quality regions." [DOI:10.1126] -synonym: "non contiguous finished" EXACT [] -is_a: SO:0001499 ! 
whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:01:07Z - -[Term] -id: SO:0001491 -name: finished_genome -def: "The status of a whole genome sequence, with less than 1 error per 100,000 base pairs." [DOI:10.1126] -synonym: "finished" EXACT [] -synonym: "finished genome" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:04:43Z - -[Term] -id: SO:0001492 -name: intronic_regulatory_region -def: "A regulatory region that is part of an intron." [SO:ke] -synonym: "intronic regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000188 ! intron -created_by: kareneilbeck -creation_date: 2009-11-08T02:48:02Z - -[Term] -id: SO:0001493 -name: centromere_DNA_Element_I -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region\ncomposed of 8-11bp which enables binding by the centromere binding factor 1(Cbf1p)." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEI" EXACT [] -synonym: "Centromere DNA Element I" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000577 ! centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:47:23Z - -[Term] -id: SO:0001494 -name: centromere_DNA_Element_II -def: "A centromere DNA Element II (CDEII) is part a conserved region of the centromere, consisting of a consensus region that is AT-rich and ~ 75-100 bp in length." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEII" EXACT [] -synonym: "centromere DNA Element II" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000577 ! 
centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:51:26Z - -[Term] -id: SO:0001495 -name: centromere_DNA_Element_III -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region that consists of a 25-bp which enables binding by the centromere DNA binding factor 3 (CBF3) complex." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEIII" EXACT [] -synonym: "centromere DNA Element III" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000577 ! centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:54:47Z - -[Term] -id: SO:0001496 -name: telomeric_repeat -def: "The telomeric repeat is a repeat region, part of the chromosome, which in yeast, is a G-rich terminal sequence of the form (TG(1-3))n or more precisely ((TG)(1-6)TG(2-3))n." [PMID:8720065] -comment: The repeats are maintained by telomerase and there is generally 300 (+/-) 75 bp of TG(1-3) at a given end. Telomeric repeats function in completing chromosome replication and protecting the ends from degradation and end-to-end fusions. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880739. -synonym: "telomeric repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-09T06:00:42Z - -[Term] -id: SO:0001497 -name: X_element -def: "The X element is a conserved region, of the telomere, of ~475 bp that contains an ARS sequence and in most cases an Abf1p binding site." [http://www.yeastgenome.org/help/glossary.html#xelemcoresequence] -comment: Possible functions include roles in chromosomal segregation,\nmaintenance of chromosome stability, recombinational sequestering, or as a\nbarrier to transcriptional silencing. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "X element" RELATED [] -is_a: SO:0000330 ! 
conserved_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T10:56:54Z - -[Term] -id: SO:0001498 -name: YAC_end -def: "A region of sequence from the end of a YAC clone that may provide a highly specific marker." [SO:ke] -synonym: "YAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000152 ! YAC -created_by: kareneilbeck -creation_date: 2009-11-19T11:07:18Z - -[Term] -id: SO:0001499 -name: whole_genome_sequence_status -def: "The status of whole genome sequence." [DOI:10.1126] -comment: This terms and children were added to SO in response to tracker request by Patrick Chain. The paper Genome Project Standards in a New Era of Sequencing. Science October 9th 2009, addresses these terms. -synonym: "whole genome sequence status" EXACT [] -is_a: SO:0000905 ! status -created_by: kareneilbeck -creation_date: 2009-10-23T12:47:47Z - -[Term] -id: SO:0001500 -name: heritable_phenotypic_marker -def: "A biological_region characterized as a single heritable trait in a phenotype screen. The heritable phenotype may be mapped to a chromosome but generally has not been characterized to a specific gene locus." [JAX:hdene] -synonym: "heritable phenotypic marker" EXACT [] -synonym: "phenotypic marker" EXACT [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2009-12-07T01:50:55Z - -[Term] -id: SO:0001501 -name: peptide_collection -def: "A collection of peptide sequences." [BBOP:nlw] -comment: Term requested via tracker ID: 2910829. -synonym: "peptide collection" EXACT [] -synonym: "peptide set" EXACT [] -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0000104 ! polypeptide -created_by: kareneilbeck -creation_date: 2009-12-11T10:58:58Z - -[Term] -id: SO:0001502 -name: high_identity_region -def: "An experimental feature with high sequence identity to another sequence." 
[SO:ke] -comment: Requested by tracker ID: 2902685. -synonym: "high identity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2009-12-11T11:06:05Z - -[Term] -id: SO:0001503 -name: processed_transcript -def: "A transcript for which no open reading frame has been identified and for which no other function has been determined." [MGI:hdeen] -comment: Ensembl and Vega also use this term name. Requested by Howard Deen of MGI. -synonym: "processed transcript" EXACT [] -is_a: SO:0000673 ! transcript -created_by: kareneilbeck -creation_date: 2009-12-21T05:37:14Z - -[Term] -id: SO:0001504 -name: assortment_derived_variation -def: "A chromosome variation derived from an event during meiosis." [SO:ke] -synonym: "assortment derived variation" RELATED [] -is_a: SO:0000240 ! chromosome_variation -created_by: kareneilbeck -creation_date: 2010-03-02T05:03:18Z - -[Term] -id: SO:0001505 -name: reference_genome -def: "A collection of sequences (often chromosomes) taken as the standard for a given organism and genome assembly." [SO:ke] -synonym: "reference genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:10:03Z - -[Term] -id: SO:0001506 -name: variant_genome -def: "A collection of sequences (often chromosomes) of an individual." [SO:ke] -synonym: "variant genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:11:25Z - -[Term] -id: SO:0001507 -name: variant_collection -def: "A collection of one or more sequences of an individual." [SO:ke] -synonym: "variant collection" RELATED [] -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0001059 ! sequence_alteration -created_by: kareneilbeck -creation_date: 2010-03-03T02:13:28Z - -[Term] -id: SO:0001508 -name: alteration_attribute -synonym: "alteration attribute" EXACT [] -is_a: SO:0000733 ! 
feature_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:53:23Z - -[Term] -id: SO:0001509 -name: chromosomal_variation_attribute -synonym: "chromosomal variation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:54:30Z - -[Term] -id: SO:0001510 -name: intrachromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:25Z - -[Term] -id: SO:0001511 -name: interchromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:43Z - -[Term] -id: SO:0001512 -name: insertion_attribute -def: "A quality of a chromosomal insertion,." [SO:ke] -synonym: "insertion attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:56Z - -[Term] -id: SO:0001513 -name: tandem -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:37Z - -[Term] -id: SO:0001514 -name: direct -def: "A quality of an insertion where the insert is not in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:49Z - -[Term] -id: SO:0001515 -name: inverted -def: "A quality of an insertion where the insert is in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:40Z - -[Term] -id: SO:0001516 -name: free -def: "The quality of a duplication where the new region exists independently of the original." [SO:ke] -is_a: SO:0001523 ! duplication_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:51Z - -[Term] -id: SO:0001517 -name: inversion_attribute -synonym: "inversion attribute" EXACT [] -is_a: SO:0001508 ! 
alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:10Z - -[Term] -id: SO:0001518 -name: pericentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:24Z - -[Term] -id: SO:0001519 -name: paracentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:35Z - -[Term] -id: SO:0001520 -name: translocaton_attribute -synonym: "translocation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:47Z - -[Term] -id: SO:0001521 -name: reciprocal -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:34Z - -[Term] -id: SO:0001522 -name: insertional -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:51Z - -[Term] -id: SO:0001523 -name: duplication_attribute -synonym: "duplication attribute" RELATED [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-05T01:56:33Z - -[Term] -id: SO:0001524 -name: chromosomally_aberrant_genome -synonym: "chromosomally aberrant genome" RELATED [] -is_a: SO:0001506 ! variant_genome -created_by: kareneilbeck -creation_date: 2010-03-05T02:21:00Z - -[Term] -id: SO:0001525 -name: assembly_error_correction -def: "A region of sequence where the final nucleotide assignment differs from the original assembly due to an improvement that replaces a mistake." [SO:ke] -synonym: "assembly error correction" RELATED [] -is_a: SO:0000413 ! sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:16:31Z - -[Term] -id: SO:0001526 -name: base_call_error_correction -def: "A region of sequence where the final nucleotide assignment is different from that given by the base caller due to an improvement that replaces a mistake." [SO:ke] -synonym: "base call error correction" RELATED [] -is_a: SO:0000413 ! 
sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:18:07Z - -[Term] -id: SO:0001527 -name: peptide_localization_signal -def: "A region of peptide sequence used to target the polypeptide molecule to a specific organelle." [SO:ke] -synonym: "localization signal" RELATED [] -synonym: "peptide localization signal" EXACT [] -is_a: SO:0000839 ! polypeptide_region -created_by: kareneilbeck -creation_date: 2010-03-11T02:15:05Z - -[Term] -id: SO:0001528 -name: nuclear_localization_signal -def: "A polypeptide region that targets a polypeptide to the nucleus." [SO:ke] -synonym: "NLS" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_localization_signal "wikipedia" -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:16:38Z - -[Term] -id: SO:0001529 -name: endosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the endosome." [SO:ke] -synonym: "endosomal localization signal" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:20:58Z - -[Term] -id: SO:0001530 -name: lysosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the lysosome." [SO:ke] -synonym: "lysosomal localization signal" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:24:10Z - -[Term] -id: SO:0001531 -name: nuclear_export_signal -def: "A polypeptide region that targets a polypeptide to he cytoplasm." [SO:ke] -synonym: "NES" EXACT [] -synonym: "nuclear export signal" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_export_signal -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:25:25Z - -[Term] -id: SO:0001532 -name: recombination_signal_sequence -def: "A region recognized by a recombinase." 
[SO:ke] -synonym: "recombination signal sequence" RELATED [] -xref: http://en.wikipedia.org/wiki/Recombination_Signal_Sequences "wikipedia" -is_a: SO:0000299 ! specific_recombination_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:16:47Z - -[Term] -id: SO:0001533 -name: cryptic_splice_site -def: "A splice site that is in part of the transcript not normally spliced. They occur via mutation or transcriptional error." [SO:ke] -synonym: "cryptic splice signal" RELATED [] -synonym: "cryptic splice site" EXACT [] -is_a: SO:0000162 ! splice_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:25:06Z - -[Term] -id: SO:0001534 -name: nuclear_rim_localization_signal -def: "A polypeptide region that targets a polypeptide to the nuclear rim." [SO:ke] -synonym: "nuclear rim localization signal" RELATED [] -xref: PMID:16027110 -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T03:31:30Z - -[Term] -id: SO:0001535 -name: p_element -def: "A P_element is a DNA transposon responsible for hybrid dysgenesis." [SO:ke] -synonym: "P element" RELATED [] -is_a: SO:0000182 ! DNA_transposon -created_by: kareneilbeck -creation_date: 2010-03-12T03:40:33Z - -[Term] -id: SO:0001700 -name: histone_modification -def: "Histone modification is a post translationally modified region whereby residues of the histone protein are modified by methylation, acetylation, phosphorylation, ubiquitination, sumoylation, citrullination, or ADP-ribosylation." [http:en.wikipedia.org/wiki/Histone] -synonym: "histone modification" EXACT [] -is_a: SO:0001089 ! post_translationally_modified_region -is_a: SO:0001720 ! implied link automatically realized ! epigenetically_modified_region -intersection_of: SO:0001089 ! post_translationally_modified_region -intersection_of: has_quality SO:0000133 ! 
epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-31T10:22:08Z - -[Term] -id: SO:0001701 -name: histone_methylation -def: "A histone modification where the modification is the methylation of the residue." [SO:ke] -synonym: "histone methylation" EXACT [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:02Z - -[Term] -id: SO:0001702 -name: histone_acetylation -def: "A histone modification where the modification is the acylation of the residue." [SO:ke] -synonym: "histone acetylation" EXACT [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:27Z - -[Term] -id: SO:0001703 -name: H3K9_acetylation -def: "A kind of histone modification, whereby the 9th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 acetylation" EXACT [] -is_a: SO:0001702 ! histone_acetylation -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:05Z - -[Term] -id: SO:0001704 -name: H3K14_acetylation -def: "A kind of histone modification, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K14 acetylation" EXACT [] -is_a: SO:0001702 ! histone_acetylation -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:53Z - -[Term] -id: SO:0001705 -name: H3K4_monomethylation -def: "A kind of histone modification, whereby the 4th residue (a lysine), from the start of the H3 protein is methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 monomethylation" EXACT [] -synonym: "H3K4me1" EXACT [] -is_a: SO:0001701 ! histone_methylation -created_by: kareneilbeck -creation_date: 2010-03-31T10:28:14Z - -[Term] -id: SO:0001706 -name: H3K4_trimethylation -def: "A kind of histone modification, whereby the 4th residue (a lysine), from the start of the H3 protein is tri-methylated." 
[http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 trimethylation" EXACT [] -synonym: "H3K4me3" EXACT [] -is_a: SO:0001701 ! histone_methylation -created_by: kareneilbeck -creation_date: 2010-03-31T10:29:12Z - -[Term] -id: SO:0001707 -name: H3K9_trimethylation -def: "A kind of histone modification, whereby the 9th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 trimethylation" EXACT [] -is_a: SO:0001701 ! histone_methylation -created_by: kareneilbeck -creation_date: 2010-03-31T10:30:34Z - -[Term] -id: SO:0001708 -name: H3K27_monomethylation -def: "A kind of histone modification, whereby the 27th residue (a lysine), from the start of the H3 histone protein is methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2K27 monomethylation" EXACT [] -is_a: SO:0001701 ! histone_methylation -created_by: kareneilbeck -creation_date: 2010-03-31T10:31:54Z - -[Term] -id: SO:0001709 -name: H3K27_trimethylation -def: "A kind of histone modification, whereby the 27th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K27 trimethylation" EXACT [] -is_a: SO:0001701 ! histone_methylation -created_by: kareneilbeck -creation_date: 2010-03-31T10:32:41Z - -[Term] -id: SO:0001710 -name: H3K79_monomethylation -def: "A kind of histone modification, whereby the 79th residue (a lysine), from the start of the H3 histone protein is methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 monomethylation" EXACT [] -is_a: SO:0001701 ! histone_methylation -created_by: kareneilbeck -creation_date: 2010-03-31T10:33:42Z - -[Term] -id: SO:0001711 -name: H3K79_dimethylation -def: "A kind of histone modification, whereby the 79th residue (a lysine), from the start of the H3 histone protein is di-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K27 dimethylation" EXACT [] -is_a: SO:0001701 ! 
histone_methylation -created_by: kareneilbeck -creation_date: 2010-03-31T10:34:39Z - -[Term] -id: SO:0001712 -name: H3K79_trimethylation -def: "A kind of histone modification, whereby the 79th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 trimethylation" EXACT [] -is_a: SO:0001701 ! histone_methylation -created_by: kareneilbeck -creation_date: 2010-03-31T10:35:30Z - -[Term] -id: SO:0001713 -name: H4K20_monomethylation -def: "A kind of histone modification, whereby the 20th residue (a lysine), from the start of the H34histone protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H4K20 monomethylation" EXACT [] -is_a: SO:0001701 ! histone_methylation -created_by: kareneilbeck -creation_date: 2010-03-31T10:36:43Z - -[Term] -id: SO:0001714 -name: H2BK5_monomethylation -def: "A kind of histone modification, whereby the 5th residue (a lysine), from the start of the H2B protein is methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2BK5 monomethylation" EXACT [] -is_a: SO:0001701 ! histone_methylation -created_by: kareneilbeck -creation_date: 2010-03-31T10:38:12Z - -[Term] -id: SO:0001715 -name: ISRE -def: "An ISRE is a transcriptional cis regulatory region, containing the consensus region: YAGTTTC(A/T)YTTTYCC, responsible for increased transcription via interferon binding." [http://genesdev.cshlp.org/content/2/4/383.abstrac] -comment: Term requested via tracker (2981725) by Alan Ruttenberg, April 2010. It has been described as both an enhancer and a promoter, so the parent is the more general term. -synonym: "interferon stimulated response element" EXACT [] -is_a: SO:0001055 ! 
transcriptional_cis_regulatory_region -created_by: kareneilbeck -creation_date: 2010-04-05T11:15:08Z - -[Term] -id: SO:0001720 -name: epigenetically_modified_region -def: "A biological region implicated in inherited changes caused by mechanisms other than changes in the underlying DNA sequence." [http://en.wikipedia.org/wiki/Epigenetics, SO:ke] -subset: SOFA -synonym: "epigenetically modified region" RELATED [] -is_a: SO:0001411 ! biological_region -intersection_of: SO:0001411 ! biological_region -intersection_of: has_quality SO:0000133 ! epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-27T12:02:29Z - -[Term] -id: SO:0005836 -name: regulatory_region -def: "A DNA sequence that controls the expression of a gene." [SO:ke] -subset: SOFA -synonym: "regulatory region" EXACT [] -xref: http://en.wikipedia.org/wiki/Regulatory_region "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0005837 -name: U14_snoRNA_primary_transcript -def: "The primary transcript of an evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA." [PMID:2251119] -synonym: "4.5S snRNA primary transcript" EXACT [] -synonym: "U14 snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0005841 -name: methylation_guide_snoRNA -def: "A snoRNA that specifies the site of 2'-O-ribose methylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA 2'-O-ribose methylation guide activity (GO:0030561). -synonym: "methylation guide snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0000580 ! methylation_guide_snoRNA_primary_transcript - -[Term] -id: SO:0005843 -name: rRNA_cleavage_RNA -def: "An ncRNA that is part of a ribonucleoprotein that cleaves the primary pre-rRNA transcript in the process of producing mature rRNA molecules." 
[GOC:kgc] -synonym: "rRNA cleavage RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000582 ! rRNA_cleavage_snoRNA_primary_transcript - -[Term] -id: SO:0005845 -name: exon_of_single_exon_gene -def: "An exon that is the only exon in a gene." [RSC:cb] -synonym: "exon of single exon gene" EXACT [] -synonym: "single_exon" RELATED [] -synonym: "singleton exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0005847 -name: cassette_array_member -synonym: "cassette array member" EXACT [] -is_a: SO:0005848 ! gene_cassette_member - -[Term] -id: SO:0005848 -name: gene_cassette_member -synonym: "gene cassette member" EXACT [] -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0005849 -name: gene_subarray_member -synonym: "gene subarray member" EXACT [] -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0005850 -name: primer_binding_site -def: "Non-covalent primer binding site for initiation of replication, transcription, or reverse transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "primer binding site" EXACT [] -xref: http://en.wikipedia.org/wiki/Primer_binding_site "wiki" -is_a: SO:0000409 ! binding_site -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0005851 -name: gene_array -def: "An array includes two or more genes, or two or more gene subarrays, contiguously arranged where the individual genes, or subarrays, are either identical in sequence, or essentially so." [SO:ma] -comment: This would include, for example, a cluster of genes each encoding the major ribosomal RNAs and a cluster of histone gene subarrays. -synonym: "gene array" EXACT [] -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0005852 -name: gene_subarray -def: "A subarray is, by defintition, a member of a gene array (SO:0005851); the members of a subarray may differ substantially in sequence, but are closely related in function." 
[SO:ma] -comment: This would include, for example, a cluster of genes encoding different histones. -synonym: "gene subarray" EXACT [] -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0005853 -name: gene_cassette -def: "A gene that can be substituted for a related gene at a different site in the genome." [SGD:se] -comment: This would include, for example, the mating type gene cassettes of S. cerevisiae. Gene cassettes usually exist as linear sequences as part of a larger DNA molecule, such as a chromosome or plasmid. -synonym: "gene cassette" EXACT [] -xref: http://en.wikipedia.org/wiki/Gene_cassette "wiki" -is_a: SO:0000704 ! gene - -[Term] -id: SO:0005854 -name: gene_cassette_array -def: "An array of non-functional genes whose members, when captured by recombination form functional genes." [SO:ma] -comment: This would include, for example, the arrays of non-functional VSG genes of Trypanosomes. -synonym: "gene cassette array" EXACT [] -is_a: SO:0005855 ! gene_group -relationship: has_part SO:0005853 ! gene_cassette - -[Term] -id: SO:0005855 -name: gene_group -def: "A collection of related genes." [SO:ma] -subset: SOFA -synonym: "gene group" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0005856 -name: selenocysteine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -synonym: "selenocysteine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0005857 -name: selenocysteinyl_tRNA -def: "A tRNA sequence that has a selenocysteine anticodon, and a 3' selenocysteine binding region." [SO:ke] -synonym: "selenocysteinyl tRNA" EXACT [] -synonym: "selenocysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "selenocysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0005856 ! 
selenocysteine_tRNA_primary_transcript - -[Term] -id: SO:0005858 -name: syntenic_region -def: "A region in which two or more pairs of homologous markers occur on the same chromosome in two or more species." [http://www.informatics.jax.org/silverbook/glossary.shtml] -synonym: "syntenic region" EXACT [] -is_a: SO:0000330 ! implied link automatically realized ! conserved_region -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000860 ! syntenic - -[Term] -id: SO:0100001 -name: biochemical_region_of_peptide -def: "A region of a peptide that is involved in a biochemical function." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "biochemical motif" EXACT [] -synonym: "biochemical region of peptide" EXACT [] -synonym: "biochemical_region" RELATED [] -is_a: SO:0001067 ! polypeptide_motif - -[Term] -id: SO:0100002 -name: molecular_contact_region -def: "A region that is involved a contact with another molecule." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "molecular contact region" RELATED [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0100003 -name: intrinsically_unstructured_polypeptide_region -def: "A region of polypeptide chain with high conformational flexibility." [EBIBS:GAR] -subset: biosapiens -synonym: "disordered region" RELATED BS [] -synonym: "intrinsically unstructured polypeptide region" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0100004 -name: catmat_left_handed_three -def: "A motif of 3 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -75 bounds -100 to -50, res i+1: psi 140 bounds 110 to 170. An extra restriction of the length of the O to O distance would be useful, that it be less than 5 Angstrom. More precisely these two oxygens are the main chain carbonyl oxygen atoms of residues i-1 and i+1." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "catmat-3l" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100005 -name: catmat_left_handed_four -def: "A motif of 4 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i psi -10 bounds -50 to 30, res i+1: phi -90 bounds -120 to -60, res i+1: psi -10 bounds -50 to 30, res i+2: phi -75 bounds -100 to -50, res i+2: psi 140 bounds 110 to 170. The extra restriction of the length of the O to O distance is similar, that it be less than 5 Angstrom. In this case these two Oxygen atoms are the main chain carbonyl oxygen atoms of residues i-1 and i+2." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "catmat-4l" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100006 -name: catmat_right_handed_three -def: "A motif of 3 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -75 bounds -100 to -50, res i+1: psi 140 bounds 110 to 170. An extra restriction of the length of the O to O distance would be useful, that it be less than 5 Angstrom. More precisely these two oxygens are the main chain carbonyl oxygen atoms of residues i-1 and i+1." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "catmat-3r" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100007 -name: catmat_right_handed_four -def: "A motif of 4 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -90 bounds -120 to -60, res i+1: psi -10 bounds -50 to 30, res i+2: phi -75 bounds -100 to -50, res i+2: psi 140 bounds 110 to 170. The extra restriction of the length of the O to O distance is similar, that it be less than 5 Angstrom. 
In this case these two Oxygen atoms are the main chain carbonyl oxygen atoms of residues i-1 and i+2." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "catmat-4r" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100008 -name: alpha_beta_motif -def: "A motif of five consecutive residues and two H-bonds in which: H-bond between CO of residue(i) and NH of residue(i+4), H-bond between CO of residue(i) and NH of residue(i+3),Phi angles of residues(i+1), (i+2) and (i+3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "alpha beta motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100009 -name: lipoprotein_signal_peptide -def: "A peptide that acts as a signal for both membrane translocation and lipid attachment in prokaryotes." [EBIBS:GAR] -subset: biosapiens -synonym: "lipoprotein signal peptide" EXACT [] -synonym: "prokaryotic membrane lipoprotein lipid attachment site" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0100010 -name: no_output -def: "An experimental region wherean analysis has been run and not produced any annotation." [EBIBS:GAR] -subset: biosapiens -synonym: "no output" EXACT BS [] -is_a: SO:0000703 ! experimental_result_region - -[Term] -id: SO:0100011 -name: cleaved_peptide_region -def: "The cleaved_peptide_regon is the a region of peptide sequence that is cleaved during maturation." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "cleaved peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0100012 -name: peptide_coil -def: "Irregular, unstructured regions of a protein's backbone, as distinct from the regular region (namely alpha helix and beta strand - characterised by specific patterns of main-chain hydrogen bonds)." 
[EBIBS:GAR] -subset: biosapiens -synonym: "coil" RELATED BS [] -synonym: "peptide coil" EXACT [] -synonym: "random coil" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100013 -name: hydrophobic_region_of_peptide -def: "Hydrophobic regions are regions with a low affinity for water." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "hydropathic" RELATED [] -synonym: "hydrophobic region of peptide" RELATED [] -synonym: "hydrophobic_region" EXACT [] -synonym: "hydrophobicity" RELATED [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0100014 -name: n_terminal_region -def: "The amino-terminal positively-charged region of a signal peptide (approx 1-5 aa)." [EBIBS:GAR] -subset: biosapiens -synonym: "N-region" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0000418 ! signal_peptide - -[Term] -id: SO:0100015 -name: c_terminal_region -def: "The more polar, carboxy-terminal region of the signal peptide (approx 3-7 aa)." [EBIBS:GAR] -subset: biosapiens -synonym: "C-region" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0000418 ! signal_peptide - -[Term] -id: SO:0100016 -name: central_hydrophobic_region_of_signal_peptide -def: "The central, hydrophobic region of the signal peptide (approx 7-15 aa)." [EBIBS:GAR] -subset: biosapiens -synonym: "central hydrophobic region of signal peptide" EXACT [] -synonym: "central_hydrophobic_region" RELATED [] -synonym: "H-region" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0000418 ! signal_peptide - -[Term] -id: SO:0100017 -name: polypeptide_conserved_motif -def: "A conserved motif is a short (up to 20 amino acids) region of biological interest that is conserved in different proteins. They may or may not have functional or structural significance within the proteins in which they are found." [EBIBS:GAR] -subset: biosapiens -synonym: "motif" RELATED [] -is_a: SO:0001067 ! 
polypeptide_motif - -[Term] -id: SO:0100018 -name: polypeptide_binding_motif -def: "A polypeptide binding motif is a short (up to 20 amino acids) polypeptide region of biological interest that contains one or more amino acids experimentally shown to bind to a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "binding" RELATED [uniprot:feature_type] -synonym: "polypeptide binding motif" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0100019 -name: polypeptide_catalytic_motif -def: "A polypeptide catalytic motif is a short (up to 20 amino acids) polypeptide region that contains one or more active site residues." [EBIBS:GAR] -subset: biosapiens -synonym: "catalytic_motif" RELATED [] -synonym: "polypeptide catalytic motif" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0100020 -name: polypeptide_DNA_contact -def: "Residues involved in interactions with DNA." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide DNA contact" EXACT [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0100021 -name: polypeptide_conserved_region -def: "A subsection of sequence with biological interest that is conserved in different proteins. They may or may not have functional or structural significance within the proteins in which they are found." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide conserved region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:1000002 -name: substitution -def: "Any change in genomic DNA caused by a single event." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1000005 -name: complex_substitution -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -synonym: "complex substitution" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000008 -name: point_mutation -def: "A single nucleotide change which has occurred at the same position of a corresponding nucleotide in a reference sequence." [SO:immuno_workshop] -subset: SOFA -synonym: "point mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Point_mutation "wiki" -is_a: SO:0001483 ! SNV - -[Term] -id: SO:1000009 -name: transition -def: "Change of a pyrimidine nucleotide, C or T, into an other pyrimidine nucleotide, or change of a purine nucleotide, A or G, into an other purine nucleotide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:1000010 -name: pyrimidine_transition -def: "A substitution of a pyrimidine, C or T, for another pyrimidine." [SO:ke] -synonym: "pyrimidine transition" EXACT [] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000011 -name: C_to_T_transition -def: "A transition of a cytidine to a thymine." [SO:ke] -synonym: "C to T transition" EXACT [] -is_a: SO:1000010 ! pyrimidine_transition - -[Term] -id: SO:1000012 -name: C_to_T_transition_at_pCpG_site -def: "The transition of cytidine to thymine occurring at a pCpG site as a consequence of the spontaneous deamination of 5'-methylcytidine." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "C to T transition at pCpG site" EXACT [] -is_a: SO:1000011 ! C_to_T_transition - -[Term] -id: SO:1000013 -name: T_to_C_transition -synonym: "T to C transition" EXACT [] -is_a: SO:1000010 ! pyrimidine_transition - -[Term] -id: SO:1000014 -name: purine_transition -def: "A substitution of a purine, A or G, for another purine." [SO:ke] -synonym: "purine transition" EXACT [] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000015 -name: A_to_G_transition -def: "A transition of an adenine to a guanine." 
[SO:ke] -synonym: "A to G transition" EXACT [] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000016 -name: G_to_A_transition -def: "A transition of a guanine to an adenine." [SO:ke] -synonym: "G to A transition" EXACT [] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000017 -name: transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G, or vice versa." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -xref: http://en.wikipedia.org/wiki/Transversion "wiki" -is_a: SO:0001483 ! SNV - -[Term] -id: SO:1000018 -name: pyrimidine_to_purine_transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G." [SO:ke] -synonym: "pyrimidine to purine transversion" EXACT [] -is_a: SO:1000017 ! transversion - -[Term] -id: SO:1000019 -name: C_to_A_transversion -def: "A transversion from cytidine to adenine." [SO:ke] -synonym: "C to A transversion" EXACT [] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000020 -name: C_to_G_transversion -synonym: "C to G transversion" EXACT [] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000021 -name: T_to_A_transversion -def: "A transversion from T to A." [SO:ke] -synonym: "T to A transversion" EXACT [] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000022 -name: T_to_G_transversion -def: "A transversion from T to G." [SO:ke] -synonym: "T to G transversion" EXACT [] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000023 -name: purine_to_pyrimidine_transversion -def: "Change of a purine nucleotide, A or G , into a pyrimidine nucleotide C or T." [SO:ke] -synonym: "purine to pyrimidine transversion" EXACT [] -is_a: SO:1000017 ! transversion - -[Term] -id: SO:1000024 -name: A_to_C_transversion -def: "A transversion from adenine to cytidine." [SO:ke] -synonym: "A to C transversion" EXACT [] -is_a: SO:1000023 ! 
purine_to_pyrimidine_transversion - -[Term] -id: SO:1000025 -name: A_to_T_transversion -def: "A transversion from adenine to thymine." [SO:ke] -synonym: "A to T transversion" EXACT [] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000026 -name: G_to_C_transversion -def: "A transversion from guanine to cytidine." [SO:ke] -synonym: "G to C transversion" EXACT [] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000027 -name: G_to_T_transversion -def: "A transversion from guanine to thymine." [SO:ke] -synonym: "G to T transversion" EXACT [] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000028 -name: intrachromosomal_mutation -def: "A chromosomal structure variation within a single chromosome." [SO:ke] -synonym: "intrachromosomal mutation" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation -intersection_of: SO:1000183 ! chromosome_structure_variation -intersection_of: has_quality SO:0001510 ! intrachromosomal - -[Term] -id: SO:1000029 -name: chromosomal_deletion -def: "An incomplete chromosome." [SO:ke] -synonym: "(bacteria)&Dgr;" RELATED [] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(fungi)D" RELATED [] -synonym: "chromosomal deletion" EXACT [] -synonym: "deficiency" EXACT [] -xref: http://en.wikipedia.org/wiki/Chromosomal_deletion "wiki" -is_a: SO:1000028 ! intrachromosomal_mutation -intersection_of: SO:1000028 ! intrachromosomal_mutation -intersection_of: has_part SO:0000159 ! deletion - -[Term] -id: SO:1000030 -name: chromosomal_inversion -def: "An interchromosomal mutation where a region of the chromosome is inverted with respect to wild type." [SO:ke] -synonym: "(bacteria)IN" RELATED [] -synonym: "(Drosophila)In" RELATED [] -synonym: "(fungi)In" RELATED [] -synonym: "chromosomal inversion" EXACT [] -xref: http://en.wikipedia.org/wiki/Chromosomal_inversion "wiki" -is_a: SO:1000028 ! intrachromosomal_mutation -intersection_of: SO:1000028 ! 
intrachromosomal_mutation -intersection_of: has_part SO:1000036 ! inversion - -[Term] -id: SO:1000031 -name: interchromosomal_mutation -def: "A chromosomal structure variation whereby more than one chromosome is involved." [SO:ke] -synonym: "interchromosomal mutation" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation -intersection_of: SO:1000183 ! chromosome_structure_variation -intersection_of: has_quality SO:0001511 ! interchromosomal - -[Term] -id: SO:1000032 -name: indel -def: "A sequence alteration which included an insertion and a deletion, affecting 2 or more bases." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html, http:http\://www.hgvs.org/mutnomen/recs-DNA.html#indel] -comment: Indels can have a different number of bases than the corresponding reference sequence. -xref: http://en.wikipedia.org/wiki/Indel "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:1000035 -name: duplication -def: "One or more nucleotides are added between two adjacent nucleotides in the sequence; the inserted sequence derives from, or is identical in sequence to, nucleotides adjacent to insertion point." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "nucleotide duplication" EXACT [] -synonym: "nucleotide_duplication" RELATED [] -is_a: SO:0000667 ! insertion - -[Term] -id: SO:1000036 -name: inversion -def: "A continuous nucleotide sequence is inverted in the same position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1000037 -name: chromosomal_duplication -def: "An extra chromosome." [SO:ke] -synonym: "(Drosophila)Dp" RELATED [] -synonym: "(fungi)Dp" RELATED [] -synonym: "chromosomal duplication" EXACT [] -xref: http://en.wikipedia.org/wiki/Chromosomal_duplication "wiki" -is_a: SO:1000183 ! 
chromosome_structure_variation - -[Term] -id: SO:1000038 -name: intrachromosomal_duplication -def: "A duplication that occurred within a chromosome." [SO:ke] -synonym: "intrachromosomal duplication" EXACT [] -is_a: SO:1000028 ! intrachromosomal_mutation -is_a: SO:1000037 ! chromosomal_duplication -intersection_of: SO:1000028 ! intrachromosomal_mutation -intersection_of: has_part SO:1000035 ! duplication - -[Term] -id: SO:1000039 -name: direct_tandem_duplication -def: "A tandem duplication where the individual regions are in the same orientation." [SO:ke] -synonym: "direct tandem duplication" EXACT [] -is_a: SO:1000173 ! tandem_duplication - -[Term] -id: SO:1000040 -name: inverted_tandem_duplication -def: "A tandem duplication where the individual regions are not in the same orientation." [SO:ke] -synonym: "inverted tandem duplication" EXACT [] -synonym: "mirror duplication" RELATED [] -is_a: SO:1000173 ! tandem_duplication - -[Term] -id: SO:1000041 -name: intrachromosomal_transposition -def: "A chromosome structure variation whereby a transposition occurred within a chromosome." [SO:ke] -synonym: "(Drosophila)Tp" RELATED [] -synonym: "intrachromosomal transposition" EXACT [] -is_a: SO:0000453 ! chromosomal_transposition -is_a: SO:1000038 ! intrachromosomal_duplication -intersection_of: SO:1000028 ! intrachromosomal_mutation -intersection_of: has_part SO:0000199 ! translocation -intersection_of: has_part SO:1000035 ! duplication - -[Term] -id: SO:1000042 -name: compound_chromosome -def: "A chromosome structure variant where a monocentric element is caused by the fusion of two chromosome arms." [SO:ke] -synonym: "compound chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000043 -name: Robertsonian_fusion -def: "A non reciprocal translocation whereby the participating chromosomes break at their centromeres and the long arms fuse to form a single chromosome with a single centromere." 
[http://en.wikipedia.org/wiki/Robertsonian_translocation] -synonym: "centric-fusion translocations" EXACT [] -synonym: "Robertsonian fusion" EXACT [] -synonym: "whole-arm translocations" EXACT [] -xref: http://en.wikipedia.org/wiki/Robertsonian_fusion "wiki" -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000044 -name: chromosomal_translocation -def: "An interchromosomal mutation. Rearrangements that alter the pairing of telomeres are classified as translocations." [FB:reference_manual] -synonym: "(Drosophila)T" RELATED [] -synonym: "(fungi)T" RELATED [] -synonym: "chromosomal translocation" EXACT [] -xref: http://en.wikipedia.org/wiki/Chromosomal_translocation "wiki" -is_a: SO:1000031 ! interchromosomal_mutation -intersection_of: SO:1000031 ! interchromosomal_mutation -intersection_of: has_part SO:0000199 ! translocation - -[Term] -id: SO:1000045 -name: ring_chromosome -def: "A ring chromosome is a chromosome whose arms have fused together to form a ring, often with the loss of the ends of the chromosome." [http://en.wikipedia.org/wiki/Ring_chromosome] -synonym: "(Drosophila)R" RELATED [] -synonym: "(fungi)C" RELATED [] -synonym: "ring chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Ring_chromosome "wiki" -is_a: SO:1000028 ! intrachromosomal_mutation -intersection_of: SO:1000028 ! intrachromosomal_mutation -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:1000046 -name: pericentric_inversion -def: "A chromosomal inversion that includes the centromere." [FB:reference_manual] -synonym: "pericentric inversion" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -intersection_of: SO:1000030 ! chromosomal_inversion -intersection_of: has_quality SO:0001518 ! pericentric - -[Term] -id: SO:1000047 -name: paracentric_inversion -def: "A chromosomal inversion that does not include the centromere." [FB:reference_manual] -synonym: "paracentric inversion" EXACT [] -is_a: SO:1000030 ! 
chromosomal_inversion -intersection_of: SO:1000030 ! chromosomal_inversion -intersection_of: has_quality SO:0001519 ! paracentric - -[Term] -id: SO:1000048 -name: reciprocal_chromosomal_translocation -def: "A chromosomal translocation with two breaks; two chromosome segments have simply been exchanged." [FB:reference_manual] -synonym: "reciprocal chromosomal translocation" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000049 -name: sequence_variation_affecting_transcript -alt_id: SO:1000177 -alt_id: SO:1000179 -def: "Any change in mature, spliced and processed, RNA that results from a change in the corresponding DNA sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting transcript" EXACT [] -synonym: "mutation causing partially characterised change in transcript" RELATED [] -synonym: "mutation causing uncharacterised change in transcript" RELATED [] -synonym: "sequence variant causing partially characterised change in transcript" EXACT [] -synonym: "sequence variant causing uncharacterised change in transcript" EXACT [] -synonym: "sequence variation affecting transcript" EXACT [] -synonym: "sequence_variant_causing_partially_characterised_change_in_transcript" EXACT [] -synonym: "sequence_variant_causing_uncharacterised_change_in_transcript" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:1000050 -name: sequence_variant_causing_no_change_in_transcript -def: "No effect on the state of the RNA." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing no change in transcript" RELATED [] -synonym: "sequence variant causing no change in transcript" EXACT [] -is_a: SO:1000049 ! 
sequence_variation_affecting_transcript - -[Term] -id: SO:1000052 -name: sequence_variation_affecting_complex_change_in_transcript -synonym: "mutation affecting complex change in transcript" EXACT [] -synonym: "sequence variation affecting complex change in transcript" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000054 -name: sequence_variation_affecting_coding_sequence -def: "Any of the amino acid coding triplets of a gene are affected by the DNA mutation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting coding sequence" EXACT [] -synonym: "sequence variation affecting coding sequence" RELATED [] -is_a: SO:1000079 ! sequence_variation_affecting_transcript_sequence - -[Term] -id: SO:1000055 -name: sequence_variant_causing_initiator_codon_change_in_transcript -def: "The DNA mutation changes, usually destroys, the first coding triplet of a gene. Usually prevents translation although another initiator codon may be used." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing initiator codon change in transcript" RELATED [] -synonym: "sequence variant causing initiator codon change in transcript" EXACT [] -is_a: SO:1000056 ! sequence_variant_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000056 -name: sequence_variant_causing_amino_acid_coding_codon_change_in_transcript -def: "The DNA mutation affects the amino acid coding sequence of a gene; this region includes both the initiator and terminator codons." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutaton causing amino acid coding codon change in transcript" RELATED [] -synonym: "sequence variant causing amino acid coding codon change in transcript" EXACT [] -is_a: SO:1000054 ! 
sequence_variation_affecting_coding_sequence - -[Term] -id: SO:1000057 -name: sequence_variant_causing_synonymous_codon_change_in_transcript -def: "The changed codon has the same translation product as the original codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing synonymous codon change in transcript" RELATED [] -synonym: "sequence variant causing synonymous codon change in transcript" EXACT [] -is_a: SO:1000056 ! sequence_variant_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000058 -name: sequence_variant_causing_non_synonymous_codon_change_in_transcript -def: "A DNA point mutation that causes a substitution of an amino acid by an other." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing non synonymous codon change in transcript" RELATED [] -synonym: "non-synonymous codon change in transcript" EXACT [] -synonym: "sequence variant causing non synonymous codon change in transcript" EXACT [] -is_a: SO:1000056 ! sequence_variant_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000059 -name: sequence_variant_causing_missense_codon_change_in_transcript -def: "The nucleotide change in the codon leads to a new codon coding for a new amino acid." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing missense codon change in transcript" RELATED [] -synonym: "sequence variant causing missense codon change in transcript" EXACT [] -is_a: SO:1000058 ! sequence_variant_causing_non_synonymous_codon_change_in_transcript - -[Term] -id: SO:1000060 -name: sequence_variant_causing_conservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change does not change the gross properties (size, charge, hydrophobicity) of the amino acid at that position." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -synonym: "mutation causing conservative missense codon change in transcript" RELATED [] -synonym: "sequence variant causing conservative missense codon change in transcript" EXACT [] -is_a: SO:1000059 ! sequence_variant_causing_missense_codon_change_in_transcript - -[Term] -id: SO:1000061 -name: sequence_variant_causing_nonconservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change changes the gross properties (size, charge, hydrophobicity) of the amino acid in that position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -synonym: "mutation causing nonconservative missense codon change in transcript" RELATED [] -synonym: "sequence variant causing nonconservative missense codon change in transcript" EXACT [] -is_a: SO:1000059 ! sequence_variant_causing_missense_codon_change_in_transcript - -[Term] -id: SO:1000062 -name: sequence_variant_causing_nonsense_codon_change_in_transcript -def: "The nucleotide change in the codon triplet creates a terminator codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing nonsense codon change in transcript" RELATED [] -synonym: "sequence variant causing nonsense codon change in transcript" EXACT [] -is_a: SO:1000056 ! sequence_variant_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000063 -name: sequence_variant_causing_terminator_codon_change_in_transcript -def: "The nucleotide change in the codon triplet changes the stop codon, causing an elongated transcript sequence." 
[SO:ke] -synonym: "mutation causing terminator codon change in transcript" RELATED [] -synonym: "sequence variant causing terminator codon change in transcript" EXACT [] -is_a: SO:1000054 ! sequence_variation_affecting_coding_sequence - -[Term] -id: SO:1000064 -name: sequence_variation_affecting_reading_frame -def: "An umbrella term for terms describing an effect of a sequence variation on the frame of translation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting reading frame" EXACT [] -synonym: "sequence variation affecting reading frame" RELATED [] -is_a: SO:1000054 ! sequence_variation_affecting_coding_sequence - -[Term] -id: SO:1000065 -name: frameshift_sequence_variation -def: "A mutation causing a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three." [SO:ke] -synonym: "frameshift mutation" EXACT [] -synonym: "frameshift sequence variation" RELATED [] -synonym: "out of frame mutation" RELATED [] -xref: http://en.wikipedia.org/wiki/Frameshift_mutation "wiki" -is_a: SO:1000064 ! sequence_variation_affecting_reading_frame - -[Term] -id: SO:1000066 -name: sequence_variant_causing_plus_1_frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, due to the insertion of a nucleotide." [SO:ke] -synonym: "plus 1 frameshift mutation" EXACT [] -synonym: "sequence variant causing plus 1 frameshift mutation" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000067 -name: sequence_variant_causing_minus_1_frameshift -def: "A mutation causing a disruption of the translational reading frame, due to the deletion of a nucleotide." [SO:ke] -synonym: "minus 1 frameshift mutation" EXACT [] -synonym: "sequence variant causing minus 1 frameshift" EXACT [] -is_a: SO:1000065 ! 
frameshift_sequence_variation - -[Term] -id: SO:1000068 -name: sequence_variant_causing_plus_2_frameshift -def: "A mutation causing a disruption of the translational reading frame, due to the insertion of two nucleotides." [SO:ke] -synonym: "plus 2 frameshift mutation" EXACT [] -synonym: "sequence variant causing plus 2 frameshift" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000069 -name: sequence_variant_causing_minus_2_frameshift -def: "A mutation causing a disruption of the translational reading frame, due to the deletion of two nucleotides." [SO:ke] -synonym: "minus 2 frameshift mutation" EXACT [] -synonym: "sequence variant causing minus 2 frameshift" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000070 -name: sequence_variant_affecting_transcript_processing -def: "Sequence variant affects the way in which the primary transcriptional product is processed to form the mature transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting transcript processing" RELATED [] -synonym: "sequence variant affecting transcript processing" EXACT [] -is_a: SO:1000079 ! sequence_variation_affecting_transcript_sequence - -[Term] -id: SO:1000071 -name: sequence_variant_affecting_splicing -def: "A sequence_variant_effect where the way in which the primary transcriptional product is processed to form the mature transcript, specifically by the removal (splicing) of intron sequences is changed." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting splicing" RELATED [] -synonym: "sequence variant affecting splicing" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:1000072 -name: sequence_variant_affecting_splice_donor -def: "A sequence_variant_effect that changes the splice donor sequence." 
[SO:ke] -synonym: "mutation affecting splice donor" RELATED [] -synonym: "sequence variant affecting splice donor" RELATED [] -synonym: "splice donor mutation" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000073 -name: sequence_variant_affecting_splice_acceptor -def: "A sequence_variant_effect that changes the splice acceptor sequence." [SO:ke] -synonym: "mutation affecting splicing" RELATED [] -synonym: "sequence variant affecting splice acceptor" RELATED [] -synonym: "splice acceptor mutation" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000074 -name: sequence_variant_causing_cryptic_splice_activation -def: "A sequence variant causing a new (functional) splice site." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: A cryptic splice site is only used when the natural splice site has been disrupted by a sequence alteration. -synonym: "cryptic splice activator sequence variant" EXACT [] -synonym: "mutation causing cryptic splice activator" RELATED [] -synonym: "sequence variant causing cryptic splice activator" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000075 -name: sequence_variant_affecting_editing -def: "Sequence variant affects the editing of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting editing" RELATED [] -synonym: "sequence variant affecting editing" EXACT [] -is_a: SO:1000070 ! sequence_variant_affecting_transcript_processing - -[Term] -id: SO:1000076 -name: sequence_variant_affecting_transcription -def: "Mutation affects the process of transcription, its initiation, progression or termination." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting transcription" RELATED [] -synonym: "sequence variant affecting transcription" EXACT [] -is_a: SO:1000049 ! 
sequence_variation_affecting_transcript - -[Term] -id: SO:1000078 -name: sequence_variant_decreasing_rate_of_transcription -def: "A sequence variation that decreases the rate a which transcription of the sequence occurs." [SO:ke] -synonym: "mutation decreasing rate of transcription" RELATED [] -synonym: "sequence variation decreasing rate of transcription" EXACT [] -is_a: SO:1000081 ! sequence_variant_affecting_rate_of_transcription - -[Term] -id: SO:1000079 -name: sequence_variation_affecting_transcript_sequence -synonym: "mutation affecting transcript sequence" EXACT [] -synonym: "sequence variation affecting transcript sequence" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000080 -name: sequence_variant_increasing_rate_of_transcription -synonym: "mutation increasing rate of transcription" RELATED [] -synonym: "sequence variation increasing rate of transcription" EXACT [] -is_a: SO:1000081 ! sequence_variant_affecting_rate_of_transcription - -[Term] -id: SO:1000081 -name: sequence_variant_affecting_rate_of_transcription -def: "A mutation that alters the rate a which transcription of the sequence occurs." [SO:ke] -synonym: "mutation affecting rate of transcription" RELATED [] -synonym: "sequence variant affecting rate of transcription" EXACT [] -is_a: SO:1000076 ! sequence_variant_affecting_transcription - -[Term] -id: SO:1000082 -name: sequence variant_affecting_transcript_stability -def: "Sequence variant affects the stability of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting transcript stability" RELATED [] -synonym: "sequence variant affecting transcript stability" EXACT [] -is_a: SO:1000079 ! sequence_variation_affecting_transcript_sequence - -[Term] -id: SO:1000083 -name: sequence_variant_increasing_transcript_stability -def: "Sequence variant increases the stability (half-life) of the transcript." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation increasing transcript stability" RELATED [] -synonym: "sequence variant increasing transcript stability" EXACT [] -is_a: SO:1000082 ! sequence variant_affecting_transcript_stability - -[Term] -id: SO:1000084 -name: sequence_variant_decreasing_transcript_stability -def: "Sequence variant decreases the stability (half-life) of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation decreasing transcript stability" RELATED [] -synonym: "sequence variant decreasing transcript stability" EXACT [] -is_a: SO:1000082 ! sequence variant_affecting_transcript_stability - -[Term] -id: SO:1000085 -name: sequence_variation_affecting_level_of_transcript -def: "A sequence variation that causes a change in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -synonym: "mutation affecting level of transcript" RELATED [] -synonym: "sequence variation affecting level of transcript" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000086 -name: sequence_variation_decreasing_level_of_transcript -def: "A sequence variation that causes a decrease in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -synonym: "mutation decreasing level of transcript" EXACT [] -synonym: "sequence variation decreasing level of transcript" RELATED [] -is_a: SO:1000085 ! sequence_variation_affecting_level_of_transcript - -[Term] -id: SO:1000087 -name: sequence_variation_increasing_level_of_transcript -def: "A sequence_variation that causes an increase in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -synonym: "mutation increasing level of transcript" EXACT [] -synonym: "sequence variation increasing level of transcript" EXACT [] -is_a: SO:1000085 ! 
sequence_variation_affecting_level_of_transcript - -[Term] -id: SO:1000088 -name: sequence_variant_affecting_translational_product -alt_id: SO:1000090 -alt_id: SO:1000091 -def: "A sequence variant causing a change in primary translation product of a transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting translational product" RELATED [] -synonym: "mutation causing partially characterised change of translational product" RELATED [] -synonym: "mutation causing uncharacterised change of translational product" RELATED [] -synonym: "sequence variant affecting translational product" EXACT [] -synonym: "sequence variant causing partially characterised change of translational product" EXACT [] -synonym: "sequence variant causing uncharacterised change of translational product" EXACT [] -synonym: "sequence_variant_causing_partially_characterised_change_of_translational_product" EXACT [] -synonym: "sequence_variant_causing_uncharacterised_change_of_translational_product" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:1000089 -name: sequence_variant_causing_no_change_of_translational_product -def: "The sequence variant at RNA level does not lead to any change in polypeptide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing no change of translational product" RELATED [] -synonym: "sequence variant causing no change of translational product" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000092 -name: sequence_variant_causing_complex_change_of_translational_product -def: "Any sequence variant effect that is known at nucleotide level but cannot be explained by using other key terms." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing complex change of translational product" RELATED [] -synonym: "sequence variant causing complex change of translational product" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000093 -name: sequence_variant_causing_amino_acid_substitution -def: "The replacement of a single amino acid by another." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing amino acid substitution" RELATED [] -synonym: "sequence variant causing amino acid substitution" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000094 -name: sequence_variant_causing_conservative_amino_acid_substitution -synonym: "mutation causing conservative amino acid substitution" RELATED [] -synonym: "sequence variant causing conservative amino acid substitution" EXACT [] -is_a: SO:1000093 ! sequence_variant_causing_amino_acid_substitution - -[Term] -id: SO:1000095 -name: sequence_variant_causing_nonconservative_amino_acid_substitution -synonym: "mutation causing nonconservative amino acid substitution" RELATED [] -synonym: "sequence variant causing nonconservative amino acid substitution" EXACT [] -is_a: SO:1000093 ! sequence_variant_causing_amino_acid_substitution - -[Term] -id: SO:1000096 -name: sequence_variant_causing_amino_acid_insertion -def: "The insertion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing amino acid insertion" RELATED [] -synonym: "sequence variant causing amino acid insertion" EXACT [] -is_a: SO:1000105 ! 
sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000097 -name: sequence_variant_causing_amino_acid_deletion -def: "The deletion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing amino acid deletion" RELATED [] -synonym: "sequence variant causing amino acid deletion" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000098 -name: sequence_variant_causing_polypeptide_truncation -def: "The translational product is truncated at its C-terminus, usually a result of a nonsense codon change in transcript (SO:1000062)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing polypeptide truncation" RELATED [] -synonym: "sequence variant causing polypeptide truncation" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000099 -name: sequence_variant_causing_polypeptide_elongation -def: "The extension of the translational product at either (or both) the N-terminus and/or the C-terminus." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing polypeptide elongation" RELATED [] -synonym: "sequence variant causing polypeptide elongation" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000100 -name: mutation_causing_polypeptide_N_terminal_elongation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing polypeptide N terminal elongation" EXACT [] -synonym: "polypeptide N-terminal elongation" EXACT [] -is_a: SO:1000099 ! sequence_variant_causing_polypeptide_elongation - -[Term] -id: SO:1000101 -name: mutation_causing_polypeptide_C_terminal_elongation -def: "." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing polypeptide C terminal elongation" EXACT [] -synonym: "polypeptide C-terminal elongation" EXACT [] -is_a: SO:1000099 ! sequence_variant_causing_polypeptide_elongation - -[Term] -id: SO:1000102 -name: sequence_variant_affecting_level_of_translational_product -synonym: "mutation affecting level of translational product" RELATED [] -synonym: "sequence variant affecting level of translational product" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000103 -name: sequence_variant_decreasing_level_of_translation_product -synonym: "mutationdecreasing level of translation product" RELATED [] -synonym: "sequence variant decreasing level of translation product" EXACT [] -is_a: SO:1000102 ! sequence_variant_affecting_level_of_translational_product - -[Term] -id: SO:1000104 -name: sequence_variant_increasing_level_of_translation_product -synonym: "mutationt increasing level of translation product" RELATED [] -synonym: "sequence variant increasing level of translation product" EXACT [] -is_a: SO:1000102 ! sequence_variant_affecting_level_of_translational_product - -[Term] -id: SO:1000105 -name: sequence_variant_affecting_polypeptide_amino_acid_sequence -synonym: "mutation affecting polypeptide amino acid sequence" RELATED [] -synonym: "sequence variant affecting polypeptide amino acid sequence" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000106 -name: mutation_causing_inframe_polypeptide_N_terminal_elongation -synonym: "inframe polypeptide N-terminal elongation" EXACT [] -synonym: "mutation causing inframe polypeptide N terminal elongation" EXACT [] -is_a: SO:1000100 ! 
mutation_causing_polypeptide_N_terminal_elongation - -[Term] -id: SO:1000107 -name: mutation_causing_out_of_frame_polypeptide_N_terminal_elongation -synonym: "mutation causing out of frame polypeptide N terminal elongation" EXACT [] -synonym: "out of frame polypeptide N-terminal elongation" EXACT [] -is_a: SO:1000100 ! mutation_causing_polypeptide_N_terminal_elongation - -[Term] -id: SO:1000108 -name: mutaton_causing_inframe_polypeptide_C_terminal_elongation -synonym: "inframe_polypeptide C-terminal elongation" EXACT [] -synonym: "mutaton causing inframe polypeptide C terminal elongation" EXACT [] -is_a: SO:1000101 ! mutation_causing_polypeptide_C_terminal_elongation - -[Term] -id: SO:1000109 -name: mutation_causing_out_of_frame_polypeptide_C_terminal_elongation -synonym: "mutation causing out of frame polypeptide C terminal elongation" EXACT [] -synonym: "out of frame polypeptide C-terminal elongation" EXACT [] -is_a: SO:1000101 ! mutation_causing_polypeptide_C_terminal_elongation - -[Term] -id: SO:1000110 -name: frame_restoring_sequence_variant -def: "A mutation that reverts the sequence of a previous frameshift mutation back to the initial frame." [SO:ke] -synonym: "frame restoring mutation" EXACT [] -synonym: "frame restoring sequence variant" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000111 -name: sequence_variant_affecting_3D_structure_of_polypeptide -alt_id: SO:1000113 -alt_id: SO:1000114 -def: "A mutation that changes the amino acid sequence of the peptide in such a way that it changes the 3D structure of the molecule." 
[SO:ke] -synonym: "mutation affecting 3D structure of polypeptide" RELATED [] -synonym: "mutation causing partially characterised 3D structural change" RELATED [] -synonym: "mutation causing uncharacterised 3D structural change" RELATED [] -synonym: "sequence variant affecting 3D structure of polypeptide" EXACT [] -synonym: "sequence variant affecting 3D-structure of polypeptide" EXACT [] -synonym: "sequence variant causing partially characterised 3D structural change" EXACT [] -synonym: "sequence variant causing uncharacterised 3D structural change" EXACT [] -synonym: "sequence_variant_causing_partially_characterised_3D_structural_change" EXACT [] -synonym: "sequence_variant_causing_uncharacterised_3D_structural_change" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000112 -name: sequence_variant_causing_no_3D_structural_change -synonym: "mutation causing no 3D structural change" RELATED [] -synonym: "sequence variant causing no 3D structural change" EXACT [] -is_a: SO:1000111 ! sequence_variant_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000115 -name: sequence_variant_causing_complex_3D_structural_change -synonym: "mutation causing complex 3D structural change" RELATED [] -synonym: "sequence variant causing complex 3D structural change" EXACT [] -is_a: SO:1000111 ! sequence_variant_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000116 -name: sequence_variant_causing_conformational_change -synonym: "mutation causing conformational change" RELATED [] -synonym: "sequence variant causing conformational change" EXACT [] -is_a: SO:1000111 ! sequence_variant_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000117 -name: sequence_variant_affecting_polypeptide_function -synonym: "mutation affecting polypeptide function" RELATED [] -synonym: "sequence variant affecting polypeptide function" EXACT [] -is_a: SO:1000088 ! 
sequence_variant_affecting_translational_product - -[Term] -id: SO:1000118 -name: sequence_variant_causing_loss_of_function_of_polypeptide -synonym: "loss of function of polypeptide" RELATED [] -synonym: "mutation causing loss of function of polypeptide" RELATED [] -synonym: "sequence variant causing loss of function of polypeptide" EXACT [] -is_a: SO:1000117 ! sequence_variant_affecting_polypeptide_function - -[Term] -id: SO:1000119 -name: sequence_variant_causing_inactive_ligand_binding_site -synonym: "mutation causing inactive ligand binding site" RELATED [] -synonym: "sequence variant causing inactive ligand binding site" EXACT [] -is_a: SO:1000118 ! sequence_variant_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000120 -name: sequence_variant_causing_inactive_catalytic_site -synonym: "mutation causing inactive catalytic site" RELATED [] -synonym: "sequence variant causing inactive catalytic site" EXACT [] -is_a: SO:1000119 ! sequence_variant_causing_inactive_ligand_binding_site - -[Term] -id: SO:1000121 -name: sequence_variant_causing_polypeptide_localization_change -synonym: "mutation causing polypeptide localization change" RELATED [] -synonym: "sequence variant causing polypeptide localization change" EXACT [] -is_a: SO:1000117 ! sequence_variant_affecting_polypeptide_function - -[Term] -id: SO:1000122 -name: sequence_variant_causing_polypeptide_post_translational_processing_change -synonym: "mutation causing polypeptide post translational processing change" RELATED [] -synonym: "polypeptide post-translational processing affected" EXACT [] -synonym: "sequence variant causing polypeptide post translational processing change" EXACT [] -is_a: SO:1000117 ! sequence_variant_affecting_polypeptide_function -is_a: SO:1000118 ! 
sequence_variant_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000123 -name: polypeptide_post_translational_processing_affected -synonym: "polypeptide_post-translational_processing_affected" RELATED [] -is_obsolete: true - -[Term] -id: SO:1000124 -name: sequence_variant_causing_partial_loss_of_function_of_polypeptide -synonym: "mutation causing partial loss of function of polypeptide" RELATED [] -synonym: "partial loss of function of polypeptide" EXACT [] -synonym: "sequence variant causing partial loss of function of polypeptide" EXACT [] -is_a: SO:1000118 ! sequence_variant_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000125 -name: sequence_variant_causing_gain_of_function_of_polypeptide -synonym: "gain of function of polypeptide" EXACT [] -synonym: "mutation causing gain of function of polypeptide" RELATED [] -synonym: "sequence variant causing gain of function of polypeptide" EXACT [] -is_a: SO:1000117 ! sequence_variant_affecting_polypeptide_function - -[Term] -id: SO:1000126 -name: sequence_variant_affecting_transcript_secondary_structure -def: "A sequence variant that affects the secondary structure (folding) of the RNA transcript molecule." [SO:ke] -synonym: "mutation affecting transcript secondary structure" RELATED [] -synonym: "sequence variant affecting transcript secondary structure" EXACT [] -is_a: SO:1000079 ! sequence_variation_affecting_transcript_sequence - -[Term] -id: SO:1000127 -name: sequence_variant_causing_compensatory_transcript_secondary_structure_mutation -synonym: "mutation causing compensatory transcript secondary structure mutation" RELATED [] -synonym: "sequence variant causing compensatory transcript secondary structure mutation" EXACT [] -is_a: SO:1000126 ! sequence_variant_affecting_transcript_secondary_structure - -[Term] -id: SO:1000132 -name: sequence_variant_effect -def: "The effect of a change in nucleotide sequence." [SO:ke] -comment: Updated after discussion with Peter Taschner - Feb 09. 
-synonym: "sequence variant effect" RELATED [] -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:0000400 ! sequence_attribute - -[Term] -id: SO:1000134 -name: sequence_variant_causing_polypeptide_fusion -synonym: "mutation causing polypeptide fusion" RELATED [] -synonym: "sequence variant causing polypeptide fusion" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000136 -name: autosynaptic_chromosome -def: "An autosynaptic chromosome is the aneuploid product of recombination between a pericentric inversion and a cytologically wild-type chromosome." [PMID:6804304] -synonym: "(Drosophila)A" RELATED [] -synonym: "autosynaptic chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000138 -name: homo_compound_chromosome -def: "A compound chromosome whereby two copies of the same chromosomal arm attached to a common centromere. The chromosome is diploid for the arm involved." [SO:ke] -synonym: "homo compound chromosome" EXACT [] -synonym: "homo-compound chromosome" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:1000140 -name: hetero_compound_chromosome -def: "A compound chromosome whereby two arms from different chromosomes are connected through the centromere of one of them." [FB:reference_manual, SO:ke] -synonym: "hetero compound chromosome" EXACT [] -synonym: "hetero-compound chromosome" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:1000141 -name: chromosome_fission -def: "A chromosome that occurred by the division of a larger chromosome." [SO:ke] -synonym: "chromosome fission" EXACT [] -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000142 -name: dexstrosynaptic_chromosome -def: "An autosynaptic chromosome carrying the two right (D = dextro) telomeres." [FB:manual] -synonym: "dexstrosynaptic chromosome" EXACT [] -is_a: SO:1000136 ! 
autosynaptic_chromosome - -[Term] -id: SO:1000143 -name: laevosynaptic_chromosome -def: "LS is an autosynaptic chromosome carrying the two left (L = levo) telomeres." [FB:manual] -synonym: "laevosynaptic chromosome" EXACT [] -is_a: SO:1000136 ! autosynaptic_chromosome - -[Term] -id: SO:1000144 -name: free_duplication -def: "A chromosome structure variation whereby the duplicated sequences are carried as a free centric element." [FB:reference_manual] -synonym: "free duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000145 -name: free_ring_duplication -def: "A ring chromosome which is a copy of another chromosome." [SO:ke] -synonym: "(Drosophila)R" RELATED [] -synonym: "free ring duplication" EXACT [] -is_a: SO:1000045 ! ring_chromosome -is_a: SO:1000144 ! free_duplication -intersection_of: SO:1000045 ! ring_chromosome -intersection_of: has_quality SO:0001516 ! free - -[Term] -id: SO:1000146 -name: complex_chromosomal_mutation -def: "A chromosome structure variant with 4 or more breakpoints." [FB:reference_manual, SO:ke] -synonym: "complex chromosomal mutation" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000147 -name: deficient_translocation -def: "A chromosomal deletion whereby a translocation occurs in which one of the four broken ends loses a segment before re-joining." [FB:reference_manual] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfT" RELATED [] -synonym: "deficient translocation" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000044 ! chromosomal_translocation -intersection_of: SO:1000044 ! chromosomal_translocation -intersection_of: has_part SO:0000159 ! deletion -intersection_of: has_part SO:0000199 ! 
translocation - -[Term] -id: SO:1000148 -name: inversion_cum_translocation -def: "A chromosomal translocation whereby the first two breaks are in the same chromosome, and the region between them is rejoined in inverted order to the other side of the first break, such that both sides of break one are present on the same chromosome. The remaining free ends are joined as a translocation with those resulting from the third break." [FB:reference_manual] -synonym: "(Drosophila)InT" RELATED [] -synonym: "(Drosophila)T" RELATED [] -synonym: "inversion cum translocation" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000044 ! chromosomal_translocation -intersection_of: SO:1000028 ! intrachromosomal_mutation -intersection_of: has_part SO:0000199 ! translocation -intersection_of: has_part SO:1000036 ! inversion - -[Term] -id: SO:1000149 -name: bipartite_duplication -def: "An interchromosomal mutation whereby the (large) region between the first two breaks listed is lost, and the two flanking segments (one of them centric) are joined as a translocation to the free ends resulting from the third break." [FB:reference_manual] -synonym: "(Drosophila)bDp" RELATED [] -synonym: "bipartite duplication" EXACT [] -is_a: SO:1000031 ! interchromosomal_mutation -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:1000150 -name: cyclic_translocation -def: "A chromosomal translocation whereby three breaks occurred in three different chromosomes. The centric segment resulting from the first break listed is joined to the acentric segment resulting from the second, rather than the third." [FB:reference_manual] -synonym: "cyclic translocation" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000151 -name: bipartite_inversion -def: "A chromosomal inversion caused by three breaks in the same chromosome; both central segments are inverted in place (i.e., they are not transposed)." 
[FB:reference_manual] -synonym: "(Drosophila)bIn" RELATED [] -synonym: "bipartite inversion" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000152 -name: uninverted_insertional_duplication -def: "An insertional duplication where a copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)eDp" RELATED [] -synonym: "uninverted insertional duplication" EXACT [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000153 -name: inverted_insertional_duplication -def: "An insertional duplication where a copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)iDp" RELATED [] -synonym: "inverted insertional duplication" EXACT [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000154 -name: insertional_duplication -def: "A chromosome duplication involving the insertion of a duplicated region (as opposed to a free duplication)." [SO:ke] -synonym: "(Drosophila)Dpp" RELATED [] -synonym: "insertional duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000155 -name: interchromosomal_transposition -def: "A chromosome structure variation whereby a transposition occurred between chromosomes." [SO:ke] -synonym: "(Drosophila)Tp" RELATED [] -synonym: "interchromosomal transposition" EXACT [] -is_a: SO:0000453 ! chromosomal_transposition -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000156 -name: inverted_interchromosomal_transposition -def: "An interchromosomal transposition whereby a copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segment." 
[FB:reference_manual] -synonym: "(Drosophila)iTp" RELATED [] -synonym: "inverted interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000157 -name: uninverted_interchromosomal_transposition -def: "An interchromosomal transition where the segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)eTp" RELATED [] -synonym: "uninverted interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000158 -name: inverted_intrachromosomal_transposition -def: "An intrachromosomal transposition whereby the segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)iTp" RELATED [] -synonym: "inverted intrachromosomal transposition" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000041 ! intrachromosomal_transposition -is_a: SO:1000148 ! implied link automatically realized ! inversion_cum_translocation -intersection_of: SO:1000041 ! intrachromosomal_transposition -intersection_of: has_part SO:1000036 ! inversion - -[Term] -id: SO:1000159 -name: uninverted_intrachromosomal_transposition -def: "An intrachromosomal transposition whereby the segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)eTp" RELATED [] -synonym: "uninverted intrachromosomal transposition" EXACT [] -is_a: SO:1000041 ! intrachromosomal_transposition -intersection_of: SO:1000041 ! intrachromosomal_transposition -intersection_of: has_part SO:0001514 ! 
direct - -[Term] -id: SO:1000160 -name: unoriented_insertional_duplication -def: "An insertional duplication where a copy of the segment between the first two breaks listed is inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [FB:reference_manual] -comment: Flag - unknown in the definition. -synonym: "(Drosophila)uDp" RELATED [] -synonym: "unoriented insertional duplication" EXACT [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000161 -name: unoriented_interchromosomal_transposition -def: "An interchromosomal transposition whereby a copy of the segment between the first two breaks listed is inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [FB:reference_manual] -comment: FLAG - term describes an unknown. -synonym: "(Drosophila)uTp" RELATED [] -synonym: "unorientated interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000162 -name: unoriented_intrachromosomal_transposition -def: "An intrachromosomal transposition whereby the segment between the first two breaks listed is removed and inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [FB:reference_manual] -comment: FLAG - definition describes an unknown. -synonym: "(Drosophila)uTp" RELATED [] -synonym: "unorientated intrachromosomal transposition" EXACT [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000170 -name: uncharacterised_chromosomal_mutation -synonym: "uncharacterised chromosomal mutation" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000171 -name: deficient_inversion -def: "A chromosomal deletion whereby three breaks occur in the same chromosome; one central region is lost, and the other is inverted." 
[FB:reference_manual, SO:ke] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfIn" RELATED [] -synonym: "deficient inversion" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000030 ! chromosomal_inversion -intersection_of: SO:1000028 ! intrachromosomal_mutation -intersection_of: has_part SO:0000159 ! deletion -intersection_of: has_part SO:1000036 ! inversion - -[Term] -id: SO:1000173 -name: tandem_duplication -def: "A duplication consisting of 2 identical regions, which are adjacent." [SO:ke] -synonym: "tandem duplication" EXACT [] -is_a: SO:1000035 ! duplication - -[Term] -id: SO:1000175 -name: partially_characterised_chromosomal_mutation -synonym: "partially characterised chromosomal mutation" EXACT [] -is_a: SO:1000170 ! uncharacterised_chromosomal_mutation - -[Term] -id: SO:1000180 -name: sequence_variant_affecting_gene_structure -def: "A sequence_variant_effect that changes the gene structure." [SO:ke] -synonym: "mutation affecting gene structure" RELATED [] -synonym: "sequence variant affecting gene structure" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:1000181 -name: sequence_variant_causing_gene_fusion -def: "A sequence_variant_effect that changes the gene structure by causing a fusion to another gene." [SO:ke] -synonym: "mutation causing gene fusion" RELATED [] -synonym: "sequence variant causing gene fusion" EXACT [] -is_a: SO:1000180 ! sequence_variant_affecting_gene_structure - -[Term] -id: SO:1000182 -name: chromosome_number_variation -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number." [SO:ke] -synonym: "chromosome number variation" EXACT [] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:1000183 -name: chromosome_structure_variation -synonym: "chromosome structure variation" EXACT [] -is_a: SO:0000240 ! 
chromosome_variation - -[Term] -id: SO:1000184 -name: sequence_variant_causes_exon_loss -def: "A sequence variant affecting splicing and causes an exon loss." [SO:ke] -synonym: "mutation causes exon loss" RELATED [] -synonym: "sequence variant causes exon loss" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000185 -name: sequence_variant_causes_intron_gain -def: "A sequence variant effect, causing an intron to be gained by the processed transcript; usually a result of a donor acceptor mutation (SO:1000072)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causes intron gain" RELATED [] -synonym: "sequence variant causes intron gain" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000186 -name: sequence_variant_causing_cryptic_splice_donor_activation -synonym: "sequence variant causing cryptic splice donor activation" EXACT [] -is_a: SO:1000074 ! sequence_variant_causing_cryptic_splice_activation - -[Term] -id: SO:1001186 -name: sequence_variant_causing_cryptic_splice_acceptor_activation -synonym: "sequence variant causing cryptic splice acceptor activation" EXACT [] -is_a: SO:1000074 ! sequence_variant_causing_cryptic_splice_activation - -[Term] -id: SO:1001187 -name: alternatively_spliced_transcript -def: "A transcript that is alternatively spliced." [SO:xp] -synonym: "alternatively spliced transcript" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000877 ! alternatively_spliced - -[Term] -id: SO:1001188 -name: encodes_1_polypeptide -def: "A gene that is alternately spliced, but encodes only one polypeptide." [SO:ke] -synonym: "encodes 1 polypeptide" EXACT [] -is_a: SO:0000463 ! 
encodes_alternately_spliced_transcripts - -[Term] -id: SO:1001189 -name: encodes_greater_than_1_polypeptide -def: "A gene that is alternately spliced, and encodes more than one polypeptide." [SO:ke] -synonym: "encodes greater than 1 polypeptide" EXACT [] -is_a: SO:0000463 ! encodes_alternately_spliced_transcripts - -[Term] -id: SO:1001190 -name: encodes_different_polypeptides_different_stop -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences, but use different stop codons." [SO:ke] -synonym: "encodes different polypeptides different stop" EXACT [] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001191 -name: encodes_overlapping_peptides_different_start -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences, but use different start codons." [SO:ke] -synonym: "encodes overlapping peptides different start" EXACT [] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001192 -name: encodes_disjoint_polypeptides -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that do not have overlapping peptide sequences." [SO:ke] -synonym: "encodes disjoint polypeptides" EXACT [] -is_a: SO:1001189 ! encodes_greater_than_1_polypeptide - -[Term] -id: SO:1001193 -name: encodes_overlapping_polypeptides_different_start_and_stop -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences, but use different start and stop codons." [SO:ke] -synonym: "encodes overlapping polypeptides different start and stop" EXACT [] -is_a: SO:1001195 ! 
encodes_overlapping_peptides - -[Term] -id: SO:1001194 -name: alternatively_spliced_gene_encoding_greater_than_1_polypeptide_coding_regions_overlapping -is_obsolete: true - -[Term] -id: SO:1001195 -name: encodes_overlapping_peptides -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences." [SO:ke] -synonym: "encodes overlapping peptides" EXACT [] -is_a: SO:1001189 ! encodes_greater_than_1_polypeptide - -[Term] -id: SO:1001196 -name: cryptogene -def: "A maxicircle gene so extensively edited that it cannot be matched to its edited mRNA sequence." [SO:ma] -is_a: SO:0000654 ! implied link automatically realized ! maxicircle_gene -is_a: SO:0001431 ! implied link automatically realized ! cryptic_gene -intersection_of: SO:0000654 ! maxicircle_gene -intersection_of: has_quality SO:0000976 ! cryptic - -[Term] -id: SO:1001197 -name: dicistronic_primary_transcript -def: "A primary transcript that has the quality dicistronic." [SO:xp] -synonym: "dicistronic primary transcript" EXACT [] -is_a: SO:0000079 ! implied link automatically realized ! dicistronic_transcript -is_a: SO:0000631 ! implied link automatically realized ! polycistronic_primary_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:1001217 -name: member_of_regulon -synonym: "member of regulon" EXACT [] -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:1001244 -name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non_overlapping -synonym: "alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non-overlapping" RELATED [] -is_obsolete: true - -[Term] -id: SO:1001246 -name: CDS_independently_known -def: "A CDS with the evidence status of being independently known." 
[SO:xp] -synonym: "CDS independently known" EXACT [] -is_a: SO:0000316 ! implied link automatically realized ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000906 ! independently_known - -[Term] -id: SO:1001247 -name: orphan_CDS -def: "A CDS whose predicted amino acid sequence is unsupported by any experimental evidence or by any match with any other known sequence." [SO:ma] -synonym: "orphan CDS" EXACT [] -is_a: SO:1001254 ! implied link automatically realized ! CDS_predicted -intersection_of: SO:1001254 ! CDS_predicted -intersection_of: has_origin SO:0000910 ! orphan - -[Term] -id: SO:1001249 -name: CDS_supported_by_domain_match_data -def: "A CDS that is supported by domain similarity." [SO:xp] -synonym: "CDS supported by domain match data" EXACT [] -is_a: SO:1001251 ! implied link automatically realized ! CDS_supported_by_sequence_similarity_data -intersection_of: SO:1001251 ! CDS_supported_by_sequence_similarity_data -intersection_of: has_quality SO:0000908 ! supported_by_domain_match - -[Term] -id: SO:1001251 -name: CDS_supported_by_sequence_similarity_data -def: "A CDS that is supported by sequence similarity data." [SO:xp] -synonym: "CDS supported by sequence similarity data" EXACT [] -is_a: SO:1001254 ! implied link automatically realized ! CDS_predicted -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:1001254 -name: CDS_predicted -def: "A CDS that is predicted." [SO:ke] -synonym: "CDS predicted" EXACT [] -is_a: SO:0000316 ! implied link automatically realized ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000732 ! predicted - -[Term] -id: SO:1001255 -name: status_of_coding_sequence -is_obsolete: true - -[Term] -id: SO:1001259 -name: CDS_supported_by_EST_or_cDNA_data -def: "A CDS that is supported by similarity to EST or cDNA data." [SO:xp] -synonym: "CDS supported by EST or cDNA data" EXACT [] -is_a: SO:1001251 ! 
implied link automatically realized ! CDS_supported_by_sequence_similarity_data -intersection_of: SO:1001251 ! CDS_supported_by_sequence_similarity_data -intersection_of: has_quality SO:0000909 ! supported_by_EST_or_cDNA - -[Term] -id: SO:1001260 -name: internal_Shine_Dalgarno_sequence -def: "A Shine-Dalgarno sequence that stimulates recoding through interactions with the anti-Shine-Dalgarno in the RNA of small ribosomal subunits of translating ribosomes. The signal is only operative in Bacteria." [PMID:12519954, SO:ke] -synonym: "internal Shine Dalgarno sequence" EXACT [] -synonym: "internal Shine-Dalgarno sequence" EXACT [] -is_a: SO:0000243 ! internal_ribosome_entry_site -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001261 -name: recoded_mRNA -def: "The sequence of a mature mRNA transcript, modified before translation or during translation, usually by special cis-acting signals." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract] -synonym: "recoded mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000881 ! recoded - -[Term] -id: SO:1001262 -name: minus_1_translationally_frameshifted -def: "An attribute describing a translational frameshift of -1." [SO:ke] -synonym: "minus 1 translationally frameshifted" EXACT [] -is_a: SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:1001263 -name: plus_1_translationally_frameshifted -def: "An attribute describing a translational frameshift of +1." [SO:ke] -synonym: "plus 1 translationally frameshifted" EXACT [] -is_a: SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:1001264 -name: mRNA_recoded_by_translational_bypass -def: "A recoded_mRNA where translation was suspended at a particular codon and resumed at a particular non-overlapping downstream codon." 
[http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract] -synonym: "mRNA recoded by translational bypass" EXACT [] -is_a: SO:1001261 ! implied link automatically realized ! recoded_mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:1001265 -name: mRNA_recoded_by_codon_redefinition -def: "A recoded_mRNA that was modified by an alteration of codon meaning." [SO:ma] -synonym: "mRNA recoded by codon redefinition" EXACT [] -is_a: SO:1001261 ! implied link automatically realized ! recoded_mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000882 ! codon_redefined - -[Term] -id: SO:1001266 -name: stop_codon_redefinition_as_selenocysteine -is_obsolete: true - -[Term] -id: SO:1001267 -name: stop_codon_readthrough -is_obsolete: true - -[Term] -id: SO:1001268 -name: recoding_stimulatory_region -def: "A site in an mRNA sequence that stimulates the recoding of a region in the same mRNA." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12519954&dopt=Abstract] -synonym: "recoding stimulatory region" EXACT [] -synonym: "recoding stimulatory signal" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:1001269 -name: four_bp_start_codon -def: "A non-canonical start codon with 4 base pairs." [SO:ke] -synonym: "4bp start codon" EXACT [] -synonym: "four bp start codon" EXACT [] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001270 -name: stop_codon_redefinition_as_pyrrolysine -is_obsolete: true - -[Term] -id: SO:1001271 -name: archaeal_intron -def: "An intron characteristic of Archaeal tRNA and rRNA genes, where intron transcript generates a bulge-helix-bulge motif that is recognised by a splicing endoribonuclease." [PMID:9301331, SO:ma] -comment: Intron characteristic of tRNA genes; splices by an endonuclease-ligase mediated mechanism. 
-synonym: "archaeal intron" EXACT [] -is_a: SO:0001216 ! endonuclease_spliced_intron - -[Term] -id: SO:1001272 -name: tRNA_intron -def: "An intron found in tRNA that is spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -comment: Could be a cross product with Gene ontology, GO:0006388. -synonym: "pre-tRNA intron" EXACT [] -synonym: "tRNA intron" EXACT [] -is_a: SO:0001216 ! endonuclease_spliced_intron - -[Term] -id: SO:1001273 -name: CTG_start_codon -def: "A non-canonical start codon of sequence CTG." [SO:ke] -synonym: "CTG start codon" EXACT [] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001274 -name: SECIS_element -def: "The incorporation of selenocysteine into a protein sequence is directed by an in-frame UGA codon (usually a stop codon) within the coding region of the mRNA. Selenoprotein mRNAs contain a conserved secondary structure in the 3' UTR that is required for the distinction of UGA stop from UGA selenocysteine. The selenocysteine insertion sequence (SECIS) is around 60 nt in length and adopts a hairpin structure which is sufficiently well-defined and conserved to act as a computational screen for selenoprotein genes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00031] -synonym: "SECIS element" EXACT [] -xref: http://en.wikipedia.org/wiki/SECIS_element "wiki" -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001275 -name: retron -def: "Sequence coding for a short, single-stranded, DNA sequence via a retrotransposed RNA intermediate; characteristic of some microbial genomes." [SO:ma] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1001277 -name: three_prime_recoding_site -def: "The recoding stimulatory signal located downstream of the recoding site." [SO:ke] -synonym: "three prime recoding site" EXACT [] -is_a: SO:1001268 ! 
recoding_stimulatory_region - -[Term] -id: SO:1001279 -name: three_prime_stem_loop_structure -def: "A recoding stimulatory region, the stem-loop secondary structural element is downstream of the redefined region." [PMID:12519954, SO:ke] -synonym: "three prime stem loop structure" EXACT [] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001280 -name: five_prime_recoding_site -def: "The recoding stimulatory signal located upstream of the recoding site." [SO:ke] -synonym: "five prime recoding site" EXACT [] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001281 -name: flanking_three_prime_quadruplet_recoding_signal -def: "Four base pair sequence immediately downstream of the redefined region. The redefined region is a frameshift site. The quadruplet is 2 overlapping codons." [PMID:12519954, SO:ke] -synonym: "flanking three prime quadruplet recoding signal" EXACT [] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001282 -name: UAG_stop_codon_signal -def: "A stop codon signal for a UAG stop codon redefinition." [SO:ke] -synonym: "UAG stop codon signal" EXACT [] -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001283 -name: UAA_stop_codon_signal -def: "A stop codon signal for a UAA stop codon redefinition." [SO:ke] -synonym: "UAA stop codon signal" EXACT [] -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001284 -name: regulon -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." [ISBN:0198506732] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Regulon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:1001285 -name: UGA_stop_codon_signal -def: "A stop codon signal for a UGA stop codon redefinition." [SO:ke] -synonym: "UGA stop codon signal" EXACT [] -is_a: SO:1001288 ! 
stop_codon_signal - -[Term] -id: SO:1001286 -name: three_prime_repeat_recoding_signal -def: "A recoding stimulatory signal, downstream sequence important for recoding that contains repetitive elements." [PMID:12519954, SO:ke] -synonym: "three prime repeat recoding signal" EXACT [] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001287 -name: distant_three_prime_recoding_signal -def: "A recoding signal that is found many hundreds of nucleotides 3' of a redefined stop codon." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8709208&dopt=Abstract] -synonym: "distant three prime recoding signal" EXACT [] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001288 -name: stop_codon_signal -def: "A recoding stimulatory signal that is a stop codon and has effect on efficiency of recoding." [PMID:12519954, SO:ke] -comment: This term does not include the stop codons that are redefined. An example would be a stop codon that partially overlapped a frame shifting site would be an example stimulatory signal. -synonym: "stop codon signal" EXACT [] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:2000061 -name: databank_entry -def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -synonym: "databank entry" EXACT [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:3000000 -name: gene_segment -def: "A gene component region which acts as a recombinational unit of a gene whose functional form is generated through somatic recombination." [GOC:add] -comment: Requested by tracker 2021594, July 2008, by Alex. -synonym: "gene segment" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Typedef] -id: adjacent_to -name: adjacent_to -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence." 
[SO:ke] -subset: SOFA - -[Typedef] -id: associated_with -name: associated_with -comment: This relationship is vague and up for discussion. - -[Typedef] -id: complete_evidence_for_feature -name: complete_evidence_for_feature -def: "B is complete_evidence_for_feature A if the extent (5' and 3' boundaries) and internal boundaries of B fully support the extent and internal boundaries of A." [SO:ke] -comment: If A is a feature with multiple regions such as a multi exon transcript, the supporting EST evidence is complete if each of the regions is supported by an equivalent region in B. Also there must be no extra regions in B that are not represented in A. This relationship was requested by jeltje on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: derives_from -name: derives_from -subset: SOFA -is_transitive: true - -[Typedef] -id: edited_from -name: edited_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:45Z - -[Typedef] -id: edited_to -name: edited_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:11Z - -[Typedef] -id: evidence_for_feature -name: evidence_for_feature -def: "B is evidence_for_feature A, if an instance of B supports the existence of A." [SO:ke] -comment: This relationship was requested by nlw on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true - -[Typedef] -id: exemplar_of -name: exemplar_of -def: "X is exemplar of Y if X is the best evidence for Y." [SO:ke] -comment: Tracker id: 2594157. 
- -[Typedef] -id: genome_of -name: genome_of - -[Typedef] -id: guided_by -name: guided_by -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:04Z - -[Typedef] -id: guides -name: guides -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:24Z - -[Typedef] -id: has_genome_location -name: has_genome_location -is_obsolete: true - -[Typedef] -id: has_intergral_part -name: has_integral_part -def: "X has_integral_part Y if and only if: X has_part Y and Y part_of X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: mRNA has_integral_part CDS. -created_by: kareneilbeck -creation_date: 2009-08-19T12:01:46Z - -[Typedef] -id: has_origin -name: has_origin - -[Typedef] -id: has_part -name: has_part -def: "Inverse of part_of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: operon has_part gene. - -[Typedef] -id: has_quality -name: has_quality -comment: The relationship between a feature and an attribute. - -[Typedef] -id: homologous_to -name: homologous_to -subset: SOFA -is_symmetric: true -is_a: similar_to ! similar_to - -[Typedef] -id: integral_part_of -name: integral_part_of -def: "X integral_part_of Y if and only if: X part_of Y and Y has_part X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: exon integral_part_of transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:03:28Z - -[Typedef] -id: member_of -name: member_of -comment: A subtype of part_of. Inverse is collection_of. Winston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. -subset: SOFA -is_transitive: true - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -subset: SOFA -is_a: homologous_to ! homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! 
homologous_to - -[Typedef] -id: paralogous_to -name: paralogous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: part_of -name: part_of -def: "X part_of Y if X is a subregion of Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: amino_acid part_of polypeptide. -subset: SOFA -is_transitive: true - -[Typedef] -id: partial_evidence_for_feature -name: partial_evidence_for_feature -def: "B is partial_evidence_for_feature A if the extent of B supports part_of but not all of A." [SO:ke] -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: position_of -name: position_of - -[Typedef] -id: processed_from -name: processed_from -def: "Inverse of processed_into." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA processed_from miRNA_primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:14:00Z - -[Typedef] -id: processed_into -name: processed_into -def: "X is processed_into Y if a region X is modified to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA_primary_transcript processed into miRNA. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:15:02Z - -[Typedef] -id: recombined_from -name: recombined_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:21:03Z - -[Typedef] -id: recombined_to -name: recombined_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:20:07Z - -[Typedef] -id: regulated_by -name: regulated_by -is_obsolete: true - -[Typedef] -id: sequence_of -name: sequence_of - -[Typedef] -id: similar_to -name: similar_to -subset: SOFA -is_symmetric: true - -[Typedef] -id: trans_spliced_from -name: trans_spliced_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:14Z - -[Typedef] -id: trans_spliced_to -name: trans_spliced_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:00Z - -[Typedef] -id: transcribed_from -name: transcribed_from -def: "X is transcribed_from Y if X is synthesized from template Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: primary_transcript transcribed_from gene. -created_by: kareneilbeck -creation_date: 2009-08-19T12:05:39Z - -[Typedef] -id: transcribed_to -name: transcribed_to -def: "Inverse of transcribed_from." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: gene transcribed_to primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:08:24Z - -[Typedef] -id: translates_to -name: translates_to -def: "Inverse of translation _of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: codon translates_to amino_acid. -created_by: kareneilbeck -creation_date: 2009-08-19T12:11:53Z - -[Typedef] -id: translation_of -name: translation_of -def: "X is translation of Y if X is translated by ribosome to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: Polypeptide translation_of CDS. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:09:59Z - -[Typedef] -id: variant_of -name: variant_of -def: "A' is a variant (mutation) of A = definition every instance of A' is either an immediate mutation of some instance of A, or there is a chain of immediate mutation processes linking A' to some instance of A." [SO:immuno_workshop] -comment: Added to SO during the immunology workshop, June 2007. This relationship was approved by Barry Smith. - diff --git a/annotation/NBIS/Ontology/SO/so_2_4_3.obo b/annotation/NBIS/Ontology/SO/so_2_4_3.obo deleted file mode 100644 index e7d11c26e..000000000 --- a/annotation/NBIS/Ontology/SO/so_2_4_3.obo +++ /dev/null @@ -1,16818 +0,0 @@ -format-version: 1.2 -date: 01:06:2010 10:46 -saved-by: kareneilbeck -auto-generated-by: OBO-Edit 2.1-beta3 -subsetdef: biosapiens "biosapiens protein feature ontology" -subsetdef: SOFA "SO feature annotation" -synonymtypedef: aa1 "amino acid 1 letter code" -synonymtypedef: aa3 "amino acid 3 letter code" -synonymtypedef: AAMOD "amino acid modification" -synonymtypedef: BS "biosapiens" -synonymtypedef: dbsnp "dbsnp variant terms" -synonymtypedef: ebi_variants "ensembl variant terms" -synonymtypedef: RNAMOD "RNA modification" EXACT -default-namespace: sequence -remark: autogenerated-by\: DAG-Edit version 1.417\nsaved-by\: eilbeck\ndate\: Tue May 11 15\:18\:44 PDT 2004\nversion\: $Revision\: 1.45 $ - -[Term] -id: SO:0000000 -name: Sequence_Ontology -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000001 -name: region -def: "A sequence_feature with an extent greater than zero. A nucleotide region is composed of bases and a polypeptide region is composed of amino acids." [SO:ke] -subset: SOFA -synonym: "sequence" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000002 -name: sequence_secondary_structure -def: "A folded sequence." [SO:ke] -synonym: "sequence secondary structure" EXACT [] -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000003 -name: G_quartet -def: "G-quartets are unusual nucleic acid structures consisting of a planar arrangement where each guanine is hydrogen bonded by hoogsteen pairing to another guanine in the quartet." [http://www.ncbi.nlm.nih.gov/pubmed/7919797?dopt=Abstract] -synonym: "G quartet" EXACT [] -synonym: "G tetrad" EXACT [] -synonym: "G-quadruplex" EXACT [] -synonym: "G-quartet" EXACT [] -synonym: "G-tetrad" EXACT [] -synonym: "G_quadruplex" EXACT [] -synonym: "guanine tetrad" EXACT [] -xref: http://en.wikipedia.org/wiki/G-quadruplex "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000004 -name: interior_coding_exon -subset: SOFA -synonym: "interior coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000005 -name: satellite_DNA -def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "satellite DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Satellite_DNA "wiki" -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000006 -name: PCR_product -def: "A region amplified by a PCR reaction." [SO:ke] -comment: This term is mapped to MGED. This term is now located in OBI, with the following ID OBI_0000406. -subset: SOFA -synonym: "amplicon" RELATED [] -synonym: "PCR product" EXACT [] -xref: http://en.wikipedia.org/wiki/RAPD "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000007 -name: read_pair -def: "A pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -synonym: "read-pair" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! 
contig - -[Term] -id: SO:0000008 -name: gene_sensu_your_favorite_organism -is_obsolete: true - -[Term] -id: SO:0000009 -name: gene_class -is_obsolete: true - -[Term] -id: SO:0000010 -name: protein_coding -synonym: "protein-coding" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000011 -name: non_protein_coding -synonym: "non protein-coding" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000012 -name: scRNA_primary_transcript -def: "The primary transcript of any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -synonym: "scRNA primary transcript" EXACT [] -synonym: "scRNA transcript" EXACT [] -synonym: "small cytoplasmic RNA" RELATED [] -synonym: "small cytoplasmic RNA transcript" EXACT [] -synonym: "small_cytoplasmic_RNA" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000013 -name: scRNA -def: "Any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a Eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -subset: SOFA -synonym: "small cytoplasmic RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000012 ! scRNA_primary_transcript - -[Term] -id: SO:0000014 -name: INR_motif -def: "A sequence element characteristic of some RNA polymerase II promoters required for the correct positioning of the polymerase for the start of transcription. Overlaps the TSS. The mammalian consensus sequence is YYAN(T|A)YY; the Drosophila consensus sequence is TCA(G|T)t(T|C). In each the A is at position +1 with respect to the TSS. Functionally similar to the TATA box element." [PMID:12651739] -synonym: "DMp2" RELATED [] -synonym: "initiator" EXACT [] -synonym: "initiator motif" EXACT [] -synonym: "INR motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0000015 -name: DPE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters; Positioned from +28 to +32 with respect to the TSS (+1). Experimental results suggest that the DPE acts in conjunction with the INR_motif to provide a binding site for TFIID in the absence of a TATA box to mediate transcription of TATA-less promoters. Consensus sequence (A|G)G(A|T)(C|T)(G|A|C)." [PMID:12651739\:12537576] -synonym: "CRWMGCGWKCGCTTS" NARROW [] -synonym: "downstream core promoter element" EXACT [] -synonym: "DPE motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000016 -name: BRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements at -37 to -32 with respect to the TSS (+1). Consensus sequence is (G|C)(G|C)(G|A)CGCC. Binds TFIIB." [PMID:12651739] -synonym: "B-recognition element" EXACT [] -synonym: "BRE motif" EXACT [] -synonym: "TFIIB recognition element" RELATED [] -synonym: "transcription factor B-recognition element" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000017 -name: PSE_motif -def: "A sequence element characteristic of the promoters of snRNA genes transcribed by RNA polymerase II or by RNA polymerase III. Located between -45 and -60 relative to the TSS. The human PSE_motif consensus sequence is TCACCNTNA(C|G)TNAAAAG(T|G)." [PMID:12651739] -synonym: "proximal sequence element" EXACT [] -synonym: "PSE motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000167 ! promoter - -[Term] -id: SO:0000018 -name: linkage_group -def: "A group of loci that can be grouped in a linear order representing the different degrees of linkage among the genes concerned." 
[ISBN:038752046] -synonym: "linkage group" EXACT [] -xref: http://en.wikipedia.org/wiki/Linkage_group "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000020 -name: RNA_internal_loop -def: "A region of double stranded RNA where the bases do not conform to WC base pairing. The loop is closed on both sides by canonical base pairing. If the interruption to base pairing occurs on one strand only, it is known as a bulge." [SO:ke] -synonym: "RNA internal loop" EXACT [] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000021 -name: asymmetric_RNA_internal_loop -def: "An internal RNA loop where one of the strands includes more bases than the corresponding region on the other strand." [SO:ke] -synonym: "asymmetric RNA internal loop" EXACT [] -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000022 -name: A_minor_RNA_motif -def: "A region forming a motif, composed of adenines, where the minor groove edges are inserted into the minor groove of another helix." [SO:ke] -synonym: "A minor RNA motif" EXACT [] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000023 -name: K_turn_RNA_motif -def: "The kink turn (K-turn) is an RNA structural motif that creates a sharp (~120 degree) bend between two continuous helices." [SO:ke] -synonym: "K turn RNA motif" EXACT [] -synonym: "K-turn" EXACT [] -synonym: "kink turn" EXACT [] -synonym: "kink-turn motif" EXACT [] -xref: http://en.wikipedia.org/wiki/K-turn "wiki" -is_a: SO:0000021 ! asymmetric_RNA_internal_loop - -[Term] -id: SO:0000024 -name: sarcin_like_RNA_motif -def: "A loop in ribosomal RNA containing the sites of attack for ricin and sarcin." [http://www.ncbi.nlm.nih.gov/pubmed/7897662] -synonym: "sarcin like RNA motif" EXACT [] -synonym: "sarcin/ricin domain" EXACT [] -synonym: "sarcin/ricin loop" EXACT [] -synonym: "sarcin/ricin RNA domain" EXACT [] -is_a: SO:0000021 ! 
asymmetric_RNA_internal_loop - -[Term] -id: SO:0000025 -name: symmetric_RNA_internal_loop -def: "An internal RNA loop where the extent of the loop on both stands is the same size." [SO:ke] -synonym: "A-minor RNA motif" EXACT [] -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000026 -name: RNA_junction_loop -synonym: "RNA junction loop" EXACT [] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000027 -name: RNA_hook_turn -synonym: "hook turn" RELATED [] -synonym: "hook-turn motif" EXACT [] -synonym: "RNA hook turn" EXACT [] -is_a: SO:0000026 ! RNA_junction_loop - -[Term] -id: SO:0000028 -name: base_pair -synonym: "base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Base_pair "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000029 -name: WC_base_pair -def: "The canonical base pair, where two bases interact via WC edges, with glycosidic bonds oriented cis relative to the axis of orientation." [PMID:12177293] -synonym: "canonical base pair" EXACT [] -synonym: "Watson Crick base pair" EXACT [] -synonym: "Watson-Crick base pair" RELATED [] -synonym: "Watson-Crick pair" EXACT [] -synonym: "WC base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000030 -name: sugar_edge_base_pair -def: "A type of non-canonical base-pairing." [PMID:12177293] -synonym: "sugar edge base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000031 -name: aptamer -def: "DNA or RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http://aptamer.icmb.utexas.edu] -xref: http://en.wikipedia.org/wiki/Aptamer "wiki" -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000032 -name: DNA_aptamer -def: "DNA molecules that have been selected from random pools based on their ability to bind other molecules." [http:aptamer.icmb.utexas.edu] -synonym: "DNA aptamer" EXACT [] -is_a: SO:0000031 ! 
aptamer - -[Term] -id: SO:0000033 -name: RNA_aptamer -def: "RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http://aptamer.icmb.utexas.edu] -synonym: "RNA aptamer" EXACT [] -is_a: SO:0000031 ! aptamer - -[Term] -id: SO:0000034 -name: morpholino_oligo -def: "Morpholino oligos are synthesized from four different Morpholino subunits, each of which contains one of the four genetic bases (A, C, G, T) linked to a 6-membered morpholine ring. Eighteen to 25 subunits of these four subunit types are joined in a specific order by non-ionic phosphorodiamidate intersubunit linkages to give a Morpholino." [http://www.gene-tools.com/] -synonym: "morpholino oligo" EXACT [] -is_a: SO:0001247 ! implied link automatically realized ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001183 ! morpholino - -[Term] -id: SO:0000035 -name: riboswitch -def: "A riboswitch is a part of an mRNA that can act as a direct sensor of small molecules to control their own expression. A riboswitch is a cis element in the 5' end of an mRNA, that acts as a direct sensor of metabolites." [PMID:2820954] -synonym: "riboswitch RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Riboswitch "wiki" -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000036 -name: matrix_attachment_site -def: "A DNA region that is required for the binding of chromatin to the nuclear matrix." 
[SO:ma] -synonym: "MAR" EXACT [] -synonym: "matrix association region" EXACT [] -synonym: "matrix attachment region" EXACT [] -synonym: "matrix attachment site" EXACT [] -synonym: "nuclear matrix association region" EXACT [] -synonym: "nuclear matrix attachment site" EXACT [] -synonym: "S/MAR" EXACT [] -synonym: "S/MAR element" RELATED [] -synonym: "scaffold attachment site" EXACT [] -synonym: "scaffold matrix attachment region" EXACT [] -synonym: "SMAR" EXACT [] -xref: http://en.wikipedia.org/wiki/Matrix_attachment_site "wiki" -is_a: SO:0000626 ! chromosomal_regulatory_element - -[Term] -id: SO:0000037 -name: locus_control_region -def: "A DNA region that includes DNAse hypersensitive sites located 5' to a gene that confers the high-level, position-independent, and copy number-dependent expression to that gene." [SO:ma] -synonym: "LCR" EXACT [] -synonym: "locus control element" RELATED [] -synonym: "locus control region" EXACT [] -xref: http://en.wikipedia.org/wiki/Locus_control_region "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000038 -name: match_set -def: "A collection of match parts." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000039 -name: match_part -def: "A part of a match, for example an hsp from blast is a match_part." [SO:ke] -subset: SOFA -synonym: "match part" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: part_of SO:0000343 ! match - -[Term] -id: SO:0000040 -name: genomic_clone -def: "A clone of a DNA region of a genome." [SO:ma] -synonym: "genomic clone" EXACT [] -is_a: SO:0000151 ! implied link automatically realized ! clone -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000041 -name: sequence_operation -def: "An operation that can be applied to a sequence, that results in a change." 
[SO:ke] -synonym: "sequence operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000042 -name: pseudogene_attribute -def: "An attribute of a pseudogene (SO:0000336)." [SO:ma] -synonym: "pseudogene attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000043 -name: processed_pseudogene -def: "A pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promoters, but often including a polyA tail." [SO:xp] -comment: Please not the synonym R psi M uses the spelled out form of the greek letter. -synonym: "processed pseudogene" EXACT [] -synonym: "pseudogene by reverse transcription" RELATED [] -synonym: "R psi G" RELATED [] -synonym: "retropseudogene" EXACT [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0000044 -name: pseudogene_by_unequal_crossing_over -def: "A pseudogene caused by unequal crossing over at recombination." [SO:ke] -synonym: "pseudogene by unequal crossing over" EXACT [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0000045 -name: delete -def: "To remove a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000046 -name: insert -def: "To insert a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000047 -name: invert -def: "To invert a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000048 -name: substitute -def: "To substitute a subsection of sequence for another." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000049 -name: translocate -def: "To translocate a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000050 -name: gene_part -def: "A part of a gene, that has no other route in the ontology back to region. This concept is necessary for logical inference as these parts must have the properties of region. It also allows us to associate all the parts of genes with a gene." 
[SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000051 -name: probe -def: "A DNA sequence used experimentally to detect the presence or absence of a complementary nucleic acid." [SO:ma] -xref: http://en.wikipedia.org/wiki/Hybridization_probe "wiki" -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000052 -name: assortment_derived_deficiency -synonym: "assortment-derived_deficiency" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000053 -name: sequence_variant_affecting_regulatory_region -def: "A sequence_variant_effect which changes the regulatory region of a gene." [SO:ke] -synonym: "mutation affecting regulatory region" RELATED [] -synonym: "sequence variant affecting regulatory region" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0000054 -name: aneuploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Aneuploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0000055 -name: hyperploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number as extra chromosomes are present." [SO:ke] -xref: http://en.wikipedia.org/wiki/Hyperploid "wiki" -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000056 -name: hypoploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number as some chromosomes are missing." [SO:ke] -xref: http://en.wikipedia.org/wiki/Hypoploid "wiki" -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000057 -name: operator -def: "A regulatory element of an operon to which activators or repressors bind thereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -synonym: "operator segment" EXACT [] -xref: http://en.wikipedia.org/wiki/Operator_(biology)#Operator "wiki" -is_a: SO:0000752 ! 
gene_group_regulatory_region - -[Term] -id: SO:0000058 -name: assortment_derived_aneuploid -synonym: "assortment-derived_aneuploid" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000059 -name: nuclease_binding_site -def: "A region of a molecule that binds to a nuclease." [SO:cb] -subset: SOFA -synonym: "nuclease binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0000060 -name: compound_chromosome_arm -comment: FLAG - this term is should probably be a part of rather than an is_a. -synonym: "compound chromosome arm" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:0000061 -name: restriction_enzyme_binding_site -def: "A region of a molecule that binds to a restriction enzyme." [SO:cb] -synonym: "restriction endonuclease binding site" EXACT [] -synonym: "restriction endonuclease recognition site" RELATED [] -synonym: "restriction enzyme binding site" EXACT [] -synonym: "restriction enzyme recognition site" RELATED [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000062 -name: deficient_intrachromosomal_transposition -def: "An intrachromosomal transposition whereby a translocation in which one of the four broken ends loses a segment before re-joining." [FB:reference_manual] -synonym: "deficient intrachromosomal transposition" EXACT [] -is_a: SO:1000029 ! implied link automatically realized ! chromosomal_deletion -is_a: SO:1000041 ! intrachromosomal_transposition -intersection_of: SO:1000041 ! intrachromosomal_transposition -intersection_of: has_part SO:0000159 ! deletion - -[Term] -id: SO:0000063 -name: deficient_interchromosomal_transposition -def: "An interchromosomal transposition whereby a translocation in which one of the four broken ends loses a segment before re-joining." [SO:ke] -synonym: "deficient interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! 
interchromosomal_transposition - -[Term] -id: SO:0000064 -name: gene_by_transcript_attribute -comment: This classes of attributes was added by MA to allow the broad description of genes based on qualities of the transcript(s). A product of SO meeting 2004. -is_obsolete: true - -[Term] -id: SO:0000065 -name: free_chromosome_arm -def: "A chromosome structure variation whereby an arm exists as an individual chromosome element." [SO:ke] -synonym: "free chromosome arm" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000066 -name: gene_by_polyadenylation_attribute -is_obsolete: true - -[Term] -id: SO:0000067 -name: gene_to_gene_feature -synonym: "gene to gene feature" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000068 -name: overlapping -def: "An attribute describing a gene that has a sequence that overlaps the sequence of another gene." [SO:ke] -is_a: SO:0000067 ! gene_to_gene_feature - -[Term] -id: SO:0000069 -name: inside_intron -def: "An attribute to describe a gene when it is located within the intron of another gene." [SO:ke] -synonym: "inside intron" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000070 -name: inside_intron_antiparallel -def: "An attribute to describe a gene when it is located within the intron of another gene and on the opposite strand." [SO:ke] -synonym: "inside intron antiparallel" EXACT [] -is_a: SO:0000069 ! inside_intron - -[Term] -id: SO:0000071 -name: inside_intron_parallel -def: "An attribute to describe a gene when it is located within the intron of another gene and on the same strand." [SO:ke] -synonym: "inside intron parallel" EXACT [] -is_a: SO:0000069 ! inside_intron - -[Term] -id: SO:0000072 -name: end_overlapping_gene -is_obsolete: true - -[Term] -id: SO:0000073 -name: five_prime_three_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's 3' region." 
[SO:ke] -synonym: "five prime-three prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000074 -name: five_prime_five_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's five prime region." [SO:ke] -synonym: "five prime-five prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000075 -name: three_prime_three_prime_overlap -def: "An attribute to describe a gene when the 3' region overlaps with another gene's 3' region." [SO:ke] -synonym: "three prime-three prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000076 -name: three_prime_five_prime_overlap -def: "An attribute to describe a gene when the 3' region overlaps with another gene's 5' region." [SO:ke] -synonym: "5' 3' overlap" EXACT [] -synonym: "three prime five prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000077 -name: antisense -def: "A region sequence that is complementary to a sequence of messenger RNA." [SO:ke] -xref: http://en.wikipedia.org/wiki/Antisense "wiki" -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000078 -name: polycistronic_transcript -def: "A transcript that is polycistronic." [SO:xp] -synonym: "polycistronic transcript" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000079 -name: dicistronic_transcript -def: "A transcript that is dicistronic." [SO:ke] -synonym: "dicistronic transcript" EXACT [] -is_a: SO:0000078 ! implied link automatically realized ! polycistronic_transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000080 -name: operon_member -synonym: "operon member" EXACT [] -is_a: SO:0000081 ! 
gene_array_member - -[Term] -id: SO:0000081 -name: gene_array_member -synonym: "gene array member" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000082 -name: processed_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000083 -name: macronuclear_sequence -synonym: "macronuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000084 -name: micronuclear_sequence -synonym: "micronuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000085 -name: gene_by_genome_location -is_obsolete: true - -[Term] -id: SO:0000086 -name: gene_by_organelle_of_genome -is_obsolete: true - -[Term] -id: SO:0000087 -name: nuclear_gene -def: "A gene from nuclear sequence." [SO:xp] -synonym: "nuclear gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_gene "wiki" -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000088 -name: mt_gene -def: "A gene located in mitochondrial sequence." [SO:xp] -synonym: "mitochondrial gene" EXACT [] -synonym: "mt gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_gene "wiki" -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000089 -name: kinetoplast_gene -def: "A gene located in kinetoplast sequence." [SO:xp] -synonym: "kinetoplast gene" EXACT [] -is_a: SO:0000088 ! implied link automatically realized ! mt_gene -intersection_of: SO:0000088 ! mt_gene -intersection_of: has_origin SO:0000741 ! kinetoplast - -[Term] -id: SO:0000090 -name: plastid_gene -def: "A gene from plastid sequence." [SO:xp] -synonym: "plastid gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000740 ! 
plastid_sequence - -[Term] -id: SO:0000091 -name: apicoplast_gene -def: "A gene from apicoplast sequence." [SO:xp] -synonym: "apicoplast gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0000092 -name: ct_gene -def: "A gene from chloroplast sequence." [SO:xp] -synonym: "chloroplast gene" EXACT [] -synonym: "ct gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000093 -name: chromoplast_gene -def: "A gene from chromoplast_sequence." [SO:xp] -synonym: "chromoplast gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000094 -name: cyanelle_gene -def: "A gene from cyanelle sequence." [SO:xp] -synonym: "cyanelle gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000095 -name: leucoplast_gene -def: "A plastid gene from leucoplast sequence." [SO:xp] -synonym: "leucoplast gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000096 -name: proplastid_gene -def: "A gene from proplastid sequence." [SO:ke] -synonym: "proplastid gene" EXACT [] -is_a: SO:0000090 ! implied link automatically realized ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000748 ! 
proplastid_sequence - -[Term] -id: SO:0000097 -name: nucleomorph_gene -def: "A gene from nucleomorph sequence." [SO:xp] -synonym: "nucleomorph gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000098 -name: plasmid_gene -def: "A gene from plasmid sequence." [SO:xp] -synonym: "plasmid gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000749 ! plasmid_location - -[Term] -id: SO:0000099 -name: proviral_gene -def: "A gene from proviral sequence." [SO:xp] -synonym: "proviral gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000751 ! proviral_location - -[Term] -id: SO:0000100 -name: endogenous_retroviral_gene -def: "A proviral gene with origin endogenous retrovirus." [SO:xp] -synonym: "endogenous retroviral gene" EXACT [] -is_a: SO:0000099 ! implied link automatically realized ! proviral_gene -intersection_of: SO:0000099 ! proviral_gene -intersection_of: has_origin SO:0000903 ! endogenous_retroviral_sequence - -[Term] -id: SO:0000101 -name: transposable_element -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -synonym: "transposable element" EXACT [] -synonym: "transposon" EXACT [] -xref: http://en.wikipedia.org/wiki/Transposable_element "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000102 -name: expressed_sequence_match -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -synonym: "expressed sequence match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -def: "The end of the clone insert." 
[SO:ke] -subset: SOFA -synonym: "clone insert end" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000104 -name: polypeptide -alt_id: SO:0000358 -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The term 'protein' was merged with 'polypeptide'. Although 'protein' was a sequence_attribute and therefore meant to describe the quality rather than an actual feature, it was being used erroneously. It is replaced by 'peptidyl' as the polymer attribute. -subset: SOFA -synonym: "protein" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypeptide "wiki" -is_a: SO:0001411 ! biological_region -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000105 -name: chromosome_arm -def: "A region of the chromosome between the centromere and the telomere. Human chromosomes have two arms, the p arm (short) and the q arm (long) which are separated from each other by the centromere." [http://www.medterms.com/script/main/art.asp?articlekey=5152] -synonym: "chromosome arm" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000106 -name: non_capped_primary_transcript -is_obsolete: true - -[Term] -id: SO:0000107 -name: sequencing_primer -synonym: "sequencing primer" EXACT [] -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000108 -name: mRNA_with_frameshift -def: "An mRNA with a frameshift." [SO:xp] -synonym: "frameshifted mRNA" EXACT [] -synonym: "mRNA with frameshift" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000109 -name: sequence_variant_obs -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." 
[SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000110 -name: sequence_feature -def: "An extent of biological sequence." [SO:ke] -subset: SOFA -synonym: "located sequence feature" RELATED [] -synonym: "located_sequence_feature" EXACT [] -synonym: "sequence feature" EXACT [] -disjoint_from: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000111 -name: transposable_element_gene -def: "A gene encoded within a transposable element. For example gag, int, env and pol are the transposable element genes of the TY element in yeast." [SO:ke] -synonym: "transposable element gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: part_of SO:0000101 ! transposable_element -relationship: part_of SO:0000101 ! transposable_element - -[Term] -id: SO:0000112 -name: primer -def: "A short preexisting polynucleotide chain to which new deoxyribonucleotides can be added by DNA polymerase." [http://www.ornl.gov/TechResources/Human_Genome/publicat/primer2001/glossary.html] -subset: SOFA -synonym: "DNA primer" EXACT [] -synonym: "primer oligonucleotide" EXACT [] -synonym: "primer polynucleotide" EXACT [] -synonym: "primer sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Primer_(molecular_biology) "wiki" -is_a: SO:0000441 ! ss_oligo - -[Term] -id: SO:0000113 -name: proviral_region -def: "A viral sequence which has integrated into a host genome." [SO:ke] -subset: SOFA -synonym: "proviral region" EXACT [] -synonym: "proviral sequence" RELATED [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000114 -name: methylated_C -def: "A methylated deoxy-cytosine." [SO:ke] -subset: SOFA -synonym: "methylated C" EXACT [] -synonym: "methylated cytosine" EXACT [] -synonym: "methylated cytosine base" EXACT [] -synonym: "methylated cytosine residue" EXACT [] -is_a: SO:0000306 ! 
methylated_base_feature - -[Term] -id: SO:0000115 -name: transcript_feature -is_obsolete: true - -[Term] -id: SO:0000116 -name: edited -def: "An attribute describing a sequence that is modified by editing." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000117 -name: transcript_with_readthrough_stop_codon -is_obsolete: true - -[Term] -id: SO:0000118 -name: transcript_with_translational_frameshift -def: "A transcript with a translational frameshift." [SO:xp] -synonym: "transcript with translational frameshift" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000119 -name: regulated -def: "An attribute to describe a sequence that is regulated." [SO:ke] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns. -subset: SOFA -synonym: "pre mRNA" RELATED [] -synonym: "protein coding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000121 -name: forward_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "DNA forward primer" EXACT [] -synonym: "forward DNA primer" EXACT [] -synonym: "forward primer" EXACT [] -synonym: "forward primer oligo" EXACT [] -synonym: "forward primer oligonucleotide" EXACT [] -synonym: "forward primer polynucleotide" EXACT [] -synonym: "forward primer sequence" EXACT [] -is_a: SO:0000112 ! implied link automatically realized ! primer -intersection_of: SO:0000112 ! primer -intersection_of: has_quality SO:0001030 ! 
forward - -[Term] -id: SO:0000122 -name: RNA_sequence_secondary_structure -def: "A folded RNA sequence." [SO:ke] -synonym: "RNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000123 -name: transcriptionally_regulated -def: "An attribute describing a gene that is regulated at transcription." [SO:ma] -comment: By:. -synonym: "transcriptionally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -def: "Expressed in relatively constant amounts without regard to cellular environmental conditions such as the concentration of a particular substrate." [SO:ke] -synonym: "transcriptionally constitutive" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -def: "An inducer molecule is required for transcription to occur." [SO:ke] -synonym: "transcriptionally induced" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -def: "A repressor molecule is required for transcription to stop." [SO:ke] -synonym: "transcriptionally repressed" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000127 -name: silenced_gene -def: "A gene that is silenced." [SO:xp] -synonym: "silenced gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000893 ! silenced - -[Term] -id: SO:0000128 -name: gene_silenced_by_DNA_modification -def: "A gene that is silenced by DNA modification." [SO:xp] -synonym: "gene silenced by DNA modification" EXACT [] -is_a: SO:0000127 ! implied link automatically realized ! silenced_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000129 -name: gene_silenced_by_DNA_methylation -def: "A gene that is silenced by DNA methylation." 
[SO:xp] -synonym: "gene silenced by DNA methylation" EXACT [] -synonym: "methylation-silenced gene" EXACT [] -is_a: SO:0000128 ! implied link automatically realized ! gene_silenced_by_DNA_modification -intersection_of: SO:0000128 ! gene_silenced_by_DNA_modification -intersection_of: has_quality SO:0000895 ! silenced_by_DNA_methylation - -[Term] -id: SO:0000130 -name: post_translationally_regulated -def: "An attribute describing a gene that is regulated after it has been translated." [SO:ke] -synonym: "post translationally regulated" EXACT [] -synonym: "post-translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000131 -name: translationally_regulated -def: "An attribute describing a gene that is regulated as it is translated." [SO:ke] -synonym: "translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "DNA reverse primer" EXACT [] -synonym: "reverse DNA primer" EXACT [] -synonym: "reverse primer" EXACT [] -synonym: "reverse primer oligo" EXACT [] -synonym: "reverse primer oligonucleotide" EXACT [] -synonym: "reverse primer sequence" EXACT [] -is_a: SO:0000112 ! implied link automatically realized ! primer -intersection_of: SO:0000112 ! primer -intersection_of: has_quality SO:0001031 ! reverse - -[Term] -id: SO:0000133 -name: epigenetically_modified -def: "This attribute describes a gene where heritable changes other than those in the DNA sequence occur. These changes include: modification to the DNA (such as DNA methylation, the covalent modification of cytosine), and post-translational modification of histones." [SO:ke] -synonym: "epigenetically modified" EXACT [] -is_a: SO:0000401 ! 
gene_attribute - -[Term] -id: SO:0000134 -name: imprinted -def: "Imprinted genes are epigenetically modified genes that are expressed monoallelically according to their parent of origin." [SO:ke] -xref: http:http\://en.wikipedia.org/wiki/Genomic_imprinting "wiki" -is_a: SO:0000119 ! regulated -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000135 -name: maternally_imprinted -def: "The maternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "maternally imprinted" EXACT [] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -def: "The paternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "paternally imprinted" EXACT [] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -def: "Allelic exclusion is a process occurring in diploid organisms, where a gene is inactivated and not expressed in that cell." [SO:ke] -comment: Examples are x-inactivation and immunoglobulin formation. -synonym: "allelically excluded" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000138 -name: gene_rearranged_at_DNA_level -def: "An epigenetically modified gene, rearranged at the DNA level." [SO:xp] -synonym: "gene rearranged at DNA level" EXACT [] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000904 ! rearranged_at_DNA_level - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." 
[SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -synonym: "DNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000145 -name: recoded_codon -def: "A codon that has been redefined at translation. The redefinition may be as a result of translational bypass, translational frameshifting or stop codon readthrough." [SO:xp] -synonym: "recoded codon" EXACT [] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000146 -name: capped -def: "An attribute describing when a sequence, usually an mRNA is capped by the addition of a modified guanine nucleotide at the 5' end." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
-subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unavailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http\://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "yeast artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." 
[SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "bacterial artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000154 -name: PAC -def: "The P1-derived artificial chromosome are DNA constructs that are derived from the DNA of P1 bacteriophage. They can carry large amounts (about 100-300 kilobases) of other sequences for a variety of bioengineering purposes. It is one type of vector used to clone DNA fragments (100- to 300-kb insert size; average, 150 kb) in Escherichia coli cells." [http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. Drosophila melanogaster PACs carry an average insert size of 80 kb. The library represents a 6-fold coverage of the genome. -synonym: "P1" EXACT [] -synonym: "P1 artificial chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000155 -name: plasmid -def: "A self replicating, using the hosts cellular machinery, often circular nucleic acid molecule that is distinct from a chromosome in the organism." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "plasmid sequence" EXACT [] -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as a plasmid or packaged as a phage,since they retain the lambda cos sites." [SO:ma] -comment: Paper: vans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1):9-20. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cosmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Cosmid "wiki" -is_a: SO:0000440 ! 
vector_replicon - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma] -synonym: "phagemid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Phagemid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilizes the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource: mapping and analysis of 96 clones. Genomics 1996. -synonym: "fosmid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Fosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000159 -name: deletion -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_obsolete: true - -[Term] -id: SO:0000161 -name: methylated_A -def: "A modified RNA base in which adenine has been methylated." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "Consensus region of primary transcript bordering junction of splicing. 
A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinery. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. 
This comment is a place holder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000166 -name: enhancer_bound_by_factor -def: "An enhancer bound by a factor." [SO:xp] -synonym: "enhancer bound by factor" EXACT [] -is_a: SO:0000165 ! implied link automatically realized ! enhancer -intersection_of: SO:0000165 ! enhancer -intersection_of: has_quality SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." [SO:ke] -synonym: "pol I promoter" EXACT [] -synonym: "polymerase I promoter" EXACT [] -synonym: "RNA polymerase A promoter" EXACT [] -synonym: "RNApol I promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke] -synonym: "pol II promoter" RELATED [] -synonym: "polymerase II promoter" EXACT [] -synonym: "RNA polymerase B promoter" EXACT [] -synonym: "RNApol II promoter" EXACT [] -is_a: SO:0000727 ! implied link automatically realized ! CRM -is_a: SO:0001203 ! 
RNA_polymerase_promoter -relationship: has_part SO:0000174 ! TATA_box - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." [SO:ke] -synonym: "pol III promoter" EXACT [] -synonym: "polymerase III promoter" EXACT [] -synonym: "RNA polymerase C promoter" EXACT [] -synonym: "RNApol III promoter" EXACT [] -is_a: SO:0000727 ! implied link automatically realized ! CRM -is_a: SO:0001203 ! RNA_polymerase_promoter -relationship: has_part SO:0000174 ! TATA_box - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "CAAT box" EXACT [] -synonym: "CAAT signal" EXACT [] -synonym: "CAAT-box" EXACT [] -xref: http://en.wikipedia.org/wiki/CAAT_box "wiki" -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000173 -name: GC_rich_promoter_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "GC rich promoter region" EXACT [] -synonym: "GC-rich region" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "Goldstein-Hogness box" EXACT [] -synonym: "TATA box" EXACT [] -xref: http://en.wikipedia.org/wiki/TATA_box "wiki" -is_a: SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-10 signal" EXACT [] -synonym: "minus 10 signal" EXACT [] -synonym: "Pribnow box" EXACT [] -synonym: "Pribnow Schaller box" EXACT [] -synonym: "Pribnow-Schaller box" EXACT [] -xref: http://en.wikipedia.org/wiki/Pribnow_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000613 ! bacterial_RNApol_promoter - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-35 signal" EXACT [] -synonym: "minus 35 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000613 ! bacterial_RNApol_promoter - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." 
[SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." [http://www.dddmag.com/Glossary.aspx#r] -synonym: "class I" RELATED [] -synonym: "class I transposon" EXACT [] -synonym: "retrotransposon element" EXACT [] -xref: http://en.wikipedia.org/wiki/Retrotransposon "wiki" -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." [SO:ke] -synonym: "class II" RELATED [] -synonym: "class II transposon" EXACT [] -synonym: "DNA transposon" EXACT [] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -synonym: "U2 intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." 
[SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." [SO:ke] -synonym: "long terminal repeat retrotransposon" EXACT [] -synonym: "LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -synonym: "non LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -synonym: "5' intron" EXACT [] -synonym: "5' intron sequence" EXACT [] -synonym: "five prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000191 -name: interior_intron -synonym: "interior intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -synonym: "3' intron" EXACT [] -synonym: "3' intron sequence" RELATED [] -synonym: "three prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." 
[GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "LINE" EXACT [] -synonym: "LINE element" EXACT [] -synonym: "Long interspersed element" EXACT [] -synonym: "Long interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon (here, 'codon' is inclusive of the stop_codon)." [SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! 
exon - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke] -synonym: "translocated sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000201 -name: interior_exon -def: "An exon that is bounded by 5' and 3' splice sites." [PMID:10373547] -synonym: "interior exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The coding exon that is most 3-prime on a given transcript." [SO:ma] -synonym: "3' coding exon" RELATED [] -synonym: "three prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." [SO:ke] -synonym: "Short interspersed element" EXACT [] -synonym: "Short interspersed nuclear element" EXACT [] -synonym: "SINE element" EXACT [] -xref: http://en.wikipedia.org/wiki/Short_interspersed_nuclear_element "wiki" -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_variation -synonym: "simple sequence length polymorphism" RELATED [] -synonym: "simple sequence length variation" EXACT [] -synonym: "SSLP" RELATED [] -is_a: SO:0000248 ! sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "terminal inverted repeat element" EXACT [] -synonym: "TIR element" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -subset: SOFA -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253)." [SO:ke] -synonym: "tRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -def: "A primary transcript encoding alanyl tRNA." 
[SO:ke] -synonym: "alanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." [SO:ke] -synonym: "arginine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -synonym: "asparagine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -synonym: "aspartic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." [SO:ke] -synonym: "cysteine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." [SO:ke] -synonym: "glycine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." 
[SO:ke] -synonym: "histidine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke] -synonym: "isoleucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -synonym: "leucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -synonym: "lysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -synonym: "methionine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke] -synonym: "phenylalanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -synonym: "proline tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -synonym: "serine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -synonym: "threonine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -synonym: "tryptophan tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." [SO:ke] -synonym: "tyrosine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." [SO:ke] -synonym: "valine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear RNA (SO:0000274)." [SO:ke] -synonym: "snRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -synonym: "snoRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. 
It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds a TF complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The in-frame interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -synonym: "transcript attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterized by large modular imperfect long inverted repeats." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "foldback element" EXACT [] -synonym: "long inverted repeat element" RELATED [] -synonym: "LVR element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The sequences extending on either side of a specific region." 
[SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000240 -name: chromosome_variation -synonym: "chromosome variation" EXACT [] -is_a: SO:0001507 ! variant_collection -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000400 ! sequence_attribute -disjoint_from: SO:1000132 ! sequence_variant_effect -relationship: part_of SO:0001524 ! chromosomally_aberrant_genome - -[Term] -id: SO:0000241 -name: internal_UTR -def: "A UTR bordered by the terminal and initial codons of two CDSs in a polycistronic transcript. Every UTR is either 5', 3' or internal." [SO:cjm] -synonym: "internal UTR" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polycistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -synonym: "untranslated region polycistronic mRNA" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke] -synonym: "internal ribosomal entry sequence" EXACT [] -synonym: "internal ribosomal entry site" EXACT [] -synonym: "internal ribosome entry sequence" RELATED [] -synonym: "internal ribosome entry site" EXACT [] -synonym: "IRES" EXACT [] -xref: http://en.wikipedia.org/wiki/Internal_ribosome_entry_site "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_obsolete: true - -[Term] -id: SO:0000246 -name: polyadenylated -def: "A attribute describing the addition of a poly A tail to the 3' end of a mRNA molecule." [SO:ke] -is_a: SO:0000863 ! 
mRNA_attribute - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000248 -name: sequence_length_variation -synonym: "sequence length variation" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -synonym: "modified RNA base feature" EXACT [] -is_a: SO:0001236 ! base - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). 
Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000254 -name: alanyl_tRNA -def: "A tRNA sequence that has an alanine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "alanyl tRNA" EXACT [] -synonym: "alanyl-transfer ribonucleic acid" EXACT [] -synonym: "alanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000211 ! alanine_tRNA_primary_transcript - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -synonym: "rRNA small subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -def: "A tRNA sequence that has an asparagine anticodon, and a 3' asparagine binding region." [SO:ke] -synonym: "asparaginyl tRNA" EXACT [] -synonym: "asparaginyl-transfer ribonucleic acid" EXACT [] -synonym: "asparaginyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000213 ! asparagine_tRNA_primary_transcript - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -def: "A tRNA sequence that has an aspartic acid anticodon, and a 3' aspartic acid binding region." [SO:ke] -synonym: "aspartyl tRNA" EXACT [] -synonym: "aspartyl-transfer ribonucleic acid" EXACT [] -synonym: "aspartyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! 
tRNA -relationship: derives_from SO:0000214 ! aspartic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -def: "A tRNA sequence that has a cysteine anticodon, and a 3' cysteine binding region." [SO:ke] -synonym: "cysteinyl tRNA" EXACT [] -synonym: "cysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "cysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000215 ! cysteine_tRNA_primary_transcript - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -def: "A tRNA sequence that has a glutamine anticodon, and a 3' glutamine binding region." [SO:ke] -synonym: "glutaminyl tRNA" EXACT [] -synonym: "glutaminyl-transfer ribonucleic acid" EXACT [] -synonym: "glutaminyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000216 ! glutamic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -def: "A tRNA sequence that has a glutamic acid anticodon, and a 3' glutamic acid binding region." [SO:ke] -synonym: "glutamyl tRNA" EXACT [] -synonym: "glutamyl-transfer ribonucleic acid" EXACT [] -synonym: "glutamyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000217 ! glutamine_tRNA_primary_transcript - -[Term] -id: SO:0000261 -name: glycyl_tRNA -def: "A tRNA sequence that has a glycine anticodon, and a 3' glycine binding region." [SO:ke] -synonym: "glycyl tRNA" EXACT [] -synonym: "glycyl-transfer ribonucleic acid" RELATED [] -synonym: "glycyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000218 ! glycine_tRNA_primary_transcript - -[Term] -id: SO:0000262 -name: histidyl_tRNA -def: "A tRNA sequence that has a histidine anticodon, and a 3' histidine binding region." [SO:ke] -synonym: "histidyl tRNA" EXACT [] -synonym: "histidyl-transfer ribonucleic acid" EXACT [] -synonym: "histidyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000219 ! 
histidine_tRNA_primary_transcript - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -def: "A tRNA sequence that has an isoleucine anticodon, and a 3' isoleucine binding region." [SO:ke] -synonym: "isoleucyl tRNA" EXACT [] -synonym: "isoleucyl-transfer ribonucleic acid" EXACT [] -synonym: "isoleucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000220 ! isoleucine_tRNA_primary_transcript - -[Term] -id: SO:0000264 -name: leucyl_tRNA -def: "A tRNA sequence that has a leucine anticodon, and a 3' leucine binding region." [SO:ke] -synonym: "leucyl tRNA" EXACT [] -synonym: "leucyl-transfer ribonucleic acid" EXACT [] -synonym: "leucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000221 ! leucine_tRNA_primary_transcript - -[Term] -id: SO:0000265 -name: lysyl_tRNA -def: "A tRNA sequence that has a lysine anticodon, and a 3' lysine binding region." [SO:ke] -synonym: "lysyl tRNA" EXACT [] -synonym: "lysyl-transfer ribonucleic acid" EXACT [] -synonym: "lysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000222 ! lysine_tRNA_primary_transcript - -[Term] -id: SO:0000266 -name: methionyl_tRNA -def: "A tRNA sequence that has a methionine anticodon, and a 3' methionine binding region." [SO:ke] -synonym: "methionyl tRNA" EXACT [] -synonym: "methionyl-transfer ribonucleic acid" EXACT [] -synonym: "methionyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000223 ! methionine_tRNA_primary_transcript - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -def: "A tRNA sequence that has a phenylalanine anticodon, and a 3' phenylalanine binding region." [SO:ke] -synonym: "phenylalanyl tRNA" EXACT [] -synonym: "phenylalanyl-transfer ribonucleic acid" EXACT [] -synonym: "phenylalanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000224 ! 
phenylalanine_tRNA_primary_transcript - -[Term] -id: SO:0000268 -name: prolyl_tRNA -def: "A tRNA sequence that has a proline anticodon, and a 3' proline binding region." [SO:ke] -synonym: "prolyl tRNA" EXACT [] -synonym: "prolyl-transfer ribonucleic acid" EXACT [] -synonym: "prolyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000225 ! proline_tRNA_primary_transcript - -[Term] -id: SO:0000269 -name: seryl_tRNA -def: "A tRNA sequence that has a serine anticodon, and a 3' serine binding region." [SO:ke] -synonym: "seryl tRNA" EXACT [] -synonym: "seryl-transfer ribonucleic acid" RELATED [] -synonym: "seryl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000226 ! serine_tRNA_primary_transcript - -[Term] -id: SO:0000270 -name: threonyl_tRNA -def: "A tRNA sequence that has a threonine anticodon, and a 3' threonine binding region." [SO:ke] -synonym: "threonyl tRNA" EXACT [] -synonym: "threonyl-transfer ribonucleic acid" EXACT [] -synonym: "threonyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000227 ! threonine_tRNA_primary_transcript - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -def: "A tRNA sequence that has a tryptophan anticodon, and a 3' tryptophan binding region." [SO:ke] -synonym: "tryptophanyl tRNA" EXACT [] -synonym: "tryptophanyl-transfer ribonucleic acid" EXACT [] -synonym: "tryptophanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000228 ! tryptophan_tRNA_primary_transcript - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -def: "A tRNA sequence that has a tyrosine anticodon, and a 3' tyrosine binding region." [SO:ke] -synonym: "tyrosyl tRNA" EXACT [] -synonym: "tyrosyl-transfer ribonucleic acid" EXACT [] -synonym: "tyrosyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000229 ! 
tyrosine_tRNA_primary_transcript - -[Term] -id: SO:0000273 -name: valyl_tRNA -def: "A tRNA sequence that has a valine anticodon, and a 3' valine binding region." [SO:ke] -synonym: "valyl tRNA" EXACT [] -synonym: "valyl-transfer ribonucleic acid" EXACT [] -synonym: "valyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000230 ! valine_tRNA_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000231 ! snRNA_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. 
Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: derives_from SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000277 -name: bound_by_factor -def: "An attribute describing a sequence that is bound by another molecule." [SO:ke] -comment: Formerly called transcript_by_bound_factor. -synonym: "bound by factor" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000278 -name: transcript_bound_by_nucleic_acid -def: "A transcript that is bound by a nucleic acid." [SO:xp] -comment: Formerly called transcript_by_bound_nucleic_acid. -synonym: "transcript bound by nucleic acid" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000876 ! bound_by_nucleic_acid - -[Term] -id: SO:0000279 -name: transcript_bound_by_protein -def: "A transcript that is bound by a protein." [SO:xp] -comment: Formerly called transcript_by_bound_protein. -synonym: "transcript bound by protein" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000875 ! bound_by_protein - -[Term] -id: SO:0000280 -name: engineered_gene -def: "A gene that is engineered." [SO:xp] -synonym: "engineered gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000281 -name: engineered_foreign_gene -def: "A gene that is engineered and foreign." [SO:xp] -synonym: "engineered foreign gene" EXACT [] -is_a: SO:0000280 ! implied link automatically realized ! 
engineered_gene -is_a: SO:0000285 ! implied link automatically realized ! foreign_gene -is_a: SO:0000805 ! implied link automatically realized ! engineered_foreign_region -intersection_of: SO:0000280 ! engineered_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000282 -name: mRNA_with_minus_1_frameshift -def: "An mRNA with a minus 1 frameshift." [SO:xp] -synonym: "mRNA with minus 1 frameshift" EXACT [] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000866 ! minus_1_frameshift - -[Term] -id: SO:0000283 -name: engineered_foreign_transposable_element_gene -def: "A transposable_element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign transposable element gene" EXACT [] -is_a: SO:0000111 ! implied link automatically realized ! transposable_element_gene -is_a: SO:0000281 ! implied link automatically realized ! engineered_foreign_gene -intersection_of: SO:0000111 ! transposable_element_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartite and interrupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000285 -name: foreign_gene -def: "A gene that is foreign." [SO:xp] -synonym: "foreign gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "direct terminal repeat" RELATED [] -synonym: "long terminal repeat" EXACT [] -synonym: "LTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Long_terminal_repeat "wiki" -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000287 -name: fusion_gene -def: "A gene that is a fusion." [SO:xp] -synonym: "fusion gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Fusion_gene "wiki" -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000806 ! fusion - -[Term] -id: SO:0000288 -name: engineered_fusion_gene -def: "A fusion gene that is engineered." [SO:xp] -synonym: "engineered fusion gene" EXACT [] -is_a: SO:0000280 ! implied link automatically realized ! engineered_gene -is_a: SO:0000287 ! implied link automatically realized ! fusion_gene -intersection_of: SO:0000287 ! fusion_gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000289 -name: microsatellite -def: "A repeat_region containing repeat_units (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite" EXACT [] -synonym: "dinucleotide repeat microsatellite feature" EXACT [] -synonym: "dinucleotide repeat microsatellite locus" EXACT [] -synonym: "dinucleotide repeat microsatellite marker" EXACT [] -is_a: SO:0000289 ! 
microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite marker" RELATED [] -synonym: "rinucleotide repeat microsatellite" EXACT [] -synonym: "trinucleotide repeat microsatellite feature" EXACT [] -synonym: "trinucleotide repeat microsatellite locus" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_obsolete: true - -[Term] -id: SO:0000293 -name: engineered_foreign_repetitive_element -def: "A repetitive element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign repetitive element" EXACT [] -is_a: SO:0000657 ! implied link automatically realized ! repeat_region -is_a: SO:0000805 ! implied link automatically realized ! engineered_foreign_region -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -synonym: "U12 intron" EXACT [] -synonym: "U12-dependent intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! replicon - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "D-loop" EXACT [] -synonym: "displacement loop" RELATED [] -xref: http://en.wikipedia.org/wiki/D_loop "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -synonym: "recombination feature" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000299 -name: specific_recombination_site -synonym: "specific recombination site" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -synonym: "recombination feature of rearranged gene" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000301 -name: vertebrate_immune_system_gene_recombination_feature -synonym: "vertebrate immune system gene recombination feature" EXACT [] -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene recombination feature" EXACT [] -synonym: "J-RS" EXACT [] -is_a: SO:0000939 ! 
vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interrupted palindrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_base -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001720 ! epigenetically_modified_region - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_base - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_obsolete: true - -[Term] -id: SO:0000309 -name: computed_feature -is_obsolete: true - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_obsolete: true - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to: -is_obsolete: true - -[Term] -id: SO:0000312 -name: experimentally_determined -def: "Attribute to describe a feature that has been experimentally verified." 
[SO:ke] -synonym: "experimentally determined" EXACT [] -is_a: SO:0000789 ! validated - -[Term] -id: SO:0000313 -name: stem_loop -alt_id: SO:0000019 -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "RNA_hairpin_loop" EXACT [] -synonym: "stem loop" EXACT [] -synonym: "stem-loop" EXACT [] -xref: http://en.wikipedia.org/wiki/Stem_loop "wiki" -is_a: SO:0000122 ! RNA_sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: TSS -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cDNA clone" EXACT [] -is_a: SO:0000151 ! implied link automatically realized ! clone -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." 
[SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke] -synonym: "intronic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000321 -name: mRNA_with_plus_1_frameshift -def: "An mRNA with a plus 1 frameshift." [SO:ke] -synonym: "mRNA with plus 1 frameshift" EXACT [] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000868 ! plus_1_frameshift - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -synonym: "nuclease hypersensitive site" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "coding start" EXACT [] -synonym: "translation initiation site" EXACT [] -synonym: "translation start" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! 
rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke] -synonym: "coding end" EXACT [] -synonym: "translation termination site" EXACT [] -synonym: "translation_end" EXACT [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray oligo" EXACT [] -synonym: "microarray oligonucleotide" EXACT [] -is_a: SO:0000051 ! probe - -[Term] -id: SO:0000329 -name: mRNA_with_plus_2_frameshift -def: "An mRNA with a plus 2 frameshift." [SO:xp] -synonym: "mRNA with plus 2 frameshift" EXACT [] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000869 ! plus_2_framshift - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." 
[SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000335 -name: mRNA_with_minus_2_frameshift -def: "A mRNA with a minus 2 frameshift." [SO:ke] -synonym: "mRNA with minus 2 frameshift" EXACT [] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000867 ! minus_2_frameshift - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." 
[SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITEs do not encode proteins." [http://www.pnas.org/cgi/content/full/97/18/10083] -synonym: "miniature inverted repeat transposable element" EXACT [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome which promotes recombination." [SO:rd] -synonym: "recombination hotspot" EXACT [] -xref: http://en.wikipedia.org/wiki/Recombination_hotspot "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -synonym: "site specific recombination target region" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000346 -name: loxP_site -synonym: "Cre-recombination target region" RELATED [] -synonym: "loxP site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000348 -name: nucleic_acid -def: "An attribute describing a sequence consisting of nucleobases bound to repeating units. The forms found in nature are deoxyribonucleic acid (DNA), where the repeating units are 2-deoxy-D-ribose rings connected to a phosphate backbone, and ribonucleic acid (RNA), where the repeating units are D-ribose rings connected to a phosphate backbone." [CHEBI:33696, RSC:cb] -synonym: "nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleic_acid "wiki" -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000350 -name: FRT_site -def: "An inversion site found on the Saccharomyces cerevisiae 2 micron plasmid." [SO:ma] -synonym: "FLP recombination target region" EXACT [] -synonym: "FRT site" EXACT [] -is_a: SO:0000948 ! 
inversion_site - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "An attribute to decide a sequence of nucleotides, nucleotide analogs, or amino acids that has been designed by an experimenter and which may, or may not, correspond with any natural sequence." [SO:ma] -synonym: "synthetic sequence" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000352 -name: DNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a 2-deoxy-D-ribose ring connected to a phosphate backbone." [RSC:cb] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000353 -name: sequence_assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -synonym: "group 1 intron homing endonuclease target region" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which is co-inherited as the result of the lack of historic recombination within it." [SO:ma] -synonym: "haplotype block" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000356 -name: RNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a D-ribose ring connected to a phosphate backbone." [RSC:cb] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: flanked -def: "An attribute describing a region that is bounded either side by a particular kind of region." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000359 -name: floxed -def: "An attribute describing sequence that is flanked by Lox-P sites." 
[SO:ke] -xref: http://en.wikipedia.org/wiki/Floxed "wiki" -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000361 -name: FRT_flanked -def: "An attribute to describe sequence that is flanked by the FLP recombinase recognition site, FRT." [SO:ke] -synonym: "FRT flanked" EXACT [] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000362 -name: invalidated_by_chimeric_cDNA -def: "A cDNA clone constructed from more than one mRNA. Usually an experimental artifact." [SO:ma] -synonym: "invalidated by chimeric cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000363 -name: floxed_gene -def: "A transgene that is floxed." [SO:xp] -synonym: "floxed gene" EXACT [] -is_a: SO:0000902 ! implied link automatically realized ! transgene -intersection_of: SO:0000902 ! transgene -intersection_of: has_quality SO:0000359 ! floxed - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposable element." [SO:ke] -synonym: "transposable element flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "A region encoding an integrase which acts at a site adjacent to it (attI_site) to insert DNA which must include but is not limited to an attC_site." [SO:as] -xref: http://en.wikipedia.org/wiki/Integron "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! 
junction - -[Term] -id: SO:0000367 -name: attI_site -def: "A region within an integron, adjacent to an integrase, at which site specific recombination involving an attC_site takes place." [SO:as] -synonym: "attI site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -is_obsolete: true - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/~smaloy/Glossary/C.html] -synonym: "conjugative transposon" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0000373 -name: recombinationally_inverted_gene -def: "A recombinationally rearranged gene by inversion." [SO:xp] -synonym: "recombinationally inverted gene" EXACT [] -is_a: SO:0000456 ! implied link automatically realized ! 
recombinationally_rearranged_gene -intersection_of: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: has_quality SO:1000036 ! inversion - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -is_a: SO:0000372 ! implied link automatically realized ! enzymatic_RNA -intersection_of: SO:0000372 ! enzymatic_RNA -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000375 -name: rRNA_5_8S -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 6S RNA represses expression from a sigma70-dependent promoter during stationary phase." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S RNA" EXACT [] -synonym: "RNA 6S" EXACT [] -xref: http://en.wikipedia.org/wiki/6S_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. 
The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB RsmB RNA" EXACT [] -synonym: "CsrB-RsmB RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -synonym: "DsrA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/DsrA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -synonym: "GcvB RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/GcvB_RNA "wiki" -is_a: SO:0000378 ! 
DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http://rnaworld.bio.ku.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -is_a: SO:0000715 ! implied link automatically realized ! RNA_motif -intersection_of: SO:0000715 ! RNA_motif -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000381 -name: group_IIA_intron -synonym: "group IIA intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -synonym: "group IIB intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -synonym: "MicF RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MicF_RNA "wiki" -is_a: SO:0000644 ! antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -synonym: "OxyS RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/OxyS_RNA "wiki" -is_a: SO:0000370 ! 
small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterized activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -synonym: "RprA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RprA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -synonym: "RRE RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -synonym: "spot-42 RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Spot_42_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesizes telomeric DNA." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. 
The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -def: "An attribute describes a quality of sequence." [SO:ke] -synonym: "sequence attribute" EXACT [] -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0000401 -name: gene_attribute -synonym: "gene attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_obsolete: true - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0005837 ! U14_snoRNA_primary_transcript - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. 
It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron. Twintrons are group II or III introns, into which another group II or III intron has been transposed." [PMID:1899376, PMID:7823908] -xref: http://en.wikipedia.org/wiki/Twintron "wiki" -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. 
It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: Discrete. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A region of a molecule that binds to a protein." [SO:ke] -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000411 -name: rescue_region -def: "A region that rescues." [SO:xp] -synonym: "rescue fragment" EXACT [] -synonym: "rescue region" EXACT [] -synonym: "rescue segment" RELATED [] -is_a: SO:0000695 ! implied link automatically realized ! reagent -intersection_of: SO:0000695 ! reagent -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000414 -name: invalidated_by_genomic_contamination -def: "An attribute to describe a feature that is invalidated due to genomic contamination." [SO:ke] -synonym: "invalidated by genomic contamination" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000415 -name: invalidated_by_genomic_polyA_primed_cDNA -def: "An attribute to describe a feature that is invalidated due to polyA priming." [SO:ke] -synonym: "invalidated by genomic polyA primed cDNA" EXACT [] -is_a: SO:0000790 ! 
invalidated - -[Term] -id: SO:0000416 -name: invalidated_by_partial_processing -def: "An attribute to describe a feature that is invalidated due to partial processing." [SO:ke] -synonym: "invalidated by partial processing" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000417 -name: polypeptide_domain -alt_id: BS:00012 -alt_id: BS:00134 -alt_id: SO:0001069 -def: "A structurally or functionally defined protein region. In proteins with multiple domains, the combination of the domains determines the function of the protein. A region which has been shown to recur throughout evolution." [EBIBS:GAR] -comment: Range. Old definition from before biosapiens: A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains. -subset: biosapiens -synonym: "ca_bind" RELATED BS [uniprot:feature_type] -synonym: "DNA_bind" RELATED BS [uniprot:feature_type] -synonym: "domain" BROAD BS [uniprot:feature_type] -synonym: "np_bind" RELATED BS [uniprot:feature_type] -synonym: "polypeptide domain" EXACT [] -synonym: "polypeptide_structural_domain" EXACT BS [] -synonym: "structural domain" BROAD BS [] -synonym: "zn_fing" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. 
-subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0001527 ! peptide_localization_signal -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -synonym: "5' TIR" EXACT [] -synonym: "five prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -synonym: "3' TIR" EXACT [] -synonym: "three prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -synonym: "U5 long terminal repeat region" EXACT [] -synonym: "U5 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000423 -name: R_LTR_region -synonym: "R long terminal repeat region" EXACT [] -synonym: "R LTR region" EXACT [] -is_a: SO:0000848 ! 
LTR_component - -[Term] -id: SO:0000424 -name: U3_LTR_region -synonym: "U3 long terminal repeat region" EXACT [] -synonym: "U3 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000425 -name: five_prime_LTR -synonym: "5' long terminal repeat" EXACT [] -synonym: "5' LTR" EXACT [] -synonym: "five prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -synonym: "3' long terminal repeat" EXACT [] -synonym: "3' LTR" EXACT [] -synonym: "three prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -synonym: "R 5' long term repeat region" EXACT [] -synonym: "R five prime LTR region" EXACT [] -is_a: SO:0000423 ! R_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -synonym: "U5 5' long terminal repeat region" EXACT [] -synonym: "U5 five prime LTR region" EXACT [] -is_a: SO:0000422 ! U5_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -synonym: "U3 5' long term repeat region" EXACT [] -synonym: "U3 five prime LTR region" EXACT [] -is_a: SO:0000424 ! U3_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -synonym: "R 3' long terminal repeat region" EXACT [] -synonym: "R three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -synonym: "U3 3' long terminal repeat region" EXACT [] -synonym: "U3 three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -synonym: "U5 3' long terminal repeat region" EXACT [] -synonym: "U5 three prime LTR region" EXACT [] -is_a: SO:0000849 ! 
three_prime_LTR_component - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -synonym: "non LTR retrotransposon polymeric tract" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: target_site_duplication -def: "A sequence of the target DNA that is duplicated when a transposable element or phage inserts; usually found at each end the insertion." [http://www.koko.gov.my/CocoaBioTech/Glossaryt.html] -synonym: "target site duplication" EXACT [] -is_a: SO:0000314 ! direct_repeat -intersection_of: SO:0000314 ! direct_repeat -intersection_of: derives_from SO:0000101 ! transposable_element -relationship: derives_from SO:0000101 ! transposable_element - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." [SO:ke] -synonym: "LTR retrotransposon poly purine tract" RELATED [] -synonym: "RR tract" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -synonym: "inverted ring chromosome" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! ring_chromosome - -[Term] -id: SO:0000440 -name: vector_replicon -def: "A replicon that has been modified to act as a vector for foreign sequence." [SO:ma] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. -synonym: "vector" EXACT [] -synonym: "vector replicon" EXACT [] -xref: http://en.wikipedia.org/wiki/Vector_(molecular_biology) "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000443 -name: polymer_attribute -def: "An attribute to describe the kind of biological sequence." [SO:ke] -synonym: "polymer attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -synonym: "three prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "5' nc exon" EXACT [] -synonym: "5' non coding exon" EXACT [] -synonym: "five prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." [SO:ke] -synonym: "UTR intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." 
[SO:ke] -synonym: "five prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -synonym: "three prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequency of these components." [SO:ma] -synonym: "random sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -synonym: "chromosome interband" RELATED [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000451 -name: gene_with_polyadenylated_mRNA -def: "A gene that encodes a polyadenylated mRNA." [SO:xp] -synonym: "gene with polyadenylated mRNA" EXACT [] -is_a: SO:0001217 ! implied link automatically realized ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000871 ! polyadenylated_mRNA - -[Term] -id: SO:0000452 -name: transgene_attribute -is_obsolete: true - -[Term] -id: SO:0000453 -name: chromosomal_transposition -def: "A chromosome structure variant whereby a region of a chromosome has been transferred to another position. Among interchromosomal rearrangements, the term transposition is reserved for that class in which the telomeres of the chromosomes involved are coupled (that is to say, form the two ends of a single DNA molecule) as in wild-type." [FB:reference_manual, SO:ke] -synonym: "chromosomal transposition" EXACT [] -synonym: "transposition" NARROW [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." 
[http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000455 -name: gene_with_mRNA_with_frameshift -def: "A gene that encodes an mRNA with a frameshift." [SO:xp] -synonym: "gene with mRNA with frameshift" EXACT [] -is_a: SO:0001217 ! implied link automatically realized ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000456 -name: recombinationally_rearranged_gene -def: "A gene that is recombinationally rearranged." [SO:ke] -synonym: "recombinationally rearranged gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000940 ! recombinationally_rearranged - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -def: "A chromosome duplication involving an insertion from another chromosome." [SO:ke] -synonym: "interchromosomal duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D gene" EXACT [] -synonym: "D-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000459 -name: gene_with_trans_spliced_transcript -def: "A gene with a transcript that is trans-spliced." [SO:xp] -synonym: "gene with trans spliced transcript" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000479 ! 
trans_spliced_transcript - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_segment -comment: I am using the term segment instead of gene here to avoid confusion with the region 'gene'. -synonym: "vertebrate immunoglobulin T cell receptor segment" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite deficiency" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000463 -name: encodes_alternately_spliced_transcripts -def: "A gene that encodes more than one transcript." [SO:ke] -synonym: "encodes alternately spliced transcripts" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome deletion whereby a chromosome is generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus duplication" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! 
intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene" EXACT [] -synonym: "V-GENE" EXACT [] -synonym: "variable_gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -def: "An attribute describing a gene sequence where the resulting protein is regulated by the stability of the resulting protein." [SO:ke] -synonym: "post translationally regulated by protein stability" EXACT [] -synonym: "post-translationally regulated by protein stability" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -def: "An attribute describing a gene sequence where the resulting protein is modified to regulate it." [SO:ke] -synonym: "post translationally regulated by protein modification" EXACT [] -synonym: "post-translationally regulated by protein modification" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene" EXACT [] -synonym: "J-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000471 -name: autoregulated -def: "The gene product is involved in its own transcriptional regulation." 
[SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -def: "The gene product is involved in its own transcriptional regulation where it decreases transcription." [SO:ke] -synonym: "negatively autoregulated" EXACT [] -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -def: "The gene product is involved in its own transcriptional regulation, where it increases transcription." [SO:ke] -synonym: "positively autoregulated" EXACT [] -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." [SO:ke] -synonym: "contig read" EXACT [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -def: "A gene that is polycistronic." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000478 -name: C_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C gene" EXACT [] -synonym: "C_GENE" EXACT [] -synonym: "constant gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000479 -name: trans_spliced_transcript -def: "A transcript that is trans-spliced." 
[SO:xp] -synonym: "trans spliced transcript" EXACT [] -synonym: "trans-spliced transcript" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly." [SO:ke] -synonym: "tiling path clone" EXACT [] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occurring at the termini of a DNA transposon." [SO:ke] -synonym: "terminal inverted repeat" EXACT [] -synonym: "TIR" EXACT [] -is_a: SO:0000294 ! inverted_repeat -relationship: part_of SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor gene cluster" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000202 ! 
three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-CLUSTER" EXACT [] -synonym: "DJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-C-CLUSTER" EXACT [] -synonym: "VDJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-CLUSTER" EXACT [] -synonym: "VDJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000574 ! 
VDJ_gene - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-C-CLUSTER" RELATED [] -synonym: "VJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-C-CLUSTER" EXACT [] -synonym: "VJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-CLUSTER" EXACT [] -synonym: "VJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -synonym: "D gene recombination feature" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-HEPTAMER" EXACT [] -synonym: "three prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-NOMAMER" EXACT [] -synonym: "three prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-SPACER" EXACT [] -synonym: "three prime D spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-HEPTAMER" EXACT [] -synonym: "five prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-NONAMER" EXACT [] -synonym: "five prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'-SPACER" EXACT [] -synonym: "five prime D spacer" EXACT [] -synonym: "five prime D-spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -synonym: "Hoogsteen base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Hoogsteen_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." [SO:ke] -synonym: "reverse Hoogsteen base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. 
A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ C cluster" EXACT [] -synonym: "D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ cluster" EXACT [] -synonym: "D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J C cluster" EXACT [] -synonym: "D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! 
D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000507 -name: pseudogenic_exon -def: "A non functional descendant of an exon, part of a pseudogene." [SO:ke] -comment: This is the analog of the exon of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon -relationship: part_of SO:0000516 ! pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J cluster" EXACT [] -synonym: "D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J C cluster" EXACT [] -synonym: "D-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! 
C_gene - -[Term] -id: SO:0000510 -name: VD_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V_D_GENE" EXACT [] -synonym: "VD gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J C cluster" EXACT [] -synonym: "J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus aneuploid" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J cluster" EXACT [] -synonym: "J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J nonamer" EXACT [] -synonym: "J-NONAMER" EXACT [] -is_a: SO:0000562 ! 
nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J heptamer" EXACT [] -synonym: "J-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -def: "A non functional descendant of a transcript, part of a pseudogene." [SO:ke] -comment: This is the analog of the transcript of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic transcript" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J spacer" EXACT [] -synonym: "J-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ cluster" EXACT [] -synonym: "V-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J cluster" EXACT [] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ C cluster" EXACT [] -synonym: "V-(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ cluster" EXACT [] -synonym: "V-(VDJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J cluster" EXACT [] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ C cluster" EXACT [] -synonym: "V-(VJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ cluster" EXACT [] -synonym: "V-(VJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J cluster" EXACT [] -synonym: "V-(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000576 ! 
VJ_gene - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V cluster" EXACT [] -synonym: "V-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ C cluster" EXACT [] -synonym: "V-D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ cluster" EXACT [] -synonym: "V-D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J C cluster" EXACT [] -synonym: "V-D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J cluster" EXACT [] -synonym: "V-D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J C cluster" EXACT [] -synonym: "V-D-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J cluster" EXACT [] -synonym: "V-D-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! 
V_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V heptamer" EXACT [] -synonym: "V-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J cluster" EXACT [] -synonym: "V-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J C cluster" EXACT [] -synonym: "V-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V nonamer" EXACT [] -synonym: "V-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! 
V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V spacer" EXACT [] -synonym: "V-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene recombination feature" EXACT [] -synonym: "V-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-C-CLUSTER" EXACT [] -synonym: "DJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-C-CLUSTER" EXACT [] -synonym: "DJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! 
DJ_gene - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-C-CLUSTER" EXACT [] -synonym: "VDJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ C cluster" EXACT [] -synonym: "V-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." [http://www.pnas.org/cgi/content/full/100/11/6569] -synonym: "ISCR" RELATED [] -xref: http://en.wikipedia.org/wiki/Helitron "wiki" -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -synonym: "recoding pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! 
recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -synonym: "designed sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite duplication" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000548 -name: gene_with_edited_transcript -def: "A gene that encodes a transcript that is edited." [SO:xp] -synonym: "gene with edited transcript" EXACT [] -is_a: SO:0001217 ! implied link automatically realized ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000873 ! edited_transcript - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived duplication plus aneuploid" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -def: "A chromosome structural variation whereby either a chromosome exists in addition to the normal chromosome complement or is lacking." [SO:ke] -comment: Examples are Nullo-4, Haplo-4 and triplo-4 in Drosophila. -synonym: "aneuploid chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -synonym: "polyadenylation termination signal" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "Region in 5' UTR where ribosome assembles on mRNA." [SO:ke] -synonym: "five prime ribosome binding site" EXACT [] -synonym: "RBS" RELATED [] -synonym: "Shine Dalgarno sequence" EXACT [] -synonym: "Shine-Dalgarno sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Shine-Dalgarno_sequence "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA cleavage site" EXACT [] -synonym: "polyA site" EXACT [] -synonym: "polyadenylation site" RELATED [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "5' clip" RELATED [] -synonym: "five prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'RS" EXACT [] -synonym: "five prime D recombination signal sequence" EXACT [] -synonym: "five prime D-recombination signal sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "3'-clip" EXACT [] -synonym: "three prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C cluster" EXACT [] -synonym: "C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D cluster" EXACT [] -synonym: "D-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J cluster" EXACT [] -synonym: "D-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: "Seven nucleotide recombination site (e.g. 
CACAGTG), part of V-gene, D-gene or J-gene recombination feature of an immunoglobulin or T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "HEPTAMER" RELATED [] -synonym: "heptamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -synonym: "nonamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000563 -name: vertebrate_immune_system_gene_recombination_spacer -synonym: "vertebrate immune system gene recombination spacer" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J C cluster" EXACT [] -synonym: "V-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J C cluster" EXACT [] -synonym: "V-(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! 
J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J C cluster" EXACT [] -synonym: "V-(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -synonym: "inversion derived aneuploid chromosome" EXACT [] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promoter -synonym: "bidirectional promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000569 -name: retrotransposed -alt_id: SO:0100042 -def: "An attribute of a feature that occurred as the product of a reverse transcriptase mediated event." [SO:ke] -comment: GO:0003964 RNA-directed DNA polymerase activity. -xref: http://en.wikipedia.org/wiki/Retrotransposed "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-RS" EXACT [] -synonym: "three prime D recombination signal sequence" EXACT [] -synonym: "three_prime_D-recombination_signal_sequence" EXACT [] -is_a: SO:0000492 ! 
D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_encoding -synonym: "miRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000572 -name: DJ_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D-J-GENE" EXACT [] -synonym: "DJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000573 -name: rRNA_encoding -synonym: "rRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000574 -name: VDJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-D-J-GENE" EXACT [] -synonym: "VDJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000575 -name: scRNA_encoding -synonym: "scRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000576 -name: VJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-J-GENE" EXACT [] -synonym: "VJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_encoding -synonym: "snoRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." [SO:ma] -synonym: "edited transcript feature" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -synonym: "methylation guide snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." [SO:ke] -synonym: "rRNA cleavage snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "pre edited region" EXACT [] -synonym: "pre-edited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "A tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. 
TmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and Eukaryote nuclear genomes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa RNA" RELATED [] -synonym: "ssrA" RELATED [] -xref: http://en.wikipedia.org/wiki/TmRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_encoding -synonym: "C/D box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." [SO:ke] -synonym: "10Sa RNA primary transcript" RELATED [] -synonym: "ssrA RNA primary transcript" RELATED [] -synonym: "tmRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyze their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -is_a: SO:0000188 ! implied link automatically realized ! intron -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001186 ! 
ribozymic - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -synonym: "SRP RNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000589 ! 
SRP_RNA_primary_transcript - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A tertiary structure in RNA where nucleotides in a loop form base pairs with a region of RNA downstream of the loop." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Pseudoknot "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "classical pseudoknot" EXACT [] -synonym: "H pseudoknot" EXACT [] -synonym: "H-pseudoknot" EXACT [] -synonym: "H-type pseudoknot" EXACT [] -synonym: "hairpin-type pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000595 ! 
C_D_box_snoRNA_primary_transcript - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "box H/ACA snoRNA" EXACT [] -synonym: "H ACA box snoRNA" EXACT [] -synonym: "H/ACA box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000596 ! H_ACA_box_snoRNA_primary_transcript - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." [SO:ke] -synonym: "C/D box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -synonym: "H ACA box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." 
[http://www.rna.ucla.edu/index.html] -is_obsolete: true - -[Term] -id: SO:0000598 -name: edited_by_C_insertion_and_dinucleotide_insertion -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000599 -name: edited_by_C_to_U_substitution -is_obsolete: true - -[Term] -id: SO:0000600 -name: edited_by_A_to_I_substitution -is_obsolete: true - -[Term] -id: SO:0000601 -name: edited_by_G_addition -is_obsolete: true - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." 
[http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing block" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing domain" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "unedited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_encoding -synonym: "H ACA box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -synonym: "oligo U tail" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000602 ! guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." 
[SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -synonym: "bacterial RNApol promoter" EXACT [] -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -synonym: "bacterial terminator" EXACT [] -is_a: SO:0000141 ! terminator -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -synonym: "terminator of type 2 RNApol III promoter" EXACT [] -is_a: SO:0000951 ! eukaryotic_terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! 
primary_transcript_region - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -synonym: "RNApol III promoter type 1" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "RNApol III promoter type 2" EXACT [] -synonym: "tRNA promoter" RELATED [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence TGGCnnAGTGG." [SO:ke] -synonym: "A-box" EXACT [] -xref: http://en.wikipedia.org/wiki/A-box "wiki" -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000620 -name: B_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence AGGTTCCAnnCC." [SO:ke] -synonym: "B-box" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -synonym: "RNApol III promoter type 3" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -def: "An RNA polymerase III type 1 promoter with consensus sequence CAnnCCn." [SO:ke] -synonym: "C-box" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000623 -name: snRNA_encoding -synonym: "snRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! 
chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -synonym: "chromosomal regulatory element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000627 -name: insulator -def: "A transcriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -synonym: "five prime open reading frame" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -def: "A start codon upstream of the ORF." [SO:ke] -synonym: "upstream AUG codon" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000631 -name: polycistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." [SO:ke] -synonym: "polycistronic primary transcript" EXACT [] -is_a: SO:0000078 ! implied link automatically realized ! polycistronic_transcript -is_a: SO:0000185 ! implied link automatically realized ! primary_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000880 ! 
polycistronic - -[Term] -id: SO:0000632 -name: monocistronic_primary_transcript -def: "A primary transcript encoding for one gene product." [SO:ke] -synonym: "monocistronic primary transcript" EXACT [] -is_a: SO:0000185 ! implied link automatically realized ! primary_transcript -is_a: SO:0000665 ! implied link automatically realized ! monocistronic_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000633 -name: monocistronic_mRNA -def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd] -synonym: "monocistronic mRNA" EXACT [] -synonym: "monocistronic processed transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Monocistronic_mRNA "wiki" -is_a: SO:0000234 ! implied link automatically realized ! mRNA -is_a: SO:0000665 ! implied link automatically realized ! monocistronic_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000634 -name: polycistronic_mRNA -def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd] -synonym: "polycistronic mRNA" EXACT [] -synonym: "polycistronic processed transcript" RELATED [] -xref: http://en.wikipedia.org/wiki/Polycistronic_mRNA "wiki" -is_a: SO:0000078 ! implied link automatically realized ! polycistronic_transcript -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "A primary transcript that donates the spliced leader to other mRNA." [SO:ke] -synonym: "mini exon donor RNA" EXACT [] -synonym: "mini-exon donor RNA" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -synonym: "spliced leader RNA" EXACT [] -is_a: SO:0000835 ! 
primary_transcript_region -relationship: part_of SO:0000635 ! mini_exon_donor_RNA - -[Term] -id: SO:0000637 -name: engineered_plasmid -def: "A plasmid that is engineered." [SO:xp] -synonym: "engineered plasmid" EXACT [] -synonym: "engineered plasmid gene" RELATED [] -is_a: SO:0000155 ! implied link automatically realized ! plasmid -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -synonym: "transcribed spacer region" EXACT [] -is_a: SO:0000838 ! rRNA_primary_transcript_region - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -synonym: "internal transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -synonym: "external transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -synonym: "tetranucleotide repeat microsatellite feature" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_encoding -synonym: "SRP RNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." 
[http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro RNA primary transcript" EXACT [] -synonym: "miRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000648 -name: stRNA_primary_transcript -def: "A primary transcript encoding a small temporal mRNA (SO:0000649)." [SO:ke] -synonym: "small temporal RNA primary transcript" EXACT [] -synonym: "stRNA primary transcript" EXACT [] -is_a: SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." 
[PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000255 ! rRNA_small_subunit_primary_transcript - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilizes 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." 
[SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000654 -name: maxicircle_gene -def: "A mitochondrial gene located in a maxicircle." [SO:xp] -synonym: "maxi-circle gene" EXACT [] -synonym: "maxicircle gene" EXACT [] -is_a: SO:0000089 ! implied link automatically realized ! kinetoplast_gene -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000742 ! maxicircle - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000656 -name: stRNA_encoding -synonym: "stRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region -relationship: has_part SO:0000726 ! repeat_unit - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_encoding -synonym: "tmRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_obsolete: true - -[Term] -id: SO:0000661 -name: intron_attribute -is_obsolete: true - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000663 -name: tRNA_encoding -synonym: "tRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -synonym: "introgressed chromosome region" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000665 -name: monocistronic_transcript -def: "A transcript that is monocistronic." [SO:xp] -synonym: "monocistronic transcript" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000666 -name: mobile_intron -def: "An intron (mitochondrial, chloroplast, nuclear or prokaryotic) that encodes a double strand sequence specific endonuclease allowing for mobility." [SO:ke] -synonym: "mobile intron" EXACT [] -is_a: SO:0000188 ! implied link automatically realized ! intron -is_a: SO:0001037 ! implied link automatically realized ! mobile_genetic_element -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0000667 -name: insertion -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! 
expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -synonym: "sequence rearrangement feature" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." [SO:ma] -synonym: "chromosome breakage sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -synonym: "internal eliminated sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -synonym: "macronucleus destined segment" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non canonical splice site" EXACT [] -synonym: "non-canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000678 -consider: SO:0000679 - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." 
[SO:ke] -synonym: "canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000676 -consider: SO:0000677 - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -synonym: "canonical 3' splice site" EXACT [] -synonym: "canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." [SO:ke] -synonym: "canonical 5' splice site" EXACT [] -synonym: "canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." [SO:ke] -synonym: "non canonical 3' splice site" RELATED [] -synonym: "non canonical three prime splice site" EXACT [] -synonym: "non-canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non canonical 5' splice site" EXACT [] -synonym: "non canonical five prime splice site" EXACT [] -synonym: "non-canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non ATG start codon" EXACT [] -synonym: "non canonical start codon" EXACT [] -synonym: "non-canonical start codon" EXACT [] -is_a: SO:0000318 ! start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -synonym: "aberrant processed transcript" EXACT [] -is_a: SO:0000673 ! 
transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -synonym: "exonic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000685 -name: DNAseI_hypersensitive_site -synonym: "DHS" EXACT [] -synonym: "DNAseI hypersensitive site" EXACT [] -is_a: SO:0000322 ! nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "A chromosomal translocation whereby the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements. This occurs for some translocations, particularly but not exclusively, reciprocal translocations." [SO:ma] -synonym: "translocation element" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! 
expressed_sequence_match - -[Term] -id: SO:0000690 -name: gene_with_polycistronic_transcript -def: "A gene that encodes a polycistronic transcript." [SO:xp] -synonym: "gene with polycistronic transcript" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000078 ! polycistronic_transcript - -[Term] -id: SO:0000691 -name: cleaved_initiator_methionine -alt_id: BS:00067 -def: "The initiator methionine that has been cleaved from a mature polypeptide sequence." [EBIBS:GAR] -subset: biosapiens -synonym: "cleaved initiator methionine" EXACT [] -synonym: "init_met" RELATED [uniprot:feature_type] -synonym: "initiator methionine" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000692 -name: gene_with_dicistronic_transcript -def: "A gene that encodes a dicistronic transcript." [SO:xp] -synonym: "gene with dicistronic transcript" EXACT [] -is_a: SO:0000690 ! implied link automatically realized ! gene_with_polycistronic_transcript -intersection_of: SO:0000690 ! gene_with_polycistronic_transcript -intersection_of: transcribed_to SO:0000079 ! dicistronic_transcript - -[Term] -id: SO:0000693 -name: gene_with_recoded_mRNA -def: "A gene that encodes an mRNA that is recoded." [SO:xp] -synonym: "gene with recoded mRNA" EXACT [] -is_a: SO:0001217 ! implied link automatically realized ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000881 ! recoded - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." 
[SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000697 -name: gene_with_stop_codon_read_through -def: "A gene that encodes a transcript with stop codon readthrough." [SO:xp] -synonym: "gene with stop codon read through" EXACT [] -is_a: SO:0000693 ! implied link automatically realized ! gene_with_recoded_mRNA -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000698 -name: gene_with_stop_codon_redefined_as_pyrrolysine -def: "A gene encoding an mRNA that has the stop codon redefined as pyrrolysine." [SO:xp] -synonym: "gene with stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000697 ! implied link automatically realized ! gene_with_stop_codon_read_through -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000884 ! stop_codon_redefined_as_pyrrolysine - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -synonym: "breakpoint" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000001 ! region - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! 
sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjcent copies of a region (of length greater than 1)." [SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The 5' five prime splice site region of the donor RNA." [SO:ke] -comment: SL RNA contains a donor site. 
-synonym: "5 prime trans splice site" RELATED [] -synonym: "trans splice donor site" EXACT [] -synonym: "trans-splice donor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -synonym: "SL1 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -synonym: "SL2 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000710 -name: gene_with_stop_codon_redefined_as_selenocysteine -def: "A gene encoding an mRNA that has the stop codon redefined as selenocysteine." [SO:xp] -synonym: "gene with stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000697 ! implied link automatically realized ! gene_with_stop_codon_read_through -intersection_of: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: has_part SO:0000885 ! stop_codon_redefined_as_selenocysteine - -[Term] -id: SO:0000711 -name: gene_with_mRNA_recoded_by_translational_bypass -def: "A gene with mRNA recoded by translational bypass." [SO:xp] -synonym: "gene with mRNA recoded by translational bypass" EXACT [] -is_a: SO:0000693 ! implied link automatically realized ! gene_with_recoded_mRNA -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:0000712 -name: gene_with_transcript_with_translational_frameshift -def: "A gene encoding a transcript that has a translational frameshift." [SO:xp] -synonym: "gene with transcript with translational frameshift" EXACT [] -is_a: SO:0000693 ! implied link automatically realized ! gene_with_recoded_mRNA -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000713 -name: DNA_motif -def: "A motif that is active in the DNA form of the sequence." 
[SO:ke] -synonym: "DNA motif" EXACT [] -xref: http://en.wikipedia.org/wiki/DNA_motif "wiki" -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000716 -name: dicistronic_mRNA -def: "An mRNA that has the quality dicistronic." [SO:ke] -synonym: "dicistronic mRNA" EXACT [] -synonym: "dicistronic processed transcript" RELATED [] -is_a: SO:0000079 ! implied link automatically realized ! dicistronic_transcript -is_a: SO:0000634 ! implied link automatically realized ! polycistronic_mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interrupted by one or more stop codons; usually identified through intergenomic sequence comparisons." [SGD:rb] -comment: Term requested by Rama from SGD. -synonym: "blocked reading frame" EXACT [] -is_a: SO:0000717 ! 
reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000720 -name: foreign_transposable_element -def: "A transposable element that is foreign." [SO:ke] -comment: requested by Michael on 19 Nov 2004. -synonym: "foreign transposable element" EXACT [] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000721 -name: gene_with_dicistronic_primary_transcript -def: "A gene that encodes a dicistronic primary transcript." [SO:xp] -comment: Requested by Michael, 19 nov 2004. -synonym: "gene with dicistronic primary transcript" EXACT [] -is_a: SO:0000692 ! implied link automatically realized ! gene_with_dicistronic_transcript -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:1001197 ! dicistronic_primary_transcript - -[Term] -id: SO:0000722 -name: gene_with_dicistronic_mRNA -def: "A gene that encodes a polycistronic mRNA." [SO:xp] -comment: Requested by MA nov 19 2004. -synonym: "gene with dicistronic mRNA" EXACT [] -synonym: "gene with dicistronic processed transcript" EXACT [] -is_a: SO:0000692 ! implied link automatically realized ! gene_with_dicistronic_transcript -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:0000716 ! dicistronic_mRNA - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." 
[SO:ma] -synonym: "intervening DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/IDNA "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal - -[Term] -id: SO:0000726 -name: repeat_unit -def: "The simplest repeated component of a repeat region. A single repeat." [SO:ke] -comment: Added to comply with the feature table. A single repeat. -synonym: "repeat unit" EXACT [] -xref: http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -is_a: SO:0001055 ! 
implied link automatically realized ! transcriptional_cis_regulatory_region -intersection_of: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000728 -name: intein -def: "A region of a peptide that is able to excise itself and rejoin the remaining portions with a peptide bond." [SO:ke] -comment: Intein-mediated protein splicing occurs after mRNA has been translated into a protein. -synonym: "protein intron" RELATED [] -xref: http://en.wikipedia.org/wiki/Intein "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000729 -name: intein_containing -def: "An attribute of protein-coding genes where the initial protein product contains an intein." [SO:ke] -synonym: "intein containing" EXACT [] -is_a: SO:0000010 ! protein_coding - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000731 -name: fragmentary -def: "An attribute to describe a feature that is incomplete." [SO:ke] -comment: Term added because of request by MO people. -synonym: "fragment" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000732 -name: predicted -def: "An attribute describing an unverified region." [SO:ke] -xref: http://en.wikipedia.org/wiki/Predicted "wiki" -is_a: SO:0000905 ! status - -[Term] -id: SO:0000733 -name: feature_attribute -def: "An attribute describing a located_sequence_feature." [SO:ke] -synonym: "feature attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000734 -name: exemplar_mRNA -def: "An exemplar is a representative cDNA sequence for each gene. The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." 
[http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: Added for the MO people. -synonym: "exemplar mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000864 ! exemplar - -[Term] -id: SO:0000735 -name: sequence_location -synonym: "sequence location" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_sequence -synonym: "organelle sequence" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "mitochondrial sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000738 -name: nuclear_sequence -synonym: "nuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -synonym: "nucleomorphic sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000740 -name: plastid_sequence -synonym: "plastid sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000741 -name: kinetoplast -alt_id: SO:0000826 -def: "A kinetoplast is an interlocked network of thousands of minicircles and tens of maxi circles, located near the base of the flagellum of some protozoan species." [PMID:8395055] -synonym: "kinetoplast_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Kinetoplast "wiki" -is_a: SO:0001026 ! implied link automatically realized ! genome -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0000742 ! maxicircle -intersection_of: has_part SO:0000980 ! minicircle - -[Term] -id: SO:0000742 -name: maxicircle -alt_id: SO:0000827 -def: "A maxicircle is a replicon, part of a kinetoplast, that contains open reading frames and replicates via a rolling circle method." 
[PMID:8395055] -synonym: "maxicircle_chromosome" EXACT [] -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000741 ! kinetoplast - -[Term] -id: SO:0000743 -name: apicoplast_sequence -synonym: "apicoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -synonym: "chromoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -synonym: "chloroplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -synonym: "cyanelle sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -synonym: "leucoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -synonym: "proplastid sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_location -synonym: "plasmid location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -synonym: "amplification origin" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_location -synonym: "proviral location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -def: "The region of sequence that has been inserted and is being propogated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! 
clone - -[Term] -id: SO:0000754 -name: lambda_vector -def: "The lambda bacteriophage is the vector for the linear lambda clone. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -synonym: "lambda vector" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000755 -name: plasmid_vector -synonym: "plasmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Plasmid_vector#Vectors "wiki" -is_a: SO:0000440 ! vector_replicon -intersection_of: SO:0001235 ! replicon -intersection_of: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template." [SO:ma] -xref: http://en.wikipedia.org/wiki/CDNA "wiki" -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -synonym: "single strand cDNA" EXACT [] -synonym: "single stranded cDNA" EXACT [] -synonym: "single-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -synonym: "double strand cDNA" RELATED [] -synonym: "double stranded cDNA" EXACT [] -synonym: "double-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_obsolete: true - -[Term] -id: SO:0000760 -name: YAC_clone -is_obsolete: true - -[Term] -id: SO:0000761 -name: phagemid_clone -is_obsolete: true - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000763 -name: fosmid_clone -is_obsolete: true - -[Term] -id: SO:0000764 -name: BAC_clone -is_obsolete: true - -[Term] -id: SO:0000765 -name: cosmid_clone -is_obsolete: true - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -def: "A tRNA sequence that has a pyrrolysine anticodon, and a 3' pyrrolysine binding region." 
[SO:ke] -synonym: "pyrrolysyl tRNA" EXACT [] -synonym: "pyrrolysyl-transfer ribonucleic acid" EXACT [] -synonym: "pyrrolysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0001178 ! pyrrolysine_tRNA_primary_transcript - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome." [SO:ma] -is_a: SO:0000155 ! plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria. Processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw, issn:1362-4962] -comment: Added in response to comment from Kelly Williams from Indiana. Nov 2005. -synonym: "tmRNA coding piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw] -comment: Added in response to Kelly Williams from Indiana. Date: Nov 2005. -synonym: "tmRNA acceptor piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000771 -name: QTL -def: "A quantitative trait locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." 
[http://rgd.mcw.edu/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005. -synonym: "quantitative trait locus" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000772 -name: genomic_island -def: "A genomic island is an integrated mobile genetic element, characterized by size (over 10 Kb). It that has features that suggest a foreign origin. These can include nucleotide distribution (oligonucleotides signature, CG content etc.) that differs from the bulk of the chromosome and/or genes suggesting DNA mobility." [Phigo:at, SO:ke] -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -synonym: "genomic island" EXACT [] -xref: http://en.wikipedia.org/wiki/Genomic_island "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "pathogenic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands. -synonym: "metabolic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -def: "An adaptive island is a genomic island that provides an adaptive advantage to the host." [SO:ke] -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands. 
Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "adaptive island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands. Evolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA gene. John T. Sullivan and Clive W. Ronso PNAS 1998 Apr 28 95 (9) 5145-5149. -synonym: "symbiosis island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000779 -name: engineered_episome -def: "An episome that is engineered." [SO:xp] -comment: Requested by Lynn Crosby Jan 2006. -synonym: "engineered episome" EXACT [] -is_a: SO:0000637 ! implied link automatically realized ! engineered_plasmid -is_a: SO:0000768 ! implied link automatically realized ! episome -intersection_of: SO:0000768 ! episome -intersection_of: has_quality SO:0000783 ! 
engineered - -[Term] -id: SO:0000780 -name: transposable_element_attribute -comment: Added by KE Jan 2006 to capture the kinds of attributes of TEs -is_obsolete: true - -[Term] -id: SO:0000781 -name: transgenic -def: "Attribute describing sequence that has been integrated with foreign sequence." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000782 -name: natural -def: "An attribute describing a feature that occurs in nature." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000783 -name: engineered -def: "An attribute to describe a region that was modified in vitro." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000784 -name: foreign -def: "An attribute to describe a region from another species." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000785 -name: cloned_region -comment: Added in response to Lynn Crosby. A clone insert may be composed of many cloned regions. -synonym: "cloned region" EXACT [] -synonym: "cloned segment" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000786 -name: reagent_attribute -comment: Added jan 2006 by KE. -synonym: "reagent attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000787 -name: clone_attribute -is_obsolete: true - -[Term] -id: SO:0000788 -name: cloned -is_obsolete: true - -[Term] -id: SO:0000789 -name: validated -def: "An attribute to describe a feature that has been proven." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000790 -name: invalidated -def: "An attribute describing a feature that is invalidated." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000791 -name: cloned_genomic -is_obsolete: true - -[Term] -id: SO:0000792 -name: cloned_cDNA -is_obsolete: true - -[Term] -id: SO:0000793 -name: engineered_DNA -is_obsolete: true - -[Term] -id: SO:0000794 -name: engineered_rescue_region -def: "A rescue region that is engineered." 
[SO:xp] -synonym: "engineered rescue fragment" EXACT [] -synonym: "engineered rescue region" EXACT [] -synonym: "engineered rescue segment" EXACT [] -is_a: SO:0000411 ! implied link automatically realized ! rescue_region -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000411 ! rescue_region -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000795 -name: rescue_mini_gene -def: "A mini_gene that rescues." [SO:xp] -synonym: "rescue mini gene" EXACT [] -synonym: "rescue mini-gene" EXACT [] -is_a: SO:0000815 ! implied link automatically realized ! mini_gene -intersection_of: SO:0000815 ! mini_gene -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000796 -name: transgenic_transposable_element -def: "TE that has been modified in vitro, including insertion of DNA derived from a source other than the originating TE." [FB:mc] -comment: Modified as requested by Lynn - FB. May 2007. -synonym: "transgenic transposable element" EXACT [] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: derives_from SO:0000151 ! clone -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000797 -name: natural_transposable_element -def: "TE that exists (or existed) in nature." [FB:mc] -synonym: "natural transposable element" EXACT [] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000782 ! natural - -[Term] -id: SO:0000798 -name: engineered_transposable_element -def: "TE that has been modified by manipulations in vitro." [FB:mc] -synonym: "engineered transposable element" EXACT [] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -is_a: SO:0000804 ! implied link automatically realized ! 
engineered_region -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000799 -name: engineered_foreign_transposable_element -def: "A transposable_element that is engineered and foreign." [FB:mc] -synonym: "engineered foreign transposable element" EXACT [] -is_a: SO:0000720 ! implied link automatically realized ! foreign_transposable_element -is_a: SO:0000798 ! implied link automatically realized ! engineered_transposable_element -is_a: SO:0000805 ! implied link automatically realized ! engineered_foreign_region -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000800 -name: assortment_derived_duplication -def: "A multi-chromosome duplication aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment derived duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000801 -name: assortment_derived_deficiency_plus_duplication -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency and a duplication." [FB:gm] -synonym: "assortment derived deficiency plus duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000802 -name: assortment_derived_deficiency -def: "A multi-chromosome deficiency aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment-derived deficiency" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000803 -name: assortment_derived_aneuploid -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency or a duplication." [FB:gm] -synonym: "assortment derived aneuploid" EXACT [] -is_a: SO:0001504 ! 
assortment_derived_variation - -[Term] -id: SO:0000804 -name: engineered_region -def: "A region that is engineered." [SO:xp] -synonym: "construct" EXACT [] -synonym: "engineered region" EXACT [] -synonym: "engineered sequence" EXACT [] -is_a: SO:0001409 ! biomaterial_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000805 -name: engineered_foreign_region -def: "A region that is engineered and foreign." [SO:xp] -synonym: "engineered foreign region" EXACT [] -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000806 -name: fusion -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000807 -name: engineered_tag -def: "A tag that is engineered." [SO:xp] -synonym: "engineered tag" EXACT [] -is_a: SO:0000324 ! implied link automatically realized ! tag -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000324 ! tag -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000808 -name: validated_cDNA_clone -def: "A cDNA clone that has been validated." [SO:xp] -synonym: "validated cDNA clone" EXACT [] -is_a: SO:0000317 ! implied link automatically realized ! cDNA_clone -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000789 ! validated - -[Term] -id: SO:0000809 -name: invalidated_cDNA_clone -def: "A cDNA clone that is invalid." [SO:xp] -synonym: "invalidated cDNA clone" EXACT [] -is_a: SO:0000317 ! implied link automatically realized ! cDNA_clone -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000790 ! invalidated - -[Term] -id: SO:0000810 -name: chimeric_cDNA_clone -def: "A cDNA clone invalidated because it is chimeric." [SO:xp] -synonym: "chimeric cDNA clone" EXACT [] -is_a: SO:0000809 ! 
implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA - -[Term] -id: SO:0000811 -name: genomically_contaminated_cDNA_clone -def: "A cDNA clone invalidated by genomic contamination." [SO:xp] -synonym: "genomically contaminated cDNA clone" EXACT [] -is_a: SO:0000809 ! implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000414 ! invalidated_by_genomic_contamination - -[Term] -id: SO:0000812 -name: polyA_primed_cDNA_clone -def: "A cDNA clone invalidated by polyA priming." [SO:xp] -synonym: "polyA primed cDNA clone" EXACT [] -is_a: SO:0000809 ! implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000415 ! invalidated_by_genomic_polyA_primed_cDNA - -[Term] -id: SO:0000813 -name: partially_processed_cDNA_clone -def: "A cDNA invalidated clone by partial processing." [SO:xp] -synonym: "partially processed cDNA clone" EXACT [] -is_a: SO:0000809 ! implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000416 ! invalidated_by_partial_processing - -[Term] -id: SO:0000814 -name: rescue -def: "An attribute describing a region's ability, when introduced to a mutant organism, to re-establish (rescue) a phenotype." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000815 -name: mini_gene -def: "By definition, minigenes are short open-reading frames (ORF), usually encoding approximately 9 to 20 amino acids, which are expressed in vivo (as distinct from being synthesized as peptide or protein ex vivo and subsequently injected). 
The in vivo synthesis confers a distinct advantage: the expressed sequences can enter both antigen presentation pathways, MHC I (inducing CD8+ T- cells, which are usually cytotoxic T-lymphocytes (CTL)) and MHC II (inducing CD4+ T-cells, usually 'T-helpers' (Th)); and can encounter B-cells, inducing antibody responses. Three main vector approaches have been used to deliver minigenes: viral vectors, bacterial vectors and plasmid DNA." [PMID:15992143] -synonym: "mini gene" EXACT [] -is_a: SO:0000236 ! ORF - -[Term] -id: SO:0000816 -name: rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "rescue gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000817 -name: wild_type -def: "An attribute describing sequence with the genotype found in nature and/or standard laboratory stock." [SO:ke] -synonym: "wild type" EXACT [] -xref: http://en.wikipedia.org/wiki/Wild_type "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000818 -name: wild_type_rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "wild type rescue gene" EXACT [] -is_a: SO:0000816 ! rescue_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000817 ! wild_type - -[Term] -id: SO:0000819 -name: mitochondrial_chromosome -def: "A chromosome originating in a mitochondria." [SO:xp] -synonym: "mitochondrial chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000820 -name: chloroplast_chromosome -def: "A chromosome originating in a chloroplast." [SO:xp] -synonym: "chloroplast chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000745 ! 
chloroplast_sequence - -[Term] -id: SO:0000821 -name: chromoplast_chromosome -def: "A chromosome originating in a chromoplast." [SO:xp] -synonym: "chromoplast chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000822 -name: cyanelle_chromosome -def: "A chromosome originating in a cyanelle." [SO:xp] -synonym: "cyanelle chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000823 -name: leucoplast_chromosome -def: "A chromosome with origin in a leucoplast." [SO:xp] -synonym: "leucoplast chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000824 -name: macronuclear_chromosome -def: "A chromosome originating in a macronucleus." [SO:xp] -synonym: "macronuclear chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000083 ! macronuclear_sequence - -[Term] -id: SO:0000825 -name: micronuclear_chromosome -def: "A chromosome originating in a micronucleus." [SO:xp] -synonym: "micronuclear chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000084 ! micronuclear_sequence - -[Term] -id: SO:0000828 -name: nuclear_chromosome -def: "A chromosome originating in a nucleus." [SO:xp] -synonym: "nuclear chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000738 ! 
nuclear_sequence - -[Term] -id: SO:0000829 -name: nucleomorphic_chromosome -def: "A chromosome originating in a nucleomorph." [SO:xp] -synonym: "nucleomorphic chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000832 -name: promoter_region -def: "A region of sequence which is part of a promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -def: "A region of a mature transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! 
transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000838 -name: rRNA_primary_transcript_region -def: "A region of an rRNA primary transcript." [SO:ke] -comment: To allow transcribed_spacer_region to have a path to the root. -synonym: "rRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! 
biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000840 -name: repeat_component -def: "A region of a repeated sequence." [SO:ke] -comment: A manufactured to group the parts of repeats, to give them an is_a path back to the root. -synonym: "repeat component" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000843 -name: bacterial_RNApol_promoter_region -def: "A region which is part of a bacterial RNA polymerase promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of bacterial_RNApol_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000844 -name: RNApol_II_promoter_region -def: "A region of sequence which is a promoter for RNA polymerase II." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_II_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000845 -name: RNApol_III_promoter_type_1_region -def: "A region of sequence which is a promoter for RNA polymerase III type 1." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_1 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000846 -name: RNApol_III_promoter_type_2_region -def: "A region of sequence which is a promoter for RNA polymerase III type 2." 
[SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_2 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000847 -name: tmRNA_region -def: "A region of a tmRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of tmRNA, thus giving them an is_a path back to the root. -synonym: "tmRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000584 ! tmRNA - -[Term] -id: SO:0000848 -name: LTR_component -synonym: "long term repeat component" EXACT [] -synonym: "LTR component" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000849 -name: three_prime_LTR_component -synonym: "3' long terminal repeat component" EXACT [] -synonym: "three prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000850 -name: five_prime_LTR_component -synonym: "5' long term repeat component" EXACT [] -synonym: "five prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000851 -name: CDS_region -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0000853 -name: homologous_region -def: "A region that is homologous to another region." [SO:ke] -synonym: "homolog" EXACT [] -synonym: "homologous region" EXACT [] -synonym: "homologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Homology_(biology) "wiki" -is_a: SO:0000330 ! implied link automatically realized ! conserved_region -intersection_of: SO:0000330 ! 
conserved_region -intersection_of: has_quality SO:0000857 ! homologous - -[Term] -id: SO:0000854 -name: paralogous_region -def: "A homologous_region that is paralogous to another region." [SO:ke] -comment: A term to be used in conjunction with the paralogous_to relationship. -synonym: "paralog" EXACT [] -synonym: "paralogous region" EXACT [] -synonym: "paralogue" EXACT [] -xref: http://en.wikipedia.org/wiki/Paralog#Paralogy "wiki" -is_a: SO:0000853 ! implied link automatically realized ! homologous_region -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000859 ! paralogous - -[Term] -id: SO:0000855 -name: orthologous_region -def: "A homologous_region that is orthologous to another region." [SO:ke] -comment: This term should be used in conjunction with the similarity relationships defined in SO. -synonym: "ortholog" EXACT [] -synonym: "orthologous region" EXACT [] -synonym: "orthologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Ortholog#Orthology "wiki" -is_a: SO:0000853 ! implied link automatically realized ! homologous_region -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000858 ! orthologous - -[Term] -id: SO:0000856 -name: conserved -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000857 -name: homologous -def: "Similarity due to common ancestry." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000858 -name: orthologous -def: "An attribute describing a kind of homology where divergence occured after a speciation event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000859 -name: paralogous -def: "An attribute describing a kind of homology where divergence occurred after a duplication event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000860 -name: syntenic -def: "Attribute describing sequence regions occurring in same order on chromosome of different species." [SO:ke] -xref: http://en.wikipedia.org/wiki/Syntenic "wiki" -is_a: SO:0000856 ! 
conserved - -[Term] -id: SO:0000861 -name: capped_primary_transcript -def: "A primary transcript that is capped." [SO:xp] -synonym: "capped primary transcript" EXACT [] -is_a: SO:0000185 ! implied link automatically realized ! primary_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000862 -name: capped_mRNA -def: "An mRNA that is capped." [SO:xp] -synonym: "capped mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000863 -name: mRNA_attribute -def: "An attribute describing an mRNA feature." [SO:ke] -synonym: "mRNA attribute" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000864 -name: exemplar -def: "An attribute describing a sequence is representative of a class of similar sequences." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000865 -name: frameshift -def: "An attribute describing a sequence that contains a mutation involving the deletion or insertion of one or more bases, where this number is not divisible by 3." [SO:ke] -xref: http://en.wikipedia.org/wiki/Frameshift "wiki" -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000866 -name: minus_1_frameshift -def: "A frameshift caused by deleting one base." [SO:ke] -synonym: "minus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000867 -name: minus_2_frameshift -def: "A frameshift caused by deleting two bases." [SO:ke] -synonym: "minus 2 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000868 -name: plus_1_frameshift -def: "A frameshift caused by inserting one base." [SO:ke] -synonym: "plus 1 frameshift" EXACT [] -is_a: SO:0000865 ! 
frameshift - -[Term] -id: SO:0000869 -name: plus_2_framshift -def: "A frameshift caused by inserting two bases." [SO:ke] -synonym: "plus 2 framshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000870 -name: trans_spliced -def: "An attribute describing transcript sequence that is created by splicing exons from diferent genes." [SO:ke] -synonym: "trans-spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000871 -name: polyadenylated_mRNA -def: "An mRNA that is polyadenylated." [SO:xp] -synonym: "polyadenylated mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000610 ! polyA_sequence -intersection_of: has_quality SO:0000246 ! polyadenylated - -[Term] -id: SO:0000872 -name: trans_spliced_mRNA -def: "An mRNA that is trans-spliced." [SO:xp] -synonym: "trans-spliced mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -is_a: SO:0000479 ! implied link automatically realized ! trans_spliced_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000636 ! spliced_leader_RNA -intersection_of: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000873 -name: edited_transcript -def: "A transcript that is edited." [SO:ke] -synonym: "edited transcript" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: guided_by SO:0000602 ! guide_RNA -intersection_of: has_part SO:0000977 ! anchor_binding_site -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000874 -name: edited_transcript_by_A_to_I_substitution -def: "A transcript that has been edited by A to I substitution." [SO:ke] -synonym: "edited transcript by A to I substitution" EXACT [] -is_a: SO:0000873 ! edited_transcript -is_a: SO:0000929 ! implied link automatically realized ! 
edited_mRNA - -[Term] -id: SO:0000875 -name: bound_by_protein -def: "An attribute describing a sequence that is bound by a protein." [SO:ke] -synonym: "bound by protein" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000876 -name: bound_by_nucleic_acid -def: "An attribute describing a sequence that is bound by a nucleic acid." [SO:ke] -synonym: "bound by nucleic acid" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000877 -name: alternatively_spliced -def: "An attribute describing a situation where a gene may encode for more than 1 transcript." [SO:ke] -synonym: "alternatively spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000878 -name: monocistronic -def: "An attribute describing a sequence that contains the code for one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000879 -name: dicistronic -def: "An attribute describing a sequence that contains the code for two gene products." [SO:ke] -is_a: SO:0000880 ! polycistronic - -[Term] -id: SO:0000880 -name: polycistronic -def: "An attribute describing a sequence that contains the code for more than one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000881 -name: recoded -def: "An attribute describing an mRNA sequence that has been reprogrammed at translation, causing localized alterations." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000882 -name: codon_redefined -def: "An attribute describing the alteration of codon meaning." [SO:ke] -synonym: "codon redefined" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000883 -name: stop_codon_read_through -def: "A stop codon redefined to be a new amino acid." [SO:ke] -synonym: "stop codon read through" EXACT [] -synonym: "stop codon readthrough" RELATED [] -is_a: SO:0000145 ! 
recoded_codon - -[Term] -id: SO:0000884 -name: stop_codon_redefined_as_pyrrolysine -def: "A stop codon redefined to be the new amino acid, pyrrolysine." [SO:ke] -synonym: "stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000885 -name: stop_codon_redefined_as_selenocysteine -def: "A stop codon redefined to be the new amino acid, selenocysteine." [SO:ke] -synonym: "stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000886 -name: recoded_by_translational_bypass -def: "Recoded mRNA where a block of nucleotides is not translated." [SO:ke] -synonym: "recoded by translational bypass" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000887 -name: translationally_frameshifted -def: "Recoding by frameshifting a particular site." [SO:ke] -synonym: "translationally frameshifted" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000888 -name: maternally_imprinted_gene -def: "A gene that is maternally_imprinted." [SO:xp] -synonym: "maternally imprinted gene" EXACT [] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000135 ! maternally_imprinted - -[Term] -id: SO:0000889 -name: paternally_imprinted_gene -def: "A gene that is paternally imprinted." [SO:xp] -synonym: "paternally imprinted gene" EXACT [] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000136 ! paternally_imprinted - -[Term] -id: SO:0000890 -name: post_translationally_regulated_gene -def: "A gene that is post translationally regulated." [SO:xp] -synonym: "post translationally regulated gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000130 ! 
post_translationally_regulated - -[Term] -id: SO:0000891 -name: negatively_autoregulated_gene -def: "A gene that is negatively autoreguated." [SO:xp] -synonym: "negatively autoregulated gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000473 ! negatively_autoregulated - -[Term] -id: SO:0000892 -name: positively_autoregulated_gene -def: "A gene that is positively autoregulated." [SO:xp] -synonym: "positively autoregulated gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000475 ! positively_autoregulated - -[Term] -id: SO:0000893 -name: silenced -def: "An attribute describing an epigenetic process where a gene is inactivated at transcriptional or translational level." [SO:ke] -xref: http://en.wikipedia.org/wiki/Silenced "wiki" -is_a: SO:0000126 ! transcriptionally_repressed - -[Term] -id: SO:0000894 -name: silenced_by_DNA_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA modifications, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0000895 -name: silenced_by_DNA_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA methylation, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA methylation" EXACT [] -is_a: SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000896 -name: translationally_regulated_gene -def: "A gene that is translationally regulated." [SO:xp] -synonym: "translationally regulated gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000131 ! 
translationally_regulated - -[Term] -id: SO:0000897 -name: allelically_excluded_gene -def: "A gene that is allelically_excluded." [SO:xp] -synonym: "allelically excluded gene" EXACT [] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000137 ! allelically_excluded - -[Term] -id: SO:0000898 -name: epigenetically_modified_gene -def: "A gene that is epigenetically modified." [SO:ke] -synonym: "epigenetically modified gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -is_a: SO:0001720 ! implied link automatically realized ! epigenetically_modified_region -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000899 -name: nuclear_mitochondrial -def: "An attribute describing a nuclear pseudogene of a mitochndrial gene." [SO:ke] -synonym: "nuclear mitochondrial" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000900 -name: processed -def: "An attribute describing a pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promotors, but often including a polyA tail." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000901 -name: unequally_crossed_over -def: "An attribute describing a pseudogene that was created by tandem duplication and unequal crossing over during recombination." [SO:ke] -synonym: "unequally crossed over" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000902 -name: transgene -def: "A transgene is a gene that has been transferred naturally or by any of a number of genetic engineering techniques from one organism to another." [SO:xp] -xref: http://en.wikipedia.org/wiki/Transgene "wiki" -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000781 ! 
transgenic - -[Term] -id: SO:0000903 -name: endogenous_retroviral_sequence -synonym: "endogenous retroviral sequence" EXACT [] -is_a: SO:0000751 ! proviral_location - -[Term] -id: SO:0000904 -name: rearranged_at_DNA_level -def: "An attribute to describe the sequence of a feature, where the DNA is rearranged." [SO:ke] -synonym: "rearranged at DNA level" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000905 -name: status -def: "An attribute describing the status of a feature, based on the available evidence." [SO:ke] -comment: This term is the hypernym of attributes and should not be annotated to. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000906 -name: independently_known -def: "Attribute to describe a feature that is independently known - not predicted." [SO:ke] -synonym: "independently known" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000907 -name: supported_by_sequence_similarity -def: "An attribute to describe a feature that has been predicted using sequence similarity techniques." [SO:ke] -synonym: "supported by sequence similarity" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000908 -name: supported_by_domain_match -def: "An attribute to describe a feature that has been predicted using sequence similarity of a known domain." [SO:ke] -synonym: "supported by domain match" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000909 -name: supported_by_EST_or_cDNA -def: "An attribute to describe a feature that has been predicted using sequence similarity to EST or cDNA data." [SO:ke] -synonym: "supported by EST or cDNA" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000910 -name: orphan -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000911 -name: predicted_by_ab_initio_computation -def: "An attribute describing a feature that is predicted by a computer program that did not rely on sequence similarity." 
[SO:ke] -synonym: "predicted by ab initio computation" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000912 -name: asx_turn -alt_id: BS:00203 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Aspartate or Asparagine (Asx), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0000913 -name: cloned_cDNA_insert -def: "A clone insert made from cDNA." [SO:xp] -synonym: "cloned cDNA insert" EXACT [] -is_a: SO:0000753 ! implied link automatically realized ! clone_insert -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000914 -name: cloned_genomic_insert -def: "A clone insert made from genomic DNA." [SO:xp] -synonym: "cloned genomic insert" EXACT [] -is_a: SO:0000753 ! implied link automatically realized ! clone_insert -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000915 -name: engineered_insert -def: "A clone insert that is engineered." [SO:xp] -synonym: "engineered insert" EXACT [] -is_a: SO:0000753 ! implied link automatically realized ! clone_insert -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000916 -name: edit_operation -synonym: "edit operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000917 -name: insert_U -def: "An edit to insert a U." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "insert U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000918 -name: delete_U -def: "An edit to delete a uridine." 
[SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "delete U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000919 -name: substitute_A_to_I -def: "An edit to substitute an I for an A." [SO:ke] -synonym: "substitute A to I" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000920 -name: insert_C -def: "An edit to insert a cytidine." [SO:ke] -synonym: "insert C" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000921 -name: insert_dinucleotide -def: "An edit to insert a dinucleotide." [SO:ke] -synonym: "insert dinucleotide" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000922 -name: substitute_C_to_U -def: "An edit to substitute an U for a C." [SO:ke] -synonym: "substitute C to U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000923 -name: insert_G -def: "An edit to insert a G." [SO:ke] -synonym: "insert G" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000924 -name: insert_GC -def: "An edit to insert a GC dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GC" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000925 -name: insert_GU -def: "An edit to insert a GU dinucleotide." 
[SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000926 -name: insert_CU -def: "An edit to insert a CU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert CU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000927 -name: insert_AU -def: "An edit to insert a AU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. 
Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000928 -name: insert_AA -def: "An edit to insert a AA dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AA" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000929 -name: edited_mRNA -def: "An mRNA that is edited." [SO:xp] -synonym: "edited mRNA" EXACT [] -is_a: SO:0000873 ! implied link automatically realized ! edited_transcript -intersection_of: SO:0000873 ! edited_transcript -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000930 -name: guide_RNA_region -def: "A region of guide RNA." [SO:ma] -synonym: "guide RNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000602 ! guide_RNA - -[Term] -id: SO:0000931 -name: anchor_region -def: "A region of a guide_RNA that base-pairs to a target mRNA." [SO:jk] -synonym: "anchor region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000932 -name: pre_edited_mRNA -synonym: "pre-edited mRNA" EXACT [] -is_a: SO:0000120 ! 
protein_coding_primary_transcript - -[Term] -id: SO:0000933 -name: intermediate -def: "An attribute to describe a feature between stages of processing." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000934 -name: miRNA_target_site -def: "A miRNA target site is a binding site where the molecule is a micro RNA." [FB:cds] -synonym: "miRNA target site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000935 -name: edited_CDS -def: "A CDS that is edited." [SO:xp] -synonym: "edited CDS" EXACT [] -is_a: SO:0000316 ! implied link automatically realized ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000936 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -synonym: "vertebrate immunoglobulin T cell receptor rearranged segment" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000937 -name: vertebrate_immune_system_feature -is_obsolete: true - -[Term] -id: SO:0000938 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor rearranged gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000939 -name: vertebrate_immune_system_gene_recombination_signal_feature -synonym: "vertebrate immune system gene recombination signal feature" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000940 -name: recombinationally_rearranged -synonym: "recombinationally rearranged" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000941 -name: recombinationally_rearranged_vertebrate_immune_system_gene -def: "A recombinationally rearranged gene of the vertebrate immune system." [SO:xp] -synonym: "recombinationally rearranged vertebrate immune system gene" EXACT [] -is_a: SO:0000456 ! 
recombinationally_rearranged_gene - -[Term] -id: SO:0000942 -name: attP_site -def: "An integration/excision site of a phage chromosome at which a recombinase acts to insert the phage DNA at a cognate integration/excision site on a bacterial chromosome." [SO:as] -synonym: "attP site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0001042 ! phage_sequence - -[Term] -id: SO:0000943 -name: attB_site -def: "An integration/excision site of a bacterial chromosome at which a recombinase acts to insert foreign DNA containing a cognate integration/excision site." [SO:as] -synonym: "attB site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000944 -name: attL_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attB_site and the 3' portion of attP_site." [SO:as] -synonym: "attBP'" RELATED [] -synonym: "attL site" RELATED [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000945 -name: attR_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attP_site and the 3' portion of attB_site." [SO:as] -synonym: "attPB'" RELATED [] -synonym: "attR site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000946 -name: integration_excision_site -def: "A region specifically recognised by a recombinase, which inserts or removes another region marked by a distinct cognate integration/excision site." [SO:as] -synonym: "attachment site" RELATED [] -synonym: "integration excision site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000947 -name: resolution_site -def: "A region specifically recognised by a recombinase, which separates a physically contiguous circle of DNA into two physically separate circles." [SO:as] -synonym: "res site" EXACT [] -synonym: "resolution site" EXACT [] -is_a: SO:0000342 ! 
site_specific_recombination_target_region - -[Term] -id: SO:0000948 -name: inversion_site -def: "A region specifically recognised by a recombinase, which inverts the region flanked by a pair of sites." [SO:ma] -comment: A target region for site-specific inversion of a DNA region and which carries binding sites for a site-specific recombinase and accessory proteins as well as the site for specific cleavage by the recombinase. -synonym: "inversion site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000949 -name: dif_site -def: "A site at which replicated bacterial circular chromosomes are decatenated by site specific resolvase." [SO:as] -synonym: "dif site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000950 -name: attC_site -def: "An attC site is a sequence required for the integration of a DNA of an integron." [SO:as] -synonym: "attC site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000951 -name: eukaryotic_terminator -synonym: "eukaryotic terminator" EXACT [] -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000952 -name: oriV -def: "An origin of vegetative replication in plasmids and phages." [SO:as] -synonym: "origin of vegetative replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000953 -name: oriC -def: "An origin of bacterial chromosome replication." [SO:as] -synonym: "origin of bacterial chromosome replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000954 -name: DNA_chromosome -def: "Structural unit composed of a self-replicating, DNA molecule." [SO:ma] -synonym: "DNA chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000352 ! 
DNA - -[Term] -id: SO:0000955 -name: double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded DNA molecule." [SO:ma] -synonym: "double stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! implied link automatically realized ! DNA_chromosome -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000985 ! double - -[Term] -id: SO:0000956 -name: single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded DNA molecule." [SO:ma] -synonym: "single stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! implied link automatically realized ! DNA_chromosome -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000984 ! single - -[Term] -id: SO:0000957 -name: linear_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear DNA molecule." [SO:ma] -synonym: "linear double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! implied link automatically realized ! double_stranded_DNA_chromosome -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000958 -name: circular_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular DNA molecule." [SO:ma] -synonym: "circular double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! implied link automatically realized ! double_stranded_DNA_chromosome -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000959 -name: linear_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear DNA molecule." [SO:ma] -synonym: "linear single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! implied link automatically realized ! single_stranded_DNA_chromosome -intersection_of: SO:0000956 ! 
single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000960 -name: circular_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! implied link automatically realized ! single_stranded_DNA_chromosome -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000961 -name: RNA_chromosome -def: "Structural unit composed of a self-replicating, RNA molecule." [SO:ma] -synonym: "RNA chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000356 ! RNA - -[Term] -id: SO:0000962 -name: single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded RNA molecule." [SO:ma] -synonym: "single stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! implied link automatically realized ! RNA_chromosome -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000984 ! single - -[Term] -id: SO:0000963 -name: linear_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear RNA molecule." [SO:ma] -synonym: "linear single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! implied link automatically realized ! single_stranded_RNA_chromosome -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000964 -name: linear_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear RNA molecule." [SO:ma] -synonym: "linear double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! implied link automatically realized ! 
double_stranded_RNA_chromosome -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000965 -name: double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded RNA molecule." [SO:ma] -synonym: "double stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! implied link automatically realized ! RNA_chromosome -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000985 ! double - -[Term] -id: SO:0000966 -name: circular_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! implied link automatically realized ! single_stranded_RNA_chromosome -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000967 -name: circular_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular RNA molecule." [SO:ma] -synonym: "circular double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! implied link automatically realized ! double_stranded_RNA_chromosome -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000968 -name: sequence_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "sequence replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000969 -name: rolling_circle -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070581. -synonym: "rolling circle" EXACT [] -xref: http://en.wikipedia.org/wiki/Rolling_circle "wiki" -is_obsolete: true - -[Term] -id: SO:0000970 -name: theta_replication -comment: This has been obsoleted as it represents a process. 
replaced_by: GO:0070582 -synonym: "theta replication" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000971 -name: DNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0006260. -synonym: "DNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000972 -name: RNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "RNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000973 -name: insertion_sequence -def: "A terminal_inverted_repeat_element that is bacterial and only encodes the functions required for its transposition between these inverted repeats." [SO:as] -synonym: "insertion sequence" EXACT [] -synonym: "IS" RELATED [] -xref: http://en.wikipedia.org/wiki/Insertion_sequence "wiki" -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000975 -name: minicircle_gene -synonym: "minicircle gene" EXACT [] -is_a: SO:0000089 ! implied link automatically realized ! kinetoplast_gene -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000980 ! minicircle - -[Term] -id: SO:0000976 -name: cryptic -def: "A feature_attribute describing a feature that is not manifest under normal conditions." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000977 -name: anchor_binding_site -comment: Part of an edited transcript only. -synonym: "anchor binding site" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000978 -name: template_region -def: "A region of a guide_RNA that specifies the insertions and deletions of bases in the editing of a target mRNA." [SO:jk] -synonym: "information region" EXACT [] -synonym: "template region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000979 -name: gRNA_encoding -def: "A non-protein_coding gene that encodes a guide_RNA." [SO:ma] -synonym: "gRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000980 -name: minicircle -alt_id: SO:0000974 -def: "A minicircle is a replicon, part of a kinetoplast, that encodes for guide RNAs." [PMID:8395055] -synonym: "minicircle_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Minicircle "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000741 ! kinetoplast - -[Term] -id: SO:0000981 -name: rho_dependent_bacterial_terminator -synonym: "rho dependent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000982 -name: rho_independent_bacterial_terminator -synonym: "rho independent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000983 -name: strand_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "strand attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000984 -name: single -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000985 -name: double -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000986 -name: topology_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "topology attribute" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000987 -name: linear -def: "A quality of a nucleotide polymer that has a 3'-terminal residue and a 5'-terminal residue." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "two-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000988 ! circular - -[Term] -id: SO:0000988 -name: circular -def: "A quality of a nucleotide polymer that has no terminal nucleotide residues." 
[SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "zero-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000987 ! linear - -[Term] -id: SO:0000989 -name: class_II_RNA -def: "Small non-coding RNA (59-60 nt long) containing 5' and 3' ends that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -synonym: "class II RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000990 -name: class_I_RNA -def: "Small non-coding RNA (55-65 nt long) containing highly conserved 5' and 3' ends (16 and 8 nt, respectively) that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -comment: Requested by Karen Pilcher - Dictybase. song-Term Tracker-1574577. -synonym: "class I RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000991 -name: genomic_DNA -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "genomic DNA" EXACT [] -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000992 -name: BAC_cloned_genomic_insert -comment: Requested by Andy Schroder - Flybase Harvard, Nov 2006. -synonym: "BAC cloned genomic insert" EXACT [] -is_a: SO:0000914 ! implied link automatically realized ! cloned_genomic_insert -intersection_of: SO:0000914 ! cloned_genomic_insert -intersection_of: derives_from SO:0000153 ! BAC - -[Term] -id: SO:0000993 -name: consensus -comment: Term added Dec 06 to comply with mapping to MGED terms. It should be used to generate consensus regions. The specific cross product terms they require are consensus_region and consensus_mRNA. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000994 -name: consensus_region -comment: DO not obsolete without considering MGED mapping. 
-synonym: "consensus region" EXACT [] -is_a: SO:0001410 ! experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000995 -name: consensus_mRNA -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -is_a: SO:0000994 ! implied link automatically realized ! consensus_region -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000996 -name: predicted_gene -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "predicted gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000732 ! predicted - -[Term] -id: SO:0000997 -name: gene_fragment -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "gene fragment" EXACT [] -is_a: SO:0000842 ! implied link automatically realized ! gene_component_region -intersection_of: SO:0000842 ! gene_component_region -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0000998 -name: recursive_splice_site -def: "A recursive splice site is a splice site which subdivides a large intron. Recursive splicing is a mechanism that splices large introns by sub dividing the intron at non exonic elements and alternate exons." [http://www.genetics.org/cgi/content/full/170/2/661] -synonym: "recursive splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000999 -name: BAC_end -def: "A region of sequence from the end of a BAC clone that may provide a highly specific marker." [SO:ke] -comment: Requested by Keith Boroevich December, 2006. -synonym: "BAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000153 ! 
BAC - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001003 -name: solo_LTR -def: "A recombination product between the 2 LTR of the same element." [SO:ke] -comment: Requested by Hadi Quesneville January 2007. -synonym: "solo LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0001004 -name: low_complexity -synonym: "low complexity" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0001005 -name: low_complexity_region -synonym: "low complexity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001004 ! low_complexity - -[Term] -id: SO:0001006 -name: prophage -def: "A phage genome after it has established in the host genome in a latent/immune state either as a plasmid or as an integrated \"island\"." [GOC:jl] -xref: http://en.wikipedia.org/wiki/Prophage "wiki" -is_a: SO:0000113 ! 
proviral_region - -[Term] -id: SO:0001007 -name: cryptic_prophage -def: "A remnant of an integrated prophage in the host genome or an \"island\" in the host genome that includes phage like-genes." [GOC:jl] -comment: This is not cryptic in the same sense as a cryptic gene or cryptic splice site. -synonym: "cryptic prophage" EXACT [] -xref: http://ecoliwiki.net/colipedia/index.php/Category\:Cryptic_Prophage.w -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001008 -name: tetraloop -def: "A base-paired stem with loop of 4 non-hydrogen bonded nucleotides." [SO:ke] -xref: http://en.wikipedia.org/wiki/Tetraloop "wiki" -is_a: SO:0000313 ! stem_loop - -[Term] -id: SO:0001009 -name: DNA_constraint_sequence -def: "A double-stranded DNA used to control macromolecular structure and function." [http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au\]&dispmax=50] -synonym: "DNA constraint" EXACT [] -synonym: "DNA constraint sequence" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0001010 -name: i_motif -def: "A cytosine rich domain whereby strands associate both inter- and intramolecularly at moderately acidic pH." [PMID:9753739] -synonym: "i motif" EXACT [] -synonym: "short intercalated motif" EXACT [] -is_a: SO:0000142 ! DNA_sequence_secondary_structure - -[Term] -id: SO:0001011 -name: PNA_oligo -def: "Peptide nucleic acid, is a chemical not known to occur naturally but is artificially synthesized and used in some biological research and medical treatments. The PNA backbone is composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [SO:ke] -synonym: "peptide nucleic acid" EXACT [] -synonym: "PNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Peptide_nucleic_acid "wiki" -is_a: SO:0001247 ! implied link automatically realized ! synthetic_oligo -intersection_of: SO:0001247 ! 
synthetic_oligo -intersection_of: has_quality SO:0001184 ! PNA - -[Term] -id: SO:0001012 -name: DNAzyme -def: "A DNA sequence with catalytic activity." [SO:cb] -comment: Added by request from Colin Batchelor. -synonym: "catalytic DNA" EXACT [] -synonym: "deoxyribozyme" RELATED [] -synonym: "DNA enzyme" EXACT [] -is_a: SO:0000696 ! implied link automatically realized ! oligo -intersection_of: SO:0000696 ! oligo -intersection_of: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0001013 -name: MNP -def: "A multiple nucleotide polymorphism with alleles of common length > 1, for example AAA/TTT." [http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?rs=rs2067431] -synonym: "multiple nucleotide polymorphism" RELATED [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0001014 -name: intron_domain -comment: Requested by Colin Batchelor, Feb 2007. -synonym: "intron domain" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000188 ! intron - -[Term] -id: SO:0001015 -name: wobble_base_pair -def: "A type of non-canonical base pairing, most commonly between G and U, which is important for the secondary structure of RNAs. It has similar thermodynamic stability to the Watson-Crick pairing. Wobble base pairs only have two hydrogen bonds. Other wobble base pair possibilities are I-A, I-U and I-C." [PMID:11256617] -synonym: "wobble base pair" EXACT [] -synonym: "wobble pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Wobble_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0001016 -name: internal_guide_sequence -def: "A purine-rich sequence in the group I introns which determines the locations of the splice sites in group I intron splicing and has catalytic activity." [SO:cb] -synonym: "IGS" EXACT [] -synonym: "internal guide sequence" EXACT [] -is_a: SO:0001014 ! intron_domain -relationship: part_of SO:0000587 ! 
group_I_intron - -[Term] -id: SO:0001017 -name: silent_mutation -def: "A sequence variant that does not affect protein function. Silent mutations may occur in genic ( CDS, UTR, intron etc) and intergenic regions. Silent mutations may have affects on processes such as splicing and regulation." [SO:ke] -comment: Added in March 2007 in after meeting with pharmgkb. Although this term is in common usage, it is better to annotate with the most specific term possible, such as synonymous codon, intron variant etc. -synonym: "silent mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Silent_mutation "wiki" -is_a: SO:0001537 ! structural_variant - -[Term] -id: SO:0001018 -name: epitope -def: "A region of a macromolecule that is recognized by the immune system." [http://en.wikipedia.org/wiki/Epitope] -comment: Requested by Trish Whetzel. -xref: http://en.wikipedia.org/wiki/Epitope "wiki" -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -subset: SOFA -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0001020 -name: sequence_variant_affecting_copy_number -synonym: "mutation affecting copy number" EXACT [] -synonym: "sequence variant affecting copy number" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0001021 -name: chromosome_breakpoint -alt_id: SO:0001242 -synonym: "aberration breakpoint" EXACT [] -synonym: "aberration_junction" EXACT [] -synonym: "chromosome breakpoint" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0001022 -name: inversion_breakpoint -def: "The point within a chromosome where an inversion begins or ends." 
[SO:cb] -synonym: "inversion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001023 -name: allele -def: "An allele is one of a set of coexisting sequence variants of a gene." [SO:immuno_workshop] -synonym: "allelomorph" EXACT [] -xref: http://en.wikipedia.org/wiki/Allele "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000704 ! gene - -[Term] -id: SO:0001024 -name: haplotype -def: "A haplotype is one of a set of coexisting sequence variants of a haplotype block." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Haplotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000355 ! haplotype_block - -[Term] -id: SO:0001025 -name: polymorphic_sequence_variant -def: "A sequence variant that is segregating in one or more natural populations of a species." [SO:immuno_workshop] -synonym: "polymorphic sequence variant" EXACT [] -is_a: SO:0001023 ! allele - -[Term] -id: SO:0001026 -name: genome -def: "A genome is the sum of genetic material within a cell or virion." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genome "wiki" -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0001235 ! replicon - -[Term] -id: SO:0001027 -name: genotype -def: "A genotype is a variant genome, complete or incomplete." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0001026 ! genome - -[Term] -id: SO:0001028 -name: diplotype -def: "A diplotype is a pair of haplotypes from a given individual. It is a genotype where the phase is known." [SO:immuno_workshop] -is_a: SO:0001507 ! variant_collection - -[Term] -id: SO:0001029 -name: direction_attribute -synonym: "direction attribute" EXACT [] -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0001030 -name: forward -def: "Forward is an attribute of the feature, where the feature is in the 5' to 3' direction." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001031 -name: reverse -def: "Reverse is an attribute of the feature, where the feature is in the 3' to 5' direction. Again could be applied to primer." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001032 -name: mitochondrial_DNA -comment: This terms is used by MO. -synonym: "mitochondrial DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_DNA "wiki" -is_a: SO:0000737 ! implied link automatically realized ! mitochondrial_sequence -intersection_of: SO:0000737 ! mitochondrial_sequence -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001033 -name: chloroplast_DNA -comment: This term is used by MO. -synonym: "chloroplast DNA" EXACT [] -is_a: SO:0000745 ! implied link automatically realized ! chloroplast_sequence -intersection_of: SO:0000745 ! chloroplast_sequence -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001034 -name: mirtron -def: "A de-branched intron which mimics the structure of pre-miRNA and enters the miRNA processing pathway without Drosha mediated cleavage." [PMID:17589500, SO:ma] -comment: Ruby et al. Nature 448:83 describe a new class of miRNAs that are derived from de-branched introns. -is_a: SO:0001014 ! intron_domain - -[Term] -id: SO:0001035 -name: piRNA -def: "A small non coding RNA, part of a silencing system that prevents the spreading of selfish genetic elements." [SO:ke] -synonym: "piwi-associated RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/PiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001036 -name: arginyl_tRNA -def: "A tRNA sequence that has an arginine anticodon, and a 3' arginine binding region." [SO:ke] -synonym: "arginyl tRNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000212 ! 
arginine_tRNA_primary_transcript - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0001038 -name: extrachromosomal_mobile_genetic_element -def: "An MGE that is not integrated into the host chromosome." [SO:ke] -synonym: "extrachromosomal mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001040 -name: integrated_plasmid -def: "A plasmid sequence that is integrated within the host chromosome." [SO:ke] -synonym: "integrated plasmid" EXACT [] -is_a: SO:0001039 ! implied link automatically realized ! integrated_mobile_genetic_element -intersection_of: SO:0001039 ! integrated_mobile_genetic_element -intersection_of: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0001041 -name: viral_sequence -def: "The region of nucleotide sequence of a virus, a submicroscopic particle that replicates by infecting a host cell." [SO:ke] -comment: The definitions of the children of this term were revised Decemeber 2007 after discussion on song-devel. The resulting definitions are slightly unweildy but hopefully more logically correct. -synonym: "viral sequence" EXACT [] -synonym: "virus sequence" EXACT [] -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -is_a: SO:0001235 ! 
replicon - -[Term] -id: SO:0001042 -name: phage_sequence -def: "The nucleotide sequence of a virus that infects bacteria." [SO:ke] -synonym: "bacteriophage" EXACT [] -synonym: "phage" EXACT [] -synonym: "phage sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Bacteriophage "wiki" -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001043 -name: attCtn_site -def: "An attachment site located on a conjugative transposon and used for site-specific integration of a conjugative transposon." [Phigo:at] -synonym: "attCtn site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000371 ! conjugative_transposon - -[Term] -id: SO:0001044 -name: nuclear_mt_pseudogene -def: "A nuclear pseudogene of a mitochndrial gene." [SO:xp] -synonym: "nuclear mitochondrial pseudogene" EXACT [] -synonym: "nuclear mt pseudogene" EXACT [] -synonym: "NUMT" EXACT [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0001045 -name: cointegrated_plasmid -def: "A MGE region consisting of two fused plasmids resulting from a replicative transposition event." [phigo:at] -synonym: "cointegrated plasmid" EXACT [] -synonym: "cointegrated replicon" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0001046 -name: IRLinv_site -def: "Component of the inversion site located at the left of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRLinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001047 -name: IRRinv_site -def: "Component of the inversion site located at the right of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRRinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001048 -name: inversion_site_part -def: "A region located within an inversion site." 
[SO:ke] -comment: A term created to allow the parts of an inversion site have an is_a path back to the root. -synonym: "inversion site part" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0001049 -name: defective_conjugative_transposon -def: "An island that contains genes for integration/excision and the gene and site for the initiation of intercellular transfer by conjugation. It can be complemented for transfer by a conjugative transposon." [Phigo:ariane] -synonym: "defective conjugative transposon" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001050 -name: repeat_fragment -def: "A portion of a repeat, interrupted by the insertion of another element." [SO:ke] -comment: Requested by Chris Smith, and others at Flybase to help annotate nested repeats. -synonym: "repeat fragment" EXACT [] -is_a: SO:0000657 ! implied link automatically realized ! repeat_region -is_a: SO:0000840 ! repeat_component -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001051 -name: nested_region -is_obsolete: true - -[Term] -id: SO:0001052 -name: nested_repeat -is_obsolete: true - -[Term] -id: SO:0001053 -name: nested_transposon -is_obsolete: true - -[Term] -id: SO:0001054 -name: transposon_fragment -synonym: "transposon fragment" EXACT [] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -subset: SOFA -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." 
[SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001057 -name: enhanceosome -is_obsolete: true - -[Term] -id: SO:0001058 -name: promoter_targeting_sequence -def: "A transcriptional_cis_regulatory_region that restricts the activity of a CRM to a single promoter and which functions only when both itself and an insulator are located between the CRM and the promoter." [SO:regcreative] -synonym: "promoter targeting sequence" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. -subset: SOFA -synonym: "partially characterised change in DNA sequence" EXACT [] -synonym: "partially_characterised_change_in_DNA_sequence" EXACT [] -synonym: "sequence alteration" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001060 -name: sequence_variant -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -synonym: "sequence variant" EXACT [] - -[Term] -id: SO:0001061 -name: propeptide_cleavage_site -alt_id: BS:00063 -def: "The propeptide_cleavage_site is the arginine/lysine boundary on a propeptide where cleavage occurs." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "propeptide cleavage site" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0001062 -name: propeptide -alt_id: BS:00077 -def: "Part of a peptide chain which is cleaved off during the formation of the mature protein." [EBIBS:GAR] -comment: Range. 
-subset: biosapiens -synonym: "propep" RELATED [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Propeptide "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature_peptide_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001064 -name: active_peptide -alt_id: BS:00076 -def: "Active peptides are proteins which are biologically active, released from a precursor molecule." [EBIBS:GAR, UniProt:curation_manual] -comment: Hormones, neuropeptides, antimicrobial peptides, are active peptides. They are typically short (<40 amino acids) in length. -subset: biosapiens -synonym: "active peptide" EXACT [] -synonym: "peptide" BROAD [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Peptide "wiki" -is_a: SO:0000419 ! mature_protein_region - -[Term] -id: SO:0001066 -name: compositionally_biased_region_of_peptide -alt_id: BS:00068 -def: "Polypeptide region that is rich in a particular amino acid or homopolymeric and greater than three residues in length." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "compbias" RELATED [uniprot:feature_type] -synonym: "compositional bias" RELATED [] -synonym: "compositionally biased" RELATED [] -synonym: "compositionally biased region of peptide" RELATED [] -synonym: "compositionally_biased_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001067 -name: polypeptide_motif -alt_id: BS:00032 -def: "A sequence motif is a short (up to 20 amino acids) region of biological interest. Such motifs, although they are too short to constitute functional domains, share sequence similarities and are conserved in different proteins. 
They display a common function (protein-binding, subcellular location etc.)." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "motif" BROAD [uniprot:feature_type] -synonym: "polypeptide motif" EXACT [] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001068 -name: polypeptide_repeat -alt_id: BS:00070 -def: "A polypeptide_repeat is a single copy of an internal sequence repetition." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "polypeptide repeat" EXACT [] -synonym: "repeat" RELATED [uniprot:feature_type] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001070 -name: polypeptide_structural_region -alt_id: BS:00337 -def: "Region of polypeptide with a given structural property." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "polypeptide structural region" EXACT [] -synonym: "structural_region" RELATED [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001071 -name: membrane_structure -alt_id: BS:00128 -def: "Arrangement of the polypeptide with respect to the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "membrane_structure" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001072 -name: extramembrane_polypeptide_region -alt_id: BS:00154 -def: "Polypeptide region that is localized outside of a lipid bilayer." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "extramembrane" RELATED BS [] -synonym: "extramembrane polypeptide region" EXACT [] -synonym: "extramembrane_region" RELATED BS [] -synonym: "topo_dom" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001073 -name: cytoplasmic_polypeptide_region -alt_id: BS:00145 -def: "Polypeptide region that is localized inside the cytoplasm." 
[EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "cytoplasm_location" EXACT BS [] -synonym: "cytoplasmic polypeptide region" EXACT [] -synonym: "inside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001074 -name: non_cytoplasmic_polypeptide_region -alt_id: BS:00144 -def: "Polypeptide region that is localized outside of a lipid bilayer and outside of the cytoplasm." [EBIBS:GAR, SO:cb] -comment: This could be inside an organelle within the cell. -subset: biosapiens -synonym: "non cytoplasmic polypeptide region" EXACT [] -synonym: "non_cytoplasm_location" EXACT BS [] -synonym: "outside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001075 -name: intramembrane_polypeptide_region -alt_id: BS:00156 -def: "Polypeptide region present in the lipid bilayer." [EBIBS:GAR] -subset: biosapiens -synonym: "intramembrane" RELATED BS [] -synonym: "intramembrane polypeptide region" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001076 -name: membrane_peptide_loop -alt_id: BS:00155 -def: "Polypeptide region localized within the lipid bilayer where both ends traverse the same membrane." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "membrane peptide loop" EXACT [] -synonym: "membrane_loop" RELATED BS [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001077 -name: transmembrane_polypeptide_region -alt_id: BS:00158 -def: "Polypeptide region traversing the lipid bilayer." [EBIBS:GAR, UniProt:curator_manual] -subset: biosapiens -synonym: "transmem" RELATED BS [uniprot:feature_type] -synonym: "transmembrane" RELATED BS [] -synonym: "transmembrane polypeptide region" EXACT [] -is_a: SO:0001075 ! 
intramembrane_polypeptide_region - -[Term] -id: SO:0001078 -name: polypeptide_secondary_structure -alt_id: BS:00003 -def: "A region of peptide with secondary structure has hydrogen bonding along the peptide chain that causes a defined conformation of the chain." [EBIBS:GAR] -comment: Biosapien term was secondary_structure. -subset: biosapiens -synonym: "2nary structure" RELATED BS [] -synonym: "polypeptide secondary structure" EXACT [] -synonym: "secondary structure" RELATED BS [] -synonym: "secondary structure region" RELATED BS [] -synonym: "secondary_structure" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Secondary_structure "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001079 -name: polypeptide_structural_motif -alt_id: BS:0000338 -def: "Motif is a three-dimensional structural element within the chain, which appears also in a variety of other molecules. Unlike a domain, a motif does not need to form a stable globular unit." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide structural motif" RELATED [] -synonym: "structural_motif" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Structural_motif "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001080 -name: coiled_coil -alt_id: BS:00041 -def: "A coiled coil is a structural motif in proteins, in which alpha-helices are coiled together like the strands of a rope." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "coiled" RELATED BS [uniprot:feature_type] -synonym: "coiled coil" EXACT [] -xref: http://en.wikipedia.org/wiki/Coiled_coil "wiki" -is_a: SO:0001079 ! polypeptide_structural_motif - -[Term] -id: SO:0001081 -name: helix_turn_helix -alt_id: BS:00147 -def: "A motif comprising two helices separated by a turn." [EBIBS:GAR] -subset: biosapiens -synonym: "helix turn helix" EXACT [] -synonym: "helix-turn-helix" EXACT [] -synonym: "HTH" RELATED BS [] -is_a: SO:0001079 ! 
polypeptide_structural_motif -relationship: has_part SO:0001114 ! peptide_helix -relationship: has_part SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001082 -name: polypeptide_sequencing_information -alt_id: BS:00125 -def: "Incompatibility in the sequence due to some experimental problem." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "sequencing_information" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0001083 -name: non_adjacent_residues -alt_id: BS:00182 -def: "Indicates that two consecutive residues in a fragment sequence are not consecutive in the full-length protein and that there are a number of unsequenced residues between them." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "non consecutive" EXACT [] -synonym: "non_cons" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001084 -name: non_terminal_residue -alt_id: BS:00072 -def: "The residue at an extremity of the sequence is not the terminal residue." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "non terminal" EXACT [] -synonym: "non_ter" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001085 -name: sequence_conflict -alt_id: BS:00069 -def: "Different sources report differing sequences." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "conflict" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001086 -name: sequence_uncertainty -alt_id: BS:00181 -def: "Describes the positions in a sequence where the authors are unsure about the sequence assignment." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "unsure" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! 
polypeptide_sequencing_information - -[Term] -id: SO:0001087 -name: cross_link -alt_id: BS:00178 -def: "Posttranslationally formed amino acid bonds." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "cross_link" EXACT [] -synonym: "crosslink" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001088 -name: disulfide_bond -alt_id: BS:00028 -def: "The covalent bond between sulfur atoms that binds two peptide chains or different parts of one peptide chain and is a structural determinant in many protein molecules." [EBIBS:GAR, UniProt:curation_manual] -comment: 2 discreet & joined. -subset: biosapiens -synonym: "disulfid" RELATED [] -synonym: "disulfide" RELATED [] -synonym: "disulfide bond" RELATED [] -synonym: "disulfide_bond" EXACT [] -synonym: "disulphide" EXACT [] -synonym: "disulphide bond" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001089 -name: post_translationally_modified_region -alt_id: BS:00052 -def: "A region where a transformation occurs in a protein after it has been synthesized. This which may regulate, stabilize, crosslink or introduce new chemical functionalities in the protein." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mod_res" EXACT [uniprot:feature_type] -synonym: "modified residue" EXACT [] -synonym: "post_translational_modification" EXACT [] -xref: http://en.wikipedia.org/wiki/Post_translational_modification "wiki" -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001090 -name: covalent_binding_site -alt_id: BS:00246 -def: "Binding involving a covalent bond." [EBIBS:GAR] -subset: biosapiens -synonym: "covalent_binding_site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001091 -name: non_covalent_binding_site -alt_id: BS:00029 -def: "Binding site for any chemical group (co-enzyme, prosthetic group, etc.)." [EBIBS:GAR] -comment: Discrete. 
-subset: biosapiens -synonym: "binding" RELATED [uniprot:curation] -synonym: "binding site" RELATED [] -synonym: "non_covalent_binding_site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001092 -name: polypeptide_metal_contact -alt_id: BS:00027 -def: "Residue is part of a binding site for a metal ion." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "metal_binding" RELATED [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001093 -name: protein_protein_contact -alt_id: BS:00131 -def: "Residues involved in protein-protein interactions." [EBIBS:GAR, UniProt:Curation_manual] -subset: biosapiens -synonym: "protein protein contact" EXACT [] -synonym: "protein protein contact site" EXACT [] -synonym: "protein_protein_interaction" RELATED [] -xref: http://en.wikipedia.org/wiki/Protein_protein_interaction "wiki" -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001094 -name: polypeptide_calcium_ion_contact_site -alt_id: BS:00186 -def: "Residue involved in contact with calcium." [EBIBS:GAR] -subset: biosapiens -synonym: "ca bind" RELATED [] -synonym: "Ca_contact_site" EXACT [] -synonym: "polypeptide calcium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001095 -name: polypeptide_cobalt_ion_contact_site -alt_id: BS:00136 -def: "Residue involved in contact with cobalt." [EBIBS:GAR] -subset: biosapiens -synonym: "Co_contact_site" EXACT [] -synonym: "polypeptide cobalt ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001096 -name: polypeptide_copper_ion_contact_site -alt_id: BS:00146 -def: "Residue involved in contact with copper." [EBIBS:GAR] -subset: biosapiens -synonym: "Cu_contact_site" EXACT [] -synonym: "polypeptide copper ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001097 -name: polypeptide_iron_ion_contact_site -alt_id: BS:00137 -def: "Residue involved in contact with iron." [EBIBS:GAR] -subset: biosapiens -synonym: "Fe_contact_site" EXACT [] -synonym: "polypeptide iron ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001098 -name: polypeptide_magnesium_ion_contact_site -alt_id: BS:00187 -def: "Residue involved in contact with magnesium." [EBIBS:GAR] -subset: biosapiens -synonym: "Mg_contact_site" EXACT [] -synonym: "polypeptide magnesium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001099 -name: polypeptide_manganese_ion_contact_site -alt_id: BS:00140 -def: "Residue involved in contact with manganese." [EBIBS:GAR] -subset: biosapiens -synonym: "Mn_contact_site" EXACT [] -synonym: "polypeptide manganese ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001100 -name: polypeptide_molybdenum_ion_contact_site -alt_id: BS:00141 -def: "Residue involved in contact with molybdenum." [EBIBS:GAR] -subset: biosapiens -synonym: "Mo_contact_site" EXACT [] -synonym: "polypeptide molybdenum ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001101 -name: polypeptide_nickel_ion_contact_site -alt_id: BS:00142 -def: "Residue involved in contact with nickel." [EBIBS:GAR] -subset: biosapiens -synonym: "Ni_contact_site" EXACT [] -synonym: "polypeptide nickel ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001102 -name: polypeptide_tungsten_ion_contact_site -alt_id: BS:00143 -def: "Residue involved in contact with tungsten." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide tungsten ion contact site" EXACT [] -synonym: "W_contact_site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001103 -name: polypeptide_zinc_ion_contact_site -alt_id: BS:00185 -def: "Residue involved in contact with zinc." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide zinc ion contact site" EXACT [] -synonym: "Zn_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001104 -name: catalytic_residue -alt_id: BS:00026 -def: "Amino acid involved in the activity of an enzyme." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "act_site" RELATED [uniprot:feature_type] -synonym: "active site residue" EXACT [] -synonym: "catalytic residue" EXACT [] -is_a: SO:0001237 ! amino_acid -relationship: part_of SO:0100019 ! polypeptide_catalytic_motif - -[Term] -id: SO:0001105 -name: polypeptide_ligand_contact -alt_id: BS:00157 -def: "Residues which interact with a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide ligand contact" EXACT [] -synonym: "protein-ligand interaction" RELATED [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001106 -name: asx_motif -alt_id: BS:00202 -def: "A motif of five consecutive residues and two H-bonds in which: Residue(i) is Aspartate or Asparagine (Asx), side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3), main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001107 -name: beta_bulge -alt_id: BS:00208 -def: "A motif of three residues within a beta-sheet in which the main chains of two consecutive residues are H-bonded to that of the third, and in which the dihedral angles are as follows: Residue(i): -140 degrees < phi(l) -20 degrees , -90 degrees < psi(l) < 40 degrees. 
Residue (i+1): -180 degrees < phi < -25 degrees or +120 degrees < phi < +180 degrees, +40 degrees < psi < +180 degrees or -180 degrees < psi < -120 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge" EXACT [] -xref: http://en.wikipedia.org/wiki/Beta_bulge "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001108 -name: beta_bulge_loop -alt_id: BS:00209 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds. Beta bulge loops often occur at the loop ends of beta-hairpins." [EBIBS:GAR, Http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001109 -name: beta_bulge_loop_five -alt_id: BS:00210 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+4), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+3), these loops have an RL nest at residues i+2 and i+3." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop five" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001110 -name: beta_bulge_loop_six -alt_id: BS:00211 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+5), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+4), these loops have an RL nest at residues i+3 and i+4." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop six" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001111 -name: beta_strand -alt_id: BS:00042 -def: "A beta strand describes a single length of polypeptide chain that forms part of a beta sheet. 
A single continuous stretch of amino acids adopting an extended conformation of hydrogen bonds between the N-O and the C=O of another part of the peptide. This forms a secondary protein structure in which two or more extended polypeptide regions are hydrogen-bonded to one another in a planar array." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "strand" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Beta_sheet "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001112 -name: antiparallel_beta_strand -alt_id: BS:0000341 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (one running N-terminal to C-terminal and one running C-terminal to N-terminal). Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i) and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they form two mutual backbone hydrogen bonds to each other's flanking peptide groups; this is known as a close pair of hydrogen bonds. The peptide backbone dihedral angles (phi, psi) are about (-140 degrees, 135 degrees) in antiparallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "antiparallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001113 -name: parallel_beta_strand -alt_id: BS:00151 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (both running N-terminal to C-terminal). This orientation is slightly less stable because it introduces nonplanarity in the inter-strand hydrogen bonding pattern. Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. 
In this case, if two atoms C-alpha (i)and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they do not hydrogen bond to each other; rather, one residue forms hydrogen bonds to the residues that flank the other (but not vice versa). For example, residue i may form hydrogen bonds to residues j - 1 and j + 1; this is known as a wide pair of hydrogen bonds. By contrast, residue j may hydrogen-bond to different residues altogether, or to none at all. The dihedral angles (phi, psi) are about (-120 degrees, 115 degrees) in parallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "parallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001114 -name: peptide_helix -alt_id: BS:00152 -def: "A helix is a secondary_structure conformation where the peptide backbone forms a coil." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "helix" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001115 -name: left_handed_peptide_helix -alt_id: BS:00222 -def: "A left handed helix is a region of peptide where the coiled conformation turns in an anticlockwise, left handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix-l" RELATED [] -synonym: "left handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001116 -name: right_handed_peptide_helix -alt_id: BS:0000339 -def: "A right handed helix is a region of peptide where the coiled conformation turns in a clockwise, right handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix" RELATED BS [] -synonym: "right handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001117 -name: alpha_helix -alt_id: BS:00040 -def: "The helix has 3.6 residues per turn which corersponds to a translation of 1.5 angstroms (= 0.15 nm) along the helical axis. Every backbone N-H group donates a hydrogen bond to the backbone C=O group of the amino acid four residues earlier." 
[EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "a-helix" RELATED BS [] -synonym: "helix" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Alpha_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001118 -name: pi_helix -alt_id: BS:00153 -def: "The pi helix has 4.1 residues per turn and a translation of 1.15 (=0.115 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid five residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "pi helix" EXACT [] -xref: http://en.wikipedia.org/wiki/Pi_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001119 -name: three_ten_helix -alt_id: BS:0000340 -def: "The 3-10 helix has 3 residues per turn with a translation of 2.0 angstroms (=0.2 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid three residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "3(10) helix" EXACT [] -synonym: "3-10 helix" EXACT [] -synonym: "310 helix" EXACT [] -synonym: "three_ten_helix" EXACT [] -xref: http://en.wikipedia.org/wiki/310_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001120 -name: polypeptide_nest_motif -alt_id: BS:00223 -def: "A motif of two consecutive residues with dihedral angles. Nest should not have Proline as any residue. Nests frequently occur as parts of other motifs such as Schellman loops." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest" RELATED BS [] -synonym: "nest_motif" EXACT [] -synonym: "polypeptide nest motif" RELATED [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001121 -name: polypeptide_nest_left_right_motif -alt_id: BS:00224 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees. 
Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_left_right" EXACT [] -synonym: "nest_lr" EXACT [] -synonym: "polypeptide nest left right motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001122 -name: polypeptide_nest_right_left_motif -alt_id: BS:00225 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_right_left" EXACT [] -synonym: "nest_rl" EXACT [] -synonym: "polypeptide nest right left motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001123 -name: schellmann_loop -alt_id: BS:00226 -def: "A motif of six or seven consecutive residues that contains two H-bonds." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "paperclip" RELATED BS [] -synonym: "paperclip loop" RELATED [] -synonym: "schellmann_loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001124 -name: schellmann_loop_seven -alt_id: BS:00228 -def: "Wild type: A motif of seven consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+6), the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+5)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop seven" EXACT [] -synonym: "seven-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! 
schellmann_loop - -[Term] -id: SO:0001125 -name: schellmann_loop_six -alt_id: BS:00227 -def: "Common Type: A motif of six consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+5) the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop six" EXACT [] -synonym: "six-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001126 -name: serine_threonine_motif -alt_id: BS:00229 -def: "A motif of five consecutive residues and two hydrogen bonds in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3) , the main-chain CO group of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine motif" EXACT [] -synonym: "st motif" EXACT [] -synonym: "st_motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001127 -name: serine_threonine_staple_motif -alt_id: BS:00230 -def: "A motif of four or five consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain OH of residue(i) is H-bonded to the main-chain CO of residue(i3) or (i4), Phi angles of residues(i1), (i2) and (i3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine threonine staple motif" EXACT [] -synonym: "st_staple" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001128 -name: polypeptide_turn_motif -alt_id: BS:00148 -def: "A reversal in the direction of the backbone of a protein that is stabilized by hydrogen bond between backbone NH and CO groups, involving no more than 4 amino acid residues." 
[EBIBS:GAR, uniprot:feature_type] -comment: Range. -subset: biosapiens -synonym: "turn" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001129 -name: asx_turn_left_handed_type_one -alt_id: BS:00206 -def: "Left handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type one" EXACT [] -synonym: "asx_turn_il" RELATED [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001130 -name: asx_turn_left_handed_type_two -alt_id: BS:00204 -def: "Left handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type two" EXACT [] -synonym: "asx_turn_iil" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001131 -name: asx_turn_right_handed_type_two -alt_id: BS:00205 -def: "Right handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn right handed type two" EXACT [] -synonym: "asx_turn_iir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001132 -name: asx_turn_right_handed_type_one -alt_id: BS:00207 -def: "Right handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn type right handed type one" EXACT [] -synonym: "asx_turn_ir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001133 -name: beta_turn -alt_id: BS:00212 -def: "A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles of the second and third residues, which are the basis for sub-categorization." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001134 -name: beta_turn_left_handed_type_one -alt_id: BS:00215 -def: "Left handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles:- Residue(i+1): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees. Residue(i+2): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type one" EXACT [] -synonym: "beta_turn_il" EXACT [] -synonym: "type I' beta turn" EXACT [] -synonym: "type I' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001135 -name: beta_turn_left_handed_type_two -alt_id: BS:00213 -def: "Left handed type II: A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees > phi > -20 degrees, +80 degrees > psi > +180 degrees. Residue(i+2): +20 degrees > phi > +140 degrees, -40 degrees > psi > +90 degrees." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type two" EXACT [] -synonym: "beta_turn_iil" EXACT [] -synonym: "type II' beta turn" EXACT [] -synonym: "type II' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001136 -name: beta_turn_right_handed_type_one -alt_id: BS:00216 -def: "Right handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+2): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type one" EXACT [] -synonym: "beta_turn_ir" EXACT [] -synonym: "type I beta turn" EXACT [] -synonym: "type I turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001137 -name: beta_turn_right_handed_type_two -alt_id: BS:00214 -def: "Right handed type II:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, +80 degrees < psi < +180 degrees. Residue(i+2): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type two" EXACT [] -synonym: "beta_turn_iir" EXACT [] -synonym: "type II beta turn" EXACT [] -synonym: "type II turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001138 -name: gamma_turn -alt_id: BS:00219 -def: "Gamma turns, defined for 3 residues i,( i+1),( i+2) if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001139 -name: gamma_turn_classic -alt_id: BS:00220 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=75.0 - psi(i+1)=-64.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "classic gamma turn" EXACT [] -synonym: "gamma turn classic" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001140 -name: gamma_turn_inverse -alt_id: BS:00221 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=-79.0 - psi(i+1)=69.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn inverse" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001141 -name: serine_threonine_turn -alt_id: BS:00231 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine turn" EXACT [] -synonym: "st_turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001142 -name: st_turn_left_handed_type_one -alt_id: BS:00234 -def: "The peptide twists in an anticlockwise, left handed manner. 
The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type one" EXACT [] -synonym: "st_turn_il" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001143 -name: st_turn_left_handed_type_two -alt_id: BS:00232 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type two" EXACT [] -synonym: "st_turn_iil" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001144 -name: st_turn_right_handed_type_one -alt_id: BS:00235 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type one" EXACT [] -synonym: "st_turn_ir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001145 -name: st_turn_right_handed_type_two -alt_id: BS:00233 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type two" EXACT [] -synonym: "st_turn_iir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001146 -name: polypeptide_variation_site -alt_id: BS:00336 -def: "A site of sequence variation (alteration). Alternative sequence due to naturally occuring events such as polymorphisms and altermatve splicing or experimental methods such as site directed mutagenesis." [EBIBS:GAR, SO:ke] -comment: For example, was a substitution natural or mutated as part of an experiment? This term is added to merge the biosapiens term sequence_variations. -subset: biosapiens -synonym: "sequence_variations" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001147 -name: natural_variant_site -alt_id: BS:00071 -def: "Describes the natural sequence variants due to polymorphisms, disease-associated mutations, RNA editing and variations between strains, isolates or cultivars." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "natural_variant" BROAD [] -synonym: "sequence variation" BROAD [] -synonym: "variant" BROAD [uniprot:feature_type] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001148 -name: mutated_variant_site -alt_id: BS:00036 -def: "Site which has been experimentally altered." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mutagen" EXACT BS [uniprot:feature_type] -synonym: "mutagenesis" EXACT [] -synonym: "mutated_site" EXACT [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001149 -name: alternate_sequence_site -alt_id: BS:00073 -alt_id: SO:0001065 -def: "Description of sequence variants produced by alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. 
-subset: biosapiens -synonym: "alternative_sequence" EXACT [] -synonym: "isoform" NARROW [] -synonym: "sequence variation" NARROW [] -synonym: "var_seq" EXACT [uniprot:feature_type] -synonym: "varsplic" NARROW [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001150 -name: beta_turn_type_six -def: "A motif of four consecutive peptide resides of type VIa or type VIb and where the i+2 residue is cis-proline." [SO:cb] -subset: biosapiens -synonym: "beta turn type six" EXACT [] -synonym: "cis-proline loop" EXACT [] -synonym: "type VI beta turn" EXACT [] -synonym: "type VI turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001151 -name: beta_turn_type_six_a -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -90 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six a" EXACT [] -synonym: "type VIa beta turn" EXACT [] -synonym: "type VIa turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001152 -name: beta_turn_type_six_a_one -subset: biosapiens -synonym: "beta turn type six a one" EXACT [] -synonym: "type VIa1 beta turn" EXACT [] -synonym: "type VIa1 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001153 -name: beta_turn_type_six_a_two -subset: biosapiens -synonym: "beta turn type six a two" EXACT [] -synonym: "type VIa2 beta turn" EXACT [] -synonym: "type VIa2 turn" EXACT [] -is_a: SO:0001151 ! 
beta_turn_type_six_a - -[Term] -id: SO:0001154 -name: beta_turn_type_six_b -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -120 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -60 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six b" EXACT [] -synonym: "type VIb beta turn" EXACT [] -synonym: "type VIb turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001155 -name: beta_turn_type_eight -def: "A motif of four consecutive peptide residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ -30 degrees. Residue(i+2): phi ~ -120 degrees, psi ~ 120 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type eight" EXACT [] -synonym: "type VIII beta turn" EXACT [] -synonym: "type VIII turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001156 -name: DRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -10 and -60 relative to the TSS. Consensus sequence is WATCGATW." [PMID:12537576] -comment: This consensus sequence was identified computationally using the MEME algorithm within core promoter sequences from -60 to +40, with an E value of 1.7e-183. Tends to co-occur with Motif 7. Tends to not occur with DPE motif (SO:0000015) or motif 10. -synonym: "DRE motif" EXACT [] -synonym: "NDM4" EXACT [] -synonym: "WATCGATW_motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001157 -name: DMv4_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements with respect to the TSS (+1). Consensus sequence is YGGTCACACTR. Marked spatial preference within core promoter; tend to occur near the TSS, although not as tightly as INR (SO:0000014)." [PMID:16827941\:12537576] -synonym: "directional motif v4" EXACT [] -synonym: "DMv4" EXACT [] -synonym: "DMv4 motif" EXACT [] -synonym: "motif 1 element" EXACT [] -synonym: "promoter motif 1" EXACT [] -synonym: "YGGTCACATR" NARROW [] -is_a: SO:0000713 ! DNA_motif - -[Term] -id: SO:0001158 -name: E_box_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and +1 relative to the TSS. Consensus sequence is AWCAGCTGWT. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015)." [PMID:12537576\:16827941] -synonym: "AWCAGCTGWT" NARROW [] -synonym: "E box motif" EXACT [] -synonym: "generic E box motif" EXACT [] -synonym: "NDM5" RELATED [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001159 -name: DMv5_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -50 and -10 relative to the TSS. Consensus sequence is KTYRGTATWTTT. Tends to co-occur with DMv4 (SO:0001157) . Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v5" EXACT [] -synonym: "DMv5" EXACT [] -synonym: "DMv5 motif" EXACT [] -synonym: "KTYRGTATWTTT" NARROW [] -synonym: "promoter motif 6" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001160 -name: DMv3_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -30 and +15 relative to the TSS. 
Consensus sequence is KNNCAKCNCTRNY. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015) or MTE (0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v3" EXACT [] -synonym: "DMv3" EXACT [] -synonym: "DMv3 motif" EXACT [] -synonym: "KNNCAKCNCTRNY" NARROW [] -synonym: "promoter motif 7" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001161 -name: DMv2_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and -45 relative to the TSS. Consensus sequence is MKSYGGCARCGSYSS. Tends to co-occur with DMv3 (SO:0001160). Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v2" EXACT [] -synonym: "DMv2" EXACT [] -synonym: "DMv2 motif" EXACT [] -synonym: "MKSYGGCARCGSYSS" NARROW [] -synonym: "promoter motif 8" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001162 -name: MTE -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between +20 and +30 relative to the TSS. Consensus sequence is CSARCSSAACGS. Tends to co-occur with INR motif (SO:0000014). Tends to not occur with DPE motif (SO:0000015) or DMv5 (SO:0001159)." [PMID:12537576\:15231738] -synonym: "CSARCSSAACGS" NARROW [] -synonym: "motif ten element" EXACT [] -synonym: "motif_ten_element" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001163 -name: INR1_motif -def: "A promoter motif with consensus sequence TCATTCG." [PMID:16827941] -synonym: "directional motif p3" EXACT [] -synonym: "directional promoter motif 3" EXACT [] -synonym: "DMp3" EXACT [] -synonym: "INR1 motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001164 -name: DPE1_motif -def: "A promoter motif with consensus sequence CGGACGT." [PMID:16827941] -synonym: "directional motif 5" EXACT [] -synonym: "directional promoter motif 5" RELATED [] -synonym: "DMp5" EXACT [] -synonym: "DPE1 motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001165 -name: DMv1_motif -def: "A promoter motif with consensus sequence CARCCCT." [PMID:16827941] -synonym: "directional promoter motif v1" RELATED [] -synonym: "DMv1" RELATED [] -synonym: "DMv1 motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001166 -name: GAGA_motif -def: "A non directional promoter motif with consensus sequence GAGAGCG." [PMID:16827941] -synonym: "GAGA" EXACT [] -synonym: "GAGA motif" EXACT [] -synonym: "NDM1" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001167 -name: NDM2_motif -def: "A non directional promoter motif with consensus CGMYGYCR." [PMID:16827941] -synonym: "NDM2" EXACT [] -synonym: "NDM2 motif" EXACT [] -synonym: "non directional promoter motif 2" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001168 -name: NDM3_motif -def: "A non directional promoter motif with consensus sequence GAAAGCT." [PMID:16827941] -synonym: "NDM3" EXACT [] -synonym: "NDM3 motif" EXACT [] -synonym: "non directional motif 3" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001169 -name: ds_RNA_viral_sequence -def: "A ds_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded RNA." [SO:ke] -synonym: "double stranded RNA virus sequence" EXACT [] -synonym: "ds RNA viral sequence" EXACT [] -is_a: SO:0001041 ! 
viral_sequence - -[Term] -id: SO:0001170 -name: polinton -def: "A kind of DNA transposon that populates the genomes of protists, fungi, and animals, characterized by a unique set of proteins necessary for their transposition, including a protein-primed DNA polymerase B, retroviral integrase, cysteine protease, and ATPase. Polintons are characterized by 6-bp target site duplications, terminal-inverted repeats that are several hundred nucleotides long, and 5'-AG and TC-3' termini. Polintons exist as autonomous and nonautonomous elements." [PMID:16537396] -synonym: "maverick element" RELATED [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0001171 -name: rRNA_21S -def: "A component of the large ribosomal subunit in mitochondrial rRNA." [RSC:cb] -synonym: "21S LSU rRNA" EXACT [] -synonym: "21S ribosomal RNA" EXACT [] -synonym: "21S rRNA" EXACT [] -synonym: "rRNA 21S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001172 -name: tRNA_region -def: "A region of a tRNA." [RSC:cb] -synonym: "tRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000253 ! tRNA - -[Term] -id: SO:0001173 -name: anticodon_loop -def: "A sequence of seven nucleotide bases in tRNA which contains the anticodon. It has the sequence 5'-pyrimidine-purine-anticodon-modified purine-any base-3." [ISBN:0716719207] -synonym: "anti-codon loop" EXACT [] -synonym: "anticodon loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001174 -name: anticodon -def: "A sequence of three nucleotide bases in tRNA which recognizes a codon in mRNA." [RSC:cb] -synonym: "anti-codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Anticodon "wiki" -is_a: SO:0001172 ! tRNA_region -relationship: part_of SO:0001173 ! anticodon_loop - -[Term] -id: SO:0001175 -name: CCA_tail -def: "Base sequence at the 3' end of a tRNA. The 3'-hydroxyl group on the terminal adenosine is the attachment point for the amino acid." 
[ISBN:0716719207] -synonym: "CCA sequence" EXACT [] -synonym: "CCA tail" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001176 -name: DHU_loop -def: "Non-base-paired sequence of nucleotide bases in tRNA. It contains several dihydrouracil residues." [ISBN:071671920] -synonym: "D loop" RELATED [] -synonym: "DHU loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001177 -name: T_loop -def: "Non-base-paired sequence of three nucleotide bases in tRNA. It has sequence T-Psi-C." [ISBN:0716719207] -synonym: "T loop" EXACT [] -synonym: "TpsiC loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001178 -name: pyrrolysine_tRNA_primary_transcript -def: "A primary transcript encoding pyrrolysyl tRNA (SO:0000766)." [RSC:cb] -synonym: "pyrrolysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0001179 -name: U3_snoRNA -def: "U3 snoRNA is a member of the box C/D class of small nucleolar RNAs. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -comment: The definition is most of the old definition for snoRNA (SO:0000275). 
-synonym: "small nucleolar RNA U3" EXACT [] -synonym: "snoRNA U3" EXACT [] -synonym: "U3 small nucleolar RNA" EXACT [] -synonym: "U3 snoRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Small_nucleolar_RNA_U3 "wiki" -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0001180 -name: AU_rich_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is rich in AUUUA pentamers. Messenger RNAs bearing multiple AU-rich elements are often unstable." [PMID:7892223] -synonym: "ARE" RELATED [] -synonym: "AU rich element" EXACT [] -synonym: "AU-rich element" EXACT [] -xref: http://en.wikipedia.org/wiki/AU-rich_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001181 -name: Bruno_response_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is bound by the Drosophila Bruno protein and its homologs." [PMID:10893231] -comment: Not to be confused with BRE_motif (SO:0000016), which binds transcription factor II B. -synonym: "BRE" RELATED [] -synonym: "Bruno response element" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001182 -name: iron_responsive_element -def: "A regulatory sequence found in the 5' and 3' UTRs of many mRNAs which encode iron-binding proteins. It has a hairpin structure and is recognized by trans-acting proteins known as iron-regulatory proteins." [PMID:3198610, PMID:8710843] -synonym: "IRE" EXACT [] -synonym: "iron responsive element" EXACT [] -xref: http://en.wikipedia.org/wiki/Iron_responsive_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0001183 -name: morpholino -def: "An attribute describing a sequence composed of nucleobases bound to a morpholino backbone. A morpholino backbone consists of morpholine (CHEBI:34856) rings connected by phosphorodiamidate linkages." [RSC:cb] -comment: Do not use this for feature annotation. 
Use morpholino_oligo (SO:0000034) instead. -xref: http://en.wikipedia.org/wiki/Morpholino "wiki" -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001184 -name: PNA -def: "An attribute describing a sequence composed of peptide nucleic acid (CHEBI:48021), a chemical consisting of nucleobases bound to a backbone composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [RSC:cb] -comment: Do not use this term for feature annotation. Use PNA_oligo (SO:0001011) instead. -synonym: "peptide nucleic acid" RELATED [] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001185 -name: enzymatic -def: "An attribute describing the sequence of a transcript that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use enzymatic_RNA (SO:0000372) instead. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001186 -name: ribozymic -def: "An attribute describing the sequence of a transcript that has catalytic activity even without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use ribozyme (SO:0000374) instead. -is_a: SO:0001185 ! enzymatic - -[Term] -id: SO:0001187 -name: pseudouridylation_guide_snoRNA -def: "A snoRNA that specifies the site of pseudouridylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA pseudouridylation guide activity (GO:0030558). -synonym: "pseudouridylation guide snoRNA" EXACT [] -is_a: SO:0000594 ! H_ACA_box_snoRNA - -[Term] -id: SO:0001188 -name: LNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of 'locked' deoxyribose rings connected to a phosphate backbone. The deoxyribose unit's conformation is 'locked' by a 2'-C,4'-C-oxymethylene link." 
[CHEBI:48010] -comment: Do not use this term for feature annotation. Use LNA_oligo (SO:0001189) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001189 -name: LNA_oligo -def: "An oligo composed of LNA residues." [RSC:cb] -synonym: "LNA oligo" EXACT [] -synonym: "locked nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Locked_nucleic_acid "wiki" -is_a: SO:0001247 ! implied link automatically realized ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001188 ! LNA - -[Term] -id: SO:0001190 -name: TNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of threose rings connected to a phosphate backbone." [CHEBI:48019] -comment: Do not use this term for feature annotation. Use TNA_oligo (SO:0001191) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001191 -name: TNA_oligo -def: "An oligo composed of TNA residues." [RSC:cb] -synonym: "threose nucleic acid" EXACT [] -synonym: "TNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Threose_nucleic_acid "wiki" -is_a: SO:0001247 ! implied link automatically realized ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001190 ! TNA - -[Term] -id: SO:0001192 -name: GNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of an acyclic three-carbon propylene glycol connected to a phosphate backbone. It has two enantiomeric forms, (R)-GNA and (S)-GNA." [CHEBI:48015] -comment: Do not use this term for feature annotation. Use GNA_oligo (SO:0001192) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001193 -name: GNA_oligo -def: "An oligo composed of GNA residues." [RSC:cb] -synonym: "glycerol nucleic acid" EXACT [] -synonym: "glycol nucleic acid" EXACT [] -synonym: "GNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Glycerol_nucleic_acid "wiki" -is_a: SO:0001247 ! 
implied link automatically realized ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001192 ! GNA - -[Term] -id: SO:0001194 -name: R_GNA -def: "An attribute describing a GNA sequence in the (R)-GNA enantiomer." [CHEBI:48016] -comment: Do not use this term for feature annotation. Use R_GNA_oligo (SO:0001195) instead. -synonym: "R GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001195 -name: R_GNA_oligo -def: "An oligo composed of (R)-GNA residues." [RSC:cb] -synonym: "(R)-glycerol nucleic acid" EXACT [] -synonym: "(R)-glycol nucleic acid" EXACT [] -synonym: "R GNA oligo" EXACT [] -is_a: SO:0001193 ! implied link automatically realized ! GNA_oligo -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001194 ! R_GNA - -[Term] -id: SO:0001196 -name: S_GNA -def: "An attribute describing a GNA sequence in the (S)-GNA enantiomer." [CHEBI:48017] -comment: Do not use this term for feature annotation. Use S_GNA_oligo (SO:0001197) instead. -synonym: "S GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001197 -name: S_GNA_oligo -def: "An oligo composed of (S)-GNA residues." [RSC:cb] -synonym: "(S)-glycerol nucleic acid" EXACT [] -synonym: "(S)-glycol nucleic acid" EXACT [] -synonym: "S GNA oligo" EXACT [] -is_a: SO:0001193 ! implied link automatically realized ! GNA_oligo -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001196 ! S_GNA - -[Term] -id: SO:0001198 -name: ds_DNA_viral_sequence -def: "A ds_DNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded DNA." [SO:ke] -synonym: "double stranded DNA virus" EXACT [] -synonym: "ds DNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001199 -name: ss_RNA_viral_sequence -def: "A ss_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as single stranded RNA." 
[SO:ke] -synonym: "single strand RNA virus" EXACT [] -synonym: "ss RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001200 -name: negative_sense_ssRNA_viral_sequence -def: "A negative_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that is complementary to mRNA and must be converted to positive sense RNA by RNA polymerase before translation." [SO:ke] -synonym: "negative sense single stranded RNA virus" RELATED [] -synonym: "negative sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001201 -name: positive_sense_ssRNA_viral_sequence -def: "A positive_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that can be immediately translated by the host." [SO:ke] -synonym: "positive sense single stranded RNA virus" RELATED [] -synonym: "positive sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001202 -name: ambisense_ssRNA_viral_sequence -def: "A ambisense_RNA_virus is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus with both messenger and anti messenger polarity." [SO:ke] -synonym: "ambisense single stranded RNA virus" EXACT [] -synonym: "ambisense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001203 -name: RNA_polymerase_promoter -def: "A region (DNA) to which RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "RNA polymerase promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0001204 -name: Phage_RNA_Polymerase_Promoter -def: "A region (DNA) to which Bacteriophage RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "Phage RNA Polymerase Promoter" EXACT [] -is_a: SO:0001203 ! 
RNA_polymerase_promoter - -[Term] -id: SO:0001205 -name: SP6_RNA_Polymerase_Promoter -def: "A region (DNA) to which the SP6 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "SP6 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001206 -name: T3_RNA_Polymerase_Promoter -def: "A DNA sequence to which the T3 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T3 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001207 -name: T7_RNA_Polymerase_Promoter -def: "A region (DNA) to which the T7 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T7 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001208 -name: five_prime_EST -def: "An EST read from the 5' end of a transcript that usually codes for a protein. These regions tend to be conserved across species and do not change much within a gene family." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "5' EST" EXACT [] -synonym: "five prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001209 -name: three_prime_EST -def: "An EST read from the 3' end of a transcript. They are more likely to fall within non-coding, or untranslated regions(UTRs)." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "3' EST" EXACT [] -synonym: "three prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001210 -name: translational_frameshift -def: "The region of mRNA (not divisible by 3 bases) that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "ribosomal frameshift" EXACT [] -synonym: "translational frameshift" EXACT [] -xref: http://en.wikipedia.org/wiki/Translational_frameshift "wiki" -is_a: SO:0000836 ! 
mRNA_region - -[Term] -id: SO:0001211 -name: plus_1_translational_frameshift -def: "The region of mRNA 1 base long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 1 ribosomal frameshift" EXACT [] -synonym: "plus 1 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001212 -name: plus_2_translational_frameshift -def: "The region of mRNA 2 bases long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 2 ribosomal frameshift" EXACT [] -synonym: "plus 2 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001213 -name: group_III_intron -def: "Group III introns are introns found in the mRNA of the plastids of euglenoid protists. They are spliced by a two step transesterification with bulged adenosine as initiating nucleophile." [PMID:11377794] -comment: GO:0000374. -synonym: "group III intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_III_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -def: "The maximal intersection of exon and UTR." [SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with a stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! 
exon_region - -[Term] -id: SO:0001216 -name: endonuclease_spliced_intron -def: "An intron that spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -synonym: "endonuclease spliced intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0001217 -name: protein_coding_gene -synonym: "protein coding gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000010 ! protein_coding - -[Term] -id: SO:0001218 -name: transgenic_insertion -def: "An insertion that derives from another organism, via the use of recombinant DNA technology." [SO:bm] -synonym: "transgenic insertion" EXACT [] -is_a: SO:0000667 ! implied link automatically realized ! insertion -intersection_of: SO:0000667 ! insertion -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0001219 -name: retrogene -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000569 ! retrotransposed - -[Term] -id: SO:0001220 -name: silenced_by_RNA_interference -def: "An attribute describing an epigenetic process where a gene is inactivated by RNA interference." [RSC:cb] -comment: RNA interference is GO:0016246. -synonym: "silenced by RNA interference" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001221 -name: silenced_by_histone_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by histone modification." [RSC:cb] -comment: Histone modification is GO:0016570. -synonym: "silenced by histone modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001222 -name: silenced_by_histone_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone methylation." [RSC:cb] -comment: Histone methylation is GO:0016571. -synonym: "silenced by histone methylation" EXACT [] -is_a: SO:0001221 ! 
silenced_by_histone_modification - -[Term] -id: SO:0001223 -name: silenced_by_histone_deacetylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone deacetylation." [RSC:cb] -comment: Histone deacetylation is GO:0016573. -synonym: "silenced by histone deacetylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001224 -name: gene_silenced_by_RNA_interference -def: "A gene that is silenced by RNA interference." [SO:xp] -synonym: "gene silenced by RNA interference" EXACT [] -synonym: "RNA interference silenced gene" EXACT [] -synonym: "RNAi silenced gene" EXACT [] -is_a: SO:0000127 ! implied link automatically realized ! silenced_gene -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001220 ! silenced_by_RNA_interference - -[Term] -id: SO:0001225 -name: gene_silenced_by_histone_modification -def: "A gene that is silenced by histone modification." [SO:xp] -synonym: "gene silenced by histone modification" EXACT [] -is_a: SO:0000127 ! implied link automatically realized ! silenced_gene -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001226 -name: gene_silenced_by_histone_methylation -def: "A gene that is silenced by histone methylation." [SO:xp] -synonym: "gene silenced by histone methylation" EXACT [] -is_a: SO:0001225 ! implied link automatically realized ! gene_silenced_by_histone_modification -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001222 ! silenced_by_histone_methylation - -[Term] -id: SO:0001227 -name: gene_silenced_by_histone_deacetylation -def: "A gene that is silenced by histone deacetylation." [SO:xp] -synonym: "gene silenced by histone deacetylation" EXACT [] -is_a: SO:0001225 ! implied link automatically realized ! gene_silenced_by_histone_modification -intersection_of: SO:0001225 ! 
gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001223 ! silenced_by_histone_deacetylation - -[Term] -id: SO:0001228 -name: dihydrouridine -def: "A modified RNA base in which the 5,6-dihydrouracil is bound to the ribose ring." [RSC:cb] -synonym: "D" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Dihydrouridine "wiki" -xref: RNAMOD:051 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001229 -name: pseudouridine -def: "A modified RNA base in which the 5- position of the uracil is bound to the ribose ring instead of the 4- position." [RSC:cb] -comment: The free molecule is CHEBI:17802. -synonym: "Y" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Pseudouridine "wiki" -xref: RNAMOD:050 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001230 -name: inosine -def: "A modified RNA base in which hypoxanthine is bound to the ribose ring." [http://library.med.utah.edu/RNAmods/, RSC:cb] -comment: The free molecule is CHEBI:17596. -synonym: "I" RELATED [] -synonym: "RNAMOD:017" RELATED [] -xref: http://en.wikipedia.org/wiki/Inosine "wiki" -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001231 -name: seven_methylguanine -def: "A modified RNA base in which guanine is methylated at the 7- position." [RSC:cb] -comment: The free molecule is CHEBI:2274. -synonym: "7-methylguanine" EXACT [] -synonym: "seven methylguanine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001232 -name: ribothymidine -def: "A modified RNA base in which thymine is bound to the ribose ring." [RSC:cb] -comment: The free molecule is CHEBI:30832. -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001233 -name: methylinosine -def: "A modified RNA base in which methylhypoxanthine is bound to the ribose ring." [RSC:cb] -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001234 -name: mobile -def: "An attribute describing a feature that has either intra-genome or intracellular mobility." 
[RSC:cb] -xref: http://en.wikipedia.org/wiki/Mobile "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001235 -name: replicon -def: "A region containing at least one unique origin of replication and a unique termination site." [ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001237 -name: amino_acid -def: "A sequence feature that corresponds to a single amino acid residue in a polypeptide." [RSC:cb] -comment: Probably in the future this will cross reference to Chebi. -synonym: "amino acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Amino_acid "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0001238 -name: major_TSS -synonym: "major transcription start site" EXACT [] -synonym: "major TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001239 -name: minor_TSS -synonym: "minor TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001240 -name: TSS_region -def: "The region of a gene from the 5' most TSS to the 3' TSS." [BBOP:nw] -synonym: "TSS region" EXACT [] -is_a: SO:0000842 ! gene_component_region -relationship: has_part SO:0000315 ! TSS - -[Term] -id: SO:0001241 -name: encodes_alternate_transcription_start_sites -synonym: "encodes alternate transcription start sites" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0001243 -name: miRNA_primary_transcript_region -def: "A part of an miRNA primary_transcript." [SO:ke] -synonym: "miRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! 
primary_transcript_region - -[Term] -id: SO:0001244 -name: pre_miRNA -def: "The 60-70 nucleotide region remain after Drosha processing of the primary transcript, that folds back upon itself to form a hairpin sructure." [SO:ke] -synonym: "pre-miRNA" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0001245 -name: miRNA_stem -def: "The stem of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA stem" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001246 -name: miRNA_loop -def: "The loop of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA loop" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001247 -name: synthetic_oligo -def: "An oligo composed of synthetic nucleotides." [SO:ke] -synonym: "synthetic oligo" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0001248 -name: assembly -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001249 -name: fragment_assembly -def: "A fragment assembly is a genome assembly that orders overlapping fragments of the genome based on landmark sequences. The base pair distance between the landmarks is known allowing additivity of lengths." [SO:ke] -synonym: "fragment assembly" EXACT [] -synonym: "physical map" EXACT [] -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0001250 -name: fingerprint_map -def: "A fingerprint_map is a physical map composed of restriction fragments." 
[SO:ke] -synonym: "BACmap" EXACT [] -synonym: "fingerprint map" EXACT [] -synonym: "FPC" EXACT [] -synonym: "FPCmap" EXACT [] -synonym: "restriction map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000412 ! restriction_fragment - -[Term] -id: SO:0001251 -name: STS_map -def: "An STS map is a physical map organized by the unique STS landmarks." [SO:ke] -synonym: "STS map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001252 -name: RH_map -def: "A radiation hybrid map is a physical map." [SO:ke] -synonym: "radiation hybrid map" EXACT [] -synonym: "RH map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001253 -name: sonicate_fragment -def: "A DNA fragment generated by sonication. Sonication is a technique used to sheer DNA into smaller fragments." [SO:ke] -synonym: "sonicate fragment" EXACT [] -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0001254 -name: polyploid -def: "A kind of chromosome variation where the chromosome complement is an exact multiple of the haploid number and is greater than the diploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Polyploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0001255 -name: autopolyploid -def: "A polyploid where the multiple chromosome set was derived from the same organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Autopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001256 -name: allopolyploid -def: "A polyploid where the multiple chromosome set was derived from a different organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Allopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001257 -name: homing_endonuclease_binding_site -def: "The binding site (recognition site) of a homing endonuclease. The binding site is typically large." 
[SO:ke] -synonym: "homing endonuclease binding site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0001258 -name: octamer_motif -def: "A sequence element characteristic of some RNA polymerase II promoters with sequence ATTGCAT that binds Pou-domain transcription factors." [GOC:dh, PMID:3095662] -comment: Nature. 1986 Oct 16-22;323(6089):640-3. -synonym: "octamer motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001259 -name: apicoplast_chromosome -def: "A chromosome originating in an apicoplast." [SO:xp] -synonym: "apicoplast chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0001260 -name: sequence_collection -def: "A collection of discontinuous sequences." [SO:ke] -synonym: "sequence collection" EXACT [] - -[Term] -id: SO:0001261 -name: overlapping_feature_set -def: "A continuous region of sequence composed of the overlapping of multiple sequence_features, which ultimately provides evidence for another sequence_feature." [SO:ke] -comment: This feature was requested by Nicole, tracker id 1911479. It is required to gather evidence together for annotation. An example would be overlapping ESTs that support an mRNA. -synonym: "overlapping feature set" EXACT [] -is_a: SO:0000703 ! experimental_result_region - -[Term] -id: SO:0001262 -name: overlapping_EST_set -def: "A continous experimental result region extending the length of multiple overlapping EST's." [SO:ke] -synonym: "overlapping EST set" EXACT [] -is_a: SO:0001261 ! overlapping_feature_set -relationship: has_part SO:0000345 ! EST - -[Term] -id: SO:0001263 -name: ncRNA_gene -synonym: "ncRNA gen" EXACT [] -synonym: "ncRNA gene" EXACT [] -synonym: "non-coding RNA gene" RELATED [] -is_a: SO:0000704 ! 
gene - -[Term] -id: SO:0001264 -name: gRNA_gene -synonym: "gRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000979 ! gRNA_encoding - -[Term] -id: SO:0001265 -name: miRNA_gene -synonym: "miRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000571 ! miRNA_encoding - -[Term] -id: SO:0001266 -name: scRNA_gene -synonym: "scRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000575 ! scRNA_encoding - -[Term] -id: SO:0001267 -name: snoRNA_gene -synonym: "snoRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0001268 -name: snRNA_gene -synonym: "snRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0001263 ! ncRNA_gene - -[Term] -id: SO:0001269 -name: SRP_RNA_gene -synonym: "SRP RNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000642 ! SRP_RNA_encoding - -[Term] -id: SO:0001270 -name: stRNA_gene -synonym: "stRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000656 ! stRNA_encoding - -[Term] -id: SO:0001271 -name: tmRNA_gene -synonym: "tmRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000659 ! 
tmRNA_encoding - -[Term] -id: SO:0001272 -name: tRNA_gene -synonym: "tRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000663 ! tRNA_encoding - -[Term] -id: SO:0001273 -name: modified_adenosine -def: "A modified adenine is an adenine base feature that has been altered." [SO:ke] -synonym: "modified adenosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001274 -name: modified_inosine -def: "A modified inosine is an inosine base feature that has been altered." [SO:ke] -synonym: "modified inosine" EXACT [] -is_a: SO:0001230 ! inosine - -[Term] -id: SO:0001275 -name: modified_cytidine -def: "A modified cytidine is a cytidine base feature which has been altered." [SO:ke] -synonym: "modified cytidine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001276 -name: modified_guanosine -synonym: "modified guanosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001277 -name: modified_uridine -synonym: "modified uridine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001278 -name: one_methylinosine -def: "1-methylinosine is a modified insosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylinosine" EXACT [] -synonym: "m1I" EXACT RNAMOD [] -synonym: "one methylinosine" EXACT [] -xref: RNAMOD:018 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001279 -name: one_two_prime_O_dimethylinosine -def: "1,2'-O-dimethylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylinosine" EXACT [] -synonym: "m'Im" EXACT RNAMOD [] -synonym: "one two prime O dimethylinosine" EXACT [] -xref: RNAMOD:019 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001280 -name: two_prime_O_methylinosine -def: "2'-O-methylinosine is a modified inosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylinosine" EXACT [] -synonym: "Im" EXACT RNAMOD [] -synonym: "two prime O methylinosine" EXACT [] -xref: RNAMOD:081 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001281 -name: three_methylcytidine -def: "3-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylcytidine" EXACT [] -synonym: "m3C" EXACT RNAMOD [] -synonym: "three methylcytidine" EXACT [] -xref: RNAMOD:020 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001282 -name: five_methylcytidine -def: "5-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylcytidine" EXACT [] -synonym: "five methylcytidine" EXACT [] -synonym: "m5C" EXACT RNAMOD [] -xref: RNAMOD:021 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001283 -name: two_prime_O_methylcytidine -def: "2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylcytidine" EXACT [] -synonym: "Cm" EXACT RNAMOD [] -synonym: "two prime O methylcytidine" EXACT [] -xref: RNAMOD:022 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001284 -name: two_thiocytidine -def: "2-thiocytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiocytidine" EXACT [] -synonym: "s2C" EXACT RNAMOD [] -synonym: "two thiocytidine" EXACT [] -xref: RNAMOD:023 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001285 -name: N4_acetylcytidine -def: "N4-acetylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4C" EXACT RNAMOD [] -synonym: "N4 acetylcytidine" EXACT [] -synonym: "N4-acetylcytidine" EXACT [] -xref: RNAMOD:024 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001286 -name: five_formylcytidine -def: "5-formylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-formylcytidine" EXACT [] -synonym: "f5C" EXACT RNAMOD [] -synonym: "five formylcytidine" EXACT [] -xref: RNAMOD:025 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001287 -name: five_two_prime_O_dimethylcytidine -def: "5,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethylcytidine" EXACT [] -synonym: "five two prime O dimethylcytidine" EXACT [] -synonym: "m5Cm" EXACT RNAMOD [] -xref: RNAMOD:026 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001288 -name: N4_acetyl_2_prime_O_methylcytidine -def: "N4-acetyl-2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4Cm" EXACT RNAMOD [] -synonym: "N4 acetyl 2 prime O methylcytidine" EXACT [] -synonym: "N4-acetyl-2'-O-methylcytidine" EXACT [] -xref: RNAMOD:027 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001289 -name: lysidine -def: "Lysidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "k2C" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Lysidine "wiki" -xref: RNAMOD:028 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001290 -name: N4_methylcytidine -def: "N4-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4C" EXACT RNAMOD [] -synonym: "N4 methylcytidine" EXACT [] -synonym: "N4-methylcytidine" EXACT [] -xref: RNAMOD:082 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001291 -name: N4_2_prime_O_dimethylcytidine -def: "N4,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4Cm" EXACT RNAMOD [] -synonym: "N4 2 prime O dimethylcytidine" EXACT [] -synonym: "N4,2'-O-dimethylcytidine" EXACT [] -xref: RNAMOD:083 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001292 -name: five_hydroxymethylcytidine -def: "5-hydroxymethylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxymethylcytidine" EXACT [] -synonym: "five hydroxymethylcytidine" EXACT [] -synonym: "hm5C" EXACT RNAMOD [] -xref: RNAMOD:084 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001293 -name: five_formyl_two_prime_O_methylcytidine -def: "5-formyl-2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formyl-2'-O-methylcytidine" EXACT [] -synonym: "f5Cm" EXACT RNAMOD [] -synonym: "five formyl two prime O methylcytidine" EXACT [] -xref: RNAMOD:095 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001294 -name: N4_N4_2_prime_O_trimethylcytidine -def: "N4_N4_2_prime_O_trimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m42Cm" EXACT RNAMOD [] -synonym: "N4,N4,2'-O-trimethylcytidine" EXACT [] -xref: RNAMOD:107 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001295 -name: one_methyladenosine -def: "1_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyladenosine" EXACT [] -synonym: "m1A" EXACT RNAMOD [] -synonym: "one methyladenosine" EXACT [] -xref: RNAMOD:001 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001296 -name: two_methyladenosine -def: "2_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methyladenosine" EXACT [] -synonym: "m2A" EXACT RNAMOD [] -synonym: "two methyladenosine" EXACT [] -xref: RNAMOD:002 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001297 -name: N6_methyladenosine -def: "N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6A" EXACT RNAMOD [] -synonym: "N6 methyladenosine" EXACT [] -synonym: "N6-methyladenosine" EXACT [] -xref: RNAMOD:003 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001298 -name: two_prime_O_methyladenosine -def: "2prime_O_methyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyladenosine" EXACT [] -synonym: "Am" EXACT RNAMOD [] -synonym: "two prime O methyladenosine" EXACT [] -xref: RNAMOD:004 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001299 -name: two_methylthio_N6_methyladenosine -def: "2_methylthio_N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-methyladenosine" EXACT [] -synonym: "ms2m6A" EXACT RNAMOD [] -synonym: "two methylthio N6 methyladenosine" EXACT [] -xref: RNAMOD:005 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001300 -name: N6_isopentenyladenosine -def: "N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "i6A" EXACT RNAMOD [] -synonym: "N6 isopentenyladenosine" EXACT [] -synonym: "N6-isopentenyladenosine" EXACT [] -xref: RNAMOD:006 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001301 -name: two_methylthio_N6_isopentenyladenosine -def: "2_methylthio_N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-isopentenyladenosine" EXACT [] -synonym: "ms2i6A" EXACT RNAMOD [] -synonym: "two methylthio N6 isopentenyladenosine" EXACT [] -xref: RNAMOD:007 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001302 -name: N6_cis_hydroxyisopentenyl_adenosine -def: "N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "io6A" EXACT RNAMOD [] -synonym: "N6 cis hydroxyisopentenyl adenosine" EXACT [] -synonym: "N6-(cis-hydroxyisopentenyl)adenosine" EXACT [] -xref: RNAMOD:008 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001303 -name: two_methylthio_N6_cis_hydroxyisopentenyl_adenosine -def: "2_methylthio_N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-(cis-hydroxyisopentenyl) adenosine" EXACT [] -synonym: "ms2io6A" EXACT RNAMOD [] -synonym: "two methylthio N6 cis hydroxyisopentenyl adenosine" EXACT [] -xref: RNAMOD:009 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001304 -name: N6_glycinylcarbamoyladenosine -def: "N6_glycinylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "g6A" EXACT RNAMOD [] -synonym: "N6 glycinylcarbamoyladenosine" EXACT [] -synonym: "N6-glycinylcarbamoyladenosine" EXACT [] -xref: RNAMOD:010 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001305 -name: N6_threonylcarbamoyladenosine -def: "N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-threonylcarbamoyladenosine" EXACT [] -synonym: "t6A" EXACT RNAMOD [] -xref: RNAMOD:011 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001306 -name: two_methylthio_N6_threonyl_carbamoyladenosine -def: "2_methylthio_N6_threonyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-threonyl carbamoyladenosine" EXACT [] -synonym: "ms2t6A" EXACT RNAMOD [] -synonym: "two methylthio N6 threonyl carbamoyladenosine" EXACT [] -xref: RNAMOD:012 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001307 -name: N6_methyl_N6_threonylcarbamoyladenosine -def: "N6_methyl_N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6t6A" EXACT RNAMOD [] -synonym: "N6 methyl N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-methyl-N6-threonylcarbamoyladenosine" EXACT [] -xref: RNAMOD:013 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001308 -name: N6_hydroxynorvalylcarbamoyladenosine -def: "N6_hydroxynorvalylcarbamoyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "hn6A" EXACT RNAMOD [] -synonym: "N6 hydroxynorvalylcarbamoyladenosine" EXACT [] -synonym: "N6-hydroxynorvalylcarbamoyladenosine" EXACT [] -xref: RNAMOD:014 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001309 -name: two_methylthio_N6_hydroxynorvalyl_carbamoyladenosine -def: "2_methylthio_N6_hydroxynorvalyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-hydroxynorvalyl carbamoyladenosine" EXACT [] -synonym: "ms2hn6A" EXACT RNAMOD [] -synonym: "two methylthio N6 hydroxynorvalyl carbamoyladenosine" EXACT [] -xref: RNAMOD:015 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001310 -name: two_prime_O_ribosyladenosine_phosphate -def: "2prime_O_ribosyladenosine_phosphate is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosyladenosine (phosphate)" EXACT [] -synonym: "Ar(p)" EXACT RNAMOD [] -synonym: "two prime O ribosyladenosine phosphate" EXACT [] -xref: RNAMOD:016 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001311 -name: N6_N6_dimethyladenosine -def: "N6_N6_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62A" EXACT RNAMOD [] -synonym: "N6,N6-dimethyladenosine" EXACT [] -xref: RNAMOD:080 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001312 -name: N6_2_prime_O_dimethyladenosine -def: "N6_2prime_O_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6Am" EXACT RNAMOD [] -synonym: "N6 2 prime O dimethyladenosine" EXACT [] -synonym: "N6,2'-O-dimethyladenosine" EXACT [] -xref: RNAMOD:088 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001313 -name: N6_N6_2_prime_O_trimethyladenosine -def: "N6_N6_2prime_O_trimethyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "m62Am" EXACT RNAMOD [] -synonym: "N6,N6,2'-O-trimethyladenosine" EXACT [] -xref: RNAMOD:089 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001314 -name: one_two_prime_O_dimethyladenosine -def: "1,2'-O-dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethyladenosine" EXACT [] -synonym: "m1Am" EXACT RNAMOD [] -synonym: "one two prime O dimethyladenosine" EXACT [] -xref: RNAMOD:097 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001315 -name: N6_acetyladenosine -def: "N6_acetyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac6A" EXACT RNAMOD [] -synonym: "N6 acetyladenosine" EXACT [] -synonym: "N6-acetyladenosine" EXACT [] -xref: RNAMOD:102 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001316 -name: seven_deazaguanosine -def: "7-deazaguanosine is a moddified guanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-deazaguanosine" RELATED [] -synonym: "seven deazaguanosine" EXACT [] -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001317 -name: queuosine -def: "Queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "Q" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Queuosine "wiki" -xref: RNAMOD:043 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001318 -name: epoxyqueuosine -def: "Epoxyqueuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "eQ" EXACT RNAMOD [] -xref: RNAMOD:044 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001319 -name: galactosyl_queuosine -def: "Galactosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "galactosyl queuosine" EXACT [] -synonym: "galactosyl-queuosine" EXACT [] -synonym: "galQ" EXACT RNAMOD [] -xref: RNAMOD:045 -is_a: SO:0001316 ! 
seven_deazaguanosine - -[Term] -id: SO:0001320 -name: mannosyl_queuosine -def: "Mannosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "mannosyl queuosine" EXACT [] -synonym: "mannosyl-queuosine" EXACT [] -synonym: "manQ" EXACT RNAMOD [] -xref: RNAMOD:046 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001321 -name: seven_cyano_seven_deazaguanosine -def: "7_cyano_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-cyano-7-deazaguanosine" EXACT [] -synonym: "preQ0" EXACT RNAMOD [] -synonym: "seven cyano seven deazaguanosine" EXACT [] -xref: RNAMOD:047 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001322 -name: seven_aminomethyl_seven_deazaguanosine -def: "7_aminomethyl_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-aminomethyl-7-deazaguanosine" EXACT [] -synonym: "preQ1" EXACT RNAMOD [] -synonym: "seven aminomethyl seven deazaguanosine" EXACT [] -xref: RNAMOD:048 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001323 -name: archaeosine -def: "Archaeosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "G+" EXACT RNAMOD [] -xref: RNAMOD:049 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001324 -name: one_methylguanosine -def: "1_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylguanosine" EXACT [] -synonym: "m1G" EXACT RNAMOD [] -synonym: "one methylguanosine" EXACT [] -xref: RNAMOD:029 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001325 -name: N2_methylguanosine -def: "N2_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2G" EXACT RNAMOD [] -synonym: "N2 methylguanosine" EXACT [] -synonym: "N2-methylguanosine" EXACT [] -xref: RNAMOD:030 -is_a: SO:0001276 ! 
modified_guanosine - -[Term] -id: SO:0001326 -name: seven_methylguanosine -def: "7_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "7-methylguanosine" EXACT [] -synonym: "m7G" EXACT RNAMOD [] -synonym: "seven methylguanosine" EXACT [] -xref: RNAMOD:031 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001327 -name: two_prime_O_methylguanosine -def: "2prime_O_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylguanosine" EXACT [] -synonym: "Gm" EXACT RNAMOD [] -synonym: "two prime O methylguanosine" EXACT [] -xref: RNAMOD:032 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001328 -name: N2_N2_dimethylguanosine -def: "N2_N2_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22G" EXACT RNAMOD [] -synonym: "N2,N2-dimethylguanosine" EXACT [] -xref: RNAMOD:033 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001329 -name: N2_2_prime_O_dimethylguanosine -def: "N2_2prime_O_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2Gm" EXACT RNAMOD [] -synonym: "N2 2 prime O dimethylguanosine" EXACT [] -synonym: "N2,2'-O-dimethylguanosine" EXACT [] -xref: RNAMOD:034 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001330 -name: N2_N2_2_prime_O_trimethylguanosine -def: "N2_N2_2prime_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22Gmv" EXACT RNAMOD [] -synonym: "N2,N2,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:035 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001331 -name: two_prime_O_ribosylguanosine_phosphate -def: "2prime_O_ribosylguanosine_phosphate is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosylguanosine (phosphate)" EXACT [] -synonym: "Gr(p)" EXACT RNAMOD [] -synonym: "two prime O ribosylguanosine phosphate" EXACT [] -xref: RNAMOD:036 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001332 -name: wybutosine -def: "Wybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "yW" EXACT RNAMOD [] -xref: RNAMOD:037 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001333 -name: peroxywybutosine -def: "Peroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "o2yW" EXACT RNAMOD [] -xref: RNAMOD:038 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001334 -name: hydroxywybutosine -def: "Hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW" EXACT RNAMOD [] -xref: RNAMOD:039 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001335 -name: undermodified_hydroxywybutosine -def: "Undermodified_hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW*" EXACT RNAMOD [] -synonym: "undermodified hydroxywybutosine" EXACT [] -xref: RNAMOD:040 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001336 -name: wyosine -def: "Wyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "IMG" EXACT RNAMOD [] -xref: RNAMOD:041 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001337 -name: methylwyosine -def: "Methylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mimG" EXACT RNAMOD [] -xref: RNAMOD:042 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001338 -name: N2_7_dimethylguanosine -def: "N2_7_dimethylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "m2,7G" EXACT RNAMOD [] -synonym: "N2 7 dimethylguanosine" EXACT [] -synonym: "N2,7-dimethylguanosine" EXACT [] -xref: RNAMOD:090 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001339 -name: N2_N2_7_trimethylguanosine -def: "N2_N2_7_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,2,7G" EXACT RNAMOD [] -synonym: "N2,N2,7-trimethylguanosine" EXACT [] -xref: RNAMOD:091 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001340 -name: one_two_prime_O_dimethylguanosine -def: "1_2prime_O_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylguanosine" EXACT [] -synonym: "m1Gm" EXACT RNAMOD [] -synonym: "one two prime O dimethylguanosine" EXACT [] -xref: RNAMOD:096 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001341 -name: four_demethylwyosine -def: "4_demethylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-demethylwyosine" EXACT [] -synonym: "four demethylwyosine" EXACT [] -synonym: "imG-14" EXACT RNAMOD [] -xref: RNAMOD:100 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001342 -name: isowyosine -def: "Isowyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "imG2" EXACT RNAMOD [] -xref: RNAMOD:101 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001343 -name: N2_7_2prirme_O_trimethylguanosine -def: "N2_7_2prirme_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7Gm" EXACT RNAMOD [] -synonym: "N2 7 2prirme O trimethylguanosine" EXACT [] -synonym: "N2,7,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:106 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001344 -name: five_methyluridine -def: "5_methyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyluridine" EXACT [] -synonym: "five methyluridine" EXACT [] -synonym: "m5U" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/5-methyluridine "wiki" -xref: RNAMOD:052 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001345 -name: two_prime_O_methyluridine -def: "2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyluridine" EXACT [] -synonym: "two prime O methyluridine" EXACT [] -synonym: "Um" EXACT RNAMOD [] -xref: RNAMOD:053 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001346 -name: five_two_prime_O_dimethyluridine -def: "5_2_prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethyluridine" EXACT [] -synonym: "five two prime O dimethyluridine" EXACT [] -synonym: "m5Um" EXACT RNAMOD [] -xref: RNAMOD:054 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001347 -name: one_methylpseudouridine -def: "1_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylpseudouridine" EXACT [] -synonym: "m1Y" EXACT RNAMOD [] -synonym: "one methylpseudouridine" EXACT [] -xref: RNAMOD:055 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001348 -name: two_prime_O_methylpseudouridine -def: "2prime_O_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylpseudouridine" EXACT [] -synonym: "two prime O methylpseudouridine" EXACT [] -synonym: "Ym" EXACT RNAMOD [] -xref: RNAMOD:056 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001349 -name: two_thiouridine -def: "2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiouridine" EXACT [] -synonym: "s2U" EXACT RNAMOD [] -synonym: "two thiouridine" EXACT [] -xref: RNAMOD:057 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001350 -name: four_thiouridine -def: "4_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-thiouridine" EXACT [] -synonym: "four thiouridine" EXACT [] -synonym: "s4U" EXACT RNAMOD [] -xref: RNAMOD:058 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001351 -name: five_methyl_2_thiouridine -def: "5_methyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methyl-2-thiouridine" EXACT [] -synonym: "five methyl 2 thiouridine" EXACT [] -synonym: "m5s2U" EXACT RNAMOD [] -xref: RNAMOD:059 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001352 -name: two_thio_two_prime_O_methyluridine -def: "2_thio_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thio-2'-O-methyluridine" EXACT [] -synonym: "s2Um" EXACT RNAMOD [] -synonym: "two thio two prime O methyluridine" EXACT [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001353 -name: three_three_amino_three_carboxypropyl_uridine -def: "3_3_amino_3_carboxypropyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-(3-amino-3-carboxypropyl)uridine" EXACT [] -synonym: "acp3U" EXACT RNAMOD [] -xref: RNAMOD:061 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001354 -name: five_hydroxyuridine -def: "5_hydroxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxyuridine" EXACT [] -synonym: "five hydroxyuridine" EXACT [] -synonym: "ho5U" EXACT RNAMOD [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001355 -name: five_methoxyuridine -def: "5_methoxyuridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxyuridine" EXACT [] -synonym: "five methoxyuridine" EXACT [] -synonym: "mo5U" EXACT RNAMOD [] -xref: RNAMOD:063 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001356 -name: uridine_five_oxyacetic_acid -def: "Uridine_5_oxyacetic_acid is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "cmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid" EXACT [] -synonym: "uridine five oxyacetic acid" EXACT [] -xref: RNAMOD:064 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001357 -name: uridine_five_oxyacetic_acid_methyl_ester -def: "Uridine_5_oxyacetic_acid_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mcmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid methyl ester" EXACT [] -synonym: "uridine five oxyacetic acid methyl ester" EXACT [] -xref: RNAMOD:065 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001358 -name: five_carboxyhydroxymethyl_uridine -def: "5_carboxyhydroxymethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine" EXACT [] -synonym: "chm5U" EXACT RNAMOD [] -synonym: "five carboxyhydroxymethyl uridine" EXACT [] -xref: RNAMOD:066 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001359 -name: five_carboxyhydroxymethyl_uridine_methyl_ester -def: "5_carboxyhydroxymethyl_uridine_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine methyl ester" EXACT [] -synonym: "five carboxyhydroxymethyl uridine methyl ester" EXACT [] -synonym: "mchm5U" EXACT RNAMOD [] -xref: RNAMOD:067 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001360 -name: five_methoxycarbonylmethyluridine -def: "Five_methoxycarbonylmethyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyluridine" EXACT [] -synonym: "five methoxycarbonylmethyluridine" EXACT [] -synonym: "mcm5U" EXACT RNAMOD [] -xref: RNAMOD:068 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001361 -name: five_methoxycarbonylmethyl_two_prime_O_methyluridine -def: "Five_methoxycarbonylmethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five methoxycarbonylmethyl two prime O methyluridine" EXACT [] -synonym: "mcm5Um" EXACT RNAMOD [] -xref: RNAMOD:069 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001362 -name: five_methoxycarbonylmethyl_two_thiouridine -def: "5_methoxycarbonylmethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2-thiouridine" EXACT [] -synonym: "five methoxycarbonylmethyl two thiouridine" EXACT [] -synonym: "mcm5s2U" EXACT RNAMOD [] -xref: RNAMOD:070 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001363 -name: five_aminomethyl_two_thiouridine -def: "5_aminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-aminomethyl-2-thiouridine" EXACT [] -synonym: "five aminomethyl two thiouridine" EXACT [] -synonym: "nm5s2U" EXACT RNAMOD [] -xref: RNAMOD:071 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001364 -name: five_methylaminomethyluridine -def: "5_methylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyluridine" EXACT [] -synonym: "five methylaminomethyluridine" EXACT [] -synonym: "mnm5U" EXACT RNAMOD [] -xref: RNAMOD:072 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001365 -name: five_methylaminomethyl_two_thiouridine -def: "5_methylaminomethyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-thiouridine" EXACT [] -synonym: "five methylaminomethyl two thiouridine" EXACT [] -synonym: "mnm5s2U" EXACT RNAMOD [] -xref: RNAMOD:073 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001366 -name: five_methylaminomethyl_two_selenouridine -def: "5_methylaminomethyl_2_selenouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-selenouridine" EXACT [] -synonym: "five methylaminomethyl two selenouridine" EXACT [] -synonym: "mnm5se2U" EXACT RNAMOD [] -xref: RNAMOD:074 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001367 -name: five_carbamoylmethyluridine -def: "5_carbamoylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyluridine" EXACT [] -synonym: "five carbamoylmethyluridine" EXACT [] -synonym: "ncm5U" EXACT RNAMOD [] -xref: RNAMOD:075 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001368 -name: five_carbamoylmethyl_two_prime_O_methyluridine -def: "5_carbamoylmethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five carbamoylmethyl two prime O methyluridine" EXACT [] -synonym: "ncm5Um" EXACT RNAMOD [] -xref: RNAMOD:076 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001369 -name: five_carboxymethylaminomethyluridine -def: "5_carboxymethylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyluridine" EXACT [] -synonym: "cmnm5U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyluridine" EXACT [] -xref: RNAMOD:077 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001370 -name: five_carboxymethylaminomethyl_two_prime_O_methyluridine -def: "5_carboxymethylaminomethyl_2_prime_O_methyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl- 2'-O-methyluridine" EXACT [] -synonym: "cmnm5Um" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two prime O methyluridine" EXACT [] -xref: RNAMOD:078 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001371 -name: five_carboxymethylaminomethyl_two_thiouridine -def: "5_carboxymethylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl-2-thiouridine" EXACT [] -synonym: "cmnm5s2U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two thiouridine" EXACT [] -xref: RNAMOD:079 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001372 -name: three_methyluridine -def: "3_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methyluridine" EXACT [] -synonym: "m3U" EXACT RNAMOD [] -synonym: "three methyluridine" EXACT [] -xref: RNAMOD:085 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001373 -name: one_methyl_three_three_amino_three_carboxypropyl_pseudouridine -def: "1_methyl_3_3_amino_3_carboxypropyl_pseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyl-3-(3-amino-3-carboxypropyl) pseudouridine" EXACT [] -synonym: "m1acp3Y" EXACT RNAMOD [] -xref: RNAMOD:086 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001374 -name: five_carboxymethyluridine -def: "5_carboxymethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethyluridine" EXACT [] -synonym: "cm5U" EXACT RNAMOD [] -synonym: "five carboxymethyluridine" EXACT [] -xref: RNAMOD:087 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001375 -name: three_two_prime_O_dimethyluridine -def: "3_2prime_O_dimethyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "3,2'-O-dimethyluridine" EXACT [] -synonym: "m3Um" EXACT RNAMOD [] -synonym: "three two prime O dimethyluridine" EXACT [] -xref: RNAMOD:092 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001376 -name: five_methyldihydrouridine -def: "5_methyldihydrouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methyldihydrouridine" EXACT [] -synonym: "five methyldihydrouridine" EXACT [] -synonym: "m5D" EXACT RNAMOD [] -xref: RNAMOD:093 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001377 -name: three_methylpseudouridine -def: "3_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylpseudouridine" EXACT [] -synonym: "m3Y" EXACT RNAMOD [] -synonym: "three methylpseudouridine" EXACT [] -xref: RNAMOD:094 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001378 -name: five_taurinomethyluridine -def: "5_taurinomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyluridine" EXACT [] -synonym: "five taurinomethyluridine" EXACT [] -synonym: "tm5U" EXACT RNAMOD [] -xref: RNAMOD:098 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001379 -name: five_taurinomethyl_two_thiouridine -def: "5_taurinomethyl_2_thiouridineis a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyl-2-thiouridine" EXACT [] -synonym: "five taurinomethyl two thiouridine" EXACT [] -synonym: "tm5s2U" EXACT RNAMOD [] -xref: RNAMOD:099 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001380 -name: five_isopentenylaminomethyl_uridine -def: "5_isopentenylaminomethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)uridine" EXACT [] -synonym: "five isopentenylaminomethyl uridine" EXACT [] -synonym: "inm5U" EXACT RNAMOD [] -xref: RNAMOD:103 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001381 -name: five_isopentenylaminomethyl_two_thiouridine -def: "5_isopentenylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2-thiouridine" EXACT [] -synonym: "five isopentenylaminomethyl two thiouridine" EXACT [] -synonym: "inm5s2U" EXACT RNAMOD [] -xref: RNAMOD:104 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001382 -name: five_isopentenylaminomethyl_two_prime_O_methyluridine -def: "5_isopentenylaminomethyl_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2'-O-methyluridine" EXACT [] -synonym: "five isopentenylaminomethyl two prime O methyluridine" EXACT [] -synonym: "inm5Um" EXACT RNAMOD [] -xref: RNAMOD:105 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001383 -name: histone_binding_site -def: "A region of a DNA molecule that is bound by a histone." [SO:ke] -synonym: "histone binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0001384 -name: CDS_fragment -synonym: "CDS fragment" EXACT [] -synonym: "incomplete CDS" EXACT [] -is_a: SO:0000316 ! implied link automatically realized ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001385 -name: modified_amino_acid_feature -def: "A post translationally modified amino acid feature." [SO:ke] -synonym: "modified amino acid feature" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001386 -name: modified_glycine -def: "A post translationally modified glycine amino acid feature." [SO:ke] -synonym: "ModGly" EXACT AAMOD [] -synonym: "modified glycine" EXACT [] -xref: MOD:00908 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001387 -name: modified_L_alanine -def: "A post translationally modified alanine amino acid feature." 
[SO:ke] -synonym: "ModAla" EXACT AAMOD [] -synonym: "modified L alanine" EXACT [] -synonym: "modified L-alanine" EXACT [] -xref: MOD:00901 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001388 -name: modified_L_asparagine -def: "A post translationally modified asparagine amino acid feature." [SO:ke] -synonym: "ModAsn" EXACT AAMOD [] -synonym: "modified L asparagine" EXACT [] -synonym: "modified L-asparagine" EXACT [] -xref: MOD:00903 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001389 -name: modified_L_aspartic_acid -def: "A post translationally modified aspartic acid amino acid feature." [SO:ke] -synonym: "ModAsp" EXACT AAMOD [] -synonym: "modified L aspartic acid" EXACT [] -synonym: "modified L-aspartic acid" EXACT [] -xref: MOD:00904 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001390 -name: modified_L_cysteine -def: "A post translationally modified cysteine amino acid feature." [SO:ke] -synonym: "ModCys" EXACT AAMOD [] -synonym: "modified L cysteine" EXACT [] -synonym: "modified L-cysteine" EXACT [] -xref: MOD:00905 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001391 -name: modified_L_glutamic_acid -synonym: "ModGlu" EXACT AAMOD [] -synonym: "modified L glutamic acid" EXACT [] -synonym: "modified L-glutamic acid" EXACT [] -xref: MOD:00906 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001392 -name: modified_L_threonine -def: "A post translationally modified threonine amino acid feature." [SO:ke] -synonym: "modified L threonine" EXACT [] -synonym: "modified L-threonine" EXACT [] -synonym: "ModThr" EXACT AAMOD [] -xref: MOD:00917 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001393 -name: modified_L_tryptophan -def: "A post translationally modified tryptophan amino acid feature." 
[SO:ke] -synonym: "modified L tryptophan" EXACT [] -synonym: "modified L-tryptophan" EXACT [] -synonym: "ModTrp" EXACT AAMOD [] -xref: MOD:00918 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001394 -name: modified_L_glutamine -def: "A post translationally modified glutamine amino acid feature." [SO:ke] -synonym: "ModGln" EXACT [] -synonym: "modified L glutamine" EXACT [] -synonym: "modified L-glutamine" EXACT [] -xref: MOD:00907 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001395 -name: modified_L_methionine -def: "A post translationally modified methionine amino acid feature." [SO:ke] -synonym: "modified L methionine" EXACT [] -synonym: "modified L-methionine" EXACT [] -synonym: "ModMet" EXACT AAMOD [] -xref: MOD:00913 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001396 -name: modified_L_isoleucine -def: "A post translationally modified isoleucine amino acid feature." [SO:ke] -synonym: "modified L isoleucine" EXACT [] -synonym: "modified L-isoleucine" EXACT [] -synonym: "ModIle" EXACT AAMOD [] -xref: MOD:00910 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001397 -name: modified_L_phenylalanine -def: "A post translationally modified phenylalanine amino acid feature." [SO:ke] -synonym: "modified L phenylalanine" EXACT [] -synonym: "modified L-phenylalanine" EXACT [] -synonym: "ModPhe" EXACT AAMOD [] -xref: MOD:00914 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001398 -name: modified_L_histidine -def: "A post translationally modified histidie amino acid feature." [SO:ke] -synonym: "ModHis" EXACT [] -synonym: "modified L histidine" EXACT [] -synonym: "modified L-histidine" EXACT [] -xref: MOD:00909 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001399 -name: modified_L_serine -def: "A post translationally modified serine amino acid feature." 
[SO:ke] -synonym: "modified L serine" EXACT [] -synonym: "modified L-serine" EXACT [] -synonym: "MosSer" EXACT AAMOD [] -xref: MOD:00916 "http://www.psidev.info/index.php?q=node/104" -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001400 -name: modified_L_lysine -def: "A post translationally modified lysine amino acid feature." [SO:ke] -synonym: "modified L lysine" EXACT [] -synonym: "modified L-lysine" EXACT [] -synonym: "ModLys" EXACT AAMOD [] -xref: MOD:00912 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001401 -name: modified_L_leucine -def: "A post translationally modified leucine amino acid feature." [SO:ke] -synonym: "modified L leucine" EXACT [] -synonym: "modified L-leucine " EXACT [] -synonym: "ModLeu" EXACT AAMOD [] -xref: MOD:00911 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001402 -name: modified_L_selenocysteine -def: "A post translationally modified selenocysteine amino acid feature." [SO:ke] -synonym: "modified L selenocysteine" EXACT [] -synonym: "modified L-selenocysteine" EXACT [] -xref: MOD:01158 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001403 -name: modified_L_valine -def: "A post translationally modified valine amino acid feature." [SO:ke] -synonym: "modified L valine" EXACT [] -synonym: "modified L-valine" EXACT [] -synonym: "ModVal" EXACT AAMOD [] -xref: MOD:00920 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001404 -name: modified_L_proline -def: "A post translationally modified proline amino acid feature." [SO:ke] -synonym: "modified L proline" EXACT [] -synonym: "modified L-proline " EXACT [] -synonym: "ModPro" EXACT AAMOD [] -xref: MOD:00915 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001405 -name: modified_L_tyrosine -def: "A post translationally modified tyrosine amino acid feature." 
[SO:ke] -synonym: "modified L tyrosine" EXACT [] -synonym: "modified L-tyrosine" EXACT [] -synonym: "ModTry" EXACT AAMOD [] -xref: MOD:00919 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001406 -name: modified_L_arginine -def: "A post translationally modified arginine amino acid feature." [SO:ke] -synonym: "ModArg" EXACT AAMOD [] -synonym: "modified L arginine" EXACT [] -synonym: "modified L-arginine" EXACT [] -xref: MOD:00902 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001407 -name: peptidyl -def: "An attribute describing the nature of a proteinaceous polymer, where by the amino acid units are joined by peptide bonds." [SO:ke] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0001408 -name: cleaved_for_gpi_anchor_region -def: "The C-terminal residues of a polypeptide which are exchanged for a GPI-anchor." [EBI:rh] -synonym: "cleaved for gpi anchor region" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001409 -name: biomaterial_region -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001410 -name: experimental_feature -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -def: "A region that is defined according to its relations with other regions within the same sequence." 
[SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001413 -name: translocation_breakpoint -def: "The point within a chromosome where a translocation begins or ends." [SO:cb] -synonym: "translocation breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001414 -name: insertion_breakpoint -def: "The point within a chromosome where a insertion begins or ends." [SO:cb] -synonym: "insertion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001415 -name: deletion_breakpoint -def: "The point within a chromosome where a deletion begins or ends." [SO:cb] -synonym: "deletion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001416 -name: five_prime_flanking_region -def: "A flanking region located five prime of a specific region." [SO:chado] -synonym: "5' flanking region" RELATED [] -synonym: "five prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001417 -name: three_prime_flanking_region -def: "A flanking region located three prime of a specific region." [SO:chado] -synonym: "3' flanking region" RELATED [] -synonym: "three prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001418 -name: transcribed_fragment -def: "An experimental region, defined by a tiling array experiment to be transcribed at some level." [SO:ke] -comment: Term requested by the MODencode group. -synonym: "transcribed fragment" EXACT [] -synonym: "transfrag" RELATED [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001419 -name: cis_splice_site -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -def: "Primary transcript region bordering trans-splice junction." 
[SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001421 -name: splice_junction -def: "The boundary between an intron and an exon." [SO:ke] -synonym: "splice boundary" EXACT [] -synonym: "splice junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001422 -name: conformational_switch -def: "A region of a polypeptide, involved in the transition from one conformational state to another." [SO:ke] -comment: MM Young, K Kirshenbaum, KA Dill & S Highsmith. Predicting conformational switches in proteins. Protein Science, 1999, 8, 1752-64. K. Kirshenbaum, M.M. Young and S. Highsmith. Predicting Allosteric Switches in Myosins. Protein Science 8(9):1806-1815. 1999. -synonym: "polypeptide conformational switch" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001423 -name: dye_terminator_read -def: "A read produced by the dye terminator method of sequencing." [SO:ke] -synonym: "dye terminator read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001424 -name: pyrosequenced_read -def: "A read produced by pyrosequencing technology." [SO:ke] -comment: An example is a read produced by Roche 454 technology. -synonym: "pyorsequenced read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001425 -name: ligation_based_read -def: "A read produced by ligation based sequencing technologies." [SO:ke] -comment: An example of this kind of read is one produced by ABI SOLiD. -synonym: "ligation based read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001426 -name: polymerase_synthesis_read -def: "A read produced by the polymerase based sequence by synthesis method." [SO:ke] -comment: An example is a read produced by Illumina technology. -synonym: "polymerase synthesis read" RELATED [] -is_a: SO:0000150 ! 
read - -[Term] -id: SO:0001427 -name: cis_regulatory_frameshift_element -def: "A structural region in an RNA molecule which promotes ribosomal frameshifting of cis coding sequence." [RFAM:jd] -synonym: "cis regulatory frameshift element" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001428 -name: expressed_sequence_assembly -def: "A sequence assembly derived from expressed sequences." [SO:ke] -comment: From tracker [ 2372385 ] expressed_sequence_assembly. -synonym: "expressed sequence assembly" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0001429 -name: DNA_binding_site -def: "A region of a molecule that binds to DNA." [SO:ke] -synonym: "DNA binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001430 -name: polyA_junction -def: "The boundary between the UTR and the polyA sequence." [SO:ke] -synonym: "polyA junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001431 -name: cryptic_gene -def: "A gene that is not transcribed under normal conditions and is not critical to normal cellular functioning." [SO:ke] -synonym: "cryptic gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000976 ! cryptic - -[Term] -id: SO:0001432 -name: sequence_variant_affecting_polyadenylation -synonym: "mutation affecting polyadenylation" RELATED [] -synonym: "sequence variant affecting polyadenylation" EXACT [] -is_a: SO:1000070 ! sequence_variant_affecting_transcript_processing - -[Term] -id: SO:0001433 -name: three_prime_RACE_clone -def: "A three prime RACE (Rapid Amplification of cDNA Ends) clone is a cDNA clone copied from the 3' end of an mRNA (using a poly-dT primer to capture the polyA tail and a gene-specific or randomly primed 5' primer), and spliced into a vector for propagation in a suitable host." [modENCODE:nlw] -synonym: "3' RACE clone" RELATED [] -is_a: SO:0000317 ! 
cDNA_clone - -[Term] -id: SO:0001434 -name: cassette_pseudogene -def: "A cassette pseudogene is a kind of gene in an inactive form which may recombine at a telomeric locus to form a functional copy." [SO:ke] -comment: Requested by the Trypanosome community. -synonym: "cassette pseudogene" EXACT [] -synonym: "cassette type psedogene" RELATED [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0001435 -name: alanine -comment: A place holder for a cross product with chebi. -synonym: "A" EXACT aa1 [] -synonym: "Ala" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001436 -name: valine -comment: A place holder for a cross product with chebi. -synonym: "V" EXACT aa1 [] -synonym: "Val" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001437 -name: leucine -comment: A place holder for a cross product with chebi. -synonym: "L" EXACT aa1 [] -synonym: "Leu" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001438 -name: isoleucine -comment: A place holder for a cross product with chebi. -synonym: "I" EXACT aa1 [] -synonym: "Ile" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001439 -name: proline -comment: A place holder for a cross product with chebi. -synonym: "P" EXACT aa1 [] -synonym: "Pro" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001440 -name: tryptophan -comment: A place holder for a cross product with chebi. -synonym: "Trp" EXACT aa3 [] -synonym: "W" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001441 -name: phenylalanine -comment: A place holder for a cross product with chebi. -synonym: "F" EXACT aa1 [] -synonym: "Phe" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001442 -name: methionine -comment: A place holder for a cross product with chebi. -synonym: "M" EXACT aa1 [] -synonym: "Met" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001443 -name: glycine -comment: A place holder for a cross product with chebi. 
-synonym: "G" EXACT aa1 [] -synonym: "Gly" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001444 -name: serine -comment: A place holder for a cross product with chebi. -synonym: "S" EXACT aa1 [] -synonym: "Ser" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001445 -name: threonine -comment: A place holder for a cross product with chebi. -synonym: "T" EXACT aa1 [] -synonym: "Thr" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001446 -name: tyrosine -comment: A place holder for a cross product with chebi. -synonym: "Tyr" EXACT aa3 [] -synonym: "Y" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001447 -name: cysteine -comment: A place holder for a cross product with chebi. -synonym: "C" EXACT aa1 [] -synonym: "Cys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001448 -name: glutamine -comment: A place holder for a cross product with chebi. -synonym: "Gln" EXACT aa3 [] -synonym: "Q" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001449 -name: asparagine -comment: A place holder for a cross product with chebi. -synonym: "Asn" EXACT aa3 [] -synonym: "N" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001450 -name: lysine -comment: A place holder for a cross product with chebi. -synonym: "K" EXACT aa1 [] -synonym: "Lys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001451 -name: arginine -comment: A place holder for a cross product with chebi. -synonym: "Arg" EXACT aa3 [] -synonym: "R" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001452 -name: histidine -comment: A place holder for a cross product with chebi. -synonym: "H" EXACT aa1 [] -synonym: "His" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001453 -name: aspartic_acid -comment: A place holder for a cross product with chebi. -synonym: "Asp" EXACT aa3 [] -synonym: "aspartic acid" EXACT [] -synonym: "D" EXACT aa1 [] -is_a: SO:0001237 ! 
amino_acid - -[Term] -id: SO:0001454 -name: glutamic_acid -comment: A place holder for a cross product with chebi. -synonym: "E" EXACT aa1 [] -synonym: "Glu" EXACT aa3 [] -synonym: "glutamic acid" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001455 -name: selenocysteine -comment: A place holder for a cross product with chebi. -synonym: "Sec" EXACT aa3 [] -synonym: "U" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001456 -name: pyrrolysine -comment: A place holder for a cross product with chebi. -synonym: "O" EXACT aa1 [] -synonym: "Pyl" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001457 -name: transcribed_cluster -def: "A region defined by a set of transcribed sequences from the same gene or expressed pseudogene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "transcribed cluster" EXACT [] -synonym: "unigene cluster" RELATED [] -is_a: SO:0001410 ! experimental_feature -relationship: has_part SO:0000695 ! reagent - -[Term] -id: SO:0001458 -name: unigene_cluster -def: "A kind of transcribed_cluster defined by a set of transcribed sequences from the a unique gene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "unigene cluster" RELATED [] -is_a: SO:0001457 ! transcribed_cluster - -[Term] -id: SO:0001459 -name: CRISPR -def: "Clustered Palindromic Repeats interspersed with bacteriophage derived spacer sequences." [RFAM:jd] -synonym: "Clustered_Regularly_Interspaced_Short_Palindromic_Repeat" EXACT [] -synonym: "CRISPR element" EXACT [] -xref: http:en.wikipedia.org/wiki/CRISPR -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0001460 -name: insulator_binding_site -def: "A protein_binding_site located within an insulator." [SO:ke] -comment: See tracker ID 2060908. -synonym: "insulator binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -relationship: part_of SO:0000627 ! 
insulator - -[Term] -id: SO:0001461 -name: enhancer_binding_site -def: "A protein_binding_site located within an enhancer." [SO:ke] -synonym: "enhancer binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -relationship: part_of SO:0000165 ! enhancer - -[Term] -id: SO:0001462 -name: contig_collection -def: "A collection of contigs." [SO:ke] -comment: See tracker ID: 2138359. -synonym: "contig collection" EXACT [] -is_a: SO:0001085 ! implied link automatically realized ! sequence_conflict -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001085 ! sequence_conflict -intersection_of: has_part SO:0000149 ! contig - -[Term] -id: SO:0001463 -name: lincRNA -def: "A multiexonic non-coding RNA transcribed by RNA polymerase II." [PMID:19182780, SO:ke] -synonym: "large intervening non-coding RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001464 -name: UST -def: "An EST spanning part or all of the untranslated regions of a protein-coding transcript." [SO:nlw] -synonym: "UTR sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001465 -name: three_prime_UST -def: "A UST located in the 3'UTR of a protein-coding transcript." [SO:nlw] -synonym: "3' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001466 -name: five_prime_UST -def: "An UST located in the 5'UTR of a protein-coding transcript." [SO:nlw] -synonym: "5' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001467 -name: RST -def: "A tag produced from a single sequencing read from a RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "RACE sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001468 -name: three_prime_RST -def: "A tag produced from a single sequencing read from a 3'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "3' RST" EXACT [] -is_a: SO:0001467 ! 
RST - -[Term] -id: SO:0001469 -name: five_prime_RST -def: "A tag produced from a single sequencing read from a 5'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "5' RST" RELATED [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001470 -name: UST_match -def: "A match against an UST sequence." [SO:nlw] -synonym: "UST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001471 -name: RST_match -def: "A match against an RST sequence." [SO:nlw] -synonym: "RST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001472 -name: primer_match -def: "A nucleotide match to a primer sequence." [SO:nlw] -synonym: "primer match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0001473 -name: miRNA_antiguide -def: "A region of the pri miRNA that basepairs with the guide to form the hairpin." [SO:ke] -synonym: "miRNA antiguide " EXACT [] -synonym: "miRNA passenger strand" EXACT [] -synonym: "miRNA star" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-05-27T03:35:43Z - -[Term] -id: SO:0001474 -name: trans_splice_junction -def: "The boundary between the spliced leader and the first exon of the mRNA." [SO:ke] -synonym: "trans-splice junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2009-07-13T04:50:49Z - -[Term] -id: SO:0001475 -name: outron -def: "A region of a primary transcript, that is removed via trans splicing." [PMID:16401417, SO:ke] -is_a: SO:0000835 ! primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-07-14T11:36:08Z - -[Term] -id: SO:0001476 -name: natural_plasmid -def: "A plasmid that occurs naturally." [SO:xp] -synonym: "natural plasmid" EXACT [] -is_a: SO:0000155 ! plasmid -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000782 ! 
natural -created_by: kareneilbeck -creation_date: 2009-09-01T03:43:06Z - -[Term] -id: SO:0001477 -name: gene_trap_construct -def: "A gene trap construct is a type of engineered plasmid which is designed to integrate into a genome and produce a fusion transcript between exons of the gene into which it inserts and a reporter element in the construct. Gene traps contain a splice acceptor, do not contain promoter elements for the reporter, and are mutagenic. Gene traps may be bicistronic with the second cassette containing a promoter driving an a selectable marker." [ZFIN:dh] -synonym: "gene trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:49:09Z - -[Term] -id: SO:0001478 -name: promoter_trap_construct -def: "A promoter trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when inserted in close proximity to a promoter element. Promoter traps typically do not contain promoter elements and are mutagenic." [ZFIN:dh] -synonym: "promoter trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:52:01Z - -[Term] -id: SO:0001479 -name: enhancer_trap_construct -def: "An enhancer trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when the expression from a basic minimal promoter is enhanced by genomic enhancer elements. Enhancer traps contain promoter elements and are not usually mutagenic." [ZFIN:dh] -synonym: "enhancer trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:53:26Z - -[Term] -id: SO:0001480 -name: PAC_end -def: "A region of sequence from the end of a PAC clone that may provide a highly specific marker." [ZFIN:mh] -synonym: "PAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000154 ! 
PAC -created_by: kareneilbeck -creation_date: 2009-09-09T05:18:12Z - -[Term] -id: SO:0001481 -name: RAPD -def: "RAPD is a 'PCR product' where a sequence variant is identified through the use of PCR with random primers." [ZFIN:mh] -synonym: "Random Amplification Polymorphic DNA" EXACT [] -is_a: SO:0000006 ! PCR_product -created_by: kareneilbeck -creation_date: 2009-09-09T05:26:10Z - -[Term] -id: SO:0001482 -name: shadow_enhancer -synonym: "shadow enhancer" EXACT [] -is_a: SO:0000165 ! enhancer -created_by: kareneilbeck -creation_date: 2009-09-09T05:29:29Z - -[Term] -id: SO:0001483 -name: SNV -def: "SNVs are single base pair positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0001484 -name: X_element_combinatorial_repeat -def: "An X element combinatorial repeat is a repeat region located between the X element and the telomere or adjacent Y' element." [http://www.yeastgenome.org/help/glossary.html] -comment: X element combinatorial repeats contain Tbf1p binding sites,\nand possible functions include a role in telomerase-independent telomere\nmaintenance via recombination or as a barrier against transcriptional\nsilencing. These are usually present as a combination of one or more of\nseveral types of smaller elements (designated A, B, C, or D). This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "X element combinatorial repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T11:03:37Z - -[Term] -id: SO:0001485 -name: Y_prime_ element -def: "A Y' element is a repeat region (SO:0000657) located adjacent to telomeric repeats or X element combinatorial repeats, either as a single copy or tandem repeat of two to four copies." 
[http:http\://www.yeastgenome.org/help/glossary.html] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "Y' element" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T12:08:57Z - -[Term] -id: SO:0001486 -name: standard_draft -def: "The status of a whole genome sequence, where the data is minimally filtered or un-filtered, from any number of sequencing platforms, and is assembled into contigs. Genome sequence of this quality may harbour regions of poor quality and can be relatively incomplete." [DOI:10.1126] -synonym: "standard draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:48:32Z - -[Term] -id: SO:0001487 -name: high_quality_draft -def: "The status of a whole genome sequence, where overall coverage represents at least 90 percent of the genome." [DOI:10.1126] -synonym: "high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:52:36Z - -[Term] -id: SO:0001488 -name: improved_high_quality_draft -def: "The status of a whole genome sequence, where additional work has been performed, using either manual or automated methods, such as gap resolution." [DOI:10.1126] -synonym: "improved high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:54:35Z - -[Term] -id: SO:0001489 -name: annotation_directed_improved_draft -def: "The status of a whole genome sequence,where annotation, and verification of coding regions has occurred." [DOI:10.1126] -synonym: "annotation directed improvement" EXACT [] -is_a: SO:0001499 ! 
whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:57:10Z - -[Term] -id: SO:0001490 -name: noncontiguous_finished -def: "The status of a whole genome sequence, where the assembly is high quality, closure approaches have been successful for most gaps, misassemblies and low quality regions." [DOI:10.1126] -synonym: "non contiguous finished" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:01:07Z - -[Term] -id: SO:0001491 -name: finished_genome -def: "The status of a whole genome sequence, with less than 1 error per 100,000 base pairs." [DOI:10.1126] -synonym: "finished" EXACT [] -synonym: "finished genome" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:04:43Z - -[Term] -id: SO:0001492 -name: intronic_regulatory_region -def: "A regulatory region that is part of an intron." [SO:ke] -synonym: "intronic regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000188 ! intron -created_by: kareneilbeck -creation_date: 2009-11-08T02:48:02Z - -[Term] -id: SO:0001493 -name: centromere_DNA_Element_I -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region composed of 8-11bp which enables binding by the centromere binding factor 1(Cbf1p)." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEI" EXACT [] -synonym: "Centromere DNA Element I" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000577 ! centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:47:23Z - -[Term] -id: SO:0001494 -name: centromere_DNA_Element_II -def: "A centromere DNA Element II (CDEII) is part a conserved region of the centromere, consisting of a consensus region that is AT-rich and ~ 75-100 bp in length." 
[PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEII" EXACT [] -synonym: "centromere DNA Element II" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000577 ! centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:51:26Z - -[Term] -id: SO:0001495 -name: centromere_DNA_Element_III -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region that consists of a 25-bp which enables binding by the centromere DNA binding factor 3 (CBF3) complex." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEIII" EXACT [] -synonym: "centromere DNA Element III" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000577 ! centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:54:47Z - -[Term] -id: SO:0001496 -name: telomeric_repeat -def: "The telomeric repeat is a repeat region, part of the chromosome, which in yeast, is a G-rich terminal sequence of the form (TG(1-3))n or more precisely ((TG)(1-6)TG(2-3))n." [PMID:8720065] -comment: The repeats are maintained by telomerase and there is generally 300 (+/-) 75 bp of TG(1-3) at a given end. Telomeric repeats function in completing chromosome replication and protecting the ends from degradation and end-to-end fusions. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880739. -synonym: "telomeric repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-09T06:00:42Z - -[Term] -id: SO:0001497 -name: X_element -def: "The X element is a conserved region, of the telomere, of ~475 bp that contains an ARS sequence and in most cases an Abf1p binding site." 
[http://www.yeastgenome.org/help/glossary.html#xelemcoresequence] -comment: Possible functions include roles in chromosomal segregation,\nmaintenance of chromosome stability, recombinational sequestering, or as a\nbarrier to transcriptional silencing. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "X element" RELATED [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T10:56:54Z - -[Term] -id: SO:0001498 -name: YAC_end -def: "A region of sequence from the end of a YAC clone that may provide a highly specific marker." [SO:ke] -synonym: "YAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000152 ! YAC -created_by: kareneilbeck -creation_date: 2009-11-19T11:07:18Z - -[Term] -id: SO:0001499 -name: whole_genome_sequence_status -def: "The status of whole genome sequence." [DOI:10.1126] -comment: This terms and children were added to SO in response to tracker request by Patrick Chain. The paper Genome Project Standards in a New Era of Sequencing. Science October 9th 2009, addresses these terms. -synonym: "whole genome sequence status" EXACT [] -is_a: SO:0000905 ! status -created_by: kareneilbeck -creation_date: 2009-10-23T12:47:47Z - -[Term] -id: SO:0001500 -name: heritable_phenotypic_marker -def: "A biological_region characterized as a single heritable trait in a phenotype screen. The heritable phenotype may be mapped to a chromosome but generally has not been characterized to a specific gene locus." [JAX:hdene] -synonym: "heritable phenotypic marker" EXACT [] -synonym: "phenotypic marker" EXACT [] -is_a: SO:0001645 ! genetic_marker -created_by: kareneilbeck -creation_date: 2009-12-07T01:50:55Z - -[Term] -id: SO:0001501 -name: peptide_collection -def: "A collection of peptide sequences." [BBOP:nlw] -comment: Term requested via tracker ID: 2910829. 
-synonym: "peptide collection" EXACT [] -synonym: "peptide set" EXACT [] -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0000104 ! polypeptide -created_by: kareneilbeck -creation_date: 2009-12-11T10:58:58Z - -[Term] -id: SO:0001502 -name: high_identity_region -def: "An experimental feature with high sequence identity to another sequence." [SO:ke] -comment: Requested by tracker ID: 2902685. -synonym: "high identity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2009-12-11T11:06:05Z - -[Term] -id: SO:0001503 -name: processed_transcript -def: "A transcript for which no open reading frame has been identified and for which no other function has been determined." [MGI:hdeen] -comment: Ensembl and Vega also use this term name. Requested by Howard Deen of MGI. -synonym: "processed transcript" EXACT [] -is_a: SO:0000673 ! transcript -created_by: kareneilbeck -creation_date: 2009-12-21T05:37:14Z - -[Term] -id: SO:0001504 -name: assortment_derived_variation -def: "A chromosome variation derived from an event during meiosis." [SO:ke] -synonym: "assortment derived variation" RELATED [] -is_a: SO:0000240 ! chromosome_variation -created_by: kareneilbeck -creation_date: 2010-03-02T05:03:18Z - -[Term] -id: SO:0001505 -name: reference_genome -def: "A collection of sequences (often chromosomes) taken as the standard for a given organism and genome assembly." [SO:ke] -synonym: "reference genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:10:03Z - -[Term] -id: SO:0001506 -name: variant_genome -def: "A collection of sequences (often chromosomes) of an individual." [SO:ke] -synonym: "variant genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:11:25Z - -[Term] -id: SO:0001507 -name: variant_collection -def: "A collection of one or more sequences of an individual." 
[SO:ke] -synonym: "variant collection" RELATED [] -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0001059 ! sequence_alteration -created_by: kareneilbeck -creation_date: 2010-03-03T02:13:28Z - -[Term] -id: SO:0001508 -name: alteration_attribute -synonym: "alteration attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:53:23Z - -[Term] -id: SO:0001509 -name: chromosomal_variation_attribute -synonym: "chromosomal variation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:54:30Z - -[Term] -id: SO:0001510 -name: intrachromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:25Z - -[Term] -id: SO:0001511 -name: interchromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:43Z - -[Term] -id: SO:0001512 -name: insertion_attribute -def: "A quality of a chromosomal insertion,." [SO:ke] -synonym: "insertion attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:56Z - -[Term] -id: SO:0001513 -name: tandem -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:37Z - -[Term] -id: SO:0001514 -name: direct -def: "A quality of an insertion where the insert is not in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:49Z - -[Term] -id: SO:0001515 -name: inverted -def: "A quality of an insertion where the insert is in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! 
insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:40Z - -[Term] -id: SO:0001516 -name: free -def: "The quality of a duplication where the new region exists independently of the original." [SO:ke] -is_a: SO:0001523 ! duplication_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:51Z - -[Term] -id: SO:0001517 -name: inversion_attribute -synonym: "inversion attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:10Z - -[Term] -id: SO:0001518 -name: pericentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:24Z - -[Term] -id: SO:0001519 -name: paracentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:35Z - -[Term] -id: SO:0001520 -name: translocaton_attribute -synonym: "translocation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:47Z - -[Term] -id: SO:0001521 -name: reciprocal -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:34Z - -[Term] -id: SO:0001522 -name: insertional -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:51Z - -[Term] -id: SO:0001523 -name: duplication_attribute -synonym: "duplication attribute" RELATED [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-05T01:56:33Z - -[Term] -id: SO:0001524 -name: chromosomally_aberrant_genome -synonym: "chromosomally aberrant genome" RELATED [] -is_a: SO:0001506 ! variant_genome -created_by: kareneilbeck -creation_date: 2010-03-05T02:21:00Z - -[Term] -id: SO:0001525 -name: assembly_error_correction -def: "A region of sequence where the final nucleotide assignment differs from the original assembly due to an improvement that replaces a mistake." 
[SO:ke] -synonym: "assembly error correction" RELATED [] -is_a: SO:0000413 ! sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:16:31Z - -[Term] -id: SO:0001526 -name: base_call_error_correction -def: "A region of sequence where the final nucleotide assignment is different from that given by the base caller due to an improvement that replaces a mistake." [SO:ke] -synonym: "base call error correction" RELATED [] -is_a: SO:0000413 ! sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:18:07Z - -[Term] -id: SO:0001527 -name: peptide_localization_signal -def: "A region of peptide sequence used to target the polypeptide molecule to a specific organelle." [SO:ke] -subset: SOFA -synonym: "localization signal" RELATED [] -synonym: "peptide localization signal" EXACT [] -is_a: SO:0000839 ! polypeptide_region -created_by: kareneilbeck -creation_date: 2010-03-11T02:15:05Z - -[Term] -id: SO:0001528 -name: nuclear_localization_signal -def: "A polypeptide region that targets a polypeptide to the nucleus." [SO:ke] -synonym: "NLS" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_localization_signal "wikipedia" -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:16:38Z - -[Term] -id: SO:0001529 -name: endosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the endosome." [SO:ke] -synonym: "endosomal localization signal" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:20:58Z - -[Term] -id: SO:0001530 -name: lysosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the lysosome." [SO:ke] -synonym: "lysosomal localization signal" EXACT [] -is_a: SO:0001527 ! 
peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:24:10Z - -[Term] -id: SO:0001531 -name: nuclear_export_signal -def: "A polypeptide region that targets a polypeptide to he cytoplasm." [SO:ke] -synonym: "NES" EXACT [] -synonym: "nuclear export signal" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_export_signal -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:25:25Z - -[Term] -id: SO:0001532 -name: recombination_signal_sequence -def: "A region recognized by a recombinase." [SO:ke] -synonym: "recombination signal sequence" RELATED [] -xref: http://en.wikipedia.org/wiki/Recombination_Signal_Sequences "wikipedia" -is_a: SO:0000299 ! specific_recombination_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:16:47Z - -[Term] -id: SO:0001533 -name: cryptic_splice_site -def: "A splice site that is in part of the transcript not normally spliced. They occur via mutation or transcriptional error." [SO:ke] -synonym: "cryptic splice signal" RELATED [] -synonym: "cryptic splice site" EXACT [] -is_a: SO:0000162 ! splice_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:25:06Z - -[Term] -id: SO:0001534 -name: nuclear_rim_localization_signal -def: "A polypeptide region that targets a polypeptide to the nuclear rim." [SO:ke] -synonym: "nuclear rim localization signal" RELATED [] -xref: PMID:16027110 -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T03:31:30Z - -[Term] -id: SO:0001535 -name: p_element -def: "A P_element is a DNA transposon responsible for hybrid dysgenesis." [SO:ke] -synonym: "P element" RELATED [] -is_a: SO:0000182 ! DNA_transposon -created_by: kareneilbeck -creation_date: 2010-03-12T03:40:33Z - -[Term] -id: SO:0001536 -name: functional_variant -def: "A sequence variant which alters a biological process or function." [SO:ke] -synonym: "functional variant" EXACT [] -is_a: SO:0001060 ! 
sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:30:25Z - -[Term] -id: SO:0001537 -name: structural_variant -def: "A sequence variant that changes one or more sequence features." [SO:ke] -synonym: "structural variant" RELATED [] -is_a: SO:0001060 ! sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:31:01Z - -[Term] -id: SO:0001538 -name: transcript_function_variant -def: "A sequence variant which alters the functioning of a transcript." [SO:ke] -synonym: "transcript function variant" EXACT [] -is_a: SO:0001536 ! functional_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:32:58Z - -[Term] -id: SO:0001539 -name: translational_product_function_variant -def: "A sequence variant that affects the functioning of a translational product." [SO:ke] -synonym: "translational product variant" EXACT [] -is_a: SO:0001536 ! functional_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:46:15Z - -[Term] -id: SO:0001540 -name: level_of_transcript_variant -def: "A sequence variant which alters the level of a transcript." [SO:ke] -synonym: "level of transcript variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:47:07Z - -[Term] -id: SO:0001541 -name: decreased_transcript_level -def: "A sequence variant that increases the level of mature, spliced and processed RNA." [SO:ke] -synonym: "decreased transcript level" EXACT [] -is_a: SO:0001540 ! level_of_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:47:47Z - -[Term] -id: SO:0001542 -name: increased_transcript_level -def: "A sequence variant that increases the level of mature, spliced and processed RNA." [SO:ke] -synonym: "increased transcript level" EXACT [] -is_a: SO:0001540 ! 
level_of_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:48:17Z - -[Term] -id: SO:0001543 -name: transcript_processing_variant -def: "A sequence variant that affects the post transcriptional processing of a transcript." [SO:ke] -synonym: "transcript processing variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:48:48Z - -[Term] -id: SO:0001544 -name: editing_variant -def: "A variant that changes editing of a transcript." [SO:ke] -synonym: "editing variant" EXACT [] -is_a: SO:0001543 ! transcript_processing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:49:25Z - -[Term] -id: SO:0001545 -name: polyadenylation_variant -def: "A sequence variant that changes polyadenylation." [SO:ke] -synonym: "polyadenylation variant" EXACT [] -is_a: SO:0001543 ! transcript_processing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:49:40Z - -[Term] -id: SO:0001546 -name: transcript_stability_variant -def: "A variant that changes the stability of a transcript." [SO:ke] -synonym: "transcript stability variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:01Z - -[Term] -id: SO:0001547 -name: decrease_transcript_stability -def: "A sequence variant that decreases transcript stability." [SO:ke] -synonym: "decrease transcript stability" EXACT [] -is_a: SO:0001546 ! transcript_stability_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:23Z - -[Term] -id: SO:0001548 -name: increase_transcript_stability -def: "A sequence variant that increases transcript stability." [SO:ke] -synonym: "increase transcript stability" EXACT [] -is_a: SO:0001546 ! transcript_stability_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:39Z - -[Term] -id: SO:0001549 -name: transcription_variant -def: "A variant that changes alters the transcription of a transcript." 
[SO:ke] -synonym: "transcription variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:51:26Z - -[Term] -id: SO:0001550 -name: rate_of_transcription_variant -def: "A sequence variant that changes the rate of transcription." [SO:ke] -synonym: "rate of transcription variant" EXACT [] -is_a: SO:0001549 ! transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:51:50Z - -[Term] -id: SO:0001551 -name: increase_transcription_rate -def: "A sequence variant that increases the rate of transcription." [SO:ke] -synonym: "increase transcription rate" RELATED [] -is_a: SO:0001550 ! rate_of_transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:52:17Z - -[Term] -id: SO:0001552 -name: decrease_transcription_rate -def: "A sequence variant that decreases the rate of transcription." [SO:ke] -synonym: "decrease transcription rate" EXACT [] -is_a: SO:0001550 ! rate_of_transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:52:43Z - -[Term] -id: SO:0001553 -name: translational_product_level_variant -synonym: "translational product level variant" EXACT [] -is_a: SO:0001539 ! translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:53:32Z - -[Term] -id: SO:0001554 -name: polypeptide_function_variant -def: "A sequence variant which changes polypeptide functioning." [SO:ke] -synonym: "polypeptide function variant" EXACT [] -is_a: SO:0001539 ! translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:53:54Z - -[Term] -id: SO:0001555 -name: decreased_translational_product_level -def: "A sequence variant which decreases the translational product level." [SO:ke] -synonym: "decrease translational product level" EXACT [] -is_a: SO:0001553 ! 
translational_product_level_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:54:25Z - -[Term] -id: SO:0001556 -name: increased_translational_product_level -def: "A sequence variant which increases the translational product level." [SO:ke] -synonym: "increase translational product level" EXACT [] -is_a: SO:0001553 ! translational_product_level_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:55:25Z - -[Term] -id: SO:0001557 -name: polypeptide_gain_of_function -def: "A sequence variant which causes gain of polypeptide function." [SO:ke] -synonym: "polypeptide gain of function" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:12Z - -[Term] -id: SO:0001558 -name: polypeptide_localization_variant -def: "A sequence variant which changes the localization of a polypeptide." [SO:ke] -synonym: "polypeptide localization variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:37Z - -[Term] -id: SO:0001559 -name: polypeptide_loss_of_function -def: "A sequence variant that causes the loss of a polypeptide function." [SO:ke] -synonym: "polypeptide loss of function" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:58Z - -[Term] -id: SO:0001560 -name: inactive_ligand_binding_site -def: "A sequence variant that causes the inactivation of a ligand binding site." [SO:ke] -synonym: "inactive ligand binding site" EXACT [] -is_a: SO:0001559 ! polypeptide_loss_of_function -created_by: kareneilbeck -creation_date: 2010-03-22T11:58:00Z - -[Term] -id: SO:0001561 -name: polypeptide_partial_loss_of_function -def: "A sequence variant that causes some but not all loss of polypeptide function." [SO:ke] -synonym: "polypeptide partial loss of function" EXACT [] -is_a: SO:0001559 ! 
polypeptide_loss_of_function -created_by: kareneilbeck -creation_date: 2010-03-22T11:58:32Z - -[Term] -id: SO:0001562 -name: polypeptide_post_translational_processing_variant -def: "A sequence variant that causes a change in post translational processing of the peptide." [SO:ke] -synonym: "polypeptide post translational processing variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:59:06Z - -[Term] -id: SO:0001563 -name: copy_number_change -def: "A sequence variant where copies of a feature (CNV) are either increased or decreased." [SO:ke] -synonym: "copy number change" EXACT [] -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:27:33Z - -[Term] -id: SO:0001564 -name: gene_structure_variant -def: "A sequence variant where the structure of the gene is changed." [SO:ke] -synonym: "gene structure variant" EXACT [] -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:01Z - -[Term] -id: SO:0001565 -name: gene_fusion -def: "A sequence variant whereby a two genes have become joined." [SO:ke] -synonym: "gene fusion" EXACT [] -is_a: SO:0001564 ! gene_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:28Z - -[Term] -id: SO:0001566 -name: regulatory_region_variant -def: "A sequence variant located with a regulatory region such as a promoter." [SO:ke] -comment: EBI term: Regulatory region variations - In regulatory region annotated by Ensembl. -synonym: "regulatory region variant" EXACT [] -synonym: "regulatory_region_" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:48Z - -[Term] -id: SO:0001567 -name: stop_retained_variant -def: "A sequence variant where at least one base in the terminator codon is changed, but the terminator remains." 
[SO:ke] -synonym: "stop retained variant" EXACT [] -is_a: SO:0001590 ! terminator_codon_variant -created_by: kareneilbeck -creation_date: 2010-04-19T05:02:30Z - -[Term] -id: SO:0001568 -name: splicing_variant -def: "A sequence variant that changes the process of splicing." [SO:ke] -synonym: "splicing variant" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:29:22Z - -[Term] -id: SO:0001569 -name: cryptic_splice_site_variant -def: "A sequence variant causing a new (functional) splice site." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "cryptic splice site activation" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:29:41Z - -[Term] -id: SO:0001570 -name: cryptic_splice_acceptor -def: "A sequence variant whereby a new splice site is created due to the activation of a new acceptor." [SO:ke] -synonym: "cryptic splice acceptor" EXACT [] -is_a: SO:0001569 ! cryptic_splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:30:11Z - -[Term] -id: SO:0001571 -name: cryptic_splice_donor -def: "A sequence variant whereby a new splice site is created due to the activation of a new donor." [SO:ke] -synonym: "cryptic splice donor" EXACT [] -is_a: SO:0001569 ! cryptic_splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:30:35Z - -[Term] -id: SO:0001572 -name: exon_loss -def: "A sequence variant whereby an exon is lost from the transcript." [SO:ke] -synonym: "exon loss" EXACT [] -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:09Z - -[Term] -id: SO:0001573 -name: intron_gain -def: "A sequence variant whereby an intron is gained by the processed transcript; usually a result of an alteration of the donor or acceptor." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "intron gain" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:25Z - -[Term] -id: SO:0001574 -name: splice_acceptor_variant -def: "A splice variant that changes the2 base region at the 3' end of an intron." [SO:ke] -synonym: "splice acceptor variant" EXACT [] -is_a: SO:0001629 ! splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:52Z - -[Term] -id: SO:0001575 -name: splice_donor_variant -def: "A splice variant that changes the2 base region at the 5' end of an intron." [SO:ke] -synonym: "splice donor variant" EXACT [] -is_a: SO:0001629 ! splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:32:10Z - -[Term] -id: SO:0001576 -name: transcript_variant -def: "A sequence variant that changes the structure of the transcript." [SO:ke] -synonym: "transcript variant" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:32:41Z - -[Term] -id: SO:0001577 -name: complex_change_in_transcript -def: "A transcript variant with a complex INDEL- Insertion or deletion that spans an exon/intron border or a coding sequence/UTR border." [http://uswest.ensembl.org/info/docs/variation/index.html] -comment: EBI term: Complex InDel - Insertion or deletion that spans an exon/intron border or a coding sequence/UTR border. -synonym: "complex_indel" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -synonym: "complext change in transcript" EXACT [] -is_a: SO:0001576 ! 
transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:33:03Z - -[Term] -id: SO:0001578 -name: stop_lost -def: "A sequence variant where at least one base of the terminator codon (stop) is changed, resulting in an elongated transcript." [SO:ke] -comment: EBI term: Stop lost - In coding sequence, resulting in the loss of a stop codon. -synonym: "stop codon lost" EXACT [] -synonym: "stop lost" EXACT [] -synonym: "stop_lost" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001590 ! terminator_codon_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:46:42Z - -[Term] -id: SO:0001579 -name: transcript_sequence_variant -synonym: "transcript sequence variant" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001580 -name: coding_sequence_variant -def: "A sequence variant that changes the coding sequence." [SO:ke] -synonym: "coding sequence variant" EXACT [] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:34:36Z - -[Term] -id: SO:0001581 -name: codon_variant -def: "A sequence variant that changes at least one base in a codon." [SO:ke] -synonym: "coding variant" EXACT [] -synonym: "codon variant" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001580 ! coding_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:34:56Z - -[Term] -id: SO:0001582 -name: initiator_codon_change -def: "A codon variant that changes at least one base of the first codon of a transcript." [SO:ke] -synonym: "initiator codon change" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001581 ! codon_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:35:18Z - -[Term] -id: SO:0001583 -name: non_synonymous_codon -alt_id: SO:0001584 -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for a different amino acid." 
[SO:ke] -comment: EBI term: Non-synonymous SNPs. SNPs that are located in the coding sequence and result in an amino acid change in the encoded peptide sequence. -synonym: "missense" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "missense_codon" EXACT [] -synonym: "non synonymous codon" EXACT [] -synonym: "non_synonymous_coding" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -xref: http:http\://en.wikipedia.org/wiki/Missense_mutation "wiki" -is_a: SO:0001581 ! codon_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:35:49Z - -[Term] -id: SO:0001585 -name: conservative_missense_codon -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for a different but similar amino acid. These variants may or may not be deleterious." [SO:ke] -synonym: "conservative missense codon" EXACT [] -synonym: "neutral missense codon" RELATED [] -synonym: "quiet missense codon" RELATED [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001583 ! non_synonymous_codon -created_by: kareneilbeck -creation_date: 2010-03-22T02:36:40Z - -[Term] -id: SO:0001586 -name: non_conservative_missense_codon -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for an amino acid with different biochemical properties." [SO:ke] -synonym: "non conservative missense codon" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001583 ! non_synonymous_codon -created_by: kareneilbeck -creation_date: 2010-03-22T02:37:16Z - -[Term] -id: SO:0001587 -name: stop_gained -def: "A sequence variant whereby at least one base of a codon is changed, resulting in a premature stop codon, leading to a shortened transcript." 
[SO:ke] -comment: EBI term: Stop gained - In coding sequence, resulting in the gain of a stop codon (i.e. leading to a shortened peptide sequence). -synonym: "nonsense" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "nonsense codon" EXACT [] -synonym: "stop codon gained" RELATED [] -synonym: "stop gained" EXACT [] -synonym: "stop_gained" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001583 ! non_synonymous_codon -created_by: kareneilbeck -creation_date: 2010-03-22T02:37:52Z - -[Term] -id: SO:0001588 -name: synonymous_codon -def: "A sequence variant whereby a base of a codon is changed, but there is no resulting change to the encoded amino acid." [SO:ke] -comment: EBI term: Synonymous SNPs - In coding sequence, not resulting in an amino acid change (i.e. silent mutation).\nThis term is sometimes used synonomously with the more general term 'silent mutation', although a silent mutation may occur in non coding sequence. The best practice is to annotate to the most specific term. -synonym: "coding-synon" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "silent mutation" RELATED [] -synonym: "silent substitution" RELATED [] -synonym: "silent_mutation" RELATED [] -synonym: "synonymous codon" EXACT [] -synonym: "synonymous_coding" EXACT ebi_variants [] -xref: http://en.wikipedia.org/wiki/Silent_mutation "wiki" -xref: http://en.wikipedia.org/wiki/Synonymous_mutation -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001581 ! codon_variant -is_a: SO:1000132 ! sequence_variant_effect -created_by: kareneilbeck -creation_date: 2010-03-22T02:39:38Z - -[Term] -id: SO:0001589 -name: frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three." 
[SO:ke] -comment: EBI term:Frameshift variations - In coding sequence, resulting in a frameshift. -synonym: "frameshift variant" EXACT [] -synonym: "frameshift_" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "frameshift_coding" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001580 ! coding_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:40:19Z - -[Term] -id: SO:0001590 -name: terminator_codon_variant -def: "A sequence variant whereby at least one of the bases in the terminator codon is changed." [SO:ke] -synonym: "terminator codon variant" EXACT [] -is_a: SO:0001625 ! terminal_codon_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:40:37Z - -[Term] -id: SO:0001591 -name: frame_restoring_variant -def: "A sequence variant that reverts the sequence of a previous frameshift mutation back to the initial frame." [SO:ke] -synonym: "frame restoring variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:09Z - -[Term] -id: SO:0001592 -name: minus_1_frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, by shifting one base ahead." [http://arjournals.annualreviews.org/doi/pdf/10.1146/annurev.ge.08.120174.001535] -synonym: "-1 frameshift variant" EXACT [] -synonym: "minus 1 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:30Z - -[Term] -id: SO:0001593 -name: minus_2_frameshift_variant -synonym: "-2 frameshift variant" EXACT [] -synonym: "minus 2 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:52Z - -[Term] -id: SO:0001594 -name: plus_1_frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, by shifting one base backward." 
[http://arjournals.annualreviews.org/doi/pdf/10.1146/annurev.ge.08.120174.001535] -synonym: "+1 frameshift variant" EXACT [] -synonym: "plus 1 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:42:06Z - -[Term] -id: SO:0001595 -name: plus_2_frameshift variant -synonym: "+2 frameshift variant" EXACT [] -synonym: "plus 2 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:42:23Z - -[Term] -id: SO:0001596 -name: transcript_secondary_structure_variant -def: "A sequence variant within a transcript that changes the secondary structure of the RNA product." [SO:ke] -synonym: "transcript secondary structure variant" EXACT [] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:43:18Z - -[Term] -id: SO:0001597 -name: compensatory_transcript_secondary_structure_variant -def: "A secondary structure variant that compensate for the change made by a previous variant." [SO:ke] -synonym: "compensatory transcript secondary structure variant" EXACT [] -is_a: SO:0001596 ! transcript_secondary_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:43:54Z - -[Term] -id: SO:0001598 -name: translational_product_structure_variant -def: "A sequence variant within the transcript that changes the structure of the translational product." [SO:ke] -synonym: "translational product structure variant" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:44:17Z - -[Term] -id: SO:0001599 -name: 3D_polypeptide_structure_variant -def: "A sequence variant that changes the resulting polypeptide structure." [SO:ke] -synonym: "3D polypeptide structure variant" EXACT [] -is_a: SO:0001598 ! 
translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:44:46Z - -[Term] -id: SO:0001600 -name: complex_3D_structural_variant -def: "A sequence variant that changes the resulting polypeptide structure." [SO:ke] -synonym: "complex 3D structural variant" EXACT [] -is_a: SO:0001599 ! 3D_polypeptide_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:45:13Z - -[Term] -id: SO:0001601 -name: conformational_change_variant -def: "A sequence variant in the CDS region that causes a conformational change in the resulting polypeptide sequence." [SO:ke] -synonym: "conformational change variant" EXACT [] -is_a: SO:0001599 ! 3D_polypeptide_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:45:48Z - -[Term] -id: SO:0001602 -name: complex_change_of_translational_product_variant -synonym: "complex change of translational product variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:46:54Z - -[Term] -id: SO:0001603 -name: polypeptide_sequence_variant -def: "A sequence variant with in the CDS that causes a change in the resulting polypeptide sequence." [SO:ke] -synonym: "polypeptide sequence variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:13Z - -[Term] -id: SO:0001604 -name: amino_acid_deletion -def: "A sequence variant within a CDS resulting in the loss of an amino acid from the resulting polypeptide." [SO:ke] -synonym: "amino acid deletion" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:36Z - -[Term] -id: SO:0001605 -name: amino_acid_insertion -def: "A sequence variant within a CDS resulting in the gain of an amino acid to the resulting polypeptide." 
[SO:ke] -synonym: "amino acid insertion" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:56Z - -[Term] -id: SO:0001606 -name: amino_acid_substitution -def: "A sequence variant of a codon resulting in the substitution of one amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "amino acid substitution" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:48:17Z - -[Term] -id: SO:0001607 -name: conservative_amino_acid_substitution -def: "A sequence variant of a codon causing the substitution of a similar amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "conservative amino acid substitution" EXACT [] -is_a: SO:0001606 ! amino_acid_substitution -created_by: kareneilbeck -creation_date: 2010-03-22T02:48:57Z - -[Term] -id: SO:0001608 -name: non_conservative_amino_acid_substitution -def: "A sequence variant of a codon causing the substitution of a non conservative amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "non conservative amino acid substitution" EXACT [] -is_a: SO:0001606 ! amino_acid_substitution -created_by: kareneilbeck -creation_date: 2010-03-22T02:49:23Z - -[Term] -id: SO:0001609 -name: elongated_polypeptide -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence." [SO:ke] -synonym: "elongated polypeptide" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:49:52Z - -[Term] -id: SO:0001610 -name: elongated_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence at the C terminus." 
[SO:ke] -synonym: "elongated polypeptide C terminal" EXACT [] -is_a: SO:0001609 ! elongated_polypeptide -created_by: kareneilbeck -creation_date: 2010-03-22T02:50:20Z - -[Term] -id: SO:0001611 -name: elongated_polypeptide_N_terminal -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated polypeptide N terminal" EXACT [] -is_a: SO:0001609 ! elongated_polypeptide -created_by: kareneilbeck -creation_date: 2010-03-22T02:50:31Z - -[Term] -id: SO:0001612 -name: elongated_in_frame_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes in frame elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated in frame polypeptide C terminal" EXACT [] -is_a: SO:0001610 ! elongated_polypeptide_C_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:05Z - -[Term] -id: SO:0001613 -name: elongated_out_of_frame_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes out of frame elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated polypeptide out of frame C terminal" EXACT [] -is_a: SO:0001610 ! elongated_polypeptide_C_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:20Z - -[Term] -id: SO:0001614 -name: elongated_in_frame_polypeptide_N_terminal_elongation -def: "A sequence variant with in the CDS that causes in frame elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated in frame polypeptide N terminal" EXACT [] -is_a: SO:0001611 ! elongated_polypeptide_N_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:49Z - -[Term] -id: SO:0001615 -name: elongated_out_of_frame_polypeptide_N_terminal -def: "A sequence variant with in the CDS that causes out of frame elongation of the resulting polypeptide sequence at the N terminus." 
[SO:ke] -synonym: "elongated out of frame N terminal" EXACT [] -is_a: SO:0001611 ! elongated_polypeptide_N_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:52:05Z - -[Term] -id: SO:0001616 -name: polypeptide_fusion -def: "A sequence variant that causes a fusion of two polypeptide sequences." [SO:ke] -synonym: "polypeptide fusion" EXACT [] -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:52:43Z - -[Term] -id: SO:0001617 -name: polypeptide_truncation -def: "A sequence variant of the CD that causes a truncation of the resulting polypeptide." [SO:ke] -synonym: "polypeptide truncation" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:53:07Z - -[Term] -id: SO:0001618 -name: inactive_catalytic_site -def: "A sequence variant that causes the inactivation of a catalytic site." [SO:ke] -synonym: "inactive catalytic site" EXACT [] -is_a: SO:0001560 ! inactive_ligand_binding_site -created_by: kareneilbeck -creation_date: 2010-03-22T03:06:14Z - -[Term] -id: SO:0001619 -name: nc_transcript_variant -def: "A transcript variant of a non coding RNA gene." [SO:ke] -comment: Within non-coding gene - Located within a gene that does not code for a protein. -synonym: "nc transcript variant" EXACT [] -synonym: "non coding transcript variant" EXACT [] -synonym: "within_non_coding_gene" EXACT dbsnp [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:16:23Z - -[Term] -id: SO:0001620 -name: mature_miRNA_variant -def: "A transcript variant located with the sequence of the mature miRNA." [SO:ke] -comment: EBI term: Within mature miRNA - Located within a microRNA. 
-synonym: "mature miRNA variant" EXACT [] -synonym: "within_mature_miRNA" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001619 ! nc_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:16:58Z - -[Term] -id: SO:0001621 -name: NMD_transcript_variant -def: "A variant in a transcript that is the target of NMD." [SO:ke] -synonym: "NMD transcript variant" EXACT [] -synonym: "NMD_transcript" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:20:40Z - -[Term] -id: SO:0001622 -name: UTR_variant -def: "A transcript variant that is located within the UTR." [SO:ke] -synonym: "UTR variant" EXACT [] -synonym: "UTR_" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:22:58Z - -[Term] -id: SO:0001623 -name: 5_prime_UTR_variant -def: "A UTR variant of the 5' UTR." [SO:ke] -comment: EBI term: 5prime UTR variations - In 5prime UTR (untranslated region). -synonym: "5'UTR variant" EXACT [] -synonym: "5PRIME_UTR" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -synonym: "five prime UTR variant" EXACT [] -synonym: "untranslated-5" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001622 ! UTR_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:23:29Z - -[Term] -id: SO:0001624 -name: 3_prime_UTR_variant -def: "A UTR variant of the 3' UTR." [SO:ke] -comment: EBI term 3prime UTR variations - In 3prime UTR. -synonym: "3'UTR variant" EXACT [] -synonym: "3PRIME_UTR" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -synonym: "three prime UTR variant" EXACT [] -synonym: "untranslated-3" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001622 ! 
UTR_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:23:54Z - -[Term] -id: SO:0001625 -name: terminal_codon_variant -def: "A codon variant that changes at least one base of the last codon of the transcript." [SO:ke] -comment: The terminal codon may be the terminator, or in an incomplete transcript the last available codon. -synonym: "terminal codon variant" EXACT [] -is_a: SO:0001581 ! codon_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:49:55Z - -[Term] -id: SO:0001626 -name: incomplete_terminal_codon_variant -def: "A sequence variant where at least one base of the final codon of an incompletely annotated transcript is changed." [SO:ke] -comment: EBI term: Partial codon - Located within the final, incomplete codon of a transcript with a shortened coding sequence where the end is unknown. -synonym: "incomplete terminal codon variant" EXACT [] -synonym: "partial_codon" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001625 ! terminal_codon_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:51:15Z - -[Term] -id: SO:0001627 -name: intron_variant -def: "A transcript variant occurring within an intron." [SO:ke] -comment: EBI term: Intronic variations - In intron. -synonym: "intron variant" EXACT [] -synonym: "intron_" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "intronic" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:52:38Z - -[Term] -id: SO:0001628 -name: intergenic_variant -def: "A sequence variant located in the intergenic region, between genes." [SO:ke] -comment: EBI term Intergenic variations - More than 5 kb either upstream or downstream of a transcript. -synonym: "intergenic" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -synonym: "intergenic variant" EXACT [] -is_a: SO:0001537 ! 
structural_variant -created_by: kareneilbeck -creation_date: 2010-03-23T05:07:37Z - -[Term] -id: SO:0001629 -name: splice_site_variant -def: "A sequence variant that changes the first two or last two bases of an intron." [http://uswest.ensembl.org/info/docs/variation/index.html] -comment: EBI term - essential splice site - In the first 2 or the last 2 basepairs of an intron. -synonym: "essential_splice_site" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -synonym: "splice site variant" EXACT [] -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:42:00Z - -[Term] -id: SO:0001630 -name: splice_region_variant -def: "A sequence variant in which a change has occurred within the region of the splice site, either within 1-3 bases of the exon or 3-8 bases of the intron." [http://uswest.ensembl.org/info/docs/variation/index.html] -comment: EBI term: splice site - 1-3 bps into an exon or 3-8 bps into an intron. -synonym: "splice region variant" EXACT [] -synonym: "splice_region_variant" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:46:02Z - -[Term] -id: SO:0001631 -name: upstream_gene_variant -def: "A sequence variant located 5' of a gene." [SO:ke] -comment: Different groups annotate up and downstream to different lengths. The subtypes are specific and are backed up with cross references. -synonym: "upstream gene variant" EXACT [] -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:49:13Z - -[Term] -id: SO:0001632 -name: downstream_gene_variant -def: "A sequence variant located 3' of a gene." [SO:ke] -comment: Different groups annotate up and downstream to different lengths. The subtypes are specific and are backed up with cross references. -synonym: "downstream gene variant" EXACT [] -is_a: SO:0001537 ! 
structural_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:49:38Z - -[Term] -id: SO:0001633 -name: 5KB_downstream_variant -def: "A sequence variant located within 5 KB of the end of a gene." [SO:ke] -comment: EBI term Downstream variations - Within 5 kb downstream of the 3prime end of a transcript. -synonym: "5KB downstream variant" EXACT [] -synonym: "downstream" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -synonym: "within 5KB downstream" RELATED [] -is_a: SO:0001632 ! downstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:50:16Z - -[Term] -id: SO:0001634 -name: 500B_downstream_variant -def: "A sequence variant located within a half KB of the end of a gene." [SO:ke] -synonym: "500B downstream variant" EXACT [] -synonym: "near-gene-3" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001632 ! downstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:50:42Z - -[Term] -id: SO:0001635 -name: 5KB_upstream_variant -def: "A sequence variant located within 5KB 5' of a gene." [SO:ke] -comment: EBI term Upstream variations - Within 5 kb upstream of the 5prime end of a transcript. -synonym: "5kb upstream variant" EXACT [] -synonym: "upstream" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001631 ! upstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:51:06Z - -[Term] -id: SO:0001636 -name: 2KB_upstream_variant -def: "A sequence variant located within 2KB 5' of a gene." [SO:ke] -synonym: "2KB upstream variant" EXACT [] -synonym: "near-gene-5" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001631 ! upstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:51:22Z - -[Term] -id: SO:0001637 -name: rRNA_gene -def: "A gene that encodes for ribosomal RNA." [SO:ke] -synonym: "rRNA gene" EXACT [] -is_a: SO:0001263 ! 
ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:10:32Z - -[Term] -id: SO:0001638 -name: piRNA_gene -def: "A gene that encodes for an piwi associated RNA." [SO:ke] -synonym: "piRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:11:36Z - -[Term] -id: SO:0001639 -name: RNase_P_RNA_gene -def: "A gene that encodes an RNase P RNA." [SO:ke] -synonym: "RNase P RNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:13:23Z - -[Term] -id: SO:0001640 -name: RNase_MRP_RNA_gene -def: "A gene that encodes a RNase_MRP_RNA." [SO:ke] -synonym: "RNase MRP RNA gene" RELATED [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:13:58Z - -[Term] -id: SO:0001641 -name: lincRNA_gene -def: "A gene that encodes large intervening non-coding RNA." [SO:ke] -synonym: "lincRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:14:24Z - -[Term] -id: SO:0001642 -name: mathematically_defined_repeat -def: "A mathematically defined repeat (MDR) is a experimental feature that is determined by querying overlapping oligomers of length k against a database of shotgun sequence data and identifying regions in the query sequence that exceed a statistically determined threshold of repetitiveness." [SO:jestill] -comment: Mathematically defined repeat regions are determined without regard to the biological origin of the repetitive region. The repeat units of a MDR are the overlapping oligomers of size k that were used to for the query. Tools that can annotate mathematically defined repeats include Tallymer (Kurtz et al 2008, BMC Genomics: 517) and RePS (Wang et al, Genome Res 12(5): 824-831.). -synonym: "mathematically defined repeat" EXACT [] -is_a: SO:0001410 ! 
experimental_feature -created_by: kareneilbeck -creation_date: 2010-05-03T11:50:14Z - -[Term] -id: SO:0001643 -name: telomerase_RNA_gene -def: "A telomerase RNA gene is a non coding RNA gene the RNA product of which is a component of telomerase." [SO:ke] -synonym: "Telomerase RNA component" EXACT [] -synonym: "telomerase RNA gene" EXACT [] -synonym: "TERC" EXACT [] -xref: http:http\://en.wikipedia.org/wiki/Telomerase_RNA_component "wikipedia" -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-05-18T05:26:38Z - -[Term] -id: SO:0001644 -name: targeting_vector -def: "An engineered vector that is able to take part in homologous recombination in a host with the intent of introducing site specific genomic modifications." [MGD:tm, PMID:10354467] -synonym: "targeting vector" RELATED [] -is_a: SO:0000440 ! vector_replicon -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000440 ! vector_replicon -intersection_of: has_part SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000783 ! engineered -created_by: kareneilbeck -creation_date: 2010-05-28T02:05:25Z - -[Term] -id: SO:0001645 -name: genetic_marker -def: "A measurable sequence feature that varies within a population." [SO:db] -synonym: "genetic marker" RELATED [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-05-28T02:33:07Z - -[Term] -id: SO:0001646 -name: DArT_marker -def: "A genetic marker, discovered using Diversity Arrays Technology (DArT) technology." [SO:ke] -synonym: "DArT marker" EXACT [] -is_a: SO:0001645 ! genetic_marker -created_by: kareneilbeck -creation_date: 2010-05-28T02:34:43Z - -[Term] -id: SO:0001700 -name: histone_modification -def: "Histone modification is a post translationally modified region whereby residues of the histone protein are modified by methylation, acetylation, phosphorylation, ubiquitination, sumoylation, citrullination, or ADP-ribosylation." 
[http:en.wikipedia.org/wiki/Histone] -synonym: "histone modification" EXACT [] -synonym: "histone modification site" RELATED [] -is_a: SO:0001089 ! post_translationally_modified_region -is_a: SO:0001720 ! implied link automatically realized ! epigenetically_modified_region -intersection_of: SO:0001089 ! post_translationally_modified_region -intersection_of: has_quality SO:0000133 ! epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-31T10:22:08Z - -[Term] -id: SO:0001701 -name: histone_methylation_site -def: "A histone modification site where the modification is the methylation of the residue." [SO:ke] -synonym: "histone methylation" EXACT [] -synonym: "histone methylation site" EXACT [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:02Z - -[Term] -id: SO:0001702 -name: histone_acetylation_site -def: "A histone modification where the modification is the acylation of the residue." [SO:ke] -synonym: "histone acetylation" EXACT [] -synonym: "histone acetylatoin site" EXACT [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:27Z - -[Term] -id: SO:0001703 -name: H3K9_acetylation_site -def: "A kind of histone modification site, whereby the 9th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 acetylation site" EXACT [] -synonym: "H3K9Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:05Z - -[Term] -id: SO:0001704 -name: H3K14_acetylation_site -def: "A kind of histone modification site, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K14 acetylation site" EXACT [] -synonym: "H3K14Ac" RELATED [] -is_a: SO:0001702 ! 
histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:53Z - -[Term] -id: SO:0001705 -name: H3K4_monomethylation_site -def: "A kind of histone modification, whereby the 4th residue (a lysine), from the start of the H3 protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 mono-methylation site" EXACT [] -synonym: "H3K4me1" RELATED [] -is_a: SO:0001734 ! H3K4_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:28:14Z - -[Term] -id: SO:0001706 -name: H3K4_trimethylation -def: "A kind of histone modification site, whereby the 4th residue (a lysine), from the start of the H3 protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 tri-methylation" EXACT [] -synonym: "H3K4me3" RELATED [] -is_a: SO:0001734 ! H3K4_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:29:12Z - -[Term] -id: SO:0001707 -name: H3K9_trimethylation_site -def: "A kind of histone modification site, whereby the 9th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 tri-methylation site" EXACT [] -synonym: "H3K9Me3" RELATED [] -is_a: SO:0001736 ! H3K9_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:30:34Z - -[Term] -id: SO:0001708 -name: H3K27_monomethylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2K27 mono-methylation site" EXACT [] -synonym: "H2K27Me1" RELATED [] -is_a: SO:0001732 ! H3K27_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:31:54Z - -[Term] -id: SO:0001709 -name: H3K27_trimethylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is tri-methylated." 
[http://en.wikipedia.org/wiki/Histone] -synonym: "H3K27 tri-methylation site" EXACT [] -synonym: "H3K27Me3" RELATED [] -is_a: SO:0001732 ! H3K27_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:32:41Z - -[Term] -id: SO:0001710 -name: H3K79_monomethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is mono- methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 mono-methylation site" EXACT [] -synonym: "H3K79me1" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:33:42Z - -[Term] -id: SO:0001711 -name: H3K79_dimethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is di-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 di-methylation site" EXACT [] -synonym: "H3K79Me2" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:34:39Z - -[Term] -id: SO:0001712 -name: H3K79_trimethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 tri-methylation site" EXACT [] -synonym: "H3K79Me3" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:35:30Z - -[Term] -id: SO:0001713 -name: H4K20_monomethylation_site -def: "A kind of histone modification site, whereby the 20th residue (a lysine), from the start of the H34histone protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H4K20 mono-methylation site" EXACT [] -synonym: "H4K20Me1" RELATED [] -is_a: SO:0001701 ! 
histone_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:36:43Z - -[Term] -id: SO:0001714 -name: H2BK5_monomethylation_site -def: "A kind of histone modification site, whereby the 5th residue (a lysine), from the start of the H2B protein is methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2BK5 mono-methylation site" EXACT [] -is_a: SO:0001701 ! histone_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:38:12Z - -[Term] -id: SO:0001715 -name: ISRE -def: "An ISRE is a transcriptional cis regulatory region, containing the consensus region: YAGTTTC(A/T)YTTTYCC, responsible for increased transcription via interferon binding." [http://genesdev.cshlp.org/content/2/4/383.abstrac] -comment: Term requested via tracker (2981725) by Alan Ruttenberg, April 2010. It has been described as both an enhancer and a promoter, so the parent is the more general term. -synonym: "interferon stimulated response element" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region -created_by: kareneilbeck -creation_date: 2010-04-05T11:15:08Z - -[Term] -id: SO:0001716 -name: histone_ubiqitination_site -def: "A histone modification site where ubiquitin may be added." [SO:ke] -synonym: "histone ubiquitination site" RELATED [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-04-13T10:12:18Z - -[Term] -id: SO:0001717 -name: H2B_ubiquitination_site -def: "A histone modification site on H2B where ubiquitin may be added." [SO:ke] -synonym: "H2BUbiq" RELATED [] -is_a: SO:0001716 ! histone_ubiqitination_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:13:28Z - -[Term] -id: SO:0001718 -name: H3K18_acetylation_site -def: "A kind of histone modification site, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K18 acetylation site" EXACT [] -synonym: "H3K18Ac" RELATED [] -is_a: SO:0001702 ! 
histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:39:35Z - -[Term] -id: SO:0001719 -name: H3K23_acylation site -def: "A kind of histone modification, whereby the 23rd residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K23 acylation site" RELATED [] -synonym: "H3K23Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:42:45Z - -[Term] -id: SO:0001720 -name: epigenetically_modified_region -def: "A biological region implicated in inherited changes caused by mechanisms other than changes in the underlying DNA sequence." [http://en.wikipedia.org/wiki/Epigenetics, SO:ke] -subset: SOFA -synonym: "epigenetically modified region" RELATED [] -is_a: SO:0001411 ! biological_region -intersection_of: SO:0001411 ! biological_region -intersection_of: has_quality SO:0000133 ! epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-27T12:02:29Z - -[Term] -id: SO:0001721 -name: H3K27_acylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K27 acylation site" EXACT [] -synonym: "H3K27Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:44:09Z - -[Term] -id: SO:0001722 -name: H3K36_monomethylation_site -def: "A kind of histone modification site, whereby the 36th residue (a lysine), from the start of the H3 histone protein is mono-methylated." [SO:ke] -synonym: "H3K36 mono-methylation site" EXACT [] -synonym: "H3K36. -synonym: "transcriptionally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -def: "Expressed in relatively constant amounts without regard to cellular environmental conditions such as the concentration of a particular substrate." 
[SO:ke] -synonym: "transcriptionally constitutive" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -def: "An inducer molecule is required for transcription to occur." [SO:ke] -synonym: "transcriptionally induced" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -def: "A repressor molecule is required for transcription to stop." [SO:ke] -synonym: "transcriptionally repressed" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000127 -name: silenced_gene -def: "A gene that is silenced." [SO:xp] -synonym: "silenced gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000893 ! silenced - -[Term] -id: SO:0000128 -name: gene_silenced_by_DNA_modification -def: "A gene that is silenced by DNA modification." [SO:xp] -synonym: "gene silenced by DNA modification" EXACT [] -is_a: SO:0000127 ! implied link automatically realized ! silenced_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000129 -name: gene_silenced_by_DNA_methylation -def: "A gene that is silenced by DNA methylation." [SO:xp] -synonym: "gene silenced by DNA methylation" EXACT [] -synonym: "methylation-silenced gene" EXACT [] -is_a: SO:0000128 ! implied link automatically realized ! gene_silenced_by_DNA_modification -intersection_of: SO:0000128 ! gene_silenced_by_DNA_modification -intersection_of: has_quality SO:0000895 ! silenced_by_DNA_methylation - -[Term] -id: SO:0000130 -name: post_translationally_regulated -def: "An attribute describing a gene that is regulated after it has been translated." [SO:ke] -synonym: "post translationally regulated" EXACT [] -synonym: "post-translationally regulated" EXACT [] -is_a: SO:0000119 ! 
regulated - -[Term] -id: SO:0000131 -name: translationally_regulated -def: "An attribute describing a gene that is regulated as it is translated." [SO:ke] -synonym: "translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "DNA reverse primer" EXACT [] -synonym: "reverse DNA primer" EXACT [] -synonym: "reverse primer" EXACT [] -synonym: "reverse primer oligo" EXACT [] -synonym: "reverse primer oligonucleotide" EXACT [] -synonym: "reverse primer sequence" EXACT [] -is_a: SO:0000112 ! implied link automatically realized ! primer -intersection_of: SO:0000112 ! primer -intersection_of: has_quality SO:0001031 ! reverse - -[Term] -id: SO:0000133 -name: epigenetically_modified -def: "This attribute describes a gene where heritable changes other than those in the DNA sequence occur. These changes include: modification to the DNA (such as DNA methylation, the covalent modification of cytosine), and post-translational modification of histones." [SO:ke] -synonym: "epigenetically modified" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000134 -name: imprinted -def: "Imprinted genes are epigenetically modified genes that are expressed monoallelically according to their parent of origin." [SO:ke] -xref: http:http\://en.wikipedia.org/wiki/Genomic_imprinting "wiki" -is_a: SO:0000119 ! regulated -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000135 -name: maternally_imprinted -def: "The maternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "maternally imprinted" EXACT [] -is_a: SO:0000134 ! 
imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -def: "The paternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "paternally imprinted" EXACT [] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -def: "Allelic exclusion is a process occurring in diploid organisms, where a gene is inactivated and not expressed in that cell." [SO:ke] -comment: Examples are x-inactivation and immunoglobulin formation. -synonym: "allelically excluded" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000138 -name: gene_rearranged_at_DNA_level -def: "An epigenetically modified gene, rearranged at the DNA level." [SO:xp] -synonym: "gene rearranged at DNA level" EXACT [] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000904 ! rearranged_at_DNA_level - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0001680 ! translation_regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0001679 ! 
transcription_regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -synonym: "DNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000145 -name: recoded_codon -def: "A codon that has been redefined at translation. The redefinition may be as a result of translational bypass, translational frameshifting or stop codon readthrough." [SO:xp] -synonym: "recoded codon" EXACT [] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000146 -name: capped -def: "An attribute describing when a sequence, usually an mRNA is capped by the addition of a modified guanine nucleotide at the 5' end." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unavailable bases." 
[SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http\://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "yeast artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "bacterial artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000154 -name: PAC -def: "The P1-derived artificial chromosome are DNA constructs that are derived from the DNA of P1 bacteriophage. They can carry large amounts (about 100-300 kilobases) of other sequences for a variety of bioengineering purposes. It is one type of vector used to clone DNA fragments (100- to 300-kb insert size; average, 150 kb) in Escherichia coli cells." 
[http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. Drosophila melanogaster PACs carry an average insert size of 80 kb. The library represents a 6-fold coverage of the genome. -synonym: "P1" EXACT [] -synonym: "P1 artificial chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000155 -name: plasmid -def: "A self replicating, using the hosts cellular machinery, often circular nucleic acid molecule that is distinct from a chromosome in the organism." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "plasmid sequence" EXACT [] -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as a plasmid or packaged as a phage,since they retain the lambda cos sites." [SO:ma] -comment: Paper: vans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1):9-20. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cosmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Cosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma] -synonym: "phagemid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Phagemid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilizes the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource: mapping and analysis of 96 clones. Genomics 1996. 
-synonym: "fosmid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Fosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000159 -name: deletion -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_obsolete: true - -[Term] -id: SO:0000161 -name: methylated_A -def: "A modified RNA base in which adenine has been methylated." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinery. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." 
[http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a place holder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000166 -name: enhancer_bound_by_factor -def: "An enhancer bound by a factor." [SO:xp] -synonym: "enhancer bound by factor" EXACT [] -is_a: SO:0000165 ! implied link automatically realized ! enhancer -intersection_of: SO:0000165 ! enhancer -intersection_of: has_quality SO:0000277 ! 
bound_by_factor - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." [SO:ke] -synonym: "pol I promoter" EXACT [] -synonym: "polymerase I promoter" EXACT [] -synonym: "RNA polymerase A promoter" EXACT [] -synonym: "RNApol I promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke] -synonym: "pol II promoter" RELATED [] -synonym: "polymerase II promoter" EXACT [] -synonym: "RNA polymerase B promoter" EXACT [] -synonym: "RNApol II promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." [SO:ke] -synonym: "pol III promoter" EXACT [] -synonym: "polymerase III promoter" EXACT [] -synonym: "RNA polymerase C promoter" EXACT [] -synonym: "RNApol III promoter" EXACT [] -is_a: SO:0001203 ! 
RNA_polymerase_promoter - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "CAAT box" EXACT [] -synonym: "CAAT signal" EXACT [] -synonym: "CAAT-box" EXACT [] -xref: http://en.wikipedia.org/wiki/CAAT_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000173 -name: GC_rich_promoter_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "GC rich promoter region" EXACT [] -synonym: "GC-rich region" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:16858867] -comment: Binds TBP. -synonym: "Goldstein-Hogness box" EXACT [] -synonym: "TATA box" EXACT [] -xref: http://en.wikipedia.org/wiki/TATA_box "wiki" -is_a: SO:0001660 ! core_promoter_element - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT. This region is associated with sigma factor 70." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-10 signal" EXACT [] -synonym: "minus 10 signal" EXACT [] -synonym: "Pribnow box" EXACT [] -synonym: "Pribnow Schaller box" EXACT [] -synonym: "Pribnow-Schaller box" EXACT [] -xref: http://en.wikipedia.org/wiki/Pribnow_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001671 ! bacterial_RNApol_promoter_sigma_70 - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA. This region is associated with sigma factor 70." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-35 signal" EXACT [] -synonym: "minus 35 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001671 ! bacterial_RNApol_promoter_sigma_70 - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." 
[http://www.dddmag.com/Glossary.aspx#r] -synonym: "class I" RELATED [] -synonym: "class I transposon" EXACT [] -synonym: "retrotransposon element" EXACT [] -xref: http://en.wikipedia.org/wiki/Retrotransposon "wiki" -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." [SO:ke] -synonym: "class II" RELATED [] -synonym: "class II transposon" EXACT [] -synonym: "DNA transposon" EXACT [] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -synonym: "U2 intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." 
[SO:ke] -synonym: "long terminal repeat retrotransposon" EXACT [] -synonym: "LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -synonym: "non LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -synonym: "5' intron" EXACT [] -synonym: "5' intron sequence" EXACT [] -synonym: "five prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000191 -name: interior_intron -synonym: "interior intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -synonym: "3' intron" EXACT [] -synonym: "3' intron sequence" RELATED [] -synonym: "three prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." [GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! 
restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "LINE" EXACT [] -synonym: "LINE element" EXACT [] -synonym: "Long interspersed element" EXACT [] -synonym: "Long interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon (here, 'codon' is inclusive of the stop_codon)." [SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke] -synonym: "translocated sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." 
[SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000201 -name: interior_exon -def: "An exon that is bounded by 5' and 3' splice sites." [PMID:10373547] -synonym: "interior exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The coding exon that is most 3-prime on a given transcript." [SO:ma] -synonym: "3' coding exon" RELATED [] -synonym: "three prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." 
[SO:ke] -synonym: "Short interspersed element" EXACT [] -synonym: "Short interspersed nuclear element" EXACT [] -synonym: "SINE element" EXACT [] -xref: http://en.wikipedia.org/wiki/Short_interspersed_nuclear_element "wiki" -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_variation -synonym: "simple sequence length polymorphism" RELATED [] -synonym: "simple sequence length variation" EXACT [] -synonym: "SSLP" RELATED [] -is_a: SO:0000248 ! sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "terminal inverted repeat element" EXACT [] -synonym: "TIR element" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -subset: SOFA -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253)." [SO:ke] -synonym: "tRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -def: "A primary transcript encoding alanyl tRNA." [SO:ke] -synonym: "alanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." [SO:ke] -synonym: "arginine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -synonym: "asparagine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -synonym: "aspartic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." [SO:ke] -synonym: "cysteine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." [SO:ke] -synonym: "glycine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." [SO:ke] -synonym: "histidine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke] -synonym: "isoleucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -synonym: "leucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -synonym: "lysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -synonym: "methionine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke] -synonym: "phenylalanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -synonym: "proline tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -synonym: "serine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -synonym: "threonine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -synonym: "tryptophan tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." [SO:ke] -synonym: "tyrosine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." [SO:ke] -synonym: "valine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear RNA (SO:0000274)." [SO:ke] -synonym: "snRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -synonym: "snoRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. 
This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds a Transcription Factor or Transcription Factor complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The in-frame interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -synonym: "transcript attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterized by large modular imperfect long inverted repeats." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "foldback element" EXACT [] -synonym: "long inverted repeat element" RELATED [] -synonym: "LVR element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The sequences extending on either side of a specific region." [SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! 
topologically_defined_region - -[Term] -id: SO:0000240 -name: chromosome_variation -synonym: "chromosome variation" EXACT [] -is_a: SO:0001507 ! variant_collection -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000400 ! sequence_attribute -relationship: part_of SO:0001524 ! chromosomally_aberrant_genome - -[Term] -id: SO:0000241 -name: internal_UTR -def: "A UTR bordered by the terminal and initial codons of two CDSs in a polycistronic transcript. Every UTR is either 5', 3' or internal." [SO:cjm] -synonym: "internal UTR" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polycistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -synonym: "untranslated region polycistronic mRNA" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke] -synonym: "internal ribosomal entry sequence" EXACT [] -synonym: "internal ribosomal entry site" EXACT [] -synonym: "internal ribosome entry sequence" RELATED [] -synonym: "internal ribosome entry site" EXACT [] -synonym: "IRES" EXACT [] -xref: http://en.wikipedia.org/wiki/Internal_ribosome_entry_site "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_obsolete: true - -[Term] -id: SO:0000246 -name: polyadenylated -def: "A attribute describing the addition of a poly A tail to the 3' end of a mRNA molecule." [SO:ke] -is_a: SO:0000863 ! 
mRNA_attribute - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000248 -name: sequence_length_variation -synonym: "sequence length variation" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -synonym: "modified RNA base feature" EXACT [] -is_a: SO:0001236 ! base - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). 
Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000254 -name: alanyl_tRNA -def: "A tRNA sequence that has an alanine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "alanyl tRNA" EXACT [] -synonym: "alanyl-transfer ribonucleic acid" EXACT [] -synonym: "alanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000211 ! alanine_tRNA_primary_transcript - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -synonym: "rRNA small subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -def: "A tRNA sequence that has an asparagine anticodon, and a 3' asparagine binding region." [SO:ke] -synonym: "asparaginyl tRNA" EXACT [] -synonym: "asparaginyl-transfer ribonucleic acid" EXACT [] -synonym: "asparaginyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000213 ! asparagine_tRNA_primary_transcript - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -def: "A tRNA sequence that has an aspartic acid anticodon, and a 3' aspartic acid binding region." [SO:ke] -synonym: "aspartyl tRNA" EXACT [] -synonym: "aspartyl-transfer ribonucleic acid" EXACT [] -synonym: "aspartyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! 
tRNA -relationship: derives_from SO:0000214 ! aspartic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -def: "A tRNA sequence that has a cysteine anticodon, and a 3' cysteine binding region." [SO:ke] -synonym: "cysteinyl tRNA" EXACT [] -synonym: "cysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "cysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000215 ! cysteine_tRNA_primary_transcript - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -def: "A tRNA sequence that has a glutamine anticodon, and a 3' glutamine binding region." [SO:ke] -synonym: "glutaminyl tRNA" EXACT [] -synonym: "glutaminyl-transfer ribonucleic acid" EXACT [] -synonym: "glutaminyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000216 ! glutamic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -def: "A tRNA sequence that has a glutamic acid anticodon, and a 3' glutamic acid binding region." [SO:ke] -synonym: "glutamyl tRNA" EXACT [] -synonym: "glutamyl-transfer ribonucleic acid" EXACT [] -synonym: "glutamyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000217 ! glutamine_tRNA_primary_transcript - -[Term] -id: SO:0000261 -name: glycyl_tRNA -def: "A tRNA sequence that has a glycine anticodon, and a 3' glycine binding region." [SO:ke] -synonym: "glycyl tRNA" EXACT [] -synonym: "glycyl-transfer ribonucleic acid" RELATED [] -synonym: "glycyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000218 ! glycine_tRNA_primary_transcript - -[Term] -id: SO:0000262 -name: histidyl_tRNA -def: "A tRNA sequence that has a histidine anticodon, and a 3' histidine binding region." [SO:ke] -synonym: "histidyl tRNA" EXACT [] -synonym: "histidyl-transfer ribonucleic acid" EXACT [] -synonym: "histidyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000219 ! 
histidine_tRNA_primary_transcript - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -def: "A tRNA sequence that has an isoleucine anticodon, and a 3' isoleucine binding region." [SO:ke] -synonym: "isoleucyl tRNA" EXACT [] -synonym: "isoleucyl-transfer ribonucleic acid" EXACT [] -synonym: "isoleucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000220 ! isoleucine_tRNA_primary_transcript - -[Term] -id: SO:0000264 -name: leucyl_tRNA -def: "A tRNA sequence that has a leucine anticodon, and a 3' leucine binding region." [SO:ke] -synonym: "leucyl tRNA" EXACT [] -synonym: "leucyl-transfer ribonucleic acid" EXACT [] -synonym: "leucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000221 ! leucine_tRNA_primary_transcript - -[Term] -id: SO:0000265 -name: lysyl_tRNA -def: "A tRNA sequence that has a lysine anticodon, and a 3' lysine binding region." [SO:ke] -synonym: "lysyl tRNA" EXACT [] -synonym: "lysyl-transfer ribonucleic acid" EXACT [] -synonym: "lysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000222 ! lysine_tRNA_primary_transcript - -[Term] -id: SO:0000266 -name: methionyl_tRNA -def: "A tRNA sequence that has a methionine anticodon, and a 3' methionine binding region." [SO:ke] -synonym: "methionyl tRNA" EXACT [] -synonym: "methionyl-transfer ribonucleic acid" EXACT [] -synonym: "methionyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000223 ! methionine_tRNA_primary_transcript - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -def: "A tRNA sequence that has a phenylalanine anticodon, and a 3' phenylalanine binding region." [SO:ke] -synonym: "phenylalanyl tRNA" EXACT [] -synonym: "phenylalanyl-transfer ribonucleic acid" EXACT [] -synonym: "phenylalanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000224 ! 
phenylalanine_tRNA_primary_transcript - -[Term] -id: SO:0000268 -name: prolyl_tRNA -def: "A tRNA sequence that has a proline anticodon, and a 3' proline binding region." [SO:ke] -synonym: "prolyl tRNA" EXACT [] -synonym: "prolyl-transfer ribonucleic acid" EXACT [] -synonym: "prolyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000225 ! proline_tRNA_primary_transcript - -[Term] -id: SO:0000269 -name: seryl_tRNA -def: "A tRNA sequence that has a serine anticodon, and a 3' serine binding region." [SO:ke] -synonym: "seryl tRNA" EXACT [] -synonym: "seryl-transfer ribonucleic acid" RELATED [] -synonym: "seryl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000226 ! serine_tRNA_primary_transcript - -[Term] -id: SO:0000270 -name: threonyl_tRNA -def: "A tRNA sequence that has a threonine anticodon, and a 3' threonine binding region." [SO:ke] -synonym: "threonyl tRNA" EXACT [] -synonym: "threonyl-transfer ribonucleic acid" EXACT [] -synonym: "threonyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000227 ! threonine_tRNA_primary_transcript - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -def: "A tRNA sequence that has a tryptophan anticodon, and a 3' tryptophan binding region." [SO:ke] -synonym: "tryptophanyl tRNA" EXACT [] -synonym: "tryptophanyl-transfer ribonucleic acid" EXACT [] -synonym: "tryptophanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000228 ! tryptophan_tRNA_primary_transcript - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -def: "A tRNA sequence that has a tyrosine anticodon, and a 3' tyrosine binding region." [SO:ke] -synonym: "tyrosyl tRNA" EXACT [] -synonym: "tyrosyl-transfer ribonucleic acid" EXACT [] -synonym: "tyrosyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000229 ! 
tyrosine_tRNA_primary_transcript - -[Term] -id: SO:0000273 -name: valyl_tRNA -def: "A tRNA sequence that has a valine anticodon, and a 3' valine binding region." [SO:ke] -synonym: "valyl tRNA" EXACT [] -synonym: "valyl-transfer ribonucleic acid" EXACT [] -synonym: "valyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000230 ! valine_tRNA_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000231 ! snRNA_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. 
Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: derives_from SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000277 -name: bound_by_factor -def: "An attribute describing a sequence that is bound by another molecule." [SO:ke] -comment: Formerly called transcript_by_bound_factor. -synonym: "bound by factor" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000278 -name: transcript_bound_by_nucleic_acid -def: "A transcript that is bound by a nucleic acid." [SO:xp] -comment: Formerly called transcript_by_bound_nucleic_acid. -synonym: "transcript bound by nucleic acid" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000876 ! bound_by_nucleic_acid - -[Term] -id: SO:0000279 -name: transcript_bound_by_protein -def: "A transcript that is bound by a protein." [SO:xp] -comment: Formerly called transcript_by_bound_protein. -synonym: "transcript bound by protein" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000875 ! bound_by_protein - -[Term] -id: SO:0000280 -name: engineered_gene -def: "A gene that is engineered." [SO:xp] -synonym: "engineered gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000281 -name: engineered_foreign_gene -def: "A gene that is engineered and foreign." [SO:xp] -synonym: "engineered foreign gene" EXACT [] -is_a: SO:0000280 ! implied link automatically realized ! 
engineered_gene -is_a: SO:0000285 ! implied link automatically realized ! foreign_gene -is_a: SO:0000805 ! implied link automatically realized ! engineered_foreign_region -intersection_of: SO:0000280 ! engineered_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000282 -name: mRNA_with_minus_1_frameshift -def: "An mRNA with a minus 1 frameshift." [SO:xp] -synonym: "mRNA with minus 1 frameshift" EXACT [] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000866 ! minus_1_frameshift - -[Term] -id: SO:0000283 -name: engineered_foreign_transposable_element_gene -def: "A transposable_element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign transposable element gene" EXACT [] -is_a: SO:0000111 ! implied link automatically realized ! transposable_element_gene -is_a: SO:0000281 ! implied link automatically realized ! engineered_foreign_gene -intersection_of: SO:0000111 ! transposable_element_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartite and interrupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000285 -name: foreign_gene -def: "A gene that is foreign." [SO:xp] -synonym: "foreign gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "direct terminal repeat" RELATED [] -synonym: "long terminal repeat" EXACT [] -synonym: "LTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Long_terminal_repeat "wiki" -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000287 -name: fusion_gene -def: "A gene that is a fusion." [SO:xp] -synonym: "fusion gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Fusion_gene "wiki" -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000806 ! fusion - -[Term] -id: SO:0000288 -name: engineered_fusion_gene -def: "A fusion gene that is engineered." [SO:xp] -synonym: "engineered fusion gene" EXACT [] -is_a: SO:0000280 ! implied link automatically realized ! engineered_gene -is_a: SO:0000287 ! implied link automatically realized ! fusion_gene -intersection_of: SO:0000287 ! fusion_gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000289 -name: microsatellite -def: "A repeat_region containing repeat_units (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite" EXACT [] -synonym: "dinucleotide repeat microsatellite feature" EXACT [] -synonym: "dinucleotide repeat microsatellite locus" EXACT [] -synonym: "dinucleotide repeat microsatellite marker" EXACT [] -is_a: SO:0000289 ! 
microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite marker" RELATED [] -synonym: "rinucleotide repeat microsatellite" EXACT [] -synonym: "trinucleotide repeat microsatellite feature" EXACT [] -synonym: "trinucleotide repeat microsatellite locus" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_obsolete: true - -[Term] -id: SO:0000293 -name: engineered_foreign_repetitive_element -def: "A repetitive element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign repetitive element" EXACT [] -is_a: SO:0000657 ! implied link automatically realized ! repeat_region -is_a: SO:0000805 ! implied link automatically realized ! engineered_foreign_region -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -synonym: "U12 intron" EXACT [] -synonym: "U12-dependent intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! replicon - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "D-loop" EXACT [] -synonym: "displacement loop" RELATED [] -xref: http://en.wikipedia.org/wiki/D_loop "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -synonym: "recombination feature" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000299 -name: specific_recombination_site -synonym: "specific recombination site" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -synonym: "recombination feature of rearranged gene" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000301 -name: vertebrate_immune_system_gene_recombination_feature -synonym: "vertebrate immune system gene recombination feature" EXACT [] -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene recombination feature" EXACT [] -synonym: "J-RS" EXACT [] -is_a: SO:0000939 ! 
vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interrupted palindrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_base -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001720 ! epigenetically_modified_region - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_base - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_obsolete: true - -[Term] -id: SO:0000309 -name: computed_feature -is_obsolete: true - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_obsolete: true - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to: -is_obsolete: true - -[Term] -id: SO:0000312 -name: experimentally_determined -def: "Attribute to describe a feature that has been experimentally verified." 
[SO:ke] -synonym: "experimentally determined" EXACT [] -is_a: SO:0000789 ! validated - -[Term] -id: SO:0000313 -name: stem_loop -alt_id: SO:0000019 -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "RNA_hairpin_loop" EXACT [] -synonym: "stem loop" EXACT [] -synonym: "stem-loop" EXACT [] -xref: http://en.wikipedia.org/wiki/Stem_loop "wiki" -is_a: SO:0000122 ! RNA_sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: TSS -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cDNA clone" EXACT [] -is_a: SO:0000151 ! implied link automatically realized ! clone -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." 
[SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke] -synonym: "intronic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000321 -name: mRNA_with_plus_1_frameshift -def: "An mRNA with a plus 1 frameshift." [SO:ke] -synonym: "mRNA with plus 1 frameshift" EXACT [] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000868 ! plus_1_frameshift - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -synonym: "nuclease hypersensitive site" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "coding start" EXACT [] -synonym: "translation initiation site" EXACT [] -synonym: "translation start" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! 
rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke] -synonym: "coding end" EXACT [] -synonym: "translation termination site" EXACT [] -synonym: "translation_end" EXACT [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray oligo" EXACT [] -synonym: "microarray oligonucleotide" EXACT [] -is_a: SO:0000051 ! probe - -[Term] -id: SO:0000329 -name: mRNA_with_plus_2_frameshift -def: "An mRNA with a plus 2 frameshift." [SO:xp] -synonym: "mRNA with plus 2 frameshift" EXACT [] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000869 ! plus_2_framshift - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." 
[SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000335 -name: mRNA_with_minus_2_frameshift -def: "A mRNA with a minus 2 frameshift." [SO:ke] -synonym: "mRNA with minus 2 frameshift" EXACT [] -is_a: SO:0000108 ! implied link automatically realized ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000867 ! minus_2_frameshift - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." 
[SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITEs do not encode proteins." [http://www.pnas.org/cgi/content/full/97/18/10083] -synonym: "miniature inverted repeat transposable element" EXACT [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome which promotes recombination." [SO:rd] -synonym: "recombination hotspot" EXACT [] -xref: http://en.wikipedia.org/wiki/Recombination_hotspot "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -synonym: "site specific recombination target region" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000346 -name: loxP_site -synonym: "Cre-recombination target region" RELATED [] -synonym: "loxP site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000348 -name: nucleic_acid -def: "An attribute describing a sequence consisting of nucleobases bound to repeating units. The forms found in nature are deoxyribonucleic acid (DNA), where the repeating units are 2-deoxy-D-ribose rings connected to a phosphate backbone, and ribonucleic acid (RNA), where the repeating units are D-ribose rings connected to a phosphate backbone." [CHEBI:33696, RSC:cb] -synonym: "nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleic_acid "wiki" -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000350 -name: FRT_site -def: "An inversion site found on the Saccharomyces cerevisiae 2 micron plasmid." [SO:ma] -synonym: "FLP recombination target region" EXACT [] -synonym: "FRT site" EXACT [] -is_a: SO:0000948 ! 
inversion_site - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "An attribute to decide a sequence of nucleotides, nucleotide analogs, or amino acids that has been designed by an experimenter and which may, or may not, correspond with any natural sequence." [SO:ma] -synonym: "synthetic sequence" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000352 -name: DNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a 2-deoxy-D-ribose ring connected to a phosphate backbone." [RSC:cb] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000353 -name: sequence_assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -synonym: "group 1 intron homing endonuclease target region" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which is co-inherited as the result of the lack of historic recombination within it." [SO:ma] -synonym: "haplotype block" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000356 -name: RNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a D-ribose ring connected to a phosphate backbone." [RSC:cb] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: flanked -def: "An attribute describing a region that is bounded either side by a particular kind of region." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000359 -name: floxed -def: "An attribute describing sequence that is flanked by Lox-P sites." 
[SO:ke] -xref: http://en.wikipedia.org/wiki/Floxed "wiki" -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000361 -name: FRT_flanked -def: "An attribute to describe sequence that is flanked by the FLP recombinase recognition site, FRT." [SO:ke] -synonym: "FRT flanked" EXACT [] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000362 -name: invalidated_by_chimeric_cDNA -def: "A cDNA clone constructed from more than one mRNA. Usually an experimental artifact." [SO:ma] -synonym: "invalidated by chimeric cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000363 -name: floxed_gene -def: "A transgene that is floxed." [SO:xp] -synonym: "floxed gene" EXACT [] -is_a: SO:0000902 ! implied link automatically realized ! transgene -intersection_of: SO:0000902 ! transgene -intersection_of: has_quality SO:0000359 ! floxed - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposable element." [SO:ke] -synonym: "transposable element flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "A region encoding an integrase which acts at a site adjacent to it (attI_site) to insert DNA which must include but is not limited to an attC_site." [SO:as] -xref: http://en.wikipedia.org/wiki/Integron "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! 
junction - -[Term] -id: SO:0000367 -name: attI_site -def: "A region within an integron, adjacent to an integrase, at which site specific recombination involving an attC_site takes place." [SO:as] -synonym: "attI site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -is_obsolete: true - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/~smaloy/Glossary/C.html] -synonym: "conjugative transposon" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0000373 -name: recombinationally_inverted_gene -def: "A recombinationally rearranged gene by inversion." [SO:xp] -synonym: "recombinationally inverted gene" EXACT [] -is_a: SO:0000456 ! implied link automatically realized ! 
recombinationally_rearranged_gene -intersection_of: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: has_quality SO:1000036 ! inversion - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -is_a: SO:0000372 ! implied link automatically realized ! enzymatic_RNA -intersection_of: SO:0000372 ! enzymatic_RNA -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000375 -name: rRNA_5_8S -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 6S RNA represses expression from a sigma70-dependent promoter during stationary phase." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S RNA" EXACT [] -synonym: "RNA 6S" EXACT [] -xref: http://en.wikipedia.org/wiki/6S_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. 
The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB RsmB RNA" EXACT [] -synonym: "CsrB-RsmB RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -synonym: "DsrA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/DsrA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -synonym: "GcvB RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/GcvB_RNA "wiki" -is_a: SO:0000378 ! 
DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http://rnaworld.bio.ku.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -is_a: SO:0000715 ! implied link automatically realized ! RNA_motif -intersection_of: SO:0000715 ! RNA_motif -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000381 -name: group_IIA_intron -synonym: "group IIA intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -synonym: "group IIB intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -synonym: "MicF RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MicF_RNA "wiki" -is_a: SO:0000644 ! antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -synonym: "OxyS RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/OxyS_RNA "wiki" -is_a: SO:0000370 ! 
small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterized activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -synonym: "RprA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RprA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -synonym: "RRE RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -synonym: "spot-42 RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Spot_42_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesizes telomeric DNA." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. 
The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -def: "An attribute describes a quality of sequence." [SO:ke] -synonym: "sequence attribute" EXACT [] -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000401 -name: gene_attribute -synonym: "gene attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_obsolete: true - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0005837 ! U14_snoRNA_primary_transcript - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. 
It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron. Twintrons are group II or III introns, into which another group II or III intron has been transposed." [PMID:1899376, PMID:7823908] -xref: http://en.wikipedia.org/wiki/Twintron "wiki" -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A biological_region of sequence that, in the molecule, interacts selectively and non-covalently with other molecules. A region on the surface of a molecule that may interact with another molecule. 
When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: See GO:0005488 : binding. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with polypeptide molecules." [SO:ke] -comment: See GO:0042277 : peptide binding. -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000411 -name: rescue_region -def: "A region that rescues." [SO:xp] -synonym: "rescue fragment" EXACT [] -synonym: "rescue region" EXACT [] -synonym: "rescue segment" RELATED [] -is_a: SO:0000695 ! implied link automatically realized ! reagent -intersection_of: SO:0000695 ! reagent -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000414 -name: invalidated_by_genomic_contamination -def: "An attribute to describe a feature that is invalidated due to genomic contamination." [SO:ke] -synonym: "invalidated by genomic contamination" EXACT [] -is_a: SO:0000790 ! 
invalidated - -[Term] -id: SO:0000415 -name: invalidated_by_genomic_polyA_primed_cDNA -def: "An attribute to describe a feature that is invalidated due to polyA priming." [SO:ke] -synonym: "invalidated by genomic polyA primed cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000416 -name: invalidated_by_partial_processing -def: "An attribute to describe a feature that is invalidated due to partial processing." [SO:ke] -synonym: "invalidated by partial processing" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000417 -name: polypeptide_domain -alt_id: BS:00012 -alt_id: BS:00134 -alt_id: SO:0001069 -def: "A structurally or functionally defined protein region. In proteins with multiple domains, the combination of the domains determines the function of the protein. A region which has been shown to recur throughout evolution." [EBIBS:GAR] -comment: Range. Old definition from before biosapiens: A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains. -subset: biosapiens -synonym: "ca_bind" RELATED BS [uniprot:feature_type] -synonym: "DNA_bind" RELATED BS [uniprot:feature_type] -synonym: "domain" BROAD BS [uniprot:feature_type] -synonym: "np_bind" RELATED BS [uniprot:feature_type] -synonym: "polypeptide domain" EXACT [] -synonym: "polypeptide_structural_domain" EXACT BS [] -synonym: "structural domain" BROAD BS [] -synonym: "zn_fing" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0001527 ! peptide_localization_signal -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -synonym: "5' TIR" EXACT [] -synonym: "five prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -synonym: "3' TIR" EXACT [] -synonym: "three prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -synonym: "U5 long terminal repeat region" EXACT [] -synonym: "U5 LTR region" EXACT [] -is_a: SO:0000848 ! 
LTR_component - -[Term] -id: SO:0000423 -name: R_LTR_region -synonym: "R long terminal repeat region" EXACT [] -synonym: "R LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000424 -name: U3_LTR_region -synonym: "U3 long terminal repeat region" EXACT [] -synonym: "U3 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000425 -name: five_prime_LTR -synonym: "5' long terminal repeat" EXACT [] -synonym: "5' LTR" EXACT [] -synonym: "five prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -synonym: "3' long terminal repeat" EXACT [] -synonym: "3' LTR" EXACT [] -synonym: "three prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -synonym: "R 5' long term repeat region" EXACT [] -synonym: "R five prime LTR region" EXACT [] -is_a: SO:0000423 ! R_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -synonym: "U5 5' long terminal repeat region" EXACT [] -synonym: "U5 five prime LTR region" EXACT [] -is_a: SO:0000422 ! U5_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -synonym: "U3 5' long term repeat region" EXACT [] -synonym: "U3 five prime LTR region" EXACT [] -is_a: SO:0000424 ! U3_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -synonym: "R 3' long terminal repeat region" EXACT [] -synonym: "R three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -synonym: "U3 3' long terminal repeat region" EXACT [] -synonym: "U3 three prime LTR region" EXACT [] -is_a: SO:0000849 ! 
three_prime_LTR_component - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -synonym: "U5 3' long terminal repeat region" EXACT [] -synonym: "U5 three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -synonym: "non LTR retrotransposon polymeric tract" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: target_site_duplication -def: "A sequence of the target DNA that is duplicated when a transposable element or phage inserts; usually found at each end the insertion." [http://www.koko.gov.my/CocoaBioTech/Glossaryt.html] -synonym: "target site duplication" EXACT [] -is_a: SO:0000314 ! direct_repeat -intersection_of: SO:0000314 ! direct_repeat -intersection_of: derives_from SO:0000101 ! transposable_element -relationship: derives_from SO:0000101 ! transposable_element - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." [SO:ke] -synonym: "LTR retrotransposon poly purine tract" RELATED [] -synonym: "RR tract" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -synonym: "inverted ring chromosome" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! 
ring_chromosome - -[Term] -id: SO:0000440 -name: vector_replicon -def: "A replicon that has been modified to act as a vector for foreign sequence." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "vector" EXACT [] -synonym: "vector replicon" EXACT [] -xref: http://en.wikipedia.org/wiki/Vector_(molecular_biology) "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000443 -name: polymer_attribute -def: "An attribute to describe the kind of biological sequence." [SO:ke] -synonym: "polymer attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -synonym: "three prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "5' nc exon" EXACT [] -synonym: "5' non coding exon" EXACT [] -synonym: "five prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." 
[SO:ke] -synonym: "UTR intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." [SO:ke] -synonym: "five prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -synonym: "three prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequency of these components." [SO:ma] -synonym: "random sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -synonym: "chromosome interband" RELATED [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000451 -name: gene_with_polyadenylated_mRNA -def: "A gene that encodes a polyadenylated mRNA." [SO:xp] -synonym: "gene with polyadenylated mRNA" EXACT [] -is_a: SO:0001217 ! implied link automatically realized ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000871 ! polyadenylated_mRNA - -[Term] -id: SO:0000452 -name: transgene_attribute -is_obsolete: true - -[Term] -id: SO:0000453 -name: chromosomal_transposition -def: "A chromosome structure variant whereby a region of a chromosome has been transferred to another position. Among interchromosomal rearrangements, the term transposition is reserved for that class in which the telomeres of the chromosomes involved are coupled (that is to say, form the two ends of a single DNA molecule) as in wild-type." [FB:reference_manual, SO:ke] -synonym: "chromosomal transposition" EXACT [] -synonym: "transposition" NARROW [] -is_a: SO:1000183 ! 
chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000455 -name: gene_with_mRNA_with_frameshift -def: "A gene that encodes an mRNA with a frameshift." [SO:xp] -synonym: "gene with mRNA with frameshift" EXACT [] -is_a: SO:0001217 ! implied link automatically realized ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000456 -name: recombinationally_rearranged_gene -def: "A gene that is recombinationally rearranged." [SO:ke] -synonym: "recombinationally rearranged gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000940 ! recombinationally_rearranged - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -def: "A chromosome duplication involving an insertion from another chromosome." [SO:ke] -synonym: "interchromosomal duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D gene" EXACT [] -synonym: "D-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000459 -name: gene_with_trans_spliced_transcript -def: "A gene with a transcript that is trans-spliced." [SO:xp] -synonym: "gene with trans spliced transcript" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! 
gene -intersection_of: transcribed_to SO:0000479 ! trans_spliced_transcript - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_segment -comment: I am using the term segment instead of gene here to avoid confusion with the region 'gene'. -synonym: "vertebrate immunoglobulin T cell receptor segment" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite deficiency" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000463 -name: encodes_alternately_spliced_transcripts -def: "A gene that encodes more than one transcript." [SO:ke] -synonym: "encodes alternately spliced transcripts" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome deletion whereby a chromosome is generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus duplication" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! 
intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene" EXACT [] -synonym: "V-GENE" EXACT [] -synonym: "variable_gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -def: "An attribute describing a gene sequence where the resulting protein is regulated by the stability of the resulting protein." [SO:ke] -synonym: "post translationally regulated by protein stability" EXACT [] -synonym: "post-translationally regulated by protein stability" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -def: "An attribute describing a gene sequence where the resulting protein is modified to regulate it." [SO:ke] -synonym: "post translationally regulated by protein modification" EXACT [] -synonym: "post-translationally regulated by protein modification" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene" EXACT [] -synonym: "J-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000471 -name: autoregulated -def: "The gene product is involved in its own transcriptional regulation." 
[SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -def: "The gene product is involved in its own transcriptional regulation where it decreases transcription." [SO:ke] -synonym: "negatively autoregulated" EXACT [] -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -def: "The gene product is involved in its own transcriptional regulation, where it increases transcription." [SO:ke] -synonym: "positively autoregulated" EXACT [] -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." [SO:ke] -synonym: "contig read" EXACT [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -def: "A gene that is polycistronic." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000478 -name: C_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C gene" EXACT [] -synonym: "C_GENE" EXACT [] -synonym: "constant gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000479 -name: trans_spliced_transcript -def: "A transcript that is trans-spliced." 
[SO:xp] -synonym: "trans spliced transcript" EXACT [] -synonym: "trans-spliced transcript" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly." [SO:ke] -synonym: "tiling path clone" EXACT [] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occurring at the termini of a DNA transposon." [SO:ke] -synonym: "terminal inverted repeat" EXACT [] -synonym: "TIR" EXACT [] -is_a: SO:0000294 ! inverted_repeat -relationship: part_of SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor gene cluster" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000202 ! 
three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-CLUSTER" EXACT [] -synonym: "DJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-C-CLUSTER" EXACT [] -synonym: "VDJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-CLUSTER" EXACT [] -synonym: "VDJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000574 ! 
VDJ_gene - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-C-CLUSTER" RELATED [] -synonym: "VJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-C-CLUSTER" EXACT [] -synonym: "VJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-CLUSTER" EXACT [] -synonym: "VJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -synonym: "D gene recombination feature" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-HEPTAMER" EXACT [] -synonym: "three prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-NOMAMER" EXACT [] -synonym: "three prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-SPACER" EXACT [] -synonym: "three prime D spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-HEPTAMER" EXACT [] -synonym: "five prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-NONAMER" EXACT [] -synonym: "five prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'-SPACER" EXACT [] -synonym: "five prime D spacer" EXACT [] -synonym: "five prime D-spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -synonym: "Hoogsteen base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Hoogsteen_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." [SO:ke] -synonym: "reverse Hoogsteen base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. 
A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ C cluster" EXACT [] -synonym: "D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ cluster" EXACT [] -synonym: "D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J C cluster" EXACT [] -synonym: "D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! 
D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000507 -name: pseudogenic_exon -def: "A non functional descendant of an exon, part of a pseudogene." [SO:ke] -comment: This is the analog of the exon of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon -relationship: part_of SO:0000516 ! pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J cluster" EXACT [] -synonym: "D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J C cluster" EXACT [] -synonym: "D-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! 
C_gene - -[Term] -id: SO:0000510 -name: VD_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V_D_GENE" EXACT [] -synonym: "VD gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J C cluster" EXACT [] -synonym: "J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus aneuploid" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J cluster" EXACT [] -synonym: "J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J nonamer" EXACT [] -synonym: "J-NONAMER" EXACT [] -is_a: SO:0000562 ! 
nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J heptamer" EXACT [] -synonym: "J-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -def: "A non functional descendant of a transcript, part of a pseudogene." [SO:ke] -comment: This is the analog of the transcript of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic transcript" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J spacer" EXACT [] -synonym: "J-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ cluster" EXACT [] -synonym: "V-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J cluster" EXACT [] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ C cluster" EXACT [] -synonym: "V-(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ cluster" EXACT [] -synonym: "V-(VDJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J cluster" EXACT [] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ C cluster" EXACT [] -synonym: "V-(VJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ cluster" EXACT [] -synonym: "V-(VJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J cluster" EXACT [] -synonym: "V-(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000576 ! 
VJ_gene - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V cluster" EXACT [] -synonym: "V-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ C cluster" EXACT [] -synonym: "V-D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ cluster" EXACT [] -synonym: "V-D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J C cluster" EXACT [] -synonym: "V-D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J cluster" EXACT [] -synonym: "V-D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J C cluster" EXACT [] -synonym: "V-D-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J cluster" EXACT [] -synonym: "V-D-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! 
V_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V heptamer" EXACT [] -synonym: "V-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J cluster" EXACT [] -synonym: "V-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J C cluster" EXACT [] -synonym: "V-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V nonamer" EXACT [] -synonym: "V-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! 
V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V spacer" EXACT [] -synonym: "V-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene recombination feature" EXACT [] -synonym: "V-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-C-CLUSTER" EXACT [] -synonym: "DJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-C-CLUSTER" EXACT [] -synonym: "DJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! 
DJ_gene - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-C-CLUSTER" EXACT [] -synonym: "VDJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ C cluster" EXACT [] -synonym: "V-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." [http://www.pnas.org/cgi/content/full/100/11/6569] -synonym: "ISCR" RELATED [] -xref: http://en.wikipedia.org/wiki/Helitron "wiki" -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -synonym: "recoding pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! 
recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -synonym: "designed sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite duplication" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000548 -name: gene_with_edited_transcript -def: "A gene that encodes a transcript that is edited." [SO:xp] -synonym: "gene with edited transcript" EXACT [] -is_a: SO:0001217 ! implied link automatically realized ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000873 ! edited_transcript - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived duplication plus aneuploid" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -def: "A chromosome structural variation whereby either a chromosome exists in addition to the normal chromosome complement or is lacking." [SO:ke] -comment: Examples are Nullo-4, Haplo-4 and triplo-4 in Drosophila. -synonym: "aneuploid chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -synonym: "polyadenylation termination signal" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "A region in the 5' UTR that pairs with the 16S rRNA during formation of the preinitiation complex." [SO:jh] -comment: Not found in Eukaryotic sequence. -synonym: "five prime ribosome binding site" EXACT [] -synonym: "RBS" RELATED [] -synonym: "Shine Dalgarno sequence" EXACT [] -synonym: "Shine-Dalgarno sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Shine-Dalgarno_sequence "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -alt_id: SO:0001430 -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation.\n The boundary between the UTR and the polyA sequence." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA cleavage site" EXACT [] -synonym: "polyA junction" EXACT [] -synonym: "polyA site" EXACT [] -synonym: "polyA_junction" EXACT [] -synonym: "polyadenylation site" RELATED [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "5' clip" RELATED [] -synonym: "five prime clip" EXACT [] -is_a: SO:0000303 ! 
clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'RS" EXACT [] -synonym: "five prime D recombination signal sequence" EXACT [] -synonym: "five prime D-recombination signal sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "3'-clip" EXACT [] -synonym: "three prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C cluster" EXACT [] -synonym: "C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D cluster" EXACT [] -synonym: "D-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J cluster" EXACT [] -synonym: "D-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: "Seven nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or J-gene recombination feature of an immunoglobulin or T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "HEPTAMER" RELATED [] -synonym: "heptamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -synonym: "nonamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000563 -name: vertebrate_immune_system_gene_recombination_spacer -synonym: "vertebrate immune system gene recombination spacer" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J C cluster" EXACT [] -synonym: "V-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J C cluster" EXACT [] -synonym: "V-(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J C cluster" EXACT [] -synonym: "V-(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -synonym: "inversion derived aneuploid chromosome" EXACT [] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promoter -synonym: "bidirectional promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000569 -name: retrotransposed -alt_id: SO:0100042 -def: "An attribute of a feature that occurred as the product of a reverse transcriptase mediated event." [SO:ke] -comment: GO:0003964 RNA-directed DNA polymerase activity. -xref: http://en.wikipedia.org/wiki/Retrotransposed "wiki" -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-RS" EXACT [] -synonym: "three prime D recombination signal sequence" EXACT [] -synonym: "three_prime_D-recombination_signal_sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_encoding -synonym: "miRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000572 -name: DJ_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D-J-GENE" EXACT [] -synonym: "DJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000573 -name: rRNA_encoding -synonym: "rRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000574 -name: VDJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-D-J-GENE" EXACT [] -synonym: "VDJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000575 -name: scRNA_encoding -synonym: "scRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000576 -name: VJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-J-GENE" EXACT [] -synonym: "VJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_encoding -synonym: "snoRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." [SO:ma] -synonym: "edited transcript feature" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -synonym: "methylation guide snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." [SO:ke] -synonym: "rRNA cleavage snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "pre edited region" EXACT [] -synonym: "pre-edited region" EXACT [] -is_a: SO:0000579 ! 
edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "A tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. TmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and Eukaryote nuclear genomes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa RNA" RELATED [] -synonym: "ssrA" RELATED [] -xref: http://en.wikipedia.org/wiki/TmRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_encoding -synonym: "C/D box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." [SO:ke] -synonym: "10Sa RNA primary transcript" RELATED [] -synonym: "ssrA RNA primary transcript" RELATED [] -synonym: "tmRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyze their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. 
-subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -is_a: SO:0000188 ! implied link automatically realized ! intron -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -synonym: "SRP RNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. 
In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000589 ! SRP_RNA_primary_transcript - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A tertiary structure in RNA where nucleotides in a loop form base pairs with a region of RNA downstream of the loop." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Pseudoknot "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "classical pseudoknot" EXACT [] -synonym: "H pseudoknot" EXACT [] -synonym: "H-pseudoknot" EXACT [] -synonym: "H-type pseudoknot" EXACT [] -synonym: "hairpin-type pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." 
[http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000595 ! C_D_box_snoRNA_primary_transcript - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "box H/ACA snoRNA" EXACT [] -synonym: "H ACA box snoRNA" EXACT [] -synonym: "H/ACA box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000596 ! H_ACA_box_snoRNA_primary_transcript - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." [SO:ke] -synonym: "C/D box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -synonym: "H ACA box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! 
snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." [http://www.rna.ucla.edu/index.html] -is_obsolete: true - -[Term] -id: SO:0000598 -name: edited_by_C_insertion_and_dinucleotide_insertion -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000599 -name: edited_by_C_to_U_substitution -is_obsolete: true - -[Term] -id: SO:0000600 -name: edited_by_A_to_I_substitution -is_obsolete: true - -[Term] -id: SO:0000601 -name: edited_by_G_addition -is_obsolete: true - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. 
Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing block" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing domain" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "unedited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_encoding -synonym: "H ACA box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! 
snoRNA_encoding - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -synonym: "oligo U tail" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000602 ! guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -synonym: "bacterial RNApol promoter" EXACT [] -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -synonym: "bacterial terminator" EXACT [] -is_a: SO:0000141 ! terminator -is_a: SO:0000752 ! 
gene_group_regulatory_region - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -synonym: "terminator of type 2 RNApol III promoter" EXACT [] -is_a: SO:0000951 ! eukaryotic_terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -synonym: "RNApol III promoter type 1" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "RNApol III promoter type 2" EXACT [] -synonym: "tRNA promoter" RELATED [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence TGGCnnAGTGG." [SO:ke] -comment: Binds TFIIIC. -synonym: "A-box" EXACT [] -xref: http://en.wikipedia.org/wiki/A-box "wiki" -is_a: SO:0001660 ! core_promoter_element - -[Term] -id: SO:0000620 -name: B_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence AGGTTCCAnnCC." [SO:ke] -comment: Binds TFIIIC. -synonym: "B-box" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -synonym: "RNApol III promoter type 3" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -def: "An RNA polymerase III type 1 promoter with consensus sequence CAnnCCn." [SO:ke] -synonym: "C-box" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000623 -name: snRNA_encoding -synonym: "snRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -synonym: "chromosomal regulatory element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000627 -name: insulator -def: "A transcriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -synonym: "five prime open reading frame" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -def: "A start codon upstream of the ORF." [SO:ke] -synonym: "upstream AUG codon" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000631 -name: polycistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." [SO:ke] -synonym: "polycistronic primary transcript" EXACT [] -is_a: SO:0000078 ! 
implied link automatically realized ! polycistronic_transcript -is_a: SO:0000185 ! implied link automatically realized ! primary_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000632 -name: monocistronic_primary_transcript -def: "A primary transcript encoding for one gene product." [SO:ke] -synonym: "monocistronic primary transcript" EXACT [] -is_a: SO:0000185 ! implied link automatically realized ! primary_transcript -is_a: SO:0000665 ! implied link automatically realized ! monocistronic_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000633 -name: monocistronic_mRNA -def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd] -synonym: "monocistronic mRNA" EXACT [] -synonym: "monocistronic processed transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Monocistronic_mRNA "wiki" -is_a: SO:0000234 ! implied link automatically realized ! mRNA -is_a: SO:0000665 ! implied link automatically realized ! monocistronic_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000634 -name: polycistronic_mRNA -def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd] -synonym: "polycistronic mRNA" EXACT [] -synonym: "polycistronic processed transcript" RELATED [] -xref: http://en.wikipedia.org/wiki/Polycistronic_mRNA "wiki" -is_a: SO:0000078 ! implied link automatically realized ! polycistronic_transcript -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "A primary transcript that donates the spliced leader to other mRNA." 
[SO:ke] -synonym: "mini exon donor RNA" EXACT [] -synonym: "mini-exon donor RNA" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -synonym: "spliced leader RNA" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000635 ! mini_exon_donor_RNA - -[Term] -id: SO:0000637 -name: engineered_plasmid -def: "A plasmid that is engineered." [SO:xp] -synonym: "engineered plasmid" EXACT [] -synonym: "engineered plasmid gene" RELATED [] -is_a: SO:0000155 ! implied link automatically realized ! plasmid -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -synonym: "transcribed spacer region" EXACT [] -is_a: SO:0000838 ! rRNA_primary_transcript_region - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -synonym: "internal transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -synonym: "external transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -synonym: "tetranucleotide repeat microsatellite feature" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_encoding -synonym: "SRP RNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro RNA primary transcript" EXACT [] -synonym: "miRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000648 -name: stRNA_primary_transcript -def: "A primary transcript encoding a small temporal mRNA (SO:0000649)." [SO:ke] -synonym: "small temporal RNA primary transcript" EXACT [] -synonym: "stRNA primary transcript" EXACT [] -is_a: SO:0000647 ! 
miRNA_primary_transcript - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000255 ! rRNA_small_subunit_primary_transcript - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilizes 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! 
large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000654 -name: maxicircle_gene -def: "A mitochondrial gene located in a maxicircle." [SO:xp] -synonym: "maxi-circle gene" EXACT [] -synonym: "maxicircle gene" EXACT [] -is_a: SO:0000089 ! implied link automatically realized ! kinetoplast_gene -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000742 ! maxicircle - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000656 -name: stRNA_encoding -synonym: "stRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: has_part SO:0000726 ! repeat_unit - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! 
repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_encoding -synonym: "tmRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_obsolete: true - -[Term] -id: SO:0000661 -name: intron_attribute -is_obsolete: true - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000663 -name: tRNA_encoding -synonym: "tRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -synonym: "introgressed chromosome region" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000665 -name: monocistronic_transcript -def: "A transcript that is monocistronic." [SO:xp] -synonym: "monocistronic transcript" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000666 -name: mobile_intron -def: "An intron (mitochondrial, chloroplast, nuclear or prokaryotic) that encodes a double strand sequence specific endonuclease allowing for mobility." [SO:ke] -synonym: "mobile intron" EXACT [] -is_a: SO:0000188 ! implied link automatically realized ! intron -is_a: SO:0001037 ! implied link automatically realized ! mobile_genetic_element -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0000667 -name: insertion -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -synonym: "sequence rearrangement feature" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." [SO:ma] -synonym: "chromosome breakage sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -synonym: "internal eliminated sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -synonym: "macronucleus destined segment" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non canonical splice site" EXACT [] -synonym: "non-canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000678 -consider: SO:0000679 - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." 
[SO:ke] -synonym: "canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000676 -consider: SO:0000677 - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -synonym: "canonical 3' splice site" EXACT [] -synonym: "canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." [SO:ke] -synonym: "canonical 5' splice site" EXACT [] -synonym: "canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." [SO:ke] -synonym: "non canonical 3' splice site" RELATED [] -synonym: "non canonical three prime splice site" EXACT [] -synonym: "non-canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non canonical 5' splice site" EXACT [] -synonym: "non canonical five prime splice site" EXACT [] -synonym: "non-canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non ATG start codon" EXACT [] -synonym: "non canonical start codon" EXACT [] -synonym: "non-canonical start codon" EXACT [] -is_a: SO:0000318 ! start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -synonym: "aberrant processed transcript" EXACT [] -is_a: SO:0000673 ! 
transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -synonym: "exonic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000685 -name: DNAseI_hypersensitive_site -synonym: "DHS" EXACT [] -synonym: "DNAseI hypersensitive site" EXACT [] -is_a: SO:0000322 ! nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "A chromosomal translocation whereby the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements. This occurs for some translocations, particularly but not exclusively, reciprocal translocations." [SO:ma] -synonym: "translocation element" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! 
expressed_sequence_match - -[Term] -id: SO:0000690 -name: gene_with_polycistronic_transcript -def: "A gene that encodes a polycistronic transcript." [SO:xp] -synonym: "gene with polycistronic transcript" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000078 ! polycistronic_transcript - -[Term] -id: SO:0000691 -name: cleaved_initiator_methionine -alt_id: BS:00067 -def: "The initiator methionine that has been cleaved from a mature polypeptide sequence." [EBIBS:GAR] -subset: biosapiens -synonym: "cleaved initiator methionine" EXACT [] -synonym: "init_met" RELATED [uniprot:feature_type] -synonym: "initiator methionine" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000692 -name: gene_with_dicistronic_transcript -def: "A gene that encodes a dicistronic transcript." [SO:xp] -synonym: "gene with dicistronic transcript" EXACT [] -is_a: SO:0000690 ! implied link automatically realized ! gene_with_polycistronic_transcript -intersection_of: SO:0000690 ! gene_with_polycistronic_transcript -intersection_of: transcribed_to SO:0000079 ! dicistronic_transcript - -[Term] -id: SO:0000693 -name: gene_with_recoded_mRNA -def: "A gene that encodes an mRNA that is recoded." [SO:xp] -synonym: "gene with recoded mRNA" EXACT [] -is_a: SO:0001217 ! implied link automatically realized ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000881 ! recoded - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." 
[SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000697 -name: gene_with_stop_codon_read_through -def: "A gene that encodes a transcript with stop codon readthrough." [SO:xp] -synonym: "gene with stop codon read through" EXACT [] -is_a: SO:0000693 ! implied link automatically realized ! gene_with_recoded_mRNA -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000698 -name: gene_with_stop_codon_redefined_as_pyrrolysine -def: "A gene encoding an mRNA that has the stop codon redefined as pyrrolysine." [SO:xp] -synonym: "gene with stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000697 ! implied link automatically realized ! gene_with_stop_codon_read_through -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000884 ! stop_codon_redefined_as_pyrrolysine - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -synonym: "breakpoint" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000001 ! region - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! 
sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjcent copies of a region (of length greater than 1)." [SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The 5' five prime splice site region of the donor RNA." [SO:ke] -comment: SL RNA contains a donor site. 
-synonym: "5 prime trans splice site" RELATED [] -synonym: "trans splice donor site" EXACT [] -synonym: "trans-splice donor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -synonym: "SL1 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -synonym: "SL2 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000710 -name: gene_with_stop_codon_redefined_as_selenocysteine -def: "A gene encoding an mRNA that has the stop codon redefined as selenocysteine." [SO:xp] -synonym: "gene with stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000697 ! implied link automatically realized ! gene_with_stop_codon_read_through -intersection_of: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: has_part SO:0000885 ! stop_codon_redefined_as_selenocysteine - -[Term] -id: SO:0000711 -name: gene_with_mRNA_recoded_by_translational_bypass -def: "A gene with mRNA recoded by translational bypass." [SO:xp] -synonym: "gene with mRNA recoded by translational bypass" EXACT [] -is_a: SO:0000693 ! implied link automatically realized ! gene_with_recoded_mRNA -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:0000712 -name: gene_with_transcript_with_translational_frameshift -def: "A gene encoding a transcript that has a translational frameshift." [SO:xp] -synonym: "gene with transcript with translational frameshift" EXACT [] -is_a: SO:0000693 ! implied link automatically realized ! gene_with_recoded_mRNA -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000713 -name: DNA_motif -def: "A motif that is active in the DNA form of the sequence." 
[SO:ke] -synonym: "DNA motif" EXACT [] -xref: http://en.wikipedia.org/wiki/DNA_motif "wiki" -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001683 ! sequence_motif - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000716 -name: dicistronic_mRNA -def: "An mRNA that has the quality dicistronic." [SO:ke] -synonym: "dicistronic mRNA" EXACT [] -synonym: "dicistronic processed transcript" RELATED [] -is_a: SO:0000079 ! implied link automatically realized ! dicistronic_transcript -is_a: SO:0000634 ! implied link automatically realized ! polycistronic_mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interrupted by one or more stop codons; usually identified through intergenomic sequence comparisons." [SGD:rb] -comment: Term requested by Rama from SGD. -synonym: "blocked reading frame" EXACT [] -is_a: SO:0000717 ! 
reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -synonym: "superscaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000720 -name: foreign_transposable_element -def: "A transposable element that is foreign." [SO:ke] -comment: requested by Michael on 19 Nov 2004. -synonym: "foreign transposable element" EXACT [] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000721 -name: gene_with_dicistronic_primary_transcript -def: "A gene that encodes a dicistronic primary transcript." [SO:xp] -comment: Requested by Michael, 19 nov 2004. -synonym: "gene with dicistronic primary transcript" EXACT [] -is_a: SO:0000692 ! implied link automatically realized ! gene_with_dicistronic_transcript -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:1001197 ! dicistronic_primary_transcript - -[Term] -id: SO:0000722 -name: gene_with_dicistronic_mRNA -def: "A gene that encodes a polycistronic mRNA." [SO:xp] -comment: Requested by MA nov 19 2004. -synonym: "gene with dicistronic mRNA" EXACT [] -synonym: "gene with dicistronic processed transcript" EXACT [] -is_a: SO:0000692 ! implied link automatically realized ! gene_with_dicistronic_transcript -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:0000716 ! dicistronic_mRNA - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." 
[SO:ma] -synonym: "intervening DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/IDNA "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal - -[Term] -id: SO:0000726 -name: repeat_unit -def: "The simplest repeated component of a repeat region. A single repeat." [SO:ke] -comment: Added to comply with the feature table. A single repeat. -synonym: "repeat unit" EXACT [] -xref: http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -is_a: SO:0001055 ! 
implied link automatically realized ! transcriptional_cis_regulatory_region -intersection_of: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000728 -name: intein -def: "A region of a peptide that is able to excise itself and rejoin the remaining portions with a peptide bond." [SO:ke] -comment: Intein-mediated protein splicing occurs after mRNA has been translated into a protein. -synonym: "protein intron" RELATED [] -xref: http://en.wikipedia.org/wiki/Intein "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000729 -name: intein_containing -def: "An attribute of protein-coding genes where the initial protein product contains an intein." [SO:ke] -synonym: "intein containing" EXACT [] -is_a: SO:0000010 ! protein_coding - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000731 -name: fragmentary -def: "An attribute to describe a feature that is incomplete." [SO:ke] -comment: Term added because of request by MO people. -synonym: "fragment" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000732 -name: predicted -def: "An attribute describing an unverified region." [SO:ke] -xref: http://en.wikipedia.org/wiki/Predicted "wiki" -is_a: SO:0000905 ! status - -[Term] -id: SO:0000733 -name: feature_attribute -def: "An attribute describing a located_sequence_feature." [SO:ke] -synonym: "feature attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000734 -name: exemplar_mRNA -def: "An exemplar is a representative cDNA sequence for each gene. The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." 
[http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: Added for the MO people. -synonym: "exemplar mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000864 ! exemplar - -[Term] -id: SO:0000735 -name: sequence_location -synonym: "sequence location" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_sequence -synonym: "organelle sequence" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "mitochondrial sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000738 -name: nuclear_sequence -synonym: "nuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -synonym: "nucleomorphic sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000740 -name: plastid_sequence -synonym: "plastid sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000741 -name: kinetoplast -alt_id: SO:0000826 -def: "A kinetoplast is an interlocked network of thousands of minicircles and tens of maxi circles, located near the base of the flagellum of some protozoan species." [PMID:8395055] -synonym: "kinetoplast_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Kinetoplast "wiki" -is_a: SO:0001026 ! implied link automatically realized ! genome -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0000742 ! maxicircle -intersection_of: has_part SO:0000980 ! minicircle - -[Term] -id: SO:0000742 -name: maxicircle -alt_id: SO:0000827 -def: "A maxicircle is a replicon, part of a kinetoplast, that contains open reading frames and replicates via a rolling circle method." 
[PMID:8395055] -synonym: "maxicircle_chromosome" EXACT [] -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000741 ! kinetoplast - -[Term] -id: SO:0000743 -name: apicoplast_sequence -synonym: "apicoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -synonym: "chromoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -synonym: "chloroplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -synonym: "cyanelle sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -synonym: "leucoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -synonym: "proplastid sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_location -synonym: "plasmid location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -synonym: "amplification origin" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_location -synonym: "proviral location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -def: "The region of sequence that has been inserted and is being propogated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! 
clone - -[Term] -id: SO:0000754 -name: lambda_vector -def: "The lambda bacteriophage is the vector for the linear lambda clone. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -synonym: "lambda vector" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000755 -name: plasmid_vector -synonym: "plasmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Plasmid_vector#Vectors "wiki" -is_a: SO:0000440 ! vector_replicon -intersection_of: SO:0001235 ! replicon -intersection_of: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template." [SO:ma] -xref: http://en.wikipedia.org/wiki/CDNA "wiki" -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -synonym: "single strand cDNA" EXACT [] -synonym: "single stranded cDNA" EXACT [] -synonym: "single-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -synonym: "double strand cDNA" RELATED [] -synonym: "double stranded cDNA" EXACT [] -synonym: "double-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_obsolete: true - -[Term] -id: SO:0000760 -name: YAC_clone -is_obsolete: true - -[Term] -id: SO:0000761 -name: phagemid_clone -is_obsolete: true - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000763 -name: fosmid_clone -is_obsolete: true - -[Term] -id: SO:0000764 -name: BAC_clone -is_obsolete: true - -[Term] -id: SO:0000765 -name: cosmid_clone -is_obsolete: true - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -def: "A tRNA sequence that has a pyrrolysine anticodon, and a 3' pyrrolysine binding region." 
[SO:ke] -synonym: "pyrrolysyl tRNA" EXACT [] -synonym: "pyrrolysyl-transfer ribonucleic acid" EXACT [] -synonym: "pyrrolysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0001178 ! pyrrolysine_tRNA_primary_transcript - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome." [SO:ma] -is_a: SO:0000155 ! plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria. Processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw, issn:1362-4962] -comment: Added in response to comment from Kelly Williams from Indiana. Nov 2005. -synonym: "tmRNA coding piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw] -comment: Added in response to Kelly Williams from Indiana. Date: Nov 2005. -synonym: "tmRNA acceptor piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000771 -name: QTL -def: "A quantitative trait locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." 
[http://rgd.mcw.edu/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005. -synonym: "quantitative trait locus" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000772 -name: genomic_island -def: "A genomic island is an integrated mobile genetic element, characterized by size (over 10 Kb). It that has features that suggest a foreign origin. These can include nucleotide distribution (oligonucleotides signature, CG content etc.) that differs from the bulk of the chromosome and/or genes suggesting DNA mobility." [Phigo:at, SO:ke] -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -synonym: "genomic island" EXACT [] -xref: http://en.wikipedia.org/wiki/Genomic_island "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "pathogenic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands. -synonym: "metabolic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -def: "An adaptive island is a genomic island that provides an adaptive advantage to the host." [SO:ke] -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands. 
Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "adaptive island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands. Evolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA gene. John T. Sullivan and Clive W. Ronso PNAS 1998 Apr 28 95 (9) 5145-5149. -synonym: "symbiosis island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000779 -name: engineered_episome -def: "An episome that is engineered." [SO:xp] -comment: Requested by Lynn Crosby Jan 2006. -synonym: "engineered episome" EXACT [] -is_a: SO:0000637 ! implied link automatically realized ! engineered_plasmid -is_a: SO:0000768 ! implied link automatically realized ! episome -intersection_of: SO:0000768 ! episome -intersection_of: has_quality SO:0000783 ! 
engineered - -[Term] -id: SO:0000780 -name: transposable_element_attribute -comment: Added by KE Jan 2006 to capture the kinds of attributes of TEs -is_obsolete: true - -[Term] -id: SO:0000781 -name: transgenic -def: "Attribute describing sequence that has been integrated with foreign sequence." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000782 -name: natural -def: "An attribute describing a feature that occurs in nature." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000783 -name: engineered -def: "An attribute to describe a region that was modified in vitro." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000784 -name: foreign -def: "An attribute to describe a region from another species." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000785 -name: cloned_region -comment: Added in response to Lynn Crosby. A clone insert may be composed of many cloned regions. -synonym: "cloned region" EXACT [] -synonym: "cloned segment" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000786 -name: reagent_attribute -comment: Added jan 2006 by KE. -synonym: "reagent attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000787 -name: clone_attribute -is_obsolete: true - -[Term] -id: SO:0000788 -name: cloned -is_obsolete: true - -[Term] -id: SO:0000789 -name: validated -def: "An attribute to describe a feature that has been proven." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000790 -name: invalidated -def: "An attribute describing a feature that is invalidated." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000791 -name: cloned_genomic -is_obsolete: true - -[Term] -id: SO:0000792 -name: cloned_cDNA -is_obsolete: true - -[Term] -id: SO:0000793 -name: engineered_DNA -is_obsolete: true - -[Term] -id: SO:0000794 -name: engineered_rescue_region -def: "A rescue region that is engineered." 
[SO:xp] -synonym: "engineered rescue fragment" EXACT [] -synonym: "engineered rescue region" EXACT [] -synonym: "engineered rescue segment" EXACT [] -is_a: SO:0000411 ! implied link automatically realized ! rescue_region -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000411 ! rescue_region -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000795 -name: rescue_mini_gene -def: "A mini_gene that rescues." [SO:xp] -synonym: "rescue mini gene" EXACT [] -synonym: "rescue mini-gene" EXACT [] -is_a: SO:0000815 ! implied link automatically realized ! mini_gene -intersection_of: SO:0000815 ! mini_gene -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000796 -name: transgenic_transposable_element -def: "TE that has been modified in vitro, including insertion of DNA derived from a source other than the originating TE." [FB:mc] -comment: Modified as requested by Lynn - FB. May 2007. -synonym: "transgenic transposable element" EXACT [] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: derives_from SO:0000151 ! clone -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000797 -name: natural_transposable_element -def: "TE that exists (or existed) in nature." [FB:mc] -synonym: "natural transposable element" EXACT [] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000782 ! natural - -[Term] -id: SO:0000798 -name: engineered_transposable_element -def: "TE that has been modified by manipulations in vitro." [FB:mc] -synonym: "engineered transposable element" EXACT [] -is_a: SO:0000101 ! implied link automatically realized ! transposable_element -is_a: SO:0000804 ! implied link automatically realized ! 
engineered_region -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000799 -name: engineered_foreign_transposable_element -def: "A transposable_element that is engineered and foreign." [FB:mc] -synonym: "engineered foreign transposable element" EXACT [] -is_a: SO:0000720 ! implied link automatically realized ! foreign_transposable_element -is_a: SO:0000798 ! implied link automatically realized ! engineered_transposable_element -is_a: SO:0000805 ! implied link automatically realized ! engineered_foreign_region -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000800 -name: assortment_derived_duplication -def: "A multi-chromosome duplication aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment derived duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000801 -name: assortment_derived_deficiency_plus_duplication -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency and a duplication." [FB:gm] -synonym: "assortment derived deficiency plus duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000802 -name: assortment_derived_deficiency -def: "A multi-chromosome deficiency aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment-derived deficiency" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000803 -name: assortment_derived_aneuploid -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency or a duplication." [FB:gm] -synonym: "assortment derived aneuploid" EXACT [] -is_a: SO:0001504 ! 
assortment_derived_variation - -[Term] -id: SO:0000804 -name: engineered_region -def: "A region that is engineered." [SO:xp] -synonym: "construct" EXACT [] -synonym: "engineered region" EXACT [] -synonym: "engineered sequence" EXACT [] -is_a: SO:0001409 ! biomaterial_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000805 -name: engineered_foreign_region -def: "A region that is engineered and foreign." [SO:xp] -synonym: "engineered foreign region" EXACT [] -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000806 -name: fusion -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000807 -name: engineered_tag -def: "A tag that is engineered." [SO:xp] -synonym: "engineered tag" EXACT [] -is_a: SO:0000324 ! implied link automatically realized ! tag -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000324 ! tag -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000808 -name: validated_cDNA_clone -def: "A cDNA clone that has been validated." [SO:xp] -synonym: "validated cDNA clone" EXACT [] -is_a: SO:0000317 ! implied link automatically realized ! cDNA_clone -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000789 ! validated - -[Term] -id: SO:0000809 -name: invalidated_cDNA_clone -def: "A cDNA clone that is invalid." [SO:xp] -synonym: "invalidated cDNA clone" EXACT [] -is_a: SO:0000317 ! implied link automatically realized ! cDNA_clone -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000790 ! invalidated - -[Term] -id: SO:0000810 -name: chimeric_cDNA_clone -def: "A cDNA clone invalidated because it is chimeric." [SO:xp] -synonym: "chimeric cDNA clone" EXACT [] -is_a: SO:0000809 ! 
implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA - -[Term] -id: SO:0000811 -name: genomically_contaminated_cDNA_clone -def: "A cDNA clone invalidated by genomic contamination." [SO:xp] -synonym: "genomically contaminated cDNA clone" EXACT [] -is_a: SO:0000809 ! implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000414 ! invalidated_by_genomic_contamination - -[Term] -id: SO:0000812 -name: polyA_primed_cDNA_clone -def: "A cDNA clone invalidated by polyA priming." [SO:xp] -synonym: "polyA primed cDNA clone" EXACT [] -is_a: SO:0000809 ! implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000415 ! invalidated_by_genomic_polyA_primed_cDNA - -[Term] -id: SO:0000813 -name: partially_processed_cDNA_clone -def: "A cDNA invalidated clone by partial processing." [SO:xp] -synonym: "partially processed cDNA clone" EXACT [] -is_a: SO:0000809 ! implied link automatically realized ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000416 ! invalidated_by_partial_processing - -[Term] -id: SO:0000814 -name: rescue -def: "An attribute describing a region's ability, when introduced to a mutant organism, to re-establish (rescue) a phenotype." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000815 -name: mini_gene -def: "By definition, minigenes are short open-reading frames (ORF), usually encoding approximately 9 to 20 amino acids, which are expressed in vivo (as distinct from being synthesized as peptide or protein ex vivo and subsequently injected). 
The in vivo synthesis confers a distinct advantage: the expressed sequences can enter both antigen presentation pathways, MHC I (inducing CD8+ T- cells, which are usually cytotoxic T-lymphocytes (CTL)) and MHC II (inducing CD4+ T-cells, usually 'T-helpers' (Th)); and can encounter B-cells, inducing antibody responses. Three main vector approaches have been used to deliver minigenes: viral vectors, bacterial vectors and plasmid DNA." [PMID:15992143] -synonym: "mini gene" EXACT [] -is_a: SO:0000236 ! ORF - -[Term] -id: SO:0000816 -name: rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "rescue gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000817 -name: wild_type -def: "An attribute describing sequence with the genotype found in nature and/or standard laboratory stock." [SO:ke] -synonym: "wild type" EXACT [] -xref: http://en.wikipedia.org/wiki/Wild_type "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000818 -name: wild_type_rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "wild type rescue gene" EXACT [] -is_a: SO:0000816 ! rescue_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000817 ! wild_type - -[Term] -id: SO:0000819 -name: mitochondrial_chromosome -def: "A chromosome originating in a mitochondria." [SO:xp] -synonym: "mitochondrial chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000820 -name: chloroplast_chromosome -def: "A chromosome originating in a chloroplast." [SO:xp] -synonym: "chloroplast chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000745 ! 
chloroplast_sequence - -[Term] -id: SO:0000821 -name: chromoplast_chromosome -def: "A chromosome originating in a chromoplast." [SO:xp] -synonym: "chromoplast chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000822 -name: cyanelle_chromosome -def: "A chromosome originating in a cyanelle." [SO:xp] -synonym: "cyanelle chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000823 -name: leucoplast_chromosome -def: "A chromosome with origin in a leucoplast." [SO:xp] -synonym: "leucoplast chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000824 -name: macronuclear_chromosome -def: "A chromosome originating in a macronucleus." [SO:xp] -synonym: "macronuclear chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000083 ! macronuclear_sequence - -[Term] -id: SO:0000825 -name: micronuclear_chromosome -def: "A chromosome originating in a micronucleus." [SO:xp] -synonym: "micronuclear chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000084 ! micronuclear_sequence - -[Term] -id: SO:0000828 -name: nuclear_chromosome -def: "A chromosome originating in a nucleus." [SO:xp] -synonym: "nuclear chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000738 ! 
nuclear_sequence - -[Term] -id: SO:0000829 -name: nucleomorphic_chromosome -def: "A chromosome originating in a nucleomorph." [SO:xp] -synonym: "nucleomorphic chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000832 -name: promoter_region -def: "A region of sequence which is part of a promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -def: "A region of a mature transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! 
transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000838 -name: rRNA_primary_transcript_region -def: "A region of an rRNA primary transcript." [SO:ke] -comment: To allow transcribed_spacer_region to have a path to the root. -synonym: "rRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! 
biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000840 -name: repeat_component -def: "A region of a repeated sequence." [SO:ke] -comment: A manufactured to group the parts of repeats, to give them an is_a path back to the root. -synonym: "repeat component" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000843 -name: bacterial_RNApol_promoter_region -def: "A region which is part of a bacterial RNA polymerase promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of bacterial_RNApol_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000844 -name: RNApol_II_promoter_region -def: "A region of sequence which is a promoter for RNA polymerase II." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_II_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000845 -name: RNApol_III_promoter_type_1_region -def: "A region of sequence which is a promoter for RNA polymerase III type 1." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_1 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000846 -name: RNApol_III_promoter_type_2_region -def: "A region of sequence which is a promoter for RNA polymerase III type 2." 
[SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_2 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000847 -name: tmRNA_region -def: "A region of a tmRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of tmRNA, thus giving them an is_a path back to the root. -synonym: "tmRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000584 ! tmRNA - -[Term] -id: SO:0000848 -name: LTR_component -synonym: "long term repeat component" EXACT [] -synonym: "LTR component" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000849 -name: three_prime_LTR_component -synonym: "3' long terminal repeat component" EXACT [] -synonym: "three prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000850 -name: five_prime_LTR_component -synonym: "5' long term repeat component" EXACT [] -synonym: "five prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000851 -name: CDS_region -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0000853 -name: homologous_region -def: "A region that is homologous to another region." [SO:ke] -synonym: "homolog" EXACT [] -synonym: "homologous region" EXACT [] -synonym: "homologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Homology_(biology) "wiki" -is_a: SO:0000330 ! implied link automatically realized ! conserved_region -intersection_of: SO:0000330 ! 
conserved_region -intersection_of: has_quality SO:0000857 ! homologous - -[Term] -id: SO:0000854 -name: paralogous_region -def: "A homologous_region that is paralogous to another region." [SO:ke] -comment: A term to be used in conjunction with the paralogous_to relationship. -synonym: "paralog" EXACT [] -synonym: "paralogous region" EXACT [] -synonym: "paralogue" EXACT [] -xref: http://en.wikipedia.org/wiki/Paralog#Paralogy "wiki" -is_a: SO:0000853 ! implied link automatically realized ! homologous_region -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000859 ! paralogous - -[Term] -id: SO:0000855 -name: orthologous_region -def: "A homologous_region that is orthologous to another region." [SO:ke] -comment: This term should be used in conjunction with the similarity relationships defined in SO. -synonym: "ortholog" EXACT [] -synonym: "orthologous region" EXACT [] -synonym: "orthologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Ortholog#Orthology "wiki" -is_a: SO:0000853 ! implied link automatically realized ! homologous_region -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000858 ! orthologous - -[Term] -id: SO:0000856 -name: conserved -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000857 -name: homologous -def: "Similarity due to common ancestry." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000858 -name: orthologous -def: "An attribute describing a kind of homology where divergence occured after a speciation event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000859 -name: paralogous -def: "An attribute describing a kind of homology where divergence occurred after a duplication event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000860 -name: syntenic -def: "Attribute describing sequence regions occurring in same order on chromosome of different species." [SO:ke] -xref: http://en.wikipedia.org/wiki/Syntenic "wiki" -is_a: SO:0000856 ! 
conserved - -[Term] -id: SO:0000861 -name: capped_primary_transcript -def: "A primary transcript that is capped." [SO:xp] -synonym: "capped primary transcript" EXACT [] -is_a: SO:0000185 ! implied link automatically realized ! primary_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000862 -name: capped_mRNA -def: "An mRNA that is capped." [SO:xp] -synonym: "capped mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000863 -name: mRNA_attribute -def: "An attribute describing an mRNA feature." [SO:ke] -synonym: "mRNA attribute" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000864 -name: exemplar -def: "An attribute describing a sequence is representative of a class of similar sequences." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000865 -name: frameshift -def: "An attribute describing a sequence that contains a mutation involving the deletion or insertion of one or more bases, where this number is not divisible by 3." [SO:ke] -xref: http://en.wikipedia.org/wiki/Frameshift "wiki" -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000866 -name: minus_1_frameshift -def: "A frameshift caused by deleting one base." [SO:ke] -synonym: "minus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000867 -name: minus_2_frameshift -def: "A frameshift caused by deleting two bases." [SO:ke] -synonym: "minus 2 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000868 -name: plus_1_frameshift -def: "A frameshift caused by inserting one base." [SO:ke] -synonym: "plus 1 frameshift" EXACT [] -is_a: SO:0000865 ! 
frameshift - -[Term] -id: SO:0000869 -name: plus_2_framshift -def: "A frameshift caused by inserting two bases." [SO:ke] -synonym: "plus 2 framshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000870 -name: trans_spliced -def: "An attribute describing transcript sequence that is created by splicing exons from diferent genes." [SO:ke] -synonym: "trans-spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000871 -name: polyadenylated_mRNA -def: "An mRNA that is polyadenylated." [SO:xp] -synonym: "polyadenylated mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000610 ! polyA_sequence -intersection_of: has_quality SO:0000246 ! polyadenylated - -[Term] -id: SO:0000872 -name: trans_spliced_mRNA -def: "An mRNA that is trans-spliced." [SO:xp] -synonym: "trans-spliced mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -is_a: SO:0000479 ! implied link automatically realized ! trans_spliced_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000636 ! spliced_leader_RNA -intersection_of: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000873 -name: edited_transcript -def: "A transcript that is edited." [SO:ke] -synonym: "edited transcript" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: guided_by SO:0000602 ! guide_RNA -intersection_of: has_part SO:0000977 ! anchor_binding_site -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000874 -name: edited_transcript_by_A_to_I_substitution -def: "A transcript that has been edited by A to I substitution." [SO:ke] -synonym: "edited transcript by A to I substitution" EXACT [] -is_a: SO:0000873 ! edited_transcript -is_a: SO:0000929 ! implied link automatically realized ! 
edited_mRNA - -[Term] -id: SO:0000875 -name: bound_by_protein -def: "An attribute describing a sequence that is bound by a protein." [SO:ke] -synonym: "bound by protein" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000876 -name: bound_by_nucleic_acid -def: "An attribute describing a sequence that is bound by a nucleic acid." [SO:ke] -synonym: "bound by nucleic acid" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000877 -name: alternatively_spliced -def: "An attribute describing a situation where a gene may encode for more than 1 transcript." [SO:ke] -synonym: "alternatively spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000878 -name: monocistronic -def: "An attribute describing a sequence that contains the code for one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000879 -name: dicistronic -def: "An attribute describing a sequence that contains the code for two gene products." [SO:ke] -is_a: SO:0000880 ! polycistronic - -[Term] -id: SO:0000880 -name: polycistronic -def: "An attribute describing a sequence that contains the code for more than one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000881 -name: recoded -def: "An attribute describing an mRNA sequence that has been reprogrammed at translation, causing localized alterations." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000882 -name: codon_redefined -def: "An attribute describing the alteration of codon meaning." [SO:ke] -synonym: "codon redefined" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000883 -name: stop_codon_read_through -def: "A stop codon redefined to be a new amino acid." [SO:ke] -synonym: "stop codon read through" EXACT [] -synonym: "stop codon readthrough" RELATED [] -is_a: SO:0000145 ! 
recoded_codon - -[Term] -id: SO:0000884 -name: stop_codon_redefined_as_pyrrolysine -def: "A stop codon redefined to be the new amino acid, pyrrolysine." [SO:ke] -synonym: "stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000885 -name: stop_codon_redefined_as_selenocysteine -def: "A stop codon redefined to be the new amino acid, selenocysteine." [SO:ke] -synonym: "stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000886 -name: recoded_by_translational_bypass -def: "Recoded mRNA where a block of nucleotides is not translated." [SO:ke] -synonym: "recoded by translational bypass" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000887 -name: translationally_frameshifted -def: "Recoding by frameshifting a particular site." [SO:ke] -synonym: "translationally frameshifted" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000888 -name: maternally_imprinted_gene -def: "A gene that is maternally_imprinted." [SO:xp] -synonym: "maternally imprinted gene" EXACT [] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000135 ! maternally_imprinted - -[Term] -id: SO:0000889 -name: paternally_imprinted_gene -def: "A gene that is paternally imprinted." [SO:xp] -synonym: "paternally imprinted gene" EXACT [] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000136 ! paternally_imprinted - -[Term] -id: SO:0000890 -name: post_translationally_regulated_gene -def: "A gene that is post translationally regulated." [SO:xp] -synonym: "post translationally regulated gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000130 ! 
post_translationally_regulated - -[Term] -id: SO:0000891 -name: negatively_autoregulated_gene -def: "A gene that is negatively autoreguated." [SO:xp] -synonym: "negatively autoregulated gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000473 ! negatively_autoregulated - -[Term] -id: SO:0000892 -name: positively_autoregulated_gene -def: "A gene that is positively autoregulated." [SO:xp] -synonym: "positively autoregulated gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000475 ! positively_autoregulated - -[Term] -id: SO:0000893 -name: silenced -def: "An attribute describing an epigenetic process where a gene is inactivated at transcriptional or translational level." [SO:ke] -xref: http://en.wikipedia.org/wiki/Silenced "wiki" -is_a: SO:0000126 ! transcriptionally_repressed - -[Term] -id: SO:0000894 -name: silenced_by_DNA_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA modifications, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0000895 -name: silenced_by_DNA_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA methylation, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA methylation" EXACT [] -is_a: SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000896 -name: translationally_regulated_gene -def: "A gene that is translationally regulated." [SO:xp] -synonym: "translationally regulated gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000131 ! 
translationally_regulated - -[Term] -id: SO:0000897 -name: allelically_excluded_gene -def: "A gene that is allelically_excluded." [SO:xp] -synonym: "allelically excluded gene" EXACT [] -is_a: SO:0000898 ! implied link automatically realized ! epigenetically_modified_gene -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000137 ! allelically_excluded - -[Term] -id: SO:0000898 -name: epigenetically_modified_gene -def: "A gene that is epigenetically modified." [SO:ke] -synonym: "epigenetically modified gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -is_a: SO:0001720 ! implied link automatically realized ! epigenetically_modified_region -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000899 -name: nuclear_mitochondrial -def: "An attribute describing a nuclear pseudogene of a mitochndrial gene." [SO:ke] -synonym: "nuclear mitochondrial" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000900 -name: processed -def: "An attribute describing a pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promotors, but often including a polyA tail." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000901 -name: unequally_crossed_over -def: "An attribute describing a pseudogene that was created by tandem duplication and unequal crossing over during recombination." [SO:ke] -synonym: "unequally crossed over" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000902 -name: transgene -def: "A transgene is a gene that has been transferred naturally or by any of a number of genetic engineering techniques from one organism to another." [SO:xp] -xref: http://en.wikipedia.org/wiki/Transgene "wiki" -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000781 ! 
transgenic - -[Term] -id: SO:0000903 -name: endogenous_retroviral_sequence -synonym: "endogenous retroviral sequence" EXACT [] -is_a: SO:0000751 ! proviral_location - -[Term] -id: SO:0000904 -name: rearranged_at_DNA_level -def: "An attribute to describe the sequence of a feature, where the DNA is rearranged." [SO:ke] -synonym: "rearranged at DNA level" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000905 -name: status -def: "An attribute describing the status of a feature, based on the available evidence." [SO:ke] -comment: This term is the hypernym of attributes and should not be annotated to. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000906 -name: independently_known -def: "Attribute to describe a feature that is independently known - not predicted." [SO:ke] -synonym: "independently known" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000907 -name: supported_by_sequence_similarity -def: "An attribute to describe a feature that has been predicted using sequence similarity techniques." [SO:ke] -synonym: "supported by sequence similarity" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000908 -name: supported_by_domain_match -def: "An attribute to describe a feature that has been predicted using sequence similarity of a known domain." [SO:ke] -synonym: "supported by domain match" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000909 -name: supported_by_EST_or_cDNA -def: "An attribute to describe a feature that has been predicted using sequence similarity to EST or cDNA data." [SO:ke] -synonym: "supported by EST or cDNA" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000910 -name: orphan -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000911 -name: predicted_by_ab_initio_computation -def: "An attribute describing a feature that is predicted by a computer program that did not rely on sequence similarity." 
[SO:ke] -synonym: "predicted by ab initio computation" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000912 -name: asx_turn -alt_id: BS:00203 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Aspartate or Asparagine (Asx), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0000913 -name: cloned_cDNA_insert -def: "A clone insert made from cDNA." [SO:xp] -synonym: "cloned cDNA insert" EXACT [] -is_a: SO:0000753 ! implied link automatically realized ! clone_insert -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000914 -name: cloned_genomic_insert -def: "A clone insert made from genomic DNA." [SO:xp] -synonym: "cloned genomic insert" EXACT [] -is_a: SO:0000753 ! implied link automatically realized ! clone_insert -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000915 -name: engineered_insert -def: "A clone insert that is engineered." [SO:xp] -synonym: "engineered insert" EXACT [] -is_a: SO:0000753 ! implied link automatically realized ! clone_insert -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000916 -name: edit_operation -synonym: "edit operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000917 -name: insert_U -def: "An edit to insert a U." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "insert U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000918 -name: delete_U -def: "An edit to delete a uridine." 
[SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "delete U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000919 -name: substitute_A_to_I -def: "An edit to substitute an I for an A." [SO:ke] -synonym: "substitute A to I" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000920 -name: insert_C -def: "An edit to insert a cytidine." [SO:ke] -synonym: "insert C" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000921 -name: insert_dinucleotide -def: "An edit to insert a dinucleotide." [SO:ke] -synonym: "insert dinucleotide" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000922 -name: substitute_C_to_U -def: "An edit to substitute an U for a C." [SO:ke] -synonym: "substitute C to U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000923 -name: insert_G -def: "An edit to insert a G." [SO:ke] -synonym: "insert G" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000924 -name: insert_GC -def: "An edit to insert a GC dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GC" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000925 -name: insert_GU -def: "An edit to insert a GU dinucleotide." 
[SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000926 -name: insert_CU -def: "An edit to insert a CU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert CU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000927 -name: insert_AU -def: "An edit to insert a AU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. 
Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000928 -name: insert_AA -def: "An edit to insert a AA dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AA" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000929 -name: edited_mRNA -def: "An mRNA that is edited." [SO:xp] -synonym: "edited mRNA" EXACT [] -is_a: SO:0000873 ! implied link automatically realized ! edited_transcript -intersection_of: SO:0000873 ! edited_transcript -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000930 -name: guide_RNA_region -def: "A region of guide RNA." [SO:ma] -synonym: "guide RNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000602 ! guide_RNA - -[Term] -id: SO:0000931 -name: anchor_region -def: "A region of a guide_RNA that base-pairs to a target mRNA." [SO:jk] -synonym: "anchor region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000932 -name: pre_edited_mRNA -synonym: "pre-edited mRNA" EXACT [] -is_a: SO:0000120 ! 
protein_coding_primary_transcript - -[Term] -id: SO:0000933 -name: intermediate -def: "An attribute to describe a feature between stages of processing." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000934 -name: miRNA_target_site -def: "A miRNA target site is a binding site where the molecule is a micro RNA." [FB:cds] -synonym: "miRNA target site" EXACT [] -is_a: SO:0001655 ! nucleotide_binding_site - -[Term] -id: SO:0000935 -name: edited_CDS -def: "A CDS that is edited." [SO:xp] -synonym: "edited CDS" EXACT [] -is_a: SO:0000316 ! implied link automatically realized ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000936 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -synonym: "vertebrate immunoglobulin T cell receptor rearranged segment" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000937 -name: vertebrate_immune_system_feature -is_obsolete: true - -[Term] -id: SO:0000938 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor rearranged gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000939 -name: vertebrate_immune_system_gene_recombination_signal_feature -synonym: "vertebrate immune system gene recombination signal feature" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000940 -name: recombinationally_rearranged -synonym: "recombinationally rearranged" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000941 -name: recombinationally_rearranged_vertebrate_immune_system_gene -def: "A recombinationally rearranged gene of the vertebrate immune system." [SO:xp] -synonym: "recombinationally rearranged vertebrate immune system gene" EXACT [] -is_a: SO:0000456 ! 
recombinationally_rearranged_gene - -[Term] -id: SO:0000942 -name: attP_site -def: "An integration/excision site of a phage chromosome at which a recombinase acts to insert the phage DNA at a cognate integration/excision site on a bacterial chromosome." [SO:as] -synonym: "attP site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0001042 ! phage_sequence - -[Term] -id: SO:0000943 -name: attB_site -def: "An integration/excision site of a bacterial chromosome at which a recombinase acts to insert foreign DNA containing a cognate integration/excision site." [SO:as] -synonym: "attB site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000944 -name: attL_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attB_site and the 3' portion of attP_site." [SO:as] -synonym: "attBP'" RELATED [] -synonym: "attL site" RELATED [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000945 -name: attR_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attP_site and the 3' portion of attB_site." [SO:as] -synonym: "attPB'" RELATED [] -synonym: "attR site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000946 -name: integration_excision_site -def: "A region specifically recognised by a recombinase, which inserts or removes another region marked by a distinct cognate integration/excision site." [SO:as] -synonym: "attachment site" RELATED [] -synonym: "integration excision site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000947 -name: resolution_site -def: "A region specifically recognised by a recombinase, which separates a physically contiguous circle of DNA into two physically separate circles." [SO:as] -synonym: "res site" EXACT [] -synonym: "resolution site" EXACT [] -is_a: SO:0000342 ! 
site_specific_recombination_target_region - -[Term] -id: SO:0000948 -name: inversion_site -def: "A region specifically recognised by a recombinase, which inverts the region flanked by a pair of sites." [SO:ma] -comment: A target region for site-specific inversion of a DNA region and which carries binding sites for a site-specific recombinase and accessory proteins as well as the site for specific cleavage by the recombinase. -synonym: "inversion site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000949 -name: dif_site -def: "A site at which replicated bacterial circular chromosomes are decatenated by site specific resolvase." [SO:as] -synonym: "dif site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000950 -name: attC_site -def: "An attC site is a sequence required for the integration of a DNA of an integron." [SO:as] -synonym: "attC site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000951 -name: eukaryotic_terminator -synonym: "eukaryotic terminator" EXACT [] -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000952 -name: oriV -def: "An origin of vegetative replication in plasmids and phages." [SO:as] -synonym: "origin of vegetative replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000953 -name: oriC -def: "An origin of bacterial chromosome replication." [SO:as] -synonym: "origin of bacterial chromosome replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000954 -name: DNA_chromosome -def: "Structural unit composed of a self-replicating, DNA molecule." [SO:ma] -synonym: "DNA chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000352 ! 
DNA - -[Term] -id: SO:0000955 -name: double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded DNA molecule." [SO:ma] -synonym: "double stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! implied link automatically realized ! DNA_chromosome -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000985 ! double - -[Term] -id: SO:0000956 -name: single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded DNA molecule." [SO:ma] -synonym: "single stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! implied link automatically realized ! DNA_chromosome -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000984 ! single - -[Term] -id: SO:0000957 -name: linear_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear DNA molecule." [SO:ma] -synonym: "linear double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! implied link automatically realized ! double_stranded_DNA_chromosome -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000958 -name: circular_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular DNA molecule." [SO:ma] -synonym: "circular double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! implied link automatically realized ! double_stranded_DNA_chromosome -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000959 -name: linear_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear DNA molecule." [SO:ma] -synonym: "linear single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! implied link automatically realized ! single_stranded_DNA_chromosome -intersection_of: SO:0000956 ! 
single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000960 -name: circular_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! implied link automatically realized ! single_stranded_DNA_chromosome -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000961 -name: RNA_chromosome -def: "Structural unit composed of a self-replicating, RNA molecule." [SO:ma] -synonym: "RNA chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000356 ! RNA - -[Term] -id: SO:0000962 -name: single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded RNA molecule." [SO:ma] -synonym: "single stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! implied link automatically realized ! RNA_chromosome -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000984 ! single - -[Term] -id: SO:0000963 -name: linear_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear RNA molecule." [SO:ma] -synonym: "linear single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! implied link automatically realized ! single_stranded_RNA_chromosome -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000964 -name: linear_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear RNA molecule." [SO:ma] -synonym: "linear double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! implied link automatically realized ! 
double_stranded_RNA_chromosome -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000965 -name: double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded RNA molecule." [SO:ma] -synonym: "double stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! implied link automatically realized ! RNA_chromosome -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000985 ! double - -[Term] -id: SO:0000966 -name: circular_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! implied link automatically realized ! single_stranded_RNA_chromosome -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000967 -name: circular_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular RNA molecule." [SO:ma] -synonym: "circular double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! implied link automatically realized ! double_stranded_RNA_chromosome -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000968 -name: sequence_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "sequence replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000969 -name: rolling_circle -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070581. -synonym: "rolling circle" EXACT [] -xref: http://en.wikipedia.org/wiki/Rolling_circle "wiki" -is_obsolete: true - -[Term] -id: SO:0000970 -name: theta_replication -comment: This has been obsoleted as it represents a process. 
replaced_by: GO:0070582 -synonym: "theta replication" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000971 -name: DNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0006260. -synonym: "DNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000972 -name: RNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "RNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000973 -name: insertion_sequence -def: "A terminal_inverted_repeat_element that is bacterial and only encodes the functions required for its transposition between these inverted repeats." [SO:as] -synonym: "insertion sequence" EXACT [] -synonym: "IS" RELATED [] -xref: http://en.wikipedia.org/wiki/Insertion_sequence "wiki" -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000975 -name: minicircle_gene -synonym: "minicircle gene" EXACT [] -is_a: SO:0000089 ! implied link automatically realized ! kinetoplast_gene -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000980 ! minicircle - -[Term] -id: SO:0000976 -name: cryptic -def: "A feature_attribute describing a feature that is not manifest under normal conditions." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000977 -name: anchor_binding_site -comment: Part of an edited transcript only. -synonym: "anchor binding site" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000978 -name: template_region -def: "A region of a guide_RNA that specifies the insertions and deletions of bases in the editing of a target mRNA." [SO:jk] -synonym: "information region" EXACT [] -synonym: "template region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000979 -name: gRNA_encoding -def: "A non-protein_coding gene that encodes a guide_RNA." [SO:ma] -synonym: "gRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000980 -name: minicircle -alt_id: SO:0000974 -def: "A minicircle is a replicon, part of a kinetoplast, that encodes for guide RNAs." [PMID:8395055] -synonym: "minicircle_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Minicircle "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000741 ! kinetoplast - -[Term] -id: SO:0000981 -name: rho_dependent_bacterial_terminator -synonym: "rho dependent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000982 -name: rho_independent_bacterial_terminator -synonym: "rho independent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000983 -name: strand_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "strand attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000984 -name: single -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000985 -name: double -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000986 -name: topology_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "topology attribute" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000987 -name: linear -def: "A quality of a nucleotide polymer that has a 3'-terminal residue and a 5'-terminal residue." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "two-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000988 ! circular - -[Term] -id: SO:0000988 -name: circular -def: "A quality of a nucleotide polymer that has no terminal nucleotide residues." 
[SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "zero-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000987 ! linear - -[Term] -id: SO:0000989 -name: class_II_RNA -def: "Small non-coding RNA (59-60 nt long) containing 5' and 3' ends that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -synonym: "class II RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000990 -name: class_I_RNA -def: "Small non-coding RNA (55-65 nt long) containing highly conserved 5' and 3' ends (16 and 8 nt, respectively) that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -comment: Requested by Karen Pilcher - Dictybase. song-Term Tracker-1574577. -synonym: "class I RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000991 -name: genomic_DNA -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "genomic DNA" EXACT [] -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000992 -name: BAC_cloned_genomic_insert -comment: Requested by Andy Schroder - Flybase Harvard, Nov 2006. -synonym: "BAC cloned genomic insert" EXACT [] -is_a: SO:0000914 ! implied link automatically realized ! cloned_genomic_insert -intersection_of: SO:0000914 ! cloned_genomic_insert -intersection_of: derives_from SO:0000153 ! BAC - -[Term] -id: SO:0000993 -name: consensus -comment: Term added Dec 06 to comply with mapping to MGED terms. It should be used to generate consensus regions. The specific cross product terms they require are consensus_region and consensus_mRNA. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000994 -name: consensus_region -comment: DO not obsolete without considering MGED mapping. 
-synonym: "consensus region" EXACT [] -is_a: SO:0001410 ! experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000995 -name: consensus_mRNA -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus mRNA" EXACT [] -is_a: SO:0000234 ! implied link automatically realized ! mRNA -is_a: SO:0000994 ! implied link automatically realized ! consensus_region -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000996 -name: predicted_gene -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "predicted gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000732 ! predicted - -[Term] -id: SO:0000997 -name: gene_fragment -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "gene fragment" EXACT [] -is_a: SO:0000842 ! implied link automatically realized ! gene_component_region -intersection_of: SO:0000842 ! gene_component_region -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0000998 -name: recursive_splice_site -def: "A recursive splice site is a splice site which subdivides a large intron. Recursive splicing is a mechanism that splices large introns by sub dividing the intron at non exonic elements and alternate exons." [http://www.genetics.org/cgi/content/full/170/2/661] -synonym: "recursive splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000999 -name: BAC_end -def: "A region of sequence from the end of a BAC clone that may provide a highly specific marker." [SO:ke] -comment: Requested by Keith Boroevich December, 2006. -synonym: "BAC end" EXACT [] -synonym: "BAC end sequence" EXACT [] -synonym: "BES" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000153 ! 
BAC - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001003 -name: solo_LTR -def: "A recombination product between the 2 LTR of the same element." [SO:ke] -comment: Requested by Hadi Quesneville January 2007. -synonym: "solo LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0001004 -name: low_complexity -synonym: "low complexity" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0001005 -name: low_complexity_region -synonym: "low complexity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001004 ! low_complexity - -[Term] -id: SO:0001006 -name: prophage -def: "A phage genome after it has established in the host genome in a latent/immune state either as a plasmid or as an integrated \"island\"." [GOC:jl] -xref: http://en.wikipedia.org/wiki/Prophage "wiki" -is_a: SO:0000113 ! 
proviral_region - -[Term] -id: SO:0001007 -name: cryptic_prophage -def: "A remnant of an integrated prophage in the host genome or an \"island\" in the host genome that includes phage like-genes." [GOC:jl] -comment: This is not cryptic in the same sense as a cryptic gene or cryptic splice site. -synonym: "cryptic prophage" EXACT [] -xref: http://ecoliwiki.net/colipedia/index.php/Category\:Cryptic_Prophage.w -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001008 -name: tetraloop -def: "A base-paired stem with loop of 4 non-hydrogen bonded nucleotides." [SO:ke] -xref: http://en.wikipedia.org/wiki/Tetraloop "wiki" -is_a: SO:0000313 ! stem_loop - -[Term] -id: SO:0001009 -name: DNA_constraint_sequence -def: "A double-stranded DNA used to control macromolecular structure and function." [http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au\]&dispmax=50] -synonym: "DNA constraint" EXACT [] -synonym: "DNA constraint sequence" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0001010 -name: i_motif -def: "A cytosine rich domain whereby strands associate both inter- and intramolecularly at moderately acidic pH." [PMID:9753739] -synonym: "i motif" EXACT [] -synonym: "short intercalated motif" EXACT [] -is_a: SO:0000142 ! DNA_sequence_secondary_structure - -[Term] -id: SO:0001011 -name: PNA_oligo -def: "Peptide nucleic acid, is a chemical not known to occur naturally but is artificially synthesized and used in some biological research and medical treatments. The PNA backbone is composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [SO:ke] -synonym: "peptide nucleic acid" EXACT [] -synonym: "PNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Peptide_nucleic_acid "wiki" -is_a: SO:0001247 ! implied link automatically realized ! synthetic_oligo -intersection_of: SO:0001247 ! 
synthetic_oligo -intersection_of: has_quality SO:0001184 ! PNA - -[Term] -id: SO:0001012 -name: DNAzyme -def: "A DNA sequence with catalytic activity." [SO:cb] -comment: Added by request from Colin Batchelor. -synonym: "catalytic DNA" EXACT [] -synonym: "deoxyribozyme" RELATED [] -synonym: "DNA enzyme" EXACT [] -is_a: SO:0000696 ! implied link automatically realized ! oligo -intersection_of: SO:0000696 ! oligo -intersection_of: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0001013 -name: MNP -def: "A multiple nucleotide polymorphism with alleles of common length > 1, for example AAA/TTT." [http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?rs=rs2067431] -synonym: "multiple nucleotide polymorphism" RELATED [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0001014 -name: intron_domain -comment: Requested by Colin Batchelor, Feb 2007. -synonym: "intron domain" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000188 ! intron - -[Term] -id: SO:0001015 -name: wobble_base_pair -def: "A type of non-canonical base pairing, most commonly between G and U, which is important for the secondary structure of RNAs. It has similar thermodynamic stability to the Watson-Crick pairing. Wobble base pairs only have two hydrogen bonds. Other wobble base pair possibilities are I-A, I-U and I-C." [PMID:11256617] -synonym: "wobble base pair" EXACT [] -synonym: "wobble pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Wobble_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0001016 -name: internal_guide_sequence -def: "A purine-rich sequence in the group I introns which determines the locations of the splice sites in group I intron splicing and has catalytic activity." [SO:cb] -synonym: "IGS" EXACT [] -synonym: "internal guide sequence" EXACT [] -is_a: SO:0001014 ! intron_domain -relationship: part_of SO:0000587 ! 
group_I_intron - -[Term] -id: SO:0001017 -name: silent_mutation -def: "A sequence variant that does not affect protein function. Silent mutations may occur in genic ( CDS, UTR, intron etc) and intergenic regions. Silent mutations may have affects on processes such as splicing and regulation." [SO:ke] -comment: Added in March 2007 in after meeting with pharmgkb. Although this term is in common usage, it is better to annotate with the most specific term possible, such as synonymous codon, intron variant etc. -synonym: "silent mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Silent_mutation "wiki" -is_a: SO:0001537 ! structural_variant - -[Term] -id: SO:0001018 -name: epitope -def: "A binding site that, in the molecule, interacts selectively and non-covalently with antibodies, B cells or T cells." [http://en.wikipedia.org/wiki/Epitope, SO:cb] -comment: Requested by Trish Whetzel. -xref: http://en.wikipedia.org/wiki/Epitope "wiki" -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -subset: SOFA -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0001020 -name: sequence_variant_affecting_copy_number -comment: OBSOLETE: This term was deleted as it conflated more than one term. The alteration is separate from the effect. -synonym: "mutation affecting copy number" EXACT [] -synonym: "sequence variant affecting copy number" EXACT [] -is_obsolete: true -replaced_by: SO:0001563 - -[Term] -id: SO:0001021 -name: chromosome_breakpoint -alt_id: SO:0001242 -synonym: "aberration breakpoint" EXACT [] -synonym: "aberration_junction" EXACT [] -synonym: "chromosome breakpoint" EXACT [] -is_a: SO:0000699 ! 
junction -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0001022 -name: inversion_breakpoint -def: "The point within a chromosome where an inversion begins or ends." [SO:cb] -synonym: "inversion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001023 -name: allele -def: "An allele is one of a set of coexisting sequence variants of a gene." [SO:immuno_workshop] -synonym: "allelomorph" EXACT [] -xref: http://en.wikipedia.org/wiki/Allele "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000704 ! gene - -[Term] -id: SO:0001024 -name: haplotype -def: "A haplotype is one of a set of coexisting sequence variants of a haplotype block." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Haplotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000355 ! haplotype_block - -[Term] -id: SO:0001025 -name: polymorphic_sequence_variant -def: "A sequence variant that is segregating in one or more natural populations of a species." [SO:immuno_workshop] -synonym: "polymorphic sequence variant" EXACT [] -is_a: SO:0001023 ! allele - -[Term] -id: SO:0001026 -name: genome -def: "A genome is the sum of genetic material within a cell or virion." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genome "wiki" -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0001235 ! replicon - -[Term] -id: SO:0001027 -name: genotype -def: "A genotype is a variant genome, complete or incomplete." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0001026 ! genome - -[Term] -id: SO:0001028 -name: diplotype -def: "A diplotype is a pair of haplotypes from a given individual. It is a genotype where the phase is known." [SO:immuno_workshop] -is_a: SO:0001507 ! 
variant_collection - -[Term] -id: SO:0001029 -name: direction_attribute -synonym: "direction attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001030 -name: forward -def: "Forward is an attribute of the feature, where the feature is in the 5' to 3' direction." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001031 -name: reverse -def: "Reverse is an attribute of the feature, where the feature is in the 3' to 5' direction. Again could be applied to primer." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001032 -name: mitochondrial_DNA -comment: This terms is used by MO. -synonym: "mitochondrial DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_DNA "wiki" -is_a: SO:0000737 ! implied link automatically realized ! mitochondrial_sequence -intersection_of: SO:0000737 ! mitochondrial_sequence -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001033 -name: chloroplast_DNA -comment: This term is used by MO. -synonym: "chloroplast DNA" EXACT [] -is_a: SO:0000745 ! implied link automatically realized ! chloroplast_sequence -intersection_of: SO:0000745 ! chloroplast_sequence -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001034 -name: mirtron -def: "A de-branched intron which mimics the structure of pre-miRNA and enters the miRNA processing pathway without Drosha mediated cleavage." [PMID:17589500, SO:ma] -comment: Ruby et al. Nature 448:83 describe a new class of miRNAs that are derived from de-branched introns. -is_a: SO:0001014 ! intron_domain - -[Term] -id: SO:0001035 -name: piRNA -def: "A small non coding RNA, part of a silencing system that prevents the spreading of selfish genetic elements." [SO:ke] -synonym: "piwi-associated RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/PiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001036 -name: arginyl_tRNA -def: "A tRNA sequence that has an arginine anticodon, and a 3' arginine binding region." 
[SO:ke] -synonym: "arginyl tRNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000212 ! arginine_tRNA_primary_transcript - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0001038 -name: extrachromosomal_mobile_genetic_element -def: "An MGE that is not integrated into the host chromosome." [SO:ke] -synonym: "extrachromosomal mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001040 -name: integrated_plasmid -def: "A plasmid sequence that is integrated within the host chromosome." [SO:ke] -synonym: "integrated plasmid" EXACT [] -is_a: SO:0001039 ! implied link automatically realized ! integrated_mobile_genetic_element -intersection_of: SO:0001039 ! integrated_mobile_genetic_element -intersection_of: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0001041 -name: viral_sequence -def: "The region of nucleotide sequence of a virus, a submicroscopic particle that replicates by infecting a host cell." [SO:ke] -comment: The definitions of the children of this term were revised Decemeber 2007 after discussion on song-devel. The resulting definitions are slightly unweildy but hopefully more logically correct. 
-synonym: "viral sequence" EXACT [] -synonym: "virus sequence" EXACT [] -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0001042 -name: phage_sequence -def: "The nucleotide sequence of a virus that infects bacteria." [SO:ke] -synonym: "bacteriophage" EXACT [] -synonym: "phage" EXACT [] -synonym: "phage sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Bacteriophage "wiki" -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001043 -name: attCtn_site -def: "An attachment site located on a conjugative transposon and used for site-specific integration of a conjugative transposon." [Phigo:at] -synonym: "attCtn site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000371 ! conjugative_transposon - -[Term] -id: SO:0001044 -name: nuclear_mt_pseudogene -def: "A nuclear pseudogene of a mitochndrial gene." [SO:xp] -synonym: "nuclear mitochondrial pseudogene" EXACT [] -synonym: "nuclear mt pseudogene" EXACT [] -synonym: "NUMT" EXACT [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0001045 -name: cointegrated_plasmid -def: "A MGE region consisting of two fused plasmids resulting from a replicative transposition event." [phigo:at] -synonym: "cointegrated plasmid" EXACT [] -synonym: "cointegrated replicon" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0001046 -name: IRLinv_site -def: "Component of the inversion site located at the left of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRLinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001047 -name: IRRinv_site -def: "Component of the inversion site located at the right of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRRinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! 
inversion_site - -[Term] -id: SO:0001048 -name: inversion_site_part -def: "A region located within an inversion site." [SO:ke] -comment: A term created to allow the parts of an inversion site have an is_a path back to the root. -synonym: "inversion site part" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0001049 -name: defective_conjugative_transposon -def: "An island that contains genes for integration/excision and the gene and site for the initiation of intercellular transfer by conjugation. It can be complemented for transfer by a conjugative transposon." [Phigo:ariane] -synonym: "defective conjugative transposon" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001050 -name: repeat_fragment -def: "A portion of a repeat, interrupted by the insertion of another element." [SO:ke] -comment: Requested by Chris Smith, and others at Flybase to help annotate nested repeats. -synonym: "repeat fragment" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0001649 ! nested_repeat - -[Term] -id: SO:0001051 -name: nested_region -is_obsolete: true - -[Term] -id: SO:0001052 -name: nested_repeat -is_obsolete: true - -[Term] -id: SO:0001053 -name: nested_transposon -is_obsolete: true - -[Term] -id: SO:0001054 -name: transposon_fragment -def: "A portion of a transposon, interrupted by the insertion of another element." [SO:ke] -synonym: "transposon fragment" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0001648 ! nested_transposon - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -subset: SOFA -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." 
[SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001057 -name: enhanceosome -is_obsolete: true - -[Term] -id: SO:0001058 -name: promoter_targeting_sequence -def: "A transcriptional_cis_regulatory_region that restricts the activity of a CRM to a single promoter and which functions only when both itself and an insulator are located between the CRM and the promoter." [SO:regcreative] -synonym: "promoter targeting sequence" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. -subset: SOFA -synonym: "partially characterised change in DNA sequence" EXACT [] -synonym: "partially_characterised_change_in_DNA_sequence" EXACT [] -synonym: "sequence alteration" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001060 -name: sequence_variant -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -synonym: "sequence variant" EXACT [] - -[Term] -id: SO:0001061 -name: propeptide_cleavage_site -alt_id: BS:00063 -def: "The propeptide_cleavage_site is the arginine/lysine boundary on a propeptide where cleavage occurs." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "propeptide cleavage site" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0001062 -name: propeptide -alt_id: BS:00077 -def: "Part of a peptide chain which is cleaved off during the formation of the mature protein." [EBIBS:GAR] -comment: Range. 
-subset: biosapiens -synonym: "propep" RELATED [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Propeptide "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature_peptide_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001064 -name: active_peptide -alt_id: BS:00076 -def: "Active peptides are proteins which are biologically active, released from a precursor molecule." [EBIBS:GAR, UniProt:curation_manual] -comment: Hormones, neuropeptides, antimicrobial peptides, are active peptides. They are typically short (<40 amino acids) in length. -subset: biosapiens -synonym: "active peptide" EXACT [] -synonym: "peptide" BROAD [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Peptide "wiki" -is_a: SO:0000419 ! mature_protein_region - -[Term] -id: SO:0001066 -name: compositionally_biased_region_of_peptide -alt_id: BS:00068 -def: "Polypeptide region that is rich in a particular amino acid or homopolymeric and greater than three residues in length." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "compbias" RELATED [uniprot:feature_type] -synonym: "compositional bias" RELATED [] -synonym: "compositionally biased" RELATED [] -synonym: "compositionally biased region of peptide" RELATED [] -synonym: "compositionally_biased_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001067 -name: polypeptide_motif -alt_id: BS:00032 -def: "A sequence motif is a short (up to 20 amino acids) region of biological interest. Such motifs, although they are too short to constitute functional domains, share sequence similarities and are conserved in different proteins. 
They display a common function (protein-binding, subcellular location etc.)." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "motif" BROAD [uniprot:feature_type] -synonym: "polypeptide motif" EXACT [] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001068 -name: polypeptide_repeat -alt_id: BS:00070 -def: "A polypeptide_repeat is a single copy of an internal sequence repetition." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "polypeptide repeat" EXACT [] -synonym: "repeat" RELATED [uniprot:feature_type] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001070 -name: polypeptide_structural_region -alt_id: BS:00337 -def: "Region of polypeptide with a given structural property." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "polypeptide structural region" EXACT [] -synonym: "structural_region" RELATED [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001071 -name: membrane_structure -alt_id: BS:00128 -def: "Arrangement of the polypeptide with respect to the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "membrane_structure" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001072 -name: extramembrane_polypeptide_region -alt_id: BS:00154 -def: "Polypeptide region that is localized outside of a lipid bilayer." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "extramembrane" RELATED BS [] -synonym: "extramembrane polypeptide region" EXACT [] -synonym: "extramembrane_region" RELATED BS [] -synonym: "topo_dom" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001073 -name: cytoplasmic_polypeptide_region -alt_id: BS:00145 -def: "Polypeptide region that is localized inside the cytoplasm." 
[EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "cytoplasm_location" EXACT BS [] -synonym: "cytoplasmic polypeptide region" EXACT [] -synonym: "inside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001074 -name: non_cytoplasmic_polypeptide_region -alt_id: BS:00144 -def: "Polypeptide region that is localized outside of a lipid bilayer and outside of the cytoplasm." [EBIBS:GAR, SO:cb] -comment: This could be inside an organelle within the cell. -subset: biosapiens -synonym: "non cytoplasmic polypeptide region" EXACT [] -synonym: "non_cytoplasm_location" EXACT BS [] -synonym: "outside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001075 -name: intramembrane_polypeptide_region -alt_id: BS:00156 -def: "Polypeptide region present in the lipid bilayer." [EBIBS:GAR] -subset: biosapiens -synonym: "intramembrane" RELATED BS [] -synonym: "intramembrane polypeptide region" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001076 -name: membrane_peptide_loop -alt_id: BS:00155 -def: "Polypeptide region localized within the lipid bilayer where both ends traverse the same membrane." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "membrane peptide loop" EXACT [] -synonym: "membrane_loop" RELATED BS [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001077 -name: transmembrane_polypeptide_region -alt_id: BS:00158 -def: "Polypeptide region traversing the lipid bilayer." [EBIBS:GAR, UniProt:curator_manual] -subset: biosapiens -synonym: "transmem" RELATED BS [uniprot:feature_type] -synonym: "transmembrane" RELATED BS [] -synonym: "transmembrane polypeptide region" EXACT [] -is_a: SO:0001075 ! 
intramembrane_polypeptide_region - -[Term] -id: SO:0001078 -name: polypeptide_secondary_structure -alt_id: BS:00003 -def: "A region of peptide with secondary structure has hydrogen bonding along the peptide chain that causes a defined conformation of the chain." [EBIBS:GAR] -comment: Biosapien term was secondary_structure. -subset: biosapiens -synonym: "2nary structure" RELATED BS [] -synonym: "polypeptide secondary structure" EXACT [] -synonym: "secondary structure" RELATED BS [] -synonym: "secondary structure region" RELATED BS [] -synonym: "secondary_structure" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Secondary_structure "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001079 -name: polypeptide_structural_motif -alt_id: BS:0000338 -def: "Motif is a three-dimensional structural element within the chain, which appears also in a variety of other molecules. Unlike a domain, a motif does not need to form a stable globular unit." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide structural motif" RELATED [] -synonym: "structural_motif" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Structural_motif "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001080 -name: coiled_coil -alt_id: BS:00041 -def: "A coiled coil is a structural motif in proteins, in which alpha-helices are coiled together like the strands of a rope." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "coiled" RELATED BS [uniprot:feature_type] -synonym: "coiled coil" EXACT [] -xref: http://en.wikipedia.org/wiki/Coiled_coil "wiki" -is_a: SO:0001079 ! polypeptide_structural_motif - -[Term] -id: SO:0001081 -name: helix_turn_helix -alt_id: BS:00147 -def: "A motif comprising two helices separated by a turn." [EBIBS:GAR] -subset: biosapiens -synonym: "helix turn helix" EXACT [] -synonym: "helix-turn-helix" EXACT [] -synonym: "HTH" RELATED BS [] -is_a: SO:0001079 ! 
polypeptide_structural_motif -relationship: has_part SO:0001114 ! peptide_helix -relationship: has_part SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001082 -name: polypeptide_sequencing_information -alt_id: BS:00125 -def: "Incompatibility in the sequence due to some experimental problem." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "sequencing_information" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0001083 -name: non_adjacent_residues -alt_id: BS:00182 -def: "Indicates that two consecutive residues in a fragment sequence are not consecutive in the full-length protein and that there are a number of unsequenced residues between them." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "non consecutive" EXACT [] -synonym: "non_cons" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001084 -name: non_terminal_residue -alt_id: BS:00072 -def: "The residue at an extremity of the sequence is not the terminal residue." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "non terminal" EXACT [] -synonym: "non_ter" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001085 -name: sequence_conflict -alt_id: BS:00069 -def: "Different sources report differing sequences." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "conflict" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001086 -name: sequence_uncertainty -alt_id: BS:00181 -def: "Describes the positions in a sequence where the authors are unsure about the sequence assignment." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "unsure" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! 
polypeptide_sequencing_information - -[Term] -id: SO:0001087 -name: cross_link -alt_id: BS:00178 -def: "Posttranslationally formed amino acid bonds." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "cross_link" EXACT [] -synonym: "crosslink" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001088 -name: disulfide_bond -alt_id: BS:00028 -def: "The covalent bond between sulfur atoms that binds two peptide chains or different parts of one peptide chain and is a structural determinant in many protein molecules." [EBIBS:GAR, UniProt:curation_manual] -comment: 2 discreet & joined. -subset: biosapiens -synonym: "disulfid" RELATED [] -synonym: "disulfide" RELATED [] -synonym: "disulfide bond" RELATED [] -synonym: "disulfide_bond" EXACT [] -synonym: "disulphide" EXACT [] -synonym: "disulphide bond" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001089 -name: post_translationally_modified_region -alt_id: BS:00052 -def: "A region where a transformation occurs in a protein after it has been synthesized. This which may regulate, stabilize, crosslink or introduce new chemical functionalities in the protein." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mod_res" EXACT [uniprot:feature_type] -synonym: "modified residue" EXACT [] -synonym: "post_translational_modification" EXACT [] -xref: http://en.wikipedia.org/wiki/Post_translational_modification "wiki" -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001090 -name: covalent_binding_site -alt_id: BS:00246 -def: "Binding involving a covalent bond." [EBIBS:GAR] -subset: biosapiens -synonym: "covalent_binding_site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001091 -name: non_covalent_binding_site -alt_id: BS:00029 -def: "Binding site for any chemical group (co-enzyme, prosthetic group, etc.)." [EBIBS:GAR] -comment: Discrete. 
-subset: biosapiens -synonym: "binding" RELATED [uniprot:curation] -synonym: "binding site" RELATED [] -synonym: "non_covalent_binding_site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001092 -name: polypeptide_metal_contact -alt_id: BS:00027 -def: "Residue is part of a binding site for a metal ion." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "metal_binding" RELATED [] -is_a: SO:0001656 ! metal_binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001093 -name: protein_protein_contact -alt_id: BS:00131 -def: "Residues involved in protein-protein interactions." [EBIBS:GAR, UniProt:Curation_manual] -subset: biosapiens -synonym: "protein protein contact" EXACT [] -synonym: "protein protein contact site" EXACT [] -synonym: "protein_protein_interaction" RELATED [] -xref: http://en.wikipedia.org/wiki/Protein_protein_interaction "wiki" -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001094 -name: polypeptide_calcium_ion_contact_site -alt_id: BS:00186 -def: "Residue involved in contact with calcium." [EBIBS:GAR] -subset: biosapiens -synonym: "ca bind" RELATED [] -synonym: "Ca_contact_site" EXACT [] -synonym: "polypeptide calcium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001095 -name: polypeptide_cobalt_ion_contact_site -alt_id: BS:00136 -def: "Residue involved in contact with cobalt." [EBIBS:GAR] -subset: biosapiens -synonym: "Co_contact_site" EXACT [] -synonym: "polypeptide cobalt ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001096 -name: polypeptide_copper_ion_contact_site -alt_id: BS:00146 -def: "Residue involved in contact with copper." [EBIBS:GAR] -subset: biosapiens -synonym: "Cu_contact_site" EXACT [] -synonym: "polypeptide copper ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001097 -name: polypeptide_iron_ion_contact_site -alt_id: BS:00137 -def: "Residue involved in contact with iron." [EBIBS:GAR] -subset: biosapiens -synonym: "Fe_contact_site" EXACT [] -synonym: "polypeptide iron ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001098 -name: polypeptide_magnesium_ion_contact_site -alt_id: BS:00187 -def: "Residue involved in contact with magnesium." [EBIBS:GAR] -subset: biosapiens -synonym: "Mg_contact_site" EXACT [] -synonym: "polypeptide magnesium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001099 -name: polypeptide_manganese_ion_contact_site -alt_id: BS:00140 -def: "Residue involved in contact with manganese." [EBIBS:GAR] -subset: biosapiens -synonym: "Mn_contact_site" EXACT [] -synonym: "polypeptide manganese ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001100 -name: polypeptide_molybdenum_ion_contact_site -alt_id: BS:00141 -def: "Residue involved in contact with molybdenum." [EBIBS:GAR] -subset: biosapiens -synonym: "Mo_contact_site" EXACT [] -synonym: "polypeptide molybdenum ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001101 -name: polypeptide_nickel_ion_contact_site -alt_id: BS:00142 -def: "Residue involved in contact with nickel." [EBIBS:GAR] -subset: biosapiens -synonym: "Ni_contact_site" EXACT [] -synonym: "polypeptide nickel ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001102 -name: polypeptide_tungsten_ion_contact_site -alt_id: BS:00143 -def: "Residue involved in contact with tungsten." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide tungsten ion contact site" EXACT [] -synonym: "W_contact_site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001103 -name: polypeptide_zinc_ion_contact_site -alt_id: BS:00185 -def: "Residue involved in contact with zinc." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide zinc ion contact site" EXACT [] -synonym: "Zn_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001104 -name: catalytic_residue -alt_id: BS:00026 -def: "Amino acid involved in the activity of an enzyme." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "act_site" RELATED [uniprot:feature_type] -synonym: "active site residue" EXACT [] -synonym: "catalytic residue" EXACT [] -is_a: SO:0001237 ! amino_acid -relationship: part_of SO:0100019 ! polypeptide_catalytic_motif - -[Term] -id: SO:0001105 -name: polypeptide_ligand_contact -alt_id: BS:00157 -def: "Residues which interact with a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide ligand contact" EXACT [] -synonym: "protein-ligand interaction" RELATED [] -is_a: SO:0001657 ! ligand_binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001106 -name: asx_motif -alt_id: BS:00202 -def: "A motif of five consecutive residues and two H-bonds in which: Residue(i) is Aspartate or Asparagine (Asx), side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3), main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001107 -name: beta_bulge -alt_id: BS:00208 -def: "A motif of three residues within a beta-sheet in which the main chains of two consecutive residues are H-bonded to that of the third, and in which the dihedral angles are as follows: Residue(i): -140 degrees < phi(l) -20 degrees , -90 degrees < psi(l) < 40 degrees. 
Residue (i+1): -180 degrees < phi < -25 degrees or +120 degrees < phi < +180 degrees, +40 degrees < psi < +180 degrees or -180 degrees < psi < -120 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge" EXACT [] -xref: http://en.wikipedia.org/wiki/Beta_bulge "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001108 -name: beta_bulge_loop -alt_id: BS:00209 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds. Beta bulge loops often occur at the loop ends of beta-hairpins." [EBIBS:GAR, Http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001109 -name: beta_bulge_loop_five -alt_id: BS:00210 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+4), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+3), these loops have an RL nest at residues i+2 and i+3." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop five" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001110 -name: beta_bulge_loop_six -alt_id: BS:00211 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+5), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+4), these loops have an RL nest at residues i+3 and i+4." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop six" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001111 -name: beta_strand -alt_id: BS:00042 -def: "A beta strand describes a single length of polypeptide chain that forms part of a beta sheet. 
A single continuous stretch of amino acids adopting an extended conformation of hydrogen bonds between the N-O and the C=O of another part of the peptide. This forms a secondary protein structure in which two or more extended polypeptide regions are hydrogen-bonded to one another in a planar array." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "strand" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Beta_sheet "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001112 -name: antiparallel_beta_strand -alt_id: BS:0000341 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (one running N-terminal to C-terminal and one running C-terminal to N-terminal). Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i) and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they form two mutual backbone hydrogen bonds to each other's flanking peptide groups; this is known as a close pair of hydrogen bonds. The peptide backbone dihedral angles (phi, psi) are about (-140 degrees, 135 degrees) in antiparallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "antiparallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001113 -name: parallel_beta_strand -alt_id: BS:00151 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (both running N-terminal to C-terminal). This orientation is slightly less stable because it introduces nonplanarity in the inter-strand hydrogen bonding pattern. Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. 
In this case, if two atoms C-alpha (i)and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they do not hydrogen bond to each other; rather, one residue forms hydrogen bonds to the residues that flank the other (but not vice versa). For example, residue i may form hydrogen bonds to residues j - 1 and j + 1; this is known as a wide pair of hydrogen bonds. By contrast, residue j may hydrogen-bond to different residues altogether, or to none at all. The dihedral angles (phi, psi) are about (-120 degrees, 115 degrees) in parallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "parallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001114 -name: peptide_helix -alt_id: BS:00152 -def: "A helix is a secondary_structure conformation where the peptide backbone forms a coil." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "helix" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001115 -name: left_handed_peptide_helix -alt_id: BS:00222 -def: "A left handed helix is a region of peptide where the coiled conformation turns in an anticlockwise, left handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix-l" RELATED [] -synonym: "left handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001116 -name: right_handed_peptide_helix -alt_id: BS:0000339 -def: "A right handed helix is a region of peptide where the coiled conformation turns in a clockwise, right handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix" RELATED BS [] -synonym: "right handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001117 -name: alpha_helix -alt_id: BS:00040 -def: "The helix has 3.6 residues per turn which corersponds to a translation of 1.5 angstroms (= 0.15 nm) along the helical axis. Every backbone N-H group donates a hydrogen bond to the backbone C=O group of the amino acid four residues earlier." 
[EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "a-helix" RELATED BS [] -synonym: "helix" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Alpha_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001118 -name: pi_helix -alt_id: BS:00153 -def: "The pi helix has 4.1 residues per turn and a translation of 1.15 (=0.115 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid five residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "pi helix" EXACT [] -xref: http://en.wikipedia.org/wiki/Pi_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001119 -name: three_ten_helix -alt_id: BS:0000340 -def: "The 3-10 helix has 3 residues per turn with a translation of 2.0 angstroms (=0.2 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid three residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "3(10) helix" EXACT [] -synonym: "3-10 helix" EXACT [] -synonym: "310 helix" EXACT [] -synonym: "three_ten_helix" EXACT [] -xref: http://en.wikipedia.org/wiki/310_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001120 -name: polypeptide_nest_motif -alt_id: BS:00223 -def: "A motif of two consecutive residues with dihedral angles. Nest should not have Proline as any residue. Nests frequently occur as parts of other motifs such as Schellman loops." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest" RELATED BS [] -synonym: "nest_motif" EXACT [] -synonym: "polypeptide nest motif" RELATED [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001121 -name: polypeptide_nest_left_right_motif -alt_id: BS:00224 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees. 
Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_left_right" EXACT [] -synonym: "nest_lr" EXACT [] -synonym: "polypeptide nest left right motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001122 -name: polypeptide_nest_right_left_motif -alt_id: BS:00225 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_right_left" EXACT [] -synonym: "nest_rl" EXACT [] -synonym: "polypeptide nest right left motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001123 -name: schellmann_loop -alt_id: BS:00226 -def: "A motif of six or seven consecutive residues that contains two H-bonds." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "paperclip" RELATED BS [] -synonym: "paperclip loop" RELATED [] -synonym: "schellmann_loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001124 -name: schellmann_loop_seven -alt_id: BS:00228 -def: "Wild type: A motif of seven consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+6), the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+5)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop seven" EXACT [] -synonym: "seven-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! 
schellmann_loop - -[Term] -id: SO:0001125 -name: schellmann_loop_six -alt_id: BS:00227 -def: "Common Type: A motif of six consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+5) the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop six" EXACT [] -synonym: "six-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001126 -name: serine_threonine_motif -alt_id: BS:00229 -def: "A motif of five consecutive residues and two hydrogen bonds in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3) , the main-chain CO group of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine motif" EXACT [] -synonym: "st motif" EXACT [] -synonym: "st_motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001127 -name: serine_threonine_staple_motif -alt_id: BS:00230 -def: "A motif of four or five consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain OH of residue(i) is H-bonded to the main-chain CO of residue(i3) or (i4), Phi angles of residues(i1), (i2) and (i3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine threonine staple motif" EXACT [] -synonym: "st_staple" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001128 -name: polypeptide_turn_motif -alt_id: BS:00148 -def: "A reversal in the direction of the backbone of a protein that is stabilized by hydrogen bond between backbone NH and CO groups, involving no more than 4 amino acid residues." 
[EBIBS:GAR, uniprot:feature_type] -comment: Range. -subset: biosapiens -synonym: "turn" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001129 -name: asx_turn_left_handed_type_one -alt_id: BS:00206 -def: "Left handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type one" EXACT [] -synonym: "asx_turn_il" RELATED [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001130 -name: asx_turn_left_handed_type_two -alt_id: BS:00204 -def: "Left handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type two" EXACT [] -synonym: "asx_turn_iil" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001131 -name: asx_turn_right_handed_type_two -alt_id: BS:00205 -def: "Right handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn right handed type two" EXACT [] -synonym: "asx_turn_iir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001132 -name: asx_turn_right_handed_type_one -alt_id: BS:00207 -def: "Right handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn type right handed type one" EXACT [] -synonym: "asx_turn_ir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001133 -name: beta_turn -alt_id: BS:00212 -def: "A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles of the second and third residues, which are the basis for sub-categorization." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001134 -name: beta_turn_left_handed_type_one -alt_id: BS:00215 -def: "Left handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles:- Residue(i+1): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees. Residue(i+2): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type one" EXACT [] -synonym: "beta_turn_il" EXACT [] -synonym: "type I' beta turn" EXACT [] -synonym: "type I' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001135 -name: beta_turn_left_handed_type_two -alt_id: BS:00213 -def: "Left handed type II: A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees > phi > -20 degrees, +80 degrees > psi > +180 degrees. Residue(i+2): +20 degrees > phi > +140 degrees, -40 degrees > psi > +90 degrees." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type two" EXACT [] -synonym: "beta_turn_iil" EXACT [] -synonym: "type II' beta turn" EXACT [] -synonym: "type II' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001136 -name: beta_turn_right_handed_type_one -alt_id: BS:00216 -def: "Right handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+2): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type one" EXACT [] -synonym: "beta_turn_ir" EXACT [] -synonym: "type I beta turn" EXACT [] -synonym: "type I turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001137 -name: beta_turn_right_handed_type_two -alt_id: BS:00214 -def: "Right handed type II:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, +80 degrees < psi < +180 degrees. Residue(i+2): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type two" EXACT [] -synonym: "beta_turn_iir" EXACT [] -synonym: "type II beta turn" EXACT [] -synonym: "type II turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001138 -name: gamma_turn -alt_id: BS:00219 -def: "Gamma turns, defined for 3 residues i,( i+1),( i+2) if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001139 -name: gamma_turn_classic -alt_id: BS:00220 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=75.0 - psi(i+1)=-64.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "classic gamma turn" EXACT [] -synonym: "gamma turn classic" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001140 -name: gamma_turn_inverse -alt_id: BS:00221 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=-79.0 - psi(i+1)=69.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn inverse" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001141 -name: serine_threonine_turn -alt_id: BS:00231 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine turn" EXACT [] -synonym: "st_turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001142 -name: st_turn_left_handed_type_one -alt_id: BS:00234 -def: "The peptide twists in an anticlockwise, left handed manner. 
The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type one" EXACT [] -synonym: "st_turn_il" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001143 -name: st_turn_left_handed_type_two -alt_id: BS:00232 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type two" EXACT [] -synonym: "st_turn_iil" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001144 -name: st_turn_right_handed_type_one -alt_id: BS:00235 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type one" EXACT [] -synonym: "st_turn_ir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001145 -name: st_turn_right_handed_type_two -alt_id: BS:00233 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type two" EXACT [] -synonym: "st_turn_iir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001146 -name: polypeptide_variation_site -alt_id: BS:00336 -def: "A site of sequence variation (alteration). Alternative sequence due to naturally occuring events such as polymorphisms and altermatve splicing or experimental methods such as site directed mutagenesis." [EBIBS:GAR, SO:ke] -comment: For example, was a substitution natural or mutated as part of an experiment? This term is added to merge the biosapiens term sequence_variations. -subset: biosapiens -synonym: "sequence_variations" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001147 -name: natural_variant_site -alt_id: BS:00071 -def: "Describes the natural sequence variants due to polymorphisms, disease-associated mutations, RNA editing and variations between strains, isolates or cultivars." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "natural_variant" BROAD [] -synonym: "sequence variation" BROAD [] -synonym: "variant" BROAD [uniprot:feature_type] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001148 -name: mutated_variant_site -alt_id: BS:00036 -def: "Site which has been experimentally altered." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mutagen" EXACT BS [uniprot:feature_type] -synonym: "mutagenesis" EXACT [] -synonym: "mutated_site" EXACT [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001149 -name: alternate_sequence_site -alt_id: BS:00073 -alt_id: SO:0001065 -def: "Description of sequence variants produced by alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. 
-subset: biosapiens -synonym: "alternative_sequence" EXACT [] -synonym: "isoform" NARROW [] -synonym: "sequence variation" NARROW [] -synonym: "var_seq" EXACT [uniprot:feature_type] -synonym: "varsplic" NARROW [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001150 -name: beta_turn_type_six -def: "A motif of four consecutive peptide resides of type VIa or type VIb and where the i+2 residue is cis-proline." [SO:cb] -subset: biosapiens -synonym: "beta turn type six" EXACT [] -synonym: "cis-proline loop" EXACT [] -synonym: "type VI beta turn" EXACT [] -synonym: "type VI turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001151 -name: beta_turn_type_six_a -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -90 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six a" EXACT [] -synonym: "type VIa beta turn" EXACT [] -synonym: "type VIa turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001152 -name: beta_turn_type_six_a_one -subset: biosapiens -synonym: "beta turn type six a one" EXACT [] -synonym: "type VIa1 beta turn" EXACT [] -synonym: "type VIa1 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001153 -name: beta_turn_type_six_a_two -subset: biosapiens -synonym: "beta turn type six a two" EXACT [] -synonym: "type VIa2 beta turn" EXACT [] -synonym: "type VIa2 turn" EXACT [] -is_a: SO:0001151 ! 
beta_turn_type_six_a - -[Term] -id: SO:0001154 -name: beta_turn_type_six_b -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -120 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -60 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six b" EXACT [] -synonym: "type VIb beta turn" EXACT [] -synonym: "type VIb turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001155 -name: beta_turn_type_eight -def: "A motif of four consecutive peptide residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ -30 degrees. Residue(i+2): phi ~ -120 degrees, psi ~ 120 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type eight" EXACT [] -synonym: "type VIII beta turn" EXACT [] -synonym: "type VIII turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001156 -name: DRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -10 and -60 relative to the TSS. Consensus sequence is WATCGATW." [PMID:12537576] -comment: This consensus sequence was identified computationally using the MEME algorithm within core promoter sequences from -60 to +40, with an E value of 1.7e-183. Tends to co-occur with Motif 7. Tends to not occur with DPE motif (SO:0000015) or motif 10. -synonym: "DRE motif" EXACT [] -synonym: "NDM4" EXACT [] -synonym: "WATCGATW_motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001157 -name: DMv4_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements with respect to the TSS (+1). Consensus sequence is YGGTCACACTR. Marked spatial preference within core promoter; tend to occur near the TSS, although not as tightly as INR (SO:0000014)." [PMID:16827941\:12537576] -synonym: "directional motif v4" EXACT [] -synonym: "DMv4" EXACT [] -synonym: "DMv4 motif" EXACT [] -synonym: "motif 1 element" EXACT [] -synonym: "promoter motif 1" EXACT [] -synonym: "YGGTCACATR" NARROW [] -is_a: SO:0001659 ! promoter_element - -[Term] -id: SO:0001158 -name: E_box_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and +1 relative to the TSS. Consensus sequence is AWCAGCTGWT. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015)." [PMID:12537576\:16827941] -synonym: "AWCAGCTGWT" NARROW [] -synonym: "E box motif" EXACT [] -synonym: "generic E box motif" EXACT [] -synonym: "NDM5" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001159 -name: DMv5_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -50 and -10 relative to the TSS. Consensus sequence is KTYRGTATWTTT. Tends to co-occur with DMv4 (SO:0001157) . Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v5" EXACT [] -synonym: "DMv5" EXACT [] -synonym: "DMv5 motif" EXACT [] -synonym: "KTYRGTATWTTT" NARROW [] -synonym: "promoter motif 6" RELATED [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001160 -name: DMv3_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -30 and +15 relative to the TSS. Consensus sequence is KNNCAKCNCTRNY. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015) or MTE (0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v3" EXACT [] -synonym: "DMv3" EXACT [] -synonym: "DMv3 motif" EXACT [] -synonym: "KNNCAKCNCTRNY" NARROW [] -synonym: "promoter motif 7" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001161 -name: DMv2_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and -45 relative to the TSS. Consensus sequence is MKSYGGCARCGSYSS. Tends to co-occur with DMv3 (SO:0001160). Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v2" EXACT [] -synonym: "DMv2" EXACT [] -synonym: "DMv2 motif" EXACT [] -synonym: "MKSYGGCARCGSYSS" NARROW [] -synonym: "promoter motif 8" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001162 -name: MTE -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between +20 and +30 relative to the TSS. Consensus sequence is CSARCSSAACGS. Tends to co-occur with INR motif (SO:0000014). Tends to not occur with DPE motif (SO:0000015) or DMv5 (SO:0001159)." [PMID:12537576\:15231738, PMID:16858867] -synonym: "CSARCSSAACGS" NARROW [] -synonym: "motif ten element" EXACT [] -synonym: "motif_ten_element" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter - -[Term] -id: SO:0001163 -name: INR1_motif -def: "A promoter motif with consensus sequence TCATTCG." 
[PMID:16827941] -synonym: "directional motif p3" EXACT [] -synonym: "directional promoter motif 3" EXACT [] -synonym: "DMp3" EXACT [] -synonym: "INR1 motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001164 -name: DPE1_motif -def: "A promoter motif with consensus sequence CGGACGT." [PMID:16827941] -synonym: "directional motif 5" EXACT [] -synonym: "directional promoter motif 5" RELATED [] -synonym: "DMp5" EXACT [] -synonym: "DPE1 motif" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001165 -name: DMv1_motif -def: "A promoter motif with consensus sequence CARCCCT." [PMID:16827941] -synonym: "directional promoter motif v1" RELATED [] -synonym: "DMv1" RELATED [] -synonym: "DMv1 motif" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001166 -name: GAGA_motif -def: "A non directional promoter motif with consensus sequence GAGAGCG." [PMID:16827941] -synonym: "GAGA" EXACT [] -synonym: "GAGA motif" EXACT [] -synonym: "NDM1" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001167 -name: NDM2_motif -def: "A non directional promoter motif with consensus CGMYGYCR." [PMID:16827941] -synonym: "NDM2" EXACT [] -synonym: "NDM2 motif" EXACT [] -synonym: "non directional promoter motif 2" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001168 -name: NDM3_motif -def: "A non directional promoter motif with consensus sequence GAAAGCT." [PMID:16827941] -synonym: "NDM3" EXACT [] -synonym: "NDM3 motif" EXACT [] -synonym: "non directional motif 3" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001169 -name: ds_RNA_viral_sequence -def: "A ds_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded RNA." [SO:ke] -synonym: "double stranded RNA virus sequence" EXACT [] -synonym: "ds RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001170 -name: polinton -def: "A kind of DNA transposon that populates the genomes of protists, fungi, and animals, characterized by a unique set of proteins necessary for their transposition, including a protein-primed DNA polymerase B, retroviral integrase, cysteine protease, and ATPase. Polintons are characterized by 6-bp target site duplications, terminal-inverted repeats that are several hundred nucleotides long, and 5'-AG and TC-3' termini. Polintons exist as autonomous and nonautonomous elements." [PMID:16537396] -synonym: "maverick element" RELATED [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0001171 -name: rRNA_21S -def: "A component of the large ribosomal subunit in mitochondrial rRNA." [RSC:cb] -synonym: "21S LSU rRNA" EXACT [] -synonym: "21S ribosomal RNA" EXACT [] -synonym: "21S rRNA" EXACT [] -synonym: "rRNA 21S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001172 -name: tRNA_region -def: "A region of a tRNA." [RSC:cb] -synonym: "tRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000253 ! tRNA - -[Term] -id: SO:0001173 -name: anticodon_loop -def: "A sequence of seven nucleotide bases in tRNA which contains the anticodon. It has the sequence 5'-pyrimidine-purine-anticodon-modified purine-any base-3." [ISBN:0716719207] -synonym: "anti-codon loop" EXACT [] -synonym: "anticodon loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001174 -name: anticodon -def: "A sequence of three nucleotide bases in tRNA which recognizes a codon in mRNA." 
[RSC:cb] -synonym: "anti-codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Anticodon "wiki" -is_a: SO:0001172 ! tRNA_region -relationship: part_of SO:0001173 ! anticodon_loop - -[Term] -id: SO:0001175 -name: CCA_tail -def: "Base sequence at the 3' end of a tRNA. The 3'-hydroxyl group on the terminal adenosine is the attachment point for the amino acid." [ISBN:0716719207] -synonym: "CCA sequence" EXACT [] -synonym: "CCA tail" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001176 -name: DHU_loop -def: "Non-base-paired sequence of nucleotide bases in tRNA. It contains several dihydrouracil residues." [ISBN:071671920] -synonym: "D loop" RELATED [] -synonym: "DHU loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001177 -name: T_loop -def: "Non-base-paired sequence of three nucleotide bases in tRNA. It has sequence T-Psi-C." [ISBN:0716719207] -synonym: "T loop" EXACT [] -synonym: "TpsiC loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001178 -name: pyrrolysine_tRNA_primary_transcript -def: "A primary transcript encoding pyrrolysyl tRNA (SO:0000766)." [RSC:cb] -synonym: "pyrrolysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0001179 -name: U3_snoRNA -def: "U3 snoRNA is a member of the box C/D class of small nucleolar RNAs. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. 
The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -comment: The definition is most of the old definition for snoRNA (SO:0000275). -synonym: "small nucleolar RNA U3" EXACT [] -synonym: "snoRNA U3" EXACT [] -synonym: "U3 small nucleolar RNA" EXACT [] -synonym: "U3 snoRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Small_nucleolar_RNA_U3 "wiki" -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0001180 -name: AU_rich_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is rich in AUUUA pentamers. Messenger RNAs bearing multiple AU-rich elements are often unstable." [PMID:7892223] -synonym: "ARE" RELATED [] -synonym: "AU rich element" EXACT [] -synonym: "AU-rich element" EXACT [] -xref: http://en.wikipedia.org/wiki/AU-rich_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001181 -name: Bruno_response_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is bound by the Drosophila Bruno protein and its homologs." [PMID:10893231] -comment: Not to be confused with BRE_motif (SO:0000016), which binds transcription factor II B. -synonym: "BRE" RELATED [] -synonym: "Bruno response element" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001182 -name: iron_responsive_element -def: "A regulatory sequence found in the 5' and 3' UTRs of many mRNAs which encode iron-binding proteins. It has a hairpin structure and is recognized by trans-acting proteins known as iron-regulatory proteins." 
[PMID:3198610, PMID:8710843] -synonym: "IRE" EXACT [] -synonym: "iron responsive element" EXACT [] -xref: http://en.wikipedia.org/wiki/Iron_responsive_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0001183 -name: morpholino -def: "An attribute describing a sequence composed of nucleobases bound to a morpholino backbone. A morpholino backbone consists of morpholine (CHEBI:34856) rings connected by phosphorodiamidate linkages." [RSC:cb] -comment: Do not use this for feature annotation. Use morpholino_oligo (SO:0000034) instead. -xref: http://en.wikipedia.org/wiki/Morpholino "wiki" -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001184 -name: PNA -def: "An attribute describing a sequence composed of peptide nucleic acid (CHEBI:48021), a chemical consisting of nucleobases bound to a backbone composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [RSC:cb] -comment: Do not use this term for feature annotation. Use PNA_oligo (SO:0001011) instead. -synonym: "peptide nucleic acid" RELATED [] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001185 -name: enzymatic -def: "An attribute describing the sequence of a transcript that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use enzymatic_RNA (SO:0000372) instead. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001186 -name: ribozymic -def: "An attribute describing the sequence of a transcript that has catalytic activity even without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use ribozyme (SO:0000374) instead. -is_a: SO:0001185 ! 
enzymatic - -[Term] -id: SO:0001187 -name: pseudouridylation_guide_snoRNA -def: "A snoRNA that specifies the site of pseudouridylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA pseudouridylation guide activity (GO:0030558). -synonym: "pseudouridylation guide snoRNA" EXACT [] -is_a: SO:0000594 ! H_ACA_box_snoRNA - -[Term] -id: SO:0001188 -name: LNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of 'locked' deoxyribose rings connected to a phosphate backbone. The deoxyribose unit's conformation is 'locked' by a 2'-C,4'-C-oxymethylene link." [CHEBI:48010] -comment: Do not use this term for feature annotation. Use LNA_oligo (SO:0001189) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001189 -name: LNA_oligo -def: "An oligo composed of LNA residues." [RSC:cb] -synonym: "LNA oligo" EXACT [] -synonym: "locked nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Locked_nucleic_acid "wiki" -is_a: SO:0001247 ! implied link automatically realized ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001188 ! LNA - -[Term] -id: SO:0001190 -name: TNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of threose rings connected to a phosphate backbone." [CHEBI:48019] -comment: Do not use this term for feature annotation. Use TNA_oligo (SO:0001191) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001191 -name: TNA_oligo -def: "An oligo composed of TNA residues." [RSC:cb] -synonym: "threose nucleic acid" EXACT [] -synonym: "TNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Threose_nucleic_acid "wiki" -is_a: SO:0001247 ! implied link automatically realized ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001190 ! 
TNA - -[Term] -id: SO:0001192 -name: GNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of an acyclic three-carbon propylene glycol connected to a phosphate backbone. It has two enantiomeric forms, (R)-GNA and (S)-GNA." [CHEBI:48015] -comment: Do not use this term for feature annotation. Use GNA_oligo (SO:0001192) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001193 -name: GNA_oligo -def: "An oligo composed of GNA residues." [RSC:cb] -synonym: "glycerol nucleic acid" EXACT [] -synonym: "glycol nucleic acid" EXACT [] -synonym: "GNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Glycerol_nucleic_acid "wiki" -is_a: SO:0001247 ! implied link automatically realized ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001192 ! GNA - -[Term] -id: SO:0001194 -name: R_GNA -def: "An attribute describing a GNA sequence in the (R)-GNA enantiomer." [CHEBI:48016] -comment: Do not use this term for feature annotation. Use R_GNA_oligo (SO:0001195) instead. -synonym: "R GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001195 -name: R_GNA_oligo -def: "An oligo composed of (R)-GNA residues." [RSC:cb] -synonym: "(R)-glycerol nucleic acid" EXACT [] -synonym: "(R)-glycol nucleic acid" EXACT [] -synonym: "R GNA oligo" EXACT [] -is_a: SO:0001193 ! implied link automatically realized ! GNA_oligo -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001194 ! R_GNA - -[Term] -id: SO:0001196 -name: S_GNA -def: "An attribute describing a GNA sequence in the (S)-GNA enantiomer." [CHEBI:48017] -comment: Do not use this term for feature annotation. Use S_GNA_oligo (SO:0001197) instead. -synonym: "S GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001197 -name: S_GNA_oligo -def: "An oligo composed of (S)-GNA residues." 
[RSC:cb] -synonym: "(S)-glycerol nucleic acid" EXACT [] -synonym: "(S)-glycol nucleic acid" EXACT [] -synonym: "S GNA oligo" EXACT [] -is_a: SO:0001193 ! implied link automatically realized ! GNA_oligo -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001196 ! S_GNA - -[Term] -id: SO:0001198 -name: ds_DNA_viral_sequence -def: "A ds_DNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded DNA." [SO:ke] -synonym: "double stranded DNA virus" EXACT [] -synonym: "ds DNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001199 -name: ss_RNA_viral_sequence -def: "A ss_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as single stranded RNA." [SO:ke] -synonym: "single strand RNA virus" EXACT [] -synonym: "ss RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001200 -name: negative_sense_ssRNA_viral_sequence -def: "A negative_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that is complementary to mRNA and must be converted to positive sense RNA by RNA polymerase before translation." [SO:ke] -synonym: "negative sense single stranded RNA virus" RELATED [] -synonym: "negative sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001201 -name: positive_sense_ssRNA_viral_sequence -def: "A positive_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that can be immediately translated by the host." [SO:ke] -synonym: "positive sense single stranded RNA virus" RELATED [] -synonym: "positive sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! 
ss_RNA_viral_sequence - -[Term] -id: SO:0001202 -name: ambisense_ssRNA_viral_sequence -def: "A ambisense_RNA_virus is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus with both messenger and anti messenger polarity." [SO:ke] -synonym: "ambisense single stranded RNA virus" EXACT [] -synonym: "ambisense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001203 -name: RNA_polymerase_promoter -def: "A region (DNA) to which RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "RNA polymerase promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0001204 -name: Phage_RNA_Polymerase_Promoter -def: "A region (DNA) to which Bacteriophage RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "Phage RNA Polymerase Promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0001205 -name: SP6_RNA_Polymerase_Promoter -def: "A region (DNA) to which the SP6 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "SP6 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001206 -name: T3_RNA_Polymerase_Promoter -def: "A DNA sequence to which the T3 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T3 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001207 -name: T7_RNA_Polymerase_Promoter -def: "A region (DNA) to which the T7 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T7 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001208 -name: five_prime_EST -def: "An EST read from the 5' end of a transcript that usually codes for a protein. These regions tend to be conserved across species and do not change much within a gene family." 
[http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "5' EST" EXACT [] -synonym: "five prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001209 -name: three_prime_EST -def: "An EST read from the 3' end of a transcript. They are more likely to fall within non-coding, or untranslated regions(UTRs)." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "3' EST" EXACT [] -synonym: "three prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001210 -name: translational_frameshift -def: "The region of mRNA (not divisible by 3 bases) that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "ribosomal frameshift" EXACT [] -synonym: "translational frameshift" EXACT [] -xref: http://en.wikipedia.org/wiki/Translational_frameshift "wiki" -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0001211 -name: plus_1_translational_frameshift -def: "The region of mRNA 1 base long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 1 ribosomal frameshift" EXACT [] -synonym: "plus 1 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001212 -name: plus_2_translational_frameshift -def: "The region of mRNA 2 bases long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 2 ribosomal frameshift" EXACT [] -synonym: "plus 2 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001213 -name: group_III_intron -def: "Group III introns are introns found in the mRNA of the plastids of euglenoid protists. They are spliced by a two step transesterification with bulged adenosine as initiating nucleophile." [PMID:11377794] -comment: GO:0000374. 
-synonym: "group III intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_III_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -def: "The maximal intersection of exon and UTR." [SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with a stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001216 -name: endonuclease_spliced_intron -def: "An intron that spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -synonym: "endonuclease spliced intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0001217 -name: protein_coding_gene -synonym: "protein coding gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000010 ! protein_coding - -[Term] -id: SO:0001218 -name: transgenic_insertion -def: "An insertion that derives from another organism, via the use of recombinant DNA technology." [SO:bm] -synonym: "transgenic insertion" EXACT [] -is_a: SO:0000667 ! implied link automatically realized ! insertion -intersection_of: SO:0000667 ! insertion -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0001219 -name: retrogene -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000569 ! 
retrotransposed - -[Term] -id: SO:0001220 -name: silenced_by_RNA_interference -def: "An attribute describing an epigenetic process where a gene is inactivated by RNA interference." [RSC:cb] -comment: RNA interference is GO:0016246. -synonym: "silenced by RNA interference" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001221 -name: silenced_by_histone_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by histone modification." [RSC:cb] -comment: Histone modification is GO:0016570. -synonym: "silenced by histone modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001222 -name: silenced_by_histone_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone methylation." [RSC:cb] -comment: Histone methylation is GO:0016571. -synonym: "silenced by histone methylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001223 -name: silenced_by_histone_deacetylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone deacetylation." [RSC:cb] -comment: Histone deacetylation is GO:0016573. -synonym: "silenced by histone deacetylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001224 -name: gene_silenced_by_RNA_interference -def: "A gene that is silenced by RNA interference." [SO:xp] -synonym: "gene silenced by RNA interference" EXACT [] -synonym: "RNA interference silenced gene" EXACT [] -synonym: "RNAi silenced gene" EXACT [] -is_a: SO:0000127 ! implied link automatically realized ! silenced_gene -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001220 ! silenced_by_RNA_interference - -[Term] -id: SO:0001225 -name: gene_silenced_by_histone_modification -def: "A gene that is silenced by histone modification." [SO:xp] -synonym: "gene silenced by histone modification" EXACT [] -is_a: SO:0000127 ! 
implied link automatically realized ! silenced_gene -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001226 -name: gene_silenced_by_histone_methylation -def: "A gene that is silenced by histone methylation." [SO:xp] -synonym: "gene silenced by histone methylation" EXACT [] -is_a: SO:0001225 ! implied link automatically realized ! gene_silenced_by_histone_modification -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001222 ! silenced_by_histone_methylation - -[Term] -id: SO:0001227 -name: gene_silenced_by_histone_deacetylation -def: "A gene that is silenced by histone deacetylation." [SO:xp] -synonym: "gene silenced by histone deacetylation" EXACT [] -is_a: SO:0001225 ! implied link automatically realized ! gene_silenced_by_histone_modification -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001223 ! silenced_by_histone_deacetylation - -[Term] -id: SO:0001228 -name: dihydrouridine -def: "A modified RNA base in which the 5,6-dihydrouracil is bound to the ribose ring." [RSC:cb] -synonym: "D" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Dihydrouridine "wiki" -xref: RNAMOD:051 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001229 -name: pseudouridine -def: "A modified RNA base in which the 5- position of the uracil is bound to the ribose ring instead of the 4- position." [RSC:cb] -comment: The free molecule is CHEBI:17802. -synonym: "Y" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Pseudouridine "wiki" -xref: RNAMOD:050 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001230 -name: inosine -def: "A modified RNA base in which hypoxanthine is bound to the ribose ring." [http://library.med.utah.edu/RNAmods/, RSC:cb] -comment: The free molecule is CHEBI:17596. 
-synonym: "I" RELATED [] -synonym: "RNAMOD:017" RELATED [] -xref: http://en.wikipedia.org/wiki/Inosine "wiki" -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001231 -name: seven_methylguanine -def: "A modified RNA base in which guanine is methylated at the 7- position." [RSC:cb] -comment: The free molecule is CHEBI:2274. -synonym: "7-methylguanine" EXACT [] -synonym: "seven methylguanine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001232 -name: ribothymidine -def: "A modified RNA base in which thymine is bound to the ribose ring." [RSC:cb] -comment: The free molecule is CHEBI:30832. -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001233 -name: methylinosine -def: "A modified RNA base in which methylhypoxanthine is bound to the ribose ring." [RSC:cb] -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001234 -name: mobile -def: "An attribute describing a feature that has either intra-genome or intracellular mobility." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Mobile "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001235 -name: replicon -def: "A region containing at least one unique origin of replication and a unique termination site." [ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001237 -name: amino_acid -def: "A sequence feature that corresponds to a single amino acid residue in a polypeptide." [RSC:cb] -comment: Probably in the future this will cross reference to Chebi. -synonym: "amino acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Amino_acid "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! 
polypeptide - -[Term] -id: SO:0001238 -name: major_TSS -synonym: "major transcription start site" EXACT [] -synonym: "major TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001239 -name: minor_TSS -synonym: "minor TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001240 -name: TSS_region -def: "The region of a gene from the 5' most TSS to the 3' TSS." [BBOP:nw] -synonym: "TSS region" EXACT [] -is_a: SO:0000842 ! gene_component_region -relationship: has_part SO:0000315 ! TSS - -[Term] -id: SO:0001241 -name: encodes_alternate_transcription_start_sites -synonym: "encodes alternate transcription start sites" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0001243 -name: miRNA_primary_transcript_region -def: "A part of an miRNA primary_transcript." [SO:ke] -synonym: "miRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0001244 -name: pre_miRNA -def: "The 60-70 nucleotide region remain after Drosha processing of the primary transcript, that folds back upon itself to form a hairpin sructure." [SO:ke] -synonym: "pre-miRNA" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0001245 -name: miRNA_stem -def: "The stem of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA stem" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001246 -name: miRNA_loop -def: "The loop of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA loop" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001247 -name: synthetic_oligo -def: "An oligo composed of synthetic nucleotides." [SO:ke] -synonym: "synthetic oligo" EXACT [] -is_a: SO:0000696 ! 
oligo - -[Term] -id: SO:0001248 -name: assembly -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001249 -name: fragment_assembly -def: "A fragment assembly is a genome assembly that orders overlapping fragments of the genome based on landmark sequences. The base pair distance between the landmarks is known allowing additivity of lengths." [SO:ke] -synonym: "fragment assembly" EXACT [] -synonym: "physical map" EXACT [] -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0001250 -name: fingerprint_map -def: "A fingerprint_map is a physical map composed of restriction fragments." [SO:ke] -synonym: "BACmap" EXACT [] -synonym: "fingerprint map" EXACT [] -synonym: "FPC" EXACT [] -synonym: "FPCmap" EXACT [] -synonym: "restriction map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000412 ! restriction_fragment - -[Term] -id: SO:0001251 -name: STS_map -def: "An STS map is a physical map organized by the unique STS landmarks." [SO:ke] -synonym: "STS map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001252 -name: RH_map -def: "A radiation hybrid map is a physical map." [SO:ke] -synonym: "radiation hybrid map" EXACT [] -synonym: "RH map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001253 -name: sonicate_fragment -def: "A DNA fragment generated by sonication. Sonication is a technique used to sheer DNA into smaller fragments." [SO:ke] -synonym: "sonicate fragment" EXACT [] -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0001254 -name: polyploid -def: "A kind of chromosome variation where the chromosome complement is an exact multiple of the haploid number and is greater than the diploid number." 
[SO:ke] -xref: http://en.wikipedia.org/wiki/Polyploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0001255 -name: autopolyploid -def: "A polyploid where the multiple chromosome set was derived from the same organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Autopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001256 -name: allopolyploid -def: "A polyploid where the multiple chromosome set was derived from a different organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Allopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001257 -name: homing_endonuclease_binding_site -def: "The binding site (recognition site) of a homing endonuclease. The binding site is typically large." [SO:ke] -synonym: "homing endonuclease binding site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0001258 -name: octamer_motif -def: "A sequence element characteristic of some RNA polymerase II promoters with sequence ATTGCAT that binds Pou-domain transcription factors." [GOC:dh, PMID:3095662] -comment: Nature. 1986 Oct 16-22;323(6089):640-3. -synonym: "octamer motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001259 -name: apicoplast_chromosome -def: "A chromosome originating in an apicoplast." [SO:xp] -synonym: "apicoplast chromosome" EXACT [] -is_a: SO:0000340 ! implied link automatically realized ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0001260 -name: sequence_collection -def: "A collection of discontinuous sequences." [SO:ke] -synonym: "sequence collection" EXACT [] - -[Term] -id: SO:0001261 -name: overlapping_feature_set -def: "A continuous region of sequence composed of the overlapping of multiple sequence_features, which ultimately provides evidence for another sequence_feature." 
[SO:ke] -comment: This feature was requested by Nicole, tracker id 1911479. It is required to gather evidence together for annotation. An example would be overlapping ESTs that support an mRNA. -synonym: "overlapping feature set" EXACT [] -is_a: SO:0000703 ! experimental_result_region - -[Term] -id: SO:0001262 -name: overlapping_EST_set -def: "A continous experimental result region extending the length of multiple overlapping EST's." [SO:ke] -synonym: "overlapping EST set" EXACT [] -is_a: SO:0001261 ! overlapping_feature_set -relationship: has_part SO:0000345 ! EST - -[Term] -id: SO:0001263 -name: ncRNA_gene -synonym: "ncRNA gen" EXACT [] -synonym: "ncRNA gene" EXACT [] -synonym: "non-coding RNA gene" RELATED [] -is_a: SO:0000704 ! gene - -[Term] -id: SO:0001264 -name: gRNA_gene -synonym: "gRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000979 ! gRNA_encoding - -[Term] -id: SO:0001265 -name: miRNA_gene -synonym: "miRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000571 ! miRNA_encoding - -[Term] -id: SO:0001266 -name: scRNA_gene -synonym: "scRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000575 ! scRNA_encoding - -[Term] -id: SO:0001267 -name: snoRNA_gene -synonym: "snoRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0001268 -name: snRNA_gene -synonym: "snRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0001263 ! 
ncRNA_gene - -[Term] -id: SO:0001269 -name: SRP_RNA_gene -synonym: "SRP RNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000642 ! SRP_RNA_encoding - -[Term] -id: SO:0001270 -name: stRNA_gene -synonym: "stRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000656 ! stRNA_encoding - -[Term] -id: SO:0001271 -name: tmRNA_gene -synonym: "tmRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000659 ! tmRNA_encoding - -[Term] -id: SO:0001272 -name: tRNA_gene -synonym: "tRNA gene" EXACT [] -is_a: SO:0001263 ! implied link automatically realized ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000663 ! tRNA_encoding - -[Term] -id: SO:0001273 -name: modified_adenosine -def: "A modified adenine is an adenine base feature that has been altered." [SO:ke] -synonym: "modified adenosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001274 -name: modified_inosine -def: "A modified inosine is an inosine base feature that has been altered." [SO:ke] -synonym: "modified inosine" EXACT [] -is_a: SO:0001230 ! inosine - -[Term] -id: SO:0001275 -name: modified_cytidine -def: "A modified cytidine is a cytidine base feature which has been altered." [SO:ke] -synonym: "modified cytidine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001276 -name: modified_guanosine -synonym: "modified guanosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001277 -name: modified_uridine -synonym: "modified uridine" EXACT [] -is_a: SO:0000250 ! 
modified_RNA_base_feature - -[Term] -id: SO:0001278 -name: one_methylinosine -def: "1-methylinosine is a modified insosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylinosine" EXACT [] -synonym: "m1I" EXACT RNAMOD [] -synonym: "one methylinosine" EXACT [] -xref: RNAMOD:018 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001279 -name: one_two_prime_O_dimethylinosine -def: "1,2'-O-dimethylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylinosine" EXACT [] -synonym: "m'Im" EXACT RNAMOD [] -synonym: "one two prime O dimethylinosine" EXACT [] -xref: RNAMOD:019 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001280 -name: two_prime_O_methylinosine -def: "2'-O-methylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylinosine" EXACT [] -synonym: "Im" EXACT RNAMOD [] -synonym: "two prime O methylinosine" EXACT [] -xref: RNAMOD:081 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001281 -name: three_methylcytidine -def: "3-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylcytidine" EXACT [] -synonym: "m3C" EXACT RNAMOD [] -synonym: "three methylcytidine" EXACT [] -xref: RNAMOD:020 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001282 -name: five_methylcytidine -def: "5-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylcytidine" EXACT [] -synonym: "five methylcytidine" EXACT [] -synonym: "m5C" EXACT RNAMOD [] -xref: RNAMOD:021 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001283 -name: two_prime_O_methylcytidine -def: "2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylcytidine" EXACT [] -synonym: "Cm" EXACT RNAMOD [] -synonym: "two prime O methylcytidine" EXACT [] -xref: RNAMOD:022 -is_a: SO:0001275 ! 
modified_cytidine - -[Term] -id: SO:0001284 -name: two_thiocytidine -def: "2-thiocytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiocytidine" EXACT [] -synonym: "s2C" EXACT RNAMOD [] -synonym: "two thiocytidine" EXACT [] -xref: RNAMOD:023 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001285 -name: N4_acetylcytidine -def: "N4-acetylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4C" EXACT RNAMOD [] -synonym: "N4 acetylcytidine" EXACT [] -synonym: "N4-acetylcytidine" EXACT [] -xref: RNAMOD:024 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001286 -name: five_formylcytidine -def: "5-formylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formylcytidine" EXACT [] -synonym: "f5C" EXACT RNAMOD [] -synonym: "five formylcytidine" EXACT [] -xref: RNAMOD:025 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001287 -name: five_two_prime_O_dimethylcytidine -def: "5,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethylcytidine" EXACT [] -synonym: "five two prime O dimethylcytidine" EXACT [] -synonym: "m5Cm" EXACT RNAMOD [] -xref: RNAMOD:026 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001288 -name: N4_acetyl_2_prime_O_methylcytidine -def: "N4-acetyl-2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4Cm" EXACT RNAMOD [] -synonym: "N4 acetyl 2 prime O methylcytidine" EXACT [] -synonym: "N4-acetyl-2'-O-methylcytidine" EXACT [] -xref: RNAMOD:027 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001289 -name: lysidine -def: "Lysidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "k2C" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Lysidine "wiki" -xref: RNAMOD:028 -is_a: SO:0001275 ! 
modified_cytidine - -[Term] -id: SO:0001290 -name: N4_methylcytidine -def: "N4-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4C" EXACT RNAMOD [] -synonym: "N4 methylcytidine" EXACT [] -synonym: "N4-methylcytidine" EXACT [] -xref: RNAMOD:082 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001291 -name: N4_2_prime_O_dimethylcytidine -def: "N4,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4Cm" EXACT RNAMOD [] -synonym: "N4 2 prime O dimethylcytidine" EXACT [] -synonym: "N4,2'-O-dimethylcytidine" EXACT [] -xref: RNAMOD:083 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001292 -name: five_hydroxymethylcytidine -def: "5-hydroxymethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxymethylcytidine" EXACT [] -synonym: "five hydroxymethylcytidine" EXACT [] -synonym: "hm5C" EXACT RNAMOD [] -xref: RNAMOD:084 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001293 -name: five_formyl_two_prime_O_methylcytidine -def: "5-formyl-2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formyl-2'-O-methylcytidine" EXACT [] -synonym: "f5Cm" EXACT RNAMOD [] -synonym: "five formyl two prime O methylcytidine" EXACT [] -xref: RNAMOD:095 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001294 -name: N4_N4_2_prime_O_trimethylcytidine -def: "N4_N4_2_prime_O_trimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m42Cm" EXACT RNAMOD [] -synonym: "N4,N4,2'-O-trimethylcytidine" EXACT [] -xref: RNAMOD:107 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001295 -name: one_methyladenosine -def: "1_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyladenosine" EXACT [] -synonym: "m1A" EXACT RNAMOD [] -synonym: "one methyladenosine" EXACT [] -xref: RNAMOD:001 -is_a: SO:0001273 ! 
modified_adenosine - -[Term] -id: SO:0001296 -name: two_methyladenosine -def: "2_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methyladenosine" EXACT [] -synonym: "m2A" EXACT RNAMOD [] -synonym: "two methyladenosine" EXACT [] -xref: RNAMOD:002 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001297 -name: N6_methyladenosine -def: "N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6A" EXACT RNAMOD [] -synonym: "N6 methyladenosine" EXACT [] -synonym: "N6-methyladenosine" EXACT [] -xref: RNAMOD:003 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001298 -name: two_prime_O_methyladenosine -def: "2prime_O_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyladenosine" EXACT [] -synonym: "Am" EXACT RNAMOD [] -synonym: "two prime O methyladenosine" EXACT [] -xref: RNAMOD:004 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001299 -name: two_methylthio_N6_methyladenosine -def: "2_methylthio_N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-methyladenosine" EXACT [] -synonym: "ms2m6A" EXACT RNAMOD [] -synonym: "two methylthio N6 methyladenosine" EXACT [] -xref: RNAMOD:005 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001300 -name: N6_isopentenyladenosine -def: "N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "i6A" EXACT RNAMOD [] -synonym: "N6 isopentenyladenosine" EXACT [] -synonym: "N6-isopentenyladenosine" EXACT [] -xref: RNAMOD:006 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001301 -name: two_methylthio_N6_isopentenyladenosine -def: "2_methylthio_N6_isopentenyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-isopentenyladenosine" EXACT [] -synonym: "ms2i6A" EXACT RNAMOD [] -synonym: "two methylthio N6 isopentenyladenosine" EXACT [] -xref: RNAMOD:007 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001302 -name: N6_cis_hydroxyisopentenyl_adenosine -def: "N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "io6A" EXACT RNAMOD [] -synonym: "N6 cis hydroxyisopentenyl adenosine" EXACT [] -synonym: "N6-(cis-hydroxyisopentenyl)adenosine" EXACT [] -xref: RNAMOD:008 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001303 -name: two_methylthio_N6_cis_hydroxyisopentenyl_adenosine -def: "2_methylthio_N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-(cis-hydroxyisopentenyl) adenosine" EXACT [] -synonym: "ms2io6A" EXACT RNAMOD [] -synonym: "two methylthio N6 cis hydroxyisopentenyl adenosine" EXACT [] -xref: RNAMOD:009 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001304 -name: N6_glycinylcarbamoyladenosine -def: "N6_glycinylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "g6A" EXACT RNAMOD [] -synonym: "N6 glycinylcarbamoyladenosine" EXACT [] -synonym: "N6-glycinylcarbamoyladenosine" EXACT [] -xref: RNAMOD:010 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001305 -name: N6_threonylcarbamoyladenosine -def: "N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-threonylcarbamoyladenosine" EXACT [] -synonym: "t6A" EXACT RNAMOD [] -xref: RNAMOD:011 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001306 -name: two_methylthio_N6_threonyl_carbamoyladenosine -def: "2_methylthio_N6_threonyl_carbamoyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-threonyl carbamoyladenosine" EXACT [] -synonym: "ms2t6A" EXACT RNAMOD [] -synonym: "two methylthio N6 threonyl carbamoyladenosine" EXACT [] -xref: RNAMOD:012 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001307 -name: N6_methyl_N6_threonylcarbamoyladenosine -def: "N6_methyl_N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6t6A" EXACT RNAMOD [] -synonym: "N6 methyl N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-methyl-N6-threonylcarbamoyladenosine" EXACT [] -xref: RNAMOD:013 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001308 -name: N6_hydroxynorvalylcarbamoyladenosine -def: "N6_hydroxynorvalylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "hn6A" EXACT RNAMOD [] -synonym: "N6 hydroxynorvalylcarbamoyladenosine" EXACT [] -synonym: "N6-hydroxynorvalylcarbamoyladenosine" EXACT [] -xref: RNAMOD:014 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001309 -name: two_methylthio_N6_hydroxynorvalyl_carbamoyladenosine -def: "2_methylthio_N6_hydroxynorvalyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-hydroxynorvalyl carbamoyladenosine" EXACT [] -synonym: "ms2hn6A" EXACT RNAMOD [] -synonym: "two methylthio N6 hydroxynorvalyl carbamoyladenosine" EXACT [] -xref: RNAMOD:015 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001310 -name: two_prime_O_ribosyladenosine_phosphate -def: "2prime_O_ribosyladenosine_phosphate is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosyladenosine (phosphate)" EXACT [] -synonym: "Ar(p)" EXACT RNAMOD [] -synonym: "two prime O ribosyladenosine phosphate" EXACT [] -xref: RNAMOD:016 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001311 -name: N6_N6_dimethyladenosine -def: "N6_N6_dimethyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "m62A" EXACT RNAMOD [] -synonym: "N6,N6-dimethyladenosine" EXACT [] -xref: RNAMOD:080 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001312 -name: N6_2_prime_O_dimethyladenosine -def: "N6_2prime_O_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6Am" EXACT RNAMOD [] -synonym: "N6 2 prime O dimethyladenosine" EXACT [] -synonym: "N6,2'-O-dimethyladenosine" EXACT [] -xref: RNAMOD:088 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001313 -name: N6_N6_2_prime_O_trimethyladenosine -def: "N6_N6_2prime_O_trimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62Am" EXACT RNAMOD [] -synonym: "N6,N6,2'-O-trimethyladenosine" EXACT [] -xref: RNAMOD:089 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001314 -name: one_two_prime_O_dimethyladenosine -def: "1,2'-O-dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethyladenosine" EXACT [] -synonym: "m1Am" EXACT RNAMOD [] -synonym: "one two prime O dimethyladenosine" EXACT [] -xref: RNAMOD:097 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001315 -name: N6_acetyladenosine -def: "N6_acetyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac6A" EXACT RNAMOD [] -synonym: "N6 acetyladenosine" EXACT [] -synonym: "N6-acetyladenosine" EXACT [] -xref: RNAMOD:102 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001316 -name: seven_deazaguanosine -def: "7-deazaguanosine is a moddified guanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-deazaguanosine" RELATED [] -synonym: "seven deazaguanosine" EXACT [] -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001317 -name: queuosine -def: "Queuosine is a modified 7-deazoguanosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "Q" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Queuosine "wiki" -xref: RNAMOD:043 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001318 -name: epoxyqueuosine -def: "Epoxyqueuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "eQ" EXACT RNAMOD [] -xref: RNAMOD:044 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001319 -name: galactosyl_queuosine -def: "Galactosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "galactosyl queuosine" EXACT [] -synonym: "galactosyl-queuosine" EXACT [] -synonym: "galQ" EXACT RNAMOD [] -xref: RNAMOD:045 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001320 -name: mannosyl_queuosine -def: "Mannosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "mannosyl queuosine" EXACT [] -synonym: "mannosyl-queuosine" EXACT [] -synonym: "manQ" EXACT RNAMOD [] -xref: RNAMOD:046 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001321 -name: seven_cyano_seven_deazaguanosine -def: "7_cyano_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-cyano-7-deazaguanosine" EXACT [] -synonym: "preQ0" EXACT RNAMOD [] -synonym: "seven cyano seven deazaguanosine" EXACT [] -xref: RNAMOD:047 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001322 -name: seven_aminomethyl_seven_deazaguanosine -def: "7_aminomethyl_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-aminomethyl-7-deazaguanosine" EXACT [] -synonym: "preQ1" EXACT RNAMOD [] -synonym: "seven aminomethyl seven deazaguanosine" EXACT [] -xref: RNAMOD:048 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001323 -name: archaeosine -def: "Archaeosine is a modified 7-deazoguanosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "G+" EXACT RNAMOD [] -xref: RNAMOD:049 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001324 -name: one_methylguanosine -def: "1_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylguanosine" EXACT [] -synonym: "m1G" EXACT RNAMOD [] -synonym: "one methylguanosine" EXACT [] -xref: RNAMOD:029 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001325 -name: N2_methylguanosine -def: "N2_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2G" EXACT RNAMOD [] -synonym: "N2 methylguanosine" EXACT [] -synonym: "N2-methylguanosine" EXACT [] -xref: RNAMOD:030 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001326 -name: seven_methylguanosine -def: "7_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "7-methylguanosine" EXACT [] -synonym: "m7G" EXACT RNAMOD [] -synonym: "seven methylguanosine" EXACT [] -xref: RNAMOD:031 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001327 -name: two_prime_O_methylguanosine -def: "2prime_O_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylguanosine" EXACT [] -synonym: "Gm" EXACT RNAMOD [] -synonym: "two prime O methylguanosine" EXACT [] -xref: RNAMOD:032 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001328 -name: N2_N2_dimethylguanosine -def: "N2_N2_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22G" EXACT RNAMOD [] -synonym: "N2,N2-dimethylguanosine" EXACT [] -xref: RNAMOD:033 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001329 -name: N2_2_prime_O_dimethylguanosine -def: "N2_2prime_O_dimethylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "m2Gm" EXACT RNAMOD [] -synonym: "N2 2 prime O dimethylguanosine" EXACT [] -synonym: "N2,2'-O-dimethylguanosine" EXACT [] -xref: RNAMOD:034 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001330 -name: N2_N2_2_prime_O_trimethylguanosine -def: "N2_N2_2prime_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22Gmv" EXACT RNAMOD [] -synonym: "N2,N2,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:035 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001331 -name: two_prime_O_ribosylguanosine_phosphate -def: "2prime_O_ribosylguanosine_phosphate is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosylguanosine (phosphate)" EXACT [] -synonym: "Gr(p)" EXACT RNAMOD [] -synonym: "two prime O ribosylguanosine phosphate" EXACT [] -xref: RNAMOD:036 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001332 -name: wybutosine -def: "Wybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "yW" EXACT RNAMOD [] -xref: RNAMOD:037 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001333 -name: peroxywybutosine -def: "Peroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "o2yW" EXACT RNAMOD [] -xref: RNAMOD:038 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001334 -name: hydroxywybutosine -def: "Hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW" EXACT RNAMOD [] -xref: RNAMOD:039 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001335 -name: undermodified_hydroxywybutosine -def: "Undermodified_hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW*" EXACT RNAMOD [] -synonym: "undermodified hydroxywybutosine" EXACT [] -xref: RNAMOD:040 -is_a: SO:0001276 ! 
modified_guanosine - -[Term] -id: SO:0001336 -name: wyosine -def: "Wyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "IMG" EXACT RNAMOD [] -xref: RNAMOD:041 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001337 -name: methylwyosine -def: "Methylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mimG" EXACT RNAMOD [] -xref: RNAMOD:042 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001338 -name: N2_7_dimethylguanosine -def: "N2_7_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7G" EXACT RNAMOD [] -synonym: "N2 7 dimethylguanosine" EXACT [] -synonym: "N2,7-dimethylguanosine" EXACT [] -xref: RNAMOD:090 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001339 -name: N2_N2_7_trimethylguanosine -def: "N2_N2_7_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,2,7G" EXACT RNAMOD [] -synonym: "N2,N2,7-trimethylguanosine" EXACT [] -xref: RNAMOD:091 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001340 -name: one_two_prime_O_dimethylguanosine -def: "1_2prime_O_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylguanosine" EXACT [] -synonym: "m1Gm" EXACT RNAMOD [] -synonym: "one two prime O dimethylguanosine" EXACT [] -xref: RNAMOD:096 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001341 -name: four_demethylwyosine -def: "4_demethylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-demethylwyosine" EXACT [] -synonym: "four demethylwyosine" EXACT [] -synonym: "imG-14" EXACT RNAMOD [] -xref: RNAMOD:100 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001342 -name: isowyosine -def: "Isowyosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "imG2" EXACT RNAMOD [] -xref: RNAMOD:101 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001343 -name: N2_7_2prirme_O_trimethylguanosine -def: "N2_7_2prirme_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7Gm" EXACT RNAMOD [] -synonym: "N2 7 2prirme O trimethylguanosine" EXACT [] -synonym: "N2,7,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:106 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001344 -name: five_methyluridine -def: "5_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methyluridine" EXACT [] -synonym: "five methyluridine" EXACT [] -synonym: "m5U" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/5-methyluridine "wiki" -xref: RNAMOD:052 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001345 -name: two_prime_O_methyluridine -def: "2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyluridine" EXACT [] -synonym: "two prime O methyluridine" EXACT [] -synonym: "Um" EXACT RNAMOD [] -xref: RNAMOD:053 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001346 -name: five_two_prime_O_dimethyluridine -def: "5_2_prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethyluridine" EXACT [] -synonym: "five two prime O dimethyluridine" EXACT [] -synonym: "m5Um" EXACT RNAMOD [] -xref: RNAMOD:054 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001347 -name: one_methylpseudouridine -def: "1_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylpseudouridine" EXACT [] -synonym: "m1Y" EXACT RNAMOD [] -synonym: "one methylpseudouridine" EXACT [] -xref: RNAMOD:055 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001348 -name: two_prime_O_methylpseudouridine -def: "2prime_O_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylpseudouridine" EXACT [] -synonym: "two prime O methylpseudouridine" EXACT [] -synonym: "Ym" EXACT RNAMOD [] -xref: RNAMOD:056 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001349 -name: two_thiouridine -def: "2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiouridine" EXACT [] -synonym: "s2U" EXACT RNAMOD [] -synonym: "two thiouridine" EXACT [] -xref: RNAMOD:057 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001350 -name: four_thiouridine -def: "4_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-thiouridine" EXACT [] -synonym: "four thiouridine" EXACT [] -synonym: "s4U" EXACT RNAMOD [] -xref: RNAMOD:058 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001351 -name: five_methyl_2_thiouridine -def: "5_methyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methyl-2-thiouridine" EXACT [] -synonym: "five methyl 2 thiouridine" EXACT [] -synonym: "m5s2U" EXACT RNAMOD [] -xref: RNAMOD:059 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001352 -name: two_thio_two_prime_O_methyluridine -def: "2_thio_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thio-2'-O-methyluridine" EXACT [] -synonym: "s2Um" EXACT RNAMOD [] -synonym: "two thio two prime O methyluridine" EXACT [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001353 -name: three_three_amino_three_carboxypropyl_uridine -def: "3_3_amino_3_carboxypropyl_uridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "3-(3-amino-3-carboxypropyl)uridine" EXACT [] -synonym: "acp3U" EXACT RNAMOD [] -xref: RNAMOD:061 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001354 -name: five_hydroxyuridine -def: "5_hydroxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxyuridine" EXACT [] -synonym: "five hydroxyuridine" EXACT [] -synonym: "ho5U" EXACT RNAMOD [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001355 -name: five_methoxyuridine -def: "5_methoxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxyuridine" EXACT [] -synonym: "five methoxyuridine" EXACT [] -synonym: "mo5U" EXACT RNAMOD [] -xref: RNAMOD:063 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001356 -name: uridine_five_oxyacetic_acid -def: "Uridine_5_oxyacetic_acid is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "cmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid" EXACT [] -synonym: "uridine five oxyacetic acid" EXACT [] -xref: RNAMOD:064 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001357 -name: uridine_five_oxyacetic_acid_methyl_ester -def: "Uridine_5_oxyacetic_acid_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mcmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid methyl ester" EXACT [] -synonym: "uridine five oxyacetic acid methyl ester" EXACT [] -xref: RNAMOD:065 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001358 -name: five_carboxyhydroxymethyl_uridine -def: "5_carboxyhydroxymethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine" EXACT [] -synonym: "chm5U" EXACT RNAMOD [] -synonym: "five carboxyhydroxymethyl uridine" EXACT [] -xref: RNAMOD:066 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001359 -name: five_carboxyhydroxymethyl_uridine_methyl_ester -def: "5_carboxyhydroxymethyl_uridine_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine methyl ester" EXACT [] -synonym: "five carboxyhydroxymethyl uridine methyl ester" EXACT [] -synonym: "mchm5U" EXACT RNAMOD [] -xref: RNAMOD:067 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001360 -name: five_methoxycarbonylmethyluridine -def: "Five_methoxycarbonylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyluridine" EXACT [] -synonym: "five methoxycarbonylmethyluridine" EXACT [] -synonym: "mcm5U" EXACT RNAMOD [] -xref: RNAMOD:068 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001361 -name: five_methoxycarbonylmethyl_two_prime_O_methyluridine -def: "Five_methoxycarbonylmethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five methoxycarbonylmethyl two prime O methyluridine" EXACT [] -synonym: "mcm5Um" EXACT RNAMOD [] -xref: RNAMOD:069 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001362 -name: five_methoxycarbonylmethyl_two_thiouridine -def: "5_methoxycarbonylmethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2-thiouridine" EXACT [] -synonym: "five methoxycarbonylmethyl two thiouridine" EXACT [] -synonym: "mcm5s2U" EXACT RNAMOD [] -xref: RNAMOD:070 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001363 -name: five_aminomethyl_two_thiouridine -def: "5_aminomethyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-aminomethyl-2-thiouridine" EXACT [] -synonym: "five aminomethyl two thiouridine" EXACT [] -synonym: "nm5s2U" EXACT RNAMOD [] -xref: RNAMOD:071 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001364 -name: five_methylaminomethyluridine -def: "5_methylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyluridine" EXACT [] -synonym: "five methylaminomethyluridine" EXACT [] -synonym: "mnm5U" EXACT RNAMOD [] -xref: RNAMOD:072 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001365 -name: five_methylaminomethyl_two_thiouridine -def: "5_methylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-thiouridine" EXACT [] -synonym: "five methylaminomethyl two thiouridine" EXACT [] -synonym: "mnm5s2U" EXACT RNAMOD [] -xref: RNAMOD:073 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001366 -name: five_methylaminomethyl_two_selenouridine -def: "5_methylaminomethyl_2_selenouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-selenouridine" EXACT [] -synonym: "five methylaminomethyl two selenouridine" EXACT [] -synonym: "mnm5se2U" EXACT RNAMOD [] -xref: RNAMOD:074 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001367 -name: five_carbamoylmethyluridine -def: "5_carbamoylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyluridine" EXACT [] -synonym: "five carbamoylmethyluridine" EXACT [] -synonym: "ncm5U" EXACT RNAMOD [] -xref: RNAMOD:075 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001368 -name: five_carbamoylmethyl_two_prime_O_methyluridine -def: "5_carbamoylmethyl_2_prime_O_methyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five carbamoylmethyl two prime O methyluridine" EXACT [] -synonym: "ncm5Um" EXACT RNAMOD [] -xref: RNAMOD:076 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001369 -name: five_carboxymethylaminomethyluridine -def: "5_carboxymethylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyluridine" EXACT [] -synonym: "cmnm5U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyluridine" EXACT [] -xref: RNAMOD:077 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001370 -name: five_carboxymethylaminomethyl_two_prime_O_methyluridine -def: "5_carboxymethylaminomethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl- 2'-O-methyluridine" EXACT [] -synonym: "cmnm5Um" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two prime O methyluridine" EXACT [] -xref: RNAMOD:078 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001371 -name: five_carboxymethylaminomethyl_two_thiouridine -def: "5_carboxymethylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl-2-thiouridine" EXACT [] -synonym: "cmnm5s2U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two thiouridine" EXACT [] -xref: RNAMOD:079 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001372 -name: three_methyluridine -def: "3_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methyluridine" EXACT [] -synonym: "m3U" EXACT RNAMOD [] -synonym: "three methyluridine" EXACT [] -xref: RNAMOD:085 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001373 -name: one_methyl_three_three_amino_three_carboxypropyl_pseudouridine -def: "1_methyl_3_3_amino_3_carboxypropyl_pseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyl-3-(3-amino-3-carboxypropyl) pseudouridine" EXACT [] -synonym: "m1acp3Y" EXACT RNAMOD [] -xref: RNAMOD:086 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001374 -name: five_carboxymethyluridine -def: "5_carboxymethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethyluridine" EXACT [] -synonym: "cm5U" EXACT RNAMOD [] -synonym: "five carboxymethyluridine" EXACT [] -xref: RNAMOD:087 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001375 -name: three_two_prime_O_dimethyluridine -def: "3_2prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3,2'-O-dimethyluridine" EXACT [] -synonym: "m3Um" EXACT RNAMOD [] -synonym: "three two prime O dimethyluridine" EXACT [] -xref: RNAMOD:092 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001376 -name: five_methyldihydrouridine -def: "5_methyldihydrouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methyldihydrouridine" EXACT [] -synonym: "five methyldihydrouridine" EXACT [] -synonym: "m5D" EXACT RNAMOD [] -xref: RNAMOD:093 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001377 -name: three_methylpseudouridine -def: "3_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylpseudouridine" EXACT [] -synonym: "m3Y" EXACT RNAMOD [] -synonym: "three methylpseudouridine" EXACT [] -xref: RNAMOD:094 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001378 -name: five_taurinomethyluridine -def: "5_taurinomethyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyluridine" EXACT [] -synonym: "five taurinomethyluridine" EXACT [] -synonym: "tm5U" EXACT RNAMOD [] -xref: RNAMOD:098 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001379 -name: five_taurinomethyl_two_thiouridine -def: "5_taurinomethyl_2_thiouridineis a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyl-2-thiouridine" EXACT [] -synonym: "five taurinomethyl two thiouridine" EXACT [] -synonym: "tm5s2U" EXACT RNAMOD [] -xref: RNAMOD:099 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001380 -name: five_isopentenylaminomethyl_uridine -def: "5_isopentenylaminomethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)uridine" EXACT [] -synonym: "five isopentenylaminomethyl uridine" EXACT [] -synonym: "inm5U" EXACT RNAMOD [] -xref: RNAMOD:103 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001381 -name: five_isopentenylaminomethyl_two_thiouridine -def: "5_isopentenylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2-thiouridine" EXACT [] -synonym: "five isopentenylaminomethyl two thiouridine" EXACT [] -synonym: "inm5s2U" EXACT RNAMOD [] -xref: RNAMOD:104 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001382 -name: five_isopentenylaminomethyl_two_prime_O_methyluridine -def: "5_isopentenylaminomethyl_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2'-O-methyluridine" EXACT [] -synonym: "five isopentenylaminomethyl two prime O methyluridine" EXACT [] -synonym: "inm5Um" EXACT RNAMOD [] -xref: RNAMOD:105 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001383 -name: histone_binding_site -def: "A region of a DNA molecule that is bound by a histone." 
[SO:ke] -synonym: "histone binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site - -[Term] -id: SO:0001384 -name: CDS_fragment -synonym: "CDS fragment" EXACT [] -synonym: "incomplete CDS" EXACT [] -is_a: SO:0000316 ! implied link automatically realized ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001385 -name: modified_amino_acid_feature -def: "A post translationally modified amino acid feature." [SO:ke] -synonym: "modified amino acid feature" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001386 -name: modified_glycine -def: "A post translationally modified glycine amino acid feature." [SO:ke] -synonym: "ModGly" EXACT AAMOD [] -synonym: "modified glycine" EXACT [] -xref: MOD:00908 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001387 -name: modified_L_alanine -def: "A post translationally modified alanine amino acid feature." [SO:ke] -synonym: "ModAla" EXACT AAMOD [] -synonym: "modified L alanine" EXACT [] -synonym: "modified L-alanine" EXACT [] -xref: MOD:00901 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001388 -name: modified_L_asparagine -def: "A post translationally modified asparagine amino acid feature." [SO:ke] -synonym: "ModAsn" EXACT AAMOD [] -synonym: "modified L asparagine" EXACT [] -synonym: "modified L-asparagine" EXACT [] -xref: MOD:00903 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001389 -name: modified_L_aspartic_acid -def: "A post translationally modified aspartic acid amino acid feature." [SO:ke] -synonym: "ModAsp" EXACT AAMOD [] -synonym: "modified L aspartic acid" EXACT [] -synonym: "modified L-aspartic acid" EXACT [] -xref: MOD:00904 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001390 -name: modified_L_cysteine -def: "A post translationally modified cysteine amino acid feature." 
[SO:ke] -synonym: "ModCys" EXACT AAMOD [] -synonym: "modified L cysteine" EXACT [] -synonym: "modified L-cysteine" EXACT [] -xref: MOD:00905 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001391 -name: modified_L_glutamic_acid -synonym: "ModGlu" EXACT AAMOD [] -synonym: "modified L glutamic acid" EXACT [] -synonym: "modified L-glutamic acid" EXACT [] -xref: MOD:00906 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001392 -name: modified_L_threonine -def: "A post translationally modified threonine amino acid feature." [SO:ke] -synonym: "modified L threonine" EXACT [] -synonym: "modified L-threonine" EXACT [] -synonym: "ModThr" EXACT AAMOD [] -xref: MOD:00917 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001393 -name: modified_L_tryptophan -def: "A post translationally modified tryptophan amino acid feature." [SO:ke] -synonym: "modified L tryptophan" EXACT [] -synonym: "modified L-tryptophan" EXACT [] -synonym: "ModTrp" EXACT AAMOD [] -xref: MOD:00918 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001394 -name: modified_L_glutamine -def: "A post translationally modified glutamine amino acid feature." [SO:ke] -synonym: "ModGln" EXACT [] -synonym: "modified L glutamine" EXACT [] -synonym: "modified L-glutamine" EXACT [] -xref: MOD:00907 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001395 -name: modified_L_methionine -def: "A post translationally modified methionine amino acid feature." [SO:ke] -synonym: "modified L methionine" EXACT [] -synonym: "modified L-methionine" EXACT [] -synonym: "ModMet" EXACT AAMOD [] -xref: MOD:00913 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001396 -name: modified_L_isoleucine -def: "A post translationally modified isoleucine amino acid feature." [SO:ke] -synonym: "modified L isoleucine" EXACT [] -synonym: "modified L-isoleucine" EXACT [] -synonym: "ModIle" EXACT AAMOD [] -xref: MOD:00910 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001397 -name: modified_L_phenylalanine -def: "A post translationally modified phenylalanine amino acid feature." [SO:ke] -synonym: "modified L phenylalanine" EXACT [] -synonym: "modified L-phenylalanine" EXACT [] -synonym: "ModPhe" EXACT AAMOD [] -xref: MOD:00914 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001398 -name: modified_L_histidine -def: "A post translationally modified histidie amino acid feature." [SO:ke] -synonym: "ModHis" EXACT [] -synonym: "modified L histidine" EXACT [] -synonym: "modified L-histidine" EXACT [] -xref: MOD:00909 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001399 -name: modified_L_serine -def: "A post translationally modified serine amino acid feature." [SO:ke] -synonym: "modified L serine" EXACT [] -synonym: "modified L-serine" EXACT [] -synonym: "MosSer" EXACT AAMOD [] -xref: MOD:00916 "http://www.psidev.info/index.php?q=node/104" -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001400 -name: modified_L_lysine -def: "A post translationally modified lysine amino acid feature." [SO:ke] -synonym: "modified L lysine" EXACT [] -synonym: "modified L-lysine" EXACT [] -synonym: "ModLys" EXACT AAMOD [] -xref: MOD:00912 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001401 -name: modified_L_leucine -def: "A post translationally modified leucine amino acid feature." [SO:ke] -synonym: "modified L leucine" EXACT [] -synonym: "modified L-leucine " EXACT [] -synonym: "ModLeu" EXACT AAMOD [] -xref: MOD:00911 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001402 -name: modified_L_selenocysteine -def: "A post translationally modified selenocysteine amino acid feature." [SO:ke] -synonym: "modified L selenocysteine" EXACT [] -synonym: "modified L-selenocysteine" EXACT [] -xref: MOD:01158 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001403 -name: modified_L_valine -def: "A post translationally modified valine amino acid feature." [SO:ke] -synonym: "modified L valine" EXACT [] -synonym: "modified L-valine" EXACT [] -synonym: "ModVal" EXACT AAMOD [] -xref: MOD:00920 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001404 -name: modified_L_proline -def: "A post translationally modified proline amino acid feature." [SO:ke] -synonym: "modified L proline" EXACT [] -synonym: "modified L-proline " EXACT [] -synonym: "ModPro" EXACT AAMOD [] -xref: MOD:00915 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001405 -name: modified_L_tyrosine -def: "A post translationally modified tyrosine amino acid feature." [SO:ke] -synonym: "modified L tyrosine" EXACT [] -synonym: "modified L-tyrosine" EXACT [] -synonym: "ModTry" EXACT AAMOD [] -xref: MOD:00919 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001406 -name: modified_L_arginine -def: "A post translationally modified arginine amino acid feature." [SO:ke] -synonym: "ModArg" EXACT AAMOD [] -synonym: "modified L arginine" EXACT [] -synonym: "modified L-arginine" EXACT [] -xref: MOD:00902 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001407 -name: peptidyl -def: "An attribute describing the nature of a proteinaceous polymer, where by the amino acid units are joined by peptide bonds." [SO:ke] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0001408 -name: cleaved_for_gpi_anchor_region -def: "The C-terminal residues of a polypeptide which are exchanged for a GPI-anchor." [EBI:rh] -synonym: "cleaved for gpi anchor region" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001409 -name: biomaterial_region -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0001410 -name: experimental_feature -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -def: "A region that is defined according to its relations with other regions within the same sequence." [SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001413 -name: translocation_breakpoint -def: "The point within a chromosome where a translocation begins or ends." [SO:cb] -synonym: "translocation breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001414 -name: insertion_breakpoint -def: "The point within a chromosome where a insertion begins or ends." [SO:cb] -synonym: "insertion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001415 -name: deletion_breakpoint -def: "The point within a chromosome where a deletion begins or ends." [SO:cb] -synonym: "deletion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001416 -name: five_prime_flanking_region -def: "A flanking region located five prime of a specific region." [SO:chado] -synonym: "5' flanking region" RELATED [] -synonym: "five prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001417 -name: three_prime_flanking_region -def: "A flanking region located three prime of a specific region." 
[SO:chado] -synonym: "3' flanking region" RELATED [] -synonym: "three prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001418 -name: transcribed_fragment -def: "An experimental region, defined by a tiling array experiment to be transcribed at some level." [SO:ke] -comment: Term requested by the MODencode group. -synonym: "transcribed fragment" EXACT [] -synonym: "transfrag" RELATED [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001419 -name: cis_splice_site -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -def: "Primary transcript region bordering trans-splice junction." [SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001421 -name: splice_junction -def: "The boundary between an intron and an exon." [SO:ke] -synonym: "splice boundary" EXACT [] -synonym: "splice junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001422 -name: conformational_switch -def: "A region of a polypeptide, involved in the transition from one conformational state to another." [SO:ke] -comment: MM Young, K Kirshenbaum, KA Dill & S Highsmith. Predicting conformational switches in proteins. Protein Science, 1999, 8, 1752-64. K. Kirshenbaum, M.M. Young and S. Highsmith. Predicting Allosteric Switches in Myosins. Protein Science 8(9):1806-1815. 1999. -synonym: "polypeptide conformational switch" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001423 -name: dye_terminator_read -def: "A read produced by the dye terminator method of sequencing." [SO:ke] -synonym: "dye terminator read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001424 -name: pyrosequenced_read -def: "A read produced by pyrosequencing technology." 
[SO:ke] -comment: An example is a read produced by Roche 454 technology. -synonym: "pyorsequenced read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001425 -name: ligation_based_read -def: "A read produced by ligation based sequencing technologies." [SO:ke] -comment: An example of this kind of read is one produced by ABI SOLiD. -synonym: "ligation based read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001426 -name: polymerase_synthesis_read -def: "A read produced by the polymerase based sequence by synthesis method." [SO:ke] -comment: An example is a read produced by Illumina technology. -synonym: "polymerase synthesis read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001427 -name: cis_regulatory_frameshift_element -def: "A structural region in an RNA molecule which promotes ribosomal frameshifting of cis coding sequence." [RFAM:jd] -synonym: "cis regulatory frameshift element" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001428 -name: expressed_sequence_assembly -def: "A sequence assembly derived from expressed sequences." [SO:ke] -comment: From tracker [ 2372385 ] expressed_sequence_assembly. -synonym: "expressed sequence assembly" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0001429 -name: DNA_binding_site -def: "A region of a molecule that binds to DNA." [SO:ke] -synonym: "DNA binding site" EXACT [] -is_a: SO:0001655 ! nucleotide_binding_site - -[Term] -id: SO:0001431 -name: cryptic_gene -def: "A gene that is not transcribed under normal conditions and is not critical to normal cellular functioning." [SO:ke] -synonym: "cryptic gene" EXACT [] -is_a: SO:0000704 ! implied link automatically realized ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000976 ! cryptic - -[Term] -id: SO:0001432 -name: sequence_variant_affecting_polyadenylation -comment: OBSOLETE: This term was deleted as it conflated more than one term. 
The alteration is separate from the effect. -synonym: "mutation affecting polyadenylation" RELATED [] -synonym: "sequence variant affecting polyadenylation" EXACT [] -is_obsolete: true -replaced_by: SO:0001545 - -[Term] -id: SO:0001433 -name: three_prime_RACE_clone -def: "A three prime RACE (Rapid Amplification of cDNA Ends) clone is a cDNA clone copied from the 3' end of an mRNA (using a poly-dT primer to capture the polyA tail and a gene-specific or randomly primed 5' primer), and spliced into a vector for propagation in a suitable host." [modENCODE:nlw] -synonym: "3' RACE clone" RELATED [] -is_a: SO:0000317 ! cDNA_clone - -[Term] -id: SO:0001434 -name: cassette_pseudogene -def: "A cassette pseudogene is a kind of gene in an inactive form which may recombine at a telomeric locus to form a functional copy." [SO:ke] -comment: Requested by the Trypanosome community. -synonym: "cassette pseudogene" EXACT [] -synonym: "cassette type psedogene" RELATED [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0001435 -name: alanine -comment: A place holder for a cross product with chebi. -synonym: "A" EXACT aa1 [] -synonym: "Ala" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001436 -name: valine -comment: A place holder for a cross product with chebi. -synonym: "V" EXACT aa1 [] -synonym: "Val" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001437 -name: leucine -comment: A place holder for a cross product with chebi. -synonym: "L" EXACT aa1 [] -synonym: "Leu" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001438 -name: isoleucine -comment: A place holder for a cross product with chebi. -synonym: "I" EXACT aa1 [] -synonym: "Ile" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001439 -name: proline -comment: A place holder for a cross product with chebi. -synonym: "P" EXACT aa1 [] -synonym: "Pro" EXACT aa3 [] -is_a: SO:0001237 ! 
amino_acid - -[Term] -id: SO:0001440 -name: tryptophan -comment: A place holder for a cross product with chebi. -synonym: "Trp" EXACT aa3 [] -synonym: "W" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001441 -name: phenylalanine -comment: A place holder for a cross product with chebi. -synonym: "F" EXACT aa1 [] -synonym: "Phe" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001442 -name: methionine -comment: A place holder for a cross product with chebi. -synonym: "M" EXACT aa1 [] -synonym: "Met" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001443 -name: glycine -comment: A place holder for a cross product with chebi. -synonym: "G" EXACT aa1 [] -synonym: "Gly" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001444 -name: serine -comment: A place holder for a cross product with chebi. -synonym: "S" EXACT aa1 [] -synonym: "Ser" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001445 -name: threonine -comment: A place holder for a cross product with chebi. -synonym: "T" EXACT aa1 [] -synonym: "Thr" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001446 -name: tyrosine -comment: A place holder for a cross product with chebi. -synonym: "Tyr" EXACT aa3 [] -synonym: "Y" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001447 -name: cysteine -comment: A place holder for a cross product with chebi. -synonym: "C" EXACT aa1 [] -synonym: "Cys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001448 -name: glutamine -comment: A place holder for a cross product with chebi. -synonym: "Gln" EXACT aa3 [] -synonym: "Q" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001449 -name: asparagine -comment: A place holder for a cross product with chebi. -synonym: "Asn" EXACT aa3 [] -synonym: "N" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001450 -name: lysine -comment: A place holder for a cross product with chebi. 
-synonym: "K" EXACT aa1 [] -synonym: "Lys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001451 -name: arginine -comment: A place holder for a cross product with chebi. -synonym: "Arg" EXACT aa3 [] -synonym: "R" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001452 -name: histidine -comment: A place holder for a cross product with chebi. -synonym: "H" EXACT aa1 [] -synonym: "His" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001453 -name: aspartic_acid -comment: A place holder for a cross product with chebi. -synonym: "Asp" EXACT aa3 [] -synonym: "aspartic acid" EXACT [] -synonym: "D" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001454 -name: glutamic_acid -comment: A place holder for a cross product with chebi. -synonym: "E" EXACT aa1 [] -synonym: "Glu" EXACT aa3 [] -synonym: "glutamic acid" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001455 -name: selenocysteine -comment: A place holder for a cross product with chebi. -synonym: "Sec" EXACT aa3 [] -synonym: "U" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001456 -name: pyrrolysine -comment: A place holder for a cross product with chebi. -synonym: "O" EXACT aa1 [] -synonym: "Pyl" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001457 -name: transcribed_cluster -def: "A region defined by a set of transcribed sequences from the same gene or expressed pseudogene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "transcribed cluster" EXACT [] -synonym: "unigene cluster" RELATED [] -is_a: SO:0001410 ! experimental_feature -relationship: has_part SO:0000695 ! reagent - -[Term] -id: SO:0001458 -name: unigene_cluster -def: "A kind of transcribed_cluster defined by a set of transcribed sequences from the a unique gene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. 
-synonym: "unigene cluster" RELATED [] -is_a: SO:0001457 ! transcribed_cluster - -[Term] -id: SO:0001459 -name: CRISPR -def: "Clustered Palindromic Repeats interspersed with bacteriophage derived spacer sequences." [RFAM:jd] -synonym: "Clustered_Regularly_Interspaced_Short_Palindromic_Repeat" EXACT [] -synonym: "CRISPR element" EXACT [] -xref: http:en.wikipedia.org/wiki/CRISPR -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0001460 -name: insulator_binding_site -def: "A protein_binding_site located within an insulator." [SO:ke] -comment: See tracker ID 2060908. -synonym: "insulator binding site" RELATED [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -relationship: part_of SO:0000627 ! insulator - -[Term] -id: SO:0001461 -name: enhancer_binding_site -def: "A protein_binding_site located within an enhancer." [SO:ke] -synonym: "enhancer binding site" RELATED [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -relationship: part_of SO:0000165 ! enhancer - -[Term] -id: SO:0001462 -name: contig_collection -def: "A collection of contigs." [SO:ke] -comment: See tracker ID: 2138359. -synonym: "contig collection" EXACT [] -is_a: SO:0001085 ! implied link automatically realized ! sequence_conflict -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001085 ! sequence_conflict -intersection_of: has_part SO:0000149 ! contig - -[Term] -id: SO:0001463 -name: lincRNA -def: "A multiexonic non-coding RNA transcribed by RNA polymerase II." [PMID:19182780, SO:ke] -synonym: "large intervening non-coding RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001464 -name: UST -def: "An EST spanning part or all of the untranslated regions of a protein-coding transcript." [SO:nlw] -synonym: "UTR sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001465 -name: three_prime_UST -def: "A UST located in the 3'UTR of a protein-coding transcript." [SO:nlw] -synonym: "3' UST" RELATED [] -is_a: SO:0001464 ! 
UST - -[Term] -id: SO:0001466 -name: five_prime_UST -def: "An UST located in the 5'UTR of a protein-coding transcript." [SO:nlw] -synonym: "5' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001467 -name: RST -def: "A tag produced from a single sequencing read from a RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "RACE sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001468 -name: three_prime_RST -def: "A tag produced from a single sequencing read from a 3'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "3' RST" EXACT [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001469 -name: five_prime_RST -def: "A tag produced from a single sequencing read from a 5'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "5' RST" RELATED [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001470 -name: UST_match -def: "A match against an UST sequence." [SO:nlw] -synonym: "UST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001471 -name: RST_match -def: "A match against an RST sequence." [SO:nlw] -synonym: "RST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001472 -name: primer_match -def: "A nucleotide match to a primer sequence." [SO:nlw] -synonym: "primer match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0001473 -name: miRNA_antiguide -def: "A region of the pri miRNA that basepairs with the guide to form the hairpin." [SO:ke] -synonym: "miRNA antiguide " EXACT [] -synonym: "miRNA passenger strand" EXACT [] -synonym: "miRNA star" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-05-27T03:35:43Z - -[Term] -id: SO:0001474 -name: trans_splice_junction -def: "The boundary between the spliced leader and the first exon of the mRNA." [SO:ke] -synonym: "trans-splice junction" EXACT [] -is_a: SO:0000699 ! 
junction -created_by: kareneilbeck -creation_date: 2009-07-13T04:50:49Z - -[Term] -id: SO:0001475 -name: outron -def: "A region of a primary transcript, that is removed via trans splicing." [PMID:16401417, SO:ke] -is_a: SO:0000835 ! primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-07-14T11:36:08Z - -[Term] -id: SO:0001476 -name: natural_plasmid -def: "A plasmid that occurs naturally." [SO:xp] -synonym: "natural plasmid" EXACT [] -is_a: SO:0000155 ! plasmid -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000782 ! natural -created_by: kareneilbeck -creation_date: 2009-09-01T03:43:06Z - -[Term] -id: SO:0001477 -name: gene_trap_construct -def: "A gene trap construct is a type of engineered plasmid which is designed to integrate into a genome and produce a fusion transcript between exons of the gene into which it inserts and a reporter element in the construct. Gene traps contain a splice acceptor, do not contain promoter elements for the reporter, and are mutagenic. Gene traps may be bicistronic with the second cassette containing a promoter driving an a selectable marker." [ZFIN:dh] -synonym: "gene trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:49:09Z - -[Term] -id: SO:0001478 -name: promoter_trap_construct -def: "A promoter trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when inserted in close proximity to a promoter element. Promoter traps typically do not contain promoter elements and are mutagenic." [ZFIN:dh] -synonym: "promoter trap construct" EXACT [] -is_a: SO:0000637 ! 
engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:52:01Z - -[Term] -id: SO:0001479 -name: enhancer_trap_construct -def: "An enhancer trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when the expression from a basic minimal promoter is enhanced by genomic enhancer elements. Enhancer traps contain promoter elements and are not usually mutagenic." [ZFIN:dh] -synonym: "enhancer trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:53:26Z - -[Term] -id: SO:0001480 -name: PAC_end -def: "A region of sequence from the end of a PAC clone that may provide a highly specific marker." [ZFIN:mh] -synonym: "PAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000154 ! PAC -created_by: kareneilbeck -creation_date: 2009-09-09T05:18:12Z - -[Term] -id: SO:0001481 -name: RAPD -def: "RAPD is a 'PCR product' where a sequence variant is identified through the use of PCR with random primers." [ZFIN:mh] -synonym: "Random Amplification Polymorphic DNA" EXACT [] -is_a: SO:0000006 ! PCR_product -created_by: kareneilbeck -creation_date: 2009-09-09T05:26:10Z - -[Term] -id: SO:0001482 -name: shadow_enhancer -synonym: "shadow enhancer" EXACT [] -is_a: SO:0000165 ! enhancer -created_by: kareneilbeck -creation_date: 2009-09-09T05:29:29Z - -[Term] -id: SO:0001483 -name: SNV -def: "SNVs are single base pair positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0001484 -name: X_element_combinatorial_repeat -def: "An X element combinatorial repeat is a repeat region located between the X element and the telomere or adjacent Y' element." 
[http://www.yeastgenome.org/help/glossary.html] -comment: X element combinatorial repeats contain Tbf1p binding sites,\nand possible functions include a role in telomerase-independent telomere\nmaintenance via recombination or as a barrier against transcriptional\nsilencing. These are usually present as a combination of one or more of\nseveral types of smaller elements (designated A, B, C, or D). This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "X element combinatorial repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T11:03:37Z - -[Term] -id: SO:0001485 -name: Y_prime_ element -def: "A Y' element is a repeat region (SO:0000657) located adjacent to telomeric repeats or X element combinatorial repeats, either as a single copy or tandem repeat of two to four copies." [http:http\://www.yeastgenome.org/help/glossary.html] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "Y' element" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T12:08:57Z - -[Term] -id: SO:0001486 -name: standard_draft -def: "The status of a whole genome sequence, where the data is minimally filtered or un-filtered, from any number of sequencing platforms, and is assembled into contigs. Genome sequence of this quality may harbour regions of poor quality and can be relatively incomplete." [DOI:10.1126] -synonym: "standard draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:48:32Z - -[Term] -id: SO:0001487 -name: high_quality_draft -def: "The status of a whole genome sequence, where overall coverage represents at least 90 percent of the genome." [DOI:10.1126] -synonym: "high quality draft" EXACT [] -is_a: SO:0001499 ! 
whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:52:36Z - -[Term] -id: SO:0001488 -name: improved_high_quality_draft -def: "The status of a whole genome sequence, where additional work has been performed, using either manual or automated methods, such as gap resolution." [DOI:10.1126] -synonym: "improved high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:54:35Z - -[Term] -id: SO:0001489 -name: annotation_directed_improved_draft -def: "The status of a whole genome sequence,where annotation, and verification of coding regions has occurred." [DOI:10.1126] -synonym: "annotation directed improvement" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:57:10Z - -[Term] -id: SO:0001490 -name: noncontiguous_finished -def: "The status of a whole genome sequence, where the assembly is high quality, closure approaches have been successful for most gaps, misassemblies and low quality regions." [DOI:10.1126] -synonym: "non contiguous finished" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:01:07Z - -[Term] -id: SO:0001491 -name: finished_genome -def: "The status of a whole genome sequence, with less than 1 error per 100,000 base pairs." [DOI:10.1126] -synonym: "finished" EXACT [] -synonym: "finished genome" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:04:43Z - -[Term] -id: SO:0001492 -name: intronic_regulatory_region -def: "A regulatory region that is part of an intron." [SO:ke] -synonym: "intronic regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000188 ! 
intron -created_by: kareneilbeck -creation_date: 2009-11-08T02:48:02Z - -[Term] -id: SO:0001493 -name: centromere_DNA_Element_I -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region composed of 8-11bp which enables binding by the centromere binding factor 1(Cbf1p)." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEI" EXACT [] -synonym: "Centromere DNA Element I" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000577 ! centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:47:23Z - -[Term] -id: SO:0001494 -name: centromere_DNA_Element_II -def: "A centromere DNA Element II (CDEII) is part a conserved region of the centromere, consisting of a consensus region that is AT-rich and ~ 75-100 bp in length." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEII" EXACT [] -synonym: "centromere DNA Element II" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000577 ! centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:51:26Z - -[Term] -id: SO:0001495 -name: centromere_DNA_Element_III -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region that consists of a 25-bp which enables binding by the centromere DNA binding factor 3 (CBF3) complex." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEIII" EXACT [] -synonym: "centromere DNA Element III" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000577 ! 
centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:54:47Z - -[Term] -id: SO:0001496 -name: telomeric_repeat -def: "The telomeric repeat is a repeat region, part of the chromosome, which in yeast, is a G-rich terminal sequence of the form (TG(1-3))n or more precisely ((TG)(1-6)TG(2-3))n." [PMID:8720065] -comment: The repeats are maintained by telomerase and there is generally 300 (+/-) 75 bp of TG(1-3) at a given end. Telomeric repeats function in completing chromosome replication and protecting the ends from degradation and end-to-end fusions. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880739. -synonym: "telomeric repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-09T06:00:42Z - -[Term] -id: SO:0001497 -name: X_element -def: "The X element is a conserved region, of the telomere, of ~475 bp that contains an ARS sequence and in most cases an Abf1p binding site." [http://www.yeastgenome.org/help/glossary.html#xelemcoresequence] -comment: Possible functions include roles in chromosomal segregation,\nmaintenance of chromosome stability, recombinational sequestering, or as a\nbarrier to transcriptional silencing. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "X element" RELATED [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T10:56:54Z - -[Term] -id: SO:0001498 -name: YAC_end -def: "A region of sequence from the end of a YAC clone that may provide a highly specific marker." [SO:ke] -synonym: "YAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000152 ! YAC -created_by: kareneilbeck -creation_date: 2009-11-19T11:07:18Z - -[Term] -id: SO:0001499 -name: whole_genome_sequence_status -def: "The status of whole genome sequence." 
[DOI:10.1126] -comment: This terms and children were added to SO in response to tracker request by Patrick Chain. The paper Genome Project Standards in a New Era of Sequencing. Science October 9th 2009, addresses these terms. -synonym: "whole genome sequence status" EXACT [] -is_a: SO:0000905 ! status -created_by: kareneilbeck -creation_date: 2009-10-23T12:47:47Z - -[Term] -id: SO:0001500 -name: heritable_phenotypic_marker -def: "A biological_region characterized as a single heritable trait in a phenotype screen. The heritable phenotype may be mapped to a chromosome but generally has not been characterized to a specific gene locus." [JAX:hdene] -synonym: "heritable phenotypic marker" EXACT [] -synonym: "phenotypic marker" EXACT [] -is_a: SO:0001645 ! genetic_marker -created_by: kareneilbeck -creation_date: 2009-12-07T01:50:55Z - -[Term] -id: SO:0001501 -name: peptide_collection -def: "A collection of peptide sequences." [BBOP:nlw] -comment: Term requested via tracker ID: 2910829. -synonym: "peptide collection" EXACT [] -synonym: "peptide set" EXACT [] -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0000104 ! polypeptide -created_by: kareneilbeck -creation_date: 2009-12-11T10:58:58Z - -[Term] -id: SO:0001502 -name: high_identity_region -def: "An experimental feature with high sequence identity to another sequence." [SO:ke] -comment: Requested by tracker ID: 2902685. -synonym: "high identity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2009-12-11T11:06:05Z - -[Term] -id: SO:0001503 -name: processed_transcript -def: "A transcript for which no open reading frame has been identified and for which no other function has been determined." [MGI:hdeen] -comment: Ensembl and Vega also use this term name. Requested by Howard Deen of MGI. -synonym: "processed transcript" EXACT [] -is_a: SO:0000673 ! 
transcript -created_by: kareneilbeck -creation_date: 2009-12-21T05:37:14Z - -[Term] -id: SO:0001504 -name: assortment_derived_variation -def: "A chromosome variation derived from an event during meiosis." [SO:ke] -synonym: "assortment derived variation" RELATED [] -is_a: SO:0000240 ! chromosome_variation -created_by: kareneilbeck -creation_date: 2010-03-02T05:03:18Z - -[Term] -id: SO:0001505 -name: reference_genome -def: "A collection of sequences (often chromosomes) taken as the standard for a given organism and genome assembly." [SO:ke] -synonym: "reference genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:10:03Z - -[Term] -id: SO:0001506 -name: variant_genome -def: "A collection of sequences (often chromosomes) of an individual." [SO:ke] -synonym: "variant genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:11:25Z - -[Term] -id: SO:0001507 -name: variant_collection -def: "A collection of one or more sequences of an individual." [SO:ke] -synonym: "variant collection" RELATED [] -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0001059 ! sequence_alteration -created_by: kareneilbeck -creation_date: 2010-03-03T02:13:28Z - -[Term] -id: SO:0001508 -name: alteration_attribute -synonym: "alteration attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:53:23Z - -[Term] -id: SO:0001509 -name: chromosomal_variation_attribute -synonym: "chromosomal variation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:54:30Z - -[Term] -id: SO:0001510 -name: intrachromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:25Z - -[Term] -id: SO:0001511 -name: interchromosomal -is_a: SO:0001509 ! 
chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:43Z - -[Term] -id: SO:0001512 -name: insertion_attribute -def: "A quality of a chromosomal insertion,." [SO:ke] -synonym: "insertion attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:56Z - -[Term] -id: SO:0001513 -name: tandem -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:37Z - -[Term] -id: SO:0001514 -name: direct -def: "A quality of an insertion where the insert is not in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:49Z - -[Term] -id: SO:0001515 -name: inverted -def: "A quality of an insertion where the insert is in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:40Z - -[Term] -id: SO:0001516 -name: free -def: "The quality of a duplication where the new region exists independently of the original." [SO:ke] -is_a: SO:0001523 ! duplication_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:51Z - -[Term] -id: SO:0001517 -name: inversion_attribute -synonym: "inversion attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:10Z - -[Term] -id: SO:0001518 -name: pericentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:24Z - -[Term] -id: SO:0001519 -name: paracentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:35Z - -[Term] -id: SO:0001520 -name: translocaton_attribute -synonym: "translocation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:47Z - -[Term] -id: SO:0001521 -name: reciprocal -is_a: SO:0001520 ! 
translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:34Z - -[Term] -id: SO:0001522 -name: insertional -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:51Z - -[Term] -id: SO:0001523 -name: duplication_attribute -synonym: "duplication attribute" RELATED [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-05T01:56:33Z - -[Term] -id: SO:0001524 -name: chromosomally_aberrant_genome -synonym: "chromosomally aberrant genome" RELATED [] -is_a: SO:0001506 ! variant_genome -created_by: kareneilbeck -creation_date: 2010-03-05T02:21:00Z - -[Term] -id: SO:0001525 -name: assembly_error_correction -def: "A region of sequence where the final nucleotide assignment differs from the original assembly due to an improvement that replaces a mistake." [SO:ke] -synonym: "assembly error correction" RELATED [] -is_a: SO:0000413 ! sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:16:31Z - -[Term] -id: SO:0001526 -name: base_call_error_correction -def: "A region of sequence where the final nucleotide assignment is different from that given by the base caller due to an improvement that replaces a mistake." [SO:ke] -synonym: "base call error correction" RELATED [] -is_a: SO:0000413 ! sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:18:07Z - -[Term] -id: SO:0001527 -name: peptide_localization_signal -def: "A region of peptide sequence used to target the polypeptide molecule to a specific organelle." [SO:ke] -subset: SOFA -synonym: "localization signal" RELATED [] -synonym: "peptide localization signal" EXACT [] -is_a: SO:0000839 ! polypeptide_region -created_by: kareneilbeck -creation_date: 2010-03-11T02:15:05Z - -[Term] -id: SO:0001528 -name: nuclear_localization_signal -def: "A polypeptide region that targets a polypeptide to the nucleus." 
[SO:ke] -synonym: "NLS" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_localization_signal "wikipedia" -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:16:38Z - -[Term] -id: SO:0001529 -name: endosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the endosome." [SO:ke] -synonym: "endosomal localization signal" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:20:58Z - -[Term] -id: SO:0001530 -name: lysosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the lysosome." [SO:ke] -synonym: "lysosomal localization signal" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:24:10Z - -[Term] -id: SO:0001531 -name: nuclear_export_signal -def: "A polypeptide region that targets a polypeptide to he cytoplasm." [SO:ke] -synonym: "NES" EXACT [] -synonym: "nuclear export signal" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_export_signal -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:25:25Z - -[Term] -id: SO:0001532 -name: recombination_signal_sequence -def: "A region recognized by a recombinase." [SO:ke] -synonym: "recombination signal sequence" RELATED [] -xref: http://en.wikipedia.org/wiki/Recombination_Signal_Sequences "wikipedia" -is_a: SO:0000299 ! specific_recombination_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:16:47Z - -[Term] -id: SO:0001533 -name: cryptic_splice_site -def: "A splice site that is in part of the transcript not normally spliced. They occur via mutation or transcriptional error." [SO:ke] -synonym: "cryptic splice signal" RELATED [] -synonym: "cryptic splice site" EXACT [] -is_a: SO:0000162 ! 
splice_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:25:06Z - -[Term] -id: SO:0001534 -name: nuclear_rim_localization_signal -def: "A polypeptide region that targets a polypeptide to the nuclear rim." [SO:ke] -synonym: "nuclear rim localization signal" RELATED [] -xref: PMID:16027110 -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T03:31:30Z - -[Term] -id: SO:0001535 -name: p_element -def: "A P_element is a DNA transposon responsible for hybrid dysgenesis." [SO:ke] -synonym: "P element" RELATED [] -is_a: SO:0000182 ! DNA_transposon -created_by: kareneilbeck -creation_date: 2010-03-12T03:40:33Z - -[Term] -id: SO:0001536 -name: functional_variant -def: "A sequence variant which alters a biological process or function." [SO:ke] -synonym: "functional variant" EXACT [] -is_a: SO:0001060 ! sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:30:25Z - -[Term] -id: SO:0001537 -name: structural_variant -def: "A sequence variant that changes one or more sequence features." [SO:ke] -synonym: "structural variant" RELATED [] -is_a: SO:0001060 ! sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:31:01Z - -[Term] -id: SO:0001538 -name: transcript_function_variant -def: "A sequence variant which alters the functioning of a transcript." [SO:ke] -synonym: "transcript function variant" EXACT [] -is_a: SO:0001536 ! functional_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:32:58Z - -[Term] -id: SO:0001539 -name: translational_product_function_variant -def: "A sequence variant that affects the functioning of a translational product." [SO:ke] -synonym: "translational product variant" EXACT [] -is_a: SO:0001536 ! functional_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:46:15Z - -[Term] -id: SO:0001540 -name: level_of_transcript_variant -def: "A sequence variant which alters the level of a transcript." 
[SO:ke] -synonym: "level of transcript variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:47:07Z - -[Term] -id: SO:0001541 -name: decreased_transcript_level -def: "A sequence variant that increases the level of mature, spliced and processed RNA." [SO:ke] -synonym: "decreased transcript level" EXACT [] -is_a: SO:0001540 ! level_of_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:47:47Z - -[Term] -id: SO:0001542 -name: increased_transcript_level -def: "A sequence variant that increases the level of mature, spliced and processed RNA." [SO:ke] -synonym: "increased transcript level" EXACT [] -is_a: SO:0001540 ! level_of_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:48:17Z - -[Term] -id: SO:0001543 -name: transcript_processing_variant -def: "A sequence variant that affects the post transcriptional processing of a transcript." [SO:ke] -synonym: "transcript processing variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:48:48Z - -[Term] -id: SO:0001544 -name: editing_variant -def: "A variant that changes editing of a transcript." [SO:ke] -synonym: "editing variant" EXACT [] -is_a: SO:0001543 ! transcript_processing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:49:25Z - -[Term] -id: SO:0001545 -name: polyadenylation_variant -def: "A sequence variant that changes polyadenylation." [SO:ke] -synonym: "polyadenylation variant" EXACT [] -is_a: SO:0001543 ! transcript_processing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:49:40Z - -[Term] -id: SO:0001546 -name: transcript_stability_variant -def: "A variant that changes the stability of a transcript." [SO:ke] -synonym: "transcript stability variant" EXACT [] -is_a: SO:0001538 ! 
transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:01Z - -[Term] -id: SO:0001547 -name: decrease_transcript_stability -def: "A sequence variant that decreases transcript stability." [SO:ke] -synonym: "decrease transcript stability" EXACT [] -is_a: SO:0001546 ! transcript_stability_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:23Z - -[Term] -id: SO:0001548 -name: increase_transcript_stability -def: "A sequence variant that increases transcript stability." [SO:ke] -synonym: "increase transcript stability" EXACT [] -is_a: SO:0001546 ! transcript_stability_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:39Z - -[Term] -id: SO:0001549 -name: transcription_variant -def: "A variant that changes alters the transcription of a transcript." [SO:ke] -synonym: "transcription variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:51:26Z - -[Term] -id: SO:0001550 -name: rate_of_transcription_variant -def: "A sequence variant that changes the rate of transcription." [SO:ke] -synonym: "rate of transcription variant" EXACT [] -is_a: SO:0001549 ! transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:51:50Z - -[Term] -id: SO:0001551 -name: increase_transcription_rate -def: "A sequence variant that increases the rate of transcription." [SO:ke] -synonym: "increase transcription rate" RELATED [] -is_a: SO:0001550 ! rate_of_transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:52:17Z - -[Term] -id: SO:0001552 -name: decrease_transcription_rate -def: "A sequence variant that decreases the rate of transcription." [SO:ke] -synonym: "decrease transcription rate" EXACT [] -is_a: SO:0001550 ! 
rate_of_transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:52:43Z - -[Term] -id: SO:0001553 -name: translational_product_level_variant -synonym: "translational product level variant" EXACT [] -is_a: SO:0001539 ! translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:53:32Z - -[Term] -id: SO:0001554 -name: polypeptide_function_variant -def: "A sequence variant which changes polypeptide functioning." [SO:ke] -synonym: "polypeptide function variant" EXACT [] -is_a: SO:0001539 ! translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:53:54Z - -[Term] -id: SO:0001555 -name: decreased_translational_product_level -def: "A sequence variant which decreases the translational product level." [SO:ke] -synonym: "decrease translational product level" EXACT [] -is_a: SO:0001553 ! translational_product_level_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:54:25Z - -[Term] -id: SO:0001556 -name: increased_translational_product_level -def: "A sequence variant which increases the translational product level." [SO:ke] -synonym: "increase translational product level" EXACT [] -is_a: SO:0001553 ! translational_product_level_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:55:25Z - -[Term] -id: SO:0001557 -name: polypeptide_gain_of_function -def: "A sequence variant which causes gain of polypeptide function." [SO:ke] -synonym: "polypeptide gain of function" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:12Z - -[Term] -id: SO:0001558 -name: polypeptide_localization_variant -def: "A sequence variant which changes the localization of a polypeptide." [SO:ke] -synonym: "polypeptide localization variant" EXACT [] -is_a: SO:0001554 ! 
polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:37Z - -[Term] -id: SO:0001559 -name: polypeptide_loss_of_function -def: "A sequence variant that causes the loss of a polypeptide function." [SO:ke] -synonym: "polypeptide loss of function" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:58Z - -[Term] -id: SO:0001560 -name: inactive_ligand_binding_site -def: "A sequence variant that causes the inactivation of a ligand binding site." [SO:ke] -synonym: "inactive ligand binding site" EXACT [] -is_a: SO:0001559 ! polypeptide_loss_of_function -created_by: kareneilbeck -creation_date: 2010-03-22T11:58:00Z - -[Term] -id: SO:0001561 -name: polypeptide_partial_loss_of_function -def: "A sequence variant that causes some but not all loss of polypeptide function." [SO:ke] -synonym: "polypeptide partial loss of function" EXACT [] -is_a: SO:0001559 ! polypeptide_loss_of_function -created_by: kareneilbeck -creation_date: 2010-03-22T11:58:32Z - -[Term] -id: SO:0001562 -name: polypeptide_post_translational_processing_variant -def: "A sequence variant that causes a change in post translational processing of the peptide." [SO:ke] -synonym: "polypeptide post translational processing variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:59:06Z - -[Term] -id: SO:0001563 -name: copy_number_change -def: "A sequence variant where copies of a feature (CNV) are either increased or decreased." [SO:ke] -synonym: "copy number change" EXACT [] -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:27:33Z - -[Term] -id: SO:0001564 -name: gene_structure_variant -def: "A sequence variant where the structure of the gene is changed." [SO:ke] -synonym: "gene structure variant" EXACT [] -is_a: SO:0001537 ! 
structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:01Z - -[Term] -id: SO:0001565 -name: gene_fusion -def: "A sequence variant whereby a two genes have become joined." [SO:ke] -synonym: "gene fusion" EXACT [] -is_a: SO:0001564 ! gene_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:28Z - -[Term] -id: SO:0001566 -name: regulatory_region_variant -def: "A sequence variant located with a regulatory region such as a promoter." [SO:ke] -comment: EBI term: Regulatory region variations - In regulatory region annotated by Ensembl. -synonym: "regulatory region variant" EXACT [] -synonym: "regulatory_region_" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:48Z - -[Term] -id: SO:0001567 -name: stop_retained_variant -def: "A sequence variant where at least one base in the terminator codon is changed, but the terminator remains." [SO:ke] -synonym: "stop retained variant" EXACT [] -is_a: SO:0001590 ! terminator_codon_variant -created_by: kareneilbeck -creation_date: 2010-04-19T05:02:30Z - -[Term] -id: SO:0001568 -name: splicing_variant -def: "A sequence variant that changes the process of splicing." [SO:ke] -synonym: "splicing variant" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:29:22Z - -[Term] -id: SO:0001569 -name: cryptic_splice_site_variant -def: "A sequence variant causing a new (functional) splice site." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "cryptic splice site activation" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001568 ! 
splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:29:41Z - -[Term] -id: SO:0001570 -name: cryptic_splice_acceptor -def: "A sequence variant whereby a new splice site is created due to the activation of a new acceptor." [SO:ke] -synonym: "cryptic splice acceptor" EXACT [] -is_a: SO:0001569 ! cryptic_splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:30:11Z - -[Term] -id: SO:0001571 -name: cryptic_splice_donor -def: "A sequence variant whereby a new splice site is created due to the activation of a new donor." [SO:ke] -synonym: "cryptic splice donor" EXACT [] -is_a: SO:0001569 ! cryptic_splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:30:35Z - -[Term] -id: SO:0001572 -name: exon_loss -def: "A sequence variant whereby an exon is lost from the transcript." [SO:ke] -synonym: "exon loss" EXACT [] -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:09Z - -[Term] -id: SO:0001573 -name: intron_gain -def: "A sequence variant whereby an intron is gained by the processed transcript; usually a result of an alteration of the donor or acceptor." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "intron gain" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:25Z - -[Term] -id: SO:0001574 -name: splice_acceptor_variant -def: "A splice variant that changes the2 base region at the 3' end of an intron." [SO:ke] -synonym: "splice acceptor variant" EXACT [] -is_a: SO:0001629 ! splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:52Z - -[Term] -id: SO:0001575 -name: splice_donor_variant -def: "A splice variant that changes the2 base region at the 5' end of an intron." [SO:ke] -synonym: "splice donor variant" EXACT [] -is_a: SO:0001629 ! 
splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:32:10Z - -[Term] -id: SO:0001576 -name: transcript_variant -def: "A sequence variant that changes the structure of the transcript." [SO:ke] -synonym: "transcript variant" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:32:41Z - -[Term] -id: SO:0001577 -name: complex_change_in_transcript -def: "A transcript variant with a complex INDEL- Insertion or deletion that spans an exon/intron border or a coding sequence/UTR border." [http://uswest.ensembl.org/info/docs/variation/index.html] -comment: EBI term: Complex InDel - Insertion or deletion that spans an exon/intron border or a coding sequence/UTR border. -synonym: "complex_indel" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -synonym: "complext change in transcript" EXACT [] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:33:03Z - -[Term] -id: SO:0001578 -name: stop_lost -def: "A sequence variant where at least one base of the terminator codon (stop) is changed, resulting in an elongated transcript." [SO:ke] -comment: EBI term: Stop lost - In coding sequence, resulting in the loss of a stop codon. -synonym: "stop codon lost" EXACT [] -synonym: "stop lost" EXACT [] -synonym: "stop_lost" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001590 ! terminator_codon_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:46:42Z - -[Term] -id: SO:0001579 -name: transcript_sequence_variant -synonym: "transcript sequence variant" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001580 -name: coding_sequence_variant -def: "A sequence variant that changes the coding sequence." [SO:ke] -synonym: "coding sequence variant" EXACT [] -is_a: SO:0001576 ! 
transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:34:36Z - -[Term] -id: SO:0001581 -name: codon_variant -def: "A sequence variant that changes at least one base in a codon." [SO:ke] -synonym: "coding variant" EXACT [] -synonym: "codon variant" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001580 ! coding_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:34:56Z - -[Term] -id: SO:0001582 -name: initiator_codon_change -def: "A codon variant that changes at least one base of the first codon of a transcript." [SO:ke] -synonym: "initiator codon change" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001581 ! codon_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:35:18Z - -[Term] -id: SO:0001583 -name: non_synonymous_codon -alt_id: SO:0001584 -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for a different amino acid." [SO:ke] -comment: EBI term: Non-synonymous SNPs. SNPs that are located in the coding sequence and result in an amino acid change in the encoded peptide sequence. -synonym: "missense" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "missense_codon" EXACT [] -synonym: "non synonymous codon" EXACT [] -synonym: "non_synonymous_coding" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -xref: http:http\://en.wikipedia.org/wiki/Missense_mutation "wiki" -is_a: SO:0001581 ! codon_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:35:49Z - -[Term] -id: SO:0001585 -name: conservative_missense_codon -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for a different but similar amino acid. These variants may or may not be deleterious." 
[SO:ke] -synonym: "conservative missense codon" EXACT [] -synonym: "neutral missense codon" RELATED [] -synonym: "quiet missense codon" RELATED [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001583 ! non_synonymous_codon -created_by: kareneilbeck -creation_date: 2010-03-22T02:36:40Z - -[Term] -id: SO:0001586 -name: non_conservative_missense_codon -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for an amino acid with different biochemical properties." [SO:ke] -synonym: "non conservative missense codon" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001583 ! non_synonymous_codon -created_by: kareneilbeck -creation_date: 2010-03-22T02:37:16Z - -[Term] -id: SO:0001587 -name: stop_gained -def: "A sequence variant whereby at least one base of a codon is changed, resulting in a premature stop codon, leading to a shortened transcript." [SO:ke] -comment: EBI term: Stop gained - In coding sequence, resulting in the gain of a stop codon (i.e. leading to a shortened peptide sequence). -synonym: "nonsense" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "nonsense codon" EXACT [] -synonym: "stop codon gained" RELATED [] -synonym: "stop gained" EXACT [] -synonym: "stop_gained" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001583 ! non_synonymous_codon -created_by: kareneilbeck -creation_date: 2010-03-22T02:37:52Z - -[Term] -id: SO:0001588 -name: synonymous_codon -def: "A sequence variant whereby a base of a codon is changed, but there is no resulting change to the encoded amino acid." [SO:ke] -comment: EBI term: Synonymous SNPs - In coding sequence, not resulting in an amino acid change (i.e. silent mutation).\nThis term is sometimes used synonomously with the more general term 'silent mutation', although a silent mutation may occur in non coding sequence. 
The best practice is to annotate to the most specific term. -synonym: "coding-synon" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "silent mutation" RELATED [] -synonym: "silent substitution" RELATED [] -synonym: "silent_mutation" RELATED [] -synonym: "synonymous codon" EXACT [] -synonym: "synonymous_coding" EXACT ebi_variants [] -xref: http://en.wikipedia.org/wiki/Silent_mutation "wiki" -xref: http://en.wikipedia.org/wiki/Synonymous_mutation -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001581 ! codon_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:39:38Z - -[Term] -id: SO:0001589 -name: frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three." [SO:ke] -comment: EBI term:Frameshift variations - In coding sequence, resulting in a frameshift. -synonym: "frameshift variant" EXACT [] -synonym: "frameshift_" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "frameshift_coding" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001580 ! coding_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:40:19Z - -[Term] -id: SO:0001590 -name: terminator_codon_variant -def: "A sequence variant whereby at least one of the bases in the terminator codon is changed." [SO:ke] -synonym: "terminator codon variant" EXACT [] -is_a: SO:0001625 ! terminal_codon_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:40:37Z - -[Term] -id: SO:0001591 -name: frame_restoring_variant -def: "A sequence variant that reverts the sequence of a previous frameshift mutation back to the initial frame." [SO:ke] -synonym: "frame restoring variant" EXACT [] -is_a: SO:0001589 ! 
frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:09Z - -[Term] -id: SO:0001592 -name: minus_1_frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, by shifting one base ahead." [http://arjournals.annualreviews.org/doi/pdf/10.1146/annurev.ge.08.120174.001535] -synonym: "-1 frameshift variant" EXACT [] -synonym: "minus 1 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:30Z - -[Term] -id: SO:0001593 -name: minus_2_frameshift_variant -synonym: "-2 frameshift variant" EXACT [] -synonym: "minus 2 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:52Z - -[Term] -id: SO:0001594 -name: plus_1_frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, by shifting one base backward." [http://arjournals.annualreviews.org/doi/pdf/10.1146/annurev.ge.08.120174.001535] -synonym: "+1 frameshift variant" EXACT [] -synonym: "plus 1 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:42:06Z - -[Term] -id: SO:0001595 -name: plus_2_frameshift variant -synonym: "+2 frameshift variant" EXACT [] -synonym: "plus 2 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:42:23Z - -[Term] -id: SO:0001596 -name: transcript_secondary_structure_variant -def: "A sequence variant within a transcript that changes the secondary structure of the RNA product." [SO:ke] -synonym: "transcript secondary structure variant" EXACT [] -is_a: SO:0001576 ! 
transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:43:18Z - -[Term] -id: SO:0001597 -name: compensatory_transcript_secondary_structure_variant -def: "A secondary structure variant that compensate for the change made by a previous variant." [SO:ke] -synonym: "compensatory transcript secondary structure variant" EXACT [] -is_a: SO:0001596 ! transcript_secondary_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:43:54Z - -[Term] -id: SO:0001598 -name: translational_product_structure_variant -def: "A sequence variant within the transcript that changes the structure of the translational product." [SO:ke] -synonym: "translational product structure variant" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:44:17Z - -[Term] -id: SO:0001599 -name: 3D_polypeptide_structure_variant -def: "A sequence variant that changes the resulting polypeptide structure." [SO:ke] -synonym: "3D polypeptide structure variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:44:46Z - -[Term] -id: SO:0001600 -name: complex_3D_structural_variant -def: "A sequence variant that changes the resulting polypeptide structure." [SO:ke] -synonym: "complex 3D structural variant" EXACT [] -is_a: SO:0001599 ! 3D_polypeptide_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:45:13Z - -[Term] -id: SO:0001601 -name: conformational_change_variant -def: "A sequence variant in the CDS region that causes a conformational change in the resulting polypeptide sequence." [SO:ke] -synonym: "conformational change variant" EXACT [] -is_a: SO:0001599 ! 
3D_polypeptide_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:45:48Z - -[Term] -id: SO:0001602 -name: complex_change_of_translational_product_variant -synonym: "complex change of translational product variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:46:54Z - -[Term] -id: SO:0001603 -name: polypeptide_sequence_variant -def: "A sequence variant with in the CDS that causes a change in the resulting polypeptide sequence." [SO:ke] -synonym: "polypeptide sequence variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:13Z - -[Term] -id: SO:0001604 -name: amino_acid_deletion -def: "A sequence variant within a CDS resulting in the loss of an amino acid from the resulting polypeptide." [SO:ke] -synonym: "amino acid deletion" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:36Z - -[Term] -id: SO:0001605 -name: amino_acid_insertion -def: "A sequence variant within a CDS resulting in the gain of an amino acid to the resulting polypeptide." [SO:ke] -synonym: "amino acid insertion" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:56Z - -[Term] -id: SO:0001606 -name: amino_acid_substitution -def: "A sequence variant of a codon resulting in the substitution of one amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "amino acid substitution" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! 
polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:48:17Z - -[Term] -id: SO:0001607 -name: conservative_amino_acid_substitution -def: "A sequence variant of a codon causing the substitution of a similar amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "conservative amino acid substitution" EXACT [] -is_a: SO:0001606 ! amino_acid_substitution -created_by: kareneilbeck -creation_date: 2010-03-22T02:48:57Z - -[Term] -id: SO:0001608 -name: non_conservative_amino_acid_substitution -def: "A sequence variant of a codon causing the substitution of a non conservative amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "non conservative amino acid substitution" EXACT [] -is_a: SO:0001606 ! amino_acid_substitution -created_by: kareneilbeck -creation_date: 2010-03-22T02:49:23Z - -[Term] -id: SO:0001609 -name: elongated_polypeptide -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence." [SO:ke] -synonym: "elongated polypeptide" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:49:52Z - -[Term] -id: SO:0001610 -name: elongated_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated polypeptide C terminal" EXACT [] -is_a: SO:0001609 ! elongated_polypeptide -created_by: kareneilbeck -creation_date: 2010-03-22T02:50:20Z - -[Term] -id: SO:0001611 -name: elongated_polypeptide_N_terminal -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated polypeptide N terminal" EXACT [] -is_a: SO:0001609 ! 
elongated_polypeptide -created_by: kareneilbeck -creation_date: 2010-03-22T02:50:31Z - -[Term] -id: SO:0001612 -name: elongated_in_frame_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes in frame elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated in frame polypeptide C terminal" EXACT [] -is_a: SO:0001610 ! elongated_polypeptide_C_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:05Z - -[Term] -id: SO:0001613 -name: elongated_out_of_frame_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes out of frame elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated polypeptide out of frame C terminal" EXACT [] -is_a: SO:0001610 ! elongated_polypeptide_C_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:20Z - -[Term] -id: SO:0001614 -name: elongated_in_frame_polypeptide_N_terminal_elongation -def: "A sequence variant with in the CDS that causes in frame elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated in frame polypeptide N terminal" EXACT [] -is_a: SO:0001611 ! elongated_polypeptide_N_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:49Z - -[Term] -id: SO:0001615 -name: elongated_out_of_frame_polypeptide_N_terminal -def: "A sequence variant with in the CDS that causes out of frame elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated out of frame N terminal" EXACT [] -is_a: SO:0001611 ! elongated_polypeptide_N_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:52:05Z - -[Term] -id: SO:0001616 -name: polypeptide_fusion -def: "A sequence variant that causes a fusion of two polypeptide sequences." [SO:ke] -synonym: "polypeptide fusion" EXACT [] -is_a: SO:0001603 ! 
polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:52:43Z - -[Term] -id: SO:0001617 -name: polypeptide_truncation -def: "A sequence variant of the CD that causes a truncation of the resulting polypeptide." [SO:ke] -synonym: "polypeptide truncation" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:53:07Z - -[Term] -id: SO:0001618 -name: inactive_catalytic_site -def: "A sequence variant that causes the inactivation of a catalytic site." [SO:ke] -synonym: "inactive catalytic site" EXACT [] -is_a: SO:0001560 ! inactive_ligand_binding_site -created_by: kareneilbeck -creation_date: 2010-03-22T03:06:14Z - -[Term] -id: SO:0001619 -name: nc_transcript_variant -def: "A transcript variant of a non coding RNA gene." [SO:ke] -comment: Within non-coding gene - Located within a gene that does not code for a protein. -synonym: "nc transcript variant" EXACT [] -synonym: "non coding transcript variant" EXACT [] -synonym: "within_non_coding_gene" EXACT dbsnp [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:16:23Z - -[Term] -id: SO:0001620 -name: mature_miRNA_variant -def: "A transcript variant located with the sequence of the mature miRNA." [SO:ke] -comment: EBI term: Within mature miRNA - Located within a microRNA. -synonym: "mature miRNA variant" EXACT [] -synonym: "within_mature_miRNA" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001619 ! nc_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:16:58Z - -[Term] -id: SO:0001621 -name: NMD_transcript_variant -def: "A variant in a transcript that is the target of NMD." 
[SO:ke] -synonym: "NMD transcript variant" EXACT [] -synonym: "NMD_transcript" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:20:40Z - -[Term] -id: SO:0001622 -name: UTR_variant -def: "A transcript variant that is located within the UTR." [SO:ke] -synonym: "UTR variant" EXACT [] -synonym: "UTR_" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:22:58Z - -[Term] -id: SO:0001623 -name: 5_prime_UTR_variant -def: "A UTR variant of the 5' UTR." [SO:ke] -comment: EBI term: 5prime UTR variations - In 5prime UTR (untranslated region). -synonym: "5'UTR variant" EXACT [] -synonym: "5PRIME_UTR" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -synonym: "five prime UTR variant" EXACT [] -synonym: "untranslated-5" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001622 ! UTR_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:23:29Z - -[Term] -id: SO:0001624 -name: 3_prime_UTR_variant -def: "A UTR variant of the 3' UTR." [SO:ke] -comment: EBI term 3prime UTR variations - In 3prime UTR. -synonym: "3'UTR variant" EXACT [] -synonym: "3PRIME_UTR" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -synonym: "three prime UTR variant" EXACT [] -synonym: "untranslated-3" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001622 ! UTR_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:23:54Z - -[Term] -id: SO:0001625 -name: terminal_codon_variant -def: "A codon variant that changes at least one base of the last codon of the transcript." [SO:ke] -comment: The terminal codon may be the terminator, or in an incomplete transcript the last available codon. -synonym: "terminal codon variant" EXACT [] -is_a: SO:0001581 ! 
codon_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:49:55Z - -[Term] -id: SO:0001626 -name: incomplete_terminal_codon_variant -def: "A sequence variant where at least one base of the final codon of an incompletely annotated transcript is changed." [SO:ke] -comment: EBI term: Partial codon - Located within the final, incomplete codon of a transcript with a shortened coding sequence where the end is unknown. -synonym: "incomplete terminal codon variant" EXACT [] -synonym: "partial_codon" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001625 ! terminal_codon_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:51:15Z - -[Term] -id: SO:0001627 -name: intron_variant -def: "A transcript variant occurring within an intron." [SO:ke] -comment: EBI term: Intronic variations - In intron. -synonym: "intron variant" EXACT [] -synonym: "intron_" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "intronic" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:52:38Z - -[Term] -id: SO:0001628 -name: intergenic_variant -def: "A sequence variant located in the intergenic region, between genes." [SO:ke] -comment: EBI term Intergenic variations - More than 5 kb either upstream or downstream of a transcript. -synonym: "intergenic" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -synonym: "intergenic variant" EXACT [] -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-23T05:07:37Z - -[Term] -id: SO:0001629 -name: splice_site_variant -def: "A sequence variant that changes the first two or last two bases of an intron." [http://uswest.ensembl.org/info/docs/variation/index.html] -comment: EBI term - essential splice site - In the first 2 or the last 2 base pairs of an intron. 
-synonym: "essential_splice_site" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -synonym: "splice site variant" EXACT [] -is_a: SO:0001627 ! intron_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:42:00Z - -[Term] -id: SO:0001630 -name: splice_region_variant -def: "A sequence variant in which a change has occurred within the region of the splice site, either within 1-3 bases of the exon or 3-8 bases of the intron." [http://uswest.ensembl.org/info/docs/variation/index.html] -comment: EBI term: splice site - 1-3 bps into an exon or 3-8 bps into an intron. -synonym: "splice region variant" EXACT [] -synonym: "splice_region_variant" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:46:02Z - -[Term] -id: SO:0001631 -name: upstream_gene_variant -def: "A sequence variant located 5' of a gene." [SO:ke] -comment: Different groups annotate up and downstream to different lengths. The subtypes are specific and are backed up with cross references. -synonym: "upstream gene variant" EXACT [] -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:49:13Z - -[Term] -id: SO:0001632 -name: downstream_gene_variant -def: "A sequence variant located 3' of a gene." [SO:ke] -comment: Different groups annotate up and downstream to different lengths. The subtypes are specific and are backed up with cross references. -synonym: "downstream gene variant" EXACT [] -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:49:38Z - -[Term] -id: SO:0001633 -name: 5KB_downstream_variant -def: "A sequence variant located within 5 KB of the end of a gene." [SO:ke] -comment: EBI term Downstream variations - Within 5 kb downstream of the 3prime end of a transcript. 
-synonym: "5KB downstream variant" EXACT [] -synonym: "downstream" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -synonym: "within 5KB downstream" RELATED [] -is_a: SO:0001632 ! downstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:50:16Z - -[Term] -id: SO:0001634 -name: 500B_downstream_variant -def: "A sequence variant located within a half KB of the end of a gene." [SO:ke] -synonym: "500B downstream variant" EXACT [] -synonym: "near-gene-3" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001633 ! 5KB_downstream_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:50:42Z - -[Term] -id: SO:0001635 -name: 5KB_upstream_variant -def: "A sequence variant located within 5KB 5' of a gene." [SO:ke] -comment: EBI term Upstream variations - Within 5 kb upstream of the 5prime end of a transcript. -synonym: "5kb upstream variant" EXACT [] -synonym: "upstream" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001631 ! upstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:51:06Z - -[Term] -id: SO:0001636 -name: 2KB_upstream_variant -def: "A sequence variant located within 2KB 5' of a gene." [SO:ke] -synonym: "2KB upstream variant" EXACT [] -synonym: "near-gene-5" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001635 ! 5KB_upstream_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:51:22Z - -[Term] -id: SO:0001637 -name: rRNA_gene -def: "A gene that encodes for ribosomal RNA." [SO:ke] -synonym: "rRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:10:32Z - -[Term] -id: SO:0001638 -name: piRNA_gene -def: "A gene that encodes for an piwi associated RNA." [SO:ke] -synonym: "piRNA gene" EXACT [] -is_a: SO:0001263 ! 
ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:11:36Z - -[Term] -id: SO:0001639 -name: RNase_P_RNA_gene -def: "A gene that encodes an RNase P RNA." [SO:ke] -synonym: "RNase P RNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:13:23Z - -[Term] -id: SO:0001640 -name: RNase_MRP_RNA_gene -def: "A gene that encodes a RNase_MRP_RNA." [SO:ke] -synonym: "RNase MRP RNA gene" RELATED [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:13:58Z - -[Term] -id: SO:0001641 -name: lincRNA_gene -def: "A gene that encodes large intervening non-coding RNA." [SO:ke] -synonym: "lincRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:14:24Z - -[Term] -id: SO:0001642 -name: mathematically_defined_repeat -def: "A mathematically defined repeat (MDR) is a experimental feature that is determined by querying overlapping oligomers of length k against a database of shotgun sequence data and identifying regions in the query sequence that exceed a statistically determined threshold of repetitiveness." [SO:jestill] -comment: Mathematically defined repeat regions are determined without regard to the biological origin of the repetitive region. The repeat units of a MDR are the overlapping oligomers of size k that were used to for the query. Tools that can annotate mathematically defined repeats include Tallymer (Kurtz et al 2008, BMC Genomics: 517) and RePS (Wang et al, Genome Res 12(5): 824-831.). -synonym: "mathematically defined repeat" EXACT [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2010-05-03T11:50:14Z - -[Term] -id: SO:0001643 -name: telomerase_RNA_gene -def: "A telomerase RNA gene is a non coding RNA gene the RNA product of which is a component of telomerase." 
[SO:ke] -synonym: "Telomerase RNA component" EXACT [] -synonym: "telomerase RNA gene" EXACT [] -synonym: "TERC" EXACT [] -xref: http:http\://en.wikipedia.org/wiki/Telomerase_RNA_component "wikipedia" -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-05-18T05:26:38Z - -[Term] -id: SO:0001644 -name: targeting_vector -def: "An engineered vector that is able to take part in homologous recombination in a host with the intent of introducing site specific genomic modifications." [MGD:tm, PMID:10354467] -synonym: "targeting vector" RELATED [] -is_a: SO:0000440 ! vector_replicon -is_a: SO:0000804 ! implied link automatically realized ! engineered_region -intersection_of: SO:0000440 ! vector_replicon -intersection_of: has_part SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000783 ! engineered -created_by: kareneilbeck -creation_date: 2010-05-28T02:05:25Z - -[Term] -id: SO:0001645 -name: genetic_marker -def: "A measurable sequence feature that varies within a population." [SO:db] -synonym: "genetic marker" RELATED [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-05-28T02:33:07Z - -[Term] -id: SO:0001646 -name: DArT_marker -def: "A genetic marker, discovered using Diversity Arrays Technology (DArT) technology." [SO:ke] -synonym: "DArT marker" EXACT [] -is_a: SO:0001645 ! genetic_marker -created_by: kareneilbeck -creation_date: 2010-05-28T02:34:43Z - -[Term] -id: SO:0001647 -name: kozak_sequence -def: "A kind of ribosome entry site, specific to Eukaryotic organisms that overlaps part of both 5' UTR and CDS sequence." [SO:ke] -subset: SOFA -synonym: "kozak consensus" EXACT [] -synonym: "kozak consensus sequence" EXACT [] -synonym: "kozak sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Kozak_consensus_sequence "wikipedia" -is_a: SO:0000139 ! 
ribosome_entry_site -created_by: kareneilbeck -creation_date: 2010-06-07T03:12:20Z - -[Term] -id: SO:0001648 -name: nested_transposon -def: "A transposon that is disrupted by the insertion of another element." [SO:ke] -synonym: "nested transposon" EXACT [] -is_a: SO:0000101 ! transposable_element -created_by: kareneilbeck -creation_date: 2010-06-23T03:22:57Z - -[Term] -id: SO:0001649 -name: nested_repeat -def: "A repeat that is disrupted by the insertion of another element." [SO:ke] -synonym: "nested repeat" RELATED [] -is_a: SO:0000657 ! repeat_region -created_by: kareneilbeck -creation_date: 2010-06-23T03:24:55Z - -[Term] -id: SO:0001650 -name: inframe_variant -def: "A sequence variant which does not cause a disruption of the translational reading frame." [SO:ke] -synonym: "cds-indel" EXACT dbsnp [] -synonym: "inframe variant" EXACT [] -is_a: SO:0001580 ! coding_sequence_variant -created_by: kareneilbeck -creation_date: 2010-07-19T01:24:44Z - -[Term] -id: SO:0001651 -name: inframe_codon_gain -def: "A sequence variant which gains a codon, and does not cause a disruption of the translational reading frame." [SO:ke] -synonym: "inframe codon gain" RELATED [] -is_a: SO:0001650 ! inframe_variant -created_by: kareneilbeck -creation_date: 2010-07-19T01:29:08Z - -[Term] -id: SO:0001652 -name: inframe_codon_loss -def: "A sequence variant which loses a codon, and does not cause a disruption of the translational reading frame." [SO:ke] -synonym: "inframe codon loss" RELATED [] -is_a: SO:0001650 ! inframe_variant -created_by: kareneilbeck -creation_date: 2010-07-19T01:29:35Z - -[Term] -id: SO:0001653 -name: retinoic_acid_responsive_element -def: "A transcription factor binding site of variable direct repeats of the sequence PuGGTCA spaced by five nucleotides (DR5) found in the promoters of retinoic acid-responsive genes, to which retinoic acid receptors bind." 
[PMID:11327309, PMID:19917671] -synonym: "RARE" EXACT [] -synonym: "retinoic acid responsive element" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000167 ! promoter -created_by: kareneilbeck -creation_date: 2010-08-03T10:46:12Z - -[Term] -id: SO:0001654 -name: nucleotide_to_protein_binding_site -synonym: "nucleotide to protein binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:26:05Z - -[Term] -id: SO:0001655 -name: nucleotide_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with nucleotide residues." [SO:cb] -comment: See GO:0000166 : nucleotide binding. -synonym: "nucleotide binding site" EXACT [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:30:04Z - -[Term] -id: SO:0001656 -name: metal_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with metal ions." [SO:cb] -comment: See GO:0046872 : metal ion binding. -synonym: "metal binding site" RELATED [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:31:42Z - -[Term] -id: SO:0001657 -name: ligand_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with a small molecule such as a drug, or hormone." [SO:ke] -synonym: "ligand binding site" EXACT [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:32:58Z - -[Term] -id: SO:0001658 -name: nested_tandem_repeat -def: "An NTR is a nested repeat of two distinct tandem motifs interspersed with each other." [SO:AF] -comment: Tracker ID: 3052459. -synonym: "nested tandem repeat" EXACT [] -synonym: "NTR" EXACT [] -is_a: SO:0001649 ! nested_repeat -created_by: kareneilbeck -creation_date: 2010-08-26T09:36:16Z - -[Term] -id: SO:0001659 -name: promoter_element -synonym: "promoter element" EXACT [] -is_a: SO:0000713 ! 
DNA_motif -relationship: overlaps SO:0000235 ! TF_binding_site -created_by: kareneilbeck -creation_date: 2010-10-01T11:48:32Z - -[Term] -id: SO:0001660 -name: core_promoter_element -synonym: "core promoter element" EXACT [] -synonym: "general transcription factor binding site" RELATED [] -is_a: SO:0001659 ! promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T11:49:03Z - -[Term] -id: SO:0001661 -name: RNA_polymerase_II_TATA_box -def: "A TATA box core promoter of a gene transcribed by RNA polymerase II." [PMID:16858867] -synonym: "RNA polymerase II TATA box" EXACT [] -is_a: SO:0000174 ! TATA_box -relationship: part_of SO:0001669 ! RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:42:12Z - -[Term] -id: SO:0001662 -name: RNA_polymerase_III_TATA_box -def: "A TATA box core promoter of a gene transcribed by RNA polymerase III." [SO:ke] -synonym: "RNA polymerase III TATA box" EXACT [] -is_a: SO:0000174 ! TATA_box -relationship: part_of SO:0000171 ! RNApol_III_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:43:16Z - -[Term] -id: SO:0001663 -name: BREd_motif -def: "A core TRNA polymerase II promoter element with consensus (G/A)T(T/G/A)(T/A)(G/T)(T/G)(T/G)." [PMID:16858867] -synonym: "BREd" EXACT [] -synonym: "BREd motif" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:49:55Z - -[Term] -id: SO:0001664 -name: DCE -def: "A discontinuous core element of RNA polymerase II transcribed genes, situated downstream of the TSS. It is composed of three sub elements: SI, SII and SIII." [PMID:16858867] -synonym: "downstream core element" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! 
RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:56:41Z - -[Term] -id: SO:0001665 -name: DCE_SI -def: "A sub element of the DCE core promoter element, with consensus sequence CTTC." [PMID:16858867, SO:ke] -synonym: "DCE SI" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:10Z - -[Term] -id: SO:0001666 -name: DCE_SII -def: "A sub element of the DCE core promoter element with consensus sequence CTGT." [PMID:16858867, SO:ke] -synonym: "DCE SII" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:30Z - -[Term] -id: SO:0001667 -name: DCE_SIII -def: "A sub element of the DCE core promoter element with consensus sequence AGC." [PMID:16858867, SO:ke] -synonym: "DCE SIII" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:44Z - -[Term] -id: SO:0001668 -name: proximal_promoter_element -synonym: "proximal promoter element" RELATED [] -synonym: "specific transcription factor binding site" RELATED [] -is_a: SO:0001678 ! regulatory_promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T03:10:23Z - -[Term] -id: SO:0001669 -name: RNApol_II_core_promoter -def: "The minimal portion of the promoter required to properly initiate transcription in RNA polymerase II transcribed genes." [PMID:16858867] -synonym: "RNApol II core promoter" EXACT [] -is_a: SO:0000170 ! RNApol_II_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T03:13:41Z - -[Term] -id: SO:0001670 -name: distal_promoter_element -synonym: "distal promoter element" RELATED [] -is_a: SO:0001678 ! 
regulatory_promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T03:21:08Z - -[Term] -id: SO:0001671 -name: bacterial_RNApol_promoter_sigma_70 -synonym: "bacterial RNA polymerase promoter sigma 70" EXACT [] -is_a: SO:0000613 ! bacterial_RNApol_promoter -created_by: kareneilbeck -creation_date: 2010-10-06T01:41:34Z - -[Term] -id: SO:0001672 -name: bacterial_RNApol_promoter_sigma54 -synonym: "bacterial RNA polymerase promoter sigma54" EXACT [] -is_a: SO:0000613 ! bacterial_RNApol_promoter -created_by: kareneilbeck -creation_date: 2010-10-06T01:42:37Z - -[Term] -id: SO:0001673 -name: minus_12_signal -def: "A conserved region about 12-bp upstream of the start point of bacterial transcription units, involved with sigma factor 54." [PMID:18331472] -synonym: "minus 12 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001672 ! bacterial_RNApol_promoter_sigma54 -created_by: kareneilbeck -creation_date: 2010-10-06T01:44:57Z - -[Term] -id: SO:0001674 -name: minus_24_signal -def: "A conserved region about 12-bp upstream of the start point of bacterial transcription units, involved with sigma factor 54." [PMID:18331472] -synonym: "minus 24 signal" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001672 ! bacterial_RNApol_promoter_sigma54 -created_by: kareneilbeck -creation_date: 2010-10-06T01:45:24Z - -[Term] -id: SO:0001675 -name: A_box_type_1 -def: "An A box within an RNA polymerase III type 1 promoter." [SO:ke] -comment: The A box can be found in the promoters of type 1 and type 2 (pol III) so sub-typing here allows the part of relationship of the subtypes to remain true. -synonym: "A box type 1" RELATED [] -is_a: SO:0000619 ! A_box -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 -created_by: kareneilbeck -creation_date: 2010-10-06T05:43:43Z - -[Term] -id: SO:0001676 -name: A_box_type_2 -def: "An A box within an RNA polymerase III type 2 promoter." 
[SO:ke] -comment: The A box can be found in the promoters of type 1 and type 2 (pol III) so sub-typing here allows the part of relationship of the subtypes to remain true. -synonym: "A box type 2" RELATED [] -is_a: SO:0000619 ! A_box -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 -created_by: kareneilbeck -creation_date: 2010-10-06T05:44:18Z - -[Term] -id: SO:0001677 -name: intermediate_element -def: "A core promoter region of RNA polymerase III type 1 promoters." [PMID:12381659] -synonym: "IE" EXACT [] -synonym: "intermediate element" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 -created_by: kareneilbeck -creation_date: 2010-10-06T05:52:03Z - -[Term] -id: SO:0001678 -name: regulatory_promoter_element -def: "A promoter element that is not part of the core promoter, but provides the promoter with a specific regulatory region." [PMID:12381659] -synonym: "regulatory promoter element" RELATED [] -is_a: SO:0001659 ! promoter_element -created_by: kareneilbeck -creation_date: 2010-10-07T04:39:48Z - -[Term] -id: SO:0001679 -name: transcription_regulatory_region -def: "A regulatory region that is involved in the control of the process of transcription." [SO:ke] -synonym: "transcription regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:49:35Z - -[Term] -id: SO:0001680 -name: translation_regulatory_region -def: "A regulatory region that is involved in the control of the process of translation." [SO:ke] -synonym: "translation regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:52:45Z - -[Term] -id: SO:0001681 -name: recombination_regulatory_region -def: "A regulatory region that is involved in the control of the process of recombination." [SO:ke] -synonym: "recombination regulatory region" EXACT [] -is_a: SO:0005836 ! 
regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:53:35Z - -[Term] -id: SO:0001682 -name: replication_regulatory_region -def: "A regulatory region that is involved in the control of the process of nucleotide replication." [SO:ke] -synonym: "replication regulatory region" RELATED [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:54:09Z - -[Term] -id: SO:0001683 -name: sequence_motif -def: "A sequence motif is a nucleotide or amino-acid sequence pattern that may have biological significance." [http://en.wikipedia.org/wiki/Sequence_motif] -synonym: "sequence motif" RELATED [] -xref: http://en.wikipedia.org/wiki/Sequence_motif "wikipedia" -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-10-14T04:13:22Z - -[Term] -id: SO:0001684 -name: experimental_feature_attribute -def: "An attribute of an experimentally derived feature." [SO:ke] -synonym: "experimental feature attribute" RELATED [] -is_a: SO:0000733 ! feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:22:23Z - -[Term] -id: SO:0001685 -name: score -def: "The score of an experimentally derived feature such as a p-value." [SO:ke] -is_a: SO:0001684 ! experimental_feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:23:16Z - -[Term] -id: SO:0001686 -name: quality_value -def: "An experimental feature attribute that defines the quality of the feature in a quantitative way, such as a phred quality score." [SO:ke] -synonym: "quality value" RELATED [] -is_a: SO:0001684 ! experimental_feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:24:11Z - -[Term] -id: SO:0001687 -name: restriction_enzyme_recognition_site -def: "The nucleotide region (usually a palindrome) that is recognized by a restriction enzyme. This may or may not be equal to the restriction enzyme binding site." 
[SO:ke] -synonym: "restriction endonuclease recognition site" EXACT [] -synonym: "restriction enzyme recognition site" EXACT [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-10-29T12:29:57Z - -[Term] -id: SO:0001688 -name: restriction_enzyme_cleavage_junction -def: "The boundary at which a restriction enzyme breaks the nucleotide sequence." [SO:ke] -synonym: "restriction enzyme cleavage junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2010-10-29T12:35:02Z - -[Term] -id: SO:0001689 -name: five_prime_restriction_enzyme_junction -def: "The restriction enzyme cleavage junction on the 5' strand of the nucleotide sequence." [SO:ke] -synonym: "5' restriction enzyme junction" EXACT [] -is_a: SO:0001694 ! single_strand_restriction_enzyme_cleavage_site -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:36:24Z - -[Term] -id: SO:0001690 -name: three_prime_restriction_enzyme_junction -synonym: "3' restriction enzyme junction" EXACT [] -is_a: SO:0001694 ! single_strand_restriction_enzyme_cleavage_site -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:37:52Z - -[Term] -id: SO:0001691 -name: blunt_end_restriction_enzyme_cleavage_site -synonym: "blunt end restriction enzyme cleavage site" EXACT [] -is_a: SO:0001687 ! restriction_enzyme_recognition_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:39:53Z - -[Term] -id: SO:0001692 -name: sticky_end_restriction_enzyme_cleavage_site -synonym: "sticky end restriction enzyme cleavage site" RELATED [] -is_a: SO:0001687 ! 
restriction_enzyme_recognition_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:40:50Z - -[Term] -id: SO:0001693 -name: blunt_end_restriction_enzyme_cleavage_junction -def: "A restriction enzyme cleavage site where both strands are cut at the same position." [SO:ke] -synonym: "blunt end restriction enzyme cleavage site" RELATED [] -is_a: SO:0001688 ! restriction_enzyme_cleavage_junction -relationship: part_of SO:0001691 ! blunt_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:43:14Z - -[Term] -id: SO:0001694 -name: single_strand_restriction_enzyme_cleavage_site -def: "A restriction enzyme cleavage site whereby only one strand is cut." [SO:ke] -synonym: "single strand restriction enzyme cleavage site" RELATED [] -is_a: SO:0001688 ! restriction_enzyme_cleavage_junction -created_by: kareneilbeck -creation_date: 2010-10-29T12:44:48Z - -[Term] -id: SO:0001695 -name: restriction_enzyme_single_strand_overhang -def: "A terminal region of DNA sequence where the end of the region is not blunt ended." [SO:ke] -synonym: "single strand overhang" EXACT [] -synonym: "sticky end" RELATED [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:48:35Z - -[Term] -id: SO:0001696 -name: experimentally_defined_binding_region -def: "A region that has been implicated in binding although the exact coordinates of binding may be unknown." [SO:ke] -synonym: "experimentally defined binding region" RELATED [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2010-11-02T11:39:59Z - -[Term] -id: SO:0001697 -name: CHiP_seq_region -def: "A region of sequence identified by CHiP seq technology to contain a protein binding site." [SO:ke] -synonym: "CHiP seq region" RELATED [] -is_a: SO:0001696 ! experimentally_defined_binding_region -relationship: contains SO:0000410 ! 
protein_binding_site -created_by: kareneilbeck -creation_date: 2010-11-02T11:43:07Z - -[Term] -id: SO:0001700 -name: histone_modification -def: "Histone modification is a post translationally modified region whereby residues of the histone protein are modified by methylation, acetylation, phosphorylation, ubiquitination, sumoylation, citrullination, or ADP-ribosylation." [http:en.wikipedia.org/wiki/Histone] -synonym: "histone modification" EXACT [] -synonym: "histone modification site" RELATED [] -is_a: SO:0001089 ! post_translationally_modified_region -is_a: SO:0001720 ! implied link automatically realized ! epigenetically_modified_region -intersection_of: SO:0001089 ! post_translationally_modified_region -intersection_of: has_quality SO:0000133 ! epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-31T10:22:08Z - -[Term] -id: SO:0001701 -name: histone_methylation_site -def: "A histone modification site where the modification is the methylation of the residue." [SO:ke] -synonym: "histone methylation" EXACT [] -synonym: "histone methylation site" EXACT [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:02Z - -[Term] -id: SO:0001702 -name: histone_acetylation_site -def: "A histone modification where the modification is the acylation of the residue." [SO:ke] -synonym: "histone acetylation" EXACT [] -synonym: "histone acetylatoin site" EXACT [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:27Z - -[Term] -id: SO:0001703 -name: H3K9_acetylation_site -def: "A kind of histone modification site, whereby the 9th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 acetylation site" EXACT [] -synonym: "H3K9Ac" RELATED [] -is_a: SO:0001702 ! 
histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:05Z - -[Term] -id: SO:0001704 -name: H3K14_acetylation_site -def: "A kind of histone modification site, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K14 acetylation site" EXACT [] -synonym: "H3K14Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:53Z - -[Term] -id: SO:0001705 -name: H3K4_monomethylation_site -def: "A kind of histone modification, whereby the 4th residue (a lysine), from the start of the H3 protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 mono-methylation site" EXACT [] -synonym: "H3K4me1" RELATED [] -is_a: SO:0001734 ! H3K4_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:28:14Z - -[Term] -id: SO:0001706 -name: H3K4_trimethylation -def: "A kind of histone modification site, whereby the 4th residue (a lysine), from the start of the H3 protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 tri-methylation" EXACT [] -synonym: "H3K4me3" RELATED [] -is_a: SO:0001734 ! H3K4_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:29:12Z - -[Term] -id: SO:0001707 -name: H3K9_trimethylation_site -def: "A kind of histone modification site, whereby the 9th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 tri-methylation site" EXACT [] -synonym: "H3K9Me3" RELATED [] -is_a: SO:0001736 ! H3K9_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:30:34Z - -[Term] -id: SO:0001708 -name: H3K27_monomethylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is mono-methylated." 
[http://en.wikipedia.org/wiki/Histone] -synonym: "H2K27 mono-methylation site" EXACT [] -synonym: "H2K27Me1" RELATED [] -is_a: SO:0001732 ! H3K27_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:31:54Z - -[Term] -id: SO:0001709 -name: H3K27_trimethylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K27 tri-methylation site" EXACT [] -synonym: "H3K27Me3" RELATED [] -is_a: SO:0001732 ! H3K27_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:32:41Z - -[Term] -id: SO:0001710 -name: H3K79_monomethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is mono- methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 mono-methylation site" EXACT [] -synonym: "H3K79me1" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:33:42Z - -[Term] -id: SO:0001711 -name: H3K79_dimethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is di-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 di-methylation site" EXACT [] -synonym: "H3K79Me2" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:34:39Z - -[Term] -id: SO:0001712 -name: H3K79_trimethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 tri-methylation site" EXACT [] -synonym: "H3K79Me3" RELATED [] -is_a: SO:0001735 ! 
H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:35:30Z - -[Term] -id: SO:0001713 -name: H4K20_monomethylation_site -def: "A kind of histone modification site, whereby the 20th residue (a lysine), from the start of the H34histone protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H4K20 mono-methylation site" EXACT [] -synonym: "H4K20Me1" RELATED [] -is_a: SO:0001701 ! histone_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:36:43Z - -[Term] -id: SO:0001714 -name: H2BK5_monomethylation_site -def: "A kind of histone modification site, whereby the 5th residue (a lysine), from the start of the H2B protein is methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2BK5 mono-methylation site" EXACT [] -is_a: SO:0001701 ! histone_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:38:12Z - -[Term] -id: SO:0001715 -name: ISRE -def: "An ISRE is a transcriptional cis regulatory region, containing the consensus region: YAGTTTC(A/T)YTTTYCC, responsible for increased transcription via interferon binding." [http://genesdev.cshlp.org/content/2/4/383.abstrac] -comment: Term requested via tracker (2981725) by Alan Ruttenberg, April 2010. It has been described as both an enhancer and a promoter, so the parent is the more general term. -synonym: "interferon stimulated response element" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region -created_by: kareneilbeck -creation_date: 2010-04-05T11:15:08Z - -[Term] -id: SO:0001716 -name: histone_ubiqitination_site -def: "A histone modification site where ubiquitin may be added." [SO:ke] -synonym: "histone ubiquitination site" RELATED [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-04-13T10:12:18Z - -[Term] -id: SO:0001717 -name: H2B_ubiquitination_site -def: "A histone modification site on H2B where ubiquitin may be added." 
[SO:ke] -synonym: "H2BUbiq" RELATED [] -is_a: SO:0001716 ! histone_ubiqitination_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:13:28Z - -[Term] -id: SO:0001718 -name: H3K18_acetylation_site -def: "A kind of histone modification site, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K18 acetylation site" EXACT [] -synonym: "H3K18Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:39:35Z - -[Term] -id: SO:0001719 -name: H3K23_acylation site -def: "A kind of histone modification, whereby the 23rd residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K23 acylation site" RELATED [] -synonym: "H3K23Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:42:45Z - -[Term] -id: SO:0001720 -name: epigenetically_modified_region -def: "A biological region implicated in inherited changes caused by mechanisms other than changes in the underlying DNA sequence." [http://en.wikipedia.org/wiki/Epigenetics, SO:ke] -subset: SOFA -synonym: "epigenetically modified region" RELATED [] -is_a: SO:0001411 ! biological_region -intersection_of: SO:0001411 ! biological_region -intersection_of: has_quality SO:0000133 ! epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-27T12:02:29Z - -[Term] -id: SO:0001721 -name: H3K27_acylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K27 acylation site" EXACT [] -synonym: "H3K27Ac" RELATED [] -is_a: SO:0001702 ! 
histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:44:09Z - -[Term] -id: SO:0001722 -name: H3K36_monomethylation_site -def: "A kind of histone modification site, whereby the 36th residue (a lysine), from the start of the H3 histone protein is mono-methylated." [SO:ke] -synonym: "H3K36 mono-methylation site" EXACT [] -synonym: "H3K36. -synonym: "transcriptionally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -def: "Expressed in relatively constant amounts without regard to cellular environmental conditions such as the concentration of a particular substrate." [SO:ke] -synonym: "transcriptionally constitutive" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -def: "An inducer molecule is required for transcription to occur." [SO:ke] -synonym: "transcriptionally induced" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -def: "A repressor molecule is required for transcription to stop." [SO:ke] -synonym: "transcriptionally repressed" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000127 -name: silenced_gene -def: "A gene that is silenced." [SO:xp] -synonym: "silenced gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000893 ! silenced - -[Term] -id: SO:0000128 -name: gene_silenced_by_DNA_modification -def: "A gene that is silenced by DNA modification." [SO:xp] -synonym: "gene silenced by DNA modification" EXACT [] -is_a: SO:0000127 ! silenced_gene -relationship: has_quality SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000129 -name: gene_silenced_by_DNA_methylation -def: "A gene that is silenced by DNA methylation." [SO:xp] -synonym: "gene silenced by DNA methylation" EXACT [] -synonym: "methylation-silenced gene" EXACT [] -is_a: SO:0000128 ! 
gene_silenced_by_DNA_modification -relationship: has_quality SO:0000895 ! silenced_by_DNA_methylation - -[Term] -id: SO:0000130 -name: post_translationally_regulated -def: "An attribute describing a gene that is regulated after it has been translated." [SO:ke] -synonym: "post translationally regulated" EXACT [] -synonym: "post-translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000131 -name: translationally_regulated -def: "An attribute describing a gene that is regulated as it is translated." [SO:ke] -synonym: "translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "DNA reverse primer" EXACT [] -synonym: "reverse DNA primer" EXACT [] -synonym: "reverse primer" EXACT [] -synonym: "reverse primer oligo" EXACT [] -synonym: "reverse primer oligonucleotide" EXACT [] -synonym: "reverse primer sequence" EXACT [] -is_a: SO:0000112 ! primer -relationship: has_quality SO:0001031 ! reverse - -[Term] -id: SO:0000133 -name: epigenetically_modified -def: "This attribute describes a gene where heritable changes other than those in the DNA sequence occur. These changes include: modification to the DNA (such as DNA methylation, the covalent modification of cytosine), and post-translational modification of histones." [SO:ke] -synonym: "epigenetically modified" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000134 -name: imprinted -def: "Imprinted genes are epigenetically modified genes that are expressed monoallelically according to their parent of origin." [SO:ke] -xref: http:http://en.wikipedia.org/wiki/Genomic_imprinting "wiki" -is_a: SO:0000119 ! regulated -is_a: SO:0000133 ! 
epigenetically_modified - -[Term] -id: SO:0000135 -name: maternally_imprinted -def: "The maternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "maternally imprinted" EXACT [] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -def: "The paternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "paternally imprinted" EXACT [] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -def: "Allelic exclusion is a process occurring in diploid organisms, where a gene is inactivated and not expressed in that cell." [SO:ke] -comment: Examples are x-inactivation and immunoglobulin formation. -synonym: "allelically excluded" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000138 -name: gene_rearranged_at_DNA_level -def: "An epigenetically modified gene, rearranged at the DNA level." [SO:xp] -synonym: "gene rearranged at DNA level" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -relationship: has_quality SO:0000904 ! rearranged_at_DNA_level - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0001680 ! translation_regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -synonym: "DNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000145 -name: recoded_codon -def: "A codon that has been redefined at translation. The redefinition may be as a result of translational bypass, translational frameshifting or stop codon readthrough." [SO:xp] -synonym: "recoded codon" EXACT [] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000146 -name: capped -def: "An attribute describing when a sequence, usually an mRNA is capped by the addition of a modified guanine nucleotide at the 5' end." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0001876 ! 
partial_genomic_sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unavailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "yeast artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "bacterial artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000154 -name: PAC -def: "The P1-derived artificial chromosome are DNA constructs that are derived from the DNA of P1 bacteriophage. 
They can carry large amounts (about 100-300 kilobases) of other sequences for a variety of bioengineering purposes. It is one type of vector used to clone DNA fragments (100- to 300-kb insert size; average, 150 kb) in Escherichia coli cells." [http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. Drosophila melanogaster PACs carry an average insert size of 80 kb. The library represents a 6-fold coverage of the genome. -synonym: "P1" EXACT [] -synonym: "P1 artificial chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000155 -name: plasmid -def: "A self replicating, using the hosts cellular machinery, often circular nucleic acid molecule that is distinct from a chromosome in the organism." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "plasmid sequence" EXACT [] -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as a plasmid or packaged as a phage,since they retain the lambda cos sites." [SO:ma] -comment: Paper: vans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1):9-20. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cosmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Cosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma] -synonym: "phagemid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Phagemid "wiki" -is_a: SO:0000440 ! 
vector_replicon - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilizes the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource: mapping and analysis of 96 clones. Genomics 1996. -synonym: "fosmid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Fosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000159 -name: deletion -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -xref: loinc:LA6692-3 "Deletion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_obsolete: true - -[Term] -id: SO:0000161 -name: methylated_A -def: "A modified RNA base in which adenine has been methylated." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinery. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! 
primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a place holder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000166 -name: enhancer_bound_by_factor -def: "An enhancer bound by a factor." 
[SO:xp] -synonym: "enhancer bound by factor" EXACT [] -is_a: SO:0000165 ! enhancer -relationship: has_quality SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." [SO:ke] -synonym: "pol I promoter" EXACT [] -synonym: "polymerase I promoter" EXACT [] -synonym: "RNA polymerase A promoter" EXACT [] -synonym: "RNApol I promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke] -synonym: "pol II promoter" RELATED [] -synonym: "polymerase II promoter" EXACT [] -synonym: "RNA polymerase B promoter" EXACT [] -synonym: "RNApol II promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." [SO:ke] -synonym: "pol III promoter" EXACT [] -synonym: "polymerase III promoter" EXACT [] -synonym: "RNA polymerase C promoter" EXACT [] -synonym: "RNApol III promoter" EXACT [] -is_a: SO:0001203 ! 
RNA_polymerase_promoter - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "CAAT box" EXACT [] -synonym: "CAAT signal" EXACT [] -synonym: "CAAT-box" EXACT [] -xref: http://en.wikipedia.org/wiki/CAAT_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000173 -name: GC_rich_promoter_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "GC rich promoter region" EXACT [] -synonym: "GC-rich region" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:16858867] -comment: Binds TBP. -synonym: "Goldstein-Hogness box" EXACT [] -synonym: "TATA box" EXACT [] -xref: http://en.wikipedia.org/wiki/TATA_box "wiki" -is_a: SO:0001660 ! core_promoter_element - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT. This region is associated with sigma factor 70." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-10 signal" EXACT [] -synonym: "minus 10 signal" EXACT [] -synonym: "Pribnow box" EXACT [] -synonym: "Pribnow Schaller box" EXACT [] -synonym: "Pribnow-Schaller box" EXACT [] -xref: http://en.wikipedia.org/wiki/Pribnow_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001671 ! bacterial_RNApol_promoter_sigma_70 - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA. This region is associated with sigma factor 70." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-35 signal" EXACT [] -synonym: "minus 35 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001671 ! bacterial_RNApol_promoter_sigma_70 - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." 
[http://www.dddmag.com/Glossary.aspx#r] -synonym: "class I" RELATED [] -synonym: "class I transposon" EXACT [] -synonym: "retrotransposon element" EXACT [] -xref: http://en.wikipedia.org/wiki/Retrotransposon "wiki" -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." [SO:ke] -synonym: "class II" RELATED [] -synonym: "class II transposon" EXACT [] -synonym: "DNA transposon" EXACT [] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -synonym: "U2 intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." 
[SO:ke] -synonym: "long terminal repeat retrotransposon" EXACT [] -synonym: "LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -synonym: "non LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -synonym: "5' intron" EXACT [] -synonym: "5' intron sequence" EXACT [] -synonym: "five prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000191 -name: interior_intron -synonym: "interior intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -synonym: "3' intron" EXACT [] -synonym: "3' intron sequence" RELATED [] -synonym: "three prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." [GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! 
restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "LINE" EXACT [] -synonym: "LINE element" EXACT [] -synonym: "Long interspersed element" EXACT [] -synonym: "Long interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon (here, 'codon' is inclusive of the stop_codon)." [SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke] -subset: DBVAR -synonym: "transchr" RELATED [http://www.ncbi.nlm.nih.gov/dbvar/] -synonym: "translocated sequence" EXACT [] -is_a: SO:0001059 ! 
sequence_alteration - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000201 -name: interior_exon -def: "An exon that is bounded by 5' and 3' splice sites." [PMID:10373547] -synonym: "interior exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The coding exon that is most 3-prime on a given transcript." [SO:ma] -synonym: "3' coding exon" RELATED [] -synonym: "three prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. 
A common human SINE is the Alu element." [SO:ke] -synonym: "Short interspersed element" EXACT [] -synonym: "Short interspersed nuclear element" EXACT [] -synonym: "SINE element" EXACT [] -xref: http://en.wikipedia.org/wiki/Short_interspersed_nuclear_element "wiki" -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_variation -synonym: "simple sequence length polymorphism" RELATED [] -synonym: "simple sequence length variation" EXACT [] -synonym: "SSLP" RELATED [] -is_a: SO:0000248 ! sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "terminal inverted repeat element" EXACT [] -synonym: "TIR element" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -subset: SOFA -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253)." [SO:ke] -synonym: "tRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -def: "A primary transcript encoding alanyl tRNA." [SO:ke] -synonym: "alanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." [SO:ke] -synonym: "arginine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -synonym: "asparagine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -synonym: "aspartic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." [SO:ke] -synonym: "cysteine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." [SO:ke] -synonym: "glycine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." [SO:ke] -synonym: "histidine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke] -synonym: "isoleucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -synonym: "leucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -synonym: "lysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -synonym: "methionine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke] -synonym: "phenylalanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -synonym: "proline tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -synonym: "serine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -synonym: "threonine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -synonym: "tryptophan tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." [SO:ke] -synonym: "tyrosine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." [SO:ke] -synonym: "valine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear RNA (SO:0000274)." [SO:ke] -synonym: "snRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -synonym: "snoRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. 
This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a nucleotide molecule that binds a Transcription Factor or Transcription Factor complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The in-frame interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -synonym: "transcript attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterized by large modular imperfect long inverted repeats." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "foldback element" EXACT [] -synonym: "long inverted repeat element" RELATED [] -synonym: "LVR element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The sequences extending on either side of a specific region." [SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! 
topologically_defined_region - -[Term] -id: SO:0000240 -name: chromosome_variation -synonym: "chromosome variation" EXACT [] -is_a: SO:0001507 ! variant_collection -disjoint_from: SO:0000400 ! sequence_attribute -relationship: part_of SO:0001524 ! chromosomally_aberrant_genome - -[Term] -id: SO:0000241 -name: internal_UTR -def: "A UTR bordered by the terminal and initial codons of two CDSs in a polycistronic transcript. Every UTR is either 5', 3' or internal." [SO:cjm] -synonym: "internal UTR" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polycistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -synonym: "untranslated region polycistronic mRNA" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke] -synonym: "internal ribosomal entry sequence" EXACT [] -synonym: "internal ribosomal entry site" EXACT [] -synonym: "internal ribosome entry sequence" RELATED [] -synonym: "internal ribosome entry site" EXACT [] -synonym: "IRES" EXACT [] -xref: http://en.wikipedia.org/wiki/Internal_ribosome_entry_site "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_obsolete: true - -[Term] -id: SO:0000246 -name: polyadenylated -def: "A attribute describing the addition of a poly A tail to the 3' end of a mRNA molecule." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000248 -name: sequence_length_variation -synonym: "sequence length variation" EXACT [] -is_a: SO:1000002 ! 
substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -synonym: "modified RNA base feature" EXACT [] -is_a: SO:0001236 ! base - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000254 -name: alanyl_tRNA -def: "A tRNA sequence that has an alanine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "alanyl tRNA" EXACT [] -synonym: "alanyl-transfer ribonucleic acid" EXACT [] -synonym: "alanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000211 ! alanine_tRNA_primary_transcript - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -synonym: "rRNA small subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -def: "A tRNA sequence that has an asparagine anticodon, and a 3' asparagine binding region." [SO:ke] -synonym: "asparaginyl tRNA" EXACT [] -synonym: "asparaginyl-transfer ribonucleic acid" EXACT [] -synonym: "asparaginyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000213 ! asparagine_tRNA_primary_transcript - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -def: "A tRNA sequence that has an aspartic acid anticodon, and a 3' aspartic acid binding region." [SO:ke] -synonym: "aspartyl tRNA" EXACT [] -synonym: "aspartyl-transfer ribonucleic acid" EXACT [] -synonym: "aspartyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000214 ! aspartic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -def: "A tRNA sequence that has a cysteine anticodon, and a 3' cysteine binding region." [SO:ke] -synonym: "cysteinyl tRNA" EXACT [] -synonym: "cysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "cysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! 
tRNA -relationship: derives_from SO:0000215 ! cysteine_tRNA_primary_transcript - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -def: "A tRNA sequence that has a glutamine anticodon, and a 3' glutamine binding region." [SO:ke] -synonym: "glutaminyl tRNA" EXACT [] -synonym: "glutaminyl-transfer ribonucleic acid" EXACT [] -synonym: "glutaminyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000216 ! glutamic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -def: "A tRNA sequence that has a glutamic acid anticodon, and a 3' glutamic acid binding region." [SO:ke] -synonym: "glutamyl tRNA" EXACT [] -synonym: "glutamyl-transfer ribonucleic acid" EXACT [] -synonym: "glutamyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000217 ! glutamine_tRNA_primary_transcript - -[Term] -id: SO:0000261 -name: glycyl_tRNA -def: "A tRNA sequence that has a glycine anticodon, and a 3' glycine binding region." [SO:ke] -synonym: "glycyl tRNA" EXACT [] -synonym: "glycyl-transfer ribonucleic acid" RELATED [] -synonym: "glycyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000218 ! glycine_tRNA_primary_transcript - -[Term] -id: SO:0000262 -name: histidyl_tRNA -def: "A tRNA sequence that has a histidine anticodon, and a 3' histidine binding region." [SO:ke] -synonym: "histidyl tRNA" EXACT [] -synonym: "histidyl-transfer ribonucleic acid" EXACT [] -synonym: "histidyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000219 ! histidine_tRNA_primary_transcript - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -def: "A tRNA sequence that has an isoleucine anticodon, and a 3' isoleucine binding region." [SO:ke] -synonym: "isoleucyl tRNA" EXACT [] -synonym: "isoleucyl-transfer ribonucleic acid" EXACT [] -synonym: "isoleucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000220 ! 
isoleucine_tRNA_primary_transcript - -[Term] -id: SO:0000264 -name: leucyl_tRNA -def: "A tRNA sequence that has a leucine anticodon, and a 3' leucine binding region." [SO:ke] -synonym: "leucyl tRNA" EXACT [] -synonym: "leucyl-transfer ribonucleic acid" EXACT [] -synonym: "leucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000221 ! leucine_tRNA_primary_transcript - -[Term] -id: SO:0000265 -name: lysyl_tRNA -def: "A tRNA sequence that has a lysine anticodon, and a 3' lysine binding region." [SO:ke] -synonym: "lysyl tRNA" EXACT [] -synonym: "lysyl-transfer ribonucleic acid" EXACT [] -synonym: "lysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000222 ! lysine_tRNA_primary_transcript - -[Term] -id: SO:0000266 -name: methionyl_tRNA -def: "A tRNA sequence that has a methionine anticodon, and a 3' methionine binding region." [SO:ke] -synonym: "methionyl tRNA" EXACT [] -synonym: "methionyl-transfer ribonucleic acid" EXACT [] -synonym: "methionyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000223 ! methionine_tRNA_primary_transcript - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -def: "A tRNA sequence that has a phenylalanine anticodon, and a 3' phenylalanine binding region." [SO:ke] -synonym: "phenylalanyl tRNA" EXACT [] -synonym: "phenylalanyl-transfer ribonucleic acid" EXACT [] -synonym: "phenylalanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000224 ! phenylalanine_tRNA_primary_transcript - -[Term] -id: SO:0000268 -name: prolyl_tRNA -def: "A tRNA sequence that has a proline anticodon, and a 3' proline binding region." [SO:ke] -synonym: "prolyl tRNA" EXACT [] -synonym: "prolyl-transfer ribonucleic acid" EXACT [] -synonym: "prolyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000225 ! 
proline_tRNA_primary_transcript - -[Term] -id: SO:0000269 -name: seryl_tRNA -def: "A tRNA sequence that has a serine anticodon, and a 3' serine binding region." [SO:ke] -synonym: "seryl tRNA" EXACT [] -synonym: "seryl-transfer ribonucleic acid" RELATED [] -synonym: "seryl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000226 ! serine_tRNA_primary_transcript - -[Term] -id: SO:0000270 -name: threonyl_tRNA -def: "A tRNA sequence that has a threonine anticodon, and a 3' threonine binding region." [SO:ke] -synonym: "threonyl tRNA" EXACT [] -synonym: "threonyl-transfer ribonucleic acid" EXACT [] -synonym: "threonyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000227 ! threonine_tRNA_primary_transcript - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -def: "A tRNA sequence that has a tryptophan anticodon, and a 3' tryptophan binding region." [SO:ke] -synonym: "tryptophanyl tRNA" EXACT [] -synonym: "tryptophanyl-transfer ribonucleic acid" EXACT [] -synonym: "tryptophanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000228 ! tryptophan_tRNA_primary_transcript - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -def: "A tRNA sequence that has a tyrosine anticodon, and a 3' tyrosine binding region." [SO:ke] -synonym: "tyrosyl tRNA" EXACT [] -synonym: "tyrosyl-transfer ribonucleic acid" EXACT [] -synonym: "tyrosyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000229 ! tyrosine_tRNA_primary_transcript - -[Term] -id: SO:0000273 -name: valyl_tRNA -def: "A tRNA sequence that has a valine anticodon, and a 3' valine binding region." [SO:ke] -synonym: "valyl tRNA" EXACT [] -synonym: "valyl-transfer ribonucleic acid" EXACT [] -synonym: "valyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000230 ! 
valine_tRNA_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000231 ! snRNA_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: derives_from SO:0000647 ! 
miRNA_primary_transcript - -[Term] -id: SO:0000277 -name: bound_by_factor -def: "An attribute describing a sequence that is bound by another molecule." [SO:ke] -comment: Formerly called transcript_by_bound_factor. -synonym: "bound by factor" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000278 -name: transcript_bound_by_nucleic_acid -def: "A transcript that is bound by a nucleic acid." [SO:xp] -comment: Formerly called transcript_by_bound_nucleic_acid. -synonym: "transcript bound by nucleic acid" EXACT [] -is_a: SO:0000673 ! transcript -relationship: has_quality SO:0000876 ! bound_by_nucleic_acid - -[Term] -id: SO:0000279 -name: transcript_bound_by_protein -def: "A transcript that is bound by a protein." [SO:xp] -comment: Formerly called transcript_by_bound_protein. -synonym: "transcript bound by protein" EXACT [] -is_a: SO:0000673 ! transcript -relationship: has_quality SO:0000875 ! bound_by_protein - -[Term] -id: SO:0000280 -name: engineered_gene -def: "A gene that is engineered." [SO:xp] -synonym: "engineered gene" EXACT [] -is_a: SO:0000704 ! gene -is_a: SO:0000804 ! engineered_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000281 -name: engineered_foreign_gene -def: "A gene that is engineered and foreign." [SO:xp] -synonym: "engineered foreign gene" EXACT [] -is_a: SO:0000280 ! engineered_gene -is_a: SO:0000285 ! foreign_gene -is_a: SO:0000805 ! engineered_foreign_region -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000282 -name: mRNA_with_minus_1_frameshift -def: "An mRNA with a minus 1 frameshift." [SO:xp] -synonym: "mRNA with minus 1 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -relationship: has_quality SO:0000866 ! minus_1_frameshift - -[Term] -id: SO:0000283 -name: engineered_foreign_transposable_element_gene -def: "A transposable_element that is engineered and foreign." 
[SO:xp] -synonym: "engineered foreign transposable element gene" EXACT [] -is_a: SO:0000111 ! transposable_element_gene -is_a: SO:0000281 ! engineered_foreign_gene -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartite and interrupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000285 -name: foreign_gene -def: "A gene that is foreign." [SO:xp] -synonym: "foreign gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "direct terminal repeat" RELATED [] -synonym: "long terminal repeat" EXACT [] -synonym: "LTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Long_terminal_repeat "wiki" -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000287 -name: fusion_gene -def: "A gene that is a fusion." [SO:xp] -synonym: "fusion gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Fusion_gene "wiki" -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000806 ! fusion - -[Term] -id: SO:0000288 -name: engineered_fusion_gene -def: "A fusion gene that is engineered." [SO:xp] -synonym: "engineered fusion gene" EXACT [] -is_a: SO:0000280 ! engineered_gene -is_a: SO:0000287 ! fusion_gene -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000289 -name: microsatellite -def: "A repeat_region containing repeat_units (2 to 4 bp) that is repeated multiple times in tandem." 
[http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite" EXACT [] -synonym: "dinucleotide repeat microsatellite feature" EXACT [] -synonym: "dinucleotide repeat microsatellite locus" EXACT [] -synonym: "dinucleotide repeat microsatellite marker" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite marker" RELATED [] -synonym: "rinucleotide repeat microsatellite" EXACT [] -synonym: "trinucleotide repeat microsatellite feature" EXACT [] -synonym: "trinucleotide repeat microsatellite locus" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_obsolete: true - -[Term] -id: SO:0000293 -name: engineered_foreign_repetitive_element -def: "A repetitive element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign repetitive element" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000805 ! engineered_foreign_region -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! 
repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -synonym: "U12 intron" EXACT [] -synonym: "U12-dependent intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! replicon - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "D-loop" EXACT [] -synonym: "displacement loop" RELATED [] -xref: http://en.wikipedia.org/wiki/D_loop "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -synonym: "recombination feature" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000299 -name: specific_recombination_site -synonym: "specific recombination site" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -synonym: "recombination feature of rearranged gene" EXACT [] -is_a: SO:0000299 ! 
specific_recombination_site - -[Term] -id: SO:0000301 -name: vertebrate_immune_system_gene_recombination_feature -synonym: "vertebrate immune system gene recombination feature" EXACT [] -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene recombination feature" EXACT [] -synonym: "J-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interrupted palindrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_base -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001720 ! epigenetically_modified_region - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_base - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_obsolete: true - -[Term] -id: SO:0000309 -name: computed_feature -is_obsolete: true - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_obsolete: true - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to: -is_obsolete: true - -[Term] -id: SO:0000312 -name: experimentally_determined -def: "Attribute to describe a feature that has been experimentally verified." [SO:ke] -synonym: "experimentally determined" EXACT [] -is_a: SO:0000789 ! validated - -[Term] -id: SO:0000313 -name: stem_loop -alt_id: SO:0000019 -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "RNA_hairpin_loop" EXACT [] -synonym: "stem loop" EXACT [] -synonym: "stem-loop" EXACT [] -xref: http://en.wikipedia.org/wiki/Stem_loop "wiki" -is_a: SO:0000122 ! RNA_sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: TSS -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! 
mRNA_region - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cDNA clone" EXACT [] -is_a: SO:0000151 ! clone -relationship: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke] -synonym: "intronic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000321 -name: mRNA_with_plus_1_frameshift -def: "An mRNA with a plus 1 frameshift." [SO:ke] -synonym: "mRNA with plus 1 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -relationship: has_quality SO:0000868 ! plus_1_frameshift - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -synonym: "nuclease hypersensitive site" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "coding start" EXACT [] -synonym: "translation initiation site" EXACT [] -synonym: "translation start" RELATED [] -is_a: SO:0000851 ! 
CDS_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke] -synonym: "coding end" EXACT [] -synonym: "translation termination site" EXACT [] -synonym: "translation_end" EXACT [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray oligo" EXACT [] -synonym: "microarray oligonucleotide" EXACT [] -is_a: SO:0000051 ! probe - -[Term] -id: SO:0000329 -name: mRNA_with_plus_2_frameshift -def: "An mRNA with a plus 2 frameshift." [SO:xp] -synonym: "mRNA with plus 2 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -relationship: has_quality SO:0000869 ! plus_2_framshift - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000335 -name: mRNA_with_minus_2_frameshift -def: "A mRNA with a minus 2 frameshift." [SO:ke] -synonym: "mRNA with minus 2 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -relationship: has_quality SO:0000867 ! minus_2_frameshift - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." 
[http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0001411 ! biological_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITEs do not encode proteins." [http://www.pnas.org/cgi/content/full/97/18/10083] -synonym: "miniature inverted repeat transposable element" EXACT [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome which promotes recombination." [SO:rd] -synonym: "recombination hotspot" EXACT [] -xref: http://en.wikipedia.org/wiki/Recombination_hotspot "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! 
chromosome_part - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -synonym: "site specific recombination target region" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000346 -name: loxP_site -synonym: "Cre-recombination target region" RELATED [] -synonym: "loxP site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000348 -name: nucleic_acid -def: "An attribute describing a sequence consisting of nucleobases bound to repeating units. The forms found in nature are deoxyribonucleic acid (DNA), where the repeating units are 2-deoxy-D-ribose rings connected to a phosphate backbone, and ribonucleic acid (RNA), where the repeating units are D-ribose rings connected to a phosphate backbone." [CHEBI:33696, RSC:cb] -synonym: "nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleic_acid "wiki" -is_a: SO:0000443 ! 
polymer_attribute - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000350 -name: FRT_site -def: "An inversion site found on the Saccharomyces cerevisiae 2 micron plasmid." [SO:ma] -synonym: "FLP recombination target region" EXACT [] -synonym: "FRT site" EXACT [] -is_a: SO:0000948 ! inversion_site - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "An attribute to decide a sequence of nucleotides, nucleotide analogs, or amino acids that has been designed by an experimenter and which may, or may not, correspond with any natural sequence." [SO:ma] -synonym: "synthetic sequence" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000352 -name: DNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a 2-deoxy-D-ribose ring connected to a phosphate backbone." [RSC:cb] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000353 -name: sequence_assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -def: "A region of intronic nucleotide sequence targeted by a nuclease enzyme." [SO:ke] -synonym: "group 1 intron homing endonuclease target region" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which is co-inherited as the result of the lack of historic recombination within it." [SO:ma] -synonym: "haplotype block" EXACT [] -is_a: SO:0000298 ! 
recombination_feature - -[Term] -id: SO:0000356 -name: RNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a D-ribose ring connected to a phosphate backbone." [RSC:cb] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: flanked -def: "An attribute describing a region that is bounded either side by a particular kind of region." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000359 -name: floxed -def: "An attribute describing sequence that is flanked by Lox-P sites." [SO:ke] -xref: http://en.wikipedia.org/wiki/Floxed "wiki" -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000361 -name: FRT_flanked -def: "An attribute to describe sequence that is flanked by the FLP recombinase recognition site, FRT." [SO:ke] -synonym: "FRT flanked" EXACT [] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000362 -name: invalidated_by_chimeric_cDNA -def: "A cDNA clone constructed from more than one mRNA. Usually an experimental artifact." [SO:ma] -synonym: "invalidated by chimeric cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000363 -name: floxed_gene -def: "A transgene that is floxed." [SO:xp] -synonym: "floxed gene" EXACT [] -is_a: SO:0000902 ! transgene -relationship: has_quality SO:0000359 ! floxed - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposable element." 
[SO:ke] -synonym: "transposable element flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "A region encoding an integrase which acts at a site adjacent to it (attI_site) to insert DNA which must include but is not limited to an attC_site." [SO:as] -xref: http://en.wikipedia.org/wiki/Integron "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000367 -name: attI_site -def: "A region within an integron, adjacent to an integrase, at which site specific recombination involving an attC_site takes place." [SO:as] -synonym: "attI site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -is_obsolete: true - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/~smaloy/Glossary/C.html] -synonym: "conjugative transposon" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." 
[RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -is_a: SO:0000673 ! transcript -relationship: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0000373 -name: recombinationally_inverted_gene -def: "A recombinationally rearranged gene by inversion." [SO:xp] -synonym: "recombinationally inverted gene" EXACT [] -is_a: SO:0000456 ! recombinationally_rearranged_gene -relationship: has_quality SO:1000036 ! inversion - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -is_a: SO:0000372 ! enzymatic_RNA -relationship: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000375 -name: rRNA_5_8S -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 6S RNA represses expression from a sigma70-dependent promoter during stationary phase." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S RNA" EXACT [] -synonym: "RNA 6S" EXACT [] -xref: http://en.wikipedia.org/wiki/6S_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB RsmB RNA" EXACT [] -synonym: "CsrB-RsmB RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -synonym: "DsrA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/DsrA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -synonym: "GcvB RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/GcvB_RNA "wiki" -is_a: SO:0000378 ! DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [PMID:2436805] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -is_a: SO:0000715 ! RNA_motif -relationship: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000381 -name: group_IIA_intron -synonym: "group IIA intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -synonym: "group IIB intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -synonym: "MicF RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MicF_RNA "wiki" -is_a: SO:0000644 ! antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. 
Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -synonym: "OxyS RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/OxyS_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterized activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. 
Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -synonym: "RprA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RprA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -synonym: "RRE RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -synonym: "spot-42 RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Spot_42_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesizes telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -def: "An attribute describes a quality of sequence." [SO:ke] -synonym: "sequence attribute" EXACT [] - -[Term] -id: SO:0000401 -name: gene_attribute -synonym: "gene attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_obsolete: true - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0005837 ! U14_snoRNA_primary_transcript - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. 
The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron. Twintrons are group II or III introns, into which another group II or III intron has been transposed." [PMID:1899376, PMID:7823908] -xref: http://en.wikipedia.org/wiki/Twintron "wiki" -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." 
[SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A biological_region of sequence that, in the molecule, interacts selectively and non-covalently with other molecules. A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: See GO:0005488 : binding. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with polypeptide molecules." [SO:ke] -comment: See GO:0042277 : peptide binding. -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000411 -name: rescue_region -def: "A region that rescues." [SO:xp] -synonym: "rescue fragment" EXACT [] -synonym: "rescue region" EXACT [] -synonym: "rescue segment" RELATED [] -is_a: SO:0000695 ! reagent -relationship: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! 
remark - -[Term] -id: SO:0000414 -name: invalidated_by_genomic_contamination -def: "An attribute to describe a feature that is invalidated due to genomic contamination." [SO:ke] -synonym: "invalidated by genomic contamination" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000415 -name: invalidated_by_genomic_polyA_primed_cDNA -def: "An attribute to describe a feature that is invalidated due to polyA priming." [SO:ke] -synonym: "invalidated by genomic polyA primed cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000416 -name: invalidated_by_partial_processing -def: "An attribute to describe a feature that is invalidated due to partial processing." [SO:ke] -synonym: "invalidated by partial processing" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000417 -name: polypeptide_domain -alt_id: BS:00012 -alt_id: BS:00134 -alt_id: SO:0001069 -def: "A structurally or functionally defined protein region. In proteins with multiple domains, the combination of the domains determines the function of the protein. A region which has been shown to recur throughout evolution." [EBIBS:GAR] -comment: Range. Old definition from before biosapiens: A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains. -subset: biosapiens -synonym: "ca_bind" RELATED BS [uniprot:feature_type] -synonym: "DNA_bind" RELATED BS [uniprot:feature_type] -synonym: "domain" BROAD BS [uniprot:feature_type] -synonym: "np_bind" RELATED BS [uniprot:feature_type] -synonym: "polypeptide domain" EXACT [] -synonym: "polypeptide_structural_domain" EXACT BS [] -synonym: "structural domain" BROAD BS [] -synonym: "zn_fing" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -is_a: SO:0100021 ! 
polypeptide_conserved_region - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0001527 ! peptide_localization_signal -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -synonym: "5' TIR" EXACT [] -synonym: "five prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! 
terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -synonym: "3' TIR" EXACT [] -synonym: "three prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -synonym: "U5 long terminal repeat region" EXACT [] -synonym: "U5 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000423 -name: R_LTR_region -synonym: "R long terminal repeat region" EXACT [] -synonym: "R LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000424 -name: U3_LTR_region -synonym: "U3 long terminal repeat region" EXACT [] -synonym: "U3 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000425 -name: five_prime_LTR -synonym: "5' long terminal repeat" EXACT [] -synonym: "5' LTR" EXACT [] -synonym: "five prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -synonym: "3' long terminal repeat" EXACT [] -synonym: "3' LTR" EXACT [] -synonym: "three prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -synonym: "R 5' long term repeat region" EXACT [] -synonym: "R five prime LTR region" EXACT [] -is_a: SO:0000423 ! R_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -synonym: "U5 5' long terminal repeat region" EXACT [] -synonym: "U5 five prime LTR region" EXACT [] -is_a: SO:0000422 ! U5_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -synonym: "U3 5' long term repeat region" EXACT [] -synonym: "U3 five prime LTR region" EXACT [] -is_a: SO:0000424 ! U3_LTR_region -is_a: SO:0000850 ! 
five_prime_LTR_component - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -synonym: "R 3' long terminal repeat region" EXACT [] -synonym: "R three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -synonym: "U3 3' long terminal repeat region" EXACT [] -synonym: "U3 three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -synonym: "U5 3' long terminal repeat region" EXACT [] -synonym: "U5 three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -synonym: "non LTR retrotransposon polymeric tract" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: target_site_duplication -def: "A sequence of the target DNA that is duplicated when a transposable element or phage inserts; usually found at each end the insertion." [http://www.koko.gov.my/CocoaBioTech/Glossaryt.html] -synonym: "target site duplication" EXACT [] -is_a: SO:0000314 ! direct_repeat -relationship: derives_from SO:0000101 ! transposable_element - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." [SO:ke] -synonym: "LTR retrotransposon poly purine tract" RELATED [] -synonym: "RR tract" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! 
origin_of_replication - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -synonym: "inverted ring chromosome" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! ring_chromosome - -[Term] -id: SO:0000440 -name: vector_replicon -def: "A replicon that has been modified to act as a vector for foreign sequence." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "vector" EXACT [] -synonym: "vector replicon" EXACT [] -xref: http://en.wikipedia.org/wiki/Vector_(molecular_biology) "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000443 -name: polymer_attribute -def: "An attribute to describe the kind of biological sequence." [SO:ke] -synonym: "polymer attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -synonym: "three prime noncoding exon" EXACT [] -is_a: SO:0000198 ! 
noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "5' nc exon" EXACT [] -synonym: "5' non coding exon" EXACT [] -synonym: "five prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." [SO:ke] -synonym: "UTR intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." [SO:ke] -synonym: "five prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -synonym: "three prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequency of these components." [SO:ma] -synonym: "random sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -synonym: "chromosome interband" RELATED [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000451 -name: gene_with_polyadenylated_mRNA -def: "A gene that encodes a polyadenylated mRNA." [SO:xp] -synonym: "gene with polyadenylated mRNA" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -relationship: transcribed_to SO:0000871 ! polyadenylated_mRNA - -[Term] -id: SO:0000452 -name: transgene_attribute -is_obsolete: true - -[Term] -id: SO:0000453 -name: chromosomal_transposition -def: "A chromosome structure variant whereby a region of a chromosome has been transferred to another position. 
Among interchromosomal rearrangements, the term transposition is reserved for that class in which the telomeres of the chromosomes involved are coupled (that is to say, form the two ends of a single DNA molecule) as in wild-type." [FB:reference_manual, SO:ke] -synonym: "chromosomal transposition" EXACT [] -synonym: "transposition" NARROW [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000455 -name: gene_with_mRNA_with_frameshift -def: "A gene that encodes an mRNA with a frameshift." [SO:xp] -synonym: "gene with mRNA with frameshift" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -relationship: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000456 -name: recombinationally_rearranged_gene -def: "A gene that is recombinationally rearranged." [SO:ke] -synonym: "recombinationally rearranged gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000940 ! recombinationally_rearranged - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -def: "A chromosome duplication involving an insertion from another chromosome." [SO:ke] -synonym: "interchromosomal duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene_segment -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D gene" EXACT [] -synonym: "D-GENE" EXACT [] -is_a: SO:0000460 ! 
vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000459 -name: gene_with_trans_spliced_transcript -def: "A gene with a transcript that is trans-spliced." [SO:xp] -synonym: "gene with trans spliced transcript" EXACT [] -is_a: SO:0000704 ! gene -relationship: transcribed_to SO:0000479 ! trans_spliced_transcript - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_segment -comment: I am using the term segment instead of gene here to avoid confusion with the region 'gene'. -synonym: "vertebrate immunoglobulin T cell receptor segment" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite deficiency" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000463 -name: encodes_alternately_spliced_transcripts -def: "A gene that encodes more than one transcript." [SO:ke] -synonym: "encodes alternately spliced transcripts" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! 
exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome deletion whereby a chromosome is generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus duplication" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene_segment -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene" EXACT [] -synonym: "V-GENE" EXACT [] -synonym: "variable_gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -def: "An attribute describing a gene sequence where the resulting protein is regulated by the stability of the resulting protein." [SO:ke] -synonym: "post translationally regulated by protein stability" EXACT [] -synonym: "post-translationally regulated by protein stability" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -def: "An attribute describing a gene sequence where the resulting protein is modified to regulate it." [SO:ke] -synonym: "post translationally regulated by protein modification" EXACT [] -synonym: "post-translationally regulated by protein modification" EXACT [] -is_a: SO:0000130 ! 
post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene_segment -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene" EXACT [] -synonym: "J-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000471 -name: autoregulated -def: "The gene product is involved in its own transcriptional regulation." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -def: "The gene product is involved in its own transcriptional regulation where it decreases transcription." [SO:ke] -synonym: "negatively autoregulated" EXACT [] -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -def: "The gene product is involved in its own transcriptional regulation, where it increases transcription." [SO:ke] -synonym: "positively autoregulated" EXACT [] -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." [SO:ke] -synonym: "contig read" EXACT [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -def: "A gene that is polycistronic." 
[SO:ke] -is_obsolete: true - -[Term] -id: SO:0000478 -name: C_gene_segment -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C gene" EXACT [] -synonym: "C_GENE" EXACT [] -synonym: "constant gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000479 -name: trans_spliced_transcript -def: "A transcript that is trans-spliced." [SO:xp] -synonym: "trans spliced transcript" EXACT [] -synonym: "trans-spliced transcript" EXACT [] -is_a: SO:0000673 ! transcript -relationship: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly." [SO:ke] -synonym: "tiling path clone" EXACT [] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occurring at the termini of a DNA transposon." [SO:ke] -synonym: "terminal inverted repeat" EXACT [] -synonym: "TIR" EXACT [] -is_a: SO:0000294 ! inverted_repeat -relationship: part_of SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor gene cluster" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." 
[SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-CLUSTER" EXACT [] -synonym: "DJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-C-CLUSTER" EXACT [] -synonym: "VDJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! 
VDJ_gene_segment - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-CLUSTER" EXACT [] -synonym: "VDJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-C-CLUSTER" RELATED [] -synonym: "VJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-C-CLUSTER" EXACT [] -synonym: "VJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-CLUSTER" EXACT [] -synonym: "VJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! 
J_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -synonym: "D gene recombination feature" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-HEPTAMER" EXACT [] -synonym: "three prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-NOMAMER" EXACT [] -synonym: "three prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-SPACER" EXACT [] -synonym: "three prime D spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-HEPTAMER" EXACT [] -synonym: "five prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-NONAMER" EXACT [] -synonym: "five prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'-SPACER" EXACT [] -synonym: "five prime D spacer" EXACT [] -synonym: "five prime D-spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -synonym: "Hoogsteen base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Hoogsteen_base_pair "wiki" -is_a: SO:0000028 ! 
base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." [SO:ke] -synonym: "reverse Hoogsteen base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ C cluster" EXACT [] -synonym: "D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ cluster" EXACT [] -synonym: "D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! 
D_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J C cluster" EXACT [] -synonym: "D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000507 -name: pseudogenic_exon -def: "A non functional descendant of an exon, part of a pseudogene." [SO:ke] -comment: This is the analog of the exon of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon -relationship: part_of SO:0000516 ! pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J cluster" EXACT [] -synonym: "D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! 
DJ_gene_segment - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J C cluster" EXACT [] -synonym: "D-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000510 -name: VD_gene_segment -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V_D_GENE" EXACT [] -synonym: "VD gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J C cluster" EXACT [] -synonym: "J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus aneuploid" EXACT [] -is_a: SO:1000029 ! 
chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J cluster" EXACT [] -synonym: "J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J nonamer" EXACT [] -synonym: "J-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J heptamer" EXACT [] -synonym: "J-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -def: "A non functional descendant of a transcript, part of a pseudogene." [SO:ke] -comment: This is the analog of the transcript of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic transcript" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! 
pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J spacer" EXACT [] -synonym: "J-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ cluster" EXACT [] -synonym: "V-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J cluster" EXACT [] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ C cluster" EXACT [] -synonym: "V-(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! 
V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ cluster" EXACT [] -synonym: "V-(VDJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J cluster" EXACT [] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ C cluster" EXACT [] -synonym: "V-(VJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ cluster" EXACT [] -synonym: "V-(VJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J cluster" EXACT [] -synonym: "V-(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V cluster" EXACT [] -synonym: "V-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ C cluster" EXACT [] -synonym: "V-D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! 
DJ_gene_segment - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ cluster" EXACT [] -synonym: "V-D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J C cluster" EXACT [] -synonym: "V-D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J cluster" EXACT [] -synonym: "V-D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! 
DJ_gene_segment - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J C cluster" EXACT [] -synonym: "V-D-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J cluster" EXACT [] -synonym: "V-D-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V heptamer" EXACT [] -synonym: "V-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J cluster" EXACT [] -synonym: "V-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J C cluster" EXACT [] -synonym: "V-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V nonamer" EXACT [] -synonym: "V-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V spacer" EXACT [] -synonym: "V-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene recombination feature" EXACT [] -synonym: "V-RS" EXACT [] -is_a: SO:0000939 ! 
vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-C-CLUSTER" EXACT [] -synonym: "DJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-C-CLUSTER" EXACT [] -synonym: "DJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-C-CLUSTER" EXACT [] -synonym: "VDJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ C cluster" EXACT [] -synonym: "V-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." [http://www.pnas.org/cgi/content/full/100/11/6569] -synonym: "ISCR" RELATED [] -xref: http://en.wikipedia.org/wiki/Helitron "wiki" -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -synonym: "recoding pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -synonym: "designed sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite duplication" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000548 -name: gene_with_edited_transcript -def: "A gene that encodes a transcript that is edited." [SO:xp] -synonym: "gene with edited transcript" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -relationship: transcribed_to SO:0000873 ! 
edited_transcript - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived duplication plus aneuploid" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -def: "A chromosome structural variation whereby either a chromosome exists in addition to the normal chromosome complement or is lacking." [SO:ke] -comment: Examples are Nullo-4, Haplo-4 and triplo-4 in Drosophila. -synonym: "aneuploid chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -synonym: "polyadenylation termination signal" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "A region in the 5' UTR that pairs with the 16S rRNA during formation of the preinitiation complex." [SO:jh] -comment: Not found in Eukaryotic sequence. -synonym: "five prime ribosome binding site" EXACT [] -synonym: "RBS" RELATED [] -synonym: "Shine Dalgarno sequence" EXACT [] -synonym: "Shine-Dalgarno sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Shine-Dalgarno_sequence "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -alt_id: SO:0001430 -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation. The boundary between the UTR and the polyA sequence." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA cleavage site" EXACT [] -synonym: "polyA junction" EXACT [] -synonym: "polyA site" EXACT [] -synonym: "polyA_junction" EXACT [] -synonym: "polyadenylation site" RELATED [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "5' clip" RELATED [] -synonym: "five prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'RS" EXACT [] -synonym: "five prime D recombination signal sequence" EXACT [] -synonym: "five prime D-recombination signal sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "3'-clip" EXACT [] -synonym: "three prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C cluster" EXACT [] -synonym: "C-CLUSTER" EXACT [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D cluster" EXACT [] -synonym: "D-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J cluster" EXACT [] -synonym: "D-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: "Seven nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or J-gene recombination feature of an immunoglobulin or T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "HEPTAMER" RELATED [] -synonym: "heptamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -synonym: "nonamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000563 -name: vertebrate_immune_system_gene_recombination_spacer -synonym: "vertebrate immune system gene recombination spacer" EXACT [] -is_a: SO:0000301 ! 
vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J C cluster" EXACT [] -synonym: "V-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J C cluster" EXACT [] -synonym: "V-(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J C cluster" EXACT [] -synonym: "V-(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! 
VJ_gene_segment - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -synonym: "inversion derived aneuploid chromosome" EXACT [] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promoter -synonym: "bidirectional promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000569 -name: retrotransposed -alt_id: SO:0100042 -def: "An attribute of a feature that occurred as the product of a reverse transcriptase mediated event." [SO:ke] -comment: GO:0003964 RNA-directed DNA polymerase activity. -xref: http://en.wikipedia.org/wiki/Retrotransposed "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-RS" EXACT [] -synonym: "three prime D recombination signal sequence" EXACT [] -synonym: "three_prime_D-recombination_signal_sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_encoding -synonym: "miRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000572 -name: DJ_gene_segment -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D-J-GENE" EXACT [] -synonym: "DJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000573 -name: rRNA_encoding -synonym: "rRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000574 -name: VDJ_gene_segment -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-D-J-GENE" EXACT [] -synonym: "VDJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000575 -name: scRNA_encoding -synonym: "scRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000576 -name: VJ_gene_segment -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-J-GENE" EXACT [] -synonym: "VJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_encoding -synonym: "snoRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." [SO:ma] -synonym: "edited transcript feature" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -synonym: "methylation guide snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. 
It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." [SO:ke] -synonym: "rRNA cleavage snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "pre edited region" EXACT [] -synonym: "pre-edited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "A tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. TmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and Eukaryote nuclear genomes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa RNA" RELATED [] -synonym: "ssrA" RELATED [] -xref: http://en.wikipedia.org/wiki/TmRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_encoding -synonym: "C/D box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." 
[SO:ke] -synonym: "10Sa RNA primary transcript" RELATED [] -synonym: "ssrA RNA primary transcript" RELATED [] -synonym: "tmRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyze their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -is_a: SO:0000188 ! intron -relationship: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -synonym: "SRP RNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). 
Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000589 ! SRP_RNA_primary_transcript - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A tertiary structure in RNA where nucleotides in a loop form base pairs with a region of RNA downstream of the loop." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Pseudoknot "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "classical pseudoknot" EXACT [] -synonym: "H pseudoknot" EXACT [] -synonym: "H-pseudoknot" EXACT [] -synonym: "H-type pseudoknot" EXACT [] -synonym: "hairpin-type pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. 
Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000595 ! C_D_box_snoRNA_primary_transcript - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "box H/ACA snoRNA" EXACT [] -synonym: "H ACA box snoRNA" EXACT [] -synonym: "H/ACA box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000596 ! H_ACA_box_snoRNA_primary_transcript - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." 
[SO:ke] -synonym: "C/D box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -synonym: "H ACA box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." [http://www.rna.ucla.edu/index.html] -is_obsolete: true - -[Term] -id: SO:0000598 -name: edited_by_C_insertion_and_dinucleotide_insertion -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000599 -name: edited_by_C_to_U_substitution -is_obsolete: true - -[Term] -id: SO:0000600 -name: edited_by_A_to_I_substitution -is_obsolete: true - -[Term] -id: SO:0000601 -name: edited_by_G_addition -is_obsolete: true - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. 
Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing block" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing domain" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." 
[http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "unedited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_encoding -synonym: "H ACA box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -synonym: "oligo U tail" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000602 ! guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -synonym: "bacterial RNApol promoter" EXACT [] -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0001203 ! 
RNA_polymerase_promoter - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -synonym: "bacterial terminator" EXACT [] -is_a: SO:0000141 ! terminator -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -synonym: "terminator of type 2 RNApol III promoter" EXACT [] -is_a: SO:0000951 ! eukaryotic_terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -synonym: "RNApol III promoter type 1" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "RNApol III promoter type 2" EXACT [] -synonym: "tRNA promoter" RELATED [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence TGGCnnAGTGG." [SO:ke] -comment: Binds TFIIIC. -synonym: "A-box" EXACT [] -xref: http://en.wikipedia.org/wiki/A-box "wiki" -is_a: SO:0001660 ! core_promoter_element - -[Term] -id: SO:0000620 -name: B_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence AGGTTCCAnnCC." [SO:ke] -comment: Binds TFIIIC. -synonym: "B-box" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -synonym: "RNApol III promoter type 3" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -def: "An RNA polymerase III type 1 promoter with consensus sequence CAnnCCn." [SO:ke] -synonym: "C-box" EXACT [] -is_a: SO:0001660 ! 
core_promoter_element -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000623 -name: snRNA_encoding -synonym: "snRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -synonym: "chromosomal regulatory element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000627 -name: insulator -def: "A transcriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -synonym: "five prime open reading frame" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -def: "A start codon upstream of the ORF." [SO:ke] -synonym: "upstream AUG codon" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! 
UTR - -[Term] -id: SO:0000631 -name: polycistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." [SO:ke] -synonym: "polycistronic primary transcript" EXACT [] -is_a: SO:0000078 ! polycistronic_transcript -is_a: SO:0000185 ! primary_transcript -relationship: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000632 -name: monocistronic_primary_transcript -def: "A primary transcript encoding for one gene product." [SO:ke] -synonym: "monocistronic primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript -is_a: SO:0000665 ! monocistronic_transcript -relationship: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000633 -name: monocistronic_mRNA -def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd] -synonym: "monocistronic mRNA" EXACT [] -synonym: "monocistronic processed transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Monocistronic_mRNA "wiki" -is_a: SO:0000234 ! mRNA -is_a: SO:0000665 ! monocistronic_transcript -relationship: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000634 -name: polycistronic_mRNA -def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd] -synonym: "polycistronic mRNA" EXACT [] -synonym: "polycistronic processed transcript" RELATED [] -xref: http://en.wikipedia.org/wiki/Polycistronic_mRNA "wiki" -is_a: SO:0000078 ! polycistronic_transcript -is_a: SO:0000234 ! mRNA -relationship: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "A primary transcript that donates the spliced leader to other mRNA." [SO:ke] -synonym: "mini exon donor RNA" EXACT [] -synonym: "mini-exon donor RNA" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -synonym: "spliced leader RNA" EXACT [] -is_a: SO:0000835 ! 
primary_transcript_region -relationship: part_of SO:0000635 ! mini_exon_donor_RNA - -[Term] -id: SO:0000637 -name: engineered_plasmid -def: "A plasmid that is engineered." [SO:xp] -synonym: "engineered plasmid" EXACT [] -synonym: "engineered plasmid gene" RELATED [] -is_a: SO:0000155 ! plasmid -is_a: SO:0000804 ! engineered_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -synonym: "transcribed spacer region" EXACT [] -is_a: SO:0000838 ! rRNA_primary_transcript_region - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -synonym: "internal transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -synonym: "external transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -synonym: "tetranucleotide repeat microsatellite feature" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_encoding -synonym: "SRP RNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! 
satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro RNA primary transcript" EXACT [] -synonym: "miRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000648 -name: stRNA_primary_transcript -def: "A primary transcript encoding a small temporal mRNA (SO:0000649)." [SO:ke] -synonym: "small temporal RNA primary transcript" EXACT [] -synonym: "stRNA primary transcript" EXACT [] -is_a: SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000255 ! rRNA_small_subunit_primary_transcript - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilizes 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! 
large_subunit_rRNA - -[Term] -id: SO:0000654 -name: maxicircle_gene -def: "A mitochondrial gene located in a maxicircle." [SO:xp] -synonym: "maxi-circle gene" EXACT [] -synonym: "maxicircle gene" EXACT [] -is_a: SO:0000089 ! kinetoplast_gene -relationship: part_of SO:0000742 ! maxicircle - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000656 -name: stRNA_encoding -synonym: "stRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: has_part SO:0000726 ! repeat_unit - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_encoding -synonym: "tmRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_obsolete: true - -[Term] -id: SO:0000661 -name: intron_attribute -is_obsolete: true - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. 
-subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000663 -name: tRNA_encoding -synonym: "tRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -synonym: "introgressed chromosome region" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000665 -name: monocistronic_transcript -def: "A transcript that is monocistronic." [SO:xp] -synonym: "monocistronic transcript" EXACT [] -is_a: SO:0000673 ! transcript -relationship: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000666 -name: mobile_intron -def: "An intron (mitochondrial, chloroplast, nuclear or prokaryotic) that encodes a double strand sequence specific endonuclease allowing for mobility." [SO:ke] -synonym: "mobile intron" EXACT [] -is_a: SO:0000188 ! intron -is_a: SO:0001037 ! mobile_genetic_element -relationship: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0000667 -name: insertion -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: DBVAR -subset: SOFA -synonym: "insertion" EXACT dbvar [http://www.ncbi.nlm.nih.gov/dbvar/] -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -xref: loinc:LA6687-3 "Insertion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -synonym: "sequence rearrangement feature" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." 
[SO:ma] -synonym: "chromosome breakage sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -synonym: "internal eliminated sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -synonym: "macronucleus destined segment" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non canonical splice site" EXACT [] -synonym: "non-canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000678 -consider: SO:0000679 - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." [SO:ke] -synonym: "canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000676 -consider: SO:0000677 - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -synonym: "canonical 3' splice site" EXACT [] -synonym: "canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." 
[SO:ke] -synonym: "canonical 5' splice site" EXACT [] -synonym: "canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." [SO:ke] -synonym: "non canonical 3' splice site" RELATED [] -synonym: "non canonical three prime splice site" EXACT [] -synonym: "non-canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non canonical 5' splice site" EXACT [] -synonym: "non canonical five prime splice site" EXACT [] -synonym: "non-canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non ATG start codon" EXACT [] -synonym: "non canonical start codon" EXACT [] -synonym: "non-canonical start codon" EXACT [] -is_a: SO:0000318 ! start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -synonym: "aberrant processed transcript" EXACT [] -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." [http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -synonym: "exonic splice enhancer" EXACT [] -is_a: SO:0000344 ! 
splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000685 -name: DNAseI_hypersensitive_site -synonym: "DHS" EXACT [] -synonym: "DNAseI hypersensitive site" EXACT [] -is_a: SO:0000322 ! nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "A chromosomal translocation whereby the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements. This occurs for some translocations, particularly but not exclusively, reciprocal translocations." [SO:ma] -synonym: "translocation element" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000690 -name: gene_with_polycistronic_transcript -def: "A gene that encodes a polycistronic transcript." [SO:xp] -synonym: "gene with polycistronic transcript" EXACT [] -is_a: SO:0000704 ! gene -relationship: transcribed_to SO:0000078 ! 
polycistronic_transcript - -[Term] -id: SO:0000691 -name: cleaved_initiator_methionine -alt_id: BS:00067 -def: "The initiator methionine that has been cleaved from a mature polypeptide sequence." [EBIBS:GAR] -subset: biosapiens -synonym: "cleaved initiator methionine" EXACT [] -synonym: "init_met" RELATED [uniprot:feature_type] -synonym: "initiator methionine" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000692 -name: gene_with_dicistronic_transcript -def: "A gene that encodes a dicistronic transcript." [SO:xp] -synonym: "gene with dicistronic transcript" EXACT [] -is_a: SO:0000690 ! gene_with_polycistronic_transcript -relationship: transcribed_to SO:0000079 ! dicistronic_transcript - -[Term] -id: SO:0000693 -name: gene_with_recoded_mRNA -def: "A gene that encodes an mRNA that is recoded." [SO:xp] -synonym: "gene with recoded mRNA" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -relationship: has_quality SO:0000881 ! recoded - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000697 -name: gene_with_stop_codon_read_through -def: "A gene that encodes a transcript with stop codon readthrough." 
[SO:xp] -synonym: "gene with stop codon read through" EXACT [] -is_a: SO:0000693 ! gene_with_recoded_mRNA -relationship: has_part SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000698 -name: gene_with_stop_codon_redefined_as_pyrrolysine -def: "A gene encoding an mRNA that has the stop codon redefined as pyrrolysine." [SO:xp] -synonym: "gene with stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000697 ! gene_with_stop_codon_read_through -relationship: has_part SO:0000884 ! stop_codon_redefined_as_pyrrolysine - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -synonym: "breakpoint" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." 
[SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjcent copies of a region (of length greater than 1)." [SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The 5' five prime splice site region of the donor RNA." [SO:ke] -comment: SL RNA contains a donor site. -synonym: "5 prime trans splice site" RELATED [] -synonym: "trans splice donor site" EXACT [] -synonym: "trans-splice donor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -def: "A trans_splicing_acceptor_site which appends the 22nt SL1 RNA leader sequence to the 5' end of most mRNAs." [SO:nlw] -synonym: "SL1 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -def: "A trans_splicing_acceptor_site which appends the 22nt SL2 RNA leader sequence to the 5' end of mRNAs. SL2 acceptor sites occur in genes in internal segments of polycistronic transcripts." [SO:nlw] -synonym: "SL2 acceptor site" EXACT [] -is_a: SO:0000706 ! 
trans_splice_acceptor_site - -[Term] -id: SO:0000710 -name: gene_with_stop_codon_redefined_as_selenocysteine -def: "A gene encoding an mRNA that has the stop codon redefined as selenocysteine." [SO:xp] -synonym: "gene with stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000697 ! gene_with_stop_codon_read_through -relationship: has_part SO:0000885 ! stop_codon_redefined_as_selenocysteine - -[Term] -id: SO:0000711 -name: gene_with_mRNA_recoded_by_translational_bypass -def: "A gene with mRNA recoded by translational bypass." [SO:xp] -synonym: "gene with mRNA recoded by translational bypass" EXACT [] -is_a: SO:0000693 ! gene_with_recoded_mRNA -relationship: has_quality SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:0000712 -name: gene_with_transcript_with_translational_frameshift -def: "A gene encoding a transcript that has a translational frameshift." [SO:xp] -synonym: "gene with transcript with translational frameshift" EXACT [] -is_a: SO:0000693 ! gene_with_recoded_mRNA -relationship: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000713 -name: DNA_motif -def: "A motif that is active in the DNA form of the sequence." [SO:ke] -synonym: "DNA motif" EXACT [] -xref: http://en.wikipedia.org/wiki/DNA_motif "wiki" -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001683 ! sequence_motif - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000716 -name: dicistronic_mRNA -def: "An mRNA that has the quality dicistronic." [SO:ke] -synonym: "dicistronic mRNA" EXACT [] -synonym: "dicistronic processed transcript" RELATED [] -is_a: SO:0000079 ! dicistronic_transcript -is_a: SO:0000634 ! 
polycistronic_mRNA -relationship: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interrupted by one or more stop codons; usually identified through intergenomic sequence comparisons." [SGD:rb] -comment: Term requested by Rama from SGD. -synonym: "blocked reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -synonym: "superscaffold" RELATED [] -is_a: SO:0001876 ! partial_genomic_sequence_assembly - -[Term] -id: SO:0000720 -name: foreign_transposable_element -def: "A transposable element that is foreign." [SO:ke] -comment: requested by Michael on 19 Nov 2004. -synonym: "foreign transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000721 -name: gene_with_dicistronic_primary_transcript -def: "A gene that encodes a dicistronic primary transcript." [SO:xp] -comment: Requested by Michael, 19 nov 2004. -synonym: "gene with dicistronic primary transcript" EXACT [] -is_a: SO:0000692 ! 
gene_with_dicistronic_transcript -relationship: transcribed_to SO:1001197 ! dicistronic_primary_transcript - -[Term] -id: SO:0000722 -name: gene_with_dicistronic_mRNA -def: "A gene that encodes a polycistronic mRNA." [SO:xp] -comment: Requested by MA nov 19 2004. -synonym: "gene with dicistronic mRNA" EXACT [] -synonym: "gene with dicistronic processed transcript" EXACT [] -is_a: SO:0000692 ! gene_with_dicistronic_transcript -relationship: transcribed_to SO:0000716 ! dicistronic_mRNA - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." [SO:ma] -synonym: "intervening DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/IDNA "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0001527 ! 
peptide_localization_signal - -[Term] -id: SO:0000726 -name: repeat_unit -def: "The simplest repeated component of a repeat region. A single repeat." [SO:ke] -comment: Added to comply with the feature table. A single repeat. -synonym: "repeat unit" EXACT [] -xref: http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region -relationship: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000728 -name: intein -def: "A region of a peptide that is able to excise itself and rejoin the remaining portions with a peptide bond." [SO:ke] -comment: Intein-mediated protein splicing occurs after mRNA has been translated into a protein. -synonym: "protein intron" RELATED [] -xref: http://en.wikipedia.org/wiki/Intein "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000729 -name: intein_containing -def: "An attribute of protein-coding genes where the initial protein product contains an intein." [SO:ke] -synonym: "intein containing" EXACT [] -is_a: SO:0000010 ! protein_coding - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000731 -name: fragmentary -def: "An attribute to describe a feature that is incomplete." [SO:ke] -comment: Term added because of request by MO people. -synonym: "fragment" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000732 -name: predicted -def: "An attribute describing an unverified region." 
[SO:ke] -xref: http://en.wikipedia.org/wiki/Predicted "wiki" -is_a: SO:0000905 ! status - -[Term] -id: SO:0000733 -name: feature_attribute -def: "An attribute describing a located_sequence_feature." [SO:ke] -synonym: "feature attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000734 -name: exemplar_mRNA -def: "An exemplar is a representative cDNA sequence for each gene. The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: Added for the MO people. -synonym: "exemplar mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -relationship: has_quality SO:0000864 ! exemplar - -[Term] -id: SO:0000735 -name: sequence_location -synonym: "sequence location" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_sequence -synonym: "organelle sequence" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "mitochondrial sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000738 -name: nuclear_sequence -synonym: "nuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -synonym: "nucleomorphic sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000740 -name: plastid_sequence -synonym: "plastid sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000741 -name: kinetoplast -alt_id: SO:0000826 -def: "A kinetoplast is an interlocked network of thousands of minicircles and tens of maxi circles, located near the base of the flagellum of some protozoan species." 
[PMID:8395055] -synonym: "kinetoplast_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Kinetoplast "wiki" -is_a: SO:0001026 ! genome -relationship: has_part SO:0000742 ! maxicircle -relationship: has_part SO:0000980 ! minicircle - -[Term] -id: SO:0000742 -name: maxicircle -alt_id: SO:0000827 -def: "A maxicircle is a replicon, part of a kinetoplast, that contains open reading frames and replicates via a rolling circle method." [PMID:8395055] -synonym: "maxicircle_chromosome" EXACT [] -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000743 -name: apicoplast_sequence -synonym: "apicoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -synonym: "chromoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -synonym: "chloroplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -synonym: "cyanelle sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -synonym: "leucoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -synonym: "proplastid sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_location -synonym: "plasmid location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -synonym: "amplification origin" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_location -synonym: "proviral location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0001679 ! 
transcription_regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -def: "The region of sequence that has been inserted and is being propagated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000754 -name: lambda_vector -def: "The lambda bacteriophage is the vector for the linear lambda clone. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -synonym: "lambda vector" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000755 -name: plasmid_vector -synonym: "plasmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Plasmid_vector#Vectors "wiki" -is_a: SO:0000440 ! vector_replicon -relationship: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template." [SO:ma] -xref: http://en.wikipedia.org/wiki/CDNA "wiki" -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -synonym: "single strand cDNA" EXACT [] -synonym: "single stranded cDNA" EXACT [] -synonym: "single-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -synonym: "double strand cDNA" RELATED [] -synonym: "double stranded cDNA" EXACT [] -synonym: "double-strand cDNA" RELATED [] -is_a: SO:0000756 ! 
cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_obsolete: true - -[Term] -id: SO:0000760 -name: YAC_clone -is_obsolete: true - -[Term] -id: SO:0000761 -name: phagemid_clone -is_obsolete: true - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000763 -name: fosmid_clone -is_obsolete: true - -[Term] -id: SO:0000764 -name: BAC_clone -is_obsolete: true - -[Term] -id: SO:0000765 -name: cosmid_clone -is_obsolete: true - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -def: "A tRNA sequence that has a pyrrolysine anticodon, and a 3' pyrrolysine binding region." [SO:ke] -synonym: "pyrrolysyl tRNA" EXACT [] -synonym: "pyrrolysyl-transfer ribonucleic acid" EXACT [] -synonym: "pyrrolysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0001178 ! pyrrolysine_tRNA_primary_transcript - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome." [SO:ma] -is_a: SO:0000155 ! plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria. Processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw, issn:1362-4962] -comment: Added in response to comment from Kelly Williams from Indiana. Nov 2005. -synonym: "tmRNA coding piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." 
[doi:10.1093/nar/gkh795, Indiana:kw] -comment: Added in response to Kelly Williams from Indiana. Date: Nov 2005. -synonym: "tmRNA acceptor piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000771 -name: QTL -def: "A quantitative trait locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." [http://rgd.mcw.edu/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005. -synonym: "quantitative trait locus" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000772 -name: genomic_island -def: "A genomic island is an integrated mobile genetic element, characterized by size (over 10 Kb). It that has features that suggest a foreign origin. These can include nucleotide distribution (oligonucleotides signature, CG content etc.) that differs from the bulk of the chromosome and/or genes suggesting DNA mobility." [Phigo:at, SO:ke] -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -synonym: "genomic island" EXACT [] -xref: http://en.wikipedia.org/wiki/Genomic_island "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "pathogenic island" EXACT [] -is_a: SO:0000772 ! 
genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands. -synonym: "metabolic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -def: "An adaptive island is a genomic island that provides an adaptive advantage to the host." [SO:ke] -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands. Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "adaptive island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands. Evolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA gene. John T. Sullivan and Clive W. Ronso PNAS 1998 Apr 28 95 (9) 5145-5149. -synonym: "symbiosis island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. 
Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000779 -name: engineered_episome -def: "An episome that is engineered." [SO:xp] -comment: Requested by Lynn Crosby Jan 2006. -synonym: "engineered episome" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -is_a: SO:0000768 ! episome -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000780 -name: transposable_element_attribute -comment: Added by KE Jan 2006 to capture the kinds of attributes of TEs -is_obsolete: true - -[Term] -id: SO:0000781 -name: transgenic -def: "Attribute describing sequence that has been integrated with foreign sequence." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000782 -name: natural -def: "An attribute describing a feature that occurs in nature." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000783 -name: engineered -def: "An attribute to describe a region that was modified in vitro." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000784 -name: foreign -def: "An attribute to describe a region from another species." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000785 -name: cloned_region -comment: Added in response to Lynn Crosby. A clone insert may be composed of many cloned regions. -synonym: "cloned region" EXACT [] -synonym: "cloned segment" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000786 -name: reagent_attribute -comment: Added jan 2006 by KE. -synonym: "reagent attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000787 -name: clone_attribute -is_obsolete: true - -[Term] -id: SO:0000788 -name: cloned -is_obsolete: true - -[Term] -id: SO:0000789 -name: validated -def: "An attribute to describe a feature that has been proven." [SO:ke] -is_a: SO:0000905 ! 
status - -[Term] -id: SO:0000790 -name: invalidated -def: "An attribute describing a feature that is invalidated." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000791 -name: cloned_genomic -is_obsolete: true - -[Term] -id: SO:0000792 -name: cloned_cDNA -is_obsolete: true - -[Term] -id: SO:0000793 -name: engineered_DNA -is_obsolete: true - -[Term] -id: SO:0000794 -name: engineered_rescue_region -def: "A rescue region that is engineered." [SO:xp] -synonym: "engineered rescue fragment" EXACT [] -synonym: "engineered rescue region" EXACT [] -synonym: "engineered rescue segment" EXACT [] -is_a: SO:0000411 ! rescue_region -is_a: SO:0000804 ! engineered_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000795 -name: rescue_mini_gene -def: "A mini_gene that rescues." [SO:xp] -synonym: "rescue mini gene" EXACT [] -synonym: "rescue mini-gene" EXACT [] -is_a: SO:0000815 ! mini_gene -relationship: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000796 -name: transgenic_transposable_element -def: "TE that has been modified in vitro, including insertion of DNA derived from a source other than the originating TE." [FB:mc] -comment: Modified as requested by Lynn - FB. May 2007. -synonym: "transgenic transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -relationship: derives_from SO:0000151 ! clone -relationship: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000797 -name: natural_transposable_element -def: "TE that exists (or existed) in nature." [FB:mc] -synonym: "natural transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -relationship: has_quality SO:0000782 ! natural - -[Term] -id: SO:0000798 -name: engineered_transposable_element -def: "TE that has been modified by manipulations in vitro." [FB:mc] -synonym: "engineered transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -is_a: SO:0000804 ! 
engineered_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000799 -name: engineered_foreign_transposable_element -def: "A transposable_element that is engineered and foreign." [FB:mc] -synonym: "engineered foreign transposable element" EXACT [] -is_a: SO:0000720 ! foreign_transposable_element -is_a: SO:0000798 ! engineered_transposable_element -is_a: SO:0000805 ! engineered_foreign_region -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000800 -name: assortment_derived_duplication -def: "A multi-chromosome duplication aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment derived duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000801 -name: assortment_derived_deficiency_plus_duplication -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency and a duplication." [FB:gm] -synonym: "assortment derived deficiency plus duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000802 -name: assortment_derived_deficiency -def: "A multi-chromosome deficiency aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment-derived deficiency" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000803 -name: assortment_derived_aneuploid -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency or a duplication." [FB:gm] -synonym: "assortment derived aneuploid" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000804 -name: engineered_region -def: "A region that is engineered." [SO:xp] -synonym: "construct" EXACT [] -synonym: "engineered region" EXACT [] -synonym: "engineered sequence" EXACT [] -is_a: SO:0001409 ! 
biomaterial_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000805 -name: engineered_foreign_region -def: "A region that is engineered and foreign." [SO:xp] -synonym: "engineered foreign region" EXACT [] -is_a: SO:0000804 ! engineered_region -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000806 -name: fusion -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000807 -name: engineered_tag -def: "A tag that is engineered." [SO:xp] -synonym: "engineered tag" EXACT [] -is_a: SO:0000324 ! tag -is_a: SO:0000804 ! engineered_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000808 -name: validated_cDNA_clone -def: "A cDNA clone that has been validated." [SO:xp] -synonym: "validated cDNA clone" EXACT [] -is_a: SO:0000317 ! cDNA_clone -relationship: has_quality SO:0000789 ! validated - -[Term] -id: SO:0000809 -name: invalidated_cDNA_clone -def: "A cDNA clone that is invalid." [SO:xp] -synonym: "invalidated cDNA clone" EXACT [] -is_a: SO:0000317 ! cDNA_clone -relationship: has_quality SO:0000790 ! invalidated - -[Term] -id: SO:0000810 -name: chimeric_cDNA_clone -def: "A cDNA clone invalidated because it is chimeric." [SO:xp] -synonym: "chimeric cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -relationship: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA - -[Term] -id: SO:0000811 -name: genomically_contaminated_cDNA_clone -def: "A cDNA clone invalidated by genomic contamination." [SO:xp] -synonym: "genomically contaminated cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -relationship: has_quality SO:0000414 ! invalidated_by_genomic_contamination - -[Term] -id: SO:0000812 -name: polyA_primed_cDNA_clone -def: "A cDNA clone invalidated by polyA priming." [SO:xp] -synonym: "polyA primed cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -relationship: has_quality SO:0000415 ! 
invalidated_by_genomic_polyA_primed_cDNA - -[Term] -id: SO:0000813 -name: partially_processed_cDNA_clone -def: "A cDNA invalidated clone by partial processing." [SO:xp] -synonym: "partially processed cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -relationship: has_quality SO:0000416 ! invalidated_by_partial_processing - -[Term] -id: SO:0000814 -name: rescue -def: "An attribute describing a region's ability, when introduced to a mutant organism, to re-establish (rescue) a phenotype." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000815 -name: mini_gene -def: "By definition, minigenes are short open-reading frames (ORF), usually encoding approximately 9 to 20 amino acids, which are expressed in vivo (as distinct from being synthesized as peptide or protein ex vivo and subsequently injected). The in vivo synthesis confers a distinct advantage: the expressed sequences can enter both antigen presentation pathways, MHC I (inducing CD8+ T- cells, which are usually cytotoxic T-lymphocytes (CTL)) and MHC II (inducing CD4+ T-cells, usually 'T-helpers' (Th)); and can encounter B-cells, inducing antibody responses. Three main vector approaches have been used to deliver minigenes: viral vectors, bacterial vectors and plasmid DNA." [PMID:15992143] -synonym: "mini gene" EXACT [] -is_a: SO:0000236 ! ORF - -[Term] -id: SO:0000816 -name: rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "rescue gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000817 -name: wild_type -def: "An attribute describing sequence with the genotype found in nature and/or standard laboratory stock." [SO:ke] -synonym: "wild type" EXACT [] -xref: http://en.wikipedia.org/wiki/Wild_type "wiki" -xref: loinc:LA9658-1 "wild type" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000818 -name: wild_type_rescue_gene -def: "A gene that rescues." 
[SO:xp] -synonym: "wild type rescue gene" EXACT [] -is_a: SO:0000816 ! rescue_gene -relationship: has_quality SO:0000817 ! wild_type - -[Term] -id: SO:0000819 -name: mitochondrial_chromosome -def: "A chromosome originating in a mitochondria." [SO:xp] -synonym: "mitochondrial chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000820 -name: chloroplast_chromosome -def: "A chromosome originating in a chloroplast." [SO:xp] -synonym: "chloroplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000821 -name: chromoplast_chromosome -def: "A chromosome originating in a chromoplast." [SO:xp] -synonym: "chromoplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000822 -name: cyanelle_chromosome -def: "A chromosome originating in a cyanelle." [SO:xp] -synonym: "cyanelle chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000823 -name: leucoplast_chromosome -def: "A chromosome with origin in a leucoplast." [SO:xp] -synonym: "leucoplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000824 -name: macronuclear_chromosome -def: "A chromosome originating in a macronucleus." [SO:xp] -synonym: "macronuclear chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000083 ! macronuclear_sequence - -[Term] -id: SO:0000825 -name: micronuclear_chromosome -def: "A chromosome originating in a micronucleus." [SO:xp] -synonym: "micronuclear chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000084 ! micronuclear_sequence - -[Term] -id: SO:0000828 -name: nuclear_chromosome -def: "A chromosome originating in a nucleus." 
[SO:xp] -synonym: "nuclear chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000829 -name: nucleomorphic_chromosome -def: "A chromosome originating in a nucleomorph." [SO:xp] -synonym: "nucleomorphic chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000832 -name: promoter_region -def: "A region of sequence which is part of a promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -def: "A region of a mature transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! 
transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000838 -name: rRNA_primary_transcript_region -def: "A region of an rRNA primary transcript." [SO:ke] -comment: To allow transcribed_spacer_region to have a path to the root. -synonym: "rRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! 
biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000840 -name: repeat_component -def: "A region of a repeated sequence." [SO:ke] -comment: A manufactured to group the parts of repeats, to give them an is_a path back to the root. -synonym: "repeat component" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000843 -name: bacterial_RNApol_promoter_region -def: "A region which is part of a bacterial RNA polymerase promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of bacterial_RNApol_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000844 -name: RNApol_II_promoter_region -def: "A region of sequence which is a promoter for RNA polymerase II." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_II_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000845 -name: RNApol_III_promoter_type_1_region -def: "A region of sequence which is a promoter for RNA polymerase III type 1." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_1 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000846 -name: RNApol_III_promoter_type_2_region -def: "A region of sequence which is a promoter for RNA polymerase III type 2." 
[SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_2 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000847 -name: tmRNA_region -def: "A region of a tmRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of tmRNA, thus giving them an is_a path back to the root. -synonym: "tmRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000584 ! tmRNA - -[Term] -id: SO:0000848 -name: LTR_component -synonym: "long term repeat component" EXACT [] -synonym: "LTR component" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000849 -name: three_prime_LTR_component -synonym: "3' long terminal repeat component" EXACT [] -synonym: "three prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000850 -name: five_prime_LTR_component -synonym: "5' long term repeat component" EXACT [] -synonym: "five prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000851 -name: CDS_region -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0000853 -name: homologous_region -def: "A region that is homologous to another region." [SO:ke] -synonym: "homolog" EXACT [] -synonym: "homologous region" EXACT [] -synonym: "homologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Homology_(biology) "wiki" -is_a: SO:0000330 ! conserved_region -relationship: has_quality SO:0000857 ! 
homologous - -[Term] -id: SO:0000854 -name: paralogous_region -def: "A homologous_region that is paralogous to another region." [SO:ke] -comment: A term to be used in conjunction with the paralogous_to relationship. -synonym: "paralog" EXACT [] -synonym: "paralogous region" EXACT [] -synonym: "paralogue" EXACT [] -xref: http://en.wikipedia.org/wiki/Paralog#Paralogy "wiki" -is_a: SO:0000853 ! homologous_region -relationship: has_quality SO:0000859 ! paralogous - -[Term] -id: SO:0000855 -name: orthologous_region -def: "A homologous_region that is orthologous to another region." [SO:ke] -comment: This term should be used in conjunction with the similarity relationships defined in SO. -synonym: "ortholog" EXACT [] -synonym: "orthologous region" EXACT [] -synonym: "orthologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Ortholog#Orthology "wiki" -is_a: SO:0000853 ! homologous_region -relationship: has_quality SO:0000858 ! orthologous - -[Term] -id: SO:0000856 -name: conserved -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000857 -name: homologous -def: "Similarity due to common ancestry." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000858 -name: orthologous -def: "An attribute describing a kind of homology where divergence occured after a speciation event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000859 -name: paralogous -def: "An attribute describing a kind of homology where divergence occurred after a duplication event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000860 -name: syntenic -def: "Attribute describing sequence regions occurring in same order on chromosome of different species." [SO:ke] -xref: http://en.wikipedia.org/wiki/Syntenic "wiki" -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000861 -name: capped_primary_transcript -def: "A primary transcript that is capped." [SO:xp] -synonym: "capped primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript -relationship: adjacent_to SO:0000581 ! 
cap -relationship: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000862 -name: capped_mRNA -def: "An mRNA that is capped." [SO:xp] -synonym: "capped mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -relationship: adjacent_to SO:0000581 ! cap -relationship: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000863 -name: mRNA_attribute -def: "An attribute describing an mRNA feature." [SO:ke] -synonym: "mRNA attribute" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000864 -name: exemplar -def: "An attribute describing a sequence is representative of a class of similar sequences." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000865 -name: frameshift -def: "An attribute describing a sequence that contains a mutation involving the deletion or insertion of one or more bases, where this number is not divisible by 3." [SO:ke] -xref: http://en.wikipedia.org/wiki/Frameshift "wiki" -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000866 -name: minus_1_frameshift -def: "A frameshift caused by deleting one base." [SO:ke] -synonym: "minus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000867 -name: minus_2_frameshift -def: "A frameshift caused by deleting two bases." [SO:ke] -synonym: "minus 2 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000868 -name: plus_1_frameshift -def: "A frameshift caused by inserting one base." [SO:ke] -synonym: "plus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000869 -name: plus_2_framshift -def: "A frameshift caused by inserting two bases." [SO:ke] -synonym: "plus 2 framshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000870 -name: trans_spliced -def: "An attribute describing transcript sequence that is created by splicing exons from diferent genes." [SO:ke] -synonym: "trans-spliced" EXACT [] -is_a: SO:0000237 ! 
transcript_attribute - -[Term] -id: SO:0000871 -name: polyadenylated_mRNA -def: "An mRNA that is polyadenylated." [SO:xp] -synonym: "polyadenylated mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -relationship: adjacent_to SO:0000610 ! polyA_sequence -relationship: has_quality SO:0000246 ! polyadenylated - -[Term] -id: SO:0000872 -name: trans_spliced_mRNA -def: "An mRNA that is trans-spliced." [SO:xp] -synonym: "trans-spliced mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -is_a: SO:0000479 ! trans_spliced_transcript -relationship: adjacent_to SO:0000636 ! spliced_leader_RNA -relationship: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000873 -name: edited_transcript -def: "A transcript that is edited." [SO:ke] -synonym: "edited transcript" EXACT [] -is_a: SO:0000673 ! transcript -relationship: guided_by SO:0000602 ! guide_RNA -relationship: has_part SO:0000977 ! anchor_binding_site -relationship: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000874 -name: edited_transcript_by_A_to_I_substitution -def: "A transcript that has been edited by A to I substitution." [SO:ke] -synonym: "edited transcript by A to I substitution" EXACT [] -is_a: SO:0000873 ! edited_transcript - -[Term] -id: SO:0000875 -name: bound_by_protein -def: "An attribute describing a sequence that is bound by a protein." [SO:ke] -synonym: "bound by protein" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000876 -name: bound_by_nucleic_acid -def: "An attribute describing a sequence that is bound by a nucleic acid." [SO:ke] -synonym: "bound by nucleic acid" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000877 -name: alternatively_spliced -def: "An attribute describing a situation where a gene may encode for more than 1 transcript." [SO:ke] -synonym: "alternatively spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000878 -name: monocistronic -def: "An attribute describing a sequence that contains the code for one gene product." 
[SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000879 -name: dicistronic -def: "An attribute describing a sequence that contains the code for two gene products." [SO:ke] -is_a: SO:0000880 ! polycistronic - -[Term] -id: SO:0000880 -name: polycistronic -def: "An attribute describing a sequence that contains the code for more than one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000881 -name: recoded -def: "An attribute describing an mRNA sequence that has been reprogrammed at translation, causing localized alterations." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000882 -name: codon_redefined -def: "An attribute describing the alteration of codon meaning." [SO:ke] -synonym: "codon redefined" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000883 -name: stop_codon_read_through -def: "A stop codon redefined to be a new amino acid." [SO:ke] -synonym: "stop codon read through" EXACT [] -synonym: "stop codon readthrough" RELATED [] -is_a: SO:0000145 ! recoded_codon - -[Term] -id: SO:0000884 -name: stop_codon_redefined_as_pyrrolysine -def: "A stop codon redefined to be the new amino acid, pyrrolysine." [SO:ke] -synonym: "stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000885 -name: stop_codon_redefined_as_selenocysteine -def: "A stop codon redefined to be the new amino acid, selenocysteine." [SO:ke] -synonym: "stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000886 -name: recoded_by_translational_bypass -def: "Recoded mRNA where a block of nucleotides is not translated." [SO:ke] -synonym: "recoded by translational bypass" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000887 -name: translationally_frameshifted -def: "Recoding by frameshifting a particular site." [SO:ke] -synonym: "translationally frameshifted" EXACT [] -is_a: SO:0000881 ! 
recoded - -[Term] -id: SO:0000888 -name: maternally_imprinted_gene -def: "A gene that is maternally_imprinted." [SO:xp] -synonym: "maternally imprinted gene" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -relationship: has_quality SO:0000135 ! maternally_imprinted - -[Term] -id: SO:0000889 -name: paternally_imprinted_gene -def: "A gene that is paternally imprinted." [SO:xp] -synonym: "paternally imprinted gene" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -relationship: has_quality SO:0000136 ! paternally_imprinted - -[Term] -id: SO:0000890 -name: post_translationally_regulated_gene -def: "A gene that is post translationally regulated." [SO:xp] -synonym: "post translationally regulated gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000891 -name: negatively_autoregulated_gene -def: "A gene that is negatively autoreguated." [SO:xp] -synonym: "negatively autoregulated gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000473 ! negatively_autoregulated - -[Term] -id: SO:0000892 -name: positively_autoregulated_gene -def: "A gene that is positively autoregulated." [SO:xp] -synonym: "positively autoregulated gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000475 ! positively_autoregulated - -[Term] -id: SO:0000893 -name: silenced -def: "An attribute describing an epigenetic process where a gene is inactivated at transcriptional or translational level." [SO:ke] -xref: http://en.wikipedia.org/wiki/Silenced "wiki" -is_a: SO:0000126 ! transcriptionally_repressed - -[Term] -id: SO:0000894 -name: silenced_by_DNA_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA modifications, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA modification" EXACT [] -is_a: SO:0000893 ! 
silenced - -[Term] -id: SO:0000895 -name: silenced_by_DNA_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA methylation, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA methylation" EXACT [] -is_a: SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000896 -name: translationally_regulated_gene -def: "A gene that is translationally regulated." [SO:xp] -synonym: "translationally regulated gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000131 ! translationally_regulated - -[Term] -id: SO:0000897 -name: allelically_excluded_gene -def: "A gene that is allelically_excluded." [SO:xp] -synonym: "allelically excluded gene" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -relationship: has_quality SO:0000137 ! allelically_excluded - -[Term] -id: SO:0000898 -name: epigenetically_modified_gene -def: "A gene that is epigenetically modified." [SO:ke] -synonym: "epigenetically modified gene" EXACT [] -is_a: SO:0000704 ! gene -is_a: SO:0001720 ! epigenetically_modified_region -relationship: has_quality SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000899 -name: nuclear_mitochondrial -def: "An attribute describing a nuclear pseudogene of a mitochndrial gene." [SO:ke] -synonym: "nuclear mitochondrial" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000900 -name: processed -def: "An attribute describing a pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promotors, but often including a polyA tail." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000901 -name: unequally_crossed_over -def: "An attribute describing a pseudogene that was created by tandem duplication and unequal crossing over during recombination." 
[SO:ke] -synonym: "unequally crossed over" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000902 -name: transgene -def: "A transgene is a gene that has been transferred naturally or by any of a number of genetic engineering techniques from one organism to another." [SO:xp] -xref: http://en.wikipedia.org/wiki/Transgene "wiki" -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000903 -name: endogenous_retroviral_sequence -synonym: "endogenous retroviral sequence" EXACT [] -is_a: SO:0000751 ! proviral_location - -[Term] -id: SO:0000904 -name: rearranged_at_DNA_level -def: "An attribute to describe the sequence of a feature, where the DNA is rearranged." [SO:ke] -synonym: "rearranged at DNA level" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000905 -name: status -def: "An attribute describing the status of a feature, based on the available evidence." [SO:ke] -comment: This term is the hypernym of attributes and should not be annotated to. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000906 -name: independently_known -def: "Attribute to describe a feature that is independently known - not predicted." [SO:ke] -synonym: "independently known" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000907 -name: supported_by_sequence_similarity -def: "An attribute to describe a feature that has been predicted using sequence similarity techniques." [SO:ke] -synonym: "supported by sequence similarity" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000908 -name: supported_by_domain_match -def: "An attribute to describe a feature that has been predicted using sequence similarity of a known domain." [SO:ke] -synonym: "supported by domain match" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000909 -name: supported_by_EST_or_cDNA -def: "An attribute to describe a feature that has been predicted using sequence similarity to EST or cDNA data." 
[SO:ke] -synonym: "supported by EST or cDNA" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000910 -name: orphan -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000911 -name: predicted_by_ab_initio_computation -def: "An attribute describing a feature that is predicted by a computer program that did not rely on sequence similarity." [SO:ke] -synonym: "predicted by ab initio computation" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000912 -name: asx_turn -alt_id: BS:00203 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Aspartate or Asparagine (Asx), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0000913 -name: cloned_cDNA_insert -def: "A clone insert made from cDNA." [SO:xp] -synonym: "cloned cDNA insert" EXACT [] -is_a: SO:0000753 ! clone_insert -relationship: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000914 -name: cloned_genomic_insert -def: "A clone insert made from genomic DNA." [SO:xp] -synonym: "cloned genomic insert" EXACT [] -is_a: SO:0000753 ! clone_insert -relationship: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000915 -name: engineered_insert -def: "A clone insert that is engineered." [SO:xp] -synonym: "engineered insert" EXACT [] -is_a: SO:0000753 ! clone_insert -is_a: SO:0000804 ! engineered_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000916 -name: edit_operation -synonym: "edit operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000917 -name: insert_U -def: "An edit to insert a U." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. 
-synonym: "insert U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000918 -name: delete_U -def: "An edit to delete a uridine." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "delete U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000919 -name: substitute_A_to_I -def: "An edit to substitute an I for an A." [SO:ke] -synonym: "substitute A to I" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000920 -name: insert_C -def: "An edit to insert a cytidine." [SO:ke] -synonym: "insert C" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000921 -name: insert_dinucleotide -def: "An edit to insert a dinucleotide." [SO:ke] -synonym: "insert dinucleotide" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000922 -name: substitute_C_to_U -def: "An edit to substitute an U for a C." [SO:ke] -synonym: "substitute C to U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000923 -name: insert_G -def: "An edit to insert a G." [SO:ke] -synonym: "insert G" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000924 -name: insert_GC -def: "An edit to insert a GC dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. 
-synonym: "insert GC" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000925 -name: insert_GU -def: "An edit to insert a GU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000926 -name: insert_CU -def: "An edit to insert a CU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert CU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000927 -name: insert_AU -def: "An edit to insert a AU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. 
The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000928 -name: insert_AA -def: "An edit to insert a AA dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AA" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000929 -name: edited_mRNA -def: "An mRNA that is edited." [SO:xp] -synonym: "edited mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -is_a: SO:0000873 ! edited_transcript -relationship: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000930 -name: guide_RNA_region -def: "A region of guide RNA." [SO:ma] -synonym: "guide RNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000602 ! guide_RNA - -[Term] -id: SO:0000931 -name: anchor_region -def: "A region of a guide_RNA that base-pairs to a target mRNA." [SO:jk] -synonym: "anchor region" EXACT [] -is_a: SO:0000930 ! 
guide_RNA_region - -[Term] -id: SO:0000932 -name: pre_edited_mRNA -synonym: "pre-edited mRNA" EXACT [] -is_a: SO:0000120 ! protein_coding_primary_transcript - -[Term] -id: SO:0000933 -name: intermediate -def: "An attribute to describe a feature between stages of processing." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000934 -name: miRNA_target_site -def: "A miRNA target site is a binding site where the molecule is a micro RNA." [FB:cds] -synonym: "miRNA target site" EXACT [] -is_a: SO:0001655 ! nucleotide_binding_site - -[Term] -id: SO:0000935 -name: edited_CDS -def: "A CDS that is edited." [SO:xp] -synonym: "edited CDS" EXACT [] -is_a: SO:0000316 ! CDS -relationship: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000936 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -synonym: "vertebrate immunoglobulin T cell receptor rearranged segment" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000937 -name: vertebrate_immune_system_feature -is_obsolete: true - -[Term] -id: SO:0000938 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor rearranged gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000939 -name: vertebrate_immune_system_gene_recombination_signal_feature -synonym: "vertebrate immune system gene recombination signal feature" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000940 -name: recombinationally_rearranged -synonym: "recombinationally rearranged" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000941 -name: recombinationally_rearranged_vertebrate_immune_system_gene -def: "A recombinationally rearranged gene of the vertebrate immune system." [SO:xp] -synonym: "recombinationally rearranged vertebrate immune system gene" EXACT [] -is_a: SO:0000456 ! 
recombinationally_rearranged_gene - -[Term] -id: SO:0000942 -name: attP_site -def: "An integration/excision site of a phage chromosome at which a recombinase acts to insert the phage DNA at a cognate integration/excision site on a bacterial chromosome." [SO:as] -synonym: "attP site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0001042 ! phage_sequence - -[Term] -id: SO:0000943 -name: attB_site -def: "An integration/excision site of a bacterial chromosome at which a recombinase acts to insert foreign DNA containing a cognate integration/excision site." [SO:as] -synonym: "attB site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000944 -name: attL_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attB_site and the 3' portion of attP_site." [SO:as] -synonym: "attBP'" RELATED [] -synonym: "attL site" RELATED [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000945 -name: attR_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attP_site and the 3' portion of attB_site." [SO:as] -synonym: "attPB'" RELATED [] -synonym: "attR site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000946 -name: integration_excision_site -def: "A region specifically recognised by a recombinase, which inserts or removes another region marked by a distinct cognate integration/excision site." [SO:as] -synonym: "attachment site" RELATED [] -synonym: "integration excision site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000947 -name: resolution_site -def: "A region specifically recognised by a recombinase, which separates a physically contiguous circle of DNA into two physically separate circles." [SO:as] -synonym: "res site" EXACT [] -synonym: "resolution site" EXACT [] -is_a: SO:0000342 ! 
site_specific_recombination_target_region - -[Term] -id: SO:0000948 -name: inversion_site -def: "A region specifically recognised by a recombinase, which inverts the region flanked by a pair of sites." [SO:ma] -comment: A target region for site-specific inversion of a DNA region and which carries binding sites for a site-specific recombinase and accessory proteins as well as the site for specific cleavage by the recombinase. -synonym: "inversion site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000949 -name: dif_site -def: "A site at which replicated bacterial circular chromosomes are decatenated by site specific resolvase." [SO:as] -synonym: "dif site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000950 -name: attC_site -def: "An attC site is a sequence required for the integration of a DNA of an integron." [SO:as] -synonym: "attC site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000951 -name: eukaryotic_terminator -synonym: "eukaryotic terminator" EXACT [] -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000952 -name: oriV -def: "An origin of vegetative replication in plasmids and phages." [SO:as] -synonym: "origin of vegetative replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000953 -name: oriC -def: "An origin of bacterial chromosome replication." [SO:as] -synonym: "origin of bacterial chromosome replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000954 -name: DNA_chromosome -def: "Structural unit composed of a self-replicating, DNA molecule." [SO:ma] -synonym: "DNA chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0000955 -name: double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded DNA molecule." 
[SO:ma] -synonym: "double stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! DNA_chromosome -relationship: has_quality SO:0000985 ! double - -[Term] -id: SO:0000956 -name: single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded DNA molecule." [SO:ma] -synonym: "single stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! DNA_chromosome -relationship: has_quality SO:0000984 ! single - -[Term] -id: SO:0000957 -name: linear_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear DNA molecule." [SO:ma] -synonym: "linear double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! double_stranded_DNA_chromosome -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000958 -name: circular_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular DNA molecule." [SO:ma] -synonym: "circular double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! double_stranded_DNA_chromosome -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000959 -name: linear_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear DNA molecule." [SO:ma] -synonym: "linear single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! single_stranded_DNA_chromosome -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000960 -name: circular_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! single_stranded_DNA_chromosome -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000961 -name: RNA_chromosome -def: "Structural unit composed of a self-replicating, RNA molecule." [SO:ma] -synonym: "RNA chromosome" EXACT [] -is_a: SO:0000340 ! 
chromosome -relationship: has_quality SO:0000356 ! RNA - -[Term] -id: SO:0000962 -name: single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded RNA molecule." [SO:ma] -synonym: "single stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! RNA_chromosome -relationship: has_quality SO:0000984 ! single - -[Term] -id: SO:0000963 -name: linear_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear RNA molecule." [SO:ma] -synonym: "linear single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! single_stranded_RNA_chromosome -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000964 -name: linear_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear RNA molecule." [SO:ma] -synonym: "linear double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! double_stranded_RNA_chromosome -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000965 -name: double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded RNA molecule." [SO:ma] -synonym: "double stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! RNA_chromosome -relationship: has_quality SO:0000985 ! double - -[Term] -id: SO:0000966 -name: circular_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! single_stranded_RNA_chromosome -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000967 -name: circular_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular RNA molecule." [SO:ma] -synonym: "circular double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! double_stranded_RNA_chromosome -relationship: has_quality SO:0000988 ! 
circular - -[Term] -id: SO:0000968 -name: sequence_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "sequence replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000969 -name: rolling_circle -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070581. -synonym: "rolling circle" EXACT [] -xref: http://en.wikipedia.org/wiki/Rolling_circle "wiki" -is_obsolete: true - -[Term] -id: SO:0000970 -name: theta_replication -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070582 -synonym: "theta replication" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000971 -name: DNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0006260. -synonym: "DNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000972 -name: RNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "RNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000973 -name: insertion_sequence -def: "A terminal_inverted_repeat_element that is bacterial and only encodes the functions required for its transposition between these inverted repeats." [SO:as] -synonym: "insertion sequence" EXACT [] -synonym: "IS" RELATED [] -xref: http://en.wikipedia.org/wiki/Insertion_sequence "wiki" -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000975 -name: minicircle_gene -synonym: "minicircle gene" EXACT [] -is_a: SO:0000089 ! kinetoplast_gene -relationship: part_of SO:0000980 ! minicircle - -[Term] -id: SO:0000976 -name: cryptic -def: "A feature_attribute describing a feature that is not manifest under normal conditions." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000977 -name: anchor_binding_site -comment: Part of an edited transcript only. -synonym: "anchor binding site" EXACT [] -is_a: SO:0000833 ! 
transcript_region - -[Term] -id: SO:0000978 -name: template_region -def: "A region of a guide_RNA that specifies the insertions and deletions of bases in the editing of a target mRNA." [SO:jk] -synonym: "information region" EXACT [] -synonym: "template region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000979 -name: gRNA_encoding -def: "A non-protein_coding gene that encodes a guide_RNA." [SO:ma] -synonym: "gRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000980 -name: minicircle -alt_id: SO:0000974 -def: "A minicircle is a replicon, part of a kinetoplast, that encodes for guide RNAs." [PMID:8395055] -synonym: "minicircle_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Minicircle "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000981 -name: rho_dependent_bacterial_terminator -synonym: "rho dependent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000982 -name: rho_independent_bacterial_terminator -synonym: "rho independent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000983 -name: strand_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "strand attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000984 -name: single -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000985 -name: double -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000986 -name: topology_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "topology attribute" EXACT [] -is_a: SO:0000443 ! 
polymer_attribute - -[Term] -id: SO:0000987 -name: linear -def: "A quality of a nucleotide polymer that has a 3'-terminal residue and a 5'-terminal residue." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "two-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000988 ! circular - -[Term] -id: SO:0000988 -name: circular -def: "A quality of a nucleotide polymer that has no terminal nucleotide residues." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "zero-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute - -[Term] -id: SO:0000989 -name: class_II_RNA -def: "Small non-coding RNA (59-60 nt long) containing 5' and 3' ends that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -synonym: "class II RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000990 -name: class_I_RNA -def: "Small non-coding RNA (55-65 nt long) containing highly conserved 5' and 3' ends (16 and 8 nt, respectively) that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -comment: Requested by Karen Pilcher - Dictybase. song-Term Tracker-1574577. -synonym: "class I RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000991 -name: genomic_DNA -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "genomic DNA" EXACT [] -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000992 -name: BAC_cloned_genomic_insert -comment: Requested by Andy Schroder - Flybase Harvard, Nov 2006. -synonym: "BAC cloned genomic insert" EXACT [] -is_a: SO:0000914 ! cloned_genomic_insert -relationship: derives_from SO:0000153 ! 
BAC - -[Term] -id: SO:0000993 -name: consensus -comment: Term added Dec 06 to comply with mapping to MGED terms. It should be used to generate consensus regions. The specific cross product terms they require are consensus_region and consensus_mRNA. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000994 -name: consensus_region -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus region" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000995 -name: consensus_mRNA -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -is_a: SO:0000994 ! consensus_region -relationship: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000996 -name: predicted_gene -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "predicted gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000732 ! predicted - -[Term] -id: SO:0000997 -name: gene_fragment -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "gene fragment" EXACT [] -is_a: SO:0000842 ! gene_component_region -relationship: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0000998 -name: recursive_splice_site -def: "A recursive splice site is a splice site which subdivides a large intron. Recursive splicing is a mechanism that splices large introns by sub dividing the intron at non exonic elements and alternate exons." [http://www.genetics.org/cgi/content/full/170/2/661] -synonym: "recursive splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000999 -name: BAC_end -def: "A region of sequence from the end of a BAC clone that may provide a highly specific marker." [SO:ke] -comment: Requested by Keith Boroevich December, 2006. 
-synonym: "BAC end" EXACT [] -synonym: "BAC end sequence" EXACT [] -synonym: "BES" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000153 ! BAC - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001003 -name: solo_LTR -def: "A recombination product between the 2 LTR of the same element." [SO:ke] -comment: Requested by Hadi Quesneville January 2007. -synonym: "solo LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0001004 -name: low_complexity -synonym: "low complexity" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0001005 -name: low_complexity_region -synonym: "low complexity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: has_quality SO:0001004 ! low_complexity - -[Term] -id: SO:0001006 -name: prophage -def: "A phage genome after it has established in the host genome in a latent/immune state either as a plasmid or as an integrated \"island\"." 
[GOC:jl] -xref: http://en.wikipedia.org/wiki/Prophage "wiki" -is_a: SO:0000113 ! proviral_region - -[Term] -id: SO:0001007 -name: cryptic_prophage -def: "A remnant of an integrated prophage in the host genome or an \"island\" in the host genome that includes phage like-genes." [GOC:jl] -comment: This is not cryptic in the same sense as a cryptic gene or cryptic splice site. -synonym: "cryptic prophage" EXACT [] -xref: http://ecoliwiki.net/colipedia/index.php/Category:Cryptic_Prophage.w -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001008 -name: tetraloop -def: "A base-paired stem with loop of 4 non-hydrogen bonded nucleotides." [SO:ke] -xref: http://en.wikipedia.org/wiki/Tetraloop "wiki" -is_a: SO:0000313 ! stem_loop - -[Term] -id: SO:0001009 -name: DNA_constraint_sequence -def: "A double-stranded DNA used to control macromolecular structure and function." [http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au\]&dispmax=50] -synonym: "DNA constraint" EXACT [] -synonym: "DNA constraint sequence" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0001010 -name: i_motif -def: "A cytosine rich domain whereby strands associate both inter- and intramolecularly at moderately acidic pH." [PMID:9753739] -synonym: "i motif" EXACT [] -synonym: "short intercalated motif" EXACT [] -is_a: SO:0000142 ! DNA_sequence_secondary_structure - -[Term] -id: SO:0001011 -name: PNA_oligo -def: "Peptide nucleic acid, is a chemical not known to occur naturally but is artificially synthesized and used in some biological research and medical treatments. The PNA backbone is composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [SO:ke] -synonym: "peptide nucleic acid" EXACT [] -synonym: "PNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Peptide_nucleic_acid "wiki" -is_a: SO:0001247 ! 
synthetic_oligo -relationship: has_quality SO:0001184 ! PNA - -[Term] -id: SO:0001012 -name: DNAzyme -def: "A DNA sequence with catalytic activity." [SO:cb] -comment: Added by request from Colin Batchelor. -synonym: "catalytic DNA" EXACT [] -synonym: "deoxyribozyme" RELATED [] -synonym: "DNA enzyme" EXACT [] -is_a: SO:0000696 ! oligo -relationship: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0001013 -name: MNP -def: "A multiple nucleotide polymorphism with alleles of common length > 1, for example AAA/TTT." [http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?rs=rs2067431] -synonym: "multiple nucleotide polymorphism" RELATED [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0001014 -name: intron_domain -comment: Requested by Colin Batchelor, Feb 2007. -synonym: "intron domain" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000188 ! intron - -[Term] -id: SO:0001015 -name: wobble_base_pair -def: "A type of non-canonical base pairing, most commonly between G and U, which is important for the secondary structure of RNAs. It has similar thermodynamic stability to the Watson-Crick pairing. Wobble base pairs only have two hydrogen bonds. Other wobble base pair possibilities are I-A, I-U and I-C." [PMID:11256617] -synonym: "wobble base pair" EXACT [] -synonym: "wobble pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Wobble_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0001016 -name: internal_guide_sequence -def: "A purine-rich sequence in the group I introns which determines the locations of the splice sites in group I intron splicing and has catalytic activity." [SO:cb] -synonym: "IGS" EXACT [] -synonym: "internal guide sequence" EXACT [] -is_a: SO:0001014 ! intron_domain -relationship: part_of SO:0000587 ! group_I_intron - -[Term] -id: SO:0001017 -name: silent_mutation -def: "A sequence variant that does not affect protein function. 
Silent mutations may occur in genic ( CDS, UTR, intron etc) and intergenic regions. Silent mutations may have affects on processes such as splicing and regulation." [SO:ke] -comment: Added in March 2007 in after meeting with pharmgkb. Although this term is in common usage, it is better to annotate with the most specific term possible, such as synonymous codon, intron variant etc. -synonym: "silent mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Silent_mutation "wiki" -xref: loinc:LA6700-4 "Silent" -is_a: SO:0001878 ! feature_variant - -[Term] -id: SO:0001018 -name: epitope -def: "A binding site that, in the molecule, interacts selectively and non-covalently with antibodies, B cells or T cells." [http://en.wikipedia.org/wiki/Epitope, SO:cb] -comment: Requested by Trish Whetzel. -xref: http://en.wikipedia.org/wiki/Epitope "wiki" -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -subset: SOFA -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0001020 -name: sequence_variant_affecting_copy_number -comment: OBSOLETE: This term was deleted as it conflated more than one term. The alteration is separate from the effect. -synonym: "mutation affecting copy number" EXACT [] -synonym: "sequence variant affecting copy number" EXACT [] -is_obsolete: true -replaced_by: SO:0001563 - -[Term] -id: SO:0001021 -name: chromosome_breakpoint -alt_id: SO:0001242 -synonym: "aberration breakpoint" EXACT [] -synonym: "aberration_junction" EXACT [] -synonym: "chromosome breakpoint" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000340 ! 
chromosome - -[Term] -id: SO:0001022 -name: inversion_breakpoint -def: "The point within a chromosome where an inversion begins or ends." [SO:cb] -synonym: "inversion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001023 -name: allele -def: "An allele is one of a set of coexisting sequence variants of a gene." [SO:immuno_workshop] -synonym: "allelomorph" EXACT [] -xref: http://en.wikipedia.org/wiki/Allele "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000704 ! gene - -[Term] -id: SO:0001024 -name: haplotype -def: "A haplotype is one of a set of coexisting sequence variants of a haplotype block." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Haplotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000355 ! haplotype_block - -[Term] -id: SO:0001025 -name: polymorphic_sequence_variant -def: "A sequence variant that is segregating in one or more natural populations of a species." [SO:immuno_workshop] -synonym: "polymorphic sequence variant" EXACT [] -is_a: SO:0001023 ! allele - -[Term] -id: SO:0001026 -name: genome -def: "A genome is the sum of genetic material within a cell or virion." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genome "wiki" -is_a: SO:0001260 ! sequence_collection -relationship: has_part SO:0001235 ! replicon - -[Term] -id: SO:0001027 -name: genotype -def: "A genotype is a variant genome, complete or incomplete." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0001026 ! genome - -[Term] -id: SO:0001028 -name: diplotype -def: "A diplotype is a pair of haplotypes from a given individual. It is a genotype where the phase is known." [SO:immuno_workshop] -is_a: SO:0001507 ! variant_collection - -[Term] -id: SO:0001029 -name: direction_attribute -synonym: "direction attribute" EXACT [] -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0001030 -name: forward -def: "Forward is an attribute of the feature, where the feature is in the 5' to 3' direction." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001031 -name: reverse -def: "Reverse is an attribute of the feature, where the feature is in the 3' to 5' direction. Again could be applied to primer." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001032 -name: mitochondrial_DNA -comment: This terms is used by MO. -synonym: "mitochondrial DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_DNA "wiki" -is_a: SO:0000737 ! mitochondrial_sequence -relationship: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001033 -name: chloroplast_DNA -comment: This term is used by MO. -synonym: "chloroplast DNA" EXACT [] -is_a: SO:0000745 ! chloroplast_sequence -relationship: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001034 -name: mirtron -def: "A de-branched intron which mimics the structure of pre-miRNA and enters the miRNA processing pathway without Drosha mediated cleavage." [PMID:17589500, SO:ma] -comment: Ruby et al. Nature 448:83 describe a new class of miRNAs that are derived from de-branched introns. -is_a: SO:0001014 ! intron_domain - -[Term] -id: SO:0001035 -name: piRNA -def: "A small non coding RNA, part of a silencing system that prevents the spreading of selfish genetic elements." [SO:ke] -synonym: "piwi-associated RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/PiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001036 -name: arginyl_tRNA -def: "A tRNA sequence that has an arginine anticodon, and a 3' arginine binding region." [SO:ke] -synonym: "arginyl tRNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000212 ! 
arginine_tRNA_primary_transcript - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region -relationship: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0001038 -name: extrachromosomal_mobile_genetic_element -def: "An MGE that is not integrated into the host chromosome." [SO:ke] -synonym: "extrachromosomal mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001040 -name: integrated_plasmid -def: "A plasmid sequence that is integrated within the host chromosome." [SO:ke] -synonym: "integrated plasmid" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element -relationship: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0001041 -name: viral_sequence -def: "The region of nucleotide sequence of a virus, a submicroscopic particle that replicates by infecting a host cell." [SO:ke] -comment: The definitions of the children of this term were revised Decemeber 2007 after discussion on song-devel. The resulting definitions are slightly unweildy but hopefully more logically correct. -synonym: "viral sequence" EXACT [] -synonym: "virus sequence" EXACT [] -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0001042 -name: phage_sequence -def: "The nucleotide sequence of a virus that infects bacteria." 
[SO:ke] -synonym: "bacteriophage" EXACT [] -synonym: "phage" EXACT [] -synonym: "phage sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Bacteriophage "wiki" -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001043 -name: attCtn_site -def: "An attachment site located on a conjugative transposon and used for site-specific integration of a conjugative transposon." [Phigo:at] -synonym: "attCtn site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000371 ! conjugative_transposon - -[Term] -id: SO:0001044 -name: nuclear_mt_pseudogene -def: "A nuclear pseudogene of either coding or non-coding mitochondria derived sequence." [SO:xp] -comment: Definition change requested by Val, 3172757. -synonym: "nuclear mitochondrial pseudogene" EXACT [] -synonym: "nuclear mt pseudogene" EXACT [] -synonym: "NUMT" EXACT [] -xref: http://en.wikipedia.org/wiki/Numt "wikipedia" -is_a: SO:0001760 ! non_processed_pseudogene - -[Term] -id: SO:0001045 -name: cointegrated_plasmid -def: "A MGE region consisting of two fused plasmids resulting from a replicative transposition event." [phigo:at] -synonym: "cointegrated plasmid" EXACT [] -synonym: "cointegrated replicon" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0001046 -name: IRLinv_site -def: "Component of the inversion site located at the left of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRLinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001047 -name: IRRinv_site -def: "Component of the inversion site located at the right of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRRinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001048 -name: inversion_site_part -def: "A region located within an inversion site." 
[SO:ke] -comment: A term created to allow the parts of an inversion site have an is_a path back to the root. -synonym: "inversion site part" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0001049 -name: defective_conjugative_transposon -def: "An island that contains genes for integration/excision and the gene and site for the initiation of intercellular transfer by conjugation. It can be complemented for transfer by a conjugative transposon." [Phigo:ariane] -synonym: "defective conjugative transposon" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001050 -name: repeat_fragment -def: "A portion of a repeat, interrupted by the insertion of another element." [SO:ke] -comment: Requested by Chris Smith, and others at Flybase to help annotate nested repeats. -synonym: "repeat fragment" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0001649 ! nested_repeat - -[Term] -id: SO:0001051 -name: nested_region -is_obsolete: true - -[Term] -id: SO:0001052 -name: nested_repeat -is_obsolete: true - -[Term] -id: SO:0001053 -name: nested_transposon -is_obsolete: true - -[Term] -id: SO:0001054 -name: transposon_fragment -def: "A portion of a transposon, interrupted by the insertion of another element." [SO:ke] -synonym: "transposon fragment" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0001648 ! nested_transposon - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -subset: SOFA -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." [SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0001679 ! 
transcription_regulatory_region - -[Term] -id: SO:0001057 -name: enhanceosome -is_obsolete: true - -[Term] -id: SO:0001058 -name: promoter_targeting_sequence -def: "A transcriptional_cis_regulatory_region that restricts the activity of a CRM to a single promoter and which functions only when both itself and an insulator are located between the CRM and the promoter." [SO:regcreative] -synonym: "promoter targeting sequence" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. -subset: SOFA -synonym: "partially characterised change in DNA sequence" NARROW [] -synonym: "partially_characterised_change_in_DNA_sequence" NARROW [] -synonym: "sequence alteration" EXACT [] -synonym: "uncharacterised_change_in_nucleotide_sequence" NARROW [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001060 -name: sequence_variant -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -synonym: "sequence variant" EXACT [] - -[Term] -id: SO:0001061 -name: propeptide_cleavage_site -alt_id: BS:00063 -def: "The propeptide_cleavage_site is the arginine/lysine boundary on a propeptide where cleavage occurs." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "propeptide cleavage site" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0001062 -name: propeptide -alt_id: BS:00077 -def: "Part of a peptide chain which is cleaved off during the formation of the mature protein." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "propep" RELATED [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Propeptide "wiki" -is_a: SO:0100011 ! 
cleaved_peptide_region - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001064 -name: active_peptide -alt_id: BS:00076 -def: "Active peptides are proteins which are biologically active, released from a precursor molecule." [EBIBS:GAR, UniProt:curation_manual] -comment: Hormones, neuropeptides, antimicrobial peptides, are active peptides. They are typically short (<40 amino acids) in length. -subset: biosapiens -synonym: "active peptide" EXACT [] -synonym: "peptide" BROAD [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Peptide "wiki" -is_a: SO:0000419 ! mature_protein_region - -[Term] -id: SO:0001066 -name: compositionally_biased_region_of_peptide -alt_id: BS:00068 -def: "Polypeptide region that is rich in a particular amino acid or homopolymeric and greater than three residues in length." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "compbias" RELATED [uniprot:feature_type] -synonym: "compositional bias" RELATED [] -synonym: "compositionally biased" RELATED [] -synonym: "compositionally biased region of peptide" RELATED [] -synonym: "compositionally_biased_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001067 -name: polypeptide_motif -alt_id: BS:00032 -def: "A sequence motif is a short (up to 20 amino acids) region of biological interest. Such motifs, although they are too short to constitute functional domains, share sequence similarities and are conserved in different proteins. They display a common function (protein-binding, subcellular location etc.)." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. 
-subset: biosapiens -synonym: "motif" BROAD [uniprot:feature_type] -synonym: "polypeptide motif" EXACT [] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001068 -name: polypeptide_repeat -alt_id: BS:00070 -def: "A polypeptide_repeat is a single copy of an internal sequence repetition." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "polypeptide repeat" EXACT [] -synonym: "repeat" RELATED [uniprot:feature_type] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001070 -name: polypeptide_structural_region -alt_id: BS:00337 -def: "Region of polypeptide with a given structural property." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "polypeptide structural region" EXACT [] -synonym: "structural_region" RELATED [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001071 -name: membrane_structure -alt_id: BS:00128 -def: "Arrangement of the polypeptide with respect to the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "membrane structure" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001072 -name: extramembrane_polypeptide_region -alt_id: BS:00154 -def: "Polypeptide region that is localized outside of a lipid bilayer." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "extramembrane" RELATED BS [] -synonym: "extramembrane polypeptide region" EXACT [] -synonym: "extramembrane_region" RELATED BS [] -synonym: "topo_dom" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001073 -name: cytoplasmic_polypeptide_region -alt_id: BS:00145 -def: "Polypeptide region that is localized inside the cytoplasm." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "cytoplasm_location" EXACT BS [] -synonym: "cytoplasmic polypeptide region" EXACT [] -synonym: "inside" RELATED BS [] -is_a: SO:0001072 ! 
extramembrane_polypeptide_region - -[Term] -id: SO:0001074 -name: non_cytoplasmic_polypeptide_region -alt_id: BS:00144 -def: "Polypeptide region that is localized outside of a lipid bilayer and outside of the cytoplasm." [EBIBS:GAR, SO:cb] -comment: This could be inside an organelle within the cell. -subset: biosapiens -synonym: "non cytoplasmic polypeptide region" EXACT [] -synonym: "non_cytoplasm_location" EXACT BS [] -synonym: "outside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001075 -name: intramembrane_polypeptide_region -alt_id: BS:00156 -def: "Polypeptide region present in the lipid bilayer." [EBIBS:GAR] -subset: biosapiens -synonym: "intramembrane" RELATED BS [] -synonym: "intramembrane polypeptide region" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001076 -name: membrane_peptide_loop -alt_id: BS:00155 -def: "Polypeptide region localized within the lipid bilayer where both ends traverse the same membrane." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "membrane peptide loop" EXACT [] -synonym: "membrane_loop" RELATED BS [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001077 -name: transmembrane_polypeptide_region -alt_id: BS:00158 -def: "Polypeptide region traversing the lipid bilayer." [EBIBS:GAR, UniProt:curator_manual] -subset: biosapiens -synonym: "transmem" RELATED BS [uniprot:feature_type] -synonym: "transmembrane" RELATED BS [] -synonym: "transmembrane polypeptide region" EXACT [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001078 -name: polypeptide_secondary_structure -alt_id: BS:00003 -def: "A region of peptide with secondary structure has hydrogen bonding along the peptide chain that causes a defined conformation of the chain." [EBIBS:GAR] -comment: Biosapien term was secondary_structure. 
-subset: biosapiens -synonym: "2nary structure" RELATED BS [] -synonym: "polypeptide secondary structure" EXACT [] -synonym: "secondary structure" RELATED BS [] -synonym: "secondary structure region" RELATED BS [] -synonym: "secondary_structure" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Secondary_structure "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001079 -name: polypeptide_structural_motif -alt_id: BS:0000338 -def: "Motif is a three-dimensional structural element within the chain, which appears also in a variety of other molecules. Unlike a domain, a motif does not need to form a stable globular unit." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide structural motif" RELATED [] -synonym: "structural_motif" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Structural_motif "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001080 -name: coiled_coil -alt_id: BS:00041 -def: "A coiled coil is a structural motif in proteins, in which alpha-helices are coiled together like the strands of a rope." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "coiled" RELATED BS [uniprot:feature_type] -synonym: "coiled coil" EXACT [] -xref: http://en.wikipedia.org/wiki/Coiled_coil "wiki" -is_a: SO:0001079 ! polypeptide_structural_motif - -[Term] -id: SO:0001081 -name: helix_turn_helix -alt_id: BS:00147 -def: "A motif comprising two helices separated by a turn." [EBIBS:GAR] -subset: biosapiens -synonym: "helix turn helix" EXACT [] -synonym: "helix-turn-helix" EXACT [] -synonym: "HTH" RELATED BS [] -is_a: SO:0001079 ! polypeptide_structural_motif -relationship: has_part SO:0001114 ! peptide_helix -relationship: has_part SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001082 -name: polypeptide_sequencing_information -alt_id: BS:00125 -def: "Incompatibility in the sequence due to some experimental problem." [EBIBS:GAR] -comment: Range. 
-subset: biosapiens -synonym: "sequencing_information" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0001083 -name: non_adjacent_residues -alt_id: BS:00182 -def: "Indicates that two consecutive residues in a fragment sequence are not consecutive in the full-length protein and that there are a number of unsequenced residues between them." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "non consecutive" EXACT [] -synonym: "non_cons" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001084 -name: non_terminal_residue -alt_id: BS:00072 -def: "The residue at an extremity of the sequence is not the terminal residue." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "non terminal" EXACT [] -synonym: "non_ter" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001085 -name: sequence_conflict -alt_id: BS:00069 -def: "Different sources report differing sequences." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "conflict" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001086 -name: sequence_uncertainty -alt_id: BS:00181 -def: "Describes the positions in a sequence where the authors are unsure about the sequence assignment." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "unsure" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001087 -name: cross_link -alt_id: BS:00178 -def: "Posttranslationally formed amino acid bonds." 
[EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "cross link" EXACT [] -synonym: "crosslink" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001088 -name: disulfide_bond -alt_id: BS:00028 -def: "The covalent bond between sulfur atoms that binds two peptide chains or different parts of one peptide chain and is a structural determinant in many protein molecules." [EBIBS:GAR, UniProt:curation_manual] -comment: 2 discreet & joined. -subset: biosapiens -synonym: "disulfid" RELATED [] -synonym: "disulfide" RELATED [] -synonym: "disulfide bond" RELATED [] -synonym: "disulphide" EXACT [] -synonym: "disulphide bond" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001089 -name: post_translationally_modified_region -alt_id: BS:00052 -def: "A region where a transformation occurs in a protein after it has been synthesized. This which may regulate, stabilize, crosslink or introduce new chemical functionalities in the protein." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mod_res" EXACT [uniprot:feature_type] -synonym: "modified residue" EXACT [] -synonym: "post_translational_modification" EXACT [] -xref: http://en.wikipedia.org/wiki/Post_translational_modification "wiki" -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001090 -name: covalent_binding_site -alt_id: BS:00246 -def: "Binding involving a covalent bond." [EBIBS:GAR] -subset: biosapiens -synonym: "covalent binding site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001091 -name: non_covalent_binding_site -alt_id: BS:00029 -def: "Binding site for any chemical group (co-enzyme, prosthetic group, etc.)." [EBIBS:GAR] -comment: Discrete. 
-subset: biosapiens -synonym: "binding" RELATED [uniprot:curation] -synonym: "binding site" RELATED [] -synonym: "non covalent binding site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001092 -name: polypeptide_metal_contact -alt_id: BS:00027 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with metal ions." [EBIBS:GAR, SO:cb, UniProt:curation_manual] -comment: Residue is part of a binding site for a metal ion. -subset: biosapiens -synonym: "metal_binding" RELATED [] -is_a: SO:0001656 ! metal_binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001093 -name: protein_protein_contact -alt_id: BS:00131 -def: "A binding site that, in the protein molecule, interacts selectively and non-covalently with polypeptide residues." [EBIBS:GAR, UniProt:Curation_manual] -subset: biosapiens -synonym: "protein protein contact" EXACT [] -synonym: "protein protein contact site" EXACT [] -synonym: "protein_protein_interaction" RELATED [] -xref: http://en.wikipedia.org/wiki/Protein_protein_interaction "wiki" -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001094 -name: polypeptide_calcium_ion_contact_site -alt_id: BS:00186 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with calcium ions." [EBIBS:GAR] -comment: Residue involved in contact with calcium. -subset: biosapiens -synonym: "ca bind" RELATED [] -synonym: "Ca_contact_site" EXACT [] -synonym: "polypeptide calcium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001095 -name: polypeptide_cobalt_ion_contact_site -alt_id: BS:00136 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with cobalt ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Co_contact_site" EXACT [] -synonym: "polypeptide cobalt ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001096 -name: polypeptide_copper_ion_contact_site -alt_id: BS:00146 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with copper ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Cu_contact_site" EXACT [] -synonym: "polypeptide copper ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001097 -name: polypeptide_iron_ion_contact_site -alt_id: BS:00137 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with iron ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Fe_contact_site" EXACT [] -synonym: "polypeptide iron ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001098 -name: polypeptide_magnesium_ion_contact_site -alt_id: BS:00187 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with magnesium ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Mg_contact_site" EXACT [] -synonym: "polypeptide magnesium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001099 -name: polypeptide_manganese_ion_contact_site -alt_id: BS:00140 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with manganese ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Mn_contact_site" EXACT [] -synonym: "polypeptide manganese ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001100 -name: polypeptide_molybdenum_ion_contact_site -alt_id: BS:00141 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with molybdenum ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Mo_contact_site" EXACT [] -synonym: "polypeptide molybdenum ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001101 -name: polypeptide_nickel_ion_contact_site -alt_id: BS:00142 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with nickel ions." [EBIBS:GAR] -subset: biosapiens -synonym: "Ni_contact_site" EXACT [] -synonym: "polypeptide nickel ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001102 -name: polypeptide_tungsten_ion_contact_site -alt_id: BS:00143 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with tungsten ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "polypeptide tungsten ion contact site" EXACT [] -synonym: "W_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001103 -name: polypeptide_zinc_ion_contact_site -alt_id: BS:00185 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with zinc ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "polypeptide zinc ion contact site" EXACT [] -synonym: "Zn_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001104 -name: catalytic_residue -alt_id: BS:00026 -def: "Amino acid involved in the activity of an enzyme." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "act_site" RELATED [uniprot:feature_type] -synonym: "active site residue" EXACT [] -synonym: "catalytic residue" EXACT [] -is_a: SO:0001237 ! amino_acid -relationship: part_of SO:0100019 ! polypeptide_catalytic_motif - -[Term] -id: SO:0001105 -name: polypeptide_ligand_contact -alt_id: BS:00157 -def: "Residues which interact with a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide ligand contact" EXACT [] -synonym: "protein-ligand interaction" RELATED [] -is_a: SO:0001657 ! ligand_binding_site -is_a: SO:0100002 ! 
molecular_contact_region - -[Term] -id: SO:0001106 -name: asx_motif -alt_id: BS:00202 -def: "A motif of five consecutive residues and two H-bonds in which: Residue(i) is Aspartate or Asparagine (Asx), side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3), main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001107 -name: beta_bulge -alt_id: BS:00208 -def: "A motif of three residues within a beta-sheet in which the main chains of two consecutive residues are H-bonded to that of the third, and in which the dihedral angles are as follows: Residue(i): -140 degrees < phi(l) -20 degrees , -90 degrees < psi(l) < 40 degrees. Residue (i+1): -180 degrees < phi < -25 degrees or +120 degrees < phi < +180 degrees, +40 degrees < psi < +180 degrees or -180 degrees < psi < -120 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge" EXACT [] -xref: http://en.wikipedia.org/wiki/Beta_bulge "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001108 -name: beta_bulge_loop -alt_id: BS:00209 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds. Beta bulge loops often occur at the loop ends of beta-hairpins." [EBIBS:GAR, Http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001109 -name: beta_bulge_loop_five -alt_id: BS:00210 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+4), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+3), these loops have an RL nest at residues i+2 and i+3." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop five" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001110 -name: beta_bulge_loop_six -alt_id: BS:00211 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+5), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+4), these loops have an RL nest at residues i+3 and i+4." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop six" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001111 -name: beta_strand -alt_id: BS:00042 -def: "A beta strand describes a single length of polypeptide chain that forms part of a beta sheet. A single continuous stretch of amino acids adopting an extended conformation of hydrogen bonds between the N-O and the C=O of another part of the peptide. This forms a secondary protein structure in which two or more extended polypeptide regions are hydrogen-bonded to one another in a planar array." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "strand" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Beta_sheet "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001112 -name: antiparallel_beta_strand -alt_id: BS:0000341 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (one running N-terminal to C-terminal and one running C-terminal to N-terminal). Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i) and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they form two mutual backbone hydrogen bonds to each other's flanking peptide groups; this is known as a close pair of hydrogen bonds. 
The peptide backbone dihedral angles (phi, psi) are about (-140 degrees, 135 degrees) in antiparallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "antiparallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001113 -name: parallel_beta_strand -alt_id: BS:00151 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (both running N-terminal to C-terminal). This orientation is slightly less stable because it introduces nonplanarity in the inter-strand hydrogen bonding pattern. Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i)and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they do not hydrogen bond to each other; rather, one residue forms hydrogen bonds to the residues that flank the other (but not vice versa). For example, residue i may form hydrogen bonds to residues j - 1 and j + 1; this is known as a wide pair of hydrogen bonds. By contrast, residue j may hydrogen-bond to different residues altogether, or to none at all. The dihedral angles (phi, psi) are about (-120 degrees, 115 degrees) in parallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "parallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001114 -name: peptide_helix -alt_id: BS:00152 -def: "A helix is a secondary_structure conformation where the peptide backbone forms a coil." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "helix" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001115 -name: left_handed_peptide_helix -alt_id: BS:00222 -def: "A left handed helix is a region of peptide where the coiled conformation turns in an anticlockwise, left handed screw." 
[EBIBS:GAR] -subset: biosapiens -synonym: "helix-l" RELATED [] -synonym: "left handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001116 -name: right_handed_peptide_helix -alt_id: BS:0000339 -def: "A right handed helix is a region of peptide where the coiled conformation turns in a clockwise, right handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix" RELATED BS [] -synonym: "right handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001117 -name: alpha_helix -alt_id: BS:00040 -def: "The helix has 3.6 residues per turn which corersponds to a translation of 1.5 angstroms (= 0.15 nm) along the helical axis. Every backbone N-H group donates a hydrogen bond to the backbone C=O group of the amino acid four residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "a-helix" RELATED BS [] -synonym: "helix" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Alpha_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001118 -name: pi_helix -alt_id: BS:00153 -def: "The pi helix has 4.1 residues per turn and a translation of 1.15 (=0.115 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid five residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "pi helix" EXACT [] -xref: http://en.wikipedia.org/wiki/Pi_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001119 -name: three_ten_helix -alt_id: BS:0000340 -def: "The 3-10 helix has 3 residues per turn with a translation of 2.0 angstroms (=0.2 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid three residues earlier." [EBIBS:GAR] -comment: Range. 
-subset: biosapiens -synonym: "3(10) helix" EXACT [] -synonym: "3-10 helix" EXACT [] -synonym: "310 helix" EXACT [] -synonym: "three ten helix" EXACT [] -xref: http://en.wikipedia.org/wiki/310_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001120 -name: polypeptide_nest_motif -alt_id: BS:00223 -def: "A motif of two consecutive residues with dihedral angles. Nest should not have Proline as any residue. Nests frequently occur as parts of other motifs such as Schellman loops." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest" RELATED BS [] -synonym: "nest_motif" EXACT [] -synonym: "polypeptide nest motif" RELATED [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001121 -name: polypeptide_nest_left_right_motif -alt_id: BS:00224 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_left_right" EXACT [] -synonym: "nest_lr" EXACT [] -synonym: "polypeptide nest left right motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001122 -name: polypeptide_nest_right_left_motif -alt_id: BS:00225 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_right_left" EXACT [] -synonym: "nest_rl" EXACT [] -synonym: "polypeptide nest right left motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001123 -name: schellmann_loop -alt_id: BS:00226 -def: "A motif of six or seven consecutive residues that contains two H-bonds." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "paperclip" RELATED BS [] -synonym: "paperclip loop" RELATED [] -synonym: "schellmann loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001124 -name: schellmann_loop_seven -alt_id: BS:00228 -def: "Wild type: A motif of seven consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+6), the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+5)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop seven" EXACT [] -synonym: "seven-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001125 -name: schellmann_loop_six -alt_id: BS:00227 -def: "Common Type: A motif of six consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+5) the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop six" EXACT [] -synonym: "six-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001126 -name: serine_threonine_motif -alt_id: BS:00229 -def: "A motif of five consecutive residues and two hydrogen bonds in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3) , the main-chain CO group of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine motif" EXACT [] -synonym: "st motif" EXACT [] -synonym: "st_motif" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001127 -name: serine_threonine_staple_motif -alt_id: BS:00230 -def: "A motif of four or five consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain OH of residue(i) is H-bonded to the main-chain CO of residue(i3) or (i4), Phi angles of residues(i1), (i2) and (i3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine threonine staple motif" EXACT [] -synonym: "st_staple" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001128 -name: polypeptide_turn_motif -alt_id: BS:00148 -def: "A reversal in the direction of the backbone of a protein that is stabilized by hydrogen bond between backbone NH and CO groups, involving no more than 4 amino acid residues." [EBIBS:GAR, uniprot:feature_type] -comment: Range. -subset: biosapiens -synonym: "turn" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001129 -name: asx_turn_left_handed_type_one -alt_id: BS:00206 -def: "Left handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type one" EXACT [] -synonym: "asx_turn_il" RELATED [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001130 -name: asx_turn_left_handed_type_two -alt_id: BS:00204 -def: "Left handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type two" EXACT [] -synonym: "asx_turn_iil" EXACT [] -is_a: SO:0000912 ! 
asx_turn - -[Term] -id: SO:0001131 -name: asx_turn_right_handed_type_two -alt_id: BS:00205 -def: "Right handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn right handed type two" EXACT [] -synonym: "asx_turn_iir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001132 -name: asx_turn_right_handed_type_one -alt_id: BS:00207 -def: "Right handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn type right handed type one" EXACT [] -synonym: "asx_turn_ir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001133 -name: beta_turn -alt_id: BS:00212 -def: "A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles of the second and third residues, which are the basis for sub-categorization." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001134 -name: beta_turn_left_handed_type_one -alt_id: BS:00215 -def: "Left handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles:- Residue(i+1): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees. 
Residue(i+2): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type one" EXACT [] -synonym: "beta_turn_il" EXACT [] -synonym: "type I' beta turn" EXACT [] -synonym: "type I' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001135 -name: beta_turn_left_handed_type_two -alt_id: BS:00213 -def: "Left handed type II: A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees > phi > -20 degrees, +80 degrees > psi > +180 degrees. Residue(i+2): +20 degrees > phi > +140 degrees, -40 degrees > psi > +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type two" EXACT [] -synonym: "beta_turn_iil" EXACT [] -synonym: "type II' beta turn" EXACT [] -synonym: "type II' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001136 -name: beta_turn_right_handed_type_one -alt_id: BS:00216 -def: "Right handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+2): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type one" EXACT [] -synonym: "beta_turn_ir" EXACT [] -synonym: "type I beta turn" EXACT [] -synonym: "type I turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001137 -name: beta_turn_right_handed_type_two -alt_id: BS:00214 -def: "Right handed type II:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, +80 degrees < psi < +180 degrees. Residue(i+2): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type two" EXACT [] -synonym: "beta_turn_iir" EXACT [] -synonym: "type II beta turn" EXACT [] -synonym: "type II turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001138 -name: gamma_turn -alt_id: BS:00219 -def: "Gamma turns, defined for 3 residues i,( i+1),( i+2) if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001139 -name: gamma_turn_classic -alt_id: BS:00220 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=75.0 - psi(i+1)=-64.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "classic gamma turn" EXACT [] -synonym: "gamma turn classic" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001140 -name: gamma_turn_inverse -alt_id: BS:00221 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=-79.0 - psi(i+1)=69.0." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn inverse" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001141 -name: serine_threonine_turn -alt_id: BS:00231 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine turn" EXACT [] -synonym: "st_turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001142 -name: st_turn_left_handed_type_one -alt_id: BS:00234 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type one" EXACT [] -synonym: "st_turn_il" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001143 -name: st_turn_left_handed_type_two -alt_id: BS:00232 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type two" EXACT [] -synonym: "st_turn_iil" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001144 -name: st_turn_right_handed_type_one -alt_id: BS:00235 -def: "The peptide twists in an clockwise, right handed manner. 
The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type one" EXACT [] -synonym: "st_turn_ir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001145 -name: st_turn_right_handed_type_two -alt_id: BS:00233 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type two" EXACT [] -synonym: "st_turn_iir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001146 -name: polypeptide_variation_site -alt_id: BS:00336 -def: "A site of sequence variation (alteration). Alternative sequence due to naturally occuring events such as polymorphisms and altermatve splicing or experimental methods such as site directed mutagenesis." [EBIBS:GAR, SO:ke] -comment: For example, was a substitution natural or mutated as part of an experiment? This term is added to merge the biosapiens term sequence_variations. -subset: biosapiens -synonym: "sequence_variations" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001147 -name: natural_variant_site -alt_id: BS:00071 -def: "Describes the natural sequence variants due to polymorphisms, disease-associated mutations, RNA editing and variations between strains, isolates or cultivars." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. 
-subset: biosapiens -synonym: "natural_variant" BROAD [] -synonym: "sequence variation" BROAD [] -synonym: "variant" BROAD [uniprot:feature_type] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001148 -name: mutated_variant_site -alt_id: BS:00036 -def: "Site which has been experimentally altered." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mutagen" EXACT BS [uniprot:feature_type] -synonym: "mutagenesis" EXACT [] -synonym: "mutated_site" EXACT [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001149 -name: alternate_sequence_site -alt_id: BS:00073 -alt_id: SO:0001065 -def: "Description of sequence variants produced by alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "alternative_sequence" EXACT [] -synonym: "isoform" NARROW [] -synonym: "sequence variation" NARROW [] -synonym: "var_seq" EXACT [uniprot:feature_type] -synonym: "varsplic" NARROW [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001150 -name: beta_turn_type_six -def: "A motif of four consecutive peptide resides of type VIa or type VIb and where the i+2 residue is cis-proline." [SO:cb] -subset: biosapiens -synonym: "beta turn type six" EXACT [] -synonym: "cis-proline loop" EXACT [] -synonym: "type VI beta turn" EXACT [] -synonym: "type VI turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001151 -name: beta_turn_type_six_a -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -90 degrees, psi ~ 0 degrees." 
[PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six a" EXACT [] -synonym: "type VIa beta turn" EXACT [] -synonym: "type VIa turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001152 -name: beta_turn_type_six_a_one -subset: biosapiens -synonym: "beta turn type six a one" EXACT [] -synonym: "type VIa1 beta turn" EXACT [] -synonym: "type VIa1 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001153 -name: beta_turn_type_six_a_two -subset: biosapiens -synonym: "beta turn type six a two" EXACT [] -synonym: "type VIa2 beta turn" EXACT [] -synonym: "type VIa2 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001154 -name: beta_turn_type_six_b -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -120 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -60 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six b" EXACT [] -synonym: "type VIb beta turn" EXACT [] -synonym: "type VIb turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001155 -name: beta_turn_type_eight -def: "A motif of four consecutive peptide residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ -30 degrees. Residue(i+2): phi ~ -120 degrees, psi ~ 120 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type eight" EXACT [] -synonym: "type VIII beta turn" EXACT [] -synonym: "type VIII turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001156 -name: DRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -10 and -60 relative to the TSS. Consensus sequence is WATCGATW." [PMID:12537576] -comment: This consensus sequence was identified computationally using the MEME algorithm within core promoter sequences from -60 to +40, with an E value of 1.7e-183. Tends to co-occur with Motif 7. Tends to not occur with DPE motif (SO:0000015) or motif 10. -synonym: "DRE motif" EXACT [] -synonym: "NDM4" EXACT [] -synonym: "WATCGATW_motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001157 -name: DMv4_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements with respect to the TSS (+1). Consensus sequence is YGGTCACACTR. Marked spatial preference within core promoter; tend to occur near the TSS, although not as tightly as INR (SO:0000014)." [PMID:16827941:12537576] -synonym: "directional motif v4" EXACT [] -synonym: "DMv4" EXACT [] -synonym: "DMv4 motif" EXACT [] -synonym: "motif 1 element" EXACT [] -synonym: "promoter motif 1" EXACT [] -synonym: "YGGTCACATR" NARROW [] -is_a: SO:0001659 ! promoter_element - -[Term] -id: SO:0001158 -name: E_box_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and +1 relative to the TSS. Consensus sequence is AWCAGCTGWT. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015)." [PMID:12537576:16827941] -synonym: "AWCAGCTGWT" NARROW [] -synonym: "E box motif" EXACT [] -synonym: "generic E box motif" EXACT [] -synonym: "NDM5" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001159 -name: DMv5_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -50 and -10 relative to the TSS. Consensus sequence is KTYRGTATWTTT. Tends to co-occur with DMv4 (SO:0001157) . Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576:16827941] -synonym: "directional motif v5" EXACT [] -synonym: "DMv5" EXACT [] -synonym: "DMv5 motif" EXACT [] -synonym: "KTYRGTATWTTT" NARROW [] -synonym: "promoter motif 6" RELATED [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001160 -name: DMv3_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -30 and +15 relative to the TSS. Consensus sequence is KNNCAKCNCTRNY. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015) or MTE (0001162)." [PMID:12537576:16827941] -synonym: "directional motif v3" EXACT [] -synonym: "DMv3" EXACT [] -synonym: "DMv3 motif" EXACT [] -synonym: "KNNCAKCNCTRNY" NARROW [] -synonym: "promoter motif 7" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001161 -name: DMv2_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and -45 relative to the TSS. Consensus sequence is MKSYGGCARCGSYSS. Tends to co-occur with DMv3 (SO:0001160). Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576:16827941] -synonym: "directional motif v2" EXACT [] -synonym: "DMv2" EXACT [] -synonym: "DMv2 motif" EXACT [] -synonym: "MKSYGGCARCGSYSS" NARROW [] -synonym: "promoter motif 8" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001162 -name: MTE -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between +20 and +30 relative to the TSS. Consensus sequence is CSARCSSAACGS. Tends to co-occur with INR motif (SO:0000014). Tends to not occur with DPE motif (SO:0000015) or DMv5 (SO:0001159)." [PMID:12537576:15231738, PMID:16858867] -synonym: "CSARCSSAACGS" NARROW [] -synonym: "motif ten element" EXACT [] -synonym: "motif_ten_element" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter - -[Term] -id: SO:0001163 -name: INR1_motif -def: "A promoter motif with consensus sequence TCATTCG." [PMID:16827941] -synonym: "directional motif p3" EXACT [] -synonym: "directional promoter motif 3" EXACT [] -synonym: "DMp3" EXACT [] -synonym: "INR1 motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001164 -name: DPE1_motif -def: "A promoter motif with consensus sequence CGGACGT." [PMID:16827941] -synonym: "directional motif 5" EXACT [] -synonym: "directional promoter motif 5" RELATED [] -synonym: "DMp5" EXACT [] -synonym: "DPE1 motif" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001165 -name: DMv1_motif -def: "A promoter motif with consensus sequence CARCCCT." [PMID:16827941] -synonym: "directional promoter motif v1" RELATED [] -synonym: "DMv1" RELATED [] -synonym: "DMv1 motif" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001166 -name: GAGA_motif -def: "A non directional promoter motif with consensus sequence GAGAGCG." [PMID:16827941] -synonym: "GAGA" EXACT [] -synonym: "GAGA motif" EXACT [] -synonym: "NDM1" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001167 -name: NDM2_motif -def: "A non directional promoter motif with consensus CGMYGYCR." [PMID:16827941] -synonym: "NDM2" EXACT [] -synonym: "NDM2 motif" EXACT [] -synonym: "non directional promoter motif 2" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001168 -name: NDM3_motif -def: "A non directional promoter motif with consensus sequence GAAAGCT." [PMID:16827941] -synonym: "NDM3" EXACT [] -synonym: "NDM3 motif" EXACT [] -synonym: "non directional motif 3" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001169 -name: ds_RNA_viral_sequence -def: "A ds_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded RNA." [SO:ke] -synonym: "double stranded RNA virus sequence" EXACT [] -synonym: "ds RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001170 -name: polinton -def: "A kind of DNA transposon that populates the genomes of protists, fungi, and animals, characterized by a unique set of proteins necessary for their transposition, including a protein-primed DNA polymerase B, retroviral integrase, cysteine protease, and ATPase. Polintons are characterized by 6-bp target site duplications, terminal-inverted repeats that are several hundred nucleotides long, and 5'-AG and TC-3' termini. Polintons exist as autonomous and nonautonomous elements." [PMID:16537396] -synonym: "maverick element" RELATED [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0001171 -name: rRNA_21S -def: "A component of the large ribosomal subunit in mitochondrial rRNA." [RSC:cb] -synonym: "21S LSU rRNA" EXACT [] -synonym: "21S ribosomal RNA" EXACT [] -synonym: "21S rRNA" EXACT [] -synonym: "rRNA 21S" EXACT [] -is_a: SO:0000651 ! 
large_subunit_rRNA - -[Term] -id: SO:0001172 -name: tRNA_region -def: "A region of a tRNA." [RSC:cb] -synonym: "tRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000253 ! tRNA - -[Term] -id: SO:0001173 -name: anticodon_loop -def: "A sequence of seven nucleotide bases in tRNA which contains the anticodon. It has the sequence 5'-pyrimidine-purine-anticodon-modified purine-any base-3." [ISBN:0716719207] -synonym: "anti-codon loop" EXACT [] -synonym: "anticodon loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001174 -name: anticodon -def: "A sequence of three nucleotide bases in tRNA which recognizes a codon in mRNA." [RSC:cb] -synonym: "anti-codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Anticodon "wiki" -is_a: SO:0001172 ! tRNA_region -relationship: part_of SO:0001173 ! anticodon_loop - -[Term] -id: SO:0001175 -name: CCA_tail -def: "Base sequence at the 3' end of a tRNA. The 3'-hydroxyl group on the terminal adenosine is the attachment point for the amino acid." [ISBN:0716719207] -synonym: "CCA sequence" EXACT [] -synonym: "CCA tail" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001176 -name: DHU_loop -def: "Non-base-paired sequence of nucleotide bases in tRNA. It contains several dihydrouracil residues." [ISBN:071671920] -synonym: "D loop" RELATED [] -synonym: "DHU loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001177 -name: T_loop -def: "Non-base-paired sequence of three nucleotide bases in tRNA. It has sequence T-Psi-C." [ISBN:0716719207] -synonym: "T loop" EXACT [] -synonym: "TpsiC loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001178 -name: pyrrolysine_tRNA_primary_transcript -def: "A primary transcript encoding pyrrolysyl tRNA (SO:0000766)." [RSC:cb] -synonym: "pyrrolysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0001179 -name: U3_snoRNA -def: "U3 snoRNA is a member of the box C/D class of small nucleolar RNAs. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -comment: The definition is most of the old definition for snoRNA (SO:0000275). -synonym: "small nucleolar RNA U3" EXACT [] -synonym: "snoRNA U3" EXACT [] -synonym: "U3 small nucleolar RNA" EXACT [] -synonym: "U3 snoRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Small_nucleolar_RNA_U3 "wiki" -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0001180 -name: AU_rich_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is rich in AUUUA pentamers. Messenger RNAs bearing multiple AU-rich elements are often unstable." [PMID:7892223] -synonym: "ARE" RELATED [] -synonym: "AU rich element" EXACT [] -synonym: "AU-rich element" EXACT [] -xref: http://en.wikipedia.org/wiki/AU-rich_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! 
three_prime_UTR - -[Term] -id: SO:0001181 -name: Bruno_response_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is bound by the Drosophila Bruno protein and its homologs." [PMID:10893231] -comment: Not to be confused with BRE_motif (SO:0000016), which binds transcription factor II B. -synonym: "BRE" RELATED [] -synonym: "Bruno response element" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001182 -name: iron_responsive_element -def: "A regulatory sequence found in the 5' and 3' UTRs of many mRNAs which encode iron-binding proteins. It has a hairpin structure and is recognized by trans-acting proteins known as iron-regulatory proteins." [PMID:3198610, PMID:8710843] -synonym: "IRE" EXACT [] -synonym: "iron responsive element" EXACT [] -xref: http://en.wikipedia.org/wiki/Iron_responsive_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0001183 -name: morpholino_backbone -def: "An attribute describing a sequence composed of nucleobases bound to a morpholino backbone. A morpholino backbone consists of morpholine (CHEBI:34856) rings connected by phosphorodiamidate linkages." [RSC:cb] -comment: Do not use this for feature annotation. Use morpholino_oligo (SO:0000034) instead. -synonym: "morpholino backbone" EXACT [] -xref: http://en.wikipedia.org/wiki/Morpholino "wiki" -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001184 -name: PNA -def: "An attribute describing a sequence composed of peptide nucleic acid (CHEBI:48021), a chemical consisting of nucleobases bound to a backbone composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [RSC:cb] -comment: Do not use this term for feature annotation. Use PNA_oligo (SO:0001011) instead. -synonym: "peptide nucleic acid" RELATED [] -is_a: SO:0000348 ! 
nucleic_acid - -[Term] -id: SO:0001185 -name: enzymatic -def: "An attribute describing the sequence of a transcript that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use enzymatic_RNA (SO:0000372) instead. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001186 -name: ribozymic -def: "An attribute describing the sequence of a transcript that has catalytic activity even without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use ribozyme (SO:0000374) instead. -is_a: SO:0001185 ! enzymatic - -[Term] -id: SO:0001187 -name: pseudouridylation_guide_snoRNA -def: "A snoRNA that specifies the site of pseudouridylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA pseudouridylation guide activity (GO:0030558). -synonym: "pseudouridylation guide snoRNA" EXACT [] -is_a: SO:0000594 ! H_ACA_box_snoRNA - -[Term] -id: SO:0001188 -name: LNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of 'locked' deoxyribose rings connected to a phosphate backbone. The deoxyribose unit's conformation is 'locked' by a 2'-C,4'-C-oxymethylene link." [CHEBI:48010] -comment: Do not use this term for feature annotation. Use LNA_oligo (SO:0001189) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001189 -name: LNA_oligo -def: "An oligo composed of LNA residues." [RSC:cb] -synonym: "LNA oligo" EXACT [] -synonym: "locked nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Locked_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -relationship: has_quality SO:0001188 ! LNA - -[Term] -id: SO:0001190 -name: TNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of threose rings connected to a phosphate backbone." 
[CHEBI:48019] -comment: Do not use this term for feature annotation. Use TNA_oligo (SO:0001191) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001191 -name: TNA_oligo -def: "An oligo composed of TNA residues." [RSC:cb] -synonym: "threose nucleic acid" EXACT [] -synonym: "TNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Threose_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -relationship: has_quality SO:0001190 ! TNA - -[Term] -id: SO:0001192 -name: GNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of an acyclic three-carbon propylene glycol connected to a phosphate backbone. It has two enantiomeric forms, (R)-GNA and (S)-GNA." [CHEBI:48015] -comment: Do not use this term for feature annotation. Use GNA_oligo (SO:0001192) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001193 -name: GNA_oligo -def: "An oligo composed of GNA residues." [RSC:cb] -synonym: "glycerol nucleic acid" EXACT [] -synonym: "glycol nucleic acid" EXACT [] -synonym: "GNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Glycerol_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -relationship: has_quality SO:0001192 ! GNA - -[Term] -id: SO:0001194 -name: R_GNA -def: "An attribute describing a GNA sequence in the (R)-GNA enantiomer." [CHEBI:48016] -comment: Do not use this term for feature annotation. Use R_GNA_oligo (SO:0001195) instead. -synonym: "R GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001195 -name: R_GNA_oligo -def: "An oligo composed of (R)-GNA residues." [RSC:cb] -synonym: "(R)-glycerol nucleic acid" EXACT [] -synonym: "(R)-glycol nucleic acid" EXACT [] -synonym: "R GNA oligo" EXACT [] -is_a: SO:0001193 ! GNA_oligo -relationship: has_quality SO:0001194 ! R_GNA - -[Term] -id: SO:0001196 -name: S_GNA -def: "An attribute describing a GNA sequence in the (S)-GNA enantiomer." [CHEBI:48017] -comment: Do not use this term for feature annotation. 
Use S_GNA_oligo (SO:0001197) instead. -synonym: "S GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001197 -name: S_GNA_oligo -def: "An oligo composed of (S)-GNA residues." [RSC:cb] -synonym: "(S)-glycerol nucleic acid" EXACT [] -synonym: "(S)-glycol nucleic acid" EXACT [] -synonym: "S GNA oligo" EXACT [] -is_a: SO:0001193 ! GNA_oligo -relationship: has_quality SO:0001196 ! S_GNA - -[Term] -id: SO:0001198 -name: ds_DNA_viral_sequence -def: "A ds_DNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded DNA." [SO:ke] -synonym: "double stranded DNA virus" EXACT [] -synonym: "ds DNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001199 -name: ss_RNA_viral_sequence -def: "A ss_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as single stranded RNA." [SO:ke] -synonym: "single strand RNA virus" EXACT [] -synonym: "ss RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001200 -name: negative_sense_ssRNA_viral_sequence -def: "A negative_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that is complementary to mRNA and must be converted to positive sense RNA by RNA polymerase before translation." [SO:ke] -synonym: "negative sense single stranded RNA virus" RELATED [] -synonym: "negative sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001201 -name: positive_sense_ssRNA_viral_sequence -def: "A positive_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that can be immediately translated by the host." [SO:ke] -synonym: "positive sense single stranded RNA virus" RELATED [] -synonym: "positive sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! 
ss_RNA_viral_sequence - -[Term] -id: SO:0001202 -name: ambisense_ssRNA_viral_sequence -def: "A ambisense_RNA_virus is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus with both messenger and anti messenger polarity." [SO:ke] -synonym: "ambisense single stranded RNA virus" EXACT [] -synonym: "ambisense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001203 -name: RNA_polymerase_promoter -def: "A region (DNA) to which RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "RNA polymerase promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0001204 -name: Phage_RNA_Polymerase_Promoter -def: "A region (DNA) to which Bacteriophage RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "Phage RNA Polymerase Promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0001205 -name: SP6_RNA_Polymerase_Promoter -def: "A region (DNA) to which the SP6 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "SP6 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001206 -name: T3_RNA_Polymerase_Promoter -def: "A DNA sequence to which the T3 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T3 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001207 -name: T7_RNA_Polymerase_Promoter -def: "A region (DNA) to which the T7 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T7 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001208 -name: five_prime_EST -def: "An EST read from the 5' end of a transcript that usually codes for a protein. These regions tend to be conserved across species and do not change much within a gene family." 
[http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "5' EST" EXACT [] -synonym: "five prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001209 -name: three_prime_EST -def: "An EST read from the 3' end of a transcript. They are more likely to fall within non-coding, or untranslated regions(UTRs)." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "3' EST" EXACT [] -synonym: "three prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001210 -name: translational_frameshift -def: "The region of mRNA (not divisible by 3 bases) that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "ribosomal frameshift" EXACT [] -synonym: "translational frameshift" EXACT [] -xref: http://en.wikipedia.org/wiki/Translational_frameshift "wiki" -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0001211 -name: plus_1_translational_frameshift -def: "The region of mRNA 1 base long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 1 ribosomal frameshift" EXACT [] -synonym: "plus 1 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001212 -name: plus_2_translational_frameshift -def: "The region of mRNA 2 bases long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 2 ribosomal frameshift" EXACT [] -synonym: "plus 2 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001213 -name: group_III_intron -def: "Group III introns are introns found in the mRNA of the plastids of euglenoid protists. They are spliced by a two step transesterification with bulged adenosine as initiating nucleophile." [PMID:11377794] -comment: GO:0000374. 
-synonym: "group III intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_III_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -def: "The maximal intersection of exon and UTR." [SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with a stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001216 -name: endonuclease_spliced_intron -def: "An intron that spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -synonym: "endonuclease spliced intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0001217 -name: protein_coding_gene -synonym: "protein coding gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000010 ! protein_coding - -[Term] -id: SO:0001218 -name: transgenic_insertion -def: "An insertion that derives from another organism, via the use of recombinant DNA technology." [SO:bm] -synonym: "transgenic insertion" EXACT [] -is_a: SO:0000667 ! insertion -relationship: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0001219 -name: retrogene -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000569 ! retrotransposed - -[Term] -id: SO:0001220 -name: silenced_by_RNA_interference -def: "An attribute describing an epigenetic process where a gene is inactivated by RNA interference." [RSC:cb] -comment: RNA interference is GO:0016246. -synonym: "silenced by RNA interference" EXACT [] -is_a: SO:0000893 ! 
silenced - -[Term] -id: SO:0001221 -name: silenced_by_histone_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by histone modification." [RSC:cb] -comment: Histone modification is GO:0016570. -synonym: "silenced by histone modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001222 -name: silenced_by_histone_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone methylation." [RSC:cb] -comment: Histone methylation is GO:0016571. -synonym: "silenced by histone methylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001223 -name: silenced_by_histone_deacetylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone deacetylation." [RSC:cb] -comment: Histone deacetylation is GO:0016573. -synonym: "silenced by histone deacetylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001224 -name: gene_silenced_by_RNA_interference -def: "A gene that is silenced by RNA interference." [SO:xp] -synonym: "gene silenced by RNA interference" EXACT [] -synonym: "RNA interference silenced gene" EXACT [] -synonym: "RNAi silenced gene" EXACT [] -is_a: SO:0000127 ! silenced_gene -relationship: has_quality SO:0001220 ! silenced_by_RNA_interference - -[Term] -id: SO:0001225 -name: gene_silenced_by_histone_modification -def: "A gene that is silenced by histone modification." [SO:xp] -synonym: "gene silenced by histone modification" EXACT [] -is_a: SO:0000127 ! silenced_gene -relationship: has_quality SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001226 -name: gene_silenced_by_histone_methylation -def: "A gene that is silenced by histone methylation." [SO:xp] -synonym: "gene silenced by histone methylation" EXACT [] -is_a: SO:0001225 ! gene_silenced_by_histone_modification -relationship: has_quality SO:0001222 ! 
silenced_by_histone_methylation - -[Term] -id: SO:0001227 -name: gene_silenced_by_histone_deacetylation -def: "A gene that is silenced by histone deacetylation." [SO:xp] -synonym: "gene silenced by histone deacetylation" EXACT [] -is_a: SO:0001225 ! gene_silenced_by_histone_modification -relationship: has_quality SO:0001223 ! silenced_by_histone_deacetylation - -[Term] -id: SO:0001228 -name: dihydrouridine -def: "A modified RNA base in which the 5,6-dihydrouracil is bound to the ribose ring." [RSC:cb] -synonym: "D" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Dihydrouridine "wiki" -xref: RNAMOD:051 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001229 -name: pseudouridine -def: "A modified RNA base in which the 5- position of the uracil is bound to the ribose ring instead of the 4- position." [RSC:cb] -comment: The free molecule is CHEBI:17802. -synonym: "Y" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Pseudouridine "wiki" -xref: RNAMOD:050 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001230 -name: inosine -def: "A modified RNA base in which hypoxanthine is bound to the ribose ring." [http://library.med.utah.edu/RNAmods/, RSC:cb] -comment: The free molecule is CHEBI:17596. -synonym: "I" RELATED [] -synonym: "RNAMOD:017" RELATED [] -xref: http://en.wikipedia.org/wiki/Inosine "wiki" -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001231 -name: seven_methylguanine -def: "A modified RNA base in which guanine is methylated at the 7- position." [RSC:cb] -comment: The free molecule is CHEBI:2274. -synonym: "7-methylguanine" EXACT [] -synonym: "seven methylguanine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001232 -name: ribothymidine -def: "A modified RNA base in which thymine is bound to the ribose ring." [RSC:cb] -comment: The free molecule is CHEBI:30832. -is_a: SO:0000250 ! 
modified_RNA_base_feature - -[Term] -id: SO:0001233 -name: methylinosine -def: "A modified RNA base in which methylhypoxanthine is bound to the ribose ring." [RSC:cb] -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001234 -name: mobile -def: "An attribute describing a feature that has either intra-genome or intracellular mobility." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Mobile "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001235 -name: replicon -def: "A region containing at least one unique origin of replication and a unique termination site." [ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001237 -name: amino_acid -def: "A sequence feature that corresponds to a single amino acid residue in a polypeptide." [RSC:cb] -comment: Probably in the future this will cross reference to Chebi. -synonym: "amino acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Amino_acid "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0001238 -name: major_TSS -synonym: "major transcription start site" EXACT [] -synonym: "major TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001239 -name: minor_TSS -synonym: "minor TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001240 -name: TSS_region -def: "The region of a gene from the 5' most TSS to the 3' TSS." [BBOP:nw] -synonym: "TSS region" EXACT [] -is_a: SO:0000842 ! gene_component_region -relationship: has_part SO:0000315 ! TSS - -[Term] -id: SO:0001241 -name: encodes_alternate_transcription_start_sites -synonym: "encodes alternate transcription start sites" EXACT [] -is_a: SO:0000401 ! 
gene_attribute - -[Term] -id: SO:0001243 -name: miRNA_primary_transcript_region -def: "A part of an miRNA primary_transcript." [SO:ke] -synonym: "miRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0001244 -name: pre_miRNA -def: "The 60-70 nucleotide region remain after Drosha processing of the primary transcript, that folds back upon itself to form a hairpin sructure." [SO:ke] -synonym: "pre-miRNA" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0001245 -name: miRNA_stem -def: "The stem of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA stem" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001246 -name: miRNA_loop -def: "The loop of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA loop" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001247 -name: synthetic_oligo -def: "An oligo composed of synthetic nucleotides." [SO:ke] -synonym: "synthetic oligo" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0001248 -name: assembly -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001249 -name: fragment_assembly -def: "A fragment assembly is a genome assembly that orders overlapping fragments of the genome based on landmark sequences. The base pair distance between the landmarks is known allowing additivity of lengths." [SO:ke] -synonym: "fragment assembly" EXACT [] -synonym: "physical map" EXACT [] -is_a: SO:0001248 ! 
assembly - -[Term] -id: SO:0001250 -name: fingerprint_map -def: "A fingerprint_map is a physical map composed of restriction fragments." [SO:ke] -synonym: "BACmap" EXACT [] -synonym: "fingerprint map" EXACT [] -synonym: "FPC" EXACT [] -synonym: "FPCmap" EXACT [] -synonym: "restriction map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000412 ! restriction_fragment - -[Term] -id: SO:0001251 -name: STS_map -def: "An STS map is a physical map organized by the unique STS landmarks." [SO:ke] -synonym: "STS map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001252 -name: RH_map -def: "A radiation hybrid map is a physical map." [SO:ke] -synonym: "radiation hybrid map" EXACT [] -synonym: "RH map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001253 -name: sonicate_fragment -def: "A DNA fragment generated by sonication. Sonication is a technique used to sheer DNA into smaller fragments." [SO:ke] -synonym: "sonicate fragment" EXACT [] -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0001254 -name: polyploid -def: "A kind of chromosome variation where the chromosome complement is an exact multiple of the haploid number and is greater than the diploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Polyploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0001255 -name: autopolyploid -def: "A polyploid where the multiple chromosome set was derived from the same organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Autopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001256 -name: allopolyploid -def: "A polyploid where the multiple chromosome set was derived from a different organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Allopolyploid "wiki" -is_a: SO:0001254 ! 
polyploid - -[Term] -id: SO:0001257 -name: homing_endonuclease_binding_site -def: "The binding site (recognition site) of a homing endonuclease. The binding site is typically large." [SO:ke] -synonym: "homing endonuclease binding site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0001258 -name: octamer_motif -def: "A sequence element characteristic of some RNA polymerase II promoters with sequence ATTGCAT that binds Pou-domain transcription factors." [GOC:dh, PMID:3095662] -comment: Nature. 1986 Oct 16-22;323(6089):640-3. -synonym: "octamer motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001259 -name: apicoplast_chromosome -def: "A chromosome originating in an apicoplast." [SO:xp] -synonym: "apicoplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0001260 -name: sequence_collection -def: "A collection of discontinuous sequences." [SO:ke] -synonym: "sequence collection" EXACT [] - -[Term] -id: SO:0001261 -name: overlapping_feature_set -def: "A continuous region of sequence composed of the overlapping of multiple sequence_features, which ultimately provides evidence for another sequence_feature." [SO:ke] -comment: This feature was requested by Nicole, tracker id 1911479. It is required to gather evidence together for annotation. An example would be overlapping ESTs that support an mRNA. -synonym: "overlapping feature set" EXACT [] -is_a: SO:0000703 ! experimental_result_region - -[Term] -id: SO:0001262 -name: overlapping_EST_set -def: "A continous experimental result region extending the length of multiple overlapping EST's." [SO:ke] -synonym: "overlapping EST set" EXACT [] -is_a: SO:0001261 ! overlapping_feature_set -relationship: has_part SO:0000345 ! 
EST - -[Term] -id: SO:0001263 -name: ncRNA_gene -synonym: "ncRNA gen" EXACT [] -synonym: "ncRNA gene" EXACT [] -synonym: "non-coding RNA gene" RELATED [] -is_a: SO:0000704 ! gene - -[Term] -id: SO:0001264 -name: gRNA_gene -synonym: "gRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000979 ! gRNA_encoding - -[Term] -id: SO:0001265 -name: miRNA_gene -synonym: "miRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000571 ! miRNA_encoding - -[Term] -id: SO:0001266 -name: scRNA_gene -synonym: "scRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000575 ! scRNA_encoding - -[Term] -id: SO:0001267 -name: snoRNA_gene -synonym: "snoRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0001268 -name: snRNA_gene -synonym: "snRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0001263 ! ncRNA_gene - -[Term] -id: SO:0001269 -name: SRP_RNA_gene -synonym: "SRP RNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000642 ! SRP_RNA_encoding - -[Term] -id: SO:0001270 -name: stRNA_gene -synonym: "stRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000656 ! stRNA_encoding - -[Term] -id: SO:0001271 -name: tmRNA_gene -synonym: "tmRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000659 ! tmRNA_encoding - -[Term] -id: SO:0001272 -name: tRNA_gene -synonym: "tRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000663 ! tRNA_encoding - -[Term] -id: SO:0001273 -name: modified_adenosine -def: "A modified adenine is an adenine base feature that has been altered." [SO:ke] -synonym: "modified adenosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001274 -name: modified_inosine -def: "A modified inosine is an inosine base feature that has been altered." 
[SO:ke] -synonym: "modified inosine" EXACT [] -is_a: SO:0001230 ! inosine - -[Term] -id: SO:0001275 -name: modified_cytidine -def: "A modified cytidine is a cytidine base feature which has been altered." [SO:ke] -synonym: "modified cytidine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001276 -name: modified_guanosine -synonym: "modified guanosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001277 -name: modified_uridine -synonym: "modified uridine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001278 -name: one_methylinosine -def: "1-methylinosine is a modified insosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylinosine" EXACT [] -synonym: "m1I" EXACT RNAMOD [] -synonym: "one methylinosine" EXACT [] -xref: RNAMOD:018 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001279 -name: one_two_prime_O_dimethylinosine -def: "1,2'-O-dimethylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylinosine" EXACT [] -synonym: "m'Im" EXACT RNAMOD [] -synonym: "one two prime O dimethylinosine" EXACT [] -xref: RNAMOD:019 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001280 -name: two_prime_O_methylinosine -def: "2'-O-methylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylinosine" EXACT [] -synonym: "Im" EXACT RNAMOD [] -synonym: "two prime O methylinosine" EXACT [] -xref: RNAMOD:081 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001281 -name: three_methylcytidine -def: "3-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylcytidine" EXACT [] -synonym: "m3C" EXACT RNAMOD [] -synonym: "three methylcytidine" EXACT [] -xref: RNAMOD:020 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001282 -name: five_methylcytidine -def: "5-methylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylcytidine" EXACT [] -synonym: "five methylcytidine" EXACT [] -synonym: "m5C" EXACT RNAMOD [] -xref: RNAMOD:021 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001283 -name: two_prime_O_methylcytidine -def: "2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylcytidine" EXACT [] -synonym: "Cm" EXACT RNAMOD [] -synonym: "two prime O methylcytidine" EXACT [] -xref: RNAMOD:022 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001284 -name: two_thiocytidine -def: "2-thiocytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiocytidine" EXACT [] -synonym: "s2C" EXACT RNAMOD [] -synonym: "two thiocytidine" EXACT [] -xref: RNAMOD:023 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001285 -name: N4_acetylcytidine -def: "N4-acetylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4C" EXACT RNAMOD [] -synonym: "N4 acetylcytidine" EXACT [] -synonym: "N4-acetylcytidine" EXACT [] -xref: RNAMOD:024 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001286 -name: five_formylcytidine -def: "5-formylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formylcytidine" EXACT [] -synonym: "f5C" EXACT RNAMOD [] -synonym: "five formylcytidine" EXACT [] -xref: RNAMOD:025 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001287 -name: five_two_prime_O_dimethylcytidine -def: "5,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethylcytidine" EXACT [] -synonym: "five two prime O dimethylcytidine" EXACT [] -synonym: "m5Cm" EXACT RNAMOD [] -xref: RNAMOD:026 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001288 -name: N4_acetyl_2_prime_O_methylcytidine -def: "N4-acetyl-2'-O-methylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "ac4Cm" EXACT RNAMOD [] -synonym: "N4 acetyl 2 prime O methylcytidine" EXACT [] -synonym: "N4-acetyl-2'-O-methylcytidine" EXACT [] -xref: RNAMOD:027 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001289 -name: lysidine -def: "Lysidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "k2C" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Lysidine "wiki" -xref: RNAMOD:028 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001290 -name: N4_methylcytidine -def: "N4-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4C" EXACT RNAMOD [] -synonym: "N4 methylcytidine" EXACT [] -synonym: "N4-methylcytidine" EXACT [] -xref: RNAMOD:082 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001291 -name: N4_2_prime_O_dimethylcytidine -def: "N4,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4Cm" EXACT RNAMOD [] -synonym: "N4 2 prime O dimethylcytidine" EXACT [] -synonym: "N4,2'-O-dimethylcytidine" EXACT [] -xref: RNAMOD:083 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001292 -name: five_hydroxymethylcytidine -def: "5-hydroxymethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxymethylcytidine" EXACT [] -synonym: "five hydroxymethylcytidine" EXACT [] -synonym: "hm5C" EXACT RNAMOD [] -xref: RNAMOD:084 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001293 -name: five_formyl_two_prime_O_methylcytidine -def: "5-formyl-2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formyl-2'-O-methylcytidine" EXACT [] -synonym: "f5Cm" EXACT RNAMOD [] -synonym: "five formyl two prime O methylcytidine" EXACT [] -xref: RNAMOD:095 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001294 -name: N4_N4_2_prime_O_trimethylcytidine -def: "N4_N4_2_prime_O_trimethylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "m42Cm" EXACT RNAMOD [] -synonym: "N4,N4,2'-O-trimethylcytidine" EXACT [] -xref: RNAMOD:107 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001295 -name: one_methyladenosine -def: "1_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyladenosine" EXACT [] -synonym: "m1A" EXACT RNAMOD [] -synonym: "one methyladenosine" EXACT [] -xref: RNAMOD:001 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001296 -name: two_methyladenosine -def: "2_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methyladenosine" EXACT [] -synonym: "m2A" EXACT RNAMOD [] -synonym: "two methyladenosine" EXACT [] -xref: RNAMOD:002 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001297 -name: N6_methyladenosine -def: "N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6A" EXACT RNAMOD [] -synonym: "N6 methyladenosine" EXACT [] -synonym: "N6-methyladenosine" EXACT [] -xref: RNAMOD:003 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001298 -name: two_prime_O_methyladenosine -def: "2prime_O_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyladenosine" EXACT [] -synonym: "Am" EXACT RNAMOD [] -synonym: "two prime O methyladenosine" EXACT [] -xref: RNAMOD:004 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001299 -name: two_methylthio_N6_methyladenosine -def: "2_methylthio_N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-methyladenosine" EXACT [] -synonym: "ms2m6A" EXACT RNAMOD [] -synonym: "two methylthio N6 methyladenosine" EXACT [] -xref: RNAMOD:005 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001300 -name: N6_isopentenyladenosine -def: "N6_isopentenyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "i6A" EXACT RNAMOD [] -synonym: "N6 isopentenyladenosine" EXACT [] -synonym: "N6-isopentenyladenosine" EXACT [] -xref: RNAMOD:006 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001301 -name: two_methylthio_N6_isopentenyladenosine -def: "2_methylthio_N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-isopentenyladenosine" EXACT [] -synonym: "ms2i6A" EXACT RNAMOD [] -synonym: "two methylthio N6 isopentenyladenosine" EXACT [] -xref: RNAMOD:007 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001302 -name: N6_cis_hydroxyisopentenyl_adenosine -def: "N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "io6A" EXACT RNAMOD [] -synonym: "N6 cis hydroxyisopentenyl adenosine" EXACT [] -synonym: "N6-(cis-hydroxyisopentenyl)adenosine" EXACT [] -xref: RNAMOD:008 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001303 -name: two_methylthio_N6_cis_hydroxyisopentenyl_adenosine -def: "2_methylthio_N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-(cis-hydroxyisopentenyl) adenosine" EXACT [] -synonym: "ms2io6A" EXACT RNAMOD [] -synonym: "two methylthio N6 cis hydroxyisopentenyl adenosine" EXACT [] -xref: RNAMOD:009 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001304 -name: N6_glycinylcarbamoyladenosine -def: "N6_glycinylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "g6A" EXACT RNAMOD [] -synonym: "N6 glycinylcarbamoyladenosine" EXACT [] -synonym: "N6-glycinylcarbamoyladenosine" EXACT [] -xref: RNAMOD:010 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001305 -name: N6_threonylcarbamoyladenosine -def: "N6_threonylcarbamoyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-threonylcarbamoyladenosine" EXACT [] -synonym: "t6A" EXACT RNAMOD [] -xref: RNAMOD:011 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001306 -name: two_methylthio_N6_threonyl_carbamoyladenosine -def: "2_methylthio_N6_threonyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-threonyl carbamoyladenosine" EXACT [] -synonym: "ms2t6A" EXACT RNAMOD [] -synonym: "two methylthio N6 threonyl carbamoyladenosine" EXACT [] -xref: RNAMOD:012 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001307 -name: N6_methyl_N6_threonylcarbamoyladenosine -def: "N6_methyl_N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6t6A" EXACT RNAMOD [] -synonym: "N6 methyl N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-methyl-N6-threonylcarbamoyladenosine" EXACT [] -xref: RNAMOD:013 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001308 -name: N6_hydroxynorvalylcarbamoyladenosine -def: "N6_hydroxynorvalylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "hn6A" EXACT RNAMOD [] -synonym: "N6 hydroxynorvalylcarbamoyladenosine" EXACT [] -synonym: "N6-hydroxynorvalylcarbamoyladenosine" EXACT [] -xref: RNAMOD:014 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001309 -name: two_methylthio_N6_hydroxynorvalyl_carbamoyladenosine -def: "2_methylthio_N6_hydroxynorvalyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-hydroxynorvalyl carbamoyladenosine" EXACT [] -synonym: "ms2hn6A" EXACT RNAMOD [] -synonym: "two methylthio N6 hydroxynorvalyl carbamoyladenosine" EXACT [] -xref: RNAMOD:015 -is_a: SO:0001273 ! 
modified_adenosine - -[Term] -id: SO:0001310 -name: two_prime_O_ribosyladenosine_phosphate -def: "2prime_O_ribosyladenosine_phosphate is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosyladenosine (phosphate)" EXACT [] -synonym: "Ar(p)" EXACT RNAMOD [] -synonym: "two prime O ribosyladenosine phosphate" EXACT [] -xref: RNAMOD:016 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001311 -name: N6_N6_dimethyladenosine -def: "N6_N6_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62A" EXACT RNAMOD [] -synonym: "N6,N6-dimethyladenosine" EXACT [] -xref: RNAMOD:080 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001312 -name: N6_2_prime_O_dimethyladenosine -def: "N6_2prime_O_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6Am" EXACT RNAMOD [] -synonym: "N6 2 prime O dimethyladenosine" EXACT [] -synonym: "N6,2'-O-dimethyladenosine" EXACT [] -xref: RNAMOD:088 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001313 -name: N6_N6_2_prime_O_trimethyladenosine -def: "N6_N6_2prime_O_trimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62Am" EXACT RNAMOD [] -synonym: "N6,N6,2'-O-trimethyladenosine" EXACT [] -xref: RNAMOD:089 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001314 -name: one_two_prime_O_dimethyladenosine -def: "1,2'-O-dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethyladenosine" EXACT [] -synonym: "m1Am" EXACT RNAMOD [] -synonym: "one two prime O dimethyladenosine" EXACT [] -xref: RNAMOD:097 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001315 -name: N6_acetyladenosine -def: "N6_acetyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "ac6A" EXACT RNAMOD [] -synonym: "N6 acetyladenosine" EXACT [] -synonym: "N6-acetyladenosine" EXACT [] -xref: RNAMOD:102 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001316 -name: seven_deazaguanosine -def: "7-deazaguanosine is a moddified guanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-deazaguanosine" RELATED [] -synonym: "seven deazaguanosine" EXACT [] -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001317 -name: queuosine -def: "Queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "Q" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Queuosine "wiki" -xref: RNAMOD:043 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001318 -name: epoxyqueuosine -def: "Epoxyqueuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "eQ" EXACT RNAMOD [] -xref: RNAMOD:044 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001319 -name: galactosyl_queuosine -def: "Galactosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "galactosyl queuosine" EXACT [] -synonym: "galactosyl-queuosine" EXACT [] -synonym: "galQ" EXACT RNAMOD [] -xref: RNAMOD:045 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001320 -name: mannosyl_queuosine -def: "Mannosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "mannosyl queuosine" EXACT [] -synonym: "mannosyl-queuosine" EXACT [] -synonym: "manQ" EXACT RNAMOD [] -xref: RNAMOD:046 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001321 -name: seven_cyano_seven_deazaguanosine -def: "7_cyano_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-cyano-7-deazaguanosine" EXACT [] -synonym: "preQ0" EXACT RNAMOD [] -synonym: "seven cyano seven deazaguanosine" EXACT [] -xref: RNAMOD:047 -is_a: SO:0001316 ! 
seven_deazaguanosine - -[Term] -id: SO:0001322 -name: seven_aminomethyl_seven_deazaguanosine -def: "7_aminomethyl_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-aminomethyl-7-deazaguanosine" EXACT [] -synonym: "preQ1" EXACT RNAMOD [] -synonym: "seven aminomethyl seven deazaguanosine" EXACT [] -xref: RNAMOD:048 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001323 -name: archaeosine -def: "Archaeosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "G+" EXACT RNAMOD [] -xref: RNAMOD:049 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001324 -name: one_methylguanosine -def: "1_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylguanosine" EXACT [] -synonym: "m1G" EXACT RNAMOD [] -synonym: "one methylguanosine" EXACT [] -xref: RNAMOD:029 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001325 -name: N2_methylguanosine -def: "N2_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2G" EXACT RNAMOD [] -synonym: "N2 methylguanosine" EXACT [] -synonym: "N2-methylguanosine" EXACT [] -xref: RNAMOD:030 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001326 -name: seven_methylguanosine -def: "7_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "7-methylguanosine" EXACT [] -synonym: "m7G" EXACT RNAMOD [] -synonym: "seven methylguanosine" EXACT [] -xref: RNAMOD:031 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001327 -name: two_prime_O_methylguanosine -def: "2prime_O_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylguanosine" EXACT [] -synonym: "Gm" EXACT RNAMOD [] -synonym: "two prime O methylguanosine" EXACT [] -xref: RNAMOD:032 -is_a: SO:0001276 ! 
modified_guanosine - -[Term] -id: SO:0001328 -name: N2_N2_dimethylguanosine -def: "N2_N2_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22G" EXACT RNAMOD [] -synonym: "N2,N2-dimethylguanosine" EXACT [] -xref: RNAMOD:033 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001329 -name: N2_2_prime_O_dimethylguanosine -def: "N2_2prime_O_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2Gm" EXACT RNAMOD [] -synonym: "N2 2 prime O dimethylguanosine" EXACT [] -synonym: "N2,2'-O-dimethylguanosine" EXACT [] -xref: RNAMOD:034 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001330 -name: N2_N2_2_prime_O_trimethylguanosine -def: "N2_N2_2prime_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22Gmv" EXACT RNAMOD [] -synonym: "N2,N2,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:035 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001331 -name: two_prime_O_ribosylguanosine_phosphate -def: "2prime_O_ribosylguanosine_phosphate is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosylguanosine (phosphate)" EXACT [] -synonym: "Gr(p)" EXACT RNAMOD [] -synonym: "two prime O ribosylguanosine phosphate" EXACT [] -xref: RNAMOD:036 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001332 -name: wybutosine -def: "Wybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "yW" EXACT RNAMOD [] -xref: RNAMOD:037 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001333 -name: peroxywybutosine -def: "Peroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "o2yW" EXACT RNAMOD [] -xref: RNAMOD:038 -is_a: SO:0001276 ! 
modified_guanosine - -[Term] -id: SO:0001334 -name: hydroxywybutosine -def: "Hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW" EXACT RNAMOD [] -xref: RNAMOD:039 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001335 -name: undermodified_hydroxywybutosine -def: "Undermodified_hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW*" EXACT RNAMOD [] -synonym: "undermodified hydroxywybutosine" EXACT [] -xref: RNAMOD:040 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001336 -name: wyosine -def: "Wyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "IMG" EXACT RNAMOD [] -xref: RNAMOD:041 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001337 -name: methylwyosine -def: "Methylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mimG" EXACT RNAMOD [] -xref: RNAMOD:042 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001338 -name: N2_7_dimethylguanosine -def: "N2_7_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7G" EXACT RNAMOD [] -synonym: "N2 7 dimethylguanosine" EXACT [] -synonym: "N2,7-dimethylguanosine" EXACT [] -xref: RNAMOD:090 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001339 -name: N2_N2_7_trimethylguanosine -def: "N2_N2_7_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,2,7G" EXACT RNAMOD [] -synonym: "N2,N2,7-trimethylguanosine" EXACT [] -xref: RNAMOD:091 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001340 -name: one_two_prime_O_dimethylguanosine -def: "1_2prime_O_dimethylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylguanosine" EXACT [] -synonym: "m1Gm" EXACT RNAMOD [] -synonym: "one two prime O dimethylguanosine" EXACT [] -xref: RNAMOD:096 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001341 -name: four_demethylwyosine -def: "4_demethylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-demethylwyosine" EXACT [] -synonym: "four demethylwyosine" EXACT [] -synonym: "imG-14" EXACT RNAMOD [] -xref: RNAMOD:100 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001342 -name: isowyosine -def: "Isowyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "imG2" EXACT RNAMOD [] -xref: RNAMOD:101 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001343 -name: N2_7_2prirme_O_trimethylguanosine -def: "N2_7_2prirme_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7Gm" EXACT RNAMOD [] -synonym: "N2 7 2prirme O trimethylguanosine" EXACT [] -synonym: "N2,7,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:106 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001344 -name: five_methyluridine -def: "5_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methyluridine" EXACT [] -synonym: "five methyluridine" EXACT [] -synonym: "m5U" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/5-methyluridine "wiki" -xref: RNAMOD:052 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001345 -name: two_prime_O_methyluridine -def: "2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyluridine" EXACT [] -synonym: "two prime O methyluridine" EXACT [] -synonym: "Um" EXACT RNAMOD [] -xref: RNAMOD:053 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001346 -name: five_two_prime_O_dimethyluridine -def: "5_2_prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethyluridine" EXACT [] -synonym: "five two prime O dimethyluridine" EXACT [] -synonym: "m5Um" EXACT RNAMOD [] -xref: RNAMOD:054 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001347 -name: one_methylpseudouridine -def: "1_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylpseudouridine" EXACT [] -synonym: "m1Y" EXACT RNAMOD [] -synonym: "one methylpseudouridine" EXACT [] -xref: RNAMOD:055 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001348 -name: two_prime_O_methylpseudouridine -def: "2prime_O_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylpseudouridine" EXACT [] -synonym: "two prime O methylpseudouridine" EXACT [] -synonym: "Ym" EXACT RNAMOD [] -xref: RNAMOD:056 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001349 -name: two_thiouridine -def: "2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiouridine" EXACT [] -synonym: "s2U" EXACT RNAMOD [] -synonym: "two thiouridine" EXACT [] -xref: RNAMOD:057 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001350 -name: four_thiouridine -def: "4_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-thiouridine" EXACT [] -synonym: "four thiouridine" EXACT [] -synonym: "s4U" EXACT RNAMOD [] -xref: RNAMOD:058 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001351 -name: five_methyl_2_thiouridine -def: "5_methyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyl-2-thiouridine" EXACT [] -synonym: "five methyl 2 thiouridine" EXACT [] -synonym: "m5s2U" EXACT RNAMOD [] -xref: RNAMOD:059 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001352 -name: two_thio_two_prime_O_methyluridine -def: "2_thio_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thio-2'-O-methyluridine" EXACT [] -synonym: "s2Um" EXACT RNAMOD [] -synonym: "two thio two prime O methyluridine" EXACT [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001353 -name: three_three_amino_three_carboxypropyl_uridine -def: "3_3_amino_3_carboxypropyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-(3-amino-3-carboxypropyl)uridine" EXACT [] -synonym: "acp3U" EXACT RNAMOD [] -xref: RNAMOD:061 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001354 -name: five_hydroxyuridine -def: "5_hydroxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxyuridine" EXACT [] -synonym: "five hydroxyuridine" EXACT [] -synonym: "ho5U" EXACT RNAMOD [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001355 -name: five_methoxyuridine -def: "5_methoxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxyuridine" EXACT [] -synonym: "five methoxyuridine" EXACT [] -synonym: "mo5U" EXACT RNAMOD [] -xref: RNAMOD:063 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001356 -name: uridine_five_oxyacetic_acid -def: "Uridine_5_oxyacetic_acid is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "cmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid" EXACT [] -synonym: "uridine five oxyacetic acid" EXACT [] -xref: RNAMOD:064 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001357 -name: uridine_five_oxyacetic_acid_methyl_ester -def: "Uridine_5_oxyacetic_acid_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mcmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid methyl ester" EXACT [] -synonym: "uridine five oxyacetic acid methyl ester" EXACT [] -xref: RNAMOD:065 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001358 -name: five_carboxyhydroxymethyl_uridine -def: "5_carboxyhydroxymethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine" EXACT [] -synonym: "chm5U" EXACT RNAMOD [] -synonym: "five carboxyhydroxymethyl uridine" EXACT [] -xref: RNAMOD:066 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001359 -name: five_carboxyhydroxymethyl_uridine_methyl_ester -def: "5_carboxyhydroxymethyl_uridine_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine methyl ester" EXACT [] -synonym: "five carboxyhydroxymethyl uridine methyl ester" EXACT [] -synonym: "mchm5U" EXACT RNAMOD [] -xref: RNAMOD:067 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001360 -name: five_methoxycarbonylmethyluridine -def: "Five_methoxycarbonylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyluridine" EXACT [] -synonym: "five methoxycarbonylmethyluridine" EXACT [] -synonym: "mcm5U" EXACT RNAMOD [] -xref: RNAMOD:068 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001361 -name: five_methoxycarbonylmethyl_two_prime_O_methyluridine -def: "Five_methoxycarbonylmethyl_2_prime_O_methyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five methoxycarbonylmethyl two prime O methyluridine" EXACT [] -synonym: "mcm5Um" EXACT RNAMOD [] -xref: RNAMOD:069 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001362 -name: five_methoxycarbonylmethyl_two_thiouridine -def: "5_methoxycarbonylmethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2-thiouridine" EXACT [] -synonym: "five methoxycarbonylmethyl two thiouridine" EXACT [] -synonym: "mcm5s2U" EXACT RNAMOD [] -xref: RNAMOD:070 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001363 -name: five_aminomethyl_two_thiouridine -def: "5_aminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-aminomethyl-2-thiouridine" EXACT [] -synonym: "five aminomethyl two thiouridine" EXACT [] -synonym: "nm5s2U" EXACT RNAMOD [] -xref: RNAMOD:071 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001364 -name: five_methylaminomethyluridine -def: "5_methylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyluridine" EXACT [] -synonym: "five methylaminomethyluridine" EXACT [] -synonym: "mnm5U" EXACT RNAMOD [] -xref: RNAMOD:072 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001365 -name: five_methylaminomethyl_two_thiouridine -def: "5_methylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-thiouridine" EXACT [] -synonym: "five methylaminomethyl two thiouridine" EXACT [] -synonym: "mnm5s2U" EXACT RNAMOD [] -xref: RNAMOD:073 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001366 -name: five_methylaminomethyl_two_selenouridine -def: "5_methylaminomethyl_2_selenouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-selenouridine" EXACT [] -synonym: "five methylaminomethyl two selenouridine" EXACT [] -synonym: "mnm5se2U" EXACT RNAMOD [] -xref: RNAMOD:074 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001367 -name: five_carbamoylmethyluridine -def: "5_carbamoylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyluridine" EXACT [] -synonym: "five carbamoylmethyluridine" EXACT [] -synonym: "ncm5U" EXACT RNAMOD [] -xref: RNAMOD:075 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001368 -name: five_carbamoylmethyl_two_prime_O_methyluridine -def: "5_carbamoylmethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five carbamoylmethyl two prime O methyluridine" EXACT [] -synonym: "ncm5Um" EXACT RNAMOD [] -xref: RNAMOD:076 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001369 -name: five_carboxymethylaminomethyluridine -def: "5_carboxymethylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyluridine" EXACT [] -synonym: "cmnm5U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyluridine" EXACT [] -xref: RNAMOD:077 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001370 -name: five_carboxymethylaminomethyl_two_prime_O_methyluridine -def: "5_carboxymethylaminomethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl- 2'-O-methyluridine" EXACT [] -synonym: "cmnm5Um" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two prime O methyluridine" EXACT [] -xref: RNAMOD:078 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001371 -name: five_carboxymethylaminomethyl_two_thiouridine -def: "5_carboxymethylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl-2-thiouridine" EXACT [] -synonym: "cmnm5s2U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two thiouridine" EXACT [] -xref: RNAMOD:079 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001372 -name: three_methyluridine -def: "3_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methyluridine" EXACT [] -synonym: "m3U" EXACT RNAMOD [] -synonym: "three methyluridine" EXACT [] -xref: RNAMOD:085 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001373 -name: one_methyl_three_three_amino_three_carboxypropyl_pseudouridine -def: "1_methyl_3_3_amino_3_carboxypropyl_pseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyl-3-(3-amino-3-carboxypropyl) pseudouridine" EXACT [] -synonym: "m1acp3Y" EXACT RNAMOD [] -xref: RNAMOD:086 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001374 -name: five_carboxymethyluridine -def: "5_carboxymethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethyluridine" EXACT [] -synonym: "cm5U" EXACT RNAMOD [] -synonym: "five carboxymethyluridine" EXACT [] -xref: RNAMOD:087 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001375 -name: three_two_prime_O_dimethyluridine -def: "3_2prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3,2'-O-dimethyluridine" EXACT [] -synonym: "m3Um" EXACT RNAMOD [] -synonym: "three two prime O dimethyluridine" EXACT [] -xref: RNAMOD:092 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001376 -name: five_methyldihydrouridine -def: "5_methyldihydrouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyldihydrouridine" EXACT [] -synonym: "five methyldihydrouridine" EXACT [] -synonym: "m5D" EXACT RNAMOD [] -xref: RNAMOD:093 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001377 -name: three_methylpseudouridine -def: "3_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylpseudouridine" EXACT [] -synonym: "m3Y" EXACT RNAMOD [] -synonym: "three methylpseudouridine" EXACT [] -xref: RNAMOD:094 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001378 -name: five_taurinomethyluridine -def: "5_taurinomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyluridine" EXACT [] -synonym: "five taurinomethyluridine" EXACT [] -synonym: "tm5U" EXACT RNAMOD [] -xref: RNAMOD:098 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001379 -name: five_taurinomethyl_two_thiouridine -def: "5_taurinomethyl_2_thiouridineis a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyl-2-thiouridine" EXACT [] -synonym: "five taurinomethyl two thiouridine" EXACT [] -synonym: "tm5s2U" EXACT RNAMOD [] -xref: RNAMOD:099 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001380 -name: five_isopentenylaminomethyl_uridine -def: "5_isopentenylaminomethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)uridine" EXACT [] -synonym: "five isopentenylaminomethyl uridine" EXACT [] -synonym: "inm5U" EXACT RNAMOD [] -xref: RNAMOD:103 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001381 -name: five_isopentenylaminomethyl_two_thiouridine -def: "5_isopentenylaminomethyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2-thiouridine" EXACT [] -synonym: "five isopentenylaminomethyl two thiouridine" EXACT [] -synonym: "inm5s2U" EXACT RNAMOD [] -xref: RNAMOD:104 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001382 -name: five_isopentenylaminomethyl_two_prime_O_methyluridine -def: "5_isopentenylaminomethyl_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2'-O-methyluridine" EXACT [] -synonym: "five isopentenylaminomethyl two prime O methyluridine" EXACT [] -synonym: "inm5Um" EXACT RNAMOD [] -xref: RNAMOD:105 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001383 -name: histone_binding_site -def: "A binding site that, in the nucleotide molecule, interacts selectively and non-covalently with polypeptide residues of a histone." [SO:ke] -synonym: "histone binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site - -[Term] -id: SO:0001384 -name: CDS_fragment -synonym: "CDS fragment" EXACT [] -synonym: "incomplete CDS" EXACT [] -is_a: SO:0000316 ! CDS -relationship: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001385 -name: modified_amino_acid_feature -def: "A post translationally modified amino acid feature." [SO:ke] -synonym: "modified amino acid feature" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001386 -name: modified_glycine -def: "A post translationally modified glycine amino acid feature." [SO:ke] -synonym: "ModGly" EXACT AAMOD [] -synonym: "modified glycine" EXACT [] -xref: MOD:00908 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001387 -name: modified_L_alanine -def: "A post translationally modified alanine amino acid feature." [SO:ke] -synonym: "ModAla" EXACT AAMOD [] -synonym: "modified L alanine" EXACT [] -synonym: "modified L-alanine" EXACT [] -xref: MOD:00901 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001388 -name: modified_L_asparagine -def: "A post translationally modified asparagine amino acid feature." [SO:ke] -synonym: "ModAsn" EXACT AAMOD [] -synonym: "modified L asparagine" EXACT [] -synonym: "modified L-asparagine" EXACT [] -xref: MOD:00903 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001389 -name: modified_L_aspartic_acid -def: "A post translationally modified aspartic acid amino acid feature." [SO:ke] -synonym: "ModAsp" EXACT AAMOD [] -synonym: "modified L aspartic acid" EXACT [] -synonym: "modified L-aspartic acid" EXACT [] -xref: MOD:00904 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001390 -name: modified_L_cysteine -def: "A post translationally modified cysteine amino acid feature." [SO:ke] -synonym: "ModCys" EXACT AAMOD [] -synonym: "modified L cysteine" EXACT [] -synonym: "modified L-cysteine" EXACT [] -xref: MOD:00905 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001391 -name: modified_L_glutamic_acid -synonym: "ModGlu" EXACT AAMOD [] -synonym: "modified L glutamic acid" EXACT [] -synonym: "modified L-glutamic acid" EXACT [] -xref: MOD:00906 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001392 -name: modified_L_threonine -def: "A post translationally modified threonine amino acid feature." [SO:ke] -synonym: "modified L threonine" EXACT [] -synonym: "modified L-threonine" EXACT [] -synonym: "ModThr" EXACT AAMOD [] -xref: MOD:00917 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001393 -name: modified_L_tryptophan -def: "A post translationally modified tryptophan amino acid feature." [SO:ke] -synonym: "modified L tryptophan" EXACT [] -synonym: "modified L-tryptophan" EXACT [] -synonym: "ModTrp" EXACT AAMOD [] -xref: MOD:00918 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001394 -name: modified_L_glutamine -def: "A post translationally modified glutamine amino acid feature." 
[SO:ke] -synonym: "ModGln" EXACT [] -synonym: "modified L glutamine" EXACT [] -synonym: "modified L-glutamine" EXACT [] -xref: MOD:00907 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001395 -name: modified_L_methionine -def: "A post translationally modified methionine amino acid feature." [SO:ke] -synonym: "modified L methionine" EXACT [] -synonym: "modified L-methionine" EXACT [] -synonym: "ModMet" EXACT AAMOD [] -xref: MOD:00913 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001396 -name: modified_L_isoleucine -def: "A post translationally modified isoleucine amino acid feature." [SO:ke] -synonym: "modified L isoleucine" EXACT [] -synonym: "modified L-isoleucine" EXACT [] -synonym: "ModIle" EXACT AAMOD [] -xref: MOD:00910 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001397 -name: modified_L_phenylalanine -def: "A post translationally modified phenylalanine amino acid feature." [SO:ke] -synonym: "modified L phenylalanine" EXACT [] -synonym: "modified L-phenylalanine" EXACT [] -synonym: "ModPhe" EXACT AAMOD [] -xref: MOD:00914 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001398 -name: modified_L_histidine -def: "A post translationally modified histidie amino acid feature." [SO:ke] -synonym: "ModHis" EXACT [] -synonym: "modified L histidine" EXACT [] -synonym: "modified L-histidine" EXACT [] -xref: MOD:00909 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001399 -name: modified_L_serine -def: "A post translationally modified serine amino acid feature." [SO:ke] -synonym: "modified L serine" EXACT [] -synonym: "modified L-serine" EXACT [] -synonym: "MosSer" EXACT AAMOD [] -xref: MOD:00916 "http://www.psidev.info/index.php?q=node/104" -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001400 -name: modified_L_lysine -def: "A post translationally modified lysine amino acid feature." 
[SO:ke] -synonym: "modified L lysine" EXACT [] -synonym: "modified L-lysine" EXACT [] -synonym: "ModLys" EXACT AAMOD [] -xref: MOD:00912 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001401 -name: modified_L_leucine -def: "A post translationally modified leucine amino acid feature." [SO:ke] -synonym: "modified L leucine" EXACT [] -synonym: "modified L-leucine " EXACT [] -synonym: "ModLeu" EXACT AAMOD [] -xref: MOD:00911 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001402 -name: modified_L_selenocysteine -def: "A post translationally modified selenocysteine amino acid feature." [SO:ke] -synonym: "modified L selenocysteine" EXACT [] -synonym: "modified L-selenocysteine" EXACT [] -xref: MOD:01158 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001403 -name: modified_L_valine -def: "A post translationally modified valine amino acid feature." [SO:ke] -synonym: "modified L valine" EXACT [] -synonym: "modified L-valine" EXACT [] -synonym: "ModVal" EXACT AAMOD [] -xref: MOD:00920 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001404 -name: modified_L_proline -def: "A post translationally modified proline amino acid feature." [SO:ke] -synonym: "modified L proline" EXACT [] -synonym: "modified L-proline " EXACT [] -synonym: "ModPro" EXACT AAMOD [] -xref: MOD:00915 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001405 -name: modified_L_tyrosine -def: "A post translationally modified tyrosine amino acid feature." [SO:ke] -synonym: "modified L tyrosine" EXACT [] -synonym: "modified L-tyrosine" EXACT [] -synonym: "ModTry" EXACT AAMOD [] -xref: MOD:00919 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001406 -name: modified_L_arginine -def: "A post translationally modified arginine amino acid feature." [SO:ke] -synonym: "ModArg" EXACT AAMOD [] -synonym: "modified L arginine" EXACT [] -synonym: "modified L-arginine" EXACT [] -xref: MOD:00902 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001407 -name: peptidyl -def: "An attribute describing the nature of a proteinaceous polymer, where by the amino acid units are joined by peptide bonds." [SO:ke] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0001408 -name: cleaved_for_gpi_anchor_region -def: "The C-terminal residues of a polypeptide which are exchanged for a GPI-anchor." [EBI:rh] -synonym: "cleaved for gpi anchor region" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001409 -name: biomaterial_region -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001410 -name: experimental_feature -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -def: "A region that is defined according to its relations with other regions within the same sequence." [SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001413 -name: translocation_breakpoint -def: "The point within a chromosome where a translocation begins or ends." [SO:cb] -synonym: "translocation breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001414 -name: insertion_breakpoint -def: "The point within a chromosome where a insertion begins or ends." 
[SO:cb] -synonym: "insertion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001415 -name: deletion_breakpoint -def: "The point within a chromosome where a deletion begins or ends." [SO:cb] -synonym: "deletion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001416 -name: five_prime_flanking_region -def: "A flanking region located five prime of a specific region." [SO:chado] -synonym: "5' flanking region" RELATED [] -synonym: "five prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001417 -name: three_prime_flanking_region -def: "A flanking region located three prime of a specific region." [SO:chado] -synonym: "3' flanking region" RELATED [] -synonym: "three prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001418 -name: transcribed_fragment -def: "An experimental region, defined by a tiling array experiment to be transcribed at some level." [SO:ke] -comment: Term requested by the MODencode group. -synonym: "transcribed fragment" EXACT [] -synonym: "transfrag" RELATED [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001419 -name: cis_splice_site -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -def: "Primary transcript region bordering trans-splice junction." [SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001421 -name: splice_junction -def: "The boundary between an intron and an exon." [SO:ke] -synonym: "splice boundary" EXACT [] -synonym: "splice junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001422 -name: conformational_switch -def: "A region of a polypeptide, involved in the transition from one conformational state to another." 
[SO:ke] -comment: MM Young, K Kirshenbaum, KA Dill & S Highsmith. Predicting conformational switches in proteins. Protein Science, 1999, 8, 1752-64. K. Kirshenbaum, M.M. Young and S. Highsmith. Predicting Allosteric Switches in Myosins. Protein Science 8(9):1806-1815. 1999. -synonym: "polypeptide conformational switch" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001423 -name: dye_terminator_read -def: "A read produced by the dye terminator method of sequencing." [SO:ke] -synonym: "dye terminator read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001424 -name: pyrosequenced_read -def: "A read produced by pyrosequencing technology." [SO:ke] -comment: An example is a read produced by Roche 454 technology. -synonym: "pyorsequenced read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001425 -name: ligation_based_read -def: "A read produced by ligation based sequencing technologies." [SO:ke] -comment: An example of this kind of read is one produced by ABI SOLiD. -synonym: "ligation based read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001426 -name: polymerase_synthesis_read -def: "A read produced by the polymerase based sequence by synthesis method." [SO:ke] -comment: An example is a read produced by Illumina technology. -synonym: "polymerase synthesis read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001427 -name: cis_regulatory_frameshift_element -def: "A structural region in an RNA molecule which promotes ribosomal frameshifting of cis coding sequence." [RFAM:jd] -synonym: "cis regulatory frameshift element" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001428 -name: expressed_sequence_assembly -def: "A sequence assembly derived from expressed sequences." [SO:ke] -comment: From tracker [ 2372385 ] expressed_sequence_assembly. -synonym: "expressed sequence assembly" EXACT [] -is_a: SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0001429 -name: DNA_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with DNA." [SO:ke] -synonym: "DNA binding site" EXACT [] -is_a: SO:0001655 ! nucleotide_binding_site - -[Term] -id: SO:0001431 -name: cryptic_gene -def: "A gene that is not transcribed under normal conditions and is not critical to normal cellular functioning." [SO:ke] -synonym: "cryptic gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000976 ! cryptic - -[Term] -id: SO:0001432 -name: sequence_variant_affecting_polyadenylation -comment: OBSOLETE: This term was deleted as it conflated more than one term. The alteration is separate from the effect. -synonym: "mutation affecting polyadenylation" RELATED [] -synonym: "sequence variant affecting polyadenylation" EXACT [] -is_obsolete: true -replaced_by: SO:0001545 - -[Term] -id: SO:0001433 -name: three_prime_RACE_clone -def: "A three prime RACE (Rapid Amplification of cDNA Ends) clone is a cDNA clone copied from the 3' end of an mRNA (using a poly-dT primer to capture the polyA tail and a gene-specific or randomly primed 5' primer), and spliced into a vector for propagation in a suitable host." [modENCODE:nlw] -synonym: "3' RACE clone" RELATED [] -is_a: SO:0000317 ! cDNA_clone - -[Term] -id: SO:0001434 -name: cassette_pseudogene -def: "A cassette pseudogene is a kind of gene in an inactive form which may recombine at a telomeric locus to form a functional copy." [SO:ke] -comment: Requested by the Trypanosome community. -synonym: "cassette pseudogene" EXACT [] -synonym: "cassette type psedogene" RELATED [] -is_a: SO:0001760 ! non_processed_pseudogene - -[Term] -id: SO:0001435 -name: alanine -comment: A place holder for a cross product with chebi. -synonym: "A" EXACT aa1 [] -synonym: "Ala" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001436 -name: valine -comment: A place holder for a cross product with chebi. 
-synonym: "V" EXACT aa1 [] -synonym: "Val" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001437 -name: leucine -comment: A place holder for a cross product with chebi. -synonym: "L" EXACT aa1 [] -synonym: "Leu" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001438 -name: isoleucine -comment: A place holder for a cross product with chebi. -synonym: "I" EXACT aa1 [] -synonym: "Ile" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001439 -name: proline -comment: A place holder for a cross product with chebi. -synonym: "P" EXACT aa1 [] -synonym: "Pro" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001440 -name: tryptophan -comment: A place holder for a cross product with chebi. -synonym: "Trp" EXACT aa3 [] -synonym: "W" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001441 -name: phenylalanine -comment: A place holder for a cross product with chebi. -synonym: "F" EXACT aa1 [] -synonym: "Phe" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001442 -name: methionine -comment: A place holder for a cross product with chebi. -synonym: "M" EXACT aa1 [] -synonym: "Met" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001443 -name: glycine -comment: A place holder for a cross product with chebi. -synonym: "G" EXACT aa1 [] -synonym: "Gly" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001444 -name: serine -comment: A place holder for a cross product with chebi. -synonym: "S" EXACT aa1 [] -synonym: "Ser" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001445 -name: threonine -comment: A place holder for a cross product with chebi. -synonym: "T" EXACT aa1 [] -synonym: "Thr" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001446 -name: tyrosine -comment: A place holder for a cross product with chebi. -synonym: "Tyr" EXACT aa3 [] -synonym: "Y" EXACT aa1 [] -is_a: SO:0001237 ! 
amino_acid - -[Term] -id: SO:0001447 -name: cysteine -comment: A place holder for a cross product with chebi. -synonym: "C" EXACT aa1 [] -synonym: "Cys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001448 -name: glutamine -comment: A place holder for a cross product with chebi. -synonym: "Gln" EXACT aa3 [] -synonym: "Q" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001449 -name: asparagine -comment: A place holder for a cross product with chebi. -synonym: "Asn" EXACT aa3 [] -synonym: "N" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001450 -name: lysine -comment: A place holder for a cross product with chebi. -synonym: "K" EXACT aa1 [] -synonym: "Lys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001451 -name: arginine -comment: A place holder for a cross product with chebi. -synonym: "Arg" EXACT aa3 [] -synonym: "R" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001452 -name: histidine -comment: A place holder for a cross product with chebi. -synonym: "H" EXACT aa1 [] -synonym: "His" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001453 -name: aspartic_acid -comment: A place holder for a cross product with chebi. -synonym: "Asp" EXACT aa3 [] -synonym: "aspartic acid" EXACT [] -synonym: "D" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001454 -name: glutamic_acid -comment: A place holder for a cross product with chebi. -synonym: "E" EXACT aa1 [] -synonym: "Glu" EXACT aa3 [] -synonym: "glutamic acid" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001455 -name: selenocysteine -comment: A place holder for a cross product with chebi. -synonym: "Sec" EXACT aa3 [] -synonym: "U" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001456 -name: pyrrolysine -comment: A place holder for a cross product with chebi. -synonym: "O" EXACT aa1 [] -synonym: "Pyl" EXACT aa3 [] -is_a: SO:0001237 ! 
amino_acid - -[Term] -id: SO:0001457 -name: transcribed_cluster -def: "A region defined by a set of transcribed sequences from the same gene or expressed pseudogene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "transcribed cluster" EXACT [] -synonym: "unigene cluster" RELATED [] -is_a: SO:0001410 ! experimental_feature -relationship: has_part SO:0000695 ! reagent - -[Term] -id: SO:0001458 -name: unigene_cluster -def: "A kind of transcribed_cluster defined by a set of transcribed sequences from the a unique gene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "unigene cluster" RELATED [] -is_a: SO:0001457 ! transcribed_cluster - -[Term] -id: SO:0001459 -name: CRISPR -def: "Clustered Palindromic Repeats interspersed with bacteriophage derived spacer sequences." [RFAM:jd] -synonym: "Clustered_Regularly_Interspaced_Short_Palindromic_Repeat" EXACT [] -synonym: "CRISPR element" EXACT [] -xref: http:en.wikipedia.org/wiki/CRISPR -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0001460 -name: insulator_binding_site -def: "A binding site that, in an insulator region of a nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -comment: See tracker ID 2060908. -synonym: "insulator binding site" RELATED [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -relationship: part_of SO:0000627 ! insulator - -[Term] -id: SO:0001461 -name: enhancer_binding_site -def: "A binding site that, in the enhancer region of a nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -synonym: "enhancer binding site" RELATED [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -relationship: part_of SO:0000165 ! enhancer - -[Term] -id: SO:0001462 -name: contig_collection -def: "A collection of contigs." [SO:ke] -comment: See tracker ID: 2138359. 
-synonym: "contig collection" EXACT [] -is_a: SO:0001085 ! sequence_conflict -is_a: SO:0001260 ! sequence_collection -relationship: has_part SO:0000149 ! contig - -[Term] -id: SO:0001463 -name: lincRNA -def: "A multiexonic non-coding RNA transcribed by RNA polymerase II." [PMID:19182780, SO:ke] -synonym: "large intervening non-coding RNA" EXACT [] -synonym: "long intergenic non-coding RNA" EXACT [] -is_a: SO:0001877 ! lnc_RNA - -[Term] -id: SO:0001464 -name: UST -def: "An EST spanning part or all of the untranslated regions of a protein-coding transcript." [SO:nlw] -synonym: "UTR sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001465 -name: three_prime_UST -def: "A UST located in the 3'UTR of a protein-coding transcript." [SO:nlw] -synonym: "3' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001466 -name: five_prime_UST -def: "An UST located in the 5'UTR of a protein-coding transcript." [SO:nlw] -synonym: "5' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001467 -name: RST -def: "A tag produced from a single sequencing read from a RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "RACE sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001468 -name: three_prime_RST -def: "A tag produced from a single sequencing read from a 3'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "3' RST" EXACT [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001469 -name: five_prime_RST -def: "A tag produced from a single sequencing read from a 5'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "5' RST" RELATED [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001470 -name: UST_match -def: "A match against an UST sequence." [SO:nlw] -synonym: "UST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001471 -name: RST_match -def: "A match against an RST sequence." [SO:nlw] -synonym: "RST match" EXACT [] -is_a: SO:0000102 ! 
expressed_sequence_match - -[Term] -id: SO:0001472 -name: primer_match -def: "A nucleotide match to a primer sequence." [SO:nlw] -synonym: "primer match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0001473 -name: miRNA_antiguide -def: "A region of the pri miRNA that basepairs with the guide to form the hairpin." [SO:ke] -synonym: "miRNA antiguide " EXACT [] -synonym: "miRNA passenger strand" EXACT [] -synonym: "miRNA star" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-05-27T03:35:43Z - -[Term] -id: SO:0001474 -name: trans_splice_junction -def: "The boundary between the spliced leader and the first exon of the mRNA." [SO:ke] -synonym: "trans-splice junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2009-07-13T04:50:49Z - -[Term] -id: SO:0001475 -name: outron -def: "A region of a primary transcript, that is removed via trans splicing." [PMID:16401417, SO:ke] -is_a: SO:0000835 ! primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-07-14T11:36:08Z - -[Term] -id: SO:0001476 -name: natural_plasmid -def: "A plasmid that occurs naturally." [SO:xp] -synonym: "natural plasmid" EXACT [] -is_a: SO:0000155 ! plasmid -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -relationship: has_quality SO:0000782 ! natural -created_by: kareneilbeck -creation_date: 2009-09-01T03:43:06Z - -[Term] -id: SO:0001477 -name: gene_trap_construct -def: "A gene trap construct is a type of engineered plasmid which is designed to integrate into a genome and produce a fusion transcript between exons of the gene into which it inserts and a reporter element in the construct. Gene traps contain a splice acceptor, do not contain promoter elements for the reporter, and are mutagenic. Gene traps may be bicistronic with the second cassette containing a promoter driving an a selectable marker." 
[ZFIN:dh] -synonym: "gene trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:49:09Z - -[Term] -id: SO:0001478 -name: promoter_trap_construct -def: "A promoter trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when inserted in close proximity to a promoter element. Promoter traps typically do not contain promoter elements and are mutagenic." [ZFIN:dh] -synonym: "promoter trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:52:01Z - -[Term] -id: SO:0001479 -name: enhancer_trap_construct -def: "An enhancer trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when the expression from a basic minimal promoter is enhanced by genomic enhancer elements. Enhancer traps contain promoter elements and are not usually mutagenic." [ZFIN:dh] -synonym: "enhancer trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:53:26Z - -[Term] -id: SO:0001480 -name: PAC_end -def: "A region of sequence from the end of a PAC clone that may provide a highly specific marker." [ZFIN:mh] -synonym: "PAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000154 ! PAC -created_by: kareneilbeck -creation_date: 2009-09-09T05:18:12Z - -[Term] -id: SO:0001481 -name: RAPD -def: "RAPD is a 'PCR product' where a sequence variant is identified through the use of PCR with random primers." [ZFIN:mh] -synonym: "Random Amplification Polymorphic DNA" EXACT [] -is_a: SO:0000006 ! PCR_product -created_by: kareneilbeck -creation_date: 2009-09-09T05:26:10Z - -[Term] -id: SO:0001482 -name: shadow_enhancer -synonym: "shadow enhancer" EXACT [] -is_a: SO:0000165 ! 
enhancer -created_by: kareneilbeck -creation_date: 2009-09-09T05:29:29Z - -[Term] -id: SO:0001483 -name: SNV -def: "SNVs are single nucleotide positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0001484 -name: X_element_combinatorial_repeat -def: "An X element combinatorial repeat is a repeat region located between the X element and the telomere or adjacent Y' element." [http://www.yeastgenome.org/help/glossary.html] -comment: X element combinatorial repeats contain Tbf1p binding sites,\nand possible functions include a role in telomerase-independent telomere\nmaintenance via recombination or as a barrier against transcriptional\nsilencing. These are usually present as a combination of one or more of\nseveral types of smaller elements (designated A, B, C, or D). This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "X element combinatorial repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T11:03:37Z - -[Term] -id: SO:0001485 -name: Y_prime_element -def: "A Y' element is a repeat region (SO:0000657) located adjacent to telomeric repeats or X element combinatorial repeats, either as a single copy or tandem repeat of two to four copies." [http:http://www.yeastgenome.org/help/glossary.html] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "Y' element" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! 
telomere -created_by: kareneilbeck -creation_date: 2009-11-10T12:08:57Z - -[Term] -id: SO:0001486 -name: standard_draft -def: "The status of a whole genome sequence, where the data is minimally filtered or un-filtered, from any number of sequencing platforms, and is assembled into contigs. Genome sequence of this quality may harbour regions of poor quality and can be relatively incomplete." [DOI:10.1126] -synonym: "standard draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:48:32Z - -[Term] -id: SO:0001487 -name: high_quality_draft -def: "The status of a whole genome sequence, where overall coverage represents at least 90 percent of the genome." [DOI:10.1126] -synonym: "high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:52:36Z - -[Term] -id: SO:0001488 -name: improved_high_quality_draft -def: "The status of a whole genome sequence, where additional work has been performed, using either manual or automated methods, such as gap resolution." [DOI:10.1126] -synonym: "improved high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:54:35Z - -[Term] -id: SO:0001489 -name: annotation_directed_improved_draft -def: "The status of a whole genome sequence,where annotation, and verification of coding regions has occurred." [DOI:10.1126] -synonym: "annotation directed improvement" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:57:10Z - -[Term] -id: SO:0001490 -name: noncontiguous_finished -def: "The status of a whole genome sequence, where the assembly is high quality, closure approaches have been successful for most gaps, misassemblies and low quality regions." [DOI:10.1126] -synonym: "non contiguous finished" EXACT [] -is_a: SO:0001499 ! 
whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:01:07Z - -[Term] -id: SO:0001491 -name: finished_genome -def: "The status of a whole genome sequence, with less than 1 error per 100,000 base pairs." [DOI:10.1126] -synonym: "finished" EXACT [] -synonym: "finished genome" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:04:43Z - -[Term] -id: SO:0001492 -name: intronic_regulatory_region -def: "A regulatory region that is part of an intron." [SO:ke] -synonym: "intronic regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000188 ! intron -created_by: kareneilbeck -creation_date: 2009-11-08T02:48:02Z - -[Term] -id: SO:0001493 -name: centromere_DNA_Element_I -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region composed of 8-11bp which enables binding by the centromere binding factor 1(Cbf1p)." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEI" EXACT [] -synonym: "Centromere DNA Element I" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0001794 ! point_centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:47:23Z - -[Term] -id: SO:0001494 -name: centromere_DNA_Element_II -def: "A centromere DNA Element II (CDEII) is part a conserved region of the centromere, consisting of a consensus region that is AT-rich and ~ 75-100 bp in length." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEII" EXACT [] -synonym: "centromere DNA Element II" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0001794 ! 
point_centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:51:26Z - -[Term] -id: SO:0001495 -name: centromere_DNA_Element_III -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region that consists of a 25-bp which enables binding by the centromere DNA binding factor 3 (CBF3) complex." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEIII" EXACT [] -synonym: "centromere DNA Element III" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0001794 ! point_centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:54:47Z - -[Term] -id: SO:0001496 -name: telomeric_repeat -def: "The telomeric repeat is a repeat region, part of the chromosome, which in yeast, is a G-rich terminal sequence of the form (TG(1-3))n or more precisely ((TG)(1-6)TG(2-3))n." [PMID:8720065] -comment: The repeats are maintained by telomerase and there is generally 300 (+/-) 75 bp of TG(1-3) at a given end. Telomeric repeats function in completing chromosome replication and protecting the ends from degradation and end-to-end fusions. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880739. -synonym: "telomeric repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-09T06:00:42Z - -[Term] -id: SO:0001497 -name: X_element -def: "The X element is a conserved region, of the telomere, of ~475 bp that contains an ARS sequence and in most cases an Abf1p binding site." [http://www.yeastgenome.org/help/glossary.html#xelemcoresequence] -comment: Possible functions include roles in chromosomal segregation,\nmaintenance of chromosome stability, recombinational sequestering, or as a\nbarrier to transcriptional silencing. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. 
-synonym: "X element" RELATED [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T10:56:54Z - -[Term] -id: SO:0001498 -name: YAC_end -def: "A region of sequence from the end of a YAC clone that may provide a highly specific marker." [SO:ke] -synonym: "YAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000152 ! YAC -created_by: kareneilbeck -creation_date: 2009-11-19T11:07:18Z - -[Term] -id: SO:0001499 -name: whole_genome_sequence_status -def: "The status of whole genome sequence." [DOI:10.1126] -comment: This terms and children were added to SO in response to tracker request by Patrick Chain. The paper Genome Project Standards in a New Era of Sequencing. Science October 9th 2009, addresses these terms. -synonym: "whole genome sequence status" EXACT [] -is_a: SO:0000905 ! status -created_by: kareneilbeck -creation_date: 2009-10-23T12:47:47Z - -[Term] -id: SO:0001500 -name: heritable_phenotypic_marker -def: "A biological_region characterized as a single heritable trait in a phenotype screen. The heritable phenotype may be mapped to a chromosome but generally has not been characterized to a specific gene locus." [JAX:hdene] -synonym: "heritable phenotypic marker" EXACT [] -synonym: "phenotypic marker" EXACT [] -is_a: SO:0001645 ! genetic_marker -created_by: kareneilbeck -creation_date: 2009-12-07T01:50:55Z - -[Term] -id: SO:0001501 -name: peptide_collection -def: "A collection of peptide sequences." [BBOP:nlw] -comment: Term requested via tracker ID: 2910829. -synonym: "peptide collection" EXACT [] -synonym: "peptide set" EXACT [] -is_a: SO:0001260 ! sequence_collection -relationship: has_part SO:0000104 ! polypeptide -created_by: kareneilbeck -creation_date: 2009-12-11T10:58:58Z - -[Term] -id: SO:0001502 -name: high_identity_region -def: "An experimental feature with high sequence identity to another sequence." [SO:ke] -comment: Requested by tracker ID: 2902685. 
-synonym: "high identity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2009-12-11T11:06:05Z - -[Term] -id: SO:0001503 -name: processed_transcript -def: "A transcript for which no open reading frame has been identified and for which no other function has been determined." [MGI:hdeen] -comment: Ensembl and Vega also use this term name. Requested by Howard Deen of MGI. -synonym: "processed transcript" EXACT [] -is_a: SO:0000673 ! transcript -created_by: kareneilbeck -creation_date: 2009-12-21T05:37:14Z - -[Term] -id: SO:0001504 -name: assortment_derived_variation -def: "A chromosome variation derived from an event during meiosis." [SO:ke] -synonym: "assortment derived variation" RELATED [] -is_a: SO:0000240 ! chromosome_variation -created_by: kareneilbeck -creation_date: 2010-03-02T05:03:18Z - -[Term] -id: SO:0001505 -name: reference_genome -def: "A collection of sequences (often chromosomes) taken as the standard for a given organism and genome assembly." [SO:ke] -synonym: "reference genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:10:03Z - -[Term] -id: SO:0001506 -name: variant_genome -def: "A collection of sequences (often chromosomes) of an individual." [SO:ke] -synonym: "variant genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:11:25Z - -[Term] -id: SO:0001507 -name: variant_collection -def: "A collection of one or more sequences of an individual." [SO:ke] -synonym: "variant collection" RELATED [] -is_a: SO:0001260 ! sequence_collection -relationship: has_part SO:0001059 ! sequence_alteration -created_by: kareneilbeck -creation_date: 2010-03-03T02:13:28Z - -[Term] -id: SO:0001508 -name: alteration_attribute -synonym: "alteration attribute" EXACT [] -is_a: SO:0000733 ! 
feature_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:53:23Z - -[Term] -id: SO:0001509 -name: chromosomal_variation_attribute -synonym: "chromosomal variation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:54:30Z - -[Term] -id: SO:0001510 -name: intrachromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:25Z - -[Term] -id: SO:0001511 -name: interchromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:43Z - -[Term] -id: SO:0001512 -name: insertion_attribute -def: "A quality of a chromosomal insertion,." [SO:ke] -synonym: "insertion attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:56Z - -[Term] -id: SO:0001513 -name: tandem -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:37Z - -[Term] -id: SO:0001514 -name: direct -def: "A quality of an insertion where the insert is not in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:49Z - -[Term] -id: SO:0001515 -name: inverted -def: "A quality of an insertion where the insert is in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:40Z - -[Term] -id: SO:0001516 -name: free -def: "The quality of a duplication where the new region exists independently of the original." [SO:ke] -is_a: SO:0001523 ! duplication_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:51Z - -[Term] -id: SO:0001517 -name: inversion_attribute -synonym: "inversion attribute" EXACT [] -is_a: SO:0001508 ! 
alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:10Z - -[Term] -id: SO:0001518 -name: pericentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:24Z - -[Term] -id: SO:0001519 -name: paracentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:35Z - -[Term] -id: SO:0001520 -name: translocaton_attribute -synonym: "translocation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:47Z - -[Term] -id: SO:0001521 -name: reciprocal -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:34Z - -[Term] -id: SO:0001522 -name: insertional -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:51Z - -[Term] -id: SO:0001523 -name: duplication_attribute -synonym: "duplication attribute" RELATED [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-05T01:56:33Z - -[Term] -id: SO:0001524 -name: chromosomally_aberrant_genome -synonym: "chromosomally aberrant genome" RELATED [] -is_a: SO:0001506 ! variant_genome -created_by: kareneilbeck -creation_date: 2010-03-05T02:21:00Z - -[Term] -id: SO:0001525 -name: assembly_error_correction -def: "A region of sequence where the final nucleotide assignment differs from the original assembly due to an improvement that replaces a mistake." [SO:ke] -synonym: "assembly error correction" RELATED [] -is_a: SO:0000413 ! sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:16:31Z - -[Term] -id: SO:0001526 -name: base_call_error_correction -def: "A region of sequence where the final nucleotide assignment is different from that given by the base caller due to an improvement that replaces a mistake." [SO:ke] -synonym: "base call error correction" RELATED [] -is_a: SO:0000413 ! 
sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:18:07Z - -[Term] -id: SO:0001527 -name: peptide_localization_signal -def: "A region of peptide sequence used to target the polypeptide molecule to a specific organelle." [SO:ke] -subset: SOFA -synonym: "localization signal" RELATED [] -synonym: "peptide localization signal" EXACT [] -is_a: SO:0000839 ! polypeptide_region -created_by: kareneilbeck -creation_date: 2010-03-11T02:15:05Z - -[Term] -id: SO:0001528 -name: nuclear_localization_signal -def: "A polypeptide region that targets a polypeptide to the nucleus." [SO:ke] -synonym: "NLS" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_localization_signal "wikipedia" -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:16:38Z - -[Term] -id: SO:0001529 -name: endosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the endosome." [SO:ke] -synonym: "endosomal localization signal" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:20:58Z - -[Term] -id: SO:0001530 -name: lysosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the lysosome." [SO:ke] -synonym: "lysosomal localization signal" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:24:10Z - -[Term] -id: SO:0001531 -name: nuclear_export_signal -def: "A polypeptide region that targets a polypeptide to he cytoplasm." [SO:ke] -synonym: "NES" EXACT [] -synonym: "nuclear export signal" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_export_signal -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:25:25Z - -[Term] -id: SO:0001532 -name: recombination_signal_sequence -def: "A region recognized by a recombinase." 
[SO:ke] -synonym: "recombination signal sequence" RELATED [] -xref: http://en.wikipedia.org/wiki/Recombination_Signal_Sequences "wikipedia" -is_a: SO:0000299 ! specific_recombination_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:16:47Z - -[Term] -id: SO:0001533 -name: cryptic_splice_site -def: "A splice site that is in part of the transcript not normally spliced. They occur via mutation or transcriptional error." [SO:ke] -synonym: "cryptic splice signal" RELATED [] -synonym: "cryptic splice site" EXACT [] -is_a: SO:0000162 ! splice_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:25:06Z - -[Term] -id: SO:0001534 -name: nuclear_rim_localization_signal -def: "A polypeptide region that targets a polypeptide to the nuclear rim." [SO:ke] -synonym: "nuclear rim localization signal" RELATED [] -xref: PMID:16027110 -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T03:31:30Z - -[Term] -id: SO:0001535 -name: p_element -def: "A P_element is a DNA transposon responsible for hybrid dysgenesis." [SO:ke] -synonym: "P element" RELATED [] -is_a: SO:0000182 ! DNA_transposon -created_by: kareneilbeck -creation_date: 2010-03-12T03:40:33Z - -[Term] -id: SO:0001536 -name: functional_variant -def: "A sequence variant in which the function of a gene product is altered with respect to a reference." [SO:ke] -synonym: "functional variant" EXACT [] -is_a: SO:0001060 ! sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:30:25Z - -[Term] -id: SO:0001537 -name: structural_variant -def: "A sequence variant that changes one or more sequence features." [SO:ke] -synonym: "structural variant" RELATED [] -is_a: SO:0001060 ! sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:31:01Z - -[Term] -id: SO:0001538 -name: transcript_function_variant -def: "A sequence variant which alters the functioning of a transcript with respect to a reference sequence." 
[SO:ke] -synonym: "transcript function variant" EXACT [] -is_a: SO:0001536 ! functional_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:32:58Z - -[Term] -id: SO:0001539 -name: translational_product_function_variant -def: "A sequence variant that affects the functioning of a translational product with respect to a reference sequence." [SO:ke] -synonym: "translational product variant" EXACT [] -is_a: SO:0001536 ! functional_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:46:15Z - -[Term] -id: SO:0001540 -name: level_of_transcript_variant -def: "A sequence variant which alters the level of a transcript." [SO:ke] -synonym: "level of transcript variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:47:07Z - -[Term] -id: SO:0001541 -name: decreased_transcript_level_variant -def: "A sequence variant that increases the level of mature, spliced and processed RNA with respect to a reference sequence." [SO:ke] -synonym: "decreased transcript level" EXACT [] -is_a: SO:0001540 ! level_of_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:47:47Z - -[Term] -id: SO:0001542 -name: increased_transcript_level_variant -def: "A sequence variant that increases the level of mature, spliced and processed RNA with respect to a reference sequence." [SO:ke] -synonym: "increased transcript level variant" EXACT [] -is_a: SO:0001540 ! level_of_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:48:17Z - -[Term] -id: SO:0001543 -name: transcript_processing_variant -def: "A sequence variant that affects the post transcriptional processing of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcript processing variant" EXACT [] -is_a: SO:0001538 ! 
transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:48:48Z - -[Term] -id: SO:0001544 -name: editing_variant -def: "A transcript processing variant whereby the process of editing is disrupted with respect to the reference." [SO:ke] -synonym: "editing variant" EXACT [] -is_a: SO:0001543 ! transcript_processing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:49:25Z - -[Term] -id: SO:0001545 -name: polyadenylation_variant -def: "A sequence variant that changes polyadenylation with respect to a reference sequence." [SO:ke] -synonym: "polyadenylation variant" EXACT [] -is_a: SO:0001543 ! transcript_processing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:49:40Z - -[Term] -id: SO:0001546 -name: transcript_stability_variant -def: "A variant that changes the stability of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcript stability variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:01Z - -[Term] -id: SO:0001547 -name: decreased_transcript_stability_variant -def: "A sequence variant that decreases transcript stability with respect to a reference sequence." [SO:ke] -synonym: "decrease transcript stability variant" EXACT [] -is_a: SO:0001546 ! transcript_stability_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:23Z - -[Term] -id: SO:0001548 -name: increased_transcript_stability_variant -def: "A sequence variant that increases transcript stability with respect to a reference sequence." [SO:ke] -synonym: "increased transcript stability variant" EXACT [] -is_a: SO:0001546 ! transcript_stability_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:39Z - -[Term] -id: SO:0001549 -name: transcription_variant -def: "A variant that changes alters the transcription of a transcript with respect to a reference sequence." 
[SO:ke] -synonym: "transcription variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:51:26Z - -[Term] -id: SO:0001550 -name: rate_of_transcription_variant -def: "A sequence variant that changes the rate of transcription with respect to a reference sequence." [SO:ke] -synonym: "rate of transcription variant" EXACT [] -is_a: SO:0001549 ! transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:51:50Z - -[Term] -id: SO:0001551 -name: increased_transcription_rate_variant -def: "A sequence variant that increases the rate of transcription with respect to a reference sequence." [SO:ke] -synonym: "increased transcription rate variant" EXACT [] -is_a: SO:0001550 ! rate_of_transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:52:17Z - -[Term] -id: SO:0001552 -name: decreased_transcription_rate_variant -def: "A sequence variant that decreases the rate of transcription with respect to a reference sequence." [SO:ke] -synonym: "decreased transcription rate variant" EXACT [] -is_a: SO:0001550 ! rate_of_transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:52:43Z - -[Term] -id: SO:0001553 -name: translational_product_level_variant -def: "A functional variant that changes the translational product level with respect to a reference sequence." [SO:ke] -synonym: "translational product level variant" EXACT [] -is_a: SO:0001539 ! translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:53:32Z - -[Term] -id: SO:0001554 -name: polypeptide_function_variant -def: "A sequence variant which changes polypeptide functioning with respect to a reference sequence." [SO:ke] -synonym: "polypeptide function variant" EXACT [] -is_a: SO:0001539 ! 
translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:53:54Z - -[Term] -id: SO:0001555 -name: decreased_translational_product_level -def: "A sequence variant which decreases the translational product level with respect to a reference sequence." [SO:ke] -synonym: "decrease translational product level" EXACT [] -is_a: SO:0001553 ! translational_product_level_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:54:25Z - -[Term] -id: SO:0001556 -name: increased_translational_product_level -def: "A sequence variant which increases the translational product level with respect to a reference sequence." [SO:ke] -synonym: "increase translational product level" EXACT [] -is_a: SO:0001553 ! translational_product_level_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:55:25Z - -[Term] -id: SO:0001557 -name: polypeptide_gain_of_function_variant -def: "A sequence variant which causes gain of polypeptide function with respect to a reference sequence." [SO:ke] -synonym: "polypeptide gain of function variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:12Z - -[Term] -id: SO:0001558 -name: polypeptide_localization_variant -def: "A sequence variant which changes the localization of a polypeptide with respect to a reference sequence." [SO:ke] -synonym: "polypeptide localization variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:37Z - -[Term] -id: SO:0001559 -name: polypeptide_loss_of_function_variant -def: "A sequence variant that causes the loss of a polypeptide function with respect to a reference sequence." [SO:ke] -synonym: "polypeptide loss of function variant" EXACT [] -is_a: SO:0001554 ! 
polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:58Z - -[Term] -id: SO:0001560 -name: inactive_ligand_binding_site -def: "A sequence variant that causes the inactivation of a ligand binding site with respect to a reference sequence." [SO:ke] -synonym: "inactive ligand binding site" EXACT [] -is_a: SO:0001559 ! polypeptide_loss_of_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:58:00Z - -[Term] -id: SO:0001561 -name: polypeptide_partial_loss_of_function -def: "A sequence variant that causes some but not all loss of polypeptide function with respect to a reference sequence." [SO:ke] -synonym: "polypeptide partial loss of function" EXACT [] -is_a: SO:0001559 ! polypeptide_loss_of_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:58:32Z - -[Term] -id: SO:0001562 -name: polypeptide_post_translational_processing_variant -def: "A sequence variant that causes a change in post translational processing of the peptide with respect to a reference sequence." [SO:ke] -synonym: "polypeptide post translational processing variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:59:06Z - -[Term] -id: SO:0001563 -name: copy_number_change -def: "A sequence variant where copies of a feature (CNV) are either increased or decreased." [SO:ke] -synonym: "copy number change" EXACT [] -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:27:33Z - -[Term] -id: SO:0001564 -name: gene_variant -def: "A sequence variant where the structure of the gene is changed." [SO:ke] -synonym: "gene structure variant" EXACT [] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:01Z - -[Term] -id: SO:0001565 -name: gene_fusion -def: "A sequence variant whereby a two genes have become joined." [SO:ke] -synonym: "gene fusion" EXACT [] -is_a: SO:0001564 ! 
gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:28Z - -[Term] -id: SO:0001566 -name: regulatory_region_variant -def: "A sequence variant located within a regulatory region." [SO:ke] -comment: EBI term: Regulatory region variations - In regulatory region annotated by Ensembl. -synonym: "regulatory region variant" EXACT [] -synonym: "regulatory_region_" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:48Z - -[Term] -id: SO:0001567 -name: stop_retained_variant -def: "A sequence variant where at least one base in the terminator codon is changed, but the terminator remains." [SO:ke] -synonym: "stop retained variant" EXACT [] -is_a: SO:0001590 ! terminator_codon_variant -is_a: SO:0001819 ! synonymous_variant -created_by: kareneilbeck -creation_date: 2010-04-19T05:02:30Z - -[Term] -id: SO:0001568 -name: splicing_variant -def: "A sequence variant that changes the process of splicing." [SO:ke] -synonym: "splicing variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001564 ! gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:29:22Z - -[Term] -id: SO:0001569 -name: cryptic_splice_site_variant -def: "A sequence variant causing a new (functional) splice site." [EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "cryptic splice site activation" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:29:41Z - -[Term] -id: SO:0001570 -name: cryptic_splice_acceptor -def: "A sequence variant whereby a new splice site is created due to the activation of a new acceptor." [SO:ke] -synonym: "cryptic splice acceptor" EXACT [] -is_a: SO:0001569 ! 
cryptic_splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:30:11Z - -[Term] -id: SO:0001571 -name: cryptic_splice_donor -def: "A sequence variant whereby a new splice site is created due to the activation of a new donor." [SO:ke] -synonym: "cryptic splice donor" EXACT [] -is_a: SO:0001569 ! cryptic_splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:30:35Z - -[Term] -id: SO:0001572 -name: exon_loss -def: "A sequence variant whereby an exon is lost from the transcript." [SO:ke] -synonym: "exon loss" EXACT [] -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:09Z - -[Term] -id: SO:0001573 -name: intron_gain -def: "A sequence variant whereby an intron is gained by the processed transcript; usually a result of an alteration of the donor or acceptor." [EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "intron gain" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:25Z - -[Term] -id: SO:0001574 -name: splice_acceptor_variant -def: "A splice variant that changes the 2 base region at the 3' end of an intron." [SO:ke] -synonym: "splice acceptor variant" EXACT [] -is_a: SO:0001629 ! splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:52Z - -[Term] -id: SO:0001575 -name: splice_donor_variant -def: "A splice variant that changes the2 base region at the 5' end of an intron." [SO:ke] -synonym: "splice donor variant" EXACT [] -is_a: SO:0001629 ! splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:32:10Z - -[Term] -id: SO:0001576 -name: transcript_variant -def: "A sequence variant that changes the structure of the transcript." [SO:ke] -synonym: "transcript variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001564 ! 
gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:32:41Z - -[Term] -id: SO:0001577 -name: complex_transcript_variant -def: "A transcript variant with a complex INDEL- Insertion or deletion that spans an exon/intron border or a coding sequence/UTR border." [http://ensembl.org/info/docs/variation/index.html] -comment: EBI term: Complex InDel - Insertion or deletion that spans an exon/intron border or a coding sequence/UTR border. -synonym: "complex transcript variant" EXACT [] -synonym: "complex_indel" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "complext change in transcript" EXACT [] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:33:03Z - -[Term] -id: SO:0001578 -name: stop_lost -def: "A sequence variant where at least one base of the terminator codon (stop) is changed, resulting in an elongated transcript." [SO:ke] -comment: EBI term: Stop lost - In coding sequence, resulting in the loss of a stop codon. -synonym: "stop codon lost" EXACT [] -synonym: "stop lost" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001590 ! terminator_codon_variant -is_a: SO:0001650 ! inframe_variant -is_a: SO:0001907 ! feature_elongation -created_by: kareneilbeck -creation_date: 2010-03-23T03:46:42Z - -[Term] -id: SO:0001579 -name: transcript_sequence_variant -synonym: "transcript sequence variant" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001580 -name: coding_sequence_variant -alt_id: SO:0001581 -def: "A sequence variant that changes the coding sequence." [SO:ke] -synonym: "coding sequence variant" EXACT [] -synonym: "coding variant" EXACT [] -synonym: "codon variant" EXACT [] -synonym: "codon_variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001791 ! 
exon_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:34:36Z - -[Term] -id: SO:0001582 -name: initiator_codon_variant -def: "A codon variant that changes at least one base of the first codon of a transcript." [SO:ke] -synonym: "initiatior codon variant" EXACT [] -synonym: "initiator codon change" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -xref: loinc:LA6695-6 "Initiating Methionine" -is_a: SO:0001650 ! inframe_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:35:18Z - -[Term] -id: SO:0001583 -name: missense_variant -alt_id: SO:0001584 -alt_id: SO:0001783 -def: "A sequence variant, where the change may be longer than 3 bases, and at least one base of a codon is changed resulting in a codon that encodes for a different amino acid." [EBI:gr, SO:ke] -comment: EBI term: Non-synonymous SNPs. SNPs that are located in the coding sequence and result in an amino acid change in the encoded peptide sequence. A change that causes a non_synonymous_codon can be more than 3 bases - for example 4 base substitution. -synonym: "missense" EXACT [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "missense codon" EXACT [] -synonym: "missense_variant" EXACT [] -synonym: "non synonymous codon" EXACT [] -synonym: "non synonymous variant" EXACT [] -synonym: "non_synonymous_coding" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -xref: http://en.wikipedia.org/wiki/Missense_mutation -xref: loinc:LA6698-0 "Missense" -is_a: SO:0001650 ! inframe_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:35:49Z - -[Term] -id: SO:0001585 -name: conservative_missense_variant -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for a different but similar amino acid. These variants may or may not be deleterious." 
[SO:ke] -synonym: "conservative missense codon" EXACT [] -synonym: "conservative missense variant" EXACT [] -synonym: "neutral missense codon" RELATED [] -synonym: "quiet missense codon" RELATED [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001583 ! missense_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:36:40Z - -[Term] -id: SO:0001586 -name: non_conservative_missense_variant -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for an amino acid with different biochemical properties." [SO:ke] -synonym: "non conservative missense codon" EXACT [] -synonym: "non conservative missense variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001583 ! missense_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:37:16Z - -[Term] -id: SO:0001587 -name: stop_gained -def: "A sequence variant whereby at least one base of a codon is changed, resulting in a premature stop codon, leading to a shortened transcript." [SO:ke] -comment: EBI term: Stop gained - In coding sequence, resulting in the gain of a stop codon (i.e. leading to a shortened peptide sequence). -synonym: "nonsense" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "nonsense codon" EXACT [] -synonym: "stop codon gained" RELATED [] -synonym: "stop gained" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -xref: loinc:LA6699-8 "Nonsense" -is_a: SO:0001650 ! inframe_variant -is_a: SO:0001906 ! feature_truncation -created_by: kareneilbeck -creation_date: 2010-03-22T02:37:52Z - -[Term] -id: SO:0001589 -name: frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three." [SO:ke] -comment: EBI term:Frameshift variations - In coding sequence, resulting in a frameshift. 
-synonym: "frameshift variant" EXACT [] -synonym: "frameshift_" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "frameshift_coding" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -xref: loinc:LA6694-9 "Frameshift" -is_a: SO:0001818 ! protein_altering_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:40:19Z - -[Term] -id: SO:0001590 -name: terminator_codon_variant -alt_id: SO:0001625 -def: "A sequence variant whereby at least one of the bases in the terminator codon is changed." [SO:ke] -comment: The terminal codon may be the terminator, or in an incomplete transcript the last available codon. -synonym: "terminal codon variant" EXACT [] -synonym: "terminal_codon_variant" EXACT [] -synonym: "terminator codon variant" EXACT [] -xref: loinc:LA6700-2 "Stop Codon Mutation" -is_a: SO:0001580 ! coding_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:40:37Z - -[Term] -id: SO:0001591 -name: frame_restoring_variant -def: "A sequence variant that reverts the sequence of a previous frameshift mutation back to the initial frame." [SO:ke] -synonym: "frame restoring variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:09Z - -[Term] -id: SO:0001592 -name: minus_1_frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, by shifting one base ahead." [http://arjournals.annualreviews.org/doi/pdf/10.1146/annurev.ge.08.120174.001535] -synonym: "-1 frameshift variant" EXACT [] -synonym: "minus 1 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:30Z - -[Term] -id: SO:0001593 -name: minus_2_frameshift_variant -synonym: "-2 frameshift variant" EXACT [] -synonym: "minus 2 frameshift variant" EXACT [] -is_a: SO:0001589 ! 
frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:52Z - -[Term] -id: SO:0001594 -name: plus_1_frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, by shifting one base backward." [http://arjournals.annualreviews.org/doi/pdf/10.1146/annurev.ge.08.120174.001535] -synonym: "+1 frameshift variant" EXACT [] -synonym: "plus 1 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:42:06Z - -[Term] -id: SO:0001595 -name: plus_2_frameshift_variant -synonym: "+2 frameshift variant" EXACT [] -synonym: "plus 2 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:42:23Z - -[Term] -id: SO:0001596 -name: transcript_secondary_structure_variant -def: "A sequence variant within a transcript that changes the secondary structure of the RNA product." [SO:ke] -synonym: "transcript secondary structure variant" EXACT [] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:43:18Z - -[Term] -id: SO:0001597 -name: compensatory_transcript_secondary_structure_variant -def: "A secondary structure variant that compensate for the change made by a previous variant." [SO:ke] -synonym: "compensatory transcript secondary structure variant" EXACT [] -is_a: SO:0001596 ! transcript_secondary_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:43:54Z - -[Term] -id: SO:0001598 -name: translational_product_structure_variant -def: "A sequence variant within the transcript that changes the structure of the translational product." [SO:ke] -synonym: "translational product structure variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001564 ! 
gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:44:17Z - -[Term] -id: SO:0001599 -name: 3D_polypeptide_structure_variant -def: "A sequence variant that changes the resulting polypeptide structure." [SO:ke] -synonym: "3D polypeptide structure variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:44:46Z - -[Term] -id: SO:0001600 -name: complex_3D_structural_variant -def: "A sequence variant that changes the resulting polypeptide structure." [SO:ke] -synonym: "complex 3D structural variant" EXACT [] -is_a: SO:0001599 ! 3D_polypeptide_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:45:13Z - -[Term] -id: SO:0001601 -name: conformational_change_variant -def: "A sequence variant in the CDS region that causes a conformational change in the resulting polypeptide sequence." [SO:ke] -synonym: "conformational change variant" EXACT [] -is_a: SO:0001599 ! 3D_polypeptide_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:45:48Z - -[Term] -id: SO:0001602 -name: complex_change_of_translational_product_variant -synonym: "complex change of translational product variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:46:54Z - -[Term] -id: SO:0001603 -name: polypeptide_sequence_variant -def: "A sequence variant with in the CDS that causes a change in the resulting polypeptide sequence." [SO:ke] -synonym: "polypeptide sequence variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:13Z - -[Term] -id: SO:0001604 -name: amino_acid_deletion -def: "A sequence variant within a CDS resulting in the loss of an amino acid from the resulting polypeptide." [SO:ke] -synonym: "amino acid deletion" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! 
polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:36Z - -[Term] -id: SO:0001605 -name: amino_acid_insertion -def: "A sequence variant within a CDS resulting in the gain of an amino acid to the resulting polypeptide." [SO:ke] -synonym: "amino acid insertion" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:56Z - -[Term] -id: SO:0001606 -name: amino_acid_substitution -def: "A sequence variant of a codon resulting in the substitution of one amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "amino acid substitution" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:48:17Z - -[Term] -id: SO:0001607 -name: conservative_amino_acid_substitution -def: "A sequence variant of a codon causing the substitution of a similar amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "conservative amino acid substitution" EXACT [] -is_a: SO:0001606 ! amino_acid_substitution -created_by: kareneilbeck -creation_date: 2010-03-22T02:48:57Z - -[Term] -id: SO:0001608 -name: non_conservative_amino_acid_substitution -def: "A sequence variant of a codon causing the substitution of a non conservative amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "non conservative amino acid substitution" EXACT [] -is_a: SO:0001606 ! amino_acid_substitution -created_by: kareneilbeck -creation_date: 2010-03-22T02:49:23Z - -[Term] -id: SO:0001609 -name: elongated_polypeptide -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence." [SO:ke] -synonym: "elongated polypeptide" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! 
polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:49:52Z - -[Term] -id: SO:0001610 -name: elongated_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated polypeptide C terminal" EXACT [] -is_a: SO:0001609 ! elongated_polypeptide -created_by: kareneilbeck -creation_date: 2010-03-22T02:50:20Z - -[Term] -id: SO:0001611 -name: elongated_polypeptide_N_terminal -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated polypeptide N terminal" EXACT [] -is_a: SO:0001609 ! elongated_polypeptide -created_by: kareneilbeck -creation_date: 2010-03-22T02:50:31Z - -[Term] -id: SO:0001612 -name: elongated_in_frame_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes in frame elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated in frame polypeptide C terminal" EXACT [] -is_a: SO:0001610 ! elongated_polypeptide_C_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:05Z - -[Term] -id: SO:0001613 -name: elongated_out_of_frame_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes out of frame elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated polypeptide out of frame C terminal" EXACT [] -is_a: SO:0001610 ! elongated_polypeptide_C_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:20Z - -[Term] -id: SO:0001614 -name: elongated_in_frame_polypeptide_N_terminal_elongation -def: "A sequence variant with in the CDS that causes in frame elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated in frame polypeptide N terminal" EXACT [] -is_a: SO:0001611 ! 
elongated_polypeptide_N_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:49Z - -[Term] -id: SO:0001615 -name: elongated_out_of_frame_polypeptide_N_terminal -def: "A sequence variant with in the CDS that causes out of frame elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated out of frame N terminal" EXACT [] -is_a: SO:0001611 ! elongated_polypeptide_N_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:52:05Z - -[Term] -id: SO:0001616 -name: polypeptide_fusion -def: "A sequence variant that causes a fusion of two polypeptide sequences." [SO:ke] -synonym: "polypeptide fusion" EXACT [] -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:52:43Z - -[Term] -id: SO:0001617 -name: polypeptide_truncation -def: "A sequence variant of the CD that causes a truncation of the resulting polypeptide." [SO:ke] -synonym: "polypeptide truncation" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:53:07Z - -[Term] -id: SO:0001618 -name: inactive_catalytic_site -def: "A sequence variant that causes the inactivation of a catalytic site with respect to a reference sequence." [SO:ke] -synonym: "inactive catalytic site" EXACT [] -is_a: SO:0001560 ! inactive_ligand_binding_site -created_by: kareneilbeck -creation_date: 2010-03-22T03:06:14Z - -[Term] -id: SO:0001619 -name: nc_transcript_variant -def: "A transcript variant of a non coding RNA gene." [SO:ke] -comment: Within non-coding gene - Located within a gene that does not code for a protein. -synonym: "nc transcript variant" EXACT [] -synonym: "non coding transcript variant" EXACT [] -synonym: "within_non_coding_gene" EXACT dbsnp [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! 
transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:16:23Z - -[Term] -id: SO:0001620 -name: mature_miRNA_variant -def: "A transcript variant located with the sequence of the mature miRNA." [SO:ke] -comment: EBI term: Within mature miRNA - Located within a microRNA. -synonym: "mature miRNA variant" EXACT [] -synonym: "within_mature_miRNA" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001619 ! nc_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:16:58Z - -[Term] -id: SO:0001621 -name: NMD_transcript_variant -def: "A variant in a transcript that is the target of NMD." [SO:ke] -synonym: "NMD transcript variant" EXACT [] -synonym: "NMD_transcript" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:20:40Z - -[Term] -id: SO:0001622 -name: UTR_variant -def: "A transcript variant that is located within the UTR." [SO:ke] -synonym: "UTR variant" EXACT [] -synonym: "UTR_" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:22:58Z - -[Term] -id: SO:0001623 -name: 5_prime_UTR_variant -def: "A UTR variant of the 5' UTR." [SO:ke] -comment: EBI term: 5prime UTR variations - In 5prime UTR (untranslated region). -synonym: "5'UTR variant" EXACT [] -synonym: "5PRIME_UTR" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "five prime UTR variant" EXACT [] -synonym: "untranslated-5" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001622 ! UTR_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:23:29Z - -[Term] -id: SO:0001624 -name: 3_prime_UTR_variant -def: "A UTR variant of the 3' UTR." [SO:ke] -comment: EBI term 3prime UTR variations - In 3prime UTR. 
-synonym: "3'UTR variant" EXACT [] -synonym: "3PRIME_UTR" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "three prime UTR variant" EXACT [] -synonym: "untranslated-3" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001622 ! UTR_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:23:54Z - -[Term] -id: SO:0001626 -name: incomplete_terminal_codon_variant -def: "A sequence variant where at least one base of the final codon of an incompletely annotated transcript is changed." [SO:ke] -comment: EBI term: Partial codon - Located within the final, incomplete codon of a transcript with a shortened coding sequence where the end is unknown. -synonym: "incomplete terminal codon variant" EXACT [] -synonym: "partial_codon" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001590 ! terminator_codon_variant -is_a: SO:0001650 ! inframe_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:51:15Z - -[Term] -id: SO:0001627 -name: intron_variant -def: "A transcript variant occurring within an intron." [SO:ke] -comment: EBI term: Intronic variations - In intron. -synonym: "intron variant" EXACT [] -synonym: "intron_" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "intronic" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:52:38Z - -[Term] -id: SO:0001628 -name: intergenic_variant -def: "A sequence variant located in the intergenic region, between genes." [SO:ke] -comment: EBI term Intergenic variations - More than 5 kb either upstream or downstream of a transcript. -synonym: "intergenic" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "intergenic variant" EXACT [] -is_a: SO:0001878 ! 
feature_variant -created_by: kareneilbeck -creation_date: 2010-03-23T05:07:37Z - -[Term] -id: SO:0001629 -name: splice_site_variant -def: "A sequence variant that changes the first two or last two bases of an intron, or the 5th base from the start of the intron in the orientation of the transcript." [http://ensembl.org/info/docs/variation/index.html] -comment: EBI term - essential splice site - In the first 2 or the last 2 base pairs of an intron. The 5th base is on the donor (5') side of the intron. Updated to b in line with Cancer Genome Project at the Sanger. -synonym: "essential_splice_site" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "splice site variant" EXACT [] -is_a: SO:0001627 ! intron_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:42:00Z - -[Term] -id: SO:0001630 -name: splice_region_variant -def: "A sequence variant in which a change has occurred within the region of the splice site, either within 1-3 bases of the exon or 3-8 bases of the intron." [http://ensembl.org/info/docs/variation/index.html] -comment: EBI term: splice site - 1-3 bps into an exon or 3-8 bps into an intron. -synonym: "splice region variant" EXACT [] -synonym: "splice_region_variant" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:46:02Z - -[Term] -id: SO:0001631 -name: upstream_gene_variant -def: "A sequence variant located 5' of a gene." [SO:ke] -comment: Different groups annotate up and downstream to different lengths. The subtypes are specific and are backed up with cross references. -synonym: "upstream gene variant" EXACT [] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:49:13Z - -[Term] -id: SO:0001632 -name: downstream_gene_variant -def: "A sequence variant located 3' of a gene." [SO:ke] -comment: Different groups annotate up and downstream to different lengths. 
The subtypes are specific and are backed up with cross references. -synonym: "downstream gene variant" EXACT [] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:49:38Z - -[Term] -id: SO:0001633 -name: 5KB_downstream_variant -def: "A sequence variant located within 5 KB of the end of a gene." [SO:ke] -comment: EBI term Downstream variations - Within 5 kb downstream of the 3prime end of a transcript. -synonym: "5KB downstream variant" EXACT [] -synonym: "downstream" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "within 5KB downstream" RELATED [] -is_a: SO:0001632 ! downstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:50:16Z - -[Term] -id: SO:0001634 -name: 500B_downstream_variant -def: "A sequence variant located within a half KB of the end of a gene." [SO:ke] -synonym: "500B downstream variant" EXACT [] -synonym: "near-gene-3" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001633 ! 5KB_downstream_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:50:42Z - -[Term] -id: SO:0001635 -name: 5KB_upstream_variant -def: "A sequence variant located within 5KB 5' of a gene." [SO:ke] -comment: EBI term Upstream variations - Within 5 kb upstream of the 5prime end of a transcript. -synonym: "5kb upstream variant" EXACT [] -synonym: "upstream" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001631 ! upstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:51:06Z - -[Term] -id: SO:0001636 -name: 2KB_upstream_variant -def: "A sequence variant located within 2KB 5' of a gene." [SO:ke] -synonym: "2KB upstream variant" EXACT [] -synonym: "near-gene-5" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001635 ! 
5KB_upstream_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:51:22Z - -[Term] -id: SO:0001637 -name: rRNA_gene -def: "A gene that encodes for ribosomal RNA." [SO:ke] -synonym: "rRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:10:32Z - -[Term] -id: SO:0001638 -name: piRNA_gene -def: "A gene that encodes for an piwi associated RNA." [SO:ke] -synonym: "piRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:11:36Z - -[Term] -id: SO:0001639 -name: RNase_P_RNA_gene -def: "A gene that encodes an RNase P RNA." [SO:ke] -synonym: "RNase P RNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:13:23Z - -[Term] -id: SO:0001640 -name: RNase_MRP_RNA_gene -def: "A gene that encodes a RNase_MRP_RNA." [SO:ke] -synonym: "RNase MRP RNA gene" RELATED [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:13:58Z - -[Term] -id: SO:0001641 -name: lincRNA_gene -def: "A gene that encodes large intervening non-coding RNA." [SO:ke] -synonym: "lincRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:14:24Z - -[Term] -id: SO:0001642 -name: mathematically_defined_repeat -def: "A mathematically defined repeat (MDR) is a experimental feature that is determined by querying overlapping oligomers of length k against a database of shotgun sequence data and identifying regions in the query sequence that exceed a statistically determined threshold of repetitiveness." [SO:jestill] -comment: Mathematically defined repeat regions are determined without regard to the biological origin of the repetitive region. The repeat units of a MDR are the overlapping oligomers of size k that were used to for the query. 
Tools that can annotate mathematically defined repeats include Tallymer (Kurtz et al 2008, BMC Genomics: 517) and RePS (Wang et al, Genome Res 12(5): 824-831.). -synonym: "mathematically defined repeat" EXACT [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2010-05-03T11:50:14Z - -[Term] -id: SO:0001643 -name: telomerase_RNA_gene -def: "A telomerase RNA gene is a non coding RNA gene the RNA product of which is a component of telomerase." [SO:ke] -synonym: "Telomerase RNA component" EXACT [] -synonym: "telomerase RNA gene" EXACT [] -synonym: "TERC" EXACT [] -xref: http:http://en.wikipedia.org/wiki/Telomerase_RNA_component "wikipedia" -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-05-18T05:26:38Z - -[Term] -id: SO:0001644 -name: targeting_vector -def: "An engineered vector that is able to take part in homologous recombination in a host with the intent of introducing site specific genomic modifications." [MGD:tm, PMID:10354467] -synonym: "targeting vector" RELATED [] -is_a: SO:0000440 ! vector_replicon -is_a: SO:0000804 ! engineered_region -relationship: has_part SO:0000853 ! homologous_region -relationship: has_quality SO:0000783 ! engineered -created_by: kareneilbeck -creation_date: 2010-05-28T02:05:25Z - -[Term] -id: SO:0001645 -name: genetic_marker -def: "A measurable sequence feature that varies within a population." [SO:db] -synonym: "genetic marker" RELATED [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-05-28T02:33:07Z - -[Term] -id: SO:0001646 -name: DArT_marker -def: "A genetic marker, discovered using Diversity Arrays Technology (DArT) technology." [SO:ke] -synonym: "DArT marker" EXACT [] -is_a: SO:0001645 ! 
genetic_marker -created_by: kareneilbeck -creation_date: 2010-05-28T02:34:43Z - -[Term] -id: SO:0001647 -name: kozak_sequence -def: "A kind of ribosome entry site, specific to Eukaryotic organisms that overlaps part of both 5' UTR and CDS sequence." [SO:ke] -subset: SOFA -synonym: "kozak consensus" EXACT [] -synonym: "kozak consensus sequence" EXACT [] -synonym: "kozak sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Kozak_consensus_sequence "wikipedia" -is_a: SO:0000139 ! ribosome_entry_site -created_by: kareneilbeck -creation_date: 2010-06-07T03:12:20Z - -[Term] -id: SO:0001648 -name: nested_transposon -def: "A transposon that is disrupted by the insertion of another element." [SO:ke] -synonym: "nested transposon" EXACT [] -is_a: SO:0000101 ! transposable_element -created_by: kareneilbeck -creation_date: 2010-06-23T03:22:57Z - -[Term] -id: SO:0001649 -name: nested_repeat -def: "A repeat that is disrupted by the insertion of another element." [SO:ke] -synonym: "nested repeat" RELATED [] -is_a: SO:0000657 ! repeat_region -created_by: kareneilbeck -creation_date: 2010-06-23T03:24:55Z - -[Term] -id: SO:0001650 -name: inframe_variant -def: "A sequence variant which does not cause a disruption of the translational reading frame." [SO:ke] -synonym: "cds-indel" EXACT dbsnp [] -synonym: "inframe variant" EXACT [] -is_a: SO:0001818 ! protein_altering_variant -created_by: kareneilbeck -creation_date: 2010-07-19T01:24:44Z - -[Term] -id: SO:0001653 -name: retinoic_acid_responsive_element -def: "A transcription factor binding site of variable direct repeats of the sequence PuGGTCA spaced by five nucleotides (DR5) found in the promoters of retinoic acid-responsive genes, to which retinoic acid receptors bind." [PMID:11327309, PMID:19917671] -synonym: "RARE" EXACT [] -synonym: "retinoic acid responsive element" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000167 ! 
promoter -created_by: kareneilbeck -creation_date: 2010-08-03T10:46:12Z - -[Term] -id: SO:0001654 -name: nucleotide_to_protein_binding_site -def: "A binding site that, in the nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -subset: SOFA -synonym: "nucleotide to protein binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:26:05Z - -[Term] -id: SO:0001655 -name: nucleotide_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with nucleotide residues." [SO:cb] -comment: See GO:0000166 : nucleotide binding. -synonym: "nucleotide binding site" EXACT [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:30:04Z - -[Term] -id: SO:0001656 -name: metal_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with metal ions." [SO:cb] -comment: See GO:0046872 : metal ion binding. -synonym: "metal binding site" RELATED [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:31:42Z - -[Term] -id: SO:0001657 -name: ligand_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with a small molecule such as a drug, or hormone." [SO:ke] -synonym: "ligand binding site" EXACT [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:32:58Z - -[Term] -id: SO:0001658 -name: nested_tandem_repeat -def: "An NTR is a nested repeat of two distinct tandem motifs interspersed with each other." [SO:AF] -comment: Tracker ID: 3052459. -synonym: "nested tandem repeat" EXACT [] -synonym: "NTR" EXACT [] -is_a: SO:0001649 ! nested_repeat -created_by: kareneilbeck -creation_date: 2010-08-26T09:36:16Z - -[Term] -id: SO:0001659 -name: promoter_element -synonym: "promoter element" EXACT [] -is_a: SO:0000713 ! 
DNA_motif -relationship: overlaps SO:0000235 ! TF_binding_site -created_by: kareneilbeck -creation_date: 2010-10-01T11:48:32Z - -[Term] -id: SO:0001660 -name: core_promoter_element -synonym: "core promoter element" EXACT [] -synonym: "general transcription factor binding site" RELATED [] -is_a: SO:0001659 ! promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T11:49:03Z - -[Term] -id: SO:0001661 -name: RNA_polymerase_II_TATA_box -def: "A TATA box core promoter of a gene transcribed by RNA polymerase II." [PMID:16858867] -synonym: "RNA polymerase II TATA box" EXACT [] -is_a: SO:0000174 ! TATA_box -relationship: part_of SO:0001669 ! RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:42:12Z - -[Term] -id: SO:0001662 -name: RNA_polymerase_III_TATA_box -def: "A TATA box core promoter of a gene transcribed by RNA polymerase III." [SO:ke] -synonym: "RNA polymerase III TATA box" EXACT [] -is_a: SO:0000174 ! TATA_box -relationship: part_of SO:0000171 ! RNApol_III_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:43:16Z - -[Term] -id: SO:0001663 -name: BREd_motif -def: "A core TRNA polymerase II promoter element with consensus (G/A)T(T/G/A)(T/A)(G/T)(T/G)(T/G)." [PMID:16858867] -synonym: "BREd" EXACT [] -synonym: "BREd motif" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:49:55Z - -[Term] -id: SO:0001664 -name: DCE -def: "A discontinuous core element of RNA polymerase II transcribed genes, situated downstream of the TSS. It is composed of three sub elements: SI, SII and SIII." [PMID:16858867] -synonym: "downstream core element" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! 
RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:56:41Z - -[Term] -id: SO:0001665 -name: DCE_SI -def: "A sub element of the DCE core promoter element, with consensus sequence CTTC." [PMID:16858867, SO:ke] -synonym: "DCE SI" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:10Z - -[Term] -id: SO:0001666 -name: DCE_SII -def: "A sub element of the DCE core promoter element with consensus sequence CTGT." [PMID:16858867, SO:ke] -synonym: "DCE SII" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:30Z - -[Term] -id: SO:0001667 -name: DCE_SIII -def: "A sub element of the DCE core promoter element with consensus sequence AGC." [PMID:16858867, SO:ke] -synonym: "DCE SIII" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:44Z - -[Term] -id: SO:0001668 -name: proximal_promoter_element -synonym: "proximal promoter element" RELATED [] -synonym: "specific transcription factor binding site" RELATED [] -is_a: SO:0001678 ! regulatory_promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T03:10:23Z - -[Term] -id: SO:0001669 -name: RNApol_II_core_promoter -def: "The minimal portion of the promoter required to properly initiate transcription in RNA polymerase II transcribed genes." [PMID:16858867] -synonym: "RNApol II core promoter" EXACT [] -is_a: SO:0000170 ! RNApol_II_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T03:13:41Z - -[Term] -id: SO:0001670 -name: distal_promoter_element -synonym: "distal promoter element" RELATED [] -is_a: SO:0001678 ! 
regulatory_promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T03:21:08Z - -[Term] -id: SO:0001671 -name: bacterial_RNApol_promoter_sigma_70 -synonym: "bacterial RNA polymerase promoter sigma 70" EXACT [] -is_a: SO:0000613 ! bacterial_RNApol_promoter -created_by: kareneilbeck -creation_date: 2010-10-06T01:41:34Z - -[Term] -id: SO:0001672 -name: bacterial_RNApol_promoter_sigma54 -synonym: "bacterial RNA polymerase promoter sigma54" EXACT [] -is_a: SO:0000613 ! bacterial_RNApol_promoter -created_by: kareneilbeck -creation_date: 2010-10-06T01:42:37Z - -[Term] -id: SO:0001673 -name: minus_12_signal -def: "A conserved region about 12-bp upstream of the start point of bacterial transcription units, involved with sigma factor 54." [PMID:18331472] -synonym: "minus 12 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001672 ! bacterial_RNApol_promoter_sigma54 -created_by: kareneilbeck -creation_date: 2010-10-06T01:44:57Z - -[Term] -id: SO:0001674 -name: minus_24_signal -def: "A conserved region about 12-bp upstream of the start point of bacterial transcription units, involved with sigma factor 54." [PMID:18331472] -synonym: "minus 24 signal" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001672 ! bacterial_RNApol_promoter_sigma54 -created_by: kareneilbeck -creation_date: 2010-10-06T01:45:24Z - -[Term] -id: SO:0001675 -name: A_box_type_1 -def: "An A box within an RNA polymerase III type 1 promoter." [SO:ke] -comment: The A box can be found in the promoters of type 1 and type 2 (pol III) so sub-typing here allows the part of relationship of the subtypes to remain true. -synonym: "A box type 1" RELATED [] -is_a: SO:0000619 ! A_box -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 -created_by: kareneilbeck -creation_date: 2010-10-06T05:43:43Z - -[Term] -id: SO:0001676 -name: A_box_type_2 -def: "An A box within an RNA polymerase III type 2 promoter." 
[SO:ke] -comment: The A box can be found in the promoters of type 1 and type 2 (pol III) so sub-typing here allows the part of relationship of the subtypes to remain true. -synonym: "A box type 2" RELATED [] -is_a: SO:0000619 ! A_box -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 -created_by: kareneilbeck -creation_date: 2010-10-06T05:44:18Z - -[Term] -id: SO:0001677 -name: intermediate_element -def: "A core promoter region of RNA polymerase III type 1 promoters." [PMID:12381659] -synonym: "IE" EXACT [] -synonym: "intermediate element" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 -created_by: kareneilbeck -creation_date: 2010-10-06T05:52:03Z - -[Term] -id: SO:0001678 -name: regulatory_promoter_element -def: "A promoter element that is not part of the core promoter, but provides the promoter with a specific regulatory region." [PMID:12381659] -synonym: "regulatory promoter element" RELATED [] -is_a: SO:0001659 ! promoter_element -created_by: kareneilbeck -creation_date: 2010-10-07T04:39:48Z - -[Term] -id: SO:0001679 -name: transcription_regulatory_region -def: "A regulatory region that is involved in the control of the process of transcription." [SO:ke] -subset: SOFA -synonym: "transcription regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:49:35Z - -[Term] -id: SO:0001680 -name: translation_regulatory_region -def: "A regulatory region that is involved in the control of the process of translation." [SO:ke] -synonym: "translation regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:52:45Z - -[Term] -id: SO:0001681 -name: recombination_regulatory_region -def: "A regulatory region that is involved in the control of the process of recombination." [SO:ke] -synonym: "recombination regulatory region" EXACT [] -is_a: SO:0005836 ! 
regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:53:35Z - -[Term] -id: SO:0001682 -name: replication_regulatory_region -def: "A regulatory region that is involved in the control of the process of nucleotide replication." [SO:ke] -synonym: "replication regulatory region" RELATED [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:54:09Z - -[Term] -id: SO:0001683 -name: sequence_motif -def: "A sequence motif is a nucleotide or amino-acid sequence pattern that may have biological significance." [http://en.wikipedia.org/wiki/Sequence_motif] -subset: SOFA -synonym: "sequence motif" RELATED [] -xref: http://en.wikipedia.org/wiki/Sequence_motif "wikipedia" -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-10-14T04:13:22Z - -[Term] -id: SO:0001684 -name: experimental_feature_attribute -def: "An attribute of an experimentally derived feature." [SO:ke] -synonym: "experimental feature attribute" RELATED [] -is_a: SO:0000733 ! feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:22:23Z - -[Term] -id: SO:0001685 -name: score -def: "The score of an experimentally derived feature such as a p-value." [SO:ke] -is_a: SO:0001684 ! experimental_feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:23:16Z - -[Term] -id: SO:0001686 -name: quality_value -def: "An experimental feature attribute that defines the quality of the feature in a quantitative way, such as a phred quality score." [SO:ke] -synonym: "quality value" RELATED [] -is_a: SO:0001684 ! experimental_feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:24:11Z - -[Term] -id: SO:0001687 -name: restriction_enzyme_recognition_site -def: "The nucleotide region (usually a palindrome) that is recognized by a restriction enzyme. This may or may not be equal to the restriction enzyme binding site." 
[SO:ke] -synonym: "restriction endonuclease recognition site" EXACT [] -synonym: "restriction enzyme recognition site" EXACT [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-10-29T12:29:57Z - -[Term] -id: SO:0001688 -name: restriction_enzyme_cleavage_junction -def: "The boundary at which a restriction enzyme breaks the nucleotide sequence." [SO:ke] -synonym: "restriction enzyme cleavage junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2010-10-29T12:35:02Z - -[Term] -id: SO:0001689 -name: five_prime_restriction_enzyme_junction -def: "The restriction enzyme cleavage junction on the 5' strand of the nucleotide sequence." [SO:ke] -synonym: "5' restriction enzyme junction" EXACT [] -is_a: SO:0001694 ! single_strand_restriction_enzyme_cleavage_site -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:36:24Z - -[Term] -id: SO:0001690 -name: three_prime_restriction_enzyme_junction -synonym: "3' restriction enzyme junction" EXACT [] -is_a: SO:0001694 ! single_strand_restriction_enzyme_cleavage_site -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:37:52Z - -[Term] -id: SO:0001691 -name: blunt_end_restriction_enzyme_cleavage_site -synonym: "blunt end restriction enzyme cleavage site" EXACT [] -is_a: SO:0001687 ! restriction_enzyme_recognition_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:39:53Z - -[Term] -id: SO:0001692 -name: sticky_end_restriction_enzyme_cleavage_site -synonym: "sticky end restriction enzyme cleavage site" RELATED [] -is_a: SO:0001687 ! 
restriction_enzyme_recognition_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:40:50Z - -[Term] -id: SO:0001693 -name: blunt_end_restriction_enzyme_cleavage_junction -def: "A restriction enzyme cleavage site where both strands are cut at the same position." [SO:ke] -synonym: "blunt end restriction enzyme cleavage site" RELATED [] -is_a: SO:0001688 ! restriction_enzyme_cleavage_junction -relationship: part_of SO:0001691 ! blunt_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:43:14Z - -[Term] -id: SO:0001694 -name: single_strand_restriction_enzyme_cleavage_site -def: "A restriction enzyme cleavage site whereby only one strand is cut." [SO:ke] -synonym: "single strand restriction enzyme cleavage site" RELATED [] -is_a: SO:0001688 ! restriction_enzyme_cleavage_junction -created_by: kareneilbeck -creation_date: 2010-10-29T12:44:48Z - -[Term] -id: SO:0001695 -name: restriction_enzyme_single_strand_overhang -def: "A terminal region of DNA sequence where the end of the region is not blunt ended." [SO:ke] -synonym: "single strand overhang" EXACT [] -synonym: "sticky end" RELATED [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:48:35Z - -[Term] -id: SO:0001696 -name: experimentally_defined_binding_region -def: "A region that has been implicated in binding although the exact coordinates of binding may be unknown." [SO:ke] -synonym: "experimentally defined binding region" RELATED [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2010-11-02T11:39:59Z - -[Term] -id: SO:0001697 -name: ChIP_seq_region -def: "A region of sequence identified by CHiP seq technology to contain a protein binding site." [SO:ke] -synonym: "ChIP seq region" RELATED [] -is_a: SO:0001696 ! experimentally_defined_binding_region -relationship: contains SO:0000410 ! 
protein_binding_site -created_by: kareneilbeck -creation_date: 2010-11-02T11:43:07Z - -[Term] -id: SO:0001698 -name: ASPE_primer -def: "\"A primer containing an SNV at the 3' end for accurate genotyping." [http://www.ncbi.nlm.nih.gov/pubmed/11252801] -synonym: "allele specific primer extension primer" EXACT [] -synonym: "ASPE primer" EXACT [] -is_a: SO:0000112 ! primer -created_by: kareneilbeck -creation_date: 2010-11-11T03:25:21Z - -[Term] -id: SO:0001699 -name: dCAPS_primer -def: "A primer with one or more mis-matches to the DNA template corresponding to a position within a restriction enzyme recognition site." [http://www.ncbi.nlm.nih.gov/pubmed/9628033] -synonym: "dCAPS primer" EXACT [] -synonym: "derived cleaved amplified polymorphic primer" EXACT [] -is_a: SO:0000112 ! primer -created_by: kareneilbeck -creation_date: 2010-11-11T03:27:09Z - -[Term] -id: SO:0001700 -name: histone_modification -def: "Histone modification is a post translationally modified region whereby residues of the histone protein are modified by methylation, acetylation, phosphorylation, ubiquitination, sumoylation, citrullination, or ADP-ribosylation." [http:en.wikipedia.org/wiki/Histone] -synonym: "histone modification" EXACT [] -synonym: "histone modification site" RELATED [] -is_a: SO:0001089 ! post_translationally_modified_region -is_a: SO:0001720 ! epigenetically_modified_region -relationship: has_quality SO:0000133 ! epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-31T10:22:08Z - -[Term] -id: SO:0001701 -name: histone_methylation_site -def: "A histone modification site where the modification is the methylation of the residue." [SO:ke] -synonym: "histone methylation" EXACT [] -synonym: "histone methylation site" EXACT [] -is_a: SO:0001700 ! 
histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:02Z - -[Term] -id: SO:0001702 -name: histone_acetylation_site -def: "A histone modification where the modification is the acylation of the residue." [SO:ke] -synonym: "histone acetylation" EXACT [] -synonym: "histone acetylatoin site" EXACT [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:27Z - -[Term] -id: SO:0001703 -name: H3K9_acetylation_site -def: "A kind of histone modification site, whereby the 9th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 acetylation site" EXACT [] -synonym: "H3K9Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:05Z - -[Term] -id: SO:0001704 -name: H3K14_acetylation_site -def: "A kind of histone modification site, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K14 acetylation site" EXACT [] -synonym: "H3K14Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:53Z - -[Term] -id: SO:0001705 -name: H3K4_monomethylation_site -def: "A kind of histone modification, whereby the 4th residue (a lysine), from the start of the H3 protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 mono-methylation site" EXACT [] -synonym: "H3K4me1" RELATED [] -is_a: SO:0001734 ! H3K4_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:28:14Z - -[Term] -id: SO:0001706 -name: H3K4_trimethylation -def: "A kind of histone modification site, whereby the 4th residue (a lysine), from the start of the H3 protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 tri-methylation" EXACT [] -synonym: "H3K4me3" RELATED [] -is_a: SO:0001734 ! 
H3K4_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:29:12Z - -[Term] -id: SO:0001707 -name: H3K9_trimethylation_site -def: "A kind of histone modification site, whereby the 9th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 tri-methylation site" EXACT [] -synonym: "H3K9Me3" RELATED [] -is_a: SO:0001736 ! H3K9_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:30:34Z - -[Term] -id: SO:0001708 -name: H3K27_monomethylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2K27 mono-methylation site" EXACT [] -synonym: "H2K27Me1" RELATED [] -is_a: SO:0001732 ! H3K27_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:31:54Z - -[Term] -id: SO:0001709 -name: H3K27_trimethylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K27 tri-methylation site" EXACT [] -synonym: "H3K27Me3" RELATED [] -is_a: SO:0001732 ! H3K27_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:32:41Z - -[Term] -id: SO:0001710 -name: H3K79_monomethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is mono- methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 mono-methylation site" EXACT [] -synonym: "H3K79me1" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:33:42Z - -[Term] -id: SO:0001711 -name: H3K79_dimethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is di-methylated." 
[http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 di-methylation site" EXACT [] -synonym: "H3K79Me2" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:34:39Z - -[Term] -id: SO:0001712 -name: H3K79_trimethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 tri-methylation site" EXACT [] -synonym: "H3K79Me3" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:35:30Z - -[Term] -id: SO:0001713 -name: H4K20_monomethylation_site -def: "A kind of histone modification site, whereby the 20th residue (a lysine), from the start of the H34histone protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H4K20 mono-methylation site" EXACT [] -synonym: "H4K20Me1" RELATED [] -is_a: SO:0001701 ! histone_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:36:43Z - -[Term] -id: SO:0001714 -name: H2BK5_monomethylation_site -def: "A kind of histone modification site, whereby the 5th residue (a lysine), from the start of the H2B protein is methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2BK5 mono-methylation site" EXACT [] -is_a: SO:0001701 ! histone_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:38:12Z - -[Term] -id: SO:0001715 -name: ISRE -def: "An ISRE is a transcriptional cis regulatory region, containing the consensus region: YAGTTTC(A/T)YTTTYCC, responsible for increased transcription via interferon binding." [http://genesdev.cshlp.org/content/2/4/383.abstrac] -comment: Term requested via tracker (2981725) by Alan Ruttenberg, April 2010. It has been described as both an enhancer and a promoter, so the parent is the more general term. -synonym: "interferon stimulated response element" EXACT [] -is_a: SO:0001055 ! 
transcriptional_cis_regulatory_region -created_by: kareneilbeck -creation_date: 2010-04-05T11:15:08Z - -[Term] -id: SO:0001716 -name: histone_ubiqitination_site -def: "A histone modification site where ubiquitin may be added." [SO:ke] -synonym: "histone ubiquitination site" RELATED [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-04-13T10:12:18Z - -[Term] -id: SO:0001717 -name: H2B_ubiquitination_site -def: "A histone modification site on H2B where ubiquitin may be added." [SO:ke] -synonym: "H2BUbiq" RELATED [] -is_a: SO:0001716 ! histone_ubiqitination_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:13:28Z - -[Term] -id: SO:0001718 -name: H3K18_acetylation_site -def: "A kind of histone modification site, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K18 acetylation site" EXACT [] -synonym: "H3K18Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:39:35Z - -[Term] -id: SO:0001719 -name: H3K23_acylation_site -def: "A kind of histone modification, whereby the 23rd residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K23 acylation site" RELATED [] -synonym: "H3K23Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:42:45Z - -[Term] -id: SO:0001720 -name: epigenetically_modified_region -def: "A biological region implicated in inherited changes caused by mechanisms other than changes in the underlying DNA sequence." [http://en.wikipedia.org/wiki/Epigenetics, SO:ke] -subset: SOFA -synonym: "epigenetically modified region" RELATED [] -is_a: SO:0001411 ! biological_region -relationship: has_quality SO:0000133 ! 
epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-27T12:02:29Z - -[Term] -id: SO:0001721 -name: H3K27_acylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K27 acylation site" EXACT [] -synonym: "H3K27Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:44:09Z - -[Term] -id: SO:0001722 -name: H3K36_monomethylation_site -def: "A kind of histone modification site, whereby the 36th residue (a lysine), from the start of the H3 histone protein is mono-methylated." [SO:ke] -synonym: "H3K36 mono-methylation site" EXACT [] -synonym: "H3K36. -synonym: "transcriptionally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -def: "Expressed in relatively constant amounts without regard to cellular environmental conditions such as the concentration of a particular substrate." [SO:ke] -synonym: "transcriptionally constitutive" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -def: "An inducer molecule is required for transcription to occur." [SO:ke] -synonym: "transcriptionally induced" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -def: "A repressor molecule is required for transcription to stop." [SO:ke] -synonym: "transcriptionally repressed" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000127 -name: silenced_gene -def: "A gene that is silenced." [SO:xp] -synonym: "silenced gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000893 ! silenced - -[Term] -id: SO:0000128 -name: gene_silenced_by_DNA_modification -def: "A gene that is silenced by DNA modification." 
[SO:xp] -synonym: "gene silenced by DNA modification" EXACT [] -is_a: SO:0000127 ! silenced_gene -relationship: has_quality SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000129 -name: gene_silenced_by_DNA_methylation -def: "A gene that is silenced by DNA methylation." [SO:xp] -synonym: "gene silenced by DNA methylation" EXACT [] -synonym: "methylation-silenced gene" EXACT [] -is_a: SO:0000128 ! gene_silenced_by_DNA_modification -relationship: has_quality SO:0000895 ! silenced_by_DNA_methylation - -[Term] -id: SO:0000130 -name: post_translationally_regulated -def: "An attribute describing a gene that is regulated after it has been translated." [SO:ke] -synonym: "post translationally regulated" EXACT [] -synonym: "post-translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000131 -name: translationally_regulated -def: "An attribute describing a gene that is regulated as it is translated." [SO:ke] -synonym: "translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "DNA reverse primer" EXACT [] -synonym: "reverse DNA primer" EXACT [] -synonym: "reverse primer" EXACT [] -synonym: "reverse primer oligo" EXACT [] -synonym: "reverse primer oligonucleotide" EXACT [] -synonym: "reverse primer sequence" EXACT [] -is_a: SO:0000112 ! primer -relationship: has_quality SO:0001031 ! reverse - -[Term] -id: SO:0000133 -name: epigenetically_modified -def: "This attribute describes a gene where heritable changes other than those in the DNA sequence occur. These changes include: modification to the DNA (such as DNA methylation, the covalent modification of cytosine), and post-translational modification of histones." 
[SO:ke] -synonym: "epigenetically modified" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000134 -name: genomically_imprinted -def: "Imprinted genes are epigenetically modified genes that are expressed monoallelically according to their parent of origin." [SO:ke] -synonym: "genomically imprinted" EXACT [] -synonym: "imprinted" BROAD [] -xref: http:http://en.wikipedia.org/wiki/Genomic_imprinting "wiki" -is_a: SO:0000119 ! regulated -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000135 -name: maternally_imprinted -def: "The maternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "maternally imprinted" EXACT [] -is_a: SO:0000134 ! genomically_imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -def: "The paternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "paternally imprinted" EXACT [] -is_a: SO:0000134 ! genomically_imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -def: "Allelic exclusion is a process occurring in diploid organisms, where a gene is inactivated and not expressed in that cell." [SO:ke] -comment: Examples are x-inactivation and immunoglobulin formation. -synonym: "allelically excluded" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000138 -name: gene_rearranged_at_DNA_level -def: "An epigenetically modified gene, rearranged at the DNA level." [SO:xp] -synonym: "gene rearranged at DNA level" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -relationship: has_quality SO:0000904 ! rearranged_at_DNA_level - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! 
five_prime_UTR - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0001680 ! translation_regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -synonym: "DNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000145 -name: recoded_codon -def: "A codon that has been redefined at translation. The redefinition may be as a result of translational bypass, translational frameshifting or stop codon readthrough." [SO:xp] -synonym: "recoded codon" EXACT [] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000146 -name: capped -def: "An attribute describing when a sequence, usually an mRNA is capped by the addition of a modified guanine nucleotide at the 5' end." [SO:ke] -is_a: SO:0000237 ! 
transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0001876 ! partial_genomic_sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unavailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. -synonym: "yeast artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "bacterial artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000154 -name: PAC -def: "The P1-derived artificial chromosome are DNA constructs that are derived from the DNA of P1 bacteriophage. They can carry large amounts (about 100-300 kilobases) of other sequences for a variety of bioengineering purposes. It is one type of vector used to clone DNA fragments (100- to 300-kb insert size; average, 150 kb) in Escherichia coli cells." [http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. Drosophila melanogaster PACs carry an average insert size of 80 kb. The library represents a 6-fold coverage of the genome. -synonym: "P1" EXACT [] -synonym: "P1 artificial chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000155 -name: plasmid -def: "A self replicating, using the hosts cellular machinery, often circular nucleic acid molecule that is distinct from a chromosome in the organism." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "plasmid sequence" EXACT [] -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as a plasmid or packaged as a phage,since they retain the lambda cos sites." [SO:ma] -comment: Paper: vans GA et al. 
High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1):9-20. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cosmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Cosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma] -synonym: "phagemid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Phagemid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilizes the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource: mapping and analysis of 96 clones. Genomics 1996. -synonym: "fosmid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Fosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000159 -name: deletion -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -xref: loinc:LA6692-3 "Deletion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_obsolete: true - -[Term] -id: SO:0000161 -name: methylated_adenine -def: "A modified base in which adenine has been methylated." 
[SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -synonym: "methylated_A" EXACT [] -is_a: SO:0000306 ! methylated_DNA_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinery. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a place holder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000166 -name: enhancer_bound_by_factor -def: "An enhancer bound by a factor." [SO:xp] -synonym: "enhancer bound by factor" EXACT [] -is_a: SO:0000165 ! enhancer -relationship: has_quality SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." [SO:ke] -synonym: "pol I promoter" EXACT [] -synonym: "polymerase I promoter" EXACT [] -synonym: "RNA polymerase A promoter" EXACT [] -synonym: "RNApol I promoter" EXACT [] -is_a: SO:0001203 ! 
RNA_polymerase_promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke] -synonym: "pol II promoter" RELATED [] -synonym: "polymerase II promoter" EXACT [] -synonym: "RNA polymerase B promoter" EXACT [] -synonym: "RNApol II promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." [SO:ke] -synonym: "pol III promoter" EXACT [] -synonym: "polymerase III promoter" EXACT [] -synonym: "RNA polymerase C promoter" EXACT [] -synonym: "RNApol III promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "CAAT box" EXACT [] -synonym: "CAAT signal" EXACT [] -synonym: "CAAT-box" EXACT [] -xref: http://en.wikipedia.org/wiki/CAAT_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000173 -name: GC_rich_promoter_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "GC rich promoter region" EXACT [] -synonym: "GC-rich region" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:16858867] -comment: Binds TBP. -synonym: "Goldstein-Hogness box" EXACT [] -synonym: "TATA box" EXACT [] -xref: http://en.wikipedia.org/wiki/TATA_box "wiki" -is_a: SO:0001660 ! core_promoter_element - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT. This region is associated with sigma factor 70." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-10 signal" EXACT [] -synonym: "minus 10 signal" EXACT [] -synonym: "Pribnow box" EXACT [] -synonym: "Pribnow Schaller box" EXACT [] -synonym: "Pribnow-Schaller box" EXACT [] -xref: http://en.wikipedia.org/wiki/Pribnow_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001671 ! bacterial_RNApol_promoter_sigma_70 -relationship: part_of SO:0001913 ! bacterial_RNApol_promoter_sigma_ecf - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA. This region is associated with sigma factor 70." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-35 signal" EXACT [] -synonym: "minus 35 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001671 ! bacterial_RNApol_promoter_sigma_70 -relationship: part_of SO:0001913 ! bacterial_RNApol_promoter_sigma_ecf - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." 
[SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." [http://www.dddmag.com/Glossary.aspx#r] -synonym: "class I" RELATED [] -synonym: "class I transposon" EXACT [] -synonym: "retrotransposon element" EXACT [] -xref: http://en.wikipedia.org/wiki/Retrotransposon "wiki" -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." [SO:ke] -synonym: "class II" RELATED [] -synonym: "class II transposon" EXACT [] -synonym: "DNA transposon" EXACT [] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! 
gene_component_region - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -synonym: "U2 intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." [SO:ke] -synonym: "long terminal repeat retrotransposon" EXACT [] -synonym: "LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -synonym: "non LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -synonym: "5' intron" EXACT [] -synonym: "5' intron sequence" EXACT [] -synonym: "five prime intron" EXACT [] -is_a: SO:0000188 ! 
intron - -[Term] -id: SO:0000191 -name: interior_intron -synonym: "interior intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -synonym: "3' intron" EXACT [] -synonym: "3' intron sequence" RELATED [] -synonym: "three prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." [GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "LINE" EXACT [] -synonym: "LINE element" EXACT [] -synonym: "Long interspersed element" EXACT [] -synonym: "Long interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon (here, 'codon' is inclusive of the stop_codon)." [SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! 
five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke] -subset: DBVAR -synonym: "transchr" RELATED [http://www.ncbi.nlm.nih.gov/dbvar/] -synonym: "translocated sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000201 -name: interior_exon -def: "An exon that is bounded by 5' and 3' splice sites." [PMID:10373547] -synonym: "interior exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The coding exon that is most 3-prime on a given transcript." [SO:ma] -synonym: "3' coding exon" RELATED [] -synonym: "three prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." [SO:ke] -synonym: "Short interspersed element" EXACT [] -synonym: "Short interspersed nuclear element" EXACT [] -synonym: "SINE element" EXACT [] -xref: http://en.wikipedia.org/wiki/Short_interspersed_nuclear_element "wiki" -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_variation -synonym: "simple sequence length polymorphism" RELATED [] -synonym: "simple sequence length variation" EXACT [] -synonym: "SSLP" RELATED [] -is_a: SO:0000248 ! sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "terminal inverted repeat element" EXACT [] -synonym: "TIR element" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." 
[SO:ke] -subset: SOFA -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253)." [SO:ke] -synonym: "tRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -def: "A primary transcript encoding alanyl tRNA." [SO:ke] -synonym: "alanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." [SO:ke] -synonym: "arginine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -synonym: "asparagine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -synonym: "aspartic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." [SO:ke] -synonym: "cysteine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." 
[SO:ke] -synonym: "glutamine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." [SO:ke] -synonym: "glycine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." [SO:ke] -synonym: "histidine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke] -synonym: "isoleucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -synonym: "leucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -synonym: "lysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -synonym: "methionine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke] -synonym: "phenylalanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -synonym: "proline tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -synonym: "serine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -synonym: "threonine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -synonym: "tryptophan tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." [SO:ke] -synonym: "tyrosine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." [SO:ke] -synonym: "valine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear RNA (SO:0000274)." [SO:ke] -synonym: "snRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -synonym: "snoRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. 
In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a nucleotide molecule that binds a Transcription Factor or Transcription Factor complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The in-frame interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -synonym: "transcript attribute" EXACT [] -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterized by large modular imperfect long inverted repeats." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "foldback element" EXACT [] -synonym: "long inverted repeat element" RELATED [] -synonym: "LVR element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The sequences extending on either side of a specific region." [SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000240 -name: chromosome_variation -synonym: "chromosome variation" EXACT [] -is_a: SO:0001507 ! variant_collection -disjoint_from: SO:0000400 ! sequence_attribute -relationship: part_of SO:0001524 ! chromosomally_aberrant_genome - -[Term] -id: SO:0000241 -name: internal_UTR -def: "A UTR bordered by the terminal and initial codons of two CDSs in a polycistronic transcript. Every UTR is either 5', 3' or internal." [SO:cjm] -synonym: "internal UTR" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polycistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -synonym: "untranslated region polycistronic mRNA" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke] -synonym: "internal ribosomal entry sequence" EXACT [] -synonym: "internal ribosomal entry site" EXACT [] -synonym: "internal ribosome entry sequence" RELATED [] -synonym: "internal ribosome entry site" EXACT [] -synonym: "IRES" EXACT [] -xref: http://en.wikipedia.org/wiki/Internal_ribosome_entry_site "wiki" -is_a: SO:0000139 ! 
ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_obsolete: true - -[Term] -id: SO:0000246 -name: polyadenylated -def: "A attribute describing the addition of a poly A tail to the 3' end of a mRNA molecule." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000248 -name: sequence_length_variation -synonym: "sequence length variation" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -synonym: "modified RNA base feature" EXACT [] -is_a: SO:0001236 ! base - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). 
Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000254 -name: alanyl_tRNA -def: "A tRNA sequence that has an alanine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "alanyl tRNA" EXACT [] -synonym: "alanyl-transfer ribonucleic acid" EXACT [] -synonym: "alanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000211 ! alanine_tRNA_primary_transcript - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -synonym: "rRNA small subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -def: "A tRNA sequence that has an asparagine anticodon, and a 3' asparagine binding region." [SO:ke] -synonym: "asparaginyl tRNA" EXACT [] -synonym: "asparaginyl-transfer ribonucleic acid" EXACT [] -synonym: "asparaginyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000213 ! 
asparagine_tRNA_primary_transcript - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -def: "A tRNA sequence that has an aspartic acid anticodon, and a 3' aspartic acid binding region." [SO:ke] -synonym: "aspartyl tRNA" EXACT [] -synonym: "aspartyl-transfer ribonucleic acid" EXACT [] -synonym: "aspartyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000214 ! aspartic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -def: "A tRNA sequence that has a cysteine anticodon, and a 3' cysteine binding region." [SO:ke] -synonym: "cysteinyl tRNA" EXACT [] -synonym: "cysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "cysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000215 ! cysteine_tRNA_primary_transcript - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -def: "A tRNA sequence that has a glutamine anticodon, and a 3' glutamine binding region." [SO:ke] -synonym: "glutaminyl tRNA" EXACT [] -synonym: "glutaminyl-transfer ribonucleic acid" EXACT [] -synonym: "glutaminyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000216 ! glutamic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -def: "A tRNA sequence that has a glutamic acid anticodon, and a 3' glutamic acid binding region." [SO:ke] -synonym: "glutamyl tRNA" EXACT [] -synonym: "glutamyl-transfer ribonucleic acid" EXACT [] -synonym: "glutamyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000217 ! glutamine_tRNA_primary_transcript - -[Term] -id: SO:0000261 -name: glycyl_tRNA -def: "A tRNA sequence that has a glycine anticodon, and a 3' glycine binding region." [SO:ke] -synonym: "glycyl tRNA" EXACT [] -synonym: "glycyl-transfer ribonucleic acid" RELATED [] -synonym: "glycyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000218 ! 
glycine_tRNA_primary_transcript - -[Term] -id: SO:0000262 -name: histidyl_tRNA -def: "A tRNA sequence that has a histidine anticodon, and a 3' histidine binding region." [SO:ke] -synonym: "histidyl tRNA" EXACT [] -synonym: "histidyl-transfer ribonucleic acid" EXACT [] -synonym: "histidyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000219 ! histidine_tRNA_primary_transcript - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -def: "A tRNA sequence that has an isoleucine anticodon, and a 3' isoleucine binding region." [SO:ke] -synonym: "isoleucyl tRNA" EXACT [] -synonym: "isoleucyl-transfer ribonucleic acid" EXACT [] -synonym: "isoleucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000220 ! isoleucine_tRNA_primary_transcript - -[Term] -id: SO:0000264 -name: leucyl_tRNA -def: "A tRNA sequence that has a leucine anticodon, and a 3' leucine binding region." [SO:ke] -synonym: "leucyl tRNA" EXACT [] -synonym: "leucyl-transfer ribonucleic acid" EXACT [] -synonym: "leucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000221 ! leucine_tRNA_primary_transcript - -[Term] -id: SO:0000265 -name: lysyl_tRNA -def: "A tRNA sequence that has a lysine anticodon, and a 3' lysine binding region." [SO:ke] -synonym: "lysyl tRNA" EXACT [] -synonym: "lysyl-transfer ribonucleic acid" EXACT [] -synonym: "lysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000222 ! lysine_tRNA_primary_transcript - -[Term] -id: SO:0000266 -name: methionyl_tRNA -def: "A tRNA sequence that has a methionine anticodon, and a 3' methionine binding region." [SO:ke] -synonym: "methionyl tRNA" EXACT [] -synonym: "methionyl-transfer ribonucleic acid" EXACT [] -synonym: "methionyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000223 ! 
methionine_tRNA_primary_transcript - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -def: "A tRNA sequence that has a phenylalanine anticodon, and a 3' phenylalanine binding region." [SO:ke] -synonym: "phenylalanyl tRNA" EXACT [] -synonym: "phenylalanyl-transfer ribonucleic acid" EXACT [] -synonym: "phenylalanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000224 ! phenylalanine_tRNA_primary_transcript - -[Term] -id: SO:0000268 -name: prolyl_tRNA -def: "A tRNA sequence that has a proline anticodon, and a 3' proline binding region." [SO:ke] -synonym: "prolyl tRNA" EXACT [] -synonym: "prolyl-transfer ribonucleic acid" EXACT [] -synonym: "prolyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000225 ! proline_tRNA_primary_transcript - -[Term] -id: SO:0000269 -name: seryl_tRNA -def: "A tRNA sequence that has a serine anticodon, and a 3' serine binding region." [SO:ke] -synonym: "seryl tRNA" EXACT [] -synonym: "seryl-transfer ribonucleic acid" RELATED [] -synonym: "seryl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000226 ! serine_tRNA_primary_transcript - -[Term] -id: SO:0000270 -name: threonyl_tRNA -def: "A tRNA sequence that has a threonine anticodon, and a 3' threonine binding region." [SO:ke] -synonym: "threonyl tRNA" EXACT [] -synonym: "threonyl-transfer ribonucleic acid" EXACT [] -synonym: "threonyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000227 ! threonine_tRNA_primary_transcript - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -def: "A tRNA sequence that has a tryptophan anticodon, and a 3' tryptophan binding region." [SO:ke] -synonym: "tryptophanyl tRNA" EXACT [] -synonym: "tryptophanyl-transfer ribonucleic acid" EXACT [] -synonym: "tryptophanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000228 ! 
tryptophan_tRNA_primary_transcript - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -def: "A tRNA sequence that has a tyrosine anticodon, and a 3' tyrosine binding region." [SO:ke] -synonym: "tyrosyl tRNA" EXACT [] -synonym: "tyrosyl-transfer ribonucleic acid" EXACT [] -synonym: "tyrosyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000229 ! tyrosine_tRNA_primary_transcript - -[Term] -id: SO:0000273 -name: valyl_tRNA -def: "A tRNA sequence that has a valine anticodon, and a 3' valine binding region." [SO:ke] -synonym: "valyl tRNA" EXACT [] -synonym: "valyl-transfer ribonucleic acid" EXACT [] -synonym: "valyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000230 ! valine_tRNA_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000231 ! snRNA_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000232 ! 
snoRNA_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: derives_from SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000277 -name: bound_by_factor -def: "An attribute describing a sequence that is bound by another molecule." [SO:ke] -comment: Formerly called transcript_by_bound_factor. -synonym: "bound by factor" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000278 -name: transcript_bound_by_nucleic_acid -def: "A transcript that is bound by a nucleic acid." [SO:xp] -comment: Formerly called transcript_by_bound_nucleic_acid. -synonym: "transcript bound by nucleic acid" EXACT [] -is_a: SO:0000673 ! transcript -relationship: has_quality SO:0000876 ! bound_by_nucleic_acid - -[Term] -id: SO:0000279 -name: transcript_bound_by_protein -def: "A transcript that is bound by a protein." [SO:xp] -comment: Formerly called transcript_by_bound_protein. -synonym: "transcript bound by protein" EXACT [] -is_a: SO:0000673 ! transcript -relationship: has_quality SO:0000875 ! bound_by_protein - -[Term] -id: SO:0000280 -name: engineered_gene -def: "A gene that is engineered." [SO:xp] -synonym: "engineered gene" EXACT [] -is_a: SO:0000704 ! gene -is_a: SO:0000804 ! engineered_region -relationship: has_quality SO:0000783 ! 
engineered - -[Term] -id: SO:0000281 -name: engineered_foreign_gene -def: "A gene that is engineered and foreign." [SO:xp] -synonym: "engineered foreign gene" EXACT [] -is_a: SO:0000280 ! engineered_gene -is_a: SO:0000285 ! foreign_gene -is_a: SO:0000805 ! engineered_foreign_region -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000282 -name: mRNA_with_minus_1_frameshift -def: "An mRNA with a minus 1 frameshift." [SO:xp] -synonym: "mRNA with minus 1 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -relationship: has_quality SO:0000866 ! minus_1_frameshift - -[Term] -id: SO:0000283 -name: engineered_foreign_transposable_element_gene -def: "A transposable_element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign transposable element gene" EXACT [] -is_a: SO:0000111 ! transposable_element_gene -is_a: SO:0000281 ! engineered_foreign_gene -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartite and interrupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000285 -name: foreign_gene -def: "A gene that is foreign." [SO:xp] -synonym: "foreign gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "direct terminal repeat" RELATED [] -synonym: "long terminal repeat" EXACT [] -synonym: "LTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Long_terminal_repeat "wiki" -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000186 ! 
LTR_retrotransposon - -[Term] -id: SO:0000287 -name: fusion_gene -def: "A gene that is a fusion." [SO:xp] -synonym: "fusion gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Fusion_gene "wiki" -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000806 ! fusion - -[Term] -id: SO:0000288 -name: engineered_fusion_gene -def: "A fusion gene that is engineered." [SO:xp] -synonym: "engineered fusion gene" EXACT [] -is_a: SO:0000280 ! engineered_gene -is_a: SO:0000287 ! fusion_gene -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000289 -name: microsatellite -def: "A repeat_region containing repeat_units (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite" EXACT [] -synonym: "dinucleotide repeat microsatellite feature" EXACT [] -synonym: "dinucleotide repeat microsatellite locus" EXACT [] -synonym: "dinucleotide repeat microsatellite marker" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite marker" RELATED [] -synonym: "rinucleotide repeat microsatellite" EXACT [] -synonym: "trinucleotide repeat microsatellite feature" EXACT [] -synonym: "trinucleotide repeat microsatellite locus" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_obsolete: true - -[Term] -id: SO:0000293 -name: engineered_foreign_repetitive_element -def: "A repetitive element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign repetitive element" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000805 ! 
engineered_foreign_region -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -synonym: "U12 intron" EXACT [] -synonym: "U12-dependent intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! replicon - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "D-loop" EXACT [] -synonym: "displacement loop" RELATED [] -xref: http://en.wikipedia.org/wiki/D_loop "wiki" -is_a: SO:0000296 ! 
origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -synonym: "recombination feature" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000299 -name: specific_recombination_site -synonym: "specific recombination site" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -synonym: "recombination feature of rearranged gene" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000301 -name: vertebrate_immune_system_gene_recombination_feature -synonym: "vertebrate immune system gene recombination feature" EXACT [] -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene recombination feature" EXACT [] -synonym: "J-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interrupted palindrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_DNA_base -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001236 ! base -is_a: SO:0001720 ! 
epigenetically_modified_region - -[Term] -id: SO:0000306 -name: methylated_DNA_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_DNA_base - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_obsolete: true - -[Term] -id: SO:0000309 -name: computed_feature -is_obsolete: true - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_obsolete: true - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to: -is_obsolete: true - -[Term] -id: SO:0000312 -name: experimentally_determined -def: "Attribute to describe a feature that has been experimentally verified." [SO:ke] -synonym: "experimentally determined" EXACT [] -is_a: SO:0000789 ! validated - -[Term] -id: SO:0000313 -name: stem_loop -alt_id: SO:0000019 -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "RNA_hairpin_loop" EXACT [] -synonym: "stem loop" EXACT [] -synonym: "stem-loop" EXACT [] -xref: http://en.wikipedia.org/wiki/Stem_loop "wiki" -is_a: SO:0000122 ! RNA_sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! 
repeat_region - -[Term] -id: SO:0000315 -name: TSS -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cDNA clone" EXACT [] -is_a: SO:0000151 ! clone -relationship: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke] -synonym: "intronic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000321 -name: mRNA_with_plus_1_frameshift -def: "An mRNA with a plus 1 frameshift." [SO:ke] -synonym: "mRNA with plus 1 frameshift" EXACT [] -is_a: SO:0000108 ! 
mRNA_with_frameshift -relationship: has_quality SO:0000868 ! plus_1_frameshift - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -synonym: "nuclease hypersensitive site" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "coding start" EXACT [] -synonym: "translation initiation site" EXACT [] -synonym: "translation start" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "35S rRNA primary transcript" EXACT [] -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke] -synonym: "coding end" EXACT [] -synonym: "translation termination site" EXACT [] -synonym: "translation_end" EXACT [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray oligo" EXACT [] -synonym: "microarray oligonucleotide" EXACT [] -is_a: SO:0000051 ! probe - -[Term] -id: SO:0000329 -name: mRNA_with_plus_2_frameshift -def: "An mRNA with a plus 2 frameshift." [SO:xp] -synonym: "mRNA with plus 2 frameshift" EXACT [] -is_a: SO:0000108 ! 
mRNA_with_frameshift -relationship: has_quality SO:0000869 ! plus_2_framshift - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000335 -name: mRNA_with_minus_2_frameshift -def: "A mRNA with a minus 2 frameshift." [SO:ke] -synonym: "mRNA with minus 2 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -relationship: has_quality SO:0000867 ! minus_2_frameshift - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). 
In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0001411 ! biological_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITEs do not encode proteins." [http://www.pnas.org/cgi/content/full/97/18/10083] -synonym: "miniature inverted repeat transposable element" EXACT [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome which promotes recombination." [SO:rd] -synonym: "recombination hotspot" EXACT [] -xref: http://en.wikipedia.org/wiki/Recombination_hotspot "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! 
replicon - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -synonym: "site specific recombination target region" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000346 -name: loxP_site -synonym: "Cre-recombination target region" RELATED [] -synonym: "loxP site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000348 -name: nucleic_acid -def: "An attribute describing a sequence consisting of nucleobases bound to repeating units. 
The forms found in nature are deoxyribonucleic acid (DNA), where the repeating units are 2-deoxy-D-ribose rings connected to a phosphate backbone, and ribonucleic acid (RNA), where the repeating units are D-ribose rings connected to a phosphate backbone." [CHEBI:33696, RSC:cb] -synonym: "nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleic_acid "wiki" -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000350 -name: FRT_site -def: "An inversion site found on the Saccharomyces cerevisiae 2 micron plasmid." [SO:ma] -synonym: "FLP recombination target region" EXACT [] -synonym: "FRT site" EXACT [] -is_a: SO:0000948 ! inversion_site - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "An attribute to decide a sequence of nucleotides, nucleotide analogs, or amino acids that has been designed by an experimenter and which may, or may not, correspond with any natural sequence." [SO:ma] -synonym: "synthetic sequence" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000352 -name: DNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a 2-deoxy-D-ribose ring connected to a phosphate backbone." [RSC:cb] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000353 -name: sequence_assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -def: "A region of intronic nucleotide sequence targeted by a nuclease enzyme." [SO:ke] -synonym: "group 1 intron homing endonuclease target region" EXACT [] -is_a: SO:0000684 ! 
nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which is co-inherited as the result of the lack of historic recombination within it." [SO:ma] -synonym: "haplotype block" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000356 -name: RNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a D-ribose ring connected to a phosphate backbone." [RSC:cb] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: flanked -def: "An attribute describing a region that is bounded either side by a particular kind of region." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000359 -name: floxed -def: "An attribute describing sequence that is flanked by Lox-P sites." [SO:ke] -xref: http://en.wikipedia.org/wiki/Floxed "wiki" -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000361 -name: FRT_flanked -def: "An attribute to describe sequence that is flanked by the FLP recombinase recognition site, FRT." [SO:ke] -synonym: "FRT flanked" EXACT [] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000362 -name: invalidated_by_chimeric_cDNA -def: "A cDNA clone constructed from more than one mRNA. Usually an experimental artifact." [SO:ma] -synonym: "invalidated by chimeric cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000363 -name: floxed_gene -def: "A transgene that is floxed." [SO:xp] -synonym: "floxed gene" EXACT [] -is_a: SO:0000902 ! 
transgene -relationship: has_quality SO:0000359 ! floxed - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposable element." [SO:ke] -synonym: "transposable element flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "A region encoding an integrase which acts at a site adjacent to it (attI_site) to insert DNA which must include but is not limited to an attC_site." [SO:as] -xref: http://en.wikipedia.org/wiki/Integron "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000367 -name: attI_site -def: "A region within an integron, adjacent to an integrase, at which site specific recombination involving an attC_site takes place." [SO:as] -synonym: "attI site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -is_obsolete: true - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/~smaloy/Glossary/C.html] -synonym: "conjugative transposon" EXACT [] -is_a: SO:0000182 ! 
DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -is_a: SO:0000673 ! transcript -relationship: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0000373 -name: recombinationally_inverted_gene -def: "A recombinationally rearranged gene by inversion." [SO:xp] -synonym: "recombinationally inverted gene" EXACT [] -is_a: SO:0000456 ! recombinationally_rearranged_gene -relationship: has_quality SO:1000036 ! inversion - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -is_a: SO:0000372 ! enzymatic_RNA -relationship: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000375 -name: rRNA_5_8S -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 
6S RNA represses expression from a sigma70-dependent promoter during stationary phase." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S RNA" EXACT [] -synonym: "RNA 6S" EXACT [] -xref: http://en.wikipedia.org/wiki/6S_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB RsmB RNA" EXACT [] -synonym: "CsrB-RsmB RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. 
The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -synonym: "DsrA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/DsrA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -synonym: "GcvB RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/GcvB_RNA "wiki" -is_a: SO:0000378 ! DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [PMID:2436805] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -is_a: SO:0000715 ! RNA_motif -relationship: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000381 -name: group_IIA_intron -synonym: "group IIA intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -synonym: "group IIB intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -synonym: "MicF RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MicF_RNA "wiki" -is_a: SO:0000644 ! 
antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -synonym: "OxyS RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/OxyS_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterized activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. 
Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -synonym: "RprA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RprA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -synonym: "RRE RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. 
coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -synonym: "spot-42 RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Spot_42_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesizes telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). 
Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. 
The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." 
[PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -def: "An attribute describes a quality of sequence." [SO:ke] -synonym: "sequence attribute" EXACT [] - -[Term] -id: SO:0000401 -name: gene_attribute -synonym: "gene attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_obsolete: true - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! 
C_D_box_snoRNA -relationship: derives_from SO:0005837 ! U14_snoRNA_primary_transcript - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron. Twintrons are group II or III introns, into which another group II or III intron has been transposed." [PMID:1899376, PMID:7823908] -xref: http://en.wikipedia.org/wiki/Twintron "wiki" -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! 
small_subunit_rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A biological_region of sequence that, in the molecule, interacts selectively and non-covalently with other molecules. A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: See GO:0005488 : binding. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with polypeptide molecules." [SO:ke] -comment: See GO:0042277 : peptide binding. -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000411 -name: rescue_region -def: "A region that rescues." [SO:xp] -synonym: "rescue fragment" EXACT [] -synonym: "rescue region" EXACT [] -synonym: "rescue segment" RELATED [] -is_a: SO:0000695 ! reagent -relationship: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! 
remark - -[Term] -id: SO:0000414 -name: invalidated_by_genomic_contamination -def: "An attribute to describe a feature that is invalidated due to genomic contamination." [SO:ke] -synonym: "invalidated by genomic contamination" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000415 -name: invalidated_by_genomic_polyA_primed_cDNA -def: "An attribute to describe a feature that is invalidated due to polyA priming." [SO:ke] -synonym: "invalidated by genomic polyA primed cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000416 -name: invalidated_by_partial_processing -def: "An attribute to describe a feature that is invalidated due to partial processing." [SO:ke] -synonym: "invalidated by partial processing" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000417 -name: polypeptide_domain -alt_id: BS:00012 -alt_id: BS:00134 -alt_id: SO:0001069 -def: "A structurally or functionally defined protein region. In proteins with multiple domains, the combination of the domains determines the function of the protein. A region which has been shown to recur throughout evolution." [EBIBS:GAR] -comment: Range. Old definition from before biosapiens: A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains. -subset: biosapiens -synonym: "ca_bind" RELATED BS [uniprot:feature_type] -synonym: "DNA_bind" RELATED BS [uniprot:feature_type] -synonym: "domain" BROAD BS [uniprot:feature_type] -synonym: "np_bind" RELATED BS [uniprot:feature_type] -synonym: "polypeptide domain" EXACT [] -synonym: "polypeptide_structural_domain" EXACT BS [] -synonym: "structural domain" BROAD BS [] -synonym: "zn_fing" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -is_a: SO:0100021 ! 
polypeptide_conserved_region - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0001527 ! peptide_localization_signal -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -synonym: "5' TIR" EXACT [] -synonym: "five prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! 
terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -synonym: "3' TIR" EXACT [] -synonym: "three prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -synonym: "U5 long terminal repeat region" EXACT [] -synonym: "U5 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000423 -name: R_LTR_region -synonym: "R long terminal repeat region" EXACT [] -synonym: "R LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000424 -name: U3_LTR_region -synonym: "U3 long terminal repeat region" EXACT [] -synonym: "U3 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000425 -name: five_prime_LTR -synonym: "5' long terminal repeat" EXACT [] -synonym: "5' LTR" EXACT [] -synonym: "five prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -synonym: "3' long terminal repeat" EXACT [] -synonym: "3' LTR" EXACT [] -synonym: "three prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -synonym: "R 5' long term repeat region" EXACT [] -synonym: "R five prime LTR region" EXACT [] -is_a: SO:0000423 ! R_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -synonym: "U5 5' long terminal repeat region" EXACT [] -synonym: "U5 five prime LTR region" EXACT [] -is_a: SO:0000422 ! U5_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -synonym: "U3 5' long term repeat region" EXACT [] -synonym: "U3 five prime LTR region" EXACT [] -is_a: SO:0000424 ! U3_LTR_region -is_a: SO:0000850 ! 
five_prime_LTR_component - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -synonym: "R 3' long terminal repeat region" EXACT [] -synonym: "R three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -synonym: "U3 3' long terminal repeat region" EXACT [] -synonym: "U3 three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -synonym: "U5 3' long terminal repeat region" EXACT [] -synonym: "U5 three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -synonym: "non LTR retrotransposon polymeric tract" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: target_site_duplication -def: "A sequence of the target DNA that is duplicated when a transposable element or phage inserts; usually found at each end the insertion." [http://www.koko.gov.my/CocoaBioTech/Glossaryt.html] -synonym: "target site duplication" EXACT [] -is_a: SO:0000314 ! direct_repeat -relationship: derives_from SO:0000101 ! transposable_element - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." [SO:ke] -synonym: "LTR retrotransposon poly purine tract" RELATED [] -synonym: "RR tract" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! 
origin_of_replication - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -synonym: "inverted ring chromosome" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! ring_chromosome - -[Term] -id: SO:0000440 -name: vector_replicon -def: "A replicon that has been modified to act as a vector for foreign sequence." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "vector" EXACT [] -synonym: "vector replicon" EXACT [] -xref: http://en.wikipedia.org/wiki/Vector_(molecular_biology) "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000443 -name: polymer_attribute -def: "An attribute to describe the kind of biological sequence." [SO:ke] -synonym: "polymer attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -synonym: "three prime noncoding exon" EXACT [] -is_a: SO:0000198 ! 
noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "5' nc exon" EXACT [] -synonym: "5' non coding exon" EXACT [] -synonym: "five prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." [SO:ke] -synonym: "UTR intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." [SO:ke] -synonym: "five prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -synonym: "three prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequency of these components." [SO:ma] -synonym: "random sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -synonym: "chromosome interband" RELATED [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000451 -name: gene_with_polyadenylated_mRNA -def: "A gene that encodes a polyadenylated mRNA." [SO:xp] -synonym: "gene with polyadenylated mRNA" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -relationship: transcribed_to SO:0000871 ! polyadenylated_mRNA - -[Term] -id: SO:0000452 -name: transgene_attribute -is_obsolete: true - -[Term] -id: SO:0000453 -name: chromosomal_transposition -def: "A chromosome structure variant whereby a region of a chromosome has been transferred to another position. 
Among interchromosomal rearrangements, the term transposition is reserved for that class in which the telomeres of the chromosomes involved are coupled (that is to say, form the two ends of a single DNA molecule) as in wild-type." [FB:reference_manual, SO:ke] -synonym: "chromosomal transposition" EXACT [] -synonym: "transposition" NARROW [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000455 -name: gene_with_mRNA_with_frameshift -def: "A gene that encodes an mRNA with a frameshift." [SO:xp] -synonym: "gene with mRNA with frameshift" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -relationship: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000456 -name: recombinationally_rearranged_gene -def: "A gene that is recombinationally rearranged." [SO:ke] -synonym: "recombinationally rearranged gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000940 ! recombinationally_rearranged - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -def: "A chromosome duplication involving an insertion from another chromosome." [SO:ke] -synonym: "interchromosomal duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene_segment -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D gene" EXACT [] -synonym: "D-GENE" EXACT [] -is_a: SO:0000460 ! 
vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000459 -name: gene_with_trans_spliced_transcript -def: "A gene with a transcript that is trans-spliced." [SO:xp] -synonym: "gene with trans spliced transcript" EXACT [] -is_a: SO:0000704 ! gene -relationship: transcribed_to SO:0000479 ! trans_spliced_transcript - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_segment -comment: I am using the term segment instead of gene here to avoid confusion with the region 'gene'. -synonym: "vertebrate immunoglobulin T cell receptor segment" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite deficiency" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000463 -name: encodes_alternately_spliced_transcripts -def: "A gene that encodes more than one transcript." [SO:ke] -synonym: "encodes alternately spliced transcripts" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! 
exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome deletion whereby a chromosome is generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus duplication" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene_segment -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene" EXACT [] -synonym: "V-GENE" EXACT [] -synonym: "variable_gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -def: "An attribute describing a gene sequence where the resulting protein is regulated by the stability of the resulting protein." [SO:ke] -synonym: "post translationally regulated by protein stability" EXACT [] -synonym: "post-translationally regulated by protein stability" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -def: "An attribute describing a gene sequence where the resulting protein is modified to regulate it." [SO:ke] -synonym: "post translationally regulated by protein modification" EXACT [] -synonym: "post-translationally regulated by protein modification" EXACT [] -is_a: SO:0000130 ! 
post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene_segment -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene" EXACT [] -synonym: "J-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000471 -name: autoregulated -def: "The gene product is involved in its own transcriptional regulation." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -def: "The gene product is involved in its own transcriptional regulation where it decreases transcription." [SO:ke] -synonym: "negatively autoregulated" EXACT [] -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -def: "The gene product is involved in its own transcriptional regulation, where it increases transcription." [SO:ke] -synonym: "positively autoregulated" EXACT [] -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." [SO:ke] -synonym: "contig read" EXACT [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -def: "A gene that is polycistronic." 
[SO:ke] -is_obsolete: true - -[Term] -id: SO:0000478 -name: C_gene_segment -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C gene" EXACT [] -synonym: "C_GENE" EXACT [] -synonym: "constant gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000479 -name: trans_spliced_transcript -def: "A transcript that is trans-spliced." [SO:xp] -synonym: "trans spliced transcript" EXACT [] -synonym: "trans-spliced transcript" EXACT [] -is_a: SO:0000673 ! transcript -relationship: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly." [SO:ke] -synonym: "tiling path clone" EXACT [] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occurring at the termini of a DNA transposon." [SO:ke] -synonym: "terminal inverted repeat" EXACT [] -synonym: "TIR" EXACT [] -is_a: SO:0000294 ! inverted_repeat -relationship: part_of SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor gene cluster" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." 
[SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-CLUSTER" EXACT [] -synonym: "DJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-C-CLUSTER" EXACT [] -synonym: "VDJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! 
VDJ_gene_segment - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-CLUSTER" EXACT [] -synonym: "VDJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-C-CLUSTER" RELATED [] -synonym: "VJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-C-CLUSTER" EXACT [] -synonym: "VJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-CLUSTER" EXACT [] -synonym: "VJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! 
J_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -synonym: "D gene recombination feature" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-HEPTAMER" EXACT [] -synonym: "three prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-NOMAMER" EXACT [] -synonym: "three prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-SPACER" EXACT [] -synonym: "three prime D spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-HEPTAMER" EXACT [] -synonym: "five prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-NONAMER" EXACT [] -synonym: "five prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'-SPACER" EXACT [] -synonym: "five prime D spacer" EXACT [] -synonym: "five prime D-spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -synonym: "Hoogsteen base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Hoogsteen_base_pair "wiki" -is_a: SO:0000028 ! 
base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." [SO:ke] -synonym: "reverse Hoogsteen base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ C cluster" EXACT [] -synonym: "D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ cluster" EXACT [] -synonym: "D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! 
D_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J C cluster" EXACT [] -synonym: "D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000507 -name: pseudogenic_exon -def: "A non functional descendant of an exon, part of a pseudogene." [SO:ke] -comment: This is the analog of the exon of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon -relationship: part_of SO:0000516 ! pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J cluster" EXACT [] -synonym: "D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! 
DJ_gene_segment - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J C cluster" EXACT [] -synonym: "D-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000510 -name: VD_gene_segment -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V_D_GENE" EXACT [] -synonym: "VD gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J C cluster" EXACT [] -synonym: "J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus aneuploid" EXACT [] -is_a: SO:1000029 ! 
chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J cluster" EXACT [] -synonym: "J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J nonamer" EXACT [] -synonym: "J-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J heptamer" EXACT [] -synonym: "J-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -def: "A non functional descendant of a transcript, part of a pseudogene." [SO:ke] -comment: This is the analog of the transcript of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic transcript" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! 
pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J spacer" EXACT [] -synonym: "J-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ cluster" EXACT [] -synonym: "V-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J cluster" EXACT [] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ C cluster" EXACT [] -synonym: "V-(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! 
V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ cluster" EXACT [] -synonym: "V-(VDJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J cluster" EXACT [] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ C cluster" EXACT [] -synonym: "V-(VJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ cluster" EXACT [] -synonym: "V-(VJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J cluster" EXACT [] -synonym: "V-(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V cluster" EXACT [] -synonym: "V-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ C cluster" EXACT [] -synonym: "V-D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! 
DJ_gene_segment - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ cluster" EXACT [] -synonym: "V-D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J C cluster" EXACT [] -synonym: "V-D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J cluster" EXACT [] -synonym: "V-D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! 
DJ_gene_segment - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J C cluster" EXACT [] -synonym: "V-D-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J cluster" EXACT [] -synonym: "V-D-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V heptamer" EXACT [] -synonym: "V-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J cluster" EXACT [] -synonym: "V-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J C cluster" EXACT [] -synonym: "V-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V nonamer" EXACT [] -synonym: "V-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V spacer" EXACT [] -synonym: "V-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene recombination feature" EXACT [] -synonym: "V-RS" EXACT [] -is_a: SO:0000939 ! 
vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-C-CLUSTER" EXACT [] -synonym: "DJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-C-CLUSTER" EXACT [] -synonym: "DJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-C-CLUSTER" EXACT [] -synonym: "VDJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ C cluster" EXACT [] -synonym: "V-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." [http://www.pnas.org/cgi/content/full/100/11/6569] -synonym: "ISCR" RELATED [] -xref: http://en.wikipedia.org/wiki/Helitron "wiki" -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -synonym: "recoding pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -synonym: "designed sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite duplication" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000548 -name: gene_with_edited_transcript -def: "A gene that encodes a transcript that is edited." [SO:xp] -synonym: "gene with edited transcript" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -relationship: transcribed_to SO:0000873 ! 
edited_transcript - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived duplication plus aneuploid" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -def: "A chromosome structural variation whereby either a chromosome exists in addition to the normal chromosome complement or is lacking." [SO:ke] -comment: Examples are Nullo-4, Haplo-4 and triplo-4 in Drosophila. -synonym: "aneuploid chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -synonym: "polyadenylation termination signal" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "A region in the 5' UTR that pairs with the 16S rRNA during formation of the preinitiation complex." [SO:jh] -comment: Not found in Eukaryotic sequence. -synonym: "five prime ribosome binding site" EXACT [] -synonym: "RBS" RELATED [] -synonym: "Shine Dalgarno sequence" EXACT [] -synonym: "Shine-Dalgarno sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Shine-Dalgarno_sequence "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -alt_id: SO:0001430 -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation. The boundary between the UTR and the polyA sequence." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA cleavage site" EXACT [] -synonym: "polyA junction" EXACT [] -synonym: "polyA site" EXACT [] -synonym: "polyA_junction" EXACT [] -synonym: "polyadenylation site" RELATED [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "5' clip" RELATED [] -synonym: "five prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'RS" EXACT [] -synonym: "five prime D recombination signal sequence" EXACT [] -synonym: "five prime D-recombination signal sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "3'-clip" EXACT [] -synonym: "three prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C cluster" EXACT [] -synonym: "C-CLUSTER" EXACT [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D cluster" EXACT [] -synonym: "D-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J cluster" EXACT [] -synonym: "D-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: "Seven nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or J-gene recombination feature of an immunoglobulin or T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "HEPTAMER" RELATED [] -synonym: "heptamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -synonym: "nonamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000563 -name: vertebrate_immune_system_gene_recombination_spacer -synonym: "vertebrate immune system gene recombination spacer" EXACT [] -is_a: SO:0000301 ! 
vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J C cluster" EXACT [] -synonym: "V-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J C cluster" EXACT [] -synonym: "V-(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J C cluster" EXACT [] -synonym: "V-(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! 
VJ_gene_segment - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -synonym: "inversion derived aneuploid chromosome" EXACT [] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promoter -synonym: "bidirectional promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000569 -name: retrotransposed -alt_id: SO:0100042 -def: "An attribute of a feature that occurred as the product of a reverse transcriptase mediated event." [SO:ke] -comment: GO:0003964 RNA-directed DNA polymerase activity. -xref: http://en.wikipedia.org/wiki/Retrotransposed "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-RS" EXACT [] -synonym: "three prime D recombination signal sequence" EXACT [] -synonym: "three_prime_D-recombination_signal_sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_encoding -synonym: "miRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000572 -name: DJ_gene_segment -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D-J-GENE" EXACT [] -synonym: "DJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000573 -name: rRNA_encoding -synonym: "rRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000574 -name: VDJ_gene_segment -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-D-J-GENE" EXACT [] -synonym: "VDJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000575 -name: scRNA_encoding -synonym: "scRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000576 -name: VJ_gene_segment -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-J-GENE" EXACT [] -synonym: "VJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_encoding -synonym: "snoRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." [SO:ma] -synonym: "edited transcript feature" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -synonym: "methylation guide snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. 
It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." [SO:ke] -synonym: "rRNA cleavage snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "pre edited region" EXACT [] -synonym: "pre-edited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "A tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. TmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and Eukaryote nuclear genomes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa RNA" RELATED [] -synonym: "ssrA" RELATED [] -xref: http://en.wikipedia.org/wiki/TmRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_encoding -synonym: "C/D box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." 
[SO:ke] -synonym: "10Sa RNA primary transcript" RELATED [] -synonym: "ssrA RNA primary transcript" RELATED [] -synonym: "tmRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyze their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -is_a: SO:0000188 ! intron -relationship: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -synonym: "SRP RNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). 
Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000589 ! SRP_RNA_primary_transcript - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A tertiary structure in RNA where nucleotides in a loop form base pairs with a region of RNA downstream of the loop." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Pseudoknot "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "classical pseudoknot" EXACT [] -synonym: "H pseudoknot" EXACT [] -synonym: "H-pseudoknot" EXACT [] -synonym: "H-type pseudoknot" EXACT [] -synonym: "hairpin-type pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. 
Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000595 ! C_D_box_snoRNA_primary_transcript - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "box H/ACA snoRNA" EXACT [] -synonym: "H ACA box snoRNA" EXACT [] -synonym: "H/ACA box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000596 ! H_ACA_box_snoRNA_primary_transcript - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." 
[SO:ke] -synonym: "C/D box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -synonym: "H ACA box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." [http://www.rna.ucla.edu/index.html] -is_obsolete: true - -[Term] -id: SO:0000598 -name: edited_by_C_insertion_and_dinucleotide_insertion -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000599 -name: edited_by_C_to_U_substitution -is_obsolete: true - -[Term] -id: SO:0000600 -name: edited_by_A_to_I_substitution -is_obsolete: true - -[Term] -id: SO:0000601 -name: edited_by_G_addition -is_obsolete: true - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. 
Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing block" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing domain" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." 
[http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "unedited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_encoding -synonym: "H ACA box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -synonym: "oligo U tail" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000602 ! guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -synonym: "bacterial RNApol promoter" EXACT [] -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0001203 ! 
RNA_polymerase_promoter - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -synonym: "bacterial terminator" EXACT [] -is_a: SO:0000141 ! terminator -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -synonym: "terminator of type 2 RNApol III promoter" EXACT [] -is_a: SO:0000951 ! eukaryotic_terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -synonym: "RNApol III promoter type 1" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "RNApol III promoter type 2" EXACT [] -synonym: "tRNA promoter" RELATED [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence TGGCnnAGTGG." [SO:ke] -comment: Binds TFIIIC. -synonym: "A-box" EXACT [] -xref: http://en.wikipedia.org/wiki/A-box "wiki" -is_a: SO:0001660 ! core_promoter_element - -[Term] -id: SO:0000620 -name: B_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence AGGTTCCAnnCC." [SO:ke] -comment: Binds TFIIIC. -synonym: "B-box" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -synonym: "RNApol III promoter type 3" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -def: "An RNA polymerase III type 1 promoter with consensus sequence CAnnCCn." [SO:ke] -synonym: "C-box" EXACT [] -is_a: SO:0001660 ! 
core_promoter_element -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000623 -name: snRNA_encoding -synonym: "snRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -synonym: "chromosomal regulatory element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000627 -name: insulator -def: "A transcriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -synonym: "five prime open reading frame" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -def: "A start codon upstream of the ORF." [SO:ke] -synonym: "upstream AUG codon" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! 
UTR - -[Term] -id: SO:0000631 -name: polycistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." [SO:ke] -synonym: "polycistronic primary transcript" EXACT [] -is_a: SO:0000078 ! polycistronic_transcript -is_a: SO:0000185 ! primary_transcript -relationship: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000632 -name: monocistronic_primary_transcript -def: "A primary transcript encoding for one gene product." [SO:ke] -synonym: "monocistronic primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript -is_a: SO:0000665 ! monocistronic_transcript -relationship: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000633 -name: monocistronic_mRNA -def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd] -synonym: "monocistronic mRNA" EXACT [] -synonym: "monocistronic processed transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Monocistronic_mRNA "wiki" -is_a: SO:0000234 ! mRNA -is_a: SO:0000665 ! monocistronic_transcript -relationship: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000634 -name: polycistronic_mRNA -def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd] -synonym: "polycistronic mRNA" EXACT [] -synonym: "polycistronic processed transcript" RELATED [] -xref: http://en.wikipedia.org/wiki/Polycistronic_mRNA "wiki" -is_a: SO:0000078 ! polycistronic_transcript -is_a: SO:0000234 ! mRNA -relationship: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "A primary transcript that donates the spliced leader to other mRNA." [SO:ke] -synonym: "mini exon donor RNA" EXACT [] -synonym: "mini-exon donor RNA" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -synonym: "spliced leader RNA" EXACT [] -is_a: SO:0000835 ! 
primary_transcript_region -relationship: part_of SO:0000635 ! mini_exon_donor_RNA - -[Term] -id: SO:0000637 -name: engineered_plasmid -def: "A plasmid that is engineered." [SO:xp] -synonym: "engineered plasmid" EXACT [] -synonym: "engineered plasmid gene" RELATED [] -is_a: SO:0000155 ! plasmid -is_a: SO:0000804 ! engineered_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -synonym: "transcribed spacer region" EXACT [] -is_a: SO:0000838 ! rRNA_primary_transcript_region - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -synonym: "internal transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -synonym: "external transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -synonym: "tetranucleotide repeat microsatellite feature" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_encoding -synonym: "SRP RNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! 
satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro RNA primary transcript" EXACT [] -synonym: "miRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000648 -name: stRNA_primary_transcript -def: "A primary transcript encoding a small temporal mRNA (SO:0000649)." [SO:ke] -synonym: "small temporal RNA primary transcript" EXACT [] -synonym: "stRNA primary transcript" EXACT [] -is_a: SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000255 ! rRNA_small_subunit_primary_transcript - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilizes 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! 
large_subunit_rRNA - -[Term] -id: SO:0000654 -name: maxicircle_gene -def: "A mitochondrial gene located in a maxicircle." [SO:xp] -synonym: "maxi-circle gene" EXACT [] -synonym: "maxicircle gene" EXACT [] -is_a: SO:0000089 ! kinetoplast_gene -relationship: part_of SO:0000742 ! maxicircle - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000656 -name: stRNA_encoding -synonym: "stRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: has_part SO:0000726 ! repeat_unit - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_encoding -synonym: "tmRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_obsolete: true - -[Term] -id: SO:0000661 -name: intron_attribute -is_obsolete: true - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. 
-subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000663 -name: tRNA_encoding -synonym: "tRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -synonym: "introgressed chromosome region" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000665 -name: monocistronic_transcript -def: "A transcript that is monocistronic." [SO:xp] -synonym: "monocistronic transcript" EXACT [] -is_a: SO:0000673 ! transcript -relationship: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000666 -name: mobile_intron -def: "An intron (mitochondrial, chloroplast, nuclear or prokaryotic) that encodes a double strand sequence specific endonuclease allowing for mobility." [SO:ke] -synonym: "mobile intron" EXACT [] -is_a: SO:0000188 ! intron -is_a: SO:0001037 ! mobile_genetic_element -relationship: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0000667 -name: insertion -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: DBVAR -subset: SOFA -synonym: "insertion" EXACT dbvar [http://www.ncbi.nlm.nih.gov/dbvar/] -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -xref: loinc:LA6687-3 "Insertion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -synonym: "sequence rearrangement feature" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." 
[SO:ma] -synonym: "chromosome breakage sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -synonym: "internal eliminated sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -synonym: "macronucleus destined segment" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non canonical splice site" EXACT [] -synonym: "non-canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000678 -consider: SO:0000679 - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." [SO:ke] -synonym: "canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000676 -consider: SO:0000677 - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -synonym: "canonical 3' splice site" EXACT [] -synonym: "canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." 
[SO:ke] -synonym: "canonical 5' splice site" EXACT [] -synonym: "canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." [SO:ke] -synonym: "non canonical 3' splice site" RELATED [] -synonym: "non canonical three prime splice site" EXACT [] -synonym: "non-canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non canonical 5' splice site" EXACT [] -synonym: "non canonical five prime splice site" EXACT [] -synonym: "non-canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non ATG start codon" EXACT [] -synonym: "non canonical start codon" EXACT [] -synonym: "non-canonical start codon" EXACT [] -is_a: SO:0000318 ! start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -synonym: "aberrant processed transcript" EXACT [] -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." [http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -synonym: "exonic splice enhancer" EXACT [] -is_a: SO:0000344 ! 
splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000685 -name: DNAseI_hypersensitive_site -synonym: "DHS" EXACT [] -synonym: "DNAseI hypersensitive site" EXACT [] -is_a: SO:0000322 ! nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "A chromosomal translocation whereby the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements. This occurs for some translocations, particularly but not exclusively, reciprocal translocations." [SO:ma] -synonym: "translocation element" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000690 -name: gene_with_polycistronic_transcript -def: "A gene that encodes a polycistronic transcript." [SO:xp] -synonym: "gene with polycistronic transcript" EXACT [] -is_a: SO:0000704 ! gene -relationship: transcribed_to SO:0000078 ! 
polycistronic_transcript - -[Term] -id: SO:0000691 -name: cleaved_initiator_methionine -alt_id: BS:00067 -def: "The initiator methionine that has been cleaved from a mature polypeptide sequence." [EBIBS:GAR] -subset: biosapiens -synonym: "cleaved initiator methionine" EXACT [] -synonym: "init_met" RELATED [uniprot:feature_type] -synonym: "initiator methionine" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000692 -name: gene_with_dicistronic_transcript -def: "A gene that encodes a dicistronic transcript." [SO:xp] -synonym: "gene with dicistronic transcript" EXACT [] -is_a: SO:0000690 ! gene_with_polycistronic_transcript -relationship: transcribed_to SO:0000079 ! dicistronic_transcript - -[Term] -id: SO:0000693 -name: gene_with_recoded_mRNA -def: "A gene that encodes an mRNA that is recoded." [SO:xp] -synonym: "gene with recoded mRNA" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -relationship: has_quality SO:0000881 ! recoded - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000697 -name: gene_with_stop_codon_read_through -def: "A gene that encodes a transcript with stop codon readthrough." 
[SO:xp] -synonym: "gene with stop codon read through" EXACT [] -is_a: SO:0000693 ! gene_with_recoded_mRNA -relationship: has_part SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000698 -name: gene_with_stop_codon_redefined_as_pyrrolysine -def: "A gene encoding an mRNA that has the stop codon redefined as pyrrolysine." [SO:xp] -synonym: "gene with stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000697 ! gene_with_stop_codon_read_through -relationship: has_part SO:0000884 ! stop_codon_redefined_as_pyrrolysine - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -synonym: "breakpoint" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." 
[SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjcent copies of a region (of length greater than 1)." [SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The 5' five prime splice site region of the donor RNA." [SO:ke] -comment: SL RNA contains a donor site. -synonym: "5 prime trans splice site" RELATED [] -synonym: "trans splice donor site" EXACT [] -synonym: "trans-splice donor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -def: "A trans_splicing_acceptor_site which appends the 22nt SL1 RNA leader sequence to the 5' end of most mRNAs." [SO:nlw] -synonym: "SL1 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -def: "A trans_splicing_acceptor_site which appends the 22nt SL2 RNA leader sequence to the 5' end of mRNAs. SL2 acceptor sites occur in genes in internal segments of polycistronic transcripts." [SO:nlw] -synonym: "SL2 acceptor site" EXACT [] -is_a: SO:0000706 ! 
trans_splice_acceptor_site - -[Term] -id: SO:0000710 -name: gene_with_stop_codon_redefined_as_selenocysteine -def: "A gene encoding an mRNA that has the stop codon redefined as selenocysteine." [SO:xp] -synonym: "gene with stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000697 ! gene_with_stop_codon_read_through -relationship: has_part SO:0000885 ! stop_codon_redefined_as_selenocysteine - -[Term] -id: SO:0000711 -name: gene_with_mRNA_recoded_by_translational_bypass -def: "A gene with mRNA recoded by translational bypass." [SO:xp] -synonym: "gene with mRNA recoded by translational bypass" EXACT [] -is_a: SO:0000693 ! gene_with_recoded_mRNA -relationship: has_quality SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:0000712 -name: gene_with_transcript_with_translational_frameshift -def: "A gene encoding a transcript that has a translational frameshift." [SO:xp] -synonym: "gene with transcript with translational frameshift" EXACT [] -is_a: SO:0000693 ! gene_with_recoded_mRNA -relationship: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000713 -name: DNA_motif -def: "A motif that is active in the DNA form of the sequence." [SO:ke] -synonym: "DNA motif" EXACT [] -xref: http://en.wikipedia.org/wiki/DNA_motif "wiki" -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001683 ! sequence_motif - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000716 -name: dicistronic_mRNA -def: "An mRNA that has the quality dicistronic." [SO:ke] -synonym: "dicistronic mRNA" EXACT [] -synonym: "dicistronic processed transcript" RELATED [] -is_a: SO:0000079 ! dicistronic_transcript -is_a: SO:0000634 ! 
polycistronic_mRNA -relationship: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interrupted by one or more stop codons; usually identified through intergenomic sequence comparisons." [SGD:rb] -comment: Term requested by Rama from SGD. -synonym: "blocked reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -synonym: "superscaffold" RELATED [] -is_a: SO:0001876 ! partial_genomic_sequence_assembly - -[Term] -id: SO:0000720 -name: foreign_transposable_element -def: "A transposable element that is foreign." [SO:ke] -comment: requested by Michael on 19 Nov 2004. -synonym: "foreign transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000721 -name: gene_with_dicistronic_primary_transcript -def: "A gene that encodes a dicistronic primary transcript." [SO:xp] -comment: Requested by Michael, 19 nov 2004. -synonym: "gene with dicistronic primary transcript" EXACT [] -is_a: SO:0000692 ! 
gene_with_dicistronic_transcript -relationship: transcribed_to SO:1001197 ! dicistronic_primary_transcript - -[Term] -id: SO:0000722 -name: gene_with_dicistronic_mRNA -def: "A gene that encodes a polycistronic mRNA." [SO:xp] -comment: Requested by MA nov 19 2004. -synonym: "gene with dicistronic mRNA" EXACT [] -synonym: "gene with dicistronic processed transcript" EXACT [] -is_a: SO:0000692 ! gene_with_dicistronic_transcript -relationship: transcribed_to SO:0000716 ! dicistronic_mRNA - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." [SO:ma] -synonym: "intervening DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/IDNA "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0001527 ! 
peptide_localization_signal - -[Term] -id: SO:0000726 -name: repeat_unit -def: "The simplest repeated component of a repeat region. A single repeat." [SO:ke] -comment: Added to comply with the feature table. A single repeat. -synonym: "repeat unit" EXACT [] -xref: http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region -relationship: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000728 -name: intein -def: "A region of a peptide that is able to excise itself and rejoin the remaining portions with a peptide bond." [SO:ke] -comment: Intein-mediated protein splicing occurs after mRNA has been translated into a protein. -synonym: "protein intron" RELATED [] -xref: http://en.wikipedia.org/wiki/Intein "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000729 -name: intein_containing -def: "An attribute of protein-coding genes where the initial protein product contains an intein." [SO:ke] -synonym: "intein containing" EXACT [] -is_a: SO:0000010 ! protein_coding - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000731 -name: fragmentary -def: "An attribute to describe a feature that is incomplete." [SO:ke] -comment: Term added because of request by MO people. -synonym: "fragment" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000732 -name: predicted -def: "An attribute describing an unverified region." 
[SO:ke] -xref: http://en.wikipedia.org/wiki/Predicted "wiki" -is_a: SO:0000905 ! status - -[Term] -id: SO:0000733 -name: feature_attribute -def: "An attribute describing a located_sequence_feature." [SO:ke] -synonym: "feature attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000734 -name: exemplar_mRNA -def: "An exemplar is a representative cDNA sequence for each gene. The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: Added for the MO people. -synonym: "exemplar mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -relationship: has_quality SO:0000864 ! exemplar - -[Term] -id: SO:0000735 -name: sequence_location -synonym: "sequence location" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_sequence -synonym: "organelle sequence" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "mitochondrial sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000738 -name: nuclear_sequence -synonym: "nuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -synonym: "nucleomorphic sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000740 -name: plastid_sequence -synonym: "plastid sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000741 -name: kinetoplast -alt_id: SO:0000826 -def: "A kinetoplast is an interlocked network of thousands of minicircles and tens of maxi circles, located near the base of the flagellum of some protozoan species." 
[PMID:8395055] -synonym: "kinetoplast_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Kinetoplast "wiki" -is_a: SO:0001026 ! genome -relationship: has_part SO:0000742 ! maxicircle -relationship: has_part SO:0000980 ! minicircle - -[Term] -id: SO:0000742 -name: maxicircle -alt_id: SO:0000827 -def: "A maxicircle is a replicon, part of a kinetoplast, that contains open reading frames and replicates via a rolling circle method." [PMID:8395055] -synonym: "maxicircle_chromosome" EXACT [] -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000743 -name: apicoplast_sequence -synonym: "apicoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -synonym: "chromoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -synonym: "chloroplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -synonym: "cyanelle sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -synonym: "leucoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -synonym: "proplastid sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_location -synonym: "plasmid location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -synonym: "amplification origin" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_location -synonym: "proviral location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0001679 ! 
transcription_regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -def: "The region of sequence that has been inserted and is being propagated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000754 -name: lambda_vector -def: "The lambda bacteriophage is the vector for the linear lambda clone. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -synonym: "lambda vector" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000755 -name: plasmid_vector -synonym: "plasmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Plasmid_vector#Vectors "wiki" -is_a: SO:0000440 ! vector_replicon -relationship: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template." [SO:ma] -synonym: "complementary DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/CDNA "wiki" -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -synonym: "single strand cDNA" EXACT [] -synonym: "single stranded cDNA" EXACT [] -synonym: "single-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -synonym: "double strand cDNA" RELATED [] -synonym: "double stranded cDNA" EXACT [] -synonym: "double-strand cDNA" RELATED [] -is_a: SO:0000756 ! 
cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_obsolete: true - -[Term] -id: SO:0000760 -name: YAC_clone -is_obsolete: true - -[Term] -id: SO:0000761 -name: phagemid_clone -is_obsolete: true - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000763 -name: fosmid_clone -is_obsolete: true - -[Term] -id: SO:0000764 -name: BAC_clone -is_obsolete: true - -[Term] -id: SO:0000765 -name: cosmid_clone -is_obsolete: true - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -def: "A tRNA sequence that has a pyrrolysine anticodon, and a 3' pyrrolysine binding region." [SO:ke] -synonym: "pyrrolysyl tRNA" EXACT [] -synonym: "pyrrolysyl-transfer ribonucleic acid" EXACT [] -synonym: "pyrrolysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0001178 ! pyrrolysine_tRNA_primary_transcript - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome." [SO:ma] -is_a: SO:0000155 ! plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria. Processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw, issn:1362-4962] -comment: Added in response to comment from Kelly Williams from Indiana. Nov 2005. -synonym: "tmRNA coding piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." 
[doi:10.1093/nar/gkh795, Indiana:kw] -comment: Added in response to Kelly Williams from Indiana. Date: Nov 2005. -synonym: "tmRNA acceptor piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000771 -name: QTL -def: "A quantitative trait locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." [http://rgd.mcw.edu/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005. -synonym: "quantitative trait locus" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000772 -name: genomic_island -def: "A genomic island is an integrated mobile genetic element, characterized by size (over 10 Kb). It that has features that suggest a foreign origin. These can include nucleotide distribution (oligonucleotides signature, CG content etc.) that differs from the bulk of the chromosome and/or genes suggesting DNA mobility." [Phigo:at, SO:ke] -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -synonym: "genomic island" EXACT [] -xref: http://en.wikipedia.org/wiki/Genomic_island "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "pathogenic island" EXACT [] -is_a: SO:0000772 ! 
genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands. -synonym: "metabolic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -def: "An adaptive island is a genomic island that provides an adaptive advantage to the host." [SO:ke] -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands. Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "adaptive island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands. Evolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA gene. John T. Sullivan and Clive W. Ronso PNAS 1998 Apr 28 95 (9) 5145-5149. -synonym: "symbiosis island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. 
Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000779 -name: engineered_episome -def: "An episome that is engineered." [SO:xp] -comment: Requested by Lynn Crosby Jan 2006. -synonym: "engineered episome" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -is_a: SO:0000768 ! episome -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000780 -name: transposable_element_attribute -comment: Added by KE Jan 2006 to capture the kinds of attributes of TEs -is_obsolete: true - -[Term] -id: SO:0000781 -name: transgenic -def: "Attribute describing sequence that has been integrated with foreign sequence." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000782 -name: natural -def: "An attribute describing a feature that occurs in nature." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000783 -name: engineered -def: "An attribute to describe a region that was modified in vitro." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000784 -name: foreign -def: "An attribute to describe a region from another species." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000785 -name: cloned_region -comment: Added in response to Lynn Crosby. A clone insert may be composed of many cloned regions. -synonym: "cloned region" EXACT [] -synonym: "cloned segment" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000786 -name: reagent_attribute -comment: Added jan 2006 by KE. -synonym: "reagent attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000787 -name: clone_attribute -is_obsolete: true - -[Term] -id: SO:0000788 -name: cloned -is_obsolete: true - -[Term] -id: SO:0000789 -name: validated -def: "An attribute to describe a feature that has been proven." [SO:ke] -is_a: SO:0000905 ! 
status - -[Term] -id: SO:0000790 -name: invalidated -def: "An attribute describing a feature that is invalidated." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000791 -name: cloned_genomic -is_obsolete: true - -[Term] -id: SO:0000792 -name: cloned_cDNA -is_obsolete: true - -[Term] -id: SO:0000793 -name: engineered_DNA -is_obsolete: true - -[Term] -id: SO:0000794 -name: engineered_rescue_region -def: "A rescue region that is engineered." [SO:xp] -synonym: "engineered rescue fragment" EXACT [] -synonym: "engineered rescue region" EXACT [] -synonym: "engineered rescue segment" EXACT [] -is_a: SO:0000411 ! rescue_region -is_a: SO:0000804 ! engineered_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000795 -name: rescue_mini_gene -def: "A mini_gene that rescues." [SO:xp] -synonym: "rescue mini gene" EXACT [] -synonym: "rescue mini-gene" EXACT [] -is_a: SO:0000815 ! mini_gene -relationship: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000796 -name: transgenic_transposable_element -def: "TE that has been modified in vitro, including insertion of DNA derived from a source other than the originating TE." [FB:mc] -comment: Modified as requested by Lynn - FB. May 2007. -synonym: "transgenic transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -relationship: derives_from SO:0000151 ! clone -relationship: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000797 -name: natural_transposable_element -def: "TE that exists (or existed) in nature." [FB:mc] -synonym: "natural transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -relationship: has_quality SO:0000782 ! natural - -[Term] -id: SO:0000798 -name: engineered_transposable_element -def: "TE that has been modified by manipulations in vitro." [FB:mc] -synonym: "engineered transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -is_a: SO:0000804 ! 
engineered_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000799 -name: engineered_foreign_transposable_element -def: "A transposable_element that is engineered and foreign." [FB:mc] -synonym: "engineered foreign transposable element" EXACT [] -is_a: SO:0000720 ! foreign_transposable_element -is_a: SO:0000798 ! engineered_transposable_element -is_a: SO:0000805 ! engineered_foreign_region -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000800 -name: assortment_derived_duplication -def: "A multi-chromosome duplication aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment derived duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000801 -name: assortment_derived_deficiency_plus_duplication -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency and a duplication." [FB:gm] -synonym: "assortment derived deficiency plus duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000802 -name: assortment_derived_deficiency -def: "A multi-chromosome deficiency aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment-derived deficiency" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000803 -name: assortment_derived_aneuploid -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency or a duplication." [FB:gm] -synonym: "assortment derived aneuploid" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000804 -name: engineered_region -def: "A region that is engineered." [SO:xp] -synonym: "construct" EXACT [] -synonym: "engineered region" EXACT [] -synonym: "engineered sequence" EXACT [] -is_a: SO:0001409 ! 
biomaterial_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000805 -name: engineered_foreign_region -def: "A region that is engineered and foreign." [SO:xp] -synonym: "engineered foreign region" EXACT [] -is_a: SO:0000804 ! engineered_region -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000806 -name: fusion -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000807 -name: engineered_tag -def: "A tag that is engineered." [SO:xp] -synonym: "engineered tag" EXACT [] -is_a: SO:0000324 ! tag -is_a: SO:0000804 ! engineered_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000808 -name: validated_cDNA_clone -def: "A cDNA clone that has been validated." [SO:xp] -synonym: "validated cDNA clone" EXACT [] -is_a: SO:0000317 ! cDNA_clone -relationship: has_quality SO:0000789 ! validated - -[Term] -id: SO:0000809 -name: invalidated_cDNA_clone -def: "A cDNA clone that is invalid." [SO:xp] -synonym: "invalidated cDNA clone" EXACT [] -is_a: SO:0000317 ! cDNA_clone -relationship: has_quality SO:0000790 ! invalidated - -[Term] -id: SO:0000810 -name: chimeric_cDNA_clone -def: "A cDNA clone invalidated because it is chimeric." [SO:xp] -synonym: "chimeric cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -relationship: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA - -[Term] -id: SO:0000811 -name: genomically_contaminated_cDNA_clone -def: "A cDNA clone invalidated by genomic contamination." [SO:xp] -synonym: "genomically contaminated cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -relationship: has_quality SO:0000414 ! invalidated_by_genomic_contamination - -[Term] -id: SO:0000812 -name: polyA_primed_cDNA_clone -def: "A cDNA clone invalidated by polyA priming." [SO:xp] -synonym: "polyA primed cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -relationship: has_quality SO:0000415 ! 
invalidated_by_genomic_polyA_primed_cDNA - -[Term] -id: SO:0000813 -name: partially_processed_cDNA_clone -def: "A cDNA invalidated clone by partial processing." [SO:xp] -synonym: "partially processed cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -relationship: has_quality SO:0000416 ! invalidated_by_partial_processing - -[Term] -id: SO:0000814 -name: rescue -def: "An attribute describing a region's ability, when introduced to a mutant organism, to re-establish (rescue) a phenotype." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000815 -name: mini_gene -def: "By definition, minigenes are short open-reading frames (ORF), usually encoding approximately 9 to 20 amino acids, which are expressed in vivo (as distinct from being synthesized as peptide or protein ex vivo and subsequently injected). The in vivo synthesis confers a distinct advantage: the expressed sequences can enter both antigen presentation pathways, MHC I (inducing CD8+ T- cells, which are usually cytotoxic T-lymphocytes (CTL)) and MHC II (inducing CD4+ T-cells, usually 'T-helpers' (Th)); and can encounter B-cells, inducing antibody responses. Three main vector approaches have been used to deliver minigenes: viral vectors, bacterial vectors and plasmid DNA." [PMID:15992143] -synonym: "mini gene" EXACT [] -is_a: SO:0000236 ! ORF - -[Term] -id: SO:0000816 -name: rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "rescue gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000817 -name: wild_type -def: "An attribute describing sequence with the genotype found in nature and/or standard laboratory stock." [SO:ke] -synonym: "wild type" EXACT [] -xref: http://en.wikipedia.org/wiki/Wild_type "wiki" -xref: loinc:LA9658-1 "wild type" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000818 -name: wild_type_rescue_gene -def: "A gene that rescues." 
[SO:xp] -synonym: "wild type rescue gene" EXACT [] -is_a: SO:0000816 ! rescue_gene -relationship: has_quality SO:0000817 ! wild_type - -[Term] -id: SO:0000819 -name: mitochondrial_chromosome -def: "A chromosome originating in a mitochondria." [SO:xp] -synonym: "mitochondrial chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000820 -name: chloroplast_chromosome -def: "A chromosome originating in a chloroplast." [SO:xp] -synonym: "chloroplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000821 -name: chromoplast_chromosome -def: "A chromosome originating in a chromoplast." [SO:xp] -synonym: "chromoplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000822 -name: cyanelle_chromosome -def: "A chromosome originating in a cyanelle." [SO:xp] -synonym: "cyanelle chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000823 -name: leucoplast_chromosome -def: "A chromosome with origin in a leucoplast." [SO:xp] -synonym: "leucoplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000824 -name: macronuclear_chromosome -def: "A chromosome originating in a macronucleus." [SO:xp] -synonym: "macronuclear chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000083 ! macronuclear_sequence - -[Term] -id: SO:0000825 -name: micronuclear_chromosome -def: "A chromosome originating in a micronucleus." [SO:xp] -synonym: "micronuclear chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000084 ! micronuclear_sequence - -[Term] -id: SO:0000828 -name: nuclear_chromosome -def: "A chromosome originating in a nucleus." 
[SO:xp] -synonym: "nuclear chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000829 -name: nucleomorphic_chromosome -def: "A chromosome originating in a nucleomorph." [SO:xp] -synonym: "nucleomorphic chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000832 -name: promoter_region -def: "A region of sequence which is part of a promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -def: "A region of a mature transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! 
transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000838 -name: rRNA_primary_transcript_region -def: "A region of an rRNA primary transcript." [SO:ke] -comment: To allow transcribed_spacer_region to have a path to the root. -synonym: "rRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! 
biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000840 -name: repeat_component -def: "A region of a repeated sequence." [SO:ke] -comment: A manufactured to group the parts of repeats, to give them an is_a path back to the root. -synonym: "repeat component" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000843 -name: bacterial_RNApol_promoter_region -def: "A region which is part of a bacterial RNA polymerase promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of bacterial_RNApol_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000844 -name: RNApol_II_promoter_region -def: "A region of sequence which is a promoter for RNA polymerase II." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_II_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000845 -name: RNApol_III_promoter_type_1_region -def: "A region of sequence which is a promoter for RNA polymerase III type 1." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_1 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000846 -name: RNApol_III_promoter_type_2_region -def: "A region of sequence which is a promoter for RNA polymerase III type 2." 
[SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_2 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000847 -name: tmRNA_region -def: "A region of a tmRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of tmRNA, thus giving them an is_a path back to the root. -synonym: "tmRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000584 ! tmRNA - -[Term] -id: SO:0000848 -name: LTR_component -synonym: "long term repeat component" EXACT [] -synonym: "LTR component" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000849 -name: three_prime_LTR_component -synonym: "3' long terminal repeat component" EXACT [] -synonym: "three prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000850 -name: five_prime_LTR_component -synonym: "5' long term repeat component" EXACT [] -synonym: "five prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000851 -name: CDS_region -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0000853 -name: homologous_region -def: "A region that is homologous to another region." [SO:ke] -synonym: "homolog" EXACT [] -synonym: "homologous region" EXACT [] -synonym: "homologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Homology_(biology) "wiki" -is_a: SO:0000330 ! conserved_region -relationship: has_quality SO:0000857 ! 
homologous - -[Term] -id: SO:0000854 -name: paralogous_region -def: "A homologous_region that is paralogous to another region." [SO:ke] -comment: A term to be used in conjunction with the paralogous_to relationship. -synonym: "paralog" EXACT [] -synonym: "paralogous region" EXACT [] -synonym: "paralogue" EXACT [] -xref: http://en.wikipedia.org/wiki/Paralog#Paralogy "wiki" -is_a: SO:0000853 ! homologous_region -relationship: has_quality SO:0000859 ! paralogous - -[Term] -id: SO:0000855 -name: orthologous_region -def: "A homologous_region that is orthologous to another region." [SO:ke] -comment: This term should be used in conjunction with the similarity relationships defined in SO. -synonym: "ortholog" EXACT [] -synonym: "orthologous region" EXACT [] -synonym: "orthologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Ortholog#Orthology "wiki" -is_a: SO:0000853 ! homologous_region -relationship: has_quality SO:0000858 ! orthologous - -[Term] -id: SO:0000856 -name: conserved -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000857 -name: homologous -def: "Similarity due to common ancestry." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000858 -name: orthologous -def: "An attribute describing a kind of homology where divergence occured after a speciation event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000859 -name: paralogous -def: "An attribute describing a kind of homology where divergence occurred after a duplication event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000860 -name: syntenic -def: "Attribute describing sequence regions occurring in same order on chromosome of different species." [SO:ke] -xref: http://en.wikipedia.org/wiki/Syntenic "wiki" -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000861 -name: capped_primary_transcript -def: "A primary transcript that is capped." [SO:xp] -synonym: "capped primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript -relationship: adjacent_to SO:0000581 ! 
cap -relationship: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000862 -name: capped_mRNA -def: "An mRNA that is capped." [SO:xp] -synonym: "capped mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -relationship: adjacent_to SO:0000581 ! cap -relationship: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000863 -name: mRNA_attribute -def: "An attribute describing an mRNA feature." [SO:ke] -synonym: "mRNA attribute" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000864 -name: exemplar -def: "An attribute describing a sequence is representative of a class of similar sequences." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000865 -name: frameshift -def: "An attribute describing a sequence that contains a mutation involving the deletion or insertion of one or more bases, where this number is not divisible by 3." [SO:ke] -xref: http://en.wikipedia.org/wiki/Frameshift "wiki" -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000866 -name: minus_1_frameshift -def: "A frameshift caused by deleting one base." [SO:ke] -synonym: "minus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000867 -name: minus_2_frameshift -def: "A frameshift caused by deleting two bases." [SO:ke] -synonym: "minus 2 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000868 -name: plus_1_frameshift -def: "A frameshift caused by inserting one base." [SO:ke] -synonym: "plus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000869 -name: plus_2_framshift -def: "A frameshift caused by inserting two bases." [SO:ke] -synonym: "plus 2 framshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000870 -name: trans_spliced -def: "An attribute describing transcript sequence that is created by splicing exons from diferent genes." [SO:ke] -synonym: "trans-spliced" EXACT [] -is_a: SO:0000237 ! 
transcript_attribute - -[Term] -id: SO:0000871 -name: polyadenylated_mRNA -def: "An mRNA that is polyadenylated." [SO:xp] -synonym: "polyadenylated mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -relationship: adjacent_to SO:0000610 ! polyA_sequence -relationship: has_quality SO:0000246 ! polyadenylated - -[Term] -id: SO:0000872 -name: trans_spliced_mRNA -def: "An mRNA that is trans-spliced." [SO:xp] -synonym: "trans-spliced mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -is_a: SO:0000479 ! trans_spliced_transcript -relationship: adjacent_to SO:0000636 ! spliced_leader_RNA -relationship: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000873 -name: edited_transcript -def: "A transcript that is edited." [SO:ke] -synonym: "edited transcript" EXACT [] -is_a: SO:0000673 ! transcript -relationship: guided_by SO:0000602 ! guide_RNA -relationship: has_part SO:0000977 ! anchor_binding_site -relationship: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000874 -name: edited_transcript_by_A_to_I_substitution -def: "A transcript that has been edited by A to I substitution." [SO:ke] -synonym: "edited transcript by A to I substitution" EXACT [] -is_a: SO:0000873 ! edited_transcript - -[Term] -id: SO:0000875 -name: bound_by_protein -def: "An attribute describing a sequence that is bound by a protein." [SO:ke] -synonym: "bound by protein" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000876 -name: bound_by_nucleic_acid -def: "An attribute describing a sequence that is bound by a nucleic acid." [SO:ke] -synonym: "bound by nucleic acid" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000877 -name: alternatively_spliced -def: "An attribute describing a situation where a gene may encode for more than 1 transcript." [SO:ke] -synonym: "alternatively spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000878 -name: monocistronic -def: "An attribute describing a sequence that contains the code for one gene product." 
[SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000879 -name: dicistronic -def: "An attribute describing a sequence that contains the code for two gene products." [SO:ke] -is_a: SO:0000880 ! polycistronic - -[Term] -id: SO:0000880 -name: polycistronic -def: "An attribute describing a sequence that contains the code for more than one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000881 -name: recoded -def: "An attribute describing an mRNA sequence that has been reprogrammed at translation, causing localized alterations." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000882 -name: codon_redefined -def: "An attribute describing the alteration of codon meaning." [SO:ke] -synonym: "codon redefined" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000883 -name: stop_codon_read_through -def: "A stop codon redefined to be a new amino acid." [SO:ke] -synonym: "stop codon read through" EXACT [] -synonym: "stop codon readthrough" RELATED [] -is_a: SO:0000145 ! recoded_codon - -[Term] -id: SO:0000884 -name: stop_codon_redefined_as_pyrrolysine -def: "A stop codon redefined to be the new amino acid, pyrrolysine." [SO:ke] -synonym: "stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000885 -name: stop_codon_redefined_as_selenocysteine -def: "A stop codon redefined to be the new amino acid, selenocysteine." [SO:ke] -synonym: "stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000886 -name: recoded_by_translational_bypass -def: "Recoded mRNA where a block of nucleotides is not translated." [SO:ke] -synonym: "recoded by translational bypass" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000887 -name: translationally_frameshifted -def: "Recoding by frameshifting a particular site." [SO:ke] -synonym: "translationally frameshifted" EXACT [] -is_a: SO:0000881 ! 
recoded - -[Term] -id: SO:0000888 -name: maternally_imprinted_gene -def: "A gene that is maternally_imprinted." [SO:xp] -synonym: "maternally imprinted gene" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -relationship: has_quality SO:0000135 ! maternally_imprinted - -[Term] -id: SO:0000889 -name: paternally_imprinted_gene -def: "A gene that is paternally imprinted." [SO:xp] -synonym: "paternally imprinted gene" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -relationship: has_quality SO:0000136 ! paternally_imprinted - -[Term] -id: SO:0000890 -name: post_translationally_regulated_gene -def: "A gene that is post translationally regulated." [SO:xp] -synonym: "post translationally regulated gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000891 -name: negatively_autoregulated_gene -def: "A gene that is negatively autoreguated." [SO:xp] -synonym: "negatively autoregulated gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000473 ! negatively_autoregulated - -[Term] -id: SO:0000892 -name: positively_autoregulated_gene -def: "A gene that is positively autoregulated." [SO:xp] -synonym: "positively autoregulated gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000475 ! positively_autoregulated - -[Term] -id: SO:0000893 -name: silenced -def: "An attribute describing an epigenetic process where a gene is inactivated at transcriptional or translational level." [SO:ke] -xref: http://en.wikipedia.org/wiki/Silenced "wiki" -is_a: SO:0000126 ! transcriptionally_repressed - -[Term] -id: SO:0000894 -name: silenced_by_DNA_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA modifications, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA modification" EXACT [] -is_a: SO:0000893 ! 
silenced - -[Term] -id: SO:0000895 -name: silenced_by_DNA_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA methylation, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA methylation" EXACT [] -is_a: SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000896 -name: translationally_regulated_gene -def: "A gene that is translationally regulated." [SO:xp] -synonym: "translationally regulated gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000131 ! translationally_regulated - -[Term] -id: SO:0000897 -name: allelically_excluded_gene -def: "A gene that is allelically_excluded." [SO:xp] -synonym: "allelically excluded gene" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -relationship: has_quality SO:0000137 ! allelically_excluded - -[Term] -id: SO:0000898 -name: epigenetically_modified_gene -def: "A gene that is epigenetically modified." [SO:ke] -synonym: "epigenetically modified gene" EXACT [] -is_a: SO:0000704 ! gene -is_a: SO:0001720 ! epigenetically_modified_region -relationship: has_quality SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000899 -name: nuclear_mitochondrial -def: "An attribute describing a nuclear pseudogene of a mitochndrial gene." [SO:ke] -synonym: "nuclear mitochondrial" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000900 -name: processed -def: "An attribute describing a pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promotors, but often including a polyA tail." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000901 -name: unequally_crossed_over -def: "An attribute describing a pseudogene that was created by tandem duplication and unequal crossing over during recombination." 
[SO:ke] -synonym: "unequally crossed over" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000902 -name: transgene -def: "A transgene is a gene that has been transferred naturally or by any of a number of genetic engineering techniques from one organism to another." [SO:xp] -xref: http://en.wikipedia.org/wiki/Transgene "wiki" -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000903 -name: endogenous_retroviral_sequence -synonym: "endogenous retroviral sequence" EXACT [] -is_a: SO:0000751 ! proviral_location - -[Term] -id: SO:0000904 -name: rearranged_at_DNA_level -def: "An attribute to describe the sequence of a feature, where the DNA is rearranged." [SO:ke] -synonym: "rearranged at DNA level" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000905 -name: status -def: "An attribute describing the status of a feature, based on the available evidence." [SO:ke] -comment: This term is the hypernym of attributes and should not be annotated to. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000906 -name: independently_known -def: "Attribute to describe a feature that is independently known - not predicted." [SO:ke] -synonym: "independently known" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000907 -name: supported_by_sequence_similarity -def: "An attribute to describe a feature that has been predicted using sequence similarity techniques." [SO:ke] -synonym: "supported by sequence similarity" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000908 -name: supported_by_domain_match -def: "An attribute to describe a feature that has been predicted using sequence similarity of a known domain." [SO:ke] -synonym: "supported by domain match" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000909 -name: supported_by_EST_or_cDNA -def: "An attribute to describe a feature that has been predicted using sequence similarity to EST or cDNA data." 
[SO:ke] -synonym: "supported by EST or cDNA" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000910 -name: orphan -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000911 -name: predicted_by_ab_initio_computation -def: "An attribute describing a feature that is predicted by a computer program that did not rely on sequence similarity." [SO:ke] -synonym: "predicted by ab initio computation" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000912 -name: asx_turn -alt_id: BS:00203 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Aspartate or Asparagine (Asx), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0000913 -name: cloned_cDNA_insert -def: "A clone insert made from cDNA." [SO:xp] -synonym: "cloned cDNA insert" EXACT [] -is_a: SO:0000753 ! clone_insert -relationship: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000914 -name: cloned_genomic_insert -def: "A clone insert made from genomic DNA." [SO:xp] -synonym: "cloned genomic insert" EXACT [] -is_a: SO:0000753 ! clone_insert -relationship: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000915 -name: engineered_insert -def: "A clone insert that is engineered." [SO:xp] -synonym: "engineered insert" EXACT [] -is_a: SO:0000753 ! clone_insert -is_a: SO:0000804 ! engineered_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000916 -name: edit_operation -synonym: "edit operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000917 -name: insert_U -def: "An edit to insert a U." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. 
-synonym: "insert U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000918 -name: delete_U -def: "An edit to delete a uridine." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "delete U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000919 -name: substitute_A_to_I -def: "An edit to substitute an I for an A." [SO:ke] -synonym: "substitute A to I" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000920 -name: insert_C -def: "An edit to insert a cytidine." [SO:ke] -synonym: "insert C" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000921 -name: insert_dinucleotide -def: "An edit to insert a dinucleotide." [SO:ke] -synonym: "insert dinucleotide" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000922 -name: substitute_C_to_U -def: "An edit to substitute an U for a C." [SO:ke] -synonym: "substitute C to U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000923 -name: insert_G -def: "An edit to insert a G." [SO:ke] -synonym: "insert G" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000924 -name: insert_GC -def: "An edit to insert a GC dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. 
-synonym: "insert GC" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000925 -name: insert_GU -def: "An edit to insert a GU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000926 -name: insert_CU -def: "An edit to insert a CU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert CU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000927 -name: insert_AU -def: "An edit to insert a AU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. 
The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000928 -name: insert_AA -def: "An edit to insert a AA dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AA" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000929 -name: edited_mRNA -def: "An mRNA that is edited." [SO:xp] -synonym: "edited mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -is_a: SO:0000873 ! edited_transcript -relationship: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000930 -name: guide_RNA_region -def: "A region of guide RNA." [SO:ma] -synonym: "guide RNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000602 ! guide_RNA - -[Term] -id: SO:0000931 -name: anchor_region -def: "A region of a guide_RNA that base-pairs to a target mRNA." [SO:jk] -synonym: "anchor region" EXACT [] -is_a: SO:0000930 ! 
guide_RNA_region - -[Term] -id: SO:0000932 -name: pre_edited_mRNA -synonym: "pre-edited mRNA" EXACT [] -is_a: SO:0000120 ! protein_coding_primary_transcript - -[Term] -id: SO:0000933 -name: intermediate -def: "An attribute to describe a feature between stages of processing." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000934 -name: miRNA_target_site -def: "A miRNA target site is a binding site where the molecule is a micro RNA." [FB:cds] -synonym: "miRNA target site" EXACT [] -is_a: SO:0001655 ! nucleotide_binding_site - -[Term] -id: SO:0000935 -name: edited_CDS -def: "A CDS that is edited." [SO:xp] -synonym: "edited CDS" EXACT [] -is_a: SO:0000316 ! CDS -relationship: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000936 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -synonym: "vertebrate immunoglobulin T cell receptor rearranged segment" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000937 -name: vertebrate_immune_system_feature -is_obsolete: true - -[Term] -id: SO:0000938 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor rearranged gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000939 -name: vertebrate_immune_system_gene_recombination_signal_feature -synonym: "vertebrate immune system gene recombination signal feature" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000940 -name: recombinationally_rearranged -synonym: "recombinationally rearranged" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000941 -name: recombinationally_rearranged_vertebrate_immune_system_gene -def: "A recombinationally rearranged gene of the vertebrate immune system." [SO:xp] -synonym: "recombinationally rearranged vertebrate immune system gene" EXACT [] -is_a: SO:0000456 ! 
recombinationally_rearranged_gene - -[Term] -id: SO:0000942 -name: attP_site -def: "An integration/excision site of a phage chromosome at which a recombinase acts to insert the phage DNA at a cognate integration/excision site on a bacterial chromosome." [SO:as] -synonym: "attP site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0001042 ! phage_sequence - -[Term] -id: SO:0000943 -name: attB_site -def: "An integration/excision site of a bacterial chromosome at which a recombinase acts to insert foreign DNA containing a cognate integration/excision site." [SO:as] -synonym: "attB site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000944 -name: attL_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attB_site and the 3' portion of attP_site." [SO:as] -synonym: "attBP'" RELATED [] -synonym: "attL site" RELATED [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000945 -name: attR_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attP_site and the 3' portion of attB_site." [SO:as] -synonym: "attPB'" RELATED [] -synonym: "attR site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000946 -name: integration_excision_site -def: "A region specifically recognised by a recombinase, which inserts or removes another region marked by a distinct cognate integration/excision site." [SO:as] -synonym: "attachment site" RELATED [] -synonym: "integration excision site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000947 -name: resolution_site -def: "A region specifically recognised by a recombinase, which separates a physically contiguous circle of DNA into two physically separate circles." [SO:as] -synonym: "res site" EXACT [] -synonym: "resolution site" EXACT [] -is_a: SO:0000342 ! 
site_specific_recombination_target_region - -[Term] -id: SO:0000948 -name: inversion_site -def: "A region specifically recognised by a recombinase, which inverts the region flanked by a pair of sites." [SO:ma] -comment: A target region for site-specific inversion of a DNA region and which carries binding sites for a site-specific recombinase and accessory proteins as well as the site for specific cleavage by the recombinase. -synonym: "inversion site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000949 -name: dif_site -def: "A site at which replicated bacterial circular chromosomes are decatenated by site specific resolvase." [SO:as] -synonym: "dif site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000950 -name: attC_site -def: "An attC site is a sequence required for the integration of a DNA of an integron." [SO:as] -synonym: "attC site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000951 -name: eukaryotic_terminator -synonym: "eukaryotic terminator" EXACT [] -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000952 -name: oriV -def: "An origin of vegetative replication in plasmids and phages." [SO:as] -synonym: "origin of vegetative replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000953 -name: oriC -def: "An origin of bacterial chromosome replication." [SO:as] -synonym: "origin of bacterial chromosome replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000954 -name: DNA_chromosome -def: "Structural unit composed of a self-replicating, DNA molecule." [SO:ma] -synonym: "DNA chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0000955 -name: double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded DNA molecule." 
[SO:ma] -synonym: "double stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! DNA_chromosome -relationship: has_quality SO:0000985 ! double - -[Term] -id: SO:0000956 -name: single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded DNA molecule." [SO:ma] -synonym: "single stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! DNA_chromosome -relationship: has_quality SO:0000984 ! single - -[Term] -id: SO:0000957 -name: linear_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear DNA molecule." [SO:ma] -synonym: "linear double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! double_stranded_DNA_chromosome -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000958 -name: circular_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular DNA molecule." [SO:ma] -synonym: "circular double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! double_stranded_DNA_chromosome -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000959 -name: linear_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear DNA molecule." [SO:ma] -synonym: "linear single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! single_stranded_DNA_chromosome -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000960 -name: circular_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! single_stranded_DNA_chromosome -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000961 -name: RNA_chromosome -def: "Structural unit composed of a self-replicating, RNA molecule." [SO:ma] -synonym: "RNA chromosome" EXACT [] -is_a: SO:0000340 ! 
chromosome -relationship: has_quality SO:0000356 ! RNA - -[Term] -id: SO:0000962 -name: single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded RNA molecule." [SO:ma] -synonym: "single stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! RNA_chromosome -relationship: has_quality SO:0000984 ! single - -[Term] -id: SO:0000963 -name: linear_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear RNA molecule." [SO:ma] -synonym: "linear single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! single_stranded_RNA_chromosome -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000964 -name: linear_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear RNA molecule." [SO:ma] -synonym: "linear double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! double_stranded_RNA_chromosome -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000965 -name: double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded RNA molecule." [SO:ma] -synonym: "double stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! RNA_chromosome -relationship: has_quality SO:0000985 ! double - -[Term] -id: SO:0000966 -name: circular_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! single_stranded_RNA_chromosome -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000967 -name: circular_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular RNA molecule." [SO:ma] -synonym: "circular double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! double_stranded_RNA_chromosome -relationship: has_quality SO:0000988 ! 
circular - -[Term] -id: SO:0000968 -name: sequence_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "sequence replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000969 -name: rolling_circle -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070581. -synonym: "rolling circle" EXACT [] -xref: http://en.wikipedia.org/wiki/Rolling_circle "wiki" -is_obsolete: true - -[Term] -id: SO:0000970 -name: theta_replication -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070582 -synonym: "theta replication" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000971 -name: DNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0006260. -synonym: "DNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000972 -name: RNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "RNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000973 -name: insertion_sequence -def: "A terminal_inverted_repeat_element that is bacterial and only encodes the functions required for its transposition between these inverted repeats." [SO:as] -synonym: "insertion sequence" EXACT [] -synonym: "IS" RELATED [] -xref: http://en.wikipedia.org/wiki/Insertion_sequence "wiki" -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000975 -name: minicircle_gene -synonym: "minicircle gene" EXACT [] -is_a: SO:0000089 ! kinetoplast_gene -relationship: part_of SO:0000980 ! minicircle - -[Term] -id: SO:0000976 -name: cryptic -def: "A feature_attribute describing a feature that is not manifest under normal conditions." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000977 -name: anchor_binding_site -comment: Part of an edited transcript only. -synonym: "anchor binding site" EXACT [] -is_a: SO:0000833 ! 
transcript_region - -[Term] -id: SO:0000978 -name: template_region -def: "A region of a guide_RNA that specifies the insertions and deletions of bases in the editing of a target mRNA." [SO:jk] -synonym: "information region" EXACT [] -synonym: "template region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000979 -name: gRNA_encoding -def: "A non-protein_coding gene that encodes a guide_RNA." [SO:ma] -synonym: "gRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000980 -name: minicircle -alt_id: SO:0000974 -def: "A minicircle is a replicon, part of a kinetoplast, that encodes for guide RNAs." [PMID:8395055] -synonym: "minicircle_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Minicircle "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000981 -name: rho_dependent_bacterial_terminator -synonym: "rho dependent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000982 -name: rho_independent_bacterial_terminator -synonym: "rho independent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000983 -name: strand_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "strand attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000984 -name: single -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000985 -name: double -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000986 -name: topology_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "topology attribute" EXACT [] -is_a: SO:0000443 ! 
polymer_attribute - -[Term] -id: SO:0000987 -name: linear -def: "A quality of a nucleotide polymer that has a 3'-terminal residue and a 5'-terminal residue." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "two-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000988 ! circular - -[Term] -id: SO:0000988 -name: circular -def: "A quality of a nucleotide polymer that has no terminal nucleotide residues." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "zero-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute - -[Term] -id: SO:0000989 -name: class_II_RNA -def: "Small non-coding RNA (59-60 nt long) containing 5' and 3' ends that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -synonym: "class II RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000990 -name: class_I_RNA -def: "Small non-coding RNA (55-65 nt long) containing highly conserved 5' and 3' ends (16 and 8 nt, respectively) that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -comment: Requested by Karen Pilcher - Dictybase. song-Term Tracker-1574577. -synonym: "class I RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000991 -name: genomic_DNA -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "genomic DNA" EXACT [] -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000992 -name: BAC_cloned_genomic_insert -comment: Requested by Andy Schroder - Flybase Harvard, Nov 2006. -synonym: "BAC cloned genomic insert" EXACT [] -is_a: SO:0000914 ! cloned_genomic_insert -relationship: derives_from SO:0000153 ! 
BAC - -[Term] -id: SO:0000993 -name: consensus -comment: Term added Dec 06 to comply with mapping to MGED terms. It should be used to generate consensus regions. The specific cross product terms they require are consensus_region and consensus_mRNA. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000994 -name: consensus_region -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus region" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000995 -name: consensus_mRNA -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -is_a: SO:0000994 ! consensus_region -relationship: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000996 -name: predicted_gene -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "predicted gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000732 ! predicted - -[Term] -id: SO:0000997 -name: gene_fragment -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "gene fragment" EXACT [] -is_a: SO:0000842 ! gene_component_region -relationship: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0000998 -name: recursive_splice_site -def: "A recursive splice site is a splice site which subdivides a large intron. Recursive splicing is a mechanism that splices large introns by sub dividing the intron at non exonic elements and alternate exons." [http://www.genetics.org/cgi/content/full/170/2/661] -synonym: "recursive splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000999 -name: BAC_end -def: "A region of sequence from the end of a BAC clone that may provide a highly specific marker." [SO:ke] -comment: Requested by Keith Boroevich December, 2006. 
-synonym: "BAC end" EXACT [] -synonym: "BAC end sequence" EXACT [] -synonym: "BES" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000153 ! BAC - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001003 -name: solo_LTR -def: "A recombination product between the 2 LTR of the same element." [SO:ke] -comment: Requested by Hadi Quesneville January 2007. -synonym: "solo LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0001004 -name: low_complexity -synonym: "low complexity" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0001005 -name: low_complexity_region -synonym: "low complexity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: has_quality SO:0001004 ! low_complexity - -[Term] -id: SO:0001006 -name: prophage -def: "A phage genome after it has established in the host genome in a latent/immune state either as a plasmid or as an integrated \"island\"." 
[GOC:jl] -xref: http://en.wikipedia.org/wiki/Prophage "wiki" -is_a: SO:0000113 ! proviral_region - -[Term] -id: SO:0001007 -name: cryptic_prophage -def: "A remnant of an integrated prophage in the host genome or an \"island\" in the host genome that includes phage like-genes." [GOC:jl] -comment: This is not cryptic in the same sense as a cryptic gene or cryptic splice site. -synonym: "cryptic prophage" EXACT [] -xref: http://ecoliwiki.net/colipedia/index.php/Category:Cryptic_Prophage.w -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001008 -name: tetraloop -def: "A base-paired stem with loop of 4 non-hydrogen bonded nucleotides." [SO:ke] -xref: http://en.wikipedia.org/wiki/Tetraloop "wiki" -is_a: SO:0000313 ! stem_loop - -[Term] -id: SO:0001009 -name: DNA_constraint_sequence -def: "A double-stranded DNA used to control macromolecular structure and function." [http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au\]&dispmax=50] -synonym: "DNA constraint" EXACT [] -synonym: "DNA constraint sequence" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0001010 -name: i_motif -def: "A cytosine rich domain whereby strands associate both inter- and intramolecularly at moderately acidic pH." [PMID:9753739] -synonym: "i motif" EXACT [] -synonym: "short intercalated motif" EXACT [] -is_a: SO:0000142 ! DNA_sequence_secondary_structure - -[Term] -id: SO:0001011 -name: PNA_oligo -def: "Peptide nucleic acid, is a chemical not known to occur naturally but is artificially synthesized and used in some biological research and medical treatments. The PNA backbone is composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [SO:ke] -synonym: "peptide nucleic acid" EXACT [] -synonym: "PNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Peptide_nucleic_acid "wiki" -is_a: SO:0001247 ! 
synthetic_oligo -relationship: has_quality SO:0001184 ! PNA - -[Term] -id: SO:0001012 -name: DNAzyme -def: "A DNA sequence with catalytic activity." [SO:cb] -comment: Added by request from Colin Batchelor. -synonym: "catalytic DNA" EXACT [] -synonym: "deoxyribozyme" RELATED [] -synonym: "DNA enzyme" EXACT [] -is_a: SO:0000696 ! oligo -relationship: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0001013 -name: MNP -def: "A multiple nucleotide polymorphism with alleles of common length > 1, for example AAA/TTT." [http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?rs=rs2067431] -synonym: "multiple nucleotide polymorphism" RELATED [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0001014 -name: intron_domain -comment: Requested by Colin Batchelor, Feb 2007. -synonym: "intron domain" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000188 ! intron - -[Term] -id: SO:0001015 -name: wobble_base_pair -def: "A type of non-canonical base pairing, most commonly between G and U, which is important for the secondary structure of RNAs. It has similar thermodynamic stability to the Watson-Crick pairing. Wobble base pairs only have two hydrogen bonds. Other wobble base pair possibilities are I-A, I-U and I-C." [PMID:11256617] -synonym: "wobble base pair" EXACT [] -synonym: "wobble pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Wobble_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0001016 -name: internal_guide_sequence -def: "A purine-rich sequence in the group I introns which determines the locations of the splice sites in group I intron splicing and has catalytic activity." [SO:cb] -synonym: "IGS" EXACT [] -synonym: "internal guide sequence" EXACT [] -is_a: SO:0001014 ! intron_domain -relationship: part_of SO:0000587 ! group_I_intron - -[Term] -id: SO:0001017 -name: silent_mutation -def: "A sequence variant that does not affect protein function. 
Silent mutations may occur in genic ( CDS, UTR, intron etc) and intergenic regions. Silent mutations may have affects on processes such as splicing and regulation." [SO:ke] -comment: Added in March 2007 in after meeting with pharmgkb. Although this term is in common usage, it is better to annotate with the most specific term possible, such as synonymous codon, intron variant etc. -synonym: "silent mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Silent_mutation "wiki" -xref: loinc:LA6700-4 "Silent" -is_a: SO:0001878 ! feature_variant - -[Term] -id: SO:0001018 -name: epitope -def: "A binding site that, in the molecule, interacts selectively and non-covalently with antibodies, B cells or T cells." [http://en.wikipedia.org/wiki/Epitope, SO:cb] -comment: Requested by Trish Whetzel. -xref: http://en.wikipedia.org/wiki/Epitope "wiki" -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -subset: SOFA -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0001020 -name: sequence_variant_affecting_copy_number -comment: OBSOLETE: This term was deleted as it conflated more than one term. The alteration is separate from the effect. -synonym: "mutation affecting copy number" EXACT [] -synonym: "sequence variant affecting copy number" EXACT [] -is_obsolete: true -replaced_by: SO:0001563 - -[Term] -id: SO:0001021 -name: chromosome_breakpoint -alt_id: SO:0001242 -synonym: "aberration breakpoint" EXACT [] -synonym: "aberration_junction" EXACT [] -synonym: "chromosome breakpoint" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000340 ! 
chromosome - -[Term] -id: SO:0001022 -name: inversion_breakpoint -def: "The point within a chromosome where an inversion begins or ends." [SO:cb] -synonym: "inversion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001023 -name: allele -def: "An allele is one of a set of coexisting sequence variants of a gene." [SO:immuno_workshop] -synonym: "allelomorph" EXACT [] -xref: http://en.wikipedia.org/wiki/Allele "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000704 ! gene - -[Term] -id: SO:0001024 -name: haplotype -def: "A haplotype is one of a set of coexisting sequence variants of a haplotype block." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Haplotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000355 ! haplotype_block - -[Term] -id: SO:0001025 -name: polymorphic_sequence_variant -def: "A sequence variant that is segregating in one or more natural populations of a species." [SO:immuno_workshop] -synonym: "polymorphic sequence variant" EXACT [] -is_a: SO:0001023 ! allele - -[Term] -id: SO:0001026 -name: genome -def: "A genome is the sum of genetic material within a cell or virion." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genome "wiki" -is_a: SO:0001260 ! sequence_collection -relationship: has_part SO:0001235 ! replicon - -[Term] -id: SO:0001027 -name: genotype -def: "A genotype is a variant genome, complete or incomplete." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0001026 ! genome - -[Term] -id: SO:0001028 -name: diplotype -def: "A diplotype is a pair of haplotypes from a given individual. It is a genotype where the phase is known." [SO:immuno_workshop] -is_a: SO:0001507 ! variant_collection - -[Term] -id: SO:0001029 -name: direction_attribute -synonym: "direction attribute" EXACT [] -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0001030 -name: forward -def: "Forward is an attribute of the feature, where the feature is in the 5' to 3' direction." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001031 -name: reverse -def: "Reverse is an attribute of the feature, where the feature is in the 3' to 5' direction. Again could be applied to primer." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001032 -name: mitochondrial_DNA -comment: This terms is used by MO. -synonym: "mitochondrial DNA" EXACT [] -synonym: "mtDNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_DNA "wiki" -is_a: SO:0000737 ! mitochondrial_sequence -relationship: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001033 -name: chloroplast_DNA -comment: This term is used by MO. -synonym: "chloroplast DNA" EXACT [] -is_a: SO:0000745 ! chloroplast_sequence -relationship: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001034 -name: mirtron -def: "A de-branched intron which mimics the structure of pre-miRNA and enters the miRNA processing pathway without Drosha mediated cleavage." [PMID:17589500, SO:ma] -comment: Ruby et al. Nature 448:83 describe a new class of miRNAs that are derived from de-branched introns. -is_a: SO:0001014 ! intron_domain - -[Term] -id: SO:0001035 -name: piRNA -def: "A small non coding RNA, part of a silencing system that prevents the spreading of selfish genetic elements." [SO:ke] -synonym: "piwi-associated RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/PiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001036 -name: arginyl_tRNA -def: "A tRNA sequence that has an arginine anticodon, and a 3' arginine binding region." [SO:ke] -synonym: "arginyl tRNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000212 ! 
arginine_tRNA_primary_transcript - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region -relationship: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0001038 -name: extrachromosomal_mobile_genetic_element -def: "An MGE that is not integrated into the host chromosome." [SO:ke] -synonym: "extrachromosomal mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001040 -name: integrated_plasmid -def: "A plasmid sequence that is integrated within the host chromosome." [SO:ke] -synonym: "integrated plasmid" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element -relationship: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0001041 -name: viral_sequence -def: "The region of nucleotide sequence of a virus, a submicroscopic particle that replicates by infecting a host cell." [SO:ke] -comment: The definitions of the children of this term were revised Decemeber 2007 after discussion on song-devel. The resulting definitions are slightly unweildy but hopefully more logically correct. -synonym: "viral sequence" EXACT [] -synonym: "virus sequence" EXACT [] -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0001042 -name: phage_sequence -def: "The nucleotide sequence of a virus that infects bacteria." 
[SO:ke] -synonym: "bacteriophage" EXACT [] -synonym: "phage" EXACT [] -synonym: "phage sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Bacteriophage "wiki" -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001043 -name: attCtn_site -def: "An attachment site located on a conjugative transposon and used for site-specific integration of a conjugative transposon." [Phigo:at] -synonym: "attCtn site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000371 ! conjugative_transposon - -[Term] -id: SO:0001044 -name: nuclear_mt_pseudogene -def: "A nuclear pseudogene of either coding or non-coding mitochondria derived sequence." [SO:xp] -comment: Definition change requested by Val, 3172757. -synonym: "nuclear mitochondrial pseudogene" EXACT [] -synonym: "nuclear mt pseudogene" EXACT [] -synonym: "NUMT" EXACT [] -xref: http://en.wikipedia.org/wiki/Numt "wikipedia" -is_a: SO:0001760 ! non_processed_pseudogene - -[Term] -id: SO:0001045 -name: cointegrated_plasmid -def: "A MGE region consisting of two fused plasmids resulting from a replicative transposition event." [phigo:at] -synonym: "cointegrated plasmid" EXACT [] -synonym: "cointegrated replicon" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0001046 -name: IRLinv_site -def: "Component of the inversion site located at the left of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRLinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001047 -name: IRRinv_site -def: "Component of the inversion site located at the right of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRRinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001048 -name: inversion_site_part -def: "A region located within an inversion site." 
[SO:ke] -comment: A term created to allow the parts of an inversion site have an is_a path back to the root. -synonym: "inversion site part" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0001049 -name: defective_conjugative_transposon -def: "An island that contains genes for integration/excision and the gene and site for the initiation of intercellular transfer by conjugation. It can be complemented for transfer by a conjugative transposon." [Phigo:ariane] -synonym: "defective conjugative transposon" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001050 -name: repeat_fragment -def: "A portion of a repeat, interrupted by the insertion of another element." [SO:ke] -comment: Requested by Chris Smith, and others at Flybase to help annotate nested repeats. -synonym: "repeat fragment" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0001649 ! nested_repeat - -[Term] -id: SO:0001051 -name: nested_region -is_obsolete: true - -[Term] -id: SO:0001052 -name: nested_repeat -is_obsolete: true - -[Term] -id: SO:0001053 -name: nested_transposon -is_obsolete: true - -[Term] -id: SO:0001054 -name: transposon_fragment -def: "A portion of a transposon, interrupted by the insertion of another element." [SO:ke] -synonym: "transposon fragment" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0001648 ! nested_transposon - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -subset: SOFA -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." [SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0001679 ! 
transcription_regulatory_region - -[Term] -id: SO:0001057 -name: enhanceosome -is_obsolete: true - -[Term] -id: SO:0001058 -name: promoter_targeting_sequence -def: "A transcriptional_cis_regulatory_region that restricts the activity of a CRM to a single promoter and which functions only when both itself and an insulator are located between the CRM and the promoter." [SO:regcreative] -synonym: "promoter targeting sequence" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. -subset: SOFA -synonym: "partially characterised change in DNA sequence" NARROW [] -synonym: "partially_characterised_change_in_DNA_sequence" NARROW [] -synonym: "sequence alteration" EXACT [] -synonym: "uncharacterised_change_in_nucleotide_sequence" NARROW [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001060 -name: sequence_variant -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -synonym: "sequence variant" EXACT [] - -[Term] -id: SO:0001061 -name: propeptide_cleavage_site -alt_id: BS:00063 -def: "The propeptide_cleavage_site is the arginine/lysine boundary on a propeptide where cleavage occurs." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "propeptide cleavage site" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0001062 -name: propeptide -alt_id: BS:00077 -def: "Part of a peptide chain which is cleaved off during the formation of the mature protein." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "propep" RELATED [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Propeptide "wiki" -is_a: SO:0100011 ! 
cleaved_peptide_region - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001064 -name: active_peptide -alt_id: BS:00076 -def: "Active peptides are proteins which are biologically active, released from a precursor molecule." [EBIBS:GAR, UniProt:curation_manual] -comment: Hormones, neuropeptides, antimicrobial peptides, are active peptides. They are typically short (<40 amino acids) in length. -subset: biosapiens -synonym: "active peptide" EXACT [] -synonym: "peptide" BROAD [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Peptide "wiki" -is_a: SO:0000419 ! mature_protein_region - -[Term] -id: SO:0001066 -name: compositionally_biased_region_of_peptide -alt_id: BS:00068 -def: "Polypeptide region that is rich in a particular amino acid or homopolymeric and greater than three residues in length." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "compbias" RELATED [uniprot:feature_type] -synonym: "compositional bias" RELATED [] -synonym: "compositionally biased" RELATED [] -synonym: "compositionally biased region of peptide" RELATED [] -synonym: "compositionally_biased_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001067 -name: polypeptide_motif -alt_id: BS:00032 -def: "A sequence motif is a short (up to 20 amino acids) region of biological interest. Such motifs, although they are too short to constitute functional domains, share sequence similarities and are conserved in different proteins. They display a common function (protein-binding, subcellular location etc.)." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. 
-subset: biosapiens -synonym: "motif" BROAD [uniprot:feature_type] -synonym: "polypeptide motif" EXACT [] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001068 -name: polypeptide_repeat -alt_id: BS:00070 -def: "A polypeptide_repeat is a single copy of an internal sequence repetition." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "polypeptide repeat" EXACT [] -synonym: "repeat" RELATED [uniprot:feature_type] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001070 -name: polypeptide_structural_region -alt_id: BS:00337 -def: "Region of polypeptide with a given structural property." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "polypeptide structural region" EXACT [] -synonym: "structural_region" RELATED [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001071 -name: membrane_structure -alt_id: BS:00128 -def: "Arrangement of the polypeptide with respect to the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "membrane structure" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001072 -name: extramembrane_polypeptide_region -alt_id: BS:00154 -def: "Polypeptide region that is localized outside of a lipid bilayer." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "extramembrane" RELATED BS [] -synonym: "extramembrane polypeptide region" EXACT [] -synonym: "extramembrane_region" RELATED BS [] -synonym: "topo_dom" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001073 -name: cytoplasmic_polypeptide_region -alt_id: BS:00145 -def: "Polypeptide region that is localized inside the cytoplasm." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "cytoplasm_location" EXACT BS [] -synonym: "cytoplasmic polypeptide region" EXACT [] -synonym: "inside" RELATED BS [] -is_a: SO:0001072 ! 
extramembrane_polypeptide_region - -[Term] -id: SO:0001074 -name: non_cytoplasmic_polypeptide_region -alt_id: BS:00144 -def: "Polypeptide region that is localized outside of a lipid bilayer and outside of the cytoplasm." [EBIBS:GAR, SO:cb] -comment: This could be inside an organelle within the cell. -subset: biosapiens -synonym: "non cytoplasmic polypeptide region" EXACT [] -synonym: "non_cytoplasm_location" EXACT BS [] -synonym: "outside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001075 -name: intramembrane_polypeptide_region -alt_id: BS:00156 -def: "Polypeptide region present in the lipid bilayer." [EBIBS:GAR] -subset: biosapiens -synonym: "intramembrane" RELATED BS [] -synonym: "intramembrane polypeptide region" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001076 -name: membrane_peptide_loop -alt_id: BS:00155 -def: "Polypeptide region localized within the lipid bilayer where both ends traverse the same membrane." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "membrane peptide loop" EXACT [] -synonym: "membrane_loop" RELATED BS [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001077 -name: transmembrane_polypeptide_region -alt_id: BS:00158 -def: "Polypeptide region traversing the lipid bilayer." [EBIBS:GAR, UniProt:curator_manual] -subset: biosapiens -synonym: "transmem" RELATED BS [uniprot:feature_type] -synonym: "transmembrane" RELATED BS [] -synonym: "transmembrane polypeptide region" EXACT [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001078 -name: polypeptide_secondary_structure -alt_id: BS:00003 -def: "A region of peptide with secondary structure has hydrogen bonding along the peptide chain that causes a defined conformation of the chain." [EBIBS:GAR] -comment: Biosapien term was secondary_structure. 
-subset: biosapiens -synonym: "2nary structure" RELATED BS [] -synonym: "polypeptide secondary structure" EXACT [] -synonym: "secondary structure" RELATED BS [] -synonym: "secondary structure region" RELATED BS [] -synonym: "secondary_structure" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Secondary_structure "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001079 -name: polypeptide_structural_motif -alt_id: BS:0000338 -def: "Motif is a three-dimensional structural element within the chain, which appears also in a variety of other molecules. Unlike a domain, a motif does not need to form a stable globular unit." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide structural motif" RELATED [] -synonym: "structural_motif" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Structural_motif "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001080 -name: coiled_coil -alt_id: BS:00041 -def: "A coiled coil is a structural motif in proteins, in which alpha-helices are coiled together like the strands of a rope." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "coiled" RELATED BS [uniprot:feature_type] -synonym: "coiled coil" EXACT [] -xref: http://en.wikipedia.org/wiki/Coiled_coil "wiki" -is_a: SO:0001079 ! polypeptide_structural_motif - -[Term] -id: SO:0001081 -name: helix_turn_helix -alt_id: BS:00147 -def: "A motif comprising two helices separated by a turn." [EBIBS:GAR] -subset: biosapiens -synonym: "helix turn helix" EXACT [] -synonym: "helix-turn-helix" EXACT [] -synonym: "HTH" RELATED BS [] -is_a: SO:0001079 ! polypeptide_structural_motif -relationship: has_part SO:0001114 ! peptide_helix -relationship: has_part SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001082 -name: polypeptide_sequencing_information -alt_id: BS:00125 -def: "Incompatibility in the sequence due to some experimental problem." [EBIBS:GAR] -comment: Range. 
-subset: biosapiens -synonym: "sequencing_information" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0001083 -name: non_adjacent_residues -alt_id: BS:00182 -def: "Indicates that two consecutive residues in a fragment sequence are not consecutive in the full-length protein and that there are a number of unsequenced residues between them." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "non consecutive" EXACT [] -synonym: "non_cons" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001084 -name: non_terminal_residue -alt_id: BS:00072 -def: "The residue at an extremity of the sequence is not the terminal residue." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "non terminal" EXACT [] -synonym: "non_ter" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001085 -name: sequence_conflict -alt_id: BS:00069 -def: "Different sources report differing sequences." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "conflict" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001086 -name: sequence_uncertainty -alt_id: BS:00181 -def: "Describes the positions in a sequence where the authors are unsure about the sequence assignment." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "unsure" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001087 -name: cross_link -alt_id: BS:00178 -def: "Posttranslationally formed amino acid bonds." 
[EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "cross link" EXACT [] -synonym: "crosslink" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001088 -name: disulfide_bond -alt_id: BS:00028 -def: "The covalent bond between sulfur atoms that binds two peptide chains or different parts of one peptide chain and is a structural determinant in many protein molecules." [EBIBS:GAR, UniProt:curation_manual] -comment: 2 discreet & joined. -subset: biosapiens -synonym: "disulfid" RELATED [] -synonym: "disulfide" RELATED [] -synonym: "disulfide bond" RELATED [] -synonym: "disulphide" EXACT [] -synonym: "disulphide bond" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001089 -name: post_translationally_modified_region -alt_id: BS:00052 -def: "A region where a transformation occurs in a protein after it has been synthesized. This which may regulate, stabilize, crosslink or introduce new chemical functionalities in the protein." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mod_res" EXACT [uniprot:feature_type] -synonym: "modified residue" EXACT [] -synonym: "post_translational_modification" EXACT [] -xref: http://en.wikipedia.org/wiki/Post_translational_modification "wiki" -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001090 -name: covalent_binding_site -alt_id: BS:00246 -def: "Binding involving a covalent bond." [EBIBS:GAR] -subset: biosapiens -synonym: "covalent binding site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001091 -name: non_covalent_binding_site -alt_id: BS:00029 -def: "Binding site for any chemical group (co-enzyme, prosthetic group, etc.)." [EBIBS:GAR] -comment: Discrete. 
-subset: biosapiens -synonym: "binding" RELATED [uniprot:curation] -synonym: "binding site" RELATED [] -synonym: "non covalent binding site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001092 -name: polypeptide_metal_contact -alt_id: BS:00027 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with metal ions." [EBIBS:GAR, SO:cb, UniProt:curation_manual] -comment: Residue is part of a binding site for a metal ion. -subset: biosapiens -synonym: "metal_binding" RELATED [] -is_a: SO:0001656 ! metal_binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001093 -name: protein_protein_contact -alt_id: BS:00131 -def: "A binding site that, in the protein molecule, interacts selectively and non-covalently with polypeptide residues." [EBIBS:GAR, UniProt:Curation_manual] -subset: biosapiens -synonym: "protein protein contact" EXACT [] -synonym: "protein protein contact site" EXACT [] -synonym: "protein_protein_interaction" RELATED [] -xref: http://en.wikipedia.org/wiki/Protein_protein_interaction "wiki" -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001094 -name: polypeptide_calcium_ion_contact_site -alt_id: BS:00186 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with calcium ions." [EBIBS:GAR] -comment: Residue involved in contact with calcium. -subset: biosapiens -synonym: "ca bind" RELATED [] -synonym: "Ca_contact_site" EXACT [] -synonym: "polypeptide calcium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001095 -name: polypeptide_cobalt_ion_contact_site -alt_id: BS:00136 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with cobalt ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Co_contact_site" EXACT [] -synonym: "polypeptide cobalt ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001096 -name: polypeptide_copper_ion_contact_site -alt_id: BS:00146 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with copper ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Cu_contact_site" EXACT [] -synonym: "polypeptide copper ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001097 -name: polypeptide_iron_ion_contact_site -alt_id: BS:00137 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with iron ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Fe_contact_site" EXACT [] -synonym: "polypeptide iron ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001098 -name: polypeptide_magnesium_ion_contact_site -alt_id: BS:00187 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with magnesium ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Mg_contact_site" EXACT [] -synonym: "polypeptide magnesium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001099 -name: polypeptide_manganese_ion_contact_site -alt_id: BS:00140 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with manganese ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Mn_contact_site" EXACT [] -synonym: "polypeptide manganese ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001100 -name: polypeptide_molybdenum_ion_contact_site -alt_id: BS:00141 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with molybdenum ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Mo_contact_site" EXACT [] -synonym: "polypeptide molybdenum ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001101 -name: polypeptide_nickel_ion_contact_site -alt_id: BS:00142 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with nickel ions." [EBIBS:GAR] -subset: biosapiens -synonym: "Ni_contact_site" EXACT [] -synonym: "polypeptide nickel ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001102 -name: polypeptide_tungsten_ion_contact_site -alt_id: BS:00143 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with tungsten ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "polypeptide tungsten ion contact site" EXACT [] -synonym: "W_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001103 -name: polypeptide_zinc_ion_contact_site -alt_id: BS:00185 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with zinc ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "polypeptide zinc ion contact site" EXACT [] -synonym: "Zn_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001104 -name: catalytic_residue -alt_id: BS:00026 -def: "Amino acid involved in the activity of an enzyme." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "act_site" RELATED [uniprot:feature_type] -synonym: "active site residue" EXACT [] -synonym: "catalytic residue" EXACT [] -is_a: SO:0001237 ! amino_acid -relationship: part_of SO:0100019 ! polypeptide_catalytic_motif - -[Term] -id: SO:0001105 -name: polypeptide_ligand_contact -alt_id: BS:00157 -def: "Residues which interact with a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide ligand contact" EXACT [] -synonym: "protein-ligand interaction" RELATED [] -is_a: SO:0001657 ! ligand_binding_site -is_a: SO:0100002 ! 
molecular_contact_region - -[Term] -id: SO:0001106 -name: asx_motif -alt_id: BS:00202 -def: "A motif of five consecutive residues and two H-bonds in which: Residue(i) is Aspartate or Asparagine (Asx), side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3), main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001107 -name: beta_bulge -alt_id: BS:00208 -def: "A motif of three residues within a beta-sheet in which the main chains of two consecutive residues are H-bonded to that of the third, and in which the dihedral angles are as follows: Residue(i): -140 degrees < phi(l) -20 degrees , -90 degrees < psi(l) < 40 degrees. Residue (i+1): -180 degrees < phi < -25 degrees or +120 degrees < phi < +180 degrees, +40 degrees < psi < +180 degrees or -180 degrees < psi < -120 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge" EXACT [] -xref: http://en.wikipedia.org/wiki/Beta_bulge "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001108 -name: beta_bulge_loop -alt_id: BS:00209 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds. Beta bulge loops often occur at the loop ends of beta-hairpins." [EBIBS:GAR, Http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001109 -name: beta_bulge_loop_five -alt_id: BS:00210 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+4), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+3), these loops have an RL nest at residues i+2 and i+3." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop five" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001110 -name: beta_bulge_loop_six -alt_id: BS:00211 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+5), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+4), these loops have an RL nest at residues i+3 and i+4." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop six" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001111 -name: beta_strand -alt_id: BS:00042 -def: "A beta strand describes a single length of polypeptide chain that forms part of a beta sheet. A single continuous stretch of amino acids adopting an extended conformation of hydrogen bonds between the N-O and the C=O of another part of the peptide. This forms a secondary protein structure in which two or more extended polypeptide regions are hydrogen-bonded to one another in a planar array." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "strand" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Beta_sheet "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001112 -name: antiparallel_beta_strand -alt_id: BS:0000341 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (one running N-terminal to C-terminal and one running C-terminal to N-terminal). Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i) and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they form two mutual backbone hydrogen bonds to each other's flanking peptide groups; this is known as a close pair of hydrogen bonds. 
The peptide backbone dihedral angles (phi, psi) are about (-140 degrees, 135 degrees) in antiparallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "antiparallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001113 -name: parallel_beta_strand -alt_id: BS:00151 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (both running N-terminal to C-terminal). This orientation is slightly less stable because it introduces nonplanarity in the inter-strand hydrogen bonding pattern. Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i)and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they do not hydrogen bond to each other; rather, one residue forms hydrogen bonds to the residues that flank the other (but not vice versa). For example, residue i may form hydrogen bonds to residues j - 1 and j + 1; this is known as a wide pair of hydrogen bonds. By contrast, residue j may hydrogen-bond to different residues altogether, or to none at all. The dihedral angles (phi, psi) are about (-120 degrees, 115 degrees) in parallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "parallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001114 -name: peptide_helix -alt_id: BS:00152 -def: "A helix is a secondary_structure conformation where the peptide backbone forms a coil." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "helix" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001115 -name: left_handed_peptide_helix -alt_id: BS:00222 -def: "A left handed helix is a region of peptide where the coiled conformation turns in an anticlockwise, left handed screw." 
[EBIBS:GAR] -subset: biosapiens -synonym: "helix-l" RELATED [] -synonym: "left handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001116 -name: right_handed_peptide_helix -alt_id: BS:0000339 -def: "A right handed helix is a region of peptide where the coiled conformation turns in a clockwise, right handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix" RELATED BS [] -synonym: "right handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001117 -name: alpha_helix -alt_id: BS:00040 -def: "The helix has 3.6 residues per turn which corersponds to a translation of 1.5 angstroms (= 0.15 nm) along the helical axis. Every backbone N-H group donates a hydrogen bond to the backbone C=O group of the amino acid four residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "a-helix" RELATED BS [] -synonym: "helix" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Alpha_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001118 -name: pi_helix -alt_id: BS:00153 -def: "The pi helix has 4.1 residues per turn and a translation of 1.15 (=0.115 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid five residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "pi helix" EXACT [] -xref: http://en.wikipedia.org/wiki/Pi_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001119 -name: three_ten_helix -alt_id: BS:0000340 -def: "The 3-10 helix has 3 residues per turn with a translation of 2.0 angstroms (=0.2 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid three residues earlier." [EBIBS:GAR] -comment: Range. 
-subset: biosapiens -synonym: "3(10) helix" EXACT [] -synonym: "3-10 helix" EXACT [] -synonym: "310 helix" EXACT [] -synonym: "three ten helix" EXACT [] -xref: http://en.wikipedia.org/wiki/310_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001120 -name: polypeptide_nest_motif -alt_id: BS:00223 -def: "A motif of two consecutive residues with dihedral angles. Nest should not have Proline as any residue. Nests frequently occur as parts of other motifs such as Schellman loops." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest" RELATED BS [] -synonym: "nest_motif" EXACT [] -synonym: "polypeptide nest motif" RELATED [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001121 -name: polypeptide_nest_left_right_motif -alt_id: BS:00224 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_left_right" EXACT [] -synonym: "nest_lr" EXACT [] -synonym: "polypeptide nest left right motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001122 -name: polypeptide_nest_right_left_motif -alt_id: BS:00225 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_right_left" EXACT [] -synonym: "nest_rl" EXACT [] -synonym: "polypeptide nest right left motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001123 -name: schellmann_loop -alt_id: BS:00226 -def: "A motif of six or seven consecutive residues that contains two H-bonds." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "paperclip" RELATED BS [] -synonym: "paperclip loop" RELATED [] -synonym: "schellmann loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001124 -name: schellmann_loop_seven -alt_id: BS:00228 -def: "Wild type: A motif of seven consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+6), the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+5)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop seven" EXACT [] -synonym: "seven-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001125 -name: schellmann_loop_six -alt_id: BS:00227 -def: "Common Type: A motif of six consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+5) the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop six" EXACT [] -synonym: "six-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001126 -name: serine_threonine_motif -alt_id: BS:00229 -def: "A motif of five consecutive residues and two hydrogen bonds in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3) , the main-chain CO group of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine motif" EXACT [] -synonym: "st motif" EXACT [] -synonym: "st_motif" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001127 -name: serine_threonine_staple_motif -alt_id: BS:00230 -def: "A motif of four or five consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain OH of residue(i) is H-bonded to the main-chain CO of residue(i3) or (i4), Phi angles of residues(i1), (i2) and (i3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine threonine staple motif" EXACT [] -synonym: "st_staple" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001128 -name: polypeptide_turn_motif -alt_id: BS:00148 -def: "A reversal in the direction of the backbone of a protein that is stabilized by hydrogen bond between backbone NH and CO groups, involving no more than 4 amino acid residues." [EBIBS:GAR, uniprot:feature_type] -comment: Range. -subset: biosapiens -synonym: "turn" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001129 -name: asx_turn_left_handed_type_one -alt_id: BS:00206 -def: "Left handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type one" EXACT [] -synonym: "asx_turn_il" RELATED [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001130 -name: asx_turn_left_handed_type_two -alt_id: BS:00204 -def: "Left handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type two" EXACT [] -synonym: "asx_turn_iil" EXACT [] -is_a: SO:0000912 ! 
asx_turn - -[Term] -id: SO:0001131 -name: asx_turn_right_handed_type_two -alt_id: BS:00205 -def: "Right handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn right handed type two" EXACT [] -synonym: "asx_turn_iir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001132 -name: asx_turn_right_handed_type_one -alt_id: BS:00207 -def: "Right handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn type right handed type one" EXACT [] -synonym: "asx_turn_ir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001133 -name: beta_turn -alt_id: BS:00212 -def: "A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles of the second and third residues, which are the basis for sub-categorization." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001134 -name: beta_turn_left_handed_type_one -alt_id: BS:00215 -def: "Left handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles:- Residue(i+1): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees. 
Residue(i+2): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type one" EXACT [] -synonym: "beta_turn_il" EXACT [] -synonym: "type I' beta turn" EXACT [] -synonym: "type I' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001135 -name: beta_turn_left_handed_type_two -alt_id: BS:00213 -def: "Left handed type II: A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees > phi > -20 degrees, +80 degrees > psi > +180 degrees. Residue(i+2): +20 degrees > phi > +140 degrees, -40 degrees > psi > +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type two" EXACT [] -synonym: "beta_turn_iil" EXACT [] -synonym: "type II' beta turn" EXACT [] -synonym: "type II' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001136 -name: beta_turn_right_handed_type_one -alt_id: BS:00216 -def: "Right handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+2): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type one" EXACT [] -synonym: "beta_turn_ir" EXACT [] -synonym: "type I beta turn" EXACT [] -synonym: "type I turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001137 -name: beta_turn_right_handed_type_two -alt_id: BS:00214 -def: "Right handed type II:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, +80 degrees < psi < +180 degrees. Residue(i+2): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type two" EXACT [] -synonym: "beta_turn_iir" EXACT [] -synonym: "type II beta turn" EXACT [] -synonym: "type II turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001138 -name: gamma_turn -alt_id: BS:00219 -def: "Gamma turns, defined for 3 residues i,( i+1),( i+2) if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001139 -name: gamma_turn_classic -alt_id: BS:00220 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=75.0 - psi(i+1)=-64.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "classic gamma turn" EXACT [] -synonym: "gamma turn classic" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001140 -name: gamma_turn_inverse -alt_id: BS:00221 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=-79.0 - psi(i+1)=69.0." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn inverse" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001141 -name: serine_threonine_turn -alt_id: BS:00231 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine turn" EXACT [] -synonym: "st_turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001142 -name: st_turn_left_handed_type_one -alt_id: BS:00234 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type one" EXACT [] -synonym: "st_turn_il" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001143 -name: st_turn_left_handed_type_two -alt_id: BS:00232 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type two" EXACT [] -synonym: "st_turn_iil" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001144 -name: st_turn_right_handed_type_one -alt_id: BS:00235 -def: "The peptide twists in an clockwise, right handed manner. 
The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type one" EXACT [] -synonym: "st_turn_ir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001145 -name: st_turn_right_handed_type_two -alt_id: BS:00233 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type two" EXACT [] -synonym: "st_turn_iir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001146 -name: polypeptide_variation_site -alt_id: BS:00336 -def: "A site of sequence variation (alteration). Alternative sequence due to naturally occuring events such as polymorphisms and altermatve splicing or experimental methods such as site directed mutagenesis." [EBIBS:GAR, SO:ke] -comment: For example, was a substitution natural or mutated as part of an experiment? This term is added to merge the biosapiens term sequence_variations. -subset: biosapiens -synonym: "sequence_variations" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001147 -name: natural_variant_site -alt_id: BS:00071 -def: "Describes the natural sequence variants due to polymorphisms, disease-associated mutations, RNA editing and variations between strains, isolates or cultivars." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. 
-subset: biosapiens -synonym: "natural_variant" BROAD [] -synonym: "sequence variation" BROAD [] -synonym: "variant" BROAD [uniprot:feature_type] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001148 -name: mutated_variant_site -alt_id: BS:00036 -def: "Site which has been experimentally altered." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mutagen" EXACT BS [uniprot:feature_type] -synonym: "mutagenesis" EXACT [] -synonym: "mutated_site" EXACT [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001149 -name: alternate_sequence_site -alt_id: BS:00073 -alt_id: SO:0001065 -def: "Description of sequence variants produced by alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "alternative_sequence" EXACT [] -synonym: "isoform" NARROW [] -synonym: "sequence variation" NARROW [] -synonym: "var_seq" EXACT [uniprot:feature_type] -synonym: "varsplic" NARROW [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001150 -name: beta_turn_type_six -def: "A motif of four consecutive peptide resides of type VIa or type VIb and where the i+2 residue is cis-proline." [SO:cb] -subset: biosapiens -synonym: "beta turn type six" EXACT [] -synonym: "cis-proline loop" EXACT [] -synonym: "type VI beta turn" EXACT [] -synonym: "type VI turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001151 -name: beta_turn_type_six_a -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -90 degrees, psi ~ 0 degrees." 
[PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six a" EXACT [] -synonym: "type VIa beta turn" EXACT [] -synonym: "type VIa turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001152 -name: beta_turn_type_six_a_one -subset: biosapiens -synonym: "beta turn type six a one" EXACT [] -synonym: "type VIa1 beta turn" EXACT [] -synonym: "type VIa1 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001153 -name: beta_turn_type_six_a_two -subset: biosapiens -synonym: "beta turn type six a two" EXACT [] -synonym: "type VIa2 beta turn" EXACT [] -synonym: "type VIa2 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001154 -name: beta_turn_type_six_b -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -120 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -60 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six b" EXACT [] -synonym: "type VIb beta turn" EXACT [] -synonym: "type VIb turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001155 -name: beta_turn_type_eight -def: "A motif of four consecutive peptide residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ -30 degrees. Residue(i+2): phi ~ -120 degrees, psi ~ 120 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type eight" EXACT [] -synonym: "type VIII beta turn" EXACT [] -synonym: "type VIII turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001156 -name: DRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -10 and -60 relative to the TSS. Consensus sequence is WATCGATW." [PMID:12537576] -comment: This consensus sequence was identified computationally using the MEME algorithm within core promoter sequences from -60 to +40, with an E value of 1.7e-183. Tends to co-occur with Motif 7. Tends to not occur with DPE motif (SO:0000015) or motif 10. -synonym: "DRE motif" EXACT [] -synonym: "NDM4" EXACT [] -synonym: "WATCGATW_motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001157 -name: DMv4_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements with respect to the TSS (+1). Consensus sequence is YGGTCACACTR. Marked spatial preference within core promoter; tend to occur near the TSS, although not as tightly as INR (SO:0000014)." [PMID:16827941:12537576] -synonym: "directional motif v4" EXACT [] -synonym: "DMv4" EXACT [] -synonym: "DMv4 motif" EXACT [] -synonym: "motif 1 element" EXACT [] -synonym: "promoter motif 1" EXACT [] -synonym: "YGGTCACATR" NARROW [] -is_a: SO:0001659 ! promoter_element - -[Term] -id: SO:0001158 -name: E_box_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and +1 relative to the TSS. Consensus sequence is AWCAGCTGWT. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015)." [PMID:12537576:16827941] -synonym: "AWCAGCTGWT" NARROW [] -synonym: "E box motif" EXACT [] -synonym: "generic E box motif" EXACT [] -synonym: "NDM5" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001159 -name: DMv5_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -50 and -10 relative to the TSS. Consensus sequence is KTYRGTATWTTT. Tends to co-occur with DMv4 (SO:0001157) . Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576:16827941] -synonym: "directional motif v5" EXACT [] -synonym: "DMv5" EXACT [] -synonym: "DMv5 motif" EXACT [] -synonym: "KTYRGTATWTTT" NARROW [] -synonym: "promoter motif 6" RELATED [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001160 -name: DMv3_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -30 and +15 relative to the TSS. Consensus sequence is KNNCAKCNCTRNY. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015) or MTE (0001162)." [PMID:12537576:16827941] -synonym: "directional motif v3" EXACT [] -synonym: "DMv3" EXACT [] -synonym: "DMv3 motif" EXACT [] -synonym: "KNNCAKCNCTRNY" NARROW [] -synonym: "promoter motif 7" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001161 -name: DMv2_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and -45 relative to the TSS. Consensus sequence is MKSYGGCARCGSYSS. Tends to co-occur with DMv3 (SO:0001160). Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576:16827941] -synonym: "directional motif v2" EXACT [] -synonym: "DMv2" EXACT [] -synonym: "DMv2 motif" EXACT [] -synonym: "MKSYGGCARCGSYSS" NARROW [] -synonym: "promoter motif 8" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001162 -name: MTE -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between +20 and +30 relative to the TSS. Consensus sequence is CSARCSSAACGS. Tends to co-occur with INR motif (SO:0000014). Tends to not occur with DPE motif (SO:0000015) or DMv5 (SO:0001159)." [PMID:12537576:15231738, PMID:16858867] -synonym: "CSARCSSAACGS" NARROW [] -synonym: "motif ten element" EXACT [] -synonym: "motif_ten_element" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter - -[Term] -id: SO:0001163 -name: INR1_motif -def: "A promoter motif with consensus sequence TCATTCG." [PMID:16827941] -synonym: "directional motif p3" EXACT [] -synonym: "directional promoter motif 3" EXACT [] -synonym: "DMp3" EXACT [] -synonym: "INR1 motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001164 -name: DPE1_motif -def: "A promoter motif with consensus sequence CGGACGT." [PMID:16827941] -synonym: "directional motif 5" EXACT [] -synonym: "directional promoter motif 5" RELATED [] -synonym: "DMp5" EXACT [] -synonym: "DPE1 motif" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001165 -name: DMv1_motif -def: "A promoter motif with consensus sequence CARCCCT." [PMID:16827941] -synonym: "directional promoter motif v1" RELATED [] -synonym: "DMv1" RELATED [] -synonym: "DMv1 motif" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001166 -name: GAGA_motif -def: "A non directional promoter motif with consensus sequence GAGAGCG." [PMID:16827941] -synonym: "GAGA" EXACT [] -synonym: "GAGA motif" EXACT [] -synonym: "NDM1" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001167 -name: NDM2_motif -def: "A non directional promoter motif with consensus CGMYGYCR." [PMID:16827941] -synonym: "NDM2" EXACT [] -synonym: "NDM2 motif" EXACT [] -synonym: "non directional promoter motif 2" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001168 -name: NDM3_motif -def: "A non directional promoter motif with consensus sequence GAAAGCT." [PMID:16827941] -synonym: "NDM3" EXACT [] -synonym: "NDM3 motif" EXACT [] -synonym: "non directional motif 3" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001169 -name: ds_RNA_viral_sequence -def: "A ds_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded RNA." [SO:ke] -synonym: "double stranded RNA virus sequence" EXACT [] -synonym: "ds RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001170 -name: polinton -def: "A kind of DNA transposon that populates the genomes of protists, fungi, and animals, characterized by a unique set of proteins necessary for their transposition, including a protein-primed DNA polymerase B, retroviral integrase, cysteine protease, and ATPase. Polintons are characterized by 6-bp target site duplications, terminal-inverted repeats that are several hundred nucleotides long, and 5'-AG and TC-3' termini. Polintons exist as autonomous and nonautonomous elements." [PMID:16537396] -synonym: "maverick element" RELATED [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0001171 -name: rRNA_21S -def: "A component of the large ribosomal subunit in mitochondrial rRNA." [RSC:cb] -synonym: "21S LSU rRNA" EXACT [] -synonym: "21S ribosomal RNA" EXACT [] -synonym: "21S rRNA" EXACT [] -synonym: "rRNA 21S" EXACT [] -is_a: SO:0000651 ! 
large_subunit_rRNA - -[Term] -id: SO:0001172 -name: tRNA_region -def: "A region of a tRNA." [RSC:cb] -synonym: "tRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000253 ! tRNA - -[Term] -id: SO:0001173 -name: anticodon_loop -def: "A sequence of seven nucleotide bases in tRNA which contains the anticodon. It has the sequence 5'-pyrimidine-purine-anticodon-modified purine-any base-3." [ISBN:0716719207] -synonym: "anti-codon loop" EXACT [] -synonym: "anticodon loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001174 -name: anticodon -def: "A sequence of three nucleotide bases in tRNA which recognizes a codon in mRNA." [RSC:cb] -synonym: "anti-codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Anticodon "wiki" -is_a: SO:0001172 ! tRNA_region -relationship: part_of SO:0001173 ! anticodon_loop - -[Term] -id: SO:0001175 -name: CCA_tail -def: "Base sequence at the 3' end of a tRNA. The 3'-hydroxyl group on the terminal adenosine is the attachment point for the amino acid." [ISBN:0716719207] -synonym: "CCA sequence" EXACT [] -synonym: "CCA tail" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001176 -name: DHU_loop -def: "Non-base-paired sequence of nucleotide bases in tRNA. It contains several dihydrouracil residues." [ISBN:071671920] -synonym: "D loop" RELATED [] -synonym: "DHU loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001177 -name: T_loop -def: "Non-base-paired sequence of three nucleotide bases in tRNA. It has sequence T-Psi-C." [ISBN:0716719207] -synonym: "T loop" EXACT [] -synonym: "TpsiC loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001178 -name: pyrrolysine_tRNA_primary_transcript -def: "A primary transcript encoding pyrrolysyl tRNA (SO:0000766)." [RSC:cb] -synonym: "pyrrolysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0001179 -name: U3_snoRNA -def: "U3 snoRNA is a member of the box C/D class of small nucleolar RNAs. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -comment: The definition is most of the old definition for snoRNA (SO:0000275). -synonym: "small nucleolar RNA U3" EXACT [] -synonym: "snoRNA U3" EXACT [] -synonym: "U3 small nucleolar RNA" EXACT [] -synonym: "U3 snoRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Small_nucleolar_RNA_U3 "wiki" -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0001180 -name: AU_rich_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is rich in AUUUA pentamers. Messenger RNAs bearing multiple AU-rich elements are often unstable." [PMID:7892223] -synonym: "ARE" RELATED [] -synonym: "AU rich element" EXACT [] -synonym: "AU-rich element" EXACT [] -xref: http://en.wikipedia.org/wiki/AU-rich_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! 
three_prime_UTR - -[Term] -id: SO:0001181 -name: Bruno_response_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is bound by the Drosophila Bruno protein and its homologs." [PMID:10893231] -comment: Not to be confused with BRE_motif (SO:0000016), which binds transcription factor II B. -synonym: "BRE" RELATED [] -synonym: "Bruno response element" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001182 -name: iron_responsive_element -def: "A regulatory sequence found in the 5' and 3' UTRs of many mRNAs which encode iron-binding proteins. It has a hairpin structure and is recognized by trans-acting proteins known as iron-regulatory proteins." [PMID:3198610, PMID:8710843] -synonym: "IRE" EXACT [] -synonym: "iron responsive element" EXACT [] -xref: http://en.wikipedia.org/wiki/Iron_responsive_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0001183 -name: morpholino_backbone -def: "An attribute describing a sequence composed of nucleobases bound to a morpholino backbone. A morpholino backbone consists of morpholine (CHEBI:34856) rings connected by phosphorodiamidate linkages." [RSC:cb] -comment: Do not use this for feature annotation. Use morpholino_oligo (SO:0000034) instead. -synonym: "morpholino backbone" EXACT [] -xref: http://en.wikipedia.org/wiki/Morpholino "wiki" -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001184 -name: PNA -def: "An attribute describing a sequence composed of peptide nucleic acid (CHEBI:48021), a chemical consisting of nucleobases bound to a backbone composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [RSC:cb] -comment: Do not use this term for feature annotation. Use PNA_oligo (SO:0001011) instead. -synonym: "peptide nucleic acid" RELATED [] -is_a: SO:0000348 ! 
nucleic_acid - -[Term] -id: SO:0001185 -name: enzymatic -def: "An attribute describing the sequence of a transcript that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use enzymatic_RNA (SO:0000372) instead. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001186 -name: ribozymic -def: "An attribute describing the sequence of a transcript that has catalytic activity even without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use ribozyme (SO:0000374) instead. -is_a: SO:0001185 ! enzymatic - -[Term] -id: SO:0001187 -name: pseudouridylation_guide_snoRNA -def: "A snoRNA that specifies the site of pseudouridylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA pseudouridylation guide activity (GO:0030558). -synonym: "pseudouridylation guide snoRNA" EXACT [] -is_a: SO:0000594 ! H_ACA_box_snoRNA - -[Term] -id: SO:0001188 -name: LNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of 'locked' deoxyribose rings connected to a phosphate backbone. The deoxyribose unit's conformation is 'locked' by a 2'-C,4'-C-oxymethylene link." [CHEBI:48010] -comment: Do not use this term for feature annotation. Use LNA_oligo (SO:0001189) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001189 -name: LNA_oligo -def: "An oligo composed of LNA residues." [RSC:cb] -synonym: "LNA oligo" EXACT [] -synonym: "locked nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Locked_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -relationship: has_quality SO:0001188 ! LNA - -[Term] -id: SO:0001190 -name: TNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of threose rings connected to a phosphate backbone." 
[CHEBI:48019] -comment: Do not use this term for feature annotation. Use TNA_oligo (SO:0001191) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001191 -name: TNA_oligo -def: "An oligo composed of TNA residues." [RSC:cb] -synonym: "threose nucleic acid" EXACT [] -synonym: "TNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Threose_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -relationship: has_quality SO:0001190 ! TNA - -[Term] -id: SO:0001192 -name: GNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of an acyclic three-carbon propylene glycol connected to a phosphate backbone. It has two enantiomeric forms, (R)-GNA and (S)-GNA." [CHEBI:48015] -comment: Do not use this term for feature annotation. Use GNA_oligo (SO:0001192) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001193 -name: GNA_oligo -def: "An oligo composed of GNA residues." [RSC:cb] -synonym: "glycerol nucleic acid" EXACT [] -synonym: "glycol nucleic acid" EXACT [] -synonym: "GNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Glycerol_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -relationship: has_quality SO:0001192 ! GNA - -[Term] -id: SO:0001194 -name: R_GNA -def: "An attribute describing a GNA sequence in the (R)-GNA enantiomer." [CHEBI:48016] -comment: Do not use this term for feature annotation. Use R_GNA_oligo (SO:0001195) instead. -synonym: "R GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001195 -name: R_GNA_oligo -def: "An oligo composed of (R)-GNA residues." [RSC:cb] -synonym: "(R)-glycerol nucleic acid" EXACT [] -synonym: "(R)-glycol nucleic acid" EXACT [] -synonym: "R GNA oligo" EXACT [] -is_a: SO:0001193 ! GNA_oligo -relationship: has_quality SO:0001194 ! R_GNA - -[Term] -id: SO:0001196 -name: S_GNA -def: "An attribute describing a GNA sequence in the (S)-GNA enantiomer." [CHEBI:48017] -comment: Do not use this term for feature annotation. 
Use S_GNA_oligo (SO:0001197) instead. -synonym: "S GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001197 -name: S_GNA_oligo -def: "An oligo composed of (S)-GNA residues." [RSC:cb] -synonym: "(S)-glycerol nucleic acid" EXACT [] -synonym: "(S)-glycol nucleic acid" EXACT [] -synonym: "S GNA oligo" EXACT [] -is_a: SO:0001193 ! GNA_oligo -relationship: has_quality SO:0001196 ! S_GNA - -[Term] -id: SO:0001198 -name: ds_DNA_viral_sequence -def: "A ds_DNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded DNA." [SO:ke] -synonym: "double stranded DNA virus" EXACT [] -synonym: "ds DNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001199 -name: ss_RNA_viral_sequence -def: "A ss_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as single stranded RNA." [SO:ke] -synonym: "single strand RNA virus" EXACT [] -synonym: "ss RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001200 -name: negative_sense_ssRNA_viral_sequence -def: "A negative_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that is complementary to mRNA and must be converted to positive sense RNA by RNA polymerase before translation." [SO:ke] -synonym: "negative sense single stranded RNA virus" RELATED [] -synonym: "negative sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001201 -name: positive_sense_ssRNA_viral_sequence -def: "A positive_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that can be immediately translated by the host." [SO:ke] -synonym: "positive sense single stranded RNA virus" RELATED [] -synonym: "positive sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! 
ss_RNA_viral_sequence - -[Term] -id: SO:0001202 -name: ambisense_ssRNA_viral_sequence -def: "A ambisense_RNA_virus is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus with both messenger and anti messenger polarity." [SO:ke] -synonym: "ambisense single stranded RNA virus" EXACT [] -synonym: "ambisense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001203 -name: RNA_polymerase_promoter -def: "A region (DNA) to which RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "RNA polymerase promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0001204 -name: Phage_RNA_Polymerase_Promoter -def: "A region (DNA) to which Bacteriophage RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "Phage RNA Polymerase Promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0001205 -name: SP6_RNA_Polymerase_Promoter -def: "A region (DNA) to which the SP6 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "SP6 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001206 -name: T3_RNA_Polymerase_Promoter -def: "A DNA sequence to which the T3 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T3 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001207 -name: T7_RNA_Polymerase_Promoter -def: "A region (DNA) to which the T7 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T7 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001208 -name: five_prime_EST -def: "An EST read from the 5' end of a transcript that usually codes for a protein. These regions tend to be conserved across species and do not change much within a gene family." 
[http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "5' EST" EXACT [] -synonym: "five prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001209 -name: three_prime_EST -def: "An EST read from the 3' end of a transcript. They are more likely to fall within non-coding, or untranslated regions(UTRs)." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "3' EST" EXACT [] -synonym: "three prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001210 -name: translational_frameshift -def: "The region of mRNA (not divisible by 3 bases) that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "ribosomal frameshift" EXACT [] -synonym: "translational frameshift" EXACT [] -xref: http://en.wikipedia.org/wiki/Translational_frameshift "wiki" -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0001211 -name: plus_1_translational_frameshift -def: "The region of mRNA 1 base long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 1 ribosomal frameshift" EXACT [] -synonym: "plus 1 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001212 -name: plus_2_translational_frameshift -def: "The region of mRNA 2 bases long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 2 ribosomal frameshift" EXACT [] -synonym: "plus 2 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001213 -name: group_III_intron -def: "Group III introns are introns found in the mRNA of the plastids of euglenoid protists. They are spliced by a two step transesterification with bulged adenosine as initiating nucleophile." [PMID:11377794] -comment: GO:0000374. 
-synonym: "group III intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_III_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -def: "The maximal intersection of exon and UTR." [SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with a stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001216 -name: endonuclease_spliced_intron -def: "An intron that spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -synonym: "endonuclease spliced intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0001217 -name: protein_coding_gene -synonym: "protein coding gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000010 ! protein_coding - -[Term] -id: SO:0001218 -name: transgenic_insertion -def: "An insertion that derives from another organism, via the use of recombinant DNA technology." [SO:bm] -synonym: "transgenic insertion" EXACT [] -is_a: SO:0000667 ! insertion -relationship: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0001219 -name: retrogene -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000569 ! retrotransposed - -[Term] -id: SO:0001220 -name: silenced_by_RNA_interference -def: "An attribute describing an epigenetic process where a gene is inactivated by RNA interference." [RSC:cb] -comment: RNA interference is GO:0016246. -synonym: "silenced by RNA interference" EXACT [] -is_a: SO:0000893 ! 
silenced - -[Term] -id: SO:0001221 -name: silenced_by_histone_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by histone modification." [RSC:cb] -comment: Histone modification is GO:0016570. -synonym: "silenced by histone modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001222 -name: silenced_by_histone_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone methylation." [RSC:cb] -comment: Histone methylation is GO:0016571. -synonym: "silenced by histone methylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001223 -name: silenced_by_histone_deacetylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone deacetylation." [RSC:cb] -comment: Histone deacetylation is GO:0016573. -synonym: "silenced by histone deacetylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001224 -name: gene_silenced_by_RNA_interference -def: "A gene that is silenced by RNA interference." [SO:xp] -synonym: "gene silenced by RNA interference" EXACT [] -synonym: "RNA interference silenced gene" EXACT [] -synonym: "RNAi silenced gene" EXACT [] -is_a: SO:0000127 ! silenced_gene -relationship: has_quality SO:0001220 ! silenced_by_RNA_interference - -[Term] -id: SO:0001225 -name: gene_silenced_by_histone_modification -def: "A gene that is silenced by histone modification." [SO:xp] -synonym: "gene silenced by histone modification" EXACT [] -is_a: SO:0000127 ! silenced_gene -relationship: has_quality SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001226 -name: gene_silenced_by_histone_methylation -def: "A gene that is silenced by histone methylation." [SO:xp] -synonym: "gene silenced by histone methylation" EXACT [] -is_a: SO:0001225 ! gene_silenced_by_histone_modification -relationship: has_quality SO:0001222 ! 
silenced_by_histone_methylation - -[Term] -id: SO:0001227 -name: gene_silenced_by_histone_deacetylation -def: "A gene that is silenced by histone deacetylation." [SO:xp] -synonym: "gene silenced by histone deacetylation" EXACT [] -is_a: SO:0001225 ! gene_silenced_by_histone_modification -relationship: has_quality SO:0001223 ! silenced_by_histone_deacetylation - -[Term] -id: SO:0001228 -name: dihydrouridine -def: "A modified RNA base in which the 5,6-dihydrouracil is bound to the ribose ring." [RSC:cb] -synonym: "D" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Dihydrouridine "wiki" -xref: RNAMOD:051 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001229 -name: pseudouridine -def: "A modified RNA base in which the 5- position of the uracil is bound to the ribose ring instead of the 4- position." [RSC:cb] -comment: The free molecule is CHEBI:17802. -synonym: "Y" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Pseudouridine "wiki" -xref: RNAMOD:050 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001230 -name: inosine -def: "A modified RNA base in which hypoxanthine is bound to the ribose ring." [http://library.med.utah.edu/RNAmods/, RSC:cb] -comment: The free molecule is CHEBI:17596. -synonym: "I" RELATED [] -synonym: "RNAMOD:017" RELATED [] -xref: http://en.wikipedia.org/wiki/Inosine "wiki" -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001231 -name: seven_methylguanine -def: "A modified RNA base in which guanine is methylated at the 7- position." [RSC:cb] -comment: The free molecule is CHEBI:2274. -synonym: "7-methylguanine" EXACT [] -synonym: "seven methylguanine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001232 -name: ribothymidine -def: "A modified RNA base in which thymine is bound to the ribose ring." [RSC:cb] -comment: The free molecule is CHEBI:30832. -is_a: SO:0000250 ! 
modified_RNA_base_feature - -[Term] -id: SO:0001233 -name: methylinosine -def: "A modified RNA base in which methylhypoxanthine is bound to the ribose ring." [RSC:cb] -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001234 -name: mobile -def: "An attribute describing a feature that has either intra-genome or intracellular mobility." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Mobile "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001235 -name: replicon -def: "A region containing at least one unique origin of replication and a unique termination site." [ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001237 -name: amino_acid -def: "A sequence feature that corresponds to a single amino acid residue in a polypeptide." [RSC:cb] -comment: Probably in the future this will cross reference to Chebi. -synonym: "amino acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Amino_acid "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0001238 -name: major_TSS -synonym: "major transcription start site" EXACT [] -synonym: "major TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001239 -name: minor_TSS -synonym: "minor TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001240 -name: TSS_region -def: "The region of a gene from the 5' most TSS to the 3' TSS." [BBOP:nw] -synonym: "TSS region" EXACT [] -is_a: SO:0000842 ! gene_component_region -relationship: has_part SO:0000315 ! TSS - -[Term] -id: SO:0001241 -name: encodes_alternate_transcription_start_sites -synonym: "encodes alternate transcription start sites" EXACT [] -is_a: SO:0000401 ! 
gene_attribute - -[Term] -id: SO:0001243 -name: miRNA_primary_transcript_region -def: "A part of an miRNA primary_transcript." [SO:ke] -synonym: "miRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0001244 -name: pre_miRNA -def: "The 60-70 nucleotide region remain after Drosha processing of the primary transcript, that folds back upon itself to form a hairpin sructure." [SO:ke] -synonym: "pre-miRNA" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0001245 -name: miRNA_stem -def: "The stem of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA stem" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001246 -name: miRNA_loop -def: "The loop of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA loop" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001247 -name: synthetic_oligo -def: "An oligo composed of synthetic nucleotides." [SO:ke] -synonym: "synthetic oligo" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0001248 -name: assembly -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001249 -name: fragment_assembly -def: "A fragment assembly is a genome assembly that orders overlapping fragments of the genome based on landmark sequences. The base pair distance between the landmarks is known allowing additivity of lengths." [SO:ke] -synonym: "fragment assembly" EXACT [] -synonym: "physical map" EXACT [] -is_a: SO:0001248 ! 
assembly - -[Term] -id: SO:0001250 -name: fingerprint_map -def: "A fingerprint_map is a physical map composed of restriction fragments." [SO:ke] -synonym: "BACmap" EXACT [] -synonym: "fingerprint map" EXACT [] -synonym: "FPC" EXACT [] -synonym: "FPCmap" EXACT [] -synonym: "restriction map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000412 ! restriction_fragment - -[Term] -id: SO:0001251 -name: STS_map -def: "An STS map is a physical map organized by the unique STS landmarks." [SO:ke] -synonym: "STS map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001252 -name: RH_map -def: "A radiation hybrid map is a physical map." [SO:ke] -synonym: "radiation hybrid map" EXACT [] -synonym: "RH map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001253 -name: sonicate_fragment -def: "A DNA fragment generated by sonication. Sonication is a technique used to sheer DNA into smaller fragments." [SO:ke] -synonym: "sonicate fragment" EXACT [] -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0001254 -name: polyploid -def: "A kind of chromosome variation where the chromosome complement is an exact multiple of the haploid number and is greater than the diploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Polyploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0001255 -name: autopolyploid -def: "A polyploid where the multiple chromosome set was derived from the same organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Autopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001256 -name: allopolyploid -def: "A polyploid where the multiple chromosome set was derived from a different organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Allopolyploid "wiki" -is_a: SO:0001254 ! 
polyploid - -[Term] -id: SO:0001257 -name: homing_endonuclease_binding_site -def: "The binding site (recognition site) of a homing endonuclease. The binding site is typically large." [SO:ke] -synonym: "homing endonuclease binding site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0001258 -name: octamer_motif -def: "A sequence element characteristic of some RNA polymerase II promoters with sequence ATTGCAT that binds Pou-domain transcription factors." [GOC:dh, PMID:3095662] -comment: Nature. 1986 Oct 16-22;323(6089):640-3. -synonym: "octamer motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001259 -name: apicoplast_chromosome -def: "A chromosome originating in an apicoplast." [SO:xp] -synonym: "apicoplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0001260 -name: sequence_collection -def: "A collection of discontinuous sequences." [SO:ke] -synonym: "sequence collection" EXACT [] - -[Term] -id: SO:0001261 -name: overlapping_feature_set -def: "A continuous region of sequence composed of the overlapping of multiple sequence_features, which ultimately provides evidence for another sequence_feature." [SO:ke] -comment: This feature was requested by Nicole, tracker id 1911479. It is required to gather evidence together for annotation. An example would be overlapping ESTs that support an mRNA. -synonym: "overlapping feature set" EXACT [] -is_a: SO:0000703 ! experimental_result_region - -[Term] -id: SO:0001262 -name: overlapping_EST_set -def: "A continous experimental result region extending the length of multiple overlapping EST's." [SO:ke] -synonym: "overlapping EST set" EXACT [] -is_a: SO:0001261 ! overlapping_feature_set -relationship: has_part SO:0000345 ! 
EST - -[Term] -id: SO:0001263 -name: ncRNA_gene -synonym: "ncRNA gen" EXACT [] -synonym: "ncRNA gene" EXACT [] -synonym: "non-coding RNA gene" RELATED [] -is_a: SO:0000704 ! gene - -[Term] -id: SO:0001264 -name: gRNA_gene -synonym: "gRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000979 ! gRNA_encoding - -[Term] -id: SO:0001265 -name: miRNA_gene -synonym: "miRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000571 ! miRNA_encoding - -[Term] -id: SO:0001266 -name: scRNA_gene -synonym: "scRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000575 ! scRNA_encoding - -[Term] -id: SO:0001267 -name: snoRNA_gene -synonym: "snoRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0001268 -name: snRNA_gene -synonym: "snRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0001263 ! ncRNA_gene - -[Term] -id: SO:0001269 -name: SRP_RNA_gene -synonym: "SRP RNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000642 ! SRP_RNA_encoding - -[Term] -id: SO:0001270 -name: stRNA_gene -synonym: "stRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000656 ! stRNA_encoding - -[Term] -id: SO:0001271 -name: tmRNA_gene -synonym: "tmRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000659 ! tmRNA_encoding - -[Term] -id: SO:0001272 -name: tRNA_gene -synonym: "tRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000663 ! tRNA_encoding - -[Term] -id: SO:0001273 -name: modified_adenosine -def: "A modified adenine is an adenine base feature that has been altered." [SO:ke] -synonym: "modified adenosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001274 -name: modified_inosine -def: "A modified inosine is an inosine base feature that has been altered." 
[SO:ke] -synonym: "modified inosine" EXACT [] -is_a: SO:0001230 ! inosine - -[Term] -id: SO:0001275 -name: modified_cytidine -def: "A modified cytidine is a cytidine base feature which has been altered." [SO:ke] -synonym: "modified cytidine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001276 -name: modified_guanosine -synonym: "modified guanosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001277 -name: modified_uridine -synonym: "modified uridine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001278 -name: one_methylinosine -def: "1-methylinosine is a modified insosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylinosine" EXACT [] -synonym: "m1I" EXACT RNAMOD [] -synonym: "one methylinosine" EXACT [] -xref: RNAMOD:018 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001279 -name: one_two_prime_O_dimethylinosine -def: "1,2'-O-dimethylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylinosine" EXACT [] -synonym: "m'Im" EXACT RNAMOD [] -synonym: "one two prime O dimethylinosine" EXACT [] -xref: RNAMOD:019 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001280 -name: two_prime_O_methylinosine -def: "2'-O-methylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylinosine" EXACT [] -synonym: "Im" EXACT RNAMOD [] -synonym: "two prime O methylinosine" EXACT [] -xref: RNAMOD:081 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001281 -name: three_methylcytidine -def: "3-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylcytidine" EXACT [] -synonym: "m3C" EXACT RNAMOD [] -synonym: "three methylcytidine" EXACT [] -xref: RNAMOD:020 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001282 -name: five_methylcytidine -def: "5-methylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylcytidine" EXACT [] -synonym: "five methylcytidine" EXACT [] -synonym: "m5C" EXACT RNAMOD [] -xref: RNAMOD:021 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001283 -name: two_prime_O_methylcytidine -def: "2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylcytidine" EXACT [] -synonym: "Cm" EXACT RNAMOD [] -synonym: "two prime O methylcytidine" EXACT [] -xref: RNAMOD:022 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001284 -name: two_thiocytidine -def: "2-thiocytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiocytidine" EXACT [] -synonym: "s2C" EXACT RNAMOD [] -synonym: "two thiocytidine" EXACT [] -xref: RNAMOD:023 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001285 -name: N4_acetylcytidine -def: "N4-acetylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4C" EXACT RNAMOD [] -synonym: "N4 acetylcytidine" EXACT [] -synonym: "N4-acetylcytidine" EXACT [] -xref: RNAMOD:024 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001286 -name: five_formylcytidine -def: "5-formylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formylcytidine" EXACT [] -synonym: "f5C" EXACT RNAMOD [] -synonym: "five formylcytidine" EXACT [] -xref: RNAMOD:025 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001287 -name: five_two_prime_O_dimethylcytidine -def: "5,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethylcytidine" EXACT [] -synonym: "five two prime O dimethylcytidine" EXACT [] -synonym: "m5Cm" EXACT RNAMOD [] -xref: RNAMOD:026 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001288 -name: N4_acetyl_2_prime_O_methylcytidine -def: "N4-acetyl-2'-O-methylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "ac4Cm" EXACT RNAMOD [] -synonym: "N4 acetyl 2 prime O methylcytidine" EXACT [] -synonym: "N4-acetyl-2'-O-methylcytidine" EXACT [] -xref: RNAMOD:027 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001289 -name: lysidine -def: "Lysidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "k2C" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Lysidine "wiki" -xref: RNAMOD:028 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001290 -name: N4_methylcytidine -def: "N4-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4C" EXACT RNAMOD [] -synonym: "N4 methylcytidine" EXACT [] -synonym: "N4-methylcytidine" EXACT [] -xref: RNAMOD:082 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001291 -name: N4_2_prime_O_dimethylcytidine -def: "N4,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4Cm" EXACT RNAMOD [] -synonym: "N4 2 prime O dimethylcytidine" EXACT [] -synonym: "N4,2'-O-dimethylcytidine" EXACT [] -xref: RNAMOD:083 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001292 -name: five_hydroxymethylcytidine -def: "5-hydroxymethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxymethylcytidine" EXACT [] -synonym: "five hydroxymethylcytidine" EXACT [] -synonym: "hm5C" EXACT RNAMOD [] -xref: RNAMOD:084 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001293 -name: five_formyl_two_prime_O_methylcytidine -def: "5-formyl-2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formyl-2'-O-methylcytidine" EXACT [] -synonym: "f5Cm" EXACT RNAMOD [] -synonym: "five formyl two prime O methylcytidine" EXACT [] -xref: RNAMOD:095 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001294 -name: N4_N4_2_prime_O_trimethylcytidine -def: "N4_N4_2_prime_O_trimethylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "m42Cm" EXACT RNAMOD [] -synonym: "N4,N4,2'-O-trimethylcytidine" EXACT [] -xref: RNAMOD:107 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001295 -name: one_methyladenosine -def: "1_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyladenosine" EXACT [] -synonym: "m1A" EXACT RNAMOD [] -synonym: "one methyladenosine" EXACT [] -xref: RNAMOD:001 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001296 -name: two_methyladenosine -def: "2_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methyladenosine" EXACT [] -synonym: "m2A" EXACT RNAMOD [] -synonym: "two methyladenosine" EXACT [] -xref: RNAMOD:002 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001297 -name: N6_methyladenosine -def: "N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6A" EXACT RNAMOD [] -synonym: "N6 methyladenosine" EXACT [] -synonym: "N6-methyladenosine" EXACT [] -xref: RNAMOD:003 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001298 -name: two_prime_O_methyladenosine -def: "2prime_O_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyladenosine" EXACT [] -synonym: "Am" EXACT RNAMOD [] -synonym: "two prime O methyladenosine" EXACT [] -xref: RNAMOD:004 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001299 -name: two_methylthio_N6_methyladenosine -def: "2_methylthio_N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-methyladenosine" EXACT [] -synonym: "ms2m6A" EXACT RNAMOD [] -synonym: "two methylthio N6 methyladenosine" EXACT [] -xref: RNAMOD:005 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001300 -name: N6_isopentenyladenosine -def: "N6_isopentenyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "i6A" EXACT RNAMOD [] -synonym: "N6 isopentenyladenosine" EXACT [] -synonym: "N6-isopentenyladenosine" EXACT [] -xref: RNAMOD:006 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001301 -name: two_methylthio_N6_isopentenyladenosine -def: "2_methylthio_N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-isopentenyladenosine" EXACT [] -synonym: "ms2i6A" EXACT RNAMOD [] -synonym: "two methylthio N6 isopentenyladenosine" EXACT [] -xref: RNAMOD:007 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001302 -name: N6_cis_hydroxyisopentenyl_adenosine -def: "N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "io6A" EXACT RNAMOD [] -synonym: "N6 cis hydroxyisopentenyl adenosine" EXACT [] -synonym: "N6-(cis-hydroxyisopentenyl)adenosine" EXACT [] -xref: RNAMOD:008 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001303 -name: two_methylthio_N6_cis_hydroxyisopentenyl_adenosine -def: "2_methylthio_N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-(cis-hydroxyisopentenyl) adenosine" EXACT [] -synonym: "ms2io6A" EXACT RNAMOD [] -synonym: "two methylthio N6 cis hydroxyisopentenyl adenosine" EXACT [] -xref: RNAMOD:009 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001304 -name: N6_glycinylcarbamoyladenosine -def: "N6_glycinylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "g6A" EXACT RNAMOD [] -synonym: "N6 glycinylcarbamoyladenosine" EXACT [] -synonym: "N6-glycinylcarbamoyladenosine" EXACT [] -xref: RNAMOD:010 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001305 -name: N6_threonylcarbamoyladenosine -def: "N6_threonylcarbamoyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-threonylcarbamoyladenosine" EXACT [] -synonym: "t6A" EXACT RNAMOD [] -xref: RNAMOD:011 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001306 -name: two_methylthio_N6_threonyl_carbamoyladenosine -def: "2_methylthio_N6_threonyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-threonyl carbamoyladenosine" EXACT [] -synonym: "ms2t6A" EXACT RNAMOD [] -synonym: "two methylthio N6 threonyl carbamoyladenosine" EXACT [] -xref: RNAMOD:012 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001307 -name: N6_methyl_N6_threonylcarbamoyladenosine -def: "N6_methyl_N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6t6A" EXACT RNAMOD [] -synonym: "N6 methyl N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-methyl-N6-threonylcarbamoyladenosine" EXACT [] -xref: RNAMOD:013 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001308 -name: N6_hydroxynorvalylcarbamoyladenosine -def: "N6_hydroxynorvalylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "hn6A" EXACT RNAMOD [] -synonym: "N6 hydroxynorvalylcarbamoyladenosine" EXACT [] -synonym: "N6-hydroxynorvalylcarbamoyladenosine" EXACT [] -xref: RNAMOD:014 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001309 -name: two_methylthio_N6_hydroxynorvalyl_carbamoyladenosine -def: "2_methylthio_N6_hydroxynorvalyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-hydroxynorvalyl carbamoyladenosine" EXACT [] -synonym: "ms2hn6A" EXACT RNAMOD [] -synonym: "two methylthio N6 hydroxynorvalyl carbamoyladenosine" EXACT [] -xref: RNAMOD:015 -is_a: SO:0001273 ! 
modified_adenosine - -[Term] -id: SO:0001310 -name: two_prime_O_ribosyladenosine_phosphate -def: "2prime_O_ribosyladenosine_phosphate is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosyladenosine (phosphate)" EXACT [] -synonym: "Ar(p)" EXACT RNAMOD [] -synonym: "two prime O ribosyladenosine phosphate" EXACT [] -xref: RNAMOD:016 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001311 -name: N6_N6_dimethyladenosine -def: "N6_N6_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62A" EXACT RNAMOD [] -synonym: "N6,N6-dimethyladenosine" EXACT [] -xref: RNAMOD:080 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001312 -name: N6_2_prime_O_dimethyladenosine -def: "N6_2prime_O_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6Am" EXACT RNAMOD [] -synonym: "N6 2 prime O dimethyladenosine" EXACT [] -synonym: "N6,2'-O-dimethyladenosine" EXACT [] -xref: RNAMOD:088 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001313 -name: N6_N6_2_prime_O_trimethyladenosine -def: "N6_N6_2prime_O_trimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62Am" EXACT RNAMOD [] -synonym: "N6,N6,2'-O-trimethyladenosine" EXACT [] -xref: RNAMOD:089 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001314 -name: one_two_prime_O_dimethyladenosine -def: "1,2'-O-dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethyladenosine" EXACT [] -synonym: "m1Am" EXACT RNAMOD [] -synonym: "one two prime O dimethyladenosine" EXACT [] -xref: RNAMOD:097 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001315 -name: N6_acetyladenosine -def: "N6_acetyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "ac6A" EXACT RNAMOD [] -synonym: "N6 acetyladenosine" EXACT [] -synonym: "N6-acetyladenosine" EXACT [] -xref: RNAMOD:102 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001316 -name: seven_deazaguanosine -def: "7-deazaguanosine is a moddified guanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-deazaguanosine" RELATED [] -synonym: "seven deazaguanosine" EXACT [] -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001317 -name: queuosine -def: "Queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "Q" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Queuosine "wiki" -xref: RNAMOD:043 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001318 -name: epoxyqueuosine -def: "Epoxyqueuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "eQ" EXACT RNAMOD [] -xref: RNAMOD:044 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001319 -name: galactosyl_queuosine -def: "Galactosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "galactosyl queuosine" EXACT [] -synonym: "galactosyl-queuosine" EXACT [] -synonym: "galQ" EXACT RNAMOD [] -xref: RNAMOD:045 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001320 -name: mannosyl_queuosine -def: "Mannosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "mannosyl queuosine" EXACT [] -synonym: "mannosyl-queuosine" EXACT [] -synonym: "manQ" EXACT RNAMOD [] -xref: RNAMOD:046 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001321 -name: seven_cyano_seven_deazaguanosine -def: "7_cyano_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-cyano-7-deazaguanosine" EXACT [] -synonym: "preQ0" EXACT RNAMOD [] -synonym: "seven cyano seven deazaguanosine" EXACT [] -xref: RNAMOD:047 -is_a: SO:0001316 ! 
seven_deazaguanosine - -[Term] -id: SO:0001322 -name: seven_aminomethyl_seven_deazaguanosine -def: "7_aminomethyl_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-aminomethyl-7-deazaguanosine" EXACT [] -synonym: "preQ1" EXACT RNAMOD [] -synonym: "seven aminomethyl seven deazaguanosine" EXACT [] -xref: RNAMOD:048 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001323 -name: archaeosine -def: "Archaeosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "G+" EXACT RNAMOD [] -xref: RNAMOD:049 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001324 -name: one_methylguanosine -def: "1_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylguanosine" EXACT [] -synonym: "m1G" EXACT RNAMOD [] -synonym: "one methylguanosine" EXACT [] -xref: RNAMOD:029 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001325 -name: N2_methylguanosine -def: "N2_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2G" EXACT RNAMOD [] -synonym: "N2 methylguanosine" EXACT [] -synonym: "N2-methylguanosine" EXACT [] -xref: RNAMOD:030 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001326 -name: seven_methylguanosine -def: "7_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "7-methylguanosine" EXACT [] -synonym: "m7G" EXACT RNAMOD [] -synonym: "seven methylguanosine" EXACT [] -xref: RNAMOD:031 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001327 -name: two_prime_O_methylguanosine -def: "2prime_O_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylguanosine" EXACT [] -synonym: "Gm" EXACT RNAMOD [] -synonym: "two prime O methylguanosine" EXACT [] -xref: RNAMOD:032 -is_a: SO:0001276 ! 
modified_guanosine - -[Term] -id: SO:0001328 -name: N2_N2_dimethylguanosine -def: "N2_N2_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22G" EXACT RNAMOD [] -synonym: "N2,N2-dimethylguanosine" EXACT [] -xref: RNAMOD:033 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001329 -name: N2_2_prime_O_dimethylguanosine -def: "N2_2prime_O_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2Gm" EXACT RNAMOD [] -synonym: "N2 2 prime O dimethylguanosine" EXACT [] -synonym: "N2,2'-O-dimethylguanosine" EXACT [] -xref: RNAMOD:034 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001330 -name: N2_N2_2_prime_O_trimethylguanosine -def: "N2_N2_2prime_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22Gmv" EXACT RNAMOD [] -synonym: "N2,N2,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:035 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001331 -name: two_prime_O_ribosylguanosine_phosphate -def: "2prime_O_ribosylguanosine_phosphate is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosylguanosine (phosphate)" EXACT [] -synonym: "Gr(p)" EXACT RNAMOD [] -synonym: "two prime O ribosylguanosine phosphate" EXACT [] -xref: RNAMOD:036 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001332 -name: wybutosine -def: "Wybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "yW" EXACT RNAMOD [] -xref: RNAMOD:037 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001333 -name: peroxywybutosine -def: "Peroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "o2yW" EXACT RNAMOD [] -xref: RNAMOD:038 -is_a: SO:0001276 ! 
modified_guanosine - -[Term] -id: SO:0001334 -name: hydroxywybutosine -def: "Hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW" EXACT RNAMOD [] -xref: RNAMOD:039 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001335 -name: undermodified_hydroxywybutosine -def: "Undermodified_hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW*" EXACT RNAMOD [] -synonym: "undermodified hydroxywybutosine" EXACT [] -xref: RNAMOD:040 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001336 -name: wyosine -def: "Wyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "IMG" EXACT RNAMOD [] -xref: RNAMOD:041 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001337 -name: methylwyosine -def: "Methylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mimG" EXACT RNAMOD [] -xref: RNAMOD:042 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001338 -name: N2_7_dimethylguanosine -def: "N2_7_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7G" EXACT RNAMOD [] -synonym: "N2 7 dimethylguanosine" EXACT [] -synonym: "N2,7-dimethylguanosine" EXACT [] -xref: RNAMOD:090 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001339 -name: N2_N2_7_trimethylguanosine -def: "N2_N2_7_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,2,7G" EXACT RNAMOD [] -synonym: "N2,N2,7-trimethylguanosine" EXACT [] -xref: RNAMOD:091 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001340 -name: one_two_prime_O_dimethylguanosine -def: "1_2prime_O_dimethylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylguanosine" EXACT [] -synonym: "m1Gm" EXACT RNAMOD [] -synonym: "one two prime O dimethylguanosine" EXACT [] -xref: RNAMOD:096 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001341 -name: four_demethylwyosine -def: "4_demethylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-demethylwyosine" EXACT [] -synonym: "four demethylwyosine" EXACT [] -synonym: "imG-14" EXACT RNAMOD [] -xref: RNAMOD:100 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001342 -name: isowyosine -def: "Isowyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "imG2" EXACT RNAMOD [] -xref: RNAMOD:101 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001343 -name: N2_7_2prirme_O_trimethylguanosine -def: "N2_7_2prirme_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7Gm" EXACT RNAMOD [] -synonym: "N2 7 2prirme O trimethylguanosine" EXACT [] -synonym: "N2,7,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:106 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001344 -name: five_methyluridine -def: "5_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methyluridine" EXACT [] -synonym: "five methyluridine" EXACT [] -synonym: "m5U" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/5-methyluridine "wiki" -xref: RNAMOD:052 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001345 -name: two_prime_O_methyluridine -def: "2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyluridine" EXACT [] -synonym: "two prime O methyluridine" EXACT [] -synonym: "Um" EXACT RNAMOD [] -xref: RNAMOD:053 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001346 -name: five_two_prime_O_dimethyluridine -def: "5_2_prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethyluridine" EXACT [] -synonym: "five two prime O dimethyluridine" EXACT [] -synonym: "m5Um" EXACT RNAMOD [] -xref: RNAMOD:054 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001347 -name: one_methylpseudouridine -def: "1_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylpseudouridine" EXACT [] -synonym: "m1Y" EXACT RNAMOD [] -synonym: "one methylpseudouridine" EXACT [] -xref: RNAMOD:055 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001348 -name: two_prime_O_methylpseudouridine -def: "2prime_O_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylpseudouridine" EXACT [] -synonym: "two prime O methylpseudouridine" EXACT [] -synonym: "Ym" EXACT RNAMOD [] -xref: RNAMOD:056 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001349 -name: two_thiouridine -def: "2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiouridine" EXACT [] -synonym: "s2U" EXACT RNAMOD [] -synonym: "two thiouridine" EXACT [] -xref: RNAMOD:057 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001350 -name: four_thiouridine -def: "4_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-thiouridine" EXACT [] -synonym: "four thiouridine" EXACT [] -synonym: "s4U" EXACT RNAMOD [] -xref: RNAMOD:058 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001351 -name: five_methyl_2_thiouridine -def: "5_methyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyl-2-thiouridine" EXACT [] -synonym: "five methyl 2 thiouridine" EXACT [] -synonym: "m5s2U" EXACT RNAMOD [] -xref: RNAMOD:059 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001352 -name: two_thio_two_prime_O_methyluridine -def: "2_thio_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thio-2'-O-methyluridine" EXACT [] -synonym: "s2Um" EXACT RNAMOD [] -synonym: "two thio two prime O methyluridine" EXACT [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001353 -name: three_three_amino_three_carboxypropyl_uridine -def: "3_3_amino_3_carboxypropyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-(3-amino-3-carboxypropyl)uridine" EXACT [] -synonym: "acp3U" EXACT RNAMOD [] -xref: RNAMOD:061 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001354 -name: five_hydroxyuridine -def: "5_hydroxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxyuridine" EXACT [] -synonym: "five hydroxyuridine" EXACT [] -synonym: "ho5U" EXACT RNAMOD [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001355 -name: five_methoxyuridine -def: "5_methoxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxyuridine" EXACT [] -synonym: "five methoxyuridine" EXACT [] -synonym: "mo5U" EXACT RNAMOD [] -xref: RNAMOD:063 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001356 -name: uridine_five_oxyacetic_acid -def: "Uridine_5_oxyacetic_acid is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "cmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid" EXACT [] -synonym: "uridine five oxyacetic acid" EXACT [] -xref: RNAMOD:064 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001357 -name: uridine_five_oxyacetic_acid_methyl_ester -def: "Uridine_5_oxyacetic_acid_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mcmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid methyl ester" EXACT [] -synonym: "uridine five oxyacetic acid methyl ester" EXACT [] -xref: RNAMOD:065 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001358 -name: five_carboxyhydroxymethyl_uridine -def: "5_carboxyhydroxymethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine" EXACT [] -synonym: "chm5U" EXACT RNAMOD [] -synonym: "five carboxyhydroxymethyl uridine" EXACT [] -xref: RNAMOD:066 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001359 -name: five_carboxyhydroxymethyl_uridine_methyl_ester -def: "5_carboxyhydroxymethyl_uridine_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine methyl ester" EXACT [] -synonym: "five carboxyhydroxymethyl uridine methyl ester" EXACT [] -synonym: "mchm5U" EXACT RNAMOD [] -xref: RNAMOD:067 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001360 -name: five_methoxycarbonylmethyluridine -def: "Five_methoxycarbonylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyluridine" EXACT [] -synonym: "five methoxycarbonylmethyluridine" EXACT [] -synonym: "mcm5U" EXACT RNAMOD [] -xref: RNAMOD:068 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001361 -name: five_methoxycarbonylmethyl_two_prime_O_methyluridine -def: "Five_methoxycarbonylmethyl_2_prime_O_methyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five methoxycarbonylmethyl two prime O methyluridine" EXACT [] -synonym: "mcm5Um" EXACT RNAMOD [] -xref: RNAMOD:069 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001362 -name: five_methoxycarbonylmethyl_two_thiouridine -def: "5_methoxycarbonylmethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2-thiouridine" EXACT [] -synonym: "five methoxycarbonylmethyl two thiouridine" EXACT [] -synonym: "mcm5s2U" EXACT RNAMOD [] -xref: RNAMOD:070 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001363 -name: five_aminomethyl_two_thiouridine -def: "5_aminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-aminomethyl-2-thiouridine" EXACT [] -synonym: "five aminomethyl two thiouridine" EXACT [] -synonym: "nm5s2U" EXACT RNAMOD [] -xref: RNAMOD:071 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001364 -name: five_methylaminomethyluridine -def: "5_methylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyluridine" EXACT [] -synonym: "five methylaminomethyluridine" EXACT [] -synonym: "mnm5U" EXACT RNAMOD [] -xref: RNAMOD:072 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001365 -name: five_methylaminomethyl_two_thiouridine -def: "5_methylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-thiouridine" EXACT [] -synonym: "five methylaminomethyl two thiouridine" EXACT [] -synonym: "mnm5s2U" EXACT RNAMOD [] -xref: RNAMOD:073 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001366 -name: five_methylaminomethyl_two_selenouridine -def: "5_methylaminomethyl_2_selenouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-selenouridine" EXACT [] -synonym: "five methylaminomethyl two selenouridine" EXACT [] -synonym: "mnm5se2U" EXACT RNAMOD [] -xref: RNAMOD:074 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001367 -name: five_carbamoylmethyluridine -def: "5_carbamoylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyluridine" EXACT [] -synonym: "five carbamoylmethyluridine" EXACT [] -synonym: "ncm5U" EXACT RNAMOD [] -xref: RNAMOD:075 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001368 -name: five_carbamoylmethyl_two_prime_O_methyluridine -def: "5_carbamoylmethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five carbamoylmethyl two prime O methyluridine" EXACT [] -synonym: "ncm5Um" EXACT RNAMOD [] -xref: RNAMOD:076 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001369 -name: five_carboxymethylaminomethyluridine -def: "5_carboxymethylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyluridine" EXACT [] -synonym: "cmnm5U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyluridine" EXACT [] -xref: RNAMOD:077 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001370 -name: five_carboxymethylaminomethyl_two_prime_O_methyluridine -def: "5_carboxymethylaminomethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl- 2'-O-methyluridine" EXACT [] -synonym: "cmnm5Um" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two prime O methyluridine" EXACT [] -xref: RNAMOD:078 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001371 -name: five_carboxymethylaminomethyl_two_thiouridine -def: "5_carboxymethylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl-2-thiouridine" EXACT [] -synonym: "cmnm5s2U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two thiouridine" EXACT [] -xref: RNAMOD:079 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001372 -name: three_methyluridine -def: "3_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methyluridine" EXACT [] -synonym: "m3U" EXACT RNAMOD [] -synonym: "three methyluridine" EXACT [] -xref: RNAMOD:085 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001373 -name: one_methyl_three_three_amino_three_carboxypropyl_pseudouridine -def: "1_methyl_3_3_amino_3_carboxypropyl_pseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyl-3-(3-amino-3-carboxypropyl) pseudouridine" EXACT [] -synonym: "m1acp3Y" EXACT RNAMOD [] -xref: RNAMOD:086 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001374 -name: five_carboxymethyluridine -def: "5_carboxymethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethyluridine" EXACT [] -synonym: "cm5U" EXACT RNAMOD [] -synonym: "five carboxymethyluridine" EXACT [] -xref: RNAMOD:087 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001375 -name: three_two_prime_O_dimethyluridine -def: "3_2prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3,2'-O-dimethyluridine" EXACT [] -synonym: "m3Um" EXACT RNAMOD [] -synonym: "three two prime O dimethyluridine" EXACT [] -xref: RNAMOD:092 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001376 -name: five_methyldihydrouridine -def: "5_methyldihydrouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyldihydrouridine" EXACT [] -synonym: "five methyldihydrouridine" EXACT [] -synonym: "m5D" EXACT RNAMOD [] -xref: RNAMOD:093 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001377 -name: three_methylpseudouridine -def: "3_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylpseudouridine" EXACT [] -synonym: "m3Y" EXACT RNAMOD [] -synonym: "three methylpseudouridine" EXACT [] -xref: RNAMOD:094 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001378 -name: five_taurinomethyluridine -def: "5_taurinomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyluridine" EXACT [] -synonym: "five taurinomethyluridine" EXACT [] -synonym: "tm5U" EXACT RNAMOD [] -xref: RNAMOD:098 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001379 -name: five_taurinomethyl_two_thiouridine -def: "5_taurinomethyl_2_thiouridineis a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyl-2-thiouridine" EXACT [] -synonym: "five taurinomethyl two thiouridine" EXACT [] -synonym: "tm5s2U" EXACT RNAMOD [] -xref: RNAMOD:099 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001380 -name: five_isopentenylaminomethyl_uridine -def: "5_isopentenylaminomethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)uridine" EXACT [] -synonym: "five isopentenylaminomethyl uridine" EXACT [] -synonym: "inm5U" EXACT RNAMOD [] -xref: RNAMOD:103 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001381 -name: five_isopentenylaminomethyl_two_thiouridine -def: "5_isopentenylaminomethyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2-thiouridine" EXACT [] -synonym: "five isopentenylaminomethyl two thiouridine" EXACT [] -synonym: "inm5s2U" EXACT RNAMOD [] -xref: RNAMOD:104 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001382 -name: five_isopentenylaminomethyl_two_prime_O_methyluridine -def: "5_isopentenylaminomethyl_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2'-O-methyluridine" EXACT [] -synonym: "five isopentenylaminomethyl two prime O methyluridine" EXACT [] -synonym: "inm5Um" EXACT RNAMOD [] -xref: RNAMOD:105 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001383 -name: histone_binding_site -def: "A binding site that, in the nucleotide molecule, interacts selectively and non-covalently with polypeptide residues of a histone." [SO:ke] -synonym: "histone binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site - -[Term] -id: SO:0001384 -name: CDS_fragment -synonym: "CDS fragment" EXACT [] -synonym: "incomplete CDS" EXACT [] -is_a: SO:0000316 ! CDS -relationship: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001385 -name: modified_amino_acid_feature -def: "A post translationally modified amino acid feature." [SO:ke] -synonym: "modified amino acid feature" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001386 -name: modified_glycine -def: "A post translationally modified glycine amino acid feature." [SO:ke] -synonym: "ModGly" EXACT AAMOD [] -synonym: "modified glycine" EXACT [] -xref: MOD:00908 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001387 -name: modified_L_alanine -def: "A post translationally modified alanine amino acid feature." [SO:ke] -synonym: "ModAla" EXACT AAMOD [] -synonym: "modified L alanine" EXACT [] -synonym: "modified L-alanine" EXACT [] -xref: MOD:00901 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001388 -name: modified_L_asparagine -def: "A post translationally modified asparagine amino acid feature." [SO:ke] -synonym: "ModAsn" EXACT AAMOD [] -synonym: "modified L asparagine" EXACT [] -synonym: "modified L-asparagine" EXACT [] -xref: MOD:00903 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001389 -name: modified_L_aspartic_acid -def: "A post translationally modified aspartic acid amino acid feature." [SO:ke] -synonym: "ModAsp" EXACT AAMOD [] -synonym: "modified L aspartic acid" EXACT [] -synonym: "modified L-aspartic acid" EXACT [] -xref: MOD:00904 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001390 -name: modified_L_cysteine -def: "A post translationally modified cysteine amino acid feature." [SO:ke] -synonym: "ModCys" EXACT AAMOD [] -synonym: "modified L cysteine" EXACT [] -synonym: "modified L-cysteine" EXACT [] -xref: MOD:00905 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001391 -name: modified_L_glutamic_acid -synonym: "ModGlu" EXACT AAMOD [] -synonym: "modified L glutamic acid" EXACT [] -synonym: "modified L-glutamic acid" EXACT [] -xref: MOD:00906 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001392 -name: modified_L_threonine -def: "A post translationally modified threonine amino acid feature." [SO:ke] -synonym: "modified L threonine" EXACT [] -synonym: "modified L-threonine" EXACT [] -synonym: "ModThr" EXACT AAMOD [] -xref: MOD:00917 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001393 -name: modified_L_tryptophan -def: "A post translationally modified tryptophan amino acid feature." [SO:ke] -synonym: "modified L tryptophan" EXACT [] -synonym: "modified L-tryptophan" EXACT [] -synonym: "ModTrp" EXACT AAMOD [] -xref: MOD:00918 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001394 -name: modified_L_glutamine -def: "A post translationally modified glutamine amino acid feature." 
[SO:ke] -synonym: "ModGln" EXACT [] -synonym: "modified L glutamine" EXACT [] -synonym: "modified L-glutamine" EXACT [] -xref: MOD:00907 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001395 -name: modified_L_methionine -def: "A post translationally modified methionine amino acid feature." [SO:ke] -synonym: "modified L methionine" EXACT [] -synonym: "modified L-methionine" EXACT [] -synonym: "ModMet" EXACT AAMOD [] -xref: MOD:00913 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001396 -name: modified_L_isoleucine -def: "A post translationally modified isoleucine amino acid feature." [SO:ke] -synonym: "modified L isoleucine" EXACT [] -synonym: "modified L-isoleucine" EXACT [] -synonym: "ModIle" EXACT AAMOD [] -xref: MOD:00910 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001397 -name: modified_L_phenylalanine -def: "A post translationally modified phenylalanine amino acid feature." [SO:ke] -synonym: "modified L phenylalanine" EXACT [] -synonym: "modified L-phenylalanine" EXACT [] -synonym: "ModPhe" EXACT AAMOD [] -xref: MOD:00914 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001398 -name: modified_L_histidine -def: "A post translationally modified histidie amino acid feature." [SO:ke] -synonym: "ModHis" EXACT [] -synonym: "modified L histidine" EXACT [] -synonym: "modified L-histidine" EXACT [] -xref: MOD:00909 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001399 -name: modified_L_serine -def: "A post translationally modified serine amino acid feature." [SO:ke] -synonym: "modified L serine" EXACT [] -synonym: "modified L-serine" EXACT [] -synonym: "MosSer" EXACT AAMOD [] -xref: MOD:00916 "http://www.psidev.info/index.php?q=node/104" -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001400 -name: modified_L_lysine -def: "A post translationally modified lysine amino acid feature." 
[SO:ke] -synonym: "modified L lysine" EXACT [] -synonym: "modified L-lysine" EXACT [] -synonym: "ModLys" EXACT AAMOD [] -xref: MOD:00912 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001401 -name: modified_L_leucine -def: "A post translationally modified leucine amino acid feature." [SO:ke] -synonym: "modified L leucine" EXACT [] -synonym: "modified L-leucine " EXACT [] -synonym: "ModLeu" EXACT AAMOD [] -xref: MOD:00911 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001402 -name: modified_L_selenocysteine -def: "A post translationally modified selenocysteine amino acid feature." [SO:ke] -synonym: "modified L selenocysteine" EXACT [] -synonym: "modified L-selenocysteine" EXACT [] -xref: MOD:01158 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001403 -name: modified_L_valine -def: "A post translationally modified valine amino acid feature." [SO:ke] -synonym: "modified L valine" EXACT [] -synonym: "modified L-valine" EXACT [] -synonym: "ModVal" EXACT AAMOD [] -xref: MOD:00920 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001404 -name: modified_L_proline -def: "A post translationally modified proline amino acid feature." [SO:ke] -synonym: "modified L proline" EXACT [] -synonym: "modified L-proline " EXACT [] -synonym: "ModPro" EXACT AAMOD [] -xref: MOD:00915 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001405 -name: modified_L_tyrosine -def: "A post translationally modified tyrosine amino acid feature." [SO:ke] -synonym: "modified L tyrosine" EXACT [] -synonym: "modified L-tyrosine" EXACT [] -synonym: "ModTry" EXACT AAMOD [] -xref: MOD:00919 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001406 -name: modified_L_arginine -def: "A post translationally modified arginine amino acid feature." [SO:ke] -synonym: "ModArg" EXACT AAMOD [] -synonym: "modified L arginine" EXACT [] -synonym: "modified L-arginine" EXACT [] -xref: MOD:00902 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001407 -name: peptidyl -def: "An attribute describing the nature of a proteinaceous polymer, where by the amino acid units are joined by peptide bonds." [SO:ke] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0001408 -name: cleaved_for_gpi_anchor_region -def: "The C-terminal residues of a polypeptide which are exchanged for a GPI-anchor." [EBI:rh] -synonym: "cleaved for gpi anchor region" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001409 -name: biomaterial_region -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001410 -name: experimental_feature -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -def: "A region that is defined according to its relations with other regions within the same sequence." [SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001413 -name: translocation_breakpoint -def: "The point within a chromosome where a translocation begins or ends." [SO:cb] -synonym: "translocation breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001414 -name: insertion_breakpoint -def: "The point within a chromosome where a insertion begins or ends." 
[SO:cb] -synonym: "insertion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001415 -name: deletion_breakpoint -def: "The point within a chromosome where a deletion begins or ends." [SO:cb] -synonym: "deletion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001416 -name: five_prime_flanking_region -def: "A flanking region located five prime of a specific region." [SO:chado] -synonym: "5' flanking region" RELATED [] -synonym: "five prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001417 -name: three_prime_flanking_region -def: "A flanking region located three prime of a specific region." [SO:chado] -synonym: "3' flanking region" RELATED [] -synonym: "three prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001418 -name: transcribed_fragment -def: "An experimental region, defined by a tiling array experiment to be transcribed at some level." [SO:ke] -comment: Term requested by the MODencode group. -synonym: "transcribed fragment" EXACT [] -synonym: "transfrag" RELATED [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001419 -name: cis_splice_site -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -def: "Primary transcript region bordering trans-splice junction." [SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001421 -name: splice_junction -def: "The boundary between an intron and an exon." [SO:ke] -synonym: "splice boundary" EXACT [] -synonym: "splice junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001422 -name: conformational_switch -def: "A region of a polypeptide, involved in the transition from one conformational state to another." 
[SO:ke] -comment: MM Young, K Kirshenbaum, KA Dill & S Highsmith. Predicting conformational switches in proteins. Protein Science, 1999, 8, 1752-64. K. Kirshenbaum, M.M. Young and S. Highsmith. Predicting Allosteric Switches in Myosins. Protein Science 8(9):1806-1815. 1999. -synonym: "polypeptide conformational switch" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001423 -name: dye_terminator_read -def: "A read produced by the dye terminator method of sequencing." [SO:ke] -synonym: "dye terminator read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001424 -name: pyrosequenced_read -def: "A read produced by pyrosequencing technology." [SO:ke] -comment: An example is a read produced by Roche 454 technology. -synonym: "pyorsequenced read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001425 -name: ligation_based_read -def: "A read produced by ligation based sequencing technologies." [SO:ke] -comment: An example of this kind of read is one produced by ABI SOLiD. -synonym: "ligation based read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001426 -name: polymerase_synthesis_read -def: "A read produced by the polymerase based sequence by synthesis method." [SO:ke] -comment: An example is a read produced by Illumina technology. -synonym: "polymerase synthesis read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001427 -name: cis_regulatory_frameshift_element -def: "A structural region in an RNA molecule which promotes ribosomal frameshifting of cis coding sequence." [RFAM:jd] -synonym: "cis regulatory frameshift element" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001428 -name: expressed_sequence_assembly -def: "A sequence assembly derived from expressed sequences." [SO:ke] -comment: From tracker [ 2372385 ] expressed_sequence_assembly. -synonym: "expressed sequence assembly" EXACT [] -is_a: SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0001429 -name: DNA_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with DNA." [SO:ke] -synonym: "DNA binding site" EXACT [] -is_a: SO:0001655 ! nucleotide_binding_site - -[Term] -id: SO:0001431 -name: cryptic_gene -def: "A gene that is not transcribed under normal conditions and is not critical to normal cellular functioning." [SO:ke] -synonym: "cryptic gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000976 ! cryptic - -[Term] -id: SO:0001432 -name: sequence_variant_affecting_polyadenylation -comment: OBSOLETE: This term was deleted as it conflated more than one term. The alteration is separate from the effect. -synonym: "mutation affecting polyadenylation" RELATED [] -synonym: "sequence variant affecting polyadenylation" EXACT [] -is_obsolete: true -replaced_by: SO:0001545 - -[Term] -id: SO:0001433 -name: three_prime_RACE_clone -def: "A three prime RACE (Rapid Amplification of cDNA Ends) clone is a cDNA clone copied from the 3' end of an mRNA (using a poly-dT primer to capture the polyA tail and a gene-specific or randomly primed 5' primer), and spliced into a vector for propagation in a suitable host." [modENCODE:nlw] -synonym: "3' RACE clone" RELATED [] -is_a: SO:0000317 ! cDNA_clone - -[Term] -id: SO:0001434 -name: cassette_pseudogene -def: "A cassette pseudogene is a kind of gene in an inactive form which may recombine at a telomeric locus to form a functional copy." [SO:ke] -comment: Requested by the Trypanosome community. -synonym: "cassette pseudogene" EXACT [] -synonym: "cassette type psedogene" RELATED [] -is_a: SO:0001760 ! non_processed_pseudogene - -[Term] -id: SO:0001435 -name: alanine -comment: A place holder for a cross product with chebi. -synonym: "A" EXACT aa1 [] -synonym: "Ala" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001436 -name: valine -comment: A place holder for a cross product with chebi. 
-synonym: "V" EXACT aa1 [] -synonym: "Val" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001437 -name: leucine -comment: A place holder for a cross product with chebi. -synonym: "L" EXACT aa1 [] -synonym: "Leu" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001438 -name: isoleucine -comment: A place holder for a cross product with chebi. -synonym: "I" EXACT aa1 [] -synonym: "Ile" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001439 -name: proline -comment: A place holder for a cross product with chebi. -synonym: "P" EXACT aa1 [] -synonym: "Pro" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001440 -name: tryptophan -comment: A place holder for a cross product with chebi. -synonym: "Trp" EXACT aa3 [] -synonym: "W" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001441 -name: phenylalanine -comment: A place holder for a cross product with chebi. -synonym: "F" EXACT aa1 [] -synonym: "Phe" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001442 -name: methionine -comment: A place holder for a cross product with chebi. -synonym: "M" EXACT aa1 [] -synonym: "Met" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001443 -name: glycine -comment: A place holder for a cross product with chebi. -synonym: "G" EXACT aa1 [] -synonym: "Gly" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001444 -name: serine -comment: A place holder for a cross product with chebi. -synonym: "S" EXACT aa1 [] -synonym: "Ser" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001445 -name: threonine -comment: A place holder for a cross product with chebi. -synonym: "T" EXACT aa1 [] -synonym: "Thr" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001446 -name: tyrosine -comment: A place holder for a cross product with chebi. -synonym: "Tyr" EXACT aa3 [] -synonym: "Y" EXACT aa1 [] -is_a: SO:0001237 ! 
amino_acid - -[Term] -id: SO:0001447 -name: cysteine -comment: A place holder for a cross product with chebi. -synonym: "C" EXACT aa1 [] -synonym: "Cys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001448 -name: glutamine -comment: A place holder for a cross product with chebi. -synonym: "Gln" EXACT aa3 [] -synonym: "Q" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001449 -name: asparagine -comment: A place holder for a cross product with chebi. -synonym: "Asn" EXACT aa3 [] -synonym: "N" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001450 -name: lysine -comment: A place holder for a cross product with chebi. -synonym: "K" EXACT aa1 [] -synonym: "Lys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001451 -name: arginine -comment: A place holder for a cross product with chebi. -synonym: "Arg" EXACT aa3 [] -synonym: "R" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001452 -name: histidine -comment: A place holder for a cross product with chebi. -synonym: "H" EXACT aa1 [] -synonym: "His" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001453 -name: aspartic_acid -comment: A place holder for a cross product with chebi. -synonym: "Asp" EXACT aa3 [] -synonym: "aspartic acid" EXACT [] -synonym: "D" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001454 -name: glutamic_acid -comment: A place holder for a cross product with chebi. -synonym: "E" EXACT aa1 [] -synonym: "Glu" EXACT aa3 [] -synonym: "glutamic acid" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001455 -name: selenocysteine -comment: A place holder for a cross product with chebi. -synonym: "Sec" EXACT aa3 [] -synonym: "U" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001456 -name: pyrrolysine -comment: A place holder for a cross product with chebi. -synonym: "O" EXACT aa1 [] -synonym: "Pyl" EXACT aa3 [] -is_a: SO:0001237 ! 
amino_acid - -[Term] -id: SO:0001457 -name: transcribed_cluster -def: "A region defined by a set of transcribed sequences from the same gene or expressed pseudogene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "transcribed cluster" EXACT [] -synonym: "unigene cluster" RELATED [] -is_a: SO:0001410 ! experimental_feature -relationship: has_part SO:0000695 ! reagent - -[Term] -id: SO:0001458 -name: unigene_cluster -def: "A kind of transcribed_cluster defined by a set of transcribed sequences from the a unique gene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "unigene cluster" RELATED [] -is_a: SO:0001457 ! transcribed_cluster - -[Term] -id: SO:0001459 -name: CRISPR -def: "Clustered Palindromic Repeats interspersed with bacteriophage derived spacer sequences." [RFAM:jd] -synonym: "Clustered_Regularly_Interspaced_Short_Palindromic_Repeat" EXACT [] -synonym: "CRISPR element" EXACT [] -xref: http:en.wikipedia.org/wiki/CRISPR -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0001460 -name: insulator_binding_site -def: "A binding site that, in an insulator region of a nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -comment: See tracker ID 2060908. -synonym: "insulator binding site" RELATED [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -relationship: part_of SO:0000627 ! insulator - -[Term] -id: SO:0001461 -name: enhancer_binding_site -def: "A binding site that, in the enhancer region of a nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -synonym: "enhancer binding site" RELATED [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -relationship: part_of SO:0000165 ! enhancer - -[Term] -id: SO:0001462 -name: contig_collection -def: "A collection of contigs." [SO:ke] -comment: See tracker ID: 2138359. 
-synonym: "contig collection" EXACT [] -is_a: SO:0001085 ! sequence_conflict -is_a: SO:0001260 ! sequence_collection -relationship: has_part SO:0000149 ! contig - -[Term] -id: SO:0001463 -name: lincRNA -def: "A multiexonic non-coding RNA transcribed by RNA polymerase II." [PMID:19182780, SO:ke] -synonym: "large intervening non-coding RNA" EXACT [] -synonym: "long intergenic non-coding RNA" EXACT [] -is_a: SO:0001877 ! lnc_RNA - -[Term] -id: SO:0001464 -name: UST -def: "An EST spanning part or all of the untranslated regions of a protein-coding transcript." [SO:nlw] -synonym: "UTR sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001465 -name: three_prime_UST -def: "A UST located in the 3'UTR of a protein-coding transcript." [SO:nlw] -synonym: "3' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001466 -name: five_prime_UST -def: "An UST located in the 5'UTR of a protein-coding transcript." [SO:nlw] -synonym: "5' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001467 -name: RST -def: "A tag produced from a single sequencing read from a RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "RACE sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001468 -name: three_prime_RST -def: "A tag produced from a single sequencing read from a 3'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "3' RST" EXACT [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001469 -name: five_prime_RST -def: "A tag produced from a single sequencing read from a 5'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "5' RST" RELATED [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001470 -name: UST_match -def: "A match against an UST sequence." [SO:nlw] -synonym: "UST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001471 -name: RST_match -def: "A match against an RST sequence." [SO:nlw] -synonym: "RST match" EXACT [] -is_a: SO:0000102 ! 
expressed_sequence_match - -[Term] -id: SO:0001472 -name: primer_match -def: "A nucleotide match to a primer sequence." [SO:nlw] -synonym: "primer match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0001473 -name: miRNA_antiguide -def: "A region of the pri miRNA that basepairs with the guide to form the hairpin." [SO:ke] -synonym: "miRNA antiguide " EXACT [] -synonym: "miRNA passenger strand" EXACT [] -synonym: "miRNA star" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-05-27T03:35:43Z - -[Term] -id: SO:0001474 -name: trans_splice_junction -def: "The boundary between the spliced leader and the first exon of the mRNA." [SO:ke] -synonym: "trans-splice junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2009-07-13T04:50:49Z - -[Term] -id: SO:0001475 -name: outron -def: "A region of a primary transcript, that is removed via trans splicing." [PMID:16401417, SO:ke] -is_a: SO:0000835 ! primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-07-14T11:36:08Z - -[Term] -id: SO:0001476 -name: natural_plasmid -def: "A plasmid that occurs naturally." [SO:xp] -synonym: "natural plasmid" EXACT [] -is_a: SO:0000155 ! plasmid -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -relationship: has_quality SO:0000782 ! natural -created_by: kareneilbeck -creation_date: 2009-09-01T03:43:06Z - -[Term] -id: SO:0001477 -name: gene_trap_construct -def: "A gene trap construct is a type of engineered plasmid which is designed to integrate into a genome and produce a fusion transcript between exons of the gene into which it inserts and a reporter element in the construct. Gene traps contain a splice acceptor, do not contain promoter elements for the reporter, and are mutagenic. Gene traps may be bicistronic with the second cassette containing a promoter driving an a selectable marker." 
[ZFIN:dh] -synonym: "gene trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:49:09Z - -[Term] -id: SO:0001478 -name: promoter_trap_construct -def: "A promoter trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when inserted in close proximity to a promoter element. Promoter traps typically do not contain promoter elements and are mutagenic." [ZFIN:dh] -synonym: "promoter trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:52:01Z - -[Term] -id: SO:0001479 -name: enhancer_trap_construct -def: "An enhancer trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when the expression from a basic minimal promoter is enhanced by genomic enhancer elements. Enhancer traps contain promoter elements and are not usually mutagenic." [ZFIN:dh] -synonym: "enhancer trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:53:26Z - -[Term] -id: SO:0001480 -name: PAC_end -def: "A region of sequence from the end of a PAC clone that may provide a highly specific marker." [ZFIN:mh] -synonym: "PAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000154 ! PAC -created_by: kareneilbeck -creation_date: 2009-09-09T05:18:12Z - -[Term] -id: SO:0001481 -name: RAPD -def: "RAPD is a 'PCR product' where a sequence variant is identified through the use of PCR with random primers." [ZFIN:mh] -synonym: "Random Amplification Polymorphic DNA" EXACT [] -is_a: SO:0000006 ! PCR_product -created_by: kareneilbeck -creation_date: 2009-09-09T05:26:10Z - -[Term] -id: SO:0001482 -name: shadow_enhancer -synonym: "shadow enhancer" EXACT [] -is_a: SO:0000165 ! 
enhancer -created_by: kareneilbeck -creation_date: 2009-09-09T05:29:29Z - -[Term] -id: SO:0001483 -name: SNV -def: "SNVs are single nucleotide positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0001484 -name: X_element_combinatorial_repeat -def: "An X element combinatorial repeat is a repeat region located between the X element and the telomere or adjacent Y' element." [http://www.yeastgenome.org/help/glossary.html] -comment: X element combinatorial repeats contain Tbf1p binding sites,\nand possible functions include a role in telomerase-independent telomere\nmaintenance via recombination or as a barrier against transcriptional\nsilencing. These are usually present as a combination of one or more of\nseveral types of smaller elements (designated A, B, C, or D). This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "X element combinatorial repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T11:03:37Z - -[Term] -id: SO:0001485 -name: Y_prime_element -def: "A Y' element is a repeat region (SO:0000657) located adjacent to telomeric repeats or X element combinatorial repeats, either as a single copy or tandem repeat of two to four copies." [http:http://www.yeastgenome.org/help/glossary.html] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "Y' element" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! 
telomere -created_by: kareneilbeck -creation_date: 2009-11-10T12:08:57Z - -[Term] -id: SO:0001486 -name: standard_draft -def: "The status of a whole genome sequence, where the data is minimally filtered or un-filtered, from any number of sequencing platforms, and is assembled into contigs. Genome sequence of this quality may harbour regions of poor quality and can be relatively incomplete." [DOI:10.1126] -synonym: "standard draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:48:32Z - -[Term] -id: SO:0001487 -name: high_quality_draft -def: "The status of a whole genome sequence, where overall coverage represents at least 90 percent of the genome." [DOI:10.1126] -synonym: "high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:52:36Z - -[Term] -id: SO:0001488 -name: improved_high_quality_draft -def: "The status of a whole genome sequence, where additional work has been performed, using either manual or automated methods, such as gap resolution." [DOI:10.1126] -synonym: "improved high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:54:35Z - -[Term] -id: SO:0001489 -name: annotation_directed_improved_draft -def: "The status of a whole genome sequence,where annotation, and verification of coding regions has occurred." [DOI:10.1126] -synonym: "annotation directed improvement" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:57:10Z - -[Term] -id: SO:0001490 -name: noncontiguous_finished -def: "The status of a whole genome sequence, where the assembly is high quality, closure approaches have been successful for most gaps, misassemblies and low quality regions." [DOI:10.1126] -synonym: "non contiguous finished" EXACT [] -is_a: SO:0001499 ! 
whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:01:07Z - -[Term] -id: SO:0001491 -name: finished_genome -def: "The status of a whole genome sequence, with less than 1 error per 100,000 base pairs." [DOI:10.1126] -synonym: "finished" EXACT [] -synonym: "finished genome" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:04:43Z - -[Term] -id: SO:0001492 -name: intronic_regulatory_region -def: "A regulatory region that is part of an intron." [SO:ke] -synonym: "intronic regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000188 ! intron -created_by: kareneilbeck -creation_date: 2009-11-08T02:48:02Z - -[Term] -id: SO:0001493 -name: centromere_DNA_Element_I -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region composed of 8-11bp which enables binding by the centromere binding factor 1(Cbf1p)." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEI" EXACT [] -synonym: "Centromere DNA Element I" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0001794 ! point_centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:47:23Z - -[Term] -id: SO:0001494 -name: centromere_DNA_Element_II -def: "A centromere DNA Element II (CDEII) is part a conserved region of the centromere, consisting of a consensus region that is AT-rich and ~ 75-100 bp in length." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEII" EXACT [] -synonym: "centromere DNA Element II" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0001794 ! 
point_centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:51:26Z - -[Term] -id: SO:0001495 -name: centromere_DNA_Element_III -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region that consists of a 25-bp which enables binding by the centromere DNA binding factor 3 (CBF3) complex." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEIII" EXACT [] -synonym: "centromere DNA Element III" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0001794 ! point_centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:54:47Z - -[Term] -id: SO:0001496 -name: telomeric_repeat -def: "The telomeric repeat is a repeat region, part of the chromosome, which in yeast, is a G-rich terminal sequence of the form (TG(1-3))n or more precisely ((TG)(1-6)TG(2-3))n." [PMID:8720065] -comment: The repeats are maintained by telomerase and there is generally 300 (+/-) 75 bp of TG(1-3) at a given end. Telomeric repeats function in completing chromosome replication and protecting the ends from degradation and end-to-end fusions. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880739. -synonym: "telomeric repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-09T06:00:42Z - -[Term] -id: SO:0001497 -name: X_element -def: "The X element is a conserved region, of the telomere, of ~475 bp that contains an ARS sequence and in most cases an Abf1p binding site." [http://www.yeastgenome.org/help/glossary.html#xelemcoresequence] -comment: Possible functions include roles in chromosomal segregation,\nmaintenance of chromosome stability, recombinational sequestering, or as a\nbarrier to transcriptional silencing. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. 
-synonym: "X element" RELATED [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T10:56:54Z - -[Term] -id: SO:0001498 -name: YAC_end -def: "A region of sequence from the end of a YAC clone that may provide a highly specific marker." [SO:ke] -synonym: "YAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000152 ! YAC -created_by: kareneilbeck -creation_date: 2009-11-19T11:07:18Z - -[Term] -id: SO:0001499 -name: whole_genome_sequence_status -def: "The status of whole genome sequence." [DOI:10.1126] -comment: This terms and children were added to SO in response to tracker request by Patrick Chain. The paper Genome Project Standards in a New Era of Sequencing. Science October 9th 2009, addresses these terms. -synonym: "whole genome sequence status" EXACT [] -is_a: SO:0000905 ! status -created_by: kareneilbeck -creation_date: 2009-10-23T12:47:47Z - -[Term] -id: SO:0001500 -name: heritable_phenotypic_marker -def: "A biological_region characterized as a single heritable trait in a phenotype screen. The heritable phenotype may be mapped to a chromosome but generally has not been characterized to a specific gene locus." [JAX:hdene] -synonym: "heritable phenotypic marker" EXACT [] -synonym: "phenotypic marker" EXACT [] -is_a: SO:0001645 ! genetic_marker -created_by: kareneilbeck -creation_date: 2009-12-07T01:50:55Z - -[Term] -id: SO:0001501 -name: peptide_collection -def: "A collection of peptide sequences." [BBOP:nlw] -comment: Term requested via tracker ID: 2910829. -synonym: "peptide collection" EXACT [] -synonym: "peptide set" EXACT [] -is_a: SO:0001260 ! sequence_collection -relationship: has_part SO:0000104 ! polypeptide -created_by: kareneilbeck -creation_date: 2009-12-11T10:58:58Z - -[Term] -id: SO:0001502 -name: high_identity_region -def: "An experimental feature with high sequence identity to another sequence." [SO:ke] -comment: Requested by tracker ID: 2902685. 
-synonym: "high identity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2009-12-11T11:06:05Z - -[Term] -id: SO:0001503 -name: processed_transcript -def: "A transcript for which no open reading frame has been identified and for which no other function has been determined." [MGI:hdeen] -comment: Ensembl and Vega also use this term name. Requested by Howard Deen of MGI. -synonym: "processed transcript" EXACT [] -is_a: SO:0000673 ! transcript -created_by: kareneilbeck -creation_date: 2009-12-21T05:37:14Z - -[Term] -id: SO:0001504 -name: assortment_derived_variation -def: "A chromosome variation derived from an event during meiosis." [SO:ke] -synonym: "assortment derived variation" RELATED [] -is_a: SO:0000240 ! chromosome_variation -created_by: kareneilbeck -creation_date: 2010-03-02T05:03:18Z - -[Term] -id: SO:0001505 -name: reference_genome -def: "A collection of sequences (often chromosomes) taken as the standard for a given organism and genome assembly." [SO:ke] -synonym: "reference genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:10:03Z - -[Term] -id: SO:0001506 -name: variant_genome -def: "A collection of sequences (often chromosomes) of an individual." [SO:ke] -synonym: "variant genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:11:25Z - -[Term] -id: SO:0001507 -name: variant_collection -def: "A collection of one or more sequences of an individual." [SO:ke] -synonym: "variant collection" RELATED [] -is_a: SO:0001260 ! sequence_collection -relationship: has_part SO:0001059 ! sequence_alteration -created_by: kareneilbeck -creation_date: 2010-03-03T02:13:28Z - -[Term] -id: SO:0001508 -name: alteration_attribute -synonym: "alteration attribute" EXACT [] -is_a: SO:0000733 ! 
feature_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:53:23Z - -[Term] -id: SO:0001509 -name: chromosomal_variation_attribute -synonym: "chromosomal variation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:54:30Z - -[Term] -id: SO:0001510 -name: intrachromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:25Z - -[Term] -id: SO:0001511 -name: interchromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:43Z - -[Term] -id: SO:0001512 -name: insertion_attribute -def: "A quality of a chromosomal insertion,." [SO:ke] -synonym: "insertion attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:56Z - -[Term] -id: SO:0001513 -name: tandem -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:37Z - -[Term] -id: SO:0001514 -name: direct -def: "A quality of an insertion where the insert is not in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:49Z - -[Term] -id: SO:0001515 -name: inverted -def: "A quality of an insertion where the insert is in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:40Z - -[Term] -id: SO:0001516 -name: free -def: "The quality of a duplication where the new region exists independently of the original." [SO:ke] -is_a: SO:0001523 ! duplication_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:51Z - -[Term] -id: SO:0001517 -name: inversion_attribute -synonym: "inversion attribute" EXACT [] -is_a: SO:0001508 ! 
alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:10Z - -[Term] -id: SO:0001518 -name: pericentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:24Z - -[Term] -id: SO:0001519 -name: paracentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:35Z - -[Term] -id: SO:0001520 -name: translocaton_attribute -synonym: "translocation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:47Z - -[Term] -id: SO:0001521 -name: reciprocal -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:34Z - -[Term] -id: SO:0001522 -name: insertional -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:51Z - -[Term] -id: SO:0001523 -name: duplication_attribute -synonym: "duplication attribute" RELATED [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-05T01:56:33Z - -[Term] -id: SO:0001524 -name: chromosomally_aberrant_genome -synonym: "chromosomally aberrant genome" RELATED [] -is_a: SO:0001506 ! variant_genome -created_by: kareneilbeck -creation_date: 2010-03-05T02:21:00Z - -[Term] -id: SO:0001525 -name: assembly_error_correction -def: "A region of sequence where the final nucleotide assignment differs from the original assembly due to an improvement that replaces a mistake." [SO:ke] -synonym: "assembly error correction" RELATED [] -is_a: SO:0000413 ! sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:16:31Z - -[Term] -id: SO:0001526 -name: base_call_error_correction -def: "A region of sequence where the final nucleotide assignment is different from that given by the base caller due to an improvement that replaces a mistake." [SO:ke] -synonym: "base call error correction" RELATED [] -is_a: SO:0000413 ! 
sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:18:07Z - -[Term] -id: SO:0001527 -name: peptide_localization_signal -def: "A region of peptide sequence used to target the polypeptide molecule to a specific organelle." [SO:ke] -subset: SOFA -synonym: "localization signal" RELATED [] -synonym: "peptide localization signal" EXACT [] -is_a: SO:0000839 ! polypeptide_region -created_by: kareneilbeck -creation_date: 2010-03-11T02:15:05Z - -[Term] -id: SO:0001528 -name: nuclear_localization_signal -def: "A polypeptide region that targets a polypeptide to the nucleus." [SO:ke] -synonym: "NLS" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_localization_signal "wikipedia" -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:16:38Z - -[Term] -id: SO:0001529 -name: endosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the endosome." [SO:ke] -synonym: "endosomal localization signal" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:20:58Z - -[Term] -id: SO:0001530 -name: lysosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the lysosome." [SO:ke] -synonym: "lysosomal localization signal" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:24:10Z - -[Term] -id: SO:0001531 -name: nuclear_export_signal -def: "A polypeptide region that targets a polypeptide to he cytoplasm." [SO:ke] -synonym: "NES" EXACT [] -synonym: "nuclear export signal" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_export_signal -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:25:25Z - -[Term] -id: SO:0001532 -name: recombination_signal_sequence -def: "A region recognized by a recombinase." 
[SO:ke] -synonym: "recombination signal sequence" RELATED [] -xref: http://en.wikipedia.org/wiki/Recombination_Signal_Sequences "wikipedia" -is_a: SO:0000299 ! specific_recombination_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:16:47Z - -[Term] -id: SO:0001533 -name: cryptic_splice_site -def: "A splice site that is in part of the transcript not normally spliced. They occur via mutation or transcriptional error." [SO:ke] -synonym: "cryptic splice signal" RELATED [] -synonym: "cryptic splice site" EXACT [] -is_a: SO:0000162 ! splice_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:25:06Z - -[Term] -id: SO:0001534 -name: nuclear_rim_localization_signal -def: "A polypeptide region that targets a polypeptide to the nuclear rim." [SO:ke] -synonym: "nuclear rim localization signal" RELATED [] -xref: PMID:16027110 -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T03:31:30Z - -[Term] -id: SO:0001535 -name: p_element -def: "A P_element is a DNA transposon responsible for hybrid dysgenesis." [SO:ke] -synonym: "P element" RELATED [] -is_a: SO:0000182 ! DNA_transposon -created_by: kareneilbeck -creation_date: 2010-03-12T03:40:33Z - -[Term] -id: SO:0001536 -name: functional_variant -def: "A sequence variant in which the function of a gene product is altered with respect to a reference." [SO:ke] -synonym: "functional variant" EXACT [] -is_a: SO:0001060 ! sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:30:25Z - -[Term] -id: SO:0001537 -name: structural_variant -def: "A sequence variant that changes one or more sequence features." [SO:ke] -synonym: "structural variant" RELATED [] -is_a: SO:0001060 ! sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:31:01Z - -[Term] -id: SO:0001538 -name: transcript_function_variant -def: "A sequence variant which alters the functioning of a transcript with respect to a reference sequence." 
[SO:ke] -synonym: "transcript function variant" EXACT [] -is_a: SO:0001536 ! functional_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:32:58Z - -[Term] -id: SO:0001539 -name: translational_product_function_variant -def: "A sequence variant that affects the functioning of a translational product with respect to a reference sequence." [SO:ke] -synonym: "translational product variant" EXACT [] -is_a: SO:0001536 ! functional_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:46:15Z - -[Term] -id: SO:0001540 -name: level_of_transcript_variant -def: "A sequence variant which alters the level of a transcript." [SO:ke] -synonym: "level of transcript variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:47:07Z - -[Term] -id: SO:0001541 -name: decreased_transcript_level_variant -def: "A sequence variant that increases the level of mature, spliced and processed RNA with respect to a reference sequence." [SO:ke] -synonym: "decreased transcript level" EXACT [] -is_a: SO:0001540 ! level_of_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:47:47Z - -[Term] -id: SO:0001542 -name: increased_transcript_level_variant -def: "A sequence variant that increases the level of mature, spliced and processed RNA with respect to a reference sequence." [SO:ke] -synonym: "increased transcript level variant" EXACT [] -is_a: SO:0001540 ! level_of_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:48:17Z - -[Term] -id: SO:0001543 -name: transcript_processing_variant -def: "A sequence variant that affects the post transcriptional processing of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcript processing variant" EXACT [] -is_a: SO:0001538 ! 
transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:48:48Z - -[Term] -id: SO:0001544 -name: editing_variant -def: "A transcript processing variant whereby the process of editing is disrupted with respect to the reference." [SO:ke] -synonym: "editing variant" EXACT [] -is_a: SO:0001543 ! transcript_processing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:49:25Z - -[Term] -id: SO:0001545 -name: polyadenylation_variant -def: "A sequence variant that changes polyadenylation with respect to a reference sequence." [SO:ke] -synonym: "polyadenylation variant" EXACT [] -is_a: SO:0001543 ! transcript_processing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:49:40Z - -[Term] -id: SO:0001546 -name: transcript_stability_variant -def: "A variant that changes the stability of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcript stability variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:01Z - -[Term] -id: SO:0001547 -name: decreased_transcript_stability_variant -def: "A sequence variant that decreases transcript stability with respect to a reference sequence." [SO:ke] -synonym: "decrease transcript stability variant" EXACT [] -is_a: SO:0001546 ! transcript_stability_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:23Z - -[Term] -id: SO:0001548 -name: increased_transcript_stability_variant -def: "A sequence variant that increases transcript stability with respect to a reference sequence." [SO:ke] -synonym: "increased transcript stability variant" EXACT [] -is_a: SO:0001546 ! transcript_stability_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:39Z - -[Term] -id: SO:0001549 -name: transcription_variant -def: "A variant that changes alters the transcription of a transcript with respect to a reference sequence." 
[SO:ke] -synonym: "transcription variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:51:26Z - -[Term] -id: SO:0001550 -name: rate_of_transcription_variant -def: "A sequence variant that changes the rate of transcription with respect to a reference sequence." [SO:ke] -synonym: "rate of transcription variant" EXACT [] -is_a: SO:0001549 ! transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:51:50Z - -[Term] -id: SO:0001551 -name: increased_transcription_rate_variant -def: "A sequence variant that increases the rate of transcription with respect to a reference sequence." [SO:ke] -synonym: "increased transcription rate variant" EXACT [] -is_a: SO:0001550 ! rate_of_transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:52:17Z - -[Term] -id: SO:0001552 -name: decreased_transcription_rate_variant -def: "A sequence variant that decreases the rate of transcription with respect to a reference sequence." [SO:ke] -synonym: "decreased transcription rate variant" EXACT [] -is_a: SO:0001550 ! rate_of_transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:52:43Z - -[Term] -id: SO:0001553 -name: translational_product_level_variant -def: "A functional variant that changes the translational product level with respect to a reference sequence." [SO:ke] -synonym: "translational product level variant" EXACT [] -is_a: SO:0001539 ! translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:53:32Z - -[Term] -id: SO:0001554 -name: polypeptide_function_variant -def: "A sequence variant which changes polypeptide functioning with respect to a reference sequence." [SO:ke] -synonym: "polypeptide function variant" EXACT [] -is_a: SO:0001539 ! 
translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:53:54Z - -[Term] -id: SO:0001555 -name: decreased_translational_product_level -def: "A sequence variant which decreases the translational product level with respect to a reference sequence." [SO:ke] -synonym: "decrease translational product level" EXACT [] -is_a: SO:0001553 ! translational_product_level_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:54:25Z - -[Term] -id: SO:0001556 -name: increased_translational_product_level -def: "A sequence variant which increases the translational product level with respect to a reference sequence." [SO:ke] -synonym: "increase translational product level" EXACT [] -is_a: SO:0001553 ! translational_product_level_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:55:25Z - -[Term] -id: SO:0001557 -name: polypeptide_gain_of_function_variant -def: "A sequence variant which causes gain of polypeptide function with respect to a reference sequence." [SO:ke] -synonym: "polypeptide gain of function variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:12Z - -[Term] -id: SO:0001558 -name: polypeptide_localization_variant -def: "A sequence variant which changes the localization of a polypeptide with respect to a reference sequence." [SO:ke] -synonym: "polypeptide localization variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:37Z - -[Term] -id: SO:0001559 -name: polypeptide_loss_of_function_variant -def: "A sequence variant that causes the loss of a polypeptide function with respect to a reference sequence." [SO:ke] -synonym: "polypeptide loss of function variant" EXACT [] -is_a: SO:0001554 ! 
polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:58Z - -[Term] -id: SO:0001560 -name: inactive_ligand_binding_site -def: "A sequence variant that causes the inactivation of a ligand binding site with respect to a reference sequence." [SO:ke] -synonym: "inactive ligand binding site" EXACT [] -is_a: SO:0001559 ! polypeptide_loss_of_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:58:00Z - -[Term] -id: SO:0001561 -name: polypeptide_partial_loss_of_function -def: "A sequence variant that causes some but not all loss of polypeptide function with respect to a reference sequence." [SO:ke] -synonym: "polypeptide partial loss of function" EXACT [] -is_a: SO:0001559 ! polypeptide_loss_of_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:58:32Z - -[Term] -id: SO:0001562 -name: polypeptide_post_translational_processing_variant -def: "A sequence variant that causes a change in post translational processing of the peptide with respect to a reference sequence." [SO:ke] -synonym: "polypeptide post translational processing variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:59:06Z - -[Term] -id: SO:0001563 -name: copy_number_change -def: "A sequence variant where copies of a feature (CNV) are either increased or decreased." [SO:ke] -synonym: "copy number change" EXACT [] -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:27:33Z - -[Term] -id: SO:0001564 -name: gene_variant -def: "A sequence variant where the structure of the gene is changed." [SO:ke] -synonym: "gene structure variant" EXACT [] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:01Z - -[Term] -id: SO:0001565 -name: gene_fusion -def: "A sequence variant whereby a two genes have become joined." [SO:ke] -synonym: "gene fusion" EXACT [] -is_a: SO:0001564 ! 
gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:28Z - -[Term] -id: SO:0001566 -name: regulatory_region_variant -def: "A sequence variant located within a regulatory region." [SO:ke] -comment: EBI term: Regulatory region variations - In regulatory region annotated by Ensembl. -synonym: "regulatory region variant" EXACT [] -synonym: "regulatory_region_" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:48Z - -[Term] -id: SO:0001567 -name: stop_retained_variant -def: "A sequence variant where at least one base in the terminator codon is changed, but the terminator remains." [SO:ke] -synonym: "stop retained variant" EXACT [] -is_a: SO:0001590 ! terminator_codon_variant -is_a: SO:0001819 ! synonymous_variant -created_by: kareneilbeck -creation_date: 2010-04-19T05:02:30Z - -[Term] -id: SO:0001568 -name: splicing_variant -def: "A sequence variant that changes the process of splicing." [SO:ke] -synonym: "splicing variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001564 ! gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:29:22Z - -[Term] -id: SO:0001569 -name: cryptic_splice_site_variant -def: "A sequence variant causing a new (functional) splice site." [EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "cryptic splice site activation" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:29:41Z - -[Term] -id: SO:0001570 -name: cryptic_splice_acceptor -def: "A sequence variant whereby a new splice site is created due to the activation of a new acceptor." [SO:ke] -synonym: "cryptic splice acceptor" EXACT [] -is_a: SO:0001569 ! 
cryptic_splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:30:11Z - -[Term] -id: SO:0001571 -name: cryptic_splice_donor -def: "A sequence variant whereby a new splice site is created due to the activation of a new donor." [SO:ke] -synonym: "cryptic splice donor" EXACT [] -is_a: SO:0001569 ! cryptic_splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:30:35Z - -[Term] -id: SO:0001572 -name: exon_loss -def: "A sequence variant whereby an exon is lost from the transcript." [SO:ke] -synonym: "exon loss" EXACT [] -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:09Z - -[Term] -id: SO:0001573 -name: intron_gain -def: "A sequence variant whereby an intron is gained by the processed transcript; usually a result of an alteration of the donor or acceptor." [EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "intron gain" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:25Z - -[Term] -id: SO:0001574 -name: splice_acceptor_variant -def: "A splice variant that changes the 2 base region at the 3' end of an intron." [SO:ke] -synonym: "splice acceptor variant" EXACT [] -is_a: SO:0001629 ! splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:52Z - -[Term] -id: SO:0001575 -name: splice_donor_variant -def: "A splice variant that changes the2 base region at the 5' end of an intron." [SO:ke] -synonym: "splice donor variant" EXACT [] -is_a: SO:0001629 ! splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:32:10Z - -[Term] -id: SO:0001576 -name: transcript_variant -def: "A sequence variant that changes the structure of the transcript." [SO:ke] -synonym: "transcript variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001564 ! 
gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:32:41Z - -[Term] -id: SO:0001577 -name: complex_transcript_variant -def: "A transcript variant with a complex INDEL- Insertion or deletion that spans an exon/intron border or a coding sequence/UTR border." [http://ensembl.org/info/docs/variation/index.html] -comment: EBI term: Complex InDel - Insertion or deletion that spans an exon/intron border or a coding sequence/UTR border. -synonym: "complex transcript variant" EXACT [] -synonym: "complex_indel" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "complext change in transcript" EXACT [] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:33:03Z - -[Term] -id: SO:0001578 -name: stop_lost -def: "A sequence variant where at least one base of the terminator codon (stop) is changed, resulting in an elongated transcript." [SO:ke] -comment: EBI term: Stop lost - In coding sequence, resulting in the loss of a stop codon. -synonym: "stop codon lost" EXACT [] -synonym: "stop lost" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001590 ! terminator_codon_variant -is_a: SO:0001650 ! inframe_variant -is_a: SO:0001907 ! feature_elongation -created_by: kareneilbeck -creation_date: 2010-03-23T03:46:42Z - -[Term] -id: SO:0001579 -name: transcript_sequence_variant -synonym: "transcript sequence variant" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001580 -name: coding_sequence_variant -alt_id: SO:0001581 -def: "A sequence variant that changes the coding sequence." [SO:ke] -synonym: "coding sequence variant" EXACT [] -synonym: "coding variant" EXACT [] -synonym: "codon variant" EXACT [] -synonym: "codon_variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001791 ! 
exon_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:34:36Z - -[Term] -id: SO:0001582 -name: initiator_codon_variant -def: "A codon variant that changes at least one base of the first codon of a transcript." [SO:ke] -synonym: "initiatior codon variant" EXACT [] -synonym: "initiator codon change" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -xref: loinc:LA6695-6 "Initiating Methionine" -is_a: SO:0001650 ! inframe_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:35:18Z - -[Term] -id: SO:0001583 -name: missense_variant -alt_id: SO:0001584 -alt_id: SO:0001783 -def: "A sequence variant, that changes one or more bases, resulting in a different amino acid sequence but where the length is preserved." [EBI:fc, EBI:gr, SO:ke] -comment: EBI term: Non-synonymous SNPs. SNPs that are located in the coding sequence and result in an amino acid change in the encoded peptide sequence. A change that causes a non_synonymous_codon can be more than 3 bases - for example 4 base substitution. -synonym: "missense" EXACT [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "missense codon" EXACT [] -synonym: "non synonymous codon" EXACT [] -synonym: "non synonymous variant" EXACT [] -synonym: "non_synonymous_coding" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -xref: http://en.wikipedia.org/wiki/Missense_mutation -xref: loinc:LA6698-0 "Missense" -is_a: SO:0001650 ! inframe_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:35:49Z - -[Term] -id: SO:0001585 -name: conservative_missense_variant -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for a different but similar amino acid. These variants may or may not be deleterious." 
[SO:ke] -synonym: "conservative missense codon" EXACT [] -synonym: "conservative missense variant" EXACT [] -synonym: "neutral missense codon" RELATED [] -synonym: "quiet missense codon" RELATED [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001583 ! missense_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:36:40Z - -[Term] -id: SO:0001586 -name: non_conservative_missense_variant -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for an amino acid with different biochemical properties." [SO:ke] -synonym: "non conservative missense codon" EXACT [] -synonym: "non conservative missense variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001583 ! missense_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:37:16Z - -[Term] -id: SO:0001587 -name: stop_gained -def: "A sequence variant whereby at least one base of a codon is changed, resulting in a premature stop codon, leading to a shortened transcript." [SO:ke] -comment: EBI term: Stop gained - In coding sequence, resulting in the gain of a stop codon (i.e. leading to a shortened peptide sequence). -synonym: "nonsense" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "nonsense codon" EXACT [] -synonym: "stop codon gained" RELATED [] -synonym: "stop gained" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -xref: loinc:LA6699-8 "Nonsense" -is_a: SO:0001650 ! inframe_variant -is_a: SO:0001906 ! feature_truncation -created_by: kareneilbeck -creation_date: 2010-03-22T02:37:52Z - -[Term] -id: SO:0001589 -name: frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three." [SO:ke] -comment: EBI term:Frameshift variations - In coding sequence, resulting in a frameshift. 
-synonym: "frameshift variant" EXACT [] -synonym: "frameshift_" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "frameshift_coding" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -xref: loinc:LA6694-9 "Frameshift" -is_a: SO:0001818 ! protein_altering_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:40:19Z - -[Term] -id: SO:0001590 -name: terminator_codon_variant -alt_id: SO:0001625 -def: "A sequence variant whereby at least one of the bases in the terminator codon is changed." [SO:ke] -comment: The terminal codon may be the terminator, or in an incomplete transcript the last available codon. -synonym: "terminal codon variant" EXACT [] -synonym: "terminal_codon_variant" EXACT [] -synonym: "terminator codon variant" EXACT [] -xref: loinc:LA6700-2 "Stop Codon Mutation" -is_a: SO:0001580 ! coding_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:40:37Z - -[Term] -id: SO:0001591 -name: frame_restoring_variant -def: "A sequence variant that reverts the sequence of a previous frameshift mutation back to the initial frame." [SO:ke] -synonym: "frame restoring variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:09Z - -[Term] -id: SO:0001592 -name: minus_1_frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, by shifting one base ahead." [http://arjournals.annualreviews.org/doi/pdf/10.1146/annurev.ge.08.120174.001535] -synonym: "-1 frameshift variant" EXACT [] -synonym: "minus 1 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:30Z - -[Term] -id: SO:0001593 -name: minus_2_frameshift_variant -synonym: "-2 frameshift variant" EXACT [] -synonym: "minus 2 frameshift variant" EXACT [] -is_a: SO:0001589 ! 
frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:52Z - -[Term] -id: SO:0001594 -name: plus_1_frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, by shifting one base backward." [http://arjournals.annualreviews.org/doi/pdf/10.1146/annurev.ge.08.120174.001535] -synonym: "+1 frameshift variant" EXACT [] -synonym: "plus 1 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:42:06Z - -[Term] -id: SO:0001595 -name: plus_2_frameshift_variant -synonym: "+2 frameshift variant" EXACT [] -synonym: "plus 2 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:42:23Z - -[Term] -id: SO:0001596 -name: transcript_secondary_structure_variant -def: "A sequence variant within a transcript that changes the secondary structure of the RNA product." [SO:ke] -synonym: "transcript secondary structure variant" EXACT [] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:43:18Z - -[Term] -id: SO:0001597 -name: compensatory_transcript_secondary_structure_variant -def: "A secondary structure variant that compensate for the change made by a previous variant." [SO:ke] -synonym: "compensatory transcript secondary structure variant" EXACT [] -is_a: SO:0001596 ! transcript_secondary_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:43:54Z - -[Term] -id: SO:0001598 -name: translational_product_structure_variant -def: "A sequence variant within the transcript that changes the structure of the translational product." [SO:ke] -synonym: "translational product structure variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001564 ! 
gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:44:17Z - -[Term] -id: SO:0001599 -name: 3D_polypeptide_structure_variant -def: "A sequence variant that changes the resulting polypeptide structure." [SO:ke] -synonym: "3D polypeptide structure variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:44:46Z - -[Term] -id: SO:0001600 -name: complex_3D_structural_variant -def: "A sequence variant that changes the resulting polypeptide structure." [SO:ke] -synonym: "complex 3D structural variant" EXACT [] -is_a: SO:0001599 ! 3D_polypeptide_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:45:13Z - -[Term] -id: SO:0001601 -name: conformational_change_variant -def: "A sequence variant in the CDS region that causes a conformational change in the resulting polypeptide sequence." [SO:ke] -synonym: "conformational change variant" EXACT [] -is_a: SO:0001599 ! 3D_polypeptide_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:45:48Z - -[Term] -id: SO:0001602 -name: complex_change_of_translational_product_variant -synonym: "complex change of translational product variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:46:54Z - -[Term] -id: SO:0001603 -name: polypeptide_sequence_variant -def: "A sequence variant with in the CDS that causes a change in the resulting polypeptide sequence." [SO:ke] -synonym: "polypeptide sequence variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:13Z - -[Term] -id: SO:0001604 -name: amino_acid_deletion -def: "A sequence variant within a CDS resulting in the loss of an amino acid from the resulting polypeptide." [SO:ke] -synonym: "amino acid deletion" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! 
polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:36Z - -[Term] -id: SO:0001605 -name: amino_acid_insertion -def: "A sequence variant within a CDS resulting in the gain of an amino acid to the resulting polypeptide." [SO:ke] -synonym: "amino acid insertion" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:56Z - -[Term] -id: SO:0001606 -name: amino_acid_substitution -def: "A sequence variant of a codon resulting in the substitution of one amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "amino acid substitution" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:48:17Z - -[Term] -id: SO:0001607 -name: conservative_amino_acid_substitution -def: "A sequence variant of a codon causing the substitution of a similar amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "conservative amino acid substitution" EXACT [] -is_a: SO:0001606 ! amino_acid_substitution -created_by: kareneilbeck -creation_date: 2010-03-22T02:48:57Z - -[Term] -id: SO:0001608 -name: non_conservative_amino_acid_substitution -def: "A sequence variant of a codon causing the substitution of a non conservative amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "non conservative amino acid substitution" EXACT [] -is_a: SO:0001606 ! amino_acid_substitution -created_by: kareneilbeck -creation_date: 2010-03-22T02:49:23Z - -[Term] -id: SO:0001609 -name: elongated_polypeptide -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence." [SO:ke] -synonym: "elongated polypeptide" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! 
polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:49:52Z - -[Term] -id: SO:0001610 -name: elongated_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated polypeptide C terminal" EXACT [] -is_a: SO:0001609 ! elongated_polypeptide -created_by: kareneilbeck -creation_date: 2010-03-22T02:50:20Z - -[Term] -id: SO:0001611 -name: elongated_polypeptide_N_terminal -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated polypeptide N terminal" EXACT [] -is_a: SO:0001609 ! elongated_polypeptide -created_by: kareneilbeck -creation_date: 2010-03-22T02:50:31Z - -[Term] -id: SO:0001612 -name: elongated_in_frame_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes in frame elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated in frame polypeptide C terminal" EXACT [] -is_a: SO:0001610 ! elongated_polypeptide_C_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:05Z - -[Term] -id: SO:0001613 -name: elongated_out_of_frame_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes out of frame elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated polypeptide out of frame C terminal" EXACT [] -is_a: SO:0001610 ! elongated_polypeptide_C_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:20Z - -[Term] -id: SO:0001614 -name: elongated_in_frame_polypeptide_N_terminal_elongation -def: "A sequence variant with in the CDS that causes in frame elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated in frame polypeptide N terminal" EXACT [] -is_a: SO:0001611 ! 
elongated_polypeptide_N_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:49Z - -[Term] -id: SO:0001615 -name: elongated_out_of_frame_polypeptide_N_terminal -def: "A sequence variant within the CDS that causes out of frame elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated out of frame N terminal" EXACT [] -is_a: SO:0001611 ! elongated_polypeptide_N_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:52:05Z - -[Term] -id: SO:0001616 -name: polypeptide_fusion -def: "A sequence variant that causes a fusion of two polypeptide sequences." [SO:ke] -synonym: "polypeptide fusion" EXACT [] -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:52:43Z - -[Term] -id: SO:0001617 -name: polypeptide_truncation -def: "A sequence variant of the CDS that causes a truncation of the resulting polypeptide." [SO:ke] -synonym: "polypeptide truncation" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:53:07Z - -[Term] -id: SO:0001618 -name: inactive_catalytic_site -def: "A sequence variant that causes the inactivation of a catalytic site with respect to a reference sequence." [SO:ke] -synonym: "inactive catalytic site" EXACT [] -is_a: SO:0001560 ! inactive_ligand_binding_site -created_by: kareneilbeck -creation_date: 2010-03-22T03:06:14Z - -[Term] -id: SO:0001619 -name: nc_transcript_variant -def: "A transcript variant of a non coding RNA gene." [SO:ke] -comment: Within non-coding gene - Located within a gene that does not code for a protein. -synonym: "nc transcript variant" EXACT [] -synonym: "non coding transcript variant" EXACT [] -synonym: "within_non_coding_gene" EXACT dbsnp [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! 
transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:16:23Z - -[Term] -id: SO:0001620 -name: mature_miRNA_variant -def: "A transcript variant located with the sequence of the mature miRNA." [SO:ke] -comment: EBI term: Within mature miRNA - Located within a microRNA. -synonym: "mature miRNA variant" EXACT [] -synonym: "within_mature_miRNA" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001619 ! nc_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:16:58Z - -[Term] -id: SO:0001621 -name: NMD_transcript_variant -def: "A variant in a transcript that is the target of NMD." [SO:ke] -synonym: "NMD transcript variant" EXACT [] -synonym: "NMD_transcript" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:20:40Z - -[Term] -id: SO:0001622 -name: UTR_variant -def: "A transcript variant that is located within the UTR." [SO:ke] -synonym: "UTR variant" EXACT [] -synonym: "UTR_" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:22:58Z - -[Term] -id: SO:0001623 -name: 5_prime_UTR_variant -def: "A UTR variant of the 5' UTR." [SO:ke] -comment: EBI term: 5prime UTR variations - In 5prime UTR (untranslated region). -synonym: "5'UTR variant" EXACT [] -synonym: "5PRIME_UTR" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "five prime UTR variant" EXACT [] -synonym: "untranslated-5" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001622 ! UTR_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:23:29Z - -[Term] -id: SO:0001624 -name: 3_prime_UTR_variant -def: "A UTR variant of the 3' UTR." [SO:ke] -comment: EBI term 3prime UTR variations - In 3prime UTR. 
-synonym: "3'UTR variant" EXACT [] -synonym: "3PRIME_UTR" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "three prime UTR variant" EXACT [] -synonym: "untranslated-3" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001622 ! UTR_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:23:54Z - -[Term] -id: SO:0001626 -name: incomplete_terminal_codon_variant -def: "A sequence variant where at least one base of the final codon of an incompletely annotated transcript is changed." [SO:ke] -comment: EBI term: Partial codon - Located within the final, incomplete codon of a transcript with a shortened coding sequence where the end is unknown. -synonym: "incomplete terminal codon variant" EXACT [] -synonym: "partial_codon" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001590 ! terminator_codon_variant -is_a: SO:0001650 ! inframe_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:51:15Z - -[Term] -id: SO:0001627 -name: intron_variant -def: "A transcript variant occurring within an intron." [SO:ke] -comment: EBI term: Intronic variations - In intron. -synonym: "intron variant" EXACT [] -synonym: "intron_" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "intronic" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:52:38Z - -[Term] -id: SO:0001628 -name: intergenic_variant -def: "A sequence variant located in the intergenic region, between genes." [SO:ke] -comment: EBI term Intergenic variations - More than 5 kb either upstream or downstream of a transcript. -synonym: "intergenic" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "intergenic variant" EXACT [] -is_a: SO:0001878 ! 
feature_variant -created_by: kareneilbeck -creation_date: 2010-03-23T05:07:37Z - -[Term] -id: SO:0001629 -name: splice_site_variant -def: "A sequence variant that changes the first two or last two bases of an intron, or the 5th base from the start of the intron in the orientation of the transcript." [http://ensembl.org/info/docs/variation/index.html] -comment: EBI term - essential splice site - In the first 2 or the last 2 base pairs of an intron. The 5th base is on the donor (5') side of the intron. Updated to b in line with Cancer Genome Project at the Sanger. -synonym: "essential_splice_site" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "splice site variant" EXACT [] -is_a: SO:0001627 ! intron_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:42:00Z - -[Term] -id: SO:0001630 -name: splice_region_variant -def: "A sequence variant in which a change has occurred within the region of the splice site, either within 1-3 bases of the exon or 3-8 bases of the intron." [http://ensembl.org/info/docs/variation/index.html] -comment: EBI term: splice site - 1-3 bps into an exon or 3-8 bps into an intron. -synonym: "splice region variant" EXACT [] -synonym: "splice_region_variant" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:46:02Z - -[Term] -id: SO:0001631 -name: upstream_gene_variant -def: "A sequence variant located 5' of a gene." [SO:ke] -comment: Different groups annotate up and downstream to different lengths. The subtypes are specific and are backed up with cross references. -synonym: "upstream gene variant" EXACT [] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:49:13Z - -[Term] -id: SO:0001632 -name: downstream_gene_variant -def: "A sequence variant located 3' of a gene." [SO:ke] -comment: Different groups annotate up and downstream to different lengths. 
The subtypes are specific and are backed up with cross references. -synonym: "downstream gene variant" EXACT [] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:49:38Z - -[Term] -id: SO:0001633 -name: 5KB_downstream_variant -def: "A sequence variant located within 5 KB of the end of a gene." [SO:ke] -comment: EBI term Downstream variations - Within 5 kb downstream of the 3prime end of a transcript. -synonym: "5KB downstream variant" EXACT [] -synonym: "downstream" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "within 5KB downstream" RELATED [] -is_a: SO:0001632 ! downstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:50:16Z - -[Term] -id: SO:0001634 -name: 500B_downstream_variant -def: "A sequence variant located within a half KB of the end of a gene." [SO:ke] -synonym: "500B downstream variant" EXACT [] -synonym: "near-gene-3" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001633 ! 5KB_downstream_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:50:42Z - -[Term] -id: SO:0001635 -name: 5KB_upstream_variant -def: "A sequence variant located within 5KB 5' of a gene." [SO:ke] -comment: EBI term Upstream variations - Within 5 kb upstream of the 5prime end of a transcript. -synonym: "5kb upstream variant" EXACT [] -synonym: "upstream" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001631 ! upstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:51:06Z - -[Term] -id: SO:0001636 -name: 2KB_upstream_variant -def: "A sequence variant located within 2KB 5' of a gene." [SO:ke] -synonym: "2KB upstream variant" EXACT [] -synonym: "near-gene-5" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001635 ! 
5KB_upstream_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:51:22Z - -[Term] -id: SO:0001637 -name: rRNA_gene -def: "A gene that encodes for ribosomal RNA." [SO:ke] -synonym: "rRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:10:32Z - -[Term] -id: SO:0001638 -name: piRNA_gene -def: "A gene that encodes for a piwi associated RNA." [SO:ke] -synonym: "piRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:11:36Z - -[Term] -id: SO:0001639 -name: RNase_P_RNA_gene -def: "A gene that encodes an RNase P RNA." [SO:ke] -synonym: "RNase P RNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:13:23Z - -[Term] -id: SO:0001640 -name: RNase_MRP_RNA_gene -def: "A gene that encodes a RNase_MRP_RNA." [SO:ke] -synonym: "RNase MRP RNA gene" RELATED [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:13:58Z - -[Term] -id: SO:0001641 -name: lincRNA_gene -def: "A gene that encodes large intervening non-coding RNA." [SO:ke] -synonym: "lincRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:14:24Z - -[Term] -id: SO:0001642 -name: mathematically_defined_repeat -def: "A mathematically defined repeat (MDR) is an experimental feature that is determined by querying overlapping oligomers of length k against a database of shotgun sequence data and identifying regions in the query sequence that exceed a statistically determined threshold of repetitiveness." [SO:jestill] -comment: Mathematically defined repeat regions are determined without regard to the biological origin of the repetitive region. The repeat units of a MDR are the overlapping oligomers of size k that were used for the query. 
Tools that can annotate mathematically defined repeats include Tallymer (Kurtz et al 2008, BMC Genomics: 517) and RePS (Wang et al, Genome Res 12(5): 824-831.). -synonym: "mathematically defined repeat" EXACT [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2010-05-03T11:50:14Z - -[Term] -id: SO:0001643 -name: telomerase_RNA_gene -def: "A telomerase RNA gene is a non coding RNA gene the RNA product of which is a component of telomerase." [SO:ke] -synonym: "Telomerase RNA component" EXACT [] -synonym: "telomerase RNA gene" EXACT [] -synonym: "TERC" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA_component "wikipedia" -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-05-18T05:26:38Z - -[Term] -id: SO:0001644 -name: targeting_vector -def: "An engineered vector that is able to take part in homologous recombination in a host with the intent of introducing site specific genomic modifications." [MGD:tm, PMID:10354467] -synonym: "targeting vector" RELATED [] -is_a: SO:0000440 ! vector_replicon -is_a: SO:0000804 ! engineered_region -relationship: has_part SO:0000853 ! homologous_region -relationship: has_quality SO:0000783 ! engineered -created_by: kareneilbeck -creation_date: 2010-05-28T02:05:25Z - -[Term] -id: SO:0001645 -name: genetic_marker -def: "A measurable sequence feature that varies within a population." [SO:db] -synonym: "genetic marker" RELATED [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-05-28T02:33:07Z - -[Term] -id: SO:0001646 -name: DArT_marker -def: "A genetic marker, discovered using Diversity Arrays Technology (DArT)." [SO:ke] -synonym: "DArT marker" EXACT [] -is_a: SO:0001645 ! 
genetic_marker -created_by: kareneilbeck -creation_date: 2010-05-28T02:34:43Z - -[Term] -id: SO:0001647 -name: kozak_sequence -def: "A kind of ribosome entry site, specific to Eukaryotic organisms that overlaps part of both 5' UTR and CDS sequence." [SO:ke] -subset: SOFA -synonym: "kozak consensus" EXACT [] -synonym: "kozak consensus sequence" EXACT [] -synonym: "kozak sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Kozak_consensus_sequence "wikipedia" -is_a: SO:0000139 ! ribosome_entry_site -created_by: kareneilbeck -creation_date: 2010-06-07T03:12:20Z - -[Term] -id: SO:0001648 -name: nested_transposon -def: "A transposon that is disrupted by the insertion of another element." [SO:ke] -synonym: "nested transposon" EXACT [] -is_a: SO:0000101 ! transposable_element -created_by: kareneilbeck -creation_date: 2010-06-23T03:22:57Z - -[Term] -id: SO:0001649 -name: nested_repeat -def: "A repeat that is disrupted by the insertion of another element." [SO:ke] -synonym: "nested repeat" RELATED [] -is_a: SO:0000657 ! repeat_region -created_by: kareneilbeck -creation_date: 2010-06-23T03:24:55Z - -[Term] -id: SO:0001650 -name: inframe_variant -def: "A sequence variant which does not cause a disruption of the translational reading frame." [SO:ke] -synonym: "cds-indel" EXACT dbsnp [] -synonym: "inframe variant" EXACT [] -is_a: SO:0001818 ! protein_altering_variant -created_by: kareneilbeck -creation_date: 2010-07-19T01:24:44Z - -[Term] -id: SO:0001653 -name: retinoic_acid_responsive_element -def: "A transcription factor binding site of variable direct repeats of the sequence PuGGTCA spaced by five nucleotides (DR5) found in the promoters of retinoic acid-responsive genes, to which retinoic acid receptors bind." [PMID:11327309, PMID:19917671] -synonym: "RARE" EXACT [] -synonym: "retinoic acid responsive element" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000167 ! 
promoter -created_by: kareneilbeck -creation_date: 2010-08-03T10:46:12Z - -[Term] -id: SO:0001654 -name: nucleotide_to_protein_binding_site -def: "A binding site that, in the nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -subset: SOFA -synonym: "nucleotide to protein binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:26:05Z - -[Term] -id: SO:0001655 -name: nucleotide_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with nucleotide residues." [SO:cb] -comment: See GO:0000166 : nucleotide binding. -synonym: "nucleotide binding site" EXACT [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:30:04Z - -[Term] -id: SO:0001656 -name: metal_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with metal ions." [SO:cb] -comment: See GO:0046872 : metal ion binding. -synonym: "metal binding site" RELATED [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:31:42Z - -[Term] -id: SO:0001657 -name: ligand_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with a small molecule such as a drug, or hormone." [SO:ke] -synonym: "ligand binding site" EXACT [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:32:58Z - -[Term] -id: SO:0001658 -name: nested_tandem_repeat -def: "An NTR is a nested repeat of two distinct tandem motifs interspersed with each other." [SO:AF] -comment: Tracker ID: 3052459. -synonym: "nested tandem repeat" EXACT [] -synonym: "NTR" EXACT [] -is_a: SO:0001649 ! nested_repeat -created_by: kareneilbeck -creation_date: 2010-08-26T09:36:16Z - -[Term] -id: SO:0001659 -name: promoter_element -synonym: "promoter element" EXACT [] -is_a: SO:0000713 ! 
DNA_motif -relationship: overlaps SO:0000235 ! TF_binding_site -created_by: kareneilbeck -creation_date: 2010-10-01T11:48:32Z - -[Term] -id: SO:0001660 -name: core_promoter_element -synonym: "core promoter element" EXACT [] -synonym: "general transcription factor binding site" RELATED [] -is_a: SO:0001659 ! promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T11:49:03Z - -[Term] -id: SO:0001661 -name: RNA_polymerase_II_TATA_box -def: "A TATA box core promoter of a gene transcribed by RNA polymerase II." [PMID:16858867] -synonym: "RNA polymerase II TATA box" EXACT [] -is_a: SO:0000174 ! TATA_box -relationship: part_of SO:0001669 ! RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:42:12Z - -[Term] -id: SO:0001662 -name: RNA_polymerase_III_TATA_box -def: "A TATA box core promoter of a gene transcribed by RNA polymerase III." [SO:ke] -synonym: "RNA polymerase III TATA box" EXACT [] -is_a: SO:0000174 ! TATA_box -relationship: part_of SO:0000171 ! RNApol_III_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:43:16Z - -[Term] -id: SO:0001663 -name: BREd_motif -def: "A core RNA polymerase II promoter element with consensus (G/A)T(T/G/A)(T/A)(G/T)(T/G)(T/G)." [PMID:16858867] -synonym: "BREd" EXACT [] -synonym: "BREd motif" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:49:55Z - -[Term] -id: SO:0001664 -name: DCE -def: "A discontinuous core element of RNA polymerase II transcribed genes, situated downstream of the TSS. It is composed of three sub elements: SI, SII and SIII." [PMID:16858867] -synonym: "downstream core element" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! 
RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:56:41Z - -[Term] -id: SO:0001665 -name: DCE_SI -def: "A sub element of the DCE core promoter element, with consensus sequence CTTC." [PMID:16858867, SO:ke] -synonym: "DCE SI" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:10Z - -[Term] -id: SO:0001666 -name: DCE_SII -def: "A sub element of the DCE core promoter element with consensus sequence CTGT." [PMID:16858867, SO:ke] -synonym: "DCE SII" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:30Z - -[Term] -id: SO:0001667 -name: DCE_SIII -def: "A sub element of the DCE core promoter element with consensus sequence AGC." [PMID:16858867, SO:ke] -synonym: "DCE SIII" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:44Z - -[Term] -id: SO:0001668 -name: proximal_promoter_element -synonym: "proximal promoter element" RELATED [] -synonym: "specific transcription factor binding site" RELATED [] -is_a: SO:0001678 ! regulatory_promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T03:10:23Z - -[Term] -id: SO:0001669 -name: RNApol_II_core_promoter -def: "The minimal portion of the promoter required to properly initiate transcription in RNA polymerase II transcribed genes." [PMID:16858867] -synonym: "RNApol II core promoter" EXACT [] -is_a: SO:0000170 ! RNApol_II_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T03:13:41Z - -[Term] -id: SO:0001670 -name: distal_promoter_element -synonym: "distal promoter element" RELATED [] -is_a: SO:0001678 ! 
regulatory_promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T03:21:08Z - -[Term] -id: SO:0001671 -name: bacterial_RNApol_promoter_sigma_70 -synonym: "bacterial RNA polymerase promoter sigma 70" EXACT [] -is_a: SO:0000613 ! bacterial_RNApol_promoter -created_by: kareneilbeck -creation_date: 2010-10-06T01:41:34Z - -[Term] -id: SO:0001672 -name: bacterial_RNApol_promoter_sigma54 -synonym: "bacterial RNA polymerase promoter sigma54" EXACT [] -is_a: SO:0000613 ! bacterial_RNApol_promoter -created_by: kareneilbeck -creation_date: 2010-10-06T01:42:37Z - -[Term] -id: SO:0001673 -name: minus_12_signal -def: "A conserved region about 12-bp upstream of the start point of bacterial transcription units, involved with sigma factor 54." [PMID:18331472] -synonym: "minus 12 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001672 ! bacterial_RNApol_promoter_sigma54 -created_by: kareneilbeck -creation_date: 2010-10-06T01:44:57Z - -[Term] -id: SO:0001674 -name: minus_24_signal -def: "A conserved region about 12-bp upstream of the start point of bacterial transcription units, involved with sigma factor 54." [PMID:18331472] -synonym: "minus 24 signal" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001672 ! bacterial_RNApol_promoter_sigma54 -created_by: kareneilbeck -creation_date: 2010-10-06T01:45:24Z - -[Term] -id: SO:0001675 -name: A_box_type_1 -def: "An A box within an RNA polymerase III type 1 promoter." [SO:ke] -comment: The A box can be found in the promoters of type 1 and type 2 (pol III) so sub-typing here allows the part of relationship of the subtypes to remain true. -synonym: "A box type 1" RELATED [] -is_a: SO:0000619 ! A_box -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 -created_by: kareneilbeck -creation_date: 2010-10-06T05:43:43Z - -[Term] -id: SO:0001676 -name: A_box_type_2 -def: "An A box within an RNA polymerase III type 2 promoter." 
[SO:ke] -comment: The A box can be found in the promoters of type 1 and type 2 (pol III) so sub-typing here allows the part of relationship of the subtypes to remain true. -synonym: "A box type 2" RELATED [] -is_a: SO:0000619 ! A_box -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 -created_by: kareneilbeck -creation_date: 2010-10-06T05:44:18Z - -[Term] -id: SO:0001677 -name: intermediate_element -def: "A core promoter region of RNA polymerase III type 1 promoters." [PMID:12381659] -synonym: "IE" EXACT [] -synonym: "intermediate element" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 -created_by: kareneilbeck -creation_date: 2010-10-06T05:52:03Z - -[Term] -id: SO:0001678 -name: regulatory_promoter_element -def: "A promoter element that is not part of the core promoter, but provides the promoter with a specific regulatory region." [PMID:12381659] -synonym: "regulatory promoter element" RELATED [] -is_a: SO:0001659 ! promoter_element -created_by: kareneilbeck -creation_date: 2010-10-07T04:39:48Z - -[Term] -id: SO:0001679 -name: transcription_regulatory_region -def: "A regulatory region that is involved in the control of the process of transcription." [SO:ke] -subset: SOFA -synonym: "transcription regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:49:35Z - -[Term] -id: SO:0001680 -name: translation_regulatory_region -def: "A regulatory region that is involved in the control of the process of translation." [SO:ke] -synonym: "translation regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:52:45Z - -[Term] -id: SO:0001681 -name: recombination_regulatory_region -def: "A regulatory region that is involved in the control of the process of recombination." [SO:ke] -synonym: "recombination regulatory region" EXACT [] -is_a: SO:0005836 ! 
regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:53:35Z - -[Term] -id: SO:0001682 -name: replication_regulatory_region -def: "A regulatory region that is involved in the control of the process of nucleotide replication." [SO:ke] -synonym: "replication regulatory region" RELATED [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:54:09Z - -[Term] -id: SO:0001683 -name: sequence_motif -def: "A sequence motif is a nucleotide or amino-acid sequence pattern that may have biological significance." [http://en.wikipedia.org/wiki/Sequence_motif] -subset: SOFA -synonym: "sequence motif" RELATED [] -xref: http://en.wikipedia.org/wiki/Sequence_motif "wikipedia" -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-10-14T04:13:22Z - -[Term] -id: SO:0001684 -name: experimental_feature_attribute -def: "An attribute of an experimentally derived feature." [SO:ke] -synonym: "experimental feature attribute" RELATED [] -is_a: SO:0000733 ! feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:22:23Z - -[Term] -id: SO:0001685 -name: score -def: "The score of an experimentally derived feature such as a p-value." [SO:ke] -is_a: SO:0001684 ! experimental_feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:23:16Z - -[Term] -id: SO:0001686 -name: quality_value -def: "An experimental feature attribute that defines the quality of the feature in a quantitative way, such as a phred quality score." [SO:ke] -synonym: "quality value" RELATED [] -is_a: SO:0001684 ! experimental_feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:24:11Z - -[Term] -id: SO:0001687 -name: restriction_enzyme_recognition_site -def: "The nucleotide region (usually a palindrome) that is recognized by a restriction enzyme. This may or may not be equal to the restriction enzyme binding site." 
[SO:ke] -synonym: "restriction endonuclease recognition site" EXACT [] -synonym: "restriction enzyme recognition site" EXACT [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-10-29T12:29:57Z - -[Term] -id: SO:0001688 -name: restriction_enzyme_cleavage_junction -def: "The boundary at which a restriction enzyme breaks the nucleotide sequence." [SO:ke] -synonym: "restriction enzyme cleavage junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2010-10-29T12:35:02Z - -[Term] -id: SO:0001689 -name: five_prime_restriction_enzyme_junction -def: "The restriction enzyme cleavage junction on the 5' strand of the nucleotide sequence." [SO:ke] -synonym: "5' restriction enzyme junction" EXACT [] -is_a: SO:0001694 ! single_strand_restriction_enzyme_cleavage_site -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:36:24Z - -[Term] -id: SO:0001690 -name: three_prime_restriction_enzyme_junction -synonym: "3' restriction enzyme junction" EXACT [] -is_a: SO:0001694 ! single_strand_restriction_enzyme_cleavage_site -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:37:52Z - -[Term] -id: SO:0001691 -name: blunt_end_restriction_enzyme_cleavage_site -synonym: "blunt end restriction enzyme cleavage site" EXACT [] -is_a: SO:0001687 ! restriction_enzyme_recognition_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:39:53Z - -[Term] -id: SO:0001692 -name: sticky_end_restriction_enzyme_cleavage_site -synonym: "sticky end restriction enzyme cleavage site" RELATED [] -is_a: SO:0001687 ! 
restriction_enzyme_recognition_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:40:50Z - -[Term] -id: SO:0001693 -name: blunt_end_restriction_enzyme_cleavage_junction -def: "A restriction enzyme cleavage site where both strands are cut at the same position." [SO:ke] -synonym: "blunt end restriction enzyme cleavage site" RELATED [] -is_a: SO:0001688 ! restriction_enzyme_cleavage_junction -relationship: part_of SO:0001691 ! blunt_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:43:14Z - -[Term] -id: SO:0001694 -name: single_strand_restriction_enzyme_cleavage_site -def: "A restriction enzyme cleavage site whereby only one strand is cut." [SO:ke] -synonym: "single strand restriction enzyme cleavage site" RELATED [] -is_a: SO:0001688 ! restriction_enzyme_cleavage_junction -created_by: kareneilbeck -creation_date: 2010-10-29T12:44:48Z - -[Term] -id: SO:0001695 -name: restriction_enzyme_single_strand_overhang -def: "A terminal region of DNA sequence where the end of the region is not blunt ended." [SO:ke] -synonym: "single strand overhang" EXACT [] -synonym: "sticky end" RELATED [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:48:35Z - -[Term] -id: SO:0001696 -name: experimentally_defined_binding_region -def: "A region that has been implicated in binding although the exact coordinates of binding may be unknown." [SO:ke] -synonym: "experimentally defined binding region" RELATED [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2010-11-02T11:39:59Z - -[Term] -id: SO:0001697 -name: ChIP_seq_region -def: "A region of sequence identified by CHiP seq technology to contain a protein binding site." [SO:ke] -synonym: "ChIP seq region" RELATED [] -is_a: SO:0001696 ! experimentally_defined_binding_region -relationship: contains SO:0000410 ! 
protein_binding_site -created_by: kareneilbeck -creation_date: 2010-11-02T11:43:07Z - -[Term] -id: SO:0001698 -name: ASPE_primer -def: "\"A primer containing an SNV at the 3' end for accurate genotyping." [http://www.ncbi.nlm.nih.gov/pubmed/11252801] -synonym: "allele specific primer extension primer" EXACT [] -synonym: "ASPE primer" EXACT [] -is_a: SO:0000112 ! primer -created_by: kareneilbeck -creation_date: 2010-11-11T03:25:21Z - -[Term] -id: SO:0001699 -name: dCAPS_primer -def: "A primer with one or more mis-matches to the DNA template corresponding to a position within a restriction enzyme recognition site." [http://www.ncbi.nlm.nih.gov/pubmed/9628033] -synonym: "dCAPS primer" EXACT [] -synonym: "derived cleaved amplified polymorphic primer" EXACT [] -is_a: SO:0000112 ! primer -created_by: kareneilbeck -creation_date: 2010-11-11T03:27:09Z - -[Term] -id: SO:0001700 -name: histone_modification -def: "Histone modification is a post translationally modified region whereby residues of the histone protein are modified by methylation, acetylation, phosphorylation, ubiquitination, sumoylation, citrullination, or ADP-ribosylation." [http:en.wikipedia.org/wiki/Histone] -synonym: "histone modification" EXACT [] -synonym: "histone modification site" RELATED [] -is_a: SO:0001089 ! post_translationally_modified_region -is_a: SO:0001720 ! epigenetically_modified_region -relationship: has_quality SO:0000133 ! epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-31T10:22:08Z - -[Term] -id: SO:0001701 -name: histone_methylation_site -def: "A histone modification site where the modification is the methylation of the residue." [SO:ke] -synonym: "histone methylation" EXACT [] -synonym: "histone methylation site" EXACT [] -is_a: SO:0001700 ! 
histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:02Z - -[Term] -id: SO:0001702 -name: histone_acetylation_site -def: "A histone modification where the modification is the acylation of the residue." [SO:ke] -synonym: "histone acetylation" EXACT [] -synonym: "histone acetylatoin site" EXACT [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:27Z - -[Term] -id: SO:0001703 -name: H3K9_acetylation_site -def: "A kind of histone modification site, whereby the 9th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 acetylation site" EXACT [] -synonym: "H3K9Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:05Z - -[Term] -id: SO:0001704 -name: H3K14_acetylation_site -def: "A kind of histone modification site, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K14 acetylation site" EXACT [] -synonym: "H3K14Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:53Z - -[Term] -id: SO:0001705 -name: H3K4_monomethylation_site -def: "A kind of histone modification, whereby the 4th residue (a lysine), from the start of the H3 protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 mono-methylation site" EXACT [] -synonym: "H3K4me1" RELATED [] -is_a: SO:0001734 ! H3K4_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:28:14Z - -[Term] -id: SO:0001706 -name: H3K4_trimethylation -def: "A kind of histone modification site, whereby the 4th residue (a lysine), from the start of the H3 protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 tri-methylation" EXACT [] -synonym: "H3K4me3" RELATED [] -is_a: SO:0001734 ! 
H3K4_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:29:12Z - -[Term] -id: SO:0001707 -name: H3K9_trimethylation_site -def: "A kind of histone modification site, whereby the 9th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 tri-methylation site" EXACT [] -synonym: "H3K9Me3" RELATED [] -is_a: SO:0001736 ! H3K9_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:30:34Z - -[Term] -id: SO:0001708 -name: H3K27_monomethylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2K27 mono-methylation site" EXACT [] -synonym: "H2K27Me1" RELATED [] -is_a: SO:0001732 ! H3K27_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:31:54Z - -[Term] -id: SO:0001709 -name: H3K27_trimethylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K27 tri-methylation site" EXACT [] -synonym: "H3K27Me3" RELATED [] -is_a: SO:0001732 ! H3K27_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:32:41Z - -[Term] -id: SO:0001710 -name: H3K79_monomethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is mono- methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 mono-methylation site" EXACT [] -synonym: "H3K79me1" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:33:42Z - -[Term] -id: SO:0001711 -name: H3K79_dimethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is di-methylated." 
[http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 di-methylation site" EXACT [] -synonym: "H3K79Me2" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:34:39Z - -[Term] -id: SO:0001712 -name: H3K79_trimethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 tri-methylation site" EXACT [] -synonym: "H3K79Me3" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:35:30Z - -[Term] -id: SO:0001713 -name: H4K20_monomethylation_site -def: "A kind of histone modification site, whereby the 20th residue (a lysine), from the start of the H34histone protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H4K20 mono-methylation site" EXACT [] -synonym: "H4K20Me1" RELATED [] -is_a: SO:0001701 ! histone_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:36:43Z - -[Term] -id: SO:0001714 -name: H2BK5_monomethylation_site -def: "A kind of histone modification site, whereby the 5th residue (a lysine), from the start of the H2B protein is methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2BK5 mono-methylation site" EXACT [] -is_a: SO:0001701 ! histone_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:38:12Z - -[Term] -id: SO:0001715 -name: ISRE -def: "An ISRE is a transcriptional cis regulatory region, containing the consensus region: YAGTTTC(A/T)YTTTYCC, responsible for increased transcription via interferon binding." [http://genesdev.cshlp.org/content/2/4/383.abstrac] -comment: Term requested via tracker (2981725) by Alan Ruttenberg, April 2010. It has been described as both an enhancer and a promoter, so the parent is the more general term. -synonym: "interferon stimulated response element" EXACT [] -is_a: SO:0001055 ! 
transcriptional_cis_regulatory_region -created_by: kareneilbeck -creation_date: 2010-04-05T11:15:08Z - -[Term] -id: SO:0001716 -name: histone_ubiqitination_site -def: "A histone modification site where ubiquitin may be added." [SO:ke] -synonym: "histone ubiquitination site" RELATED [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-04-13T10:12:18Z - -[Term] -id: SO:0001717 -name: H2B_ubiquitination_site -def: "A histone modification site on H2B where ubiquitin may be added." [SO:ke] -synonym: "H2BUbiq" RELATED [] -is_a: SO:0001716 ! histone_ubiqitination_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:13:28Z - -[Term] -id: SO:0001718 -name: H3K18_acetylation_site -def: "A kind of histone modification site, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K18 acetylation site" EXACT [] -synonym: "H3K18Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:39:35Z - -[Term] -id: SO:0001719 -name: H3K23_acylation_site -def: "A kind of histone modification, whereby the 23rd residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K23 acylation site" RELATED [] -synonym: "H3K23Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:42:45Z - -[Term] -id: SO:0001720 -name: epigenetically_modified_region -def: "A biological region implicated in inherited changes caused by mechanisms other than changes in the underlying DNA sequence." [http://en.wikipedia.org/wiki/Epigenetics, SO:ke] -subset: SOFA -synonym: "epigenetically modified region" RELATED [] -is_a: SO:0001411 ! biological_region -relationship: has_quality SO:0000133 ! 
epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-27T12:02:29Z - -[Term] -id: SO:0001721 -name: H3K27_acylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K27 acylation site" EXACT [] -synonym: "H3K27Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:44:09Z - -[Term] -id: SO:0001722 -name: H3K36_monomethylation_site -def: "A kind of histone modification site, whereby the 36th residue (a lysine), from the start of the H3 histone protein is mono-methylated." [SO:ke] -synonym: "H3K36 mono-methylation site" EXACT [] -synonym: "H3K36. -synonym: "transcriptionally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -def: "Expressed in relatively constant amounts without regard to cellular environmental conditions such as the concentration of a particular substrate." [SO:ke] -synonym: "transcriptionally constitutive" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -def: "An inducer molecule is required for transcription to occur." [SO:ke] -synonym: "transcriptionally induced" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -def: "A repressor molecule is required for transcription to stop." [SO:ke] -synonym: "transcriptionally repressed" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000127 -name: silenced_gene -def: "A gene that is silenced." [SO:xp] -synonym: "silenced gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000893 ! silenced - -[Term] -id: SO:0000128 -name: gene_silenced_by_DNA_modification -def: "A gene that is silenced by DNA modification." 
[SO:xp] -synonym: "gene silenced by DNA modification" EXACT [] -is_a: SO:0000127 ! silenced_gene -relationship: has_quality SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000129 -name: gene_silenced_by_DNA_methylation -def: "A gene that is silenced by DNA methylation." [SO:xp] -synonym: "gene silenced by DNA methylation" EXACT [] -synonym: "methylation-silenced gene" EXACT [] -is_a: SO:0000128 ! gene_silenced_by_DNA_modification -relationship: has_quality SO:0000895 ! silenced_by_DNA_methylation - -[Term] -id: SO:0000130 -name: post_translationally_regulated -def: "An attribute describing a gene that is regulated after it has been translated." [SO:ke] -synonym: "post translationally regulated" EXACT [] -synonym: "post-translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000131 -name: translationally_regulated -def: "An attribute describing a gene that is regulated as it is translated." [SO:ke] -synonym: "translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "DNA reverse primer" EXACT [] -synonym: "reverse DNA primer" EXACT [] -synonym: "reverse primer" EXACT [] -synonym: "reverse primer oligo" EXACT [] -synonym: "reverse primer oligonucleotide" EXACT [] -synonym: "reverse primer sequence" EXACT [] -is_a: SO:0000112 ! primer -relationship: has_quality SO:0001031 ! reverse - -[Term] -id: SO:0000133 -name: epigenetically_modified -def: "This attribute describes a gene where heritable changes other than those in the DNA sequence occur. These changes include: modification to the DNA (such as DNA methylation, the covalent modification of cytosine), and post-translational modification of histones." 
[SO:ke] -synonym: "epigenetically modified" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000134 -name: genomically_imprinted -def: "Imprinted genes are epigenetically modified genes that are expressed monoallelically according to their parent of origin." [SO:ke] -synonym: "genomically imprinted" EXACT [] -synonym: "imprinted" BROAD [] -xref: http:http://en.wikipedia.org/wiki/Genomic_imprinting "wiki" -is_a: SO:0000119 ! regulated -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000135 -name: maternally_imprinted -def: "The maternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "maternally imprinted" EXACT [] -is_a: SO:0000134 ! genomically_imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -def: "The paternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "paternally imprinted" EXACT [] -is_a: SO:0000134 ! genomically_imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -def: "Allelic exclusion is a process occurring in diploid organisms, where a gene is inactivated and not expressed in that cell." [SO:ke] -comment: Examples are x-inactivation and immunoglobulin formation. -synonym: "allelically excluded" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000138 -name: gene_rearranged_at_DNA_level -def: "An epigenetically modified gene, rearranged at the DNA level." [SO:xp] -synonym: "gene rearranged at DNA level" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -relationship: has_quality SO:0000904 ! rearranged_at_DNA_level - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! 
five_prime_UTR - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0001680 ! translation_regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -synonym: "DNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000145 -name: recoded_codon -def: "A codon that has been redefined at translation. The redefinition may be as a result of translational bypass, translational frameshifting or stop codon readthrough." [SO:xp] -synonym: "recoded codon" EXACT [] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000146 -name: capped -def: "An attribute describing when a sequence, usually an mRNA is capped by the addition of a modified guanine nucleotide at the 5' end." [SO:ke] -is_a: SO:0000237 ! 
transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0001876 ! partial_genomic_sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unavailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. -synonym: "yeast artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "bacterial artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000154 -name: PAC -def: "The P1-derived artificial chromosome are DNA constructs that are derived from the DNA of P1 bacteriophage. They can carry large amounts (about 100-300 kilobases) of other sequences for a variety of bioengineering purposes. It is one type of vector used to clone DNA fragments (100- to 300-kb insert size; average, 150 kb) in Escherichia coli cells." [http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. Drosophila melanogaster PACs carry an average insert size of 80 kb. The library represents a 6-fold coverage of the genome. -synonym: "P1" EXACT [] -synonym: "P1 artificial chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000155 -name: plasmid -def: "A self replicating, using the hosts cellular machinery, often circular nucleic acid molecule that is distinct from a chromosome in the organism." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "plasmid sequence" EXACT [] -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as a plasmid or packaged as a phage,since they retain the lambda cos sites." [SO:ma] -comment: Paper: vans GA et al. 
High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1):9-20. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cosmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Cosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma] -synonym: "phagemid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Phagemid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilizes the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource: mapping and analysis of 96 clones. Genomics 1996. -synonym: "fosmid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Fosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000159 -name: deletion -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -xref: loinc:LA6692-3 "Deletion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_obsolete: true - -[Term] -id: SO:0000161 -name: methylated_adenine -def: "A modified base in which adenine has been methylated." 
[SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -synonym: "methylated_A" EXACT [] -is_a: SO:0000306 ! methylated_DNA_base_feature -is_a: SO:0001962 ! modified_adenine - -[Term] -id: SO:0000162 -name: splice_site -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinery. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! 
cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a place holder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000166 -name: enhancer_bound_by_factor -def: "An enhancer bound by a factor." [SO:xp] -synonym: "enhancer bound by factor" EXACT [] -is_a: SO:0000165 ! enhancer -relationship: has_quality SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." 
[SO:ke] -synonym: "pol I promoter" EXACT [] -synonym: "polymerase I promoter" EXACT [] -synonym: "RNA polymerase A promoter" EXACT [] -synonym: "RNApol I promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke] -synonym: "pol II promoter" RELATED [] -synonym: "polymerase II promoter" EXACT [] -synonym: "RNA polymerase B promoter" EXACT [] -synonym: "RNApol II promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." [SO:ke] -synonym: "pol III promoter" EXACT [] -synonym: "polymerase III promoter" EXACT [] -synonym: "RNA polymerase C promoter" EXACT [] -synonym: "RNApol III promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "CAAT box" EXACT [] -synonym: "CAAT signal" EXACT [] -synonym: "CAAT-box" EXACT [] -xref: http://en.wikipedia.org/wiki/CAAT_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000173 -name: GC_rich_promoter_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "GC rich promoter region" EXACT [] -synonym: "GC-rich region" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:16858867] -comment: Binds TBP. -synonym: "Goldstein-Hogness box" EXACT [] -synonym: "TATA box" EXACT [] -xref: http://en.wikipedia.org/wiki/TATA_box "wiki" -is_a: SO:0001660 ! core_promoter_element - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT. This region is associated with sigma factor 70." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-10 signal" EXACT [] -synonym: "minus 10 signal" EXACT [] -synonym: "Pribnow box" EXACT [] -synonym: "Pribnow Schaller box" EXACT [] -synonym: "Pribnow-Schaller box" EXACT [] -xref: http://en.wikipedia.org/wiki/Pribnow_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001671 ! bacterial_RNApol_promoter_sigma_70 -relationship: part_of SO:0001913 ! bacterial_RNApol_promoter_sigma_ecf - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA. This region is associated with sigma factor 70." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-35 signal" EXACT [] -synonym: "minus 35 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001671 ! bacterial_RNApol_promoter_sigma_70 -relationship: part_of SO:0001913 ! bacterial_RNApol_promoter_sigma_ecf - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." 
[SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." [http://www.dddmag.com/Glossary.aspx#r] -synonym: "class I" RELATED [] -synonym: "class I transposon" EXACT [] -synonym: "retrotransposon element" EXACT [] -xref: http://en.wikipedia.org/wiki/Retrotransposon "wiki" -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." [SO:ke] -synonym: "class II" RELATED [] -synonym: "class II transposon" EXACT [] -synonym: "DNA transposon" EXACT [] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! 
gene_component_region - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -synonym: "U2 intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." [SO:ke] -synonym: "long terminal repeat retrotransposon" EXACT [] -synonym: "LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -synonym: "non LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -synonym: "5' intron" EXACT [] -synonym: "5' intron sequence" EXACT [] -synonym: "five prime intron" EXACT [] -is_a: SO:0000188 ! 
intron - -[Term] -id: SO:0000191 -name: interior_intron -synonym: "interior intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -synonym: "3' intron" EXACT [] -synonym: "3' intron sequence" RELATED [] -synonym: "three prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." [GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "LINE" EXACT [] -synonym: "LINE element" EXACT [] -synonym: "Long interspersed element" EXACT [] -synonym: "Long interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon (here, 'codon' is inclusive of the stop_codon)." [SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! 
five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke] -subset: DBVAR -synonym: "transchr" RELATED [http://www.ncbi.nlm.nih.gov/dbvar/] -synonym: "translocated sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000201 -name: interior_exon -def: "An exon that is bounded by 5' and 3' splice sites." [PMID:10373547] -synonym: "interior exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The coding exon that is most 3-prime on a given transcript." [SO:ma] -synonym: "3' coding exon" RELATED [] -synonym: "three prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." [SO:ke] -synonym: "Short interspersed element" EXACT [] -synonym: "Short interspersed nuclear element" EXACT [] -synonym: "SINE element" EXACT [] -xref: http://en.wikipedia.org/wiki/Short_interspersed_nuclear_element "wiki" -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_variation -synonym: "simple sequence length polymorphism" RELATED [] -synonym: "simple sequence length variation" EXACT [] -synonym: "SSLP" RELATED [] -is_a: SO:0000248 ! sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "terminal inverted repeat element" EXACT [] -synonym: "TIR element" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." 
[SO:ke] -subset: SOFA -synonym: "" RELATED [] -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253)." [SO:ke] -synonym: "tRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -def: "A primary transcript encoding alanyl tRNA." [SO:ke] -synonym: "alanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." [SO:ke] -synonym: "arginine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -synonym: "asparagine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -synonym: "aspartic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." [SO:ke] -synonym: "cysteine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." 
[SO:ke] -synonym: "glutamine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." [SO:ke] -synonym: "glycine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." [SO:ke] -synonym: "histidine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke] -synonym: "isoleucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -synonym: "leucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -synonym: "lysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -synonym: "methionine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke] -synonym: "phenylalanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -synonym: "proline tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -synonym: "serine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -synonym: "threonine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -synonym: "tryptophan tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." [SO:ke] -synonym: "tyrosine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." [SO:ke] -synonym: "valine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear RNA (SO:0000274)." [SO:ke] -synonym: "snRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -synonym: "snoRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. 
In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a nucleotide molecule that binds a Transcription Factor or Transcription Factor complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The in-frame interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -synonym: "transcript attribute" EXACT [] -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterized by large modular imperfect long inverted repeats." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "foldback element" EXACT [] -synonym: "long inverted repeat element" RELATED [] -synonym: "LVR element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The sequences extending on either side of a specific region." [SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000240 -name: chromosome_variation -synonym: "chromosome variation" EXACT [] -is_a: SO:0001507 ! variant_collection -disjoint_from: SO:0000400 ! sequence_attribute -relationship: part_of SO:0001524 ! chromosomally_aberrant_genome - -[Term] -id: SO:0000241 -name: internal_UTR -def: "A UTR bordered by the terminal and initial codons of two CDSs in a polycistronic transcript. Every UTR is either 5', 3' or internal." [SO:cjm] -synonym: "internal UTR" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polycistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -synonym: "untranslated region polycistronic mRNA" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke] -synonym: "internal ribosomal entry sequence" EXACT [] -synonym: "internal ribosomal entry site" EXACT [] -synonym: "internal ribosome entry sequence" RELATED [] -synonym: "internal ribosome entry site" EXACT [] -synonym: "IRES" EXACT [] -xref: http://en.wikipedia.org/wiki/Internal_ribosome_entry_site "wiki" -is_a: SO:0000139 ! 
ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_obsolete: true - -[Term] -id: SO:0000246 -name: polyadenylated -def: "A attribute describing the addition of a poly A tail to the 3' end of a mRNA molecule." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000248 -name: sequence_length_variation -synonym: "sequence length variation" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -synonym: "modified RNA base feature" EXACT [] -is_a: SO:0001236 ! base - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). 
Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000254 -name: alanyl_tRNA -def: "A tRNA sequence that has an alanine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "alanyl tRNA" EXACT [] -synonym: "alanyl-transfer ribonucleic acid" EXACT [] -synonym: "alanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000211 ! alanine_tRNA_primary_transcript - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -synonym: "rRNA small subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -def: "A tRNA sequence that has an asparagine anticodon, and a 3' asparagine binding region." [SO:ke] -synonym: "asparaginyl tRNA" EXACT [] -synonym: "asparaginyl-transfer ribonucleic acid" EXACT [] -synonym: "asparaginyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000213 ! 
asparagine_tRNA_primary_transcript - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -def: "A tRNA sequence that has an aspartic acid anticodon, and a 3' aspartic acid binding region." [SO:ke] -synonym: "aspartyl tRNA" EXACT [] -synonym: "aspartyl-transfer ribonucleic acid" EXACT [] -synonym: "aspartyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000214 ! aspartic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -def: "A tRNA sequence that has a cysteine anticodon, and a 3' cysteine binding region." [SO:ke] -synonym: "cysteinyl tRNA" EXACT [] -synonym: "cysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "cysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000215 ! cysteine_tRNA_primary_transcript - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -def: "A tRNA sequence that has a glutamine anticodon, and a 3' glutamine binding region." [SO:ke] -synonym: "glutaminyl tRNA" EXACT [] -synonym: "glutaminyl-transfer ribonucleic acid" EXACT [] -synonym: "glutaminyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000216 ! glutamic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -def: "A tRNA sequence that has a glutamic acid anticodon, and a 3' glutamic acid binding region." [SO:ke] -synonym: "glutamyl tRNA" EXACT [] -synonym: "glutamyl-transfer ribonucleic acid" EXACT [] -synonym: "glutamyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000217 ! glutamine_tRNA_primary_transcript - -[Term] -id: SO:0000261 -name: glycyl_tRNA -def: "A tRNA sequence that has a glycine anticodon, and a 3' glycine binding region." [SO:ke] -synonym: "glycyl tRNA" EXACT [] -synonym: "glycyl-transfer ribonucleic acid" RELATED [] -synonym: "glycyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000218 ! 
glycine_tRNA_primary_transcript - -[Term] -id: SO:0000262 -name: histidyl_tRNA -def: "A tRNA sequence that has a histidine anticodon, and a 3' histidine binding region." [SO:ke] -synonym: "histidyl tRNA" EXACT [] -synonym: "histidyl-transfer ribonucleic acid" EXACT [] -synonym: "histidyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000219 ! histidine_tRNA_primary_transcript - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -def: "A tRNA sequence that has an isoleucine anticodon, and a 3' isoleucine binding region." [SO:ke] -synonym: "isoleucyl tRNA" EXACT [] -synonym: "isoleucyl-transfer ribonucleic acid" EXACT [] -synonym: "isoleucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000220 ! isoleucine_tRNA_primary_transcript - -[Term] -id: SO:0000264 -name: leucyl_tRNA -def: "A tRNA sequence that has a leucine anticodon, and a 3' leucine binding region." [SO:ke] -synonym: "leucyl tRNA" EXACT [] -synonym: "leucyl-transfer ribonucleic acid" EXACT [] -synonym: "leucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000221 ! leucine_tRNA_primary_transcript - -[Term] -id: SO:0000265 -name: lysyl_tRNA -def: "A tRNA sequence that has a lysine anticodon, and a 3' lysine binding region." [SO:ke] -synonym: "lysyl tRNA" EXACT [] -synonym: "lysyl-transfer ribonucleic acid" EXACT [] -synonym: "lysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000222 ! lysine_tRNA_primary_transcript - -[Term] -id: SO:0000266 -name: methionyl_tRNA -def: "A tRNA sequence that has a methionine anticodon, and a 3' methionine binding region." [SO:ke] -synonym: "methionyl tRNA" EXACT [] -synonym: "methionyl-transfer ribonucleic acid" EXACT [] -synonym: "methionyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000223 ! 
methionine_tRNA_primary_transcript - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -def: "A tRNA sequence that has a phenylalanine anticodon, and a 3' phenylalanine binding region." [SO:ke] -synonym: "phenylalanyl tRNA" EXACT [] -synonym: "phenylalanyl-transfer ribonucleic acid" EXACT [] -synonym: "phenylalanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000224 ! phenylalanine_tRNA_primary_transcript - -[Term] -id: SO:0000268 -name: prolyl_tRNA -def: "A tRNA sequence that has a proline anticodon, and a 3' proline binding region." [SO:ke] -synonym: "prolyl tRNA" EXACT [] -synonym: "prolyl-transfer ribonucleic acid" EXACT [] -synonym: "prolyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000225 ! proline_tRNA_primary_transcript - -[Term] -id: SO:0000269 -name: seryl_tRNA -def: "A tRNA sequence that has a serine anticodon, and a 3' serine binding region." [SO:ke] -synonym: "seryl tRNA" EXACT [] -synonym: "seryl-transfer ribonucleic acid" RELATED [] -synonym: "seryl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000226 ! serine_tRNA_primary_transcript - -[Term] -id: SO:0000270 -name: threonyl_tRNA -def: "A tRNA sequence that has a threonine anticodon, and a 3' threonine binding region." [SO:ke] -synonym: "threonyl tRNA" EXACT [] -synonym: "threonyl-transfer ribonucleic acid" EXACT [] -synonym: "threonyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000227 ! threonine_tRNA_primary_transcript - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -def: "A tRNA sequence that has a tryptophan anticodon, and a 3' tryptophan binding region." [SO:ke] -synonym: "tryptophanyl tRNA" EXACT [] -synonym: "tryptophanyl-transfer ribonucleic acid" EXACT [] -synonym: "tryptophanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000228 ! 
tryptophan_tRNA_primary_transcript - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -def: "A tRNA sequence that has a tyrosine anticodon, and a 3' tyrosine binding region." [SO:ke] -synonym: "tyrosyl tRNA" EXACT [] -synonym: "tyrosyl-transfer ribonucleic acid" EXACT [] -synonym: "tyrosyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000229 ! tyrosine_tRNA_primary_transcript - -[Term] -id: SO:0000273 -name: valyl_tRNA -def: "A tRNA sequence that has a valine anticodon, and a 3' valine binding region." [SO:ke] -synonym: "valyl tRNA" EXACT [] -synonym: "valyl-transfer ribonucleic acid" EXACT [] -synonym: "valyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000230 ! valine_tRNA_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000231 ! snRNA_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000232 ! 
snoRNA_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: derives_from SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000277 -name: bound_by_factor -def: "An attribute describing a sequence that is bound by another molecule." [SO:ke] -comment: Formerly called transcript_by_bound_factor. -synonym: "bound by factor" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000278 -name: transcript_bound_by_nucleic_acid -def: "A transcript that is bound by a nucleic acid." [SO:xp] -comment: Formerly called transcript_by_bound_nucleic_acid. -synonym: "transcript bound by nucleic acid" EXACT [] -is_a: SO:0000673 ! transcript -relationship: has_quality SO:0000876 ! bound_by_nucleic_acid - -[Term] -id: SO:0000279 -name: transcript_bound_by_protein -def: "A transcript that is bound by a protein." [SO:xp] -comment: Formerly called transcript_by_bound_protein. -synonym: "transcript bound by protein" EXACT [] -is_a: SO:0000673 ! transcript -relationship: has_quality SO:0000875 ! bound_by_protein - -[Term] -id: SO:0000280 -name: engineered_gene -def: "A gene that is engineered." [SO:xp] -synonym: "engineered gene" EXACT [] -is_a: SO:0000704 ! gene -is_a: SO:0000804 ! engineered_region -relationship: has_quality SO:0000783 ! 
engineered - -[Term] -id: SO:0000281 -name: engineered_foreign_gene -def: "A gene that is engineered and foreign." [SO:xp] -synonym: "engineered foreign gene" EXACT [] -is_a: SO:0000280 ! engineered_gene -is_a: SO:0000285 ! foreign_gene -is_a: SO:0000805 ! engineered_foreign_region -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000282 -name: mRNA_with_minus_1_frameshift -def: "An mRNA with a minus 1 frameshift." [SO:xp] -synonym: "mRNA with minus 1 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -relationship: has_quality SO:0000866 ! minus_1_frameshift - -[Term] -id: SO:0000283 -name: engineered_foreign_transposable_element_gene -def: "A transposable_element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign transposable element gene" EXACT [] -is_a: SO:0000111 ! transposable_element_gene -is_a: SO:0000281 ! engineered_foreign_gene -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartite and interrupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000285 -name: foreign_gene -def: "A gene that is foreign." [SO:xp] -synonym: "foreign gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "direct terminal repeat" RELATED [] -synonym: "long terminal repeat" EXACT [] -synonym: "LTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Long_terminal_repeat "wiki" -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000186 ! 
LTR_retrotransposon - -[Term] -id: SO:0000287 -name: fusion_gene -def: "A gene that is a fusion." [SO:xp] -synonym: "fusion gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Fusion_gene "wiki" -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000806 ! fusion - -[Term] -id: SO:0000288 -name: engineered_fusion_gene -def: "A fusion gene that is engineered." [SO:xp] -synonym: "engineered fusion gene" EXACT [] -is_a: SO:0000280 ! engineered_gene -is_a: SO:0000287 ! fusion_gene -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000289 -name: microsatellite -def: "A repeat_region containing repeat_units of 2 to 10 bp repeated in tandem." [http://www.informatics.jax.org/silver/glossary.shtml, NCBI:th] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite" EXACT [] -synonym: "dinucleotide repeat microsatellite feature" EXACT [] -synonym: "dinucleotide repeat microsatellite locus" EXACT [] -synonym: "dinucleotide repeat microsatellite marker" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite marker" RELATED [] -synonym: "rinucleotide repeat microsatellite" EXACT [] -synonym: "trinucleotide repeat microsatellite feature" EXACT [] -synonym: "trinucleotide repeat microsatellite locus" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_obsolete: true - -[Term] -id: SO:0000293 -name: engineered_foreign_repetitive_element -def: "A repetitive element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign repetitive element" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000805 ! 
engineered_foreign_region -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -synonym: "U12 intron" EXACT [] -synonym: "U12-dependent intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! replicon - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "D-loop" EXACT [] -synonym: "displacement loop" RELATED [] -xref: http://en.wikipedia.org/wiki/D_loop "wiki" -is_a: SO:0000296 ! 
origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -synonym: "recombination feature" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000299 -name: specific_recombination_site -synonym: "specific recombination site" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -synonym: "recombination feature of rearranged gene" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000301 -name: vertebrate_immune_system_gene_recombination_feature -synonym: "vertebrate immune system gene recombination feature" EXACT [] -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene recombination feature" EXACT [] -synonym: "J-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interrupted palindrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_DNA_base -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001236 ! base -is_a: SO:0001720 ! 
epigenetically_modified_region - -[Term] -id: SO:0000306 -name: methylated_DNA_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_DNA_base - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_obsolete: true - -[Term] -id: SO:0000309 -name: computed_feature -is_obsolete: true - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_obsolete: true - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to: -is_obsolete: true - -[Term] -id: SO:0000312 -name: experimentally_determined -def: "Attribute to describe a feature that has been experimentally verified." [SO:ke] -synonym: "experimentally determined" EXACT [] -is_a: SO:0000789 ! validated - -[Term] -id: SO:0000313 -name: stem_loop -alt_id: SO:0000019 -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "RNA_hairpin_loop" EXACT [] -synonym: "stem loop" EXACT [] -synonym: "stem-loop" EXACT [] -xref: http://en.wikipedia.org/wiki/Stem_loop "wiki" -is_a: SO:0000122 ! RNA_sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! 
repeat_region - -[Term] -id: SO:0000315 -name: TSS -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -synonym: "transcription_start_site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -synonym: "coding_sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cDNA clone" EXACT [] -is_a: SO:0000151 ! clone -relationship: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke] -synonym: "intronic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000321 -name: mRNA_with_plus_1_frameshift -def: "An mRNA with a plus 1 frameshift." 
[SO:ke] -synonym: "mRNA with plus 1 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -relationship: has_quality SO:0000868 ! plus_1_frameshift - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -synonym: "nuclease hypersensitive site" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "coding start" EXACT [] -synonym: "translation initiation site" EXACT [] -synonym: "translation start" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "35S rRNA primary transcript" EXACT [] -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke] -synonym: "coding end" EXACT [] -synonym: "translation termination site" EXACT [] -synonym: "translation_end" EXACT [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray oligo" EXACT [] -synonym: "microarray oligonucleotide" EXACT [] -is_a: SO:0000051 ! probe - -[Term] -id: SO:0000329 -name: mRNA_with_plus_2_frameshift -def: "An mRNA with a plus 2 frameshift." 
[SO:xp] -synonym: "mRNA with plus 2 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -relationship: has_quality SO:0000869 ! plus_2_framshift - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000335 -name: mRNA_with_minus_2_frameshift -def: "A mRNA with a minus 2 frameshift." [SO:ke] -synonym: "mRNA with minus 2 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -relationship: has_quality SO:0000867 ! 
minus_2_frameshift - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0001411 ! biological_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITEs do not encode proteins." [http://www.pnas.org/cgi/content/full/97/18/10083] -synonym: "miniature inverted repeat transposable element" EXACT [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome which promotes recombination." [SO:rd] -synonym: "recombination hotspot" EXACT [] -xref: http://en.wikipedia.org/wiki/Recombination_hotspot "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." 
[SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -synonym: "site specific recombination target region" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000346 -name: loxP_site -synonym: "Cre-recombination target region" RELATED [] -synonym: "loxP site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! 
match - -[Term] -id: SO:0000348 -name: nucleic_acid -def: "An attribute describing a sequence consisting of nucleobases bound to repeating units. The forms found in nature are deoxyribonucleic acid (DNA), where the repeating units are 2-deoxy-D-ribose rings connected to a phosphate backbone, and ribonucleic acid (RNA), where the repeating units are D-ribose rings connected to a phosphate backbone." [CHEBI:33696, RSC:cb] -synonym: "nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleic_acid "wiki" -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000350 -name: FRT_site -def: "An inversion site found on the Saccharomyces cerevisiae 2 micron plasmid." [SO:ma] -synonym: "FLP recombination target region" EXACT [] -synonym: "FRT site" EXACT [] -is_a: SO:0000948 ! inversion_site - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "An attribute to decide a sequence of nucleotides, nucleotide analogs, or amino acids that has been designed by an experimenter and which may, or may not, correspond with any natural sequence." [SO:ma] -synonym: "synthetic sequence" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000352 -name: DNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a 2-deoxy-D-ribose ring connected to a phosphate backbone." [RSC:cb] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000353 -name: sequence_assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! 
assembly - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -def: "A region of intronic nucleotide sequence targeted by a nuclease enzyme." [SO:ke] -synonym: "group 1 intron homing endonuclease target region" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which is co-inherited as the result of the lack of historic recombination within it." [SO:ma] -synonym: "haplotype block" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000356 -name: RNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a D-ribose ring connected to a phosphate backbone." [RSC:cb] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: flanked -def: "An attribute describing a region that is bounded either side by a particular kind of region." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000359 -name: floxed -def: "An attribute describing sequence that is flanked by Lox-P sites." [SO:ke] -xref: http://en.wikipedia.org/wiki/Floxed "wiki" -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000361 -name: FRT_flanked -def: "An attribute to describe sequence that is flanked by the FLP recombinase recognition site, FRT." [SO:ke] -synonym: "FRT flanked" EXACT [] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000362 -name: invalidated_by_chimeric_cDNA -def: "A cDNA clone constructed from more than one mRNA. 
Usually an experimental artifact." [SO:ma] -synonym: "invalidated by chimeric cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000363 -name: floxed_gene -def: "A transgene that is floxed." [SO:xp] -synonym: "floxed gene" EXACT [] -is_a: SO:0000902 ! transgene -relationship: has_quality SO:0000359 ! floxed - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposable element." [SO:ke] -synonym: "transposable element flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "A region encoding an integrase which acts at a site adjacent to it (attI_site) to insert DNA which must include but is not limited to an attC_site." [SO:as] -xref: http://en.wikipedia.org/wiki/Integron "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000367 -name: attI_site -def: "A region within an integron, adjacent to an integrase, at which site specific recombination involving an attC_site takes place." [SO:as] -synonym: "attI site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -is_obsolete: true - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/~smaloy/Glossary/C.html] -synonym: "conjugative transposon" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -is_a: SO:0000673 ! transcript -relationship: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0000373 -name: recombinationally_inverted_gene -def: "A recombinationally rearranged gene by inversion." [SO:xp] -synonym: "recombinationally inverted gene" EXACT [] -is_a: SO:0000456 ! recombinationally_rearranged_gene -relationship: has_quality SO:1000036 ! inversion - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -is_a: SO:0000372 ! enzymatic_RNA -relationship: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000375 -name: rRNA_5_8S -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! 
large_subunit_rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 6S RNA represses expression from a sigma70-dependent promoter during stationary phase." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S RNA" EXACT [] -synonym: "RNA 6S" EXACT [] -xref: http://en.wikipedia.org/wiki/6S_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB RsmB RNA" EXACT [] -synonym: "CsrB-RsmB RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. 
The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -synonym: "DsrA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/DsrA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -synonym: "GcvB RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/GcvB_RNA "wiki" -is_a: SO:0000378 ! DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [PMID:2436805] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -is_a: SO:0000715 ! RNA_motif -relationship: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000381 -name: group_IIA_intron -synonym: "group IIA intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -synonym: "group IIB intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -synonym: "MicF RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MicF_RNA "wiki" -is_a: SO:0000644 ! 
antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -synonym: "OxyS RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/OxyS_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterized activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. 
Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -synonym: "RprA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RprA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -synonym: "RRE RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. 
coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -synonym: "spot-42 RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Spot_42_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesizes telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). 
Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. 
The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." 
[PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -def: "An attribute describes a quality of sequence." [SO:ke] -synonym: "sequence attribute" EXACT [] - -[Term] -id: SO:0000401 -name: gene_attribute -synonym: "gene attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_obsolete: true - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! 
C_D_box_snoRNA -relationship: derives_from SO:0005837 ! U14_snoRNA_primary_transcript - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron. Twintrons are group II or III introns, into which another group II or III intron has been transposed." [PMID:1899376, PMID:7823908] -xref: http://en.wikipedia.org/wiki/Twintron "wiki" -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! 
small_subunit_rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A biological_region of sequence that, in the molecule, interacts selectively and non-covalently with other molecules. A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: See GO:0005488 : binding. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with polypeptide molecules." [SO:ke] -comment: See GO:0042277 : peptide binding. -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000411 -name: rescue_region -def: "A region that rescues." [SO:xp] -synonym: "rescue fragment" EXACT [] -synonym: "rescue region" EXACT [] -synonym: "rescue segment" RELATED [] -is_a: SO:0000695 ! reagent -relationship: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! 
remark - -[Term] -id: SO:0000414 -name: invalidated_by_genomic_contamination -def: "An attribute to describe a feature that is invalidated due to genomic contamination." [SO:ke] -synonym: "invalidated by genomic contamination" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000415 -name: invalidated_by_genomic_polyA_primed_cDNA -def: "An attribute to describe a feature that is invalidated due to polyA priming." [SO:ke] -synonym: "invalidated by genomic polyA primed cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000416 -name: invalidated_by_partial_processing -def: "An attribute to describe a feature that is invalidated due to partial processing." [SO:ke] -synonym: "invalidated by partial processing" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000417 -name: polypeptide_domain -alt_id: BS:00012 -alt_id: BS:00134 -alt_id: SO:0001069 -def: "A structurally or functionally defined protein region. In proteins with multiple domains, the combination of the domains determines the function of the protein. A region which has been shown to recur throughout evolution." [EBIBS:GAR] -comment: Range. Old definition from before biosapiens: A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains. -subset: biosapiens -synonym: "domain" BROAD BS [uniprot:feature_type] -synonym: "polypeptide domain" EXACT [] -synonym: "polypeptide_structural_domain" EXACT BS [] -synonym: "structural domain" BROAD BS [] -is_a: SO:0001070 ! polypeptide_structural_region -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0001527 ! peptide_localization_signal -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -synonym: "5' TIR" EXACT [] -synonym: "five prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -synonym: "3' TIR" EXACT [] -synonym: "three prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -synonym: "U5 long terminal repeat region" EXACT [] -synonym: "U5 LTR region" EXACT [] -is_a: SO:0000848 ! 
LTR_component - -[Term] -id: SO:0000423 -name: R_LTR_region -synonym: "R long terminal repeat region" EXACT [] -synonym: "R LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000424 -name: U3_LTR_region -synonym: "U3 long terminal repeat region" EXACT [] -synonym: "U3 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000425 -name: five_prime_LTR -synonym: "5' long terminal repeat" EXACT [] -synonym: "5' LTR" EXACT [] -synonym: "five prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -synonym: "3' long terminal repeat" EXACT [] -synonym: "3' LTR" EXACT [] -synonym: "three prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -synonym: "R 5' long term repeat region" EXACT [] -synonym: "R five prime LTR region" EXACT [] -is_a: SO:0000423 ! R_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -synonym: "U5 5' long terminal repeat region" EXACT [] -synonym: "U5 five prime LTR region" EXACT [] -is_a: SO:0000422 ! U5_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -synonym: "U3 5' long term repeat region" EXACT [] -synonym: "U3 five prime LTR region" EXACT [] -is_a: SO:0000424 ! U3_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -synonym: "R 3' long terminal repeat region" EXACT [] -synonym: "R three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -synonym: "U3 3' long terminal repeat region" EXACT [] -synonym: "U3 three prime LTR region" EXACT [] -is_a: SO:0000849 ! 
three_prime_LTR_component - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -synonym: "U5 3' long terminal repeat region" EXACT [] -synonym: "U5 three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -synonym: "non LTR retrotransposon polymeric tract" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: target_site_duplication -def: "A sequence of the target DNA that is duplicated when a transposable element or phage inserts; usually found at each end the insertion." [http://www.koko.gov.my/CocoaBioTech/Glossaryt.html] -synonym: "target site duplication" EXACT [] -is_a: SO:0000314 ! direct_repeat -relationship: derives_from SO:0000101 ! transposable_element - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." [SO:ke] -synonym: "LTR retrotransposon poly purine tract" RELATED [] -synonym: "RR tract" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -synonym: "inverted ring chromosome" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! 
ring_chromosome - -[Term] -id: SO:0000440 -name: vector_replicon -def: "A replicon that has been modified to act as a vector for foreign sequence." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "vector" EXACT [] -synonym: "vector replicon" EXACT [] -xref: http://en.wikipedia.org/wiki/Vector_(molecular_biology) "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000443 -name: polymer_attribute -def: "An attribute to describe the kind of biological sequence." [SO:ke] -synonym: "polymer attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -synonym: "three prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "5' nc exon" EXACT [] -synonym: "5' non coding exon" EXACT [] -synonym: "five prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." 
[SO:ke] -synonym: "UTR intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." [SO:ke] -synonym: "five prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -synonym: "three prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequency of these components." [SO:ma] -synonym: "random sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -synonym: "chromosome interband" RELATED [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000451 -name: gene_with_polyadenylated_mRNA -def: "A gene that encodes a polyadenylated mRNA." [SO:xp] -synonym: "gene with polyadenylated mRNA" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -relationship: transcribed_to SO:0000871 ! polyadenylated_mRNA - -[Term] -id: SO:0000452 -name: transgene_attribute -is_obsolete: true - -[Term] -id: SO:0000453 -name: chromosomal_transposition -def: "A chromosome structure variant whereby a region of a chromosome has been transferred to another position. Among interchromosomal rearrangements, the term transposition is reserved for that class in which the telomeres of the chromosomes involved are coupled (that is to say, form the two ends of a single DNA molecule) as in wild-type." [FB:reference_manual, SO:ke] -synonym: "chromosomal transposition" EXACT [] -synonym: "transposition" NARROW [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." 
[http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000455 -name: gene_with_mRNA_with_frameshift -def: "A gene that encodes an mRNA with a frameshift." [SO:xp] -synonym: "gene with mRNA with frameshift" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -relationship: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000456 -name: recombinationally_rearranged_gene -def: "A gene that is recombinationally rearranged." [SO:ke] -synonym: "recombinationally rearranged gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000940 ! recombinationally_rearranged - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -def: "A chromosome duplication involving an insertion from another chromosome." [SO:ke] -synonym: "interchromosomal duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene_segment -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D gene" EXACT [] -synonym: "D-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000459 -name: gene_with_trans_spliced_transcript -def: "A gene with a transcript that is trans-spliced." [SO:xp] -synonym: "gene with trans spliced transcript" EXACT [] -is_a: SO:0000704 ! gene -relationship: transcribed_to SO:0000479 ! trans_spliced_transcript - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_segment -comment: I am using the term segment instead of gene here to avoid confusion with the region 'gene'. -synonym: "vertebrate immunoglobulin T cell receptor segment" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene" EXACT [] -is_a: SO:0000301 ! 
vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite deficiency" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000463 -name: encodes_alternately_spliced_transcripts -def: "A gene that encodes more than one transcript." [SO:ke] -synonym: "encodes alternately spliced transcripts" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome deletion whereby a chromosome is generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus duplication" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene_segment -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene" EXACT [] -synonym: "V-GENE" EXACT [] -synonym: "variable_gene" EXACT [] -is_a: SO:0000460 ! 
vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -def: "An attribute describing a gene sequence where the resulting protein is regulated by the stability of the resulting protein." [SO:ke] -synonym: "post translationally regulated by protein stability" EXACT [] -synonym: "post-translationally regulated by protein stability" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -def: "An attribute describing a gene sequence where the resulting protein is modified to regulate it." [SO:ke] -synonym: "post translationally regulated by protein modification" EXACT [] -synonym: "post-translationally regulated by protein modification" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene_segment -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene" EXACT [] -synonym: "J-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000471 -name: autoregulated -def: "The gene product is involved in its own transcriptional regulation." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -def: "The gene product is involved in its own transcriptional regulation where it decreases transcription." [SO:ke] -synonym: "negatively autoregulated" EXACT [] -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -def: "The gene product is involved in its own transcriptional regulation, where it increases transcription." [SO:ke] -synonym: "positively autoregulated" EXACT [] -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." [SO:ke] -synonym: "contig read" EXACT [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -def: "A gene that is polycistronic." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000478 -name: C_gene_segment -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C gene" EXACT [] -synonym: "C_GENE" EXACT [] -synonym: "constant gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000479 -name: trans_spliced_transcript -def: "A transcript that is trans-spliced." [SO:xp] -synonym: "trans spliced transcript" EXACT [] -synonym: "trans-spliced transcript" EXACT [] -is_a: SO:0000673 ! transcript -relationship: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. 
A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly." [SO:ke] -synonym: "tiling path clone" EXACT [] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occurring at the termini of a DNA transposon." [SO:ke] -synonym: "terminal inverted repeat" EXACT [] -synonym: "TIR" EXACT [] -is_a: SO:0000294 ! inverted_repeat -relationship: part_of SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor gene cluster" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-CLUSTER" EXACT [] -synonym: "DJ J cluster" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-C-CLUSTER" EXACT [] -synonym: "VDJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-CLUSTER" EXACT [] -synonym: "VDJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-C-CLUSTER" RELATED [] -synonym: "VJ C cluster" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-C-CLUSTER" EXACT [] -synonym: "VJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-CLUSTER" EXACT [] -synonym: "VJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -synonym: "D gene recombination feature" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-HEPTAMER" EXACT [] -synonym: "three prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! 
three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-NOMAMER" EXACT [] -synonym: "three prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-SPACER" EXACT [] -synonym: "three prime D spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-HEPTAMER" EXACT [] -synonym: "five prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-NONAMER" EXACT [] -synonym: "five prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! 
five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'-SPACER" EXACT [] -synonym: "five prime D spacer" EXACT [] -synonym: "five prime D-spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -synonym: "Hoogsteen base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Hoogsteen_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." [SO:ke] -synonym: "reverse Hoogsteen base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." 
[SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ C cluster" EXACT [] -synonym: "D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ cluster" EXACT [] -synonym: "D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J C cluster" EXACT [] -synonym: "D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! 
J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000507 -name: pseudogenic_exon -def: "A non functional descendant of an exon, part of a pseudogene." [SO:ke] -comment: This is the analog of the exon of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon -relationship: part_of SO:0000516 ! pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J cluster" EXACT [] -synonym: "D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J C cluster" EXACT [] -synonym: "D-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! 
C_gene_segment - -[Term] -id: SO:0000510 -name: VD_gene_segment -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V_D_GENE" EXACT [] -synonym: "VD gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J C cluster" EXACT [] -synonym: "J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus aneuploid" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J cluster" EXACT [] -synonym: "J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J nonamer" EXACT [] -synonym: "J-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J heptamer" EXACT [] -synonym: "J-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -def: "A non functional descendant of a transcript, part of a pseudogene." [SO:ke] -comment: This is the analog of the transcript of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic transcript" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J spacer" EXACT [] -synonym: "J-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ cluster" EXACT [] -synonym: "V-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J cluster" EXACT [] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ C cluster" EXACT [] -synonym: "V-(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ cluster" EXACT [] -synonym: "V-(VDJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000574 ! 
VDJ_gene_segment - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J cluster" EXACT [] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ C cluster" EXACT [] -synonym: "V-(VJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ cluster" EXACT [] -synonym: "V-(VJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J cluster" EXACT [] -synonym: "V-(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V cluster" EXACT [] -synonym: "V-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ C cluster" EXACT [] -synonym: "V-D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ cluster" EXACT [] -synonym: "V-D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000572 ! 
DJ_gene_segment - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J C cluster" EXACT [] -synonym: "V-D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J cluster" EXACT [] -synonym: "V-D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J C cluster" EXACT [] -synonym: "V-D-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! 
C_gene_segment - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J cluster" EXACT [] -synonym: "V-D-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V heptamer" EXACT [] -synonym: "V-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J cluster" EXACT [] -synonym: "V-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J C cluster" EXACT [] -synonym: "V-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! 
J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V nonamer" EXACT [] -synonym: "V-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V spacer" EXACT [] -synonym: "V-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene recombination feature" EXACT [] -synonym: "V-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-C-CLUSTER" EXACT [] -synonym: "DJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! 
DJ_gene_segment - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-C-CLUSTER" EXACT [] -synonym: "DJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-C-CLUSTER" EXACT [] -synonym: "VDJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ C cluster" EXACT [] -synonym: "V-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." 
[http://www.pnas.org/cgi/content/full/100/11/6569] -synonym: "ISCR" RELATED [] -xref: http://en.wikipedia.org/wiki/Helitron "wiki" -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -synonym: "recoding pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -synonym: "designed sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite duplication" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000548 -name: gene_with_edited_transcript -def: "A gene that encodes a transcript that is edited." [SO:xp] -synonym: "gene with edited transcript" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -relationship: transcribed_to SO:0000873 ! edited_transcript - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived duplication plus aneuploid" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -def: "A chromosome structural variation whereby either a chromosome exists in addition to the normal chromosome complement or is lacking." [SO:ke] -comment: Examples are Nullo-4, Haplo-4 and triplo-4 in Drosophila. 
-synonym: "aneuploid chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -synonym: "polyadenylation termination signal" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "A region in the 5' UTR that pairs with the 16S rRNA during formation of the preinitiation complex." [SO:jh] -comment: Not found in Eukaryotic sequence. -synonym: "five prime ribosome binding site" EXACT [] -synonym: "RBS" RELATED [] -synonym: "Shine Dalgarno sequence" EXACT [] -synonym: "Shine-Dalgarno sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Shine-Dalgarno_sequence "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -alt_id: SO:0001430 -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation. The boundary between the UTR and the polyA sequence." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA cleavage site" EXACT [] -synonym: "polyA junction" EXACT [] -synonym: "polyA site" EXACT [] -synonym: "polyA_junction" EXACT [] -synonym: "polyadenylation site" RELATED [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "5' clip" RELATED [] -synonym: "five prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'RS" EXACT [] -synonym: "five prime D recombination signal sequence" EXACT [] -synonym: "five prime D-recombination signal sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "3'-clip" EXACT [] -synonym: "three prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C cluster" EXACT [] -synonym: "C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D cluster" EXACT [] -synonym: "D-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J cluster" EXACT [] -synonym: "D-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: "Seven nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or J-gene recombination feature of an immunoglobulin or T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "HEPTAMER" RELATED [] -synonym: "heptamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -synonym: "nonamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000563 -name: vertebrate_immune_system_gene_recombination_spacer -synonym: "vertebrate immune system gene recombination spacer" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J C cluster" EXACT [] -synonym: "V-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! 
DJ_gene_segment - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J C cluster" EXACT [] -synonym: "V-(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J C cluster" EXACT [] -synonym: "V-(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -synonym: "inversion derived aneuploid chromosome" EXACT [] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promoter -synonym: "bidirectional promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000569 -name: retrotransposed -alt_id: SO:0100042 -def: "An attribute of a feature that occurred as the product of a reverse transcriptase mediated event." [SO:ke] -comment: GO:0003964 RNA-directed DNA polymerase activity. 
-xref: http://en.wikipedia.org/wiki/Retrotransposed "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-RS" EXACT [] -synonym: "three prime D recombination signal sequence" EXACT [] -synonym: "three_prime_D-recombination_signal_sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_encoding -synonym: "miRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000572 -name: DJ_gene_segment -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D-J-GENE" EXACT [] -synonym: "DJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000573 -name: rRNA_encoding -synonym: "rRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000574 -name: VDJ_gene_segment -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-D-J-GENE" EXACT [] -synonym: "VDJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000575 -name: scRNA_encoding -synonym: "scRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000576 -name: VJ_gene_segment -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-J-GENE" EXACT [] -synonym: "VJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_encoding -synonym: "snoRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." [SO:ma] -synonym: "edited transcript feature" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -synonym: "methylation guide snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." [SO:ke] -synonym: "rRNA cleavage snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! 
snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "pre edited region" EXACT [] -synonym: "pre-edited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "A tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. TmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and Eukaryote nuclear genomes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa RNA" RELATED [] -synonym: "ssrA" RELATED [] -xref: http://en.wikipedia.org/wiki/TmRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_encoding -synonym: "C/D box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." [SO:ke] -synonym: "10Sa RNA primary transcript" RELATED [] -synonym: "ssrA RNA primary transcript" RELATED [] -synonym: "tmRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyze their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). 
These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -is_a: SO:0000188 ! intron -relationship: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -synonym: "SRP RNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. 
The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000589 ! SRP_RNA_primary_transcript - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A tertiary structure in RNA where nucleotides in a loop form base pairs with a region of RNA downstream of the loop." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Pseudoknot "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "classical pseudoknot" EXACT [] -synonym: "H pseudoknot" EXACT [] -synonym: "H-pseudoknot" EXACT [] -synonym: "H-type pseudoknot" EXACT [] -synonym: "hairpin-type pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. 
Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000595 ! C_D_box_snoRNA_primary_transcript - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "box H/ACA snoRNA" EXACT [] -synonym: "H ACA box snoRNA" EXACT [] -synonym: "H/ACA box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000596 ! H_ACA_box_snoRNA_primary_transcript - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." [SO:ke] -synonym: "C/D box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -synonym: "H ACA box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! 
snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." [http://www.rna.ucla.edu/index.html] -is_obsolete: true - -[Term] -id: SO:0000598 -name: edited_by_C_insertion_and_dinucleotide_insertion -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000599 -name: edited_by_C_to_U_substitution -is_obsolete: true - -[Term] -id: SO:0000600 -name: edited_by_A_to_I_substitution -is_obsolete: true - -[Term] -id: SO:0000601 -name: edited_by_G_addition -is_obsolete: true - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. 
Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing block" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing domain" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "unedited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_encoding -synonym: "H ACA box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! 
snoRNA_encoding - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -synonym: "oligo U tail" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000602 ! guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -synonym: "bacterial RNApol promoter" EXACT [] -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -synonym: "bacterial terminator" EXACT [] -is_a: SO:0000141 ! terminator -is_a: SO:0000752 ! 
gene_group_regulatory_region - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -synonym: "terminator of type 2 RNApol III promoter" EXACT [] -is_a: SO:0000951 ! eukaryotic_terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -synonym: "RNApol III promoter type 1" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "RNApol III promoter type 2" EXACT [] -synonym: "tRNA promoter" RELATED [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence TGGCnnAGTGG." [SO:ke] -comment: Binds TFIIIC. -synonym: "A-box" EXACT [] -xref: http://en.wikipedia.org/wiki/A-box "wiki" -is_a: SO:0001660 ! core_promoter_element - -[Term] -id: SO:0000620 -name: B_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence AGGTTCCAnnCC." [SO:ke] -comment: Binds TFIIIC. -synonym: "B-box" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -synonym: "RNApol III promoter type 3" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -def: "An RNA polymerase III type 1 promoter with consensus sequence CAnnCCn." [SO:ke] -synonym: "C-box" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000623 -name: snRNA_encoding -synonym: "snRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -synonym: "chromosomal regulatory element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000627 -name: insulator -def: "A transcriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -synonym: "five prime open reading frame" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -def: "A start codon upstream of the ORF." [SO:ke] -synonym: "upstream AUG codon" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000631 -name: polycistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." [SO:ke] -synonym: "polycistronic primary transcript" EXACT [] -is_a: SO:0000078 ! 
polycistronic_transcript -is_a: SO:0000185 ! primary_transcript -relationship: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000632 -name: monocistronic_primary_transcript -def: "A primary transcript encoding for one gene product." [SO:ke] -synonym: "monocistronic primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript -is_a: SO:0000665 ! monocistronic_transcript -relationship: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000633 -name: monocistronic_mRNA -def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd] -synonym: "monocistronic mRNA" EXACT [] -synonym: "monocistronic processed transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Monocistronic_mRNA "wiki" -is_a: SO:0000234 ! mRNA -is_a: SO:0000665 ! monocistronic_transcript -relationship: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000634 -name: polycistronic_mRNA -def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd] -synonym: "polycistronic mRNA" EXACT [] -synonym: "polycistronic processed transcript" RELATED [] -xref: http://en.wikipedia.org/wiki/Polycistronic_mRNA "wiki" -is_a: SO:0000078 ! polycistronic_transcript -is_a: SO:0000234 ! mRNA -relationship: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "A primary transcript that donates the spliced leader to other mRNA." [SO:ke] -synonym: "mini exon donor RNA" EXACT [] -synonym: "mini-exon donor RNA" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -synonym: "spliced leader RNA" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000635 ! mini_exon_donor_RNA - -[Term] -id: SO:0000637 -name: engineered_plasmid -def: "A plasmid that is engineered." 
[SO:xp] -synonym: "engineered plasmid" EXACT [] -synonym: "engineered plasmid gene" RELATED [] -is_a: SO:0000155 ! plasmid -is_a: SO:0000804 ! engineered_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -synonym: "transcribed spacer region" EXACT [] -is_a: SO:0000838 ! rRNA_primary_transcript_region - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -synonym: "internal transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -synonym: "external transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -synonym: "tetranucleotide repeat microsatellite feature" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_encoding -synonym: "SRP RNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." 
[SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro RNA primary transcript" EXACT [] -synonym: "miRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000648 -name: stRNA_primary_transcript -def: "A primary transcript encoding a small temporal mRNA (SO:0000649)." [SO:ke] -synonym: "small temporal RNA primary transcript" EXACT [] -synonym: "stRNA primary transcript" EXACT [] -is_a: SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." 
[SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000255 ! rRNA_small_subunit_primary_transcript - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilizes 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000654 -name: maxicircle_gene -def: "A mitochondrial gene located in a maxicircle." 
[SO:xp] -synonym: "maxi-circle gene" EXACT [] -synonym: "maxicircle gene" EXACT [] -is_a: SO:0000089 ! kinetoplast_gene -relationship: part_of SO:0000742 ! maxicircle - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000656 -name: stRNA_encoding -synonym: "stRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: has_part SO:0000726 ! repeat_unit - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_encoding -synonym: "tmRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_obsolete: true - -[Term] -id: SO:0000661 -name: intron_attribute -is_obsolete: true - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000663 -name: tRNA_encoding -synonym: "tRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -synonym: "introgressed chromosome region" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000665 -name: monocistronic_transcript -def: "A transcript that is monocistronic." [SO:xp] -synonym: "monocistronic transcript" EXACT [] -is_a: SO:0000673 ! transcript -relationship: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000666 -name: mobile_intron -def: "An intron (mitochondrial, chloroplast, nuclear or prokaryotic) that encodes a double strand sequence specific endonuclease allowing for mobility." [SO:ke] -synonym: "mobile intron" EXACT [] -is_a: SO:0000188 ! intron -is_a: SO:0001037 ! mobile_genetic_element -relationship: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0000667 -name: insertion -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: DBVAR -subset: SOFA -synonym: "insertion" EXACT dbvar [http://www.ncbi.nlm.nih.gov/dbvar/] -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -xref: loinc:LA6687-3 "Insertion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -synonym: "sequence rearrangement feature" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." [SO:ma] -synonym: "chromosome breakage sequence" EXACT [] -is_a: SO:0000669 ! 
sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -synonym: "internal eliminated sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -synonym: "macronucleus destined segment" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non canonical splice site" EXACT [] -synonym: "non-canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000678 -consider: SO:0000679 - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." [SO:ke] -synonym: "canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000676 -consider: SO:0000677 - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -synonym: "canonical 3' splice site" EXACT [] -synonym: "canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." [SO:ke] -synonym: "canonical 5' splice site" EXACT [] -synonym: "canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! 
five_prime_cis_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." [SO:ke] -synonym: "non canonical 3' splice site" RELATED [] -synonym: "non canonical three prime splice site" EXACT [] -synonym: "non-canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non canonical 5' splice site" EXACT [] -synonym: "non canonical five prime splice site" EXACT [] -synonym: "non-canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non ATG start codon" EXACT [] -synonym: "non canonical start codon" EXACT [] -synonym: "non-canonical start codon" EXACT [] -is_a: SO:0000318 ! start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -synonym: "aberrant processed transcript" EXACT [] -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." [http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -synonym: "exonic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! 
nuclease_binding_site - -[Term] -id: SO:0000685 -name: DNAseI_hypersensitive_site -synonym: "DHS" EXACT [] -synonym: "DNAseI hypersensitive site" EXACT [] -is_a: SO:0000322 ! nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "A chromosomal translocation whereby the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements. This occurs for some translocations, particularly but not exclusively, reciprocal translocations." [SO:ma] -synonym: "translocation element" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000690 -name: gene_with_polycistronic_transcript -def: "A gene that encodes a polycistronic transcript." [SO:xp] -synonym: "gene with polycistronic transcript" EXACT [] -is_a: SO:0000704 ! gene -relationship: transcribed_to SO:0000078 ! polycistronic_transcript - -[Term] -id: SO:0000691 -name: cleaved_initiator_methionine -alt_id: BS:00067 -def: "The initiator methionine that has been cleaved from a mature polypeptide sequence." [EBIBS:GAR] -subset: biosapiens -synonym: "cleaved initiator methionine" EXACT [] -synonym: "init_met" RELATED [uniprot:feature_type] -synonym: "initiator methionine" RELATED [] -is_a: SO:0100011 ! 
cleaved_peptide_region - -[Term] -id: SO:0000692 -name: gene_with_dicistronic_transcript -def: "A gene that encodes a dicistronic transcript." [SO:xp] -synonym: "gene with dicistronic transcript" EXACT [] -is_a: SO:0000690 ! gene_with_polycistronic_transcript -relationship: transcribed_to SO:0000079 ! dicistronic_transcript - -[Term] -id: SO:0000693 -name: gene_with_recoded_mRNA -def: "A gene that encodes an mRNA that is recoded." [SO:xp] -synonym: "gene with recoded mRNA" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -relationship: has_quality SO:0000881 ! recoded - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000697 -name: gene_with_stop_codon_read_through -def: "A gene that encodes a transcript with stop codon readthrough." [SO:xp] -synonym: "gene with stop codon read through" EXACT [] -is_a: SO:0000693 ! gene_with_recoded_mRNA -relationship: has_part SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000698 -name: gene_with_stop_codon_redefined_as_pyrrolysine -def: "A gene encoding an mRNA that has the stop codon redefined as pyrrolysine." [SO:xp] -synonym: "gene with stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000697 ! 
gene_with_stop_codon_read_through -relationship: has_part SO:0000884 ! stop_codon_redefined_as_pyrrolysine - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -synonym: "breakpoint" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjcent copies of a region (of length greater than 1)." 
[SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The 5' five prime splice site region of the donor RNA." [SO:ke] -comment: SL RNA contains a donor site. -synonym: "5 prime trans splice site" RELATED [] -synonym: "trans splice donor site" EXACT [] -synonym: "trans-splice donor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -def: "A trans_splicing_acceptor_site which appends the 22nt SL1 RNA leader sequence to the 5' end of most mRNAs." [SO:nlw] -synonym: "SL1 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -def: "A trans_splicing_acceptor_site which appends the 22nt SL2 RNA leader sequence to the 5' end of mRNAs. SL2 acceptor sites occur in genes in internal segments of polycistronic transcripts." [SO:nlw] -synonym: "SL2 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000710 -name: gene_with_stop_codon_redefined_as_selenocysteine -def: "A gene encoding an mRNA that has the stop codon redefined as selenocysteine." [SO:xp] -synonym: "gene with stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000697 ! gene_with_stop_codon_read_through -relationship: has_part SO:0000885 ! 
stop_codon_redefined_as_selenocysteine - -[Term] -id: SO:0000711 -name: gene_with_mRNA_recoded_by_translational_bypass -def: "A gene with mRNA recoded by translational bypass." [SO:xp] -synonym: "gene with mRNA recoded by translational bypass" EXACT [] -is_a: SO:0000693 ! gene_with_recoded_mRNA -relationship: has_quality SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:0000712 -name: gene_with_transcript_with_translational_frameshift -def: "A gene encoding a transcript that has a translational frameshift." [SO:xp] -synonym: "gene with transcript with translational frameshift" EXACT [] -is_a: SO:0000693 ! gene_with_recoded_mRNA -relationship: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000713 -name: DNA_motif -def: "A motif that is active in the DNA form of the sequence." [SO:ke] -synonym: "DNA motif" EXACT [] -xref: http://en.wikipedia.org/wiki/DNA_motif "wiki" -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001683 ! sequence_motif - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000716 -name: dicistronic_mRNA -def: "An mRNA that has the quality dicistronic." [SO:ke] -synonym: "dicistronic mRNA" EXACT [] -synonym: "dicistronic processed transcript" RELATED [] -is_a: SO:0000079 ! dicistronic_transcript -is_a: SO:0000634 ! polycistronic_mRNA -relationship: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. 
Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interrupted by one or more stop codons; usually identified through intergenomic sequence comparisons." [SGD:rb] -comment: Term requested by Rama from SGD. -synonym: "blocked reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -synonym: "superscaffold" RELATED [] -is_a: SO:0001876 ! partial_genomic_sequence_assembly - -[Term] -id: SO:0000720 -name: foreign_transposable_element -def: "A transposable element that is foreign." [SO:ke] -comment: requested by Michael on 19 Nov 2004. -synonym: "foreign transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000721 -name: gene_with_dicistronic_primary_transcript -def: "A gene that encodes a dicistronic primary transcript." [SO:xp] -comment: Requested by Michael, 19 nov 2004. -synonym: "gene with dicistronic primary transcript" EXACT [] -is_a: SO:0000692 ! gene_with_dicistronic_transcript -relationship: transcribed_to SO:1001197 ! dicistronic_primary_transcript - -[Term] -id: SO:0000722 -name: gene_with_dicistronic_mRNA -def: "A gene that encodes a polycistronic mRNA." [SO:xp] -comment: Requested by MA nov 19 2004. -synonym: "gene with dicistronic mRNA" EXACT [] -synonym: "gene with dicistronic processed transcript" EXACT [] -is_a: SO:0000692 ! 
gene_with_dicistronic_transcript -relationship: transcribed_to SO:0000716 ! dicistronic_mRNA - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." [SO:ma] -synonym: "intervening DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/IDNA "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal - -[Term] -id: SO:0000726 -name: repeat_unit -def: "The simplest repeated component of a repeat region. A single repeat." [SO:ke] -comment: Added to comply with the feature table. A single repeat. -synonym: "repeat unit" EXACT [] -xref: http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory region where transcription factor binding sites clustered to regulate various aspects of transcription activities. (CRMs can be located a few kb to hundred kb upstream of the basal promoter, in the coding sequence, within introns, or in the downstream 3'UTR sequences, as well as on different chromosome). A single gene can be regulated by multiple CRMs to give precise control of its spatial and temporal expression. CRMs function as nodes in large, intertwined regulatory network." [PMID:19660565, SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -synonym: "transcription factor module" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region -relationship: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000728 -name: intein -def: "A region of a peptide that is able to excise itself and rejoin the remaining portions with a peptide bond." [SO:ke] -comment: Intein-mediated protein splicing occurs after mRNA has been translated into a protein. -synonym: "protein intron" RELATED [] -xref: http://en.wikipedia.org/wiki/Intein "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000729 -name: intein_containing -def: "An attribute of protein-coding genes where the initial protein product contains an intein." [SO:ke] -synonym: "intein containing" EXACT [] -is_a: SO:0000010 ! protein_coding - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000731 -name: fragmentary -def: "An attribute to describe a feature that is incomplete." [SO:ke] -comment: Term added because of request by MO people. -synonym: "fragment" EXACT [] -is_a: SO:0000905 ! 
status - -[Term] -id: SO:0000732 -name: predicted -def: "An attribute describing an unverified region." [SO:ke] -xref: http://en.wikipedia.org/wiki/Predicted "wiki" -is_a: SO:0000905 ! status - -[Term] -id: SO:0000733 -name: feature_attribute -def: "An attribute describing a located_sequence_feature." [SO:ke] -synonym: "feature attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000734 -name: exemplar_mRNA -def: "An exemplar is a representative cDNA sequence for each gene. The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: Added for the MO people. -synonym: "exemplar mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -relationship: has_quality SO:0000864 ! exemplar - -[Term] -id: SO:0000735 -name: sequence_location -synonym: "sequence location" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_sequence -synonym: "organelle sequence" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "mitochondrial sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000738 -name: nuclear_sequence -synonym: "nuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -synonym: "nucleomorphic sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000740 -name: plastid_sequence -synonym: "plastid sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000741 -name: kinetoplast -alt_id: SO:0000826 -def: "A kinetoplast is an interlocked network of thousands of minicircles and tens of maxi circles, located near the base of the flagellum of some protozoan species." 
[PMID:8395055] -synonym: "kinetoplast_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Kinetoplast "wiki" -is_a: SO:0001026 ! genome -relationship: has_part SO:0000742 ! maxicircle -relationship: has_part SO:0000980 ! minicircle - -[Term] -id: SO:0000742 -name: maxicircle -alt_id: SO:0000827 -def: "A maxicircle is a replicon, part of a kinetoplast, that contains open reading frames and replicates via a rolling circle method." [PMID:8395055] -synonym: "maxicircle_chromosome" EXACT [] -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000743 -name: apicoplast_sequence -synonym: "apicoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -synonym: "chromoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -synonym: "chloroplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -synonym: "cyanelle sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -synonym: "leucoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -synonym: "proplastid sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_location -synonym: "plasmid location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -synonym: "amplification origin" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_location -synonym: "proviral location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0001679 ! 
transcription_regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -def: "The region of sequence that has been inserted and is being propagated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000754 -name: lambda_vector -def: "The lambda bacteriophage is the vector for the linear lambda clone. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -synonym: "lambda vector" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000755 -name: plasmid_vector -synonym: "plasmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Plasmid_vector#Vectors "wiki" -is_a: SO:0000440 ! vector_replicon -relationship: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template." [SO:ma] -synonym: "complementary DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/CDNA "wiki" -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -synonym: "single strand cDNA" EXACT [] -synonym: "single stranded cDNA" EXACT [] -synonym: "single-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -synonym: "double strand cDNA" RELATED [] -synonym: "double stranded cDNA" EXACT [] -synonym: "double-strand cDNA" RELATED [] -is_a: SO:0000756 ! 
cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_obsolete: true - -[Term] -id: SO:0000760 -name: YAC_clone -is_obsolete: true - -[Term] -id: SO:0000761 -name: phagemid_clone -is_obsolete: true - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000763 -name: fosmid_clone -is_obsolete: true - -[Term] -id: SO:0000764 -name: BAC_clone -is_obsolete: true - -[Term] -id: SO:0000765 -name: cosmid_clone -is_obsolete: true - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -def: "A tRNA sequence that has a pyrrolysine anticodon, and a 3' pyrrolysine binding region." [SO:ke] -synonym: "pyrrolysyl tRNA" EXACT [] -synonym: "pyrrolysyl-transfer ribonucleic acid" EXACT [] -synonym: "pyrrolysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0001178 ! pyrrolysine_tRNA_primary_transcript - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome." [SO:ma] -is_a: SO:0000155 ! plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria. Processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw, issn:1362-4962] -comment: Added in response to comment from Kelly Williams from Indiana. Nov 2005. -synonym: "tmRNA coding piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." 
[doi:10.1093/nar/gkh795, Indiana:kw] -comment: Added in response to Kelly Williams from Indiana. Date: Nov 2005. -synonym: "tmRNA acceptor piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000771 -name: QTL -def: "A quantitative trait locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." [http://rgd.mcw.edu/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005. -synonym: "quantitative trait locus" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000772 -name: genomic_island -def: "A genomic island is an integrated mobile genetic element, characterized by size (over 10 Kb). It that has features that suggest a foreign origin. These can include nucleotide distribution (oligonucleotides signature, CG content etc.) that differs from the bulk of the chromosome and/or genes suggesting DNA mobility." [Phigo:at, SO:ke] -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -synonym: "genomic island" EXACT [] -xref: http://en.wikipedia.org/wiki/Genomic_island "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "pathogenic island" EXACT [] -is_a: SO:0000772 ! 
genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands. -synonym: "metabolic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -def: "An adaptive island is a genomic island that provides an adaptive advantage to the host." [SO:ke] -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands. Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "adaptive island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands. Evolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA gene. John T. Sullivan and Clive W. Ronso PNAS 1998 Apr 28 95 (9) 5145-5149. -synonym: "symbiosis island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. 
Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000779 -name: engineered_episome -def: "An episome that is engineered." [SO:xp] -comment: Requested by Lynn Crosby Jan 2006. -synonym: "engineered episome" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -is_a: SO:0000768 ! episome -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000780 -name: transposable_element_attribute -comment: Added by KE Jan 2006 to capture the kinds of attributes of TEs -is_obsolete: true - -[Term] -id: SO:0000781 -name: transgenic -def: "Attribute describing sequence that has been integrated with foreign sequence." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000782 -name: natural -def: "An attribute describing a feature that occurs in nature." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000783 -name: engineered -def: "An attribute to describe a region that was modified in vitro." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000784 -name: foreign -def: "An attribute to describe a region from another species." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000785 -name: cloned_region -comment: Added in response to Lynn Crosby. A clone insert may be composed of many cloned regions. -synonym: "cloned region" EXACT [] -synonym: "cloned segment" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000786 -name: reagent_attribute -comment: Added jan 2006 by KE. -synonym: "reagent attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000787 -name: clone_attribute -is_obsolete: true - -[Term] -id: SO:0000788 -name: cloned -is_obsolete: true - -[Term] -id: SO:0000789 -name: validated -def: "An attribute to describe a feature that has been proven." [SO:ke] -is_a: SO:0000905 ! 
status - -[Term] -id: SO:0000790 -name: invalidated -def: "An attribute describing a feature that is invalidated." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000791 -name: cloned_genomic -is_obsolete: true - -[Term] -id: SO:0000792 -name: cloned_cDNA -is_obsolete: true - -[Term] -id: SO:0000793 -name: engineered_DNA -is_obsolete: true - -[Term] -id: SO:0000794 -name: engineered_rescue_region -def: "A rescue region that is engineered." [SO:xp] -synonym: "engineered rescue fragment" EXACT [] -synonym: "engineered rescue region" EXACT [] -synonym: "engineered rescue segment" EXACT [] -is_a: SO:0000411 ! rescue_region -is_a: SO:0000804 ! engineered_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000795 -name: rescue_mini_gene -def: "A mini_gene that rescues." [SO:xp] -synonym: "rescue mini gene" EXACT [] -synonym: "rescue mini-gene" EXACT [] -is_a: SO:0000815 ! mini_gene -relationship: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000796 -name: transgenic_transposable_element -def: "TE that has been modified in vitro, including insertion of DNA derived from a source other than the originating TE." [FB:mc] -comment: Modified as requested by Lynn - FB. May 2007. -synonym: "transgenic transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -relationship: derives_from SO:0000151 ! clone -relationship: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000797 -name: natural_transposable_element -def: "TE that exists (or existed) in nature." [FB:mc] -synonym: "natural transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -relationship: has_quality SO:0000782 ! natural - -[Term] -id: SO:0000798 -name: engineered_transposable_element -def: "TE that has been modified by manipulations in vitro." [FB:mc] -synonym: "engineered transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -is_a: SO:0000804 ! 
engineered_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000799 -name: engineered_foreign_transposable_element -def: "A transposable_element that is engineered and foreign." [FB:mc] -synonym: "engineered foreign transposable element" EXACT [] -is_a: SO:0000720 ! foreign_transposable_element -is_a: SO:0000798 ! engineered_transposable_element -is_a: SO:0000805 ! engineered_foreign_region -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000800 -name: assortment_derived_duplication -def: "A multi-chromosome duplication aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment derived duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000801 -name: assortment_derived_deficiency_plus_duplication -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency and a duplication." [FB:gm] -synonym: "assortment derived deficiency plus duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000802 -name: assortment_derived_deficiency -def: "A multi-chromosome deficiency aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment-derived deficiency" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000803 -name: assortment_derived_aneuploid -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency or a duplication." [FB:gm] -synonym: "assortment derived aneuploid" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000804 -name: engineered_region -def: "A region that is engineered." [SO:xp] -synonym: "construct" EXACT [] -synonym: "engineered region" EXACT [] -synonym: "engineered sequence" EXACT [] -is_a: SO:0001409 ! 
biomaterial_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000805 -name: engineered_foreign_region -def: "A region that is engineered and foreign." [SO:xp] -synonym: "engineered foreign region" EXACT [] -is_a: SO:0000804 ! engineered_region -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000806 -name: fusion -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000807 -name: engineered_tag -def: "A tag that is engineered." [SO:xp] -synonym: "engineered tag" EXACT [] -is_a: SO:0000324 ! tag -is_a: SO:0000804 ! engineered_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000808 -name: validated_cDNA_clone -def: "A cDNA clone that has been validated." [SO:xp] -synonym: "validated cDNA clone" EXACT [] -is_a: SO:0000317 ! cDNA_clone -relationship: has_quality SO:0000789 ! validated - -[Term] -id: SO:0000809 -name: invalidated_cDNA_clone -def: "A cDNA clone that is invalid." [SO:xp] -synonym: "invalidated cDNA clone" EXACT [] -is_a: SO:0000317 ! cDNA_clone -relationship: has_quality SO:0000790 ! invalidated - -[Term] -id: SO:0000810 -name: chimeric_cDNA_clone -def: "A cDNA clone invalidated because it is chimeric." [SO:xp] -synonym: "chimeric cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -relationship: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA - -[Term] -id: SO:0000811 -name: genomically_contaminated_cDNA_clone -def: "A cDNA clone invalidated by genomic contamination." [SO:xp] -synonym: "genomically contaminated cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -relationship: has_quality SO:0000414 ! invalidated_by_genomic_contamination - -[Term] -id: SO:0000812 -name: polyA_primed_cDNA_clone -def: "A cDNA clone invalidated by polyA priming." [SO:xp] -synonym: "polyA primed cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -relationship: has_quality SO:0000415 ! 
invalidated_by_genomic_polyA_primed_cDNA - -[Term] -id: SO:0000813 -name: partially_processed_cDNA_clone -def: "A cDNA invalidated clone by partial processing." [SO:xp] -synonym: "partially processed cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -relationship: has_quality SO:0000416 ! invalidated_by_partial_processing - -[Term] -id: SO:0000814 -name: rescue -def: "An attribute describing a region's ability, when introduced to a mutant organism, to re-establish (rescue) a phenotype." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000815 -name: mini_gene -def: "By definition, minigenes are short open-reading frames (ORF), usually encoding approximately 9 to 20 amino acids, which are expressed in vivo (as distinct from being synthesized as peptide or protein ex vivo and subsequently injected). The in vivo synthesis confers a distinct advantage: the expressed sequences can enter both antigen presentation pathways, MHC I (inducing CD8+ T- cells, which are usually cytotoxic T-lymphocytes (CTL)) and MHC II (inducing CD4+ T-cells, usually 'T-helpers' (Th)); and can encounter B-cells, inducing antibody responses. Three main vector approaches have been used to deliver minigenes: viral vectors, bacterial vectors and plasmid DNA." [PMID:15992143] -synonym: "mini gene" EXACT [] -is_a: SO:0000236 ! ORF - -[Term] -id: SO:0000816 -name: rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "rescue gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000817 -name: wild_type -def: "An attribute describing sequence with the genotype found in nature and/or standard laboratory stock." [SO:ke] -synonym: "wild type" EXACT [] -xref: http://en.wikipedia.org/wiki/Wild_type "wiki" -xref: loinc:LA9658-1 "wild type" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000818 -name: wild_type_rescue_gene -def: "A gene that rescues." 
[SO:xp] -synonym: "wild type rescue gene" EXACT [] -is_a: SO:0000816 ! rescue_gene -relationship: has_quality SO:0000817 ! wild_type - -[Term] -id: SO:0000819 -name: mitochondrial_chromosome -def: "A chromosome originating in a mitochondria." [SO:xp] -synonym: "mitochondrial chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000820 -name: chloroplast_chromosome -def: "A chromosome originating in a chloroplast." [SO:xp] -synonym: "chloroplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000821 -name: chromoplast_chromosome -def: "A chromosome originating in a chromoplast." [SO:xp] -synonym: "chromoplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000822 -name: cyanelle_chromosome -def: "A chromosome originating in a cyanelle." [SO:xp] -synonym: "cyanelle chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000823 -name: leucoplast_chromosome -def: "A chromosome with origin in a leucoplast." [SO:xp] -synonym: "leucoplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000824 -name: macronuclear_chromosome -def: "A chromosome originating in a macronucleus." [SO:xp] -synonym: "macronuclear chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000083 ! macronuclear_sequence - -[Term] -id: SO:0000825 -name: micronuclear_chromosome -def: "A chromosome originating in a micronucleus." [SO:xp] -synonym: "micronuclear chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000084 ! micronuclear_sequence - -[Term] -id: SO:0000828 -name: nuclear_chromosome -def: "A chromosome originating in a nucleus." 
[SO:xp] -synonym: "nuclear chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000829 -name: nucleomorphic_chromosome -def: "A chromosome originating in a nucleomorph." [SO:xp] -synonym: "nucleomorphic chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000832 -name: promoter_region -def: "A region of sequence which is part of a promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -def: "A region of a mature transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! 
transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000838 -name: rRNA_primary_transcript_region -def: "A region of an rRNA primary transcript." [SO:ke] -comment: To allow transcribed_spacer_region to have a path to the root. -synonym: "rRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! 
biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000840 -name: repeat_component -def: "A region of a repeated sequence." [SO:ke] -comment: A manufactured to group the parts of repeats, to give them an is_a path back to the root. -synonym: "repeat component" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000843 -name: bacterial_RNApol_promoter_region -def: "A region which is part of a bacterial RNA polymerase promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of bacterial_RNApol_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000844 -name: RNApol_II_promoter_region -def: "A region of sequence which is a promoter for RNA polymerase II." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_II_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000845 -name: RNApol_III_promoter_type_1_region -def: "A region of sequence which is a promoter for RNA polymerase III type 1." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_1 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000846 -name: RNApol_III_promoter_type_2_region -def: "A region of sequence which is a promoter for RNA polymerase III type 2." 
[SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_2 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000847 -name: tmRNA_region -def: "A region of a tmRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of tmRNA, thus giving them an is_a path back to the root. -synonym: "tmRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000584 ! tmRNA - -[Term] -id: SO:0000848 -name: LTR_component -synonym: "long term repeat component" EXACT [] -synonym: "LTR component" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000849 -name: three_prime_LTR_component -synonym: "3' long terminal repeat component" EXACT [] -synonym: "three prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000850 -name: five_prime_LTR_component -synonym: "5' long term repeat component" EXACT [] -synonym: "five prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000851 -name: CDS_region -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0000853 -name: homologous_region -def: "A region that is homologous to another region." [SO:ke] -synonym: "homolog" EXACT [] -synonym: "homologous region" EXACT [] -synonym: "homologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Homology_(biology) "wiki" -is_a: SO:0000330 ! conserved_region -relationship: has_quality SO:0000857 ! 
homologous - -[Term] -id: SO:0000854 -name: paralogous_region -def: "A homologous_region that is paralogous to another region." [SO:ke] -comment: A term to be used in conjunction with the paralogous_to relationship. -synonym: "paralog" EXACT [] -synonym: "paralogous region" EXACT [] -synonym: "paralogue" EXACT [] -xref: http://en.wikipedia.org/wiki/Paralog#Paralogy "wiki" -is_a: SO:0000853 ! homologous_region -relationship: has_quality SO:0000859 ! paralogous - -[Term] -id: SO:0000855 -name: orthologous_region -def: "A homologous_region that is orthologous to another region." [SO:ke] -comment: This term should be used in conjunction with the similarity relationships defined in SO. -synonym: "ortholog" EXACT [] -synonym: "orthologous region" EXACT [] -synonym: "orthologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Ortholog#Orthology "wiki" -is_a: SO:0000853 ! homologous_region -relationship: has_quality SO:0000858 ! orthologous - -[Term] -id: SO:0000856 -name: conserved -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000857 -name: homologous -def: "Similarity due to common ancestry." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000858 -name: orthologous -def: "An attribute describing a kind of homology where divergence occured after a speciation event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000859 -name: paralogous -def: "An attribute describing a kind of homology where divergence occurred after a duplication event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000860 -name: syntenic -def: "Attribute describing sequence regions occurring in same order on chromosome of different species." [SO:ke] -xref: http://en.wikipedia.org/wiki/Syntenic "wiki" -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000861 -name: capped_primary_transcript -def: "A primary transcript that is capped." [SO:xp] -synonym: "capped primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript -relationship: adjacent_to SO:0000581 ! 
cap -relationship: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000862 -name: capped_mRNA -def: "An mRNA that is capped." [SO:xp] -synonym: "capped mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -relationship: adjacent_to SO:0000581 ! cap -relationship: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000863 -name: mRNA_attribute -def: "An attribute describing an mRNA feature." [SO:ke] -synonym: "mRNA attribute" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000864 -name: exemplar -def: "An attribute describing a sequence is representative of a class of similar sequences." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000865 -name: frameshift -def: "An attribute describing a sequence that contains a mutation involving the deletion or insertion of one or more bases, where this number is not divisible by 3." [SO:ke] -xref: http://en.wikipedia.org/wiki/Frameshift "wiki" -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000866 -name: minus_1_frameshift -def: "A frameshift caused by deleting one base." [SO:ke] -synonym: "minus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000867 -name: minus_2_frameshift -def: "A frameshift caused by deleting two bases." [SO:ke] -synonym: "minus 2 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000868 -name: plus_1_frameshift -def: "A frameshift caused by inserting one base." [SO:ke] -synonym: "plus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000869 -name: plus_2_framshift -def: "A frameshift caused by inserting two bases." [SO:ke] -synonym: "plus 2 framshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000870 -name: trans_spliced -def: "An attribute describing transcript sequence that is created by splicing exons from diferent genes." [SO:ke] -synonym: "trans-spliced" EXACT [] -is_a: SO:0000237 ! 
transcript_attribute - -[Term] -id: SO:0000871 -name: polyadenylated_mRNA -def: "An mRNA that is polyadenylated." [SO:xp] -synonym: "polyadenylated mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -relationship: adjacent_to SO:0000610 ! polyA_sequence -relationship: has_quality SO:0000246 ! polyadenylated - -[Term] -id: SO:0000872 -name: trans_spliced_mRNA -def: "An mRNA that is trans-spliced." [SO:xp] -synonym: "trans-spliced mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -is_a: SO:0000479 ! trans_spliced_transcript -relationship: adjacent_to SO:0000636 ! spliced_leader_RNA -relationship: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000873 -name: edited_transcript -def: "A transcript that is edited." [SO:ke] -synonym: "edited transcript" EXACT [] -is_a: SO:0000673 ! transcript -relationship: guided_by SO:0000602 ! guide_RNA -relationship: has_part SO:0000977 ! anchor_binding_site -relationship: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000874 -name: edited_transcript_by_A_to_I_substitution -def: "A transcript that has been edited by A to I substitution." [SO:ke] -synonym: "edited transcript by A to I substitution" EXACT [] -is_a: SO:0000873 ! edited_transcript - -[Term] -id: SO:0000875 -name: bound_by_protein -def: "An attribute describing a sequence that is bound by a protein." [SO:ke] -synonym: "bound by protein" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000876 -name: bound_by_nucleic_acid -def: "An attribute describing a sequence that is bound by a nucleic acid." [SO:ke] -synonym: "bound by nucleic acid" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000877 -name: alternatively_spliced -def: "An attribute describing a situation where a gene may encode for more than 1 transcript." [SO:ke] -synonym: "alternatively spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000878 -name: monocistronic -def: "An attribute describing a sequence that contains the code for one gene product." 
[SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000879 -name: dicistronic -def: "An attribute describing a sequence that contains the code for two gene products." [SO:ke] -is_a: SO:0000880 ! polycistronic - -[Term] -id: SO:0000880 -name: polycistronic -def: "An attribute describing a sequence that contains the code for more than one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000881 -name: recoded -def: "An attribute describing an mRNA sequence that has been reprogrammed at translation, causing localized alterations." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000882 -name: codon_redefined -def: "An attribute describing the alteration of codon meaning." [SO:ke] -synonym: "codon redefined" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000883 -name: stop_codon_read_through -def: "A stop codon redefined to be a new amino acid." [SO:ke] -synonym: "stop codon read through" EXACT [] -synonym: "stop codon readthrough" RELATED [] -is_a: SO:0000145 ! recoded_codon - -[Term] -id: SO:0000884 -name: stop_codon_redefined_as_pyrrolysine -def: "A stop codon redefined to be the new amino acid, pyrrolysine." [SO:ke] -synonym: "stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000885 -name: stop_codon_redefined_as_selenocysteine -def: "A stop codon redefined to be the new amino acid, selenocysteine." [SO:ke] -synonym: "stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000886 -name: recoded_by_translational_bypass -def: "Recoded mRNA where a block of nucleotides is not translated." [SO:ke] -synonym: "recoded by translational bypass" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000887 -name: translationally_frameshifted -def: "Recoding by frameshifting a particular site." [SO:ke] -synonym: "translationally frameshifted" EXACT [] -is_a: SO:0000881 ! 
recoded - -[Term] -id: SO:0000888 -name: maternally_imprinted_gene -def: "A gene that is maternally_imprinted." [SO:xp] -synonym: "maternally imprinted gene" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -relationship: has_quality SO:0000135 ! maternally_imprinted - -[Term] -id: SO:0000889 -name: paternally_imprinted_gene -def: "A gene that is paternally imprinted." [SO:xp] -synonym: "paternally imprinted gene" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -relationship: has_quality SO:0000136 ! paternally_imprinted - -[Term] -id: SO:0000890 -name: post_translationally_regulated_gene -def: "A gene that is post translationally regulated." [SO:xp] -synonym: "post translationally regulated gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000891 -name: negatively_autoregulated_gene -def: "A gene that is negatively autoreguated." [SO:xp] -synonym: "negatively autoregulated gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000473 ! negatively_autoregulated - -[Term] -id: SO:0000892 -name: positively_autoregulated_gene -def: "A gene that is positively autoregulated." [SO:xp] -synonym: "positively autoregulated gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000475 ! positively_autoregulated - -[Term] -id: SO:0000893 -name: silenced -def: "An attribute describing an epigenetic process where a gene is inactivated at transcriptional or translational level." [SO:ke] -xref: http://en.wikipedia.org/wiki/Silenced "wiki" -is_a: SO:0000126 ! transcriptionally_repressed - -[Term] -id: SO:0000894 -name: silenced_by_DNA_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA modifications, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA modification" EXACT [] -is_a: SO:0000893 ! 
silenced - -[Term] -id: SO:0000895 -name: silenced_by_DNA_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA methylation, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA methylation" EXACT [] -is_a: SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000896 -name: translationally_regulated_gene -def: "A gene that is translationally regulated." [SO:xp] -synonym: "translationally regulated gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000131 ! translationally_regulated - -[Term] -id: SO:0000897 -name: allelically_excluded_gene -def: "A gene that is allelically_excluded." [SO:xp] -synonym: "allelically excluded gene" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -relationship: has_quality SO:0000137 ! allelically_excluded - -[Term] -id: SO:0000898 -name: epigenetically_modified_gene -def: "A gene that is epigenetically modified." [SO:ke] -synonym: "epigenetically modified gene" EXACT [] -is_a: SO:0000704 ! gene -is_a: SO:0001720 ! epigenetically_modified_region -relationship: has_quality SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000899 -name: nuclear_mitochondrial -def: "An attribute describing a nuclear pseudogene of a mitochndrial gene." [SO:ke] -synonym: "nuclear mitochondrial" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000900 -name: processed -def: "An attribute describing a pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promotors, but often including a polyA tail." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000901 -name: unequally_crossed_over -def: "An attribute describing a pseudogene that was created by tandem duplication and unequal crossing over during recombination." 
[SO:ke] -synonym: "unequally crossed over" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000902 -name: transgene -def: "A transgene is a gene that has been transferred naturally or by any of a number of genetic engineering techniques from one organism to another." [SO:xp] -xref: http://en.wikipedia.org/wiki/Transgene "wiki" -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000903 -name: endogenous_retroviral_sequence -synonym: "endogenous retroviral sequence" EXACT [] -is_a: SO:0000751 ! proviral_location - -[Term] -id: SO:0000904 -name: rearranged_at_DNA_level -def: "An attribute to describe the sequence of a feature, where the DNA is rearranged." [SO:ke] -synonym: "rearranged at DNA level" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000905 -name: status -def: "An attribute describing the status of a feature, based on the available evidence." [SO:ke] -comment: This term is the hypernym of attributes and should not be annotated to. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000906 -name: independently_known -def: "Attribute to describe a feature that is independently known - not predicted." [SO:ke] -synonym: "independently known" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000907 -name: supported_by_sequence_similarity -def: "An attribute to describe a feature that has been predicted using sequence similarity techniques." [SO:ke] -synonym: "supported by sequence similarity" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000908 -name: supported_by_domain_match -def: "An attribute to describe a feature that has been predicted using sequence similarity of a known domain." [SO:ke] -synonym: "supported by domain match" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000909 -name: supported_by_EST_or_cDNA -def: "An attribute to describe a feature that has been predicted using sequence similarity to EST or cDNA data." 
[SO:ke] -synonym: "supported by EST or cDNA" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000910 -name: orphan -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000911 -name: predicted_by_ab_initio_computation -def: "An attribute describing a feature that is predicted by a computer program that did not rely on sequence similarity." [SO:ke] -synonym: "predicted by ab initio computation" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000912 -name: asx_turn -alt_id: BS:00203 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Aspartate or Asparagine (Asx), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0000913 -name: cloned_cDNA_insert -def: "A clone insert made from cDNA." [SO:xp] -synonym: "cloned cDNA insert" EXACT [] -is_a: SO:0000753 ! clone_insert -relationship: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000914 -name: cloned_genomic_insert -def: "A clone insert made from genomic DNA." [SO:xp] -synonym: "cloned genomic insert" EXACT [] -is_a: SO:0000753 ! clone_insert -relationship: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000915 -name: engineered_insert -def: "A clone insert that is engineered." [SO:xp] -synonym: "engineered insert" EXACT [] -is_a: SO:0000753 ! clone_insert -is_a: SO:0000804 ! engineered_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000916 -name: edit_operation -synonym: "edit operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000917 -name: insert_U -def: "An edit to insert a U." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. 
-synonym: "insert U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000918 -name: delete_U -def: "An edit to delete a uridine." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "delete U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000919 -name: substitute_A_to_I -def: "An edit to substitute an I for an A." [SO:ke] -synonym: "substitute A to I" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000920 -name: insert_C -def: "An edit to insert a cytidine." [SO:ke] -synonym: "insert C" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000921 -name: insert_dinucleotide -def: "An edit to insert a dinucleotide." [SO:ke] -synonym: "insert dinucleotide" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000922 -name: substitute_C_to_U -def: "An edit to substitute an U for a C." [SO:ke] -synonym: "substitute C to U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000923 -name: insert_G -def: "An edit to insert a G." [SO:ke] -synonym: "insert G" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000924 -name: insert_GC -def: "An edit to insert a GC dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. 
-synonym: "insert GC" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000925 -name: insert_GU -def: "An edit to insert a GU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000926 -name: insert_CU -def: "An edit to insert a CU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert CU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000927 -name: insert_AU -def: "An edit to insert a AU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. 
The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000928 -name: insert_AA -def: "An edit to insert a AA dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AA" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000929 -name: edited_mRNA -def: "An mRNA that is edited." [SO:xp] -synonym: "edited mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -is_a: SO:0000873 ! edited_transcript -relationship: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000930 -name: guide_RNA_region -def: "A region of guide RNA." [SO:ma] -synonym: "guide RNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000602 ! guide_RNA - -[Term] -id: SO:0000931 -name: anchor_region -def: "A region of a guide_RNA that base-pairs to a target mRNA." [SO:jk] -synonym: "anchor region" EXACT [] -is_a: SO:0000930 ! 
guide_RNA_region - -[Term] -id: SO:0000932 -name: pre_edited_mRNA -synonym: "pre-edited mRNA" EXACT [] -is_a: SO:0000120 ! protein_coding_primary_transcript - -[Term] -id: SO:0000933 -name: intermediate -def: "An attribute to describe a feature between stages of processing." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000934 -name: miRNA_target_site -def: "A miRNA target site is a binding site where the molecule is a micro RNA." [FB:cds] -synonym: "miRNA target site" EXACT [] -is_a: SO:0001655 ! nucleotide_binding_site - -[Term] -id: SO:0000935 -name: edited_CDS -def: "A CDS that is edited." [SO:xp] -synonym: "edited CDS" EXACT [] -is_a: SO:0000316 ! CDS -relationship: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000936 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -synonym: "vertebrate immunoglobulin T cell receptor rearranged segment" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000937 -name: vertebrate_immune_system_feature -is_obsolete: true - -[Term] -id: SO:0000938 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor rearranged gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000939 -name: vertebrate_immune_system_gene_recombination_signal_feature -synonym: "vertebrate immune system gene recombination signal feature" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000940 -name: recombinationally_rearranged -synonym: "recombinationally rearranged" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000941 -name: recombinationally_rearranged_vertebrate_immune_system_gene -def: "A recombinationally rearranged gene of the vertebrate immune system." [SO:xp] -synonym: "recombinationally rearranged vertebrate immune system gene" EXACT [] -is_a: SO:0000456 ! 
recombinationally_rearranged_gene - -[Term] -id: SO:0000942 -name: attP_site -def: "An integration/excision site of a phage chromosome at which a recombinase acts to insert the phage DNA at a cognate integration/excision site on a bacterial chromosome." [SO:as] -synonym: "attP site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0001042 ! phage_sequence - -[Term] -id: SO:0000943 -name: attB_site -def: "An integration/excision site of a bacterial chromosome at which a recombinase acts to insert foreign DNA containing a cognate integration/excision site." [SO:as] -synonym: "attB site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000944 -name: attL_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attB_site and the 3' portion of attP_site." [SO:as] -synonym: "attBP'" RELATED [] -synonym: "attL site" RELATED [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000945 -name: attR_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attP_site and the 3' portion of attB_site." [SO:as] -synonym: "attPB'" RELATED [] -synonym: "attR site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000946 -name: integration_excision_site -def: "A region specifically recognised by a recombinase, which inserts or removes another region marked by a distinct cognate integration/excision site." [SO:as] -synonym: "attachment site" RELATED [] -synonym: "integration excision site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000947 -name: resolution_site -def: "A region specifically recognised by a recombinase, which separates a physically contiguous circle of DNA into two physically separate circles." [SO:as] -synonym: "res site" EXACT [] -synonym: "resolution site" EXACT [] -is_a: SO:0000342 ! 
site_specific_recombination_target_region - -[Term] -id: SO:0000948 -name: inversion_site -def: "A region specifically recognised by a recombinase, which inverts the region flanked by a pair of sites." [SO:ma] -comment: A target region for site-specific inversion of a DNA region and which carries binding sites for a site-specific recombinase and accessory proteins as well as the site for specific cleavage by the recombinase. -synonym: "inversion site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000949 -name: dif_site -def: "A site at which replicated bacterial circular chromosomes are decatenated by site specific resolvase." [SO:as] -synonym: "dif site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000950 -name: attC_site -def: "An attC site is a sequence required for the integration of a DNA of an integron." [SO:as] -synonym: "attC site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000951 -name: eukaryotic_terminator -synonym: "eukaryotic terminator" EXACT [] -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000952 -name: oriV -def: "An origin of vegetative replication in plasmids and phages." [SO:as] -synonym: "origin of vegetative replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000953 -name: oriC -def: "An origin of bacterial chromosome replication." [SO:as] -synonym: "origin of bacterial chromosome replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000954 -name: DNA_chromosome -def: "Structural unit composed of a self-replicating, DNA molecule." [SO:ma] -synonym: "DNA chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0000955 -name: double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded DNA molecule." 
[SO:ma] -synonym: "double stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! DNA_chromosome -relationship: has_quality SO:0000985 ! double - -[Term] -id: SO:0000956 -name: single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded DNA molecule." [SO:ma] -synonym: "single stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! DNA_chromosome -relationship: has_quality SO:0000984 ! single - -[Term] -id: SO:0000957 -name: linear_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear DNA molecule." [SO:ma] -synonym: "linear double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! double_stranded_DNA_chromosome -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000958 -name: circular_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular DNA molecule." [SO:ma] -synonym: "circular double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! double_stranded_DNA_chromosome -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000959 -name: linear_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear DNA molecule." [SO:ma] -synonym: "linear single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! single_stranded_DNA_chromosome -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000960 -name: circular_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! single_stranded_DNA_chromosome -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000961 -name: RNA_chromosome -def: "Structural unit composed of a self-replicating, RNA molecule." [SO:ma] -synonym: "RNA chromosome" EXACT [] -is_a: SO:0000340 ! 
chromosome -relationship: has_quality SO:0000356 ! RNA - -[Term] -id: SO:0000962 -name: single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded RNA molecule." [SO:ma] -synonym: "single stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! RNA_chromosome -relationship: has_quality SO:0000984 ! single - -[Term] -id: SO:0000963 -name: linear_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear RNA molecule." [SO:ma] -synonym: "linear single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! single_stranded_RNA_chromosome -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000964 -name: linear_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear RNA molecule." [SO:ma] -synonym: "linear double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! double_stranded_RNA_chromosome -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000965 -name: double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded RNA molecule." [SO:ma] -synonym: "double stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! RNA_chromosome -relationship: has_quality SO:0000985 ! double - -[Term] -id: SO:0000966 -name: circular_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! single_stranded_RNA_chromosome -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000967 -name: circular_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular RNA molecule." [SO:ma] -synonym: "circular double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! double_stranded_RNA_chromosome -relationship: has_quality SO:0000988 ! 
circular - -[Term] -id: SO:0000968 -name: sequence_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "sequence replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000969 -name: rolling_circle -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070581. -synonym: "rolling circle" EXACT [] -xref: http://en.wikipedia.org/wiki/Rolling_circle "wiki" -is_obsolete: true - -[Term] -id: SO:0000970 -name: theta_replication -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070582 -synonym: "theta replication" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000971 -name: DNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0006260. -synonym: "DNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000972 -name: RNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "RNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000973 -name: insertion_sequence -def: "A terminal_inverted_repeat_element that is bacterial and only encodes the functions required for its transposition between these inverted repeats." [SO:as] -synonym: "insertion sequence" EXACT [] -synonym: "IS" RELATED [] -xref: http://en.wikipedia.org/wiki/Insertion_sequence "wiki" -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000975 -name: minicircle_gene -synonym: "minicircle gene" EXACT [] -is_a: SO:0000089 ! kinetoplast_gene -relationship: part_of SO:0000980 ! minicircle - -[Term] -id: SO:0000976 -name: cryptic -def: "A feature_attribute describing a feature that is not manifest under normal conditions." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000977 -name: anchor_binding_site -comment: Part of an edited transcript only. -synonym: "anchor binding site" EXACT [] -is_a: SO:0000833 ! 
transcript_region - -[Term] -id: SO:0000978 -name: template_region -def: "A region of a guide_RNA that specifies the insertions and deletions of bases in the editing of a target mRNA." [SO:jk] -synonym: "information region" EXACT [] -synonym: "template region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000979 -name: gRNA_encoding -def: "A non-protein_coding gene that encodes a guide_RNA." [SO:ma] -synonym: "gRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000980 -name: minicircle -alt_id: SO:0000974 -def: "A minicircle is a replicon, part of a kinetoplast, that encodes for guide RNAs." [PMID:8395055] -synonym: "minicircle_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Minicircle "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000981 -name: rho_dependent_bacterial_terminator -synonym: "rho dependent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000982 -name: rho_independent_bacterial_terminator -synonym: "rho independent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000983 -name: strand_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "strand attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000984 -name: single -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000985 -name: double -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000986 -name: topology_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "topology attribute" EXACT [] -is_a: SO:0000443 ! 
polymer_attribute - -[Term] -id: SO:0000987 -name: linear -def: "A quality of a nucleotide polymer that has a 3'-terminal residue and a 5'-terminal residue." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "two-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000988 ! circular - -[Term] -id: SO:0000988 -name: circular -def: "A quality of a nucleotide polymer that has no terminal nucleotide residues." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "zero-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute - -[Term] -id: SO:0000989 -name: class_II_RNA -def: "Small non-coding RNA (59-60 nt long) containing 5' and 3' ends that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -synonym: "class II RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000990 -name: class_I_RNA -def: "Small non-coding RNA (55-65 nt long) containing highly conserved 5' and 3' ends (16 and 8 nt, respectively) that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -comment: Requested by Karen Pilcher - Dictybase. song-Term Tracker-1574577. -synonym: "class I RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000991 -name: genomic_DNA -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "genomic DNA" EXACT [] -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000992 -name: BAC_cloned_genomic_insert -comment: Requested by Andy Schroder - Flybase Harvard, Nov 2006. -synonym: "BAC cloned genomic insert" EXACT [] -is_a: SO:0000914 ! cloned_genomic_insert -relationship: derives_from SO:0000153 ! 
BAC - -[Term] -id: SO:0000993 -name: consensus -comment: Term added Dec 06 to comply with mapping to MGED terms. It should be used to generate consensus regions. The specific cross product terms they require are consensus_region and consensus_mRNA. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000994 -name: consensus_region -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus region" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000995 -name: consensus_mRNA -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -is_a: SO:0000994 ! consensus_region -relationship: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000996 -name: predicted_gene -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "predicted gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000732 ! predicted - -[Term] -id: SO:0000997 -name: gene_fragment -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "gene fragment" EXACT [] -is_a: SO:0000842 ! gene_component_region -relationship: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0000998 -name: recursive_splice_site -def: "A recursive splice site is a splice site which subdivides a large intron. Recursive splicing is a mechanism that splices large introns by sub dividing the intron at non exonic elements and alternate exons." [http://www.genetics.org/cgi/content/full/170/2/661] -synonym: "recursive splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000999 -name: BAC_end -def: "A region of sequence from the end of a BAC clone that may provide a highly specific marker." [SO:ke] -comment: Requested by Keith Boroevich December, 2006. 
-synonym: "BAC end" EXACT [] -synonym: "BAC end sequence" EXACT [] -synonym: "BES" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000153 ! BAC - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001003 -name: solo_LTR -def: "A recombination product between the 2 LTR of the same element." [SO:ke] -comment: Requested by Hadi Quesneville January 2007. -synonym: "solo LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0001004 -name: low_complexity -synonym: "low complexity" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0001005 -name: low_complexity_region -synonym: "low complexity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: has_quality SO:0001004 ! low_complexity - -[Term] -id: SO:0001006 -name: prophage -def: "A phage genome after it has established in the host genome in a latent/immune state either as a plasmid or as an integrated \"island\"." 
[GOC:jl] -xref: http://en.wikipedia.org/wiki/Prophage "wiki" -is_a: SO:0000113 ! proviral_region - -[Term] -id: SO:0001007 -name: cryptic_prophage -def: "A remnant of an integrated prophage in the host genome or an \"island\" in the host genome that includes phage like-genes." [GOC:jl] -comment: This is not cryptic in the same sense as a cryptic gene or cryptic splice site. -synonym: "cryptic prophage" EXACT [] -xref: http://ecoliwiki.net/colipedia/index.php/Category:Cryptic_Prophage.w -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001008 -name: tetraloop -def: "A base-paired stem with loop of 4 non-hydrogen bonded nucleotides." [SO:ke] -xref: http://en.wikipedia.org/wiki/Tetraloop "wiki" -is_a: SO:0000313 ! stem_loop - -[Term] -id: SO:0001009 -name: DNA_constraint_sequence -def: "A double-stranded DNA used to control macromolecular structure and function." [http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au\]&dispmax=50] -synonym: "DNA constraint" EXACT [] -synonym: "DNA constraint sequence" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0001010 -name: i_motif -def: "A cytosine rich domain whereby strands associate both inter- and intramolecularly at moderately acidic pH." [PMID:9753739] -synonym: "i motif" EXACT [] -synonym: "short intercalated motif" EXACT [] -is_a: SO:0000142 ! DNA_sequence_secondary_structure - -[Term] -id: SO:0001011 -name: PNA_oligo -def: "Peptide nucleic acid, is a chemical not known to occur naturally but is artificially synthesized and used in some biological research and medical treatments. The PNA backbone is composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [SO:ke] -synonym: "peptide nucleic acid" EXACT [] -synonym: "PNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Peptide_nucleic_acid "wiki" -is_a: SO:0001247 ! 
synthetic_oligo -relationship: has_quality SO:0001184 ! PNA - -[Term] -id: SO:0001012 -name: DNAzyme -def: "A DNA sequence with catalytic activity." [SO:cb] -comment: Added by request from Colin Batchelor. -synonym: "catalytic DNA" EXACT [] -synonym: "deoxyribozyme" RELATED [] -synonym: "DNA enzyme" EXACT [] -is_a: SO:0000696 ! oligo -relationship: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0001013 -name: MNP -def: "A multiple nucleotide polymorphism with alleles of common length > 1, for example AAA/TTT." [http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?rs=rs2067431] -synonym: "multiple nucleotide polymorphism" RELATED [] -is_a: SO:0002007 ! MNV - -[Term] -id: SO:0001014 -name: intron_domain -comment: Requested by Colin Batchelor, Feb 2007. -synonym: "intron domain" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000188 ! intron - -[Term] -id: SO:0001015 -name: wobble_base_pair -def: "A type of non-canonical base pairing, most commonly between G and U, which is important for the secondary structure of RNAs. It has similar thermodynamic stability to the Watson-Crick pairing. Wobble base pairs only have two hydrogen bonds. Other wobble base pair possibilities are I-A, I-U and I-C." [PMID:11256617] -synonym: "wobble base pair" EXACT [] -synonym: "wobble pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Wobble_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0001016 -name: internal_guide_sequence -def: "A purine-rich sequence in the group I introns which determines the locations of the splice sites in group I intron splicing and has catalytic activity." [SO:cb] -synonym: "IGS" EXACT [] -synonym: "internal guide sequence" EXACT [] -is_a: SO:0001014 ! intron_domain -relationship: part_of SO:0000587 ! group_I_intron - -[Term] -id: SO:0001017 -name: silent_mutation -def: "A sequence variant that does not affect protein function. Silent mutations may occur in genic ( CDS, UTR, intron etc) and intergenic regions. 
Silent mutations may have affects on processes such as splicing and regulation." [SO:ke] -comment: Added in March 2007 in after meeting with pharmgkb. Although this term is in common usage, it is better to annotate with the most specific term possible, such as synonymous codon, intron variant etc. -synonym: "silent mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Silent_mutation "wiki" -xref: loinc:LA6700-4 "Silent" -is_a: SO:0001878 ! feature_variant - -[Term] -id: SO:0001018 -name: epitope -def: "A binding site that, in the molecule, interacts selectively and non-covalently with antibodies, B cells or T cells." [http://en.wikipedia.org/wiki/Epitope, SO:cb] -comment: Requested by Trish Whetzel. -xref: http://en.wikipedia.org/wiki/Epitope "wiki" -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -subset: SOFA -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0001020 -name: sequence_variant_affecting_copy_number -comment: OBSOLETE: This term was deleted as it conflated more than one term. The alteration is separate from the effect. -synonym: "mutation affecting copy number" EXACT [] -synonym: "sequence variant affecting copy number" EXACT [] -is_obsolete: true -replaced_by: SO:0001563 - -[Term] -id: SO:0001021 -name: chromosome_breakpoint -alt_id: SO:0001242 -synonym: "aberration breakpoint" EXACT [] -synonym: "aberration_junction" EXACT [] -synonym: "chromosome breakpoint" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0001022 -name: inversion_breakpoint -def: "The point within a chromosome where an inversion begins or ends." 
[SO:cb] -synonym: "inversion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001023 -name: allele -def: "An allele is one of a set of coexisting sequence variants of a gene." [SO:immuno_workshop] -synonym: "allelomorph" EXACT [] -xref: http://en.wikipedia.org/wiki/Allele "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000704 ! gene - -[Term] -id: SO:0001024 -name: haplotype -def: "A haplotype is one of a set of coexisting sequence variants of a haplotype block." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Haplotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000355 ! haplotype_block - -[Term] -id: SO:0001025 -name: polymorphic_sequence_variant -def: "A sequence variant that is segregating in one or more natural populations of a species." [SO:immuno_workshop] -synonym: "polymorphic sequence variant" EXACT [] -is_a: SO:0001023 ! allele - -[Term] -id: SO:0001026 -name: genome -def: "A genome is the sum of genetic material within a cell or virion." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genome "wiki" -is_a: SO:0001260 ! sequence_collection -relationship: has_part SO:0001235 ! replicon - -[Term] -id: SO:0001027 -name: genotype -def: "A genotype is a variant genome, complete or incomplete." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0001026 ! genome - -[Term] -id: SO:0001028 -name: diplotype -def: "A diplotype is a pair of haplotypes from a given individual. It is a genotype where the phase is known." [SO:immuno_workshop] -is_a: SO:0001507 ! variant_collection - -[Term] -id: SO:0001029 -name: direction_attribute -synonym: "direction attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001030 -name: forward -def: "Forward is an attribute of the feature, where the feature is in the 5' to 3' direction." [SO:ke] -is_a: SO:0001029 ! 
direction_attribute - -[Term] -id: SO:0001031 -name: reverse -def: "Reverse is an attribute of the feature, where the feature is in the 3' to 5' direction. Again could be applied to primer." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001032 -name: mitochondrial_DNA -comment: This terms is used by MO. -synonym: "mitochondrial DNA" EXACT [] -synonym: "mtDNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_DNA "wiki" -is_a: SO:0000737 ! mitochondrial_sequence -relationship: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001033 -name: chloroplast_DNA -comment: This term is used by MO. -synonym: "chloroplast DNA" EXACT [] -is_a: SO:0000745 ! chloroplast_sequence -relationship: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001034 -name: mirtron -def: "A de-branched intron which mimics the structure of pre-miRNA and enters the miRNA processing pathway without Drosha mediated cleavage." [PMID:17589500, SO:ma] -comment: Ruby et al. Nature 448:83 describe a new class of miRNAs that are derived from de-branched introns. -is_a: SO:0001014 ! intron_domain - -[Term] -id: SO:0001035 -name: piRNA -def: "A small non coding RNA, part of a silencing system that prevents the spreading of selfish genetic elements." [SO:ke] -synonym: "piwi-associated RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/PiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001036 -name: arginyl_tRNA -def: "A tRNA sequence that has an arginine anticodon, and a 3' arginine binding region." [SO:ke] -synonym: "arginyl tRNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000212 ! arginine_tRNA_primary_transcript - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." 
[PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region -relationship: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0001038 -name: extrachromosomal_mobile_genetic_element -def: "An MGE that is not integrated into the host chromosome." [SO:ke] -synonym: "extrachromosomal mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001040 -name: integrated_plasmid -def: "A plasmid sequence that is integrated within the host chromosome." [SO:ke] -synonym: "integrated plasmid" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element -relationship: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0001041 -name: viral_sequence -def: "The region of nucleotide sequence of a virus, a submicroscopic particle that replicates by infecting a host cell." [SO:ke] -comment: The definitions of the children of this term were revised Decemeber 2007 after discussion on song-devel. The resulting definitions are slightly unweildy but hopefully more logically correct. -synonym: "viral sequence" EXACT [] -synonym: "virus sequence" EXACT [] -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0001042 -name: phage_sequence -def: "The nucleotide sequence of a virus that infects bacteria." [SO:ke] -synonym: "bacteriophage" EXACT [] -synonym: "phage" EXACT [] -synonym: "phage sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Bacteriophage "wiki" -is_a: SO:0001041 ! 
viral_sequence - -[Term] -id: SO:0001043 -name: attCtn_site -def: "An attachment site located on a conjugative transposon and used for site-specific integration of a conjugative transposon." [Phigo:at] -synonym: "attCtn site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000371 ! conjugative_transposon - -[Term] -id: SO:0001044 -name: nuclear_mt_pseudogene -def: "A nuclear pseudogene of either coding or non-coding mitochondria derived sequence." [SO:xp] -comment: Definition change requested by Val, 3172757. -synonym: "nuclear mitochondrial pseudogene" EXACT [] -synonym: "nuclear mt pseudogene" EXACT [] -synonym: "NUMT" EXACT [] -xref: http://en.wikipedia.org/wiki/Numt "wikipedia" -is_a: SO:0001760 ! non_processed_pseudogene - -[Term] -id: SO:0001045 -name: cointegrated_plasmid -def: "A MGE region consisting of two fused plasmids resulting from a replicative transposition event." [phigo:at] -synonym: "cointegrated plasmid" EXACT [] -synonym: "cointegrated replicon" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0001046 -name: IRLinv_site -def: "Component of the inversion site located at the left of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRLinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001047 -name: IRRinv_site -def: "Component of the inversion site located at the right of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRRinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001048 -name: inversion_site_part -def: "A region located within an inversion site." [SO:ke] -comment: A term created to allow the parts of an inversion site have an is_a path back to the root. -synonym: "inversion site part" EXACT [] -is_a: SO:0000342 ! 
site_specific_recombination_target_region - -[Term] -id: SO:0001049 -name: defective_conjugative_transposon -def: "An island that contains genes for integration/excision and the gene and site for the initiation of intercellular transfer by conjugation. It can be complemented for transfer by a conjugative transposon." [Phigo:ariane] -synonym: "defective conjugative transposon" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001050 -name: repeat_fragment -def: "A portion of a repeat, interrupted by the insertion of another element." [SO:ke] -comment: Requested by Chris Smith, and others at Flybase to help annotate nested repeats. -synonym: "repeat fragment" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0001649 ! nested_repeat - -[Term] -id: SO:0001051 -name: nested_region -is_obsolete: true - -[Term] -id: SO:0001052 -name: nested_repeat -is_obsolete: true - -[Term] -id: SO:0001053 -name: nested_transposon -is_obsolete: true - -[Term] -id: SO:0001054 -name: transposon_fragment -def: "A portion of a transposon, interrupted by the insertion of another element." [SO:ke] -synonym: "transposon fragment" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0001648 ! nested_transposon - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [PMID:9679020, SO:regcreative] -subset: SOFA -synonym: "transcription-control region" EXACT [] -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." [SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0001679 ! 
transcription_regulatory_region - -[Term] -id: SO:0001057 -name: enhanceosome -is_obsolete: true - -[Term] -id: SO:0001058 -name: promoter_targeting_sequence -def: "A transcriptional_cis_regulatory_region that restricts the activity of a CRM to a single promoter and which functions only when both itself and an insulator are located between the CRM and the promoter." [SO:regcreative] -synonym: "promoter targeting sequence" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. -subset: SOFA -synonym: "partially characterised change in DNA sequence" NARROW [] -synonym: "partially_characterised_change_in_DNA_sequence" NARROW [] -synonym: "sequence alteration" EXACT [] -synonym: "sequence variation" RELATED [] -synonym: "uncharacterised_change_in_nucleotide_sequence" NARROW [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001060 -name: sequence_variant -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -synonym: "sequence variant" EXACT [] - -[Term] -id: SO:0001061 -name: propeptide_cleavage_site -alt_id: BS:00063 -def: "The propeptide_cleavage_site is the arginine/lysine boundary on a propeptide where cleavage occurs." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "propeptide cleavage site" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0001062 -name: propeptide -alt_id: BS:00077 -def: "Part of a peptide chain which is cleaved off during the formation of the mature protein." [EBIBS:GAR] -comment: Range. 
-subset: biosapiens -synonym: "propep" RELATED [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Propeptide "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001064 -name: active_peptide -alt_id: BS:00076 -def: "Active peptides are proteins which are biologically active, released from a precursor molecule." [EBIBS:GAR, UniProt:curation_manual] -comment: Hormones, neuropeptides, antimicrobial peptides, are active peptides. They are typically short (<40 amino acids) in length. -subset: biosapiens -synonym: "active peptide" EXACT [] -synonym: "peptide" BROAD [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Peptide "wiki" -is_a: SO:0000419 ! mature_protein_region - -[Term] -id: SO:0001066 -name: compositionally_biased_region_of_peptide -alt_id: BS:00068 -def: "Polypeptide region that is rich in a particular amino acid or homopolymeric and greater than three residues in length." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "compbias" RELATED [uniprot:feature_type] -synonym: "compositional bias" RELATED [] -synonym: "compositionally biased" RELATED [] -synonym: "compositionally biased region of peptide" RELATED [] -synonym: "compositionally_biased_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001067 -name: polypeptide_motif -alt_id: BS:00032 -def: "A sequence motif is a short (up to 20 amino acids) region of biological interest. Such motifs, although they are too short to constitute functional domains, share sequence similarities and are conserved in different proteins. 
They display a common function (protein-binding, subcellular location etc.)." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "motif" BROAD [uniprot:feature_type] -synonym: "polypeptide motif" EXACT [] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001068 -name: polypeptide_repeat -alt_id: BS:00070 -def: "A polypeptide_repeat is a single copy of an internal sequence repetition." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "polypeptide repeat" EXACT [] -synonym: "repeat" RELATED [uniprot:feature_type] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001070 -name: polypeptide_structural_region -alt_id: BS:00337 -def: "Region of polypeptide with a given structural property." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "polypeptide structural region" EXACT [] -synonym: "structural_region" RELATED [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001071 -name: membrane_structure -alt_id: BS:00128 -def: "Arrangement of the polypeptide with respect to the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "membrane structure" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001072 -name: extramembrane_polypeptide_region -alt_id: BS:00154 -def: "Polypeptide region that is localized outside of a lipid bilayer." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "extramembrane" RELATED BS [] -synonym: "extramembrane polypeptide region" EXACT [] -synonym: "extramembrane_region" RELATED BS [] -synonym: "topo_dom" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001073 -name: cytoplasmic_polypeptide_region -alt_id: BS:00145 -def: "Polypeptide region that is localized inside the cytoplasm." 
[EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "cytoplasm_location" EXACT BS [] -synonym: "cytoplasmic polypeptide region" EXACT [] -synonym: "inside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001074 -name: non_cytoplasmic_polypeptide_region -alt_id: BS:00144 -def: "Polypeptide region that is localized outside of a lipid bilayer and outside of the cytoplasm." [EBIBS:GAR, SO:cb] -comment: This could be inside an organelle within the cell. -subset: biosapiens -synonym: "non cytoplasmic polypeptide region" EXACT [] -synonym: "non_cytoplasm_location" EXACT BS [] -synonym: "outside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001075 -name: intramembrane_polypeptide_region -alt_id: BS:00156 -def: "Polypeptide region present in the lipid bilayer." [EBIBS:GAR] -subset: biosapiens -synonym: "intramembrane" RELATED BS [] -synonym: "intramembrane polypeptide region" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001076 -name: membrane_peptide_loop -alt_id: BS:00155 -def: "Polypeptide region localized within the lipid bilayer where both ends traverse the same membrane." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "membrane peptide loop" EXACT [] -synonym: "membrane_loop" RELATED BS [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001077 -name: transmembrane_polypeptide_region -alt_id: BS:00158 -def: "Polypeptide region traversing the lipid bilayer." [EBIBS:GAR, UniProt:curator_manual] -subset: biosapiens -synonym: "transmem" RELATED BS [uniprot:feature_type] -synonym: "transmembrane" RELATED BS [] -synonym: "transmembrane polypeptide region" EXACT [] -is_a: SO:0001075 ! 
intramembrane_polypeptide_region - -[Term] -id: SO:0001078 -name: polypeptide_secondary_structure -alt_id: BS:00003 -def: "A region of peptide with secondary structure has hydrogen bonding along the peptide chain that causes a defined conformation of the chain." [EBIBS:GAR] -comment: Biosapien term was secondary_structure. -subset: biosapiens -synonym: "2nary structure" RELATED BS [] -synonym: "polypeptide secondary structure" EXACT [] -synonym: "secondary structure" RELATED BS [] -synonym: "secondary structure region" RELATED BS [] -synonym: "secondary_structure" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Secondary_structure "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001079 -name: polypeptide_structural_motif -alt_id: BS:0000338 -def: "Motif is a three-dimensional structural element within the chain, which appears also in a variety of other molecules. Unlike a domain, a motif does not need to form a stable globular unit." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide structural motif" RELATED [] -synonym: "structural_motif" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Structural_motif "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001080 -name: coiled_coil -alt_id: BS:00041 -def: "A coiled coil is a structural motif in proteins, in which alpha-helices are coiled together like the strands of a rope." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "coiled" RELATED BS [uniprot:feature_type] -synonym: "coiled coil" EXACT [] -xref: http://en.wikipedia.org/wiki/Coiled_coil "wiki" -is_a: SO:0001079 ! polypeptide_structural_motif - -[Term] -id: SO:0001081 -name: helix_turn_helix -alt_id: BS:00147 -def: "A motif comprising two helices separated by a turn." [EBIBS:GAR] -subset: biosapiens -synonym: "helix turn helix" EXACT [] -synonym: "helix-turn-helix" EXACT [] -synonym: "HTH" RELATED BS [] -is_a: SO:0001079 ! 
polypeptide_structural_motif -relationship: has_part SO:0001114 ! peptide_helix -relationship: has_part SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001082 -name: polypeptide_sequencing_information -alt_id: BS:00125 -def: "Incompatibility in the sequence due to some experimental problem." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "sequencing_information" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0001083 -name: non_adjacent_residues -alt_id: BS:00182 -def: "Indicates that two consecutive residues in a fragment sequence are not consecutive in the full-length protein and that there are a number of unsequenced residues between them." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "non consecutive" EXACT [] -synonym: "non_cons" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001084 -name: non_terminal_residue -alt_id: BS:00072 -def: "The residue at an extremity of the sequence is not the terminal residue." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "non terminal" EXACT [] -synonym: "non_ter" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001085 -name: sequence_conflict -alt_id: BS:00069 -def: "Different sources report differing sequences." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "conflict" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001086 -name: sequence_uncertainty -alt_id: BS:00181 -def: "Describes the positions in a sequence where the authors are unsure about the sequence assignment." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "unsure" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! 
polypeptide_sequencing_information - -[Term] -id: SO:0001087 -name: cross_link -alt_id: BS:00178 -def: "Posttranslationally formed amino acid bonds." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "cross link" EXACT [] -synonym: "crosslink" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001088 -name: disulfide_bond -alt_id: BS:00028 -def: "The covalent bond between sulfur atoms that binds two peptide chains or different parts of one peptide chain and is a structural determinant in many protein molecules." [EBIBS:GAR, UniProt:curation_manual] -comment: 2 discreet & joined. -subset: biosapiens -synonym: "disulfid" RELATED [] -synonym: "disulfide" RELATED [] -synonym: "disulfide bond" RELATED [] -synonym: "disulphide" EXACT [] -synonym: "disulphide bond" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001089 -name: post_translationally_modified_region -alt_id: BS:00052 -def: "A region where a transformation occurs in a protein after it has been synthesized. This which may regulate, stabilize, crosslink or introduce new chemical functionalities in the protein." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mod_res" EXACT [uniprot:feature_type] -synonym: "modified residue" EXACT [] -synonym: "post_translational_modification" EXACT [] -xref: http://en.wikipedia.org/wiki/Post_translational_modification "wiki" -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001090 -name: covalent_binding_site -alt_id: BS:00246 -def: "Binding involving a covalent bond." [EBIBS:GAR] -subset: biosapiens -synonym: "covalent binding site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001091 -name: non_covalent_binding_site -alt_id: BS:00029 -def: "Binding site for any chemical group (co-enzyme, prosthetic group, etc.)." [EBIBS:GAR] -comment: Discrete. 
-subset: biosapiens -synonym: "binding" RELATED [uniprot:curation] -synonym: "binding site" RELATED [] -synonym: "non covalent binding site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001092 -name: polypeptide_metal_contact -alt_id: BS:00027 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with metal ions." [EBIBS:GAR, SO:cb, UniProt:curation_manual] -comment: Residue is part of a binding site for a metal ion. -subset: biosapiens -synonym: "metal_binding" RELATED [] -is_a: SO:0001656 ! metal_binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001093 -name: protein_protein_contact -alt_id: BS:00131 -def: "A binding site that, in the protein molecule, interacts selectively and non-covalently with polypeptide residues." [EBIBS:GAR, UniProt:Curation_manual] -subset: biosapiens -synonym: "protein protein contact" EXACT [] -synonym: "protein protein contact site" EXACT [] -synonym: "protein_protein_interaction" RELATED [] -xref: http://en.wikipedia.org/wiki/Protein_protein_interaction "wiki" -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001094 -name: polypeptide_calcium_ion_contact_site -alt_id: BS:00186 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with calcium ions." [EBIBS:GAR] -comment: Residue involved in contact with calcium. -subset: biosapiens -synonym: "ca bind" RELATED [] -synonym: "ca_bind" EXACT BS [uniprot:feature_type] -synonym: "Ca_contact_site" EXACT [] -synonym: "polypeptide calcium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001095 -name: polypeptide_cobalt_ion_contact_site -alt_id: BS:00136 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with cobalt ions." 
[EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Co_contact_site" EXACT [] -synonym: "polypeptide cobalt ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001096 -name: polypeptide_copper_ion_contact_site -alt_id: BS:00146 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with copper ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Cu_contact_site" EXACT [] -synonym: "polypeptide copper ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001097 -name: polypeptide_iron_ion_contact_site -alt_id: BS:00137 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with iron ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Fe_contact_site" EXACT [] -synonym: "polypeptide iron ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001098 -name: polypeptide_magnesium_ion_contact_site -alt_id: BS:00187 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with magnesium ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Mg_contact_site" EXACT [] -synonym: "polypeptide magnesium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001099 -name: polypeptide_manganese_ion_contact_site -alt_id: BS:00140 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with manganese ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Mn_contact_site" EXACT [] -synonym: "polypeptide manganese ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001100 -name: polypeptide_molybdenum_ion_contact_site -alt_id: BS:00141 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with molybdenum ions." 
[EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Mo_contact_site" EXACT [] -synonym: "polypeptide molybdenum ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001101 -name: polypeptide_nickel_ion_contact_site -alt_id: BS:00142 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with nickel ions." [EBIBS:GAR] -subset: biosapiens -synonym: "Ni_contact_site" EXACT [] -synonym: "polypeptide nickel ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001102 -name: polypeptide_tungsten_ion_contact_site -alt_id: BS:00143 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with tungsten ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "polypeptide tungsten ion contact site" EXACT [] -synonym: "W_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001103 -name: polypeptide_zinc_ion_contact_site -alt_id: BS:00185 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with zinc ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "polypeptide zinc ion contact site" EXACT [] -synonym: "Zn_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001104 -name: catalytic_residue -alt_id: BS:00026 -def: "Amino acid involved in the activity of an enzyme." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "act_site" RELATED [uniprot:feature_type] -synonym: "active site residue" EXACT [] -synonym: "catalytic residue" EXACT [] -is_a: SO:0001237 ! amino_acid -relationship: part_of SO:0100019 ! polypeptide_catalytic_motif - -[Term] -id: SO:0001105 -name: polypeptide_ligand_contact -alt_id: BS:00157 -def: "Residues which interact with a ligand." 
[EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide ligand contact" EXACT [] -synonym: "protein-ligand interaction" RELATED [] -is_a: SO:0001657 ! ligand_binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001106 -name: asx_motif -alt_id: BS:00202 -def: "A motif of five consecutive residues and two H-bonds in which: Residue(i) is Aspartate or Asparagine (Asx), side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3), main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001107 -name: beta_bulge -alt_id: BS:00208 -def: "A motif of three residues within a beta-sheet in which the main chains of two consecutive residues are H-bonded to that of the third, and in which the dihedral angles are as follows: Residue(i): -140 degrees < phi(l) -20 degrees , -90 degrees < psi(l) < 40 degrees. Residue (i+1): -180 degrees < phi < -25 degrees or +120 degrees < phi < +180 degrees, +40 degrees < psi < +180 degrees or -180 degrees < psi < -120 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge" EXACT [] -xref: http://en.wikipedia.org/wiki/Beta_bulge "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001108 -name: beta_bulge_loop -alt_id: BS:00209 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds. Beta bulge loops often occur at the loop ends of beta-hairpins." [EBIBS:GAR, Http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001109 -name: beta_bulge_loop_five -alt_id: BS:00210 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+4), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+3), these loops have an RL nest at residues i+2 and i+3." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop five" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001110 -name: beta_bulge_loop_six -alt_id: BS:00211 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+5), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+4), these loops have an RL nest at residues i+3 and i+4." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop six" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001111 -name: beta_strand -alt_id: BS:00042 -def: "A beta strand describes a single length of polypeptide chain that forms part of a beta sheet. A single continuous stretch of amino acids adopting an extended conformation of hydrogen bonds between the N-O and the C=O of another part of the peptide. This forms a secondary protein structure in which two or more extended polypeptide regions are hydrogen-bonded to one another in a planar array." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "strand" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Beta_sheet "wiki" -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001112 -name: antiparallel_beta_strand -alt_id: BS:0000341 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (one running N-terminal to C-terminal and one running C-terminal to N-terminal). Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i) and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they form two mutual backbone hydrogen bonds to each other's flanking peptide groups; this is known as a close pair of hydrogen bonds. The peptide backbone dihedral angles (phi, psi) are about (-140 degrees, 135 degrees) in antiparallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "antiparallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001113 -name: parallel_beta_strand -alt_id: BS:00151 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (both running N-terminal to C-terminal). This orientation is slightly less stable because it introduces nonplanarity in the inter-strand hydrogen bonding pattern. Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i)and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they do not hydrogen bond to each other; rather, one residue forms hydrogen bonds to the residues that flank the other (but not vice versa). For example, residue i may form hydrogen bonds to residues j - 1 and j + 1; this is known as a wide pair of hydrogen bonds. By contrast, residue j may hydrogen-bond to different residues altogether, or to none at all. The dihedral angles (phi, psi) are about (-120 degrees, 115 degrees) in parallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. 
-subset: biosapiens -synonym: "parallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001114 -name: peptide_helix -alt_id: BS:00152 -def: "A helix is a secondary_structure conformation where the peptide backbone forms a coil." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "helix" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001115 -name: left_handed_peptide_helix -alt_id: BS:00222 -def: "A left handed helix is a region of peptide where the coiled conformation turns in an anticlockwise, left handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix-l" RELATED [] -synonym: "left handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001116 -name: right_handed_peptide_helix -alt_id: BS:0000339 -def: "A right handed helix is a region of peptide where the coiled conformation turns in a clockwise, right handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix" RELATED BS [] -synonym: "right handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001117 -name: alpha_helix -alt_id: BS:00040 -def: "The helix has 3.6 residues per turn which corersponds to a translation of 1.5 angstroms (= 0.15 nm) along the helical axis. Every backbone N-H group donates a hydrogen bond to the backbone C=O group of the amino acid four residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "a-helix" RELATED BS [] -synonym: "helix" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Alpha_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001118 -name: pi_helix -alt_id: BS:00153 -def: "The pi helix has 4.1 residues per turn and a translation of 1.15 (=0.115 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid five residues earlier." [EBIBS:GAR] -comment: Range. 
-subset: biosapiens -synonym: "pi helix" EXACT [] -xref: http://en.wikipedia.org/wiki/Pi_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001119 -name: three_ten_helix -alt_id: BS:0000340 -def: "The 3-10 helix has 3 residues per turn with a translation of 2.0 angstroms (=0.2 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid three residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "3(10) helix" EXACT [] -synonym: "3-10 helix" EXACT [] -synonym: "310 helix" EXACT [] -synonym: "three ten helix" EXACT [] -xref: http://en.wikipedia.org/wiki/310_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001120 -name: polypeptide_nest_motif -alt_id: BS:00223 -def: "A motif of two consecutive residues with dihedral angles. Nest should not have Proline as any residue. Nests frequently occur as parts of other motifs such as Schellman loops." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest" RELATED BS [] -synonym: "nest_motif" EXACT [] -synonym: "polypeptide nest motif" RELATED [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001121 -name: polypeptide_nest_left_right_motif -alt_id: BS:00224 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_left_right" EXACT [] -synonym: "nest_lr" EXACT [] -synonym: "polypeptide nest left right motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001122 -name: polypeptide_nest_right_left_motif -alt_id: BS:00225 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. 
Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_right_left" EXACT [] -synonym: "nest_rl" EXACT [] -synonym: "polypeptide nest right left motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001123 -name: schellmann_loop -alt_id: BS:00226 -def: "A motif of six or seven consecutive residues that contains two H-bonds." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "paperclip" RELATED BS [] -synonym: "paperclip loop" RELATED [] -synonym: "schellmann loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001124 -name: schellmann_loop_seven -alt_id: BS:00228 -def: "Wild type: A motif of seven consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+6), the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+5)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop seven" EXACT [] -synonym: "seven-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001125 -name: schellmann_loop_six -alt_id: BS:00227 -def: "Common Type: A motif of six consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+5) the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop six" EXACT [] -synonym: "six-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! 
schellmann_loop - -[Term] -id: SO:0001126 -name: serine_threonine_motif -alt_id: BS:00229 -def: "A motif of five consecutive residues and two hydrogen bonds in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3) , the main-chain CO group of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine motif" EXACT [] -synonym: "st motif" EXACT [] -synonym: "st_motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001127 -name: serine_threonine_staple_motif -alt_id: BS:00230 -def: "A motif of four or five consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain OH of residue(i) is H-bonded to the main-chain CO of residue(i3) or (i4), Phi angles of residues(i1), (i2) and (i3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine threonine staple motif" EXACT [] -synonym: "st_staple" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001128 -name: polypeptide_turn_motif -alt_id: BS:00148 -def: "A reversal in the direction of the backbone of a protein that is stabilized by hydrogen bond between backbone NH and CO groups, involving no more than 4 amino acid residues." [EBIBS:GAR, uniprot:feature_type] -comment: Range. -subset: biosapiens -synonym: "turn" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001129 -name: asx_turn_left_handed_type_one -alt_id: BS:00206 -def: "Left handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type one" EXACT [] -synonym: "asx_turn_il" RELATED [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001130 -name: asx_turn_left_handed_type_two -alt_id: BS:00204 -def: "Left handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type two" EXACT [] -synonym: "asx_turn_iil" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001131 -name: asx_turn_right_handed_type_two -alt_id: BS:00205 -def: "Right handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn right handed type two" EXACT [] -synonym: "asx_turn_iir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001132 -name: asx_turn_right_handed_type_one -alt_id: BS:00207 -def: "Right handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn type right handed type one" EXACT [] -synonym: "asx_turn_ir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001133 -name: beta_turn -alt_id: BS:00212 -def: "A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. 
It is characterized by the dihedral angles of the second and third residues, which are the basis for sub-categorization." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001134 -name: beta_turn_left_handed_type_one -alt_id: BS:00215 -def: "Left handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles:- Residue(i+1): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees. Residue(i+2): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type one" EXACT [] -synonym: "beta_turn_il" EXACT [] -synonym: "type I' beta turn" EXACT [] -synonym: "type I' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001135 -name: beta_turn_left_handed_type_two -alt_id: BS:00213 -def: "Left handed type II: A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees > phi > -20 degrees, +80 degrees > psi > +180 degrees. Residue(i+2): +20 degrees > phi > +140 degrees, -40 degrees > psi > +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type two" EXACT [] -synonym: "beta_turn_iil" EXACT [] -synonym: "type II' beta turn" EXACT [] -synonym: "type II' turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001136 -name: beta_turn_right_handed_type_one -alt_id: BS:00216 -def: "Right handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+2): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type one" EXACT [] -synonym: "beta_turn_ir" EXACT [] -synonym: "type I beta turn" EXACT [] -synonym: "type I turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001137 -name: beta_turn_right_handed_type_two -alt_id: BS:00214 -def: "Right handed type II:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, +80 degrees < psi < +180 degrees. Residue(i+2): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type two" EXACT [] -synonym: "beta_turn_iir" EXACT [] -synonym: "type II beta turn" EXACT [] -synonym: "type II turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001138 -name: gamma_turn -alt_id: BS:00219 -def: "Gamma turns, defined for 3 residues i,( i+1),( i+2) if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn" EXACT [] -is_a: SO:0001128 ! 
polypeptide_turn_motif - -[Term] -id: SO:0001139 -name: gamma_turn_classic -alt_id: BS:00220 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=75.0 - psi(i+1)=-64.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "classic gamma turn" EXACT [] -synonym: "gamma turn classic" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001140 -name: gamma_turn_inverse -alt_id: BS:00221 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=-79.0 - psi(i+1)=69.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn inverse" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001141 -name: serine_threonine_turn -alt_id: BS:00231 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine turn" EXACT [] -synonym: "st_turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001142 -name: st_turn_left_handed_type_one -alt_id: BS:00234 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type one" EXACT [] -synonym: "st_turn_il" EXACT [] -is_a: SO:0001141 ! 
serine_threonine_turn - -[Term] -id: SO:0001143 -name: st_turn_left_handed_type_two -alt_id: BS:00232 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type two" EXACT [] -synonym: "st_turn_iil" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001144 -name: st_turn_right_handed_type_one -alt_id: BS:00235 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type one" EXACT [] -synonym: "st_turn_ir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001145 -name: st_turn_right_handed_type_two -alt_id: BS:00233 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type two" EXACT [] -synonym: "st_turn_iir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001146 -name: polypeptide_variation_site -alt_id: BS:00336 -def: "A site of sequence variation (alteration). Alternative sequence due to naturally occuring events such as polymorphisms and altermatve splicing or experimental methods such as site directed mutagenesis." 
[EBIBS:GAR, SO:ke] -comment: For example, was a substitution natural or mutated as part of an experiment? This term is added to merge the biosapiens term sequence_variations. -subset: biosapiens -synonym: "sequence_variations" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001147 -name: natural_variant_site -alt_id: BS:00071 -def: "Describes the natural sequence variants due to polymorphisms, disease-associated mutations, RNA editing and variations between strains, isolates or cultivars." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "natural_variant" BROAD [] -synonym: "sequence variation" BROAD [] -synonym: "variant" BROAD [uniprot:feature_type] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001148 -name: mutated_variant_site -alt_id: BS:00036 -def: "Site which has been experimentally altered." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mutagen" EXACT BS [uniprot:feature_type] -synonym: "mutagenesis" EXACT [] -synonym: "mutated_site" EXACT [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001149 -name: alternate_sequence_site -alt_id: BS:00073 -alt_id: SO:0001065 -def: "Description of sequence variants produced by alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "alternative_sequence" EXACT [] -synonym: "isoform" NARROW [] -synonym: "sequence variation" NARROW [] -synonym: "var_seq" EXACT [uniprot:feature_type] -synonym: "varsplic" NARROW [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001150 -name: beta_turn_type_six -def: "A motif of four consecutive peptide resides of type VIa or type VIb and where the i+2 residue is cis-proline." 
[SO:cb] -subset: biosapiens -synonym: "beta turn type six" EXACT [] -synonym: "cis-proline loop" EXACT [] -synonym: "type VI beta turn" EXACT [] -synonym: "type VI turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001151 -name: beta_turn_type_six_a -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -90 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six a" EXACT [] -synonym: "type VIa beta turn" EXACT [] -synonym: "type VIa turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001152 -name: beta_turn_type_six_a_one -subset: biosapiens -synonym: "beta turn type six a one" EXACT [] -synonym: "type VIa1 beta turn" EXACT [] -synonym: "type VIa1 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001153 -name: beta_turn_type_six_a_two -subset: biosapiens -synonym: "beta turn type six a two" EXACT [] -synonym: "type VIa2 beta turn" EXACT [] -synonym: "type VIa2 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001154 -name: beta_turn_type_six_b -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -120 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -60 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six b" EXACT [] -synonym: "type VIb beta turn" EXACT [] -synonym: "type VIb turn" EXACT [] -is_a: SO:0001150 ! 
beta_turn_type_six - -[Term] -id: SO:0001155 -name: beta_turn_type_eight -def: "A motif of four consecutive peptide residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ -30 degrees. Residue(i+2): phi ~ -120 degrees, psi ~ 120 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type eight" EXACT [] -synonym: "type VIII beta turn" EXACT [] -synonym: "type VIII turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001156 -name: DRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -10 and -60 relative to the TSS. Consensus sequence is WATCGATW." [PMID:12537576] -comment: This consensus sequence was identified computationally using the MEME algorithm within core promoter sequences from -60 to +40, with an E value of 1.7e-183. Tends to co-occur with Motif 7. Tends to not occur with DPE motif (SO:0000015) or motif 10. -synonym: "DRE motif" EXACT [] -synonym: "NDM4" EXACT [] -synonym: "WATCGATW_motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001157 -name: DMv4_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements with respect to the TSS (+1). Consensus sequence is YGGTCACACTR. Marked spatial preference within core promoter; tend to occur near the TSS, although not as tightly as INR (SO:0000014)." [PMID:16827941:12537576] -synonym: "directional motif v4" EXACT [] -synonym: "DMv4" EXACT [] -synonym: "DMv4 motif" EXACT [] -synonym: "motif 1 element" EXACT [] -synonym: "promoter motif 1" EXACT [] -synonym: "YGGTCACATR" NARROW [] -is_a: SO:0001659 ! 
promoter_element - -[Term] -id: SO:0001158 -name: E_box_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and +1 relative to the TSS. Consensus sequence is AWCAGCTGWT. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015)." [PMID:12537576:16827941] -synonym: "AWCAGCTGWT" NARROW [] -synonym: "E box motif" EXACT [] -synonym: "generic E box motif" EXACT [] -synonym: "NDM5" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001159 -name: DMv5_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -50 and -10 relative to the TSS. Consensus sequence is KTYRGTATWTTT. Tends to co-occur with DMv4 (SO:0001157) . Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576:16827941] -synonym: "directional motif v5" EXACT [] -synonym: "DMv5" EXACT [] -synonym: "DMv5 motif" EXACT [] -synonym: "KTYRGTATWTTT" NARROW [] -synonym: "promoter motif 6" RELATED [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001160 -name: DMv3_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -30 and +15 relative to the TSS. Consensus sequence is KNNCAKCNCTRNY. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015) or MTE (0001162)." [PMID:12537576:16827941] -synonym: "directional motif v3" EXACT [] -synonym: "DMv3" EXACT [] -synonym: "DMv3 motif" EXACT [] -synonym: "KNNCAKCNCTRNY" NARROW [] -synonym: "promoter motif 7" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001161 -name: DMv2_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and -45 relative to the TSS. 
Consensus sequence is MKSYGGCARCGSYSS. Tends to co-occur with DMv3 (SO:0001160). Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576:16827941] -synonym: "directional motif v2" EXACT [] -synonym: "DMv2" EXACT [] -synonym: "DMv2 motif" EXACT [] -synonym: "MKSYGGCARCGSYSS" NARROW [] -synonym: "promoter motif 8" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001162 -name: MTE -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between +20 and +30 relative to the TSS. Consensus sequence is CSARCSSAACGS. Tends to co-occur with INR motif (SO:0000014). Tends to not occur with DPE motif (SO:0000015) or DMv5 (SO:0001159)." [PMID:12537576:15231738, PMID:16858867] -synonym: "CSARCSSAACGS" NARROW [] -synonym: "motif ten element" EXACT [] -synonym: "motif_ten_element" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter - -[Term] -id: SO:0001163 -name: INR1_motif -def: "A promoter motif with consensus sequence TCATTCG." [PMID:16827941] -synonym: "directional motif p3" EXACT [] -synonym: "directional promoter motif 3" EXACT [] -synonym: "DMp3" EXACT [] -synonym: "INR1 motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001164 -name: DPE1_motif -def: "A promoter motif with consensus sequence CGGACGT." [PMID:16827941] -synonym: "directional motif 5" EXACT [] -synonym: "directional promoter motif 5" RELATED [] -synonym: "DMp5" EXACT [] -synonym: "DPE1 motif" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001165 -name: DMv1_motif -def: "A promoter motif with consensus sequence CARCCCT." [PMID:16827941] -synonym: "directional promoter motif v1" RELATED [] -synonym: "DMv1" RELATED [] -synonym: "DMv1 motif" EXACT [] -is_a: SO:0001659 ! 
promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001166 -name: GAGA_motif -def: "A non directional promoter motif with consensus sequence GAGAGCG." [PMID:16827941] -synonym: "GAGA" EXACT [] -synonym: "GAGA motif" EXACT [] -synonym: "NDM1" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001167 -name: NDM2_motif -def: "A non directional promoter motif with consensus CGMYGYCR." [PMID:16827941] -synonym: "NDM2" EXACT [] -synonym: "NDM2 motif" EXACT [] -synonym: "non directional promoter motif 2" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001168 -name: NDM3_motif -def: "A non directional promoter motif with consensus sequence GAAAGCT." [PMID:16827941] -synonym: "NDM3" EXACT [] -synonym: "NDM3 motif" EXACT [] -synonym: "non directional motif 3" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001169 -name: ds_RNA_viral_sequence -def: "A ds_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded RNA." [SO:ke] -synonym: "double stranded RNA virus sequence" EXACT [] -synonym: "ds RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001170 -name: polinton -def: "A kind of DNA transposon that populates the genomes of protists, fungi, and animals, characterized by a unique set of proteins necessary for their transposition, including a protein-primed DNA polymerase B, retroviral integrase, cysteine protease, and ATPase. Polintons are characterized by 6-bp target site duplications, terminal-inverted repeats that are several hundred nucleotides long, and 5'-AG and TC-3' termini. Polintons exist as autonomous and nonautonomous elements." [PMID:16537396] -synonym: "maverick element" RELATED [] -is_a: SO:0000208 ! 
terminal_inverted_repeat_element - -[Term] -id: SO:0001171 -name: rRNA_21S -def: "A component of the large ribosomal subunit in mitochondrial rRNA." [RSC:cb] -synonym: "21S LSU rRNA" EXACT [] -synonym: "21S ribosomal RNA" EXACT [] -synonym: "21S rRNA" EXACT [] -synonym: "rRNA 21S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001172 -name: tRNA_region -def: "A region of a tRNA." [RSC:cb] -synonym: "tRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000253 ! tRNA - -[Term] -id: SO:0001173 -name: anticodon_loop -def: "A sequence of seven nucleotide bases in tRNA which contains the anticodon. It has the sequence 5'-pyrimidine-purine-anticodon-modified purine-any base-3." [ISBN:0716719207] -synonym: "anti-codon loop" EXACT [] -synonym: "anticodon loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001174 -name: anticodon -def: "A sequence of three nucleotide bases in tRNA which recognizes a codon in mRNA." [RSC:cb] -synonym: "anti-codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Anticodon "wiki" -is_a: SO:0001172 ! tRNA_region -relationship: part_of SO:0001173 ! anticodon_loop - -[Term] -id: SO:0001175 -name: CCA_tail -def: "Base sequence at the 3' end of a tRNA. The 3'-hydroxyl group on the terminal adenosine is the attachment point for the amino acid." [ISBN:0716719207] -synonym: "CCA sequence" EXACT [] -synonym: "CCA tail" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001176 -name: DHU_loop -def: "Non-base-paired sequence of nucleotide bases in tRNA. It contains several dihydrouracil residues." [ISBN:071671920] -synonym: "D loop" RELATED [] -synonym: "DHU loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001177 -name: T_loop -def: "Non-base-paired sequence of three nucleotide bases in tRNA. It has sequence T-Psi-C." [ISBN:0716719207] -synonym: "T loop" EXACT [] -synonym: "TpsiC loop" EXACT [] -is_a: SO:0001172 ! 
tRNA_region - -[Term] -id: SO:0001178 -name: pyrrolysine_tRNA_primary_transcript -def: "A primary transcript encoding pyrrolysyl tRNA (SO:0000766)." [RSC:cb] -synonym: "pyrrolysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0001179 -name: U3_snoRNA -def: "U3 snoRNA is a member of the box C/D class of small nucleolar RNAs. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -comment: The definition is most of the old definition for snoRNA (SO:0000275). -synonym: "small nucleolar RNA U3" EXACT [] -synonym: "snoRNA U3" EXACT [] -synonym: "U3 small nucleolar RNA" EXACT [] -synonym: "U3 snoRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Small_nucleolar_RNA_U3 "wiki" -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0001180 -name: AU_rich_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is rich in AUUUA pentamers. Messenger RNAs bearing multiple AU-rich elements are often unstable." 
[PMID:7892223] -synonym: "ARE" RELATED [] -synonym: "AU rich element" EXACT [] -synonym: "AU-rich element" EXACT [] -xref: http://en.wikipedia.org/wiki/AU-rich_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001181 -name: Bruno_response_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is bound by the Drosophila Bruno protein and its homologs." [PMID:10893231] -comment: Not to be confused with BRE_motif (SO:0000016), which binds transcription factor II B. -synonym: "BRE" RELATED [] -synonym: "Bruno response element" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001182 -name: iron_responsive_element -def: "A regulatory sequence found in the 5' and 3' UTRs of many mRNAs which encode iron-binding proteins. It has a hairpin structure and is recognized by trans-acting proteins known as iron-regulatory proteins." [PMID:3198610, PMID:8710843] -synonym: "IRE" EXACT [] -synonym: "iron responsive element" EXACT [] -xref: http://en.wikipedia.org/wiki/Iron_responsive_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0001183 -name: morpholino_backbone -def: "An attribute describing a sequence composed of nucleobases bound to a morpholino backbone. A morpholino backbone consists of morpholine (CHEBI:34856) rings connected by phosphorodiamidate linkages." [RSC:cb] -comment: Do not use this for feature annotation. Use morpholino_oligo (SO:0000034) instead. -synonym: "morpholino backbone" EXACT [] -xref: http://en.wikipedia.org/wiki/Morpholino "wiki" -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001184 -name: PNA -def: "An attribute describing a sequence composed of peptide nucleic acid (CHEBI:48021), a chemical consisting of nucleobases bound to a backbone composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. 
The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [RSC:cb] -comment: Do not use this term for feature annotation. Use PNA_oligo (SO:0001011) instead. -synonym: "peptide nucleic acid" RELATED [] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001185 -name: enzymatic -def: "An attribute describing the sequence of a transcript that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use enzymatic_RNA (SO:0000372) instead. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001186 -name: ribozymic -def: "An attribute describing the sequence of a transcript that has catalytic activity even without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use ribozyme (SO:0000374) instead. -is_a: SO:0001185 ! enzymatic - -[Term] -id: SO:0001187 -name: pseudouridylation_guide_snoRNA -def: "A snoRNA that specifies the site of pseudouridylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA pseudouridylation guide activity (GO:0030558). -synonym: "pseudouridylation guide snoRNA" EXACT [] -is_a: SO:0000594 ! H_ACA_box_snoRNA - -[Term] -id: SO:0001188 -name: LNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of 'locked' deoxyribose rings connected to a phosphate backbone. The deoxyribose unit's conformation is 'locked' by a 2'-C,4'-C-oxymethylene link." [CHEBI:48010] -comment: Do not use this term for feature annotation. Use LNA_oligo (SO:0001189) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001189 -name: LNA_oligo -def: "An oligo composed of LNA residues." [RSC:cb] -synonym: "LNA oligo" EXACT [] -synonym: "locked nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Locked_nucleic_acid "wiki" -is_a: SO:0001247 ! 
synthetic_oligo -relationship: has_quality SO:0001188 ! LNA - -[Term] -id: SO:0001190 -name: TNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of threose rings connected to a phosphate backbone." [CHEBI:48019] -comment: Do not use this term for feature annotation. Use TNA_oligo (SO:0001191) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001191 -name: TNA_oligo -def: "An oligo composed of TNA residues." [RSC:cb] -synonym: "threose nucleic acid" EXACT [] -synonym: "TNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Threose_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -relationship: has_quality SO:0001190 ! TNA - -[Term] -id: SO:0001192 -name: GNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of an acyclic three-carbon propylene glycol connected to a phosphate backbone. It has two enantiomeric forms, (R)-GNA and (S)-GNA." [CHEBI:48015] -comment: Do not use this term for feature annotation. Use GNA_oligo (SO:0001192) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001193 -name: GNA_oligo -def: "An oligo composed of GNA residues." [RSC:cb] -synonym: "glycerol nucleic acid" EXACT [] -synonym: "glycol nucleic acid" EXACT [] -synonym: "GNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Glycerol_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -relationship: has_quality SO:0001192 ! GNA - -[Term] -id: SO:0001194 -name: R_GNA -def: "An attribute describing a GNA sequence in the (R)-GNA enantiomer." [CHEBI:48016] -comment: Do not use this term for feature annotation. Use R_GNA_oligo (SO:0001195) instead. -synonym: "R GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001195 -name: R_GNA_oligo -def: "An oligo composed of (R)-GNA residues." [RSC:cb] -synonym: "(R)-glycerol nucleic acid" EXACT [] -synonym: "(R)-glycol nucleic acid" EXACT [] -synonym: "R GNA oligo" EXACT [] -is_a: SO:0001193 ! 
GNA_oligo -relationship: has_quality SO:0001194 ! R_GNA - -[Term] -id: SO:0001196 -name: S_GNA -def: "An attribute describing a GNA sequence in the (S)-GNA enantiomer." [CHEBI:48017] -comment: Do not use this term for feature annotation. Use S_GNA_oligo (SO:0001197) instead. -synonym: "S GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001197 -name: S_GNA_oligo -def: "An oligo composed of (S)-GNA residues." [RSC:cb] -synonym: "(S)-glycerol nucleic acid" EXACT [] -synonym: "(S)-glycol nucleic acid" EXACT [] -synonym: "S GNA oligo" EXACT [] -is_a: SO:0001193 ! GNA_oligo -relationship: has_quality SO:0001196 ! S_GNA - -[Term] -id: SO:0001198 -name: ds_DNA_viral_sequence -def: "A ds_DNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded DNA." [SO:ke] -synonym: "double stranded DNA virus" EXACT [] -synonym: "ds DNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001199 -name: ss_RNA_viral_sequence -def: "A ss_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as single stranded RNA." [SO:ke] -synonym: "single strand RNA virus" EXACT [] -synonym: "ss RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001200 -name: negative_sense_ssRNA_viral_sequence -def: "A negative_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that is complementary to mRNA and must be converted to positive sense RNA by RNA polymerase before translation." [SO:ke] -synonym: "negative sense single stranded RNA virus" RELATED [] -synonym: "negative sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001201 -name: positive_sense_ssRNA_viral_sequence -def: "A positive_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that can be immediately translated by the host." 
[SO:ke] -synonym: "positive sense single stranded RNA virus" RELATED [] -synonym: "positive sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001202 -name: ambisense_ssRNA_viral_sequence -def: "A ambisense_RNA_virus is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus with both messenger and anti messenger polarity." [SO:ke] -synonym: "ambisense single stranded RNA virus" EXACT [] -synonym: "ambisense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001203 -name: RNA_polymerase_promoter -def: "A region (DNA) to which RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "RNA polymerase promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0001204 -name: Phage_RNA_Polymerase_Promoter -def: "A region (DNA) to which Bacteriophage RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "Phage RNA Polymerase Promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0001205 -name: SP6_RNA_Polymerase_Promoter -def: "A region (DNA) to which the SP6 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "SP6 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001206 -name: T3_RNA_Polymerase_Promoter -def: "A DNA sequence to which the T3 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T3 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001207 -name: T7_RNA_Polymerase_Promoter -def: "A region (DNA) to which the T7 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T7 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001208 -name: five_prime_EST -def: "An EST read from the 5' end of a transcript that usually codes for a protein. 
These regions tend to be conserved across species and do not change much within a gene family." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "5' EST" EXACT [] -synonym: "five prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001209 -name: three_prime_EST -def: "An EST read from the 3' end of a transcript. They are more likely to fall within non-coding, or untranslated regions(UTRs)." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "3' EST" EXACT [] -synonym: "three prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001210 -name: translational_frameshift -def: "The region of mRNA (not divisible by 3 bases) that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "ribosomal frameshift" EXACT [] -synonym: "translational frameshift" EXACT [] -xref: http://en.wikipedia.org/wiki/Translational_frameshift "wiki" -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0001211 -name: plus_1_translational_frameshift -def: "The region of mRNA 1 base long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 1 ribosomal frameshift" EXACT [] -synonym: "plus 1 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001212 -name: plus_2_translational_frameshift -def: "The region of mRNA 2 bases long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 2 ribosomal frameshift" EXACT [] -synonym: "plus 2 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001213 -name: group_III_intron -def: "Group III introns are introns found in the mRNA of the plastids of euglenoid protists. They are spliced by a two step transesterification with bulged adenosine as initiating nucleophile." 
[PMID:11377794] -comment: GO:0000374. -synonym: "group III intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_III_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -def: "The maximal intersection of exon and UTR." [SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with a stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001216 -name: endonuclease_spliced_intron -def: "An intron that spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -synonym: "endonuclease spliced intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0001217 -name: protein_coding_gene -synonym: "protein coding gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000010 ! protein_coding - -[Term] -id: SO:0001218 -name: transgenic_insertion -def: "An insertion that derives from another organism, via the use of recombinant DNA technology." [SO:bm] -synonym: "transgenic insertion" EXACT [] -is_a: SO:0000667 ! insertion -relationship: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0001219 -name: retrogene -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000569 ! retrotransposed - -[Term] -id: SO:0001220 -name: silenced_by_RNA_interference -def: "An attribute describing an epigenetic process where a gene is inactivated by RNA interference." [RSC:cb] -comment: RNA interference is GO:0016246. 
-synonym: "silenced by RNA interference" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001221 -name: silenced_by_histone_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by histone modification." [RSC:cb] -comment: Histone modification is GO:0016570. -synonym: "silenced by histone modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001222 -name: silenced_by_histone_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone methylation." [RSC:cb] -comment: Histone methylation is GO:0016571. -synonym: "silenced by histone methylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001223 -name: silenced_by_histone_deacetylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone deacetylation." [RSC:cb] -comment: Histone deacetylation is GO:0016573. -synonym: "silenced by histone deacetylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001224 -name: gene_silenced_by_RNA_interference -def: "A gene that is silenced by RNA interference." [SO:xp] -synonym: "gene silenced by RNA interference" EXACT [] -synonym: "RNA interference silenced gene" EXACT [] -synonym: "RNAi silenced gene" EXACT [] -is_a: SO:0000127 ! silenced_gene -relationship: has_quality SO:0001220 ! silenced_by_RNA_interference - -[Term] -id: SO:0001225 -name: gene_silenced_by_histone_modification -def: "A gene that is silenced by histone modification." [SO:xp] -synonym: "gene silenced by histone modification" EXACT [] -is_a: SO:0000127 ! silenced_gene -relationship: has_quality SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001226 -name: gene_silenced_by_histone_methylation -def: "A gene that is silenced by histone methylation." [SO:xp] -synonym: "gene silenced by histone methylation" EXACT [] -is_a: SO:0001225 ! 
gene_silenced_by_histone_modification -relationship: has_quality SO:0001222 ! silenced_by_histone_methylation - -[Term] -id: SO:0001227 -name: gene_silenced_by_histone_deacetylation -def: "A gene that is silenced by histone deacetylation." [SO:xp] -synonym: "gene silenced by histone deacetylation" EXACT [] -is_a: SO:0001225 ! gene_silenced_by_histone_modification -relationship: has_quality SO:0001223 ! silenced_by_histone_deacetylation - -[Term] -id: SO:0001228 -name: dihydrouridine -def: "A modified RNA base in which the 5,6-dihydrouracil is bound to the ribose ring." [RSC:cb] -synonym: "D" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Dihydrouridine "wiki" -xref: RNAMOD:051 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001229 -name: pseudouridine -def: "A modified RNA base in which the 5- position of the uracil is bound to the ribose ring instead of the 4- position." [RSC:cb] -comment: The free molecule is CHEBI:17802. -synonym: "Y" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Pseudouridine "wiki" -xref: RNAMOD:050 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001230 -name: inosine -def: "A modified RNA base in which hypoxanthine is bound to the ribose ring." [http://library.med.utah.edu/RNAmods/, RSC:cb] -comment: The free molecule is CHEBI:17596. -synonym: "I" RELATED [] -synonym: "RNAMOD:017" RELATED [] -xref: http://en.wikipedia.org/wiki/Inosine "wiki" -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001231 -name: seven_methylguanine -def: "A modified RNA base in which guanine is methylated at the 7- position." [RSC:cb] -comment: The free molecule is CHEBI:2274. -synonym: "7-methylguanine" EXACT [] -synonym: "seven methylguanine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001232 -name: ribothymidine -def: "A modified RNA base in which thymine is bound to the ribose ring." [RSC:cb] -comment: The free molecule is CHEBI:30832. -is_a: SO:0000250 ! 
modified_RNA_base_feature - -[Term] -id: SO:0001233 -name: methylinosine -def: "A modified RNA base in which methylhypoxanthine is bound to the ribose ring." [RSC:cb] -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001234 -name: mobile -def: "An attribute describing a feature that has either intra-genome or intracellular mobility." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Mobile "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001235 -name: replicon -def: "A region containing at least one unique origin of replication and a unique termination site." [ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001237 -name: amino_acid -def: "A sequence feature that corresponds to a single amino acid residue in a polypeptide." [RSC:cb] -comment: Probably in the future this will cross reference to Chebi. -synonym: "amino acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Amino_acid "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0001238 -name: major_TSS -synonym: "major transcription start site" EXACT [] -synonym: "major TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001239 -name: minor_TSS -synonym: "minor TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001240 -name: TSS_region -def: "The region of a gene from the 5' most TSS to the 3' TSS." [BBOP:nw] -synonym: "TSS region" EXACT [] -is_a: SO:0000842 ! gene_component_region -relationship: has_part SO:0000315 ! TSS - -[Term] -id: SO:0001241 -name: encodes_alternate_transcription_start_sites -synonym: "encodes alternate transcription start sites" EXACT [] -is_a: SO:0000401 ! 
gene_attribute - -[Term] -id: SO:0001243 -name: miRNA_primary_transcript_region -def: "A part of an miRNA primary_transcript." [SO:ke] -synonym: "miRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0001244 -name: pre_miRNA -def: "The 60-70 nucleotide region remain after Drosha processing of the primary transcript, that folds back upon itself to form a hairpin sructure." [SO:ke] -synonym: "pre-miRNA" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0001245 -name: miRNA_stem -def: "The stem of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA stem" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001246 -name: miRNA_loop -def: "The loop of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA loop" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001247 -name: synthetic_oligo -def: "An oligo composed of synthetic nucleotides." [SO:ke] -synonym: "synthetic oligo" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0001248 -name: assembly -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001249 -name: fragment_assembly -def: "A fragment assembly is a genome assembly that orders overlapping fragments of the genome based on landmark sequences. The base pair distance between the landmarks is known allowing additivity of lengths." [SO:ke] -synonym: "fragment assembly" EXACT [] -synonym: "physical map" EXACT [] -is_a: SO:0001248 ! 
assembly - -[Term] -id: SO:0001250 -name: fingerprint_map -def: "A fingerprint_map is a physical map composed of restriction fragments." [SO:ke] -synonym: "BACmap" EXACT [] -synonym: "fingerprint map" EXACT [] -synonym: "FPC" EXACT [] -synonym: "FPCmap" EXACT [] -synonym: "restriction map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000412 ! restriction_fragment - -[Term] -id: SO:0001251 -name: STS_map -def: "An STS map is a physical map organized by the unique STS landmarks." [SO:ke] -synonym: "STS map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001252 -name: RH_map -def: "A radiation hybrid map is a physical map." [SO:ke] -synonym: "radiation hybrid map" EXACT [] -synonym: "RH map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001253 -name: sonicate_fragment -def: "A DNA fragment generated by sonication. Sonication is a technique used to sheer DNA into smaller fragments." [SO:ke] -synonym: "sonicate fragment" EXACT [] -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0001254 -name: polyploid -def: "A kind of chromosome variation where the chromosome complement is an exact multiple of the haploid number and is greater than the diploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Polyploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0001255 -name: autopolyploid -def: "A polyploid where the multiple chromosome set was derived from the same organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Autopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001256 -name: allopolyploid -def: "A polyploid where the multiple chromosome set was derived from a different organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Allopolyploid "wiki" -is_a: SO:0001254 ! 
polyploid - -[Term] -id: SO:0001257 -name: homing_endonuclease_binding_site -def: "The binding site (recognition site) of a homing endonuclease. The binding site is typically large." [SO:ke] -synonym: "homing endonuclease binding site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0001258 -name: octamer_motif -def: "A sequence element characteristic of some RNA polymerase II promoters with sequence ATTGCAT that binds Pou-domain transcription factors." [GOC:dh, PMID:3095662] -comment: Nature. 1986 Oct 16-22;323(6089):640-3. -synonym: "octamer motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001259 -name: apicoplast_chromosome -def: "A chromosome originating in an apicoplast." [SO:xp] -synonym: "apicoplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0001260 -name: sequence_collection -def: "A collection of discontinuous sequences." [SO:ke] -synonym: "sequence collection" EXACT [] - -[Term] -id: SO:0001261 -name: overlapping_feature_set -def: "A continuous region of sequence composed of the overlapping of multiple sequence_features, which ultimately provides evidence for another sequence_feature." [SO:ke] -comment: This feature was requested by Nicole, tracker id 1911479. It is required to gather evidence together for annotation. An example would be overlapping ESTs that support an mRNA. -synonym: "overlapping feature set" EXACT [] -is_a: SO:0000703 ! experimental_result_region - -[Term] -id: SO:0001262 -name: overlapping_EST_set -def: "A continous experimental result region extending the length of multiple overlapping EST's." [SO:ke] -synonym: "overlapping EST set" EXACT [] -is_a: SO:0001261 ! overlapping_feature_set -relationship: has_part SO:0000345 ! 
EST - -[Term] -id: SO:0001263 -name: ncRNA_gene -synonym: "ncRNA gen" EXACT [] -synonym: "ncRNA gene" EXACT [] -synonym: "non-coding RNA gene" RELATED [] -is_a: SO:0000704 ! gene - -[Term] -id: SO:0001264 -name: gRNA_gene -synonym: "gRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000979 ! gRNA_encoding - -[Term] -id: SO:0001265 -name: miRNA_gene -synonym: "miRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000571 ! miRNA_encoding - -[Term] -id: SO:0001266 -name: scRNA_gene -synonym: "scRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000575 ! scRNA_encoding - -[Term] -id: SO:0001267 -name: snoRNA_gene -synonym: "snoRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0001268 -name: snRNA_gene -synonym: "snRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0001263 ! ncRNA_gene - -[Term] -id: SO:0001269 -name: SRP_RNA_gene -synonym: "SRP RNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000642 ! SRP_RNA_encoding - -[Term] -id: SO:0001270 -name: stRNA_gene -synonym: "stRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000656 ! stRNA_encoding - -[Term] -id: SO:0001271 -name: tmRNA_gene -synonym: "tmRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000659 ! tmRNA_encoding - -[Term] -id: SO:0001272 -name: tRNA_gene -synonym: "tRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000663 ! tRNA_encoding - -[Term] -id: SO:0001273 -name: modified_adenosine -def: "A modified adenine is an adenine base feature that has been altered." [SO:ke] -synonym: "modified adenosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001274 -name: modified_inosine -def: "A modified inosine is an inosine base feature that has been altered." 
[SO:ke] -synonym: "modified inosine" EXACT [] -is_a: SO:0001230 ! inosine - -[Term] -id: SO:0001275 -name: modified_cytidine -def: "A modified cytidine is a cytidine base feature which has been altered." [SO:ke] -synonym: "modified cytidine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001276 -name: modified_guanosine -synonym: "modified guanosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001277 -name: modified_uridine -synonym: "modified uridine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001278 -name: one_methylinosine -def: "1-methylinosine is a modified insosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylinosine" EXACT [] -synonym: "m1I" EXACT RNAMOD [] -synonym: "one methylinosine" EXACT [] -xref: RNAMOD:018 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001279 -name: one_two_prime_O_dimethylinosine -def: "1,2'-O-dimethylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylinosine" EXACT [] -synonym: "m'Im" EXACT RNAMOD [] -synonym: "one two prime O dimethylinosine" EXACT [] -xref: RNAMOD:019 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001280 -name: two_prime_O_methylinosine -def: "2'-O-methylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylinosine" EXACT [] -synonym: "Im" EXACT RNAMOD [] -synonym: "two prime O methylinosine" EXACT [] -xref: RNAMOD:081 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001281 -name: three_methylcytidine -def: "3-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylcytidine" EXACT [] -synonym: "m3C" EXACT RNAMOD [] -synonym: "three methylcytidine" EXACT [] -xref: RNAMOD:020 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001282 -name: five_methylcytidine -def: "5-methylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylcytidine" EXACT [] -synonym: "five methylcytidine" EXACT [] -synonym: "m5C" EXACT RNAMOD [] -xref: RNAMOD:021 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001283 -name: two_prime_O_methylcytidine -def: "2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylcytidine" EXACT [] -synonym: "Cm" EXACT RNAMOD [] -synonym: "two prime O methylcytidine" EXACT [] -xref: RNAMOD:022 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001284 -name: two_thiocytidine -def: "2-thiocytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiocytidine" EXACT [] -synonym: "s2C" EXACT RNAMOD [] -synonym: "two thiocytidine" EXACT [] -xref: RNAMOD:023 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001285 -name: N4_acetylcytidine -def: "N4-acetylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4C" EXACT RNAMOD [] -synonym: "N4 acetylcytidine" EXACT [] -synonym: "N4-acetylcytidine" EXACT [] -xref: RNAMOD:024 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001286 -name: five_formylcytidine -def: "5-formylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formylcytidine" EXACT [] -synonym: "f5C" EXACT RNAMOD [] -synonym: "five formylcytidine" EXACT [] -xref: RNAMOD:025 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001287 -name: five_two_prime_O_dimethylcytidine -def: "5,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethylcytidine" EXACT [] -synonym: "five two prime O dimethylcytidine" EXACT [] -synonym: "m5Cm" EXACT RNAMOD [] -xref: RNAMOD:026 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001288 -name: N4_acetyl_2_prime_O_methylcytidine -def: "N4-acetyl-2'-O-methylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "ac4Cm" EXACT RNAMOD [] -synonym: "N4 acetyl 2 prime O methylcytidine" EXACT [] -synonym: "N4-acetyl-2'-O-methylcytidine" EXACT [] -xref: RNAMOD:027 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001289 -name: lysidine -def: "Lysidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "k2C" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Lysidine "wiki" -xref: RNAMOD:028 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001290 -name: N4_methylcytidine -def: "N4-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4C" EXACT RNAMOD [] -synonym: "N4 methylcytidine" EXACT [] -synonym: "N4-methylcytidine" EXACT [] -xref: RNAMOD:082 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001291 -name: N4_2_prime_O_dimethylcytidine -def: "N4,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4Cm" EXACT RNAMOD [] -synonym: "N4 2 prime O dimethylcytidine" EXACT [] -synonym: "N4,2'-O-dimethylcytidine" EXACT [] -xref: RNAMOD:083 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001292 -name: five_hydroxymethylcytidine -def: "5-hydroxymethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxymethylcytidine" EXACT [] -synonym: "five hydroxymethylcytidine" EXACT [] -synonym: "hm5C" EXACT RNAMOD [] -xref: RNAMOD:084 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001293 -name: five_formyl_two_prime_O_methylcytidine -def: "5-formyl-2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formyl-2'-O-methylcytidine" EXACT [] -synonym: "f5Cm" EXACT RNAMOD [] -synonym: "five formyl two prime O methylcytidine" EXACT [] -xref: RNAMOD:095 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001294 -name: N4_N4_2_prime_O_trimethylcytidine -def: "N4_N4_2_prime_O_trimethylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "m42Cm" EXACT RNAMOD [] -synonym: "N4,N4,2'-O-trimethylcytidine" EXACT [] -xref: RNAMOD:107 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001295 -name: one_methyladenosine -def: "1_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyladenosine" EXACT [] -synonym: "m1A" EXACT RNAMOD [] -synonym: "one methyladenosine" EXACT [] -xref: RNAMOD:001 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001296 -name: two_methyladenosine -def: "2_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methyladenosine" EXACT [] -synonym: "m2A" EXACT RNAMOD [] -synonym: "two methyladenosine" EXACT [] -xref: RNAMOD:002 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001297 -name: N6_methyladenosine -def: "N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6A" EXACT RNAMOD [] -synonym: "N6 methyladenosine" EXACT [] -synonym: "N6-methyladenosine" EXACT [] -xref: RNAMOD:003 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001298 -name: two_prime_O_methyladenosine -def: "2prime_O_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyladenosine" EXACT [] -synonym: "Am" EXACT RNAMOD [] -synonym: "two prime O methyladenosine" EXACT [] -xref: RNAMOD:004 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001299 -name: two_methylthio_N6_methyladenosine -def: "2_methylthio_N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-methyladenosine" EXACT [] -synonym: "ms2m6A" EXACT RNAMOD [] -synonym: "two methylthio N6 methyladenosine" EXACT [] -xref: RNAMOD:005 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001300 -name: N6_isopentenyladenosine -def: "N6_isopentenyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "i6A" EXACT RNAMOD [] -synonym: "N6 isopentenyladenosine" EXACT [] -synonym: "N6-isopentenyladenosine" EXACT [] -xref: RNAMOD:006 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001301 -name: two_methylthio_N6_isopentenyladenosine -def: "2_methylthio_N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-isopentenyladenosine" EXACT [] -synonym: "ms2i6A" EXACT RNAMOD [] -synonym: "two methylthio N6 isopentenyladenosine" EXACT [] -xref: RNAMOD:007 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001302 -name: N6_cis_hydroxyisopentenyl_adenosine -def: "N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "io6A" EXACT RNAMOD [] -synonym: "N6 cis hydroxyisopentenyl adenosine" EXACT [] -synonym: "N6-(cis-hydroxyisopentenyl)adenosine" EXACT [] -xref: RNAMOD:008 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001303 -name: two_methylthio_N6_cis_hydroxyisopentenyl_adenosine -def: "2_methylthio_N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-(cis-hydroxyisopentenyl) adenosine" EXACT [] -synonym: "ms2io6A" EXACT RNAMOD [] -synonym: "two methylthio N6 cis hydroxyisopentenyl adenosine" EXACT [] -xref: RNAMOD:009 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001304 -name: N6_glycinylcarbamoyladenosine -def: "N6_glycinylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "g6A" EXACT RNAMOD [] -synonym: "N6 glycinylcarbamoyladenosine" EXACT [] -synonym: "N6-glycinylcarbamoyladenosine" EXACT [] -xref: RNAMOD:010 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001305 -name: N6_threonylcarbamoyladenosine -def: "N6_threonylcarbamoyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-threonylcarbamoyladenosine" EXACT [] -synonym: "t6A" EXACT RNAMOD [] -xref: RNAMOD:011 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001306 -name: two_methylthio_N6_threonyl_carbamoyladenosine -def: "2_methylthio_N6_threonyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-threonyl carbamoyladenosine" EXACT [] -synonym: "ms2t6A" EXACT RNAMOD [] -synonym: "two methylthio N6 threonyl carbamoyladenosine" EXACT [] -xref: RNAMOD:012 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001307 -name: N6_methyl_N6_threonylcarbamoyladenosine -def: "N6_methyl_N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6t6A" EXACT RNAMOD [] -synonym: "N6 methyl N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-methyl-N6-threonylcarbamoyladenosine" EXACT [] -xref: RNAMOD:013 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001308 -name: N6_hydroxynorvalylcarbamoyladenosine -def: "N6_hydroxynorvalylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "hn6A" EXACT RNAMOD [] -synonym: "N6 hydroxynorvalylcarbamoyladenosine" EXACT [] -synonym: "N6-hydroxynorvalylcarbamoyladenosine" EXACT [] -xref: RNAMOD:014 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001309 -name: two_methylthio_N6_hydroxynorvalyl_carbamoyladenosine -def: "2_methylthio_N6_hydroxynorvalyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-hydroxynorvalyl carbamoyladenosine" EXACT [] -synonym: "ms2hn6A" EXACT RNAMOD [] -synonym: "two methylthio N6 hydroxynorvalyl carbamoyladenosine" EXACT [] -xref: RNAMOD:015 -is_a: SO:0001273 ! 
modified_adenosine - -[Term] -id: SO:0001310 -name: two_prime_O_ribosyladenosine_phosphate -def: "2prime_O_ribosyladenosine_phosphate is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosyladenosine (phosphate)" EXACT [] -synonym: "Ar(p)" EXACT RNAMOD [] -synonym: "two prime O ribosyladenosine phosphate" EXACT [] -xref: RNAMOD:016 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001311 -name: N6_N6_dimethyladenosine -def: "N6_N6_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62A" EXACT RNAMOD [] -synonym: "N6,N6-dimethyladenosine" EXACT [] -xref: RNAMOD:080 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001312 -name: N6_2_prime_O_dimethyladenosine -def: "N6_2prime_O_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6Am" EXACT RNAMOD [] -synonym: "N6 2 prime O dimethyladenosine" EXACT [] -synonym: "N6,2'-O-dimethyladenosine" EXACT [] -xref: RNAMOD:088 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001313 -name: N6_N6_2_prime_O_trimethyladenosine -def: "N6_N6_2prime_O_trimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62Am" EXACT RNAMOD [] -synonym: "N6,N6,2'-O-trimethyladenosine" EXACT [] -xref: RNAMOD:089 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001314 -name: one_two_prime_O_dimethyladenosine -def: "1,2'-O-dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethyladenosine" EXACT [] -synonym: "m1Am" EXACT RNAMOD [] -synonym: "one two prime O dimethyladenosine" EXACT [] -xref: RNAMOD:097 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001315 -name: N6_acetyladenosine -def: "N6_acetyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "ac6A" EXACT RNAMOD [] -synonym: "N6 acetyladenosine" EXACT [] -synonym: "N6-acetyladenosine" EXACT [] -xref: RNAMOD:102 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001316 -name: seven_deazaguanosine -def: "7-deazaguanosine is a moddified guanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-deazaguanosine" RELATED [] -synonym: "seven deazaguanosine" EXACT [] -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001317 -name: queuosine -def: "Queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "Q" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Queuosine "wiki" -xref: RNAMOD:043 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001318 -name: epoxyqueuosine -def: "Epoxyqueuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "eQ" EXACT RNAMOD [] -xref: RNAMOD:044 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001319 -name: galactosyl_queuosine -def: "Galactosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "galactosyl queuosine" EXACT [] -synonym: "galactosyl-queuosine" EXACT [] -synonym: "galQ" EXACT RNAMOD [] -xref: RNAMOD:045 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001320 -name: mannosyl_queuosine -def: "Mannosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "mannosyl queuosine" EXACT [] -synonym: "mannosyl-queuosine" EXACT [] -synonym: "manQ" EXACT RNAMOD [] -xref: RNAMOD:046 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001321 -name: seven_cyano_seven_deazaguanosine -def: "7_cyano_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-cyano-7-deazaguanosine" EXACT [] -synonym: "preQ0" EXACT RNAMOD [] -synonym: "seven cyano seven deazaguanosine" EXACT [] -xref: RNAMOD:047 -is_a: SO:0001316 ! 
seven_deazaguanosine - -[Term] -id: SO:0001322 -name: seven_aminomethyl_seven_deazaguanosine -def: "7_aminomethyl_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-aminomethyl-7-deazaguanosine" EXACT [] -synonym: "preQ1" EXACT RNAMOD [] -synonym: "seven aminomethyl seven deazaguanosine" EXACT [] -xref: RNAMOD:048 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001323 -name: archaeosine -def: "Archaeosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "G+" EXACT RNAMOD [] -xref: RNAMOD:049 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001324 -name: one_methylguanosine -def: "1_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylguanosine" EXACT [] -synonym: "m1G" EXACT RNAMOD [] -synonym: "one methylguanosine" EXACT [] -xref: RNAMOD:029 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001325 -name: N2_methylguanosine -def: "N2_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2G" EXACT RNAMOD [] -synonym: "N2 methylguanosine" EXACT [] -synonym: "N2-methylguanosine" EXACT [] -xref: RNAMOD:030 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001326 -name: seven_methylguanosine -def: "7_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "7-methylguanosine" EXACT [] -synonym: "m7G" EXACT RNAMOD [] -synonym: "seven methylguanosine" EXACT [] -xref: RNAMOD:031 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001327 -name: two_prime_O_methylguanosine -def: "2prime_O_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylguanosine" EXACT [] -synonym: "Gm" EXACT RNAMOD [] -synonym: "two prime O methylguanosine" EXACT [] -xref: RNAMOD:032 -is_a: SO:0001276 ! 
modified_guanosine - -[Term] -id: SO:0001328 -name: N2_N2_dimethylguanosine -def: "N2_N2_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22G" EXACT RNAMOD [] -synonym: "N2,N2-dimethylguanosine" EXACT [] -xref: RNAMOD:033 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001329 -name: N2_2_prime_O_dimethylguanosine -def: "N2_2prime_O_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2Gm" EXACT RNAMOD [] -synonym: "N2 2 prime O dimethylguanosine" EXACT [] -synonym: "N2,2'-O-dimethylguanosine" EXACT [] -xref: RNAMOD:034 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001330 -name: N2_N2_2_prime_O_trimethylguanosine -def: "N2_N2_2prime_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22Gmv" EXACT RNAMOD [] -synonym: "N2,N2,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:035 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001331 -name: two_prime_O_ribosylguanosine_phosphate -def: "2prime_O_ribosylguanosine_phosphate is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosylguanosine (phosphate)" EXACT [] -synonym: "Gr(p)" EXACT RNAMOD [] -synonym: "two prime O ribosylguanosine phosphate" EXACT [] -xref: RNAMOD:036 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001332 -name: wybutosine -def: "Wybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "yW" EXACT RNAMOD [] -xref: RNAMOD:037 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001333 -name: peroxywybutosine -def: "Peroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "o2yW" EXACT RNAMOD [] -xref: RNAMOD:038 -is_a: SO:0001276 ! 
modified_guanosine - -[Term] -id: SO:0001334 -name: hydroxywybutosine -def: "Hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW" EXACT RNAMOD [] -xref: RNAMOD:039 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001335 -name: undermodified_hydroxywybutosine -def: "Undermodified_hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW*" EXACT RNAMOD [] -synonym: "undermodified hydroxywybutosine" EXACT [] -xref: RNAMOD:040 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001336 -name: wyosine -def: "Wyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "IMG" EXACT RNAMOD [] -xref: RNAMOD:041 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001337 -name: methylwyosine -def: "Methylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mimG" EXACT RNAMOD [] -xref: RNAMOD:042 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001338 -name: N2_7_dimethylguanosine -def: "N2_7_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7G" EXACT RNAMOD [] -synonym: "N2 7 dimethylguanosine" EXACT [] -synonym: "N2,7-dimethylguanosine" EXACT [] -xref: RNAMOD:090 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001339 -name: N2_N2_7_trimethylguanosine -def: "N2_N2_7_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,2,7G" EXACT RNAMOD [] -synonym: "N2,N2,7-trimethylguanosine" EXACT [] -xref: RNAMOD:091 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001340 -name: one_two_prime_O_dimethylguanosine -def: "1_2prime_O_dimethylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylguanosine" EXACT [] -synonym: "m1Gm" EXACT RNAMOD [] -synonym: "one two prime O dimethylguanosine" EXACT [] -xref: RNAMOD:096 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001341 -name: four_demethylwyosine -def: "4_demethylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-demethylwyosine" EXACT [] -synonym: "four demethylwyosine" EXACT [] -synonym: "imG-14" EXACT RNAMOD [] -xref: RNAMOD:100 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001342 -name: isowyosine -def: "Isowyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "imG2" EXACT RNAMOD [] -xref: RNAMOD:101 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001343 -name: N2_7_2prirme_O_trimethylguanosine -def: "N2_7_2prirme_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7Gm" EXACT RNAMOD [] -synonym: "N2 7 2prirme O trimethylguanosine" EXACT [] -synonym: "N2,7,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:106 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001344 -name: five_methyluridine -def: "5_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methyluridine" EXACT [] -synonym: "five methyluridine" EXACT [] -synonym: "m5U" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/5-methyluridine "wiki" -xref: RNAMOD:052 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001345 -name: two_prime_O_methyluridine -def: "2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyluridine" EXACT [] -synonym: "two prime O methyluridine" EXACT [] -synonym: "Um" EXACT RNAMOD [] -xref: RNAMOD:053 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001346 -name: five_two_prime_O_dimethyluridine -def: "5_2_prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethyluridine" EXACT [] -synonym: "five two prime O dimethyluridine" EXACT [] -synonym: "m5Um" EXACT RNAMOD [] -xref: RNAMOD:054 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001347 -name: one_methylpseudouridine -def: "1_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylpseudouridine" EXACT [] -synonym: "m1Y" EXACT RNAMOD [] -synonym: "one methylpseudouridine" EXACT [] -xref: RNAMOD:055 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001348 -name: two_prime_O_methylpseudouridine -def: "2prime_O_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylpseudouridine" EXACT [] -synonym: "two prime O methylpseudouridine" EXACT [] -synonym: "Ym" EXACT RNAMOD [] -xref: RNAMOD:056 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001349 -name: two_thiouridine -def: "2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiouridine" EXACT [] -synonym: "s2U" EXACT RNAMOD [] -synonym: "two thiouridine" EXACT [] -xref: RNAMOD:057 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001350 -name: four_thiouridine -def: "4_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-thiouridine" EXACT [] -synonym: "four thiouridine" EXACT [] -synonym: "s4U" EXACT RNAMOD [] -xref: RNAMOD:058 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001351 -name: five_methyl_2_thiouridine -def: "5_methyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyl-2-thiouridine" EXACT [] -synonym: "five methyl 2 thiouridine" EXACT [] -synonym: "m5s2U" EXACT RNAMOD [] -xref: RNAMOD:059 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001352 -name: two_thio_two_prime_O_methyluridine -def: "2_thio_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thio-2'-O-methyluridine" EXACT [] -synonym: "s2Um" EXACT RNAMOD [] -synonym: "two thio two prime O methyluridine" EXACT [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001353 -name: three_three_amino_three_carboxypropyl_uridine -def: "3_3_amino_3_carboxypropyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-(3-amino-3-carboxypropyl)uridine" EXACT [] -synonym: "acp3U" EXACT RNAMOD [] -xref: RNAMOD:061 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001354 -name: five_hydroxyuridine -def: "5_hydroxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxyuridine" EXACT [] -synonym: "five hydroxyuridine" EXACT [] -synonym: "ho5U" EXACT RNAMOD [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001355 -name: five_methoxyuridine -def: "5_methoxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxyuridine" EXACT [] -synonym: "five methoxyuridine" EXACT [] -synonym: "mo5U" EXACT RNAMOD [] -xref: RNAMOD:063 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001356 -name: uridine_five_oxyacetic_acid -def: "Uridine_5_oxyacetic_acid is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "cmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid" EXACT [] -synonym: "uridine five oxyacetic acid" EXACT [] -xref: RNAMOD:064 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001357 -name: uridine_five_oxyacetic_acid_methyl_ester -def: "Uridine_5_oxyacetic_acid_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mcmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid methyl ester" EXACT [] -synonym: "uridine five oxyacetic acid methyl ester" EXACT [] -xref: RNAMOD:065 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001358 -name: five_carboxyhydroxymethyl_uridine -def: "5_carboxyhydroxymethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine" EXACT [] -synonym: "chm5U" EXACT RNAMOD [] -synonym: "five carboxyhydroxymethyl uridine" EXACT [] -xref: RNAMOD:066 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001359 -name: five_carboxyhydroxymethyl_uridine_methyl_ester -def: "5_carboxyhydroxymethyl_uridine_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine methyl ester" EXACT [] -synonym: "five carboxyhydroxymethyl uridine methyl ester" EXACT [] -synonym: "mchm5U" EXACT RNAMOD [] -xref: RNAMOD:067 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001360 -name: five_methoxycarbonylmethyluridine -def: "Five_methoxycarbonylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyluridine" EXACT [] -synonym: "five methoxycarbonylmethyluridine" EXACT [] -synonym: "mcm5U" EXACT RNAMOD [] -xref: RNAMOD:068 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001361 -name: five_methoxycarbonylmethyl_two_prime_O_methyluridine -def: "Five_methoxycarbonylmethyl_2_prime_O_methyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five methoxycarbonylmethyl two prime O methyluridine" EXACT [] -synonym: "mcm5Um" EXACT RNAMOD [] -xref: RNAMOD:069 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001362 -name: five_methoxycarbonylmethyl_two_thiouridine -def: "5_methoxycarbonylmethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2-thiouridine" EXACT [] -synonym: "five methoxycarbonylmethyl two thiouridine" EXACT [] -synonym: "mcm5s2U" EXACT RNAMOD [] -xref: RNAMOD:070 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001363 -name: five_aminomethyl_two_thiouridine -def: "5_aminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-aminomethyl-2-thiouridine" EXACT [] -synonym: "five aminomethyl two thiouridine" EXACT [] -synonym: "nm5s2U" EXACT RNAMOD [] -xref: RNAMOD:071 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001364 -name: five_methylaminomethyluridine -def: "5_methylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyluridine" EXACT [] -synonym: "five methylaminomethyluridine" EXACT [] -synonym: "mnm5U" EXACT RNAMOD [] -xref: RNAMOD:072 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001365 -name: five_methylaminomethyl_two_thiouridine -def: "5_methylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-thiouridine" EXACT [] -synonym: "five methylaminomethyl two thiouridine" EXACT [] -synonym: "mnm5s2U" EXACT RNAMOD [] -xref: RNAMOD:073 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001366 -name: five_methylaminomethyl_two_selenouridine -def: "5_methylaminomethyl_2_selenouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-selenouridine" EXACT [] -synonym: "five methylaminomethyl two selenouridine" EXACT [] -synonym: "mnm5se2U" EXACT RNAMOD [] -xref: RNAMOD:074 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001367 -name: five_carbamoylmethyluridine -def: "5_carbamoylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyluridine" EXACT [] -synonym: "five carbamoylmethyluridine" EXACT [] -synonym: "ncm5U" EXACT RNAMOD [] -xref: RNAMOD:075 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001368 -name: five_carbamoylmethyl_two_prime_O_methyluridine -def: "5_carbamoylmethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five carbamoylmethyl two prime O methyluridine" EXACT [] -synonym: "ncm5Um" EXACT RNAMOD [] -xref: RNAMOD:076 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001369 -name: five_carboxymethylaminomethyluridine -def: "5_carboxymethylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyluridine" EXACT [] -synonym: "cmnm5U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyluridine" EXACT [] -xref: RNAMOD:077 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001370 -name: five_carboxymethylaminomethyl_two_prime_O_methyluridine -def: "5_carboxymethylaminomethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl- 2'-O-methyluridine" EXACT [] -synonym: "cmnm5Um" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two prime O methyluridine" EXACT [] -xref: RNAMOD:078 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001371 -name: five_carboxymethylaminomethyl_two_thiouridine -def: "5_carboxymethylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl-2-thiouridine" EXACT [] -synonym: "cmnm5s2U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two thiouridine" EXACT [] -xref: RNAMOD:079 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001372 -name: three_methyluridine -def: "3_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methyluridine" EXACT [] -synonym: "m3U" EXACT RNAMOD [] -synonym: "three methyluridine" EXACT [] -xref: RNAMOD:085 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001373 -name: one_methyl_three_three_amino_three_carboxypropyl_pseudouridine -def: "1_methyl_3_3_amino_3_carboxypropyl_pseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyl-3-(3-amino-3-carboxypropyl) pseudouridine" EXACT [] -synonym: "m1acp3Y" EXACT RNAMOD [] -xref: RNAMOD:086 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001374 -name: five_carboxymethyluridine -def: "5_carboxymethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethyluridine" EXACT [] -synonym: "cm5U" EXACT RNAMOD [] -synonym: "five carboxymethyluridine" EXACT [] -xref: RNAMOD:087 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001375 -name: three_two_prime_O_dimethyluridine -def: "3_2prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3,2'-O-dimethyluridine" EXACT [] -synonym: "m3Um" EXACT RNAMOD [] -synonym: "three two prime O dimethyluridine" EXACT [] -xref: RNAMOD:092 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001376 -name: five_methyldihydrouridine -def: "5_methyldihydrouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyldihydrouridine" EXACT [] -synonym: "five methyldihydrouridine" EXACT [] -synonym: "m5D" EXACT RNAMOD [] -xref: RNAMOD:093 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001377 -name: three_methylpseudouridine -def: "3_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylpseudouridine" EXACT [] -synonym: "m3Y" EXACT RNAMOD [] -synonym: "three methylpseudouridine" EXACT [] -xref: RNAMOD:094 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001378 -name: five_taurinomethyluridine -def: "5_taurinomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyluridine" EXACT [] -synonym: "five taurinomethyluridine" EXACT [] -synonym: "tm5U" EXACT RNAMOD [] -xref: RNAMOD:098 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001379 -name: five_taurinomethyl_two_thiouridine -def: "5_taurinomethyl_2_thiouridineis a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyl-2-thiouridine" EXACT [] -synonym: "five taurinomethyl two thiouridine" EXACT [] -synonym: "tm5s2U" EXACT RNAMOD [] -xref: RNAMOD:099 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001380 -name: five_isopentenylaminomethyl_uridine -def: "5_isopentenylaminomethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)uridine" EXACT [] -synonym: "five isopentenylaminomethyl uridine" EXACT [] -synonym: "inm5U" EXACT RNAMOD [] -xref: RNAMOD:103 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001381 -name: five_isopentenylaminomethyl_two_thiouridine -def: "5_isopentenylaminomethyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2-thiouridine" EXACT [] -synonym: "five isopentenylaminomethyl two thiouridine" EXACT [] -synonym: "inm5s2U" EXACT RNAMOD [] -xref: RNAMOD:104 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001382 -name: five_isopentenylaminomethyl_two_prime_O_methyluridine -def: "5_isopentenylaminomethyl_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2'-O-methyluridine" EXACT [] -synonym: "five isopentenylaminomethyl two prime O methyluridine" EXACT [] -synonym: "inm5Um" EXACT RNAMOD [] -xref: RNAMOD:105 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001383 -name: histone_binding_site -def: "A binding site that, in the nucleotide molecule, interacts selectively and non-covalently with polypeptide residues of a histone." [SO:ke] -synonym: "histone binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site - -[Term] -id: SO:0001384 -name: CDS_fragment -synonym: "CDS fragment" EXACT [] -synonym: "incomplete CDS" EXACT [] -is_a: SO:0000316 ! CDS -relationship: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001385 -name: modified_amino_acid_feature -def: "A post translationally modified amino acid feature." [SO:ke] -synonym: "modified amino acid feature" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001386 -name: modified_glycine -def: "A post translationally modified glycine amino acid feature." [SO:ke] -synonym: "ModGly" EXACT AAMOD [] -synonym: "modified glycine" EXACT [] -xref: MOD:00908 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001387 -name: modified_L_alanine -def: "A post translationally modified alanine amino acid feature." [SO:ke] -synonym: "ModAla" EXACT AAMOD [] -synonym: "modified L alanine" EXACT [] -synonym: "modified L-alanine" EXACT [] -xref: MOD:00901 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001388 -name: modified_L_asparagine -def: "A post translationally modified asparagine amino acid feature." [SO:ke] -synonym: "ModAsn" EXACT AAMOD [] -synonym: "modified L asparagine" EXACT [] -synonym: "modified L-asparagine" EXACT [] -xref: MOD:00903 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001389 -name: modified_L_aspartic_acid -def: "A post translationally modified aspartic acid amino acid feature." [SO:ke] -synonym: "ModAsp" EXACT AAMOD [] -synonym: "modified L aspartic acid" EXACT [] -synonym: "modified L-aspartic acid" EXACT [] -xref: MOD:00904 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001390 -name: modified_L_cysteine -def: "A post translationally modified cysteine amino acid feature." [SO:ke] -synonym: "ModCys" EXACT AAMOD [] -synonym: "modified L cysteine" EXACT [] -synonym: "modified L-cysteine" EXACT [] -xref: MOD:00905 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001391 -name: modified_L_glutamic_acid -synonym: "ModGlu" EXACT AAMOD [] -synonym: "modified L glutamic acid" EXACT [] -synonym: "modified L-glutamic acid" EXACT [] -xref: MOD:00906 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001392 -name: modified_L_threonine -def: "A post translationally modified threonine amino acid feature." [SO:ke] -synonym: "modified L threonine" EXACT [] -synonym: "modified L-threonine" EXACT [] -synonym: "ModThr" EXACT AAMOD [] -xref: MOD:00917 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001393 -name: modified_L_tryptophan -def: "A post translationally modified tryptophan amino acid feature." [SO:ke] -synonym: "modified L tryptophan" EXACT [] -synonym: "modified L-tryptophan" EXACT [] -synonym: "ModTrp" EXACT AAMOD [] -xref: MOD:00918 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001394 -name: modified_L_glutamine -def: "A post translationally modified glutamine amino acid feature." 
[SO:ke] -synonym: "ModGln" EXACT [] -synonym: "modified L glutamine" EXACT [] -synonym: "modified L-glutamine" EXACT [] -xref: MOD:00907 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001395 -name: modified_L_methionine -def: "A post translationally modified methionine amino acid feature." [SO:ke] -synonym: "modified L methionine" EXACT [] -synonym: "modified L-methionine" EXACT [] -synonym: "ModMet" EXACT AAMOD [] -xref: MOD:00913 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001396 -name: modified_L_isoleucine -def: "A post translationally modified isoleucine amino acid feature." [SO:ke] -synonym: "modified L isoleucine" EXACT [] -synonym: "modified L-isoleucine" EXACT [] -synonym: "ModIle" EXACT AAMOD [] -xref: MOD:00910 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001397 -name: modified_L_phenylalanine -def: "A post translationally modified phenylalanine amino acid feature." [SO:ke] -synonym: "modified L phenylalanine" EXACT [] -synonym: "modified L-phenylalanine" EXACT [] -synonym: "ModPhe" EXACT AAMOD [] -xref: MOD:00914 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001398 -name: modified_L_histidine -def: "A post translationally modified histidie amino acid feature." [SO:ke] -synonym: "ModHis" EXACT [] -synonym: "modified L histidine" EXACT [] -synonym: "modified L-histidine" EXACT [] -xref: MOD:00909 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001399 -name: modified_L_serine -def: "A post translationally modified serine amino acid feature." [SO:ke] -synonym: "modified L serine" EXACT [] -synonym: "modified L-serine" EXACT [] -synonym: "MosSer" EXACT AAMOD [] -xref: MOD:00916 "http://www.psidev.info/index.php?q=node/104" -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001400 -name: modified_L_lysine -def: "A post translationally modified lysine amino acid feature." 
[SO:ke] -synonym: "modified L lysine" EXACT [] -synonym: "modified L-lysine" EXACT [] -synonym: "ModLys" EXACT AAMOD [] -xref: MOD:00912 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001401 -name: modified_L_leucine -def: "A post translationally modified leucine amino acid feature." [SO:ke] -synonym: "modified L leucine" EXACT [] -synonym: "modified L-leucine " EXACT [] -synonym: "ModLeu" EXACT AAMOD [] -xref: MOD:00911 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001402 -name: modified_L_selenocysteine -def: "A post translationally modified selenocysteine amino acid feature." [SO:ke] -synonym: "modified L selenocysteine" EXACT [] -synonym: "modified L-selenocysteine" EXACT [] -xref: MOD:01158 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001403 -name: modified_L_valine -def: "A post translationally modified valine amino acid feature." [SO:ke] -synonym: "modified L valine" EXACT [] -synonym: "modified L-valine" EXACT [] -synonym: "ModVal" EXACT AAMOD [] -xref: MOD:00920 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001404 -name: modified_L_proline -def: "A post translationally modified proline amino acid feature." [SO:ke] -synonym: "modified L proline" EXACT [] -synonym: "modified L-proline " EXACT [] -synonym: "ModPro" EXACT AAMOD [] -xref: MOD:00915 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001405 -name: modified_L_tyrosine -def: "A post translationally modified tyrosine amino acid feature." [SO:ke] -synonym: "modified L tyrosine" EXACT [] -synonym: "modified L-tyrosine" EXACT [] -synonym: "ModTry" EXACT AAMOD [] -xref: MOD:00919 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001406 -name: modified_L_arginine -def: "A post translationally modified arginine amino acid feature." [SO:ke] -synonym: "ModArg" EXACT AAMOD [] -synonym: "modified L arginine" EXACT [] -synonym: "modified L-arginine" EXACT [] -xref: MOD:00902 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001407 -name: peptidyl -def: "An attribute describing the nature of a proteinaceous polymer, where by the amino acid units are joined by peptide bonds." [SO:ke] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0001408 -name: cleaved_for_gpi_anchor_region -def: "The C-terminal residues of a polypeptide which are exchanged for a GPI-anchor." [EBI:rh] -synonym: "cleaved for gpi anchor region" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001409 -name: biomaterial_region -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001410 -name: experimental_feature -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -def: "A region that is defined according to its relations with other regions within the same sequence." [SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001413 -name: translocation_breakpoint -def: "The point within a chromosome where a translocation begins or ends." [SO:cb] -synonym: "translocation breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001414 -name: insertion_breakpoint -def: "The point within a chromosome where a insertion begins or ends." 
[SO:cb] -synonym: "insertion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001415 -name: deletion_breakpoint -def: "The point within a chromosome where a deletion begins or ends." [SO:cb] -synonym: "deletion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001416 -name: five_prime_flanking_region -def: "A flanking region located five prime of a specific region." [SO:chado] -synonym: "5' flanking region" RELATED [] -synonym: "five prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001417 -name: three_prime_flanking_region -def: "A flanking region located three prime of a specific region." [SO:chado] -synonym: "3' flanking region" RELATED [] -synonym: "three prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001418 -name: transcribed_fragment -def: "An experimental region, defined by a tiling array experiment to be transcribed at some level." [SO:ke] -comment: Term requested by the MODencode group. -synonym: "transcribed fragment" EXACT [] -synonym: "transfrag" RELATED [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001419 -name: cis_splice_site -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -def: "Primary transcript region bordering trans-splice junction." [SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001421 -name: splice_junction -def: "The boundary between an intron and an exon." [SO:ke] -synonym: "splice boundary" EXACT [] -synonym: "splice junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001422 -name: conformational_switch -def: "A region of a polypeptide, involved in the transition from one conformational state to another." 
[SO:ke] -comment: MM Young, K Kirshenbaum, KA Dill & S Highsmith. Predicting conformational switches in proteins. Protein Science, 1999, 8, 1752-64. K. Kirshenbaum, M.M. Young and S. Highsmith. Predicting Allosteric Switches in Myosins. Protein Science 8(9):1806-1815. 1999. -synonym: "polypeptide conformational switch" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001423 -name: dye_terminator_read -def: "A read produced by the dye terminator method of sequencing." [SO:ke] -synonym: "dye terminator read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001424 -name: pyrosequenced_read -def: "A read produced by pyrosequencing technology." [SO:ke] -comment: An example is a read produced by Roche 454 technology. -synonym: "pyorsequenced read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001425 -name: ligation_based_read -def: "A read produced by ligation based sequencing technologies." [SO:ke] -comment: An example of this kind of read is one produced by ABI SOLiD. -synonym: "ligation based read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001426 -name: polymerase_synthesis_read -def: "A read produced by the polymerase based sequence by synthesis method." [SO:ke] -comment: An example is a read produced by Illumina technology. -synonym: "polymerase synthesis read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001427 -name: cis_regulatory_frameshift_element -def: "A structural region in an RNA molecule which promotes ribosomal frameshifting of cis coding sequence." [RFAM:jd] -synonym: "cis regulatory frameshift element" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001428 -name: expressed_sequence_assembly -def: "A sequence assembly derived from expressed sequences." [SO:ke] -comment: From tracker [ 2372385 ] expressed_sequence_assembly. -synonym: "expressed sequence assembly" EXACT [] -is_a: SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0001429 -name: DNA_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with DNA." [SO:ke] -synonym: "DNA binding site" EXACT [] -is_a: SO:0001655 ! nucleotide_binding_site - -[Term] -id: SO:0001431 -name: cryptic_gene -def: "A gene that is not transcribed under normal conditions and is not critical to normal cellular functioning." [SO:ke] -synonym: "cryptic gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000976 ! cryptic - -[Term] -id: SO:0001432 -name: sequence_variant_affecting_polyadenylation -comment: OBSOLETE: This term was deleted as it conflated more than one term. The alteration is separate from the effect. -synonym: "mutation affecting polyadenylation" RELATED [] -synonym: "sequence variant affecting polyadenylation" EXACT [] -is_obsolete: true -replaced_by: SO:0001545 - -[Term] -id: SO:0001433 -name: three_prime_RACE_clone -def: "A three prime RACE (Rapid Amplification of cDNA Ends) clone is a cDNA clone copied from the 3' end of an mRNA (using a poly-dT primer to capture the polyA tail and a gene-specific or randomly primed 5' primer), and spliced into a vector for propagation in a suitable host." [modENCODE:nlw] -synonym: "3' RACE clone" RELATED [] -is_a: SO:0000317 ! cDNA_clone - -[Term] -id: SO:0001434 -name: cassette_pseudogene -def: "A cassette pseudogene is a kind of gene in an inactive form which may recombine at a telomeric locus to form a functional copy." [SO:ke] -comment: Requested by the Trypanosome community. -synonym: "cassette pseudogene" EXACT [] -synonym: "cassette type psedogene" RELATED [] -is_a: SO:0001760 ! non_processed_pseudogene - -[Term] -id: SO:0001435 -name: alanine -comment: A place holder for a cross product with chebi. -synonym: "A" EXACT aa1 [] -synonym: "Ala" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001436 -name: valine -comment: A place holder for a cross product with chebi. 
-synonym: "V" EXACT aa1 [] -synonym: "Val" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001437 -name: leucine -comment: A place holder for a cross product with chebi. -synonym: "L" EXACT aa1 [] -synonym: "Leu" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001438 -name: isoleucine -comment: A place holder for a cross product with chebi. -synonym: "I" EXACT aa1 [] -synonym: "Ile" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001439 -name: proline -comment: A place holder for a cross product with chebi. -synonym: "P" EXACT aa1 [] -synonym: "Pro" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001440 -name: tryptophan -comment: A place holder for a cross product with chebi. -synonym: "Trp" EXACT aa3 [] -synonym: "W" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001441 -name: phenylalanine -comment: A place holder for a cross product with chebi. -synonym: "F" EXACT aa1 [] -synonym: "Phe" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001442 -name: methionine -comment: A place holder for a cross product with chebi. -synonym: "M" EXACT aa1 [] -synonym: "Met" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001443 -name: glycine -comment: A place holder for a cross product with chebi. -synonym: "G" EXACT aa1 [] -synonym: "Gly" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001444 -name: serine -comment: A place holder for a cross product with chebi. -synonym: "S" EXACT aa1 [] -synonym: "Ser" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001445 -name: threonine -comment: A place holder for a cross product with chebi. -synonym: "T" EXACT aa1 [] -synonym: "Thr" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001446 -name: tyrosine -comment: A place holder for a cross product with chebi. -synonym: "Tyr" EXACT aa3 [] -synonym: "Y" EXACT aa1 [] -is_a: SO:0001237 ! 
amino_acid - -[Term] -id: SO:0001447 -name: cysteine -comment: A place holder for a cross product with chebi. -synonym: "C" EXACT aa1 [] -synonym: "Cys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001448 -name: glutamine -comment: A place holder for a cross product with chebi. -synonym: "Gln" EXACT aa3 [] -synonym: "Q" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001449 -name: asparagine -comment: A place holder for a cross product with chebi. -synonym: "Asn" EXACT aa3 [] -synonym: "N" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001450 -name: lysine -comment: A place holder for a cross product with chebi. -synonym: "K" EXACT aa1 [] -synonym: "Lys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001451 -name: arginine -comment: A place holder for a cross product with chebi. -synonym: "Arg" EXACT aa3 [] -synonym: "R" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001452 -name: histidine -comment: A place holder for a cross product with chebi. -synonym: "H" EXACT aa1 [] -synonym: "His" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001453 -name: aspartic_acid -comment: A place holder for a cross product with chebi. -synonym: "Asp" EXACT aa3 [] -synonym: "aspartic acid" EXACT [] -synonym: "D" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001454 -name: glutamic_acid -comment: A place holder for a cross product with chebi. -synonym: "E" EXACT aa1 [] -synonym: "Glu" EXACT aa3 [] -synonym: "glutamic acid" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001455 -name: selenocysteine -comment: A place holder for a cross product with chebi. -synonym: "Sec" EXACT aa3 [] -synonym: "U" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001456 -name: pyrrolysine -comment: A place holder for a cross product with chebi. -synonym: "O" EXACT aa1 [] -synonym: "Pyl" EXACT aa3 [] -is_a: SO:0001237 ! 
amino_acid - -[Term] -id: SO:0001457 -name: transcribed_cluster -def: "A region defined by a set of transcribed sequences from the same gene or expressed pseudogene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "transcribed cluster" EXACT [] -synonym: "unigene cluster" RELATED [] -is_a: SO:0001410 ! experimental_feature -relationship: has_part SO:0000695 ! reagent - -[Term] -id: SO:0001458 -name: unigene_cluster -def: "A kind of transcribed_cluster defined by a set of transcribed sequences from the a unique gene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "unigene cluster" RELATED [] -is_a: SO:0001457 ! transcribed_cluster - -[Term] -id: SO:0001459 -name: CRISPR -def: "Clustered Palindromic Repeats interspersed with bacteriophage derived spacer sequences." [RFAM:jd] -synonym: "Clustered_Regularly_Interspaced_Short_Palindromic_Repeat" EXACT [] -synonym: "CRISPR element" EXACT [] -xref: http:en.wikipedia.org/wiki/CRISPR -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0001460 -name: insulator_binding_site -def: "A binding site that, in an insulator region of a nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -comment: See tracker ID 2060908. -synonym: "insulator binding site" RELATED [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -relationship: part_of SO:0000627 ! insulator - -[Term] -id: SO:0001461 -name: enhancer_binding_site -def: "A binding site that, in the enhancer region of a nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -synonym: "enhancer binding site" RELATED [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -relationship: part_of SO:0000165 ! enhancer - -[Term] -id: SO:0001462 -name: contig_collection -def: "A collection of contigs." [SO:ke] -comment: See tracker ID: 2138359. 
-synonym: "contig collection" EXACT [] -is_a: SO:0001085 ! sequence_conflict -is_a: SO:0001260 ! sequence_collection -relationship: has_part SO:0000149 ! contig - -[Term] -id: SO:0001463 -name: lincRNA -def: "A multiexonic non-coding RNA transcribed by RNA polymerase II." [PMID:19182780, SO:ke] -synonym: "large intervening non-coding RNA" EXACT [] -synonym: "long intergenic non-coding RNA" EXACT [] -is_a: SO:0001877 ! lnc_RNA - -[Term] -id: SO:0001464 -name: UST -def: "An EST spanning part or all of the untranslated regions of a protein-coding transcript." [SO:nlw] -synonym: "UTR sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001465 -name: three_prime_UST -def: "A UST located in the 3'UTR of a protein-coding transcript." [SO:nlw] -synonym: "3' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001466 -name: five_prime_UST -def: "An UST located in the 5'UTR of a protein-coding transcript." [SO:nlw] -synonym: "5' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001467 -name: RST -def: "A tag produced from a single sequencing read from a RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "RACE sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001468 -name: three_prime_RST -def: "A tag produced from a single sequencing read from a 3'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "3' RST" EXACT [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001469 -name: five_prime_RST -def: "A tag produced from a single sequencing read from a 5'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "5' RST" RELATED [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001470 -name: UST_match -def: "A match against an UST sequence." [SO:nlw] -synonym: "UST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001471 -name: RST_match -def: "A match against an RST sequence." [SO:nlw] -synonym: "RST match" EXACT [] -is_a: SO:0000102 ! 
expressed_sequence_match - -[Term] -id: SO:0001472 -name: primer_match -def: "A nucleotide match to a primer sequence." [SO:nlw] -synonym: "primer match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0001473 -name: miRNA_antiguide -def: "A region of the pri miRNA that basepairs with the guide to form the hairpin." [SO:ke] -synonym: "miRNA antiguide " EXACT [] -synonym: "miRNA passenger strand" EXACT [] -synonym: "miRNA star" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-05-27T03:35:43Z - -[Term] -id: SO:0001474 -name: trans_splice_junction -def: "The boundary between the spliced leader and the first exon of the mRNA." [SO:ke] -synonym: "trans-splice junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2009-07-13T04:50:49Z - -[Term] -id: SO:0001475 -name: outron -def: "A region of a primary transcript, that is removed via trans splicing." [PMID:16401417, SO:ke] -is_a: SO:0000835 ! primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-07-14T11:36:08Z - -[Term] -id: SO:0001476 -name: natural_plasmid -def: "A plasmid that occurs naturally." [SO:xp] -synonym: "natural plasmid" EXACT [] -is_a: SO:0000155 ! plasmid -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -relationship: has_quality SO:0000782 ! natural -created_by: kareneilbeck -creation_date: 2009-09-01T03:43:06Z - -[Term] -id: SO:0001477 -name: gene_trap_construct -def: "A gene trap construct is a type of engineered plasmid which is designed to integrate into a genome and produce a fusion transcript between exons of the gene into which it inserts and a reporter element in the construct. Gene traps contain a splice acceptor, do not contain promoter elements for the reporter, and are mutagenic. Gene traps may be bicistronic with the second cassette containing a promoter driving an a selectable marker." 
[ZFIN:dh] -synonym: "gene trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:49:09Z - -[Term] -id: SO:0001478 -name: promoter_trap_construct -def: "A promoter trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when inserted in close proximity to a promoter element. Promoter traps typically do not contain promoter elements and are mutagenic." [ZFIN:dh] -synonym: "promoter trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:52:01Z - -[Term] -id: SO:0001479 -name: enhancer_trap_construct -def: "An enhancer trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when the expression from a basic minimal promoter is enhanced by genomic enhancer elements. Enhancer traps contain promoter elements and are not usually mutagenic." [ZFIN:dh] -synonym: "enhancer trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:53:26Z - -[Term] -id: SO:0001480 -name: PAC_end -def: "A region of sequence from the end of a PAC clone that may provide a highly specific marker." [ZFIN:mh] -synonym: "PAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000154 ! PAC -created_by: kareneilbeck -creation_date: 2009-09-09T05:18:12Z - -[Term] -id: SO:0001481 -name: RAPD -def: "RAPD is a 'PCR product' where a sequence variant is identified through the use of PCR with random primers." [ZFIN:mh] -synonym: "Random Amplification Polymorphic DNA" EXACT [] -is_a: SO:0000006 ! PCR_product -created_by: kareneilbeck -creation_date: 2009-09-09T05:26:10Z - -[Term] -id: SO:0001482 -name: shadow_enhancer -synonym: "shadow enhancer" EXACT [] -is_a: SO:0000165 ! 
enhancer -created_by: kareneilbeck -creation_date: 2009-09-09T05:29:29Z - -[Term] -id: SO:0001483 -name: SNV -def: "SNVs are single nucleotide positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0001484 -name: X_element_combinatorial_repeat -def: "An X element combinatorial repeat is a repeat region located between the X element and the telomere or adjacent Y' element." [http://www.yeastgenome.org/help/glossary.html] -comment: X element combinatorial repeats contain Tbf1p binding sites,\nand possible functions include a role in telomerase-independent telomere\nmaintenance via recombination or as a barrier against transcriptional\nsilencing. These are usually present as a combination of one or more of\nseveral types of smaller elements (designated A, B, C, or D). This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "X element combinatorial repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T11:03:37Z - -[Term] -id: SO:0001485 -name: Y_prime_element -def: "A Y' element is a repeat region (SO:0000657) located adjacent to telomeric repeats or X element combinatorial repeats, either as a single copy or tandem repeat of two to four copies." [http:http://www.yeastgenome.org/help/glossary.html] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "Y' element" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! 
telomere -created_by: kareneilbeck -creation_date: 2009-11-10T12:08:57Z - -[Term] -id: SO:0001486 -name: standard_draft -def: "The status of a whole genome sequence, where the data is minimally filtered or un-filtered, from any number of sequencing platforms, and is assembled into contigs. Genome sequence of this quality may harbour regions of poor quality and can be relatively incomplete." [DOI:10.1126] -synonym: "standard draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:48:32Z - -[Term] -id: SO:0001487 -name: high_quality_draft -def: "The status of a whole genome sequence, where overall coverage represents at least 90 percent of the genome." [DOI:10.1126] -synonym: "high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:52:36Z - -[Term] -id: SO:0001488 -name: improved_high_quality_draft -def: "The status of a whole genome sequence, where additional work has been performed, using either manual or automated methods, such as gap resolution." [DOI:10.1126] -synonym: "improved high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:54:35Z - -[Term] -id: SO:0001489 -name: annotation_directed_improved_draft -def: "The status of a whole genome sequence,where annotation, and verification of coding regions has occurred." [DOI:10.1126] -synonym: "annotation directed improvement" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:57:10Z - -[Term] -id: SO:0001490 -name: noncontiguous_finished -def: "The status of a whole genome sequence, where the assembly is high quality, closure approaches have been successful for most gaps, misassemblies and low quality regions." [DOI:10.1126] -synonym: "non contiguous finished" EXACT [] -is_a: SO:0001499 ! 
whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:01:07Z - -[Term] -id: SO:0001491 -name: finished_genome -def: "The status of a whole genome sequence, with less than 1 error per 100,000 base pairs." [DOI:10.1126] -synonym: "finished" EXACT [] -synonym: "finished genome" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:04:43Z - -[Term] -id: SO:0001492 -name: intronic_regulatory_region -def: "A regulatory region that is part of an intron." [SO:ke] -synonym: "intronic regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000188 ! intron -created_by: kareneilbeck -creation_date: 2009-11-08T02:48:02Z - -[Term] -id: SO:0001493 -name: centromere_DNA_Element_I -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region composed of 8-11bp which enables binding by the centromere binding factor 1(Cbf1p)." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEI" EXACT [] -synonym: "Centromere DNA Element I" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0001794 ! point_centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:47:23Z - -[Term] -id: SO:0001494 -name: centromere_DNA_Element_II -def: "A centromere DNA Element II (CDEII) is part a conserved region of the centromere, consisting of a consensus region that is AT-rich and ~ 75-100 bp in length." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEII" EXACT [] -synonym: "centromere DNA Element II" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0001794 ! 
point_centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:51:26Z - -[Term] -id: SO:0001495 -name: centromere_DNA_Element_III -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region that consists of a 25-bp which enables binding by the centromere DNA binding factor 3 (CBF3) complex." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEIII" EXACT [] -synonym: "centromere DNA Element III" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0001794 ! point_centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:54:47Z - -[Term] -id: SO:0001496 -name: telomeric_repeat -def: "The telomeric repeat is a repeat region, part of the chromosome, which in yeast, is a G-rich terminal sequence of the form (TG(1-3))n or more precisely ((TG)(1-6)TG(2-3))n." [PMID:8720065] -comment: The repeats are maintained by telomerase and there is generally 300 (+/-) 75 bp of TG(1-3) at a given end. Telomeric repeats function in completing chromosome replication and protecting the ends from degradation and end-to-end fusions. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880739. -synonym: "telomeric repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-09T06:00:42Z - -[Term] -id: SO:0001497 -name: X_element -def: "The X element is a conserved region, of the telomere, of ~475 bp that contains an ARS sequence and in most cases an Abf1p binding site." [http://www.yeastgenome.org/help/glossary.html#xelemcoresequence] -comment: Possible functions include roles in chromosomal segregation,\nmaintenance of chromosome stability, recombinational sequestering, or as a\nbarrier to transcriptional silencing. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. 
-synonym: "X element" RELATED [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T10:56:54Z - -[Term] -id: SO:0001498 -name: YAC_end -def: "A region of sequence from the end of a YAC clone that may provide a highly specific marker." [SO:ke] -synonym: "YAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000152 ! YAC -created_by: kareneilbeck -creation_date: 2009-11-19T11:07:18Z - -[Term] -id: SO:0001499 -name: whole_genome_sequence_status -def: "The status of whole genome sequence." [DOI:10.1126] -comment: This terms and children were added to SO in response to tracker request by Patrick Chain. The paper Genome Project Standards in a New Era of Sequencing. Science October 9th 2009, addresses these terms. -synonym: "whole genome sequence status" EXACT [] -is_a: SO:0000905 ! status -created_by: kareneilbeck -creation_date: 2009-10-23T12:47:47Z - -[Term] -id: SO:0001500 -name: heritable_phenotypic_marker -def: "A biological_region characterized as a single heritable trait in a phenotype screen. The heritable phenotype may be mapped to a chromosome but generally has not been characterized to a specific gene locus." [JAX:hdene] -synonym: "heritable phenotypic marker" EXACT [] -synonym: "phenotypic marker" EXACT [] -is_a: SO:0001645 ! genetic_marker -created_by: kareneilbeck -creation_date: 2009-12-07T01:50:55Z - -[Term] -id: SO:0001501 -name: peptide_collection -def: "A collection of peptide sequences." [BBOP:nlw] -comment: Term requested via tracker ID: 2910829. -synonym: "peptide collection" EXACT [] -synonym: "peptide set" EXACT [] -is_a: SO:0001260 ! sequence_collection -relationship: has_part SO:0000104 ! polypeptide -created_by: kareneilbeck -creation_date: 2009-12-11T10:58:58Z - -[Term] -id: SO:0001502 -name: high_identity_region -def: "An experimental feature with high sequence identity to another sequence." [SO:ke] -comment: Requested by tracker ID: 2902685. 
-synonym: "high identity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2009-12-11T11:06:05Z - -[Term] -id: SO:0001503 -name: processed_transcript -def: "A transcript for which no open reading frame has been identified and for which no other function has been determined." [MGI:hdeen] -comment: Ensembl and Vega also use this term name. Requested by Howard Deen of MGI. -synonym: "processed transcript" EXACT [] -is_a: SO:0000673 ! transcript -created_by: kareneilbeck -creation_date: 2009-12-21T05:37:14Z - -[Term] -id: SO:0001504 -name: assortment_derived_variation -def: "A chromosome variation derived from an event during meiosis." [SO:ke] -synonym: "assortment derived variation" RELATED [] -is_a: SO:0000240 ! chromosome_variation -created_by: kareneilbeck -creation_date: 2010-03-02T05:03:18Z - -[Term] -id: SO:0001505 -name: reference_genome -def: "A collection of sequences (often chromosomes) taken as the standard for a given organism and genome assembly." [SO:ke] -synonym: "reference genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:10:03Z - -[Term] -id: SO:0001506 -name: variant_genome -def: "A collection of sequences (often chromosomes) of an individual." [SO:ke] -synonym: "variant genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:11:25Z - -[Term] -id: SO:0001507 -name: variant_collection -def: "A collection of one or more sequences of an individual." [SO:ke] -synonym: "variant collection" RELATED [] -is_a: SO:0001260 ! sequence_collection -relationship: has_part SO:0001059 ! sequence_alteration -created_by: kareneilbeck -creation_date: 2010-03-03T02:13:28Z - -[Term] -id: SO:0001508 -name: alteration_attribute -synonym: "alteration attribute" EXACT [] -is_a: SO:0000733 ! 
feature_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:53:23Z - -[Term] -id: SO:0001509 -name: chromosomal_variation_attribute -synonym: "chromosomal variation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:54:30Z - -[Term] -id: SO:0001510 -name: intrachromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:25Z - -[Term] -id: SO:0001511 -name: interchromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:43Z - -[Term] -id: SO:0001512 -name: insertion_attribute -def: "A quality of a chromosomal insertion,." [SO:ke] -synonym: "insertion attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:56Z - -[Term] -id: SO:0001513 -name: tandem -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:37Z - -[Term] -id: SO:0001514 -name: direct -def: "A quality of an insertion where the insert is not in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:49Z - -[Term] -id: SO:0001515 -name: inverted -def: "A quality of an insertion where the insert is in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:40Z - -[Term] -id: SO:0001516 -name: free -def: "The quality of a duplication where the new region exists independently of the original." [SO:ke] -is_a: SO:0001523 ! duplication_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:51Z - -[Term] -id: SO:0001517 -name: inversion_attribute -synonym: "inversion attribute" EXACT [] -is_a: SO:0001508 ! 
alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:10Z - -[Term] -id: SO:0001518 -name: pericentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:24Z - -[Term] -id: SO:0001519 -name: paracentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:35Z - -[Term] -id: SO:0001520 -name: translocaton_attribute -synonym: "translocation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:47Z - -[Term] -id: SO:0001521 -name: reciprocal -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:34Z - -[Term] -id: SO:0001522 -name: insertional -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:51Z - -[Term] -id: SO:0001523 -name: duplication_attribute -synonym: "duplication attribute" RELATED [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-05T01:56:33Z - -[Term] -id: SO:0001524 -name: chromosomally_aberrant_genome -synonym: "chromosomally aberrant genome" RELATED [] -is_a: SO:0001506 ! variant_genome -created_by: kareneilbeck -creation_date: 2010-03-05T02:21:00Z - -[Term] -id: SO:0001525 -name: assembly_error_correction -def: "A region of sequence where the final nucleotide assignment differs from the original assembly due to an improvement that replaces a mistake." [SO:ke] -synonym: "assembly error correction" RELATED [] -is_a: SO:0000413 ! sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:16:31Z - -[Term] -id: SO:0001526 -name: base_call_error_correction -def: "A region of sequence where the final nucleotide assignment is different from that given by the base caller due to an improvement that replaces a mistake." [SO:ke] -synonym: "base call error correction" RELATED [] -is_a: SO:0000413 ! 
sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:18:07Z - -[Term] -id: SO:0001527 -name: peptide_localization_signal -def: "A region of peptide sequence used to target the polypeptide molecule to a specific organelle." [SO:ke] -subset: SOFA -synonym: "localization signal" RELATED [] -synonym: "peptide localization signal" EXACT [] -is_a: SO:0000839 ! polypeptide_region -created_by: kareneilbeck -creation_date: 2010-03-11T02:15:05Z - -[Term] -id: SO:0001528 -name: nuclear_localization_signal -def: "A polypeptide region that targets a polypeptide to the nucleus." [SO:ke] -synonym: "NLS" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_localization_signal "wikipedia" -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:16:38Z - -[Term] -id: SO:0001529 -name: endosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the endosome." [SO:ke] -synonym: "endosomal localization signal" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:20:58Z - -[Term] -id: SO:0001530 -name: lysosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the lysosome." [SO:ke] -synonym: "lysosomal localization signal" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:24:10Z - -[Term] -id: SO:0001531 -name: nuclear_export_signal -def: "A polypeptide region that targets a polypeptide to he cytoplasm." [SO:ke] -synonym: "NES" EXACT [] -synonym: "nuclear export signal" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_export_signal -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:25:25Z - -[Term] -id: SO:0001532 -name: recombination_signal_sequence -def: "A region recognized by a recombinase." 
[SO:ke] -synonym: "recombination signal sequence" RELATED [] -xref: http://en.wikipedia.org/wiki/Recombination_Signal_Sequences "wikipedia" -is_a: SO:0000299 ! specific_recombination_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:16:47Z - -[Term] -id: SO:0001533 -name: cryptic_splice_site -def: "A splice site that is in part of the transcript not normally spliced. They occur via mutation or transcriptional error." [SO:ke] -synonym: "cryptic splice signal" RELATED [] -synonym: "cryptic splice site" EXACT [] -is_a: SO:0000162 ! splice_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:25:06Z - -[Term] -id: SO:0001534 -name: nuclear_rim_localization_signal -def: "A polypeptide region that targets a polypeptide to the nuclear rim." [SO:ke] -synonym: "nuclear rim localization signal" RELATED [] -xref: PMID:16027110 -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T03:31:30Z - -[Term] -id: SO:0001535 -name: p_element -def: "A P_element is a DNA transposon responsible for hybrid dysgenesis." [SO:ke] -synonym: "P element" RELATED [] -is_a: SO:0000182 ! DNA_transposon -created_by: kareneilbeck -creation_date: 2010-03-12T03:40:33Z - -[Term] -id: SO:0001536 -name: functional_variant -def: "A sequence variant in which the function of a gene product is altered with respect to a reference." [SO:ke] -synonym: "functional variant" EXACT [] -is_a: SO:0001060 ! sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:30:25Z - -[Term] -id: SO:0001537 -name: structural_variant -def: "A sequence variant that changes one or more sequence features." [SO:ke] -synonym: "structural variant" RELATED [] -is_a: SO:0001060 ! sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:31:01Z - -[Term] -id: SO:0001538 -name: transcript_function_variant -def: "A sequence variant which alters the functioning of a transcript with respect to a reference sequence." 
[SO:ke] -synonym: "transcript function variant" EXACT [] -is_a: SO:0001536 ! functional_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:32:58Z - -[Term] -id: SO:0001539 -name: translational_product_function_variant -def: "A sequence variant that affects the functioning of a translational product with respect to a reference sequence." [SO:ke] -synonym: "translational product variant" EXACT [] -is_a: SO:0001536 ! functional_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:46:15Z - -[Term] -id: SO:0001540 -name: level_of_transcript_variant -def: "A sequence variant which alters the level of a transcript." [SO:ke] -synonym: "level of transcript variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:47:07Z - -[Term] -id: SO:0001541 -name: decreased_transcript_level_variant -def: "A sequence variant that increases the level of mature, spliced and processed RNA with respect to a reference sequence." [SO:ke] -synonym: "decreased transcript level" EXACT [] -is_a: SO:0001540 ! level_of_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:47:47Z - -[Term] -id: SO:0001542 -name: increased_transcript_level_variant -def: "A sequence variant that increases the level of mature, spliced and processed RNA with respect to a reference sequence." [SO:ke] -synonym: "increased transcript level variant" EXACT [] -is_a: SO:0001540 ! level_of_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:48:17Z - -[Term] -id: SO:0001543 -name: transcript_processing_variant -def: "A sequence variant that affects the post transcriptional processing of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcript processing variant" EXACT [] -is_a: SO:0001538 ! 
transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:48:48Z - -[Term] -id: SO:0001544 -name: editing_variant -def: "A transcript processing variant whereby the process of editing is disrupted with respect to the reference." [SO:ke] -synonym: "editing variant" EXACT [] -is_a: SO:0001543 ! transcript_processing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:49:25Z - -[Term] -id: SO:0001545 -name: polyadenylation_variant -def: "A sequence variant that changes polyadenylation with respect to a reference sequence." [SO:ke] -synonym: "polyadenylation variant" EXACT [] -is_a: SO:0001543 ! transcript_processing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:49:40Z - -[Term] -id: SO:0001546 -name: transcript_stability_variant -def: "A variant that changes the stability of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcript stability variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:01Z - -[Term] -id: SO:0001547 -name: decreased_transcript_stability_variant -def: "A sequence variant that decreases transcript stability with respect to a reference sequence." [SO:ke] -synonym: "decrease transcript stability variant" EXACT [] -is_a: SO:0001546 ! transcript_stability_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:23Z - -[Term] -id: SO:0001548 -name: increased_transcript_stability_variant -def: "A sequence variant that increases transcript stability with respect to a reference sequence." [SO:ke] -synonym: "increased transcript stability variant" EXACT [] -is_a: SO:0001546 ! transcript_stability_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:39Z - -[Term] -id: SO:0001549 -name: transcription_variant -def: "A variant that changes alters the transcription of a transcript with respect to a reference sequence." 
[SO:ke] -synonym: "transcription variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:51:26Z - -[Term] -id: SO:0001550 -name: rate_of_transcription_variant -def: "A sequence variant that changes the rate of transcription with respect to a reference sequence." [SO:ke] -synonym: "rate of transcription variant" EXACT [] -is_a: SO:0001549 ! transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:51:50Z - -[Term] -id: SO:0001551 -name: increased_transcription_rate_variant -def: "A sequence variant that increases the rate of transcription with respect to a reference sequence." [SO:ke] -synonym: "increased transcription rate variant" EXACT [] -is_a: SO:0001550 ! rate_of_transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:52:17Z - -[Term] -id: SO:0001552 -name: decreased_transcription_rate_variant -def: "A sequence variant that decreases the rate of transcription with respect to a reference sequence." [SO:ke] -synonym: "decreased transcription rate variant" EXACT [] -is_a: SO:0001550 ! rate_of_transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:52:43Z - -[Term] -id: SO:0001553 -name: translational_product_level_variant -def: "A functional variant that changes the translational product level with respect to a reference sequence." [SO:ke] -synonym: "translational product level variant" EXACT [] -is_a: SO:0001539 ! translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:53:32Z - -[Term] -id: SO:0001554 -name: polypeptide_function_variant -def: "A sequence variant which changes polypeptide functioning with respect to a reference sequence." [SO:ke] -synonym: "polypeptide function variant" EXACT [] -is_a: SO:0001539 ! 
translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:53:54Z - -[Term] -id: SO:0001555 -name: decreased_translational_product_level -def: "A sequence variant which decreases the translational product level with respect to a reference sequence." [SO:ke] -synonym: "decrease translational product level" EXACT [] -is_a: SO:0001553 ! translational_product_level_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:54:25Z - -[Term] -id: SO:0001556 -name: increased_translational_product_level -def: "A sequence variant which increases the translational product level with respect to a reference sequence." [SO:ke] -synonym: "increase translational product level" EXACT [] -is_a: SO:0001553 ! translational_product_level_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:55:25Z - -[Term] -id: SO:0001557 -name: polypeptide_gain_of_function_variant -def: "A sequence variant which causes gain of polypeptide function with respect to a reference sequence." [SO:ke] -synonym: "polypeptide gain of function variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:12Z - -[Term] -id: SO:0001558 -name: polypeptide_localization_variant -def: "A sequence variant which changes the localization of a polypeptide with respect to a reference sequence." [SO:ke] -synonym: "polypeptide localization variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:37Z - -[Term] -id: SO:0001559 -name: polypeptide_loss_of_function_variant -def: "A sequence variant that causes the loss of a polypeptide function with respect to a reference sequence." [SO:ke] -synonym: "polypeptide loss of function variant" EXACT [] -is_a: SO:0001554 ! 
polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:58Z - -[Term] -id: SO:0001560 -name: inactive_ligand_binding_site -def: "A sequence variant that causes the inactivation of a ligand binding site with respect to a reference sequence." [SO:ke] -synonym: "inactive ligand binding site" EXACT [] -is_a: SO:0001559 ! polypeptide_loss_of_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:58:00Z - -[Term] -id: SO:0001561 -name: polypeptide_partial_loss_of_function -def: "A sequence variant that causes some but not all loss of polypeptide function with respect to a reference sequence." [SO:ke] -synonym: "polypeptide partial loss of function" EXACT [] -is_a: SO:0001559 ! polypeptide_loss_of_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:58:32Z - -[Term] -id: SO:0001562 -name: polypeptide_post_translational_processing_variant -def: "A sequence variant that causes a change in post translational processing of the peptide with respect to a reference sequence." [SO:ke] -synonym: "polypeptide post translational processing variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:59:06Z - -[Term] -id: SO:0001563 -name: copy_number_change -def: "A sequence variant where copies of a feature (CNV) are either increased or decreased." [SO:ke] -synonym: "copy number change" EXACT [] -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:27:33Z - -[Term] -id: SO:0001564 -name: gene_variant -def: "A sequence variant where the structure of the gene is changed." [SO:ke] -synonym: "gene structure variant" EXACT [] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:01Z - -[Term] -id: SO:0001565 -name: gene_fusion -def: "A sequence variant whereby a two genes have become joined." [SO:ke] -synonym: "gene fusion" EXACT [] -is_a: SO:0001564 ! 
gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:28Z - -[Term] -id: SO:0001566 -name: regulatory_region_variant -def: "A sequence variant located within a regulatory region." [SO:ke] -comment: EBI term: Regulatory region variations - In regulatory region annotated by Ensembl. -synonym: "regulatory region variant" EXACT [] -synonym: "regulatory_region_" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:48Z - -[Term] -id: SO:0001567 -name: stop_retained_variant -def: "A sequence variant where at least one base in the terminator codon is changed, but the terminator remains." [SO:ke] -synonym: "stop retained variant" EXACT [] -is_a: SO:0001590 ! terminator_codon_variant -is_a: SO:0001819 ! synonymous_variant -created_by: kareneilbeck -creation_date: 2010-04-19T05:02:30Z - -[Term] -id: SO:0001568 -name: splicing_variant -def: "A sequence variant that changes the process of splicing." [SO:ke] -synonym: "splicing variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001564 ! gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:29:22Z - -[Term] -id: SO:0001569 -name: cryptic_splice_site_variant -def: "A sequence variant causing a new (functional) splice site." [EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "cryptic splice site activation" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001568 ! splicing_variant -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:29:41Z - -[Term] -id: SO:0001570 -name: cryptic_splice_acceptor -def: "A sequence variant whereby a new splice site is created due to the activation of a new acceptor." [SO:ke] -synonym: "cryptic splice acceptor" EXACT [] -is_a: SO:0001569 ! 
cryptic_splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:30:11Z - -[Term] -id: SO:0001571 -name: cryptic_splice_donor -def: "A sequence variant whereby a new splice site is created due to the activation of a new donor." [SO:ke] -synonym: "cryptic splice donor" EXACT [] -is_a: SO:0001569 ! cryptic_splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:30:35Z - -[Term] -id: SO:0001572 -name: exon_loss_variant -def: "A sequence variant whereby an exon is lost from the transcript." [SO:ke] -synonym: "exon loss" EXACT [] -is_a: SO:0001568 ! splicing_variant -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:09Z - -[Term] -id: SO:0001573 -name: intron_gain -def: "A sequence variant whereby an intron is gained by the processed transcript; usually a result of an alteration of the donor or acceptor." [EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "intron gain" EXACT [] -synonym: "intron gain variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001568 ! splicing_variant -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:25Z - -[Term] -id: SO:0001574 -name: splice_acceptor_variant -def: "A splice variant that changes the 2 base region at the 3' end of an intron." [SO:ke] -synonym: "splice acceptor variant" EXACT [] -is_a: SO:0001629 ! splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:52Z - -[Term] -id: SO:0001575 -name: splice_donor_variant -def: "A splice variant that changes the 2 base pair region at the 5' end of an intron." [SO:ke] -synonym: "splice donor variant" EXACT [] -is_a: SO:0001629 ! splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:32:10Z - -[Term] -id: SO:0001576 -name: transcript_variant -def: "A sequence variant that changes the structure of the transcript." 
[SO:ke] -synonym: "transcript variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001564 ! gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:32:41Z - -[Term] -id: SO:0001577 -name: complex_transcript_variant -def: "A transcript variant with a complex INDEL- Insertion or deletion that spans an exon/intron border or a coding sequence/UTR border." [http://ensembl.org/info/docs/variation/index.html] -comment: EBI term: Complex InDel - Insertion or deletion that spans an exon/intron border or a coding sequence/UTR border. -synonym: "complex transcript variant" EXACT [] -synonym: "complex_indel" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "complext change in transcript" EXACT [] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:33:03Z - -[Term] -id: SO:0001578 -name: stop_lost -def: "A sequence variant where at least one base of the terminator codon (stop) is changed, resulting in an elongated transcript." [SO:ke] -comment: EBI term: Stop lost - In coding sequence, resulting in the loss of a stop codon. -synonym: "stop codon lost" EXACT [] -synonym: "stop lost" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001590 ! terminator_codon_variant -is_a: SO:0001907 ! feature_elongation -is_a: SO:0001992 ! nonsynonymous_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:46:42Z - -[Term] -id: SO:0001579 -name: transcript_sequence_variant -synonym: "transcript sequence variant" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001580 -name: coding_sequence_variant -alt_id: SO:0001581 -def: "A sequence variant that changes the coding sequence." [SO:ke] -synonym: "coding sequence variant" EXACT [] -synonym: "coding variant" EXACT [] -synonym: "codon variant" EXACT [] -synonym: "codon_variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001791 ! 
exon_variant -is_a: SO:0001968 ! coding_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:34:36Z - -[Term] -id: SO:0001582 -name: initiator_codon_variant -def: "A codon variant that changes at least one base of the first codon of a transcript." [SO:ke] -synonym: "initiatior codon variant" EXACT [] -synonym: "initiator codon change" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -xref: loinc:LA6695-6 "Initiating Methionine" -is_a: SO:0001992 ! nonsynonymous_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:35:18Z - -[Term] -id: SO:0001583 -name: missense_variant -alt_id: SO:0001584 -alt_id: SO:0001783 -def: "A sequence variant, that changes one or more bases, resulting in a different amino acid sequence but where the length is preserved." [EBI:fc, EBI:gr, SO:ke] -comment: EBI term: Non-synonymous SNPs. SNPs that are located in the coding sequence and result in an amino acid change in the encoded peptide sequence. A change that causes a non_synonymous_codon can be more than 3 bases - for example 4 base substitution. -synonym: "missense" EXACT [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "missense codon" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -xref: http://en.wikipedia.org/wiki/Missense_mutation -xref: loinc:LA6698-0 "Missense" -is_a: SO:0001992 ! nonsynonymous_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:35:49Z - -[Term] -id: SO:0001585 -name: conservative_missense_variant -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for a different but similar amino acid. These variants may or may not be deleterious." 
[SO:ke] -synonym: "conservative missense codon" EXACT [] -synonym: "conservative missense variant" EXACT [] -synonym: "neutral missense codon" RELATED [] -synonym: "quiet missense codon" RELATED [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001583 ! missense_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:36:40Z - -[Term] -id: SO:0001586 -name: non_conservative_missense_variant -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for an amino acid with different biochemical properties." [SO:ke] -synonym: "non conservative missense codon" EXACT [] -synonym: "non conservative missense variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001583 ! missense_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:37:16Z - -[Term] -id: SO:0001587 -name: stop_gained -def: "A sequence variant whereby at least one base of a codon is changed, resulting in a premature stop codon, leading to a shortened transcript." [SO:ke] -comment: EBI term: Stop gained - In coding sequence, resulting in the gain of a stop codon (i.e. leading to a shortened peptide sequence). -synonym: "nonsense" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "nonsense codon" EXACT [] -synonym: "stop codon gained" RELATED [] -synonym: "stop gained" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -xref: loinc:LA6699-8 "Nonsense" -is_a: SO:0001906 ! feature_truncation -is_a: SO:0001992 ! nonsynonymous_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:37:52Z - -[Term] -id: SO:0001589 -name: frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three." [SO:ke] -comment: EBI term:Frameshift variations - In coding sequence, resulting in a frameshift. 
-synonym: "frameshift variant" EXACT [] -synonym: "frameshift_" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "frameshift_coding" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -xref: loinc:LA6694-9 "Frameshift" -is_a: SO:0001818 ! protein_altering_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:40:19Z - -[Term] -id: SO:0001590 -name: terminator_codon_variant -alt_id: SO:0001625 -def: "A sequence variant whereby at least one of the bases in the terminator codon is changed." [SO:ke] -comment: The terminal codon may be the terminator, or in an incomplete transcript the last available codon. -synonym: "terminal codon variant" EXACT [] -synonym: "terminal_codon_variant" EXACT [] -synonym: "terminator codon variant" EXACT [] -xref: loinc:LA6700-2 "Stop Codon Mutation" -is_a: SO:0001580 ! coding_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:40:37Z - -[Term] -id: SO:0001591 -name: frame_restoring_variant -def: "A sequence variant that reverts the sequence of a previous frameshift mutation back to the initial frame." [SO:ke] -synonym: "frame restoring variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:09Z - -[Term] -id: SO:0001592 -name: minus_1_frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, by shifting one base ahead." [http://arjournals.annualreviews.org/doi/pdf/10.1146/annurev.ge.08.120174.001535] -synonym: "-1 frameshift variant" EXACT [] -synonym: "minus 1 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:30Z - -[Term] -id: SO:0001593 -name: minus_2_frameshift_variant -synonym: "-2 frameshift variant" EXACT [] -synonym: "minus 2 frameshift variant" EXACT [] -is_a: SO:0001589 ! 
frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:52Z - -[Term] -id: SO:0001594 -name: plus_1_frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, by shifting one base backward." [http://arjournals.annualreviews.org/doi/pdf/10.1146/annurev.ge.08.120174.001535] -synonym: "+1 frameshift variant" EXACT [] -synonym: "plus 1 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:42:06Z - -[Term] -id: SO:0001595 -name: plus_2_frameshift_variant -synonym: "+2 frameshift variant" EXACT [] -synonym: "plus 2 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:42:23Z - -[Term] -id: SO:0001596 -name: transcript_secondary_structure_variant -def: "A sequence variant within a transcript that changes the secondary structure of the RNA product." [SO:ke] -synonym: "transcript secondary structure variant" EXACT [] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:43:18Z - -[Term] -id: SO:0001597 -name: compensatory_transcript_secondary_structure_variant -def: "A secondary structure variant that compensate for the change made by a previous variant." [SO:ke] -synonym: "compensatory transcript secondary structure variant" EXACT [] -is_a: SO:0001596 ! transcript_secondary_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:43:54Z - -[Term] -id: SO:0001598 -name: translational_product_structure_variant -def: "A sequence variant within the transcript that changes the structure of the translational product." [SO:ke] -synonym: "translational product structure variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001564 ! 
gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:44:17Z - -[Term] -id: SO:0001599 -name: 3D_polypeptide_structure_variant -def: "A sequence variant that changes the resulting polypeptide structure." [SO:ke] -synonym: "3D polypeptide structure variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:44:46Z - -[Term] -id: SO:0001600 -name: complex_3D_structural_variant -def: "A sequence variant that changes the resulting polypeptide structure." [SO:ke] -synonym: "complex 3D structural variant" EXACT [] -is_a: SO:0001599 ! 3D_polypeptide_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:45:13Z - -[Term] -id: SO:0001601 -name: conformational_change_variant -def: "A sequence variant in the CDS region that causes a conformational change in the resulting polypeptide sequence." [SO:ke] -synonym: "conformational change variant" EXACT [] -is_a: SO:0001599 ! 3D_polypeptide_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:45:48Z - -[Term] -id: SO:0001602 -name: complex_change_of_translational_product_variant -synonym: "complex change of translational product variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:46:54Z - -[Term] -id: SO:0001603 -name: polypeptide_sequence_variant -def: "A sequence variant with in the CDS that causes a change in the resulting polypeptide sequence." [SO:ke] -synonym: "polypeptide sequence variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:13Z - -[Term] -id: SO:0001604 -name: amino_acid_deletion -def: "A sequence variant within a CDS resulting in the loss of an amino acid from the resulting polypeptide." [SO:ke] -synonym: "amino acid deletion" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! 
polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:36Z - -[Term] -id: SO:0001605 -name: amino_acid_insertion -def: "A sequence variant within a CDS resulting in the gain of an amino acid to the resulting polypeptide." [SO:ke] -synonym: "amino acid insertion" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:56Z - -[Term] -id: SO:0001606 -name: amino_acid_substitution -def: "A sequence variant of a codon resulting in the substitution of one amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "amino acid substitution" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:48:17Z - -[Term] -id: SO:0001607 -name: conservative_amino_acid_substitution -def: "A sequence variant of a codon causing the substitution of a similar amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "conservative amino acid substitution" EXACT [] -is_a: SO:0001606 ! amino_acid_substitution -created_by: kareneilbeck -creation_date: 2010-03-22T02:48:57Z - -[Term] -id: SO:0001608 -name: non_conservative_amino_acid_substitution -def: "A sequence variant of a codon causing the substitution of a non conservative amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "non conservative amino acid substitution" EXACT [] -is_a: SO:0001606 ! amino_acid_substitution -created_by: kareneilbeck -creation_date: 2010-03-22T02:49:23Z - -[Term] -id: SO:0001609 -name: elongated_polypeptide -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence." [SO:ke] -synonym: "elongated polypeptide" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! 
polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:49:52Z - -[Term] -id: SO:0001610 -name: elongated_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated polypeptide C terminal" EXACT [] -is_a: SO:0001609 ! elongated_polypeptide -created_by: kareneilbeck -creation_date: 2010-03-22T02:50:20Z - -[Term] -id: SO:0001611 -name: elongated_polypeptide_N_terminal -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated polypeptide N terminal" EXACT [] -is_a: SO:0001609 ! elongated_polypeptide -created_by: kareneilbeck -creation_date: 2010-03-22T02:50:31Z - -[Term] -id: SO:0001612 -name: elongated_in_frame_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes in frame elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated in frame polypeptide C terminal" EXACT [] -is_a: SO:0001610 ! elongated_polypeptide_C_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:05Z - -[Term] -id: SO:0001613 -name: elongated_out_of_frame_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes out of frame elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated polypeptide out of frame C terminal" EXACT [] -is_a: SO:0001610 ! elongated_polypeptide_C_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:20Z - -[Term] -id: SO:0001614 -name: elongated_in_frame_polypeptide_N_terminal_elongation -def: "A sequence variant with in the CDS that causes in frame elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated in frame polypeptide N terminal" EXACT [] -is_a: SO:0001611 ! 
elongated_polypeptide_N_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:49Z - -[Term] -id: SO:0001615 -name: elongated_out_of_frame_polypeptide_N_terminal -def: "A sequence variant with in the CDS that causes out of frame elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated out of frame N terminal" EXACT [] -is_a: SO:0001611 ! elongated_polypeptide_N_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:52:05Z - -[Term] -id: SO:0001616 -name: polypeptide_fusion -def: "A sequence variant that causes a fusion of two polypeptide sequences." [SO:ke] -synonym: "polypeptide fusion" EXACT [] -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:52:43Z - -[Term] -id: SO:0001617 -name: polypeptide_truncation -def: "A sequence variant of the CD that causes a truncation of the resulting polypeptide." [SO:ke] -synonym: "polypeptide truncation" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:53:07Z - -[Term] -id: SO:0001618 -name: inactive_catalytic_site -def: "A sequence variant that causes the inactivation of a catalytic site with respect to a reference sequence." [SO:ke] -synonym: "inactive catalytic site" EXACT [] -is_a: SO:0001560 ! inactive_ligand_binding_site -created_by: kareneilbeck -creation_date: 2010-03-22T03:06:14Z - -[Term] -id: SO:0001619 -name: non_coding_transcript_variant -def: "A transcript variant of a non coding RNA gene." [SO:ke] -comment: Within non-coding gene - Located within a gene that does not code for a protein. -synonym: "nc transcript variant" EXACT [] -synonym: "non coding transcript variant" EXACT [] -synonym: "within_non_coding_gene" EXACT dbsnp [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! 
transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:16:23Z - -[Term] -id: SO:0001620 -name: mature_miRNA_variant -def: "A transcript variant located with the sequence of the mature miRNA." [SO:ke] -comment: EBI term: Within mature miRNA - Located within a microRNA. -synonym: "mature miRNA variant" EXACT [] -synonym: "within_mature_miRNA" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001619 ! non_coding_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:16:58Z - -[Term] -id: SO:0001621 -name: NMD_transcript_variant -def: "A variant in a transcript that is the target of NMD." [SO:ke] -synonym: "NMD transcript variant" EXACT [] -synonym: "NMD_transcript" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:20:40Z - -[Term] -id: SO:0001622 -name: UTR_variant -def: "A transcript variant that is located within the UTR." [SO:ke] -synonym: "UTR variant" EXACT [] -synonym: "UTR_" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001791 ! exon_variant -is_a: SO:0001968 ! coding_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:22:58Z - -[Term] -id: SO:0001623 -name: 5_prime_UTR_variant -def: "A UTR variant of the 5' UTR." [SO:ke] -comment: EBI term: 5prime UTR variations - In 5prime UTR (untranslated region). -synonym: "5'UTR variant" EXACT [] -synonym: "5PRIME_UTR" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "five prime UTR variant" EXACT [] -synonym: "untranslated-5" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001622 ! UTR_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:23:29Z - -[Term] -id: SO:0001624 -name: 3_prime_UTR_variant -def: "A UTR variant of the 3' UTR." [SO:ke] -comment: EBI term 3prime UTR variations - In 3prime UTR. 
-synonym: "3'UTR variant" EXACT [] -synonym: "3PRIME_UTR" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "three prime UTR variant" EXACT [] -synonym: "untranslated-3" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001622 ! UTR_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:23:54Z - -[Term] -id: SO:0001626 -name: incomplete_terminal_codon_variant -def: "A sequence variant where at least one base of the final codon of an incompletely annotated transcript is changed." [SO:ke] -comment: EBI term: Partial codon - Located within the final, incomplete codon of a transcript with a shortened coding sequence where the end is unknown. -synonym: "incomplete terminal codon variant" EXACT [] -synonym: "partial_codon" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001590 ! terminator_codon_variant -is_a: SO:0001650 ! inframe_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:51:15Z - -[Term] -id: SO:0001627 -name: intron_variant -def: "A transcript variant occurring within an intron." [SO:ke] -comment: EBI term: Intronic variations - In intron. -synonym: "intron variant" EXACT [] -synonym: "intron_" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "intronic" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:52:38Z - -[Term] -id: SO:0001628 -name: intergenic_variant -def: "A sequence variant located in the intergenic region, between genes." [SO:ke] -comment: EBI term Intergenic variations - More than 5 kb either upstream or downstream of a transcript. -synonym: "intergenic" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "intergenic variant" EXACT [] -is_a: SO:0001878 ! 
feature_variant -created_by: kareneilbeck -creation_date: 2010-03-23T05:07:37Z - -[Term] -id: SO:0001629 -name: splice_site_variant -def: "A sequence variant that changes the first two or last two bases of an intron, or the 5th base from the start of the intron in the orientation of the transcript." [http://ensembl.org/info/docs/variation/index.html] -comment: EBI term - essential splice site - In the first 2 or the last 2 base pairs of an intron. The 5th base is on the donor (5') side of the intron. Updated to b in line with Cancer Genome Project at the Sanger. -synonym: "essential_splice_site" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "splice site variant" EXACT [] -is_a: SO:0001568 ! splicing_variant -is_a: SO:0001969 ! coding_transcript_intron_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:42:00Z - -[Term] -id: SO:0001630 -name: splice_region_variant -def: "A sequence variant in which a change has occurred within the region of the splice site, either within 1-3 bases of the exon or 3-8 bases of the intron." [http://ensembl.org/info/docs/variation/index.html] -comment: EBI term: splice site - 1-3 bps into an exon or 3-8 bps into an intron. -synonym: "splice region variant" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001568 ! splicing_variant -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:46:02Z - -[Term] -id: SO:0001631 -name: upstream_gene_variant -def: "A sequence variant located 5' of a gene." [SO:ke] -comment: Different groups annotate up and downstream to different lengths. The subtypes are specific and are backed up with cross references. -synonym: "upstream gene variant" EXACT [] -is_a: SO:0001628 ! intergenic_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:49:13Z - -[Term] -id: SO:0001632 -name: downstream_gene_variant -def: "A sequence variant located 3' of a gene." 
[SO:ke] -comment: Different groups annotate up and downstream to different lengths. The subtypes are specific and are backed up with cross references. -synonym: "downstream gene variant" EXACT [] -is_a: SO:0001628 ! intergenic_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:49:38Z - -[Term] -id: SO:0001633 -name: 5KB_downstream_variant -def: "A sequence variant located within 5 KB of the end of a gene." [SO:ke] -comment: EBI term Downstream variations - Within 5 kb downstream of the 3prime end of a transcript. -synonym: "5KB downstream variant" EXACT [] -synonym: "downstream" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "within 5KB downstream" RELATED [] -is_a: SO:0001632 ! downstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:50:16Z - -[Term] -id: SO:0001634 -name: 500B_downstream_variant -def: "A sequence variant located within a half KB of the end of a gene." [SO:ke] -synonym: "500B downstream variant" EXACT [] -synonym: "near-gene-3" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001633 ! 5KB_downstream_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:50:42Z - -[Term] -id: SO:0001635 -name: 5KB_upstream_variant -def: "A sequence variant located within 5KB 5' of a gene." [SO:ke] -comment: EBI term Upstream variations - Within 5 kb upstream of the 5prime end of a transcript. -synonym: "5kb upstream variant" EXACT [] -synonym: "upstream" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001631 ! upstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:51:06Z - -[Term] -id: SO:0001636 -name: 2KB_upstream_variant -def: "A sequence variant located within 2KB 5' of a gene." [SO:ke] -synonym: "2KB upstream variant" EXACT [] -synonym: "near-gene-5" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001635 ! 
5KB_upstream_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:51:22Z - -[Term] -id: SO:0001637 -name: rRNA_gene -def: "A gene that encodes for ribosomal RNA." [SO:ke] -synonym: "rDNA" EXACT [] -synonym: "rRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:10:32Z - -[Term] -id: SO:0001638 -name: piRNA_gene -def: "A gene that encodes for an piwi associated RNA." [SO:ke] -synonym: "piRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:11:36Z - -[Term] -id: SO:0001639 -name: RNase_P_RNA_gene -def: "A gene that encodes an RNase P RNA." [SO:ke] -synonym: "RNase P RNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:13:23Z - -[Term] -id: SO:0001640 -name: RNase_MRP_RNA_gene -def: "A gene that encodes a RNase_MRP_RNA." [SO:ke] -synonym: "RNase MRP RNA gene" RELATED [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:13:58Z - -[Term] -id: SO:0001641 -name: lincRNA_gene -def: "A gene that encodes large intervening non-coding RNA." [SO:ke] -synonym: "lincRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:14:24Z - -[Term] -id: SO:0001642 -name: mathematically_defined_repeat -def: "A mathematically defined repeat (MDR) is a experimental feature that is determined by querying overlapping oligomers of length k against a database of shotgun sequence data and identifying regions in the query sequence that exceed a statistically determined threshold of repetitiveness." [SO:jestill] -comment: Mathematically defined repeat regions are determined without regard to the biological origin of the repetitive region. The repeat units of a MDR are the overlapping oligomers of size k that were used to for the query. 
Tools that can annotate mathematically defined repeats include Tallymer (Kurtz et al 2008, BMC Genomics: 517) and RePS (Wang et al, Genome Res 12(5): 824-831.). -synonym: "mathematically defined repeat" EXACT [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2010-05-03T11:50:14Z - -[Term] -id: SO:0001643 -name: telomerase_RNA_gene -def: "A telomerase RNA gene is a non coding RNA gene the RNA product of which is a component of telomerase." [SO:ke] -synonym: "Telomerase RNA component" EXACT [] -synonym: "telomerase RNA gene" EXACT [] -synonym: "TERC" EXACT [] -xref: http:http://en.wikipedia.org/wiki/Telomerase_RNA_component "wikipedia" -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-05-18T05:26:38Z - -[Term] -id: SO:0001644 -name: targeting_vector -def: "An engineered vector that is able to take part in homologous recombination in a host with the intent of introducing site specific genomic modifications." [MGD:tm, PMID:10354467] -synonym: "targeting vector" RELATED [] -is_a: SO:0000440 ! vector_replicon -is_a: SO:0000804 ! engineered_region -relationship: has_part SO:0000853 ! homologous_region -relationship: has_quality SO:0000783 ! engineered -created_by: kareneilbeck -creation_date: 2010-05-28T02:05:25Z - -[Term] -id: SO:0001645 -name: genetic_marker -def: "A measurable sequence feature that varies within a population." [SO:db] -synonym: "genetic marker" RELATED [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-05-28T02:33:07Z - -[Term] -id: SO:0001646 -name: DArT_marker -def: "A genetic marker, discovered using Diversity Arrays Technology (DArT) technology." [SO:ke] -synonym: "DArT marker" EXACT [] -is_a: SO:0001645 ! 
genetic_marker -created_by: kareneilbeck -creation_date: 2010-05-28T02:34:43Z - -[Term] -id: SO:0001647 -name: kozak_sequence -def: "A kind of ribosome entry site, specific to Eukaryotic organisms that overlaps part of both 5' UTR and CDS sequence." [SO:ke] -subset: SOFA -synonym: "kozak consensus" EXACT [] -synonym: "kozak consensus sequence" EXACT [] -synonym: "kozak sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Kozak_consensus_sequence "wikipedia" -is_a: SO:0000139 ! ribosome_entry_site -created_by: kareneilbeck -creation_date: 2010-06-07T03:12:20Z - -[Term] -id: SO:0001648 -name: nested_transposon -def: "A transposon that is disrupted by the insertion of another element." [SO:ke] -synonym: "nested transposon" EXACT [] -is_a: SO:0000101 ! transposable_element -created_by: kareneilbeck -creation_date: 2010-06-23T03:22:57Z - -[Term] -id: SO:0001649 -name: nested_repeat -def: "A repeat that is disrupted by the insertion of another element." [SO:ke] -synonym: "nested repeat" RELATED [] -is_a: SO:0000657 ! repeat_region -created_by: kareneilbeck -creation_date: 2010-06-23T03:24:55Z - -[Term] -id: SO:0001650 -name: inframe_variant -def: "A sequence variant which does not cause a disruption of the translational reading frame." [SO:ke] -synonym: "cds-indel" EXACT dbsnp [] -synonym: "inframe variant" EXACT [] -is_a: SO:0001818 ! protein_altering_variant -created_by: kareneilbeck -creation_date: 2010-07-19T01:24:44Z - -[Term] -id: SO:0001653 -name: retinoic_acid_responsive_element -def: "A transcription factor binding site of variable direct repeats of the sequence PuGGTCA spaced by five nucleotides (DR5) found in the promoters of retinoic acid-responsive genes, to which retinoic acid receptors bind." [PMID:11327309, PMID:19917671] -synonym: "RARE" EXACT [] -synonym: "retinoic acid responsive element" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000167 ! 
promoter -created_by: kareneilbeck -creation_date: 2010-08-03T10:46:12Z - -[Term] -id: SO:0001654 -name: nucleotide_to_protein_binding_site -def: "A binding site that, in the nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -subset: SOFA -synonym: "nucleotide to protein binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:26:05Z - -[Term] -id: SO:0001655 -name: nucleotide_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with nucleotide residues." [SO:cb] -comment: See GO:0000166 : nucleotide binding. -synonym: "np_bind" EXACT BS [uniprot:feature] -synonym: "nucleotide binding site" EXACT [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:30:04Z - -[Term] -id: SO:0001656 -name: metal_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with metal ions." [SO:cb] -comment: See GO:0046872 : metal ion binding. -synonym: "metal binding site" RELATED [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:31:42Z - -[Term] -id: SO:0001657 -name: ligand_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with a small molecule such as a drug, or hormone." [SO:ke] -synonym: "ligand binding site" EXACT [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:32:58Z - -[Term] -id: SO:0001658 -name: nested_tandem_repeat -def: "An NTR is a nested repeat of two distinct tandem motifs interspersed with each other." [SO:AF] -comment: Tracker ID: 3052459. -synonym: "nested tandem repeat" EXACT [] -synonym: "NTR" EXACT [] -is_a: SO:0001649 ! 
nested_repeat -created_by: kareneilbeck -creation_date: 2010-08-26T09:36:16Z - -[Term] -id: SO:0001659 -name: promoter_element -synonym: "promoter element" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: overlaps SO:0000235 ! TF_binding_site -created_by: kareneilbeck -creation_date: 2010-10-01T11:48:32Z - -[Term] -id: SO:0001660 -name: core_promoter_element -synonym: "core promoter element" EXACT [] -synonym: "general transcription factor binding site" RELATED [] -is_a: SO:0001659 ! promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T11:49:03Z - -[Term] -id: SO:0001661 -name: RNA_polymerase_II_TATA_box -def: "A TATA box core promoter of a gene transcribed by RNA polymerase II." [PMID:16858867] -synonym: "RNA polymerase II TATA box" EXACT [] -is_a: SO:0000174 ! TATA_box -relationship: part_of SO:0001669 ! RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:42:12Z - -[Term] -id: SO:0001662 -name: RNA_polymerase_III_TATA_box -def: "A TATA box core promoter of a gene transcribed by RNA polymerase III." [SO:ke] -synonym: "RNA polymerase III TATA box" EXACT [] -is_a: SO:0000174 ! TATA_box -relationship: part_of SO:0000171 ! RNApol_III_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:43:16Z - -[Term] -id: SO:0001663 -name: BREd_motif -def: "A core TRNA polymerase II promoter element with consensus (G/A)T(T/G/A)(T/A)(G/T)(T/G)(T/G)." [PMID:16858867] -synonym: "BREd" EXACT [] -synonym: "BREd motif" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:49:55Z - -[Term] -id: SO:0001664 -name: DCE -def: "A discontinuous core element of RNA polymerase II transcribed genes, situated downstream of the TSS. It is composed of three sub elements: SI, SII and SIII." [PMID:16858867] -synonym: "downstream core element" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! 
RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:56:41Z - -[Term] -id: SO:0001665 -name: DCE_SI -def: "A sub element of the DCE core promoter element, with consensus sequence CTTC." [PMID:16858867, SO:ke] -synonym: "DCE SI" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:10Z - -[Term] -id: SO:0001666 -name: DCE_SII -def: "A sub element of the DCE core promoter element with consensus sequence CTGT." [PMID:16858867, SO:ke] -synonym: "DCE SII" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:30Z - -[Term] -id: SO:0001667 -name: DCE_SIII -def: "A sub element of the DCE core promoter element with consensus sequence AGC." [PMID:16858867, SO:ke] -synonym: "DCE SIII" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:44Z - -[Term] -id: SO:0001668 -name: proximal_promoter_element -def: "DNA segment that ranges from about -250 to -40 relative to +1 of RNA transcription start site, where sequence specific DNA-binding transcription factors binds, such as Sp1, CTF (CCAAT-binding transcription factor), and CBF (CCAAT-box binding factor)." [PMID:12515390, PMID:9679020, SO:ml] -synonym: "proximal promoter element" RELATED [] -synonym: "specific transcription factor binding site" RELATED [] -is_a: SO:0001678 ! regulatory_promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T03:10:23Z - -[Term] -id: SO:0001669 -name: RNApol_II_core_promoter -def: "The minimal portion of the promoter required to properly initiate transcription in RNA polymerase II transcribed genes." [PMID:16858867] -synonym: "RNApol II core promoter" EXACT [] -is_a: SO:0000170 ! 
RNApol_II_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T03:13:41Z - -[Term] -id: SO:0001670 -name: distal_promoter_element -synonym: "distal promoter element" RELATED [] -is_a: SO:0001678 ! regulatory_promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T03:21:08Z - -[Term] -id: SO:0001671 -name: bacterial_RNApol_promoter_sigma_70 -synonym: "bacterial RNA polymerase promoter sigma 70" EXACT [] -is_a: SO:0000613 ! bacterial_RNApol_promoter -created_by: kareneilbeck -creation_date: 2010-10-06T01:41:34Z - -[Term] -id: SO:0001672 -name: bacterial_RNApol_promoter_sigma54 -synonym: "bacterial RNA polymerase promoter sigma54" EXACT [] -is_a: SO:0000613 ! bacterial_RNApol_promoter -created_by: kareneilbeck -creation_date: 2010-10-06T01:42:37Z - -[Term] -id: SO:0001673 -name: minus_12_signal -def: "A conserved region about 12-bp upstream of the start point of bacterial transcription units, involved with sigma factor 54." [PMID:18331472] -synonym: "minus 12 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001672 ! bacterial_RNApol_promoter_sigma54 -created_by: kareneilbeck -creation_date: 2010-10-06T01:44:57Z - -[Term] -id: SO:0001674 -name: minus_24_signal -def: "A conserved region about 12-bp upstream of the start point of bacterial transcription units, involved with sigma factor 54." [PMID:18331472] -synonym: "minus 24 signal" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001672 ! bacterial_RNApol_promoter_sigma54 -created_by: kareneilbeck -creation_date: 2010-10-06T01:45:24Z - -[Term] -id: SO:0001675 -name: A_box_type_1 -def: "An A box within an RNA polymerase III type 1 promoter." [SO:ke] -comment: The A box can be found in the promoters of type 1 and type 2 (pol III) so sub-typing here allows the part of relationship of the subtypes to remain true. -synonym: "A box type 1" RELATED [] -is_a: SO:0000619 ! A_box -relationship: part_of SO:0000617 ! 
RNApol_III_promoter_type_1 -created_by: kareneilbeck -creation_date: 2010-10-06T05:43:43Z - -[Term] -id: SO:0001676 -name: A_box_type_2 -def: "An A box within an RNA polymerase III type 2 promoter." [SO:ke] -comment: The A box can be found in the promoters of type 1 and type 2 (pol III) so sub-typing here allows the part of relationship of the subtypes to remain true. -synonym: "A box type 2" RELATED [] -is_a: SO:0000619 ! A_box -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 -created_by: kareneilbeck -creation_date: 2010-10-06T05:44:18Z - -[Term] -id: SO:0001677 -name: intermediate_element -def: "A core promoter region of RNA polymerase III type 1 promoters." [PMID:12381659] -synonym: "IE" EXACT [] -synonym: "intermediate element" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 -created_by: kareneilbeck -creation_date: 2010-10-06T05:52:03Z - -[Term] -id: SO:0001678 -name: regulatory_promoter_element -def: "A promoter element that is not part of the core promoter, but provides the promoter with a specific regulatory region." [PMID:12381659] -synonym: "regulatory promoter element" RELATED [] -is_a: SO:0001659 ! promoter_element -created_by: kareneilbeck -creation_date: 2010-10-07T04:39:48Z - -[Term] -id: SO:0001679 -name: transcription_regulatory_region -def: "A regulatory region that is involved in the control of the process of transcription." [SO:ke] -subset: SOFA -synonym: "transcription regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:49:35Z - -[Term] -id: SO:0001680 -name: translation_regulatory_region -def: "A regulatory region that is involved in the control of the process of translation." [SO:ke] -synonym: "translation regulatory region" EXACT [] -is_a: SO:0005836 ! 
regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:52:45Z - -[Term] -id: SO:0001681 -name: recombination_regulatory_region -def: "A regulatory region that is involved in the control of the process of recombination." [SO:ke] -synonym: "recombination regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:53:35Z - -[Term] -id: SO:0001682 -name: replication_regulatory_region -def: "A regulatory region that is involved in the control of the process of nucleotide replication." [SO:ke] -synonym: "replication regulatory region" RELATED [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:54:09Z - -[Term] -id: SO:0001683 -name: sequence_motif -def: "A sequence motif is a nucleotide or amino-acid sequence pattern that may have biological significance." [http://en.wikipedia.org/wiki/Sequence_motif] -subset: SOFA -synonym: "sequence motif" RELATED [] -xref: http://en.wikipedia.org/wiki/Sequence_motif "wikipedia" -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-10-14T04:13:22Z - -[Term] -id: SO:0001684 -name: experimental_feature_attribute -def: "An attribute of an experimentally derived feature." [SO:ke] -synonym: "experimental feature attribute" RELATED [] -is_a: SO:0000733 ! feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:22:23Z - -[Term] -id: SO:0001685 -name: score -def: "The score of an experimentally derived feature such as a p-value." [SO:ke] -is_a: SO:0001684 ! experimental_feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:23:16Z - -[Term] -id: SO:0001686 -name: quality_value -def: "An experimental feature attribute that defines the quality of the feature in a quantitative way, such as a phred quality score." [SO:ke] -synonym: "quality value" RELATED [] -is_a: SO:0001684 ! 
experimental_feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:24:11Z - -[Term] -id: SO:0001687 -name: restriction_enzyme_recognition_site -def: "The nucleotide region (usually a palindrome) that is recognized by a restriction enzyme. This may or may not be equal to the restriction enzyme binding site." [SO:ke] -synonym: "restriction endonuclease recognition site" EXACT [] -synonym: "restriction enzyme recognition site" EXACT [] -is_a: SO:0001954 ! restriction_enzyme_region -created_by: kareneilbeck -creation_date: 2010-10-29T12:29:57Z - -[Term] -id: SO:0001688 -name: restriction_enzyme_cleavage_junction -def: "The boundary at which a restriction enzyme breaks the nucleotide sequence." [SO:ke] -synonym: "restriction enzyme cleavage junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2010-10-29T12:35:02Z - -[Term] -id: SO:0001689 -name: five_prime_restriction_enzyme_junction -def: "The restriction enzyme cleavage junction on the 5' strand of the nucleotide sequence." [SO:ke] -synonym: "5' restriction enzyme junction" EXACT [] -is_a: SO:0001694 ! single_strand_restriction_enzyme_cleavage_site -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:36:24Z - -[Term] -id: SO:0001690 -name: three_prime_restriction_enzyme_junction -synonym: "3' restriction enzyme junction" EXACT [] -is_a: SO:0001694 ! single_strand_restriction_enzyme_cleavage_site -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:37:52Z - -[Term] -id: SO:0001691 -name: blunt_end_restriction_enzyme_cleavage_site -synonym: "blunt end restriction enzyme cleavage site" EXACT [] -is_a: SO:0001687 ! 
restriction_enzyme_recognition_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:39:53Z - -[Term] -id: SO:0001692 -name: sticky_end_restriction_enzyme_cleavage_site -synonym: "sticky end restriction enzyme cleavage site" RELATED [] -is_a: SO:0001687 ! restriction_enzyme_recognition_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:40:50Z - -[Term] -id: SO:0001693 -name: blunt_end_restriction_enzyme_cleavage_junction -def: "A restriction enzyme cleavage site where both strands are cut at the same position." [SO:ke] -synonym: "blunt end restriction enzyme cleavage site" RELATED [] -is_a: SO:0001688 ! restriction_enzyme_cleavage_junction -relationship: part_of SO:0001691 ! blunt_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:43:14Z - -[Term] -id: SO:0001694 -name: single_strand_restriction_enzyme_cleavage_site -def: "A restriction enzyme cleavage site whereby only one strand is cut." [SO:ke] -synonym: "single strand restriction enzyme cleavage site" RELATED [] -is_a: SO:0001688 ! restriction_enzyme_cleavage_junction -created_by: kareneilbeck -creation_date: 2010-10-29T12:44:48Z - -[Term] -id: SO:0001695 -name: restriction_enzyme_single_strand_overhang -def: "A terminal region of DNA sequence where the end of the region is not blunt ended." [SO:ke] -synonym: "single strand overhang" EXACT [] -synonym: "sticky end" RELATED [] -is_a: SO:0001954 ! restriction_enzyme_region -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:48:35Z - -[Term] -id: SO:0001696 -name: experimentally_defined_binding_region -def: "A region that has been implicated in binding although the exact coordinates of binding may be unknown." [SO:ke] -synonym: "experimentally defined binding region" RELATED [] -is_a: SO:0001410 ! 
experimental_feature -created_by: kareneilbeck -creation_date: 2010-11-02T11:39:59Z - -[Term] -id: SO:0001697 -name: ChIP_seq_region -def: "A region of sequence identified by CHiP seq technology to contain a protein binding site." [SO:ke] -synonym: "ChIP seq region" RELATED [] -is_a: SO:0001696 ! experimentally_defined_binding_region -relationship: contains SO:0000410 ! protein_binding_site -created_by: kareneilbeck -creation_date: 2010-11-02T11:43:07Z - -[Term] -id: SO:0001698 -name: ASPE_primer -def: "\"A primer containing an SNV at the 3' end for accurate genotyping." [http://www.ncbi.nlm.nih.gov/pubmed/11252801] -synonym: "allele specific primer extension primer" EXACT [] -synonym: "ASPE primer" EXACT [] -is_a: SO:0000112 ! primer -created_by: kareneilbeck -creation_date: 2010-11-11T03:25:21Z - -[Term] -id: SO:0001699 -name: dCAPS_primer -def: "A primer with one or more mis-matches to the DNA template corresponding to a position within a restriction enzyme recognition site." [http://www.ncbi.nlm.nih.gov/pubmed/9628033] -synonym: "dCAPS primer" EXACT [] -synonym: "derived cleaved amplified polymorphic primer" EXACT [] -is_a: SO:0000112 ! primer -created_by: kareneilbeck -creation_date: 2010-11-11T03:27:09Z - -[Term] -id: SO:0001700 -name: histone_modification -def: "Histone modification is a post translationally modified region whereby residues of the histone protein are modified by methylation, acetylation, phosphorylation, ubiquitination, sumoylation, citrullination, or ADP-ribosylation." [http:en.wikipedia.org/wiki/Histone] -synonym: "histone modification" EXACT [] -synonym: "histone modification site" RELATED [] -is_a: SO:0001089 ! post_translationally_modified_region -is_a: SO:0001720 ! epigenetically_modified_region -relationship: has_quality SO:0000133 ! 
epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-31T10:22:08Z - -[Term] -id: SO:0001701 -name: histone_methylation_site -def: "A histone modification site where the modification is the methylation of the residue." [SO:ke] -synonym: "histone methylation" EXACT [] -synonym: "histone methylation site" EXACT [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:02Z - -[Term] -id: SO:0001702 -name: histone_acetylation_site -def: "A histone modification where the modification is the acylation of the residue." [SO:ke] -synonym: "histone acetylation" EXACT [] -synonym: "histone acetylation site" EXACT [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:27Z - -[Term] -id: SO:0001703 -name: H3K9_acetylation_site -def: "A kind of histone modification site, whereby the 9th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 acetylation site" EXACT [] -synonym: "H3K9Ac" RELATED [] -is_a: SO:0001973 ! histone_3_acetylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:05Z - -[Term] -id: SO:0001704 -name: H3K14_acetylation_site -def: "A kind of histone modification site, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K14 acetylation site" EXACT [] -synonym: "H3K14Ac" RELATED [] -is_a: SO:0001973 ! histone_3_acetylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:53Z - -[Term] -id: SO:0001705 -name: H3K4_monomethylation_site -def: "A kind of histone modification, whereby the 4th residue (a lysine), from the start of the H3 protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 mono-methylation site" EXACT [] -synonym: "H3K4me1" RELATED [] -is_a: SO:0001734 ! 
H3K4_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:28:14Z - -[Term] -id: SO:0001706 -name: H3K4_trimethylation -def: "A kind of histone modification site, whereby the 4th residue (a lysine), from the start of the H3 protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 tri-methylation" EXACT [] -synonym: "H3K4me3" RELATED [] -is_a: SO:0001734 ! H3K4_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:29:12Z - -[Term] -id: SO:0001707 -name: H3K9_trimethylation_site -def: "A kind of histone modification site, whereby the 9th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 tri-methylation site" EXACT [] -synonym: "H3K9Me3" RELATED [] -is_a: SO:0001736 ! H3K9_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:30:34Z - -[Term] -id: SO:0001708 -name: H3K27_monomethylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2K27 mono-methylation site" EXACT [] -synonym: "H2K27Me1" RELATED [] -is_a: SO:0001732 ! H3K27_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:31:54Z - -[Term] -id: SO:0001709 -name: H3K27_trimethylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K27 tri-methylation site" EXACT [] -synonym: "H3K27Me3" RELATED [] -is_a: SO:0001732 ! H3K27_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:32:41Z - -[Term] -id: SO:0001710 -name: H3K79_monomethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is mono- methylated." 
[http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 mono-methylation site" EXACT [] -synonym: "H3K79me1" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:33:42Z - -[Term] -id: SO:0001711 -name: H3K79_dimethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is di-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 di-methylation site" EXACT [] -synonym: "H3K79Me2" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:34:39Z - -[Term] -id: SO:0001712 -name: H3K79_trimethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 tri-methylation site" EXACT [] -synonym: "H3K79Me3" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:35:30Z - -[Term] -id: SO:0001713 -name: H4K20_monomethylation_site -def: "A kind of histone modification site, whereby the 20th residue (a lysine), from the start of the H4histone protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H4K20 mono-methylation site" EXACT [] -synonym: "H4K20Me1" RELATED [] -is_a: SO:0001701 ! histone_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:36:43Z - -[Term] -id: SO:0001714 -name: H2BK5_monomethylation_site -def: "A kind of histone modification site, whereby the 5th residue (a lysine), from the start of the H2B protein is methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2BK5 mono-methylation site" EXACT [] -is_a: SO:0001701 ! 
histone_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:38:12Z - -[Term] -id: SO:0001715 -name: ISRE -def: "An ISRE is a transcriptional cis regulatory region, containing the consensus region: YAGTTTC(A/T)YTTTYCC, responsible for increased transcription via interferon binding." [http://genesdev.cshlp.org/content/2/4/383.abstrac] -comment: Term requested via tracker (2981725) by Alan Ruttenberg, April 2010. It has been described as both an enhancer and a promoter, so the parent is the more general term. -synonym: "interferon stimulated response element" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region -created_by: kareneilbeck -creation_date: 2010-04-05T11:15:08Z - -[Term] -id: SO:0001716 -name: histone_ubiqitination_site -def: "A histone modification site where ubiquitin may be added." [SO:ke] -synonym: "histone ubiquitination site" RELATED [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-04-13T10:12:18Z - -[Term] -id: SO:0001717 -name: H2B_ubiquitination_site -def: "A histone modification site on H2B where ubiquitin may be added." [SO:ke] -synonym: "H2BUbiq" RELATED [] -is_a: SO:0001716 ! histone_ubiqitination_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:13:28Z - -[Term] -id: SO:0001718 -name: H3K18_acetylation_site -def: "A kind of histone modification site, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K18 acetylation site" EXACT [] -synonym: "H3K18Ac" RELATED [] -is_a: SO:0001973 ! histone_3_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:39:35Z - -[Term] -id: SO:0001719 -name: H3K23_acylation_site -def: "A kind of histone modification, whereby the 23rd residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K23 acylation site" RELATED [] -synonym: "H3K23Ac" RELATED [] -is_a: SO:0001973 ! 
histone_3_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:42:45Z - -[Term] -id: SO:0001720 -name: epigenetically_modified_region -def: "A biological region implicated in inherited changes caused by mechanisms other than changes in the underlying DNA sequence." [http://en.wikipedia.org/wiki/Epigenetics, SO:ke] -subset: SOFA -synonym: "epigenetically modified region" RELATED [] -is_a: SO:0001411 ! biological_region -relationship: has_quality SO:0000133 ! epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-27T12:02:29Z - -[Term] -id: SO:0001721 -name: H3K27_acylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K27 acylation site" EXACT [] -synonym: "H3K27Ac" RELATED [] -is_a: SO:0001973 ! histone_3_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:44:09Z - -[Term] -id: SO:0001722 -name: H3K36_monomethylation_site -def: "A kind of histone modification site, whereby the 36th residue (a lysine), from the start of the H3 histone protein is mono-methylated." [SO:ke] -synonym: "H3K36 mono-methylation site" EXACT [] -synonym: "H3K36. -synonym: "transcriptionally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -def: "Expressed in relatively constant amounts without regard to cellular environmental conditions such as the concentration of a particular substrate." [SO:ke] -synonym: "transcriptionally constitutive" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -def: "An inducer molecule is required for transcription to occur." [SO:ke] -synonym: "transcriptionally induced" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -def: "A repressor molecule is required for transcription to stop." 
[SO:ke] -synonym: "transcriptionally repressed" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000127 -name: silenced_gene -def: "A gene that is silenced." [SO:xp] -synonym: "silenced gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000893 ! silenced - -[Term] -id: SO:0000128 -name: gene_silenced_by_DNA_modification -def: "A gene that is silenced by DNA modification." [SO:xp] -synonym: "gene silenced by DNA modification" EXACT [] -is_a: SO:0000127 ! silenced_gene -relationship: has_quality SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000129 -name: gene_silenced_by_DNA_methylation -def: "A gene that is silenced by DNA methylation." [SO:xp] -synonym: "gene silenced by DNA methylation" EXACT [] -synonym: "methylation-silenced gene" EXACT [] -is_a: SO:0000128 ! gene_silenced_by_DNA_modification -relationship: has_quality SO:0000895 ! silenced_by_DNA_methylation - -[Term] -id: SO:0000130 -name: post_translationally_regulated -def: "An attribute describing a gene that is regulated after it has been translated." [SO:ke] -synonym: "post translationally regulated" EXACT [] -synonym: "post-translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000131 -name: translationally_regulated -def: "An attribute describing a gene that is regulated as it is translated." [SO:ke] -synonym: "translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "DNA reverse primer" EXACT [] -synonym: "reverse DNA primer" EXACT [] -synonym: "reverse primer" EXACT [] -synonym: "reverse primer oligo" EXACT [] -synonym: "reverse primer oligonucleotide" EXACT [] -synonym: "reverse primer sequence" EXACT [] -is_a: SO:0000112 ! 
primer -relationship: has_quality SO:0001031 ! reverse - -[Term] -id: SO:0000133 -name: epigenetically_modified -def: "This attribute describes a gene where heritable changes other than those in the DNA sequence occur. These changes include: modification to the DNA (such as DNA methylation, the covalent modification of cytosine), and post-translational modification of histones." [SO:ke] -synonym: "epigenetically modified" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000134 -name: genomically_imprinted -def: "Imprinted genes are epigenetically modified genes that are expressed monoallelically according to their parent of origin." [SO:ke] -synonym: "genomically imprinted" EXACT [] -synonym: "imprinted" BROAD [] -xref: http:http://en.wikipedia.org/wiki/Genomic_imprinting "wiki" -is_a: SO:0000119 ! regulated -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000135 -name: maternally_imprinted -def: "The maternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "maternally imprinted" EXACT [] -is_a: SO:0000134 ! genomically_imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -def: "The paternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "paternally imprinted" EXACT [] -is_a: SO:0000134 ! genomically_imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -def: "Allelic exclusion is a process occurring in diploid organisms, where a gene is inactivated and not expressed in that cell." [SO:ke] -comment: Examples are x-inactivation and immunoglobulin formation. -synonym: "allelically excluded" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000138 -name: gene_rearranged_at_DNA_level -def: "An epigenetically modified gene, rearranged at the DNA level." [SO:xp] -synonym: "gene rearranged at DNA level" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -relationship: has_quality SO:0000904 ! 
rearranged_at_DNA_level - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0001680 ! translation_regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -synonym: "DNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000145 -name: recoded_codon -def: "A codon that has been redefined at translation. The redefinition may be as a result of translational bypass, translational frameshifting or stop codon readthrough." [SO:xp] -synonym: "recoded codon" EXACT [] -is_a: SO:0000360 ! 
codon - -[Term] -id: SO:0000146 -name: capped -def: "An attribute describing when a sequence, usually an mRNA is capped by the addition of a modified guanine nucleotide at the 5' end." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0001876 ! partial_genomic_sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unavailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! 
reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "yeast artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "bacterial artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000154 -name: PAC -def: "The P1-derived artificial chromosome are DNA constructs that are derived from the DNA of P1 bacteriophage. They can carry large amounts (about 100-300 kilobases) of other sequences for a variety of bioengineering purposes. It is one type of vector used to clone DNA fragments (100- to 300-kb insert size; average, 150 kb) in Escherichia coli cells." [http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. Drosophila melanogaster PACs carry an average insert size of 80 kb. The library represents a 6-fold coverage of the genome. -synonym: "P1" EXACT [] -synonym: "P1 artificial chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000155 -name: plasmid -def: "A self replicating, using the hosts cellular machinery, often circular nucleic acid molecule that is distinct from a chromosome in the organism." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "plasmid sequence" EXACT [] -is_a: SO:0001235 ! 
replicon - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as a plasmid or packaged as a phage,since they retain the lambda cos sites." [SO:ma] -comment: Paper: vans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1):9-20. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cosmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Cosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma] -synonym: "phagemid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Phagemid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilizes the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource: mapping and analysis of 96 clones. Genomics 1996. -synonym: "fosmid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Fosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000159 -name: deletion -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -xref: loinc:LA6692-3 "Deletion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the from the viral DNA. 
Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_obsolete: true - -[Term] -id: SO:0000161 -name: methylated_adenine -def: "A modified base in which adenine has been methylated." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -synonym: "methylated_A" EXACT [] -is_a: SO:0000306 ! methylated_DNA_base_feature -is_a: SO:0001962 ! modified_adenine - -[Term] -id: SO:0000162 -name: splice_site -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinery. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! 
cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a place holder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000166 -name: enhancer_bound_by_factor -def: "An enhancer bound by a factor." [SO:xp] -synonym: "enhancer bound by factor" EXACT [] -is_a: SO:0000165 ! enhancer -relationship: has_quality SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." 
[SO:ke] -synonym: "pol I promoter" EXACT [] -synonym: "polymerase I promoter" EXACT [] -synonym: "RNA polymerase A promoter" EXACT [] -synonym: "RNApol I promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke] -synonym: "pol II promoter" RELATED [] -synonym: "polymerase II promoter" EXACT [] -synonym: "RNA polymerase B promoter" EXACT [] -synonym: "RNApol II promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." [SO:ke] -synonym: "pol III promoter" EXACT [] -synonym: "polymerase III promoter" EXACT [] -synonym: "RNA polymerase C promoter" EXACT [] -synonym: "RNApol III promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "CAAT box" EXACT [] -synonym: "CAAT signal" EXACT [] -synonym: "CAAT-box" EXACT [] -xref: http://en.wikipedia.org/wiki/CAAT_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000173 -name: GC_rich_promoter_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "GC rich promoter region" EXACT [] -synonym: "GC-rich region" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:16858867] -comment: Binds TBP. -synonym: "Goldstein-Hogness box" EXACT [] -synonym: "TATA box" EXACT [] -xref: http://en.wikipedia.org/wiki/TATA_box "wiki" -is_a: SO:0001660 ! core_promoter_element - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT. This region is associated with sigma factor 70." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-10 signal" EXACT [] -synonym: "minus 10 signal" EXACT [] -synonym: "Pribnow box" EXACT [] -synonym: "Pribnow Schaller box" EXACT [] -synonym: "Pribnow-Schaller box" EXACT [] -xref: http://en.wikipedia.org/wiki/Pribnow_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001671 ! bacterial_RNApol_promoter_sigma_70 -relationship: part_of SO:0001913 ! bacterial_RNApol_promoter_sigma_ecf - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA. This region is associated with sigma factor 70." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-35 signal" EXACT [] -synonym: "minus 35 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001671 ! bacterial_RNApol_promoter_sigma_70 -relationship: part_of SO:0001913 ! bacterial_RNApol_promoter_sigma_ecf - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." 
[SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." [http://www.dddmag.com/Glossary.aspx#r] -synonym: "class I" RELATED [] -synonym: "class I transposon" EXACT [] -synonym: "retrotransposon element" EXACT [] -xref: http://en.wikipedia.org/wiki/Retrotransposon "wiki" -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." [SO:ke] -synonym: "class II" RELATED [] -synonym: "class II transposon" EXACT [] -synonym: "DNA transposon" EXACT [] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! 
gene_component_region - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -synonym: "U2 intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." [SO:ke] -synonym: "long terminal repeat retrotransposon" EXACT [] -synonym: "LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -synonym: "non LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -synonym: "5' intron" EXACT [] -synonym: "5' intron sequence" EXACT [] -synonym: "five prime intron" EXACT [] -is_a: SO:0000188 ! 
intron - -[Term] -id: SO:0000191 -name: interior_intron -synonym: "interior intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -synonym: "3' intron" EXACT [] -synonym: "3' intron sequence" RELATED [] -synonym: "three prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." [GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "LINE" EXACT [] -synonym: "LINE element" EXACT [] -synonym: "Long interspersed element" EXACT [] -synonym: "Long interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon (here, 'codon' is inclusive of the stop_codon)." [SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! 
five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke] -subset: DBVAR -synonym: "transchr" RELATED [http://www.ncbi.nlm.nih.gov/dbvar/] -synonym: "translocated sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000201 -name: interior_exon -def: "An exon that is bounded by 5' and 3' splice sites." [PMID:10373547] -synonym: "interior exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The coding exon that is most 3-prime on a given transcript." [SO:ma] -synonym: "3' coding exon" RELATED [] -synonym: "three prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." [SO:ke] -synonym: "Short interspersed element" EXACT [] -synonym: "Short interspersed nuclear element" EXACT [] -synonym: "SINE element" EXACT [] -xref: http://en.wikipedia.org/wiki/Short_interspersed_nuclear_element "wiki" -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_variation -synonym: "simple sequence length polymorphism" RELATED [] -synonym: "simple sequence length variation" EXACT [] -synonym: "SSLP" RELATED [] -is_a: SO:0000248 ! sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "terminal inverted repeat element" EXACT [] -synonym: "TIR element" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." 
[SO:ke] -subset: SOFA -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253)." [SO:ke] -synonym: "tRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -def: "A primary transcript encoding alanyl tRNA." [SO:ke] -synonym: "alanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." [SO:ke] -synonym: "arginine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -synonym: "asparagine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -synonym: "aspartic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." [SO:ke] -synonym: "cysteine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." 
[SO:ke] -synonym: "glutamine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." [SO:ke] -synonym: "glycine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." [SO:ke] -synonym: "histidine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke] -synonym: "isoleucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -synonym: "leucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -synonym: "lysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -synonym: "methionine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke] -synonym: "phenylalanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -synonym: "proline tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -synonym: "serine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -synonym: "threonine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -synonym: "tryptophan tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." [SO:ke] -synonym: "tyrosine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." [SO:ke] -synonym: "valine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear RNA (SO:0000274)." [SO:ke] -synonym: "snRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -synonym: "snoRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. 
In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a nucleotide molecule that binds a Transcription Factor or Transcription Factor complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The in-frame interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -synonym: "transcript attribute" EXACT [] -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterized by large modular imperfect long inverted repeats." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "foldback element" EXACT [] -synonym: "long inverted repeat element" RELATED [] -synonym: "LVR element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The sequences extending on either side of a specific region." [SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000240 -name: chromosome_variation -synonym: "chromosome variation" EXACT [] -is_a: SO:0001507 ! variant_collection -disjoint_from: SO:0000400 ! sequence_attribute -relationship: part_of SO:0001524 ! chromosomally_aberrant_genome - -[Term] -id: SO:0000241 -name: internal_UTR -def: "A UTR bordered by the terminal and initial codons of two CDSs in a polycistronic transcript. Every UTR is either 5', 3' or internal." [SO:cjm] -synonym: "internal UTR" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polycistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -synonym: "untranslated region polycistronic mRNA" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke] -synonym: "internal ribosomal entry sequence" EXACT [] -synonym: "internal ribosomal entry site" EXACT [] -synonym: "internal ribosome entry sequence" RELATED [] -synonym: "internal ribosome entry site" EXACT [] -synonym: "IRES" EXACT [] -xref: http://en.wikipedia.org/wiki/Internal_ribosome_entry_site "wiki" -is_a: SO:0000139 ! 
ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_obsolete: true - -[Term] -id: SO:0000246 -name: polyadenylated -def: "A attribute describing the addition of a poly A tail to the 3' end of a mRNA molecule." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000248 -name: sequence_length_variation -synonym: "sequence length variation" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -synonym: "modified RNA base feature" EXACT [] -is_a: SO:0001236 ! base - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). 
Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000254 -name: alanyl_tRNA -def: "A tRNA sequence that has an alanine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "alanyl tRNA" EXACT [] -synonym: "alanyl-transfer ribonucleic acid" EXACT [] -synonym: "alanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000211 ! alanine_tRNA_primary_transcript - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -synonym: "rRNA small subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -def: "A tRNA sequence that has an asparagine anticodon, and a 3' asparagine binding region." [SO:ke] -synonym: "asparaginyl tRNA" EXACT [] -synonym: "asparaginyl-transfer ribonucleic acid" EXACT [] -synonym: "asparaginyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000213 ! 
asparagine_tRNA_primary_transcript - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -def: "A tRNA sequence that has an aspartic acid anticodon, and a 3' aspartic acid binding region." [SO:ke] -synonym: "aspartyl tRNA" EXACT [] -synonym: "aspartyl-transfer ribonucleic acid" EXACT [] -synonym: "aspartyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000214 ! aspartic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -def: "A tRNA sequence that has a cysteine anticodon, and a 3' cysteine binding region." [SO:ke] -synonym: "cysteinyl tRNA" EXACT [] -synonym: "cysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "cysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000215 ! cysteine_tRNA_primary_transcript - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -def: "A tRNA sequence that has a glutamine anticodon, and a 3' glutamine binding region." [SO:ke] -synonym: "glutaminyl tRNA" EXACT [] -synonym: "glutaminyl-transfer ribonucleic acid" EXACT [] -synonym: "glutaminyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000216 ! glutamic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -def: "A tRNA sequence that has a glutamic acid anticodon, and a 3' glutamic acid binding region." [SO:ke] -synonym: "glutamyl tRNA" EXACT [] -synonym: "glutamyl-transfer ribonucleic acid" EXACT [] -synonym: "glutamyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000217 ! glutamine_tRNA_primary_transcript - -[Term] -id: SO:0000261 -name: glycyl_tRNA -def: "A tRNA sequence that has a glycine anticodon, and a 3' glycine binding region." [SO:ke] -synonym: "glycyl tRNA" EXACT [] -synonym: "glycyl-transfer ribonucleic acid" RELATED [] -synonym: "glycyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000218 ! 
glycine_tRNA_primary_transcript - -[Term] -id: SO:0000262 -name: histidyl_tRNA -def: "A tRNA sequence that has a histidine anticodon, and a 3' histidine binding region." [SO:ke] -synonym: "histidyl tRNA" EXACT [] -synonym: "histidyl-transfer ribonucleic acid" EXACT [] -synonym: "histidyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000219 ! histidine_tRNA_primary_transcript - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -def: "A tRNA sequence that has an isoleucine anticodon, and a 3' isoleucine binding region." [SO:ke] -synonym: "isoleucyl tRNA" EXACT [] -synonym: "isoleucyl-transfer ribonucleic acid" EXACT [] -synonym: "isoleucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000220 ! isoleucine_tRNA_primary_transcript - -[Term] -id: SO:0000264 -name: leucyl_tRNA -def: "A tRNA sequence that has a leucine anticodon, and a 3' leucine binding region." [SO:ke] -synonym: "leucyl tRNA" EXACT [] -synonym: "leucyl-transfer ribonucleic acid" EXACT [] -synonym: "leucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000221 ! leucine_tRNA_primary_transcript - -[Term] -id: SO:0000265 -name: lysyl_tRNA -def: "A tRNA sequence that has a lysine anticodon, and a 3' lysine binding region." [SO:ke] -synonym: "lysyl tRNA" EXACT [] -synonym: "lysyl-transfer ribonucleic acid" EXACT [] -synonym: "lysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000222 ! lysine_tRNA_primary_transcript - -[Term] -id: SO:0000266 -name: methionyl_tRNA -def: "A tRNA sequence that has a methionine anticodon, and a 3' methionine binding region." [SO:ke] -synonym: "methionyl tRNA" EXACT [] -synonym: "methionyl-transfer ribonucleic acid" EXACT [] -synonym: "methionyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000223 ! 
methionine_tRNA_primary_transcript - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -def: "A tRNA sequence that has a phenylalanine anticodon, and a 3' phenylalanine binding region." [SO:ke] -synonym: "phenylalanyl tRNA" EXACT [] -synonym: "phenylalanyl-transfer ribonucleic acid" EXACT [] -synonym: "phenylalanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000224 ! phenylalanine_tRNA_primary_transcript - -[Term] -id: SO:0000268 -name: prolyl_tRNA -def: "A tRNA sequence that has a proline anticodon, and a 3' proline binding region." [SO:ke] -synonym: "prolyl tRNA" EXACT [] -synonym: "prolyl-transfer ribonucleic acid" EXACT [] -synonym: "prolyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000225 ! proline_tRNA_primary_transcript - -[Term] -id: SO:0000269 -name: seryl_tRNA -def: "A tRNA sequence that has a serine anticodon, and a 3' serine binding region." [SO:ke] -synonym: "seryl tRNA" EXACT [] -synonym: "seryl-transfer ribonucleic acid" RELATED [] -synonym: "seryl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000226 ! serine_tRNA_primary_transcript - -[Term] -id: SO:0000270 -name: threonyl_tRNA -def: "A tRNA sequence that has a threonine anticodon, and a 3' threonine binding region." [SO:ke] -synonym: "threonyl tRNA" EXACT [] -synonym: "threonyl-transfer ribonucleic acid" EXACT [] -synonym: "threonyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000227 ! threonine_tRNA_primary_transcript - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -def: "A tRNA sequence that has a tryptophan anticodon, and a 3' tryptophan binding region." [SO:ke] -synonym: "tryptophanyl tRNA" EXACT [] -synonym: "tryptophanyl-transfer ribonucleic acid" EXACT [] -synonym: "tryptophanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000228 ! 
tryptophan_tRNA_primary_transcript - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -def: "A tRNA sequence that has a tyrosine anticodon, and a 3' tyrosine binding region." [SO:ke] -synonym: "tyrosyl tRNA" EXACT [] -synonym: "tyrosyl-transfer ribonucleic acid" EXACT [] -synonym: "tyrosyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000229 ! tyrosine_tRNA_primary_transcript - -[Term] -id: SO:0000273 -name: valyl_tRNA -def: "A tRNA sequence that has a valine anticodon, and a 3' valine binding region." [SO:ke] -synonym: "valyl tRNA" EXACT [] -synonym: "valyl-transfer ribonucleic acid" EXACT [] -synonym: "valyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000230 ! valine_tRNA_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000231 ! snRNA_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000232 ! 
snoRNA_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -alt_id: SO:0000649 -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene (or the product of other non coding RNA genes. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (usually via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:11081512, PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -synonym: "small temporal RNA" EXACT [] -synonym: "stRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0000277 -name: bound_by_factor -def: "An attribute describing a sequence that is bound by another molecule." [SO:ke] -comment: Formerly called transcript_by_bound_factor. -synonym: "bound by factor" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000278 -name: transcript_bound_by_nucleic_acid -def: "A transcript that is bound by a nucleic acid." [SO:xp] -comment: Formerly called transcript_by_bound_nucleic_acid. -synonym: "transcript bound by nucleic acid" EXACT [] -is_a: SO:0000673 ! transcript -relationship: has_quality SO:0000876 ! bound_by_nucleic_acid - -[Term] -id: SO:0000279 -name: transcript_bound_by_protein -def: "A transcript that is bound by a protein." [SO:xp] -comment: Formerly called transcript_by_bound_protein. -synonym: "transcript bound by protein" EXACT [] -is_a: SO:0000673 ! transcript -relationship: has_quality SO:0000875 ! bound_by_protein - -[Term] -id: SO:0000280 -name: engineered_gene -def: "A gene that is engineered." [SO:xp] -synonym: "engineered gene" EXACT [] -is_a: SO:0000704 ! 
gene -is_a: SO:0000804 ! engineered_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000281 -name: engineered_foreign_gene -def: "A gene that is engineered and foreign." [SO:xp] -synonym: "engineered foreign gene" EXACT [] -is_a: SO:0000280 ! engineered_gene -is_a: SO:0000285 ! foreign_gene -is_a: SO:0000805 ! engineered_foreign_region -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000282 -name: mRNA_with_minus_1_frameshift -def: "An mRNA with a minus 1 frameshift." [SO:xp] -synonym: "mRNA with minus 1 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -relationship: has_quality SO:0000866 ! minus_1_frameshift - -[Term] -id: SO:0000283 -name: engineered_foreign_transposable_element_gene -def: "A transposable_element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign transposable element gene" EXACT [] -is_a: SO:0000111 ! transposable_element_gene -is_a: SO:0000281 ! engineered_foreign_gene -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartite and interrupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000285 -name: foreign_gene -def: "A gene that is foreign." [SO:xp] -synonym: "foreign gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "direct terminal repeat" RELATED [] -synonym: "long terminal repeat" EXACT [] -synonym: "LTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Long_terminal_repeat "wiki" -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000186 ! 
LTR_retrotransposon - -[Term] -id: SO:0000287 -name: fusion_gene -def: "A gene that is a fusion." [SO:xp] -synonym: "fusion gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Fusion_gene "wiki" -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000806 ! fusion - -[Term] -id: SO:0000288 -name: engineered_fusion_gene -def: "A fusion gene that is engineered." [SO:xp] -synonym: "engineered fusion gene" EXACT [] -is_a: SO:0000280 ! engineered_gene -is_a: SO:0000287 ! fusion_gene -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000289 -name: microsatellite -def: "A repeat_region containing repeat_units of 2 to 10 bp repeated in tandem." [http://www.informatics.jax.org/silver/glossary.shtml, NCBI:th] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite" EXACT [] -synonym: "dinucleotide repeat microsatellite feature" EXACT [] -synonym: "dinucleotide repeat microsatellite locus" EXACT [] -synonym: "dinucleotide repeat microsatellite marker" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite marker" RELATED [] -synonym: "rinucleotide repeat microsatellite" EXACT [] -synonym: "trinucleotide repeat microsatellite feature" EXACT [] -synonym: "trinucleotide repeat microsatellite locus" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_obsolete: true - -[Term] -id: SO:0000293 -name: engineered_foreign_repetitive_element -def: "A repetitive element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign repetitive element" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000805 ! 
engineered_foreign_region -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -synonym: "U12 intron" EXACT [] -synonym: "U12-dependent intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! replicon - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "D-loop" EXACT [] -synonym: "displacement loop" RELATED [] -xref: http://en.wikipedia.org/wiki/D_loop "wiki" -is_a: SO:0000296 ! 
origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -synonym: "recombination feature" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000299 -name: specific_recombination_site -synonym: "specific recombination site" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -synonym: "recombination feature of rearranged gene" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000301 -name: vertebrate_immune_system_gene_recombination_feature -synonym: "vertebrate immune system gene recombination feature" EXACT [] -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene recombination feature" EXACT [] -synonym: "J-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interrupted palindrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_DNA_base -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001236 ! base -is_a: SO:0001720 ! 
epigenetically_modified_region - -[Term] -id: SO:0000306 -name: methylated_DNA_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_DNA_base - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_obsolete: true - -[Term] -id: SO:0000309 -name: computed_feature -is_obsolete: true - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_obsolete: true - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to: -is_obsolete: true - -[Term] -id: SO:0000312 -name: experimentally_determined -def: "Attribute to describe a feature that has been experimentally verified." [SO:ke] -synonym: "experimentally determined" EXACT [] -is_a: SO:0000789 ! validated - -[Term] -id: SO:0000313 -name: stem_loop -alt_id: SO:0000019 -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "RNA_hairpin_loop" EXACT [] -synonym: "stem loop" EXACT [] -synonym: "stem-loop" EXACT [] -xref: http://en.wikipedia.org/wiki/Stem_loop "wiki" -is_a: SO:0000122 ! RNA_sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! 
repeat_region - -[Term] -id: SO:0000315 -name: TSS -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -synonym: "transcription_start_site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -synonym: "coding_sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cDNA clone" EXACT [] -is_a: SO:0000151 ! clone -relationship: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke] -synonym: "intronic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer -relationship: part_of SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000321 -name: mRNA_with_plus_1_frameshift -def: "An mRNA with a plus 1 frameshift." 
[SO:ke] -synonym: "mRNA with plus 1 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -relationship: has_quality SO:0000868 ! plus_1_frameshift - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -synonym: "nuclease hypersensitive site" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "coding start" EXACT [] -synonym: "translation initiation site" EXACT [] -synonym: "translation start" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "35S rRNA primary transcript" EXACT [] -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke] -synonym: "coding end" EXACT [] -synonym: "translation termination site" EXACT [] -synonym: "translation_end" EXACT [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray oligo" EXACT [] -synonym: "microarray oligonucleotide" EXACT [] -is_a: SO:0000051 ! probe - -[Term] -id: SO:0000329 -name: mRNA_with_plus_2_frameshift -def: "An mRNA with a plus 2 frameshift." 
[SO:xp] -synonym: "mRNA with plus 2 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -relationship: has_quality SO:0000869 ! plus_2_framshift - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000335 -name: mRNA_with_minus_2_frameshift -def: "A mRNA with a minus 2 frameshift." [SO:ke] -synonym: "mRNA with minus 2 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -relationship: has_quality SO:0000867 ! 
minus_2_frameshift - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0001411 ! biological_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITEs do not encode proteins." [http://www.pnas.org/cgi/content/full/97/18/10083] -synonym: "miniature inverted repeat transposable element" EXACT [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome which promotes recombination." [SO:rd] -synonym: "recombination hotspot" EXACT [] -xref: http://en.wikipedia.org/wiki/Recombination_hotspot "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." 
[SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -synonym: "site specific recombination target region" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0000165 ! enhancer -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000346 -name: loxP_site -synonym: "Cre-recombination target region" RELATED [] -synonym: "loxP site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! 
match - -[Term] -id: SO:0000348 -name: nucleic_acid -def: "An attribute describing a sequence consisting of nucleobases bound to repeating units. The forms found in nature are deoxyribonucleic acid (DNA), where the repeating units are 2-deoxy-D-ribose rings connected to a phosphate backbone, and ribonucleic acid (RNA), where the repeating units are D-ribose rings connected to a phosphate backbone." [CHEBI:33696, RSC:cb] -synonym: "nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleic_acid "wiki" -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000350 -name: FRT_site -def: "An inversion site found on the Saccharomyces cerevisiae 2 micron plasmid." [SO:ma] -synonym: "FLP recombination target region" EXACT [] -synonym: "FRT site" EXACT [] -is_a: SO:0000948 ! inversion_site - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "An attribute to decide a sequence of nucleotides, nucleotide analogs, or amino acids that has been designed by an experimenter and which may, or may not, correspond with any natural sequence." [SO:ma] -synonym: "synthetic sequence" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000352 -name: DNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a 2-deoxy-D-ribose ring connected to a phosphate backbone." [RSC:cb] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000353 -name: sequence_assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! 
assembly - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -def: "A region of intronic nucleotide sequence targeted by a nuclease enzyme." [SO:ke] -synonym: "group 1 intron homing endonuclease target region" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which is co-inherited as the result of the lack of historic recombination within it." [SO:ma] -synonym: "haplotype block" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000356 -name: RNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a D-ribose ring connected to a phosphate backbone." [RSC:cb] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: flanked -def: "An attribute describing a region that is bounded either side by a particular kind of region." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000359 -name: floxed -def: "An attribute describing sequence that is flanked by Lox-P sites." [SO:ke] -xref: http://en.wikipedia.org/wiki/Floxed "wiki" -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000361 -name: FRT_flanked -def: "An attribute to describe sequence that is flanked by the FLP recombinase recognition site, FRT." [SO:ke] -synonym: "FRT flanked" EXACT [] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000362 -name: invalidated_by_chimeric_cDNA -def: "A cDNA clone constructed from more than one mRNA. 
Usually an experimental artifact." [SO:ma] -synonym: "invalidated by chimeric cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000363 -name: floxed_gene -def: "A transgene that is floxed." [SO:xp] -synonym: "floxed gene" EXACT [] -is_a: SO:0000902 ! transgene -relationship: has_quality SO:0000359 ! floxed - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposable element." [SO:ke] -synonym: "transposable element flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "A region encoding an integrase which acts at a site adjacent to it (attI_site) to insert DNA which must include but is not limited to an attC_site." [SO:as] -xref: http://en.wikipedia.org/wiki/Integron "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000367 -name: attI_site -def: "A region within an integron, adjacent to an integrase, at which site specific recombination involving an attC_site takes place." [SO:as] -synonym: "attI site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -is_obsolete: true - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/~smaloy/Glossary/C.html] -synonym: "conjugative transposon" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -is_a: SO:0000673 ! transcript -relationship: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0000373 -name: recombinationally_inverted_gene -def: "A recombinationally rearranged gene by inversion." [SO:xp] -synonym: "recombinationally inverted gene" EXACT [] -is_a: SO:0000456 ! recombinationally_rearranged_gene -relationship: has_quality SO:1000036 ! inversion - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -is_a: SO:0000372 ! enzymatic_RNA -relationship: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000375 -name: rRNA_5_8S -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! 
large_subunit_rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 6S RNA represses expression from a sigma70-dependent promoter during stationary phase." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S RNA" EXACT [] -synonym: "RNA 6S" EXACT [] -xref: http://en.wikipedia.org/wiki/6S_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB RsmB RNA" EXACT [] -synonym: "CsrB-RsmB RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. 
The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -synonym: "DsrA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/DsrA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -synonym: "GcvB RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/GcvB_RNA "wiki" -is_a: SO:0000378 ! DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [PMID:2436805] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -is_a: SO:0000715 ! RNA_motif -relationship: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000381 -name: group_IIA_intron -synonym: "group IIA intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -synonym: "group IIB intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -synonym: "MicF RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MicF_RNA "wiki" -is_a: SO:0000644 ! 
antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -synonym: "OxyS RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/OxyS_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterized activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. 
Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -synonym: "RprA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RprA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -synonym: "RRE RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. 
coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -synonym: "spot-42 RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Spot_42_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesizes telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). 
Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. 
The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." 
[PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -def: "An attribute describes a quality of sequence." [SO:ke] -synonym: "sequence attribute" EXACT [] - -[Term] -id: SO:0000401 -name: gene_attribute -synonym: "gene attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_obsolete: true - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! 
C_D_box_snoRNA -relationship: derives_from SO:0005837 ! U14_snoRNA_primary_transcript - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron. Twintrons are group II or III introns, into which another group II or III intron has been transposed." [PMID:1899376, PMID:7823908] -xref: http://en.wikipedia.org/wiki/Twintron "wiki" -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! 
small_subunit_rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A biological_region of sequence that, in the molecule, interacts selectively and non-covalently with other molecules. A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: See GO:0005488 : binding. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with polypeptide molecules." [SO:ke] -comment: See GO:0042277 : peptide binding. -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000411 -name: rescue_region -def: "A region that rescues." [SO:xp] -synonym: "rescue fragment" EXACT [] -synonym: "rescue region" EXACT [] -synonym: "rescue segment" RELATED [] -is_a: SO:0000695 ! reagent -relationship: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! 
remark - -[Term] -id: SO:0000414 -name: invalidated_by_genomic_contamination -def: "An attribute to describe a feature that is invalidated due to genomic contamination." [SO:ke] -synonym: "invalidated by genomic contamination" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000415 -name: invalidated_by_genomic_polyA_primed_cDNA -def: "An attribute to describe a feature that is invalidated due to polyA priming." [SO:ke] -synonym: "invalidated by genomic polyA primed cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000416 -name: invalidated_by_partial_processing -def: "An attribute to describe a feature that is invalidated due to partial processing." [SO:ke] -synonym: "invalidated by partial processing" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000417 -name: polypeptide_domain -alt_id: BS:00012 -alt_id: BS:00134 -alt_id: SO:0001069 -def: "A structurally or functionally defined protein region. In proteins with multiple domains, the combination of the domains determines the function of the protein. A region which has been shown to recur throughout evolution." [EBIBS:GAR] -comment: Range. Old definition from before biosapiens: A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains. -subset: biosapiens -synonym: "domain" BROAD BS [uniprot:feature_type] -synonym: "polypeptide domain" EXACT [] -synonym: "polypeptide_structural_domain" EXACT BS [] -synonym: "structural domain" BROAD BS [] -is_a: SO:0001070 ! polypeptide_structural_region -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0001527 ! peptide_localization_signal -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -synonym: "5' TIR" EXACT [] -synonym: "five prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -synonym: "3' TIR" EXACT [] -synonym: "three prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -synonym: "U5 long terminal repeat region" EXACT [] -synonym: "U5 LTR region" EXACT [] -is_a: SO:0000848 ! 
LTR_component - -[Term] -id: SO:0000423 -name: R_LTR_region -synonym: "R long terminal repeat region" EXACT [] -synonym: "R LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000424 -name: U3_LTR_region -synonym: "U3 long terminal repeat region" EXACT [] -synonym: "U3 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000425 -name: five_prime_LTR -synonym: "5' long terminal repeat" EXACT [] -synonym: "5' LTR" EXACT [] -synonym: "five prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -synonym: "3' long terminal repeat" EXACT [] -synonym: "3' LTR" EXACT [] -synonym: "three prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -synonym: "R 5' long term repeat region" EXACT [] -synonym: "R five prime LTR region" EXACT [] -is_a: SO:0000423 ! R_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -synonym: "U5 5' long terminal repeat region" EXACT [] -synonym: "U5 five prime LTR region" EXACT [] -is_a: SO:0000422 ! U5_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -synonym: "U3 5' long term repeat region" EXACT [] -synonym: "U3 five prime LTR region" EXACT [] -is_a: SO:0000424 ! U3_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -synonym: "R 3' long terminal repeat region" EXACT [] -synonym: "R three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -synonym: "U3 3' long terminal repeat region" EXACT [] -synonym: "U3 three prime LTR region" EXACT [] -is_a: SO:0000849 ! 
three_prime_LTR_component - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -synonym: "U5 3' long terminal repeat region" EXACT [] -synonym: "U5 three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -synonym: "non LTR retrotransposon polymeric tract" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: target_site_duplication -def: "A sequence of the target DNA that is duplicated when a transposable element or phage inserts; usually found at each end the insertion." [http://www.koko.gov.my/CocoaBioTech/Glossaryt.html] -synonym: "target site duplication" EXACT [] -is_a: SO:0000314 ! direct_repeat -relationship: derives_from SO:0000101 ! transposable_element - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." [SO:ke] -synonym: "LTR retrotransposon poly purine tract" RELATED [] -synonym: "RR tract" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -synonym: "inverted ring chromosome" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! 
ring_chromosome - -[Term] -id: SO:0000440 -name: vector_replicon -def: "A replicon that has been modified to act as a vector for foreign sequence." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "vector" EXACT [] -synonym: "vector replicon" EXACT [] -xref: http://en.wikipedia.org/wiki/Vector_(molecular_biology) "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000443 -name: polymer_attribute -def: "An attribute to describe the kind of biological sequence." [SO:ke] -synonym: "polymer attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -synonym: "three prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "5' nc exon" EXACT [] -synonym: "5' non coding exon" EXACT [] -synonym: "five prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." 
[SO:ke] -synonym: "UTR intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." [SO:ke] -synonym: "five prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -synonym: "three prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequency of these components." [SO:ma] -synonym: "random sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -synonym: "chromosome interband" RELATED [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000451 -name: gene_with_polyadenylated_mRNA -def: "A gene that encodes a polyadenylated mRNA." [SO:xp] -synonym: "gene with polyadenylated mRNA" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -relationship: transcribed_to SO:0000871 ! polyadenylated_mRNA - -[Term] -id: SO:0000452 -name: transgene_attribute -is_obsolete: true - -[Term] -id: SO:0000453 -name: chromosomal_transposition -def: "A chromosome structure variant whereby a region of a chromosome has been transferred to another position. Among interchromosomal rearrangements, the term transposition is reserved for that class in which the telomeres of the chromosomes involved are coupled (that is to say, form the two ends of a single DNA molecule) as in wild-type." [FB:reference_manual, SO:ke] -synonym: "chromosomal transposition" EXACT [] -synonym: "transposition" NARROW [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." 
[http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000455 -name: gene_with_mRNA_with_frameshift -def: "A gene that encodes an mRNA with a frameshift." [SO:xp] -synonym: "gene with mRNA with frameshift" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -relationship: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000456 -name: recombinationally_rearranged_gene -def: "A gene that is recombinationally rearranged." [SO:ke] -synonym: "recombinationally rearranged gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000940 ! recombinationally_rearranged - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -def: "A chromosome duplication involving an insertion from another chromosome." [SO:ke] -synonym: "interchromosomal duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene_segment -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D gene" EXACT [] -synonym: "D-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000459 -name: gene_with_trans_spliced_transcript -def: "A gene with a transcript that is trans-spliced." [SO:xp] -synonym: "gene with trans spliced transcript" EXACT [] -is_a: SO:0000704 ! gene -relationship: transcribed_to SO:0000479 ! trans_spliced_transcript - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_segment -comment: I am using the term segment instead of gene here to avoid confusion with the region 'gene'. -synonym: "vertebrate immunoglobulin T cell receptor segment" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene" EXACT [] -is_a: SO:0000301 ! 
vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite deficiency" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendant of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000463 -name: encodes_alternately_spliced_transcripts -def: "A gene that encodes more than one transcript." [SO:ke] -synonym: "encodes alternately spliced transcripts" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome deletion whereby a chromosome is generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus duplication" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene_segment -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene" EXACT [] -synonym: "V-GENE" EXACT [] -synonym: "variable_gene" EXACT [] -is_a: SO:0000460 ! 
vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -def: "An attribute describing a gene sequence where the resulting protein is regulated by the stability of the resulting protein." [SO:ke] -synonym: "post translationally regulated by protein stability" EXACT [] -synonym: "post-translationally regulated by protein stability" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -def: "An attribute describing a gene sequence where the resulting protein is modified to regulate it." [SO:ke] -synonym: "post translationally regulated by protein modification" EXACT [] -synonym: "post-translationally regulated by protein modification" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene_segment -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene" EXACT [] -synonym: "J-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000471 -name: autoregulated -def: "The gene product is involved in its own transcriptional regulation." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -def: "The gene product is involved in its own transcriptional regulation where it decreases transcription." [SO:ke] -synonym: "negatively autoregulated" EXACT [] -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -def: "The gene product is involved in its own transcriptional regulation, where it increases transcription." [SO:ke] -synonym: "positively autoregulated" EXACT [] -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." [SO:ke] -synonym: "contig read" EXACT [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -def: "A gene that is polycistronic." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000478 -name: C_gene_segment -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C gene" EXACT [] -synonym: "C_GENE" EXACT [] -synonym: "constant gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000479 -name: trans_spliced_transcript -def: "A transcript that is trans-spliced." [SO:xp] -synonym: "trans spliced transcript" EXACT [] -synonym: "trans-spliced transcript" EXACT [] -is_a: SO:0000673 ! transcript -relationship: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. 
A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly." [SO:ke] -synonym: "tiling path clone" EXACT [] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occurring at the termini of a DNA transposon." [SO:ke] -synonym: "terminal inverted repeat" EXACT [] -synonym: "TIR" EXACT [] -is_a: SO:0000294 ! inverted_repeat -relationship: part_of SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor gene cluster" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-CLUSTER" EXACT [] -synonym: "DJ J cluster" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-C-CLUSTER" EXACT [] -synonym: "VDJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-CLUSTER" EXACT [] -synonym: "VDJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-C-CLUSTER" RELATED [] -synonym: "VJ C cluster" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-C-CLUSTER" EXACT [] -synonym: "VJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-CLUSTER" EXACT [] -synonym: "VJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -synonym: "D gene recombination feature" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-HEPTAMER" EXACT [] -synonym: "three prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! 
three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-NOMAMER" EXACT [] -synonym: "three prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-SPACER" EXACT [] -synonym: "three prime D spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-HEPTAMER" EXACT [] -synonym: "five prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-NONAMER" EXACT [] -synonym: "five prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! 
five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'-SPACER" EXACT [] -synonym: "five prime D spacer" EXACT [] -synonym: "five prime D-spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -synonym: "Hoogsteen base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Hoogsteen_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." [SO:ke] -synonym: "reverse Hoogsteen base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." 
[SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ C cluster" EXACT [] -synonym: "D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ cluster" EXACT [] -synonym: "D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J C cluster" EXACT [] -synonym: "D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! 
J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000507 -name: pseudogenic_exon -def: "A non functional descendant of an exon, part of a pseudogene." [SO:ke] -comment: This is the analog of the exon of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon -relationship: part_of SO:0000516 ! pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J cluster" EXACT [] -synonym: "D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J C cluster" EXACT [] -synonym: "D-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! 
C_gene_segment - -[Term] -id: SO:0000510 -name: VD_gene_segment -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V_D_GENE" EXACT [] -synonym: "VD gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J C cluster" EXACT [] -synonym: "J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus aneuploid" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J cluster" EXACT [] -synonym: "J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J nonamer" EXACT [] -synonym: "J-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J heptamer" EXACT [] -synonym: "J-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -def: "A non functional descendant of a transcript, part of a pseudogene." [SO:ke] -comment: This is the analog of the transcript of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic transcript" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J spacer" EXACT [] -synonym: "J-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ cluster" EXACT [] -synonym: "V-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J cluster" EXACT [] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ C cluster" EXACT [] -synonym: "V-(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ cluster" EXACT [] -synonym: "V-(VDJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000574 ! 
VDJ_gene_segment - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J cluster" EXACT [] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ C cluster" EXACT [] -synonym: "V-(VJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ cluster" EXACT [] -synonym: "V-(VJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J cluster" EXACT [] -synonym: "V-(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V cluster" EXACT [] -synonym: "V-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ C cluster" EXACT [] -synonym: "V-D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ cluster" EXACT [] -synonym: "V-D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000572 ! 
DJ_gene_segment - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J C cluster" EXACT [] -synonym: "V-D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J cluster" EXACT [] -synonym: "V-D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J C cluster" EXACT [] -synonym: "V-D-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! 
C_gene_segment - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J cluster" EXACT [] -synonym: "V-D-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V heptamer" EXACT [] -synonym: "V-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J cluster" EXACT [] -synonym: "V-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J C cluster" EXACT [] -synonym: "V-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! 
J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V nonamer" EXACT [] -synonym: "V-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V spacer" EXACT [] -synonym: "V-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene recombination feature" EXACT [] -synonym: "V-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-C-CLUSTER" EXACT [] -synonym: "DJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! 
DJ_gene_segment - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-C-CLUSTER" EXACT [] -synonym: "DJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-C-CLUSTER" EXACT [] -synonym: "VDJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ C cluster" EXACT [] -synonym: "V-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." 
[http://www.pnas.org/cgi/content/full/100/11/6569] -synonym: "ISCR" RELATED [] -xref: http://en.wikipedia.org/wiki/Helitron "wiki" -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -synonym: "recoding pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -synonym: "designed sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite duplication" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000548 -name: gene_with_edited_transcript -def: "A gene that encodes a transcript that is edited." [SO:xp] -synonym: "gene with edited transcript" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -relationship: transcribed_to SO:0000873 ! edited_transcript - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived duplication plus aneuploid" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -def: "A chromosome structural variation whereby either a chromosome exists in addition to the normal chromosome complement or is lacking." [SO:ke] -comment: Examples are Nullo-4, Haplo-4 and triplo-4 in Drosophila. 
-synonym: "aneuploid chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -synonym: "polyadenylation termination signal" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "A region in the 5' UTR that pairs with the 16S rRNA during formation of the preinitiation complex." [SO:jh] -comment: Not found in Eukaryotic sequence. -synonym: "five prime ribosome binding site" EXACT [] -synonym: "RBS" RELATED [] -synonym: "Shine Dalgarno sequence" EXACT [] -synonym: "Shine-Dalgarno sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Shine-Dalgarno_sequence "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -alt_id: SO:0001430 -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation. The boundary between the UTR and the polyA sequence." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA cleavage site" EXACT [] -synonym: "polyA junction" EXACT [] -synonym: "polyA site" EXACT [] -synonym: "polyA_junction" EXACT [] -synonym: "polyadenylation site" RELATED [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "5' clip" RELATED [] -synonym: "five prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'RS" EXACT [] -synonym: "five prime D recombination signal sequence" EXACT [] -synonym: "five prime D-recombination signal sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "3'-clip" EXACT [] -synonym: "three prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C cluster" EXACT [] -synonym: "C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D cluster" EXACT [] -synonym: "D-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J cluster" EXACT [] -synonym: "D-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: "Seven nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or J-gene recombination feature of an immunoglobulin or T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "HEPTAMER" RELATED [] -synonym: "heptamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -synonym: "nonamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000563 -name: vertebrate_immune_system_gene_recombination_spacer -synonym: "vertebrate immune system gene recombination spacer" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J C cluster" EXACT [] -synonym: "V-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! 
DJ_gene_segment - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J C cluster" EXACT [] -synonym: "V-(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J C cluster" EXACT [] -synonym: "V-(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -synonym: "inversion derived aneuploid chromosome" EXACT [] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promoter -def: "An unregulated promoter that allows continuous expression." [SO:ke] -synonym: "bidirectional promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000569 -name: retrotransposed -alt_id: SO:0100042 -def: "An attribute of a feature that occurred as the product of a reverse transcriptase mediated event." 
[SO:ke] -comment: GO:0003964 RNA-directed DNA polymerase activity. -xref: http://en.wikipedia.org/wiki/Retrotransposed "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-RS" EXACT [] -synonym: "three prime D recombination signal sequence" EXACT [] -synonym: "three_prime_D-recombination_signal_sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_encoding -synonym: "miRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000572 -name: DJ_gene_segment -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D-J-GENE" EXACT [] -synonym: "DJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000573 -name: rRNA_encoding -synonym: "rRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000574 -name: VDJ_gene_segment -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-D-J-GENE" EXACT [] -synonym: "VDJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000575 -name: scRNA_encoding -synonym: "scRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000576 -name: VJ_gene_segment -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-J-GENE" EXACT [] -synonym: "VJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_encoding -synonym: "snoRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." [SO:ma] -synonym: "edited transcript feature" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -synonym: "methylation guide snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." [SO:ke] -synonym: "rRNA cleavage snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! 
snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "pre edited region" EXACT [] -synonym: "pre-edited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "A tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. TmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and Eukaryote nuclear genomes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa RNA" RELATED [] -synonym: "ssrA" RELATED [] -xref: http://en.wikipedia.org/wiki/TmRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_encoding -synonym: "C/D box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." [SO:ke] -synonym: "10Sa RNA primary transcript" RELATED [] -synonym: "ssrA RNA primary transcript" RELATED [] -synonym: "tmRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyze their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). 
These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -is_a: SO:0000188 ! intron -relationship: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -synonym: "SRP RNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. 
The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000589 ! SRP_RNA_primary_transcript - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A tertiary structure in RNA where nucleotides in a loop form base pairs with a region of RNA downstream of the loop." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Pseudoknot "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "classical pseudoknot" EXACT [] -synonym: "H pseudoknot" EXACT [] -synonym: "H-pseudoknot" EXACT [] -synonym: "H-type pseudoknot" EXACT [] -synonym: "hairpin-type pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. 
Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000595 ! C_D_box_snoRNA_primary_transcript - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "box H/ACA snoRNA" EXACT [] -synonym: "H ACA box snoRNA" EXACT [] -synonym: "H/ACA box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000596 ! H_ACA_box_snoRNA_primary_transcript - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." [SO:ke] -synonym: "C/D box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -synonym: "H ACA box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! 
snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." [http://www.rna.ucla.edu/index.html] -is_obsolete: true - -[Term] -id: SO:0000598 -name: edited_by_C_insertion_and_dinucleotide_insertion -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000599 -name: edited_by_C_to_U_substitution -is_obsolete: true - -[Term] -id: SO:0000600 -name: edited_by_A_to_I_substitution -is_obsolete: true - -[Term] -id: SO:0000601 -name: edited_by_G_addition -is_obsolete: true - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. 
Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing block" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing domain" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "unedited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_encoding -synonym: "H ACA box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! 
snoRNA_encoding - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -synonym: "oligo U tail" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000602 ! guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -synonym: "bacterial RNApol promoter" EXACT [] -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -synonym: "bacterial terminator" EXACT [] -is_a: SO:0000141 ! terminator -is_a: SO:0000752 ! 
gene_group_regulatory_region - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -synonym: "terminator of type 2 RNApol III promoter" EXACT [] -is_a: SO:0000951 ! eukaryotic_terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -synonym: "RNApol III promoter type 1" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "RNApol III promoter type 2" EXACT [] -synonym: "tRNA promoter" RELATED [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence TGGCnnAGTGG." [SO:ke] -comment: Binds TFIIIC. -synonym: "A-box" EXACT [] -xref: http://en.wikipedia.org/wiki/A-box "wiki" -is_a: SO:0001660 ! core_promoter_element - -[Term] -id: SO:0000620 -name: B_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence AGGTTCCAnnCC." [SO:ke] -comment: Binds TFIIIC. -synonym: "B-box" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -synonym: "RNApol III promoter type 3" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -def: "An RNA polymerase III type 1 promoter with consensus sequence CAnnCCn." [SO:ke] -synonym: "C-box" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000623 -name: snRNA_encoding -synonym: "snRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -synonym: "chromosomal regulatory element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000627 -name: insulator -def: "A transcriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -synonym: "five prime open reading frame" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -def: "A start codon upstream of the ORF." [SO:ke] -synonym: "upstream AUG codon" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000631 -name: polycistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." [SO:ke] -synonym: "polycistronic primary transcript" EXACT [] -is_a: SO:0000078 ! 
polycistronic_transcript -is_a: SO:0000185 ! primary_transcript -relationship: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000632 -name: monocistronic_primary_transcript -def: "A primary transcript encoding for one gene product." [SO:ke] -synonym: "monocistronic primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript -is_a: SO:0000665 ! monocistronic_transcript -relationship: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000633 -name: monocistronic_mRNA -def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd] -synonym: "monocistronic mRNA" EXACT [] -synonym: "monocistronic processed transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Monocistronic_mRNA "wiki" -is_a: SO:0000234 ! mRNA -is_a: SO:0000665 ! monocistronic_transcript -relationship: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000634 -name: polycistronic_mRNA -def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd] -synonym: "polycistronic mRNA" EXACT [] -synonym: "polycistronic processed transcript" RELATED [] -xref: http://en.wikipedia.org/wiki/Polycistronic_mRNA "wiki" -is_a: SO:0000078 ! polycistronic_transcript -is_a: SO:0000234 ! mRNA -relationship: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "A primary transcript that donates the spliced leader to other mRNA." [SO:ke] -synonym: "mini exon donor RNA" EXACT [] -synonym: "mini-exon donor RNA" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -synonym: "spliced leader RNA" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000635 ! mini_exon_donor_RNA - -[Term] -id: SO:0000637 -name: engineered_plasmid -def: "A plasmid that is engineered." 
[SO:xp] -synonym: "engineered plasmid" EXACT [] -synonym: "engineered plasmid gene" RELATED [] -is_a: SO:0000155 ! plasmid -is_a: SO:0000804 ! engineered_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -synonym: "transcribed spacer region" EXACT [] -is_a: SO:0000838 ! rRNA_primary_transcript_region - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -synonym: "internal transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -synonym: "external transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -synonym: "tetranucleotide repeat microsatellite feature" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_encoding -synonym: "SRP RNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." 
[SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -alt_id: SO:0000648 -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro RNA primary transcript" EXACT [] -synonym: "miRNA primary transcript" EXACT [] -synonym: "small temporal RNA primary transcript" EXACT [] -synonym: "stRNA primary transcript" EXACT [] -synonym: "stRNA_primary_transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript -relationship: has_part SO:0001244 ! pre_miRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000255 ! rRNA_small_subunit_primary_transcript - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." 
[SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilizes 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000654 -name: maxicircle_gene -def: "A mitochondrial gene located in a maxicircle." [SO:xp] -synonym: "maxi-circle gene" EXACT [] -synonym: "maxicircle gene" EXACT [] -is_a: SO:0000089 ! kinetoplast_gene -relationship: part_of SO:0000742 ! maxicircle - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." 
[SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000656 -name: stRNA_encoding -synonym: "stRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: has_part SO:0000726 ! repeat_unit - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_encoding -synonym: "tmRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_obsolete: true - -[Term] -id: SO:0000661 -name: intron_attribute -is_obsolete: true - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000663 -name: tRNA_encoding -synonym: "tRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -synonym: "introgressed chromosome region" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000665 -name: monocistronic_transcript -def: "A transcript that is monocistronic." 
[SO:xp] -synonym: "monocistronic transcript" EXACT [] -is_a: SO:0000673 ! transcript -relationship: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000666 -name: mobile_intron -def: "An intron (mitochondrial, chloroplast, nuclear or prokaryotic) that encodes a double strand sequence specific endonuclease allowing for mobility." [SO:ke] -synonym: "mobile intron" EXACT [] -is_a: SO:0000188 ! intron -is_a: SO:0001037 ! mobile_genetic_element -relationship: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0000667 -name: insertion -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: DBVAR -subset: SOFA -synonym: "insertion" EXACT dbvar [http://www.ncbi.nlm.nih.gov/dbvar/] -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -xref: loinc:LA6687-3 "Insertion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -synonym: "sequence rearrangement feature" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." [SO:ma] -synonym: "chromosome breakage sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -synonym: "internal eliminated sequence" EXACT [] -is_a: SO:0000669 ! 
sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -synonym: "macronucleus destined segment" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non canonical splice site" EXACT [] -synonym: "non-canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000678 -consider: SO:0000679 - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." [SO:ke] -synonym: "canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000676 -consider: SO:0000677 - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -synonym: "canonical 3' splice site" EXACT [] -synonym: "canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." [SO:ke] -synonym: "canonical 5' splice site" EXACT [] -synonym: "canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." 
[SO:ke] -synonym: "non canonical 3' splice site" RELATED [] -synonym: "non canonical three prime splice site" EXACT [] -synonym: "non-canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non canonical 5' splice site" EXACT [] -synonym: "non canonical five prime splice site" EXACT [] -synonym: "non-canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non ATG start codon" EXACT [] -synonym: "non canonical start codon" EXACT [] -synonym: "non-canonical start codon" EXACT [] -is_a: SO:0000318 ! start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -synonym: "aberrant processed transcript" EXACT [] -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." [http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -synonym: "exonic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000685 -name: DNAseI_hypersensitive_site -synonym: "DHS" EXACT [] -synonym: "DNAseI hypersensitive site" EXACT [] -is_a: SO:0000322 ! 
nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "A chromosomal translocation whereby the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements. This occurs for some translocations, particularly but not exclusively, reciprocal translocations." [SO:ma] -synonym: "translocation element" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000690 -name: gene_with_polycistronic_transcript -def: "A gene that encodes a polycistronic transcript." [SO:xp] -synonym: "gene with polycistronic transcript" EXACT [] -is_a: SO:0000704 ! gene -relationship: transcribed_to SO:0000078 ! polycistronic_transcript - -[Term] -id: SO:0000691 -name: cleaved_initiator_methionine -alt_id: BS:00067 -def: "The initiator methionine that has been cleaved from a mature polypeptide sequence." [EBIBS:GAR] -subset: biosapiens -synonym: "cleaved initiator methionine" EXACT [] -synonym: "init_met" RELATED [uniprot:feature_type] -synonym: "initiator methionine" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000692 -name: gene_with_dicistronic_transcript -def: "A gene that encodes a dicistronic transcript." 
[SO:xp] -synonym: "gene with dicistronic transcript" EXACT [] -is_a: SO:0000690 ! gene_with_polycistronic_transcript -relationship: transcribed_to SO:0000079 ! dicistronic_transcript - -[Term] -id: SO:0000693 -name: gene_with_recoded_mRNA -def: "A gene that encodes an mRNA that is recoded." [SO:xp] -synonym: "gene with recoded mRNA" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -relationship: has_quality SO:0000881 ! recoded - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000697 -name: gene_with_stop_codon_read_through -def: "A gene that encodes a transcript with stop codon readthrough." [SO:xp] -synonym: "gene with stop codon read through" EXACT [] -is_a: SO:0000693 ! gene_with_recoded_mRNA -relationship: has_part SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000698 -name: gene_with_stop_codon_redefined_as_pyrrolysine -def: "A gene encoding an mRNA that has the stop codon redefined as pyrrolysine." [SO:xp] -synonym: "gene with stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000697 ! gene_with_stop_codon_read_through -relationship: has_part SO:0000884 ! 
stop_codon_redefined_as_pyrrolysine - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -synonym: "breakpoint" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjacent copies of a region (of length greater than 1)." 
[SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The 5' five prime splice site region of the donor RNA." [SO:ke] -comment: SL RNA contains a donor site. -synonym: "5 prime trans splice site" RELATED [] -synonym: "trans splice donor site" EXACT [] -synonym: "trans-splice donor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -def: "A trans_splicing_acceptor_site which appends the 22nt SL1 RNA leader sequence to the 5' end of most mRNAs." [SO:nlw] -synonym: "SL1 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -def: "A trans_splicing_acceptor_site which appends the 22nt SL2 RNA leader sequence to the 5' end of mRNAs. SL2 acceptor sites occur in genes in internal segments of polycistronic transcripts." [SO:nlw] -synonym: "SL2 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000710 -name: gene_with_stop_codon_redefined_as_selenocysteine -def: "A gene encoding an mRNA that has the stop codon redefined as selenocysteine." [SO:xp] -synonym: "gene with stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000697 ! gene_with_stop_codon_read_through -relationship: has_part SO:0000885 ! 
stop_codon_redefined_as_selenocysteine - -[Term] -id: SO:0000711 -name: gene_with_mRNA_recoded_by_translational_bypass -def: "A gene with mRNA recoded by translational bypass." [SO:xp] -synonym: "gene with mRNA recoded by translational bypass" EXACT [] -is_a: SO:0000693 ! gene_with_recoded_mRNA -relationship: has_quality SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:0000712 -name: gene_with_transcript_with_translational_frameshift -def: "A gene encoding a transcript that has a translational frameshift." [SO:xp] -synonym: "gene with transcript with translational frameshift" EXACT [] -is_a: SO:0000693 ! gene_with_recoded_mRNA -relationship: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000713 -name: DNA_motif -def: "A motif that is active in the DNA form of the sequence." [SO:ke] -synonym: "DNA motif" EXACT [] -xref: http://en.wikipedia.org/wiki/DNA_motif "wiki" -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001683 ! sequence_motif - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000716 -name: dicistronic_mRNA -def: "An mRNA that has the quality dicistronic." [SO:ke] -synonym: "dicistronic mRNA" EXACT [] -synonym: "dicistronic processed transcript" RELATED [] -is_a: SO:0000079 ! dicistronic_transcript -is_a: SO:0000634 ! polycistronic_mRNA -relationship: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. 
Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interrupted by one or more stop codons; usually identified through inter-genomic sequence comparisons." [SGD:rb] -comment: Term requested by Rama from SGD. -synonym: "blocked reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -synonym: "superscaffold" RELATED [] -is_a: SO:0001876 ! partial_genomic_sequence_assembly - -[Term] -id: SO:0000720 -name: foreign_transposable_element -def: "A transposable element that is foreign." [SO:ke] -comment: requested by Michael on 19 Nov 2004. -synonym: "foreign transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000721 -name: gene_with_dicistronic_primary_transcript -def: "A gene that encodes a dicistronic primary transcript." [SO:xp] -comment: Requested by Michael, 19 nov 2004. -synonym: "gene with dicistronic primary transcript" EXACT [] -is_a: SO:0000692 ! gene_with_dicistronic_transcript -relationship: transcribed_to SO:1001197 ! dicistronic_primary_transcript - -[Term] -id: SO:0000722 -name: gene_with_dicistronic_mRNA -def: "A gene that encodes a polycistronic mRNA." [SO:xp] -comment: Requested by MA nov 19 2004. -synonym: "gene with dicistronic mRNA" EXACT [] -synonym: "gene with dicistronic processed transcript" EXACT [] -is_a: SO:0000692 ! 
gene_with_dicistronic_transcript -relationship: transcribed_to SO:0000716 ! dicistronic_mRNA - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." [SO:ma] -synonym: "intervening DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/IDNA "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the EMBL, DDBJ, GenBank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal - -[Term] -id: SO:0000726 -name: repeat_unit -def: "The simplest repeated component of a repeat region. A single repeat." [SO:ke] -comment: Added to comply with the feature table. A single repeat. -synonym: "repeat unit" EXACT [] -xref: http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory region where transcription factor binding sites clustered to regulate various aspects of transcription activities. (CRMs can be located a few kb to hundred kb upstream of the basal promoter, in the coding sequence, within introns, or in the downstream 3'UTR sequences, as well as on different chromosome). A single gene can be regulated by multiple CRMs to give precise control of its spatial and temporal expression. CRMs function as nodes in large, intertwined regulatory network." [PMID:19660565, SO:SG] -comment: Requested by Stephen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -synonym: "transcription factor module" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region -relationship: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000728 -name: intein -def: "A region of a peptide that is able to excise itself and rejoin the remaining portions with a peptide bond." [SO:ke] -comment: Intein-mediated protein splicing occurs after mRNA has been translated into a protein. -synonym: "protein intron" RELATED [] -xref: http://en.wikipedia.org/wiki/Intein "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000729 -name: intein_containing -def: "An attribute of protein-coding genes where the initial protein product contains an intein." [SO:ke] -synonym: "intein containing" EXACT [] -is_a: SO:0000010 ! protein_coding - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000731 -name: fragmentary -def: "An attribute to describe a feature that is incomplete." [SO:ke] -comment: Term added because of request by MO people. -synonym: "fragment" EXACT [] -is_a: SO:0000905 ! 
status - -[Term] -id: SO:0000732 -name: predicted -def: "An attribute describing an unverified region." [SO:ke] -xref: http://en.wikipedia.org/wiki/Predicted "wiki" -is_a: SO:0000905 ! status - -[Term] -id: SO:0000733 -name: feature_attribute -def: "An attribute describing a located_sequence_feature." [SO:ke] -synonym: "feature attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000734 -name: exemplar_mRNA -def: "An exemplar is a representative cDNA sequence for each gene. The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: Added for the MO people. -synonym: "exemplar mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -relationship: has_quality SO:0000864 ! exemplar - -[Term] -id: SO:0000735 -name: sequence_location -synonym: "sequence location" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_sequence -synonym: "organelle sequence" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "mitochondrial sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000738 -name: nuclear_sequence -synonym: "nuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -synonym: "nucleomorphic sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000740 -name: plastid_sequence -synonym: "plastid sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000741 -name: kinetoplast -alt_id: SO:0000826 -def: "A kinetoplast is an interlocked network of thousands of minicircles and tens of maxicircles, located near the base of the flagellum of some protozoan species." 
[PMID:8395055] -synonym: "kinetoplast_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Kinetoplast "wiki" -is_a: SO:0001026 ! genome -relationship: has_part SO:0000742 ! maxicircle -relationship: has_part SO:0000980 ! minicircle - -[Term] -id: SO:0000742 -name: maxicircle -alt_id: SO:0000827 -def: "A maxicircle is a replicon, part of a kinetoplast, that contains open reading frames and replicates via a rolling circle method." [PMID:8395055] -synonym: "maxicircle_chromosome" EXACT [] -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000743 -name: apicoplast_sequence -synonym: "apicoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -synonym: "chromoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -synonym: "chloroplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -synonym: "cyanelle sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -synonym: "leucoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -synonym: "proplastid sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_location -synonym: "plasmid location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -synonym: "amplification origin" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_location -synonym: "proviral location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0001679 ! 
transcription_regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -def: "The region of sequence that has been inserted and is being propagated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000754 -name: lambda_vector -def: "The lambda bacteriophage is the vector for the linear lambda clone. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -synonym: "lambda vector" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000755 -name: plasmid_vector -synonym: "plasmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Plasmid_vector#Vectors "wiki" -is_a: SO:0000440 ! vector_replicon -relationship: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template." [SO:ma] -synonym: "complementary DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/CDNA "wiki" -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -synonym: "single strand cDNA" EXACT [] -synonym: "single stranded cDNA" EXACT [] -synonym: "single-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -synonym: "double strand cDNA" RELATED [] -synonym: "double stranded cDNA" EXACT [] -synonym: "double-strand cDNA" RELATED [] -is_a: SO:0000756 ! 
cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_obsolete: true - -[Term] -id: SO:0000760 -name: YAC_clone -is_obsolete: true - -[Term] -id: SO:0000761 -name: phagemid_clone -is_obsolete: true - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000763 -name: fosmid_clone -is_obsolete: true - -[Term] -id: SO:0000764 -name: BAC_clone -is_obsolete: true - -[Term] -id: SO:0000765 -name: cosmid_clone -is_obsolete: true - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -def: "A tRNA sequence that has a pyrrolysine anticodon, and a 3' pyrrolysine binding region." [SO:ke] -synonym: "pyrrolysyl tRNA" EXACT [] -synonym: "pyrrolysyl-transfer ribonucleic acid" EXACT [] -synonym: "pyrrolysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0001178 ! pyrrolysine_tRNA_primary_transcript - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome." [SO:ma] -is_a: SO:0000155 ! plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria. Processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw, issn:1362-4962] -comment: Added in response to comment from Kelly Williams from Indiana. Nov 2005. -synonym: "tmRNA coding piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." 
[doi:10.1093/nar/gkh795, Indiana:kw] -comment: Added in response to Kelly Williams from Indiana. Date: Nov 2005. -synonym: "tmRNA acceptor piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000771 -name: QTL -def: "A quantitative trait locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." [http://rgd.mcw.edu/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005. -synonym: "quantitative trait locus" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000772 -name: genomic_island -def: "A genomic island is an integrated mobile genetic element, characterized by size (over 10 Kb). It that has features that suggest a foreign origin. These can include nucleotide distribution (oligonucleotides signature, CG content etc.) that differs from the bulk of the chromosome and/or genes suggesting DNA mobility." [Phigo:at, SO:ke] -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -synonym: "genomic island" EXACT [] -xref: http://en.wikipedia.org/wiki/Genomic_island "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "pathogenic island" EXACT [] -is_a: SO:0000772 ! 
genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands. -synonym: "metabolic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -def: "An adaptive island is a genomic island that provides an adaptive advantage to the host." [SO:ke] -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands. Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "adaptive island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands. Evolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA gene. John T. Sullivan and Clive W. Ronso PNAS 1998 Apr 28 95 (9) 5145-5149. -synonym: "symbiosis island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendant of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. 
Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000779 -name: engineered_episome -def: "An episome that is engineered." [SO:xp] -comment: Requested by Lynn Crosby Jan 2006. -synonym: "engineered episome" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -is_a: SO:0000768 ! episome -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000780 -name: transposable_element_attribute -comment: Added by KE Jan 2006 to capture the kinds of attributes of TEs -is_obsolete: true - -[Term] -id: SO:0000781 -name: transgenic -def: "Attribute describing sequence that has been integrated with foreign sequence." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000782 -name: natural -def: "An attribute describing a feature that occurs in nature." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000783 -name: engineered -def: "An attribute to describe a region that was modified in vitro." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000784 -name: foreign -def: "An attribute to describe a region from another species." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000785 -name: cloned_region -comment: Added in response to Lynn Crosby. A clone insert may be composed of many cloned regions. -synonym: "cloned region" EXACT [] -synonym: "cloned segment" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000786 -name: reagent_attribute -comment: Added jan 2006 by KE. -synonym: "reagent attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000787 -name: clone_attribute -is_obsolete: true - -[Term] -id: SO:0000788 -name: cloned -is_obsolete: true - -[Term] -id: SO:0000789 -name: validated -def: "An attribute to describe a feature that has been proven." [SO:ke] -is_a: SO:0000905 ! 
status - -[Term] -id: SO:0000790 -name: invalidated -def: "An attribute describing a feature that is invalidated." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000791 -name: cloned_genomic -is_obsolete: true - -[Term] -id: SO:0000792 -name: cloned_cDNA -is_obsolete: true - -[Term] -id: SO:0000793 -name: engineered_DNA -is_obsolete: true - -[Term] -id: SO:0000794 -name: engineered_rescue_region -def: "A rescue region that is engineered." [SO:xp] -synonym: "engineered rescue fragment" EXACT [] -synonym: "engineered rescue region" EXACT [] -synonym: "engineered rescue segment" EXACT [] -is_a: SO:0000411 ! rescue_region -is_a: SO:0000804 ! engineered_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000795 -name: rescue_mini_gene -def: "A mini_gene that rescues." [SO:xp] -synonym: "rescue mini gene" EXACT [] -synonym: "rescue mini-gene" EXACT [] -is_a: SO:0000815 ! mini_gene -relationship: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000796 -name: transgenic_transposable_element -def: "TE that has been modified in vitro, including insertion of DNA derived from a source other than the originating TE." [FB:mc] -comment: Modified as requested by Lynn - FB. May 2007. -synonym: "transgenic transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -relationship: derives_from SO:0000151 ! clone -relationship: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000797 -name: natural_transposable_element -def: "TE that exists (or existed) in nature." [FB:mc] -synonym: "natural transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -relationship: has_quality SO:0000782 ! natural - -[Term] -id: SO:0000798 -name: engineered_transposable_element -def: "TE that has been modified by manipulations in vitro." [FB:mc] -synonym: "engineered transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -is_a: SO:0000804 ! 
engineered_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000799 -name: engineered_foreign_transposable_element -def: "A transposable_element that is engineered and foreign." [FB:mc] -synonym: "engineered foreign transposable element" EXACT [] -is_a: SO:0000720 ! foreign_transposable_element -is_a: SO:0000798 ! engineered_transposable_element -is_a: SO:0000805 ! engineered_foreign_region -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000800 -name: assortment_derived_duplication -def: "A multi-chromosome duplication aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment derived duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000801 -name: assortment_derived_deficiency_plus_duplication -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency and a duplication." [FB:gm] -synonym: "assortment derived deficiency plus duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000802 -name: assortment_derived_deficiency -def: "A multi-chromosome deficiency aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment-derived deficiency" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000803 -name: assortment_derived_aneuploid -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency or a duplication." [FB:gm] -synonym: "assortment derived aneuploid" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000804 -name: engineered_region -def: "A region that is engineered." [SO:xp] -synonym: "construct" EXACT [] -synonym: "engineered region" EXACT [] -synonym: "engineered sequence" EXACT [] -is_a: SO:0001409 ! 
biomaterial_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000805 -name: engineered_foreign_region -def: "A region that is engineered and foreign." [SO:xp] -synonym: "engineered foreign region" EXACT [] -is_a: SO:0000804 ! engineered_region -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000806 -name: fusion -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000807 -name: engineered_tag -def: "A tag that is engineered." [SO:xp] -synonym: "engineered tag" EXACT [] -is_a: SO:0000324 ! tag -is_a: SO:0000804 ! engineered_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000808 -name: validated_cDNA_clone -def: "A cDNA clone that has been validated." [SO:xp] -synonym: "validated cDNA clone" EXACT [] -is_a: SO:0000317 ! cDNA_clone -relationship: has_quality SO:0000789 ! validated - -[Term] -id: SO:0000809 -name: invalidated_cDNA_clone -def: "A cDNA clone that is invalid." [SO:xp] -synonym: "invalidated cDNA clone" EXACT [] -is_a: SO:0000317 ! cDNA_clone -relationship: has_quality SO:0000790 ! invalidated - -[Term] -id: SO:0000810 -name: chimeric_cDNA_clone -def: "A cDNA clone invalidated because it is chimeric." [SO:xp] -synonym: "chimeric cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -relationship: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA - -[Term] -id: SO:0000811 -name: genomically_contaminated_cDNA_clone -def: "A cDNA clone invalidated by genomic contamination." [SO:xp] -synonym: "genomically contaminated cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -relationship: has_quality SO:0000414 ! invalidated_by_genomic_contamination - -[Term] -id: SO:0000812 -name: polyA_primed_cDNA_clone -def: "A cDNA clone invalidated by polyA priming." [SO:xp] -synonym: "polyA primed cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -relationship: has_quality SO:0000415 ! 
invalidated_by_genomic_polyA_primed_cDNA - -[Term] -id: SO:0000813 -name: partially_processed_cDNA_clone -def: "A cDNA invalidated clone by partial processing." [SO:xp] -synonym: "partially processed cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -relationship: has_quality SO:0000416 ! invalidated_by_partial_processing - -[Term] -id: SO:0000814 -name: rescue -def: "An attribute describing a region's ability, when introduced to a mutant organism, to re-establish (rescue) a phenotype." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000815 -name: mini_gene -def: "By definition, minigenes are short open-reading frames (ORF), usually encoding approximately 9 to 20 amino acids, which are expressed in vivo (as distinct from being synthesized as peptide or protein ex vivo and subsequently injected). The in vivo synthesis confers a distinct advantage: the expressed sequences can enter both antigen presentation pathways, MHC I (inducing CD8+ T- cells, which are usually cytotoxic T-lymphocytes (CTL)) and MHC II (inducing CD4+ T-cells, usually 'T-helpers' (Th)); and can encounter B-cells, inducing antibody responses. Three main vector approaches have been used to deliver minigenes: viral vectors, bacterial vectors and plasmid DNA." [PMID:15992143] -synonym: "mini gene" EXACT [] -is_a: SO:0000236 ! ORF - -[Term] -id: SO:0000816 -name: rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "rescue gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000817 -name: wild_type -def: "An attribute describing sequence with the genotype found in nature and/or standard laboratory stock." [SO:ke] -synonym: "wild type" EXACT [] -xref: http://en.wikipedia.org/wiki/Wild_type "wiki" -xref: loinc:LA9658-1 "wild type" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000818 -name: wild_type_rescue_gene -def: "A gene that rescues." 
[SO:xp] -synonym: "wild type rescue gene" EXACT [] -is_a: SO:0000816 ! rescue_gene -relationship: has_quality SO:0000817 ! wild_type - -[Term] -id: SO:0000819 -name: mitochondrial_chromosome -def: "A chromosome originating in a mitochondria." [SO:xp] -synonym: "mitochondrial chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000820 -name: chloroplast_chromosome -def: "A chromosome originating in a chloroplast." [SO:xp] -synonym: "chloroplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000821 -name: chromoplast_chromosome -def: "A chromosome originating in a chromoplast." [SO:xp] -synonym: "chromoplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000822 -name: cyanelle_chromosome -def: "A chromosome originating in a cyanelle." [SO:xp] -synonym: "cyanelle chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000823 -name: leucoplast_chromosome -def: "A chromosome with origin in a leucoplast." [SO:xp] -synonym: "leucoplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000824 -name: macronuclear_chromosome -def: "A chromosome originating in a macronucleus." [SO:xp] -synonym: "macronuclear chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000083 ! macronuclear_sequence - -[Term] -id: SO:0000825 -name: micronuclear_chromosome -def: "A chromosome originating in a micronucleus." [SO:xp] -synonym: "micronuclear chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000084 ! micronuclear_sequence - -[Term] -id: SO:0000828 -name: nuclear_chromosome -def: "A chromosome originating in a nucleus." 
[SO:xp] -synonym: "nuclear chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000829 -name: nucleomorphic_chromosome -def: "A chromosome originating in a nucleomorph." [SO:xp] -synonym: "nucleomorphic chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000832 -name: promoter_region -def: "A region of sequence which is part of a promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -def: "A region of a mature transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! 
transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000838 -name: rRNA_primary_transcript_region -def: "A region of an rRNA primary transcript." [SO:ke] -comment: To allow transcribed_spacer_region to have a path to the root. -synonym: "rRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! 
biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000840 -name: repeat_component -def: "A region of a repeated sequence." [SO:ke] -comment: A manufactured to group the parts of repeats, to give them an is_a path back to the root. -synonym: "repeat component" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000843 -name: bacterial_RNApol_promoter_region -def: "A region which is part of a bacterial RNA polymerase promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of bacterial_RNApol_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000844 -name: RNApol_II_promoter_region -def: "A region of sequence which is a promoter for RNA polymerase II." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_II_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000845 -name: RNApol_III_promoter_type_1_region -def: "A region of sequence which is a promoter for RNA polymerase III type 1." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_1 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000846 -name: RNApol_III_promoter_type_2_region -def: "A region of sequence which is a promoter for RNA polymerase III type 2." 
[SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_2 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000847 -name: tmRNA_region -def: "A region of a tmRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of tmRNA, thus giving them an is_a path back to the root. -synonym: "tmRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000584 ! tmRNA - -[Term] -id: SO:0000848 -name: LTR_component -synonym: "long term repeat component" EXACT [] -synonym: "LTR component" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000849 -name: three_prime_LTR_component -synonym: "3' long terminal repeat component" EXACT [] -synonym: "three prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000850 -name: five_prime_LTR_component -synonym: "5' long term repeat component" EXACT [] -synonym: "five prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000851 -name: CDS_region -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0000853 -name: homologous_region -def: "A region that is homologous to another region." [SO:ke] -synonym: "homolog" EXACT [] -synonym: "homologous region" EXACT [] -synonym: "homologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Homology_(biology) "wiki" -is_a: SO:0000330 ! conserved_region -relationship: has_quality SO:0000857 ! 
homologous - -[Term] -id: SO:0000854 -name: paralogous_region -def: "A homologous_region that is paralogous to another region." [SO:ke] -comment: A term to be used in conjunction with the paralogous_to relationship. -synonym: "paralog" EXACT [] -synonym: "paralogous region" EXACT [] -synonym: "paralogue" EXACT [] -xref: http://en.wikipedia.org/wiki/Paralog#Paralogy "wiki" -is_a: SO:0000853 ! homologous_region -relationship: has_quality SO:0000859 ! paralogous - -[Term] -id: SO:0000855 -name: orthologous_region -def: "A homologous_region that is orthologous to another region." [SO:ke] -comment: This term should be used in conjunction with the similarity relationships defined in SO. -synonym: "ortholog" EXACT [] -synonym: "orthologous region" EXACT [] -synonym: "orthologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Ortholog#Orthology "wiki" -is_a: SO:0000853 ! homologous_region -relationship: has_quality SO:0000858 ! orthologous - -[Term] -id: SO:0000856 -name: conserved -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000857 -name: homologous -def: "Similarity due to common ancestry." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000858 -name: orthologous -def: "An attribute describing a kind of homology where divergence occurred after a speciation event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000859 -name: paralogous -def: "An attribute describing a kind of homology where divergence occurred after a duplication event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000860 -name: syntenic -def: "Attribute describing sequence regions occurring in same order on chromosome of different species." [SO:ke] -xref: http://en.wikipedia.org/wiki/Syntenic "wiki" -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000861 -name: capped_primary_transcript -def: "A primary transcript that is capped." [SO:xp] -synonym: "capped primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript -relationship: adjacent_to SO:0000581 ! 
cap -relationship: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000862 -name: capped_mRNA -def: "An mRNA that is capped." [SO:xp] -synonym: "capped mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -relationship: adjacent_to SO:0000581 ! cap -relationship: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000863 -name: mRNA_attribute -def: "An attribute describing an mRNA feature." [SO:ke] -synonym: "mRNA attribute" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000864 -name: exemplar -def: "An attribute describing a sequence is representative of a class of similar sequences." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000865 -name: frameshift -def: "An attribute describing a sequence that contains a mutation involving the deletion or insertion of one or more bases, where this number is not divisible by 3." [SO:ke] -xref: http://en.wikipedia.org/wiki/Frameshift "wiki" -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000866 -name: minus_1_frameshift -def: "A frameshift caused by deleting one base." [SO:ke] -synonym: "minus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000867 -name: minus_2_frameshift -def: "A frameshift caused by deleting two bases." [SO:ke] -synonym: "minus 2 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000868 -name: plus_1_frameshift -def: "A frameshift caused by inserting one base." [SO:ke] -synonym: "plus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000869 -name: plus_2_framshift -def: "A frameshift caused by inserting two bases." [SO:ke] -synonym: "plus 2 framshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000870 -name: trans_spliced -def: "An attribute describing transcript sequence that is created by splicing exons from diferent genes." [SO:ke] -synonym: "trans-spliced" EXACT [] -is_a: SO:0000237 ! 
transcript_attribute - -[Term] -id: SO:0000871 -name: polyadenylated_mRNA -def: "An mRNA that is polyadenylated." [SO:xp] -synonym: "polyadenylated mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -relationship: adjacent_to SO:0000610 ! polyA_sequence -relationship: has_quality SO:0000246 ! polyadenylated - -[Term] -id: SO:0000872 -name: trans_spliced_mRNA -def: "An mRNA that is trans-spliced." [SO:xp] -synonym: "trans-spliced mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -is_a: SO:0000479 ! trans_spliced_transcript -relationship: adjacent_to SO:0000636 ! spliced_leader_RNA -relationship: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000873 -name: edited_transcript -def: "A transcript that is edited." [SO:ke] -synonym: "edited transcript" EXACT [] -is_a: SO:0000673 ! transcript -relationship: guided_by SO:0000602 ! guide_RNA -relationship: has_part SO:0000977 ! anchor_binding_site -relationship: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000874 -name: edited_transcript_by_A_to_I_substitution -def: "A transcript that has been edited by A to I substitution." [SO:ke] -synonym: "edited transcript by A to I substitution" EXACT [] -is_a: SO:0000873 ! edited_transcript - -[Term] -id: SO:0000875 -name: bound_by_protein -def: "An attribute describing a sequence that is bound by a protein." [SO:ke] -synonym: "bound by protein" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000876 -name: bound_by_nucleic_acid -def: "An attribute describing a sequence that is bound by a nucleic acid." [SO:ke] -synonym: "bound by nucleic acid" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000877 -name: alternatively_spliced -def: "An attribute describing a situation where a gene may encode for more than 1 transcript." [SO:ke] -synonym: "alternatively spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000878 -name: monocistronic -def: "An attribute describing a sequence that contains the code for one gene product." 
[SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000879 -name: dicistronic -def: "An attribute describing a sequence that contains the code for two gene products." [SO:ke] -is_a: SO:0000880 ! polycistronic - -[Term] -id: SO:0000880 -name: polycistronic -def: "An attribute describing a sequence that contains the code for more than one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000881 -name: recoded -def: "An attribute describing an mRNA sequence that has been reprogrammed at translation, causing localized alterations." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000882 -name: codon_redefined -def: "An attribute describing the alteration of codon meaning." [SO:ke] -synonym: "codon redefined" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000883 -name: stop_codon_read_through -def: "A stop codon redefined to be a new amino acid." [SO:ke] -synonym: "stop codon read through" EXACT [] -synonym: "stop codon readthrough" RELATED [] -is_a: SO:0000145 ! recoded_codon - -[Term] -id: SO:0000884 -name: stop_codon_redefined_as_pyrrolysine -def: "A stop codon redefined to be the new amino acid, pyrrolysine." [SO:ke] -synonym: "stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000885 -name: stop_codon_redefined_as_selenocysteine -def: "A stop codon redefined to be the new amino acid, selenocysteine." [SO:ke] -synonym: "stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000886 -name: recoded_by_translational_bypass -def: "Recoded mRNA where a block of nucleotides is not translated." [SO:ke] -synonym: "recoded by translational bypass" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000887 -name: translationally_frameshifted -def: "Recoding by frameshifting a particular site." [SO:ke] -synonym: "translationally frameshifted" EXACT [] -is_a: SO:0000881 ! 
recoded - -[Term] -id: SO:0000888 -name: maternally_imprinted_gene -def: "A gene that is maternally_imprinted." [SO:xp] -synonym: "maternally imprinted gene" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -relationship: has_quality SO:0000135 ! maternally_imprinted - -[Term] -id: SO:0000889 -name: paternally_imprinted_gene -def: "A gene that is paternally imprinted." [SO:xp] -synonym: "paternally imprinted gene" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -relationship: has_quality SO:0000136 ! paternally_imprinted - -[Term] -id: SO:0000890 -name: post_translationally_regulated_gene -def: "A gene that is post translationally regulated." [SO:xp] -synonym: "post translationally regulated gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000891 -name: negatively_autoregulated_gene -def: "A gene that is negatively autoreguated." [SO:xp] -synonym: "negatively autoregulated gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000473 ! negatively_autoregulated - -[Term] -id: SO:0000892 -name: positively_autoregulated_gene -def: "A gene that is positively autoregulated." [SO:xp] -synonym: "positively autoregulated gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000475 ! positively_autoregulated - -[Term] -id: SO:0000893 -name: silenced -def: "An attribute describing an epigenetic process where a gene is inactivated at transcriptional or translational level." [SO:ke] -xref: http://en.wikipedia.org/wiki/Silenced "wiki" -is_a: SO:0000126 ! transcriptionally_repressed - -[Term] -id: SO:0000894 -name: silenced_by_DNA_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA modifications, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA modification" EXACT [] -is_a: SO:0000893 ! 
silenced - -[Term] -id: SO:0000895 -name: silenced_by_DNA_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA methylation, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA methylation" EXACT [] -is_a: SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000896 -name: translationally_regulated_gene -def: "A gene that is translationally regulated." [SO:xp] -synonym: "translationally regulated gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000131 ! translationally_regulated - -[Term] -id: SO:0000897 -name: allelically_excluded_gene -def: "A gene that is allelically_excluded." [SO:xp] -synonym: "allelically excluded gene" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -relationship: has_quality SO:0000137 ! allelically_excluded - -[Term] -id: SO:0000898 -name: epigenetically_modified_gene -def: "A gene that is epigenetically modified." [SO:ke] -synonym: "epigenetically modified gene" EXACT [] -is_a: SO:0000704 ! gene -is_a: SO:0001720 ! epigenetically_modified_region -relationship: has_quality SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000899 -name: nuclear_mitochondrial -def: "An attribute describing a nuclear pseudogene of a mitochndrial gene." [SO:ke] -synonym: "nuclear mitochondrial" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000900 -name: processed -def: "An attribute describing a pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promotors, but often including a polyA tail." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000901 -name: unequally_crossed_over -def: "An attribute describing a pseudogene that was created by tandem duplication and unequal crossing over during recombination." 
[SO:ke] -synonym: "unequally crossed over" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000902 -name: transgene -def: "A transgene is a gene that has been transferred naturally or by any of a number of genetic engineering techniques from one organism to another." [SO:xp] -xref: http://en.wikipedia.org/wiki/Transgene "wiki" -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000903 -name: endogenous_retroviral_sequence -synonym: "endogenous retroviral sequence" EXACT [] -is_a: SO:0000751 ! proviral_location - -[Term] -id: SO:0000904 -name: rearranged_at_DNA_level -def: "An attribute to describe the sequence of a feature, where the DNA is rearranged." [SO:ke] -synonym: "rearranged at DNA level" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000905 -name: status -def: "An attribute describing the status of a feature, based on the available evidence." [SO:ke] -comment: This term is the hypernym of attributes and should not be annotated to. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000906 -name: independently_known -def: "Attribute to describe a feature that is independently known - not predicted." [SO:ke] -synonym: "independently known" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000907 -name: supported_by_sequence_similarity -def: "An attribute to describe a feature that has been predicted using sequence similarity techniques." [SO:ke] -synonym: "supported by sequence similarity" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000908 -name: supported_by_domain_match -def: "An attribute to describe a feature that has been predicted using sequence similarity of a known domain." [SO:ke] -synonym: "supported by domain match" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000909 -name: supported_by_EST_or_cDNA -def: "An attribute to describe a feature that has been predicted using sequence similarity to EST or cDNA data." 
[SO:ke] -synonym: "supported by EST or cDNA" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000910 -name: orphan -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000911 -name: predicted_by_ab_initio_computation -def: "An attribute describing a feature that is predicted by a computer program that did not rely on sequence similarity." [SO:ke] -synonym: "predicted by ab initio computation" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000912 -name: asx_turn -alt_id: BS:00203 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Aspartate or Asparagine (Asx), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0000913 -name: cloned_cDNA_insert -def: "A clone insert made from cDNA." [SO:xp] -synonym: "cloned cDNA insert" EXACT [] -is_a: SO:0000753 ! clone_insert -relationship: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000914 -name: cloned_genomic_insert -def: "A clone insert made from genomic DNA." [SO:xp] -synonym: "cloned genomic insert" EXACT [] -is_a: SO:0000753 ! clone_insert -relationship: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000915 -name: engineered_insert -def: "A clone insert that is engineered." [SO:xp] -synonym: "engineered insert" EXACT [] -is_a: SO:0000753 ! clone_insert -is_a: SO:0000804 ! engineered_region -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000916 -name: edit_operation -synonym: "edit operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000917 -name: insert_U -def: "An edit to insert a U." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. 
-synonym: "insert U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000918 -name: delete_U -def: "An edit to delete a uridine." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "delete U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000919 -name: substitute_A_to_I -def: "An edit to substitute an I for an A." [SO:ke] -synonym: "substitute A to I" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000920 -name: insert_C -def: "An edit to insert a cytidine." [SO:ke] -synonym: "insert C" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000921 -name: insert_dinucleotide -def: "An edit to insert a dinucleotide." [SO:ke] -synonym: "insert dinucleotide" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000922 -name: substitute_C_to_U -def: "An edit to substitute an U for a C." [SO:ke] -synonym: "substitute C to U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000923 -name: insert_G -def: "An edit to insert a G." [SO:ke] -synonym: "insert G" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000924 -name: insert_GC -def: "An edit to insert a GC dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. 
-synonym: "insert GC" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000925 -name: insert_GU -def: "An edit to insert a GU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000926 -name: insert_CU -def: "An edit to insert a CU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert CU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000927 -name: insert_AU -def: "An edit to insert a AU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. 
The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000928 -name: insert_AA -def: "An edit to insert a AA dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AA" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000929 -name: edited_mRNA -def: "An mRNA that is edited." [SO:xp] -synonym: "edited mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -is_a: SO:0000873 ! edited_transcript -relationship: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000930 -name: guide_RNA_region -def: "A region of guide RNA." [SO:ma] -synonym: "guide RNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000602 ! guide_RNA - -[Term] -id: SO:0000931 -name: anchor_region -def: "A region of a guide_RNA that base-pairs to a target mRNA." [SO:jk] -synonym: "anchor region" EXACT [] -is_a: SO:0000930 ! 
guide_RNA_region - -[Term] -id: SO:0000932 -name: pre_edited_mRNA -synonym: "pre-edited mRNA" EXACT [] -is_a: SO:0000120 ! protein_coding_primary_transcript - -[Term] -id: SO:0000933 -name: intermediate -def: "An attribute to describe a feature between stages of processing." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000934 -name: miRNA_target_site -def: "A miRNA target site is a binding site where the molecule is a micro RNA." [FB:cds] -synonym: "miRNA target site" EXACT [] -is_a: SO:0001655 ! nucleotide_binding_site - -[Term] -id: SO:0000935 -name: edited_CDS -def: "A CDS that is edited." [SO:xp] -synonym: "edited CDS" EXACT [] -is_a: SO:0000316 ! CDS -relationship: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000936 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -synonym: "vertebrate immunoglobulin T cell receptor rearranged segment" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000937 -name: vertebrate_immune_system_feature -is_obsolete: true - -[Term] -id: SO:0000938 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor rearranged gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000939 -name: vertebrate_immune_system_gene_recombination_signal_feature -synonym: "vertebrate immune system gene recombination signal feature" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000940 -name: recombinationally_rearranged -synonym: "recombinationally rearranged" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000941 -name: recombinationally_rearranged_vertebrate_immune_system_gene -def: "A recombinationally rearranged gene of the vertebrate immune system." [SO:xp] -synonym: "recombinationally rearranged vertebrate immune system gene" EXACT [] -is_a: SO:0000456 ! 
recombinationally_rearranged_gene - -[Term] -id: SO:0000942 -name: attP_site -def: "An integration/excision site of a phage chromosome at which a recombinase acts to insert the phage DNA at a cognate integration/excision site on a bacterial chromosome." [SO:as] -synonym: "attP site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0001042 ! phage_sequence - -[Term] -id: SO:0000943 -name: attB_site -def: "An integration/excision site of a bacterial chromosome at which a recombinase acts to insert foreign DNA containing a cognate integration/excision site." [SO:as] -synonym: "attB site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000944 -name: attL_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attB_site and the 3' portion of attP_site." [SO:as] -synonym: "attBP'" RELATED [] -synonym: "attL site" RELATED [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000945 -name: attR_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attP_site and the 3' portion of attB_site." [SO:as] -synonym: "attPB'" RELATED [] -synonym: "attR site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000946 -name: integration_excision_site -def: "A region specifically recognised by a recombinase, which inserts or removes another region marked by a distinct cognate integration/excision site." [SO:as] -synonym: "attachment site" RELATED [] -synonym: "integration excision site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000947 -name: resolution_site -def: "A region specifically recognized by a recombinase, which separates a physically contiguous circle of DNA into two physically separate circles." [SO:as] -synonym: "res site" EXACT [] -synonym: "resolution site" EXACT [] -is_a: SO:0000342 ! 
site_specific_recombination_target_region - -[Term] -id: SO:0000948 -name: inversion_site -def: "A region specifically recognised by a recombinase, which inverts the region flanked by a pair of sites." [SO:ma] -comment: A target region for site-specific inversion of a DNA region and which carries binding sites for a site-specific recombinase and accessory proteins as well as the site for specific cleavage by the recombinase. -synonym: "inversion site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000949 -name: dif_site -def: "A site at which replicated bacterial circular chromosomes are decatenated by site specific resolvase." [SO:as] -synonym: "dif site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000950 -name: attC_site -def: "An attC site is a sequence required for the integration of a DNA of an integron." [SO:as] -synonym: "attC site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000951 -name: eukaryotic_terminator -synonym: "eukaryotic terminator" EXACT [] -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000952 -name: oriV -def: "An origin of vegetative replication in plasmids and phages." [SO:as] -synonym: "origin of vegetative replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000953 -name: oriC -def: "An origin of bacterial chromosome replication." [SO:as] -synonym: "origin of bacterial chromosome replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000954 -name: DNA_chromosome -def: "Structural unit composed of a self-replicating, DNA molecule." [SO:ma] -synonym: "DNA chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0000955 -name: double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded DNA molecule." 
[SO:ma] -synonym: "double stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! DNA_chromosome -relationship: has_quality SO:0000985 ! double - -[Term] -id: SO:0000956 -name: single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded DNA molecule." [SO:ma] -synonym: "single stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! DNA_chromosome -relationship: has_quality SO:0000984 ! single - -[Term] -id: SO:0000957 -name: linear_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear DNA molecule." [SO:ma] -synonym: "linear double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! double_stranded_DNA_chromosome -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000958 -name: circular_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular DNA molecule." [SO:ma] -synonym: "circular double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! double_stranded_DNA_chromosome -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000959 -name: linear_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear DNA molecule." [SO:ma] -synonym: "linear single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! single_stranded_DNA_chromosome -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000960 -name: circular_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! single_stranded_DNA_chromosome -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000961 -name: RNA_chromosome -def: "Structural unit composed of a self-replicating, RNA molecule." [SO:ma] -synonym: "RNA chromosome" EXACT [] -is_a: SO:0000340 ! 
chromosome -relationship: has_quality SO:0000356 ! RNA - -[Term] -id: SO:0000962 -name: single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded RNA molecule." [SO:ma] -synonym: "single stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! RNA_chromosome -relationship: has_quality SO:0000984 ! single - -[Term] -id: SO:0000963 -name: linear_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear RNA molecule." [SO:ma] -synonym: "linear single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! single_stranded_RNA_chromosome -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000964 -name: linear_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear RNA molecule." [SO:ma] -synonym: "linear double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! double_stranded_RNA_chromosome -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000965 -name: double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded RNA molecule." [SO:ma] -synonym: "double stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! RNA_chromosome -relationship: has_quality SO:0000985 ! double - -[Term] -id: SO:0000966 -name: circular_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! single_stranded_RNA_chromosome -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000967 -name: circular_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular RNA molecule." [SO:ma] -synonym: "circular double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! double_stranded_RNA_chromosome -relationship: has_quality SO:0000988 ! 
circular - -[Term] -id: SO:0000968 -name: sequence_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "sequence replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000969 -name: rolling_circle -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070581. -synonym: "rolling circle" EXACT [] -xref: http://en.wikipedia.org/wiki/Rolling_circle "wiki" -is_obsolete: true - -[Term] -id: SO:0000970 -name: theta_replication -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070582 -synonym: "theta replication" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000971 -name: DNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0006260. -synonym: "DNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000972 -name: RNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "RNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000973 -name: insertion_sequence -def: "A terminal_inverted_repeat_element that is bacterial and only encodes the functions required for its transposition between these inverted repeats." [SO:as] -synonym: "insertion sequence" EXACT [] -synonym: "IS" RELATED [] -xref: http://en.wikipedia.org/wiki/Insertion_sequence "wiki" -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000975 -name: minicircle_gene -synonym: "minicircle gene" EXACT [] -is_a: SO:0000089 ! kinetoplast_gene -relationship: part_of SO:0000980 ! minicircle - -[Term] -id: SO:0000976 -name: cryptic -def: "A feature_attribute describing a feature that is not manifest under normal conditions." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000977 -name: anchor_binding_site -comment: Part of an edited transcript only. -synonym: "anchor binding site" EXACT [] -is_a: SO:0000833 ! 
transcript_region - -[Term] -id: SO:0000978 -name: template_region -def: "A region of a guide_RNA that specifies the insertions and deletions of bases in the editing of a target mRNA." [SO:jk] -synonym: "information region" EXACT [] -synonym: "template region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000979 -name: gRNA_encoding -def: "A non-protein_coding gene that encodes a guide_RNA." [SO:ma] -synonym: "gRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000980 -name: minicircle -alt_id: SO:0000974 -def: "A minicircle is a replicon, part of a kinetoplast, that encodes for guide RNAs." [PMID:8395055] -synonym: "minicircle_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Minicircle "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000981 -name: rho_dependent_bacterial_terminator -synonym: "rho dependent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000982 -name: rho_independent_bacterial_terminator -synonym: "rho independent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000983 -name: strand_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "strand attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000984 -name: single -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000985 -name: double -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000986 -name: topology_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "topology attribute" EXACT [] -is_a: SO:0000443 ! 
polymer_attribute - -[Term] -id: SO:0000987 -name: linear -def: "A quality of a nucleotide polymer that has a 3'-terminal residue and a 5'-terminal residue." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "two-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000988 ! circular - -[Term] -id: SO:0000988 -name: circular -def: "A quality of a nucleotide polymer that has no terminal nucleotide residues." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "zero-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute - -[Term] -id: SO:0000989 -name: class_II_RNA -def: "Small non-coding RNA (59-60 nt long) containing 5' and 3' ends that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -synonym: "class II RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000990 -name: class_I_RNA -def: "Small non-coding RNA (55-65 nt long) containing highly conserved 5' and 3' ends (16 and 8 nt, respectively) that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -comment: Requested by Karen Pilcher - Dictybase. song-Term Tracker-1574577. -synonym: "class I RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000991 -name: genomic_DNA -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "genomic DNA" EXACT [] -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000992 -name: BAC_cloned_genomic_insert -comment: Requested by Andy Schroder - Flybase Harvard, Nov 2006. -synonym: "BAC cloned genomic insert" EXACT [] -is_a: SO:0000914 ! cloned_genomic_insert -relationship: derives_from SO:0000153 ! 
BAC - -[Term] -id: SO:0000993 -name: consensus -comment: Term added Dec 06 to comply with mapping to MGED terms. It should be used to generate consensus regions. The specific cross product terms they require are consensus_region and consensus_mRNA. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000994 -name: consensus_region -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus region" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000995 -name: consensus_mRNA -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -is_a: SO:0000994 ! consensus_region -relationship: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000996 -name: predicted_gene -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "predicted gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000732 ! predicted - -[Term] -id: SO:0000997 -name: gene_fragment -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "gene fragment" EXACT [] -is_a: SO:0000842 ! gene_component_region -relationship: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0000998 -name: recursive_splice_site -def: "A recursive splice site is a splice site which subdivides a large intron. Recursive splicing is a mechanism that splices large introns by sub dividing the intron at non exonic elements and alternate exons." [http://www.genetics.org/cgi/content/full/170/2/661] -synonym: "recursive splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000999 -name: BAC_end -def: "A region of sequence from the end of a BAC clone that may provide a highly specific marker." [SO:ke] -comment: Requested by Keith Boroevich December, 2006. 
-synonym: "BAC end" EXACT [] -synonym: "BAC end sequence" EXACT [] -synonym: "BES" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000153 ! BAC - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001003 -name: solo_LTR -def: "A recombination product between the 2 LTR of the same element." [SO:ke] -comment: Requested by Hadi Quesneville January 2007. -synonym: "solo LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0001004 -name: low_complexity -synonym: "low complexity" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0001005 -name: low_complexity_region -synonym: "low complexity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: has_quality SO:0001004 ! low_complexity - -[Term] -id: SO:0001006 -name: prophage -def: "A phage genome after it has established in the host genome in a latent/immune state either as a plasmid or as an integrated \"island\"." 
[GOC:jl] -xref: http://en.wikipedia.org/wiki/Prophage "wiki" -is_a: SO:0000113 ! proviral_region - -[Term] -id: SO:0001007 -name: cryptic_prophage -def: "A remnant of an integrated prophage in the host genome or an \"island\" in the host genome that includes phage like-genes." [GOC:jl] -comment: This is not cryptic in the same sense as a cryptic gene or cryptic splice site. -synonym: "cryptic prophage" EXACT [] -xref: http://ecoliwiki.net/colipedia/index.php/Category:Cryptic_Prophage.w -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001008 -name: tetraloop -def: "A base-paired stem with loop of 4 non-hydrogen bonded nucleotides." [SO:ke] -xref: http://en.wikipedia.org/wiki/Tetraloop "wiki" -is_a: SO:0000313 ! stem_loop - -[Term] -id: SO:0001009 -name: DNA_constraint_sequence -def: "A double-stranded DNA used to control macromolecular structure and function." [http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au\]&dispmax=50] -synonym: "DNA constraint" EXACT [] -synonym: "DNA constraint sequence" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0001010 -name: i_motif -def: "A cytosine rich domain whereby strands associate both inter- and intramolecularly at moderately acidic pH." [PMID:9753739] -synonym: "i motif" EXACT [] -synonym: "short intercalated motif" EXACT [] -is_a: SO:0000142 ! DNA_sequence_secondary_structure - -[Term] -id: SO:0001011 -name: PNA_oligo -def: "Peptide nucleic acid, is a chemical not known to occur naturally but is artificially synthesized and used in some biological research and medical treatments. The PNA backbone is composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [SO:ke] -synonym: "peptide nucleic acid" EXACT [] -synonym: "PNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Peptide_nucleic_acid "wiki" -is_a: SO:0001247 ! 
synthetic_oligo -relationship: has_quality SO:0001184 ! PNA - -[Term] -id: SO:0001012 -name: DNAzyme -def: "A DNA sequence with catalytic activity." [SO:cb] -comment: Added by request from Colin Batchelor. -synonym: "catalytic DNA" EXACT [] -synonym: "deoxyribozyme" RELATED [] -synonym: "DNA enzyme" EXACT [] -is_a: SO:0000696 ! oligo -relationship: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0001013 -name: MNP -def: "A multiple nucleotide polymorphism with alleles of common length > 1, for example AAA/TTT." [http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?rs=rs2067431] -synonym: "multiple nucleotide polymorphism" RELATED [] -is_a: SO:0002007 ! MNV - -[Term] -id: SO:0001014 -name: intron_domain -comment: Requested by Colin Batchelor, Feb 2007. -synonym: "intron domain" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000188 ! intron - -[Term] -id: SO:0001015 -name: wobble_base_pair -def: "A type of non-canonical base pairing, most commonly between G and U, which is important for the secondary structure of RNAs. It has similar thermodynamic stability to the Watson-Crick pairing. Wobble base pairs only have two hydrogen bonds. Other wobble base pair possibilities are I-A, I-U and I-C." [PMID:11256617] -synonym: "wobble base pair" EXACT [] -synonym: "wobble pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Wobble_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0001016 -name: internal_guide_sequence -def: "A purine-rich sequence in the group I introns which determines the locations of the splice sites in group I intron splicing and has catalytic activity." [SO:cb] -synonym: "IGS" EXACT [] -synonym: "internal guide sequence" EXACT [] -is_a: SO:0001014 ! intron_domain -relationship: part_of SO:0000587 ! group_I_intron - -[Term] -id: SO:0001017 -name: silent_mutation -def: "A sequence variant that does not affect protein function. Silent mutations may occur in genic ( CDS, UTR, intron etc) and intergenic regions. 
Silent mutations may have affects on processes such as splicing and regulation." [SO:ke] -comment: Added in March 2007 in after meeting with PharmGKB. Although this term is in common usage, it is better to annotate with the most specific term possible, such as synonymous codon, intron variant etc. -synonym: "silent mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Silent_mutation "wiki" -xref: loinc:LA6700-4 "Silent" -is_a: SO:0001878 ! feature_variant - -[Term] -id: SO:0001018 -name: epitope -def: "A binding site that, in the molecule, interacts selectively and non-covalently with antibodies, B cells or T cells." [http://en.wikipedia.org/wiki/Epitope, SO:cb] -comment: Requested by Trish Whetzel. -xref: http://en.wikipedia.org/wiki/Epitope "wiki" -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -subset: SOFA -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0001020 -name: sequence_variant_affecting_copy_number -comment: OBSOLETE: This term was deleted as it conflated more than one term. The alteration is separate from the effect. -synonym: "mutation affecting copy number" EXACT [] -synonym: "sequence variant affecting copy number" EXACT [] -is_obsolete: true -replaced_by: SO:0001563 - -[Term] -id: SO:0001021 -name: chromosome_breakpoint -alt_id: SO:0001242 -synonym: "aberration breakpoint" EXACT [] -synonym: "aberration_junction" EXACT [] -synonym: "chromosome breakpoint" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0001022 -name: inversion_breakpoint -def: "The point within a chromosome where an inversion begins or ends." 
[SO:cb] -synonym: "inversion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001023 -name: allele -def: "An allele is one of a set of coexisting sequence variants of a gene." [SO:immuno_workshop] -synonym: "allelomorph" EXACT [] -xref: http://en.wikipedia.org/wiki/Allele "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000704 ! gene - -[Term] -id: SO:0001024 -name: haplotype -def: "A haplotype is one of a set of coexisting sequence variants of a haplotype block." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Haplotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000355 ! haplotype_block - -[Term] -id: SO:0001025 -name: polymorphic_sequence_variant -def: "A sequence variant that is segregating in one or more natural populations of a species." [SO:immuno_workshop] -synonym: "polymorphic sequence variant" EXACT [] -is_a: SO:0001023 ! allele - -[Term] -id: SO:0001026 -name: genome -def: "A genome is the sum of genetic material within a cell or virion." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genome "wiki" -is_a: SO:0001260 ! sequence_collection -relationship: has_part SO:0001235 ! replicon - -[Term] -id: SO:0001027 -name: genotype -def: "A genotype is a variant genome, complete or incomplete." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0001026 ! genome - -[Term] -id: SO:0001028 -name: diplotype -def: "A diplotype is a pair of haplotypes from a given individual. It is a genotype where the phase is known." [SO:immuno_workshop] -is_a: SO:0001507 ! variant_collection - -[Term] -id: SO:0001029 -name: direction_attribute -synonym: "direction attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001030 -name: forward -def: "Forward is an attribute of the feature, where the feature is in the 5' to 3' direction." [SO:ke] -is_a: SO:0001029 ! 
direction_attribute - -[Term] -id: SO:0001031 -name: reverse -def: "Reverse is an attribute of the feature, where the feature is in the 3' to 5' direction. Again could be applied to primer." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001032 -name: mitochondrial_DNA -comment: This terms is used by MO. -synonym: "mitochondrial DNA" EXACT [] -synonym: "mtDNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_DNA "wiki" -is_a: SO:0000737 ! mitochondrial_sequence -relationship: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001033 -name: chloroplast_DNA -comment: This term is used by MO. -synonym: "chloroplast DNA" EXACT [] -is_a: SO:0000745 ! chloroplast_sequence -relationship: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001034 -name: miRtron -def: "A de-branched intron which mimics the structure of pre-miRNA and enters the miRNA processing pathway without Drosha mediated cleavage." [PMID:17589500, SO:ma] -comment: Ruby et al. Nature 448:83 describe a new class of miRNAs that are derived from de-branched introns. -is_a: SO:0001014 ! intron_domain -relationship: has_part SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001035 -name: piRNA -def: "A small non coding RNA, part of a silencing system that prevents the spreading of selfish genetic elements." [SO:ke] -synonym: "piwi-associated RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/PiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001036 -name: arginyl_tRNA -def: "A tRNA sequence that has an arginine anticodon, and a 3' arginine binding region." [SO:ke] -synonym: "arginyl tRNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000212 ! arginine_tRNA_primary_transcript - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular mobility, of varying length, which often carry the information necessary for transfer and recombination with the host genome." 
[PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region -relationship: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0001038 -name: extrachromosomal_mobile_genetic_element -def: "An MGE that is not integrated into the host chromosome." [SO:ke] -synonym: "extrachromosomal mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001040 -name: integrated_plasmid -def: "A plasmid sequence that is integrated within the host chromosome." [SO:ke] -synonym: "integrated plasmid" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element -relationship: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0001041 -name: viral_sequence -def: "The region of nucleotide sequence of a virus, a submicroscopic particle that replicates by infecting a host cell." [SO:ke] -comment: The definitions of the children of this term were revised Decemeber 2007 after discussion on song-devel. The resulting definitions are slightly unweildy but hopefully more logically correct. -synonym: "viral sequence" EXACT [] -synonym: "virus sequence" EXACT [] -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0001042 -name: phage_sequence -def: "The nucleotide sequence of a virus that infects bacteria." [SO:ke] -synonym: "bacteriophage" EXACT [] -synonym: "phage" EXACT [] -synonym: "phage sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Bacteriophage "wiki" -is_a: SO:0001041 ! 
viral_sequence - -[Term] -id: SO:0001043 -name: attCtn_site -def: "An attachment site located on a conjugative transposon and used for site-specific integration of a conjugative transposon." [Phigo:at] -synonym: "attCtn site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000371 ! conjugative_transposon - -[Term] -id: SO:0001044 -name: nuclear_mt_pseudogene -def: "A nuclear pseudogene of either coding or non-coding mitochondria derived sequence." [SO:xp] -comment: Definition change requested by Val, 3172757. -synonym: "nuclear mitochondrial pseudogene" EXACT [] -synonym: "nuclear mt pseudogene" EXACT [] -synonym: "NUMT" EXACT [] -xref: http://en.wikipedia.org/wiki/Numt "wikipedia" -is_a: SO:0001760 ! non_processed_pseudogene - -[Term] -id: SO:0001045 -name: cointegrated_plasmid -def: "A MGE region consisting of two fused plasmids resulting from a replicative transposition event." [phigo:at] -synonym: "cointegrated plasmid" EXACT [] -synonym: "cointegrated replicon" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0001046 -name: IRLinv_site -def: "Component of the inversion site located at the left of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRLinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001047 -name: IRRinv_site -def: "Component of the inversion site located at the right of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRRinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001048 -name: inversion_site_part -def: "A region located within an inversion site." [SO:ke] -comment: A term created to allow the parts of an inversion site have an is_a path back to the root. -synonym: "inversion site part" EXACT [] -is_a: SO:0000342 ! 
site_specific_recombination_target_region - -[Term] -id: SO:0001049 -name: defective_conjugative_transposon -def: "An island that contains genes for integration/excision and the gene and site for the initiation of intercellular transfer by conjugation. It can be complemented for transfer by a conjugative transposon." [Phigo:ariane] -synonym: "defective conjugative transposon" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001050 -name: repeat_fragment -def: "A portion of a repeat, interrupted by the insertion of another element." [SO:ke] -comment: Requested by Chris Smith, and others at Flybase to help annotate nested repeats. -synonym: "repeat fragment" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0001649 ! nested_repeat - -[Term] -id: SO:0001051 -name: nested_region -is_obsolete: true - -[Term] -id: SO:0001052 -name: nested_repeat -is_obsolete: true - -[Term] -id: SO:0001053 -name: nested_transposon -is_obsolete: true - -[Term] -id: SO:0001054 -name: transposon_fragment -def: "A portion of a transposon, interrupted by the insertion of another element." [SO:ke] -synonym: "transposon fragment" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0001648 ! nested_transposon - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [PMID:9679020, SO:regcreative] -subset: SOFA -synonym: "transcription-control region" EXACT [] -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." [SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0001679 ! 
transcription_regulatory_region - -[Term] -id: SO:0001057 -name: enhanceosome -is_obsolete: true - -[Term] -id: SO:0001058 -name: promoter_targeting_sequence -def: "A transcriptional_cis_regulatory_region that restricts the activity of a CRM to a single promoter and which functions only when both itself and an insulator are located between the CRM and the promoter." [SO:regcreative] -synonym: "promoter targeting sequence" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. -subset: SOFA -synonym: "partially characterised change in DNA sequence" NARROW [] -synonym: "partially_characterised_change_in_DNA_sequence" NARROW [] -synonym: "sequence alteration" EXACT [] -synonym: "sequence variation" RELATED [] -synonym: "uncharacterised_change_in_nucleotide_sequence" NARROW [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001060 -name: sequence_variant -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -synonym: "ANNOVAR:unknown" RELATED VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "sequence variant" EXACT [] -synonym: "VAAST:sequence_variant" EXACT VAR [] - -[Term] -id: SO:0001061 -name: propeptide_cleavage_site -alt_id: BS:00063 -def: "The propeptide_cleavage_site is the arginine/lysine boundary on a propeptide where cleavage occurs." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "propeptide cleavage site" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0001062 ! 
propeptide - -[Term] -id: SO:0001062 -name: propeptide -alt_id: BS:00077 -def: "Part of a peptide chain which is cleaved off during the formation of the mature protein." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "propep" RELATED [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Propeptide "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001064 -name: active_peptide -alt_id: BS:00076 -def: "Active peptides are proteins which are biologically active, released from a precursor molecule." [EBIBS:GAR, UniProt:curation_manual] -comment: Hormones, neuropeptides, antimicrobial peptides, are active peptides. They are typically short (<40 amino acids) in length. -subset: biosapiens -synonym: "active peptide" EXACT [] -synonym: "peptide" BROAD [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Peptide "wiki" -is_a: SO:0000419 ! mature_protein_region - -[Term] -id: SO:0001066 -name: compositionally_biased_region_of_peptide -alt_id: BS:00068 -def: "Polypeptide region that is rich in a particular amino acid or homopolymeric and greater than three residues in length." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "compbias" RELATED [uniprot:feature_type] -synonym: "compositional bias" RELATED [] -synonym: "compositionally biased" RELATED [] -synonym: "compositionally biased region of peptide" RELATED [] -synonym: "compositionally_biased_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001067 -name: polypeptide_motif -alt_id: BS:00032 -def: "A sequence motif is a short (up to 20 amino acids) region of biological interest. 
Such motifs, although they are too short to constitute functional domains, share sequence similarities and are conserved in different proteins. They display a common function (protein-binding, subcellular location etc.)." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "motif" BROAD [uniprot:feature_type] -synonym: "polypeptide motif" EXACT [] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001068 -name: polypeptide_repeat -alt_id: BS:00070 -def: "A polypeptide_repeat is a single copy of an internal sequence repetition." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "polypeptide repeat" EXACT [] -synonym: "repeat" RELATED [uniprot:feature_type] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001070 -name: polypeptide_structural_region -alt_id: BS:00337 -def: "Region of polypeptide with a given structural property." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "polypeptide structural region" EXACT [] -synonym: "structural_region" RELATED [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001071 -name: membrane_structure -alt_id: BS:00128 -def: "Arrangement of the polypeptide with respect to the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "membrane structure" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001072 -name: extramembrane_polypeptide_region -alt_id: BS:00154 -def: "Polypeptide region that is localized outside of a lipid bilayer." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "extramembrane" RELATED BS [] -synonym: "extramembrane polypeptide region" EXACT [] -synonym: "extramembrane_region" RELATED BS [] -synonym: "topo_dom" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! 
membrane_structure - -[Term] -id: SO:0001073 -name: cytoplasmic_polypeptide_region -alt_id: BS:00145 -def: "Polypeptide region that is localized inside the cytoplasm." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "cytoplasm_location" EXACT BS [] -synonym: "cytoplasmic polypeptide region" EXACT [] -synonym: "inside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001074 -name: non_cytoplasmic_polypeptide_region -alt_id: BS:00144 -def: "Polypeptide region that is localized outside of a lipid bilayer and outside of the cytoplasm." [EBIBS:GAR, SO:cb] -comment: This could be inside an organelle within the cell. -subset: biosapiens -synonym: "non cytoplasmic polypeptide region" EXACT [] -synonym: "non_cytoplasm_location" EXACT BS [] -synonym: "outside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001075 -name: intramembrane_polypeptide_region -alt_id: BS:00156 -def: "Polypeptide region present in the lipid bilayer." [EBIBS:GAR] -subset: biosapiens -synonym: "intramembrane" RELATED BS [] -synonym: "intramembrane polypeptide region" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001076 -name: membrane_peptide_loop -alt_id: BS:00155 -def: "Polypeptide region localized within the lipid bilayer where both ends traverse the same membrane." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "membrane peptide loop" EXACT [] -synonym: "membrane_loop" RELATED BS [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001077 -name: transmembrane_polypeptide_region -alt_id: BS:00158 -def: "Polypeptide region traversing the lipid bilayer." [EBIBS:GAR, UniProt:curator_manual] -subset: biosapiens -synonym: "transmem" RELATED BS [uniprot:feature_type] -synonym: "transmembrane" RELATED BS [] -synonym: "transmembrane polypeptide region" EXACT [] -is_a: SO:0001075 ! 
intramembrane_polypeptide_region - -[Term] -id: SO:0001078 -name: polypeptide_secondary_structure -alt_id: BS:00003 -def: "A region of peptide with secondary structure has hydrogen bonding along the peptide chain that causes a defined conformation of the chain." [EBIBS:GAR] -comment: Biosapien term was secondary_structure. -subset: biosapiens -synonym: "2nary structure" RELATED BS [] -synonym: "polypeptide secondary structure" EXACT [] -synonym: "secondary structure" RELATED BS [] -synonym: "secondary structure region" RELATED BS [] -synonym: "secondary_structure" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Secondary_structure "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001079 -name: polypeptide_structural_motif -alt_id: BS:0000338 -def: "Motif is a three-dimensional structural element within the chain, which appears also in a variety of other molecules. Unlike a domain, a motif does not need to form a stable globular unit." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide structural motif" RELATED [] -synonym: "structural_motif" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Structural_motif "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001080 -name: coiled_coil -alt_id: BS:00041 -def: "A coiled coil is a structural motif in proteins, in which alpha-helices are coiled together like the strands of a rope." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "coiled" RELATED BS [uniprot:feature_type] -synonym: "coiled coil" EXACT [] -xref: http://en.wikipedia.org/wiki/Coiled_coil "wiki" -is_a: SO:0001079 ! polypeptide_structural_motif - -[Term] -id: SO:0001081 -name: helix_turn_helix -alt_id: BS:00147 -def: "A motif comprising two helices separated by a turn." [EBIBS:GAR] -subset: biosapiens -synonym: "helix turn helix" EXACT [] -synonym: "helix-turn-helix" EXACT [] -synonym: "HTH" RELATED BS [] -is_a: SO:0001079 ! 
polypeptide_structural_motif -relationship: has_part SO:0001114 ! peptide_helix -relationship: has_part SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001082 -name: polypeptide_sequencing_information -alt_id: BS:00125 -def: "Incompatibility in the sequence due to some experimental problem." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "sequencing_information" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0001083 -name: non_adjacent_residues -alt_id: BS:00182 -def: "Indicates that two consecutive residues in a fragment sequence are not consecutive in the full-length protein and that there are a number of unsequenced residues between them." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "non consecutive" EXACT [] -synonym: "non_cons" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001084 -name: non_terminal_residue -alt_id: BS:00072 -def: "The residue at an extremity of the sequence is not the terminal residue." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "non terminal" EXACT [] -synonym: "non_ter" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001085 -name: sequence_conflict -alt_id: BS:00069 -def: "Different sources report differing sequences." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "conflict" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001086 -name: sequence_uncertainty -alt_id: BS:00181 -def: "Describes the positions in a sequence where the authors are unsure about the sequence assignment." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "unsure" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! 
polypeptide_sequencing_information - -[Term] -id: SO:0001087 -name: cross_link -alt_id: BS:00178 -def: "Posttranslationally formed amino acid bonds." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "cross link" EXACT [] -synonym: "crosslink" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001088 -name: disulfide_bond -alt_id: BS:00028 -def: "The covalent bond between sulfur atoms that binds two peptide chains or different parts of one peptide chain and is a structural determinant in many protein molecules." [EBIBS:GAR, UniProt:curation_manual] -comment: 2 discreet & joined. -subset: biosapiens -synonym: "disulfid" RELATED [] -synonym: "disulfide" RELATED [] -synonym: "disulfide bond" RELATED [] -synonym: "disulphide" EXACT [] -synonym: "disulphide bond" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001089 -name: post_translationally_modified_region -alt_id: BS:00052 -def: "A region where a transformation occurs in a protein after it has been synthesized. This which may regulate, stabilize, crosslink or introduce new chemical functionalities in the protein." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mod_res" EXACT [uniprot:feature_type] -synonym: "modified residue" EXACT [] -synonym: "post_translational_modification" EXACT [] -xref: http://en.wikipedia.org/wiki/Post_translational_modification "wiki" -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001090 -name: covalent_binding_site -alt_id: BS:00246 -def: "Binding involving a covalent bond." [EBIBS:GAR] -subset: biosapiens -synonym: "covalent binding site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001091 -name: non_covalent_binding_site -alt_id: BS:00029 -def: "Binding site for any chemical group (co-enzyme, prosthetic group, etc.)." [EBIBS:GAR] -comment: Discrete. 
-subset: biosapiens -synonym: "binding" RELATED [uniprot:curation] -synonym: "binding site" RELATED [] -synonym: "non covalent binding site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001092 -name: polypeptide_metal_contact -alt_id: BS:00027 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with metal ions." [EBIBS:GAR, SO:cb, UniProt:curation_manual] -comment: Residue is part of a binding site for a metal ion. -subset: biosapiens -synonym: "metal_binding" RELATED [] -is_a: SO:0001656 ! metal_binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001093 -name: protein_protein_contact -alt_id: BS:00131 -def: "A binding site that, in the protein molecule, interacts selectively and non-covalently with polypeptide residues." [EBIBS:GAR, UniProt:Curation_manual] -subset: biosapiens -synonym: "protein protein contact" EXACT [] -synonym: "protein protein contact site" EXACT [] -synonym: "protein_protein_interaction" RELATED [] -xref: http://en.wikipedia.org/wiki/Protein_protein_interaction "wiki" -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001094 -name: polypeptide_calcium_ion_contact_site -alt_id: BS:00186 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with calcium ions." [EBIBS:GAR] -comment: Residue involved in contact with calcium. -subset: biosapiens -synonym: "ca bind" RELATED [] -synonym: "ca_bind" EXACT BS [uniprot:feature_type] -synonym: "Ca_contact_site" EXACT [] -synonym: "polypeptide calcium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001095 -name: polypeptide_cobalt_ion_contact_site -alt_id: BS:00136 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with cobalt ions." 
[EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Co_contact_site" EXACT [] -synonym: "polypeptide cobalt ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001096 -name: polypeptide_copper_ion_contact_site -alt_id: BS:00146 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with copper ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Cu_contact_site" EXACT [] -synonym: "polypeptide copper ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001097 -name: polypeptide_iron_ion_contact_site -alt_id: BS:00137 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with iron ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Fe_contact_site" EXACT [] -synonym: "polypeptide iron ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001098 -name: polypeptide_magnesium_ion_contact_site -alt_id: BS:00187 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with magnesium ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Mg_contact_site" EXACT [] -synonym: "polypeptide magnesium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001099 -name: polypeptide_manganese_ion_contact_site -alt_id: BS:00140 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with manganese ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Mn_contact_site" EXACT [] -synonym: "polypeptide manganese ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001100 -name: polypeptide_molybdenum_ion_contact_site -alt_id: BS:00141 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with molybdenum ions." 
[EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Mo_contact_site" EXACT [] -synonym: "polypeptide molybdenum ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001101 -name: polypeptide_nickel_ion_contact_site -alt_id: BS:00142 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with nickel ions." [EBIBS:GAR] -subset: biosapiens -synonym: "Ni_contact_site" EXACT [] -synonym: "polypeptide nickel ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001102 -name: polypeptide_tungsten_ion_contact_site -alt_id: BS:00143 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with tungsten ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "polypeptide tungsten ion contact site" EXACT [] -synonym: "W_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001103 -name: polypeptide_zinc_ion_contact_site -alt_id: BS:00185 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with zinc ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "polypeptide zinc ion contact site" EXACT [] -synonym: "Zn_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001104 -name: catalytic_residue -alt_id: BS:00026 -def: "Amino acid involved in the activity of an enzyme." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "act_site" RELATED [uniprot:feature_type] -synonym: "active site residue" EXACT [] -synonym: "catalytic residue" EXACT [] -is_a: SO:0001237 ! amino_acid -relationship: part_of SO:0100019 ! polypeptide_catalytic_motif - -[Term] -id: SO:0001105 -name: polypeptide_ligand_contact -alt_id: BS:00157 -def: "Residues which interact with a ligand." 
[EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide ligand contact" EXACT [] -synonym: "protein-ligand interaction" RELATED [] -is_a: SO:0001657 ! ligand_binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001106 -name: asx_motif -alt_id: BS:00202 -def: "A motif of five consecutive residues and two H-bonds in which: Residue(i) is Aspartate or Asparagine (Asx), side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3), main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001107 -name: beta_bulge -alt_id: BS:00208 -def: "A motif of three residues within a beta-sheet in which the main chains of two consecutive residues are H-bonded to that of the third, and in which the dihedral angles are as follows: Residue(i): -140 degrees < phi(l) -20 degrees , -90 degrees < psi(l) < 40 degrees. Residue (i+1): -180 degrees < phi < -25 degrees or +120 degrees < phi < +180 degrees, +40 degrees < psi < +180 degrees or -180 degrees < psi < -120 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge" EXACT [] -xref: http://en.wikipedia.org/wiki/Beta_bulge "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001108 -name: beta_bulge_loop -alt_id: BS:00209 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds. Beta bulge loops often occur at the loop ends of beta-hairpins." [EBIBS:GAR, Http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001109 -name: beta_bulge_loop_five -alt_id: BS:00210 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+4), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+3), these loops have an RL nest at residues i+2 and i+3." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop five" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001110 -name: beta_bulge_loop_six -alt_id: BS:00211 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+5), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+4), these loops have an RL nest at residues i+3 and i+4." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop six" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001111 -name: beta_strand -alt_id: BS:00042 -def: "A beta strand describes a single length of polypeptide chain that forms part of a beta sheet. A single continuous stretch of amino acids adopting an extended conformation of hydrogen bonds between the N-O and the C=O of another part of the peptide. This forms a secondary protein structure in which two or more extended polypeptide regions are hydrogen-bonded to one another in a planar array." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "strand" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Beta_sheet "wiki" -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001112 -name: antiparallel_beta_strand -alt_id: BS:0000341 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (one running N-terminal to C-terminal and one running C-terminal to N-terminal). Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i) and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they form two mutual backbone hydrogen bonds to each other's flanking peptide groups; this is known as a close pair of hydrogen bonds. The peptide backbone dihedral angles (phi, psi) are about (-140 degrees, 135 degrees) in antiparallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "antiparallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001113 -name: parallel_beta_strand -alt_id: BS:00151 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (both running N-terminal to C-terminal). This orientation is slightly less stable because it introduces nonplanarity in the inter-strand hydrogen bonding pattern. Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i)and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they do not hydrogen bond to each other; rather, one residue forms hydrogen bonds to the residues that flank the other (but not vice versa). For example, residue i may form hydrogen bonds to residues j - 1 and j + 1; this is known as a wide pair of hydrogen bonds. By contrast, residue j may hydrogen-bond to different residues altogether, or to none at all. The dihedral angles (phi, psi) are about (-120 degrees, 115 degrees) in parallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. 
-subset: biosapiens -synonym: "parallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001114 -name: peptide_helix -alt_id: BS:00152 -def: "A helix is a secondary_structure conformation where the peptide backbone forms a coil." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "helix" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001115 -name: left_handed_peptide_helix -alt_id: BS:00222 -def: "A left handed helix is a region of peptide where the coiled conformation turns in an anticlockwise, left handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix-l" RELATED [] -synonym: "left handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001116 -name: right_handed_peptide_helix -alt_id: BS:0000339 -def: "A right handed helix is a region of peptide where the coiled conformation turns in a clockwise, right handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix" RELATED BS [] -synonym: "right handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001117 -name: alpha_helix -alt_id: BS:00040 -def: "The helix has 3.6 residues per turn which corresponds to a translation of 1.5 angstroms (= 0.15 nm) along the helical axis. Every backbone N-H group donates a hydrogen bond to the backbone C=O group of the amino acid four residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "a-helix" RELATED BS [] -synonym: "helix" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Alpha_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001118 -name: pi_helix -alt_id: BS:00153 -def: "The pi helix has 4.1 residues per turn and a translation of 1.15 (=0.115 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid five residues earlier." [EBIBS:GAR] -comment: Range. 
-subset: biosapiens -synonym: "pi helix" EXACT [] -xref: http://en.wikipedia.org/wiki/Pi_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001119 -name: three_ten_helix -alt_id: BS:0000340 -def: "The 3-10 helix has 3 residues per turn with a translation of 2.0 angstroms (=0.2 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid three residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "3(10) helix" EXACT [] -synonym: "3-10 helix" EXACT [] -synonym: "310 helix" EXACT [] -synonym: "three ten helix" EXACT [] -xref: http://en.wikipedia.org/wiki/310_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001120 -name: polypeptide_nest_motif -alt_id: BS:00223 -def: "A motif of two consecutive residues with dihedral angles. Nest should not have Proline as any residue. Nests frequently occur as parts of other motifs such as Schellman loops." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest" RELATED BS [] -synonym: "nest_motif" EXACT [] -synonym: "polypeptide nest motif" RELATED [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001121 -name: polypeptide_nest_left_right_motif -alt_id: BS:00224 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_left_right" EXACT [] -synonym: "nest_lr" EXACT [] -synonym: "polypeptide nest left right motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001122 -name: polypeptide_nest_right_left_motif -alt_id: BS:00225 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. 
Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_right_left" EXACT [] -synonym: "nest_rl" EXACT [] -synonym: "polypeptide nest right left motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001123 -name: schellmann_loop -alt_id: BS:00226 -def: "A motif of six or seven consecutive residues that contains two H-bonds." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "paperclip" RELATED BS [] -synonym: "paperclip loop" RELATED [] -synonym: "schellmann loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001124 -name: schellmann_loop_seven -alt_id: BS:00228 -def: "Wild type: A motif of seven consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+6), the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+5)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop seven" EXACT [] -synonym: "seven-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001125 -name: schellmann_loop_six -alt_id: BS:00227 -def: "Common Type: A motif of six consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+5) the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop six" EXACT [] -synonym: "six-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! 
schellmann_loop - -[Term] -id: SO:0001126 -name: serine_threonine_motif -alt_id: BS:00229 -def: "A motif of five consecutive residues and two hydrogen bonds in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3) , the main-chain CO group of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine motif" EXACT [] -synonym: "st motif" EXACT [] -synonym: "st_motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001127 -name: serine_threonine_staple_motif -alt_id: BS:00230 -def: "A motif of four or five consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain OH of residue(i) is H-bonded to the main-chain CO of residue(i3) or (i4), Phi angles of residues(i1), (i2) and (i3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine threonine staple motif" EXACT [] -synonym: "st_staple" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001128 -name: polypeptide_turn_motif -alt_id: BS:00148 -def: "A reversal in the direction of the backbone of a protein that is stabilized by hydrogen bond between backbone NH and CO groups, involving no more than 4 amino acid residues." [EBIBS:GAR, uniprot:feature_type] -comment: Range. -subset: biosapiens -synonym: "turn" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001129 -name: asx_turn_left_handed_type_one -alt_id: BS:00206 -def: "Left handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type one" EXACT [] -synonym: "asx_turn_il" RELATED [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001130 -name: asx_turn_left_handed_type_two -alt_id: BS:00204 -def: "Left handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type two" EXACT [] -synonym: "asx_turn_iil" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001131 -name: asx_turn_right_handed_type_two -alt_id: BS:00205 -def: "Right handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn right handed type two" EXACT [] -synonym: "asx_turn_iir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001132 -name: asx_turn_right_handed_type_one -alt_id: BS:00207 -def: "Right handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn type right handed type one" EXACT [] -synonym: "asx_turn_ir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001133 -name: beta_turn -alt_id: BS:00212 -def: "A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. 
It is characterized by the dihedral angles of the second and third residues, which are the basis for sub-categorization." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001134 -name: beta_turn_left_handed_type_one -alt_id: BS:00215 -def: "Left handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles:- Residue(i+1): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees. Residue(i+2): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type one" EXACT [] -synonym: "beta_turn_il" EXACT [] -synonym: "type I' beta turn" EXACT [] -synonym: "type I' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001135 -name: beta_turn_left_handed_type_two -alt_id: BS:00213 -def: "Left handed type II: A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees > phi > -20 degrees, +80 degrees > psi > +180 degrees. Residue(i+2): +20 degrees > phi > +140 degrees, -40 degrees > psi > +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type two" EXACT [] -synonym: "beta_turn_iil" EXACT [] -synonym: "type II' beta turn" EXACT [] -synonym: "type II' turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001136 -name: beta_turn_right_handed_type_one -alt_id: BS:00216 -def: "Right handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+2): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type one" EXACT [] -synonym: "beta_turn_ir" EXACT [] -synonym: "type I beta turn" EXACT [] -synonym: "type I turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001137 -name: beta_turn_right_handed_type_two -alt_id: BS:00214 -def: "Right handed type II:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, +80 degrees < psi < +180 degrees. Residue(i+2): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type two" EXACT [] -synonym: "beta_turn_iir" EXACT [] -synonym: "type II beta turn" EXACT [] -synonym: "type II turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001138 -name: gamma_turn -alt_id: BS:00219 -def: "Gamma turns, defined for 3 residues i,( i+1),( i+2) if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn" EXACT [] -is_a: SO:0001128 ! 
polypeptide_turn_motif - -[Term] -id: SO:0001139 -name: gamma_turn_classic -alt_id: BS:00220 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=75.0 - psi(i+1)=-64.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "classic gamma turn" EXACT [] -synonym: "gamma turn classic" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001140 -name: gamma_turn_inverse -alt_id: BS:00221 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=-79.0 - psi(i+1)=69.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn inverse" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001141 -name: serine_threonine_turn -alt_id: BS:00231 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine turn" EXACT [] -synonym: "st_turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001142 -name: st_turn_left_handed_type_one -alt_id: BS:00234 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type one" EXACT [] -synonym: "st_turn_il" EXACT [] -is_a: SO:0001141 ! 
serine_threonine_turn - -[Term] -id: SO:0001143 -name: st_turn_left_handed_type_two -alt_id: BS:00232 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type two" EXACT [] -synonym: "st_turn_iil" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001144 -name: st_turn_right_handed_type_one -alt_id: BS:00235 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type one" EXACT [] -synonym: "st_turn_ir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001145 -name: st_turn_right_handed_type_two -alt_id: BS:00233 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type two" EXACT [] -synonym: "st_turn_iir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001146 -name: polypeptide_variation_site -alt_id: BS:00336 -def: "A site of sequence variation (alteration). Alternative sequence due to naturally occurring events such as polymorphisms and alternative splicing or experimental methods such as site directed mutagenesis." 
[EBIBS:GAR, SO:ke] -comment: For example, was a substitution natural or mutated as part of an experiment? This term is added to merge the biosapiens term sequence_variations. -subset: biosapiens -synonym: "sequence_variations" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001147 -name: natural_variant_site -alt_id: BS:00071 -def: "Describes the natural sequence variants due to polymorphisms, disease-associated mutations, RNA editing and variations between strains, isolates or cultivars." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "natural_variant" BROAD [] -synonym: "sequence variation" BROAD [] -synonym: "variant" BROAD [uniprot:feature_type] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001148 -name: mutated_variant_site -alt_id: BS:00036 -def: "Site which has been experimentally altered." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mutagen" EXACT BS [uniprot:feature_type] -synonym: "mutagenesis" EXACT [] -synonym: "mutated_site" EXACT [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001149 -name: alternate_sequence_site -alt_id: BS:00073 -alt_id: SO:0001065 -def: "Description of sequence variants produced by alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "alternative_sequence" EXACT [] -synonym: "isoform" NARROW [] -synonym: "sequence variation" NARROW [] -synonym: "var_seq" EXACT [uniprot:feature_type] -synonym: "varsplic" NARROW [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001150 -name: beta_turn_type_six -def: "A motif of four consecutive peptide resides of type VIa or type VIb and where the i+2 residue is cis-proline." 
[SO:cb] -subset: biosapiens -synonym: "beta turn type six" EXACT [] -synonym: "cis-proline loop" EXACT [] -synonym: "type VI beta turn" EXACT [] -synonym: "type VI turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001151 -name: beta_turn_type_six_a -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -90 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six a" EXACT [] -synonym: "type VIa beta turn" EXACT [] -synonym: "type VIa turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001152 -name: beta_turn_type_six_a_one -subset: biosapiens -synonym: "beta turn type six a one" EXACT [] -synonym: "type VIa1 beta turn" EXACT [] -synonym: "type VIa1 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001153 -name: beta_turn_type_six_a_two -subset: biosapiens -synonym: "beta turn type six a two" EXACT [] -synonym: "type VIa2 beta turn" EXACT [] -synonym: "type VIa2 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001154 -name: beta_turn_type_six_b -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -120 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -60 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six b" EXACT [] -synonym: "type VIb beta turn" EXACT [] -synonym: "type VIb turn" EXACT [] -is_a: SO:0001150 ! 
beta_turn_type_six - -[Term] -id: SO:0001155 -name: beta_turn_type_eight -def: "A motif of four consecutive peptide residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ -30 degrees. Residue(i+2): phi ~ -120 degrees, psi ~ 120 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type eight" EXACT [] -synonym: "type VIII beta turn" EXACT [] -synonym: "type VIII turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001156 -name: DRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -10 and -60 relative to the TSS. Consensus sequence is WATCGATW." [PMID:12537576] -comment: This consensus sequence was identified computationally using the MEME algorithm within core promoter sequences from -60 to +40, with an E value of 1.7e-183. Tends to co-occur with Motif 7. Tends to not occur with DPE motif (SO:0000015) or motif 10. -synonym: "DRE motif" EXACT [] -synonym: "NDM4" EXACT [] -synonym: "WATCGATW_motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001157 -name: DMv4_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements with respect to the TSS (+1). Consensus sequence is YGGTCACACTR. Marked spatial preference within core promoter; tend to occur near the TSS, although not as tightly as INR (SO:0000014)." [PMID:16827941:12537576] -synonym: "directional motif v4" EXACT [] -synonym: "DMv4" EXACT [] -synonym: "DMv4 motif" EXACT [] -synonym: "motif 1 element" EXACT [] -synonym: "promoter motif 1" EXACT [] -synonym: "YGGTCACATR" NARROW [] -is_a: SO:0001659 ! 
promoter_element - -[Term] -id: SO:0001158 -name: E_box_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and +1 relative to the TSS. Consensus sequence is AWCAGCTGWT. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015)." [PMID:12537576:16827941] -synonym: "AWCAGCTGWT" NARROW [] -synonym: "E box motif" EXACT [] -synonym: "generic E box motif" EXACT [] -synonym: "NDM5" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001159 -name: DMv5_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -50 and -10 relative to the TSS. Consensus sequence is KTYRGTATWTTT. Tends to co-occur with DMv4 (SO:0001157) . Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576:16827941] -synonym: "directional motif v5" EXACT [] -synonym: "DMv5" EXACT [] -synonym: "DMv5 motif" EXACT [] -synonym: "KTYRGTATWTTT" NARROW [] -synonym: "promoter motif 6" RELATED [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001160 -name: DMv3_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -30 and +15 relative to the TSS. Consensus sequence is KNNCAKCNCTRNY. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015) or MTE (0001162)." [PMID:12537576:16827941] -synonym: "directional motif v3" EXACT [] -synonym: "DMv3" EXACT [] -synonym: "DMv3 motif" EXACT [] -synonym: "KNNCAKCNCTRNY" NARROW [] -synonym: "promoter motif 7" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001161 -name: DMv2_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and -45 relative to the TSS. 
Consensus sequence is MKSYGGCARCGSYSS. Tends to co-occur with DMv3 (SO:0001160). Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576:16827941] -synonym: "directional motif v2" EXACT [] -synonym: "DMv2" EXACT [] -synonym: "DMv2 motif" EXACT [] -synonym: "MKSYGGCARCGSYSS" NARROW [] -synonym: "promoter motif 8" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001162 -name: MTE -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between +20 and +30 relative to the TSS. Consensus sequence is CSARCSSAACGS. Tends to co-occur with INR motif (SO:0000014). Tends to not occur with DPE motif (SO:0000015) or DMv5 (SO:0001159)." [PMID:12537576:15231738, PMID:16858867] -synonym: "CSARCSSAACGS" NARROW [] -synonym: "motif ten element" EXACT [] -synonym: "motif_ten_element" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter - -[Term] -id: SO:0001163 -name: INR1_motif -def: "A promoter motif with consensus sequence TCATTCG." [PMID:16827941] -synonym: "directional motif p3" EXACT [] -synonym: "directional promoter motif 3" EXACT [] -synonym: "DMp3" EXACT [] -synonym: "INR1 motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001164 -name: DPE1_motif -def: "A promoter motif with consensus sequence CGGACGT." [PMID:16827941] -synonym: "directional motif 5" EXACT [] -synonym: "directional promoter motif 5" RELATED [] -synonym: "DMp5" EXACT [] -synonym: "DPE1 motif" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001165 -name: DMv1_motif -def: "A promoter motif with consensus sequence CARCCCT." [PMID:16827941] -synonym: "directional promoter motif v1" RELATED [] -synonym: "DMv1" RELATED [] -synonym: "DMv1 motif" EXACT [] -is_a: SO:0001659 ! 
promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001166 -name: GAGA_motif -def: "A non directional promoter motif with consensus sequence GAGAGCG." [PMID:16827941] -synonym: "GAGA" EXACT [] -synonym: "GAGA motif" EXACT [] -synonym: "NDM1" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001167 -name: NDM2_motif -def: "A non directional promoter motif with consensus CGMYGYCR." [PMID:16827941] -synonym: "NDM2" EXACT [] -synonym: "NDM2 motif" EXACT [] -synonym: "non directional promoter motif 2" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001168 -name: NDM3_motif -def: "A non directional promoter motif with consensus sequence GAAAGCT." [PMID:16827941] -synonym: "NDM3" EXACT [] -synonym: "NDM3 motif" EXACT [] -synonym: "non directional motif 3" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001169 -name: ds_RNA_viral_sequence -def: "A ds_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded RNA." [SO:ke] -synonym: "double stranded RNA virus sequence" EXACT [] -synonym: "ds RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001170 -name: polinton -def: "A kind of DNA transposon that populates the genomes of protists, fungi, and animals, characterized by a unique set of proteins necessary for their transposition, including a protein-primed DNA polymerase B, retroviral integrase, cysteine protease, and ATPase. Polintons are characterized by 6-bp target site duplications, terminal-inverted repeats that are several hundred nucleotides long, and 5'-AG and TC-3' termini. Polintons exist as autonomous and nonautonomous elements." [PMID:16537396] -synonym: "maverick element" RELATED [] -is_a: SO:0000208 ! 
terminal_inverted_repeat_element - -[Term] -id: SO:0001171 -name: rRNA_21S -def: "A component of the large ribosomal subunit in mitochondrial rRNA." [RSC:cb] -synonym: "21S LSU rRNA" EXACT [] -synonym: "21S ribosomal RNA" EXACT [] -synonym: "21S rRNA" EXACT [] -synonym: "rRNA 21S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001172 -name: tRNA_region -def: "A region of a tRNA." [RSC:cb] -synonym: "tRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000253 ! tRNA - -[Term] -id: SO:0001173 -name: anticodon_loop -def: "A sequence of seven nucleotide bases in tRNA which contains the anticodon. It has the sequence 5'-pyrimidine-purine-anticodon-modified purine-any base-3." [ISBN:0716719207] -synonym: "anti-codon loop" EXACT [] -synonym: "anticodon loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001174 -name: anticodon -def: "A sequence of three nucleotide bases in tRNA which recognizes a codon in mRNA." [RSC:cb] -synonym: "anti-codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Anticodon "wiki" -is_a: SO:0001172 ! tRNA_region -relationship: part_of SO:0001173 ! anticodon_loop - -[Term] -id: SO:0001175 -name: CCA_tail -def: "Base sequence at the 3' end of a tRNA. The 3'-hydroxyl group on the terminal adenosine is the attachment point for the amino acid." [ISBN:0716719207] -synonym: "CCA sequence" EXACT [] -synonym: "CCA tail" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001176 -name: DHU_loop -def: "Non-base-paired sequence of nucleotide bases in tRNA. It contains several dihydrouracil residues." [ISBN:071671920] -synonym: "D loop" RELATED [] -synonym: "DHU loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001177 -name: T_loop -def: "Non-base-paired sequence of three nucleotide bases in tRNA. It has sequence T-Psi-C." [ISBN:0716719207] -synonym: "T loop" EXACT [] -synonym: "TpsiC loop" EXACT [] -is_a: SO:0001172 ! 
tRNA_region - -[Term] -id: SO:0001178 -name: pyrrolysine_tRNA_primary_transcript -def: "A primary transcript encoding pyrrolysyl tRNA (SO:0000766)." [RSC:cb] -synonym: "pyrrolysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0001179 -name: U3_snoRNA -def: "U3 snoRNA is a member of the box C/D class of small nucleolar RNAs. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -comment: The definition is most of the old definition for snoRNA (SO:0000275). -synonym: "small nucleolar RNA U3" EXACT [] -synonym: "snoRNA U3" EXACT [] -synonym: "U3 small nucleolar RNA" EXACT [] -synonym: "U3 snoRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Small_nucleolar_RNA_U3 "wiki" -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0001180 -name: AU_rich_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is rich in AUUUA pentamers. Messenger RNAs bearing multiple AU-rich elements are often unstable." 
[PMID:7892223] -synonym: "ARE" RELATED [] -synonym: "AU rich element" EXACT [] -synonym: "AU-rich element" EXACT [] -xref: http://en.wikipedia.org/wiki/AU-rich_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001181 -name: Bruno_response_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is bound by the Drosophila Bruno protein and its homologs." [PMID:10893231] -comment: Not to be confused with BRE_motif (SO:0000016), which binds transcription factor II B. -synonym: "BRE" RELATED [] -synonym: "Bruno response element" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001182 -name: iron_responsive_element -def: "A regulatory sequence found in the 5' and 3' UTRs of many mRNAs which encode iron-binding proteins. It has a hairpin structure and is recognized by trans-acting proteins known as iron-regulatory proteins." [PMID:3198610, PMID:8710843] -synonym: "IRE" EXACT [] -synonym: "iron responsive element" EXACT [] -xref: http://en.wikipedia.org/wiki/Iron_responsive_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0001183 -name: morpholino_backbone -def: "An attribute describing a sequence composed of nucleobases bound to a morpholino backbone. A morpholino backbone consists of morpholine (CHEBI:34856) rings connected by phosphorodiamidate linkages." [RSC:cb] -comment: Do not use this for feature annotation. Use morpholino_oligo (SO:0000034) instead. -synonym: "morpholino backbone" EXACT [] -xref: http://en.wikipedia.org/wiki/Morpholino "wiki" -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001184 -name: PNA -def: "An attribute describing a sequence composed of peptide nucleic acid (CHEBI:48021), a chemical consisting of nucleobases bound to a backbone composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. 
The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [RSC:cb] -comment: Do not use this term for feature annotation. Use PNA_oligo (SO:0001011) instead. -synonym: "peptide nucleic acid" RELATED [] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001185 -name: enzymatic -def: "An attribute describing the sequence of a transcript that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use enzymatic_RNA (SO:0000372) instead. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001186 -name: ribozymic -def: "An attribute describing the sequence of a transcript that has catalytic activity even without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use ribozyme (SO:0000374) instead. -is_a: SO:0001185 ! enzymatic - -[Term] -id: SO:0001187 -name: pseudouridylation_guide_snoRNA -def: "A snoRNA that specifies the site of pseudouridylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA pseudouridylation guide activity (GO:0030558). -synonym: "pseudouridylation guide snoRNA" EXACT [] -is_a: SO:0000594 ! H_ACA_box_snoRNA - -[Term] -id: SO:0001188 -name: LNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of 'locked' deoxyribose rings connected to a phosphate backbone. The deoxyribose unit's conformation is 'locked' by a 2'-C,4'-C-oxymethylene link." [CHEBI:48010] -comment: Do not use this term for feature annotation. Use LNA_oligo (SO:0001189) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001189 -name: LNA_oligo -def: "An oligo composed of LNA residues." [RSC:cb] -synonym: "LNA oligo" EXACT [] -synonym: "locked nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Locked_nucleic_acid "wiki" -is_a: SO:0001247 ! 
synthetic_oligo -relationship: has_quality SO:0001188 ! LNA - -[Term] -id: SO:0001190 -name: TNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of threose rings connected to a phosphate backbone." [CHEBI:48019] -comment: Do not use this term for feature annotation. Use TNA_oligo (SO:0001191) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001191 -name: TNA_oligo -def: "An oligo composed of TNA residues." [RSC:cb] -synonym: "threose nucleic acid" EXACT [] -synonym: "TNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Threose_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -relationship: has_quality SO:0001190 ! TNA - -[Term] -id: SO:0001192 -name: GNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of an acyclic three-carbon propylene glycol connected to a phosphate backbone. It has two enantiomeric forms, (R)-GNA and (S)-GNA." [CHEBI:48015] -comment: Do not use this term for feature annotation. Use GNA_oligo (SO:0001192) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001193 -name: GNA_oligo -def: "An oligo composed of GNA residues." [RSC:cb] -synonym: "glycerol nucleic acid" EXACT [] -synonym: "glycol nucleic acid" EXACT [] -synonym: "GNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Glycerol_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -relationship: has_quality SO:0001192 ! GNA - -[Term] -id: SO:0001194 -name: R_GNA -def: "An attribute describing a GNA sequence in the (R)-GNA enantiomer." [CHEBI:48016] -comment: Do not use this term for feature annotation. Use R_GNA_oligo (SO:0001195) instead. -synonym: "R GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001195 -name: R_GNA_oligo -def: "An oligo composed of (R)-GNA residues." [RSC:cb] -synonym: "(R)-glycerol nucleic acid" EXACT [] -synonym: "(R)-glycol nucleic acid" EXACT [] -synonym: "R GNA oligo" EXACT [] -is_a: SO:0001193 ! 
GNA_oligo -relationship: has_quality SO:0001194 ! R_GNA - -[Term] -id: SO:0001196 -name: S_GNA -def: "An attribute describing a GNA sequence in the (S)-GNA enantiomer." [CHEBI:48017] -comment: Do not use this term for feature annotation. Use S_GNA_oligo (SO:0001197) instead. -synonym: "S GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001197 -name: S_GNA_oligo -def: "An oligo composed of (S)-GNA residues." [RSC:cb] -synonym: "(S)-glycerol nucleic acid" EXACT [] -synonym: "(S)-glycol nucleic acid" EXACT [] -synonym: "S GNA oligo" EXACT [] -is_a: SO:0001193 ! GNA_oligo -relationship: has_quality SO:0001196 ! S_GNA - -[Term] -id: SO:0001198 -name: ds_DNA_viral_sequence -def: "A ds_DNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded DNA." [SO:ke] -synonym: "double stranded DNA virus" EXACT [] -synonym: "ds DNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001199 -name: ss_RNA_viral_sequence -def: "A ss_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as single stranded RNA." [SO:ke] -synonym: "single strand RNA virus" EXACT [] -synonym: "ss RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001200 -name: negative_sense_ssRNA_viral_sequence -def: "A negative_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that is complementary to mRNA and must be converted to positive sense RNA by RNA polymerase before translation." [SO:ke] -synonym: "negative sense single stranded RNA virus" RELATED [] -synonym: "negative sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001201 -name: positive_sense_ssRNA_viral_sequence -def: "A positive_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that can be immediately translated by the host." 
[SO:ke] -synonym: "positive sense single stranded RNA virus" RELATED [] -synonym: "positive sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001202 -name: ambisense_ssRNA_viral_sequence -def: "A ambisense_RNA_virus is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus with both messenger and anti messenger polarity." [SO:ke] -synonym: "ambisense single stranded RNA virus" EXACT [] -synonym: "ambisense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001203 -name: RNA_polymerase_promoter -def: "A region (DNA) to which RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "RNA polymerase promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0001204 -name: Phage_RNA_Polymerase_Promoter -def: "A region (DNA) to which Bacteriophage RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "Phage RNA Polymerase Promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0001205 -name: SP6_RNA_Polymerase_Promoter -def: "A region (DNA) to which the SP6 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "SP6 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001206 -name: T3_RNA_Polymerase_Promoter -def: "A DNA sequence to which the T3 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T3 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001207 -name: T7_RNA_Polymerase_Promoter -def: "A region (DNA) to which the T7 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T7 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001208 -name: five_prime_EST -def: "An EST read from the 5' end of a transcript that usually codes for a protein. 
These regions tend to be conserved across species and do not change much within a gene family." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "5' EST" EXACT [] -synonym: "five prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001209 -name: three_prime_EST -def: "An EST read from the 3' end of a transcript. They are more likely to fall within non-coding, or untranslated regions(UTRs)." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "3' EST" EXACT [] -synonym: "three prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001210 -name: translational_frameshift -def: "The region of mRNA (not divisible by 3 bases) that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "ribosomal frameshift" EXACT [] -synonym: "translational frameshift" EXACT [] -xref: http://en.wikipedia.org/wiki/Translational_frameshift "wiki" -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0001211 -name: plus_1_translational_frameshift -def: "The region of mRNA 1 base long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 1 ribosomal frameshift" EXACT [] -synonym: "plus 1 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001212 -name: plus_2_translational_frameshift -def: "The region of mRNA 2 bases long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 2 ribosomal frameshift" EXACT [] -synonym: "plus 2 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001213 -name: group_III_intron -def: "Group III introns are introns found in the mRNA of the plastids of euglenoid protists. They are spliced by a two step transesterification with bulged adenosine as initiating nucleophile." 
[PMID:11377794] -comment: GO:0000374. -synonym: "group III intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_III_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -def: "The maximal intersection of exon and UTR." [SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with a stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001216 -name: endonuclease_spliced_intron -def: "An intron that spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -synonym: "endonuclease spliced intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0001217 -name: protein_coding_gene -synonym: "protein coding gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000010 ! protein_coding - -[Term] -id: SO:0001218 -name: transgenic_insertion -def: "An insertion that derives from another organism, via the use of recombinant DNA technology." [SO:bm] -synonym: "transgenic insertion" EXACT [] -is_a: SO:0000667 ! insertion -relationship: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0001219 -name: retrogene -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000569 ! retrotransposed - -[Term] -id: SO:0001220 -name: silenced_by_RNA_interference -def: "An attribute describing an epigenetic process where a gene is inactivated by RNA interference." [RSC:cb] -comment: RNA interference is GO:0016246. 
-synonym: "silenced by RNA interference" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001221 -name: silenced_by_histone_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by histone modification." [RSC:cb] -comment: Histone modification is GO:0016570. -synonym: "silenced by histone modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001222 -name: silenced_by_histone_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone methylation." [RSC:cb] -comment: Histone methylation is GO:0016571. -synonym: "silenced by histone methylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001223 -name: silenced_by_histone_deacetylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone deacetylation." [RSC:cb] -comment: Histone deacetylation is GO:0016573. -synonym: "silenced by histone deacetylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001224 -name: gene_silenced_by_RNA_interference -def: "A gene that is silenced by RNA interference." [SO:xp] -synonym: "gene silenced by RNA interference" EXACT [] -synonym: "RNA interference silenced gene" EXACT [] -synonym: "RNAi silenced gene" EXACT [] -is_a: SO:0000127 ! silenced_gene -relationship: has_quality SO:0001220 ! silenced_by_RNA_interference - -[Term] -id: SO:0001225 -name: gene_silenced_by_histone_modification -def: "A gene that is silenced by histone modification." [SO:xp] -synonym: "gene silenced by histone modification" EXACT [] -is_a: SO:0000127 ! silenced_gene -relationship: has_quality SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001226 -name: gene_silenced_by_histone_methylation -def: "A gene that is silenced by histone methylation." [SO:xp] -synonym: "gene silenced by histone methylation" EXACT [] -is_a: SO:0001225 ! 
gene_silenced_by_histone_modification -relationship: has_quality SO:0001222 ! silenced_by_histone_methylation - -[Term] -id: SO:0001227 -name: gene_silenced_by_histone_deacetylation -def: "A gene that is silenced by histone deacetylation." [SO:xp] -synonym: "gene silenced by histone deacetylation" EXACT [] -is_a: SO:0001225 ! gene_silenced_by_histone_modification -relationship: has_quality SO:0001223 ! silenced_by_histone_deacetylation - -[Term] -id: SO:0001228 -name: dihydrouridine -def: "A modified RNA base in which the 5,6-dihydrouracil is bound to the ribose ring." [RSC:cb] -synonym: "D" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Dihydrouridine "wiki" -xref: RNAMOD:051 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001229 -name: pseudouridine -def: "A modified RNA base in which the 5- position of the uracil is bound to the ribose ring instead of the 4- position." [RSC:cb] -comment: The free molecule is CHEBI:17802. -synonym: "Y" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Pseudouridine "wiki" -xref: RNAMOD:050 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001230 -name: inosine -def: "A modified RNA base in which hypoxanthine is bound to the ribose ring." [http://library.med.utah.edu/RNAmods/, RSC:cb] -comment: The free molecule is CHEBI:17596. -synonym: "I" RELATED [] -synonym: "RNAMOD:017" RELATED [] -xref: http://en.wikipedia.org/wiki/Inosine "wiki" -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001231 -name: seven_methylguanine -def: "A modified RNA base in which guanine is methylated at the 7- position." [RSC:cb] -comment: The free molecule is CHEBI:2274. -synonym: "7-methylguanine" EXACT [] -synonym: "seven methylguanine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001232 -name: ribothymidine -def: "A modified RNA base in which thymine is bound to the ribose ring." [RSC:cb] -comment: The free molecule is CHEBI:30832. -is_a: SO:0000250 ! 
modified_RNA_base_feature - -[Term] -id: SO:0001233 -name: methylinosine -def: "A modified RNA base in which methylhypoxanthine is bound to the ribose ring." [RSC:cb] -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001234 -name: mobile -def: "An attribute describing a feature that has either intra-genome or intracellular mobility." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Mobile "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001235 -name: replicon -def: "A region containing at least one unique origin of replication and a unique termination site." [ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001237 -name: amino_acid -def: "A sequence feature that corresponds to a single amino acid residue in a polypeptide." [RSC:cb] -comment: Probably in the future this will cross reference to Chebi. -synonym: "amino acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Amino_acid "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0001238 -name: major_TSS -synonym: "major transcription start site" EXACT [] -synonym: "major TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001239 -name: minor_TSS -synonym: "minor TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001240 -name: TSS_region -def: "The region of a gene from the 5' most TSS to the 3' TSS." [BBOP:nw] -synonym: "TSS region" EXACT [] -is_a: SO:0000842 ! gene_component_region -relationship: has_part SO:0000315 ! TSS - -[Term] -id: SO:0001241 -name: encodes_alternate_transcription_start_sites -synonym: "encodes alternate transcription start sites" EXACT [] -is_a: SO:0000401 ! 
gene_attribute - -[Term] -id: SO:0001243 -name: miRNA_primary_transcript_region -def: "A part of an miRNA primary_transcript." [SO:ke] -synonym: "miRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0001244 -name: pre_miRNA -def: "The 60-70 nucleotide region remain after Drosha processing of the primary transcript, that folds back upon itself to form a hairpin structure." [SO:ke] -synonym: "pre-miRNA" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region - -[Term] -id: SO:0001245 -name: miRNA_stem -def: "The stem of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA stem" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001246 -name: miRNA_loop -def: "The loop of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA loop" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001247 -name: synthetic_oligo -def: "An oligo composed of synthetic nucleotides." [SO:ke] -synonym: "synthetic oligo" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0001248 -name: assembly -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001249 -name: fragment_assembly -def: "A fragment assembly is a genome assembly that orders overlapping fragments of the genome based on landmark sequences. The base pair distance between the landmarks is known allowing additivity of lengths." [SO:ke] -synonym: "fragment assembly" EXACT [] -synonym: "physical map" EXACT [] -is_a: SO:0001248 ! 
assembly - -[Term] -id: SO:0001250 -name: fingerprint_map -def: "A fingerprint_map is a physical map composed of restriction fragments." [SO:ke] -synonym: "BACmap" EXACT [] -synonym: "fingerprint map" EXACT [] -synonym: "FPC" EXACT [] -synonym: "FPCmap" EXACT [] -synonym: "restriction map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000412 ! restriction_fragment - -[Term] -id: SO:0001251 -name: STS_map -def: "An STS map is a physical map organized by the unique STS landmarks." [SO:ke] -synonym: "STS map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001252 -name: RH_map -def: "A radiation hybrid map is a physical map." [SO:ke] -synonym: "radiation hybrid map" EXACT [] -synonym: "RH map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001253 -name: sonicate_fragment -def: "A DNA fragment generated by sonication. Sonication is a technique used to sheer DNA into smaller fragments." [SO:ke] -synonym: "sonicate fragment" EXACT [] -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0001254 -name: polyploid -def: "A kind of chromosome variation where the chromosome complement is an exact multiple of the haploid number and is greater than the diploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Polyploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0001255 -name: autopolyploid -def: "A polyploid where the multiple chromosome set was derived from the same organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Autopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001256 -name: allopolyploid -def: "A polyploid where the multiple chromosome set was derived from a different organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Allopolyploid "wiki" -is_a: SO:0001254 ! 
polyploid - -[Term] -id: SO:0001257 -name: homing_endonuclease_binding_site -def: "The binding site (recognition site) of a homing endonuclease. The binding site is typically large." [SO:ke] -synonym: "homing endonuclease binding site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0001258 -name: octamer_motif -def: "A sequence element characteristic of some RNA polymerase II promoters with sequence ATTGCAT that binds Pou-domain transcription factors." [GOC:dh, PMID:3095662] -comment: Nature. 1986 Oct 16-22;323(6089):640-3. -synonym: "octamer motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001259 -name: apicoplast_chromosome -def: "A chromosome originating in an apicoplast." [SO:xp] -synonym: "apicoplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -relationship: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0001260 -name: sequence_collection -def: "A collection of discontinuous sequences." [SO:ke] -synonym: "sequence collection" EXACT [] - -[Term] -id: SO:0001261 -name: overlapping_feature_set -def: "A continuous region of sequence composed of the overlapping of multiple sequence_features, which ultimately provides evidence for another sequence_feature." [SO:ke] -comment: This feature was requested by Nicole, tracker id 1911479. It is required to gather evidence together for annotation. An example would be overlapping ESTs that support an mRNA. -synonym: "overlapping feature set" EXACT [] -is_a: SO:0000703 ! experimental_result_region - -[Term] -id: SO:0001262 -name: overlapping_EST_set -def: "A continous experimental result region extending the length of multiple overlapping EST's." [SO:ke] -synonym: "overlapping EST set" EXACT [] -is_a: SO:0001261 ! overlapping_feature_set -relationship: has_part SO:0000345 ! 
EST - -[Term] -id: SO:0001263 -name: ncRNA_gene -synonym: "ncRNA gen" EXACT [] -synonym: "ncRNA gene" EXACT [] -synonym: "non-coding RNA gene" RELATED [] -is_a: SO:0000704 ! gene - -[Term] -id: SO:0001264 -name: gRNA_gene -synonym: "gRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000979 ! gRNA_encoding - -[Term] -id: SO:0001265 -name: miRNA_gene -alt_id: SO:0001270 -synonym: "miRNA gene" EXACT [] -synonym: "stRNA gene" EXACT [] -synonym: "stRNA_gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000571 ! miRNA_encoding -relationship: has_quality SO:0000656 ! stRNA_encoding - -[Term] -id: SO:0001266 -name: scRNA_gene -synonym: "scRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000575 ! scRNA_encoding - -[Term] -id: SO:0001267 -name: snoRNA_gene -synonym: "snoRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0001268 -name: snRNA_gene -synonym: "snRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0001263 ! ncRNA_gene - -[Term] -id: SO:0001269 -name: SRP_RNA_gene -synonym: "SRP RNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000642 ! SRP_RNA_encoding - -[Term] -id: SO:0001271 -name: tmRNA_gene -synonym: "tmRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000659 ! tmRNA_encoding - -[Term] -id: SO:0001272 -name: tRNA_gene -synonym: "tRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0000663 ! tRNA_encoding - -[Term] -id: SO:0001273 -name: modified_adenosine -def: "A modified adenine is an adenine base feature that has been altered." [SO:ke] -synonym: "modified adenosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001274 -name: modified_inosine -def: "A modified inosine is an inosine base feature that has been altered." 
[SO:ke] -synonym: "modified inosine" EXACT [] -is_a: SO:0001230 ! inosine - -[Term] -id: SO:0001275 -name: modified_cytidine -def: "A modified cytidine is a cytidine base feature which has been altered." [SO:ke] -synonym: "modified cytidine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001276 -name: modified_guanosine -synonym: "modified guanosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001277 -name: modified_uridine -synonym: "modified uridine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001278 -name: one_methylinosine -def: "1-methylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylinosine" EXACT [] -synonym: "m1I" EXACT RNAMOD [] -synonym: "one methylinosine" EXACT [] -xref: RNAMOD:018 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001279 -name: one_two_prime_O_dimethylinosine -def: "1,2'-O-dimethylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylinosine" EXACT [] -synonym: "m'Im" EXACT RNAMOD [] -synonym: "one two prime O dimethylinosine" EXACT [] -xref: RNAMOD:019 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001280 -name: two_prime_O_methylinosine -def: "2'-O-methylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylinosine" EXACT [] -synonym: "Im" EXACT RNAMOD [] -synonym: "two prime O methylinosine" EXACT [] -xref: RNAMOD:081 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001281 -name: three_methylcytidine -def: "3-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylcytidine" EXACT [] -synonym: "m3C" EXACT RNAMOD [] -synonym: "three methylcytidine" EXACT [] -xref: RNAMOD:020 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001282 -name: five_methylcytidine -def: "5-methylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylcytidine" EXACT [] -synonym: "five methylcytidine" EXACT [] -synonym: "m5C" EXACT RNAMOD [] -xref: RNAMOD:021 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001283 -name: two_prime_O_methylcytidine -def: "2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylcytidine" EXACT [] -synonym: "Cm" EXACT RNAMOD [] -synonym: "two prime O methylcytidine" EXACT [] -xref: RNAMOD:022 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001284 -name: two_thiocytidine -def: "2-thiocytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiocytidine" EXACT [] -synonym: "s2C" EXACT RNAMOD [] -synonym: "two thiocytidine" EXACT [] -xref: RNAMOD:023 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001285 -name: N4_acetylcytidine -def: "N4-acetylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4C" EXACT RNAMOD [] -synonym: "N4 acetylcytidine" EXACT [] -synonym: "N4-acetylcytidine" EXACT [] -xref: RNAMOD:024 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001286 -name: five_formylcytidine -def: "5-formylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formylcytidine" EXACT [] -synonym: "f5C" EXACT RNAMOD [] -synonym: "five formylcytidine" EXACT [] -xref: RNAMOD:025 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001287 -name: five_two_prime_O_dimethylcytidine -def: "5,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethylcytidine" EXACT [] -synonym: "five two prime O dimethylcytidine" EXACT [] -synonym: "m5Cm" EXACT RNAMOD [] -xref: RNAMOD:026 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001288 -name: N4_acetyl_2_prime_O_methylcytidine -def: "N4-acetyl-2'-O-methylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "ac4Cm" EXACT RNAMOD [] -synonym: "N4 acetyl 2 prime O methylcytidine" EXACT [] -synonym: "N4-acetyl-2'-O-methylcytidine" EXACT [] -xref: RNAMOD:027 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001289 -name: lysidine -def: "Lysidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "k2C" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Lysidine "wiki" -xref: RNAMOD:028 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001290 -name: N4_methylcytidine -def: "N4-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4C" EXACT RNAMOD [] -synonym: "N4 methylcytidine" EXACT [] -synonym: "N4-methylcytidine" EXACT [] -xref: RNAMOD:082 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001291 -name: N4_2_prime_O_dimethylcytidine -def: "N4,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4Cm" EXACT RNAMOD [] -synonym: "N4 2 prime O dimethylcytidine" EXACT [] -synonym: "N4,2'-O-dimethylcytidine" EXACT [] -xref: RNAMOD:083 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001292 -name: five_hydroxymethylcytidine -def: "5-hydroxymethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxymethylcytidine" EXACT [] -synonym: "five hydroxymethylcytidine" EXACT [] -synonym: "hm5C" EXACT RNAMOD [] -xref: RNAMOD:084 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001293 -name: five_formyl_two_prime_O_methylcytidine -def: "5-formyl-2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formyl-2'-O-methylcytidine" EXACT [] -synonym: "f5Cm" EXACT RNAMOD [] -synonym: "five formyl two prime O methylcytidine" EXACT [] -xref: RNAMOD:095 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001294 -name: N4_N4_2_prime_O_trimethylcytidine -def: "N4_N4_2_prime_O_trimethylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "m42Cm" EXACT RNAMOD [] -synonym: "N4,N4,2'-O-trimethylcytidine" EXACT [] -xref: RNAMOD:107 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001295 -name: one_methyladenosine -def: "1_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyladenosine" EXACT [] -synonym: "m1A" EXACT RNAMOD [] -synonym: "one methyladenosine" EXACT [] -xref: RNAMOD:001 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001296 -name: two_methyladenosine -def: "2_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methyladenosine" EXACT [] -synonym: "m2A" EXACT RNAMOD [] -synonym: "two methyladenosine" EXACT [] -xref: RNAMOD:002 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001297 -name: N6_methyladenosine -def: "N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6A" EXACT RNAMOD [] -synonym: "N6 methyladenosine" EXACT [] -synonym: "N6-methyladenosine" EXACT [] -xref: RNAMOD:003 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001298 -name: two_prime_O_methyladenosine -def: "2prime_O_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyladenosine" EXACT [] -synonym: "Am" EXACT RNAMOD [] -synonym: "two prime O methyladenosine" EXACT [] -xref: RNAMOD:004 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001299 -name: two_methylthio_N6_methyladenosine -def: "2_methylthio_N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-methyladenosine" EXACT [] -synonym: "ms2m6A" EXACT RNAMOD [] -synonym: "two methylthio N6 methyladenosine" EXACT [] -xref: RNAMOD:005 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001300 -name: N6_isopentenyladenosine -def: "N6_isopentenyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "i6A" EXACT RNAMOD [] -synonym: "N6 isopentenyladenosine" EXACT [] -synonym: "N6-isopentenyladenosine" EXACT [] -xref: RNAMOD:006 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001301 -name: two_methylthio_N6_isopentenyladenosine -def: "2_methylthio_N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-isopentenyladenosine" EXACT [] -synonym: "ms2i6A" EXACT RNAMOD [] -synonym: "two methylthio N6 isopentenyladenosine" EXACT [] -xref: RNAMOD:007 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001302 -name: N6_cis_hydroxyisopentenyl_adenosine -def: "N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "io6A" EXACT RNAMOD [] -synonym: "N6 cis hydroxyisopentenyl adenosine" EXACT [] -synonym: "N6-(cis-hydroxyisopentenyl)adenosine" EXACT [] -xref: RNAMOD:008 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001303 -name: two_methylthio_N6_cis_hydroxyisopentenyl_adenosine -def: "2_methylthio_N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-(cis-hydroxyisopentenyl) adenosine" EXACT [] -synonym: "ms2io6A" EXACT RNAMOD [] -synonym: "two methylthio N6 cis hydroxyisopentenyl adenosine" EXACT [] -xref: RNAMOD:009 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001304 -name: N6_glycinylcarbamoyladenosine -def: "N6_glycinylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "g6A" EXACT RNAMOD [] -synonym: "N6 glycinylcarbamoyladenosine" EXACT [] -synonym: "N6-glycinylcarbamoyladenosine" EXACT [] -xref: RNAMOD:010 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001305 -name: N6_threonylcarbamoyladenosine -def: "N6_threonylcarbamoyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-threonylcarbamoyladenosine" EXACT [] -synonym: "t6A" EXACT RNAMOD [] -xref: RNAMOD:011 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001306 -name: two_methylthio_N6_threonyl_carbamoyladenosine -def: "2_methylthio_N6_threonyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-threonyl carbamoyladenosine" EXACT [] -synonym: "ms2t6A" EXACT RNAMOD [] -synonym: "two methylthio N6 threonyl carbamoyladenosine" EXACT [] -xref: RNAMOD:012 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001307 -name: N6_methyl_N6_threonylcarbamoyladenosine -def: "N6_methyl_N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6t6A" EXACT RNAMOD [] -synonym: "N6 methyl N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-methyl-N6-threonylcarbamoyladenosine" EXACT [] -xref: RNAMOD:013 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001308 -name: N6_hydroxynorvalylcarbamoyladenosine -def: "N6_hydroxynorvalylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "hn6A" EXACT RNAMOD [] -synonym: "N6 hydroxynorvalylcarbamoyladenosine" EXACT [] -synonym: "N6-hydroxynorvalylcarbamoyladenosine" EXACT [] -xref: RNAMOD:014 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001309 -name: two_methylthio_N6_hydroxynorvalyl_carbamoyladenosine -def: "2_methylthio_N6_hydroxynorvalyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-hydroxynorvalyl carbamoyladenosine" EXACT [] -synonym: "ms2hn6A" EXACT RNAMOD [] -synonym: "two methylthio N6 hydroxynorvalyl carbamoyladenosine" EXACT [] -xref: RNAMOD:015 -is_a: SO:0001273 ! 
modified_adenosine - -[Term] -id: SO:0001310 -name: two_prime_O_ribosyladenosine_phosphate -def: "2prime_O_ribosyladenosine_phosphate is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosyladenosine (phosphate)" EXACT [] -synonym: "Ar(p)" EXACT RNAMOD [] -synonym: "two prime O ribosyladenosine phosphate" EXACT [] -xref: RNAMOD:016 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001311 -name: N6_N6_dimethyladenosine -def: "N6_N6_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62A" EXACT RNAMOD [] -synonym: "N6,N6-dimethyladenosine" EXACT [] -xref: RNAMOD:080 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001312 -name: N6_2_prime_O_dimethyladenosine -def: "N6_2prime_O_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6Am" EXACT RNAMOD [] -synonym: "N6 2 prime O dimethyladenosine" EXACT [] -synonym: "N6,2'-O-dimethyladenosine" EXACT [] -xref: RNAMOD:088 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001313 -name: N6_N6_2_prime_O_trimethyladenosine -def: "N6_N6_2prime_O_trimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62Am" EXACT RNAMOD [] -synonym: "N6,N6,2'-O-trimethyladenosine" EXACT [] -xref: RNAMOD:089 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001314 -name: one_two_prime_O_dimethyladenosine -def: "1,2'-O-dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethyladenosine" EXACT [] -synonym: "m1Am" EXACT RNAMOD [] -synonym: "one two prime O dimethyladenosine" EXACT [] -xref: RNAMOD:097 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001315 -name: N6_acetyladenosine -def: "N6_acetyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "ac6A" EXACT RNAMOD [] -synonym: "N6 acetyladenosine" EXACT [] -synonym: "N6-acetyladenosine" EXACT [] -xref: RNAMOD:102 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001316 -name: seven_deazaguanosine -def: "7-deazaguanosine is a modified guanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-deazaguanosine" RELATED [] -synonym: "seven deazaguanosine" EXACT [] -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001317 -name: queuosine -def: "Queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "Q" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Queuosine "wiki" -xref: RNAMOD:043 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001318 -name: epoxyqueuosine -def: "Epoxyqueuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "eQ" EXACT RNAMOD [] -xref: RNAMOD:044 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001319 -name: galactosyl_queuosine -def: "Galactosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "galactosyl queuosine" EXACT [] -synonym: "galactosyl-queuosine" EXACT [] -synonym: "galQ" EXACT RNAMOD [] -xref: RNAMOD:045 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001320 -name: mannosyl_queuosine -def: "Mannosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "mannosyl queuosine" EXACT [] -synonym: "mannosyl-queuosine" EXACT [] -synonym: "manQ" EXACT RNAMOD [] -xref: RNAMOD:046 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001321 -name: seven_cyano_seven_deazaguanosine -def: "7_cyano_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-cyano-7-deazaguanosine" EXACT [] -synonym: "preQ0" EXACT RNAMOD [] -synonym: "seven cyano seven deazaguanosine" EXACT [] -xref: RNAMOD:047 -is_a: SO:0001316 ! 
seven_deazaguanosine - -[Term] -id: SO:0001322 -name: seven_aminomethyl_seven_deazaguanosine -def: "7_aminomethyl_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-aminomethyl-7-deazaguanosine" EXACT [] -synonym: "preQ1" EXACT RNAMOD [] -synonym: "seven aminomethyl seven deazaguanosine" EXACT [] -xref: RNAMOD:048 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001323 -name: archaeosine -def: "Archaeosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "G+" EXACT RNAMOD [] -xref: RNAMOD:049 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001324 -name: one_methylguanosine -def: "1_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylguanosine" EXACT [] -synonym: "m1G" EXACT RNAMOD [] -synonym: "one methylguanosine" EXACT [] -xref: RNAMOD:029 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001325 -name: N2_methylguanosine -def: "N2_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2G" EXACT RNAMOD [] -synonym: "N2 methylguanosine" EXACT [] -synonym: "N2-methylguanosine" EXACT [] -xref: RNAMOD:030 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001326 -name: seven_methylguanosine -def: "7_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "7-methylguanosine" EXACT [] -synonym: "m7G" EXACT RNAMOD [] -synonym: "seven methylguanosine" EXACT [] -xref: RNAMOD:031 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001327 -name: two_prime_O_methylguanosine -def: "2prime_O_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylguanosine" EXACT [] -synonym: "Gm" EXACT RNAMOD [] -synonym: "two prime O methylguanosine" EXACT [] -xref: RNAMOD:032 -is_a: SO:0001276 ! 
modified_guanosine - -[Term] -id: SO:0001328 -name: N2_N2_dimethylguanosine -def: "N2_N2_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22G" EXACT RNAMOD [] -synonym: "N2,N2-dimethylguanosine" EXACT [] -xref: RNAMOD:033 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001329 -name: N2_2_prime_O_dimethylguanosine -def: "N2_2prime_O_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2Gm" EXACT RNAMOD [] -synonym: "N2 2 prime O dimethylguanosine" EXACT [] -synonym: "N2,2'-O-dimethylguanosine" EXACT [] -xref: RNAMOD:034 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001330 -name: N2_N2_2_prime_O_trimethylguanosine -def: "N2_N2_2prime_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22Gmv" EXACT RNAMOD [] -synonym: "N2,N2,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:035 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001331 -name: two_prime_O_ribosylguanosine_phosphate -def: "2prime_O_ribosylguanosine_phosphate is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosylguanosine (phosphate)" EXACT [] -synonym: "Gr(p)" EXACT RNAMOD [] -synonym: "two prime O ribosylguanosine phosphate" EXACT [] -xref: RNAMOD:036 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001332 -name: wybutosine -def: "Wybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "yW" EXACT RNAMOD [] -xref: RNAMOD:037 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001333 -name: peroxywybutosine -def: "Peroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "o2yW" EXACT RNAMOD [] -xref: RNAMOD:038 -is_a: SO:0001276 ! 
modified_guanosine - -[Term] -id: SO:0001334 -name: hydroxywybutosine -def: "Hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW" EXACT RNAMOD [] -xref: RNAMOD:039 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001335 -name: undermodified_hydroxywybutosine -def: "Undermodified_hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW*" EXACT RNAMOD [] -synonym: "undermodified hydroxywybutosine" EXACT [] -xref: RNAMOD:040 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001336 -name: wyosine -def: "Wyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "IMG" EXACT RNAMOD [] -xref: RNAMOD:041 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001337 -name: methylwyosine -def: "Methylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mimG" EXACT RNAMOD [] -xref: RNAMOD:042 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001338 -name: N2_7_dimethylguanosine -def: "N2_7_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7G" EXACT RNAMOD [] -synonym: "N2 7 dimethylguanosine" EXACT [] -synonym: "N2,7-dimethylguanosine" EXACT [] -xref: RNAMOD:090 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001339 -name: N2_N2_7_trimethylguanosine -def: "N2_N2_7_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,2,7G" EXACT RNAMOD [] -synonym: "N2,N2,7-trimethylguanosine" EXACT [] -xref: RNAMOD:091 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001340 -name: one_two_prime_O_dimethylguanosine -def: "1_2prime_O_dimethylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylguanosine" EXACT [] -synonym: "m1Gm" EXACT RNAMOD [] -synonym: "one two prime O dimethylguanosine" EXACT [] -xref: RNAMOD:096 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001341 -name: four_demethylwyosine -def: "4_demethylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-demethylwyosine" EXACT [] -synonym: "four demethylwyosine" EXACT [] -synonym: "imG-14" EXACT RNAMOD [] -xref: RNAMOD:100 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001342 -name: isowyosine -def: "Isowyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "imG2" EXACT RNAMOD [] -xref: RNAMOD:101 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001343 -name: N2_7_2prirme_O_trimethylguanosine -def: "N2_7_2prirme_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7Gm" EXACT RNAMOD [] -synonym: "N2 7 2prirme O trimethylguanosine" EXACT [] -synonym: "N2,7,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:106 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001344 -name: five_methyluridine -def: "5_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methyluridine" EXACT [] -synonym: "five methyluridine" EXACT [] -synonym: "m5U" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/5-methyluridine "wiki" -xref: RNAMOD:052 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001345 -name: two_prime_O_methyluridine -def: "2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyluridine" EXACT [] -synonym: "two prime O methyluridine" EXACT [] -synonym: "Um" EXACT RNAMOD [] -xref: RNAMOD:053 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001346 -name: five_two_prime_O_dimethyluridine -def: "5_2_prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethyluridine" EXACT [] -synonym: "five two prime O dimethyluridine" EXACT [] -synonym: "m5Um" EXACT RNAMOD [] -xref: RNAMOD:054 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001347 -name: one_methylpseudouridine -def: "1_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylpseudouridine" EXACT [] -synonym: "m1Y" EXACT RNAMOD [] -synonym: "one methylpseudouridine" EXACT [] -xref: RNAMOD:055 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001348 -name: two_prime_O_methylpseudouridine -def: "2prime_O_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylpseudouridine" EXACT [] -synonym: "two prime O methylpseudouridine" EXACT [] -synonym: "Ym" EXACT RNAMOD [] -xref: RNAMOD:056 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001349 -name: two_thiouridine -def: "2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiouridine" EXACT [] -synonym: "s2U" EXACT RNAMOD [] -synonym: "two thiouridine" EXACT [] -xref: RNAMOD:057 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001350 -name: four_thiouridine -def: "4_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-thiouridine" EXACT [] -synonym: "four thiouridine" EXACT [] -synonym: "s4U" EXACT RNAMOD [] -xref: RNAMOD:058 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001351 -name: five_methyl_2_thiouridine -def: "5_methyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyl-2-thiouridine" EXACT [] -synonym: "five methyl 2 thiouridine" EXACT [] -synonym: "m5s2U" EXACT RNAMOD [] -xref: RNAMOD:059 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001352 -name: two_thio_two_prime_O_methyluridine -def: "2_thio_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thio-2'-O-methyluridine" EXACT [] -synonym: "s2Um" EXACT RNAMOD [] -synonym: "two thio two prime O methyluridine" EXACT [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001353 -name: three_three_amino_three_carboxypropyl_uridine -def: "3_3_amino_3_carboxypropyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-(3-amino-3-carboxypropyl)uridine" EXACT [] -synonym: "acp3U" EXACT RNAMOD [] -xref: RNAMOD:061 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001354 -name: five_hydroxyuridine -def: "5_hydroxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxyuridine" EXACT [] -synonym: "five hydroxyuridine" EXACT [] -synonym: "ho5U" EXACT RNAMOD [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001355 -name: five_methoxyuridine -def: "5_methoxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxyuridine" EXACT [] -synonym: "five methoxyuridine" EXACT [] -synonym: "mo5U" EXACT RNAMOD [] -xref: RNAMOD:063 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001356 -name: uridine_five_oxyacetic_acid -def: "Uridine_5_oxyacetic_acid is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "cmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid" EXACT [] -synonym: "uridine five oxyacetic acid" EXACT [] -xref: RNAMOD:064 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001357 -name: uridine_five_oxyacetic_acid_methyl_ester -def: "Uridine_5_oxyacetic_acid_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mcmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid methyl ester" EXACT [] -synonym: "uridine five oxyacetic acid methyl ester" EXACT [] -xref: RNAMOD:065 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001358 -name: five_carboxyhydroxymethyl_uridine -def: "5_carboxyhydroxymethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine" EXACT [] -synonym: "chm5U" EXACT RNAMOD [] -synonym: "five carboxyhydroxymethyl uridine" EXACT [] -xref: RNAMOD:066 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001359 -name: five_carboxyhydroxymethyl_uridine_methyl_ester -def: "5_carboxyhydroxymethyl_uridine_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine methyl ester" EXACT [] -synonym: "five carboxyhydroxymethyl uridine methyl ester" EXACT [] -synonym: "mchm5U" EXACT RNAMOD [] -xref: RNAMOD:067 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001360 -name: five_methoxycarbonylmethyluridine -def: "Five_methoxycarbonylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyluridine" EXACT [] -synonym: "five methoxycarbonylmethyluridine" EXACT [] -synonym: "mcm5U" EXACT RNAMOD [] -xref: RNAMOD:068 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001361 -name: five_methoxycarbonylmethyl_two_prime_O_methyluridine -def: "Five_methoxycarbonylmethyl_2_prime_O_methyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five methoxycarbonylmethyl two prime O methyluridine" EXACT [] -synonym: "mcm5Um" EXACT RNAMOD [] -xref: RNAMOD:069 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001362 -name: five_methoxycarbonylmethyl_two_thiouridine -def: "5_methoxycarbonylmethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2-thiouridine" EXACT [] -synonym: "five methoxycarbonylmethyl two thiouridine" EXACT [] -synonym: "mcm5s2U" EXACT RNAMOD [] -xref: RNAMOD:070 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001363 -name: five_aminomethyl_two_thiouridine -def: "5_aminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-aminomethyl-2-thiouridine" EXACT [] -synonym: "five aminomethyl two thiouridine" EXACT [] -synonym: "nm5s2U" EXACT RNAMOD [] -xref: RNAMOD:071 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001364 -name: five_methylaminomethyluridine -def: "5_methylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyluridine" EXACT [] -synonym: "five methylaminomethyluridine" EXACT [] -synonym: "mnm5U" EXACT RNAMOD [] -xref: RNAMOD:072 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001365 -name: five_methylaminomethyl_two_thiouridine -def: "5_methylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-thiouridine" EXACT [] -synonym: "five methylaminomethyl two thiouridine" EXACT [] -synonym: "mnm5s2U" EXACT RNAMOD [] -xref: RNAMOD:073 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001366 -name: five_methylaminomethyl_two_selenouridine -def: "5_methylaminomethyl_2_selenouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-selenouridine" EXACT [] -synonym: "five methylaminomethyl two selenouridine" EXACT [] -synonym: "mnm5se2U" EXACT RNAMOD [] -xref: RNAMOD:074 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001367 -name: five_carbamoylmethyluridine -def: "5_carbamoylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyluridine" EXACT [] -synonym: "five carbamoylmethyluridine" EXACT [] -synonym: "ncm5U" EXACT RNAMOD [] -xref: RNAMOD:075 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001368 -name: five_carbamoylmethyl_two_prime_O_methyluridine -def: "5_carbamoylmethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five carbamoylmethyl two prime O methyluridine" EXACT [] -synonym: "ncm5Um" EXACT RNAMOD [] -xref: RNAMOD:076 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001369 -name: five_carboxymethylaminomethyluridine -def: "5_carboxymethylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyluridine" EXACT [] -synonym: "cmnm5U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyluridine" EXACT [] -xref: RNAMOD:077 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001370 -name: five_carboxymethylaminomethyl_two_prime_O_methyluridine -def: "5_carboxymethylaminomethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl- 2'-O-methyluridine" EXACT [] -synonym: "cmnm5Um" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two prime O methyluridine" EXACT [] -xref: RNAMOD:078 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001371 -name: five_carboxymethylaminomethyl_two_thiouridine -def: "5_carboxymethylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl-2-thiouridine" EXACT [] -synonym: "cmnm5s2U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two thiouridine" EXACT [] -xref: RNAMOD:079 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001372 -name: three_methyluridine -def: "3_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methyluridine" EXACT [] -synonym: "m3U" EXACT RNAMOD [] -synonym: "three methyluridine" EXACT [] -xref: RNAMOD:085 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001373 -name: one_methyl_three_three_amino_three_carboxypropyl_pseudouridine -def: "1_methyl_3_3_amino_3_carboxypropyl_pseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyl-3-(3-amino-3-carboxypropyl) pseudouridine" EXACT [] -synonym: "m1acp3Y" EXACT RNAMOD [] -xref: RNAMOD:086 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001374 -name: five_carboxymethyluridine -def: "5_carboxymethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethyluridine" EXACT [] -synonym: "cm5U" EXACT RNAMOD [] -synonym: "five carboxymethyluridine" EXACT [] -xref: RNAMOD:087 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001375 -name: three_two_prime_O_dimethyluridine -def: "3_2prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3,2'-O-dimethyluridine" EXACT [] -synonym: "m3Um" EXACT RNAMOD [] -synonym: "three two prime O dimethyluridine" EXACT [] -xref: RNAMOD:092 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001376 -name: five_methyldihydrouridine -def: "5_methyldihydrouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyldihydrouridine" EXACT [] -synonym: "five methyldihydrouridine" EXACT [] -synonym: "m5D" EXACT RNAMOD [] -xref: RNAMOD:093 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001377 -name: three_methylpseudouridine -def: "3_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylpseudouridine" EXACT [] -synonym: "m3Y" EXACT RNAMOD [] -synonym: "three methylpseudouridine" EXACT [] -xref: RNAMOD:094 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001378 -name: five_taurinomethyluridine -def: "5_taurinomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyluridine" EXACT [] -synonym: "five taurinomethyluridine" EXACT [] -synonym: "tm5U" EXACT RNAMOD [] -xref: RNAMOD:098 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001379 -name: five_taurinomethyl_two_thiouridine -def: "5_taurinomethyl_2_thiouridineis a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyl-2-thiouridine" EXACT [] -synonym: "five taurinomethyl two thiouridine" EXACT [] -synonym: "tm5s2U" EXACT RNAMOD [] -xref: RNAMOD:099 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001380 -name: five_isopentenylaminomethyl_uridine -def: "5_isopentenylaminomethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)uridine" EXACT [] -synonym: "five isopentenylaminomethyl uridine" EXACT [] -synonym: "inm5U" EXACT RNAMOD [] -xref: RNAMOD:103 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001381 -name: five_isopentenylaminomethyl_two_thiouridine -def: "5_isopentenylaminomethyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2-thiouridine" EXACT [] -synonym: "five isopentenylaminomethyl two thiouridine" EXACT [] -synonym: "inm5s2U" EXACT RNAMOD [] -xref: RNAMOD:104 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001382 -name: five_isopentenylaminomethyl_two_prime_O_methyluridine -def: "5_isopentenylaminomethyl_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2'-O-methyluridine" EXACT [] -synonym: "five isopentenylaminomethyl two prime O methyluridine" EXACT [] -synonym: "inm5Um" EXACT RNAMOD [] -xref: RNAMOD:105 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001383 -name: histone_binding_site -def: "A binding site that, in the nucleotide molecule, interacts selectively and non-covalently with polypeptide residues of a histone." [SO:ke] -synonym: "histone binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site - -[Term] -id: SO:0001384 -name: CDS_fragment -synonym: "CDS fragment" EXACT [] -synonym: "incomplete CDS" EXACT [] -is_a: SO:0000316 ! CDS -relationship: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001385 -name: modified_amino_acid_feature -def: "A post translationally modified amino acid feature." [SO:ke] -synonym: "modified amino acid feature" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001386 -name: modified_glycine -def: "A post translationally modified glycine amino acid feature." [SO:ke] -synonym: "ModGly" EXACT AAMOD [] -synonym: "modified glycine" EXACT [] -xref: MOD:00908 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001387 -name: modified_L_alanine -def: "A post translationally modified alanine amino acid feature." [SO:ke] -synonym: "ModAla" EXACT AAMOD [] -synonym: "modified L alanine" EXACT [] -synonym: "modified L-alanine" EXACT [] -xref: MOD:00901 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001388 -name: modified_L_asparagine -def: "A post translationally modified asparagine amino acid feature." [SO:ke] -synonym: "ModAsn" EXACT AAMOD [] -synonym: "modified L asparagine" EXACT [] -synonym: "modified L-asparagine" EXACT [] -xref: MOD:00903 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001389 -name: modified_L_aspartic_acid -def: "A post translationally modified aspartic acid amino acid feature." [SO:ke] -synonym: "ModAsp" EXACT AAMOD [] -synonym: "modified L aspartic acid" EXACT [] -synonym: "modified L-aspartic acid" EXACT [] -xref: MOD:00904 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001390 -name: modified_L_cysteine -def: "A post translationally modified cysteine amino acid feature." [SO:ke] -synonym: "ModCys" EXACT AAMOD [] -synonym: "modified L cysteine" EXACT [] -synonym: "modified L-cysteine" EXACT [] -xref: MOD:00905 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001391 -name: modified_L_glutamic_acid -synonym: "ModGlu" EXACT AAMOD [] -synonym: "modified L glutamic acid" EXACT [] -synonym: "modified L-glutamic acid" EXACT [] -xref: MOD:00906 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001392 -name: modified_L_threonine -def: "A post translationally modified threonine amino acid feature." [SO:ke] -synonym: "modified L threonine" EXACT [] -synonym: "modified L-threonine" EXACT [] -synonym: "ModThr" EXACT AAMOD [] -xref: MOD:00917 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001393 -name: modified_L_tryptophan -def: "A post translationally modified tryptophan amino acid feature." [SO:ke] -synonym: "modified L tryptophan" EXACT [] -synonym: "modified L-tryptophan" EXACT [] -synonym: "ModTrp" EXACT AAMOD [] -xref: MOD:00918 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001394 -name: modified_L_glutamine -def: "A post translationally modified glutamine amino acid feature." 
[SO:ke] -synonym: "ModGln" EXACT [] -synonym: "modified L glutamine" EXACT [] -synonym: "modified L-glutamine" EXACT [] -xref: MOD:00907 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001395 -name: modified_L_methionine -def: "A post translationally modified methionine amino acid feature." [SO:ke] -synonym: "modified L methionine" EXACT [] -synonym: "modified L-methionine" EXACT [] -synonym: "ModMet" EXACT AAMOD [] -xref: MOD:00913 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001396 -name: modified_L_isoleucine -def: "A post translationally modified isoleucine amino acid feature." [SO:ke] -synonym: "modified L isoleucine" EXACT [] -synonym: "modified L-isoleucine" EXACT [] -synonym: "ModIle" EXACT AAMOD [] -xref: MOD:00910 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001397 -name: modified_L_phenylalanine -def: "A post translationally modified phenylalanine amino acid feature." [SO:ke] -synonym: "modified L phenylalanine" EXACT [] -synonym: "modified L-phenylalanine" EXACT [] -synonym: "ModPhe" EXACT AAMOD [] -xref: MOD:00914 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001398 -name: modified_L_histidine -def: "A post translationally modified histidine amino acid feature." [SO:ke] -synonym: "ModHis" EXACT [] -synonym: "modified L histidine" EXACT [] -synonym: "modified L-histidine" EXACT [] -xref: MOD:00909 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001399 -name: modified_L_serine -def: "A post translationally modified serine amino acid feature." [SO:ke] -synonym: "modified L serine" EXACT [] -synonym: "modified L-serine" EXACT [] -synonym: "MosSer" EXACT AAMOD [] -xref: MOD:00916 "http://www.psidev.info/index.php?q=node/104" -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001400 -name: modified_L_lysine -def: "A post translationally modified lysine amino acid feature." 
[SO:ke] -synonym: "modified L lysine" EXACT [] -synonym: "modified L-lysine" EXACT [] -synonym: "ModLys" EXACT AAMOD [] -xref: MOD:00912 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001401 -name: modified_L_leucine -def: "A post translationally modified leucine amino acid feature." [SO:ke] -synonym: "modified L leucine" EXACT [] -synonym: "modified L-leucine " EXACT [] -synonym: "ModLeu" EXACT AAMOD [] -xref: MOD:00911 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001402 -name: modified_L_selenocysteine -def: "A post translationally modified selenocysteine amino acid feature." [SO:ke] -synonym: "modified L selenocysteine" EXACT [] -synonym: "modified L-selenocysteine" EXACT [] -xref: MOD:01158 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001403 -name: modified_L_valine -def: "A post translationally modified valine amino acid feature." [SO:ke] -synonym: "modified L valine" EXACT [] -synonym: "modified L-valine" EXACT [] -synonym: "ModVal" EXACT AAMOD [] -xref: MOD:00920 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001404 -name: modified_L_proline -def: "A post translationally modified proline amino acid feature." [SO:ke] -synonym: "modified L proline" EXACT [] -synonym: "modified L-proline " EXACT [] -synonym: "ModPro" EXACT AAMOD [] -xref: MOD:00915 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001405 -name: modified_L_tyrosine -def: "A post translationally modified tyrosine amino acid feature." [SO:ke] -synonym: "modified L tyrosine" EXACT [] -synonym: "modified L-tyrosine" EXACT [] -synonym: "ModTry" EXACT AAMOD [] -xref: MOD:00919 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001406 -name: modified_L_arginine -def: "A post translationally modified arginine amino acid feature." [SO:ke] -synonym: "ModArg" EXACT AAMOD [] -synonym: "modified L arginine" EXACT [] -synonym: "modified L-arginine" EXACT [] -xref: MOD:00902 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001407 -name: peptidyl -def: "An attribute describing the nature of a proteinaceous polymer, where by the amino acid units are joined by peptide bonds." [SO:ke] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0001408 -name: cleaved_for_gpi_anchor_region -def: "The C-terminal residues of a polypeptide which are exchanged for a GPI-anchor." [EBI:rh] -synonym: "cleaved for gpi anchor region" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001409 -name: biomaterial_region -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001410 -name: experimental_feature -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -def: "A region that is defined according to its relations with other regions within the same sequence." [SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001413 -name: translocation_breakpoint -def: "The point within a chromosome where a translocation begins or ends." [SO:cb] -synonym: "translocation breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001414 -name: insertion_breakpoint -def: "The point within a chromosome where a insertion begins or ends." 
[SO:cb] -synonym: "insertion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001415 -name: deletion_breakpoint -def: "The point within a chromosome where a deletion begins or ends." [SO:cb] -synonym: "deletion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001416 -name: five_prime_flanking_region -def: "A flanking region located five prime of a specific region." [SO:chado] -synonym: "5' flanking region" RELATED [] -synonym: "five prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001417 -name: three_prime_flanking_region -def: "A flanking region located three prime of a specific region." [SO:chado] -synonym: "3' flanking region" RELATED [] -synonym: "three prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001418 -name: transcribed_fragment -def: "An experimental region, defined by a tiling array experiment to be transcribed at some level." [SO:ke] -comment: Term requested by the MODencode group. -synonym: "transcribed fragment" EXACT [] -synonym: "transfrag" RELATED [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001419 -name: cis_splice_site -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -def: "Primary transcript region bordering trans-splice junction." [SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001421 -name: splice_junction -def: "The boundary between an intron and an exon." [SO:ke] -synonym: "splice boundary" EXACT [] -synonym: "splice junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001422 -name: conformational_switch -def: "A region of a polypeptide, involved in the transition from one conformational state to another." 
[SO:ke] -comment: MM Young, K Kirshenbaum, KA Dill & S Highsmith. Predicting conformational switches in proteins. Protein Science, 1999, 8, 1752-64. K. Kirshenbaum, M.M. Young and S. Highsmith. Predicting Allosteric Switches in Myosins. Protein Science 8(9):1806-1815. 1999. -synonym: "polypeptide conformational switch" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001423 -name: dye_terminator_read -def: "A read produced by the dye terminator method of sequencing." [SO:ke] -synonym: "dye terminator read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001424 -name: pyrosequenced_read -def: "A read produced by pyrosequencing technology." [SO:ke] -comment: An example is a read produced by Roche 454 technology. -synonym: "pyorsequenced read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001425 -name: ligation_based_read -def: "A read produced by ligation based sequencing technologies." [SO:ke] -comment: An example of this kind of read is one produced by ABI SOLiD. -synonym: "ligation based read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001426 -name: polymerase_synthesis_read -def: "A read produced by the polymerase based sequence by synthesis method." [SO:ke] -comment: An example is a read produced by Illumina technology. -synonym: "polymerase synthesis read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001427 -name: cis_regulatory_frameshift_element -def: "A structural region in an RNA molecule which promotes ribosomal frameshifting of cis coding sequence." [RFAM:jd] -synonym: "cis regulatory frameshift element" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001428 -name: expressed_sequence_assembly -def: "A sequence assembly derived from expressed sequences." [SO:ke] -comment: From tracker [ 2372385 ] expressed_sequence_assembly. -synonym: "expressed sequence assembly" EXACT [] -is_a: SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0001429 -name: DNA_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with DNA." [SO:ke] -synonym: "DNA binding site" EXACT [] -is_a: SO:0001655 ! nucleotide_binding_site - -[Term] -id: SO:0001431 -name: cryptic_gene -def: "A gene that is not transcribed under normal conditions and is not critical to normal cellular functioning." [SO:ke] -synonym: "cryptic gene" EXACT [] -is_a: SO:0000704 ! gene -relationship: has_quality SO:0000976 ! cryptic - -[Term] -id: SO:0001432 -name: sequence_variant_affecting_polyadenylation -comment: OBSOLETE: This term was deleted as it conflated more than one term. The alteration is separate from the effect. -synonym: "mutation affecting polyadenylation" RELATED [] -synonym: "sequence variant affecting polyadenylation" EXACT [] -is_obsolete: true -replaced_by: SO:0001545 - -[Term] -id: SO:0001433 -name: three_prime_RACE_clone -def: "A three prime RACE (Rapid Amplification of cDNA Ends) clone is a cDNA clone copied from the 3' end of an mRNA (using a poly-dT primer to capture the polyA tail and a gene-specific or randomly primed 5' primer), and spliced into a vector for propagation in a suitable host." [modENCODE:nlw] -synonym: "3' RACE clone" RELATED [] -is_a: SO:0000317 ! cDNA_clone - -[Term] -id: SO:0001434 -name: cassette_pseudogene -def: "A cassette pseudogene is a kind of gene in an inactive form which may recombine at a telomeric locus to form a functional copy." [SO:ke] -comment: Requested by the Trypanosome community. -synonym: "cassette pseudogene" EXACT [] -synonym: "cassette type psedogene" RELATED [] -is_a: SO:0001760 ! non_processed_pseudogene - -[Term] -id: SO:0001435 -name: alanine -comment: A place holder for a cross product with chebi. -synonym: "A" EXACT aa1 [] -synonym: "Ala" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001436 -name: valine -comment: A place holder for a cross product with chebi. 
-synonym: "V" EXACT aa1 [] -synonym: "Val" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001437 -name: leucine -comment: A place holder for a cross product with chebi. -synonym: "L" EXACT aa1 [] -synonym: "Leu" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001438 -name: isoleucine -comment: A place holder for a cross product with chebi. -synonym: "I" EXACT aa1 [] -synonym: "Ile" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001439 -name: proline -comment: A place holder for a cross product with chebi. -synonym: "P" EXACT aa1 [] -synonym: "Pro" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001440 -name: tryptophan -comment: A place holder for a cross product with chebi. -synonym: "Trp" EXACT aa3 [] -synonym: "W" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001441 -name: phenylalanine -comment: A place holder for a cross product with chebi. -synonym: "F" EXACT aa1 [] -synonym: "Phe" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001442 -name: methionine -comment: A place holder for a cross product with chebi. -synonym: "M" EXACT aa1 [] -synonym: "Met" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001443 -name: glycine -comment: A place holder for a cross product with chebi. -synonym: "G" EXACT aa1 [] -synonym: "Gly" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001444 -name: serine -comment: A place holder for a cross product with chebi. -synonym: "S" EXACT aa1 [] -synonym: "Ser" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001445 -name: threonine -comment: A place holder for a cross product with chebi. -synonym: "T" EXACT aa1 [] -synonym: "Thr" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001446 -name: tyrosine -comment: A place holder for a cross product with chebi. -synonym: "Tyr" EXACT aa3 [] -synonym: "Y" EXACT aa1 [] -is_a: SO:0001237 ! 
amino_acid - -[Term] -id: SO:0001447 -name: cysteine -comment: A place holder for a cross product with chebi. -synonym: "C" EXACT aa1 [] -synonym: "Cys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001448 -name: glutamine -comment: A place holder for a cross product with chebi. -synonym: "Gln" EXACT aa3 [] -synonym: "Q" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001449 -name: asparagine -comment: A place holder for a cross product with chebi. -synonym: "Asn" EXACT aa3 [] -synonym: "N" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001450 -name: lysine -comment: A place holder for a cross product with chebi. -synonym: "K" EXACT aa1 [] -synonym: "Lys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001451 -name: arginine -comment: A place holder for a cross product with chebi. -synonym: "Arg" EXACT aa3 [] -synonym: "R" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001452 -name: histidine -comment: A place holder for a cross product with chebi. -synonym: "H" EXACT aa1 [] -synonym: "His" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001453 -name: aspartic_acid -comment: A place holder for a cross product with chebi. -synonym: "Asp" EXACT aa3 [] -synonym: "aspartic acid" EXACT [] -synonym: "D" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001454 -name: glutamic_acid -comment: A place holder for a cross product with chebi. -synonym: "E" EXACT aa1 [] -synonym: "Glu" EXACT aa3 [] -synonym: "glutamic acid" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001455 -name: selenocysteine -comment: A place holder for a cross product with chebi. -synonym: "Sec" EXACT aa3 [] -synonym: "U" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001456 -name: pyrrolysine -comment: A place holder for a cross product with chebi. -synonym: "O" EXACT aa1 [] -synonym: "Pyl" EXACT aa3 [] -is_a: SO:0001237 ! 
amino_acid - -[Term] -id: SO:0001457 -name: transcribed_cluster -def: "A region defined by a set of transcribed sequences from the same gene or expressed pseudogene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "transcribed cluster" EXACT [] -synonym: "unigene cluster" RELATED [] -is_a: SO:0001410 ! experimental_feature -relationship: has_part SO:0000695 ! reagent - -[Term] -id: SO:0001458 -name: unigene_cluster -def: "A kind of transcribed_cluster defined by a set of transcribed sequences from the a unique gene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "unigene cluster" RELATED [] -is_a: SO:0001457 ! transcribed_cluster - -[Term] -id: SO:0001459 -name: CRISPR -def: "Clustered Palindromic Repeats interspersed with bacteriophage derived spacer sequences." [RFAM:jd] -synonym: "Clustered_Regularly_Interspaced_Short_Palindromic_Repeat" EXACT [] -synonym: "CRISPR element" EXACT [] -xref: http:en.wikipedia.org/wiki/CRISPR -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0001460 -name: insulator_binding_site -def: "A binding site that, in an insulator region of a nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -comment: See tracker ID 2060908. -synonym: "insulator binding site" RELATED [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -relationship: part_of SO:0000627 ! insulator - -[Term] -id: SO:0001461 -name: enhancer_binding_site -def: "A binding site that, in the enhancer region of a nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -synonym: "enhancer binding site" RELATED [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -relationship: part_of SO:0000165 ! enhancer - -[Term] -id: SO:0001462 -name: contig_collection -def: "A collection of contigs." [SO:ke] -comment: See tracker ID: 2138359. 
-synonym: "contig collection" EXACT [] -is_a: SO:0001085 ! sequence_conflict -is_a: SO:0001260 ! sequence_collection -relationship: has_part SO:0000149 ! contig - -[Term] -id: SO:0001463 -name: lincRNA -def: "A multiexonic non-coding RNA transcribed by RNA polymerase II." [PMID:19182780, SO:ke] -synonym: "large intervening non-coding RNA" EXACT [] -synonym: "long intergenic non-coding RNA" EXACT [] -is_a: SO:0001877 ! lnc_RNA - -[Term] -id: SO:0001464 -name: UST -def: "An EST spanning part or all of the untranslated regions of a protein-coding transcript." [SO:nlw] -synonym: "UTR sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001465 -name: three_prime_UST -def: "A UST located in the 3'UTR of a protein-coding transcript." [SO:nlw] -synonym: "3' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001466 -name: five_prime_UST -def: "An UST located in the 5'UTR of a protein-coding transcript." [SO:nlw] -synonym: "5' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001467 -name: RST -def: "A tag produced from a single sequencing read from a RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "RACE sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001468 -name: three_prime_RST -def: "A tag produced from a single sequencing read from a 3'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "3' RST" EXACT [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001469 -name: five_prime_RST -def: "A tag produced from a single sequencing read from a 5'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "5' RST" RELATED [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001470 -name: UST_match -def: "A match against an UST sequence." [SO:nlw] -synonym: "UST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001471 -name: RST_match -def: "A match against an RST sequence." [SO:nlw] -synonym: "RST match" EXACT [] -is_a: SO:0000102 ! 
expressed_sequence_match - -[Term] -id: SO:0001472 -name: primer_match -def: "A nucleotide match to a primer sequence." [SO:nlw] -synonym: "primer match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0001473 -name: miRNA_antiguide -def: "A region of the pri miRNA that base pairs with the guide to form the hairpin." [SO:ke] -synonym: "miRNA antiguide " EXACT [] -synonym: "miRNA passenger strand" EXACT [] -synonym: "miRNA star" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-05-27T03:35:43Z - -[Term] -id: SO:0001474 -name: trans_splice_junction -def: "The boundary between the spliced leader and the first exon of the mRNA." [SO:ke] -synonym: "trans-splice junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2009-07-13T04:50:49Z - -[Term] -id: SO:0001475 -name: outron -def: "A region of a primary transcript, that is removed via trans splicing." [PMID:16401417, SO:ke] -is_a: SO:0000835 ! primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-07-14T11:36:08Z - -[Term] -id: SO:0001476 -name: natural_plasmid -def: "A plasmid that occurs naturally." [SO:xp] -synonym: "natural plasmid" EXACT [] -is_a: SO:0000155 ! plasmid -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -relationship: has_quality SO:0000782 ! natural -created_by: kareneilbeck -creation_date: 2009-09-01T03:43:06Z - -[Term] -id: SO:0001477 -name: gene_trap_construct -def: "A gene trap construct is a type of engineered plasmid which is designed to integrate into a genome and produce a fusion transcript between exons of the gene into which it inserts and a reporter element in the construct. Gene traps contain a splice acceptor, do not contain promoter elements for the reporter, and are mutagenic. Gene traps may be bicistronic with the second cassette containing a promoter driving an a selectable marker." 
[ZFIN:dh] -synonym: "gene trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:49:09Z - -[Term] -id: SO:0001478 -name: promoter_trap_construct -def: "A promoter trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when inserted in close proximity to a promoter element. Promoter traps typically do not contain promoter elements and are mutagenic." [ZFIN:dh] -synonym: "promoter trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:52:01Z - -[Term] -id: SO:0001479 -name: enhancer_trap_construct -def: "An enhancer trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when the expression from a basic minimal promoter is enhanced by genomic enhancer elements. Enhancer traps contain promoter elements and are not usually mutagenic." [ZFIN:dh] -synonym: "enhancer trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:53:26Z - -[Term] -id: SO:0001480 -name: PAC_end -def: "A region of sequence from the end of a PAC clone that may provide a highly specific marker." [ZFIN:mh] -synonym: "PAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000154 ! PAC -created_by: kareneilbeck -creation_date: 2009-09-09T05:18:12Z - -[Term] -id: SO:0001481 -name: RAPD -def: "RAPD is a 'PCR product' where a sequence variant is identified through the use of PCR with random primers." [ZFIN:mh] -synonym: "Random Amplification Polymorphic DNA" EXACT [] -is_a: SO:0000006 ! PCR_product -created_by: kareneilbeck -creation_date: 2009-09-09T05:26:10Z - -[Term] -id: SO:0001482 -name: shadow_enhancer -synonym: "shadow enhancer" EXACT [] -is_a: SO:0000165 ! 
enhancer -created_by: kareneilbeck -creation_date: 2009-09-09T05:29:29Z - -[Term] -id: SO:0001483 -name: SNV -def: "SNVs are single nucleotide positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0001484 -name: X_element_combinatorial_repeat -def: "An X element combinatorial repeat is a repeat region located between the X element and the telomere or adjacent Y' element." [http://www.yeastgenome.org/help/glossary.html] -comment: X element combinatorial repeats contain Tbf1p binding sites,\nand possible functions include a role in telomerase-independent telomere\nmaintenance via recombination or as a barrier against transcriptional\nsilencing. These are usually present as a combination of one or more of\nseveral types of smaller elements (designated A, B, C, or D). This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "X element combinatorial repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T11:03:37Z - -[Term] -id: SO:0001485 -name: Y_prime_element -def: "A Y' element is a repeat region (SO:0000657) located adjacent to telomeric repeats or X element combinatorial repeats, either as a single copy or tandem repeat of two to four copies." [http:http://www.yeastgenome.org/help/glossary.html] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "Y' element" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! 
telomere -created_by: kareneilbeck -creation_date: 2009-11-10T12:08:57Z - -[Term] -id: SO:0001486 -name: standard_draft -def: "The status of a whole genome sequence, where the data is minimally filtered or un-filtered, from any number of sequencing platforms, and is assembled into contigs. Genome sequence of this quality may harbour regions of poor quality and can be relatively incomplete." [DOI:10.1126] -synonym: "standard draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:48:32Z - -[Term] -id: SO:0001487 -name: high_quality_draft -def: "The status of a whole genome sequence, where overall coverage represents at least 90 percent of the genome." [DOI:10.1126] -synonym: "high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:52:36Z - -[Term] -id: SO:0001488 -name: improved_high_quality_draft -def: "The status of a whole genome sequence, where additional work has been performed, using either manual or automated methods, such as gap resolution." [DOI:10.1126] -synonym: "improved high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:54:35Z - -[Term] -id: SO:0001489 -name: annotation_directed_improved_draft -def: "The status of a whole genome sequence,where annotation, and verification of coding regions has occurred." [DOI:10.1126] -synonym: "annotation directed improvement" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:57:10Z - -[Term] -id: SO:0001490 -name: noncontiguous_finished -def: "The status of a whole genome sequence, where the assembly is high quality, closure approaches have been successful for most gaps, misassemblies and low quality regions." [DOI:10.1126] -synonym: "non contiguous finished" EXACT [] -is_a: SO:0001499 ! 
whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:01:07Z - -[Term] -id: SO:0001491 -name: finished_genome -def: "The status of a whole genome sequence, with less than 1 error per 100,000 base pairs." [DOI:10.1126] -synonym: "finished" EXACT [] -synonym: "finished genome" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:04:43Z - -[Term] -id: SO:0001492 -name: intronic_regulatory_region -def: "A regulatory region that is part of an intron." [SO:ke] -synonym: "intronic regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000188 ! intron -created_by: kareneilbeck -creation_date: 2009-11-08T02:48:02Z - -[Term] -id: SO:0001493 -name: centromere_DNA_Element_I -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region composed of 8-11bp which enables binding by the centromere binding factor 1(Cbf1p)." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEI" EXACT [] -synonym: "Centromere DNA Element I" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0001794 ! point_centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:47:23Z - -[Term] -id: SO:0001494 -name: centromere_DNA_Element_II -def: "A centromere DNA Element II (CDEII) is part a conserved region of the centromere, consisting of a consensus region that is AT-rich and ~ 75-100 bp in length." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEII" EXACT [] -synonym: "centromere DNA Element II" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0001794 ! 
point_centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:51:26Z - -[Term] -id: SO:0001495 -name: centromere_DNA_Element_III -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region that consists of a 25-bp which enables binding by the centromere DNA binding factor 3 (CBF3) complex." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEIII" EXACT [] -synonym: "centromere DNA Element III" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0001794 ! point_centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:54:47Z - -[Term] -id: SO:0001496 -name: telomeric_repeat -def: "The telomeric repeat is a repeat region, part of the chromosome, which in yeast, is a G-rich terminal sequence of the form (TG(1-3))n or more precisely ((TG)(1-6)TG(2-3))n." [PMID:8720065] -comment: The repeats are maintained by telomerase and there is generally 300 (+/-) 75 bp of TG(1-3) at a given end. Telomeric repeats function in completing chromosome replication and protecting the ends from degradation and end-to-end fusions. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880739. -synonym: "telomeric repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-09T06:00:42Z - -[Term] -id: SO:0001497 -name: X_element -def: "The X element is a conserved region, of the telomere, of ~475 bp that contains an ARS sequence and in most cases an Abf1p binding site." [http://www.yeastgenome.org/help/glossary.html#xelemcoresequence, PMID:7785338, PMID:8005434] -comment: Possible functions include roles in chromosomal segregation,\nmaintenance of chromosome stability, recombinational sequestering, or as a\nbarrier to transcriptional silencing. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. 
\n\nFrom Janos Demeter: The only region shared by all chromosome ends, the X element core sequence is a small conserved element (~475 bp) that contains an ARS sequence and in most cases an Abf1p binding site. Between these is a GC-rich region nearly identical to the meiosis-specific regulatory sequence URS1. -synonym: "X element" RELATED [] -synonym: "X element core sequence" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T10:56:54Z - -[Term] -id: SO:0001498 -name: YAC_end -def: "A region of sequence from the end of a YAC clone that may provide a highly specific marker." [SO:ke] -synonym: "YAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000152 ! YAC -created_by: kareneilbeck -creation_date: 2009-11-19T11:07:18Z - -[Term] -id: SO:0001499 -name: whole_genome_sequence_status -def: "The status of whole genome sequence." [DOI:10.1126] -comment: This terms and children were added to SO in response to tracker request by Patrick Chain. The paper Genome Project Standards in a New Era of Sequencing. Science October 9th 2009, addresses these terms. -synonym: "whole genome sequence status" EXACT [] -is_a: SO:0000905 ! status -created_by: kareneilbeck -creation_date: 2009-10-23T12:47:47Z - -[Term] -id: SO:0001500 -name: heritable_phenotypic_marker -def: "A biological_region characterized as a single heritable trait in a phenotype screen. The heritable phenotype may be mapped to a chromosome but generally has not been characterized to a specific gene locus." [JAX:hdene] -synonym: "heritable phenotypic marker" EXACT [] -synonym: "phenotypic marker" EXACT [] -is_a: SO:0001645 ! genetic_marker -created_by: kareneilbeck -creation_date: 2009-12-07T01:50:55Z - -[Term] -id: SO:0001501 -name: peptide_collection -def: "A collection of peptide sequences." [BBOP:nlw] -comment: Term requested via tracker ID: 2910829. 
-synonym: "peptide collection" EXACT [] -synonym: "peptide set" EXACT [] -is_a: SO:0001260 ! sequence_collection -relationship: has_part SO:0000104 ! polypeptide -created_by: kareneilbeck -creation_date: 2009-12-11T10:58:58Z - -[Term] -id: SO:0001502 -name: high_identity_region -def: "An experimental feature with high sequence identity to another sequence." [SO:ke] -comment: Requested by tracker ID: 2902685. -synonym: "high identity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2009-12-11T11:06:05Z - -[Term] -id: SO:0001503 -name: processed_transcript -def: "A transcript for which no open reading frame has been identified and for which no other function has been determined." [MGI:hdeen] -comment: Ensembl and Vega also use this term name. Requested by Howard Deen of MGI. -synonym: "processed transcript" EXACT [] -is_a: SO:0000673 ! transcript -created_by: kareneilbeck -creation_date: 2009-12-21T05:37:14Z - -[Term] -id: SO:0001504 -name: assortment_derived_variation -def: "A chromosome variation derived from an event during meiosis." [SO:ke] -synonym: "assortment derived variation" RELATED [] -is_a: SO:0000240 ! chromosome_variation -created_by: kareneilbeck -creation_date: 2010-03-02T05:03:18Z - -[Term] -id: SO:0001505 -name: reference_genome -def: "A collection of sequences (often chromosomes) taken as the standard for a given organism and genome assembly." [SO:ke] -synonym: "reference genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:10:03Z - -[Term] -id: SO:0001506 -name: variant_genome -def: "A collection of sequences (often chromosomes) of an individual." [SO:ke] -synonym: "variant genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:11:25Z - -[Term] -id: SO:0001507 -name: variant_collection -def: "A collection of one or more sequences of an individual." 
[SO:ke] -synonym: "variant collection" RELATED [] -is_a: SO:0001260 ! sequence_collection -relationship: has_part SO:0001059 ! sequence_alteration -created_by: kareneilbeck -creation_date: 2010-03-03T02:13:28Z - -[Term] -id: SO:0001508 -name: alteration_attribute -synonym: "alteration attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:53:23Z - -[Term] -id: SO:0001509 -name: chromosomal_variation_attribute -synonym: "chromosomal variation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:54:30Z - -[Term] -id: SO:0001510 -name: intrachromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:25Z - -[Term] -id: SO:0001511 -name: interchromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:43Z - -[Term] -id: SO:0001512 -name: insertion_attribute -def: "A quality of a chromosomal insertion,." [SO:ke] -synonym: "insertion attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:56Z - -[Term] -id: SO:0001513 -name: tandem -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:37Z - -[Term] -id: SO:0001514 -name: direct -def: "A quality of an insertion where the insert is not in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:49Z - -[Term] -id: SO:0001515 -name: inverted -def: "A quality of an insertion where the insert is in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:40Z - -[Term] -id: SO:0001516 -name: free -def: "The quality of a duplication where the new region exists independently of the original." 
[SO:ke] -is_a: SO:0001523 ! duplication_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:51Z - -[Term] -id: SO:0001517 -name: inversion_attribute -synonym: "inversion attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:10Z - -[Term] -id: SO:0001518 -name: pericentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:24Z - -[Term] -id: SO:0001519 -name: paracentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:35Z - -[Term] -id: SO:0001520 -name: translocaton_attribute -synonym: "translocation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:47Z - -[Term] -id: SO:0001521 -name: reciprocal -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:34Z - -[Term] -id: SO:0001522 -name: insertional -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:51Z - -[Term] -id: SO:0001523 -name: duplication_attribute -synonym: "duplication attribute" RELATED [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-05T01:56:33Z - -[Term] -id: SO:0001524 -name: chromosomally_aberrant_genome -synonym: "chromosomally aberrant genome" RELATED [] -is_a: SO:0001506 ! variant_genome -created_by: kareneilbeck -creation_date: 2010-03-05T02:21:00Z - -[Term] -id: SO:0001525 -name: assembly_error_correction -def: "A region of sequence where the final nucleotide assignment differs from the original assembly due to an improvement that replaces a mistake." [SO:ke] -synonym: "assembly error correction" RELATED [] -is_a: SO:0000413 ! 
sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:16:31Z - -[Term] -id: SO:0001526 -name: base_call_error_correction -def: "A region of sequence where the final nucleotide assignment is different from that given by the base caller due to an improvement that replaces a mistake." [SO:ke] -synonym: "base call error correction" RELATED [] -is_a: SO:0000413 ! sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:18:07Z - -[Term] -id: SO:0001527 -name: peptide_localization_signal -def: "A region of peptide sequence used to target the polypeptide molecule to a specific organelle." [SO:ke] -subset: SOFA -synonym: "localization signal" RELATED [] -synonym: "peptide localization signal" EXACT [] -is_a: SO:0000839 ! polypeptide_region -created_by: kareneilbeck -creation_date: 2010-03-11T02:15:05Z - -[Term] -id: SO:0001528 -name: nuclear_localization_signal -def: "A polypeptide region that targets a polypeptide to the nucleus." [SO:ke] -synonym: "NLS" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_localization_signal "wikipedia" -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:16:38Z - -[Term] -id: SO:0001529 -name: endosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the endosome." [SO:ke] -synonym: "endosomal localization signal" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:20:58Z - -[Term] -id: SO:0001530 -name: lysosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the lysosome." [SO:ke] -synonym: "lysosomal localization signal" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:24:10Z - -[Term] -id: SO:0001531 -name: nuclear_export_signal -def: "A polypeptide region that targets a polypeptide to he cytoplasm." 
[SO:ke] -synonym: "NES" EXACT [] -synonym: "nuclear export signal" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_export_signal -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:25:25Z - -[Term] -id: SO:0001532 -name: recombination_signal_sequence -def: "A region recognized by a recombinase." [SO:ke] -synonym: "recombination signal sequence" RELATED [] -xref: http://en.wikipedia.org/wiki/Recombination_Signal_Sequences "wikipedia" -is_a: SO:0000299 ! specific_recombination_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:16:47Z - -[Term] -id: SO:0001533 -name: cryptic_splice_site -def: "A splice site that is in part of the transcript not normally spliced. They occur via mutation or transcriptional error." [SO:ke] -synonym: "cryptic splice signal" RELATED [] -synonym: "cryptic splice site" EXACT [] -is_a: SO:0000162 ! splice_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:25:06Z - -[Term] -id: SO:0001534 -name: nuclear_rim_localization_signal -def: "A polypeptide region that targets a polypeptide to the nuclear rim." [SO:ke] -synonym: "nuclear rim localization signal" RELATED [] -xref: PMID:16027110 -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T03:31:30Z - -[Term] -id: SO:0001535 -name: p_element -def: "A P_element is a DNA transposon responsible for hybrid dysgenesis." [SO:ke] -synonym: "P element" RELATED [] -is_a: SO:0000182 ! DNA_transposon -created_by: kareneilbeck -creation_date: 2010-03-12T03:40:33Z - -[Term] -id: SO:0001536 -name: functional_variant -def: "A sequence variant in which the function of a gene product is altered with respect to a reference." [SO:ke] -synonym: "functional variant" EXACT [] -is_a: SO:0001060 ! sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:30:25Z - -[Term] -id: SO:0001537 -name: structural_variant -def: "A sequence variant that changes one or more sequence features." 
[SO:ke] -synonym: "structural variant" RELATED [] -is_a: SO:0001060 ! sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:31:01Z - -[Term] -id: SO:0001538 -name: transcript_function_variant -def: "A sequence variant which alters the functioning of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcript function variant" EXACT [] -is_a: SO:0001536 ! functional_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:32:58Z - -[Term] -id: SO:0001539 -name: translational_product_function_variant -def: "A sequence variant that affects the functioning of a translational product with respect to a reference sequence." [SO:ke] -synonym: "translational product variant" EXACT [] -is_a: SO:0001536 ! functional_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:46:15Z - -[Term] -id: SO:0001540 -name: level_of_transcript_variant -def: "A sequence variant which alters the level of a transcript." [SO:ke] -synonym: "level of transcript variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:47:07Z - -[Term] -id: SO:0001541 -name: decreased_transcript_level_variant -def: "A sequence variant that increases the level of mature, spliced and processed RNA with respect to a reference sequence." [SO:ke] -synonym: "decreased transcript level" EXACT [] -is_a: SO:0001540 ! level_of_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:47:47Z - -[Term] -id: SO:0001542 -name: increased_transcript_level_variant -def: "A sequence variant that increases the level of mature, spliced and processed RNA with respect to a reference sequence." [SO:ke] -synonym: "increased transcript level variant" EXACT [] -is_a: SO:0001540 ! 
level_of_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:48:17Z - -[Term] -id: SO:0001543 -name: transcript_processing_variant -def: "A sequence variant that affects the post transcriptional processing of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcript processing variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:48:48Z - -[Term] -id: SO:0001544 -name: editing_variant -def: "A transcript processing variant whereby the process of editing is disrupted with respect to the reference." [SO:ke] -synonym: "editing variant" EXACT [] -is_a: SO:0001543 ! transcript_processing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:49:25Z - -[Term] -id: SO:0001545 -name: polyadenylation_variant -def: "A sequence variant that changes polyadenylation with respect to a reference sequence." [SO:ke] -synonym: "polyadenylation variant" EXACT [] -is_a: SO:0001543 ! transcript_processing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:49:40Z - -[Term] -id: SO:0001546 -name: transcript_stability_variant -def: "A variant that changes the stability of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcript stability variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:01Z - -[Term] -id: SO:0001547 -name: decreased_transcript_stability_variant -def: "A sequence variant that decreases transcript stability with respect to a reference sequence." [SO:ke] -synonym: "decrease transcript stability variant" EXACT [] -is_a: SO:0001546 ! transcript_stability_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:23Z - -[Term] -id: SO:0001548 -name: increased_transcript_stability_variant -def: "A sequence variant that increases transcript stability with respect to a reference sequence." 
[SO:ke] -synonym: "increased transcript stability variant" EXACT [] -is_a: SO:0001546 ! transcript_stability_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:39Z - -[Term] -id: SO:0001549 -name: transcription_variant -def: "A variant that changes alters the transcription of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcription variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:51:26Z - -[Term] -id: SO:0001550 -name: rate_of_transcription_variant -def: "A sequence variant that changes the rate of transcription with respect to a reference sequence." [SO:ke] -synonym: "rate of transcription variant" EXACT [] -is_a: SO:0001549 ! transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:51:50Z - -[Term] -id: SO:0001551 -name: increased_transcription_rate_variant -def: "A sequence variant that increases the rate of transcription with respect to a reference sequence." [SO:ke] -synonym: "increased transcription rate variant" EXACT [] -is_a: SO:0001550 ! rate_of_transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:52:17Z - -[Term] -id: SO:0001552 -name: decreased_transcription_rate_variant -def: "A sequence variant that decreases the rate of transcription with respect to a reference sequence." [SO:ke] -synonym: "decreased transcription rate variant" EXACT [] -is_a: SO:0001550 ! rate_of_transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:52:43Z - -[Term] -id: SO:0001553 -name: translational_product_level_variant -def: "A functional variant that changes the translational product level with respect to a reference sequence." [SO:ke] -synonym: "translational product level variant" EXACT [] -is_a: SO:0001539 ! 
translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:53:32Z - -[Term] -id: SO:0001554 -name: polypeptide_function_variant -def: "A sequence variant which changes polypeptide functioning with respect to a reference sequence." [SO:ke] -synonym: "polypeptide function variant" EXACT [] -is_a: SO:0001539 ! translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:53:54Z - -[Term] -id: SO:0001555 -name: decreased_translational_product_level -def: "A sequence variant which decreases the translational product level with respect to a reference sequence." [SO:ke] -synonym: "decrease translational product level" EXACT [] -is_a: SO:0001553 ! translational_product_level_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:54:25Z - -[Term] -id: SO:0001556 -name: increased_translational_product_level -def: "A sequence variant which increases the translational product level with respect to a reference sequence." [SO:ke] -synonym: "increase translational product level" EXACT [] -is_a: SO:0001553 ! translational_product_level_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:55:25Z - -[Term] -id: SO:0001557 -name: polypeptide_gain_of_function_variant -def: "A sequence variant which causes gain of polypeptide function with respect to a reference sequence." [SO:ke] -synonym: "polypeptide gain of function variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:12Z - -[Term] -id: SO:0001558 -name: polypeptide_localization_variant -def: "A sequence variant which changes the localization of a polypeptide with respect to a reference sequence." [SO:ke] -synonym: "polypeptide localization variant" EXACT [] -is_a: SO:0001554 ! 
polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:37Z - -[Term] -id: SO:0001559 -name: polypeptide_loss_of_function_variant -def: "A sequence variant that causes the loss of a polypeptide function with respect to a reference sequence." [SO:ke] -synonym: "polypeptide loss of function variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:58Z - -[Term] -id: SO:0001560 -name: inactive_ligand_binding_site -def: "A sequence variant that causes the inactivation of a ligand binding site with respect to a reference sequence." [SO:ke] -synonym: "inactive ligand binding site" EXACT [] -is_a: SO:0001559 ! polypeptide_loss_of_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:58:00Z - -[Term] -id: SO:0001561 -name: polypeptide_partial_loss_of_function -def: "A sequence variant that causes some but not all loss of polypeptide function with respect to a reference sequence." [SO:ke] -synonym: "polypeptide partial loss of function" EXACT [] -is_a: SO:0001559 ! polypeptide_loss_of_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:58:32Z - -[Term] -id: SO:0001562 -name: polypeptide_post_translational_processing_variant -def: "A sequence variant that causes a change in post translational processing of the peptide with respect to a reference sequence." [SO:ke] -synonym: "polypeptide post translational processing variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:59:06Z - -[Term] -id: SO:0001563 -name: copy_number_change -def: "A sequence variant where copies of a feature (CNV) are either increased or decreased." [SO:ke] -synonym: "copy number change" EXACT [] -is_a: SO:0001537 ! 
structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:27:33Z - -[Term] -id: SO:0001564 -name: gene_variant -def: "A sequence variant where the structure of the gene is changed." [SO:ke] -synonym: "gene structure variant" EXACT [] -synonym: "snpEff:GENE" EXACT VAR [] -synonym: "VAAST:gene_variant" EXACT VAR [] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:01Z - -[Term] -id: SO:0001565 -name: gene_fusion -def: "A sequence variant whereby a two genes have become joined." [SO:ke] -synonym: "gene fusion" EXACT [] -is_a: SO:0001564 ! gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:28Z - -[Term] -id: SO:0001566 -name: regulatory_region_variant -def: "A sequence variant located within a regulatory region." [SO:ke] -comment: EBI term: Regulatory region variations - In regulatory region annotated by Ensembl. -synonym: "regulatory region variant" EXACT [] -synonym: "regulatory_region_" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:48Z - -[Term] -id: SO:0001567 -name: stop_retained_variant -def: "A sequence variant where at least one base in the terminator codon is changed, but the terminator remains." [SO:ke] -synonym: "snpEff:SYNONYMOUS_STOP" EXACT VAR [] -synonym: "stop retained variant" EXACT [] -synonym: "VAAST:stop_retained" EXACT VAR [] -is_a: SO:0001590 ! terminator_codon_variant -is_a: SO:0001819 ! synonymous_variant -created_by: kareneilbeck -creation_date: 2010-04-19T05:02:30Z - -[Term] -id: SO:0001568 -name: splicing_variant -def: "A sequence variant that changes the process of splicing." [SO:ke] -synonym: "splicing variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001576 ! 
transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:29:22Z - -[Term] -id: SO:0001569 -name: cryptic_splice_site_variant -def: "A sequence variant causing a new (functional) splice site." [EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "cryptic splice site activation" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:29:41Z - -[Term] -id: SO:0001570 -name: cryptic_splice_acceptor -def: "A sequence variant whereby a new splice site is created due to the activation of a new acceptor." [SO:ke] -synonym: "cryptic splice acceptor" EXACT [] -is_a: SO:0001569 ! cryptic_splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:30:11Z - -[Term] -id: SO:0001571 -name: cryptic_splice_donor -def: "A sequence variant whereby a new splice site is created due to the activation of a new donor." [SO:ke] -synonym: "cryptic splice donor" EXACT [] -is_a: SO:0001569 ! cryptic_splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:30:35Z - -[Term] -id: SO:0001572 -name: exon_loss_variant -def: "A sequence variant whereby an exon is lost from the transcript." [SO:ke] -synonym: "exon loss" EXACT [] -synonym: "snpEff:EXON_DELETED" EXACT VAR [] -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:09Z - -[Term] -id: SO:0001573 -name: intron_gain_variant -def: "A sequence variant whereby an intron is gained by the processed transcript; usually a result of an alteration of the donor or acceptor." [EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "intron gain" EXACT [] -synonym: "intron gain variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001568 ! 
splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:25Z - -[Term] -id: SO:0001574 -name: splice_acceptor_variant -def: "A splice variant that changes the 2 base region at the 3' end of an intron." [SO:ke] -synonym: "snpEff:SPLICE_SITE_ACCEPTOR" EXACT VAR [] -synonym: "splice acceptor variant" EXACT [] -synonym: "VAAST:splice_acceptor_variant" EXACT VAR [] -is_a: SO:0001629 ! splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:52Z - -[Term] -id: SO:0001575 -name: splice_donor_variant -def: "A splice variant that changes the 2 base pair region at the 5' end of an intron." [SO:ke] -synonym: "snpEff:SPLICE_SITE_DONOR" EXACT VAR [] -synonym: "splice donor variant" EXACT [] -synonym: "VAAST:splice_donor_variant" EXACT VAR [] -is_a: SO:0001629 ! splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:32:10Z - -[Term] -id: SO:0001576 -name: transcript_variant -def: "A sequence variant that changes the structure of the transcript." [SO:ke] -synonym: "snpEff:TRANSCRIPT" EXACT VAR [] -synonym: "transcript variant" EXACT [] -synonym: "VAAST:transcript_variant" EXACT VAR [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001564 ! gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:32:41Z - -[Term] -id: SO:0001577 -name: complex_transcript_variant -def: "A transcript variant with a complex INDEL- Insertion or deletion that spans an exon/intron border or a coding sequence/UTR border." [http://ensembl.org/info/docs/variation/index.html] -comment: EBI term: Complex InDel - Insertion or deletion that spans an exon/intron border or a coding sequence/UTR border. -synonym: "complex transcript variant" EXACT [] -synonym: "complex_indel" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "complext change in transcript" EXACT [] -is_a: SO:0001576 ! 
transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:33:03Z - -[Term] -id: SO:0001578 -name: stop_lost -def: "A sequence variant where at least one base of the terminator codon (stop) is changed, resulting in an elongated transcript." [SO:ke] -comment: EBI term: Stop lost - In coding sequence, resulting in the loss of a stop codon. -synonym: "ANNOVAR:stoploss" EXACT VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "snpEff:STOP_LOST" EXACT VAR [] -synonym: "stop codon lost" EXACT [] -synonym: "stop lost" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "VAAST:stop_lost" EXACT VAR [] -is_a: SO:0001590 ! terminator_codon_variant -is_a: SO:0001907 ! feature_elongation -is_a: SO:0001992 ! nonsynonymous_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:46:42Z - -[Term] -id: SO:0001579 -name: transcript_sequence_variant -synonym: "transcript sequence variant" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001580 -name: coding_sequence_variant -alt_id: SO:0001581 -def: "A sequence variant that changes the coding sequence." [SO:ke] -synonym: "coding sequence variant" EXACT [] -synonym: "coding variant" EXACT [] -synonym: "codon variant" EXACT [] -synonym: "codon_variant" EXACT [] -synonym: "snpEff:CDS" EXACT VAR [] -synonym: "snpEff:CODON_CHANGE" RELATED VAR [] -synonym: "VAAST:coding_sequence_variant" EXACT VAR [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001791 ! exon_variant -is_a: SO:0001968 ! coding_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:34:36Z - -[Term] -id: SO:0001582 -name: initiator_codon_variant -def: "A codon variant that changes at least one base of the first codon of a transcript." [SO:ke] -comment: This is being used to annotate changes to the first codon of a transcript, when the first annotated codon is not to methionine. 
A variant is predicted to change the first amino acid of a translation irrespective of the fact that the underlying codon is an AUG. As such for transcripts with an incomplete CDS (sequence does not start with an AUG), it is still called. -synonym: "initiatior codon variant" EXACT [] -synonym: "initiator codon change" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -xref: loinc:LA6695-6 "Initiating Methionine" -is_a: SO:0001580 ! coding_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:35:18Z - -[Term] -id: SO:0001583 -name: missense_variant -alt_id: SO:0001584 -alt_id: SO:0001783 -def: "A sequence variant, that changes one or more bases, resulting in a different amino acid sequence but where the length is preserved." [EBI:fc, EBI:gr, SO:ke] -comment: EBI term: Non-synonymous SNPs. SNPs that are located in the coding sequence and result in an amino acid change in the encoded peptide sequence. A change that causes a non_synonymous_codon can be more than 3 bases - for example 4 base substitution. -synonym: "ANNOVAR:nonsynonymous SNV" RELATED VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "missense" EXACT [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "missense codon" EXACT [] -synonym: "snpEff:NON_SYSNONYMOUS_CODING" EXACT VAR [] -synonym: "VAAST:non_synonymous_codon" RELATED VAR [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -xref: http://en.wikipedia.org/wiki/Missense_mutation -xref: loinc:LA6698-0 "Missense" -is_a: SO:0001992 ! nonsynonymous_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:35:49Z - -[Term] -id: SO:0001585 -name: conservative_missense_variant -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for a different but similar amino acid. These variants may or may not be deleterious." 
[SO:ke] -synonym: "conservative missense codon" EXACT [] -synonym: "conservative missense variant" EXACT [] -synonym: "neutral missense codon" RELATED [] -synonym: "quiet missense codon" RELATED [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001583 ! missense_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:36:40Z - -[Term] -id: SO:0001586 -name: non_conservative_missense_variant -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for an amino acid with different biochemical properties." [SO:ke] -synonym: "non conservative missense codon" EXACT [] -synonym: "non conservative missense variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001583 ! missense_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:37:16Z - -[Term] -id: SO:0001587 -name: stop_gained -def: "A sequence variant whereby at least one base of a codon is changed, resulting in a premature stop codon, leading to a shortened transcript." [SO:ke] -comment: EBI term: Stop gained - In coding sequence, resulting in the gain of a stop codon (i.e. leading to a shortened peptide sequence). -synonym: "ANNOVAR:stopgain" EXACT VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "nonsense" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "nonsense codon" EXACT [] -synonym: "snpEFF:STOP_GAINED" EXACT VAR [] -synonym: "stop codon gained" RELATED [] -synonym: "stop gained" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "VAAST:stop_gained" EXACT VAR [] -xref: loinc:LA6699-8 "Nonsense" -is_a: SO:0001906 ! feature_truncation -is_a: SO:0001992 ! 
nonsynonymous_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:37:52Z - -[Term] -id: SO:0001589 -name: frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three." [SO:ke] -comment: EBI term:Frameshift variations - In coding sequence, resulting in a frameshift. -synonym: "ANNOVAR:frameshift block substitution" RELATED VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "frameshift variant" EXACT [] -synonym: "frameshift_" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "frameshift_coding" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "snpEff:FRAME_SHIFT" EXACT VAR [] -synonym: "VAAST:frameshift_variant" EXACT VAR [] -xref: loinc:LA6694-9 "Frameshift" -is_a: SO:0001818 ! protein_altering_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:40:19Z - -[Term] -id: SO:0001590 -name: terminator_codon_variant -alt_id: SO:0001625 -def: "A sequence variant whereby at least one of the bases in the terminator codon is changed." [SO:ke] -comment: The terminal codon may be the terminator, or in an incomplete transcript the last available codon. -synonym: "terminal codon variant" EXACT [] -synonym: "terminal_codon_variant" EXACT [] -synonym: "terminator codon variant" EXACT [] -xref: loinc:LA6700-2 "Stop Codon Mutation" -is_a: SO:0001580 ! coding_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:40:37Z - -[Term] -id: SO:0001591 -name: frame_restoring_variant -def: "A sequence variant that reverts the sequence of a previous frameshift mutation back to the initial frame." [SO:ke] -synonym: "frame restoring variant" EXACT [] -is_a: SO:0001589 ! 
frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:09Z - -[Term] -id: SO:0001592 -name: minus_1_frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, by shifting one base ahead." [http://arjournals.annualreviews.org/doi/pdf/10.1146/annurev.ge.08.120174.001535] -synonym: "-1 frameshift variant" EXACT [] -synonym: "minus 1 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:30Z - -[Term] -id: SO:0001593 -name: minus_2_frameshift_variant -synonym: "-2 frameshift variant" EXACT [] -synonym: "minus 2 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:52Z - -[Term] -id: SO:0001594 -name: plus_1_frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, by shifting one base backward." [http://arjournals.annualreviews.org/doi/pdf/10.1146/annurev.ge.08.120174.001535] -synonym: "+1 frameshift variant" EXACT [] -synonym: "plus 1 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:42:06Z - -[Term] -id: SO:0001595 -name: plus_2_frameshift_variant -synonym: "+2 frameshift variant" EXACT [] -synonym: "plus 2 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:42:23Z - -[Term] -id: SO:0001596 -name: transcript_secondary_structure_variant -def: "A sequence variant within a transcript that changes the secondary structure of the RNA product." [SO:ke] -synonym: "transcript secondary structure variant" EXACT [] -is_a: SO:0001576 ! 
transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:43:18Z - -[Term] -id: SO:0001597 -name: compensatory_transcript_secondary_structure_variant -def: "A secondary structure variant that compensate for the change made by a previous variant." [SO:ke] -synonym: "compensatory transcript secondary structure variant" EXACT [] -is_a: SO:0001596 ! transcript_secondary_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:43:54Z - -[Term] -id: SO:0001598 -name: translational_product_structure_variant -def: "A sequence variant within the transcript that changes the structure of the translational product." [SO:ke] -synonym: "translational product structure variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001564 ! gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:44:17Z - -[Term] -id: SO:0001599 -name: 3D_polypeptide_structure_variant -def: "A sequence variant that changes the resulting polypeptide structure." [SO:ke] -synonym: "3D polypeptide structure variant" EXACT [] -is_a: SO:0001539 ! translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:44:46Z - -[Term] -id: SO:0001600 -name: complex_3D_structural_variant -def: "A sequence variant that changes the resulting polypeptide structure." [SO:ke] -synonym: "complex 3D structural variant" EXACT [] -is_a: SO:0001599 ! 3D_polypeptide_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:45:13Z - -[Term] -id: SO:0001601 -name: conformational_change_variant -def: "A sequence variant in the CDS region that causes a conformational change in the resulting polypeptide sequence." [SO:ke] -synonym: "conformational change variant" EXACT [] -is_a: SO:0001599 ! 
3D_polypeptide_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:45:48Z - -[Term] -id: SO:0001602 -name: complex_change_of_translational_product_variant -synonym: "complex change of translational product variant" EXACT [] -is_a: SO:0001539 ! translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:46:54Z - -[Term] -id: SO:0001603 -name: polypeptide_sequence_variant -def: "A sequence variant with in the CDS that causes a change in the resulting polypeptide sequence." [SO:ke] -synonym: "polypeptide sequence variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:13Z - -[Term] -id: SO:0001604 -name: amino_acid_deletion -def: "A sequence variant within a CDS resulting in the loss of an amino acid from the resulting polypeptide." [SO:ke] -synonym: "amino acid deletion" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:36Z - -[Term] -id: SO:0001605 -name: amino_acid_insertion -def: "A sequence variant within a CDS resulting in the gain of an amino acid to the resulting polypeptide." [SO:ke] -synonym: "amino acid insertion" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:56Z - -[Term] -id: SO:0001606 -name: amino_acid_substitution -def: "A sequence variant of a codon resulting in the substitution of one amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "amino acid substitution" EXACT [] -synonym: "VAAST:amino_acid_substitution" EXACT VAR [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! 
polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:48:17Z - -[Term] -id: SO:0001607 -name: conservative_amino_acid_substitution -def: "A sequence variant of a codon causing the substitution of a similar amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "conservative amino acid substitution" EXACT [] -is_a: SO:0001606 ! amino_acid_substitution -created_by: kareneilbeck -creation_date: 2010-03-22T02:48:57Z - -[Term] -id: SO:0001608 -name: non_conservative_amino_acid_substitution -def: "A sequence variant of a codon causing the substitution of a non conservative amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "non conservative amino acid substitution" EXACT [] -is_a: SO:0001606 ! amino_acid_substitution -created_by: kareneilbeck -creation_date: 2010-03-22T02:49:23Z - -[Term] -id: SO:0001609 -name: elongated_polypeptide -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence." [SO:ke] -synonym: "elongated polypeptide" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:49:52Z - -[Term] -id: SO:0001610 -name: elongated_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated polypeptide C terminal" EXACT [] -is_a: SO:0001609 ! elongated_polypeptide -created_by: kareneilbeck -creation_date: 2010-03-22T02:50:20Z - -[Term] -id: SO:0001611 -name: elongated_polypeptide_N_terminal -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated polypeptide N terminal" EXACT [] -is_a: SO:0001609 ! 
elongated_polypeptide -created_by: kareneilbeck -creation_date: 2010-03-22T02:50:31Z - -[Term] -id: SO:0001612 -name: elongated_in_frame_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes in frame elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated in frame polypeptide C terminal" EXACT [] -is_a: SO:0001610 ! elongated_polypeptide_C_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:05Z - -[Term] -id: SO:0001613 -name: elongated_out_of_frame_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes out of frame elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated polypeptide out of frame C terminal" EXACT [] -is_a: SO:0001610 ! elongated_polypeptide_C_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:20Z - -[Term] -id: SO:0001614 -name: elongated_in_frame_polypeptide_N_terminal_elongation -def: "A sequence variant with in the CDS that causes in frame elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated in frame polypeptide N terminal" EXACT [] -is_a: SO:0001611 ! elongated_polypeptide_N_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:49Z - -[Term] -id: SO:0001615 -name: elongated_out_of_frame_polypeptide_N_terminal -def: "A sequence variant with in the CDS that causes out of frame elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated out of frame N terminal" EXACT [] -is_a: SO:0001611 ! elongated_polypeptide_N_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:52:05Z - -[Term] -id: SO:0001616 -name: polypeptide_fusion -def: "A sequence variant that causes a fusion of two polypeptide sequences." [SO:ke] -synonym: "polypeptide fusion" EXACT [] -is_a: SO:0001603 ! 
polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:52:43Z - -[Term] -id: SO:0001617 -name: polypeptide_truncation -def: "A sequence variant of the CD that causes a truncation of the resulting polypeptide." [SO:ke] -synonym: "polypeptide truncation" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:53:07Z - -[Term] -id: SO:0001618 -name: inactive_catalytic_site -def: "A sequence variant that causes the inactivation of a catalytic site with respect to a reference sequence." [SO:ke] -synonym: "inactive catalytic site" EXACT [] -is_a: SO:0001560 ! inactive_ligand_binding_site -created_by: kareneilbeck -creation_date: 2010-03-22T03:06:14Z - -[Term] -id: SO:0001619 -name: non_coding_transcript_variant -def: "A transcript variant of a non coding RNA gene." [SO:ke] -comment: Within non-coding gene - Located within a gene that does not code for a protein. -synonym: "ANNOVAR:ncRNA" RELATED VAR [http:http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "nc transcript variant" EXACT [] -synonym: "non coding transcript variant" EXACT [] -synonym: "within_non_coding_gene" EXACT dbsnp [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:16:23Z - -[Term] -id: SO:0001620 -name: mature_miRNA_variant -def: "A transcript variant located with the sequence of the mature miRNA." [SO:ke] -comment: EBI term: Within mature miRNA - Located within a microRNA. -synonym: "mature miRNA variant" EXACT [] -synonym: "within_mature_miRNA" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001619 ! 
non_coding_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:16:58Z - -[Term] -id: SO:0001621 -name: NMD_transcript_variant -def: "A variant in a transcript that is the target of NMD." [SO:ke] -synonym: "NMD transcript variant" EXACT [] -synonym: "NMD_transcript" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:20:40Z - -[Term] -id: SO:0001622 -name: UTR_variant -def: "A transcript variant that is located within the UTR." [SO:ke] -synonym: "UTR variant" EXACT [] -synonym: "UTR_" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001791 ! exon_variant -is_a: SO:0001968 ! coding_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:22:58Z - -[Term] -id: SO:0001623 -name: 5_prime_UTR_variant -def: "A UTR variant of the 5' UTR." [SO:ke] -comment: EBI term: 5prime UTR variations - In 5prime UTR (untranslated region). -synonym: "5'UTR variant" EXACT [] -synonym: "5PRIME_UTR" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "ANNOVAR:UTR5" RELATED VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "five prime UTR variant" EXACT [] -synonym: "snpEff:UTR_5_PRIME" EXACT VAR [] -synonym: "untranslated-5" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "VAAST:five_prime_UTR_variant" EXACT VAR [] -is_a: SO:0001622 ! UTR_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:23:29Z - -[Term] -id: SO:0001624 -name: 3_prime_UTR_variant -def: "A UTR variant of the 3' UTR." [SO:ke] -comment: EBI term 3prime UTR variations - In 3prime UTR. 
-synonym: "3'UTR variant" EXACT [] -synonym: "3PRIME_UTR" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "ANNOVAR:UTR3" RELATED VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "snpEff:UTR_3_PRIME" EXACT VAR [] -synonym: "three prime UTR variant" EXACT [] -synonym: "untranslated-3" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "VAAST:three_prime_UTR_variant" EXACT VAR [] -is_a: SO:0001622 ! UTR_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:23:54Z - -[Term] -id: SO:0001626 -name: incomplete_terminal_codon_variant -def: "A sequence variant where at least one base of the final codon of an incompletely annotated transcript is changed." [SO:ke] -comment: EBI term: Partial codon - Located within the final, incomplete codon of a transcript with a shortened coding sequence where the end is unknown. -synonym: "incomplete terminal codon variant" EXACT [] -synonym: "partial_codon" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001590 ! terminator_codon_variant -is_a: SO:0001650 ! inframe_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:51:15Z - -[Term] -id: SO:0001627 -name: intron_variant -def: "A transcript variant occurring within an intron." [SO:ke] -comment: EBI term: Intronic variations - In intron. -synonym: "ANNOVAR:intronic" RELATED VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "intron variant" EXACT [] -synonym: "intron_" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "intronic" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "snpEff:INTRON" EXACT VAR [] -synonym: "VAAST:intron_variant" EXACT VAR [] -is_a: SO:0001576 ! 
transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:52:38Z - -[Term] -id: SO:0001628 -name: intergenic_variant -def: "A sequence variant located in the intergenic region, between genes." [SO:ke] -comment: EBI term Intergenic variations - More than 5 kb either upstream or downstream of a transcript. -synonym: "ANNOVAR:intergenic" RELATED VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "intergenic" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "intergenic variant" EXACT [] -synonym: "snpEff:INTERGENIC" EXACT VAR [] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-23T05:07:37Z - -[Term] -id: SO:0001629 -name: splice_site_variant -def: "A sequence variant that changes the first two or last two bases of an intron, or the 5th base from the start of the intron in the orientation of the transcript." [http://ensembl.org/info/docs/variation/index.html] -comment: EBI term - essential splice site - In the first 2 or the last 2 base pairs of an intron. The 5th base is on the donor (5') side of the intron. Updated to b in line with Cancer Genome Project at the Sanger. -synonym: "essential_splice_site" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "splice site variant" EXACT [] -is_a: SO:0001568 ! splicing_variant -is_a: SO:0001627 ! intron_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:42:00Z - -[Term] -id: SO:0001630 -name: splice_region_variant -def: "A sequence variant in which a change has occurred within the region of the splice site, either within 1-3 bases of the exon or 3-8 bases of the intron." [http://ensembl.org/info/docs/variation/index.html] -comment: EBI term: splice site - 1-3 bps into an exon or 3-8 bps into an intron. 
-synonym: "ANNOVAR:splicing" RELATED VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "splice region variant" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "VAAST:splice_region_variant" EXACT VAR [] -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:46:02Z - -[Term] -id: SO:0001631 -name: upstream_gene_variant -def: "A sequence variant located 5' of a gene." [SO:ke] -comment: Different groups annotate up and downstream to different lengths. The subtypes are specific and are backed up with cross references. -synonym: "ANNOVAR:upstream" RELATED VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "snpEff:UPSTREAM" EXACT VAR [] -synonym: "upstream gene variant" EXACT [] -is_a: SO:0001628 ! intergenic_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:49:13Z - -[Term] -id: SO:0001632 -name: downstream_gene_variant -def: "A sequence variant located 3' of a gene." [SO:ke] -comment: Different groups annotate up and downstream to different lengths. The subtypes are specific and are backed up with cross references. -synonym: "ANNOVAR:downstream" RELATED VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "downstream gene variant" EXACT [] -synonym: "snpEff:DOWNSTREAM" EXACT VAR [] -is_a: SO:0001628 ! intergenic_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:49:38Z - -[Term] -id: SO:0001633 -name: 5KB_downstream_variant -def: "A sequence variant located within 5 KB of the end of a gene." [SO:ke] -comment: EBI term Downstream variations - Within 5 kb downstream of the 3prime end of a transcript. -synonym: "5KB downstream variant" EXACT [] -synonym: "downstream" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "within 5KB downstream" RELATED [] -is_a: SO:0001632 ! 
downstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:50:16Z - -[Term] -id: SO:0001634 -name: 500B_downstream_variant -def: "A sequence variant located within a half KB of the end of a gene." [SO:ke] -synonym: "500B downstream variant" EXACT [] -synonym: "near-gene-3" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001633 ! 5KB_downstream_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:50:42Z - -[Term] -id: SO:0001635 -name: 5KB_upstream_variant -def: "A sequence variant located within 5KB 5' of a gene." [SO:ke] -comment: EBI term Upstream variations - Within 5 kb upstream of the 5prime end of a transcript. -synonym: "5kb upstream variant" EXACT [] -synonym: "upstream" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001631 ! upstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:51:06Z - -[Term] -id: SO:0001636 -name: 2KB_upstream_variant -def: "A sequence variant located within 2KB 5' of a gene." [SO:ke] -synonym: "2KB upstream variant" EXACT [] -synonym: "near-gene-5" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001635 ! 5KB_upstream_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:51:22Z - -[Term] -id: SO:0001637 -name: rRNA_gene -def: "A gene that encodes for ribosomal RNA." [SO:ke] -synonym: "rDNA" EXACT [] -synonym: "rRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:10:32Z - -[Term] -id: SO:0001638 -name: piRNA_gene -def: "A gene that encodes for an piwi associated RNA." [SO:ke] -synonym: "piRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:11:36Z - -[Term] -id: SO:0001639 -name: RNase_P_RNA_gene -def: "A gene that encodes an RNase P RNA." [SO:ke] -synonym: "RNase P RNA gene" EXACT [] -is_a: SO:0001263 ! 
ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:13:23Z - -[Term] -id: SO:0001640 -name: RNase_MRP_RNA_gene -def: "A gene that encodes a RNase_MRP_RNA." [SO:ke] -synonym: "RNase MRP RNA gene" RELATED [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:13:58Z - -[Term] -id: SO:0001641 -name: lincRNA_gene -def: "A gene that encodes large intervening non-coding RNA." [SO:ke] -synonym: "lincRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:14:24Z - -[Term] -id: SO:0001642 -name: mathematically_defined_repeat -def: "A mathematically defined repeat (MDR) is a experimental feature that is determined by querying overlapping oligomers of length k against a database of shotgun sequence data and identifying regions in the query sequence that exceed a statistically determined threshold of repetitiveness." [SO:jestill] -comment: Mathematically defined repeat regions are determined without regard to the biological origin of the repetitive region. The repeat units of a MDR are the overlapping oligomers of size k that were used to for the query. Tools that can annotate mathematically defined repeats include Tallymer (Kurtz et al 2008, BMC Genomics: 517) and RePS (Wang et al, Genome Res 12(5): 824-831.). -synonym: "mathematically defined repeat" EXACT [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2010-05-03T11:50:14Z - -[Term] -id: SO:0001643 -name: telomerase_RNA_gene -def: "A telomerase RNA gene is a non coding RNA gene the RNA product of which is a component of telomerase." [SO:ke] -synonym: "Telomerase RNA component" EXACT [] -synonym: "telomerase RNA gene" EXACT [] -synonym: "TERC" EXACT [] -xref: http:http://en.wikipedia.org/wiki/Telomerase_RNA_component "wikipedia" -is_a: SO:0001263 ! 
ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-05-18T05:26:38Z - -[Term] -id: SO:0001644 -name: targeting_vector -def: "An engineered vector that is able to take part in homologous recombination in a host with the intent of introducing site specific genomic modifications." [MGD:tm, PMID:10354467] -synonym: "targeting vector" RELATED [] -is_a: SO:0000440 ! vector_replicon -is_a: SO:0000804 ! engineered_region -relationship: has_part SO:0000853 ! homologous_region -relationship: has_quality SO:0000783 ! engineered -created_by: kareneilbeck -creation_date: 2010-05-28T02:05:25Z - -[Term] -id: SO:0001645 -name: genetic_marker -def: "A measurable sequence feature that varies within a population." [SO:db] -synonym: "genetic marker" RELATED [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-05-28T02:33:07Z - -[Term] -id: SO:0001646 -name: DArT_marker -def: "A genetic marker, discovered using Diversity Arrays Technology (DArT) technology." [SO:ke] -synonym: "DArT marker" EXACT [] -is_a: SO:0001645 ! genetic_marker -created_by: kareneilbeck -creation_date: 2010-05-28T02:34:43Z - -[Term] -id: SO:0001647 -name: kozak_sequence -def: "A kind of ribosome entry site, specific to Eukaryotic organisms that overlaps part of both 5' UTR and CDS sequence." [SO:ke] -subset: SOFA -synonym: "kozak consensus" EXACT [] -synonym: "kozak consensus sequence" EXACT [] -synonym: "kozak sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Kozak_consensus_sequence "wikipedia" -is_a: SO:0000139 ! ribosome_entry_site -created_by: kareneilbeck -creation_date: 2010-06-07T03:12:20Z - -[Term] -id: SO:0001648 -name: nested_transposon -def: "A transposon that is disrupted by the insertion of another element." [SO:ke] -synonym: "nested transposon" EXACT [] -is_a: SO:0000101 ! 
transposable_element -created_by: kareneilbeck -creation_date: 2010-06-23T03:22:57Z - -[Term] -id: SO:0001649 -name: nested_repeat -def: "A repeat that is disrupted by the insertion of another element." [SO:ke] -synonym: "nested repeat" RELATED [] -is_a: SO:0000657 ! repeat_region -created_by: kareneilbeck -creation_date: 2010-06-23T03:24:55Z - -[Term] -id: SO:0001650 -name: inframe_variant -def: "A sequence variant which does not cause a disruption of the translational reading frame." [SO:ke] -synonym: "ANNOVAR:nonframeshift block substitution" RELATED VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "cds-indel" EXACT dbsnp [] -synonym: "inframe variant" EXACT [] -synonym: "VAAST:inframe_variant" EXACT VAR [] -is_a: SO:0001818 ! protein_altering_variant -created_by: kareneilbeck -creation_date: 2010-07-19T01:24:44Z - -[Term] -id: SO:0001653 -name: retinoic_acid_responsive_element -def: "A transcription factor binding site of variable direct repeats of the sequence PuGGTCA spaced by five nucleotides (DR5) found in the promoters of retinoic acid-responsive genes, to which retinoic acid receptors bind." [PMID:11327309, PMID:19917671] -synonym: "RARE" EXACT [] -synonym: "retinoic acid responsive element" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000167 ! promoter -created_by: kareneilbeck -creation_date: 2010-08-03T10:46:12Z - -[Term] -id: SO:0001654 -name: nucleotide_to_protein_binding_site -def: "A binding site that, in the nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -subset: SOFA -synonym: "nucleotide to protein binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:26:05Z - -[Term] -id: SO:0001655 -name: nucleotide_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with nucleotide residues." 
[SO:cb] -comment: See GO:0000166 : nucleotide binding. -synonym: "np_bind" EXACT BS [uniprot:feature] -synonym: "nucleotide binding site" EXACT [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:30:04Z - -[Term] -id: SO:0001656 -name: metal_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with metal ions." [SO:cb] -comment: See GO:0046872 : metal ion binding. -synonym: "metal binding site" RELATED [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:31:42Z - -[Term] -id: SO:0001657 -name: ligand_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with a small molecule such as a drug, or hormone." [SO:ke] -synonym: "ligand binding site" EXACT [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:32:58Z - -[Term] -id: SO:0001658 -name: nested_tandem_repeat -def: "An NTR is a nested repeat of two distinct tandem motifs interspersed with each other." [SO:AF] -comment: Tracker ID: 3052459. -synonym: "nested tandem repeat" EXACT [] -synonym: "NTR" EXACT [] -is_a: SO:0001649 ! nested_repeat -created_by: kareneilbeck -creation_date: 2010-08-26T09:36:16Z - -[Term] -id: SO:0001659 -name: promoter_element -synonym: "promoter element" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: overlaps SO:0000235 ! TF_binding_site -created_by: kareneilbeck -creation_date: 2010-10-01T11:48:32Z - -[Term] -id: SO:0001660 -name: core_promoter_element -synonym: "core promoter element" EXACT [] -synonym: "general transcription factor binding site" RELATED [] -is_a: SO:0001659 ! promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T11:49:03Z - -[Term] -id: SO:0001661 -name: RNA_polymerase_II_TATA_box -def: "A TATA box core promoter of a gene transcribed by RNA polymerase II." [PMID:16858867] -synonym: "RNA polymerase II TATA box" EXACT [] -is_a: SO:0000174 ! 
TATA_box -relationship: part_of SO:0001669 ! RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:42:12Z - -[Term] -id: SO:0001662 -name: RNA_polymerase_III_TATA_box -def: "A TATA box core promoter of a gene transcribed by RNA polymerase III." [SO:ke] -synonym: "RNA polymerase III TATA box" EXACT [] -is_a: SO:0000174 ! TATA_box -relationship: part_of SO:0000171 ! RNApol_III_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:43:16Z - -[Term] -id: SO:0001663 -name: BREd_motif -def: "A core TRNA polymerase II promoter element with consensus (G/A)T(T/G/A)(T/A)(G/T)(T/G)(T/G)." [PMID:16858867] -synonym: "BREd" EXACT [] -synonym: "BREd motif" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:49:55Z - -[Term] -id: SO:0001664 -name: DCE -def: "A discontinuous core element of RNA polymerase II transcribed genes, situated downstream of the TSS. It is composed of three sub elements: SI, SII and SIII." [PMID:16858867] -synonym: "downstream core element" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:56:41Z - -[Term] -id: SO:0001665 -name: DCE_SI -def: "A sub element of the DCE core promoter element, with consensus sequence CTTC." [PMID:16858867, SO:ke] -synonym: "DCE SI" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:10Z - -[Term] -id: SO:0001666 -name: DCE_SII -def: "A sub element of the DCE core promoter element with consensus sequence CTGT." [PMID:16858867, SO:ke] -synonym: "DCE SII" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! 
DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:30Z - -[Term] -id: SO:0001667 -name: DCE_SIII -def: "A sub element of the DCE core promoter element with consensus sequence AGC." [PMID:16858867, SO:ke] -synonym: "DCE SIII" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:44Z - -[Term] -id: SO:0001668 -name: proximal_promoter_element -def: "DNA segment that ranges from about -250 to -40 relative to +1 of RNA transcription start site, where sequence specific DNA-binding transcription factors binds, such as Sp1, CTF (CCAAT-binding transcription factor), and CBF (CCAAT-box binding factor)." [PMID:12515390, PMID:9679020, SO:ml] -synonym: "proximal promoter element" RELATED [] -synonym: "specific transcription factor binding site" RELATED [] -is_a: SO:0001678 ! regulatory_promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T03:10:23Z - -[Term] -id: SO:0001669 -name: RNApol_II_core_promoter -def: "The minimal portion of the promoter required to properly initiate transcription in RNA polymerase II transcribed genes." [PMID:16858867] -synonym: "RNApol II core promoter" EXACT [] -is_a: SO:0000170 ! RNApol_II_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T03:13:41Z - -[Term] -id: SO:0001670 -name: distal_promoter_element -synonym: "distal promoter element" RELATED [] -is_a: SO:0001678 ! regulatory_promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T03:21:08Z - -[Term] -id: SO:0001671 -name: bacterial_RNApol_promoter_sigma_70 -synonym: "bacterial RNA polymerase promoter sigma 70" EXACT [] -is_a: SO:0000613 ! bacterial_RNApol_promoter -created_by: kareneilbeck -creation_date: 2010-10-06T01:41:34Z - -[Term] -id: SO:0001672 -name: bacterial_RNApol_promoter_sigma54 -synonym: "bacterial RNA polymerase promoter sigma54" EXACT [] -is_a: SO:0000613 ! 
bacterial_RNApol_promoter -created_by: kareneilbeck -creation_date: 2010-10-06T01:42:37Z - -[Term] -id: SO:0001673 -name: minus_12_signal -def: "A conserved region about 12-bp upstream of the start point of bacterial transcription units, involved with sigma factor 54." [PMID:18331472] -synonym: "minus 12 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001672 ! bacterial_RNApol_promoter_sigma54 -created_by: kareneilbeck -creation_date: 2010-10-06T01:44:57Z - -[Term] -id: SO:0001674 -name: minus_24_signal -def: "A conserved region about 12-bp upstream of the start point of bacterial transcription units, involved with sigma factor 54." [PMID:18331472] -synonym: "minus 24 signal" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001672 ! bacterial_RNApol_promoter_sigma54 -created_by: kareneilbeck -creation_date: 2010-10-06T01:45:24Z - -[Term] -id: SO:0001675 -name: A_box_type_1 -def: "An A box within an RNA polymerase III type 1 promoter." [SO:ke] -comment: The A box can be found in the promoters of type 1 and type 2 (pol III) so sub-typing here allows the part of relationship of the subtypes to remain true. -synonym: "A box type 1" RELATED [] -is_a: SO:0000619 ! A_box -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 -created_by: kareneilbeck -creation_date: 2010-10-06T05:43:43Z - -[Term] -id: SO:0001676 -name: A_box_type_2 -def: "An A box within an RNA polymerase III type 2 promoter." [SO:ke] -comment: The A box can be found in the promoters of type 1 and type 2 (pol III) so sub-typing here allows the part of relationship of the subtypes to remain true. -synonym: "A box type 2" RELATED [] -is_a: SO:0000619 ! A_box -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 -created_by: kareneilbeck -creation_date: 2010-10-06T05:44:18Z - -[Term] -id: SO:0001677 -name: intermediate_element -def: "A core promoter region of RNA polymerase III type 1 promoters." 
[PMID:12381659] -synonym: "IE" EXACT [] -synonym: "intermediate element" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 -created_by: kareneilbeck -creation_date: 2010-10-06T05:52:03Z - -[Term] -id: SO:0001678 -name: regulatory_promoter_element -def: "A promoter element that is not part of the core promoter, but provides the promoter with a specific regulatory region." [PMID:12381659] -synonym: "regulatory promoter element" RELATED [] -is_a: SO:0001659 ! promoter_element -created_by: kareneilbeck -creation_date: 2010-10-07T04:39:48Z - -[Term] -id: SO:0001679 -name: transcription_regulatory_region -def: "A regulatory region that is involved in the control of the process of transcription." [SO:ke] -subset: SOFA -synonym: "transcription regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:49:35Z - -[Term] -id: SO:0001680 -name: translation_regulatory_region -def: "A regulatory region that is involved in the control of the process of translation." [SO:ke] -synonym: "translation regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:52:45Z - -[Term] -id: SO:0001681 -name: recombination_regulatory_region -def: "A regulatory region that is involved in the control of the process of recombination." [SO:ke] -synonym: "recombination regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:53:35Z - -[Term] -id: SO:0001682 -name: replication_regulatory_region -def: "A regulatory region that is involved in the control of the process of nucleotide replication." [SO:ke] -synonym: "replication regulatory region" RELATED [] -is_a: SO:0005836 ! 
regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:54:09Z - -[Term] -id: SO:0001683 -name: sequence_motif -def: "A sequence motif is a nucleotide or amino-acid sequence pattern that may have biological significance." [http://en.wikipedia.org/wiki/Sequence_motif] -subset: SOFA -synonym: "sequence motif" RELATED [] -xref: http://en.wikipedia.org/wiki/Sequence_motif "wikipedia" -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-10-14T04:13:22Z - -[Term] -id: SO:0001684 -name: experimental_feature_attribute -def: "An attribute of an experimentally derived feature." [SO:ke] -synonym: "experimental feature attribute" RELATED [] -is_a: SO:0000733 ! feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:22:23Z - -[Term] -id: SO:0001685 -name: score -def: "The score of an experimentally derived feature such as a p-value." [SO:ke] -is_a: SO:0001684 ! experimental_feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:23:16Z - -[Term] -id: SO:0001686 -name: quality_value -def: "An experimental feature attribute that defines the quality of the feature in a quantitative way, such as a phred quality score." [SO:ke] -synonym: "quality value" RELATED [] -is_a: SO:0001684 ! experimental_feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:24:11Z - -[Term] -id: SO:0001687 -name: restriction_enzyme_recognition_site -def: "The nucleotide region (usually a palindrome) that is recognized by a restriction enzyme. This may or may not be equal to the restriction enzyme binding site." [SO:ke] -synonym: "restriction endonuclease recognition site" EXACT [] -synonym: "restriction enzyme recognition site" EXACT [] -is_a: SO:0001954 ! restriction_enzyme_region -created_by: kareneilbeck -creation_date: 2010-10-29T12:29:57Z - -[Term] -id: SO:0001688 -name: restriction_enzyme_cleavage_junction -def: "The boundary at which a restriction enzyme breaks the nucleotide sequence." 
[SO:ke] -synonym: "restriction enzyme cleavage junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2010-10-29T12:35:02Z - -[Term] -id: SO:0001689 -name: five_prime_restriction_enzyme_junction -def: "The restriction enzyme cleavage junction on the 5' strand of the nucleotide sequence." [SO:ke] -synonym: "5' restriction enzyme junction" EXACT [] -is_a: SO:0001694 ! single_strand_restriction_enzyme_cleavage_site -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:36:24Z - -[Term] -id: SO:0001690 -name: three_prime_restriction_enzyme_junction -synonym: "3' restriction enzyme junction" EXACT [] -is_a: SO:0001694 ! single_strand_restriction_enzyme_cleavage_site -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:37:52Z - -[Term] -id: SO:0001691 -name: blunt_end_restriction_enzyme_cleavage_site -synonym: "blunt end restriction enzyme cleavage site" EXACT [] -is_a: SO:0001687 ! restriction_enzyme_recognition_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:39:53Z - -[Term] -id: SO:0001692 -name: sticky_end_restriction_enzyme_cleavage_site -synonym: "sticky end restriction enzyme cleavage site" RELATED [] -is_a: SO:0001687 ! restriction_enzyme_recognition_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:40:50Z - -[Term] -id: SO:0001693 -name: blunt_end_restriction_enzyme_cleavage_junction -def: "A restriction enzyme cleavage site where both strands are cut at the same position." [SO:ke] -synonym: "blunt end restriction enzyme cleavage site" RELATED [] -is_a: SO:0001688 ! restriction_enzyme_cleavage_junction -relationship: part_of SO:0001691 ! 
blunt_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:43:14Z - -[Term] -id: SO:0001694 -name: single_strand_restriction_enzyme_cleavage_site -def: "A restriction enzyme cleavage site whereby only one strand is cut." [SO:ke] -synonym: "single strand restriction enzyme cleavage site" RELATED [] -is_a: SO:0001688 ! restriction_enzyme_cleavage_junction -created_by: kareneilbeck -creation_date: 2010-10-29T12:44:48Z - -[Term] -id: SO:0001695 -name: restriction_enzyme_single_strand_overhang -def: "A terminal region of DNA sequence where the end of the region is not blunt ended." [SO:ke] -synonym: "single strand overhang" EXACT [] -synonym: "sticky end" RELATED [] -is_a: SO:0001954 ! restriction_enzyme_region -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:48:35Z - -[Term] -id: SO:0001696 -name: experimentally_defined_binding_region -def: "A region that has been implicated in binding although the exact coordinates of binding may be unknown." [SO:ke] -synonym: "experimentally defined binding region" RELATED [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2010-11-02T11:39:59Z - -[Term] -id: SO:0001697 -name: ChIP_seq_region -def: "A region of sequence identified by CHiP seq technology to contain a protein binding site." [SO:ke] -synonym: "ChIP seq region" RELATED [] -is_a: SO:0001696 ! experimentally_defined_binding_region -relationship: contains SO:0000410 ! protein_binding_site -created_by: kareneilbeck -creation_date: 2010-11-02T11:43:07Z - -[Term] -id: SO:0001698 -name: ASPE_primer -def: "\"A primer containing an SNV at the 3' end for accurate genotyping." [http://www.ncbi.nlm.nih.gov/pubmed/11252801] -synonym: "allele specific primer extension primer" EXACT [] -synonym: "ASPE primer" EXACT [] -is_a: SO:0000112 ! 
primer -created_by: kareneilbeck -creation_date: 2010-11-11T03:25:21Z - -[Term] -id: SO:0001699 -name: dCAPS_primer -def: "A primer with one or more mismatches to the DNA template corresponding to a position within a restriction enzyme recognition site." [http://www.ncbi.nlm.nih.gov/pubmed/9628033] -synonym: "dCAPS primer" EXACT [] -synonym: "derived cleaved amplified polymorphic primer" EXACT [] -is_a: SO:0000112 ! primer -created_by: kareneilbeck -creation_date: 2010-11-11T03:27:09Z - -[Term] -id: SO:0001700 -name: histone_modification -def: "Histone modification is a post translationally modified region whereby residues of the histone protein are modified by methylation, acetylation, phosphorylation, ubiquitination, sumoylation, citrullination, or ADP-ribosylation." [http:en.wikipedia.org/wiki/Histone] -synonym: "histone modification" EXACT [] -synonym: "histone modification site" RELATED [] -is_a: SO:0001089 ! post_translationally_modified_region -is_a: SO:0001720 ! epigenetically_modified_region -relationship: has_quality SO:0000133 ! epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-31T10:22:08Z - -[Term] -id: SO:0001701 -name: histone_methylation_site -def: "A histone modification site where the modification is the methylation of the residue." [SO:ke] -synonym: "histone methylation" EXACT [] -synonym: "histone methylation site" EXACT [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:02Z - -[Term] -id: SO:0001702 -name: histone_acetylation_site -def: "A histone modification where the modification is the acylation of the residue." [SO:ke] -synonym: "histone acetylation" EXACT [] -synonym: "histone acetylation site" EXACT [] -is_a: SO:0001700 ! 
histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:27Z - -[Term] -id: SO:0001703 -name: H3K9_acetylation_site -def: "A kind of histone modification site, whereby the 9th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 acetylation site" EXACT [] -synonym: "H3K9Ac" RELATED [] -is_a: SO:0001973 ! histone_3_acetylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:05Z - -[Term] -id: SO:0001704 -name: H3K14_acetylation_site -def: "A kind of histone modification site, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K14 acetylation site" EXACT [] -synonym: "H3K14Ac" RELATED [] -is_a: SO:0001973 ! histone_3_acetylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:53Z - -[Term] -id: SO:0001705 -name: H3K4_monomethylation_site -def: "A kind of histone modification, whereby the 4th residue (a lysine), from the start of the H3 protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 mono-methylation site" EXACT [] -synonym: "H3K4me1" RELATED [] -is_a: SO:0001734 ! H3K4_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:28:14Z - -[Term] -id: SO:0001706 -name: H3K4_trimethylation -def: "A kind of histone modification site, whereby the 4th residue (a lysine), from the start of the H3 protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 tri-methylation" EXACT [] -synonym: "H3K4me3" RELATED [] -is_a: SO:0001734 ! H3K4_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:29:12Z - -[Term] -id: SO:0001707 -name: H3K9_trimethylation_site -def: "A kind of histone modification site, whereby the 9th residue (a lysine), from the start of the H3 histone protein is tri-methylated." 
[http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 tri-methylation site" EXACT [] -synonym: "H3K9Me3" RELATED [] -is_a: SO:0001736 ! H3K9_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:30:34Z - -[Term] -id: SO:0001708 -name: H3K27_monomethylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2K27 mono-methylation site" EXACT [] -synonym: "H2K27Me1" RELATED [] -is_a: SO:0001732 ! H3K27_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:31:54Z - -[Term] -id: SO:0001709 -name: H3K27_trimethylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K27 tri-methylation site" EXACT [] -synonym: "H3K27Me3" RELATED [] -is_a: SO:0001732 ! H3K27_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:32:41Z - -[Term] -id: SO:0001710 -name: H3K79_monomethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is mono- methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 mono-methylation site" EXACT [] -synonym: "H3K79me1" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:33:42Z - -[Term] -id: SO:0001711 -name: H3K79_dimethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is di-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 di-methylation site" EXACT [] -synonym: "H3K79Me2" RELATED [] -is_a: SO:0001735 ! 
H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:34:39Z - -[Term] -id: SO:0001712 -name: H3K79_trimethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 tri-methylation site" EXACT [] -synonym: "H3K79Me3" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:35:30Z - -[Term] -id: SO:0001713 -name: H4K20_monomethylation_site -def: "A kind of histone modification site, whereby the 20th residue (a lysine), from the start of the H4histone protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H4K20 mono-methylation site" EXACT [] -synonym: "H4K20Me1" RELATED [] -is_a: SO:0001701 ! histone_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:36:43Z - -[Term] -id: SO:0001714 -name: H2BK5_monomethylation_site -def: "A kind of histone modification site, whereby the 5th residue (a lysine), from the start of the H2B protein is methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2BK5 mono-methylation site" EXACT [] -is_a: SO:0001701 ! histone_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:38:12Z - -[Term] -id: SO:0001715 -name: ISRE -def: "An ISRE is a transcriptional cis regulatory region, containing the consensus region: YAGTTTC(A/T)YTTTYCC, responsible for increased transcription via interferon binding." [http://genesdev.cshlp.org/content/2/4/383.abstrac] -comment: Term requested via tracker (2981725) by Alan Ruttenberg, April 2010. It has been described as both an enhancer and a promoter, so the parent is the more general term. -synonym: "interferon stimulated response element" EXACT [] -is_a: SO:0001055 ! 
transcriptional_cis_regulatory_region -created_by: kareneilbeck -creation_date: 2010-04-05T11:15:08Z - -[Term] -id: SO:0001716 -name: histone_ubiqitination_site -def: "A histone modification site where ubiquitin may be added." [SO:ke] -synonym: "histone ubiquitination site" RELATED [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-04-13T10:12:18Z - -[Term] -id: SO:0001717 -name: H2B_ubiquitination_site -def: "A histone modification site on H2B where ubiquitin may be added." [SO:ke] -synonym: "H2BUbiq" RELATED [] -is_a: SO:0001716 ! histone_ubiqitination_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:13:28Z - -[Term] -id: SO:0001718 -name: H3K18_acetylation_site -def: "A kind of histone modification site, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K18 acetylation site" EXACT [] -synonym: "H3K18Ac" RELATED [] -is_a: SO:0001973 ! histone_3_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:39:35Z - -[Term] -id: SO:0001719 -name: H3K23_acylation_site -def: "A kind of histone modification, whereby the 23rd residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K23 acylation site" RELATED [] -synonym: "H3K23Ac" RELATED [] -is_a: SO:0001973 ! histone_3_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:42:45Z - -[Term] -id: SO:0001720 -name: epigenetically_modified_region -def: "A biological region implicated in inherited changes caused by mechanisms other than changes in the underlying DNA sequence." [http://en.wikipedia.org/wiki/Epigenetics, SO:ke] -subset: SOFA -synonym: "epigenetically modified region" RELATED [] -is_a: SO:0001411 ! biological_region -relationship: has_quality SO:0000133 ! 
epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-27T12:02:29Z - -[Term] -id: SO:0001721 -name: H3K27_acylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K27 acylation site" EXACT [] -synonym: "H3K27Ac" RELATED [] -is_a: SO:0001973 ! histone_3_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:44:09Z - -[Term] -id: SO:0001722 -name: H3K36_monomethylation_site -def: "A kind of histone modification site, whereby the 36th residue (a lysine), from the start of the H3 histone protein is mono-methylated." [SO:ke] -synonym: "H3K36 mono-methylation site" EXACT [] -synonym: "H3K36 -subset: SOFA -is_a: SO:0000837 ! UTR_region - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located between the promoter and a structural gene that causes partial termination of transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of sequence which may be used to manufacture a longer assembled, sequence." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000147 -name: exon -def: "A region of the genome that codes for portion of spliced messenger RNA (SO:0000234); may contain 5'-untranslated region (SO:0000204), all open reading frames (SO:0000236) and 3'-untranslated region (SO:0000205)." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unvailable bases." [SO:ls] -subset: SOFA -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in E. coli or some other organism." [http://www.geospiza.com/community/support/glossary/] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000159 -name: deletion -def: "The sequence that is deleted." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0000109 ! sequence_variant - -[Term] -id: SO:0000161 -name: methylated_A -def: "A methylated adenine." [SO:ke] -subset: SOFA -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "The position where intron is excised." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_splice_site -def: "The junction between the 3 prime end of an exon and the following intron." 
[http://www.ucl.ac.uk/ ~ ucbhjow/b241/glossary.html] -subset: SOFA -synonym: "donor" RELATED [] -synonym: "donor_splice_site" RELATED [] -synonym: "splice_donor_site" RELATED [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000164 -name: three_prime_splice_site -def: "The junction between the 3 prime end of an intron and the following exon." [http://www.ucl.ac.uk/ ~ ucbhjow/b241/glossary.html] -subset: SOFA -synonym: "acceptor" RELATED [] -synonym: "acceptor_splice_site" RELATED [] -synonym: "splice_acceptor_site" RELATED [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000167 -name: promoter -def: "The region on a DNA molecule involved in RNA polymerase binding to initiate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -subset: SOFA -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -is_a: SO:0000347 ! 
nucleotide_match - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000185 -name: primary_transcript -def: "The primary (initial, unprocessed) transcript; includes five_prime_clip (SO:0000555), five_prime_untranslated_region (SO:0000204), open reading frames (SO:0000236), introns (SO:0000188) and three_prime_ untranslated_region (three_prime_UTR), and three_prime_clip (SO:0000557)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -synonym: "precursor_RNA" RELATED [] -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000188 -name: intron -def: "A segment of DNA that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A polymorphism detectable by the size differences in DNA fragments generated by a restriction enzyme." [PMID:6247908] -subset: SOFA -synonym: "restriction_fragment_length_polymorphism" RELATED [] -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime and three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated_region" RELATED [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -synonym: "five_prime_untranslated_region" RELATED [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -synonym: "three_prime_untranslated_region" RELATED [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000233 -name: processed_transcript -def: "A transcript which has undergone processing to remove parts such as introns and transcribed_spacer_regions." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: mRNA does not contain introns as it is a processd_transcript.nThe equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. -subset: SOFA -synonym: "messenger_RNA" RELATED [] -is_a: SO:0000233 ! processed_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds to a transcription factor." [SO:ke] -subset: SOFA -synonym: "transcription_factor_binding_site" RELATED [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The inframe interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER" [SO:ma, SO:rb] -comment: The definition was modified by Rama. This terms now basically is the same as a CDS. This must be revised. 
-subset: SOFA -synonym: "open_reading_frame" RELATED [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000239 -name: flanking_region -def: "The DNA sequences extending on either side of a specific locus." [http://biotech.icmb.utexas.edu/search/dict-search.mhtml] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types, ISBN:0198506732] -subset: SOFA -synonym: "ribsomal_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. tRNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). tRNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -subset: SOFA -synonym: "transfer_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000274 -name: snRNA -def: "Small non-coding RNA in the nucleoplasm. A small nuclear RNA molecule involved in pre-mRNA splicing and processing" [ems:WB, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types, PMID:11733745] -subset: SOFA -synonym: "small_nuclear_RNA" RELATED [] -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000275 -name: snoRNA -def: "Small nucleolar RNAs (snoRNAs) are involved in the processing and modification of rRNA in the nucleolus. There are two main classes of snoRNAs: the box C/D class, and the box H/ACA class. U3 snoRNA is a member of the box C/D class. Indeed, the box C/D element is a subset of the six short sequence elements found in all U3 snoRNAs, namely boxes A, A', B, C, C', and D. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -subset: SOFA -synonym: "small_nucleolar_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. miRNAs are produced from precursor molecules (SO:0000647) that can form local hairpin strcutures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpinprecursor molecule. miRNAs may trigger the cleavage of their target molecules oract as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro_RNA" RELATED [] -is_a: SO:0000370 ! 
small_regulatory_ncRNA - -[Term] -id: SO:0000289 -name: microsatellite -def: "A very short unit sequence of DNA (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. Example: GCTGA-----TCAGC." [SO:ke] -subset: SOFA -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000305 -name: modified_base_site -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G or (in RNA) U." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -comment: modified base: -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -is_a: SO:0000305 ! modified_base_site - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG_island" RELATED [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -is_a: SO:0000657 ! 
repeat_region - -[Term] -id: SO:0000315 -name: transcription_start_site -def: "The site where transcription begins." [SO:ke] -subset: SOFA -synonym: "TSS" RELATED [] -is_a: SO:0000699 ! junction -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding_sequence" RELATED [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence_tag_site" RELATED [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! 
processed_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "noncoding_conserved_region" RELATED [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/ ~ ucbhjow/b241/glossary.html] -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of long DNA molecule." [http://biotech.icmb.utexas.edu/search/dict-search.mhtml] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -is_a: SO:0005836 ! 
regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "Expressed Sequence Tag: The sequence of a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [http://genomics.phrma.org/lexicon/e.html] -subset: SOFA -synonym: "expressed_sequence_tag" RELATED [] -is_a: SO:0000695 ! reagent -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -is_a: SO:0000343 ! match - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -is_a: SO:0000343 ! match - -[Term] -id: SO:0000353 -name: assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together signify a unique amino acid or the termination of translation." [http://genomics.phrma.org/lexicon/c.html] -subset: SOFA -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -is_a: SO:0000109 ! sequence_variant -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -is_a: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000375 -name: rRNA_5.8S -def: "5.8S ribosomal RNA (5.8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5.8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S_rRNA" RELATED [] -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http:rnaworld.bio.ukans.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA -is_a: SO:0000374 ! ribozyme - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -is_a: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. 
Its best characterised activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -is_a: SO:0000374 ! ribozyme - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesises telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -is_a: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. 
The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA -An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http:http\://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=1 2409455&dopt=Abstract] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000403 -name: U14_snRNA -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonuceoprotein complex. 
The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "18S_rRNA -A large polynucleotide which functions as a part of the small subunit of the ribosome" [SO:ke] -subset: SOFA -synonym: "16S_rRNA" RELATED [] -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000409 -name: binding_site -def: "A region on the surface of a molecule that may interact with another molecule." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "Any of the individual polynucleotide sequences produced by digestion of DNA with a restriction endonuclease." [http://www.agron.missouri.edu/cgi-bin/sybgw_mdb/mdb3/Term/119] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequences differs from that of a specified sequence." [SO:ke] -subset: SOFA -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000418 -name: signal_peptide -def: "The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence." 
[http:www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "signal peptide coding sequence" RELATED [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0000419 -name: mature_peptide -def: "The coding sequence for the mature or final peptide or protein product following post-translational modification." [http:www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously_replicating_sequence" RELATED [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A small, 17-28-nt, small interfering RNA derived from transcripts ofrepetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entitity." [SO:cjm] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendent of an exon." [SO:ke] -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [CJM:SO] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path.SO:0000472." 
[SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "noncoding_primary_transcript" RELATED [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continous piece of sequence similar to the 'virtual contig' concept of ensembl." [SO:ke] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000553 -name: polyA_site -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! 
processed_transcript - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyse their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -subset: SOFA -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -is_a: SO:0000188 ! intron -is_a: SO:0000374 ! ribozyme - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. 
These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal_recognition_particle_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a perfect duplex (except for the oligoU tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. 
Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -subset: SOFA -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000605 -name: intergenic_region -def: "The region between two known genes." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch_point" RELATED [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The site where transcription ends." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenence of the end," [SO:ma] -subset: SOFA -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "Combination of short DNA sequence elements which suppress the transcription of an adjacent gene or genes." 
[http://www.brunel.ac.uk/depts/bio/project/old_hmg/gloss3.htm#s] -subset: SOFA -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000627 -name: insulator -subset: SOFA -synonym: "insulator_element" RELATED [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -is_a: SO:0000830 ! chromosome_region - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repetitive sequence spanning 500 to 20,000 base pairs (a repeat unit is 5 - 30 base pairs)." [http://www.rerf.or.jp/eigo/glossary/minisate.htm] -subset: SOFA -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "Small RNA molecule that is the product of a longerexogenous or endogenous dsRNA, which is either a bimolecular duplexe or very longhairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulatefrom both strands of the dsRNA. sRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small_interfering_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small_temporal_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. 
In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilises 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "23S_rRNA" RELATED [] -synonym: "28S_rRNA" RELATED [] -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000655 -name: ncRNA -def: "An mRNA sequence that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: ncRNA is a processed_transcript so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding_RNA" RELATED [] -is_a: SO:0000233 ! processed_transcript - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "interspersed_repeat" RELATED [] -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -subset: SOFA -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000667 -name: insertion -def: "A region of sequence identified as having been inserted." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0000109 ! 
sequence_variant - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeting by a nuclease enzyme." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occured." [SO:ke] -subset: SOFA -is_a: SO:0000109 ! sequence_variant -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives (alleles) exist in normal individuals in some population(s), wherein the least frequent allele has an abundance of 1% or greater." [http://www.cgr.ki.se/cgb/groups/brookes/Articles/essence_of_snps_article.pdf] -subset: SOFA -synonym: "single_nucleotide_polymorphism" RELATED [] -is_a: SO:1000008 ! point_mutation - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006 -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" RELATED [] -is_a: SO:0000695 ! 
reagent - -[Term] -id: SO:0000699 -name: junction -def: "A junction refers to an interbase location of zero in a sequence." [SO:ke] -subset: SOFA -synonym: "boundary" RELATED [] -is_a: SO:0000110 ! located_sequence_feature - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A locatable region of genomic sequence, corresponding to a unit of inheritance, which is associated with regulatory regions, transcribed regions and/or other functional sequence regions" [SO:rd] -subset: SOFA -is_a: SO:0000001 ! region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjacent copies of a DNA sequence." [http://www.sci.sdsu.edu/ ~ smaloy/Glossary/T.html] -subset: SOFA -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The process that produces mature transcripts by combining exons of independent pre-mRNA molecules. The acceptor site lies on the 3' of these molecules." [SO:ke] -subset: SOFA -is_a: SO:0000164 ! three_prime_splice_site - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It does not contain the start or stop codon." [SO:rb] -comment: This term was added after a request by SGD.nAgust 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000724 -name: origin_of_transfer -def: "A region of a DNA molecule whre transfer is initiated during the process of conjugation or mobilization." [http:http\://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -synonym: "oriT" RELATED [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000725 -name: transit_peptide -def: "The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein: this domain is involved in post translational import of the protein into the organelle." [http:http\://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -comment: Added to bring SO inline with the embl ddbj genbank feature table. -subset: SOFA -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unkown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! assembly - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -is_a: SO:0005836 ! regulatory_region -relationship: member_of SO:0005855 ! 
gene_group - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000830 -name: chromosome_region -def: "A region of a chromosome" [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: processed_transcript_region -def: "A region of a processed transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a processed transcript and give them an is_a path to the root. -subset: SOFA -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A region of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root -subset: SOFA -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -subset: SOFA -is_a: SO:0000834 ! processed_transcript_region -relationship: part_of SO:0000234 ! 
mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region og UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000839 -name: polypeptide_region -def: "A region of sequence that can be translated into polypeptide sequence. This sequence can be reprsenseted as nucleotide or aminoacid. This sequence must be part of an mRNA sequence." [SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000851 -name: CDS_region -subset: SOFA -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0005836 -name: regulatory_region -def: "A DNA sequence that controls the expression of a gene." [http://www.genpromag.com/scripts/glossary.asp?LETTER=R] -subset: SOFA -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0005855 -name: gene_group -def: "A collection of related genes." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:1000002 -name: substitution -def: "Any change in genomic DNA caused by a single event." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0000109 ! 
sequence_variant - -[Term] -id: SO:1000005 -name: complex_substitution -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000008 -name: point_mutation -def: "A mutation event where a single DNA nucleotide changes into another nucleotide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000036 -name: inversion -def: "A continuous nucleotide sequence is inverted in the same position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0000109 ! sequence_variant - -[Term] -id: SO:1001284 -name: regulon -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." [ISBN:0198506732] -subset: SOFA -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:2000061 -name: databank_entry -def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -is_a: SO:0000695 ! reagent - -[Typedef] -id: adjacent_to -name: adjacent_to -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence." [SO:ke] -subset: SOFA -domain: SO:0000110 ! located_sequence_feature -range: SO:0000110 ! located_sequence_feature -is_symmetric: true - -[Typedef] -id: derives_from -name: derives_from -subset: SOFA -is_transitive: true - -[Typedef] -id: homologous_to -name: homologous_to -subset: SOFA -is_symmetric: true -is_a: similar_to ! 
similar_to - -[Typedef] -id: member_of -name: member_of -comment: A subtype of part_of.ninverse is collection_of.nWinston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. -subset: SOFA -is_a: part_of ! part_of - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -subset: SOFA -is_a: homologous_to ! homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: paralogous_to -name: paralogous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: part_of -name: part_of -subset: SOFA -is_transitive: true - -[Typedef] -id: similar_to -name: similar_to -subset: SOFA -is_symmetric: true - diff --git a/annotation/NBIS/Ontology/SOFA/sofa_2_2.obo b/annotation/NBIS/Ontology/SOFA/sofa_2_2.obo deleted file mode 100644 index 802579112..000000000 --- a/annotation/NBIS/Ontology/SOFA/sofa_2_2.obo +++ /dev/null @@ -1,1723 +0,0 @@ -format-version: 1.2 -date: 30:08:2007 13:40 -saved-by: kareneilbeck -auto-generated-by: OBO-Edit 1.101 -subsetdef: biosapiens "biosapiens protein feature ontology" -subsetdef: SOFA "SO feature annotation" -default-namespace: sequence -remark: autogenerated-by\: DAG-Edit version 1.417\nsaved-by\: eilbeck\ndate\: Tue May 11 15\:18\:44 PDT 2004\nversion\: $Revision\: 1.45 $ - -[Term] -id: SO:0000000 -name: Sequence_Ontology -subset: SOFA - -[Term] -id: SO:0000001 -name: region -def: "A sequence_feature with an extent greater than zero." [SO:ke] -subset: SOFA -synonym: "sequence" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000006 -name: PCR_product -def: "A region amplified by a PCR reaction." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
-subset: SOFA -synonym: "amplicon" RELATED [] -synonym: "PCR product" EXACT [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000007 -name: read_pair -def: "A pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000013 -name: scRNA -def: "Any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -subset: SOFA -synonym: " small cytoplasmic RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000038 -name: match_set -def: "A collection of match parts." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000039 -name: match_part -def: "A part of a match, for example an hsp from blast isa match_part." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000343 ! match - -[Term] -id: SO:0000050 -name: gene_part -def: "A part of a gene, that has no other route in the ontology back to region. This concept is necessary for logical inference as these parts must have the properties of region. It is also allows us to associate all the parts of genes with a gene." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000057 -name: operator -def: "A regulatory element of an operon to which activators or repressors bind hereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -synonym: "operator segment" EXACT [] -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000101 -name: transposable_element -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -synonym: " transposon" EXACT [] - -[Term] -id: SO:0000102 -name: expressed_sequence_match -def: "A match to an EST or cDNA sequence." 
[SO:ke] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -def: "The end of the clone insert." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000104 -name: polypeptide -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000001 ! region -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000109 -name: sequence_variant_obs -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000110 -name: sequence_feature -def: "An extent of biological sequence." [SO:ke] -subset: SOFA -synonym: "located sequence feature" RELATED [] -synonym: "located_sequence_feature" EXACT [] -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0000112 -name: primer -def: "A short preexisting polynucleotide chain to which new deoxyribonucleotides can be added by DNA polymerase." [http://www.ornl.gov/TechResources/Human_Genome/publicat/primer2001/glossary.html] -subset: SOFA -synonym: "primer oligonucleotide" EXACT [] -synonym: "primer polynucleotide" EXACT [] -synonym: "primer sequence" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000113 -name: proviral_region -def: "A viral sequence which has integrated into a host genome." [SO:ke] -subset: SOFA -synonym: "proviral sequence" RELATED [] - -[Term] -id: SO:0000114 -name: methylated_C -def: "A methylated deoxy-cytosine." [SO:ke] -subset: SOFA -synonym: "methylated C" EXACT [] -synonym: "methylated cytosine" EXACT [] -synonym: "methylated cytosine base" EXACT [] -synonym: "methylated cytosine residue" EXACT [] -is_a: SO:0000306 ! 
methylated_base_feature - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns. -subset: SOFA -synonym: "pre mRNA" RELATED [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -comment: Gene:. -subset: SOFA -is_a: SO:0000837 ! UTR_region - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of sequence which may be used to manufacture a longer assembled, sequence." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000147 -name: exon -def: "A region that codes for portion of spliced messenger RNA (SO:0000234); may contain 5'-untranslated region (SO:0000204), all open reading frames (SO:0000236) and 3'-untranslated region (SO:0000205)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." 
[SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unvailable bases." [SO:ls] -subset: SOFA -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in E. coli or some other organism." [http://www.geospiza.com/community/support/glossary/] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000159 -name: deletion -def: "The point at which a deletion occured." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000161 -name: methylated_A -def: "A methylated adenine." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "The position where intron is excised." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_splice_site -def: "The junction between the 3 prime end of an exon and the following intron." 
[http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000164 -name: three_prime_splice_site -def: "The junction between the 3 prime end of an intron and the following exon." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology.\nThe region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." 
[SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A segment of DNA that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A polymorphism detectable by the size differences in DNA fragments generated by a restriction enzyme." [PMID:6247908] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime and three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! 
mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000233 -name: processed_transcript -def: "A transcript which has undergone the necessary modifications for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processd_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -is_a: SO:0000233 ! processed_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds a TF complex [GO:0005667]." 
[SO:ke] -subset: SOFA -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The inframe interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SO:ma, SO:rb] -comment: The definition was modified by Rama. This terms now basically is the same as a CDS. This must be revised. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000239 -name: flanking_region -def: "The DNA sequences extending on either side of a specific locus." [http://biotech.icmb.utexas.edu/search/dict-search.mhtml] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: " ribosomal ribonucleic acid" EXACT [] -synonym: "ribsomal RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. 
permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000274 -name: snRNA -def: "Small non-coding RNA in the nucleoplasm. A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [ems:WB, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000275 -name: snoRNA -def: "Small nucleolar RNAs (snoRNAs) are involved in the processing and modification of rRNA in the nucleolus. There are two main classes of snoRNAs: the box C/D class, and the box H/ACA class. U3 snoRNA is a member of the box C/D class. Indeed, the box C/D element is a subset of the six short sequence elements found in all U3 snoRNAs, namely boxes A, A', B, C, C', and D. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. 
Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpinprecursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000289 -name: microsatellite -def: "A very short unit sequence of DNA (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: " ori" EXACT [] -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000305 -name: modified_base_site -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G or (in RNA) U." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -is_a: SO:0000305 ! modified_base_site - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: transcription_start_site -def: "The site where transcription begins." [SO:ke] -subset: SOFA -synonym: "TSS" EXACT [] -is_a: SO:0000699 ! junction -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." 
[SO:ke] -subset: SOFA -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! processed_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). 
In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA - -[Term] -id: SO:0000345 -name: EST -def: "Expressed Sequence Tag: The sequence of a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." 
[http://genomics.phrma.org/lexicon/e.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000695 ! reagent -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -is_a: SO:0000343 ! match - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -is_a: SO:0000343 ! match - -[Term] -id: SO:0000353 -name: assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together signify a unique amino acid or the termination of translation." [http://genomics.phrma.org/lexicon/c.html] -subset: SOFA -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -is_a: SO:0000372 ! 
enzymatic_RNA - -[Term] -id: SO:0000375 -name: rRNA_5.8S -def: "5. 8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5. 8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5. 8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http://rnaworld.bio.ukans.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA -is_a: SO:0000374 ! ribozyme - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -is_a: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterised activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. 
RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -is_a: SO:0000374 ! ribozyme - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesises telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -is_a: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). 
Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA -An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000403 -name: U14_snRNA -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonuceoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide which functions as a part of the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S rRNA" RELATED [] -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: Discrete. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "Any of the individual polynucleotide sequences produced by digestion of DNA with a restriction endonuclease." [http://www.agron.missouri.edu/cgi-bin/sybgw_mdb/mdb3/Term/119] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequences differs from that of a specified sequence." [SO:ke] -subset: SOFA -is_a: SO:0000700 ! 
remark - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminal that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "signal peptide coding sequence" EXACT [] -synonym: "signal_peptide" EXACT [] - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The extent of a polypeptide chain in the mature protein." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" EXACT [] -synonym: "mature peptide" RELATED [] -synonym: "mature_protein_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A small, 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." 
[SO:cjm] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendent of an exon." [SO:ke] -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [CJM:SO] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. 
We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000553 -name: polyA_site -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! processed_transcript - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyse their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -subset: SOFA -is_a: SO:0000188 ! 
intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -is_a: SO:0000188 ! intron -is_a: SO:0000374 ! ribozyme - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added) oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." 
[http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -subset: SOFA -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000605 -name: intergenic_region -def: "The region between two known genes." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." 
[SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The site where transcription ends." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "Combination of short DNA sequence elements which suppress the transcription of an adjacent gene or genes." [http://www.brunel.ac.uk/depts/bio/project/old_hmg/gloss3.htm] -subset: SOFA - -[Term] -id: SO:0000627 -name: insulator -def: "A trancriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repetitive sequence spanning 500 to 20,000 base pairs (a repeat unit is 5 - 30 base pairs)." [http://www.rerf.or.jp/eigo/glossary/minisate.htm] -subset: SOFA -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -subset: SOFA -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -subset: SOFA -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilises 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S rRNA" EXACT [] -is_a: SO:0000651 ! 
large_subunit_rRNA - -[Term] -id: SO:0000655 -name: ncRNA -def: "An mRNA sequence that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -is_a: SO:0000233 ! processed_transcript - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "interspersed repeat" EXACT [] -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -subset: SOFA -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000667 -name: insertion -def: "A region of sequence that has been inserted." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeting by a nuclease enzyme." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occured." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! 
junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives (alleles) exist in normal individuals in some population(s), wherein the least frequent allele has an abundance of 1% or greater." [http://www.cgr.ki.se/cgb/groups/brookes/Articles/essence_of_snps_article.pdf] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." 
[SO:ke] -subset: SOFA -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology.\nA gene may be considered as a unit of inheritance. -subset: SOFA -is_a: SO:0000001 ! region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjacent copies of a DNA sequence." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The process that produces mature transcripts by combining exons of independent pre-mRNA molecules. The acceptor site lies on the 3' of these molecules." [SO:ke] -subset: SOFA -is_a: SO:0000164 ! three_prime_splice_site - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SO:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminal of the peptide that directs the protein to an organelle (chloroplast, mitochonrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit_peptide" EXACT [] - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unkown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! assembly - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -is_a: SO:0005836 ! regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. -subset: SOFA -is_a: SO:0000462 ! 
pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: processed_transcript_region -def: "A region of a processed transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a processed transcript and give them an is_a path to the root. -subset: SOFA -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A region of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -is_a: SO:0000834 ! 
processed_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a protein." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" EXACT [] -synonym: "positional polypeptide feature" EXACT [] -synonym: "region or site annotation" EXACT [] -is_a: SO:0000001 ! region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000851 -name: CDS_region -subset: SOFA -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide which functions as a part of the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S rRNA" RELATED [] -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "23S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -subset: SOFA -synonym: "25S rRNA" EXACT [] -is_a: SO:0000651 ! 
large_subunit_rRNA - -[Term] -id: SO:0005836 -name: regulatory_region -def: "A DNA sequence that controls the expression of a gene." [http://www.genpromag.com/scripts/glossary.asp?LETTER=R] -subset: SOFA -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0005855 -name: gene_group -def: "A collection of related genes." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:1000002 -name: substitution -def: "Any change in genomic DNA caused by a single event." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:1000005 -name: complex_substitution -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000008 -name: point_mutation -def: "A single nucleotide change which has occurred at the same position of a corresponding nucleotide in a reference sequence." [SO:immuno_workshop] -subset: SOFA -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000036 -name: inversion -def: "A continuous nucleotide sequence is inverted in the same position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:1001284 -name: regulon -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." [ISBN:0198506732] -subset: SOFA -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:2000061 -name: databank_entry -def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -is_a: SO:0000695 ! reagent - -[Typedef] -id: adjacent_to -name: adjacent_to -def: "A geometric operator, specified in Egenhofer 1989. 
Two features meet if they share a junction on the sequence." [SO:ke] -subset: SOFA -domain: SO:0000110 ! sequence_feature -range: SO:0000110 ! sequence_feature - -[Typedef] -id: associated_with -name: associated_with -comment: This relationship is vague and up for discussion. -is_symmetric: true - -[Typedef] -id: derives_from -name: derives_from -subset: SOFA -is_transitive: true - -[Typedef] -id: genome_of -name: genome_of - -[Typedef] -id: has_genome_location -name: has_genome_location -is_obsolete: true - -[Typedef] -id: has_origin -name: has_origin - -[Typedef] -id: has_part -name: has_part - -[Typedef] -id: has_quality -name: has_quality -comment: The relationship between a feature and an atrribute. - -[Typedef] -id: homologous_to -name: homologous_to -subset: SOFA -is_symmetric: true -is_a: similar_to ! similar_to - -[Typedef] -id: member_of -name: member_of -comment: A subtype of part_of. Inverse is collection_of. Winston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. -subset: SOFA -is_transitive: true - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -subset: SOFA -is_a: homologous_to ! homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: paralogous_to -name: paralogous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! 
homologous_to - -[Typedef] -id: part_of -name: part_of -namespace: BS -subset: SOFA -is_transitive: true - -[Typedef] -id: position_of -name: position_of - -[Typedef] -id: regulated_by -name: regulated_by -is_obsolete: true - -[Typedef] -id: sequence_of -name: sequence_of - -[Typedef] -id: similar_to -name: similar_to -subset: SOFA -is_symmetric: true - -[Typedef] -id: variant_of -name: variant_of -def: "A' is a variant (mutation) of A = definition every instance of A' is either an immediate mutation of s=ome instance of A, or there is a chain of immediate mutation processes linking A' to some instance of A." [SO:immuno_workshop] -comment: Added to SO during the immunology workshop, June 2007. This relationship was approved by Barry Smith. - diff --git a/annotation/NBIS/Ontology/SOFA/sofa_2_3.obo b/annotation/NBIS/Ontology/SOFA/sofa_2_3.obo deleted file mode 100644 index d5be4fbeb..000000000 --- a/annotation/NBIS/Ontology/SOFA/sofa_2_3.obo +++ /dev/null @@ -1,1815 +0,0 @@ -format-version: 1.2 -date: 30:01:2008 17:03 -saved-by: kareneilbeck -auto-generated-by: OBO-Edit 1.101 -subsetdef: biosapiens "biosapiens protein feature ontology" -subsetdef: SOFA "SO feature annotation" -default-namespace: sequence -remark: autogenerated-by\: DAG-Edit version 1.417\nsaved-by\: eilbeck\ndate\: Tue May 11 15\:18\:44 PDT 2004\nversion\: $Revision\: 1.45 $ - -[Term] -id: SO:0000000 -name: Sequence_Ontology -subset: SOFA - -[Term] -id: SO:0000001 -name: region -def: "A sequence_feature with an extent greater than zero." [SO:ke] -subset: SOFA -synonym: "sequence" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000004 -name: interior_coding_exon -subset: SOFA - -[Term] -id: SO:0000006 -name: PCR_product -def: "A region amplified by a PCR reaction." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "amplicon" RELATED [] -synonym: "PCR product" EXACT [] -is_a: SO:0000695 ! 
reagent - -[Term] -id: SO:0000007 -name: read_pair -def: "A pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000013 -name: scRNA -def: "Any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -subset: SOFA -synonym: " small cytoplasmic RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000038 -name: match_set -def: "A collection of match parts." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000039 -name: match_part -def: "A part of a match, for example an hsp from blast isa match_part." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000343 ! match - -[Term] -id: SO:0000050 -name: gene_part -def: "A part of a gene, that has no other route in the ontology back to region. This concept is necessary for logical inference as these parts must have the properties of region. It also allows us to associate all the parts of genes with a gene." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000057 -name: operator -def: "A regulatory element of an operon to which activators or repressors bind thereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -synonym: "operator segment" EXACT [] -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000101 -name: transposable_element -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -synonym: " transposon" EXACT [] - -[Term] -id: SO:0000102 -name: expressed_sequence_match -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -is_a: SO:0000347 ! 
nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -def: "The end of the clone insert." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000104 -name: polypeptide -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000001 ! region -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000109 -name: sequence_variant_obs -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000110 -name: sequence_feature -def: "An extent of biological sequence." [SO:ke] -subset: SOFA -synonym: "located sequence feature" RELATED [] -synonym: "located_sequence_feature" EXACT [] -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0000112 -name: primer -def: "A short preexisting polynucleotide chain to which new deoxyribonucleotides can be added by DNA polymerase." [http://www.ornl.gov/TechResources/Human_Genome/publicat/primer2001/glossary.html] -subset: SOFA -synonym: "primer oligonucleotide" EXACT [] -synonym: "primer polynucleotide" EXACT [] -synonym: "primer sequence" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000113 -name: proviral_region -def: "A viral sequence which has integrated into a host genome." [SO:ke] -subset: SOFA -synonym: "proviral sequence" RELATED [] - -[Term] -id: SO:0000114 -name: methylated_C -def: "A methylated deoxy-cytosine." [SO:ke] -subset: SOFA -synonym: "methylated C" EXACT [] -synonym: "methylated cytosine" EXACT [] -synonym: "methylated cytosine base" EXACT [] -synonym: "methylated cytosine residue" EXACT [] -is_a: SO:0000306 ! 
methylated_base_feature - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns. -subset: SOFA -synonym: "pre mRNA" RELATED [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -comment: Gene:. -subset: SOFA -is_a: SO:0000837 ! UTR_region - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of sequence which may be used to manufacture a longer assembled, sequence." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! assembly -relationship: part_of SO:0000719 ! 
ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unvailable bases." [SO:ls] -subset: SOFA -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in E. coli or some other organism." [http://www.geospiza.com/community/support/glossary/] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000159 -name: deletion -def: "The point at which a deletion occured." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000161 -name: methylated_A -def: "A methylated adenine." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "The position where intron is excised." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_splice_site -def: "The junction between the 3 prime end of an exon and the following intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0000162 ! 
splice_site - -[Term] -id: SO:0000164 -name: three_prime_splice_site -def: "The junction between the 3 prime end of an intron and the following exon." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology.\nThe region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -is_a: SO:0000347 ! 
nucleotide_match - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A segment of DNA that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A polymorphism detectable by the size differences in DNA fragments generated by a restriction enzyme." [PMID:6247908] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000196 -name: five_prime_exon_coding_region -def: "The sequence of the 5' exon that encodes for protein." [SO:ke] -subset: SOFA - -[Term] -id: SO:0000197 -name: three_prime_exon_coding_region -def: "The sequence of the 3' exon that encodes for protein." [SO:ke] -subset: SOFA - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon -relationship: part_of SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime and three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000233 -name: processed_transcript -def: "A transcript which has undergone the necessary modifications for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
-subset: SOFA -synonym: "messenger RNA" EXACT [] -is_a: SO:0000233 ! processed_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds a TF complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The inframe interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SO:ma, SO:rb] -comment: The definition was modified by Rama. This term is now basically the same as a CDS. This must be revised. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000239 -name: flanking_region -def: "The DNA sequences extending on either side of a specific locus." [http://biotech.icmb.utexas.edu/search/dict-search.mhtml] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. 
Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [ems:WB, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. 
Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000289 -name: microsatellite -def: "A very short unit sequence of DNA (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: " ori" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000305 -name: modified_base_site -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G or (in RNA) U." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -is_a: SO:0000305 ! modified_base_site - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: transcription_start_site -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "TSS" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." 
[http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! processed_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -is_a: SO:0000462 ! 
pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA - -[Term] -id: SO:0000345 -name: EST -def: "Expressed Sequence Tag: The sequence of a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [http://genomics.phrma.org/lexicon/e.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000695 ! reagent -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -is_a: SO:0000343 ! 
match - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -is_a: SO:0000343 ! match - -[Term] -id: SO:0000353 -name: assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together signify a unique amino acid or the termination of translation." [http://genomics.phrma.org/lexicon/c.html] -subset: SOFA -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -intersection_of: SO:0000673 ! transcript - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -intersection_of: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000375 -name: rRNA_5.8S -def: "5.8S ribosomal RNA (5.8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. 
Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5.8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http://rnaworld.bio.ukans.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterised activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. 
Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesises telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). 
Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: Discrete. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "Any of the individual polynucleotide sequences produced by digestion of DNA with a restriction endonuclease." [http://www.agron.missouri.edu/cgi-bin/sybgw_mdb/mdb3/Term/119] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. 
-subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "signal peptide coding sequence" EXACT [] -synonym: "signal_peptide" EXACT [] - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The extent of a polypeptide chain in the mature protein." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" EXACT [] -synonym: "mature peptide" RELATED [] -synonym: "mature_protein_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A small, 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! 
golden_path - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [CJM:SO] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three_prime_exon_noncoding_region" EXACT [] - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five_prime_exon_noncoding_region" EXACT [] - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. 
-subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000553 -name: polyA_site -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! processed_transcript - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyse their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -is_a: SO:0000588 ! 
autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -intersection_of: SO:0000188 ! intron - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." 
[http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000605 -name: intergenic_region -def: "The region between two known genes." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." 
[SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "Combination of short DNA sequence elements which suppress the transcription of an adjacent gene or genes." [http://www.brunel.ac.uk/depts/bio/project/old_hmg/gloss3.htm] -subset: SOFA - -[Term] -id: SO:0000627 -name: insulator -def: "A trancriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repetitive sequence spanning 500 to 20,000 base pairs (a repeat unit is 5 - 30 base pairs)." [http://www.rerf.or.jp/eigo/glossary/minisate.htm] -subset: SOFA -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! 
antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. 
Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilises 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -is_a: SO:0000233 ! processed_transcript - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "interspersed repeat" EXACT [] -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000667 -name: insertion -def: "A region of sequence that has been inserted." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -is_a: SO:0000102 ! 
expressed_sequence_match - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeting by a nuclease enzyme." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives (alleles) exist in normal individuals in some population(s), wherein the least frequent allele has an abundance of 1% or greater." [http://www.cgr.ki.se/cgb/groups/brookes/Articles/essence_of_snps_article.pdf] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. 
A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000001 ! region - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology.\nA gene may be considered as a unit of inheritance. -subset: SOFA -is_a: SO:0000001 ! region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjacent copies of a DNA sequence." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The process that produces mature transcripts by combining exons of independent pre-mRNA molecules. The acceptor site lies on the 3' of these molecules." [SO:ke] -subset: SOFA -is_a: SO:0000164 ! three_prime_splice_site - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." 
[SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SO:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit_peptide" EXACT [] - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." 
[SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! assembly - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -is_a: SO:0005836 ! regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: processed_transcript_region -def: "A region of a processed transcript." 
[SO:ke] -comment: A manufactured term to collect together the parts of a processed transcript and give them an is_a path to the root. -subset: SOFA -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A region of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -is_a: SO:0000834 ! processed_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a protein." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" EXACT [] -synonym: "positional polypeptide feature" EXACT [] -synonym: "region or site annotation" EXACT [] -is_a: SO:0000001 ! region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! 
spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000851 -name: CDS_region -subset: SOFA -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as\npart of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0005836 -name: regulatory_region -def: "A DNA sequence that controls the expression of a gene." [http://www.genpromag.com/scripts/glossary.asp?LETTER=R] -subset: SOFA -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0005855 -name: gene_group -def: "A collection of related genes." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:1000002 -name: substitution -def: "Any change in genomic DNA caused by a single event." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:1000005 -name: complex_substitution -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. 
Usually there are multiple equally plausible explanations for the change." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000008 -name: point_mutation -def: "A single nucleotide change which has occurred at the same position of a corresponding nucleotide in a reference sequence." [SO:immuno_workshop] -subset: SOFA -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000036 -name: inversion -def: "A continuous nucleotide sequence is inverted in the same position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:1001284 -name: regulon -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." [ISBN:0198506732] -subset: SOFA -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:2000061 -name: databank_entry -def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -is_a: SO:0000695 ! reagent - -[Typedef] -id: adjacent_to -name: adjacent_to -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence." [SO:ke] -subset: SOFA - -[Typedef] -id: associated_with -name: associated_with -comment: This relationship is vague and up for discussion. -is_symmetric: true - -[Typedef] -id: derives_from -name: derives_from -subset: SOFA -is_transitive: true - -[Typedef] -id: genome_of -name: genome_of - -[Typedef] -id: has_genome_location -name: has_genome_location -is_obsolete: true - -[Typedef] -id: has_origin -name: has_origin - -[Typedef] -id: has_part -name: has_part - -[Typedef] -id: has_quality -name: has_quality -comment: The relationship between a feature and an atrribute. - -[Typedef] -id: homologous_to -name: homologous_to -subset: SOFA -is_symmetric: true -is_a: similar_to ! 
similar_to - -[Typedef] -id: member_of -name: member_of -comment: A subtype of part_of. Inverse is collection_of. Winston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. -subset: SOFA -is_transitive: true - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -subset: SOFA -is_a: homologous_to ! homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: paralogous_to -name: paralogous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: part_of -name: part_of -subset: SOFA -is_transitive: true - -[Typedef] -id: position_of -name: position_of - -[Typedef] -id: regulated_by -name: regulated_by -is_obsolete: true - -[Typedef] -id: sequence_of -name: sequence_of - -[Typedef] -id: similar_to -name: similar_to -subset: SOFA -is_symmetric: true - -[Typedef] -id: variant_of -name: variant_of -def: "A' is a variant (mutation) of A = definition every instance of A' is either an immediate mutation of some instance of A, or there is a chain of immediate mutation processes linking A' to some instance of A." [SO:immuno_workshop] -comment: Added to SO during the immunology workshop, June 2007. This relationship was approved by Barry Smith. 
- diff --git a/annotation/NBIS/Ontology/SOFA/sofa_2_4.obo b/annotation/NBIS/Ontology/SOFA/sofa_2_4.obo deleted file mode 100644 index 2001126f6..000000000 --- a/annotation/NBIS/Ontology/SOFA/sofa_2_4.obo +++ /dev/null @@ -1,2539 +0,0 @@ -format-version: 1.2 -date: 09:10:2009 15:48 -saved-by: kareneilbeck -auto-generated-by: OBO-Edit 2.1-beta1 -subsetdef: biosapiens "biosapiens protein feature ontology" -subsetdef: SOFA "SO feature annotation" -synonymtypedef: aa1 "amino acid 1 letter code" -synonymtypedef: aa3 "amino acid 3 letter code" -synonymtypedef: AAMOD "amino acid modification" -synonymtypedef: BS "biosapiens" -synonymtypedef: RNAMOD "RNA modification" EXACT -default-namespace: sequence -remark: autogenerated-by\: DAG-Edit version 1.417\nsaved-by\: eilbeck\ndate\: Tue May 11 15\:18\:44 PDT 2004\nversion\: $Revision\: 1.45 $ - -[Term] -id: SO:0000000 -name: Sequence_Ontology -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000001 -name: region -def: "A sequence_feature with an extent greater than zero. A nucleotide region is composed of bases and a polypeptide region is composed of amino acids." [SO:ke] -subset: SOFA -synonym: "sequence" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000004 -name: interior_coding_exon -subset: SOFA -synonym: "interior coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000005 -name: satellite_DNA -def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "satellite DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Satellite_DNA "wiki" -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000006 -name: PCR_product -def: "A region amplified by a PCR reaction." [SO:ke] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "amplicon" RELATED [] -synonym: "PCR product" EXACT [] -xref: http://en.wikipedia.org/wiki/RAPD "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000007 -name: read_pair -def: "A pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -synonym: "read-pair" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000013 -name: scRNA -def: "Any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -subset: SOFA -synonym: "small cytoplasmic RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000038 -name: match_set -def: "A collection of match parts." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000039 -name: match_part -def: "A part of a match, for example an hsp from blast is a match_part." [SO:ke] -subset: SOFA -synonym: "match part" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: part_of SO:0000343 ! match - -[Term] -id: SO:0000050 -name: gene_part -def: "A part of a gene, that has no other route in the ontology back to region. This concept is necessary for logical inference as these parts must have the properties of region. It also allows us to associate all the parts of genes with a gene." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000057 -name: operator -def: "A regulatory element of an operon to which activators or repressors bind thereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -synonym: "operator segment" EXACT [] -xref: http://en.wikipedia.org/wiki/Operator_(biology)#Operator "wiki" -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000059 -name: nuclease_binding_site -def: "A region of a molecule that binds to a nuclease." 
[SO:cb] -subset: SOFA -synonym: "nuclease binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0000101 -name: transposable_element -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -synonym: "transposable element" EXACT [] -synonym: "transposon" EXACT [] -xref: http://en.wikipedia.org/wiki/Transposable_element "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000102 -name: expressed_sequence_match -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -synonym: "expressed sequence match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -def: "The end of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert end" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000104 -name: polypeptide -alt_id: SO:0000358 -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The term 'protein' was merged with 'polypeptide'. Although 'protein' was a sequenece_attribute and therefore meant to describe the quality rather than an actual feature, it was being used erroneously. It is replaced by 'peptidyl' as the polymer attribute. -subset: SOFA -synonym: "protein" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypeptide "wiki" -is_a: SO:0001411 ! biological_region -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000109 -name: sequence_variant_obs -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." 
[SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000110 -name: sequence_feature -def: "An extent of biological sequence." [SO:ke] -subset: SOFA -synonym: "located sequence feature" RELATED [] -synonym: "located_sequence_feature" EXACT [] -synonym: "sequence feature" EXACT [] - -[Term] -id: SO:0000112 -name: primer -def: "A short preexisting polynucleotide chain to which new deoxyribonucleotides can be added by DNA polymerase." [http://www.ornl.gov/TechResources/Human_Genome/publicat/primer2001/glossary.html] -subset: SOFA -synonym: "DNA primer" EXACT [] -synonym: "primer oligonucleotide" EXACT [] -synonym: "primer polynucleotide" EXACT [] -synonym: "primer sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Primer_(molecular_biology) "wiki" -is_a: SO:0000441 ! ss_oligo - -[Term] -id: SO:0000113 -name: proviral_region -def: "A viral sequence which has integrated into a host genome." [SO:ke] -subset: SOFA -synonym: "proviral region" EXACT [] -synonym: "proviral sequence" RELATED [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000114 -name: methylated_C -def: "A methylated deoxy-cytosine." [SO:ke] -subset: SOFA -synonym: "methylated C" EXACT [] -synonym: "methylated cytosine" EXACT [] -synonym: "methylated cytosine base" EXACT [] -synonym: "methylated cytosine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns. -subset: SOFA -synonym: "pre mRNA" RELATED [] -synonym: "protein coding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! 
UTR - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unvailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! 
assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http\://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000159 -name: deletion -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000161 -name: methylated_A -def: "A modified RNA base in which adenine has been methylated." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinary. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! 
primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a placeholder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! 
CRM - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! 
gene_component_region - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A segment of DNA that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." [GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon (here, 'codon'is inclusive of the stop_codon)." [SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! 
five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." 
[SO:ke] -subset: SOFA -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds a TF complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The inframe interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." 
[SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000239 -name: flanking_region -def: "The sequences extending on either side of a specific region." [SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -is_a: SO:0000370 ! 
small_regulatory_ncRNA - -[Term] -id: SO:0000289 -name: microsatellite -def: "A repeat_region containing repeat_units (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! replicon - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000305 -name: modified_base_site -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G or (in RNA) U." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001236 ! base - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." 
[SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_base_site - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: TSS -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." 
[SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." 
[SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." 
[SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000353 -name: sequence_assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." 
[http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! transcript - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -is_a: SO:0000372 ! implied link automatically realized ! enzymatic_RNA -intersection_of: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000375 -name: rRNA_5_8S -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. 
It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http://rnaworld.bio.ku.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -is_a: SO:0000715 ! implied link automatically realized ! RNA_motif -intersection_of: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterised activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. 
RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesises telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. 
It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." 
[http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. 
-subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A region on the surface of a molecule that may interact with another molecule. 
When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: Discrete. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A region of a molecule that binds to a protein." [SO:ke] -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0100011 ! 
cleaved_peptide_region - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! 
oligo - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A small, 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! 
primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000553 -name: polyA_site -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA site" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyse their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. 
-subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -is_a: SO:0000188 ! implied link automatically realized ! intron -intersection_of: SO:0000188 ! intron - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. 
The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000605 -name: intergenic_region -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! 
spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000627 -name: insulator -def: "A trancriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." 
[http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! 
rRNA - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilises 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. 
For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000667 -name: insertion -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! 
nuclease_binding_site - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000001 ! region - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjcent copies of a region (of length greater than 1)." [SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. 
-subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! 
origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -is_a: SO:0001055 ! implied link automatically realized ! transcriptional_cis_regulatory_region -intersection_of: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -def: "The region of sequence that has been inserted and is being propogated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! 
reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -def: "A region of a mature transcript." 
[SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." 
[SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000851 -name: CDS_region -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! 
large_subunit_rRNA - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region -intersection_of: SO:0000001 ! region - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -subset: SOFA -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." [SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. -subset: SOFA -synonym: "partially characterised change in DNA sequence" EXACT [] -synonym: "partially_characterised_change_in_DNA_sequence" EXACT [] -synonym: "sequence alteration" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." 
[EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature_peptide_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -def: "The maximal intersection of exon and UTR." [SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with astop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001235 -name: replicon -def: "A region containing at least one unique origin of replication and a unique termination site." [ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000001 ! region - -[Term] -id: SO:0001248 -name: assembly -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001409 -name: biomaterial_region -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0001410 -name: experimental_feature -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -def: "A region that is defined according to its relations with other regions within the same sequence." [SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001419 -name: cis_splice_site -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -def: "Primary transcript region bordering trans-splice junction." [SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001483 -name: SNV -def: "SNVs are single base pair positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0005836 -name: regulatory_region -def: "A DNA sequence that controls the expression of a gene." [SO:ke] -subset: SOFA -synonym: "regulatory region" EXACT [] -xref: http://en.wikipedia.org/wiki/Regulatory_region "wiki" -is_a: SO:0000831 ! 
gene_member_region - -[Term] -id: SO:0005855 -name: gene_group -def: "A collection of related genes." [SO:ma] -subset: SOFA -synonym: "gene group" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0100011 -name: cleaved_peptide_region -def: "The cleaved_peptide_regon is the a region of peptide sequence that is cleaved during maturation." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "cleaved peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:1000002 -name: substitution -def: "Any change in genomic DNA caused by a single event." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1000005 -name: complex_substitution -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -synonym: "complex substitution" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000008 -name: point_mutation -def: "A single nucleotide change which has occurred at the same position of a corresponding nucleotide in a reference sequence." [SO:immuno_workshop] -subset: SOFA -synonym: "point mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Point_mutation "wiki" -is_a: SO:0001483 ! SNV - -[Term] -id: SO:1000036 -name: inversion -def: "A continuous nucleotide sequence is inverted in the same position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1001284 -name: regulon -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." 
[ISBN:0198506732] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Regulon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:2000061 -name: databank_entry -def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -synonym: "databank entry" EXACT [] -is_a: SO:0000695 ! reagent - -[Typedef] -id: adjacent_to -name: adjacent_to -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence." [SO:ke] -subset: SOFA - -[Typedef] -id: associated_with -name: associated_with -comment: This relationship is vague and up for discussion. - -[Typedef] -id: complete_evidence_for_feature -name: complete_evidence_for_feature -def: "B is complete_evidence_for_feature A if the extent (5' and 3' boundaries) and internal boundaries of B fully support the extent and internal boundaries of A." [SO:ke] -comment: If A is a feature with multiple regions such as a multi exon transcript, the supporting EST evidence is complete if each of the regions is supported by an equivalent region in B. Also there must be no extra regions in B that are not represented in A. This relationship was requested by jeltje on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: derives_from -name: derives_from -subset: SOFA -is_transitive: true - -[Typedef] -id: edited_from -name: edited_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:45Z - -[Typedef] -id: edited_to -name: edited_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:11Z - -[Typedef] -id: evidence_for_feature -name: evidence_for_feature -def: "B is evidence_for_feature A, if an instance of B supports the existence of A." [SO:ke] -comment: This relationship was requested by nlw on the SO term tracker. 
The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true - -[Typedef] -id: exemplar_of -name: exemplar_of -def: "X is exemplar of Y if X is the best evidence for Y." [SO:ke] -comment: Tracker id: 2594157. - -[Typedef] -id: genome_of -name: genome_of - -[Typedef] -id: guided_by -name: guided_by -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:04Z - -[Typedef] -id: guides -name: guides -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:24Z - -[Typedef] -id: has_genome_location -name: has_genome_location -is_obsolete: true - -[Typedef] -id: has_intergral_part -name: has_integral_part -def: "X has_integral_part Y if and only if: X has_part Y and Y part_of X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: mRNA has_integral_part CDS. -created_by: kareneilbeck -creation_date: 2009-08-19T12:01:46Z - -[Typedef] -id: has_origin -name: has_origin - -[Typedef] -id: has_part -name: has_part -def: "Inverse of part_of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: operon has_part gene. - -[Typedef] -id: has_quality -name: has_quality -comment: The relationship between a feature and an attribute. - -[Typedef] -id: homologous_to -name: homologous_to -subset: SOFA -is_symmetric: true -is_a: similar_to ! similar_to - -[Typedef] -id: integral_part_of -name: integral_part_of -def: "X integral_part_of Y if and only if: X part_of Y and Y has_part X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: exon integral_part_of transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:03:28Z - -[Typedef] -id: member_of -name: member_of -comment: A subtype of part_of. Inverse is collection_of. Winston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. 
-subset: SOFA -is_transitive: true - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -subset: SOFA -is_a: homologous_to ! homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: paralogous_to -name: paralogous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: part_of -name: part_of -def: "X part_of Y if X is a subregion of Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: amino_acid part_of polypeptide. -subset: SOFA -is_transitive: true - -[Typedef] -id: partial_evidence_for_feature -name: partial_evidence_for_feature -def: "B is partial_evidence_for_feature A if the extent of B supports part_of but not all of A." [SO:ke] -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: position_of -name: position_of - -[Typedef] -id: processed_from -name: processed_from -def: "Inverse of processed_into." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA processed_from miRNA_primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:14:00Z - -[Typedef] -id: processed_into -name: processed_into -def: "X is processed_into Y if a region X is modified to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA_primary_transcript processed into miRNA. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:15:02Z - -[Typedef] -id: recombined_from -name: recombined_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:21:03Z - -[Typedef] -id: recombined_to -name: recombined_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:20:07Z - -[Typedef] -id: regulated_by -name: regulated_by -is_obsolete: true - -[Typedef] -id: sequence_of -name: sequence_of - -[Typedef] -id: similar_to -name: similar_to -subset: SOFA -is_symmetric: true - -[Typedef] -id: trans_spliced_from -name: trans_spliced_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:14Z - -[Typedef] -id: trans_spliced_to -name: trans_spliced_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:00Z - -[Typedef] -id: transcribed_from -name: transcribed_from -def: "X is transcribed_from Y if X is synthesized from template Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: primary_transcript transcribed_from gene. -created_by: kareneilbeck -creation_date: 2009-08-19T12:05:39Z - -[Typedef] -id: transcribed_to -name: transcribed_to -def: "Inverse of transcribed_from." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: gene transcribed_to primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:08:24Z - -[Typedef] -id: translates_to -name: translates_to -def: "Inverse of translation _of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: codon translates_to amino_acid. -created_by: kareneilbeck -creation_date: 2009-08-19T12:11:53Z - -[Typedef] -id: translation_of -name: translation_of -def: "X is translation of Y if X is translated by ribosome to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: Polypeptide translation_of CDS. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:09:59Z - -[Typedef] -id: variant_of -name: variant_of -def: "A' is a variant (mutation) of A = definition every instance of A' is either an immediate mutation of some instance of A, or there is a chain of immediate mutation processes linking A' to some instance of A." [SO:immuno_workshop] -comment: Added to SO during the immunology workshop, June 2007. This relationship was approved by Barry Smith. - diff --git a/annotation/NBIS/Ontology/SOFA/sofa_2_4_1.obo b/annotation/NBIS/Ontology/SOFA/sofa_2_4_1.obo deleted file mode 100644 index 7379465dd..000000000 --- a/annotation/NBIS/Ontology/SOFA/sofa_2_4_1.obo +++ /dev/null @@ -1,2539 +0,0 @@ -format-version: 1.2 -date: 02:12:2009 09:48 -saved-by: kareneilbeck -auto-generated-by: OBO-Edit 2.1-beta1 -subsetdef: biosapiens "biosapiens protein feature ontology" -subsetdef: SOFA "SO feature annotation" -synonymtypedef: aa1 "amino acid 1 letter code" -synonymtypedef: aa3 "amino acid 3 letter code" -synonymtypedef: AAMOD "amino acid modification" -synonymtypedef: BS "biosapiens" -synonymtypedef: RNAMOD "RNA modification" EXACT -default-namespace: sequence -remark: autogenerated-by\: DAG-Edit version 1.417\nsaved-by\: eilbeck\ndate\: Tue May 11 15\:18\:44 PDT 2004\nversion\: $Revision\: 1.45 $ - -[Term] -id: SO:0000000 -name: Sequence_Ontology -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000001 -name: region -def: "A sequence_feature with an extent greater than zero. A nucleotide region is composed of bases and a polypeptide region is composed of amino acids." [SO:ke] -subset: SOFA -synonym: "sequence" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000004 -name: interior_coding_exon -subset: SOFA -synonym: "interior coding exon" EXACT [] -is_a: SO:0000195 ! 
coding_exon - -[Term] -id: SO:0000005 -name: satellite_DNA -def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "satellite DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Satellite_DNA "wiki" -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000006 -name: PCR_product -def: "A region amplified by a PCR reaction." [SO:ke] -comment: This term is mapped to MGED. This term is now located in OBI, with the following ID OBI_0000406. -subset: SOFA -synonym: "amplicon" RELATED [] -synonym: "PCR product" EXACT [] -xref: http://en.wikipedia.org/wiki/RAPD "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000007 -name: read_pair -def: "A pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -synonym: "read-pair" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000013 -name: scRNA -def: "Any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -subset: SOFA -synonym: "small cytoplasmic RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000038 -name: match_set -def: "A collection of match parts." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000039 -name: match_part -def: "A part of a match, for example an hsp from blast is a match_part." [SO:ke] -subset: SOFA -synonym: "match part" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: part_of SO:0000343 ! match - -[Term] -id: SO:0000050 -name: gene_part -def: "A part of a gene, that has no other route in the ontology back to region. 
This concept is necessary for logical inference as these parts must have the properties of region. It also allows us to associate all the parts of genes with a gene." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000057 -name: operator -def: "A regulatory element of an operon to which activators or repressors bind thereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -synonym: "operator segment" EXACT [] -xref: http://en.wikipedia.org/wiki/Operator_(biology)#Operator "wiki" -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000059 -name: nuclease_binding_site -def: "A region of a molecule that binds to a nuclease." [SO:cb] -subset: SOFA -synonym: "nuclease binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0000101 -name: transposable_element -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -synonym: "transposable element" EXACT [] -synonym: "transposon" EXACT [] -xref: http://en.wikipedia.org/wiki/Transposable_element "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000102 -name: expressed_sequence_match -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -synonym: "expressed sequence match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -def: "The end of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert end" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000104 -name: polypeptide -alt_id: SO:0000358 -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The term 'protein' was merged with 'polypeptide'. 
Although 'protein' was a sequenece_attribute and therefore meant to describe the quality rather than an actual feature, it was being used erroneously. It is replaced by 'peptidyl' as the polymer attribute. -subset: SOFA -synonym: "protein" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypeptide "wiki" -is_a: SO:0001411 ! biological_region -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000109 -name: sequence_variant_obs -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000110 -name: sequence_feature -def: "An extent of biological sequence." [SO:ke] -subset: SOFA -synonym: "located sequence feature" RELATED [] -synonym: "located_sequence_feature" EXACT [] -synonym: "sequence feature" EXACT [] - -[Term] -id: SO:0000112 -name: primer -def: "A short preexisting polynucleotide chain to which new deoxyribonucleotides can be added by DNA polymerase." [http://www.ornl.gov/TechResources/Human_Genome/publicat/primer2001/glossary.html] -subset: SOFA -synonym: "DNA primer" EXACT [] -synonym: "primer oligonucleotide" EXACT [] -synonym: "primer polynucleotide" EXACT [] -synonym: "primer sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Primer_(molecular_biology) "wiki" -is_a: SO:0000441 ! ss_oligo - -[Term] -id: SO:0000113 -name: proviral_region -def: "A viral sequence which has integrated into a host genome." [SO:ke] -subset: SOFA -synonym: "proviral region" EXACT [] -synonym: "proviral sequence" RELATED [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000114 -name: methylated_C -def: "A methylated deoxy-cytosine." [SO:ke] -subset: SOFA -synonym: "methylated C" EXACT [] -synonym: "methylated cytosine" EXACT [] -synonym: "methylated cytosine base" EXACT [] -synonym: "methylated cytosine residue" EXACT [] -is_a: SO:0000306 ! 
methylated_base_feature - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns. -subset: SOFA -synonym: "pre mRNA" RELATED [] -synonym: "protein coding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! 
transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unvailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http\://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000159 -name: deletion -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000161 -name: methylated_A -def: "A modified RNA base in which adenine has been methylated." 
[SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinary. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a placeholder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." 
[SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." [GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! 
restriction_fragment - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon (here, 'codon'is inclusive of the stop_codon)." [SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! 
UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -subset: SOFA -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! 
mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds a TF complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The inframe interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000239 -name: flanking_region -def: "The sequences extending on either side of a specific region." [SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. 
Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000289 -name: microsatellite -def: "A repeat_region containing repeat_units (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! 
replicon - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000305 -name: modified_base_site -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G or (in RNA) U." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001236 ! base - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_base_site - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: TSS -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! 
mRNA_region - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! 
tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! 
ds_oligo - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." 
[SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000353 -name: sequence_assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! 
transcript - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -is_a: SO:0000372 ! implied link automatically realized ! enzymatic_RNA -intersection_of: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000375 -name: rRNA_5_8S -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http://rnaworld.bio.ku.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -is_a: SO:0000715 ! implied link automatically realized ! RNA_motif -intersection_of: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. 
In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterised activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesises telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). 
Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. 
A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: Discrete. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A region of a molecule that binds to a protein." [SO:ke] -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! 
remark - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." 
[SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000553 -name: polyA_site -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA site" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyse their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. 
The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -is_a: SO:0000188 ! implied link automatically realized ! intron -intersection_of: SO:0000188 ! intron - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. 
The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. 
They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000605 -name: intergenic_region -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! 
mRNA - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000627 -name: insulator -def: "A trancriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." 
[SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." 
[PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilises 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." 
[SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000667 -name: insertion -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." 
[SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." 
[SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000001 ! region - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjcent copies of a region (of length greater than 1)." 
[SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -is_a: SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -is_a: SO:0001055 ! implied link automatically realized ! transcriptional_cis_regulatory_region -intersection_of: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -def: "The region of sequence that has been inserted and is being propogated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! 
gene - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -def: "A region of a mature transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." 
[SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000851 -name: CDS_region -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." 
[SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region -intersection_of: SO:0000001 ! region - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -subset: SOFA -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." [SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." 
[SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. -subset: SOFA -synonym: "partially characterised change in DNA sequence" EXACT [] -synonym: "partially_characterised_change_in_DNA_sequence" EXACT [] -synonym: "sequence alteration" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature_peptide_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -def: "The maximal intersection of exon and UTR." [SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with astop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001235 -name: replicon -def: "A region containing at least one unique origin of replication and a unique termination site." [ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000001 ! 
region - -[Term] -id: SO:0001248 -name: assembly -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001409 -name: biomaterial_region -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001410 -name: experimental_feature -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -def: "A region that is defined according to its relations with other regions within the same sequence." [SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001419 -name: cis_splice_site -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -def: "Primary transcript region bordering trans-splice junction." [SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! 
splice_site - -[Term] -id: SO:0001483 -name: SNV -def: "SNVs are single base pair positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0005836 -name: regulatory_region -def: "A DNA sequence that controls the expression of a gene." [SO:ke] -subset: SOFA -synonym: "regulatory region" EXACT [] -xref: http://en.wikipedia.org/wiki/Regulatory_region "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0005855 -name: gene_group -def: "A collection of related genes." [SO:ma] -subset: SOFA -synonym: "gene group" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0100011 -name: cleaved_peptide_region -def: "The cleaved_peptide_regon is the a region of peptide sequence that is cleaved during maturation." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "cleaved peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:1000002 -name: substitution -def: "Any change in genomic DNA caused by a single event." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1000005 -name: complex_substitution -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -synonym: "complex substitution" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000008 -name: point_mutation -def: "A single nucleotide change which has occurred at the same position of a corresponding nucleotide in a reference sequence." 
[SO:immuno_workshop] -subset: SOFA -synonym: "point mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Point_mutation "wiki" -is_a: SO:0001483 ! SNV - -[Term] -id: SO:1000036 -name: inversion -def: "A continuous nucleotide sequence is inverted in the same position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1001284 -name: regulon -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." [ISBN:0198506732] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Regulon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:2000061 -name: databank_entry -def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -synonym: "databank entry" EXACT [] -is_a: SO:0000695 ! reagent - -[Typedef] -id: adjacent_to -name: adjacent_to -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence." [SO:ke] -subset: SOFA - -[Typedef] -id: associated_with -name: associated_with -comment: This relationship is vague and up for discussion. - -[Typedef] -id: complete_evidence_for_feature -name: complete_evidence_for_feature -def: "B is complete_evidence_for_feature A if the extent (5' and 3' boundaries) and internal boundaries of B fully support the extent and internal boundaries of A." [SO:ke] -comment: If A is a feature with multiple regions such as a multi exon transcript, the supporting EST evidence is complete if each of the regions is supported by an equivalent region in B. Also there must be no extra regions in B that are not represented in A. This relationship was requested by jeltje on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true -is_a: evidence_for_feature ! 
evidence_for_feature - -[Typedef] -id: derives_from -name: derives_from -subset: SOFA -is_transitive: true - -[Typedef] -id: edited_from -name: edited_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:45Z - -[Typedef] -id: edited_to -name: edited_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:11Z - -[Typedef] -id: evidence_for_feature -name: evidence_for_feature -def: "B is evidence_for_feature A, if an instance of B supports the existence of A." [SO:ke] -comment: This relationship was requested by nlw on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true - -[Typedef] -id: exemplar_of -name: exemplar_of -def: "X is exemplar of Y if X is the best evidence for Y." [SO:ke] -comment: Tracker id: 2594157. - -[Typedef] -id: genome_of -name: genome_of - -[Typedef] -id: guided_by -name: guided_by -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:04Z - -[Typedef] -id: guides -name: guides -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:24Z - -[Typedef] -id: has_genome_location -name: has_genome_location -is_obsolete: true - -[Typedef] -id: has_intergral_part -name: has_integral_part -def: "X has_integral_part Y if and only if: X has_part Y and Y part_of X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: mRNA has_integral_part CDS. -created_by: kareneilbeck -creation_date: 2009-08-19T12:01:46Z - -[Typedef] -id: has_origin -name: has_origin - -[Typedef] -id: has_part -name: has_part -def: "Inverse of part_of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: operon has_part gene. - -[Typedef] -id: has_quality -name: has_quality -comment: The relationship between a feature and an attribute. - -[Typedef] -id: homologous_to -name: homologous_to -subset: SOFA -is_symmetric: true -is_a: similar_to ! 
similar_to - -[Typedef] -id: integral_part_of -name: integral_part_of -def: "X integral_part_of Y if and only if: X part_of Y and Y has_part X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: exon integral_part_of transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:03:28Z - -[Typedef] -id: member_of -name: member_of -comment: A subtype of part_of. Inverse is collection_of. Winston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. -subset: SOFA -is_transitive: true - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -subset: SOFA -is_a: homologous_to ! homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: paralogous_to -name: paralogous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: part_of -name: part_of -def: "X part_of Y if X is a subregion of Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: amino_acid part_of polypeptide. -subset: SOFA -is_transitive: true - -[Typedef] -id: partial_evidence_for_feature -name: partial_evidence_for_feature -def: "B is partial_evidence_for_feature A if the extent of B supports part_of but not all of A." [SO:ke] -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: position_of -name: position_of - -[Typedef] -id: processed_from -name: processed_from -def: "Inverse of processed_into." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA processed_from miRNA_primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:14:00Z - -[Typedef] -id: processed_into -name: processed_into -def: "X is processed_into Y if a region X is modified to create Y." 
[http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA_primary_transcript processed into miRNA. -created_by: kareneilbeck -creation_date: 2009-08-19T12:15:02Z - -[Typedef] -id: recombined_from -name: recombined_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:21:03Z - -[Typedef] -id: recombined_to -name: recombined_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:20:07Z - -[Typedef] -id: regulated_by -name: regulated_by -is_obsolete: true - -[Typedef] -id: sequence_of -name: sequence_of - -[Typedef] -id: similar_to -name: similar_to -subset: SOFA -is_symmetric: true - -[Typedef] -id: trans_spliced_from -name: trans_spliced_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:14Z - -[Typedef] -id: trans_spliced_to -name: trans_spliced_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:00Z - -[Typedef] -id: transcribed_from -name: transcribed_from -def: "X is transcribed_from Y if X is synthesized from template Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: primary_transcript transcribed_from gene. -created_by: kareneilbeck -creation_date: 2009-08-19T12:05:39Z - -[Typedef] -id: transcribed_to -name: transcribed_to -def: "Inverse of transcribed_from." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: gene transcribed_to primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:08:24Z - -[Typedef] -id: translates_to -name: translates_to -def: "Inverse of translation _of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: codon translates_to amino_acid. -created_by: kareneilbeck -creation_date: 2009-08-19T12:11:53Z - -[Typedef] -id: translation_of -name: translation_of -def: "X is translation of Y if X is translated by ribosome to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: Polypeptide translation_of CDS. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:09:59Z - -[Typedef] -id: variant_of -name: variant_of -def: "A' is a variant (mutation) of A = definition every instance of A' is either an immediate mutation of some instance of A, or there is a chain of immediate mutation processes linking A' to some instance of A." [SO:immuno_workshop] -comment: Added to SO during the immunology workshop, June 2007. This relationship was approved by Barry Smith. - diff --git a/annotation/NBIS/Ontology/SOFA/sofa_2_4_2.obo b/annotation/NBIS/Ontology/SOFA/sofa_2_4_2.obo deleted file mode 100644 index fae3c9cd5..000000000 --- a/annotation/NBIS/Ontology/SOFA/sofa_2_4_2.obo +++ /dev/null @@ -1,2565 +0,0 @@ -format-version: 1.2 -date: 08:04:2010 11:18 -saved-by: kareneilbeck -auto-generated-by: OBO-Edit 2.1-beta3 -subsetdef: biosapiens "biosapiens protein feature ontology" -subsetdef: SOFA "SO feature annotation" -synonymtypedef: aa1 "amino acid 1 letter code" -synonymtypedef: aa3 "amino acid 3 letter code" -synonymtypedef: AAMOD "amino acid modification" -synonymtypedef: BS "biosapiens" -synonymtypedef: RNAMOD "RNA modification" EXACT -default-namespace: sequence -remark: autogenerated-by\: DAG-Edit version 1.417\nsaved-by\: eilbeck\ndate\: Tue May 11 15\:18\:44 PDT 2004\nversion\: $Revision\: 1.45 $ - -[Term] -id: SO:0000000 -name: Sequence_Ontology -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000001 -name: region -def: "A sequence_feature with an extent greater than zero. A nucleotide region is composed of bases and a polypeptide region is composed of amino acids." [SO:ke] -subset: SOFA -synonym: "sequence" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000004 -name: interior_coding_exon -subset: SOFA -synonym: "interior coding exon" EXACT [] -is_a: SO:0000195 ! 
coding_exon - -[Term] -id: SO:0000005 -name: satellite_DNA -def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "satellite DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Satellite_DNA "wiki" -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000006 -name: PCR_product -def: "A region amplified by a PCR reaction." [SO:ke] -comment: This term is mapped to MGED. This term is now located in OBI, with the following ID OBI_0000406. -subset: SOFA -synonym: "amplicon" RELATED [] -synonym: "PCR product" EXACT [] -xref: http://en.wikipedia.org/wiki/RAPD "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000007 -name: read_pair -def: "A pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -synonym: "read-pair" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000013 -name: scRNA -def: "Any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a Eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -subset: SOFA -synonym: "small cytoplasmic RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000038 -name: match_set -def: "A collection of match parts." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000039 -name: match_part -def: "A part of a match, for example an hsp from blast is a match_part." [SO:ke] -subset: SOFA -synonym: "match part" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: part_of SO:0000343 ! match - -[Term] -id: SO:0000050 -name: gene_part -def: "A part of a gene, that has no other route in the ontology back to region. 
This concept is necessary for logical inference as these parts must have the properties of region. It also allows us to associate all the parts of genes with a gene." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000057 -name: operator -def: "A regulatory element of an operon to which activators or repressors bind thereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -synonym: "operator segment" EXACT [] -xref: http://en.wikipedia.org/wiki/Operator_(biology)#Operator "wiki" -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000059 -name: nuclease_binding_site -def: "A region of a molecule that binds to a nuclease." [SO:cb] -subset: SOFA -synonym: "nuclease binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0000101 -name: transposable_element -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -synonym: "transposable element" EXACT [] -synonym: "transposon" EXACT [] -xref: http://en.wikipedia.org/wiki/Transposable_element "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000102 -name: expressed_sequence_match -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -synonym: "expressed sequence match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -def: "The end of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert end" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000104 -name: polypeptide -alt_id: SO:0000358 -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The term 'protein' was merged with 'polypeptide'. 
Although 'protein' was a sequence_attribute and therefore meant to describe the quality rather than an actual feature, it was being used erroneously. It is replaced by 'peptidyl' as the polymer attribute. -subset: SOFA -synonym: "protein" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypeptide "wiki" -is_a: SO:0001411 ! biological_region -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000109 -name: sequence_variant_obs -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000110 -name: sequence_feature -def: "An extent of biological sequence." [SO:ke] -subset: SOFA -synonym: "located sequence feature" RELATED [] -synonym: "located_sequence_feature" EXACT [] -synonym: "sequence feature" EXACT [] - -[Term] -id: SO:0000112 -name: primer -def: "A short preexisting polynucleotide chain to which new deoxyribonucleotides can be added by DNA polymerase." [http://www.ornl.gov/TechResources/Human_Genome/publicat/primer2001/glossary.html] -subset: SOFA -synonym: "DNA primer" EXACT [] -synonym: "primer oligonucleotide" EXACT [] -synonym: "primer polynucleotide" EXACT [] -synonym: "primer sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Primer_(molecular_biology) "wiki" -is_a: SO:0000441 ! ss_oligo - -[Term] -id: SO:0000113 -name: proviral_region -def: "A viral sequence which has integrated into a host genome." [SO:ke] -subset: SOFA -synonym: "proviral region" EXACT [] -synonym: "proviral sequence" RELATED [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000114 -name: methylated_C -def: "A methylated deoxy-cytosine." [SO:ke] -subset: SOFA -synonym: "methylated C" EXACT [] -synonym: "methylated cytosine" EXACT [] -synonym: "methylated cytosine base" EXACT [] -synonym: "methylated cytosine residue" EXACT [] -is_a: SO:0000306 ! 
methylated_base_feature - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns. -subset: SOFA -synonym: "pre mRNA" RELATED [] -synonym: "protein coding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! 
transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unavailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http\://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000159 -name: deletion -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000161 -name: methylated_A -def: "A modified RNA base in which adenine has been methylated." 
[SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinery. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a place holder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." 
[SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." [GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! 
restriction_fragment - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon (here, 'codon' is inclusive of the stop_codon)." [SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! 
UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -subset: SOFA -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! 
mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds a TF complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The in-frame interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000239 -name: flanking_region -def: "The sequences extending on either side of a specific region." [SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. 
Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000289 -name: microsatellite -def: "A repeat_region containing repeat_units (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! 
replicon - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000305 -name: modified_base -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001720 ! epigenetically_modified_region - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_base - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: TSS -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! 
mRNA_region - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! 
tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! 
ds_oligo - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." 
[SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000353 -name: sequence_assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! 
transcript - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -is_a: SO:0000372 ! implied link automatically realized ! enzymatic_RNA -intersection_of: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000375 -name: rRNA_5_8S -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http://rnaworld.bio.ku.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -is_a: SO:0000715 ! implied link automatically realized ! RNA_motif -intersection_of: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. 
In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterized activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesizes telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). 
Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. 
A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: Discrete. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A region of a molecule that binds to a protein." [SO:ke] -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! 
remark - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." 
[SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -synonym: "polyadenylation termination signal" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000553 -name: polyA_site -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA cleavage site" EXACT [] -synonym: "polyA site" EXACT [] -synonym: "polyadenylation site" RELATED [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyze their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -is_a: SO:0000188 ! implied link automatically realized ! intron -intersection_of: SO:0000188 ! intron - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. 
Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000605 -name: intergenic_region -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! 
biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000627 -name: insulator -def: "A transcriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." 
[SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." 
[PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilizes 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." 
[SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000667 -name: insertion -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." 
[SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." 
[SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -synonym: "breakpoint" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000001 ! region - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! 
gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjcent copies of a region (of length greater than 1)." [SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." 
[FB:WG] -subset: SOFA -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -is_a: SO:0001055 ! implied link automatically realized ! transcriptional_cis_regulatory_region -intersection_of: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -def: "The region of sequence that has been inserted and is being propogated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! 
gene - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -def: "A region of a mature transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." 
[SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000851 -name: CDS_region -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." 
[SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -subset: SOFA -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region -intersection_of: SO:0000001 ! region - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -subset: SOFA -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0005836 ! 
regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." [SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. -subset: SOFA -synonym: "partially characterised change in DNA sequence" EXACT [] -synonym: "partially_characterised_change_in_DNA_sequence" EXACT [] -synonym: "sequence alteration" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature_peptide_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -def: "The maximal intersection of exon and UTR." [SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with a stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001235 -name: replicon -def: "A region containing at least one unique origin of replication and a unique termination site." 
[ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001248 -name: assembly -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001409 -name: biomaterial_region -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001410 -name: experimental_feature -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -def: "A region that is defined according to its relations with other regions within the same sequence." [SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001419 -name: cis_splice_site -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." 
[SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -def: "Primary transcript region bordering trans-splice junction." [SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001483 -name: SNV -def: "SNVs are single base pair positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0001720 -name: epigenetically_modified_region -def: "A biological region implicated in inherited changes caused by mechanisms other than changes in the underlying DNA sequence." [http://en.wikipedia.org/wiki/Epigenetics, SO:ke] -subset: SOFA -synonym: "epigenetically modified region" RELATED [] -is_a: SO:0001411 ! biological_region -intersection_of: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-03-27T12:02:29Z - -[Term] -id: SO:0005836 -name: regulatory_region -def: "A DNA sequence that controls the expression of a gene." [SO:ke] -subset: SOFA -synonym: "regulatory region" EXACT [] -xref: http://en.wikipedia.org/wiki/Regulatory_region "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0005855 -name: gene_group -def: "A collection of related genes." [SO:ma] -subset: SOFA -synonym: "gene group" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0100011 -name: cleaved_peptide_region -def: "The cleaved_peptide_regon is the a region of peptide sequence that is cleaved during maturation." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "cleaved peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! 
immature_peptide_region - -[Term] -id: SO:1000002 -name: substitution -def: "Any change in genomic DNA caused by a single event." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1000005 -name: complex_substitution -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -synonym: "complex substitution" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000008 -name: point_mutation -def: "A single nucleotide change which has occurred at the same position of a corresponding nucleotide in a reference sequence." [SO:immuno_workshop] -subset: SOFA -synonym: "point mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Point_mutation "wiki" -is_a: SO:0001483 ! SNV - -[Term] -id: SO:1000036 -name: inversion -def: "A continuous nucleotide sequence is inverted in the same position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1001284 -name: regulon -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." [ISBN:0198506732] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Regulon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:2000061 -name: databank_entry -def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -synonym: "databank entry" EXACT [] -is_a: SO:0000695 ! reagent - -[Typedef] -id: adjacent_to -name: adjacent_to -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence." 
[SO:ke] -subset: SOFA - -[Typedef] -id: associated_with -name: associated_with -comment: This relationship is vague and up for discussion. - -[Typedef] -id: complete_evidence_for_feature -name: complete_evidence_for_feature -def: "B is complete_evidence_for_feature A if the extent (5' and 3' boundaries) and internal boundaries of B fully support the extent and internal boundaries of A." [SO:ke] -comment: If A is a feature with multiple regions such as a multi exon transcript, the supporting EST evidence is complete if each of the regions is supported by an equivalent region in B. Also there must be no extra regions in B that are not represented in A. This relationship was requested by jeltje on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: derives_from -name: derives_from -subset: SOFA -is_transitive: true - -[Typedef] -id: edited_from -name: edited_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:45Z - -[Typedef] -id: edited_to -name: edited_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:11Z - -[Typedef] -id: evidence_for_feature -name: evidence_for_feature -def: "B is evidence_for_feature A, if an instance of B supports the existence of A." [SO:ke] -comment: This relationship was requested by nlw on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true - -[Typedef] -id: exemplar_of -name: exemplar_of -def: "X is exemplar of Y if X is the best evidence for Y." [SO:ke] -comment: Tracker id: 2594157. 
- -[Typedef] -id: genome_of -name: genome_of - -[Typedef] -id: guided_by -name: guided_by -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:04Z - -[Typedef] -id: guides -name: guides -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:24Z - -[Typedef] -id: has_genome_location -name: has_genome_location -is_obsolete: true - -[Typedef] -id: has_intergral_part -name: has_integral_part -def: "X has_integral_part Y if and only if: X has_part Y and Y part_of X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: mRNA has_integral_part CDS. -created_by: kareneilbeck -creation_date: 2009-08-19T12:01:46Z - -[Typedef] -id: has_origin -name: has_origin - -[Typedef] -id: has_part -name: has_part -def: "Inverse of part_of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: operon has_part gene. - -[Typedef] -id: has_quality -name: has_quality -comment: The relationship between a feature and an attribute. - -[Typedef] -id: homologous_to -name: homologous_to -subset: SOFA -is_symmetric: true -is_a: similar_to ! similar_to - -[Typedef] -id: integral_part_of -name: integral_part_of -def: "X integral_part_of Y if and only if: X part_of Y and Y has_part X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: exon integral_part_of transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:03:28Z - -[Typedef] -id: member_of -name: member_of -comment: A subtype of part_of. Inverse is collection_of. Winston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. -subset: SOFA -is_transitive: true - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -subset: SOFA -is_a: homologous_to ! homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! 
homologous_to - -[Typedef] -id: paralogous_to -name: paralogous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: part_of -name: part_of -def: "X part_of Y if X is a subregion of Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: amino_acid part_of polypeptide. -subset: SOFA -is_transitive: true - -[Typedef] -id: partial_evidence_for_feature -name: partial_evidence_for_feature -def: "B is partial_evidence_for_feature A if the extent of B supports part_of but not all of A." [SO:ke] -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: position_of -name: position_of - -[Typedef] -id: processed_from -name: processed_from -def: "Inverse of processed_into." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA processed_from miRNA_primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:14:00Z - -[Typedef] -id: processed_into -name: processed_into -def: "X is processed_into Y if a region X is modified to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA_primary_transcript processed into miRNA. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:15:02Z - -[Typedef] -id: recombined_from -name: recombined_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:21:03Z - -[Typedef] -id: recombined_to -name: recombined_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:20:07Z - -[Typedef] -id: regulated_by -name: regulated_by -is_obsolete: true - -[Typedef] -id: sequence_of -name: sequence_of - -[Typedef] -id: similar_to -name: similar_to -subset: SOFA -is_symmetric: true - -[Typedef] -id: trans_spliced_from -name: trans_spliced_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:14Z - -[Typedef] -id: trans_spliced_to -name: trans_spliced_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:00Z - -[Typedef] -id: transcribed_from -name: transcribed_from -def: "X is transcribed_from Y if X is synthesized from template Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: primary_transcript transcribed_from gene. -created_by: kareneilbeck -creation_date: 2009-08-19T12:05:39Z - -[Typedef] -id: transcribed_to -name: transcribed_to -def: "Inverse of transcribed_from." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: gene transcribed_to primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:08:24Z - -[Typedef] -id: translates_to -name: translates_to -def: "Inverse of translation _of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: codon translates_to amino_acid. -created_by: kareneilbeck -creation_date: 2009-08-19T12:11:53Z - -[Typedef] -id: translation_of -name: translation_of -def: "X is translation of Y if X is translated by ribosome to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: Polypeptide translation_of CDS. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:09:59Z - -[Typedef] -id: variant_of -name: variant_of -def: "A' is a variant (mutation) of A = definition every instance of A' is either an immediate mutation of some instance of A, or there is a chain of immediate mutation processes linking A' to some instance of A." [SO:immuno_workshop] -comment: Added to SO during the immunology workshop, June 2007. This relationship was approved by Barry Smith. - diff --git a/annotation/NBIS/Ontology/SOFA/sofa_2_4_3.obo b/annotation/NBIS/Ontology/SOFA/sofa_2_4_3.obo deleted file mode 100644 index d719cbfcb..000000000 --- a/annotation/NBIS/Ontology/SOFA/sofa_2_4_3.obo +++ /dev/null @@ -1,2578 +0,0 @@ -format-version: 1.2 -date: 01:06:2010 10:46 -saved-by: kareneilbeck -auto-generated-by: OBO-Edit 2.1-beta3 -subsetdef: biosapiens "biosapiens protein feature ontology" -subsetdef: SOFA "SO feature annotation" -synonymtypedef: aa1 "amino acid 1 letter code" -synonymtypedef: aa3 "amino acid 3 letter code" -synonymtypedef: AAMOD "amino acid modification" -synonymtypedef: BS "biosapiens" -synonymtypedef: dbsnp "dbsnp variant terms" -synonymtypedef: ebi_variants "ensembl variant terms" -synonymtypedef: RNAMOD "RNA modification" EXACT -default-namespace: sequence -remark: autogenerated-by\: DAG-Edit version 1.417\nsaved-by\: eilbeck\ndate\: Tue May 11 15\:18\:44 PDT 2004\nversion\: $Revision\: 1.45 $ - -[Term] -id: SO:0000000 -name: Sequence_Ontology -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000001 -name: region -def: "A sequence_feature with an extent greater than zero. A nucleotide region is composed of bases and a polypeptide region is composed of amino acids." [SO:ke] -subset: SOFA -synonym: "sequence" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000004 -name: interior_coding_exon -subset: SOFA -synonym: "interior coding exon" EXACT [] -is_a: SO:0000195 ! 
coding_exon - -[Term] -id: SO:0000005 -name: satellite_DNA -def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "satellite DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Satellite_DNA "wiki" -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000006 -name: PCR_product -def: "A region amplified by a PCR reaction." [SO:ke] -comment: This term is mapped to MGED. This term is now located in OBI, with the following ID OBI_0000406. -subset: SOFA -synonym: "amplicon" RELATED [] -synonym: "PCR product" EXACT [] -xref: http://en.wikipedia.org/wiki/RAPD "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000007 -name: read_pair -def: "A pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -synonym: "read-pair" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000013 -name: scRNA -def: "Any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a Eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -subset: SOFA -synonym: "small cytoplasmic RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000038 -name: match_set -def: "A collection of match parts." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000039 -name: match_part -def: "A part of a match, for example an hsp from blast is a match_part." [SO:ke] -subset: SOFA -synonym: "match part" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: part_of SO:0000343 ! match - -[Term] -id: SO:0000050 -name: gene_part -def: "A part of a gene, that has no other route in the ontology back to region. 
This concept is necessary for logical inference as these parts must have the properties of region. It also allows us to associate all the parts of genes with a gene." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000057 -name: operator -def: "A regulatory element of an operon to which activators or repressors bind thereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -synonym: "operator segment" EXACT [] -xref: http://en.wikipedia.org/wiki/Operator_(biology)#Operator "wiki" -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000059 -name: nuclease_binding_site -def: "A region of a molecule that binds to a nuclease." [SO:cb] -subset: SOFA -synonym: "nuclease binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0000101 -name: transposable_element -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -synonym: "transposable element" EXACT [] -synonym: "transposon" EXACT [] -xref: http://en.wikipedia.org/wiki/Transposable_element "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000102 -name: expressed_sequence_match -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -synonym: "expressed sequence match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -def: "The end of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert end" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000104 -name: polypeptide -alt_id: SO:0000358 -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The term 'protein' was merged with 'polypeptide'. 
Although 'protein' was a sequence_attribute and therefore meant to describe the quality rather than an actual feature, it was being used erroneously. It is replaced by 'peptidyl' as the polymer attribute. -subset: SOFA -synonym: "protein" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypeptide "wiki" -is_a: SO:0001411 ! biological_region -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000109 -name: sequence_variant_obs -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000110 -name: sequence_feature -def: "An extent of biological sequence." [SO:ke] -subset: SOFA -synonym: "located sequence feature" RELATED [] -synonym: "located_sequence_feature" EXACT [] -synonym: "sequence feature" EXACT [] - -[Term] -id: SO:0000112 -name: primer -def: "A short preexisting polynucleotide chain to which new deoxyribonucleotides can be added by DNA polymerase." [http://www.ornl.gov/TechResources/Human_Genome/publicat/primer2001/glossary.html] -subset: SOFA -synonym: "DNA primer" EXACT [] -synonym: "primer oligonucleotide" EXACT [] -synonym: "primer polynucleotide" EXACT [] -synonym: "primer sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Primer_(molecular_biology) "wiki" -is_a: SO:0000441 ! ss_oligo - -[Term] -id: SO:0000113 -name: proviral_region -def: "A viral sequence which has integrated into a host genome." [SO:ke] -subset: SOFA -synonym: "proviral region" EXACT [] -synonym: "proviral sequence" RELATED [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000114 -name: methylated_C -def: "A methylated deoxy-cytosine." [SO:ke] -subset: SOFA -synonym: "methylated C" EXACT [] -synonym: "methylated cytosine" EXACT [] -synonym: "methylated cytosine base" EXACT [] -synonym: "methylated cytosine residue" EXACT [] -is_a: SO:0000306 ! 
methylated_base_feature - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns. -subset: SOFA -synonym: "pre mRNA" RELATED [] -synonym: "protein coding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! 
transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unavailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http\://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000159 -name: deletion -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000161 -name: methylated_A -def: "A modified RNA base in which adenine has been methylated." 
[SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinery. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a place holder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." 
[SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." [GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! 
restriction_fragment - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon (here, 'codon' is inclusive of the stop_codon)." [SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! 
UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -subset: SOFA -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! 
mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds a TF complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The in-frame interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000239 -name: flanking_region -def: "The sequences extending on either side of a specific region." [SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. 
Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000289 -name: microsatellite -def: "A repeat_region containing repeat_units (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! 
replicon - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000305 -name: modified_base -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001720 ! epigenetically_modified_region - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_base - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: TSS -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! 
mRNA_region - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! 
tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! 
ds_oligo - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." 
[SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000353 -name: sequence_assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript -intersection_of: SO:0000673 ! 
transcript - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -is_a: SO:0000372 ! implied link automatically realized ! enzymatic_RNA -intersection_of: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000375 -name: rRNA_5_8S -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http://rnaworld.bio.ku.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -is_a: SO:0000715 ! implied link automatically realized ! RNA_motif -intersection_of: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. 
In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterized activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesizes telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). 
Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. 
A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: Discrete. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A region of a molecule that binds to a protein." [SO:ke] -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! 
remark - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0001527 ! peptide_localization_signal - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." 
[SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -synonym: "polyadenylation termination signal" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000553 -name: polyA_site -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA cleavage site" EXACT [] -synonym: "polyA site" EXACT [] -synonym: "polyadenylation site" RELATED [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyze their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -is_a: SO:0000188 ! implied link automatically realized ! intron -intersection_of: SO:0000188 ! intron - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. 
Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000605 -name: intergenic_region -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! 
biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000627 -name: insulator -def: "A transcriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." 
[SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." 
[PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilizes 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." 
[SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000667 -name: insertion -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." 
[SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." 
[SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -synonym: "breakpoint" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000001 ! region - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! 
gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjcent copies of a region (of length greater than 1)." [SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." 
[FB:WG] -subset: SOFA -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -is_a: SO:0001055 ! implied link automatically realized ! transcriptional_cis_regulatory_region -intersection_of: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -def: "The region of sequence that has been inserted and is being propogated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! 
gene - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -def: "A region of a mature transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." 
[SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000851 -name: CDS_region -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." 
[SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -subset: SOFA -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region -intersection_of: SO:0000001 ! region - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -subset: SOFA -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0005836 ! 
regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." [SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. -subset: SOFA -synonym: "partially characterised change in DNA sequence" EXACT [] -synonym: "partially_characterised_change_in_DNA_sequence" EXACT [] -synonym: "sequence alteration" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature_peptide_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -def: "The maximal intersection of exon and UTR." [SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with a stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001235 -name: replicon -def: "A region containing at least one unique origin of replication and a unique termination site." 
[ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001248 -name: assembly -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001409 -name: biomaterial_region -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001410 -name: experimental_feature -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -def: "A region that is defined according to its relations with other regions within the same sequence." [SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001419 -name: cis_splice_site -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." 
[SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -def: "Primary transcript region bordering trans-splice junction." [SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001483 -name: SNV -def: "SNVs are single base pair positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0001527 -name: peptide_localization_signal -def: "A region of peptide sequence used to target the polypeptide molecule to a specific organelle." [SO:ke] -subset: SOFA -synonym: "localization signal" RELATED [] -synonym: "peptide localization signal" EXACT [] -is_a: SO:0000839 ! polypeptide_region -created_by: kareneilbeck -creation_date: 2010-03-11T02:15:05Z - -[Term] -id: SO:0001720 -name: epigenetically_modified_region -def: "A biological region implicated in inherited changes caused by mechanisms other than changes in the underlying DNA sequence." [http://en.wikipedia.org/wiki/Epigenetics, SO:ke] -subset: SOFA -synonym: "epigenetically modified region" RELATED [] -is_a: SO:0001411 ! biological_region -intersection_of: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-03-27T12:02:29Z - -[Term] -id: SO:0005836 -name: regulatory_region -def: "A DNA sequence that controls the expression of a gene." [SO:ke] -subset: SOFA -synonym: "regulatory region" EXACT [] -xref: http://en.wikipedia.org/wiki/Regulatory_region "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0005855 -name: gene_group -def: "A collection of related genes." [SO:ma] -subset: SOFA -synonym: "gene group" EXACT [] -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0100011 -name: cleaved_peptide_region -def: "The cleaved_peptide_regon is the a region of peptide sequence that is cleaved during maturation." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "cleaved peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:1000002 -name: substitution -def: "Any change in genomic DNA caused by a single event." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1000005 -name: complex_substitution -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -synonym: "complex substitution" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000008 -name: point_mutation -def: "A single nucleotide change which has occurred at the same position of a corresponding nucleotide in a reference sequence." [SO:immuno_workshop] -subset: SOFA -synonym: "point mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Point_mutation "wiki" -is_a: SO:0001483 ! SNV - -[Term] -id: SO:1000036 -name: inversion -def: "A continuous nucleotide sequence is inverted in the same position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1001284 -name: regulon -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." [ISBN:0198506732] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Regulon "wiki" -is_a: SO:0005855 ! 
gene_group - -[Term] -id: SO:2000061 -name: databank_entry -def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -synonym: "databank entry" EXACT [] -is_a: SO:0000695 ! reagent - -[Typedef] -id: adjacent_to -name: adjacent_to -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence." [SO:ke] -subset: SOFA - -[Typedef] -id: associated_with -name: associated_with -comment: This relationship is vague and up for discussion. - -[Typedef] -id: complete_evidence_for_feature -name: complete_evidence_for_feature -def: "B is complete_evidence_for_feature A if the extent (5' and 3' boundaries) and internal boundaries of B fully support the extent and internal boundaries of A." [SO:ke] -comment: If A is a feature with multiple regions such as a multi exon transcript, the supporting EST evidence is complete if each of the regions is supported by an equivalent region in B. Also there must be no extra regions in B that are not represented in A. This relationship was requested by jeltje on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: derives_from -name: derives_from -subset: SOFA -is_transitive: true - -[Typedef] -id: edited_from -name: edited_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:45Z - -[Typedef] -id: edited_to -name: edited_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:11Z - -[Typedef] -id: evidence_for_feature -name: evidence_for_feature -def: "B is evidence_for_feature A, if an instance of B supports the existence of A." [SO:ke] -comment: This relationship was requested by nlw on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. 
-is_transitive: true - -[Typedef] -id: exemplar_of -name: exemplar_of -def: "X is exemplar of Y if X is the best evidence for Y." [SO:ke] -comment: Tracker id: 2594157. - -[Typedef] -id: genome_of -name: genome_of - -[Typedef] -id: guided_by -name: guided_by -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:04Z - -[Typedef] -id: guides -name: guides -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:24Z - -[Typedef] -id: has_genome_location -name: has_genome_location -is_obsolete: true - -[Typedef] -id: has_intergral_part -name: has_integral_part -def: "X has_integral_part Y if and only if: X has_part Y and Y part_of X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: mRNA has_integral_part CDS. -created_by: kareneilbeck -creation_date: 2009-08-19T12:01:46Z - -[Typedef] -id: has_origin -name: has_origin - -[Typedef] -id: has_part -name: has_part -def: "Inverse of part_of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: operon has_part gene. - -[Typedef] -id: has_quality -name: has_quality -comment: The relationship between a feature and an attribute. - -[Typedef] -id: homologous_to -name: homologous_to -subset: SOFA -is_symmetric: true -is_a: similar_to ! similar_to - -[Typedef] -id: integral_part_of -name: integral_part_of -def: "X integral_part_of Y if and only if: X part_of Y and Y has_part X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: exon integral_part_of transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:03:28Z - -[Typedef] -id: member_of -name: member_of -comment: A subtype of part_of. Inverse is collection_of. Winston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. -subset: SOFA -is_transitive: true - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -def: "A relationship between a pseudogenic feature and its functional ancestor." 
[SO:ke] -subset: SOFA -is_a: homologous_to ! homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: paralogous_to -name: paralogous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: part_of -name: part_of -def: "X part_of Y if X is a subregion of Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: amino_acid part_of polypeptide. -subset: SOFA -is_transitive: true - -[Typedef] -id: partial_evidence_for_feature -name: partial_evidence_for_feature -def: "B is partial_evidence_for_feature A if the extent of B supports part_of but not all of A." [SO:ke] -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: position_of -name: position_of - -[Typedef] -id: processed_from -name: processed_from -def: "Inverse of processed_into." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA processed_from miRNA_primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:14:00Z - -[Typedef] -id: processed_into -name: processed_into -def: "X is processed_into Y if a region X is modified to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA_primary_transcript processed into miRNA. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:15:02Z - -[Typedef] -id: recombined_from -name: recombined_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:21:03Z - -[Typedef] -id: recombined_to -name: recombined_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:20:07Z - -[Typedef] -id: regulated_by -name: regulated_by -is_obsolete: true - -[Typedef] -id: sequence_of -name: sequence_of - -[Typedef] -id: similar_to -name: similar_to -subset: SOFA -is_symmetric: true - -[Typedef] -id: trans_spliced_from -name: trans_spliced_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:14Z - -[Typedef] -id: trans_spliced_to -name: trans_spliced_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:00Z - -[Typedef] -id: transcribed_from -name: transcribed_from -def: "X is transcribed_from Y if X is synthesized from template Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: primary_transcript transcribed_from gene. -created_by: kareneilbeck -creation_date: 2009-08-19T12:05:39Z - -[Typedef] -id: transcribed_to -name: transcribed_to -def: "Inverse of transcribed_from." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: gene transcribed_to primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:08:24Z - -[Typedef] -id: translates_to -name: translates_to -def: "Inverse of translation _of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: codon translates_to amino_acid. -created_by: kareneilbeck -creation_date: 2009-08-19T12:11:53Z - -[Typedef] -id: translation_of -name: translation_of -def: "X is translation of Y if X is translated by ribosome to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: Polypeptide translation_of CDS. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:09:59Z - -[Typedef] -id: variant_of -name: variant_of -def: "A' is a variant (mutation) of A = definition every instance of A' is either an immediate mutation of some instance of A, or there is a chain of immediate mutation processes linking A' to some instance of A." [SO:immuno_workshop] -comment: Added to SO during the immunology workshop, June 2007. This relationship was approved by Barry Smith. - diff --git a/annotation/NBIS/Ontology/SOFA/sofa_2_4_4.obo b/annotation/NBIS/Ontology/SOFA/sofa_2_4_4.obo deleted file mode 100644 index 48fa31fa3..000000000 --- a/annotation/NBIS/Ontology/SOFA/sofa_2_4_4.obo +++ /dev/null @@ -1,2703 +0,0 @@ -format-version: 1.2 -date: 02:11:2010 13:22 -auto-generated-by: OBO-Edit 2.1-beta3 -subsetdef: biosapiens "biosapiens protein feature ontology" -subsetdef: SOFA "SO feature annotation" -synonymtypedef: aa1 "amino acid 1 letter code" -synonymtypedef: aa3 "amino acid 3 letter code" -synonymtypedef: AAMOD "amino acid modification" -synonymtypedef: BS "biosapiens" -synonymtypedef: dbsnp "dbsnp variant terms" -synonymtypedef: ebi_variants "ensembl variant terms" -synonymtypedef: RNAMOD "RNA modification" EXACT -default-namespace: sequence -remark: autogenerated-by\: DAG-Edit version 1.417\nsaved-by\: eilbeck\ndate\: Tue May 11 15\:18\:44 PDT 2004\nversion\: $Revision\: 1.45 $ - -[Term] -id: SO:0000000 -name: Sequence_Ontology -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000001 -name: region -def: "A sequence_feature with an extent greater than zero. A nucleotide region is composed of bases and a polypeptide region is composed of amino acids." [SO:ke] -subset: SOFA -synonym: "sequence" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000004 -name: interior_coding_exon -subset: SOFA -synonym: "interior coding exon" EXACT [] -is_a: SO:0000195 ! 
coding_exon - -[Term] -id: SO:0000005 -name: satellite_DNA -def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "satellite DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Satellite_DNA "wiki" -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000006 -name: PCR_product -def: "A region amplified by a PCR reaction." [SO:ke] -comment: This term is mapped to MGED. This term is now located in OBI, with the following ID OBI_0000406. -subset: SOFA -synonym: "amplicon" RELATED [] -synonym: "PCR product" EXACT [] -xref: http://en.wikipedia.org/wiki/RAPD "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000007 -name: read_pair -def: "A pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -synonym: "read-pair" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000013 -name: scRNA -def: "Any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a Eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -subset: SOFA -synonym: "small cytoplasmic RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000038 -name: match_set -def: "A collection of match parts." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000039 -name: match_part -def: "A part of a match, for example an hsp from blast is a match_part." [SO:ke] -subset: SOFA -synonym: "match part" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: part_of SO:0000343 ! match - -[Term] -id: SO:0000050 -name: gene_part -def: "A part of a gene, that has no other route in the ontology back to region. 
This concept is necessary for logical inference as these parts must have the properties of region. It also allows us to associate all the parts of genes with a gene." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000057 -name: operator -def: "A regulatory element of an operon to which activators or repressors bind thereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -synonym: "operator segment" EXACT [] -xref: http://en.wikipedia.org/wiki/Operator_(biology)#Operator "wiki" -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000059 -name: nuclease_binding_site -def: "A region of a molecule that binds to a nuclease." [SO:cb] -subset: SOFA -synonym: "nuclease binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site - -[Term] -id: SO:0000101 -name: transposable_element -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -synonym: "transposable element" EXACT [] -synonym: "transposon" EXACT [] -xref: http://en.wikipedia.org/wiki/Transposable_element "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000102 -name: expressed_sequence_match -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -synonym: "expressed sequence match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -def: "The end of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert end" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000104 -name: polypeptide -alt_id: SO:0000358 -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
The term 'protein' was merged with 'polypeptide'. Although 'protein' was a sequence_attribute and therefore meant to describe the quality rather than an actual feature, it was being used erroneously. It is replaced by 'peptidyl' as the polymer attribute. -subset: SOFA -synonym: "protein" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypeptide "wiki" -is_a: SO:0001411 ! biological_region -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000109 -name: sequence_variant_obs -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000110 -name: sequence_feature -def: "An extent of biological sequence." [SO:ke] -subset: SOFA -synonym: "located sequence feature" RELATED [] -synonym: "located_sequence_feature" EXACT [] -synonym: "sequence feature" EXACT [] - -[Term] -id: SO:0000112 -name: primer -def: "A short preexisting polynucleotide chain to which new deoxyribonucleotides can be added by DNA polymerase." [http://www.ornl.gov/TechResources/Human_Genome/publicat/primer2001/glossary.html] -subset: SOFA -synonym: "DNA primer" EXACT [] -synonym: "primer oligonucleotide" EXACT [] -synonym: "primer polynucleotide" EXACT [] -synonym: "primer sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Primer_(molecular_biology) "wiki" -is_a: SO:0000441 ! ss_oligo - -[Term] -id: SO:0000113 -name: proviral_region -def: "A viral sequence which has integrated into a host genome." [SO:ke] -subset: SOFA -synonym: "proviral region" EXACT [] -synonym: "proviral sequence" RELATED [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000114 -name: methylated_C -def: "A methylated deoxy-cytosine." 
[SO:ke] -subset: SOFA -synonym: "methylated C" EXACT [] -synonym: "methylated cytosine" EXACT [] -synonym: "methylated cytosine base" EXACT [] -synonym: "methylated cytosine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns. -subset: SOFA -synonym: "pre mRNA" RELATED [] -synonym: "protein coding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." 
[SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unavailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http\://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000159 -name: deletion -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000161 -name: methylated_A -def: "A modified RNA base in which adenine has been methylated." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinery. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! 
cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a place holder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! 
gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." 
[GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon (here, 'codon' is inclusive of the stop_codon)." [SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -subset: SOFA -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. 
The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds a Transcription Factor or Transcription Factor complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The in-frame interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000239 -name: flanking_region -def: "The sequences extending on either side of a specific region." [SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! 
rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. 
They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000289 -name: microsatellite -def: "A repeat_region containing repeat_units (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! 
repeat_region - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! replicon - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000305 -name: modified_base -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001720 ! epigenetically_modified_region - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_base - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! 
repeat_region - -[Term] -id: SO:0000315 -name: TSS -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." 
[SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0000462 ! 
pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! 
mRNA - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000353 -name: sequence_assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." 
[RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -is_a: SO:0000673 ! implied link automatically realized ! transcript - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -is_a: SO:0000372 ! implied link automatically realized ! enzymatic_RNA - -[Term] -id: SO:0000375 -name: rRNA_5_8S -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http://rnaworld.bio.ku.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -is_a: SO:0000715 ! implied link automatically realized ! 
RNA_motif - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterized activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesizes telomeric DNA." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. 
The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. 
In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A biological_region of sequence that, in the molecule, interacts selectively and non-covalently with other molecules. A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: See GO:0005488 : binding. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with polypeptide molecules." [SO:ke] -comment: See GO:0042277 : peptide binding. -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." 
[SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0001527 ! peptide_localization_signal - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! 
immature_peptide_region - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! 
exon - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -synonym: "polyadenylation termination signal" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000553 -name: polyA_site -alt_id: SO:0001430 -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation.\n The boundary between the UTR and the polyA sequence." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA cleavage site" EXACT [] -synonym: "polyA junction" EXACT [] -synonym: "polyA site" EXACT [] -synonym: "polyA_junction" EXACT [] -synonym: "polyadenylation site" RELATED [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." 
[SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyze their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -is_a: SO:0000188 ! implied link automatically realized ! intron - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. 
In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." 
[http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! 
autocatalytically_spliced_intron - -[Term] -id: SO:0000605 -name: intergenic_region -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." 
[SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000627 -name: insulator -def: "A transcriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! 
primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. 
Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilizes 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. 
-subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000667 -name: insertion -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! 
expressed_sequence_match - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -synonym: "breakpoint" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000001 ! region - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." 
[SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjcent copies of a region (of length greater than 1)." [SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001683 ! sequence_motif - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. 
It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -synonym: "superscaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0001527 ! 
peptide_localization_signal - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -is_a: SO:0001055 ! implied link automatically realized ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -def: "The region of sequence that has been inserted and is being propogated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." 
[SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -def: "A region of a mature transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! 
mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000851 -name: CDS_region -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! 
exon - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -subset: SOFA -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -subset: SOFA -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." [SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. -subset: SOFA -synonym: "partially characterised change in DNA sequence" EXACT [] -synonym: "partially_characterised_change_in_DNA_sequence" EXACT [] -synonym: "sequence alteration" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature_peptide_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -def: "The maximal intersection of exon and UTR." [SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with a stop codon will be partially coding and partially non coding. 
-subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001235 -name: replicon -def: "A region containing at least one unique origin of replication and a unique termination site." [ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001248 -name: assembly -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001409 -name: biomaterial_region -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001410 -name: experimental_feature -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -def: "A region defined by its disposition to be involved in a biological process." 
[SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -def: "A region that is defined according to its relations with other regions within the same sequence." [SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001419 -name: cis_splice_site -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -def: "Primary transcript region bordering trans-splice junction." [SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001483 -name: SNV -def: "SNVs are single base pair positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0001527 -name: peptide_localization_signal -def: "A region of peptide sequence used to target the polypeptide molecule to a specific organelle." [SO:ke] -subset: SOFA -synonym: "localization signal" RELATED [] -synonym: "peptide localization signal" EXACT [] -is_a: SO:0000839 ! polypeptide_region -created_by: kareneilbeck -creation_date: 2010-03-11T02:15:05Z - -[Term] -id: SO:0001647 -name: kozak_sequence -def: "A kind of ribosome entry site, specific to Eukaryotic organisms that overlaps part of both 5' UTR and CDS sequence." [SO:ke] -subset: SOFA -synonym: "kozak consensus" EXACT [] -synonym: "kozak consensus sequence" EXACT [] -synonym: "kozak sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Kozak_consensus_sequence "wikipedia" -is_a: SO:0000139 ! 
ribosome_entry_site -created_by: kareneilbeck -creation_date: 2010-06-07T03:12:20Z - -[Term] -id: SO:0001654 -name: nucleotide_to_protein_binding_site -subset: SOFA -synonym: "nucleotide to protein binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:26:05Z -modified_by: kareneilbeck -modification_date: 2010-11-02T01:19:49Z - -[Term] -id: SO:0001679 -name: transcription_regulatory_region -def: "A regulatory region that is involved in the control of the process of transcription." [SO:ke] -subset: SOFA -synonym: "transcription regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:49:35Z -modified_by: kareneilbeck -modification_date: 2010-11-02T01:22:15Z - -[Term] -id: SO:0001683 -name: sequence_motif -def: "A sequence motif is a nucleotide or amino-acid sequence pattern that may have biological significance." [http://en.wikipedia.org/wiki/Sequence_motif] -subset: SOFA -synonym: "sequence motif" RELATED [] -xref: http://en.wikipedia.org/wiki/Sequence_motif "wikipedia" -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-10-14T04:13:22Z -modified_by: kareneilbeck -modification_date: 2010-11-02T01:20:37Z - -[Term] -id: SO:0001720 -name: epigenetically_modified_region -def: "A biological region implicated in inherited changes caused by mechanisms other than changes in the underlying DNA sequence." [http://en.wikipedia.org/wiki/Epigenetics, SO:ke] -subset: SOFA -synonym: "epigenetically modified region" RELATED [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-03-27T12:02:29Z - -[Term] -id: SO:0005836 -name: regulatory_region -def: "A region of sequence that is involved in the control of a biological process." [SO:ke] -subset: SOFA -synonym: "regulatory region" EXACT [] -xref: http://en.wikipedia.org/wiki/Regulatory_region "wiki" -is_a: SO:0000831 ! 
gene_member_region - -[Term] -id: SO:0005855 -name: gene_group -def: "A collection of related genes." [SO:ma] -subset: SOFA -synonym: "gene group" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0100011 -name: cleaved_peptide_region -def: "The cleaved_peptide_regon is the a region of peptide sequence that is cleaved during maturation." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "cleaved peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:1000002 -name: substitution -def: "Any change in genomic DNA caused by a single event." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1000005 -name: complex_substitution -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -synonym: "complex substitution" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000008 -name: point_mutation -def: "A single nucleotide change which has occurred at the same position of a corresponding nucleotide in a reference sequence." [SO:immuno_workshop] -subset: SOFA -synonym: "point mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Point_mutation "wiki" -is_a: SO:0001483 ! SNV - -[Term] -id: SO:1000036 -name: inversion -def: "A continuous nucleotide sequence is inverted in the same position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1001284 -name: regulon -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." 
[ISBN:0198506732] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Regulon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:2000061 -name: databank_entry -def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -synonym: "databank entry" EXACT [] -is_a: SO:0000695 ! reagent - -[Typedef] -id: adjacent_to -name: adjacent_to -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence. X adjacent_to Y iff X and Y share a boundary but do not overlap." [PMID:20226267, SO:ke] -subset: SOFA - -[Typedef] -id: associated_with -name: associated_with -comment: This relationship is vague and up for discussion. - -[Typedef] -id: complete_evidence_for_feature -name: complete_evidence_for_feature -def: "B is complete_evidence_for_feature A if the extent (5' and 3' boundaries) and internal boundaries of B fully support the extent and internal boundaries of A." [SO:ke] -comment: If A is a feature with multiple regions such as a multi exon transcript, the supporting EST evidence is complete if each of the regions is supported by an equivalent region in B. Also there must be no extra regions in B that are not represented in A. This relationship was requested by jeltje on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: connects_on -name: connects_on -def: "X connects_on Y, Z, R iff whenever Z is on a R, X is adjacent to a Y and adjacent to a Z." [PMID:20226267] -comment: Example: A splice_junction connects_on exon, exon, mature_transcript. -created_by: kareneilbeck -creation_date: 2010-10-14T01:38:51Z - -[Typedef] -id: contained_by -name: contained_by -def: "X contained_by Y iff X starts after start of Y and X ends before end of Y." [PMID:20226267] -comment: The inverse is contains. 
Example: intein contained_by immature_peptide_region. -is_transitive: true -created_by: kareneilbeck -creation_date: 2010-10-14T01:26:16Z - -[Typedef] -id: contains -name: contains -def: "The inverse of contained_by." [PMID:20226267] -comment: Example: pre_miRNA contains miRNA_loop. -is_transitive: true -created_by: kareneilbeck -creation_date: 2010-10-14T01:32:15Z - -[Typedef] -id: derives_from -name: derives_from -subset: SOFA -is_transitive: true - -[Typedef] -id: disconnected_from -name: disconnected_from -def: "X is disconnected_from Y iff it is not the case that X overlaps Y." [PMID:20226267] -comment: Example: intron disconnected from exon \{on primary_transcript}. -created_by: kareneilbeck -creation_date: 2010-10-14T01:42:10Z - -[Typedef] -id: edited_from -name: edited_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:45Z - -[Typedef] -id: edited_to -name: edited_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:11Z - -[Typedef] -id: evidence_for_feature -name: evidence_for_feature -def: "B is evidence_for_feature A, if an instance of B supports the existence of A." [SO:ke] -comment: This relationship was requested by nlw on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true - -[Typedef] -id: exemplar_of -name: exemplar_of -def: "X is exemplar of Y if X is the best evidence for Y." [SO:ke] -comment: Tracker id: 2594157. - -[Typedef] -id: finished_by -name: finished_by -def: "Xy is finished_by Y if Y part of X, and X and Y share a 3' boundary." [PMID:20226267] -comment: Example CDS finished_by stop_codon. -created_by: kareneilbeck -creation_date: 2010-10-14T01:45:45Z - -[Typedef] -id: finishes -name: finishes -def: "X finishes Y if X is part_of Y and X and Y share a 3' or C terminal boundary." [PMID:20226267] -comment: Example: stop_codon finishes CDS. 
-created_by: kareneilbeck -creation_date: 2010-10-14T02:17:53Z - -[Typedef] -id: genome_of -name: genome_of - -[Typedef] -id: guided_by -name: guided_by -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:04Z - -[Typedef] -id: guides -name: guides -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:24Z - -[Typedef] -id: has_integral_part -name: has_integral_part -def: "X has_integral_part Y if and only if: X has_part Y and Y part_of X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: mRNA has_integral_part CDS. -is_a: has_part ! has_part -created_by: kareneilbeck -creation_date: 2009-08-19T12:01:46Z - -[Typedef] -id: has_origin -name: has_origin - -[Typedef] -id: has_part -name: has_part -def: "Inverse of part_of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: operon has_part gene. - -[Typedef] -id: has_quality -name: has_quality -comment: The relationship between a feature and an attribute. - -[Typedef] -id: homologous_to -name: homologous_to -subset: SOFA -is_symmetric: true -is_a: similar_to ! similar_to - -[Typedef] -id: integral_part_of -name: integral_part_of -def: "X integral_part_of Y if and only if: X part_of Y and Y has_part X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: exon integral_part_of transcript. -is_a: part_of ! part_of -created_by: kareneilbeck -creation_date: 2009-08-19T12:03:28Z - -[Typedef] -id: is_consecutive_sequence_of -name: is_consecutive_sequence_of -def: "R is_consecutive_sequence_of R iff every instance of R is equivalent to a collection of instances of U:u1, u2, un, such that no pair of ux uy is overlapping and for all ux, it is adjacent to ux-1 and ux+1, with the exception of the initial and terminal u1,and un (which may be identical)." [PMID:20226267] -comment: Example: region is consecutive_sequence of base. 
-created_by: kareneilbeck -creation_date: 2010-10-14T02:19:48Z - -[Typedef] -id: maximally_overlaps -name: maximally_overlaps -def: "A maximally_overlaps X iff all parts of A (including A itself) overlap both A and Y." [PMID:20226267] -comment: Example: non_coding_region_of_exon maximally_overlaps the intersections of exon and UTR. -created_by: kareneilbeck -creation_date: 2010-10-14T01:34:48Z - -[Typedef] -id: member_of -name: member_of -comment: A subtype of part_of. Inverse is collection_of. Winston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. -subset: SOFA -is_transitive: true - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -subset: SOFA -is_a: homologous_to ! homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: overlaps -name: overlaps -def: "X overlaps Y iff there exists some Z such that Z contained_by X and Z contained_by Y." [PMID:20226267] -comment: Example: coding_exon overlaps CDS. -created_by: kareneilbeck -creation_date: 2010-10-14T01:33:15Z - -[Typedef] -id: paralogous_to -name: paralogous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: part_of -name: part_of -def: "X part_of Y if X is a subregion of Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: amino_acid part_of polypeptide. -subset: SOFA -is_transitive: true - -[Typedef] -id: partial_evidence_for_feature -name: partial_evidence_for_feature -def: "B is partial_evidence_for_feature A if the extent of B supports part_of but not all of A." [SO:ke] -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: position_of -name: position_of - -[Typedef] -id: processed_from -name: processed_from -def: "Inverse of processed_into." 
[http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA processed_from miRNA_primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:14:00Z - -[Typedef] -id: processed_into -name: processed_into -def: "X is processed_into Y if a region X is modified to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA_primary_transcript processed into miRNA. -created_by: kareneilbeck -creation_date: 2009-08-19T12:15:02Z - -[Typedef] -id: recombined_from -name: recombined_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:21:03Z - -[Typedef] -id: recombined_to -name: recombined_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:20:07Z - -[Typedef] -id: sequence_of -name: sequence_of - -[Typedef] -id: similar_to -name: similar_to -subset: SOFA -is_symmetric: true - -[Typedef] -id: started_by -name: started_by -def: "X is strted_by Y if Y is part_of X and X and Y share a 5' boundary." [PMID:20226267] -comment: Example: CDS started_by start_codon. -created_by: kareneilbeck -creation_date: 2010-10-14T01:43:55Z - -[Typedef] -id: starts -name: starts -def: "X starts Y if X is part of Y, and A and Y share a 5' or N-terminal boundary." [PMID:20226267] -comment: Example: start_codon starts CDS. -created_by: kareneilbeck -creation_date: 2010-10-14T01:47:53Z - -[Typedef] -id: trans_spliced_from -name: trans_spliced_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:14Z - -[Typedef] -id: trans_spliced_to -name: trans_spliced_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:00Z - -[Typedef] -id: transcribed_from -name: transcribed_from -def: "X is transcribed_from Y if X is synthesized from template Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: primary_transcript transcribed_from gene. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:05:39Z - -[Typedef] -id: transcribed_to -name: transcribed_to -def: "Inverse of transcribed_from." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: gene transcribed_to primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:08:24Z - -[Typedef] -id: translates_to -name: translates_to -def: "Inverse of translation _of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: codon translates_to amino_acid. -created_by: kareneilbeck -creation_date: 2009-08-19T12:11:53Z - -[Typedef] -id: translation_of -name: translation_of -def: "X is translation of Y if X is translated by ribosome to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: Polypeptide translation_of CDS. -created_by: kareneilbeck -creation_date: 2009-08-19T12:09:59Z - -[Typedef] -id: variant_of -name: variant_of -def: "A' is a variant (mutation) of A = definition every instance of A' is either an immediate mutation of some instance of A, or there is a chain of immediate mutation processes linking A' to some instance of A." [SO:immuno_workshop] -comment: Added to SO during the immunology workshop, June 2007. This relationship was approved by Barry Smith. 
- diff --git a/annotation/NBIS/Ontology/SOFA/sofa_2_5.obo b/annotation/NBIS/Ontology/SOFA/sofa_2_5.obo deleted file mode 100644 index bece342bd..000000000 --- a/annotation/NBIS/Ontology/SOFA/sofa_2_5.obo +++ /dev/null @@ -1,3052 +0,0 @@ -format-version: 1.2 -subsetdef: biosapiens "biosapiens protein feature ontology" -subsetdef: DBVAR "database of genomic structural variation" -subsetdef: SOFA "SO feature annotation" -synonymtypedef: aa1 "amino acid 1 letter code" -synonymtypedef: aa3 "amino acid 3 letter code" -synonymtypedef: AAMOD "amino acid modification" -synonymtypedef: BS "biosapiens" -synonymtypedef: dbsnp "dbsnp variant terms" -synonymtypedef: dbvar "DBVAR" -synonymtypedef: ebi_variants "ensembl variant terms" -synonymtypedef: RNAMOD "RNA modification" EXACT -ontology: so-xp/subsets/SOFA -default-namespace: sequence - -[Term] -id: SO:0000000 -name: Sequence_Ontology -namespace: sequence -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000001 -name: region -namespace: sequence -def: "A sequence_feature with an extent greater than zero. A nucleotide region is composed of bases and a polypeptide region is composed of amino acids." [SO:ke] -subset: SOFA -synonym: "sequence" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000699 ! junction - -[Term] -id: SO:0000004 -name: interior_coding_exon -namespace: sequence -subset: SOFA -synonym: "interior coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000005 -name: satellite_DNA -namespace: sequence -def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "satellite DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Satellite_DNA "wiki" -is_a: SO:0000705 ! 
tandem_repeat - -[Term] -id: SO:0000006 -name: PCR_product -namespace: sequence -def: "A region amplified by a PCR reaction." [SO:ke] -comment: This term is mapped to MGED. This term is now located in OBI, with the following ID OBI_0000406. -subset: SOFA -synonym: "amplicon" RELATED [] -synonym: "PCR product" EXACT [] -xref: http://en.wikipedia.org/wiki/RAPD "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000007 -name: read_pair -namespace: sequence -def: "One of a pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -synonym: "mate pair" EXACT [] -synonym: "read-pair" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000149 ! contig -relationship: part_of SO:0001790 ! paired_end_fragment - -[Term] -id: SO:0000013 -name: scRNA -namespace: sequence -def: "A small non coding RNA sequence, present in the cytoplasm." [SO:ke] -subset: SOFA -synonym: "small cytoplasmic RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000038 -name: match_set -namespace: sequence -def: "A collection of match parts." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000039 -name: match_part -namespace: sequence -def: "A part of a match, for example an hsp from blast is a match_part." [SO:ke] -subset: SOFA -synonym: "match part" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: part_of SO:0000343 ! match - -[Term] -id: SO:0000050 -name: gene_part -namespace: sequence -def: "A part of a gene, that has no other route in the ontology back to region. This concept is necessary for logical inference as these parts must have the properties of region. It also allows us to associate all the parts of genes with a gene." 
[SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000057 -name: operator -namespace: sequence -def: "A regulatory element of an operon to which activators or repressors bind thereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -synonym: "operator segment" EXACT [] -xref: http://en.wikipedia.org/wiki/Operator_(biology)#Operator "wiki" -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000059 -name: nuclease_binding_site -namespace: sequence -def: "A binding site that, of a nucleotide molecule, that interacts selectively and non-covalently with polypeptide residues of a nuclease." [SO:cb] -subset: SOFA -synonym: "nuclease binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site - -[Term] -id: SO:0000101 -name: transposable_element -namespace: sequence -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -synonym: "transposable element" EXACT [] -synonym: "transposon" EXACT [] -xref: http://en.wikipedia.org/wiki/Transposable_element "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000102 -name: expressed_sequence_match -namespace: sequence -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -synonym: "expressed sequence match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -namespace: sequence -def: "The end of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert end" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000104 -name: polypeptide -namespace: sequence -alt_id: SO:0000358 -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." [SO:ma] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. The term 'protein' was merged with 'polypeptide'. Although 'protein' was a sequence_attribute and therefore meant to describe the quality rather than an actual feature, it was being used erroneously. It is replaced by 'peptidyl' as the polymer attribute. -subset: SOFA -synonym: "protein" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypeptide "wiki" -is_a: SO:0001411 ! biological_region -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000109 -name: sequence_variant_obs -namespace: sequence -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000110 -name: sequence_feature -namespace: sequence -def: "An extent of biological sequence." [SO:ke] -subset: SOFA -synonym: "located sequence feature" RELATED [] -synonym: "located_sequence_feature" EXACT [] -synonym: "sequence feature" EXACT [] - -[Term] -id: SO:0000112 -name: primer -namespace: sequence -def: "An oligo to which new deoxyribonucleotides can be added by DNA polymerase." [SO:ke] -subset: SOFA -synonym: "DNA primer" EXACT [] -synonym: "primer oligonucleotide" EXACT [] -synonym: "primer polynucleotide" EXACT [] -synonym: "primer sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Primer_(molecular_biology) "wiki" -is_a: SO:0000441 ! ss_oligo - -[Term] -id: SO:0000113 -name: proviral_region -namespace: sequence -def: "A viral sequence which has integrated into a host genome." [SO:ke] -subset: SOFA -synonym: "proviral region" EXACT [] -synonym: "proviral sequence" RELATED [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000114 -name: methylated_C -namespace: sequence -def: "A methylated deoxy-cytosine." 
[SO:ke] -subset: SOFA -synonym: "methylated C" EXACT [] -synonym: "methylated cytosine" EXACT [] -synonym: "methylated cytosine base" EXACT [] -synonym: "methylated cytosine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -namespace: sequence -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns. -subset: SOFA -synonym: "pre mRNA" RELATED [] -synonym: "protein coding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000139 -name: ribosome_entry_site -namespace: sequence -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000140 -name: attenuator -namespace: sequence -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -namespace: sequence -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000143 -name: assembly_component -namespace: sequence -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000147 -name: exon -namespace: sequence -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -namespace: sequence -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -namespace: sequence -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unavailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -namespace: sequence -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -namespace: sequence -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000159 -name: deletion -namespace: sequence -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." 
[SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -xref: loinc:LA6692-3 "Deletion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000161 -name: methylated_A -namespace: sequence -def: "A modified RNA base in which adenine has been methylated." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -namespace: sequence -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinery. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -namespace: sequence -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -namespace: sequence -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." 
[http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -namespace: sequence -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a place holder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000167 -name: promoter -namespace: sequence -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000177 -name: cross_genome_match -namespace: sequence -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! 
nucleotide_match - -[Term] -id: SO:0000178 -name: operon -namespace: sequence -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -namespace: sequence -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -namespace: sequence -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000183 -name: non_transcribed_region -namespace: sequence -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000185 -name: primary_transcript -namespace: sequence -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000187 -name: repeat_family -namespace: sequence -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -namespace: sequence -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000193 -name: RFLP_fragment -namespace: sequence -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." [GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000195 -name: coding_exon -namespace: sequence -def: "An exon whereby at least one base is part of a codon (here, 'codon' is inclusive of the stop_codon)." [SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -namespace: sequence -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -namespace: sequence -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000195 ! coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -namespace: sequence -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! 
exon - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -namespace: sequence -def: "The 5' most coding exon." [SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -namespace: sequence -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -namespace: sequence -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -namespace: sequence -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -namespace: sequence -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -subset: SOFA -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -namespace: sequence -def: "A transcript which has undergone the necessary modifications, if any, for its function. 
In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -namespace: sequence -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -namespace: sequence -def: "A region of a nucleotide molecule that binds a Transcription Factor or Transcription Factor complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000236 -name: ORF -namespace: sequence -def: "The in-frame interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000239 -name: flanking_region -namespace: sequence -def: "The sequences extending on either side of a specific region." [SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000252 -name: rRNA -namespace: sequence -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -namespace: sequence -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
-subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -namespace: sequence -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -namespace: sequence -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -namespace: sequence -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." 
[PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000289 -name: microsatellite -namespace: sequence -def: "A repeat_region containing repeat_units (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000294 -name: inverted_repeat -namespace: sequence -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000296 -name: origin_of_replication -namespace: sequence -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! replicon - -[Term] -id: SO:0000303 -name: clip -namespace: sequence -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000305 -name: modified_base -namespace: sequence -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001720 ! epigenetically_modified_region - -[Term] -id: SO:0000306 -name: methylated_base_feature -namespace: sequence -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_base - -[Term] -id: SO:0000307 -name: CpG_island -namespace: sequence -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000314 -name: direct_repeat -namespace: sequence -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: TSS -namespace: sequence -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -namespace: sequence -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000318 -name: start_codon -namespace: sequence -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! 
codon - -[Term] -id: SO:0000319 -name: stop_codon -namespace: sequence -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000324 -name: tag -namespace: sequence -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -namespace: sequence -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -namespace: sequence -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000330 -name: conserved_region -namespace: sequence -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000331 -name: STS -namespace: sequence -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -namespace: sequence -def: "Coding region of sequence similarity by descent from a common ancestor." 
[SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -namespace: sequence -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -namespace: sequence -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000336 -name: pseudogene -namespace: sequence -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0001411 ! biological_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -namespace: sequence -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! 
ds_oligo - -[Term] -id: SO:0000340 -name: chromosome -namespace: sequence -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000341 -name: chromosome_band -namespace: sequence -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000343 -name: match -namespace: sequence -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -namespace: sequence -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -namespace: sequence -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000347 -name: nucleotide_match -namespace: sequence -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! 
match - -[Term] -id: SO:0000349 -name: protein_match -namespace: sequence -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000353 -name: sequence_assembly -namespace: sequence -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000360 -name: codon -namespace: sequence -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000366 -name: insertion_site -namespace: sequence -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -namespace: sequence -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -namespace: sequence -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000372 -name: enzymatic_RNA -namespace: sequence -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." 
[RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000374 -name: ribozyme -namespace: sequence -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -is_a: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000375 -name: rRNA_5_8S -namespace: sequence -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -namespace: sequence -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [PMID:2436805] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -namespace: sequence -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. 
In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -namespace: sequence -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterized activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -namespace: sequence -def: "The RNA component of telomerase, a reverse transcriptase that synthesizes telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -namespace: sequence -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -namespace: sequence -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -namespace: sequence -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. 
It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -namespace: sequence -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -namespace: sequence -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -namespace: sequence -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. 
The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -namespace: sequence -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -namespace: sequence -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -namespace: sequence -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000403 -name: U14_snoRNA -namespace: sequence -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0000404 -name: vault_RNA -namespace: sequence -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -namespace: sequence -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. 
In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000407 -name: rRNA_18S -namespace: sequence -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000409 -name: binding_site -namespace: sequence -alt_id: BS:00033 -def: "A biological_region of sequence that, in the molecule, interacts selectively and non-covalently with other molecules. A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: See GO:0005488 : binding. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -namespace: sequence -def: "A binding site that, in the molecule, interacts selectively and non-covalently with polypeptide molecules." [SO:ke] -comment: See GO:0042277 : peptide binding. -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! 
binding_site - -[Term] -id: SO:0000412 -name: restriction_fragment -namespace: sequence -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -namespace: sequence -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000418 -name: signal_peptide -namespace: sequence -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0001527 ! peptide_localization_signal -relationship: part_of SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000419 -name: mature_protein_region -namespace: sequence -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. 
Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000436 -name: ARS -namespace: sequence -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000441 -name: ss_oligo -namespace: sequence -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -namespace: sequence -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000454 -name: rasiRNA -namespace: sequence -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000462 -name: pseudogenic_region -namespace: sequence -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000464 -name: decayed_exon -namespace: sequence -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000468 -name: golden_path_fragment -namespace: sequence -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000472 -name: tiling_path -namespace: sequence -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000474 -name: tiling_path_fragment -namespace: sequence -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000483 -name: nc_primary_transcript -namespace: sequence -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -namespace: sequence -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! 
noncoding_region_of_exon -relationship: part_of SO:0000195 ! coding_exon - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -namespace: sequence -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000499 -name: virtual_sequence -namespace: sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000502 -name: transcribed_region -namespace: sequence -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -namespace: sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -synonym: "polyadenylation termination signal" EXACT [] -is_a: SO:0001679 ! 
transcription_regulatory_region - -[Term] -id: SO:0000553 -name: polyA_site -namespace: sequence -alt_id: SO:0001430 -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation. The boundary between the UTR and the polyA sequence." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA cleavage site" EXACT [] -synonym: "polyA junction" EXACT [] -synonym: "polyA site" EXACT [] -synonym: "polyA_junction" EXACT [] -synonym: "polyadenylation site" RELATED [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000577 -name: centromere -namespace: sequence -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000581 -name: cap -namespace: sequence -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000587 -name: group_I_intron -namespace: sequence -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyze their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -namespace: sequence -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000590 -name: SRP_RNA -namespace: sequence -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -namespace: sequence -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0000602 -name: guide_RNA -namespace: sequence -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -namespace: sequence -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). 
A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000605 -name: intergenic_region -namespace: sequence -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000610 -name: polyA_sequence -namespace: sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -namespace: sequence -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. 
The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -namespace: sequence -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000616 -name: transcription_end_site -namespace: sequence -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000624 -name: telomere -namespace: sequence -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -namespace: sequence -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000627 -name: insulator -namespace: sequence -def: "A transcriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! 
transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -namespace: sequence -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000643 -name: minisatellite -namespace: sequence -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -namespace: sequence -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -namespace: sequence -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -namespace: sequence -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000649 -name: stRNA -namespace: sequence -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." 
[PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -namespace: sequence -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -namespace: sequence -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -namespace: sequence -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilizes 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -namespace: sequence -def: "A component of the large ribosomal subunit." 
[SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000655 -name: ncRNA -namespace: sequence -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000657 -name: repeat_region -namespace: sequence -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: has_part SO:0001411 ! biological_region - -[Term] -id: SO:0000658 -name: dispersed_repeat -namespace: sequence -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000662 -name: spliceosomal_intron -namespace: sequence -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000667 -name: insertion -namespace: sequence -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." 
[SO:ke] -subset: DBVAR -subset: SOFA -synonym: "insertion" EXACT dbvar [http://www.ncbi.nlm.nih.gov/dbvar/] -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -xref: loinc:LA6687-3 "Insertion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -namespace: sequence -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000673 -name: transcript -namespace: sequence -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -namespace: sequence -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000687 -name: deletion_junction -namespace: sequence -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -namespace: sequence -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -namespace: sequence -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! 
expressed_sequence_match - -[Term] -id: SO:0000694 -name: SNP -namespace: sequence -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:0000695 -name: reagent -namespace: sequence -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -namespace: sequence -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000699 -name: junction -namespace: sequence -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -synonym: "breakpoint" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000700 -name: remark -namespace: sequence -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -namespace: sequence -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -namespace: sequence -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! 
sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -namespace: sequence -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -namespace: sequence -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -namespace: sequence -def: "Two or more adjcent copies of a region (of length greater than 1)." [SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -namespace: sequence -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000714 -name: nucleotide_motif -namespace: sequence -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001683 ! sequence_motif - -[Term] -id: SO:0000715 -name: RNA_motif -namespace: sequence -def: "A motif that is active in RNA sequence." 
[SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000717 -name: reading_frame -namespace: sequence -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000719 -name: ultracontig -namespace: sequence -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -synonym: "superscaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000724 -name: oriT -namespace: sequence -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -namespace: sequence -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. 
Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal - -[Term] -id: SO:0000727 -name: CRM -namespace: sequence -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: has_part SO:0000235 ! TF_binding_site -relationship: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000730 -name: gap -namespace: sequence -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -namespace: sequence -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -namespace: sequence -def: "The region of sequence that has been inserted and is being propagated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -namespace: sequence -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. 
Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -namespace: sequence -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000830 -name: chromosome_part -namespace: sequence -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -namespace: sequence -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000833 -name: transcript_region -namespace: sequence -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -namespace: sequence -def: "A region of a mature transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. 
-subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -namespace: sequence -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -namespace: sequence -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -namespace: sequence -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000839 -name: polypeptide_region -namespace: sequence -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -namespace: sequence -def: "A region within an intron." 
[SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -namespace: sequence -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000851 -name: CDS_region -namespace: sequence -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -namespace: sequence -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0001000 -name: rRNA_16S -namespace: sequence -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -namespace: sequence -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -namespace: sequence -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." 
[RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001019 -name: copy_number_variation -namespace: sequence -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -subset: SOFA -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0001037 -name: mobile_genetic_element -namespace: sequence -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -namespace: sequence -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -namespace: sequence -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -subset: SOFA -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -namespace: sequence -def: "A regulatory_region that modulates splicing." [SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0001679 ! 
transcription_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -namespace: sequence -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. -subset: SOFA -synonym: "partially characterised change in DNA sequence" NARROW [] -synonym: "partially_characterised_change_in_DNA_sequence" NARROW [] -synonym: "sequence alteration" EXACT [] -synonym: "uncharacterised_change_in_nucleotide_sequence" NARROW [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001063 -name: immature_peptide_region -namespace: sequence -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -namespace: sequence -def: "The maximal intersection of exon and UTR." [SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with a stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -namespace: sequence -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001235 -name: replicon -namespace: sequence -def: "A region containing at least one unique origin of replication and a unique termination site." 
[ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -namespace: sequence -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001248 -name: assembly -namespace: sequence -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001409 -name: biomaterial_region -namespace: sequence -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001410 -name: experimental_feature -namespace: sequence -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -namespace: sequence -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -namespace: sequence -def: "A region that is defined according to its relations with other regions within the same sequence." [SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0001419 -name: cis_splice_site -namespace: sequence -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -namespace: sequence -def: "Primary transcript region bordering trans-splice junction." [SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001483 -name: SNV -namespace: sequence -def: "SNVs are single nucleotide positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0001527 -name: peptide_localization_signal -namespace: sequence -def: "A region of peptide sequence used to target the polypeptide molecule to a specific organelle." [SO:ke] -subset: SOFA -synonym: "localization signal" RELATED [] -synonym: "peptide localization signal" EXACT [] -is_a: SO:0000839 ! polypeptide_region -created_by: kareneilbeck -creation_date: 2010-03-11T02:15:05Z - -[Term] -id: SO:0001647 -name: kozak_sequence -namespace: sequence -def: "A kind of ribosome entry site, specific to Eukaryotic organisms that overlaps part of both 5' UTR and CDS sequence." [SO:ke] -subset: SOFA -synonym: "kozak consensus" EXACT [] -synonym: "kozak consensus sequence" EXACT [] -synonym: "kozak sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Kozak_consensus_sequence "wikipedia" -is_a: SO:0000139 ! ribosome_entry_site -created_by: kareneilbeck -creation_date: 2010-06-07T03:12:20Z - -[Term] -id: SO:0001654 -name: nucleotide_to_protein_binding_site -namespace: sequence -def: "A binding site that, in the nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." 
[SO:ke] -subset: SOFA -synonym: "nucleotide to protein binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:26:05Z - -[Term] -id: SO:0001679 -name: transcription_regulatory_region -namespace: sequence -def: "A regulatory region that is involved in the control of the process of transcription." [SO:ke] -subset: SOFA -synonym: "transcription regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:49:35Z - -[Term] -id: SO:0001683 -name: sequence_motif -namespace: sequence -def: "A sequence motif is a nucleotide or amino-acid sequence pattern that may have biological significance." [http://en.wikipedia.org/wiki/Sequence_motif] -subset: SOFA -synonym: "sequence motif" RELATED [] -xref: http://en.wikipedia.org/wiki/Sequence_motif "wikipedia" -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-10-14T04:13:22Z - -[Term] -id: SO:0001720 -name: epigenetically_modified_region -namespace: sequence -def: "A biological region implicated in inherited changes caused by mechanisms other than changes in the underlying DNA sequence." [http://en.wikipedia.org/wiki/Epigenetics, SO:ke] -subset: SOFA -synonym: "epigenetically modified region" RELATED [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-03-27T12:02:29Z - -[Term] -id: SO:0001790 -name: paired_end_fragment -namespace: sequence -def: "An assembly region that has been sequenced from both ends resulting in a read_pair (mate_pair)." [SO:ke] -subset: SOFA -synonym: "paired end fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -created_by: kareneilbeck -creation_date: 2011-04-14T01:48:20Z - -[Term] -id: SO:0005836 -name: regulatory_region -namespace: sequence -def: "A region of sequence that is involved in the control of a biological process." 
[SO:ke] -subset: SOFA -synonym: "regulatory region" EXACT [] -xref: http://en.wikipedia.org/wiki/Regulatory_region "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0005855 -name: gene_group -namespace: sequence -def: "A collection of related genes." [SO:ma] -subset: SOFA -synonym: "gene group" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0100011 -name: cleaved_peptide_region -namespace: sequence -def: "The cleaved_peptide_regon is the a region of peptide sequence that is cleaved during maturation." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "cleaved peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:1000002 -name: substitution -namespace: sequence -def: "A sequence alteration where the length of the change in the variant is the same as that of the reference." [SO:ke] -subset: SOFA -xref: loinc:LA6690-7 "Substitution" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1000005 -name: complex_substitution -namespace: sequence -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." [EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -synonym: "complex substitution" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000008 -name: point_mutation -namespace: sequence -def: "A single nucleotide change which has occurred at the same position of a corresponding nucleotide in a reference sequence." [SO:immuno_workshop] -subset: SOFA -synonym: "point mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Point_mutation "wiki" -is_a: SO:0001483 ! SNV - -[Term] -id: SO:1000036 -name: inversion -namespace: sequence -def: "A continuous nucleotide sequence is inverted in the same position." 
[EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: DBVAR -subset: SOFA -synonym: "inversion" EXACT dbvar [http://www.ncbi.nlm.nih.gov/dbvar/] -xref: loinc:LA6689-9 "Inversion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1001284 -name: regulon -namespace: sequence -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." [ISBN:0198506732] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Regulon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:2000061 -name: databank_entry -namespace: sequence -def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -synonym: "databank entry" EXACT [] -is_a: SO:0000695 ! reagent - -[Typedef] -id: adjacent_to -name: adjacent_to -namespace: sequence -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence. X adjacent_to Y iff X and Y share a boundary but do not overlap." [PMID:20226267, SO:ke] -subset: SOFA - -[Typedef] -id: associated_with -name: associated_with -namespace: sequence -comment: This relationship is vague and up for discussion. - -[Typedef] -id: complete_evidence_for_feature -name: complete_evidence_for_feature -namespace: sequence -def: "B is complete_evidence_for_feature A if the extent (5' and 3' boundaries) and internal boundaries of B fully support the extent and internal boundaries of A." [SO:ke] -comment: If A is a feature with multiple regions such as a multi exon transcript, the supporting EST evidence is complete if each of the regions is supported by an equivalent region in B. Also there must be no extra regions in B that are not represented in A. This relationship was requested by jeltje on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. 
-is_transitive: true -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: connects_on -name: connects_on -namespace: sequence -def: "X connects_on Y, Z, R iff whenever Z is on a R, X is adjacent to a Y and adjacent to a Z." [PMID:20226267] -comment: Example: A splice_junction connects_on exon, exon, mature_transcript. -created_by: kareneilbeck -creation_date: 2010-10-14T01:38:51Z - -[Typedef] -id: contained_by -name: contained_by -namespace: sequence -def: "X contained_by Y iff X starts after start of Y and X ends before end of Y." [PMID:20226267] -comment: The inverse is contains. Example: intein contained_by immature_peptide_region. -is_transitive: true -created_by: kareneilbeck -creation_date: 2010-10-14T01:26:16Z - -[Typedef] -id: contains -name: contains -namespace: sequence -def: "The inverse of contained_by." [PMID:20226267] -comment: Example: pre_miRNA contains miRNA_loop. -is_transitive: true -created_by: kareneilbeck -creation_date: 2010-10-14T01:32:15Z - -[Typedef] -id: derives_from -name: derives_from -namespace: sequence -subset: SOFA -is_transitive: true - -[Typedef] -id: disconnected_from -name: disconnected_from -namespace: sequence -def: "X is disconnected_from Y iff it is not the case that X overlaps Y." [PMID:20226267] -comment: Example: intron disconnected from exon {on primary_transcript}. -created_by: kareneilbeck -creation_date: 2010-10-14T01:42:10Z - -[Typedef] -id: edited_from -name: edited_from -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:45Z - -[Typedef] -id: edited_to -name: edited_to -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:11Z - -[Typedef] -id: evidence_for_feature -name: evidence_for_feature -namespace: sequence -def: "B is evidence_for_feature A, if an instance of B supports the existence of A." [SO:ke] -comment: This relationship was requested by nlw on the SO term tracker. 
The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true - -[Typedef] -id: exemplar_of -name: exemplar_of -namespace: sequence -def: "X is exemplar of Y if X is the best evidence for Y." [SO:ke] -comment: Tracker id: 2594157. - -[Typedef] -id: finished_by -name: finished_by -namespace: sequence -def: "Xy is finished_by Y if Y part of X, and X and Y share a 3' boundary." [PMID:20226267] -comment: Example CDS finished_by stop_codon. -created_by: kareneilbeck -creation_date: 2010-10-14T01:45:45Z - -[Typedef] -id: finishes -name: finishes -namespace: sequence -def: "X finishes Y if X is part_of Y and X and Y share a 3' or C terminal boundary." [PMID:20226267] -comment: Example: stop_codon finishes CDS. -created_by: kareneilbeck -creation_date: 2010-10-14T02:17:53Z - -[Typedef] -id: gained -name: gained -namespace: sequence -def: "X gained Y if X is a variant_of X' and Y part of X but not X'." [SO:ke] -comment: A relation with which to annotate the changes in a variant sequence with respect to a reference.\nFor example a variant transcript may gain a stop codon not present in the reference sequence. -created_by: kareneilbeck -creation_date: 2011-06-28T12:51:10Z - -[Typedef] -id: genome_of -name: genome_of -namespace: sequence - -[Typedef] -id: guided_by -name: guided_by -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:04Z - -[Typedef] -id: guides -name: guides -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:24Z - -[Typedef] -id: has_integral_part -name: has_integral_part -namespace: sequence -def: "X has_integral_part Y if and only if: X has_part Y and Y part_of X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: mRNA has_integral_part CDS. -is_a: has_part ! 
has_part -created_by: kareneilbeck -creation_date: 2009-08-19T12:01:46Z - -[Typedef] -id: has_origin -name: has_origin -namespace: sequence - -[Typedef] -id: has_part -name: has_part -namespace: sequence -def: "Inverse of part_of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: operon has_part gene. - -[Typedef] -id: has_quality -name: has_quality -namespace: sequence -comment: The relationship between a feature and an attribute. - -[Typedef] -id: homologous_to -name: homologous_to -namespace: sequence -subset: SOFA -is_symmetric: true -is_a: similar_to ! similar_to - -[Typedef] -id: integral_part_of -name: integral_part_of -namespace: sequence -def: "X integral_part_of Y if and only if: X part_of Y and Y has_part X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: exon integral_part_of transcript. -is_a: part_of ! part_of -created_by: kareneilbeck -creation_date: 2009-08-19T12:03:28Z - -[Typedef] -id: is_consecutive_sequence_of -name: is_consecutive_sequence_of -namespace: sequence -def: "R is_consecutive_sequence_of R iff every instance of R is equivalent to a collection of instances of U:u1, u2, un, such that no pair of ux uy is overlapping and for all ux, it is adjacent to ux-1 and ux+1, with the exception of the initial and terminal u1,and un (which may be identical)." [PMID:20226267] -comment: Example: region is consecutive_sequence of base. -created_by: kareneilbeck -creation_date: 2010-10-14T02:19:48Z - -[Typedef] -id: lost -name: lost -namespace: sequence -def: "X lost Y if X is a variant_of X' and Y part of X' but not X." [SO:ke] -comment: A relation with which to annotate the changes in a variant sequence with respect to a reference.\nFor example a variant transcript may have lost a stop codon present in the reference sequence. 
-created_by: kareneilbeck -creation_date: 2011-06-28T12:53:16Z - -[Typedef] -id: maximally_overlaps -name: maximally_overlaps -namespace: sequence -def: "A maximally_overlaps X iff all parts of A (including A itself) overlap both A and Y." [PMID:20226267] -comment: Example: non_coding_region_of_exon maximally_overlaps the intersections of exon and UTR. -created_by: kareneilbeck -creation_date: 2010-10-14T01:34:48Z - -[Typedef] -id: member_of -name: member_of -namespace: sequence -comment: A subtype of part_of. Inverse is collection_of. Winston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. -subset: SOFA -is_transitive: true -is_a: part_of ! part_of - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -namespace: sequence -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -subset: SOFA -is_a: homologous_to ! homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -namespace: sequence -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: overlaps -name: overlaps -namespace: sequence -def: "X overlaps Y iff there exists some Z such that Z contained_by X and Z contained_by Y." [PMID:20226267] -comment: Example: coding_exon overlaps CDS. -created_by: kareneilbeck -creation_date: 2010-10-14T01:33:15Z - -[Typedef] -id: paralogous_to -name: paralogous_to -namespace: sequence -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: part_of -name: part_of -namespace: sequence -def: "X part_of Y if X is a subregion of Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: amino_acid part_of polypeptide. -subset: SOFA -is_transitive: true - -[Typedef] -id: partial_evidence_for_feature -name: partial_evidence_for_feature -namespace: sequence -def: "B is partial_evidence_for_feature A if the extent of B supports part_of but not all of A." 
[SO:ke] -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: position_of -name: position_of -namespace: sequence - -[Typedef] -id: processed_from -name: processed_from -namespace: sequence -def: "Inverse of processed_into." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA processed_from miRNA_primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:14:00Z - -[Typedef] -id: processed_into -name: processed_into -namespace: sequence -def: "X is processed_into Y if a region X is modified to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA_primary_transcript processed into miRNA. -created_by: kareneilbeck -creation_date: 2009-08-19T12:15:02Z - -[Typedef] -id: recombined_from -name: recombined_from -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:21:03Z - -[Typedef] -id: recombined_to -name: recombined_to -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:20:07Z - -[Typedef] -id: sequence_of -name: sequence_of -namespace: sequence - -[Typedef] -id: similar_to -name: similar_to -namespace: sequence -subset: SOFA -is_symmetric: true - -[Typedef] -id: started_by -name: started_by -namespace: sequence -def: "X is strted_by Y if Y is part_of X and X and Y share a 5' boundary." [PMID:20226267] -comment: Example: CDS started_by start_codon. -created_by: kareneilbeck -creation_date: 2010-10-14T01:43:55Z - -[Typedef] -id: starts -name: starts -namespace: sequence -def: "X starts Y if X is part of Y, and A and Y share a 5' or N-terminal boundary." [PMID:20226267] -comment: Example: start_codon starts CDS. 
-created_by: kareneilbeck -creation_date: 2010-10-14T01:47:53Z - -[Typedef] -id: trans_spliced_from -name: trans_spliced_from -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:14Z - -[Typedef] -id: trans_spliced_to -name: trans_spliced_to -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:00Z - -[Typedef] -id: transcribed_from -name: transcribed_from -namespace: sequence -def: "X is transcribed_from Y if X is synthesized from template Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: primary_transcript transcribed_from gene. -created_by: kareneilbeck -creation_date: 2009-08-19T12:05:39Z - -[Typedef] -id: transcribed_to -name: transcribed_to -namespace: sequence -def: "Inverse of transcribed_from." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: gene transcribed_to primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:08:24Z - -[Typedef] -id: translates_to -name: translates_to -namespace: sequence -def: "Inverse of translation _of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: codon translates_to amino_acid. -created_by: kareneilbeck -creation_date: 2009-08-19T12:11:53Z - -[Typedef] -id: translation_of -name: translation_of -namespace: sequence -def: "X is translation of Y if X is translated by ribosome to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: Polypeptide translation_of CDS. -created_by: kareneilbeck -creation_date: 2009-08-19T12:09:59Z - -[Typedef] -id: variant_of -name: variant_of -namespace: sequence -def: "A' is a variant (mutation) of A = definition every instance of A' is either an immediate mutation of some instance of A, or there is a chain of immediate mutation processes linking A' to some instance of A." [SO:immuno_workshop] -comment: Added to SO during the immunology workshop, June 2007. This relationship was approved by Barry Smith. 
- diff --git a/annotation/NBIS/Ontology/SOFA/sofa_2_5_1.obo b/annotation/NBIS/Ontology/SOFA/sofa_2_5_1.obo deleted file mode 100644 index b38778092..000000000 --- a/annotation/NBIS/Ontology/SOFA/sofa_2_5_1.obo +++ /dev/null @@ -1,3056 +0,0 @@ -format-version: 1.2 -subsetdef: biosapiens "biosapiens protein feature ontology" -subsetdef: DBVAR "database of genomic structural variation" -subsetdef: SOFA "SO feature annotation" -synonymtypedef: aa1 "amino acid 1 letter code" -synonymtypedef: aa3 "amino acid 3 letter code" -synonymtypedef: AAMOD "amino acid modification" -synonymtypedef: BS "biosapiens" -synonymtypedef: dbsnp "dbsnp variant terms" -synonymtypedef: dbvar "DBVAR" -synonymtypedef: ebi_variants "ensembl variant terms" -synonymtypedef: RNAMOD "RNA modification" EXACT -ontology: so-xp/subsets/SOFA -default-namespace: sequence - -[Term] -id: SO:0000000 -name: Sequence_Ontology -namespace: sequence -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000001 -name: region -namespace: sequence -def: "A sequence_feature with an extent greater than zero. A nucleotide region is composed of bases and a polypeptide region is composed of amino acids." [SO:ke] -subset: SOFA -synonym: "sequence" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000699 ! junction - -[Term] -id: SO:0000004 -name: interior_coding_exon -namespace: sequence -subset: SOFA -synonym: "interior coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000005 -name: satellite_DNA -namespace: sequence -def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "satellite DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Satellite_DNA "wiki" -is_a: SO:0000705 ! 
tandem_repeat - -[Term] -id: SO:0000006 -name: PCR_product -namespace: sequence -def: "A region amplified by a PCR reaction." [SO:ke] -comment: This term is mapped to MGED. This term is now located in OBI, with the following ID OBI_0000406. -subset: SOFA -synonym: "amplicon" RELATED [] -synonym: "PCR product" EXACT [] -xref: http://en.wikipedia.org/wiki/RAPD "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000007 -name: read_pair -namespace: sequence -def: "One of a pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -synonym: "mate pair" EXACT [] -synonym: "read-pair" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000149 ! contig -relationship: part_of SO:0001790 ! paired_end_fragment - -[Term] -id: SO:0000013 -name: scRNA -namespace: sequence -def: "A small non coding RNA sequence, present in the cytoplasm." [SO:ke] -subset: SOFA -synonym: "small cytoplasmic RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000038 -name: match_set -namespace: sequence -def: "A collection of match parts." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000039 -name: match_part -namespace: sequence -def: "A part of a match, for example an hsp from blast is a match_part." [SO:ke] -subset: SOFA -synonym: "match part" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: part_of SO:0000343 ! match - -[Term] -id: SO:0000050 -name: gene_part -namespace: sequence -def: "A part of a gene, that has no other route in the ontology back to region. This concept is necessary for logical inference as these parts must have the properties of region. It also allows us to associate all the parts of genes with a gene." 
[SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000057 -name: operator -namespace: sequence -def: "A regulatory element of an operon to which activators or repressors bind thereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -synonym: "operator segment" EXACT [] -xref: http://en.wikipedia.org/wiki/Operator_(biology)#Operator "wiki" -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000059 -name: nuclease_binding_site -namespace: sequence -def: "A binding site that, of a nucleotide molecule, that interacts selectively and non-covalently with polypeptide residues of a nuclease." [SO:cb] -subset: SOFA -synonym: "nuclease binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site - -[Term] -id: SO:0000101 -name: transposable_element -namespace: sequence -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -synonym: "transposable element" EXACT [] -synonym: "transposon" EXACT [] -xref: http://en.wikipedia.org/wiki/Transposable_element "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000102 -name: expressed_sequence_match -namespace: sequence -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -synonym: "expressed sequence match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -namespace: sequence -def: "The end of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert end" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000104 -name: polypeptide -namespace: sequence -alt_id: SO:0000358 -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." [SO:ma] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. The term 'protein' was merged with 'polypeptide'. Although 'protein' was a sequence_attribute and therefore meant to describe the quality rather than an actual feature, it was being used erroneously. It is replaced by 'peptidyl' as the polymer attribute. -subset: SOFA -synonym: "protein" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypeptide "wiki" -is_a: SO:0001411 ! biological_region -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000109 -name: sequence_variant_obs -namespace: sequence -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000110 -name: sequence_feature -namespace: sequence -def: "An extent of biological sequence." [SO:ke] -subset: SOFA -synonym: "located sequence feature" RELATED [] -synonym: "located_sequence_feature" EXACT [] -synonym: "sequence feature" EXACT [] - -[Term] -id: SO:0000112 -name: primer -namespace: sequence -def: "An oligo to which new deoxyribonucleotides can be added by DNA polymerase." [SO:ke] -subset: SOFA -synonym: "DNA primer" EXACT [] -synonym: "primer oligonucleotide" EXACT [] -synonym: "primer polynucleotide" EXACT [] -synonym: "primer sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Primer_(molecular_biology) "wiki" -is_a: SO:0000441 ! ss_oligo - -[Term] -id: SO:0000113 -name: proviral_region -namespace: sequence -def: "A viral sequence which has integrated into a host genome." [SO:ke] -subset: SOFA -synonym: "proviral region" EXACT [] -synonym: "proviral sequence" RELATED [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000114 -name: methylated_cytosine -namespace: sequence -def: "A methylated deoxy-cytosine." 
[SO:ke] -subset: SOFA -synonym: "methylated C" EXACT [] -synonym: "methylated cytosine" EXACT [] -synonym: "methylated cytosine base" EXACT [] -synonym: "methylated cytosine residue" EXACT [] -synonym: "methylated_C" EXACT [] -is_a: SO:0000306 ! methylated_DNA_base_feature - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -namespace: sequence -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns. -subset: SOFA -synonym: "pre mRNA" RELATED [] -synonym: "protein coding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000139 -name: ribosome_entry_site -namespace: sequence -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000140 -name: attenuator -namespace: sequence -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -namespace: sequence -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000143 -name: assembly_component -namespace: sequence -def: "A region of known length which may be used to manufacture a longer region." 
[SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000147 -name: exon -namespace: sequence -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -namespace: sequence -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -namespace: sequence -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unavailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -namespace: sequence -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -namespace: sequence -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! 
reagent - -[Term] -id: SO:0000159 -name: deletion -namespace: sequence -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -xref: loinc:LA6692-3 "Deletion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000161 -name: methylated_adenine -namespace: sequence -def: "A modified base in which adenine has been methylated." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -synonym: "methylated_A" EXACT [] -is_a: SO:0000306 ! methylated_DNA_base_feature - -[Term] -id: SO:0000162 -name: splice_site -namespace: sequence -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinery. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -namespace: sequence -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! 
cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -namespace: sequence -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -namespace: sequence -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a place holder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000167 -name: promoter -namespace: sequence -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! 
transcriptional_cis_regulatory_region - -[Term] -id: SO:0000177 -name: cross_genome_match -namespace: sequence -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -namespace: sequence -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -namespace: sequence -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -namespace: sequence -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000183 -name: non_transcribed_region -namespace: sequence -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000185 -name: primary_transcript -namespace: sequence -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000187 -name: repeat_family -namespace: sequence -def: "A group of characterized repeat sequences." 
[SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -namespace: sequence -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000193 -name: RFLP_fragment -namespace: sequence -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." [GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000195 -name: coding_exon -namespace: sequence -def: "An exon whereby at least one base is part of a codon (here, 'codon' is inclusive of the stop_codon)." [SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -namespace: sequence -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -namespace: sequence -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000195 ! 
coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -namespace: sequence -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -namespace: sequence -def: "The 5' most coding exon." [SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -namespace: sequence -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -namespace: sequence -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -namespace: sequence -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -namespace: sequence -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -subset: SOFA -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! 
nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -namespace: sequence -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -namespace: sequence -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -namespace: sequence -def: "A region of a nucleotide molecule that binds a Transcription Factor or Transcription Factor complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000236 -name: ORF -namespace: sequence -def: "The in-frame interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." 
[SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000239 -name: flanking_region -namespace: sequence -def: "The sequences extending on either side of a specific region." [SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000252 -name: rRNA -namespace: sequence -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -namespace: sequence -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -namespace: sequence -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -namespace: sequence -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -namespace: sequence -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. 
Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000289 -name: microsatellite -namespace: sequence -def: "A repeat_region containing repeat_units (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000294 -name: inverted_repeat -namespace: sequence -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000296 -name: origin_of_replication -namespace: sequence -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! 
replicon - -[Term] -id: SO:0000303 -name: clip -namespace: sequence -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000305 -name: modified_DNA_base -namespace: sequence -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001236 ! base -is_a: SO:0001720 ! epigenetically_modified_region - -[Term] -id: SO:0000306 -name: methylated_DNA_base_feature -namespace: sequence -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_DNA_base - -[Term] -id: SO:0000307 -name: CpG_island -namespace: sequence -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000314 -name: direct_repeat -namespace: sequence -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: TSS -namespace: sequence -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -is_a: SO:0000835 ! 
primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -namespace: sequence -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000318 -name: start_codon -namespace: sequence -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -namespace: sequence -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000324 -name: tag -namespace: sequence -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -namespace: sequence -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "35S rRNA primary transcript" EXACT [] -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -namespace: sequence -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000330 -name: conserved_region -namespace: sequence -def: "Region of sequence similarity by descent from a common ancestor." 
[SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000331 -name: STS -namespace: sequence -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -namespace: sequence -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -namespace: sequence -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -namespace: sequence -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000336 -name: pseudogene -namespace: sequence -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." 
[http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0001411 ! biological_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -namespace: sequence -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0000340 -name: chromosome -namespace: sequence -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000341 -name: chromosome_band -namespace: sequence -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000343 -name: match -namespace: sequence -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -namespace: sequence -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0001056 ! 
splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -namespace: sequence -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000347 -name: nucleotide_match -namespace: sequence -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000349 -name: protein_match -namespace: sequence -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000353 -name: sequence_assembly -namespace: sequence -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000360 -name: codon -namespace: sequence -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000366 -name: insertion_site -namespace: sequence -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -namespace: sequence -def: "The junction in a genome where a transposable_element has inserted." 
[SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -namespace: sequence -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000372 -name: enzymatic_RNA -namespace: sequence -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000374 -name: ribozyme -namespace: sequence -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -is_a: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000375 -name: rRNA_5_8S -namespace: sequence -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -namespace: sequence -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. 
Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [PMID:2436805] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -namespace: sequence -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -namespace: sequence -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterized activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -namespace: sequence -def: "The RNA component of telomerase, a reverse transcriptase that synthesizes telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -namespace: sequence -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -namespace: sequence -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -namespace: sequence -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -namespace: sequence -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -namespace: sequence -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -namespace: sequence -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -namespace: sequence -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -namespace: sequence -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." 
[PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -namespace: sequence -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000403 -name: U14_snoRNA -namespace: sequence -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0000404 -name: vault_RNA -namespace: sequence -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. 
It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -namespace: sequence -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000407 -name: rRNA_18S -namespace: sequence -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000409 -name: binding_site -namespace: sequence -alt_id: BS:00033 -def: "A biological_region of sequence that, in the molecule, interacts selectively and non-covalently with other molecules. A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: See GO:0005488 : binding. 
-subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -namespace: sequence -def: "A binding site that, in the molecule, interacts selectively and non-covalently with polypeptide molecules." [SO:ke] -comment: See GO:0042277 : peptide binding. -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000412 -name: restriction_fragment -namespace: sequence -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -namespace: sequence -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000418 -name: signal_peptide -namespace: sequence -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0001527 ! peptide_localization_signal -relationship: part_of SO:0100011 ! 
cleaved_peptide_region - -[Term] -id: SO:0000419 -name: mature_protein_region -namespace: sequence -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000436 -name: ARS -namespace: sequence -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000441 -name: ss_oligo -namespace: sequence -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -namespace: sequence -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! 
oligo - -[Term] -id: SO:0000454 -name: rasiRNA -namespace: sequence -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000462 -name: pseudogenic_region -namespace: sequence -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000464 -name: decayed_exon -namespace: sequence -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000468 -name: golden_path_fragment -namespace: sequence -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000472 -name: tiling_path -namespace: sequence -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000474 -name: tiling_path_fragment -namespace: sequence -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000483 -name: nc_primary_transcript -namespace: sequence -def: "A primary transcript that is never translated into a protein." 
[SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -namespace: sequence -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000195 ! coding_exon - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -namespace: sequence -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000499 -name: virtual_sequence -namespace: sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000502 -name: transcribed_region -namespace: sequence -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. 
-subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -namespace: sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -synonym: "polyadenylation termination signal" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000553 -name: polyA_site -namespace: sequence -alt_id: SO:0001430 -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation. The boundary between the UTR and the polyA sequence." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA cleavage site" EXACT [] -synonym: "polyA junction" EXACT [] -synonym: "polyA site" EXACT [] -synonym: "polyA_junction" EXACT [] -synonym: "polyadenylation site" RELATED [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000577 -name: centromere -namespace: sequence -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000581 -name: cap -namespace: sequence -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000587 -name: group_I_intron -namespace: sequence -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyze their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -namespace: sequence -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000590 -name: SRP_RNA -namespace: sequence -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. 
Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -namespace: sequence -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0000602 -name: guide_RNA -namespace: sequence -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." 
[http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -namespace: sequence -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000605 -name: intergenic_region -namespace: sequence -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000610 -name: polyA_sequence -namespace: sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -namespace: sequence -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -namespace: sequence -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000616 -name: transcription_end_site -namespace: sequence -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000624 -name: telomere -namespace: sequence -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -namespace: sequence -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." 
[SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000627 -name: insulator -namespace: sequence -def: "A transcriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -namespace: sequence -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000643 -name: minisatellite -namespace: sequence -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -namespace: sequence -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -namespace: sequence -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! 
primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -namespace: sequence -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000649 -name: stRNA -namespace: sequence -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -namespace: sequence -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -namespace: sequence -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -namespace: sequence -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). 
In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilizes 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -namespace: sequence -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000655 -name: ncRNA -namespace: sequence -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000657 -name: repeat_region -namespace: sequence -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: has_part SO:0001411 ! biological_region - -[Term] -id: SO:0000658 -name: dispersed_repeat -namespace: sequence -def: "A repeat that is located at dispersed sites in the genome." 
[SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000662 -name: spliceosomal_intron -namespace: sequence -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000667 -name: insertion -namespace: sequence -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: DBVAR -subset: SOFA -synonym: "insertion" EXACT dbvar [http://www.ncbi.nlm.nih.gov/dbvar/] -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -xref: loinc:LA6687-3 "Insertion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -namespace: sequence -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000673 -name: transcript -namespace: sequence -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -namespace: sequence -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000687 -name: deletion_junction -namespace: sequence -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! 
junction - -[Term] -id: SO:0000688 -name: golden_path -namespace: sequence -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -namespace: sequence -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000694 -name: SNP -namespace: sequence -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:0000695 -name: reagent -namespace: sequence -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -namespace: sequence -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000699 -name: junction -namespace: sequence -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -synonym: "breakpoint" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000700 -name: remark -namespace: sequence -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -namespace: sequence -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -namespace: sequence -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -namespace: sequence -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -namespace: sequence -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -namespace: sequence -def: "Two or more adjcent copies of a region (of length greater than 1)." [SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -namespace: sequence -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. 
elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000714 -name: nucleotide_motif -namespace: sequence -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001683 ! sequence_motif - -[Term] -id: SO:0000715 -name: RNA_motif -namespace: sequence -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000717 -name: reading_frame -namespace: sequence -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000719 -name: ultracontig -namespace: sequence -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -synonym: "superscaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000724 -name: oriT -namespace: sequence -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! 
origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -namespace: sequence -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal - -[Term] -id: SO:0000727 -name: CRM -namespace: sequence -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: has_part SO:0000235 ! TF_binding_site -relationship: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000730 -name: gap -namespace: sequence -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -namespace: sequence -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region -relationship: member_of SO:0005855 ! 
gene_group - -[Term] -id: SO:0000753 -name: clone_insert -namespace: sequence -def: "The region of sequence that has been inserted and is being propagated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -namespace: sequence -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -namespace: sequence -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000830 -name: chromosome_part -namespace: sequence -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -namespace: sequence -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000833 -name: transcript_region -namespace: sequence -def: "A region of a transcript." 
[SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -namespace: sequence -def: "A region of a mature transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -namespace: sequence -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -namespace: sequence -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -namespace: sequence -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000839 -name: polypeptide_region -namespace: sequence -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." 
[SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -namespace: sequence -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -namespace: sequence -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000851 -name: CDS_region -namespace: sequence -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -namespace: sequence -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0001000 -name: rRNA_16S -namespace: sequence -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! 
small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -namespace: sequence -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -namespace: sequence -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001019 -name: copy_number_variation -namespace: sequence -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -subset: SOFA -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0001037 -name: mobile_genetic_element -namespace: sequence -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -namespace: sequence -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! 
mobile_genetic_element - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -namespace: sequence -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -subset: SOFA -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -namespace: sequence -def: "A regulatory_region that modulates splicing." [SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -namespace: sequence -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. -subset: SOFA -synonym: "partially characterised change in DNA sequence" NARROW [] -synonym: "partially_characterised_change_in_DNA_sequence" NARROW [] -synonym: "sequence alteration" EXACT [] -synonym: "uncharacterised_change_in_nucleotide_sequence" NARROW [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001063 -name: immature_peptide_region -namespace: sequence -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -namespace: sequence -def: "The maximal intersection of exon and UTR." [SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with a stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! 
exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -namespace: sequence -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001235 -name: replicon -namespace: sequence -def: "A region containing at least one unique origin of replication and a unique termination site." [ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -namespace: sequence -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001248 -name: assembly -namespace: sequence -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001409 -name: biomaterial_region -namespace: sequence -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001410 -name: experimental_feature -namespace: sequence -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0001411 -name: biological_region -namespace: sequence -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -namespace: sequence -def: "A region that is defined according to its relations with other regions within the same sequence." [SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001419 -name: cis_splice_site -namespace: sequence -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -namespace: sequence -def: "Primary transcript region bordering trans-splice junction." [SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001483 -name: SNV -namespace: sequence -def: "SNVs are single nucleotide positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0001527 -name: peptide_localization_signal -namespace: sequence -def: "A region of peptide sequence used to target the polypeptide molecule to a specific organelle." [SO:ke] -subset: SOFA -synonym: "localization signal" RELATED [] -synonym: "peptide localization signal" EXACT [] -is_a: SO:0000839 ! polypeptide_region -created_by: kareneilbeck -creation_date: 2010-03-11T02:15:05Z - -[Term] -id: SO:0001647 -name: kozak_sequence -namespace: sequence -def: "A kind of ribosome entry site, specific to Eukaryotic organisms that overlaps part of both 5' UTR and CDS sequence." 
[SO:ke] -subset: SOFA -synonym: "kozak consensus" EXACT [] -synonym: "kozak consensus sequence" EXACT [] -synonym: "kozak sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Kozak_consensus_sequence "wikipedia" -is_a: SO:0000139 ! ribosome_entry_site -created_by: kareneilbeck -creation_date: 2010-06-07T03:12:20Z - -[Term] -id: SO:0001654 -name: nucleotide_to_protein_binding_site -namespace: sequence -def: "A binding site that, in the nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -subset: SOFA -synonym: "nucleotide to protein binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:26:05Z - -[Term] -id: SO:0001679 -name: transcription_regulatory_region -namespace: sequence -def: "A regulatory region that is involved in the control of the process of transcription." [SO:ke] -subset: SOFA -synonym: "transcription regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:49:35Z - -[Term] -id: SO:0001683 -name: sequence_motif -namespace: sequence -def: "A sequence motif is a nucleotide or amino-acid sequence pattern that may have biological significance." [http://en.wikipedia.org/wiki/Sequence_motif] -subset: SOFA -synonym: "sequence motif" RELATED [] -xref: http://en.wikipedia.org/wiki/Sequence_motif "wikipedia" -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-10-14T04:13:22Z - -[Term] -id: SO:0001720 -name: epigenetically_modified_region -namespace: sequence -def: "A biological region implicated in inherited changes caused by mechanisms other than changes in the underlying DNA sequence." [http://en.wikipedia.org/wiki/Epigenetics, SO:ke] -subset: SOFA -synonym: "epigenetically modified region" RELATED [] -is_a: SO:0001411 ! 
biological_region -created_by: kareneilbeck -creation_date: 2010-03-27T12:02:29Z - -[Term] -id: SO:0001790 -name: paired_end_fragment -namespace: sequence -def: "An assembly region that has been sequenced from both ends resulting in a read_pair (mate_pair)." [SO:ke] -subset: SOFA -synonym: "paired end fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -created_by: kareneilbeck -creation_date: 2011-04-14T01:48:20Z - -[Term] -id: SO:0005836 -name: regulatory_region -namespace: sequence -def: "A region of sequence that is involved in the control of a biological process." [SO:ke] -subset: SOFA -synonym: "regulatory region" EXACT [] -xref: http://en.wikipedia.org/wiki/Regulatory_region "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0005855 -name: gene_group -namespace: sequence -def: "A collection of related genes." [SO:ma] -subset: SOFA -synonym: "gene group" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0100011 -name: cleaved_peptide_region -namespace: sequence -def: "The cleaved_peptide_regon is the a region of peptide sequence that is cleaved during maturation." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "cleaved peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:1000002 -name: substitution -namespace: sequence -def: "A sequence alteration where the length of the change in the variant is the same as that of the reference." [SO:ke] -subset: SOFA -xref: loinc:LA6690-7 "Substitution" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1000005 -name: complex_substitution -namespace: sequence -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." 
[EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -synonym: "complex substitution" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000008 -name: point_mutation -namespace: sequence -def: "A single nucleotide change which has occurred at the same position of a corresponding nucleotide in a reference sequence." [SO:immuno_workshop] -subset: SOFA -synonym: "point mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Point_mutation "wiki" -is_a: SO:0001483 ! SNV - -[Term] -id: SO:1000036 -name: inversion -namespace: sequence -def: "A continuous nucleotide sequence is inverted in the same position." [EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: DBVAR -subset: SOFA -synonym: "inversion" EXACT dbvar [http://www.ncbi.nlm.nih.gov/dbvar/] -xref: loinc:LA6689-9 "Inversion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1001284 -name: regulon -namespace: sequence -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." [ISBN:0198506732] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Regulon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:2000061 -name: databank_entry -namespace: sequence -def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -synonym: "databank entry" EXACT [] -is_a: SO:0000695 ! reagent - -[Typedef] -id: adjacent_to -name: adjacent_to -namespace: sequence -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence. X adjacent_to Y iff X and Y share a boundary but do not overlap." [PMID:20226267, SO:ke] -subset: SOFA - -[Typedef] -id: associated_with -name: associated_with -namespace: sequence -comment: This relationship is vague and up for discussion. 
- -[Typedef] -id: complete_evidence_for_feature -name: complete_evidence_for_feature -namespace: sequence -def: "B is complete_evidence_for_feature A if the extent (5' and 3' boundaries) and internal boundaries of B fully support the extent and internal boundaries of A." [SO:ke] -comment: If A is a feature with multiple regions such as a multi exon transcript, the supporting EST evidence is complete if each of the regions is supported by an equivalent region in B. Also there must be no extra regions in B that are not represented in A. This relationship was requested by jeltje on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: connects_on -name: connects_on -namespace: sequence -def: "X connects_on Y, Z, R iff whenever Z is on a R, X is adjacent to a Y and adjacent to a Z." [PMID:20226267] -comment: Example: A splice_junction connects_on exon, exon, mature_transcript. -created_by: kareneilbeck -creation_date: 2010-10-14T01:38:51Z - -[Typedef] -id: contained_by -name: contained_by -namespace: sequence -def: "X contained_by Y iff X starts after start of Y and X ends before end of Y." [PMID:20226267] -comment: The inverse is contains. Example: intein contained_by immature_peptide_region. -is_transitive: true -created_by: kareneilbeck -creation_date: 2010-10-14T01:26:16Z - -[Typedef] -id: contains -name: contains -namespace: sequence -def: "The inverse of contained_by." [PMID:20226267] -comment: Example: pre_miRNA contains miRNA_loop. -is_transitive: true -created_by: kareneilbeck -creation_date: 2010-10-14T01:32:15Z - -[Typedef] -id: derives_from -name: derives_from -namespace: sequence -subset: SOFA -is_transitive: true - -[Typedef] -id: disconnected_from -name: disconnected_from -namespace: sequence -def: "X is disconnected_from Y iff it is not the case that X overlaps Y." 
[PMID:20226267] -created_by: kareneilbeck -creation_date: 2010-10-14T01:42:10Z - -[Typedef] -id: edited_from -name: edited_from -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:45Z - -[Typedef] -id: edited_to -name: edited_to -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:11Z - -[Typedef] -id: evidence_for_feature -name: evidence_for_feature -namespace: sequence -def: "B is evidence_for_feature A, if an instance of B supports the existence of A." [SO:ke] -comment: This relationship was requested by nlw on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true - -[Typedef] -id: exemplar_of -name: exemplar_of -namespace: sequence -def: "X is exemplar of Y if X is the best evidence for Y." [SO:ke] -comment: Tracker id: 2594157. - -[Typedef] -id: finished_by -name: finished_by -namespace: sequence -def: "Xy is finished_by Y if Y part of X, and X and Y share a 3' boundary." [PMID:20226267] -comment: Example CDS finished_by stop_codon. -created_by: kareneilbeck -creation_date: 2010-10-14T01:45:45Z - -[Typedef] -id: finishes -name: finishes -namespace: sequence -def: "X finishes Y if X is part_of Y and X and Y share a 3' or C terminal boundary." [PMID:20226267] -comment: Example: stop_codon finishes CDS. -created_by: kareneilbeck -creation_date: 2010-10-14T02:17:53Z - -[Typedef] -id: gained -name: gained -namespace: sequence -def: "X gained Y if X is a variant_of X' and Y part of X but not X'." [SO:ke] -comment: A relation with which to annotate the changes in a variant sequence with respect to a reference.\nFor example a variant transcript may gain a stop codon not present in the reference sequence. 
-created_by: kareneilbeck -creation_date: 2011-06-28T12:51:10Z - -[Typedef] -id: genome_of -name: genome_of -namespace: sequence - -[Typedef] -id: guided_by -name: guided_by -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:04Z - -[Typedef] -id: guides -name: guides -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:24Z - -[Typedef] -id: has_integral_part -name: has_integral_part -namespace: sequence -def: "X has_integral_part Y if and only if: X has_part Y and Y part_of X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: mRNA has_integral_part CDS. -is_a: has_part ! has_part -created_by: kareneilbeck -creation_date: 2009-08-19T12:01:46Z - -[Typedef] -id: has_origin -name: has_origin -namespace: sequence - -[Typedef] -id: has_part -name: has_part -namespace: sequence -def: "Inverse of part_of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: operon has_part gene. - -[Typedef] -id: has_quality -name: has_quality -namespace: sequence -comment: The relationship between a feature and an attribute. - -[Typedef] -id: homologous_to -name: homologous_to -namespace: sequence -subset: SOFA -is_symmetric: true -is_a: similar_to ! similar_to - -[Typedef] -id: integral_part_of -name: integral_part_of -namespace: sequence -def: "X integral_part_of Y if and only if: X part_of Y and Y has_part X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: exon integral_part_of transcript. -is_a: part_of ! 
part_of -created_by: kareneilbeck -creation_date: 2009-08-19T12:03:28Z - -[Typedef] -id: is_consecutive_sequence_of -name: is_consecutive_sequence_of -namespace: sequence -def: "R is_consecutive_sequence_of R iff every instance of R is equivalent to a collection of instances of U:u1, u2, un, such that no pair of ux uy is overlapping and for all ux, it is adjacent to ux-1 and ux+1, with the exception of the initial and terminal u1,and un (which may be identical)." [PMID:20226267] -comment: Example: region is consecutive_sequence of base. -created_by: kareneilbeck -creation_date: 2010-10-14T02:19:48Z - -[Typedef] -id: lost -name: lost -namespace: sequence -def: "X lost Y if X is a variant_of X' and Y part of X' but not X." [SO:ke] -comment: A relation with which to annotate the changes in a variant sequence with respect to a reference.\nFor example a variant transcript may have lost a stop codon present in the reference sequence. -created_by: kareneilbeck -creation_date: 2011-06-28T12:53:16Z - -[Typedef] -id: maximally_overlaps -name: maximally_overlaps -namespace: sequence -def: "A maximally_overlaps X iff all parts of A (including A itself) overlap both A and Y." [PMID:20226267] -comment: Example: non_coding_region_of_exon maximally_overlaps the intersections of exon and UTR. -created_by: kareneilbeck -creation_date: 2010-10-14T01:34:48Z - -[Typedef] -id: member_of -name: member_of -namespace: sequence -comment: A subtype of part_of. Inverse is collection_of. Winston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. -subset: SOFA -is_transitive: true -is_a: part_of ! part_of - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -namespace: sequence -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -subset: SOFA -is_a: homologous_to ! 
homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -namespace: sequence -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: overlaps -name: overlaps -namespace: sequence -def: "X overlaps Y iff there exists some Z such that Z contained_by X and Z contained_by Y." [PMID:20226267] -comment: Example: coding_exon overlaps CDS. -created_by: kareneilbeck -creation_date: 2010-10-14T01:33:15Z - -[Typedef] -id: paralogous_to -name: paralogous_to -namespace: sequence -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: part_of -name: part_of -namespace: sequence -def: "X part_of Y if X is a subregion of Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: amino_acid part_of polypeptide. -subset: SOFA -is_transitive: true - -[Typedef] -id: partial_evidence_for_feature -name: partial_evidence_for_feature -namespace: sequence -def: "B is partial_evidence_for_feature A if the extent of B supports part_of but not all of A." [SO:ke] -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: position_of -name: position_of -namespace: sequence - -[Typedef] -id: processed_from -name: processed_from -namespace: sequence -def: "Inverse of processed_into." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA processed_from miRNA_primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:14:00Z - -[Typedef] -id: processed_into -name: processed_into -namespace: sequence -def: "X is processed_into Y if a region X is modified to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA_primary_transcript processed into miRNA. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:15:02Z - -[Typedef] -id: recombined_from -name: recombined_from -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:21:03Z - -[Typedef] -id: recombined_to -name: recombined_to -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:20:07Z - -[Typedef] -id: sequence_of -name: sequence_of -namespace: sequence - -[Typedef] -id: similar_to -name: similar_to -namespace: sequence -subset: SOFA -is_symmetric: true - -[Typedef] -id: started_by -name: started_by -namespace: sequence -def: "X is strted_by Y if Y is part_of X and X and Y share a 5' boundary." [PMID:20226267] -comment: Example: CDS started_by start_codon. -created_by: kareneilbeck -creation_date: 2010-10-14T01:43:55Z - -[Typedef] -id: starts -name: starts -namespace: sequence -def: "X starts Y if X is part of Y, and A and Y share a 5' or N-terminal boundary." [PMID:20226267] -comment: Example: start_codon starts CDS. -created_by: kareneilbeck -creation_date: 2010-10-14T01:47:53Z - -[Typedef] -id: trans_spliced_from -name: trans_spliced_from -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:14Z - -[Typedef] -id: trans_spliced_to -name: trans_spliced_to -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:00Z - -[Typedef] -id: transcribed_from -name: transcribed_from -namespace: sequence -def: "X is transcribed_from Y if X is synthesized from template Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: primary_transcript transcribed_from gene. -created_by: kareneilbeck -creation_date: 2009-08-19T12:05:39Z - -[Typedef] -id: transcribed_to -name: transcribed_to -namespace: sequence -def: "Inverse of transcribed_from." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: gene transcribed_to primary_transcript. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:08:24Z - -[Typedef] -id: translates_to -name: translates_to -namespace: sequence -def: "Inverse of translation _of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: codon translates_to amino_acid. -created_by: kareneilbeck -creation_date: 2009-08-19T12:11:53Z - -[Typedef] -id: translation_of -name: translation_of -namespace: sequence -def: "X is translation of Y if X is translated by ribosome to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: Polypeptide translation_of CDS. -created_by: kareneilbeck -creation_date: 2009-08-19T12:09:59Z - -[Typedef] -id: variant_of -name: variant_of -namespace: sequence -def: "A' is a variant (mutation) of A = definition every instance of A' is either an immediate mutation of some instance of A, or there is a chain of immediate mutation processes linking A' to some instance of A." [SO:immuno_workshop] -comment: Added to SO during the immunology workshop, June 2007. This relationship was approved by Barry Smith. 
- diff --git a/annotation/NBIS/Ontology/SOFA/sofa_2_5_2.obo b/annotation/NBIS/Ontology/SOFA/sofa_2_5_2.obo deleted file mode 100644 index f7c4ea15a..000000000 --- a/annotation/NBIS/Ontology/SOFA/sofa_2_5_2.obo +++ /dev/null @@ -1,3062 +0,0 @@ -format-version: 1.2 -subsetdef: biosapiens "biosapiens protein feature ontology" -subsetdef: DBVAR "database of genomic structural variation" -subsetdef: SOFA "SO feature annotation" -synonymtypedef: aa1 "amino acid 1 letter code" -synonymtypedef: aa3 "amino acid 3 letter code" -synonymtypedef: AAMOD "amino acid modification" -synonymtypedef: BS "biosapiens" -synonymtypedef: dbsnp "dbsnp variant terms" -synonymtypedef: dbvar "DBVAR" -synonymtypedef: ebi_variants "ensembl variant terms" -synonymtypedef: RNAMOD "RNA modification" EXACT -ontology: so-xp/subsets/SOFA -default-namespace: sequence - -[Term] -id: SO:0000000 -name: Sequence_Ontology -namespace: sequence -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000001 -name: region -namespace: sequence -def: "A sequence_feature with an extent greater than zero. A nucleotide region is composed of bases and a polypeptide region is composed of amino acids." [SO:ke] -subset: SOFA -synonym: "sequence" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000699 ! junction - -[Term] -id: SO:0000004 -name: interior_coding_exon -namespace: sequence -subset: SOFA -synonym: "interior coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000005 -name: satellite_DNA -namespace: sequence -def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "satellite DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Satellite_DNA "wiki" -is_a: SO:0000705 ! 
tandem_repeat - -[Term] -id: SO:0000006 -name: PCR_product -namespace: sequence -def: "A region amplified by a PCR reaction." [SO:ke] -comment: This term is mapped to MGED. This term is now located in OBI, with the following ID OBI_0000406. -subset: SOFA -synonym: "amplicon" RELATED [] -synonym: "PCR product" EXACT [] -xref: http://en.wikipedia.org/wiki/RAPD "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000007 -name: read_pair -namespace: sequence -def: "One of a pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -synonym: "mate pair" EXACT [] -synonym: "read-pair" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000149 ! contig -relationship: part_of SO:0001790 ! paired_end_fragment - -[Term] -id: SO:0000013 -name: scRNA -namespace: sequence -def: "A small non coding RNA sequence, present in the cytoplasm." [SO:ke] -subset: SOFA -synonym: "small cytoplasmic RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000038 -name: match_set -namespace: sequence -def: "A collection of match parts." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000039 -name: match_part -namespace: sequence -def: "A part of a match, for example an hsp from blast is a match_part." [SO:ke] -subset: SOFA -synonym: "match part" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: part_of SO:0000343 ! match - -[Term] -id: SO:0000050 -name: gene_part -namespace: sequence -def: "A part of a gene, that has no other route in the ontology back to region. This concept is necessary for logical inference as these parts must have the properties of region. It also allows us to associate all the parts of genes with a gene." 
[SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000057 -name: operator -namespace: sequence -def: "A regulatory element of an operon to which activators or repressors bind thereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -synonym: "operator segment" EXACT [] -xref: http://en.wikipedia.org/wiki/Operator_(biology)#Operator "wiki" -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000059 -name: nuclease_binding_site -namespace: sequence -def: "A binding site that, of a nucleotide molecule, that interacts selectively and non-covalently with polypeptide residues of a nuclease." [SO:cb] -subset: SOFA -synonym: "nuclease binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site - -[Term] -id: SO:0000101 -name: transposable_element -namespace: sequence -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -synonym: "transposable element" EXACT [] -synonym: "transposon" EXACT [] -xref: http://en.wikipedia.org/wiki/Transposable_element "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000102 -name: expressed_sequence_match -namespace: sequence -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -synonym: "expressed sequence match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -namespace: sequence -def: "The end of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert end" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000104 -name: polypeptide -namespace: sequence -alt_id: SO:0000358 -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." [SO:ma] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. The term 'protein' was merged with 'polypeptide'. Although 'protein' was a sequence_attribute and therefore meant to describe the quality rather than an actual feature, it was being used erroneously. It is replaced by 'peptidyl' as the polymer attribute. -subset: SOFA -synonym: "protein" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypeptide "wiki" -is_a: SO:0001411 ! biological_region -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000109 -name: sequence_variant_obs -namespace: sequence -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000110 -name: sequence_feature -namespace: sequence -def: "Any extent of continuous biological sequence." [LAMHDI:mb, SO:ke] -subset: SOFA -synonym: "located sequence feature" RELATED [] -synonym: "located_sequence_feature" EXACT [] -synonym: "sequence feature" EXACT [] - -[Term] -id: SO:0000112 -name: primer -namespace: sequence -def: "An oligo to which new deoxyribonucleotides can be added by DNA polymerase." [SO:ke] -subset: SOFA -synonym: "DNA primer" EXACT [] -synonym: "primer oligonucleotide" EXACT [] -synonym: "primer polynucleotide" EXACT [] -synonym: "primer sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Primer_(molecular_biology) "wiki" -is_a: SO:0000441 ! ss_oligo - -[Term] -id: SO:0000113 -name: proviral_region -namespace: sequence -def: "A viral sequence which has integrated into a host genome." [SO:ke] -subset: SOFA -synonym: "proviral region" EXACT [] -synonym: "proviral sequence" RELATED [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000114 -name: methylated_cytosine -namespace: sequence -def: "A methylated deoxy-cytosine." 
[SO:ke] -subset: SOFA -synonym: "methylated C" EXACT [] -synonym: "methylated cytosine" EXACT [] -synonym: "methylated cytosine base" EXACT [] -synonym: "methylated cytosine residue" EXACT [] -synonym: "methylated_C" EXACT [] -is_a: SO:0000306 ! methylated_DNA_base_feature - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -namespace: sequence -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns. -subset: SOFA -synonym: "pre mRNA" RELATED [] -synonym: "protein coding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000139 -name: ribosome_entry_site -namespace: sequence -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000140 -name: attenuator -namespace: sequence -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -namespace: sequence -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000143 -name: assembly_component -namespace: sequence -def: "A region of known length which may be used to manufacture a longer region." 
[SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000147 -name: exon -namespace: sequence -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -namespace: sequence -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -namespace: sequence -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unavailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -namespace: sequence -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -namespace: sequence -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! 
reagent - -[Term] -id: SO:0000159 -name: deletion -namespace: sequence -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -xref: loinc:LA6692-3 "Deletion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000161 -name: methylated_adenine -namespace: sequence -def: "A modified base in which adenine has been methylated." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -synonym: "methylated_A" EXACT [] -is_a: SO:0000306 ! methylated_DNA_base_feature - -[Term] -id: SO:0000162 -name: splice_site -namespace: sequence -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinery. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -namespace: sequence -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! 
cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -namespace: sequence -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -namespace: sequence -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a place holder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000167 -name: promoter -namespace: sequence -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! 
transcriptional_cis_regulatory_region - -[Term] -id: SO:0000177 -name: cross_genome_match -namespace: sequence -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -namespace: sequence -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -namespace: sequence -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -namespace: sequence -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000183 -name: non_transcribed_region -namespace: sequence -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000185 -name: primary_transcript -namespace: sequence -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000187 -name: repeat_family -namespace: sequence -def: "A group of characterized repeat sequences." 
[SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -namespace: sequence -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000193 -name: RFLP_fragment -namespace: sequence -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." [GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000195 -name: coding_exon -namespace: sequence -def: "An exon whereby at least one base is part of a codon (here, 'codon' is inclusive of the stop_codon)." [SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -namespace: sequence -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -namespace: sequence -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000195 ! 
coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -namespace: sequence -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -namespace: sequence -def: "The 5' most coding exon." [SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -namespace: sequence -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -namespace: sequence -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -namespace: sequence -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -namespace: sequence -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -subset: SOFA -synonym: "" RELATED [] -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! 
nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -namespace: sequence -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -namespace: sequence -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -namespace: sequence -def: "A region of a nucleotide molecule that binds a Transcription Factor or Transcription Factor complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000236 -name: ORF -namespace: sequence -def: "The in-frame interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." 
[SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000239 -name: flanking_region -namespace: sequence -def: "The sequences extending on either side of a specific region." [SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000252 -name: rRNA -namespace: sequence -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -namespace: sequence -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -namespace: sequence -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -namespace: sequence -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -namespace: sequence -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. 
Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000289 -name: microsatellite -namespace: sequence -def: "A repeat_region containing repeat_units of 2 to 10 bp repeated in tandem." [http://www.informatics.jax.org/silver/glossary.shtml, NCBI:th] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000294 -name: inverted_repeat -namespace: sequence -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000296 -name: origin_of_replication -namespace: sequence -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! 
replicon - -[Term] -id: SO:0000303 -name: clip -namespace: sequence -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000305 -name: modified_DNA_base -namespace: sequence -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001236 ! base -is_a: SO:0001720 ! epigenetically_modified_region - -[Term] -id: SO:0000306 -name: methylated_DNA_base_feature -namespace: sequence -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_DNA_base - -[Term] -id: SO:0000307 -name: CpG_island -namespace: sequence -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000314 -name: direct_repeat -namespace: sequence -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: TSS -namespace: sequence -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -synonym: "transcription_start_site" EXACT [] -is_a: SO:0000835 ! 
primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -namespace: sequence -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -synonym: "coding_sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000318 -name: start_codon -namespace: sequence -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -namespace: sequence -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000324 -name: tag -namespace: sequence -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -namespace: sequence -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "35S rRNA primary transcript" EXACT [] -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -namespace: sequence -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000330 -name: conserved_region -namespace: sequence -def: "Region of sequence similarity by descent from a common ancestor." 
[SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000331 -name: STS -namespace: sequence -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -namespace: sequence -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -namespace: sequence -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -namespace: sequence -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000336 -name: pseudogene -namespace: sequence -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." 
[http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0001411 ! biological_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -namespace: sequence -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0000340 -name: chromosome -namespace: sequence -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000341 -name: chromosome_band -namespace: sequence -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000343 -name: match -namespace: sequence -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -namespace: sequence -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0001056 ! 
splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -namespace: sequence -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000347 -name: nucleotide_match -namespace: sequence -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000349 -name: protein_match -namespace: sequence -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000353 -name: sequence_assembly -namespace: sequence -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000360 -name: codon -namespace: sequence -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000366 -name: insertion_site -namespace: sequence -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -namespace: sequence -def: "The junction in a genome where a transposable_element has inserted." 
[SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -namespace: sequence -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000372 -name: enzymatic_RNA -namespace: sequence -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000374 -name: ribozyme -namespace: sequence -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -is_a: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000375 -name: rRNA_5_8S -namespace: sequence -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -namespace: sequence -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. 
Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [PMID:2436805] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -namespace: sequence -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -namespace: sequence -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterized activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -namespace: sequence -def: "The RNA component of telomerase, a reverse transcriptase that synthesizes telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -namespace: sequence -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -namespace: sequence -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -namespace: sequence -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -namespace: sequence -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -namespace: sequence -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -namespace: sequence -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -namespace: sequence -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -namespace: sequence -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." 
[PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -namespace: sequence -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000403 -name: U14_snoRNA -namespace: sequence -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0000404 -name: vault_RNA -namespace: sequence -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. 
It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -namespace: sequence -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000407 -name: rRNA_18S -namespace: sequence -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000409 -name: binding_site -namespace: sequence -alt_id: BS:00033 -def: "A biological_region of sequence that, in the molecule, interacts selectively and non-covalently with other molecules. A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: See GO:0005488 : binding. 
-subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -namespace: sequence -def: "A binding site that, in the molecule, interacts selectively and non-covalently with polypeptide molecules." [SO:ke] -comment: See GO:0042277 : peptide binding. -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000412 -name: restriction_fragment -namespace: sequence -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -namespace: sequence -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000418 -name: signal_peptide -namespace: sequence -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0001527 ! peptide_localization_signal -relationship: part_of SO:0100011 ! 
cleaved_peptide_region - -[Term] -id: SO:0000419 -name: mature_protein_region -namespace: sequence -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000436 -name: ARS -namespace: sequence -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000441 -name: ss_oligo -namespace: sequence -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -namespace: sequence -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! 
oligo - -[Term] -id: SO:0000454 -name: rasiRNA -namespace: sequence -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000462 -name: pseudogenic_region -namespace: sequence -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000464 -name: decayed_exon -namespace: sequence -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000468 -name: golden_path_fragment -namespace: sequence -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000472 -name: tiling_path -namespace: sequence -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000474 -name: tiling_path_fragment -namespace: sequence -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000483 -name: nc_primary_transcript -namespace: sequence -def: "A primary transcript that is never translated into a protein." 
[SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -namespace: sequence -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000195 ! coding_exon - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -namespace: sequence -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000499 -name: virtual_sequence -namespace: sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000502 -name: transcribed_region -namespace: sequence -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. 
-subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -namespace: sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -synonym: "polyadenylation termination signal" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000553 -name: polyA_site -namespace: sequence -alt_id: SO:0001430 -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation. The boundary between the UTR and the polyA sequence." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA cleavage site" EXACT [] -synonym: "polyA junction" EXACT [] -synonym: "polyA site" EXACT [] -synonym: "polyA_junction" EXACT [] -synonym: "polyadenylation site" RELATED [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000577 -name: centromere -namespace: sequence -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000581 -name: cap -namespace: sequence -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000587 -name: group_I_intron -namespace: sequence -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyze their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -namespace: sequence -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000590 -name: SRP_RNA -namespace: sequence -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. 
Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -namespace: sequence -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0000602 -name: guide_RNA -namespace: sequence -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." 
[http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -namespace: sequence -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000605 -name: intergenic_region -namespace: sequence -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000610 -name: polyA_sequence -namespace: sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -namespace: sequence -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -namespace: sequence -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000616 -name: transcription_end_site -namespace: sequence -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000624 -name: telomere -namespace: sequence -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -namespace: sequence -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." 
[SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000627 -name: insulator -namespace: sequence -def: "A transcriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -namespace: sequence -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000643 -name: minisatellite -namespace: sequence -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -namespace: sequence -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -namespace: sequence -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! 
primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -namespace: sequence -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000649 -name: stRNA -namespace: sequence -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -namespace: sequence -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -namespace: sequence -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -namespace: sequence -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). 
In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilizes 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -namespace: sequence -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000655 -name: ncRNA -namespace: sequence -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000657 -name: repeat_region -namespace: sequence -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: has_part SO:0001411 ! biological_region - -[Term] -id: SO:0000658 -name: dispersed_repeat -namespace: sequence -def: "A repeat that is located at dispersed sites in the genome." 
[SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000662 -name: spliceosomal_intron -namespace: sequence -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000667 -name: insertion -namespace: sequence -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: DBVAR -subset: SOFA -synonym: "insertion" EXACT dbvar [http://www.ncbi.nlm.nih.gov/dbvar/] -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -xref: loinc:LA6687-3 "Insertion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -namespace: sequence -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000673 -name: transcript -namespace: sequence -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -namespace: sequence -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000687 -name: deletion_junction -namespace: sequence -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! 
junction - -[Term] -id: SO:0000688 -name: golden_path -namespace: sequence -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -namespace: sequence -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000694 -name: SNP -namespace: sequence -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:0000695 -name: reagent -namespace: sequence -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -namespace: sequence -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000699 -name: junction -namespace: sequence -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -synonym: "breakpoint" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000700 -name: remark -namespace: sequence -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -namespace: sequence -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -namespace: sequence -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -namespace: sequence -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -namespace: sequence -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -namespace: sequence -def: "Two or more adjcent copies of a region (of length greater than 1)." [SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -namespace: sequence -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. 
elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000714 -name: nucleotide_motif -namespace: sequence -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001683 ! sequence_motif - -[Term] -id: SO:0000715 -name: RNA_motif -namespace: sequence -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000717 -name: reading_frame -namespace: sequence -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000719 -name: ultracontig -namespace: sequence -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -synonym: "superscaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000724 -name: oriT -namespace: sequence -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! 
origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -namespace: sequence -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal - -[Term] -id: SO:0000727 -name: CRM -namespace: sequence -def: "A regulatory region where transcription factor binding sites clustered to regulate various aspects of transcription activities. (CRMs can be located a few kb to hundred kb upstream of the basal promoter, in the coding sequence, within introns, or in the downstream 3'UTR sequences, as well as on different chromosome). A single gene can be regulated by multiple CRMs to give precise control of its spatial and temporal expression. CRMs function as nodes in large, intertwined regulatory network." [PMID:19660565, SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -synonym: "transcription factor module" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: has_part SO:0000235 ! TF_binding_site -relationship: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000730 -name: gap -namespace: sequence -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." 
[SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -namespace: sequence -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -namespace: sequence -def: "The region of sequence that has been inserted and is being propagated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -namespace: sequence -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -namespace: sequence -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000830 -name: chromosome_part -namespace: sequence -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -namespace: sequence -def: "A region of a gene." 
[SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000833 -name: transcript_region -namespace: sequence -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -namespace: sequence -def: "A region of a mature transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -namespace: sequence -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -namespace: sequence -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -namespace: sequence -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. 
-subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000839 -name: polypeptide_region -namespace: sequence -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -namespace: sequence -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -namespace: sequence -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000851 -name: CDS_region -namespace: sequence -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -namespace: sequence -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0001000 -name: rRNA_16S -namespace: sequence -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." 
[SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -namespace: sequence -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -namespace: sequence -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001019 -name: copy_number_variation -namespace: sequence -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -subset: SOFA -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0001037 -name: mobile_genetic_element -namespace: sequence -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -namespace: sequence -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -namespace: sequence -def: "A regulatory_region that modulates the transcription of a gene or genes." [PMID:9679020, SO:regcreative] -subset: SOFA -synonym: "transcription-control region" EXACT [] -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -namespace: sequence -def: "A regulatory_region that modulates splicing." [SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -namespace: sequence -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. -subset: SOFA -synonym: "partially characterised change in DNA sequence" NARROW [] -synonym: "partially_characterised_change_in_DNA_sequence" NARROW [] -synonym: "sequence alteration" EXACT [] -synonym: "sequence variation" RELATED [] -synonym: "uncharacterised_change_in_nucleotide_sequence" NARROW [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001063 -name: immature_peptide_region -namespace: sequence -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature peptide region" EXACT [] -is_a: SO:0000839 ! 
polypeptide_region - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -namespace: sequence -def: "The maximal intersection of exon and UTR." [SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with a stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -namespace: sequence -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001235 -name: replicon -namespace: sequence -def: "A region containing at least one unique origin of replication and a unique termination site." [ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -namespace: sequence -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001248 -name: assembly -namespace: sequence -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001409 -name: biomaterial_region -namespace: sequence -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0001410 -name: experimental_feature -namespace: sequence -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -namespace: sequence -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -namespace: sequence -def: "A region that is defined according to its relations with other regions within the same sequence." [SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001419 -name: cis_splice_site -namespace: sequence -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -namespace: sequence -def: "Primary transcript region bordering trans-splice junction." [SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001483 -name: SNV -namespace: sequence -def: "SNVs are single nucleotide positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0001527 -name: peptide_localization_signal -namespace: sequence -def: "A region of peptide sequence used to target the polypeptide molecule to a specific organelle." 
[SO:ke] -subset: SOFA -synonym: "localization signal" RELATED [] -synonym: "peptide localization signal" EXACT [] -is_a: SO:0000839 ! polypeptide_region -created_by: kareneilbeck -creation_date: 2010-03-11T02:15:05Z - -[Term] -id: SO:0001647 -name: kozak_sequence -namespace: sequence -def: "A kind of ribosome entry site, specific to Eukaryotic organisms that overlaps part of both 5' UTR and CDS sequence." [SO:ke] -subset: SOFA -synonym: "kozak consensus" EXACT [] -synonym: "kozak consensus sequence" EXACT [] -synonym: "kozak sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Kozak_consensus_sequence "wikipedia" -is_a: SO:0000139 ! ribosome_entry_site -created_by: kareneilbeck -creation_date: 2010-06-07T03:12:20Z - -[Term] -id: SO:0001654 -name: nucleotide_to_protein_binding_site -namespace: sequence -def: "A binding site that, in the nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -subset: SOFA -synonym: "nucleotide to protein binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:26:05Z - -[Term] -id: SO:0001679 -name: transcription_regulatory_region -namespace: sequence -def: "A regulatory region that is involved in the control of the process of transcription." [SO:ke] -subset: SOFA -synonym: "transcription regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:49:35Z - -[Term] -id: SO:0001683 -name: sequence_motif -namespace: sequence -def: "A sequence motif is a nucleotide or amino-acid sequence pattern that may have biological significance." [http://en.wikipedia.org/wiki/Sequence_motif] -subset: SOFA -synonym: "sequence motif" RELATED [] -xref: http://en.wikipedia.org/wiki/Sequence_motif "wikipedia" -is_a: SO:0001411 ! 
biological_region -created_by: kareneilbeck -creation_date: 2010-10-14T04:13:22Z - -[Term] -id: SO:0001720 -name: epigenetically_modified_region -namespace: sequence -def: "A biological region implicated in inherited changes caused by mechanisms other than changes in the underlying DNA sequence." [http://en.wikipedia.org/wiki/Epigenetics, SO:ke] -subset: SOFA -synonym: "epigenetically modified region" RELATED [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-03-27T12:02:29Z - -[Term] -id: SO:0001790 -name: paired_end_fragment -namespace: sequence -def: "An assembly region that has been sequenced from both ends resulting in a read_pair (mate_pair)." [SO:ke] -subset: SOFA -synonym: "paired end fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -created_by: kareneilbeck -creation_date: 2011-04-14T01:48:20Z - -[Term] -id: SO:0005836 -name: regulatory_region -namespace: sequence -def: "A region of sequence that is involved in the control of a biological process." [SO:ke] -subset: SOFA -synonym: "regulatory region" EXACT [] -xref: http://en.wikipedia.org/wiki/Regulatory_region "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0005855 -name: gene_group -namespace: sequence -def: "A collection of related genes." [SO:ma] -subset: SOFA -synonym: "gene group" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0100011 -name: cleaved_peptide_region -namespace: sequence -def: "The cleaved_peptide_region is the region of a peptide sequence that is cleaved during maturation." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "cleaved peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:1000002 -name: substitution -namespace: sequence -def: "A sequence alteration where the length of the change in the variant is the same as that of the reference." 
[SO:ke] -subset: SOFA -xref: loinc:LA6690-7 "Substitution" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1000005 -name: complex_substitution -namespace: sequence -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." [EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -synonym: "complex substitution" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000008 -name: point_mutation -namespace: sequence -def: "A single nucleotide change which has occurred at the same position of a corresponding nucleotide in a reference sequence." [SO:immuno_workshop] -subset: SOFA -synonym: "point mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Point_mutation "wiki" -is_a: SO:0001483 ! SNV - -[Term] -id: SO:1000036 -name: inversion -namespace: sequence -def: "A continuous nucleotide sequence is inverted in the same position." [EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: DBVAR -subset: SOFA -synonym: "inversion" EXACT dbvar [http://www.ncbi.nlm.nih.gov/dbvar/] -xref: loinc:LA6689-9 "Inversion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1001284 -name: regulon -namespace: sequence -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." [ISBN:0198506732] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Regulon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:2000061 -name: databank_entry -namespace: sequence -def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -synonym: "databank entry" EXACT [] -is_a: SO:0000695 ! 
reagent - -[Typedef] -id: adjacent_to -name: adjacent_to -namespace: sequence -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence. X adjacent_to Y iff X and Y share a boundary but do not overlap." [PMID:20226267, SO:ke] -subset: SOFA - -[Typedef] -id: associated_with -name: associated_with -namespace: sequence -comment: This relationship is vague and up for discussion. - -[Typedef] -id: complete_evidence_for_feature -name: complete_evidence_for_feature -namespace: sequence -def: "B is complete_evidence_for_feature A if the extent (5' and 3' boundaries) and internal boundaries of B fully support the extent and internal boundaries of A." [SO:ke] -comment: If A is a feature with multiple regions such as a multi exon transcript, the supporting EST evidence is complete if each of the regions is supported by an equivalent region in B. Also there must be no extra regions in B that are not represented in A. This relationship was requested by jeltje on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: connects_on -name: connects_on -namespace: sequence -def: "X connects_on Y, Z, R iff whenever Z is on a R, X is adjacent to a Y and adjacent to a Z." [PMID:20226267] -comment: Example: A splice_junction connects_on exon, exon, mature_transcript. -created_by: kareneilbeck -creation_date: 2010-10-14T01:38:51Z - -[Typedef] -id: contained_by -name: contained_by -namespace: sequence -def: "X contained_by Y iff X starts after start of Y and X ends before end of Y." [PMID:20226267] -comment: The inverse is contains. Example: intein contained_by immature_peptide_region. -is_transitive: true -created_by: kareneilbeck -creation_date: 2010-10-14T01:26:16Z - -[Typedef] -id: contains -name: contains -namespace: sequence -def: "The inverse of contained_by." 
[PMID:20226267] -comment: Example: pre_miRNA contains miRNA_loop. -is_transitive: true -created_by: kareneilbeck -creation_date: 2010-10-14T01:32:15Z - -[Typedef] -id: derives_from -name: derives_from -namespace: sequence -subset: SOFA -is_transitive: true - -[Typedef] -id: disconnected_from -name: disconnected_from -namespace: sequence -def: "X is disconnected_from Y iff it is not the case that X overlaps Y." [PMID:20226267] -created_by: kareneilbeck -creation_date: 2010-10-14T01:42:10Z - -[Typedef] -id: edited_from -name: edited_from -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:45Z - -[Typedef] -id: edited_to -name: edited_to -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:11Z - -[Typedef] -id: evidence_for_feature -name: evidence_for_feature -namespace: sequence -def: "B is evidence_for_feature A, if an instance of B supports the existence of A." [SO:ke] -comment: This relationship was requested by nlw on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true - -[Typedef] -id: exemplar_of -name: exemplar_of -namespace: sequence -def: "X is exemplar of Y if X is the best evidence for Y." [SO:ke] -comment: Tracker id: 2594157. - -[Typedef] -id: finished_by -name: finished_by -namespace: sequence -def: "Xy is finished_by Y if Y part of X, and X and Y share a 3' boundary." [PMID:20226267] -comment: Example CDS finished_by stop_codon. -created_by: kareneilbeck -creation_date: 2010-10-14T01:45:45Z - -[Typedef] -id: finishes -name: finishes -namespace: sequence -def: "X finishes Y if X is part_of Y and X and Y share a 3' or C terminal boundary." [PMID:20226267] -comment: Example: stop_codon finishes CDS. -created_by: kareneilbeck -creation_date: 2010-10-14T02:17:53Z - -[Typedef] -id: gained -name: gained -namespace: sequence -def: "X gained Y if X is a variant_of X' and Y part of X but not X'." 
[SO:ke] -comment: A relation with which to annotate the changes in a variant sequence with respect to a reference.\nFor example a variant transcript may gain a stop codon not present in the reference sequence. -created_by: kareneilbeck -creation_date: 2011-06-28T12:51:10Z - -[Typedef] -id: genome_of -name: genome_of -namespace: sequence - -[Typedef] -id: guided_by -name: guided_by -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:04Z - -[Typedef] -id: guides -name: guides -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:24Z - -[Typedef] -id: has_integral_part -name: has_integral_part -namespace: sequence -def: "X has_integral_part Y if and only if: X has_part Y and Y part_of X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: mRNA has_integral_part CDS. -is_a: has_part ! has_part -created_by: kareneilbeck -creation_date: 2009-08-19T12:01:46Z - -[Typedef] -id: has_origin -name: has_origin -namespace: sequence - -[Typedef] -id: has_part -name: has_part -namespace: sequence -def: "Inverse of part_of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: operon has_part gene. - -[Typedef] -id: has_quality -name: has_quality -namespace: sequence -comment: The relationship between a feature and an attribute. - -[Typedef] -id: homologous_to -name: homologous_to -namespace: sequence -subset: SOFA -is_symmetric: true -is_a: similar_to ! similar_to - -[Typedef] -id: integral_part_of -name: integral_part_of -namespace: sequence -def: "X integral_part_of Y if and only if: X part_of Y and Y has_part X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: exon integral_part_of transcript. -is_a: part_of ! 
part_of -created_by: kareneilbeck -creation_date: 2009-08-19T12:03:28Z - -[Typedef] -id: is_consecutive_sequence_of -name: is_consecutive_sequence_of -namespace: sequence -def: "R is_consecutive_sequence_of R iff every instance of R is equivalent to a collection of instances of U:u1, u2, un, such that no pair of ux uy is overlapping and for all ux, it is adjacent to ux-1 and ux+1, with the exception of the initial and terminal u1,and un (which may be identical)." [PMID:20226267] -comment: Example: region is consecutive_sequence of base. -created_by: kareneilbeck -creation_date: 2010-10-14T02:19:48Z - -[Typedef] -id: lost -name: lost -namespace: sequence -def: "X lost Y if X is a variant_of X' and Y part of X' but not X." [SO:ke] -comment: A relation with which to annotate the changes in a variant sequence with respect to a reference.\nFor example a variant transcript may have lost a stop codon present in the reference sequence. -created_by: kareneilbeck -creation_date: 2011-06-28T12:53:16Z - -[Typedef] -id: maximally_overlaps -name: maximally_overlaps -namespace: sequence -def: "A maximally_overlaps X iff all parts of A (including A itself) overlap both A and Y." [PMID:20226267] -comment: Example: non_coding_region_of_exon maximally_overlaps the intersections of exon and UTR. -created_by: kareneilbeck -creation_date: 2010-10-14T01:34:48Z - -[Typedef] -id: member_of -name: member_of -namespace: sequence -comment: A subtype of part_of. Inverse is collection_of. Winston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. -subset: SOFA -is_transitive: true -is_a: part_of ! part_of - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -namespace: sequence -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -subset: SOFA -is_a: homologous_to ! 
homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -namespace: sequence -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: overlaps -name: overlaps -namespace: sequence -def: "X overlaps Y iff there exists some Z such that Z contained_by X and Z contained_by Y." [PMID:20226267] -comment: Example: coding_exon overlaps CDS. -created_by: kareneilbeck -creation_date: 2010-10-14T01:33:15Z - -[Typedef] -id: paralogous_to -name: paralogous_to -namespace: sequence -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: part_of -name: part_of -namespace: sequence -def: "X part_of Y if X is a subregion of Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: amino_acid part_of polypeptide. -subset: SOFA -is_transitive: true - -[Typedef] -id: partial_evidence_for_feature -name: partial_evidence_for_feature -namespace: sequence -def: "B is partial_evidence_for_feature A if the extent of B supports part_of but not all of A." [SO:ke] -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: position_of -name: position_of -namespace: sequence - -[Typedef] -id: processed_from -name: processed_from -namespace: sequence -def: "Inverse of processed_into." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA processed_from miRNA_primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:14:00Z - -[Typedef] -id: processed_into -name: processed_into -namespace: sequence -def: "X is processed_into Y if a region X is modified to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA_primary_transcript processed into miRNA. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:15:02Z - -[Typedef] -id: recombined_from -name: recombined_from -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:21:03Z - -[Typedef] -id: recombined_to -name: recombined_to -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:20:07Z - -[Typedef] -id: sequence_of -name: sequence_of -namespace: sequence - -[Typedef] -id: similar_to -name: similar_to -namespace: sequence -subset: SOFA -is_symmetric: true - -[Typedef] -id: started_by -name: started_by -namespace: sequence -def: "X is strted_by Y if Y is part_of X and X and Y share a 5' boundary." [PMID:20226267] -comment: Example: CDS started_by start_codon. -created_by: kareneilbeck -creation_date: 2010-10-14T01:43:55Z - -[Typedef] -id: starts -name: starts -namespace: sequence -def: "X starts Y if X is part of Y, and A and Y share a 5' or N-terminal boundary." [PMID:20226267] -comment: Example: start_codon starts CDS. -created_by: kareneilbeck -creation_date: 2010-10-14T01:47:53Z - -[Typedef] -id: trans_spliced_from -name: trans_spliced_from -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:14Z - -[Typedef] -id: trans_spliced_to -name: trans_spliced_to -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:00Z - -[Typedef] -id: transcribed_from -name: transcribed_from -namespace: sequence -def: "X is transcribed_from Y if X is synthesized from template Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: primary_transcript transcribed_from gene. -created_by: kareneilbeck -creation_date: 2009-08-19T12:05:39Z - -[Typedef] -id: transcribed_to -name: transcribed_to -namespace: sequence -def: "Inverse of transcribed_from." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: gene transcribed_to primary_transcript. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:08:24Z - -[Typedef] -id: translates_to -name: translates_to -namespace: sequence -def: "Inverse of translation _of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: codon translates_to amino_acid. -created_by: kareneilbeck -creation_date: 2009-08-19T12:11:53Z - -[Typedef] -id: translation_of -name: translation_of -namespace: sequence -def: "X is translation of Y if Y is translated by ribosome to create X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: Polypeptide translation_of CDS. -created_by: kareneilbeck -creation_date: 2009-08-19T12:09:59Z - -[Typedef] -id: variant_of -name: variant_of -namespace: sequence -def: "A' is a variant (mutation) of A = definition every instance of A' is either an immediate mutation of some instance of A, or there is a chain of immediate mutation processes linking A' to some instance of A." [SO:immuno_workshop] -comment: Added to SO during the immunology workshop, June 2007. This relationship was approved by Barry Smith. 
- diff --git a/annotation/NBIS/Ontology/SOFA/sofa_2_5_3.obo b/annotation/NBIS/Ontology/SOFA/sofa_2_5_3.obo deleted file mode 100644 index 5cf5c6525..000000000 --- a/annotation/NBIS/Ontology/SOFA/sofa_2_5_3.obo +++ /dev/null @@ -1,3057 +0,0 @@ -format-version: 1.2 -subsetdef: biosapiens "biosapiens protein feature ontology" -subsetdef: DBVAR "database of genomic structural variation" -subsetdef: SOFA "SO feature annotation" -synonymtypedef: aa1 "amino acid 1 letter code" -synonymtypedef: aa3 "amino acid 3 letter code" -synonymtypedef: AAMOD "amino acid modification" -synonymtypedef: BS "biosapiens" -synonymtypedef: dbsnp "dbsnp variant terms" -synonymtypedef: dbvar "DBVAR" -synonymtypedef: ebi_variants "ensembl variant terms" -synonymtypedef: RNAMOD "RNA modification" EXACT -synonymtypedef: VAR "variant annotation term" -ontology: so-xp/subsets/SOFA -default-namespace: sequence - -[Term] -id: SO:0000000 -name: Sequence_Ontology -namespace: sequence -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000001 -name: region -namespace: sequence -def: "A sequence_feature with an extent greater than zero. A nucleotide region is composed of bases and a polypeptide region is composed of amino acids." [SO:ke] -subset: SOFA -synonym: "sequence" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000699 ! junction - -[Term] -id: SO:0000004 -name: interior_coding_exon -namespace: sequence -subset: SOFA -synonym: "interior coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000005 -name: satellite_DNA -namespace: sequence -def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "satellite DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Satellite_DNA "wiki" -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000006 -name: PCR_product -namespace: sequence -def: "A region amplified by a PCR reaction." [SO:ke] -comment: This term is mapped to MGED. This term is now located in OBI, with the following ID OBI_0000406. -subset: SOFA -synonym: "amplicon" RELATED [] -synonym: "PCR product" EXACT [] -xref: http://en.wikipedia.org/wiki/RAPD "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000007 -name: read_pair -namespace: sequence -def: "One of a pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -synonym: "mate pair" EXACT [] -synonym: "read-pair" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000149 ! contig -relationship: part_of SO:0001790 ! paired_end_fragment - -[Term] -id: SO:0000013 -name: scRNA -namespace: sequence -def: "A small non coding RNA sequence, present in the cytoplasm." [SO:ke] -subset: SOFA -synonym: "small cytoplasmic RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000038 -name: match_set -namespace: sequence -def: "A collection of match parts." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000039 -name: match_part -namespace: sequence -def: "A part of a match, for example an hsp from blast is a match_part." [SO:ke] -subset: SOFA -synonym: "match part" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: part_of SO:0000343 ! match - -[Term] -id: SO:0000050 -name: gene_part -namespace: sequence -def: "A part of a gene, that has no other route in the ontology back to region. This concept is necessary for logical inference as these parts must have the properties of region. 
It also allows us to associate all the parts of genes with a gene." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000057 -name: operator -namespace: sequence -def: "A regulatory element of an operon to which activators or repressors bind thereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -synonym: "operator segment" EXACT [] -xref: http://en.wikipedia.org/wiki/Operator_(biology)#Operator "wiki" -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000059 -name: nuclease_binding_site -namespace: sequence -def: "A binding site that, of a nucleotide molecule, that interacts selectively and non-covalently with polypeptide residues of a nuclease." [SO:cb] -subset: SOFA -synonym: "nuclease binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site - -[Term] -id: SO:0000101 -name: transposable_element -namespace: sequence -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -synonym: "transposable element" EXACT [] -synonym: "transposon" EXACT [] -xref: http://en.wikipedia.org/wiki/Transposable_element "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000102 -name: expressed_sequence_match -namespace: sequence -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -synonym: "expressed sequence match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -namespace: sequence -def: "The end of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert end" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000104 -name: polypeptide -namespace: sequence -alt_id: SO:0000358 -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." 
[SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The term 'protein' was merged with 'polypeptide'. Although 'protein' was a sequence_attribute and therefore meant to describe the quality rather than an actual feature, it was being used erroneously. It is replaced by 'peptidyl' as the polymer attribute. -subset: SOFA -synonym: "protein" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypeptide "wiki" -is_a: SO:0001411 ! biological_region -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000109 -name: sequence_variant_obs -namespace: sequence -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000110 -name: sequence_feature -namespace: sequence -def: "Any extent of continuous biological sequence." [LAMHDI:mb, SO:ke] -subset: SOFA -synonym: "located sequence feature" RELATED [] -synonym: "located_sequence_feature" EXACT [] -synonym: "sequence feature" EXACT [] - -[Term] -id: SO:0000112 -name: primer -namespace: sequence -def: "An oligo to which new deoxyribonucleotides can be added by DNA polymerase." [SO:ke] -subset: SOFA -synonym: "DNA primer" EXACT [] -synonym: "primer oligonucleotide" EXACT [] -synonym: "primer polynucleotide" EXACT [] -synonym: "primer sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Primer_(molecular_biology) "wiki" -is_a: SO:0000441 ! ss_oligo - -[Term] -id: SO:0000113 -name: proviral_region -namespace: sequence -def: "A viral sequence which has integrated into a host genome." [SO:ke] -subset: SOFA -synonym: "proviral region" EXACT [] -synonym: "proviral sequence" RELATED [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000114 -name: methylated_cytosine -namespace: sequence -def: "A methylated deoxy-cytosine." 
[SO:ke] -subset: SOFA -synonym: "methylated C" EXACT [] -synonym: "methylated cytosine" EXACT [] -synonym: "methylated cytosine base" EXACT [] -synonym: "methylated cytosine residue" EXACT [] -synonym: "methylated_C" EXACT [] -is_a: SO:0000306 ! methylated_DNA_base_feature - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -namespace: sequence -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns. -subset: SOFA -synonym: "pre mRNA" RELATED [] -synonym: "protein coding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000139 -name: ribosome_entry_site -namespace: sequence -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000140 -name: attenuator -namespace: sequence -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -namespace: sequence -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000143 -name: assembly_component -namespace: sequence -def: "A region of known length which may be used to manufacture a longer region." 
[SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000147 -name: exon -namespace: sequence -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -namespace: sequence -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -namespace: sequence -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unavailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -namespace: sequence -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -namespace: sequence -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! 
reagent - -[Term] -id: SO:0000159 -name: deletion -namespace: sequence -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -xref: loinc:LA6692-3 "Deletion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000161 -name: methylated_adenine -namespace: sequence -def: "A modified base in which adenine has been methylated." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -synonym: "methylated_A" EXACT [] -is_a: SO:0000306 ! methylated_DNA_base_feature - -[Term] -id: SO:0000162 -name: splice_site -namespace: sequence -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinery. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -namespace: sequence -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! 
cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -namespace: sequence -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -namespace: sequence -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a place holder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000167 -name: promoter -namespace: sequence -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! 
transcriptional_cis_regulatory_region - -[Term] -id: SO:0000177 -name: cross_genome_match -namespace: sequence -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -namespace: sequence -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -namespace: sequence -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -namespace: sequence -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000183 -name: non_transcribed_region -namespace: sequence -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000185 -name: primary_transcript -namespace: sequence -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000187 -name: repeat_family -namespace: sequence -def: "A group of characterized repeat sequences." 
[SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -namespace: sequence -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000193 -name: RFLP_fragment -namespace: sequence -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." [GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000195 -name: coding_exon -namespace: sequence -def: "An exon whereby at least one base is part of a codon (here, 'codon' is inclusive of the stop_codon)." [SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -namespace: sequence -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -namespace: sequence -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000195 ! 
coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -namespace: sequence -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -namespace: sequence -def: "The 5' most coding exon." [SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -namespace: sequence -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -namespace: sequence -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -namespace: sequence -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -namespace: sequence -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -subset: SOFA -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! 
nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -namespace: sequence -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -namespace: sequence -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -namespace: sequence -def: "A region of a nucleotide molecule that binds a Transcription Factor or Transcription Factor complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000236 -name: ORF -namespace: sequence -def: "The in-frame interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." 
[SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000239 -name: flanking_region -namespace: sequence -def: "The sequences extending on either side of a specific region." [SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000252 -name: rRNA -namespace: sequence -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -namespace: sequence -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -namespace: sequence -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -namespace: sequence -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -namespace: sequence -alt_id: SO:0000649 -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene (or the product of other non coding RNA genes. 
Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (usually via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:11081512, PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -synonym: "small temporal RNA" EXACT [] -synonym: "stRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: part_of SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000289 -name: microsatellite -namespace: sequence -def: "A repeat_region containing repeat_units of 2 to 10 bp repeated in tandem." [http://www.informatics.jax.org/silver/glossary.shtml, NCBI:th] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000294 -name: inverted_repeat -namespace: sequence -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000296 -name: origin_of_replication -namespace: sequence -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! replicon - -[Term] -id: SO:0000303 -name: clip -namespace: sequence -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000305 -name: modified_DNA_base -namespace: sequence -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001236 ! base -is_a: SO:0001720 ! epigenetically_modified_region - -[Term] -id: SO:0000306 -name: methylated_DNA_base_feature -namespace: sequence -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_DNA_base - -[Term] -id: SO:0000307 -name: CpG_island -namespace: sequence -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000314 -name: direct_repeat -namespace: sequence -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! 
repeat_region - -[Term] -id: SO:0000315 -name: TSS -namespace: sequence -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -synonym: "transcription_start_site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -namespace: sequence -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -synonym: "coding_sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000318 -name: start_codon -namespace: sequence -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -namespace: sequence -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000324 -name: tag -namespace: sequence -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -namespace: sequence -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "35S rRNA primary transcript" EXACT [] -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -namespace: sequence -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." 
[http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000330 -name: conserved_region -namespace: sequence -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000331 -name: STS -namespace: sequence -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -namespace: sequence -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -namespace: sequence -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -namespace: sequence -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000336 -name: pseudogene -namespace: sequence -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). 
In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0001411 ! biological_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -namespace: sequence -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0000340 -name: chromosome -namespace: sequence -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000341 -name: chromosome_band -namespace: sequence -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000343 -name: match -namespace: sequence -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -namespace: sequence -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0000165 ! enhancer -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -namespace: sequence -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000347 -name: nucleotide_match -namespace: sequence -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000349 -name: protein_match -namespace: sequence -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000353 -name: sequence_assembly -namespace: sequence -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000360 -name: codon -namespace: sequence -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000366 -name: insertion_site -namespace: sequence -def: "The junction where an insertion occurred." 
[SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -namespace: sequence -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -namespace: sequence -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000372 -name: enzymatic_RNA -namespace: sequence -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000374 -name: ribozyme -namespace: sequence -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -is_a: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000375 -name: rRNA_5_8S -namespace: sequence -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -namespace: sequence -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [PMID:2436805] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -namespace: sequence -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -namespace: sequence -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterized activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. 
Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -namespace: sequence -def: "The RNA component of telomerase, a reverse transcriptase that synthesizes telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -namespace: sequence -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -namespace: sequence -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -namespace: sequence -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -namespace: sequence -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." 
[PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -namespace: sequence -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -namespace: sequence -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -namespace: sequence -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." 
[http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -namespace: sequence -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -namespace: sequence -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000403 -name: U14_snoRNA -namespace: sequence -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0000404 -name: vault_RNA -namespace: sequence -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -namespace: sequence -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000407 -name: rRNA_18S -namespace: sequence -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." 
[SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000409 -name: binding_site -namespace: sequence -alt_id: BS:00033 -def: "A biological_region of sequence that, in the molecule, interacts selectively and non-covalently with other molecules. A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: See GO:0005488 : binding. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -namespace: sequence -def: "A binding site that, in the molecule, interacts selectively and non-covalently with polypeptide molecules." [SO:ke] -comment: See GO:0042277 : peptide binding. -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000412 -name: restriction_fragment -namespace: sequence -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -namespace: sequence -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! 
remark - -[Term] -id: SO:0000418 -name: signal_peptide -namespace: sequence -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0001527 ! peptide_localization_signal -relationship: part_of SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000419 -name: mature_protein_region -namespace: sequence -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000436 -name: ARS -namespace: sequence -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! 
origin_of_replication - -[Term] -id: SO:0000441 -name: ss_oligo -namespace: sequence -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -namespace: sequence -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000454 -name: rasiRNA -namespace: sequence -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000462 -name: pseudogenic_region -namespace: sequence -def: "A non-functional descendant of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000464 -name: decayed_exon -namespace: sequence -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000468 -name: golden_path_fragment -namespace: sequence -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! 
assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000472 -name: tiling_path -namespace: sequence -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000474 -name: tiling_path_fragment -namespace: sequence -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000483 -name: nc_primary_transcript -namespace: sequence -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -namespace: sequence -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000195 ! coding_exon - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -namespace: sequence -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000499 -name: virtual_sequence -namespace: sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0000502 -name: transcribed_region -namespace: sequence -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -namespace: sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -synonym: "polyadenylation termination signal" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000553 -name: polyA_site -namespace: sequence -alt_id: SO:0001430 -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation. The boundary between the UTR and the polyA sequence." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA cleavage site" EXACT [] -synonym: "polyA junction" EXACT [] -synonym: "polyA site" EXACT [] -synonym: "polyA_junction" EXACT [] -synonym: "polyadenylation site" RELATED [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! 
mature_transcript - -[Term] -id: SO:0000577 -name: centromere -namespace: sequence -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000581 -name: cap -namespace: sequence -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000587 -name: group_I_intron -namespace: sequence -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyze their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -namespace: sequence -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000590 -name: SRP_RNA -namespace: sequence -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. 
The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -namespace: sequence -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. 
Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0000602 -name: guide_RNA -namespace: sequence -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -namespace: sequence -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. 
-subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000605 -name: intergenic_region -namespace: sequence -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000610 -name: polyA_sequence -namespace: sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -namespace: sequence -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -namespace: sequence -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000616 -name: transcription_end_site -namespace: sequence -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! 
primary_transcript_region - -[Term] -id: SO:0000624 -name: telomere -namespace: sequence -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -namespace: sequence -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000627 -name: insulator -namespace: sequence -def: "A transcriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -namespace: sequence -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000643 -name: minisatellite -namespace: sequence -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -namespace: sequence -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! 
ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -namespace: sequence -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -namespace: sequence -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -namespace: sequence -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -namespace: sequence -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -namespace: sequence -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). 
In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilizes 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -namespace: sequence -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000655 -name: ncRNA -namespace: sequence -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000657 -name: repeat_region -namespace: sequence -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: has_part SO:0001411 ! biological_region - -[Term] -id: SO:0000658 -name: dispersed_repeat -namespace: sequence -def: "A repeat that is located at dispersed sites in the genome." 
[SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000662 -name: spliceosomal_intron -namespace: sequence -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000667 -name: insertion -namespace: sequence -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: DBVAR -subset: SOFA -synonym: "insertion" EXACT dbvar [http://www.ncbi.nlm.nih.gov/dbvar/] -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -xref: loinc:LA6687-3 "Insertion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -namespace: sequence -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000673 -name: transcript -namespace: sequence -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -namespace: sequence -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000687 -name: deletion_junction -namespace: sequence -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! 
junction - -[Term] -id: SO:0000688 -name: golden_path -namespace: sequence -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -namespace: sequence -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000694 -name: SNP -namespace: sequence -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:0000695 -name: reagent -namespace: sequence -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -namespace: sequence -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000699 -name: junction -namespace: sequence -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -synonym: "breakpoint" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000700 -name: remark -namespace: sequence -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -namespace: sequence -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -namespace: sequence -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -namespace: sequence -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -namespace: sequence -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -namespace: sequence -def: "Two or more adjacent copies of a region (of length greater than 1)." [SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -namespace: sequence -def: "The 3' splice site of the acceptor primary transcript." 
[SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000714 -name: nucleotide_motif -namespace: sequence -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001683 ! sequence_motif - -[Term] -id: SO:0000715 -name: RNA_motif -namespace: sequence -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000717 -name: reading_frame -namespace: sequence -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000719 -name: ultracontig -namespace: sequence -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -synonym: "superscaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000724 -name: oriT -namespace: sequence -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -namespace: sequence -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the EMBL, DDBJ, GenBank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal - -[Term] -id: SO:0000727 -name: CRM -namespace: sequence -def: "A regulatory region where transcription factor binding sites clustered to regulate various aspects of transcription activities. (CRMs can be located a few kb to hundred kb upstream of the basal promoter, in the coding sequence, within introns, or in the downstream 3'UTR sequences, as well as on different chromosome). A single gene can be regulated by multiple CRMs to give precise control of its spatial and temporal expression. CRMs function as nodes in large, intertwined regulatory network." [PMID:19660565, SO:SG] -comment: Requested by Stephen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -synonym: "transcription factor module" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: has_part SO:0000235 ! 
TF_binding_site -relationship: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000730 -name: gap -namespace: sequence -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -namespace: sequence -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -namespace: sequence -def: "The region of sequence that has been inserted and is being propagated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -namespace: sequence -def: "A non functional descendant of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -namespace: sequence -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000830 -name: chromosome_part -namespace: sequence -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! 
biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -namespace: sequence -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000833 -name: transcript_region -namespace: sequence -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -namespace: sequence -def: "A region of a mature transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -namespace: sequence -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -namespace: sequence -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! 
mRNA - -[Term] -id: SO:0000837 -name: UTR_region -namespace: sequence -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000839 -name: polypeptide_region -namespace: sequence -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -namespace: sequence -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -namespace: sequence -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000851 -name: CDS_region -namespace: sequence -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -namespace: sequence -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! 
exon - -[Term] -id: SO:0001000 -name: rRNA_16S -namespace: sequence -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -namespace: sequence -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -namespace: sequence -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001019 -name: copy_number_variation -namespace: sequence -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -subset: SOFA -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0001037 -name: mobile_genetic_element -namespace: sequence -def: "A nucleotide region with either intra-genome or intracellular mobility, of varying length, which often carry the information necessary for transfer and recombination with the host genome." 
[PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -namespace: sequence -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -namespace: sequence -def: "A regulatory_region that modulates the transcription of a gene or genes." [PMID:9679020, SO:regcreative] -subset: SOFA -synonym: "transcription-control region" EXACT [] -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -namespace: sequence -def: "A regulatory_region that modulates splicing." [SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -namespace: sequence -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. -subset: SOFA -synonym: "partially characterised change in DNA sequence" NARROW [] -synonym: "partially_characterised_change_in_DNA_sequence" NARROW [] -synonym: "sequence alteration" EXACT [] -synonym: "sequence variation" RELATED [] -synonym: "uncharacterised_change_in_nucleotide_sequence" NARROW [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001063 -name: immature_peptide_region -namespace: sequence -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." 
[EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -namespace: sequence -def: "The maximal intersection of exon and UTR." [SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with a stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -namespace: sequence -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001235 -name: replicon -namespace: sequence -def: "A region containing at least one unique origin of replication and a unique termination site." [ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -namespace: sequence -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001248 -name: assembly -namespace: sequence -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001409 -name: biomaterial_region -namespace: sequence -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0001410 -name: experimental_feature -namespace: sequence -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -namespace: sequence -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -namespace: sequence -def: "A region that is defined according to its relations with other regions within the same sequence." [SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001419 -name: cis_splice_site -namespace: sequence -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -namespace: sequence -def: "Primary transcript region bordering trans-splice junction." [SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001483 -name: SNV -namespace: sequence -def: "SNVs are single nucleotide positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0001527 -name: peptide_localization_signal -namespace: sequence -def: "A region of peptide sequence used to target the polypeptide molecule to a specific organelle." 
[SO:ke] -subset: SOFA -synonym: "localization signal" RELATED [] -synonym: "peptide localization signal" EXACT [] -is_a: SO:0000839 ! polypeptide_region -created_by: kareneilbeck -creation_date: 2010-03-11T02:15:05Z - -[Term] -id: SO:0001647 -name: kozak_sequence -namespace: sequence -def: "A kind of ribosome entry site, specific to Eukaryotic organisms that overlaps part of both 5' UTR and CDS sequence." [SO:ke] -subset: SOFA -synonym: "kozak consensus" EXACT [] -synonym: "kozak consensus sequence" EXACT [] -synonym: "kozak sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Kozak_consensus_sequence "wikipedia" -is_a: SO:0000139 ! ribosome_entry_site -created_by: kareneilbeck -creation_date: 2010-06-07T03:12:20Z - -[Term] -id: SO:0001654 -name: nucleotide_to_protein_binding_site -namespace: sequence -def: "A binding site that, in the nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -subset: SOFA -synonym: "nucleotide to protein binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:26:05Z - -[Term] -id: SO:0001679 -name: transcription_regulatory_region -namespace: sequence -def: "A regulatory region that is involved in the control of the process of transcription." [SO:ke] -subset: SOFA -synonym: "transcription regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:49:35Z - -[Term] -id: SO:0001683 -name: sequence_motif -namespace: sequence -def: "A sequence motif is a nucleotide or amino-acid sequence pattern that may have biological significance." [http://en.wikipedia.org/wiki/Sequence_motif] -subset: SOFA -synonym: "sequence motif" RELATED [] -xref: http://en.wikipedia.org/wiki/Sequence_motif "wikipedia" -is_a: SO:0001411 ! 
biological_region -created_by: kareneilbeck -creation_date: 2010-10-14T04:13:22Z - -[Term] -id: SO:0001720 -name: epigenetically_modified_region -namespace: sequence -def: "A biological region implicated in inherited changes caused by mechanisms other than changes in the underlying DNA sequence." [http://en.wikipedia.org/wiki/Epigenetics, SO:ke] -subset: SOFA -synonym: "epigenetically modified region" RELATED [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-03-27T12:02:29Z - -[Term] -id: SO:0001790 -name: paired_end_fragment -namespace: sequence -def: "An assembly region that has been sequenced from both ends resulting in a read_pair (mate_pair)." [SO:ke] -subset: SOFA -synonym: "paired end fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -created_by: kareneilbeck -creation_date: 2011-04-14T01:48:20Z - -[Term] -id: SO:0005836 -name: regulatory_region -namespace: sequence -def: "A region of sequence that is involved in the control of a biological process." [SO:ke] -subset: SOFA -synonym: "regulatory region" EXACT [] -xref: http://en.wikipedia.org/wiki/Regulatory_region "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0005855 -name: gene_group -namespace: sequence -def: "A collection of related genes." [SO:ma] -subset: SOFA -synonym: "gene group" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0100011 -name: cleaved_peptide_region -namespace: sequence -def: "The cleaved_peptide_region is the region of a peptide sequence that is cleaved during maturation." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "cleaved peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:1000002 -name: substitution -namespace: sequence -def: "A sequence alteration where the length of the change in the variant is the same as that of the reference." 
[SO:ke] -subset: SOFA -xref: loinc:LA6690-7 "Substitution" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1000005 -name: complex_substitution -namespace: sequence -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." [EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -synonym: "complex substitution" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000008 -name: point_mutation -namespace: sequence -def: "A single nucleotide change which has occurred at the same position of a corresponding nucleotide in a reference sequence." [SO:immuno_workshop] -subset: SOFA -synonym: "point mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Point_mutation "wiki" -is_a: SO:0001483 ! SNV - -[Term] -id: SO:1000036 -name: inversion -namespace: sequence -def: "A continuous nucleotide sequence is inverted in the same position." [EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: DBVAR -subset: SOFA -synonym: "inversion" EXACT dbvar [http://www.ncbi.nlm.nih.gov/dbvar/] -xref: loinc:LA6689-9 "Inversion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1001284 -name: regulon -namespace: sequence -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." [ISBN:0198506732] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Regulon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:2000061 -name: databank_entry -namespace: sequence -def: "The sequence referred to by an entry in a databank such as GenBank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -synonym: "databank entry" EXACT [] -is_a: SO:0000695 ! 
reagent - -[Typedef] -id: adjacent_to -name: adjacent_to -namespace: sequence -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence. X adjacent_to Y iff X and Y share a boundary but do not overlap." [PMID:20226267, SO:ke] -subset: SOFA - -[Typedef] -id: associated_with -name: associated_with -namespace: sequence -comment: This relationship is vague and up for discussion. - -[Typedef] -id: complete_evidence_for_feature -name: complete_evidence_for_feature -namespace: sequence -def: "B is complete_evidence_for_feature A if the extent (5' and 3' boundaries) and internal boundaries of B fully support the extent and internal boundaries of A." [SO:ke] -comment: If A is a feature with multiple regions such as a multi exon transcript, the supporting EST evidence is complete if each of the regions is supported by an equivalent region in B. Also there must be no extra regions in B that are not represented in A. This relationship was requested by jeltje on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: connects_on -name: connects_on -namespace: sequence -def: "X connects_on Y, Z, R iff whenever Z is on a R, X is adjacent to a Y and adjacent to a Z." [PMID:20226267] -comment: Example: A splice_junction connects_on exon, exon, mature_transcript. -created_by: kareneilbeck -creation_date: 2010-10-14T01:38:51Z - -[Typedef] -id: contained_by -name: contained_by -namespace: sequence -def: "X contained_by Y iff X starts after start of Y and X ends before end of Y." [PMID:20226267] -comment: The inverse is contains. Example: intein contained_by immature_peptide_region. -is_transitive: true -created_by: kareneilbeck -creation_date: 2010-10-14T01:26:16Z - -[Typedef] -id: contains -name: contains -namespace: sequence -def: "The inverse of contained_by." 
[PMID:20226267] -comment: Example: pre_miRNA contains miRNA_loop. -is_transitive: true -created_by: kareneilbeck -creation_date: 2010-10-14T01:32:15Z - -[Typedef] -id: derives_from -name: derives_from -namespace: sequence -subset: SOFA -is_transitive: true - -[Typedef] -id: disconnected_from -name: disconnected_from -namespace: sequence -def: "X is disconnected_from Y iff it is not the case that X overlaps Y." [PMID:20226267] -created_by: kareneilbeck -creation_date: 2010-10-14T01:42:10Z - -[Typedef] -id: edited_from -name: edited_from -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:45Z - -[Typedef] -id: edited_to -name: edited_to -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:11Z - -[Typedef] -id: evidence_for_feature -name: evidence_for_feature -namespace: sequence -def: "B is evidence_for_feature A, if an instance of B supports the existence of A." [SO:ke] -comment: This relationship was requested by nlw on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true - -[Typedef] -id: exemplar_of -name: exemplar_of -namespace: sequence -def: "X is exemplar of Y if X is the best evidence for Y." [SO:ke] -comment: Tracker id: 2594157. - -[Typedef] -id: finished_by -name: finished_by -namespace: sequence -def: "Xy is finished_by Y if Y part of X, and X and Y share a 3' boundary." [PMID:20226267] -comment: Example CDS finished_by stop_codon. -created_by: kareneilbeck -creation_date: 2010-10-14T01:45:45Z - -[Typedef] -id: finishes -name: finishes -namespace: sequence -def: "X finishes Y if X is part_of Y and X and Y share a 3' or C terminal boundary." [PMID:20226267] -comment: Example: stop_codon finishes CDS. -created_by: kareneilbeck -creation_date: 2010-10-14T02:17:53Z - -[Typedef] -id: gained -name: gained -namespace: sequence -def: "X gained Y if X is a variant_of X' and Y part of X but not X'." 
[SO:ke] -comment: A relation with which to annotate the changes in a variant sequence with respect to a reference.\nFor example a variant transcript may gain a stop codon not present in the reference sequence. -created_by: kareneilbeck -creation_date: 2011-06-28T12:51:10Z - -[Typedef] -id: genome_of -name: genome_of -namespace: sequence - -[Typedef] -id: guided_by -name: guided_by -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:04Z - -[Typedef] -id: guides -name: guides -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:24Z - -[Typedef] -id: has_integral_part -name: has_integral_part -namespace: sequence -def: "X has_integral_part Y if and only if: X has_part Y and Y part_of X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: mRNA has_integral_part CDS. -is_a: has_part ! has_part -created_by: kareneilbeck -creation_date: 2009-08-19T12:01:46Z - -[Typedef] -id: has_origin -name: has_origin -namespace: sequence - -[Typedef] -id: has_part -name: has_part -namespace: sequence -def: "Inverse of part_of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: operon has_part gene. - -[Typedef] -id: has_quality -name: has_quality -namespace: sequence -comment: The relationship between a feature and an attribute. - -[Typedef] -id: homologous_to -name: homologous_to -namespace: sequence -subset: SOFA -is_symmetric: true -is_a: similar_to ! similar_to - -[Typedef] -id: integral_part_of -name: integral_part_of -namespace: sequence -def: "X integral_part_of Y if and only if: X part_of Y and Y has_part X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: exon integral_part_of transcript. -is_a: part_of ! 
part_of -created_by: kareneilbeck -creation_date: 2009-08-19T12:03:28Z - -[Typedef] -id: is_consecutive_sequence_of -name: is_consecutive_sequence_of -namespace: sequence -def: "R is_consecutive_sequence_of R iff every instance of R is equivalent to a collection of instances of U:u1, u2, un, such that no pair of ux uy is overlapping and for all ux, it is adjacent to ux-1 and ux+1, with the exception of the initial and terminal u1,and un (which may be identical)." [PMID:20226267] -comment: Example: region is consecutive_sequence of base. -created_by: kareneilbeck -creation_date: 2010-10-14T02:19:48Z - -[Typedef] -id: lost -name: lost -namespace: sequence -def: "X lost Y if X is a variant_of X' and Y part of X' but not X." [SO:ke] -comment: A relation with which to annotate the changes in a variant sequence with respect to a reference.\nFor example a variant transcript may have lost a stop codon present in the reference sequence. -created_by: kareneilbeck -creation_date: 2011-06-28T12:53:16Z - -[Typedef] -id: maximally_overlaps -name: maximally_overlaps -namespace: sequence -def: "A maximally_overlaps X iff all parts of A (including A itself) overlap both A and Y." [PMID:20226267] -comment: Example: non_coding_region_of_exon maximally_overlaps the intersections of exon and UTR. -created_by: kareneilbeck -creation_date: 2010-10-14T01:34:48Z - -[Typedef] -id: member_of -name: member_of -namespace: sequence -comment: A subtype of part_of. Inverse is collection_of. Winston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. -subset: SOFA -is_transitive: true -is_a: part_of ! part_of - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -namespace: sequence -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -subset: SOFA -is_a: homologous_to ! 
homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -namespace: sequence -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: overlaps -name: overlaps -namespace: sequence -def: "X overlaps Y iff there exists some Z such that Z contained_by X and Z contained_by Y." [PMID:20226267] -comment: Example: coding_exon overlaps CDS. -created_by: kareneilbeck -creation_date: 2010-10-14T01:33:15Z - -[Typedef] -id: paralogous_to -name: paralogous_to -namespace: sequence -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: part_of -name: part_of -namespace: sequence -def: "X part_of Y if X is a subregion of Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: amino_acid part_of polypeptide. -subset: SOFA -is_transitive: true - -[Typedef] -id: partial_evidence_for_feature -name: partial_evidence_for_feature -namespace: sequence -def: "B is partial_evidence_for_feature A if the extent of B supports part_of but not all of A." [SO:ke] -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: position_of -name: position_of -namespace: sequence - -[Typedef] -id: processed_from -name: processed_from -namespace: sequence -def: "Inverse of processed_into." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA processed_from miRNA_primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:14:00Z - -[Typedef] -id: processed_into -name: processed_into -namespace: sequence -def: "X is processed_into Y if a region X is modified to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA_primary_transcript processed into miRNA. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:15:02Z - -[Typedef] -id: recombined_from -name: recombined_from -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:21:03Z - -[Typedef] -id: recombined_to -name: recombined_to -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:20:07Z - -[Typedef] -id: sequence_of -name: sequence_of -namespace: sequence - -[Typedef] -id: similar_to -name: similar_to -namespace: sequence -subset: SOFA -is_symmetric: true - -[Typedef] -id: started_by -name: started_by -namespace: sequence -def: "X is strted_by Y if Y is part_of X and X and Y share a 5' boundary." [PMID:20226267] -comment: Example: CDS started_by start_codon. -created_by: kareneilbeck -creation_date: 2010-10-14T01:43:55Z - -[Typedef] -id: starts -name: starts -namespace: sequence -def: "X starts Y if X is part of Y, and A and Y share a 5' or N-terminal boundary." [PMID:20226267] -comment: Example: start_codon starts CDS. -created_by: kareneilbeck -creation_date: 2010-10-14T01:47:53Z - -[Typedef] -id: trans_spliced_from -name: trans_spliced_from -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:14Z - -[Typedef] -id: trans_spliced_to -name: trans_spliced_to -namespace: sequence -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:00Z - -[Typedef] -id: transcribed_from -name: transcribed_from -namespace: sequence -def: "X is transcribed_from Y if X is synthesized from template Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: primary_transcript transcribed_from gene. -created_by: kareneilbeck -creation_date: 2009-08-19T12:05:39Z - -[Typedef] -id: transcribed_to -name: transcribed_to -namespace: sequence -def: "Inverse of transcribed_from." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: gene transcribed_to primary_transcript. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:08:24Z - -[Typedef] -id: translates_to -name: translates_to -namespace: sequence -def: "Inverse of translation _of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: codon translates_to amino_acid. -created_by: kareneilbeck -creation_date: 2009-08-19T12:11:53Z - -[Typedef] -id: translation_of -name: translation_of -namespace: sequence -def: "X is translation of Y if Y is translated by ribosome to create X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: Polypeptide translation_of CDS. -created_by: kareneilbeck -creation_date: 2009-08-19T12:09:59Z - -[Typedef] -id: variant_of -name: variant_of -namespace: sequence -def: "A' is a variant (mutation) of A = definition every instance of A' is either an immediate mutation of some instance of A, or there is a chain of immediate mutation processes linking A' to some instance of A." [SO:immuno_workshop] -comment: Added to SO during the immunology workshop, June 2007. This relationship was approved by Barry Smith. 
- diff --git a/annotation/NBIS/Ontology/SOXP/so-xp_2.5.obo b/annotation/NBIS/Ontology/SOXP/so-xp_2.5.obo deleted file mode 100644 index 90457bd83..000000000 --- a/annotation/NBIS/Ontology/SOXP/so-xp_2.5.obo +++ /dev/null @@ -1,19457 +0,0 @@ -format-version: 1.2 -date: 13:04:2012 11:41 -saved-by: kareneilbeck -auto-generated-by: OBO-Edit 2.1.1-beta7 -subsetdef: biosapiens "biosapiens protein feature ontology" -subsetdef: DBVAR "database of genomic structural variation" -subsetdef: SOFA "SO feature annotation" -synonymtypedef: aa1 "amino acid 1 letter code" -synonymtypedef: aa3 "amino acid 3 letter code" -synonymtypedef: AAMOD "amino acid modification" -synonymtypedef: BS "biosapiens" -synonymtypedef: dbsnp "dbsnp variant terms" -synonymtypedef: dbvar "DBVAR" -synonymtypedef: ebi_variants "ensembl variant terms" -synonymtypedef: RNAMOD "RNA modification" EXACT -default-namespace: sequence -remark: autogenerated-by: DAG-Edit version 1.417\nsaved-by: eilbeck\ndate: Tue May 11 15:18:44 PDT 2004\nversion: $Revision: 1.295 $ -ontology: so-xp.obo - -[Term] -id: SO:0000000 -name: Sequence_Ontology -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000001 -name: region -def: "A sequence_feature with an extent greater than zero. A nucleotide region is composed of bases and a polypeptide region is composed of amino acids." [SO:ke] -subset: SOFA -synonym: "sequence" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000699 ! junction - -[Term] -id: SO:0000002 -name: sequence_secondary_structure -def: "A folded sequence." [SO:ke] -synonym: "sequence secondary structure" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000003 -name: G_quartet -def: "G-quartets are unusual nucleic acid structures consisting of a planar arrangement where each guanine is hydrogen bonded by hoogsteen pairing to another guanine in the quartet." 
[http://www.ncbi.nlm.nih.gov/pubmed/7919797?dopt=Abstract] -synonym: "G quartet" EXACT [] -synonym: "G tetrad" EXACT [] -synonym: "G-quadruplex" EXACT [] -synonym: "G-quartet" EXACT [] -synonym: "G-tetrad" EXACT [] -synonym: "G_quadruplex" EXACT [] -synonym: "guanine tetrad" EXACT [] -xref: http://en.wikipedia.org/wiki/G-quadruplex "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000004 -name: interior_coding_exon -subset: SOFA -synonym: "interior coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000005 -name: satellite_DNA -def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "satellite DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Satellite_DNA "wiki" -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000006 -name: PCR_product -def: "A region amplified by a PCR reaction." [SO:ke] -comment: This term is mapped to MGED. This term is now located in OBI, with the following ID OBI_0000406. -subset: SOFA -synonym: "amplicon" RELATED [] -synonym: "PCR product" EXACT [] -xref: http://en.wikipedia.org/wiki/RAPD "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000007 -name: read_pair -def: "One of a pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -synonym: "mate pair" EXACT [] -synonym: "read-pair" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000149 ! contig -relationship: part_of SO:0001790 ! 
paired_end_fragment - -[Term] -id: SO:0000008 -name: gene_sensu_your_favorite_organism -is_obsolete: true - -[Term] -id: SO:0000009 -name: gene_class -is_obsolete: true - -[Term] -id: SO:0000010 -name: protein_coding -synonym: "protein-coding" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000011 -name: non_protein_coding -synonym: "non protein-coding" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000012 -name: scRNA_primary_transcript -def: "The primary transcript of any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -synonym: "scRNA primary transcript" EXACT [] -synonym: "scRNA transcript" EXACT [] -synonym: "small cytoplasmic RNA" RELATED [] -synonym: "small cytoplasmic RNA transcript" EXACT [] -synonym: "small_cytoplasmic_RNA" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000013 -name: scRNA -def: "A small non coding RNA sequence, present in the cytoplasm." [SO:ke] -subset: SOFA -synonym: "small cytoplasmic RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000012 ! scRNA_primary_transcript - -[Term] -id: SO:0000014 -name: INR_motif -def: "A sequence element characteristic of some RNA polymerase II promoters required for the correct positioning of the polymerase for the start of transcription. Overlaps the TSS. The mammalian consensus sequence is YYAN(T|A)YY; the Drosophila consensus sequence is TCA(G|T)t(T|C). In each the A is at position +1 with respect to the TSS. Functionally similar to the TATA box element." [PMID:12651739, PMID:16858867] -comment: Binds TAF1, TAF2. -synonym: "DMp2" RELATED [] -synonym: "initiator" EXACT [] -synonym: "initiator motif" EXACT [] -synonym: "INR motif" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! 
RNApol_II_core_promoter - -[Term] -id: SO:0000015 -name: DPE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters; Positioned from +28 to +32 with respect to the TSS (+1). Experimental results suggest that the DPE acts in conjunction with the INR_motif to provide a binding site for TFIID in the absence of a TATA box to mediate transcription of TATA-less promoters. Consensus sequence (A|G)G(A|T)(C|T)(G|A|C)." [PMID:12651739:12537576, PMID:16858867] -comment: Binds TAF6, TAF9. -synonym: "CRWMGCGWKCGCTTS" NARROW [] -synonym: "downstream core promoter element" EXACT [] -synonym: "DPE motif" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter - -[Term] -id: SO:0000016 -name: BREu_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements at -37 to -32 with respect to the TSS (+1). Consensus sequence is (G|C)(G|C)(G|A)CGCC. Binds TFIIB." [PMID:12651739, PMID:16858867] -comment: Binds TFIIB. -synonym: "B-recognition element" EXACT [] -synonym: "BRE motif" EXACT [] -synonym: "BREu" RELATED [] -synonym: "BREu motif" EXACT [] -synonym: "TFIIB recognition element" RELATED [] -synonym: "transcription factor B-recognition element" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter - -[Term] -id: SO:0000017 -name: PSE_motif -def: "A sequence element characteristic of the promoters of snRNA genes transcribed by RNA polymerase II or by RNA polymerase III. Located between -45 and -60 relative to the TSS. The human PSE_motif consensus sequence is TCACCNTNA(C|G)TNAAAAG(T|G)." [PMID:12651739] -synonym: "proximal sequence element" EXACT [] -synonym: "PSE motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000167 ! 
promoter - -[Term] -id: SO:0000018 -name: linkage_group -def: "A group of loci that can be grouped in a linear order representing the different degrees of linkage among the genes concerned." [ISBN:038752046] -synonym: "linkage group" EXACT [] -xref: http://en.wikipedia.org/wiki/Linkage_group "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000020 -name: RNA_internal_loop -def: "A region of double stranded RNA where the bases do not conform to WC base pairing. The loop is closed on both sides by canonical base pairing. If the interruption to base pairing occurs on one strand only, it is known as a bulge." [SO:ke] -synonym: "RNA internal loop" EXACT [] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000021 -name: asymmetric_RNA_internal_loop -def: "An internal RNA loop where one of the strands includes more bases than the corresponding region on the other strand." [SO:ke] -synonym: "asymmetric RNA internal loop" EXACT [] -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000022 -name: A_minor_RNA_motif -def: "A region forming a motif, composed of adenines, where the minor groove edges are inserted into the minor groove of another helix." [SO:ke] -synonym: "A minor RNA motif" EXACT [] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000023 -name: K_turn_RNA_motif -def: "The kink turn (K-turn) is an RNA structural motif that creates a sharp (~120 degree) bend between two continuous helices." [SO:ke] -synonym: "K turn RNA motif" EXACT [] -synonym: "K-turn" EXACT [] -synonym: "kink turn" EXACT [] -synonym: "kink-turn motif" EXACT [] -xref: http://en.wikipedia.org/wiki/K-turn "wiki" -is_a: SO:0000021 ! asymmetric_RNA_internal_loop - -[Term] -id: SO:0000024 -name: sarcin_like_RNA_motif -def: "A loop in ribosomal RNA containing the sites of attack for ricin and sarcin." 
[http://www.ncbi.nlm.nih.gov/pubmed/7897662] -synonym: "sarcin like RNA motif" EXACT [] -synonym: "sarcin/ricin domain" EXACT [] -synonym: "sarcin/ricin loop" EXACT [] -synonym: "sarcin/ricin RNA domain" EXACT [] -is_a: SO:0000021 ! asymmetric_RNA_internal_loop - -[Term] -id: SO:0000025 -name: symmetric_RNA_internal_loop -def: "An internal RNA loop where the extent of the loop on both stands is the same size." [SO:ke] -synonym: "A-minor RNA motif" EXACT [] -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000026 -name: RNA_junction_loop -synonym: "RNA junction loop" EXACT [] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000027 -name: RNA_hook_turn -synonym: "hook turn" RELATED [] -synonym: "hook-turn motif" EXACT [] -synonym: "RNA hook turn" EXACT [] -is_a: SO:0000026 ! RNA_junction_loop - -[Term] -id: SO:0000028 -name: base_pair -synonym: "base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Base_pair "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000029 -name: WC_base_pair -def: "The canonical base pair, where two bases interact via WC edges, with glycosidic bonds oriented cis relative to the axis of orientation." [PMID:12177293] -synonym: "canonical base pair" EXACT [] -synonym: "Watson Crick base pair" EXACT [] -synonym: "Watson-Crick base pair" RELATED [] -synonym: "Watson-Crick pair" EXACT [] -synonym: "WC base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000030 -name: sugar_edge_base_pair -def: "A type of non-canonical base-pairing." [PMID:12177293] -synonym: "sugar edge base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000031 -name: aptamer -def: "DNA or RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http://aptamer.icmb.utexas.edu] -xref: http://en.wikipedia.org/wiki/Aptamer "wiki" -is_a: SO:0000696 ! 
oligo - -[Term] -id: SO:0000032 -name: DNA_aptamer -def: "DNA molecules that have been selected from random pools based on their ability to bind other molecules." [http:aptamer.icmb.utexas.edu] -synonym: "DNA aptamer" EXACT [] -is_a: SO:0000031 ! aptamer - -[Term] -id: SO:0000033 -name: RNA_aptamer -def: "RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http://aptamer.icmb.utexas.edu] -synonym: "RNA aptamer" EXACT [] -is_a: SO:0000031 ! aptamer - -[Term] -id: SO:0000034 -name: morpholino_oligo -def: "Morpholino oligos are synthesized from four different Morpholino subunits, each of which contains one of the four genetic bases (A, C, G, T) linked to a 6-membered morpholine ring. Eighteen to 25 subunits of these four subunit types are joined in a specific order by non-ionic phosphorodiamidate intersubunit linkages to give a Morpholino." [http://www.gene-tools.com/] -synonym: "morphant" BROAD [] -synonym: "morpholino" EXACT [] -synonym: "morpholino oligo" EXACT [] -is_a: SO:0001247 ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001183 ! morpholino_backbone -relationship: has_quality SO:0001183 ! morpholino_backbone - -[Term] -id: SO:0000035 -name: riboswitch -def: "A riboswitch is a part of an mRNA that can act as a direct sensor of small molecules to control their own expression. A riboswitch is a cis element in the 5' end of an mRNA, that acts as a direct sensor of metabolites." [PMID:2820954] -synonym: "riboswitch RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Riboswitch "wiki" -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000036 -name: matrix_attachment_site -def: "A DNA region that is required for the binding of chromatin to the nuclear matrix." 
[SO:ma] -synonym: "MAR" EXACT [] -synonym: "matrix association region" EXACT [] -synonym: "matrix attachment region" EXACT [] -synonym: "matrix attachment site" EXACT [] -synonym: "nuclear matrix association region" EXACT [] -synonym: "nuclear matrix attachment site" EXACT [] -synonym: "S/MAR" EXACT [] -synonym: "S/MAR element" RELATED [] -synonym: "scaffold attachment site" EXACT [] -synonym: "scaffold matrix attachment region" EXACT [] -synonym: "SMAR" EXACT [] -xref: http://en.wikipedia.org/wiki/Matrix_attachment_site "wiki" -is_a: SO:0000626 ! chromosomal_regulatory_element - -[Term] -id: SO:0000037 -name: locus_control_region -def: "A DNA region that includes DNAse hypersensitive sites located 5' to a gene that confers the high-level, position-independent, and copy number-dependent expression to that gene." [SO:ma] -synonym: "LCR" EXACT [] -synonym: "locus control element" RELATED [] -synonym: "locus control region" EXACT [] -xref: http://en.wikipedia.org/wiki/Locus_control_region "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000038 -name: match_set -def: "A collection of match parts." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000039 -name: match_part -def: "A part of a match, for example an hsp from blast is a match_part." [SO:ke] -subset: SOFA -synonym: "match part" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: part_of SO:0000343 ! match - -[Term] -id: SO:0000040 -name: genomic_clone -def: "A clone of a DNA region of a genome." [SO:ma] -synonym: "genomic clone" EXACT [] -is_a: SO:0000151 ! clone -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000991 ! genomic_DNA -relationship: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000041 -name: sequence_operation -def: "An operation that can be applied to a sequence, that results in a change." 
[SO:ke] -synonym: "sequence operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000042 -name: pseudogene_attribute -def: "An attribute of a pseudogene (SO:0000336)." [SO:ma] -synonym: "pseudogene attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000043 -name: processed_pseudogene -def: "A pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promoters, but often including a polyA tail." [SO:xp] -comment: Please not the synonym R psi M uses the spelled out form of the greek letter. -synonym: "processed pseudogene" EXACT [] -synonym: "pseudogene by reverse transcription" RELATED [] -synonym: "R psi G" RELATED [] -synonym: "retropseudogene" EXACT [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0000044 -name: pseudogene_by_unequal_crossing_over -def: "A pseudogene caused by unequal crossing over at recombination." [SO:ke] -synonym: "pseudogene by unequal crossing over" EXACT [] -is_a: SO:0001760 ! non_processed_pseudogene - -[Term] -id: SO:0000045 -name: delete -def: "To remove a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000046 -name: insert -def: "To insert a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000047 -name: invert -def: "To invert a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000048 -name: substitute -def: "To substitute a subsection of sequence for another." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000049 -name: translocate -def: "To translocate a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000050 -name: gene_part -def: "A part of a gene, that has no other route in the ontology back to region. This concept is necessary for logical inference as these parts must have the properties of region. It also allows us to associate all the parts of genes with a gene." 
[SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000051 -name: probe -def: "A DNA sequence used experimentally to detect the presence or absence of a complementary nucleic acid." [SO:ma] -xref: http://en.wikipedia.org/wiki/Hybridization_probe "wiki" -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000052 -name: assortment_derived_deficiency -synonym: "assortment-derived_deficiency" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000053 -name: sequence_variant_affecting_regulatory_region -def: "A sequence_variant_effect which changes the regulatory region of a gene." [SO:ke] -comment: OBSOLETE: This term was deleted as it conflated more than one term. The alteration is separate from the effect. -synonym: "mutation affecting regulatory region" RELATED [] -synonym: "sequence variant affecting regulatory region" EXACT [] -is_obsolete: true -replaced_by: SO:0001556 - -[Term] -id: SO:0000054 -name: aneuploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Aneuploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0000055 -name: hyperploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number as extra chromosomes are present." [SO:ke] -xref: http://en.wikipedia.org/wiki/Hyperploid "wiki" -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000056 -name: hypoploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number as some chromosomes are missing." [SO:ke] -xref: http://en.wikipedia.org/wiki/Hypoploid "wiki" -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000057 -name: operator -def: "A regulatory element of an operon to which activators or repressors bind thereby effecting translation of genes in that operon." 
[SO:ma] -subset: SOFA -synonym: "operator segment" EXACT [] -xref: http://en.wikipedia.org/wiki/Operator_(biology)#Operator "wiki" -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000058 -name: assortment_derived_aneuploid -synonym: "assortment-derived_aneuploid" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000059 -name: nuclease_binding_site -def: "A binding site that, of a nucleotide molecule, that interacts selectively and non-covalently with polypeptide residues of a nuclease." [SO:cb] -subset: SOFA -synonym: "nuclease binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site - -[Term] -id: SO:0000060 -name: compound_chromosome_arm -comment: FLAG - this term is should probably be a part of rather than an is_a. -synonym: "compound chromosome arm" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:0000061 -name: restriction_enzyme_binding_site -def: "A binding site that, in the nucleotide molecule, interacts selectively and non-covalently with polypeptide residues of a restriction enzyme." [SO:cb] -comment: A region of a molecule that binds to a restriction enzyme. -synonym: "restriction endonuclease binding site" EXACT [] -synonym: "restriction enzyme binding site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000062 -name: deficient_intrachromosomal_transposition -def: "An intrachromosomal transposition whereby a translocation in which one of the four broken ends loses a segment before re-joining." [FB:reference_manual] -synonym: "deficient intrachromosomal transposition" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000041 ! intrachromosomal_transposition -intersection_of: SO:1000041 ! intrachromosomal_transposition -intersection_of: has_part SO:0000159 ! deletion -relationship: has_part SO:0000159 ! 
deletion - -[Term] -id: SO:0000063 -name: deficient_interchromosomal_transposition -def: "An interchromosomal transposition whereby a translocation in which one of the four broken ends loses a segment before re-joining." [SO:ke] -synonym: "deficient interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:0000064 -name: gene_by_transcript_attribute -comment: This classes of attributes was added by MA to allow the broad description of genes based on qualities of the transcript(s). A product of SO meeting 2004. -is_obsolete: true - -[Term] -id: SO:0000065 -name: free_chromosome_arm -def: "A chromosome structure variation whereby an arm exists as an individual chromosome element." [SO:ke] -synonym: "free chromosome arm" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000066 -name: gene_by_polyadenylation_attribute -is_obsolete: true - -[Term] -id: SO:0000067 -name: gene_to_gene_feature -synonym: "gene to gene feature" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000068 -name: overlapping -def: "An attribute describing a gene that has a sequence that overlaps the sequence of another gene." [SO:ke] -is_a: SO:0000067 ! gene_to_gene_feature - -[Term] -id: SO:0000069 -name: inside_intron -def: "An attribute to describe a gene when it is located within the intron of another gene." [SO:ke] -synonym: "inside intron" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000070 -name: inside_intron_antiparallel -def: "An attribute to describe a gene when it is located within the intron of another gene and on the opposite strand." [SO:ke] -synonym: "inside intron antiparallel" EXACT [] -is_a: SO:0000069 ! inside_intron - -[Term] -id: SO:0000071 -name: inside_intron_parallel -def: "An attribute to describe a gene when it is located within the intron of another gene and on the same strand." [SO:ke] -synonym: "inside intron parallel" EXACT [] -is_a: SO:0000069 ! 
inside_intron - -[Term] -id: SO:0000072 -name: end_overlapping_gene -is_obsolete: true - -[Term] -id: SO:0000073 -name: five_prime_three_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's 3' region." [SO:ke] -synonym: "five prime-three prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000074 -name: five_prime_five_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's five prime region." [SO:ke] -synonym: "five prime-five prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000075 -name: three_prime_three_prime_overlap -def: "An attribute to describe a gene when the 3' region overlaps with another gene's 3' region." [SO:ke] -synonym: "three prime-three prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000076 -name: three_prime_five_prime_overlap -def: "An attribute to describe a gene when the 3' region overlaps with another gene's 5' region." [SO:ke] -synonym: "5' 3' overlap" EXACT [] -synonym: "three prime five prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000077 -name: antisense -def: "A region sequence that is complementary to a sequence of messenger RNA." [SO:ke] -xref: http://en.wikipedia.org/wiki/Antisense "wiki" -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000078 -name: polycistronic_transcript -def: "A transcript that is polycistronic." [SO:xp] -synonym: "polycistronic transcript" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000880 ! polycistronic -relationship: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000079 -name: dicistronic_transcript -def: "A transcript that is dicistronic." [SO:ke] -synonym: "dicistronic transcript" EXACT [] -is_a: SO:0000078 ! polycistronic_transcript -intersection_of: SO:0000673 ! 
transcript -intersection_of: has_quality SO:0000879 ! dicistronic -relationship: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000080 -name: operon_member -synonym: "operon member" EXACT [] -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0000081 -name: gene_array_member -synonym: "gene array member" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000082 -name: processed_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000083 -name: macronuclear_sequence -synonym: "macronuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000084 -name: micronuclear_sequence -synonym: "micronuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000085 -name: gene_by_genome_location -is_obsolete: true - -[Term] -id: SO:0000086 -name: gene_by_organelle_of_genome -is_obsolete: true - -[Term] -id: SO:0000087 -name: nuclear_gene -def: "A gene from nuclear sequence." [SO:xp] -synonym: "nuclear gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_gene "wiki" -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000738 ! nuclear_sequence -relationship: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000088 -name: mt_gene -def: "A gene located in mitochondrial sequence." [SO:xp] -synonym: "mitochondrial gene" EXACT [] -synonym: "mt gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_gene "wiki" -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence -relationship: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000089 -name: kinetoplast_gene -def: "A gene located in kinetoplast sequence." [SO:xp] -synonym: "kinetoplast gene" EXACT [] -is_a: SO:0000088 ! mt_gene -intersection_of: SO:0000088 ! mt_gene -intersection_of: has_origin SO:0000741 ! kinetoplast -relationship: has_origin SO:0000741 ! 
kinetoplast - -[Term] -id: SO:0000090 -name: plastid_gene -def: "A gene from plastid sequence." [SO:xp] -synonym: "plastid gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000740 ! plastid_sequence -relationship: has_origin SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000091 -name: apicoplast_gene -def: "A gene from apicoplast sequence." [SO:xp] -synonym: "apicoplast gene" EXACT [] -is_a: SO:0000090 ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000743 ! apicoplast_sequence -relationship: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0000092 -name: ct_gene -def: "A gene from chloroplast sequence." [SO:xp] -synonym: "chloroplast gene" EXACT [] -synonym: "ct gene" EXACT [] -is_a: SO:0000090 ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000745 ! chloroplast_sequence -relationship: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000093 -name: chromoplast_gene -def: "A gene from chromoplast_sequence." [SO:xp] -synonym: "chromoplast gene" EXACT [] -is_a: SO:0000090 ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000744 ! chromoplast_sequence -relationship: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000094 -name: cyanelle_gene -def: "A gene from cyanelle sequence." [SO:xp] -synonym: "cyanelle gene" EXACT [] -is_a: SO:0000090 ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000746 ! cyanelle_sequence -relationship: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000095 -name: leucoplast_gene -def: "A plastid gene from leucoplast sequence." [SO:xp] -synonym: "leucoplast gene" EXACT [] -is_a: SO:0000090 ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000747 ! leucoplast_sequence -relationship: has_origin SO:0000747 ! 
leucoplast_sequence - -[Term] -id: SO:0000096 -name: proplastid_gene -def: "A gene from proplastid sequence." [SO:ke] -synonym: "proplastid gene" EXACT [] -is_a: SO:0000090 ! plastid_gene -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000748 ! proplastid_sequence -relationship: has_origin SO:0000748 ! proplastid_sequence - -[Term] -id: SO:0000097 -name: nucleomorph_gene -def: "A gene from nucleomorph sequence." [SO:xp] -synonym: "nucleomorph gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence -relationship: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000098 -name: plasmid_gene -def: "A gene from plasmid sequence." [SO:xp] -synonym: "plasmid gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000749 ! plasmid_location -relationship: has_origin SO:0000749 ! plasmid_location - -[Term] -id: SO:0000099 -name: proviral_gene -def: "A gene from proviral sequence." [SO:xp] -synonym: "proviral gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000751 ! proviral_location -relationship: has_origin SO:0000751 ! proviral_location - -[Term] -id: SO:0000100 -name: endogenous_retroviral_gene -def: "A proviral gene with origin endogenous retrovirus." [SO:xp] -synonym: "endogenous retroviral gene" EXACT [] -is_a: SO:0000099 ! proviral_gene -intersection_of: SO:0000099 ! proviral_gene -intersection_of: has_origin SO:0000903 ! endogenous_retroviral_sequence -relationship: has_origin SO:0000903 ! endogenous_retroviral_sequence - -[Term] -id: SO:0000101 -name: transposable_element -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." 
[http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -synonym: "transposable element" EXACT [] -synonym: "transposon" EXACT [] -xref: http://en.wikipedia.org/wiki/Transposable_element "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000102 -name: expressed_sequence_match -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -synonym: "expressed sequence match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -def: "The end of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert end" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000104 -name: polypeptide -alt_id: SO:0000358 -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The term 'protein' was merged with 'polypeptide'. Although 'protein' was a sequence_attribute and therefore meant to describe the quality rather than an actual feature, it was being used erroneously. It is replaced by 'peptidyl' as the polymer attribute. -subset: SOFA -synonym: "protein" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypeptide "wiki" -is_a: SO:0001411 ! biological_region -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000105 -name: chromosome_arm -def: "A region of the chromosome between the centromere and the telomere. Human chromosomes have two arms, the p arm (short) and the q arm (long) which are separated from each other by the centromere." [http://www.medterms.com/script/main/art.asp?articlekey=5152] -synonym: "chromosome arm" EXACT [] -is_a: SO:0000830 ! 
chromosome_part - -[Term] -id: SO:0000106 -name: non_capped_primary_transcript -is_obsolete: true - -[Term] -id: SO:0000107 -name: sequencing_primer -synonym: "sequencing primer" EXACT [] -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000108 -name: mRNA_with_frameshift -def: "An mRNA with a frameshift." [SO:xp] -synonym: "frameshifted mRNA" EXACT [] -synonym: "mRNA with frameshift" EXACT [] -is_a: SO:0000234 ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000865 ! frameshift -relationship: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000109 -name: sequence_variant_obs -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000110 -name: sequence_feature -def: "An extent of biological sequence." [SO:ke] -subset: SOFA -synonym: "located sequence feature" RELATED [] -synonym: "located_sequence_feature" EXACT [] -synonym: "sequence feature" EXACT [] -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000111 -name: transposable_element_gene -def: "A gene encoded within a transposable element. For example gag, int, env and pol are the transposable element genes of the TY element in yeast." [SO:ke] -synonym: "transposable element gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: part_of SO:0000101 ! transposable_element -relationship: part_of SO:0000101 ! transposable_element - -[Term] -id: SO:0000112 -name: primer -def: "An oligo to which new deoxyribonucleotides can be added by DNA polymerase." [SO:ke] -subset: SOFA -synonym: "DNA primer" EXACT [] -synonym: "primer oligonucleotide" EXACT [] -synonym: "primer polynucleotide" EXACT [] -synonym: "primer sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Primer_(molecular_biology) "wiki" -is_a: SO:0000441 ! 
ss_oligo - -[Term] -id: SO:0000113 -name: proviral_region -def: "A viral sequence which has integrated into a host genome." [SO:ke] -subset: SOFA -synonym: "proviral region" EXACT [] -synonym: "proviral sequence" RELATED [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000114 -name: methylated_C -def: "A methylated deoxy-cytosine." [SO:ke] -subset: SOFA -synonym: "methylated C" EXACT [] -synonym: "methylated cytosine" EXACT [] -synonym: "methylated cytosine base" EXACT [] -synonym: "methylated cytosine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000115 -name: transcript_feature -is_obsolete: true - -[Term] -id: SO:0000116 -name: edited -def: "An attribute describing a sequence that is modified by editing." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000117 -name: transcript_with_readthrough_stop_codon -is_obsolete: true - -[Term] -id: SO:0000118 -name: transcript_with_translational_frameshift -def: "A transcript with a translational frameshift." [SO:xp] -synonym: "transcript with translational frameshift" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000887 ! translationally_frameshifted -relationship: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000119 -name: regulated -def: "An attribute to describe a sequence that is regulated." [SO:ke] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns. -subset: SOFA -synonym: "pre mRNA" RELATED [] -synonym: "protein coding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000121 -name: forward_primer -def: "A single stranded oligo used for polymerase chain reaction." 
[http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "DNA forward primer" EXACT [] -synonym: "forward DNA primer" EXACT [] -synonym: "forward primer" EXACT [] -synonym: "forward primer oligo" EXACT [] -synonym: "forward primer oligonucleotide" EXACT [] -synonym: "forward primer polynucleotide" EXACT [] -synonym: "forward primer sequence" EXACT [] -is_a: SO:0000112 ! primer -intersection_of: SO:0000112 ! primer -intersection_of: has_quality SO:0001030 ! forward -relationship: has_quality SO:0001030 ! forward - -[Term] -id: SO:0000122 -name: RNA_sequence_secondary_structure -def: "A folded RNA sequence." [SO:ke] -synonym: "RNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000123 -name: transcriptionally_regulated -def: "An attribute describing a gene that is regulated at transcription." [SO:ma] -comment: By:. -synonym: "transcriptionally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -def: "Expressed in relatively constant amounts without regard to cellular environmental conditions such as the concentration of a particular substrate." [SO:ke] -synonym: "transcriptionally constitutive" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -def: "An inducer molecule is required for transcription to occur." [SO:ke] -synonym: "transcriptionally induced" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -def: "A repressor molecule is required for transcription to stop." [SO:ke] -synonym: "transcriptionally repressed" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000127 -name: silenced_gene -def: "A gene that is silenced." [SO:xp] -synonym: "silenced gene" EXACT [] -is_a: SO:0000704 ! 
gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000893 ! silenced -relationship: has_quality SO:0000893 ! silenced - -[Term] -id: SO:0000128 -name: gene_silenced_by_DNA_modification -def: "A gene that is silenced by DNA modification." [SO:xp] -synonym: "gene silenced by DNA modification" EXACT [] -is_a: SO:0000127 ! silenced_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000894 ! silenced_by_DNA_modification -relationship: has_quality SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000129 -name: gene_silenced_by_DNA_methylation -def: "A gene that is silenced by DNA methylation." [SO:xp] -synonym: "gene silenced by DNA methylation" EXACT [] -synonym: "methylation-silenced gene" EXACT [] -is_a: SO:0000128 ! gene_silenced_by_DNA_modification -intersection_of: SO:0000128 ! gene_silenced_by_DNA_modification -intersection_of: has_quality SO:0000895 ! silenced_by_DNA_methylation -relationship: has_quality SO:0000895 ! silenced_by_DNA_methylation - -[Term] -id: SO:0000130 -name: post_translationally_regulated -def: "An attribute describing a gene that is regulated after it has been translated." [SO:ke] -synonym: "post translationally regulated" EXACT [] -synonym: "post-translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000131 -name: translationally_regulated -def: "An attribute describing a gene that is regulated as it is translated." [SO:ke] -synonym: "translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
-synonym: "DNA reverse primer" EXACT [] -synonym: "reverse DNA primer" EXACT [] -synonym: "reverse primer" EXACT [] -synonym: "reverse primer oligo" EXACT [] -synonym: "reverse primer oligonucleotide" EXACT [] -synonym: "reverse primer sequence" EXACT [] -is_a: SO:0000112 ! primer -intersection_of: SO:0000112 ! primer -intersection_of: has_quality SO:0001031 ! reverse -relationship: has_quality SO:0001031 ! reverse - -[Term] -id: SO:0000133 -name: epigenetically_modified -def: "This attribute describes a gene where heritable changes other than those in the DNA sequence occur. These changes include: modification to the DNA (such as DNA methylation, the covalent modification of cytosine), and post-translational modification of histones." [SO:ke] -synonym: "epigenetically modified" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000134 -name: imprinted -def: "Imprinted genes are epigenetically modified genes that are expressed monoallelically according to their parent of origin." [SO:ke] -xref: http:http://en.wikipedia.org/wiki/Genomic_imprinting "wiki" -is_a: SO:0000119 ! regulated -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000135 -name: maternally_imprinted -def: "The maternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "maternally imprinted" EXACT [] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -def: "The paternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "paternally imprinted" EXACT [] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -def: "Allelic exclusion is a process occurring in diploid organisms, where a gene is inactivated and not expressed in that cell." [SO:ke] -comment: Examples are x-inactivation and immunoglobulin formation. -synonym: "allelically excluded" EXACT [] -is_a: SO:0000133 ! 
epigenetically_modified - -[Term] -id: SO:0000138 -name: gene_rearranged_at_DNA_level -def: "An epigenetically modified gene, rearranged at the DNA level." [SO:xp] -synonym: "gene rearranged at DNA level" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000904 ! rearranged_at_DNA_level -relationship: has_quality SO:0000904 ! rearranged_at_DNA_level - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0001680 ! translation_regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -synonym: "DNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000145 -name: recoded_codon -def: "A codon that has been redefined at translation. The redefinition may be as a result of translational bypass, translational frameshifting or stop codon readthrough." [SO:xp] -synonym: "recoded codon" EXACT [] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000146 -name: capped -def: "An attribute describing when a sequence, usually an mRNA is capped by the addition of a modified guanine nucleotide at the 5' end." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0001876 ! partial_genomic_sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unavailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! 
assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "yeast artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "bacterial artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000154 -name: PAC -def: "The P1-derived artificial chromosome are DNA constructs that are derived from the DNA of P1 bacteriophage. They can carry large amounts (about 100-300 kilobases) of other sequences for a variety of bioengineering purposes. It is one type of vector used to clone DNA fragments (100- to 300-kb insert size; average, 150 kb) in Escherichia coli cells." [http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. Drosophila melanogaster PACs carry an average insert size of 80 kb. The library represents a 6-fold coverage of the genome. -synonym: "P1" EXACT [] -synonym: "P1 artificial chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome "wiki" -is_a: SO:0000440 ! 
vector_replicon - -[Term] -id: SO:0000155 -name: plasmid -def: "A self replicating, using the hosts cellular machinery, often circular nucleic acid molecule that is distinct from a chromosome in the organism." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "plasmid sequence" EXACT [] -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as a plasmid or packaged as a phage,since they retain the lambda cos sites." [SO:ma] -comment: Paper: vans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1):9-20. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cosmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Cosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma] -synonym: "phagemid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Phagemid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilizes the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource: mapping and analysis of 96 clones. Genomics 1996. -synonym: "fosmid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Fosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000159 -name: deletion -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." 
[SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -xref: loinc:LA6692-3 "Deletion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_obsolete: true - -[Term] -id: SO:0000161 -name: methylated_A -def: "A modified RNA base in which adenine has been methylated." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinery. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! 
cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a place holder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000166 -name: enhancer_bound_by_factor -def: "An enhancer bound by a factor." [SO:xp] -synonym: "enhancer bound by factor" EXACT [] -is_a: SO:0000165 ! enhancer -intersection_of: SO:0000165 ! enhancer -intersection_of: has_quality SO:0000277 ! bound_by_factor -relationship: has_quality SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." [SO:ke] -synonym: "pol I promoter" EXACT [] -synonym: "polymerase I promoter" EXACT [] -synonym: "RNA polymerase A promoter" EXACT [] -synonym: "RNApol I promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke] -synonym: "pol II promoter" RELATED [] -synonym: "polymerase II promoter" EXACT [] -synonym: "RNA polymerase B promoter" EXACT [] -synonym: "RNApol II promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." [SO:ke] -synonym: "pol III promoter" EXACT [] -synonym: "polymerase III promoter" EXACT [] -synonym: "RNA polymerase C promoter" EXACT [] -synonym: "RNApol III promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "CAAT box" EXACT [] -synonym: "CAAT signal" EXACT [] -synonym: "CAAT-box" EXACT [] -xref: http://en.wikipedia.org/wiki/CAAT_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000173 -name: GC_rich_promoter_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "GC rich promoter region" EXACT [] -synonym: "GC-rich region" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:16858867] -comment: Binds TBP. -synonym: "Goldstein-Hogness box" EXACT [] -synonym: "TATA box" EXACT [] -xref: http://en.wikipedia.org/wiki/TATA_box "wiki" -is_a: SO:0001660 ! core_promoter_element - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT. This region is associated with sigma factor 70." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-10 signal" EXACT [] -synonym: "minus 10 signal" EXACT [] -synonym: "Pribnow box" EXACT [] -synonym: "Pribnow Schaller box" EXACT [] -synonym: "Pribnow-Schaller box" EXACT [] -xref: http://en.wikipedia.org/wiki/Pribnow_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001671 ! 
bacterial_RNApol_promoter_sigma_70 - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA. This region is associated with sigma factor 70." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-35 signal" EXACT [] -synonym: "minus 35 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001671 ! bacterial_RNApol_promoter_sigma_70 - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." [http://www.dddmag.com/Glossary.aspx#r] -synonym: "class I" RELATED [] -synonym: "class I transposon" EXACT [] -synonym: "retrotransposon element" EXACT [] -xref: http://en.wikipedia.org/wiki/Retrotransposon "wiki" -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! 
nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." [SO:ke] -synonym: "class II" RELATED [] -synonym: "class II transposon" EXACT [] -synonym: "DNA transposon" EXACT [] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -synonym: "U2 intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." [SO:ke] -synonym: "long terminal repeat retrotransposon" EXACT [] -synonym: "LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -synonym: "non LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -synonym: "5' intron" EXACT [] -synonym: "5' intron sequence" EXACT [] -synonym: "five prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000191 -name: interior_intron -synonym: "interior intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -synonym: "3' intron" EXACT [] -synonym: "3' intron sequence" RELATED [] -synonym: "three prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." [GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "LINE" EXACT [] -synonym: "LINE element" EXACT [] -synonym: "Long interspersed element" EXACT [] -synonym: "Long interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! 
non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon (here, 'codon' is inclusive of the stop_codon)." [SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke] -subset: DBVAR -synonym: "transchr" RELATED [http://www.ncbi.nlm.nih.gov/dbvar/] -synonym: "translocated sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000201 -name: interior_exon -def: "An exon that is bounded by 5' and 3' splice sites." [PMID:10373547] -synonym: "interior exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The coding exon that is most 3-prime on a given transcript." 
[SO:ma] -synonym: "3' coding exon" RELATED [] -synonym: "three prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." [SO:ke] -synonym: "Short interspersed element" EXACT [] -synonym: "Short interspersed nuclear element" EXACT [] -synonym: "SINE element" EXACT [] -xref: http://en.wikipedia.org/wiki/Short_interspersed_nuclear_element "wiki" -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_variation -synonym: "simple sequence length polymorphism" RELATED [] -synonym: "simple sequence length variation" EXACT [] -synonym: "SSLP" RELATED [] -is_a: SO:0000248 ! 
sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "terminal inverted repeat element" EXACT [] -synonym: "TIR element" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -subset: SOFA -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253)." [SO:ke] -synonym: "tRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -def: "A primary transcript encoding alanyl tRNA." [SO:ke] -synonym: "alanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." [SO:ke] -synonym: "arginine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -synonym: "asparagine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -synonym: "aspartic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." 
[SO:ke] -synonym: "cysteine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." [SO:ke] -synonym: "glycine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." [SO:ke] -synonym: "histidine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke] -synonym: "isoleucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -synonym: "leucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -synonym: "lysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -synonym: "methionine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke] -synonym: "phenylalanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -synonym: "proline tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -synonym: "serine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -synonym: "threonine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -synonym: "tryptophan tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." [SO:ke] -synonym: "tyrosine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." [SO:ke] -synonym: "valine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear RNA (SO:0000274)." [SO:ke] -synonym: "snRNA primary transcript" EXACT [] -is_a: SO:0000483 ! 
nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -synonym: "snoRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a nucleotide molecule that binds a Transcription Factor or Transcription Factor complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -is_a: SO:0001679 ! 
transcription_regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The in-frame interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -synonym: "transcript attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterized by large modular imperfect long inverted repeats." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "foldback element" EXACT [] -synonym: "long inverted repeat element" RELATED [] -synonym: "LVR element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The sequences extending on either side of a specific region." [SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000240 -name: chromosome_variation -synonym: "chromosome variation" EXACT [] -is_a: SO:0001507 ! variant_collection -disjoint_from: SO:0000400 ! sequence_attribute -relationship: part_of SO:0001524 ! chromosomally_aberrant_genome - -[Term] -id: SO:0000241 -name: internal_UTR -def: "A UTR bordered by the terminal and initial codons of two CDSs in a polycistronic transcript. Every UTR is either 5', 3' or internal." [SO:cjm] -synonym: "internal UTR" EXACT [] -is_a: SO:0000203 ! 
UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polycistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -synonym: "untranslated region polycistronic mRNA" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke] -synonym: "internal ribosomal entry sequence" EXACT [] -synonym: "internal ribosomal entry site" EXACT [] -synonym: "internal ribosome entry sequence" RELATED [] -synonym: "internal ribosome entry site" EXACT [] -synonym: "IRES" EXACT [] -xref: http://en.wikipedia.org/wiki/Internal_ribosome_entry_site "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_obsolete: true - -[Term] -id: SO:0000246 -name: polyadenylated -def: "A attribute describing the addition of a poly A tail to the 3' end of a mRNA molecule." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000248 -name: sequence_length_variation -synonym: "sequence length variation" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -synonym: "modified RNA base feature" EXACT [] -is_a: SO:0001236 ! 
base - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000254 -name: alanyl_tRNA -def: "A tRNA sequence that has an alanine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "alanyl tRNA" EXACT [] -synonym: "alanyl-transfer ribonucleic acid" EXACT [] -synonym: "alanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000211 ! alanine_tRNA_primary_transcript - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -synonym: "rRNA small subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -def: "A tRNA sequence that has an asparagine anticodon, and a 3' asparagine binding region." [SO:ke] -synonym: "asparaginyl tRNA" EXACT [] -synonym: "asparaginyl-transfer ribonucleic acid" EXACT [] -synonym: "asparaginyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000213 ! asparagine_tRNA_primary_transcript - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -def: "A tRNA sequence that has an aspartic acid anticodon, and a 3' aspartic acid binding region." [SO:ke] -synonym: "aspartyl tRNA" EXACT [] -synonym: "aspartyl-transfer ribonucleic acid" EXACT [] -synonym: "aspartyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000214 ! aspartic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -def: "A tRNA sequence that has a cysteine anticodon, and a 3' cysteine binding region." [SO:ke] -synonym: "cysteinyl tRNA" EXACT [] -synonym: "cysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "cysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000215 ! cysteine_tRNA_primary_transcript - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -def: "A tRNA sequence that has a glutamine anticodon, and a 3' glutamine binding region." 
[SO:ke] -synonym: "glutaminyl tRNA" EXACT [] -synonym: "glutaminyl-transfer ribonucleic acid" EXACT [] -synonym: "glutaminyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000216 ! glutamic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -def: "A tRNA sequence that has a glutamic acid anticodon, and a 3' glutamic acid binding region." [SO:ke] -synonym: "glutamyl tRNA" EXACT [] -synonym: "glutamyl-transfer ribonucleic acid" EXACT [] -synonym: "glutamyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000217 ! glutamine_tRNA_primary_transcript - -[Term] -id: SO:0000261 -name: glycyl_tRNA -def: "A tRNA sequence that has a glycine anticodon, and a 3' glycine binding region." [SO:ke] -synonym: "glycyl tRNA" EXACT [] -synonym: "glycyl-transfer ribonucleic acid" RELATED [] -synonym: "glycyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000218 ! glycine_tRNA_primary_transcript - -[Term] -id: SO:0000262 -name: histidyl_tRNA -def: "A tRNA sequence that has a histidine anticodon, and a 3' histidine binding region." [SO:ke] -synonym: "histidyl tRNA" EXACT [] -synonym: "histidyl-transfer ribonucleic acid" EXACT [] -synonym: "histidyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000219 ! histidine_tRNA_primary_transcript - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -def: "A tRNA sequence that has an isoleucine anticodon, and a 3' isoleucine binding region." [SO:ke] -synonym: "isoleucyl tRNA" EXACT [] -synonym: "isoleucyl-transfer ribonucleic acid" EXACT [] -synonym: "isoleucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000220 ! isoleucine_tRNA_primary_transcript - -[Term] -id: SO:0000264 -name: leucyl_tRNA -def: "A tRNA sequence that has a leucine anticodon, and a 3' leucine binding region." 
[SO:ke] -synonym: "leucyl tRNA" EXACT [] -synonym: "leucyl-transfer ribonucleic acid" EXACT [] -synonym: "leucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000221 ! leucine_tRNA_primary_transcript - -[Term] -id: SO:0000265 -name: lysyl_tRNA -def: "A tRNA sequence that has a lysine anticodon, and a 3' lysine binding region." [SO:ke] -synonym: "lysyl tRNA" EXACT [] -synonym: "lysyl-transfer ribonucleic acid" EXACT [] -synonym: "lysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000222 ! lysine_tRNA_primary_transcript - -[Term] -id: SO:0000266 -name: methionyl_tRNA -def: "A tRNA sequence that has a methionine anticodon, and a 3' methionine binding region." [SO:ke] -synonym: "methionyl tRNA" EXACT [] -synonym: "methionyl-transfer ribonucleic acid" EXACT [] -synonym: "methionyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000223 ! methionine_tRNA_primary_transcript - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -def: "A tRNA sequence that has a phenylalanine anticodon, and a 3' phenylalanine binding region." [SO:ke] -synonym: "phenylalanyl tRNA" EXACT [] -synonym: "phenylalanyl-transfer ribonucleic acid" EXACT [] -synonym: "phenylalanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000224 ! phenylalanine_tRNA_primary_transcript - -[Term] -id: SO:0000268 -name: prolyl_tRNA -def: "A tRNA sequence that has a proline anticodon, and a 3' proline binding region." [SO:ke] -synonym: "prolyl tRNA" EXACT [] -synonym: "prolyl-transfer ribonucleic acid" EXACT [] -synonym: "prolyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000225 ! proline_tRNA_primary_transcript - -[Term] -id: SO:0000269 -name: seryl_tRNA -def: "A tRNA sequence that has a serine anticodon, and a 3' serine binding region." 
[SO:ke] -synonym: "seryl tRNA" EXACT [] -synonym: "seryl-transfer ribonucleic acid" RELATED [] -synonym: "seryl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000226 ! serine_tRNA_primary_transcript - -[Term] -id: SO:0000270 -name: threonyl_tRNA -def: "A tRNA sequence that has a threonine anticodon, and a 3' threonine binding region." [SO:ke] -synonym: "threonyl tRNA" EXACT [] -synonym: "threonyl-transfer ribonucleic acid" EXACT [] -synonym: "threonyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000227 ! threonine_tRNA_primary_transcript - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -def: "A tRNA sequence that has a tryptophan anticodon, and a 3' tryptophan binding region." [SO:ke] -synonym: "tryptophanyl tRNA" EXACT [] -synonym: "tryptophanyl-transfer ribonucleic acid" EXACT [] -synonym: "tryptophanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000228 ! tryptophan_tRNA_primary_transcript - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -def: "A tRNA sequence that has a tyrosine anticodon, and a 3' tyrosine binding region." [SO:ke] -synonym: "tyrosyl tRNA" EXACT [] -synonym: "tyrosyl-transfer ribonucleic acid" EXACT [] -synonym: "tyrosyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000229 ! tyrosine_tRNA_primary_transcript - -[Term] -id: SO:0000273 -name: valyl_tRNA -def: "A tRNA sequence that has a valine anticodon, and a 3' valine binding region." [SO:ke] -synonym: "valyl tRNA" EXACT [] -synonym: "valyl-transfer ribonucleic acid" EXACT [] -synonym: "valyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000230 ! valine_tRNA_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000231 ! snRNA_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: derives_from SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000277 -name: bound_by_factor -def: "An attribute describing a sequence that is bound by another molecule." 
[SO:ke] -comment: Formerly called transcript_by_bound_factor. -synonym: "bound by factor" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000278 -name: transcript_bound_by_nucleic_acid -def: "A transcript that is bound by a nucleic acid." [SO:xp] -comment: Formerly called transcript_by_bound_nucleic_acid. -synonym: "transcript bound by nucleic acid" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000876 ! bound_by_nucleic_acid -relationship: has_quality SO:0000876 ! bound_by_nucleic_acid - -[Term] -id: SO:0000279 -name: transcript_bound_by_protein -def: "A transcript that is bound by a protein." [SO:xp] -comment: Formerly called transcript_by_bound_protein. -synonym: "transcript bound by protein" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000875 ! bound_by_protein -relationship: has_quality SO:0000875 ! bound_by_protein - -[Term] -id: SO:0000280 -name: engineered_gene -def: "A gene that is engineered." [SO:xp] -synonym: "engineered gene" EXACT [] -is_a: SO:0000704 ! gene -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000281 -name: engineered_foreign_gene -def: "A gene that is engineered and foreign." [SO:xp] -synonym: "engineered foreign gene" EXACT [] -is_a: SO:0000280 ! engineered_gene -is_a: SO:0000285 ! foreign_gene -is_a: SO:0000805 ! engineered_foreign_region -intersection_of: SO:0000280 ! engineered_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000282 -name: mRNA_with_minus_1_frameshift -def: "An mRNA with a minus 1 frameshift." 
[SO:xp] -synonym: "mRNA with minus 1 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000866 ! minus_1_frameshift -relationship: has_quality SO:0000866 ! minus_1_frameshift - -[Term] -id: SO:0000283 -name: engineered_foreign_transposable_element_gene -def: "A transposable_element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign transposable element gene" EXACT [] -is_a: SO:0000111 ! transposable_element_gene -is_a: SO:0000281 ! engineered_foreign_gene -intersection_of: SO:0000111 ! transposable_element_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartite and interrupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000285 -name: foreign_gene -def: "A gene that is foreign." [SO:xp] -synonym: "foreign gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "direct terminal repeat" RELATED [] -synonym: "long terminal repeat" EXACT [] -synonym: "LTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Long_terminal_repeat "wiki" -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000287 -name: fusion_gene -def: "A gene that is a fusion." [SO:xp] -synonym: "fusion gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Fusion_gene "wiki" -is_a: SO:0000704 ! 
gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000806 ! fusion -relationship: has_quality SO:0000806 ! fusion - -[Term] -id: SO:0000288 -name: engineered_fusion_gene -def: "A fusion gene that is engineered." [SO:xp] -synonym: "engineered fusion gene" EXACT [] -is_a: SO:0000280 ! engineered_gene -is_a: SO:0000287 ! fusion_gene -intersection_of: SO:0000287 ! fusion_gene -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000289 -name: microsatellite -def: "A repeat_region containing repeat_units (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite" EXACT [] -synonym: "dinucleotide repeat microsatellite feature" EXACT [] -synonym: "dinucleotide repeat microsatellite locus" EXACT [] -synonym: "dinucleotide repeat microsatellite marker" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite marker" RELATED [] -synonym: "rinucleotide repeat microsatellite" EXACT [] -synonym: "trinucleotide repeat microsatellite feature" EXACT [] -synonym: "trinucleotide repeat microsatellite locus" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_obsolete: true - -[Term] -id: SO:0000293 -name: engineered_foreign_repetitive_element -def: "A repetitive element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign repetitive element" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000805 ! 
engineered_foreign_region -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -synonym: "U12 intron" EXACT [] -synonym: "U12-dependent intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! replicon - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "D-loop" EXACT [] -synonym: "displacement loop" RELATED [] -xref: http://en.wikipedia.org/wiki/D_loop "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -synonym: "recombination feature" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000299 -name: specific_recombination_site -synonym: "specific recombination site" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -synonym: "recombination feature of rearranged gene" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000301 -name: vertebrate_immune_system_gene_recombination_feature -synonym: "vertebrate immune system gene recombination feature" EXACT [] -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene recombination feature" EXACT [] -synonym: "J-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interrupted palindrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_base -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. 
-subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001720 ! epigenetically_modified_region - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_base - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_obsolete: true - -[Term] -id: SO:0000309 -name: computed_feature -is_obsolete: true - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_obsolete: true - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to: -is_obsolete: true - -[Term] -id: SO:0000312 -name: experimentally_determined -def: "Attribute to describe a feature that has been experimentally verified." [SO:ke] -synonym: "experimentally determined" EXACT [] -is_a: SO:0000789 ! validated - -[Term] -id: SO:0000313 -name: stem_loop -alt_id: SO:0000019 -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "RNA_hairpin_loop" EXACT [] -synonym: "stem loop" EXACT [] -synonym: "stem-loop" EXACT [] -xref: http://en.wikipedia.org/wiki/Stem_loop "wiki" -is_a: SO:0000122 ! RNA_sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." 
[SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: TSS -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cDNA clone" EXACT [] -is_a: SO:0000151 ! clone -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000756 ! cDNA -relationship: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke] -synonym: "intronic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer -is_a: SO:0000841 ! 
spliceosomal_intron_region - -[Term] -id: SO:0000321 -name: mRNA_with_plus_1_frameshift -def: "An mRNA with a plus 1 frameshift." [SO:ke] -synonym: "mRNA with plus 1 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000868 ! plus_1_frameshift -relationship: has_quality SO:0000868 ! plus_1_frameshift - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -synonym: "nuclease hypersensitive site" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "coding start" EXACT [] -synonym: "translation initiation site" EXACT [] -synonym: "translation start" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke] -synonym: "coding end" EXACT [] -synonym: "translation termination site" EXACT [] -synonym: "translation_end" EXACT [] -is_a: SO:0000851 ! 
CDS_region - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray oligo" EXACT [] -synonym: "microarray oligonucleotide" EXACT [] -is_a: SO:0000051 ! probe - -[Term] -id: SO:0000329 -name: mRNA_with_plus_2_frameshift -def: "An mRNA with a plus 2 frameshift." [SO:xp] -synonym: "mRNA with plus 2 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000869 ! plus_2_framshift -relationship: has_quality SO:0000869 ! plus_2_framshift - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! 
conserved_region - -[Term] -id: SO:0000335 -name: mRNA_with_minus_2_frameshift -def: "A mRNA with a minus 2 frameshift." [SO:ke] -synonym: "mRNA with minus 2 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000867 ! minus_2_frameshift -relationship: has_quality SO:0000867 ! minus_2_frameshift - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0001411 ! biological_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITEs do not encode proteins." [http://www.pnas.org/cgi/content/full/97/18/10083] -synonym: "miniature inverted repeat transposable element" EXACT [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome which promotes recombination." 
[SO:rd] -synonym: "recombination hotspot" EXACT [] -xref: http://en.wikipedia.org/wiki/Recombination_hotspot "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -synonym: "site specific recombination target region" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! 
tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000346 -name: loxP_site -synonym: "Cre-recombination target region" RELATED [] -synonym: "loxP site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000348 -name: nucleic_acid -def: "An attribute describing a sequence consisting of nucleobases bound to repeating units. The forms found in nature are deoxyribonucleic acid (DNA), where the repeating units are 2-deoxy-D-ribose rings connected to a phosphate backbone, and ribonucleic acid (RNA), where the repeating units are D-ribose rings connected to a phosphate backbone." [CHEBI:33696, RSC:cb] -synonym: "nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleic_acid "wiki" -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000350 -name: FRT_site -def: "An inversion site found on the Saccharomyces cerevisiae 2 micron plasmid." [SO:ma] -synonym: "FLP recombination target region" EXACT [] -synonym: "FRT site" EXACT [] -is_a: SO:0000948 ! inversion_site - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "An attribute to decide a sequence of nucleotides, nucleotide analogs, or amino acids that has been designed by an experimenter and which may, or may not, correspond with any natural sequence." [SO:ma] -synonym: "synthetic sequence" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000352 -name: DNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a 2-deoxy-D-ribose ring connected to a phosphate backbone." [RSC:cb] -is_a: SO:0000348 ! 
nucleic_acid - -[Term] -id: SO:0000353 -name: sequence_assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -def: "A region of intronic nucleotide sequence targeted by a nuclease enzyme." [SO:ke] -synonym: "group 1 intron homing endonuclease target region" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which is co-inherited as the result of the lack of historic recombination within it." [SO:ma] -synonym: "haplotype block" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000356 -name: RNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a D-ribose ring connected to a phosphate backbone." [RSC:cb] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: flanked -def: "An attribute describing a region that is bounded either side by a particular kind of region." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000359 -name: floxed -def: "An attribute describing sequence that is flanked by Lox-P sites." [SO:ke] -xref: http://en.wikipedia.org/wiki/Floxed "wiki" -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! 
CDS_region - -[Term] -id: SO:0000361 -name: FRT_flanked -def: "An attribute to describe sequence that is flanked by the FLP recombinase recognition site, FRT." [SO:ke] -synonym: "FRT flanked" EXACT [] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000362 -name: invalidated_by_chimeric_cDNA -def: "A cDNA clone constructed from more than one mRNA. Usually an experimental artifact." [SO:ma] -synonym: "invalidated by chimeric cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000363 -name: floxed_gene -def: "A transgene that is floxed." [SO:xp] -synonym: "floxed gene" EXACT [] -is_a: SO:0000902 ! transgene -intersection_of: SO:0000902 ! transgene -intersection_of: has_quality SO:0000359 ! floxed -relationship: has_quality SO:0000359 ! floxed - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposable element." [SO:ke] -synonym: "transposable element flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "A region encoding an integrase which acts at a site adjacent to it (attI_site) to insert DNA which must include but is not limited to an attC_site." [SO:as] -xref: http://en.wikipedia.org/wiki/Integron "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000367 -name: attI_site -def: "A region within an integron, adjacent to an integrase, at which site specific recombination involving an attC_site takes place." [SO:as] -synonym: "attI site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." 
[SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -is_obsolete: true - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/~smaloy/Glossary/C.html] -synonym: "conjugative transposon" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0001185 ! enzymatic -relationship: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0000373 -name: recombinationally_inverted_gene -def: "A recombinationally rearranged gene by inversion." [SO:xp] -synonym: "recombinationally inverted gene" EXACT [] -is_a: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: has_quality SO:1000036 ! inversion -relationship: has_quality SO:1000036 ! inversion - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -is_a: SO:0000372 ! enzymatic_RNA -intersection_of: SO:0000372 ! enzymatic_RNA -intersection_of: has_quality SO:0001186 ! ribozymic -relationship: has_quality SO:0001186 ! 
ribozymic - -[Term] -id: SO:0000375 -name: rRNA_5_8S -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 6S RNA represses expression from a sigma70-dependent promoter during stationary phase." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S RNA" EXACT [] -synonym: "RNA 6S" EXACT [] -xref: http://en.wikipedia.org/wiki/6S_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB RsmB RNA" EXACT [] -synonym: "CsrB-RsmB RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -synonym: "DsrA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/DsrA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -synonym: "GcvB RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/GcvB_RNA "wiki" -is_a: SO:0000378 ! DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." 
[PMID:2436805] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -is_a: SO:0000715 ! RNA_motif -intersection_of: SO:0000715 ! RNA_motif -intersection_of: has_quality SO:0001186 ! ribozymic -relationship: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000381 -name: group_IIA_intron -synonym: "group IIA intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -synonym: "group IIB intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -synonym: "MicF RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MicF_RNA "wiki" -is_a: SO:0000644 ! antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -synonym: "OxyS RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/OxyS_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterized activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. 
Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -synonym: "RprA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RprA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -synonym: "RRE RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -synonym: "spot-42 RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Spot_42_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesizes telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. 
There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -def: "An attribute describes a quality of sequence." [SO:ke] -synonym: "sequence attribute" EXACT [] - -[Term] -id: SO:0000401 -name: gene_attribute -synonym: "gene attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_obsolete: true - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0005837 ! U14_snoRNA_primary_transcript - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. 
The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron. Twintrons are group II or III introns, into which another group II or III intron has been transposed." [PMID:1899376, PMID:7823908] -xref: http://en.wikipedia.org/wiki/Twintron "wiki" -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A biological_region of sequence that, in the molecule, interacts selectively and non-covalently with other molecules. A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: See GO:0005488 : binding. 
-subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with polypeptide molecules." [SO:ke] -comment: See GO:0042277 : peptide binding. -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000411 -name: rescue_region -def: "A region that rescues." [SO:xp] -synonym: "rescue fragment" EXACT [] -synonym: "rescue region" EXACT [] -synonym: "rescue segment" RELATED [] -is_a: SO:0000695 ! reagent -intersection_of: SO:0000695 ! reagent -intersection_of: has_quality SO:0000814 ! rescue -relationship: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000414 -name: invalidated_by_genomic_contamination -def: "An attribute to describe a feature that is invalidated due to genomic contamination." [SO:ke] -synonym: "invalidated by genomic contamination" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000415 -name: invalidated_by_genomic_polyA_primed_cDNA -def: "An attribute to describe a feature that is invalidated due to polyA priming." [SO:ke] -synonym: "invalidated by genomic polyA primed cDNA" EXACT [] -is_a: SO:0000790 ! 
invalidated - -[Term] -id: SO:0000416 -name: invalidated_by_partial_processing -def: "An attribute to describe a feature that is invalidated due to partial processing." [SO:ke] -synonym: "invalidated by partial processing" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000417 -name: polypeptide_domain -alt_id: BS:00012 -alt_id: BS:00134 -alt_id: SO:0001069 -def: "A structurally or functionally defined protein region. In proteins with multiple domains, the combination of the domains determines the function of the protein. A region which has been shown to recur throughout evolution." [EBIBS:GAR] -comment: Range. Old definition from before biosapiens: A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains. -subset: biosapiens -synonym: "ca_bind" RELATED BS [uniprot:feature_type] -synonym: "DNA_bind" RELATED BS [uniprot:feature_type] -synonym: "domain" BROAD BS [uniprot:feature_type] -synonym: "np_bind" RELATED BS [uniprot:feature_type] -synonym: "polypeptide domain" EXACT [] -synonym: "polypeptide_structural_domain" EXACT BS [] -synonym: "structural domain" BROAD BS [] -synonym: "zn_fing" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. 
-subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0001527 ! peptide_localization_signal -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -synonym: "5' TIR" EXACT [] -synonym: "five prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -synonym: "3' TIR" EXACT [] -synonym: "three prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -synonym: "U5 long terminal repeat region" EXACT [] -synonym: "U5 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000423 -name: R_LTR_region -synonym: "R long terminal repeat region" EXACT [] -synonym: "R LTR region" EXACT [] -is_a: SO:0000848 ! 
LTR_component - -[Term] -id: SO:0000424 -name: U3_LTR_region -synonym: "U3 long terminal repeat region" EXACT [] -synonym: "U3 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000425 -name: five_prime_LTR -synonym: "5' long terminal repeat" EXACT [] -synonym: "5' LTR" EXACT [] -synonym: "five prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -synonym: "3' long terminal repeat" EXACT [] -synonym: "3' LTR" EXACT [] -synonym: "three prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -synonym: "R 5' long term repeat region" EXACT [] -synonym: "R five prime LTR region" EXACT [] -is_a: SO:0000423 ! R_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -synonym: "U5 5' long terminal repeat region" EXACT [] -synonym: "U5 five prime LTR region" EXACT [] -is_a: SO:0000422 ! U5_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -synonym: "U3 5' long term repeat region" EXACT [] -synonym: "U3 five prime LTR region" EXACT [] -is_a: SO:0000424 ! U3_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -synonym: "R 3' long terminal repeat region" EXACT [] -synonym: "R three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -synonym: "U3 3' long terminal repeat region" EXACT [] -synonym: "U3 three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -synonym: "U5 3' long terminal repeat region" EXACT [] -synonym: "U5 three prime LTR region" EXACT [] -is_a: SO:0000849 ! 
three_prime_LTR_component - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -synonym: "non LTR retrotransposon polymeric tract" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: target_site_duplication -def: "A sequence of the target DNA that is duplicated when a transposable element or phage inserts; usually found at each end the insertion." [http://www.koko.gov.my/CocoaBioTech/Glossaryt.html] -synonym: "target site duplication" EXACT [] -is_a: SO:0000314 ! direct_repeat -intersection_of: SO:0000314 ! direct_repeat -intersection_of: derives_from SO:0000101 ! transposable_element -relationship: derives_from SO:0000101 ! transposable_element - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." [SO:ke] -synonym: "LTR retrotransposon poly purine tract" RELATED [] -synonym: "RR tract" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -synonym: "inverted ring chromosome" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! ring_chromosome - -[Term] -id: SO:0000440 -name: vector_replicon -def: "A replicon that has been modified to act as a vector for foreign sequence." [SO:ma] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. -synonym: "vector" EXACT [] -synonym: "vector replicon" EXACT [] -xref: http://en.wikipedia.org/wiki/Vector_(molecular_biology) "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000443 -name: polymer_attribute -def: "An attribute to describe the kind of biological sequence." [SO:ke] -synonym: "polymer attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -synonym: "three prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "5' nc exon" EXACT [] -synonym: "5' non coding exon" EXACT [] -synonym: "five prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." [SO:ke] -synonym: "UTR intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." 
[SO:ke] -synonym: "five prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -synonym: "three prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequency of these components." [SO:ma] -synonym: "random sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -synonym: "chromosome interband" RELATED [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000451 -name: gene_with_polyadenylated_mRNA -def: "A gene that encodes a polyadenylated mRNA." [SO:xp] -synonym: "gene with polyadenylated mRNA" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000871 ! polyadenylated_mRNA -relationship: transcribed_to SO:0000871 ! polyadenylated_mRNA - -[Term] -id: SO:0000452 -name: transgene_attribute -is_obsolete: true - -[Term] -id: SO:0000453 -name: chromosomal_transposition -def: "A chromosome structure variant whereby a region of a chromosome has been transferred to another position. Among interchromosomal rearrangements, the term transposition is reserved for that class in which the telomeres of the chromosomes involved are coupled (that is to say, form the two ends of a single DNA molecule) as in wild-type." [FB:reference_manual, SO:ke] -synonym: "chromosomal transposition" EXACT [] -synonym: "transposition" NARROW [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." 
[http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000455 -name: gene_with_mRNA_with_frameshift -def: "A gene that encodes an mRNA with a frameshift." [SO:xp] -synonym: "gene with mRNA with frameshift" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000865 ! frameshift -relationship: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000456 -name: recombinationally_rearranged_gene -def: "A gene that is recombinationally rearranged." [SO:ke] -synonym: "recombinationally rearranged gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000940 ! recombinationally_rearranged -relationship: has_quality SO:0000940 ! recombinationally_rearranged - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -def: "A chromosome duplication involving an insertion from another chromosome." [SO:ke] -synonym: "interchromosomal duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene_segment -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D gene" EXACT [] -synonym: "D-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000459 -name: gene_with_trans_spliced_transcript -def: "A gene with a transcript that is trans-spliced." [SO:xp] -synonym: "gene with trans spliced transcript" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000479 ! trans_spliced_transcript -relationship: transcribed_to SO:0000479 ! 
trans_spliced_transcript - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_segment -comment: I am using the term segment instead of gene here to avoid confusion with the region 'gene'. -synonym: "vertebrate immunoglobulin T cell receptor segment" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite deficiency" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000463 -name: encodes_alternately_spliced_transcripts -def: "A gene that encodes more than one transcript." [SO:ke] -synonym: "encodes alternately spliced transcripts" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome deletion whereby a chromosome is generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus duplication" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! 
intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene_segment -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene" EXACT [] -synonym: "V-GENE" EXACT [] -synonym: "variable_gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -def: "An attribute describing a gene sequence where the resulting protein is regulated by the stability of the resulting protein." [SO:ke] -synonym: "post translationally regulated by protein stability" EXACT [] -synonym: "post-translationally regulated by protein stability" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -def: "An attribute describing a gene sequence where the resulting protein is modified to regulate it." [SO:ke] -synonym: "post translationally regulated by protein modification" EXACT [] -synonym: "post-translationally regulated by protein modification" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene_segment -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene" EXACT [] -synonym: "J-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000471 -name: autoregulated -def: "The gene product is involved in its own transcriptional regulation." 
[SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -def: "The gene product is involved in its own transcriptional regulation where it decreases transcription." [SO:ke] -synonym: "negatively autoregulated" EXACT [] -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -def: "The gene product is involved in its own transcriptional regulation, where it increases transcription." [SO:ke] -synonym: "positively autoregulated" EXACT [] -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." [SO:ke] -synonym: "contig read" EXACT [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -def: "A gene that is polycistronic." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000478 -name: C_gene_segment -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C gene" EXACT [] -synonym: "C_GENE" EXACT [] -synonym: "constant gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000479 -name: trans_spliced_transcript -def: "A transcript that is trans-spliced." 
[SO:xp] -synonym: "trans spliced transcript" EXACT [] -synonym: "trans-spliced transcript" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000870 ! trans_spliced -relationship: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly." [SO:ke] -synonym: "tiling path clone" EXACT [] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occurring at the termini of a DNA transposon." [SO:ke] -synonym: "terminal inverted repeat" EXACT [] -synonym: "TIR" EXACT [] -is_a: SO:0000294 ! inverted_repeat -relationship: part_of SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor gene cluster" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000202 ! 
three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-CLUSTER" EXACT [] -synonym: "DJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-C-CLUSTER" EXACT [] -synonym: "VDJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-CLUSTER" EXACT [] -synonym: "VDJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000574 ! 
VDJ_gene_segment - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-C-CLUSTER" RELATED [] -synonym: "VJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-C-CLUSTER" EXACT [] -synonym: "VJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-CLUSTER" EXACT [] -synonym: "VJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -synonym: "D gene recombination feature" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-HEPTAMER" EXACT [] -synonym: "three prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-NOMAMER" EXACT [] -synonym: "three prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-SPACER" EXACT [] -synonym: "three prime D spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-HEPTAMER" EXACT [] -synonym: "five prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-NONAMER" EXACT [] -synonym: "five prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'-SPACER" EXACT [] -synonym: "five prime D spacer" EXACT [] -synonym: "five prime D-spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -synonym: "Hoogsteen base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Hoogsteen_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." [SO:ke] -synonym: "reverse Hoogsteen base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. 
A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ C cluster" EXACT [] -synonym: "D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ cluster" EXACT [] -synonym: "D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J C cluster" EXACT [] -synonym: "D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000507 -name: pseudogenic_exon -def: "A non functional descendant of an exon, part of a pseudogene." [SO:ke] -comment: This is the analog of the exon of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon -relationship: part_of SO:0000516 ! pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J cluster" EXACT [] -synonym: "D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J C cluster" EXACT [] -synonym: "D-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! 
C_gene_segment - -[Term] -id: SO:0000510 -name: VD_gene_segment -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V_D_GENE" EXACT [] -synonym: "VD gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J C cluster" EXACT [] -synonym: "J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus aneuploid" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J cluster" EXACT [] -synonym: "J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J nonamer" EXACT [] -synonym: "J-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J heptamer" EXACT [] -synonym: "J-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -def: "A non functional descendant of a transcript, part of a pseudogene." [SO:ke] -comment: This is the analog of the transcript of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic transcript" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J spacer" EXACT [] -synonym: "J-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ cluster" EXACT [] -synonym: "V-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J cluster" EXACT [] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ C cluster" EXACT [] -synonym: "V-(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ cluster" EXACT [] -synonym: "V-(VDJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000574 ! 
VDJ_gene_segment - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J cluster" EXACT [] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ C cluster" EXACT [] -synonym: "V-(VJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ cluster" EXACT [] -synonym: "V-(VJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J cluster" EXACT [] -synonym: "V-(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V cluster" EXACT [] -synonym: "V-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ C cluster" EXACT [] -synonym: "V-D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ cluster" EXACT [] -synonym: "V-D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000572 ! 
DJ_gene_segment - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J C cluster" EXACT [] -synonym: "V-D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J cluster" EXACT [] -synonym: "V-D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J C cluster" EXACT [] -synonym: "V-D-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! 
C_gene_segment - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J cluster" EXACT [] -synonym: "V-D-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V heptamer" EXACT [] -synonym: "V-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J cluster" EXACT [] -synonym: "V-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J C cluster" EXACT [] -synonym: "V-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! 
J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V nonamer" EXACT [] -synonym: "V-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V spacer" EXACT [] -synonym: "V-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene recombination feature" EXACT [] -synonym: "V-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-C-CLUSTER" EXACT [] -synonym: "DJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! 
DJ_gene_segment - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-C-CLUSTER" EXACT [] -synonym: "DJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-C-CLUSTER" EXACT [] -synonym: "VDJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ C cluster" EXACT [] -synonym: "V-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." 
[http://www.pnas.org/cgi/content/full/100/11/6569] -synonym: "ISCR" RELATED [] -xref: http://en.wikipedia.org/wiki/Helitron "wiki" -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -synonym: "recoding pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -synonym: "designed sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite duplication" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000548 -name: gene_with_edited_transcript -def: "A gene that encodes a transcript that is edited." [SO:xp] -synonym: "gene with edited transcript" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000873 ! edited_transcript -relationship: transcribed_to SO:0000873 ! edited_transcript - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived duplication plus aneuploid" EXACT [] -is_a: SO:1000038 ! 
intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -def: "A chromosome structural variation whereby either a chromosome exists in addition to the normal chromosome complement or is lacking." [SO:ke] -comment: Examples are Nullo-4, Haplo-4 and triplo-4 in Drosophila. -synonym: "aneuploid chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -synonym: "polyadenylation termination signal" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "A region in the 5' UTR that pairs with the 16S rRNA during formation of the preinitiation complex." [SO:jh] -comment: Not found in Eukaryotic sequence. -synonym: "five prime ribosome binding site" EXACT [] -synonym: "RBS" RELATED [] -synonym: "Shine Dalgarno sequence" EXACT [] -synonym: "Shine-Dalgarno sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Shine-Dalgarno_sequence "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -alt_id: SO:0001430 -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation. The boundary between the UTR and the polyA sequence." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA cleavage site" EXACT [] -synonym: "polyA junction" EXACT [] -synonym: "polyA site" EXACT [] -synonym: "polyA_junction" EXACT [] -synonym: "polyadenylation site" RELATED [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! 
mature_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "5' clip" RELATED [] -synonym: "five prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'RS" EXACT [] -synonym: "five prime D recombination signal sequence" EXACT [] -synonym: "five prime D-recombination signal sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "3'-clip" EXACT [] -synonym: "three prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C cluster" EXACT [] -synonym: "C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D cluster" EXACT [] -synonym: "D-CLUSTER" EXACT [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J cluster" EXACT [] -synonym: "D-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: "Seven nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or J-gene recombination feature of an immunoglobulin or T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "HEPTAMER" RELATED [] -synonym: "heptamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -synonym: "nonamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000563 -name: vertebrate_immune_system_gene_recombination_spacer -synonym: "vertebrate immune system gene recombination spacer" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J C cluster" EXACT [] -synonym: "V-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J C cluster" EXACT [] -synonym: "V-(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J C cluster" EXACT [] -synonym: "V-(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -synonym: "inversion derived aneuploid chromosome" EXACT [] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promoter -synonym: "bidirectional promoter" EXACT [] -is_a: SO:0000167 ! 
promoter - -[Term] -id: SO:0000569 -name: retrotransposed -alt_id: SO:0100042 -def: "An attribute of a feature that occurred as the product of a reverse transcriptase mediated event." [SO:ke] -comment: GO:0003964 RNA-directed DNA polymerase activity. -xref: http://en.wikipedia.org/wiki/Retrotransposed "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-RS" EXACT [] -synonym: "three prime D recombination signal sequence" EXACT [] -synonym: "three_prime_D-recombination_signal_sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_encoding -synonym: "miRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000572 -name: DJ_gene_segment -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D-J-GENE" EXACT [] -synonym: "DJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000573 -name: rRNA_encoding -synonym: "rRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000574 -name: VDJ_gene_segment -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-D-J-GENE" EXACT [] -synonym: "VDJ gene" EXACT [] -is_a: SO:0000936 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000575 -name: scRNA_encoding -synonym: "scRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000576 -name: VJ_gene_segment -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-J-GENE" EXACT [] -synonym: "VJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_encoding -synonym: "snoRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." [SO:ma] -synonym: "edited transcript feature" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -synonym: "methylation guide snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." [SO:ke] -synonym: "rRNA cleavage snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "pre edited region" EXACT [] -synonym: "pre-edited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "A tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. TmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and Eukaryote nuclear genomes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa RNA" RELATED [] -synonym: "ssrA" RELATED [] -xref: http://en.wikipedia.org/wiki/TmRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_encoding -synonym: "C/D box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." [SO:ke] -synonym: "10Sa RNA primary transcript" RELATED [] -synonym: "ssrA RNA primary transcript" RELATED [] -synonym: "tmRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. 
They catalyze their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -is_a: SO:0000188 ! intron -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001186 ! ribozymic -relationship: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -synonym: "SRP RNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. 
Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000589 ! SRP_RNA_primary_transcript - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A tertiary structure in RNA where nucleotides in a loop form base pairs with a region of RNA downstream of the loop." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Pseudoknot "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "classical pseudoknot" EXACT [] -synonym: "H pseudoknot" EXACT [] -synonym: "H-pseudoknot" EXACT [] -synonym: "H-type pseudoknot" EXACT [] -synonym: "hairpin-type pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. 
This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000595 ! C_D_box_snoRNA_primary_transcript - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "box H/ACA snoRNA" EXACT [] -synonym: "H ACA box snoRNA" EXACT [] -synonym: "H/ACA box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000596 ! H_ACA_box_snoRNA_primary_transcript - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." [SO:ke] -synonym: "C/D box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! 
snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -synonym: "H ACA box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." [http://www.rna.ucla.edu/index.html] -is_obsolete: true - -[Term] -id: SO:0000598 -name: edited_by_C_insertion_and_dinucleotide_insertion -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000599 -name: edited_by_C_to_U_substitution -is_obsolete: true - -[Term] -id: SO:0000600 -name: edited_by_A_to_I_substitution -is_obsolete: true - -[Term] -id: SO:0000601 -name: edited_by_G_addition -is_obsolete: true - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. 
The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing block" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing domain" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "unedited region" EXACT [] -is_a: SO:0000579 ! 
edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_encoding -synonym: "H ACA box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -synonym: "oligo U tail" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000602 ! guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -synonym: "bacterial RNApol promoter" EXACT [] -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -synonym: "bacterial terminator" EXACT [] -is_a: SO:0000141 ! 
terminator -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -synonym: "terminator of type 2 RNApol III promoter" EXACT [] -is_a: SO:0000951 ! eukaryotic_terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -synonym: "RNApol III promoter type 1" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "RNApol III promoter type 2" EXACT [] -synonym: "tRNA promoter" RELATED [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence TGGCnnAGTGG." [SO:ke] -comment: Binds TFIIIC. -synonym: "A-box" EXACT [] -xref: http://en.wikipedia.org/wiki/A-box "wiki" -is_a: SO:0001660 ! core_promoter_element - -[Term] -id: SO:0000620 -name: B_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence AGGTTCCAnnCC." [SO:ke] -comment: Binds TFIIIC. -synonym: "B-box" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -synonym: "RNApol III promoter type 3" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -def: "An RNA polymerase III type 1 promoter with consensus sequence CAnnCCn." [SO:ke] -synonym: "C-box" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000623 -name: snRNA_encoding -synonym: "snRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -synonym: "chromosomal regulatory element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000627 -name: insulator -def: "A transcriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -synonym: "five prime open reading frame" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -def: "A start codon upstream of the ORF." [SO:ke] -synonym: "upstream AUG codon" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000631 -name: polycistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." [SO:ke] -synonym: "polycistronic primary transcript" EXACT [] -is_a: SO:0000078 ! 
polycistronic_transcript -is_a: SO:0000185 ! primary_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000880 ! polycistronic -relationship: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000632 -name: monocistronic_primary_transcript -def: "A primary transcript encoding for one gene product." [SO:ke] -synonym: "monocistronic primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript -is_a: SO:0000665 ! monocistronic_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000878 ! monocistronic -relationship: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000633 -name: monocistronic_mRNA -def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd] -synonym: "monocistronic mRNA" EXACT [] -synonym: "monocistronic processed transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Monocistronic_mRNA "wiki" -is_a: SO:0000234 ! mRNA -is_a: SO:0000665 ! monocistronic_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000878 ! monocistronic -relationship: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000634 -name: polycistronic_mRNA -def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd] -synonym: "polycistronic mRNA" EXACT [] -synonym: "polycistronic processed transcript" RELATED [] -xref: http://en.wikipedia.org/wiki/Polycistronic_mRNA "wiki" -is_a: SO:0000078 ! polycistronic_transcript -is_a: SO:0000234 ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000880 ! polycistronic -relationship: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "A primary transcript that donates the spliced leader to other mRNA." [SO:ke] -synonym: "mini exon donor RNA" EXACT [] -synonym: "mini-exon donor RNA" EXACT [] -is_a: SO:0000185 ! 
primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -synonym: "spliced leader RNA" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000635 ! mini_exon_donor_RNA - -[Term] -id: SO:0000637 -name: engineered_plasmid -def: "A plasmid that is engineered." [SO:xp] -synonym: "engineered plasmid" EXACT [] -synonym: "engineered plasmid gene" RELATED [] -is_a: SO:0000155 ! plasmid -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -synonym: "transcribed spacer region" EXACT [] -is_a: SO:0000838 ! rRNA_primary_transcript_region - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -synonym: "internal transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -synonym: "external transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -synonym: "tetranucleotide repeat microsatellite feature" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_encoding -synonym: "SRP RNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro RNA primary transcript" EXACT [] -synonym: "miRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000648 -name: stRNA_primary_transcript -def: "A primary transcript encoding a small temporal mRNA (SO:0000649)." [SO:ke] -synonym: "small temporal RNA primary transcript" EXACT [] -synonym: "stRNA primary transcript" EXACT [] -is_a: SO:0000647 ! 
miRNA_primary_transcript - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000255 ! rRNA_small_subunit_primary_transcript - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilizes 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! 
large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000654 -name: maxicircle_gene -def: "A mitochondrial gene located in a maxicircle." [SO:xp] -synonym: "maxi-circle gene" EXACT [] -synonym: "maxicircle gene" EXACT [] -is_a: SO:0000089 ! kinetoplast_gene -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000742 ! maxicircle -relationship: part_of SO:0000742 ! maxicircle - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000656 -name: stRNA_encoding -synonym: "stRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: has_part SO:0000726 ! repeat_unit - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! 
repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_encoding -synonym: "tmRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_obsolete: true - -[Term] -id: SO:0000661 -name: intron_attribute -is_obsolete: true - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000663 -name: tRNA_encoding -synonym: "tRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -synonym: "introgressed chromosome region" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000665 -name: monocistronic_transcript -def: "A transcript that is monocistronic." [SO:xp] -synonym: "monocistronic transcript" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000878 ! monocistronic -relationship: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000666 -name: mobile_intron -def: "An intron (mitochondrial, chloroplast, nuclear or prokaryotic) that encodes a double strand sequence specific endonuclease allowing for mobility." [SO:ke] -synonym: "mobile intron" EXACT [] -is_a: SO:0000188 ! intron -is_a: SO:0001037 ! mobile_genetic_element -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001234 ! mobile -relationship: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0000667 -name: insertion -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: DBVAR -subset: SOFA -synonym: "insertion" EXACT dbvar [http://www.ncbi.nlm.nih.gov/dbvar/] -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -xref: loinc:LA6687-3 "Insertion" -is_a: SO:0001059 ! 
sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -synonym: "sequence rearrangement feature" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." [SO:ma] -synonym: "chromosome breakage sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -synonym: "internal eliminated sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -synonym: "macronucleus destined segment" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non canonical splice site" EXACT [] -synonym: "non-canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000678 -consider: SO:0000679 - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." 
[SO:ke] -synonym: "canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000676 -consider: SO:0000677 - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -synonym: "canonical 3' splice site" EXACT [] -synonym: "canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." [SO:ke] -synonym: "canonical 5' splice site" EXACT [] -synonym: "canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." [SO:ke] -synonym: "non canonical 3' splice site" RELATED [] -synonym: "non canonical three prime splice site" EXACT [] -synonym: "non-canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non canonical 5' splice site" EXACT [] -synonym: "non canonical five prime splice site" EXACT [] -synonym: "non-canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non ATG start codon" EXACT [] -synonym: "non canonical start codon" EXACT [] -synonym: "non-canonical start codon" EXACT [] -is_a: SO:0000318 ! start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -synonym: "aberrant processed transcript" EXACT [] -is_a: SO:0000673 ! 
transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." [http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -synonym: "exonic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000685 -name: DNAseI_hypersensitive_site -synonym: "DHS" EXACT [] -synonym: "DNAseI hypersensitive site" EXACT [] -is_a: SO:0000322 ! nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "A chromosomal translocation whereby the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements. This occurs for some translocations, particularly but not exclusively, reciprocal translocations." [SO:ma] -synonym: "translocation element" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! 
expressed_sequence_match - -[Term] -id: SO:0000690 -name: gene_with_polycistronic_transcript -def: "A gene that encodes a polycistronic transcript." [SO:xp] -synonym: "gene with polycistronic transcript" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000078 ! polycistronic_transcript -relationship: transcribed_to SO:0000078 ! polycistronic_transcript - -[Term] -id: SO:0000691 -name: cleaved_initiator_methionine -alt_id: BS:00067 -def: "The initiator methionine that has been cleaved from a mature polypeptide sequence." [EBIBS:GAR] -subset: biosapiens -synonym: "cleaved initiator methionine" EXACT [] -synonym: "init_met" RELATED [uniprot:feature_type] -synonym: "initiator methionine" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000692 -name: gene_with_dicistronic_transcript -def: "A gene that encodes a dicistronic transcript." [SO:xp] -synonym: "gene with dicistronic transcript" EXACT [] -is_a: SO:0000690 ! gene_with_polycistronic_transcript -intersection_of: SO:0000690 ! gene_with_polycistronic_transcript -intersection_of: transcribed_to SO:0000079 ! dicistronic_transcript -relationship: transcribed_to SO:0000079 ! dicistronic_transcript - -[Term] -id: SO:0000693 -name: gene_with_recoded_mRNA -def: "A gene that encodes an mRNA that is recoded." [SO:xp] -synonym: "gene with recoded mRNA" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000881 ! recoded -relationship: has_quality SO:0000881 ! recoded - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! 
SNV - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000697 -name: gene_with_stop_codon_read_through -def: "A gene that encodes a transcript with stop codon readthrough." [SO:xp] -synonym: "gene with stop codon read through" EXACT [] -is_a: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000883 ! stop_codon_read_through -relationship: has_part SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000698 -name: gene_with_stop_codon_redefined_as_pyrrolysine -def: "A gene encoding an mRNA that has the stop codon redefined as pyrrolysine." [SO:xp] -synonym: "gene with stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000884 ! stop_codon_redefined_as_pyrrolysine -relationship: has_part SO:0000884 ! stop_codon_redefined_as_pyrrolysine - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -synonym: "breakpoint" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." 
[SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjcent copies of a region (of length greater than 1)." [SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The 5' five prime splice site region of the donor RNA." 
[SO:ke] -comment: SL RNA contains a donor site. -synonym: "5 prime trans splice site" RELATED [] -synonym: "trans splice donor site" EXACT [] -synonym: "trans-splice donor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -def: "A trans_splicing_acceptor_site which appends the 22nt SL1 RNA leader sequence to the 5' end of most mRNAs." [SO:nlw] -synonym: "SL1 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -def: "A trans_splicing_acceptor_site which appends the 22nt SL2 RNA leader sequence to the 5' end of mRNAs. SL2 acceptor sites occur in genes in internal segments of polycistronic transcripts." [SO:nlw] -synonym: "SL2 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000710 -name: gene_with_stop_codon_redefined_as_selenocysteine -def: "A gene encoding an mRNA that has the stop codon redefined as selenocysteine." [SO:xp] -synonym: "gene with stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: has_part SO:0000885 ! stop_codon_redefined_as_selenocysteine -relationship: has_part SO:0000885 ! stop_codon_redefined_as_selenocysteine - -[Term] -id: SO:0000711 -name: gene_with_mRNA_recoded_by_translational_bypass -def: "A gene with mRNA recoded by translational bypass." [SO:xp] -synonym: "gene with mRNA recoded by translational bypass" EXACT [] -is_a: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000886 ! recoded_by_translational_bypass -relationship: has_quality SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:0000712 -name: gene_with_transcript_with_translational_frameshift -def: "A gene encoding a transcript that has a translational frameshift." 
[SO:xp] -synonym: "gene with transcript with translational frameshift" EXACT [] -is_a: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_quality SO:0000887 ! translationally_frameshifted -relationship: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000713 -name: DNA_motif -def: "A motif that is active in the DNA form of the sequence." [SO:ke] -synonym: "DNA motif" EXACT [] -xref: http://en.wikipedia.org/wiki/DNA_motif "wiki" -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001683 ! sequence_motif - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000716 -name: dicistronic_mRNA -def: "An mRNA that has the quality dicistronic." [SO:ke] -synonym: "dicistronic mRNA" EXACT [] -synonym: "dicistronic processed transcript" RELATED [] -is_a: SO:0000079 ! dicistronic_transcript -is_a: SO:0000634 ! polycistronic_mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000879 ! dicistronic -relationship: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interrupted by one or more stop codons; usually identified through intergenomic sequence comparisons." [SGD:rb] -comment: Term requested by Rama from SGD. -synonym: "blocked reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -synonym: "superscaffold" RELATED [] -is_a: SO:0001876 ! partial_genomic_sequence_assembly - -[Term] -id: SO:0000720 -name: foreign_transposable_element -def: "A transposable element that is foreign." [SO:ke] -comment: requested by Michael on 19 Nov 2004. -synonym: "foreign transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000721 -name: gene_with_dicistronic_primary_transcript -def: "A gene that encodes a dicistronic primary transcript." [SO:xp] -comment: Requested by Michael, 19 nov 2004. -synonym: "gene with dicistronic primary transcript" EXACT [] -is_a: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:1001197 ! dicistronic_primary_transcript -relationship: transcribed_to SO:1001197 ! dicistronic_primary_transcript - -[Term] -id: SO:0000722 -name: gene_with_dicistronic_mRNA -def: "A gene that encodes a polycistronic mRNA." [SO:xp] -comment: Requested by MA nov 19 2004. -synonym: "gene with dicistronic mRNA" EXACT [] -synonym: "gene with dicistronic processed transcript" EXACT [] -is_a: SO:0000692 ! 
gene_with_dicistronic_transcript -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:0000716 ! dicistronic_mRNA -relationship: transcribed_to SO:0000716 ! dicistronic_mRNA - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." [SO:ma] -synonym: "intervening DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/IDNA "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal - -[Term] -id: SO:0000726 -name: repeat_unit -def: "The simplest repeated component of a repeat region. A single repeat." [SO:ke] -comment: Added to comply with the feature table. A single repeat. 
-synonym: "repeat unit" EXACT [] -xref: http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: has_part SO:0000235 ! TF_binding_site -relationship: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000728 -name: intein -def: "A region of a peptide that is able to excise itself and rejoin the remaining portions with a peptide bond." [SO:ke] -comment: Intein-mediated protein splicing occurs after mRNA has been translated into a protein. -synonym: "protein intron" RELATED [] -xref: http://en.wikipedia.org/wiki/Intein "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000729 -name: intein_containing -def: "An attribute of protein-coding genes where the initial protein product contains an intein." [SO:ke] -synonym: "intein containing" EXACT [] -is_a: SO:0000010 ! protein_coding - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000731 -name: fragmentary -def: "An attribute to describe a feature that is incomplete." [SO:ke] -comment: Term added because of request by MO people. -synonym: "fragment" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000732 -name: predicted -def: "An attribute describing an unverified region." [SO:ke] -xref: http://en.wikipedia.org/wiki/Predicted "wiki" -is_a: SO:0000905 ! 
status - -[Term] -id: SO:0000733 -name: feature_attribute -def: "An attribute describing a located_sequence_feature." [SO:ke] -synonym: "feature attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000734 -name: exemplar_mRNA -def: "An exemplar is a representative cDNA sequence for each gene. The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: Added for the MO people. -synonym: "exemplar mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000864 ! exemplar -relationship: has_quality SO:0000864 ! exemplar - -[Term] -id: SO:0000735 -name: sequence_location -synonym: "sequence location" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_sequence -synonym: "organelle sequence" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "mitochondrial sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000738 -name: nuclear_sequence -synonym: "nuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -synonym: "nucleomorphic sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000740 -name: plastid_sequence -synonym: "plastid sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000741 -name: kinetoplast -alt_id: SO:0000826 -def: "A kinetoplast is an interlocked network of thousands of minicircles and tens of maxi circles, located near the base of the flagellum of some protozoan species." 
[PMID:8395055] -synonym: "kinetoplast_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Kinetoplast "wiki" -is_a: SO:0001026 ! genome -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0000742 ! maxicircle -intersection_of: has_part SO:0000980 ! minicircle -relationship: has_part SO:0000742 ! maxicircle -relationship: has_part SO:0000980 ! minicircle - -[Term] -id: SO:0000742 -name: maxicircle -alt_id: SO:0000827 -def: "A maxicircle is a replicon, part of a kinetoplast, that contains open reading frames and replicates via a rolling circle method." [PMID:8395055] -synonym: "maxicircle_chromosome" EXACT [] -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000743 -name: apicoplast_sequence -synonym: "apicoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -synonym: "chromoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -synonym: "chloroplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -synonym: "cyanelle sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -synonym: "leucoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -synonym: "proplastid sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_location -synonym: "plasmid location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -synonym: "amplification origin" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_location -synonym: "proviral location" EXACT [] -is_a: SO:0000735 ! 
sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -def: "The region of sequence that has been inserted and is being propagated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000754 -name: lambda_vector -def: "The lambda bacteriophage is the vector for the linear lambda clone. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -synonym: "lambda vector" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000755 -name: plasmid_vector -synonym: "plasmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Plasmid_vector#Vectors "wiki" -is_a: SO:0000440 ! vector_replicon -intersection_of: SO:0001235 ! replicon -intersection_of: derives_from SO:0000155 ! plasmid -relationship: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template." [SO:ma] -xref: http://en.wikipedia.org/wiki/CDNA "wiki" -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -synonym: "single strand cDNA" EXACT [] -synonym: "single stranded cDNA" EXACT [] -synonym: "single-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -synonym: "double strand cDNA" RELATED [] -synonym: "double stranded cDNA" EXACT [] -synonym: "double-strand cDNA" RELATED [] -is_a: SO:0000756 ! 
cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_obsolete: true - -[Term] -id: SO:0000760 -name: YAC_clone -is_obsolete: true - -[Term] -id: SO:0000761 -name: phagemid_clone -is_obsolete: true - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000763 -name: fosmid_clone -is_obsolete: true - -[Term] -id: SO:0000764 -name: BAC_clone -is_obsolete: true - -[Term] -id: SO:0000765 -name: cosmid_clone -is_obsolete: true - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -def: "A tRNA sequence that has a pyrrolysine anticodon, and a 3' pyrrolysine binding region." [SO:ke] -synonym: "pyrrolysyl tRNA" EXACT [] -synonym: "pyrrolysyl-transfer ribonucleic acid" EXACT [] -synonym: "pyrrolysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0001178 ! pyrrolysine_tRNA_primary_transcript - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome." [SO:ma] -is_a: SO:0000155 ! plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria. Processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw, issn:1362-4962] -comment: Added in response to comment from Kelly Williams from Indiana. Nov 2005. -synonym: "tmRNA coding piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." 
[doi:10.1093/nar/gkh795, Indiana:kw] -comment: Added in response to Kelly Williams from Indiana. Date: Nov 2005. -synonym: "tmRNA acceptor piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000771 -name: QTL -def: "A quantitative trait locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." [http://rgd.mcw.edu/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005. -synonym: "quantitative trait locus" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000772 -name: genomic_island -def: "A genomic island is an integrated mobile genetic element, characterized by size (over 10 Kb). It that has features that suggest a foreign origin. These can include nucleotide distribution (oligonucleotides signature, CG content etc.) that differs from the bulk of the chromosome and/or genes suggesting DNA mobility." [Phigo:at, SO:ke] -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -synonym: "genomic island" EXACT [] -xref: http://en.wikipedia.org/wiki/Genomic_island "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "pathogenic island" EXACT [] -is_a: SO:0000772 ! 
genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands. -synonym: "metabolic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -def: "An adaptive island is a genomic island that provides an adaptive advantage to the host." [SO:ke] -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands. Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "adaptive island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands. Evolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA gene. John T. Sullivan and Clive W. Ronso PNAS 1998 Apr 28 95 (9) 5145-5149. -synonym: "symbiosis island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. 
Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000779 -name: engineered_episome -def: "An episome that is engineered." [SO:xp] -comment: Requested by Lynn Crosby Jan 2006. -synonym: "engineered episome" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -is_a: SO:0000768 ! episome -intersection_of: SO:0000768 ! episome -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000780 -name: transposable_element_attribute -comment: Added by KE Jan 2006 to capture the kinds of attributes of TEs -is_obsolete: true - -[Term] -id: SO:0000781 -name: transgenic -def: "Attribute describing sequence that has been integrated with foreign sequence." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000782 -name: natural -def: "An attribute describing a feature that occurs in nature." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000783 -name: engineered -def: "An attribute to describe a region that was modified in vitro." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000784 -name: foreign -def: "An attribute to describe a region from another species." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000785 -name: cloned_region -comment: Added in response to Lynn Crosby. A clone insert may be composed of many cloned regions. -synonym: "cloned region" EXACT [] -synonym: "cloned segment" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000786 -name: reagent_attribute -comment: Added jan 2006 by KE. 
-synonym: "reagent attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000787 -name: clone_attribute -is_obsolete: true - -[Term] -id: SO:0000788 -name: cloned -is_obsolete: true - -[Term] -id: SO:0000789 -name: validated -def: "An attribute to describe a feature that has been proven." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000790 -name: invalidated -def: "An attribute describing a feature that is invalidated." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000791 -name: cloned_genomic -is_obsolete: true - -[Term] -id: SO:0000792 -name: cloned_cDNA -is_obsolete: true - -[Term] -id: SO:0000793 -name: engineered_DNA -is_obsolete: true - -[Term] -id: SO:0000794 -name: engineered_rescue_region -def: "A rescue region that is engineered." [SO:xp] -synonym: "engineered rescue fragment" EXACT [] -synonym: "engineered rescue region" EXACT [] -synonym: "engineered rescue segment" EXACT [] -is_a: SO:0000411 ! rescue_region -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000411 ! rescue_region -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000795 -name: rescue_mini_gene -def: "A mini_gene that rescues." [SO:xp] -synonym: "rescue mini gene" EXACT [] -synonym: "rescue mini-gene" EXACT [] -is_a: SO:0000815 ! mini_gene -intersection_of: SO:0000815 ! mini_gene -intersection_of: has_quality SO:0000814 ! rescue -relationship: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000796 -name: transgenic_transposable_element -def: "TE that has been modified in vitro, including insertion of DNA derived from a source other than the originating TE." [FB:mc] -comment: Modified as requested by Lynn - FB. May 2007. -synonym: "transgenic transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: derives_from SO:0000151 ! clone -intersection_of: has_quality SO:0000781 ! 
transgenic -relationship: derives_from SO:0000151 ! clone -relationship: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000797 -name: natural_transposable_element -def: "TE that exists (or existed) in nature." [FB:mc] -synonym: "natural transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000782 ! natural -relationship: has_quality SO:0000782 ! natural - -[Term] -id: SO:0000798 -name: engineered_transposable_element -def: "TE that has been modified by manipulations in vitro." [FB:mc] -synonym: "engineered transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000799 -name: engineered_foreign_transposable_element -def: "A transposable_element that is engineered and foreign." [FB:mc] -synonym: "engineered foreign transposable element" EXACT [] -is_a: SO:0000720 ! foreign_transposable_element -is_a: SO:0000798 ! engineered_transposable_element -is_a: SO:0000805 ! engineered_foreign_region -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000800 -name: assortment_derived_duplication -def: "A multi-chromosome duplication aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment derived duplication" EXACT [] -is_a: SO:0001504 ! 
assortment_derived_variation - -[Term] -id: SO:0000801 -name: assortment_derived_deficiency_plus_duplication -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency and a duplication." [FB:gm] -synonym: "assortment derived deficiency plus duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000802 -name: assortment_derived_deficiency -def: "A multi-chromosome deficiency aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment-derived deficiency" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000803 -name: assortment_derived_aneuploid -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency or a duplication." [FB:gm] -synonym: "assortment derived aneuploid" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000804 -name: engineered_region -def: "A region that is engineered." [SO:xp] -synonym: "construct" EXACT [] -synonym: "engineered region" EXACT [] -synonym: "engineered sequence" EXACT [] -is_a: SO:0001409 ! biomaterial_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000805 -name: engineered_foreign_region -def: "A region that is engineered and foreign." [SO:xp] -synonym: "engineered foreign region" EXACT [] -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000806 -name: fusion -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000807 -name: engineered_tag -def: "A tag that is engineered." 
[SO:xp] -synonym: "engineered tag" EXACT [] -is_a: SO:0000324 ! tag -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000324 ! tag -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000808 -name: validated_cDNA_clone -def: "A cDNA clone that has been validated." [SO:xp] -synonym: "validated cDNA clone" EXACT [] -is_a: SO:0000317 ! cDNA_clone -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000789 ! validated -relationship: has_quality SO:0000789 ! validated - -[Term] -id: SO:0000809 -name: invalidated_cDNA_clone -def: "A cDNA clone that is invalid." [SO:xp] -synonym: "invalidated cDNA clone" EXACT [] -is_a: SO:0000317 ! cDNA_clone -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000790 ! invalidated -relationship: has_quality SO:0000790 ! invalidated - -[Term] -id: SO:0000810 -name: chimeric_cDNA_clone -def: "A cDNA clone invalidated because it is chimeric." [SO:xp] -synonym: "chimeric cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA -relationship: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA - -[Term] -id: SO:0000811 -name: genomically_contaminated_cDNA_clone -def: "A cDNA clone invalidated by genomic contamination." [SO:xp] -synonym: "genomically contaminated cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000414 ! invalidated_by_genomic_contamination -relationship: has_quality SO:0000414 ! invalidated_by_genomic_contamination - -[Term] -id: SO:0000812 -name: polyA_primed_cDNA_clone -def: "A cDNA clone invalidated by polyA priming." [SO:xp] -synonym: "polyA primed cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! 
invalidated_cDNA_clone -intersection_of: has_quality SO:0000415 ! invalidated_by_genomic_polyA_primed_cDNA -relationship: has_quality SO:0000415 ! invalidated_by_genomic_polyA_primed_cDNA - -[Term] -id: SO:0000813 -name: partially_processed_cDNA_clone -def: "A cDNA invalidated clone by partial processing." [SO:xp] -synonym: "partially processed cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000416 ! invalidated_by_partial_processing -relationship: has_quality SO:0000416 ! invalidated_by_partial_processing - -[Term] -id: SO:0000814 -name: rescue -def: "An attribute describing a region's ability, when introduced to a mutant organism, to re-establish (rescue) a phenotype." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000815 -name: mini_gene -def: "By definition, minigenes are short open-reading frames (ORF), usually encoding approximately 9 to 20 amino acids, which are expressed in vivo (as distinct from being synthesized as peptide or protein ex vivo and subsequently injected). The in vivo synthesis confers a distinct advantage: the expressed sequences can enter both antigen presentation pathways, MHC I (inducing CD8+ T- cells, which are usually cytotoxic T-lymphocytes (CTL)) and MHC II (inducing CD4+ T-cells, usually 'T-helpers' (Th)); and can encounter B-cells, inducing antibody responses. Three main vector approaches have been used to deliver minigenes: viral vectors, bacterial vectors and plasmid DNA." [PMID:15992143] -synonym: "mini gene" EXACT [] -is_a: SO:0000236 ! ORF - -[Term] -id: SO:0000816 -name: rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "rescue gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000814 ! rescue -relationship: has_quality SO:0000814 ! 
rescue - -[Term] -id: SO:0000817 -name: wild_type -def: "An attribute describing sequence with the genotype found in nature and/or standard laboratory stock." [SO:ke] -synonym: "wild type" EXACT [] -xref: http://en.wikipedia.org/wiki/Wild_type "wiki" -xref: loinc:LA9658-1 "wild type" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000818 -name: wild_type_rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "wild type rescue gene" EXACT [] -is_a: SO:0000816 ! rescue_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000817 ! wild_type -relationship: has_quality SO:0000817 ! wild_type - -[Term] -id: SO:0000819 -name: mitochondrial_chromosome -def: "A chromosome originating in a mitochondria." [SO:xp] -synonym: "mitochondrial chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence -relationship: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000820 -name: chloroplast_chromosome -def: "A chromosome originating in a chloroplast." [SO:xp] -synonym: "chloroplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000745 ! chloroplast_sequence -relationship: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000821 -name: chromoplast_chromosome -def: "A chromosome originating in a chromoplast." [SO:xp] -synonym: "chromoplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000744 ! chromoplast_sequence -relationship: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000822 -name: cyanelle_chromosome -def: "A chromosome originating in a cyanelle." [SO:xp] -synonym: "cyanelle chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000746 ! 
cyanelle_sequence -relationship: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000823 -name: leucoplast_chromosome -def: "A chromosome with origin in a leucoplast." [SO:xp] -synonym: "leucoplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000747 ! leucoplast_sequence -relationship: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000824 -name: macronuclear_chromosome -def: "A chromosome originating in a macronucleus." [SO:xp] -synonym: "macronuclear chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000083 ! macronuclear_sequence -relationship: has_origin SO:0000083 ! macronuclear_sequence - -[Term] -id: SO:0000825 -name: micronuclear_chromosome -def: "A chromosome originating in a micronucleus." [SO:xp] -synonym: "micronuclear chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000084 ! micronuclear_sequence -relationship: has_origin SO:0000084 ! micronuclear_sequence - -[Term] -id: SO:0000828 -name: nuclear_chromosome -def: "A chromosome originating in a nucleus." [SO:xp] -synonym: "nuclear chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000738 ! nuclear_sequence -relationship: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000829 -name: nucleomorphic_chromosome -def: "A chromosome originating in a nucleomorph." [SO:xp] -synonym: "nucleomorphic chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence -relationship: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." 
[SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000832 -name: promoter_region -def: "A region of sequence which is part of a promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -def: "A region of a mature transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! 
primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000838 -name: rRNA_primary_transcript_region -def: "A region of an rRNA primary transcript." [SO:ke] -comment: To allow transcribed_spacer_region to have a path to the root. -synonym: "rRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000840 -name: repeat_component -def: "A region of a repeated sequence." [SO:ke] -comment: A manufactured to group the parts of repeats, to give them an is_a path back to the root. -synonym: "repeat component" EXACT [] -is_a: SO:0001412 ! 
topologically_defined_region - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000843 -name: bacterial_RNApol_promoter_region -def: "A region which is part of a bacterial RNA polymerase promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of bacterial_RNApol_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000844 -name: RNApol_II_promoter_region -def: "A region of sequence which is a promoter for RNA polymerase II." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_II_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000845 -name: RNApol_III_promoter_type_1_region -def: "A region of sequence which is a promoter for RNA polymerase III type 1." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_1 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000846 -name: RNApol_III_promoter_type_2_region -def: "A region of sequence which is a promoter for RNA polymerase III type 2." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_2 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000847 -name: tmRNA_region -def: "A region of a tmRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of tmRNA, thus giving them an is_a path back to the root. 
-synonym: "tmRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000584 ! tmRNA - -[Term] -id: SO:0000848 -name: LTR_component -synonym: "long term repeat component" EXACT [] -synonym: "LTR component" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000849 -name: three_prime_LTR_component -synonym: "3' long terminal repeat component" EXACT [] -synonym: "three prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000850 -name: five_prime_LTR_component -synonym: "5' long term repeat component" EXACT [] -synonym: "five prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000851 -name: CDS_region -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0000853 -name: homologous_region -def: "A region that is homologous to another region." [SO:ke] -synonym: "homolog" EXACT [] -synonym: "homologous region" EXACT [] -synonym: "homologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Homology_(biology) "wiki" -is_a: SO:0000330 ! conserved_region -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000857 ! homologous -relationship: has_quality SO:0000857 ! homologous - -[Term] -id: SO:0000854 -name: paralogous_region -def: "A homologous_region that is paralogous to another region." [SO:ke] -comment: A term to be used in conjunction with the paralogous_to relationship. 
-synonym: "paralog" EXACT [] -synonym: "paralogous region" EXACT [] -synonym: "paralogue" EXACT [] -xref: http://en.wikipedia.org/wiki/Paralog#Paralogy "wiki" -is_a: SO:0000853 ! homologous_region -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000859 ! paralogous -relationship: has_quality SO:0000859 ! paralogous - -[Term] -id: SO:0000855 -name: orthologous_region -def: "A homologous_region that is orthologous to another region." [SO:ke] -comment: This term should be used in conjunction with the similarity relationships defined in SO. -synonym: "ortholog" EXACT [] -synonym: "orthologous region" EXACT [] -synonym: "orthologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Ortholog#Orthology "wiki" -is_a: SO:0000853 ! homologous_region -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000858 ! orthologous -relationship: has_quality SO:0000858 ! orthologous - -[Term] -id: SO:0000856 -name: conserved -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000857 -name: homologous -def: "Similarity due to common ancestry." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000858 -name: orthologous -def: "An attribute describing a kind of homology where divergence occured after a speciation event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000859 -name: paralogous -def: "An attribute describing a kind of homology where divergence occurred after a duplication event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000860 -name: syntenic -def: "Attribute describing sequence regions occurring in same order on chromosome of different species." [SO:ke] -xref: http://en.wikipedia.org/wiki/Syntenic "wiki" -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000861 -name: capped_primary_transcript -def: "A primary transcript that is capped." [SO:xp] -synonym: "capped primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript -intersection_of: SO:0000185 ! 
primary_transcript -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped -relationship: adjacent_to SO:0000581 ! cap -relationship: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000862 -name: capped_mRNA -def: "An mRNA that is capped." [SO:xp] -synonym: "capped mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped -relationship: adjacent_to SO:0000581 ! cap -relationship: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000863 -name: mRNA_attribute -def: "An attribute describing an mRNA feature." [SO:ke] -synonym: "mRNA attribute" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000864 -name: exemplar -def: "An attribute describing a sequence is representative of a class of similar sequences." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000865 -name: frameshift -def: "An attribute describing a sequence that contains a mutation involving the deletion or insertion of one or more bases, where this number is not divisible by 3." [SO:ke] -xref: http://en.wikipedia.org/wiki/Frameshift "wiki" -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000866 -name: minus_1_frameshift -def: "A frameshift caused by deleting one base." [SO:ke] -synonym: "minus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000867 -name: minus_2_frameshift -def: "A frameshift caused by deleting two bases." [SO:ke] -synonym: "minus 2 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000868 -name: plus_1_frameshift -def: "A frameshift caused by inserting one base." [SO:ke] -synonym: "plus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000869 -name: plus_2_framshift -def: "A frameshift caused by inserting two bases." [SO:ke] -synonym: "plus 2 framshift" EXACT [] -is_a: SO:0000865 ! 
frameshift - -[Term] -id: SO:0000870 -name: trans_spliced -def: "An attribute describing transcript sequence that is created by splicing exons from diferent genes." [SO:ke] -synonym: "trans-spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000871 -name: polyadenylated_mRNA -def: "An mRNA that is polyadenylated." [SO:xp] -synonym: "polyadenylated mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000610 ! polyA_sequence -intersection_of: has_quality SO:0000246 ! polyadenylated -relationship: adjacent_to SO:0000610 ! polyA_sequence -relationship: has_quality SO:0000246 ! polyadenylated - -[Term] -id: SO:0000872 -name: trans_spliced_mRNA -def: "An mRNA that is trans-spliced." [SO:xp] -synonym: "trans-spliced mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -is_a: SO:0000479 ! trans_spliced_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000636 ! spliced_leader_RNA -intersection_of: has_quality SO:0000870 ! trans_spliced -relationship: adjacent_to SO:0000636 ! spliced_leader_RNA -relationship: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000873 -name: edited_transcript -def: "A transcript that is edited." [SO:ke] -synonym: "edited transcript" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000116 ! edited -relationship: guided_by SO:0000602 ! guide_RNA -relationship: has_part SO:0000977 ! anchor_binding_site -relationship: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000874 -name: edited_transcript_by_A_to_I_substitution -def: "A transcript that has been edited by A to I substitution." [SO:ke] -synonym: "edited transcript by A to I substitution" EXACT [] -is_a: SO:0000873 ! edited_transcript - -[Term] -id: SO:0000875 -name: bound_by_protein -def: "An attribute describing a sequence that is bound by a protein." [SO:ke] -synonym: "bound by protein" EXACT [] -is_a: SO:0000277 ! 
bound_by_factor - -[Term] -id: SO:0000876 -name: bound_by_nucleic_acid -def: "An attribute describing a sequence that is bound by a nucleic acid." [SO:ke] -synonym: "bound by nucleic acid" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000877 -name: alternatively_spliced -def: "An attribute describing a situation where a gene may encode for more than 1 transcript." [SO:ke] -synonym: "alternatively spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000878 -name: monocistronic -def: "An attribute describing a sequence that contains the code for one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000879 -name: dicistronic -def: "An attribute describing a sequence that contains the code for two gene products." [SO:ke] -is_a: SO:0000880 ! polycistronic - -[Term] -id: SO:0000880 -name: polycistronic -def: "An attribute describing a sequence that contains the code for more than one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000881 -name: recoded -def: "An attribute describing an mRNA sequence that has been reprogrammed at translation, causing localized alterations." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000882 -name: codon_redefined -def: "An attribute describing the alteration of codon meaning." [SO:ke] -synonym: "codon redefined" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000883 -name: stop_codon_read_through -def: "A stop codon redefined to be a new amino acid." [SO:ke] -synonym: "stop codon read through" EXACT [] -synonym: "stop codon readthrough" RELATED [] -is_a: SO:0000145 ! recoded_codon - -[Term] -id: SO:0000884 -name: stop_codon_redefined_as_pyrrolysine -def: "A stop codon redefined to be the new amino acid, pyrrolysine." [SO:ke] -synonym: "stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000883 ! 
stop_codon_read_through - -[Term] -id: SO:0000885 -name: stop_codon_redefined_as_selenocysteine -def: "A stop codon redefined to be the new amino acid, selenocysteine." [SO:ke] -synonym: "stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000886 -name: recoded_by_translational_bypass -def: "Recoded mRNA where a block of nucleotides is not translated." [SO:ke] -synonym: "recoded by translational bypass" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000887 -name: translationally_frameshifted -def: "Recoding by frameshifting a particular site." [SO:ke] -synonym: "translationally frameshifted" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000888 -name: maternally_imprinted_gene -def: "A gene that is maternally_imprinted." [SO:xp] -synonym: "maternally imprinted gene" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000135 ! maternally_imprinted -relationship: has_quality SO:0000135 ! maternally_imprinted - -[Term] -id: SO:0000889 -name: paternally_imprinted_gene -def: "A gene that is paternally imprinted." [SO:xp] -synonym: "paternally imprinted gene" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000136 ! paternally_imprinted -relationship: has_quality SO:0000136 ! paternally_imprinted - -[Term] -id: SO:0000890 -name: post_translationally_regulated_gene -def: "A gene that is post translationally regulated." [SO:xp] -synonym: "post translationally regulated gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000130 ! post_translationally_regulated -relationship: has_quality SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000891 -name: negatively_autoregulated_gene -def: "A gene that is negatively autoreguated." 
[SO:xp] -synonym: "negatively autoregulated gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000473 ! negatively_autoregulated -relationship: has_quality SO:0000473 ! negatively_autoregulated - -[Term] -id: SO:0000892 -name: positively_autoregulated_gene -def: "A gene that is positively autoregulated." [SO:xp] -synonym: "positively autoregulated gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000475 ! positively_autoregulated -relationship: has_quality SO:0000475 ! positively_autoregulated - -[Term] -id: SO:0000893 -name: silenced -def: "An attribute describing an epigenetic process where a gene is inactivated at transcriptional or translational level." [SO:ke] -xref: http://en.wikipedia.org/wiki/Silenced "wiki" -is_a: SO:0000126 ! transcriptionally_repressed - -[Term] -id: SO:0000894 -name: silenced_by_DNA_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA modifications, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0000895 -name: silenced_by_DNA_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA methylation, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA methylation" EXACT [] -is_a: SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000896 -name: translationally_regulated_gene -def: "A gene that is translationally regulated." [SO:xp] -synonym: "translationally regulated gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000131 ! translationally_regulated -relationship: has_quality SO:0000131 ! translationally_regulated - -[Term] -id: SO:0000897 -name: allelically_excluded_gene -def: "A gene that is allelically_excluded." 
[SO:xp] -synonym: "allelically excluded gene" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000137 ! allelically_excluded -relationship: has_quality SO:0000137 ! allelically_excluded - -[Term] -id: SO:0000898 -name: epigenetically_modified_gene -def: "A gene that is epigenetically modified." [SO:ke] -synonym: "epigenetically modified gene" EXACT [] -is_a: SO:0000704 ! gene -is_a: SO:0001720 ! epigenetically_modified_region -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000133 ! epigenetically_modified -relationship: has_quality SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000899 -name: nuclear_mitochondrial -def: "An attribute describing a nuclear pseudogene of a mitochndrial gene." [SO:ke] -synonym: "nuclear mitochondrial" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000900 -name: processed -def: "An attribute describing a pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promotors, but often including a polyA tail." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000901 -name: unequally_crossed_over -def: "An attribute describing a pseudogene that was created by tandem duplication and unequal crossing over during recombination." [SO:ke] -synonym: "unequally crossed over" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000902 -name: transgene -def: "A transgene is a gene that has been transferred naturally or by any of a number of genetic engineering techniques from one organism to another." [SO:xp] -xref: http://en.wikipedia.org/wiki/Transgene "wiki" -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000781 ! transgenic -relationship: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000903 -name: endogenous_retroviral_sequence -synonym: "endogenous retroviral sequence" EXACT [] -is_a: SO:0000751 ! 
proviral_location - -[Term] -id: SO:0000904 -name: rearranged_at_DNA_level -def: "An attribute to describe the sequence of a feature, where the DNA is rearranged." [SO:ke] -synonym: "rearranged at DNA level" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000905 -name: status -def: "An attribute describing the status of a feature, based on the available evidence." [SO:ke] -comment: This term is the hypernym of attributes and should not be annotated to. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000906 -name: independently_known -def: "Attribute to describe a feature that is independently known - not predicted." [SO:ke] -synonym: "independently known" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000907 -name: supported_by_sequence_similarity -def: "An attribute to describe a feature that has been predicted using sequence similarity techniques." [SO:ke] -synonym: "supported by sequence similarity" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000908 -name: supported_by_domain_match -def: "An attribute to describe a feature that has been predicted using sequence similarity of a known domain." [SO:ke] -synonym: "supported by domain match" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000909 -name: supported_by_EST_or_cDNA -def: "An attribute to describe a feature that has been predicted using sequence similarity to EST or cDNA data." [SO:ke] -synonym: "supported by EST or cDNA" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000910 -name: orphan -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000911 -name: predicted_by_ab_initio_computation -def: "An attribute describing a feature that is predicted by a computer program that did not rely on sequence similarity." [SO:ke] -synonym: "predicted by ab initio computation" EXACT [] -is_a: SO:0000732 ! 
predicted - -[Term] -id: SO:0000912 -name: asx_turn -alt_id: BS:00203 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Aspartate or Asparagine (Asx), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0000913 -name: cloned_cDNA_insert -def: "A clone insert made from cDNA." [SO:xp] -synonym: "cloned cDNA insert" EXACT [] -is_a: SO:0000753 ! clone_insert -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000756 ! cDNA -relationship: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000914 -name: cloned_genomic_insert -def: "A clone insert made from genomic DNA." [SO:xp] -synonym: "cloned genomic insert" EXACT [] -is_a: SO:0000753 ! clone_insert -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000991 ! genomic_DNA -relationship: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000915 -name: engineered_insert -def: "A clone insert that is engineered." [SO:xp] -synonym: "engineered insert" EXACT [] -is_a: SO:0000753 ! clone_insert -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000916 -name: edit_operation -synonym: "edit operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000917 -name: insert_U -def: "An edit to insert a U." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "insert U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000918 -name: delete_U -def: "An edit to delete a uridine." 
[SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "delete U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000919 -name: substitute_A_to_I -def: "An edit to substitute an I for an A." [SO:ke] -synonym: "substitute A to I" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000920 -name: insert_C -def: "An edit to insert a cytidine." [SO:ke] -synonym: "insert C" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000921 -name: insert_dinucleotide -def: "An edit to insert a dinucleotide." [SO:ke] -synonym: "insert dinucleotide" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000922 -name: substitute_C_to_U -def: "An edit to substitute an U for a C." [SO:ke] -synonym: "substitute C to U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000923 -name: insert_G -def: "An edit to insert a G." [SO:ke] -synonym: "insert G" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000924 -name: insert_GC -def: "An edit to insert a GC dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GC" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000925 -name: insert_GU -def: "An edit to insert a GU dinucleotide." 
[SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000926 -name: insert_CU -def: "An edit to insert a CU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert CU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000927 -name: insert_AU -def: "An edit to insert a AU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. 
Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000928 -name: insert_AA -def: "An edit to insert a AA dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AA" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000929 -name: edited_mRNA -def: "An mRNA that is edited." [SO:xp] -synonym: "edited mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -is_a: SO:0000873 ! edited_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000116 ! edited -relationship: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000930 -name: guide_RNA_region -def: "A region of guide RNA." [SO:ma] -synonym: "guide RNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000602 ! guide_RNA - -[Term] -id: SO:0000931 -name: anchor_region -def: "A region of a guide_RNA that base-pairs to a target mRNA." [SO:jk] -synonym: "anchor region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000932 -name: pre_edited_mRNA -synonym: "pre-edited mRNA" EXACT [] -is_a: SO:0000120 ! 
protein_coding_primary_transcript - -[Term] -id: SO:0000933 -name: intermediate -def: "An attribute to describe a feature between stages of processing." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000934 -name: miRNA_target_site -def: "A miRNA target site is a binding site where the molecule is a micro RNA." [FB:cds] -synonym: "miRNA target site" EXACT [] -is_a: SO:0001655 ! nucleotide_binding_site - -[Term] -id: SO:0000935 -name: edited_CDS -def: "A CDS that is edited." [SO:xp] -synonym: "edited CDS" EXACT [] -is_a: SO:0000316 ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000116 ! edited -relationship: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000936 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -synonym: "vertebrate immunoglobulin T cell receptor rearranged segment" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000937 -name: vertebrate_immune_system_feature -is_obsolete: true - -[Term] -id: SO:0000938 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor rearranged gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000939 -name: vertebrate_immune_system_gene_recombination_signal_feature -synonym: "vertebrate immune system gene recombination signal feature" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000940 -name: recombinationally_rearranged -synonym: "recombinationally rearranged" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000941 -name: recombinationally_rearranged_vertebrate_immune_system_gene -def: "A recombinationally rearranged gene of the vertebrate immune system." [SO:xp] -synonym: "recombinationally rearranged vertebrate immune system gene" EXACT [] -is_a: SO:0000456 ! 
recombinationally_rearranged_gene - -[Term] -id: SO:0000942 -name: attP_site -def: "An integration/excision site of a phage chromosome at which a recombinase acts to insert the phage DNA at a cognate integration/excision site on a bacterial chromosome." [SO:as] -synonym: "attP site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0001042 ! phage_sequence - -[Term] -id: SO:0000943 -name: attB_site -def: "An integration/excision site of a bacterial chromosome at which a recombinase acts to insert foreign DNA containing a cognate integration/excision site." [SO:as] -synonym: "attB site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000944 -name: attL_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attB_site and the 3' portion of attP_site." [SO:as] -synonym: "attBP'" RELATED [] -synonym: "attL site" RELATED [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000945 -name: attR_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attP_site and the 3' portion of attB_site." [SO:as] -synonym: "attPB'" RELATED [] -synonym: "attR site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000946 -name: integration_excision_site -def: "A region specifically recognised by a recombinase, which inserts or removes another region marked by a distinct cognate integration/excision site." [SO:as] -synonym: "attachment site" RELATED [] -synonym: "integration excision site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000947 -name: resolution_site -def: "A region specifically recognised by a recombinase, which separates a physically contiguous circle of DNA into two physically separate circles." [SO:as] -synonym: "res site" EXACT [] -synonym: "resolution site" EXACT [] -is_a: SO:0000342 ! 
site_specific_recombination_target_region - -[Term] -id: SO:0000948 -name: inversion_site -def: "A region specifically recognised by a recombinase, which inverts the region flanked by a pair of sites." [SO:ma] -comment: A target region for site-specific inversion of a DNA region and which carries binding sites for a site-specific recombinase and accessory proteins as well as the site for specific cleavage by the recombinase. -synonym: "inversion site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000949 -name: dif_site -def: "A site at which replicated bacterial circular chromosomes are decatenated by site specific resolvase." [SO:as] -synonym: "dif site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000950 -name: attC_site -def: "An attC site is a sequence required for the integration of a DNA of an integron." [SO:as] -synonym: "attC site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000951 -name: eukaryotic_terminator -synonym: "eukaryotic terminator" EXACT [] -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000952 -name: oriV -def: "An origin of vegetative replication in plasmids and phages." [SO:as] -synonym: "origin of vegetative replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000953 -name: oriC -def: "An origin of bacterial chromosome replication." [SO:as] -synonym: "origin of bacterial chromosome replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000954 -name: DNA_chromosome -def: "Structural unit composed of a self-replicating, DNA molecule." [SO:ma] -synonym: "DNA chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000352 ! DNA -relationship: has_quality SO:0000352 ! 
DNA - -[Term] -id: SO:0000955 -name: double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded DNA molecule." [SO:ma] -synonym: "double stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! DNA_chromosome -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000985 ! double -relationship: has_quality SO:0000985 ! double - -[Term] -id: SO:0000956 -name: single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded DNA molecule." [SO:ma] -synonym: "single stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! DNA_chromosome -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000984 ! single -relationship: has_quality SO:0000984 ! single - -[Term] -id: SO:0000957 -name: linear_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear DNA molecule." [SO:ma] -synonym: "linear double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000958 -name: circular_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular DNA molecule." [SO:ma] -synonym: "circular double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000959 -name: linear_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear DNA molecule." [SO:ma] -synonym: "linear single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: SO:0000956 ! 
single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000960 -name: circular_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000961 -name: RNA_chromosome -def: "Structural unit composed of a self-replicating, RNA molecule." [SO:ma] -synonym: "RNA chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000356 ! RNA -relationship: has_quality SO:0000356 ! RNA - -[Term] -id: SO:0000962 -name: single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded RNA molecule." [SO:ma] -synonym: "single stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! RNA_chromosome -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000984 ! single -relationship: has_quality SO:0000984 ! single - -[Term] -id: SO:0000963 -name: linear_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear RNA molecule." [SO:ma] -synonym: "linear single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000964 -name: linear_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear RNA molecule." [SO:ma] -synonym: "linear double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! 
double_stranded_RNA_chromosome -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000965 -name: double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded RNA molecule." [SO:ma] -synonym: "double stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! RNA_chromosome -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000985 ! double -relationship: has_quality SO:0000985 ! double - -[Term] -id: SO:0000966 -name: circular_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000967 -name: circular_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular RNA molecule." [SO:ma] -synonym: "circular double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000968 -name: sequence_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "sequence replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000969 -name: rolling_circle -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070581. 
-synonym: "rolling circle" EXACT [] -xref: http://en.wikipedia.org/wiki/Rolling_circle "wiki" -is_obsolete: true - -[Term] -id: SO:0000970 -name: theta_replication -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070582 -synonym: "theta replication" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000971 -name: DNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0006260. -synonym: "DNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000972 -name: RNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "RNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000973 -name: insertion_sequence -def: "A terminal_inverted_repeat_element that is bacterial and only encodes the functions required for its transposition between these inverted repeats." [SO:as] -synonym: "insertion sequence" EXACT [] -synonym: "IS" RELATED [] -xref: http://en.wikipedia.org/wiki/Insertion_sequence "wiki" -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000975 -name: minicircle_gene -synonym: "minicircle gene" EXACT [] -is_a: SO:0000089 ! kinetoplast_gene -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000980 ! minicircle -relationship: part_of SO:0000980 ! minicircle - -[Term] -id: SO:0000976 -name: cryptic -def: "A feature_attribute describing a feature that is not manifest under normal conditions." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000977 -name: anchor_binding_site -comment: Part of an edited transcript only. -synonym: "anchor binding site" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000978 -name: template_region -def: "A region of a guide_RNA that specifies the insertions and deletions of bases in the editing of a target mRNA." 
[SO:jk] -synonym: "information region" EXACT [] -synonym: "template region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000979 -name: gRNA_encoding -def: "A non-protein_coding gene that encodes a guide_RNA." [SO:ma] -synonym: "gRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000980 -name: minicircle -alt_id: SO:0000974 -def: "A minicircle is a replicon, part of a kinetoplast, that encodes for guide RNAs." [PMID:8395055] -synonym: "minicircle_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Minicircle "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000981 -name: rho_dependent_bacterial_terminator -synonym: "rho dependent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000982 -name: rho_independent_bacterial_terminator -synonym: "rho independent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000983 -name: strand_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "strand attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000984 -name: single -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000985 -name: double -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000986 -name: topology_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "topology attribute" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000987 -name: linear -def: "A quality of a nucleotide polymer that has a 3'-terminal residue and a 5'-terminal residue." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. 
SO workshop, September 2006. -synonym: "two-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000988 ! circular - -[Term] -id: SO:0000988 -name: circular -def: "A quality of a nucleotide polymer that has no terminal nucleotide residues." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "zero-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute - -[Term] -id: SO:0000989 -name: class_II_RNA -def: "Small non-coding RNA (59-60 nt long) containing 5' and 3' ends that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -synonym: "class II RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000990 -name: class_I_RNA -def: "Small non-coding RNA (55-65 nt long) containing highly conserved 5' and 3' ends (16 and 8 nt, respectively) that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -comment: Requested by Karen Pilcher - Dictybase. song-Term Tracker-1574577. -synonym: "class I RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000991 -name: genomic_DNA -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "genomic DNA" EXACT [] -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000992 -name: BAC_cloned_genomic_insert -comment: Requested by Andy Schroder - Flybase Harvard, Nov 2006. -synonym: "BAC cloned genomic insert" EXACT [] -is_a: SO:0000914 ! cloned_genomic_insert -intersection_of: SO:0000914 ! cloned_genomic_insert -intersection_of: derives_from SO:0000153 ! BAC -relationship: derives_from SO:0000153 ! BAC - -[Term] -id: SO:0000993 -name: consensus -comment: Term added Dec 06 to comply with mapping to MGED terms. It should be used to generate consensus regions. 
The specific cross product terms they require are consensus_region and consensus_mRNA. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000994 -name: consensus_region -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus region" EXACT [] -is_a: SO:0001410 ! experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000993 ! consensus -relationship: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000995 -name: consensus_mRNA -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -is_a: SO:0000994 ! consensus_region -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000993 ! consensus -relationship: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000996 -name: predicted_gene -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "predicted gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000732 ! predicted -relationship: has_quality SO:0000732 ! predicted - -[Term] -id: SO:0000997 -name: gene_fragment -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "gene fragment" EXACT [] -is_a: SO:0000842 ! gene_component_region -intersection_of: SO:0000842 ! gene_component_region -intersection_of: has_quality SO:0000731 ! fragmentary -relationship: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0000998 -name: recursive_splice_site -def: "A recursive splice site is a splice site which subdivides a large intron. Recursive splicing is a mechanism that splices large introns by sub dividing the intron at non exonic elements and alternate exons." [http://www.genetics.org/cgi/content/full/170/2/661] -synonym: "recursive splice site" EXACT [] -is_a: SO:0001419 ! 
cis_splice_site - -[Term] -id: SO:0000999 -name: BAC_end -def: "A region of sequence from the end of a BAC clone that may provide a highly specific marker." [SO:ke] -comment: Requested by Keith Boroevich December, 2006. -synonym: "BAC end" EXACT [] -synonym: "BAC end sequence" EXACT [] -synonym: "BES" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000153 ! BAC - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001003 -name: solo_LTR -def: "A recombination product between the 2 LTR of the same element." [SO:ke] -comment: Requested by Hadi Quesneville January 2007. -synonym: "solo LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0001004 -name: low_complexity -synonym: "low complexity" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0001005 -name: low_complexity_region -synonym: "low complexity region" EXACT [] -is_a: SO:0001410 ! 
experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001004 ! low_complexity -relationship: has_quality SO:0001004 ! low_complexity - -[Term] -id: SO:0001006 -name: prophage -def: "A phage genome after it has established in the host genome in a latent/immune state either as a plasmid or as an integrated \"island\"." [GOC:jl] -xref: http://en.wikipedia.org/wiki/Prophage "wiki" -is_a: SO:0000113 ! proviral_region - -[Term] -id: SO:0001007 -name: cryptic_prophage -def: "A remnant of an integrated prophage in the host genome or an \"island\" in the host genome that includes phage like-genes." [GOC:jl] -comment: This is not cryptic in the same sense as a cryptic gene or cryptic splice site. -synonym: "cryptic prophage" EXACT [] -xref: http://ecoliwiki.net/colipedia/index.php/Category:Cryptic_Prophage.w -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001008 -name: tetraloop -def: "A base-paired stem with loop of 4 non-hydrogen bonded nucleotides." [SO:ke] -xref: http://en.wikipedia.org/wiki/Tetraloop "wiki" -is_a: SO:0000313 ! stem_loop - -[Term] -id: SO:0001009 -name: DNA_constraint_sequence -def: "A double-stranded DNA used to control macromolecular structure and function." [http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au\]&dispmax=50] -synonym: "DNA constraint" EXACT [] -synonym: "DNA constraint sequence" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0001010 -name: i_motif -def: "A cytosine rich domain whereby strands associate both inter- and intramolecularly at moderately acidic pH." [PMID:9753739] -synonym: "i motif" EXACT [] -synonym: "short intercalated motif" EXACT [] -is_a: SO:0000142 ! DNA_sequence_secondary_structure - -[Term] -id: SO:0001011 -name: PNA_oligo -def: "Peptide nucleic acid, is a chemical not known to occur naturally but is artificially synthesized and used in some biological research and medical treatments. 
The PNA backbone is composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [SO:ke] -synonym: "peptide nucleic acid" EXACT [] -synonym: "PNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Peptide_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001184 ! PNA -relationship: has_quality SO:0001184 ! PNA - -[Term] -id: SO:0001012 -name: DNAzyme -def: "A DNA sequence with catalytic activity." [SO:cb] -comment: Added by request from Colin Batchelor. -synonym: "catalytic DNA" EXACT [] -synonym: "deoxyribozyme" RELATED [] -synonym: "DNA enzyme" EXACT [] -is_a: SO:0000696 ! oligo -intersection_of: SO:0000696 ! oligo -intersection_of: has_quality SO:0001185 ! enzymatic -relationship: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0001013 -name: MNP -def: "A multiple nucleotide polymorphism with alleles of common length > 1, for example AAA/TTT." [http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?rs=rs2067431] -synonym: "multiple nucleotide polymorphism" RELATED [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0001014 -name: intron_domain -comment: Requested by Colin Batchelor, Feb 2007. -synonym: "intron domain" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000188 ! intron - -[Term] -id: SO:0001015 -name: wobble_base_pair -def: "A type of non-canonical base pairing, most commonly between G and U, which is important for the secondary structure of RNAs. It has similar thermodynamic stability to the Watson-Crick pairing. Wobble base pairs only have two hydrogen bonds. Other wobble base pair possibilities are I-A, I-U and I-C." [PMID:11256617] -synonym: "wobble base pair" EXACT [] -synonym: "wobble pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Wobble_base_pair "wiki" -is_a: SO:0000028 ! 
base_pair - -[Term] -id: SO:0001016 -name: internal_guide_sequence -def: "A purine-rich sequence in the group I introns which determines the locations of the splice sites in group I intron splicing and has catalytic activity." [SO:cb] -synonym: "IGS" EXACT [] -synonym: "internal guide sequence" EXACT [] -is_a: SO:0001014 ! intron_domain -relationship: part_of SO:0000587 ! group_I_intron - -[Term] -id: SO:0001017 -name: silent_mutation -def: "A sequence variant that does not affect protein function. Silent mutations may occur in genic ( CDS, UTR, intron etc) and intergenic regions. Silent mutations may have affects on processes such as splicing and regulation." [SO:ke] -comment: Added in March 2007 in after meeting with pharmgkb. Although this term is in common usage, it is better to annotate with the most specific term possible, such as synonymous codon, intron variant etc. -synonym: "silent mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Silent_mutation "wiki" -xref: loinc:LA6700-4 "Silent" -is_a: SO:0001878 ! feature_variant - -[Term] -id: SO:0001018 -name: epitope -def: "A binding site that, in the molecule, interacts selectively and non-covalently with antibodies, B cells or T cells." [http://en.wikipedia.org/wiki/Epitope, SO:cb] -comment: Requested by Trish Whetzel. -xref: http://en.wikipedia.org/wiki/Epitope "wiki" -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -subset: SOFA -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0001020 -name: sequence_variant_affecting_copy_number -comment: OBSOLETE: This term was deleted as it conflated more than one term. The alteration is separate from the effect. 
-synonym: "mutation affecting copy number" EXACT [] -synonym: "sequence variant affecting copy number" EXACT [] -is_obsolete: true -replaced_by: SO:0001563 - -[Term] -id: SO:0001021 -name: chromosome_breakpoint -alt_id: SO:0001242 -synonym: "aberration breakpoint" EXACT [] -synonym: "aberration_junction" EXACT [] -synonym: "chromosome breakpoint" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0001022 -name: inversion_breakpoint -def: "The point within a chromosome where an inversion begins or ends." [SO:cb] -synonym: "inversion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001023 -name: allele -def: "An allele is one of a set of coexisting sequence variants of a gene." [SO:immuno_workshop] -synonym: "allelomorph" EXACT [] -xref: http://en.wikipedia.org/wiki/Allele "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000704 ! gene - -[Term] -id: SO:0001024 -name: haplotype -def: "A haplotype is one of a set of coexisting sequence variants of a haplotype block." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Haplotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000355 ! haplotype_block - -[Term] -id: SO:0001025 -name: polymorphic_sequence_variant -def: "A sequence variant that is segregating in one or more natural populations of a species." [SO:immuno_workshop] -synonym: "polymorphic sequence variant" EXACT [] -is_a: SO:0001023 ! allele - -[Term] -id: SO:0001026 -name: genome -def: "A genome is the sum of genetic material within a cell or virion." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genome "wiki" -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0001235 ! replicon -relationship: has_part SO:0001235 ! replicon - -[Term] -id: SO:0001027 -name: genotype -def: "A genotype is a variant genome, complete or incomplete." 
[SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0001026 ! genome - -[Term] -id: SO:0001028 -name: diplotype -def: "A diplotype is a pair of haplotypes from a given individual. It is a genotype where the phase is known." [SO:immuno_workshop] -is_a: SO:0001507 ! variant_collection - -[Term] -id: SO:0001029 -name: direction_attribute -synonym: "direction attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001030 -name: forward -def: "Forward is an attribute of the feature, where the feature is in the 5' to 3' direction." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001031 -name: reverse -def: "Reverse is an attribute of the feature, where the feature is in the 3' to 5' direction. Again could be applied to primer." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001032 -name: mitochondrial_DNA -comment: This terms is used by MO. -synonym: "mitochondrial DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_DNA "wiki" -is_a: SO:0000737 ! mitochondrial_sequence -intersection_of: SO:0000737 ! mitochondrial_sequence -intersection_of: has_quality SO:0000352 ! DNA -relationship: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001033 -name: chloroplast_DNA -comment: This term is used by MO. -synonym: "chloroplast DNA" EXACT [] -is_a: SO:0000745 ! chloroplast_sequence -intersection_of: SO:0000745 ! chloroplast_sequence -intersection_of: has_quality SO:0000352 ! DNA -relationship: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001034 -name: mirtron -def: "A de-branched intron which mimics the structure of pre-miRNA and enters the miRNA processing pathway without Drosha mediated cleavage." [PMID:17589500, SO:ma] -comment: Ruby et al. Nature 448:83 describe a new class of miRNAs that are derived from de-branched introns. -is_a: SO:0001014 ! 
intron_domain - -[Term] -id: SO:0001035 -name: piRNA -def: "A small non coding RNA, part of a silencing system that prevents the spreading of selfish genetic elements." [SO:ke] -synonym: "piwi-associated RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/PiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001036 -name: arginyl_tRNA -def: "A tRNA sequence that has an arginine anticodon, and a 3' arginine binding region." [SO:ke] -synonym: "arginyl tRNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000212 ! arginine_tRNA_primary_transcript - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001234 ! mobile -relationship: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0001038 -name: extrachromosomal_mobile_genetic_element -def: "An MGE that is not integrated into the host chromosome." [SO:ke] -synonym: "extrachromosomal mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001040 -name: integrated_plasmid -def: "A plasmid sequence that is integrated within the host chromosome." [SO:ke] -synonym: "integrated plasmid" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element -intersection_of: SO:0001039 ! integrated_mobile_genetic_element -intersection_of: derives_from SO:0000155 ! 
plasmid -relationship: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0001041 -name: viral_sequence -def: "The region of nucleotide sequence of a virus, a submicroscopic particle that replicates by infecting a host cell." [SO:ke] -comment: The definitions of the children of this term were revised Decemeber 2007 after discussion on song-devel. The resulting definitions are slightly unweildy but hopefully more logically correct. -synonym: "viral sequence" EXACT [] -synonym: "virus sequence" EXACT [] -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0001042 -name: phage_sequence -def: "The nucleotide sequence of a virus that infects bacteria." [SO:ke] -synonym: "bacteriophage" EXACT [] -synonym: "phage" EXACT [] -synonym: "phage sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Bacteriophage "wiki" -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001043 -name: attCtn_site -def: "An attachment site located on a conjugative transposon and used for site-specific integration of a conjugative transposon." [Phigo:at] -synonym: "attCtn site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000371 ! conjugative_transposon - -[Term] -id: SO:0001044 -name: nuclear_mt_pseudogene -def: "A nuclear pseudogene of either coding or non-coding mitochondria derived sequence." [SO:xp] -comment: Definition change requested by Val, 3172757. -synonym: "nuclear mitochondrial pseudogene" EXACT [] -synonym: "nuclear mt pseudogene" EXACT [] -synonym: "NUMT" EXACT [] -xref: http://en.wikipedia.org/wiki/Numt "wikipedia" -is_a: SO:0001760 ! non_processed_pseudogene - -[Term] -id: SO:0001045 -name: cointegrated_plasmid -def: "A MGE region consisting of two fused plasmids resulting from a replicative transposition event." [phigo:at] -synonym: "cointegrated plasmid" EXACT [] -synonym: "cointegrated replicon" EXACT [] -is_a: SO:0001039 ! 
integrated_mobile_genetic_element - -[Term] -id: SO:0001046 -name: IRLinv_site -def: "Component of the inversion site located at the left of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRLinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001047 -name: IRRinv_site -def: "Component of the inversion site located at the right of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRRinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001048 -name: inversion_site_part -def: "A region located within an inversion site." [SO:ke] -comment: A term created to allow the parts of an inversion site have an is_a path back to the root. -synonym: "inversion site part" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0001049 -name: defective_conjugative_transposon -def: "An island that contains genes for integration/excision and the gene and site for the initiation of intercellular transfer by conjugation. It can be complemented for transfer by a conjugative transposon." [Phigo:ariane] -synonym: "defective conjugative transposon" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001050 -name: repeat_fragment -def: "A portion of a repeat, interrupted by the insertion of another element." [SO:ke] -comment: Requested by Chris Smith, and others at Flybase to help annotate nested repeats. -synonym: "repeat fragment" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0001649 ! 
nested_repeat - -[Term] -id: SO:0001051 -name: nested_region -is_obsolete: true - -[Term] -id: SO:0001052 -name: nested_repeat -is_obsolete: true - -[Term] -id: SO:0001053 -name: nested_transposon -is_obsolete: true - -[Term] -id: SO:0001054 -name: transposon_fragment -def: "A portion of a transposon, interrupted by the insertion of another element." [SO:ke] -synonym: "transposon fragment" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0001648 ! nested_transposon - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -subset: SOFA -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." [SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001057 -name: enhanceosome -is_obsolete: true - -[Term] -id: SO:0001058 -name: promoter_targeting_sequence -def: "A transcriptional_cis_regulatory_region that restricts the activity of a CRM to a single promoter and which functions only when both itself and an insulator are located between the CRM and the promoter." [SO:regcreative] -synonym: "promoter targeting sequence" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. 
-subset: SOFA -synonym: "partially characterised change in DNA sequence" NARROW [] -synonym: "partially_characterised_change_in_DNA_sequence" NARROW [] -synonym: "sequence alteration" EXACT [] -synonym: "uncharacterised_change_in_nucleotide_sequence" NARROW [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001060 -name: sequence_variant -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -synonym: "sequence variant" EXACT [] - -[Term] -id: SO:0001061 -name: propeptide_cleavage_site -alt_id: BS:00063 -def: "The propeptide_cleavage_site is the arginine/lysine boundary on a propeptide where cleavage occurs." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "propeptide cleavage site" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0001062 -name: propeptide -alt_id: BS:00077 -def: "Part of a peptide chain which is cleaved off during the formation of the mature protein." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "propep" RELATED [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Propeptide "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001064 -name: active_peptide -alt_id: BS:00076 -def: "Active peptides are proteins which are biologically active, released from a precursor molecule." [EBIBS:GAR, UniProt:curation_manual] -comment: Hormones, neuropeptides, antimicrobial peptides, are active peptides. They are typically short (<40 amino acids) in length. 
-subset: biosapiens -synonym: "active peptide" EXACT [] -synonym: "peptide" BROAD [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Peptide "wiki" -is_a: SO:0000419 ! mature_protein_region - -[Term] -id: SO:0001066 -name: compositionally_biased_region_of_peptide -alt_id: BS:00068 -def: "Polypeptide region that is rich in a particular amino acid or homopolymeric and greater than three residues in length." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "compbias" RELATED [uniprot:feature_type] -synonym: "compositional bias" RELATED [] -synonym: "compositionally biased" RELATED [] -synonym: "compositionally biased region of peptide" RELATED [] -synonym: "compositionally_biased_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001067 -name: polypeptide_motif -alt_id: BS:00032 -def: "A sequence motif is a short (up to 20 amino acids) region of biological interest. Such motifs, although they are too short to constitute functional domains, share sequence similarities and are conserved in different proteins. They display a common function (protein-binding, subcellular location etc.)." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "motif" BROAD [uniprot:feature_type] -synonym: "polypeptide motif" EXACT [] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001068 -name: polypeptide_repeat -alt_id: BS:00070 -def: "A polypeptide_repeat is a single copy of an internal sequence repetition." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "polypeptide repeat" EXACT [] -synonym: "repeat" RELATED [uniprot:feature_type] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001070 -name: polypeptide_structural_region -alt_id: BS:00337 -def: "Region of polypeptide with a given structural property." [EBIBS:GAR, SO:cb] -comment: Range. 
-subset: biosapiens -synonym: "polypeptide structural region" EXACT [] -synonym: "structural_region" RELATED [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001071 -name: membrane_structure -alt_id: BS:00128 -def: "Arrangement of the polypeptide with respect to the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "membrane structure" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001072 -name: extramembrane_polypeptide_region -alt_id: BS:00154 -def: "Polypeptide region that is localized outside of a lipid bilayer." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "extramembrane" RELATED BS [] -synonym: "extramembrane polypeptide region" EXACT [] -synonym: "extramembrane_region" RELATED BS [] -synonym: "topo_dom" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001073 -name: cytoplasmic_polypeptide_region -alt_id: BS:00145 -def: "Polypeptide region that is localized inside the cytoplasm." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "cytoplasm_location" EXACT BS [] -synonym: "cytoplasmic polypeptide region" EXACT [] -synonym: "inside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001074 -name: non_cytoplasmic_polypeptide_region -alt_id: BS:00144 -def: "Polypeptide region that is localized outside of a lipid bilayer and outside of the cytoplasm." [EBIBS:GAR, SO:cb] -comment: This could be inside an organelle within the cell. -subset: biosapiens -synonym: "non cytoplasmic polypeptide region" EXACT [] -synonym: "non_cytoplasm_location" EXACT BS [] -synonym: "outside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001075 -name: intramembrane_polypeptide_region -alt_id: BS:00156 -def: "Polypeptide region present in the lipid bilayer." 
[EBIBS:GAR] -subset: biosapiens -synonym: "intramembrane" RELATED BS [] -synonym: "intramembrane polypeptide region" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001076 -name: membrane_peptide_loop -alt_id: BS:00155 -def: "Polypeptide region localized within the lipid bilayer where both ends traverse the same membrane." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "membrane peptide loop" EXACT [] -synonym: "membrane_loop" RELATED BS [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001077 -name: transmembrane_polypeptide_region -alt_id: BS:00158 -def: "Polypeptide region traversing the lipid bilayer." [EBIBS:GAR, UniProt:curator_manual] -subset: biosapiens -synonym: "transmem" RELATED BS [uniprot:feature_type] -synonym: "transmembrane" RELATED BS [] -synonym: "transmembrane polypeptide region" EXACT [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001078 -name: polypeptide_secondary_structure -alt_id: BS:00003 -def: "A region of peptide with secondary structure has hydrogen bonding along the peptide chain that causes a defined conformation of the chain." [EBIBS:GAR] -comment: Biosapien term was secondary_structure. -subset: biosapiens -synonym: "2nary structure" RELATED BS [] -synonym: "polypeptide secondary structure" EXACT [] -synonym: "secondary structure" RELATED BS [] -synonym: "secondary structure region" RELATED BS [] -synonym: "secondary_structure" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Secondary_structure "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001079 -name: polypeptide_structural_motif -alt_id: BS:0000338 -def: "Motif is a three-dimensional structural element within the chain, which appears also in a variety of other molecules. Unlike a domain, a motif does not need to form a stable globular unit." 
[EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide structural motif" RELATED [] -synonym: "structural_motif" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Structural_motif "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001080 -name: coiled_coil -alt_id: BS:00041 -def: "A coiled coil is a structural motif in proteins, in which alpha-helices are coiled together like the strands of a rope." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "coiled" RELATED BS [uniprot:feature_type] -synonym: "coiled coil" EXACT [] -xref: http://en.wikipedia.org/wiki/Coiled_coil "wiki" -is_a: SO:0001079 ! polypeptide_structural_motif - -[Term] -id: SO:0001081 -name: helix_turn_helix -alt_id: BS:00147 -def: "A motif comprising two helices separated by a turn." [EBIBS:GAR] -subset: biosapiens -synonym: "helix turn helix" EXACT [] -synonym: "helix-turn-helix" EXACT [] -synonym: "HTH" RELATED BS [] -is_a: SO:0001079 ! polypeptide_structural_motif -relationship: has_part SO:0001114 ! peptide_helix -relationship: has_part SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001082 -name: polypeptide_sequencing_information -alt_id: BS:00125 -def: "Incompatibility in the sequence due to some experimental problem." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "sequencing_information" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0001083 -name: non_adjacent_residues -alt_id: BS:00182 -def: "Indicates that two consecutive residues in a fragment sequence are not consecutive in the full-length protein and that there are a number of unsequenced residues between them." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "non consecutive" EXACT [] -synonym: "non_cons" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! 
polypeptide_sequencing_information - -[Term] -id: SO:0001084 -name: non_terminal_residue -alt_id: BS:00072 -def: "The residue at an extremity of the sequence is not the terminal residue." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "non terminal" EXACT [] -synonym: "non_ter" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001085 -name: sequence_conflict -alt_id: BS:00069 -def: "Different sources report differing sequences." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "conflict" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001086 -name: sequence_uncertainty -alt_id: BS:00181 -def: "Describes the positions in a sequence where the authors are unsure about the sequence assignment." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "unsure" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001087 -name: cross_link -alt_id: BS:00178 -def: "Posttranslationally formed amino acid bonds." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "cross link" EXACT [] -synonym: "crosslink" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001088 -name: disulfide_bond -alt_id: BS:00028 -def: "The covalent bond between sulfur atoms that binds two peptide chains or different parts of one peptide chain and is a structural determinant in many protein molecules." [EBIBS:GAR, UniProt:curation_manual] -comment: 2 discreet & joined. 
-subset: biosapiens -synonym: "disulfid" RELATED [] -synonym: "disulfide" RELATED [] -synonym: "disulfide bond" RELATED [] -synonym: "disulphide" EXACT [] -synonym: "disulphide bond" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001089 -name: post_translationally_modified_region -alt_id: BS:00052 -def: "A region where a transformation occurs in a protein after it has been synthesized. This which may regulate, stabilize, crosslink or introduce new chemical functionalities in the protein." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mod_res" EXACT [uniprot:feature_type] -synonym: "modified residue" EXACT [] -synonym: "post_translational_modification" EXACT [] -xref: http://en.wikipedia.org/wiki/Post_translational_modification "wiki" -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001090 -name: covalent_binding_site -alt_id: BS:00246 -def: "Binding involving a covalent bond." [EBIBS:GAR] -subset: biosapiens -synonym: "covalent binding site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001091 -name: non_covalent_binding_site -alt_id: BS:00029 -def: "Binding site for any chemical group (co-enzyme, prosthetic group, etc.)." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "binding" RELATED [uniprot:curation] -synonym: "binding site" RELATED [] -synonym: "non covalent binding site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001092 -name: polypeptide_metal_contact -alt_id: BS:00027 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with metal ions." [EBIBS:GAR, SO:cb, UniProt:curation_manual] -comment: Residue is part of a binding site for a metal ion. -subset: biosapiens -synonym: "metal_binding" RELATED [] -is_a: SO:0001656 ! metal_binding_site -is_a: SO:0100002 ! 
molecular_contact_region - -[Term] -id: SO:0001093 -name: protein_protein_contact -alt_id: BS:00131 -def: "A binding site that, in the protein molecule, interacts selectively and non-covalently with polypeptide residues." [EBIBS:GAR, UniProt:Curation_manual] -subset: biosapiens -synonym: "protein protein contact" EXACT [] -synonym: "protein protein contact site" EXACT [] -synonym: "protein_protein_interaction" RELATED [] -xref: http://en.wikipedia.org/wiki/Protein_protein_interaction "wiki" -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001094 -name: polypeptide_calcium_ion_contact_site -alt_id: BS:00186 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with calcium ions." [EBIBS:GAR] -comment: Residue involved in contact with calcium. -subset: biosapiens -synonym: "ca bind" RELATED [] -synonym: "Ca_contact_site" EXACT [] -synonym: "polypeptide calcium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001095 -name: polypeptide_cobalt_ion_contact_site -alt_id: BS:00136 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with cobalt ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Co_contact_site" EXACT [] -synonym: "polypeptide cobalt ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001096 -name: polypeptide_copper_ion_contact_site -alt_id: BS:00146 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with copper ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Cu_contact_site" EXACT [] -synonym: "polypeptide copper ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001097 -name: polypeptide_iron_ion_contact_site -alt_id: BS:00137 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with iron ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Fe_contact_site" EXACT [] -synonym: "polypeptide iron ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001098 -name: polypeptide_magnesium_ion_contact_site -alt_id: BS:00187 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with magnesium ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Mg_contact_site" EXACT [] -synonym: "polypeptide magnesium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001099 -name: polypeptide_manganese_ion_contact_site -alt_id: BS:00140 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with manganese ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Mn_contact_site" EXACT [] -synonym: "polypeptide manganese ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001100 -name: polypeptide_molybdenum_ion_contact_site -alt_id: BS:00141 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with molybdenum ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Mo_contact_site" EXACT [] -synonym: "polypeptide molybdenum ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001101 -name: polypeptide_nickel_ion_contact_site -alt_id: BS:00142 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with nickel ions." [EBIBS:GAR] -subset: biosapiens -synonym: "Ni_contact_site" EXACT [] -synonym: "polypeptide nickel ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001102 -name: polypeptide_tungsten_ion_contact_site -alt_id: BS:00143 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with tungsten ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "polypeptide tungsten ion contact site" EXACT [] -synonym: "W_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001103 -name: polypeptide_zinc_ion_contact_site -alt_id: BS:00185 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with zinc ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "polypeptide zinc ion contact site" EXACT [] -synonym: "Zn_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001104 -name: catalytic_residue -alt_id: BS:00026 -def: "Amino acid involved in the activity of an enzyme." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "act_site" RELATED [uniprot:feature_type] -synonym: "active site residue" EXACT [] -synonym: "catalytic residue" EXACT [] -is_a: SO:0001237 ! amino_acid -relationship: part_of SO:0100019 ! polypeptide_catalytic_motif - -[Term] -id: SO:0001105 -name: polypeptide_ligand_contact -alt_id: BS:00157 -def: "Residues which interact with a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide ligand contact" EXACT [] -synonym: "protein-ligand interaction" RELATED [] -is_a: SO:0001657 ! ligand_binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001106 -name: asx_motif -alt_id: BS:00202 -def: "A motif of five consecutive residues and two H-bonds in which: Residue(i) is Aspartate or Asparagine (Asx), side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3), main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001107 -name: beta_bulge -alt_id: BS:00208 -def: "A motif of three residues within a beta-sheet in which the main chains of two consecutive residues are H-bonded to that of the third, and in which the dihedral angles are as follows: Residue(i): -140 degrees < phi(l) -20 degrees , -90 degrees < psi(l) < 40 degrees. Residue (i+1): -180 degrees < phi < -25 degrees or +120 degrees < phi < +180 degrees, +40 degrees < psi < +180 degrees or -180 degrees < psi < -120 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge" EXACT [] -xref: http://en.wikipedia.org/wiki/Beta_bulge "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001108 -name: beta_bulge_loop -alt_id: BS:00209 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds. Beta bulge loops often occur at the loop ends of beta-hairpins." [EBIBS:GAR, Http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001109 -name: beta_bulge_loop_five -alt_id: BS:00210 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+4), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+3), these loops have an RL nest at residues i+2 and i+3." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop five" EXACT [] -is_a: SO:0001108 ! 
beta_bulge_loop - -[Term] -id: SO:0001110 -name: beta_bulge_loop_six -alt_id: BS:00211 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+5), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+4), these loops have an RL nest at residues i+3 and i+4." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop six" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001111 -name: beta_strand -alt_id: BS:00042 -def: "A beta strand describes a single length of polypeptide chain that forms part of a beta sheet. A single continuous stretch of amino acids adopting an extended conformation of hydrogen bonds between the N-O and the C=O of another part of the peptide. This forms a secondary protein structure in which two or more extended polypeptide regions are hydrogen-bonded to one another in a planar array." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "strand" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Beta_sheet "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001112 -name: antiparallel_beta_strand -alt_id: BS:0000341 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (one running N-terminal to C-terminal and one running C-terminal to N-terminal). Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i) and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they form two mutual backbone hydrogen bonds to each other's flanking peptide groups; this is known as a close pair of hydrogen bonds. The peptide backbone dihedral angles (phi, psi) are about (-140 degrees, 135 degrees) in antiparallel sheets." 
[EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "antiparallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001113 -name: parallel_beta_strand -alt_id: BS:00151 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (both running N-terminal to C-terminal). This orientation is slightly less stable because it introduces nonplanarity in the inter-strand hydrogen bonding pattern. Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i)and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they do not hydrogen bond to each other; rather, one residue forms hydrogen bonds to the residues that flank the other (but not vice versa). For example, residue i may form hydrogen bonds to residues j - 1 and j + 1; this is known as a wide pair of hydrogen bonds. By contrast, residue j may hydrogen-bond to different residues altogether, or to none at all. The dihedral angles (phi, psi) are about (-120 degrees, 115 degrees) in parallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "parallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001114 -name: peptide_helix -alt_id: BS:00152 -def: "A helix is a secondary_structure conformation where the peptide backbone forms a coil." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "helix" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001115 -name: left_handed_peptide_helix -alt_id: BS:00222 -def: "A left handed helix is a region of peptide where the coiled conformation turns in an anticlockwise, left handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix-l" RELATED [] -synonym: "left handed helix" EXACT [] -is_a: SO:0001114 ! 
peptide_helix - -[Term] -id: SO:0001116 -name: right_handed_peptide_helix -alt_id: BS:0000339 -def: "A right handed helix is a region of peptide where the coiled conformation turns in a clockwise, right handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix" RELATED BS [] -synonym: "right handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001117 -name: alpha_helix -alt_id: BS:00040 -def: "The helix has 3.6 residues per turn which corersponds to a translation of 1.5 angstroms (= 0.15 nm) along the helical axis. Every backbone N-H group donates a hydrogen bond to the backbone C=O group of the amino acid four residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "a-helix" RELATED BS [] -synonym: "helix" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Alpha_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001118 -name: pi_helix -alt_id: BS:00153 -def: "The pi helix has 4.1 residues per turn and a translation of 1.15 (=0.115 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid five residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "pi helix" EXACT [] -xref: http://en.wikipedia.org/wiki/Pi_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001119 -name: three_ten_helix -alt_id: BS:0000340 -def: "The 3-10 helix has 3 residues per turn with a translation of 2.0 angstroms (=0.2 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid three residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "3(10) helix" EXACT [] -synonym: "3-10 helix" EXACT [] -synonym: "310 helix" EXACT [] -synonym: "three ten helix" EXACT [] -xref: http://en.wikipedia.org/wiki/310_helix "wiki" -is_a: SO:0001116 ! 
right_handed_peptide_helix - -[Term] -id: SO:0001120 -name: polypeptide_nest_motif -alt_id: BS:00223 -def: "A motif of two consecutive residues with dihedral angles. Nest should not have Proline as any residue. Nests frequently occur as parts of other motifs such as Schellman loops." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest" RELATED BS [] -synonym: "nest_motif" EXACT [] -synonym: "polypeptide nest motif" RELATED [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001121 -name: polypeptide_nest_left_right_motif -alt_id: BS:00224 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_left_right" EXACT [] -synonym: "nest_lr" EXACT [] -synonym: "polypeptide nest left right motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001122 -name: polypeptide_nest_right_left_motif -alt_id: BS:00225 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_right_left" EXACT [] -synonym: "nest_rl" EXACT [] -synonym: "polypeptide nest right left motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001123 -name: schellmann_loop -alt_id: BS:00226 -def: "A motif of six or seven consecutive residues that contains two H-bonds." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "paperclip" RELATED BS [] -synonym: "paperclip loop" RELATED [] -synonym: "schellmann loop" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001124 -name: schellmann_loop_seven -alt_id: BS:00228 -def: "Wild type: A motif of seven consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+6), the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+5)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop seven" EXACT [] -synonym: "seven-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001125 -name: schellmann_loop_six -alt_id: BS:00227 -def: "Common Type: A motif of six consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+5) the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop six" EXACT [] -synonym: "six-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001126 -name: serine_threonine_motif -alt_id: BS:00229 -def: "A motif of five consecutive residues and two hydrogen bonds in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3) , the main-chain CO group of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine motif" EXACT [] -synonym: "st motif" EXACT [] -synonym: "st_motif" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001127 -name: serine_threonine_staple_motif -alt_id: BS:00230 -def: "A motif of four or five consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain OH of residue(i) is H-bonded to the main-chain CO of residue(i3) or (i4), Phi angles of residues(i1), (i2) and (i3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine threonine staple motif" EXACT [] -synonym: "st_staple" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001128 -name: polypeptide_turn_motif -alt_id: BS:00148 -def: "A reversal in the direction of the backbone of a protein that is stabilized by hydrogen bond between backbone NH and CO groups, involving no more than 4 amino acid residues." [EBIBS:GAR, uniprot:feature_type] -comment: Range. -subset: biosapiens -synonym: "turn" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001129 -name: asx_turn_left_handed_type_one -alt_id: BS:00206 -def: "Left handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type one" EXACT [] -synonym: "asx_turn_il" RELATED [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001130 -name: asx_turn_left_handed_type_two -alt_id: BS:00204 -def: "Left handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type two" EXACT [] -synonym: "asx_turn_iil" EXACT [] -is_a: SO:0000912 ! 
asx_turn - -[Term] -id: SO:0001131 -name: asx_turn_right_handed_type_two -alt_id: BS:00205 -def: "Right handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn right handed type two" EXACT [] -synonym: "asx_turn_iir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001132 -name: asx_turn_right_handed_type_one -alt_id: BS:00207 -def: "Right handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn type right handed type one" EXACT [] -synonym: "asx_turn_ir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001133 -name: beta_turn -alt_id: BS:00212 -def: "A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles of the second and third residues, which are the basis for sub-categorization." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001134 -name: beta_turn_left_handed_type_one -alt_id: BS:00215 -def: "Left handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles:- Residue(i+1): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees. 
Residue(i+2): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type one" EXACT [] -synonym: "beta_turn_il" EXACT [] -synonym: "type I' beta turn" EXACT [] -synonym: "type I' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001135 -name: beta_turn_left_handed_type_two -alt_id: BS:00213 -def: "Left handed type II: A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees > phi > -20 degrees, +80 degrees > psi > +180 degrees. Residue(i+2): +20 degrees > phi > +140 degrees, -40 degrees > psi > +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type two" EXACT [] -synonym: "beta_turn_iil" EXACT [] -synonym: "type II' beta turn" EXACT [] -synonym: "type II' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001136 -name: beta_turn_right_handed_type_one -alt_id: BS:00216 -def: "Right handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+2): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type one" EXACT [] -synonym: "beta_turn_ir" EXACT [] -synonym: "type I beta turn" EXACT [] -synonym: "type I turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001137 -name: beta_turn_right_handed_type_two -alt_id: BS:00214 -def: "Right handed type II:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, +80 degrees < psi < +180 degrees. Residue(i+2): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type two" EXACT [] -synonym: "beta_turn_iir" EXACT [] -synonym: "type II beta turn" EXACT [] -synonym: "type II turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001138 -name: gamma_turn -alt_id: BS:00219 -def: "Gamma turns, defined for 3 residues i,( i+1),( i+2) if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001139 -name: gamma_turn_classic -alt_id: BS:00220 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=75.0 - psi(i+1)=-64.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "classic gamma turn" EXACT [] -synonym: "gamma turn classic" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001140 -name: gamma_turn_inverse -alt_id: BS:00221 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=-79.0 - psi(i+1)=69.0." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn inverse" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001141 -name: serine_threonine_turn -alt_id: BS:00231 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine turn" EXACT [] -synonym: "st_turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001142 -name: st_turn_left_handed_type_one -alt_id: BS:00234 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type one" EXACT [] -synonym: "st_turn_il" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001143 -name: st_turn_left_handed_type_two -alt_id: BS:00232 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type two" EXACT [] -synonym: "st_turn_iil" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001144 -name: st_turn_right_handed_type_one -alt_id: BS:00235 -def: "The peptide twists in an clockwise, right handed manner. 
The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type one" EXACT [] -synonym: "st_turn_ir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001145 -name: st_turn_right_handed_type_two -alt_id: BS:00233 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type two" EXACT [] -synonym: "st_turn_iir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001146 -name: polypeptide_variation_site -alt_id: BS:00336 -def: "A site of sequence variation (alteration). Alternative sequence due to naturally occuring events such as polymorphisms and altermatve splicing or experimental methods such as site directed mutagenesis." [EBIBS:GAR, SO:ke] -comment: For example, was a substitution natural or mutated as part of an experiment? This term is added to merge the biosapiens term sequence_variations. -subset: biosapiens -synonym: "sequence_variations" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001147 -name: natural_variant_site -alt_id: BS:00071 -def: "Describes the natural sequence variants due to polymorphisms, disease-associated mutations, RNA editing and variations between strains, isolates or cultivars." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. 
-subset: biosapiens -synonym: "natural_variant" BROAD [] -synonym: "sequence variation" BROAD [] -synonym: "variant" BROAD [uniprot:feature_type] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001148 -name: mutated_variant_site -alt_id: BS:00036 -def: "Site which has been experimentally altered." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mutagen" EXACT BS [uniprot:feature_type] -synonym: "mutagenesis" EXACT [] -synonym: "mutated_site" EXACT [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001149 -name: alternate_sequence_site -alt_id: BS:00073 -alt_id: SO:0001065 -def: "Description of sequence variants produced by alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "alternative_sequence" EXACT [] -synonym: "isoform" NARROW [] -synonym: "sequence variation" NARROW [] -synonym: "var_seq" EXACT [uniprot:feature_type] -synonym: "varsplic" NARROW [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001150 -name: beta_turn_type_six -def: "A motif of four consecutive peptide resides of type VIa or type VIb and where the i+2 residue is cis-proline." [SO:cb] -subset: biosapiens -synonym: "beta turn type six" EXACT [] -synonym: "cis-proline loop" EXACT [] -synonym: "type VI beta turn" EXACT [] -synonym: "type VI turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001151 -name: beta_turn_type_six_a -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -90 degrees, psi ~ 0 degrees." 
[PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six a" EXACT [] -synonym: "type VIa beta turn" EXACT [] -synonym: "type VIa turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001152 -name: beta_turn_type_six_a_one -subset: biosapiens -synonym: "beta turn type six a one" EXACT [] -synonym: "type VIa1 beta turn" EXACT [] -synonym: "type VIa1 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001153 -name: beta_turn_type_six_a_two -subset: biosapiens -synonym: "beta turn type six a two" EXACT [] -synonym: "type VIa2 beta turn" EXACT [] -synonym: "type VIa2 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001154 -name: beta_turn_type_six_b -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -120 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -60 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six b" EXACT [] -synonym: "type VIb beta turn" EXACT [] -synonym: "type VIb turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001155 -name: beta_turn_type_eight -def: "A motif of four consecutive peptide residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ -30 degrees. Residue(i+2): phi ~ -120 degrees, psi ~ 120 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type eight" EXACT [] -synonym: "type VIII beta turn" EXACT [] -synonym: "type VIII turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001156 -name: DRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -10 and -60 relative to the TSS. Consensus sequence is WATCGATW." [PMID:12537576] -comment: This consensus sequence was identified computationally using the MEME algorithm within core promoter sequences from -60 to +40, with an E value of 1.7e-183. Tends to co-occur with Motif 7. Tends to not occur with DPE motif (SO:0000015) or motif 10. -synonym: "DRE motif" EXACT [] -synonym: "NDM4" EXACT [] -synonym: "WATCGATW_motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001157 -name: DMv4_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements with respect to the TSS (+1). Consensus sequence is YGGTCACACTR. Marked spatial preference within core promoter; tend to occur near the TSS, although not as tightly as INR (SO:0000014)." [PMID:16827941:12537576] -synonym: "directional motif v4" EXACT [] -synonym: "DMv4" EXACT [] -synonym: "DMv4 motif" EXACT [] -synonym: "motif 1 element" EXACT [] -synonym: "promoter motif 1" EXACT [] -synonym: "YGGTCACATR" NARROW [] -is_a: SO:0001659 ! promoter_element - -[Term] -id: SO:0001158 -name: E_box_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and +1 relative to the TSS. Consensus sequence is AWCAGCTGWT. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015)." [PMID:12537576:16827941] -synonym: "AWCAGCTGWT" NARROW [] -synonym: "E box motif" EXACT [] -synonym: "generic E box motif" EXACT [] -synonym: "NDM5" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001159 -name: DMv5_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -50 and -10 relative to the TSS. Consensus sequence is KTYRGTATWTTT. Tends to co-occur with DMv4 (SO:0001157) . Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576:16827941] -synonym: "directional motif v5" EXACT [] -synonym: "DMv5" EXACT [] -synonym: "DMv5 motif" EXACT [] -synonym: "KTYRGTATWTTT" NARROW [] -synonym: "promoter motif 6" RELATED [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001160 -name: DMv3_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -30 and +15 relative to the TSS. Consensus sequence is KNNCAKCNCTRNY. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015) or MTE (0001162)." [PMID:12537576:16827941] -synonym: "directional motif v3" EXACT [] -synonym: "DMv3" EXACT [] -synonym: "DMv3 motif" EXACT [] -synonym: "KNNCAKCNCTRNY" NARROW [] -synonym: "promoter motif 7" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001161 -name: DMv2_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and -45 relative to the TSS. Consensus sequence is MKSYGGCARCGSYSS. Tends to co-occur with DMv3 (SO:0001160). Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576:16827941] -synonym: "directional motif v2" EXACT [] -synonym: "DMv2" EXACT [] -synonym: "DMv2 motif" EXACT [] -synonym: "MKSYGGCARCGSYSS" NARROW [] -synonym: "promoter motif 8" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001162 -name: MTE -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between +20 and +30 relative to the TSS. Consensus sequence is CSARCSSAACGS. Tends to co-occur with INR motif (SO:0000014). Tends to not occur with DPE motif (SO:0000015) or DMv5 (SO:0001159)." [PMID:12537576:15231738, PMID:16858867] -synonym: "CSARCSSAACGS" NARROW [] -synonym: "motif ten element" EXACT [] -synonym: "motif_ten_element" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter - -[Term] -id: SO:0001163 -name: INR1_motif -def: "A promoter motif with consensus sequence TCATTCG." [PMID:16827941] -synonym: "directional motif p3" EXACT [] -synonym: "directional promoter motif 3" EXACT [] -synonym: "DMp3" EXACT [] -synonym: "INR1 motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001164 -name: DPE1_motif -def: "A promoter motif with consensus sequence CGGACGT." [PMID:16827941] -synonym: "directional motif 5" EXACT [] -synonym: "directional promoter motif 5" RELATED [] -synonym: "DMp5" EXACT [] -synonym: "DPE1 motif" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001165 -name: DMv1_motif -def: "A promoter motif with consensus sequence CARCCCT." [PMID:16827941] -synonym: "directional promoter motif v1" RELATED [] -synonym: "DMv1" RELATED [] -synonym: "DMv1 motif" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001166 -name: GAGA_motif -def: "A non directional promoter motif with consensus sequence GAGAGCG." [PMID:16827941] -synonym: "GAGA" EXACT [] -synonym: "GAGA motif" EXACT [] -synonym: "NDM1" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001167 -name: NDM2_motif -def: "A non directional promoter motif with consensus CGMYGYCR." [PMID:16827941] -synonym: "NDM2" EXACT [] -synonym: "NDM2 motif" EXACT [] -synonym: "non directional promoter motif 2" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001168 -name: NDM3_motif -def: "A non directional promoter motif with consensus sequence GAAAGCT." [PMID:16827941] -synonym: "NDM3" EXACT [] -synonym: "NDM3 motif" EXACT [] -synonym: "non directional motif 3" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001169 -name: ds_RNA_viral_sequence -def: "A ds_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded RNA." [SO:ke] -synonym: "double stranded RNA virus sequence" EXACT [] -synonym: "ds RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001170 -name: polinton -def: "A kind of DNA transposon that populates the genomes of protists, fungi, and animals, characterized by a unique set of proteins necessary for their transposition, including a protein-primed DNA polymerase B, retroviral integrase, cysteine protease, and ATPase. Polintons are characterized by 6-bp target site duplications, terminal-inverted repeats that are several hundred nucleotides long, and 5'-AG and TC-3' termini. Polintons exist as autonomous and nonautonomous elements." [PMID:16537396] -synonym: "maverick element" RELATED [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0001171 -name: rRNA_21S -def: "A component of the large ribosomal subunit in mitochondrial rRNA." [RSC:cb] -synonym: "21S LSU rRNA" EXACT [] -synonym: "21S ribosomal RNA" EXACT [] -synonym: "21S rRNA" EXACT [] -synonym: "rRNA 21S" EXACT [] -is_a: SO:0000651 ! 
large_subunit_rRNA - -[Term] -id: SO:0001172 -name: tRNA_region -def: "A region of a tRNA." [RSC:cb] -synonym: "tRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000253 ! tRNA - -[Term] -id: SO:0001173 -name: anticodon_loop -def: "A sequence of seven nucleotide bases in tRNA which contains the anticodon. It has the sequence 5'-pyrimidine-purine-anticodon-modified purine-any base-3." [ISBN:0716719207] -synonym: "anti-codon loop" EXACT [] -synonym: "anticodon loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001174 -name: anticodon -def: "A sequence of three nucleotide bases in tRNA which recognizes a codon in mRNA." [RSC:cb] -synonym: "anti-codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Anticodon "wiki" -is_a: SO:0001172 ! tRNA_region -relationship: part_of SO:0001173 ! anticodon_loop - -[Term] -id: SO:0001175 -name: CCA_tail -def: "Base sequence at the 3' end of a tRNA. The 3'-hydroxyl group on the terminal adenosine is the attachment point for the amino acid." [ISBN:0716719207] -synonym: "CCA sequence" EXACT [] -synonym: "CCA tail" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001176 -name: DHU_loop -def: "Non-base-paired sequence of nucleotide bases in tRNA. It contains several dihydrouracil residues." [ISBN:071671920] -synonym: "D loop" RELATED [] -synonym: "DHU loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001177 -name: T_loop -def: "Non-base-paired sequence of three nucleotide bases in tRNA. It has sequence T-Psi-C." [ISBN:0716719207] -synonym: "T loop" EXACT [] -synonym: "TpsiC loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001178 -name: pyrrolysine_tRNA_primary_transcript -def: "A primary transcript encoding pyrrolysyl tRNA (SO:0000766)." [RSC:cb] -synonym: "pyrrolysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0001179 -name: U3_snoRNA -def: "U3 snoRNA is a member of the box C/D class of small nucleolar RNAs. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -comment: The definition is most of the old definition for snoRNA (SO:0000275). -synonym: "small nucleolar RNA U3" EXACT [] -synonym: "snoRNA U3" EXACT [] -synonym: "U3 small nucleolar RNA" EXACT [] -synonym: "U3 snoRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Small_nucleolar_RNA_U3 "wiki" -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0001180 -name: AU_rich_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is rich in AUUUA pentamers. Messenger RNAs bearing multiple AU-rich elements are often unstable." [PMID:7892223] -synonym: "ARE" RELATED [] -synonym: "AU rich element" EXACT [] -synonym: "AU-rich element" EXACT [] -xref: http://en.wikipedia.org/wiki/AU-rich_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! 
three_prime_UTR - -[Term] -id: SO:0001181 -name: Bruno_response_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is bound by the Drosophila Bruno protein and its homologs." [PMID:10893231] -comment: Not to be confused with BRE_motif (SO:0000016), which binds transcription factor II B. -synonym: "BRE" RELATED [] -synonym: "Bruno response element" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001182 -name: iron_responsive_element -def: "A regulatory sequence found in the 5' and 3' UTRs of many mRNAs which encode iron-binding proteins. It has a hairpin structure and is recognized by trans-acting proteins known as iron-regulatory proteins." [PMID:3198610, PMID:8710843] -synonym: "IRE" EXACT [] -synonym: "iron responsive element" EXACT [] -xref: http://en.wikipedia.org/wiki/Iron_responsive_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0001183 -name: morpholino_backbone -def: "An attribute describing a sequence composed of nucleobases bound to a morpholino backbone. A morpholino backbone consists of morpholine (CHEBI:34856) rings connected by phosphorodiamidate linkages." [RSC:cb] -comment: Do not use this for feature annotation. Use morpholino_oligo (SO:0000034) instead. -synonym: "morpholino backbone" EXACT [] -xref: http://en.wikipedia.org/wiki/Morpholino "wiki" -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001184 -name: PNA -def: "An attribute describing a sequence composed of peptide nucleic acid (CHEBI:48021), a chemical consisting of nucleobases bound to a backbone composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [RSC:cb] -comment: Do not use this term for feature annotation. Use PNA_oligo (SO:0001011) instead. -synonym: "peptide nucleic acid" RELATED [] -is_a: SO:0000348 ! 
nucleic_acid - -[Term] -id: SO:0001185 -name: enzymatic -def: "An attribute describing the sequence of a transcript that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use enzymatic_RNA (SO:0000372) instead. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001186 -name: ribozymic -def: "An attribute describing the sequence of a transcript that has catalytic activity even without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use ribozyme (SO:0000374) instead. -is_a: SO:0001185 ! enzymatic - -[Term] -id: SO:0001187 -name: pseudouridylation_guide_snoRNA -def: "A snoRNA that specifies the site of pseudouridylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA pseudouridylation guide activity (GO:0030558). -synonym: "pseudouridylation guide snoRNA" EXACT [] -is_a: SO:0000594 ! H_ACA_box_snoRNA - -[Term] -id: SO:0001188 -name: LNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of 'locked' deoxyribose rings connected to a phosphate backbone. The deoxyribose unit's conformation is 'locked' by a 2'-C,4'-C-oxymethylene link." [CHEBI:48010] -comment: Do not use this term for feature annotation. Use LNA_oligo (SO:0001189) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001189 -name: LNA_oligo -def: "An oligo composed of LNA residues." [RSC:cb] -synonym: "LNA oligo" EXACT [] -synonym: "locked nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Locked_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001188 ! LNA -relationship: has_quality SO:0001188 ! 
LNA - -[Term] -id: SO:0001190 -name: TNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of threose rings connected to a phosphate backbone." [CHEBI:48019] -comment: Do not use this term for feature annotation. Use TNA_oligo (SO:0001191) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001191 -name: TNA_oligo -def: "An oligo composed of TNA residues." [RSC:cb] -synonym: "threose nucleic acid" EXACT [] -synonym: "TNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Threose_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001190 ! TNA -relationship: has_quality SO:0001190 ! TNA - -[Term] -id: SO:0001192 -name: GNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of an acyclic three-carbon propylene glycol connected to a phosphate backbone. It has two enantiomeric forms, (R)-GNA and (S)-GNA." [CHEBI:48015] -comment: Do not use this term for feature annotation. Use GNA_oligo (SO:0001192) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001193 -name: GNA_oligo -def: "An oligo composed of GNA residues." [RSC:cb] -synonym: "glycerol nucleic acid" EXACT [] -synonym: "glycol nucleic acid" EXACT [] -synonym: "GNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Glycerol_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001192 ! GNA -relationship: has_quality SO:0001192 ! GNA - -[Term] -id: SO:0001194 -name: R_GNA -def: "An attribute describing a GNA sequence in the (R)-GNA enantiomer." [CHEBI:48016] -comment: Do not use this term for feature annotation. Use R_GNA_oligo (SO:0001195) instead. -synonym: "R GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001195 -name: R_GNA_oligo -def: "An oligo composed of (R)-GNA residues." 
[RSC:cb] -synonym: "(R)-glycerol nucleic acid" EXACT [] -synonym: "(R)-glycol nucleic acid" EXACT [] -synonym: "R GNA oligo" EXACT [] -is_a: SO:0001193 ! GNA_oligo -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001194 ! R_GNA -relationship: has_quality SO:0001194 ! R_GNA - -[Term] -id: SO:0001196 -name: S_GNA -def: "An attribute describing a GNA sequence in the (S)-GNA enantiomer." [CHEBI:48017] -comment: Do not use this term for feature annotation. Use S_GNA_oligo (SO:0001197) instead. -synonym: "S GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001197 -name: S_GNA_oligo -def: "An oligo composed of (S)-GNA residues." [RSC:cb] -synonym: "(S)-glycerol nucleic acid" EXACT [] -synonym: "(S)-glycol nucleic acid" EXACT [] -synonym: "S GNA oligo" EXACT [] -is_a: SO:0001193 ! GNA_oligo -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001196 ! S_GNA -relationship: has_quality SO:0001196 ! S_GNA - -[Term] -id: SO:0001198 -name: ds_DNA_viral_sequence -def: "A ds_DNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded DNA." [SO:ke] -synonym: "double stranded DNA virus" EXACT [] -synonym: "ds DNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001199 -name: ss_RNA_viral_sequence -def: "A ss_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as single stranded RNA." [SO:ke] -synonym: "single strand RNA virus" EXACT [] -synonym: "ss RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001200 -name: negative_sense_ssRNA_viral_sequence -def: "A negative_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that is complementary to mRNA and must be converted to positive sense RNA by RNA polymerase before translation." 
[SO:ke] -synonym: "negative sense single stranded RNA virus" RELATED [] -synonym: "negative sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001201 -name: positive_sense_ssRNA_viral_sequence -def: "A positive_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that can be immediately translated by the host." [SO:ke] -synonym: "positive sense single stranded RNA virus" RELATED [] -synonym: "positive sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001202 -name: ambisense_ssRNA_viral_sequence -def: "A ambisense_RNA_virus is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus with both messenger and anti messenger polarity." [SO:ke] -synonym: "ambisense single stranded RNA virus" EXACT [] -synonym: "ambisense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001203 -name: RNA_polymerase_promoter -def: "A region (DNA) to which RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "RNA polymerase promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0001204 -name: Phage_RNA_Polymerase_Promoter -def: "A region (DNA) to which Bacteriophage RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "Phage RNA Polymerase Promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0001205 -name: SP6_RNA_Polymerase_Promoter -def: "A region (DNA) to which the SP6 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "SP6 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001206 -name: T3_RNA_Polymerase_Promoter -def: "A DNA sequence to which the T3 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T3 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! 
Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001207 -name: T7_RNA_Polymerase_Promoter -def: "A region (DNA) to which the T7 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T7 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001208 -name: five_prime_EST -def: "An EST read from the 5' end of a transcript that usually codes for a protein. These regions tend to be conserved across species and do not change much within a gene family." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "5' EST" EXACT [] -synonym: "five prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001209 -name: three_prime_EST -def: "An EST read from the 3' end of a transcript. They are more likely to fall within non-coding, or untranslated regions(UTRs)." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "3' EST" EXACT [] -synonym: "three prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001210 -name: translational_frameshift -def: "The region of mRNA (not divisible by 3 bases) that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "ribosomal frameshift" EXACT [] -synonym: "translational frameshift" EXACT [] -xref: http://en.wikipedia.org/wiki/Translational_frameshift "wiki" -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0001211 -name: plus_1_translational_frameshift -def: "The region of mRNA 1 base long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 1 ribosomal frameshift" EXACT [] -synonym: "plus 1 translational frameshift" EXACT [] -is_a: SO:0001210 ! 
translational_frameshift - -[Term] -id: SO:0001212 -name: plus_2_translational_frameshift -def: "The region of mRNA 2 bases long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 2 ribosomal frameshift" EXACT [] -synonym: "plus 2 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001213 -name: group_III_intron -def: "Group III introns are introns found in the mRNA of the plastids of euglenoid protists. They are spliced by a two step transesterification with bulged adenosine as initiating nucleophile." [PMID:11377794] -comment: GO:0000374. -synonym: "group III intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_III_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -def: "The maximal intersection of exon and UTR." [SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with a stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001216 -name: endonuclease_spliced_intron -def: "An intron that spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -synonym: "endonuclease spliced intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0001217 -name: protein_coding_gene -synonym: "protein coding gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000010 ! 
protein_coding -relationship: has_quality SO:0000010 ! protein_coding - -[Term] -id: SO:0001218 -name: transgenic_insertion -def: "An insertion that derives from another organism, via the use of recombinant DNA technology." [SO:bm] -synonym: "transgenic insertion" EXACT [] -is_a: SO:0000667 ! insertion -intersection_of: SO:0000667 ! insertion -intersection_of: has_quality SO:0000781 ! transgenic -relationship: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0001219 -name: retrogene -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000569 ! retrotransposed -relationship: has_quality SO:0000569 ! retrotransposed - -[Term] -id: SO:0001220 -name: silenced_by_RNA_interference -def: "An attribute describing an epigenetic process where a gene is inactivated by RNA interference." [RSC:cb] -comment: RNA interference is GO:0016246. -synonym: "silenced by RNA interference" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001221 -name: silenced_by_histone_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by histone modification." [RSC:cb] -comment: Histone modification is GO:0016570. -synonym: "silenced by histone modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001222 -name: silenced_by_histone_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone methylation." [RSC:cb] -comment: Histone methylation is GO:0016571. -synonym: "silenced by histone methylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001223 -name: silenced_by_histone_deacetylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone deacetylation." [RSC:cb] -comment: Histone deacetylation is GO:0016573. -synonym: "silenced by histone deacetylation" EXACT [] -is_a: SO:0001221 ! 
silenced_by_histone_modification - -[Term] -id: SO:0001224 -name: gene_silenced_by_RNA_interference -def: "A gene that is silenced by RNA interference." [SO:xp] -synonym: "gene silenced by RNA interference" EXACT [] -synonym: "RNA interference silenced gene" EXACT [] -synonym: "RNAi silenced gene" EXACT [] -is_a: SO:0000127 ! silenced_gene -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001220 ! silenced_by_RNA_interference -relationship: has_quality SO:0001220 ! silenced_by_RNA_interference - -[Term] -id: SO:0001225 -name: gene_silenced_by_histone_modification -def: "A gene that is silenced by histone modification." [SO:xp] -synonym: "gene silenced by histone modification" EXACT [] -is_a: SO:0000127 ! silenced_gene -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001221 ! silenced_by_histone_modification -relationship: has_quality SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001226 -name: gene_silenced_by_histone_methylation -def: "A gene that is silenced by histone methylation." [SO:xp] -synonym: "gene silenced by histone methylation" EXACT [] -is_a: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001222 ! silenced_by_histone_methylation -relationship: has_quality SO:0001222 ! silenced_by_histone_methylation - -[Term] -id: SO:0001227 -name: gene_silenced_by_histone_deacetylation -def: "A gene that is silenced by histone deacetylation." [SO:xp] -synonym: "gene silenced by histone deacetylation" EXACT [] -is_a: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001223 ! silenced_by_histone_deacetylation -relationship: has_quality SO:0001223 ! 
silenced_by_histone_deacetylation - -[Term] -id: SO:0001228 -name: dihydrouridine -def: "A modified RNA base in which the 5,6-dihydrouracil is bound to the ribose ring." [RSC:cb] -synonym: "D" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Dihydrouridine "wiki" -xref: RNAMOD:051 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001229 -name: pseudouridine -def: "A modified RNA base in which the 5- position of the uracil is bound to the ribose ring instead of the 4- position." [RSC:cb] -comment: The free molecule is CHEBI:17802. -synonym: "Y" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Pseudouridine "wiki" -xref: RNAMOD:050 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001230 -name: inosine -def: "A modified RNA base in which hypoxanthine is bound to the ribose ring." [http://library.med.utah.edu/RNAmods/, RSC:cb] -comment: The free molecule is CHEBI:17596. -synonym: "I" RELATED [] -synonym: "RNAMOD:017" RELATED [] -xref: http://en.wikipedia.org/wiki/Inosine "wiki" -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001231 -name: seven_methylguanine -def: "A modified RNA base in which guanine is methylated at the 7- position." [RSC:cb] -comment: The free molecule is CHEBI:2274. -synonym: "7-methylguanine" EXACT [] -synonym: "seven methylguanine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001232 -name: ribothymidine -def: "A modified RNA base in which thymine is bound to the ribose ring." [RSC:cb] -comment: The free molecule is CHEBI:30832. -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001233 -name: methylinosine -def: "A modified RNA base in which methylhypoxanthine is bound to the ribose ring." [RSC:cb] -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001234 -name: mobile -def: "An attribute describing a feature that has either intra-genome or intracellular mobility." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Mobile "wiki" -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0001235 -name: replicon -def: "A region containing at least one unique origin of replication and a unique termination site." [ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001237 -name: amino_acid -def: "A sequence feature that corresponds to a single amino acid residue in a polypeptide." [RSC:cb] -comment: Probably in the future this will cross reference to Chebi. -synonym: "amino acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Amino_acid "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0001238 -name: major_TSS -synonym: "major transcription start site" EXACT [] -synonym: "major TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001239 -name: minor_TSS -synonym: "minor TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001240 -name: TSS_region -def: "The region of a gene from the 5' most TSS to the 3' TSS." [BBOP:nw] -synonym: "TSS region" EXACT [] -is_a: SO:0000842 ! gene_component_region -relationship: has_part SO:0000315 ! TSS - -[Term] -id: SO:0001241 -name: encodes_alternate_transcription_start_sites -synonym: "encodes alternate transcription start sites" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0001243 -name: miRNA_primary_transcript_region -def: "A part of an miRNA primary_transcript." [SO:ke] -synonym: "miRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0001244 -name: pre_miRNA -def: "The 60-70 nucleotide region remain after Drosha processing of the primary transcript, that folds back upon itself to form a hairpin sructure." 
[SO:ke] -synonym: "pre-miRNA" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0001245 -name: miRNA_stem -def: "The stem of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA stem" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001246 -name: miRNA_loop -def: "The loop of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA loop" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001247 -name: synthetic_oligo -def: "An oligo composed of synthetic nucleotides." [SO:ke] -synonym: "synthetic oligo" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0001248 -name: assembly -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001249 -name: fragment_assembly -def: "A fragment assembly is a genome assembly that orders overlapping fragments of the genome based on landmark sequences. The base pair distance between the landmarks is known allowing additivity of lengths." [SO:ke] -synonym: "fragment assembly" EXACT [] -synonym: "physical map" EXACT [] -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0001250 -name: fingerprint_map -def: "A fingerprint_map is a physical map composed of restriction fragments." [SO:ke] -synonym: "BACmap" EXACT [] -synonym: "fingerprint map" EXACT [] -synonym: "FPC" EXACT [] -synonym: "FPCmap" EXACT [] -synonym: "restriction map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000412 ! 
restriction_fragment - -[Term] -id: SO:0001251 -name: STS_map -def: "An STS map is a physical map organized by the unique STS landmarks." [SO:ke] -synonym: "STS map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001252 -name: RH_map -def: "A radiation hybrid map is a physical map." [SO:ke] -synonym: "radiation hybrid map" EXACT [] -synonym: "RH map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001253 -name: sonicate_fragment -def: "A DNA fragment generated by sonication. Sonication is a technique used to sheer DNA into smaller fragments." [SO:ke] -synonym: "sonicate fragment" EXACT [] -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0001254 -name: polyploid -def: "A kind of chromosome variation where the chromosome complement is an exact multiple of the haploid number and is greater than the diploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Polyploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0001255 -name: autopolyploid -def: "A polyploid where the multiple chromosome set was derived from the same organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Autopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001256 -name: allopolyploid -def: "A polyploid where the multiple chromosome set was derived from a different organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Allopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001257 -name: homing_endonuclease_binding_site -def: "The binding site (recognition site) of a homing endonuclease. The binding site is typically large." [SO:ke] -synonym: "homing endonuclease binding site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0001258 -name: octamer_motif -def: "A sequence element characteristic of some RNA polymerase II promoters with sequence ATTGCAT that binds Pou-domain transcription factors." 
[GOC:dh, PMID:3095662] -comment: Nature. 1986 Oct 16-22;323(6089):640-3. -synonym: "octamer motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001259 -name: apicoplast_chromosome -def: "A chromosome originating in an apicoplast." [SO:xp] -synonym: "apicoplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000743 ! apicoplast_sequence -relationship: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0001260 -name: sequence_collection -def: "A collection of discontinuous sequences." [SO:ke] -synonym: "sequence collection" EXACT [] - -[Term] -id: SO:0001261 -name: overlapping_feature_set -def: "A continuous region of sequence composed of the overlapping of multiple sequence_features, which ultimately provides evidence for another sequence_feature." [SO:ke] -comment: This feature was requested by Nicole, tracker id 1911479. It is required to gather evidence together for annotation. An example would be overlapping ESTs that support an mRNA. -synonym: "overlapping feature set" EXACT [] -is_a: SO:0000703 ! experimental_result_region - -[Term] -id: SO:0001262 -name: overlapping_EST_set -def: "A continous experimental result region extending the length of multiple overlapping EST's." [SO:ke] -synonym: "overlapping EST set" EXACT [] -is_a: SO:0001261 ! overlapping_feature_set -relationship: has_part SO:0000345 ! EST - -[Term] -id: SO:0001263 -name: ncRNA_gene -synonym: "ncRNA gen" EXACT [] -synonym: "ncRNA gene" EXACT [] -synonym: "non-coding RNA gene" RELATED [] -is_a: SO:0000704 ! gene - -[Term] -id: SO:0001264 -name: gRNA_gene -synonym: "gRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000979 ! gRNA_encoding -relationship: has_quality SO:0000979 ! 
gRNA_encoding - -[Term] -id: SO:0001265 -name: miRNA_gene -synonym: "miRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000571 ! miRNA_encoding -relationship: has_quality SO:0000571 ! miRNA_encoding - -[Term] -id: SO:0001266 -name: scRNA_gene -synonym: "scRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000575 ! scRNA_encoding -relationship: has_quality SO:0000575 ! scRNA_encoding - -[Term] -id: SO:0001267 -name: snoRNA_gene -synonym: "snoRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000578 ! snoRNA_encoding -relationship: has_quality SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0001268 -name: snRNA_gene -synonym: "snRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0001263 ! ncRNA_gene - -[Term] -id: SO:0001269 -name: SRP_RNA_gene -synonym: "SRP RNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000642 ! SRP_RNA_encoding -relationship: has_quality SO:0000642 ! SRP_RNA_encoding - -[Term] -id: SO:0001270 -name: stRNA_gene -synonym: "stRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000656 ! stRNA_encoding -relationship: has_quality SO:0000656 ! stRNA_encoding - -[Term] -id: SO:0001271 -name: tmRNA_gene -synonym: "tmRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000659 ! tmRNA_encoding -relationship: has_quality SO:0000659 ! tmRNA_encoding - -[Term] -id: SO:0001272 -name: tRNA_gene -synonym: "tRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! 
ncRNA_gene -intersection_of: has_quality SO:0000663 ! tRNA_encoding -relationship: has_quality SO:0000663 ! tRNA_encoding - -[Term] -id: SO:0001273 -name: modified_adenosine -def: "A modified adenine is an adenine base feature that has been altered." [SO:ke] -synonym: "modified adenosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001274 -name: modified_inosine -def: "A modified inosine is an inosine base feature that has been altered." [SO:ke] -synonym: "modified inosine" EXACT [] -is_a: SO:0001230 ! inosine - -[Term] -id: SO:0001275 -name: modified_cytidine -def: "A modified cytidine is a cytidine base feature which has been altered." [SO:ke] -synonym: "modified cytidine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001276 -name: modified_guanosine -synonym: "modified guanosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001277 -name: modified_uridine -synonym: "modified uridine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001278 -name: one_methylinosine -def: "1-methylinosine is a modified insosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylinosine" EXACT [] -synonym: "m1I" EXACT RNAMOD [] -synonym: "one methylinosine" EXACT [] -xref: RNAMOD:018 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001279 -name: one_two_prime_O_dimethylinosine -def: "1,2'-O-dimethylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylinosine" EXACT [] -synonym: "m'Im" EXACT RNAMOD [] -synonym: "one two prime O dimethylinosine" EXACT [] -xref: RNAMOD:019 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001280 -name: two_prime_O_methylinosine -def: "2'-O-methylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylinosine" EXACT [] -synonym: "Im" EXACT RNAMOD [] -synonym: "two prime O methylinosine" EXACT [] -xref: RNAMOD:081 -is_a: SO:0001274 ! 
modified_inosine - -[Term] -id: SO:0001281 -name: three_methylcytidine -def: "3-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylcytidine" EXACT [] -synonym: "m3C" EXACT RNAMOD [] -synonym: "three methylcytidine" EXACT [] -xref: RNAMOD:020 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001282 -name: five_methylcytidine -def: "5-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylcytidine" EXACT [] -synonym: "five methylcytidine" EXACT [] -synonym: "m5C" EXACT RNAMOD [] -xref: RNAMOD:021 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001283 -name: two_prime_O_methylcytidine -def: "2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylcytidine" EXACT [] -synonym: "Cm" EXACT RNAMOD [] -synonym: "two prime O methylcytidine" EXACT [] -xref: RNAMOD:022 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001284 -name: two_thiocytidine -def: "2-thiocytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiocytidine" EXACT [] -synonym: "s2C" EXACT RNAMOD [] -synonym: "two thiocytidine" EXACT [] -xref: RNAMOD:023 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001285 -name: N4_acetylcytidine -def: "N4-acetylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4C" EXACT RNAMOD [] -synonym: "N4 acetylcytidine" EXACT [] -synonym: "N4-acetylcytidine" EXACT [] -xref: RNAMOD:024 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001286 -name: five_formylcytidine -def: "5-formylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formylcytidine" EXACT [] -synonym: "f5C" EXACT RNAMOD [] -synonym: "five formylcytidine" EXACT [] -xref: RNAMOD:025 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001287 -name: five_two_prime_O_dimethylcytidine -def: "5,2'-O-dimethylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethylcytidine" EXACT [] -synonym: "five two prime O dimethylcytidine" EXACT [] -synonym: "m5Cm" EXACT RNAMOD [] -xref: RNAMOD:026 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001288 -name: N4_acetyl_2_prime_O_methylcytidine -def: "N4-acetyl-2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4Cm" EXACT RNAMOD [] -synonym: "N4 acetyl 2 prime O methylcytidine" EXACT [] -synonym: "N4-acetyl-2'-O-methylcytidine" EXACT [] -xref: RNAMOD:027 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001289 -name: lysidine -def: "Lysidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "k2C" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Lysidine "wiki" -xref: RNAMOD:028 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001290 -name: N4_methylcytidine -def: "N4-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4C" EXACT RNAMOD [] -synonym: "N4 methylcytidine" EXACT [] -synonym: "N4-methylcytidine" EXACT [] -xref: RNAMOD:082 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001291 -name: N4_2_prime_O_dimethylcytidine -def: "N4,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4Cm" EXACT RNAMOD [] -synonym: "N4 2 prime O dimethylcytidine" EXACT [] -synonym: "N4,2'-O-dimethylcytidine" EXACT [] -xref: RNAMOD:083 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001292 -name: five_hydroxymethylcytidine -def: "5-hydroxymethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxymethylcytidine" EXACT [] -synonym: "five hydroxymethylcytidine" EXACT [] -synonym: "hm5C" EXACT RNAMOD [] -xref: RNAMOD:084 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001293 -name: five_formyl_two_prime_O_methylcytidine -def: "5-formyl-2'-O-methylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-formyl-2'-O-methylcytidine" EXACT [] -synonym: "f5Cm" EXACT RNAMOD [] -synonym: "five formyl two prime O methylcytidine" EXACT [] -xref: RNAMOD:095 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001294 -name: N4_N4_2_prime_O_trimethylcytidine -def: "N4_N4_2_prime_O_trimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m42Cm" EXACT RNAMOD [] -synonym: "N4,N4,2'-O-trimethylcytidine" EXACT [] -xref: RNAMOD:107 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001295 -name: one_methyladenosine -def: "1_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyladenosine" EXACT [] -synonym: "m1A" EXACT RNAMOD [] -synonym: "one methyladenosine" EXACT [] -xref: RNAMOD:001 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001296 -name: two_methyladenosine -def: "2_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methyladenosine" EXACT [] -synonym: "m2A" EXACT RNAMOD [] -synonym: "two methyladenosine" EXACT [] -xref: RNAMOD:002 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001297 -name: N6_methyladenosine -def: "N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6A" EXACT RNAMOD [] -synonym: "N6 methyladenosine" EXACT [] -synonym: "N6-methyladenosine" EXACT [] -xref: RNAMOD:003 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001298 -name: two_prime_O_methyladenosine -def: "2prime_O_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyladenosine" EXACT [] -synonym: "Am" EXACT RNAMOD [] -synonym: "two prime O methyladenosine" EXACT [] -xref: RNAMOD:004 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001299 -name: two_methylthio_N6_methyladenosine -def: "2_methylthio_N6_methyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-methyladenosine" EXACT [] -synonym: "ms2m6A" EXACT RNAMOD [] -synonym: "two methylthio N6 methyladenosine" EXACT [] -xref: RNAMOD:005 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001300 -name: N6_isopentenyladenosine -def: "N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "i6A" EXACT RNAMOD [] -synonym: "N6 isopentenyladenosine" EXACT [] -synonym: "N6-isopentenyladenosine" EXACT [] -xref: RNAMOD:006 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001301 -name: two_methylthio_N6_isopentenyladenosine -def: "2_methylthio_N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-isopentenyladenosine" EXACT [] -synonym: "ms2i6A" EXACT RNAMOD [] -synonym: "two methylthio N6 isopentenyladenosine" EXACT [] -xref: RNAMOD:007 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001302 -name: N6_cis_hydroxyisopentenyl_adenosine -def: "N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "io6A" EXACT RNAMOD [] -synonym: "N6 cis hydroxyisopentenyl adenosine" EXACT [] -synonym: "N6-(cis-hydroxyisopentenyl)adenosine" EXACT [] -xref: RNAMOD:008 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001303 -name: two_methylthio_N6_cis_hydroxyisopentenyl_adenosine -def: "2_methylthio_N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-(cis-hydroxyisopentenyl) adenosine" EXACT [] -synonym: "ms2io6A" EXACT RNAMOD [] -synonym: "two methylthio N6 cis hydroxyisopentenyl adenosine" EXACT [] -xref: RNAMOD:009 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001304 -name: N6_glycinylcarbamoyladenosine -def: "N6_glycinylcarbamoyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "g6A" EXACT RNAMOD [] -synonym: "N6 glycinylcarbamoyladenosine" EXACT [] -synonym: "N6-glycinylcarbamoyladenosine" EXACT [] -xref: RNAMOD:010 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001305 -name: N6_threonylcarbamoyladenosine -def: "N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-threonylcarbamoyladenosine" EXACT [] -synonym: "t6A" EXACT RNAMOD [] -xref: RNAMOD:011 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001306 -name: two_methylthio_N6_threonyl_carbamoyladenosine -def: "2_methylthio_N6_threonyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-threonyl carbamoyladenosine" EXACT [] -synonym: "ms2t6A" EXACT RNAMOD [] -synonym: "two methylthio N6 threonyl carbamoyladenosine" EXACT [] -xref: RNAMOD:012 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001307 -name: N6_methyl_N6_threonylcarbamoyladenosine -def: "N6_methyl_N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6t6A" EXACT RNAMOD [] -synonym: "N6 methyl N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-methyl-N6-threonylcarbamoyladenosine" EXACT [] -xref: RNAMOD:013 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001308 -name: N6_hydroxynorvalylcarbamoyladenosine -def: "N6_hydroxynorvalylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "hn6A" EXACT RNAMOD [] -synonym: "N6 hydroxynorvalylcarbamoyladenosine" EXACT [] -synonym: "N6-hydroxynorvalylcarbamoyladenosine" EXACT [] -xref: RNAMOD:014 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001309 -name: two_methylthio_N6_hydroxynorvalyl_carbamoyladenosine -def: "2_methylthio_N6_hydroxynorvalyl_carbamoyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-hydroxynorvalyl carbamoyladenosine" EXACT [] -synonym: "ms2hn6A" EXACT RNAMOD [] -synonym: "two methylthio N6 hydroxynorvalyl carbamoyladenosine" EXACT [] -xref: RNAMOD:015 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001310 -name: two_prime_O_ribosyladenosine_phosphate -def: "2prime_O_ribosyladenosine_phosphate is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosyladenosine (phosphate)" EXACT [] -synonym: "Ar(p)" EXACT RNAMOD [] -synonym: "two prime O ribosyladenosine phosphate" EXACT [] -xref: RNAMOD:016 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001311 -name: N6_N6_dimethyladenosine -def: "N6_N6_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62A" EXACT RNAMOD [] -synonym: "N6,N6-dimethyladenosine" EXACT [] -xref: RNAMOD:080 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001312 -name: N6_2_prime_O_dimethyladenosine -def: "N6_2prime_O_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6Am" EXACT RNAMOD [] -synonym: "N6 2 prime O dimethyladenosine" EXACT [] -synonym: "N6,2'-O-dimethyladenosine" EXACT [] -xref: RNAMOD:088 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001313 -name: N6_N6_2_prime_O_trimethyladenosine -def: "N6_N6_2prime_O_trimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62Am" EXACT RNAMOD [] -synonym: "N6,N6,2'-O-trimethyladenosine" EXACT [] -xref: RNAMOD:089 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001314 -name: one_two_prime_O_dimethyladenosine -def: "1,2'-O-dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethyladenosine" EXACT [] -synonym: "m1Am" EXACT RNAMOD [] -synonym: "one two prime O dimethyladenosine" EXACT [] -xref: RNAMOD:097 -is_a: SO:0001273 ! 
modified_adenosine - -[Term] -id: SO:0001315 -name: N6_acetyladenosine -def: "N6_acetyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac6A" EXACT RNAMOD [] -synonym: "N6 acetyladenosine" EXACT [] -synonym: "N6-acetyladenosine" EXACT [] -xref: RNAMOD:102 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001316 -name: seven_deazaguanosine -def: "7-deazaguanosine is a moddified guanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-deazaguanosine" RELATED [] -synonym: "seven deazaguanosine" EXACT [] -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001317 -name: queuosine -def: "Queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "Q" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Queuosine "wiki" -xref: RNAMOD:043 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001318 -name: epoxyqueuosine -def: "Epoxyqueuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "eQ" EXACT RNAMOD [] -xref: RNAMOD:044 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001319 -name: galactosyl_queuosine -def: "Galactosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "galactosyl queuosine" EXACT [] -synonym: "galactosyl-queuosine" EXACT [] -synonym: "galQ" EXACT RNAMOD [] -xref: RNAMOD:045 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001320 -name: mannosyl_queuosine -def: "Mannosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "mannosyl queuosine" EXACT [] -synonym: "mannosyl-queuosine" EXACT [] -synonym: "manQ" EXACT RNAMOD [] -xref: RNAMOD:046 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001321 -name: seven_cyano_seven_deazaguanosine -def: "7_cyano_7_deazaguanosine is a modified 7-deazoguanosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "7-cyano-7-deazaguanosine" EXACT [] -synonym: "preQ0" EXACT RNAMOD [] -synonym: "seven cyano seven deazaguanosine" EXACT [] -xref: RNAMOD:047 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001322 -name: seven_aminomethyl_seven_deazaguanosine -def: "7_aminomethyl_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-aminomethyl-7-deazaguanosine" EXACT [] -synonym: "preQ1" EXACT RNAMOD [] -synonym: "seven aminomethyl seven deazaguanosine" EXACT [] -xref: RNAMOD:048 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001323 -name: archaeosine -def: "Archaeosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "G+" EXACT RNAMOD [] -xref: RNAMOD:049 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001324 -name: one_methylguanosine -def: "1_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylguanosine" EXACT [] -synonym: "m1G" EXACT RNAMOD [] -synonym: "one methylguanosine" EXACT [] -xref: RNAMOD:029 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001325 -name: N2_methylguanosine -def: "N2_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2G" EXACT RNAMOD [] -synonym: "N2 methylguanosine" EXACT [] -synonym: "N2-methylguanosine" EXACT [] -xref: RNAMOD:030 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001326 -name: seven_methylguanosine -def: "7_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "7-methylguanosine" EXACT [] -synonym: "m7G" EXACT RNAMOD [] -synonym: "seven methylguanosine" EXACT [] -xref: RNAMOD:031 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001327 -name: two_prime_O_methylguanosine -def: "2prime_O_methylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylguanosine" EXACT [] -synonym: "Gm" EXACT RNAMOD [] -synonym: "two prime O methylguanosine" EXACT [] -xref: RNAMOD:032 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001328 -name: N2_N2_dimethylguanosine -def: "N2_N2_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22G" EXACT RNAMOD [] -synonym: "N2,N2-dimethylguanosine" EXACT [] -xref: RNAMOD:033 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001329 -name: N2_2_prime_O_dimethylguanosine -def: "N2_2prime_O_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2Gm" EXACT RNAMOD [] -synonym: "N2 2 prime O dimethylguanosine" EXACT [] -synonym: "N2,2'-O-dimethylguanosine" EXACT [] -xref: RNAMOD:034 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001330 -name: N2_N2_2_prime_O_trimethylguanosine -def: "N2_N2_2prime_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22Gmv" EXACT RNAMOD [] -synonym: "N2,N2,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:035 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001331 -name: two_prime_O_ribosylguanosine_phosphate -def: "2prime_O_ribosylguanosine_phosphate is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosylguanosine (phosphate)" EXACT [] -synonym: "Gr(p)" EXACT RNAMOD [] -synonym: "two prime O ribosylguanosine phosphate" EXACT [] -xref: RNAMOD:036 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001332 -name: wybutosine -def: "Wybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "yW" EXACT RNAMOD [] -xref: RNAMOD:037 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001333 -name: peroxywybutosine -def: "Peroxywybutosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "o2yW" EXACT RNAMOD [] -xref: RNAMOD:038 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001334 -name: hydroxywybutosine -def: "Hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW" EXACT RNAMOD [] -xref: RNAMOD:039 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001335 -name: undermodified_hydroxywybutosine -def: "Undermodified_hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW*" EXACT RNAMOD [] -synonym: "undermodified hydroxywybutosine" EXACT [] -xref: RNAMOD:040 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001336 -name: wyosine -def: "Wyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "IMG" EXACT RNAMOD [] -xref: RNAMOD:041 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001337 -name: methylwyosine -def: "Methylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mimG" EXACT RNAMOD [] -xref: RNAMOD:042 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001338 -name: N2_7_dimethylguanosine -def: "N2_7_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7G" EXACT RNAMOD [] -synonym: "N2 7 dimethylguanosine" EXACT [] -synonym: "N2,7-dimethylguanosine" EXACT [] -xref: RNAMOD:090 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001339 -name: N2_N2_7_trimethylguanosine -def: "N2_N2_7_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,2,7G" EXACT RNAMOD [] -synonym: "N2,N2,7-trimethylguanosine" EXACT [] -xref: RNAMOD:091 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001340 -name: one_two_prime_O_dimethylguanosine -def: "1_2prime_O_dimethylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylguanosine" EXACT [] -synonym: "m1Gm" EXACT RNAMOD [] -synonym: "one two prime O dimethylguanosine" EXACT [] -xref: RNAMOD:096 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001341 -name: four_demethylwyosine -def: "4_demethylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-demethylwyosine" EXACT [] -synonym: "four demethylwyosine" EXACT [] -synonym: "imG-14" EXACT RNAMOD [] -xref: RNAMOD:100 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001342 -name: isowyosine -def: "Isowyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "imG2" EXACT RNAMOD [] -xref: RNAMOD:101 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001343 -name: N2_7_2prirme_O_trimethylguanosine -def: "N2_7_2prirme_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7Gm" EXACT RNAMOD [] -synonym: "N2 7 2prirme O trimethylguanosine" EXACT [] -synonym: "N2,7,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:106 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001344 -name: five_methyluridine -def: "5_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methyluridine" EXACT [] -synonym: "five methyluridine" EXACT [] -synonym: "m5U" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/5-methyluridine "wiki" -xref: RNAMOD:052 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001345 -name: two_prime_O_methyluridine -def: "2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyluridine" EXACT [] -synonym: "two prime O methyluridine" EXACT [] -synonym: "Um" EXACT RNAMOD [] -xref: RNAMOD:053 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001346 -name: five_two_prime_O_dimethyluridine -def: "5_2_prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethyluridine" EXACT [] -synonym: "five two prime O dimethyluridine" EXACT [] -synonym: "m5Um" EXACT RNAMOD [] -xref: RNAMOD:054 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001347 -name: one_methylpseudouridine -def: "1_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylpseudouridine" EXACT [] -synonym: "m1Y" EXACT RNAMOD [] -synonym: "one methylpseudouridine" EXACT [] -xref: RNAMOD:055 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001348 -name: two_prime_O_methylpseudouridine -def: "2prime_O_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylpseudouridine" EXACT [] -synonym: "two prime O methylpseudouridine" EXACT [] -synonym: "Ym" EXACT RNAMOD [] -xref: RNAMOD:056 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001349 -name: two_thiouridine -def: "2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiouridine" EXACT [] -synonym: "s2U" EXACT RNAMOD [] -synonym: "two thiouridine" EXACT [] -xref: RNAMOD:057 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001350 -name: four_thiouridine -def: "4_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-thiouridine" EXACT [] -synonym: "four thiouridine" EXACT [] -synonym: "s4U" EXACT RNAMOD [] -xref: RNAMOD:058 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001351 -name: five_methyl_2_thiouridine -def: "5_methyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyl-2-thiouridine" EXACT [] -synonym: "five methyl 2 thiouridine" EXACT [] -synonym: "m5s2U" EXACT RNAMOD [] -xref: RNAMOD:059 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001352 -name: two_thio_two_prime_O_methyluridine -def: "2_thio_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thio-2'-O-methyluridine" EXACT [] -synonym: "s2Um" EXACT RNAMOD [] -synonym: "two thio two prime O methyluridine" EXACT [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001353 -name: three_three_amino_three_carboxypropyl_uridine -def: "3_3_amino_3_carboxypropyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-(3-amino-3-carboxypropyl)uridine" EXACT [] -synonym: "acp3U" EXACT RNAMOD [] -xref: RNAMOD:061 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001354 -name: five_hydroxyuridine -def: "5_hydroxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxyuridine" EXACT [] -synonym: "five hydroxyuridine" EXACT [] -synonym: "ho5U" EXACT RNAMOD [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001355 -name: five_methoxyuridine -def: "5_methoxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxyuridine" EXACT [] -synonym: "five methoxyuridine" EXACT [] -synonym: "mo5U" EXACT RNAMOD [] -xref: RNAMOD:063 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001356 -name: uridine_five_oxyacetic_acid -def: "Uridine_5_oxyacetic_acid is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "cmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid" EXACT [] -synonym: "uridine five oxyacetic acid" EXACT [] -xref: RNAMOD:064 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001357 -name: uridine_five_oxyacetic_acid_methyl_ester -def: "Uridine_5_oxyacetic_acid_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mcmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid methyl ester" EXACT [] -synonym: "uridine five oxyacetic acid methyl ester" EXACT [] -xref: RNAMOD:065 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001358 -name: five_carboxyhydroxymethyl_uridine -def: "5_carboxyhydroxymethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine" EXACT [] -synonym: "chm5U" EXACT RNAMOD [] -synonym: "five carboxyhydroxymethyl uridine" EXACT [] -xref: RNAMOD:066 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001359 -name: five_carboxyhydroxymethyl_uridine_methyl_ester -def: "5_carboxyhydroxymethyl_uridine_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine methyl ester" EXACT [] -synonym: "five carboxyhydroxymethyl uridine methyl ester" EXACT [] -synonym: "mchm5U" EXACT RNAMOD [] -xref: RNAMOD:067 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001360 -name: five_methoxycarbonylmethyluridine -def: "Five_methoxycarbonylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyluridine" EXACT [] -synonym: "five methoxycarbonylmethyluridine" EXACT [] -synonym: "mcm5U" EXACT RNAMOD [] -xref: RNAMOD:068 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001361 -name: five_methoxycarbonylmethyl_two_prime_O_methyluridine -def: "Five_methoxycarbonylmethyl_2_prime_O_methyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five methoxycarbonylmethyl two prime O methyluridine" EXACT [] -synonym: "mcm5Um" EXACT RNAMOD [] -xref: RNAMOD:069 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001362 -name: five_methoxycarbonylmethyl_two_thiouridine -def: "5_methoxycarbonylmethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2-thiouridine" EXACT [] -synonym: "five methoxycarbonylmethyl two thiouridine" EXACT [] -synonym: "mcm5s2U" EXACT RNAMOD [] -xref: RNAMOD:070 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001363 -name: five_aminomethyl_two_thiouridine -def: "5_aminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-aminomethyl-2-thiouridine" EXACT [] -synonym: "five aminomethyl two thiouridine" EXACT [] -synonym: "nm5s2U" EXACT RNAMOD [] -xref: RNAMOD:071 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001364 -name: five_methylaminomethyluridine -def: "5_methylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyluridine" EXACT [] -synonym: "five methylaminomethyluridine" EXACT [] -synonym: "mnm5U" EXACT RNAMOD [] -xref: RNAMOD:072 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001365 -name: five_methylaminomethyl_two_thiouridine -def: "5_methylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-thiouridine" EXACT [] -synonym: "five methylaminomethyl two thiouridine" EXACT [] -synonym: "mnm5s2U" EXACT RNAMOD [] -xref: RNAMOD:073 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001366 -name: five_methylaminomethyl_two_selenouridine -def: "5_methylaminomethyl_2_selenouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-selenouridine" EXACT [] -synonym: "five methylaminomethyl two selenouridine" EXACT [] -synonym: "mnm5se2U" EXACT RNAMOD [] -xref: RNAMOD:074 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001367 -name: five_carbamoylmethyluridine -def: "5_carbamoylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyluridine" EXACT [] -synonym: "five carbamoylmethyluridine" EXACT [] -synonym: "ncm5U" EXACT RNAMOD [] -xref: RNAMOD:075 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001368 -name: five_carbamoylmethyl_two_prime_O_methyluridine -def: "5_carbamoylmethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five carbamoylmethyl two prime O methyluridine" EXACT [] -synonym: "ncm5Um" EXACT RNAMOD [] -xref: RNAMOD:076 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001369 -name: five_carboxymethylaminomethyluridine -def: "5_carboxymethylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyluridine" EXACT [] -synonym: "cmnm5U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyluridine" EXACT [] -xref: RNAMOD:077 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001370 -name: five_carboxymethylaminomethyl_two_prime_O_methyluridine -def: "5_carboxymethylaminomethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl- 2'-O-methyluridine" EXACT [] -synonym: "cmnm5Um" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two prime O methyluridine" EXACT [] -xref: RNAMOD:078 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001371 -name: five_carboxymethylaminomethyl_two_thiouridine -def: "5_carboxymethylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl-2-thiouridine" EXACT [] -synonym: "cmnm5s2U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two thiouridine" EXACT [] -xref: RNAMOD:079 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001372 -name: three_methyluridine -def: "3_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methyluridine" EXACT [] -synonym: "m3U" EXACT RNAMOD [] -synonym: "three methyluridine" EXACT [] -xref: RNAMOD:085 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001373 -name: one_methyl_three_three_amino_three_carboxypropyl_pseudouridine -def: "1_methyl_3_3_amino_3_carboxypropyl_pseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyl-3-(3-amino-3-carboxypropyl) pseudouridine" EXACT [] -synonym: "m1acp3Y" EXACT RNAMOD [] -xref: RNAMOD:086 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001374 -name: five_carboxymethyluridine -def: "5_carboxymethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethyluridine" EXACT [] -synonym: "cm5U" EXACT RNAMOD [] -synonym: "five carboxymethyluridine" EXACT [] -xref: RNAMOD:087 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001375 -name: three_two_prime_O_dimethyluridine -def: "3_2prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3,2'-O-dimethyluridine" EXACT [] -synonym: "m3Um" EXACT RNAMOD [] -synonym: "three two prime O dimethyluridine" EXACT [] -xref: RNAMOD:092 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001376 -name: five_methyldihydrouridine -def: "5_methyldihydrouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyldihydrouridine" EXACT [] -synonym: "five methyldihydrouridine" EXACT [] -synonym: "m5D" EXACT RNAMOD [] -xref: RNAMOD:093 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001377 -name: three_methylpseudouridine -def: "3_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylpseudouridine" EXACT [] -synonym: "m3Y" EXACT RNAMOD [] -synonym: "three methylpseudouridine" EXACT [] -xref: RNAMOD:094 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001378 -name: five_taurinomethyluridine -def: "5_taurinomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyluridine" EXACT [] -synonym: "five taurinomethyluridine" EXACT [] -synonym: "tm5U" EXACT RNAMOD [] -xref: RNAMOD:098 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001379 -name: five_taurinomethyl_two_thiouridine -def: "5_taurinomethyl_2_thiouridineis a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyl-2-thiouridine" EXACT [] -synonym: "five taurinomethyl two thiouridine" EXACT [] -synonym: "tm5s2U" EXACT RNAMOD [] -xref: RNAMOD:099 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001380 -name: five_isopentenylaminomethyl_uridine -def: "5_isopentenylaminomethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)uridine" EXACT [] -synonym: "five isopentenylaminomethyl uridine" EXACT [] -synonym: "inm5U" EXACT RNAMOD [] -xref: RNAMOD:103 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001381 -name: five_isopentenylaminomethyl_two_thiouridine -def: "5_isopentenylaminomethyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2-thiouridine" EXACT [] -synonym: "five isopentenylaminomethyl two thiouridine" EXACT [] -synonym: "inm5s2U" EXACT RNAMOD [] -xref: RNAMOD:104 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001382 -name: five_isopentenylaminomethyl_two_prime_O_methyluridine -def: "5_isopentenylaminomethyl_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2'-O-methyluridine" EXACT [] -synonym: "five isopentenylaminomethyl two prime O methyluridine" EXACT [] -synonym: "inm5Um" EXACT RNAMOD [] -xref: RNAMOD:105 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001383 -name: histone_binding_site -def: "A binding site that, in the nucleotide molecule, interacts selectively and non-covalently with polypeptide residues of a histone." [SO:ke] -synonym: "histone binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site - -[Term] -id: SO:0001384 -name: CDS_fragment -synonym: "CDS fragment" EXACT [] -synonym: "incomplete CDS" EXACT [] -is_a: SO:0000316 ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000731 ! fragmentary -relationship: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001385 -name: modified_amino_acid_feature -def: "A post translationally modified amino acid feature." [SO:ke] -synonym: "modified amino acid feature" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001386 -name: modified_glycine -def: "A post translationally modified glycine amino acid feature." [SO:ke] -synonym: "ModGly" EXACT AAMOD [] -synonym: "modified glycine" EXACT [] -xref: MOD:00908 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001387 -name: modified_L_alanine -def: "A post translationally modified alanine amino acid feature." 
[SO:ke] -synonym: "ModAla" EXACT AAMOD [] -synonym: "modified L alanine" EXACT [] -synonym: "modified L-alanine" EXACT [] -xref: MOD:00901 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001388 -name: modified_L_asparagine -def: "A post translationally modified asparagine amino acid feature." [SO:ke] -synonym: "ModAsn" EXACT AAMOD [] -synonym: "modified L asparagine" EXACT [] -synonym: "modified L-asparagine" EXACT [] -xref: MOD:00903 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001389 -name: modified_L_aspartic_acid -def: "A post translationally modified aspartic acid amino acid feature." [SO:ke] -synonym: "ModAsp" EXACT AAMOD [] -synonym: "modified L aspartic acid" EXACT [] -synonym: "modified L-aspartic acid" EXACT [] -xref: MOD:00904 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001390 -name: modified_L_cysteine -def: "A post translationally modified cysteine amino acid feature." [SO:ke] -synonym: "ModCys" EXACT AAMOD [] -synonym: "modified L cysteine" EXACT [] -synonym: "modified L-cysteine" EXACT [] -xref: MOD:00905 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001391 -name: modified_L_glutamic_acid -synonym: "ModGlu" EXACT AAMOD [] -synonym: "modified L glutamic acid" EXACT [] -synonym: "modified L-glutamic acid" EXACT [] -xref: MOD:00906 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001392 -name: modified_L_threonine -def: "A post translationally modified threonine amino acid feature." [SO:ke] -synonym: "modified L threonine" EXACT [] -synonym: "modified L-threonine" EXACT [] -synonym: "ModThr" EXACT AAMOD [] -xref: MOD:00917 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001393 -name: modified_L_tryptophan -def: "A post translationally modified tryptophan amino acid feature." 
[SO:ke] -synonym: "modified L tryptophan" EXACT [] -synonym: "modified L-tryptophan" EXACT [] -synonym: "ModTrp" EXACT AAMOD [] -xref: MOD:00918 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001394 -name: modified_L_glutamine -def: "A post translationally modified glutamine amino acid feature." [SO:ke] -synonym: "ModGln" EXACT [] -synonym: "modified L glutamine" EXACT [] -synonym: "modified L-glutamine" EXACT [] -xref: MOD:00907 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001395 -name: modified_L_methionine -def: "A post translationally modified methionine amino acid feature." [SO:ke] -synonym: "modified L methionine" EXACT [] -synonym: "modified L-methionine" EXACT [] -synonym: "ModMet" EXACT AAMOD [] -xref: MOD:00913 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001396 -name: modified_L_isoleucine -def: "A post translationally modified isoleucine amino acid feature." [SO:ke] -synonym: "modified L isoleucine" EXACT [] -synonym: "modified L-isoleucine" EXACT [] -synonym: "ModIle" EXACT AAMOD [] -xref: MOD:00910 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001397 -name: modified_L_phenylalanine -def: "A post translationally modified phenylalanine amino acid feature." [SO:ke] -synonym: "modified L phenylalanine" EXACT [] -synonym: "modified L-phenylalanine" EXACT [] -synonym: "ModPhe" EXACT AAMOD [] -xref: MOD:00914 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001398 -name: modified_L_histidine -def: "A post translationally modified histidie amino acid feature." [SO:ke] -synonym: "ModHis" EXACT [] -synonym: "modified L histidine" EXACT [] -synonym: "modified L-histidine" EXACT [] -xref: MOD:00909 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001399 -name: modified_L_serine -def: "A post translationally modified serine amino acid feature." 
[SO:ke] -synonym: "modified L serine" EXACT [] -synonym: "modified L-serine" EXACT [] -synonym: "MosSer" EXACT AAMOD [] -xref: MOD:00916 "http://www.psidev.info/index.php?q=node/104" -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001400 -name: modified_L_lysine -def: "A post translationally modified lysine amino acid feature." [SO:ke] -synonym: "modified L lysine" EXACT [] -synonym: "modified L-lysine" EXACT [] -synonym: "ModLys" EXACT AAMOD [] -xref: MOD:00912 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001401 -name: modified_L_leucine -def: "A post translationally modified leucine amino acid feature." [SO:ke] -synonym: "modified L leucine" EXACT [] -synonym: "modified L-leucine " EXACT [] -synonym: "ModLeu" EXACT AAMOD [] -xref: MOD:00911 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001402 -name: modified_L_selenocysteine -def: "A post translationally modified selenocysteine amino acid feature." [SO:ke] -synonym: "modified L selenocysteine" EXACT [] -synonym: "modified L-selenocysteine" EXACT [] -xref: MOD:01158 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001403 -name: modified_L_valine -def: "A post translationally modified valine amino acid feature." [SO:ke] -synonym: "modified L valine" EXACT [] -synonym: "modified L-valine" EXACT [] -synonym: "ModVal" EXACT AAMOD [] -xref: MOD:00920 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001404 -name: modified_L_proline -def: "A post translationally modified proline amino acid feature." [SO:ke] -synonym: "modified L proline" EXACT [] -synonym: "modified L-proline " EXACT [] -synonym: "ModPro" EXACT AAMOD [] -xref: MOD:00915 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001405 -name: modified_L_tyrosine -def: "A post translationally modified tyrosine amino acid feature." 
[SO:ke] -synonym: "modified L tyrosine" EXACT [] -synonym: "modified L-tyrosine" EXACT [] -synonym: "ModTry" EXACT AAMOD [] -xref: MOD:00919 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001406 -name: modified_L_arginine -def: "A post translationally modified arginine amino acid feature." [SO:ke] -synonym: "ModArg" EXACT AAMOD [] -synonym: "modified L arginine" EXACT [] -synonym: "modified L-arginine" EXACT [] -xref: MOD:00902 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001407 -name: peptidyl -def: "An attribute describing the nature of a proteinaceous polymer, where by the amino acid units are joined by peptide bonds." [SO:ke] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0001408 -name: cleaved_for_gpi_anchor_region -def: "The C-terminal residues of a polypeptide which are exchanged for a GPI-anchor." [EBI:rh] -synonym: "cleaved for gpi anchor region" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001409 -name: biomaterial_region -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001410 -name: experimental_feature -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -def: "A region that is defined according to its relations with other regions within the same sequence." 
[SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001413 -name: translocation_breakpoint -def: "The point within a chromosome where a translocation begins or ends." [SO:cb] -synonym: "translocation breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001414 -name: insertion_breakpoint -def: "The point within a chromosome where a insertion begins or ends." [SO:cb] -synonym: "insertion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001415 -name: deletion_breakpoint -def: "The point within a chromosome where a deletion begins or ends." [SO:cb] -synonym: "deletion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001416 -name: five_prime_flanking_region -def: "A flanking region located five prime of a specific region." [SO:chado] -synonym: "5' flanking region" RELATED [] -synonym: "five prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001417 -name: three_prime_flanking_region -def: "A flanking region located three prime of a specific region." [SO:chado] -synonym: "3' flanking region" RELATED [] -synonym: "three prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001418 -name: transcribed_fragment -def: "An experimental region, defined by a tiling array experiment to be transcribed at some level." [SO:ke] -comment: Term requested by the MODencode group. -synonym: "transcribed fragment" EXACT [] -synonym: "transfrag" RELATED [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001419 -name: cis_splice_site -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -def: "Primary transcript region bordering trans-splice junction." 
[SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001421 -name: splice_junction -def: "The boundary between an intron and an exon." [SO:ke] -synonym: "splice boundary" EXACT [] -synonym: "splice junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001422 -name: conformational_switch -def: "A region of a polypeptide, involved in the transition from one conformational state to another." [SO:ke] -comment: MM Young, K Kirshenbaum, KA Dill & S Highsmith. Predicting conformational switches in proteins. Protein Science, 1999, 8, 1752-64. K. Kirshenbaum, M.M. Young and S. Highsmith. Predicting Allosteric Switches in Myosins. Protein Science 8(9):1806-1815. 1999. -synonym: "polypeptide conformational switch" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001423 -name: dye_terminator_read -def: "A read produced by the dye terminator method of sequencing." [SO:ke] -synonym: "dye terminator read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001424 -name: pyrosequenced_read -def: "A read produced by pyrosequencing technology." [SO:ke] -comment: An example is a read produced by Roche 454 technology. -synonym: "pyorsequenced read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001425 -name: ligation_based_read -def: "A read produced by ligation based sequencing technologies." [SO:ke] -comment: An example of this kind of read is one produced by ABI SOLiD. -synonym: "ligation based read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001426 -name: polymerase_synthesis_read -def: "A read produced by the polymerase based sequence by synthesis method." [SO:ke] -comment: An example is a read produced by Illumina technology. -synonym: "polymerase synthesis read" RELATED [] -is_a: SO:0000150 ! 
read - -[Term] -id: SO:0001427 -name: cis_regulatory_frameshift_element -def: "A structural region in an RNA molecule which promotes ribosomal frameshifting of cis coding sequence." [RFAM:jd] -synonym: "cis regulatory frameshift element" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001428 -name: expressed_sequence_assembly -def: "A sequence assembly derived from expressed sequences." [SO:ke] -comment: From tracker [ 2372385 ] expressed_sequence_assembly. -synonym: "expressed sequence assembly" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0001429 -name: DNA_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with DNA." [SO:ke] -synonym: "DNA binding site" EXACT [] -is_a: SO:0001655 ! nucleotide_binding_site - -[Term] -id: SO:0001431 -name: cryptic_gene -def: "A gene that is not transcribed under normal conditions and is not critical to normal cellular functioning." [SO:ke] -synonym: "cryptic gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000976 ! cryptic -relationship: has_quality SO:0000976 ! cryptic - -[Term] -id: SO:0001432 -name: sequence_variant_affecting_polyadenylation -comment: OBSOLETE: This term was deleted as it conflated more than one term. The alteration is separate from the effect. -synonym: "mutation affecting polyadenylation" RELATED [] -synonym: "sequence variant affecting polyadenylation" EXACT [] -is_obsolete: true -replaced_by: SO:0001545 - -[Term] -id: SO:0001433 -name: three_prime_RACE_clone -def: "A three prime RACE (Rapid Amplification of cDNA Ends) clone is a cDNA clone copied from the 3' end of an mRNA (using a poly-dT primer to capture the polyA tail and a gene-specific or randomly primed 5' primer), and spliced into a vector for propagation in a suitable host." [modENCODE:nlw] -synonym: "3' RACE clone" RELATED [] -is_a: SO:0000317 ! 
cDNA_clone - -[Term] -id: SO:0001434 -name: cassette_pseudogene -def: "A cassette pseudogene is a kind of gene in an inactive form which may recombine at a telomeric locus to form a functional copy." [SO:ke] -comment: Requested by the Trypanosome community. -synonym: "cassette pseudogene" EXACT [] -synonym: "cassette type psedogene" RELATED [] -is_a: SO:0001760 ! non_processed_pseudogene - -[Term] -id: SO:0001435 -name: alanine -comment: A place holder for a cross product with chebi. -synonym: "A" EXACT aa1 [] -synonym: "Ala" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001436 -name: valine -comment: A place holder for a cross product with chebi. -synonym: "V" EXACT aa1 [] -synonym: "Val" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001437 -name: leucine -comment: A place holder for a cross product with chebi. -synonym: "L" EXACT aa1 [] -synonym: "Leu" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001438 -name: isoleucine -comment: A place holder for a cross product with chebi. -synonym: "I" EXACT aa1 [] -synonym: "Ile" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001439 -name: proline -comment: A place holder for a cross product with chebi. -synonym: "P" EXACT aa1 [] -synonym: "Pro" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001440 -name: tryptophan -comment: A place holder for a cross product with chebi. -synonym: "Trp" EXACT aa3 [] -synonym: "W" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001441 -name: phenylalanine -comment: A place holder for a cross product with chebi. -synonym: "F" EXACT aa1 [] -synonym: "Phe" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001442 -name: methionine -comment: A place holder for a cross product with chebi. -synonym: "M" EXACT aa1 [] -synonym: "Met" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001443 -name: glycine -comment: A place holder for a cross product with chebi. 
-synonym: "G" EXACT aa1 [] -synonym: "Gly" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001444 -name: serine -comment: A place holder for a cross product with chebi. -synonym: "S" EXACT aa1 [] -synonym: "Ser" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001445 -name: threonine -comment: A place holder for a cross product with chebi. -synonym: "T" EXACT aa1 [] -synonym: "Thr" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001446 -name: tyrosine -comment: A place holder for a cross product with chebi. -synonym: "Tyr" EXACT aa3 [] -synonym: "Y" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001447 -name: cysteine -comment: A place holder for a cross product with chebi. -synonym: "C" EXACT aa1 [] -synonym: "Cys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001448 -name: glutamine -comment: A place holder for a cross product with chebi. -synonym: "Gln" EXACT aa3 [] -synonym: "Q" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001449 -name: asparagine -comment: A place holder for a cross product with chebi. -synonym: "Asn" EXACT aa3 [] -synonym: "N" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001450 -name: lysine -comment: A place holder for a cross product with chebi. -synonym: "K" EXACT aa1 [] -synonym: "Lys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001451 -name: arginine -comment: A place holder for a cross product with chebi. -synonym: "Arg" EXACT aa3 [] -synonym: "R" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001452 -name: histidine -comment: A place holder for a cross product with chebi. -synonym: "H" EXACT aa1 [] -synonym: "His" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001453 -name: aspartic_acid -comment: A place holder for a cross product with chebi. -synonym: "Asp" EXACT aa3 [] -synonym: "aspartic acid" EXACT [] -synonym: "D" EXACT aa1 [] -is_a: SO:0001237 ! 
amino_acid - -[Term] -id: SO:0001454 -name: glutamic_acid -comment: A place holder for a cross product with chebi. -synonym: "E" EXACT aa1 [] -synonym: "Glu" EXACT aa3 [] -synonym: "glutamic acid" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001455 -name: selenocysteine -comment: A place holder for a cross product with chebi. -synonym: "Sec" EXACT aa3 [] -synonym: "U" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001456 -name: pyrrolysine -comment: A place holder for a cross product with chebi. -synonym: "O" EXACT aa1 [] -synonym: "Pyl" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001457 -name: transcribed_cluster -def: "A region defined by a set of transcribed sequences from the same gene or expressed pseudogene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "transcribed cluster" EXACT [] -synonym: "unigene cluster" RELATED [] -is_a: SO:0001410 ! experimental_feature -relationship: has_part SO:0000695 ! reagent - -[Term] -id: SO:0001458 -name: unigene_cluster -def: "A kind of transcribed_cluster defined by a set of transcribed sequences from the a unique gene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "unigene cluster" RELATED [] -is_a: SO:0001457 ! transcribed_cluster - -[Term] -id: SO:0001459 -name: CRISPR -def: "Clustered Palindromic Repeats interspersed with bacteriophage derived spacer sequences." [RFAM:jd] -synonym: "Clustered_Regularly_Interspaced_Short_Palindromic_Repeat" EXACT [] -synonym: "CRISPR element" EXACT [] -xref: http:en.wikipedia.org/wiki/CRISPR -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0001460 -name: insulator_binding_site -def: "A binding site that, in an insulator region of a nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -comment: See tracker ID 2060908. -synonym: "insulator binding site" RELATED [] -is_a: SO:0001654 ! 
nucleotide_to_protein_binding_site -relationship: part_of SO:0000627 ! insulator - -[Term] -id: SO:0001461 -name: enhancer_binding_site -def: "A binding site that, in the enhancer region of a nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -synonym: "enhancer binding site" RELATED [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -relationship: part_of SO:0000165 ! enhancer - -[Term] -id: SO:0001462 -name: contig_collection -def: "A collection of contigs." [SO:ke] -comment: See tracker ID: 2138359. -synonym: "contig collection" EXACT [] -is_a: SO:0001085 ! sequence_conflict -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001085 ! sequence_conflict -intersection_of: has_part SO:0000149 ! contig -relationship: has_part SO:0000149 ! contig - -[Term] -id: SO:0001463 -name: lincRNA -def: "A multiexonic non-coding RNA transcribed by RNA polymerase II." [PMID:19182780, SO:ke] -synonym: "large intervening non-coding RNA" EXACT [] -synonym: "long intergenic non-coding RNA" EXACT [] -is_a: SO:0001877 ! lnc_RNA - -[Term] -id: SO:0001464 -name: UST -def: "An EST spanning part or all of the untranslated regions of a protein-coding transcript." [SO:nlw] -synonym: "UTR sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001465 -name: three_prime_UST -def: "A UST located in the 3'UTR of a protein-coding transcript." [SO:nlw] -synonym: "3' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001466 -name: five_prime_UST -def: "An UST located in the 5'UTR of a protein-coding transcript." [SO:nlw] -synonym: "5' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001467 -name: RST -def: "A tag produced from a single sequencing read from a RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "RACE sequence tag" EXACT [] -is_a: SO:0000345 ! 
EST - -[Term] -id: SO:0001468 -name: three_prime_RST -def: "A tag produced from a single sequencing read from a 3'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "3' RST" EXACT [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001469 -name: five_prime_RST -def: "A tag produced from a single sequencing read from a 5'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "5' RST" RELATED [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001470 -name: UST_match -def: "A match against an UST sequence." [SO:nlw] -synonym: "UST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001471 -name: RST_match -def: "A match against an RST sequence." [SO:nlw] -synonym: "RST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001472 -name: primer_match -def: "A nucleotide match to a primer sequence." [SO:nlw] -synonym: "primer match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0001473 -name: miRNA_antiguide -def: "A region of the pri miRNA that basepairs with the guide to form the hairpin." [SO:ke] -synonym: "miRNA antiguide " EXACT [] -synonym: "miRNA passenger strand" EXACT [] -synonym: "miRNA star" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-05-27T03:35:43Z - -[Term] -id: SO:0001474 -name: trans_splice_junction -def: "The boundary between the spliced leader and the first exon of the mRNA." [SO:ke] -synonym: "trans-splice junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2009-07-13T04:50:49Z - -[Term] -id: SO:0001475 -name: outron -def: "A region of a primary transcript, that is removed via trans splicing." [PMID:16401417, SO:ke] -is_a: SO:0000835 ! primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-07-14T11:36:08Z - -[Term] -id: SO:0001476 -name: natural_plasmid -def: "A plasmid that occurs naturally." 
[SO:xp] -synonym: "natural plasmid" EXACT [] -is_a: SO:0000155 ! plasmid -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000782 ! natural -relationship: has_quality SO:0000782 ! natural -created_by: kareneilbeck -creation_date: 2009-09-01T03:43:06Z - -[Term] -id: SO:0001477 -name: gene_trap_construct -def: "A gene trap construct is a type of engineered plasmid which is designed to integrate into a genome and produce a fusion transcript between exons of the gene into which it inserts and a reporter element in the construct. Gene traps contain a splice acceptor, do not contain promoter elements for the reporter, and are mutagenic. Gene traps may be bicistronic with the second cassette containing a promoter driving an a selectable marker." [ZFIN:dh] -synonym: "gene trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:49:09Z - -[Term] -id: SO:0001478 -name: promoter_trap_construct -def: "A promoter trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when inserted in close proximity to a promoter element. Promoter traps typically do not contain promoter elements and are mutagenic." [ZFIN:dh] -synonym: "promoter trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:52:01Z - -[Term] -id: SO:0001479 -name: enhancer_trap_construct -def: "An enhancer trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when the expression from a basic minimal promoter is enhanced by genomic enhancer elements. Enhancer traps contain promoter elements and are not usually mutagenic." [ZFIN:dh] -synonym: "enhancer trap construct" EXACT [] -is_a: SO:0000637 ! 
engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:53:26Z - -[Term] -id: SO:0001480 -name: PAC_end -def: "A region of sequence from the end of a PAC clone that may provide a highly specific marker." [ZFIN:mh] -synonym: "PAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000154 ! PAC -created_by: kareneilbeck -creation_date: 2009-09-09T05:18:12Z - -[Term] -id: SO:0001481 -name: RAPD -def: "RAPD is a 'PCR product' where a sequence variant is identified through the use of PCR with random primers." [ZFIN:mh] -synonym: "Random Amplification Polymorphic DNA" EXACT [] -is_a: SO:0000006 ! PCR_product -created_by: kareneilbeck -creation_date: 2009-09-09T05:26:10Z - -[Term] -id: SO:0001482 -name: shadow_enhancer -synonym: "shadow enhancer" EXACT [] -is_a: SO:0000165 ! enhancer -created_by: kareneilbeck -creation_date: 2009-09-09T05:29:29Z - -[Term] -id: SO:0001483 -name: SNV -def: "SNVs are single nucleotide positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0001484 -name: X_element_combinatorial_repeat -def: "An X element combinatorial repeat is a repeat region located between the X element and the telomere or adjacent Y' element." [http://www.yeastgenome.org/help/glossary.html] -comment: X element combinatorial repeats contain Tbf1p binding sites,\nand possible functions include a role in telomerase-independent telomere\nmaintenance via recombination or as a barrier against transcriptional\nsilencing. These are usually present as a combination of one or more of\nseveral types of smaller elements (designated A, B, C, or D). This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "X element combinatorial repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! 
telomere -created_by: kareneilbeck -creation_date: 2009-11-10T11:03:37Z - -[Term] -id: SO:0001485 -name: Y_prime_element -def: "A Y' element is a repeat region (SO:0000657) located adjacent to telomeric repeats or X element combinatorial repeats, either as a single copy or tandem repeat of two to four copies." [http:http://www.yeastgenome.org/help/glossary.html] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "Y' element" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T12:08:57Z - -[Term] -id: SO:0001486 -name: standard_draft -def: "The status of a whole genome sequence, where the data is minimally filtered or un-filtered, from any number of sequencing platforms, and is assembled into contigs. Genome sequence of this quality may harbour regions of poor quality and can be relatively incomplete." [DOI:10.1126] -synonym: "standard draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:48:32Z - -[Term] -id: SO:0001487 -name: high_quality_draft -def: "The status of a whole genome sequence, where overall coverage represents at least 90 percent of the genome." [DOI:10.1126] -synonym: "high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:52:36Z - -[Term] -id: SO:0001488 -name: improved_high_quality_draft -def: "The status of a whole genome sequence, where additional work has been performed, using either manual or automated methods, such as gap resolution." [DOI:10.1126] -synonym: "improved high quality draft" EXACT [] -is_a: SO:0001499 ! 
whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:54:35Z - -[Term] -id: SO:0001489 -name: annotation_directed_improved_draft -def: "The status of a whole genome sequence,where annotation, and verification of coding regions has occurred." [DOI:10.1126] -synonym: "annotation directed improvement" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:57:10Z - -[Term] -id: SO:0001490 -name: noncontiguous_finished -def: "The status of a whole genome sequence, where the assembly is high quality, closure approaches have been successful for most gaps, misassemblies and low quality regions." [DOI:10.1126] -synonym: "non contiguous finished" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:01:07Z - -[Term] -id: SO:0001491 -name: finished_genome -def: "The status of a whole genome sequence, with less than 1 error per 100,000 base pairs." [DOI:10.1126] -synonym: "finished" EXACT [] -synonym: "finished genome" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:04:43Z - -[Term] -id: SO:0001492 -name: intronic_regulatory_region -def: "A regulatory region that is part of an intron." [SO:ke] -synonym: "intronic regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000188 ! intron -created_by: kareneilbeck -creation_date: 2009-11-08T02:48:02Z - -[Term] -id: SO:0001493 -name: centromere_DNA_Element_I -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region composed of 8-11bp which enables binding by the centromere binding factor 1(Cbf1p)." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEI" EXACT [] -synonym: "Centromere DNA Element I" EXACT [] -is_a: SO:0000330 ! 
conserved_region -relationship: part_of SO:0001794 ! point_centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:47:23Z - -[Term] -id: SO:0001494 -name: centromere_DNA_Element_II -def: "A centromere DNA Element II (CDEII) is part a conserved region of the centromere, consisting of a consensus region that is AT-rich and ~ 75-100 bp in length." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEII" EXACT [] -synonym: "centromere DNA Element II" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0001794 ! point_centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:51:26Z - -[Term] -id: SO:0001495 -name: centromere_DNA_Element_III -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region that consists of a 25-bp which enables binding by the centromere DNA binding factor 3 (CBF3) complex." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEIII" EXACT [] -synonym: "centromere DNA Element III" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0001794 ! point_centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:54:47Z - -[Term] -id: SO:0001496 -name: telomeric_repeat -def: "The telomeric repeat is a repeat region, part of the chromosome, which in yeast, is a G-rich terminal sequence of the form (TG(1-3))n or more precisely ((TG)(1-6)TG(2-3))n." [PMID:8720065] -comment: The repeats are maintained by telomerase and there is generally 300 (+/-) 75 bp of TG(1-3) at a given end. Telomeric repeats function in completing chromosome replication and protecting the ends from degradation and end-to-end fusions. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880739. -synonym: "telomeric repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! 
telomere -created_by: kareneilbeck -creation_date: 2009-11-09T06:00:42Z - -[Term] -id: SO:0001497 -name: X_element -def: "The X element is a conserved region, of the telomere, of ~475 bp that contains an ARS sequence and in most cases an Abf1p binding site." [http://www.yeastgenome.org/help/glossary.html#xelemcoresequence] -comment: Possible functions include roles in chromosomal segregation,\nmaintenance of chromosome stability, recombinational sequestering, or as a\nbarrier to transcriptional silencing. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "X element" RELATED [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T10:56:54Z - -[Term] -id: SO:0001498 -name: YAC_end -def: "A region of sequence from the end of a YAC clone that may provide a highly specific marker." [SO:ke] -synonym: "YAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000152 ! YAC -created_by: kareneilbeck -creation_date: 2009-11-19T11:07:18Z - -[Term] -id: SO:0001499 -name: whole_genome_sequence_status -def: "The status of whole genome sequence." [DOI:10.1126] -comment: This terms and children were added to SO in response to tracker request by Patrick Chain. The paper Genome Project Standards in a New Era of Sequencing. Science October 9th 2009, addresses these terms. -synonym: "whole genome sequence status" EXACT [] -is_a: SO:0000905 ! status -created_by: kareneilbeck -creation_date: 2009-10-23T12:47:47Z - -[Term] -id: SO:0001500 -name: heritable_phenotypic_marker -def: "A biological_region characterized as a single heritable trait in a phenotype screen. The heritable phenotype may be mapped to a chromosome but generally has not been characterized to a specific gene locus." [JAX:hdene] -synonym: "heritable phenotypic marker" EXACT [] -synonym: "phenotypic marker" EXACT [] -is_a: SO:0001645 ! 
genetic_marker -created_by: kareneilbeck -creation_date: 2009-12-07T01:50:55Z - -[Term] -id: SO:0001501 -name: peptide_collection -def: "A collection of peptide sequences." [BBOP:nlw] -comment: Term requested via tracker ID: 2910829. -synonym: "peptide collection" EXACT [] -synonym: "peptide set" EXACT [] -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0000104 ! polypeptide -relationship: has_part SO:0000104 ! polypeptide -created_by: kareneilbeck -creation_date: 2009-12-11T10:58:58Z - -[Term] -id: SO:0001502 -name: high_identity_region -def: "An experimental feature with high sequence identity to another sequence." [SO:ke] -comment: Requested by tracker ID: 2902685. -synonym: "high identity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2009-12-11T11:06:05Z - -[Term] -id: SO:0001503 -name: processed_transcript -def: "A transcript for which no open reading frame has been identified and for which no other function has been determined." [MGI:hdeen] -comment: Ensembl and Vega also use this term name. Requested by Howard Deen of MGI. -synonym: "processed transcript" EXACT [] -is_a: SO:0000673 ! transcript -created_by: kareneilbeck -creation_date: 2009-12-21T05:37:14Z - -[Term] -id: SO:0001504 -name: assortment_derived_variation -def: "A chromosome variation derived from an event during meiosis." [SO:ke] -synonym: "assortment derived variation" RELATED [] -is_a: SO:0000240 ! chromosome_variation -created_by: kareneilbeck -creation_date: 2010-03-02T05:03:18Z - -[Term] -id: SO:0001505 -name: reference_genome -def: "A collection of sequences (often chromosomes) taken as the standard for a given organism and genome assembly." [SO:ke] -synonym: "reference genome" RELATED [] -is_a: SO:0001026 ! 
genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:10:03Z - -[Term] -id: SO:0001506 -name: variant_genome -def: "A collection of sequences (often chromosomes) of an individual." [SO:ke] -synonym: "variant genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:11:25Z - -[Term] -id: SO:0001507 -name: variant_collection -def: "A collection of one or more sequences of an individual." [SO:ke] -synonym: "variant collection" RELATED [] -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0001059 ! sequence_alteration -relationship: has_part SO:0001059 ! sequence_alteration -created_by: kareneilbeck -creation_date: 2010-03-03T02:13:28Z - -[Term] -id: SO:0001508 -name: alteration_attribute -synonym: "alteration attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:53:23Z - -[Term] -id: SO:0001509 -name: chromosomal_variation_attribute -synonym: "chromosomal variation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:54:30Z - -[Term] -id: SO:0001510 -name: intrachromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:25Z - -[Term] -id: SO:0001511 -name: interchromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:43Z - -[Term] -id: SO:0001512 -name: insertion_attribute -def: "A quality of a chromosomal insertion,." [SO:ke] -synonym: "insertion attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:56Z - -[Term] -id: SO:0001513 -name: tandem -is_a: SO:0001512 ! 
insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:37Z - -[Term] -id: SO:0001514 -name: direct -def: "A quality of an insertion where the insert is not in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:49Z - -[Term] -id: SO:0001515 -name: inverted -def: "A quality of an insertion where the insert is in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:40Z - -[Term] -id: SO:0001516 -name: free -def: "The quality of a duplication where the new region exists independently of the original." [SO:ke] -is_a: SO:0001523 ! duplication_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:51Z - -[Term] -id: SO:0001517 -name: inversion_attribute -synonym: "inversion attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:10Z - -[Term] -id: SO:0001518 -name: pericentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:24Z - -[Term] -id: SO:0001519 -name: paracentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:35Z - -[Term] -id: SO:0001520 -name: translocaton_attribute -synonym: "translocation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:47Z - -[Term] -id: SO:0001521 -name: reciprocal -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:34Z - -[Term] -id: SO:0001522 -name: insertional -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:51Z - -[Term] -id: SO:0001523 -name: duplication_attribute -synonym: "duplication attribute" RELATED [] -is_a: SO:0001508 ! 
alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-05T01:56:33Z - -[Term] -id: SO:0001524 -name: chromosomally_aberrant_genome -synonym: "chromosomally aberrant genome" RELATED [] -is_a: SO:0001506 ! variant_genome -created_by: kareneilbeck -creation_date: 2010-03-05T02:21:00Z - -[Term] -id: SO:0001525 -name: assembly_error_correction -def: "A region of sequence where the final nucleotide assignment differs from the original assembly due to an improvement that replaces a mistake." [SO:ke] -synonym: "assembly error correction" RELATED [] -is_a: SO:0000413 ! sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:16:31Z - -[Term] -id: SO:0001526 -name: base_call_error_correction -def: "A region of sequence where the final nucleotide assignment is different from that given by the base caller due to an improvement that replaces a mistake." [SO:ke] -synonym: "base call error correction" RELATED [] -is_a: SO:0000413 ! sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:18:07Z - -[Term] -id: SO:0001527 -name: peptide_localization_signal -def: "A region of peptide sequence used to target the polypeptide molecule to a specific organelle." [SO:ke] -subset: SOFA -synonym: "localization signal" RELATED [] -synonym: "peptide localization signal" EXACT [] -is_a: SO:0000839 ! polypeptide_region -created_by: kareneilbeck -creation_date: 2010-03-11T02:15:05Z - -[Term] -id: SO:0001528 -name: nuclear_localization_signal -def: "A polypeptide region that targets a polypeptide to the nucleus." [SO:ke] -synonym: "NLS" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_localization_signal "wikipedia" -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:16:38Z - -[Term] -id: SO:0001529 -name: endosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the endosome." [SO:ke] -synonym: "endosomal localization signal" EXACT [] -is_a: SO:0001527 ! 
peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:20:58Z - -[Term] -id: SO:0001530 -name: lysosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the lysosome." [SO:ke] -synonym: "lysosomal localization signal" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:24:10Z - -[Term] -id: SO:0001531 -name: nuclear_export_signal -def: "A polypeptide region that targets a polypeptide to he cytoplasm." [SO:ke] -synonym: "NES" EXACT [] -synonym: "nuclear export signal" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_export_signal -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:25:25Z - -[Term] -id: SO:0001532 -name: recombination_signal_sequence -def: "A region recognized by a recombinase." [SO:ke] -synonym: "recombination signal sequence" RELATED [] -xref: http://en.wikipedia.org/wiki/Recombination_Signal_Sequences "wikipedia" -is_a: SO:0000299 ! specific_recombination_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:16:47Z - -[Term] -id: SO:0001533 -name: cryptic_splice_site -def: "A splice site that is in part of the transcript not normally spliced. They occur via mutation or transcriptional error." [SO:ke] -synonym: "cryptic splice signal" RELATED [] -synonym: "cryptic splice site" EXACT [] -is_a: SO:0000162 ! splice_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:25:06Z - -[Term] -id: SO:0001534 -name: nuclear_rim_localization_signal -def: "A polypeptide region that targets a polypeptide to the nuclear rim." [SO:ke] -synonym: "nuclear rim localization signal" RELATED [] -xref: PMID:16027110 -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T03:31:30Z - -[Term] -id: SO:0001535 -name: p_element -def: "A P_element is a DNA transposon responsible for hybrid dysgenesis." 
[SO:ke] -synonym: "P element" RELATED [] -is_a: SO:0000182 ! DNA_transposon -created_by: kareneilbeck -creation_date: 2010-03-12T03:40:33Z - -[Term] -id: SO:0001536 -name: functional_variant -def: "A sequence variant in which the function of a gene product is altered with respect to a reference." [SO:ke] -synonym: "functional variant" EXACT [] -is_a: SO:0001060 ! sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:30:25Z - -[Term] -id: SO:0001537 -name: structural_variant -def: "A sequence variant that changes one or more sequence features." [SO:ke] -synonym: "structural variant" RELATED [] -is_a: SO:0001060 ! sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:31:01Z - -[Term] -id: SO:0001538 -name: transcript_function_variant -def: "A sequence variant which alters the functioning of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcript function variant" EXACT [] -is_a: SO:0001536 ! functional_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:32:58Z - -[Term] -id: SO:0001539 -name: translational_product_function_variant -def: "A sequence variant that affects the functioning of a translational product with respect to a reference sequence." [SO:ke] -synonym: "translational product variant" EXACT [] -is_a: SO:0001536 ! functional_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:46:15Z - -[Term] -id: SO:0001540 -name: level_of_transcript_variant -def: "A sequence variant which alters the level of a transcript." [SO:ke] -synonym: "level of transcript variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:47:07Z - -[Term] -id: SO:0001541 -name: decreased_transcript_level_variant -def: "A sequence variant that increases the level of mature, spliced and processed RNA with respect to a reference sequence." [SO:ke] -synonym: "decreased transcript level" EXACT [] -is_a: SO:0001540 ! 
level_of_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:47:47Z - -[Term] -id: SO:0001542 -name: increased_transcript_level_variant -def: "A sequence variant that increases the level of mature, spliced and processed RNA with respect to a reference sequence." [SO:ke] -synonym: "increased transcript level variant" EXACT [] -is_a: SO:0001540 ! level_of_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:48:17Z - -[Term] -id: SO:0001543 -name: transcript_processing_variant -def: "A sequence variant that affects the post transcriptional processing of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcript processing variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:48:48Z - -[Term] -id: SO:0001544 -name: editing_variant -def: "A transcript processing variant whereby the process of editing is disrupted with respect to the reference." [SO:ke] -synonym: "editing variant" EXACT [] -is_a: SO:0001543 ! transcript_processing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:49:25Z - -[Term] -id: SO:0001545 -name: polyadenylation_variant -def: "A sequence variant that changes polyadenylation with respect to a reference sequence." [SO:ke] -synonym: "polyadenylation variant" EXACT [] -is_a: SO:0001543 ! transcript_processing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:49:40Z - -[Term] -id: SO:0001546 -name: transcript_stability_variant -def: "A variant that changes the stability of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcript stability variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:01Z - -[Term] -id: SO:0001547 -name: decreased_transcript_stability_variant -def: "A sequence variant that decreases transcript stability with respect to a reference sequence." 
[SO:ke] -synonym: "decrease transcript stability variant" EXACT [] -is_a: SO:0001546 ! transcript_stability_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:23Z - -[Term] -id: SO:0001548 -name: increased_transcript_stability_variant -def: "A sequence variant that increases transcript stability with respect to a reference sequence." [SO:ke] -synonym: "increased transcript stability variant" EXACT [] -is_a: SO:0001546 ! transcript_stability_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:39Z - -[Term] -id: SO:0001549 -name: transcription_variant -def: "A variant that changes alters the transcription of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcription variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:51:26Z - -[Term] -id: SO:0001550 -name: rate_of_transcription_variant -def: "A sequence variant that changes the rate of transcription with respect to a reference sequence." [SO:ke] -synonym: "rate of transcription variant" EXACT [] -is_a: SO:0001549 ! transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:51:50Z - -[Term] -id: SO:0001551 -name: increased_transcription_rate_variant -def: "A sequence variant that increases the rate of transcription with respect to a reference sequence." [SO:ke] -synonym: "increased transcription rate variant" EXACT [] -is_a: SO:0001550 ! rate_of_transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:52:17Z - -[Term] -id: SO:0001552 -name: decreased_transcription_rate_variant -def: "A sequence variant that decreases the rate of transcription with respect to a reference sequence." [SO:ke] -synonym: "decreased transcription rate variant" EXACT [] -is_a: SO:0001550 ! 
rate_of_transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:52:43Z - -[Term] -id: SO:0001553 -name: translational_product_level_variant -def: "A functional variant that changes the translational product level with respect to a reference sequence." [SO:ke] -synonym: "translational product level variant" EXACT [] -is_a: SO:0001539 ! translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:53:32Z - -[Term] -id: SO:0001554 -name: polypeptide_function_variant -def: "A sequence variant which changes polypeptide functioning with respect to a reference sequence." [SO:ke] -synonym: "polypeptide function variant" EXACT [] -is_a: SO:0001539 ! translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:53:54Z - -[Term] -id: SO:0001555 -name: decreased_translational_product_level -def: "A sequence variant which decreases the translational product level with respect to a reference sequence." [SO:ke] -synonym: "decrease translational product level" EXACT [] -is_a: SO:0001553 ! translational_product_level_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:54:25Z - -[Term] -id: SO:0001556 -name: increased_translational_product_level -def: "A sequence variant which increases the translational product level with respect to a reference sequence." [SO:ke] -synonym: "increase translational product level" EXACT [] -is_a: SO:0001553 ! translational_product_level_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:55:25Z - -[Term] -id: SO:0001557 -name: polypeptide_gain_of_function_variant -def: "A sequence variant which causes gain of polypeptide function with respect to a reference sequence." [SO:ke] -synonym: "polypeptide gain of function variant" EXACT [] -is_a: SO:0001554 ! 
polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:12Z - -[Term] -id: SO:0001558 -name: polypeptide_localization_variant -def: "A sequence variant which changes the localization of a polypeptide with respect to a reference sequence." [SO:ke] -synonym: "polypeptide localization variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:37Z - -[Term] -id: SO:0001559 -name: polypeptide_loss_of_function_variant -def: "A sequence variant that causes the loss of a polypeptide function with respect to a reference sequence." [SO:ke] -synonym: "polypeptide loss of function variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:58Z - -[Term] -id: SO:0001560 -name: inactive_ligand_binding_site -def: "A sequence variant that causes the inactivation of a ligand binding site with respect to a reference sequence." [SO:ke] -synonym: "inactive ligand binding site" EXACT [] -is_a: SO:0001559 ! polypeptide_loss_of_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:58:00Z - -[Term] -id: SO:0001561 -name: polypeptide_partial_loss_of_function -def: "A sequence variant that causes some but not all loss of polypeptide function with respect to a reference sequence." [SO:ke] -synonym: "polypeptide partial loss of function" EXACT [] -is_a: SO:0001559 ! polypeptide_loss_of_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:58:32Z - -[Term] -id: SO:0001562 -name: polypeptide_post_translational_processing_variant -def: "A sequence variant that causes a change in post translational processing of the peptide with respect to a reference sequence." [SO:ke] -synonym: "polypeptide post translational processing variant" EXACT [] -is_a: SO:0001554 ! 
polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:59:06Z - -[Term] -id: SO:0001563 -name: copy_number_change -def: "A sequence variant where copies of a feature (CNV) are either increased or decreased." [SO:ke] -synonym: "copy number change" EXACT [] -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:27:33Z - -[Term] -id: SO:0001564 -name: gene_variant -def: "A sequence variant where the structure of the gene is changed." [SO:ke] -synonym: "gene structure variant" EXACT [] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:01Z - -[Term] -id: SO:0001565 -name: gene_fusion -def: "A sequence variant whereby a two genes have become joined." [SO:ke] -synonym: "gene fusion" EXACT [] -is_a: SO:0001564 ! gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:28Z - -[Term] -id: SO:0001566 -name: regulatory_region_variant -def: "A sequence variant located within a regulatory region." [SO:ke] -comment: EBI term: Regulatory region variations - In regulatory region annotated by Ensembl. -synonym: "regulatory region variant" EXACT [] -synonym: "regulatory_region_" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:48Z - -[Term] -id: SO:0001567 -name: stop_retained_variant -def: "A sequence variant where at least one base in the terminator codon is changed, but the terminator remains." [SO:ke] -synonym: "stop retained variant" EXACT [] -is_a: SO:0001590 ! terminator_codon_variant -is_a: SO:0001819 ! synonymous_variant -created_by: kareneilbeck -creation_date: 2010-04-19T05:02:30Z - -[Term] -id: SO:0001568 -name: splicing_variant -def: "A sequence variant that changes the process of splicing." [SO:ke] -synonym: "splicing variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001564 ! 
gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:29:22Z - -[Term] -id: SO:0001569 -name: cryptic_splice_site_variant -def: "A sequence variant causing a new (functional) splice site." [EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "cryptic splice site activation" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:29:41Z - -[Term] -id: SO:0001570 -name: cryptic_splice_acceptor -def: "A sequence variant whereby a new splice site is created due to the activation of a new acceptor." [SO:ke] -synonym: "cryptic splice acceptor" EXACT [] -is_a: SO:0001569 ! cryptic_splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:30:11Z - -[Term] -id: SO:0001571 -name: cryptic_splice_donor -def: "A sequence variant whereby a new splice site is created due to the activation of a new donor." [SO:ke] -synonym: "cryptic splice donor" EXACT [] -is_a: SO:0001569 ! cryptic_splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:30:35Z - -[Term] -id: SO:0001572 -name: exon_loss -def: "A sequence variant whereby an exon is lost from the transcript." [SO:ke] -synonym: "exon loss" EXACT [] -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:09Z - -[Term] -id: SO:0001573 -name: intron_gain -def: "A sequence variant whereby an intron is gained by the processed transcript; usually a result of an alteration of the donor or acceptor." [EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "intron gain" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:25Z - -[Term] -id: SO:0001574 -name: splice_acceptor_variant -def: "A splice variant that changes the 2 base region at the 3' end of an intron." 
[SO:ke] -synonym: "splice acceptor variant" EXACT [] -is_a: SO:0001629 ! splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:52Z - -[Term] -id: SO:0001575 -name: splice_donor_variant -def: "A splice variant that changes the2 base region at the 5' end of an intron." [SO:ke] -synonym: "splice donor variant" EXACT [] -is_a: SO:0001629 ! splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:32:10Z - -[Term] -id: SO:0001576 -name: transcript_variant -def: "A sequence variant that changes the structure of the transcript." [SO:ke] -synonym: "transcript variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001564 ! gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:32:41Z - -[Term] -id: SO:0001577 -name: complex_transcript_variant -def: "A transcript variant with a complex INDEL- Insertion or deletion that spans an exon/intron border or a coding sequence/UTR border." [http://ensembl.org/info/docs/variation/index.html] -comment: EBI term: Complex InDel - Insertion or deletion that spans an exon/intron border or a coding sequence/UTR border. -synonym: "complex transcript variant" EXACT [] -synonym: "complex_indel" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "complext change in transcript" EXACT [] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:33:03Z - -[Term] -id: SO:0001578 -name: stop_lost -def: "A sequence variant where at least one base of the terminator codon (stop) is changed, resulting in an elongated transcript." [SO:ke] -comment: EBI term: Stop lost - In coding sequence, resulting in the loss of a stop codon. -synonym: "stop codon lost" EXACT [] -synonym: "stop lost" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001590 ! terminator_codon_variant -is_a: SO:0001650 ! inframe_variant -is_a: SO:0001907 ! 
feature_elongation -created_by: kareneilbeck -creation_date: 2010-03-23T03:46:42Z - -[Term] -id: SO:0001579 -name: transcript_sequence_variant -synonym: "transcript sequence variant" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001580 -name: coding_sequence_variant -alt_id: SO:0001581 -def: "A sequence variant that changes the coding sequence." [SO:ke] -synonym: "coding sequence variant" EXACT [] -synonym: "coding variant" EXACT [] -synonym: "codon variant" EXACT [] -synonym: "codon_variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001791 ! exon_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:34:36Z - -[Term] -id: SO:0001582 -name: initiator_codon_variant -def: "A codon variant that changes at least one base of the first codon of a transcript." [SO:ke] -synonym: "initiatior codon variant" EXACT [] -synonym: "initiator codon change" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -xref: loinc:LA6695-6 "Initiating Methionine" -is_a: SO:0001650 ! inframe_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:35:18Z - -[Term] -id: SO:0001583 -name: missense_variant -alt_id: SO:0001584 -alt_id: SO:0001783 -def: "A sequence variant, where the change may be longer than 3 bases, and at least one base of a codon is changed resulting in a codon that encodes for a different amino acid." [EBI:gr, SO:ke] -comment: EBI term: Non-synonymous SNPs. SNPs that are located in the coding sequence and result in an amino acid change in the encoded peptide sequence. A change that causes a non_synonymous_codon can be more than 3 bases - for example 4 base substitution. 
-synonym: "missense" EXACT [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "missense codon" EXACT [] -synonym: "missense_variant" EXACT [] -synonym: "non synonymous codon" EXACT [] -synonym: "non synonymous variant" EXACT [] -synonym: "non_synonymous_coding" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -xref: http://en.wikipedia.org/wiki/Missense_mutation -xref: loinc:LA6698-0 "Missense" -is_a: SO:0001650 ! inframe_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:35:49Z - -[Term] -id: SO:0001585 -name: conservative_missense_variant -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for a different but similar amino acid. These variants may or may not be deleterious." [SO:ke] -synonym: "conservative missense codon" EXACT [] -synonym: "conservative missense variant" EXACT [] -synonym: "neutral missense codon" RELATED [] -synonym: "quiet missense codon" RELATED [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001583 ! missense_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:36:40Z - -[Term] -id: SO:0001586 -name: non_conservative_missense_variant -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for an amino acid with different biochemical properties." [SO:ke] -synonym: "non conservative missense codon" EXACT [] -synonym: "non conservative missense variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001583 ! missense_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:37:16Z - -[Term] -id: SO:0001587 -name: stop_gained -def: "A sequence variant whereby at least one base of a codon is changed, resulting in a premature stop codon, leading to a shortened transcript." 
[SO:ke] -comment: EBI term: Stop gained - In coding sequence, resulting in the gain of a stop codon (i.e. leading to a shortened peptide sequence). -synonym: "nonsense" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "nonsense codon" EXACT [] -synonym: "stop codon gained" RELATED [] -synonym: "stop gained" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -xref: loinc:LA6699-8 "Nonsense" -is_a: SO:0001650 ! inframe_variant -is_a: SO:0001906 ! feature_truncation -created_by: kareneilbeck -creation_date: 2010-03-22T02:37:52Z - -[Term] -id: SO:0001589 -name: frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three." [SO:ke] -comment: EBI term:Frameshift variations - In coding sequence, resulting in a frameshift. -synonym: "frameshift variant" EXACT [] -synonym: "frameshift_" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "frameshift_coding" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -xref: loinc:LA6694-9 "Frameshift" -is_a: SO:0001818 ! protein_altering_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:40:19Z - -[Term] -id: SO:0001590 -name: terminator_codon_variant -alt_id: SO:0001625 -def: "A sequence variant whereby at least one of the bases in the terminator codon is changed." [SO:ke] -comment: The terminal codon may be the terminator, or in an incomplete transcript the last available codon. -synonym: "terminal codon variant" EXACT [] -synonym: "terminal_codon_variant" EXACT [] -synonym: "terminator codon variant" EXACT [] -xref: loinc:LA6700-2 "Stop Codon Mutation" -is_a: SO:0001580 ! 
coding_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:40:37Z - -[Term] -id: SO:0001591 -name: frame_restoring_variant -def: "A sequence variant that reverts the sequence of a previous frameshift mutation back to the initial frame." [SO:ke] -synonym: "frame restoring variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:09Z - -[Term] -id: SO:0001592 -name: minus_1_frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, by shifting one base ahead." [http://arjournals.annualreviews.org/doi/pdf/10.1146/annurev.ge.08.120174.001535] -synonym: "-1 frameshift variant" EXACT [] -synonym: "minus 1 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:30Z - -[Term] -id: SO:0001593 -name: minus_2_frameshift_variant -synonym: "-2 frameshift variant" EXACT [] -synonym: "minus 2 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:52Z - -[Term] -id: SO:0001594 -name: plus_1_frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, by shifting one base backward." [http://arjournals.annualreviews.org/doi/pdf/10.1146/annurev.ge.08.120174.001535] -synonym: "+1 frameshift variant" EXACT [] -synonym: "plus 1 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:42:06Z - -[Term] -id: SO:0001595 -name: plus_2_frameshift_variant -synonym: "+2 frameshift variant" EXACT [] -synonym: "plus 2 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:42:23Z - -[Term] -id: SO:0001596 -name: transcript_secondary_structure_variant -def: "A sequence variant within a transcript that changes the secondary structure of the RNA product." 
[SO:ke] -synonym: "transcript secondary structure variant" EXACT [] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:43:18Z - -[Term] -id: SO:0001597 -name: compensatory_transcript_secondary_structure_variant -def: "A secondary structure variant that compensate for the change made by a previous variant." [SO:ke] -synonym: "compensatory transcript secondary structure variant" EXACT [] -is_a: SO:0001596 ! transcript_secondary_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:43:54Z - -[Term] -id: SO:0001598 -name: translational_product_structure_variant -def: "A sequence variant within the transcript that changes the structure of the translational product." [SO:ke] -synonym: "translational product structure variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001564 ! gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:44:17Z - -[Term] -id: SO:0001599 -name: 3D_polypeptide_structure_variant -def: "A sequence variant that changes the resulting polypeptide structure." [SO:ke] -synonym: "3D polypeptide structure variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:44:46Z - -[Term] -id: SO:0001600 -name: complex_3D_structural_variant -def: "A sequence variant that changes the resulting polypeptide structure." [SO:ke] -synonym: "complex 3D structural variant" EXACT [] -is_a: SO:0001599 ! 3D_polypeptide_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:45:13Z - -[Term] -id: SO:0001601 -name: conformational_change_variant -def: "A sequence variant in the CDS region that causes a conformational change in the resulting polypeptide sequence." [SO:ke] -synonym: "conformational change variant" EXACT [] -is_a: SO:0001599 ! 
3D_polypeptide_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:45:48Z - -[Term] -id: SO:0001602 -name: complex_change_of_translational_product_variant -synonym: "complex change of translational product variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:46:54Z - -[Term] -id: SO:0001603 -name: polypeptide_sequence_variant -def: "A sequence variant with in the CDS that causes a change in the resulting polypeptide sequence." [SO:ke] -synonym: "polypeptide sequence variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:13Z - -[Term] -id: SO:0001604 -name: amino_acid_deletion -def: "A sequence variant within a CDS resulting in the loss of an amino acid from the resulting polypeptide." [SO:ke] -synonym: "amino acid deletion" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:36Z - -[Term] -id: SO:0001605 -name: amino_acid_insertion -def: "A sequence variant within a CDS resulting in the gain of an amino acid to the resulting polypeptide." [SO:ke] -synonym: "amino acid insertion" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:56Z - -[Term] -id: SO:0001606 -name: amino_acid_substitution -def: "A sequence variant of a codon resulting in the substitution of one amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "amino acid substitution" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! 
polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:48:17Z - -[Term] -id: SO:0001607 -name: conservative_amino_acid_substitution -def: "A sequence variant of a codon causing the substitution of a similar amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "conservative amino acid substitution" EXACT [] -is_a: SO:0001606 ! amino_acid_substitution -created_by: kareneilbeck -creation_date: 2010-03-22T02:48:57Z - -[Term] -id: SO:0001608 -name: non_conservative_amino_acid_substitution -def: "A sequence variant of a codon causing the substitution of a non conservative amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "non conservative amino acid substitution" EXACT [] -is_a: SO:0001606 ! amino_acid_substitution -created_by: kareneilbeck -creation_date: 2010-03-22T02:49:23Z - -[Term] -id: SO:0001609 -name: elongated_polypeptide -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence." [SO:ke] -synonym: "elongated polypeptide" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:49:52Z - -[Term] -id: SO:0001610 -name: elongated_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated polypeptide C terminal" EXACT [] -is_a: SO:0001609 ! elongated_polypeptide -created_by: kareneilbeck -creation_date: 2010-03-22T02:50:20Z - -[Term] -id: SO:0001611 -name: elongated_polypeptide_N_terminal -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated polypeptide N terminal" EXACT [] -is_a: SO:0001609 ! 
elongated_polypeptide -created_by: kareneilbeck -creation_date: 2010-03-22T02:50:31Z - -[Term] -id: SO:0001612 -name: elongated_in_frame_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes in frame elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated in frame polypeptide C terminal" EXACT [] -is_a: SO:0001610 ! elongated_polypeptide_C_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:05Z - -[Term] -id: SO:0001613 -name: elongated_out_of_frame_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes out of frame elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated polypeptide out of frame C terminal" EXACT [] -is_a: SO:0001610 ! elongated_polypeptide_C_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:20Z - -[Term] -id: SO:0001614 -name: elongated_in_frame_polypeptide_N_terminal_elongation -def: "A sequence variant with in the CDS that causes in frame elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated in frame polypeptide N terminal" EXACT [] -is_a: SO:0001611 ! elongated_polypeptide_N_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:49Z - -[Term] -id: SO:0001615 -name: elongated_out_of_frame_polypeptide_N_terminal -def: "A sequence variant with in the CDS that causes out of frame elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated out of frame N terminal" EXACT [] -is_a: SO:0001611 ! elongated_polypeptide_N_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:52:05Z - -[Term] -id: SO:0001616 -name: polypeptide_fusion -def: "A sequence variant that causes a fusion of two polypeptide sequences." [SO:ke] -synonym: "polypeptide fusion" EXACT [] -is_a: SO:0001603 ! 
polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:52:43Z - -[Term] -id: SO:0001617 -name: polypeptide_truncation -def: "A sequence variant of the CD that causes a truncation of the resulting polypeptide." [SO:ke] -synonym: "polypeptide truncation" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:53:07Z - -[Term] -id: SO:0001618 -name: inactive_catalytic_site -def: "A sequence variant that causes the inactivation of a catalytic site with respect to a reference sequence." [SO:ke] -synonym: "inactive catalytic site" EXACT [] -is_a: SO:0001560 ! inactive_ligand_binding_site -created_by: kareneilbeck -creation_date: 2010-03-22T03:06:14Z - -[Term] -id: SO:0001619 -name: nc_transcript_variant -def: "A transcript variant of a non coding RNA gene." [SO:ke] -comment: Within non-coding gene - Located within a gene that does not code for a protein. -synonym: "nc transcript variant" EXACT [] -synonym: "non coding transcript variant" EXACT [] -synonym: "within_non_coding_gene" EXACT dbsnp [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:16:23Z - -[Term] -id: SO:0001620 -name: mature_miRNA_variant -def: "A transcript variant located with the sequence of the mature miRNA." [SO:ke] -comment: EBI term: Within mature miRNA - Located within a microRNA. -synonym: "mature miRNA variant" EXACT [] -synonym: "within_mature_miRNA" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001619 ! nc_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:16:58Z - -[Term] -id: SO:0001621 -name: NMD_transcript_variant -def: "A variant in a transcript that is the target of NMD." 
[SO:ke] -synonym: "NMD transcript variant" EXACT [] -synonym: "NMD_transcript" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:20:40Z - -[Term] -id: SO:0001622 -name: UTR_variant -def: "A transcript variant that is located within the UTR." [SO:ke] -synonym: "UTR variant" EXACT [] -synonym: "UTR_" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:22:58Z - -[Term] -id: SO:0001623 -name: 5_prime_UTR_variant -def: "A UTR variant of the 5' UTR." [SO:ke] -comment: EBI term: 5prime UTR variations - In 5prime UTR (untranslated region). -synonym: "5'UTR variant" EXACT [] -synonym: "5PRIME_UTR" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "five prime UTR variant" EXACT [] -synonym: "untranslated-5" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001622 ! UTR_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:23:29Z - -[Term] -id: SO:0001624 -name: 3_prime_UTR_variant -def: "A UTR variant of the 3' UTR." [SO:ke] -comment: EBI term 3prime UTR variations - In 3prime UTR. -synonym: "3'UTR variant" EXACT [] -synonym: "3PRIME_UTR" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "three prime UTR variant" EXACT [] -synonym: "untranslated-3" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001622 ! UTR_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:23:54Z - -[Term] -id: SO:0001626 -name: incomplete_terminal_codon_variant -def: "A sequence variant where at least one base of the final codon of an incompletely annotated transcript is changed." [SO:ke] -comment: EBI term: Partial codon - Located within the final, incomplete codon of a transcript with a shortened coding sequence where the end is unknown. 
-synonym: "incomplete terminal codon variant" EXACT [] -synonym: "partial_codon" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001590 ! terminator_codon_variant -is_a: SO:0001650 ! inframe_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:51:15Z - -[Term] -id: SO:0001627 -name: intron_variant -def: "A transcript variant occurring within an intron." [SO:ke] -comment: EBI term: Intronic variations - In intron. -synonym: "intron variant" EXACT [] -synonym: "intron_" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "intronic" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:52:38Z - -[Term] -id: SO:0001628 -name: intergenic_variant -def: "A sequence variant located in the intergenic region, between genes." [SO:ke] -comment: EBI term Intergenic variations - More than 5 kb either upstream or downstream of a transcript. -synonym: "intergenic" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "intergenic variant" EXACT [] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-23T05:07:37Z - -[Term] -id: SO:0001629 -name: splice_site_variant -def: "A sequence variant that changes the first two or last two bases of an intron, or the 5th base from the start of the intron in the orientation of the transcript." [http://ensembl.org/info/docs/variation/index.html] -comment: EBI term - essential splice site - In the first 2 or the last 2 base pairs of an intron. The 5th base is on the donor (5') side of the intron. Updated to b in line with Cancer Genome Project at the Sanger. -synonym: "essential_splice_site" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "splice site variant" EXACT [] -is_a: SO:0001627 ! 
intron_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:42:00Z - -[Term] -id: SO:0001630 -name: splice_region_variant -def: "A sequence variant in which a change has occurred within the region of the splice site, either within 1-3 bases of the exon or 3-8 bases of the intron." [http://ensembl.org/info/docs/variation/index.html] -comment: EBI term: splice site - 1-3 bps into an exon or 3-8 bps into an intron. -synonym: "splice region variant" EXACT [] -synonym: "splice_region_variant" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:46:02Z - -[Term] -id: SO:0001631 -name: upstream_gene_variant -def: "A sequence variant located 5' of a gene." [SO:ke] -comment: Different groups annotate up and downstream to different lengths. The subtypes are specific and are backed up with cross references. -synonym: "upstream gene variant" EXACT [] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:49:13Z - -[Term] -id: SO:0001632 -name: downstream_gene_variant -def: "A sequence variant located 3' of a gene." [SO:ke] -comment: Different groups annotate up and downstream to different lengths. The subtypes are specific and are backed up with cross references. -synonym: "downstream gene variant" EXACT [] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:49:38Z - -[Term] -id: SO:0001633 -name: 5KB_downstream_variant -def: "A sequence variant located within 5 KB of the end of a gene." [SO:ke] -comment: EBI term Downstream variations - Within 5 kb downstream of the 3prime end of a transcript. -synonym: "5KB downstream variant" EXACT [] -synonym: "downstream" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "within 5KB downstream" RELATED [] -is_a: SO:0001632 ! 
downstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:50:16Z - -[Term] -id: SO:0001634 -name: 500B_downstream_variant -def: "A sequence variant located within a half KB of the end of a gene." [SO:ke] -synonym: "500B downstream variant" EXACT [] -synonym: "near-gene-3" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001633 ! 5KB_downstream_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:50:42Z - -[Term] -id: SO:0001635 -name: 5KB_upstream_variant -def: "A sequence variant located within 5KB 5' of a gene." [SO:ke] -comment: EBI term Upstream variations - Within 5 kb upstream of the 5prime end of a transcript. -synonym: "5kb upstream variant" EXACT [] -synonym: "upstream" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001631 ! upstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:51:06Z - -[Term] -id: SO:0001636 -name: 2KB_upstream_variant -def: "A sequence variant located within 2KB 5' of a gene." [SO:ke] -synonym: "2KB upstream variant" EXACT [] -synonym: "near-gene-5" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001635 ! 5KB_upstream_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:51:22Z - -[Term] -id: SO:0001637 -name: rRNA_gene -def: "A gene that encodes for ribosomal RNA." [SO:ke] -synonym: "rRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:10:32Z - -[Term] -id: SO:0001638 -name: piRNA_gene -def: "A gene that encodes for an piwi associated RNA." [SO:ke] -synonym: "piRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:11:36Z - -[Term] -id: SO:0001639 -name: RNase_P_RNA_gene -def: "A gene that encodes an RNase P RNA." [SO:ke] -synonym: "RNase P RNA gene" EXACT [] -is_a: SO:0001263 ! 
ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:13:23Z - -[Term] -id: SO:0001640 -name: RNase_MRP_RNA_gene -def: "A gene that encodes a RNase_MRP_RNA." [SO:ke] -synonym: "RNase MRP RNA gene" RELATED [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:13:58Z - -[Term] -id: SO:0001641 -name: lincRNA_gene -def: "A gene that encodes large intervening non-coding RNA." [SO:ke] -synonym: "lincRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:14:24Z - -[Term] -id: SO:0001642 -name: mathematically_defined_repeat -def: "A mathematically defined repeat (MDR) is a experimental feature that is determined by querying overlapping oligomers of length k against a database of shotgun sequence data and identifying regions in the query sequence that exceed a statistically determined threshold of repetitiveness." [SO:jestill] -comment: Mathematically defined repeat regions are determined without regard to the biological origin of the repetitive region. The repeat units of a MDR are the overlapping oligomers of size k that were used to for the query. Tools that can annotate mathematically defined repeats include Tallymer (Kurtz et al 2008, BMC Genomics: 517) and RePS (Wang et al, Genome Res 12(5): 824-831.). -synonym: "mathematically defined repeat" EXACT [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2010-05-03T11:50:14Z - -[Term] -id: SO:0001643 -name: telomerase_RNA_gene -def: "A telomerase RNA gene is a non coding RNA gene the RNA product of which is a component of telomerase." [SO:ke] -synonym: "Telomerase RNA component" EXACT [] -synonym: "telomerase RNA gene" EXACT [] -synonym: "TERC" EXACT [] -xref: http:http://en.wikipedia.org/wiki/Telomerase_RNA_component "wikipedia" -is_a: SO:0001263 ! 
ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-05-18T05:26:38Z - -[Term] -id: SO:0001644 -name: targeting_vector -def: "An engineered vector that is able to take part in homologous recombination in a host with the intent of introducing site specific genomic modifications." [MGD:tm, PMID:10354467] -synonym: "targeting vector" RELATED [] -is_a: SO:0000440 ! vector_replicon -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000440 ! vector_replicon -intersection_of: has_part SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_part SO:0000853 ! homologous_region -relationship: has_quality SO:0000783 ! engineered -created_by: kareneilbeck -creation_date: 2010-05-28T02:05:25Z - -[Term] -id: SO:0001645 -name: genetic_marker -def: "A measurable sequence feature that varies within a population." [SO:db] -synonym: "genetic marker" RELATED [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-05-28T02:33:07Z - -[Term] -id: SO:0001646 -name: DArT_marker -def: "A genetic marker, discovered using Diversity Arrays Technology (DArT) technology." [SO:ke] -synonym: "DArT marker" EXACT [] -is_a: SO:0001645 ! genetic_marker -created_by: kareneilbeck -creation_date: 2010-05-28T02:34:43Z - -[Term] -id: SO:0001647 -name: kozak_sequence -def: "A kind of ribosome entry site, specific to Eukaryotic organisms that overlaps part of both 5' UTR and CDS sequence." [SO:ke] -subset: SOFA -synonym: "kozak consensus" EXACT [] -synonym: "kozak consensus sequence" EXACT [] -synonym: "kozak sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Kozak_consensus_sequence "wikipedia" -is_a: SO:0000139 ! ribosome_entry_site -created_by: kareneilbeck -creation_date: 2010-06-07T03:12:20Z - -[Term] -id: SO:0001648 -name: nested_transposon -def: "A transposon that is disrupted by the insertion of another element." [SO:ke] -synonym: "nested transposon" EXACT [] -is_a: SO:0000101 ! 
transposable_element -created_by: kareneilbeck -creation_date: 2010-06-23T03:22:57Z - -[Term] -id: SO:0001649 -name: nested_repeat -def: "A repeat that is disrupted by the insertion of another element." [SO:ke] -synonym: "nested repeat" RELATED [] -is_a: SO:0000657 ! repeat_region -created_by: kareneilbeck -creation_date: 2010-06-23T03:24:55Z - -[Term] -id: SO:0001650 -name: inframe_variant -def: "A sequence variant which does not cause a disruption of the translational reading frame." [SO:ke] -synonym: "cds-indel" EXACT dbsnp [] -synonym: "inframe variant" EXACT [] -is_a: SO:0001818 ! protein_altering_variant -created_by: kareneilbeck -creation_date: 2010-07-19T01:24:44Z - -[Term] -id: SO:0001653 -name: retinoic_acid_responsive_element -def: "A transcription factor binding site of variable direct repeats of the sequence PuGGTCA spaced by five nucleotides (DR5) found in the promoters of retinoic acid-responsive genes, to which retinoic acid receptors bind." [PMID:11327309, PMID:19917671] -synonym: "RARE" EXACT [] -synonym: "retinoic acid responsive element" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000167 ! promoter -created_by: kareneilbeck -creation_date: 2010-08-03T10:46:12Z - -[Term] -id: SO:0001654 -name: nucleotide_to_protein_binding_site -def: "A binding site that, in the nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -subset: SOFA -synonym: "nucleotide to protein binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:26:05Z - -[Term] -id: SO:0001655 -name: nucleotide_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with nucleotide residues." [SO:cb] -comment: See GO:0000166 : nucleotide binding. -synonym: "nucleotide binding site" EXACT [] -is_a: SO:0000409 ! 
binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:30:04Z - -[Term] -id: SO:0001656 -name: metal_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with metal ions." [SO:cb] -comment: See GO:0046872 : metal ion binding. -synonym: "metal binding site" RELATED [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:31:42Z - -[Term] -id: SO:0001657 -name: ligand_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with a small molecule such as a drug, or hormone." [SO:ke] -synonym: "ligand binding site" EXACT [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:32:58Z - -[Term] -id: SO:0001658 -name: nested_tandem_repeat -def: "An NTR is a nested repeat of two distinct tandem motifs interspersed with each other." [SO:AF] -comment: Tracker ID: 3052459. -synonym: "nested tandem repeat" EXACT [] -synonym: "NTR" EXACT [] -is_a: SO:0001649 ! nested_repeat -created_by: kareneilbeck -creation_date: 2010-08-26T09:36:16Z - -[Term] -id: SO:0001659 -name: promoter_element -synonym: "promoter element" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: overlaps SO:0000235 ! TF_binding_site -created_by: kareneilbeck -creation_date: 2010-10-01T11:48:32Z - -[Term] -id: SO:0001660 -name: core_promoter_element -synonym: "core promoter element" EXACT [] -synonym: "general transcription factor binding site" RELATED [] -is_a: SO:0001659 ! promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T11:49:03Z - -[Term] -id: SO:0001661 -name: RNA_polymerase_II_TATA_box -def: "A TATA box core promoter of a gene transcribed by RNA polymerase II." [PMID:16858867] -synonym: "RNA polymerase II TATA box" EXACT [] -is_a: SO:0000174 ! TATA_box -relationship: part_of SO:0001669 ! 
RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:42:12Z - -[Term] -id: SO:0001662 -name: RNA_polymerase_III_TATA_box -def: "A TATA box core promoter of a gene transcribed by RNA polymerase III." [SO:ke] -synonym: "RNA polymerase III TATA box" EXACT [] -is_a: SO:0000174 ! TATA_box -relationship: part_of SO:0000171 ! RNApol_III_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:43:16Z - -[Term] -id: SO:0001663 -name: BREd_motif -def: "A core TRNA polymerase II promoter element with consensus (G/A)T(T/G/A)(T/A)(G/T)(T/G)(T/G)." [PMID:16858867] -synonym: "BREd" EXACT [] -synonym: "BREd motif" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:49:55Z - -[Term] -id: SO:0001664 -name: DCE -def: "A discontinuous core element of RNA polymerase II transcribed genes, situated downstream of the TSS. It is composed of three sub elements: SI, SII and SIII." [PMID:16858867] -synonym: "downstream core element" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:56:41Z - -[Term] -id: SO:0001665 -name: DCE_SI -def: "A sub element of the DCE core promoter element, with consensus sequence CTTC." [PMID:16858867, SO:ke] -synonym: "DCE SI" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:10Z - -[Term] -id: SO:0001666 -name: DCE_SII -def: "A sub element of the DCE core promoter element with consensus sequence CTGT." [PMID:16858867, SO:ke] -synonym: "DCE SII" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:30Z - -[Term] -id: SO:0001667 -name: DCE_SIII -def: "A sub element of the DCE core promoter element with consensus sequence AGC." 
[PMID:16858867, SO:ke] -synonym: "DCE SIII" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:44Z - -[Term] -id: SO:0001668 -name: proximal_promoter_element -synonym: "proximal promoter element" RELATED [] -synonym: "specific transcription factor binding site" RELATED [] -is_a: SO:0001678 ! regulatory_promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T03:10:23Z - -[Term] -id: SO:0001669 -name: RNApol_II_core_promoter -def: "The minimal portion of the promoter required to properly initiate transcription in RNA polymerase II transcribed genes." [PMID:16858867] -synonym: "RNApol II core promoter" EXACT [] -is_a: SO:0000170 ! RNApol_II_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T03:13:41Z - -[Term] -id: SO:0001670 -name: distal_promoter_element -synonym: "distal promoter element" RELATED [] -is_a: SO:0001678 ! regulatory_promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T03:21:08Z - -[Term] -id: SO:0001671 -name: bacterial_RNApol_promoter_sigma_70 -synonym: "bacterial RNA polymerase promoter sigma 70" EXACT [] -is_a: SO:0000613 ! bacterial_RNApol_promoter -created_by: kareneilbeck -creation_date: 2010-10-06T01:41:34Z - -[Term] -id: SO:0001672 -name: bacterial_RNApol_promoter_sigma54 -synonym: "bacterial RNA polymerase promoter sigma54" EXACT [] -is_a: SO:0000613 ! bacterial_RNApol_promoter -created_by: kareneilbeck -creation_date: 2010-10-06T01:42:37Z - -[Term] -id: SO:0001673 -name: minus_12_signal -def: "A conserved region about 12-bp upstream of the start point of bacterial transcription units, involved with sigma factor 54." [PMID:18331472] -synonym: "minus 12 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001672 ! 
bacterial_RNApol_promoter_sigma54 -created_by: kareneilbeck -creation_date: 2010-10-06T01:44:57Z - -[Term] -id: SO:0001674 -name: minus_24_signal -def: "A conserved region about 12-bp upstream of the start point of bacterial transcription units, involved with sigma factor 54." [PMID:18331472] -synonym: "minus 24 signal" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001672 ! bacterial_RNApol_promoter_sigma54 -created_by: kareneilbeck -creation_date: 2010-10-06T01:45:24Z - -[Term] -id: SO:0001675 -name: A_box_type_1 -def: "An A box within an RNA polymerase III type 1 promoter." [SO:ke] -comment: The A box can be found in the promoters of type 1 and type 2 (pol III) so sub-typing here allows the part of relationship of the subtypes to remain true. -synonym: "A box type 1" RELATED [] -is_a: SO:0000619 ! A_box -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 -created_by: kareneilbeck -creation_date: 2010-10-06T05:43:43Z - -[Term] -id: SO:0001676 -name: A_box_type_2 -def: "An A box within an RNA polymerase III type 2 promoter." [SO:ke] -comment: The A box can be found in the promoters of type 1 and type 2 (pol III) so sub-typing here allows the part of relationship of the subtypes to remain true. -synonym: "A box type 2" RELATED [] -is_a: SO:0000619 ! A_box -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 -created_by: kareneilbeck -creation_date: 2010-10-06T05:44:18Z - -[Term] -id: SO:0001677 -name: intermediate_element -def: "A core promoter region of RNA polymerase III type 1 promoters." [PMID:12381659] -synonym: "IE" EXACT [] -synonym: "intermediate element" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 -created_by: kareneilbeck -creation_date: 2010-10-06T05:52:03Z - -[Term] -id: SO:0001678 -name: regulatory_promoter_element -def: "A promoter element that is not part of the core promoter, but provides the promoter with a specific regulatory region." 
[PMID:12381659] -synonym: "regulatory promoter element" RELATED [] -is_a: SO:0001659 ! promoter_element -created_by: kareneilbeck -creation_date: 2010-10-07T04:39:48Z - -[Term] -id: SO:0001679 -name: transcription_regulatory_region -def: "A regulatory region that is involved in the control of the process of transcription." [SO:ke] -subset: SOFA -synonym: "transcription regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:49:35Z - -[Term] -id: SO:0001680 -name: translation_regulatory_region -def: "A regulatory region that is involved in the control of the process of translation." [SO:ke] -synonym: "translation regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:52:45Z - -[Term] -id: SO:0001681 -name: recombination_regulatory_region -def: "A regulatory region that is involved in the control of the process of recombination." [SO:ke] -synonym: "recombination regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:53:35Z - -[Term] -id: SO:0001682 -name: replication_regulatory_region -def: "A regulatory region that is involved in the control of the process of nucleotide replication." [SO:ke] -synonym: "replication regulatory region" RELATED [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:54:09Z - -[Term] -id: SO:0001683 -name: sequence_motif -def: "A sequence motif is a nucleotide or amino-acid sequence pattern that may have biological significance." [http://en.wikipedia.org/wiki/Sequence_motif] -subset: SOFA -synonym: "sequence motif" RELATED [] -xref: http://en.wikipedia.org/wiki/Sequence_motif "wikipedia" -is_a: SO:0001411 ! 
biological_region -created_by: kareneilbeck -creation_date: 2010-10-14T04:13:22Z - -[Term] -id: SO:0001684 -name: experimental_feature_attribute -def: "An attribute of an experimentally derived feature." [SO:ke] -synonym: "experimental feature attribute" RELATED [] -is_a: SO:0000733 ! feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:22:23Z - -[Term] -id: SO:0001685 -name: score -def: "The score of an experimentally derived feature such as a p-value." [SO:ke] -is_a: SO:0001684 ! experimental_feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:23:16Z - -[Term] -id: SO:0001686 -name: quality_value -def: "An experimental feature attribute that defines the quality of the feature in a quantitative way, such as a phred quality score." [SO:ke] -synonym: "quality value" RELATED [] -is_a: SO:0001684 ! experimental_feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:24:11Z - -[Term] -id: SO:0001687 -name: restriction_enzyme_recognition_site -def: "The nucleotide region (usually a palindrome) that is recognized by a restriction enzyme. This may or may not be equal to the restriction enzyme binding site." [SO:ke] -synonym: "restriction endonuclease recognition site" EXACT [] -synonym: "restriction enzyme recognition site" EXACT [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-10-29T12:29:57Z - -[Term] -id: SO:0001688 -name: restriction_enzyme_cleavage_junction -def: "The boundary at which a restriction enzyme breaks the nucleotide sequence." [SO:ke] -synonym: "restriction enzyme cleavage junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2010-10-29T12:35:02Z - -[Term] -id: SO:0001689 -name: five_prime_restriction_enzyme_junction -def: "The restriction enzyme cleavage junction on the 5' strand of the nucleotide sequence." [SO:ke] -synonym: "5' restriction enzyme junction" EXACT [] -is_a: SO:0001694 ! 
single_strand_restriction_enzyme_cleavage_site -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:36:24Z - -[Term] -id: SO:0001690 -name: three_prime_restriction_enzyme_junction -synonym: "3' restriction enzyme junction" EXACT [] -is_a: SO:0001694 ! single_strand_restriction_enzyme_cleavage_site -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:37:52Z - -[Term] -id: SO:0001691 -name: blunt_end_restriction_enzyme_cleavage_site -synonym: "blunt end restriction enzyme cleavage site" EXACT [] -is_a: SO:0001687 ! restriction_enzyme_recognition_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:39:53Z - -[Term] -id: SO:0001692 -name: sticky_end_restriction_enzyme_cleavage_site -synonym: "sticky end restriction enzyme cleavage site" RELATED [] -is_a: SO:0001687 ! restriction_enzyme_recognition_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:40:50Z - -[Term] -id: SO:0001693 -name: blunt_end_restriction_enzyme_cleavage_junction -def: "A restriction enzyme cleavage site where both strands are cut at the same position." [SO:ke] -synonym: "blunt end restriction enzyme cleavage site" RELATED [] -is_a: SO:0001688 ! restriction_enzyme_cleavage_junction -relationship: part_of SO:0001691 ! blunt_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:43:14Z - -[Term] -id: SO:0001694 -name: single_strand_restriction_enzyme_cleavage_site -def: "A restriction enzyme cleavage site whereby only one strand is cut." [SO:ke] -synonym: "single strand restriction enzyme cleavage site" RELATED [] -is_a: SO:0001688 ! 
restriction_enzyme_cleavage_junction -created_by: kareneilbeck -creation_date: 2010-10-29T12:44:48Z - -[Term] -id: SO:0001695 -name: restriction_enzyme_single_strand_overhang -def: "A terminal region of DNA sequence where the end of the region is not blunt ended." [SO:ke] -synonym: "single strand overhang" EXACT [] -synonym: "sticky end" RELATED [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:48:35Z - -[Term] -id: SO:0001696 -name: experimentally_defined_binding_region -def: "A region that has been implicated in binding although the exact coordinates of binding may be unknown." [SO:ke] -synonym: "experimentally defined binding region" RELATED [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2010-11-02T11:39:59Z - -[Term] -id: SO:0001697 -name: ChIP_seq_region -def: "A region of sequence identified by CHiP seq technology to contain a protein binding site." [SO:ke] -synonym: "ChIP seq region" RELATED [] -is_a: SO:0001696 ! experimentally_defined_binding_region -relationship: contains SO:0000410 ! protein_binding_site -created_by: kareneilbeck -creation_date: 2010-11-02T11:43:07Z - -[Term] -id: SO:0001698 -name: ASPE_primer -def: "\"A primer containing an SNV at the 3' end for accurate genotyping." [http://www.ncbi.nlm.nih.gov/pubmed/11252801] -synonym: "allele specific primer extension primer" EXACT [] -synonym: "ASPE primer" EXACT [] -is_a: SO:0000112 ! primer -created_by: kareneilbeck -creation_date: 2010-11-11T03:25:21Z - -[Term] -id: SO:0001699 -name: dCAPS_primer -def: "A primer with one or more mis-matches to the DNA template corresponding to a position within a restriction enzyme recognition site." [http://www.ncbi.nlm.nih.gov/pubmed/9628033] -synonym: "dCAPS primer" EXACT [] -synonym: "derived cleaved amplified polymorphic primer" EXACT [] -is_a: SO:0000112 ! 
primer -created_by: kareneilbeck -creation_date: 2010-11-11T03:27:09Z - -[Term] -id: SO:0001700 -name: histone_modification -def: "Histone modification is a post translationally modified region whereby residues of the histone protein are modified by methylation, acetylation, phosphorylation, ubiquitination, sumoylation, citrullination, or ADP-ribosylation." [http:en.wikipedia.org/wiki/Histone] -synonym: "histone modification" EXACT [] -synonym: "histone modification site" RELATED [] -is_a: SO:0001089 ! post_translationally_modified_region -is_a: SO:0001720 ! epigenetically_modified_region -intersection_of: SO:0001089 ! post_translationally_modified_region -intersection_of: has_quality SO:0000133 ! epigenetically_modified -relationship: has_quality SO:0000133 ! epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-31T10:22:08Z - -[Term] -id: SO:0001701 -name: histone_methylation_site -def: "A histone modification site where the modification is the methylation of the residue." [SO:ke] -synonym: "histone methylation" EXACT [] -synonym: "histone methylation site" EXACT [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:02Z - -[Term] -id: SO:0001702 -name: histone_acetylation_site -def: "A histone modification where the modification is the acylation of the residue." [SO:ke] -synonym: "histone acetylation" EXACT [] -synonym: "histone acetylatoin site" EXACT [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:27Z - -[Term] -id: SO:0001703 -name: H3K9_acetylation_site -def: "A kind of histone modification site, whereby the 9th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 acetylation site" EXACT [] -synonym: "H3K9Ac" RELATED [] -is_a: SO:0001702 ! 
histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:05Z - -[Term] -id: SO:0001704 -name: H3K14_acetylation_site -def: "A kind of histone modification site, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K14 acetylation site" EXACT [] -synonym: "H3K14Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:53Z - -[Term] -id: SO:0001705 -name: H3K4_monomethylation_site -def: "A kind of histone modification, whereby the 4th residue (a lysine), from the start of the H3 protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 mono-methylation site" EXACT [] -synonym: "H3K4me1" RELATED [] -is_a: SO:0001734 ! H3K4_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:28:14Z - -[Term] -id: SO:0001706 -name: H3K4_trimethylation -def: "A kind of histone modification site, whereby the 4th residue (a lysine), from the start of the H3 protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 tri-methylation" EXACT [] -synonym: "H3K4me3" RELATED [] -is_a: SO:0001734 ! H3K4_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:29:12Z - -[Term] -id: SO:0001707 -name: H3K9_trimethylation_site -def: "A kind of histone modification site, whereby the 9th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 tri-methylation site" EXACT [] -synonym: "H3K9Me3" RELATED [] -is_a: SO:0001736 ! H3K9_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:30:34Z - -[Term] -id: SO:0001708 -name: H3K27_monomethylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is mono-methylated." 
[http://en.wikipedia.org/wiki/Histone] -synonym: "H2K27 mono-methylation site" EXACT [] -synonym: "H2K27Me1" RELATED [] -is_a: SO:0001732 ! H3K27_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:31:54Z - -[Term] -id: SO:0001709 -name: H3K27_trimethylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K27 tri-methylation site" EXACT [] -synonym: "H3K27Me3" RELATED [] -is_a: SO:0001732 ! H3K27_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:32:41Z - -[Term] -id: SO:0001710 -name: H3K79_monomethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is mono- methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 mono-methylation site" EXACT [] -synonym: "H3K79me1" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:33:42Z - -[Term] -id: SO:0001711 -name: H3K79_dimethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is di-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 di-methylation site" EXACT [] -synonym: "H3K79Me2" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:34:39Z - -[Term] -id: SO:0001712 -name: H3K79_trimethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 tri-methylation site" EXACT [] -synonym: "H3K79Me3" RELATED [] -is_a: SO:0001735 ! 
H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:35:30Z - -[Term] -id: SO:0001713 -name: H4K20_monomethylation_site -def: "A kind of histone modification site, whereby the 20th residue (a lysine), from the start of the H34histone protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H4K20 mono-methylation site" EXACT [] -synonym: "H4K20Me1" RELATED [] -is_a: SO:0001701 ! histone_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:36:43Z - -[Term] -id: SO:0001714 -name: H2BK5_monomethylation_site -def: "A kind of histone modification site, whereby the 5th residue (a lysine), from the start of the H2B protein is methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2BK5 mono-methylation site" EXACT [] -is_a: SO:0001701 ! histone_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:38:12Z - -[Term] -id: SO:0001715 -name: ISRE -def: "An ISRE is a transcriptional cis regulatory region, containing the consensus region: YAGTTTC(A/T)YTTTYCC, responsible for increased transcription via interferon binding." [http://genesdev.cshlp.org/content/2/4/383.abstrac] -comment: Term requested via tracker (2981725) by Alan Ruttenberg, April 2010. It has been described as both an enhancer and a promoter, so the parent is the more general term. -synonym: "interferon stimulated response element" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region -created_by: kareneilbeck -creation_date: 2010-04-05T11:15:08Z - -[Term] -id: SO:0001716 -name: histone_ubiqitination_site -def: "A histone modification site where ubiquitin may be added." [SO:ke] -synonym: "histone ubiquitination site" RELATED [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-04-13T10:12:18Z - -[Term] -id: SO:0001717 -name: H2B_ubiquitination_site -def: "A histone modification site on H2B where ubiquitin may be added." 
[SO:ke] -synonym: "H2BUbiq" RELATED [] -is_a: SO:0001716 ! histone_ubiqitination_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:13:28Z - -[Term] -id: SO:0001718 -name: H3K18_acetylation_site -def: "A kind of histone modification site, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K18 acetylation site" EXACT [] -synonym: "H3K18Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:39:35Z - -[Term] -id: SO:0001719 -name: H3K23_acylation_site -def: "A kind of histone modification, whereby the 23rd residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K23 acylation site" RELATED [] -synonym: "H3K23Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:42:45Z - -[Term] -id: SO:0001720 -name: epigenetically_modified_region -def: "A biological region implicated in inherited changes caused by mechanisms other than changes in the underlying DNA sequence." [http://en.wikipedia.org/wiki/Epigenetics, SO:ke] -subset: SOFA -synonym: "epigenetically modified region" RELATED [] -is_a: SO:0001411 ! biological_region -intersection_of: SO:0001411 ! biological_region -intersection_of: has_quality SO:0000133 ! epigenetically_modified -relationship: has_quality SO:0000133 ! epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-27T12:02:29Z - -[Term] -id: SO:0001721 -name: H3K27_acylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K27 acylation site" EXACT [] -synonym: "H3K27Ac" RELATED [] -is_a: SO:0001702 ! 
histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:44:09Z - -[Term] -id: SO:0001722 -name: H3K36_monomethylation_site -def: "A kind of histone modification site, whereby the 36th residue (a lysine), from the start of the H3 histone protein is mono-methylated." [SO:ke] -synonym: "H3K36 mono-methylation site" EXACT [] -synonym: "H3K36 -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000127 -name: silenced_gene -def: "A gene that is silenced." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000893 ! silenced - -[Term] -id: SO:0000128 -name: gene_silenced_by_DNA_modification -def: "A silenced gene by DNA modification." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000129 -name: gene_silenced_by_DNA_methylation -def: "A silenced gene silenced DNA methylation." [SO:xp] -intersection_of: SO:0000128 ! gene_silenced_by_DNA_modification -intersection_of: has_quality SO:0000895 ! silenced_by_DNA_methylation - -[Term] -id: SO:0000130 -name: post_translationally_regulated -def: "An attribute describing a gene that is regulated after it has been translated." [SO:ke] -synonym: "post-translationally_regulated" RELATED [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000131 -name: translationally_regulated -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000133 -name: epigenetically_modified -is_a: SO:0000401 ! 
gene_attribute - -[Term] -id: SO:0000134 -name: imprinted -is_a: SO:0000119 ! regulated -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000135 -name: maternally_imprinted -def: "An attribute to describe genes that are regulated by maternal imprinting." [SO:ke] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000138 -name: gene_rearranged_at_DNA_level -def: "An epigenetically modified gene, rearranged at the DNA level." [SO:xp] -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000904 ! rearranged_at_DNA_level - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -comment: gene: -subset: SOFA -is_a: SO:0000837 ! UTR_region - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located between the promoter and a structural gene that causes partial termination of transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of sequence which may be used to manufacture a longer assembled, sequence." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000145 -name: recoded_codon -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000146 -name: capped -def: "An attribute describing when a sequence, usually an mRNA is capped by the addition of a modified guanine nucleotide at the 5' end." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region of the genome that codes for portion of spliced messenger RNA (SO:0000234); may contain 5'-untranslated region (SO:0000204), all open reading frames (SO:0000236) and 3'-untranslated region (SO:0000205)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unvailable bases." [SO:ls] -subset: SOFA -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in E. coli or some other organism." [http://www.geospiza.com/community/support/glossary/] -subset: SOFA -is_a: SO:0000695 ! 
reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." [SO:ma] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000154 -name: PAC -def: "P1 Artificial Chromosome. These vectors can hold large inserts, typically 80-200 kb, and propagate in E. coli as a single copy episome." [http://www.ncbi.nlm.nih.gov/genome/guide/mouse/glossary.htm] -synonym: "P1" RELATED [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000155 -name: plasmid -def: "A self-replicating circular DNA molecule that is distinct from a chromosome in the organism." [SO:ma] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as aplasmids or packaged as a phage,since they retain the lambda cos sites." [SO:ma] -comment: vans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1):9-20. -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilises the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource: mapping and analysis of 96 clones. Genomics 1996; -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000159 -name: deletion -def: "The sequence that is deleted." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0000109 ! 
sequence_variant -relationship: sequence_of SO:0000045 ! delete - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_obsolete: true - -[Term] -id: SO:0000161 -name: methylated_A -def: "A methylated adenine." [SO:ke] -subset: SOFA -is_a: SO:0000250 ! modified_RNA_base_feature -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "The position where intron is excised." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_splice_site -def: "The junction between the 3 prime end of an exon and the following intron." [http://www.ucl.ac.uk/ ~ ucbhjow/b241/glossary.html] -subset: SOFA -synonym: "donor" RELATED [] -synonym: "donor_splice_site" RELATED [] -synonym: "splice_donor_site" RELATED [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000164 -name: three_prime_splice_site -def: "The junction between the 3 prime end of an intron and the following exon." [http://www.ucl.ac.uk/ ~ ucbhjow/b241/glossary.html] -subset: SOFA -synonym: "acceptor" RELATED [] -synonym: "acceptor_splice_site" RELATED [] -synonym: "splice_acceptor_site" RELATED [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000166 -name: enhancer_bound_by_factor -def: "An enhancer bound by a factor." [SO:xp] -intersection_of: SO:0000165 ! enhancer -intersection_of: has_quality SO:0000277 ! 
bound_by_factor - -[Term] -id: SO:0000167 -name: promoter -def: "The region on a DNA molecule involved in RNA polymerase binding to initiate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." [SO:ke] -synonym: "RNA_polymerase_A_promoter" RELATED [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke] -synonym: "RNA_polymerase_B_promoter" RELATED [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." [SO:ke] -synonym: "RNA_polymerase_C_promoter" RELATED [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -is_a: SO:0000844 ! RNA_II_promotor_region - -[Term] -id: SO:0000173 -name: GC_rich_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -synonym: "GC-rich_region" RELATED [] -is_a: SO:0000844 ! 
RNA_II_promotor_region - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -is_a: SO:0000832 ! promotor_region -relationship: part_of SO:0000170 ! RNApol_II_promoter -relationship: part_of SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -synonym: "-10_signal" RELATED [] -synonym: "Pribnow_box" RELATED [] -is_a: SO:0000843 ! bacterial_RNApol_promotor_region - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -synonym: "-35_signal" RELATED [] -is_a: SO:0000843 ! bacterial_RNApol_promotor_region - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -subset: SOFA -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! 
clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." [http://www.genpromag.com/scripts/glossary.asp?LETTER=R] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." [SO:ke] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "The primary (initial, unprocessed) transcript; includes five_prime_clip (SO:0000555), five_prime_untranslated_region (SO:0000204), open reading frames (SO:0000236), introns (SO:0000188) and three_prime_ untranslated_region (three_prime_UTR), and three_prime_clip (SO:0000557)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -synonym: "precursor_RNA" RELATED [] -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." [SO:ke] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_a: SO:0000657 ! 
repeat_region - -[Term] -id: SO:0000188 -name: intron -def: "A segment of DNA that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000191 -name: interior_intron -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A polymorphism detectable by the size differences in DNA fragments generated by a restriction enzyme." [PMID:6247908] -subset: SOFA -synonym: "restriction_fragment_length_polymorphism" RELATED [] -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." [http:www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "Long interspersed element" RELATED [] -synonym: "Long interspersed nuclear element" RELATED [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon, including the stop_codon." [SO:ke] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_exon_coding_region -def: "The sequence of the 5' exon that encodes for protein." [SO:ke] -is_a: SO:0000195 ! coding_exon -relationship: part_of SO:0000200 ! 
five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_exon_coding_region -def: "The sequence of the 3' exon that encodes for protein." [SO:ke] -is_a: SO:0000195 ! coding_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -synonym: "noncoding_exon" RELATED [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke] -is_a: SO:0000109 ! sequence_variant -relationship: sequence_of SO:0000049 ! translocate - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000201 -name: interior_exon -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The exon that is most 3-prime on a given transcript." [SO:ma] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime and three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated_region" RELATED [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -synonym: "five_prime_untranslated_region" RELATED [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -synonym: "three_prime_untranslated_region" RELATED [] -is_a: SO:0000203 ! 
UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." [SO:ke] -synonym: "Short interspersed element" RELATED [] -synonym: "Short interspersed nuclear element" RELATED [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_polymorphism -is_a: SO:0000248 ! sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." [http:www.genetics.org/cgi/reprint/156/4/1983.pdf] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -synonym: "ribosomal_RNA_primary_transcript" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253.)" [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." [SO:ke] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." 
[SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear mRNA (SO:0000274)." [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: processed_transcript -def: "A transcript which has undergone processing to remove parts such as introns and transcribed_spacer_regions." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: mRNA does not contain introns as it is a processd_transcript.nThe equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. -subset: SOFA -synonym: "messenger_RNA" RELATED [] -is_a: SO:0000233 ! 
processed_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds to a transcription factor." [SO:ke] -subset: SOFA -synonym: "transcription_factor_binding_site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The inframe interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER" [SO:ma, SO:rb] -comment: The definition was modified by Rama. This terms now basically is the same as a CDS. This must be revised. -subset: SOFA -synonym: "open_reading_frame" RELATED [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterised by large modular imperfect long inverted repeats" [http:www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "long inverted repeat element" RELATED [] -synonym: "LVR element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The DNA sequences extending on either side of a specific locus." [http://biotech.icmb.utexas.edu/search/dict-search.mhtml] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000240 -name: chromosome_variation -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0000241 -name: internal_UTR -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polyicistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke] -synonym: "IRES" RELATED [] -is_a: SO:0000139 ! 
ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_obsolete: true - -[Term] -id: SO:0000246 -name: polyadenylated -def: "A attribute describing the additon of a poly A tail to the 3' end of a mRNA molecule." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000248 -name: sequence_length_variation -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types, ISBN:0198506732] -subset: SOFA -synonym: "ribsomal_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. 
tRNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). tRNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -subset: SOFA -synonym: "transfer_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000254 -name: alanyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000261 -name: glycyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000262 -name: histidyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000264 -name: leucyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000265 -name: lysyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000266 -name: methionyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000268 -name: prolyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000269 -name: seryl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000270 -name: threonyl_tRNA -is_a: SO:0000253 ! 
tRNA - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000273 -name: valyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000274 -name: snRNA -def: "Small non-coding RNA in the nucleoplasm. A small nuclear RNA molecule involved in pre-mRNA splicing and processing" [ems:WB, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types, PMID:11733745] -subset: SOFA -synonym: "small_nuclear_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000275 -name: snoRNA -def: "Small nucleolar RNAs (snoRNAs) are involved in the processing and modification of rRNA in the nucleolus. There are two main classes of snoRNAs: the box C/D class, and the box H/ACA class. U3 snoRNA is a member of the box C/D class. Indeed, the box C/D element is a subset of the six short sequence elements found in all U3 snoRNAs, namely boxes A, A', B, C, C', and D. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -subset: SOFA -synonym: "small_nucleolar_RNA" RELATED [] -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. miRNAs are produced from precursor molecules (SO:0000647) that can form local hairpin strcutures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpinprecursor molecule. miRNAs may trigger the cleavage of their target molecules oract as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro_RNA" RELATED [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000277 -name: bound_by_factor -def: "An attribute describing a sequence that is bound by another molecule." [SO:ke] -comment: formally called transcript_by_bound_factor -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000278 -name: transcript_bound_by_nucleic_acid -def: "A transcript that is bound by anucleic acid." [SO:xp] -comment: formally called transcript_by_bound_nucleic_acid -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000876 ! bound_by_nucleic_acid - -[Term] -id: SO:0000279 -name: transcript_bound_by_protein -def: "A transcript that is bound by a protein." [SO:xp] -comment: formally called transcript_by_bound_protein -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000875 ! bound_by_protein - -[Term] -id: SO:0000280 -name: engineered_gene -def: "A gene that is engineered." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000281 -name: engineered_foreign_gene -def: "A gene that is engineered and foreign." [SO:xp] -intersection_of: SO:0000280 ! engineered_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000282 -name: mRNA_with_minus_1_frameshift -def: "An mRNA with a minus 1 frameshift" [SO:xp] -intersection_of: SO:0000108 ! 
mRNA_with_frameshift -intersection_of: has_quality SO:0000866 ! minus_1_frameshift - -[Term] -id: SO:0000283 -name: engineered_foreign_transposable_element_gene -def: "A transposible_element that is engineered and foreign." [SO:xp] -intersection_of: SO:0000111 ! transposable_element_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartate and interupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000285 -name: foreign_gene -def: "A gene that is foreign." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -synonym: "direct_terminal _repeat" RELATED [] -synonym: "LTR" RELATED [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000287 -name: fusion_gene -def: "A gene that is a fusion. " [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000806 ! fusion - -[Term] -id: SO:0000288 -name: engineered_fusion_gene -def: "A fusion gene that is engineered." [SO:xp] -intersection_of: SO:0000287 ! fusion_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000806 ! fusion - -[Term] -id: SO:0000289 -name: microsatellite -def: "A very short unit sequence of DNA (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -is_a: SO:0000289 ! 
microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_obsolete: true - -[Term] -id: SO:0000293 -name: engineered_foreign_repetitive_element -def: "A repetitive element that is engineered and foreign." [SO:xp] -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. Example: GCTGA-----TCAGC." [SO:ke] -subset: SOFA -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -is_a: SO:0000001 ! region - -[Term] -id: SO:0000299 -name: specific_recombination_site -is_a: SO:0000669 ! 
sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000301 -name: vertebrate_immune_system_gene_recombination_feature -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-RS] -synonym: "J-RS" RELATED [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interupted palidrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_base_site -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G or (in RNA) U." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -comment: modified base: -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -is_a: SO:0000305 ! modified_base_site - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG_island" RELATED [] -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_obsolete: true - -[Term] -id: SO:0000309 -name: computed_feature -is_obsolete: true - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_obsolete: true - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to: -is_obsolete: true - -[Term] -id: SO:0000312 -name: experimentally_determined -def: "Attribute to describe a feature that has been experiemntally verified." [SO:ke] -is_a: SO:0000789 ! validated - -[Term] -id: SO:0000313 -name: stem_loop -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: transcription_start_site -def: "The site where transcription begins." [SO:ke] -subset: SOFA -synonym: "TSS" RELATED [] -is_a: SO:0000699 ! junction -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding_sequence" RELATED [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -is_a: SO:0000151 ! clone - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -synonym: "initiation codon" RELATED [] -is_a: SO:0000360 ! 
codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke] -is_a: SO:0000344 ! splice_enhancer -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000321 -name: mRNA_with_plus_1_frameshift -def: "An mRNA with a plus 1 frameshift" [SO:ke] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000868 ! plus_1_frameshift - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "translation_start" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke] -synonym: "translation_end" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray_oligonucleotide" RELATED [] -is_a: SO:0000051 ! 
probe -is_a: SO:0000324 ! tag -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000329 -name: mRNA_with_plus_2_frameshift -def: "AAn mRNA with a plus 2 frameshift." [SO:xp] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000869 ! plus_2_framshift - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence_tag_site" RELATED [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! processed_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "noncoding_conserved_region" RELATED [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000335 -name: mRNA_with_minus_2_frameshift -def: "A mRNA with a minus 2 frameshift." [SO:ke] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000867 ! minus_2_frameshift - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). 
In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/ ~ ucbhjow/b241/glossary.html] -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITES do not encode proteins." [http:www.pnas.org/cgi/content/full/97/18/10083] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome whioch promotes recombination." [SO:rd] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of long DNA molecule." [http://biotech.icmb.utexas.edu/search/dict-search.mhtml] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -synonym: "cytological_band" RELATED [] -is_a: SO:0000830 ! chromosome_region - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." 
[SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "Expressed Sequence Tag: The sequence of a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [http://genomics.phrma.org/lexicon/e.html] -subset: SOFA -synonym: "expressed_sequence_tag" RELATED [] -is_a: SO:0000695 ! reagent -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000346 -name: Cre_recombination_target_region -synonym: "lox_site" RELATED [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -is_a: SO:0000343 ! match - -[Term] -id: SO:0000348 -name: nucleic_acid -is_a: SO:0000443 ! polymer_type - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -is_a: SO:0000343 ! match - -[Term] -id: SO:0000350 -name: FLP_recombination_target_region -synonym: "FRT_site" RELATED [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "A sequence of nucleotides or amino acids that has been designed by an experimentor and which may, or may not, correspond with any natural sequence." [SO:ma] -is_a: SO:0000443 ! polymer_type - -[Term] -id: SO:0000352 -name: DNA -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000353 -name: assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -is_a: SO:0000684 ! 
nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which in which markers are co-inherited as the result of the lack of historic recombination between them due to their close proximity." [SO:ma] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000356 -name: RNA -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: flanked -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000358 -name: protein -comment: This definition no longer matches the meaning of the concept -is_a: SO:0000443 ! polymer_type - -[Term] -id: SO:0000359 -name: floxed -def: "An attribute describing sequence that is flanked by Lox-P sites." [SO:ke] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together signify a unique amino acid or the termination of translation." [http://genomics.phrma.org/lexicon/c.html] -subset: SOFA -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000361 -name: FRT_flanked -def: "An attribute to describe sequence that is flanked by the FLP recombinase recognition site, FRT." [SO:ke] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000362 -name: invalidated_by_chimeric_cDNA -def: "A cDNA clone constructed from more than one mRNA. Usually an experimental artifact." [SO:ma] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000363 -name: floxed_gene -def: "A transgene that is floxed." [SO:xp] -intersection_of: SO:0000902 ! transgene -intersection_of: has_quality SO:0000359 ! floxed - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposible element." [SO:ke] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "DNA elements capable of mobilizing individual gene cassettes into bacterial chromosomes by site- specific recombination." [http://www.genomicglossaries.com/content/DNA.asp] -is_a: SO:0000669 ! 
sequence_rearrangement_feature - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -is_a: SO:0000109 ! sequence_variant -is_a: SO:0000699 ! junction -relationship: position_of SO:0000046 ! insert - -[Term] -id: SO:0000367 -name: attI_site -is_a: SO:0000669 ! sequence_rearrangement_feature -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -is_a: SO:0000669 ! sequence_rearrangement_feature -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/ ~ smaloy/Glossary/C.html] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000373 -name: recombinationally_inverted_gene -def: "A recombinationally rearranged gene by inversion." [SO:xp] -intersection_of: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: associated_with SO:0000047 ! invert - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -is_a: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000375 -name: rRNA_5.8S -def: "5.8S ribosomal RNA (5.8S rRNA) is a component of the large subunit of the eukaryotic ribosome. 
It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5.8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S_rRNA" RELATED [] -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 6S RNA represses expression from a sigma70-dependent promoter during stationary phase." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S_RNA" RELATED [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB-RsmB_RNA" RELATED [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. 
These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -is_a: SO:0000378 ! DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http:rnaworld.bio.ukans.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA -is_a: SO:0000374 ! ribozyme - -[Term] -id: SO:0000381 -name: group_IIA_intron -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -is_a: SO:0000644 ! 
antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -is_a: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterised activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. 
Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -is_a: SO:0000374 ! ribozyme - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -is_a: SO:0000370 ! 
small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesises telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -is_a: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA -An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http:http\://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=1 2409455&dopt=Abstract] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -def: "An attribute describes a quality of sequence." [SO:ke] -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0000401 -name: gene_attribute -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_obsolete: true - -[Term] -id: SO:0000403 -name: U14_snRNA -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonuceoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. 
These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron." [PMID:1899376] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "18S_rRNA -A large polynucleotide which functions as a part of the small subunit of the ribosome" [SO:ke] -subset: SOFA -synonym: "16S_rRNA" RELATED [] -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -def: "A region on the surface of a molecule that may interact with another molecule." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A region of a molecule that binds to a protein." [SO:ke] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000411 -name: rescue_region -def: "A region that rescues." [SO:xp] -synonym: "rescue fragment" RELATED [] -synonym: "rescue segment" RELATED [] -intersection_of: SO:0000695 ! reagent -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "Any of the individual polynucleotide sequences produced by digestion of DNA with a restriction endonuclease." [http://www.agron.missouri.edu/cgi-bin/sybgw_mdb/mdb3/Term/119] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequences differs from that of a specified sequence." [SO:ke] -subset: SOFA -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000414 -name: invalidated_by_genomic_contamination -def: "An attribue to describe a feature that is invalidated due to genomic contamination. " [SO:ke] -is_a: SO:0000790 ! 
invalidated - -[Term] -id: SO:0000415 -name: invalidated_by_genomic_polyA_primed_cDNA -def: "An attribue to describe a feature that is invalidated due to polyA priming. " [SO:ke] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000416 -name: invalidated_by_partial_processing -def: "An attribue to describe a feature that is invalidated due to partial processing. " [SO:ke] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000417 -name: polypeptide_domain -def: "A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains." [http:www.molbiol.bbsrc.ac.uk/new_protein/domains.html] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0000418 -name: signal_peptide -def: "The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence." [http:www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "signal peptide coding sequence" RELATED [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0000419 -name: mature_peptide -def: "The coding sequence for the mature or final peptide or protein product following post-translational modification." [http:www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000423 -name: R_LTR_region -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000424 -name: U3_LTR_region -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000425 -name: five_prime_LTR -is_a: SO:0000286 ! 
long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -is_a: SO:0000423 ! R_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -is_a: SO:0000422 ! U5_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -is_a: SO:0000424 ! U3_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: transposable_element_target_site_duplication -def: "A sequence of DNA that is duplicated when a transposable element inserts; usually found at each end the insertion." [http:www.koko.gov.my/CocoaBioTech/Glossaryt.html] -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." [SO:ke] -synonym: "LTR_retrotransposon_poly_purine_tract" RELATED [] -is_a: SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously_replicating_sequence" RELATED [] -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! ring_chromosome - -[Term] -id: SO:0000440 -name: vector -def: "A DNA molecule that can be used to transfer DNA molecules between organisms." [SO:ma] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -synonym: "single stranded oligonucleotide.new synonym" RELATED [] -synonym: "ss_oligonucleotide" RELATED [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -synonym: "double stranded oligonucleotide" RELATED [] -synonym: "ds_oligonucleotide" RELATED [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000443 -name: polymer_type -def: "An attribute to describe the kind of biological sequence." [SO:ke] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "five_prime_noncoding_exon" RELATED [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." [SO:ke] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." [SO:ke] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -is_a: SO:0000446 ! 
UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequencyof these components." [SO:ma] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -is_a: SO:0000341 ! chromosome_band - -[Term] -id: SO:0000451 -name: gene_with_polyadenylated_mRNA -def: "A gene that encodes a polyadenylated mRNA." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000871 ! polyadenylated_mRNA - -[Term] -id: SO:0000452 -name: transgene_attribute -is_obsolete: true - -[Term] -id: SO:0000453 -name: transposition -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A small, 17-28-nt, small interfering RNA derived from transcripts ofrepetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000455 -name: gene_with_mRNA_with_frameshift -def: "A gene that encodes an mRNA with a frameshift." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000865 ! frameshift - -[Term] -id: SO:0000456 -name: recombinationally_rearranged_gene -def: "A gene that is recombinationally rearranged." [SO:ke] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000940 ! recombinationally_rearranged - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-GENE] -synonym: "D-GENE" RELATED [] -is_a: SO:0000460 ! 
vertebrate_immunoglobulin_T_cell_receptor_segment -relationship: part_of SO:0000504 ! D_DJ_C_cluster -relationship: part_of SO:0000505 ! D_DJ_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000508 ! D_DJ_J_cluster -relationship: part_of SO:0000509 ! D_J_C_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000528 ! V_D_DJ_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000532 ! V_D_J_cluster -relationship: part_of SO:0000559 ! D_cluster -relationship: part_of SO:0000560 ! D_J_cluster - -[Term] -id: SO:0000459 -name: gene_with_trans_spliced_transcript -def: "A gene with a transcript that is trans-spliced." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000479 ! trans_spliced_transcript - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_segment -comment: I am using the term segment instead of gene her to avoid confusion with the region 'gene'. -synonym: "vertebrate_immunoglobulin/T-cell_receptor_gene" RELATED [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entitity." [SO:cjm] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000463 -name: encodes_alternately_spliced_transcripts -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendent of an exon." [SO:ke] -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! 
exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." [FB:km] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-GENE] -synonym: "V_GENE" RELATED [] -synonym: "variable_gene" RELATED [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment -relationship: part_of SO:0000518 ! V_DJ_cluster -relationship: part_of SO:0000519 ! V_DJ_J_cluster -relationship: part_of SO:0000520 ! V_VDJ_C_cluster -relationship: part_of SO:0000521 ! V_VDJ_cluster -relationship: part_of SO:0000522 ! V_VDJ_J_cluster -relationship: part_of SO:0000523 ! V_VJ_C_cluster -relationship: part_of SO:0000524 ! V_VJ_cluster -relationship: part_of SO:0000525 ! V_VJ_J_cluster -relationship: part_of SO:0000526 ! V_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000528 ! V_D_DJ_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000532 ! V_D_J_cluster -relationship: part_of SO:0000534 ! V_J_cluster -relationship: part_of SO:0000535 ! V_J_C_cluster -relationship: part_of SO:0000542 ! V_DJ_C_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster -relationship: part_of SO:0000566 ! V_VJ_J_C_cluster - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -synonym: "post-translationally_regulated_by_protein_stability" RELATED [] -is_a: SO:0000130 ! 
post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -def: "An attribute describing a gene sequence where the resulting protein is modified to regulate it." [SO:ke] -synonym: "post-translationally_regulated_by_protein_modification" RELATED [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-GENE] -synonym: "J-GENE" RELATED [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment -relationship: part_of SO:0000485 ! DJ_J_cluster -relationship: part_of SO:0000487 ! VDJ_J_C_cluster -relationship: part_of SO:0000488 ! VDJ_J_cluster -relationship: part_of SO:0000490 ! VJ_J_C_cluster -relationship: part_of SO:0000491 ! VJ_J_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000508 ! D_DJ_J_cluster -relationship: part_of SO:0000509 ! D_J_C_cluster -relationship: part_of SO:0000511 ! J_C_cluster -relationship: part_of SO:0000513 ! J_cluster -relationship: part_of SO:0000519 ! V_DJ_J_cluster -relationship: part_of SO:0000522 ! V_VDJ_J_cluster -relationship: part_of SO:0000525 ! V_VJ_J_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000532 ! V_D_J_cluster -relationship: part_of SO:0000534 ! V_J_cluster -relationship: part_of SO:0000535 ! V_J_C_cluster -relationship: part_of SO:0000540 ! 
DJ_J_C_cluster -relationship: part_of SO:0000560 ! D_J_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster -relationship: part_of SO:0000566 ! V_VJ_J_C_cluster - -[Term] -id: SO:0000471 -name: autoregulated -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [CJM:SO] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path.SO:0000472." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." [SO:ke] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -def: "A gene that is polycistronic" [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000478 -name: C_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#C-GENE] -synonym: "C_GENE" RELATED [] -synonym: "constant_gene" RELATED [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment -relationship: part_of SO:0000487 ! VDJ_J_C_cluster -relationship: part_of SO:0000489 ! VJ_C_cluster -relationship: part_of SO:0000490 ! VJ_J_C_cluster -relationship: part_of SO:0000504 ! D_DJ_C_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000509 ! D_J_C_cluster -relationship: part_of SO:0000511 ! 
J_C_cluster -relationship: part_of SO:0000520 ! V_VDJ_C_cluster -relationship: part_of SO:0000523 ! V_VJ_C_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000535 ! V_J_C_cluster -relationship: part_of SO:0000539 ! DJ_C_cluster -relationship: part_of SO:0000540 ! DJ_J_C_cluster -relationship: part_of SO:0000541 ! VDJ_C_cluster -relationship: part_of SO:0000542 ! V_DJ_C_cluster -relationship: part_of SO:0000558 ! C_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster -relationship: part_of SO:0000566 ! V_VJ_J_C_cluster - -[Term] -id: SO:0000479 -name: trans_spliced_transcript -def: "A transcript that is trans-spliced. " [SO:xp] -synonym: "trans-spliced_transcript" RELATED [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly." [SO:ke] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occuring at the termini of a DNA transposon." [SO:ke] -synonym: "TIR" RELATED [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate_immunoglobulin/T-cell_receptor_gene-cluster" RELATED [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." 
[SO:ke] -subset: SOFA -synonym: "noncoding_primary_transcript" RELATED [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -synonym: "three_prime_exon_noncoding_region" RELATED [] -is_a: SO:0000852 ! exon_region -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(DJ)-J-CLUSTER] -synonym: "(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000486 -name: five_prime_exon_noncoding_region -def: "The sequence of the 5' exon preceeding the start codon." [SO:ke] -synonym: "five_prime_exon_noncoding_region" RELATED [] -is_a: SO:0000852 ! exon_region -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(VDJ)-J-C-CLUSTER] -synonym: "(VDJ)-J-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(VDJ)-J-CLUSTER] -synonym: "(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(VJ)-C-CLUSTER] -synonym: "(VJ)-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(VJ)-J-C-CLUSTER] -synonym: "(VJ)-J-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(VJ)-J-CLUSTER] -synonym: "(VJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#3'D-HEPTAMER] -synonym: "3'D-HEPTAMER" RELATED [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#3'D-NONAMER] -synonym: "3'D-NOMAMER" RELATED [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! 
three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#3'D-SPACER] -synonym: "3'D-SPACER" RELATED [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#5'D-HEPTAMER] -synonym: "5'D-HEPTAMER" RELATED [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#5'D-NONAMER] -synonym: "5'D-NONAMER" RELATED [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#5'D-SPACER] -synonym: "5'-SPACER" RELATED [] -synonym: "five_prime_D-spacer" RELATED [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000556 ! 
five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continous piece of sequence similar to the 'virtual contig' concept of ensembl." [SO:ke] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." [SO:ke] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-(DJ)-C-CLUSTER] -synonym: "D-(DJ)-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-(DJ)-CLUSTER] -synonym: "D-(DJ)-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-(DJ)-J-C-CLUSTER] -synonym: "D-(DJ)-J-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000507 -name: pseudogenic_exon -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon -relationship: part_of SO:0000516 ! pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-(DJ)-J-CLUSTER] -synonym: "D-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-J-C-CLUSTER] -synonym: "D-J-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000510 -name: VD_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-GENE] -synonym: "V_D_GENE" RELATED [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-C-CLUSTER] -synonym: "J-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-CLUSTER] -synonym: "J-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-NONAMER] -synonym: "J-NONAMER" RELATED [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. 
CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-HEPTAMER] -synonym: "J-HEPTAMER" RELATED [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#J-SPACER] -synonym: "J-SPACER" RELATED [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(DJ)-CLUSTER] -synonym: "V-(DJ)-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(DJ)-J-CLUSTER] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VDJ)-C-CLUSTER] -synonym: "V-(VDJ)-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VDJ)-CLUSTER] -synonym: "V-(VDJ)-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VDJ)-J-CLUSTER] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VJ)-C-CLUSTER] -synonym: "V-(VJ)-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VJ)-CLUSTER] -synonym: "V-(VJ)-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VJ)-J-CLUSTER] -synonym: "V-(VJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-CLUSTER] -synonym: "V-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-(DJ)-C-CLUSTER] -synonym: "V-D-(DJ)-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-(DJ)-CLUSTER] -synonym: "V-D-(DJ)-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-(DJ)-J-C-CLUSTER] -synonym: "V-D-(DJ)-J-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-(DJ)-J-CLUSTER] -synonym: "V-D-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-J-C-CLUSTER] -synonym: "V-D-J-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-J-CLUSTER] -synonym: "V-D-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-HEPTAMER] -synonym: "V-HEPTAMER" RELATED [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-J-CLUSTER] -synonym: "V-J-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-J-C-CLUSTER] -synonym: "V-J-C-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-NONAMER] -synonym: "V-NONAMER" RELATED [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-SPACER] -synonym: "V-SPACER" RELATED [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-RS] -synonym: "V-RS" RELATED [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(DJ)-C-CLUSTER] -synonym: "(DJ)-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(DJ)-J-C-CLUSTER] -synonym: "(DJ)-J-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#(VDJ)-C-CLUSTER] -synonym: "(VDJ)-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(DJ)-C-CLUSTER] -synonym: "V-(DJ)-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous Helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." [http://www.pnas.org/cgi/content/full/100/11/6569] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -is_a: SO:0000351 ! 
synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." [FB:km] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000548 -name: gene_with_edited_transcript -def: "A gene that encodes a transcript that is edited." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000873 ! edited_transcript - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "Region in 5' UTR where ribosome assembles on mRNA." [SO:ke] -synonym: "five_prime_ribosome_binding_site" RELATED [] -synonym: "RBS" RELATED [] -synonym: "Shine-Dalgarno_sequence" RELATED [] -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! 
processed_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -synonym: "five_prime_-clip" RELATED [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#5'D-RS] -synonym: "5'RS" RELATED [] -synonym: "five_prime_D-recombination_signal_sequence" RELATED [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -synonym: "3'-clip" RELATED [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#C-CLUSTER] -synonym: "C-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-CLUSTER] -synonym: "D-CLUSTER" RELATED [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-J-CLUSTER] -synonym: "D-J-CLUSTER" RELATED [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: " 7 nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or J-gene recombination feature of an immunoglobulin/T-cell receptor gene" [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#HEPTAMER] -synonym: "HEPTAMER" RELATED [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000563 -name: vertebrate_immune_system_gene_recombination_spacer -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(DJ)-J-C-CLUSTER] -synonym: "V-(DJ)-J-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VDJ)-J-C-CLUSTER] -synonym: "V-(VDJ)-J-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-(VJ)-J-C-CLUSTER] -synonym: "V-(VJ)-J-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inverversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promotor -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000569 -name: retrotransposed -alt_id: SO:0100042 -synonym: "captured_pseudogene" RELATED [] -is_a: SO:0000010 ! protein_coding -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#3'D-RS] -synonym: "3'D-RS" RELATED [] -synonym: "three_prime_D-recombination_signal_sequence" RELATED [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000572 -name: DJ_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#D-J-GENE] -synonym: "D_J_GENE" RELATED [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -relationship: part_of SO:0000485 ! 
DJ_J_cluster -relationship: part_of SO:0000504 ! D_DJ_C_cluster -relationship: part_of SO:0000505 ! D_DJ_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000508 ! D_DJ_J_cluster -relationship: part_of SO:0000518 ! V_DJ_cluster -relationship: part_of SO:0000519 ! V_DJ_J_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000528 ! V_D_DJ_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000539 ! DJ_C_cluster -relationship: part_of SO:0000540 ! DJ_J_C_cluster -relationship: part_of SO:0000542 ! V_DJ_C_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster - -[Term] -id: SO:0000573 -name: rRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000574 -name: VDJ_gene -def: " Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)" [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-D-J-GENE] -synonym: "V-D-J-GENE" RELATED [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -relationship: part_of SO:0000487 ! VDJ_J_C_cluster -relationship: part_of SO:0000488 ! VDJ_J_cluster -relationship: part_of SO:0000520 ! V_VDJ_C_cluster -relationship: part_of SO:0000521 ! V_VDJ_cluster -relationship: part_of SO:0000522 ! V_VDJ_J_cluster -relationship: part_of SO:0000541 ! VDJ_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster - -[Term] -id: SO:0000575 -name: scRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000576 -name: VJ_gene -def: " Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)" [http://imgt.cines.fr/ligmdb/LIGMlect?query=7#V-J-GENE] -synonym: "V-J-GENE" RELATED [] -is_a: SO:0000936 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -relationship: part_of SO:0000489 ! VJ_C_cluster -relationship: part_of SO:0000490 ! VJ_J_C_cluster -relationship: part_of SO:0000491 ! VJ_J_cluster -relationship: part_of SO:0000523 ! V_VJ_C_cluster -relationship: part_of SO:0000524 ! V_VJ_cluster -relationship: part_of SO:0000525 ! V_VJ_J_cluster -relationship: part_of SO:0000566 ! V_VJ_J_C_cluster - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." [SO:ma] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://www.rna.ucla.edu] -synonym: "pre-edited_region" RELATED [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "tmRNA liberates a mRNA from a stalled ribosome. 
To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. tmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and eukaryote nuclear genomes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa_RNA" RELATED [] -synonym: "ssrA" RELATED [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_encoding -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." [SO:ke] -synonym: "10Sa_RNA_primary_transcript" RELATED [] -synonym: "ssrA_RNA_primary_transcript" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyse their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -subset: SOFA -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -is_a: SO:0000188 ! intron -is_a: SO:0000374 ! ribozyme - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -is_a: SO:0000483 ! 
nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal_recognition_particle_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A stem-loop RNA structure where nucleotides in the loop participate in complementary interactions with a region of RNA downstream of the stem-loop." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12519954&dopt=Abstract] -is_a: SO:0000002 ! 
sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov 80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "H-pseudoknot" RELATED [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "C/D_box_snoRNA" RELATED [] -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "H/ACA_box_snoRNA" RELATED [] -is_a: SO:0000275 ! 
snoRNA - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." [http://www.rna.ucla.edu/index.html] -is_obsolete: true - -[Term] -id: SO:0000598 -name: edited_by_C_insertion_and_dinucleotide_insertion -def: "The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs." 
[http://nsm1.utdallas.edu/bio/miller/physarum/overview.htm] -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000599 -name: edited_by_C_to_U_substitution -is_obsolete: true - -[Term] -id: SO:0000600 -name: edited_by_A_to_I_substitution -is_obsolete: true - -[Term] -id: SO:0000601 -name: edited_by_G_addition -is_obsolete: true - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a perfect duplex (except for the oligoU tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -subset: SOFA -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." 
[http://www.rna.ucla/] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "The region between two known genes." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://www.rna.ucla/] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." [http://www.rna.ucla.edu/] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_encoding -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -is_a: SO:0000001 ! region -relationship: adjacent_to SO:0000602 ! guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch_point" RELATED [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -is_a: SO:0000167 ! promoter -is_a: SO:0000752 ! 
gene_group_regulatory_region - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The site where transcription ends." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "tRNA_promoter" RELATED [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -is_a: SO:0000846 ! RNApol_III_promotor_type_2_region - -[Term] -id: SO:0000620 -name: B_box -is_a: SO:0000846 ! RNApol_III_promotor_type_2_region - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -is_a: SO:0000845 ! RNApol_III_promotor_type_1_region - -[Term] -id: SO:0000623 -name: snRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenence of the end," [SO:ma] -subset: SOFA -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "Combination of short DNA sequence elements which suppress the transcription of an adjacent gene or genes." [http://www.brunel.ac.uk/depts/bio/project/old_hmg/gloss3.htm#s] -subset: SOFA -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -is_a: SO:0000830 ! chromosome_region - -[Term] -id: SO:0000627 -name: insulator -subset: SOFA -synonym: "insulator_element" RELATED [] -is_a: SO:0005836 ! 
regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -is_a: SO:0000830 ! chromosome_region - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -is_a: SO:0000837 ! UTR_region - -[Term] -id: SO:0000631 -name: polycistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." [SO:ke] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000632 -name: monocistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." [SO:ke] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000633 -name: monocistronic_mRNA -def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd] -synonym: "monocistronic_processed_transcript" RELATED [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000634 -name: polycistronic_mRNA -def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd] -synonym: "polycistronic_processed_transcript" RELATED [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "A primary transcript that donates the spliced leader to other mRNA" [SO:ke] -synonym: "mini-exon_donor_RNA" RELATED [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000635 ! mini_exon_donor_RNA - -[Term] -id: SO:0000637 -name: engineered_plasmid -def: "A plasmid that is engineered." 
[SO:xp] -synonym: "engineered_plasmid_gene" RELATED [] -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -is_a: SO:0000838 ! rRNA_primary_transcript_region - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repetitive sequence spanning 500 to 20,000 base pairs (a repeat unit is 5 - 30 base pairs)." [http://www.rerf.or.jp/eigo/glossary/minisate.htm] -subset: SOFA -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -is_a: SO:0000185 ! 
primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "Small RNA molecule that is the product of a longerexogenous or endogenous dsRNA, which is either a bimolecular duplexe or very longhairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulatefrom both strands of the dsRNA. sRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small_interfering_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro_RNA_primary_transcript" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000648 -name: stRNA_primary_transcript -def: "A primary transcript encoding a small temporal mRNA (SO:0000649)." [SO:ke] -synonym: "small_temporal_RNA_primary_transcript" RELATED [] -is_a: SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small_temporal_RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilises 5S rRNA until it is required for transcription." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "23S_rRNA" RELATED [] -synonym: "28S_rRNA" RELATED [] -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000654 -name: maxi_circle_gene -def: "A mitochondrial gene located in a maxicircle." [SO:xp] -synonym: "maxi-circle" RELATED [] -intersection_of: SO:0000088 ! mt_gene -intersection_of: has_origin SO:0000742 ! maxicircle_sequence - -[Term] -id: SO:0000655 -name: ncRNA -def: "An mRNA sequence that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: ncRNA is a processed_transcript so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding_RNA" RELATED [] -is_a: SO:0000233 ! processed_transcript - -[Term] -id: SO:0000656 -name: stRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "interspersed_repeat" RELATED [] -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000661 -name: intron_attribute -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -subset: SOFA -is_a: SO:0000188 ! 
intron - -[Term] -id: SO:0000663 -name: tRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -is_a: SO:0000830 ! chromosome_region - -[Term] -id: SO:0000665 -name: monocistronic_transcript -def: "A transcript that is monocistronic" [SO:xp] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000666 -name: mobile_intron -is_a: SO:0000661 ! intron_attribute - -[Term] -id: SO:0000667 -name: insertion -def: "A region of sequence identified as having been inserted." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0000109 ! sequence_variant -relationship: sequence_of SO:0000046 ! insert - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." [SO:ma] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -is_a: SO:0000831 ! 
gene_member_region - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non-canonical_splice_site" RELATED [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." [SO:ke] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -is_a: SO:0000164 ! three_prime_splice_site -is_a: SO:0000675 ! canonical_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." [SO:ke] -is_a: SO:0000163 ! five_prime_splice_site -is_a: SO:0000675 ! canonical_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." [SO:ke] -synonym: "non-canonical_three_prime_splice_site" RELATED [] -is_a: SO:0000164 ! three_prime_splice_site -is_a: SO:0000674 ! non_canonical_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non-canonical-five_prime_splice_site" RELATED [] -is_a: SO:0000163 ! five_prime_splice_site -is_a: SO:0000674 ! non_canonical_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non-canonical_start_codon" RELATED [] -synonym: "non_ATG_start_codon" RELATED [] -is_a: SO:0000318 ! start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -is_a: SO:0000233 ! 
processed_transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." [http://www.ncbi.nlm.nih.gov 80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -is_a: SO:0000344 ! splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeting by a nuclease enzyme." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000685 -name: DNAaseI_hypersensitive_site -is_a: SO:0000322 ! nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "For some translocations, particularly but not exclusively, reciprocal translocations, the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements." [SO:ma] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occured." [SO:ke] -subset: SOFA -is_a: SO:0000109 ! sequence_variant -is_a: SO:0000699 ! junction -relationship: position_of SO:0000045 ! delete - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000690 -name: gene_with_polycistronic_transcript -def: "A gene that encodes a polycistronic transcript." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000078 ! 
polycistronic_transcript - -[Term] -id: SO:0000691 -name: translocation_site -def: "The space between two bases in a sequence which marks the position where a translocation has occurred." [SO:ke] -is_a: SO:0000109 ! sequence_variant -relationship: position_of SO:0000049 ! translocate - -[Term] -id: SO:0000692 -name: gene_with_dicistronic_transcript -def: "A gene that encodes a dicistronic transcript." [SO:xp] -intersection_of: SO:0000690 ! gene_with_polycistronic_transcript -intersection_of: associated_with SO:0000079 ! dicistronic_transcript - -[Term] -id: SO:0000693 -name: gene_with_recoded_mRNA -def: "A gene that encodes an mRNA that is recoded. " [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:1001261 ! recoded_mRNA - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives (alleles) exist in normal individuals in some population(s), wherein the least frequent allele has an abundance of 1% or greater." [http://www.cgr.ki.se/cgb/groups/brookes/Articles/essence_of_snps_article.pdf] -subset: SOFA -synonym: "single_nucleotide_polymorphism" RELATED [] -is_a: SO:1000008 ! point_mutation - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006 -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" RELATED [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000697 -name: gene_with_stop_codon_read_through -def: "A gene that encodes a transcript with stop codon readthrough. " [SO:xp] -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: associated_with SO:0000883 ! 
stop_codon_read_through - -[Term] -id: SO:0000698 -name: gene_with_stop_codon_redefined_as_pyrrolysine -def: "A gene encoding an mRNA that has the stop codon redefined as pyrrolysine." [SO:xp] -intersection_of: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: associated_with SO:0000884 ! stop_codon_redefined_as_pyrrolysine - -[Term] -id: SO:0000699 -name: junction -def: "A junction refers to an interbase location of zero in a sequence." [SO:ke] -subset: SOFA -synonym: "boundary" RELATED [] -is_a: SO:0000110 ! located_sequence_feature - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A locatable region of genomic sequence, corresponding to a unit of inheritance, which is associated with regulatory regions, transcribed regions and/or other functional sequence regions" [SO:rd] -subset: SOFA -is_a: SO:0000001 ! region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjacent copies of a DNA sequence." [http://www.sci.sdsu.edu/ ~ smaloy/Glossary/T.html] -subset: SOFA -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000005 ! 
satellite_DNA - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The process that produces mature transcripts by combining exons of independent pre-mRNA molecules. The acceptor site lies on the 3' of these molecules." [SO:ke] -subset: SOFA -is_a: SO:0000164 ! three_prime_splice_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The site at which trans-splicing occurs." [SO:ke] -synonym: "trans-splice_donor_site" RELATED [] -is_a: SO:0000163 ! five_prime_splice_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000710 -name: gene_with_stop_codon_redefined_as_selenocysteine -def: "A gene encoding an mRNA that has the stop codon redefined as selenocysteine." [SO:xp] -intersection_of: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: associated_with SO:0000885 ! stop_codon_redefined_as_selenocysteine - -[Term] -id: SO:0000711 -name: gene_with_mRNA_recoded_by_translational_bypass -def: "A gene with mRNA recoded by translational bypass." [SO:xp] -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: associated_with SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:0000712 -name: gene_with_transcript_with_translational_frameshift -def: "A gene encoding a transcript that has a translational frameshift." [SO:xp] -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: associated_with SO:0000887 ! translational_frameshift - -[Term] -id: SO:0000713 -name: DNA_motif -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000715 -name: RNA_motif -is_a: SO:0000714 ! 
nucleotide_motif - -[Term] -id: SO:0000716 -name: dicistronic_mRNA -def: "An mRNA that has the quality dicistronic." [SO:ke] -synonym: "dicistronic_processed_transcript" RELATED [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It does not contain the start or stop codon." [SO:rb] -comment: This term was added after a request by SGD.nAgust 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interupted by one or more stop codons; usually identified through intergenomic sequence comparisons." [SO:rb] -comment: Term requested by Rama from SGD -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000720 -name: foreign_transposable_element -def: "A transposable element that is foreign." [SO:ke] -comment: requested by Michael on 19 Nov 2004 -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000721 -name: gene_with_dicistronic_primary_transcript -def: "A gene that encodes a dicistronic primary transcript." [SO:xp] -comment: Requested by Michael, 19 nov 2004 -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: associated_with SO:1001197 ! 
dicistronic_primary_transcript - -[Term] -id: SO:0000722 -name: gene_with_dicistronic_mRNA -def: "A gene that encodes a polycisronic mRNA." [SO:xp] -comment: Requested by MA nov 19 2004 -synonym: "gene_with_dicistronic_processed_transcript" RELATED [] -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: associated_with SO:0000716 ! dicistronic_mRNA - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." [SO:ma] -synonym: "intervening DNA" RELATED [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: origin_of_transfer -def: "A region of a DNA molecule whre transfer is initiated during the process of conjugation or mobilization." [http:http\://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -subset: SOFA -synonym: "oriT" RELATED [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000725 -name: transit_peptide -def: "The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein: this domain is involved in post translational import of the protein into the organelle." [http:http\://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -comment: Added to bring SO inline with the embl ddbj genbank feature table. -subset: SOFA -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0000726 -name: repeat_unit -def: "A single repeat element." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html#line_types] -comment: Added to comply with the feature table. -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000727 -name: TF_module -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active" [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -synonym: "cis_regulatory_module" RELATED [] -synonym: "CRM" RELATED [] -is_a: SO:0005836 ! 
regulatory_region - -[Term] -id: SO:0000728 -name: intein -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0000729 -name: intein_containing -is_a: SO:0000010 ! protein_coding - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unkown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! assembly - -[Term] -id: SO:0000731 -name: fragment -def: "An attribute to describe a feature that is incomplete." [SO:ke] -comment: added because of request by MO people. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000732 -name: predicted -def: "An attribute describing an unverified region" [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000733 -name: feature_attribute -def: "An attribute describing a located_sequence_feature." [SO:ke] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000734 -name: exemplar_mRNA -def: "An exemplar is a representative cDNA sequence for each gene. The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." [http:mged.sourceforge.net/ontologies/MGEDontology.php#exemplar_mRNA] -comment: Added for the MO people. -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000864 ! exemplar - -[Term] -id: SO:0000735 -name: sequence_location -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_location -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -is_a: SO:0000736 ! organelle_location - -[Term] -id: SO:0000738 -name: nuclear_sequence -is_a: SO:0000736 ! organelle_location - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -is_a: SO:0000736 ! organelle_location - -[Term] -id: SO:0000740 -name: plastid_sequence -is_a: SO:0000736 ! organelle_location - -[Term] -id: SO:0000741 -name: kinetoplast_sequence -is_a: SO:0000737 ! 
mitochondrial_sequence - -[Term] -id: SO:0000742 -name: maxicircle_sequence -is_a: SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000743 -name: apicoplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_sequence -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_sequence -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -is_a: SO:0005836 ! regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000754 -name: lambda_vector -def: "The lambda bacteriophage is the vector for the linear lambda clone. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000755 -name: plasmid_vector -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template" [SO:ma] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -is_a: SO:0000756 ! 
cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_obsolete: true - -[Term] -id: SO:0000760 -name: YAC_clone -is_obsolete: true - -[Term] -id: SO:0000761 -name: phagemid_clone -is_obsolete: true - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000763 -name: fosmid_clone -is_obsolete: true - -[Term] -id: SO:0000764 -name: BAC_clone -is_obsolete: true - -[Term] -id: SO:0000765 -name: cosmid_clone -is_obsolete: true - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome. " [SO:ma] -is_a: SO:0000155 ! plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [Indiana:kw] -comment: Added in response to comment from Kelly Williams from Indiana.nhttp://nar.oxfordjournals.org/cgi/content/full/32/15/4531n10 Nov, 2005 -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [Indiana:kw] -comment: Added in response to Kelly Williams from Indiananhttp://nar.oxfordjournals.org/cgi/content/full/32/15/4531n10 nov 2005 -is_a: SO:0000847 ! 
tmRNA_region - -[Term] -id: SO:0000771 -name: QTL -def: "Quantitative Trait Locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." [http: http\://rgd.mcw.edu/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005 -is_a: SO:0000001 ! region - -[Term] -id: SO:0000772 -name: genomic_island -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -is_a: SO:0000001 ! region - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038/nrmicro884 nGENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMSnUlrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible_element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands. Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 rmicro884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker -is_a: SO:0000772 ! 
genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible_element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands.nnEvolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA genenJohn T. Sullivan and Clive W. RonsonnPNAS 1998 Apr 28 95 (9) 5145-5149n -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000779 -name: engineered_episome -def: "An episome that is engineered." [SO:xp] -comment: Requested by Lynn Crosby Jan 2006 -intersection_of: SO:0000768 ! episome -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000780 -name: transposable_element_attribute -comment: Added by KE Jan 2006 to capture the kinds of attributes of TEs -is_obsolete: true - -[Term] -id: SO:0000781 -name: transgenic -def: "Attribute describing sequence that has been integrated with foreign sequence." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000782 -name: natural -def: "An attribute describing a feature that occurs in nature." [SO:ke] -synonym: "natural_transposable_element" RELATED [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000783 -name: engineered -def: "An attribute to describe a region that was modified in vitro." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000784 -name: foreign -def: "An atrribute to describe a region from another species." [SO:ke] -synonym: "foreign_transposable_element" RELATED [] -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000785 -name: cloned_region -comment: Added in response to Lynn Crosby. A clone insert may be composed of many cloned regions. -synonym: "cloned segment" RELATED [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000786 -name: reagent_attribute -comment: Added jan 2006 by KE. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000787 -name: clone_attribute -is_obsolete: true - -[Term] -id: SO:0000788 -name: cloned -is_a: SO:0000786 ! reagent_attribute - -[Term] -id: SO:0000789 -name: validated -def: "An attribute to describe a feature that has been proven. " [SO:ke] -synonym: "validated_clone" RELATED [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000790 -name: invalidated -def: "An attribute describing a feature that is invalidated." [SO:ke] -synonym: "invalidated_clone" RELATED [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000791 -name: cloned_genomic -is_a: SO:0000788 ! cloned - -[Term] -id: SO:0000792 -name: cloned_cDNA -is_a: SO:0000788 ! cloned - -[Term] -id: SO:0000793 -name: engineered_DNA -is_obsolete: true - -[Term] -id: SO:0000794 -name: engineered_rescue_region -def: "A rescue region that is engineered." [SO:xp] -synonym: "engineered rescue fragment" RELATED [] -synonym: "engineered rescue segment" RELATED [] -intersection_of: SO:0000411 ! rescue_region -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000795 -name: rescue_mini_gene -def: "A mini_gene that rescues." [SO:xp] -synonym: "rescue mini-gene" RELATED [] -intersection_of: SO:0000815 ! mini_gene -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000796 -name: transgenic_transposable_element -def: "TE that has been modified in vitro, including insertion of DNA derived from a source other than the originating TE." [SO:mc] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000781 ! 
transgenic - -[Term] -id: SO:0000797 -name: natural_transposable_element -def: "TE that exists (or existed) in nature." [SO:mc] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000782 ! natural - -[Term] -id: SO:0000798 -name: engineered_transposable_element -def: "TE that has been modified by manipulations in vitro." [SO:mc] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000799 -name: engineered_foreign_transposable_element -def: "A transposable_element that is engineereed and foreign." [SO:mc] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000800 -name: assortment_derived_duplication -def: "A multi-chromosome duplication aberration generated by reassortment of other aberration components." [FB:gm] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000801 -name: assortment_derived_deficiency_plus_duplication -def: " A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency and a duplication. " [FB:gm] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000802 -name: assortment_derived_deficiency -def: "A multi-chromosome deficiency aberration generated by reassortment of other aberration components." [FB:gm] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000803 -name: assortment_derived_aneuploid -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency or a duplication." [fb:gm] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000804 -name: engineered_region -def: "A region that is engineered." [SO:xp] -synonym: "engineered sequence" RELATED [] -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! 
engineered - -[Term] -id: SO:0000805 -name: engineered_foreign_region -def: "A region that is engineered and foreign." [SO:xp] -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000806 -name: fusion -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000807 -name: engineered_tag -def: "A tag that is engineered." [SO:xp] -intersection_of: SO:0000324 ! tag -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000808 -name: validated_cDNA_clone -intersection_of: SO:0000151 ! clone -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000789 ! validated - -[Term] -id: SO:0000809 -name: invalidated_cDNA_clone -def: "A cDNA clone that is invalid. " [SO:xp] -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000790 ! invalidated - -[Term] -id: SO:0000810 -name: chimeric_cDNA_clone -def: "A cDNA clone invalidated because it is chimeric." [SO:xp] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA - -[Term] -id: SO:0000811 -name: genomically_contaminated_cDNA_clone -def: "A cDNA clone invalidated by genomic contamination." [SO:xp] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000414 ! invalidated_by_genomic_contamination - -[Term] -id: SO:0000812 -name: polyA_primed_cDNA_clone -def: "A cDNA clone invalidated by polyA priming." [SO:xp] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000415 ! invalidated_by_genomic_polyA_primed_cDNA - -[Term] -id: SO:0000813 -name: partially_processed_cDNA_clone -def: "A cDNA invalidated clone by partial processing." [SO:xp] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000416 ! 
invalidated_by_partial_processing - -[Term] -id: SO:0000814 -name: rescue -def: "An attribute describing a regions ability, when introduced to a mutant organism, to re-establish (rescue) a phenotype." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000815 -name: mini_gene -def: "By definition, minigenes are short open-reading frames\n(ORF), usually encoding approximately 9 to 20 amino\nacids, which are expressed in vivo (as distinct from being\nsynthesized as peptide or protein ex vivo and subsequently\ninjected). The in vivo synthesis confers a distinct\nadvantage: the expressed sequences can enter both\nantigen presentation pathways, MHC I (inducing CD8+ T-\ncells, which are usually cytotoxic T-lymphocytes (CTL))\nand MHC II (inducing CD4+ T-cells, usually 'T-helpers'\n(Th)); and can encounter B-cells, inducing antibody\nresponses. Three main vector approaches have been used\nto deliver minigenes: viral vectors, bacterial vectors and\nplasmid DNA." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=Abstract&list_uids=15992153&query_hl=2&itool=pubmed_docsum] -is_a: SO:0000236 ! ORF - -[Term] -id: SO:0000816 -name: rescue_gene -def: "A gene that rescues." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000817 -name: wild_type -def: "An attribute describing sequence with the genotype found in nature/or standard laboratory stock" [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000818 -name: wild_type_rescue_gene -def: "A gene that rescues." [SO:xp] -is_a: SO:0000816 ! rescue_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000817 ! wild_type - -[Term] -id: SO:0000819 -name: mitochondrial_chromosome -def: "A chromosome originating in a mitochondria." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000737 ! 
mitochondrial_sequence - -[Term] -id: SO:0000820 -name: chloroplast_chromosome -def: "A chromsome originating in a chloroplast." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000821 -name: chromoplast_chromosome -def: "A chromsome originating in a chromoplast." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000822 -name: cyanelle_chromosome -def: "A chromosome originating in a cyanelle." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000823 -name: leucoplast_chromosome -def: "A chromosome with origin in a leucoplast." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000824 -name: macronuclear_chromosome -def: "A chromosome originating in a macronucleus." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000083 ! macronuclear_sequence - -[Term] -id: SO:0000825 -name: micronuclear_chromosome -def: "A chromosome originating in a micronucleus." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000084 ! micronuclear_sequence - -[Term] -id: SO:0000826 -name: kinetoplast_chromosome -def: "A chromosome with origine in the kinetoplast." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000741 ! kinetoplast_sequence - -[Term] -id: SO:0000827 -name: maxicircle_chromosome -def: "A chromosome originating in a maxi-circle." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000742 ! maxicircle_sequence - -[Term] -id: SO:0000828 -name: nuclear_chromosome -def: "A chromosome originating in a nucleus." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000340 ! 
chromosome - -[Term] -id: SO:0000829 -name: nucleomorphic_chromosome -def: "A chromosome originating in a nucleomorph." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000830 -name: chromosome_region -def: "A region of a chromosome" [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000832 -name: promotor_region -def: "A region of sequence which is part of a promotor." [SO:ke] -comment: This is a manufactured term to allow the parts of promotor to have an is_a path back to the root. -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: processed_transcript_region -def: "A region of a processed transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a processed transcript and give them an is_a path to the root. -subset: SOFA -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A region of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root -subset: SOFA -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -subset: SOFA -is_a: SO:0000834 ! 
processed_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region og UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000838 -name: rRNA_primary_transcript_region -def: "A region of an rRNA primary transcript" [SO:ke] -comment: To allow transcribed_spacer_region to have a path to the root. -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000839 -name: polypeptide_region -def: "A region of sequence that can be translated into polypeptide sequence. This sequence can be reprsenseted as nucleotide or aminoacid. This sequence must be part of an mRNA sequence." [SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000840 -name: repeat_component -def: "A region of a repeated sequence. " [SO:ke] -comment: A manufactured to to group the parts of repeats, to give them an is_a path back to the root. -is_a: SO:0000001 ! region -relationship: part_of SO:0000657 ! repeat_region - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000843 -name: bacterial_RNApol_promotor_region -is_a: SO:0000832 ! promotor_region -relationship: part_of SO:0000613 ! 
bacterial_RNApol_promoter - -[Term] -id: SO:0000844 -name: RNA_II_promotor_region -is_a: SO:0000832 ! promotor_region -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000845 -name: RNApol_III_promotor_type_1_region -is_a: SO:0000832 ! promotor_region -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000846 -name: RNApol_III_promotor_type_2_region -is_a: SO:0000832 ! promotor_region -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000847 -name: tmRNA_region -is_a: SO:0000834 ! processed_transcript_region -relationship: part_of SO:0000584 ! tmRNA - -[Term] -id: SO:0000848 -name: LTR_component -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000849 -name: three_prime_LTR_component -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000850 -name: five_prime_LTR_component -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000851 -name: CDS_region -subset: SOFA -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000853 -name: homologous_region -def: "A region that is homologous to another region." [SO:ke] -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000856 ! conserved - -[Term] -id: SO:0000854 -name: paralogous_region -def: "A homologous_region that is paralogous to another region." [SO:ke] -comment: A term to be used in conjunction witht he paralogous_to relationship. -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000859 ! paralogous - -[Term] -id: SO:0000855 -name: orthologous_region -def: "A homologous_region that is orthologous to another region." 
[SO:ke] -comment: This term should be used in conjuction with the similarity relationships defined in SO. -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000858 ! orthologous - -[Term] -id: SO:0000856 -name: conserved -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000857 -name: homologous -def: "Similarity due to common ancestry." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000858 -name: orthologous -def: "A kind of homology where divergence occured after a speciation event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000859 -name: paralogous -def: "A kind of homology where divergence occured after a duplication event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000860 -name: syntenic -def: "Attribute describing sequence regions occurring in same order on chromosome of different species." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000861 -name: capped_primary_transcript -def: "A primary transcript that is capped." [SO:xp] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000862 -name: capped_mRNA -def: "An mRNA that is capped" [SO:xp] -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000863 -name: mRNA_attribute -def: "An attribute describing an mRNA feature." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000864 -name: exemplar -def: "An attribute describing a sequence is reprsentative of a class of similar sequences." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000865 -name: frameshift -def: "An attribute describing a sequence that contains a mutation involving the deletion or insertion of one or more bases, where this number is not divisible by 3.." [SO:ke] -is_a: SO:0000863 ! 
mRNA_attribute - -[Term] -id: SO:0000866 -name: minus_1_frameshift -def: "A frameshift caused by deleting one base" [SO:ke] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000867 -name: minus_2_frameshift -def: "A frameshift caused by deleting two bases" [SO:ke] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000868 -name: plus_1_frameshift -def: "A frameshift caused by inserting one base" [SO:ke] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000869 -name: plus_2_framshift -def: "A frameshift caused by inserting two bases" [SO:ke] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000870 -name: trans_spliced -def: "An attribute describing transcript sequence that is created by splicing exons from diferent genes." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000871 -name: polyadenylated_mRNA -def: "An mRNA that is polyadenylated." [SO:xp] -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000610 ! polyA_sequence -intersection_of: has_quality SO:0000246 ! polyadenylated - -[Term] -id: SO:0000872 -name: trans_spliced_mRNA -def: "An mRNA that is trans-spliced" [SO:xp] -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000636 ! spliced_leader_RNA -intersection_of: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000873 -name: edited_transcript -def: "A transcript that is edited." [SO:ke] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000874 -name: edited_transcript_by A_to_I_substitution -def: "A transcript that has been edited by A to I substitution." [SO:ke] -intersection_of: SO:0000873 ! edited_transcript -intersection_of: associated_with SO:0000919 ! substitute_A_to_I - -[Term] -id: SO:0000875 -name: bound_by_protein -def: "An attribute describing a sequence that is bound by a protein." [SO:ke] -is_a: SO:0000277 ! 
bound_by_factor - -[Term] -id: SO:0000876 -name: bound_by_nucleic_acid -def: "An attribute describing a sequence that is bound by a nucleic acid." [SO:ke] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000877 -name: alternatively_spliced -def: "An attribute describing a situation where a gene may encode for more than 1 transcript." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000878 -name: monocistronic -def: "An attribute describing a sequence that contains the code for one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000879 -name: dicistronic -def: "An attribute describing a sequence that contains the code for two gene products." [SO:ke] -is_a: SO:0000880 ! polycistronic - -[Term] -id: SO:0000880 -name: polycistronic -def: "An attribute describing a sequence that contains the code for more than one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000881 -name: recoded -def: "An atribute describing am mRNA sequences that has been reprogrammed at translation, causing localized alterations." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000882 -name: codon_redefined -def: "An attribute describing the alteration of codon meaning." [SO:ke] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000883 -name: stop_codon_read_through -def: "A stop codon redefined to be a new amino acid" [SO:ke] -synonym: "stop codon readthrough" RELATED [] -is_a: SO:0000882 ! codon_redefined - -[Term] -id: SO:0000884 -name: stop_codon_redefined_as_pyrrolysine -def: "A stop codon redefined to be the new amino acid, pyrrolysine." [SO:ke] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000885 -name: stop_codon_redefined_as_selenocysteine -def: "A stop codon redefined to be the new amino acid, selenocysteine." [SO:ke] -is_a: SO:0000883 ! 
stop_codon_read_through - -[Term] -id: SO:0000886 -name: recoded_by_translational_bypass -def: "Rcoding mRNA where a block of nucleotides is not translated. " [SO:ke] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000887 -name: translational_frameshift -def: "Recoding by frameshifting a particular site." [SO:ke] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000888 -name: maternally_imprinted_gene -def: "A gene that is maternally_imprinted." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000135 ! maternally_imprinted - -[Term] -id: SO:0000889 -name: paternally_imprinted_gene -def: "A gene that is paternally imprinted. " [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000136 ! paternally_imprinted - -[Term] -id: SO:0000890 -name: post_translationally_regulated_gene -def: "A gene that is post translationally regulated." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000891 -name: negatively_autoregulated_gene -def: "A gene that is negatively autoreguated." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000473 ! negatively_autoregulated - -[Term] -id: SO:0000892 -name: positively_autoregulated_gene -def: "A gene that is positively autoregulated." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000475 ! positively_autoregulated - -[Term] -id: SO:0000893 -name: silenced -is_a: SO:0000126 ! transcriptionally_repressed - -[Term] -id: SO:0000894 -name: silenced_by_DNA_modification -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0000895 -name: silenced_by_DNA_methylation -is_a: SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000896 -name: translationally_regulated_gene -def: "A gene that is translationally regulated." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000131 ! 
translationally_regulated - -[Term] -id: SO:0000897 -name: allelically_excluded_gene -def: "A gene that is allelically_excluded" [SO:xp] -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000137 ! allelically_excluded - -[Term] -id: SO:0000898 -name: epigenetically_modified_gene -def: "A gene that is epigenetically modified." [SO:ke] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000899 -name: nuclear_mitochondrial -def: "An attribute describing a nuclear pseudogene of a mitochndrial gene." [SO:ke] -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0000900 -name: processed -def: "An attribute describing a pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed bak inot the genome, lacking introns and promotors, but often including a polyA tail." [SO:ke] -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0000901 -name: unequally_crossed_over -def: "An attribute describing a pseudogene that was created by tandem duplication and unequal crossing over during recombination." [SO:ke] -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0000902 -name: transgene -def: "A gene that is transgenic." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000903 -name: endogenous_retroviral_sequence -is_a: SO:0000751 ! proviral_sequence - -[Term] -id: SO:0000904 -name: rearranged_at_DNA_level -def: "An attribute to describe the sequence of a feature, where the DNA is rearranged." [SO:ke] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000905 -name: status -def: "An attribute describing the status of a feature, based on the available evidence." [SO:ke] -comment: This term is the hypernym of attributes and should not be annotated to. -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000906 -name: independently_known -def: "Attribute to describe a feature that is independently_known - not predicted." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000907 -name: supported_by_sequence_similarity -def: "An attribute to describe a feature that has been predicted using sequence smilarity techniques." [SO:ke] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000908 -name: supported_by_domain_match -def: "An attribute to describe a feature that has been predicted using sequence smilarity of a known domain." [SO:ke] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000909 -name: supported_by_EST_or_cDNA -def: "An attribute to describe a feature that has been predicted using sequence smilarity to EST or cDNA data." [SO:ke] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000910 -name: orphan -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000911 -name: predicted_by_ab_initio_computation -def: "An attribute describng a feaure that is predticted by a computer program that did not rely on sequence similarity. " [SO:ke] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000912 -name: -is_obsolete: true - -[Term] -id: SO:0000913 -name: cloned_cDNA_insert -def: "A clone insert made from cDNA." [SO:xp] -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000792 ! cloned_cDNA - -[Term] -id: SO:0000914 -name: cloned_genomic_insert -def: "A clone insert made from genomic DNA." [SO:xp] -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_origin SO:0000791 ! cloned_genomic - -[Term] -id: SO:0000915 -name: engineered_insert -def: "A clone insert that is engineered." [SO:xp] -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000916 -name: edit_operation -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000917 -name: insert_U -def: "An edit to insert a U." 
[SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000918 -name: delete_U -def: "An edit to delete a uridine." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000919 -name: substitute_A_to_I -def: "An edit to substitute an I for an A." [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000920 -name: insert_C -def: "An edit to insert a cytidine." [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000921 -name: insert_dinucleotide -def: "An edit to insert a dinucleotide." [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000922 -name: substitute_C_to_U -def: "An edit to substitute an U for a C" [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000923 -name: insert_G -def: "An edit to insert a G. " [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000924 -name: insert_GC -def: "An edit to insert a GC dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! 
insert_dinucleotide - -[Term] -id: SO:0000925 -name: insert_GU -def: "An edit to insert a GU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000926 -name: insert_CU -def: "An edit to insert a CU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000927 -name: insert_AU -def: "An edit to insert a AU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. 
The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000928 -name: insert_AA -def: "An edit to insert a AA dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000929 -name: edited_mRNA -def: "An mRNA that is edited." [SO:xp] -intersection_of: SO:0000873 ! edited_transcript -intersection_of: associated_with SO:0000916 ! edit_operation -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000930 -name: guide_RNA_region -is_a: SO:0000834 ! processed_transcript_region -relationship: part_of SO:0000602 ! guide_RNA - -[Term] -id: SO:0000931 -name: anchor_region -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000932 -name: pre_edited_mRNA -is_a: SO:0000120 ! protein_coding_primary_transcript - -[Term] -id: SO:0000933 -name: intermediate -def: "An attribute to describe a feature between stages of processing." [SO:ke] -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000934 -name: intermediate_edited_mRNA -def: "A pre-edited mRNA that has been partially edited." [SO:xp] -intersection_of: SO:0000932 ! pre_edited_mRNA -intersection_of: associated_with SO:0000602 ! guide_RNA -intersection_of: associated_with SO:0000916 ! edit_operation -intersection_of: has_quality SO:0000933 ! intermediate - -[Term] -id: SO:0000935 -name: edited_CDS -def: "A CDS that is edited." [SO:xp] -intersection_of: SO:0000316 ! CDS -intersection_of: associated_with SO:0000916 ! edit_operation -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000936 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000937 -name: vertebrate_immune_system_feature -is_obsolete: true - -[Term] -id: SO:0000938 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000939 -name: vertebrate_immune_system_gene_recombination_signal_feature -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000940 -name: recombinationally_rearranged -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000941 -name: recombinationally_rearranged_vertebrate_immune_system_gene -def: "A recombinationally rearranged gene of the vertibrate immune system." [SO:xp] -intersection_of: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: associated_with SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0001044 -name: nuclear_mt_pseudogene -def: "A pseudogene that is nuclear/mitochondrial." [SO:xp] -synonym: "nuclear_mitochondrial_pseudogene" RELATED [] -synonym: "NUMT" RELATED [] -intersection_of: SO:0000336 ! pseudogene -intersection_of: has_quality SO:0000899 ! 
nuclear_mitochondrial - -[Term] -id: SO:0005836 -name: regulatory_region -def: "A DNA sequence that controls the expression of a gene." [http://www.genpromag.com/scripts/glossary.asp?LETTER=R] -subset: SOFA -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0005837 -name: snRNA_4.5S_primary_transcript -def: "A primary transcript encoding a 4.5S snRNA." [SO:ke] -synonym: "4.5S_snRNA_primary_transcript" RELATED [] -is_a: SO:0000231 ! snRNA_primary_transcript - -[Term] -id: SO:0005839 -name: snRNA_4.5S -synonym: "4.5S_snRNA" RELATED [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0005841 -name: methylation_guide_snoRNA -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0005843 -name: rRNA_cleavage_snoRNA -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0005845 -name: single_exon -is_a: SO:0000147 ! exon - -[Term] -id: SO:0005847 -name: cassette_array_member -is_a: SO:0005848 ! gene_cassette_member - -[Term] -id: SO:0005848 -name: gene_cassette_member -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0005849 -name: gene_subarray_member -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0005850 -name: primer_binding_site -def: "Non-covalent primer binding site for initiation of replication, transcription, or reverse transcription." [http:www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -is_a: SO:0000409 ! binding_site -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0005851 -name: gene_array -def: "An array includes two or more genes, or two or more gene subarrays, contiguously arranged where the individual genes, or subarrays, are either identical in sequence, or essentially so." [SO:ma] -comment: This would include, for example, a cluster of genes each encoding the major ribosomal RNAs and a cluster of histone gene subarrays. -is_a: SO:0005855 ! 
gene_group - -[Term] -id: SO:0005852 -name: gene_subarray -def: "A subarray is, by defintition, a member of a gene array (SO:0005851); the members of a subarray may differ substantially in sequence, but are closely related in function." [SO:ma] -comment: This would include, for example, a cluster of genes encoding different histones. -is_a: SO:0005851 ! gene_array - -[Term] -id: SO:0005853 -name: gene_cassette -def: "A non-functional gene that, when captured by recombination forms a functional gene." [SO:ma] -comment: This would include, for example, the mating type gene cassettes of S. cerevisiae. -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0005854 -name: gene_cassette_array -def: "An array of non-functional genes whose members, when captured by recombination form functional genes." [SO:ma] -comment: This would include, for example, the arrays of non-functional VSG genes of Trypanosomes. -is_a: SO:0005853 ! gene_cassette - -[Term] -id: SO:0005855 -name: gene_group -def: "A collection of related genes." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0005856 -name: selenocysteine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0005857 -name: selenocysteinyl_tRNA -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0005858 -name: syntenic_region -def: "A region in which two or more pairs of homologous markers occur on the same chromosome in two or more species." [http://tbase.jax.org/docs/glossary.html] -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000860 ! syntenic - -[Term] -id: SO:1000002 -name: substitution -def: "Any change in genomic DNA caused by a single event." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0000109 ! sequence_variant -relationship: sequence_of SO:0000048 ! 
substitute - -[Term] -id: SO:1000004 -name: partially_characterised_change_in_DNA_sequence -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000007 ! uncharacterised_change_in_nucleotide_sequence - -[Term] -id: SO:1000005 -name: complex_substitution -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000007 -name: uncharacterised_change_in_nucleotide_sequence -def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000008 -name: point_mutation -def: "A mutation event where a single DNA nucleotide changes into another nucleotide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000009 -name: transition -def: "Change of a pyrimidine nucleotide, C or T, into an other pyrimidine nucleotide, or change of a purine nucleotide, A or G, into an other purine nucleotide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000008 ! point_mutation - -[Term] -id: SO:1000010 -name: pyrimidine_transition -def: "A substitution of a pyrimidine, C or T, for another pyrimidine." [SO:ke] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000011 -name: C_to_T_transition -def: "A transition of a cytidine to a thymine." [SO:ke] -is_a: SO:1000010 ! 
pyrimidine_transition - -[Term] -id: SO:1000012 -name: C_to_T_transition_at_pCpG_site -def: "The transition of cytidine to thymine occurring at a pCpG site as a consequence of the spontaneous deamination of 5'-methylcytidine." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000011 ! C_to_T_transition - -[Term] -id: SO:1000013 -name: T_to_C_transition -is_a: SO:1000010 ! pyrimidine_transition - -[Term] -id: SO:1000014 -name: purine_transition -def: "A substitution of a purine, A or G, for another purine." [SO:ke] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000015 -name: A_to_G_transition -def: "A transition of an adenine to a guanine." [SO:ke] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000016 -name: G_to_A_transition -def: "A transition of a guanine to an adenine." [SO:ke] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000017 -name: transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G, or vice versa." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000008 ! point_mutation - -[Term] -id: SO:1000018 -name: pyrimidine_to_purine_transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G." [SO:ke] -is_a: SO:1000017 ! transversion - -[Term] -id: SO:1000019 -name: C_to_A_transversion -def: "A transversion from cytidine to adenine." [SO:ke] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000020 -name: C_to_G_transversion -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000021 -name: T_to_A_transversion -def: "A transversion from T to A." [SO:ke] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000022 -name: T_to_G_transversion -def: "A transversion from T to G." [SO:ke] -is_a: SO:1000018 ! 
pyrimidine_to_purine_transversion - -[Term] -id: SO:1000023 -name: purine_to_pyrimidine_transversion -def: "Change of a purine nucleotide, A or G , into a pyrimidine nucleotide C or T." [SO:ke] -is_a: SO:1000017 ! transversion - -[Term] -id: SO:1000024 -name: A_to_C_transversion -def: "A transversion from adenine to cytidine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000025 -name: A_to_T_transversion -def: "A transversion from adenine to thymine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000026 -name: G_to_C_transversion -def: "A transversion from guanine to cytidine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000027 -name: G_to_T_transversion -def: "A transversion from guanine to thymine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000028 -name: intrachromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000029 -name: chromosomal_deletion -synonym: "(bacteria)&Dgr;" RELATED [] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(fungi)D" RELATED [] -is_a: SO:0000550 ! aneuploid_chromosome -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000030 -name: chromosomal_inversion -synonym: "(bacteria)IN" RELATED [] -synonym: "(Drosophila)In" RELATED [] -synonym: "(fungi)In" RELATED [] -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000031 -name: interchromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000032 -name: indel -def: "A hybrid term (insertion/deletion) to describe sequence length change when the direction of the change is unspecified." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:0000109 ! sequence_variant - -[Term] -id: SO:1000033 -name: nucleotide_deletion -def: "One or more continuous nucleotides are excised from the sequence." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000032 ! indel - -[Term] -id: SO:1000034 -name: nucleotide_insertion -def: "One or more nucleotides are added between two adjacent nucleotides in the sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000032 ! indel - -[Term] -id: SO:1000035 -name: nucleotide_duplication -def: "One or more nucleotides are added between two adjacent nucleotides in the sequence; the inserted sequence derives from, or is identical in sequence to, nucleotides adjacent to insertion point." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000034 ! nucleotide_insertion - -[Term] -id: SO:1000036 -name: inversion -def: "A continuous nucleotide sequence is inverted in the same position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0000109 ! sequence_variant -relationship: sequence_of SO:0000047 ! invert - -[Term] -id: SO:1000037 -name: chromosomal_duplication -synonym: "(Drosophila)Dp" RELATED [] -synonym: "(fungi)Dp" RELATED [] -is_a: SO:0000550 ! aneuploid_chromosome -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000038 -name: intrachromosomal_duplication -is_a: SO:1000028 ! intrachromosomal_mutation -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000039 -name: direct_tandem_duplication -is_a: SO:1000173 ! tandem_duplication - -[Term] -id: SO:1000040 -name: inverted_tandem_duplication -is_a: SO:1000173 ! tandem_duplication - -[Term] -id: SO:1000041 -name: intrachromosomal_transposition -synonym: "(Drosophila)Tp" RELATED [] -is_a: SO:0000453 ! transposition -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:1000042 -name: compound_chromosome -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000043 -name: Robertsonian_fusion -is_a: SO:1000044 ! 
chromosomal_translocation - -[Term] -id: SO:1000044 -name: chromosomal_translocation -synonym: "(Drosophila)T" RELATED [] -synonym: "(fungi)T" RELATED [] -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000045 -name: ring_chromosome -synonym: "(Drosophila)R" RELATED [] -synonym: "(fungi)C" RELATED [] -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000046 -name: pericentric_inversion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000047 -name: paracentric_inversion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000048 -name: reciprocal_chromosomal_translocation -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000049 -name: mutation_affecting_transcript -def: "Any change in mature, spliced and processed, RNA that results from a change in the corresponding DNA sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000132 ! consequences_of_mutation - -[Term] -id: SO:1000050 -name: no_change_in_transcript -def: "No effect on the state of the RNA." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000052 -name: complex_change_in_transcript -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000054 -name: mutation_affecting_coding_sequence -def: "Any of the amino acid coding triplets of a gene are affected by the DNA mutation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000055 -name: initiator_codon_change_in_transcript -def: "The DNA mutation changes, usually destroys, the first coding triplet of a gene. Usually prevents translation although another initiator codon may be used." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000056 ! 
amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000056 -name: amino_acid_coding_codon_change_in_transcript -def: "The DNA mutation affects the amino acid coding sequence of a gene; this region includes both the initiator and terminator codons." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000054 ! mutation_affecting_coding_sequence - -[Term] -id: SO:1000057 -name: synonymous_codon_change_in_transcript -def: "The changed codon has the same translation product as the original codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000056 ! amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000058 -name: non_synonymous_codon_change_in_transcript -def: "A DNA point mutation that causes a substitution of an amino acid by an other." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "non-synonymous_codon_change_in_transcript" RELATED [] -is_a: SO:1000056 ! amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000059 -name: missense_codon_change_in_transcript -def: "The nucleotide change in the codon leads to a new codon coding for a new amino acid." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000058 ! non_synonymous_codon_change_in_transcript - -[Term] -id: SO:1000060 -name: conservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change does not change the gross properties (size, charge, hydrophobicity) of the amino acid at that position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -is_a: SO:1000059 ! 
missense_codon_change_in_transcript - -[Term] -id: SO:1000061 -name: nonconservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change changes the gross properties (size, charge, hydrophobicity) of the amino acid in that position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -is_a: SO:1000059 ! missense_codon_change_in_transcript - -[Term] -id: SO:1000062 -name: nonsense_codon_change_in_transcript -def: "The nucleotide change in the codon triplet creates a terminator codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000056 ! amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000063 -name: terminator_codon_change_in_transcript -is_a: SO:1000054 ! mutation_affecting_coding_sequence - -[Term] -id: SO:1000064 -name: mutation_affecting_reading_frame -def: "An umbrella term for terms describing an effect of a mutation on the frame of translation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000054 ! mutation_affecting_coding_sequence - -[Term] -id: SO:1000065 -name: frameshift_mutation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000064 ! mutation_affecting_reading_frame - -[Term] -id: SO:1000066 -name: plus_1_frameshift_mutation -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000067 -name: minus_1_frameshift_mutation -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000068 -name: plus_2_frameshift_mutation -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000069 -name: minus_2_frameshift_mutation -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000070 -name: mutation_affecting_transcript_processing -def: "Mutation affects the way in which the primary transcriptional product is processed to form the mature transcript." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000071 -name: mutation_affecting_splicing -def: "Mutation affects the way in which the primary transcriptional product is processed to form the mature transcript, specifically by the removal (splicing) of intron sequences." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000132 ! consequences_of_mutation - -[Term] -id: SO:1000072 -name: splice_donor_mutation -is_a: SO:1000071 ! mutation_affecting_splicing -is_a: SO:1000074 ! cryptic_splice_activator_mutation - -[Term] -id: SO:1000073 -name: splice_acceptor_mutation -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000074 -name: cryptic_splice_activator_mutation -def: "Mutation creates a new (functional) splice site." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000075 -name: mutation_affecting_editing -def: "Mutation affects the editing of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000070 ! mutation_affecting_transcript_processing - -[Term] -id: SO:1000076 -name: mutation_affecting_transcription -def: "Mutation affects the process of transcription, its initiation, progression or termination." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000078 -name: mutation_decreasing_rate_of_transcription -is_a: SO:1000081 ! mutation_affecting_rate_of_transcription - -[Term] -id: SO:1000079 -name: mutation_affecting_transcript_sequence -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000080 -name: mutation_increasing_rate_of_transcription -is_a: SO:1000081 ! mutation_affecting_rate_of_transcription - -[Term] -id: SO:1000081 -name: mutation_affecting_rate_of_transcription -is_a: SO:1000076 ! 
mutation_affecting_transcription - -[Term] -id: SO:1000082 -name: mutation_affecting_transcript_stability -def: "Mutation affects the stability of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000083 -name: mutation_increasing_transcript_stability -def: "Mutation increases the stability (half-life) of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000082 ! mutation_affecting_transcript_stability - -[Term] -id: SO:1000084 -name: mutation_decreasing_transcript_stability -def: "Mutation decreases the stability (half-life) of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000082 ! mutation_affecting_transcript_stability - -[Term] -id: SO:1000085 -name: mutation_affecting_level_of_transcript -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000086 -name: mutation_decreasing_level_of_transcript -is_a: SO:1000085 ! mutation_affecting_level_of_transcript - -[Term] -id: SO:1000087 -name: mutation_increasing_level_of_transcript -is_a: SO:1000085 ! mutation_affecting_level_of_transcript - -[Term] -id: SO:1000088 -name: mutation_affecting_translational_product -def: "Mutation causes a change in primary translation product of a transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000132 ! consequences_of_mutation - -[Term] -id: SO:1000089 -name: no_change_of_translational_product -def: "The change at RNA level does not lead to any change in polypeptide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000090 -name: uncharacterised_change_of_translational_product -def: "The nature of the mutation event is either uncharacterised or only partially characterised." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000091 -name: partially_characterised_change_of_translational_product -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000090 ! uncharacterised_change_of_translational_product - -[Term] -id: SO:1000092 -name: complex_change_of_translational_product -def: "Any mutation effect that is known at nucleotide level but can not be explained by using other key terms." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000093 -name: amino_acid_substitution -def: "The replacement of a single amino acid by an other." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000094 -name: conservative_amino_acid_substitution -is_a: SO:1000093 ! amino_acid_substitution - -[Term] -id: SO:1000095 -name: nonconservative_amino_acid_substitution -is_a: SO:1000093 ! amino_acid_substitution - -[Term] -id: SO:1000096 -name: amino_acid_insertion -def: "The insertion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000097 -name: amino_acid_deletion -def: "The deletion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! 
mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000098 -name: polypeptide_truncation -def: "The translational product is truncated at its C-terminus, usually a result of a nonsense codon change in transcript (SO:1000062)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000099 -name: polypeptide_elongation -def: "The extension of the translational product at either (or both) the N-terminus and/or the C-terminus." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000100 -name: polypeptide_N_terminal_elongation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "polypeptide_N-terminal_elongation" RELATED [] -is_a: SO:1000099 ! polypeptide_elongation - -[Term] -id: SO:1000101 -name: polypeptide_C_terminal_elongation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "polypeptide_C-terminal_elongation" RELATED [] -is_a: SO:1000099 ! polypeptide_elongation - -[Term] -id: SO:1000102 -name: mutation_affecting_level_of_translational_product -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000103 -name: mutation_decreasing_level_of_translation_product -is_a: SO:1000102 ! mutation_affecting_level_of_translational_product - -[Term] -id: SO:1000104 -name: mutation_increasing_level_of_translation_product -is_a: SO:1000102 ! mutation_affecting_level_of_translational_product - -[Term] -id: SO:1000105 -name: mutation_affecting_polypeptide_amino_acid_sequence -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000106 -name: inframe_polypeptide_N_terminal_elongation -synonym: "inframe_polypeptide_N-terminal_elongation" RELATED [] -is_a: SO:1000100 ! 
polypeptide_N_terminal_elongation - -[Term] -id: SO:1000107 -name: out_of_frame_polypeptide_N_terminal_elongation -synonym: "out_of_frame_polypeptide_N-terminal_elongation" RELATED [] -is_a: SO:1000100 ! polypeptide_N_terminal_elongation - -[Term] -id: SO:1000108 -name: inframe_polypeptide_C_terminal_elongation -synonym: "inframe_polypeptide_C-terminal_elongation" RELATED [] -is_a: SO:1000101 ! polypeptide_C_terminal_elongation - -[Term] -id: SO:1000109 -name: out_of_frame_polypeptide_C_terminal_elongation -synonym: "out_of_frame_polypeptide_C-terminal_elongation" RELATED [] -is_a: SO:1000101 ! polypeptide_C_terminal_elongation - -[Term] -id: SO:1000110 -name: frame_restoring_mutation -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000111 -name: mutation_affecting_3D_structure_of_polypeptide -synonym: "mutation_affecting_3D-structure_of_polypeptide" RELATED [] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000112 -name: no_3D_structural_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000113 -name: uncharacterised_3D_structural_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000114 -name: partially_characterised_3D_structural_change -is_a: SO:1000113 ! uncharacterised_3D_structural_change - -[Term] -id: SO:1000115 -name: complex_3D_structural_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000116 -name: conformational_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000117 -name: mutation_affecting_polypeptide_function -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000118 -name: loss_of_function_of_polypeptide -synonym: "loss-of-function_of_polypeptide" RELATED [] -is_a: SO:1000117 ! mutation_affecting_polypeptide_function - -[Term] -id: SO:1000119 -name: inactive_ligand_binding_site -is_a: SO:1000118 ! 
loss_of_function_of_polypeptide - -[Term] -id: SO:1000120 -name: inactive_catalytic_site -is_a: SO:1000119 ! inactive_ligand_binding_site - -[Term] -id: SO:1000121 -name: polypeptide_localization_affected -is_a: SO:1000117 ! mutation_affecting_polypeptide_function - -[Term] -id: SO:1000122 -name: polypeptide_post_translational_processing_affected -synonym: "polypeptide_post-translational_processing_affected" RELATED [] -is_a: SO:1000117 ! mutation_affecting_polypeptide_function -is_a: SO:1000118 ! loss_of_function_of_polypeptide - -[Term] -id: SO:1000123 -name: polypeptide_post_translational_processing_affected -synonym: "polypeptide_post-translational_processing_affected" RELATED [] -is_obsolete: true - -[Term] -id: SO:1000124 -name: partial_loss_of_function_of_polypeptide -synonym: "partial_loss-of-function_of_polypeptide" RELATED [] -is_a: SO:1000118 ! loss_of_function_of_polypeptide - -[Term] -id: SO:1000125 -name: gain_of_function_of_polypeptide -synonym: "gain-of-function_of_polypeptide" RELATED [] -is_a: SO:1000117 ! mutation_affecting_polypeptide_function - -[Term] -id: SO:1000126 -name: mutation_affecting_transcript_secondary_structure -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000127 -name: compensatory_transcript_secondary_structure_mutation -is_a: SO:1000126 ! mutation_affecting_transcript_secondary_structure - -[Term] -id: SO:1000132 -name: consequences_of_mutation -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:1000134 -name: polypeptide_fusion -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000136 -name: autosynaptic_chromosome -synonym: "(Drosophila)A" RELATED [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000138 -name: homo_compound_chromosome -synonym: "homo-compound_chromosome" RELATED [] -is_a: SO:1000042 ! 
compound_chromosome - -[Term] -id: SO:1000140 -name: hetero_compound_chromosome -synonym: "hetero-compound_chromosome" RELATED [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:1000141 -name: chromosome_fission -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000142 -name: dexstrosynaptic_chromosome -is_a: SO:1000136 ! autosynaptic_chromosome - -[Term] -id: SO:1000143 -name: laevosynaptic_chromosome -is_a: SO:1000136 ! autosynaptic_chromosome - -[Term] -id: SO:1000144 -name: free_duplication -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000145 -name: free_ring_duplication -synonym: "(Drosophila)R" RELATED [] -is_a: SO:1000045 ! ring_chromosome -is_a: SO:1000144 ! free_duplication - -[Term] -id: SO:1000146 -name: complex_chromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000147 -name: deficient_translocation -def: "A translocation in which one of the four broken ends loses a segment before re-joining." [fb:reference_manual] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfT" RELATED [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000148 -name: inversion_cum_translocation -def: "The first two breaks are in the same chromosome, and the region between them is rejoined in inverted order to the other side of the first break, such that both sides of break one are present on the same chromosome. The remaining free ends are joined as a translocation with those resulting from the third break." [fb:reference_manual] -synonym: "(Drosophila)InT" RELATED [] -synonym: "(Drosophila)T" RELATED [] -synonym: "inversion-cum-translocation" RELATED [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000044 ! 
chromosomal_translocation - -[Term] -id: SO:1000149 -name: bipartite_duplication -def: "The (large) region between the first two breaks listed is lost, and the two flanking segments (one of them centric) are joined as a translocation to the free ends resulting from the third break." [fb:reference_manual] -synonym: "(Drosophila)bDp" RELATED [] -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000150 -name: cyclic_translocation -def: "Three breaks in three different chromosomes. The centric segment resulting from the first break listed is joined to the acentric segment resulting from the second, rather than the third." [fb:reference_manual] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000151 -name: bipartite_inversion -def: "Three breaks in the same chromosome; both central segments are inverted in place (i.e., they are not transposed)." [fb:reference_manual] -synonym: "(Drosophila)bIn" RELATED [] -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000152 -name: uninverted_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)eDp" RELATED [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000153 -name: inverted_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)iDp" RELATED [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000154 -name: insertional_duplication -synonym: "(Drosophila)Dpp" RELATED [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000155 -name: interchromosomal_transposition -synonym: "(Drosophila)Tp" RELATED [] -is_a: SO:0000453 ! transposition -is_a: SO:1000031 ! 
interchromosomal_mutation - -[Term] -id: SO:1000156 -name: inverted_interchromosomal_transposition -synonym: "(Drosophila)iTp" RELATED [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000157 -name: uninverted_interchromosomal_transposition -synonym: "(Drosophila)eTp" RELATED [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000158 -name: inverted_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)iTp" RELATED [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000159 -name: uninverted_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)eTp" RELATED [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000160 -name: unoriented_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [fb:reference_manual] -synonym: "(Drosophila)uDp" RELATED [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000161 -name: unorientated_interchromosomal_transposition -synonym: "(Drosophila)uTp" RELATED [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000162 -name: unorientated_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." 
[fb:reference_manual] -synonym: "(Drosophila)uTp" RELATED [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000170 -name: uncharacterised_chromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000171 -name: deficient_inversion -def: "Three breaks in the same chromosome; one central region lost, the other inverted." [fb:reference_manual] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfIn" RELATED [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000173 -name: tandem_duplication -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:1000175 -name: partially_characterised_chromosomal_mutation -is_a: SO:1000170 ! uncharacterised_chromosomal_mutation - -[Term] -id: SO:1000177 -name: uncharacterised_change_in_transcript -def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000179 -name: partially_characterised_change_in_transcript -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000177 ! uncharacterised_change_in_transcript - -[Term] -id: SO:1000180 -name: mutation_affecting_gene_structure -is_a: SO:1000132 ! consequences_of_mutation - -[Term] -id: SO:1000181 -name: gene_fusion -is_a: SO:1000180 ! mutation_affecting_gene_structure - -[Term] -id: SO:1000182 -name: chromosome_number_variation -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:1000183 -name: chromosome_structure_variation -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:1000184 -name: mutation_causes_exon_loss -is_a: SO:1000071 ! 
mutation_affecting_splicing - -[Term] -id: SO:1000185 -name: mutation_causes_intron_gain -def: "Mutation causes an intron to be gained by the processed transcript; usually a result of a donor acceptor mutation (SO:1000072)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000186 -name: cryptic_splice_donor_activation -is_a: SO:1000074 ! cryptic_splice_activator_mutation - -[Term] -id: SO:1001186 -name: cryptic_splice_acceptor_activation -is_a: SO:1000074 ! cryptic_splice_activator_mutation - -[Term] -id: SO:1001187 -name: alternatively_spliced_transcript -def: "A transcript that is alternatively spliced." [SO:xp] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000877 ! alternatively_spliced - -[Term] -id: SO:1001188 -name: encodes_1_polypeptide -is_a: SO:0000463 ! encodes_alternately_spliced_transcripts - -[Term] -id: SO:1001189 -name: encodes_greater_than_1_polypeptide -is_a: SO:0000463 ! encodes_alternately_spliced_transcripts - -[Term] -id: SO:1001190 -name: encodes_different_polypeptides_different_stop -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001191 -name: encodes_overlapping_peptides_different_start -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001192 -name: encodes_disjoint_polypeptides -is_a: SO:1001189 ! encodes_greater_than_1_polypeptide - -[Term] -id: SO:1001193 -name: encodes_overlapping_polypeptides_different_start_and_stop -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001194 -name: alternatively_spliced_gene_encoding_greater_than_1_polypeptide_coding_regions_overlapping -is_obsolete: true - -[Term] -id: SO:1001195 -name: encodes_overlapping_peptides -is_a: SO:1001189 ! encodes_greater_than_1_polypeptide - -[Term] -id: SO:1001196 -name: cryptogene -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:1001197 -name: dicistronic_primary_transcript -def: "A primary transcript that has the quality dicistronic. " [SO:xp] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:1001217 -name: member_of_regulon -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:1001244 -name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non_overlapping -synonym: "alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non-overlapping" RELATED [] -is_obsolete: true - -[Term] -id: SO:1001246 -name: CDS_independently_known -def: "A CDS with the evidence status of being independently known." [SO:xp] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000906 ! independently_known - -[Term] -id: SO:1001247 -name: orphan_CDS -def: "A CDS whose predicted amino acid sequence is unsupported by any experimental evidence or by any match with any other known sequence." [SO:ma] -intersection_of: SO:1001254 ! CDS_predicted -intersection_of: has_origin SO:0000910 ! orphan - -[Term] -id: SO:1001249 -name: CDS_supported_by_domain_match_data -def: "A CDS that is supported by domain similarity." [SO:xp] -intersection_of: SO:1001251 ! CDS_supported_by_sequence_similarity_data -intersection_of: has_quality SO:0000908 ! supported_by_domain_match - -[Term] -id: SO:1001251 -name: CDS_supported_by_sequence_similarity_data -def: "A CDS that is supported by sequence similarity data." [SO:xp] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:1001254 -name: CDS_predicted -def: "A CDS that is predicted." [SO:ke] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000732 ! 
predicted - -[Term] -id: SO:1001255 -name: status_of_coding_sequence -is_obsolete: true - -[Term] -id: SO:1001259 -name: CDS_supported_by_EST_or_cDNA_data -def: "A CDS that is supported by similarity to EST or cDNA data." [SO:xp] -intersection_of: SO:1001251 ! CDS_supported_by_sequence_similarity_data -intersection_of: has_quality SO:0000909 ! supported_by_EST_or_cDNA - -[Term] -id: SO:1001260 -name: internal_Shine_Dalgarno_sequence -def: "A Shine Delgarno sequence that is upstream of a non-5' CDS in a polycistronic mRNA." [SO:ke] -is_a: SO:0000243 ! internal_ribosome_entry_site -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001261 -name: recoded_mRNA -def: "A gene coding an mRNA which is recoded before translation, usually by special cis-acting signals." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000881 ! recoded - -[Term] -id: SO:1001262 -name: minus_1_translational_frameshift -def: "An attribute describing a translational frameshift of -1. " [SO:ke] -is_a: SO:0000887 ! translational_frameshift - -[Term] -id: SO:1001263 -name: plus_1_translational_frameshift -def: "An attribute describing a translational frameshift of +1. " [SO:ke] -is_a: SO:0000887 ! translational_frameshift - -[Term] -id: SO:1001264 -name: mRNA_recoded_by_translational_bypass -def: "mRNA is translated by ribosomes that suspend translation at a particular codon and resume translation at a particular non-overlapping downstream codon." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:1001265 -name: mRNA_recoded_by_codon_redefinition -def: "A gene whose mRNA is recoded by an alteration of codon meaning." [SO:ma] -intersection_of: SO:0000234 ! 
mRNA -intersection_of: has_quality SO:0000882 ! codon_redefined - -[Term] -id: SO:1001266 -name: stop_codon_redefinition_as_selenocysteine -is_obsolete: true - -[Term] -id: SO:1001267 -name: stop_codon_readthrough -is_obsolete: true - -[Term] -id: SO:1001268 -name: recoding_stimulatory_region -def: "A site in an mRNA sequence that stimulates the recoding of the same mRNA." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12519954&dopt=Abstract] -synonym: "recoding_stimulatory_signal" RELATED [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:1001269 -name: four_bp_start_codon -def: "A non-canonical start codon with 4 pase pairs." [SO:ke] -synonym: "4bp_start_codon" RELATED [] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001270 -name: stop_codon_redefinition_as_pyrrolysine -is_obsolete: true - -[Term] -id: SO:1001271 -name: archeal_intron -def: "Intron characteristic of tRNA genes; splices by an endonuclease-ligase mediated mechanism." [SO:ma] -is_a: SO:0000661 ! intron_attribute - -[Term] -id: SO:1001272 -name: tRNA_intron -is_a: SO:0000661 ! intron_attribute - -[Term] -id: SO:1001273 -name: CTG_start_codon -def: "A non-canonical start codon of sequence CTG." [SO:ke] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001274 -name: SECIS_element -def: "The incorporation of selenocysteine into a protein sequence is directed by an in-frame UGA codon (usually a stop codon) within the coding region of the mRNA. Selenoprotein mRNAs contain a conserved secondary structure in the 3' UTR that is required for the distinction of UGA stop from UGA selenocysteine. The selenocysteine insertion sequence (SECIS) is around 60 nt in length and adopts a hairpin structure which is sufficiently well-defined and conserved to act as a computational screen for selenoprotein genes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00031] -is_a: SO:1001268 ! 
recoding_stimulatory_region - -[Term] -id: SO:1001275 -name: retron -def: "Sequence coding for a short, single-stranded, DNA sequence via a retrotransposed RNA intermediate; characteristic of some microbial genomes." [SO:ma] -is_a: SO:0000001 ! region - -[Term] -id: SO:1001277 -name: three_prime_recoding_site -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001279 -name: three_prime_stem_loop_structure -def: "The stem-loop secondary structural element downstream of the redefined region." [SO:ke] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001280 -name: five_prime_recoding_site -def: "The recoding signal found 5' of the redefined codon." [SO:ke] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001281 -name: flanking_three_prime_quadruplet_recoding_signal -def: "Four base pair sequence immediately downstream of the redefined region. The redefined region is a frameshift site. The quadruplet is 2 overlapping codons." [SO:ke] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001282 -name: UAG_stop_codon_signal -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001283 -name: UAA_stop_codon_signal -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001284 -name: regulon -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." [ISBN:0198506732] -subset: SOFA -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:1001285 -name: UGA_stop_codon_signal -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001286 -name: three_prime_repeat_recoding_signal -def: "It is a downstream sequence important for recoding that contains repetitive elements." [SO:ke] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001287 -name: distant_three_prime_recoding_signal -def: "A recoding signal that is found many hundreds of nucleotides 3' of a redefined stop codon." 
[http://www.ncbi.nlm.nih.gov 80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8709208&dopt=Abstract] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001288 -name: stop_codon_signal -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:2000061 -name: databank_entry -def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -is_a: SO:0000695 ! reagent - -[Typedef] -id: adjacent_to -name: adjacent_to -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence." [SO:ke] -subset: SOFA -domain: SO:0000110 ! located_sequence_feature -range: SO:0000110 ! located_sequence_feature -is_symmetric: true - -[Typedef] -id: associated_with -name: associated_with -comment: This relationship is vague and up for discussion. -is_symmetric: true - -[Typedef] -id: derives_from -name: derives_from -subset: SOFA -is_transitive: true - -[Typedef] -id: genome_of -name: genome_of - -[Typedef] -id: has_genome_location -name: has_genome_location -domain: SO:0000085 ! gene_by_genome_location -range: SO:0000704 ! gene -is_obsolete: true - -[Typedef] -id: has_origin -name: has_origin - -[Typedef] -id: has_quality -name: has_quality -comment: The relationship between a feature and an atrribute. - -[Typedef] -id: homologous_to -name: homologous_to -subset: SOFA -is_symmetric: true -is_a: similar_to ! similar_to - -[Typedef] -id: member_of -name: member_of -comment: A subtype of part_of.ninverse is collection_of.nWinston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. -subset: SOFA -is_a: part_of ! part_of - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -subset: SOFA -is_a: homologous_to ! 
homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: paralogous_to -name: paralogous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: part_of -name: part_of -subset: SOFA -is_transitive: true - -[Typedef] -id: position_of -name: position_of - -[Typedef] -id: regulated_by -name: regulated_by -is_obsolete: true - -[Typedef] -id: sequence_of -name: sequence_of - -[Typedef] -id: similar_to -name: similar_to -subset: SOFA -is_symmetric: true - diff --git a/annotation/NBIS/Ontology/SOXP/so-xp_2_2.obo b/annotation/NBIS/Ontology/SOXP/so-xp_2_2.obo deleted file mode 100644 index 61701aefc..000000000 --- a/annotation/NBIS/Ontology/SOXP/so-xp_2_2.obo +++ /dev/null @@ -1,9871 +0,0 @@ -format-version: 1.2 -date: 30:08:2007 13:40 -saved-by: kareneilbeck -auto-generated-by: OBO-Edit 1.101 -subsetdef: biosapiens "biosapiens protein feature ontology" -subsetdef: SOFA "SO feature annotation" -default-namespace: sequence -remark: autogenerated-by\: DAG-Edit version 1.417\nsaved-by\: eilbeck\ndate\: Tue May 11 15\:18\:44 PDT 2004\nversion\: $Revision\: 1.45 $ - -[Term] -id: SO:0000000 -name: Sequence_Ontology -subset: SOFA - -[Term] -id: SO:0000001 -name: region -def: "A sequence_feature with an extent greater than zero." [SO:ke] -subset: SOFA -synonym: "sequence" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000002 -name: sequence_secondary_structure -def: "A folded sequence." [SO:ke] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000003 -name: G_quartet -def: "G-quartets are unusual nucleic acid structures consisting of a planar arrangement where each guanine is hydrogen bonded by hoogsteen pairing to another guanine in the quartet." [http://www.library.csi.cuny.edu/~davis/molbiol/lecture_notes/post-transcriptional_processes/RNACapping.pdf] -synonym: "G-quartet" EXACT [] -is_a: SO:0000002 ! 
sequence_secondary_structure - -[Term] -id: SO:0000004 -name: interior_coding_exon -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000005 -name: satellite_DNA -def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000006 -name: PCR_product -def: "A region amplified by a PCR reaction." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "amplicon" RELATED [] -synonym: "PCR product" EXACT [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000007 -name: read_pair -def: "A pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000008 -name: gene_sensu_your_favorite_organism -is_obsolete: true - -[Term] -id: SO:0000009 -name: gene_class -is_obsolete: true - -[Term] -id: SO:0000010 -name: protein_coding -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000011 -name: non_protein_coding -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000012 -name: scRNA_primary_transcript -def: "The primary transcript of any one of several small cytoplasmic RNA molecule spresent in the cytoplasm and sometimes nucleus of a eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -synonym: "scRNA transcript" EXACT [] -synonym: "small cytoplasmic RNA" RELATED [] -synonym: "small cytoplasmic RNA transcript" EXACT [] -synonym: "small_cytoplasmic_RNA" RELATED [] -is_a: SO:0000483 ! 
nc_primary_transcript - -[Term] -id: SO:0000013 -name: scRNA -def: "Any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -subset: SOFA -synonym: " small cytoplasmic RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000014 -name: INR_motif -def: "A sequence element characteristic of some RNA polymerase II promoters required for the correct positioning of the polymerase for the start of transcription. Overlaps the TSS. The mammalian consensus sequence is YYAN(T|A)YY; the Drosophila consensus sequence is TCA(G|T)t(T|C). In each the A is at position +1 with respect to the TSS. Functionally similar to the TATA box element." [PMID:12651739] -synonym: "initiator" EXACT [] -synonym: "initiator motif" EXACT [] -is_a: SO:0000844 ! RNA_II_promoter_region - -[Term] -id: SO:0000015 -name: DPE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters; always found with the INR_motif. Positioned from +28 to +32 with respect to the TSS (+1). Consensus sequence (A|G)G(A|T)(C|T)(G|A|C). Required for TFIID binding to TATA-less promoters." [PMID:12651739] -synonym: "downstream core promoter element" EXACT [] -is_a: SO:0000844 ! RNA_II_promoter_region - -[Term] -id: SO:0000016 -name: BRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements at -37 to -32 with respect to the TSS (+1). Consensus sequence is (G|C)(G|C)(G|A)CGCC. Binds TFIIB." [PMID:12651739] -synonym: " transcription factor B-recognition element" EXACT [] -synonym: "B-recognition element" EXACT [] -synonym: "TFIIB recognition element" RELATED [] -is_a: SO:0000844 ! RNA_II_promoter_region - -[Term] -id: SO:0000017 -name: PSE_motif -def: "A sequence element characteristic of the promoters of snRNA genes transcribed by RNA polymerase II or by RNA polymerase III. 
Located between -45 and -60 relative to the TSS. The human PSE_motif consensus sequence is TCACCNTNA(C|G)TNAAAAG(T|G)." [PMID:12651739] -synonym: "proximal sequence element" EXACT [] -is_a: SO:0000844 ! RNA_II_promoter_region - -[Term] -id: SO:0000018 -name: linkage_group -def: "A group of loci that can be grouped in a linear order representing the different degrees of linkage among the genes concerned." [ISBN:038752046] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000019 -name: RNA_hairpin_loop -def: "A region of single stranded RNA where the 3 dimensional structure folds back upon and base pairing occurs. The structure when drawn in 2D resembles a hairpin." [SO:ke] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000020 -name: RNA_internal_loop -def: "A region of double stranded RNA where the bases do not conform to WC base pairing. The loop is closed on both sides by canonical base pairing. If the interuption to base pairing occurs on one strand only, it is known as a bulge." [SO:ke] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000021 -name: asymmetric_RNA_internal_loop -def: "An internal RNA loop where one of the strands includes more bases than the corresponding region on the other strand." [SO:ke] -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000022 -name: A_minor_RNA_motif -def: "A region forming a motif, composed of adenines, where the minor groove edges are inserted into the minor groove of another helix." [SO:ke] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000023 -name: K_turn_RNA_motif -def: "The kink turn (K-turn) is an RNA structural motif that creates a sharp (~120 degree) bend between two continuous helices." [SO:ke] -synonym: "K-turn" EXACT [] -synonym: "kink turn" EXACT [] -synonym: "kink-turn motif" EXACT [] -is_a: SO:0000021 ! asymmetric_RNA_internal_loop - -[Term] -id: SO:0000024 -name: Sarcin_like_RNA_motif -def: "A loop in ribosomal RNA containing the sites of attack for ricin and sarcin." 
[http://proton.chem.yale.edu/pdf/7897662.pdf] -synonym: " sarcin/ricin RNA domain" EXACT [] -synonym: "sarcin/ricin domain" EXACT [] -synonym: "sarcin/ricin loop" EXACT [] -is_a: SO:0000021 ! asymmetric_RNA_internal_loop - -[Term] -id: SO:0000025 -name: symmetric_RNA_internal_loop -def: "An internal RNA loop where the extent of the loop on both stands is the same size." [SO:ke] -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000026 -name: RNA_junction_loop -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000027 -name: RNA_hook_turn -synonym: "hook turn" RELATED [] -synonym: "hook-turn motif" EXACT [] -is_a: SO:0000026 ! RNA_junction_loop - -[Term] -id: SO:0000028 -name: base_pair -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000029 -name: WC_base_pair -def: "The canonical base pair, where two bases interact via WC edges, with glycosidic bonds oriented cis relative to the axis of orientation." [PMID:12177293] -synonym: "canonical base pair" EXACT [] -synonym: "Watson Crick base pair" EXACT [] -synonym: "Watson-Crick base pair" RELATED [] -synonym: "Watson-Crick pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000030 -name: sugar_edge_base_pair -def: "A type of non-canonical base-pairing." [PMID:12177293] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000031 -name: aptamer -def: "DNA or RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http://aptamer.icmb.utexas.edu] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000032 -name: DNA_aptamer -def: "DNA molecules that have been selected from random pools based on their ability to bind other molecules." [http:aptamer.icmb.utexas.edu] -is_a: SO:0000031 ! aptamer - -[Term] -id: SO:0000033 -name: RNA_aptamer -def: "RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http://aptamer.icmb.utexas.edu] -is_a: SO:0000031 ! 
aptamer - -[Term] -id: SO:0000034 -name: morpholino -def: "Morpholino oligos are synthesized from four different Morpholino subunits, each of which contains one of the four genetic bases (A, C, G, T) linked to a 6-membered morpholine ring. Eighteen to 25 subunits of these four subunit types are joined in a specific order by non-ionic phosphorodiamidate intersubunit linkages to give a Morpholino." [http://www.gene-tools.com/Morpholinos/morpholinos.HTML] -synonym: "morpholino oligo" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000035 -name: riboswitch -def: "A riboswitch is a part of an mRNA that can act as a direct sensor of small molecules to control their own expression. A riboswitch is a cis element in the 5' end of an mRNA, that acts as a direct sensor of metabolites." [PMID:2820954] -synonym: "riboswitch RNA" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000036 -name: matrix_attachment_site -def: "A DNA region that is required for the binding of chromatin to the nuclear matrix." [SO:ma] -synonym: "" RELATED [] -synonym: "MAR" EXACT [] -synonym: "matrix association region" EXACT [] -synonym: "matrix attachment region" EXACT [] -synonym: "nuclear matrix association region" EXACT [] -synonym: "nuclear matrix attachment site" EXACT [] -synonym: "S/MAR" EXACT [] -synonym: "S/MAR element" RELATED [] -synonym: "scaffold attachment site" EXACT [] -synonym: "scaffold matrix attachment region" EXACT [] -synonym: "SMAR" EXACT [] -is_a: SO:0000626 ! chromosomal_regulatory_element - -[Term] -id: SO:0000037 -name: locus_control_region -def: "A DNA region that includes DNAse hypersensitive sites located 5' to a gene that confers the high-level, position-independent, and copy number-dependent expression to that gene." [SO:ma] -synonym: "LCR" EXACT [] -synonym: "locus control element" RELATED [] -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000038 -name: match_set -def: "A collection of match parts." 
[SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000039 -name: match_part -def: "A part of a match, for example an hsp from blast isa match_part." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000343 ! match - -[Term] -id: SO:0000040 -name: genomic_clone -def: "A clone of a DNA region of a genome." [SO:ma] -is_a: SO:0000151 ! clone - -[Term] -id: SO:0000041 -name: operation -def: "An operation that can be applied to a sequence, that results in a chnage." [SO:ke] -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0000042 -name: pseudogene_attribute -def: "An attribute of a pseudogene (SO:0000336)." [SO:ma] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000043 -name: processed_pseudogene -def: "A pseudogene that is processed." [SO:xp] -synonym: "pseudogene by reverse transcription" RELATED [] -intersection_of: SO:0000336 ! pseudogene -intersection_of: has_quality SO:0000900 ! processed - -[Term] -id: SO:0000044 -name: pseudogene_by_unequal_crossing_over -def: "A pseudogene caused by unequal crossing over at recombination." [SO:ke] -intersection_of: SO:0000336 ! pseudogene -intersection_of: has_quality SO:0000901 ! unequally_crossed_over - -[Term] -id: SO:0000045 -name: delete -def: "To remove a subsection of sequence." [SO:ke] -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000046 -name: insert -def: "To insert a subsection of sequence." [SO:ke] -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000047 -name: invert -def: "To invert a subsection of sequence." [SO:ke] -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000048 -name: substitute -def: "To substitute a subsection of sequence for another." [SO:ke] -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000049 -name: translocate -def: "To translocate a subsection of sequence." [SO:ke] -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000050 -name: gene_part -def: "A part of a gene, that has no other route in the ontology back to region. 
This concept is necessary for logical inference as these parts must have the properties of region. It is also allows us to associate all the parts of genes with a gene." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000051 -name: probe -def: "A DNA sequence used experimentally to detect the presence or absence of a complementary nucleic acid." [SO:ma] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000052 -name: assortment_derived_deficiency -synonym: "assortment-derived_deficiency" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000053 -name: mutation_affecting_regulatory_region -def: "A kind of mutation that affects a regulatory region of a gene." [SO:ke] -is_a: SO:1000132 ! mutation - -[Term] -id: SO:0000054 -name: aneuploid -def: "A kind of chromosome variation where the chromosome complement that is not an exact multiple of the haploid number." [SO:ke] -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0000055 -name: hyperploid -def: "A kind of chromosome variation where the chromosome complement that is not an exact multiple of the haploid number as extra chromosomes are present." [SO:ke] -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000056 -name: hypoploid -def: "A kind of chromosome variation where the chromosome complement that is not an exact multiple of the haploid number as some chromosomes are missing." [SO:ke] -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000057 -name: operator -def: "A regulatory element of an operon to which activators or repressors bind hereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -synonym: "operator segment" EXACT [] -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000058 -name: assortment_derived_aneuploid -synonym: "assortment-derived_aneuploid" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000059 -name: nuclease_binding_site -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0000060 -name: compound_chromosome_arm -is_a: SO:1000042 ! 
compound_chromosome - -[Term] -id: SO:0000061 -name: restriction_enzyme_binding_site -synonym: "restriction endonuclease binding site" EXACT [] -synonym: "restriction endonuclease recognition site" RELATED [] -synonym: "restriction enzyme recognition site" RELATED [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000062 -name: deficient_intrachromosomal_transposition -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:0000063 -name: deficient_interchromosomal_transposition -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:0000064 -name: gene_by_transcript_attribute -comment: This classes of attributes was added by MA to allow the broad description of genes based on qualities of the transcript(s). A product of SO meeting 2004. -is_obsolete: true - -[Term] -id: SO:0000065 -name: free_chromosome_arm -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000066 -name: gene_by_polyadenylation_attribute -is_obsolete: true - -[Term] -id: SO:0000067 -name: gene_to_gene_feature -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000068 -name: overlapping -def: "An attribute describing a gene that has sequence that overlaps the sequence of another gene." [SO:ke] -is_a: SO:0000067 ! gene_to_gene_feature - -[Term] -id: SO:0000069 -name: inside_intron -def: "An attribute to describe a gene when it is located within the intron of another gene." [SO:ke] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000070 -name: inside_intron_antiparallel -def: "An attribute to describe a gene when it is located within the intron of another gene and on the opposite strand." [SO:ke] -is_a: SO:0000069 ! inside_intron - -[Term] -id: SO:0000071 -name: inside_intron_parallel -def: "An attribute to describe a gene when it is located within the intron of another gene and on the same strand." [SO:ke] -is_a: SO:0000069 ! 
inside_intron - -[Term] -id: SO:0000072 -name: end_overlapping_gene -is_obsolete: true - -[Term] -id: SO:0000073 -name: five_prime_three_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's 3' region." [SO:ke] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000074 -name: five_prime_five_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's five prime region." [SO:ke] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000075 -name: three_prime_three_prime_overlap -def: "An attribute to describe a gene when the 3' region overlaps with another gene's 3' region." [SO:ke] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000076 -name: three_prime_five_prime_overlap -def: "An attribute to describe a gene when the 3' region overlaps with another gene's 5' region." [SO:ke] -synonym: "5' 3' overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000077 -name: antisense -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000078 -name: polycistronic_transcript -def: "A transcript that is polycistronic." [SO:xp] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000079 -name: dicistronic_transcript -def: "A transcript that is dicistronic." [SO:ke] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000080 -name: operon_member -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0000081 -name: gene_array_member -synonym: "gene array member" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000082 -name: processed_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000083 -name: macronuclear_sequence -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000084 -name: micronuclear_sequence -is_a: SO:0000736 ! 
organelle_sequence - -[Term] -id: SO:0000085 -name: gene_by_genome_location -is_obsolete: true - -[Term] -id: SO:0000086 -name: gene_by_organelle_of_genome -is_obsolete: true - -[Term] -id: SO:0000087 -name: nuclear_gene -def: "A gene from nuclear sequence." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000088 -name: mt_gene -def: "A gene located in mitochondrial sequence." [SO:xp] -synonym: "mitochondrial gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000089 -name: kinetoplast_gene -def: "A gene located in kinetoplast sequence." [SO:xp] -intersection_of: SO:0000088 ! mt_gene -intersection_of: has_origin SO:0000741 ! kinetoplast_sequence - -[Term] -id: SO:0000090 -name: plastid_gene -def: "A gene from plastid sequence." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000091 -name: apicoplast_gene -def: "A gene from apicoplast sequence." [SO:xp] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0000092 -name: ct_gene -def: "A gene from chloroplast sequence." [SO:xp] -synonym: "chloroplast gene" EXACT [] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000093 -name: chromoplast_gene -def: "A gene from chromoplast_sequence." [SO:xp] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000094 -name: cyanelle_gene -def: "A gene from cyanelle sequence." [SO:xp] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000095 -name: leucoplast_gene -def: "A plastid gene from leucoplast sequence." [SO:xp] -intersection_of: SO:0000090 ! 
plastid_gene -intersection_of: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000096 -name: proplastid_gene -def: "A gene from proplastid sequence." [SO:ke] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000748 ! proplastid_sequence - -[Term] -id: SO:0000097 -name: nucleomorph_gene -def: "A gene from nucleomorph sequence." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000098 -name: plasmid_gene -def: "A gene from plasmid sequence." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000749 ! plasmid_location - -[Term] -id: SO:0000099 -name: proviral_gene -def: "A gene from proviral sequence." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000751 ! proviral_location - -[Term] -id: SO:0000100 -name: endogenous_retroviral_gene -def: "A proviral gene with origin endogenous retrovirus." [SO:xp] -intersection_of: SO:0000099 ! proviral_gene -intersection_of: has_origin SO:0000903 ! endogenous_retroviral_sequence - -[Term] -id: SO:0000101 -name: transposable_element -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -synonym: " transposon" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000102 -name: expressed_sequence_match -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -def: "The end of the clone insert." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000104 -name: polypeptide -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." 
[SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000001 ! region -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000105 -name: chromosome_arm -def: "A region of the chromosome between the centromere and the telomere. Human chromosomes have two arms, the p arm (short) and the q arm (long) which are separated from each other by the centromere." [http://www.exactsciences.com/cic/glossary/_index.htm] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000106 -name: non_capped_primary_transcript -is_obsolete: true - -[Term] -id: SO:0000107 -name: sequencing_primer -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000108 -name: mRNA_with_frameshift -def: "An mRNA with a frameshift." [SO:xp] -synonym: "frameshifted mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000109 -name: sequence_variant_obs -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000110 -name: sequence_feature -def: "An extent of biological sequence." [SO:ke] -subset: SOFA -synonym: "located sequence feature" RELATED [] -synonym: "located_sequence_feature" EXACT [] -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0000111 -name: transposable_element_gene -def: "A gene encoded within a transposable element. For example gag, int, env and pol are the transposable element genes of the TY element in yeast." [SO:ke] -intersection_of: SO:0000704 ! gene -intersection_of: part_of SO:0000101 ! transposable_element - -[Term] -id: SO:0000112 -name: primer -def: "A short preexisting polynucleotide chain to which new deoxyribonucleotides can be added by DNA polymerase." 
[http://www.ornl.gov/TechResources/Human_Genome/publicat/primer2001/glossary.html] -subset: SOFA -synonym: "primer oligonucleotide" EXACT [] -synonym: "primer polynucleotide" EXACT [] -synonym: "primer sequence" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000113 -name: proviral_region -def: "A viral sequence which has integrated into a host genome." [SO:ke] -subset: SOFA -synonym: "proviral sequence" RELATED [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000114 -name: methylated_C -def: "A methylated deoxy-cytosine." [SO:ke] -subset: SOFA -synonym: "methylated C" EXACT [] -synonym: "methylated cytosine" EXACT [] -synonym: "methylated cytosine base" EXACT [] -synonym: "methylated cytosine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000115 -name: transcript_feature -is_obsolete: true - -[Term] -id: SO:0000116 -name: edited -def: "An attribute describing a sequence that is modified by editing." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000117 -name: transcript_with_readthrough_stop_codon -is_obsolete: true - -[Term] -id: SO:0000118 -name: transcript_with_translational_frameshift -def: "A transcript with a translational frameshift." [SO:xp] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000887 ! translational_frameshift - -[Term] -id: SO:0000119 -name: regulated -def: "An attribute to describe a sequence that is regulated." [SO:ke] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns. -subset: SOFA -synonym: "pre mRNA" RELATED [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000121 -name: forward_primer -def: "A single stranded oligo used for polymerase chain reaction." 
[http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "forward primer oligonucleotide" EXACT [] -synonym: "forward primer polynucleotide" EXACT [] -synonym: "forward primer sequence" EXACT [] -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000122 -name: RNA_sequence_secondary_structure -def: "A folded RNA sequence." [SO:ke] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000123 -name: transcriptionally_regulated -def: "An attribute describing a gene that is regulated at transcription." [SO:ma] -comment: By:. -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -def: "Expressed in relatively constants amounts with out regard to cellular environmental conditions such as the concentration of a particular substrate." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -def: "An inducer molecule is required for transcription to occur." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -def: "An repressor molecule is required for transcription to stop." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000127 -name: silenced_gene -def: "A gene that is silenced." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000893 ! silenced - -[Term] -id: SO:0000128 -name: gene_silenced_by_DNA_modification -def: "A silenced gene by DNA modification." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000129 -name: gene_silenced_by_DNA_methylation -def: "A silenced gene silenced DNA methylation." [SO:xp] -synonym: "methylation-silenced gene" EXACT [] -intersection_of: SO:0000128 ! gene_silenced_by_DNA_modification -intersection_of: has_quality SO:0000895 ! 
silenced_by_DNA_methylation - -[Term] -id: SO:0000130 -name: post_translationally_regulated -def: "An attribute describing a gene that is regulated after it has been translated." [SO:ke] -synonym: "post-translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000131 -name: translationally_regulated -def: "An attribute describing a gene that is regulated as it is translated." [SO:ke] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000133 -name: epigenetically_modified -def: "This attribute describes a gene where heritable changes other than those in the DNA sequence occur. These changes include: modification to the DNA (such as DNA methylation, the covalent modification of cytosine), and post-translational modification of histones." [SO:ke] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000134 -name: imprinted -def: "Imprinted genes are epigenetically modified genes that are expressed monoallelically according to their parent of origin." [SO:ke] -is_a: SO:0000119 ! regulated -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000135 -name: maternally_imprinted -def: "The maternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -def: "The paternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -def: "Allelic exclusion is a process occuring in diploid organisms, where a gene is inactivated and not expressed in that cell." [SO:ke] -comment: Exapmles are x-innactivation and immunoglobulin formation. 
-is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000138 -name: gene_rearranged_at_DNA_level -def: "An epigenetically modified gene, rearranged at the DNA level." [SO:xp] -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000904 ! rearranged_at_DNA_level - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -comment: Gene:. -subset: SOFA -is_a: SO:0000837 ! UTR_region - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of sequence which may be used to manufacture a longer assembled, sequence." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000145 -name: recoded_codon -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000146 -name: capped -def: "An attribute describing when a sequence, usually an mRNA is capped by the addition of a modified guanine nucleotide at the 5' end." [SO:ke] -is_a: SO:0000237 ! 
transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region that codes for portion of spliced messenger RNA (SO:0000234); may contain 5'-untranslated region (SO:0000204), all open reading frames (SO:0000236) and 3'-untranslated region (SO:0000205)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unvailable bases." [SO:ls] -subset: SOFA -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in E. coli or some other organism." [http://www.geospiza.com/community/support/glossary/] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
-synonym: "yeast artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "bacterial artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000154 -name: PAC -def: "P1 Artificial Chromosome. These vectors can hold large inserts, typically 80-200 kb, and propagate in E. coli as a single copy episome." [http://www.ncbi.nlm.nih.gov/genome/guide/mouse/glossary.htm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "P1" EXACT [] -synonym: "P1 artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000155 -name: plasmid -def: "A self-replicating circular DNA molecule that is distinct from a chromosome in the organism." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "plasmid sequence" EXACT [] -is_a: SO:0000695 ! reagent -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as a plasmid or packaged as a phage,since they retain the lambda cos sites." [SO:ma] -comment: Paper: vans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1):9-20. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cosmid vector" EXACT [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." 
[SO:ma] -synonym: "phagemid vector" RELATED [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilises the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource: mapping and analysis of 96 clones. Genomics 1996. -synonym: "fosmid vector" RELATED [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000159 -name: deletion -def: "The point at which a deletion occured." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -is_a: SO:0000001 ! region -is_a: SO:0001059 ! sequence_alteration -relationship: sequence_of SO:0000045 ! delete - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_obsolete: true - -[Term] -id: SO:0000161 -name: methylated_A -def: "A methylated adenine." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "The position where intron is excised." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_splice_site -def: "The junction between the 3 prime end of an exon and the following intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000164 -name: three_prime_splice_site -def: "The junction between the 3 prime end of an intron and the following exon." 
[http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -is_a: SO:0000727 ! CRM -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000166 -name: enhancer_bound_by_factor -def: "An enhancer bound by a factor." [SO:xp] -intersection_of: SO:0000165 ! enhancer -intersection_of: has_quality SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology.\nThe region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." [SO:ke] -synonym: "pol I promoter" EXACT [] -synonym: "polymerase I promoter" EXACT [] -synonym: "RNA polymerase A promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." 
[SO:ke] -synonym: "pol II promoter" RELATED [] -synonym: "polymerase II promoter" EXACT [] -synonym: "RNA polymerase B promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." [SO:ke] -synonym: "pol III promoter" EXACT [] -synonym: "polymerase III promoter" EXACT [] -synonym: "RNA polymerase C promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "CAAT-box" EXACT [] -is_a: SO:0000844 ! RNA_II_promoter_region - -[Term] -id: SO:0000173 -name: GC_rich_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "GC-rich region" EXACT [] -is_a: SO:0000844 ! RNA_II_promoter_region - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "Goldstein-Hogness box" EXACT [] -is_a: SO:0000832 ! promoter_region -relationship: part_of SO:0000170 ! RNApol_II_promoter -relationship: part_of SO:0000171 ! 
RNApol_III_promoter - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-10 signal" EXACT [] -synonym: "minus;10" RELATED [] -synonym: "Pribnow box" EXACT [] -synonym: "Pribnow Schaller box" EXACT [] -synonym: "Pribnow-Schaller box" EXACT [] -is_a: SO:0000843 ! bacterial_RNApol_promoter_region - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-35 signal" EXACT [] -synonym: "minus;35" EXACT [] -is_a: SO:0000843 ! bacterial_RNApol_promoter_region - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." [http://www.genpromag.com/scripts/glossary.asp?LETTER=R] -synonym: "class I" RELATED [] -synonym: "class I transposon" EXACT [] -synonym: "retrotransposon element" EXACT [] -is_a: SO:0000101 ! 
transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." [SO:ke] -synonym: "class II" RELATED [] -synonym: "class II transposon" EXACT [] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." [SO:ke] -synonym: "long terminal repeat retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A segment of DNA that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
-subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -synonym: "5' intron" EXACT [] -synonym: "5' intron sequence" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000191 -name: interior_intron -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -synonym: "3' intron" EXACT [] -synonym: "3' intron sequence" RELATED [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A polymorphism detectable by the size differences in DNA fragments generated by a restriction enzyme." [PMID:6247908] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "LINE" EXACT [] -synonym: "Long interspersed element" EXACT [] -synonym: "Long interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon, including the stop_codon." [SO:ke] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_exon_coding_region -def: "The sequence of the 5' exon that encodes for protein." [SO:ke] -is_a: SO:0000195 ! coding_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_exon_coding_region -def: "The sequence of the 3' exon that encodes for protein." [SO:ke] -is_a: SO:0000195 ! 
coding_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke] -synonym: "translocated sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration -relationship: sequence_of SO:0000049 ! translocate - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -synonym: "5' coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000201 -name: interior_exon -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The exon that is most 3-prime on a given transcript." [SO:ma] -synonym: "3' coding exon" RELATED [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime and three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -is_a: SO:0000203 ! 
UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." [SO:ke] -synonym: "Short interspersed element" EXACT [] -synonym: "Short interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_variation -synonym: "simple sequence lenght variation" EXACT [] -synonym: "simple sequence length polymorphism" RELATED [] -is_a: SO:0000248 ! sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "terminal inverted repeat" EXACT [] -synonym: "TIR element" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -synonym: "ribosomal RNA primary transcript" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253)." [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -def: "A primary transcript encoding alanyl tRNA." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear mRNA (SO:0000274)." [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: processed_transcript -def: "A transcript which has undergone the necessary modifications for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. 
In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processd_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -is_a: SO:0000233 ! processed_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds a TF complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The inframe interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SO:ma, SO:rb] -comment: The definition was modified by Rama. This terms now basically is the same as a CDS. This must be revised. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterised by large modular imperfect long inverted repeats." 
[http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "long inverted repeat element" RELATED [] -synonym: "LVR element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The DNA sequences extending on either side of a specific locus." [http://biotech.icmb.utexas.edu/search/dict-search.mhtml] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000240 -name: chromosome_variation -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0000241 -name: internal_UTR -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polyicistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke] -synonym: "internal ribosomal entry sequence" EXACT [] -synonym: "internal ribosomal entry site" EXACT [] -synonym: "internal ribosome entry sequence" RELATED [] -synonym: "IRES" EXACT [] -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_obsolete: true - -[Term] -id: SO:0000246 -name: polyadenylated -def: "A attribute describing the addition of a poly A tail to the 3' end of a mRNA molecule." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000248 -name: sequence_length_variation -is_a: SO:1000002 ! 
substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: " ribosomal ribonucleic acid" EXACT [] -synonym: "ribsomal RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
-subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000254 -name: alanyl_tRNA -def: "A tRNA sequence that has an alanine anticodon, and a 3' binding region." [SO:ke] -synonym: "alanyl-transfer ribonucleic acid" EXACT [] -synonym: "alanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -def: "A tRNA sequence that has an asparagine anticodon, and a 3' binding region." [SO:ke] -synonym: "asparaginyl-transfer ribonucleic acid" EXACT [] -synonym: "asparaginyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -def: "A tRNA sequence that has an asaprtic acid anticodon, and a 3' binding region." [SO:ke] -synonym: "aspartyl-transfer ribonucleic acid" EXACT [] -synonym: "aspartyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -def: "A tRNA sequence that has a cysteine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "cysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "cysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -def: "A tRNA sequence that has a glutamine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "glutaminyl-transfer ribonucleic acid" EXACT [] -synonym: "glutaminyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -def: "A tRNA sequence that has a glutamic acid anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "glutamyl-transfer ribonucleic acid" EXACT [] -synonym: "glutamyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! 
tRNA - -[Term] -id: SO:0000261 -name: glycyl_tRNA -def: "A tRNA sequence that has a glycine acid anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "glycyl-transfer ribonucleic acid" RELATED [] -synonym: "glycyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000262 -name: histidyl_tRNA -def: "A tRNA sequence that has a histidine acid anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "histidyl-transfer ribonucleic acid" EXACT [] -synonym: "histidyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -def: "A tRNA sequence that has an isoleucine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "isoleucyl-transfer ribonucleic acid" EXACT [] -synonym: "isoleucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000264 -name: leucyl_tRNA -def: "A tRNA sequence that has a leucine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "leucyl-transfer ribonucleic acid" EXACT [] -synonym: "leucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000265 -name: lysyl_tRNA -def: "A tRNA sequence that has a lysine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "lysyl-transfer ribonucleic acid" EXACT [] -synonym: "lysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000266 -name: methionyl_tRNA -def: "A tRNA sequence that has a methionine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "methionyl-transfer ribonucleic acid" EXACT [] -synonym: "methionyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -def: "A tRNA sequence that has a phenyle alanine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "phenylalanyl-transfer ribonucleic acid" EXACT [] -synonym: "phenylalanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! 
tRNA - -[Term] -id: SO:0000268 -name: prolyl_tRNA -def: "A tRNA sequence that has a proline anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "prolyl-transfer ribonucleic acid" EXACT [] -synonym: "prolyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000269 -name: seryl_tRNA -def: "A tRNA sequence that has a serine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "seryl-transfer ribonucleic acid" RELATED [] -synonym: "seryl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000270 -name: threonyl_tRNA -def: "A tRNA sequence that has a threonine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "threonyl-transfer ribonucleic acid" EXACT [] -synonym: "threonyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -def: "A tRNA sequence that has a tryptophan anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "tryptophanyl-transfer ribonucleic acid" EXACT [] -synonym: "tryptophanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -def: "A tRNA sequence that has a tyrosine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "tyrosyl-transfer ribonucleic acid" EXACT [] -synonym: "tyrosyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000273 -name: valyl_tRNA -def: "A tRNA sequence that has a valine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "valyl-transfer ribonucleic acid" EXACT [] -synonym: "valyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000274 -name: snRNA -def: "Small non-coding RNA in the nucleoplasm. A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [ems:WB, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
-subset: SOFA -synonym: "small nuclear RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000275 -name: snoRNA -def: "Small nucleolar RNAs (snoRNAs) are involved in the processing and modification of rRNA in the nucleolus. There are two main classes of snoRNAs: the box C/D class, and the box H/ACA class. U3 snoRNA is a member of the box C/D class. Indeed, the box C/D element is a subset of the six short sequence elements found in all U3 snoRNAs, namely boxes A, A', B, C, C', and D. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpinprecursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." 
[PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000277 -name: bound_by_factor -def: "An attribute describing a sequence that is bound by another molecule." [SO:ke] -comment: Formerly called transcript_by_bound_factor. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000278 -name: transcript_bound_by_nucleic_acid -def: "A transcript that is bound by a nucleic acid." [SO:xp] -comment: Formerly called transcript_by_bound_nucleic_acid. -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000876 ! bound_by_nucleic_acid - -[Term] -id: SO:0000279 -name: transcript_bound_by_protein -def: "A transcript that is bound by a protein." [SO:xp] -comment: Formerly called transcript_by_bound_protein. -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000875 ! bound_by_protein - -[Term] -id: SO:0000280 -name: engineered_gene -def: "A gene that is engineered." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000281 -name: engineered_foreign_gene -def: "A gene that is engineered and foreign." [SO:xp] -intersection_of: SO:0000280 ! engineered_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000282 -name: mRNA_with_minus_1_frameshift -def: "An mRNA with a minus 1 frameshift." [SO:xp] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000866 ! minus_1_frameshift - -[Term] -id: SO:0000283 -name: engineered_foreign_transposable_element_gene -def: "A transposible_element that is engineered and foreign." [SO:xp] -intersection_of: SO:0000111 ! transposable_element_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! 
foreign - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartate and interupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000285 -name: foreign_gene -def: "A gene that is foreign." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "direct terminal repeat" RELATED [] -synonym: "long terminal repeat" EXACT [] -synonym: "LTR" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000287 -name: fusion_gene -def: "A gene that is a fusion." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000806 ! fusion - -[Term] -id: SO:0000288 -name: engineered_fusion_gene -def: "A fusion gene that is engineered." [SO:xp] -intersection_of: SO:0000287 ! fusion_gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000289 -name: microsatellite -def: "A very short unit sequence of DNA (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite" EXACT [] -synonym: "dinucleotide repeat microsatellite locus" EXACT [] -synonym: "dinucleotide repeat microsatellite marker" EXACT [] -is_a: SO:0000289 ! 
microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite marker" RELATED [] -synonym: "rinucleotide repeat microsatellite" EXACT [] -synonym: "trinucleotide repeat microsatellite locus" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_obsolete: true - -[Term] -id: SO:0000293 -name: engineered_foreign_repetitive_element -def: "A repetitive element that is engineered and foreign." [SO:xp] -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -synonym: "U12-dependent intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: " ori" EXACT [] -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "displacement loop" RELATED [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -is_a: SO:0000001 ! region - -[Term] -id: SO:0000299 -name: specific_recombination_site -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000301 -name: vertebrate_immune_system_gene_recombination_feature -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interupted palidrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_base_site -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G or (in RNA) U." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -is_a: SO:0000305 ! modified_base_site - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_obsolete: true - -[Term] -id: SO:0000309 -name: computed_feature -is_obsolete: true - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_obsolete: true - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to: -is_obsolete: true - -[Term] -id: SO:0000312 -name: experimentally_determined -def: "Attribute to describe a feature that has been experiemntally verified." [SO:ke] -is_a: SO:0000789 ! validated - -[Term] -id: SO:0000313 -name: stem_loop -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "stem-loop" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: transcription_start_site -def: "The site where transcription begins." [SO:ke] -subset: SOFA -synonym: "TSS" EXACT [] -is_a: SO:0000699 ! junction -is_a: SO:0000835 ! 
primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000151 ! clone - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke] -is_a: SO:0000344 ! splice_enhancer -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000321 -name: mRNA_with_plus_1_frameshift -def: "An mRNA with a plus 1 frameshift." [SO:ke] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000868 ! plus_1_frameshift - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "translation start" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000695 ! 
reagent - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke] -synonym: "translation_end" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray oligonucleotide" EXACT [] -is_a: SO:0000051 ! probe -is_a: SO:0000324 ! tag -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000329 -name: mRNA_with_plus_2_frameshift -def: "An mRNA with a plus 2 frameshift." [SO:xp] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000869 ! plus_2_framshift - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! 
junction -relationship: part_of SO:0000233 ! processed_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000335 -name: mRNA_with_minus_2_frameshift -def: "A mRNA with a minus 2 frameshift." [SO:ke] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000867 ! minus_2_frameshift - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITES do not encode proteins." [http:www.pnas.org/cgi/content/full/97/18/10083] -synonym: "miniature inverted repeat transposable element" EXACT [] -is_a: SO:0000208 ! 
terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome whioch promotes recombination." [SO:rd] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "Expressed Sequence Tag: The sequence of a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [http://genomics.phrma.org/lexicon/e.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000695 ! reagent -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000346 -name: loxP_site -synonym: "Cre-recombination target region" RELATED [] -is_a: SO:0000947 ! 
resolution_site - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -is_a: SO:0000343 ! match - -[Term] -id: SO:0000348 -name: nucleic_acid -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -is_a: SO:0000343 ! match - -[Term] -id: SO:0000350 -name: FRT_site -def: "An inversion site found on the Saccharomyces cerevisiae 2 micron plasmid." [SO:ma] -synonym: "FLP recombination target region" EXACT [] -is_a: SO:0000948 ! inversion_site - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "A sequence of nucleotides or amino acids that has been designed by an experimentor and which may, or may not, correspond with any natural sequence." [SO:ma] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000352 -name: DNA -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000353 -name: assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which is co-inherited as the result of the lack of historic recombination within it." [SO:ma] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000356 -name: RNA -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: flanked -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000358 -name: protein -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000359 -name: floxed -def: "An attribute describing sequence that is flanked by Lox-P sites." [SO:ke] -is_a: SO:0000357 ! 
flanked - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together signify a unique amino acid or the termination of translation." [http://genomics.phrma.org/lexicon/c.html] -subset: SOFA -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000361 -name: FRT_flanked -def: "An attribute to describe sequence that is flanked by the FLP recombinase recognition site, FRT." [SO:ke] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000362 -name: invalidated_by_chimeric_cDNA -def: "A cDNA clone constructed from more than one mRNA. Usually an experimental artifact." [SO:ma] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000363 -name: floxed_gene -def: "A transgene that is floxed." [SO:xp] -intersection_of: SO:0000902 ! transgene -intersection_of: has_quality SO:0000359 ! floxed - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposible element." [SO:ke] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "A region encoding an integrase which acts at a site adjacent to it (attI_site) to insert DNA which must include but is not limited to an attC_site." [SO:as] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: position_of SO:0000046 ! insert - -[Term] -id: SO:0000367 -name: attI_site -def: "A region within an integron, adjacent to an integrase, at which site specific recombination involving an attC_site takes place." [SO:as] -synonym: "" RELATED [] -synonym: "attI site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." 
[SO:ke] -subset: SOFA -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -is_obsolete: true - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/~smaloy/Glossary/C.html] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000373 -name: recombinationally_inverted_gene -def: "A recombinationally rearranged gene by inversion." [SO:xp] -intersection_of: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: associated_with SO:0000047 ! invert - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -is_a: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000375 -name: rRNA_5.8S -def: "5. 8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5. 8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5. 8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 
6S RNA represses expression from a sigma70-dependent promoter during stationary phase." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB-RsmB RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -is_a: SO:0000370 ! 
small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -is_a: SO:0000378 ! DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http://rnaworld.bio.ukans.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA -is_a: SO:0000374 ! ribozyme - -[Term] -id: SO:0000381 -name: group_IIA_intron -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -is_a: SO:0000644 ! antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. 
In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -is_a: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterised activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -is_a: SO:0000374 ! ribozyme - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. 
Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesises telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -is_a: SO:0000372 ! enzymatic_RNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA -An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -def: "An attribute describes a quality of sequence." [SO:ke] -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0000401 -name: gene_attribute -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_obsolete: true - -[Term] -id: SO:0000403 -name: U14_snRNA -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. 
A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016] -subset: SOFA -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonuceoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron." [PMID:1899376] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide which functions as a part of the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S rRNA" RELATED [] -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. 
It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: Discrete. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A region of a molecule that binds to a protein." [SO:ke] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000411 -name: rescue_region -def: "A region that rescues." [SO:xp] -synonym: "rescue fragment" EXACT [] -synonym: "rescue segment" RELATED [] -intersection_of: SO:0000695 ! reagent -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "Any of the individual polynucleotide sequences produced by digestion of DNA with a restriction endonuclease." [http://www.agron.missouri.edu/cgi-bin/sybgw_mdb/mdb3/Term/119] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequences differs from that of a specified sequence." [SO:ke] -subset: SOFA -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000414 -name: invalidated_by_genomic_contamination -def: "An attribue to describe a feature that is invalidated due to genomic contamination." [SO:ke] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000415 -name: invalidated_by_genomic_polyA_primed_cDNA -def: "An attribue to describe a feature that is invalidated due to polyA priming." [SO:ke] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000416 -name: invalidated_by_partial_processing -def: "An attribue to describe a feature that is invalidated due to partial processing." [SO:ke] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000417 -name: polypeptide_domain -alt_id: BS:00012 -def: "A structurally or functionally defined protein region. In proteins with multiple domains, the combination of the domains determines the function of the protein. 
Region which has been shown to recur throughout evolution." [EBIBS:GAR, http://www.molbiol.bbsrc.ac.uk/new_protein/domains.html] -comment: Range. Old definition from before biosapiens: A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains. -subset: biosapiens -synonym: "ca_bind" NARROW [] -synonym: "DNA_bind" NARROW [] -synonym: "domain" RELATED [] -synonym: "np_bind" NARROW [] -synonym: "polypeptide_domain" EXACT [] -synonym: "zn_fing" NARROW [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminal that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "signal peptide coding sequence" EXACT [] -synonym: "signal_peptide" EXACT [] -is_a: SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The extent of a polypeptide chain in the mature protein." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" EXACT [] -synonym: "mature peptide" RELATED [] -synonym: "mature_protein_region" EXACT [] -is_a: SO:0000839 ! 
polypeptide_region - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -synonym: "5' TIR" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -synonym: "3' TIR" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -synonym: "U5 long terminal repeat region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000423 -name: R_LTR_region -synonym: "R long terminal repeat region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000424 -name: U3_LTR_region -synonym: "U3 long terminal repeat region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000425 -name: five_prime_LTR -synonym: "5' long terminal repeat" EXACT [] -synonym: "5' LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -synonym: "3' long terminal repeat" EXACT [] -synonym: "3' LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -synonym: "R 5' long term repeat region" EXACT [] -is_a: SO:0000423 ! R_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -synonym: "U5 5' long terminal repeat region" EXACT [] -is_a: SO:0000422 ! U5_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -synonym: "U3 5' long term repeat region" EXACT [] -is_a: SO:0000424 ! U3_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -synonym: "R 3' long terminal repeat region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -synonym: "U3 3' long terminal repeat region" EXACT [] -is_a: SO:0000849 ! 
three_prime_LTR_component - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -synonym: "U5 3' long terminal repeat region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: target_site_duplication -def: "A sequence of the target DNA that is duplicated when a transposable element or phage inserts; usually found at each end the insertion." [http://www.koko.gov.my/CocoaBioTech/Glossaryt.html] -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." [SO:ke] -synonym: "LTR retrotransposon poly purine tract" RELATED [] -is_a: SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! ring_chromosome - -[Term] -id: SO:0000440 -name: vector -def: "A DNA molecule that can be used to transfer DNA molecules between organisms." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000443 -name: polymer_attribute -def: "An attribute to describe the kind of biological sequence." [SO:ke] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "5' nc exon" EXACT [] -synonym: "5' non coding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." [SO:ke] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." [SO:ke] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequencyof these components." [SO:ma] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -is_a: SO:0000341 ! 
chromosome_band - -[Term] -id: SO:0000451 -name: gene_with_polyadenylated_mRNA -def: "A gene that encodes a polyadenylated mRNA." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000871 ! polyadenylated_mRNA - -[Term] -id: SO:0000452 -name: transgene_attribute -is_obsolete: true - -[Term] -id: SO:0000453 -name: transposition -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A small, 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000455 -name: gene_with_mRNA_with_frameshift -def: "A gene that encodes an mRNA with a frameshift." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000865 ! frameshift - -[Term] -id: SO:0000456 -name: recombinationally_rearranged_gene -def: "A gene that is recombinationally rearranged." [SO:ke] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000940 ! recombinationally_rearranged - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -def: "A chromosome dulication involving an insertion from another chromosome." [SO:ke] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment -relationship: part_of SO:0000504 ! D_DJ_C_cluster -relationship: part_of SO:0000505 ! D_DJ_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000508 ! D_DJ_J_cluster -relationship: part_of SO:0000509 ! D_J_C_cluster -relationship: part_of SO:0000527 ! 
V_D_DJ_C_cluster -relationship: part_of SO:0000528 ! V_D_DJ_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000532 ! V_D_J_cluster -relationship: part_of SO:0000559 ! D_cluster -relationship: part_of SO:0000560 ! D_J_cluster - -[Term] -id: SO:0000459 -name: gene_with_trans_spliced_transcript -def: "A gene with a transcript that is trans-spliced." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000479 ! trans_spliced_transcript - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_segment -comment: I am using the term segment instead of gene her to avoid confusion with the region 'gene'. -synonym: "vertebrate_immunoglobulin/T-cell receptor gene" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000463 -name: encodes_alternately_spliced_transcripts -def: "A gene that encdoes more than one transcript." [SO:ke] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendent of an exon." [SO:ke] -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." 
[FB:km] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-GENE" EXACT [] -synonym: "variable_gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment -relationship: part_of SO:0000518 ! V_DJ_cluster -relationship: part_of SO:0000519 ! V_DJ_J_cluster -relationship: part_of SO:0000520 ! V_VDJ_C_cluster -relationship: part_of SO:0000521 ! V_VDJ_cluster -relationship: part_of SO:0000522 ! V_VDJ_J_cluster -relationship: part_of SO:0000523 ! V_VJ_C_cluster -relationship: part_of SO:0000524 ! V_VJ_cluster -relationship: part_of SO:0000525 ! V_VJ_J_cluster -relationship: part_of SO:0000526 ! V_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000528 ! V_D_DJ_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000532 ! V_D_J_cluster -relationship: part_of SO:0000534 ! V_J_cluster -relationship: part_of SO:0000535 ! V_J_C_cluster -relationship: part_of SO:0000542 ! V_DJ_C_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster -relationship: part_of SO:0000566 ! V_VJ_J_C_cluster - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -def: "An attribute describing a gene sequence where the resulting proteinregulated by the stability of the resulting protein." [SO:ke] -synonym: "post-translationally regulated by protein stability" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! 
assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -def: "An attribute describing a gene sequence where the resulting protein is modified to regulate it." [SO:ke] -synonym: "post-translationally regulated by protein modification" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment -relationship: part_of SO:0000485 ! DJ_J_cluster -relationship: part_of SO:0000487 ! VDJ_J_C_cluster -relationship: part_of SO:0000488 ! VDJ_J_cluster -relationship: part_of SO:0000490 ! VJ_J_C_cluster -relationship: part_of SO:0000491 ! VJ_J_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000508 ! D_DJ_J_cluster -relationship: part_of SO:0000509 ! D_J_C_cluster -relationship: part_of SO:0000511 ! J_C_cluster -relationship: part_of SO:0000513 ! J_cluster -relationship: part_of SO:0000519 ! V_DJ_J_cluster -relationship: part_of SO:0000522 ! V_VDJ_J_cluster -relationship: part_of SO:0000525 ! V_VJ_J_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000532 ! V_D_J_cluster -relationship: part_of SO:0000534 ! V_J_cluster -relationship: part_of SO:0000535 ! V_J_C_cluster -relationship: part_of SO:0000540 ! DJ_J_C_cluster -relationship: part_of SO:0000560 ! D_J_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster -relationship: part_of SO:0000566 ! 
V_VJ_J_C_cluster - -[Term] -id: SO:0000471 -name: autoregulated -def: "The gene product is involved in its own transcriptional regulation." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [CJM:SO] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -def: "The gene product is involved in its own transcriptional regulation where it decreases transcription." [SO:ke] -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -def: "The gene product is involved in its own transcriptional regulation, where it increases transcription." [SO:ke] -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." [SO:ke] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -def: "A gene that is polycistronic." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000478 -name: C_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "C_GENE" EXACT [] -synonym: "constant gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment -relationship: part_of SO:0000487 ! VDJ_J_C_cluster -relationship: part_of SO:0000489 ! VJ_C_cluster -relationship: part_of SO:0000490 ! VJ_J_C_cluster -relationship: part_of SO:0000504 ! D_DJ_C_cluster -relationship: part_of SO:0000506 ! 
D_DJ_J_C_cluster -relationship: part_of SO:0000509 ! D_J_C_cluster -relationship: part_of SO:0000511 ! J_C_cluster -relationship: part_of SO:0000520 ! V_VDJ_C_cluster -relationship: part_of SO:0000523 ! V_VJ_C_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000535 ! V_J_C_cluster -relationship: part_of SO:0000539 ! DJ_C_cluster -relationship: part_of SO:0000540 ! DJ_J_C_cluster -relationship: part_of SO:0000541 ! VDJ_C_cluster -relationship: part_of SO:0000542 ! V_DJ_C_cluster -relationship: part_of SO:0000558 ! C_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster -relationship: part_of SO:0000566 ! V_VJ_J_C_cluster - -[Term] -id: SO:0000479 -name: trans_spliced_transcript -def: "A transcript that is trans-spliced." [SO:xp] -synonym: "trans-spliced transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly." [SO:ke] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occuring at the termini of a DNA transposon." [SO:ke] -synonym: "TIR" EXACT [] -is_a: SO:0000294 ! inverted_repeat -relationship: part_of SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate_immunoglobulin/T-cell receptor gene cluster" EXACT [] -is_a: SO:0000301 ! 
vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -synonym: "three prime exon noncoding region" RELATED [] -is_a: SO:0000852 ! exon_region -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000486 -name: five_prime_exon_noncoding_region -def: "The sequence of the 5' exon preceeding the start codon." [SO:ke] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0000852 ! exon_region -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(VDJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(VJ)-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "3'D-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "3'D-NOMAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "3'D-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "5'D-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "5'D-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "5'-SPACER" EXACT [] -synonym: "five prime D-spacer" EXACT [] -is_a: SO:0000563 ! 
vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." [SO:ke] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000507 -name: pseudogenic_exon -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon -relationship: part_of SO:0000516 ! pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000510 -name: VD_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V_D_GENE" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! 
J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VDJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-CLUSTER" EXACT [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! 
V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." [http://www.pnas.org/cgi/content/full/100/11/6569] -synonym: "ISCR" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." [FB:km] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000548 -name: gene_with_edited_transcript -def: "A gene that encodes a transcript that is edited." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000873 ! 
edited_transcript - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "Region in 5' UTR where ribosome assembles on mRNA." [SO:ke] -synonym: "five prime ribosome binding site" EXACT [] -synonym: "RBS" RELATED [] -synonym: "Shine Dalgarno sequence" EXACT [] -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! processed_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "5' clip" RELATED [] -synonym: "five prime clip" EXACT [] -is_a: SO:0000303 ! 
clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "5'RS" EXACT [] -synonym: "five prime D-recombination signal sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "3'-clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://imgt.cines.fr/ligmb/LIGMlect?query=7] -synonym: "C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: "Seven nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or\nJ-gene recombination feature of an immunoglobulin or T-cell receptor gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "HEPTAMER" RELATED [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000563 -name: vertebrate_immune_system_gene_recombination_spacer -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inverversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promoter -is_a: SO:0000167 ! 
promoter - -[Term] -id: SO:0000569 -name: retrotransposed -alt_id: SO:0100042 -def: "An attribute of protein-coding genes where the protein product has been retrotransposed." [SO:ke] -is_a: SO:0000010 ! protein_coding -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "3'D-RS" EXACT [] -synonym: "three_prime_D-recombination_signal_sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000572 -name: DJ_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-J-GENE" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -relationship: part_of SO:0000485 ! DJ_J_cluster -relationship: part_of SO:0000504 ! D_DJ_C_cluster -relationship: part_of SO:0000505 ! D_DJ_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000508 ! D_DJ_J_cluster -relationship: part_of SO:0000518 ! V_DJ_cluster -relationship: part_of SO:0000519 ! V_DJ_J_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000528 ! V_D_DJ_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000539 ! DJ_C_cluster -relationship: part_of SO:0000540 ! DJ_J_C_cluster -relationship: part_of SO:0000542 ! V_DJ_C_cluster -relationship: part_of SO:0000564 ! 
V_DJ_J_C_cluster - -[Term] -id: SO:0000573 -name: rRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000574 -name: VDJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-J-GENE" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -relationship: part_of SO:0000487 ! VDJ_J_C_cluster -relationship: part_of SO:0000488 ! VDJ_J_cluster -relationship: part_of SO:0000520 ! V_VDJ_C_cluster -relationship: part_of SO:0000521 ! V_VDJ_cluster -relationship: part_of SO:0000522 ! V_VDJ_J_cluster -relationship: part_of SO:0000541 ! VDJ_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster - -[Term] -id: SO:0000575 -name: scRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000576 -name: VJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-J-GENE" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -relationship: part_of SO:0000489 ! VJ_C_cluster -relationship: part_of SO:0000490 ! VJ_J_C_cluster -relationship: part_of SO:0000491 ! VJ_J_cluster -relationship: part_of SO:0000523 ! V_VJ_C_cluster -relationship: part_of SO:0000524 ! V_VJ_cluster -relationship: part_of SO:0000525 ! V_VJ_J_cluster -relationship: part_of SO:0000566 ! V_VJ_J_C_cluster - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_encoding -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." [SO:ma] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://www.rna.ucla.edu] -synonym: "pre-edited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "A tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. TmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and eukaryote nuclear genomes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa RNA" RELATED [] -synonym: "ssrA" RELATED [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_encoding -is_a: SO:0000578 ! 
snoRNA_encoding - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." [SO:ke] -synonym: "10Sa RNA primary transcript" RELATED [] -synonym: "ssrA RNA primary transcript" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyse their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -subset: SOFA -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -is_a: SO:0000188 ! intron -is_a: SO:0000374 ! ribozyme - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. 
Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A stem-loop RNA structure where nucleotides in the loop participate in complementary interactions with a region of RNA downstream of the stem-loop." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12519954&dopt=Abstract] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "H-pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. 
Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "H/ACA box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." 
[http://www.rna.ucla.edu/index.html] -is_obsolete: true - -[Term] -id: SO:0000598 -name: edited_by_C_insertion_and_dinucleotide_insertion -def: "The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs." [http://nsm1.utdallas.edu/bio/miller/physarum/overview.htm] -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000599 -name: edited_by_C_to_U_substitution -is_obsolete: true - -[Term] -id: SO:0000600 -name: edited_by_A_to_I_substitution -is_obsolete: true - -[Term] -id: SO:0000601 -name: edited_by_G_addition -is_obsolete: true - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added) oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. 
Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -subset: SOFA -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." [http://www.rna.ucla/] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "The region between two known genes." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://www.rna.ucla/] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." [http://www.rna.ucla.edu/] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_encoding -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -is_a: SO:0000001 ! region -relationship: adjacent_to SO:0000602 ! 
guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -is_a: SO:0000167 ! promoter -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -is_a: SO:0000752 ! gene_group_regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -is_a: SO:0000951 ! eukaryotic_terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The site where transcription ends." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "tRNA promoter" RELATED [] -is_a: SO:0000171 ! 
RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -def: "A variably distant linear promoter region recognised by TFIIIC, with consensus sequence TGGCnnAGTGG." [SO:ke] -is_a: SO:0000846 ! RNApol_III_promoter_type_2_region - -[Term] -id: SO:0000620 -name: B_box -def: "A variably distant linear promoter region recognised by TFIIIC, with consensus sequence AGGTTCCAnnCC." [SO:ke] -is_a: SO:0000846 ! RNApol_III_promoter_type_2_region - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -def: "An RNA polymerase II type 1 promoter , with consensus sequence CAnnCCn." [SO:ke] -is_a: SO:0000845 ! RNApol_III_promoter_type_1_region - -[Term] -id: SO:0000623 -name: snRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "Combination of short DNA sequence elements which suppress the transcription of an adjacent gene or genes." [http://www.brunel.ac.uk/depts/bio/project/old_hmg/gloss3.htm] -subset: SOFA -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000627 -name: insulator -def: "A trancriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! 
five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -def: "A start codon upstream of the ORF." [SO:ke] -is_a: SO:0000837 ! UTR_region - -[Term] -id: SO:0000631 -name: polycistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." [SO:ke] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000632 -name: monocistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." [SO:ke] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000633 -name: monocistronic_mRNA -def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd] -synonym: "monocistronic processed transcript" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000634 -name: polycistronic_mRNA -def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd] -synonym: "polycistronic processed transcript" RELATED [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "A primary transcript that donates the spliced leader to other mRNA." [SO:ke] -synonym: "mini-exon donor RNA" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000635 ! mini_exon_donor_RNA - -[Term] -id: SO:0000637 -name: engineered_plasmid -def: "A plasmid that is engineered." [SO:xp] -synonym: "engineered plasmid gene" RELATED [] -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000783 ! 
engineered - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -is_a: SO:0000838 ! rRNA_primary_transcript_region - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repetitive sequence spanning 500 to 20,000 base pairs (a repeat unit is 5 - 30 base pairs)." [http://www.rerf.or.jp/eigo/glossary/minisate.htm] -subset: SOFA -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." 
[PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro RNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000648 -name: stRNA_primary_transcript -def: "A primary transcript encoding a small temporal mRNA (SO:0000649)." [SO:ke] -synonym: "small temporal RNA primary transcript" EXACT [] -is_a: SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -subset: SOFA -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -subset: SOFA -is_a: SO:0000252 ! rRNA - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilises 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S rRNA" EXACT [] -is_a: SO:0000651 ! 
large_subunit_rRNA - -[Term] -id: SO:0000654 -name: maxicircle_gene -def: "A mitochondrial gene located in a maxicircle." [SO:xp] -synonym: "maxi-circle gene" EXACT [] -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: has_origin SO:0000742 ! maxicircle_sequence - -[Term] -id: SO:0000655 -name: ncRNA -def: "An mRNA sequence that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -is_a: SO:0000233 ! processed_transcript - -[Term] -id: SO:0000656 -name: stRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "interspersed repeat" EXACT [] -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_obsolete: true - -[Term] -id: SO:0000661 -name: intron_attribute -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -subset: SOFA -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000663 -name: tRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000665 -name: monocistronic_transcript -def: "A transcript that is monocistronic." [SO:xp] -intersection_of: SO:0000673 ! 
transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000666 -name: mobile_intron -is_a: SO:0000661 ! intron_attribute - -[Term] -id: SO:0000667 -name: insertion -def: "A region of sequence that has been inserted." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0001059 ! sequence_alteration -relationship: sequence_of SO:0000046 ! insert - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." [SO:ma] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non-canonical splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." [SO:ke] -is_a: SO:0000162 ! 
splice_site - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -is_a: SO:0000164 ! three_prime_splice_site -is_a: SO:0000675 ! canonical_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." [SO:ke] -is_a: SO:0000163 ! five_prime_splice_site -is_a: SO:0000675 ! canonical_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." [SO:ke] -synonym: "non-canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_splice_site -is_a: SO:0000674 ! non_canonical_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non-canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_splice_site -is_a: SO:0000674 ! non_canonical_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non ATG start codon" EXACT [] -synonym: "non-canonical start codon" EXACT [] -is_a: SO:0000318 ! start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -is_a: SO:0000233 ! processed_transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -is_a: SO:0000344 ! 
splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeting by a nuclease enzyme." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000685 -name: DNAaseI_hypersensitive_site -is_a: SO:0000322 ! nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "For some translocations, particularly but not exclusively, reciprocal translocations, the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements." [SO:ma] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occured." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: position_of SO:0000045 ! delete - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000690 -name: gene_with_polycistronic_transcript -def: "A gene that encodes a polycistronic transcript." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000078 ! polycistronic_transcript - -[Term] -id: SO:0000691 -name: cleaved_initiator_methionine -namespace: BS -alt_id: BS:00067 -def: "Indicates when the initator methionine has been cleaved from the mature sequence." [EBIBS:GAR] -subset: biosapiens -synonym: "cleaved_initiator_methionine" EXACT [] -synonym: "init_met" RELATED [] -synonym: "initator methioninie" EXACT [] -is_a: SO:0001063 ! 
immature_peptide_region - -[Term] -id: SO:0000692 -name: gene_with_dicistronic_transcript -def: "A gene that encodes a dicistronic transcript." [SO:xp] -intersection_of: SO:0000690 ! gene_with_polycistronic_transcript -intersection_of: associated_with SO:0000079 ! dicistronic_transcript - -[Term] -id: SO:0000693 -name: gene_with_recoded_mRNA -def: "A gene that encodes an mRNA that is recoded." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:1001261 ! recoded_mRNA - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives (alleles) exist in normal individuals in some population(s), wherein the least frequent allele has an abundance of 1% or greater." [http://www.cgr.ki.se/cgb/groups/brookes/Articles/essence_of_snps_article.pdf] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000697 -name: gene_with_stop_codon_read_through -def: "A gene that encodes a transcript with stop codon readthrough." [SO:xp] -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: associated_with SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000698 -name: gene_with_stop_codon_redefined_as_pyrrolysine -def: "A gene encoding an mRNA that has the stop codon redefined as pyrrolysine." [SO:xp] -intersection_of: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: associated_with SO:0000884 ! 
stop_codon_redefined_as_pyrrolysine - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology.\nA gene may be considered as a unit of inheritance. -subset: SOFA -is_a: SO:0000001 ! region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjacent copies of a DNA sequence." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The process that produces mature transcripts by combining exons of independent pre-mRNA molecules. The acceptor site lies on the 3' of these molecules." 
[SO:ke] -subset: SOFA -is_a: SO:0000164 ! three_prime_splice_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The site at which trans-splicing occurs." [SO:ke] -synonym: "trans-splice donor site" EXACT [] -is_a: SO:0000163 ! five_prime_splice_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000710 -name: gene_with_stop_codon_redefined_as_selenocysteine -def: "A gene encoding an mRNA that has the stop codon redefined as selenocysteine." [SO:xp] -intersection_of: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: associated_with SO:0000885 ! stop_codon_redefined_as_selenocysteine - -[Term] -id: SO:0000711 -name: gene_with_mRNA_recoded_by_translational_bypass -def: "A gene with mRNA recoded by translational bypass." [SO:xp] -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: associated_with SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:0000712 -name: gene_with_transcript_with_translational_frameshift -def: "A gene encoding a transcript that has a translational frameshift." [SO:xp] -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: associated_with SO:0000887 ! translational_frameshift - -[Term] -id: SO:0000713 -name: DNA_motif -def: "A motif that is active in the DNA form of the sequence." [SO:ke] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000716 -name: dicistronic_mRNA -def: "An mRNA that has the quality dicistronic." 
[SO:ke] -synonym: "dicistronic processed transcript" RELATED [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SO:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interrupted by one or more stop codons; usually identified through intergenomic sequence comparisons." [SO:rb] -comment: Term requested by Rama from SGD. -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000720 -name: foreign_transposable_element -def: "A transposable element that is foreign." [SO:ke] -comment: requested by Michael on 19 Nov 2004. -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000721 -name: gene_with_dicistronic_primary_transcript -def: "A gene that encodes a dicistronic primary transcript." [SO:xp] -comment: Requested by Michael, 19 nov 2004. -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: associated_with SO:1001197 ! dicistronic_primary_transcript - -[Term] -id: SO:0000722 -name: gene_with_dicistronic_mRNA -def: "A gene that encodes a polycistronic mRNA." [SO:xp] -comment: Requested by MA nov 19 2004. 
-synonym: "gene with dicistronic processed transcript" EXACT [] -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: associated_with SO:0000716 ! dicistronic_mRNA - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." [SO:ma] -synonym: "intervening DNA" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminal of the peptide that directs the protein to an organelle (chloroplast, mitochonrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit_peptide" EXACT [] -is_a: SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000726 -name: repeat_unit -def: "A single repeat element." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to comply with the feature table. -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. 
-synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000728 -name: intein -def: "A region of a peptide that A is able to excise itself and rejoin the remaining portions with a peptide bond." [SO:ke] -comment: Intein-mediated protein splicing occurs after mRNA has been translated into a protein. -synonym: "protein intron" RELATED [] -is_a: SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000729 -name: intein_containing -def: "An attribute of protein-coding genes where the initial protein product contains an intein." [SO:ke] -is_a: SO:0000010 ! protein_coding - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unkown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! assembly - -[Term] -id: SO:0000731 -name: fragment -def: "An attribute to describe a feature that is incomplete." [SO:ke] -comment: Term added because of request by MO people. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000732 -name: predicted -def: "An attribute describing an unverified region." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000733 -name: feature_attribute -def: "An attribute describing a located_sequence_feature." [SO:ke] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000734 -name: exemplar_mRNA -def: "An exemplar is a representative cDNA sequence for each gene. The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: Added for the MO people. -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000864 ! exemplar - -[Term] -id: SO:0000735 -name: sequence_location -is_a: SO:0000400 ! 
sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_sequence -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000738 -name: nuclear_sequence -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000740 -name: plastid_sequence -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000741 -name: kinetoplast_sequence -is_a: SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000742 -name: maxicircle_sequence -is_a: SO:0000741 ! kinetoplast_sequence - -[Term] -id: SO:0000743 -name: apicoplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_location -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_location -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -is_a: SO:0005836 ! regulatory_region -relationship: member_of SO:0005855 ! 
gene_group - -[Term] -id: SO:0000753 -name: clone_insert -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000754 -name: lambda_vector -def: "The lambda bacteriophage is the vector for the linear lambda clone. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000755 -name: plasmid_vector -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template." [SO:ma] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -synonym: "single strand cDNA" EXACT [] -synonym: "single-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -synonym: "double strand cDNA" RELATED [] -synonym: "double-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_obsolete: true - -[Term] -id: SO:0000760 -name: YAC_clone -is_obsolete: true - -[Term] -id: SO:0000761 -name: phagemid_clone -is_obsolete: true - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000763 -name: fosmid_clone -is_obsolete: true - -[Term] -id: SO:0000764 -name: BAC_clone -is_obsolete: true - -[Term] -id: SO:0000765 -name: cosmid_clone -is_obsolete: true - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -def: "A tRNA sequence that has a pyrrolysine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "pyrrolysyl-transfer ribonucleic acid" EXACT [] -synonym: "pyrrolysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome." [SO:ma] -is_a: SO:0000155 ! 
plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria. Processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [http://nar.oxfordjournals.org/cgi/content/full/32/15/4531n10, Indiana:kw] -comment: Added in response to comment from Kelly Williams from Indiana. Nov 2005. -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [http://nar.oxfordjournals.org/cgi/content/full/32/15/4531n10, Indiana:kw] -comment: Added in response to Kelly Williams from Indiana. Date: Nov 2005. -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000771 -name: QTL -def: "Quantitative Trait Locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." [http://rgd.mcw.edu/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005. -synonym: "quantitative trait locus" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000772 -name: genomic_island -def: "A genomic island is an integrated mobile genetic element, characterized by size (over 10 Kb). It that has features that suggest a foreign origin. These can include nucleotide distribution (oligonucleotides signature, CG content etc.) 
that differs from the bulck of the chromosome and/or genes suggesting DNA mobility." [Phigo:at, SO:ke] -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands. -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -def: "An adaptive island is a genomic island that provides an adaptive advantage to the host." [SO:ke] -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands. Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands. Evolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA gene. John T. Sullivan and Clive W. 
Ronso PNAS 1998 Apr 28 95 (9) 5145-5149. -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000779 -name: engineered_episome -def: "An episome that is engineered." [SO:xp] -comment: Requested by Lynn Crosby Jan 2006. -intersection_of: SO:0000768 ! episome -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000780 -name: transposable_element_attribute -comment: Added by KE Jan 2006 to capture the kinds of attributes of TEs -is_obsolete: true - -[Term] -id: SO:0000781 -name: transgenic -def: "Attribute describing sequence that has been integrated with foreign sequence." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000782 -name: natural -def: "An attribute describing a feature that occurs in nature." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000783 -name: engineered -def: "An attribute to describe a region that was modified in vitro." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000784 -name: foreign -def: "An atrribute to describe a region from another species." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000785 -name: cloned_region -comment: Added in response to Lynn Crosby. A clone insert may be composed of many cloned regions. -synonym: "cloned segment" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000786 -name: reagent_attribute -comment: Added jan 2006 by KE. -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000787 -name: clone_attribute -is_obsolete: true - -[Term] -id: SO:0000788 -name: cloned -is_obsolete: true - -[Term] -id: SO:0000789 -name: validated -def: "An attribute to describe a feature that has been proven." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000790 -name: invalidated -def: "An attribute describing a feature that is invalidated." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000791 -name: cloned_genomic -is_obsolete: true - -[Term] -id: SO:0000792 -name: cloned_cDNA -is_obsolete: true - -[Term] -id: SO:0000793 -name: engineered_DNA -is_obsolete: true - -[Term] -id: SO:0000794 -name: engineered_rescue_region -def: "A rescue region that is engineered." [SO:xp] -synonym: "engineered rescue fragment" EXACT [] -synonym: "engineered rescue segment" EXACT [] -intersection_of: SO:0000411 ! rescue_region -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000795 -name: rescue_mini_gene -def: "A mini_gene that rescues." [SO:xp] -synonym: "rescue mini-gene" EXACT [] -intersection_of: SO:0000815 ! mini_gene -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000796 -name: transgenic_transposable_element -def: "TE that has been modified in vitro, including insertion of DNA derived from a source other than the originating TE." [SO:mc] -comment: Modified as requested by Lynn - FB. May 2007. -intersection_of: SO:0000101 ! transposable_element -intersection_of: derives_from SO:0000151 ! clone -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000797 -name: natural_transposable_element -def: "TE that exists (or existed) in nature." [SO:mc] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000782 ! natural - -[Term] -id: SO:0000798 -name: engineered_transposable_element -def: "TE that has been modified by manipulations in vitro." [SO:mc] -intersection_of: SO:0000101 ! 
transposable_element -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000799 -name: engineered_foreign_transposable_element -def: "A transposable_element that is engineered and foreign." [SO:mc] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000800 -name: assortment_derived_duplication -def: "A multi-chromosome duplication aberration generated by reassortment of other aberration components." [FB:gm] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000801 -name: assortment_derived_deficiency_plus_duplication -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency and a duplication." [FB:gm] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000802 -name: assortment_derived_deficiency -def: "A multi-chromosome deficiency aberration generated by reassortment of other aberration components." [FB:gm] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000803 -name: assortment_derived_aneuploid -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency or a duplication." [fb:gm] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000804 -name: engineered_region -def: "A region that is engineered." [SO:xp] -synonym: "engineered sequence" EXACT [] -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000805 -name: engineered_foreign_region -def: "A region that is engineered and foreign." [SO:xp] -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000806 -name: fusion -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000807 -name: engineered_tag -def: "A tag that is engineered." 
[SO:xp] -intersection_of: SO:0000324 ! tag -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000808 -name: validated_cDNA_clone -def: "A cDNA clone that has been validated." [SO:xp] -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000789 ! validated - -[Term] -id: SO:0000809 -name: invalidated_cDNA_clone -def: "A cDNA clone that is invalid." [SO:xp] -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000790 ! invalidated - -[Term] -id: SO:0000810 -name: chimeric_cDNA_clone -def: "A cDNA clone invalidated because it is chimeric." [SO:xp] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA - -[Term] -id: SO:0000811 -name: genomically_contaminated_cDNA_clone -def: "A cDNA clone invalidated by genomic contamination." [SO:xp] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000414 ! invalidated_by_genomic_contamination - -[Term] -id: SO:0000812 -name: polyA_primed_cDNA_clone -def: "A cDNA clone invalidated by polyA priming." [SO:xp] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000415 ! invalidated_by_genomic_polyA_primed_cDNA - -[Term] -id: SO:0000813 -name: partially_processed_cDNA_clone -def: "A cDNA invalidated clone by partial processing." [SO:xp] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000416 ! invalidated_by_partial_processing - -[Term] -id: SO:0000814 -name: rescue -def: "An attribute describing a regions ability, when introduced to a mutant organism, to re-establish (rescue) a phenotype." [SO:ke] -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000815 -name: mini_gene -def: "By definition, minigenes are short open-reading frames\n(ORF), usually encoding approximately 9 to 20 amino\nacids, which are expressed in vivo (as distinct from being\nsynthesized as peptide or protein ex vivo and subsequently\ninjected). The in vivo synthesis confers a distinct\nadvantage: the expressed sequences can enter both\nantigen presentation pathways, MHC I (inducing CD8+ T-\ncells, which are usually cytotoxic T-lymphocytes (CTL))\nand MHC II (inducing CD4+ T-cells, usually 'T-helpers'\n(Th)); and can encounter B-cells, inducing antibody\nresponses. Three main vector approaches have been used\nto deliver minigenes: viral vectors, bacterial vectors and\nplasmid DNA." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=Abstract&list_uids=15992153&query_hl=2&itool=pubmed_docsum] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000816 -name: rescue_gene -def: "A gene that rescues." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000817 -name: wild_type -def: "An attribute describing sequence with the genotype found in nature/or standard laboratory stock." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000818 -name: wild_type_rescue_gene -def: "A gene that rescues." [SO:xp] -is_a: SO:0000816 ! rescue_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000817 ! wild_type - -[Term] -id: SO:0000819 -name: mitochondrial_chromosome -def: "A chromosome originating in a mitochondria." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000820 -name: chloroplast_chromosome -def: "A chromosome originating in a chloroplast." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000745 ! 
chloroplast_sequence - -[Term] -id: SO:0000821 -name: chromoplast_chromosome -def: "A chromosome originating in a chromoplast." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000822 -name: cyanelle_chromosome -def: "A chromosome originating in a cyanelle." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000823 -name: leucoplast_chromosome -def: "A chromosome with origin in a leucoplast." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000824 -name: macronuclear_chromosome -def: "A chromosome originating in a macronucleus." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000083 ! macronuclear_sequence - -[Term] -id: SO:0000825 -name: micronuclear_chromosome -def: "A chromosome originating in a micronucleus." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000084 ! micronuclear_sequence - -[Term] -id: SO:0000826 -name: kinetoplast_chromosome -def: "A chromosome with origin in the kinetoplast." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000741 ! kinetoplast_sequence - -[Term] -id: SO:0000827 -name: maxicircle_chromosome -def: "A chromosome originating in a maxi-circle." [SO:xp] -intersection_of: SO:0000826 ! kinetoplast_chromosome -intersection_of: has_origin SO:0000742 ! maxicircle_sequence - -[Term] -id: SO:0000828 -name: nuclear_chromosome -def: "A chromosome originating in a nucleus." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000829 -name: nucleomorphic_chromosome -def: "A chromosome originating in a nucleomorph." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000739 ! 
nucleomorphic_sequence - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000832 -name: promoter_region -def: "A region of sequence which is part of a promotor." [SO:ke] -comment: This is a manufactured term to allow the parts of promotor to have an is_a path back to the root. -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000167 ! promoter - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: processed_transcript_region -def: "A region of a processed transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a processed transcript and give them an is_a path to the root. -subset: SOFA -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A region of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! 
primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -is_a: SO:0000834 ! processed_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000838 -name: rRNA_primary_transcript_region -def: "A region of an rRNA primary transcript." [SO:ke] -comment: To allow transcribed_spacer_region to have a path to the root. -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a protein." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" EXACT [] -synonym: "positional polypeptide feature" EXACT [] -synonym: "region or site annotation" EXACT [] -is_a: SO:0000001 ! region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000840 -name: repeat_component -def: "A region of a repeated sequence." [SO:ke] -comment: A manufactured to group the parts of repeats, to give them an is_a path back to the root. -is_a: SO:0000001 ! region -relationship: part_of SO:0000657 ! repeat_region - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! 
spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000843 -name: bacterial_RNApol_promoter_region -def: "A region which is part of a bacterial RNA polymerase promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of bacterial_RNApol_promoter to have an is_a path back to the root. -is_a: SO:0000832 ! promoter_region -relationship: part_of SO:0000613 ! bacterial_RNApol_promoter - -[Term] -id: SO:0000844 -name: RNA_II_promoter_region -def: "A region of sequence which is a promoter for RNA polymerase II." [SO:ke] -comment: This is a manufactured term to allow the parts of RNA_II_promoter to have an is_a path back to the root. -is_a: SO:0000832 ! promoter_region -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000845 -name: RNApol_III_promoter_type_1_region -def: "A region of sequence which is a promoter for RNA polymerase III type 1." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_1 to have an is_a path back to the root. -is_a: SO:0000832 ! promoter_region -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000846 -name: RNApol_III_promoter_type_2_region -def: "A region of sequence which is a promoter for RNA polymerase III type 2." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_2 to have an is_a path back to the root. -is_a: SO:0000832 ! promoter_region -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000847 -name: tmRNA_region -comment: This term was added to provide a grouping term for the region parts of tmRNA, thus giving them an is_a path back to the root. -is_a: SO:0000834 ! processed_transcript_region -relationship: part_of SO:0000584 ! 
tmRNA - -[Term] -id: SO:0000848 -name: LTR_component -synonym: "long term repeat component" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000849 -name: three_prime_LTR_component -synonym: "3' long terminal repeat component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000850 -name: five_prime_LTR_component -synonym: "5' long term repeat component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000851 -name: CDS_region -subset: SOFA -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0000853 -name: homologous_region -def: "A region that is homologous to another region." [SO:ke] -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000856 ! conserved - -[Term] -id: SO:0000854 -name: paralogous_region -def: "A homologous_region that is paralogous to another region." [SO:ke] -comment: A term to be used in conjunction witht he paralogous_to relationship. -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000859 ! paralogous - -[Term] -id: SO:0000855 -name: orthologous_region -def: "A homologous_region that is orthologous to another region." [SO:ke] -comment: This term should be used in conjuction with the similarity relationships defined in SO. -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000858 ! orthologous - -[Term] -id: SO:0000856 -name: conserved -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000857 -name: homologous -def: "Similarity due to common ancestry." [SO:ke] -is_a: SO:0000856 ! 
conserved - -[Term] -id: SO:0000858 -name: orthologous -def: "A kind of homology where divergence occured after a speciation event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000859 -name: paralogous -def: "A kind of homology where divergence occured after a duplication event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000860 -name: syntenic -def: "Attribute describing sequence regions occurring in same order on chromosome of different species." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000861 -name: capped_primary_transcript -def: "A primary transcript that is capped." [SO:xp] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000862 -name: capped_mRNA -def: "An mRNA that is capped." [SO:xp] -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000863 -name: mRNA_attribute -def: "An attribute describing an mRNA feature." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000864 -name: exemplar -def: "An attribute describing a sequence is representative of a class of similar sequences." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000865 -name: frameshift -def: "An attribute describing a sequence that contains a mutation involving the deletion or insertion of one or more bases, where this number is not divisible by 3." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000866 -name: minus_1_frameshift -def: "A frameshift caused by deleting one base." [SO:ke] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000867 -name: minus_2_frameshift -def: "A frameshift caused by deleting two bases." [SO:ke] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000868 -name: plus_1_frameshift -def: "A frameshift caused by inserting one base." [SO:ke] -is_a: SO:0000865 ! 
frameshift - -[Term] -id: SO:0000869 -name: plus_2_framshift -def: "A frameshift caused by inserting two bases." [SO:ke] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000870 -name: trans_spliced -def: "An attribute describing transcript sequence that is created by splicing exons from diferent genes." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000871 -name: polyadenylated_mRNA -def: "An mRNA that is polyadenylated." [SO:xp] -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000610 ! polyA_sequence -intersection_of: has_quality SO:0000246 ! polyadenylated - -[Term] -id: SO:0000872 -name: trans_spliced_mRNA -def: "An mRNA that is trans-spliced." [SO:xp] -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000636 ! spliced_leader_RNA -intersection_of: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000873 -name: edited_transcript -def: "A transcript that is edited." [SO:ke] -intersection_of: SO:0000673 ! transcript -intersection_of: associated_with SO:0000602 ! guide_RNA -intersection_of: associated_with SO:0000977 ! anchor_binding_site -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000874 -name: edited_transcript_by A_to_I_substitution -def: "A transcript that has been edited by A to I substitution." [SO:ke] -intersection_of: SO:0000873 ! edited_transcript -intersection_of: associated_with SO:0000919 ! substitute_A_to_I - -[Term] -id: SO:0000875 -name: bound_by_protein -def: "An attribute describing a sequence that is bound by a protein." [SO:ke] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000876 -name: bound_by_nucleic_acid -def: "An attribute describing a sequence that is bound by a nucleic acid." [SO:ke] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000877 -name: alternatively_spliced -def: "An attribute describing a situation where a gene may encode for more than 1 transcript." [SO:ke] -is_a: SO:0000237 ! 
transcript_attribute - -[Term] -id: SO:0000878 -name: monocistronic -def: "An attribute describing a sequence that contains the code for one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000879 -name: dicistronic -def: "An attribute describing a sequence that contains the code for two gene products." [SO:ke] -is_a: SO:0000880 ! polycistronic - -[Term] -id: SO:0000880 -name: polycistronic -def: "An attribute describing a sequence that contains the code for more than one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000881 -name: recoded -def: "An attribute describing am mRNA sequences that has been reprogrammed at translation, causing localized alterations." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000882 -name: codon_redefined -def: "An attribute describing the alteration of codon meaning." [SO:ke] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000883 -name: stop_codon_read_through -def: "A stop codon redefined to be a new amino acid." [SO:ke] -synonym: "stop codon readthrough" RELATED [] -is_a: SO:0000882 ! codon_redefined - -[Term] -id: SO:0000884 -name: stop_codon_redefined_as_pyrrolysine -def: "A stop codon redefined to be the new amino acid, pyrrolysine." [SO:ke] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000885 -name: stop_codon_redefined_as_selenocysteine -def: "A stop codon redefined to be the new amino acid, selenocysteine." [SO:ke] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000886 -name: recoded_by_translational_bypass -def: "Recoded mRNA where a block of nucleotides is not translated." [SO:ke] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000887 -name: translational_frameshift -def: "Recoding by frameshifting a particular site." [SO:ke] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000888 -name: maternally_imprinted_gene -def: "A gene that is maternally_imprinted." [SO:xp] -intersection_of: SO:0000704 ! 
gene -intersection_of: has_quality SO:0000135 ! maternally_imprinted - -[Term] -id: SO:0000889 -name: paternally_imprinted_gene -def: "A gene that is paternally imprinted." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000136 ! paternally_imprinted - -[Term] -id: SO:0000890 -name: post_translationally_regulated_gene -def: "A gene that is post translationally regulated." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000891 -name: negatively_autoregulated_gene -def: "A gene that is negatively autoreguated." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000473 ! negatively_autoregulated - -[Term] -id: SO:0000892 -name: positively_autoregulated_gene -def: "A gene that is positively autoregulated." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000475 ! positively_autoregulated - -[Term] -id: SO:0000893 -name: silenced -def: "An epigenetic process where a gene is innactivated at transcriptional or translational level." [SO:ke] -is_a: SO:0000126 ! transcriptionally_repressed - -[Term] -id: SO:0000894 -name: silenced_by_DNA_modification -def: "An epigenetic process where a gene is innactivated by DNA modifications, resulting in repression of transcription." [SO:ke] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0000895 -name: silenced_by_DNA_methylation -def: "An epigenetic process where a gene is innactivated by DNA methylation, resulting in repression of transcription." [SO:ke] -is_a: SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000896 -name: translationally_regulated_gene -def: "A gene that is translationally regulated." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000131 ! translationally_regulated - -[Term] -id: SO:0000897 -name: allelically_excluded_gene -def: "A gene that is allelically_excluded." 
[SO:xp] -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000137 ! allelically_excluded - -[Term] -id: SO:0000898 -name: epigenetically_modified_gene -def: "A gene that is epigenetically modified." [SO:ke] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000899 -name: nuclear_mitochondrial -def: "An attribute describing a nuclear pseudogene of a mitochndrial gene." [SO:ke] -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0000900 -name: processed -def: "An attribute describing a pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed bak inot the genome, lacking introns and promotors, but often including a polyA tail." [SO:ke] -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0000901 -name: unequally_crossed_over -def: "An attribute describing a pseudogene that was created by tandem duplication and unequal crossing over during recombination." [SO:ke] -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0000902 -name: transgene -def: "A gene that is transgenic." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000903 -name: endogenous_retroviral_sequence -is_a: SO:0000751 ! proviral_location - -[Term] -id: SO:0000904 -name: rearranged_at_DNA_level -def: "An attribute to describe the sequence of a feature, where the DNA is rearranged." [SO:ke] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000905 -name: status -def: "An attribute describing the status of a feature, based on the available evidence." [SO:ke] -comment: This term is the hypernym of attributes and should not be annotated to. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000906 -name: independently_known -def: "Attribute to describe a feature that is independently_known - not predicted." [SO:ke] -is_a: SO:0000905 ! 
status - -[Term] -id: SO:0000907 -name: supported_by_sequence_similarity -def: "An attribute to describe a feature that has been predicted using sequence smilarity techniques." [SO:ke] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000908 -name: supported_by_domain_match -def: "An attribute to describe a feature that has been predicted using sequence smilarity of a known domain." [SO:ke] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000909 -name: supported_by_EST_or_cDNA -def: "An attribute to describe a feature that has been predicted using sequence smilarity to EST or cDNA data." [SO:ke] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000910 -name: orphan -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000911 -name: predicted_by_ab_initio_computation -def: "An attribute describng a feaure that is predticted by a computer program that did not rely on sequence similarity." [SO:ke] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000912 -name: asx_turn -alt_id: BS:00203 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Aspartate or Asparagine (Asx), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx_turn" EXACT [] -is_a: SO:0001128 ! turn - -[Term] -id: SO:0000913 -name: cloned_cDNA_insert -def: "A clone insert made from cDNA." [SO:xp] -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000914 -name: cloned_genomic_insert -def: "A clone insert made from genomic DNA." [SO:xp] -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000915 -name: engineered_insert -def: "A clone insert that is engineered." [SO:xp] -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000783 ! 
engineered - -[Term] -id: SO:0000916 -name: edit_operation -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000917 -name: insert_U -def: "An edit to insert a U." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000918 -name: delete_U -def: "An edit to delete a uridine." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000919 -name: substitute_A_to_I -def: "An edit to substitute an I for an A." [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000920 -name: insert_C -def: "An edit to insert a cytidine." [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000921 -name: insert_dinucleotide -def: "An edit to insert a dinucleotide." [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000922 -name: substitute_C_to_U -def: "An edit to substitute an U for a C." [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000923 -name: insert_G -def: "An edit to insert a G." [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000924 -name: insert_GC -def: "An edit to insert a GC dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. 
Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000925 -name: insert_GU -def: "An edit to insert a GU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000926 -name: insert_CU -def: "An edit to insert a CU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000927 -name: insert_AU -def: "An edit to insert a AU dinucleotide." 
[SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000928 -name: insert_AA -def: "An edit to insert a AA dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000929 -name: edited_mRNA -def: "An mRNA that is edited." [SO:xp] -intersection_of: SO:0000873 ! edited_transcript -intersection_of: associated_with SO:0000916 ! edit_operation -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000930 -name: guide_RNA_region -def: "A region of guide RNA." [SO:ma] -is_a: SO:0000834 ! processed_transcript_region -relationship: part_of SO:0000602 ! 
guide_RNA - -[Term] -id: SO:0000931 -name: anchor_region -def: "A region of a guide_RNA that base-pairs to a target mRNA." [SO:jk] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000932 -name: pre_edited_mRNA -is_a: SO:0000120 ! protein_coding_primary_transcript - -[Term] -id: SO:0000933 -name: intermediate -def: "An attribute to describe a feature between stages of processing." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000934 -name: miRNA_target_site -def: "A miRNA target site is a binding site where the molecule is a micro RNA." [FB:cds] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000935 -name: edited_CDS -def: "A CDS that is edited." [SO:xp] -intersection_of: SO:0000316 ! CDS -intersection_of: associated_with SO:0000916 ! edit_operation -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000936 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000937 -name: vertebrate_immune_system_feature -is_obsolete: true - -[Term] -id: SO:0000938 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000939 -name: vertebrate_immune_system_gene_recombination_signal_feature -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000940 -name: recombinationally_rearranged -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000941 -name: recombinationally_rearranged_vertebrate_immune_system_gene -def: "A recombinationally rearranged gene of the vertebrate immune system." [SO:xp] -intersection_of: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: associated_with SO:0000301 ! 
vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000942 -name: attP_site -def: "An integration/excision site of a phage chromosome at which a recombinase acts to insert the phage DNA at a cognate integration/excision site on a bacterial chromosome." [SO:as] -synonym: "attP site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0001042 ! phage - -[Term] -id: SO:0000943 -name: attB_site -def: "An integration/excision site of a bacterial chromosome at which a recombinase acts to insert foreign DNA containing a cognate integration/excision site." [SO:as] -synonym: "attB site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000944 -name: attL_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attB_site and the 3' portion of attP_site." [SO:as] -synonym: "attBP'" RELATED [] -synonym: "attL site" RELATED [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000945 -name: attR_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attP_site and the 3' portion of attB_site." [SO:as] -synonym: "attPB'" RELATED [] -synonym: "attR site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000946 -name: integration_excision_site -def: "A region specifically recognised by a recombinase, which inserts or removes another region marked by a distinct cognate integration/excision site." [SO:as] -synonym: "attachment site" RELATED [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000947 -name: resolution_site -def: "A region specifically recognised by a recombinase, which separates a physically contiguous circle of DNA into two physically separate circles." [SO:as] -synonym: "res site" EXACT [] -is_a: SO:0000342 ! 
site_specific_recombination_target_region - -[Term] -id: SO:0000948 -name: inversion_site -def: "A region specifically recognised by a recombinase, which inverts the region flanked by a pair of sites." [SO:ma] -comment: A target region for site-specific inversion of a DNA region and which carries binding sites for a site-specific recombinase and accessory proteins as well as the site for specific cleavage by the recombinase. -synonym: "inversion site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000949 -name: dif_site -def: "A site at which replicated bacterial circular chromosomes are decatenated by site specific resolvase." [SO:as] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000950 -name: attC_site -def: "An attC site is a sequence required for the integration of a DNA of an integron." [SO:as] -synonym: "attC site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000951 -name: eukaryotic_terminator -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000952 -name: oriV -def: "An origin of vegetative replication in plasmids and phages." [SO:as] -synonym: "origin of vegetative replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000953 -name: oriC -def: "An origin of bacterial chromosome replication." [SO:as] -synonym: "origin of bacterial chromosome replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000954 -name: DNA_chromosome -def: "Structural unit composed of a self-replicating, DNA molecule." [SO:ma] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0000955 -name: double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded DNA molecule." [SO:ma] -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000985 ! 
double - -[Term] -id: SO:0000956 -name: single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded DNA molecule." [SO:ma] -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000984 ! single - -[Term] -id: SO:0000957 -name: linear_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear DNA molecule." [SO:ma] -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000958 -name: circular_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular DNA molecule." [SO:ma] -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000959 -name: linear_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear DNA molecule." [SO:ma] -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000960 -name: circular_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000961 -name: RNA_chromosome -def: "Structural unit composed of a self-replicating, RNA molecule." [SO:ma] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000356 ! RNA - -[Term] -id: SO:0000962 -name: single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded RNA molecule." [SO:ma] -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000984 ! 
single - -[Term] -id: SO:0000963 -name: linear_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear RNA molecule." [SO:ma] -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000964 -name: linear_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear RNA molecule." [SO:ma] -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000965 -name: double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded RNA molecule." [SO:ma] -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000985 ! double - -[Term] -id: SO:0000966 -name: circular_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000967 -name: circular_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular RNA molecule." [SO:ma] -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000968 -name: replication_mode -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0000969 -name: rolling_circle -is_a: SO:0000971 ! DNA_replication_mode - -[Term] -id: SO:0000970 -name: theta_replication -is_a: SO:0000971 ! DNA_replication_mode - -[Term] -id: SO:0000971 -name: DNA_replication_mode -is_a: SO:0000968 ! replication_mode - -[Term] -id: SO:0000972 -name: RNA_replication_mode -is_a: SO:0000968 ! 
replication_mode - -[Term] -id: SO:0000973 -name: insertion_sequence -def: "A terminal_inverted_repeat_element that is bacterial and only encodes the functions required for its transposition between these inverted repeats." [SO:as] -synonym: "IS" RELATED [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000974 -name: minicircle_chromosome -intersection_of: SO:0000826 ! kinetoplast_chromosome -intersection_of: has_origin SO:0000980 ! minicircle_sequence - -[Term] -id: SO:0000975 -name: minicircle_gene -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: has_origin SO:0000980 ! minicircle_sequence - -[Term] -id: SO:0000976 -name: cryptic -is_a: SO:0000116 ! edited - -[Term] -id: SO:0000977 -name: anchor_binding_site -comment: Part of an edited transcript only. -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000978 -name: template_region -def: "A region of a guide_RNA that specifies the insertions and deletions of bases in the editing of a target mRNA." [SO:jk] -synonym: "information region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000979 -name: gRNA_encoding -def: "A non-protein_coding gene that encodes a guide_RNA." [SO:ma] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000980 -name: minicircle_sequence -is_a: SO:0000741 ! kinetoplast_sequence - -[Term] -id: SO:0000981 -name: rho_dependent_bacterial_terminator -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000982 -name: rho_independent_bacterial_terminator -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000983 -name: strand_attribute -comment: Attributes added to describe the diferent kinds of replicon. SO workshop, September 2006. -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000984 -name: single -comment: Attributes added to describe the diferent kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! 
strand_attribute - -[Term] -id: SO:0000985 -name: double -comment: Attributes added to describe the diferent kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000986 -name: topology_attribute -comment: Attributes added to describe the diferent kinds of replicon. SO workshop, September 2006. -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000987 -name: linear -comment: Attributes added to describe the diferent kinds of replicon. SO workshop, September 2006. -is_a: SO:0000986 ! topology_attribute - -[Term] -id: SO:0000988 -name: circular -comment: Attributes added to describe the diferent kinds of replicon. SO workshop, September 2006. -is_a: SO:0000986 ! topology_attribute - -[Term] -id: SO:0000989 -name: class_II_RNA -def: "Small non-coding RNA (59-60 nt long) containing 5' and 3' ends that are predicted to come together to form a stem structure. Identified in the social amoeba\nDictyostelium discoideum and localized in the cytoplasm." [http://nar.oxfordjournals.org/cgi/content/full/32/15/4646] -synonym: "class II RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000990 -name: class_I_RNA -def: "Small non-coding RNA (55-65 nt long) containing highly conserved 5' and 3' ends (16 and 8 nt, respectively) that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [http://nar.oxfordjournals.org/cgi/content/full/32/15/4646] -comment: Requested by Karen Pilcher - Dictybase. song-Term Tracker-1574577. -synonym: "class I RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000991 -name: genomic_DNA -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000992 -name: BAC_cloned_genomic_insert -comment: Requested by Andy Schroder - Flybase Harvard, Nov 2006. -intersection_of: SO:0000914 ! 
cloned_genomic_insert -intersection_of: derives_from SO:0000153 ! BAC - -[Term] -id: SO:0000993 -name: consensus -comment: Term added Dec 06 to comply with mapping to MGED terms.\nIt should be used to generate consensus regions. The specific cross product terms they require are consensus_region and consensus_mRNA. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000994 -name: consensus_region -comment: DO not obsolete without considering MGED mapping. -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000995 -name: consensus_mRNA -comment: DO not obsolete without considering MGED mapping. -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000996 -name: predicted_gene -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000732 ! predicted - -[Term] -id: SO:0000997 -name: gene_fragment -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -intersection_of: SO:0000842 ! gene_component_region -intersection_of: has_quality SO:0000731 ! fragment - -[Term] -id: SO:0000998 -name: recursive_splice_site -def: "Recursive splicing is a mechanism that splices large introns by sub dividing the intron at non exonic elements and alternate exons." [http://www.genetics.org/cgi/content/full/170/2/661] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000999 -name: BAC_end -def: "A region of sequence from the end of a BAC clone that may provide a highly specific marker." [SO:ke] -comment: Requested by Keith Boroevich December, 2006. -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000153 ! BAC - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide which functions as a part of the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S rRNA" RELATED [] -is_a: SO:0000650 ! 
small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "23S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -subset: SOFA -synonym: "25S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001003 -name: solo_LTR -def: "A recombination product between the 2 LTR of the same element." [SO:ke] -comment: Requested by Hadi Quesneville January 2007. -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0001004 -name: low_complexity -is_a: SO:0000905 ! status - -[Term] -id: SO:0001005 -name: low_complexity_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001004 ! low_complexity - -[Term] -id: SO:0001006 -name: prophage -def: "A phage genome after it has established in the host genome in a latent/immune state either as a plasmid or as an integrated \"island\"." [GO:jl] -is_a: SO:0000113 ! proviral_region - -[Term] -id: SO:0001007 -name: cryptic_prophage -def: "A remnant of an integrated prophage in the host genome or an \"island\" in the host genome that includes phage like-genes." [GO:jl] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001008 -name: tetraloop -def: "A base-paired stem with loop of 4 non-hydrogen bonded nucleotides." [SO:ke] -is_a: SO:0000313 ! stem_loop - -[Term] -id: SO:0001009 -name: DNA_constraint_sequence -def: "A double-stranded DNA used to control macromolecular structure and function." [http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au\]&dispmax=50] -synonym: "DNA constraint" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0001010 -name: i_motif -def: "A cytosine rich domain whereby strands associate both inter- and intramolecularly at moderately acidic pH." [http://nar.oxfordjournals.org/cgi/content/abstract/26/20/4696] -synonym: "short intercalated motif" EXACT [] -is_a: SO:0000142 ! 
DNA_sequence_secondary_structure - -[Term] -id: SO:0001011 -name: PNA -def: "Peptide nucleic acid, is a chemical not known to occur naturally but is artificially synthesized and used in some biological research and medical treatments.\nThe PNA backbone is composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [SO:ke] -synonym: "peptide nucleic acid" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0001012 -name: DNAzyme -def: "A DNA sequence with catalytic activity." [SO:cb] -comment: Added by request from Colin Batchelor. -synonym: "catalytic DNA" EXACT [] -synonym: "deoxyribozyme" RELATED [] -synonym: "DNA enzyme" EXACT [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0001013 -name: MNP -def: "A multiple nucleotide polymorphism with alleles of common\nlength > 1, for example AAA/TTT." [http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?rs=rs2067431] -synonym: "multiple nucleotide polymorphism" RELATED [] -is_a: SO:1000005 ! complex_substitution - -[Term] -id: SO:0001014 -name: intron_domain -comment: Requested by Colin Batchelor, Feb 2007. -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000188 ! intron - -[Term] -id: SO:0001015 -name: wobble_base_pair -def: "A type of non-canonical base pairing, most commonly between G\nand U, which is important for the secondary structure of RNAs. It has\nsimilar thermodynamic stability to the Watson-Crick pairing. Wobble base\npairs only have two hydrogen bonds. Other wobble base pair possibilities\nare I-A, I-U and I-C." [PMID:11256617] -synonym: "wobble pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0001016 -name: internal_guide_sequence -def: "A purine-rich sequence in the group I introns which determines the\nlocations of the splice sites in group I intron splicing and has catalytic\nactivity." [SO:cb] -synonym: "IGS" EXACT [] -is_a: SO:0001014 ! 
intron_domain -relationship: part_of SO:0000587 ! group_I_intron - -[Term] -id: SO:0001017 -name: silent_mutation -comment: Added in March 2007 in after meeting with pharmgkb. -is_a: SO:1000132 ! mutation - -[Term] -id: SO:0001018 -name: epitope -def: "A region of a macromolecule that is recognized by the immune system." [http://en.wikipedia.org/wiki/Epitope] -comment: Requested by Trish Whetzel. -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -is_a: SO:0001060 ! sequence_variant - -[Term] -id: SO:0001020 -name: mutation_affecting_copy_number -is_a: SO:1000132 ! mutation - -[Term] -id: SO:0001021 -name: chromosome_breakpoint -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0001022 -name: inversion_breakpoint -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001023 -name: allele -def: "An allele is one of a set of coexisting sequence variants of a gene." [SO:immuno_workshop] -is_a: SO:0001060 ! sequence_variant -relationship: variant_of SO:0000704 ! gene - -[Term] -id: SO:0001024 -name: haplotype -def: "A haplotype is one of a set of coexisting sequence variants of a haplotype block." [SO:immuno_workshop] -is_a: SO:0001060 ! sequence_variant -relationship: variant_of SO:0000355 ! haplotype_block - -[Term] -id: SO:0001025 -name: polymorphic_sequence_variant -def: "A sequence variant that is segregating in one or more natural populations of a species." [SO:immuno_workshop] -is_a: SO:0001060 ! sequence_variant - -[Term] -id: SO:0001026 -name: genome -def: "A genome is the sum of genetic material within a cell or virion." [SO:immuno_workshop] -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0001027 -name: genotype -def: "A genotype is a variant genome, complete or incomplete." 
[SO:immuno_workshop] -is_a: SO:0001060 ! sequence_variant -relationship: variant_of SO:0001026 ! genome - -[Term] -id: SO:0001028 -name: diplotype -def: "A diplotype is a pair of haplotypes from a given individual. It is a genotype where the phase is known." [SO:immuno_workshop] -is_a: SO:0001027 ! genotype - -[Term] -id: SO:0001029 -name: direction_attribute -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0001030 -name: forward -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001031 -name: reverse -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001032 -name: mitochondrial_DNA -comment: This terms is used by MO. -intersection_of: SO:0000737 ! mitochondrial_sequence -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001033 -name: chloroplast_DNA -comment: This term is used by MO. -is_a: SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0001034 -name: mitron -def: "An intron from whose debranched product an miRNA is derived." [SO:ma] -comment: Ruby et al. Nature 448:83 desribe a new class of miRNAs that are derived from debranched introns. -intersection_of: SO:0000276 ! miRNA -intersection_of: derives_from SO:0000188 ! intron - -[Term] -id: SO:0001035 -name: piRNA -def: "A small non coding RNA , part of a silencing system that prevents the spreading of selfish genetic elements." [SO:ke] -synonym: "piwi-associated RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001036 -name: arginyl_tRNA -def: "A tRNA sequence that has an arginine anticodon, and a 3' alanine binding region." [SO:ke] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -synonym: "MGE" EXACT [] -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0001038 -name: extrachromosomal_mobile_genetic_element -def: "An MGE that is not integrated into the host chromosome." [SO:ke] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001040 -name: integrated_plasmid -def: "A plasmid sequence that is integrated within the host chromosome." [SO:ke] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0001041 -name: virus -def: "The region of nucleotide sequence of a virus, a submicroscopic particle that replicates by infecting a host cell." [SO:ke] -synonym: "viral sequence" EXACT [] -synonym: "virus sequence" EXACT [] -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element - -[Term] -id: SO:0001042 -name: phage -def: "The nucleotide sequence of a virus that infects bacteria." [SO:ke] -synonym: "bacteriophage" EXACT [] -synonym: "phage sequence" EXACT [] -is_a: SO:0001041 ! virus - -[Term] -id: SO:0001043 -name: attCtn_site -def: "An attachment site located on a conjugative transposon and used for site-specific integration of a conjugative transposon." [Phigo:at] -synonym: "attCtn site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000371 ! conjugative_transposon - -[Term] -id: SO:0001044 -name: nuclear_mt_pseudogene -def: "A pseudogene that is nuclear/mitochondrial." [SO:xp] -synonym: "nuclear mitochondrial pseudogene" EXACT [] -synonym: "NUMT" EXACT [] -intersection_of: SO:0000336 ! pseudogene -intersection_of: has_quality SO:0000899 ! nuclear_mitochondrial - -[Term] -id: SO:0001045 -name: cointegrated_replicon -def: "A MGE region consisting of two fused replicons/plasmids resulting from a replicative transposition event." [Phigo:at] -is_a: SO:0001039 ! 
integrated_mobile_genetic_element - -[Term] -id: SO:0001046 -name: IRLinv_site -def: "Component of the inversion site located at the left of a region susceptible to site-specific inversion." [Phigo:at] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001047 -name: IRRinv_site -def: "Component of the inversion site located at the right of a region susceptible to site-specific inversion." [Phigo:at] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001048 -name: inversion_site_part -def: "A region located within an inversion site." [SO:ke] -comment: A term created to allow the parts of an inversion site have an is_a path back to the root. -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0001049 -name: defective_conjugative_transposon -def: "An island that contains genes for integration/excision and the gene and site for the initiation of intercellular transfer by conjugation. It can be complemented for transfer by a conjugative transposon." [Phigo:ariane] -synonym: "defective conjugative transposon" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001050 -name: repeat_fragment -def: "A portion of a repeat, interupted by the insertion of another element." [SO:ke] -comment: Requested by Chris Smith, and others at Flybase to help annotate nested repeats. -is_a: SO:0000840 ! repeat_component -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000731 ! fragment - -[Term] -id: SO:0001051 -name: nested_region -is_a: SO:0000001 ! region - -[Term] -id: SO:0001052 -name: nested_repeat -intersection_of: SO:0001051 ! nested_region -intersection_of: derives_from SO:0000657 ! repeat_region -intersection_of: has_part SO:0000001 ! region -intersection_of: has_part SO:0001050 ! repeat_fragment - -[Term] -id: SO:0001053 -name: nested_transposon -intersection_of: SO:0001051 ! 
nested_region -intersection_of: derives_from SO:0000101 ! transposable_element -intersection_of: has_part SO:0000001 ! region -intersection_of: has_part SO:0001054 ! transposon_fragment - -[Term] -id: SO:0001054 -name: transposon_fragment -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000731 ! fragment - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." [SO:ke] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001057 -name: enhanceosome -is_a: SO:0000165 ! enhancer - -[Term] -id: SO:0001058 -name: promoter_targeting_sequence -def: "A transcriptional_cis_regulatory_region that restricts the activity of a CRM to a single promoter and which functions only when both itself and an insulator are located between the CRM and the promoter." [SO:regcreative] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001060 -name: sequence_variant -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0001061 -name: propeptide_cleavage_site -namespace: BS -alt_id: BS:00063 -def: "The propeptide_cleavage_site is the arginine/lysine boundary on a propeptide where cleavage occurs." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "propeptide_cleavage_site" EXACT [] -is_a: SO:0001062 ! 
propeptide - -[Term] -id: SO:0001062 -name: propeptide -namespace: BS -alt_id: BS:00077 -def: "Describes part of a peptide chain which is cleaved off during the formation of the mature protein." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "propep" RELATED [] -synonym: "propeptide" EXACT [] -is_a: SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the a region of peptide sequence that is cleaved during maturation." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "immature_peptide_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001064 -name: active_peptide -alt_id: BS:00076 -def: "Active peptides are proteins which are biologically active, released from a precursor molecule. Hormones, neuropeptides, antimicrobial peptides, are active peptides. They are typically short (<40 amino acids) in length." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "active_peptide" EXACT [] -synonym: "peptide" EXACT [] -is_a: SO:0000419 ! mature_protein_region - -[Term] -id: SO:0001066 -name: compositionally_biased_region -alt_id: BS:00068 -def: "Extent of a compositionally biased region. Used for homopolymeric stretches of residues and also for regions which are rich in a particular amino acid. Not used for a run of less than 4 residues." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "compbias" EXACT [] -synonym: "compositional bias" EXACT [] -synonym: "compositionally biased" EXACT [] -synonym: "compositionally_biased_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001067 -name: polypeptide_motif -alt_id: BS:00032 -def: "A sequence motif is a short (up to 20 amino acids) region of biological interest. 
Such motifs, although they are too short to constitute functional domains, share sequence similarities and are conserved in different proteins. They display a common function (protein-binding, subcellular location etc.)." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "motif" RELATED [] -synonym: "polypeptide_motif" EXACT [] -is_a: SO:0000417 ! polypeptide_domain - -[Term] -id: SO:0001068 -name: polypeptide_repeat -alt_id: BS:00070 -def: "A polypeptide_repeat is a single copy of an internal sequence repetition." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "polypeptide_repeat" EXACT [] -synonym: "repeat" RELATED [] -is_a: SO:0000417 ! polypeptide_domain - -[Term] -id: SO:0001069 -name: polypeptide_structural_domain -alt_id: BS:00134 -def: "A polypeptide domain is a structural domain that is self-stabilizing and folds independently of the rest of the protein chain." [EBIBS:GAR, PMID:7020376] -subset: biosapiens -synonym: "polypeptide_structural_domain" EXACT [] -synonym: "structural domain" EXACT [] -is_a: SO:0000417 ! polypeptide_domain -is_a: SO:0001070 ! structural_region - -[Term] -id: SO:0001070 -name: structural_region -alt_id: BS:00337 -def: "Backbone conformation of the polypeptide." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "structural_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001071 -name: membrane_structure -alt_id: BS:00128 -def: "Arrangement of the polypeptide with respect to the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "membrane_structure" EXACT [] -is_a: SO:0001070 ! structural_region - -[Term] -id: SO:0001072 -name: extramembrane -alt_id: BS:00154 -def: "Extent of the region not transversing the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "extramembrane" EXACT [] -synonym: "topo_dom" EXACT [] -is_a: SO:0001071 ! 
membrane_structure - -[Term] -id: SO:0001073 -name: cytoplasm_location -alt_id: BS:00145 -def: "Region of the peptide in the cytoplasm." [EBIBS:GAR] -subset: biosapiens -synonym: "cytoplasm_location" EXACT [] -synonym: "inside" RELATED [] -is_a: SO:0001072 ! extramembrane - -[Term] -id: SO:0001074 -name: non_cytoplasm_location -alt_id: BS:00144 -def: "Region of peptide not in the cytoplasm. N.B. This could be inside an organelle within the cell." [EBIBS:GAR] -subset: biosapiens -synonym: "non_cytoplasm_location" EXACT [] -synonym: "outside" RELATED [] -is_a: SO:0001072 ! extramembrane - -[Term] -id: SO:0001075 -name: intramembrane -alt_id: BS:00156 -def: "Extent of the region present in the lipid bilayer." [EBIBS:GAR] -subset: biosapiens -synonym: "intramembrane" EXACT [] -is_a: SO:0001071 ! membrane_structure - -[Term] -id: SO:0001076 -name: membrane_loop -alt_id: BS:00155 -def: "Extent of region which enters the membrane bilayer but emerges on the same side which it entered." [EBIBS:GAR] -subset: biosapiens -synonym: "membrane_loop" EXACT [] -is_a: SO:0001075 ! intramembrane - -[Term] -id: SO:0001077 -name: transmembrane -alt_id: BS:00158 -def: "Extent of region transversing the lipid bilayer." [EBIBS:GAR, UniProt:curator_manual] -subset: biosapiens -synonym: "transmem" RELATED [] -synonym: "transmembrane" EXACT [] -is_a: SO:0001075 ! intramembrane - -[Term] -id: SO:0001078 -name: polypeptide_secondary_structure -alt_id: BS:00003 -def: "A region of peptide with secondary structure has hydrogen bonding along the peptide chain that causes a defined conformation of the chain." [EBIBS:GAR] -comment: Biosapien term was secondary_structure. -subset: biosapiens -synonym: "2nary structure" EXACT [] -synonym: "secondary structure" EXACT [] -synonym: "secondary structure region" RELATED [] -synonym: "secondary_structure" EXACT [] -is_a: SO:0001070 ! 
structural_region - -[Term] -id: SO:0001079 -name: polypeptide_structural_motif -alt_id: BS:0000338 -def: "Motif is a three-dimensional structural element within the chain, which appears also in a variety of other molecules. Unlike a domain, a motif does not need to form a stable globular unit." [EBIBS:GAR] -subset: biosapiens -synonym: "structural_motif" EXACT [] -is_a: SO:0001070 ! structural_region - -[Term] -id: SO:0001080 -name: coiled_coil -alt_id: BS:00041 -def: "A coiled coil is a structural motif in proteins, in which alpha-helices are coiled together like the strands of a rope." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "coiled" RELATED [] -synonym: "coiled_coil" EXACT [] -is_a: SO:0001079 ! polypeptide_structural_motif - -[Term] -id: SO:0001081 -name: helix_turn_helix -alt_id: BS:00147 -def: "A motif comprising two helices separated by a turn." [EBIBS:GAR] -subset: biosapiens -synonym: "DNA binding motif" RELATED [] -synonym: "helix_turn_helix" EXACT [] -synonym: "HTH" EXACT [] -is_a: SO:0001079 ! polypeptide_structural_motif - -[Term] -id: SO:0001082 -name: polypeptide_sequencing_information -alt_id: BS:00125 -def: "Incompatibility in the sequence due to some experimental problem." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "sequencing_information" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0001083 -name: non_adjacent_residues -alt_id: BS:00182 -def: "Indicates that two consecutive residues in a fragment sequence are not consecutive in the full-length protein and that there are a number of unsequenced residues between them." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "non consecutive" EXACT [] -synonym: "non_adjacent_residues" EXACT [] -synonym: "non_cons" EXACT [] -is_a: SO:0001082 ! 
polypeptide_sequencing_information - -[Term] -id: SO:0001084 -name: non_terminal_residue -alt_id: BS:00072 -def: "The residue at an extremity of the sequence is not the terminal residue." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "" RELATED [] -synonym: "non terminal" EXACT [] -synonym: "non_ter" EXACT [] -synonym: "non_terminal_residue" EXACT [] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001085 -name: sequence_conflict -alt_id: BS:00069 -def: "Different sources report differing sequences." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "conflict" EXACT [] -synonym: "sequence_conflict" EXACT [] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001086 -name: sequence_uncertainty -alt_id: BS:00181 -def: "Describes the positions in a sequence where the authors are unsure about the sequence assignment." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "sequence_uncertainty" EXACT [] -synonym: "unsure" RELATED [] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001087 -name: cross_link -namespace: BS -alt_id: BS:00178 -def: "Posttranslationally formed amino acid bonds." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "cross_link" EXACT [] -synonym: "crosslink" RELATED [] -is_a: SO:0001090 ! covalent_binding_site - -[Term] -id: SO:0001088 -name: disulfide_bond -namespace: BS -alt_id: BS:00028 -def: "The covalent bond between sulfur atoms that binds two peptide chains or different parts of one peptide chain and is a structural determinant in many protein molecules." [EBIBS:GAR, UniProt:curation_manual] -comment: 2 discreet & joined. 
-subset: biosapiens -synonym: "disulfid" RELATED [] -synonym: "disulfide" RELATED [] -synonym: "disulfide bond" RELATED [] -synonym: "disulfide_bond" EXACT [] -synonym: "disulphide" EXACT [] -synonym: "disulphide bond" RELATED [] -is_a: SO:0001090 ! covalent_binding_site - -[Term] -id: SO:0001089 -name: post_translational_modification -namespace: BS -alt_id: BS:00052 -def: "A transformation that occurs in a protein after it has been synthesized, which may regulate, stabilize, crosslink, or introduce new chemical functionalities in the protein." [EBIBS:GAM, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mod_res" RELATED [] -synonym: "modified residue" EXACT [] -synonym: "post_translational_modification" EXACT [] -is_a: SO:0001090 ! covalent_binding_site - -[Term] -id: SO:0001090 -name: covalent_binding_site -namespace: BS -alt_id: BS:00246 -def: "Binding involving a covalent bond." [EBIBS:GAR] -subset: biosapiens -synonym: "covalent_binding_site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001091 -name: non_covalent_binding_site -namespace: BS -alt_id: BS:00029 -def: "Binding site for any chemical group (co-enzyme, prosthetic group, etc.)." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "binding" RELATED [uniprot:curation] -synonym: "binding site" RELATED [] -synonym: "non_covalent_binding_site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001092 -name: metal_binding -namespace: BS -alt_id: BS:00027 -def: "Residue is part of a binding site for a metal ion." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "metal" RELATED [] -synonym: "metal_binding" EXACT [] -is_a: SO:0001091 ! non_covalent_binding_site - -[Term] -id: SO:0001093 -name: protein_protein_interaction -namespace: BS -alt_id: BS:00131 -def: "Residues involved in protein-protein interactions." 
[EBIBS:GAR, UniProt:Curation_manual] -subset: biosapiens -synonym: "protein_protein_interaction" EXACT [] -is_a: SO:0001091 ! non_covalent_binding_site - -[Term] -id: SO:0001094 -name: Ca_contact_site -namespace: BS -alt_id: BS:00186 -def: "Residue involved in contact with calcium." [EBIBS:GAR] -subset: biosapiens -synonym: "Ca_contact_site" EXACT [] -is_a: SO:0001092 ! metal_binding - -[Term] -id: SO:0001095 -name: Co_contact_site -namespace: BS -alt_id: BS:00136 -def: "Residue involved in contact with cobalt." [EBIBS:GAR] -subset: biosapiens -synonym: "Co_contact_site" EXACT [] -is_a: SO:0001092 ! metal_binding - -[Term] -id: SO:0001096 -name: Cu_contact_site -namespace: BS -alt_id: BS:00146 -def: "Residue involved in contact with copper." [EBIBS:GAR] -subset: biosapiens -synonym: "ca_bind" RELATED [uniprot:curation] -synonym: "Cu_contact_site" EXACT [] -is_a: SO:0001092 ! metal_binding - -[Term] -id: SO:0001097 -name: Fe_contact_site -namespace: BS -alt_id: BS:00137 -def: "Residue involved in contact with iron." [EBIBS:GAR] -subset: biosapiens -synonym: "ca_bind" RELATED [uniprot:curation] -synonym: "Fe_contact_site" EXACT [] -is_a: SO:0001092 ! metal_binding - -[Term] -id: SO:0001098 -name: Mg_contact_site -namespace: BS -alt_id: BS:00187 -def: "Residue involved in contact with magnesium." [EBIBS:GAR] -subset: biosapiens -synonym: "Mg_contact_site" EXACT [] -is_a: SO:0001092 ! metal_binding - -[Term] -id: SO:0001099 -name: Mn_contact_site -namespace: BS -alt_id: BS:00140 -def: "Residue involved in contact with manganese." [EBIBS:GAR] -subset: biosapiens -synonym: "Mn_contact_site" EXACT [] -is_a: SO:0001092 ! metal_binding - -[Term] -id: SO:0001100 -name: Mo_contact_site -namespace: BS -alt_id: BS:00141 -def: "Residue involved in contact with molybdenum." [EBIBS:GAR] -subset: biosapiens -synonym: "Mo_contact_site" EXACT [] -is_a: SO:0001092 ! 
metal_binding - -[Term] -id: SO:0001101 -name: Ni_contact_site -namespace: BS -alt_id: BS:00142 -def: "Residue involved in contact with nickel." [EBIBS:GAR] -subset: biosapiens -synonym: "Ni_contact_site" EXACT [] -is_a: SO:0001092 ! metal_binding - -[Term] -id: SO:0001102 -name: W_contact_site -namespace: BS -alt_id: BS:00143 -def: "Residue involved in contact with tungsten." [EBIBS:GAR] -subset: biosapiens -synonym: "W_contact_site" EXACT [] -is_a: SO:0001092 ! metal_binding - -[Term] -id: SO:0001103 -name: Zn_contact_site -namespace: BS -alt_id: BS:00185 -def: "Residue involved in contact with zinc." [EBIBS:GAR] -subset: biosapiens -synonym: "Zn_contact_site" EXACT [] -is_a: SO:0001092 ! metal_binding - -[Term] -id: SO:0001104 -name: active_site_residue -namespace: BS -alt_id: BS:00026 -def: "Amino acid involved in the activity of an enzyme." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "act_site" RELATED [] -synonym: "active site residue" EXACT [] -synonym: "catalitic residues" RELATED [] -synonym: "site" BROAD [] -is_a: SO:0001105 ! protein_ligand_interaction - -[Term] -id: SO:0001105 -name: protein_ligand_interaction -namespace: BS -alt_id: BS:00157 -def: "Residues which interact with a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "protein-ligand interaction" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001106 -name: asx_motif -alt_id: BS:00202 -def: "A motif of five consecutive residues and two H-bonds in which: Residue(i) is Aspartate or Asparagine (Asx), side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3), main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "asx_motif" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001107 -name: beta_bulge -alt_id: BS:00208 -def: "A motif of three residues within a beta-sheet in which the main chains of two consecutive residues are H-bonded to that of the third, and in which the dihedral angles are as follows: Residue(i): -140 degrees < phi(l) -20 degrees , -90 degrees < psi(l) < 40 degrees. Residue (i+1): -180 degrees < phi < -25 degrees or +120 degrees < phi < +180 degrees, +40 degrees < psi < +180 degrees or -180 degrees < psi < -120 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "beta_bulge" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001108 -name: beta_bulge_loop -alt_id: BS:00209 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds. Beta bulge loops often occur at the loop ends of beta-hairpins." [EBIBS:GAR, Http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "beta_bulge_loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001109 -name: beta_bulge_loop_five -alt_id: BS:00210 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+4), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+3), these loops have an RL nest at residues i+2 and i+3." [EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "beta_bulge_loop_five" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001110 -name: beta_bulge_loop_six -alt_id: BS:00211 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+5), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+4), these loops have an RL nest at residues i+3 and i+4." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "beta_bulge_loop_six" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001111 -name: beta_strand -alt_id: BS:00042 -def: "A beta strand describes a single length of polypeptide chain that forms part of a beta sheet. A single continuous stretch of amino acids adopting an extended conformation of hydrogen bonds between the N-O and the C=O of another part of the peptide. This forms a secondary protein structure in which two or more extended polypeptide regions are hydrogen-bonded to one another in a planar array." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "beta_strand" EXACT [] -synonym: "strand" RELATED [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001112 -name: antiparallel_beta_strand -alt_id: BS:0000341 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (one running N-terminal to C-terminal and one running C-terminal to N-terminal). Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i) and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they form two mutual backbone hydrogen bonds to each other's flanking peptide groups; this is known as a close pair of hydrogen bonds. The peptide backbone dihedral angles (phi, psi) are about (-140 degrees, 135 degrees) in antiparallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "antiparallel_beta_strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001113 -name: parallel_beta_strand -alt_id: BS:00151 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (both running N-terminal to C-terminal). 
This orientation is slightly less stable because it introduces nonplanarity in the inter-strand hydrogen bonding pattern. Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i)and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they do not hydrogen bond to each other; rather, one residue forms hydrogen bonds to the residues that flank the other (but not vice versa). For example, residue i may form hydrogen bonds to residues j - 1 and j + 1; this is known as a wide pair of hydrogen bonds. By contrast, residue j may hydrogen-bond to different residues altogether, or to none at all. The dihedral angles (phi, psi) are about (-120 degrees, 115 degrees) in parallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "parallel_beta_strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001114 -name: helix -alt_id: BS:00152 -def: "A helix is a secondary_structure conformation where the peptide backbone forms a coil." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "helix" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001115 -name: left_handed_helix -alt_id: BS:00222 -def: "A left handed helix is a region of peptide where the coiled conformation turns in an anticlockwise, left handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix-l" RELATED [] -synonym: "left handed helix" EXACT [] -is_a: SO:0001114 ! helix - -[Term] -id: SO:0001116 -name: right_handed_helix -alt_id: BS:0000339 -def: "A right handed helix is a region of peptide where the coiled conformation turns in a clockwise, right handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix" RELATED [] -synonym: "right handed helix" EXACT [] -is_a: SO:0001114 ! 
helix - -[Term] -id: SO:0001117 -name: alpha_helix -alt_id: BS:00040 -def: "The helix has 3.6 residues per turn which corersponds to a translation of 1.5 angstroms (= 0.15 nm) along the helical axis. Every backbone N-H group donates a hydrogen bond to the backbone C=O group of the amino acid four residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "a-helix" EXACT [] -synonym: "alpha_helix" EXACT [] -synonym: "helix" RELATED [] -is_a: SO:0001116 ! right_handed_helix - -[Term] -id: SO:0001118 -name: pi_helix -alt_id: BS:00153 -def: "The pi helix has 4.1 residues per turn and a translation of 1.15 (=0.115 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid five residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "pi_helix" EXACT [] -is_a: SO:0001116 ! right_handed_helix - -[Term] -id: SO:0001119 -name: three_ten_helix -alt_id: BS:0000340 -def: "The 3-10 helix has 3 residues per turn with a translation of 2.0 angstroms (=0.2 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid three residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "3(10) helix" EXACT [] -synonym: "3-10 helix" EXACT [] -synonym: "310 helix" EXACT [] -synonym: "three_ten_helix" EXACT [] -is_a: SO:0001116 ! right_handed_helix - -[Term] -id: SO:0001120 -name: nest -alt_id: BS:00223 -def: "A motif of two consecutive residues with dihedral angles. Nest should not have Proline as any residue. Nests frequently occur as parts of other motifs such as Schellman loops." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest" EXACT [] -synonym: "nest_motif" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001121 -name: nest_left_right -alt_id: BS:00224 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_left_right" EXACT [] -synonym: "nest_lr" EXACT [] -is_a: SO:0001120 ! nest - -[Term] -id: SO:0001122 -name: nest_right_left -alt_id: BS:00225 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_right_left" EXACT [] -synonym: "nest_rl" EXACT [] -is_a: SO:0001120 ! nest - -[Term] -id: SO:0001123 -name: schellmann_loop -alt_id: BS:00226 -def: "A motif of six or seven consecutive residues that contains two H-bonds." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "paperclip" RELATED [] -synonym: "paperclip loop" RELATED [] -synonym: "schellmann_loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001124 -name: schellmann_loop_seven -alt_id: BS:00228 -def: "Wild type: A motif of seven consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+6), the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+5)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann_loop_seven" EXACT [] -synonym: "seven-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! 
schellmann_loop - -[Term] -id: SO:0001125 -name: schellmann_loop_six -alt_id: BS:00227 -def: "Common Type: A motif of six consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+5) the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann_loop_six" EXACT [] -synonym: "six-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001126 -name: st_motif -alt_id: BS:00229 -def: "A motif of five consecutive residues and two hydrogen bonds in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3) , the main-chain CO group of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001127 -name: st_staple -alt_id: BS:00230 -def: "A motif of four or five consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain OH of residue(i) is H-bonded to the main-chain CO of residue(i3) or (i4), Phi angles of residues(i1), (i2) and (i3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_staple" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001128 -name: turn -alt_id: BS:00148 -def: "A reversal in the direction of the backbone of a protein that is stabilized by hydrogen bond between backbone NH and CO groups, involving no more than 4 amino acid residues." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "turn" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001129 -name: asx_turn_left_handed_type_one -namespace: BS -alt_id: BS:00206 -def: "Left handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx_turn_il" RELATED [] -synonym: "asx_turn_left_handed_type_one" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001130 -name: asx_turn_left_handed_type_two -namespace: BS -alt_id: BS:00204 -def: "Left handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx_turn_iil" EXACT [] -synonym: "asx_turn_left_handed_type_two" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001131 -name: asx_turn_right_handed_type_two -namespace: BS -alt_id: BS:00205 -def: "Right handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx_turn_iir" EXACT [] -synonym: "asx_turn_right_handed_type_two" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001132 -name: asx_turn_type_right_handed_type_one -namespace: BS -alt_id: BS:00207 -def: "Right handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." 
[EBIBS:GAM, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx_turn_ir" EXACT [] -synonym: "asx_turn_type_right_handed_type_one" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001133 -name: beta_turn -namespace: BS -alt_id: BS:00212 -def: "A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles of the second and third residues, which are the basis for sub-categorization." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta_turn" EXACT [] -is_a: SO:0001128 ! turn - -[Term] -id: SO:0001134 -name: beta_turn_left_handed_type_one -namespace: BS -alt_id: BS:00215 -def: "Left handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles:- Residue(i+1): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees. Residue(i+2): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta_turn_il" EXACT [] -synonym: "beta_turn_left_handed_type_one" EXACT [] -synonym: "Type I' beta turn" EXACT [] -synonym: "Type I' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001135 -name: beta_turn_left_handed_type_two -namespace: BS -alt_id: BS:00213 -def: "Left handed type II: A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees > phi > -20 degrees, +80 degrees > psi > +180 degrees. Residue(i+2): +20 degrees > phi > +140 degrees, -40 degrees > psi > +90 degrees." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta_turn_iil" EXACT [] -synonym: "beta_turn_left_handed_type_two" EXACT [] -synonym: "Type II' beta turn" EXACT [] -synonym: "Type II' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001136 -name: beta_turn_right_handed_type_one -namespace: BS -alt_id: BS:00216 -def: "Right handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+2): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta_turn_ir" EXACT [] -synonym: "beta_turn_right_handed_type_one" EXACT [] -synonym: "Type I beta turn" EXACT [] -synonym: "Type I turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001137 -name: beta_turn_right_handed_type_two -namespace: BS -alt_id: BS:00214 -def: "Right handed type II:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, +80 degrees < psi < +180 degrees. Residue(i+2): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta_turn_iir" EXACT [] -synonym: "beta_turn_right_handed_type_two" EXACT [] -synonym: "Type II beta turn" EXACT [] -synonym: "Type II turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001138 -name: gamma_turn -alt_id: BS:00219 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma_turn" EXACT [] -is_a: SO:0001128 ! turn - -[Term] -id: SO:0001139 -name: gamma_turn_classic -alt_id: BS:00220 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=75.0 - psi(i+1)=-64.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "classic gamma turn" EXACT [] -synonym: "gamma_turn_classic" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001140 -name: gamma_turn_inverse -alt_id: BS:00221 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=-79.0 - psi(i+1)=69.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma_turn_inverse" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001141 -name: st_turn -alt_id: BS:00231 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [EBIBS:GAM, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_turn" EXACT [] -is_a: SO:0001128 ! turn - -[Term] -id: SO:0001142 -name: st_turn_left_handed_type_one -alt_id: BS:00234 -def: "The peptide twists in an anticlockwise, left handed manner. 
The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_turn_il" EXACT [] -synonym: "st_turn_left_handed_type_one" EXACT [] -is_a: SO:0001141 ! st_turn - -[Term] -id: SO:0001143 -name: st_turn_left_handed_type_two -alt_id: BS:00232 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_turn_iil" EXACT [] -synonym: "st_turn_left_handed_type_two" EXACT [] -is_a: SO:0001141 ! st_turn - -[Term] -id: SO:0001144 -name: st_turn_right_handed_type_one -alt_id: BS:00235 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_turn_ir" EXACT [] -synonym: "st_turn_right_handed_type_one" EXACT [] -is_a: SO:0001141 ! st_turn - -[Term] -id: SO:0001145 -name: st_turn_right_handed_type_two -alt_id: BS:00233 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_turn_iir" EXACT [] -synonym: "st_turn_right_handed_type_two" EXACT [] -is_a: SO:0001141 ! st_turn - -[Term] -id: SO:0001146 -name: sequence_variation_attribute -alt_id: BS:00336 -def: "An attribute to catgorize the different kinds of sequence variation (alteration). Alternative sequence due to naturally occuring events such as polymorphisms and altermatve splicing or experimental methods such as site directed mutagenesis." [EBIBS:GAR, SO:ke] -comment: For example, was a substitution natural or mutated as part of an experiment? This term is added to merge the biosapiens term sequence_variations. -subset: biosapiens -synonym: "sequence_variations" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001147 -name: natural_variant_site -alt_id: BS:00071 -def: "Describes the natural sequence variants due to polymorphisms, disease-associated mutations, RNA editing and variations between strains, isolates or cultivars." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "natural_variant" EXACT [] -synonym: "sequence variation" RELATED [] -synonym: "variant" RELATED [] -is_a: SO:0001146 ! sequence_variation_attribute - -[Term] -id: SO:0001148 -name: mutated_variant_site -alt_id: BS:00036 -def: "Site which has been experimentally altered." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mutagen" RELATED [] -synonym: "mutagenisis" RELATED [] -synonym: "mutated_site" EXACT [] -is_a: SO:0001146 ! sequence_variation_attribute - -[Term] -id: SO:0001149 -name: alternate_sequence_site -alt_id: BS:00073 -alt_id: SO:0001065 -def: "Description of sequence variants produced by alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. 
-subset: biosapiens -synonym: "alternative_sequence" EXACT [] -synonym: "isoform" NARROW [] -synonym: "sequence variation" RELATED [] -synonym: "var_seq" RELATED [] -synonym: "varsplic" NARROW [] -is_a: SO:0001146 ! sequence_variation_attribute - -[Term] -id: SO:0001150 -name: beta_turn_type_six -namespace: BS -subset: biosapiens -synonym: "Type VI beta turn" EXACT [] -synonym: "Type VI turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001151 -name: beta_turn_type_six_a -namespace: BS -subset: biosapiens -synonym: "Type VI a beta turn" EXACT [] -synonym: "Type VI a turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001152 -name: beta_turn_type_six_a_one -namespace: BS -subset: biosapiens -synonym: "Type VI a1 beta turn" EXACT [] -synonym: "Type VI a1 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001153 -name: beta_turn_type_six_a_two -namespace: BS -subset: biosapiens -synonym: "Type VI a2 beta turn" EXACT [] -synonym: "Type VI a2 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001154 -name: beta_turn_type_six_b -namespace: BS -subset: biosapiens -synonym: "Type VI b beta turn" EXACT [] -synonym: "Type VI b turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001155 -name: beta_turn_type_eight -namespace: BS -subset: biosapiens -synonym: "Type VIII beta turn" EXACT [] -synonym: "Type VIII turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0005836 -name: regulatory_region -def: "A DNA sequence that controls the expression of a gene." [http://www.genpromag.com/scripts/glossary.asp?LETTER=R] -subset: SOFA -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0005837 -name: snRNA_4.5S_primary_transcript -def: "A primary transcript encoding a 4.5S snRNA." [SO:ke] -synonym: "4.5S snRNA primary transcript" EXACT [] -is_a: SO:0000231 ! 
snRNA_primary_transcript - -[Term] -id: SO:0005839 -name: snRNA_4.5S -synonym: "4.5S snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0005841 -name: methylation_guide_snoRNA -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0005843 -name: rRNA_cleavage_snoRNA -is_a: SO:0000275 ! snoRNA - -[Term] -id: SO:0005845 -name: single_exon -is_a: SO:0000147 ! exon - -[Term] -id: SO:0005847 -name: cassette_array_member -is_a: SO:0005848 ! gene_cassette_member - -[Term] -id: SO:0005848 -name: gene_cassette_member -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0005849 -name: gene_subarray_member -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0005850 -name: primer_binding_site -def: "Non-covalent primer binding site for initiation of replication, transcription, or reverse transcription." [http:www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -is_a: SO:0000409 ! binding_site -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0005851 -name: gene_array -def: "An array includes two or more genes, or two or more gene subarrays, contiguously arranged where the individual genes, or subarrays, are either identical in sequence, or essentially so." [SO:ma] -comment: This would include, for example, a cluster of genes each encoding the major ribosomal RNAs and a cluster of histone gene subarrays. -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0005852 -name: gene_subarray -def: "A subarray is, by defintition, a member of a gene array (SO:0005851); the members of a subarray may differ substantially in sequence, but are closely related in function." [SO:ma] -comment: This would include, for example, a cluster of genes encoding different histones. -is_a: SO:0005851 ! gene_array - -[Term] -id: SO:0005853 -name: gene_cassette -def: "A non-functional gene that, when captured by recombination forms a functional gene." [SO:ma] -comment: This would include, for example, the mating type gene cassettes of S. cerevisiae. 
-is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0005854 -name: gene_cassette_array -def: "An array of non-functional genes whose members, when captured by recombination form functional genes." [SO:ma] -comment: This would include, for example, the arrays of non-functional VSG genes of Trypanosomes. -is_a: SO:0005853 ! gene_cassette - -[Term] -id: SO:0005855 -name: gene_group -def: "A collection of related genes." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0005856 -name: selenocysteine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0005857 -name: selenocysteinyl_tRNA -def: "A tRNA sequence that has a selenocysteine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "selenocysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "selenocysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA - -[Term] -id: SO:0005858 -name: syntenic_region -def: "A region in which two or more pairs of homologous markers occur on the same chromosome in two or more species." [http://tbase.jax.org/docs/glossary.html] -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000860 ! syntenic - -[Term] -id: SO:1000002 -name: substitution -def: "Any change in genomic DNA caused by a single event." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0001059 ! sequence_alteration -relationship: sequence_of SO:0000048 ! substitute - -[Term] -id: SO:1000004 -name: partially_characterised_change_in_DNA_sequence -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000007 ! 
uncharacterised_change_in_nucleotide_sequence - -[Term] -id: SO:1000005 -name: complex_substitution -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000007 -name: uncharacterised_change_in_nucleotide_sequence -def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000008 -name: point_mutation -def: "A single nucleotide change which has occurred at the same position of a corresponding nucleotide in a reference sequence." [SO:immuno_workshop] -subset: SOFA -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000009 -name: transition -def: "Change of a pyrimidine nucleotide, C or T, into an other pyrimidine nucleotide, or change of a purine nucleotide, A or G, into an other purine nucleotide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000008 ! point_mutation - -[Term] -id: SO:1000010 -name: pyrimidine_transition -def: "A substitution of a pyrimidine, C or T, for another pyrimidine." [SO:ke] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000011 -name: C_to_T_transition -def: "A transition of a cytidine to a thymine." [SO:ke] -is_a: SO:1000010 ! pyrimidine_transition - -[Term] -id: SO:1000012 -name: C_to_T_transition_at_pCpG_site -def: "The transition of cytidine to thymine occurring at a pCpG site as a consequence of the spontaneous deamination of 5'-methylcytidine." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000011 ! C_to_T_transition - -[Term] -id: SO:1000013 -name: T_to_C_transition -is_a: SO:1000010 ! 
pyrimidine_transition - -[Term] -id: SO:1000014 -name: purine_transition -def: "A substitution of a purine, A or G, for another purine." [SO:ke] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000015 -name: A_to_G_transition -def: "A transition of an adenine to a guanine." [SO:ke] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000016 -name: G_to_A_transition -def: "A transition of a guanine to an adenine." [SO:ke] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000017 -name: transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G, or vice versa." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000008 ! point_mutation - -[Term] -id: SO:1000018 -name: pyrimidine_to_purine_transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G." [SO:ke] -is_a: SO:1000017 ! transversion - -[Term] -id: SO:1000019 -name: C_to_A_transversion -def: "A transversion from cytidine to adenine." [SO:ke] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000020 -name: C_to_G_transversion -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000021 -name: T_to_A_transversion -def: "A transversion from T to A." [SO:ke] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000022 -name: T_to_G_transversion -def: "A transversion from T to G." [SO:ke] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000023 -name: purine_to_pyrimidine_transversion -def: "Change of a purine nucleotide, A or G , into a pyrimidine nucleotide C or T." [SO:ke] -is_a: SO:1000017 ! transversion - -[Term] -id: SO:1000024 -name: A_to_C_transversion -def: "A transversion from adenine to cytidine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000025 -name: A_to_T_transversion -def: "A transversion from adenine to thymine." [SO:ke] -is_a: SO:1000023 ! 
purine_to_pyrimidine_transversion - -[Term] -id: SO:1000026 -name: G_to_C_transversion -def: "A transversion from guanine to cytidine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000027 -name: G_to_T_transversion -def: "A transversion from guanine to thymine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000028 -name: intrachromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000029 -name: chromosomal_deletion -def: "An incomplete chromosome." [SO:ke] -synonym: "(bacteria)&Dgr;" RELATED [] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(fungi)D" RELATED [] -is_a: SO:0000550 ! aneuploid_chromosome -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000030 -name: chromosomal_inversion -synonym: "(bacteria)IN" RELATED [] -synonym: "(Drosophila)In" RELATED [] -synonym: "(fungi)In" RELATED [] -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000031 -name: interchromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000032 -name: indel -def: "A hybrid term (insertion/deletion) to describe sequence length change when the direction of the change is unspecified." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:1000033 -name: nucleotide_deletion -def: "One or more continuous nucleotides are excised from the sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000032 ! indel - -[Term] -id: SO:1000034 -name: nucleotide_insertion -def: "One or more nucleotides are added between two adjacent nucleotides in the sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000032 ! 
indel - -[Term] -id: SO:1000035 -name: nucleotide_duplication -def: "One or more nucleotides are added between two adjacent nucleotides in the sequence; the inserted sequence derives from, or is identical in sequence to, nucleotides adjacent to insertion point." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000034 ! nucleotide_insertion - -[Term] -id: SO:1000036 -name: inversion -def: "A continuous nucleotide sequence is inverted in the same position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0001059 ! sequence_alteration -relationship: sequence_of SO:0000047 ! invert - -[Term] -id: SO:1000037 -name: chromosomal_duplication -def: "An extra chromosome." [SO:ke] -synonym: "(Drosophila)Dp" RELATED [] -synonym: "(fungi)Dp" RELATED [] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:1000038 -name: intrachromosomal_duplication -is_a: SO:1000028 ! intrachromosomal_mutation -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000039 -name: direct_tandem_duplication -is_a: SO:1000173 ! tandem_duplication - -[Term] -id: SO:1000040 -name: inverted_tandem_duplication -is_a: SO:1000173 ! tandem_duplication - -[Term] -id: SO:1000041 -name: intrachromosomal_transposition -synonym: "(Drosophila)Tp" RELATED [] -is_a: SO:0000453 ! transposition -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:1000042 -name: compound_chromosome -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000043 -name: Robertsonian_fusion -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000044 -name: chromosomal_translocation -synonym: "(Drosophila)T" RELATED [] -synonym: "(fungi)T" RELATED [] -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000045 -name: ring_chromosome -synonym: "(Drosophila)R" RELATED [] -synonym: "(fungi)C" RELATED [] -is_a: SO:1000028 ! 
intrachromosomal_mutation - -[Term] -id: SO:1000046 -name: pericentric_inversion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000047 -name: paracentric_inversion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000048 -name: reciprocal_chromosomal_translocation -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000049 -name: mutation_affecting_transcript -def: "Any change in mature, spliced and processed, RNA that results from a change in the corresponding DNA sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000132 ! mutation - -[Term] -id: SO:1000050 -name: mutation_causing_no_change_in_transcript -def: "No effect on the state of the RNA." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000052 -name: mutation_affecting_complex_change_in_transcript -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000054 -name: mutation_affecting_coding_sequence -def: "Any of the amino acid coding triplets of a gene are affected by the DNA mutation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000055 -name: mutation_causing_initiator_codon_change_in_transcript -def: "The DNA mutation changes, usually destroys, the first coding triplet of a gene. Usually prevents translation although another initiator codon may be used." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000056 ! mutation_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000056 -name: mutation_causing_amino_acid_coding_codon_change_in_transcript -def: "The DNA mutation affects the amino acid coding sequence of a gene; this region includes both the initiator and terminator codons." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000054 ! 
mutation_affecting_coding_sequence - -[Term] -id: SO:1000057 -name: mutation_causing_synonymous_codon_change_in_transcript -def: "The changed codon has the same translation product as the original codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000056 ! mutation_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000058 -name: mutation_causing_non_synonymous_codon_change_in_transcript -def: "A DNA point mutation that causes a substitution of an amino acid by an other." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "non-synonymous codon change in transcript" EXACT [] -is_a: SO:1000056 ! mutation_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000059 -name: mutation_causing_missense_codon_change_in_transcript -def: "The nucleotide change in the codon leads to a new codon coding for a new amino acid." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000058 ! mutation_causing_non_synonymous_codon_change_in_transcript - -[Term] -id: SO:1000060 -name: mutation_causing_conservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change does not change the gross properties (size, charge, hydrophobicity) of the amino acid at that position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -is_a: SO:1000059 ! mutation_causing_missense_codon_change_in_transcript - -[Term] -id: SO:1000061 -name: mutation_causing_nonconservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change changes the gross properties (size, charge, hydrophobicity) of the amino acid in that position." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -is_a: SO:1000059 ! mutation_causing_missense_codon_change_in_transcript - -[Term] -id: SO:1000062 -name: mutation_causing_nonsense_codon_change_in_transcript -def: "The nucleotide change in the codon triplet creates a terminator codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000056 ! mutation_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000063 -name: mutation_causing_terminator_codon_change_in_transcript -def: "The nucleotide change in the codon triplet changes the stop codon, causing an elongated transcript sequence." [SO:ke] -is_a: SO:1000054 ! mutation_affecting_coding_sequence - -[Term] -id: SO:1000064 -name: mutation_affecting_reading_frame -def: "An umbrella term for terms describing an effect of a mutation on the frame of translation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000054 ! mutation_affecting_coding_sequence - -[Term] -id: SO:1000065 -name: frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three." [SO:ke] -synonym: "out of frame mutation" RELATED [] -is_a: SO:1000064 ! mutation_affecting_reading_frame - -[Term] -id: SO:1000066 -name: plus_1_frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, due to the insertion of a nucleotide." [SO:ke] -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000067 -name: minus_1_frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, due to the deletion of a nucleotide." [SO:ke] -is_a: SO:1000065 ! 
frameshift_mutation - -[Term] -id: SO:1000068 -name: plus_2_frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, due to the insertion of two nucleotides." [SO:ke] -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000069 -name: minus_2_frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, due to the deletion of two nucleotides." [SO:ke] -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000070 -name: mutation_affecting_transcript_processing -def: "Mutation affects the way in which the primary transcriptional product is processed to form the mature transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000071 -name: mutation_affecting_splicing -def: "A mutation that affects the way in which the primary transcriptional product is processed to form the mature transcript, specifically by the removal (splicing) of intron sequences." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000132 ! mutation - -[Term] -id: SO:1000072 -name: splice_donor_mutation -def: "A mutation that affects the splice donor sequence." [SO:ke] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000073 -name: splice_acceptor_mutation -def: "A mutation that affects the splice acceptor sequence." [SO:ke] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000074 -name: cryptic_splice_activator_mutation -def: "A kind of mutation that creates a new (functional) splice site." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000075 -name: mutation_affecting_editing -def: "Mutation affects the editing of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000070 ! 
mutation_affecting_transcript_processing - -[Term] -id: SO:1000076 -name: mutation_affecting_transcription -def: "Mutation affects the process of transcription, its initiation, progression or termination." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000078 -name: mutation_decreasing_rate_of_transcription -def: "A mutation that decreases the rate a which transcription of the sequence occurs." [SO:ke] -is_a: SO:1000081 ! mutation_affecting_rate_of_transcription - -[Term] -id: SO:1000079 -name: mutation_affecting_transcript_sequence -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000080 -name: mutation_increasing_rate_of_transcription -is_a: SO:1000081 ! mutation_affecting_rate_of_transcription - -[Term] -id: SO:1000081 -name: mutation_affecting_rate_of_transcription -def: "A mutation that alters the rate a which transcription of the sequence occurs." [SO:ke] -is_a: SO:1000076 ! mutation_affecting_transcription - -[Term] -id: SO:1000082 -name: mutation_affecting_transcript_stability -def: "Mutation affects the stability of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000083 -name: mutation_increasing_transcript_stability -def: "Mutation increases the stability (half-life) of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000082 ! mutation_affecting_transcript_stability - -[Term] -id: SO:1000084 -name: mutation_decreasing_transcript_stability -def: "Mutation decreases the stability (half-life) of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000082 ! 
mutation_affecting_transcript_stability - -[Term] -id: SO:1000085 -name: mutation_affecting_level_of_transcript -def: "A mutation that causes a change in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000086 -name: mutation_decreasing_level_of_transcript -def: "A mutation that causes a decrease in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -is_a: SO:1000085 ! mutation_affecting_level_of_transcript - -[Term] -id: SO:1000087 -name: mutation_increasing_level_of_transcript -def: "A mutation that causes an increase in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -is_a: SO:1000085 ! mutation_affecting_level_of_transcript - -[Term] -id: SO:1000088 -name: mutation_affecting_translational_product -def: "Mutation causes a change in primary translation product of a transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000132 ! mutation - -[Term] -id: SO:1000089 -name: mutation_causing_no_change_of_translational_product -def: "The change at RNA level does not lead to any change in polypeptide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000090 -name: mutation_causing_uncharacterised_change_of_translational_product -def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000091 -name: mutation_causing_partially_characterised_change_of_translational_product -def: "The nature of the mutation event is only partially characterised." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000090 ! mutation_causing_uncharacterised_change_of_translational_product - -[Term] -id: SO:1000092 -name: mutation_causing_complex_change_of_translational_product -def: "Any mutation effect that is known at nucleotide level but can not be explained by using other key terms." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000093 -name: mutation_causing_amino_acid_substitution -def: "The replacement of a single amino acid by an other." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000094 -name: mutation_causing_conservative_amino_acid_substitution -is_a: SO:1000093 ! mutation_causing_amino_acid_substitution - -[Term] -id: SO:1000095 -name: mutation_causing_nonconservative_amino_acid_substitution -is_a: SO:1000093 ! mutation_causing_amino_acid_substitution - -[Term] -id: SO:1000096 -name: mutation_causing_amino_acid_insertion -def: "The insertion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000097 -name: mutation_causing_amino_acid_deletion -def: "The deletion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000098 -name: mutation_causing_polypeptide_truncation -def: "The translational product is truncated at its C-terminus, usually a result of a nonsense codon change in transcript (SO:1000062)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! 
mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000099 -name: mutation_causing_polypeptide_elongation -def: "The extension of the translational product at either (or both) the N-terminus and/or the C-terminus." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000100 -name: mutation_causing_polypeptide_N_terminal_elongation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "polypeptide N-terminal elongation" EXACT [] -is_a: SO:1000099 ! mutation_causing_polypeptide_elongation - -[Term] -id: SO:1000101 -name: mutation_causing_polypeptide_C_terminal_elongation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "polypeptide C-terminal elongation" EXACT [] -is_a: SO:1000099 ! mutation_causing_polypeptide_elongation - -[Term] -id: SO:1000102 -name: mutation_affecting_level_of_translational_product -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000103 -name: mutation_decreasing_level_of_translation_product -is_a: SO:1000102 ! mutation_affecting_level_of_translational_product - -[Term] -id: SO:1000104 -name: mutation_increasing_level_of_translation_product -is_a: SO:1000102 ! mutation_affecting_level_of_translational_product - -[Term] -id: SO:1000105 -name: mutation_affecting_polypeptide_amino_acid_sequence -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000106 -name: mutation_causing_inframe_polypeptide_N_terminal_elongation -synonym: "inframe polypeptide N-terminal elongation" EXACT [] -is_a: SO:1000100 ! mutation_causing_polypeptide_N_terminal_elongation - -[Term] -id: SO:1000107 -name: mutation_causing_out_of_frame_polypeptide_N_terminal_elongation -synonym: "out of frame polypeptide N-terminal elongation" EXACT [] -is_a: SO:1000100 ! 
mutation_causing_polypeptide_N_terminal_elongation - -[Term] -id: SO:1000108 -name: mutaton_causing_inframe_polypeptide_C_terminal_elongation -synonym: "inframe_polypeptide C-terminal elongation" EXACT [] -is_a: SO:1000101 ! mutation_causing_polypeptide_C_terminal_elongation - -[Term] -id: SO:1000109 -name: mutation_causing_out_of_frame_polypeptide_C_terminal_elongation -synonym: "out of frame polypeptide C-terminal elongation" EXACT [] -is_a: SO:1000101 ! mutation_causing_polypeptide_C_terminal_elongation - -[Term] -id: SO:1000110 -name: frame_restoring_mutation -def: "A mutation that reverts the sequence of a previous frameshift mutation back to the initial frame." [SO:ke] -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000111 -name: mutation_affecting_3D_structure_of_polypeptide -def: "A mutation that changes the amino acid sequence of the peptide in such a way that it changes the 3D structure of the molecule." [SO:ke] -synonym: "mutation affecting 3D-structure of polypeptide" EXACT [] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000112 -name: mutation_causing_no_3D_structural_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000113 -name: mutation_causing_uncharacterised_3D_structural_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000114 -name: mutation_causing_partially_characterised_3D_structural_change -is_a: SO:1000113 ! mutation_causing_uncharacterised_3D_structural_change - -[Term] -id: SO:1000115 -name: mutation_causing_complex_3D_structural_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000116 -name: mutation_causing_conformational_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000117 -name: mutation_affecting_polypeptide_function -is_a: SO:1000088 ! 
mutation_affecting_translational_product - -[Term] -id: SO:1000118 -name: mutation_causing_loss_of_function_of_polypeptide -synonym: "loss of function of polypeptide" RELATED [] -is_a: SO:1000117 ! mutation_affecting_polypeptide_function - -[Term] -id: SO:1000119 -name: mutation_causing_inactive_ligand_binding_site -is_a: SO:1000118 ! mutation_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000120 -name: mutation_causing_inactive_catalytic_site -is_a: SO:1000119 ! mutation_causing_inactive_ligand_binding_site - -[Term] -id: SO:1000121 -name: mutation_causing_polypeptide_localization_change -is_a: SO:1000117 ! mutation_affecting_polypeptide_function - -[Term] -id: SO:1000122 -name: mutation_causing_polypeptide_post_translational_processing_change -synonym: "polypeptide post-translational processing affected" EXACT [] -is_a: SO:1000117 ! mutation_affecting_polypeptide_function -is_a: SO:1000118 ! mutation_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000123 -name: polypeptide_post_translational_processing_affected -synonym: "polypeptide_post-translational_processing_affected" RELATED [] -is_obsolete: true - -[Term] -id: SO:1000124 -name: mutation_causing_partial_loss_of_function_of_polypeptide -synonym: "partial loss of function of polypeptide" EXACT [] -is_a: SO:1000118 ! mutation_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000125 -name: mutation_causing_gain_of_function_of_polypeptide -synonym: "gain of function of polypeptide" EXACT [] -is_a: SO:1000117 ! mutation_affecting_polypeptide_function - -[Term] -id: SO:1000126 -name: mutation_affecting_transcript_secondary_structure -def: "A mutation that affects the secondary structure (folding) of the RNA transcript molecule." [SO:ke] -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000127 -name: mutation_causing_compensatory_transcript_secondary_structure_mutation -is_a: SO:1000126 ! 
mutation_affecting_transcript_secondary_structure - -[Term] -id: SO:1000132 -name: mutation -def: "An event that changes nucleotide sequence." [SO:ke] -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:1000134 -name: mutation_causing_polypeptide_fusion -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000136 -name: autosynaptic_chromosome -synonym: "(Drosophila)A" RELATED [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000138 -name: homo_compound_chromosome -synonym: "homo-compound chromosome" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:1000140 -name: hetero_compound_chromosome -synonym: "hetero-compound chromosome" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:1000141 -name: chromosome_fission -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000142 -name: dexstrosynaptic_chromosome -is_a: SO:1000136 ! autosynaptic_chromosome - -[Term] -id: SO:1000143 -name: laevosynaptic_chromosome -is_a: SO:1000136 ! autosynaptic_chromosome - -[Term] -id: SO:1000144 -name: free_duplication -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000145 -name: free_ring_duplication -synonym: "(Drosophila)R" RELATED [] -is_a: SO:1000045 ! ring_chromosome -is_a: SO:1000144 ! free_duplication - -[Term] -id: SO:1000146 -name: complex_chromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000147 -name: deficient_translocation -def: "A translocation in which one of the four broken ends loses a segment before re-joining." [fb:reference_manual] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfT" RELATED [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000044 ! 
chromosomal_translocation - -[Term] -id: SO:1000148 -name: inversion_cum_translocation -def: "The first two breaks are in the same chromosome, and the region between them is rejoined in inverted order to the other side of the first break, such that both sides of break one are present on the same chromosome. The remaining free ends are joined as a translocation with those resulting from the third break." [fb:reference_manual] -synonym: "(Drosophila)InT" RELATED [] -synonym: "(Drosophila)T" RELATED [] -synonym: "inversion cum translocation" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000149 -name: bipartite_duplication -def: "The (large) region between the first two breaks listed is lost, and the two flanking segments (one of them centric) are joined as a translocation to the free ends resulting from the third break." [fb:reference_manual] -synonym: "(Drosophila)bDp" RELATED [] -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000150 -name: cyclic_translocation -def: "Three breaks in three different chromosomes. The centric segment resulting from the first break listed is joined to the acentric segment resulting from the second, rather than the third." [fb:reference_manual] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000151 -name: bipartite_inversion -def: "Three breaks in the same chromosome; both central segments are inverted in place (i.e., they are not transposed)." [fb:reference_manual] -synonym: "(Drosophila)bIn" RELATED [] -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000152 -name: uninverted_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)eDp" RELATED [] -is_a: SO:1000154 ! 
insertional_duplication - -[Term] -id: SO:1000153 -name: inverted_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)iDp" RELATED [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000154 -name: insertional_duplication -def: "A chromosome duplication involving the insertion of a duplicated region." [SO:ke] -synonym: "(Drosophila)Dpp" RELATED [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000155 -name: interchromosomal_transposition -synonym: "(Drosophila)Tp" RELATED [] -is_a: SO:0000453 ! transposition -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000156 -name: inverted_interchromosomal_transposition -synonym: "(Drosophila)iTp" RELATED [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000157 -name: uninverted_interchromosomal_transposition -synonym: "(Drosophila)eTp" RELATED [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000158 -name: inverted_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)iTp" RELATED [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000159 -name: uninverted_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)eTp" RELATED [] -is_a: SO:1000041 ! 
intrachromosomal_transposition - -[Term] -id: SO:1000160 -name: unoriented_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [fb:reference_manual] -synonym: "(Drosophila)uDp" RELATED [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000161 -name: unorientated_interchromosomal_transposition -synonym: "(Drosophila)uTp" RELATED [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000162 -name: unorientated_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [fb:reference_manual] -synonym: "(Drosophila)uTp" RELATED [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000170 -name: uncharacterised_chromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000171 -name: deficient_inversion -def: "Three breaks in the same chromosome; one central region lost, the other inverted." [fb:reference_manual] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfIn" RELATED [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000173 -name: tandem_duplication -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:1000175 -name: partially_characterised_chromosomal_mutation -is_a: SO:1000170 ! uncharacterised_chromosomal_mutation - -[Term] -id: SO:1000177 -name: mutation_causing_uncharacterised_change_in_transcript -def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000049 ! 
mutation_affecting_transcript - -[Term] -id: SO:1000179 -name: mutation_causing_partially_characterised_change_in_transcript -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000177 ! mutation_causing_uncharacterised_change_in_transcript - -[Term] -id: SO:1000180 -name: mutation_affecting_gene_structure -def: "A kind of mutation that affects the structure of a gene." [SO:ke] -is_a: SO:1000132 ! mutation - -[Term] -id: SO:1000181 -name: mutation_causing_gene_fusion -def: "A kind of mutation that affects the structure of a gene by causing a fusion to another gene." [SO:ke] -is_a: SO:1000180 ! mutation_affecting_gene_structure - -[Term] -id: SO:1000182 -name: chromosome_number_variation -def: "A kind of chromosome variation where the chromosome complement that is not an exact multiple of the haploid number." [SO:ke] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:1000183 -name: chromosome_structure_variation -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:1000184 -name: mutation_causes_exon_loss -def: "A mutation that affects splicing and causes an exon loss." [SO:ke] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000185 -name: mutation_causes_intron_gain -def: "Mutation causes an intron to be gained by the processed transcript; usually a result of a donor acceptor mutation (SO:1000072)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000186 -name: cryptic_splice_donor_activation -is_a: SO:1000074 ! cryptic_splice_activator_mutation - -[Term] -id: SO:1001186 -name: cryptic_splice_acceptor_activation -is_a: SO:1000074 ! cryptic_splice_activator_mutation - -[Term] -id: SO:1001187 -name: alternatively_spliced_transcript -def: "A transcript that is alternatively spliced." [SO:xp] -intersection_of: SO:0000673 ! 
transcript -intersection_of: has_quality SO:0000877 ! alternatively_spliced - -[Term] -id: SO:1001188 -name: encodes_1_polypeptide -def: "A gene that is alternately spliced, but encodes only one polypeptide." [SO:ke] -is_a: SO:0000463 ! encodes_alternately_spliced_transcripts - -[Term] -id: SO:1001189 -name: encodes_greater_than_1_polypeptide -def: "A gene that is alternately spliced, and encodes more than one polypeptide." [SO:ke] -is_a: SO:0000463 ! encodes_alternately_spliced_transcripts - -[Term] -id: SO:1001190 -name: encodes_different_polypeptides_different_stop -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that do overlap peptide sequence, but use different stop codon." [SO:ke] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001191 -name: encodes_overlapping_peptides_different_start -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that do overlap peptide sequence, but use different start codon." [SO:ke] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001192 -name: encodes_disjoint_polypeptides -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that do not overlap peptide sequence." [SO:ke] -is_a: SO:1001189 ! encodes_greater_than_1_polypeptide - -[Term] -id: SO:1001193 -name: encodes_overlapping_polypeptides_different_start_and_stop -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that do overlap peptide sequence, but use different start and stop codons." [SO:ke] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001194 -name: alternatively_spliced_gene_encoding_greater_than_1_polypeptide_coding_regions_overlapping -is_obsolete: true - -[Term] -id: SO:1001195 -name: encodes_overlapping_peptides -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that do overlap peptide sequence." [SO:ke] -is_a: SO:1001189 ! 
encodes_greater_than_1_polypeptide - -[Term] -id: SO:1001196 -name: cryptogene -def: "A maxicircle gene so extensively edited that it cannot be matched to its edited mRNA sequence." [SO:ma] -intersection_of: SO:0000654 ! maxicircle_gene -intersection_of: has_quality SO:0000976 ! cryptic - -[Term] -id: SO:1001197 -name: dicistronic_primary_transcript -def: "A primary transcript that has the quality dicistronic." [SO:xp] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:1001217 -name: member_of_regulon -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:1001244 -name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non_overlapping -synonym: "alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non-overlapping" RELATED [] -is_obsolete: true - -[Term] -id: SO:1001246 -name: CDS_independently_known -def: "A CDS with the evidence status of being independently known." [SO:xp] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000906 ! independently_known - -[Term] -id: SO:1001247 -name: orphan_CDS -def: "A CDS whose predicted amino acid sequence is unsupported by any experimental evidence or by any match with any other known sequence." [SO:ma] -intersection_of: SO:1001254 ! CDS_predicted -intersection_of: has_origin SO:0000910 ! orphan - -[Term] -id: SO:1001249 -name: CDS_supported_by_domain_match_data -def: "A CDS that is supported by domain similarity." [SO:xp] -intersection_of: SO:1001251 ! CDS_supported_by_sequence_similarity_data -intersection_of: has_quality SO:0000908 ! supported_by_domain_match - -[Term] -id: SO:1001251 -name: CDS_supported_by_sequence_similarity_data -def: "A CDS that is supported by sequence similarity data." [SO:xp] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000907 ! 
supported_by_sequence_similarity - -[Term] -id: SO:1001254 -name: CDS_predicted -def: "A CDS that is predicted." [SO:ke] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000732 ! predicted - -[Term] -id: SO:1001255 -name: status_of_coding_sequence -is_obsolete: true - -[Term] -id: SO:1001259 -name: CDS_supported_by_EST_or_cDNA_data -def: "A CDS that is supported by similarity to EST or cDNA data." [SO:xp] -intersection_of: SO:1001251 ! CDS_supported_by_sequence_similarity_data -intersection_of: has_quality SO:0000909 ! supported_by_EST_or_cDNA - -[Term] -id: SO:1001260 -name: internal_Shine_Dalgarno_sequence -def: "A Shine-Delgarno sequence that stimulates recoding through interactions with the anti-Shine-Delgarno in the RNA of small ribosomal subunits of translating ribosomes. The signal is only operative in Bacteria." [PMID:12519954, SO:ke] -is_a: SO:0000243 ! internal_ribosome_entry_site -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001261 -name: recoded_mRNA -def: "A gene coding an mRNA which is recoded before translation, usually by special cis-acting signals." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000881 ! recoded - -[Term] -id: SO:1001262 -name: minus_1_translational_frameshift -def: "An attribute describing a translational frameshift of -1." [SO:ke] -is_a: SO:0000887 ! translational_frameshift - -[Term] -id: SO:1001263 -name: plus_1_translational_frameshift -def: "An attribute describing a translational frameshift of +1." [SO:ke] -is_a: SO:0000887 ! translational_frameshift - -[Term] -id: SO:1001264 -name: mRNA_recoded_by_translational_bypass -def: "A mRNA is translated by ribosomes that suspend translation at a particular codon and resume translation at a particular non-overlapping downstream codon." 
[http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:1001265 -name: mRNA_recoded_by_codon_redefinition -def: "A gene whose mRNA is recoded by an alteration of codon meaning." [SO:ma] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000882 ! codon_redefined - -[Term] -id: SO:1001266 -name: stop_codon_redefinition_as_selenocysteine -is_obsolete: true - -[Term] -id: SO:1001267 -name: stop_codon_readthrough -is_obsolete: true - -[Term] -id: SO:1001268 -name: recoding_stimulatory_region -def: "A site in an mRNA sequence that stimulates the recoding of a region in the same mRNA." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12519954&dopt=Abstract] -synonym: "recoding stimulatory signal" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:1001269 -name: four_bp_start_codon -def: "A non-canonical start codon with 4 pase pairs." [SO:ke] -synonym: "4bp start codon" EXACT [] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001270 -name: stop_codon_redefinition_as_pyrrolysine -is_obsolete: true - -[Term] -id: SO:1001271 -name: archeal_intron -def: "Intron characteristic of tRNA genes; splices by an endonuclease-ligase mediated mechanism." [SO:ma] -is_a: SO:0000661 ! intron_attribute - -[Term] -id: SO:1001272 -name: tRNA_intron -is_a: SO:0000661 ! intron_attribute - -[Term] -id: SO:1001273 -name: CTG_start_codon -def: "A non-canonical start codon of sequence CTG." [SO:ke] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001274 -name: SECIS_element -def: "The incorporation of selenocysteine into a protein sequence is directed by an in-frame UGA codon (usually a stop codon) within the coding region of the mRNA. 
Selenoprotein mRNAs contain a conserved secondary structure in the 3' UTR that is required for the distinction of UGA stop from UGA selenocysteine. The selenocysteine insertion sequence (SECIS) is around 60 nt in length and adopts a hairpin structure which is sufficiently well-defined and conserved to act as a computational screen for selenoprotein genes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00031] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001275 -name: retron -def: "Sequence coding for a short, single-stranded, DNA sequence via a retrotransposed RNA intermediate; characteristic of some microbial genomes." [SO:ma] -is_a: SO:0000001 ! region - -[Term] -id: SO:1001277 -name: three_prime_recoding_site -def: "The recoding stimulatory signal located downstream of the recoding site." [SO:ke] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001279 -name: three_prime_stem_loop_structure -def: "A recoding stimulatory region, the stem-loop secondary structural element is downstream of the redefined region." [PMID:12519954, SO:ke] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001280 -name: five_prime_recoding_site -def: "The recoding stimulatory signal located upstream of the recoding site." [SO:ke] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001281 -name: flanking_three_prime_quadruplet_recoding_signal -def: "Four base pair sequence immediately downstream of the redefined region. The redefined region is a frameshift site. The quadruplet is 2 overlapping codons." [PMID:12519954, SO:ke] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001282 -name: UAG_stop_codon_signal -def: "A stop codon signal for a UAG stop codon redefinition." [SO:ke] -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001283 -name: UAA_stop_codon_signal -def: "A stop codon signal for a UAA stop codon redefinition." [SO:ke] -is_a: SO:1001288 ! 
stop_codon_signal - -[Term] -id: SO:1001284 -name: regulon -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." [ISBN:0198506732] -subset: SOFA -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:1001285 -name: UGA_stop_codon_signal -def: "A stop codon signal for a UGA stop codon redefinition." [SO:ke] -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001286 -name: three_prime_repeat_recoding_signal -def: "A recoding stimulatory signal, downstream sequence important for recoding that contains repetitive elements." [PMID:12519954, SO:ke] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001287 -name: distant_three_prime_recoding_signal -def: "A recoding signal that is found many hundreds of nucleotides 3' of a redefined stop codon." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8709208&dopt=Abstract] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001288 -name: stop_codon_signal -def: "A recoding stimulatory signal that is a stop codon and has effect on efficiency of recoding." [PMID:12519954, SO:ke] -comment: This term does not include the stop codons that are redefined. An example would be a stop codon that partially overlapped a frame shifting site would be an example stimulatory signal. -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:2000061 -name: databank_entry -def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -is_a: SO:0000695 ! reagent - -[Typedef] -id: adjacent_to -name: adjacent_to -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence." [SO:ke] -subset: SOFA -domain: SO:0000110 ! sequence_feature -range: SO:0000110 ! sequence_feature - -[Typedef] -id: associated_with -name: associated_with -comment: This relationship is vague and up for discussion. 
-is_symmetric: true - -[Typedef] -id: derives_from -name: derives_from -subset: SOFA -is_transitive: true - -[Typedef] -id: genome_of -name: genome_of - -[Typedef] -id: has_genome_location -name: has_genome_location -is_obsolete: true - -[Typedef] -id: has_origin -name: has_origin - -[Typedef] -id: has_part -name: has_part - -[Typedef] -id: has_quality -name: has_quality -comment: The relationship between a feature and an atrribute. - -[Typedef] -id: homologous_to -name: homologous_to -subset: SOFA -is_symmetric: true -is_a: similar_to ! similar_to - -[Typedef] -id: member_of -name: member_of -comment: A subtype of part_of. Inverse is collection_of. Winston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. -subset: SOFA -is_transitive: true - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -subset: SOFA -is_a: homologous_to ! homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: paralogous_to -name: paralogous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: part_of -name: part_of -namespace: BS -subset: SOFA -is_transitive: true - -[Typedef] -id: position_of -name: position_of - -[Typedef] -id: regulated_by -name: regulated_by -is_obsolete: true - -[Typedef] -id: sequence_of -name: sequence_of - -[Typedef] -id: similar_to -name: similar_to -subset: SOFA -is_symmetric: true - -[Typedef] -id: variant_of -name: variant_of -def: "A' is a variant (mutation) of A = definition every instance of A' is either an immediate mutation of s=ome instance of A, or there is a chain of immediate mutation processes linking A' to some instance of A." [SO:immuno_workshop] -comment: Added to SO during the immunology workshop, June 2007. 
This relationship was approved by Barry Smith. - diff --git a/annotation/NBIS/Ontology/SOXP/so-xp_2_3.obo b/annotation/NBIS/Ontology/SOXP/so-xp_2_3.obo deleted file mode 100644 index cfd7e8d83..000000000 --- a/annotation/NBIS/Ontology/SOXP/so-xp_2_3.obo +++ /dev/null @@ -1,10579 +0,0 @@ -format-version: 1.2 -date: 30:01:2008 17:03 -saved-by: kareneilbeck -auto-generated-by: OBO-Edit 1.101 -subsetdef: biosapiens "biosapiens protein feature ontology" -subsetdef: SOFA "SO feature annotation" -default-namespace: sequence -remark: autogenerated-by\: DAG-Edit version 1.417\nsaved-by\: eilbeck\ndate\: Tue May 11 15\:18\:44 PDT 2004\nversion\: $Revision\: 1.45 $ - -[Term] -id: SO:0000000 -name: Sequence_Ontology -subset: SOFA - -[Term] -id: SO:0000001 -name: region -def: "A sequence_feature with an extent greater than zero." [SO:ke] -subset: SOFA -synonym: "sequence" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000002 -name: sequence_secondary_structure -def: "A folded sequence." [SO:ke] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000003 -name: G_quartet -def: "G-quartets are unusual nucleic acid structures consisting of a planar arrangement where each guanine is hydrogen bonded by hoogsteen pairing to another guanine in the quartet." [http://www.library.csi.cuny.edu/~davis/molbiol/lecture_notes/post-transcriptional_processes/RNACapping.pdf] -synonym: "G-quadruplex" EXACT [] -synonym: "G-quartet" EXACT [] -synonym: "G_quadruplex" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000004 -name: interior_coding_exon -subset: SOFA -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000005 -name: satellite_DNA -def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000006 -name: PCR_product -def: "A region amplified by a PCR reaction." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "amplicon" RELATED [] -synonym: "PCR product" EXACT [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000007 -name: read_pair -def: "A pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000008 -name: gene_sensu_your_favorite_organism -is_obsolete: true - -[Term] -id: SO:0000009 -name: gene_class -is_obsolete: true - -[Term] -id: SO:0000010 -name: protein_coding -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000011 -name: non_protein_coding -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000012 -name: scRNA_primary_transcript -def: "The primary transcript of any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -synonym: "scRNA transcript" EXACT [] -synonym: "small cytoplasmic RNA" RELATED [] -synonym: "small cytoplasmic RNA transcript" EXACT [] -synonym: "small_cytoplasmic_RNA" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000013 -name: scRNA -def: "Any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -subset: SOFA -synonym: " small cytoplasmic RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000012 ! 
scRNA_primary_transcript - -[Term] -id: SO:0000014 -name: INR_motif -def: "A sequence element characteristic of some RNA polymerase II promoters required for the correct positioning of the polymerase for the start of transcription. Overlaps the TSS. The mammalian consensus sequence is YYAN(T|A)YY; the Drosophila consensus sequence is TCA(G|T)t(T|C). In each the A is at position +1 with respect to the TSS. Functionally similar to the TATA box element." [PMID:12651739] -synonym: "DMp2" RELATED [] -synonym: "initiator" EXACT [] -synonym: "initiator motif" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0000015 -name: DPE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters; Positioned from +28 to +32 with respect to the TSS (+1). Experimental results suggest that the DPE acts in conjunction with the INR_motif to provide a binding site for TFIID in the absence of a TATA box to mediate transcription of TATA-less promoters. Consensus sequence (A|G)G(A|T)(C|T)(G|A|C)." [PMID:12651739\:12537576] -synonym: "CRWMGCGWKCGCTTS" NARROW [] -synonym: "downstream core promoter element" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0000016 -name: BRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements at -37 to -32 with respect to the TSS (+1). Consensus sequence is (G|C)(G|C)(G|A)CGCC. Binds TFIIB." [PMID:12651739] -synonym: " transcription factor B-recognition element" EXACT [] -synonym: "B-recognition element" EXACT [] -synonym: "TFIIB recognition element" RELATED [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0000017 -name: PSE_motif -def: "A sequence element characteristic of the promoters of snRNA genes transcribed by RNA polymerase II or by RNA polymerase III. Located between -45 and -60 relative to the TSS. The human PSE_motif consensus sequence is TCACCNTNA(C|G)TNAAAAG(T|G)." 
[PMID:12651739] -synonym: "proximal sequence element" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0000018 -name: linkage_group -def: "A group of loci that can be grouped in a linear order representing the different degrees of linkage among the genes concerned." [ISBN:038752046] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000020 -name: RNA_internal_loop -def: "A region of double stranded RNA where the bases do not conform to WC base pairing. The loop is closed on both sides by canonical base pairing. If the interruption to base pairing occurs on one strand only, it is known as a bulge." [SO:ke] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000021 -name: asymmetric_RNA_internal_loop -def: "An internal RNA loop where one of the strands includes more bases than the corresponding region on the other strand." [SO:ke] -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000022 -name: A_minor_RNA_motif -def: "A region forming a motif, composed of adenines, where the minor groove edges are inserted into the minor groove of another helix." [SO:ke] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000023 -name: K_turn_RNA_motif -def: "The kink turn (K-turn) is an RNA structural motif that creates a sharp (~120 degree) bend between two continuous helices." [SO:ke] -synonym: "K-turn" EXACT [] -synonym: "kink turn" EXACT [] -synonym: "kink-turn motif" EXACT [] -is_a: SO:0000021 ! asymmetric_RNA_internal_loop - -[Term] -id: SO:0000024 -name: Sarcin_like_RNA_motif -def: "A loop in ribosomal RNA containing the sites of attack for ricin and sarcin." [http://proton.chem.yale.edu/pdf/7897662.pdf] -synonym: " sarcin/ricin RNA domain" EXACT [] -synonym: "sarcin/ricin domain" EXACT [] -synonym: "sarcin/ricin loop" EXACT [] -is_a: SO:0000021 ! asymmetric_RNA_internal_loop - -[Term] -id: SO:0000025 -name: symmetric_RNA_internal_loop -def: "An internal RNA loop where the extent of the loop on both stands is the same size." [SO:ke] -is_a: SO:0000020 ! 
RNA_internal_loop - -[Term] -id: SO:0000026 -name: RNA_junction_loop -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000027 -name: RNA_hook_turn -synonym: "hook turn" RELATED [] -synonym: "hook-turn motif" EXACT [] -is_a: SO:0000026 ! RNA_junction_loop - -[Term] -id: SO:0000028 -name: base_pair -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000029 -name: WC_base_pair -def: "The canonical base pair, where two bases interact via WC edges, with glycosidic bonds oriented cis relative to the axis of orientation." [PMID:12177293] -synonym: "canonical base pair" EXACT [] -synonym: "Watson Crick base pair" EXACT [] -synonym: "Watson-Crick base pair" RELATED [] -synonym: "Watson-Crick pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000030 -name: sugar_edge_base_pair -def: "A type of non-canonical base-pairing." [PMID:12177293] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000031 -name: aptamer -def: "DNA or RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http://aptamer.icmb.utexas.edu] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000032 -name: DNA_aptamer -def: "DNA molecules that have been selected from random pools based on their ability to bind other molecules." [http:aptamer.icmb.utexas.edu] -is_a: SO:0000031 ! aptamer - -[Term] -id: SO:0000033 -name: RNA_aptamer -def: "RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http://aptamer.icmb.utexas.edu] -is_a: SO:0000031 ! aptamer - -[Term] -id: SO:0000034 -name: morpholino_oligo -def: "Morpholino oligos are synthesized from four different Morpholino subunits, each of which contains one of the four genetic bases (A, C, G, T) linked to a 6-membered morpholine ring. Eighteen to 25 subunits of these four subunit types are joined in a specific order by non-ionic phosphorodiamidate intersubunit linkages to give a Morpholino." 
[http://www.gene-tools.com/Morpholinos/morpholinos.HTML] -synonym: "morpholino oligo" EXACT [] -intersection_of: SO:0000696 ! oligo -intersection_of: has_quality SO:0001183 ! morpholino - -[Term] -id: SO:0000035 -name: riboswitch -def: "A riboswitch is a part of an mRNA that can act as a direct sensor of small molecules to control their own expression. A riboswitch is a cis element in the 5' end of an mRNA, that acts as a direct sensor of metabolites." [PMID:2820954] -synonym: "riboswitch RNA" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000036 -name: matrix_attachment_site -def: "A DNA region that is required for the binding of chromatin to the nuclear matrix." [SO:ma] -synonym: "MAR" EXACT [] -synonym: "matrix association region" EXACT [] -synonym: "matrix attachment region" EXACT [] -synonym: "nuclear matrix association region" EXACT [] -synonym: "nuclear matrix attachment site" EXACT [] -synonym: "S/MAR" EXACT [] -synonym: "S/MAR element" RELATED [] -synonym: "scaffold attachment site" EXACT [] -synonym: "scaffold matrix attachment region" EXACT [] -synonym: "SMAR" EXACT [] -is_a: SO:0000626 ! chromosomal_regulatory_element - -[Term] -id: SO:0000037 -name: locus_control_region -def: "A DNA region that includes DNAse hypersensitive sites located 5' to a gene that confers the high-level, position-independent, and copy number-dependent expression to that gene." [SO:ma] -synonym: "LCR" EXACT [] -synonym: "locus control element" RELATED [] -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000038 -name: match_set -def: "A collection of match parts." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000039 -name: match_part -def: "A part of a match, for example an hsp from blast isa match_part." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000343 ! match - -[Term] -id: SO:0000040 -name: genomic_clone -def: "A clone of a DNA region of a genome." 
[SO:ma] -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000041 -name: operation -def: "An operation that can be applied to a sequence, that results in a change." [SO:ke] -is_a: SO:0000000 ! Sequence_Ontology -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:0000400 ! sequence_attribute -disjoint_from: SO:0000968 ! replication_mode -disjoint_from: SO:0001026 ! genome -disjoint_from: SO:0001060 ! sequence_variant -disjoint_from: SO:1000132 ! mutation - -[Term] -id: SO:0000042 -name: pseudogene_attribute -def: "An attribute of a pseudogene (SO:0000336)." [SO:ma] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000043 -name: processed_pseudogene -def: "A pseudogene that is processed." [SO:xp] -comment: Please not the synonym R psi M uses the spelled out form of the greek letter. -synonym: "pseudogene by reverse transcription" RELATED [] -synonym: "R psi G" RELATED [] -synonym: "retropseudogene" EXACT [] -intersection_of: SO:0000336 ! pseudogene -intersection_of: has_quality SO:0000900 ! processed - -[Term] -id: SO:0000044 -name: pseudogene_by_unequal_crossing_over -def: "A pseudogene caused by unequal crossing over at recombination." [SO:ke] -intersection_of: SO:0000336 ! pseudogene -intersection_of: has_quality SO:0000901 ! unequally_crossed_over - -[Term] -id: SO:0000045 -name: delete -def: "To remove a subsection of sequence." [SO:ke] -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000046 -name: insert -def: "To insert a subsection of sequence." [SO:ke] -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000047 -name: invert -def: "To invert a subsection of sequence." [SO:ke] -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000048 -name: substitute -def: "To substitute a subsection of sequence for another." [SO:ke] -is_a: SO:0000041 ! 
operation - -[Term] -id: SO:0000049 -name: translocate -def: "To translocate a subsection of sequence." [SO:ke] -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000050 -name: gene_part -def: "A part of a gene, that has no other route in the ontology back to region. This concept is necessary for logical inference as these parts must have the properties of region. It also allows us to associate all the parts of genes with a gene." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000051 -name: probe -def: "A DNA sequence used experimentally to detect the presence or absence of a complementary nucleic acid." [SO:ma] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000052 -name: assortment_derived_deficiency -synonym: "assortment-derived_deficiency" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000053 -name: mutation_affecting_regulatory_region -def: "A kind of mutation that affects a regulatory region of a gene." [SO:ke] -is_a: SO:1000132 ! mutation - -[Term] -id: SO:0000054 -name: aneuploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number." [SO:ke] -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0000055 -name: hyperploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number as extra chromosomes are present." [SO:ke] -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000056 -name: hypoploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number as some chromosomes are missing." [SO:ke] -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000057 -name: operator -def: "A regulatory element of an operon to which activators or repressors bind thereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -synonym: "operator segment" EXACT [] -is_a: SO:0000752 ! 
gene_group_regulatory_region - -[Term] -id: SO:0000058 -name: assortment_derived_aneuploid -synonym: "assortment-derived_aneuploid" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000059 -name: nuclease_binding_site -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0000060 -name: compound_chromosome_arm -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:0000061 -name: restriction_enzyme_binding_site -synonym: "restriction endonuclease binding site" EXACT [] -synonym: "restriction endonuclease recognition site" RELATED [] -synonym: "restriction enzyme recognition site" RELATED [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000062 -name: deficient_intrachromosomal_transposition -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:0000063 -name: deficient_interchromosomal_transposition -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:0000064 -name: gene_by_transcript_attribute -comment: This classes of attributes was added by MA to allow the broad description of genes based on qualities of the transcript(s). A product of SO meeting 2004. -is_obsolete: true - -[Term] -id: SO:0000065 -name: free_chromosome_arm -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000066 -name: gene_by_polyadenylation_attribute -is_obsolete: true - -[Term] -id: SO:0000067 -name: gene_to_gene_feature -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000068 -name: overlapping -def: "An attribute describing a gene that has a sequence that overlaps the sequence of another gene." [SO:ke] -is_a: SO:0000067 ! gene_to_gene_feature - -[Term] -id: SO:0000069 -name: inside_intron -def: "An attribute to describe a gene when it is located within the intron of another gene." [SO:ke] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000070 -name: inside_intron_antiparallel -def: "An attribute to describe a gene when it is located within the intron of another gene and on the opposite strand." 
[SO:ke] -is_a: SO:0000069 ! inside_intron - -[Term] -id: SO:0000071 -name: inside_intron_parallel -def: "An attribute to describe a gene when it is located within the intron of another gene and on the same strand." [SO:ke] -is_a: SO:0000069 ! inside_intron - -[Term] -id: SO:0000072 -name: end_overlapping_gene -is_obsolete: true - -[Term] -id: SO:0000073 -name: five_prime_three_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's 3' region." [SO:ke] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000074 -name: five_prime_five_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's five prime region." [SO:ke] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000075 -name: three_prime_three_prime_overlap -def: "An attribute to describe a gene when the 3' region overlaps with another gene's 3' region." [SO:ke] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000076 -name: three_prime_five_prime_overlap -def: "An attribute to describe a gene when the 3' region overlaps with another gene's 5' region." [SO:ke] -synonym: "5' 3' overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000077 -name: antisense -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000078 -name: polycistronic_transcript -def: "A transcript that is polycistronic." [SO:xp] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000079 -name: dicistronic_transcript -def: "A transcript that is dicistronic." [SO:ke] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000080 -name: operon_member -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0000081 -name: gene_array_member -synonym: "gene array member" EXACT [] -is_a: SO:0000401 ! 
gene_attribute - -[Term] -id: SO:0000082 -name: processed_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000083 -name: macronuclear_sequence -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000084 -name: micronuclear_sequence -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000085 -name: gene_by_genome_location -is_obsolete: true - -[Term] -id: SO:0000086 -name: gene_by_organelle_of_genome -is_obsolete: true - -[Term] -id: SO:0000087 -name: nuclear_gene -def: "A gene from nuclear sequence." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000088 -name: mt_gene -def: "A gene located in mitochondrial sequence." [SO:xp] -synonym: "mitochondrial gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000089 -name: kinetoplast_gene -def: "A gene located in kinetoplast sequence." [SO:xp] -intersection_of: SO:0000088 ! mt_gene -intersection_of: has_origin SO:0000741 ! kinetoplast_sequence - -[Term] -id: SO:0000090 -name: plastid_gene -def: "A gene from plastid sequence." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000091 -name: apicoplast_gene -def: "A gene from apicoplast sequence." [SO:xp] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0000092 -name: ct_gene -def: "A gene from chloroplast sequence." [SO:xp] -synonym: "chloroplast gene" EXACT [] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000093 -name: chromoplast_gene -def: "A gene from chromoplast_sequence." [SO:xp] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000094 -name: cyanelle_gene -def: "A gene from cyanelle sequence." 
[SO:xp] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000095 -name: leucoplast_gene -def: "A plastid gene from leucoplast sequence." [SO:xp] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000096 -name: proplastid_gene -def: "A gene from proplastid sequence." [SO:ke] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000748 ! proplastid_sequence - -[Term] -id: SO:0000097 -name: nucleomorph_gene -def: "A gene from nucleomorph sequence." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000098 -name: plasmid_gene -def: "A gene from plasmid sequence." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000749 ! plasmid_location - -[Term] -id: SO:0000099 -name: proviral_gene -def: "A gene from proviral sequence." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000751 ! proviral_location - -[Term] -id: SO:0000100 -name: endogenous_retroviral_gene -def: "A proviral gene with origin endogenous retrovirus." [SO:xp] -intersection_of: SO:0000099 ! proviral_gene -intersection_of: has_origin SO:0000903 ! endogenous_retroviral_sequence - -[Term] -id: SO:0000101 -name: transposable_element -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -synonym: " transposon" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000102 -name: expressed_sequence_match -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -def: "The end of the clone insert." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! 
clone_insert - -[Term] -id: SO:0000104 -name: polypeptide -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000001 ! region -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000105 -name: chromosome_arm -def: "A region of the chromosome between the centromere and the telomere. Human chromosomes have two arms, the p arm (short) and the q arm (long) which are separated from each other by the centromere." [http://www.exactsciences.com/cic/glossary/_index.htm] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000106 -name: non_capped_primary_transcript -is_obsolete: true - -[Term] -id: SO:0000107 -name: sequencing_primer -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000108 -name: mRNA_with_frameshift -def: "An mRNA with a frameshift." [SO:xp] -synonym: "frameshifted mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000109 -name: sequence_variant_obs -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000110 -name: sequence_feature -def: "An extent of biological sequence." [SO:ke] -subset: SOFA -synonym: "located sequence feature" RELATED [] -synonym: "located_sequence_feature" EXACT [] -is_a: SO:0000000 ! Sequence_Ontology -disjoint_from: SO:0000041 ! operation -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:0000400 ! sequence_attribute -disjoint_from: SO:0000968 ! replication_mode -disjoint_from: SO:0001026 ! genome -disjoint_from: SO:0001060 ! sequence_variant -disjoint_from: SO:1000132 ! 
mutation - -[Term] -id: SO:0000111 -name: transposable_element_gene -def: "A gene encoded within a transposable element. For example gag, int, env and pol are the transposable element genes of the TY element in yeast." [SO:ke] -intersection_of: SO:0000704 ! gene -intersection_of: part_of SO:0000101 ! transposable_element - -[Term] -id: SO:0000112 -name: primer -def: "A short preexisting polynucleotide chain to which new deoxyribonucleotides can be added by DNA polymerase." [http://www.ornl.gov/TechResources/Human_Genome/publicat/primer2001/glossary.html] -subset: SOFA -synonym: "primer oligonucleotide" EXACT [] -synonym: "primer polynucleotide" EXACT [] -synonym: "primer sequence" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000113 -name: proviral_region -def: "A viral sequence which has integrated into a host genome." [SO:ke] -subset: SOFA -synonym: "proviral sequence" RELATED [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000114 -name: methylated_C -def: "A methylated deoxy-cytosine." [SO:ke] -subset: SOFA -synonym: "methylated C" EXACT [] -synonym: "methylated cytosine" EXACT [] -synonym: "methylated cytosine base" EXACT [] -synonym: "methylated cytosine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000115 -name: transcript_feature -is_obsolete: true - -[Term] -id: SO:0000116 -name: edited -def: "An attribute describing a sequence that is modified by editing." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000117 -name: transcript_with_readthrough_stop_codon -is_obsolete: true - -[Term] -id: SO:0000118 -name: transcript_with_translational_frameshift -def: "A transcript with a translational frameshift." [SO:xp] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000119 -name: regulated -def: "An attribute to describe a sequence that is regulated." [SO:ke] -is_a: SO:0000401 ! 
gene_attribute - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns. -subset: SOFA -synonym: "pre mRNA" RELATED [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000121 -name: forward_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "forward primer oligonucleotide" EXACT [] -synonym: "forward primer polynucleotide" EXACT [] -synonym: "forward primer sequence" EXACT [] -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000122 -name: RNA_sequence_secondary_structure -def: "A folded RNA sequence." [SO:ke] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000123 -name: transcriptionally_regulated -def: "An attribute describing a gene that is regulated at transcription." [SO:ma] -comment: By:. -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -def: "Expressed in relatively constant amounts without regard to cellular environmental conditions such as the concentration of a particular substrate." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -def: "An inducer molecule is required for transcription to occur." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -def: "A repressor molecule is required for transcription to stop." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000127 -name: silenced_gene -def: "A gene that is silenced." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000893 ! 
silenced - -[Term] -id: SO:0000128 -name: gene_silenced_by_DNA_modification -def: "A gene that is silenced by DNA modification." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000129 -name: gene_silenced_by_DNA_methylation -def: "A gene that is silenced by DNA methylation." [SO:xp] -synonym: "methylation-silenced gene" EXACT [] -intersection_of: SO:0000128 ! gene_silenced_by_DNA_modification -intersection_of: has_quality SO:0000895 ! silenced_by_DNA_methylation - -[Term] -id: SO:0000130 -name: post_translationally_regulated -def: "An attribute describing a gene that is regulated after it has been translated." [SO:ke] -synonym: "post-translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000131 -name: translationally_regulated -def: "An attribute describing a gene that is regulated as it is translated." [SO:ke] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000133 -name: epigenetically_modified -def: "This attribute describes a gene where heritable changes other than those in the DNA sequence occur. These changes include: modification to the DNA (such as DNA methylation, the covalent modification of cytosine), and post-translational modification of histones." [SO:ke] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000134 -name: imprinted -def: "Imprinted genes are epigenetically modified genes that are expressed monoallelically according to their parent of origin." [SO:ke] -is_a: SO:0000119 ! regulated -is_a: SO:0000133 ! 
epigenetically_modified - -[Term] -id: SO:0000135 -name: maternally_imprinted -def: "The maternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -def: "The paternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -def: "Allelic exclusion is a process occuring in diploid organisms, where a gene is inactivated and not expressed in that cell." [SO:ke] -comment: Exapmles are x-innactivation and immunoglobulin formation. -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000138 -name: gene_rearranged_at_DNA_level -def: "An epigenetically modified gene, rearranged at the DNA level." [SO:xp] -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000904 ! rearranged_at_DNA_level - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -comment: Gene:. -subset: SOFA -is_a: SO:0000837 ! UTR_region - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -is_a: SO:0000002 ! 
sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of sequence which may be used to manufacture a longer assembled, sequence." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000145 -name: recoded_codon -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000146 -name: capped -def: "An attribute describing when a sequence, usually an mRNA is capped by the addition of a modified guanine nucleotide at the 5' end." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unvailable bases." [SO:ls] -subset: SOFA -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! 
contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in E. coli or some other organism." [http://www.geospiza.com/community/support/glossary/] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "yeast artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "bacterial artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000154 -name: PAC -def: "P1 Artificial Chromosome. These vectors can hold large inserts, typically 80-200 kb, and propagate in E. coli as a single copy episome." [http://www.ncbi.nlm.nih.gov/genome/guide/mouse/glossary.htm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "P1" EXACT [] -synonym: "P1 artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000155 -name: plasmid -def: "A self-replicating circular DNA molecule that is distinct from a chromosome in the organism." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "plasmid sequence" EXACT [] -is_a: SO:0000695 ! reagent -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as a plasmid or packaged as a phage,since they retain the lambda cos sites." 
[SO:ma] -comment: Paper: vans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1):9-20. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cosmid vector" EXACT [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma] -synonym: "phagemid vector" RELATED [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilises the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource: mapping and analysis of 96 clones. Genomics 1996. -synonym: "fosmid vector" RELATED [] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000159 -name: deletion -def: "The point at which a deletion occured." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -is_a: SO:0000001 ! region -is_a: SO:0001059 ! sequence_alteration -relationship: sequence_of SO:0000045 ! delete - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_obsolete: true - -[Term] -id: SO:0000161 -name: methylated_A -def: "A methylated adenine." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "The position where intron is excised." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! 
primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_splice_site -def: "The junction between the 3 prime end of an exon and the following intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000164 -name: three_prime_splice_site -def: "The junction between the 3 prime end of an intron and the following exon." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -is_a: SO:0000727 ! CRM -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000166 -name: enhancer_bound_by_factor -def: "An enhancer bound by a factor." [SO:xp] -intersection_of: SO:0000165 ! enhancer -intersection_of: has_quality SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology.\nThe region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -is_a: SO:0001055 ! 
transcriptional_cis_regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." [SO:ke] -synonym: "pol I promoter" EXACT [] -synonym: "polymerase I promoter" EXACT [] -synonym: "RNA polymerase A promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke] -synonym: "pol II promoter" RELATED [] -synonym: "polymerase II promoter" EXACT [] -synonym: "RNA polymerase B promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." [SO:ke] -synonym: "pol III promoter" EXACT [] -synonym: "polymerase III promoter" EXACT [] -synonym: "RNA polymerase C promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "CAAT box" EXACT [] -synonym: "CAAT-box" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0000173 -name: GC_rich_promoter_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "GC-rich region" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "Goldstein-Hogness box" EXACT [] -is_a: SO:0000832 ! promoter_region -relationship: part_of SO:0000170 ! RNApol_II_promoter -relationship: part_of SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-10 signal" EXACT [] -synonym: "Pribnow box" EXACT [] -synonym: "Pribnow Schaller box" EXACT [] -synonym: "Pribnow-Schaller box" EXACT [] -is_a: SO:0000843 ! bacterial_RNApol_promoter_region - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-35 signal" EXACT [] -is_a: SO:0000843 ! bacterial_RNApol_promoter_region - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0005855 ! 
gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." [http://www.genpromag.com/scripts/glossary.asp?LETTER=R] -synonym: "class I" RELATED [] -synonym: "class I transposon" EXACT [] -synonym: "retrotransposon element" EXACT [] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." [SO:ke] -synonym: "class II" RELATED [] -synonym: "class II transposon" EXACT [] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." 
[SO:ke] -synonym: "long terminal repeat retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A segment of DNA that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -synonym: "5' intron" EXACT [] -synonym: "5' intron sequence" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000191 -name: interior_intron -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -synonym: "3' intron" EXACT [] -synonym: "3' intron sequence" RELATED [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A polymorphism detectable by the size differences in DNA fragments generated by a restriction enzyme." [PMID:6247908] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." 
[http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "LINE" EXACT [] -synonym: "Long interspersed element" EXACT [] -synonym: "Long interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon, including the stop_codon." [SO:ke] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_exon_coding_region -def: "The sequence of the 5' exon that encodes for protein." [SO:ke] -subset: SOFA -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_exon_coding_region -def: "The sequence of the 3' exon that encodes for protein." [SO:ke] -subset: SOFA -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon -relationship: part_of SO:0000655 ! ncRNA - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke] -synonym: "translocated sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration -relationship: sequence_of SO:0000049 ! translocate - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -synonym: "5' coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000201 -name: interior_exon -def: "An exon that is bounded by 5' and 3' splice sites." [PMID:10373547] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The exon that is most 3-prime on a given transcript." [SO:ma] -synonym: "3' coding exon" RELATED [] -is_a: SO:0000147 ! 
exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime and three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." [SO:ke] -synonym: "Short interspersed element" EXACT [] -synonym: "Short interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_variation -synonym: "simple sequence length polymorphism" RELATED [] -synonym: "simple sequence length variation" EXACT [] -is_a: SO:0000248 ! sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "terminal inverted repeat" EXACT [] -synonym: "TIR element" EXACT [] -is_a: SO:0000182 ! 
DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -synonym: "ribosomal RNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253)." [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -def: "A primary transcript encoding alanyl tRNA." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." [SO:ke] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." 
[SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear RNA (SO:0000274)." [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: processed_transcript -def: "A transcript which has undergone the necessary modifications for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -is_a: SO:0000233 ! processed_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds a TF complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0005836 ! 
regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The inframe interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SO:ma, SO:rb] -comment: The definition was modified by Rama. This term is now basically the same as a CDS. This must be revised. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterised by large modular imperfect long inverted repeats." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "long inverted repeat element" RELATED [] -synonym: "LVR element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The DNA sequences extending on either side of a specific locus." [http://biotech.icmb.utexas.edu/search/dict-search.mhtml] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000240 -name: chromosome_variation -is_a: SO:0000000 ! Sequence_Ontology -disjoint_from: SO:0000041 ! operation -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000400 ! sequence_attribute -disjoint_from: SO:0000968 ! replication_mode -disjoint_from: SO:0001026 ! genome -disjoint_from: SO:0001060 ! sequence_variant -disjoint_from: SO:1000132 ! mutation - -[Term] -id: SO:0000241 -name: internal_UTR -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polycistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -is_a: SO:0000203 ! 
UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke] -synonym: "internal ribosomal entry sequence" EXACT [] -synonym: "internal ribosomal entry site" EXACT [] -synonym: "internal ribosome entry sequence" RELATED [] -synonym: "IRES" EXACT [] -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_obsolete: true - -[Term] -id: SO:0000246 -name: polyadenylated -def: "A attribute describing the addition of a poly A tail to the 3' end of a mRNA molecule." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000248 -name: sequence_length_variation -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! 
rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000254 -name: alanyl_tRNA -def: "A tRNA sequence that has an alanine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "alanyl-transfer ribonucleic acid" EXACT [] -synonym: "alanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000211 ! alanine_tRNA_primary_transcript - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -def: "A tRNA sequence that has an asparagine anticodon, and a 3' asparagine binding region." 
[SO:ke] -synonym: "asparaginyl-transfer ribonucleic acid" EXACT [] -synonym: "asparaginyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000213 ! asparagine_tRNA_primary_transcript - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -def: "A tRNA sequence that has an aspartic acid anticodon, and a 3' aspartic acid binding region." [SO:ke] -synonym: "aspartyl-transfer ribonucleic acid" EXACT [] -synonym: "aspartyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000214 ! aspartic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -def: "A tRNA sequence that has a cysteine anticodon, and a 3' cysteine binding region." [SO:ke] -synonym: "cysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "cysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000215 ! cysteine_tRNA_primary_transcript - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -def: "A tRNA sequence that has a glutamine anticodon, and a 3' glutamine binding region." [SO:ke] -synonym: "glutaminyl-transfer ribonucleic acid" EXACT [] -synonym: "glutaminyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000216 ! glutamic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -def: "A tRNA sequence that has a glutamic acid anticodon, and a 3' glutamic acid binding region." [SO:ke] -synonym: "glutamyl-transfer ribonucleic acid" EXACT [] -synonym: "glutamyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000217 ! glutamine_tRNA_primary_transcript - -[Term] -id: SO:0000261 -name: glycyl_tRNA -def: "A tRNA sequence that has a glycine anticodon, and a 3' glycine binding region." [SO:ke] -synonym: "glycyl-transfer ribonucleic acid" RELATED [] -synonym: "glycyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000218 ! 
glycine_tRNA_primary_transcript - -[Term] -id: SO:0000262 -name: histidyl_tRNA -def: "A tRNA sequence that has a histidine anticodon, and a 3' histidine binding region." [SO:ke] -synonym: "histidyl-transfer ribonucleic acid" EXACT [] -synonym: "histidyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000219 ! histidine_tRNA_primary_transcript - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -def: "A tRNA sequence that has an isoleucine anticodon, and a 3' isoleucine binding region." [SO:ke] -synonym: "isoleucyl-transfer ribonucleic acid" EXACT [] -synonym: "isoleucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000220 ! isoleucine_tRNA_primary_transcript - -[Term] -id: SO:0000264 -name: leucyl_tRNA -def: "A tRNA sequence that has a leucine anticodon, and a 3' leucine binding region." [SO:ke] -synonym: "leucyl-transfer ribonucleic acid" EXACT [] -synonym: "leucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000221 ! leucine_tRNA_primary_transcript - -[Term] -id: SO:0000265 -name: lysyl_tRNA -def: "A tRNA sequence that has a lysine anticodon, and a 3' lysine binding region." [SO:ke] -synonym: "lysyl-transfer ribonucleic acid" EXACT [] -synonym: "lysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000222 ! lysine_tRNA_primary_transcript - -[Term] -id: SO:0000266 -name: methionyl_tRNA -def: "A tRNA sequence that has a methionine anticodon, and a 3' methionine binding region." [SO:ke] -synonym: "methionyl-transfer ribonucleic acid" EXACT [] -synonym: "methionyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000223 ! methionine_tRNA_primary_transcript - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -def: "A tRNA sequence that has a phenylalanine anticodon, and a 3' phenylalanine binding region." 
[SO:ke] -synonym: "phenylalanyl-transfer ribonucleic acid" EXACT [] -synonym: "phenylalanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000224 ! phenylalanine_tRNA_primary_transcript - -[Term] -id: SO:0000268 -name: prolyl_tRNA -def: "A tRNA sequence that has a proline anticodon, and a 3' proline binding region." [SO:ke] -synonym: "prolyl-transfer ribonucleic acid" EXACT [] -synonym: "prolyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000225 ! proline_tRNA_primary_transcript - -[Term] -id: SO:0000269 -name: seryl_tRNA -def: "A tRNA sequence that has a serine anticodon, and a 3' serine binding region." [SO:ke] -synonym: "seryl-transfer ribonucleic acid" RELATED [] -synonym: "seryl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000226 ! serine_tRNA_primary_transcript - -[Term] -id: SO:0000270 -name: threonyl_tRNA -def: "A tRNA sequence that has a threonine anticodon, and a 3' threonine binding region." [SO:ke] -synonym: "threonyl-transfer ribonucleic acid" EXACT [] -synonym: "threonyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000227 ! threonine_tRNA_primary_transcript - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -def: "A tRNA sequence that has a tryptophan anticodon, and a 3' tryptophan binding region." [SO:ke] -synonym: "tryptophanyl-transfer ribonucleic acid" EXACT [] -synonym: "tryptophanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000228 ! tryptophan_tRNA_primary_transcript - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -def: "A tRNA sequence that has a tyrosine anticodon, and a 3' tyrosine binding region." [SO:ke] -synonym: "tyrosyl-transfer ribonucleic acid" EXACT [] -synonym: "tyrosyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000229 ! 
tyrosine_tRNA_primary_transcript - -[Term] -id: SO:0000273 -name: valyl_tRNA -def: "A tRNA sequence that has a valine anticodon, and a 3' valine binding region." [SO:ke] -synonym: "valyl-transfer ribonucleic acid" EXACT [] -synonym: "valyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000230 ! valine_tRNA_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [ems:WB, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000231 ! snRNA_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." 
[PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: derives_from SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000277 -name: bound_by_factor -def: "An attribute describing a sequence that is bound by another molecule." [SO:ke] -comment: Formerly called transcript_by_bound_factor. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000278 -name: transcript_bound_by_nucleic_acid -def: "A transcript that is bound by a nucleic acid." [SO:xp] -comment: Formerly called transcript_by_bound_nucleic_acid. -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000876 ! bound_by_nucleic_acid - -[Term] -id: SO:0000279 -name: transcript_bound_by_protein -def: "A transcript that is bound by a protein." [SO:xp] -comment: Formerly called transcript_by_bound_protein. -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000875 ! bound_by_protein - -[Term] -id: SO:0000280 -name: engineered_gene -def: "A gene that is engineered." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000281 -name: engineered_foreign_gene -def: "A gene that is engineered and foreign." [SO:xp] -intersection_of: SO:0000280 ! engineered_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000282 -name: mRNA_with_minus_1_frameshift -def: "An mRNA with a minus 1 frameshift." [SO:xp] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000866 ! minus_1_frameshift - -[Term] -id: SO:0000283 -name: engineered_foreign_transposable_element_gene -def: "A transposible_element that is engineered and foreign." [SO:xp] -intersection_of: SO:0000111 ! transposable_element_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! 
foreign - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartite and interrupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000285 -name: foreign_gene -def: "A gene that is foreign." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "direct terminal repeat" RELATED [] -synonym: "long terminal repeat" EXACT [] -synonym: "LTR" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000287 -name: fusion_gene -def: "A gene that is a fusion." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000806 ! fusion - -[Term] -id: SO:0000288 -name: engineered_fusion_gene -def: "A fusion gene that is engineered." [SO:xp] -intersection_of: SO:0000287 ! fusion_gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000289 -name: microsatellite -def: "A very short unit sequence of DNA (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite" EXACT [] -synonym: "dinucleotide repeat microsatellite locus" EXACT [] -synonym: "dinucleotide repeat microsatellite marker" EXACT [] -is_a: SO:0000289 ! 
microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite marker" RELATED [] -synonym: "rinucleotide repeat microsatellite" EXACT [] -synonym: "trinucleotide repeat microsatellite locus" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_obsolete: true - -[Term] -id: SO:0000293 -name: engineered_foreign_repetitive_element -def: "A repetitive element that is engineered and foreign." [SO:xp] -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -synonym: "U12-dependent intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: " ori" EXACT [] -is_a: SO:0000001 ! 
region - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "displacement loop" RELATED [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -is_a: SO:0000001 ! region - -[Term] -id: SO:0000299 -name: specific_recombination_site -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000301 -name: vertebrate_immune_system_gene_recombination_feature -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interrupted palindrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_base_site -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G or (in RNA) U." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -is_a: SO:0000305 ! modified_base_site - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_obsolete: true - -[Term] -id: SO:0000309 -name: computed_feature -is_obsolete: true - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_obsolete: true - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to: -is_obsolete: true - -[Term] -id: SO:0000312 -name: experimentally_determined -def: "Attribute to describe a feature that has been experiemntally verified." [SO:ke] -is_a: SO:0000789 ! validated - -[Term] -id: SO:0000313 -name: stem_loop -alt_id: SO:0000019 -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "RNA_hairpin_loop" EXACT [] -synonym: "stem-loop" EXACT [] -is_a: SO:0000122 ! RNA_sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: transcription_start_site -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "TSS" EXACT [] -is_a: SO:0000835 ! 
primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke] -is_a: SO:0000344 ! splice_enhancer -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000321 -name: mRNA_with_plus_1_frameshift -def: "An mRNA with a plus 1 frameshift." [SO:ke] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000868 ! plus_1_frameshift - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "translation start" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000695 ! 
reagent - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke] -synonym: "translation_end" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray oligonucleotide" EXACT [] -is_a: SO:0000051 ! probe -is_a: SO:0000324 ! tag -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000329 -name: mRNA_with_plus_2_frameshift -def: "An mRNA with a plus 2 frameshift." [SO:xp] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000869 ! plus_2_framshift - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! 
junction -relationship: part_of SO:0000233 ! processed_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000335 -name: mRNA_with_minus_2_frameshift -def: "A mRNA with a minus 2 frameshift." [SO:ke] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000867 ! minus_2_frameshift - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITEs do not encode proteins." [http:www.pnas.org/cgi/content/full/97/18/10083] -synonym: "miniature inverted repeat transposable element" EXACT [] -is_a: SO:0000208 ! 
terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome which promotes recombination." [SO:rd] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "Expressed Sequence Tag: The sequence of a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [http://genomics.phrma.org/lexicon/e.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000695 ! reagent -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000346 -name: loxP_site -synonym: "Cre-recombination target region" RELATED [] -is_a: SO:0000947 ! 
resolution_site - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -is_a: SO:0000343 ! match - -[Term] -id: SO:0000348 -name: nucleic_acid -def: "An attribute describing a sequence consisting of nucleobases bound to repeating units. The forms found in nature are deoxyribonucleic acid (DNA), where the repeating units are 2-deoxy-D-ribose rings connected to a phosphate backbone, and ribonucleic acid (RNA), where the repeating units are D-ribose rings connected to a phosphate backbone." [CHEBI:33696, RSC:cb] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -is_a: SO:0000343 ! match - -[Term] -id: SO:0000350 -name: FRT_site -def: "An inversion site found on the Saccharomyces cerevisiae 2 micron plasmid." [SO:ma] -synonym: "FLP recombination target region" EXACT [] -is_a: SO:0000948 ! inversion_site - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "An attribute to decide a sequence of nucleotides, nucleotide analogs, or amino acids that has been designed by an experimenter and which may, or may not, correspond with any natural sequence." [SO:ma] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000352 -name: DNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a 2-deoxy-D-ribose ring connected to a phosphate backbone." [RSC:cb] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000353 -name: assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -is_a: SO:0000684 ! 
nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which is co-inherited as the result of the lack of historic recombination within it." [SO:ma] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000356 -name: RNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a D-ribose ring connected to a phosphate backbone." [RSC:cb] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: flanked -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000358 -name: protein -def: "An attribute describing a sequence composed of amino acid residues joined by peptide bonds." [SO:ke] -comment: Do not use this for feature annotation. Use polypeptide (SO:0000104) instead. -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000359 -name: floxed -def: "An attribute describing sequence that is flanked by Lox-P sites." [SO:ke] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together signify a unique amino acid or the termination of translation." [http://genomics.phrma.org/lexicon/c.html] -subset: SOFA -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000361 -name: FRT_flanked -def: "An attribute to describe sequence that is flanked by the FLP recombinase recognition site, FRT." [SO:ke] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000362 -name: invalidated_by_chimeric_cDNA -def: "A cDNA clone constructed from more than one mRNA. Usually an experimental artifact." [SO:ma] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000363 -name: floxed_gene -def: "A transgene that is floxed." [SO:xp] -intersection_of: SO:0000902 ! transgene -intersection_of: has_quality SO:0000359 ! 
floxed - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposible element." [SO:ke] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "A region encoding an integrase which acts at a site adjacent to it (attI_site) to insert DNA which must include but is not limited to an attC_site." [SO:as] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: position_of SO:0000046 ! insert - -[Term] -id: SO:0000367 -name: attI_site -def: "A region within an integron, adjacent to an integrase, at which site specific recombination involving an attC_site takes place." [SO:as] -synonym: "attI site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -is_obsolete: true - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/~smaloy/Glossary/C.html] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. 
-subset: SOFA -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0000373 -name: recombinationally_inverted_gene -def: "A recombinationally rearranged gene by inversion." [SO:xp] -intersection_of: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: associated_with SO:0000047 ! invert - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -intersection_of: SO:0000372 ! enzymatic_RNA -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000375 -name: rRNA_5.8S -def: "5.8S ribosomal RNA (5.8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5.8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 6S RNA represses expression from a sigma70-dependent promoter during stationary phase." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. 
The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB-RsmB RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -is_a: SO:0000378 ! DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. 
Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http://rnaworld.bio.ukans.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA -intersection_of: SO:0000715 ! RNA_motif -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000381 -name: group_IIA_intron -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -is_a: SO:0000644 ! antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterised activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -is_a: SO:0000370 ! 
small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesises telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. 
The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -def: "An attribute describes a quality of sequence." [SO:ke] -is_a: SO:0000000 ! Sequence_Ontology -disjoint_from: SO:0000041 ! operation -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:0000968 ! replication_mode -disjoint_from: SO:0001026 ! genome -disjoint_from: SO:0001060 ! sequence_variant -disjoint_from: SO:1000132 ! mutation - -[Term] -id: SO:0000401 -name: gene_attribute -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_obsolete: true - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0005837 ! 
U14_snoRNA_primary_transcript - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron. Twintrons are group II or III introns, into which another group II or III intron has been transposed." [PMID:1899376, PMID:7823908] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. 
It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: Discrete. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A region of a molecule that binds to a protein." [SO:ke] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000411 -name: rescue_region -def: "A region that rescues." [SO:xp] -synonym: "rescue fragment" EXACT [] -synonym: "rescue segment" RELATED [] -intersection_of: SO:0000695 ! reagent -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "Any of the individual polynucleotide sequences produced by digestion of DNA with a restriction endonuclease." [http://www.agron.missouri.edu/cgi-bin/sybgw_mdb/mdb3/Term/119] -subset: SOFA -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000414 -name: invalidated_by_genomic_contamination -def: "An attribute to describe a feature that is invalidated due to genomic contamination." [SO:ke] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000415 -name: invalidated_by_genomic_polyA_primed_cDNA -def: "An attribute to describe a feature that is invalidated due to polyA priming." [SO:ke] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000416 -name: invalidated_by_partial_processing -def: "An attribute to describe a feature that is invalidated due to partial processing." [SO:ke] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000417 -name: polypeptide_domain -alt_id: BS:00012 -def: "A structurally or functionally defined protein region. In proteins with multiple domains, the combination of the domains determines the function of the protein. 
Region which has been shown to recur throughout evolution." [EBIBS:GAR, http://www.molbiol.bbsrc.ac.uk/new_protein/domains.html] -comment: Range. Old definition from before biosapiens: A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains. -subset: biosapiens -synonym: "ca_bind" NARROW [] -synonym: "DNA_bind" NARROW [] -synonym: "domain" RELATED [] -synonym: "np_bind" NARROW [] -synonym: "polypeptide_domain" EXACT [] -synonym: "zn_fing" NARROW [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "signal peptide coding sequence" EXACT [] -synonym: "signal_peptide" EXACT [] -is_a: SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The extent of a polypeptide chain in the mature protein." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" EXACT [] -synonym: "mature peptide" RELATED [] -synonym: "mature_protein_region" EXACT [] -is_a: SO:0000839 ! 
polypeptide_region - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -synonym: "5' TIR" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -synonym: "3' TIR" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -synonym: "U5 long terminal repeat region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000423 -name: R_LTR_region -synonym: "R long terminal repeat region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000424 -name: U3_LTR_region -synonym: "U3 long terminal repeat region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000425 -name: five_prime_LTR -synonym: "5' long terminal repeat" EXACT [] -synonym: "5' LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -synonym: "3' long terminal repeat" EXACT [] -synonym: "3' LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -synonym: "R 5' long term repeat region" EXACT [] -is_a: SO:0000423 ! R_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -synonym: "U5 5' long terminal repeat region" EXACT [] -is_a: SO:0000422 ! U5_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -synonym: "U3 5' long term repeat region" EXACT [] -is_a: SO:0000424 ! U3_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -synonym: "R 3' long terminal repeat region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -synonym: "U3 3' long terminal repeat region" EXACT [] -is_a: SO:0000849 ! 
three_prime_LTR_component - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -synonym: "U5 3' long terminal repeat region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: target_site_duplication -def: "A sequence of the target DNA that is duplicated when a transposable element or phage inserts; usually found at each end the insertion." [http://www.koko.gov.my/CocoaBioTech/Glossaryt.html] -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." [SO:ke] -synonym: "LTR retrotransposon poly purine tract" RELATED [] -is_a: SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! ring_chromosome - -[Term] -id: SO:0000440 -name: vector -def: "A DNA molecule that can be used to transfer DNA molecules between organisms." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000443 -name: polymer_attribute -def: "An attribute to describe the kind of biological sequence." [SO:ke] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "5' nc exon" EXACT [] -synonym: "5' non coding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." [SO:ke] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." [SO:ke] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequency of these components." [SO:ma] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -is_a: SO:0000341 ! 
chromosome_band - -[Term] -id: SO:0000451 -name: gene_with_polyadenylated_mRNA -def: "A gene that encodes a polyadenylated mRNA." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000871 ! polyadenylated_mRNA - -[Term] -id: SO:0000452 -name: transgene_attribute -is_obsolete: true - -[Term] -id: SO:0000453 -name: transposition -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A small, 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000455 -name: gene_with_mRNA_with_frameshift -def: "A gene that encodes an mRNA with a frameshift." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000865 ! frameshift - -[Term] -id: SO:0000456 -name: recombinationally_rearranged_gene -def: "A gene that is recombinationally rearranged." [SO:ke] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000940 ! recombinationally_rearranged - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -def: "A chromosome duplication involving an insertion from another chromosome." [SO:ke] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment -relationship: part_of SO:0000504 ! D_DJ_C_cluster -relationship: part_of SO:0000505 ! D_DJ_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000508 ! D_DJ_J_cluster -relationship: part_of SO:0000509 ! D_J_C_cluster -relationship: part_of SO:0000527 ! 
V_D_DJ_C_cluster -relationship: part_of SO:0000528 ! V_D_DJ_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000532 ! V_D_J_cluster -relationship: part_of SO:0000559 ! D_cluster -relationship: part_of SO:0000560 ! D_J_cluster - -[Term] -id: SO:0000459 -name: gene_with_trans_spliced_transcript -def: "A gene with a transcript that is trans-spliced." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000479 ! trans_spliced_transcript - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_segment -comment: I am using the term segment instead of gene here to avoid confusion with the region 'gene'. -synonym: "vertebrate_immunoglobulin/T-cell receptor gene" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000463 -name: encodes_alternately_spliced_transcripts -def: "A gene that encodes more than one transcript." [SO:ke] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." 
[FB:km] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-GENE" EXACT [] -synonym: "variable_gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment -relationship: part_of SO:0000518 ! V_DJ_cluster -relationship: part_of SO:0000519 ! V_DJ_J_cluster -relationship: part_of SO:0000520 ! V_VDJ_C_cluster -relationship: part_of SO:0000521 ! V_VDJ_cluster -relationship: part_of SO:0000522 ! V_VDJ_J_cluster -relationship: part_of SO:0000523 ! V_VJ_C_cluster -relationship: part_of SO:0000524 ! V_VJ_cluster -relationship: part_of SO:0000525 ! V_VJ_J_cluster -relationship: part_of SO:0000526 ! V_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000528 ! V_D_DJ_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000532 ! V_D_J_cluster -relationship: part_of SO:0000534 ! V_J_cluster -relationship: part_of SO:0000535 ! V_J_C_cluster -relationship: part_of SO:0000542 ! V_DJ_C_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster -relationship: part_of SO:0000566 ! V_VJ_J_C_cluster - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -def: "An attribute describing a gene sequence where the resulting protein is regulated by the stability of the resulting protein." [SO:ke] -synonym: "post-translationally regulated by protein stability" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! 
assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -def: "An attribute describing a gene sequence where the resulting protein is modified to regulate it." [SO:ke] -synonym: "post-translationally regulated by protein modification" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment -relationship: part_of SO:0000485 ! DJ_J_cluster -relationship: part_of SO:0000487 ! VDJ_J_C_cluster -relationship: part_of SO:0000488 ! VDJ_J_cluster -relationship: part_of SO:0000490 ! VJ_J_C_cluster -relationship: part_of SO:0000491 ! VJ_J_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000508 ! D_DJ_J_cluster -relationship: part_of SO:0000509 ! D_J_C_cluster -relationship: part_of SO:0000511 ! J_C_cluster -relationship: part_of SO:0000513 ! J_cluster -relationship: part_of SO:0000519 ! V_DJ_J_cluster -relationship: part_of SO:0000522 ! V_VDJ_J_cluster -relationship: part_of SO:0000525 ! V_VJ_J_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000532 ! V_D_J_cluster -relationship: part_of SO:0000534 ! V_J_cluster -relationship: part_of SO:0000535 ! V_J_C_cluster -relationship: part_of SO:0000540 ! DJ_J_C_cluster -relationship: part_of SO:0000560 ! D_J_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster -relationship: part_of SO:0000566 ! 
V_VJ_J_C_cluster - -[Term] -id: SO:0000471 -name: autoregulated -def: "The gene product is involved in its own transcriptional regulation." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [CJM:SO] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -def: "The gene product is involved in its own transcriptional regulation where it decreases transcription." [SO:ke] -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -def: "The gene product is involved in its own transcriptional regulation, where it increases transcription." [SO:ke] -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." [SO:ke] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -def: "A gene that is polycistronic." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000478 -name: C_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "C_GENE" EXACT [] -synonym: "constant gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment -relationship: part_of SO:0000487 ! VDJ_J_C_cluster -relationship: part_of SO:0000489 ! VJ_C_cluster -relationship: part_of SO:0000490 ! VJ_J_C_cluster -relationship: part_of SO:0000504 ! D_DJ_C_cluster -relationship: part_of SO:0000506 ! 
D_DJ_J_C_cluster -relationship: part_of SO:0000509 ! D_J_C_cluster -relationship: part_of SO:0000511 ! J_C_cluster -relationship: part_of SO:0000520 ! V_VDJ_C_cluster -relationship: part_of SO:0000523 ! V_VJ_C_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000531 ! V_D_J_C_cluster -relationship: part_of SO:0000535 ! V_J_C_cluster -relationship: part_of SO:0000539 ! DJ_C_cluster -relationship: part_of SO:0000540 ! DJ_J_C_cluster -relationship: part_of SO:0000541 ! VDJ_C_cluster -relationship: part_of SO:0000542 ! V_DJ_C_cluster -relationship: part_of SO:0000558 ! C_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster -relationship: part_of SO:0000566 ! V_VJ_J_C_cluster - -[Term] -id: SO:0000479 -name: trans_spliced_transcript -def: "A transcript that is trans-spliced." [SO:xp] -synonym: "trans-spliced transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly." [SO:ke] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occuring at the termini of a DNA transposon." [SO:ke] -synonym: "TIR" EXACT [] -is_a: SO:0000294 ! inverted_repeat -relationship: part_of SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate_immunoglobulin/T-cell receptor gene cluster" EXACT [] -is_a: SO:0000301 ! 
vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(VDJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(VJ)-C-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "3'D-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "3'D-NOMAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "3'D-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "5'D-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "5'D-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "5'-SPACER" EXACT [] -synonym: "five prime D-spacer" EXACT [] -is_a: SO:0000563 ! 
vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." [SO:ke] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000507 -name: pseudogenic_exon -def: "A non functional descendant of an exon, part of a pseudogene." [SO:ke] -comment: This is the analog of the exon of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutatations. -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon -relationship: part_of SO:0000516 ! pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000510 -name: VD_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V_D_GENE" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! 
J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -def: "A non functional descendant of a transcript, part of a pseudogene." [SO:ke] -comment: This is the analog of the transcript of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutatations. -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "J-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VDJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." [http://www.pnas.org/cgi/content/full/100/11/6569] -synonym: "ISCR" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! 
recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." [FB:km] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000548 -name: gene_with_edited_transcript -def: "A gene that encodes a transcript that is edited." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000873 ! edited_transcript - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "Region in 5' UTR where ribosome assembles on mRNA." [SO:ke] -synonym: "five prime ribosome binding site" EXACT [] -synonym: "RBS" RELATED [] -synonym: "Shine Dalgarno sequence" EXACT [] -synonym: "Shine-Dalgarno sequence" EXACT [] -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -is_a: SO:0000699 ! 
junction -relationship: part_of SO:0000233 ! processed_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "5' clip" RELATED [] -synonym: "five prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "5'RS" EXACT [] -synonym: "five prime D-recombination signal sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "3'-clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://imgt.cines.fr/ligmb/LIGMlect?query=7] -synonym: "C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-CLUSTER" EXACT [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: "Seven nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or\nJ-gene recombination feature of an immunoglobulin or T-cell receptor gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "HEPTAMER" RELATED [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000563 -name: vertebrate_immune_system_gene_recombination_spacer -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inverversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promoter -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000569 -name: retrotransposed -alt_id: SO:0100042 -def: "An attribute of a feature that occured as the product of a reverse transcriptase mediated event." [SO:ke] -comment: GO:0003964 RNA-directed DNA polymerase activity. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "3'D-RS" EXACT [] -synonym: "three_prime_D-recombination_signal_sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000572 -name: DJ_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "D-J-GENE" EXACT [] -is_a: SO:0000936 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -relationship: part_of SO:0000485 ! DJ_J_cluster -relationship: part_of SO:0000504 ! D_DJ_C_cluster -relationship: part_of SO:0000505 ! D_DJ_cluster -relationship: part_of SO:0000506 ! D_DJ_J_C_cluster -relationship: part_of SO:0000508 ! D_DJ_J_cluster -relationship: part_of SO:0000518 ! V_DJ_cluster -relationship: part_of SO:0000519 ! V_DJ_J_cluster -relationship: part_of SO:0000527 ! V_D_DJ_C_cluster -relationship: part_of SO:0000528 ! V_D_DJ_cluster -relationship: part_of SO:0000529 ! V_D_DJ_J_C_cluster -relationship: part_of SO:0000530 ! V_D_DJ_J_cluster -relationship: part_of SO:0000539 ! DJ_C_cluster -relationship: part_of SO:0000540 ! DJ_J_C_cluster -relationship: part_of SO:0000542 ! V_DJ_C_cluster -relationship: part_of SO:0000564 ! V_DJ_J_C_cluster - -[Term] -id: SO:0000573 -name: rRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000574 -name: VDJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-D-J-GENE" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -relationship: part_of SO:0000487 ! VDJ_J_C_cluster -relationship: part_of SO:0000488 ! VDJ_J_cluster -relationship: part_of SO:0000520 ! V_VDJ_C_cluster -relationship: part_of SO:0000521 ! V_VDJ_cluster -relationship: part_of SO:0000522 ! V_VDJ_J_cluster -relationship: part_of SO:0000541 ! VDJ_C_cluster -relationship: part_of SO:0000565 ! V_VDJ_J_C_cluster - -[Term] -id: SO:0000575 -name: scRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000576 -name: VJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." 
[http://imgt.cines.fr/ligmdb/LIGMlect?query=7] -synonym: "V-J-GENE" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -relationship: part_of SO:0000489 ! VJ_C_cluster -relationship: part_of SO:0000490 ! VJ_J_C_cluster -relationship: part_of SO:0000491 ! VJ_J_cluster -relationship: part_of SO:0000523 ! V_VJ_C_cluster -relationship: part_of SO:0000524 ! V_VJ_cluster -relationship: part_of SO:0000525 ! V_VJ_J_cluster -relationship: part_of SO:0000566 ! V_VJ_J_C_cluster - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." [SO:ma] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://www.rna.ucla.edu] -synonym: "pre-edited region" EXACT [] -is_a: SO:0000579 ! 
edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "A tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. TmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and eukaryote nuclear genomes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa RNA" RELATED [] -synonym: "ssrA" RELATED [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_encoding -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." [SO:ke] -synonym: "10Sa RNA primary transcript" RELATED [] -synonym: "ssrA RNA primary transcript" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyse their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -intersection_of: SO:0000188 ! 
intron -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000589 ! 
SRP_RNA_primary_transcript - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A tertiary structure in RNA where nucleotides in a loop form base pairs with a region of RNA downstream of the loop." [RSC:cb] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "classical pseudoknot" EXACT [] -synonym: "H-pseudoknot" EXACT [] -synonym: "H-type pseudoknot" EXACT [] -synonym: "hairpin-type pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "box C/D snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000595 ! C_D_box_snoRNA_primary_transcript - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. 
A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "box H/ACA snoRNA" EXACT [] -synonym: "H/ACA box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000596 ! H_ACA_box_snoRNA_primary_transcript - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." [http://www.rna.ucla.edu/index.html] -is_obsolete: true - -[Term] -id: SO:0000598 -name: edited_by_C_insertion_and_dinucleotide_insertion -def: "The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. 
Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs." [http://nsm1.utdallas.edu/bio/miller/physarum/overview.htm] -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000599 -name: edited_by_C_to_U_substitution -is_obsolete: true - -[Term] -id: SO:0000600 -name: edited_by_A_to_I_substitution -is_obsolete: true - -[Term] -id: SO:0000601 -name: edited_by_G_addition -is_obsolete: true - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." 
[http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." [http://www.rna.ucla/] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "The region between two known genes." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://www.rna.ucla/] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." [http://www.rna.ucla.edu/] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_encoding -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -is_a: SO:0000001 ! region -relationship: adjacent_to SO:0000602 ! guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! 
spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -is_a: SO:0000752 ! gene_group_regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -is_a: SO:0000951 ! eukaryotic_terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "tRNA promoter" RELATED [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -def: "A variably distant linear promoter region recognised by TFIIIC, with consensus sequence TGGCnnAGTGG." [SO:ke] -is_a: SO:0000846 ! RNApol_III_promoter_type_2_region - -[Term] -id: SO:0000620 -name: B_box -def: "A variably distant linear promoter region recognised by TFIIIC, with consensus sequence AGGTTCCAnnCC." [SO:ke] -is_a: SO:0000846 ! RNApol_III_promoter_type_2_region - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -is_a: SO:0000171 ! 
RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -def: "An RNA polymerase II type 1 promoter with consensus sequence CAnnCCn." [SO:ke] -is_a: SO:0000845 ! RNApol_III_promoter_type_1_region - -[Term] -id: SO:0000623 -name: snRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "Combination of short DNA sequence elements which suppress the transcription of an adjacent gene or genes." [http://www.brunel.ac.uk/depts/bio/project/old_hmg/gloss3.htm] -subset: SOFA -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000627 -name: insulator -def: "A trancriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -def: "A start codon upstream of the ORF." [SO:ke] -is_a: SO:0000837 ! UTR_region - -[Term] -id: SO:0000631 -name: polycistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." [SO:ke] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000632 -name: monocistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." 
[SO:ke] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000633 -name: monocistronic_mRNA -def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd] -synonym: "monocistronic processed transcript" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000634 -name: polycistronic_mRNA -def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd] -synonym: "polycistronic processed transcript" RELATED [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "A primary transcript that donates the spliced leader to other mRNA." [SO:ke] -synonym: "mini-exon donor RNA" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000635 ! mini_exon_donor_RNA - -[Term] -id: SO:0000637 -name: engineered_plasmid -def: "A plasmid that is engineered." [SO:xp] -synonym: "engineered plasmid gene" RELATED [] -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -is_a: SO:0000838 ! rRNA_primary_transcript_region - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -is_a: SO:0000638 ! 
transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repetitive sequence spanning 500 to 20,000 base pairs (a repeat unit is 5 - 30 base pairs)." [http://www.rerf.or.jp/eigo/glossary/minisate.htm] -subset: SOFA -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro RNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000648 -name: stRNA_primary_transcript -def: "A primary transcript encoding a small temporal mRNA (SO:0000649)." 
[SO:ke] -synonym: "small temporal RNA primary transcript" EXACT [] -is_a: SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000255 ! rRNA_small_subunit_primary_transcript - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilises 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." 
[SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000654 -name: maxicircle_gene -def: "A mitochondrial gene located in a maxicircle." [SO:xp] -synonym: "maxi-circle gene" EXACT [] -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: has_origin SO:0000742 ! maxicircle_sequence - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -is_a: SO:0000233 ! processed_transcript - -[Term] -id: SO:0000656 -name: stRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "interspersed repeat" EXACT [] -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_obsolete: true - -[Term] -id: SO:0000661 -name: intron_attribute -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000663 -name: tRNA_encoding -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -is_a: SO:0000830 ! 
chromosome_part - -[Term] -id: SO:0000665 -name: monocistronic_transcript -def: "A transcript that is monocistronic." [SO:xp] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000666 -name: mobile_intron -is_a: SO:0000661 ! intron_attribute - -[Term] -id: SO:0000667 -name: insertion -def: "A region of sequence that has been inserted." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0001059 ! sequence_alteration -relationship: sequence_of SO:0000046 ! insert - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." [SO:ma] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non-canonical splice site" EXACT [] -is_a: SO:0000162 ! 
splice_site - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." [SO:ke] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -is_a: SO:0000164 ! three_prime_splice_site -is_a: SO:0000675 ! canonical_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." [SO:ke] -is_a: SO:0000163 ! five_prime_splice_site -is_a: SO:0000675 ! canonical_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." [SO:ke] -synonym: "non-canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_splice_site -is_a: SO:0000674 ! non_canonical_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non-canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_splice_site -is_a: SO:0000674 ! non_canonical_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non ATG start codon" EXACT [] -synonym: "non-canonical start codon" EXACT [] -is_a: SO:0000318 ! start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -is_a: SO:0000233 ! processed_transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." 
[http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -is_a: SO:0000344 ! splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeting by a nuclease enzyme." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000685 -name: DNAaseI_hypersensitive_site -is_a: SO:0000322 ! nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "For some translocations, particularly but not exclusively, reciprocal translocations, the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements." [SO:ma] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -is_a: SO:0000699 ! junction -relationship: position_of SO:0000045 ! delete - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000690 -name: gene_with_polycistronic_transcript -def: "A gene that encodes a polycistronic transcript." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:0000078 ! polycistronic_transcript - -[Term] -id: SO:0000691 -name: cleaved_initiator_methionine -alt_id: BS:00067 -def: "Indicates when the initator methionine has been cleaved from the mature sequence." [EBIBS:GAR] -subset: biosapiens -synonym: "cleaved_initiator_methionine" EXACT [] -synonym: "init_met" RELATED [] -synonym: "initator methionine" EXACT [] -is_a: SO:0001063 ! 
immature_peptide_region - -[Term] -id: SO:0000692 -name: gene_with_dicistronic_transcript -def: "A gene that encodes a dicistronic transcript." [SO:xp] -intersection_of: SO:0000690 ! gene_with_polycistronic_transcript -intersection_of: associated_with SO:0000079 ! dicistronic_transcript - -[Term] -id: SO:0000693 -name: gene_with_recoded_mRNA -def: "A gene that encodes an mRNA that is recoded." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: associated_with SO:1001261 ! recoded_mRNA - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives (alleles) exist in normal individuals in some population(s), wherein the least frequent allele has an abundance of 1% or greater." [http://www.cgr.ki.se/cgb/groups/brookes/Articles/essence_of_snps_article.pdf] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000697 -name: gene_with_stop_codon_read_through -def: "A gene that encodes a transcript with stop codon readthrough." [SO:xp] -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: associated_with SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000698 -name: gene_with_stop_codon_redefined_as_pyrrolysine -def: "A gene encoding an mRNA that has the stop codon redefined as pyrrolysine." [SO:xp] -intersection_of: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: associated_with SO:0000884 ! 
stop_codon_redefined_as_pyrrolysine - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000001 ! region - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology.\nA gene may be considered as a unit of inheritance. -subset: SOFA -is_a: SO:0000001 ! region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjacent copies of a DNA sequence." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The process that produces mature transcripts by combining exons of independent pre-mRNA molecules. 
The acceptor site lies on the 3' of these molecules." [SO:ke] -subset: SOFA -is_a: SO:0000164 ! three_prime_splice_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The site at which trans-splicing occurs." [SO:ke] -synonym: "trans-splice donor site" EXACT [] -is_a: SO:0000163 ! five_prime_splice_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000710 -name: gene_with_stop_codon_redefined_as_selenocysteine -def: "A gene encoding an mRNA that has the stop codon redefined as selenocysteine." [SO:xp] -intersection_of: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: associated_with SO:0000885 ! stop_codon_redefined_as_selenocysteine - -[Term] -id: SO:0000711 -name: gene_with_mRNA_recoded_by_translational_bypass -def: "A gene with mRNA recoded by translational bypass." [SO:xp] -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: associated_with SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:0000712 -name: gene_with_transcript_with_translational_frameshift -def: "A gene encoding a transcript that has a translational frameshift." [SO:xp] -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: associated_with SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000713 -name: DNA_motif -def: "A motif that is active in the DNA form of the sequence." [SO:ke] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000716 -name: dicistronic_mRNA -def: "An mRNA that has the quality dicistronic." 
[SO:ke] -synonym: "dicistronic processed transcript" RELATED [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SO:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interrupted by one or more stop codons; usually identified through intergenomic sequence comparisons." [SO:rb] -comment: Term requested by Rama from SGD. -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -is_a: SO:0000353 ! assembly - -[Term] -id: SO:0000720 -name: foreign_transposable_element -def: "A transposable element that is foreign." [SO:ke] -comment: requested by Michael on 19 Nov 2004. -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000721 -name: gene_with_dicistronic_primary_transcript -def: "A gene that encodes a dicistronic primary transcript." [SO:xp] -comment: Requested by Michael, 19 nov 2004. -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: associated_with SO:1001197 ! dicistronic_primary_transcript - -[Term] -id: SO:0000722 -name: gene_with_dicistronic_mRNA -def: "A gene that encodes a polycistronic mRNA." [SO:xp] -comment: Requested by MA nov 19 2004. 
-synonym: "gene with dicistronic processed transcript" EXACT [] -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: associated_with SO:0000716 ! dicistronic_mRNA - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." [SO:ma] -synonym: "intervening DNA" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit_peptide" EXACT [] -is_a: SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000726 -name: repeat_unit -def: "A single repeat element." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to comply with the feature table. -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. 
-synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000728 -name: intein -def: "A region of a peptide that is able to excise itself and rejoin the remaining portions with a peptide bond." [SO:ke] -comment: Intein-mediated protein splicing occurs after mRNA has been translated into a protein. -synonym: "protein intron" RELATED [] -is_a: SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000729 -name: intein_containing -def: "An attribute of protein-coding genes where the initial protein product contains an intein." [SO:ke] -is_a: SO:0000010 ! protein_coding - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! assembly - -[Term] -id: SO:0000731 -name: fragment -def: "An attribute to describe a feature that is incomplete." [SO:ke] -comment: Term added because of request by MO people. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000732 -name: predicted -def: "An attribute describing an unverified region." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000733 -name: feature_attribute -def: "An attribute describing a located_sequence_feature." [SO:ke] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000734 -name: exemplar_mRNA -def: "An exemplar is a representative cDNA sequence for each gene. The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: Added for the MO people. -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000864 ! exemplar - -[Term] -id: SO:0000735 -name: sequence_location -is_a: SO:0000400 ! 
sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_sequence -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000738 -name: nuclear_sequence -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000740 -name: plastid_sequence -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000741 -name: kinetoplast_sequence -is_a: SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000742 -name: maxicircle_sequence -is_a: SO:0000741 ! kinetoplast_sequence - -[Term] -id: SO:0000743 -name: apicoplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_location -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_location -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -is_a: SO:0005836 ! regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -is_a: SO:0000695 ! 
reagent - -[Term] -id: SO:0000754 -name: lambda_vector -def: "The lambda bacteriophage is the vector for the linear lambda clone. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000755 -name: plasmid_vector -is_a: SO:0000440 ! vector - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template." [SO:ma] -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -synonym: "single strand cDNA" EXACT [] -synonym: "single-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -synonym: "double strand cDNA" RELATED [] -synonym: "double-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_obsolete: true - -[Term] -id: SO:0000760 -name: YAC_clone -is_obsolete: true - -[Term] -id: SO:0000761 -name: phagemid_clone -is_obsolete: true - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000763 -name: fosmid_clone -is_obsolete: true - -[Term] -id: SO:0000764 -name: BAC_clone -is_obsolete: true - -[Term] -id: SO:0000765 -name: cosmid_clone -is_obsolete: true - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -def: "A tRNA sequence that has a pyrrolysine anticodon, and a 3' pyrrolysine binding region." [SO:ke] -synonym: "pyrrolysyl-transfer ribonucleic acid" EXACT [] -synonym: "pyrrolysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0001178 ! pyrrolysine_tRNA_primary_transcript - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome." [SO:ma] -is_a: SO:0000155 ! 
plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria. Processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [http://nar.oxfordjournals.org/cgi/content/full/32/15/4531n10, Indiana:kw] -comment: Added in response to comment from Kelly Williams from Indiana. Nov 2005. -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [http://nar.oxfordjournals.org/cgi/content/full/32/15/4531n10, Indiana:kw] -comment: Added in response to Kelly Williams from Indiana. Date: Nov 2005. -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000771 -name: QTL -def: "A quantitative trait locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." [http://rgd.mcw.edu/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005. -synonym: "quantitative trait locus" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0000772 -name: genomic_island -def: "A genomic island is an integrated mobile genetic element, characterized by size (over 10 Kb). It that has features that suggest a foreign origin. These can include nucleotide distribution (oligonucleotides signature, CG content etc.) 
that differs from the bulck of the chromosome and/or genes suggesting DNA mobility." [Phigo:at, SO:ke] -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands. -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -def: "An adaptive island is a genomic island that provides an adaptive advantage to the host." [SO:ke] -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands. Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands. Evolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA gene. John T. Sullivan and Clive W. 
Ronso PNAS 1998 Apr 28 95 (9) 5145-5149. -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000779 -name: engineered_episome -def: "An episome that is engineered." [SO:xp] -comment: Requested by Lynn Crosby Jan 2006. -intersection_of: SO:0000768 ! episome -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000780 -name: transposable_element_attribute -comment: Added by KE Jan 2006 to capture the kinds of attributes of TEs -is_obsolete: true - -[Term] -id: SO:0000781 -name: transgenic -def: "Attribute describing sequence that has been integrated with foreign sequence." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000782 -name: natural -def: "An attribute describing a feature that occurs in nature." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000783 -name: engineered -def: "An attribute to describe a region that was modified in vitro." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000784 -name: foreign -def: "An atrribute to describe a region from another species." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000785 -name: cloned_region -comment: Added in response to Lynn Crosby. A clone insert may be composed of many cloned regions. -synonym: "cloned segment" EXACT [] -is_a: SO:0000695 ! 
reagent -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000786 -name: reagent_attribute -comment: Added jan 2006 by KE. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000787 -name: clone_attribute -is_obsolete: true - -[Term] -id: SO:0000788 -name: cloned -is_obsolete: true - -[Term] -id: SO:0000789 -name: validated -def: "An attribute to describe a feature that has been proven." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000790 -name: invalidated -def: "An attribute describing a feature that is invalidated." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000791 -name: cloned_genomic -is_obsolete: true - -[Term] -id: SO:0000792 -name: cloned_cDNA -is_obsolete: true - -[Term] -id: SO:0000793 -name: engineered_DNA -is_obsolete: true - -[Term] -id: SO:0000794 -name: engineered_rescue_region -def: "A rescue region that is engineered." [SO:xp] -synonym: "engineered rescue fragment" EXACT [] -synonym: "engineered rescue segment" EXACT [] -intersection_of: SO:0000411 ! rescue_region -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000795 -name: rescue_mini_gene -def: "A mini_gene that rescues." [SO:xp] -synonym: "rescue mini-gene" EXACT [] -intersection_of: SO:0000815 ! mini_gene -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000796 -name: transgenic_transposable_element -def: "TE that has been modified in vitro, including insertion of DNA derived from a source other than the originating TE." [SO:mc] -comment: Modified as requested by Lynn - FB. May 2007. -intersection_of: SO:0000101 ! transposable_element -intersection_of: derives_from SO:0000151 ! clone -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000797 -name: natural_transposable_element -def: "TE that exists (or existed) in nature." [SO:mc] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000782 ! 
natural - -[Term] -id: SO:0000798 -name: engineered_transposable_element -def: "TE that has been modified by manipulations in vitro." [SO:mc] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000799 -name: engineered_foreign_transposable_element -def: "A transposable_element that is engineered and foreign." [SO:mc] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000800 -name: assortment_derived_duplication -def: "A multi-chromosome duplication aberration generated by reassortment of other aberration components." [FB:gm] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000801 -name: assortment_derived_deficiency_plus_duplication -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency and a duplication." [FB:gm] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000802 -name: assortment_derived_deficiency -def: "A multi-chromosome deficiency aberration generated by reassortment of other aberration components." [FB:gm] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000803 -name: assortment_derived_aneuploid -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency or a duplication." [fb:gm] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000804 -name: engineered_region -def: "A region that is engineered." [SO:xp] -synonym: "engineered sequence" EXACT [] -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000805 -name: engineered_foreign_region -def: "A region that is engineered and foreign." [SO:xp] -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! 
foreign - -[Term] -id: SO:0000806 -name: fusion -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000807 -name: engineered_tag -def: "A tag that is engineered." [SO:xp] -intersection_of: SO:0000324 ! tag -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000808 -name: validated_cDNA_clone -def: "A cDNA clone that has been validated." [SO:xp] -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000789 ! validated - -[Term] -id: SO:0000809 -name: invalidated_cDNA_clone -def: "A cDNA clone that is invalid." [SO:xp] -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000790 ! invalidated - -[Term] -id: SO:0000810 -name: chimeric_cDNA_clone -def: "A cDNA clone invalidated because it is chimeric." [SO:xp] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA - -[Term] -id: SO:0000811 -name: genomically_contaminated_cDNA_clone -def: "A cDNA clone invalidated by genomic contamination." [SO:xp] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000414 ! invalidated_by_genomic_contamination - -[Term] -id: SO:0000812 -name: polyA_primed_cDNA_clone -def: "A cDNA clone invalidated by polyA priming." [SO:xp] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000415 ! invalidated_by_genomic_polyA_primed_cDNA - -[Term] -id: SO:0000813 -name: partially_processed_cDNA_clone -def: "A cDNA invalidated clone by partial processing." [SO:xp] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000416 ! invalidated_by_partial_processing - -[Term] -id: SO:0000814 -name: rescue -def: "An attribute describing a region's ability, when introduced to a mutant organism, to re-establish (rescue) a phenotype." [SO:ke] -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000815 -name: mini_gene -def: "By definition, minigenes are short open-reading frames (ORF), usually encoding approximately 9 to 20 amino acids, which are expressed in vivo (as distinct from being synthesized as peptide or protein ex vivo and subsequently injected). The in vivo synthesis confers a distinct advantage: the expressed sequences can enter both antigen presentation pathways, MHC I (inducing CD8+ T- cells, which are usually cytotoxic T-lymphocytes (CTL))\nand MHC II (inducing CD4+ T-cells, usually 'T-helpers' (Th)); and can encounter B-cells, inducing antibody responses. Three main vector approaches have been used to deliver minigenes: viral vectors, bacterial vectors and plasmid DNA." [PMID:15992143] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000816 -name: rescue_gene -def: "A gene that rescues." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000817 -name: wild_type -def: "An attribute describing sequence with the genotype found in nature and/or standard laboratory stock." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000818 -name: wild_type_rescue_gene -def: "A gene that rescues." [SO:xp] -is_a: SO:0000816 ! rescue_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000817 ! wild_type - -[Term] -id: SO:0000819 -name: mitochondrial_chromosome -def: "A chromosome originating in a mitochondria." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000820 -name: chloroplast_chromosome -def: "A chromosome originating in a chloroplast." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000821 -name: chromoplast_chromosome -def: "A chromosome originating in a chromoplast." [SO:xp] -intersection_of: SO:0000340 ! 
chromosome -intersection_of: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000822 -name: cyanelle_chromosome -def: "A chromosome originating in a cyanelle." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000823 -name: leucoplast_chromosome -def: "A chromosome with origin in a leucoplast." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000824 -name: macronuclear_chromosome -def: "A chromosome originating in a macronucleus." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000083 ! macronuclear_sequence - -[Term] -id: SO:0000825 -name: micronuclear_chromosome -def: "A chromosome originating in a micronucleus." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000084 ! micronuclear_sequence - -[Term] -id: SO:0000826 -name: kinetoplast_chromosome -def: "A chromosome with origin in the kinetoplast." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000741 ! kinetoplast_sequence - -[Term] -id: SO:0000827 -name: maxicircle_chromosome -def: "A chromosome originating in a maxi-circle." [SO:xp] -intersection_of: SO:0000826 ! kinetoplast_chromosome -intersection_of: has_origin SO:0000742 ! maxicircle_sequence - -[Term] -id: SO:0000828 -name: nuclear_chromosome -def: "A chromosome originating in a nucleus." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000829 -name: nucleomorphic_chromosome -def: "A chromosome originating in a nucleomorph." [SO:xp] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." 
[SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000832 -name: promoter_region -def: "A region of sequence which is part of a promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of promoter to have an is_a path back to the root. -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000167 ! promoter - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: processed_transcript_region -def: "A region of a processed transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a processed transcript and give them an is_a path to the root. -subset: SOFA -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A region of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -is_a: SO:0000834 ! 
processed_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000838 -name: rRNA_primary_transcript_region -def: "A region of an rRNA primary transcript." [SO:ke] -comment: To allow transcribed_spacer_region to have a path to the root. -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a protein." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" EXACT [] -synonym: "positional polypeptide feature" EXACT [] -synonym: "region or site annotation" EXACT [] -is_a: SO:0000001 ! region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000840 -name: repeat_component -def: "A region of a repeated sequence." [SO:ke] -comment: A manufactured to group the parts of repeats, to give them an is_a path back to the root. -is_a: SO:0000001 ! region -relationship: part_of SO:0000657 ! repeat_region - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -is_a: SO:0000001 ! region -relationship: part_of SO:0000704 ! 
gene - -[Term] -id: SO:0000843 -name: bacterial_RNApol_promoter_region -def: "A region which is part of a bacterial RNA polymerase promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of bacterial_RNApol_promoter to have an is_a path back to the root. -is_a: SO:0000832 ! promoter_region -relationship: part_of SO:0000613 ! bacterial_RNApol_promoter - -[Term] -id: SO:0000844 -name: RNApol_II_promoter_region -def: "A region of sequence which is a promoter for RNA polymerase II." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_II_promoter to have an is_a path back to the root. -is_a: SO:0000832 ! promoter_region -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000845 -name: RNApol_III_promoter_type_1_region -def: "A region of sequence which is a promoter for RNA polymerase III type 1." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_1 to have an is_a path back to the root. -is_a: SO:0000832 ! promoter_region -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000846 -name: RNApol_III_promoter_type_2_region -def: "A region of sequence which is a promoter for RNA polymerase III type 2." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_2 to have an is_a path back to the root. -is_a: SO:0000832 ! promoter_region -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000847 -name: tmRNA_region -comment: This term was added to provide a grouping term for the region parts of tmRNA, thus giving them an is_a path back to the root. -is_a: SO:0000834 ! processed_transcript_region -relationship: part_of SO:0000584 ! tmRNA - -[Term] -id: SO:0000848 -name: LTR_component -synonym: "long term repeat component" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000286 ! 
long_terminal_repeat - -[Term] -id: SO:0000849 -name: three_prime_LTR_component -synonym: "3' long terminal repeat component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000850 -name: five_prime_LTR_component -synonym: "5' long term repeat component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000851 -name: CDS_region -subset: SOFA -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0000853 -name: homologous_region -def: "A region that is homologous to another region." [SO:ke] -synonym: "homolog" EXACT [] -synonym: "homologue" EXACT [] -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000857 ! homologous - -[Term] -id: SO:0000854 -name: paralogous_region -def: "A homologous_region that is paralogous to another region." [SO:ke] -comment: A term to be used in conjunction with the paralogous_to relationship. -synonym: "paralog" EXACT [] -synonym: "paralogue" EXACT [] -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000859 ! paralogous - -[Term] -id: SO:0000855 -name: orthologous_region -def: "A homologous_region that is orthologous to another region." [SO:ke] -comment: This term should be used in conjunction with the similarity relationships defined in SO. -synonym: "ortholog" EXACT [] -synonym: "orthologue" EXACT [] -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000858 ! orthologous - -[Term] -id: SO:0000856 -name: conserved -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000857 -name: homologous -def: "Similarity due to common ancestry." [SO:ke] -is_a: SO:0000856 ! 
conserved - -[Term] -id: SO:0000858 -name: orthologous -def: "An attribute describing a kind of homology where divergence occured after a speciation event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000859 -name: paralogous -def: "An attribute describing a kind of homology where divergence occured after a duplication event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000860 -name: syntenic -def: "Attribute describing sequence regions occurring in same order on chromosome of different species." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000861 -name: capped_primary_transcript -def: "A primary transcript that is capped." [SO:xp] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000862 -name: capped_mRNA -def: "An mRNA that is capped." [SO:xp] -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000863 -name: mRNA_attribute -def: "An attribute describing an mRNA feature." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000864 -name: exemplar -def: "An attribute describing a sequence is representative of a class of similar sequences." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000865 -name: frameshift -def: "An attribute describing a sequence that contains a mutation involving the deletion or insertion of one or more bases, where this number is not divisible by 3." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000866 -name: minus_1_frameshift -def: "A frameshift caused by deleting one base." [SO:ke] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000867 -name: minus_2_frameshift -def: "A frameshift caused by deleting two bases." [SO:ke] -is_a: SO:0000865 ! 
frameshift - -[Term] -id: SO:0000868 -name: plus_1_frameshift -def: "A frameshift caused by inserting one base." [SO:ke] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000869 -name: plus_2_framshift -def: "A frameshift caused by inserting two bases." [SO:ke] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000870 -name: trans_spliced -def: "An attribute describing transcript sequence that is created by splicing exons from diferent genes." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000871 -name: polyadenylated_mRNA -def: "An mRNA that is polyadenylated." [SO:xp] -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000610 ! polyA_sequence -intersection_of: has_quality SO:0000246 ! polyadenylated - -[Term] -id: SO:0000872 -name: trans_spliced_mRNA -def: "An mRNA that is trans-spliced." [SO:xp] -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000636 ! spliced_leader_RNA -intersection_of: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000873 -name: edited_transcript -def: "A transcript that is edited." [SO:ke] -intersection_of: SO:0000673 ! transcript -intersection_of: associated_with SO:0000602 ! guide_RNA -intersection_of: associated_with SO:0000977 ! anchor_binding_site -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000874 -name: edited_transcript_by A_to_I_substitution -def: "A transcript that has been edited by A to I substitution." [SO:ke] -intersection_of: SO:0000873 ! edited_transcript -intersection_of: associated_with SO:0000919 ! substitute_A_to_I - -[Term] -id: SO:0000875 -name: bound_by_protein -def: "An attribute describing a sequence that is bound by a protein." [SO:ke] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000876 -name: bound_by_nucleic_acid -def: "An attribute describing a sequence that is bound by a nucleic acid." [SO:ke] -is_a: SO:0000277 ! 
bound_by_factor - -[Term] -id: SO:0000877 -name: alternatively_spliced -def: "An attribute describing a situation where a gene may encode for more than 1 transcript." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000878 -name: monocistronic -def: "An attribute describing a sequence that contains the code for one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000879 -name: dicistronic -def: "An attribute describing a sequence that contains the code for two gene products." [SO:ke] -is_a: SO:0000880 ! polycistronic - -[Term] -id: SO:0000880 -name: polycistronic -def: "An attribute describing a sequence that contains the code for more than one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000881 -name: recoded -def: "An attribute describing am mRNA sequences that has been reprogrammed at translation, causing localized alterations." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000882 -name: codon_redefined -def: "An attribute describing the alteration of codon meaning." [SO:ke] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000883 -name: stop_codon_read_through -def: "A stop codon redefined to be a new amino acid." [SO:ke] -synonym: "stop codon readthrough" RELATED [] -is_a: SO:0000882 ! codon_redefined - -[Term] -id: SO:0000884 -name: stop_codon_redefined_as_pyrrolysine -def: "A stop codon redefined to be the new amino acid, pyrrolysine." [SO:ke] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000885 -name: stop_codon_redefined_as_selenocysteine -def: "A stop codon redefined to be the new amino acid, selenocysteine." [SO:ke] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000886 -name: recoded_by_translational_bypass -def: "Recoded mRNA where a block of nucleotides is not translated." [SO:ke] -is_a: SO:0000881 ! 
recoded - -[Term] -id: SO:0000887 -name: translationally_frameshifted -def: "Recoding by frameshifting a particular site." [SO:ke] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000888 -name: maternally_imprinted_gene -def: "A gene that is maternally_imprinted." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000135 ! maternally_imprinted - -[Term] -id: SO:0000889 -name: paternally_imprinted_gene -def: "A gene that is paternally imprinted." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000136 ! paternally_imprinted - -[Term] -id: SO:0000890 -name: post_translationally_regulated_gene -def: "A gene that is post translationally regulated." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000891 -name: negatively_autoregulated_gene -def: "A gene that is negatively autoreguated." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000473 ! negatively_autoregulated - -[Term] -id: SO:0000892 -name: positively_autoregulated_gene -def: "A gene that is positively autoregulated." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000475 ! positively_autoregulated - -[Term] -id: SO:0000893 -name: silenced -def: "An epigenetic process where a gene is innactivated at transcriptional or translational level." [SO:ke] -is_a: SO:0000126 ! transcriptionally_repressed - -[Term] -id: SO:0000894 -name: silenced_by_DNA_modification -def: "An epigenetic process where a gene is innactivated by DNA modifications, resulting in repression of transcription." [SO:ke] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0000895 -name: silenced_by_DNA_methylation -def: "An epigenetic process where a gene is innactivated by DNA methylation, resulting in repression of transcription." [SO:ke] -is_a: SO:0000894 ! 
silenced_by_DNA_modification - -[Term] -id: SO:0000896 -name: translationally_regulated_gene -def: "A gene that is translationally regulated." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000131 ! translationally_regulated - -[Term] -id: SO:0000897 -name: allelically_excluded_gene -def: "A gene that is allelically_excluded." [SO:xp] -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000137 ! allelically_excluded - -[Term] -id: SO:0000898 -name: epigenetically_modified_gene -def: "A gene that is epigenetically modified." [SO:ke] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000899 -name: nuclear_mitochondrial -def: "An attribute describing a nuclear pseudogene of a mitochndrial gene." [SO:ke] -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0000900 -name: processed -def: "An attribute describing a pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promotors, but often including a polyA tail." [SO:ke] -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0000901 -name: unequally_crossed_over -def: "An attribute describing a pseudogene that was created by tandem duplication and unequal crossing over during recombination." [SO:ke] -is_a: SO:0000042 ! pseudogene_attribute - -[Term] -id: SO:0000902 -name: transgene -def: "A gene that is transgenic." [SO:xp] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000903 -name: endogenous_retroviral_sequence -is_a: SO:0000751 ! proviral_location - -[Term] -id: SO:0000904 -name: rearranged_at_DNA_level -def: "An attribute to describe the sequence of a feature, where the DNA is rearranged." [SO:ke] -is_a: SO:0000133 ! 
epigenetically_modified - -[Term] -id: SO:0000905 -name: status -def: "An attribute describing the status of a feature, based on the available evidence." [SO:ke] -comment: This term is the hypernym of attributes and should not be annotated to. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000906 -name: independently_known -def: "Attribute to describe a feature that is independently known - not predicted." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000907 -name: supported_by_sequence_similarity -def: "An attribute to describe a feature that has been predicted using sequence similarity techniques." [SO:ke] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000908 -name: supported_by_domain_match -def: "An attribute to describe a feature that has been predicted using sequence similarity of a known domain." [SO:ke] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000909 -name: supported_by_EST_or_cDNA -def: "An attribute to describe a feature that has been predicted using sequence similarity to EST or cDNA data." [SO:ke] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000910 -name: orphan -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000911 -name: predicted_by_ab_initio_computation -def: "An attribute describing a feature that is predicted by a computer program that did not rely on sequence similarity." [SO:ke] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000912 -name: asx_turn -alt_id: BS:00203 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Aspartate or Asparagine (Asx), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx_turn" EXACT [] -is_a: SO:0001128 ! turn - -[Term] -id: SO:0000913 -name: cloned_cDNA_insert -def: "A clone insert made from cDNA." [SO:xp] -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000756 ! 
cDNA - -[Term] -id: SO:0000914 -name: cloned_genomic_insert -def: "A clone insert made from genomic DNA." [SO:xp] -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000915 -name: engineered_insert -def: "A clone insert that is engineered." [SO:xp] -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000916 -name: edit_operation -is_a: SO:0000041 ! operation - -[Term] -id: SO:0000917 -name: insert_U -def: "An edit to insert a U." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000918 -name: delete_U -def: "An edit to delete a uridine." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000919 -name: substitute_A_to_I -def: "An edit to substitute an I for an A." [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000920 -name: insert_C -def: "An edit to insert a cytidine." [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000921 -name: insert_dinucleotide -def: "An edit to insert a dinucleotide." [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000922 -name: substitute_C_to_U -def: "An edit to substitute an U for a C." [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000923 -name: insert_G -def: "An edit to insert a G." [SO:ke] -is_a: SO:0000916 ! edit_operation - -[Term] -id: SO:0000924 -name: insert_GC -def: "An edit to insert a GC dinucleotide." 
[SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000925 -name: insert_GU -def: "An edit to insert a GU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000926 -name: insert_CU -def: "An edit to insert a CU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. 
Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000927 -name: insert_AU -def: "An edit to insert a AU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000928 -name: insert_AA -def: "An edit to insert a AA dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -is_a: SO:0000921 ! insert_dinucleotide - -[Term] -id: SO:0000929 -name: edited_mRNA -def: "An mRNA that is edited." [SO:xp] -intersection_of: SO:0000873 ! 
edited_transcript -intersection_of: associated_with SO:0000916 ! edit_operation -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000930 -name: guide_RNA_region -def: "A region of guide RNA." [SO:ma] -is_a: SO:0000834 ! processed_transcript_region -relationship: part_of SO:0000602 ! guide_RNA - -[Term] -id: SO:0000931 -name: anchor_region -def: "A region of a guide_RNA that base-pairs to a target mRNA." [SO:jk] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000932 -name: pre_edited_mRNA -is_a: SO:0000120 ! protein_coding_primary_transcript - -[Term] -id: SO:0000933 -name: intermediate -def: "An attribute to describe a feature between stages of processing." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000934 -name: miRNA_target_site -def: "A miRNA target site is a binding site where the molecule is a micro RNA." [FB:cds] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000935 -name: edited_CDS -def: "A CDS that is edited." [SO:xp] -intersection_of: SO:0000316 ! CDS -intersection_of: associated_with SO:0000916 ! edit_operation -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000936 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000937 -name: vertebrate_immune_system_feature -is_obsolete: true - -[Term] -id: SO:0000938 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000939 -name: vertebrate_immune_system_gene_recombination_signal_feature -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000940 -name: recombinationally_rearranged -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000941 -name: recombinationally_rearranged_vertebrate_immune_system_gene -def: "A recombinationally rearranged gene of the vertebrate immune system." 
[SO:xp] -intersection_of: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: associated_with SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000942 -name: attP_site -def: "An integration/excision site of a phage chromosome at which a recombinase acts to insert the phage DNA at a cognate integration/excision site on a bacterial chromosome." [SO:as] -synonym: "attP site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0001042 ! phage_sequence - -[Term] -id: SO:0000943 -name: attB_site -def: "An integration/excision site of a bacterial chromosome at which a recombinase acts to insert foreign DNA containing a cognate integration/excision site." [SO:as] -synonym: "attB site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000944 -name: attL_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attB_site and the 3' portion of attP_site." [SO:as] -synonym: "attBP'" RELATED [] -synonym: "attL site" RELATED [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000945 -name: attR_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attP_site and the 3' portion of attB_site." [SO:as] -synonym: "attPB'" RELATED [] -synonym: "attR site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000946 -name: integration_excision_site -def: "A region specifically recognised by a recombinase, which inserts or removes another region marked by a distinct cognate integration/excision site." [SO:as] -synonym: "attachment site" RELATED [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000947 -name: resolution_site -def: "A region specifically recognised by a recombinase, which separates a physically contiguous circle of DNA into two physically separate circles." 
[SO:as] -synonym: "res site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000948 -name: inversion_site -def: "A region specifically recognised by a recombinase, which inverts the region flanked by a pair of sites." [SO:ma] -comment: A target region for site-specific inversion of a DNA region and which carries binding sites for a site-specific recombinase and accessory proteins as well as the site for specific cleavage by the recombinase. -synonym: "inversion site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000949 -name: dif_site -def: "A site at which replicated bacterial circular chromosomes are decatenated by site specific resolvase." [SO:as] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000950 -name: attC_site -def: "An attC site is a sequence required for the integration of a DNA of an integron." [SO:as] -synonym: "attC site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000951 -name: eukaryotic_terminator -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000952 -name: oriV -def: "An origin of vegetative replication in plasmids and phages." [SO:as] -synonym: "origin of vegetative replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000953 -name: oriC -def: "An origin of bacterial chromosome replication." [SO:as] -synonym: "origin of bacterial chromosome replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000954 -name: DNA_chromosome -def: "Structural unit composed of a self-replicating, DNA molecule." [SO:ma] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0000955 -name: double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded DNA molecule." [SO:ma] -intersection_of: SO:0000954 ! 
DNA_chromosome -intersection_of: has_quality SO:0000985 ! double - -[Term] -id: SO:0000956 -name: single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded DNA molecule." [SO:ma] -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000984 ! single - -[Term] -id: SO:0000957 -name: linear_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear DNA molecule." [SO:ma] -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000958 -name: circular_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular DNA molecule." [SO:ma] -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000959 -name: linear_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear DNA molecule." [SO:ma] -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000960 -name: circular_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000961 -name: RNA_chromosome -def: "Structural unit composed of a self-replicating, RNA molecule." [SO:ma] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000356 ! RNA - -[Term] -id: SO:0000962 -name: single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded RNA molecule." [SO:ma] -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000984 ! 
single - -[Term] -id: SO:0000963 -name: linear_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear RNA molecule." [SO:ma] -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000964 -name: linear_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear RNA molecule." [SO:ma] -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000965 -name: double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded RNA molecule." [SO:ma] -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000985 ! double - -[Term] -id: SO:0000966 -name: circular_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000967 -name: circular_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular RNA molecule." [SO:ma] -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000968 -name: replication_mode -is_a: SO:0000000 ! Sequence_Ontology -disjoint_from: SO:0001060 ! sequence_variant - -[Term] -id: SO:0000969 -name: rolling_circle -is_a: SO:0000971 ! DNA_replication_mode - -[Term] -id: SO:0000970 -name: theta_replication -is_a: SO:0000971 ! DNA_replication_mode - -[Term] -id: SO:0000971 -name: DNA_replication_mode -is_a: SO:0000968 ! replication_mode - -[Term] -id: SO:0000972 -name: RNA_replication_mode -is_a: SO:0000968 ! 
replication_mode - -[Term] -id: SO:0000973 -name: insertion_sequence -def: "A terminal_inverted_repeat_element that is bacterial and only encodes the functions required for its transposition between these inverted repeats." [SO:as] -synonym: "IS" RELATED [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000974 -name: minicircle_chromosome -intersection_of: SO:0000826 ! kinetoplast_chromosome -intersection_of: has_origin SO:0000980 ! minicircle_sequence - -[Term] -id: SO:0000975 -name: minicircle_gene -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: has_origin SO:0000980 ! minicircle_sequence - -[Term] -id: SO:0000976 -name: cryptic -is_a: SO:0000116 ! edited - -[Term] -id: SO:0000977 -name: anchor_binding_site -comment: Part of an edited transcript only. -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000978 -name: template_region -def: "A region of a guide_RNA that specifies the insertions and deletions of bases in the editing of a target mRNA." [SO:jk] -synonym: "information region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000979 -name: gRNA_encoding -def: "A non-protein_coding gene that encodes a guide_RNA." [SO:ma] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000980 -name: minicircle_sequence -is_a: SO:0000741 ! kinetoplast_sequence - -[Term] -id: SO:0000981 -name: rho_dependent_bacterial_terminator -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000982 -name: rho_independent_bacterial_terminator -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000983 -name: strand_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000984 -name: single -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! 
strand_attribute - -[Term] -id: SO:0000985 -name: double -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000986 -name: topology_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000987 -name: linear -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000988 ! circular - -[Term] -id: SO:0000988 -name: circular -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000987 ! linear - -[Term] -id: SO:0000989 -name: class_II_RNA -def: "Small non-coding RNA (59-60 nt long) containing 5' and 3' ends that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -synonym: "class II RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000990 -name: class_I_RNA -def: "Small non-coding RNA (55-65 nt long) containing highly conserved 5' and 3' ends (16 and 8 nt, respectively) that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -comment: Requested by Karen Pilcher - Dictybase. song-Term Tracker-1574577. -synonym: "class I RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000991 -name: genomic_DNA -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000992 -name: BAC_cloned_genomic_insert -comment: Requested by Andy Schroder - Flybase Harvard, Nov 2006. -intersection_of: SO:0000914 ! 
cloned_genomic_insert -intersection_of: derives_from SO:0000153 ! BAC - -[Term] -id: SO:0000993 -name: consensus -comment: Term added Dec 06 to comply with mapping to MGED terms.\nIt should be used to generate consensus regions. The specific cross product terms they require are consensus_region and consensus_mRNA. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000994 -name: consensus_region -comment: DO not obsolete without considering MGED mapping. -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000995 -name: consensus_mRNA -comment: DO not obsolete without considering MGED mapping. -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000996 -name: predicted_gene -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000732 ! predicted - -[Term] -id: SO:0000997 -name: gene_fragment -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -intersection_of: SO:0000842 ! gene_component_region -intersection_of: has_quality SO:0000731 ! fragment - -[Term] -id: SO:0000998 -name: recursive_splice_site -def: "A recursive splice site is a splice site which subdivides a large intron. Recursive splicing is a mechanism that splices large introns by sub dividing the intron at non exonic elements and alternate exons." [http://www.genetics.org/cgi/content/full/170/2/661] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0000999 -name: BAC_end -def: "A region of sequence from the end of a BAC clone that may provide a highly specific marker." [SO:ke] -comment: Requested by Keith Boroevich December, 2006. -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000153 ! BAC - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." 
[SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as\npart of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001003 -name: solo_LTR -def: "A recombination product between the 2 LTR of the same element." [SO:ke] -comment: Requested by Hadi Quesneville January 2007. -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0001004 -name: low_complexity -is_a: SO:0000905 ! status - -[Term] -id: SO:0001005 -name: low_complexity_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001004 ! low_complexity - -[Term] -id: SO:0001006 -name: prophage -def: "A phage genome after it has established in the host genome in a latent/immune state either as a plasmid or as an integrated \"island\"." [GO:jl] -is_a: SO:0000113 ! proviral_region - -[Term] -id: SO:0001007 -name: cryptic_prophage -def: "A remnant of an integrated prophage in the host genome or an \"island\" in the host genome that includes phage like-genes." [GO:jl] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001008 -name: tetraloop -def: "A base-paired stem with loop of 4 non-hydrogen bonded nucleotides." [SO:ke] -is_a: SO:0000313 ! stem_loop - -[Term] -id: SO:0001009 -name: DNA_constraint_sequence -def: "A double-stranded DNA used to control macromolecular structure and function." 
[http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au\]&dispmax=50] -synonym: "DNA constraint" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0001010 -name: i_motif -def: "A cytosine rich domain whereby strands associate both inter- and intramolecularly at moderately acidic pH." [PMID:9753739] -synonym: "short intercalated motif" EXACT [] -is_a: SO:0000142 ! DNA_sequence_secondary_structure - -[Term] -id: SO:0001011 -name: PNA_oligo -def: "Peptide nucleic acid, is a chemical not known to occur naturally but is artificially synthesized and used in some biological research and medical treatments. The PNA backbone is composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [SO:ke] -synonym: "peptide nucleic acid" EXACT [] -intersection_of: SO:0000696 ! oligo -intersection_of: has_quality SO:0001184 ! PNA - -[Term] -id: SO:0001012 -name: DNAzyme -def: "A DNA sequence with catalytic activity." [SO:cb] -comment: Added by request from Colin Batchelor. -synonym: "catalytic DNA" EXACT [] -synonym: "deoxyribozyme" RELATED [] -synonym: "DNA enzyme" EXACT [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0001013 -name: MNP -def: "A multiple nucleotide polymorphism with alleles of common length > 1, for example AAA/TTT." [http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?rs=rs2067431] -synonym: "multiple nucleotide polymorphism" RELATED [] -is_a: SO:1000005 ! complex_substitution - -[Term] -id: SO:0001014 -name: intron_domain -comment: Requested by Colin Batchelor, Feb 2007. -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000188 ! intron - -[Term] -id: SO:0001015 -name: wobble_base_pair -def: "A type of non-canonical base pairing, most commonly between G and U, which is important for the secondary structure of RNAs. It has similar thermodynamic stability to the Watson-Crick pairing. 
Wobble base pairs only have two hydrogen bonds. Other wobble base pair possibilities are I-A, I-U and I-C." [PMID:11256617] -synonym: "wobble pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0001016 -name: internal_guide_sequence -def: "A purine-rich sequence in the group I introns which determines the locations of the splice sites in group I intron splicing and has catalytic activity." [SO:cb] -synonym: "IGS" EXACT [] -is_a: SO:0001014 ! intron_domain -relationship: part_of SO:0000587 ! group_I_intron - -[Term] -id: SO:0001017 -name: silent_mutation -comment: Added in March 2007 in after meeting with pharmgkb. -is_a: SO:1000132 ! mutation - -[Term] -id: SO:0001018 -name: epitope -def: "A region of a macromolecule that is recognized by the immune system." [http://en.wikipedia.org/wiki/Epitope] -comment: Requested by Trish Whetzel. -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -is_a: SO:0001060 ! sequence_variant - -[Term] -id: SO:0001020 -name: mutation_affecting_copy_number -is_a: SO:1000132 ! mutation - -[Term] -id: SO:0001021 -name: chromosome_breakpoint -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0001022 -name: inversion_breakpoint -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001023 -name: allele -def: "An allele is one of a set of coexisting sequence variants of a gene." [SO:immuno_workshop] -is_a: SO:0001060 ! sequence_variant -relationship: variant_of SO:0000704 ! gene - -[Term] -id: SO:0001024 -name: haplotype -def: "A haplotype is one of a set of coexisting sequence variants of a haplotype block." [SO:immuno_workshop] -is_a: SO:0001060 ! sequence_variant -relationship: variant_of SO:0000355 ! 
haplotype_block - -[Term] -id: SO:0001025 -name: polymorphic_sequence_variant -def: "A sequence variant that is segregating in one or more natural populations of a species." [SO:immuno_workshop] -is_a: SO:0001060 ! sequence_variant - -[Term] -id: SO:0001026 -name: genome -def: "A genome is the sum of genetic material within a cell or virion." [SO:immuno_workshop] -is_a: SO:0000000 ! Sequence_Ontology -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:0000400 ! sequence_attribute -disjoint_from: SO:0000968 ! replication_mode -disjoint_from: SO:0001060 ! sequence_variant -disjoint_from: SO:1000132 ! mutation - -[Term] -id: SO:0001027 -name: genotype -def: "A genotype is a variant genome, complete or incomplete." [SO:immuno_workshop] -is_a: SO:0001060 ! sequence_variant -relationship: variant_of SO:0001026 ! genome - -[Term] -id: SO:0001028 -name: diplotype -def: "A diplotype is a pair of haplotypes from a given individual. It is a genotype where the phase is known." [SO:immuno_workshop] -is_a: SO:0001027 ! genotype - -[Term] -id: SO:0001029 -name: direction_attribute -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001030 -name: forward -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001031 -name: reverse -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001032 -name: mitochondrial_DNA -comment: This terms is used by MO. -intersection_of: SO:0000737 ! mitochondrial_sequence -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001033 -name: chloroplast_DNA -comment: This term is used by MO. -intersection_of: SO:0000745 ! chloroplast_sequence -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001034 -name: mirtron -def: "A debranched intron which mimics the structure of pre-miRNA and enters the miRNA processing pathway without Drosha mediated cleavage." [PMID:17589500, SO:ma] -comment: Ruby et al. 
Nature 448:83 desribe a new class of miRNAs that are derived from debranched introns. -is_a: SO:0001014 ! intron_domain - -[Term] -id: SO:0001035 -name: piRNA -def: "A small non coding RNA, part of a silencing system that prevents the spreading of selfish genetic elements." [SO:ke] -synonym: "piwi-associated RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001036 -name: arginyl_tRNA -def: "A tRNA sequence that has an arginine anticodon, and a 3' arginine binding region." [SO:ke] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000212 ! arginine_tRNA_primary_transcript - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -synonym: "MGE" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001038 -name: extrachromosomal_mobile_genetic_element -def: "An MGE that is not integrated into the host chromosome." [SO:ke] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001040 -name: integrated_plasmid -def: "A plasmid sequence that is integrated within the host chromosome." [SO:ke] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0001041 -name: viral_sequence -def: "The region of nucleotide sequence of a virus, a submicroscopic particle that replicates by infecting a host cell." [SO:ke] -comment: The definitions of the children of this term were revised Decemeber 2007 after discussion on song-devel. The resulting definitions are slightly unweildy but hopefully more logically correct. -synonym: "viral sequence" EXACT [] -synonym: "virus sequence" EXACT [] -is_a: SO:0001038 ! 
extrachromosomal_mobile_genetic_element - -[Term] -id: SO:0001042 -name: phage_sequence -def: "The nucleotide sequence of a virus that infects bacteria." [SO:ke] -synonym: "bacteriophage" EXACT [] -synonym: "phage" EXACT [] -synonym: "phage sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001043 -name: attCtn_site -def: "An attachment site located on a conjugative transposon and used for site-specific integration of a conjugative transposon." [Phigo:at] -synonym: "attCtn site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000371 ! conjugative_transposon - -[Term] -id: SO:0001044 -name: nuclear_mt_pseudogene -def: "A pseudogene that is nuclear/mitochondrial." [SO:xp] -synonym: "nuclear mitochondrial pseudogene" EXACT [] -synonym: "NUMT" EXACT [] -intersection_of: SO:0000336 ! pseudogene -intersection_of: has_quality SO:0000899 ! nuclear_mitochondrial - -[Term] -id: SO:0001045 -name: cointegrated_replicon -def: "A MGE region consisting of two fused replicons/plasmids resulting from a replicative transposition event." [Phigo:at] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0001046 -name: IRLinv_site -def: "Component of the inversion site located at the left of a region susceptible to site-specific inversion." [Phigo:at] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001047 -name: IRRinv_site -def: "Component of the inversion site located at the right of a region susceptible to site-specific inversion." [Phigo:at] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001048 -name: inversion_site_part -def: "A region located within an inversion site." [SO:ke] -comment: A term created to allow the parts of an inversion site have an is_a path back to the root. -is_a: SO:0000342 ! 
site_specific_recombination_target_region - -[Term] -id: SO:0001049 -name: defective_conjugative_transposon -def: "An island that contains genes for integration/excision and the gene and site for the initiation of intercellular transfer by conjugation. It can be complemented for transfer by a conjugative transposon." [Phigo:ariane] -synonym: "defective conjugative transposon" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001050 -name: repeat_fragment -def: "A portion of a repeat, interrupted by the insertion of another element." [SO:ke] -comment: Requested by Chris Smith, and others at Flybase to help annotate nested repeats. -is_a: SO:0000840 ! repeat_component -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000731 ! fragment - -[Term] -id: SO:0001051 -name: nested_region -is_a: SO:0000001 ! region - -[Term] -id: SO:0001052 -name: nested_repeat -intersection_of: SO:0001051 ! nested_region -intersection_of: derives_from SO:0000657 ! repeat_region -intersection_of: has_part SO:0000001 ! region -intersection_of: has_part SO:0001050 ! repeat_fragment - -[Term] -id: SO:0001053 -name: nested_transposon -intersection_of: SO:0001051 ! nested_region -intersection_of: derives_from SO:0000101 ! transposable_element -intersection_of: has_part SO:0000001 ! region -intersection_of: has_part SO:0001054 ! transposon_fragment - -[Term] -id: SO:0001054 -name: transposon_fragment -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000731 ! fragment - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." [SO:ke] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001057 -name: enhanceosome -is_a: SO:0000165 ! 
enhancer - -[Term] -id: SO:0001058 -name: promoter_targeting_sequence -def: "A transcriptional_cis_regulatory_region that restricts the activity of a CRM to a single promoter and which functions only when both itself and an insulator are located between the CRM and the promoter." [SO:regcreative] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001060 -name: sequence_variant -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -is_a: SO:0000000 ! Sequence_Ontology - -[Term] -id: SO:0001061 -name: propeptide_cleavage_site -alt_id: BS:00063 -def: "The propeptide_cleavage_site is the arginine/lysine boundary on a propeptide where cleavage occurs." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "propeptide_cleavage_site" EXACT [] -is_a: SO:0001063 ! immature_peptide_region -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0001062 -name: propeptide -alt_id: BS:00077 -def: "Describes part of a peptide chain which is cleaved off during the formation of the mature protein." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "propep" RELATED [] -synonym: "propeptide" EXACT [] -is_a: SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the a region of peptide sequence that is cleaved during maturation." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "immature_peptide_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001064 -name: active_peptide -alt_id: BS:00076 -def: "Active peptides are proteins which are biologically active, released from a precursor molecule. 
Hormones, neuropeptides, antimicrobial peptides, are active peptides. They are typically short (<40 amino acids) in length." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "active_peptide" EXACT [] -synonym: "peptide" EXACT [] -is_a: SO:0000419 ! mature_protein_region - -[Term] -id: SO:0001066 -name: compositionally_biased_region -alt_id: BS:00068 -def: "Extent of a compositionally biased region. Used for homopolymeric stretches of residues and also for regions which are rich in a particular amino acid. Not used for a run of less than 4 residues." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "compbias" EXACT [] -synonym: "compositional bias" EXACT [] -synonym: "compositionally biased" EXACT [] -synonym: "compositionally_biased_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001067 -name: polypeptide_motif -alt_id: BS:00032 -def: "A sequence motif is a short (up to 20 amino acids) region of biological interest. Such motifs, although they are too short to constitute functional domains, share sequence similarities and are conserved in different proteins. They display a common function (protein-binding, subcellular location etc.)." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "motif" RELATED [] -synonym: "polypeptide_motif" EXACT [] -is_a: SO:0000417 ! polypeptide_domain - -[Term] -id: SO:0001068 -name: polypeptide_repeat -alt_id: BS:00070 -def: "A polypeptide_repeat is a single copy of an internal sequence repetition." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "polypeptide_repeat" EXACT [] -synonym: "repeat" RELATED [] -is_a: SO:0000417 ! polypeptide_domain - -[Term] -id: SO:0001069 -name: polypeptide_structural_domain -alt_id: BS:00134 -def: "A polypeptide domain is a structural domain that is self-stabilizing and folds independently of the rest of the protein chain." 
[EBIBS:GAR, PMID:7020376] -subset: biosapiens -synonym: "polypeptide_structural_domain" EXACT [] -synonym: "structural domain" EXACT [] -is_a: SO:0000417 ! polypeptide_domain -is_a: SO:0001070 ! structural_region - -[Term] -id: SO:0001070 -name: structural_region -alt_id: BS:00337 -def: "Backbone conformation of the polypeptide." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "structural_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001071 -name: membrane_structure -alt_id: BS:00128 -def: "Arrangement of the polypeptide with respect to the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "membrane_structure" EXACT [] -is_a: SO:0001070 ! structural_region - -[Term] -id: SO:0001072 -name: extramembrane_region -alt_id: BS:00154 -def: "Extent of the region not transversing the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "extramembrane" EXACT [] -synonym: "topo_dom" EXACT [] -is_a: SO:0001070 ! structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001073 -name: cytoplasmic_region -alt_id: BS:00145 -def: "Region of the peptide in the cytoplasm." [EBIBS:GAR] -subset: biosapiens -synonym: "cytoplasm_location" EXACT [] -synonym: "inside" RELATED [] -is_a: SO:0001072 ! extramembrane_region - -[Term] -id: SO:0001074 -name: non_cytoplasmic_region -alt_id: BS:00144 -def: "Region of peptide not in the cytoplasm. N.B. This could be inside an organelle within the cell." [EBIBS:GAR] -subset: biosapiens -synonym: "non_cytoplasm_location" EXACT [] -synonym: "outside" RELATED [] -is_a: SO:0001072 ! extramembrane_region - -[Term] -id: SO:0001075 -name: intramembrane_region -alt_id: BS:00156 -def: "Extent of the region present in the lipid bilayer." [EBIBS:GAR] -subset: biosapiens -synonym: "intramembrane" EXACT [] -is_a: SO:0001070 ! structural_region -relationship: part_of SO:0001071 ! 
membrane_structure - -[Term] -id: SO:0001076 -name: membrane_loop -alt_id: BS:00155 -def: "Extent of region which enters the membrane bilayer but emerges on the same side which it entered." [EBIBS:GAR] -subset: biosapiens -synonym: "membrane_loop" EXACT [] -is_a: SO:0001075 ! intramembrane_region - -[Term] -id: SO:0001077 -name: transmembrane_region -alt_id: BS:00158 -def: "Extent of region transversing the lipid bilayer." [EBIBS:GAR, UniProt:curator_manual] -subset: biosapiens -synonym: "transmem" RELATED [] -synonym: "transmembrane" EXACT [] -is_a: SO:0001075 ! intramembrane_region - -[Term] -id: SO:0001078 -name: polypeptide_secondary_structure -alt_id: BS:00003 -def: "A region of peptide with secondary structure has hydrogen bonding along the peptide chain that causes a defined conformation of the chain." [EBIBS:GAR] -comment: Biosapien term was secondary_structure. -subset: biosapiens -synonym: "2nary structure" EXACT [] -synonym: "secondary structure" EXACT [] -synonym: "secondary structure region" RELATED [] -synonym: "secondary_structure" EXACT [] -is_a: SO:0001070 ! structural_region - -[Term] -id: SO:0001079 -name: polypeptide_structural_motif -alt_id: BS:0000338 -def: "Motif is a three-dimensional structural element within the chain, which appears also in a variety of other molecules. Unlike a domain, a motif does not need to form a stable globular unit." [EBIBS:GAR] -subset: biosapiens -synonym: "structural_motif" EXACT [] -is_a: SO:0001070 ! structural_region - -[Term] -id: SO:0001080 -name: coiled_coil -alt_id: BS:00041 -def: "A coiled coil is a structural motif in proteins, in which alpha-helices are coiled together like the strands of a rope." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "coiled" RELATED [] -synonym: "coiled_coil" EXACT [] -is_a: SO:0001079 ! 
polypeptide_structural_motif - -[Term] -id: SO:0001081 -name: helix_turn_helix -alt_id: BS:00147 -def: "A motif comprising two helices separated by a turn." [EBIBS:GAR] -subset: biosapiens -synonym: "DNA binding motif" RELATED [] -synonym: "helix_turn_helix" EXACT [] -synonym: "HTH" EXACT [] -is_a: SO:0001079 ! polypeptide_structural_motif - -[Term] -id: SO:0001082 -name: polypeptide_sequencing_information -alt_id: BS:00125 -def: "Incompatibility in the sequence due to some experimental problem." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "sequencing_information" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0001083 -name: non_adjacent_residues -alt_id: BS:00182 -def: "Indicates that two consecutive residues in a fragment sequence are not consecutive in the full-length protein and that there are a number of unsequenced residues between them." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "non consecutive" EXACT [] -synonym: "non_adjacent_residues" EXACT [] -synonym: "non_cons" EXACT [] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001084 -name: non_terminal_residue -alt_id: BS:00072 -def: "The residue at an extremity of the sequence is not the terminal residue." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "non terminal" EXACT [] -synonym: "non_ter" EXACT [] -synonym: "non_terminal_residue" EXACT [] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001085 -name: sequence_conflict -alt_id: BS:00069 -def: "Different sources report differing sequences." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "conflict" EXACT [] -synonym: "sequence_conflict" EXACT [] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001086 -name: sequence_uncertainty -alt_id: BS:00181 -def: "Describes the positions in a sequence where the authors are unsure about the sequence assignment." 
[EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "sequence_uncertainty" EXACT [] -synonym: "unsure" RELATED [] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001087 -name: cross_link -alt_id: BS:00178 -def: "Posttranslationally formed amino acid bonds." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "cross_link" EXACT [] -synonym: "crosslink" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001088 -name: disulfide_bond -alt_id: BS:00028 -def: "The covalent bond between sulfur atoms that binds two peptide chains or different parts of one peptide chain and is a structural determinant in many protein molecules." [EBIBS:GAR, UniProt:curation_manual] -comment: 2 discreet & joined. -subset: biosapiens -synonym: "disulfid" RELATED [] -synonym: "disulfide" RELATED [] -synonym: "disulfide bond" RELATED [] -synonym: "disulfide_bond" EXACT [] -synonym: "disulphide" EXACT [] -synonym: "disulphide bond" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001089 -name: post_translationally_modified_region -alt_id: BS:00052 -def: "A region where a transformation occurs in a protein after it has been synthesized. This which may regulate, stabilize, crosslink or introduce new chemical functionalities in the protein." [EBIBS:GAM, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mod_res" RELATED [] -synonym: "modified residue" EXACT [] -synonym: "post_translational_modification" EXACT [] -is_a: SO:0100001 ! biochemical_region - -[Term] -id: SO:0001090 -name: covalent_binding_site -alt_id: BS:00246 -def: "Binding involving a covalent bond." [EBIBS:GAR] -subset: biosapiens -synonym: "covalent_binding_site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001091 -name: non_covalent_binding_site -alt_id: BS:00029 -def: "Binding site for any chemical group (co-enzyme, prosthetic group, etc.)." [EBIBS:GAR] -comment: Discrete. 
-subset: biosapiens -synonym: "binding" RELATED [uniprot:curation] -synonym: "binding site" RELATED [] -synonym: "non_covalent_binding_site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001092 -name: metal_contact -alt_id: BS:00027 -def: "Residue is part of a binding site for a metal ion." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "metal" RELATED [] -synonym: "metal_binding" EXACT [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001093 -name: protein_protein_contact -alt_id: BS:00131 -def: "Residues involved in protein-protein interactions." [EBIBS:GAR, UniProt:Curation_manual] -subset: biosapiens -synonym: "protein_protein_interaction" EXACT [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001094 -name: Ca_contact_site -alt_id: BS:00186 -def: "Residue involved in contact with calcium." [EBIBS:GAR] -subset: biosapiens -synonym: "ca bind" EXACT [] -synonym: "Ca_contact_site" EXACT [] -is_a: SO:0001092 ! metal_contact - -[Term] -id: SO:0001095 -name: Co_contact_site -alt_id: BS:00136 -def: "Residue involved in contact with cobalt." [EBIBS:GAR] -subset: biosapiens -synonym: "Co_contact_site" EXACT [] -is_a: SO:0001092 ! metal_contact - -[Term] -id: SO:0001096 -name: Cu_contact_site -alt_id: BS:00146 -def: "Residue involved in contact with copper." [EBIBS:GAR] -subset: biosapiens -synonym: "Cu_contact_site" EXACT [] -is_a: SO:0001092 ! metal_contact - -[Term] -id: SO:0001097 -name: Fe_contact_site -alt_id: BS:00137 -def: "Residue involved in contact with iron." [EBIBS:GAR] -subset: biosapiens -synonym: "Fe_contact_site" EXACT [] -is_a: SO:0001092 ! metal_contact - -[Term] -id: SO:0001098 -name: Mg_contact_site -alt_id: BS:00187 -def: "Residue involved in contact with magnesium." [EBIBS:GAR] -subset: biosapiens -synonym: "Mg_contact_site" EXACT [] -is_a: SO:0001092 ! 
metal_contact - -[Term] -id: SO:0001099 -name: Mn_contact_site -alt_id: BS:00140 -def: "Residue involved in contact with manganese." [EBIBS:GAR] -subset: biosapiens -synonym: "Mn_contact_site" EXACT [] -is_a: SO:0001092 ! metal_contact - -[Term] -id: SO:0001100 -name: Mo_contact_site -alt_id: BS:00141 -def: "Residue involved in contact with molybdenum." [EBIBS:GAR] -subset: biosapiens -synonym: "Mo_contact_site" EXACT [] -is_a: SO:0001092 ! metal_contact - -[Term] -id: SO:0001101 -name: Ni_contact_site -alt_id: BS:00142 -def: "Residue involved in contact with nickel." [EBIBS:GAR] -subset: biosapiens -synonym: "Ni_contact_site" EXACT [] -is_a: SO:0001092 ! metal_contact - -[Term] -id: SO:0001102 -name: W_contact_site -alt_id: BS:00143 -def: "Residue involved in contact with tungsten." [EBIBS:GAR] -subset: biosapiens -synonym: "W_contact_site" EXACT [] -is_a: SO:0001092 ! metal_contact - -[Term] -id: SO:0001103 -name: Zn_contact_site -alt_id: BS:00185 -def: "Residue involved in contact with zinc." [EBIBS:GAR] -subset: biosapiens -synonym: "Zn_contact_site" EXACT [] -is_a: SO:0001092 ! metal_contact - -[Term] -id: SO:0001104 -name: catalytic_residue -alt_id: BS:00026 -def: "Amino acid involved in the activity of an enzyme." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "act_site" RELATED [] -synonym: "active site residue" EXACT [] -synonym: "site" BROAD [] -is_a: SO:0100001 ! biochemical_region - -[Term] -id: SO:0001105 -name: protein_ligand_contact -alt_id: BS:00157 -def: "Residues which interact with a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "protein-ligand interaction" EXACT [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! 
molecular_contact_region - -[Term] -id: SO:0001106 -name: asx_motif -alt_id: BS:00202 -def: "A motif of five consecutive residues and two H-bonds in which: Residue(i) is Aspartate or Asparagine (Asx), side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3), main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "asx_motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001107 -name: beta_bulge -alt_id: BS:00208 -def: "A motif of three residues within a beta-sheet in which the main chains of two consecutive residues are H-bonded to that of the third, and in which the dihedral angles are as follows: Residue(i): -140 degrees < phi(l) -20 degrees , -90 degrees < psi(l) < 40 degrees. Residue (i+1): -180 degrees < phi < -25 degrees or +120 degrees < phi < +180 degrees, +40 degrees < psi < +180 degrees or -180 degrees < psi < -120 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "beta_bulge" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001108 -name: beta_bulge_loop -alt_id: BS:00209 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds. Beta bulge loops often occur at the loop ends of beta-hairpins." [EBIBS:GAR, Http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "beta_bulge_loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001109 -name: beta_bulge_loop_five -alt_id: BS:00210 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+4), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+3), these loops have an RL nest at residues i+2 and i+3." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "beta_bulge_loop_five" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001110 -name: beta_bulge_loop_six -alt_id: BS:00211 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+5), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+4), these loops have an RL nest at residues i+3 and i+4." [EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "beta_bulge_loop_six" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001111 -name: beta_strand -alt_id: BS:00042 -def: "A beta strand describes a single length of polypeptide chain that forms part of a beta sheet. A single continuous stretch of amino acids adopting an extended conformation of hydrogen bonds between the N-O and the C=O of another part of the peptide. This forms a secondary protein structure in which two or more extended polypeptide regions are hydrogen-bonded to one another in a planar array." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "beta_strand" EXACT [] -synonym: "strand" RELATED [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001112 -name: antiparallel_beta_strand -alt_id: BS:0000341 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (one running N-terminal to C-terminal and one running C-terminal to N-terminal). Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i) and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they form two mutual backbone hydrogen bonds to each other's flanking peptide groups; this is known as a close pair of hydrogen bonds. 
The peptide backbone dihedral angles (phi, psi) are about (-140 degrees, 135 degrees) in antiparallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "antiparallel_beta_strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001113 -name: parallel_beta_strand -alt_id: BS:00151 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (both running N-terminal to C-terminal). This orientation is slightly less stable because it introduces nonplanarity in the inter-strand hydrogen bonding pattern. Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i)and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they do not hydrogen bond to each other; rather, one residue forms hydrogen bonds to the residues that flank the other (but not vice versa). For example, residue i may form hydrogen bonds to residues j - 1 and j + 1; this is known as a wide pair of hydrogen bonds. By contrast, residue j may hydrogen-bond to different residues altogether, or to none at all. The dihedral angles (phi, psi) are about (-120 degrees, 115 degrees) in parallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "parallel_beta_strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001114 -name: helix -alt_id: BS:00152 -def: "A helix is a secondary_structure conformation where the peptide backbone forms a coil." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "helix" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001115 -name: left_handed_helix -alt_id: BS:00222 -def: "A left handed helix is a region of peptide where the coiled conformation turns in an anticlockwise, left handed screw." 
[EBIBS:GAR] -subset: biosapiens -synonym: "helix-l" RELATED [] -synonym: "left handed helix" EXACT [] -is_a: SO:0001114 ! helix - -[Term] -id: SO:0001116 -name: right_handed_helix -alt_id: BS:0000339 -def: "A right handed helix is a region of peptide where the coiled conformation turns in a clockwise, right handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix" RELATED [] -synonym: "right handed helix" EXACT [] -is_a: SO:0001114 ! helix - -[Term] -id: SO:0001117 -name: alpha_helix -alt_id: BS:00040 -def: "The helix has 3.6 residues per turn which corersponds to a translation of 1.5 angstroms (= 0.15 nm) along the helical axis. Every backbone N-H group donates a hydrogen bond to the backbone C=O group of the amino acid four residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "a-helix" EXACT [] -synonym: "alpha_helix" EXACT [] -synonym: "helix" RELATED [] -is_a: SO:0001116 ! right_handed_helix - -[Term] -id: SO:0001118 -name: pi_helix -alt_id: BS:00153 -def: "The pi helix has 4.1 residues per turn and a translation of 1.15 (=0.115 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid five residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "pi_helix" EXACT [] -is_a: SO:0001116 ! right_handed_helix - -[Term] -id: SO:0001119 -name: three_ten_helix -alt_id: BS:0000340 -def: "The 3-10 helix has 3 residues per turn with a translation of 2.0 angstroms (=0.2 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid three residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "3(10) helix" EXACT [] -synonym: "3-10 helix" EXACT [] -synonym: "310 helix" EXACT [] -synonym: "three_ten_helix" EXACT [] -is_a: SO:0001116 ! right_handed_helix - -[Term] -id: SO:0001120 -name: nest -alt_id: BS:00223 -def: "A motif of two consecutive residues with dihedral angles. 
Nest should not have Proline as any residue. Nests frequently occur as parts of other motifs such as Schellman loops." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest" EXACT [] -synonym: "nest_motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001121 -name: nest_left_right -alt_id: BS:00224 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_left_right" EXACT [] -synonym: "nest_lr" EXACT [] -is_a: SO:0001120 ! nest - -[Term] -id: SO:0001122 -name: nest_right_left -alt_id: BS:00225 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_right_left" EXACT [] -synonym: "nest_rl" EXACT [] -is_a: SO:0001120 ! nest - -[Term] -id: SO:0001123 -name: schellmann_loop -alt_id: BS:00226 -def: "A motif of six or seven consecutive residues that contains two H-bonds." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "paperclip" RELATED [] -synonym: "paperclip loop" RELATED [] -synonym: "schellmann_loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001124 -name: schellmann_loop_seven -alt_id: BS:00228 -def: "Wild type: A motif of seven consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+6), the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+5)." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann_loop_seven" EXACT [] -synonym: "seven-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001125 -name: schellmann_loop_six -alt_id: BS:00227 -def: "Common Type: A motif of six consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+5) the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann_loop_six" EXACT [] -synonym: "six-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001126 -name: st_motif -alt_id: BS:00229 -def: "A motif of five consecutive residues and two hydrogen bonds in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3) , the main-chain CO group of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001127 -name: st_staple -alt_id: BS:00230 -def: "A motif of four or five consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain OH of residue(i) is H-bonded to the main-chain CO of residue(i3) or (i4), Phi angles of residues(i1), (i2) and (i3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_staple" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001128 -name: turn -alt_id: BS:00148 -def: "A reversal in the direction of the backbone of a protein that is stabilized by hydrogen bond between backbone NH and CO groups, involving no more than 4 amino acid residues." 
[EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "turn" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001129 -name: asx_turn_left_handed_type_one -alt_id: BS:00206 -def: "Left handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx_turn_il" RELATED [] -synonym: "asx_turn_left_handed_type_one" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001130 -name: asx_turn_left_handed_type_two -alt_id: BS:00204 -def: "Left handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx_turn_iil" EXACT [] -synonym: "asx_turn_left_handed_type_two" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001131 -name: asx_turn_right_handed_type_two -alt_id: BS:00205 -def: "Right handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx_turn_iir" EXACT [] -synonym: "asx_turn_right_handed_type_two" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001132 -name: asx_turn_right_handed_type_one -alt_id: BS:00207 -def: "Right handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." 
[EBIBS:GAM, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx_turn_ir" EXACT [] -synonym: "asx_turn_type_right_handed_type_one" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001133 -name: beta_turn -alt_id: BS:00212 -def: "A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles of the second and third residues, which are the basis for sub-categorization." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta_turn" EXACT [] -is_a: SO:0001128 ! turn - -[Term] -id: SO:0001134 -name: beta_turn_left_handed_type_one -alt_id: BS:00215 -def: "Left handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles:- Residue(i+1): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees. Residue(i+2): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta_turn_il" EXACT [] -synonym: "beta_turn_left_handed_type_one" EXACT [] -synonym: "Type I' beta turn" EXACT [] -synonym: "Type I' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001135 -name: beta_turn_left_handed_type_two -alt_id: BS:00213 -def: "Left handed type II: A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees > phi > -20 degrees, +80 degrees > psi > +180 degrees. Residue(i+2): +20 degrees > phi > +140 degrees, -40 degrees > psi > +90 degrees." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta_turn_iil" EXACT [] -synonym: "beta_turn_left_handed_type_two" EXACT [] -synonym: "Type II' beta turn" EXACT [] -synonym: "Type II' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001136 -name: beta_turn_right_handed_type_one -alt_id: BS:00216 -def: "Right handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+2): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta_turn_ir" EXACT [] -synonym: "beta_turn_right_handed_type_one" EXACT [] -synonym: "Type I beta turn" EXACT [] -synonym: "Type I turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001137 -name: beta_turn_right_handed_type_two -alt_id: BS:00214 -def: "Right handed type II:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, +80 degrees < psi < +180 degrees. Residue(i+2): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta_turn_iir" EXACT [] -synonym: "beta_turn_right_handed_type_two" EXACT [] -synonym: "Type II beta turn" EXACT [] -synonym: "Type II turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001138 -name: gamma_turn -alt_id: BS:00219 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma_turn" EXACT [] -is_a: SO:0001128 ! turn - -[Term] -id: SO:0001139 -name: gamma_turn_classic -alt_id: BS:00220 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=75.0 - psi(i+1)=-64.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "classic gamma turn" EXACT [] -synonym: "gamma_turn_classic" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001140 -name: gamma_turn_inverse -alt_id: BS:00221 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=-79.0 - psi(i+1)=69.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma_turn_inverse" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001141 -name: st_turn -alt_id: BS:00231 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [EBIBS:GAM, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_turn" EXACT [] -is_a: SO:0001128 ! turn - -[Term] -id: SO:0001142 -name: st_turn_left_handed_type_one -alt_id: BS:00234 -def: "The peptide twists in an anticlockwise, left handed manner. 
The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_turn_il" EXACT [] -synonym: "st_turn_left_handed_type_one" EXACT [] -is_a: SO:0001141 ! st_turn - -[Term] -id: SO:0001143 -name: st_turn_left_handed_type_two -alt_id: BS:00232 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_turn_iil" EXACT [] -synonym: "st_turn_left_handed_type_two" EXACT [] -is_a: SO:0001141 ! st_turn - -[Term] -id: SO:0001144 -name: st_turn_right_handed_type_one -alt_id: BS:00235 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_turn_ir" EXACT [] -synonym: "st_turn_right_handed_type_one" EXACT [] -is_a: SO:0001141 ! st_turn - -[Term] -id: SO:0001145 -name: st_turn_right_handed_type_two -alt_id: BS:00233 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st_turn_iir" EXACT [] -synonym: "st_turn_right_handed_type_two" EXACT [] -is_a: SO:0001141 ! st_turn - -[Term] -id: SO:0001146 -name: polypeptide_variation_site -alt_id: BS:00336 -def: "A site of sequence variation (alteration). Alternative sequence due to naturally occuring events such as polymorphisms and altermatve splicing or experimental methods such as site directed mutagenesis." [EBIBS:GAR, SO:ke] -comment: For example, was a substitution natural or mutated as part of an experiment? This term is added to merge the biosapiens term sequence_variations. -subset: biosapiens -synonym: "sequence_variations" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001147 -name: natural_variant_site -alt_id: BS:00071 -def: "Describes the natural sequence variants due to polymorphisms, disease-associated mutations, RNA editing and variations between strains, isolates or cultivars." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "natural_variant" EXACT [] -synonym: "sequence variation" RELATED [] -synonym: "variant" RELATED [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001148 -name: mutated_variant_site -alt_id: BS:00036 -def: "Site which has been experimentally altered." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mutagen" RELATED [] -synonym: "mutagenesis" RELATED [] -synonym: "mutated_site" EXACT [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001149 -name: alternate_sequence_site -alt_id: BS:00073 -alt_id: SO:0001065 -def: "Description of sequence variants produced by alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. 
-subset: biosapiens -synonym: "alternative_sequence" EXACT [] -synonym: "isoform" NARROW [] -synonym: "sequence variation" RELATED [] -synonym: "var_seq" RELATED [] -synonym: "varsplic" NARROW [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001150 -name: beta_turn_type_six -subset: biosapiens -synonym: "Type VI beta turn" EXACT [] -synonym: "Type VI turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001151 -name: beta_turn_type_six_a -subset: biosapiens -synonym: "Type VI a beta turn" EXACT [] -synonym: "Type VI a turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001152 -name: beta_turn_type_six_a_one -subset: biosapiens -synonym: "Type VI a1 beta turn" EXACT [] -synonym: "Type VI a1 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001153 -name: beta_turn_type_six_a_two -subset: biosapiens -synonym: "Type VI a2 beta turn" EXACT [] -synonym: "Type VI a2 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001154 -name: beta_turn_type_six_b -subset: biosapiens -synonym: "Type VI b beta turn" EXACT [] -synonym: "Type VI b turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001155 -name: beta_turn_type_eight -subset: biosapiens -synonym: "Type VIII beta turn" EXACT [] -synonym: "Type VIII turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001156 -name: DRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -10 and -60 relative to the TSS. Consensus sequence is WATCGATW." [PMID:12537576] -comment: This consensus sequence was identified computationally using the MEME\nalgorithm within core promoter sequences from -60 to +40, with an E value\nof 1.7e-183. Tends to co-occur with Motif 7. Tends to not occur with DPE motif (SO:0000015) or motif 10. -synonym: "NDM4" EXACT [] -synonym: "WATCGATW_motif" EXACT [] -is_a: SO:0000844 ! 
RNApol_II_promoter_region - -[Term] -id: SO:0001157 -name: DMv4_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements with respect to the TSS (+1). Consensus sequence is YGGTCACACTR. Marked spatial preference within core promoter; tend to occur near the TSS, although not as tightly as INR (SO:0000014)." [PMID:16827941\:12537576] -synonym: "directional motif v4" EXACT [] -synonym: "DMv4" EXACT [] -synonym: "motif 1 element" EXACT [] -synonym: "promoter motif 1" EXACT [] -synonym: "YGGTCACATR" NARROW [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0001158 -name: E_box_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and +1 relative to the TSS. Consensus sequence is AWCAGCTGWT. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015)." [PMID:12537576\:16827941] -synonym: "AWCAGCTGWT" NARROW [] -synonym: "E box motif" EXACT [] -synonym: "generic E box motif" EXACT [] -synonym: "NDM5" RELATED [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0001159 -name: DMv5_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -50 and -10 relative to the TSS. Consensus sequence is KTYRGTATWTTT. Tends to co-occur with DMv4 (SO:0001157) . Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v5" EXACT [] -synonym: "DMv5" EXACT [] -synonym: "KTYRGTATWTTT" NARROW [] -synonym: "promoter motif 6" RELATED [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0001160 -name: DMv3_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -30 and +15 relative to the TSS. Consensus sequence is KNNCAKCNCTRNY. Tends to co-occur with DMv2 (SO:0001161). 
Tends to not occur with DPE motif (SO:0000015) or MTE (0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v3" EXACT [] -synonym: "DMv3" EXACT [] -synonym: "KNNCAKCNCTRNY" NARROW [] -synonym: "promoter motif 7" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0001161 -name: DMv2_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and -45 relative to the TSS. Consensus sequence is MKSYGGCARCGSYSS. Tends to co-occur with DMv3 (SO:0001160). Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v2" EXACT [] -synonym: "DMv2" EXACT [] -synonym: "MKSYGGCARCGSYSS" NARROW [] -synonym: "promoter motif 8" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0001162 -name: MTE -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between +20 and +30 relative to the TSS. Consensus sequence is CSARCSSAACGS. Tends to co-occur with INR motif (SO:0000014). Tends to not occur with DPE motif (SO:0000015) or DMv5 (SO:0001159)." [PMID:12537576\:15231738] -synonym: "CSARCSSAACGS" NARROW [] -synonym: "motif ten element" EXACT [] -synonym: "motif_ten_element" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0001163 -name: INR1_motif -def: "A promoter motif with consensus sequence TCATTCG." [PMID:16827941] -synonym: "directional motif p3" EXACT [] -synonym: "directional promoter motif 3" EXACT [] -synonym: "DMp3" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0001164 -name: DPE1_motif -def: "A promoter motif with consensus sequence CGGACGT." [PMID:16827941] -synonym: "directional motif 5" EXACT [] -synonym: "directional promoter motif 5" RELATED [] -synonym: "DMp5" EXACT [] -is_a: SO:0000844 ! 
RNApol_II_promoter_region - -[Term] -id: SO:0001165 -name: DMv1_motif -def: "A promoter motif with consensus sequence CARCCCT." [PMID:16827941] -synonym: "directional promoter motif v1" RELATED [] -synonym: "DMv1" RELATED [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0001166 -name: GAGA_motif -def: "A non directional promoter motif with consensus sequence GAGAGCG." [PMID:16827941] -synonym: "GAGA" EXACT [] -synonym: "NDM1" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0001167 -name: NDM2_motif -def: "A non directional promoter motif with consensus CGMYGYCR." [PMID:16827941] -synonym: "NDM2" EXACT [] -synonym: "non directional promoter motif 2" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0001168 -name: NDM3_motif -def: "A non directional promoter motif with consensus sequence GAAAGCT." [PMID:16827941] -synonym: "NDM3" EXACT [] -synonym: "non directional motif 3" EXACT [] -is_a: SO:0000844 ! RNApol_II_promoter_region - -[Term] -id: SO:0001169 -name: ds_RNA_viral_sequence -def: "A ds_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded RNA." [SO:ke] -synonym: "double stranded RNA virus sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001170 -name: polinton -def: "A kind of DNA transposon that populates the genomes of protists, fungi, and animals, characterized by a unique set of proteins necessary for their transposition, including a protein-primed DNA polymerase B, retroviral integrase, cysteine protease, and ATPase. Polintons are characterized by 6-bp target site duplications, terminal-inverted repeats that are several hundred nucleotides long, and 5'-AG and TC-3' termini. Polintons exist as autonomous and nonautonomous elements." [PMID:16537396] -synonym: "maverick element" RELATED [] -is_a: SO:0000208 ! 
terminal_inverted_repeat_element - -[Term] -id: SO:0001171 -name: rRNA_21S -def: "A component of the large ribosomal subunit in mitochondrial rRNA." [RSC:cb] -synonym: "21S LSU rRNA" EXACT [] -synonym: "21S ribosomal RNA" EXACT [] -synonym: "21S rRNA" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001172 -name: tRNA_region -def: "A region of a tRNA." [RSC:cb] -is_a: SO:0000834 ! processed_transcript_region -relationship: part_of SO:0000253 ! tRNA - -[Term] -id: SO:0001173 -name: anticodon_loop -def: "A sequence of seven nucleotide bases in tRNA which contains the anticodon. It has the sequence 5'-pyrimidine-purine-anticodon-modified purine-any base-3." [ISBN:0716719207] -synonym: "anti-codon loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001174 -name: anticodon -def: "A sequence of three nucleotide bases in tRNA which recognizes a codon in mRNA." [RSC:cb] -synonym: "anti-codon" EXACT [] -is_a: SO:0001172 ! tRNA_region -relationship: part_of SO:0001173 ! anticodon_loop - -[Term] -id: SO:0001175 -name: CCA_tail -def: "Base sequence at the 3' end of a tRNA. The 3'-hydroxyl group on the terminal adenosine is the attachment point for the amino acid." [ISBN:0716719207] -synonym: "CCA sequence" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001176 -name: DHU_loop -def: "Non-base-paired sequence of nucleotide bases in tRNA. It contains several dihydrouracil residues." [ISBN:071671920] -synonym: "D loop" RELATED [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001177 -name: T_loop -def: "Non-base-paired sequence of three nucleotide bases in tRNA. It has sequence T-Psi-C." [ISBN:0716719207] -synonym: "TpsiC loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001178 -name: pyrrolysine_tRNA_primary_transcript -def: "A primary transcript encoding pyrrolysyl tRNA (SO:0000766)." [RSC:cb] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0001179 -name: U3_snoRNA -def: "U3 snoRNA is a member of the box C/D class of small nucleolar RNAs. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -comment: The definition is most of the old definition for snoRNA (SO:0000275). -synonym: "small nucleolar RNA U3" EXACT [] -synonym: "snoRNA U3" EXACT [] -synonym: "U3 small nucleolar RNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0001180 -name: AU_rich_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is rich in AUUUA pentamers. Messenger RNAs bearing multiple AU-rich elements are often unstable." [PMID:7892223] -synonym: "ARE" RELATED [] -synonym: "AU-rich element" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001181 -name: Bruno_response_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is bound by the Drosophila Bruno protein and its homologs." [PMID:10893231] -comment: Not to be confused with BRE_motif (SO:0000016), which binds transcription factor II B. 
-synonym: "BRE" RELATED [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001182 -name: iron_responsive_element -def: "A regulatory sequence found in the 5' and 3' UTRs of many mRNAs which encode iron-binding proteins. It has a hairpin structure and is recognized by trans-acting proteins known as iron-regulatory proteins." [PMID:3198610, PMID:8710843] -synonym: "IRE" EXACT [] -is_a: SO:0000837 ! UTR_region - -[Term] -id: SO:0001183 -name: morpholino -def: "An attribute describing a sequence composed of nucleobases bound to a morpholino backbone. A morpholino backbone consists of morpholine (CHEBI:34856) rings connected by phosphorodiamidate linkages." [RSC:cb] -comment: Do not use this for feature annotation. Use morpholino_oligo (SO:0000034) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001184 -name: PNA -def: "An attribute describing a sequence composed of peptide nucleic acid (CHEBI:48021), a chemical consisting of nucleobases bound to a backbone composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [RSC:cb] -comment: Do not use this term for feature annotation. Use PNA_oligo (SO:0001011) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001185 -name: enzymatic -def: "An attribute describing the sequence of a transcript that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use enzymatic_RNA (SO:0000372) instead. -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0001186 -name: ribozymic -def: "An attribute describing the sequence of a transcript that has catalytic activity even without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use ribozyme (SO:0000374) instead. -is_a: SO:0001185 ! 
enzymatic - -[Term] -id: SO:0001187 -name: pseudouridylation_guide_snoRNA -def: "A snoRNA that specifies the site of pseudouridylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA pseudouridylation guide activity (GO:0030558). -is_a: SO:0000594 ! H_ACA_box_snoRNA - -[Term] -id: SO:0001188 -name: LNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of 'locked' deoxyribose rings connected to a phosphate backbone. The deoxyribose unit's conformation is 'locked' by a 2'-C,4'-C-oxymethylene link." [CHEBI:48010] -comment: Do not use this term for feature annotation. Use LNA_oligo (SO:0001189) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001189 -name: LNA_oligo -def: "An oligo composed of LNA residues." [RSC:cb] -synonym: "locked nucleic acid" EXACT [] -intersection_of: SO:0000696 ! oligo -intersection_of: has_quality SO:0001188 ! LNA - -[Term] -id: SO:0001190 -name: TNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of threose rings connected to a phosphate backbone." [CHEBI:48019] -comment: Do not use this term for feature annotation. Use TNA_oligo (SO:0001191) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001191 -name: TNA_oligo -def: "An oligo composed of TNA residues." [RSC:cb] -synonym: "threose nucleic acid" EXACT [] -intersection_of: SO:0000696 ! oligo -intersection_of: has_quality SO:0001190 ! TNA - -[Term] -id: SO:0001192 -name: GNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of an acyclic three-carbon propylene glycol connected to a phosphate backbone. It has two enantiomeric forms, (R)-GNA and (S)-GNA." [CHEBI:48015] -comment: Do not use this term for feature annotation. Use GNA_oligo (SO:0001192) instead. -is_a: SO:0000348 ! 
nucleic_acid - -[Term] -id: SO:0001193 -name: GNA_oligo -def: "An oligo composed of GNA residues." [RSC:cb] -synonym: "glycerol nucleic acid" EXACT [] -synonym: "glycol nucleic acid" EXACT [] -intersection_of: SO:0000696 ! oligo -intersection_of: has_quality SO:0001192 ! GNA - -[Term] -id: SO:0001194 -name: R_GNA -def: "An attribute describing a GNA sequence in the (R)-GNA enantiomer." [CHEBI:48016] -comment: Do not use this term for feature annotation. Use R_GNA_oligo (SO:0001195) instead. -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001195 -name: R_GNA_oligo -def: "An oligo composed of (R)-GNA residues." [RSC:cb] -synonym: "(R)-glycerol nucleic acid" EXACT [] -synonym: "(R)-glycol nucleic acid" EXACT [] -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001194 ! R_GNA - -[Term] -id: SO:0001196 -name: S_GNA -def: "An attribute describing a GNA sequence in the (S)-GNA enantiomer." [CHEBI:48017] -comment: Do not use this term for feature annotation. Use S_GNA_oligo (SO:0001197) instead. -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001197 -name: S_GNA_oligo -def: "An oligo composed of (S)-GNA residues." [RSC:cb] -synonym: "(S)-glycerol nucleic acid" EXACT [] -synonym: "(S)-glycol nucleic acid" EXACT [] -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001196 ! S_GNA - -[Term] -id: SO:0001198 -name: ds_DNA_viral_sequence -def: "A ds_DNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded DNA." [SO:ke] -synonym: "double stranded DNA virus" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001199 -name: ss_RNA_viral_sequence -def: "A ss_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as single stranded RNA." [SO:ke] -synonym: "single strand RNA virus" EXACT [] -is_a: SO:0001041 ! 
viral_sequence - -[Term] -id: SO:0001200 -name: negative_sense_ssRNA_viral_sequence -def: "A negative_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that is complementary to mRNA and must be converted to positive sense RNA by RNA polymerase before translation." [SO:ke] -synonym: "negative sense single stranded RNA virus" RELATED [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001201 -name: positive_sense_ssRNA_viral_sequence -def: "A positive_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that can be immediately translated by the host." [SO:ke] -synonym: "positive sense single stranded RNA virus" RELATED [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001202 -name: ambisense_ssRNA_viral_sequence -def: "A ambisense_RNA_virus is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus with both messenger and anti messenger polarity." [SO:ke] -synonym: "ambisense single stranded RNA virus" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001203 -name: RNA_polymerase_promoter -def: "A region (DNA) to which RNA polymerase binds, to begin\ntranscription." [xenbase:jb] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0001204 -name: Phage_RNA_Polymerase_Promoter -def: "Aregion (DNA) to which Bacteriophage RNA polymerase binds, to begin transcription." [xenbase:jb] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0001205 -name: SP6_RNA_Polymerase_Promoter -def: "A region (DNA) to which the SP6 RNA polymerase binds, to begin transcription." [xenbase:jb] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001206 -name: T3_RNA_Polymerase_Promoter -def: "A DNA sequence to which the T3 RNA polymerase binds, to begin transcription." [xenbase:jb] -is_a: SO:0001204 ! 
Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001207 -name: T7_RNA_Polymerase_Promoter -def: "A region (DNA) to which the T7 RNA polymerase binds, to begin transcription." [xenbase:jb] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001208 -name: five_prime_EST -def: "An EST read from the 5' end of a transcript that usually codes for a protein. These regions tend to be conserved across species and do not change much within a gene family." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "5' EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001209 -name: three_prime_EST -def: "An EST read from the 3' end of a transcript. They are more\nlikely to fall within non-coding, or untranslated regions(UTRs)." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "3' EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001210 -name: translational_frameshift -def: "The region of mRNA (not divisible by 3 bases) that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "ribosomal frameshift" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0001211 -name: plus_1_translational_frameshift -def: "The region of mRNA 1 base long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 1 ribosomal frameshift" EXACT [] -synonym: "plus 1 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001212 -name: plus_2_translational_frameshift -def: "The region of mRNA 2 bases long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 2 ribosomal frameshift" EXACT [] -synonym: "plus 2 translational frameshift" EXACT [] -is_a: SO:0001210 ! 
translational_frameshift - -[Term] -id: SO:0001213 -name: group_III_intron -def: "Group III introns are introns found in the mRNA of the plastids of euglenoid protists. They are spliced by a two step transesterification with bulged adenosine as initiating nucleophile." [PMID:11377794] -comment: GO:0000374. -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001216 -name: endonuclease_spliced_intron -def: "An intron that spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0001217 -name: protein_coding_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000010 ! protein_coding - -[Term] -id: SO:0001218 -name: transgenic_insertion -intersection_of: SO:0000667 ! insertion -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0001219 -name: retrogene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000569 ! retrotransposed - -[Term] -id: SO:0005836 -name: regulatory_region -def: "A DNA sequence that controls the expression of a gene." [http://www.genpromag.com/scripts/glossary.asp?LETTER=R] -subset: SOFA -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0005837 -name: U14_snoRNA_primary_transcript -def: "The primary transcript of an evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA." 
[PMID:2251119] -synonym: "4.5S snRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0005841 -name: methylation_guide_snoRNA -def: "A snoRNA that specifies the site of 2'-O-ribose methylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA 2'-O-ribose methylation guide activity (GO:0030561). -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0000580 ! methylation_guide_snoRNA_primary_transcript - -[Term] -id: SO:0005843 -name: rRNA_cleavage_RNA -def: "An ncRNA that is part of a ribonucleoprotein that cleaves the primary pre-rRNA transcript in the process of producing mature rRNA molecules." [GOC:kgc] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000582 ! rRNA_cleavage_snoRNA_primary_transcript - -[Term] -id: SO:0005845 -name: exon_of_single_exon_gene -def: "An exon that is the only exon in a gene." [RSC:cb] -synonym: "single_exon" RELATED [] -synonym: "singleton exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0005847 -name: cassette_array_member -is_a: SO:0005848 ! gene_cassette_member - -[Term] -id: SO:0005848 -name: gene_cassette_member -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0005849 -name: gene_subarray_member -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0005850 -name: primer_binding_site -def: "Non-covalent primer binding site for initiation of replication, transcription, or reverse transcription." [http:www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -is_a: SO:0000409 ! binding_site -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0005851 -name: gene_array -def: "An array includes two or more genes, or two or more gene subarrays, contiguously arranged where the individual genes, or subarrays, are either identical in sequence, or essentially so." 
[SO:ma] -comment: This would include, for example, a cluster of genes each encoding the major ribosomal RNAs and a cluster of histone gene subarrays. -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0005852 -name: gene_subarray -def: "A subarray is, by defintition, a member of a gene array (SO:0005851); the members of a subarray may differ substantially in sequence, but are closely related in function." [SO:ma] -comment: This would include, for example, a cluster of genes encoding different histones. -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0005853 -name: gene_cassette -def: "A gene that can be substituted for a related gene at a different site in the genome." [SGD:se] -comment: This would include, for example, the mating type gene cassettes of S. cerevisiae. Gene cassettes usually exist as linear sequences as part of a larger DNA molecule, such as a chromosome or plasmid. -is_a: SO:0000704 ! gene - -[Term] -id: SO:0005854 -name: gene_cassette_array -def: "An array of non-functional genes whose members, when captured by recombination form functional genes." [SO:ma] -comment: This would include, for example, the arrays of non-functional VSG genes of Trypanosomes. -is_a: SO:0005855 ! gene_group -relationship: has_part SO:0005853 ! gene_cassette - -[Term] -id: SO:0005855 -name: gene_group -def: "A collection of related genes." [SO:ma] -subset: SOFA -is_a: SO:0000001 ! region - -[Term] -id: SO:0005856 -name: selenocysteine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0005857 -name: selenocysteinyl_tRNA -def: "A tRNA sequence that has a selenocysteine anticodon, and a 3' selenocysteine binding region." [SO:ke] -synonym: "selenocysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "selenocysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0005856 ! 
selenocysteine_tRNA_primary_transcript - -[Term] -id: SO:0005858 -name: syntenic_region -def: "A region in which two or more pairs of homologous markers occur on the same chromosome in two or more species." [http://tbase.jax.org/docs/glossary.html] -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000860 ! syntenic - -[Term] -id: SO:0100001 -name: biochemical_region -def: "A region that is involved in a biochemical function." [EBIBS:GAR] -comment: Range. -subset: biosapiens -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0100002 -name: molecular_contact_region -def: "A region that is involved a contact with another molecule." [EBIBS:GAR] -comment: Range. -subset: biosapiens -is_a: SO:0100001 ! biochemical_region - -[Term] -id: SO:0100003 -name: intrinsically_unstructured_region -def: "A region of polypeptide chain with high conformational flexibility." [EBIBS:GAR] -subset: biosapiens -synonym: "disordered region" EXACT [] -is_a: SO:0001070 ! structural_region - -[Term] -id: SO:0100004 -name: catmat_left_handed_three -def: "A motif of 3 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -75 bounds -100 to -50, res i+1: psi 140 bounds 110 to 170. An extra restriction of the length of the O to O distance would be useful, that it be less than 5 Angstrom. More precisely these two oxygens are the main chain carbonyl oxygen atoms of residues i-1 and i+1." [EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "catmat-3l" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100005 -name: catmat_left_handed_four -def: "A motif of 4 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i psi -10 bounds -50 to 30, res i+1: phi -90 bounds -120 to -60, res i+1: psi -10 bounds -50 to 30, res i+2: phi -75 bounds -100 to -50, res i+2: psi 140 bounds 110 to 170. 
The extra restriction of the length of the O to O distance is similar, that it be less than 5 Angstrom. In this case these two Oxygen atoms are the main chain carbonyl oxygen atoms of residues i-1 and i+2." [EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "catmat-4l" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100006 -name: catmat_right_handed_three -def: "A motif of 3 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -75 bounds -100 to -50, res i+1: psi 140 bounds 110 to 170. An extra restriction of the length of the O to O distance would be useful, that it be less than 5 Angstrom. More precisely these two oxygens are the main chain carbonyl oxygen atoms of residues i-1 and i+1." [EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "catmat-3r" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100007 -name: catmat_right_handed_four -def: "A motif of 4 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -90 bounds -120 to -60, res i+1: psi -10 bounds -50 to 30, res i+2: phi -75 bounds -100 to -50, res i+2: psi 140 bounds 110 to 170. The extra restriction of the length of the O to O distance is similar, that it be less than 5 Angstrom. In this case these two Oxygen atoms are the main chain carbonyl oxygen atoms of residues i-1 and i+2." [EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -synonym: "catmat-4r" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100008 -name: alpha_beta_motif -def: "A motif of five consecutive residues and two H-bonds in which: H-bond between CO of residue(i) and NH of residue(i+4), H-bond between CO of residue(i) and NH of residue(i+3),Phi angles of residues(i+1), (i+2) and (i+3) are negative." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd_srv/msdmotif/] -subset: biosapiens -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100009 -name: lipoprotein_signal_peptide -def: "A signal for both membrane translocation and lipid attachment in prokaryotes." [EBIBS:GAR] -subset: biosapiens -synonym: "Prokaryotic membrane lipoprotein lipid attachment site" EXACT [] -is_a: SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0100010 -name: no_output -def: "An experimental region wherean analysis has been run and not produced any annotation." [EBIBS:GAR] -subset: biosapiens -is_a: SO:0000703 ! experimental_result_region -isa: SO:0000703 - -[Term] -id: SO:1000002 -name: substitution -def: "Any change in genomic DNA caused by a single event." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0001059 ! sequence_alteration -relationship: sequence_of SO:0000048 ! substitute - -[Term] -id: SO:1000004 -name: partially_characterised_change_in_DNA_sequence -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000007 ! uncharacterised_change_in_nucleotide_sequence - -[Term] -id: SO:1000005 -name: complex_substitution -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000007 -name: uncharacterised_change_in_nucleotide_sequence -def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000002 ! 
substitution - -[Term] -id: SO:1000008 -name: point_mutation -def: "A single nucleotide change which has occurred at the same position of a corresponding nucleotide in a reference sequence." [SO:immuno_workshop] -subset: SOFA -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000009 -name: transition -def: "Change of a pyrimidine nucleotide, C or T, into an other pyrimidine nucleotide, or change of a purine nucleotide, A or G, into an other purine nucleotide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000008 ! point_mutation - -[Term] -id: SO:1000010 -name: pyrimidine_transition -def: "A substitution of a pyrimidine, C or T, for another pyrimidine." [SO:ke] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000011 -name: C_to_T_transition -def: "A transition of a cytidine to a thymine." [SO:ke] -is_a: SO:1000010 ! pyrimidine_transition - -[Term] -id: SO:1000012 -name: C_to_T_transition_at_pCpG_site -def: "The transition of cytidine to thymine occurring at a pCpG site as a consequence of the spontaneous deamination of 5'-methylcytidine." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000011 ! C_to_T_transition - -[Term] -id: SO:1000013 -name: T_to_C_transition -is_a: SO:1000010 ! pyrimidine_transition - -[Term] -id: SO:1000014 -name: purine_transition -def: "A substitution of a purine, A or G, for another purine." [SO:ke] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000015 -name: A_to_G_transition -def: "A transition of an adenine to a guanine." [SO:ke] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000016 -name: G_to_A_transition -def: "A transition of a guanine to an adenine." [SO:ke] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000017 -name: transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G, or vice versa." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000008 ! 
point_mutation - -[Term] -id: SO:1000018 -name: pyrimidine_to_purine_transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G." [SO:ke] -is_a: SO:1000017 ! transversion - -[Term] -id: SO:1000019 -name: C_to_A_transversion -def: "A transversion from cytidine to adenine." [SO:ke] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000020 -name: C_to_G_transversion -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000021 -name: T_to_A_transversion -def: "A transversion from T to A." [SO:ke] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000022 -name: T_to_G_transversion -def: "A transversion from T to G." [SO:ke] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000023 -name: purine_to_pyrimidine_transversion -def: "Change of a purine nucleotide, A or G , into a pyrimidine nucleotide C or T." [SO:ke] -is_a: SO:1000017 ! transversion - -[Term] -id: SO:1000024 -name: A_to_C_transversion -def: "A transversion from adenine to cytidine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000025 -name: A_to_T_transversion -def: "A transversion from adenine to thymine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000026 -name: G_to_C_transversion -def: "A transversion from guanine to cytidine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000027 -name: G_to_T_transversion -def: "A transversion from guanine to thymine." [SO:ke] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000028 -name: intrachromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000029 -name: chromosomal_deletion -def: "An incomplete chromosome." [SO:ke] -synonym: "(bacteria)&Dgr;" RELATED [] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(fungi)D" RELATED [] -is_a: SO:0000550 ! 
aneuploid_chromosome -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000030 -name: chromosomal_inversion -synonym: "(bacteria)IN" RELATED [] -synonym: "(Drosophila)In" RELATED [] -synonym: "(fungi)In" RELATED [] -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000031 -name: interchromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000032 -name: indel -def: "A hybrid term (insertion/deletion) to describe sequence length change when the direction of the change is unspecified." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:1000033 -name: nucleotide_deletion -def: "One or more continuous nucleotides are excised from the sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000032 ! indel - -[Term] -id: SO:1000034 -name: nucleotide_insertion -def: "One or more nucleotides are added between two adjacent nucleotides in the sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000032 ! indel - -[Term] -id: SO:1000035 -name: nucleotide_duplication -def: "One or more nucleotides are added between two adjacent nucleotides in the sequence; the inserted sequence derives from, or is identical in sequence to, nucleotides adjacent to insertion point." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000034 ! nucleotide_insertion - -[Term] -id: SO:1000036 -name: inversion -def: "A continuous nucleotide sequence is inverted in the same position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0000001 ! region -is_a: SO:0001059 ! sequence_alteration -relationship: sequence_of SO:0000047 ! invert - -[Term] -id: SO:1000037 -name: chromosomal_duplication -def: "An extra chromosome." [SO:ke] -synonym: "(Drosophila)Dp" RELATED [] -synonym: "(fungi)Dp" RELATED [] -is_a: SO:0000550 ! 
aneuploid_chromosome - -[Term] -id: SO:1000038 -name: intrachromosomal_duplication -is_a: SO:1000028 ! intrachromosomal_mutation -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000039 -name: direct_tandem_duplication -is_a: SO:1000173 ! tandem_duplication - -[Term] -id: SO:1000040 -name: inverted_tandem_duplication -is_a: SO:1000173 ! tandem_duplication - -[Term] -id: SO:1000041 -name: intrachromosomal_transposition -synonym: "(Drosophila)Tp" RELATED [] -is_a: SO:0000453 ! transposition -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:1000042 -name: compound_chromosome -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000043 -name: Robertsonian_fusion -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000044 -name: chromosomal_translocation -synonym: "(Drosophila)T" RELATED [] -synonym: "(fungi)T" RELATED [] -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000045 -name: ring_chromosome -synonym: "(Drosophila)R" RELATED [] -synonym: "(fungi)C" RELATED [] -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000046 -name: pericentric_inversion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000047 -name: paracentric_inversion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000048 -name: reciprocal_chromosomal_translocation -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000049 -name: mutation_affecting_transcript -def: "Any change in mature, spliced and processed, RNA that results from a change in the corresponding DNA sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000132 ! mutation - -[Term] -id: SO:1000050 -name: mutation_causing_no_change_in_transcript -def: "No effect on the state of the RNA." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000049 ! 
mutation_affecting_transcript - -[Term] -id: SO:1000052 -name: mutation_affecting_complex_change_in_transcript -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000054 -name: mutation_affecting_coding_sequence -def: "Any of the amino acid coding triplets of a gene are affected by the DNA mutation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000055 -name: mutation_causing_initiator_codon_change_in_transcript -def: "The DNA mutation changes, usually destroys, the first coding triplet of a gene. Usually prevents translation although another initiator codon may be used." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000056 ! mutation_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000056 -name: mutation_causing_amino_acid_coding_codon_change_in_transcript -def: "The DNA mutation affects the amino acid coding sequence of a gene; this region includes both the initiator and terminator codons." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000054 ! mutation_affecting_coding_sequence - -[Term] -id: SO:1000057 -name: mutation_causing_synonymous_codon_change_in_transcript -def: "The changed codon has the same translation product as the original codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000056 ! mutation_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000058 -name: mutation_causing_non_synonymous_codon_change_in_transcript -def: "A DNA point mutation that causes a substitution of an amino acid by an other." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "non-synonymous codon change in transcript" EXACT [] -is_a: SO:1000056 ! 
mutation_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000059 -name: mutation_causing_missense_codon_change_in_transcript -def: "The nucleotide change in the codon leads to a new codon coding for a new amino acid." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000058 ! mutation_causing_non_synonymous_codon_change_in_transcript - -[Term] -id: SO:1000060 -name: mutation_causing_conservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change does not change the gross properties (size, charge, hydrophobicity) of the amino acid at that position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -is_a: SO:1000059 ! mutation_causing_missense_codon_change_in_transcript - -[Term] -id: SO:1000061 -name: mutation_causing_nonconservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change changes the gross properties (size, charge, hydrophobicity) of the amino acid in that position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -is_a: SO:1000059 ! mutation_causing_missense_codon_change_in_transcript - -[Term] -id: SO:1000062 -name: mutation_causing_nonsense_codon_change_in_transcript -def: "The nucleotide change in the codon triplet creates a terminator codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000056 ! mutation_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000063 -name: mutation_causing_terminator_codon_change_in_transcript -def: "The nucleotide change in the codon triplet changes the stop codon, causing an elongated transcript sequence." [SO:ke] -is_a: SO:1000054 ! 
mutation_affecting_coding_sequence - -[Term] -id: SO:1000064 -name: mutation_affecting_reading_frame -def: "An umbrella term for terms describing an effect of a mutation on the frame of translation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000054 ! mutation_affecting_coding_sequence - -[Term] -id: SO:1000065 -name: frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three." [SO:ke] -synonym: "out of frame mutation" RELATED [] -is_a: SO:1000064 ! mutation_affecting_reading_frame - -[Term] -id: SO:1000066 -name: plus_1_frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, due to the insertion of a nucleotide." [SO:ke] -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000067 -name: minus_1_frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, due to the deletion of a nucleotide." [SO:ke] -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000068 -name: plus_2_frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, due to the insertion of two nucleotides." [SO:ke] -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000069 -name: minus_2_frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, due to the deletion of two nucleotides." [SO:ke] -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000070 -name: mutation_affecting_transcript_processing -def: "Mutation affects the way in which the primary transcriptional product is processed to form the mature transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000079 ! 
mutation_affecting_transcript_sequence - -[Term] -id: SO:1000071 -name: mutation_affecting_splicing -def: "A mutation that affects the way in which the primary transcriptional product is processed to form the mature transcript, specifically by the removal (splicing) of intron sequences." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000132 ! mutation - -[Term] -id: SO:1000072 -name: splice_donor_mutation -def: "A mutation that affects the splice donor sequence." [SO:ke] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000073 -name: splice_acceptor_mutation -def: "A mutation that affects the splice acceptor sequence." [SO:ke] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000074 -name: cryptic_splice_activator_mutation -def: "A kind of mutation that creates a new (functional) splice site." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000075 -name: mutation_affecting_editing -def: "Mutation affects the editing of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000070 ! mutation_affecting_transcript_processing - -[Term] -id: SO:1000076 -name: mutation_affecting_transcription -def: "Mutation affects the process of transcription, its initiation, progression or termination." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000078 -name: mutation_decreasing_rate_of_transcription -def: "A mutation that decreases the rate a which transcription of the sequence occurs." [SO:ke] -is_a: SO:1000081 ! mutation_affecting_rate_of_transcription - -[Term] -id: SO:1000079 -name: mutation_affecting_transcript_sequence -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000080 -name: mutation_increasing_rate_of_transcription -is_a: SO:1000081 ! 
mutation_affecting_rate_of_transcription - -[Term] -id: SO:1000081 -name: mutation_affecting_rate_of_transcription -def: "A mutation that alters the rate a which transcription of the sequence occurs." [SO:ke] -is_a: SO:1000076 ! mutation_affecting_transcription - -[Term] -id: SO:1000082 -name: mutation_affecting_transcript_stability -def: "Mutation affects the stability of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000083 -name: mutation_increasing_transcript_stability -def: "Mutation increases the stability (half-life) of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000082 ! mutation_affecting_transcript_stability - -[Term] -id: SO:1000084 -name: mutation_decreasing_transcript_stability -def: "Mutation decreases the stability (half-life) of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000082 ! mutation_affecting_transcript_stability - -[Term] -id: SO:1000085 -name: mutation_affecting_level_of_transcript -def: "A mutation that causes a change in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000086 -name: mutation_decreasing_level_of_transcript -def: "A mutation that causes a decrease in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -is_a: SO:1000085 ! mutation_affecting_level_of_transcript - -[Term] -id: SO:1000087 -name: mutation_increasing_level_of_transcript -def: "A mutation that causes an increase in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -is_a: SO:1000085 ! 
mutation_affecting_level_of_transcript - -[Term] -id: SO:1000088 -name: mutation_affecting_translational_product -def: "Mutation causes a change in primary translation product of a transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000132 ! mutation - -[Term] -id: SO:1000089 -name: mutation_causing_no_change_of_translational_product -def: "The change at RNA level does not lead to any change in polypeptide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000090 -name: mutation_causing_uncharacterised_change_of_translational_product -def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000091 -name: mutation_causing_partially_characterised_change_of_translational_product -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000090 ! mutation_causing_uncharacterised_change_of_translational_product - -[Term] -id: SO:1000092 -name: mutation_causing_complex_change_of_translational_product -def: "Any mutation effect that is known at nucleotide level but cannot be explained by using other key terms." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000093 -name: mutation_causing_amino_acid_substitution -def: "The replacement of a single amino acid by another." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000094 -name: mutation_causing_conservative_amino_acid_substitution -is_a: SO:1000093 ! 
mutation_causing_amino_acid_substitution - -[Term] -id: SO:1000095 -name: mutation_causing_nonconservative_amino_acid_substitution -is_a: SO:1000093 ! mutation_causing_amino_acid_substitution - -[Term] -id: SO:1000096 -name: mutation_causing_amino_acid_insertion -def: "The insertion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000097 -name: mutation_causing_amino_acid_deletion -def: "The deletion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000098 -name: mutation_causing_polypeptide_truncation -def: "The translational product is truncated at its C-terminus, usually a result of a nonsense codon change in transcript (SO:1000062)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000099 -name: mutation_causing_polypeptide_elongation -def: "The extension of the translational product at either (or both) the N-terminus and/or the C-terminus." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000100 -name: mutation_causing_polypeptide_N_terminal_elongation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "polypeptide N-terminal elongation" EXACT [] -is_a: SO:1000099 ! mutation_causing_polypeptide_elongation - -[Term] -id: SO:1000101 -name: mutation_causing_polypeptide_C_terminal_elongation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "polypeptide C-terminal elongation" EXACT [] -is_a: SO:1000099 ! 
mutation_causing_polypeptide_elongation - -[Term] -id: SO:1000102 -name: mutation_affecting_level_of_translational_product -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000103 -name: mutation_decreasing_level_of_translation_product -is_a: SO:1000102 ! mutation_affecting_level_of_translational_product - -[Term] -id: SO:1000104 -name: mutation_increasing_level_of_translation_product -is_a: SO:1000102 ! mutation_affecting_level_of_translational_product - -[Term] -id: SO:1000105 -name: mutation_affecting_polypeptide_amino_acid_sequence -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000106 -name: mutation_causing_inframe_polypeptide_N_terminal_elongation -synonym: "inframe polypeptide N-terminal elongation" EXACT [] -is_a: SO:1000100 ! mutation_causing_polypeptide_N_terminal_elongation - -[Term] -id: SO:1000107 -name: mutation_causing_out_of_frame_polypeptide_N_terminal_elongation -synonym: "out of frame polypeptide N-terminal elongation" EXACT [] -is_a: SO:1000100 ! mutation_causing_polypeptide_N_terminal_elongation - -[Term] -id: SO:1000108 -name: mutaton_causing_inframe_polypeptide_C_terminal_elongation -synonym: "inframe_polypeptide C-terminal elongation" EXACT [] -is_a: SO:1000101 ! mutation_causing_polypeptide_C_terminal_elongation - -[Term] -id: SO:1000109 -name: mutation_causing_out_of_frame_polypeptide_C_terminal_elongation -synonym: "out of frame polypeptide C-terminal elongation" EXACT [] -is_a: SO:1000101 ! mutation_causing_polypeptide_C_terminal_elongation - -[Term] -id: SO:1000110 -name: frame_restoring_mutation -def: "A mutation that reverts the sequence of a previous frameshift mutation back to the initial frame." [SO:ke] -is_a: SO:1000065 ! frameshift_mutation - -[Term] -id: SO:1000111 -name: mutation_affecting_3D_structure_of_polypeptide -def: "A mutation that changes the amino acid sequence of the peptide in such a way that it changes the 3D structure of the molecule." 
[SO:ke] -synonym: "mutation affecting 3D-structure of polypeptide" EXACT [] -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000112 -name: mutation_causing_no_3D_structural_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000113 -name: mutation_causing_uncharacterised_3D_structural_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000114 -name: mutation_causing_partially_characterised_3D_structural_change -is_a: SO:1000113 ! mutation_causing_uncharacterised_3D_structural_change - -[Term] -id: SO:1000115 -name: mutation_causing_complex_3D_structural_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000116 -name: mutation_causing_conformational_change -is_a: SO:1000111 ! mutation_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000117 -name: mutation_affecting_polypeptide_function -is_a: SO:1000088 ! mutation_affecting_translational_product - -[Term] -id: SO:1000118 -name: mutation_causing_loss_of_function_of_polypeptide -synonym: "loss of function of polypeptide" RELATED [] -is_a: SO:1000117 ! mutation_affecting_polypeptide_function - -[Term] -id: SO:1000119 -name: mutation_causing_inactive_ligand_binding_site -is_a: SO:1000118 ! mutation_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000120 -name: mutation_causing_inactive_catalytic_site -is_a: SO:1000119 ! mutation_causing_inactive_ligand_binding_site - -[Term] -id: SO:1000121 -name: mutation_causing_polypeptide_localization_change -is_a: SO:1000117 ! mutation_affecting_polypeptide_function - -[Term] -id: SO:1000122 -name: mutation_causing_polypeptide_post_translational_processing_change -synonym: "polypeptide post-translational processing affected" EXACT [] -is_a: SO:1000117 ! mutation_affecting_polypeptide_function -is_a: SO:1000118 ! 
mutation_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000123 -name: polypeptide_post_translational_processing_affected -synonym: "polypeptide_post-translational_processing_affected" RELATED [] -is_obsolete: true - -[Term] -id: SO:1000124 -name: mutation_causing_partial_loss_of_function_of_polypeptide -synonym: "partial loss of function of polypeptide" EXACT [] -is_a: SO:1000118 ! mutation_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000125 -name: mutation_causing_gain_of_function_of_polypeptide -synonym: "gain of function of polypeptide" EXACT [] -is_a: SO:1000117 ! mutation_affecting_polypeptide_function - -[Term] -id: SO:1000126 -name: mutation_affecting_transcript_secondary_structure -def: "A mutation that affects the secondary structure (folding) of the RNA transcript molecule." [SO:ke] -is_a: SO:1000079 ! mutation_affecting_transcript_sequence - -[Term] -id: SO:1000127 -name: mutation_causing_compensatory_transcript_secondary_structure_mutation -is_a: SO:1000126 ! mutation_affecting_transcript_secondary_structure - -[Term] -id: SO:1000132 -name: mutation -def: "An event that changes nucleotide sequence." [SO:ke] -is_a: SO:0000000 ! Sequence_Ontology -disjoint_from: SO:0000041 ! operation -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:0000400 ! sequence_attribute -disjoint_from: SO:0000968 ! replication_mode -disjoint_from: SO:0001060 ! sequence_variant - -[Term] -id: SO:1000134 -name: mutation_causing_polypeptide_fusion -is_a: SO:1000105 ! mutation_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000136 -name: autosynaptic_chromosome -synonym: "(Drosophila)A" RELATED [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000138 -name: homo_compound_chromosome -synonym: "homo-compound chromosome" EXACT [] -is_a: SO:1000042 ! 
compound_chromosome - -[Term] -id: SO:1000140 -name: hetero_compound_chromosome -synonym: "hetero-compound chromosome" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:1000141 -name: chromosome_fission -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000142 -name: dexstrosynaptic_chromosome -is_a: SO:1000136 ! autosynaptic_chromosome - -[Term] -id: SO:1000143 -name: laevosynaptic_chromosome -is_a: SO:1000136 ! autosynaptic_chromosome - -[Term] -id: SO:1000144 -name: free_duplication -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000145 -name: free_ring_duplication -synonym: "(Drosophila)R" RELATED [] -is_a: SO:1000045 ! ring_chromosome -is_a: SO:1000144 ! free_duplication - -[Term] -id: SO:1000146 -name: complex_chromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000147 -name: deficient_translocation -def: "A translocation in which one of the four broken ends loses a segment before re-joining." [fb:reference_manual] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfT" RELATED [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000148 -name: inversion_cum_translocation -def: "The first two breaks are in the same chromosome, and the region between them is rejoined in inverted order to the other side of the first break, such that both sides of break one are present on the same chromosome. The remaining free ends are joined as a translocation with those resulting from the third break." [fb:reference_manual] -synonym: "(Drosophila)InT" RELATED [] -synonym: "(Drosophila)T" RELATED [] -synonym: "inversion cum translocation" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000044 ! 
chromosomal_translocation - -[Term] -id: SO:1000149 -name: bipartite_duplication -def: "The (large) region between the first two breaks listed is lost, and the two flanking segments (one of them centric) are joined as a translocation to the free ends resulting from the third break." [fb:reference_manual] -synonym: "(Drosophila)bDp" RELATED [] -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000150 -name: cyclic_translocation -def: "Three breaks in three different chromosomes. The centric segment resulting from the first break listed is joined to the acentric segment resulting from the second, rather than the third." [fb:reference_manual] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000151 -name: bipartite_inversion -def: "Three breaks in the same chromosome; both central segments are inverted in place (i.e., they are not transposed)." [fb:reference_manual] -synonym: "(Drosophila)bIn" RELATED [] -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000152 -name: uninverted_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)eDp" RELATED [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000153 -name: inverted_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)iDp" RELATED [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000154 -name: insertional_duplication -def: "A chromosome duplication involving the insertion of a duplicated region." [SO:ke] -synonym: "(Drosophila)Dpp" RELATED [] -is_a: SO:1000037 ! 
chromosomal_duplication - -[Term] -id: SO:1000155 -name: interchromosomal_transposition -synonym: "(Drosophila)Tp" RELATED [] -is_a: SO:0000453 ! transposition -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000156 -name: inverted_interchromosomal_transposition -synonym: "(Drosophila)iTp" RELATED [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000157 -name: uninverted_interchromosomal_transposition -synonym: "(Drosophila)eTp" RELATED [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000158 -name: inverted_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)iTp" RELATED [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000159 -name: uninverted_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [fb:reference_manual] -synonym: "(Drosophila)eTp" RELATED [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000160 -name: unoriented_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [fb:reference_manual] -synonym: "(Drosophila)uDp" RELATED [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000161 -name: unorientated_interchromosomal_transposition -synonym: "(Drosophila)uTp" RELATED [] -is_a: SO:1000155 ! 
interchromosomal_transposition - -[Term] -id: SO:1000162 -name: unorientated_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [fb:reference_manual] -synonym: "(Drosophila)uTp" RELATED [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000170 -name: uncharacterised_chromosomal_mutation -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000171 -name: deficient_inversion -def: "Three breaks in the same chromosome; one central region lost, the other inverted." [fb:reference_manual] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfIn" RELATED [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000173 -name: tandem_duplication -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:1000175 -name: partially_characterised_chromosomal_mutation -is_a: SO:1000170 ! uncharacterised_chromosomal_mutation - -[Term] -id: SO:1000177 -name: mutation_causing_uncharacterised_change_in_transcript -def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000049 ! mutation_affecting_transcript - -[Term] -id: SO:1000179 -name: mutation_causing_partially_characterised_change_in_transcript -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000177 ! mutation_causing_uncharacterised_change_in_transcript - -[Term] -id: SO:1000180 -name: mutation_affecting_gene_structure -def: "A kind of mutation that affects the structure of a gene." [SO:ke] -is_a: SO:1000132 ! 
mutation - -[Term] -id: SO:1000181 -name: mutation_causing_gene_fusion -def: "A kind of mutation that affects the structure of a gene by causing a fusion to another gene." [SO:ke] -is_a: SO:1000180 ! mutation_affecting_gene_structure - -[Term] -id: SO:1000182 -name: chromosome_number_variation -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number." [SO:ke] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:1000183 -name: chromosome_structure_variation -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:1000184 -name: mutation_causes_exon_loss -def: "A mutation that affects splicing and causes an exon loss." [SO:ke] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000185 -name: mutation_causes_intron_gain -def: "Mutation causes an intron to be gained by the processed transcript; usually a result of a donor acceptor mutation (SO:1000072)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000071 ! mutation_affecting_splicing - -[Term] -id: SO:1000186 -name: cryptic_splice_donor_activation -is_a: SO:1000074 ! cryptic_splice_activator_mutation - -[Term] -id: SO:1001186 -name: cryptic_splice_acceptor_activation -is_a: SO:1000074 ! cryptic_splice_activator_mutation - -[Term] -id: SO:1001187 -name: alternatively_spliced_transcript -def: "A transcript that is alternatively spliced." [SO:xp] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000877 ! alternatively_spliced - -[Term] -id: SO:1001188 -name: encodes_1_polypeptide -def: "A gene that is alternately spliced, but encodes only one polypeptide." [SO:ke] -is_a: SO:0000463 ! encodes_alternately_spliced_transcripts - -[Term] -id: SO:1001189 -name: encodes_greater_than_1_polypeptide -def: "A gene that is alternately spliced, and encodes more than one polypeptide." [SO:ke] -is_a: SO:0000463 ! 
encodes_alternately_spliced_transcripts - -[Term] -id: SO:1001190 -name: encodes_different_polypeptides_different_stop -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences, but use different stop codons." [SO:ke] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001191 -name: encodes_overlapping_peptides_different_start -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences, but use different start codons." [SO:ke] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001192 -name: encodes_disjoint_polypeptides -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that do not have overlapping peptide sequences." [SO:ke] -is_a: SO:1001189 ! encodes_greater_than_1_polypeptide - -[Term] -id: SO:1001193 -name: encodes_overlapping_polypeptides_different_start_and_stop -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences, but use different start and stop codons." [SO:ke] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001194 -name: alternatively_spliced_gene_encoding_greater_than_1_polypeptide_coding_regions_overlapping -is_obsolete: true - -[Term] -id: SO:1001195 -name: encodes_overlapping_peptides -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences." [SO:ke] -is_a: SO:1001189 ! encodes_greater_than_1_polypeptide - -[Term] -id: SO:1001196 -name: cryptogene -def: "A maxicircle gene so extensively edited that it cannot be matched to its edited mRNA sequence." [SO:ma] -intersection_of: SO:0000654 ! maxicircle_gene -intersection_of: has_quality SO:0000976 ! cryptic - -[Term] -id: SO:1001197 -name: dicistronic_primary_transcript -def: "A primary transcript that has the quality dicistronic." 
[SO:xp] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:1001217 -name: member_of_regulon -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:1001244 -name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non_overlapping -synonym: "alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non-overlapping" RELATED [] -is_obsolete: true - -[Term] -id: SO:1001246 -name: CDS_independently_known -def: "A CDS with the evidence status of being independently known." [SO:xp] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000906 ! independently_known - -[Term] -id: SO:1001247 -name: orphan_CDS -def: "A CDS whose predicted amino acid sequence is unsupported by any experimental evidence or by any match with any other known sequence." [SO:ma] -intersection_of: SO:1001254 ! CDS_predicted -intersection_of: has_origin SO:0000910 ! orphan - -[Term] -id: SO:1001249 -name: CDS_supported_by_domain_match_data -def: "A CDS that is supported by domain similarity." [SO:xp] -intersection_of: SO:1001251 ! CDS_supported_by_sequence_similarity_data -intersection_of: has_quality SO:0000908 ! supported_by_domain_match - -[Term] -id: SO:1001251 -name: CDS_supported_by_sequence_similarity_data -def: "A CDS that is supported by sequence similarity data." [SO:xp] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:1001254 -name: CDS_predicted -def: "A CDS that is predicted." [SO:ke] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000732 ! predicted - -[Term] -id: SO:1001255 -name: status_of_coding_sequence -is_obsolete: true - -[Term] -id: SO:1001259 -name: CDS_supported_by_EST_or_cDNA_data -def: "A CDS that is supported by similarity to EST or cDNA data." 
[SO:xp] -intersection_of: SO:1001251 ! CDS_supported_by_sequence_similarity_data -intersection_of: has_quality SO:0000909 ! supported_by_EST_or_cDNA - -[Term] -id: SO:1001260 -name: internal_Shine_Dalgarno_sequence -def: "A Shine-Dalgarno sequence that stimulates recoding through interactions with the anti-Shine-Dalgarno in the RNA of small ribosomal subunits of translating ribosomes. The signal is only operative in Bacteria." [PMID:12519954, SO:ke] -synonym: "internal Shine-Dalgarno sequence" EXACT [] -is_a: SO:0000243 ! internal_ribosome_entry_site -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001261 -name: recoded_mRNA -def: "A gene coding an mRNA which is recoded before translation, usually by special cis-acting signals." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000881 ! recoded - -[Term] -id: SO:1001262 -name: minus_1_translationally_frameshifted -def: "An attribute describing a translational frameshift of -1." [SO:ke] -is_a: SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:1001263 -name: plus_1_translationally_frameshifted -def: "An attribute describing a translational frameshift of +1." [SO:ke] -is_a: SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:1001264 -name: mRNA_recoded_by_translational_bypass -def: "A mRNA is translated by ribosomes that suspend translation at a particular codon and resume translation at a particular non-overlapping downstream codon." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:1001265 -name: mRNA_recoded_by_codon_redefinition -def: "A gene whose mRNA is recoded by an alteration of codon meaning." [SO:ma] -intersection_of: SO:0000234 ! 
mRNA -intersection_of: has_quality SO:0000882 ! codon_redefined - -[Term] -id: SO:1001266 -name: stop_codon_redefinition_as_selenocysteine -is_obsolete: true - -[Term] -id: SO:1001267 -name: stop_codon_readthrough -is_obsolete: true - -[Term] -id: SO:1001268 -name: recoding_stimulatory_region -def: "A site in an mRNA sequence that stimulates the recoding of a region in the same mRNA." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12519954&dopt=Abstract] -synonym: "recoding stimulatory signal" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:1001269 -name: four_bp_start_codon -def: "A non-canonical start codon with 4 base pairs." [SO:ke] -synonym: "4bp start codon" EXACT [] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001270 -name: stop_codon_redefinition_as_pyrrolysine -is_obsolete: true - -[Term] -id: SO:1001271 -name: archaeal_intron -def: "An intron characteristic of Archaeal tRNA and rRNA genes, where intron transcript generates a bulge-helix-bulge motif that is recognised by a splicing endoribonuclease." [PMID:9301331, SO:ma] -comment: Intron characteristic of tRNA genes; splices by an endonuclease-ligase mediated mechanism. -is_a: SO:0001216 ! endonuclease_spliced_intron - -[Term] -id: SO:1001272 -name: tRNA_intron -def: "An intron found in tRNA that is spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -comment: Could be a cross product with Gene ontology, GO:0006388. -synonym: "pre-tRNA intron" EXACT [] -is_a: SO:0001216 ! endonuclease_spliced_intron - -[Term] -id: SO:1001273 -name: CTG_start_codon -def: "A non-canonical start codon of sequence CTG." [SO:ke] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001274 -name: SECIS_element -def: "The incorporation of selenocysteine into a protein sequence is directed by an in-frame UGA codon (usually a stop codon) within the coding region of the mRNA. 
Selenoprotein mRNAs contain a conserved secondary structure in the 3' UTR that is required for the distinction of UGA stop from UGA selenocysteine. The selenocysteine insertion sequence (SECIS) is around 60 nt in length and adopts a hairpin structure which is sufficiently well-defined and conserved to act as a computational screen for selenoprotein genes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00031] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001275 -name: retron -def: "Sequence coding for a short, single-stranded, DNA sequence via a retrotransposed RNA intermediate; characteristic of some microbial genomes." [SO:ma] -is_a: SO:0000001 ! region - -[Term] -id: SO:1001277 -name: three_prime_recoding_site -def: "The recoding stimulatory signal located downstream of the recoding site." [SO:ke] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001279 -name: three_prime_stem_loop_structure -def: "A recoding stimulatory region, the stem-loop secondary structural element is downstream of the redefined region." [PMID:12519954, SO:ke] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001280 -name: five_prime_recoding_site -def: "The recoding stimulatory signal located upstream of the recoding site." [SO:ke] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001281 -name: flanking_three_prime_quadruplet_recoding_signal -def: "Four base pair sequence immediately downstream of the redefined region. The redefined region is a frameshift site. The quadruplet is 2 overlapping codons." [PMID:12519954, SO:ke] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001282 -name: UAG_stop_codon_signal -def: "A stop codon signal for a UAG stop codon redefinition." [SO:ke] -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001283 -name: UAA_stop_codon_signal -def: "A stop codon signal for a UAA stop codon redefinition." [SO:ke] -is_a: SO:1001288 ! 
stop_codon_signal - -[Term] -id: SO:1001284 -name: regulon -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." [ISBN:0198506732] -subset: SOFA -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:1001285 -name: UGA_stop_codon_signal -def: "A stop codon signal for a UGA stop codon redefinition." [SO:ke] -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001286 -name: three_prime_repeat_recoding_signal -def: "A recoding stimulatory signal, downstream sequence important for recoding that contains repetitive elements." [PMID:12519954, SO:ke] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001287 -name: distant_three_prime_recoding_signal -def: "A recoding signal that is found many hundreds of nucleotides 3' of a redefined stop codon." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8709208&dopt=Abstract] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001288 -name: stop_codon_signal -def: "A recoding stimulatory signal that is a stop codon and has effect on efficiency of recoding." [PMID:12519954, SO:ke] -comment: This term does not include the stop codons that are redefined. An example would be a stop codon that partially overlapped a frame shifting site would be an example stimulatory signal. -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:2000061 -name: databank_entry -def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -is_a: SO:0000695 ! reagent - -[Typedef] -id: adjacent_to -name: adjacent_to -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence." [SO:ke] -subset: SOFA - -[Typedef] -id: associated_with -name: associated_with -comment: This relationship is vague and up for discussion. 
-is_symmetric: true - -[Typedef] -id: derives_from -name: derives_from -subset: SOFA -is_transitive: true - -[Typedef] -id: genome_of -name: genome_of - -[Typedef] -id: has_genome_location -name: has_genome_location -is_obsolete: true - -[Typedef] -id: has_origin -name: has_origin - -[Typedef] -id: has_part -name: has_part - -[Typedef] -id: has_quality -name: has_quality -comment: The relationship between a feature and an atrribute. - -[Typedef] -id: homologous_to -name: homologous_to -subset: SOFA -is_symmetric: true -is_a: similar_to ! similar_to - -[Typedef] -id: member_of -name: member_of -comment: A subtype of part_of. Inverse is collection_of. Winston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. -subset: SOFA -is_transitive: true - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -subset: SOFA -is_a: homologous_to ! homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: paralogous_to -name: paralogous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: part_of -name: part_of -subset: SOFA -is_transitive: true - -[Typedef] -id: position_of -name: position_of - -[Typedef] -id: regulated_by -name: regulated_by -is_obsolete: true - -[Typedef] -id: sequence_of -name: sequence_of - -[Typedef] -id: similar_to -name: similar_to -subset: SOFA -is_symmetric: true - -[Typedef] -id: variant_of -name: variant_of -def: "A' is a variant (mutation) of A = definition every instance of A' is either an immediate mutation of some instance of A, or there is a chain of immediate mutation processes linking A' to some instance of A." [SO:immuno_workshop] -comment: Added to SO during the immunology workshop, June 2007. This relationship was approved by Barry Smith. 
- diff --git a/annotation/NBIS/Ontology/SOXP/so-xp_2_4_1.obo b/annotation/NBIS/Ontology/SOXP/so-xp_2_4_1.obo deleted file mode 100644 index 964a67f97..000000000 --- a/annotation/NBIS/Ontology/SOXP/so-xp_2_4_1.obo +++ /dev/null @@ -1,14731 +0,0 @@ -format-version: 1.2 -date: 02:12:2009 09:48 -saved-by: kareneilbeck -auto-generated-by: OBO-Edit 2.1-beta1 -subsetdef: biosapiens "biosapiens protein feature ontology" -subsetdef: SOFA "SO feature annotation" -synonymtypedef: aa1 "amino acid 1 letter code" -synonymtypedef: aa3 "amino acid 3 letter code" -synonymtypedef: AAMOD "amino acid modification" -synonymtypedef: BS "biosapiens" -synonymtypedef: RNAMOD "RNA modification" EXACT -default-namespace: sequence -remark: autogenerated-by\: DAG-Edit version 1.417\nsaved-by\: eilbeck\ndate\: Tue May 11 15\:18\:44 PDT 2004\nversion\: $Revision\: 1.45 $ - -[Term] -id: SO:0000000 -name: Sequence_Ontology -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000001 -name: region -def: "A sequence_feature with an extent greater than zero. A nucleotide region is composed of bases and a polypeptide region is composed of amino acids." [SO:ke] -subset: SOFA -synonym: "sequence" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000002 -name: sequence_secondary_structure -def: "A folded sequence." [SO:ke] -synonym: "sequence secondary structure" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000003 -name: G_quartet -def: "G-quartets are unusual nucleic acid structures consisting of a planar arrangement where each guanine is hydrogen bonded by hoogsteen pairing to another guanine in the quartet." [http://www.ncbi.nlm.nih.gov/pubmed/7919797?dopt=Abstract] -synonym: "G quartet" EXACT [] -synonym: "G tetrad" EXACT [] -synonym: "G-quadruplex" EXACT [] -synonym: "G-quartet" EXACT [] -synonym: "G-tetrad" EXACT [] -synonym: "G_quadruplex" EXACT [] -synonym: "guanine tetrad" EXACT [] -xref: http://en.wikipedia.org/wiki/G-quadruplex "wiki" -is_a: SO:0000002 ! 
sequence_secondary_structure - -[Term] -id: SO:0000004 -name: interior_coding_exon -subset: SOFA -synonym: "interior coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000005 -name: satellite_DNA -def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "satellite DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Satellite_DNA "wiki" -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000006 -name: PCR_product -def: "A region amplified by a PCR reaction." [SO:ke] -comment: This term is mapped to MGED. This term is now located in OBI, with the following ID OBI_0000406. -subset: SOFA -synonym: "amplicon" RELATED [] -synonym: "PCR product" EXACT [] -xref: http://en.wikipedia.org/wiki/RAPD "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000007 -name: read_pair -def: "A pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -synonym: "read-pair" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000008 -name: gene_sensu_your_favorite_organism -is_obsolete: true - -[Term] -id: SO:0000009 -name: gene_class -is_obsolete: true - -[Term] -id: SO:0000010 -name: protein_coding -synonym: "protein-coding" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000011 -name: non_protein_coding -synonym: "non protein-coding" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000012 -name: scRNA_primary_transcript -def: "The primary transcript of any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a eukaryote." 
[http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -synonym: "scRNA primary transcript" EXACT [] -synonym: "scRNA transcript" EXACT [] -synonym: "small cytoplasmic RNA" RELATED [] -synonym: "small cytoplasmic RNA transcript" EXACT [] -synonym: "small_cytoplasmic_RNA" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000013 -name: scRNA -def: "Any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -subset: SOFA -synonym: "small cytoplasmic RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000012 ! scRNA_primary_transcript - -[Term] -id: SO:0000014 -name: INR_motif -def: "A sequence element characteristic of some RNA polymerase II promoters required for the correct positioning of the polymerase for the start of transcription. Overlaps the TSS. The mammalian consensus sequence is YYAN(T|A)YY; the Drosophila consensus sequence is TCA(G|T)t(T|C). In each the A is at position +1 with respect to the TSS. Functionally similar to the TATA box element." [PMID:12651739] -synonym: "DMp2" RELATED [] -synonym: "initiator" EXACT [] -synonym: "initiator motif" EXACT [] -synonym: "INR motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000015 -name: DPE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters; Positioned from +28 to +32 with respect to the TSS (+1). Experimental results suggest that the DPE acts in conjunction with the INR_motif to provide a binding site for TFIID in the absence of a TATA box to mediate transcription of TATA-less promoters. Consensus sequence (A|G)G(A|T)(C|T)(G|A|C)." [PMID:12651739\:12537576] -synonym: "CRWMGCGWKCGCTTS" NARROW [] -synonym: "downstream core promoter element" EXACT [] -synonym: "DPE motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0000016 -name: BRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements at -37 to -32 with respect to the TSS (+1). Consensus sequence is (G|C)(G|C)(G|A)CGCC. Binds TFIIB." [PMID:12651739] -synonym: "B-recognition element" EXACT [] -synonym: "BRE motif" EXACT [] -synonym: "TFIIB recognition element" RELATED [] -synonym: "transcription factor B-recognition element" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000017 -name: PSE_motif -def: "A sequence element characteristic of the promoters of snRNA genes transcribed by RNA polymerase II or by RNA polymerase III. Located between -45 and -60 relative to the TSS. The human PSE_motif consensus sequence is TCACCNTNA(C|G)TNAAAAG(T|G)." [PMID:12651739] -synonym: "proximal sequence element" EXACT [] -synonym: "PSE motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000167 ! promoter - -[Term] -id: SO:0000018 -name: linkage_group -def: "A group of loci that can be grouped in a linear order representing the different degrees of linkage among the genes concerned." [ISBN:038752046] -synonym: "linkage group" EXACT [] -xref: http://en.wikipedia.org/wiki/Linkage_group "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000020 -name: RNA_internal_loop -def: "A region of double stranded RNA where the bases do not conform to WC base pairing. The loop is closed on both sides by canonical base pairing. If the interruption to base pairing occurs on one strand only, it is known as a bulge." [SO:ke] -synonym: "RNA internal loop" EXACT [] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000021 -name: asymmetric_RNA_internal_loop -def: "An internal RNA loop where one of the strands includes more bases than the corresponding region on the other strand." 
[SO:ke] -synonym: "asymmetric RNA internal loop" EXACT [] -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000022 -name: A_minor_RNA_motif -def: "A region forming a motif, composed of adenines, where the minor groove edges are inserted into the minor groove of another helix." [SO:ke] -synonym: "A minor RNA motif" EXACT [] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000023 -name: K_turn_RNA_motif -def: "The kink turn (K-turn) is an RNA structural motif that creates a sharp (~120 degree) bend between two continuous helices." [SO:ke] -synonym: "K turn RNA motif" EXACT [] -synonym: "K-turn" EXACT [] -synonym: "kink turn" EXACT [] -synonym: "kink-turn motif" EXACT [] -xref: http://en.wikipedia.org/wiki/K-turn "wiki" -is_a: SO:0000021 ! asymmetric_RNA_internal_loop - -[Term] -id: SO:0000024 -name: sarcin_like_RNA_motif -def: "A loop in ribosomal RNA containing the sites of attack for ricin and sarcin." [http://proton.chem.yale.edu/pdf/7897662.pdf] -synonym: "sarcin like RNA motif" EXACT [] -synonym: "sarcin/ricin domain" EXACT [] -synonym: "sarcin/ricin loop" EXACT [] -synonym: "sarcin/ricin RNA domain" EXACT [] -is_a: SO:0000021 ! asymmetric_RNA_internal_loop - -[Term] -id: SO:0000025 -name: symmetric_RNA_internal_loop -def: "An internal RNA loop where the extent of the loop on both stands is the same size." [SO:ke] -synonym: "A-minor RNA motif" EXACT [] -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000026 -name: RNA_junction_loop -synonym: "RNA junction loop" EXACT [] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000027 -name: RNA_hook_turn -synonym: "hook turn" RELATED [] -synonym: "hook-turn motif" EXACT [] -synonym: "RNA hook turn" EXACT [] -is_a: SO:0000026 ! RNA_junction_loop - -[Term] -id: SO:0000028 -name: base_pair -synonym: "base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Base_pair "wiki" -is_a: SO:0000002 ! 
sequence_secondary_structure - -[Term] -id: SO:0000029 -name: WC_base_pair -def: "The canonical base pair, where two bases interact via WC edges, with glycosidic bonds oriented cis relative to the axis of orientation." [PMID:12177293] -synonym: "canonical base pair" EXACT [] -synonym: "Watson Crick base pair" EXACT [] -synonym: "Watson-Crick base pair" RELATED [] -synonym: "Watson-Crick pair" EXACT [] -synonym: "WC base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000030 -name: sugar_edge_base_pair -def: "A type of non-canonical base-pairing." [PMID:12177293] -synonym: "sugar edge base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000031 -name: aptamer -def: "DNA or RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http://aptamer.icmb.utexas.edu] -xref: http://en.wikipedia.org/wiki/Aptamer "wiki" -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000032 -name: DNA_aptamer -def: "DNA molecules that have been selected from random pools based on their ability to bind other molecules." [http:aptamer.icmb.utexas.edu] -synonym: "DNA aptamer" EXACT [] -is_a: SO:0000031 ! aptamer - -[Term] -id: SO:0000033 -name: RNA_aptamer -def: "RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http://aptamer.icmb.utexas.edu] -synonym: "RNA aptamer" EXACT [] -is_a: SO:0000031 ! aptamer - -[Term] -id: SO:0000034 -name: morpholino_oligo -def: "Morpholino oligos are synthesized from four different Morpholino subunits, each of which contains one of the four genetic bases (A, C, G, T) linked to a 6-membered morpholine ring. Eighteen to 25 subunits of these four subunit types are joined in a specific order by non-ionic phosphorodiamidate intersubunit linkages to give a Morpholino." [http://www.gene-tools.com/] -synonym: "morpholino oligo" EXACT [] -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001183 ! 
morpholino - -[Term] -id: SO:0000035 -name: riboswitch -def: "A riboswitch is a part of an mRNA that can act as a direct sensor of small molecules to control their own expression. A riboswitch is a cis element in the 5' end of an mRNA, that acts as a direct sensor of metabolites." [PMID:2820954] -synonym: "riboswitch RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Riboswitch "wiki" -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000036 -name: matrix_attachment_site -def: "A DNA region that is required for the binding of chromatin to the nuclear matrix." [SO:ma] -synonym: "MAR" EXACT [] -synonym: "matrix association region" EXACT [] -synonym: "matrix attachment region" EXACT [] -synonym: "matrix attachment site" EXACT [] -synonym: "nuclear matrix association region" EXACT [] -synonym: "nuclear matrix attachment site" EXACT [] -synonym: "S/MAR" EXACT [] -synonym: "S/MAR element" RELATED [] -synonym: "scaffold attachment site" EXACT [] -synonym: "scaffold matrix attachment region" EXACT [] -synonym: "SMAR" EXACT [] -xref: http://en.wikipedia.org/wiki/Matrix_attachment_site "wiki" -is_a: SO:0000626 ! chromosomal_regulatory_element - -[Term] -id: SO:0000037 -name: locus_control_region -def: "A DNA region that includes DNAse hypersensitive sites located 5' to a gene that confers the high-level, position-independent, and copy number-dependent expression to that gene." [SO:ma] -synonym: "LCR" EXACT [] -synonym: "locus control element" RELATED [] -synonym: "locus control region" EXACT [] -xref: http://en.wikipedia.org/wiki/Locus_control_region "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000038 -name: match_set -def: "A collection of match parts." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000039 -name: match_part -def: "A part of a match, for example an hsp from blast is a match_part." [SO:ke] -subset: SOFA -synonym: "match part" EXACT [] -is_a: SO:0001410 ! 
experimental_feature -relationship: part_of SO:0000343 ! match - -[Term] -id: SO:0000040 -name: genomic_clone -def: "A clone of a DNA region of a genome." [SO:ma] -synonym: "genomic clone" EXACT [] -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000041 -name: sequence_operation -def: "An operation that can be applied to a sequence, that results in a change." [SO:ke] -synonym: "sequence operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000042 -name: pseudogene_attribute -def: "An attribute of a pseudogene (SO:0000336)." [SO:ma] -synonym: "pseudogene attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000043 -name: processed_pseudogene -def: "A pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promotors, but often including a polyA tail." [SO:xp] -comment: Please not the synonym R psi M uses the spelled out form of the greek letter. -synonym: "processed pseudogene" EXACT [] -synonym: "pseudogene by reverse transcription" RELATED [] -synonym: "R psi G" RELATED [] -synonym: "retropseudogene" EXACT [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0000044 -name: pseudogene_by_unequal_crossing_over -def: "A pseudogene caused by unequal crossing over at recombination." [SO:ke] -synonym: "pseudogene by unequal crossing over" EXACT [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0000045 -name: delete -def: "To remove a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000046 -name: insert -def: "To insert a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000047 -name: invert -def: "To invert a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000048 -name: substitute -def: "To substitute a subsection of sequence for another." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000049 -name: translocate -def: "To translocate a subsection of sequence." 
[SO:ke] -is_obsolete: true - -[Term] -id: SO:0000050 -name: gene_part -def: "A part of a gene, that has no other route in the ontology back to region. This concept is necessary for logical inference as these parts must have the properties of region. It also allows us to associate all the parts of genes with a gene." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000051 -name: probe -def: "A DNA sequence used experimentally to detect the presence or absence of a complementary nucleic acid." [SO:ma] -xref: http://en.wikipedia.org/wiki/Hybridization_probe "wiki" -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000052 -name: assortment_derived_deficiency -synonym: "assortment-derived_deficiency" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000053 -name: sequence_variant_affecting_regulatory_region -def: "A sequence_variant_effect which changes the regulatory region of a gene." [SO:ke] -synonym: "mutation affecting regulatory region" RELATED [] -synonym: "sequence variant affecting regulatory region" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0000054 -name: aneuploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Aneuploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0000055 -name: hyperploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number as extra chromosomes are present." [SO:ke] -xref: http://en.wikipedia.org/wiki/Hyperploid "wiki" -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000056 -name: hypoploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number as some chromosomes are missing." [SO:ke] -xref: http://en.wikipedia.org/wiki/Hypoploid "wiki" -is_a: SO:0000054 ! 
aneuploid - -[Term] -id: SO:0000057 -name: operator -def: "A regulatory element of an operon to which activators or repressors bind thereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -synonym: "operator segment" EXACT [] -xref: http://en.wikipedia.org/wiki/Operator_(biology)#Operator "wiki" -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000058 -name: assortment_derived_aneuploid -synonym: "assortment-derived_aneuploid" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000059 -name: nuclease_binding_site -def: "A region of a molecule that binds to a nuclease." [SO:cb] -subset: SOFA -synonym: "nuclease binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0000060 -name: compound_chromosome_arm -synonym: "compound chromosome arm" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:0000061 -name: restriction_enzyme_binding_site -def: "A region of a molecule that binds to a restriction enzyme." [SO:cb] -synonym: "restriction endonuclease binding site" EXACT [] -synonym: "restriction endonuclease recognition site" RELATED [] -synonym: "restriction enzyme binding site" EXACT [] -synonym: "restriction enzyme recognition site" RELATED [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000062 -name: deficient_intrachromosomal_transposition -synonym: "deficient intrachromosomal transposition" EXACT [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:0000063 -name: deficient_interchromosomal_transposition -synonym: "deficient interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:0000064 -name: gene_by_transcript_attribute -comment: This classes of attributes was added by MA to allow the broad description of genes based on qualities of the transcript(s). A product of SO meeting 2004. 
-is_obsolete: true - -[Term] -id: SO:0000065 -name: free_chromosome_arm -synonym: "free chromosome arm" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000066 -name: gene_by_polyadenylation_attribute -is_obsolete: true - -[Term] -id: SO:0000067 -name: gene_to_gene_feature -synonym: "gene to gene feature" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000068 -name: overlapping -def: "An attribute describing a gene that has a sequence that overlaps the sequence of another gene." [SO:ke] -is_a: SO:0000067 ! gene_to_gene_feature - -[Term] -id: SO:0000069 -name: inside_intron -def: "An attribute to describe a gene when it is located within the intron of another gene." [SO:ke] -synonym: "inside intron" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000070 -name: inside_intron_antiparallel -def: "An attribute to describe a gene when it is located within the intron of another gene and on the opposite strand." [SO:ke] -synonym: "inside intron antiparallel" EXACT [] -is_a: SO:0000069 ! inside_intron - -[Term] -id: SO:0000071 -name: inside_intron_parallel -def: "An attribute to describe a gene when it is located within the intron of another gene and on the same strand." [SO:ke] -synonym: "inside intron parallel" EXACT [] -is_a: SO:0000069 ! inside_intron - -[Term] -id: SO:0000072 -name: end_overlapping_gene -is_obsolete: true - -[Term] -id: SO:0000073 -name: five_prime_three_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's 3' region." [SO:ke] -synonym: "five prime-three prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000074 -name: five_prime_five_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's five prime region." [SO:ke] -synonym: "five prime-five prime overlap" EXACT [] -is_a: SO:0000068 ! 
overlapping - -[Term] -id: SO:0000075 -name: three_prime_three_prime_overlap -def: "An attribute to describe a gene when the 3' region overlaps with another gene's 3' region." [SO:ke] -synonym: "three prime-three prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000076 -name: three_prime_five_prime_overlap -def: "An attribute to describe a gene when the 3' region overlaps with another gene's 5' region." [SO:ke] -synonym: "5' 3' overlap" EXACT [] -synonym: "three prime five prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000077 -name: antisense -def: "A region sequence that is complementary to a sequence of messenger RNA." [SO:ke] -xref: http://en.wikipedia.org/wiki/Antisense "wiki" -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000078 -name: polycistronic_transcript -def: "A transcript that is polycistronic." [SO:xp] -synonym: "polycistronic transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000079 -name: dicistronic_transcript -def: "A transcript that is dicistronic." [SO:ke] -synonym: "dicistronic transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000080 -name: operon_member -synonym: "operon member" EXACT [] -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0000081 -name: gene_array_member -synonym: "gene array member" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000082 -name: processed_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000083 -name: macronuclear_sequence -synonym: "macronuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000084 -name: micronuclear_sequence -synonym: "micronuclear sequence" EXACT [] -is_a: SO:0000736 ! 
organelle_sequence - -[Term] -id: SO:0000085 -name: gene_by_genome_location -is_obsolete: true - -[Term] -id: SO:0000086 -name: gene_by_organelle_of_genome -is_obsolete: true - -[Term] -id: SO:0000087 -name: nuclear_gene -def: "A gene from nuclear sequence." [SO:xp] -synonym: "nuclear gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_gene "wiki" -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000088 -name: mt_gene -def: "A gene located in mitochondrial sequence." [SO:xp] -synonym: "mitochondrial gene" EXACT [] -synonym: "mt gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_gene "wiki" -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000089 -name: kinetoplast_gene -def: "A gene located in kinetoplast sequence." [SO:xp] -synonym: "kinetoplast gene" EXACT [] -intersection_of: SO:0000088 ! mt_gene -intersection_of: has_origin SO:0000741 ! kinetoplast - -[Term] -id: SO:0000090 -name: plastid_gene -def: "A gene from plastid sequence." [SO:xp] -synonym: "plastid gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000091 -name: apicoplast_gene -def: "A gene from apicoplast sequence." [SO:xp] -synonym: "apicoplast gene" EXACT [] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0000092 -name: ct_gene -def: "A gene from chloroplast sequence." [SO:xp] -synonym: "chloroplast gene" EXACT [] -synonym: "ct gene" EXACT [] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000093 -name: chromoplast_gene -def: "A gene from chromoplast_sequence." [SO:xp] -synonym: "chromoplast gene" EXACT [] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000744 ! 
chromoplast_sequence - -[Term] -id: SO:0000094 -name: cyanelle_gene -def: "A gene from cyanelle sequence." [SO:xp] -synonym: "cyanelle gene" EXACT [] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000095 -name: leucoplast_gene -def: "A plastid gene from leucoplast sequence." [SO:xp] -synonym: "leucoplast gene" EXACT [] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000096 -name: proplastid_gene -def: "A gene from proplastid sequence." [SO:ke] -synonym: "proplastid gene" EXACT [] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000748 ! proplastid_sequence - -[Term] -id: SO:0000097 -name: nucleomorph_gene -def: "A gene from nucleomorph sequence." [SO:xp] -synonym: "nucleomorph gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000098 -name: plasmid_gene -def: "A gene from plasmid sequence." [SO:xp] -synonym: "plasmid gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000749 ! plasmid_location - -[Term] -id: SO:0000099 -name: proviral_gene -def: "A gene from proviral sequence." [SO:xp] -synonym: "proviral gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000751 ! proviral_location - -[Term] -id: SO:0000100 -name: endogenous_retroviral_gene -def: "A proviral gene with origin endogenous retrovirus." [SO:xp] -synonym: "endogenous retroviral gene" EXACT [] -intersection_of: SO:0000099 ! proviral_gene -intersection_of: has_origin SO:0000903 ! endogenous_retroviral_sequence - -[Term] -id: SO:0000101 -name: transposable_element -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." 
[http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -synonym: "transposable element" EXACT [] -synonym: "transposon" EXACT [] -xref: http://en.wikipedia.org/wiki/Transposable_element "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000102 -name: expressed_sequence_match -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -synonym: "expressed sequence match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -def: "The end of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert end" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000104 -name: polypeptide -alt_id: SO:0000358 -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The term 'protein' was merged with 'polypeptide'. Although 'protein' was a sequenece_attribute and therefore meant to describe the quality rather than an actual feature, it was being used erroneously. It is replaced by 'peptidyl' as the polymer attribute. -subset: SOFA -synonym: "protein" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypeptide "wiki" -is_a: SO:0001411 ! biological_region -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000105 -name: chromosome_arm -def: "A region of the chromosome between the centromere and the telomere. Human chromosomes have two arms, the p arm (short) and the q arm (long) which are separated from each other by the centromere." [http://www.medterms.com/script/main/art.asp?articlekey=5152] -synonym: "chromosome arm" EXACT [] -is_a: SO:0000830 ! 
chromosome_part - -[Term] -id: SO:0000106 -name: non_capped_primary_transcript -is_obsolete: true - -[Term] -id: SO:0000107 -name: sequencing_primer -synonym: "sequencing primer" EXACT [] -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000108 -name: mRNA_with_frameshift -def: "An mRNA with a frameshift." [SO:xp] -synonym: "frameshifted mRNA" EXACT [] -synonym: "mRNA with frameshift" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000109 -name: sequence_variant_obs -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000110 -name: sequence_feature -def: "An extent of biological sequence." [SO:ke] -subset: SOFA -synonym: "located sequence feature" RELATED [] -synonym: "located_sequence_feature" EXACT [] -synonym: "sequence feature" EXACT [] -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000111 -name: transposable_element_gene -def: "A gene encoded within a transposable element. For example gag, int, env and pol are the transposable element genes of the TY element in yeast." [SO:ke] -synonym: "transposable element gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: part_of SO:0000101 ! transposable_element - -[Term] -id: SO:0000112 -name: primer -def: "A short preexisting polynucleotide chain to which new deoxyribonucleotides can be added by DNA polymerase." [http://www.ornl.gov/TechResources/Human_Genome/publicat/primer2001/glossary.html] -subset: SOFA -synonym: "DNA primer" EXACT [] -synonym: "primer oligonucleotide" EXACT [] -synonym: "primer polynucleotide" EXACT [] -synonym: "primer sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Primer_(molecular_biology) "wiki" -is_a: SO:0000441 ! 
ss_oligo - -[Term] -id: SO:0000113 -name: proviral_region -def: "A viral sequence which has integrated into a host genome." [SO:ke] -subset: SOFA -synonym: "proviral region" EXACT [] -synonym: "proviral sequence" RELATED [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000114 -name: methylated_C -def: "A methylated deoxy-cytosine." [SO:ke] -subset: SOFA -synonym: "methylated C" EXACT [] -synonym: "methylated cytosine" EXACT [] -synonym: "methylated cytosine base" EXACT [] -synonym: "methylated cytosine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000115 -name: transcript_feature -is_obsolete: true - -[Term] -id: SO:0000116 -name: edited -def: "An attribute describing a sequence that is modified by editing." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000117 -name: transcript_with_readthrough_stop_codon -is_obsolete: true - -[Term] -id: SO:0000118 -name: transcript_with_translational_frameshift -def: "A transcript with a translational frameshift." [SO:xp] -synonym: "transcript with translational frameshift" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000119 -name: regulated -def: "An attribute to describe a sequence that is regulated." [SO:ke] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns. -subset: SOFA -synonym: "pre mRNA" RELATED [] -synonym: "protein coding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000121 -name: forward_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
-synonym: "DNA forward primer" EXACT [] -synonym: "forward DNA primer" EXACT [] -synonym: "forward primer" EXACT [] -synonym: "forward primer oligo" EXACT [] -synonym: "forward primer oligonucleotide" EXACT [] -synonym: "forward primer polynucleotide" EXACT [] -synonym: "forward primer sequence" EXACT [] -intersection_of: SO:0000112 ! primer -intersection_of: has_quality SO:0001030 ! forward - -[Term] -id: SO:0000122 -name: RNA_sequence_secondary_structure -def: "A folded RNA sequence." [SO:ke] -synonym: "RNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000123 -name: transcriptionally_regulated -def: "An attribute describing a gene that is regulated at transcription." [SO:ma] -comment: By:. -synonym: "transcriptionally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -def: "Expressed in relatively constant amounts without regard to cellular environmental conditions such as the concentration of a particular substrate." [SO:ke] -synonym: "transcriptionally constitutive" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -def: "An inducer molecule is required for transcription to occur." [SO:ke] -synonym: "transcriptionally induced" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -def: "A repressor molecule is required for transcription to stop." [SO:ke] -synonym: "transcriptionally repressed" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000127 -name: silenced_gene -def: "A gene that is silenced." [SO:xp] -synonym: "silenced gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000893 ! silenced - -[Term] -id: SO:0000128 -name: gene_silenced_by_DNA_modification -def: "A gene that is silenced by DNA modification." 
[SO:xp] -synonym: "gene silenced by DNA modification" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000129 -name: gene_silenced_by_DNA_methylation -def: "A gene that is silenced by DNA methylation." [SO:xp] -synonym: "gene silenced by DNA methylation" EXACT [] -synonym: "methylation-silenced gene" EXACT [] -intersection_of: SO:0000128 ! gene_silenced_by_DNA_modification -intersection_of: has_quality SO:0000895 ! silenced_by_DNA_methylation - -[Term] -id: SO:0000130 -name: post_translationally_regulated -def: "An attribute describing a gene that is regulated after it has been translated." [SO:ke] -synonym: "post translationally regulated" EXACT [] -synonym: "post-translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000131 -name: translationally_regulated -def: "An attribute describing a gene that is regulated as it is translated." [SO:ke] -synonym: "translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "DNA reverse primer" EXACT [] -synonym: "reverse DNA primer" EXACT [] -synonym: "reverse primer" EXACT [] -synonym: "reverse primer oligo" EXACT [] -synonym: "reverse primer oligonucleotide" EXACT [] -synonym: "reverse primer sequence" EXACT [] -intersection_of: SO:0000112 ! primer -intersection_of: has_quality SO:0001031 ! reverse - -[Term] -id: SO:0000133 -name: epigenetically_modified -def: "This attribute describes a gene where heritable changes other than those in the DNA sequence occur. These changes include: modification to the DNA (such as DNA methylation, the covalent modification of cytosine), and post-translational modification of histones." 
[SO:ke] -synonym: "epigenetically modified" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000134 -name: imprinted -def: "Imprinted genes are epigenetically modified genes that are expressed monoallelically according to their parent of origin." [SO:ke] -xref: http:http\://en.wikipedia.org/wiki/Genomic_imprinting "wiki" -is_a: SO:0000119 ! regulated -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000135 -name: maternally_imprinted -def: "The maternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "maternally imprinted" EXACT [] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -def: "The paternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "paternally imprinted" EXACT [] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -def: "Allelic exclusion is a process occuring in diploid organisms, where a gene is inactivated and not expressed in that cell." [SO:ke] -comment: Exapmles are x-innactivation and immunoglobulin formation. -synonym: "allelically excluded" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000138 -name: gene_rearranged_at_DNA_level -def: "An epigenetically modified gene, rearranged at the DNA level." [SO:xp] -synonym: "gene rearranged at DNA level" EXACT [] -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000904 ! rearranged_at_DNA_level - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." 
[SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -synonym: "DNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000145 -name: recoded_codon -def: "A codon that has been redefined at translation. The redefinition may be as a result of translational bypass, translational frameshifting or stop codon readthrough." [SO:xp] -synonym: "recoded codon" EXACT [] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000146 -name: capped -def: "An attribute describing when a sequence, usually an mRNA is capped by the addition of a modified guanine nucleotide at the 5' end." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
-subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unvailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http\://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "yeast artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." [SO:ma] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. -synonym: "bacterial artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000154 -name: PAC -def: "The P1-derived artificial chromosome are DNA constructs that are derived from the DNA of P1 bacteriophage. They can carry large amounts (about 100-300 kilobases) of other sequences for a variety of bioengineering purposes. It is one type of vector used to clone DNA fragments (100- to 300-kb insert size; average, 150 kb) in Escherichia coli cells." [http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. Drosophila melanogaster PACs carry an average insert size of 80 kb. The library represents a 6-fold coverage of the genome. -synonym: "P1" EXACT [] -synonym: "P1 artificial chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000155 -name: plasmid -def: "A self replicating, using the hosts cellular machinery, often circular nucleic acid molecule that is distinct from a chromosome in the organism." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "plasmid sequence" EXACT [] -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as a plasmid or packaged as a phage,since they retain the lambda cos sites." [SO:ma] -comment: Paper: vans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1):9-20. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cosmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Cosmid "wiki" -is_a: SO:0000440 ! 
vector_replicon - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma] -synonym: "phagemid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Phagemid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilises the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource: mapping and analysis of 96 clones. Genomics 1996. -synonym: "fosmid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Fosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000159 -name: deletion -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_obsolete: true - -[Term] -id: SO:0000161 -name: methylated_A -def: "A modified RNA base in which adenine has been methylated." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "Consensus region of primary transcript bordering junction of splicing. 
A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinary. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. 
This comment is a placeholder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000166 -name: enhancer_bound_by_factor -def: "An enhancer bound by a factor." [SO:xp] -synonym: "enhancer bound by factor" EXACT [] -intersection_of: SO:0000165 ! enhancer -intersection_of: has_quality SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." [SO:ke] -synonym: "pol I promoter" EXACT [] -synonym: "polymerase I promoter" EXACT [] -synonym: "RNA polymerase A promoter" EXACT [] -synonym: "RNApol I promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke] -synonym: "pol II promoter" RELATED [] -synonym: "polymerase II promoter" EXACT [] -synonym: "RNA polymerase B promoter" EXACT [] -synonym: "RNApol II promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter -relationship: has_part SO:0000174 ! 
TATA_box - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." [SO:ke] -synonym: "pol III promoter" EXACT [] -synonym: "polymerase III promoter" EXACT [] -synonym: "RNA polymerase C promoter" EXACT [] -synonym: "RNApol III promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter -relationship: has_part SO:0000174 ! TATA_box - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "CAAT box" EXACT [] -synonym: "CAAT signal" EXACT [] -synonym: "CAAT-box" EXACT [] -xref: http://en.wikipedia.org/wiki/CAAT_box "wiki" -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000173 -name: GC_rich_promoter_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "GC rich promoter region" EXACT [] -synonym: "GC-rich region" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "Goldstein-Hogness box" EXACT [] -synonym: "TATA box" EXACT [] -xref: http://en.wikipedia.org/wiki/TATA_box "wiki" -is_a: SO:0000235 ! 
TF_binding_site - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-10 signal" EXACT [] -synonym: "minus 10 signal" EXACT [] -synonym: "Pribnow box" EXACT [] -synonym: "Pribnow Schaller box" EXACT [] -synonym: "Pribnow-Schaller box" EXACT [] -xref: http://en.wikipedia.org/wiki/Pribnow_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000613 ! bacterial_RNApol_promoter - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-35 signal" EXACT [] -synonym: "minus 35 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000613 ! bacterial_RNApol_promoter - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! 
clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." [http://www.dddmag.com/Glossary.aspx#r] -synonym: "class I" RELATED [] -synonym: "class I transposon" EXACT [] -synonym: "retrotransposon element" EXACT [] -xref: http://en.wikipedia.org/wiki/Retrotransposon "wiki" -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." [SO:ke] -synonym: "class II" RELATED [] -synonym: "class II transposon" EXACT [] -synonym: "DNA transposon" EXACT [] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -synonym: "U2 intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! 
transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." [SO:ke] -synonym: "long terminal repeat retrotransposon" EXACT [] -synonym: "LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -synonym: "non LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -synonym: "5' intron" EXACT [] -synonym: "5' intron sequence" EXACT [] -synonym: "five prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000191 -name: interior_intron -synonym: "interior intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -synonym: "3' intron" EXACT [] -synonym: "3' intron sequence" RELATED [] -synonym: "three prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." 
[GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "LINE" EXACT [] -synonym: "LINE element" EXACT [] -synonym: "Long interspersed element" EXACT [] -synonym: "Long interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon (here, 'codon'is inclusive of the stop_codon)." [SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! 
exon - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke] -synonym: "translocated sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000201 -name: interior_exon -def: "An exon that is bounded by 5' and 3' splice sites." [PMID:10373547] -synonym: "interior exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The coding exon that is most 3-prime on a given transcript." [SO:ma] -synonym: "3' coding exon" RELATED [] -synonym: "three prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." [SO:ke] -synonym: "Short interspersed element" EXACT [] -synonym: "Short interspersed nuclear element" EXACT [] -synonym: "SINE element" EXACT [] -xref: http://en.wikipedia.org/wiki/Short_interspersed_nuclear_element "wiki" -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_variation -synonym: "simple sequence length polymorphism" RELATED [] -synonym: "simple sequence length variation" EXACT [] -synonym: "SSLP" RELATED [] -is_a: SO:0000248 ! sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "terminal inverted repeat element" EXACT [] -synonym: "TIR element" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -subset: SOFA -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253)." [SO:ke] -synonym: "tRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -def: "A primary transcript encoding alanyl tRNA." 
[SO:ke] -synonym: "alanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." [SO:ke] -synonym: "arginine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -synonym: "asparagine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -synonym: "aspartic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." [SO:ke] -synonym: "cysteine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." [SO:ke] -synonym: "glycine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." 
[SO:ke] -synonym: "histidine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke] -synonym: "isoleucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -synonym: "leucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -synonym: "lysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -synonym: "methionine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke] -synonym: "phenylalanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -synonym: "proline tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -synonym: "serine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -synonym: "threonine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -synonym: "tryptophan tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." [SO:ke] -synonym: "tyrosine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." [SO:ke] -synonym: "valine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear RNA (SO:0000274)." [SO:ke] -synonym: "snRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -synonym: "snoRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. 
It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds a TF complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The inframe interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -synonym: "transcript attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterised by large modular imperfect long inverted repeats." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "foldback element" EXACT [] -synonym: "long inverted repeat element" RELATED [] -synonym: "LVR element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The sequences extending on either side of a specific region." 
[SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000240 -name: chromosome_variation -synonym: "chromosome variation" EXACT [] -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000400 ! sequence_attribute -disjoint_from: SO:0001060 ! sequence_variant -disjoint_from: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0000241 -name: internal_UTR -def: "A UTR bordered by the terminal and initial codons of two CDSs in a polycistronic transcript. Every UTR is either 5', 3' or internal." [SO:cjm] -synonym: "internal UTR" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polycistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -synonym: "untranslated region polycistronic mRNA" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke] -synonym: "internal ribosomal entry sequence" EXACT [] -synonym: "internal ribosomal entry site" EXACT [] -synonym: "internal ribosome entry sequence" RELATED [] -synonym: "internal ribosome entry site" EXACT [] -synonym: "IRES" EXACT [] -xref: http://en.wikipedia.org/wiki/Internal_ribosome_entry_site "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_obsolete: true - -[Term] -id: SO:0000246 -name: polyadenylated -def: "A attribute describing the addition of a poly A tail to the 3' end of a mRNA molecule." [SO:ke] -is_a: SO:0000863 ! 
mRNA_attribute - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000248 -name: sequence_length_variation -synonym: "sequence length variation" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -synonym: "modified RNA base feature" EXACT [] -is_a: SO:0001236 ! base - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). 
Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000254 -name: alanyl_tRNA -def: "A tRNA sequence that has an alanine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "alanyl tRNA" EXACT [] -synonym: "alanyl-transfer ribonucleic acid" EXACT [] -synonym: "alanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000211 ! alanine_tRNA_primary_transcript - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -synonym: "rRNA small subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -def: "A tRNA sequence that has an asparagine anticodon, and a 3' asparagine binding region." [SO:ke] -synonym: "asparaginyl tRNA" EXACT [] -synonym: "asparaginyl-transfer ribonucleic acid" EXACT [] -synonym: "asparaginyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000213 ! asparagine_tRNA_primary_transcript - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -def: "A tRNA sequence that has an aspartic acid anticodon, and a 3' aspartic acid binding region." [SO:ke] -synonym: "aspartyl tRNA" EXACT [] -synonym: "aspartyl-transfer ribonucleic acid" EXACT [] -synonym: "aspartyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! 
tRNA -relationship: derives_from SO:0000214 ! aspartic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -def: "A tRNA sequence that has a cysteine anticodon, and a 3' cysteine binding region." [SO:ke] -synonym: "cysteinyl tRNA" EXACT [] -synonym: "cysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "cysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000215 ! cysteine_tRNA_primary_transcript - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -def: "A tRNA sequence that has a glutamine anticodon, and a 3' glutamine binding region." [SO:ke] -synonym: "glutaminyl tRNA" EXACT [] -synonym: "glutaminyl-transfer ribonucleic acid" EXACT [] -synonym: "glutaminyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000216 ! glutamic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -def: "A tRNA sequence that has a glutamic acid anticodon, and a 3' glutamic acid binding region." [SO:ke] -synonym: "glutamyl tRNA" EXACT [] -synonym: "glutamyl-transfer ribonucleic acid" EXACT [] -synonym: "glutamyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000217 ! glutamine_tRNA_primary_transcript - -[Term] -id: SO:0000261 -name: glycyl_tRNA -def: "A tRNA sequence that has a glycine anticodon, and a 3' glycine binding region." [SO:ke] -synonym: "glycyl tRNA" EXACT [] -synonym: "glycyl-transfer ribonucleic acid" RELATED [] -synonym: "glycyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000218 ! glycine_tRNA_primary_transcript - -[Term] -id: SO:0000262 -name: histidyl_tRNA -def: "A tRNA sequence that has a histidine anticodon, and a 3' histidine binding region." [SO:ke] -synonym: "histidyl tRNA" EXACT [] -synonym: "histidyl-transfer ribonucleic acid" EXACT [] -synonym: "histidyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000219 ! 
histidine_tRNA_primary_transcript - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -def: "A tRNA sequence that has an isoleucine anticodon, and a 3' isoleucine binding region." [SO:ke] -synonym: "isoleucyl tRNA" EXACT [] -synonym: "isoleucyl-transfer ribonucleic acid" EXACT [] -synonym: "isoleucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000220 ! isoleucine_tRNA_primary_transcript - -[Term] -id: SO:0000264 -name: leucyl_tRNA -def: "A tRNA sequence that has a leucine anticodon, and a 3' leucine binding region." [SO:ke] -synonym: "leucyl tRNA" EXACT [] -synonym: "leucyl-transfer ribonucleic acid" EXACT [] -synonym: "leucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000221 ! leucine_tRNA_primary_transcript - -[Term] -id: SO:0000265 -name: lysyl_tRNA -def: "A tRNA sequence that has a lysine anticodon, and a 3' lysine binding region." [SO:ke] -synonym: "lysyl tRNA" EXACT [] -synonym: "lysyl-transfer ribonucleic acid" EXACT [] -synonym: "lysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000222 ! lysine_tRNA_primary_transcript - -[Term] -id: SO:0000266 -name: methionyl_tRNA -def: "A tRNA sequence that has a methionine anticodon, and a 3' methionine binding region." [SO:ke] -synonym: "methionyl tRNA" EXACT [] -synonym: "methionyl-transfer ribonucleic acid" EXACT [] -synonym: "methionyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000223 ! methionine_tRNA_primary_transcript - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -def: "A tRNA sequence that has a phenylalanine anticodon, and a 3' phenylalanine binding region." [SO:ke] -synonym: "phenylalanyl tRNA" EXACT [] -synonym: "phenylalanyl-transfer ribonucleic acid" EXACT [] -synonym: "phenylalanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000224 ! 
phenylalanine_tRNA_primary_transcript - -[Term] -id: SO:0000268 -name: prolyl_tRNA -def: "A tRNA sequence that has a proline anticodon, and a 3' proline binding region." [SO:ke] -synonym: "prolyl tRNA" EXACT [] -synonym: "prolyl-transfer ribonucleic acid" EXACT [] -synonym: "prolyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000225 ! proline_tRNA_primary_transcript - -[Term] -id: SO:0000269 -name: seryl_tRNA -def: "A tRNA sequence that has a serine anticodon, and a 3' serine binding region." [SO:ke] -synonym: "seryl tRNA" EXACT [] -synonym: "seryl-transfer ribonucleic acid" RELATED [] -synonym: "seryl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000226 ! serine_tRNA_primary_transcript - -[Term] -id: SO:0000270 -name: threonyl_tRNA -def: "A tRNA sequence that has a threonine anticodon, and a 3' threonine binding region." [SO:ke] -synonym: "threonyl tRNA" EXACT [] -synonym: "threonyl-transfer ribonucleic acid" EXACT [] -synonym: "threonyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000227 ! threonine_tRNA_primary_transcript - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -def: "A tRNA sequence that has a tryptophan anticodon, and a 3' tryptophan binding region." [SO:ke] -synonym: "tryptophanyl tRNA" EXACT [] -synonym: "tryptophanyl-transfer ribonucleic acid" EXACT [] -synonym: "tryptophanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000228 ! tryptophan_tRNA_primary_transcript - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -def: "A tRNA sequence that has a tyrosine anticodon, and a 3' tyrosine binding region." [SO:ke] -synonym: "tyrosyl tRNA" EXACT [] -synonym: "tyrosyl-transfer ribonucleic acid" EXACT [] -synonym: "tyrosyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000229 ! 
tyrosine_tRNA_primary_transcript - -[Term] -id: SO:0000273 -name: valyl_tRNA -def: "A tRNA sequence that has a valine anticodon, and a 3' valine binding region." [SO:ke] -synonym: "valyl tRNA" EXACT [] -synonym: "valyl-transfer ribonucleic acid" EXACT [] -synonym: "valyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000230 ! valine_tRNA_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000231 ! snRNA_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. 
Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: derives_from SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000277 -name: bound_by_factor -def: "An attribute describing a sequence that is bound by another molecule." [SO:ke] -comment: Formerly called transcript_by_bound_factor. -synonym: "bound by factor" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000278 -name: transcript_bound_by_nucleic_acid -def: "A transcript that is bound by a nucleic acid." [SO:xp] -comment: Formerly called transcript_by_bound_nucleic_acid. -synonym: "transcript bound by nucleic acid" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000876 ! bound_by_nucleic_acid - -[Term] -id: SO:0000279 -name: transcript_bound_by_protein -def: "A transcript that is bound by a protein." [SO:xp] -comment: Formerly called transcript_by_bound_protein. -synonym: "transcript bound by protein" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000875 ! bound_by_protein - -[Term] -id: SO:0000280 -name: engineered_gene -def: "A gene that is engineered." [SO:xp] -synonym: "engineered gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000281 -name: engineered_foreign_gene -def: "A gene that is engineered and foreign." [SO:xp] -synonym: "engineered foreign gene" EXACT [] -intersection_of: SO:0000280 ! engineered_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000282 -name: mRNA_with_minus_1_frameshift -def: "An mRNA with a minus 1 frameshift." 
[SO:xp] -synonym: "mRNA with minus 1 frameshift" EXACT [] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000866 ! minus_1_frameshift - -[Term] -id: SO:0000283 -name: engineered_foreign_transposable_element_gene -def: "A transposible_element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign transposable element gene" EXACT [] -intersection_of: SO:0000111 ! transposable_element_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartite and interrupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000285 -name: foreign_gene -def: "A gene that is foreign." [SO:xp] -synonym: "foreign gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "direct terminal repeat" RELATED [] -synonym: "long terminal repeat" EXACT [] -synonym: "LTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Long_terminal_repeat "wiki" -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000287 -name: fusion_gene -def: "A gene that is a fusion." [SO:xp] -synonym: "fusion gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Fusion_gene "wiki" -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000806 ! fusion - -[Term] -id: SO:0000288 -name: engineered_fusion_gene -def: "A fusion gene that is engineered." [SO:xp] -synonym: "engineered fusion gene" EXACT [] -intersection_of: SO:0000287 ! fusion_gene -intersection_of: has_quality SO:0000783 ! 
engineered - -[Term] -id: SO:0000289 -name: microsatellite -def: "A repeat_region containing repeat_units (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite" EXACT [] -synonym: "dinucleotide repeat microsatellite feature" EXACT [] -synonym: "dinucleotide repeat microsatellite locus" EXACT [] -synonym: "dinucleotide repeat microsatellite marker" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite marker" RELATED [] -synonym: "rinucleotide repeat microsatellite" EXACT [] -synonym: "trinucleotide repeat microsatellite feature" EXACT [] -synonym: "trinucleotide repeat microsatellite locus" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_obsolete: true - -[Term] -id: SO:0000293 -name: engineered_foreign_repetitive_element -def: "A repetitive element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign repetitive element" EXACT [] -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! 
repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -synonym: "U12 intron" EXACT [] -synonym: "U12-dependent intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! replicon - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "D-loop" EXACT [] -synonym: "displacement loop" RELATED [] -xref: http://en.wikipedia.org/wiki/D_loop "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -synonym: "recombination feature" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000299 -name: specific_recombination_site -synonym: "specific recombination site" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -synonym: "recombination feature of rearranged gene" EXACT [] -is_a: SO:0000299 ! 
specific_recombination_site - -[Term] -id: SO:0000301 -name: vertebrate_immune_system_gene_recombination_feature -synonym: "vertebrate immune system gene recombination feature" EXACT [] -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene recombination feature" EXACT [] -synonym: "J-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interrupted palindrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_base_site -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G or (in RNA) U." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001236 ! base - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_base_site - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_obsolete: true - -[Term] -id: SO:0000309 -name: computed_feature -is_obsolete: true - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_obsolete: true - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to: -is_obsolete: true - -[Term] -id: SO:0000312 -name: experimentally_determined -def: "Attribute to describe a feature that has been experiemntally verified." [SO:ke] -synonym: "experimentally determined" EXACT [] -is_a: SO:0000789 ! validated - -[Term] -id: SO:0000313 -name: stem_loop -alt_id: SO:0000019 -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "RNA_hairpin_loop" EXACT [] -synonym: "stem loop" EXACT [] -synonym: "stem-loop" EXACT [] -xref: http://en.wikipedia.org/wiki/Stem_loop "wiki" -is_a: SO:0000122 ! RNA_sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: TSS -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! 
mRNA_region - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cDNA clone" EXACT [] -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke] -synonym: "intronic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000321 -name: mRNA_with_plus_1_frameshift -def: "An mRNA with a plus 1 frameshift." [SO:ke] -synonym: "mRNA with plus 1 frameshift" EXACT [] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000868 ! plus_1_frameshift - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -synonym: "nuclease hypersensitive site" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "coding start" EXACT [] -synonym: "translation initiation site" EXACT [] -synonym: "translation start" RELATED [] -is_a: SO:0000851 ! 
CDS_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke] -synonym: "coding end" EXACT [] -synonym: "translation termination site" EXACT [] -synonym: "translation_end" EXACT [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray oligo" EXACT [] -synonym: "microarray oligonucleotide" EXACT [] -is_a: SO:0000051 ! probe - -[Term] -id: SO:0000329 -name: mRNA_with_plus_2_frameshift -def: "An mRNA with a plus 2 frameshift." [SO:xp] -synonym: "mRNA with plus 2 frameshift" EXACT [] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000869 ! plus_2_framshift - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000335 -name: mRNA_with_minus_2_frameshift -def: "A mRNA with a minus 2 frameshift." [SO:ke] -synonym: "mRNA with minus 2 frameshift" EXACT [] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000867 ! minus_2_frameshift - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." 
[http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITEs do not encode proteins." [http://www.pnas.org/cgi/content/full/97/18/10083] -synonym: "miniature inverted repeat transposable element" EXACT [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome which promotes recombination." [SO:rd] -synonym: "recombination hotspot" EXACT [] -xref: http://en.wikipedia.org/wiki/Recombination_hotspot "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! 
chromosome_part - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -synonym: "site specific recombination target region" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000346 -name: loxP_site -synonym: "Cre-recombination target region" RELATED [] -synonym: "loxP site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000348 -name: nucleic_acid -def: "An attribute describing a sequence consisting of nucleobases bound to repeating units. The forms found in nature are deoxyribonucleic acid (DNA), where the repeating units are 2-deoxy-D-ribose rings connected to a phosphate backbone, and ribonucleic acid (RNA), where the repeating units are D-ribose rings connected to a phosphate backbone." [CHEBI:33696, RSC:cb] -synonym: "nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleic_acid "wiki" -is_a: SO:0000443 ! 
polymer_attribute - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000350 -name: FRT_site -def: "An inversion site found on the Saccharomyces cerevisiae 2 micron plasmid." [SO:ma] -synonym: "FLP recombination target region" EXACT [] -synonym: "FRT site" EXACT [] -is_a: SO:0000948 ! inversion_site - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "An attribute to decide a sequence of nucleotides, nucleotide analogs, or amino acids that has been designed by an experimenter and which may, or may not, correspond with any natural sequence." [SO:ma] -synonym: "synthetic sequence" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000352 -name: DNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a 2-deoxy-D-ribose ring connected to a phosphate backbone." [RSC:cb] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000353 -name: sequence_assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -synonym: "group 1 intron homing endonuclease target region" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which is co-inherited as the result of the lack of historic recombination within it." [SO:ma] -synonym: "haplotype block" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000356 -name: RNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a D-ribose ring connected to a phosphate backbone." 
[RSC:cb] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: flanked -def: "An attribute describing a region that is bounded either side by a paricular kind of region." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000359 -name: floxed -def: "An attribute describing sequence that is flanked by Lox-P sites." [SO:ke] -xref: http://en.wikipedia.org/wiki/Floxed "wiki" -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000361 -name: FRT_flanked -def: "An attribute to describe sequence that is flanked by the FLP recombinase recognition site, FRT." [SO:ke] -synonym: "FRT flanked" EXACT [] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000362 -name: invalidated_by_chimeric_cDNA -def: "A cDNA clone constructed from more than one mRNA. Usually an experimental artifact." [SO:ma] -synonym: "invalidated by chimeric cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000363 -name: floxed_gene -def: "A transgene that is floxed." [SO:xp] -synonym: "floxed gene" EXACT [] -intersection_of: SO:0000902 ! transgene -intersection_of: has_quality SO:0000359 ! floxed - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposible element." [SO:ke] -synonym: "transposable element flanking region" EXACT [] -is_a: SO:0000239 ! 
flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "A region encoding an integrase which acts at a site adjacent to it (attI_site) to insert DNA which must include but is not limited to an attC_site." [SO:as] -xref: http://en.wikipedia.org/wiki/Integron "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000367 -name: attI_site -def: "A region within an integron, adjacent to an integrase, at which site specific recombination involving an attC_site takes place." [SO:as] -synonym: "attI site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -is_obsolete: true - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/~smaloy/Glossary/C.html] -synonym: "conjugative transposon" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." 
[RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0000373 -name: recombinationally_inverted_gene -def: "A recombinationally rearranged gene by inversion." [SO:xp] -synonym: "recombinationally inverted gene" EXACT [] -intersection_of: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: has_quality SO:1000036 ! inversion - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -intersection_of: SO:0000372 ! enzymatic_RNA -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000375 -name: rRNA_5_8S -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 6S RNA represses expression from a sigma70-dependent promoter during stationary phase." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S RNA" EXACT [] -synonym: "RNA 6S" EXACT [] -xref: http://en.wikipedia.org/wiki/6S_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB RsmB RNA" EXACT [] -synonym: "CsrB-RsmB RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -synonym: "DsrA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/DsrA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -synonym: "GcvB RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/GcvB_RNA "wiki" -is_a: SO:0000378 ! DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http://rnaworld.bio.ku.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -intersection_of: SO:0000715 ! RNA_motif -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000381 -name: group_IIA_intron -synonym: "group IIA intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -synonym: "group IIB intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -synonym: "MicF RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MicF_RNA "wiki" -is_a: SO:0000644 ! antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. 
Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -synonym: "OxyS RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/OxyS_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterised activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. 
Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -synonym: "RprA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RprA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -synonym: "RRE RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -synonym: "spot-42 RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Spot_42_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesises telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -def: "An attribute describes a quality of sequence." [SO:ke] -synonym: "sequence attribute" EXACT [] -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:0001060 ! sequence_variant -disjoint_from: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0000401 -name: gene_attribute -synonym: "gene attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_obsolete: true - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0005837 ! 
U14_snoRNA_primary_transcript - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron. Twintrons are group II or III introns, into which another group II or III intron has been transposed." [PMID:1899376, PMID:7823908] -xref: http://en.wikipedia.org/wiki/Twintron "wiki" -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! 
small_subunit_rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: Discrete. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A region of a molecule that binds to a protein." [SO:ke] -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000411 -name: rescue_region -def: "A region that rescues." [SO:xp] -synonym: "rescue fragment" EXACT [] -synonym: "rescue region" EXACT [] -synonym: "rescue segment" RELATED [] -intersection_of: SO:0000695 ! reagent -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000414 -name: invalidated_by_genomic_contamination -def: "An attribute to describe a feature that is invalidated due to genomic contamination." 
[SO:ke] -synonym: "invalidated by genomic contamination" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000415 -name: invalidated_by_genomic_polyA_primed_cDNA -def: "An attribute to describe a feature that is invalidated due to polyA priming." [SO:ke] -synonym: "invalidated by genomic polyA primed cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000416 -name: invalidated_by_partial_processing -def: "An attribute to describe a feature that is invalidated due to partial processing." [SO:ke] -synonym: "invalidated by partial processing" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000417 -name: polypeptide_domain -alt_id: BS:00012 -alt_id: BS:00134 -alt_id: SO:0001069 -def: "A structurally or functionally defined protein region. In proteins with multiple domains, the combination of the domains determines the function of the protein. A region which has been shown to recur throughout evolution." [EBIBS:GAR] -comment: Range. Old definition from before biosapiens: A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains. -subset: biosapiens -synonym: "ca_bind" RELATED BS [uniprot:feature_type] -synonym: "DNA_bind" RELATED BS [uniprot:feature_type] -synonym: "domain" BROAD BS [uniprot:feature_type] -synonym: "np_bind" RELATED BS [uniprot:feature_type] -synonym: "polypeptide domain" EXACT [] -synonym: "polypeptide_structural_domain" EXACT BS [] -synonym: "structural domain" BROAD BS [] -synonym: "zn_fing" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -synonym: "5' TIR" EXACT [] -synonym: "five prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -synonym: "3' TIR" EXACT [] -synonym: "three prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -synonym: "U5 long terminal repeat region" EXACT [] -synonym: "U5 LTR region" EXACT [] -is_a: SO:0000848 ! 
LTR_component - -[Term] -id: SO:0000423 -name: R_LTR_region -synonym: "R long terminal repeat region" EXACT [] -synonym: "R LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000424 -name: U3_LTR_region -synonym: "U3 long terminal repeat region" EXACT [] -synonym: "U3 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000425 -name: five_prime_LTR -synonym: "5' long terminal repeat" EXACT [] -synonym: "5' LTR" EXACT [] -synonym: "five prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -synonym: "3' long terminal repeat" EXACT [] -synonym: "3' LTR" EXACT [] -synonym: "three prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -synonym: "R 5' long term repeat region" EXACT [] -synonym: "R five prime LTR region" EXACT [] -is_a: SO:0000423 ! R_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -synonym: "U5 5' long terminal repeat region" EXACT [] -synonym: "U5 five prime LTR region" EXACT [] -is_a: SO:0000422 ! U5_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -synonym: "U3 5' long term repeat region" EXACT [] -synonym: "U3 five prime LTR region" EXACT [] -is_a: SO:0000424 ! U3_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -synonym: "R 3' long terminal repeat region" EXACT [] -synonym: "R three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -synonym: "U3 3' long terminal repeat region" EXACT [] -synonym: "U3 three prime LTR region" EXACT [] -is_a: SO:0000849 ! 
three_prime_LTR_component - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -synonym: "U5 3' long terminal repeat region" EXACT [] -synonym: "U5 three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -synonym: "non LTR retrotransposon polymeric tract" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: target_site_duplication -def: "A sequence of the target DNA that is duplicated when a transposable element or phage inserts; usually found at each end the insertion." [http://www.koko.gov.my/CocoaBioTech/Glossaryt.html] -synonym: "target site duplication" EXACT [] -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." [SO:ke] -synonym: "LTR retrotransposon poly purine tract" RELATED [] -synonym: "RR tract" EXACT [] -is_a: SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -synonym: "inverted ring chromosome" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! ring_chromosome - -[Term] -id: SO:0000440 -name: vector_replicon -def: "A replicon that has been modified to act as a vector for foreign sequence." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
-synonym: "vector" EXACT [] -synonym: "vector replicon" EXACT [] -xref: http://en.wikipedia.org/wiki/Vector_(molecular_biology) "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000443 -name: polymer_attribute -def: "An attribute to describe the kind of biological sequence." [SO:ke] -synonym: "polymer attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -synonym: "three prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "5' nc exon" EXACT [] -synonym: "5' non coding exon" EXACT [] -synonym: "five prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." [SO:ke] -synonym: "UTR intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." [SO:ke] -synonym: "five prime UTR intron" EXACT [] -is_a: SO:0000446 ! 
UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -synonym: "three prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequency of these components." [SO:ma] -synonym: "random sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -synonym: "chromosome interband" RELATED [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000451 -name: gene_with_polyadenylated_mRNA -def: "A gene that encodes a polyadenylated mRNA." [SO:xp] -synonym: "gene with polyadenylated mRNA" EXACT [] -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000871 ! polyadenylated_mRNA - -[Term] -id: SO:0000452 -name: transgene_attribute -is_obsolete: true - -[Term] -id: SO:0000453 -name: transposition -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000455 -name: gene_with_mRNA_with_frameshift -def: "A gene that encodes an mRNA with a frameshift." [SO:xp] -synonym: "gene with mRNA with frameshift" EXACT [] -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000456 -name: recombinationally_rearranged_gene -def: "A gene that is recombinationally rearranged." 
[SO:ke] -synonym: "recombinationally rearranged gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000940 ! recombinationally_rearranged - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -def: "A chromosome duplication involving an insertion from another chromosome." [SO:ke] -synonym: "interchromosomal duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D gene" EXACT [] -synonym: "D-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000459 -name: gene_with_trans_spliced_transcript -def: "A gene with a transcript that is trans-spliced." [SO:xp] -synonym: "gene with trans spliced transcript" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000479 ! trans_spliced_transcript - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_segment -comment: I am using the term segment instead of gene here to avoid confusion with the region 'gene'. -synonym: "vertebrate immunoglobulin T cell receptor segment" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite deficiency" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000463 -name: encodes_alternately_spliced_transcripts -def: "A gene that encodes more than one transcript." [SO:ke] -synonym: "encodes alternately spliced transcripts" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus duplication" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene" EXACT [] -synonym: "V-GENE" EXACT [] -synonym: "variable_gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -def: "An attribute describing a gene sequence where the resulting protein is regulated by the stability of the resulting protein." [SO:ke] -synonym: "post translationally regulated by protein stability" EXACT [] -synonym: "post-translationally regulated by protein stability" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! 
golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -def: "An attribute describing a gene sequence where the resulting protein is modified to regulate it." [SO:ke] -synonym: "post translationally regulated by protein modification" EXACT [] -synonym: "post-translationally regulated by protein modification" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene" EXACT [] -synonym: "J-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000471 -name: autoregulated -def: "The gene product is involved in its own transcriptional regulation." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -def: "The gene product is involved in its own transcriptional regulation where it decreases transcription." [SO:ke] -synonym: "negatively autoregulated" EXACT [] -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -def: "The gene product is involved in its own transcriptional regulation, where it increases transcription." 
[SO:ke] -synonym: "positively autoregulated" EXACT [] -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." [SO:ke] -synonym: "contig read" EXACT [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -def: "A gene that is polycistronic." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000478 -name: C_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C gene" EXACT [] -synonym: "C_GENE" EXACT [] -synonym: "constant gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000479 -name: trans_spliced_transcript -def: "A transcript that is trans-spliced." [SO:xp] -synonym: "trans spliced transcript" EXACT [] -synonym: "trans-spliced transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly." [SO:ke] -synonym: "tiling path clone" EXACT [] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occuring at the termini of a DNA transposon." [SO:ke] -synonym: "terminal inverted repeat" EXACT [] -synonym: "TIR" EXACT [] -is_a: SO:0000294 ! inverted_repeat -relationship: part_of SO:0000208 ! 
terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor gene cluster" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-CLUSTER" EXACT [] -synonym: "DJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! 
five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-C-CLUSTER" EXACT [] -synonym: "VDJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-CLUSTER" EXACT [] -synonym: "VDJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-C-CLUSTER" RELATED [] -synonym: "VJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-C-CLUSTER" EXACT [] -synonym: "VJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! 
J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-CLUSTER" EXACT [] -synonym: "VJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -synonym: "D gene recombination feature" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-HEPTAMER" EXACT [] -synonym: "three prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-NOMAMER" EXACT [] -synonym: "three prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-SPACER" EXACT [] -synonym: "three prime D spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-HEPTAMER" EXACT [] -synonym: "five prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-NONAMER" EXACT [] -synonym: "five prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'-SPACER" EXACT [] -synonym: "five prime D spacer" EXACT [] -synonym: "five prime D-spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000556 ! 
five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -synonym: "Hoogsteen base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Hoogsteen_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." [SO:ke] -synonym: "reverse Hoogsteen base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ C cluster" EXACT [] -synonym: "D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ cluster" EXACT [] -synonym: "D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J C cluster" EXACT [] -synonym: "D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000507 -name: pseudogenic_exon -def: "A non functional descendant of an exon, part of a pseudogene." [SO:ke] -comment: This is the analog of the exon of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutatations. -synonym: "pseudogenic exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon -relationship: part_of SO:0000516 ! 
pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J cluster" EXACT [] -synonym: "D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J C cluster" EXACT [] -synonym: "D-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000510 -name: VD_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V_D_GENE" EXACT [] -synonym: "VD gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J C cluster" EXACT [] -synonym: "J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! 
C_gene - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus aneuploid" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J cluster" EXACT [] -synonym: "J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J nonamer" EXACT [] -synonym: "J-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J heptamer" EXACT [] -synonym: "J-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -def: "A non functional descendant of a transcript, part of a pseudogene." [SO:ke] -comment: This is the analog of the transcript of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. 
Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutatations. -synonym: "pseudogenic transcript" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J spacer" EXACT [] -synonym: "J-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ cluster" EXACT [] -synonym: "V-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J cluster" EXACT [] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ C cluster" EXACT [] -synonym: "V-(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ cluster" EXACT [] -synonym: "V-(VDJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J cluster" EXACT [] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ C cluster" EXACT [] -synonym: "V-(VJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! 
VJ_gene - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ cluster" EXACT [] -synonym: "V-(VJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J cluster" EXACT [] -synonym: "V-(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V cluster" EXACT [] -synonym: "V-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ C cluster" EXACT [] -synonym: "V-D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! 
C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ cluster" EXACT [] -synonym: "V-D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J C cluster" EXACT [] -synonym: "V-D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J cluster" EXACT [] -synonym: "V-D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! 
DJ_gene - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J C cluster" EXACT [] -synonym: "V-D-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J cluster" EXACT [] -synonym: "V-D-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V heptamer" EXACT [] -synonym: "V-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J cluster" EXACT [] -synonym: "V-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! 
V_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J C cluster" EXACT [] -synonym: "V-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V nonamer" EXACT [] -synonym: "V-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V spacer" EXACT [] -synonym: "V-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene recombination feature" EXACT [] -synonym: "V-RS" EXACT [] -is_a: SO:0000939 ! 
vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-C-CLUSTER" EXACT [] -synonym: "DJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-C-CLUSTER" EXACT [] -synonym: "DJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-C-CLUSTER" EXACT [] -synonym: "VDJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ C cluster" EXACT [] -synonym: "V-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! 
V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." [http://www.pnas.org/cgi/content/full/100/11/6569] -synonym: "ISCR" RELATED [] -xref: http://en.wikipedia.org/wiki/Helitron "wiki" -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -synonym: "recoding pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -synonym: "designed sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite duplication" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000548 -name: gene_with_edited_transcript -def: "A gene that encodes a transcript that is edited." [SO:xp] -synonym: "gene with edited transcript" EXACT [] -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000873 ! 
edited_transcript - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived duplication plus aneuploid" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -synonym: "aneuploid chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "Region in 5' UTR where ribosome assembles on mRNA." [SO:ke] -synonym: "five prime ribosome binding site" EXACT [] -synonym: "RBS" RELATED [] -synonym: "Shine Dalgarno sequence" EXACT [] -synonym: "Shine-Dalgarno sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Shine-Dalgarno_sequence "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA site" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! 
mature_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "5' clip" RELATED [] -synonym: "five prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'RS" EXACT [] -synonym: "five prime D recombination signal sequence" EXACT [] -synonym: "five prime D-recombination signal sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "3'-clip" EXACT [] -synonym: "three prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://www.imgt.org/ligmb/LIGMlect?query=7] -synonym: "C cluster" EXACT [] -synonym: "C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D cluster" EXACT [] -synonym: "D-CLUSTER" EXACT [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J cluster" EXACT [] -synonym: "D-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: "Seven nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or J-gene recombination feature of an immunoglobulin or T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "HEPTAMER" RELATED [] -synonym: "heptamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -synonym: "nonamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000563 -name: vertebrate_immune_system_gene_recombination_spacer -synonym: "vertebrate immune system gene recombination spacer" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J C cluster" EXACT [] -synonym: "V-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J C cluster" EXACT [] -synonym: "V-(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J C cluster" EXACT [] -synonym: "V-(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inverversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -synonym: "inversion derived aneuploid chromosome" EXACT [] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promoter -synonym: "bidirectional promoter" EXACT [] -is_a: SO:0000167 ! 
promoter - -[Term] -id: SO:0000569 -name: retrotransposed -alt_id: SO:0100042 -def: "An attribute of a feature that occured as the product of a reverse transcriptase mediated event." [SO:ke] -comment: GO:0003964 RNA-directed DNA polymerase activity. -xref: http://en.wikipedia.org/wiki/Retrotransposed "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-RS" EXACT [] -synonym: "three prime D recombination signal sequence" EXACT [] -synonym: "three_prime_D-recombination_signal_sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_encoding -synonym: "miRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000572 -name: DJ_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D-J-GENE" EXACT [] -synonym: "DJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000573 -name: rRNA_encoding -synonym: "rRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000574 -name: VDJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-D-J-GENE" EXACT [] -synonym: "VDJ gene" EXACT [] -is_a: SO:0000936 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000575 -name: scRNA_encoding -synonym: "scRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000576 -name: VJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-J-GENE" EXACT [] -synonym: "VJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_encoding -synonym: "snoRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." [SO:ma] -synonym: "edited transcript feature" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -synonym: "methylation guide snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." 
[SO:ke] -synonym: "rRNA cleavage snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "pre edited region" EXACT [] -synonym: "pre-edited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "A tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. TmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and eukaryote nuclear genomes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa RNA" RELATED [] -synonym: "ssrA" RELATED [] -xref: http://en.wikipedia.org/wiki/TmRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_encoding -synonym: "C/D box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." [SO:ke] -synonym: "10Sa RNA primary transcript" RELATED [] -synonym: "ssrA RNA primary transcript" RELATED [] -synonym: "tmRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyse their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). 
These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -synonym: "SRP RNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. 
The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000589 ! SRP_RNA_primary_transcript - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A tertiary structure in RNA where nucleotides in a loop form base pairs with a region of RNA downstream of the loop." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Pseudoknot "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "classical pseudoknot" EXACT [] -synonym: "H pseudoknot" EXACT [] -synonym: "H-pseudoknot" EXACT [] -synonym: "H-type pseudoknot" EXACT [] -synonym: "hairpin-type pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. 
Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000595 ! C_D_box_snoRNA_primary_transcript - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "box H/ACA snoRNA" EXACT [] -synonym: "H ACA box snoRNA" EXACT [] -synonym: "H/ACA box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000596 ! H_ACA_box_snoRNA_primary_transcript - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." [SO:ke] -synonym: "C/D box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -synonym: "H ACA box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! 
snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." [http://www.rna.ucla.edu/index.html] -is_obsolete: true - -[Term] -id: SO:0000598 -name: edited_by_C_insertion_and_dinucleotide_insertion -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000599 -name: edited_by_C_to_U_substitution -is_obsolete: true - -[Term] -id: SO:0000600 -name: edited_by_A_to_I_substitution -is_obsolete: true - -[Term] -id: SO:0000601 -name: edited_by_G_addition -is_obsolete: true - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. 
Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing block" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing domain" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "unedited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_encoding -synonym: "H ACA box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! 
snoRNA_encoding - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -synonym: "oligo U tail" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000602 ! guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -synonym: "bacterial RNApol promoter" EXACT [] -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -synonym: "bacterial terminator" EXACT [] -is_a: SO:0000141 ! terminator -is_a: SO:0000752 ! 
gene_group_regulatory_region - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -synonym: "terminator of type 2 RNApol III promoter" EXACT [] -is_a: SO:0000951 ! eukaryotic_terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -synonym: "RNApol III promoter type 1" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "RNApol III promoter type 2" EXACT [] -synonym: "tRNA promoter" RELATED [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -def: "A variably distant linear promoter region recognised by TFIIIC, with consensus sequence TGGCnnAGTGG." [SO:ke] -synonym: "A-box" EXACT [] -xref: http://en.wikipedia.org/wiki/A-box "wiki" -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000620 -name: B_box -def: "A variably distant linear promoter region recognised by TFIIIC, with consensus sequence AGGTTCCAnnCC." [SO:ke] -synonym: "B-box" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -synonym: "RNApol III promoter type 3" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -def: "An RNA polymerase III type 1 promoter with consensus sequence CAnnCCn." [SO:ke] -synonym: "C-box" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000623 -name: snRNA_encoding -synonym: "snRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -synonym: "chromosomal regulatory element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000627 -name: insulator -def: "A trancriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -synonym: "five prime open reading frame" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -def: "A start codon upstream of the ORF." [SO:ke] -synonym: "upstream AUG codon" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000631 -name: polycistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." 
[SO:ke] -synonym: "polycistronic primary transcript" EXACT [] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000632 -name: monocistronic_primary_transcript -def: "A primary transcript encoding for one gene product." [SO:ke] -synonym: "monocistronic primary transcript" EXACT [] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000633 -name: monocistronic_mRNA -def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd] -synonym: "monocistronic mRNA" EXACT [] -synonym: "monocistronic processed transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Monocistronic_mRNA "wiki" -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000634 -name: polycistronic_mRNA -def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd] -synonym: "polycistronic mRNA" EXACT [] -synonym: "polycistronic processed transcript" RELATED [] -xref: http://en.wikipedia.org/wiki/Polycistronic_mRNA "wiki" -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "A primary transcript that donates the spliced leader to other mRNA." [SO:ke] -synonym: "mini exon donor RNA" EXACT [] -synonym: "mini-exon donor RNA" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -synonym: "spliced leader RNA" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000635 ! mini_exon_donor_RNA - -[Term] -id: SO:0000637 -name: engineered_plasmid -def: "A plasmid that is engineered." 
[SO:xp] -synonym: "engineered plasmid" EXACT [] -synonym: "engineered plasmid gene" RELATED [] -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -synonym: "transcribed spacer region" EXACT [] -is_a: SO:0000838 ! rRNA_primary_transcript_region - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -synonym: "internal transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -synonym: "external transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -synonym: "tetranucleotide repeat microsatellite feature" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_encoding -synonym: "SRP RNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." 
[SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro RNA primary transcript" EXACT [] -synonym: "miRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000648 -name: stRNA_primary_transcript -def: "A primary transcript encoding a small temporal mRNA (SO:0000649)." [SO:ke] -synonym: "small temporal RNA primary transcript" EXACT [] -synonym: "stRNA primary transcript" EXACT [] -is_a: SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." 
[SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000255 ! rRNA_small_subunit_primary_transcript - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilises 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000654 -name: maxicircle_gene -def: "A mitochondrial gene located in a maxicircle." 
[SO:xp] -synonym: "maxi-circle gene" EXACT [] -synonym: "maxicircle gene" EXACT [] -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000742 ! maxicircle - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000656 -name: stRNA_encoding -synonym: "stRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region -relationship: has_part SO:0000726 ! repeat_unit - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_encoding -synonym: "tmRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_obsolete: true - -[Term] -id: SO:0000661 -name: intron_attribute -is_obsolete: true - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! 
intron - -[Term] -id: SO:0000663 -name: tRNA_encoding -synonym: "tRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -synonym: "introgressed chromosome region" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000665 -name: monocistronic_transcript -def: "A transcript that is monocistronic." [SO:xp] -synonym: "monocistronic transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000666 -name: mobile_intron -def: "An intron (mitochondrial, chloroplast, nuclear or prokaryotic) that encodes a double strand sequence specific endonuclease allowing for mobility." [SO:ke] -synonym: "mobile intron" EXACT [] -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0000667 -name: insertion -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -synonym: "sequence rearrangement feature" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." [SO:ma] -synonym: "chromosome breakage sequence" EXACT [] -is_a: SO:0000669 ! 
sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -synonym: "internal eliminated sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -synonym: "macronucleus destined segment" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non canonical splice site" EXACT [] -synonym: "non-canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000678 -consider: SO:0000679 - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." [SO:ke] -synonym: "canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000676 -consider: SO:0000677 - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -synonym: "canonical 3' splice site" EXACT [] -synonym: "canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." [SO:ke] -synonym: "canonical 5' splice site" EXACT [] -synonym: "canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! 
five_prime_cis_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." [SO:ke] -synonym: "non canonical 3' splice site" RELATED [] -synonym: "non canonical three prime splice site" EXACT [] -synonym: "non-canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non canonical 5' splice site" EXACT [] -synonym: "non canonical five prime splice site" EXACT [] -synonym: "non-canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non ATG start codon" EXACT [] -synonym: "non canonical start codon" EXACT [] -synonym: "non-canonical start codon" EXACT [] -is_a: SO:0000318 ! start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -synonym: "aberrant processed transcript" EXACT [] -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -synonym: "exonic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! 
nuclease_binding_site - -[Term] -id: SO:0000685 -name: DNAseI_hypersensitive_site -synonym: "DHS" EXACT [] -synonym: "DNAseI hypersensitive site" EXACT [] -is_a: SO:0000322 ! nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "For some translocations, particularly but not exclusively, reciprocal translocations, the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements." [SO:ma] -synonym: "translocation element" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000690 -name: gene_with_polycistronic_transcript -def: "A gene that encodes a polycistronic transcript." [SO:xp] -synonym: "gene with polycistronic transcript" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000078 ! polycistronic_transcript - -[Term] -id: SO:0000691 -name: cleaved_initiator_methionine -alt_id: BS:00067 -def: "The initiator methionine that has been cleaved from a mature polypeptide sequence." [EBIBS:GAR] -subset: biosapiens -synonym: "cleaved initiator methionine" EXACT [] -synonym: "init_met" RELATED [uniprot:feature_type] -synonym: "initiator methionine" RELATED [] -is_a: SO:0100011 ! 
cleaved_peptide_region - -[Term] -id: SO:0000692 -name: gene_with_dicistronic_transcript -def: "A gene that encodes a dicistronic transcript." [SO:xp] -synonym: "gene with dicistronic transcript" EXACT [] -intersection_of: SO:0000690 ! gene_with_polycistronic_transcript -intersection_of: transcribed_to SO:0000079 ! dicistronic_transcript - -[Term] -id: SO:0000693 -name: gene_with_recoded_mRNA -def: "A gene that encodes an mRNA that is recoded." [SO:xp] -synonym: "gene with recoded mRNA" EXACT [] -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000881 ! recoded - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000697 -name: gene_with_stop_codon_read_through -def: "A gene that encodes a transcript with stop codon readthrough." [SO:xp] -synonym: "gene with stop codon read through" EXACT [] -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000698 -name: gene_with_stop_codon_redefined_as_pyrrolysine -def: "A gene encoding an mRNA that has the stop codon redefined as pyrrolysine." 
[SO:xp] -synonym: "gene with stop codon redefined as pyrrolysine" EXACT [] -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000884 ! stop_codon_redefined_as_pyrrolysine - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000001 ! region - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! 
gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjcent copies of a region (of length greater than 1)." [SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The 5' five prime splice site region of the donor RNA." [SO:ke] -comment: SL RNA contains a donor site. -synonym: "5 prime trans splice site" RELATED [] -synonym: "trans splice donor site" EXACT [] -synonym: "trans-splice donor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -synonym: "SL1 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -synonym: "SL2 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000710 -name: gene_with_stop_codon_redefined_as_selenocysteine -def: "A gene encoding an mRNA that has the stop codon redefined as selenocysteine." [SO:xp] -synonym: "gene with stop codon redefined as selenocysteine" EXACT [] -intersection_of: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: has_part SO:0000885 ! stop_codon_redefined_as_selenocysteine - -[Term] -id: SO:0000711 -name: gene_with_mRNA_recoded_by_translational_bypass -def: "A gene with mRNA recoded by translational bypass." [SO:xp] -synonym: "gene with mRNA recoded by translational bypass" EXACT [] -intersection_of: SO:0001217 ! 
protein_coding_gene -intersection_of: has_quality SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:0000712 -name: gene_with_transcript_with_translational_frameshift -def: "A gene encoding a transcript that has a translational frameshift." [SO:xp] -synonym: "gene with transcript with translational frameshift" EXACT [] -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000713 -name: DNA_motif -def: "A motif that is active in the DNA form of the sequence." [SO:ke] -synonym: "DNA motif" EXACT [] -xref: http://en.wikipedia.org/wiki/DNA_motif "wiki" -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000716 -name: dicistronic_mRNA -def: "An mRNA that has the quality dicistronic." [SO:ke] -synonym: "dicistronic mRNA" EXACT [] -synonym: "dicistronic processed transcript" RELATED [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interrupted by one or more stop codons; usually identified through intergenomic sequence comparisons." [SGD:rb] -comment: Term requested by Rama from SGD. -synonym: "blocked reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000720 -name: foreign_transposable_element -def: "A transposable element that is foreign." [SO:ke] -comment: requested by Michael on 19 Nov 2004. -synonym: "foreign transposable element" EXACT [] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000721 -name: gene_with_dicistronic_primary_transcript -def: "A gene that encodes a dicistronic primary transcript." [SO:xp] -comment: Requested by Michael, 19 nov 2004. -synonym: "gene with dicistronic primary transcript" EXACT [] -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:1001197 ! dicistronic_primary_transcript - -[Term] -id: SO:0000722 -name: gene_with_dicistronic_mRNA -def: "A gene that encodes a polycistronic mRNA." [SO:xp] -comment: Requested by MA nov 19 2004. -synonym: "gene with dicistronic mRNA" EXACT [] -synonym: "gene with dicistronic processed transcript" EXACT [] -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:0000716 ! dicistronic_mRNA - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." 
[SO:ma] -synonym: "intervening DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/IDNA "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000726 -name: repeat_unit -def: "The simplest repeated component of a repeat region. A single repeat." [SO:ke] -comment: Added to comply with the feature table. A single repeat. -synonym: "repeat unit" EXACT [] -xref: http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -intersection_of: SO:0001055 ! 
transcriptional_cis_regulatory_region -intersection_of: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000728 -name: intein -def: "A region of a peptide that is able to excise itself and rejoin the remaining portions with a peptide bond." [SO:ke] -comment: Intein-mediated protein splicing occurs after mRNA has been translated into a protein. -synonym: "protein intron" RELATED [] -xref: http://en.wikipedia.org/wiki/Intein "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000729 -name: intein_containing -def: "An attribute of protein-coding genes where the initial protein product contains an intein." [SO:ke] -synonym: "intein containing" EXACT [] -is_a: SO:0000010 ! protein_coding - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000731 -name: fragmentary -def: "An attribute to describe a feature that is incomplete." [SO:ke] -comment: Term added because of request by MO people. -synonym: "fragment" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000732 -name: predicted -def: "An attribute describing an unverified region." [SO:ke] -xref: http://en.wikipedia.org/wiki/Predicted "wiki" -is_a: SO:0000905 ! status - -[Term] -id: SO:0000733 -name: feature_attribute -def: "An attribute describing a located_sequence_feature." [SO:ke] -synonym: "feature attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000734 -name: exemplar_mRNA -def: "An exemplar is a representative cDNA sequence for each gene. The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: Added for the MO people. 
-synonym: "exemplar mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000864 ! exemplar - -[Term] -id: SO:0000735 -name: sequence_location -synonym: "sequence location" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_sequence -synonym: "organelle sequence" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "mitochondrial sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000738 -name: nuclear_sequence -synonym: "nuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -synonym: "nucleomorphic sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000740 -name: plastid_sequence -synonym: "plastid sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000741 -name: kinetoplast -alt_id: SO:0000826 -def: "A kinetoplast is an interlocked network of thousands of minicircles and tens of maxi circles, located near the base of the flagellum of some protozoan species." [PMID:8395055] -synonym: "kinetoplast_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Kinetoplast "wiki" -is_a: SO:0001260 ! sequence_collection - -[Term] -id: SO:0000742 -name: maxicircle -alt_id: SO:0000827 -def: "A maxicircle is a replicon, part of a kinetoplast, that contains open reading frames and replicates via a rolling circle method." [PMID:8395055] -synonym: "maxicircle_chromosome" EXACT [] -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000741 ! kinetoplast - -[Term] -id: SO:0000743 -name: apicoplast_sequence -synonym: "apicoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -synonym: "chromoplast sequence" EXACT [] -is_a: SO:0000740 ! 
plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -synonym: "chloroplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -synonym: "cyanelle sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -synonym: "leucoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -synonym: "proplastid sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_location -synonym: "plasmid location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -synonym: "amplification origin" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_location -synonym: "proviral location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -def: "The region of sequence that has been inserted and is being propogated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000754 -name: lambda_vector -def: "The lambda bacteriophage is the vector for the linear lambda clone. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -synonym: "lambda vector" EXACT [] -is_a: SO:0000440 ! 
vector_replicon - -[Term] -id: SO:0000755 -name: plasmid_vector -synonym: "plasmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Plasmid_vector#Vectors "wiki" -is_a: SO:0000440 ! vector_replicon -intersection_of: SO:0001235 ! replicon -intersection_of: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template." [SO:ma] -xref: http://en.wikipedia.org/wiki/CDNA "wiki" -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -synonym: "single strand cDNA" EXACT [] -synonym: "single stranded cDNA" EXACT [] -synonym: "single-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -synonym: "double strand cDNA" RELATED [] -synonym: "double stranded cDNA" EXACT [] -synonym: "double-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_obsolete: true - -[Term] -id: SO:0000760 -name: YAC_clone -is_obsolete: true - -[Term] -id: SO:0000761 -name: phagemid_clone -is_obsolete: true - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000763 -name: fosmid_clone -is_obsolete: true - -[Term] -id: SO:0000764 -name: BAC_clone -is_obsolete: true - -[Term] -id: SO:0000765 -name: cosmid_clone -is_obsolete: true - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -def: "A tRNA sequence that has a pyrrolysine anticodon, and a 3' pyrrolysine binding region." [SO:ke] -synonym: "pyrrolysyl tRNA" EXACT [] -synonym: "pyrrolysyl-transfer ribonucleic acid" EXACT [] -synonym: "pyrrolysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0001178 ! pyrrolysine_tRNA_primary_transcript - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome." [SO:ma] -is_a: SO:0000155 ! 
plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria. Processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw, issn:1362-4962] -comment: Added in response to comment from Kelly Williams from Indiana. Nov 2005. -synonym: "tmRNA coding piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw] -comment: Added in response to Kelly Williams from Indiana. Date: Nov 2005. -synonym: "tmRNA acceptor piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000771 -name: QTL -def: "A quantitative trait locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." [http://rgd.mcw.edu/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005. -synonym: "quantitative trait locus" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000772 -name: genomic_island -def: "A genomic island is an integrated mobile genetic element, characterized by size (over 10 Kb). It that has features that suggest a foreign origin. These can include nucleotide distribution (oligonucleotides signature, CG content etc.) 
that differs from the bulk of the chromosome and/or genes suggesting DNA mobility." [Phigo:at, SO:ke] -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -synonym: "genomic island" EXACT [] -xref: http://en.wikipedia.org/wiki/Genomic_island "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "pathogenic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands. -synonym: "metabolic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -def: "An adaptive island is a genomic island that provides an adaptive advantage to the host." [SO:ke] -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands. Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "adaptive island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." 
[SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands. Evolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA gene. John T. Sullivan and Clive W. Ronso PNAS 1998 Apr 28 95 (9) 5145-5149. -synonym: "symbiosis island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000779 -name: engineered_episome -def: "An episome that is engineered." [SO:xp] -comment: Requested by Lynn Crosby Jan 2006. -synonym: "engineered episome" EXACT [] -intersection_of: SO:0000768 ! episome -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000780 -name: transposable_element_attribute -comment: Added by KE Jan 2006 to capture the kinds of attributes of TEs -is_obsolete: true - -[Term] -id: SO:0000781 -name: transgenic -def: "Attribute describing sequence that has been integrated with foreign sequence." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000782 -name: natural -def: "An attribute describing a feature that occurs in nature." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000783 -name: engineered -def: "An attribute to describe a region that was modified in vitro." [SO:ke] -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000784 -name: foreign -def: "An attribute to describe a region from another species." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000785 -name: cloned_region -comment: Added in response to Lynn Crosby. A clone insert may be composed of many cloned regions. -synonym: "cloned region" EXACT [] -synonym: "cloned segment" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000786 -name: reagent_attribute -comment: Added jan 2006 by KE. -synonym: "reagent attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000787 -name: clone_attribute -is_obsolete: true - -[Term] -id: SO:0000788 -name: cloned -is_obsolete: true - -[Term] -id: SO:0000789 -name: validated -def: "An attribute to describe a feature that has been proven." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000790 -name: invalidated -def: "An attribute describing a feature that is invalidated." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000791 -name: cloned_genomic -is_obsolete: true - -[Term] -id: SO:0000792 -name: cloned_cDNA -is_obsolete: true - -[Term] -id: SO:0000793 -name: engineered_DNA -is_obsolete: true - -[Term] -id: SO:0000794 -name: engineered_rescue_region -def: "A rescue region that is engineered." [SO:xp] -synonym: "engineered rescue fragment" EXACT [] -synonym: "engineered rescue region" EXACT [] -synonym: "engineered rescue segment" EXACT [] -intersection_of: SO:0000411 ! rescue_region -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000795 -name: rescue_mini_gene -def: "A mini_gene that rescues." [SO:xp] -synonym: "rescue mini gene" EXACT [] -synonym: "rescue mini-gene" EXACT [] -intersection_of: SO:0000815 ! mini_gene -intersection_of: has_quality SO:0000814 ! 
rescue - -[Term] -id: SO:0000796 -name: transgenic_transposable_element -def: "TE that has been modified in vitro, including insertion of DNA derived from a source other than the originating TE." [FB:mc] -comment: Modified as requested by Lynn - FB. May 2007. -synonym: "transgenic transposable element" EXACT [] -intersection_of: SO:0000101 ! transposable_element -intersection_of: derives_from SO:0000151 ! clone -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000797 -name: natural_transposable_element -def: "TE that exists (or existed) in nature." [FB:mc] -synonym: "natural transposable element" EXACT [] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000782 ! natural - -[Term] -id: SO:0000798 -name: engineered_transposable_element -def: "TE that has been modified by manipulations in vitro." [FB:mc] -synonym: "engineered transposable element" EXACT [] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000799 -name: engineered_foreign_transposable_element -def: "A transposable_element that is engineered and foreign." [FB:mc] -synonym: "engineered foreign transposable element" EXACT [] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000800 -name: assortment_derived_duplication -def: "A multi-chromosome duplication aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment derived duplication" EXACT [] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000801 -name: assortment_derived_deficiency_plus_duplication -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency and a duplication." [FB:gm] -synonym: "assortment derived deficiency plus duplication" EXACT [] -is_a: SO:0000240 ! 
chromosome_variation - -[Term] -id: SO:0000802 -name: assortment_derived_deficiency -def: "A multi-chromosome deficiency aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment-derived deficiency" EXACT [] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000803 -name: assortment_derived_aneuploid -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency or a duplication." [FB:gm] -synonym: "assortment derived aneuploid" EXACT [] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000804 -name: engineered_region -def: "A region that is engineered." [SO:xp] -synonym: "engineered region" EXACT [] -synonym: "engineered sequence" EXACT [] -is_a: SO:0001409 ! biomaterial_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000805 -name: engineered_foreign_region -def: "A region that is engineered and foreign." [SO:xp] -synonym: "engineered foreign region" EXACT [] -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000806 -name: fusion -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000807 -name: engineered_tag -def: "A tag that is engineered." [SO:xp] -synonym: "engineered tag" EXACT [] -intersection_of: SO:0000324 ! tag -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000808 -name: validated_cDNA_clone -def: "A cDNA clone that has been validated." [SO:xp] -synonym: "validated cDNA clone" EXACT [] -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000789 ! validated - -[Term] -id: SO:0000809 -name: invalidated_cDNA_clone -def: "A cDNA clone that is invalid." [SO:xp] -synonym: "invalidated cDNA clone" EXACT [] -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000790 ! 
invalidated - -[Term] -id: SO:0000810 -name: chimeric_cDNA_clone -def: "A cDNA clone invalidated because it is chimeric." [SO:xp] -synonym: "chimeric cDNA clone" EXACT [] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA - -[Term] -id: SO:0000811 -name: genomically_contaminated_cDNA_clone -def: "A cDNA clone invalidated by genomic contamination." [SO:xp] -synonym: "genomically contaminated cDNA clone" EXACT [] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000414 ! invalidated_by_genomic_contamination - -[Term] -id: SO:0000812 -name: polyA_primed_cDNA_clone -def: "A cDNA clone invalidated by polyA priming." [SO:xp] -synonym: "polyA primed cDNA clone" EXACT [] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000415 ! invalidated_by_genomic_polyA_primed_cDNA - -[Term] -id: SO:0000813 -name: partially_processed_cDNA_clone -def: "A cDNA invalidated clone by partial processing." [SO:xp] -synonym: "partially processed cDNA clone" EXACT [] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000416 ! invalidated_by_partial_processing - -[Term] -id: SO:0000814 -name: rescue -def: "An attribute describing a region's ability, when introduced to a mutant organism, to re-establish (rescue) a phenotype." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000815 -name: mini_gene -def: "By definition, minigenes are short open-reading frames (ORF), usually encoding approximately 9 to 20 amino acids, which are expressed in vivo (as distinct from being synthesized as peptide or protein ex vivo and subsequently injected). 
The in vivo synthesis confers a distinct advantage: the expressed sequences can enter both antigen presentation pathways, MHC I (inducing CD8+ T- cells, which are usually cytotoxic T-lymphocytes (CTL)) and MHC II (inducing CD4+ T-cells, usually 'T-helpers' (Th)); and can encounter B-cells, inducing antibody responses. Three main vector approaches have been used to deliver minigenes: viral vectors, bacterial vectors and plasmid DNA." [PMID:15992143] -synonym: "mini gene" EXACT [] -is_a: SO:0000236 ! ORF - -[Term] -id: SO:0000816 -name: rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "rescue gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000817 -name: wild_type -def: "An attribute describing sequence with the genotype found in nature and/or standard laboratory stock." [SO:ke] -synonym: "wild type" EXACT [] -xref: http://en.wikipedia.org/wiki/Wild_type "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000818 -name: wild_type_rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "wild type rescue gene" EXACT [] -is_a: SO:0000816 ! rescue_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000817 ! wild_type - -[Term] -id: SO:0000819 -name: mitochondrial_chromosome -def: "A chromosome originating in a mitochondria." [SO:xp] -synonym: "mitochondrial chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000820 -name: chloroplast_chromosome -def: "A chromosome originating in a chloroplast." [SO:xp] -synonym: "chloroplast chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000821 -name: chromoplast_chromosome -def: "A chromosome originating in a chromoplast." [SO:xp] -synonym: "chromoplast chromosome" EXACT [] -intersection_of: SO:0000340 ! 
chromosome -intersection_of: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000822 -name: cyanelle_chromosome -def: "A chromosome originating in a cyanelle." [SO:xp] -synonym: "cyanelle chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000823 -name: leucoplast_chromosome -def: "A chromosome with origin in a leucoplast." [SO:xp] -synonym: "leucoplast chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000824 -name: macronuclear_chromosome -def: "A chromosome originating in a macronucleus." [SO:xp] -synonym: "macronuclear chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000083 ! macronuclear_sequence - -[Term] -id: SO:0000825 -name: micronuclear_chromosome -def: "A chromosome originating in a micronucleus." [SO:xp] -synonym: "micronuclear chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000084 ! micronuclear_sequence - -[Term] -id: SO:0000828 -name: nuclear_chromosome -def: "A chromosome originating in a nucleus." [SO:xp] -synonym: "nuclear chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000829 -name: nucleomorphic_chromosome -def: "A chromosome originating in a nucleomorph." [SO:xp] -synonym: "nucleomorphic chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! 
chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000832 -name: promoter_region -def: "A region of sequence which is part of a promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -def: "A region of a mature transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! 
mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000838 -name: rRNA_primary_transcript_region -def: "A region of an rRNA primary transcript." [SO:ke] -comment: To allow transcribed_spacer_region to have a path to the root. -synonym: "rRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000840 -name: repeat_component -def: "A region of a repeated sequence." [SO:ke] -comment: A manufactured to group the parts of repeats, to give them an is_a path back to the root. -synonym: "repeat component" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! 
spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000843 -name: bacterial_RNApol_promoter_region -def: "A region which is part of a bacterial RNA polymerase promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of bacterial_RNApol_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000844 -name: RNApol_II_promoter_region -def: "A region of sequence which is a promoter for RNA polymerase II." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_II_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000845 -name: RNApol_III_promoter_type_1_region -def: "A region of sequence which is a promoter for RNA polymerase III type 1." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_1 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000846 -name: RNApol_III_promoter_type_2_region -def: "A region of sequence which is a promoter for RNA polymerase III type 2." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_2 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000847 -name: tmRNA_region -def: "A region of a tmRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of tmRNA, thus giving them an is_a path back to the root. -synonym: "tmRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000584 ! tmRNA - -[Term] -id: SO:0000848 -name: LTR_component -synonym: "long term repeat component" EXACT [] -synonym: "LTR component" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000286 ! 
long_terminal_repeat - -[Term] -id: SO:0000849 -name: three_prime_LTR_component -synonym: "3' long terminal repeat component" EXACT [] -synonym: "three prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000850 -name: five_prime_LTR_component -synonym: "5' long term repeat component" EXACT [] -synonym: "five prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000851 -name: CDS_region -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0000853 -name: homologous_region -def: "A region that is homologous to another region." [SO:ke] -synonym: "homolog" EXACT [] -synonym: "homologous region" EXACT [] -synonym: "homologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Homology_(biology) "wiki" -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000857 ! homologous - -[Term] -id: SO:0000854 -name: paralogous_region -def: "A homologous_region that is paralogous to another region." [SO:ke] -comment: A term to be used in conjunction with the paralogous_to relationship. -synonym: "paralog" EXACT [] -synonym: "paralogous region" EXACT [] -synonym: "paralogue" EXACT [] -xref: http://en.wikipedia.org/wiki/Paralog#Paralogy "wiki" -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000859 ! paralogous - -[Term] -id: SO:0000855 -name: orthologous_region -def: "A homologous_region that is orthologous to another region." [SO:ke] -comment: This term should be used in conjunction with the similarity relationships defined in SO. 
-synonym: "ortholog" EXACT [] -synonym: "orthologous region" EXACT [] -synonym: "orthologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Ortholog#Orthology "wiki" -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000858 ! orthologous - -[Term] -id: SO:0000856 -name: conserved -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000857 -name: homologous -def: "Similarity due to common ancestry." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000858 -name: orthologous -def: "An attribute describing a kind of homology where divergence occured after a speciation event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000859 -name: paralogous -def: "An attribute describing a kind of homology where divergence occurred after a duplication event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000860 -name: syntenic -def: "Attribute describing sequence regions occurring in same order on chromosome of different species." [SO:ke] -xref: http://en.wikipedia.org/wiki/Syntenic "wiki" -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000861 -name: capped_primary_transcript -def: "A primary transcript that is capped." [SO:xp] -synonym: "capped primary transcript" EXACT [] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000862 -name: capped_mRNA -def: "An mRNA that is capped." [SO:xp] -synonym: "capped mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000863 -name: mRNA_attribute -def: "An attribute describing an mRNA feature." [SO:ke] -synonym: "mRNA attribute" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000864 -name: exemplar -def: "An attribute describing a sequence is representative of a class of similar sequences." [SO:ke] -is_a: SO:0000863 ! 
mRNA_attribute - -[Term] -id: SO:0000865 -name: frameshift -def: "An attribute describing a sequence that contains a mutation involving the deletion or insertion of one or more bases, where this number is not divisible by 3." [SO:ke] -xref: http://en.wikipedia.org/wiki/Frameshift "wiki" -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000866 -name: minus_1_frameshift -def: "A frameshift caused by deleting one base." [SO:ke] -synonym: "minus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000867 -name: minus_2_frameshift -def: "A frameshift caused by deleting two bases." [SO:ke] -synonym: "minus 2 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000868 -name: plus_1_frameshift -def: "A frameshift caused by inserting one base." [SO:ke] -synonym: "plus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000869 -name: plus_2_framshift -def: "A frameshift caused by inserting two bases." [SO:ke] -synonym: "plus 2 framshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000870 -name: trans_spliced -def: "An attribute describing transcript sequence that is created by splicing exons from diferent genes." [SO:ke] -synonym: "trans-spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000871 -name: polyadenylated_mRNA -def: "An mRNA that is polyadenylated." [SO:xp] -synonym: "polyadenylated mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000610 ! polyA_sequence -intersection_of: has_quality SO:0000246 ! polyadenylated - -[Term] -id: SO:0000872 -name: trans_spliced_mRNA -def: "An mRNA that is trans-spliced." [SO:xp] -synonym: "trans-spliced mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000636 ! spliced_leader_RNA -intersection_of: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000873 -name: edited_transcript -def: "A transcript that is edited." 
[SO:ke] -synonym: "edited transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: guided_by SO:0000602 ! guide_RNA -intersection_of: has_part SO:0000977 ! anchor_binding_site -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000874 -name: edited_transcript_by_A_to_I_substitution -def: "A transcript that has been edited by A to I substitution." [SO:ke] -synonym: "edited transcript by A to I substitution" EXACT [] -is_a: SO:0000873 ! edited_transcript - -[Term] -id: SO:0000875 -name: bound_by_protein -def: "An attribute describing a sequence that is bound by a protein." [SO:ke] -synonym: "bound by protein" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000876 -name: bound_by_nucleic_acid -def: "An attribute describing a sequence that is bound by a nucleic acid." [SO:ke] -synonym: "bound by nucleic acid" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000877 -name: alternatively_spliced -def: "An attribute describing a situation where a gene may encode for more than 1 transcript." [SO:ke] -synonym: "alternatively spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000878 -name: monocistronic -def: "An attribute describing a sequence that contains the code for one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000879 -name: dicistronic -def: "An attribute describing a sequence that contains the code for two gene products." [SO:ke] -is_a: SO:0000880 ! polycistronic - -[Term] -id: SO:0000880 -name: polycistronic -def: "An attribute describing a sequence that contains the code for more than one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000881 -name: recoded -def: "An attribute describing an mRNA sequence that has been reprogrammed at translation, causing localized alterations." [SO:ke] -is_a: SO:0000863 ! 
mRNA_attribute - -[Term] -id: SO:0000882 -name: codon_redefined -def: "An attribute describing the alteration of codon meaning." [SO:ke] -synonym: "codon redefined" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000883 -name: stop_codon_read_through -def: "A stop codon redefined to be a new amino acid." [SO:ke] -synonym: "stop codon read through" EXACT [] -synonym: "stop codon readthrough" RELATED [] -is_a: SO:0000145 ! recoded_codon - -[Term] -id: SO:0000884 -name: stop_codon_redefined_as_pyrrolysine -def: "A stop codon redefined to be the new amino acid, pyrrolysine." [SO:ke] -synonym: "stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000885 -name: stop_codon_redefined_as_selenocysteine -def: "A stop codon redefined to be the new amino acid, selenocysteine." [SO:ke] -synonym: "stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000886 -name: recoded_by_translational_bypass -def: "Recoded mRNA where a block of nucleotides is not translated." [SO:ke] -synonym: "recoded by translational bypass" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000887 -name: translationally_frameshifted -def: "Recoding by frameshifting a particular site." [SO:ke] -synonym: "translationally frameshifted" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000888 -name: maternally_imprinted_gene -def: "A gene that is maternally_imprinted." [SO:xp] -synonym: "maternally imprinted gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000135 ! maternally_imprinted - -[Term] -id: SO:0000889 -name: paternally_imprinted_gene -def: "A gene that is paternally imprinted." [SO:xp] -synonym: "paternally imprinted gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000136 ! 
paternally_imprinted - -[Term] -id: SO:0000890 -name: post_translationally_regulated_gene -def: "A gene that is post translationally regulated." [SO:xp] -synonym: "post translationally regulated gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000891 -name: negatively_autoregulated_gene -def: "A gene that is negatively autoreguated." [SO:xp] -synonym: "negatively autoregulated gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000473 ! negatively_autoregulated - -[Term] -id: SO:0000892 -name: positively_autoregulated_gene -def: "A gene that is positively autoregulated." [SO:xp] -synonym: "positively autoregulated gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000475 ! positively_autoregulated - -[Term] -id: SO:0000893 -name: silenced -def: "An attribute describing an epigenetic process where a gene is inactivated at transcriptional or translational level." [SO:ke] -xref: http://en.wikipedia.org/wiki/Silenced "wiki" -is_a: SO:0000126 ! transcriptionally_repressed - -[Term] -id: SO:0000894 -name: silenced_by_DNA_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA modifications, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0000895 -name: silenced_by_DNA_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA methylation, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA methylation" EXACT [] -is_a: SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000896 -name: translationally_regulated_gene -def: "A gene that is translationally regulated." [SO:xp] -synonym: "translationally regulated gene" EXACT [] -intersection_of: SO:0000704 ! 
gene -intersection_of: has_quality SO:0000131 ! translationally_regulated - -[Term] -id: SO:0000897 -name: allelically_excluded_gene -def: "A gene that is allelically_excluded." [SO:xp] -synonym: "allelically excluded gene" EXACT [] -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000137 ! allelically_excluded - -[Term] -id: SO:0000898 -name: epigenetically_modified_gene -def: "A gene that is epigenetically modified." [SO:ke] -synonym: "epigenetically modified gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000899 -name: nuclear_mitochondrial -def: "An attribute describing a nuclear pseudogene of a mitochndrial gene." [SO:ke] -synonym: "nuclear mitochondrial" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000900 -name: processed -def: "An attribute describing a pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promotors, but often including a polyA tail." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000901 -name: unequally_crossed_over -def: "An attribute describing a pseudogene that was created by tandem duplication and unequal crossing over during recombination." [SO:ke] -synonym: "unequally crossed over" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000902 -name: transgene -def: "A gene that is transgenic." [SO:xp] -xref: http://en.wikipedia.org/wiki/Transgene "wiki" -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000903 -name: endogenous_retroviral_sequence -synonym: "endogenous retroviral sequence" EXACT [] -is_a: SO:0000751 ! proviral_location - -[Term] -id: SO:0000904 -name: rearranged_at_DNA_level -def: "An attribute to describe the sequence of a feature, where the DNA is rearranged." [SO:ke] -synonym: "rearranged at DNA level" EXACT [] -is_a: SO:0000133 ! 
epigenetically_modified - -[Term] -id: SO:0000905 -name: status -def: "An attribute describing the status of a feature, based on the available evidence." [SO:ke] -comment: This term is the hypernym of attributes and should not be annotated to. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000906 -name: independently_known -def: "Attribute to describe a feature that is independently known - not predicted." [SO:ke] -synonym: "independently known" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000907 -name: supported_by_sequence_similarity -def: "An attribute to describe a feature that has been predicted using sequence similarity techniques." [SO:ke] -synonym: "supported by sequence similarity" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000908 -name: supported_by_domain_match -def: "An attribute to describe a feature that has been predicted using sequence similarity of a known domain." [SO:ke] -synonym: "supported by domain match" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000909 -name: supported_by_EST_or_cDNA -def: "An attribute to describe a feature that has been predicted using sequence similarity to EST or cDNA data." [SO:ke] -synonym: "supported by EST or cDNA" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000910 -name: orphan -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000911 -name: predicted_by_ab_initio_computation -def: "An attribute describing a feature that is predicted by a computer program that did not rely on sequence similarity." [SO:ke] -synonym: "predicted by ab initio computation" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000912 -name: asx_turn -alt_id: BS:00203 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Aspartate or Asparagine (Asx), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." 
[http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0000913 -name: cloned_cDNA_insert -def: "A clone insert made from cDNA." [SO:xp] -synonym: "cloned cDNA insert" EXACT [] -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000914 -name: cloned_genomic_insert -def: "A clone insert made from genomic DNA." [SO:xp] -synonym: "cloned genomic insert" EXACT [] -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000915 -name: engineered_insert -def: "A clone insert that is engineered." [SO:xp] -synonym: "engineered insert" EXACT [] -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000916 -name: edit_operation -synonym: "edit operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000917 -name: insert_U -def: "An edit to insert a U." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "insert U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000918 -name: delete_U -def: "An edit to delete a uridine." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "delete U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000919 -name: substitute_A_to_I -def: "An edit to substitute an I for an A." [SO:ke] -synonym: "substitute A to I" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000920 -name: insert_C -def: "An edit to insert a cytidine." [SO:ke] -synonym: "insert C" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000921 -name: insert_dinucleotide -def: "An edit to insert a dinucleotide." 
[SO:ke] -synonym: "insert dinucleotide" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000922 -name: substitute_C_to_U -def: "An edit to substitute an U for a C." [SO:ke] -synonym: "substitute C to U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000923 -name: insert_G -def: "An edit to insert a G." [SO:ke] -synonym: "insert G" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000924 -name: insert_GC -def: "An edit to insert a GC dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GC" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000925 -name: insert_GU -def: "An edit to insert a GU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. 
-synonym: "insert GU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000926 -name: insert_CU -def: "An edit to insert a CU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert CU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000927 -name: insert_AU -def: "An edit to insert a AU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000928 -name: insert_AA -def: "An edit to insert a AA dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. 
The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AA" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000929 -name: edited_mRNA -def: "An mRNA that is edited." [SO:xp] -synonym: "edited mRNA" EXACT [] -intersection_of: SO:0000873 ! edited_transcript -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000930 -name: guide_RNA_region -def: "A region of guide RNA." [SO:ma] -synonym: "guide RNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000602 ! guide_RNA - -[Term] -id: SO:0000931 -name: anchor_region -def: "A region of a guide_RNA that base-pairs to a target mRNA." [SO:jk] -synonym: "anchor region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000932 -name: pre_edited_mRNA -synonym: "pre-edited mRNA" EXACT [] -is_a: SO:0000120 ! protein_coding_primary_transcript - -[Term] -id: SO:0000933 -name: intermediate -def: "An attribute to describe a feature between stages of processing." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000934 -name: miRNA_target_site -def: "A miRNA target site is a binding site where the molecule is a micro RNA." [FB:cds] -synonym: "miRNA target site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000935 -name: edited_CDS -def: "A CDS that is edited." [SO:xp] -synonym: "edited CDS" EXACT [] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000116 ! 
edited - -[Term] -id: SO:0000936 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -synonym: "vertebrate immunoglobulin T cell receptor rearranged segment" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000937 -name: vertebrate_immune_system_feature -is_obsolete: true - -[Term] -id: SO:0000938 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor rearranged gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000939 -name: vertebrate_immune_system_gene_recombination_signal_feature -synonym: "vertebrate immune system gene recombination signal feature" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000940 -name: recombinationally_rearranged -synonym: "recombinationally rearranged" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000941 -name: recombinationally_rearranged_vertebrate_immune_system_gene -def: "A recombinationally rearranged gene of the vertebrate immune system." [SO:xp] -synonym: "recombinationally rearranged vertebrate immune system gene" EXACT [] -is_a: SO:0000456 ! recombinationally_rearranged_gene - -[Term] -id: SO:0000942 -name: attP_site -def: "An integration/excision site of a phage chromosome at which a recombinase acts to insert the phage DNA at a cognate integration/excision site on a bacterial chromosome." [SO:as] -synonym: "attP site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0001042 ! phage_sequence - -[Term] -id: SO:0000943 -name: attB_site -def: "An integration/excision site of a bacterial chromosome at which a recombinase acts to insert foreign DNA containing a cognate integration/excision site." [SO:as] -synonym: "attB site" EXACT [] -is_a: SO:0000946 ! 
integration_excision_site - -[Term] -id: SO:0000944 -name: attL_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attB_site and the 3' portion of attP_site." [SO:as] -synonym: "attBP'" RELATED [] -synonym: "attL site" RELATED [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000945 -name: attR_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attP_site and the 3' portion of attB_site." [SO:as] -synonym: "attPB'" RELATED [] -synonym: "attR site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000946 -name: integration_excision_site -def: "A region specifically recognised by a recombinase, which inserts or removes another region marked by a distinct cognate integration/excision site." [SO:as] -synonym: "attachment site" RELATED [] -synonym: "integration excision site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000947 -name: resolution_site -def: "A region specifically recognised by a recombinase, which separates a physically contiguous circle of DNA into two physically separate circles." [SO:as] -synonym: "res site" EXACT [] -synonym: "resolution site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000948 -name: inversion_site -def: "A region specifically recognised by a recombinase, which inverts the region flanked by a pair of sites." [SO:ma] -comment: A target region for site-specific inversion of a DNA region and which carries binding sites for a site-specific recombinase and accessory proteins as well as the site for specific cleavage by the recombinase. -synonym: "inversion site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000949 -name: dif_site -def: "A site at which replicated bacterial circular chromosomes are decatenated by site specific resolvase." 
[SO:as] -synonym: "dif site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000950 -name: attC_site -def: "An attC site is a sequence required for the integration of a DNA of an integron." [SO:as] -synonym: "attC site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000951 -name: eukaryotic_terminator -synonym: "eukaryotic terminator" EXACT [] -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000952 -name: oriV -def: "An origin of vegetative replication in plasmids and phages." [SO:as] -synonym: "origin of vegetative replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000953 -name: oriC -def: "An origin of bacterial chromosome replication." [SO:as] -synonym: "origin of bacterial chromosome replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000954 -name: DNA_chromosome -def: "Structural unit composed of a self-replicating, DNA molecule." [SO:ma] -synonym: "DNA chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0000955 -name: double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded DNA molecule." [SO:ma] -synonym: "double stranded DNA chromosome" EXACT [] -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000985 ! double - -[Term] -id: SO:0000956 -name: single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded DNA molecule." [SO:ma] -synonym: "single stranded DNA chromosome" EXACT [] -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000984 ! single - -[Term] -id: SO:0000957 -name: linear_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear DNA molecule." 
[SO:ma] -synonym: "linear double stranded DNA chromosome" EXACT [] -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000958 -name: circular_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular DNA molecule." [SO:ma] -synonym: "circular double stranded DNA chromosome" EXACT [] -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000959 -name: linear_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear DNA molecule." [SO:ma] -synonym: "linear single stranded DNA chromosome" EXACT [] -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000960 -name: circular_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded DNA chromosome" EXACT [] -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000961 -name: RNA_chromosome -def: "Structural unit composed of a self-replicating, RNA molecule." [SO:ma] -synonym: "RNA chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000356 ! RNA - -[Term] -id: SO:0000962 -name: single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded RNA molecule." [SO:ma] -synonym: "single stranded RNA chromosome" EXACT [] -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000984 ! single - -[Term] -id: SO:0000963 -name: linear_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear RNA molecule." 
[SO:ma] -synonym: "linear single stranded RNA chromosome" EXACT [] -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000964 -name: linear_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear RNA molecule." [SO:ma] -synonym: "linear double stranded RNA chromosome" EXACT [] -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000965 -name: double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded RNA molecule." [SO:ma] -synonym: "double stranded RNA chromosome" EXACT [] -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000985 ! double - -[Term] -id: SO:0000966 -name: circular_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded RNA chromosome" EXACT [] -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000967 -name: circular_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular RNA molecule." [SO:ma] -synonym: "circular double stranded RNA chromosome" EXACT [] -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000968 -name: sequence_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "sequence replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000969 -name: rolling_circle -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070581. 
-synonym: "rolling circle" EXACT [] -xref: http://en.wikipedia.org/wiki/Rolling_circle "wiki" -is_obsolete: true - -[Term] -id: SO:0000970 -name: theta_replication -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070582 -synonym: "theta replication" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000971 -name: DNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0006260. -synonym: "DNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000972 -name: RNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "RNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000973 -name: insertion_sequence -def: "A terminal_inverted_repeat_element that is bacterial and only encodes the functions required for its transposition between these inverted repeats." [SO:as] -synonym: "insertion sequence" EXACT [] -synonym: "IS" RELATED [] -xref: http://en.wikipedia.org/wiki/Insertion_sequence "wiki" -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000975 -name: minicircle_gene -synonym: "minicircle gene" EXACT [] -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000980 ! minicircle - -[Term] -id: SO:0000976 -name: cryptic -def: "A feature_attribute describing a feature that is not manifest under normal conditions." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000977 -name: anchor_binding_site -comment: Part of an edited transcript only. -synonym: "anchor binding site" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000978 -name: template_region -def: "A region of a guide_RNA that specifies the insertions and deletions of bases in the editing of a target mRNA." [SO:jk] -synonym: "information region" EXACT [] -synonym: "template region" EXACT [] -is_a: SO:0000930 ! 
guide_RNA_region - -[Term] -id: SO:0000979 -name: gRNA_encoding -def: "A non-protein_coding gene that encodes a guide_RNA." [SO:ma] -synonym: "gRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000980 -name: minicircle -alt_id: SO:0000974 -def: "A minicircle is a replicon, part of a kinetoplast, that encodes for guide RNAs." [PMID:8395055] -synonym: "minicircle_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Minicircle "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000741 ! kinetoplast - -[Term] -id: SO:0000981 -name: rho_dependent_bacterial_terminator -synonym: "rho dependent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000982 -name: rho_independent_bacterial_terminator -synonym: "rho independent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000983 -name: strand_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "strand attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000984 -name: single -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000985 -name: double -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000986 -name: topology_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "topology attribute" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000987 -name: linear -def: "A quality of a nucleotide polymer that has a 3'-terminal residue and a 5'-terminal residue." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "two-ended" RELATED [] -is_a: SO:0000986 ! 
topology_attribute -disjoint_from: SO:0000988 ! circular - -[Term] -id: SO:0000988 -name: circular -def: "A quality of a nucleotide polymer that has no terminal nucleotide residues." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "zero-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000987 ! linear - -[Term] -id: SO:0000989 -name: class_II_RNA -def: "Small non-coding RNA (59-60 nt long) containing 5' and 3' ends that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -synonym: "class II RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000990 -name: class_I_RNA -def: "Small non-coding RNA (55-65 nt long) containing highly conserved 5' and 3' ends (16 and 8 nt, respectively) that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -comment: Requested by Karen Pilcher - Dictybase. song-Term Tracker-1574577. -synonym: "class I RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000991 -name: genomic_DNA -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "genomic DNA" EXACT [] -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000992 -name: BAC_cloned_genomic_insert -comment: Requested by Andy Schroder - Flybase Harvard, Nov 2006. -synonym: "BAC cloned genomic insert" EXACT [] -intersection_of: SO:0000914 ! cloned_genomic_insert -intersection_of: derives_from SO:0000153 ! BAC - -[Term] -id: SO:0000993 -name: consensus -comment: Term added Dec 06 to comply with mapping to MGED terms.It should be used to generate consensus regions. The specific cross product terms they require are consensus_region and consensus_mRNA. -is_a: SO:0000905 ! 
status - -[Term] -id: SO:0000994 -name: consensus_region -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus region" EXACT [] -is_a: SO:0001410 ! experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000995 -name: consensus_mRNA -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000996 -name: predicted_gene -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "predicted gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000732 ! predicted - -[Term] -id: SO:0000997 -name: gene_fragment -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "gene fragment" EXACT [] -intersection_of: SO:0000842 ! gene_component_region -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0000998 -name: recursive_splice_site -def: "A recursive splice site is a splice site which subdivides a large intron. Recursive splicing is a mechanism that splices large introns by sub dividing the intron at non exonic elements and alternate exons." [http://www.genetics.org/cgi/content/full/170/2/661] -synonym: "recursive splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000999 -name: BAC_end -def: "A region of sequence from the end of a BAC clone that may provide a highly specific marker." [SO:ke] -comment: Requested by Keith Boroevich December, 2006. -synonym: "BAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000153 ! BAC - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." 
[SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001003 -name: solo_LTR -def: "A recombination product between the 2 LTR of the same element." [SO:ke] -comment: Requested by Hadi Quesneville January 2007. -synonym: "solo LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0001004 -name: low_complexity -synonym: "low complexity" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0001005 -name: low_complexity_region -synonym: "low complexity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001004 ! low_complexity - -[Term] -id: SO:0001006 -name: prophage -def: "A phage genome after it has established in the host genome in a latent/immune state either as a plasmid or as an integrated \"island\"." [GOC:jl] -xref: http://en.wikipedia.org/wiki/Prophage "wiki" -is_a: SO:0000113 ! 
proviral_region - -[Term] -id: SO:0001007 -name: cryptic_prophage -def: "A remnant of an integrated prophage in the host genome or an \"island\" in the host genome that includes phage like-genes." [GOC:jl] -comment: This is not cryptic in the same sense as a cryptic gene or cryptic splice site. -synonym: "cryptic prophage" EXACT [] -xref: http://ecoliwiki.net/colipedia/index.php/Category\:Cryptic_Prophage.w dbxref -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001008 -name: tetraloop -def: "A base-paired stem with loop of 4 non-hydrogen bonded nucleotides." [SO:ke] -xref: http://en.wikipedia.org/wiki/Tetraloop "wiki" -is_a: SO:0000313 ! stem_loop - -[Term] -id: SO:0001009 -name: DNA_constraint_sequence -def: "A double-stranded DNA used to control macromolecular structure and function." [http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au\]&dispmax=50] -synonym: "DNA constraint" EXACT [] -synonym: "DNA constraint sequence" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0001010 -name: i_motif -def: "A cytosine rich domain whereby strands associate both inter- and intramolecularly at moderately acidic pH." [PMID:9753739] -synonym: "i motif" EXACT [] -synonym: "short intercalated motif" EXACT [] -is_a: SO:0000142 ! DNA_sequence_secondary_structure - -[Term] -id: SO:0001011 -name: PNA_oligo -def: "Peptide nucleic acid, is a chemical not known to occur naturally but is artificially synthesized and used in some biological research and medical treatments. The PNA backbone is composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [SO:ke] -synonym: "peptide nucleic acid" EXACT [] -synonym: "PNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Peptide_nucleic_acid "wiki" -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001184 ! 
PNA - -[Term] -id: SO:0001012 -name: DNAzyme -def: "A DNA sequence with catalytic activity." [SO:cb] -comment: Added by request from Colin Batchelor. -synonym: "catalytic DNA" EXACT [] -synonym: "deoxyribozyme" RELATED [] -synonym: "DNA enzyme" EXACT [] -intersection_of: SO:0000696 ! oligo -intersection_of: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0001013 -name: MNP -def: "A multiple nucleotide polymorphism with alleles of common length > 1, for example AAA/TTT." [http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?rs=rs2067431] -synonym: "multiple nucleotide polymorphism" RELATED [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0001014 -name: intron_domain -comment: Requested by Colin Batchelor, Feb 2007. -synonym: "intron domain" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000188 ! intron - -[Term] -id: SO:0001015 -name: wobble_base_pair -def: "A type of non-canonical base pairing, most commonly between G and U, which is important for the secondary structure of RNAs. It has similar thermodynamic stability to the Watson-Crick pairing. Wobble base pairs only have two hydrogen bonds. Other wobble base pair possibilities are I-A, I-U and I-C." [PMID:11256617] -synonym: "wobble base pair" EXACT [] -synonym: "wobble pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Wobble_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0001016 -name: internal_guide_sequence -def: "A purine-rich sequence in the group I introns which determines the locations of the splice sites in group I intron splicing and has catalytic activity." [SO:cb] -synonym: "IGS" EXACT [] -synonym: "internal guide sequence" EXACT [] -is_a: SO:0001014 ! intron_domain -relationship: part_of SO:0000587 ! group_I_intron - -[Term] -id: SO:0001017 -name: silent_mutation -comment: Added in March 2007 in after meeting with pharmgkb. -synonym: "silent mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Silent_mutation "wiki" -is_a: SO:1000132 ! 
sequence_variant_effect - -[Term] -id: SO:0001018 -name: epitope -def: "A region of a macromolecule that is recognized by the immune system." [http://en.wikipedia.org/wiki/Epitope] -comment: Requested by Trish Whetzel. -xref: http://en.wikipedia.org/wiki/Epitope "wiki" -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001060 ! sequence_variant - -[Term] -id: SO:0001020 -name: sequence_variant_affecting_copy_number -synonym: "mutation affecting copy number" EXACT [] -synonym: "sequence variant affecting copy number" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0001021 -name: chromosome_breakpoint -alt_id: SO:0001242 -synonym: "aberration breakpoint" EXACT [] -synonym: "aberration_junction" EXACT [] -synonym: "chromosome breakpoint" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0001022 -name: inversion_breakpoint -def: "The point within a chromosome where an inversion begins or ends." [SO:cb] -synonym: "inversion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001023 -name: allele -def: "An allele is one of a set of coexisting sequence variants of a gene." [SO:immuno_workshop] -synonym: "allelomorph" EXACT [] -xref: http://en.wikipedia.org/wiki/Allele "wiki" -is_a: SO:0001060 ! sequence_variant -relationship: variant_of SO:0000704 ! gene - -[Term] -id: SO:0001024 -name: haplotype -def: "A haplotype is one of a set of coexisting sequence variants of a haplotype block." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Haplotype "wiki" -is_a: SO:0001060 ! sequence_variant -relationship: variant_of SO:0000355 ! 
haplotype_block - -[Term] -id: SO:0001025 -name: polymorphic_sequence_variant -def: "A sequence variant that is segregating in one or more natural populations of a species." [SO:immuno_workshop] -synonym: "polymorphic sequence variant" EXACT [] -is_a: SO:0001060 ! sequence_variant - -[Term] -id: SO:0001026 -name: genome -def: "A genome is the sum of genetic material within a cell or virion." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genome "wiki" -is_a: SO:0001260 ! sequence_collection - -[Term] -id: SO:0001027 -name: genotype -def: "A genotype is a variant genome, complete or incomplete." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genotype "wiki" -is_a: SO:0001060 ! sequence_variant -relationship: variant_of SO:0001026 ! genome - -[Term] -id: SO:0001028 -name: diplotype -def: "A diplotype is a pair of haplotypes from a given individual. It is a genotype where the phase is known." [SO:immuno_workshop] -is_a: SO:0001060 ! sequence_variant - -[Term] -id: SO:0001029 -name: direction_attribute -synonym: "direction attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001030 -name: forward -def: "Forward is an attribute of the feature, where the feature is in the 5' to 3' direction." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001031 -name: reverse -def: "Reverse is an attribute of the feature, where the feature is in the 3' to 5' direction. Again could be applied to primer." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001032 -name: mitochondrial_DNA -comment: This terms is used by MO. -synonym: "mitochondrial DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_DNA "wiki" -intersection_of: SO:0000737 ! mitochondrial_sequence -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001033 -name: chloroplast_DNA -comment: This term is used by MO. -synonym: "chloroplast DNA" EXACT [] -intersection_of: SO:0000745 ! 
chloroplast_sequence -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001034 -name: mirtron -def: "A debranched intron which mimics the structure of pre-miRNA and enters the miRNA processing pathway without Drosha mediated cleavage." [PMID:17589500, SO:ma] -comment: Ruby et al. Nature 448:83 desribe a new class of miRNAs that are derived from debranched introns. -is_a: SO:0001014 ! intron_domain - -[Term] -id: SO:0001035 -name: piRNA -def: "A small non coding RNA, part of a silencing system that prevents the spreading of selfish genetic elements." [SO:ke] -synonym: "piwi-associated RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/PiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001036 -name: arginyl_tRNA -def: "A tRNA sequence that has an arginine anticodon, and a 3' arginine binding region." [SO:ke] -synonym: "arginyl tRNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000212 ! arginine_tRNA_primary_transcript - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0001038 -name: extrachromosomal_mobile_genetic_element -def: "An MGE that is not integrated into the host chromosome." [SO:ke] -synonym: "extrachromosomal mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! 
mobile_genetic_element - -[Term] -id: SO:0001040 -name: integrated_plasmid -def: "A plasmid sequence that is integrated within the host chromosome." [SO:ke] -synonym: "integrated plasmid" EXACT [] -intersection_of: SO:0001039 ! integrated_mobile_genetic_element -intersection_of: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0001041 -name: viral_sequence -def: "The region of nucleotide sequence of a virus, a submicroscopic particle that replicates by infecting a host cell." [SO:ke] -comment: The definitions of the children of this term were revised Decemeber 2007 after discussion on song-devel. The resulting definitions are slightly unweildy but hopefully more logically correct. -synonym: "viral sequence" EXACT [] -synonym: "virus sequence" EXACT [] -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0001042 -name: phage_sequence -def: "The nucleotide sequence of a virus that infects bacteria." [SO:ke] -synonym: "bacteriophage" EXACT [] -synonym: "phage" EXACT [] -synonym: "phage sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Bacteriophage "wiki" -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001043 -name: attCtn_site -def: "An attachment site located on a conjugative transposon and used for site-specific integration of a conjugative transposon." [Phigo:at] -synonym: "attCtn site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000371 ! conjugative_transposon - -[Term] -id: SO:0001044 -name: nuclear_mt_pseudogene -def: "A nuclear pseudogene of a mitochndrial gene." [SO:xp] -synonym: "nuclear mitochondrial pseudogene" EXACT [] -synonym: "nuclear mt pseudogene" EXACT [] -synonym: "NUMT" EXACT [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0001045 -name: cointegrated_plasmid -def: "A MGE region consisting of two fused plasmids resulting from a replicative transposition event." 
[phigo:at] -synonym: "cointegrated plasmid" EXACT [] -synonym: "cointegrated replicon" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0001046 -name: IRLinv_site -def: "Component of the inversion site located at the left of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRLinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001047 -name: IRRinv_site -def: "Component of the inversion site located at the right of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRRinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001048 -name: inversion_site_part -def: "A region located within an inversion site." [SO:ke] -comment: A term created to allow the parts of an inversion site have an is_a path back to the root. -synonym: "inversion site part" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0001049 -name: defective_conjugative_transposon -def: "An island that contains genes for integration/excision and the gene and site for the initiation of intercellular transfer by conjugation. It can be complemented for transfer by a conjugative transposon." [Phigo:ariane] -synonym: "defective conjugative transposon" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001050 -name: repeat_fragment -def: "A portion of a repeat, interrupted by the insertion of another element." [SO:ke] -comment: Requested by Chris Smith, and others at Flybase to help annotate nested repeats. -synonym: "repeat fragment" EXACT [] -is_a: SO:0000840 ! repeat_component -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000731 ! 
fragmentary - -[Term] -id: SO:0001051 -name: nested_region -is_obsolete: true - -[Term] -id: SO:0001052 -name: nested_repeat -is_obsolete: true - -[Term] -id: SO:0001053 -name: nested_transposon -is_obsolete: true - -[Term] -id: SO:0001054 -name: transposon_fragment -synonym: "transposon fragment" EXACT [] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -subset: SOFA -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." [SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001057 -name: enhanceosome -is_obsolete: true - -[Term] -id: SO:0001058 -name: promoter_targeting_sequence -def: "A transcriptional_cis_regulatory_region that restricts the activity of a CRM to a single promoter and which functions only when both itself and an insulator are located between the CRM and the promoter." [SO:regcreative] -synonym: "promoter targeting sequence" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. -subset: SOFA -synonym: "partially characterised change in DNA sequence" EXACT [] -synonym: "partially_characterised_change_in_DNA_sequence" EXACT [] -synonym: "sequence alteration" EXACT [] -is_a: SO:0000110 ! 
sequence_feature - -[Term] -id: SO:0001060 -name: sequence_variant -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -synonym: "sequence variant" EXACT [] - -[Term] -id: SO:0001061 -name: propeptide_cleavage_site -alt_id: BS:00063 -def: "The propeptide_cleavage_site is the arginine/lysine boundary on a propeptide where cleavage occurs." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "propeptide cleavage site" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0001062 -name: propeptide -alt_id: BS:00077 -def: "Part of a peptide chain which is cleaved off during the formation of the mature protein." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "propep" RELATED [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Propeptide "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature_peptide_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001064 -name: active_peptide -alt_id: BS:00076 -def: "Active peptides are proteins which are biologically active, released from a precursor molecule." [EBIBS:GAR, UniProt:curation_manual] -comment: Hormones, neuropeptides, antimicrobial peptides, are active peptides. They are typically short (<40 amino acids) in length. -subset: biosapiens -synonym: "active peptide" EXACT [] -synonym: "peptide" BROAD [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Peptide "wiki" -is_a: SO:0000419 ! 
mature_protein_region - -[Term] -id: SO:0001066 -name: compositionally_biased_region_of_peptide -alt_id: BS:00068 -def: "Polypeptide region that is rich in a particular amino acid or homopolymeric and greater than three residues in length." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "compbias" RELATED [uniprot:feature_type] -synonym: "compositional bias" RELATED [] -synonym: "compositionally biased" RELATED [] -synonym: "compositionally biased region of peptide" RELATED [] -synonym: "compositionally_biased_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001067 -name: polypeptide_motif -alt_id: BS:00032 -def: "A sequence motif is a short (up to 20 amino acids) region of biological interest. Such motifs, although they are too short to constitute functional domains, share sequence similarities and are conserved in different proteins. They display a common function (protein-binding, subcellular location etc.)." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "motif" BROAD [uniprot:feature_type] -synonym: "polypeptide motif" EXACT [] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001068 -name: polypeptide_repeat -alt_id: BS:00070 -def: "A polypeptide_repeat is a single copy of an internal sequence repetition." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "polypeptide repeat" EXACT [] -synonym: "repeat" RELATED [uniprot:feature_type] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001070 -name: polypeptide_structural_region -alt_id: BS:00337 -def: "Region of polypeptide with a given structural property." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "polypeptide structural region" EXACT [] -synonym: "structural_region" RELATED [] -is_a: SO:0000839 ! 
polypeptide_region - -[Term] -id: SO:0001071 -name: membrane_structure -alt_id: BS:00128 -def: "Arrangement of the polypeptide with respect to the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "membrane_structure" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001072 -name: extramembrane_polypeptide_region -alt_id: BS:00154 -def: "Polypeptide region that is localized outside of a lipid bilayer." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "extramembrane" RELATED BS [] -synonym: "extramembrane polypeptide region" EXACT [] -synonym: "extramembrane_region" RELATED BS [] -synonym: "topo_dom" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001073 -name: cytoplasmic_polypeptide_region -alt_id: BS:00145 -def: "Polypeptide region that is localized inside the cytoplasm." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "cytoplasm_location" EXACT BS [] -synonym: "cytoplasmic polypeptide region" EXACT [] -synonym: "inside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001074 -name: non_cytoplasmic_polypeptide_region -alt_id: BS:00144 -def: "Polypeptide region that is localized outside of a lipid bilayer and outside of the cytoplasm." [EBIBS:GAR, SO:cb] -comment: This could be inside an organelle within the cell. -subset: biosapiens -synonym: "non cytoplasmic polypeptide region" EXACT [] -synonym: "non_cytoplasm_location" EXACT BS [] -synonym: "outside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001075 -name: intramembrane_polypeptide_region -alt_id: BS:00156 -def: "Polypeptide region present in the lipid bilayer." [EBIBS:GAR] -subset: biosapiens -synonym: "intramembrane" RELATED BS [] -synonym: "intramembrane polypeptide region" EXACT [] -is_a: SO:0001070 ! 
polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001076 -name: membrane_peptide_loop -alt_id: BS:00155 -def: "Polypeptide region localized within the lipid bilayer where both ends traverse the same membrane." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "membrane peptide loop" EXACT [] -synonym: "membrane_loop" RELATED BS [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001077 -name: transmembrane_polypeptide_region -alt_id: BS:00158 -def: "Polypeptide region traversing the lipid bilayer." [EBIBS:GAR, UniProt:curator_manual] -subset: biosapiens -synonym: "transmem" RELATED BS [uniprot:feature_type] -synonym: "transmembrane" RELATED BS [] -synonym: "transmembrane polypeptide region" EXACT [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001078 -name: polypeptide_secondary_structure -alt_id: BS:00003 -def: "A region of peptide with secondary structure has hydrogen bonding along the peptide chain that causes a defined conformation of the chain." [EBIBS:GAR] -comment: Biosapien term was secondary_structure. -subset: biosapiens -synonym: "2nary structure" RELATED BS [] -synonym: "polypeptide secondary structure" EXACT [] -synonym: "secondary structure" RELATED BS [] -synonym: "secondary structure region" RELATED BS [] -synonym: "secondary_structure" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Secondary_structure "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001079 -name: polypeptide_structural_motif -alt_id: BS:0000338 -def: "Motif is a three-dimensional structural element within the chain, which appears also in a variety of other molecules. Unlike a domain, a motif does not need to form a stable globular unit." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide structural motif" RELATED [] -synonym: "structural_motif" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Structural_motif "wiki" -is_a: SO:0001070 ! 
polypeptide_structural_region - -[Term] -id: SO:0001080 -name: coiled_coil -alt_id: BS:00041 -def: "A coiled coil is a structural motif in proteins, in which alpha-helices are coiled together like the strands of a rope." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "coiled" RELATED BS [uniprot:feature_type] -synonym: "coiled coil" EXACT [] -xref: http://en.wikipedia.org/wiki/Coiled_coil "wiki" -is_a: SO:0001079 ! polypeptide_structural_motif - -[Term] -id: SO:0001081 -name: helix_turn_helix -alt_id: BS:00147 -def: "A motif comprising two helices separated by a turn." [EBIBS:GAR] -subset: biosapiens -synonym: "helix turn helix" EXACT [] -synonym: "helix-turn-helix" EXACT [] -synonym: "HTH" RELATED BS [] -is_a: SO:0001079 ! polypeptide_structural_motif -relationship: has_part SO:0001114 ! peptide_helix -relationship: has_part SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001082 -name: polypeptide_sequencing_information -alt_id: BS:00125 -def: "Incompatibility in the sequence due to some experimental problem." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "sequencing_information" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0001083 -name: non_adjacent_residues -alt_id: BS:00182 -def: "Indicates that two consecutive residues in a fragment sequence are not consecutive in the full-length protein and that there are a number of unsequenced residues between them." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "non consecutive" EXACT [] -synonym: "non_cons" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001084 -name: non_terminal_residue -alt_id: BS:00072 -def: "The residue at an extremity of the sequence is not the terminal residue." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "non terminal" EXACT [] -synonym: "non_ter" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! 
polypeptide_sequencing_information - -[Term] -id: SO:0001085 -name: sequence_conflict -alt_id: BS:00069 -def: "Different sources report differing sequences." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "conflict" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001086 -name: sequence_uncertainty -alt_id: BS:00181 -def: "Describes the positions in a sequence where the authors are unsure about the sequence assignment." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "unsure" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001087 -name: cross_link -alt_id: BS:00178 -def: "Posttranslationally formed amino acid bonds." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "cross_link" EXACT [] -synonym: "crosslink" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001088 -name: disulfide_bond -alt_id: BS:00028 -def: "The covalent bond between sulfur atoms that binds two peptide chains or different parts of one peptide chain and is a structural determinant in many protein molecules." [EBIBS:GAR, UniProt:curation_manual] -comment: 2 discreet & joined. -subset: biosapiens -synonym: "disulfid" RELATED [] -synonym: "disulfide" RELATED [] -synonym: "disulfide bond" RELATED [] -synonym: "disulfide_bond" EXACT [] -synonym: "disulphide" EXACT [] -synonym: "disulphide bond" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001089 -name: post_translationally_modified_region -alt_id: BS:00052 -def: "A region where a transformation occurs in a protein after it has been synthesized. This which may regulate, stabilize, crosslink or introduce new chemical functionalities in the protein." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. 
-subset: biosapiens -synonym: "mod_res" EXACT [uniprot:feature_type] -synonym: "modified residue" EXACT [] -synonym: "post_translational_modification" EXACT [] -xref: http://en.wikipedia.org/wiki/Post_translational_modification "wiki" -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001090 -name: covalent_binding_site -alt_id: BS:00246 -def: "Binding involving a covalent bond." [EBIBS:GAR] -subset: biosapiens -synonym: "covalent_binding_site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001091 -name: non_covalent_binding_site -alt_id: BS:00029 -def: "Binding site for any chemical group (co-enzyme, prosthetic group, etc.)." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "binding" RELATED [uniprot:curation] -synonym: "binding site" RELATED [] -synonym: "non_covalent_binding_site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001092 -name: polypeptide_metal_contact -alt_id: BS:00027 -def: "Residue is part of a binding site for a metal ion." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "metal_binding" RELATED [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001093 -name: protein_protein_contact -alt_id: BS:00131 -def: "Residues involved in protein-protein interactions." [EBIBS:GAR, UniProt:Curation_manual] -subset: biosapiens -synonym: "protein protein contact" EXACT [] -synonym: "protein protein contact site" EXACT [] -synonym: "protein_protein_interaction" RELATED [] -xref: http://en.wikipedia.org/wiki/Protein_protein_interaction "wiki" -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001094 -name: polypeptide_calcium_ion_contact_site -alt_id: BS:00186 -def: "Residue involved in contact with calcium." [EBIBS:GAR] -subset: biosapiens -synonym: "ca bind" RELATED [] -synonym: "Ca_contact_site" EXACT [] -synonym: "polypeptide calcium ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001095 -name: polypeptide_cobalt_ion_contact_site -alt_id: BS:00136 -def: "Residue involved in contact with cobalt." [EBIBS:GAR] -subset: biosapiens -synonym: "Co_contact_site" EXACT [] -synonym: "polypeptide cobalt ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001096 -name: polypeptide_copper_ion_contact_site -alt_id: BS:00146 -def: "Residue involved in contact with copper." [EBIBS:GAR] -subset: biosapiens -synonym: "Cu_contact_site" EXACT [] -synonym: "polypeptide copper ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001097 -name: polypeptide_iron_ion_contact_site -alt_id: BS:00137 -def: "Residue involved in contact with iron." [EBIBS:GAR] -subset: biosapiens -synonym: "Fe_contact_site" EXACT [] -synonym: "polypeptide iron ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001098 -name: polypeptide_magnesium_ion_contact_site -alt_id: BS:00187 -def: "Residue involved in contact with magnesium." [EBIBS:GAR] -subset: biosapiens -synonym: "Mg_contact_site" EXACT [] -synonym: "polypeptide magnesium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001099 -name: polypeptide_manganese_ion_contact_site -alt_id: BS:00140 -def: "Residue involved in contact with manganese." [EBIBS:GAR] -subset: biosapiens -synonym: "Mn_contact_site" EXACT [] -synonym: "polypeptide manganese ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001100 -name: polypeptide_molybdenum_ion_contact_site -alt_id: BS:00141 -def: "Residue involved in contact with molybdenum." [EBIBS:GAR] -subset: biosapiens -synonym: "Mo_contact_site" EXACT [] -synonym: "polypeptide molybdenum ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001101 -name: polypeptide_nickel_ion_contact_site -alt_id: BS:00142 -def: "Residue involved in contact with nickel." [EBIBS:GAR] -subset: biosapiens -synonym: "Ni_contact_site" EXACT [] -synonym: "polypeptide nickel ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001102 -name: polypeptide_tungsten_ion_contact_site -alt_id: BS:00143 -def: "Residue involved in contact with tungsten." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide tungsten ion contact site" EXACT [] -synonym: "W_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001103 -name: polypeptide_zinc_ion_contact_site -alt_id: BS:00185 -def: "Residue involved in contact with zinc." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide zinc ion contact site" EXACT [] -synonym: "Zn_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001104 -name: catalytic_residue -alt_id: BS:00026 -def: "Amino acid involved in the activity of an enzyme." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "act_site" RELATED [uniprot:feature_type] -synonym: "active site residue" EXACT [] -synonym: "catalytic residue" EXACT [] -is_a: SO:0001237 ! amino_acid -relationship: part_of SO:0100019 ! polypeptide_catalytic_motif - -[Term] -id: SO:0001105 -name: polypeptide_ligand_contact -alt_id: BS:00157 -def: "Residues which interact with a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide ligand contact" EXACT [] -synonym: "protein-ligand interaction" RELATED [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! 
molecular_contact_region - -[Term] -id: SO:0001106 -name: asx_motif -alt_id: BS:00202 -def: "A motif of five consecutive residues and two H-bonds in which: Residue(i) is Aspartate or Asparagine (Asx), side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3), main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001107 -name: beta_bulge -alt_id: BS:00208 -def: "A motif of three residues within a beta-sheet in which the main chains of two consecutive residues are H-bonded to that of the third, and in which the dihedral angles are as follows: Residue(i): -140 degrees < phi(l) -20 degrees , -90 degrees < psi(l) < 40 degrees. Residue (i+1): -180 degrees < phi < -25 degrees or +120 degrees < phi < +180 degrees, +40 degrees < psi < +180 degrees or -180 degrees < psi < -120 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge" EXACT [] -xref: http://en.wikipedia.org/wiki/Beta_bulge "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001108 -name: beta_bulge_loop -alt_id: BS:00209 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds. Beta bulge loops often occur at the loop ends of beta-hairpins." [EBIBS:GAR, Http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001109 -name: beta_bulge_loop_five -alt_id: BS:00210 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+4), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+3), these loops have an RL nest at residues i+2 and i+3." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop five" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001110 -name: beta_bulge_loop_six -alt_id: BS:00211 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+5), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+4), these loops have an RL nest at residues i+3 and i+4." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop six" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001111 -name: beta_strand -alt_id: BS:00042 -def: "A beta strand describes a single length of polypeptide chain that forms part of a beta sheet. A single continuous stretch of amino acids adopting an extended conformation of hydrogen bonds between the N-O and the C=O of another part of the peptide. This forms a secondary protein structure in which two or more extended polypeptide regions are hydrogen-bonded to one another in a planar array." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "strand" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Beta_sheet "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001112 -name: antiparallel_beta_strand -alt_id: BS:0000341 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (one running N-terminal to C-terminal and one running C-terminal to N-terminal). Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i) and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they form two mutual backbone hydrogen bonds to each other's flanking peptide groups; this is known as a close pair of hydrogen bonds. 
The peptide backbone dihedral angles (phi, psi) are about (-140 degrees, 135 degrees) in antiparallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "antiparallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001113 -name: parallel_beta_strand -alt_id: BS:00151 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (both running N-terminal to C-terminal). This orientation is slightly less stable because it introduces nonplanarity in the inter-strand hydrogen bonding pattern. Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i)and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they do not hydrogen bond to each other; rather, one residue forms hydrogen bonds to the residues that flank the other (but not vice versa). For example, residue i may form hydrogen bonds to residues j - 1 and j + 1; this is known as a wide pair of hydrogen bonds. By contrast, residue j may hydrogen-bond to different residues altogether, or to none at all. The dihedral angles (phi, psi) are about (-120 degrees, 115 degrees) in parallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "parallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001114 -name: peptide_helix -alt_id: BS:00152 -def: "A helix is a secondary_structure conformation where the peptide backbone forms a coil." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "helix" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001115 -name: left_handed_peptide_helix -alt_id: BS:00222 -def: "A left handed helix is a region of peptide where the coiled conformation turns in an anticlockwise, left handed screw." 
[EBIBS:GAR] -subset: biosapiens -synonym: "helix-l" RELATED [] -synonym: "left handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001116 -name: right_handed_peptide_helix -alt_id: BS:0000339 -def: "A right handed helix is a region of peptide where the coiled conformation turns in a clockwise, right handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix" RELATED BS [] -synonym: "right handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001117 -name: alpha_helix -alt_id: BS:00040 -def: "The helix has 3.6 residues per turn which corersponds to a translation of 1.5 angstroms (= 0.15 nm) along the helical axis. Every backbone N-H group donates a hydrogen bond to the backbone C=O group of the amino acid four residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "a-helix" RELATED BS [] -synonym: "helix" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Alpha_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001118 -name: pi_helix -alt_id: BS:00153 -def: "The pi helix has 4.1 residues per turn and a translation of 1.15 (=0.115 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid five residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "pi helix" EXACT [] -xref: http://en.wikipedia.org/wiki/Pi_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001119 -name: three_ten_helix -alt_id: BS:0000340 -def: "The 3-10 helix has 3 residues per turn with a translation of 2.0 angstroms (=0.2 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid three residues earlier." [EBIBS:GAR] -comment: Range. 
-subset: biosapiens -synonym: "3(10) helix" EXACT [] -synonym: "3-10 helix" EXACT [] -synonym: "310 helix" EXACT [] -synonym: "three_ten_helix" EXACT [] -xref: http://en.wikipedia.org/wiki/310_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001120 -name: polypeptide_nest_motif -alt_id: BS:00223 -def: "A motif of two consecutive residues with dihedral angles. Nest should not have Proline as any residue. Nests frequently occur as parts of other motifs such as Schellman loops." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest" RELATED BS [] -synonym: "nest_motif" EXACT [] -synonym: "polypeptide nest motif" RELATED [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001121 -name: polypeptide_nest_left_right_motif -alt_id: BS:00224 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_left_right" EXACT [] -synonym: "nest_lr" EXACT [] -synonym: "polypeptide nest left right motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001122 -name: polypeptide_nest_right_left_motif -alt_id: BS:00225 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_right_left" EXACT [] -synonym: "nest_rl" EXACT [] -synonym: "polypeptide nest right left motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001123 -name: schellmann_loop -alt_id: BS:00226 -def: "A motif of six or seven consecutive residues that contains two H-bonds." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "paperclip" RELATED BS [] -synonym: "paperclip loop" RELATED [] -synonym: "schellmann_loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001124 -name: schellmann_loop_seven -alt_id: BS:00228 -def: "Wild type: A motif of seven consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+6), the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+5)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop seven" EXACT [] -synonym: "seven-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001125 -name: schellmann_loop_six -alt_id: BS:00227 -def: "Common Type: A motif of six consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+5) the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop six" EXACT [] -synonym: "six-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001126 -name: serine_threonine_motif -alt_id: BS:00229 -def: "A motif of five consecutive residues and two hydrogen bonds in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3) , the main-chain CO group of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine motif" EXACT [] -synonym: "st motif" EXACT [] -synonym: "st_motif" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001127 -name: serine_threonine_staple_motif -alt_id: BS:00230 -def: "A motif of four or five consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain OH of residue(i) is H-bonded to the main-chain CO of residue(i3) or (i4), Phi angles of residues(i1), (i2) and (i3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine threonine staple motif" EXACT [] -synonym: "st_staple" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001128 -name: polypeptide_turn_motif -alt_id: BS:00148 -def: "A reversal in the direction of the backbone of a protein that is stabilized by hydrogen bond between backbone NH and CO groups, involving no more than 4 amino acid residues." [EBIBS:GAR, uniprot:feature_type] -comment: Range. -subset: biosapiens -synonym: "turn" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001129 -name: asx_turn_left_handed_type_one -alt_id: BS:00206 -def: "Left handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type one" EXACT [] -synonym: "asx_turn_il" RELATED [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001130 -name: asx_turn_left_handed_type_two -alt_id: BS:00204 -def: "Left handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type two" EXACT [] -synonym: "asx_turn_iil" EXACT [] -is_a: SO:0000912 ! 
asx_turn - -[Term] -id: SO:0001131 -name: asx_turn_right_handed_type_two -alt_id: BS:00205 -def: "Right handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn right handed type two" EXACT [] -synonym: "asx_turn_iir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001132 -name: asx_turn_right_handed_type_one -alt_id: BS:00207 -def: "Right handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn type right handed type one" EXACT [] -synonym: "asx_turn_ir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001133 -name: beta_turn -alt_id: BS:00212 -def: "A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles of the second and third residues, which are the basis for sub-categorization." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001134 -name: beta_turn_left_handed_type_one -alt_id: BS:00215 -def: "Left handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles:- Residue(i+1): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees. 
Residue(i+2): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type one" EXACT [] -synonym: "beta_turn_il" EXACT [] -synonym: "type I' beta turn" EXACT [] -synonym: "type I' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001135 -name: beta_turn_left_handed_type_two -alt_id: BS:00213 -def: "Left handed type II: A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees > phi > -20 degrees, +80 degrees > psi > +180 degrees. Residue(i+2): +20 degrees > phi > +140 degrees, -40 degrees > psi > +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type two" EXACT [] -synonym: "beta_turn_iil" EXACT [] -synonym: "type II' beta turn" EXACT [] -synonym: "type II' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001136 -name: beta_turn_right_handed_type_one -alt_id: BS:00216 -def: "Right handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+2): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type one" EXACT [] -synonym: "beta_turn_ir" EXACT [] -synonym: "type I beta turn" EXACT [] -synonym: "type I turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001137 -name: beta_turn_right_handed_type_two -alt_id: BS:00214 -def: "Right handed type II:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, +80 degrees < psi < +180 degrees. Residue(i+2): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type two" EXACT [] -synonym: "beta_turn_iir" EXACT [] -synonym: "type II beta turn" EXACT [] -synonym: "type II turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001138 -name: gamma_turn -alt_id: BS:00219 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001139 -name: gamma_turn_classic -alt_id: BS:00220 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=75.0 - psi(i+1)=-64.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "classic gamma turn" EXACT [] -synonym: "gamma turn classic" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001140 -name: gamma_turn_inverse -alt_id: BS:00221 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=-79.0 - psi(i+1)=69.0." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn inverse" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001141 -name: serine_threonine_turn -alt_id: BS:00231 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine turn" EXACT [] -synonym: "st_turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001142 -name: st_turn_left_handed_type_one -alt_id: BS:00234 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type one" EXACT [] -synonym: "st_turn_il" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001143 -name: st_turn_left_handed_type_two -alt_id: BS:00232 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type two" EXACT [] -synonym: "st_turn_iil" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001144 -name: st_turn_right_handed_type_one -alt_id: BS:00235 -def: "The peptide twists in an clockwise, right handed manner. 
The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type one" EXACT [] -synonym: "st_turn_ir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001145 -name: st_turn_right_handed_type_two -alt_id: BS:00233 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type two" EXACT [] -synonym: "st_turn_iir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001146 -name: polypeptide_variation_site -alt_id: BS:00336 -def: "A site of sequence variation (alteration). Alternative sequence due to naturally occuring events such as polymorphisms and altermatve splicing or experimental methods such as site directed mutagenesis." [EBIBS:GAR, SO:ke] -comment: For example, was a substitution natural or mutated as part of an experiment? This term is added to merge the biosapiens term sequence_variations. -subset: biosapiens -synonym: "sequence_variations" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001147 -name: natural_variant_site -alt_id: BS:00071 -def: "Describes the natural sequence variants due to polymorphisms, disease-associated mutations, RNA editing and variations between strains, isolates or cultivars." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. 
-subset: biosapiens -synonym: "natural_variant" BROAD [] -synonym: "sequence variation" BROAD [] -synonym: "variant" BROAD [uniprot:feature_type] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001148 -name: mutated_variant_site -alt_id: BS:00036 -def: "Site which has been experimentally altered." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mutagen" EXACT BS [uniprot:feature_type] -synonym: "mutagenesis" EXACT [] -synonym: "mutated_site" EXACT [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001149 -name: alternate_sequence_site -alt_id: BS:00073 -alt_id: SO:0001065 -def: "Description of sequence variants produced by alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "alternative_sequence" EXACT [] -synonym: "isoform" NARROW [] -synonym: "sequence variation" NARROW [] -synonym: "var_seq" EXACT [uniprot:feature_type] -synonym: "varsplic" NARROW [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001150 -name: beta_turn_type_six -def: "A motif of four consecutive peptide resides of type VIa or type VIb and where the i+2 residue is cis-proline." [SO:cb] -subset: biosapiens -synonym: "beta turn type six" EXACT [] -synonym: "cis-proline loop" EXACT [] -synonym: "type VI beta turn" EXACT [] -synonym: "type VI turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001151 -name: beta_turn_type_six_a -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -90 degrees, psi ~ 0 degrees." 
[PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six a" EXACT [] -synonym: "type VIa beta turn" EXACT [] -synonym: "type VIa turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001152 -name: beta_turn_type_six_a_one -subset: biosapiens -synonym: "beta turn type six a one" EXACT [] -synonym: "type VIa1 beta turn" EXACT [] -synonym: "type VIa1 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001153 -name: beta_turn_type_six_a_two -subset: biosapiens -synonym: "beta turn type six a two" EXACT [] -synonym: "type VIa2 beta turn" EXACT [] -synonym: "type VIa2 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001154 -name: beta_turn_type_six_b -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -120 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -60 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six b" EXACT [] -synonym: "type VIb beta turn" EXACT [] -synonym: "type VIb turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001155 -name: beta_turn_type_eight -def: "A motif of four consecutive peptide residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ -30 degrees. Residue(i+2): phi ~ -120 degrees, psi ~ 120 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type eight" EXACT [] -synonym: "type VIII beta turn" EXACT [] -synonym: "type VIII turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001156 -name: DRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -10 and -60 relative to the TSS. Consensus sequence is WATCGATW." [PMID:12537576] -comment: This consensus sequence was identified computationally using the MEME algorithm within core promoter sequences from -60 to +40, with an E value of 1.7e-183. Tends to co-occur with Motif 7. Tends to not occur with DPE motif (SO:0000015) or motif 10. -synonym: "DRE motif" EXACT [] -synonym: "NDM4" EXACT [] -synonym: "WATCGATW_motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001157 -name: DMv4_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements with respect to the TSS (+1). Consensus sequence is YGGTCACACTR. Marked spatial preference within core promoter; tend to occur near the TSS, although not as tightly as INR (SO:0000014)." [PMID:16827941\:12537576] -synonym: "directional motif v4" EXACT [] -synonym: "DMv4" EXACT [] -synonym: "DMv4 motif" EXACT [] -synonym: "motif 1 element" EXACT [] -synonym: "promoter motif 1" EXACT [] -synonym: "YGGTCACATR" NARROW [] -is_a: SO:0000713 ! DNA_motif - -[Term] -id: SO:0001158 -name: E_box_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and +1 relative to the TSS. Consensus sequence is AWCAGCTGWT. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015)." [PMID:12537576\:16827941] -synonym: "AWCAGCTGWT" NARROW [] -synonym: "E box motif" EXACT [] -synonym: "generic E box motif" EXACT [] -synonym: "NDM5" RELATED [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001159 -name: DMv5_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -50 and -10 relative to the TSS. Consensus sequence is KTYRGTATWTTT. Tends to co-occur with DMv4 (SO:0001157) . Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v5" EXACT [] -synonym: "DMv5" EXACT [] -synonym: "DMv5 motif" EXACT [] -synonym: "KTYRGTATWTTT" NARROW [] -synonym: "promoter motif 6" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001160 -name: DMv3_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -30 and +15 relative to the TSS. Consensus sequence is KNNCAKCNCTRNY. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015) or MTE (0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v3" EXACT [] -synonym: "DMv3" EXACT [] -synonym: "DMv3 motif" EXACT [] -synonym: "KNNCAKCNCTRNY" NARROW [] -synonym: "promoter motif 7" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001161 -name: DMv2_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and -45 relative to the TSS. Consensus sequence is MKSYGGCARCGSYSS. Tends to co-occur with DMv3 (SO:0001160). Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v2" EXACT [] -synonym: "DMv2" EXACT [] -synonym: "DMv2 motif" EXACT [] -synonym: "MKSYGGCARCGSYSS" NARROW [] -synonym: "promoter motif 8" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001162 -name: MTE -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between +20 and +30 relative to the TSS. Consensus sequence is CSARCSSAACGS. Tends to co-occur with INR motif (SO:0000014). Tends to not occur with DPE motif (SO:0000015) or DMv5 (SO:0001159)." [PMID:12537576\:15231738] -synonym: "CSARCSSAACGS" NARROW [] -synonym: "motif ten element" EXACT [] -synonym: "motif_ten_element" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001163 -name: INR1_motif -def: "A promoter motif with consensus sequence TCATTCG." [PMID:16827941] -synonym: "directional motif p3" EXACT [] -synonym: "directional promoter motif 3" EXACT [] -synonym: "DMp3" EXACT [] -synonym: "INR1 motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001164 -name: DPE1_motif -def: "A promoter motif with consensus sequence CGGACGT." [PMID:16827941] -synonym: "directional motif 5" EXACT [] -synonym: "directional promoter motif 5" RELATED [] -synonym: "DMp5" EXACT [] -synonym: "DPE1 motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001165 -name: DMv1_motif -def: "A promoter motif with consensus sequence CARCCCT." [PMID:16827941] -synonym: "directional promoter motif v1" RELATED [] -synonym: "DMv1" RELATED [] -synonym: "DMv1 motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001166 -name: GAGA_motif -def: "A non directional promoter motif with consensus sequence GAGAGCG." [PMID:16827941] -synonym: "GAGA" EXACT [] -synonym: "GAGA motif" EXACT [] -synonym: "NDM1" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001167 -name: NDM2_motif -def: "A non directional promoter motif with consensus CGMYGYCR." [PMID:16827941] -synonym: "NDM2" EXACT [] -synonym: "NDM2 motif" EXACT [] -synonym: "non directional promoter motif 2" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001168 -name: NDM3_motif -def: "A non directional promoter motif with consensus sequence GAAAGCT." [PMID:16827941] -synonym: "NDM3" EXACT [] -synonym: "NDM3 motif" EXACT [] -synonym: "non directional motif 3" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001169 -name: ds_RNA_viral_sequence -def: "A ds_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded RNA." [SO:ke] -synonym: "double stranded RNA virus sequence" EXACT [] -synonym: "ds RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001170 -name: polinton -def: "A kind of DNA transposon that populates the genomes of protists, fungi, and animals, characterized by a unique set of proteins necessary for their transposition, including a protein-primed DNA polymerase B, retroviral integrase, cysteine protease, and ATPase. Polintons are characterized by 6-bp target site duplications, terminal-inverted repeats that are several hundred nucleotides long, and 5'-AG and TC-3' termini. Polintons exist as autonomous and nonautonomous elements." [PMID:16537396] -synonym: "maverick element" RELATED [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0001171 -name: rRNA_21S -def: "A component of the large ribosomal subunit in mitochondrial rRNA." [RSC:cb] -synonym: "21S LSU rRNA" EXACT [] -synonym: "21S ribosomal RNA" EXACT [] -synonym: "21S rRNA" EXACT [] -synonym: "rRNA 21S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001172 -name: tRNA_region -def: "A region of a tRNA." 
[RSC:cb] -synonym: "tRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000253 ! tRNA - -[Term] -id: SO:0001173 -name: anticodon_loop -def: "A sequence of seven nucleotide bases in tRNA which contains the anticodon. It has the sequence 5'-pyrimidine-purine-anticodon-modified purine-any base-3." [ISBN:0716719207] -synonym: "anti-codon loop" EXACT [] -synonym: "anticodon loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001174 -name: anticodon -def: "A sequence of three nucleotide bases in tRNA which recognizes a codon in mRNA." [RSC:cb] -synonym: "anti-codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Anticodon "wiki" -is_a: SO:0001172 ! tRNA_region -relationship: part_of SO:0001173 ! anticodon_loop - -[Term] -id: SO:0001175 -name: CCA_tail -def: "Base sequence at the 3' end of a tRNA. The 3'-hydroxyl group on the terminal adenosine is the attachment point for the amino acid." [ISBN:0716719207] -synonym: "CCA sequence" EXACT [] -synonym: "CCA tail" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001176 -name: DHU_loop -def: "Non-base-paired sequence of nucleotide bases in tRNA. It contains several dihydrouracil residues." [ISBN:071671920] -synonym: "D loop" RELATED [] -synonym: "DHU loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001177 -name: T_loop -def: "Non-base-paired sequence of three nucleotide bases in tRNA. It has sequence T-Psi-C." [ISBN:0716719207] -synonym: "T loop" EXACT [] -synonym: "TpsiC loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001178 -name: pyrrolysine_tRNA_primary_transcript -def: "A primary transcript encoding pyrrolysyl tRNA (SO:0000766)." [RSC:cb] -synonym: "pyrrolysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0001179 -name: U3_snoRNA -def: "U3 snoRNA is a member of the box C/D class of small nucleolar RNAs. 
The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -comment: The definition is most of the old definition for snoRNA (SO:0000275). -synonym: "small nucleolar RNA U3" EXACT [] -synonym: "snoRNA U3" EXACT [] -synonym: "U3 small nucleolar RNA" EXACT [] -synonym: "U3 snoRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Small_nucleolar_RNA_U3 "wiki" -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0001180 -name: AU_rich_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is rich in AUUUA pentamers. Messenger RNAs bearing multiple AU-rich elements are often unstable." [PMID:7892223] -synonym: "ARE" RELATED [] -synonym: "AU rich element" EXACT [] -synonym: "AU-rich element" EXACT [] -xref: http://en.wikipedia.org/wiki/AU-rich_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001181 -name: Bruno_response_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is bound by the Drosophila Bruno protein and its homologs." 
[PMID:10893231] -comment: Not to be confused with BRE_motif (SO:0000016), which binds transcription factor II B. -synonym: "BRE" RELATED [] -synonym: "Bruno response element" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001182 -name: iron_responsive_element -def: "A regulatory sequence found in the 5' and 3' UTRs of many mRNAs which encode iron-binding proteins. It has a hairpin structure and is recognized by trans-acting proteins known as iron-regulatory proteins." [PMID:3198610, PMID:8710843] -synonym: "IRE" EXACT [] -synonym: "iron responsive element" EXACT [] -xref: http://en.wikipedia.org/wiki/Iron_responsive_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0001183 -name: morpholino -def: "An attribute describing a sequence composed of nucleobases bound to a morpholino backbone. A morpholino backbone consists of morpholine (CHEBI:34856) rings connected by phosphorodiamidate linkages." [RSC:cb] -comment: Do not use this for feature annotation. Use morpholino_oligo (SO:0000034) instead. -xref: http://en.wikipedia.org/wiki/Morpholino "wiki" -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001184 -name: PNA -def: "An attribute describing a sequence composed of peptide nucleic acid (CHEBI:48021), a chemical consisting of nucleobases bound to a backbone composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [RSC:cb] -comment: Do not use this term for feature annotation. Use PNA_oligo (SO:0001011) instead. -synonym: "peptide nucleic acid" RELATED [] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001185 -name: enzymatic -def: "An attribute describing the sequence of a transcript that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. 
Use enzymatic_RNA (SO:0000372) instead. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001186 -name: ribozymic -def: "An attribute describing the sequence of a transcript that has catalytic activity even without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use ribozyme (SO:0000374) instead. -is_a: SO:0001185 ! enzymatic - -[Term] -id: SO:0001187 -name: pseudouridylation_guide_snoRNA -def: "A snoRNA that specifies the site of pseudouridylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA pseudouridylation guide activity (GO:0030558). -synonym: "pseudouridylation guide snoRNA" EXACT [] -is_a: SO:0000594 ! H_ACA_box_snoRNA - -[Term] -id: SO:0001188 -name: LNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of 'locked' deoxyribose rings connected to a phosphate backbone. The deoxyribose unit's conformation is 'locked' by a 2'-C,4'-C-oxymethylene link." [CHEBI:48010] -comment: Do not use this term for feature annotation. Use LNA_oligo (SO:0001189) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001189 -name: LNA_oligo -def: "An oligo composed of LNA residues." [RSC:cb] -synonym: "LNA oligo" EXACT [] -synonym: "locked nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Locked_nucleic_acid "wiki" -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001188 ! LNA - -[Term] -id: SO:0001190 -name: TNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of threose rings connected to a phosphate backbone." [CHEBI:48019] -comment: Do not use this term for feature annotation. Use TNA_oligo (SO:0001191) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001191 -name: TNA_oligo -def: "An oligo composed of TNA residues." 
[RSC:cb] -synonym: "threose nucleic acid" EXACT [] -synonym: "TNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Threose_nucleic_acid "wiki" -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001190 ! TNA - -[Term] -id: SO:0001192 -name: GNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of an acyclic three-carbon propylene glycol connected to a phosphate backbone. It has two enantiomeric forms, (R)-GNA and (S)-GNA." [CHEBI:48015] -comment: Do not use this term for feature annotation. Use GNA_oligo (SO:0001192) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001193 -name: GNA_oligo -def: "An oligo composed of GNA residues." [RSC:cb] -synonym: "glycerol nucleic acid" EXACT [] -synonym: "glycol nucleic acid" EXACT [] -synonym: "GNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Glycerol_nucleic_acid "wiki" -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001192 ! GNA - -[Term] -id: SO:0001194 -name: R_GNA -def: "An attribute describing a GNA sequence in the (R)-GNA enantiomer." [CHEBI:48016] -comment: Do not use this term for feature annotation. Use R_GNA_oligo (SO:0001195) instead. -synonym: "R GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001195 -name: R_GNA_oligo -def: "An oligo composed of (R)-GNA residues." [RSC:cb] -synonym: "(R)-glycerol nucleic acid" EXACT [] -synonym: "(R)-glycol nucleic acid" EXACT [] -synonym: "R GNA oligo" EXACT [] -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001194 ! R_GNA - -[Term] -id: SO:0001196 -name: S_GNA -def: "An attribute describing a GNA sequence in the (S)-GNA enantiomer." [CHEBI:48017] -comment: Do not use this term for feature annotation. Use S_GNA_oligo (SO:0001197) instead. -synonym: "S GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001197 -name: S_GNA_oligo -def: "An oligo composed of (S)-GNA residues." 
[RSC:cb] -synonym: "(S)-glycerol nucleic acid" EXACT [] -synonym: "(S)-glycol nucleic acid" EXACT [] -synonym: "S GNA oligo" EXACT [] -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001196 ! S_GNA - -[Term] -id: SO:0001198 -name: ds_DNA_viral_sequence -def: "A ds_DNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded DNA." [SO:ke] -synonym: "double stranded DNA virus" EXACT [] -synonym: "ds DNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001199 -name: ss_RNA_viral_sequence -def: "A ss_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as single stranded RNA." [SO:ke] -synonym: "single strand RNA virus" EXACT [] -synonym: "ss RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001200 -name: negative_sense_ssRNA_viral_sequence -def: "A negative_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that is complementary to mRNA and must be converted to positive sense RNA by RNA polymerase before translation." [SO:ke] -synonym: "negative sense single stranded RNA virus" RELATED [] -synonym: "negative sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001201 -name: positive_sense_ssRNA_viral_sequence -def: "A positive_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that can be immediately translated by the host." [SO:ke] -synonym: "positive sense single stranded RNA virus" RELATED [] -synonym: "positive sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001202 -name: ambisense_ssRNA_viral_sequence -def: "A ambisense_RNA_virus is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus with both messenger and anti messenger polarity." 
[SO:ke] -synonym: "ambisense single stranded RNA virus" EXACT [] -synonym: "ambisense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001203 -name: RNA_polymerase_promoter -def: "A region (DNA) to which RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "RNA polymerase promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0001204 -name: Phage_RNA_Polymerase_Promoter -def: "A region (DNA) to which Bacteriophage RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "Phage RNA Polymerase Promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0001205 -name: SP6_RNA_Polymerase_Promoter -def: "A region (DNA) to which the SP6 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "SP6 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001206 -name: T3_RNA_Polymerase_Promoter -def: "A DNA sequence to which the T3 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T3 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001207 -name: T7_RNA_Polymerase_Promoter -def: "A region (DNA) to which the T7 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T7 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001208 -name: five_prime_EST -def: "An EST read from the 5' end of a transcript that usually codes for a protein. These regions tend to be conserved across species and do not change much within a gene family." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "5' EST" EXACT [] -synonym: "five prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001209 -name: three_prime_EST -def: "An EST read from the 3' end of a transcript. They are more likely to fall within non-coding, or untranslated regions(UTRs)." 
[http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "3' EST" EXACT [] -synonym: "three prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001210 -name: translational_frameshift -def: "The region of mRNA (not divisible by 3 bases) that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "ribosomal frameshift" EXACT [] -synonym: "translational frameshift" EXACT [] -xref: http://en.wikipedia.org/wiki/Translational_frameshift "wiki" -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0001211 -name: plus_1_translational_frameshift -def: "The region of mRNA 1 base long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 1 ribosomal frameshift" EXACT [] -synonym: "plus 1 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001212 -name: plus_2_translational_frameshift -def: "The region of mRNA 2 bases long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 2 ribosomal frameshift" EXACT [] -synonym: "plus 2 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001213 -name: group_III_intron -def: "Group III introns are introns found in the mRNA of the plastids of euglenoid protists. They are spliced by a two step transesterification with bulged adenosine as initiating nucleophile." [PMID:11377794] -comment: GO:0000374. -synonym: "group III intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_III_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -def: "The maximal intersection of exon and UTR." 
[SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with astop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001216 -name: endonuclease_spliced_intron -def: "An intron that spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -synonym: "endonuclease spliced intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0001217 -name: protein_coding_gene -synonym: "protein coding gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000010 ! protein_coding - -[Term] -id: SO:0001218 -name: transgenic_insertion -def: "An insertion that derives from another organism, via the use of recombinant DNA technology." [SO:bm] -synonym: "transgenic insertion" EXACT [] -intersection_of: SO:0000667 ! insertion -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0001219 -name: retrogene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000569 ! retrotransposed - -[Term] -id: SO:0001220 -name: silenced_by_RNA_interference -def: "An attribute describing an epigenetic process where a gene is inactivated by RNA interference." [RSC:cb] -comment: RNA interference is GO:0016246. -synonym: "silenced by RNA interference" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001221 -name: silenced_by_histone_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by histone modification." [RSC:cb] -comment: Histone modification is GO:0016570. 
-synonym: "silenced by histone modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001222 -name: silenced_by_histone_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone methylation." [RSC:cb] -comment: Histone methylation is GO:0016571. -synonym: "silenced by histone methylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001223 -name: silenced_by_histone_deacetylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone deacetylation." [RSC:cb] -comment: Histone deacetylation is GO:0016573. -synonym: "silenced by histone deacetylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001224 -name: gene_silenced_by_RNA_interference -def: "A gene that is silenced by RNA interference." [SO:xp] -synonym: "gene silenced by RNA interference" EXACT [] -synonym: "RNA interference silenced gene" EXACT [] -synonym: "RNAi silenced gene" EXACT [] -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001220 ! silenced_by_RNA_interference - -[Term] -id: SO:0001225 -name: gene_silenced_by_histone_modification -def: "A gene that is silenced by histone modification." [SO:xp] -synonym: "gene silenced by histone modification" EXACT [] -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001226 -name: gene_silenced_by_histone_methylation -def: "A gene that is silenced by histone methylation." [SO:xp] -synonym: "gene silenced by histone methylation" EXACT [] -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001222 ! silenced_by_histone_methylation - -[Term] -id: SO:0001227 -name: gene_silenced_by_histone_deacetylation -def: "A gene that is silenced by histone deacetylation." 
[SO:xp] -synonym: "gene silenced by histone deacetylation" EXACT [] -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001223 ! silenced_by_histone_deacetylation - -[Term] -id: SO:0001228 -name: dihydrouridine -def: "A modified RNA base in which the 5,6-dihydrouracil is bound to the ribose ring." [RSC:cb] -synonym: "D" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Dihydrouridine "wiki" -xref: RNAMOD:051 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001229 -name: pseudouridine -def: "A modified RNA base in which the 5- position of the uracil is bound to the ribose ring instead of the 4- position." [RSC:cb] -comment: The free molecule is CHEBI:17802. -synonym: "Y" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Pseudouridine "wiki" -xref: RNAMOD:050 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001230 -name: inosine -def: "A modified RNA base in which hypoxanthine is bound to the ribose ring." [http://library.med.utah.edu/RNAmods/, RSC:cb] -comment: The free molecule is CHEBI:17596. -synonym: "I" RELATED [] -synonym: "RNAMOD:017" RELATED [] -xref: http://en.wikipedia.org/wiki/Inosine "wiki" -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001231 -name: seven_methylguanine -def: "A modified RNA base in which guanine is methylated at the 7- position." [RSC:cb] -comment: The free molecule is CHEBI:2274. -synonym: "7-methylguanine" EXACT [] -synonym: "seven methylguanine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001232 -name: ribothymidine -def: "A modified RNA base in which thymine is bound to the ribose ring." [RSC:cb] -comment: The free molecule is CHEBI:30832. -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001233 -name: methylinosine -def: "A modified RNA base in which methylhypoxanthine is bound to the ribose ring." [RSC:cb] -is_a: SO:0001274 ! 
modified_inosine - -[Term] -id: SO:0001234 -name: mobile -def: "An attribute describing a feature that has either intra-genome or intracellular mobility." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Mobile "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001235 -name: replicon -def: "A region containing at least one unique origin of replication and a unique termination site." [ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000001 ! region - -[Term] -id: SO:0001237 -name: amino_acid -def: "A sequence feature that corresponds to a single amino acid residue in a polypeptide." [RSC:cb] -comment: Probably in the future this will cross reference to Chebi. -synonym: "amino acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Amino_acid "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0001238 -name: major_TSS -synonym: "major transcription start site" EXACT [] -synonym: "major TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001239 -name: minor_TSS -synonym: "minor TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001240 -name: TSS_region -def: "The region of a gene from the 5' most TSS to the 3' TSS." [BBOP:nw] -synonym: "TSS region" EXACT [] -is_a: SO:0000842 ! gene_component_region -relationship: has_part SO:0000315 ! TSS - -[Term] -id: SO:0001241 -name: encodes_alternate_transcription_start_sites -synonym: "encodes alternate transcription start sites" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0001243 -name: miRNA_primary_transcript_region -def: "A part of an miRNA primary_transcript." 
[SO:ke] -synonym: "miRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0001244 -name: pre_miRNA -def: "The 60-70 nucleotide region remain after Drosha processing of the primary transcript, that folds back upon itself to form a hairpin sructure." [SO:ke] -synonym: "pre-miRNA" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0001245 -name: miRNA_stem -def: "The stem of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA stem" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001246 -name: miRNA_loop -def: "The loop of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA loop" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001247 -name: synthetic_oligo -def: "An oligo composed of synthetic nucleotides." [SO:ke] -synonym: "synthetic oligo" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0001248 -name: assembly -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001249 -name: fragment_assembly -def: "A fragment assembly is a genome assembly that orders overlapping fragments of the genome based on landmark sequences. The base pair distance between the landmarks is known allowing additivity of lengths." [SO:ke] -synonym: "fragment assembly" EXACT [] -synonym: "physical map" EXACT [] -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0001250 -name: fingerprint_map -def: "A fingerprint_map is a physical map composed of restriction fragments." 
[SO:ke] -synonym: "BACmap" EXACT [] -synonym: "fingerprint map" EXACT [] -synonym: "FPC" EXACT [] -synonym: "FPCmap" EXACT [] -synonym: "restriction map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000412 ! restriction_fragment - -[Term] -id: SO:0001251 -name: STS_map -def: "An STS map is a physical map organized by the unique STS landmarks." [SO:ke] -synonym: "STS map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001252 -name: RH_map -def: "A radiation hybrid map is a physical map." [SO:ke] -synonym: "radiation hybrid map" EXACT [] -synonym: "RH map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001253 -name: sonicate_fragment -def: "A DNA fragment generated by sonication. Sonication is a technique used to sheer DNA into smaller fragments." [SO:ke] -synonym: "sonicate fragment" EXACT [] -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0001254 -name: polyploid -def: "A kind of chromosome variation where the chromosome complement is an exact multiple of the haploid number and is greater than the diploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Polyploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0001255 -name: autopolyploid -def: "A polyploid where the multiple chromosome set was derived from the same organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Autopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001256 -name: allopolyploid -def: "A polyploid where the multiple chromosome set was derived from a different organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Allopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001257 -name: homing_endonuclease_binding_site -def: "The binding site (recognition site) of a homing endonuclease. The binding site is typically large." 
[SO:ke] -synonym: "homing endonuclease binding site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0001258 -name: octamer_motif -def: "A sequence element characteristic of some RNA polymerase II promoters with sequence ATTGCAT that binds Pou-domain transcription factors." [GOC:dh, PMID:3095662] -comment: Nature. 1986 Oct 16-22;323(6089):640-3. -synonym: "octamer motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001259 -name: apicoplast_chromosome -def: "A chromosome originating in an apicoplast." [SO:xp] -synonym: "apicoplast chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0001260 -name: sequence_collection -def: "A collection of discontinuous sequences." [SO:ke] -synonym: "sequence collection" EXACT [] - -[Term] -id: SO:0001261 -name: overlapping_feature_set -def: "A continuous region of sequence composed of the overlapping of multiple sequence_features, which ultimately provides evidence for another sequence_feature." [SO:ke] -comment: This feature was requested by nicole, tracker id 1911479. It is required to gather evidense together for annotation. An example would be overlapping ESTs that support an mRNA. -synonym: "overlapping feature set" EXACT [] -is_a: SO:0000703 ! experimental_result_region - -[Term] -id: SO:0001262 -name: overlapping_EST_set -def: "A continous experimental result region extending the length of multiple overlapping EST's." [SO:ke] -synonym: "overlapping EST set" EXACT [] -is_a: SO:0001261 ! overlapping_feature_set -relationship: has_part SO:0000345 ! EST - -[Term] -id: SO:0001263 -name: ncRNA_gene -synonym: "ncRNA gen" EXACT [] -synonym: "ncRNA gene" EXACT [] -synonym: "non-coding RNA gene" RELATED [] -is_a: SO:0000704 ! gene - -[Term] -id: SO:0001264 -name: gRNA_gene -synonym: "gRNA gene" EXACT [] -intersection_of: SO:0001263 ! 
ncRNA_gene -intersection_of: has_quality SO:0000979 ! gRNA_encoding - -[Term] -id: SO:0001265 -name: miRNA_gene -synonym: "miRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000571 ! miRNA_encoding - -[Term] -id: SO:0001266 -name: scRNA_gene -synonym: "scRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000575 ! scRNA_encoding - -[Term] -id: SO:0001267 -name: snoRNA_gene -synonym: "snoRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0001268 -name: snRNA_gene -synonym: "snRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0001263 ! ncRNA_gene - -[Term] -id: SO:0001269 -name: SRP_RNA_gene -synonym: "SRP RNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000642 ! SRP_RNA_encoding - -[Term] -id: SO:0001270 -name: stRNA_gene -synonym: "stRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000656 ! stRNA_encoding - -[Term] -id: SO:0001271 -name: tmRNA_gene -synonym: "tmRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000659 ! tmRNA_encoding - -[Term] -id: SO:0001272 -name: tRNA_gene -synonym: "tRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000663 ! tRNA_encoding - -[Term] -id: SO:0001273 -name: modified_adenosine -def: "A modified adenine is an adenine base feature that has been altered." [SO:ke] -synonym: "modified adenosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001274 -name: modified_inosine -def: "A modified inosine is an inosine base feature that has been altered." [SO:ke] -synonym: "modified inosine" EXACT [] -is_a: SO:0001230 ! 
inosine - -[Term] -id: SO:0001275 -name: modified_cytidine -def: "A modified cytidine is a cytidine base feature which has been altered." [SO:ke] -synonym: "modified cytidine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001276 -name: modified_guanosine -synonym: "modified guanosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001277 -name: modified_uridine -synonym: "modified uridine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001278 -name: one_methylinosine -def: "1-methylinosine is a modified insosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylinosine" EXACT [] -synonym: "m1I" EXACT RNAMOD [] -synonym: "one methylinosine" EXACT [] -xref: RNAMOD:018 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001279 -name: one_two_prime_O_dimethylinosine -def: "1,2'-O-dimethylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylinosine" EXACT [] -synonym: "m'Im" EXACT RNAMOD [] -synonym: "one two prime O dimethylinosine" EXACT [] -xref: RNAMOD:019 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001280 -name: two_prime_O_methylinosine -def: "2'-O-methylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylinosine" EXACT [] -synonym: "Im" EXACT RNAMOD [] -synonym: "two prime O methylinosine" EXACT [] -xref: RNAMOD:081 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001281 -name: three_methylcytidine -def: "3-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylcytidine" EXACT [] -synonym: "m3C" EXACT RNAMOD [] -synonym: "three methylcytidine" EXACT [] -xref: RNAMOD:020 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001282 -name: five_methylcytidine -def: "5-methylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylcytidine" EXACT [] -synonym: "five methylcytidine" EXACT [] -synonym: "m5C" EXACT RNAMOD [] -xref: RNAMOD:021 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001283 -name: two_prime_O_methylcytidine -def: "2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylcytidine" EXACT [] -synonym: "Cm" EXACT RNAMOD [] -synonym: "two prime O methylcytidine" EXACT [] -xref: RNAMOD:022 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001284 -name: two_thiocytidine -def: "2-thiocytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiocytidine" EXACT [] -synonym: "s2C" EXACT RNAMOD [] -synonym: "two thiocytidine" EXACT [] -xref: RNAMOD:023 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001285 -name: N4_acetylcytidine -def: "N4-acetylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4C" EXACT RNAMOD [] -synonym: "N4 acetylcytidine" EXACT [] -synonym: "N4-acetylcytidine" EXACT [] -xref: RNAMOD:024 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001286 -name: five_formylcytidine -def: "5-formylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formylcytidine" EXACT [] -synonym: "f5C" EXACT RNAMOD [] -synonym: "five formylcytidine" EXACT [] -xref: RNAMOD:025 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001287 -name: five_two_prime_O_dimethylcytidine -def: "5,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethylcytidine" EXACT [] -synonym: "five two prime O dimethylcytidine" EXACT [] -synonym: "m5Cm" EXACT RNAMOD [] -xref: RNAMOD:026 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001288 -name: N4_acetyl_2_prime_O_methylcytidine -def: "N4-acetyl-2'-O-methylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "ac4Cm" EXACT RNAMOD [] -synonym: "N4 acetyl 2 prime O methylcytidine" EXACT [] -synonym: "N4-acetyl-2'-O-methylcytidine" EXACT [] -xref: RNAMOD:027 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001289 -name: lysidine -def: "Lysidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "k2C" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Lysidine "wiki" -xref: RNAMOD:028 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001290 -name: N4_methylcytidine -def: "N4-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4C" EXACT RNAMOD [] -synonym: "N4 methylcytidine" EXACT [] -synonym: "N4-methylcytidine" EXACT [] -xref: RNAMOD:082 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001291 -name: N4_2_prime_O_dimethylcytidine -def: "N4,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4Cm" EXACT RNAMOD [] -synonym: "N4 2 prime O dimethylcytidine" EXACT [] -synonym: "N4,2'-O-dimethylcytidine" EXACT [] -xref: RNAMOD:083 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001292 -name: five_hydroxymethylcytidine -def: "5-hydroxymethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxymethylcytidine" EXACT [] -synonym: "five hydroxymethylcytidine" EXACT [] -synonym: "hm5C" EXACT RNAMOD [] -xref: RNAMOD:084 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001293 -name: five_formyl_two_prime_O_methylcytidine -def: "5-formyl-2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formyl-2'-O-methylcytidine" EXACT [] -synonym: "f5Cm" EXACT RNAMOD [] -synonym: "five formyl two prime O methylcytidine" EXACT [] -xref: RNAMOD:095 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001294 -name: N4_N4_2_prime_O_trimethylcytidine -def: "N4_N4_2_prime_O_trimethylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "m42Cm" EXACT RNAMOD [] -synonym: "N4,N4,2'-O-trimethylcytidine" EXACT [] -xref: RNAMOD:107 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001295 -name: one_methyladenosine -def: "1_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyladenosine" EXACT [] -synonym: "m1A" EXACT RNAMOD [] -synonym: "one methyladenosine" EXACT [] -xref: RNAMOD:001 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001296 -name: two_methyladenosine -def: "2_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methyladenosine" EXACT [] -synonym: "m2A" EXACT RNAMOD [] -synonym: "two methyladenosine" EXACT [] -xref: RNAMOD:002 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001297 -name: N6_methyladenosine -def: "N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6A" EXACT RNAMOD [] -synonym: "N6 methyladenosine" EXACT [] -synonym: "N6-methyladenosine" EXACT [] -xref: RNAMOD:003 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001298 -name: two_prime_O_methyladenosine -def: "2prime_O_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyladenosine" EXACT [] -synonym: "Am" EXACT RNAMOD [] -synonym: "two prime O methyladenosine" EXACT [] -xref: RNAMOD:004 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001299 -name: two_methylthio_N6_methyladenosine -def: "2_methylthio_N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-methyladenosine" EXACT [] -synonym: "ms2m6A" EXACT RNAMOD [] -synonym: "two methylthio N6 methyladenosine" EXACT [] -xref: RNAMOD:005 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001300 -name: N6_isopentenyladenosine -def: "N6_isopentenyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "i6A" EXACT RNAMOD [] -synonym: "N6 isopentenyladenosine" EXACT [] -synonym: "N6-isopentenyladenosine" EXACT [] -xref: RNAMOD:006 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001301 -name: two_methylthio_N6_isopentenyladenosine -def: "2_methylthio_N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-isopentenyladenosine" EXACT [] -synonym: "ms2i6A" EXACT RNAMOD [] -synonym: "two methylthio N6 isopentenyladenosine" EXACT [] -xref: RNAMOD:007 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001302 -name: N6_cis_hydroxyisopentenyl_adenosine -def: "N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "io6A" EXACT RNAMOD [] -synonym: "N6 cis hydroxyisopentenyl adenosine" EXACT [] -synonym: "N6-(cis-hydroxyisopentenyl)adenosine" EXACT [] -xref: RNAMOD:008 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001303 -name: two_methylthio_N6_cis_hydroxyisopentenyl_adenosine -def: "2_methylthio_N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-(cis-hydroxyisopentenyl) adenosine" EXACT [] -synonym: "ms2io6A" EXACT RNAMOD [] -synonym: "two methylthio N6 cis hydroxyisopentenyl adenosine" EXACT [] -xref: RNAMOD:009 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001304 -name: N6_glycinylcarbamoyladenosine -def: "N6_glycinylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "g6A" EXACT RNAMOD [] -synonym: "N6 glycinylcarbamoyladenosine" EXACT [] -synonym: "N6-glycinylcarbamoyladenosine" EXACT [] -xref: RNAMOD:010 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001305 -name: N6_threonylcarbamoyladenosine -def: "N6_threonylcarbamoyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-threonylcarbamoyladenosine" EXACT [] -synonym: "t6A" EXACT RNAMOD [] -xref: RNAMOD:011 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001306 -name: two_methylthio_N6_threonyl_carbamoyladenosine -def: "2_methylthio_N6_threonyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-threonyl carbamoyladenosine" EXACT [] -synonym: "ms2t6A" EXACT RNAMOD [] -synonym: "two methylthio N6 threonyl carbamoyladenosine" EXACT [] -xref: RNAMOD:012 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001307 -name: N6_methyl_N6_threonylcarbamoyladenosine -def: "N6_methyl_N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6t6A" EXACT RNAMOD [] -synonym: "N6 methyl N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-methyl-N6-threonylcarbamoyladenosine" EXACT [] -xref: RNAMOD:013 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001308 -name: N6_hydroxynorvalylcarbamoyladenosine -def: "N6_hydroxynorvalylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "hn6A" EXACT RNAMOD [] -synonym: "N6 hydroxynorvalylcarbamoyladenosine" EXACT [] -synonym: "N6-hydroxynorvalylcarbamoyladenosine" EXACT [] -xref: RNAMOD:014 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001309 -name: two_methylthio_N6_hydroxynorvalyl_carbamoyladenosine -def: "2_methylthio_N6_hydroxynorvalyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-hydroxynorvalyl carbamoyladenosine" EXACT [] -synonym: "ms2hn6A" EXACT RNAMOD [] -synonym: "two methylthio N6 hydroxynorvalyl carbamoyladenosine" EXACT [] -xref: RNAMOD:015 -is_a: SO:0001273 ! 
modified_adenosine - -[Term] -id: SO:0001310 -name: two_prime_O_ribosyladenosine_phosphate -def: "2prime_O_ribosyladenosine_phosphate is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosyladenosine (phosphate)" EXACT [] -synonym: "Ar(p)" EXACT RNAMOD [] -synonym: "two prime O ribosyladenosine phosphate" EXACT [] -xref: RNAMOD:016 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001311 -name: N6_N6_dimethyladenosine -def: "N6_N6_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62A" EXACT RNAMOD [] -synonym: "N6,N6-dimethyladenosine" EXACT [] -xref: RNAMOD:080 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001312 -name: N6_2_prime_O_dimethyladenosine -def: "N6_2prime_O_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6Am" EXACT RNAMOD [] -synonym: "N6 2 prime O dimethyladenosine" EXACT [] -synonym: "N6,2'-O-dimethyladenosine" EXACT [] -xref: RNAMOD:088 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001313 -name: N6_N6_2_prime_O_trimethyladenosine -def: "N6_N6_2prime_O_trimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62Am" EXACT RNAMOD [] -synonym: "N6,N6,2'-O-trimethyladenosine" EXACT [] -xref: RNAMOD:089 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001314 -name: one_two_prime_O_dimethyladenosine -def: "1,2'-O-dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethyladenosine" EXACT [] -synonym: "m1Am" EXACT RNAMOD [] -synonym: "one two prime O dimethyladenosine" EXACT [] -xref: RNAMOD:097 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001315 -name: N6_acetyladenosine -def: "N6_acetyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "ac6A" EXACT RNAMOD [] -synonym: "N6 acetyladenosine" EXACT [] -synonym: "N6-acetyladenosine" EXACT [] -xref: RNAMOD:102 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001316 -name: seven_deazaguanosine -def: "7-deazaguanosine is a moddified guanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-deazaguanosine" RELATED [] -synonym: "seven deazaguanosine" EXACT [] -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001317 -name: queuosine -def: "Queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "Q" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Queuosine "wiki" -xref: RNAMOD:043 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001318 -name: epoxyqueuosine -def: "Epoxyqueuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "eQ" EXACT RNAMOD [] -xref: RNAMOD:044 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001319 -name: galactosyl_queuosine -def: "Galactosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "galactosyl queuosine" EXACT [] -synonym: "galactosyl-queuosine" EXACT [] -synonym: "galQ" EXACT RNAMOD [] -xref: RNAMOD:045 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001320 -name: mannosyl_queuosine -def: "Mannosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "mannosyl queuosine" EXACT [] -synonym: "mannosyl-queuosine" EXACT [] -synonym: "manQ" EXACT RNAMOD [] -xref: RNAMOD:046 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001321 -name: seven_cyano_seven_deazaguanosine -def: "7_cyano_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-cyano-7-deazaguanosine" EXACT [] -synonym: "preQ0" EXACT RNAMOD [] -synonym: "seven cyano seven deazaguanosine" EXACT [] -xref: RNAMOD:047 -is_a: SO:0001316 ! 
seven_deazaguanosine - -[Term] -id: SO:0001322 -name: seven_aminomethyl_seven_deazaguanosine -def: "7_aminomethyl_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-aminomethyl-7-deazaguanosine" EXACT [] -synonym: "preQ1" EXACT RNAMOD [] -synonym: "seven aminomethyl seven deazaguanosine" EXACT [] -xref: RNAMOD:048 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001323 -name: archaeosine -def: "Archaeosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "G+" EXACT RNAMOD [] -xref: RNAMOD:049 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001324 -name: one_methylguanosine -def: "1_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylguanosine" EXACT [] -synonym: "m1G" EXACT RNAMOD [] -synonym: "one methylguanosine" EXACT [] -xref: RNAMOD:029 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001325 -name: N2_methylguanosine -def: "N2_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2G" EXACT RNAMOD [] -synonym: "N2 methylguanosine" EXACT [] -synonym: "N2-methylguanosine" EXACT [] -xref: RNAMOD:030 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001326 -name: seven_methylguanosine -def: "7_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "7-methylguanosine" EXACT [] -synonym: "m7G" EXACT RNAMOD [] -synonym: "seven methylguanosine" EXACT [] -xref: RNAMOD:031 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001327 -name: two_prime_O_methylguanosine -def: "2prime_O_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylguanosine" EXACT [] -synonym: "Gm" EXACT RNAMOD [] -synonym: "two prime O methylguanosine" EXACT [] -xref: RNAMOD:032 -is_a: SO:0001276 ! 
modified_guanosine - -[Term] -id: SO:0001328 -name: N2_N2_dimethylguanosine -def: "N2_N2_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22G" EXACT RNAMOD [] -synonym: "N2,N2-dimethylguanosine" EXACT [] -xref: RNAMOD:033 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001329 -name: N2_2_prime_O_dimethylguanosine -def: "N2_2prime_O_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2Gm" EXACT RNAMOD [] -synonym: "N2 2 prime O dimethylguanosine" EXACT [] -synonym: "N2,2'-O-dimethylguanosine" EXACT [] -xref: RNAMOD:034 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001330 -name: N2_N2_2_prime_O_trimethylguanosine -def: "N2_N2_2prime_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22Gmv" EXACT RNAMOD [] -synonym: "N2,N2,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:035 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001331 -name: two_prime_O_ribosylguanosine_phosphate -def: "2prime_O_ribosylguanosine_phosphate is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosylguanosine (phosphate)" EXACT [] -synonym: "Gr(p)" EXACT RNAMOD [] -synonym: "two prime O ribosylguanosine phosphate" EXACT [] -xref: RNAMOD:036 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001332 -name: wybutosine -def: "Wybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "yW" EXACT RNAMOD [] -xref: RNAMOD:037 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001333 -name: peroxywybutosine -def: "Peroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "o2yW" EXACT RNAMOD [] -xref: RNAMOD:038 -is_a: SO:0001276 ! 
modified_guanosine - -[Term] -id: SO:0001334 -name: hydroxywybutosine -def: "Hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW" EXACT RNAMOD [] -xref: RNAMOD:039 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001335 -name: undermodified_hydroxywybutosine -def: "Undermodified_hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW*" EXACT RNAMOD [] -synonym: "undermodified hydroxywybutosine" EXACT [] -xref: RNAMOD:040 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001336 -name: wyosine -def: "Wyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "IMG" EXACT RNAMOD [] -xref: RNAMOD:041 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001337 -name: methylwyosine -def: "Methylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mimG" EXACT RNAMOD [] -xref: RNAMOD:042 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001338 -name: N2_7_dimethylguanosine -def: "N2_7_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7G" EXACT RNAMOD [] -synonym: "N2 7 dimethylguanosine" EXACT [] -synonym: "N2,7-dimethylguanosine" EXACT [] -xref: RNAMOD:090 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001339 -name: N2_N2_7_trimethylguanosine -def: "N2_N2_7_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,2,7G" EXACT RNAMOD [] -synonym: "N2,N2,7-trimethylguanosine" EXACT [] -xref: RNAMOD:091 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001340 -name: one_two_prime_O_dimethylguanosine -def: "1_2prime_O_dimethylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylguanosine" EXACT [] -synonym: "m1Gm" EXACT RNAMOD [] -synonym: "one two prime O dimethylguanosine" EXACT [] -xref: RNAMOD:096 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001341 -name: four_demethylwyosine -def: "4_demethylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-demethylwyosine" EXACT [] -synonym: "four demethylwyosine" EXACT [] -synonym: "imG-14" EXACT RNAMOD [] -xref: RNAMOD:100 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001342 -name: isowyosine -def: "Isowyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "imG2" EXACT RNAMOD [] -xref: RNAMOD:101 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001343 -name: N2_7_2prirme_O_trimethylguanosine -def: "N2_7_2prirme_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7Gm" EXACT RNAMOD [] -synonym: "N2 7 2prirme O trimethylguanosine" EXACT [] -synonym: "N2,7,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:106 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001344 -name: five_methyluridine -def: "5_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methyluridine" EXACT [] -synonym: "five methyluridine" EXACT [] -synonym: "m5U" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/5-methyluridine "wiki" -xref: RNAMOD:052 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001345 -name: two_prime_O_methyluridine -def: "2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyluridine" EXACT [] -synonym: "two prime O methyluridine" EXACT [] -synonym: "Um" EXACT RNAMOD [] -xref: RNAMOD:053 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001346 -name: five_two_prime_O_dimethyluridine -def: "5_2_prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethyluridine" EXACT [] -synonym: "five two prime O dimethyluridine" EXACT [] -synonym: "m5Um" EXACT RNAMOD [] -xref: RNAMOD:054 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001347 -name: one_methylpseudouridine -def: "1_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylpseudouridine" EXACT [] -synonym: "m1Y" EXACT RNAMOD [] -synonym: "one methylpseudouridine" EXACT [] -xref: RNAMOD:055 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001348 -name: two_prime_O_methylpseudouridine -def: "2prime_O_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylpseudouridine" EXACT [] -synonym: "two prime O methylpseudouridine" EXACT [] -synonym: "Ym" EXACT RNAMOD [] -xref: RNAMOD:056 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001349 -name: two_thiouridine -def: "2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiouridine" EXACT [] -synonym: "s2U" EXACT RNAMOD [] -synonym: "two thiouridine" EXACT [] -xref: RNAMOD:057 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001350 -name: four_thiouridine -def: "4_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-thiouridine" EXACT [] -synonym: "four thiouridine" EXACT [] -synonym: "s4U" EXACT RNAMOD [] -xref: RNAMOD:058 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001351 -name: five_methyl_2_thiouridine -def: "5_methyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyl-2-thiouridine" EXACT [] -synonym: "five methyl 2 thiouridine" EXACT [] -synonym: "m5s2U" EXACT RNAMOD [] -xref: RNAMOD:059 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001352 -name: two_thio_two_prime_O_methyluridine -def: "2_thio_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thio-2'-O-methyluridine" EXACT [] -synonym: "s2Um" EXACT RNAMOD [] -synonym: "two thio two prime O methyluridine" EXACT [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001353 -name: three_three_amino_three_carboxypropyl_uridine -def: "3_3_amino_3_carboxypropyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-(3-amino-3-carboxypropyl)uridine" EXACT [] -synonym: "acp3U" EXACT RNAMOD [] -xref: RNAMOD:061 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001354 -name: five_hydroxyuridine -def: "5_hydroxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxyuridine" EXACT [] -synonym: "five hydroxyuridine" EXACT [] -synonym: "ho5U" EXACT RNAMOD [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001355 -name: five_methoxyuridine -def: "5_methoxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxyuridine" EXACT [] -synonym: "five methoxyuridine" EXACT [] -synonym: "mo5U" EXACT RNAMOD [] -xref: RNAMOD:063 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001356 -name: uridine_five_oxyacetic_acid -def: "Uridine_5_oxyacetic_acid is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "cmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid" EXACT [] -synonym: "uridine five oxyacetic acid" EXACT [] -xref: RNAMOD:064 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001357 -name: uridine_five_oxyacetic_acid_methyl_ester -def: "Uridine_5_oxyacetic_acid_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mcmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid methyl ester" EXACT [] -synonym: "uridine five oxyacetic acid methyl ester" EXACT [] -xref: RNAMOD:065 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001358 -name: five_carboxyhydroxymethyl_uridine -def: "5_carboxyhydroxymethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine" EXACT [] -synonym: "chm5U" EXACT RNAMOD [] -synonym: "five carboxyhydroxymethyl uridine" EXACT [] -xref: RNAMOD:066 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001359 -name: five_carboxyhydroxymethyl_uridine_methyl_ester -def: "5_carboxyhydroxymethyl_uridine_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine methyl ester" EXACT [] -synonym: "five carboxyhydroxymethyl uridine methyl ester" EXACT [] -synonym: "mchm5U" EXACT RNAMOD [] -xref: RNAMOD:067 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001360 -name: five_methoxycarbonylmethyluridine -def: "Five_methoxycarbonylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyluridine" EXACT [] -synonym: "five methoxycarbonylmethyluridine" EXACT [] -synonym: "mcm5U" EXACT RNAMOD [] -xref: RNAMOD:068 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001361 -name: five_methoxycarbonylmethyl_two_prime_O_methyluridine -def: "Five_methoxycarbonylmethyl_2_prime_O_methyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five methoxycarbonylmethyl two prime O methyluridine" EXACT [] -synonym: "mcm5Um" EXACT RNAMOD [] -xref: RNAMOD:069 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001362 -name: five_methoxycarbonylmethyl_two_thiouridine -def: "5_methoxycarbonylmethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2-thiouridine" EXACT [] -synonym: "five methoxycarbonylmethyl two thiouridine" EXACT [] -synonym: "mcm5s2U" EXACT RNAMOD [] -xref: RNAMOD:070 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001363 -name: five_aminomethyl_two_thiouridine -def: "5_aminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-aminomethyl-2-thiouridine" EXACT [] -synonym: "five aminomethyl two thiouridine" EXACT [] -synonym: "nm5s2U" EXACT RNAMOD [] -xref: RNAMOD:071 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001364 -name: five_methylaminomethyluridine -def: "5_methylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyluridine" EXACT [] -synonym: "five methylaminomethyluridine" EXACT [] -synonym: "mnm5U" EXACT RNAMOD [] -xref: RNAMOD:072 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001365 -name: five_methylaminomethyl_two_thiouridine -def: "5_methylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-thiouridine" EXACT [] -synonym: "five methylaminomethyl two thiouridine" EXACT [] -synonym: "mnm5s2U" EXACT RNAMOD [] -xref: RNAMOD:073 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001366 -name: five_methylaminomethyl_two_selenouridine -def: "5_methylaminomethyl_2_selenouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-selenouridine" EXACT [] -synonym: "five methylaminomethyl two selenouridine" EXACT [] -synonym: "mnm5se2U" EXACT RNAMOD [] -xref: RNAMOD:074 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001367 -name: five_carbamoylmethyluridine -def: "5_carbamoylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyluridine" EXACT [] -synonym: "five carbamoylmethyluridine" EXACT [] -synonym: "ncm5U" EXACT RNAMOD [] -xref: RNAMOD:075 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001368 -name: five_carbamoylmethyl_two_prime_O_methyluridine -def: "5_carbamoylmethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five carbamoylmethyl two prime O methyluridine" EXACT [] -synonym: "ncm5Um" EXACT RNAMOD [] -xref: RNAMOD:076 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001369 -name: five_carboxymethylaminomethyluridine -def: "5_carboxymethylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyluridine" EXACT [] -synonym: "cmnm5U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyluridine" EXACT [] -xref: RNAMOD:077 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001370 -name: five_carboxymethylaminomethyl_two_prime_O_methyluridine -def: "5_carboxymethylaminomethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl- 2'-O-methyluridine" EXACT [] -synonym: "cmnm5Um" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two prime O methyluridine" EXACT [] -xref: RNAMOD:078 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001371 -name: five_carboxymethylaminomethyl_two_thiouridine -def: "5_carboxymethylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl-2-thiouridine" EXACT [] -synonym: "cmnm5s2U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two thiouridine" EXACT [] -xref: RNAMOD:079 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001372 -name: three_methyluridine -def: "3_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methyluridine" EXACT [] -synonym: "m3U" EXACT RNAMOD [] -synonym: "three methyluridine" EXACT [] -xref: RNAMOD:085 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001373 -name: one_methyl_three_three_amino_three_carboxypropyl_pseudouridine -def: "1_methyl_3_3_amino_3_carboxypropyl_pseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyl-3-(3-amino-3-carboxypropyl) pseudouridine" EXACT [] -synonym: "m1acp3Y" EXACT RNAMOD [] -xref: RNAMOD:086 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001374 -name: five_carboxymethyluridine -def: "5_carboxymethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethyluridine" EXACT [] -synonym: "cm5U" EXACT RNAMOD [] -synonym: "five carboxymethyluridine" EXACT [] -xref: RNAMOD:087 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001375 -name: three_two_prime_O_dimethyluridine -def: "3_2prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3,2'-O-dimethyluridine" EXACT [] -synonym: "m3Um" EXACT RNAMOD [] -synonym: "three two prime O dimethyluridine" EXACT [] -xref: RNAMOD:092 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001376 -name: five_methyldihydrouridine -def: "5_methyldihydrouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyldihydrouridine" EXACT [] -synonym: "five methyldihydrouridine" EXACT [] -synonym: "m5D" EXACT RNAMOD [] -xref: RNAMOD:093 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001377 -name: three_methylpseudouridine -def: "3_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylpseudouridine" EXACT [] -synonym: "m3Y" EXACT RNAMOD [] -synonym: "three methylpseudouridine" EXACT [] -xref: RNAMOD:094 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001378 -name: five_taurinomethyluridine -def: "5_taurinomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyluridine" EXACT [] -synonym: "five taurinomethyluridine" EXACT [] -synonym: "tm5U" EXACT RNAMOD [] -xref: RNAMOD:098 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001379 -name: five_taurinomethyl_two_thiouridine -def: "5_taurinomethyl_2_thiouridineis a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyl-2-thiouridine" EXACT [] -synonym: "five taurinomethyl two thiouridine" EXACT [] -synonym: "tm5s2U" EXACT RNAMOD [] -xref: RNAMOD:099 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001380 -name: five_isopentenylaminomethyl_uridine -def: "5_isopentenylaminomethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)uridine" EXACT [] -synonym: "five isopentenylaminomethyl uridine" EXACT [] -synonym: "inm5U" EXACT RNAMOD [] -xref: RNAMOD:103 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001381 -name: five_isopentenylaminomethyl_two_thiouridine -def: "5_isopentenylaminomethyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2-thiouridine" EXACT [] -synonym: "five isopentenylaminomethyl two thiouridine" EXACT [] -synonym: "inm5s2U" EXACT RNAMOD [] -xref: RNAMOD:104 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001382 -name: five_isopentenylaminomethyl_two_prime_O_methyluridine -def: "5_isopentenylaminomethyl_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2'-O-methyluridine" EXACT [] -synonym: "five isopentenylaminomethyl two prime O methyluridine" EXACT [] -synonym: "inm5Um" EXACT RNAMOD [] -xref: RNAMOD:105 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001383 -name: histone_binding_site -def: "A region of a DNA molecule that is bound by a histone." [SO:ke] -synonym: "histone binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0001384 -name: CDS_fragment -synonym: "CDS fragment" EXACT [] -synonym: "incomplete CDS" EXACT [] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001385 -name: modified_amino_acid_feature -def: "A post translationally modified amino acid feature." [SO:ke] -synonym: "modified amino acid feature" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001386 -name: modified_glycine -def: "A post translationally modified glycine amino acid feature." [SO:ke] -synonym: "ModGly" EXACT AAMOD [] -synonym: "modified glycine" EXACT [] -xref: MOD:00908 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001387 -name: modified_L_alanine -def: "A post translationally modified alanine amino acid feature." [SO:ke] -synonym: "ModAla" EXACT AAMOD [] -synonym: "modified L alanine" EXACT [] -synonym: "modified L-alanine" EXACT [] -xref: MOD:00901 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001388 -name: modified_L_asparagine -def: "A post translationally modified asparagine amino acid feature." [SO:ke] -synonym: "ModAsn" EXACT AAMOD [] -synonym: "modified L asparagine" EXACT [] -synonym: "modified L-asparagine" EXACT [] -xref: MOD:00903 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001389 -name: modified_L_aspartic_acid -def: "A post translationally modified aspartic acid amino acid feature." [SO:ke] -synonym: "ModAsp" EXACT AAMOD [] -synonym: "modified L aspartic acid" EXACT [] -synonym: "modified L-aspartic acid" EXACT [] -xref: MOD:00904 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001390 -name: modified_L_cysteine -def: "A post translationally modified cysteine amino acid feature." [SO:ke] -synonym: "ModCys" EXACT AAMOD [] -synonym: "modified L cysteine" EXACT [] -synonym: "modified L-cysteine" EXACT [] -xref: MOD:00905 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001391 -name: modified_L_glutamic_acid -synonym: "ModGlu" EXACT AAMOD [] -synonym: "modified L glutamic acid" EXACT [] -synonym: "modified L-glutamic acid" EXACT [] -xref: MOD:00906 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001392 -name: modified_L_threonine -def: "A post translationally modified threonine amino acid feature." [SO:ke] -synonym: "modified L threonine" EXACT [] -synonym: "modified L-threonine" EXACT [] -synonym: "ModThr" EXACT AAMOD [] -xref: MOD:00917 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001393 -name: modified_L_tryptophan -def: "A post translationally modified tryptophan amino acid feature." [SO:ke] -synonym: "modified L tryptophan" EXACT [] -synonym: "modified L-tryptophan" EXACT [] -synonym: "ModTrp" EXACT AAMOD [] -xref: MOD:00918 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001394 -name: modified_L_glutamine -def: "A post translationally modified glutamine amino acid feature." 
[SO:ke] -synonym: "ModGln" EXACT [] -synonym: "modified L glutamine" EXACT [] -synonym: "modified L-glutamine" EXACT [] -xref: MOD:00907 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001395 -name: modified_L_methionine -def: "A post translationally modified methionine amino acid feature." [SO:ke] -synonym: "modified L methionine" EXACT [] -synonym: "modified L-methionine" EXACT [] -synonym: "ModMet" EXACT AAMOD [] -xref: MOD:00913 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001396 -name: modified_L_isoleucine -def: "A post translationally modified isoleucine amino acid feature." [SO:ke] -synonym: "modified L isoleucine" EXACT [] -synonym: "modified L-isoleucine" EXACT [] -synonym: "ModIle" EXACT AAMOD [] -xref: MOD:00910 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001397 -name: modified_L_phenylalanine -def: "A post translationally modified phenylalanine amino acid feature." [SO:ke] -synonym: "modified L phenylalanine" EXACT [] -synonym: "modified L-phenylalanine" EXACT [] -synonym: "ModPhe" EXACT AAMOD [] -xref: MOD:00914 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001398 -name: modified_L_histidine -def: "A post translationally modified histidie amino acid feature." [SO:ke] -synonym: "ModHis" EXACT [] -synonym: "modified L histidine" EXACT [] -synonym: "modified L-histidine" EXACT [] -xref: MOD:00909 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001399 -name: modified_L_serine -def: "A post translationally modified serine amino acid feature." [SO:ke] -synonym: "modified L serine" EXACT [] -synonym: "modified L-serine" EXACT [] -synonym: "MosSer" EXACT AAMOD [] -xref: MOD:00916 "http://www.psidev.info/index.php?q=node/104" -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001400 -name: modified_L_lysine -def: "A post translationally modified lysine amino acid feature." 
[SO:ke] -synonym: "modified L lysine" EXACT [] -synonym: "modified L-lysine" EXACT [] -synonym: "ModLys" EXACT AAMOD [] -xref: MOD:00912 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001401 -name: modified_L_leucine -def: "A post translationally modified leucine amino acid feature." [SO:ke] -synonym: "modified L leucine" EXACT [] -synonym: "modified L-leucine " EXACT [] -synonym: "ModLeu" EXACT AAMOD [] -xref: MOD:00911 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001402 -name: modified_L_selenocysteine -def: "A post translationally modified selenocysteine amino acid feature." [SO:ke] -synonym: "modified L selenocysteine" EXACT [] -synonym: "modified L-selenocysteine" EXACT [] -xref: MOD:01158 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001403 -name: modified_L_valine -def: "A post translationally modified valine amino acid feature." [SO:ke] -synonym: "modified L valine" EXACT [] -synonym: "modified L-valine" EXACT [] -synonym: "ModVal" EXACT AAMOD [] -xref: MOD:00920 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001404 -name: modified_L_proline -def: "A post translationally modified proline amino acid feature." [SO:ke] -synonym: "modified L proline" EXACT [] -synonym: "modified L-proline " EXACT [] -synonym: "ModPro" EXACT AAMOD [] -xref: MOD:00915 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001405 -name: modified_L_tyrosine -def: "A post translationally modified tyrosine amino acid feature." [SO:ke] -synonym: "modified L tyrosine" EXACT [] -synonym: "modified L-tyrosine" EXACT [] -synonym: "ModTry" EXACT AAMOD [] -xref: MOD:00919 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001406 -name: modified_L_arginine -def: "A post translationally modified arginine amino acid feature." [SO:ke] -synonym: "ModArg" EXACT AAMOD [] -synonym: "modified L arginine" EXACT [] -synonym: "modified L-arginine" EXACT [] -xref: MOD:00902 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001407 -name: peptidyl -def: "An attribute describing the nature of a proteinaceous polymer, where by the amino acid units are joined by peptide bonds." [SO:ke] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0001408 -name: cleaved_for_gpi_anchor_region -def: "The C-terminal residues of a polypeptide which are exchanged for a GPI-anchor." [EBI:rh] -synonym: "cleaved for gpi anchor region" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001409 -name: biomaterial_region -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001410 -name: experimental_feature -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -def: "A region that is defined according to its relations with other regions within the same sequence." [SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001413 -name: translocation_breakpoint -def: "The point within a chromosome where a translocation begins or ends." [SO:cb] -synonym: "translocation breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001414 -name: insertion_breakpoint -def: "The point within a chromosome where a insertion begins or ends." 
[SO:cb] -synonym: "insertion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001415 -name: deletion_breakpoint -def: "The point within a chromosome where a deletion begins or ends." [SO:cb] -synonym: "deletion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001416 -name: five_prime_flanking_region -def: "A flanking region located five prime of a specific region." [SO:chado] -synonym: "5' flanking region" RELATED [] -synonym: "five prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001417 -name: three_prime_flanking_region -def: "A flanking region located three prime of a specific region." [SO:chado] -synonym: "3' flanking region" RELATED [] -synonym: "three prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001418 -name: transcribed_fragment -def: "An experimental region, defined by a tiling array experiment to be transcribed at some level." [SO:ke] -comment: Term requested by the MODencode group. -synonym: "transcribed fragment" EXACT [] -synonym: "transfrag" RELATED [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001419 -name: cis_splice_site -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -def: "Primary transcript region bordering trans-splice junction." [SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001421 -name: splice_junction -def: "The boundary between an intron and an exon." [SO:ke] -synonym: "splice boundary" EXACT [] -synonym: "splice junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001422 -name: conformational_switch -def: "A region of a polypeptide, involved in the transition from one conformational state to another." 
[SO:ke] -comment: MM Young, K Kirshenbaum, KA Dill & S Highsmith. Predicting conformational switches in proteins. Protein Science, 1999, 8, 1752-64. K. Kirshenbaum, M.M. Young and S. Highsmith. Predicting Allosteric Switches in Myosins. Protein Science 8(9):1806-1815. 1999. -synonym: "polypeptide conformational switch" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001423 -name: dye_terminator_read -def: "A read produced by the dye terminator method of sequencing." [SO:ke] -synonym: "dye terminator read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001424 -name: pyrosequenced_read -def: "A read produced by pyrosequencing technology." [SO:ke] -comment: An example is a read produced by Roche 454 technology. -synonym: "pyorsequenced read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001425 -name: ligation_based_read -def: "A read produced by ligation based sequencing technologies." [SO:ke] -comment: An example of this kind of read is one produced by ABI SOLiD. -synonym: "ligation based read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001426 -name: polymerase_synthesis_read -def: "A read produced by the polymerase based sequence by synthesis method." [SO:ke] -comment: An example is a read produced by Illumina technology. -synonym: "polymerase synthesis read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001427 -name: cis_regulatory_frameshift_element -def: "A structural region in an RNA molecule which promotes ribosomal frameshifting of cis coding sequence." [RFAM:jd] -synonym: "cis regulatory frameshift element" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001428 -name: expressed_sequence_assembly -def: "A sequence assembly derived from expressed sequences." [SO:ke] -comment: From tracker [ 2372385 ] expressed_sequence_assembly. -synonym: "expressed sequence assembly" EXACT [] -is_a: SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0001429 -name: DNA_binding_site -def: "A region of a molecule that binds to DNA." [SO:ke] -synonym: "DNA binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001430 -name: polyA_junction -def: "The boundary between the UTR and the polyA sequence." [SO:ke] -synonym: "polyA junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001431 -name: cryptic_gene -def: "A gene that is not transcribed under normal conditions and is not critical to normal cellular functioning." [SO:ke] -synonym: "cryptic gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000976 ! cryptic - -[Term] -id: SO:0001432 -name: sequence_variant_affecting_polyadenylation -synonym: "mutation affecting polyadenylation" RELATED [] -synonym: "sequence variant affecting polyadenylation" EXACT [] -is_a: SO:1000070 ! sequence_variant_affecting_transcript_processing - -[Term] -id: SO:0001433 -name: three_prime_RACE_clone -def: "A three prime RACE (Rapid Amplification of cDNA Ends) clone is a cDNA clone copied from the 3' end of an mRNA (using a poly-dT primer to capture the polyA tail and a gene-specific or randomly primed 5' primer), and spliced into a vector for propagation in a suitable host." [modENCODE:nlw] -synonym: "3' RACE clone" RELATED [] -is_a: SO:0000317 ! cDNA_clone - -[Term] -id: SO:0001434 -name: cassette_pseudogene -def: "A cassette pseudogene is a kind of gene in an innactive form which may recombine at a telomeric locus to form a functional copy." [SO:ke] -comment: Requested by the trypanosome community. -synonym: "cassette pseudogene" EXACT [] -synonym: "cassette type psedogene" RELATED [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0001435 -name: alanine -comment: A place holder for a cross product with chebi. -synonym: "A" EXACT aa1 [] -synonym: "Ala" EXACT aa3 [] -is_a: SO:0001237 ! 
amino_acid - -[Term] -id: SO:0001436 -name: valine -comment: A place holder for a cross product with chebi. -synonym: "V" EXACT aa1 [] -synonym: "Val" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001437 -name: leucine -comment: A place holder for a cross product with chebi. -synonym: "L" EXACT aa1 [] -synonym: "Leu" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001438 -name: isoleucine -comment: A place holder for a cross product with chebi. -synonym: "I" EXACT aa1 [] -synonym: "Ile" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001439 -name: proline -comment: A place holder for a cross product with chebi. -synonym: "P" EXACT aa1 [] -synonym: "Pro" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001440 -name: tryptophan -comment: A place holder for a cross product with chebi. -synonym: "Trp" EXACT aa3 [] -synonym: "W" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001441 -name: phenylalanine -comment: A place holder for a cross product with chebi. -synonym: "F" EXACT aa1 [] -synonym: "Phe" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001442 -name: methionine -comment: A place holder for a cross product with chebi. -synonym: "M" EXACT aa1 [] -synonym: "Met" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001443 -name: glycine -comment: A place holder for a cross product with chebi. -synonym: "G" EXACT aa1 [] -synonym: "Gly" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001444 -name: serine -comment: A place holder for a cross product with chebi. -synonym: "S" EXACT aa1 [] -synonym: "Ser" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001445 -name: threonine -comment: A place holder for a cross product with chebi. -synonym: "T" EXACT aa1 [] -synonym: "Thr" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001446 -name: tyrosine -comment: A place holder for a cross product with chebi. 
-synonym: "Tyr" EXACT aa3 [] -synonym: "Y" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001447 -name: cysteine -comment: A place holder for a cross product with chebi. -synonym: "C" EXACT aa1 [] -synonym: "Cys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001448 -name: glutamine -comment: A place holder for a cross product with chebi. -synonym: "Gln" EXACT aa3 [] -synonym: "Q" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001449 -name: asparagine -comment: A place holder for a cross product with chebi. -synonym: "Asn" EXACT aa3 [] -synonym: "N" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001450 -name: lysine -comment: A place holder for a cross product with chebi. -synonym: "K" EXACT aa1 [] -synonym: "Lys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001451 -name: argenine -comment: A place holder for a cross product with chebi. -synonym: "Arg" EXACT aa3 [] -synonym: "R" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001452 -name: histidine -comment: A place holder for a cross product with chebi. -synonym: "H" EXACT aa1 [] -synonym: "His" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001453 -name: aspartic_acid -comment: A place holder for a cross product with chebi. -synonym: "Asp" EXACT aa3 [] -synonym: "aspartic acid" EXACT [] -synonym: "D" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001454 -name: glutamic_acid -comment: A place holder for a cross product with chebi. -synonym: "E" EXACT aa1 [] -synonym: "Glu" EXACT aa3 [] -synonym: "glutamic acid" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001455 -name: selenocysteine -comment: A place holder for a cross product with chebi. -synonym: "Sec" EXACT aa3 [] -synonym: "U" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001456 -name: pyrrolysine -comment: A place holder for a cross product with chebi. 
-synonym: "O" EXACT aa1 [] -synonym: "Pyl" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001457 -name: transcribed_cluster -def: "A region defined by a set of transcribed sequences from the same gene or expressed pseudogene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "transcribed cluster" EXACT [] -synonym: "unigene cluster" RELATED [] -is_a: SO:0001410 ! experimental_feature -relationship: has_part SO:0000695 ! reagent - -[Term] -id: SO:0001458 -name: unigene_cluster -def: "A kind of transcribed_cluster defined by a set of transcribed sequences from the a unique gene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "unigene cluster" RELATED [] -is_a: SO:0001457 ! transcribed_cluster - -[Term] -id: SO:0001459 -name: CRISPR -def: "Clustered Palindromic Repeats interspersed with bacteriophage derived spacer sequences." [RFAM:jd] -synonym: "Clustered_Regularly_Interspaced_Short_Palindromic_Repeat" EXACT [] -synonym: "CRISPR element" EXACT [] -xref: http:en.wikipedia.org/wiki/CRISPR -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0001460 -name: insulator_binding_site -def: "A protein_binding_site located within an insulator." [SO:ke] -comment: See tracker ID 2060908. -synonym: "insulator binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -relationship: part_of SO:0000627 ! insulator - -[Term] -id: SO:0001461 -name: enhancer_binding_site -def: "A protein_binding_site located within an enhancer." [SO:ke] -synonym: "enhancer binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -relationship: part_of SO:0000165 ! enhancer - -[Term] -id: SO:0001462 -name: contig_collection -def: "A collection of contigs." [SO:ke] -comment: See tracker ID: 2138359. -synonym: "contig collection" EXACT [] -is_a: SO:0001260 ! 
sequence_collection - -[Term] -id: SO:0001463 -name: lincRNA -def: "A multiexonic non-coding RNA transcribed by RNA polymerase II." [PMID:19182780, SO:ke] -synonym: "large intervening non-coding RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001464 -name: UST -def: "An EST spanning part or all of the untranslated regions of a protein-coding transcript." [SO:nlw] -synonym: "UTR sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001465 -name: three_prime_UST -def: "A UST located in the 3'UTR of a protein-coding transcript." [SO:nlw] -synonym: "3' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001466 -name: five_prime_UST -def: "An UST located in the 5'UTR of a protein-coding transcript." [SO:nlw] -synonym: "5' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001467 -name: RST -def: "A tag produced from a single sequencing read from a RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "RACE sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001468 -name: three_prime_RST -def: "A tag produced from a single sequencing read from a 3'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "3' RST" EXACT [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001469 -name: five_prime_RST -def: "A tag produced from a single sequencing read from a 5'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "5' RST" RELATED [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001470 -name: UST_match -def: "A match against an UST sequence." [SO:nlw] -synonym: "UST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001471 -name: RST_match -def: "A match against an RST sequence." [SO:nlw] -synonym: "RST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001472 -name: primer_match -def: "A nucleotide match to a primer sequence." [SO:nlw] -synonym: "primer match" EXACT [] -is_a: SO:0000347 ! 
nucleotide_match - -[Term] -id: SO:0001473 -name: miRNA_antiguide -def: "A region of the pri miRNA that basepairs with the guide to form the hairpin." [SO:ke] -synonym: "miRNA antiguide " EXACT [] -synonym: "miRNA passenger strand" EXACT [] -synonym: "miRNA star" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-05-27T03:35:43Z - -[Term] -id: SO:0001474 -name: trans_splice_junction -def: "The boundary between the spliced leader and the first exon of the mRNA." [SO:ke] -synonym: "trans-splice junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2009-07-13T04:50:49Z - -[Term] -id: SO:0001475 -name: outron -def: "A region of a primary transcript, that is removed via trans splicing." [PMID:16401417, SO:ke] -is_a: SO:0000835 ! primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-07-14T11:36:08Z - -[Term] -id: SO:0001476 -name: natural_plasmid -def: "A plasmid that occurs naturally." [SO:xp] -synonym: "natural plasmid" EXACT [] -is_a: SO:0000155 ! plasmid -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -intersection_of: has_quality SO:0000782 ! natural -created_by: kareneilbeck -creation_date: 2009-09-01T03:43:06Z - -[Term] -id: SO:0001477 -name: gene_trap_construct -def: "A gene trap construct is a type of engineered plasmid which is designed to integrate into a genome and produce a fusion transcript between exons of the gene into which it inserts and a reporter element in the construct. Gene traps contain a splice acceptor, do not contain promoter elements for the reporter, and are mutagenic. Gene traps may be bicistronic with the second cassette containing a promoter driving an a selectable marker." [ZFIN:dh] -synonym: "gene trap construct" EXACT [] -is_a: SO:0000637 ! 
engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:49:09Z - -[Term] -id: SO:0001478 -name: promoter_trap_construct -def: "A promoter trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when inserted in close proximity to a promoter element. Promoter traps typically do not contain promoter elements and are mutagenic." [ZFIN:dh] -synonym: "promoter trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:52:01Z - -[Term] -id: SO:0001479 -name: enhancer_trap_construct -def: "An enhancer trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when the expression from a basic minimal promoter is enhanced by genomic enhancer elements. Enhancer traps contain promoter elements and are not usually mutagenic." [ZFIN:dh] -synonym: "enhancer trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:53:26Z - -[Term] -id: SO:0001480 -name: PAC_end -def: "A region of sequence from the end of a PAC clone that may provide a highly specific marker." [ZFIN:mh] -synonym: "PAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000154 ! PAC -created_by: kareneilbeck -creation_date: 2009-09-09T05:18:12Z - -[Term] -id: SO:0001481 -name: RAPD -def: "RAPD is a 'PCR product' where a sequence variant is identified through the use of PCR with random primers." [ZFIN:mh] -synonym: "Random Amplification Polymorphic DNA" EXACT [] -is_a: SO:0000006 ! PCR_product -created_by: kareneilbeck -creation_date: 2009-09-09T05:26:10Z - -[Term] -id: SO:0001482 -name: shadow_enhancer -synonym: "shadow enhancer" EXACT [] -is_a: SO:0000165 ! 
enhancer -created_by: kareneilbeck -creation_date: 2009-09-09T05:29:29Z - -[Term] -id: SO:0001483 -name: SNV -def: "SNVs are single base pair positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0001484 -name: X_element_combinatorial_repeat -def: "An X element combinatorial repeat is a repeat region located between the X element and the telomere or adjacent Y' element." [http://www.yeastgenome.org/help/glossary.html] -comment: X element combinatorial repeats contain Tbf1p binding sites,\nand possible functions include a role in telomerase-independent telomere\nmaintenance via recombination or as a barrier against transcriptional\nsilencing. These are usually present as a combination of one or more of\nseveral types of smaller elements (designated A, B, C, or D). -synonym: "X element combinatorial repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T11:03:37Z - -[Term] -id: SO:0001485 -name: Y_prime_ element -def: "A Y' element is a repeat region (SO:0000657) located adjacent to telomeric repeats or X element combinatorial repeats, either as a single copy or tandem repeat of two to four copies." [http:http\://www.yeastgenome.org/help/glossary.html] -synonym: "Y' element" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T12:08:57Z - -[Term] -id: SO:0001486 -name: standard_draft -def: "The status of a whole genome sequence, where the data is minimally filtered or unfiltered, from any number of sequencing platforms, and is assembled into contigs. Genome sequence of this quality may harbor regions of poor quality and can be relatively incomplete." 
[DOI:10.1126] -synonym: "standard draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:48:32Z - -[Term] -id: SO:0001487 -name: high_quality_draft -def: "The status of a whole genome sequence, where overall coverage represents at least 90 percent of the genome." [DOI:10.1126] -synonym: "high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:52:36Z - -[Term] -id: SO:0001488 -name: improved_high_quality_draft -def: "The status of a whole genome sequence, where additional work has been performed, using either manual or automated methods, such as gap resolution." [DOI:10.1126] -synonym: "improved high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:54:35Z - -[Term] -id: SO:0001489 -name: annotation_directed_improved_draft -def: "The status of a whole genome sequence,where annotation, and verification of coding regions has occurred." [DOI:10.1126] -synonym: "annotation directed improvement" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:57:10Z - -[Term] -id: SO:0001490 -name: noncontiguous_finished -def: "The status of a whole genome sequence, where the assembly is high quality, closure approaches have been successful for most gaps, misassemblies and low quality regions." [DOI:10.1126] -synonym: "non contiguous finished" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:01:07Z - -[Term] -id: SO:0001491 -name: finished_genome -def: "The status of a whole genome sequence, with less than 1 error per 100,000 base pairs." [DOI:10.1126] -synonym: "finished" EXACT [] -synonym: "finished genome" EXACT [] -is_a: SO:0001499 ! 
whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:04:43Z - -[Term] -id: SO:0001492 -name: intronic_regulatory_region -def: "A regulatory region that is part of an intron." [SO:ke] -synonym: "intronic regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000188 ! intron -created_by: kareneilbeck -creation_date: 2009-11-08T02:48:02Z - -[Term] -id: SO:0001493 -name: centromere_DNA_Element_I -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region\ncomposed of 8-11bp which enables binding by the centromere binding factor 1(Cbf1p)." [PMID:11222754] -synonym: "CDEI" EXACT [] -synonym: "Centromere DNA Element I" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000577 ! centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:47:23Z - -[Term] -id: SO:0001494 -name: centromere_DNA_Element_II -def: "A centromere DNA Element II (CDEII) is part a conserved region of the centromere, consisting of a consensus region that is AT-rich and ~ 75-100 bp in length." [PMID:11222754] -synonym: "CDEII" EXACT [] -synonym: "centromere DNA Element II" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000577 ! centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:51:26Z - -[Term] -id: SO:0001495 -name: centromere DNA Element III -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region that consists of a 25-bp which enables binding by the centromere DNA binding factor 3 (CBF3) complex." [PMID:11222754] -synonym: "CDEIII" EXACT [] -synonym: "centromere DNA Element III" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000577 ! 
centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:54:47Z - -[Term] -id: SO:0001496 -name: telomeric_repeat -def: "The telomeric repeat is a repeat region, part of the chromosome, which in yeast, is a G-rich terminal sequence of the form (TG(1-3))n or more precisely ((TG)(1-6)TG(2-3))n." [PMID:8720065] -comment: The repeats are maintained by telomerase and there is generally 300 (+/-) 75 bp of TG(1-3) at a given end. Telomeric repeats function in completing chromosome replication and protecting the ends from degradation and end-to-end fusions. -synonym: "telomeric repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-09T06:00:42Z - -[Term] -id: SO:0001497 -name: X_element -def: "The X element is a conserved region, of the telomere, of ~475 bp that contains an ARS sequence and in most cases an Abf1p binding site." [http://www.yeastgenome.org/help/glossary.html#xelemcoresequence] -comment: Possible functions include roles in chromosomal segregation,\nmaintenance of chromosome stability, recombinational sequestering, or as a\nbarrier to transcriptional silencing. -synonym: "X element" RELATED [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T10:56:54Z - -[Term] -id: SO:0001498 -name: YAC_end -def: "A region of sequence from the end of a YAC clone that may provide a highly specific marker." [SO:ke] -synonym: "YAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000152 ! YAC -created_by: kareneilbeck -creation_date: 2009-11-19T11:07:18Z - -[Term] -id: SO:0001499 -name: whole_genome_sequence_status -def: "The status of whole genome sequence." [DOI:10.1126] -comment: This terms and children were added to SO in resonse to tracker request by patrick chain. The pape:r Genome Project Standards in a New Era of Sequencing. 
Science October 9th 2009, addresses these terms. -synonym: "whole genome sequence status" EXACT [] -is_a: SO:0000905 ! status -created_by: kareneilbeck -creation_date: 2009-10-23T12:47:47Z - -[Term] -id: SO:0005836 -name: regulatory_region -def: "A DNA sequence that controls the expression of a gene." [SO:ke] -subset: SOFA -synonym: "regulatory region" EXACT [] -xref: http://en.wikipedia.org/wiki/Regulatory_region "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0005837 -name: U14_snoRNA_primary_transcript -def: "The primary transcript of an evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA." [PMID:2251119] -synonym: "4.5S snRNA primary transcript" EXACT [] -synonym: "U14 snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0005841 -name: methylation_guide_snoRNA -def: "A snoRNA that specifies the site of 2'-O-ribose methylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA 2'-O-ribose methylation guide activity (GO:0030561). -synonym: "methylation guide snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0000580 ! methylation_guide_snoRNA_primary_transcript - -[Term] -id: SO:0005843 -name: rRNA_cleavage_RNA -def: "An ncRNA that is part of a ribonucleoprotein that cleaves the primary pre-rRNA transcript in the process of producing mature rRNA molecules." [GOC:kgc] -synonym: "rRNA cleavage RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000582 ! rRNA_cleavage_snoRNA_primary_transcript - -[Term] -id: SO:0005845 -name: exon_of_single_exon_gene -def: "An exon that is the only exon in a gene." [RSC:cb] -synonym: "exon of single exon gene" EXACT [] -synonym: "single_exon" RELATED [] -synonym: "singleton exon" EXACT [] -is_a: SO:0000147 ! 
exon - -[Term] -id: SO:0005847 -name: cassette_array_member -synonym: "cassette array member" EXACT [] -is_a: SO:0005848 ! gene_cassette_member - -[Term] -id: SO:0005848 -name: gene_cassette_member -synonym: "gene cassette member" EXACT [] -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0005849 -name: gene_subarray_member -synonym: "gene subarray member" EXACT [] -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0005850 -name: primer_binding_site -def: "Non-covalent primer binding site for initiation of replication, transcription, or reverse transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "primer binding site" EXACT [] -xref: http://en.wikipedia.org/wiki/Primer_binding_site "wiki" -is_a: SO:0000409 ! binding_site -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0005851 -name: gene_array -def: "An array includes two or more genes, or two or more gene subarrays, contiguously arranged where the individual genes, or subarrays, are either identical in sequence, or essentially so." [SO:ma] -comment: This would include, for example, a cluster of genes each encoding the major ribosomal RNAs and a cluster of histone gene subarrays. -synonym: "gene array" EXACT [] -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0005852 -name: gene_subarray -def: "A subarray is, by defintition, a member of a gene array (SO:0005851); the members of a subarray may differ substantially in sequence, but are closely related in function." [SO:ma] -comment: This would include, for example, a cluster of genes encoding different histones. -synonym: "gene subarray" EXACT [] -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0005853 -name: gene_cassette -def: "A gene that can be substituted for a related gene at a different site in the genome." [SGD:se] -comment: This would include, for example, the mating type gene cassettes of S. cerevisiae. 
Gene cassettes usually exist as linear sequences as part of a larger DNA molecule, such as a chromosome or plasmid. -synonym: "gene cassette" EXACT [] -xref: http://en.wikipedia.org/wiki/Gene_cassette "wiki" -is_a: SO:0000704 ! gene - -[Term] -id: SO:0005854 -name: gene_cassette_array -def: "An array of non-functional genes whose members, when captured by recombination form functional genes." [SO:ma] -comment: This would include, for example, the arrays of non-functional VSG genes of Trypanosomes. -synonym: "gene cassette array" EXACT [] -is_a: SO:0005855 ! gene_group -relationship: has_part SO:0005853 ! gene_cassette - -[Term] -id: SO:0005855 -name: gene_group -def: "A collection of related genes." [SO:ma] -subset: SOFA -synonym: "gene group" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0005856 -name: selenocysteine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -synonym: "selenocysteine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0005857 -name: selenocysteinyl_tRNA -def: "A tRNA sequence that has a selenocysteine anticodon, and a 3' selenocysteine binding region." [SO:ke] -synonym: "selenocysteinyl tRNA" EXACT [] -synonym: "selenocysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "selenocysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0005856 ! selenocysteine_tRNA_primary_transcript - -[Term] -id: SO:0005858 -name: syntenic_region -def: "A region in which two or more pairs of homologous markers occur on the same chromosome in two or more species." [http://www.informatics.jax.org/silverbook/glossary.shtml] -synonym: "syntenic region" EXACT [] -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000860 ! syntenic - -[Term] -id: SO:0100001 -name: biochemical_region_of_peptide -def: "A region of a peptide that is involved in a biochemical function." [EBIBS:GAR] -comment: Range. 
-subset: biosapiens -synonym: "biochemical motif" EXACT [] -synonym: "biochemical region of peptide" EXACT [] -synonym: "biochemical_region" RELATED [] -is_a: SO:0001067 ! polypeptide_motif - -[Term] -id: SO:0100002 -name: molecular_contact_region -def: "A region that is involved a contact with another molecule." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "molecular contact region" RELATED [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0100003 -name: intrinsically_unstructured_polypeptide_region -def: "A region of polypeptide chain with high conformational flexibility." [EBIBS:GAR] -subset: biosapiens -synonym: "disordered region" RELATED BS [] -synonym: "intrinsically unstructured polypeptide region" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0100004 -name: catmat_left_handed_three -def: "A motif of 3 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -75 bounds -100 to -50, res i+1: psi 140 bounds 110 to 170. An extra restriction of the length of the O to O distance would be useful, that it be less than 5 Angstrom. More precisely these two oxygens are the main chain carbonyl oxygen atoms of residues i-1 and i+1." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "catmat-3l" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100005 -name: catmat_left_handed_four -def: "A motif of 4 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i psi -10 bounds -50 to 30, res i+1: phi -90 bounds -120 to -60, res i+1: psi -10 bounds -50 to 30, res i+2: phi -75 bounds -100 to -50, res i+2: psi 140 bounds 110 to 170. The extra restriction of the length of the O to O distance is similar, that it be less than 5 Angstrom. In this case these two Oxygen atoms are the main chain carbonyl oxygen atoms of residues i-1 and i+2." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "catmat-4l" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100006 -name: catmat_right_handed_three -def: "A motif of 3 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -75 bounds -100 to -50, res i+1: psi 140 bounds 110 to 170. An extra restriction of the length of the O to O distance would be useful, that it be less than 5 Angstrom. More precisely these two oxygens are the main chain carbonyl oxygen atoms of residues i-1 and i+1." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "catmat-3r" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100007 -name: catmat_right_handed_four -def: "A motif of 4 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -90 bounds -120 to -60, res i+1: psi -10 bounds -50 to 30, res i+2: phi -75 bounds -100 to -50, res i+2: psi 140 bounds 110 to 170. The extra restriction of the length of the O to O distance is similar, that it be less than 5 Angstrom. In this case these two Oxygen atoms are the main chain carbonyl oxygen atoms of residues i-1 and i+2." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "catmat-4r" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100008 -name: alpha_beta_motif -def: "A motif of five consecutive residues and two H-bonds in which: H-bond between CO of residue(i) and NH of residue(i+4), H-bond between CO of residue(i) and NH of residue(i+3),Phi angles of residues(i+1), (i+2) and (i+3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "alpha beta motif" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0100009 -name: lipoprotein_signal_peptide -def: "A peptide that acts as a signal for both membrane translocation and lipid attachment in prokaryotes." [EBIBS:GAR] -subset: biosapiens -synonym: "lipoprotein signal peptide" EXACT [] -synonym: "prokaryotic membrane lipoprotein lipid attachment site" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0100010 -name: no_output -def: "An experimental region wherean analysis has been run and not produced any annotation." [EBIBS:GAR] -subset: biosapiens -synonym: "no output" EXACT BS [] -is_a: SO:0000703 ! experimental_result_region - -[Term] -id: SO:0100011 -name: cleaved_peptide_region -def: "The cleaved_peptide_regon is the a region of peptide sequence that is cleaved during maturation." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "cleaved peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0100012 -name: peptide_coil -def: "Irregular, unstructured regions of a protein's backbone, as distinct from the regular region (namely alpha helix and beta strand - characterised by specific patterns of main-chain hydrogen bonds)." [EBIBS:GAR] -subset: biosapiens -synonym: "coil" RELATED BS [] -synonym: "peptide coil" EXACT [] -synonym: "random coil" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100013 -name: hydrophobic_region_of_peptide -def: "Hydrophobic regions are regions with a low affinity for water." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "hydropathic" RELATED [] -synonym: "hydrophobic region of peptide" RELATED [] -synonym: "hydrophobic_region" EXACT [] -synonym: "hydrophobicity" RELATED [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0100014 -name: n_terminal_region -def: "The amino-terminal positively-charged region of a signal peptide (approx 1-5 aa)." 
[EBIBS:GAR] -subset: biosapiens -synonym: "N-region" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0000418 ! signal_peptide - -[Term] -id: SO:0100015 -name: c_terminal_region -def: "The more polar, carboxy-terminal region of the signal peptide (approx 3-7 aa)." [EBIBS:GAR] -subset: biosapiens -synonym: "C-region" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0000418 ! signal_peptide - -[Term] -id: SO:0100016 -name: central_hydrophobic_region_of_signal_peptide -def: "The central, hydrophobic region of the signal peptide (approx 7-15 aa)." [EBIBS:GAR] -subset: biosapiens -synonym: "central hydrophobic region of signal peptide" EXACT [] -synonym: "central_hydrophobic_region" RELATED [] -synonym: "H-region" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0000418 ! signal_peptide - -[Term] -id: SO:0100017 -name: polypeptide_conserved_motif -def: "A conserved motif is a short (up to 20 amino acids) region of biological interest that is conserved in different proteins. They may or may not have functional or structural significance within the proteins in which they are found." [EBIBS:GAR] -subset: biosapiens -synonym: "motif" RELATED [] -is_a: SO:0001067 ! polypeptide_motif - -[Term] -id: SO:0100018 -name: polypeptide_binding_motif -def: "A polypeptide binding motif is a short (up to 20 amino acids) polypeptide region of biological interest that contains one or more amino acids experimentally shown to bind to a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "binding" RELATED [uniprot:feature_type] -synonym: "polypeptide binding motif" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0100019 -name: polypeptide_catalytic_motif -def: "A polypeptide catalytic motif is a short (up to 20 amino acids) polypeptide region that contains one or more active site residues." 
[EBIBS:GAR] -subset: biosapiens -synonym: "catalytic_motif" RELATED [] -synonym: "polypeptide catalytic motif" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0100020 -name: polypeptide_DNA_contact -def: "Residues involved in interactions with DNA." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide DNA contact" EXACT [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0100021 -name: polypeptide_conserved_region -def: "A subsection of sequence with biological interest that is conserved in different proteins. They may or may not have functional or structural significance within the proteins in which they are found." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide conserved region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:1000002 -name: substitution -def: "Any change in genomic DNA caused by a single event." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1000005 -name: complex_substitution -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -synonym: "complex substitution" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000008 -name: point_mutation -def: "A single nucleotide change which has occurred at the same position of a corresponding nucleotide in a reference sequence." [SO:immuno_workshop] -subset: SOFA -synonym: "point mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Point_mutation "wiki" -is_a: SO:0001483 ! 
SNV - -[Term] -id: SO:1000009 -name: transition -def: "Change of a pyrimidine nucleotide, C or T, into an other pyrimidine nucleotide, or change of a purine nucleotide, A or G, into an other purine nucleotide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:1000010 -name: pyrimidine_transition -def: "A substitution of a pyrimidine, C or T, for another pyrimidine." [SO:ke] -synonym: "pyrimidine transition" EXACT [] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000011 -name: C_to_T_transition -def: "A transition of a cytidine to a thymine." [SO:ke] -synonym: "C to T transition" EXACT [] -is_a: SO:1000010 ! pyrimidine_transition - -[Term] -id: SO:1000012 -name: C_to_T_transition_at_pCpG_site -def: "The transition of cytidine to thymine occurring at a pCpG site as a consequence of the spontaneous deamination of 5'-methylcytidine." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "C to T transition at pCpG site" EXACT [] -is_a: SO:1000011 ! C_to_T_transition - -[Term] -id: SO:1000013 -name: T_to_C_transition -synonym: "T to C transition" EXACT [] -is_a: SO:1000010 ! pyrimidine_transition - -[Term] -id: SO:1000014 -name: purine_transition -def: "A substitution of a purine, A or G, for another purine." [SO:ke] -synonym: "purine transition" EXACT [] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000015 -name: A_to_G_transition -def: "A transition of an adenine to a guanine." [SO:ke] -synonym: "A to G transition" EXACT [] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000016 -name: G_to_A_transition -def: "A transition of a guanine to an adenine." [SO:ke] -synonym: "G to A transition" EXACT [] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000017 -name: transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G, or vice versa." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -xref: http://en.wikipedia.org/wiki/Transversion "wiki" -is_a: SO:0001483 ! SNV - -[Term] -id: SO:1000018 -name: pyrimidine_to_purine_transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G." [SO:ke] -synonym: "pyrimidine to purine transversion" EXACT [] -is_a: SO:1000017 ! transversion - -[Term] -id: SO:1000019 -name: C_to_A_transversion -def: "A transversion from cytidine to adenine." [SO:ke] -synonym: "C to A transversion" EXACT [] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000020 -name: C_to_G_transversion -synonym: "C to G transversion" EXACT [] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000021 -name: T_to_A_transversion -def: "A transversion from T to A." [SO:ke] -synonym: "T to A transversion" EXACT [] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000022 -name: T_to_G_transversion -def: "A transversion from T to G." [SO:ke] -synonym: "T to G transversion" EXACT [] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000023 -name: purine_to_pyrimidine_transversion -def: "Change of a purine nucleotide, A or G , into a pyrimidine nucleotide C or T." [SO:ke] -synonym: "purine to pyrimidine transversion" EXACT [] -is_a: SO:1000017 ! transversion - -[Term] -id: SO:1000024 -name: A_to_C_transversion -def: "A transversion from adenine to cytidine." [SO:ke] -synonym: "A to C transversion" EXACT [] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000025 -name: A_to_T_transversion -def: "A transversion from adenine to thymine." [SO:ke] -synonym: "A to T transversion" EXACT [] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000026 -name: G_to_C_transversion -def: "A transversion from guanine to cytidine." [SO:ke] -synonym: "G to C transversion" EXACT [] -is_a: SO:1000023 ! 
purine_to_pyrimidine_transversion - -[Term] -id: SO:1000027 -name: G_to_T_transversion -def: "A transversion from guanine to thymine." [SO:ke] -synonym: "G to T transversion" EXACT [] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000028 -name: intrachromosomal_mutation -synonym: "intrachromosomal mutation" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000029 -name: chromosomal_deletion -def: "An incomplete chromosome." [SO:ke] -synonym: "(bacteria)&Dgr;" RELATED [] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(fungi)D" RELATED [] -synonym: "chromosomal deletion" EXACT [] -synonym: "deficiency" EXACT [] -xref: http://en.wikipedia.org/wiki/Chromosomal_deletion "wiki" -is_a: SO:0000550 ! aneuploid_chromosome -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000030 -name: chromosomal_inversion -synonym: "(bacteria)IN" RELATED [] -synonym: "(Drosophila)In" RELATED [] -synonym: "(fungi)In" RELATED [] -synonym: "chromosomal inversion" EXACT [] -xref: http://en.wikipedia.org/wiki/Chromosomal_inversion "wiki" -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000031 -name: interchromosomal_mutation -synonym: "interchromosomal mutation" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000032 -name: indel -def: "A sequence alteration which included an insertion and a deletion, affecting 2 or more bases." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html, http:http\://www.hgvs.org/mutnomen/recs-DNA.html#indel] -comment: Indels can have a different number of bases than the corresponding reference sequence. -xref: http://en.wikipedia.org/wiki/Indel "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:1000035 -name: duplication -def: "One or more nucleotides are added between two adjacent nucleotides in the sequence; the inserted sequence derives from, or is identical in sequence to, nucleotides adjacent to insertion point." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "nucleotide duplication" EXACT [] -synonym: "nucleotide_duplication" RELATED [] -is_a: SO:0000667 ! insertion - -[Term] -id: SO:1000036 -name: inversion -def: "A continuous nucleotide sequence is inverted in the same position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1000037 -name: chromosomal_duplication -def: "An extra chromosome." [SO:ke] -synonym: "(Drosophila)Dp" RELATED [] -synonym: "(fungi)Dp" RELATED [] -synonym: "chromosomal duplication" EXACT [] -xref: http://en.wikipedia.org/wiki/Chromosomal_duplication "wiki" -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:1000038 -name: intrachromosomal_duplication -synonym: "intrachromosomal duplication" EXACT [] -is_a: SO:1000028 ! intrachromosomal_mutation -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000039 -name: direct_tandem_duplication -synonym: "direct tandem duplication" EXACT [] -is_a: SO:1000173 ! tandem_duplication - -[Term] -id: SO:1000040 -name: inverted_tandem_duplication -synonym: "inverted tandem duplication" EXACT [] -is_a: SO:1000173 ! tandem_duplication - -[Term] -id: SO:1000041 -name: intrachromosomal_transposition -synonym: "(Drosophila)Tp" RELATED [] -synonym: "intrachromosomal transposition" EXACT [] -is_a: SO:0000453 ! transposition -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:1000042 -name: compound_chromosome -synonym: "compound chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000043 -name: Robertsonian_fusion -synonym: "Robertsonian fusion" EXACT [] -xref: http://en.wikipedia.org/wiki/Robertsonian_fusion "wiki" -is_a: SO:1000044 ! 
chromosomal_translocation - -[Term] -id: SO:1000044 -name: chromosomal_translocation -synonym: "(Drosophila)T" RELATED [] -synonym: "(fungi)T" RELATED [] -synonym: "chromosomal translocation" EXACT [] -xref: http://en.wikipedia.org/wiki/Chromosomal_translocation "wiki" -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000045 -name: ring_chromosome -synonym: "(Drosophila)R" RELATED [] -synonym: "(fungi)C" RELATED [] -synonym: "ring chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Ring_chromosome "wiki" -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000046 -name: pericentric_inversion -synonym: "pericentric inversion" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000047 -name: paracentric_inversion -synonym: "paracentric inversion" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000048 -name: reciprocal_chromosomal_translocation -synonym: "reciprocal chromosomal translocation" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000049 -name: sequence_variation_affecting_transcript -def: "Any change in mature, spliced and processed, RNA that results from a change in the corresponding DNA sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting transcript" EXACT [] -synonym: "sequence variation affecting transcript" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:1000050 -name: sequence_variant_causing_no_change_in_transcript -def: "No effect on the state of the RNA." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing no change in transcript" RELATED [] -synonym: "sequence variant causing no change in transcript" EXACT [] -is_a: SO:1000049 ! 
sequence_variation_affecting_transcript - -[Term] -id: SO:1000052 -name: sequence_variation_affecting_complex_change_in_transcript -synonym: "mutation affecting complex change in transcript" EXACT [] -synonym: "sequence variation affecting complex change in transcript" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000054 -name: sequence_variation_affecting_coding_sequence -def: "Any of the amino acid coding triplets of a gene are affected by the DNA mutation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting coding sequence" EXACT [] -synonym: "sequence variation affecting coding sequence" RELATED [] -is_a: SO:1000079 ! sequence_variation_affecting_transcript_sequence - -[Term] -id: SO:1000055 -name: sequence_variant_causing_initiator_codon_change_in_transcript -def: "The DNA mutation changes, usually destroys, the first coding triplet of a gene. Usually prevents translation although another initiator codon may be used." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing initiator codon change in transcript" RELATED [] -synonym: "sequence variant causing initiator codon change in transcript" EXACT [] -is_a: SO:1000056 ! sequence_variant_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000056 -name: sequence_variant_causing_amino_acid_coding_codon_change_in_transcript -def: "The DNA mutation affects the amino acid coding sequence of a gene; this region includes both the initiator and terminator codons." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutaton causing amino acid coding codon change in transcript" RELATED [] -synonym: "sequence variant causing amino acid coding codon change in transcript" EXACT [] -is_a: SO:1000054 ! 
sequence_variation_affecting_coding_sequence - -[Term] -id: SO:1000057 -name: sequence_variant_causing_synonymous_codon_change_in_transcript -def: "The changed codon has the same translation product as the original codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing synonymous codon change in transcript" RELATED [] -synonym: "sequence variant causing synonymous codon change in transcript" EXACT [] -is_a: SO:1000056 ! sequence_variant_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000058 -name: sequence_variant_causing_non_synonymous_codon_change_in_transcript -def: "A DNA point mutation that causes a substitution of an amino acid by an other." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing non synonymous codon change in transcript" RELATED [] -synonym: "non-synonymous codon change in transcript" EXACT [] -synonym: "sequence variant causing non synonymous codon change in transcript" EXACT [] -is_a: SO:1000056 ! sequence_variant_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000059 -name: sequence_variant_causing_missense_codon_change_in_transcript -def: "The nucleotide change in the codon leads to a new codon coding for a new amino acid." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing missense codon change in transcript" RELATED [] -synonym: "sequence variant causing missense codon change in transcript" EXACT [] -is_a: SO:1000058 ! sequence_variant_causing_non_synonymous_codon_change_in_transcript - -[Term] -id: SO:1000060 -name: sequence_variant_causing_conservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change does not change the gross properties (size, charge, hydrophobicity) of the amino acid at that position." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -synonym: "mutation causing conservative missense codon change in transcript" RELATED [] -synonym: "sequence variant causing conservative missense codon change in transcript" EXACT [] -is_a: SO:1000059 ! sequence_variant_causing_missense_codon_change_in_transcript - -[Term] -id: SO:1000061 -name: sequence_variant_causing_nonconservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change changes the gross properties (size, charge, hydrophobicity) of the amino acid in that position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -synonym: "mutation causing nonconservative missense codon change in transcript" RELATED [] -synonym: "sequence variant causing nonconservative missense codon change in transcript" EXACT [] -is_a: SO:1000059 ! sequence_variant_causing_missense_codon_change_in_transcript - -[Term] -id: SO:1000062 -name: sequence_variant_causing_nonsense_codon_change_in_transcript -def: "The nucleotide change in the codon triplet creates a terminator codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing nonsense codon change in transcript" RELATED [] -synonym: "sequence variant causing nonsense codon change in transcript" EXACT [] -is_a: SO:1000056 ! sequence_variant_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000063 -name: sequence_variant_causing_terminator_codon_change_in_transcript -def: "The nucleotide change in the codon triplet changes the stop codon, causing an elongated transcript sequence." 
[SO:ke] -synonym: "mutation causing terminator codon change in transcript" RELATED [] -synonym: "sequence variant causing terminator codon change in transcript" EXACT [] -is_a: SO:1000054 ! sequence_variation_affecting_coding_sequence - -[Term] -id: SO:1000064 -name: sequence_variation_affecting_reading_frame -def: "An umbrella term for terms describing an effect of a sequence variation on the frame of translation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting reading frame" EXACT [] -synonym: "sequence variation affecting reading frame" RELATED [] -is_a: SO:1000054 ! sequence_variation_affecting_coding_sequence - -[Term] -id: SO:1000065 -name: frameshift_sequence_variation -def: "A mutation causing a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three." [SO:ke] -synonym: "frameshift mutation" EXACT [] -synonym: "frameshift sequence variation" RELATED [] -synonym: "out of frame mutation" RELATED [] -xref: http://en.wikipedia.org/wiki/Frameshift_mutation "wiki" -is_a: SO:1000064 ! sequence_variation_affecting_reading_frame - -[Term] -id: SO:1000066 -name: sequence_variant_causing_plus_1_frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, due to the insertion of a nucleotide." [SO:ke] -synonym: "plus 1 frameshift mutation" EXACT [] -synonym: "sequence variant causing plus 1 frameshift mutation" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000067 -name: sequence_variant_causing_minus_1_frameshift -def: "A mutation causing a disruption of the translational reading frame, due to the deletion of a nucleotide." [SO:ke] -synonym: "minus 1 frameshift mutation" EXACT [] -synonym: "sequence variant causing minus 1 frameshift" EXACT [] -is_a: SO:1000065 ! 
frameshift_sequence_variation - -[Term] -id: SO:1000068 -name: sequence_variant_causing_plus_2_frameshift -def: "A mutation causing a disruption of the translational reading frame, due to the insertion of two nucleotides." [SO:ke] -synonym: "plus 2 frameshift mutation" EXACT [] -synonym: "sequence variant causing plus 2 frameshift" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000069 -name: sequence_variant_causing_minus_2_frameshift -def: "A mutation causing a disruption of the translational reading frame, due to the deletion of two nucleotides." [SO:ke] -synonym: "minus 2 frameshift mutation" EXACT [] -synonym: "sequence variant causing minus 2 frameshift" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000070 -name: sequence_variant_affecting_transcript_processing -def: "Sequence variant affects the way in which the primary transcriptional product is processed to form the mature transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting transcript processing" RELATED [] -synonym: "sequence variant affecting transcript processing" EXACT [] -is_a: SO:1000079 ! sequence_variation_affecting_transcript_sequence - -[Term] -id: SO:1000071 -name: sequence_variant_affecting_splicing -def: "A sequence_variant_effect where the way in which the primary transcriptional product is processed to form the mature transcript, specifically by the removal (splicing) of intron sequences is changed." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting splicing" RELATED [] -synonym: "sequence variant affecting splicing" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:1000072 -name: sequence_variant_affecting_splice_donor -def: "A sequence_variant_effect that changes the splice donor sequence." 
[SO:ke] -synonym: "mutation affecting splice donor" RELATED [] -synonym: "sequence variant affecting splice donor" RELATED [] -synonym: "splice donor mutation" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000073 -name: sequence_variant_affecting_splice_acceptor -def: "A sequence_variant_effect that changes the splice acceptor sequence." [SO:ke] -synonym: "mutation affecting splicing" RELATED [] -synonym: "sequence variant affecting splice acceptor" RELATED [] -synonym: "splice acceptor mutation" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000074 -name: sequence_variant_causing_cryptic_splice_activation -def: "A sequence variant causing a new (functional) splice site." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: A cryptic splice site is only used when the natural splice site has been disrupted by a sequence alteration. -synonym: "cryptic splice activator sequence variant" EXACT [] -synonym: "mutation causing cryptic splice activator" RELATED [] -synonym: "sequence variant causing cryptic splice activator" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000075 -name: sequence_variant_affecting_editing -def: "Sequence variant affects the editing of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting editing" RELATED [] -synonym: "sequence variant affecting editing" EXACT [] -is_a: SO:1000070 ! sequence_variant_affecting_transcript_processing - -[Term] -id: SO:1000076 -name: sequence_variant_affecting_transcription -def: "Mutation affects the process of transcription, its initiation, progression or termination." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting transcription" RELATED [] -synonym: "sequence variant affecting transcription" EXACT [] -is_a: SO:1000049 ! 
sequence_variation_affecting_transcript - -[Term] -id: SO:1000078 -name: sequence_variant_decreasing_rate_of_transcription -def: "A sequence variation that decreases the rate a which transcription of the sequence occurs." [SO:ke] -synonym: "mutation decreasing rate of transcription" RELATED [] -synonym: "sequence variation decreasing rate of transcription" EXACT [] -is_a: SO:1000081 ! sequence_variant_affecting_rate_of_transcription - -[Term] -id: SO:1000079 -name: sequence_variation_affecting_transcript_sequence -synonym: "mutation affecting transcript sequence" EXACT [] -synonym: "sequence variation affecting transcript sequence" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000080 -name: sequence_variant_increasing_rate_of_transcription -synonym: "mutation increasing rate of transcription" RELATED [] -synonym: "sequence variation increasing rate of transcription" EXACT [] -is_a: SO:1000081 ! sequence_variant_affecting_rate_of_transcription - -[Term] -id: SO:1000081 -name: sequence_variant_affecting_rate_of_transcription -def: "A mutation that alters the rate a which transcription of the sequence occurs." [SO:ke] -synonym: "mutation affecting rate of transcription" RELATED [] -synonym: "sequence variant affecting rate of transcription" EXACT [] -is_a: SO:1000076 ! sequence_variant_affecting_transcription - -[Term] -id: SO:1000082 -name: sequence variant_affecting_transcript_stability -def: "Sequence variant affects the stability of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting transcript stability" RELATED [] -synonym: "sequence variant affecting transcript stability" EXACT [] -is_a: SO:1000079 ! sequence_variation_affecting_transcript_sequence - -[Term] -id: SO:1000083 -name: sequence_variant_increasing_transcript_stability -def: "Sequence variant increases the stability (half-life) of the transcript." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation increasing transcript stability" RELATED [] -synonym: "sequence variant increasing transcript stability" EXACT [] -is_a: SO:1000082 ! sequence variant_affecting_transcript_stability - -[Term] -id: SO:1000084 -name: sequence_variant_decreasing_transcript_stability -def: "Sequence variant decreases the stability (half-life) of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation decreasing transcript stability" RELATED [] -synonym: "sequence variant decreasing transcript stability" EXACT [] -is_a: SO:1000082 ! sequence variant_affecting_transcript_stability - -[Term] -id: SO:1000085 -name: sequence_variation_affecting_level_of_transcript -def: "A sequence variation that causes a change in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -synonym: "mutation affecting level of transcript" RELATED [] -synonym: "sequence variation affecting level of transcript" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000086 -name: sequence_variation_decreasing_level_of_transcript -def: "A sequence variation that causes a decrease in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -synonym: "mutation decreasing level of transcript" EXACT [] -synonym: "sequence variation decreasing level of transcript" RELATED [] -is_a: SO:1000085 ! sequence_variation_affecting_level_of_transcript - -[Term] -id: SO:1000087 -name: sequence_variation_increasing_level_of_transcript -def: "A sequence_variation that causes an increase in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -synonym: "mutation increasing level of transcript" EXACT [] -synonym: "sequence variation increasing level of transcript" EXACT [] -is_a: SO:1000085 ! 
sequence_variation_affecting_level_of_transcript - -[Term] -id: SO:1000088 -name: sequence_variant_affecting_translational_product -def: "Mutation causes a change in primary translation product of a transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting translational product" RELATED [] -synonym: "sequence variant affecting translational product" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:1000089 -name: sequence_variant_causing_no_change_of_translational_product -def: "The sequence variant at RNA level does not lead to any change in polypeptide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing no change of translational product" RELATED [] -synonym: "sequence variant causing no change of translational product" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000090 -name: sequence_variant_causing_uncharacterised_change_of_translational_product -def: "A sequence variant causing an uncharacterized change of translational product." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing uncharacterised change of translational product" RELATED [] -synonym: "sequence variant causing uncharacterised change of translational product" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000091 -name: sequence_variant_causing_partially_characterised_change_of_translational_product -def: "A sequence variant causing a partially uncharacterised change in translational product." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The nature of the mutation event is only partially characterised. -synonym: "mutation causing partially characterised change of translational product" RELATED [] -synonym: "sequence variant causing partially characterised change of translational product" EXACT [] -is_a: SO:1000090 ! 
sequence_variant_causing_uncharacterised_change_of_translational_product - -[Term] -id: SO:1000092 -name: sequence_variant_causing_complex_change_of_translational_product -def: "Any sequence variant effect that is known at nucleotide level but cannot be explained by using other key terms." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing complex change of translational product" RELATED [] -synonym: "sequence variant causing complex change of translational product" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000093 -name: sequence_variant_causing_amino_acid_substitution -def: "The replacement of a single amino acid by another." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing amino acid substitution" RELATED [] -synonym: "sequence variant causing amino acid substitution" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000094 -name: sequence_variant_causing_conservative_amino_acid_substitution -synonym: "mutation causing conservative amino acid substitution" RELATED [] -synonym: "sequence variant causing conservative amino acid substitution" EXACT [] -is_a: SO:1000093 ! sequence_variant_causing_amino_acid_substitution - -[Term] -id: SO:1000095 -name: sequence_variant_causing_nonconservative_amino_acid_substitution -synonym: "mutation causing nonconservative amino acid substitution" RELATED [] -synonym: "sequence variant causing nonconservative amino acid substitution" EXACT [] -is_a: SO:1000093 ! sequence_variant_causing_amino_acid_substitution - -[Term] -id: SO:1000096 -name: sequence_variant_causing_amino_acid_insertion -def: "The insertion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing amino acid insertion" RELATED [] -synonym: "sequence variant causing amino acid insertion" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000097 -name: sequence_variant_causing_amino_acid_deletion -def: "The deletion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing amino acid deletion" RELATED [] -synonym: "sequence variant causing amino acid deletion" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000098 -name: sequence_variant_causing_polypeptide_truncation -def: "The translational product is truncated at its C-terminus, usually a result of a nonsense codon change in transcript (SO:1000062)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing polypeptide truncation" RELATED [] -synonym: "sequence variant causing polypeptide truncation" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000099 -name: sequence_variant_causing_polypeptide_elongation -def: "The extension of the translational product at either (or both) the N-terminus and/or the C-terminus." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing polypeptide elongation" RELATED [] -synonym: "sequence variant causing polypeptide elongation" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000100 -name: mutation_causing_polypeptide_N_terminal_elongation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing polypeptide N terminal elongation" EXACT [] -synonym: "polypeptide N-terminal elongation" EXACT [] -is_a: SO:1000099 ! 
sequence_variant_causing_polypeptide_elongation - -[Term] -id: SO:1000101 -name: mutation_causing_polypeptide_C_terminal_elongation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing polypeptide C terminal elongation" EXACT [] -synonym: "polypeptide C-terminal elongation" EXACT [] -is_a: SO:1000099 ! sequence_variant_causing_polypeptide_elongation - -[Term] -id: SO:1000102 -name: sequence_variant_affecting_level_of_translational_product -synonym: "mutation affecting level of translational product" RELATED [] -synonym: "sequence variant affecting level of translational product" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000103 -name: sequence_variant_decreasing_level_of_translation_product -synonym: "mutationdecreasing level of translation product" RELATED [] -synonym: "sequence variant decreasing level of translation product" EXACT [] -is_a: SO:1000102 ! sequence_variant_affecting_level_of_translational_product - -[Term] -id: SO:1000104 -name: sequence_variant_increasing_level_of_translation_product -synonym: "mutationt increasing level of translation product" RELATED [] -synonym: "sequence variant increasing level of translation product" EXACT [] -is_a: SO:1000102 ! sequence_variant_affecting_level_of_translational_product - -[Term] -id: SO:1000105 -name: sequence_variant_affecting_polypeptide_amino_acid_sequence -synonym: "mutation affecting polypeptide amino acid sequence" RELATED [] -synonym: "sequence variant affecting polypeptide amino acid sequence" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000106 -name: mutation_causing_inframe_polypeptide_N_terminal_elongation -synonym: "inframe polypeptide N-terminal elongation" EXACT [] -synonym: "mutation causing inframe polypeptide N terminal elongation" EXACT [] -is_a: SO:1000100 ! 
mutation_causing_polypeptide_N_terminal_elongation - -[Term] -id: SO:1000107 -name: mutation_causing_out_of_frame_polypeptide_N_terminal_elongation -synonym: "mutation causing out of frame polypeptide N terminal elongation" EXACT [] -synonym: "out of frame polypeptide N-terminal elongation" EXACT [] -is_a: SO:1000100 ! mutation_causing_polypeptide_N_terminal_elongation - -[Term] -id: SO:1000108 -name: mutaton_causing_inframe_polypeptide_C_terminal_elongation -synonym: "inframe_polypeptide C-terminal elongation" EXACT [] -synonym: "mutaton causing inframe polypeptide C terminal elongation" EXACT [] -is_a: SO:1000101 ! mutation_causing_polypeptide_C_terminal_elongation - -[Term] -id: SO:1000109 -name: mutation_causing_out_of_frame_polypeptide_C_terminal_elongation -synonym: "mutation causing out of frame polypeptide C terminal elongation" EXACT [] -synonym: "out of frame polypeptide C-terminal elongation" EXACT [] -is_a: SO:1000101 ! mutation_causing_polypeptide_C_terminal_elongation - -[Term] -id: SO:1000110 -name: frame_restoring_sequence_variant -def: "A mutation that reverts the sequence of a previous frameshift mutation back to the initial frame." [SO:ke] -synonym: "frame restoring mutation" EXACT [] -synonym: "frame restoring sequence variant" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000111 -name: sequence_variant_affecting_3D_structure_of_polypeptide -def: "A mutation that changes the amino acid sequence of the peptide in such a way that it changes the 3D structure of the molecule." [SO:ke] -synonym: "mutation affecting 3D structure of polypeptide" RELATED [] -synonym: "sequence variant affecting 3D structure of polypeptide" EXACT [] -synonym: "sequence variant affecting 3D-structure of polypeptide" EXACT [] -is_a: SO:1000088 ! 
sequence_variant_affecting_translational_product - -[Term] -id: SO:1000112 -name: sequence_variant_causing_no_3D_structural_change -synonym: "mutation causing no 3D structural change" RELATED [] -synonym: "sequence variant causing no 3D structural change" EXACT [] -is_a: SO:1000111 ! sequence_variant_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000113 -name: sequence_variant_causing_uncharacterised_3D_structural_change -synonym: "mutation causing uncharacterised 3D structural change" RELATED [] -synonym: "sequence variant causing uncharacterised 3D structural change" EXACT [] -is_a: SO:1000111 ! sequence_variant_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000114 -name: sequence_variant_causing_partially_characterised_3D_structural_change -synonym: "mutation causing partially characterised 3D structural change" RELATED [] -synonym: "sequence variant causing partially characterised 3D structural change" EXACT [] -is_a: SO:1000113 ! sequence_variant_causing_uncharacterised_3D_structural_change - -[Term] -id: SO:1000115 -name: sequence_variant_causing_complex_3D_structural_change -synonym: "mutation causing complex 3D structural change" RELATED [] -synonym: "sequence variant causing complex 3D structural change" EXACT [] -is_a: SO:1000111 ! sequence_variant_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000116 -name: sequence_variant_causing_conformational_change -synonym: "mutation causing conformational change" RELATED [] -synonym: "sequence variant causing conformational change" EXACT [] -is_a: SO:1000111 ! sequence_variant_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000117 -name: sequence_variant_affecting_polypeptide_function -synonym: "mutation affecting polypeptide function" RELATED [] -synonym: "sequence variant affecting polypeptide function" EXACT [] -is_a: SO:1000088 ! 
sequence_variant_affecting_translational_product - -[Term] -id: SO:1000118 -name: sequence_variant_causing_loss_of_function_of_polypeptide -synonym: "loss of function of polypeptide" RELATED [] -synonym: "mutation causing loss of function of polypeptide" RELATED [] -synonym: "sequence variant causing loss of function of polypeptide" EXACT [] -is_a: SO:1000117 ! sequence_variant_affecting_polypeptide_function - -[Term] -id: SO:1000119 -name: sequence_variant_causing_inactive_ligand_binding_site -synonym: "mutation causing inactive ligand binding site" RELATED [] -synonym: "sequence variant causing inactive ligand binding site" EXACT [] -is_a: SO:1000118 ! sequence_variant_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000120 -name: sequence_variant_causing_inactive_catalytic_site -synonym: "mutation causing inactive catalytic site" RELATED [] -synonym: "sequence variant causing inactive catalytic site" EXACT [] -is_a: SO:1000119 ! sequence_variant_causing_inactive_ligand_binding_site - -[Term] -id: SO:1000121 -name: sequence_variant_causing_polypeptide_localization_change -synonym: "mutation causing polypeptide localization change" RELATED [] -synonym: "sequence variant causing polypeptide localization change" EXACT [] -is_a: SO:1000117 ! sequence_variant_affecting_polypeptide_function - -[Term] -id: SO:1000122 -name: sequence_variant_causing_polypeptide_post_translational_processing_change -synonym: "mutation causing polypeptide post translational processing change" RELATED [] -synonym: "polypeptide post-translational processing affected" EXACT [] -synonym: "sequence variant causing polypeptide post translational processing change" EXACT [] -is_a: SO:1000117 ! sequence_variant_affecting_polypeptide_function -is_a: SO:1000118 ! 
sequence_variant_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000123 -name: polypeptide_post_translational_processing_affected -synonym: "polypeptide_post-translational_processing_affected" RELATED [] -is_obsolete: true - -[Term] -id: SO:1000124 -name: sequence_variant_causing_partial_loss_of_function_of_polypeptide -synonym: "mutation causing partial loss of function of polypeptide" RELATED [] -synonym: "partial loss of function of polypeptide" EXACT [] -synonym: "sequence variant causing partial loss of function of polypeptide" EXACT [] -is_a: SO:1000118 ! sequence_variant_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000125 -name: sequence_variant_causing_gain_of_function_of_polypeptide -synonym: "gain of function of polypeptide" EXACT [] -synonym: "mutation causing gain of function of polypeptide" RELATED [] -synonym: "sequence variant causing gain of function of polypeptide" EXACT [] -is_a: SO:1000117 ! sequence_variant_affecting_polypeptide_function - -[Term] -id: SO:1000126 -name: sequence_variant_affecting_transcript_secondary_structure -def: "A sequence variant that affects the secondary structure (folding) of the RNA transcript molecule." [SO:ke] -synonym: "mutation affecting transcript secondary structure" RELATED [] -synonym: "sequence variant affecting transcript secondary structure" EXACT [] -is_a: SO:1000079 ! sequence_variation_affecting_transcript_sequence - -[Term] -id: SO:1000127 -name: sequence_variant_causing_compensatory_transcript_secondary_structure_mutation -synonym: "mutation causing compensatory transcript secondary structure mutation" RELATED [] -synonym: "sequence variant causing compensatory transcript secondary structure mutation" EXACT [] -is_a: SO:1000126 ! sequence_variant_affecting_transcript_secondary_structure - -[Term] -id: SO:1000132 -name: sequence_variant_effect -def: "The effect of a change in nucleotide sequence." [SO:ke] -comment: Updated after discussion with Peter Taschner - Feb 09. 
-synonym: "sequence variant effect" RELATED [] -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:0000400 ! sequence_attribute - -[Term] -id: SO:1000134 -name: sequence_variant_causing_polypeptide_fusion -synonym: "mutation causing polypeptide fusion" RELATED [] -synonym: "sequence variant causing polypeptide fusion" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000136 -name: autosynaptic_chromosome -synonym: "(Drosophila)A" RELATED [] -synonym: "autosynaptic chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000138 -name: homo_compound_chromosome -synonym: "homo compound chromosome" EXACT [] -synonym: "homo-compound chromosome" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:1000140 -name: hetero_compound_chromosome -synonym: "hetero compound chromosome" EXACT [] -synonym: "hetero-compound chromosome" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:1000141 -name: chromosome_fission -synonym: "chromosome fission" EXACT [] -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000142 -name: dexstrosynaptic_chromosome -synonym: "dexstrosynaptic chromosome" EXACT [] -is_a: SO:1000136 ! autosynaptic_chromosome - -[Term] -id: SO:1000143 -name: laevosynaptic_chromosome -synonym: "laevosynaptic chromosome" EXACT [] -is_a: SO:1000136 ! autosynaptic_chromosome - -[Term] -id: SO:1000144 -name: free_duplication -synonym: "free duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000145 -name: free_ring_duplication -synonym: "(Drosophila)R" RELATED [] -synonym: "free ring duplication" EXACT [] -is_a: SO:1000045 ! ring_chromosome -is_a: SO:1000144 ! free_duplication - -[Term] -id: SO:1000146 -name: complex_chromosomal_mutation -synonym: "complex chromosomal mutation" EXACT [] -is_a: SO:1000183 ! 
chromosome_structure_variation - -[Term] -id: SO:1000147 -name: deficient_translocation -def: "A translocation in which one of the four broken ends loses a segment before re-joining." [FB:reference_manual] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfT" RELATED [] -synonym: "deficient translocation" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000148 -name: inversion_cum_translocation -def: "The first two breaks are in the same chromosome, and the region between them is rejoined in inverted order to the other side of the first break, such that both sides of break one are present on the same chromosome. The remaining free ends are joined as a translocation with those resulting from the third break." [FB:reference_manual] -synonym: "(Drosophila)InT" RELATED [] -synonym: "(Drosophila)T" RELATED [] -synonym: "inversion cum translocation" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000149 -name: bipartite_duplication -def: "The (large) region between the first two breaks listed is lost, and the two flanking segments (one of them centric) are joined as a translocation to the free ends resulting from the third break." [FB:reference_manual] -synonym: "(Drosophila)bDp" RELATED [] -synonym: "bipartite duplication" EXACT [] -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000150 -name: cyclic_translocation -def: "Three breaks in three different chromosomes. The centric segment resulting from the first break listed is joined to the acentric segment resulting from the second, rather than the third." [FB:reference_manual] -synonym: "cyclic translocation" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000151 -name: bipartite_inversion -def: "Three breaks in the same chromosome; both central segments are inverted in place (i.e., they are not transposed)." 
[FB:reference_manual] -synonym: "(Drosophila)bIn" RELATED [] -synonym: "bipartite inversion" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000152 -name: uninverted_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)eDp" RELATED [] -synonym: "uninverted insertional duplication" EXACT [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000153 -name: inverted_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)iDp" RELATED [] -synonym: "inverted insertional duplication" EXACT [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000154 -name: insertional_duplication -def: "A chromosome duplication involving the insertion of a duplicated region." [SO:ke] -synonym: "(Drosophila)Dpp" RELATED [] -synonym: "insertional duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000155 -name: interchromosomal_transposition -synonym: "(Drosophila)Tp" RELATED [] -synonym: "interchromosomal transposition" EXACT [] -is_a: SO:0000453 ! transposition -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000156 -name: inverted_interchromosomal_transposition -synonym: "(Drosophila)iTp" RELATED [] -synonym: "inverted interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000157 -name: uninverted_interchromosomal_transposition -synonym: "(Drosophila)eTp" RELATED [] -synonym: "uninverted interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! 
interchromosomal_transposition - -[Term] -id: SO:1000158 -name: inverted_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)iTp" RELATED [] -synonym: "inverted intrachromosomal transposition" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000159 -name: uninverted_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)eTp" RELATED [] -synonym: "uninverted intrachromosomal transposition" EXACT [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000160 -name: unoriented_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [FB:reference_manual] -synonym: "(Drosophila)uDp" RELATED [] -synonym: "unoriented insertional duplication" EXACT [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000161 -name: unorientated_interchromosomal_transposition -synonym: "(Drosophila)uTp" RELATED [] -synonym: "unorientated interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000162 -name: unorientated_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." 
[FB:reference_manual] -synonym: "(Drosophila)uTp" RELATED [] -synonym: "unorientated intrachromosomal transposition" EXACT [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000170 -name: uncharacterised_chromosomal_mutation -synonym: "uncharacterised chromosomal mutation" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000171 -name: deficient_inversion -def: "Three breaks in the same chromosome; one central region lost, the other inverted." [FB:reference_manual] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfIn" RELATED [] -synonym: "deficient inversion" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000173 -name: tandem_duplication -synonym: "tandem duplication" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:1000175 -name: partially_characterised_chromosomal_mutation -synonym: "partially characterised chromosomal mutation" EXACT [] -is_a: SO:1000170 ! uncharacterised_chromosomal_mutation - -[Term] -id: SO:1000177 -name: sequence_variant_causing_uncharacterised_change_in_transcript -def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing uncharacterised change in transcript" RELATED [] -synonym: "sequence variant causing uncharacterised change in transcript" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000179 -name: sequence_variant_causing_partially_characterised_change_in_transcript -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing partially characterised change in transcript" RELATED [] -synonym: "sequence variant causing partially characterised change in transcript" EXACT [] -is_a: SO:1000177 ! 
sequence_variant_causing_uncharacterised_change_in_transcript - -[Term] -id: SO:1000180 -name: sequence_variant_affecting_gene_structure -def: "A sequence_variant_effect that changes the gene structure." [SO:ke] -synonym: "mutation affecting gene structure" RELATED [] -synonym: "sequence variant affecting gene structure" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:1000181 -name: sequence_variant_causing_gene_fusion -def: "A sequence_variant_effect that changes the gene structure by causing a fusion to another gene." [SO:ke] -synonym: "mutation causing gene fusion" RELATED [] -synonym: "sequence variant causing gene fusion" EXACT [] -is_a: SO:1000180 ! sequence_variant_affecting_gene_structure - -[Term] -id: SO:1000182 -name: chromosome_number_variation -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number." [SO:ke] -synonym: "chromosome number variation" EXACT [] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:1000183 -name: chromosome_structure_variation -synonym: "chromosome structure variation" EXACT [] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:1000184 -name: sequence_variant_causes_exon_loss -def: "A sequence variant affecting splicing and causes an exon loss." [SO:ke] -synonym: "mutation causes exon loss" RELATED [] -synonym: "sequence variant causes exon loss" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000185 -name: sequence_variant_causes_intron_gain -def: "A sequence variant effect, causing an intron to be gained by the processed transcript; usually a result of a donor acceptor mutation (SO:1000072)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causes intron gain" RELATED [] -synonym: "sequence variant causes intron gain" EXACT [] -is_a: SO:1000071 ! 
sequence_variant_affecting_splicing - -[Term] -id: SO:1000186 -name: sequence_variant_causing_cryptic_splice_donor_activation -synonym: "sequence variant causing cryptic splice donor activation" EXACT [] -is_a: SO:1000074 ! sequence_variant_causing_cryptic_splice_activation - -[Term] -id: SO:1001186 -name: sequence_variant_causing_cryptic_splice_acceptor_activation -synonym: "sequence variant causing cryptic splice acceptor activation" EXACT [] -is_a: SO:1000074 ! sequence_variant_causing_cryptic_splice_activation - -[Term] -id: SO:1001187 -name: alternatively_spliced_transcript -def: "A transcript that is alternatively spliced." [SO:xp] -synonym: "alternatively spliced transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000877 ! alternatively_spliced - -[Term] -id: SO:1001188 -name: encodes_1_polypeptide -def: "A gene that is alternately spliced, but encodes only one polypeptide." [SO:ke] -synonym: "encodes 1 polypeptide" EXACT [] -is_a: SO:0000463 ! encodes_alternately_spliced_transcripts - -[Term] -id: SO:1001189 -name: encodes_greater_than_1_polypeptide -def: "A gene that is alternately spliced, and encodes more than one polypeptide." [SO:ke] -synonym: "encodes greater than 1 polypeptide" EXACT [] -is_a: SO:0000463 ! encodes_alternately_spliced_transcripts - -[Term] -id: SO:1001190 -name: encodes_different_polypeptides_different_stop -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences, but use different stop codons." [SO:ke] -synonym: "encodes different polypeptides different stop" EXACT [] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001191 -name: encodes_overlapping_peptides_different_start -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences, but use different start codons." 
[SO:ke] -synonym: "encodes overlapping peptides different start" EXACT [] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001192 -name: encodes_disjoint_polypeptides -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that do not have overlapping peptide sequences." [SO:ke] -synonym: "encodes disjoint polypeptides" EXACT [] -is_a: SO:1001189 ! encodes_greater_than_1_polypeptide - -[Term] -id: SO:1001193 -name: encodes_overlapping_polypeptides_different_start_and_stop -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences, but use different start and stop codons." [SO:ke] -synonym: "encodes overlapping polypeptides different start and stop" EXACT [] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001194 -name: alternatively_spliced_gene_encoding_greater_than_1_polypeptide_coding_regions_overlapping -is_obsolete: true - -[Term] -id: SO:1001195 -name: encodes_overlapping_peptides -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences." [SO:ke] -synonym: "encodes overlapping peptides" EXACT [] -is_a: SO:1001189 ! encodes_greater_than_1_polypeptide - -[Term] -id: SO:1001196 -name: cryptogene -def: "A maxicircle gene so extensively edited that it cannot be matched to its edited mRNA sequence." [SO:ma] -intersection_of: SO:0000654 ! maxicircle_gene -intersection_of: has_quality SO:0000976 ! cryptic - -[Term] -id: SO:1001197 -name: dicistronic_primary_transcript -def: "A primary transcript that has the quality dicistronic." [SO:xp] -synonym: "dicistronic primary transcript" EXACT [] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:1001217 -name: member_of_regulon -synonym: "member of regulon" EXACT [] -is_a: SO:0000081 ! 
gene_array_member - -[Term] -id: SO:1001244 -name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non_overlapping -synonym: "alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non-overlapping" RELATED [] -is_obsolete: true - -[Term] -id: SO:1001246 -name: CDS_independently_known -def: "A CDS with the evidence status of being independently known." [SO:xp] -synonym: "CDS independently known" EXACT [] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000906 ! independently_known - -[Term] -id: SO:1001247 -name: orphan_CDS -def: "A CDS whose predicted amino acid sequence is unsupported by any experimental evidence or by any match with any other known sequence." [SO:ma] -synonym: "orphan CDS" EXACT [] -intersection_of: SO:1001254 ! CDS_predicted -intersection_of: has_origin SO:0000910 ! orphan - -[Term] -id: SO:1001249 -name: CDS_supported_by_domain_match_data -def: "A CDS that is supported by domain similarity." [SO:xp] -synonym: "CDS supported by domain match data" EXACT [] -intersection_of: SO:1001251 ! CDS_supported_by_sequence_similarity_data -intersection_of: has_quality SO:0000908 ! supported_by_domain_match - -[Term] -id: SO:1001251 -name: CDS_supported_by_sequence_similarity_data -def: "A CDS that is supported by sequence similarity data." [SO:xp] -synonym: "CDS supported by sequence similarity data" EXACT [] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:1001254 -name: CDS_predicted -def: "A CDS that is predicted." [SO:ke] -synonym: "CDS predicted" EXACT [] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000732 ! 
predicted - -[Term] -id: SO:1001255 -name: status_of_coding_sequence -is_obsolete: true - -[Term] -id: SO:1001259 -name: CDS_supported_by_EST_or_cDNA_data -def: "A CDS that is supported by similarity to EST or cDNA data." [SO:xp] -synonym: "CDS supported by EST or cDNA data" EXACT [] -intersection_of: SO:1001251 ! CDS_supported_by_sequence_similarity_data -intersection_of: has_quality SO:0000909 ! supported_by_EST_or_cDNA - -[Term] -id: SO:1001260 -name: internal_Shine_Dalgarno_sequence -def: "A Shine-Dalgarno sequence that stimulates recoding through interactions with the anti-Shine-Dalgarno in the RNA of small ribosomal subunits of translating ribosomes. The signal is only operative in Bacteria." [PMID:12519954, SO:ke] -synonym: "internal Shine Dalgarno sequence" EXACT [] -synonym: "internal Shine-Dalgarno sequence" EXACT [] -is_a: SO:0000243 ! internal_ribosome_entry_site -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001261 -name: recoded_mRNA -def: "The sequence of a mature mRNA transcript, modified before translation or during translation, usually by special cis-acting signals." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract] -synonym: "recoded mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000881 ! recoded - -[Term] -id: SO:1001262 -name: minus_1_translationally_frameshifted -def: "An attribute describing a translational frameshift of -1." [SO:ke] -synonym: "minus 1 translationally frameshifted" EXACT [] -is_a: SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:1001263 -name: plus_1_translationally_frameshifted -def: "An attribute describing a translational frameshift of +1." [SO:ke] -synonym: "plus 1 translationally frameshifted" EXACT [] -is_a: SO:0000887 ! 
translationally_frameshifted - -[Term] -id: SO:1001264 -name: mRNA_recoded_by_translational_bypass -def: "A recoded_mRNA where translation was suspended at a particular codon and resumed at a particular non-overlapping downstream codon." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract] -synonym: "mRNA recoded by translational bypass" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:1001265 -name: mRNA_recoded_by_codon_redefinition -def: "A recoded_mRNA that was modified by an alteration of codon meaning." [SO:ma] -synonym: "mRNA recoded by codon redefinition" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000882 ! codon_redefined - -[Term] -id: SO:1001266 -name: stop_codon_redefinition_as_selenocysteine -is_obsolete: true - -[Term] -id: SO:1001267 -name: stop_codon_readthrough -is_obsolete: true - -[Term] -id: SO:1001268 -name: recoding_stimulatory_region -def: "A site in an mRNA sequence that stimulates the recoding of a region in the same mRNA." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12519954&dopt=Abstract] -synonym: "recoding stimulatory region" EXACT [] -synonym: "recoding stimulatory signal" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:1001269 -name: four_bp_start_codon -def: "A non-canonical start codon with 4 base pairs." [SO:ke] -synonym: "4bp start codon" EXACT [] -synonym: "four bp start codon" EXACT [] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001270 -name: stop_codon_redefinition_as_pyrrolysine -is_obsolete: true - -[Term] -id: SO:1001271 -name: archaeal_intron -def: "An intron characteristic of Archaeal tRNA and rRNA genes, where intron transcript generates a bulge-helix-bulge motif that is recognised by a splicing endoribonuclease." 
[PMID:9301331, SO:ma] -comment: Intron characteristic of tRNA genes; splices by an endonuclease-ligase mediated mechanism. -synonym: "archaeal intron" EXACT [] -is_a: SO:0001216 ! endonuclease_spliced_intron - -[Term] -id: SO:1001272 -name: tRNA_intron -def: "An intron found in tRNA that is spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -comment: Could be a cross product with Gene ontology, GO:0006388. -synonym: "pre-tRNA intron" EXACT [] -synonym: "tRNA intron" EXACT [] -is_a: SO:0001216 ! endonuclease_spliced_intron - -[Term] -id: SO:1001273 -name: CTG_start_codon -def: "A non-canonical start codon of sequence CTG." [SO:ke] -synonym: "CTG start codon" EXACT [] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001274 -name: SECIS_element -def: "The incorporation of selenocysteine into a protein sequence is directed by an in-frame UGA codon (usually a stop codon) within the coding region of the mRNA. Selenoprotein mRNAs contain a conserved secondary structure in the 3' UTR that is required for the distinction of UGA stop from UGA selenocysteine. The selenocysteine insertion sequence (SECIS) is around 60 nt in length and adopts a hairpin structure which is sufficiently well-defined and conserved to act as a computational screen for selenoprotein genes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00031] -synonym: "SECIS element" EXACT [] -xref: http://en.wikipedia.org/wiki/SECIS_element "wiki" -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001275 -name: retron -def: "Sequence coding for a short, single-stranded, DNA sequence via a retrotransposed RNA intermediate; characteristic of some microbial genomes." [SO:ma] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1001277 -name: three_prime_recoding_site -def: "The recoding stimulatory signal located downstream of the recoding site." [SO:ke] -synonym: "three prime recoding site" EXACT [] -is_a: SO:1001268 ! 
recoding_stimulatory_region - -[Term] -id: SO:1001279 -name: three_prime_stem_loop_structure -def: "A recoding stimulatory region, the stem-loop secondary structural element is downstream of the redefined region." [PMID:12519954, SO:ke] -synonym: "three prime stem loop structure" EXACT [] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001280 -name: five_prime_recoding_site -def: "The recoding stimulatory signal located upstream of the recoding site." [SO:ke] -synonym: "five prime recoding site" EXACT [] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001281 -name: flanking_three_prime_quadruplet_recoding_signal -def: "Four base pair sequence immediately downstream of the redefined region. The redefined region is a frameshift site. The quadruplet is 2 overlapping codons." [PMID:12519954, SO:ke] -synonym: "flanking three prime quadruplet recoding signal" EXACT [] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001282 -name: UAG_stop_codon_signal -def: "A stop codon signal for a UAG stop codon redefinition." [SO:ke] -synonym: "UAG stop codon signal" EXACT [] -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001283 -name: UAA_stop_codon_signal -def: "A stop codon signal for a UAA stop codon redefinition." [SO:ke] -synonym: "UAA stop codon signal" EXACT [] -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001284 -name: regulon -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." [ISBN:0198506732] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Regulon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:1001285 -name: UGA_stop_codon_signal -def: "A stop codon signal for a UGA stop codon redefinition." [SO:ke] -synonym: "UGA stop codon signal" EXACT [] -is_a: SO:1001288 ! 
stop_codon_signal - -[Term] -id: SO:1001286 -name: three_prime_repeat_recoding_signal -def: "A recoding stimulatory signal, downstream sequence important for recoding that contains repetitive elements." [PMID:12519954, SO:ke] -synonym: "three prime repeat recoding signal" EXACT [] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001287 -name: distant_three_prime_recoding_signal -def: "A recoding signal that is found many hundreds of nucleotides 3' of a redefined stop codon." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8709208&dopt=Abstract] -synonym: "distant three prime recoding signal" EXACT [] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001288 -name: stop_codon_signal -def: "A recoding stimulatory signal that is a stop codon and has effect on efficiency of recoding." [PMID:12519954, SO:ke] -comment: This term does not include the stop codons that are redefined. An example would be a stop codon that partially overlapped a frame shifting site would be an example stimulatory signal. -synonym: "stop codon signal" EXACT [] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:2000061 -name: databank_entry -def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -synonym: "databank entry" EXACT [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:3000000 -name: gene_segment -def: "A gene component region which acts as a recombinational unit of a gene whose functional form is generated through somatic recombination." [GOC:add] -comment: Requested by tracker 2021594, July 2008, by Alex. -synonym: "gene segment" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Typedef] -id: adjacent_to -name: adjacent_to -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence." 
[SO:ke] -subset: SOFA - -[Typedef] -id: associated_with -name: associated_with -comment: This relationship is vague and up for discussion. - -[Typedef] -id: complete_evidence_for_feature -name: complete_evidence_for_feature -def: "B is complete_evidence_for_feature A if the extent (5' and 3' boundaries) and internal boundaries of B fully support the extent and internal boundaries of A." [SO:ke] -comment: If A is a feature with multiple regions such as a multi exon transcript, the supporting EST evidence is complete if each of the regions is supported by an equivalent region in B. Also there must be no extra regions in B that are not represented in A. This relationship was requested by jeltje on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: derives_from -name: derives_from -subset: SOFA -is_transitive: true - -[Typedef] -id: edited_from -name: edited_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:45Z - -[Typedef] -id: edited_to -name: edited_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:11Z - -[Typedef] -id: evidence_for_feature -name: evidence_for_feature -def: "B is evidence_for_feature A, if an instance of B supports the existence of A." [SO:ke] -comment: This relationship was requested by nlw on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true - -[Typedef] -id: exemplar_of -name: exemplar_of -def: "X is exemplar of Y if X is the best evidence for Y." [SO:ke] -comment: Tracker id: 2594157. 
- -[Typedef] -id: genome_of -name: genome_of - -[Typedef] -id: guided_by -name: guided_by -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:04Z - -[Typedef] -id: guides -name: guides -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:24Z - -[Typedef] -id: has_genome_location -name: has_genome_location -is_obsolete: true - -[Typedef] -id: has_intergral_part -name: has_integral_part -def: "X has_integral_part Y if and only if: X has_part Y and Y part_of X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: mRNA has_integral_part CDS. -created_by: kareneilbeck -creation_date: 2009-08-19T12:01:46Z - -[Typedef] -id: has_origin -name: has_origin - -[Typedef] -id: has_part -name: has_part -def: "Inverse of part_of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: operon has_part gene. - -[Typedef] -id: has_quality -name: has_quality -comment: The relationship between a feature and an attribute. - -[Typedef] -id: homologous_to -name: homologous_to -subset: SOFA -is_symmetric: true -is_a: similar_to ! similar_to - -[Typedef] -id: integral_part_of -name: integral_part_of -def: "X integral_part_of Y if and only if: X part_of Y and Y has_part X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: exon integral_part_of transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:03:28Z - -[Typedef] -id: member_of -name: member_of -comment: A subtype of part_of. Inverse is collection_of. Winston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. -subset: SOFA -is_transitive: true - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -subset: SOFA -is_a: homologous_to ! homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! 
homologous_to - -[Typedef] -id: paralogous_to -name: paralogous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: part_of -name: part_of -def: "X part_of Y if X is a subregion of Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: amino_acid part_of polypeptide. -subset: SOFA -is_transitive: true - -[Typedef] -id: partial_evidence_for_feature -name: partial_evidence_for_feature -def: "B is partial_evidence_for_feature A if the extent of B supports part_of but not all of A." [SO:ke] -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: position_of -name: position_of - -[Typedef] -id: processed_from -name: processed_from -def: "Inverse of processed_into." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA processed_from miRNA_primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:14:00Z - -[Typedef] -id: processed_into -name: processed_into -def: "X is processed_into Y if a region X is modified to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA_primary_transcript processed into miRNA. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:15:02Z - -[Typedef] -id: recombined_from -name: recombined_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:21:03Z - -[Typedef] -id: recombined_to -name: recombined_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:20:07Z - -[Typedef] -id: regulated_by -name: regulated_by -is_obsolete: true - -[Typedef] -id: sequence_of -name: sequence_of - -[Typedef] -id: similar_to -name: similar_to -subset: SOFA -is_symmetric: true - -[Typedef] -id: trans_spliced_from -name: trans_spliced_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:14Z - -[Typedef] -id: trans_spliced_to -name: trans_spliced_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:00Z - -[Typedef] -id: transcribed_from -name: transcribed_from -def: "X is transcribed_from Y if X is synthesized from template Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: primary_transcript transcribed_from gene. -created_by: kareneilbeck -creation_date: 2009-08-19T12:05:39Z - -[Typedef] -id: transcribed_to -name: transcribed_to -def: "Inverse of transcribed_from." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: gene transcribed_to primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:08:24Z - -[Typedef] -id: translates_to -name: translates_to -def: "Inverse of translation _of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: codon translates_to amino_acid. -created_by: kareneilbeck -creation_date: 2009-08-19T12:11:53Z - -[Typedef] -id: translation_of -name: translation_of -def: "X is translation of Y if X is translated by ribosome to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: Polypeptide translation_of CDS. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:09:59Z - -[Typedef] -id: variant_of -name: variant_of -def: "A' is a variant (mutation) of A = definition every instance of A' is either an immediate mutation of some instance of A, or there is a chain of immediate mutation processes linking A' to some instance of A." [SO:immuno_workshop] -comment: Added to SO during the immunology workshop, June 2007. This relationship was approved by Barry Smith. - diff --git a/annotation/NBIS/Ontology/SOXP/so-xp_2_4_2.obo b/annotation/NBIS/Ontology/SOXP/so-xp_2_4_2.obo deleted file mode 100644 index 29fb55ed4..000000000 --- a/annotation/NBIS/Ontology/SOXP/so-xp_2_4_2.obo +++ /dev/null @@ -1,15290 +0,0 @@ -format-version: 1.2 -date: 08:04:2010 11:18 -saved-by: kareneilbeck -auto-generated-by: OBO-Edit 2.1-beta3 -subsetdef: biosapiens "biosapiens protein feature ontology" -subsetdef: SOFA "SO feature annotation" -synonymtypedef: aa1 "amino acid 1 letter code" -synonymtypedef: aa3 "amino acid 3 letter code" -synonymtypedef: AAMOD "amino acid modification" -synonymtypedef: BS "biosapiens" -synonymtypedef: RNAMOD "RNA modification" EXACT -default-namespace: sequence -namespace-id-rule: * SO:$sequence(7,0,9999999)$ -remark: autogenerated-by\: DAG-Edit version 1.417\nsaved-by\: eilbeck\ndate\: Tue May 11 15\:18\:44 PDT 2004\nversion\: $Revision\: 1.45 $ - -[Term] -id: SO:0000000 -name: Sequence_Ontology -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000001 -name: region -def: "A sequence_feature with an extent greater than zero. A nucleotide region is composed of bases and a polypeptide region is composed of amino acids." [SO:ke] -subset: SOFA -synonym: "sequence" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000002 -name: sequence_secondary_structure -def: "A folded sequence." [SO:ke] -synonym: "sequence secondary structure" EXACT [] -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000003 -name: G_quartet -def: "G-quartets are unusual nucleic acid structures consisting of a planar arrangement where each guanine is hydrogen bonded by hoogsteen pairing to another guanine in the quartet." [http://www.ncbi.nlm.nih.gov/pubmed/7919797?dopt=Abstract] -synonym: "G quartet" EXACT [] -synonym: "G tetrad" EXACT [] -synonym: "G-quadruplex" EXACT [] -synonym: "G-quartet" EXACT [] -synonym: "G-tetrad" EXACT [] -synonym: "G_quadruplex" EXACT [] -synonym: "guanine tetrad" EXACT [] -xref: http://en.wikipedia.org/wiki/G-quadruplex "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000004 -name: interior_coding_exon -subset: SOFA -synonym: "interior coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000005 -name: satellite_DNA -def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "satellite DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Satellite_DNA "wiki" -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000006 -name: PCR_product -def: "A region amplified by a PCR reaction." [SO:ke] -comment: This term is mapped to MGED. This term is now located in OBI, with the following ID OBI_0000406. -subset: SOFA -synonym: "amplicon" RELATED [] -synonym: "PCR product" EXACT [] -xref: http://en.wikipedia.org/wiki/RAPD "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000007 -name: read_pair -def: "A pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -synonym: "read-pair" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! 
contig - -[Term] -id: SO:0000008 -name: gene_sensu_your_favorite_organism -is_obsolete: true - -[Term] -id: SO:0000009 -name: gene_class -is_obsolete: true - -[Term] -id: SO:0000010 -name: protein_coding -synonym: "protein-coding" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000011 -name: non_protein_coding -synonym: "non protein-coding" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000012 -name: scRNA_primary_transcript -def: "The primary transcript of any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -synonym: "scRNA primary transcript" EXACT [] -synonym: "scRNA transcript" EXACT [] -synonym: "small cytoplasmic RNA" RELATED [] -synonym: "small cytoplasmic RNA transcript" EXACT [] -synonym: "small_cytoplasmic_RNA" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000013 -name: scRNA -def: "Any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a Eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -subset: SOFA -synonym: "small cytoplasmic RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000012 ! scRNA_primary_transcript - -[Term] -id: SO:0000014 -name: INR_motif -def: "A sequence element characteristic of some RNA polymerase II promoters required for the correct positioning of the polymerase for the start of transcription. Overlaps the TSS. The mammalian consensus sequence is YYAN(T|A)YY; the Drosophila consensus sequence is TCA(G|T)t(T|C). In each the A is at position +1 with respect to the TSS. Functionally similar to the TATA box element." [PMID:12651739] -synonym: "DMp2" RELATED [] -synonym: "initiator" EXACT [] -synonym: "initiator motif" EXACT [] -synonym: "INR motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0000015 -name: DPE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters; Positioned from +28 to +32 with respect to the TSS (+1). Experimental results suggest that the DPE acts in conjunction with the INR_motif to provide a binding site for TFIID in the absence of a TATA box to mediate transcription of TATA-less promoters. Consensus sequence (A|G)G(A|T)(C|T)(G|A|C)." [PMID:12651739\:12537576] -synonym: "CRWMGCGWKCGCTTS" NARROW [] -synonym: "downstream core promoter element" EXACT [] -synonym: "DPE motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000016 -name: BRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements at -37 to -32 with respect to the TSS (+1). Consensus sequence is (G|C)(G|C)(G|A)CGCC. Binds TFIIB." [PMID:12651739] -synonym: "B-recognition element" EXACT [] -synonym: "BRE motif" EXACT [] -synonym: "TFIIB recognition element" RELATED [] -synonym: "transcription factor B-recognition element" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000017 -name: PSE_motif -def: "A sequence element characteristic of the promoters of snRNA genes transcribed by RNA polymerase II or by RNA polymerase III. Located between -45 and -60 relative to the TSS. The human PSE_motif consensus sequence is TCACCNTNA(C|G)TNAAAAG(T|G)." [PMID:12651739] -synonym: "proximal sequence element" EXACT [] -synonym: "PSE motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000167 ! promoter - -[Term] -id: SO:0000018 -name: linkage_group -def: "A group of loci that can be grouped in a linear order representing the different degrees of linkage among the genes concerned." 
[ISBN:038752046] -synonym: "linkage group" EXACT [] -xref: http://en.wikipedia.org/wiki/Linkage_group "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000020 -name: RNA_internal_loop -def: "A region of double stranded RNA where the bases do not conform to WC base pairing. The loop is closed on both sides by canonical base pairing. If the interruption to base pairing occurs on one strand only, it is known as a bulge." [SO:ke] -synonym: "RNA internal loop" EXACT [] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000021 -name: asymmetric_RNA_internal_loop -def: "An internal RNA loop where one of the strands includes more bases than the corresponding region on the other strand." [SO:ke] -synonym: "asymmetric RNA internal loop" EXACT [] -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000022 -name: A_minor_RNA_motif -def: "A region forming a motif, composed of adenines, where the minor groove edges are inserted into the minor groove of another helix." [SO:ke] -synonym: "A minor RNA motif" EXACT [] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000023 -name: K_turn_RNA_motif -def: "The kink turn (K-turn) is an RNA structural motif that creates a sharp (~120 degree) bend between two continuous helices." [SO:ke] -synonym: "K turn RNA motif" EXACT [] -synonym: "K-turn" EXACT [] -synonym: "kink turn" EXACT [] -synonym: "kink-turn motif" EXACT [] -xref: http://en.wikipedia.org/wiki/K-turn "wiki" -is_a: SO:0000021 ! asymmetric_RNA_internal_loop - -[Term] -id: SO:0000024 -name: sarcin_like_RNA_motif -def: "A loop in ribosomal RNA containing the sites of attack for ricin and sarcin." [http://www.ncbi.nlm.nih.gov/pubmed/7897662] -synonym: "sarcin like RNA motif" EXACT [] -synonym: "sarcin/ricin domain" EXACT [] -synonym: "sarcin/ricin loop" EXACT [] -synonym: "sarcin/ricin RNA domain" EXACT [] -is_a: SO:0000021 ! 
asymmetric_RNA_internal_loop - -[Term] -id: SO:0000025 -name: symmetric_RNA_internal_loop -def: "An internal RNA loop where the extent of the loop on both stands is the same size." [SO:ke] -synonym: "A-minor RNA motif" EXACT [] -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000026 -name: RNA_junction_loop -synonym: "RNA junction loop" EXACT [] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000027 -name: RNA_hook_turn -synonym: "hook turn" RELATED [] -synonym: "hook-turn motif" EXACT [] -synonym: "RNA hook turn" EXACT [] -is_a: SO:0000026 ! RNA_junction_loop - -[Term] -id: SO:0000028 -name: base_pair -synonym: "base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Base_pair "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000029 -name: WC_base_pair -def: "The canonical base pair, where two bases interact via WC edges, with glycosidic bonds oriented cis relative to the axis of orientation." [PMID:12177293] -synonym: "canonical base pair" EXACT [] -synonym: "Watson Crick base pair" EXACT [] -synonym: "Watson-Crick base pair" RELATED [] -synonym: "Watson-Crick pair" EXACT [] -synonym: "WC base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000030 -name: sugar_edge_base_pair -def: "A type of non-canonical base-pairing." [PMID:12177293] -synonym: "sugar edge base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000031 -name: aptamer -def: "DNA or RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http://aptamer.icmb.utexas.edu] -xref: http://en.wikipedia.org/wiki/Aptamer "wiki" -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000032 -name: DNA_aptamer -def: "DNA molecules that have been selected from random pools based on their ability to bind other molecules." [http:aptamer.icmb.utexas.edu] -synonym: "DNA aptamer" EXACT [] -is_a: SO:0000031 ! 
aptamer - -[Term] -id: SO:0000033 -name: RNA_aptamer -def: "RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http://aptamer.icmb.utexas.edu] -synonym: "RNA aptamer" EXACT [] -is_a: SO:0000031 ! aptamer - -[Term] -id: SO:0000034 -name: morpholino_oligo -def: "Morpholino oligos are synthesized from four different Morpholino subunits, each of which contains one of the four genetic bases (A, C, G, T) linked to a 6-membered morpholine ring. Eighteen to 25 subunits of these four subunit types are joined in a specific order by non-ionic phosphorodiamidate intersubunit linkages to give a Morpholino." [http://www.gene-tools.com/] -synonym: "morpholino oligo" EXACT [] -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001183 ! morpholino - -[Term] -id: SO:0000035 -name: riboswitch -def: "A riboswitch is a part of an mRNA that can act as a direct sensor of small molecules to control their own expression. A riboswitch is a cis element in the 5' end of an mRNA, that acts as a direct sensor of metabolites." [PMID:2820954] -synonym: "riboswitch RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Riboswitch "wiki" -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000036 -name: matrix_attachment_site -def: "A DNA region that is required for the binding of chromatin to the nuclear matrix." [SO:ma] -synonym: "MAR" EXACT [] -synonym: "matrix association region" EXACT [] -synonym: "matrix attachment region" EXACT [] -synonym: "matrix attachment site" EXACT [] -synonym: "nuclear matrix association region" EXACT [] -synonym: "nuclear matrix attachment site" EXACT [] -synonym: "S/MAR" EXACT [] -synonym: "S/MAR element" RELATED [] -synonym: "scaffold attachment site" EXACT [] -synonym: "scaffold matrix attachment region" EXACT [] -synonym: "SMAR" EXACT [] -xref: http://en.wikipedia.org/wiki/Matrix_attachment_site "wiki" -is_a: SO:0000626 ! 
chromosomal_regulatory_element - -[Term] -id: SO:0000037 -name: locus_control_region -def: "A DNA region that includes DNAse hypersensitive sites located 5' to a gene that confers the high-level, position-independent, and copy number-dependent expression to that gene." [SO:ma] -synonym: "LCR" EXACT [] -synonym: "locus control element" RELATED [] -synonym: "locus control region" EXACT [] -xref: http://en.wikipedia.org/wiki/Locus_control_region "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000038 -name: match_set -def: "A collection of match parts." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000039 -name: match_part -def: "A part of a match, for example an hsp from blast is a match_part." [SO:ke] -subset: SOFA -synonym: "match part" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: part_of SO:0000343 ! match - -[Term] -id: SO:0000040 -name: genomic_clone -def: "A clone of a DNA region of a genome." [SO:ma] -synonym: "genomic clone" EXACT [] -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000041 -name: sequence_operation -def: "An operation that can be applied to a sequence, that results in a change." [SO:ke] -synonym: "sequence operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000042 -name: pseudogene_attribute -def: "An attribute of a pseudogene (SO:0000336)." [SO:ma] -synonym: "pseudogene attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000043 -name: processed_pseudogene -def: "A pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promoters, but often including a polyA tail." [SO:xp] -comment: Please not the synonym R psi M uses the spelled out form of the greek letter. -synonym: "processed pseudogene" EXACT [] -synonym: "pseudogene by reverse transcription" RELATED [] -synonym: "R psi G" RELATED [] -synonym: "retropseudogene" EXACT [] -is_a: SO:0000336 ! 
pseudogene - -[Term] -id: SO:0000044 -name: pseudogene_by_unequal_crossing_over -def: "A pseudogene caused by unequal crossing over at recombination." [SO:ke] -synonym: "pseudogene by unequal crossing over" EXACT [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0000045 -name: delete -def: "To remove a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000046 -name: insert -def: "To insert a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000047 -name: invert -def: "To invert a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000048 -name: substitute -def: "To substitute a subsection of sequence for another." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000049 -name: translocate -def: "To translocate a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000050 -name: gene_part -def: "A part of a gene, that has no other route in the ontology back to region. This concept is necessary for logical inference as these parts must have the properties of region. It also allows us to associate all the parts of genes with a gene." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000051 -name: probe -def: "A DNA sequence used experimentally to detect the presence or absence of a complementary nucleic acid." [SO:ma] -xref: http://en.wikipedia.org/wiki/Hybridization_probe "wiki" -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000052 -name: assortment_derived_deficiency -synonym: "assortment-derived_deficiency" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000053 -name: sequence_variant_affecting_regulatory_region -def: "A sequence_variant_effect which changes the regulatory region of a gene." [SO:ke] -synonym: "mutation affecting regulatory region" RELATED [] -synonym: "sequence variant affecting regulatory region" EXACT [] -is_a: SO:1000132 ! 
sequence_variant_effect - -[Term] -id: SO:0000054 -name: aneuploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Aneuploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0000055 -name: hyperploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number as extra chromosomes are present." [SO:ke] -xref: http://en.wikipedia.org/wiki/Hyperploid "wiki" -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000056 -name: hypoploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number as some chromosomes are missing." [SO:ke] -xref: http://en.wikipedia.org/wiki/Hypoploid "wiki" -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000057 -name: operator -def: "A regulatory element of an operon to which activators or repressors bind thereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -synonym: "operator segment" EXACT [] -xref: http://en.wikipedia.org/wiki/Operator_(biology)#Operator "wiki" -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000058 -name: assortment_derived_aneuploid -synonym: "assortment-derived_aneuploid" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000059 -name: nuclease_binding_site -def: "A region of a molecule that binds to a nuclease." [SO:cb] -subset: SOFA -synonym: "nuclease binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0000060 -name: compound_chromosome_arm -comment: FLAG - this term is should probably be a part of rather than an is_a. -synonym: "compound chromosome arm" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:0000061 -name: restriction_enzyme_binding_site -def: "A region of a molecule that binds to a restriction enzyme." 
[SO:cb] -synonym: "restriction endonuclease binding site" EXACT [] -synonym: "restriction endonuclease recognition site" RELATED [] -synonym: "restriction enzyme binding site" EXACT [] -synonym: "restriction enzyme recognition site" RELATED [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000062 -name: deficient_intrachromosomal_transposition -def: "An intrachromosomal transposition whereby a translocation in which one of the four broken ends loses a segment before re-joining." [FB:reference_manual] -synonym: "deficient intrachromosomal transposition" EXACT [] -is_a: SO:1000041 ! intrachromosomal_transposition -intersection_of: SO:1000041 ! intrachromosomal_transposition -intersection_of: has_part SO:0000159 ! deletion - -[Term] -id: SO:0000063 -name: deficient_interchromosomal_transposition -def: "An interchromosomal transposition whereby a translocation in which one of the four broken ends loses a segment before re-joining." [SO:ke] -synonym: "deficient interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:0000064 -name: gene_by_transcript_attribute -comment: This classes of attributes was added by MA to allow the broad description of genes based on qualities of the transcript(s). A product of SO meeting 2004. -is_obsolete: true - -[Term] -id: SO:0000065 -name: free_chromosome_arm -def: "A chromosome structure variation whereby an arm exists as an individual chromosome element." [SO:ke] -synonym: "free chromosome arm" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000066 -name: gene_by_polyadenylation_attribute -is_obsolete: true - -[Term] -id: SO:0000067 -name: gene_to_gene_feature -synonym: "gene to gene feature" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000068 -name: overlapping -def: "An attribute describing a gene that has a sequence that overlaps the sequence of another gene." [SO:ke] -is_a: SO:0000067 ! 
gene_to_gene_feature - -[Term] -id: SO:0000069 -name: inside_intron -def: "An attribute to describe a gene when it is located within the intron of another gene." [SO:ke] -synonym: "inside intron" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000070 -name: inside_intron_antiparallel -def: "An attribute to describe a gene when it is located within the intron of another gene and on the opposite strand." [SO:ke] -synonym: "inside intron antiparallel" EXACT [] -is_a: SO:0000069 ! inside_intron - -[Term] -id: SO:0000071 -name: inside_intron_parallel -def: "An attribute to describe a gene when it is located within the intron of another gene and on the same strand." [SO:ke] -synonym: "inside intron parallel" EXACT [] -is_a: SO:0000069 ! inside_intron - -[Term] -id: SO:0000072 -name: end_overlapping_gene -is_obsolete: true - -[Term] -id: SO:0000073 -name: five_prime_three_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's 3' region." [SO:ke] -synonym: "five prime-three prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000074 -name: five_prime_five_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's five prime region." [SO:ke] -synonym: "five prime-five prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000075 -name: three_prime_three_prime_overlap -def: "An attribute to describe a gene when the 3' region overlaps with another gene's 3' region." [SO:ke] -synonym: "three prime-three prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000076 -name: three_prime_five_prime_overlap -def: "An attribute to describe a gene when the 3' region overlaps with another gene's 5' region." [SO:ke] -synonym: "5' 3' overlap" EXACT [] -synonym: "three prime five prime overlap" EXACT [] -is_a: SO:0000068 ! 
overlapping - -[Term] -id: SO:0000077 -name: antisense -def: "A region sequence that is complementary to a sequence of messenger RNA." [SO:ke] -xref: http://en.wikipedia.org/wiki/Antisense "wiki" -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000078 -name: polycistronic_transcript -def: "A transcript that is polycistronic." [SO:xp] -synonym: "polycistronic transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000079 -name: dicistronic_transcript -def: "A transcript that is dicistronic." [SO:ke] -synonym: "dicistronic transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000080 -name: operon_member -synonym: "operon member" EXACT [] -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0000081 -name: gene_array_member -synonym: "gene array member" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000082 -name: processed_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000083 -name: macronuclear_sequence -synonym: "macronuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000084 -name: micronuclear_sequence -synonym: "micronuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000085 -name: gene_by_genome_location -is_obsolete: true - -[Term] -id: SO:0000086 -name: gene_by_organelle_of_genome -is_obsolete: true - -[Term] -id: SO:0000087 -name: nuclear_gene -def: "A gene from nuclear sequence." [SO:xp] -synonym: "nuclear gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_gene "wiki" -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000088 -name: mt_gene -def: "A gene located in mitochondrial sequence." 
[SO:xp] -synonym: "mitochondrial gene" EXACT [] -synonym: "mt gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_gene "wiki" -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000089 -name: kinetoplast_gene -def: "A gene located in kinetoplast sequence." [SO:xp] -synonym: "kinetoplast gene" EXACT [] -intersection_of: SO:0000088 ! mt_gene -intersection_of: has_origin SO:0000741 ! kinetoplast - -[Term] -id: SO:0000090 -name: plastid_gene -def: "A gene from plastid sequence." [SO:xp] -synonym: "plastid gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000091 -name: apicoplast_gene -def: "A gene from apicoplast sequence." [SO:xp] -synonym: "apicoplast gene" EXACT [] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0000092 -name: ct_gene -def: "A gene from chloroplast sequence." [SO:xp] -synonym: "chloroplast gene" EXACT [] -synonym: "ct gene" EXACT [] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000093 -name: chromoplast_gene -def: "A gene from chromoplast_sequence." [SO:xp] -synonym: "chromoplast gene" EXACT [] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000094 -name: cyanelle_gene -def: "A gene from cyanelle sequence." [SO:xp] -synonym: "cyanelle gene" EXACT [] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000095 -name: leucoplast_gene -def: "A plastid gene from leucoplast sequence." [SO:xp] -synonym: "leucoplast gene" EXACT [] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000747 ! 
leucoplast_sequence - -[Term] -id: SO:0000096 -name: proplastid_gene -def: "A gene from proplastid sequence." [SO:ke] -synonym: "proplastid gene" EXACT [] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000748 ! proplastid_sequence - -[Term] -id: SO:0000097 -name: nucleomorph_gene -def: "A gene from nucleomorph sequence." [SO:xp] -synonym: "nucleomorph gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000098 -name: plasmid_gene -def: "A gene from plasmid sequence." [SO:xp] -synonym: "plasmid gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000749 ! plasmid_location - -[Term] -id: SO:0000099 -name: proviral_gene -def: "A gene from proviral sequence." [SO:xp] -synonym: "proviral gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000751 ! proviral_location - -[Term] -id: SO:0000100 -name: endogenous_retroviral_gene -def: "A proviral gene with origin endogenous retrovirus." [SO:xp] -synonym: "endogenous retroviral gene" EXACT [] -intersection_of: SO:0000099 ! proviral_gene -intersection_of: has_origin SO:0000903 ! endogenous_retroviral_sequence - -[Term] -id: SO:0000101 -name: transposable_element -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -synonym: "transposable element" EXACT [] -synonym: "transposon" EXACT [] -xref: http://en.wikipedia.org/wiki/Transposable_element "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000102 -name: expressed_sequence_match -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -synonym: "expressed sequence match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -def: "The end of the clone insert." 
[SO:ke] -subset: SOFA -synonym: "clone insert end" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000104 -name: polypeptide -alt_id: SO:0000358 -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The term 'protein' was merged with 'polypeptide'. Although 'protein' was a sequence_attribute and therefore meant to describe the quality rather than an actual feature, it was being used erroneously. It is replaced by 'peptidyl' as the polymer attribute. -subset: SOFA -synonym: "protein" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypeptide "wiki" -is_a: SO:0001411 ! biological_region -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000105 -name: chromosome_arm -def: "A region of the chromosome between the centromere and the telomere. Human chromosomes have two arms, the p arm (short) and the q arm (long) which are separated from each other by the centromere." [http://www.medterms.com/script/main/art.asp?articlekey=5152] -synonym: "chromosome arm" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000106 -name: non_capped_primary_transcript -is_obsolete: true - -[Term] -id: SO:0000107 -name: sequencing_primer -synonym: "sequencing primer" EXACT [] -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000108 -name: mRNA_with_frameshift -def: "An mRNA with a frameshift." [SO:xp] -synonym: "frameshifted mRNA" EXACT [] -synonym: "mRNA with frameshift" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000109 -name: sequence_variant_obs -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." 
[SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000110 -name: sequence_feature -def: "An extent of biological sequence." [SO:ke] -subset: SOFA -synonym: "located sequence feature" RELATED [] -synonym: "located_sequence_feature" EXACT [] -synonym: "sequence feature" EXACT [] -disjoint_from: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000111 -name: transposable_element_gene -def: "A gene encoded within a transposable element. For example gag, int, env and pol are the transposable element genes of the TY element in yeast." [SO:ke] -synonym: "transposable element gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: part_of SO:0000101 ! transposable_element -relationship: part_of SO:0000101 ! transposable_element - -[Term] -id: SO:0000112 -name: primer -def: "A short preexisting polynucleotide chain to which new deoxyribonucleotides can be added by DNA polymerase." [http://www.ornl.gov/TechResources/Human_Genome/publicat/primer2001/glossary.html] -subset: SOFA -synonym: "DNA primer" EXACT [] -synonym: "primer oligonucleotide" EXACT [] -synonym: "primer polynucleotide" EXACT [] -synonym: "primer sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Primer_(molecular_biology) "wiki" -is_a: SO:0000441 ! ss_oligo - -[Term] -id: SO:0000113 -name: proviral_region -def: "A viral sequence which has integrated into a host genome." [SO:ke] -subset: SOFA -synonym: "proviral region" EXACT [] -synonym: "proviral sequence" RELATED [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000114 -name: methylated_C -def: "A methylated deoxy-cytosine." [SO:ke] -subset: SOFA -synonym: "methylated C" EXACT [] -synonym: "methylated cytosine" EXACT [] -synonym: "methylated cytosine base" EXACT [] -synonym: "methylated cytosine residue" EXACT [] -is_a: SO:0000306 ! 
methylated_base_feature - -[Term] -id: SO:0000115 -name: transcript_feature -is_obsolete: true - -[Term] -id: SO:0000116 -name: edited -def: "An attribute describing a sequence that is modified by editing." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000117 -name: transcript_with_readthrough_stop_codon -is_obsolete: true - -[Term] -id: SO:0000118 -name: transcript_with_translational_frameshift -def: "A transcript with a translational frameshift." [SO:xp] -synonym: "transcript with translational frameshift" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000119 -name: regulated -def: "An attribute to describe a sequence that is regulated." [SO:ke] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns. -subset: SOFA -synonym: "pre mRNA" RELATED [] -synonym: "protein coding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000121 -name: forward_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "DNA forward primer" EXACT [] -synonym: "forward DNA primer" EXACT [] -synonym: "forward primer" EXACT [] -synonym: "forward primer oligo" EXACT [] -synonym: "forward primer oligonucleotide" EXACT [] -synonym: "forward primer polynucleotide" EXACT [] -synonym: "forward primer sequence" EXACT [] -intersection_of: SO:0000112 ! primer -intersection_of: has_quality SO:0001030 ! forward - -[Term] -id: SO:0000122 -name: RNA_sequence_secondary_structure -def: "A folded RNA sequence." [SO:ke] -synonym: "RNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! 
sequence_secondary_structure - -[Term] -id: SO:0000123 -name: transcriptionally_regulated -def: "An attribute describing a gene that is regulated at transcription." [SO:ma] -comment: By:. -synonym: "transcriptionally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -def: "Expressed in relatively constant amounts without regard to cellular environmental conditions such as the concentration of a particular substrate." [SO:ke] -synonym: "transcriptionally constitutive" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -def: "An inducer molecule is required for transcription to occur." [SO:ke] -synonym: "transcriptionally induced" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -def: "A repressor molecule is required for transcription to stop." [SO:ke] -synonym: "transcriptionally repressed" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000127 -name: silenced_gene -def: "A gene that is silenced." [SO:xp] -synonym: "silenced gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000893 ! silenced - -[Term] -id: SO:0000128 -name: gene_silenced_by_DNA_modification -def: "A gene that is silenced by DNA modification." [SO:xp] -synonym: "gene silenced by DNA modification" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000129 -name: gene_silenced_by_DNA_methylation -def: "A gene that is silenced by DNA methylation." [SO:xp] -synonym: "gene silenced by DNA methylation" EXACT [] -synonym: "methylation-silenced gene" EXACT [] -intersection_of: SO:0000128 ! gene_silenced_by_DNA_modification -intersection_of: has_quality SO:0000895 ! 
silenced_by_DNA_methylation - -[Term] -id: SO:0000130 -name: post_translationally_regulated -def: "An attribute describing a gene that is regulated after it has been translated." [SO:ke] -synonym: "post translationally regulated" EXACT [] -synonym: "post-translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000131 -name: translationally_regulated -def: "An attribute describing a gene that is regulated as it is translated." [SO:ke] -synonym: "translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "DNA reverse primer" EXACT [] -synonym: "reverse DNA primer" EXACT [] -synonym: "reverse primer" EXACT [] -synonym: "reverse primer oligo" EXACT [] -synonym: "reverse primer oligonucleotide" EXACT [] -synonym: "reverse primer sequence" EXACT [] -intersection_of: SO:0000112 ! primer -intersection_of: has_quality SO:0001031 ! reverse - -[Term] -id: SO:0000133 -name: epigenetically_modified -def: "This attribute describes a gene where heritable changes other than those in the DNA sequence occur. These changes include: modification to the DNA (such as DNA methylation, the covalent modification of cytosine), and post-translational modification of histones." [SO:ke] -synonym: "epigenetically modified" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000134 -name: imprinted -def: "Imprinted genes are epigenetically modified genes that are expressed monoallelically according to their parent of origin." [SO:ke] -xref: http:http\://en.wikipedia.org/wiki/Genomic_imprinting "wiki" -is_a: SO:0000119 ! regulated -is_a: SO:0000133 ! 
epigenetically_modified - -[Term] -id: SO:0000135 -name: maternally_imprinted -def: "The maternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "maternally imprinted" EXACT [] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -def: "The paternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "paternally imprinted" EXACT [] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -def: "Allelic exclusion is a process occurring in diploid organisms, where a gene is inactivated and not expressed in that cell." [SO:ke] -comment: Examples are x-inactivation and immunoglobulin formation. -synonym: "allelically excluded" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000138 -name: gene_rearranged_at_DNA_level -def: "An epigenetically modified gene, rearranged at the DNA level." [SO:xp] -synonym: "gene rearranged at DNA level" EXACT [] -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000904 ! rearranged_at_DNA_level - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -synonym: "DNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000145 -name: recoded_codon -def: "A codon that has been redefined at translation. The redefinition may be as a result of translational bypass, translational frameshifting or stop codon readthrough." [SO:xp] -synonym: "recoded codon" EXACT [] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000146 -name: capped -def: "An attribute describing when a sequence, usually an mRNA is capped by the addition of a modified guanine nucleotide at the 5' end." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000719 ! 
ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unavailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http\://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "yeast artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "bacterial artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000154 -name: PAC -def: "The P1-derived artificial chromosome are DNA constructs that are derived from the DNA of P1 bacteriophage. 
They can carry large amounts (about 100-300 kilobases) of other sequences for a variety of bioengineering purposes. It is one type of vector used to clone DNA fragments (100- to 300-kb insert size; average, 150 kb) in Escherichia coli cells." [http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. Drosophila melanogaster PACs carry an average insert size of 80 kb. The library represents a 6-fold coverage of the genome. -synonym: "P1" EXACT [] -synonym: "P1 artificial chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000155 -name: plasmid -def: "A self replicating, using the hosts cellular machinery, often circular nucleic acid molecule that is distinct from a chromosome in the organism." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "plasmid sequence" EXACT [] -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as a plasmid or packaged as a phage,since they retain the lambda cos sites." [SO:ma] -comment: Paper: vans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1):9-20. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cosmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Cosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma] -synonym: "phagemid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Phagemid "wiki" -is_a: SO:0000440 ! 
vector_replicon - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilizes the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource: mapping and analysis of 96 clones. Genomics 1996. -synonym: "fosmid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Fosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000159 -name: deletion -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_obsolete: true - -[Term] -id: SO:0000161 -name: methylated_A -def: "A modified RNA base in which adenine has been methylated." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinery. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! 
primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a place holder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000166 -name: enhancer_bound_by_factor -def: "An enhancer bound by a factor." 
[SO:xp] -synonym: "enhancer bound by factor" EXACT [] -intersection_of: SO:0000165 ! enhancer -intersection_of: has_quality SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." [SO:ke] -synonym: "pol I promoter" EXACT [] -synonym: "polymerase I promoter" EXACT [] -synonym: "RNA polymerase A promoter" EXACT [] -synonym: "RNApol I promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke] -synonym: "pol II promoter" RELATED [] -synonym: "polymerase II promoter" EXACT [] -synonym: "RNA polymerase B promoter" EXACT [] -synonym: "RNApol II promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter -relationship: has_part SO:0000174 ! TATA_box - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." 
[SO:ke] -synonym: "pol III promoter" EXACT [] -synonym: "polymerase III promoter" EXACT [] -synonym: "RNA polymerase C promoter" EXACT [] -synonym: "RNApol III promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter -relationship: has_part SO:0000174 ! TATA_box - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "CAAT box" EXACT [] -synonym: "CAAT signal" EXACT [] -synonym: "CAAT-box" EXACT [] -xref: http://en.wikipedia.org/wiki/CAAT_box "wiki" -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000173 -name: GC_rich_promoter_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "GC rich promoter region" EXACT [] -synonym: "GC-rich region" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "Goldstein-Hogness box" EXACT [] -synonym: "TATA box" EXACT [] -xref: http://en.wikipedia.org/wiki/TATA_box "wiki" -is_a: SO:0000235 ! 
TF_binding_site - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-10 signal" EXACT [] -synonym: "minus 10 signal" EXACT [] -synonym: "Pribnow box" EXACT [] -synonym: "Pribnow Schaller box" EXACT [] -synonym: "Pribnow-Schaller box" EXACT [] -xref: http://en.wikipedia.org/wiki/Pribnow_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000613 ! bacterial_RNApol_promoter - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-35 signal" EXACT [] -synonym: "minus 35 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000613 ! bacterial_RNApol_promoter - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! 
clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." [http://www.dddmag.com/Glossary.aspx#r] -synonym: "class I" RELATED [] -synonym: "class I transposon" EXACT [] -synonym: "retrotransposon element" EXACT [] -xref: http://en.wikipedia.org/wiki/Retrotransposon "wiki" -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." [SO:ke] -synonym: "class II" RELATED [] -synonym: "class II transposon" EXACT [] -synonym: "DNA transposon" EXACT [] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -synonym: "U2 intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! 
transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." [SO:ke] -synonym: "long terminal repeat retrotransposon" EXACT [] -synonym: "LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -synonym: "non LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -synonym: "5' intron" EXACT [] -synonym: "5' intron sequence" EXACT [] -synonym: "five prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000191 -name: interior_intron -synonym: "interior intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -synonym: "3' intron" EXACT [] -synonym: "3' intron sequence" RELATED [] -synonym: "three prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." 
[GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "LINE" EXACT [] -synonym: "LINE element" EXACT [] -synonym: "Long interspersed element" EXACT [] -synonym: "Long interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon (here, 'codon' is inclusive of the stop_codon)." [SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! 
exon - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke] -synonym: "translocated sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000201 -name: interior_exon -def: "An exon that is bounded by 5' and 3' splice sites." [PMID:10373547] -synonym: "interior exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The coding exon that is most 3-prime on a given transcript." [SO:ma] -synonym: "3' coding exon" RELATED [] -synonym: "three prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." [SO:ke] -synonym: "Short interspersed element" EXACT [] -synonym: "Short interspersed nuclear element" EXACT [] -synonym: "SINE element" EXACT [] -xref: http://en.wikipedia.org/wiki/Short_interspersed_nuclear_element "wiki" -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_variation -synonym: "simple sequence length polymorphism" RELATED [] -synonym: "simple sequence length variation" EXACT [] -synonym: "SSLP" RELATED [] -is_a: SO:0000248 ! sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "terminal inverted repeat element" EXACT [] -synonym: "TIR element" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -subset: SOFA -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253)." [SO:ke] -synonym: "tRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -def: "A primary transcript encoding alanyl tRNA." 
[SO:ke] -synonym: "alanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." [SO:ke] -synonym: "arginine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -synonym: "asparagine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -synonym: "aspartic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." [SO:ke] -synonym: "cysteine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." [SO:ke] -synonym: "glycine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." 
[SO:ke] -synonym: "histidine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke] -synonym: "isoleucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -synonym: "leucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -synonym: "lysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -synonym: "methionine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke] -synonym: "phenylalanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -synonym: "proline tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -synonym: "serine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -synonym: "threonine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -synonym: "tryptophan tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." [SO:ke] -synonym: "tyrosine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." [SO:ke] -synonym: "valine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear RNA (SO:0000274)." [SO:ke] -synonym: "snRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -synonym: "snoRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. 
It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds a TF complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The in-frame interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -synonym: "transcript attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterized by large modular imperfect long inverted repeats." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "foldback element" EXACT [] -synonym: "long inverted repeat element" RELATED [] -synonym: "LVR element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The sequences extending on either side of a specific region." 
[SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000240 -name: chromosome_variation -synonym: "chromosome variation" EXACT [] -is_a: SO:0001507 ! variant_collection -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000400 ! sequence_attribute -disjoint_from: SO:1000132 ! sequence_variant_effect -relationship: part_of SO:0001524 ! chromosomally_aberrant_genome - -[Term] -id: SO:0000241 -name: internal_UTR -def: "A UTR bordered by the terminal and initial codons of two CDSs in a polycistronic transcript. Every UTR is either 5', 3' or internal." [SO:cjm] -synonym: "internal UTR" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polycistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -synonym: "untranslated region polycistronic mRNA" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke] -synonym: "internal ribosomal entry sequence" EXACT [] -synonym: "internal ribosomal entry site" EXACT [] -synonym: "internal ribosome entry sequence" RELATED [] -synonym: "internal ribosome entry site" EXACT [] -synonym: "IRES" EXACT [] -xref: http://en.wikipedia.org/wiki/Internal_ribosome_entry_site "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_obsolete: true - -[Term] -id: SO:0000246 -name: polyadenylated -def: "A attribute describing the addition of a poly A tail to the 3' end of a mRNA molecule." [SO:ke] -is_a: SO:0000863 ! 
mRNA_attribute - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000248 -name: sequence_length_variation -synonym: "sequence length variation" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -synonym: "modified RNA base feature" EXACT [] -is_a: SO:0001236 ! base - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). 
Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000254 -name: alanyl_tRNA -def: "A tRNA sequence that has an alanine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "alanyl tRNA" EXACT [] -synonym: "alanyl-transfer ribonucleic acid" EXACT [] -synonym: "alanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000211 ! alanine_tRNA_primary_transcript - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -synonym: "rRNA small subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -def: "A tRNA sequence that has an asparagine anticodon, and a 3' asparagine binding region." [SO:ke] -synonym: "asparaginyl tRNA" EXACT [] -synonym: "asparaginyl-transfer ribonucleic acid" EXACT [] -synonym: "asparaginyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000213 ! asparagine_tRNA_primary_transcript - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -def: "A tRNA sequence that has an aspartic acid anticodon, and a 3' aspartic acid binding region." [SO:ke] -synonym: "aspartyl tRNA" EXACT [] -synonym: "aspartyl-transfer ribonucleic acid" EXACT [] -synonym: "aspartyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! 
tRNA -relationship: derives_from SO:0000214 ! aspartic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -def: "A tRNA sequence that has a cysteine anticodon, and a 3' cysteine binding region." [SO:ke] -synonym: "cysteinyl tRNA" EXACT [] -synonym: "cysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "cysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000215 ! cysteine_tRNA_primary_transcript - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -def: "A tRNA sequence that has a glutamine anticodon, and a 3' glutamine binding region." [SO:ke] -synonym: "glutaminyl tRNA" EXACT [] -synonym: "glutaminyl-transfer ribonucleic acid" EXACT [] -synonym: "glutaminyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000216 ! glutamic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -def: "A tRNA sequence that has a glutamic acid anticodon, and a 3' glutamic acid binding region." [SO:ke] -synonym: "glutamyl tRNA" EXACT [] -synonym: "glutamyl-transfer ribonucleic acid" EXACT [] -synonym: "glutamyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000217 ! glutamine_tRNA_primary_transcript - -[Term] -id: SO:0000261 -name: glycyl_tRNA -def: "A tRNA sequence that has a glycine anticodon, and a 3' glycine binding region." [SO:ke] -synonym: "glycyl tRNA" EXACT [] -synonym: "glycyl-transfer ribonucleic acid" RELATED [] -synonym: "glycyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000218 ! glycine_tRNA_primary_transcript - -[Term] -id: SO:0000262 -name: histidyl_tRNA -def: "A tRNA sequence that has a histidine anticodon, and a 3' histidine binding region." [SO:ke] -synonym: "histidyl tRNA" EXACT [] -synonym: "histidyl-transfer ribonucleic acid" EXACT [] -synonym: "histidyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000219 ! 
histidine_tRNA_primary_transcript - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -def: "A tRNA sequence that has an isoleucine anticodon, and a 3' isoleucine binding region." [SO:ke] -synonym: "isoleucyl tRNA" EXACT [] -synonym: "isoleucyl-transfer ribonucleic acid" EXACT [] -synonym: "isoleucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000220 ! isoleucine_tRNA_primary_transcript - -[Term] -id: SO:0000264 -name: leucyl_tRNA -def: "A tRNA sequence that has a leucine anticodon, and a 3' leucine binding region." [SO:ke] -synonym: "leucyl tRNA" EXACT [] -synonym: "leucyl-transfer ribonucleic acid" EXACT [] -synonym: "leucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000221 ! leucine_tRNA_primary_transcript - -[Term] -id: SO:0000265 -name: lysyl_tRNA -def: "A tRNA sequence that has a lysine anticodon, and a 3' lysine binding region." [SO:ke] -synonym: "lysyl tRNA" EXACT [] -synonym: "lysyl-transfer ribonucleic acid" EXACT [] -synonym: "lysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000222 ! lysine_tRNA_primary_transcript - -[Term] -id: SO:0000266 -name: methionyl_tRNA -def: "A tRNA sequence that has a methionine anticodon, and a 3' methionine binding region." [SO:ke] -synonym: "methionyl tRNA" EXACT [] -synonym: "methionyl-transfer ribonucleic acid" EXACT [] -synonym: "methionyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000223 ! methionine_tRNA_primary_transcript - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -def: "A tRNA sequence that has a phenylalanine anticodon, and a 3' phenylalanine binding region." [SO:ke] -synonym: "phenylalanyl tRNA" EXACT [] -synonym: "phenylalanyl-transfer ribonucleic acid" EXACT [] -synonym: "phenylalanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000224 ! 
phenylalanine_tRNA_primary_transcript - -[Term] -id: SO:0000268 -name: prolyl_tRNA -def: "A tRNA sequence that has a proline anticodon, and a 3' proline binding region." [SO:ke] -synonym: "prolyl tRNA" EXACT [] -synonym: "prolyl-transfer ribonucleic acid" EXACT [] -synonym: "prolyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000225 ! proline_tRNA_primary_transcript - -[Term] -id: SO:0000269 -name: seryl_tRNA -def: "A tRNA sequence that has a serine anticodon, and a 3' serine binding region." [SO:ke] -synonym: "seryl tRNA" EXACT [] -synonym: "seryl-transfer ribonucleic acid" RELATED [] -synonym: "seryl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000226 ! serine_tRNA_primary_transcript - -[Term] -id: SO:0000270 -name: threonyl_tRNA -def: "A tRNA sequence that has a threonine anticodon, and a 3' threonine binding region." [SO:ke] -synonym: "threonyl tRNA" EXACT [] -synonym: "threonyl-transfer ribonucleic acid" EXACT [] -synonym: "threonyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000227 ! threonine_tRNA_primary_transcript - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -def: "A tRNA sequence that has a tryptophan anticodon, and a 3' tryptophan binding region." [SO:ke] -synonym: "tryptophanyl tRNA" EXACT [] -synonym: "tryptophanyl-transfer ribonucleic acid" EXACT [] -synonym: "tryptophanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000228 ! tryptophan_tRNA_primary_transcript - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -def: "A tRNA sequence that has a tyrosine anticodon, and a 3' tyrosine binding region." [SO:ke] -synonym: "tyrosyl tRNA" EXACT [] -synonym: "tyrosyl-transfer ribonucleic acid" EXACT [] -synonym: "tyrosyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000229 ! 
tyrosine_tRNA_primary_transcript - -[Term] -id: SO:0000273 -name: valyl_tRNA -def: "A tRNA sequence that has a valine anticodon, and a 3' valine binding region." [SO:ke] -synonym: "valyl tRNA" EXACT [] -synonym: "valyl-transfer ribonucleic acid" EXACT [] -synonym: "valyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000230 ! valine_tRNA_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000231 ! snRNA_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. 
Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: derives_from SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000277 -name: bound_by_factor -def: "An attribute describing a sequence that is bound by another molecule." [SO:ke] -comment: Formerly called transcript_by_bound_factor. -synonym: "bound by factor" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000278 -name: transcript_bound_by_nucleic_acid -def: "A transcript that is bound by a nucleic acid." [SO:xp] -comment: Formerly called transcript_by_bound_nucleic_acid. -synonym: "transcript bound by nucleic acid" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000876 ! bound_by_nucleic_acid - -[Term] -id: SO:0000279 -name: transcript_bound_by_protein -def: "A transcript that is bound by a protein." [SO:xp] -comment: Formerly called transcript_by_bound_protein. -synonym: "transcript bound by protein" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000875 ! bound_by_protein - -[Term] -id: SO:0000280 -name: engineered_gene -def: "A gene that is engineered." [SO:xp] -synonym: "engineered gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000281 -name: engineered_foreign_gene -def: "A gene that is engineered and foreign." [SO:xp] -synonym: "engineered foreign gene" EXACT [] -intersection_of: SO:0000280 ! engineered_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000282 -name: mRNA_with_minus_1_frameshift -def: "An mRNA with a minus 1 frameshift." 
[SO:xp] -synonym: "mRNA with minus 1 frameshift" EXACT [] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000866 ! minus_1_frameshift - -[Term] -id: SO:0000283 -name: engineered_foreign_transposable_element_gene -def: "A transposable_element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign transposable element gene" EXACT [] -intersection_of: SO:0000111 ! transposable_element_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartite and interrupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000285 -name: foreign_gene -def: "A gene that is foreign." [SO:xp] -synonym: "foreign gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "direct terminal repeat" RELATED [] -synonym: "long terminal repeat" EXACT [] -synonym: "LTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Long_terminal_repeat "wiki" -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000287 -name: fusion_gene -def: "A gene that is a fusion." [SO:xp] -synonym: "fusion gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Fusion_gene "wiki" -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000806 ! fusion - -[Term] -id: SO:0000288 -name: engineered_fusion_gene -def: "A fusion gene that is engineered." [SO:xp] -synonym: "engineered fusion gene" EXACT [] -intersection_of: SO:0000287 ! fusion_gene -intersection_of: has_quality SO:0000783 ! 
engineered - -[Term] -id: SO:0000289 -name: microsatellite -def: "A repeat_region containing repeat_units (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite" EXACT [] -synonym: "dinucleotide repeat microsatellite feature" EXACT [] -synonym: "dinucleotide repeat microsatellite locus" EXACT [] -synonym: "dinucleotide repeat microsatellite marker" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite marker" RELATED [] -synonym: "rinucleotide repeat microsatellite" EXACT [] -synonym: "trinucleotide repeat microsatellite feature" EXACT [] -synonym: "trinucleotide repeat microsatellite locus" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_obsolete: true - -[Term] -id: SO:0000293 -name: engineered_foreign_repetitive_element -def: "A repetitive element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign repetitive element" EXACT [] -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! 
repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -synonym: "U12 intron" EXACT [] -synonym: "U12-dependent intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! replicon - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "D-loop" EXACT [] -synonym: "displacement loop" RELATED [] -xref: http://en.wikipedia.org/wiki/D_loop "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -synonym: "recombination feature" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000299 -name: specific_recombination_site -synonym: "specific recombination site" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -synonym: "recombination feature of rearranged gene" EXACT [] -is_a: SO:0000299 ! 
specific_recombination_site - -[Term] -id: SO:0000301 -name: vertebrate_immune_system_gene_recombination_feature -synonym: "vertebrate immune system gene recombination feature" EXACT [] -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene recombination feature" EXACT [] -synonym: "J-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interrupted palindrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_base -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001720 ! epigenetically_modified_region - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_base - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_obsolete: true - -[Term] -id: SO:0000309 -name: computed_feature -is_obsolete: true - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_obsolete: true - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to: -is_obsolete: true - -[Term] -id: SO:0000312 -name: experimentally_determined -def: "Attribute to describe a feature that has been experimentally verified." [SO:ke] -synonym: "experimentally determined" EXACT [] -is_a: SO:0000789 ! validated - -[Term] -id: SO:0000313 -name: stem_loop -alt_id: SO:0000019 -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "RNA_hairpin_loop" EXACT [] -synonym: "stem loop" EXACT [] -synonym: "stem-loop" EXACT [] -xref: http://en.wikipedia.org/wiki/Stem_loop "wiki" -is_a: SO:0000122 ! RNA_sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: TSS -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! 
mRNA_region - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cDNA clone" EXACT [] -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke] -synonym: "intronic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000321 -name: mRNA_with_plus_1_frameshift -def: "An mRNA with a plus 1 frameshift." [SO:ke] -synonym: "mRNA with plus 1 frameshift" EXACT [] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000868 ! plus_1_frameshift - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -synonym: "nuclease hypersensitive site" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "coding start" EXACT [] -synonym: "translation initiation site" EXACT [] -synonym: "translation start" RELATED [] -is_a: SO:0000851 ! 
CDS_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke] -synonym: "coding end" EXACT [] -synonym: "translation termination site" EXACT [] -synonym: "translation_end" EXACT [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray oligo" EXACT [] -synonym: "microarray oligonucleotide" EXACT [] -is_a: SO:0000051 ! probe - -[Term] -id: SO:0000329 -name: mRNA_with_plus_2_frameshift -def: "An mRNA with a plus 2 frameshift." [SO:xp] -synonym: "mRNA with plus 2 frameshift" EXACT [] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000869 ! plus_2_framshift - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000335 -name: mRNA_with_minus_2_frameshift -def: "A mRNA with a minus 2 frameshift." [SO:ke] -synonym: "mRNA with minus 2 frameshift" EXACT [] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000867 ! minus_2_frameshift - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." 
[http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITEs do not encode proteins." [http://www.pnas.org/cgi/content/full/97/18/10083] -synonym: "miniature inverted repeat transposable element" EXACT [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome which promotes recombination." [SO:rd] -synonym: "recombination hotspot" EXACT [] -xref: http://en.wikipedia.org/wiki/Recombination_hotspot "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! 
chromosome_part - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -synonym: "site specific recombination target region" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000346 -name: loxP_site -synonym: "Cre-recombination target region" RELATED [] -synonym: "loxP site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000348 -name: nucleic_acid -def: "An attribute describing a sequence consisting of nucleobases bound to repeating units. The forms found in nature are deoxyribonucleic acid (DNA), where the repeating units are 2-deoxy-D-ribose rings connected to a phosphate backbone, and ribonucleic acid (RNA), where the repeating units are D-ribose rings connected to a phosphate backbone." [CHEBI:33696, RSC:cb] -synonym: "nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleic_acid "wiki" -is_a: SO:0000443 ! 
polymer_attribute - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000350 -name: FRT_site -def: "An inversion site found on the Saccharomyces cerevisiae 2 micron plasmid." [SO:ma] -synonym: "FLP recombination target region" EXACT [] -synonym: "FRT site" EXACT [] -is_a: SO:0000948 ! inversion_site - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "An attribute to decide a sequence of nucleotides, nucleotide analogs, or amino acids that has been designed by an experimenter and which may, or may not, correspond with any natural sequence." [SO:ma] -synonym: "synthetic sequence" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000352 -name: DNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a 2-deoxy-D-ribose ring connected to a phosphate backbone." [RSC:cb] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000353 -name: sequence_assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -synonym: "group 1 intron homing endonuclease target region" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which is co-inherited as the result of the lack of historic recombination within it." [SO:ma] -synonym: "haplotype block" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000356 -name: RNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a D-ribose ring connected to a phosphate backbone." 
[RSC:cb] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: flanked -def: "An attribute describing a region that is bounded either side by a particular kind of region." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000359 -name: floxed -def: "An attribute describing sequence that is flanked by Lox-P sites." [SO:ke] -xref: http://en.wikipedia.org/wiki/Floxed "wiki" -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000361 -name: FRT_flanked -def: "An attribute to describe sequence that is flanked by the FLP recombinase recognition site, FRT." [SO:ke] -synonym: "FRT flanked" EXACT [] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000362 -name: invalidated_by_chimeric_cDNA -def: "A cDNA clone constructed from more than one mRNA. Usually an experimental artifact." [SO:ma] -synonym: "invalidated by chimeric cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000363 -name: floxed_gene -def: "A transgene that is floxed." [SO:xp] -synonym: "floxed gene" EXACT [] -intersection_of: SO:0000902 ! transgene -intersection_of: has_quality SO:0000359 ! floxed - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposable element." [SO:ke] -synonym: "transposable element flanking region" EXACT [] -is_a: SO:0000239 ! 
flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "A region encoding an integrase which acts at a site adjacent to it (attI_site) to insert DNA which must include but is not limited to an attC_site." [SO:as] -xref: http://en.wikipedia.org/wiki/Integron "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000367 -name: attI_site -def: "A region within an integron, adjacent to an integrase, at which site specific recombination involving an attC_site takes place." [SO:as] -synonym: "attI site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -is_obsolete: true - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/~smaloy/Glossary/C.html] -synonym: "conjugative transposon" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." 
[RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0000373 -name: recombinationally_inverted_gene -def: "A recombinationally rearranged gene by inversion." [SO:xp] -synonym: "recombinationally inverted gene" EXACT [] -intersection_of: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: has_quality SO:1000036 ! inversion - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -intersection_of: SO:0000372 ! enzymatic_RNA -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000375 -name: rRNA_5_8S -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 6S RNA represses expression from a sigma70-dependent promoter during stationary phase." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S RNA" EXACT [] -synonym: "RNA 6S" EXACT [] -xref: http://en.wikipedia.org/wiki/6S_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB RsmB RNA" EXACT [] -synonym: "CsrB-RsmB RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -synonym: "DsrA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/DsrA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -synonym: "GcvB RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/GcvB_RNA "wiki" -is_a: SO:0000378 ! DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http://rnaworld.bio.ku.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -intersection_of: SO:0000715 ! RNA_motif -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000381 -name: group_IIA_intron -synonym: "group IIA intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -synonym: "group IIB intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -synonym: "MicF RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MicF_RNA "wiki" -is_a: SO:0000644 ! antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. 
Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -synonym: "OxyS RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/OxyS_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterized activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. 
Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -synonym: "RprA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RprA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -synonym: "RRE RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -synonym: "spot-42 RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Spot_42_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesizes telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -def: "An attribute describes a quality of sequence." [SO:ke] -synonym: "sequence attribute" EXACT [] -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0000401 -name: gene_attribute -synonym: "gene attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_obsolete: true - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0005837 ! 
U14_snoRNA_primary_transcript - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron. Twintrons are group II or III introns, into which another group II or III intron has been transposed." [PMID:1899376, PMID:7823908] -xref: http://en.wikipedia.org/wiki/Twintron "wiki" -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! 
small_subunit_rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: Discrete. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A region of a molecule that binds to a protein." [SO:ke] -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000411 -name: rescue_region -def: "A region that rescues." [SO:xp] -synonym: "rescue fragment" EXACT [] -synonym: "rescue region" EXACT [] -synonym: "rescue segment" RELATED [] -intersection_of: SO:0000695 ! reagent -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000414 -name: invalidated_by_genomic_contamination -def: "An attribute to describe a feature that is invalidated due to genomic contamination." 
[SO:ke] -synonym: "invalidated by genomic contamination" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000415 -name: invalidated_by_genomic_polyA_primed_cDNA -def: "An attribute to describe a feature that is invalidated due to polyA priming." [SO:ke] -synonym: "invalidated by genomic polyA primed cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000416 -name: invalidated_by_partial_processing -def: "An attribute to describe a feature that is invalidated due to partial processing." [SO:ke] -synonym: "invalidated by partial processing" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000417 -name: polypeptide_domain -alt_id: BS:00012 -alt_id: BS:00134 -alt_id: SO:0001069 -def: "A structurally or functionally defined protein region. In proteins with multiple domains, the combination of the domains determines the function of the protein. A region which has been shown to recur throughout evolution." [EBIBS:GAR] -comment: Range. Old definition from before biosapiens: A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains. -subset: biosapiens -synonym: "ca_bind" RELATED BS [uniprot:feature_type] -synonym: "DNA_bind" RELATED BS [uniprot:feature_type] -synonym: "domain" BROAD BS [uniprot:feature_type] -synonym: "np_bind" RELATED BS [uniprot:feature_type] -synonym: "polypeptide domain" EXACT [] -synonym: "polypeptide_structural_domain" EXACT BS [] -synonym: "structural domain" BROAD BS [] -synonym: "zn_fing" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -synonym: "5' TIR" EXACT [] -synonym: "five prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -synonym: "3' TIR" EXACT [] -synonym: "three prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -synonym: "U5 long terminal repeat region" EXACT [] -synonym: "U5 LTR region" EXACT [] -is_a: SO:0000848 ! 
LTR_component - -[Term] -id: SO:0000423 -name: R_LTR_region -synonym: "R long terminal repeat region" EXACT [] -synonym: "R LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000424 -name: U3_LTR_region -synonym: "U3 long terminal repeat region" EXACT [] -synonym: "U3 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000425 -name: five_prime_LTR -synonym: "5' long terminal repeat" EXACT [] -synonym: "5' LTR" EXACT [] -synonym: "five prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -synonym: "3' long terminal repeat" EXACT [] -synonym: "3' LTR" EXACT [] -synonym: "three prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -synonym: "R 5' long term repeat region" EXACT [] -synonym: "R five prime LTR region" EXACT [] -is_a: SO:0000423 ! R_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -synonym: "U5 5' long terminal repeat region" EXACT [] -synonym: "U5 five prime LTR region" EXACT [] -is_a: SO:0000422 ! U5_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -synonym: "U3 5' long term repeat region" EXACT [] -synonym: "U3 five prime LTR region" EXACT [] -is_a: SO:0000424 ! U3_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -synonym: "R 3' long terminal repeat region" EXACT [] -synonym: "R three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -synonym: "U3 3' long terminal repeat region" EXACT [] -synonym: "U3 three prime LTR region" EXACT [] -is_a: SO:0000849 ! 
three_prime_LTR_component - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -synonym: "U5 3' long terminal repeat region" EXACT [] -synonym: "U5 three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -synonym: "non LTR retrotransposon polymeric tract" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: target_site_duplication -def: "A sequence of the target DNA that is duplicated when a transposable element or phage inserts; usually found at each end the insertion." [http://www.koko.gov.my/CocoaBioTech/Glossaryt.html] -synonym: "target site duplication" EXACT [] -is_a: SO:0000314 ! direct_repeat -intersection_of: SO:0000314 ! direct_repeat -intersection_of: derives_from SO:0000101 ! transposable_element -relationship: derives_from SO:0000101 ! transposable_element - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." [SO:ke] -synonym: "LTR retrotransposon poly purine tract" RELATED [] -synonym: "RR tract" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -synonym: "inverted ring chromosome" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! 
ring_chromosome - -[Term] -id: SO:0000440 -name: vector_replicon -def: "A replicon that has been modified to act as a vector for foreign sequence." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "vector" EXACT [] -synonym: "vector replicon" EXACT [] -xref: http://en.wikipedia.org/wiki/Vector_(molecular_biology) "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000443 -name: polymer_attribute -def: "An attribute to describe the kind of biological sequence." [SO:ke] -synonym: "polymer attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -synonym: "three prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "5' nc exon" EXACT [] -synonym: "5' non coding exon" EXACT [] -synonym: "five prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." 
[SO:ke] -synonym: "UTR intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." [SO:ke] -synonym: "five prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -synonym: "three prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequency of these components." [SO:ma] -synonym: "random sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -synonym: "chromosome interband" RELATED [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000451 -name: gene_with_polyadenylated_mRNA -def: "A gene that encodes a polyadenylated mRNA." [SO:xp] -synonym: "gene with polyadenylated mRNA" EXACT [] -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000871 ! polyadenylated_mRNA - -[Term] -id: SO:0000452 -name: transgene_attribute -is_obsolete: true - -[Term] -id: SO:0000453 -name: chromosomal_transposition -def: "A chromosome structure variant whereby a region of a chromosome has been transferred to another position. Among interchromosomal rearrangements, the term transposition is reserved for that class in which the telomeres of the chromosomes involved are coupled (that is to say, form the two ends of a single DNA molecule) as in wild-type." [FB:reference_manual, SO:ke] -synonym: "chromosomal transposition" EXACT [] -synonym: "transposition" NARROW [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." 
[http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000455 -name: gene_with_mRNA_with_frameshift -def: "A gene that encodes an mRNA with a frameshift." [SO:xp] -synonym: "gene with mRNA with frameshift" EXACT [] -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000456 -name: recombinationally_rearranged_gene -def: "A gene that is recombinationally rearranged." [SO:ke] -synonym: "recombinationally rearranged gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000940 ! recombinationally_rearranged - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -def: "A chromosome duplication involving an insertion from another chromosome." [SO:ke] -synonym: "interchromosomal duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D gene" EXACT [] -synonym: "D-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000459 -name: gene_with_trans_spliced_transcript -def: "A gene with a transcript that is trans-spliced." [SO:xp] -synonym: "gene with trans spliced transcript" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000479 ! trans_spliced_transcript - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_segment -comment: I am using the term segment instead of gene here to avoid confusion with the region 'gene'. 
-synonym: "vertebrate immunoglobulin T cell receptor segment" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite deficiency" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000463 -name: encodes_alternately_spliced_transcripts -def: "A gene that encodes more than one transcript." [SO:ke] -synonym: "encodes alternately spliced transcripts" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome deletion whereby a chromosome is generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus duplication" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene" EXACT [] -synonym: "V-GENE" EXACT [] -synonym: "variable_gene" EXACT [] -is_a: SO:0000460 ! 
vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -def: "An attribute describing a gene sequence where the resulting protein is regulated by the stability of the resulting protein." [SO:ke] -synonym: "post translationally regulated by protein stability" EXACT [] -synonym: "post-translationally regulated by protein stability" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -def: "An attribute describing a gene sequence where the resulting protein is modified to regulate it." [SO:ke] -synonym: "post translationally regulated by protein modification" EXACT [] -synonym: "post-translationally regulated by protein modification" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene" EXACT [] -synonym: "J-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000471 -name: autoregulated -def: "The gene product is involved in its own transcriptional regulation." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -def: "The gene product is involved in its own transcriptional regulation where it decreases transcription." [SO:ke] -synonym: "negatively autoregulated" EXACT [] -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -def: "The gene product is involved in its own transcriptional regulation, where it increases transcription." [SO:ke] -synonym: "positively autoregulated" EXACT [] -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." [SO:ke] -synonym: "contig read" EXACT [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -def: "A gene that is polycistronic." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000478 -name: C_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C gene" EXACT [] -synonym: "C_GENE" EXACT [] -synonym: "constant gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000479 -name: trans_spliced_transcript -def: "A transcript that is trans-spliced." [SO:xp] -synonym: "trans spliced transcript" EXACT [] -synonym: "trans-spliced transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. 
A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly." [SO:ke] -synonym: "tiling path clone" EXACT [] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occurring at the termini of a DNA transposon." [SO:ke] -synonym: "terminal inverted repeat" EXACT [] -synonym: "TIR" EXACT [] -is_a: SO:0000294 ! inverted_repeat -relationship: part_of SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor gene cluster" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-CLUSTER" EXACT [] -synonym: "DJ J cluster" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-C-CLUSTER" EXACT [] -synonym: "VDJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-CLUSTER" EXACT [] -synonym: "VDJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-C-CLUSTER" RELATED [] -synonym: "VJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! 
C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-C-CLUSTER" EXACT [] -synonym: "VJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-CLUSTER" EXACT [] -synonym: "VJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -synonym: "D gene recombination feature" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-HEPTAMER" EXACT [] -synonym: "three prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-NOMAMER" EXACT [] -synonym: "three prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-SPACER" EXACT [] -synonym: "three prime D spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-HEPTAMER" EXACT [] -synonym: "five prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-NONAMER" EXACT [] -synonym: "five prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'-SPACER" EXACT [] -synonym: "five prime D spacer" EXACT [] -synonym: "five prime D-spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -synonym: "Hoogsteen base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Hoogsteen_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." [SO:ke] -synonym: "reverse Hoogsteen base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. 
-subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ C cluster" EXACT [] -synonym: "D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ cluster" EXACT [] -synonym: "D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J C cluster" EXACT [] -synonym: "D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000507 -name: pseudogenic_exon -def: "A non functional descendant of an exon, part of a pseudogene." [SO:ke] -comment: This is the analog of the exon of a functional gene. 
The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon -relationship: part_of SO:0000516 ! pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J cluster" EXACT [] -synonym: "D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J C cluster" EXACT [] -synonym: "D-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000510 -name: VD_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V_D_GENE" EXACT [] -synonym: "VD gene" EXACT [] -is_a: SO:0000936 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J C cluster" EXACT [] -synonym: "J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus aneuploid" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J cluster" EXACT [] -synonym: "J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J nonamer" EXACT [] -synonym: "J-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J heptamer" EXACT [] -synonym: "J-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -def: "A non functional descendant of a transcript, part of a pseudogene." [SO:ke] -comment: This is the analog of the transcript of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic transcript" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J spacer" EXACT [] -synonym: "J-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ cluster" EXACT [] -synonym: "V-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J cluster" EXACT [] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ C cluster" EXACT [] -synonym: "V-(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ cluster" EXACT [] -synonym: "V-(VDJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J cluster" EXACT [] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000574 ! 
VDJ_gene - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ C cluster" EXACT [] -synonym: "V-(VJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ cluster" EXACT [] -synonym: "V-(VJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J cluster" EXACT [] -synonym: "V-(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V cluster" EXACT [] -synonym: "V-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! 
V_gene - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ C cluster" EXACT [] -synonym: "V-D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ cluster" EXACT [] -synonym: "V-D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J C cluster" EXACT [] -synonym: "V-D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! 
DJ_gene - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J cluster" EXACT [] -synonym: "V-D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J C cluster" EXACT [] -synonym: "V-D-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J cluster" EXACT [] -synonym: "V-D-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V heptamer" EXACT [] -synonym: "V-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J cluster" EXACT [] -synonym: "V-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J C cluster" EXACT [] -synonym: "V-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V nonamer" EXACT [] -synonym: "V-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V spacer" EXACT [] -synonym: "V-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene recombination feature" EXACT [] -synonym: "V-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-C-CLUSTER" EXACT [] -synonym: "DJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-C-CLUSTER" EXACT [] -synonym: "DJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-C-CLUSTER" EXACT [] -synonym: "VDJ C cluster" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ C cluster" EXACT [] -synonym: "V-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." [http://www.pnas.org/cgi/content/full/100/11/6569] -synonym: "ISCR" RELATED [] -xref: http://en.wikipedia.org/wiki/Helitron "wiki" -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -synonym: "recoding pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -synonym: "designed sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." 
[FB:km] -synonym: "inversion derived bipartite duplication" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000548 -name: gene_with_edited_transcript -def: "A gene that encodes a transcript that is edited." [SO:xp] -synonym: "gene with edited transcript" EXACT [] -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000873 ! edited_transcript - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived duplication plus aneuploid" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -def: "A chromosome structural variation whereby either a chromosome exists in addition to the normal chromosome complement or is lacking." [SO:ke] -comment: Examples are Nullo-4, Haplo-4 and triplo-4 in Drosophila. -synonym: "aneuploid chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -synonym: "polyadenylation termination signal" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "Region in 5' UTR where ribosome assembles on mRNA." 
[SO:ke] -synonym: "five prime ribosome binding site" EXACT [] -synonym: "RBS" RELATED [] -synonym: "Shine Dalgarno sequence" EXACT [] -synonym: "Shine-Dalgarno sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Shine-Dalgarno_sequence "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA cleavage site" EXACT [] -synonym: "polyA site" EXACT [] -synonym: "polyadenylation site" RELATED [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "5' clip" RELATED [] -synonym: "five prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'RS" EXACT [] -synonym: "five prime D recombination signal sequence" EXACT [] -synonym: "five prime D-recombination signal sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "3'-clip" EXACT [] -synonym: "three prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C cluster" EXACT [] -synonym: "C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D cluster" EXACT [] -synonym: "D-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J cluster" EXACT [] -synonym: "D-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: "Seven nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or J-gene recombination feature of an immunoglobulin or T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "HEPTAMER" RELATED [] -synonym: "heptamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! 
vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -synonym: "nonamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000563 -name: vertebrate_immune_system_gene_recombination_spacer -synonym: "vertebrate immune system gene recombination spacer" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J C cluster" EXACT [] -synonym: "V-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J C cluster" EXACT [] -synonym: "V-(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J C cluster" EXACT [] -synonym: "V-(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -synonym: "inversion derived aneuploid chromosome" EXACT [] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promoter -synonym: "bidirectional promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000569 -name: retrotransposed -alt_id: SO:0100042 -def: "An attribute of a feature that occurred as the product of a reverse transcriptase mediated event." [SO:ke] -comment: GO:0003964 RNA-directed DNA polymerase activity. -xref: http://en.wikipedia.org/wiki/Retrotransposed "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-RS" EXACT [] -synonym: "three prime D recombination signal sequence" EXACT [] -synonym: "three_prime_D-recombination_signal_sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_encoding -synonym: "miRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000572 -name: DJ_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D-J-GENE" EXACT [] -synonym: "DJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000573 -name: rRNA_encoding -synonym: "rRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000574 -name: VDJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-D-J-GENE" EXACT [] -synonym: "VDJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000575 -name: scRNA_encoding -synonym: "scRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000576 -name: VJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-J-GENE" EXACT [] -synonym: "VJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_encoding -synonym: "snoRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." 
[SO:ma] -synonym: "edited transcript feature" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -synonym: "methylation guide snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." [SO:ke] -synonym: "rRNA cleavage snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "pre edited region" EXACT [] -synonym: "pre-edited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "A tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. TmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and Eukaryote nuclear genomes." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa RNA" RELATED [] -synonym: "ssrA" RELATED [] -xref: http://en.wikipedia.org/wiki/TmRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_encoding -synonym: "C/D box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." [SO:ke] -synonym: "10Sa RNA primary transcript" RELATED [] -synonym: "ssrA RNA primary transcript" RELATED [] -synonym: "tmRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyze their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -synonym: "SRP RNA primary transcript" EXACT [] -is_a: SO:0000483 ! 
nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000589 ! SRP_RNA_primary_transcript - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A tertiary structure in RNA where nucleotides in a loop form base pairs with a region of RNA downstream of the loop." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Pseudoknot "wiki" -is_a: SO:0000002 ! 
sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "classical pseudoknot" EXACT [] -synonym: "H pseudoknot" EXACT [] -synonym: "H-pseudoknot" EXACT [] -synonym: "H-type pseudoknot" EXACT [] -synonym: "hairpin-type pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000595 ! C_D_box_snoRNA_primary_transcript - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. 
Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "box H/ACA snoRNA" EXACT [] -synonym: "H ACA box snoRNA" EXACT [] -synonym: "H/ACA box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000596 ! H_ACA_box_snoRNA_primary_transcript - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." [SO:ke] -synonym: "C/D box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -synonym: "H ACA box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." [http://www.rna.ucla.edu/index.html] -is_obsolete: true - -[Term] -id: SO:0000598 -name: edited_by_C_insertion_and_dinucleotide_insertion -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000599 -name: edited_by_C_to_U_substitution -is_obsolete: true - -[Term] -id: SO:0000600 -name: edited_by_A_to_I_substitution -is_obsolete: true - -[Term] -id: SO:0000601 -name: edited_by_G_addition -is_obsolete: true - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." 
[http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing block" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing domain" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "unedited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_encoding -synonym: "H ACA box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -synonym: "oligo U tail" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000602 ! guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." 
[http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -synonym: "bacterial RNApol promoter" EXACT [] -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -synonym: "bacterial terminator" EXACT [] -is_a: SO:0000141 ! terminator -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -synonym: "terminator of type 2 RNApol III promoter" EXACT [] -is_a: SO:0000951 ! eukaryotic_terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -synonym: "RNApol III promoter type 1" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "RNApol III promoter type 2" EXACT [] -synonym: "tRNA promoter" RELATED [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence TGGCnnAGTGG." [SO:ke] -synonym: "A-box" EXACT [] -xref: http://en.wikipedia.org/wiki/A-box "wiki" -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000618 ! 
RNApol_III_promoter_type_2 - -[Term] -id: SO:0000620 -name: B_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence AGGTTCCAnnCC." [SO:ke] -synonym: "B-box" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -synonym: "RNApol III promoter type 3" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -def: "An RNA polymerase III type 1 promoter with consensus sequence CAnnCCn." [SO:ke] -synonym: "C-box" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000623 -name: snRNA_encoding -synonym: "snRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -synonym: "chromosomal regulatory element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000627 -name: insulator -def: "A transcriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! 
transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -synonym: "five prime open reading frame" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -def: "A start codon upstream of the ORF." [SO:ke] -synonym: "upstream AUG codon" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000631 -name: polycistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." [SO:ke] -synonym: "polycistronic primary transcript" EXACT [] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000632 -name: monocistronic_primary_transcript -def: "A primary transcript encoding for one gene product." [SO:ke] -synonym: "monocistronic primary transcript" EXACT [] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000633 -name: monocistronic_mRNA -def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd] -synonym: "monocistronic mRNA" EXACT [] -synonym: "monocistronic processed transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Monocistronic_mRNA "wiki" -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000634 -name: polycistronic_mRNA -def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd] -synonym: "polycistronic mRNA" EXACT [] -synonym: "polycistronic processed transcript" RELATED [] -xref: http://en.wikipedia.org/wiki/Polycistronic_mRNA "wiki" -intersection_of: SO:0000234 ! 
mRNA -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "A primary transcript that donates the spliced leader to other mRNA." [SO:ke] -synonym: "mini exon donor RNA" EXACT [] -synonym: "mini-exon donor RNA" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -synonym: "spliced leader RNA" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000635 ! mini_exon_donor_RNA - -[Term] -id: SO:0000637 -name: engineered_plasmid -def: "A plasmid that is engineered." [SO:xp] -synonym: "engineered plasmid" EXACT [] -synonym: "engineered plasmid gene" RELATED [] -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -synonym: "transcribed spacer region" EXACT [] -is_a: SO:0000838 ! rRNA_primary_transcript_region - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -synonym: "internal transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -synonym: "external transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -synonym: "tetranucleotide repeat microsatellite feature" EXACT [] -is_a: SO:0000289 ! 
microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_encoding -synonym: "SRP RNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro RNA primary transcript" EXACT [] -synonym: "miRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000648 -name: stRNA_primary_transcript -def: "A primary transcript encoding a small temporal mRNA (SO:0000649)." 
[SO:ke] -synonym: "small temporal RNA primary transcript" EXACT [] -synonym: "stRNA primary transcript" EXACT [] -is_a: SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000255 ! rRNA_small_subunit_primary_transcript - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilizes 5S rRNA until it is required for transcription." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000654 -name: maxicircle_gene -def: "A mitochondrial gene located in a maxicircle." [SO:xp] -synonym: "maxi-circle gene" EXACT [] -synonym: "maxicircle gene" EXACT [] -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000742 ! maxicircle - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000656 -name: stRNA_encoding -synonym: "stRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region -relationship: has_part SO:0000726 ! repeat_unit - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." 
[SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_encoding -synonym: "tmRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_obsolete: true - -[Term] -id: SO:0000661 -name: intron_attribute -is_obsolete: true - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000663 -name: tRNA_encoding -synonym: "tRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -synonym: "introgressed chromosome region" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000665 -name: monocistronic_transcript -def: "A transcript that is monocistronic." [SO:xp] -synonym: "monocistronic transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000666 -name: mobile_intron -def: "An intron (mitochondrial, chloroplast, nuclear or prokaryotic) that encodes a double strand sequence specific endonuclease allowing for mobility." [SO:ke] -synonym: "mobile intron" EXACT [] -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0000667 -name: insertion -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." 
[SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -synonym: "sequence rearrangement feature" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." [SO:ma] -synonym: "chromosome breakage sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -synonym: "internal eliminated sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -synonym: "macronucleus destined segment" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non canonical splice site" EXACT [] -synonym: "non-canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000678 -consider: SO:0000679 - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." 
[SO:ke] -synonym: "canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000676 -consider: SO:0000677 - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -synonym: "canonical 3' splice site" EXACT [] -synonym: "canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." [SO:ke] -synonym: "canonical 5' splice site" EXACT [] -synonym: "canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." [SO:ke] -synonym: "non canonical 3' splice site" RELATED [] -synonym: "non canonical three prime splice site" EXACT [] -synonym: "non-canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non canonical 5' splice site" EXACT [] -synonym: "non canonical five prime splice site" EXACT [] -synonym: "non-canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non ATG start codon" EXACT [] -synonym: "non canonical start codon" EXACT [] -synonym: "non-canonical start codon" EXACT [] -is_a: SO:0000318 ! start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -synonym: "aberrant processed transcript" EXACT [] -is_a: SO:0000673 ! 
transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -synonym: "exonic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000685 -name: DNAseI_hypersensitive_site -synonym: "DHS" EXACT [] -synonym: "DNAseI hypersensitive site" EXACT [] -is_a: SO:0000322 ! nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "A chromosomal translocation whereby the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements. This occurs for some translocations, particularly but not exclusively, reciprocal translocations." [SO:ma] -synonym: "translocation element" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! 
expressed_sequence_match - -[Term] -id: SO:0000690 -name: gene_with_polycistronic_transcript -def: "A gene that encodes a polycistronic transcript." [SO:xp] -synonym: "gene with polycistronic transcript" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000078 ! polycistronic_transcript - -[Term] -id: SO:0000691 -name: cleaved_initiator_methionine -alt_id: BS:00067 -def: "The initiator methionine that has been cleaved from a mature polypeptide sequence." [EBIBS:GAR] -subset: biosapiens -synonym: "cleaved initiator methionine" EXACT [] -synonym: "init_met" RELATED [uniprot:feature_type] -synonym: "initiator methionine" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000692 -name: gene_with_dicistronic_transcript -def: "A gene that encodes a dicistronic transcript." [SO:xp] -synonym: "gene with dicistronic transcript" EXACT [] -intersection_of: SO:0000690 ! gene_with_polycistronic_transcript -intersection_of: transcribed_to SO:0000079 ! dicistronic_transcript - -[Term] -id: SO:0000693 -name: gene_with_recoded_mRNA -def: "A gene that encodes an mRNA that is recoded." [SO:xp] -synonym: "gene with recoded mRNA" EXACT [] -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000881 ! recoded - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." 
[SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000697 -name: gene_with_stop_codon_read_through -def: "A gene that encodes a transcript with stop codon readthrough." [SO:xp] -synonym: "gene with stop codon read through" EXACT [] -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000698 -name: gene_with_stop_codon_redefined_as_pyrrolysine -def: "A gene encoding an mRNA that has the stop codon redefined as pyrrolysine." [SO:xp] -synonym: "gene with stop codon redefined as pyrrolysine" EXACT [] -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000884 ! stop_codon_redefined_as_pyrrolysine - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -synonym: "breakpoint" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000001 ! region - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." 
[SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjcent copies of a region (of length greater than 1)." [SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The 5' five prime splice site region of the donor RNA." [SO:ke] -comment: SL RNA contains a donor site. -synonym: "5 prime trans splice site" RELATED [] -synonym: "trans splice donor site" EXACT [] -synonym: "trans-splice donor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -synonym: "SL1 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -synonym: "SL2 acceptor site" EXACT [] -is_a: SO:0000706 ! 
trans_splice_acceptor_site - -[Term] -id: SO:0000710 -name: gene_with_stop_codon_redefined_as_selenocysteine -def: "A gene encoding an mRNA that has the stop codon redefined as selenocysteine." [SO:xp] -synonym: "gene with stop codon redefined as selenocysteine" EXACT [] -intersection_of: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: has_part SO:0000885 ! stop_codon_redefined_as_selenocysteine - -[Term] -id: SO:0000711 -name: gene_with_mRNA_recoded_by_translational_bypass -def: "A gene with mRNA recoded by translational bypass." [SO:xp] -synonym: "gene with mRNA recoded by translational bypass" EXACT [] -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:0000712 -name: gene_with_transcript_with_translational_frameshift -def: "A gene encoding a transcript that has a translational frameshift." [SO:xp] -synonym: "gene with transcript with translational frameshift" EXACT [] -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000713 -name: DNA_motif -def: "A motif that is active in the DNA form of the sequence." [SO:ke] -synonym: "DNA motif" EXACT [] -xref: http://en.wikipedia.org/wiki/DNA_motif "wiki" -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000716 -name: dicistronic_mRNA -def: "An mRNA that has the quality dicistronic." [SO:ke] -synonym: "dicistronic mRNA" EXACT [] -synonym: "dicistronic processed transcript" RELATED [] -intersection_of: SO:0000234 ! 
mRNA -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interrupted by one or more stop codons; usually identified through intergenomic sequence comparisons." [SGD:rb] -comment: Term requested by Rama from SGD. -synonym: "blocked reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000720 -name: foreign_transposable_element -def: "A transposable element that is foreign." [SO:ke] -comment: requested by Michael on 19 Nov 2004. -synonym: "foreign transposable element" EXACT [] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000721 -name: gene_with_dicistronic_primary_transcript -def: "A gene that encodes a dicistronic primary transcript." [SO:xp] -comment: Requested by Michael, 19 nov 2004. -synonym: "gene with dicistronic primary transcript" EXACT [] -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:1001197 ! 
dicistronic_primary_transcript - -[Term] -id: SO:0000722 -name: gene_with_dicistronic_mRNA -def: "A gene that encodes a polycistronic mRNA." [SO:xp] -comment: Requested by MA nov 19 2004. -synonym: "gene with dicistronic mRNA" EXACT [] -synonym: "gene with dicistronic processed transcript" EXACT [] -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:0000716 ! dicistronic_mRNA - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." [SO:ma] -synonym: "intervening DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/IDNA "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000726 -name: repeat_unit -def: "The simplest repeated component of a repeat region. 
A single repeat." [SO:ke] -comment: Added to comply with the feature table. A single repeat. -synonym: "repeat unit" EXACT [] -xref: http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -intersection_of: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000728 -name: intein -def: "A region of a peptide that is able to excise itself and rejoin the remaining portions with a peptide bond." [SO:ke] -comment: Intein-mediated protein splicing occurs after mRNA has been translated into a protein. -synonym: "protein intron" RELATED [] -xref: http://en.wikipedia.org/wiki/Intein "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000729 -name: intein_containing -def: "An attribute of protein-coding genes where the initial protein product contains an intein." [SO:ke] -synonym: "intein containing" EXACT [] -is_a: SO:0000010 ! protein_coding - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000731 -name: fragmentary -def: "An attribute to describe a feature that is incomplete." [SO:ke] -comment: Term added because of request by MO people. -synonym: "fragment" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000732 -name: predicted -def: "An attribute describing an unverified region." [SO:ke] -xref: http://en.wikipedia.org/wiki/Predicted "wiki" -is_a: SO:0000905 ! 
status - -[Term] -id: SO:0000733 -name: feature_attribute -def: "An attribute describing a located_sequence_feature." [SO:ke] -synonym: "feature attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000734 -name: exemplar_mRNA -def: "An exemplar is a representative cDNA sequence for each gene. The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: Added for the MO people. -synonym: "exemplar mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000864 ! exemplar - -[Term] -id: SO:0000735 -name: sequence_location -synonym: "sequence location" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_sequence -synonym: "organelle sequence" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "mitochondrial sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000738 -name: nuclear_sequence -synonym: "nuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -synonym: "nucleomorphic sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000740 -name: plastid_sequence -synonym: "plastid sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000741 -name: kinetoplast -alt_id: SO:0000826 -def: "A kinetoplast is an interlocked network of thousands of minicircles and tens of maxi circles, located near the base of the flagellum of some protozoan species." [PMID:8395055] -synonym: "kinetoplast_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Kinetoplast "wiki" -is_a: SO:0001260 ! 
sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0000742 ! maxicircle -intersection_of: has_part SO:0000980 ! minicircle - -[Term] -id: SO:0000742 -name: maxicircle -alt_id: SO:0000827 -def: "A maxicircle is a replicon, part of a kinetoplast, that contains open reading frames and replicates via a rolling circle method." [PMID:8395055] -synonym: "maxicircle_chromosome" EXACT [] -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000741 ! kinetoplast - -[Term] -id: SO:0000743 -name: apicoplast_sequence -synonym: "apicoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -synonym: "chromoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -synonym: "chloroplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -synonym: "cyanelle sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -synonym: "leucoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -synonym: "proplastid sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_location -synonym: "plasmid location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -synonym: "amplification origin" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_location -synonym: "proviral location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0005836 ! 
regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -def: "The region of sequence that has been inserted and is being propogated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000754 -name: lambda_vector -def: "The lambda bacteriophage is the vector for the linear lambda clone. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -synonym: "lambda vector" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000755 -name: plasmid_vector -synonym: "plasmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Plasmid_vector#Vectors "wiki" -is_a: SO:0000440 ! vector_replicon -intersection_of: SO:0001235 ! replicon -intersection_of: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template." [SO:ma] -xref: http://en.wikipedia.org/wiki/CDNA "wiki" -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -synonym: "single strand cDNA" EXACT [] -synonym: "single stranded cDNA" EXACT [] -synonym: "single-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -synonym: "double strand cDNA" RELATED [] -synonym: "double stranded cDNA" EXACT [] -synonym: "double-strand cDNA" RELATED [] -is_a: SO:0000756 ! 
cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_obsolete: true - -[Term] -id: SO:0000760 -name: YAC_clone -is_obsolete: true - -[Term] -id: SO:0000761 -name: phagemid_clone -is_obsolete: true - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000763 -name: fosmid_clone -is_obsolete: true - -[Term] -id: SO:0000764 -name: BAC_clone -is_obsolete: true - -[Term] -id: SO:0000765 -name: cosmid_clone -is_obsolete: true - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -def: "A tRNA sequence that has a pyrrolysine anticodon, and a 3' pyrrolysine binding region." [SO:ke] -synonym: "pyrrolysyl tRNA" EXACT [] -synonym: "pyrrolysyl-transfer ribonucleic acid" EXACT [] -synonym: "pyrrolysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0001178 ! pyrrolysine_tRNA_primary_transcript - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome." [SO:ma] -is_a: SO:0000155 ! plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria. Processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw, issn:1362-4962] -comment: Added in response to comment from Kelly Williams from Indiana. Nov 2005. -synonym: "tmRNA coding piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." 
[doi:10.1093/nar/gkh795, Indiana:kw] -comment: Added in response to Kelly Williams from Indiana. Date: Nov 2005. -synonym: "tmRNA acceptor piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000771 -name: QTL -def: "A quantitative trait locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." [http://rgd.mcw.edu/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005. -synonym: "quantitative trait locus" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000772 -name: genomic_island -def: "A genomic island is an integrated mobile genetic element, characterized by size (over 10 Kb). It that has features that suggest a foreign origin. These can include nucleotide distribution (oligonucleotides signature, CG content etc.) that differs from the bulk of the chromosome and/or genes suggesting DNA mobility." [Phigo:at, SO:ke] -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -synonym: "genomic island" EXACT [] -xref: http://en.wikipedia.org/wiki/Genomic_island "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "pathogenic island" EXACT [] -is_a: SO:0000772 ! 
genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands. -synonym: "metabolic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -def: "An adaptive island is a genomic island that provides an adaptive advantage to the host." [SO:ke] -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands. Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "adaptive island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands. Evolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA gene. John T. Sullivan and Clive W. Ronso PNAS 1998 Apr 28 95 (9) 5145-5149. -synonym: "symbiosis island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. 
Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000779 -name: engineered_episome -def: "An episome that is engineered." [SO:xp] -comment: Requested by Lynn Crosby Jan 2006. -synonym: "engineered episome" EXACT [] -intersection_of: SO:0000768 ! episome -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000780 -name: transposable_element_attribute -comment: Added by KE Jan 2006 to capture the kinds of attributes of TEs -is_obsolete: true - -[Term] -id: SO:0000781 -name: transgenic -def: "Attribute describing sequence that has been integrated with foreign sequence." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000782 -name: natural -def: "An attribute describing a feature that occurs in nature." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000783 -name: engineered -def: "An attribute to describe a region that was modified in vitro." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000784 -name: foreign -def: "An attribute to describe a region from another species." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000785 -name: cloned_region -comment: Added in response to Lynn Crosby. A clone insert may be composed of many cloned regions. -synonym: "cloned region" EXACT [] -synonym: "cloned segment" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000786 -name: reagent_attribute -comment: Added jan 2006 by KE. -synonym: "reagent attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000787 -name: clone_attribute -is_obsolete: true - -[Term] -id: SO:0000788 -name: cloned -is_obsolete: true - -[Term] -id: SO:0000789 -name: validated -def: "An attribute to describe a feature that has been proven." [SO:ke] -is_a: SO:0000905 ! 
status - -[Term] -id: SO:0000790 -name: invalidated -def: "An attribute describing a feature that is invalidated." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000791 -name: cloned_genomic -is_obsolete: true - -[Term] -id: SO:0000792 -name: cloned_cDNA -is_obsolete: true - -[Term] -id: SO:0000793 -name: engineered_DNA -is_obsolete: true - -[Term] -id: SO:0000794 -name: engineered_rescue_region -def: "A rescue region that is engineered." [SO:xp] -synonym: "engineered rescue fragment" EXACT [] -synonym: "engineered rescue region" EXACT [] -synonym: "engineered rescue segment" EXACT [] -intersection_of: SO:0000411 ! rescue_region -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000795 -name: rescue_mini_gene -def: "A mini_gene that rescues." [SO:xp] -synonym: "rescue mini gene" EXACT [] -synonym: "rescue mini-gene" EXACT [] -intersection_of: SO:0000815 ! mini_gene -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000796 -name: transgenic_transposable_element -def: "TE that has been modified in vitro, including insertion of DNA derived from a source other than the originating TE." [FB:mc] -comment: Modified as requested by Lynn - FB. May 2007. -synonym: "transgenic transposable element" EXACT [] -intersection_of: SO:0000101 ! transposable_element -intersection_of: derives_from SO:0000151 ! clone -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000797 -name: natural_transposable_element -def: "TE that exists (or existed) in nature." [FB:mc] -synonym: "natural transposable element" EXACT [] -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000782 ! natural - -[Term] -id: SO:0000798 -name: engineered_transposable_element -def: "TE that has been modified by manipulations in vitro." [FB:mc] -synonym: "engineered transposable element" EXACT [] -intersection_of: SO:0000101 ! 
transposable_element -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000799 -name: engineered_foreign_transposable_element -def: "A transposable_element that is engineered and foreign." [FB:mc] -synonym: "engineered foreign transposable element" EXACT [] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000800 -name: assortment_derived_duplication -def: "A multi-chromosome duplication aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment derived duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000801 -name: assortment_derived_deficiency_plus_duplication -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency and a duplication." [FB:gm] -synonym: "assortment derived deficiency plus duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000802 -name: assortment_derived_deficiency -def: "A multi-chromosome deficiency aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment-derived deficiency" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000803 -name: assortment_derived_aneuploid -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency or a duplication." [FB:gm] -synonym: "assortment derived aneuploid" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000804 -name: engineered_region -def: "A region that is engineered." [SO:xp] -synonym: "construct" EXACT [] -synonym: "engineered region" EXACT [] -synonym: "engineered sequence" EXACT [] -is_a: SO:0001409 ! biomaterial_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! 
engineered - -[Term] -id: SO:0000805 -name: engineered_foreign_region -def: "A region that is engineered and foreign." [SO:xp] -synonym: "engineered foreign region" EXACT [] -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000806 -name: fusion -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000807 -name: engineered_tag -def: "A tag that is engineered." [SO:xp] -synonym: "engineered tag" EXACT [] -intersection_of: SO:0000324 ! tag -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000808 -name: validated_cDNA_clone -def: "A cDNA clone that has been validated." [SO:xp] -synonym: "validated cDNA clone" EXACT [] -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000789 ! validated - -[Term] -id: SO:0000809 -name: invalidated_cDNA_clone -def: "A cDNA clone that is invalid." [SO:xp] -synonym: "invalidated cDNA clone" EXACT [] -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000790 ! invalidated - -[Term] -id: SO:0000810 -name: chimeric_cDNA_clone -def: "A cDNA clone invalidated because it is chimeric." [SO:xp] -synonym: "chimeric cDNA clone" EXACT [] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA - -[Term] -id: SO:0000811 -name: genomically_contaminated_cDNA_clone -def: "A cDNA clone invalidated by genomic contamination." [SO:xp] -synonym: "genomically contaminated cDNA clone" EXACT [] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000414 ! invalidated_by_genomic_contamination - -[Term] -id: SO:0000812 -name: polyA_primed_cDNA_clone -def: "A cDNA clone invalidated by polyA priming." [SO:xp] -synonym: "polyA primed cDNA clone" EXACT [] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000415 ! 
invalidated_by_genomic_polyA_primed_cDNA - -[Term] -id: SO:0000813 -name: partially_processed_cDNA_clone -def: "A cDNA invalidated clone by partial processing." [SO:xp] -synonym: "partially processed cDNA clone" EXACT [] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000416 ! invalidated_by_partial_processing - -[Term] -id: SO:0000814 -name: rescue -def: "An attribute describing a region's ability, when introduced to a mutant organism, to re-establish (rescue) a phenotype." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000815 -name: mini_gene -def: "By definition, minigenes are short open-reading frames (ORF), usually encoding approximately 9 to 20 amino acids, which are expressed in vivo (as distinct from being synthesized as peptide or protein ex vivo and subsequently injected). The in vivo synthesis confers a distinct advantage: the expressed sequences can enter both antigen presentation pathways, MHC I (inducing CD8+ T- cells, which are usually cytotoxic T-lymphocytes (CTL)) and MHC II (inducing CD4+ T-cells, usually 'T-helpers' (Th)); and can encounter B-cells, inducing antibody responses. Three main vector approaches have been used to deliver minigenes: viral vectors, bacterial vectors and plasmid DNA." [PMID:15992143] -synonym: "mini gene" EXACT [] -is_a: SO:0000236 ! ORF - -[Term] -id: SO:0000816 -name: rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "rescue gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000817 -name: wild_type -def: "An attribute describing sequence with the genotype found in nature and/or standard laboratory stock." [SO:ke] -synonym: "wild type" EXACT [] -xref: http://en.wikipedia.org/wiki/Wild_type "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000818 -name: wild_type_rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "wild type rescue gene" EXACT [] -is_a: SO:0000816 ! 
rescue_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000817 ! wild_type - -[Term] -id: SO:0000819 -name: mitochondrial_chromosome -def: "A chromosome originating in a mitochondria." [SO:xp] -synonym: "mitochondrial chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000820 -name: chloroplast_chromosome -def: "A chromosome originating in a chloroplast." [SO:xp] -synonym: "chloroplast chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000821 -name: chromoplast_chromosome -def: "A chromosome originating in a chromoplast." [SO:xp] -synonym: "chromoplast chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000822 -name: cyanelle_chromosome -def: "A chromosome originating in a cyanelle." [SO:xp] -synonym: "cyanelle chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000823 -name: leucoplast_chromosome -def: "A chromosome with origin in a leucoplast." [SO:xp] -synonym: "leucoplast chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000824 -name: macronuclear_chromosome -def: "A chromosome originating in a macronucleus." [SO:xp] -synonym: "macronuclear chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000083 ! macronuclear_sequence - -[Term] -id: SO:0000825 -name: micronuclear_chromosome -def: "A chromosome originating in a micronucleus." [SO:xp] -synonym: "micronuclear chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000084 ! 
micronuclear_sequence - -[Term] -id: SO:0000828 -name: nuclear_chromosome -def: "A chromosome originating in a nucleus." [SO:xp] -synonym: "nuclear chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000829 -name: nucleomorphic_chromosome -def: "A chromosome originating in a nucleomorph." [SO:xp] -synonym: "nucleomorphic chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000832 -name: promoter_region -def: "A region of sequence which is part of a promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -def: "A region of a mature transcript." 
[SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000838 -name: rRNA_primary_transcript_region -def: "A region of an rRNA primary transcript." [SO:ke] -comment: To allow transcribed_spacer_region to have a path to the root. -synonym: "rRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. 
-subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000840 -name: repeat_component -def: "A region of a repeated sequence." [SO:ke] -comment: A manufactured to group the parts of repeats, to give them an is_a path back to the root. -synonym: "repeat component" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000843 -name: bacterial_RNApol_promoter_region -def: "A region which is part of a bacterial RNA polymerase promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of bacterial_RNApol_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000844 -name: RNApol_II_promoter_region -def: "A region of sequence which is a promoter for RNA polymerase II." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_II_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000845 -name: RNApol_III_promoter_type_1_region -def: "A region of sequence which is a promoter for RNA polymerase III type 1." 
[SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_1 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000846 -name: RNApol_III_promoter_type_2_region -def: "A region of sequence which is a promoter for RNA polymerase III type 2." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_2 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000847 -name: tmRNA_region -def: "A region of a tmRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of tmRNA, thus giving them an is_a path back to the root. -synonym: "tmRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000584 ! tmRNA - -[Term] -id: SO:0000848 -name: LTR_component -synonym: "long term repeat component" EXACT [] -synonym: "LTR component" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000849 -name: three_prime_LTR_component -synonym: "3' long terminal repeat component" EXACT [] -synonym: "three prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000850 -name: five_prime_LTR_component -synonym: "5' long term repeat component" EXACT [] -synonym: "five prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000851 -name: CDS_region -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! 
exon - -[Term] -id: SO:0000853 -name: homologous_region -def: "A region that is homologous to another region." [SO:ke] -synonym: "homolog" EXACT [] -synonym: "homologous region" EXACT [] -synonym: "homologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Homology_(biology) "wiki" -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000857 ! homologous - -[Term] -id: SO:0000854 -name: paralogous_region -def: "A homologous_region that is paralogous to another region." [SO:ke] -comment: A term to be used in conjunction with the paralogous_to relationship. -synonym: "paralog" EXACT [] -synonym: "paralogous region" EXACT [] -synonym: "paralogue" EXACT [] -xref: http://en.wikipedia.org/wiki/Paralog#Paralogy "wiki" -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000859 ! paralogous - -[Term] -id: SO:0000855 -name: orthologous_region -def: "A homologous_region that is orthologous to another region." [SO:ke] -comment: This term should be used in conjunction with the similarity relationships defined in SO. -synonym: "ortholog" EXACT [] -synonym: "orthologous region" EXACT [] -synonym: "orthologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Ortholog#Orthology "wiki" -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000858 ! orthologous - -[Term] -id: SO:0000856 -name: conserved -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000857 -name: homologous -def: "Similarity due to common ancestry." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000858 -name: orthologous -def: "An attribute describing a kind of homology where divergence occured after a speciation event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000859 -name: paralogous -def: "An attribute describing a kind of homology where divergence occurred after a duplication event." [SO:ke] -is_a: SO:0000857 ! 
homologous - -[Term] -id: SO:0000860 -name: syntenic -def: "Attribute describing sequence regions occurring in same order on chromosome of different species." [SO:ke] -xref: http://en.wikipedia.org/wiki/Syntenic "wiki" -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000861 -name: capped_primary_transcript -def: "A primary transcript that is capped." [SO:xp] -synonym: "capped primary transcript" EXACT [] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000862 -name: capped_mRNA -def: "An mRNA that is capped." [SO:xp] -synonym: "capped mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000863 -name: mRNA_attribute -def: "An attribute describing an mRNA feature." [SO:ke] -synonym: "mRNA attribute" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000864 -name: exemplar -def: "An attribute describing a sequence is representative of a class of similar sequences." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000865 -name: frameshift -def: "An attribute describing a sequence that contains a mutation involving the deletion or insertion of one or more bases, where this number is not divisible by 3." [SO:ke] -xref: http://en.wikipedia.org/wiki/Frameshift "wiki" -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000866 -name: minus_1_frameshift -def: "A frameshift caused by deleting one base." [SO:ke] -synonym: "minus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000867 -name: minus_2_frameshift -def: "A frameshift caused by deleting two bases." [SO:ke] -synonym: "minus 2 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000868 -name: plus_1_frameshift -def: "A frameshift caused by inserting one base." 
[SO:ke] -synonym: "plus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000869 -name: plus_2_framshift -def: "A frameshift caused by inserting two bases." [SO:ke] -synonym: "plus 2 framshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000870 -name: trans_spliced -def: "An attribute describing transcript sequence that is created by splicing exons from diferent genes." [SO:ke] -synonym: "trans-spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000871 -name: polyadenylated_mRNA -def: "An mRNA that is polyadenylated." [SO:xp] -synonym: "polyadenylated mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000610 ! polyA_sequence -intersection_of: has_quality SO:0000246 ! polyadenylated - -[Term] -id: SO:0000872 -name: trans_spliced_mRNA -def: "An mRNA that is trans-spliced." [SO:xp] -synonym: "trans-spliced mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000636 ! spliced_leader_RNA -intersection_of: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000873 -name: edited_transcript -def: "A transcript that is edited." [SO:ke] -synonym: "edited transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: guided_by SO:0000602 ! guide_RNA -intersection_of: has_part SO:0000977 ! anchor_binding_site -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000874 -name: edited_transcript_by_A_to_I_substitution -def: "A transcript that has been edited by A to I substitution." [SO:ke] -synonym: "edited transcript by A to I substitution" EXACT [] -is_a: SO:0000873 ! edited_transcript - -[Term] -id: SO:0000875 -name: bound_by_protein -def: "An attribute describing a sequence that is bound by a protein." [SO:ke] -synonym: "bound by protein" EXACT [] -is_a: SO:0000277 ! 
bound_by_factor - -[Term] -id: SO:0000876 -name: bound_by_nucleic_acid -def: "An attribute describing a sequence that is bound by a nucleic acid." [SO:ke] -synonym: "bound by nucleic acid" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000877 -name: alternatively_spliced -def: "An attribute describing a situation where a gene may encode for more than 1 transcript." [SO:ke] -synonym: "alternatively spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000878 -name: monocistronic -def: "An attribute describing a sequence that contains the code for one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000879 -name: dicistronic -def: "An attribute describing a sequence that contains the code for two gene products." [SO:ke] -is_a: SO:0000880 ! polycistronic - -[Term] -id: SO:0000880 -name: polycistronic -def: "An attribute describing a sequence that contains the code for more than one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000881 -name: recoded -def: "An attribute describing an mRNA sequence that has been reprogrammed at translation, causing localized alterations." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000882 -name: codon_redefined -def: "An attribute describing the alteration of codon meaning." [SO:ke] -synonym: "codon redefined" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000883 -name: stop_codon_read_through -def: "A stop codon redefined to be a new amino acid." [SO:ke] -synonym: "stop codon read through" EXACT [] -synonym: "stop codon readthrough" RELATED [] -is_a: SO:0000145 ! recoded_codon - -[Term] -id: SO:0000884 -name: stop_codon_redefined_as_pyrrolysine -def: "A stop codon redefined to be the new amino acid, pyrrolysine." [SO:ke] -synonym: "stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000883 ! 
stop_codon_read_through - -[Term] -id: SO:0000885 -name: stop_codon_redefined_as_selenocysteine -def: "A stop codon redefined to be the new amino acid, selenocysteine." [SO:ke] -synonym: "stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000886 -name: recoded_by_translational_bypass -def: "Recoded mRNA where a block of nucleotides is not translated." [SO:ke] -synonym: "recoded by translational bypass" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000887 -name: translationally_frameshifted -def: "Recoding by frameshifting a particular site." [SO:ke] -synonym: "translationally frameshifted" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000888 -name: maternally_imprinted_gene -def: "A gene that is maternally_imprinted." [SO:xp] -synonym: "maternally imprinted gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000135 ! maternally_imprinted - -[Term] -id: SO:0000889 -name: paternally_imprinted_gene -def: "A gene that is paternally imprinted." [SO:xp] -synonym: "paternally imprinted gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000136 ! paternally_imprinted - -[Term] -id: SO:0000890 -name: post_translationally_regulated_gene -def: "A gene that is post translationally regulated." [SO:xp] -synonym: "post translationally regulated gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000891 -name: negatively_autoregulated_gene -def: "A gene that is negatively autoreguated." [SO:xp] -synonym: "negatively autoregulated gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000473 ! negatively_autoregulated - -[Term] -id: SO:0000892 -name: positively_autoregulated_gene -def: "A gene that is positively autoregulated." [SO:xp] -synonym: "positively autoregulated gene" EXACT [] -intersection_of: SO:0000704 ! 
gene -intersection_of: has_quality SO:0000475 ! positively_autoregulated - -[Term] -id: SO:0000893 -name: silenced -def: "An attribute describing an epigenetic process where a gene is inactivated at transcriptional or translational level." [SO:ke] -xref: http://en.wikipedia.org/wiki/Silenced "wiki" -is_a: SO:0000126 ! transcriptionally_repressed - -[Term] -id: SO:0000894 -name: silenced_by_DNA_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA modifications, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0000895 -name: silenced_by_DNA_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA methylation, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA methylation" EXACT [] -is_a: SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000896 -name: translationally_regulated_gene -def: "A gene that is translationally regulated." [SO:xp] -synonym: "translationally regulated gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000131 ! translationally_regulated - -[Term] -id: SO:0000897 -name: allelically_excluded_gene -def: "A gene that is allelically_excluded." [SO:xp] -synonym: "allelically excluded gene" EXACT [] -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000137 ! allelically_excluded - -[Term] -id: SO:0000898 -name: epigenetically_modified_gene -def: "A gene that is epigenetically modified." [SO:ke] -synonym: "epigenetically modified gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000899 -name: nuclear_mitochondrial -def: "An attribute describing a nuclear pseudogene of a mitochndrial gene." 
[SO:ke] -synonym: "nuclear mitochondrial" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000900 -name: processed -def: "An attribute describing a pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promotors, but often including a polyA tail." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000901 -name: unequally_crossed_over -def: "An attribute describing a pseudogene that was created by tandem duplication and unequal crossing over during recombination." [SO:ke] -synonym: "unequally crossed over" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000902 -name: transgene -def: "A transgene is a gene that has been transferred naturally or by any of a number of genetic engineering techniques from one organism to another." [SO:xp] -xref: http://en.wikipedia.org/wiki/Transgene "wiki" -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000903 -name: endogenous_retroviral_sequence -synonym: "endogenous retroviral sequence" EXACT [] -is_a: SO:0000751 ! proviral_location - -[Term] -id: SO:0000904 -name: rearranged_at_DNA_level -def: "An attribute to describe the sequence of a feature, where the DNA is rearranged." [SO:ke] -synonym: "rearranged at DNA level" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000905 -name: status -def: "An attribute describing the status of a feature, based on the available evidence." [SO:ke] -comment: This term is the hypernym of attributes and should not be annotated to. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000906 -name: independently_known -def: "Attribute to describe a feature that is independently known - not predicted." [SO:ke] -synonym: "independently known" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000907 -name: supported_by_sequence_similarity -def: "An attribute to describe a feature that has been predicted using sequence similarity techniques." 
[SO:ke] -synonym: "supported by sequence similarity" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000908 -name: supported_by_domain_match -def: "An attribute to describe a feature that has been predicted using sequence similarity of a known domain." [SO:ke] -synonym: "supported by domain match" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000909 -name: supported_by_EST_or_cDNA -def: "An attribute to describe a feature that has been predicted using sequence similarity to EST or cDNA data." [SO:ke] -synonym: "supported by EST or cDNA" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000910 -name: orphan -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000911 -name: predicted_by_ab_initio_computation -def: "An attribute describing a feature that is predicted by a computer program that did not rely on sequence similarity." [SO:ke] -synonym: "predicted by ab initio computation" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000912 -name: asx_turn -alt_id: BS:00203 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Aspartate or Asparagine (Asx), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0000913 -name: cloned_cDNA_insert -def: "A clone insert made from cDNA." [SO:xp] -synonym: "cloned cDNA insert" EXACT [] -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000914 -name: cloned_genomic_insert -def: "A clone insert made from genomic DNA." [SO:xp] -synonym: "cloned genomic insert" EXACT [] -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000915 -name: engineered_insert -def: "A clone insert that is engineered." 
[SO:xp] -synonym: "engineered insert" EXACT [] -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000916 -name: edit_operation -synonym: "edit operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000917 -name: insert_U -def: "An edit to insert a U." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "insert U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000918 -name: delete_U -def: "An edit to delete a uridine." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "delete U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000919 -name: substitute_A_to_I -def: "An edit to substitute an I for an A." [SO:ke] -synonym: "substitute A to I" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000920 -name: insert_C -def: "An edit to insert a cytidine." [SO:ke] -synonym: "insert C" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000921 -name: insert_dinucleotide -def: "An edit to insert a dinucleotide." [SO:ke] -synonym: "insert dinucleotide" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000922 -name: substitute_C_to_U -def: "An edit to substitute an U for a C." [SO:ke] -synonym: "substitute C to U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000923 -name: insert_G -def: "An edit to insert a G." [SO:ke] -synonym: "insert G" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000924 -name: insert_GC -def: "An edit to insert a GC dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. 
The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GC" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000925 -name: insert_GU -def: "An edit to insert a GU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000926 -name: insert_CU -def: "An edit to insert a CU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. 
Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert CU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000927 -name: insert_AU -def: "An edit to insert a AU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000928 -name: insert_AA -def: "An edit to insert a AA dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AA" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000929 -name: edited_mRNA -def: "An mRNA that is edited." [SO:xp] -synonym: "edited mRNA" EXACT [] -intersection_of: SO:0000873 ! 
edited_transcript -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000930 -name: guide_RNA_region -def: "A region of guide RNA." [SO:ma] -synonym: "guide RNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000602 ! guide_RNA - -[Term] -id: SO:0000931 -name: anchor_region -def: "A region of a guide_RNA that base-pairs to a target mRNA." [SO:jk] -synonym: "anchor region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000932 -name: pre_edited_mRNA -synonym: "pre-edited mRNA" EXACT [] -is_a: SO:0000120 ! protein_coding_primary_transcript - -[Term] -id: SO:0000933 -name: intermediate -def: "An attribute to describe a feature between stages of processing." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000934 -name: miRNA_target_site -def: "A miRNA target site is a binding site where the molecule is a micro RNA." [FB:cds] -synonym: "miRNA target site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000935 -name: edited_CDS -def: "A CDS that is edited." [SO:xp] -synonym: "edited CDS" EXACT [] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000936 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -synonym: "vertebrate immunoglobulin T cell receptor rearranged segment" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000937 -name: vertebrate_immune_system_feature -is_obsolete: true - -[Term] -id: SO:0000938 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor rearranged gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000939 -name: vertebrate_immune_system_gene_recombination_signal_feature -synonym: "vertebrate immune system gene recombination signal feature" EXACT [] -is_a: SO:0000301 ! 
vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000940 -name: recombinationally_rearranged -synonym: "recombinationally rearranged" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000941 -name: recombinationally_rearranged_vertebrate_immune_system_gene -def: "A recombinationally rearranged gene of the vertebrate immune system." [SO:xp] -synonym: "recombinationally rearranged vertebrate immune system gene" EXACT [] -is_a: SO:0000456 ! recombinationally_rearranged_gene - -[Term] -id: SO:0000942 -name: attP_site -def: "An integration/excision site of a phage chromosome at which a recombinase acts to insert the phage DNA at a cognate integration/excision site on a bacterial chromosome." [SO:as] -synonym: "attP site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0001042 ! phage_sequence - -[Term] -id: SO:0000943 -name: attB_site -def: "An integration/excision site of a bacterial chromosome at which a recombinase acts to insert foreign DNA containing a cognate integration/excision site." [SO:as] -synonym: "attB site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000944 -name: attL_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attB_site and the 3' portion of attP_site." [SO:as] -synonym: "attBP'" RELATED [] -synonym: "attL site" RELATED [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000945 -name: attR_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attP_site and the 3' portion of attB_site." [SO:as] -synonym: "attPB'" RELATED [] -synonym: "attR site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000946 -name: integration_excision_site -def: "A region specifically recognised by a recombinase, which inserts or removes another region marked by a distinct cognate integration/excision site." 
[SO:as] -synonym: "attachment site" RELATED [] -synonym: "integration excision site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000947 -name: resolution_site -def: "A region specifically recognised by a recombinase, which separates a physically contiguous circle of DNA into two physically separate circles." [SO:as] -synonym: "res site" EXACT [] -synonym: "resolution site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000948 -name: inversion_site -def: "A region specifically recognised by a recombinase, which inverts the region flanked by a pair of sites." [SO:ma] -comment: A target region for site-specific inversion of a DNA region and which carries binding sites for a site-specific recombinase and accessory proteins as well as the site for specific cleavage by the recombinase. -synonym: "inversion site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000949 -name: dif_site -def: "A site at which replicated bacterial circular chromosomes are decatenated by site specific resolvase." [SO:as] -synonym: "dif site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000950 -name: attC_site -def: "An attC site is a sequence required for the integration of a DNA of an integron." [SO:as] -synonym: "attC site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000951 -name: eukaryotic_terminator -synonym: "eukaryotic terminator" EXACT [] -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000952 -name: oriV -def: "An origin of vegetative replication in plasmids and phages." [SO:as] -synonym: "origin of vegetative replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000953 -name: oriC -def: "An origin of bacterial chromosome replication." [SO:as] -synonym: "origin of bacterial chromosome replication" EXACT [] -is_a: SO:0000296 ! 
origin_of_replication - -[Term] -id: SO:0000954 -name: DNA_chromosome -def: "Structural unit composed of a self-replicating, DNA molecule." [SO:ma] -synonym: "DNA chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0000955 -name: double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded DNA molecule." [SO:ma] -synonym: "double stranded DNA chromosome" EXACT [] -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000985 ! double - -[Term] -id: SO:0000956 -name: single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded DNA molecule." [SO:ma] -synonym: "single stranded DNA chromosome" EXACT [] -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000984 ! single - -[Term] -id: SO:0000957 -name: linear_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear DNA molecule." [SO:ma] -synonym: "linear double stranded DNA chromosome" EXACT [] -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000958 -name: circular_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular DNA molecule." [SO:ma] -synonym: "circular double stranded DNA chromosome" EXACT [] -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000959 -name: linear_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear DNA molecule." [SO:ma] -synonym: "linear single stranded DNA chromosome" EXACT [] -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! 
linear - -[Term] -id: SO:0000960 -name: circular_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded DNA chromosome" EXACT [] -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000961 -name: RNA_chromosome -def: "Structural unit composed of a self-replicating, RNA molecule." [SO:ma] -synonym: "RNA chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000356 ! RNA - -[Term] -id: SO:0000962 -name: single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded RNA molecule." [SO:ma] -synonym: "single stranded RNA chromosome" EXACT [] -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000984 ! single - -[Term] -id: SO:0000963 -name: linear_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear RNA molecule." [SO:ma] -synonym: "linear single stranded RNA chromosome" EXACT [] -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000964 -name: linear_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear RNA molecule." [SO:ma] -synonym: "linear double stranded RNA chromosome" EXACT [] -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000965 -name: double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded RNA molecule." [SO:ma] -synonym: "double stranded RNA chromosome" EXACT [] -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000985 ! 
double - -[Term] -id: SO:0000966 -name: circular_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded RNA chromosome" EXACT [] -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000967 -name: circular_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular RNA molecule." [SO:ma] -synonym: "circular double stranded RNA chromosome" EXACT [] -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000968 -name: sequence_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "sequence replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000969 -name: rolling_circle -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070581. -synonym: "rolling circle" EXACT [] -xref: http://en.wikipedia.org/wiki/Rolling_circle "wiki" -is_obsolete: true - -[Term] -id: SO:0000970 -name: theta_replication -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070582 -synonym: "theta replication" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000971 -name: DNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0006260. -synonym: "DNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000972 -name: RNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "RNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000973 -name: insertion_sequence -def: "A terminal_inverted_repeat_element that is bacterial and only encodes the functions required for its transposition between these inverted repeats." 
[SO:as] -synonym: "insertion sequence" EXACT [] -synonym: "IS" RELATED [] -xref: http://en.wikipedia.org/wiki/Insertion_sequence "wiki" -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000975 -name: minicircle_gene -synonym: "minicircle gene" EXACT [] -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000980 ! minicircle - -[Term] -id: SO:0000976 -name: cryptic -def: "A feature_attribute describing a feature that is not manifest under normal conditions." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000977 -name: anchor_binding_site -comment: Part of an edited transcript only. -synonym: "anchor binding site" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000978 -name: template_region -def: "A region of a guide_RNA that specifies the insertions and deletions of bases in the editing of a target mRNA." [SO:jk] -synonym: "information region" EXACT [] -synonym: "template region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000979 -name: gRNA_encoding -def: "A non-protein_coding gene that encodes a guide_RNA." [SO:ma] -synonym: "gRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000980 -name: minicircle -alt_id: SO:0000974 -def: "A minicircle is a replicon, part of a kinetoplast, that encodes for guide RNAs." [PMID:8395055] -synonym: "minicircle_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Minicircle "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000741 ! kinetoplast - -[Term] -id: SO:0000981 -name: rho_dependent_bacterial_terminator -synonym: "rho dependent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000982 -name: rho_independent_bacterial_terminator -synonym: "rho independent bacterial terminator" EXACT [] -is_a: SO:0000614 ! 
bacterial_terminator - -[Term] -id: SO:0000983 -name: strand_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "strand attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000984 -name: single -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000985 -name: double -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000986 -name: topology_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "topology attribute" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000987 -name: linear -def: "A quality of a nucleotide polymer that has a 3'-terminal residue and a 5'-terminal residue." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "two-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000988 ! circular - -[Term] -id: SO:0000988 -name: circular -def: "A quality of a nucleotide polymer that has no terminal nucleotide residues." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "zero-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000987 ! linear - -[Term] -id: SO:0000989 -name: class_II_RNA -def: "Small non-coding RNA (59-60 nt long) containing 5' and 3' ends that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -synonym: "class II RNA" EXACT [] -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000990 -name: class_I_RNA -def: "Small non-coding RNA (55-65 nt long) containing highly conserved 5' and 3' ends (16 and 8 nt, respectively) that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -comment: Requested by Karen Pilcher - Dictybase. song-Term Tracker-1574577. -synonym: "class I RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000991 -name: genomic_DNA -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "genomic DNA" EXACT [] -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000992 -name: BAC_cloned_genomic_insert -comment: Requested by Andy Schroder - Flybase Harvard, Nov 2006. -synonym: "BAC cloned genomic insert" EXACT [] -intersection_of: SO:0000914 ! cloned_genomic_insert -intersection_of: derives_from SO:0000153 ! BAC - -[Term] -id: SO:0000993 -name: consensus -comment: Term added Dec 06 to comply with mapping to MGED terms. It should be used to generate consensus regions. The specific cross product terms they require are consensus_region and consensus_mRNA. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000994 -name: consensus_region -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus region" EXACT [] -is_a: SO:0001410 ! experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000995 -name: consensus_mRNA -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000996 -name: predicted_gene -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "predicted gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000732 ! 
predicted - -[Term] -id: SO:0000997 -name: gene_fragment -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "gene fragment" EXACT [] -intersection_of: SO:0000842 ! gene_component_region -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0000998 -name: recursive_splice_site -def: "A recursive splice site is a splice site which subdivides a large intron. Recursive splicing is a mechanism that splices large introns by sub dividing the intron at non exonic elements and alternate exons." [http://www.genetics.org/cgi/content/full/170/2/661] -synonym: "recursive splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000999 -name: BAC_end -def: "A region of sequence from the end of a BAC clone that may provide a highly specific marker." [SO:ke] -comment: Requested by Keith Boroevich December, 2006. -synonym: "BAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000153 ! BAC - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." 
[RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001003 -name: solo_LTR -def: "A recombination product between the 2 LTR of the same element." [SO:ke] -comment: Requested by Hadi Quesneville January 2007. -synonym: "solo LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0001004 -name: low_complexity -synonym: "low complexity" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0001005 -name: low_complexity_region -synonym: "low complexity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001004 ! low_complexity - -[Term] -id: SO:0001006 -name: prophage -def: "A phage genome after it has established in the host genome in a latent/immune state either as a plasmid or as an integrated \"island\"." [GOC:jl] -xref: http://en.wikipedia.org/wiki/Prophage "wiki" -is_a: SO:0000113 ! proviral_region - -[Term] -id: SO:0001007 -name: cryptic_prophage -def: "A remnant of an integrated prophage in the host genome or an \"island\" in the host genome that includes phage like-genes." [GOC:jl] -comment: This is not cryptic in the same sense as a cryptic gene or cryptic splice site. -synonym: "cryptic prophage" EXACT [] -xref: http://ecoliwiki.net/colipedia/index.php/Category\:Cryptic_Prophage.w dbxref -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001008 -name: tetraloop -def: "A base-paired stem with loop of 4 non-hydrogen bonded nucleotides." [SO:ke] -xref: http://en.wikipedia.org/wiki/Tetraloop "wiki" -is_a: SO:0000313 ! stem_loop - -[Term] -id: SO:0001009 -name: DNA_constraint_sequence -def: "A double-stranded DNA used to control macromolecular structure and function." 
[http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au\]&dispmax=50] -synonym: "DNA constraint" EXACT [] -synonym: "DNA constraint sequence" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0001010 -name: i_motif -def: "A cytosine rich domain whereby strands associate both inter- and intramolecularly at moderately acidic pH." [PMID:9753739] -synonym: "i motif" EXACT [] -synonym: "short intercalated motif" EXACT [] -is_a: SO:0000142 ! DNA_sequence_secondary_structure - -[Term] -id: SO:0001011 -name: PNA_oligo -def: "Peptide nucleic acid, is a chemical not known to occur naturally but is artificially synthesized and used in some biological research and medical treatments. The PNA backbone is composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [SO:ke] -synonym: "peptide nucleic acid" EXACT [] -synonym: "PNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Peptide_nucleic_acid "wiki" -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001184 ! PNA - -[Term] -id: SO:0001012 -name: DNAzyme -def: "A DNA sequence with catalytic activity." [SO:cb] -comment: Added by request from Colin Batchelor. -synonym: "catalytic DNA" EXACT [] -synonym: "deoxyribozyme" RELATED [] -synonym: "DNA enzyme" EXACT [] -intersection_of: SO:0000696 ! oligo -intersection_of: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0001013 -name: MNP -def: "A multiple nucleotide polymorphism with alleles of common length > 1, for example AAA/TTT." [http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?rs=rs2067431] -synonym: "multiple nucleotide polymorphism" RELATED [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0001014 -name: intron_domain -comment: Requested by Colin Batchelor, Feb 2007. -synonym: "intron domain" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000188 ! 
intron - -[Term] -id: SO:0001015 -name: wobble_base_pair -def: "A type of non-canonical base pairing, most commonly between G and U, which is important for the secondary structure of RNAs. It has similar thermodynamic stability to the Watson-Crick pairing. Wobble base pairs only have two hydrogen bonds. Other wobble base pair possibilities are I-A, I-U and I-C." [PMID:11256617] -synonym: "wobble base pair" EXACT [] -synonym: "wobble pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Wobble_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0001016 -name: internal_guide_sequence -def: "A purine-rich sequence in the group I introns which determines the locations of the splice sites in group I intron splicing and has catalytic activity." [SO:cb] -synonym: "IGS" EXACT [] -synonym: "internal guide sequence" EXACT [] -is_a: SO:0001014 ! intron_domain -relationship: part_of SO:0000587 ! group_I_intron - -[Term] -id: SO:0001017 -name: silent_mutation -comment: Added in March 2007 in after meeting with pharmgkb. -synonym: "silent mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Silent_mutation "wiki" -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0001018 -name: epitope -def: "A region of a macromolecule that is recognized by the immune system." [http://en.wikipedia.org/wiki/Epitope] -comment: Requested by Trish Whetzel. -xref: http://en.wikipedia.org/wiki/Epitope "wiki" -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -subset: SOFA -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001059 ! 
sequence_alteration - -[Term] -id: SO:0001020 -name: sequence_variant_affecting_copy_number -synonym: "mutation affecting copy number" EXACT [] -synonym: "sequence variant affecting copy number" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0001021 -name: chromosome_breakpoint -alt_id: SO:0001242 -synonym: "aberration breakpoint" EXACT [] -synonym: "aberration_junction" EXACT [] -synonym: "chromosome breakpoint" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0001022 -name: inversion_breakpoint -def: "The point within a chromosome where an inversion begins or ends." [SO:cb] -synonym: "inversion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001023 -name: allele -def: "An allele is one of a set of coexisting sequence variants of a gene." [SO:immuno_workshop] -synonym: "allelomorph" EXACT [] -xref: http://en.wikipedia.org/wiki/Allele "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000704 ! gene - -[Term] -id: SO:0001024 -name: haplotype -def: "A haplotype is one of a set of coexisting sequence variants of a haplotype block." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Haplotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000355 ! haplotype_block - -[Term] -id: SO:0001025 -name: polymorphic_sequence_variant -def: "A sequence variant that is segregating in one or more natural populations of a species." [SO:immuno_workshop] -synonym: "polymorphic sequence variant" EXACT [] -is_a: SO:0001023 ! allele - -[Term] -id: SO:0001026 -name: genome -def: "A genome is the sum of genetic material within a cell or virion." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genome "wiki" -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0001235 ! 
replicon - -[Term] -id: SO:0001027 -name: genotype -def: "A genotype is a variant genome, complete or incomplete." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0001026 ! genome - -[Term] -id: SO:0001028 -name: diplotype -def: "A diplotype is a pair of haplotypes from a given individual. It is a genotype where the phase is known." [SO:immuno_workshop] -is_a: SO:0001507 ! variant_collection - -[Term] -id: SO:0001029 -name: direction_attribute -synonym: "direction attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001030 -name: forward -def: "Forward is an attribute of the feature, where the feature is in the 5' to 3' direction." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001031 -name: reverse -def: "Reverse is an attribute of the feature, where the feature is in the 3' to 5' direction. Again could be applied to primer." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001032 -name: mitochondrial_DNA -comment: This terms is used by MO. -synonym: "mitochondrial DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_DNA "wiki" -intersection_of: SO:0000737 ! mitochondrial_sequence -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001033 -name: chloroplast_DNA -comment: This term is used by MO. -synonym: "chloroplast DNA" EXACT [] -intersection_of: SO:0000745 ! chloroplast_sequence -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001034 -name: mirtron -def: "A de-branched intron which mimics the structure of pre-miRNA and enters the miRNA processing pathway without Drosha mediated cleavage." [PMID:17589500, SO:ma] -comment: Ruby et al. Nature 448:83 describe a new class of miRNAs that are derived from de-branched introns. -is_a: SO:0001014 ! 
intron_domain - -[Term] -id: SO:0001035 -name: piRNA -def: "A small non coding RNA, part of a silencing system that prevents the spreading of selfish genetic elements." [SO:ke] -synonym: "piwi-associated RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/PiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001036 -name: arginyl_tRNA -def: "A tRNA sequence that has an arginine anticodon, and a 3' arginine binding region." [SO:ke] -synonym: "arginyl tRNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000212 ! arginine_tRNA_primary_transcript - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0001038 -name: extrachromosomal_mobile_genetic_element -def: "An MGE that is not integrated into the host chromosome." [SO:ke] -synonym: "extrachromosomal mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001040 -name: integrated_plasmid -def: "A plasmid sequence that is integrated within the host chromosome." [SO:ke] -synonym: "integrated plasmid" EXACT [] -intersection_of: SO:0001039 ! integrated_mobile_genetic_element -intersection_of: derives_from SO:0000155 ! 
plasmid - -[Term] -id: SO:0001041 -name: viral_sequence -def: "The region of nucleotide sequence of a virus, a submicroscopic particle that replicates by infecting a host cell." [SO:ke] -comment: The definitions of the children of this term were revised Decemeber 2007 after discussion on song-devel. The resulting definitions are slightly unweildy but hopefully more logically correct. -synonym: "viral sequence" EXACT [] -synonym: "virus sequence" EXACT [] -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0001042 -name: phage_sequence -def: "The nucleotide sequence of a virus that infects bacteria." [SO:ke] -synonym: "bacteriophage" EXACT [] -synonym: "phage" EXACT [] -synonym: "phage sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Bacteriophage "wiki" -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001043 -name: attCtn_site -def: "An attachment site located on a conjugative transposon and used for site-specific integration of a conjugative transposon." [Phigo:at] -synonym: "attCtn site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000371 ! conjugative_transposon - -[Term] -id: SO:0001044 -name: nuclear_mt_pseudogene -def: "A nuclear pseudogene of a mitochndrial gene." [SO:xp] -synonym: "nuclear mitochondrial pseudogene" EXACT [] -synonym: "nuclear mt pseudogene" EXACT [] -synonym: "NUMT" EXACT [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0001045 -name: cointegrated_plasmid -def: "A MGE region consisting of two fused plasmids resulting from a replicative transposition event." [phigo:at] -synonym: "cointegrated plasmid" EXACT [] -synonym: "cointegrated replicon" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0001046 -name: IRLinv_site -def: "Component of the inversion site located at the left of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRLinv site" EXACT [] -is_a: SO:0001048 ! 
inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001047 -name: IRRinv_site -def: "Component of the inversion site located at the right of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRRinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001048 -name: inversion_site_part -def: "A region located within an inversion site." [SO:ke] -comment: A term created to allow the parts of an inversion site have an is_a path back to the root. -synonym: "inversion site part" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0001049 -name: defective_conjugative_transposon -def: "An island that contains genes for integration/excision and the gene and site for the initiation of intercellular transfer by conjugation. It can be complemented for transfer by a conjugative transposon." [Phigo:ariane] -synonym: "defective conjugative transposon" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001050 -name: repeat_fragment -def: "A portion of a repeat, interrupted by the insertion of another element." [SO:ke] -comment: Requested by Chris Smith, and others at Flybase to help annotate nested repeats. -synonym: "repeat fragment" EXACT [] -is_a: SO:0000840 ! repeat_component -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001051 -name: nested_region -is_obsolete: true - -[Term] -id: SO:0001052 -name: nested_repeat -is_obsolete: true - -[Term] -id: SO:0001053 -name: nested_transposon -is_obsolete: true - -[Term] -id: SO:0001054 -name: transposon_fragment -synonym: "transposon fragment" EXACT [] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000731 ! 
fragmentary - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -subset: SOFA -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." [SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001057 -name: enhanceosome -is_obsolete: true - -[Term] -id: SO:0001058 -name: promoter_targeting_sequence -def: "A transcriptional_cis_regulatory_region that restricts the activity of a CRM to a single promoter and which functions only when both itself and an insulator are located between the CRM and the promoter." [SO:regcreative] -synonym: "promoter targeting sequence" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. -subset: SOFA -synonym: "partially characterised change in DNA sequence" EXACT [] -synonym: "partially_characterised_change_in_DNA_sequence" EXACT [] -synonym: "sequence alteration" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001060 -name: sequence_variant -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -synonym: "sequence variant" EXACT [] - -[Term] -id: SO:0001061 -name: propeptide_cleavage_site -alt_id: BS:00063 -def: "The propeptide_cleavage_site is the arginine/lysine boundary on a propeptide where cleavage occurs." [EBIBS:GAR] -comment: Discrete. 
-subset: biosapiens -synonym: "propeptide cleavage site" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0001062 -name: propeptide -alt_id: BS:00077 -def: "Part of a peptide chain which is cleaved off during the formation of the mature protein." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "propep" RELATED [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Propeptide "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature_peptide_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001064 -name: active_peptide -alt_id: BS:00076 -def: "Active peptides are proteins which are biologically active, released from a precursor molecule." [EBIBS:GAR, UniProt:curation_manual] -comment: Hormones, neuropeptides, antimicrobial peptides, are active peptides. They are typically short (<40 amino acids) in length. -subset: biosapiens -synonym: "active peptide" EXACT [] -synonym: "peptide" BROAD [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Peptide "wiki" -is_a: SO:0000419 ! mature_protein_region - -[Term] -id: SO:0001066 -name: compositionally_biased_region_of_peptide -alt_id: BS:00068 -def: "Polypeptide region that is rich in a particular amino acid or homopolymeric and greater than three residues in length." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "compbias" RELATED [uniprot:feature_type] -synonym: "compositional bias" RELATED [] -synonym: "compositionally biased" RELATED [] -synonym: "compositionally biased region of peptide" RELATED [] -synonym: "compositionally_biased_region" EXACT [] -is_a: SO:0000839 ! 
polypeptide_region - -[Term] -id: SO:0001067 -name: polypeptide_motif -alt_id: BS:00032 -def: "A sequence motif is a short (up to 20 amino acids) region of biological interest. Such motifs, although they are too short to constitute functional domains, share sequence similarities and are conserved in different proteins. They display a common function (protein-binding, subcellular location etc.)." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "motif" BROAD [uniprot:feature_type] -synonym: "polypeptide motif" EXACT [] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001068 -name: polypeptide_repeat -alt_id: BS:00070 -def: "A polypeptide_repeat is a single copy of an internal sequence repetition." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "polypeptide repeat" EXACT [] -synonym: "repeat" RELATED [uniprot:feature_type] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001070 -name: polypeptide_structural_region -alt_id: BS:00337 -def: "Region of polypeptide with a given structural property." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "polypeptide structural region" EXACT [] -synonym: "structural_region" RELATED [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001071 -name: membrane_structure -alt_id: BS:00128 -def: "Arrangement of the polypeptide with respect to the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "membrane_structure" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001072 -name: extramembrane_polypeptide_region -alt_id: BS:00154 -def: "Polypeptide region that is localized outside of a lipid bilayer." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "extramembrane" RELATED BS [] -synonym: "extramembrane polypeptide region" EXACT [] -synonym: "extramembrane_region" RELATED BS [] -synonym: "topo_dom" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! 
polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001073 -name: cytoplasmic_polypeptide_region -alt_id: BS:00145 -def: "Polypeptide region that is localized inside the cytoplasm." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "cytoplasm_location" EXACT BS [] -synonym: "cytoplasmic polypeptide region" EXACT [] -synonym: "inside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001074 -name: non_cytoplasmic_polypeptide_region -alt_id: BS:00144 -def: "Polypeptide region that is localized outside of a lipid bilayer and outside of the cytoplasm." [EBIBS:GAR, SO:cb] -comment: This could be inside an organelle within the cell. -subset: biosapiens -synonym: "non cytoplasmic polypeptide region" EXACT [] -synonym: "non_cytoplasm_location" EXACT BS [] -synonym: "outside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001075 -name: intramembrane_polypeptide_region -alt_id: BS:00156 -def: "Polypeptide region present in the lipid bilayer." [EBIBS:GAR] -subset: biosapiens -synonym: "intramembrane" RELATED BS [] -synonym: "intramembrane polypeptide region" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001076 -name: membrane_peptide_loop -alt_id: BS:00155 -def: "Polypeptide region localized within the lipid bilayer where both ends traverse the same membrane." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "membrane peptide loop" EXACT [] -synonym: "membrane_loop" RELATED BS [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001077 -name: transmembrane_polypeptide_region -alt_id: BS:00158 -def: "Polypeptide region traversing the lipid bilayer." 
[EBIBS:GAR, UniProt:curator_manual] -subset: biosapiens -synonym: "transmem" RELATED BS [uniprot:feature_type] -synonym: "transmembrane" RELATED BS [] -synonym: "transmembrane polypeptide region" EXACT [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001078 -name: polypeptide_secondary_structure -alt_id: BS:00003 -def: "A region of peptide with secondary structure has hydrogen bonding along the peptide chain that causes a defined conformation of the chain." [EBIBS:GAR] -comment: Biosapien term was secondary_structure. -subset: biosapiens -synonym: "2nary structure" RELATED BS [] -synonym: "polypeptide secondary structure" EXACT [] -synonym: "secondary structure" RELATED BS [] -synonym: "secondary structure region" RELATED BS [] -synonym: "secondary_structure" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Secondary_structure "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001079 -name: polypeptide_structural_motif -alt_id: BS:0000338 -def: "Motif is a three-dimensional structural element within the chain, which appears also in a variety of other molecules. Unlike a domain, a motif does not need to form a stable globular unit." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide structural motif" RELATED [] -synonym: "structural_motif" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Structural_motif "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001080 -name: coiled_coil -alt_id: BS:00041 -def: "A coiled coil is a structural motif in proteins, in which alpha-helices are coiled together like the strands of a rope." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "coiled" RELATED BS [uniprot:feature_type] -synonym: "coiled coil" EXACT [] -xref: http://en.wikipedia.org/wiki/Coiled_coil "wiki" -is_a: SO:0001079 ! 
polypeptide_structural_motif - -[Term] -id: SO:0001081 -name: helix_turn_helix -alt_id: BS:00147 -def: "A motif comprising two helices separated by a turn." [EBIBS:GAR] -subset: biosapiens -synonym: "helix turn helix" EXACT [] -synonym: "helix-turn-helix" EXACT [] -synonym: "HTH" RELATED BS [] -is_a: SO:0001079 ! polypeptide_structural_motif -relationship: has_part SO:0001114 ! peptide_helix -relationship: has_part SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001082 -name: polypeptide_sequencing_information -alt_id: BS:00125 -def: "Incompatibility in the sequence due to some experimental problem." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "sequencing_information" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0001083 -name: non_adjacent_residues -alt_id: BS:00182 -def: "Indicates that two consecutive residues in a fragment sequence are not consecutive in the full-length protein and that there are a number of unsequenced residues between them." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "non consecutive" EXACT [] -synonym: "non_cons" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001084 -name: non_terminal_residue -alt_id: BS:00072 -def: "The residue at an extremity of the sequence is not the terminal residue." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "non terminal" EXACT [] -synonym: "non_ter" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001085 -name: sequence_conflict -alt_id: BS:00069 -def: "Different sources report differing sequences." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "conflict" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! 
polypeptide_sequencing_information - -[Term] -id: SO:0001086 -name: sequence_uncertainty -alt_id: BS:00181 -def: "Describes the positions in a sequence where the authors are unsure about the sequence assignment." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "unsure" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001087 -name: cross_link -alt_id: BS:00178 -def: "Posttranslationally formed amino acid bonds." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "cross_link" EXACT [] -synonym: "crosslink" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001088 -name: disulfide_bond -alt_id: BS:00028 -def: "The covalent bond between sulfur atoms that binds two peptide chains or different parts of one peptide chain and is a structural determinant in many protein molecules." [EBIBS:GAR, UniProt:curation_manual] -comment: 2 discreet & joined. -subset: biosapiens -synonym: "disulfid" RELATED [] -synonym: "disulfide" RELATED [] -synonym: "disulfide bond" RELATED [] -synonym: "disulfide_bond" EXACT [] -synonym: "disulphide" EXACT [] -synonym: "disulphide bond" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001089 -name: post_translationally_modified_region -alt_id: BS:00052 -def: "A region where a transformation occurs in a protein after it has been synthesized. This which may regulate, stabilize, crosslink or introduce new chemical functionalities in the protein." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mod_res" EXACT [uniprot:feature_type] -synonym: "modified residue" EXACT [] -synonym: "post_translational_modification" EXACT [] -xref: http://en.wikipedia.org/wiki/Post_translational_modification "wiki" -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001090 -name: covalent_binding_site -alt_id: BS:00246 -def: "Binding involving a covalent bond." 
[EBIBS:GAR] -subset: biosapiens -synonym: "covalent_binding_site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001091 -name: non_covalent_binding_site -alt_id: BS:00029 -def: "Binding site for any chemical group (co-enzyme, prosthetic group, etc.)." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "binding" RELATED [uniprot:curation] -synonym: "binding site" RELATED [] -synonym: "non_covalent_binding_site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001092 -name: polypeptide_metal_contact -alt_id: BS:00027 -def: "Residue is part of a binding site for a metal ion." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "metal_binding" RELATED [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001093 -name: protein_protein_contact -alt_id: BS:00131 -def: "Residues involved in protein-protein interactions." [EBIBS:GAR, UniProt:Curation_manual] -subset: biosapiens -synonym: "protein protein contact" EXACT [] -synonym: "protein protein contact site" EXACT [] -synonym: "protein_protein_interaction" RELATED [] -xref: http://en.wikipedia.org/wiki/Protein_protein_interaction "wiki" -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001094 -name: polypeptide_calcium_ion_contact_site -alt_id: BS:00186 -def: "Residue involved in contact with calcium." [EBIBS:GAR] -subset: biosapiens -synonym: "ca bind" RELATED [] -synonym: "Ca_contact_site" EXACT [] -synonym: "polypeptide calcium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001095 -name: polypeptide_cobalt_ion_contact_site -alt_id: BS:00136 -def: "Residue involved in contact with cobalt." [EBIBS:GAR] -subset: biosapiens -synonym: "Co_contact_site" EXACT [] -synonym: "polypeptide cobalt ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001096 -name: polypeptide_copper_ion_contact_site -alt_id: BS:00146 -def: "Residue involved in contact with copper." [EBIBS:GAR] -subset: biosapiens -synonym: "Cu_contact_site" EXACT [] -synonym: "polypeptide copper ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001097 -name: polypeptide_iron_ion_contact_site -alt_id: BS:00137 -def: "Residue involved in contact with iron." [EBIBS:GAR] -subset: biosapiens -synonym: "Fe_contact_site" EXACT [] -synonym: "polypeptide iron ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001098 -name: polypeptide_magnesium_ion_contact_site -alt_id: BS:00187 -def: "Residue involved in contact with magnesium." [EBIBS:GAR] -subset: biosapiens -synonym: "Mg_contact_site" EXACT [] -synonym: "polypeptide magnesium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001099 -name: polypeptide_manganese_ion_contact_site -alt_id: BS:00140 -def: "Residue involved in contact with manganese." [EBIBS:GAR] -subset: biosapiens -synonym: "Mn_contact_site" EXACT [] -synonym: "polypeptide manganese ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001100 -name: polypeptide_molybdenum_ion_contact_site -alt_id: BS:00141 -def: "Residue involved in contact with molybdenum." [EBIBS:GAR] -subset: biosapiens -synonym: "Mo_contact_site" EXACT [] -synonym: "polypeptide molybdenum ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001101 -name: polypeptide_nickel_ion_contact_site -alt_id: BS:00142 -def: "Residue involved in contact with nickel." [EBIBS:GAR] -subset: biosapiens -synonym: "Ni_contact_site" EXACT [] -synonym: "polypeptide nickel ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001102 -name: polypeptide_tungsten_ion_contact_site -alt_id: BS:00143 -def: "Residue involved in contact with tungsten." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide tungsten ion contact site" EXACT [] -synonym: "W_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001103 -name: polypeptide_zinc_ion_contact_site -alt_id: BS:00185 -def: "Residue involved in contact with zinc." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide zinc ion contact site" EXACT [] -synonym: "Zn_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001104 -name: catalytic_residue -alt_id: BS:00026 -def: "Amino acid involved in the activity of an enzyme." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "act_site" RELATED [uniprot:feature_type] -synonym: "active site residue" EXACT [] -synonym: "catalytic residue" EXACT [] -is_a: SO:0001237 ! amino_acid -relationship: part_of SO:0100019 ! polypeptide_catalytic_motif - -[Term] -id: SO:0001105 -name: polypeptide_ligand_contact -alt_id: BS:00157 -def: "Residues which interact with a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide ligand contact" EXACT [] -synonym: "protein-ligand interaction" RELATED [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001106 -name: asx_motif -alt_id: BS:00202 -def: "A motif of five consecutive residues and two H-bonds in which: Residue(i) is Aspartate or Asparagine (Asx), side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3), main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx motif" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001107 -name: beta_bulge -alt_id: BS:00208 -def: "A motif of three residues within a beta-sheet in which the main chains of two consecutive residues are H-bonded to that of the third, and in which the dihedral angles are as follows: Residue(i): -140 degrees < phi(l) -20 degrees , -90 degrees < psi(l) < 40 degrees. Residue (i+1): -180 degrees < phi < -25 degrees or +120 degrees < phi < +180 degrees, +40 degrees < psi < +180 degrees or -180 degrees < psi < -120 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge" EXACT [] -xref: http://en.wikipedia.org/wiki/Beta_bulge "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001108 -name: beta_bulge_loop -alt_id: BS:00209 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds. Beta bulge loops often occur at the loop ends of beta-hairpins." [EBIBS:GAR, Http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001109 -name: beta_bulge_loop_five -alt_id: BS:00210 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+4), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+3), these loops have an RL nest at residues i+2 and i+3." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop five" EXACT [] -is_a: SO:0001108 ! 
beta_bulge_loop - -[Term] -id: SO:0001110 -name: beta_bulge_loop_six -alt_id: BS:00211 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+5), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+4), these loops have an RL nest at residues i+3 and i+4." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop six" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001111 -name: beta_strand -alt_id: BS:00042 -def: "A beta strand describes a single length of polypeptide chain that forms part of a beta sheet. A single continuous stretch of amino acids adopting an extended conformation of hydrogen bonds between the N-O and the C=O of another part of the peptide. This forms a secondary protein structure in which two or more extended polypeptide regions are hydrogen-bonded to one another in a planar array." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "strand" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Beta_sheet "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001112 -name: antiparallel_beta_strand -alt_id: BS:0000341 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (one running N-terminal to C-terminal and one running C-terminal to N-terminal). Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i) and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they form two mutual backbone hydrogen bonds to each other's flanking peptide groups; this is known as a close pair of hydrogen bonds. The peptide backbone dihedral angles (phi, psi) are about (-140 degrees, 135 degrees) in antiparallel sheets." 
[EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "antiparallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001113 -name: parallel_beta_strand -alt_id: BS:00151 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (both running N-terminal to C-terminal). This orientation is slightly less stable because it introduces nonplanarity in the inter-strand hydrogen bonding pattern. Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i)and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they do not hydrogen bond to each other; rather, one residue forms hydrogen bonds to the residues that flank the other (but not vice versa). For example, residue i may form hydrogen bonds to residues j - 1 and j + 1; this is known as a wide pair of hydrogen bonds. By contrast, residue j may hydrogen-bond to different residues altogether, or to none at all. The dihedral angles (phi, psi) are about (-120 degrees, 115 degrees) in parallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "parallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001114 -name: peptide_helix -alt_id: BS:00152 -def: "A helix is a secondary_structure conformation where the peptide backbone forms a coil." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "helix" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001115 -name: left_handed_peptide_helix -alt_id: BS:00222 -def: "A left handed helix is a region of peptide where the coiled conformation turns in an anticlockwise, left handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix-l" RELATED [] -synonym: "left handed helix" EXACT [] -is_a: SO:0001114 ! 
peptide_helix - -[Term] -id: SO:0001116 -name: right_handed_peptide_helix -alt_id: BS:0000339 -def: "A right handed helix is a region of peptide where the coiled conformation turns in a clockwise, right handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix" RELATED BS [] -synonym: "right handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001117 -name: alpha_helix -alt_id: BS:00040 -def: "The helix has 3.6 residues per turn which corersponds to a translation of 1.5 angstroms (= 0.15 nm) along the helical axis. Every backbone N-H group donates a hydrogen bond to the backbone C=O group of the amino acid four residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "a-helix" RELATED BS [] -synonym: "helix" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Alpha_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001118 -name: pi_helix -alt_id: BS:00153 -def: "The pi helix has 4.1 residues per turn and a translation of 1.15 (=0.115 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid five residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "pi helix" EXACT [] -xref: http://en.wikipedia.org/wiki/Pi_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001119 -name: three_ten_helix -alt_id: BS:0000340 -def: "The 3-10 helix has 3 residues per turn with a translation of 2.0 angstroms (=0.2 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid three residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "3(10) helix" EXACT [] -synonym: "3-10 helix" EXACT [] -synonym: "310 helix" EXACT [] -synonym: "three_ten_helix" EXACT [] -xref: http://en.wikipedia.org/wiki/310_helix "wiki" -is_a: SO:0001116 ! 
right_handed_peptide_helix - -[Term] -id: SO:0001120 -name: polypeptide_nest_motif -alt_id: BS:00223 -def: "A motif of two consecutive residues with dihedral angles. Nest should not have Proline as any residue. Nests frequently occur as parts of other motifs such as Schellman loops." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest" RELATED BS [] -synonym: "nest_motif" EXACT [] -synonym: "polypeptide nest motif" RELATED [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001121 -name: polypeptide_nest_left_right_motif -alt_id: BS:00224 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_left_right" EXACT [] -synonym: "nest_lr" EXACT [] -synonym: "polypeptide nest left right motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001122 -name: polypeptide_nest_right_left_motif -alt_id: BS:00225 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_right_left" EXACT [] -synonym: "nest_rl" EXACT [] -synonym: "polypeptide nest right left motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001123 -name: schellmann_loop -alt_id: BS:00226 -def: "A motif of six or seven consecutive residues that contains two H-bonds." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "paperclip" RELATED BS [] -synonym: "paperclip loop" RELATED [] -synonym: "schellmann_loop" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001124 -name: schellmann_loop_seven -alt_id: BS:00228 -def: "Wild type: A motif of seven consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+6), the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+5)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop seven" EXACT [] -synonym: "seven-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001125 -name: schellmann_loop_six -alt_id: BS:00227 -def: "Common Type: A motif of six consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+5) the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop six" EXACT [] -synonym: "six-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001126 -name: serine_threonine_motif -alt_id: BS:00229 -def: "A motif of five consecutive residues and two hydrogen bonds in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3) , the main-chain CO group of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine motif" EXACT [] -synonym: "st motif" EXACT [] -synonym: "st_motif" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001127 -name: serine_threonine_staple_motif -alt_id: BS:00230 -def: "A motif of four or five consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain OH of residue(i) is H-bonded to the main-chain CO of residue(i3) or (i4), Phi angles of residues(i1), (i2) and (i3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine threonine staple motif" EXACT [] -synonym: "st_staple" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001128 -name: polypeptide_turn_motif -alt_id: BS:00148 -def: "A reversal in the direction of the backbone of a protein that is stabilized by hydrogen bond between backbone NH and CO groups, involving no more than 4 amino acid residues." [EBIBS:GAR, uniprot:feature_type] -comment: Range. -subset: biosapiens -synonym: "turn" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001129 -name: asx_turn_left_handed_type_one -alt_id: BS:00206 -def: "Left handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type one" EXACT [] -synonym: "asx_turn_il" RELATED [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001130 -name: asx_turn_left_handed_type_two -alt_id: BS:00204 -def: "Left handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type two" EXACT [] -synonym: "asx_turn_iil" EXACT [] -is_a: SO:0000912 ! 
asx_turn - -[Term] -id: SO:0001131 -name: asx_turn_right_handed_type_two -alt_id: BS:00205 -def: "Right handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn right handed type two" EXACT [] -synonym: "asx_turn_iir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001132 -name: asx_turn_right_handed_type_one -alt_id: BS:00207 -def: "Right handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn type right handed type one" EXACT [] -synonym: "asx_turn_ir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001133 -name: beta_turn -alt_id: BS:00212 -def: "A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles of the second and third residues, which are the basis for sub-categorization." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001134 -name: beta_turn_left_handed_type_one -alt_id: BS:00215 -def: "Left handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles:- Residue(i+1): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees. 
Residue(i+2): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type one" EXACT [] -synonym: "beta_turn_il" EXACT [] -synonym: "type I' beta turn" EXACT [] -synonym: "type I' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001135 -name: beta_turn_left_handed_type_two -alt_id: BS:00213 -def: "Left handed type II: A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees > phi > -20 degrees, +80 degrees > psi > +180 degrees. Residue(i+2): +20 degrees > phi > +140 degrees, -40 degrees > psi > +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type two" EXACT [] -synonym: "beta_turn_iil" EXACT [] -synonym: "type II' beta turn" EXACT [] -synonym: "type II' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001136 -name: beta_turn_right_handed_type_one -alt_id: BS:00216 -def: "Right handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+2): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type one" EXACT [] -synonym: "beta_turn_ir" EXACT [] -synonym: "type I beta turn" EXACT [] -synonym: "type I turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001137 -name: beta_turn_right_handed_type_two -alt_id: BS:00214 -def: "Right handed type II:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, +80 degrees < psi < +180 degrees. Residue(i+2): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type two" EXACT [] -synonym: "beta_turn_iir" EXACT [] -synonym: "type II beta turn" EXACT [] -synonym: "type II turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001138 -name: gamma_turn -alt_id: BS:00219 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001139 -name: gamma_turn_classic -alt_id: BS:00220 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=75.0 - psi(i+1)=-64.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "classic gamma turn" EXACT [] -synonym: "gamma turn classic" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001140 -name: gamma_turn_inverse -alt_id: BS:00221 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=-79.0 - psi(i+1)=69.0." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn inverse" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001141 -name: serine_threonine_turn -alt_id: BS:00231 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine turn" EXACT [] -synonym: "st_turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001142 -name: st_turn_left_handed_type_one -alt_id: BS:00234 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type one" EXACT [] -synonym: "st_turn_il" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001143 -name: st_turn_left_handed_type_two -alt_id: BS:00232 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type two" EXACT [] -synonym: "st_turn_iil" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001144 -name: st_turn_right_handed_type_one -alt_id: BS:00235 -def: "The peptide twists in an clockwise, right handed manner. 
The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type one" EXACT [] -synonym: "st_turn_ir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001145 -name: st_turn_right_handed_type_two -alt_id: BS:00233 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type two" EXACT [] -synonym: "st_turn_iir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001146 -name: polypeptide_variation_site -alt_id: BS:00336 -def: "A site of sequence variation (alteration). Alternative sequence due to naturally occuring events such as polymorphisms and altermatve splicing or experimental methods such as site directed mutagenesis." [EBIBS:GAR, SO:ke] -comment: For example, was a substitution natural or mutated as part of an experiment? This term is added to merge the biosapiens term sequence_variations. -subset: biosapiens -synonym: "sequence_variations" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001147 -name: natural_variant_site -alt_id: BS:00071 -def: "Describes the natural sequence variants due to polymorphisms, disease-associated mutations, RNA editing and variations between strains, isolates or cultivars." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. 
-subset: biosapiens -synonym: "natural_variant" BROAD [] -synonym: "sequence variation" BROAD [] -synonym: "variant" BROAD [uniprot:feature_type] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001148 -name: mutated_variant_site -alt_id: BS:00036 -def: "Site which has been experimentally altered." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mutagen" EXACT BS [uniprot:feature_type] -synonym: "mutagenesis" EXACT [] -synonym: "mutated_site" EXACT [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001149 -name: alternate_sequence_site -alt_id: BS:00073 -alt_id: SO:0001065 -def: "Description of sequence variants produced by alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "alternative_sequence" EXACT [] -synonym: "isoform" NARROW [] -synonym: "sequence variation" NARROW [] -synonym: "var_seq" EXACT [uniprot:feature_type] -synonym: "varsplic" NARROW [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001150 -name: beta_turn_type_six -def: "A motif of four consecutive peptide resides of type VIa or type VIb and where the i+2 residue is cis-proline." [SO:cb] -subset: biosapiens -synonym: "beta turn type six" EXACT [] -synonym: "cis-proline loop" EXACT [] -synonym: "type VI beta turn" EXACT [] -synonym: "type VI turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001151 -name: beta_turn_type_six_a -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -90 degrees, psi ~ 0 degrees." 
[PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six a" EXACT [] -synonym: "type VIa beta turn" EXACT [] -synonym: "type VIa turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001152 -name: beta_turn_type_six_a_one -subset: biosapiens -synonym: "beta turn type six a one" EXACT [] -synonym: "type VIa1 beta turn" EXACT [] -synonym: "type VIa1 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001153 -name: beta_turn_type_six_a_two -subset: biosapiens -synonym: "beta turn type six a two" EXACT [] -synonym: "type VIa2 beta turn" EXACT [] -synonym: "type VIa2 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001154 -name: beta_turn_type_six_b -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -120 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -60 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six b" EXACT [] -synonym: "type VIb beta turn" EXACT [] -synonym: "type VIb turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001155 -name: beta_turn_type_eight -def: "A motif of four consecutive peptide residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ -30 degrees. Residue(i+2): phi ~ -120 degrees, psi ~ 120 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type eight" EXACT [] -synonym: "type VIII beta turn" EXACT [] -synonym: "type VIII turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001156 -name: DRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -10 and -60 relative to the TSS. Consensus sequence is WATCGATW." [PMID:12537576] -comment: This consensus sequence was identified computationally using the MEME algorithm within core promoter sequences from -60 to +40, with an E value of 1.7e-183. Tends to co-occur with Motif 7. Tends to not occur with DPE motif (SO:0000015) or motif 10. -synonym: "DRE motif" EXACT [] -synonym: "NDM4" EXACT [] -synonym: "WATCGATW_motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001157 -name: DMv4_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements with respect to the TSS (+1). Consensus sequence is YGGTCACACTR. Marked spatial preference within core promoter; tend to occur near the TSS, although not as tightly as INR (SO:0000014)." [PMID:16827941\:12537576] -synonym: "directional motif v4" EXACT [] -synonym: "DMv4" EXACT [] -synonym: "DMv4 motif" EXACT [] -synonym: "motif 1 element" EXACT [] -synonym: "promoter motif 1" EXACT [] -synonym: "YGGTCACATR" NARROW [] -is_a: SO:0000713 ! DNA_motif - -[Term] -id: SO:0001158 -name: E_box_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and +1 relative to the TSS. Consensus sequence is AWCAGCTGWT. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015)." [PMID:12537576\:16827941] -synonym: "AWCAGCTGWT" NARROW [] -synonym: "E box motif" EXACT [] -synonym: "generic E box motif" EXACT [] -synonym: "NDM5" RELATED [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001159 -name: DMv5_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -50 and -10 relative to the TSS. Consensus sequence is KTYRGTATWTTT. Tends to co-occur with DMv4 (SO:0001157) . Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v5" EXACT [] -synonym: "DMv5" EXACT [] -synonym: "DMv5 motif" EXACT [] -synonym: "KTYRGTATWTTT" NARROW [] -synonym: "promoter motif 6" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001160 -name: DMv3_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -30 and +15 relative to the TSS. Consensus sequence is KNNCAKCNCTRNY. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015) or MTE (0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v3" EXACT [] -synonym: "DMv3" EXACT [] -synonym: "DMv3 motif" EXACT [] -synonym: "KNNCAKCNCTRNY" NARROW [] -synonym: "promoter motif 7" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001161 -name: DMv2_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and -45 relative to the TSS. Consensus sequence is MKSYGGCARCGSYSS. Tends to co-occur with DMv3 (SO:0001160). Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v2" EXACT [] -synonym: "DMv2" EXACT [] -synonym: "DMv2 motif" EXACT [] -synonym: "MKSYGGCARCGSYSS" NARROW [] -synonym: "promoter motif 8" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001162 -name: MTE -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between +20 and +30 relative to the TSS. Consensus sequence is CSARCSSAACGS. Tends to co-occur with INR motif (SO:0000014). Tends to not occur with DPE motif (SO:0000015) or DMv5 (SO:0001159)." [PMID:12537576\:15231738] -synonym: "CSARCSSAACGS" NARROW [] -synonym: "motif ten element" EXACT [] -synonym: "motif_ten_element" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001163 -name: INR1_motif -def: "A promoter motif with consensus sequence TCATTCG." [PMID:16827941] -synonym: "directional motif p3" EXACT [] -synonym: "directional promoter motif 3" EXACT [] -synonym: "DMp3" EXACT [] -synonym: "INR1 motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001164 -name: DPE1_motif -def: "A promoter motif with consensus sequence CGGACGT." [PMID:16827941] -synonym: "directional motif 5" EXACT [] -synonym: "directional promoter motif 5" RELATED [] -synonym: "DMp5" EXACT [] -synonym: "DPE1 motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001165 -name: DMv1_motif -def: "A promoter motif with consensus sequence CARCCCT." [PMID:16827941] -synonym: "directional promoter motif v1" RELATED [] -synonym: "DMv1" RELATED [] -synonym: "DMv1 motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001166 -name: GAGA_motif -def: "A non directional promoter motif with consensus sequence GAGAGCG." [PMID:16827941] -synonym: "GAGA" EXACT [] -synonym: "GAGA motif" EXACT [] -synonym: "NDM1" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001167 -name: NDM2_motif -def: "A non directional promoter motif with consensus CGMYGYCR." [PMID:16827941] -synonym: "NDM2" EXACT [] -synonym: "NDM2 motif" EXACT [] -synonym: "non directional promoter motif 2" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001168 -name: NDM3_motif -def: "A non directional promoter motif with consensus sequence GAAAGCT." [PMID:16827941] -synonym: "NDM3" EXACT [] -synonym: "NDM3 motif" EXACT [] -synonym: "non directional motif 3" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001169 -name: ds_RNA_viral_sequence -def: "A ds_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded RNA." [SO:ke] -synonym: "double stranded RNA virus sequence" EXACT [] -synonym: "ds RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001170 -name: polinton -def: "A kind of DNA transposon that populates the genomes of protists, fungi, and animals, characterized by a unique set of proteins necessary for their transposition, including a protein-primed DNA polymerase B, retroviral integrase, cysteine protease, and ATPase. Polintons are characterized by 6-bp target site duplications, terminal-inverted repeats that are several hundred nucleotides long, and 5'-AG and TC-3' termini. Polintons exist as autonomous and nonautonomous elements." [PMID:16537396] -synonym: "maverick element" RELATED [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0001171 -name: rRNA_21S -def: "A component of the large ribosomal subunit in mitochondrial rRNA." [RSC:cb] -synonym: "21S LSU rRNA" EXACT [] -synonym: "21S ribosomal RNA" EXACT [] -synonym: "21S rRNA" EXACT [] -synonym: "rRNA 21S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001172 -name: tRNA_region -def: "A region of a tRNA." 
[RSC:cb] -synonym: "tRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000253 ! tRNA - -[Term] -id: SO:0001173 -name: anticodon_loop -def: "A sequence of seven nucleotide bases in tRNA which contains the anticodon. It has the sequence 5'-pyrimidine-purine-anticodon-modified purine-any base-3." [ISBN:0716719207] -synonym: "anti-codon loop" EXACT [] -synonym: "anticodon loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001174 -name: anticodon -def: "A sequence of three nucleotide bases in tRNA which recognizes a codon in mRNA." [RSC:cb] -synonym: "anti-codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Anticodon "wiki" -is_a: SO:0001172 ! tRNA_region -relationship: part_of SO:0001173 ! anticodon_loop - -[Term] -id: SO:0001175 -name: CCA_tail -def: "Base sequence at the 3' end of a tRNA. The 3'-hydroxyl group on the terminal adenosine is the attachment point for the amino acid." [ISBN:0716719207] -synonym: "CCA sequence" EXACT [] -synonym: "CCA tail" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001176 -name: DHU_loop -def: "Non-base-paired sequence of nucleotide bases in tRNA. It contains several dihydrouracil residues." [ISBN:071671920] -synonym: "D loop" RELATED [] -synonym: "DHU loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001177 -name: T_loop -def: "Non-base-paired sequence of three nucleotide bases in tRNA. It has sequence T-Psi-C." [ISBN:0716719207] -synonym: "T loop" EXACT [] -synonym: "TpsiC loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001178 -name: pyrrolysine_tRNA_primary_transcript -def: "A primary transcript encoding pyrrolysyl tRNA (SO:0000766)." [RSC:cb] -synonym: "pyrrolysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0001179 -name: U3_snoRNA -def: "U3 snoRNA is a member of the box C/D class of small nucleolar RNAs. 
The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -comment: The definition is most of the old definition for snoRNA (SO:0000275). -synonym: "small nucleolar RNA U3" EXACT [] -synonym: "snoRNA U3" EXACT [] -synonym: "U3 small nucleolar RNA" EXACT [] -synonym: "U3 snoRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Small_nucleolar_RNA_U3 "wiki" -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0001180 -name: AU_rich_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is rich in AUUUA pentamers. Messenger RNAs bearing multiple AU-rich elements are often unstable." [PMID:7892223] -synonym: "ARE" RELATED [] -synonym: "AU rich element" EXACT [] -synonym: "AU-rich element" EXACT [] -xref: http://en.wikipedia.org/wiki/AU-rich_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001181 -name: Bruno_response_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is bound by the Drosophila Bruno protein and its homologs." 
[PMID:10893231] -comment: Not to be confused with BRE_motif (SO:0000016), which binds transcription factor II B. -synonym: "BRE" RELATED [] -synonym: "Bruno response element" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001182 -name: iron_responsive_element -def: "A regulatory sequence found in the 5' and 3' UTRs of many mRNAs which encode iron-binding proteins. It has a hairpin structure and is recognized by trans-acting proteins known as iron-regulatory proteins." [PMID:3198610, PMID:8710843] -synonym: "IRE" EXACT [] -synonym: "iron responsive element" EXACT [] -xref: http://en.wikipedia.org/wiki/Iron_responsive_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0001183 -name: morpholino -def: "An attribute describing a sequence composed of nucleobases bound to a morpholino backbone. A morpholino backbone consists of morpholine (CHEBI:34856) rings connected by phosphorodiamidate linkages." [RSC:cb] -comment: Do not use this for feature annotation. Use morpholino_oligo (SO:0000034) instead. -xref: http://en.wikipedia.org/wiki/Morpholino "wiki" -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001184 -name: PNA -def: "An attribute describing a sequence composed of peptide nucleic acid (CHEBI:48021), a chemical consisting of nucleobases bound to a backbone composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [RSC:cb] -comment: Do not use this term for feature annotation. Use PNA_oligo (SO:0001011) instead. -synonym: "peptide nucleic acid" RELATED [] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001185 -name: enzymatic -def: "An attribute describing the sequence of a transcript that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. 
Use enzymatic_RNA (SO:0000372) instead. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001186 -name: ribozymic -def: "An attribute describing the sequence of a transcript that has catalytic activity even without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use ribozyme (SO:0000374) instead. -is_a: SO:0001185 ! enzymatic - -[Term] -id: SO:0001187 -name: pseudouridylation_guide_snoRNA -def: "A snoRNA that specifies the site of pseudouridylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA pseudouridylation guide activity (GO:0030558). -synonym: "pseudouridylation guide snoRNA" EXACT [] -is_a: SO:0000594 ! H_ACA_box_snoRNA - -[Term] -id: SO:0001188 -name: LNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of 'locked' deoxyribose rings connected to a phosphate backbone. The deoxyribose unit's conformation is 'locked' by a 2'-C,4'-C-oxymethylene link." [CHEBI:48010] -comment: Do not use this term for feature annotation. Use LNA_oligo (SO:0001189) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001189 -name: LNA_oligo -def: "An oligo composed of LNA residues." [RSC:cb] -synonym: "LNA oligo" EXACT [] -synonym: "locked nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Locked_nucleic_acid "wiki" -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001188 ! LNA - -[Term] -id: SO:0001190 -name: TNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of threose rings connected to a phosphate backbone." [CHEBI:48019] -comment: Do not use this term for feature annotation. Use TNA_oligo (SO:0001191) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001191 -name: TNA_oligo -def: "An oligo composed of TNA residues." 
[RSC:cb] -synonym: "threose nucleic acid" EXACT [] -synonym: "TNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Threose_nucleic_acid "wiki" -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001190 ! TNA - -[Term] -id: SO:0001192 -name: GNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of an acyclic three-carbon propylene glycol connected to a phosphate backbone. It has two enantiomeric forms, (R)-GNA and (S)-GNA." [CHEBI:48015] -comment: Do not use this term for feature annotation. Use GNA_oligo (SO:0001192) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001193 -name: GNA_oligo -def: "An oligo composed of GNA residues." [RSC:cb] -synonym: "glycerol nucleic acid" EXACT [] -synonym: "glycol nucleic acid" EXACT [] -synonym: "GNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Glycerol_nucleic_acid "wiki" -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001192 ! GNA - -[Term] -id: SO:0001194 -name: R_GNA -def: "An attribute describing a GNA sequence in the (R)-GNA enantiomer." [CHEBI:48016] -comment: Do not use this term for feature annotation. Use R_GNA_oligo (SO:0001195) instead. -synonym: "R GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001195 -name: R_GNA_oligo -def: "An oligo composed of (R)-GNA residues." [RSC:cb] -synonym: "(R)-glycerol nucleic acid" EXACT [] -synonym: "(R)-glycol nucleic acid" EXACT [] -synonym: "R GNA oligo" EXACT [] -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001194 ! R_GNA - -[Term] -id: SO:0001196 -name: S_GNA -def: "An attribute describing a GNA sequence in the (S)-GNA enantiomer." [CHEBI:48017] -comment: Do not use this term for feature annotation. Use S_GNA_oligo (SO:0001197) instead. -synonym: "S GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001197 -name: S_GNA_oligo -def: "An oligo composed of (S)-GNA residues." 
[RSC:cb] -synonym: "(S)-glycerol nucleic acid" EXACT [] -synonym: "(S)-glycol nucleic acid" EXACT [] -synonym: "S GNA oligo" EXACT [] -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001196 ! S_GNA - -[Term] -id: SO:0001198 -name: ds_DNA_viral_sequence -def: "A ds_DNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded DNA." [SO:ke] -synonym: "double stranded DNA virus" EXACT [] -synonym: "ds DNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001199 -name: ss_RNA_viral_sequence -def: "A ss_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as single stranded RNA." [SO:ke] -synonym: "single strand RNA virus" EXACT [] -synonym: "ss RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001200 -name: negative_sense_ssRNA_viral_sequence -def: "A negative_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that is complementary to mRNA and must be converted to positive sense RNA by RNA polymerase before translation." [SO:ke] -synonym: "negative sense single stranded RNA virus" RELATED [] -synonym: "negative sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001201 -name: positive_sense_ssRNA_viral_sequence -def: "A positive_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that can be immediately translated by the host." [SO:ke] -synonym: "positive sense single stranded RNA virus" RELATED [] -synonym: "positive sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001202 -name: ambisense_ssRNA_viral_sequence -def: "A ambisense_RNA_virus is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus with both messenger and anti messenger polarity." 
[SO:ke] -synonym: "ambisense single stranded RNA virus" EXACT [] -synonym: "ambisense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001203 -name: RNA_polymerase_promoter -def: "A region (DNA) to which RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "RNA polymerase promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0001204 -name: Phage_RNA_Polymerase_Promoter -def: "A region (DNA) to which Bacteriophage RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "Phage RNA Polymerase Promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0001205 -name: SP6_RNA_Polymerase_Promoter -def: "A region (DNA) to which the SP6 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "SP6 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001206 -name: T3_RNA_Polymerase_Promoter -def: "A DNA sequence to which the T3 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T3 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001207 -name: T7_RNA_Polymerase_Promoter -def: "A region (DNA) to which the T7 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T7 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001208 -name: five_prime_EST -def: "An EST read from the 5' end of a transcript that usually codes for a protein. These regions tend to be conserved across species and do not change much within a gene family." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "5' EST" EXACT [] -synonym: "five prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001209 -name: three_prime_EST -def: "An EST read from the 3' end of a transcript. They are more likely to fall within non-coding, or untranslated regions(UTRs)." 
[http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "3' EST" EXACT [] -synonym: "three prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001210 -name: translational_frameshift -def: "The region of mRNA (not divisible by 3 bases) that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "ribosomal frameshift" EXACT [] -synonym: "translational frameshift" EXACT [] -xref: http://en.wikipedia.org/wiki/Translational_frameshift "wiki" -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0001211 -name: plus_1_translational_frameshift -def: "The region of mRNA 1 base long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 1 ribosomal frameshift" EXACT [] -synonym: "plus 1 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001212 -name: plus_2_translational_frameshift -def: "The region of mRNA 2 bases long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 2 ribosomal frameshift" EXACT [] -synonym: "plus 2 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001213 -name: group_III_intron -def: "Group III introns are introns found in the mRNA of the plastids of euglenoid protists. They are spliced by a two step transesterification with bulged adenosine as initiating nucleophile." [PMID:11377794] -comment: GO:0000374. -synonym: "group III intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_III_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -def: "The maximal intersection of exon and UTR." 
[SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with a stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001216 -name: endonuclease_spliced_intron -def: "An intron that spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -synonym: "endonuclease spliced intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0001217 -name: protein_coding_gene -synonym: "protein coding gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000010 ! protein_coding - -[Term] -id: SO:0001218 -name: transgenic_insertion -def: "An insertion that derives from another organism, via the use of recombinant DNA technology." [SO:bm] -synonym: "transgenic insertion" EXACT [] -intersection_of: SO:0000667 ! insertion -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0001219 -name: retrogene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000569 ! retrotransposed - -[Term] -id: SO:0001220 -name: silenced_by_RNA_interference -def: "An attribute describing an epigenetic process where a gene is inactivated by RNA interference." [RSC:cb] -comment: RNA interference is GO:0016246. -synonym: "silenced by RNA interference" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001221 -name: silenced_by_histone_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by histone modification." [RSC:cb] -comment: Histone modification is GO:0016570. 
-synonym: "silenced by histone modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001222 -name: silenced_by_histone_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone methylation." [RSC:cb] -comment: Histone methylation is GO:0016571. -synonym: "silenced by histone methylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001223 -name: silenced_by_histone_deacetylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone deacetylation." [RSC:cb] -comment: Histone deacetylation is GO:0016573. -synonym: "silenced by histone deacetylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001224 -name: gene_silenced_by_RNA_interference -def: "A gene that is silenced by RNA interference." [SO:xp] -synonym: "gene silenced by RNA interference" EXACT [] -synonym: "RNA interference silenced gene" EXACT [] -synonym: "RNAi silenced gene" EXACT [] -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001220 ! silenced_by_RNA_interference - -[Term] -id: SO:0001225 -name: gene_silenced_by_histone_modification -def: "A gene that is silenced by histone modification." [SO:xp] -synonym: "gene silenced by histone modification" EXACT [] -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001226 -name: gene_silenced_by_histone_methylation -def: "A gene that is silenced by histone methylation." [SO:xp] -synonym: "gene silenced by histone methylation" EXACT [] -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001222 ! silenced_by_histone_methylation - -[Term] -id: SO:0001227 -name: gene_silenced_by_histone_deacetylation -def: "A gene that is silenced by histone deacetylation." 
[SO:xp] -synonym: "gene silenced by histone deacetylation" EXACT [] -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001223 ! silenced_by_histone_deacetylation - -[Term] -id: SO:0001228 -name: dihydrouridine -def: "A modified RNA base in which the 5,6-dihydrouracil is bound to the ribose ring." [RSC:cb] -synonym: "D" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Dihydrouridine "wiki" -xref: RNAMOD:051 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001229 -name: pseudouridine -def: "A modified RNA base in which the 5- position of the uracil is bound to the ribose ring instead of the 4- position." [RSC:cb] -comment: The free molecule is CHEBI:17802. -synonym: "Y" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Pseudouridine "wiki" -xref: RNAMOD:050 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001230 -name: inosine -def: "A modified RNA base in which hypoxanthine is bound to the ribose ring." [http://library.med.utah.edu/RNAmods/, RSC:cb] -comment: The free molecule is CHEBI:17596. -synonym: "I" RELATED [] -synonym: "RNAMOD:017" RELATED [] -xref: http://en.wikipedia.org/wiki/Inosine "wiki" -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001231 -name: seven_methylguanine -def: "A modified RNA base in which guanine is methylated at the 7- position." [RSC:cb] -comment: The free molecule is CHEBI:2274. -synonym: "7-methylguanine" EXACT [] -synonym: "seven methylguanine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001232 -name: ribothymidine -def: "A modified RNA base in which thymine is bound to the ribose ring." [RSC:cb] -comment: The free molecule is CHEBI:30832. -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001233 -name: methylinosine -def: "A modified RNA base in which methylhypoxanthine is bound to the ribose ring." [RSC:cb] -is_a: SO:0001274 ! 
modified_inosine - -[Term] -id: SO:0001234 -name: mobile -def: "An attribute describing a feature that has either intra-genome or intracellular mobility." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Mobile "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001235 -name: replicon -def: "A region containing at least one unique origin of replication and a unique termination site." [ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001237 -name: amino_acid -def: "A sequence feature that corresponds to a single amino acid residue in a polypeptide." [RSC:cb] -comment: Probably in the future this will cross reference to Chebi. -synonym: "amino acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Amino_acid "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0001238 -name: major_TSS -synonym: "major transcription start site" EXACT [] -synonym: "major TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001239 -name: minor_TSS -synonym: "minor TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001240 -name: TSS_region -def: "The region of a gene from the 5' most TSS to the 3' TSS." [BBOP:nw] -synonym: "TSS region" EXACT [] -is_a: SO:0000842 ! gene_component_region -relationship: has_part SO:0000315 ! TSS - -[Term] -id: SO:0001241 -name: encodes_alternate_transcription_start_sites -synonym: "encodes alternate transcription start sites" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0001243 -name: miRNA_primary_transcript_region -def: "A part of an miRNA primary_transcript." 
[SO:ke] -synonym: "miRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0001244 -name: pre_miRNA -def: "The 60-70 nucleotide region remain after Drosha processing of the primary transcript, that folds back upon itself to form a hairpin sructure." [SO:ke] -synonym: "pre-miRNA" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0001245 -name: miRNA_stem -def: "The stem of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA stem" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001246 -name: miRNA_loop -def: "The loop of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA loop" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001247 -name: synthetic_oligo -def: "An oligo composed of synthetic nucleotides." [SO:ke] -synonym: "synthetic oligo" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0001248 -name: assembly -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001249 -name: fragment_assembly -def: "A fragment assembly is a genome assembly that orders overlapping fragments of the genome based on landmark sequences. The base pair distance between the landmarks is known allowing additivity of lengths." [SO:ke] -synonym: "fragment assembly" EXACT [] -synonym: "physical map" EXACT [] -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0001250 -name: fingerprint_map -def: "A fingerprint_map is a physical map composed of restriction fragments." 
[SO:ke] -synonym: "BACmap" EXACT [] -synonym: "fingerprint map" EXACT [] -synonym: "FPC" EXACT [] -synonym: "FPCmap" EXACT [] -synonym: "restriction map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000412 ! restriction_fragment - -[Term] -id: SO:0001251 -name: STS_map -def: "An STS map is a physical map organized by the unique STS landmarks." [SO:ke] -synonym: "STS map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001252 -name: RH_map -def: "A radiation hybrid map is a physical map." [SO:ke] -synonym: "radiation hybrid map" EXACT [] -synonym: "RH map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001253 -name: sonicate_fragment -def: "A DNA fragment generated by sonication. Sonication is a technique used to sheer DNA into smaller fragments." [SO:ke] -synonym: "sonicate fragment" EXACT [] -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0001254 -name: polyploid -def: "A kind of chromosome variation where the chromosome complement is an exact multiple of the haploid number and is greater than the diploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Polyploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0001255 -name: autopolyploid -def: "A polyploid where the multiple chromosome set was derived from the same organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Autopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001256 -name: allopolyploid -def: "A polyploid where the multiple chromosome set was derived from a different organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Allopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001257 -name: homing_endonuclease_binding_site -def: "The binding site (recognition site) of a homing endonuclease. The binding site is typically large." 
[SO:ke] -synonym: "homing endonuclease binding site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0001258 -name: octamer_motif -def: "A sequence element characteristic of some RNA polymerase II promoters with sequence ATTGCAT that binds Pou-domain transcription factors." [GOC:dh, PMID:3095662] -comment: Nature. 1986 Oct 16-22;323(6089):640-3. -synonym: "octamer motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001259 -name: apicoplast_chromosome -def: "A chromosome originating in an apicoplast." [SO:xp] -synonym: "apicoplast chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0001260 -name: sequence_collection -def: "A collection of discontinuous sequences." [SO:ke] -synonym: "sequence collection" EXACT [] - -[Term] -id: SO:0001261 -name: overlapping_feature_set -def: "A continuous region of sequence composed of the overlapping of multiple sequence_features, which ultimately provides evidence for another sequence_feature." [SO:ke] -comment: This feature was requested by Nicole, tracker id 1911479. It is required to gather evidence together for annotation. An example would be overlapping ESTs that support an mRNA. -synonym: "overlapping feature set" EXACT [] -is_a: SO:0000703 ! experimental_result_region - -[Term] -id: SO:0001262 -name: overlapping_EST_set -def: "A continous experimental result region extending the length of multiple overlapping EST's." [SO:ke] -synonym: "overlapping EST set" EXACT [] -is_a: SO:0001261 ! overlapping_feature_set -relationship: has_part SO:0000345 ! EST - -[Term] -id: SO:0001263 -name: ncRNA_gene -synonym: "ncRNA gen" EXACT [] -synonym: "ncRNA gene" EXACT [] -synonym: "non-coding RNA gene" RELATED [] -is_a: SO:0000704 ! gene - -[Term] -id: SO:0001264 -name: gRNA_gene -synonym: "gRNA gene" EXACT [] -intersection_of: SO:0001263 ! 
ncRNA_gene -intersection_of: has_quality SO:0000979 ! gRNA_encoding - -[Term] -id: SO:0001265 -name: miRNA_gene -synonym: "miRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000571 ! miRNA_encoding - -[Term] -id: SO:0001266 -name: scRNA_gene -synonym: "scRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000575 ! scRNA_encoding - -[Term] -id: SO:0001267 -name: snoRNA_gene -synonym: "snoRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0001268 -name: snRNA_gene -synonym: "snRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0001263 ! ncRNA_gene - -[Term] -id: SO:0001269 -name: SRP_RNA_gene -synonym: "SRP RNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000642 ! SRP_RNA_encoding - -[Term] -id: SO:0001270 -name: stRNA_gene -synonym: "stRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000656 ! stRNA_encoding - -[Term] -id: SO:0001271 -name: tmRNA_gene -synonym: "tmRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000659 ! tmRNA_encoding - -[Term] -id: SO:0001272 -name: tRNA_gene -synonym: "tRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000663 ! tRNA_encoding - -[Term] -id: SO:0001273 -name: modified_adenosine -def: "A modified adenine is an adenine base feature that has been altered." [SO:ke] -synonym: "modified adenosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001274 -name: modified_inosine -def: "A modified inosine is an inosine base feature that has been altered." [SO:ke] -synonym: "modified inosine" EXACT [] -is_a: SO:0001230 ! 
inosine - -[Term] -id: SO:0001275 -name: modified_cytidine -def: "A modified cytidine is a cytidine base feature which has been altered." [SO:ke] -synonym: "modified cytidine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001276 -name: modified_guanosine -synonym: "modified guanosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001277 -name: modified_uridine -synonym: "modified uridine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001278 -name: one_methylinosine -def: "1-methylinosine is a modified insosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylinosine" EXACT [] -synonym: "m1I" EXACT RNAMOD [] -synonym: "one methylinosine" EXACT [] -xref: RNAMOD:018 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001279 -name: one_two_prime_O_dimethylinosine -def: "1,2'-O-dimethylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylinosine" EXACT [] -synonym: "m'Im" EXACT RNAMOD [] -synonym: "one two prime O dimethylinosine" EXACT [] -xref: RNAMOD:019 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001280 -name: two_prime_O_methylinosine -def: "2'-O-methylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylinosine" EXACT [] -synonym: "Im" EXACT RNAMOD [] -synonym: "two prime O methylinosine" EXACT [] -xref: RNAMOD:081 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001281 -name: three_methylcytidine -def: "3-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylcytidine" EXACT [] -synonym: "m3C" EXACT RNAMOD [] -synonym: "three methylcytidine" EXACT [] -xref: RNAMOD:020 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001282 -name: five_methylcytidine -def: "5-methylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylcytidine" EXACT [] -synonym: "five methylcytidine" EXACT [] -synonym: "m5C" EXACT RNAMOD [] -xref: RNAMOD:021 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001283 -name: two_prime_O_methylcytidine -def: "2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylcytidine" EXACT [] -synonym: "Cm" EXACT RNAMOD [] -synonym: "two prime O methylcytidine" EXACT [] -xref: RNAMOD:022 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001284 -name: two_thiocytidine -def: "2-thiocytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiocytidine" EXACT [] -synonym: "s2C" EXACT RNAMOD [] -synonym: "two thiocytidine" EXACT [] -xref: RNAMOD:023 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001285 -name: N4_acetylcytidine -def: "N4-acetylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4C" EXACT RNAMOD [] -synonym: "N4 acetylcytidine" EXACT [] -synonym: "N4-acetylcytidine" EXACT [] -xref: RNAMOD:024 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001286 -name: five_formylcytidine -def: "5-formylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formylcytidine" EXACT [] -synonym: "f5C" EXACT RNAMOD [] -synonym: "five formylcytidine" EXACT [] -xref: RNAMOD:025 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001287 -name: five_two_prime_O_dimethylcytidine -def: "5,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethylcytidine" EXACT [] -synonym: "five two prime O dimethylcytidine" EXACT [] -synonym: "m5Cm" EXACT RNAMOD [] -xref: RNAMOD:026 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001288 -name: N4_acetyl_2_prime_O_methylcytidine -def: "N4-acetyl-2'-O-methylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "ac4Cm" EXACT RNAMOD [] -synonym: "N4 acetyl 2 prime O methylcytidine" EXACT [] -synonym: "N4-acetyl-2'-O-methylcytidine" EXACT [] -xref: RNAMOD:027 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001289 -name: lysidine -def: "Lysidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "k2C" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Lysidine "wiki" -xref: RNAMOD:028 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001290 -name: N4_methylcytidine -def: "N4-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4C" EXACT RNAMOD [] -synonym: "N4 methylcytidine" EXACT [] -synonym: "N4-methylcytidine" EXACT [] -xref: RNAMOD:082 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001291 -name: N4_2_prime_O_dimethylcytidine -def: "N4,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4Cm" EXACT RNAMOD [] -synonym: "N4 2 prime O dimethylcytidine" EXACT [] -synonym: "N4,2'-O-dimethylcytidine" EXACT [] -xref: RNAMOD:083 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001292 -name: five_hydroxymethylcytidine -def: "5-hydroxymethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxymethylcytidine" EXACT [] -synonym: "five hydroxymethylcytidine" EXACT [] -synonym: "hm5C" EXACT RNAMOD [] -xref: RNAMOD:084 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001293 -name: five_formyl_two_prime_O_methylcytidine -def: "5-formyl-2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formyl-2'-O-methylcytidine" EXACT [] -synonym: "f5Cm" EXACT RNAMOD [] -synonym: "five formyl two prime O methylcytidine" EXACT [] -xref: RNAMOD:095 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001294 -name: N4_N4_2_prime_O_trimethylcytidine -def: "N4_N4_2_prime_O_trimethylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "m42Cm" EXACT RNAMOD [] -synonym: "N4,N4,2'-O-trimethylcytidine" EXACT [] -xref: RNAMOD:107 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001295 -name: one_methyladenosine -def: "1_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyladenosine" EXACT [] -synonym: "m1A" EXACT RNAMOD [] -synonym: "one methyladenosine" EXACT [] -xref: RNAMOD:001 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001296 -name: two_methyladenosine -def: "2_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methyladenosine" EXACT [] -synonym: "m2A" EXACT RNAMOD [] -synonym: "two methyladenosine" EXACT [] -xref: RNAMOD:002 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001297 -name: N6_methyladenosine -def: "N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6A" EXACT RNAMOD [] -synonym: "N6 methyladenosine" EXACT [] -synonym: "N6-methyladenosine" EXACT [] -xref: RNAMOD:003 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001298 -name: two_prime_O_methyladenosine -def: "2prime_O_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyladenosine" EXACT [] -synonym: "Am" EXACT RNAMOD [] -synonym: "two prime O methyladenosine" EXACT [] -xref: RNAMOD:004 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001299 -name: two_methylthio_N6_methyladenosine -def: "2_methylthio_N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-methyladenosine" EXACT [] -synonym: "ms2m6A" EXACT RNAMOD [] -synonym: "two methylthio N6 methyladenosine" EXACT [] -xref: RNAMOD:005 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001300 -name: N6_isopentenyladenosine -def: "N6_isopentenyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "i6A" EXACT RNAMOD [] -synonym: "N6 isopentenyladenosine" EXACT [] -synonym: "N6-isopentenyladenosine" EXACT [] -xref: RNAMOD:006 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001301 -name: two_methylthio_N6_isopentenyladenosine -def: "2_methylthio_N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-isopentenyladenosine" EXACT [] -synonym: "ms2i6A" EXACT RNAMOD [] -synonym: "two methylthio N6 isopentenyladenosine" EXACT [] -xref: RNAMOD:007 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001302 -name: N6_cis_hydroxyisopentenyl_adenosine -def: "N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "io6A" EXACT RNAMOD [] -synonym: "N6 cis hydroxyisopentenyl adenosine" EXACT [] -synonym: "N6-(cis-hydroxyisopentenyl)adenosine" EXACT [] -xref: RNAMOD:008 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001303 -name: two_methylthio_N6_cis_hydroxyisopentenyl_adenosine -def: "2_methylthio_N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-(cis-hydroxyisopentenyl) adenosine" EXACT [] -synonym: "ms2io6A" EXACT RNAMOD [] -synonym: "two methylthio N6 cis hydroxyisopentenyl adenosine" EXACT [] -xref: RNAMOD:009 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001304 -name: N6_glycinylcarbamoyladenosine -def: "N6_glycinylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "g6A" EXACT RNAMOD [] -synonym: "N6 glycinylcarbamoyladenosine" EXACT [] -synonym: "N6-glycinylcarbamoyladenosine" EXACT [] -xref: RNAMOD:010 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001305 -name: N6_threonylcarbamoyladenosine -def: "N6_threonylcarbamoyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-threonylcarbamoyladenosine" EXACT [] -synonym: "t6A" EXACT RNAMOD [] -xref: RNAMOD:011 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001306 -name: two_methylthio_N6_threonyl_carbamoyladenosine -def: "2_methylthio_N6_threonyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-threonyl carbamoyladenosine" EXACT [] -synonym: "ms2t6A" EXACT RNAMOD [] -synonym: "two methylthio N6 threonyl carbamoyladenosine" EXACT [] -xref: RNAMOD:012 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001307 -name: N6_methyl_N6_threonylcarbamoyladenosine -def: "N6_methyl_N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6t6A" EXACT RNAMOD [] -synonym: "N6 methyl N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-methyl-N6-threonylcarbamoyladenosine" EXACT [] -xref: RNAMOD:013 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001308 -name: N6_hydroxynorvalylcarbamoyladenosine -def: "N6_hydroxynorvalylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "hn6A" EXACT RNAMOD [] -synonym: "N6 hydroxynorvalylcarbamoyladenosine" EXACT [] -synonym: "N6-hydroxynorvalylcarbamoyladenosine" EXACT [] -xref: RNAMOD:014 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001309 -name: two_methylthio_N6_hydroxynorvalyl_carbamoyladenosine -def: "2_methylthio_N6_hydroxynorvalyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-hydroxynorvalyl carbamoyladenosine" EXACT [] -synonym: "ms2hn6A" EXACT RNAMOD [] -synonym: "two methylthio N6 hydroxynorvalyl carbamoyladenosine" EXACT [] -xref: RNAMOD:015 -is_a: SO:0001273 ! 
modified_adenosine - -[Term] -id: SO:0001310 -name: two_prime_O_ribosyladenosine_phosphate -def: "2prime_O_ribosyladenosine_phosphate is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosyladenosine (phosphate)" EXACT [] -synonym: "Ar(p)" EXACT RNAMOD [] -synonym: "two prime O ribosyladenosine phosphate" EXACT [] -xref: RNAMOD:016 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001311 -name: N6_N6_dimethyladenosine -def: "N6_N6_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62A" EXACT RNAMOD [] -synonym: "N6,N6-dimethyladenosine" EXACT [] -xref: RNAMOD:080 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001312 -name: N6_2_prime_O_dimethyladenosine -def: "N6_2prime_O_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6Am" EXACT RNAMOD [] -synonym: "N6 2 prime O dimethyladenosine" EXACT [] -synonym: "N6,2'-O-dimethyladenosine" EXACT [] -xref: RNAMOD:088 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001313 -name: N6_N6_2_prime_O_trimethyladenosine -def: "N6_N6_2prime_O_trimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62Am" EXACT RNAMOD [] -synonym: "N6,N6,2'-O-trimethyladenosine" EXACT [] -xref: RNAMOD:089 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001314 -name: one_two_prime_O_dimethyladenosine -def: "1,2'-O-dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethyladenosine" EXACT [] -synonym: "m1Am" EXACT RNAMOD [] -synonym: "one two prime O dimethyladenosine" EXACT [] -xref: RNAMOD:097 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001315 -name: N6_acetyladenosine -def: "N6_acetyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "ac6A" EXACT RNAMOD [] -synonym: "N6 acetyladenosine" EXACT [] -synonym: "N6-acetyladenosine" EXACT [] -xref: RNAMOD:102 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001316 -name: seven_deazaguanosine -def: "7-deazaguanosine is a moddified guanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-deazaguanosine" RELATED [] -synonym: "seven deazaguanosine" EXACT [] -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001317 -name: queuosine -def: "Queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "Q" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Queuosine "wiki" -xref: RNAMOD:043 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001318 -name: epoxyqueuosine -def: "Epoxyqueuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "eQ" EXACT RNAMOD [] -xref: RNAMOD:044 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001319 -name: galactosyl_queuosine -def: "Galactosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "galactosyl queuosine" EXACT [] -synonym: "galactosyl-queuosine" EXACT [] -synonym: "galQ" EXACT RNAMOD [] -xref: RNAMOD:045 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001320 -name: mannosyl_queuosine -def: "Mannosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "mannosyl queuosine" EXACT [] -synonym: "mannosyl-queuosine" EXACT [] -synonym: "manQ" EXACT RNAMOD [] -xref: RNAMOD:046 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001321 -name: seven_cyano_seven_deazaguanosine -def: "7_cyano_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-cyano-7-deazaguanosine" EXACT [] -synonym: "preQ0" EXACT RNAMOD [] -synonym: "seven cyano seven deazaguanosine" EXACT [] -xref: RNAMOD:047 -is_a: SO:0001316 ! 
seven_deazaguanosine - -[Term] -id: SO:0001322 -name: seven_aminomethyl_seven_deazaguanosine -def: "7_aminomethyl_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-aminomethyl-7-deazaguanosine" EXACT [] -synonym: "preQ1" EXACT RNAMOD [] -synonym: "seven aminomethyl seven deazaguanosine" EXACT [] -xref: RNAMOD:048 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001323 -name: archaeosine -def: "Archaeosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "G+" EXACT RNAMOD [] -xref: RNAMOD:049 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001324 -name: one_methylguanosine -def: "1_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylguanosine" EXACT [] -synonym: "m1G" EXACT RNAMOD [] -synonym: "one methylguanosine" EXACT [] -xref: RNAMOD:029 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001325 -name: N2_methylguanosine -def: "N2_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2G" EXACT RNAMOD [] -synonym: "N2 methylguanosine" EXACT [] -synonym: "N2-methylguanosine" EXACT [] -xref: RNAMOD:030 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001326 -name: seven_methylguanosine -def: "7_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "7-methylguanosine" EXACT [] -synonym: "m7G" EXACT RNAMOD [] -synonym: "seven methylguanosine" EXACT [] -xref: RNAMOD:031 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001327 -name: two_prime_O_methylguanosine -def: "2prime_O_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylguanosine" EXACT [] -synonym: "Gm" EXACT RNAMOD [] -synonym: "two prime O methylguanosine" EXACT [] -xref: RNAMOD:032 -is_a: SO:0001276 ! 
modified_guanosine - -[Term] -id: SO:0001328 -name: N2_N2_dimethylguanosine -def: "N2_N2_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22G" EXACT RNAMOD [] -synonym: "N2,N2-dimethylguanosine" EXACT [] -xref: RNAMOD:033 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001329 -name: N2_2_prime_O_dimethylguanosine -def: "N2_2prime_O_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2Gm" EXACT RNAMOD [] -synonym: "N2 2 prime O dimethylguanosine" EXACT [] -synonym: "N2,2'-O-dimethylguanosine" EXACT [] -xref: RNAMOD:034 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001330 -name: N2_N2_2_prime_O_trimethylguanosine -def: "N2_N2_2prime_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22Gmv" EXACT RNAMOD [] -synonym: "N2,N2,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:035 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001331 -name: two_prime_O_ribosylguanosine_phosphate -def: "2prime_O_ribosylguanosine_phosphate is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosylguanosine (phosphate)" EXACT [] -synonym: "Gr(p)" EXACT RNAMOD [] -synonym: "two prime O ribosylguanosine phosphate" EXACT [] -xref: RNAMOD:036 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001332 -name: wybutosine -def: "Wybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "yW" EXACT RNAMOD [] -xref: RNAMOD:037 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001333 -name: peroxywybutosine -def: "Peroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "o2yW" EXACT RNAMOD [] -xref: RNAMOD:038 -is_a: SO:0001276 ! 
modified_guanosine - -[Term] -id: SO:0001334 -name: hydroxywybutosine -def: "Hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW" EXACT RNAMOD [] -xref: RNAMOD:039 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001335 -name: undermodified_hydroxywybutosine -def: "Undermodified_hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW*" EXACT RNAMOD [] -synonym: "undermodified hydroxywybutosine" EXACT [] -xref: RNAMOD:040 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001336 -name: wyosine -def: "Wyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "IMG" EXACT RNAMOD [] -xref: RNAMOD:041 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001337 -name: methylwyosine -def: "Methylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mimG" EXACT RNAMOD [] -xref: RNAMOD:042 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001338 -name: N2_7_dimethylguanosine -def: "N2_7_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7G" EXACT RNAMOD [] -synonym: "N2 7 dimethylguanosine" EXACT [] -synonym: "N2,7-dimethylguanosine" EXACT [] -xref: RNAMOD:090 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001339 -name: N2_N2_7_trimethylguanosine -def: "N2_N2_7_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,2,7G" EXACT RNAMOD [] -synonym: "N2,N2,7-trimethylguanosine" EXACT [] -xref: RNAMOD:091 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001340 -name: one_two_prime_O_dimethylguanosine -def: "1_2prime_O_dimethylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylguanosine" EXACT [] -synonym: "m1Gm" EXACT RNAMOD [] -synonym: "one two prime O dimethylguanosine" EXACT [] -xref: RNAMOD:096 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001341 -name: four_demethylwyosine -def: "4_demethylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-demethylwyosine" EXACT [] -synonym: "four demethylwyosine" EXACT [] -synonym: "imG-14" EXACT RNAMOD [] -xref: RNAMOD:100 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001342 -name: isowyosine -def: "Isowyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "imG2" EXACT RNAMOD [] -xref: RNAMOD:101 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001343 -name: N2_7_2prirme_O_trimethylguanosine -def: "N2_7_2prirme_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7Gm" EXACT RNAMOD [] -synonym: "N2 7 2prirme O trimethylguanosine" EXACT [] -synonym: "N2,7,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:106 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001344 -name: five_methyluridine -def: "5_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methyluridine" EXACT [] -synonym: "five methyluridine" EXACT [] -synonym: "m5U" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/5-methyluridine "wiki" -xref: RNAMOD:052 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001345 -name: two_prime_O_methyluridine -def: "2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyluridine" EXACT [] -synonym: "two prime O methyluridine" EXACT [] -synonym: "Um" EXACT RNAMOD [] -xref: RNAMOD:053 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001346 -name: five_two_prime_O_dimethyluridine -def: "5_2_prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethyluridine" EXACT [] -synonym: "five two prime O dimethyluridine" EXACT [] -synonym: "m5Um" EXACT RNAMOD [] -xref: RNAMOD:054 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001347 -name: one_methylpseudouridine -def: "1_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylpseudouridine" EXACT [] -synonym: "m1Y" EXACT RNAMOD [] -synonym: "one methylpseudouridine" EXACT [] -xref: RNAMOD:055 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001348 -name: two_prime_O_methylpseudouridine -def: "2prime_O_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylpseudouridine" EXACT [] -synonym: "two prime O methylpseudouridine" EXACT [] -synonym: "Ym" EXACT RNAMOD [] -xref: RNAMOD:056 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001349 -name: two_thiouridine -def: "2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiouridine" EXACT [] -synonym: "s2U" EXACT RNAMOD [] -synonym: "two thiouridine" EXACT [] -xref: RNAMOD:057 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001350 -name: four_thiouridine -def: "4_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-thiouridine" EXACT [] -synonym: "four thiouridine" EXACT [] -synonym: "s4U" EXACT RNAMOD [] -xref: RNAMOD:058 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001351 -name: five_methyl_2_thiouridine -def: "5_methyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyl-2-thiouridine" EXACT [] -synonym: "five methyl 2 thiouridine" EXACT [] -synonym: "m5s2U" EXACT RNAMOD [] -xref: RNAMOD:059 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001352 -name: two_thio_two_prime_O_methyluridine -def: "2_thio_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thio-2'-O-methyluridine" EXACT [] -synonym: "s2Um" EXACT RNAMOD [] -synonym: "two thio two prime O methyluridine" EXACT [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001353 -name: three_three_amino_three_carboxypropyl_uridine -def: "3_3_amino_3_carboxypropyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-(3-amino-3-carboxypropyl)uridine" EXACT [] -synonym: "acp3U" EXACT RNAMOD [] -xref: RNAMOD:061 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001354 -name: five_hydroxyuridine -def: "5_hydroxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxyuridine" EXACT [] -synonym: "five hydroxyuridine" EXACT [] -synonym: "ho5U" EXACT RNAMOD [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001355 -name: five_methoxyuridine -def: "5_methoxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxyuridine" EXACT [] -synonym: "five methoxyuridine" EXACT [] -synonym: "mo5U" EXACT RNAMOD [] -xref: RNAMOD:063 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001356 -name: uridine_five_oxyacetic_acid -def: "Uridine_5_oxyacetic_acid is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "cmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid" EXACT [] -synonym: "uridine five oxyacetic acid" EXACT [] -xref: RNAMOD:064 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001357 -name: uridine_five_oxyacetic_acid_methyl_ester -def: "Uridine_5_oxyacetic_acid_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mcmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid methyl ester" EXACT [] -synonym: "uridine five oxyacetic acid methyl ester" EXACT [] -xref: RNAMOD:065 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001358 -name: five_carboxyhydroxymethyl_uridine -def: "5_carboxyhydroxymethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine" EXACT [] -synonym: "chm5U" EXACT RNAMOD [] -synonym: "five carboxyhydroxymethyl uridine" EXACT [] -xref: RNAMOD:066 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001359 -name: five_carboxyhydroxymethyl_uridine_methyl_ester -def: "5_carboxyhydroxymethyl_uridine_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine methyl ester" EXACT [] -synonym: "five carboxyhydroxymethyl uridine methyl ester" EXACT [] -synonym: "mchm5U" EXACT RNAMOD [] -xref: RNAMOD:067 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001360 -name: five_methoxycarbonylmethyluridine -def: "Five_methoxycarbonylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyluridine" EXACT [] -synonym: "five methoxycarbonylmethyluridine" EXACT [] -synonym: "mcm5U" EXACT RNAMOD [] -xref: RNAMOD:068 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001361 -name: five_methoxycarbonylmethyl_two_prime_O_methyluridine -def: "Five_methoxycarbonylmethyl_2_prime_O_methyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five methoxycarbonylmethyl two prime O methyluridine" EXACT [] -synonym: "mcm5Um" EXACT RNAMOD [] -xref: RNAMOD:069 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001362 -name: five_methoxycarbonylmethyl_two_thiouridine -def: "5_methoxycarbonylmethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2-thiouridine" EXACT [] -synonym: "five methoxycarbonylmethyl two thiouridine" EXACT [] -synonym: "mcm5s2U" EXACT RNAMOD [] -xref: RNAMOD:070 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001363 -name: five_aminomethyl_two_thiouridine -def: "5_aminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-aminomethyl-2-thiouridine" EXACT [] -synonym: "five aminomethyl two thiouridine" EXACT [] -synonym: "nm5s2U" EXACT RNAMOD [] -xref: RNAMOD:071 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001364 -name: five_methylaminomethyluridine -def: "5_methylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyluridine" EXACT [] -synonym: "five methylaminomethyluridine" EXACT [] -synonym: "mnm5U" EXACT RNAMOD [] -xref: RNAMOD:072 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001365 -name: five_methylaminomethyl_two_thiouridine -def: "5_methylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-thiouridine" EXACT [] -synonym: "five methylaminomethyl two thiouridine" EXACT [] -synonym: "mnm5s2U" EXACT RNAMOD [] -xref: RNAMOD:073 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001366 -name: five_methylaminomethyl_two_selenouridine -def: "5_methylaminomethyl_2_selenouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-selenouridine" EXACT [] -synonym: "five methylaminomethyl two selenouridine" EXACT [] -synonym: "mnm5se2U" EXACT RNAMOD [] -xref: RNAMOD:074 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001367 -name: five_carbamoylmethyluridine -def: "5_carbamoylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyluridine" EXACT [] -synonym: "five carbamoylmethyluridine" EXACT [] -synonym: "ncm5U" EXACT RNAMOD [] -xref: RNAMOD:075 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001368 -name: five_carbamoylmethyl_two_prime_O_methyluridine -def: "5_carbamoylmethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five carbamoylmethyl two prime O methyluridine" EXACT [] -synonym: "ncm5Um" EXACT RNAMOD [] -xref: RNAMOD:076 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001369 -name: five_carboxymethylaminomethyluridine -def: "5_carboxymethylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyluridine" EXACT [] -synonym: "cmnm5U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyluridine" EXACT [] -xref: RNAMOD:077 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001370 -name: five_carboxymethylaminomethyl_two_prime_O_methyluridine -def: "5_carboxymethylaminomethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl- 2'-O-methyluridine" EXACT [] -synonym: "cmnm5Um" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two prime O methyluridine" EXACT [] -xref: RNAMOD:078 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001371 -name: five_carboxymethylaminomethyl_two_thiouridine -def: "5_carboxymethylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl-2-thiouridine" EXACT [] -synonym: "cmnm5s2U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two thiouridine" EXACT [] -xref: RNAMOD:079 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001372 -name: three_methyluridine -def: "3_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methyluridine" EXACT [] -synonym: "m3U" EXACT RNAMOD [] -synonym: "three methyluridine" EXACT [] -xref: RNAMOD:085 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001373 -name: one_methyl_three_three_amino_three_carboxypropyl_pseudouridine -def: "1_methyl_3_3_amino_3_carboxypropyl_pseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyl-3-(3-amino-3-carboxypropyl) pseudouridine" EXACT [] -synonym: "m1acp3Y" EXACT RNAMOD [] -xref: RNAMOD:086 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001374 -name: five_carboxymethyluridine -def: "5_carboxymethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethyluridine" EXACT [] -synonym: "cm5U" EXACT RNAMOD [] -synonym: "five carboxymethyluridine" EXACT [] -xref: RNAMOD:087 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001375 -name: three_two_prime_O_dimethyluridine -def: "3_2prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3,2'-O-dimethyluridine" EXACT [] -synonym: "m3Um" EXACT RNAMOD [] -synonym: "three two prime O dimethyluridine" EXACT [] -xref: RNAMOD:092 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001376 -name: five_methyldihydrouridine -def: "5_methyldihydrouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyldihydrouridine" EXACT [] -synonym: "five methyldihydrouridine" EXACT [] -synonym: "m5D" EXACT RNAMOD [] -xref: RNAMOD:093 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001377 -name: three_methylpseudouridine -def: "3_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylpseudouridine" EXACT [] -synonym: "m3Y" EXACT RNAMOD [] -synonym: "three methylpseudouridine" EXACT [] -xref: RNAMOD:094 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001378 -name: five_taurinomethyluridine -def: "5_taurinomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyluridine" EXACT [] -synonym: "five taurinomethyluridine" EXACT [] -synonym: "tm5U" EXACT RNAMOD [] -xref: RNAMOD:098 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001379 -name: five_taurinomethyl_two_thiouridine -def: "5_taurinomethyl_2_thiouridineis a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyl-2-thiouridine" EXACT [] -synonym: "five taurinomethyl two thiouridine" EXACT [] -synonym: "tm5s2U" EXACT RNAMOD [] -xref: RNAMOD:099 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001380 -name: five_isopentenylaminomethyl_uridine -def: "5_isopentenylaminomethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)uridine" EXACT [] -synonym: "five isopentenylaminomethyl uridine" EXACT [] -synonym: "inm5U" EXACT RNAMOD [] -xref: RNAMOD:103 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001381 -name: five_isopentenylaminomethyl_two_thiouridine -def: "5_isopentenylaminomethyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2-thiouridine" EXACT [] -synonym: "five isopentenylaminomethyl two thiouridine" EXACT [] -synonym: "inm5s2U" EXACT RNAMOD [] -xref: RNAMOD:104 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001382 -name: five_isopentenylaminomethyl_two_prime_O_methyluridine -def: "5_isopentenylaminomethyl_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2'-O-methyluridine" EXACT [] -synonym: "five isopentenylaminomethyl two prime O methyluridine" EXACT [] -synonym: "inm5Um" EXACT RNAMOD [] -xref: RNAMOD:105 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001383 -name: histone_binding_site -def: "A region of a DNA molecule that is bound by a histone." [SO:ke] -synonym: "histone binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0001384 -name: CDS_fragment -synonym: "CDS fragment" EXACT [] -synonym: "incomplete CDS" EXACT [] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001385 -name: modified_amino_acid_feature -def: "A post translationally modified amino acid feature." [SO:ke] -synonym: "modified amino acid feature" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001386 -name: modified_glycine -def: "A post translationally modified glycine amino acid feature." [SO:ke] -synonym: "ModGly" EXACT AAMOD [] -synonym: "modified glycine" EXACT [] -xref: MOD:00908 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001387 -name: modified_L_alanine -def: "A post translationally modified alanine amino acid feature." [SO:ke] -synonym: "ModAla" EXACT AAMOD [] -synonym: "modified L alanine" EXACT [] -synonym: "modified L-alanine" EXACT [] -xref: MOD:00901 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001388 -name: modified_L_asparagine -def: "A post translationally modified asparagine amino acid feature." [SO:ke] -synonym: "ModAsn" EXACT AAMOD [] -synonym: "modified L asparagine" EXACT [] -synonym: "modified L-asparagine" EXACT [] -xref: MOD:00903 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001389 -name: modified_L_aspartic_acid -def: "A post translationally modified aspartic acid amino acid feature." [SO:ke] -synonym: "ModAsp" EXACT AAMOD [] -synonym: "modified L aspartic acid" EXACT [] -synonym: "modified L-aspartic acid" EXACT [] -xref: MOD:00904 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001390 -name: modified_L_cysteine -def: "A post translationally modified cysteine amino acid feature." [SO:ke] -synonym: "ModCys" EXACT AAMOD [] -synonym: "modified L cysteine" EXACT [] -synonym: "modified L-cysteine" EXACT [] -xref: MOD:00905 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001391 -name: modified_L_glutamic_acid -synonym: "ModGlu" EXACT AAMOD [] -synonym: "modified L glutamic acid" EXACT [] -synonym: "modified L-glutamic acid" EXACT [] -xref: MOD:00906 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001392 -name: modified_L_threonine -def: "A post translationally modified threonine amino acid feature." [SO:ke] -synonym: "modified L threonine" EXACT [] -synonym: "modified L-threonine" EXACT [] -synonym: "ModThr" EXACT AAMOD [] -xref: MOD:00917 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001393 -name: modified_L_tryptophan -def: "A post translationally modified tryptophan amino acid feature." [SO:ke] -synonym: "modified L tryptophan" EXACT [] -synonym: "modified L-tryptophan" EXACT [] -synonym: "ModTrp" EXACT AAMOD [] -xref: MOD:00918 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001394 -name: modified_L_glutamine -def: "A post translationally modified glutamine amino acid feature." 
[SO:ke] -synonym: "ModGln" EXACT [] -synonym: "modified L glutamine" EXACT [] -synonym: "modified L-glutamine" EXACT [] -xref: MOD:00907 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001395 -name: modified_L_methionine -def: "A post translationally modified methionine amino acid feature." [SO:ke] -synonym: "modified L methionine" EXACT [] -synonym: "modified L-methionine" EXACT [] -synonym: "ModMet" EXACT AAMOD [] -xref: MOD:00913 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001396 -name: modified_L_isoleucine -def: "A post translationally modified isoleucine amino acid feature." [SO:ke] -synonym: "modified L isoleucine" EXACT [] -synonym: "modified L-isoleucine" EXACT [] -synonym: "ModIle" EXACT AAMOD [] -xref: MOD:00910 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001397 -name: modified_L_phenylalanine -def: "A post translationally modified phenylalanine amino acid feature." [SO:ke] -synonym: "modified L phenylalanine" EXACT [] -synonym: "modified L-phenylalanine" EXACT [] -synonym: "ModPhe" EXACT AAMOD [] -xref: MOD:00914 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001398 -name: modified_L_histidine -def: "A post translationally modified histidie amino acid feature." [SO:ke] -synonym: "ModHis" EXACT [] -synonym: "modified L histidine" EXACT [] -synonym: "modified L-histidine" EXACT [] -xref: MOD:00909 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001399 -name: modified_L_serine -def: "A post translationally modified serine amino acid feature." [SO:ke] -synonym: "modified L serine" EXACT [] -synonym: "modified L-serine" EXACT [] -synonym: "MosSer" EXACT AAMOD [] -xref: MOD:00916 "http://www.psidev.info/index.php?q=node/104" -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001400 -name: modified_L_lysine -def: "A post translationally modified lysine amino acid feature." 
[SO:ke] -synonym: "modified L lysine" EXACT [] -synonym: "modified L-lysine" EXACT [] -synonym: "ModLys" EXACT AAMOD [] -xref: MOD:00912 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001401 -name: modified_L_leucine -def: "A post translationally modified leucine amino acid feature." [SO:ke] -synonym: "modified L leucine" EXACT [] -synonym: "modified L-leucine " EXACT [] -synonym: "ModLeu" EXACT AAMOD [] -xref: MOD:00911 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001402 -name: modified_L_selenocysteine -def: "A post translationally modified selenocysteine amino acid feature." [SO:ke] -synonym: "modified L selenocysteine" EXACT [] -synonym: "modified L-selenocysteine" EXACT [] -xref: MOD:01158 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001403 -name: modified_L_valine -def: "A post translationally modified valine amino acid feature." [SO:ke] -synonym: "modified L valine" EXACT [] -synonym: "modified L-valine" EXACT [] -synonym: "ModVal" EXACT AAMOD [] -xref: MOD:00920 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001404 -name: modified_L_proline -def: "A post translationally modified proline amino acid feature." [SO:ke] -synonym: "modified L proline" EXACT [] -synonym: "modified L-proline " EXACT [] -synonym: "ModPro" EXACT AAMOD [] -xref: MOD:00915 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001405 -name: modified_L_tyrosine -def: "A post translationally modified tyrosine amino acid feature." [SO:ke] -synonym: "modified L tyrosine" EXACT [] -synonym: "modified L-tyrosine" EXACT [] -synonym: "ModTry" EXACT AAMOD [] -xref: MOD:00919 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001406 -name: modified_L_arginine -def: "A post translationally modified arginine amino acid feature." [SO:ke] -synonym: "ModArg" EXACT AAMOD [] -synonym: "modified L arginine" EXACT [] -synonym: "modified L-arginine" EXACT [] -xref: MOD:00902 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001407 -name: peptidyl -def: "An attribute describing the nature of a proteinaceous polymer, where by the amino acid units are joined by peptide bonds." [SO:ke] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0001408 -name: cleaved_for_gpi_anchor_region -def: "The C-terminal residues of a polypeptide which are exchanged for a GPI-anchor." [EBI:rh] -synonym: "cleaved for gpi anchor region" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001409 -name: biomaterial_region -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001410 -name: experimental_feature -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -def: "A region that is defined according to its relations with other regions within the same sequence." [SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001413 -name: translocation_breakpoint -def: "The point within a chromosome where a translocation begins or ends." [SO:cb] -synonym: "translocation breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001414 -name: insertion_breakpoint -def: "The point within a chromosome where a insertion begins or ends." 
[SO:cb] -synonym: "insertion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001415 -name: deletion_breakpoint -def: "The point within a chromosome where a deletion begins or ends." [SO:cb] -synonym: "deletion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001416 -name: five_prime_flanking_region -def: "A flanking region located five prime of a specific region." [SO:chado] -synonym: "5' flanking region" RELATED [] -synonym: "five prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001417 -name: three_prime_flanking_region -def: "A flanking region located three prime of a specific region." [SO:chado] -synonym: "3' flanking region" RELATED [] -synonym: "three prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001418 -name: transcribed_fragment -def: "An experimental region, defined by a tiling array experiment to be transcribed at some level." [SO:ke] -comment: Term requested by the MODencode group. -synonym: "transcribed fragment" EXACT [] -synonym: "transfrag" RELATED [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001419 -name: cis_splice_site -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -def: "Primary transcript region bordering trans-splice junction." [SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001421 -name: splice_junction -def: "The boundary between an intron and an exon." [SO:ke] -synonym: "splice boundary" EXACT [] -synonym: "splice junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001422 -name: conformational_switch -def: "A region of a polypeptide, involved in the transition from one conformational state to another." 
[SO:ke] -comment: MM Young, K Kirshenbaum, KA Dill & S Highsmith. Predicting conformational switches in proteins. Protein Science, 1999, 8, 1752-64. K. Kirshenbaum, M.M. Young and S. Highsmith. Predicting Allosteric Switches in Myosins. Protein Science 8(9):1806-1815. 1999. -synonym: "polypeptide conformational switch" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001423 -name: dye_terminator_read -def: "A read produced by the dye terminator method of sequencing." [SO:ke] -synonym: "dye terminator read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001424 -name: pyrosequenced_read -def: "A read produced by pyrosequencing technology." [SO:ke] -comment: An example is a read produced by Roche 454 technology. -synonym: "pyorsequenced read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001425 -name: ligation_based_read -def: "A read produced by ligation based sequencing technologies." [SO:ke] -comment: An example of this kind of read is one produced by ABI SOLiD. -synonym: "ligation based read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001426 -name: polymerase_synthesis_read -def: "A read produced by the polymerase based sequence by synthesis method." [SO:ke] -comment: An example is a read produced by Illumina technology. -synonym: "polymerase synthesis read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001427 -name: cis_regulatory_frameshift_element -def: "A structural region in an RNA molecule which promotes ribosomal frameshifting of cis coding sequence." [RFAM:jd] -synonym: "cis regulatory frameshift element" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001428 -name: expressed_sequence_assembly -def: "A sequence assembly derived from expressed sequences." [SO:ke] -comment: From tracker [ 2372385 ] expressed_sequence_assembly. -synonym: "expressed sequence assembly" EXACT [] -is_a: SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0001429 -name: DNA_binding_site -def: "A region of a molecule that binds to DNA." [SO:ke] -synonym: "DNA binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001430 -name: polyA_junction -def: "The boundary between the UTR and the polyA sequence." [SO:ke] -synonym: "polyA junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001431 -name: cryptic_gene -def: "A gene that is not transcribed under normal conditions and is not critical to normal cellular functioning." [SO:ke] -synonym: "cryptic gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000976 ! cryptic - -[Term] -id: SO:0001432 -name: sequence_variant_affecting_polyadenylation -synonym: "mutation affecting polyadenylation" RELATED [] -synonym: "sequence variant affecting polyadenylation" EXACT [] -is_a: SO:1000070 ! sequence_variant_affecting_transcript_processing - -[Term] -id: SO:0001433 -name: three_prime_RACE_clone -def: "A three prime RACE (Rapid Amplification of cDNA Ends) clone is a cDNA clone copied from the 3' end of an mRNA (using a poly-dT primer to capture the polyA tail and a gene-specific or randomly primed 5' primer), and spliced into a vector for propagation in a suitable host." [modENCODE:nlw] -synonym: "3' RACE clone" RELATED [] -is_a: SO:0000317 ! cDNA_clone - -[Term] -id: SO:0001434 -name: cassette_pseudogene -def: "A cassette pseudogene is a kind of gene in an inactive form which may recombine at a telomeric locus to form a functional copy." [SO:ke] -comment: Requested by the Trypanosome community. -synonym: "cassette pseudogene" EXACT [] -synonym: "cassette type psedogene" RELATED [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0001435 -name: alanine -comment: A place holder for a cross product with chebi. -synonym: "A" EXACT aa1 [] -synonym: "Ala" EXACT aa3 [] -is_a: SO:0001237 ! 
amino_acid - -[Term] -id: SO:0001436 -name: valine -comment: A place holder for a cross product with chebi. -synonym: "V" EXACT aa1 [] -synonym: "Val" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001437 -name: leucine -comment: A place holder for a cross product with chebi. -synonym: "L" EXACT aa1 [] -synonym: "Leu" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001438 -name: isoleucine -comment: A place holder for a cross product with chebi. -synonym: "I" EXACT aa1 [] -synonym: "Ile" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001439 -name: proline -comment: A place holder for a cross product with chebi. -synonym: "P" EXACT aa1 [] -synonym: "Pro" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001440 -name: tryptophan -comment: A place holder for a cross product with chebi. -synonym: "Trp" EXACT aa3 [] -synonym: "W" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001441 -name: phenylalanine -comment: A place holder for a cross product with chebi. -synonym: "F" EXACT aa1 [] -synonym: "Phe" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001442 -name: methionine -comment: A place holder for a cross product with chebi. -synonym: "M" EXACT aa1 [] -synonym: "Met" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001443 -name: glycine -comment: A place holder for a cross product with chebi. -synonym: "G" EXACT aa1 [] -synonym: "Gly" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001444 -name: serine -comment: A place holder for a cross product with chebi. -synonym: "S" EXACT aa1 [] -synonym: "Ser" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001445 -name: threonine -comment: A place holder for a cross product with chebi. -synonym: "T" EXACT aa1 [] -synonym: "Thr" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001446 -name: tyrosine -comment: A place holder for a cross product with chebi. 
-synonym: "Tyr" EXACT aa3 [] -synonym: "Y" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001447 -name: cysteine -comment: A place holder for a cross product with chebi. -synonym: "C" EXACT aa1 [] -synonym: "Cys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001448 -name: glutamine -comment: A place holder for a cross product with chebi. -synonym: "Gln" EXACT aa3 [] -synonym: "Q" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001449 -name: asparagine -comment: A place holder for a cross product with chebi. -synonym: "Asn" EXACT aa3 [] -synonym: "N" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001450 -name: lysine -comment: A place holder for a cross product with chebi. -synonym: "K" EXACT aa1 [] -synonym: "Lys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001451 -name: argenine -comment: A place holder for a cross product with chebi. -synonym: "Arg" EXACT aa3 [] -synonym: "R" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001452 -name: histidine -comment: A place holder for a cross product with chebi. -synonym: "H" EXACT aa1 [] -synonym: "His" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001453 -name: aspartic_acid -comment: A place holder for a cross product with chebi. -synonym: "Asp" EXACT aa3 [] -synonym: "aspartic acid" EXACT [] -synonym: "D" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001454 -name: glutamic_acid -comment: A place holder for a cross product with chebi. -synonym: "E" EXACT aa1 [] -synonym: "Glu" EXACT aa3 [] -synonym: "glutamic acid" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001455 -name: selenocysteine -comment: A place holder for a cross product with chebi. -synonym: "Sec" EXACT aa3 [] -synonym: "U" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001456 -name: pyrrolysine -comment: A place holder for a cross product with chebi. 
-synonym: "O" EXACT aa1 [] -synonym: "Pyl" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001457 -name: transcribed_cluster -def: "A region defined by a set of transcribed sequences from the same gene or expressed pseudogene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "transcribed cluster" EXACT [] -synonym: "unigene cluster" RELATED [] -is_a: SO:0001410 ! experimental_feature -relationship: has_part SO:0000695 ! reagent - -[Term] -id: SO:0001458 -name: unigene_cluster -def: "A kind of transcribed_cluster defined by a set of transcribed sequences from the a unique gene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "unigene cluster" RELATED [] -is_a: SO:0001457 ! transcribed_cluster - -[Term] -id: SO:0001459 -name: CRISPR -def: "Clustered Palindromic Repeats interspersed with bacteriophage derived spacer sequences." [RFAM:jd] -synonym: "Clustered_Regularly_Interspaced_Short_Palindromic_Repeat" EXACT [] -synonym: "CRISPR element" EXACT [] -xref: http:en.wikipedia.org/wiki/CRISPR -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0001460 -name: insulator_binding_site -def: "A protein_binding_site located within an insulator." [SO:ke] -comment: See tracker ID 2060908. -synonym: "insulator binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -relationship: part_of SO:0000627 ! insulator - -[Term] -id: SO:0001461 -name: enhancer_binding_site -def: "A protein_binding_site located within an enhancer." [SO:ke] -synonym: "enhancer binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -relationship: part_of SO:0000165 ! enhancer - -[Term] -id: SO:0001462 -name: contig_collection -def: "A collection of contigs." [SO:ke] -comment: See tracker ID: 2138359. -synonym: "contig collection" EXACT [] -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001085 ! sequence_conflict -intersection_of: has_part SO:0000149 ! 
contig - -[Term] -id: SO:0001463 -name: lincRNA -def: "A multiexonic non-coding RNA transcribed by RNA polymerase II." [PMID:19182780, SO:ke] -synonym: "large intervening non-coding RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001464 -name: UST -def: "An EST spanning part or all of the untranslated regions of a protein-coding transcript." [SO:nlw] -synonym: "UTR sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001465 -name: three_prime_UST -def: "A UST located in the 3'UTR of a protein-coding transcript." [SO:nlw] -synonym: "3' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001466 -name: five_prime_UST -def: "An UST located in the 5'UTR of a protein-coding transcript." [SO:nlw] -synonym: "5' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001467 -name: RST -def: "A tag produced from a single sequencing read from a RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "RACE sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001468 -name: three_prime_RST -def: "A tag produced from a single sequencing read from a 3'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "3' RST" EXACT [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001469 -name: five_prime_RST -def: "A tag produced from a single sequencing read from a 5'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "5' RST" RELATED [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001470 -name: UST_match -def: "A match against an UST sequence." [SO:nlw] -synonym: "UST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001471 -name: RST_match -def: "A match against an RST sequence." [SO:nlw] -synonym: "RST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001472 -name: primer_match -def: "A nucleotide match to a primer sequence." [SO:nlw] -synonym: "primer match" EXACT [] -is_a: SO:0000347 ! 
nucleotide_match - -[Term] -id: SO:0001473 -name: miRNA_antiguide -def: "A region of the pri miRNA that basepairs with the guide to form the hairpin." [SO:ke] -synonym: "miRNA antiguide " EXACT [] -synonym: "miRNA passenger strand" EXACT [] -synonym: "miRNA star" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-05-27T03:35:43Z - -[Term] -id: SO:0001474 -name: trans_splice_junction -def: "The boundary between the spliced leader and the first exon of the mRNA." [SO:ke] -synonym: "trans-splice junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2009-07-13T04:50:49Z - -[Term] -id: SO:0001475 -name: outron -def: "A region of a primary transcript, that is removed via trans splicing." [PMID:16401417, SO:ke] -is_a: SO:0000835 ! primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-07-14T11:36:08Z - -[Term] -id: SO:0001476 -name: natural_plasmid -def: "A plasmid that occurs naturally." [SO:xp] -synonym: "natural plasmid" EXACT [] -is_a: SO:0000155 ! plasmid -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000782 ! natural -created_by: kareneilbeck -creation_date: 2009-09-01T03:43:06Z - -[Term] -id: SO:0001477 -name: gene_trap_construct -def: "A gene trap construct is a type of engineered plasmid which is designed to integrate into a genome and produce a fusion transcript between exons of the gene into which it inserts and a reporter element in the construct. Gene traps contain a splice acceptor, do not contain promoter elements for the reporter, and are mutagenic. Gene traps may be bicistronic with the second cassette containing a promoter driving an a selectable marker." [ZFIN:dh] -synonym: "gene trap construct" EXACT [] -is_a: SO:0000637 ! 
engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:49:09Z - -[Term] -id: SO:0001478 -name: promoter_trap_construct -def: "A promoter trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when inserted in close proximity to a promoter element. Promoter traps typically do not contain promoter elements and are mutagenic." [ZFIN:dh] -synonym: "promoter trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:52:01Z - -[Term] -id: SO:0001479 -name: enhancer_trap_construct -def: "An enhancer trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when the expression from a basic minimal promoter is enhanced by genomic enhancer elements. Enhancer traps contain promoter elements and are not usually mutagenic." [ZFIN:dh] -synonym: "enhancer trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:53:26Z - -[Term] -id: SO:0001480 -name: PAC_end -def: "A region of sequence from the end of a PAC clone that may provide a highly specific marker." [ZFIN:mh] -synonym: "PAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000154 ! PAC -created_by: kareneilbeck -creation_date: 2009-09-09T05:18:12Z - -[Term] -id: SO:0001481 -name: RAPD -def: "RAPD is a 'PCR product' where a sequence variant is identified through the use of PCR with random primers." [ZFIN:mh] -synonym: "Random Amplification Polymorphic DNA" EXACT [] -is_a: SO:0000006 ! PCR_product -created_by: kareneilbeck -creation_date: 2009-09-09T05:26:10Z - -[Term] -id: SO:0001482 -name: shadow_enhancer -synonym: "shadow enhancer" EXACT [] -is_a: SO:0000165 ! 
enhancer -created_by: kareneilbeck -creation_date: 2009-09-09T05:29:29Z - -[Term] -id: SO:0001483 -name: SNV -def: "SNVs are single base pair positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0001484 -name: X_element_combinatorial_repeat -def: "An X element combinatorial repeat is a repeat region located between the X element and the telomere or adjacent Y' element." [http://www.yeastgenome.org/help/glossary.html] -comment: X element combinatorial repeats contain Tbf1p binding sites,\nand possible functions include a role in telomerase-independent telomere\nmaintenance via recombination or as a barrier against transcriptional\nsilencing. These are usually present as a combination of one or more of\nseveral types of smaller elements (designated A, B, C, or D). This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "X element combinatorial repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T11:03:37Z - -[Term] -id: SO:0001485 -name: Y_prime_ element -def: "A Y' element is a repeat region (SO:0000657) located adjacent to telomeric repeats or X element combinatorial repeats, either as a single copy or tandem repeat of two to four copies." [http:http\://www.yeastgenome.org/help/glossary.html] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "Y' element" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! 
telomere -created_by: kareneilbeck -creation_date: 2009-11-10T12:08:57Z - -[Term] -id: SO:0001486 -name: standard_draft -def: "The status of a whole genome sequence, where the data is minimally filtered or unfiltered, from any number of sequencing platforms, and is assembled into contigs. Genome sequence of this quality may harbor regions of poor quality and can be relatively incomplete." [DOI:10.1126] -synonym: "standard draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:48:32Z - -[Term] -id: SO:0001487 -name: high_quality_draft -def: "The status of a whole genome sequence, where overall coverage represents at least 90 percent of the genome." [DOI:10.1126] -synonym: "high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:52:36Z - -[Term] -id: SO:0001488 -name: improved_high_quality_draft -def: "The status of a whole genome sequence, where additional work has been performed, using either manual or automated methods, such as gap resolution." [DOI:10.1126] -synonym: "improved high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:54:35Z - -[Term] -id: SO:0001489 -name: annotation_directed_improved_draft -def: "The status of a whole genome sequence,where annotation, and verification of coding regions has occurred." [DOI:10.1126] -synonym: "annotation directed improvement" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:57:10Z - -[Term] -id: SO:0001490 -name: noncontiguous_finished -def: "The status of a whole genome sequence, where the assembly is high quality, closure approaches have been successful for most gaps, misassemblies and low quality regions." [DOI:10.1126] -synonym: "non contiguous finished" EXACT [] -is_a: SO:0001499 ! 
whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:01:07Z - -[Term] -id: SO:0001491 -name: finished_genome -def: "The status of a whole genome sequence, with less than 1 error per 100,000 base pairs." [DOI:10.1126] -synonym: "finished" EXACT [] -synonym: "finished genome" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:04:43Z - -[Term] -id: SO:0001492 -name: intronic_regulatory_region -def: "A regulatory region that is part of an intron." [SO:ke] -synonym: "intronic regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000188 ! intron -created_by: kareneilbeck -creation_date: 2009-11-08T02:48:02Z - -[Term] -id: SO:0001493 -name: centromere_DNA_Element_I -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region\ncomposed of 8-11bp which enables binding by the centromere binding factor 1(Cbf1p)." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEI" EXACT [] -synonym: "Centromere DNA Element I" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000577 ! centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:47:23Z - -[Term] -id: SO:0001494 -name: centromere_DNA_Element_II -def: "A centromere DNA Element II (CDEII) is part a conserved region of the centromere, consisting of a consensus region that is AT-rich and ~ 75-100 bp in length." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEII" EXACT [] -synonym: "centromere DNA Element II" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000577 ! 
centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:51:26Z - -[Term] -id: SO:0001495 -name: centromere_DNA_Element_III -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region that consists of a 25-bp which enables binding by the centromere DNA binding factor 3 (CBF3) complex." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEIII" EXACT [] -synonym: "centromere DNA Element III" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000577 ! centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:54:47Z - -[Term] -id: SO:0001496 -name: telomeric_repeat -def: "The telomeric repeat is a repeat region, part of the chromosome, which in yeast, is a G-rich terminal sequence of the form (TG(1-3))n or more precisely ((TG)(1-6)TG(2-3))n." [PMID:8720065] -comment: The repeats are maintained by telomerase and there is generally 300 (+/-) 75 bp of TG(1-3) at a given end. Telomeric repeats function in completing chromosome replication and protecting the ends from degradation and end-to-end fusions. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880739. -synonym: "telomeric repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-09T06:00:42Z - -[Term] -id: SO:0001497 -name: X_element -def: "The X element is a conserved region, of the telomere, of ~475 bp that contains an ARS sequence and in most cases an Abf1p binding site." [http://www.yeastgenome.org/help/glossary.html#xelemcoresequence] -comment: Possible functions include roles in chromosomal segregation,\nmaintenance of chromosome stability, recombinational sequestering, or as a\nbarrier to transcriptional silencing. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "X element" RELATED [] -is_a: SO:0000330 ! 
conserved_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T10:56:54Z - -[Term] -id: SO:0001498 -name: YAC_end -def: "A region of sequence from the end of a YAC clone that may provide a highly specific marker." [SO:ke] -synonym: "YAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000152 ! YAC -created_by: kareneilbeck -creation_date: 2009-11-19T11:07:18Z - -[Term] -id: SO:0001499 -name: whole_genome_sequence_status -def: "The status of whole genome sequence." [DOI:10.1126] -comment: This terms and children were added to SO in response to tracker request by Patrick Chain. The paper Genome Project Standards in a New Era of Sequencing. Science October 9th 2009, addresses these terms. -synonym: "whole genome sequence status" EXACT [] -is_a: SO:0000905 ! status -created_by: kareneilbeck -creation_date: 2009-10-23T12:47:47Z - -[Term] -id: SO:0001500 -name: heritable_phenotypic_marker -def: "A biological_region characterized as a single heritable trait in a phenotype screen. The heritable phenotype may be mapped to a chromosome but generally has not been characterized to a specific gene locus." [JAX:hdene] -synonym: "heritable phenotypic marker" EXACT [] -synonym: "phenotypic marker" EXACT [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2009-12-07T01:50:55Z - -[Term] -id: SO:0001501 -name: peptide_collection -def: "A collection of peptide sequences." [BBOP:nlw] -comment: Term requested via tracker ID: 2910829. -synonym: "peptide collection" EXACT [] -synonym: "peptide set" EXACT [] -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0000104 ! polypeptide -created_by: kareneilbeck -creation_date: 2009-12-11T10:58:58Z - -[Term] -id: SO:0001502 -name: high_identity_region -def: "An experimental feature with high sequence identity to another sequence." 
[SO:ke] -comment: Requested by tracker ID: 2902685. -synonym: "high identity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2009-12-11T11:06:05Z - -[Term] -id: SO:0001503 -name: processed_transcript -def: "A transcript for which no open reading frame has been identified and for which no other function has been determined." [MGI:hdeen] -comment: Ensembl and Vega also use this term name. Requested by Howard Deen of MGI. -synonym: "processed transcript" EXACT [] -is_a: SO:0000673 ! transcript -created_by: kareneilbeck -creation_date: 2009-12-21T05:37:14Z - -[Term] -id: SO:0001504 -name: assortment_derived_variation -def: "A chromosome variation derived from an event during meiosis." [SO:ke] -synonym: "assortment derived variation" RELATED [] -is_a: SO:0000240 ! chromosome_variation -created_by: kareneilbeck -creation_date: 2010-03-02T05:03:18Z - -[Term] -id: SO:0001505 -name: reference_genome -def: "A collection of sequences (often chromosomes) taken as the standard for a given organism and genome assembly." [SO:ke] -synonym: "reference genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:10:03Z - -[Term] -id: SO:0001506 -name: variant_genome -def: "A collection of sequences (often chromosomes) of an individual." [SO:ke] -synonym: "variant genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:11:25Z - -[Term] -id: SO:0001507 -name: variant_collection -def: "A collection of one or more sequences of an individual." [SO:ke] -synonym: "variant collection" RELATED [] -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0001059 ! sequence_alteration -created_by: kareneilbeck -creation_date: 2010-03-03T02:13:28Z - -[Term] -id: SO:0001508 -name: alteration_attribute -synonym: "alteration attribute" EXACT [] -is_a: SO:0000733 ! 
feature_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:53:23Z - -[Term] -id: SO:0001509 -name: chromosomal_variation_attribute -synonym: "chromosomal variation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:54:30Z - -[Term] -id: SO:0001510 -name: intrachromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:25Z - -[Term] -id: SO:0001511 -name: interchromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:43Z - -[Term] -id: SO:0001512 -name: insertion_attribute -def: "A quality of a chromosomal insertion,." [SO:ke] -synonym: "insertion attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:56Z - -[Term] -id: SO:0001513 -name: tandem -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:37Z - -[Term] -id: SO:0001514 -name: direct -def: "A quality of an insertion where the insert is not in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:49Z - -[Term] -id: SO:0001515 -name: inverted -def: "A quality of an insertion where the insert is in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:40Z - -[Term] -id: SO:0001516 -name: free -def: "The quality of a duplication where the new region exists independently of the original." [SO:ke] -is_a: SO:0001523 ! duplication_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:51Z - -[Term] -id: SO:0001517 -name: inversion_attribute -synonym: "inversion attribute" EXACT [] -is_a: SO:0001508 ! 
alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:10Z - -[Term] -id: SO:0001518 -name: pericentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:24Z - -[Term] -id: SO:0001519 -name: paracentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:35Z - -[Term] -id: SO:0001520 -name: translocaton_attribute -synonym: "translocation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:47Z - -[Term] -id: SO:0001521 -name: reciprocal -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:34Z - -[Term] -id: SO:0001522 -name: insertional -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:51Z - -[Term] -id: SO:0001523 -name: duplication_attribute -synonym: "duplication attribute" RELATED [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-05T01:56:33Z - -[Term] -id: SO:0001524 -name: chromosomally_aberrant_genome -synonym: "chromosomally aberrant genome" RELATED [] -is_a: SO:0001506 ! variant_genome -created_by: kareneilbeck -creation_date: 2010-03-05T02:21:00Z - -[Term] -id: SO:0001525 -name: assembly_error_correction -def: "A region of sequence where the final nucleotide assignment differs from the original assembly due to an improvement that replaces a mistake." [SO:ke] -synonym: "assembly error correction" RELATED [] -is_a: SO:0000413 ! sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:16:31Z - -[Term] -id: SO:0001526 -name: base_call_error_correction -def: "A region of sequence where the final nucleotide assignment is different from that given by the base caller due to an improvement that replaces a mistake." [SO:ke] -synonym: "base call error correction" RELATED [] -is_a: SO:0000413 ! 
sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:18:07Z - -[Term] -id: SO:0001527 -name: peptide_localization_signal -def: "A region of peptide sequence used to target the polypeptide molecule to a specific organelle." [SO:ke] -synonym: "localization signal" RELATED [] -synonym: "peptide localization signal" EXACT [] -is_a: SO:0000839 ! polypeptide_region -created_by: kareneilbeck -creation_date: 2010-03-11T02:15:05Z - -[Term] -id: SO:0001528 -name: nuclear_localization_signal -def: "A polypeptide region that targets a polypeptide to the nucleus." [SO:ke] -synonym: "NLS" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_localization_signal "wikipedia" -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:16:38Z - -[Term] -id: SO:0001529 -name: endosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the endosome." [SO:ke] -synonym: "endosomal localization signal" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:20:58Z - -[Term] -id: SO:0001530 -name: lysosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the lysosome." [SO:ke] -synonym: "lysosomal localization signal" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:24:10Z - -[Term] -id: SO:0001531 -name: nuclear_export_signal -def: "A polypeptide region that targets a polypeptide to he cytoplasm." [SO:ke] -synonym: "NES" EXACT [] -synonym: "nuclear export signal" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_export_signal -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:25:25Z - -[Term] -id: SO:0001532 -name: recombination_signal_sequence -def: "A region recognized by a recombinase." 
[SO:ke] -synonym: "recombination signal sequence" RELATED [] -xref: http://en.wikipedia.org/wiki/Recombination_Signal_Sequences "wikipedia" -is_a: SO:0000299 ! specific_recombination_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:16:47Z - -[Term] -id: SO:0001533 -name: cryptic_splice_site -def: "A splice site that is in part of the transcript not normally spliced. They occur via mutation or transcriptional error." [SO:ke] -synonym: "cryptic splice signal" RELATED [] -synonym: "cryptic splice site" EXACT [] -is_a: SO:0000162 ! splice_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:25:06Z - -[Term] -id: SO:0001534 -name: nuclear_rim_localization_signal -def: "A polypeptide region that targets a polypeptide to the nuclear rim." [SO:ke] -synonym: "nuclear rim localization signal" RELATED [] -xref: PMID:16027110 -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T03:31:30Z - -[Term] -id: SO:0001535 -name: p_element -def: "A P_element is a DNA transposon responsible for hybrid dysgenesis." [SO:ke] -synonym: "P element" RELATED [] -is_a: SO:0000182 ! DNA_transposon -created_by: kareneilbeck -creation_date: 2010-03-12T03:40:33Z - -[Term] -id: SO:0001700 -name: histone_modification -def: "Histone modification is a post translationally modified region whereby residues of the histone protein are modified by methylation, acetylation, phosphorylation, ubiquitination, sumoylation, citrullination, or ADP-ribosylation." [http:en.wikipedia.org/wiki/Histone] -synonym: "histone modification" EXACT [] -is_a: SO:0001089 ! post_translationally_modified_region -intersection_of: SO:0001089 ! post_translationally_modified_region -intersection_of: has_quality SO:0000133 ! epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-31T10:22:08Z - -[Term] -id: SO:0001701 -name: histone_methylation -def: "A histone modification where the modification is the methylation of the residue." 
[SO:ke] -synonym: "histone methylation" EXACT [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:02Z - -[Term] -id: SO:0001702 -name: histone_acetylation -def: "A histone modification where the modification is the acylation of the residue." [SO:ke] -synonym: "histone acetylation" EXACT [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:27Z - -[Term] -id: SO:0001703 -name: H3K9_acetylation -def: "A kind of histone modification, whereby the 9th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 acetylation" EXACT [] -is_a: SO:0001702 ! histone_acetylation -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:05Z - -[Term] -id: SO:0001704 -name: H3K14_acetylation -def: "A kind of histone modification, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K14 acetylation" EXACT [] -is_a: SO:0001702 ! histone_acetylation -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:53Z - -[Term] -id: SO:0001705 -name: H3K4_monomethylation -def: "A kind of histone modification, whereby the 4th residue (a lysine), from the start of the H3 protein is methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 monomethylation" EXACT [] -synonym: "H3K4me1" EXACT [] -is_a: SO:0001701 ! histone_methylation -created_by: kareneilbeck -creation_date: 2010-03-31T10:28:14Z - -[Term] -id: SO:0001706 -name: H3K4_trimethylation -def: "A kind of histone modification, whereby the 4th residue (a lysine), from the start of the H3 protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 trimethylation" EXACT [] -synonym: "H3K4me3" EXACT [] -is_a: SO:0001701 ! 
histone_methylation -created_by: kareneilbeck -creation_date: 2010-03-31T10:29:12Z - -[Term] -id: SO:0001707 -name: H3K9_trimethylation -def: "A kind of histone modification, whereby the 9th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 trimethylation" EXACT [] -is_a: SO:0001701 ! histone_methylation -created_by: kareneilbeck -creation_date: 2010-03-31T10:30:34Z - -[Term] -id: SO:0001708 -name: H3K27_monomethylation -def: "A kind of histone modification, whereby the 27th residue (a lysine), from the start of the H3 histone protein is methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2K27 monomethylation" EXACT [] -is_a: SO:0001701 ! histone_methylation -created_by: kareneilbeck -creation_date: 2010-03-31T10:31:54Z - -[Term] -id: SO:0001709 -name: H3K27_trimethylation -def: "A kind of histone modification, whereby the 27th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K27 trimethylation" EXACT [] -is_a: SO:0001701 ! histone_methylation -created_by: kareneilbeck -creation_date: 2010-03-31T10:32:41Z - -[Term] -id: SO:0001710 -name: H3K79_monomethylation -def: "A kind of histone modification, whereby the 79th residue (a lysine), from the start of the H3 histone protein is methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 monomethylation" EXACT [] -is_a: SO:0001701 ! histone_methylation -created_by: kareneilbeck -creation_date: 2010-03-31T10:33:42Z - -[Term] -id: SO:0001711 -name: H3K79_dimethylation -def: "A kind of histone modification, whereby the 79th residue (a lysine), from the start of the H3 histone protein is di-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K27 dimethylation" EXACT [] -is_a: SO:0001701 ! 
histone_methylation -created_by: kareneilbeck -creation_date: 2010-03-31T10:34:39Z - -[Term] -id: SO:0001712 -name: H3K79_trimethylation -def: "A kind of histone modification, whereby the 79th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 trimethylation" EXACT [] -is_a: SO:0001701 ! histone_methylation -created_by: kareneilbeck -creation_date: 2010-03-31T10:35:30Z - -[Term] -id: SO:0001713 -name: H4K20_monomethylation -def: "A kind of histone modification, whereby the 20th residue (a lysine), from the start of the H34histone protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H4K20 monomethylation" EXACT [] -is_a: SO:0001701 ! histone_methylation -created_by: kareneilbeck -creation_date: 2010-03-31T10:36:43Z - -[Term] -id: SO:0001714 -name: H2BK5_monomethylation -def: "A kind of histone modification, whereby the 5th residue (a lysine), from the start of the H2B protein is methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2BK5 monomethylation" EXACT [] -is_a: SO:0001701 ! histone_methylation -created_by: kareneilbeck -creation_date: 2010-03-31T10:38:12Z - -[Term] -id: SO:0001715 -name: ISRE -def: "An ISRE is a transcriptional cis regulatory region, containing the consensus region: YAGTTTC(A/T)YTTTYCC, responsible for increased transcription via interferon binding." [http://genesdev.cshlp.org/content/2/4/383.abstrac] -comment: Term requested via tracker (2981725) by Alan Ruttenberg, April 2010. It has been described as both an enhancer and a promoter, so the parent is the more general term. -synonym: "interferon stimulated response element" EXACT [] -is_a: SO:0001055 ! 
transcriptional_cis_regulatory_region -created_by: kareneilbeck -creation_date: 2010-04-05T11:15:08Z - -[Term] -id: SO:0001720 -name: epigenetically_modified_region -def: "A biological region implicated in inherited changes caused by mechanisms other than changes in the underlying DNA sequence." [http://en.wikipedia.org/wiki/Epigenetics, SO:ke] -subset: SOFA -synonym: "epigenetically modified region" RELATED [] -is_a: SO:0001411 ! biological_region -intersection_of: SO:0001411 ! biological_region -intersection_of: has_quality SO:0000133 ! epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-27T12:02:29Z - -[Term] -id: SO:0005836 -name: regulatory_region -def: "A DNA sequence that controls the expression of a gene." [SO:ke] -subset: SOFA -synonym: "regulatory region" EXACT [] -xref: http://en.wikipedia.org/wiki/Regulatory_region "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0005837 -name: U14_snoRNA_primary_transcript -def: "The primary transcript of an evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA." [PMID:2251119] -synonym: "4.5S snRNA primary transcript" EXACT [] -synonym: "U14 snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0005841 -name: methylation_guide_snoRNA -def: "A snoRNA that specifies the site of 2'-O-ribose methylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA 2'-O-ribose methylation guide activity (GO:0030561). -synonym: "methylation guide snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0000580 ! methylation_guide_snoRNA_primary_transcript - -[Term] -id: SO:0005843 -name: rRNA_cleavage_RNA -def: "An ncRNA that is part of a ribonucleoprotein that cleaves the primary pre-rRNA transcript in the process of producing mature rRNA molecules." 
[GOC:kgc] -synonym: "rRNA cleavage RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000582 ! rRNA_cleavage_snoRNA_primary_transcript - -[Term] -id: SO:0005845 -name: exon_of_single_exon_gene -def: "An exon that is the only exon in a gene." [RSC:cb] -synonym: "exon of single exon gene" EXACT [] -synonym: "single_exon" RELATED [] -synonym: "singleton exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0005847 -name: cassette_array_member -synonym: "cassette array member" EXACT [] -is_a: SO:0005848 ! gene_cassette_member - -[Term] -id: SO:0005848 -name: gene_cassette_member -synonym: "gene cassette member" EXACT [] -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0005849 -name: gene_subarray_member -synonym: "gene subarray member" EXACT [] -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0005850 -name: primer_binding_site -def: "Non-covalent primer binding site for initiation of replication, transcription, or reverse transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "primer binding site" EXACT [] -xref: http://en.wikipedia.org/wiki/Primer_binding_site "wiki" -is_a: SO:0000409 ! binding_site -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0005851 -name: gene_array -def: "An array includes two or more genes, or two or more gene subarrays, contiguously arranged where the individual genes, or subarrays, are either identical in sequence, or essentially so." [SO:ma] -comment: This would include, for example, a cluster of genes each encoding the major ribosomal RNAs and a cluster of histone gene subarrays. -synonym: "gene array" EXACT [] -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0005852 -name: gene_subarray -def: "A subarray is, by defintition, a member of a gene array (SO:0005851); the members of a subarray may differ substantially in sequence, but are closely related in function." 
[SO:ma] -comment: This would include, for example, a cluster of genes encoding different histones. -synonym: "gene subarray" EXACT [] -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0005853 -name: gene_cassette -def: "A gene that can be substituted for a related gene at a different site in the genome." [SGD:se] -comment: This would include, for example, the mating type gene cassettes of S. cerevisiae. Gene cassettes usually exist as linear sequences as part of a larger DNA molecule, such as a chromosome or plasmid. -synonym: "gene cassette" EXACT [] -xref: http://en.wikipedia.org/wiki/Gene_cassette "wiki" -is_a: SO:0000704 ! gene - -[Term] -id: SO:0005854 -name: gene_cassette_array -def: "An array of non-functional genes whose members, when captured by recombination form functional genes." [SO:ma] -comment: This would include, for example, the arrays of non-functional VSG genes of Trypanosomes. -synonym: "gene cassette array" EXACT [] -is_a: SO:0005855 ! gene_group -relationship: has_part SO:0005853 ! gene_cassette - -[Term] -id: SO:0005855 -name: gene_group -def: "A collection of related genes." [SO:ma] -subset: SOFA -synonym: "gene group" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0005856 -name: selenocysteine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -synonym: "selenocysteine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0005857 -name: selenocysteinyl_tRNA -def: "A tRNA sequence that has a selenocysteine anticodon, and a 3' selenocysteine binding region." [SO:ke] -synonym: "selenocysteinyl tRNA" EXACT [] -synonym: "selenocysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "selenocysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0005856 ! 
selenocysteine_tRNA_primary_transcript - -[Term] -id: SO:0005858 -name: syntenic_region -def: "A region in which two or more pairs of homologous markers occur on the same chromosome in two or more species." [http://www.informatics.jax.org/silverbook/glossary.shtml] -synonym: "syntenic region" EXACT [] -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000860 ! syntenic - -[Term] -id: SO:0100001 -name: biochemical_region_of_peptide -def: "A region of a peptide that is involved in a biochemical function." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "biochemical motif" EXACT [] -synonym: "biochemical region of peptide" EXACT [] -synonym: "biochemical_region" RELATED [] -is_a: SO:0001067 ! polypeptide_motif - -[Term] -id: SO:0100002 -name: molecular_contact_region -def: "A region that is involved a contact with another molecule." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "molecular contact region" RELATED [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0100003 -name: intrinsically_unstructured_polypeptide_region -def: "A region of polypeptide chain with high conformational flexibility." [EBIBS:GAR] -subset: biosapiens -synonym: "disordered region" RELATED BS [] -synonym: "intrinsically unstructured polypeptide region" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0100004 -name: catmat_left_handed_three -def: "A motif of 3 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -75 bounds -100 to -50, res i+1: psi 140 bounds 110 to 170. An extra restriction of the length of the O to O distance would be useful, that it be less than 5 Angstrom. More precisely these two oxygens are the main chain carbonyl oxygen atoms of residues i-1 and i+1." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "catmat-3l" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0100005 -name: catmat_left_handed_four -def: "A motif of 4 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i psi -10 bounds -50 to 30, res i+1: phi -90 bounds -120 to -60, res i+1: psi -10 bounds -50 to 30, res i+2: phi -75 bounds -100 to -50, res i+2: psi 140 bounds 110 to 170. The extra restriction of the length of the O to O distance is similar, that it be less than 5 Angstrom. In this case these two Oxygen atoms are the main chain carbonyl oxygen atoms of residues i-1 and i+2." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "catmat-4l" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100006 -name: catmat_right_handed_three -def: "A motif of 3 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -75 bounds -100 to -50, res i+1: psi 140 bounds 110 to 170. An extra restriction of the length of the O to O distance would be useful, that it be less than 5 Angstrom. More precisely these two oxygens are the main chain carbonyl oxygen atoms of residues i-1 and i+1." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "catmat-3r" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100007 -name: catmat_right_handed_four -def: "A motif of 4 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -90 bounds -120 to -60, res i+1: psi -10 bounds -50 to 30, res i+2: phi -75 bounds -100 to -50, res i+2: psi 140 bounds 110 to 170. The extra restriction of the length of the O to O distance is similar, that it be less than 5 Angstrom. In this case these two Oxygen atoms are the main chain carbonyl oxygen atoms of residues i-1 and i+2." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "catmat-4r" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100008 -name: alpha_beta_motif -def: "A motif of five consecutive residues and two H-bonds in which: H-bond between CO of residue(i) and NH of residue(i+4), H-bond between CO of residue(i) and NH of residue(i+3),Phi angles of residues(i+1), (i+2) and (i+3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "alpha beta motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100009 -name: lipoprotein_signal_peptide -def: "A peptide that acts as a signal for both membrane translocation and lipid attachment in prokaryotes." [EBIBS:GAR] -subset: biosapiens -synonym: "lipoprotein signal peptide" EXACT [] -synonym: "prokaryotic membrane lipoprotein lipid attachment site" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0100010 -name: no_output -def: "An experimental region wherean analysis has been run and not produced any annotation." [EBIBS:GAR] -subset: biosapiens -synonym: "no output" EXACT BS [] -is_a: SO:0000703 ! experimental_result_region - -[Term] -id: SO:0100011 -name: cleaved_peptide_region -def: "The cleaved_peptide_regon is the a region of peptide sequence that is cleaved during maturation." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "cleaved peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0100012 -name: peptide_coil -def: "Irregular, unstructured regions of a protein's backbone, as distinct from the regular region (namely alpha helix and beta strand - characterised by specific patterns of main-chain hydrogen bonds)." [EBIBS:GAR] -subset: biosapiens -synonym: "coil" RELATED BS [] -synonym: "peptide coil" EXACT [] -synonym: "random coil" RELATED BS [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0100013 -name: hydrophobic_region_of_peptide -def: "Hydrophobic regions are regions with a low affinity for water." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "hydropathic" RELATED [] -synonym: "hydrophobic region of peptide" RELATED [] -synonym: "hydrophobic_region" EXACT [] -synonym: "hydrophobicity" RELATED [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0100014 -name: n_terminal_region -def: "The amino-terminal positively-charged region of a signal peptide (approx 1-5 aa)." [EBIBS:GAR] -subset: biosapiens -synonym: "N-region" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0000418 ! signal_peptide - -[Term] -id: SO:0100015 -name: c_terminal_region -def: "The more polar, carboxy-terminal region of the signal peptide (approx 3-7 aa)." [EBIBS:GAR] -subset: biosapiens -synonym: "C-region" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0000418 ! signal_peptide - -[Term] -id: SO:0100016 -name: central_hydrophobic_region_of_signal_peptide -def: "The central, hydrophobic region of the signal peptide (approx 7-15 aa)." [EBIBS:GAR] -subset: biosapiens -synonym: "central hydrophobic region of signal peptide" EXACT [] -synonym: "central_hydrophobic_region" RELATED [] -synonym: "H-region" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0000418 ! signal_peptide - -[Term] -id: SO:0100017 -name: polypeptide_conserved_motif -def: "A conserved motif is a short (up to 20 amino acids) region of biological interest that is conserved in different proteins. They may or may not have functional or structural significance within the proteins in which they are found." [EBIBS:GAR] -subset: biosapiens -synonym: "motif" RELATED [] -is_a: SO:0001067 ! 
polypeptide_motif - -[Term] -id: SO:0100018 -name: polypeptide_binding_motif -def: "A polypeptide binding motif is a short (up to 20 amino acids) polypeptide region of biological interest that contains one or more amino acids experimentally shown to bind to a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "binding" RELATED [uniprot:feature_type] -synonym: "polypeptide binding motif" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0100019 -name: polypeptide_catalytic_motif -def: "A polypeptide catalytic motif is a short (up to 20 amino acids) polypeptide region that contains one or more active site residues." [EBIBS:GAR] -subset: biosapiens -synonym: "catalytic_motif" RELATED [] -synonym: "polypeptide catalytic motif" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0100020 -name: polypeptide_DNA_contact -def: "Residues involved in interactions with DNA." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide DNA contact" EXACT [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0100021 -name: polypeptide_conserved_region -def: "A subsection of sequence with biological interest that is conserved in different proteins. They may or may not have functional or structural significance within the proteins in which they are found." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide conserved region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:1000002 -name: substitution -def: "Any change in genomic DNA caused by a single event." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1000005 -name: complex_substitution -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -synonym: "complex substitution" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000008 -name: point_mutation -def: "A single nucleotide change which has occurred at the same position of a corresponding nucleotide in a reference sequence." [SO:immuno_workshop] -subset: SOFA -synonym: "point mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Point_mutation "wiki" -is_a: SO:0001483 ! SNV - -[Term] -id: SO:1000009 -name: transition -def: "Change of a pyrimidine nucleotide, C or T, into an other pyrimidine nucleotide, or change of a purine nucleotide, A or G, into an other purine nucleotide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:1000010 -name: pyrimidine_transition -def: "A substitution of a pyrimidine, C or T, for another pyrimidine." [SO:ke] -synonym: "pyrimidine transition" EXACT [] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000011 -name: C_to_T_transition -def: "A transition of a cytidine to a thymine." [SO:ke] -synonym: "C to T transition" EXACT [] -is_a: SO:1000010 ! pyrimidine_transition - -[Term] -id: SO:1000012 -name: C_to_T_transition_at_pCpG_site -def: "The transition of cytidine to thymine occurring at a pCpG site as a consequence of the spontaneous deamination of 5'-methylcytidine." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "C to T transition at pCpG site" EXACT [] -is_a: SO:1000011 ! C_to_T_transition - -[Term] -id: SO:1000013 -name: T_to_C_transition -synonym: "T to C transition" EXACT [] -is_a: SO:1000010 ! pyrimidine_transition - -[Term] -id: SO:1000014 -name: purine_transition -def: "A substitution of a purine, A or G, for another purine." [SO:ke] -synonym: "purine transition" EXACT [] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000015 -name: A_to_G_transition -def: "A transition of an adenine to a guanine." 
[SO:ke] -synonym: "A to G transition" EXACT [] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000016 -name: G_to_A_transition -def: "A transition of a guanine to an adenine." [SO:ke] -synonym: "G to A transition" EXACT [] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000017 -name: transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G, or vice versa." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -xref: http://en.wikipedia.org/wiki/Transversion "wiki" -is_a: SO:0001483 ! SNV - -[Term] -id: SO:1000018 -name: pyrimidine_to_purine_transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G." [SO:ke] -synonym: "pyrimidine to purine transversion" EXACT [] -is_a: SO:1000017 ! transversion - -[Term] -id: SO:1000019 -name: C_to_A_transversion -def: "A transversion from cytidine to adenine." [SO:ke] -synonym: "C to A transversion" EXACT [] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000020 -name: C_to_G_transversion -synonym: "C to G transversion" EXACT [] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000021 -name: T_to_A_transversion -def: "A transversion from T to A." [SO:ke] -synonym: "T to A transversion" EXACT [] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000022 -name: T_to_G_transversion -def: "A transversion from T to G." [SO:ke] -synonym: "T to G transversion" EXACT [] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000023 -name: purine_to_pyrimidine_transversion -def: "Change of a purine nucleotide, A or G , into a pyrimidine nucleotide C or T." [SO:ke] -synonym: "purine to pyrimidine transversion" EXACT [] -is_a: SO:1000017 ! transversion - -[Term] -id: SO:1000024 -name: A_to_C_transversion -def: "A transversion from adenine to cytidine." [SO:ke] -synonym: "A to C transversion" EXACT [] -is_a: SO:1000023 ! 
purine_to_pyrimidine_transversion - -[Term] -id: SO:1000025 -name: A_to_T_transversion -def: "A transversion from adenine to thymine." [SO:ke] -synonym: "A to T transversion" EXACT [] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000026 -name: G_to_C_transversion -def: "A transversion from guanine to cytidine." [SO:ke] -synonym: "G to C transversion" EXACT [] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000027 -name: G_to_T_transversion -def: "A transversion from guanine to thymine." [SO:ke] -synonym: "G to T transversion" EXACT [] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000028 -name: intrachromosomal_mutation -def: "A chromosomal structure variation within a single chromosome." [SO:ke] -synonym: "intrachromosomal mutation" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation -intersection_of: SO:1000183 ! chromosome_structure_variation -intersection_of: has_quality SO:0001510 ! intrachromosomal - -[Term] -id: SO:1000029 -name: chromosomal_deletion -def: "An incomplete chromosome." [SO:ke] -synonym: "(bacteria)&Dgr;" RELATED [] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(fungi)D" RELATED [] -synonym: "chromosomal deletion" EXACT [] -synonym: "deficiency" EXACT [] -xref: http://en.wikipedia.org/wiki/Chromosomal_deletion "wiki" -is_a: SO:1000028 ! intrachromosomal_mutation -intersection_of: SO:1000028 ! intrachromosomal_mutation -intersection_of: has_part SO:0000159 ! deletion - -[Term] -id: SO:1000030 -name: chromosomal_inversion -def: "An interchromosomal mutation where a region of the chromosome is inverted with respect to wild type." [SO:ke] -synonym: "(bacteria)IN" RELATED [] -synonym: "(Drosophila)In" RELATED [] -synonym: "(fungi)In" RELATED [] -synonym: "chromosomal inversion" EXACT [] -xref: http://en.wikipedia.org/wiki/Chromosomal_inversion "wiki" -is_a: SO:1000028 ! intrachromosomal_mutation -intersection_of: SO:1000028 ! 
intrachromosomal_mutation -intersection_of: has_part SO:1000036 ! inversion - -[Term] -id: SO:1000031 -name: interchromosomal_mutation -def: "A chromosomal structure variation whereby more than one chromosome is involved." [SO:ke] -synonym: "interchromosomal mutation" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation -intersection_of: SO:1000183 ! chromosome_structure_variation -intersection_of: has_quality SO:0001511 ! interchromosomal - -[Term] -id: SO:1000032 -name: indel -def: "A sequence alteration which included an insertion and a deletion, affecting 2 or more bases." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html, http:http\://www.hgvs.org/mutnomen/recs-DNA.html#indel] -comment: Indels can have a different number of bases than the corresponding reference sequence. -xref: http://en.wikipedia.org/wiki/Indel "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:1000035 -name: duplication -def: "One or more nucleotides are added between two adjacent nucleotides in the sequence; the inserted sequence derives from, or is identical in sequence to, nucleotides adjacent to insertion point." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "nucleotide duplication" EXACT [] -synonym: "nucleotide_duplication" RELATED [] -is_a: SO:0000667 ! insertion - -[Term] -id: SO:1000036 -name: inversion -def: "A continuous nucleotide sequence is inverted in the same position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1000037 -name: chromosomal_duplication -def: "An extra chromosome." [SO:ke] -synonym: "(Drosophila)Dp" RELATED [] -synonym: "(fungi)Dp" RELATED [] -synonym: "chromosomal duplication" EXACT [] -xref: http://en.wikipedia.org/wiki/Chromosomal_duplication "wiki" -is_a: SO:1000183 ! 
chromosome_structure_variation - -[Term] -id: SO:1000038 -name: intrachromosomal_duplication -def: "A duplication that occurred within a chromosome." [SO:ke] -synonym: "intrachromosomal duplication" EXACT [] -is_a: SO:1000028 ! intrachromosomal_mutation -is_a: SO:1000037 ! chromosomal_duplication -intersection_of: SO:1000028 ! intrachromosomal_mutation -intersection_of: has_part SO:1000035 ! duplication - -[Term] -id: SO:1000039 -name: direct_tandem_duplication -def: "A tandem duplication where the individual regions are in the same orientation." [SO:ke] -synonym: "direct tandem duplication" EXACT [] -is_a: SO:1000173 ! tandem_duplication - -[Term] -id: SO:1000040 -name: inverted_tandem_duplication -def: "A tandem duplication where the individual regions are not in the same orientation." [SO:ke] -synonym: "inverted tandem duplication" EXACT [] -synonym: "mirror duplication" RELATED [] -is_a: SO:1000173 ! tandem_duplication - -[Term] -id: SO:1000041 -name: intrachromosomal_transposition -def: "A chromosome structure variation whereby a transposition occurred within a chromosome." [SO:ke] -synonym: "(Drosophila)Tp" RELATED [] -synonym: "intrachromosomal transposition" EXACT [] -is_a: SO:0000453 ! chromosomal_transposition -is_a: SO:1000038 ! intrachromosomal_duplication -intersection_of: SO:1000028 ! intrachromosomal_mutation -intersection_of: has_part SO:0000199 ! translocation -intersection_of: has_part SO:1000035 ! duplication - -[Term] -id: SO:1000042 -name: compound_chromosome -def: "A chromosome structure variant where a monocentric element is caused by the fusion of two chromosome arms." [SO:ke] -synonym: "compound chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000043 -name: Robertsonian_fusion -def: "A non reciprocal translocation whereby the participating chromosomes break at their centromeres and the long arms fuse to form a single chromosome with a single centromere." 
[http://en.wikipedia.org/wiki/Robertsonian_translocation] -synonym: "centric-fusion translocations" EXACT [] -synonym: "Robertsonian fusion" EXACT [] -synonym: "whole-arm translocations" EXACT [] -xref: http://en.wikipedia.org/wiki/Robertsonian_fusion "wiki" -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000044 -name: chromosomal_translocation -def: "An interchromosomal mutation. Rearrangements that alter the pairing of telomeres are classified as translocations." [FB:reference_manual] -synonym: "(Drosophila)T" RELATED [] -synonym: "(fungi)T" RELATED [] -synonym: "chromosomal translocation" EXACT [] -xref: http://en.wikipedia.org/wiki/Chromosomal_translocation "wiki" -is_a: SO:1000031 ! interchromosomal_mutation -intersection_of: SO:1000031 ! interchromosomal_mutation -intersection_of: has_part SO:0000199 ! translocation - -[Term] -id: SO:1000045 -name: ring_chromosome -def: "A ring chromosome is a chromosome whose arms have fused together to form a ring, often with the loss of the ends of the chromosome." [http://en.wikipedia.org/wiki/Ring_chromosome] -synonym: "(Drosophila)R" RELATED [] -synonym: "(fungi)C" RELATED [] -synonym: "ring chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Ring_chromosome "wiki" -is_a: SO:1000028 ! intrachromosomal_mutation -intersection_of: SO:1000028 ! intrachromosomal_mutation -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:1000046 -name: pericentric_inversion -def: "A chromosomal inversion that includes the centromere." [FB:reference_manual] -synonym: "pericentric inversion" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -intersection_of: SO:1000030 ! chromosomal_inversion -intersection_of: has_quality SO:0001518 ! pericentric - -[Term] -id: SO:1000047 -name: paracentric_inversion -def: "A chromosomal inversion that does not include the centromere." [FB:reference_manual] -synonym: "paracentric inversion" EXACT [] -is_a: SO:1000030 ! 
chromosomal_inversion -intersection_of: SO:1000030 ! chromosomal_inversion -intersection_of: has_quality SO:0001519 ! paracentric - -[Term] -id: SO:1000048 -name: reciprocal_chromosomal_translocation -def: "A chromosomal translocation with two breaks; two chromosome segments have simply been exchanged." [FB:reference_manual] -synonym: "reciprocal chromosomal translocation" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000049 -name: sequence_variation_affecting_transcript -alt_id: SO:1000177 -alt_id: SO:1000179 -def: "Any change in mature, spliced and processed, RNA that results from a change in the corresponding DNA sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting transcript" EXACT [] -synonym: "mutation causing partially characterised change in transcript" RELATED [] -synonym: "mutation causing uncharacterised change in transcript" RELATED [] -synonym: "sequence variant causing partially characterised change in transcript" EXACT [] -synonym: "sequence variant causing uncharacterised change in transcript" EXACT [] -synonym: "sequence variation affecting transcript" EXACT [] -synonym: "sequence_variant_causing_partially_characterised_change_in_transcript" EXACT [] -synonym: "sequence_variant_causing_uncharacterised_change_in_transcript" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:1000050 -name: sequence_variant_causing_no_change_in_transcript -def: "No effect on the state of the RNA." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing no change in transcript" RELATED [] -synonym: "sequence variant causing no change in transcript" EXACT [] -is_a: SO:1000049 ! 
sequence_variation_affecting_transcript - -[Term] -id: SO:1000052 -name: sequence_variation_affecting_complex_change_in_transcript -synonym: "mutation affecting complex change in transcript" EXACT [] -synonym: "sequence variation affecting complex change in transcript" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000054 -name: sequence_variation_affecting_coding_sequence -def: "Any of the amino acid coding triplets of a gene are affected by the DNA mutation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting coding sequence" EXACT [] -synonym: "sequence variation affecting coding sequence" RELATED [] -is_a: SO:1000079 ! sequence_variation_affecting_transcript_sequence - -[Term] -id: SO:1000055 -name: sequence_variant_causing_initiator_codon_change_in_transcript -def: "The DNA mutation changes, usually destroys, the first coding triplet of a gene. Usually prevents translation although another initiator codon may be used." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing initiator codon change in transcript" RELATED [] -synonym: "sequence variant causing initiator codon change in transcript" EXACT [] -is_a: SO:1000056 ! sequence_variant_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000056 -name: sequence_variant_causing_amino_acid_coding_codon_change_in_transcript -def: "The DNA mutation affects the amino acid coding sequence of a gene; this region includes both the initiator and terminator codons." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutaton causing amino acid coding codon change in transcript" RELATED [] -synonym: "sequence variant causing amino acid coding codon change in transcript" EXACT [] -is_a: SO:1000054 ! 
sequence_variation_affecting_coding_sequence - -[Term] -id: SO:1000057 -name: sequence_variant_causing_synonymous_codon_change_in_transcript -def: "The changed codon has the same translation product as the original codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing synonymous codon change in transcript" RELATED [] -synonym: "sequence variant causing synonymous codon change in transcript" EXACT [] -is_a: SO:1000056 ! sequence_variant_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000058 -name: sequence_variant_causing_non_synonymous_codon_change_in_transcript -def: "A DNA point mutation that causes a substitution of an amino acid by an other." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing non synonymous codon change in transcript" RELATED [] -synonym: "non-synonymous codon change in transcript" EXACT [] -synonym: "sequence variant causing non synonymous codon change in transcript" EXACT [] -is_a: SO:1000056 ! sequence_variant_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000059 -name: sequence_variant_causing_missense_codon_change_in_transcript -def: "The nucleotide change in the codon leads to a new codon coding for a new amino acid." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing missense codon change in transcript" RELATED [] -synonym: "sequence variant causing missense codon change in transcript" EXACT [] -is_a: SO:1000058 ! sequence_variant_causing_non_synonymous_codon_change_in_transcript - -[Term] -id: SO:1000060 -name: sequence_variant_causing_conservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change does not change the gross properties (size, charge, hydrophobicity) of the amino acid at that position." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -synonym: "mutation causing conservative missense codon change in transcript" RELATED [] -synonym: "sequence variant causing conservative missense codon change in transcript" EXACT [] -is_a: SO:1000059 ! sequence_variant_causing_missense_codon_change_in_transcript - -[Term] -id: SO:1000061 -name: sequence_variant_causing_nonconservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change changes the gross properties (size, charge, hydrophobicity) of the amino acid in that position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -synonym: "mutation causing nonconservative missense codon change in transcript" RELATED [] -synonym: "sequence variant causing nonconservative missense codon change in transcript" EXACT [] -is_a: SO:1000059 ! sequence_variant_causing_missense_codon_change_in_transcript - -[Term] -id: SO:1000062 -name: sequence_variant_causing_nonsense_codon_change_in_transcript -def: "The nucleotide change in the codon triplet creates a terminator codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing nonsense codon change in transcript" RELATED [] -synonym: "sequence variant causing nonsense codon change in transcript" EXACT [] -is_a: SO:1000056 ! sequence_variant_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000063 -name: sequence_variant_causing_terminator_codon_change_in_transcript -def: "The nucleotide change in the codon triplet changes the stop codon, causing an elongated transcript sequence." 
[SO:ke] -synonym: "mutation causing terminator codon change in transcript" RELATED [] -synonym: "sequence variant causing terminator codon change in transcript" EXACT [] -is_a: SO:1000054 ! sequence_variation_affecting_coding_sequence - -[Term] -id: SO:1000064 -name: sequence_variation_affecting_reading_frame -def: "An umbrella term for terms describing an effect of a sequence variation on the frame of translation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting reading frame" EXACT [] -synonym: "sequence variation affecting reading frame" RELATED [] -is_a: SO:1000054 ! sequence_variation_affecting_coding_sequence - -[Term] -id: SO:1000065 -name: frameshift_sequence_variation -def: "A mutation causing a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three." [SO:ke] -synonym: "frameshift mutation" EXACT [] -synonym: "frameshift sequence variation" RELATED [] -synonym: "out of frame mutation" RELATED [] -xref: http://en.wikipedia.org/wiki/Frameshift_mutation "wiki" -is_a: SO:1000064 ! sequence_variation_affecting_reading_frame - -[Term] -id: SO:1000066 -name: sequence_variant_causing_plus_1_frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, due to the insertion of a nucleotide." [SO:ke] -synonym: "plus 1 frameshift mutation" EXACT [] -synonym: "sequence variant causing plus 1 frameshift mutation" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000067 -name: sequence_variant_causing_minus_1_frameshift -def: "A mutation causing a disruption of the translational reading frame, due to the deletion of a nucleotide." [SO:ke] -synonym: "minus 1 frameshift mutation" EXACT [] -synonym: "sequence variant causing minus 1 frameshift" EXACT [] -is_a: SO:1000065 ! 
frameshift_sequence_variation - -[Term] -id: SO:1000068 -name: sequence_variant_causing_plus_2_frameshift -def: "A mutation causing a disruption of the translational reading frame, due to the insertion of two nucleotides." [SO:ke] -synonym: "plus 2 frameshift mutation" EXACT [] -synonym: "sequence variant causing plus 2 frameshift" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000069 -name: sequence_variant_causing_minus_2_frameshift -def: "A mutation causing a disruption of the translational reading frame, due to the deletion of two nucleotides." [SO:ke] -synonym: "minus 2 frameshift mutation" EXACT [] -synonym: "sequence variant causing minus 2 frameshift" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000070 -name: sequence_variant_affecting_transcript_processing -def: "Sequence variant affects the way in which the primary transcriptional product is processed to form the mature transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting transcript processing" RELATED [] -synonym: "sequence variant affecting transcript processing" EXACT [] -is_a: SO:1000079 ! sequence_variation_affecting_transcript_sequence - -[Term] -id: SO:1000071 -name: sequence_variant_affecting_splicing -def: "A sequence_variant_effect where the way in which the primary transcriptional product is processed to form the mature transcript, specifically by the removal (splicing) of intron sequences is changed." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting splicing" RELATED [] -synonym: "sequence variant affecting splicing" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:1000072 -name: sequence_variant_affecting_splice_donor -def: "A sequence_variant_effect that changes the splice donor sequence." 
[SO:ke] -synonym: "mutation affecting splice donor" RELATED [] -synonym: "sequence variant affecting splice donor" RELATED [] -synonym: "splice donor mutation" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000073 -name: sequence_variant_affecting_splice_acceptor -def: "A sequence_variant_effect that changes the splice acceptor sequence." [SO:ke] -synonym: "mutation affecting splicing" RELATED [] -synonym: "sequence variant affecting splice acceptor" RELATED [] -synonym: "splice acceptor mutation" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000074 -name: sequence_variant_causing_cryptic_splice_activation -def: "A sequence variant causing a new (functional) splice site." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: A cryptic splice site is only used when the natural splice site has been disrupted by a sequence alteration. -synonym: "cryptic splice activator sequence variant" EXACT [] -synonym: "mutation causing cryptic splice activator" RELATED [] -synonym: "sequence variant causing cryptic splice activator" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000075 -name: sequence_variant_affecting_editing -def: "Sequence variant affects the editing of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting editing" RELATED [] -synonym: "sequence variant affecting editing" EXACT [] -is_a: SO:1000070 ! sequence_variant_affecting_transcript_processing - -[Term] -id: SO:1000076 -name: sequence_variant_affecting_transcription -def: "Mutation affects the process of transcription, its initiation, progression or termination." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting transcription" RELATED [] -synonym: "sequence variant affecting transcription" EXACT [] -is_a: SO:1000049 ! 
sequence_variation_affecting_transcript - -[Term] -id: SO:1000078 -name: sequence_variant_decreasing_rate_of_transcription -def: "A sequence variation that decreases the rate a which transcription of the sequence occurs." [SO:ke] -synonym: "mutation decreasing rate of transcription" RELATED [] -synonym: "sequence variation decreasing rate of transcription" EXACT [] -is_a: SO:1000081 ! sequence_variant_affecting_rate_of_transcription - -[Term] -id: SO:1000079 -name: sequence_variation_affecting_transcript_sequence -synonym: "mutation affecting transcript sequence" EXACT [] -synonym: "sequence variation affecting transcript sequence" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000080 -name: sequence_variant_increasing_rate_of_transcription -synonym: "mutation increasing rate of transcription" RELATED [] -synonym: "sequence variation increasing rate of transcription" EXACT [] -is_a: SO:1000081 ! sequence_variant_affecting_rate_of_transcription - -[Term] -id: SO:1000081 -name: sequence_variant_affecting_rate_of_transcription -def: "A mutation that alters the rate a which transcription of the sequence occurs." [SO:ke] -synonym: "mutation affecting rate of transcription" RELATED [] -synonym: "sequence variant affecting rate of transcription" EXACT [] -is_a: SO:1000076 ! sequence_variant_affecting_transcription - -[Term] -id: SO:1000082 -name: sequence variant_affecting_transcript_stability -def: "Sequence variant affects the stability of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting transcript stability" RELATED [] -synonym: "sequence variant affecting transcript stability" EXACT [] -is_a: SO:1000079 ! sequence_variation_affecting_transcript_sequence - -[Term] -id: SO:1000083 -name: sequence_variant_increasing_transcript_stability -def: "Sequence variant increases the stability (half-life) of the transcript." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation increasing transcript stability" RELATED [] -synonym: "sequence variant increasing transcript stability" EXACT [] -is_a: SO:1000082 ! sequence variant_affecting_transcript_stability - -[Term] -id: SO:1000084 -name: sequence_variant_decreasing_transcript_stability -def: "Sequence variant decreases the stability (half-life) of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation decreasing transcript stability" RELATED [] -synonym: "sequence variant decreasing transcript stability" EXACT [] -is_a: SO:1000082 ! sequence variant_affecting_transcript_stability - -[Term] -id: SO:1000085 -name: sequence_variation_affecting_level_of_transcript -def: "A sequence variation that causes a change in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -synonym: "mutation affecting level of transcript" RELATED [] -synonym: "sequence variation affecting level of transcript" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000086 -name: sequence_variation_decreasing_level_of_transcript -def: "A sequence variation that causes a decrease in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -synonym: "mutation decreasing level of transcript" EXACT [] -synonym: "sequence variation decreasing level of transcript" RELATED [] -is_a: SO:1000085 ! sequence_variation_affecting_level_of_transcript - -[Term] -id: SO:1000087 -name: sequence_variation_increasing_level_of_transcript -def: "A sequence_variation that causes an increase in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -synonym: "mutation increasing level of transcript" EXACT [] -synonym: "sequence variation increasing level of transcript" EXACT [] -is_a: SO:1000085 ! 
sequence_variation_affecting_level_of_transcript - -[Term] -id: SO:1000088 -name: sequence_variant_affecting_translational_product -alt_id: SO:1000090 -alt_id: SO:1000091 -def: "A sequence variant causing a change in primary translation product of a transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting translational product" RELATED [] -synonym: "mutation causing partially characterised change of translational product" RELATED [] -synonym: "mutation causing uncharacterised change of translational product" RELATED [] -synonym: "sequence variant affecting translational product" EXACT [] -synonym: "sequence variant causing partially characterised change of translational product" EXACT [] -synonym: "sequence variant causing uncharacterised change of translational product" EXACT [] -synonym: "sequence_variant_causing_partially_characterised_change_of_translational_product" EXACT [] -synonym: "sequence_variant_causing_uncharacterised_change_of_translational_product" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:1000089 -name: sequence_variant_causing_no_change_of_translational_product -def: "The sequence variant at RNA level does not lead to any change in polypeptide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing no change of translational product" RELATED [] -synonym: "sequence variant causing no change of translational product" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000092 -name: sequence_variant_causing_complex_change_of_translational_product -def: "Any sequence variant effect that is known at nucleotide level but cannot be explained by using other key terms." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing complex change of translational product" RELATED [] -synonym: "sequence variant causing complex change of translational product" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000093 -name: sequence_variant_causing_amino_acid_substitution -def: "The replacement of a single amino acid by another." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing amino acid substitution" RELATED [] -synonym: "sequence variant causing amino acid substitution" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000094 -name: sequence_variant_causing_conservative_amino_acid_substitution -synonym: "mutation causing conservative amino acid substitution" RELATED [] -synonym: "sequence variant causing conservative amino acid substitution" EXACT [] -is_a: SO:1000093 ! sequence_variant_causing_amino_acid_substitution - -[Term] -id: SO:1000095 -name: sequence_variant_causing_nonconservative_amino_acid_substitution -synonym: "mutation causing nonconservative amino acid substitution" RELATED [] -synonym: "sequence variant causing nonconservative amino acid substitution" EXACT [] -is_a: SO:1000093 ! sequence_variant_causing_amino_acid_substitution - -[Term] -id: SO:1000096 -name: sequence_variant_causing_amino_acid_insertion -def: "The insertion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing amino acid insertion" RELATED [] -synonym: "sequence variant causing amino acid insertion" EXACT [] -is_a: SO:1000105 ! 
sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000097 -name: sequence_variant_causing_amino_acid_deletion -def: "The deletion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing amino acid deletion" RELATED [] -synonym: "sequence variant causing amino acid deletion" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000098 -name: sequence_variant_causing_polypeptide_truncation -def: "The translational product is truncated at its C-terminus, usually a result of a nonsense codon change in transcript (SO:1000062)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing polypeptide truncation" RELATED [] -synonym: "sequence variant causing polypeptide truncation" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000099 -name: sequence_variant_causing_polypeptide_elongation -def: "The extension of the translational product at either (or both) the N-terminus and/or the C-terminus." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing polypeptide elongation" RELATED [] -synonym: "sequence variant causing polypeptide elongation" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000100 -name: mutation_causing_polypeptide_N_terminal_elongation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing polypeptide N terminal elongation" EXACT [] -synonym: "polypeptide N-terminal elongation" EXACT [] -is_a: SO:1000099 ! sequence_variant_causing_polypeptide_elongation - -[Term] -id: SO:1000101 -name: mutation_causing_polypeptide_C_terminal_elongation -def: "." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing polypeptide C terminal elongation" EXACT [] -synonym: "polypeptide C-terminal elongation" EXACT [] -is_a: SO:1000099 ! sequence_variant_causing_polypeptide_elongation - -[Term] -id: SO:1000102 -name: sequence_variant_affecting_level_of_translational_product -synonym: "mutation affecting level of translational product" RELATED [] -synonym: "sequence variant affecting level of translational product" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000103 -name: sequence_variant_decreasing_level_of_translation_product -synonym: "mutationdecreasing level of translation product" RELATED [] -synonym: "sequence variant decreasing level of translation product" EXACT [] -is_a: SO:1000102 ! sequence_variant_affecting_level_of_translational_product - -[Term] -id: SO:1000104 -name: sequence_variant_increasing_level_of_translation_product -synonym: "mutationt increasing level of translation product" RELATED [] -synonym: "sequence variant increasing level of translation product" EXACT [] -is_a: SO:1000102 ! sequence_variant_affecting_level_of_translational_product - -[Term] -id: SO:1000105 -name: sequence_variant_affecting_polypeptide_amino_acid_sequence -synonym: "mutation affecting polypeptide amino acid sequence" RELATED [] -synonym: "sequence variant affecting polypeptide amino acid sequence" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000106 -name: mutation_causing_inframe_polypeptide_N_terminal_elongation -synonym: "inframe polypeptide N-terminal elongation" EXACT [] -synonym: "mutation causing inframe polypeptide N terminal elongation" EXACT [] -is_a: SO:1000100 ! 
mutation_causing_polypeptide_N_terminal_elongation - -[Term] -id: SO:1000107 -name: mutation_causing_out_of_frame_polypeptide_N_terminal_elongation -synonym: "mutation causing out of frame polypeptide N terminal elongation" EXACT [] -synonym: "out of frame polypeptide N-terminal elongation" EXACT [] -is_a: SO:1000100 ! mutation_causing_polypeptide_N_terminal_elongation - -[Term] -id: SO:1000108 -name: mutaton_causing_inframe_polypeptide_C_terminal_elongation -synonym: "inframe_polypeptide C-terminal elongation" EXACT [] -synonym: "mutaton causing inframe polypeptide C terminal elongation" EXACT [] -is_a: SO:1000101 ! mutation_causing_polypeptide_C_terminal_elongation - -[Term] -id: SO:1000109 -name: mutation_causing_out_of_frame_polypeptide_C_terminal_elongation -synonym: "mutation causing out of frame polypeptide C terminal elongation" EXACT [] -synonym: "out of frame polypeptide C-terminal elongation" EXACT [] -is_a: SO:1000101 ! mutation_causing_polypeptide_C_terminal_elongation - -[Term] -id: SO:1000110 -name: frame_restoring_sequence_variant -def: "A mutation that reverts the sequence of a previous frameshift mutation back to the initial frame." [SO:ke] -synonym: "frame restoring mutation" EXACT [] -synonym: "frame restoring sequence variant" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000111 -name: sequence_variant_affecting_3D_structure_of_polypeptide -alt_id: SO:1000113 -alt_id: SO:1000114 -def: "A mutation that changes the amino acid sequence of the peptide in such a way that it changes the 3D structure of the molecule." 
[SO:ke] -synonym: "mutation affecting 3D structure of polypeptide" RELATED [] -synonym: "mutation causing partially characterised 3D structural change" RELATED [] -synonym: "mutation causing uncharacterised 3D structural change" RELATED [] -synonym: "sequence variant affecting 3D structure of polypeptide" EXACT [] -synonym: "sequence variant affecting 3D-structure of polypeptide" EXACT [] -synonym: "sequence variant causing partially characterised 3D structural change" EXACT [] -synonym: "sequence variant causing uncharacterised 3D structural change" EXACT [] -synonym: "sequence_variant_causing_partially_characterised_3D_structural_change" EXACT [] -synonym: "sequence_variant_causing_uncharacterised_3D_structural_change" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000112 -name: sequence_variant_causing_no_3D_structural_change -synonym: "mutation causing no 3D structural change" RELATED [] -synonym: "sequence variant causing no 3D structural change" EXACT [] -is_a: SO:1000111 ! sequence_variant_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000115 -name: sequence_variant_causing_complex_3D_structural_change -synonym: "mutation causing complex 3D structural change" RELATED [] -synonym: "sequence variant causing complex 3D structural change" EXACT [] -is_a: SO:1000111 ! sequence_variant_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000116 -name: sequence_variant_causing_conformational_change -synonym: "mutation causing conformational change" RELATED [] -synonym: "sequence variant causing conformational change" EXACT [] -is_a: SO:1000111 ! sequence_variant_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000117 -name: sequence_variant_affecting_polypeptide_function -synonym: "mutation affecting polypeptide function" RELATED [] -synonym: "sequence variant affecting polypeptide function" EXACT [] -is_a: SO:1000088 ! 
sequence_variant_affecting_translational_product - -[Term] -id: SO:1000118 -name: sequence_variant_causing_loss_of_function_of_polypeptide -synonym: "loss of function of polypeptide" RELATED [] -synonym: "mutation causing loss of function of polypeptide" RELATED [] -synonym: "sequence variant causing loss of function of polypeptide" EXACT [] -is_a: SO:1000117 ! sequence_variant_affecting_polypeptide_function - -[Term] -id: SO:1000119 -name: sequence_variant_causing_inactive_ligand_binding_site -synonym: "mutation causing inactive ligand binding site" RELATED [] -synonym: "sequence variant causing inactive ligand binding site" EXACT [] -is_a: SO:1000118 ! sequence_variant_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000120 -name: sequence_variant_causing_inactive_catalytic_site -synonym: "mutation causing inactive catalytic site" RELATED [] -synonym: "sequence variant causing inactive catalytic site" EXACT [] -is_a: SO:1000119 ! sequence_variant_causing_inactive_ligand_binding_site - -[Term] -id: SO:1000121 -name: sequence_variant_causing_polypeptide_localization_change -synonym: "mutation causing polypeptide localization change" RELATED [] -synonym: "sequence variant causing polypeptide localization change" EXACT [] -is_a: SO:1000117 ! sequence_variant_affecting_polypeptide_function - -[Term] -id: SO:1000122 -name: sequence_variant_causing_polypeptide_post_translational_processing_change -synonym: "mutation causing polypeptide post translational processing change" RELATED [] -synonym: "polypeptide post-translational processing affected" EXACT [] -synonym: "sequence variant causing polypeptide post translational processing change" EXACT [] -is_a: SO:1000117 ! sequence_variant_affecting_polypeptide_function -is_a: SO:1000118 ! 
sequence_variant_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000123 -name: polypeptide_post_translational_processing_affected -synonym: "polypeptide_post-translational_processing_affected" RELATED [] -is_obsolete: true - -[Term] -id: SO:1000124 -name: sequence_variant_causing_partial_loss_of_function_of_polypeptide -synonym: "mutation causing partial loss of function of polypeptide" RELATED [] -synonym: "partial loss of function of polypeptide" EXACT [] -synonym: "sequence variant causing partial loss of function of polypeptide" EXACT [] -is_a: SO:1000118 ! sequence_variant_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000125 -name: sequence_variant_causing_gain_of_function_of_polypeptide -synonym: "gain of function of polypeptide" EXACT [] -synonym: "mutation causing gain of function of polypeptide" RELATED [] -synonym: "sequence variant causing gain of function of polypeptide" EXACT [] -is_a: SO:1000117 ! sequence_variant_affecting_polypeptide_function - -[Term] -id: SO:1000126 -name: sequence_variant_affecting_transcript_secondary_structure -def: "A sequence variant that affects the secondary structure (folding) of the RNA transcript molecule." [SO:ke] -synonym: "mutation affecting transcript secondary structure" RELATED [] -synonym: "sequence variant affecting transcript secondary structure" EXACT [] -is_a: SO:1000079 ! sequence_variation_affecting_transcript_sequence - -[Term] -id: SO:1000127 -name: sequence_variant_causing_compensatory_transcript_secondary_structure_mutation -synonym: "mutation causing compensatory transcript secondary structure mutation" RELATED [] -synonym: "sequence variant causing compensatory transcript secondary structure mutation" EXACT [] -is_a: SO:1000126 ! sequence_variant_affecting_transcript_secondary_structure - -[Term] -id: SO:1000132 -name: sequence_variant_effect -def: "The effect of a change in nucleotide sequence." [SO:ke] -comment: Updated after discussion with Peter Taschner - Feb 09. 
-synonym: "sequence variant effect" RELATED [] -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:0000400 ! sequence_attribute - -[Term] -id: SO:1000134 -name: sequence_variant_causing_polypeptide_fusion -synonym: "mutation causing polypeptide fusion" RELATED [] -synonym: "sequence variant causing polypeptide fusion" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000136 -name: autosynaptic_chromosome -def: "An autosynaptic chromosome is the aneuploid product of recombination between a pericentric inversion and a cytologically wild-type chromosome." [PMID:6804304] -synonym: "(Drosophila)A" RELATED [] -synonym: "autosynaptic chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000138 -name: homo_compound_chromosome -def: "A compound chromosome whereby two copies of the same chromosomal arm attached to a common centromere. The chromosome is diploid for the arm involved." [SO:ke] -synonym: "homo compound chromosome" EXACT [] -synonym: "homo-compound chromosome" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:1000140 -name: hetero_compound_chromosome -def: "A compound chromosome whereby two arms from different chromosomes are connected through the centromere of one of them." [FB:reference_manual, SO:ke] -synonym: "hetero compound chromosome" EXACT [] -synonym: "hetero-compound chromosome" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:1000141 -name: chromosome_fission -def: "A chromosome that occurred by the division of a larger chromosome." [SO:ke] -synonym: "chromosome fission" EXACT [] -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000142 -name: dexstrosynaptic_chromosome -def: "An autosynaptic chromosome carrying the two right (D = dextro) telomeres." [FB:manual] -synonym: "dexstrosynaptic chromosome" EXACT [] -is_a: SO:1000136 ! 
autosynaptic_chromosome - -[Term] -id: SO:1000143 -name: laevosynaptic_chromosome -def: "LS is an autosynaptic chromosome carrying the two left (L = levo) telomeres." [FB:manual] -synonym: "laevosynaptic chromosome" EXACT [] -is_a: SO:1000136 ! autosynaptic_chromosome - -[Term] -id: SO:1000144 -name: free_duplication -def: "A chromosome structure variation whereby the duplicated sequences are carried as a free centric element." [FB:reference_manual] -synonym: "free duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000145 -name: free_ring_duplication -def: "A ring chromosome which is a copy of another chromosome." [SO:ke] -synonym: "(Drosophila)R" RELATED [] -synonym: "free ring duplication" EXACT [] -is_a: SO:1000045 ! ring_chromosome -is_a: SO:1000144 ! free_duplication -intersection_of: SO:1000045 ! ring_chromosome -intersection_of: has_quality SO:0001516 ! free - -[Term] -id: SO:1000146 -name: complex_chromosomal_mutation -def: "A chromosome structure variant with 4 or more breakpoints." [FB:reference_manual, SO:ke] -synonym: "complex chromosomal mutation" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000147 -name: deficient_translocation -def: "A chromosomal deletion whereby a translocation occurs in which one of the four broken ends loses a segment before re-joining." [FB:reference_manual] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfT" RELATED [] -synonym: "deficient translocation" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000044 ! chromosomal_translocation -intersection_of: SO:1000044 ! chromosomal_translocation -intersection_of: has_part SO:0000159 ! deletion -intersection_of: has_part SO:0000199 ! 
translocation - -[Term] -id: SO:1000148 -name: inversion_cum_translocation -def: "A chromosomal translocation whereby the first two breaks are in the same chromosome, and the region between them is rejoined in inverted order to the other side of the first break, such that both sides of break one are present on the same chromosome. The remaining free ends are joined as a translocation with those resulting from the third break." [FB:reference_manual] -synonym: "(Drosophila)InT" RELATED [] -synonym: "(Drosophila)T" RELATED [] -synonym: "inversion cum translocation" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000044 ! chromosomal_translocation -intersection_of: SO:1000028 ! intrachromosomal_mutation -intersection_of: has_part SO:0000199 ! translocation -intersection_of: has_part SO:1000036 ! inversion - -[Term] -id: SO:1000149 -name: bipartite_duplication -def: "An interchromosomal mutation whereby the (large) region between the first two breaks listed is lost, and the two flanking segments (one of them centric) are joined as a translocation to the free ends resulting from the third break." [FB:reference_manual] -synonym: "(Drosophila)bDp" RELATED [] -synonym: "bipartite duplication" EXACT [] -is_a: SO:1000031 ! interchromosomal_mutation -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:1000150 -name: cyclic_translocation -def: "A chromosomal translocation whereby three breaks occurred in three different chromosomes. The centric segment resulting from the first break listed is joined to the acentric segment resulting from the second, rather than the third." [FB:reference_manual] -synonym: "cyclic translocation" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000151 -name: bipartite_inversion -def: "A chromosomal inversion caused by three breaks in the same chromosome; both central segments are inverted in place (i.e., they are not transposed)." 
[FB:reference_manual] -synonym: "(Drosophila)bIn" RELATED [] -synonym: "bipartite inversion" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000152 -name: uninverted_insertional_duplication -def: "An insertional duplication where a copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)eDp" RELATED [] -synonym: "uninverted insertional duplication" EXACT [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000153 -name: inverted_insertional_duplication -def: "An insertional duplication where a copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)iDp" RELATED [] -synonym: "inverted insertional duplication" EXACT [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000154 -name: insertional_duplication -def: "A chromosome duplication involving the insertion of a duplicated region (as opposed to a free duplication)." [SO:ke] -synonym: "(Drosophila)Dpp" RELATED [] -synonym: "insertional duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000155 -name: interchromosomal_transposition -def: "A chromosome structure variation whereby a transposition occurred between chromosomes." [SO:ke] -synonym: "(Drosophila)Tp" RELATED [] -synonym: "interchromosomal transposition" EXACT [] -is_a: SO:0000453 ! chromosomal_transposition -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000156 -name: inverted_interchromosomal_transposition -def: "An interchromosomal transposition whereby a copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segment." 
[FB:reference_manual] -synonym: "(Drosophila)iTp" RELATED [] -synonym: "inverted interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000157 -name: uninverted_interchromosomal_transposition -def: "An interchromosomal transition where the segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)eTp" RELATED [] -synonym: "uninverted interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000158 -name: inverted_intrachromosomal_transposition -def: "An intrachromosomal transposition whereby the segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)iTp" RELATED [] -synonym: "inverted intrachromosomal transposition" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000041 ! intrachromosomal_transposition -intersection_of: SO:1000041 ! intrachromosomal_transposition -intersection_of: has_part SO:1000036 ! inversion - -[Term] -id: SO:1000159 -name: uninverted_intrachromosomal_transposition -def: "An intrachromosomal transposition whereby the segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)eTp" RELATED [] -synonym: "uninverted intrachromosomal transposition" EXACT [] -is_a: SO:1000041 ! intrachromosomal_transposition -intersection_of: SO:1000041 ! intrachromosomal_transposition -intersection_of: has_part SO:0001514 ! 
direct - -[Term] -id: SO:1000160 -name: unoriented_insertional_duplication -def: "An insertional duplication where a copy of the segment between the first two breaks listed is inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [FB:reference_manual] -comment: Flag - unknown in the definition. -synonym: "(Drosophila)uDp" RELATED [] -synonym: "unoriented insertional duplication" EXACT [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000161 -name: unoriented_interchromosomal_transposition -def: "An interchromosomal transposition whereby a copy of the segment between the first two breaks listed is inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [FB:reference_manual] -comment: FLAG - term describes an unknown. -synonym: "(Drosophila)uTp" RELATED [] -synonym: "unorientated interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000162 -name: unoriented_intrachromosomal_transposition -def: "An intrachromosomal transposition whereby the segment between the first two breaks listed is removed and inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [FB:reference_manual] -comment: FLAG - definition describes an unknown. -synonym: "(Drosophila)uTp" RELATED [] -synonym: "unorientated intrachromosomal transposition" EXACT [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000170 -name: uncharacterised_chromosomal_mutation -synonym: "uncharacterised chromosomal mutation" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000171 -name: deficient_inversion -def: "A chromosomal deletion whereby three breaks occur in the same chromosome; one central region is lost, and the other is inverted." 
[FB:reference_manual, SO:ke] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfIn" RELATED [] -synonym: "deficient inversion" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000030 ! chromosomal_inversion -intersection_of: SO:1000028 ! intrachromosomal_mutation -intersection_of: has_part SO:0000159 ! deletion -intersection_of: has_part SO:1000036 ! inversion - -[Term] -id: SO:1000173 -name: tandem_duplication -def: "A duplication consisting of 2 identical regions, which are adjacent." [SO:ke] -synonym: "tandem duplication" EXACT [] -is_a: SO:1000035 ! duplication - -[Term] -id: SO:1000175 -name: partially_characterised_chromosomal_mutation -synonym: "partially characterised chromosomal mutation" EXACT [] -is_a: SO:1000170 ! uncharacterised_chromosomal_mutation - -[Term] -id: SO:1000180 -name: sequence_variant_affecting_gene_structure -def: "A sequence_variant_effect that changes the gene structure." [SO:ke] -synonym: "mutation affecting gene structure" RELATED [] -synonym: "sequence variant affecting gene structure" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:1000181 -name: sequence_variant_causing_gene_fusion -def: "A sequence_variant_effect that changes the gene structure by causing a fusion to another gene." [SO:ke] -synonym: "mutation causing gene fusion" RELATED [] -synonym: "sequence variant causing gene fusion" EXACT [] -is_a: SO:1000180 ! sequence_variant_affecting_gene_structure - -[Term] -id: SO:1000182 -name: chromosome_number_variation -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number." [SO:ke] -synonym: "chromosome number variation" EXACT [] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:1000183 -name: chromosome_structure_variation -synonym: "chromosome structure variation" EXACT [] -is_a: SO:0000240 ! 
chromosome_variation - -[Term] -id: SO:1000184 -name: sequence_variant_causes_exon_loss -def: "A sequence variant affecting splicing and causes an exon loss." [SO:ke] -synonym: "mutation causes exon loss" RELATED [] -synonym: "sequence variant causes exon loss" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000185 -name: sequence_variant_causes_intron_gain -def: "A sequence variant effect, causing an intron to be gained by the processed transcript; usually a result of a donor acceptor mutation (SO:1000072)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causes intron gain" RELATED [] -synonym: "sequence variant causes intron gain" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000186 -name: sequence_variant_causing_cryptic_splice_donor_activation -synonym: "sequence variant causing cryptic splice donor activation" EXACT [] -is_a: SO:1000074 ! sequence_variant_causing_cryptic_splice_activation - -[Term] -id: SO:1001186 -name: sequence_variant_causing_cryptic_splice_acceptor_activation -synonym: "sequence variant causing cryptic splice acceptor activation" EXACT [] -is_a: SO:1000074 ! sequence_variant_causing_cryptic_splice_activation - -[Term] -id: SO:1001187 -name: alternatively_spliced_transcript -def: "A transcript that is alternatively spliced." [SO:xp] -synonym: "alternatively spliced transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000877 ! alternatively_spliced - -[Term] -id: SO:1001188 -name: encodes_1_polypeptide -def: "A gene that is alternately spliced, but encodes only one polypeptide." [SO:ke] -synonym: "encodes 1 polypeptide" EXACT [] -is_a: SO:0000463 ! encodes_alternately_spliced_transcripts - -[Term] -id: SO:1001189 -name: encodes_greater_than_1_polypeptide -def: "A gene that is alternately spliced, and encodes more than one polypeptide." 
[SO:ke] -synonym: "encodes greater than 1 polypeptide" EXACT [] -is_a: SO:0000463 ! encodes_alternately_spliced_transcripts - -[Term] -id: SO:1001190 -name: encodes_different_polypeptides_different_stop -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences, but use different stop codons." [SO:ke] -synonym: "encodes different polypeptides different stop" EXACT [] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001191 -name: encodes_overlapping_peptides_different_start -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences, but use different start codons." [SO:ke] -synonym: "encodes overlapping peptides different start" EXACT [] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001192 -name: encodes_disjoint_polypeptides -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that do not have overlapping peptide sequences." [SO:ke] -synonym: "encodes disjoint polypeptides" EXACT [] -is_a: SO:1001189 ! encodes_greater_than_1_polypeptide - -[Term] -id: SO:1001193 -name: encodes_overlapping_polypeptides_different_start_and_stop -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences, but use different start and stop codons." [SO:ke] -synonym: "encodes overlapping polypeptides different start and stop" EXACT [] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001194 -name: alternatively_spliced_gene_encoding_greater_than_1_polypeptide_coding_regions_overlapping -is_obsolete: true - -[Term] -id: SO:1001195 -name: encodes_overlapping_peptides -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences." [SO:ke] -synonym: "encodes overlapping peptides" EXACT [] -is_a: SO:1001189 ! 
encodes_greater_than_1_polypeptide - -[Term] -id: SO:1001196 -name: cryptogene -def: "A maxicircle gene so extensively edited that it cannot be matched to its edited mRNA sequence." [SO:ma] -intersection_of: SO:0000654 ! maxicircle_gene -intersection_of: has_quality SO:0000976 ! cryptic - -[Term] -id: SO:1001197 -name: dicistronic_primary_transcript -def: "A primary transcript that has the quality dicistronic." [SO:xp] -synonym: "dicistronic primary transcript" EXACT [] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:1001217 -name: member_of_regulon -synonym: "member of regulon" EXACT [] -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:1001244 -name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non_overlapping -synonym: "alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non-overlapping" RELATED [] -is_obsolete: true - -[Term] -id: SO:1001246 -name: CDS_independently_known -def: "A CDS with the evidence status of being independently known." [SO:xp] -synonym: "CDS independently known" EXACT [] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000906 ! independently_known - -[Term] -id: SO:1001247 -name: orphan_CDS -def: "A CDS whose predicted amino acid sequence is unsupported by any experimental evidence or by any match with any other known sequence." [SO:ma] -synonym: "orphan CDS" EXACT [] -intersection_of: SO:1001254 ! CDS_predicted -intersection_of: has_origin SO:0000910 ! orphan - -[Term] -id: SO:1001249 -name: CDS_supported_by_domain_match_data -def: "A CDS that is supported by domain similarity." [SO:xp] -synonym: "CDS supported by domain match data" EXACT [] -intersection_of: SO:1001251 ! CDS_supported_by_sequence_similarity_data -intersection_of: has_quality SO:0000908 ! 
supported_by_domain_match - -[Term] -id: SO:1001251 -name: CDS_supported_by_sequence_similarity_data -def: "A CDS that is supported by sequence similarity data." [SO:xp] -synonym: "CDS supported by sequence similarity data" EXACT [] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:1001254 -name: CDS_predicted -def: "A CDS that is predicted." [SO:ke] -synonym: "CDS predicted" EXACT [] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000732 ! predicted - -[Term] -id: SO:1001255 -name: status_of_coding_sequence -is_obsolete: true - -[Term] -id: SO:1001259 -name: CDS_supported_by_EST_or_cDNA_data -def: "A CDS that is supported by similarity to EST or cDNA data." [SO:xp] -synonym: "CDS supported by EST or cDNA data" EXACT [] -intersection_of: SO:1001251 ! CDS_supported_by_sequence_similarity_data -intersection_of: has_quality SO:0000909 ! supported_by_EST_or_cDNA - -[Term] -id: SO:1001260 -name: internal_Shine_Dalgarno_sequence -def: "A Shine-Dalgarno sequence that stimulates recoding through interactions with the anti-Shine-Dalgarno in the RNA of small ribosomal subunits of translating ribosomes. The signal is only operative in Bacteria." [PMID:12519954, SO:ke] -synonym: "internal Shine Dalgarno sequence" EXACT [] -synonym: "internal Shine-Dalgarno sequence" EXACT [] -is_a: SO:0000243 ! internal_ribosome_entry_site -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001261 -name: recoded_mRNA -def: "The sequence of a mature mRNA transcript, modified before translation or during translation, usually by special cis-acting signals." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract] -synonym: "recoded mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000881 ! 
recoded - -[Term] -id: SO:1001262 -name: minus_1_translationally_frameshifted -def: "An attribute describing a translational frameshift of -1." [SO:ke] -synonym: "minus 1 translationally frameshifted" EXACT [] -is_a: SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:1001263 -name: plus_1_translationally_frameshifted -def: "An attribute describing a translational frameshift of +1." [SO:ke] -synonym: "plus 1 translationally frameshifted" EXACT [] -is_a: SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:1001264 -name: mRNA_recoded_by_translational_bypass -def: "A recoded_mRNA where translation was suspended at a particular codon and resumed at a particular non-overlapping downstream codon." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract] -synonym: "mRNA recoded by translational bypass" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:1001265 -name: mRNA_recoded_by_codon_redefinition -def: "A recoded_mRNA that was modified by an alteration of codon meaning." [SO:ma] -synonym: "mRNA recoded by codon redefinition" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000882 ! codon_redefined - -[Term] -id: SO:1001266 -name: stop_codon_redefinition_as_selenocysteine -is_obsolete: true - -[Term] -id: SO:1001267 -name: stop_codon_readthrough -is_obsolete: true - -[Term] -id: SO:1001268 -name: recoding_stimulatory_region -def: "A site in an mRNA sequence that stimulates the recoding of a region in the same mRNA." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12519954&dopt=Abstract] -synonym: "recoding stimulatory region" EXACT [] -synonym: "recoding stimulatory signal" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:1001269 -name: four_bp_start_codon -def: "A non-canonical start codon with 4 base pairs." 
[SO:ke] -synonym: "4bp start codon" EXACT [] -synonym: "four bp start codon" EXACT [] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001270 -name: stop_codon_redefinition_as_pyrrolysine -is_obsolete: true - -[Term] -id: SO:1001271 -name: archaeal_intron -def: "An intron characteristic of Archaeal tRNA and rRNA genes, where intron transcript generates a bulge-helix-bulge motif that is recognised by a splicing endoribonuclease." [PMID:9301331, SO:ma] -comment: Intron characteristic of tRNA genes; splices by an endonuclease-ligase mediated mechanism. -synonym: "archaeal intron" EXACT [] -is_a: SO:0001216 ! endonuclease_spliced_intron - -[Term] -id: SO:1001272 -name: tRNA_intron -def: "An intron found in tRNA that is spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -comment: Could be a cross product with Gene ontology, GO:0006388. -synonym: "pre-tRNA intron" EXACT [] -synonym: "tRNA intron" EXACT [] -is_a: SO:0001216 ! endonuclease_spliced_intron - -[Term] -id: SO:1001273 -name: CTG_start_codon -def: "A non-canonical start codon of sequence CTG." [SO:ke] -synonym: "CTG start codon" EXACT [] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001274 -name: SECIS_element -def: "The incorporation of selenocysteine into a protein sequence is directed by an in-frame UGA codon (usually a stop codon) within the coding region of the mRNA. Selenoprotein mRNAs contain a conserved secondary structure in the 3' UTR that is required for the distinction of UGA stop from UGA selenocysteine. The selenocysteine insertion sequence (SECIS) is around 60 nt in length and adopts a hairpin structure which is sufficiently well-defined and conserved to act as a computational screen for selenoprotein genes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00031] -synonym: "SECIS element" EXACT [] -xref: http://en.wikipedia.org/wiki/SECIS_element "wiki" -is_a: SO:1001268 ! 
recoding_stimulatory_region - -[Term] -id: SO:1001275 -name: retron -def: "Sequence coding for a short, single-stranded, DNA sequence via a retrotransposed RNA intermediate; characteristic of some microbial genomes." [SO:ma] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1001277 -name: three_prime_recoding_site -def: "The recoding stimulatory signal located downstream of the recoding site." [SO:ke] -synonym: "three prime recoding site" EXACT [] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001279 -name: three_prime_stem_loop_structure -def: "A recoding stimulatory region, the stem-loop secondary structural element is downstream of the redefined region." [PMID:12519954, SO:ke] -synonym: "three prime stem loop structure" EXACT [] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001280 -name: five_prime_recoding_site -def: "The recoding stimulatory signal located upstream of the recoding site." [SO:ke] -synonym: "five prime recoding site" EXACT [] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001281 -name: flanking_three_prime_quadruplet_recoding_signal -def: "Four base pair sequence immediately downstream of the redefined region. The redefined region is a frameshift site. The quadruplet is 2 overlapping codons." [PMID:12519954, SO:ke] -synonym: "flanking three prime quadruplet recoding signal" EXACT [] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001282 -name: UAG_stop_codon_signal -def: "A stop codon signal for a UAG stop codon redefinition." [SO:ke] -synonym: "UAG stop codon signal" EXACT [] -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001283 -name: UAA_stop_codon_signal -def: "A stop codon signal for a UAA stop codon redefinition." [SO:ke] -synonym: "UAA stop codon signal" EXACT [] -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001284 -name: regulon -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." 
[ISBN:0198506732] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Regulon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:1001285 -name: UGA_stop_codon_signal -def: "A stop codon signal for a UGA stop codon redefinition." [SO:ke] -synonym: "UGA stop codon signal" EXACT [] -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001286 -name: three_prime_repeat_recoding_signal -def: "A recoding stimulatory signal, downstream sequence important for recoding that contains repetitive elements." [PMID:12519954, SO:ke] -synonym: "three prime repeat recoding signal" EXACT [] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001287 -name: distant_three_prime_recoding_signal -def: "A recoding signal that is found many hundreds of nucleotides 3' of a redefined stop codon." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8709208&dopt=Abstract] -synonym: "distant three prime recoding signal" EXACT [] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001288 -name: stop_codon_signal -def: "A recoding stimulatory signal that is a stop codon and has effect on efficiency of recoding." [PMID:12519954, SO:ke] -comment: This term does not include the stop codons that are redefined. An example would be a stop codon that partially overlapped a frame shifting site would be an example stimulatory signal. -synonym: "stop codon signal" EXACT [] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:2000061 -name: databank_entry -def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -synonym: "databank entry" EXACT [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:3000000 -name: gene_segment -def: "A gene component region which acts as a recombinational unit of a gene whose functional form is generated through somatic recombination." [GOC:add] -comment: Requested by tracker 2021594, July 2008, by Alex. 
-synonym: "gene segment" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Typedef] -id: adjacent_to -name: adjacent_to -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence." [SO:ke] -subset: SOFA - -[Typedef] -id: associated_with -name: associated_with -comment: This relationship is vague and up for discussion. - -[Typedef] -id: complete_evidence_for_feature -name: complete_evidence_for_feature -def: "B is complete_evidence_for_feature A if the extent (5' and 3' boundaries) and internal boundaries of B fully support the extent and internal boundaries of A." [SO:ke] -comment: If A is a feature with multiple regions such as a multi exon transcript, the supporting EST evidence is complete if each of the regions is supported by an equivalent region in B. Also there must be no extra regions in B that are not represented in A. This relationship was requested by jeltje on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: derives_from -name: derives_from -subset: SOFA -is_transitive: true - -[Typedef] -id: edited_from -name: edited_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:45Z - -[Typedef] -id: edited_to -name: edited_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:11Z - -[Typedef] -id: evidence_for_feature -name: evidence_for_feature -def: "B is evidence_for_feature A, if an instance of B supports the existence of A." [SO:ke] -comment: This relationship was requested by nlw on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true - -[Typedef] -id: exemplar_of -name: exemplar_of -def: "X is exemplar of Y if X is the best evidence for Y." [SO:ke] -comment: Tracker id: 2594157. 
- -[Typedef] -id: genome_of -name: genome_of - -[Typedef] -id: guided_by -name: guided_by -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:04Z - -[Typedef] -id: guides -name: guides -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:24Z - -[Typedef] -id: has_genome_location -name: has_genome_location -is_obsolete: true - -[Typedef] -id: has_intergral_part -name: has_integral_part -def: "X has_integral_part Y if and only if: X has_part Y and Y part_of X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: mRNA has_integral_part CDS. -created_by: kareneilbeck -creation_date: 2009-08-19T12:01:46Z - -[Typedef] -id: has_origin -name: has_origin - -[Typedef] -id: has_part -name: has_part -def: "Inverse of part_of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: operon has_part gene. - -[Typedef] -id: has_quality -name: has_quality -comment: The relationship between a feature and an attribute. - -[Typedef] -id: homologous_to -name: homologous_to -subset: SOFA -is_symmetric: true -is_a: similar_to ! similar_to - -[Typedef] -id: integral_part_of -name: integral_part_of -def: "X integral_part_of Y if and only if: X part_of Y and Y has_part X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: exon integral_part_of transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:03:28Z - -[Typedef] -id: member_of -name: member_of -comment: A subtype of part_of. Inverse is collection_of. Winston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. -subset: SOFA -is_transitive: true - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -subset: SOFA -is_a: homologous_to ! homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! 
homologous_to - -[Typedef] -id: paralogous_to -name: paralogous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: part_of -name: part_of -def: "X part_of Y if X is a subregion of Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: amino_acid part_of polypeptide. -subset: SOFA -is_transitive: true - -[Typedef] -id: partial_evidence_for_feature -name: partial_evidence_for_feature -def: "B is partial_evidence_for_feature A if the extent of B supports part_of but not all of A." [SO:ke] -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: position_of -name: position_of - -[Typedef] -id: processed_from -name: processed_from -def: "Inverse of processed_into." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA processed_from miRNA_primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:14:00Z - -[Typedef] -id: processed_into -name: processed_into -def: "X is processed_into Y if a region X is modified to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA_primary_transcript processed into miRNA. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:15:02Z - -[Typedef] -id: recombined_from -name: recombined_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:21:03Z - -[Typedef] -id: recombined_to -name: recombined_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:20:07Z - -[Typedef] -id: regulated_by -name: regulated_by -is_obsolete: true - -[Typedef] -id: sequence_of -name: sequence_of - -[Typedef] -id: similar_to -name: similar_to -subset: SOFA -is_symmetric: true - -[Typedef] -id: trans_spliced_from -name: trans_spliced_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:14Z - -[Typedef] -id: trans_spliced_to -name: trans_spliced_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:00Z - -[Typedef] -id: transcribed_from -name: transcribed_from -def: "X is transcribed_from Y if X is synthesized from template Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: primary_transcript transcribed_from gene. -created_by: kareneilbeck -creation_date: 2009-08-19T12:05:39Z - -[Typedef] -id: transcribed_to -name: transcribed_to -def: "Inverse of transcribed_from." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: gene transcribed_to primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:08:24Z - -[Typedef] -id: translates_to -name: translates_to -def: "Inverse of translation _of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: codon translates_to amino_acid. -created_by: kareneilbeck -creation_date: 2009-08-19T12:11:53Z - -[Typedef] -id: translation_of -name: translation_of -def: "X is translation of Y if X is translated by ribosome to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: Polypeptide translation_of CDS. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:09:59Z - -[Typedef] -id: variant_of -name: variant_of -def: "A' is a variant (mutation) of A = definition every instance of A' is either an immediate mutation of some instance of A, or there is a chain of immediate mutation processes linking A' to some instance of A." [SO:immuno_workshop] -comment: Added to SO during the immunology workshop, June 2007. This relationship was approved by Barry Smith. - diff --git a/annotation/NBIS/Ontology/SOXP/so-xp_2_4_3.obo b/annotation/NBIS/Ontology/SOXP/so-xp_2_4_3.obo deleted file mode 100644 index ea29ebf5b..000000000 --- a/annotation/NBIS/Ontology/SOXP/so-xp_2_4_3.obo +++ /dev/null @@ -1,16598 +0,0 @@ -format-version: 1.2 -date: 01:06:2010 10:46 -saved-by: kareneilbeck -auto-generated-by: OBO-Edit 2.1-beta3 -subsetdef: biosapiens "biosapiens protein feature ontology" -subsetdef: SOFA "SO feature annotation" -synonymtypedef: aa1 "amino acid 1 letter code" -synonymtypedef: aa3 "amino acid 3 letter code" -synonymtypedef: AAMOD "amino acid modification" -synonymtypedef: BS "biosapiens" -synonymtypedef: dbsnp "dbsnp variant terms" -synonymtypedef: ebi_variants "ensembl variant terms" -synonymtypedef: RNAMOD "RNA modification" EXACT -default-namespace: sequence -namespace-id-rule: * SO:$sequence(7,0,9999999)$ -remark: autogenerated-by\: DAG-Edit version 1.417\nsaved-by\: eilbeck\ndate\: Tue May 11 15\:18\:44 PDT 2004\nversion\: $Revision\: 1.45 $ - -[Term] -id: SO:0000000 -name: Sequence_Ontology -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000001 -name: region -def: "A sequence_feature with an extent greater than zero. A nucleotide region is composed of bases and a polypeptide region is composed of amino acids." [SO:ke] -subset: SOFA -synonym: "sequence" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000002 -name: sequence_secondary_structure -def: "A folded sequence." [SO:ke] -synonym: "sequence secondary structure" EXACT [] -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000003 -name: G_quartet -def: "G-quartets are unusual nucleic acid structures consisting of a planar arrangement where each guanine is hydrogen bonded by hoogsteen pairing to another guanine in the quartet." [http://www.ncbi.nlm.nih.gov/pubmed/7919797?dopt=Abstract] -synonym: "G quartet" EXACT [] -synonym: "G tetrad" EXACT [] -synonym: "G-quadruplex" EXACT [] -synonym: "G-quartet" EXACT [] -synonym: "G-tetrad" EXACT [] -synonym: "G_quadruplex" EXACT [] -synonym: "guanine tetrad" EXACT [] -xref: http://en.wikipedia.org/wiki/G-quadruplex "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000004 -name: interior_coding_exon -subset: SOFA -synonym: "interior coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000005 -name: satellite_DNA -def: "The many tandem repeats (identical or related) of a short basic repeating unit; many have a base composition or other property different from the genome average that allows them to be separated from the bulk (main band) genomic DNA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "satellite DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Satellite_DNA "wiki" -is_a: SO:0000705 ! tandem_repeat - -[Term] -id: SO:0000006 -name: PCR_product -def: "A region amplified by a PCR reaction." [SO:ke] -comment: This term is mapped to MGED. This term is now located in OBI, with the following ID OBI_0000406. -subset: SOFA -synonym: "amplicon" RELATED [] -synonym: "PCR product" EXACT [] -xref: http://en.wikipedia.org/wiki/RAPD "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000007 -name: read_pair -def: "A pair of sequencing reads in which the two members of the pair are related by originating at either end of a clone insert." [SO:ls] -subset: SOFA -synonym: "read-pair" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! 
contig - -[Term] -id: SO:0000008 -name: gene_sensu_your_favorite_organism -is_obsolete: true - -[Term] -id: SO:0000009 -name: gene_class -is_obsolete: true - -[Term] -id: SO:0000010 -name: protein_coding -synonym: "protein-coding" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000011 -name: non_protein_coding -synonym: "non protein-coding" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000012 -name: scRNA_primary_transcript -def: "The primary transcript of any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -synonym: "scRNA primary transcript" EXACT [] -synonym: "scRNA transcript" EXACT [] -synonym: "small cytoplasmic RNA" RELATED [] -synonym: "small cytoplasmic RNA transcript" EXACT [] -synonym: "small_cytoplasmic_RNA" RELATED [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000013 -name: scRNA -def: "Any one of several small cytoplasmic RNA molecules present in the cytoplasm and sometimes nucleus of a Eukaryote." [http://www.ebi.ac.uk/embl/WebFeat/align/scRNA_s.html] -subset: SOFA -synonym: "small cytoplasmic RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000012 ! scRNA_primary_transcript - -[Term] -id: SO:0000014 -name: INR_motif -def: "A sequence element characteristic of some RNA polymerase II promoters required for the correct positioning of the polymerase for the start of transcription. Overlaps the TSS. The mammalian consensus sequence is YYAN(T|A)YY; the Drosophila consensus sequence is TCA(G|T)t(T|C). In each the A is at position +1 with respect to the TSS. Functionally similar to the TATA box element." [PMID:12651739] -synonym: "DMp2" RELATED [] -synonym: "initiator" EXACT [] -synonym: "initiator motif" EXACT [] -synonym: "INR motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0000015 -name: DPE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters; Positioned from +28 to +32 with respect to the TSS (+1). Experimental results suggest that the DPE acts in conjunction with the INR_motif to provide a binding site for TFIID in the absence of a TATA box to mediate transcription of TATA-less promoters. Consensus sequence (A|G)G(A|T)(C|T)(G|A|C)." [PMID:12651739\:12537576] -synonym: "CRWMGCGWKCGCTTS" NARROW [] -synonym: "downstream core promoter element" EXACT [] -synonym: "DPE motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000016 -name: BRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements at -37 to -32 with respect to the TSS (+1). Consensus sequence is (G|C)(G|C)(G|A)CGCC. Binds TFIIB." [PMID:12651739] -synonym: "B-recognition element" EXACT [] -synonym: "BRE motif" EXACT [] -synonym: "TFIIB recognition element" RELATED [] -synonym: "transcription factor B-recognition element" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000017 -name: PSE_motif -def: "A sequence element characteristic of the promoters of snRNA genes transcribed by RNA polymerase II or by RNA polymerase III. Located between -45 and -60 relative to the TSS. The human PSE_motif consensus sequence is TCACCNTNA(C|G)TNAAAAG(T|G)." [PMID:12651739] -synonym: "proximal sequence element" EXACT [] -synonym: "PSE motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000167 ! promoter - -[Term] -id: SO:0000018 -name: linkage_group -def: "A group of loci that can be grouped in a linear order representing the different degrees of linkage among the genes concerned." 
[ISBN:038752046] -synonym: "linkage group" EXACT [] -xref: http://en.wikipedia.org/wiki/Linkage_group "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000020 -name: RNA_internal_loop -def: "A region of double stranded RNA where the bases do not conform to WC base pairing. The loop is closed on both sides by canonical base pairing. If the interruption to base pairing occurs on one strand only, it is known as a bulge." [SO:ke] -synonym: "RNA internal loop" EXACT [] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000021 -name: asymmetric_RNA_internal_loop -def: "An internal RNA loop where one of the strands includes more bases than the corresponding region on the other strand." [SO:ke] -synonym: "asymmetric RNA internal loop" EXACT [] -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000022 -name: A_minor_RNA_motif -def: "A region forming a motif, composed of adenines, where the minor groove edges are inserted into the minor groove of another helix." [SO:ke] -synonym: "A minor RNA motif" EXACT [] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000023 -name: K_turn_RNA_motif -def: "The kink turn (K-turn) is an RNA structural motif that creates a sharp (~120 degree) bend between two continuous helices." [SO:ke] -synonym: "K turn RNA motif" EXACT [] -synonym: "K-turn" EXACT [] -synonym: "kink turn" EXACT [] -synonym: "kink-turn motif" EXACT [] -xref: http://en.wikipedia.org/wiki/K-turn "wiki" -is_a: SO:0000021 ! asymmetric_RNA_internal_loop - -[Term] -id: SO:0000024 -name: sarcin_like_RNA_motif -def: "A loop in ribosomal RNA containing the sites of attack for ricin and sarcin." [http://www.ncbi.nlm.nih.gov/pubmed/7897662] -synonym: "sarcin like RNA motif" EXACT [] -synonym: "sarcin/ricin domain" EXACT [] -synonym: "sarcin/ricin loop" EXACT [] -synonym: "sarcin/ricin RNA domain" EXACT [] -is_a: SO:0000021 ! 
asymmetric_RNA_internal_loop - -[Term] -id: SO:0000025 -name: symmetric_RNA_internal_loop -def: "An internal RNA loop where the extent of the loop on both stands is the same size." [SO:ke] -synonym: "A-minor RNA motif" EXACT [] -is_a: SO:0000020 ! RNA_internal_loop - -[Term] -id: SO:0000026 -name: RNA_junction_loop -synonym: "RNA junction loop" EXACT [] -is_a: SO:0000715 ! RNA_motif - -[Term] -id: SO:0000027 -name: RNA_hook_turn -synonym: "hook turn" RELATED [] -synonym: "hook-turn motif" EXACT [] -synonym: "RNA hook turn" EXACT [] -is_a: SO:0000026 ! RNA_junction_loop - -[Term] -id: SO:0000028 -name: base_pair -synonym: "base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Base_pair "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000029 -name: WC_base_pair -def: "The canonical base pair, where two bases interact via WC edges, with glycosidic bonds oriented cis relative to the axis of orientation." [PMID:12177293] -synonym: "canonical base pair" EXACT [] -synonym: "Watson Crick base pair" EXACT [] -synonym: "Watson-Crick base pair" RELATED [] -synonym: "Watson-Crick pair" EXACT [] -synonym: "WC base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000030 -name: sugar_edge_base_pair -def: "A type of non-canonical base-pairing." [PMID:12177293] -synonym: "sugar edge base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000031 -name: aptamer -def: "DNA or RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http://aptamer.icmb.utexas.edu] -xref: http://en.wikipedia.org/wiki/Aptamer "wiki" -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000032 -name: DNA_aptamer -def: "DNA molecules that have been selected from random pools based on their ability to bind other molecules." [http:aptamer.icmb.utexas.edu] -synonym: "DNA aptamer" EXACT [] -is_a: SO:0000031 ! 
aptamer - -[Term] -id: SO:0000033 -name: RNA_aptamer -def: "RNA molecules that have been selected from random pools based on their ability to bind other molecules." [http://aptamer.icmb.utexas.edu] -synonym: "RNA aptamer" EXACT [] -is_a: SO:0000031 ! aptamer - -[Term] -id: SO:0000034 -name: morpholino_oligo -def: "Morpholino oligos are synthesized from four different Morpholino subunits, each of which contains one of the four genetic bases (A, C, G, T) linked to a 6-membered morpholine ring. Eighteen to 25 subunits of these four subunit types are joined in a specific order by non-ionic phosphorodiamidate intersubunit linkages to give a Morpholino." [http://www.gene-tools.com/] -synonym: "morpholino oligo" EXACT [] -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001183 ! morpholino - -[Term] -id: SO:0000035 -name: riboswitch -def: "A riboswitch is a part of an mRNA that can act as a direct sensor of small molecules to control their own expression. A riboswitch is a cis element in the 5' end of an mRNA, that acts as a direct sensor of metabolites." [PMID:2820954] -synonym: "riboswitch RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Riboswitch "wiki" -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000036 -name: matrix_attachment_site -def: "A DNA region that is required for the binding of chromatin to the nuclear matrix." [SO:ma] -synonym: "MAR" EXACT [] -synonym: "matrix association region" EXACT [] -synonym: "matrix attachment region" EXACT [] -synonym: "matrix attachment site" EXACT [] -synonym: "nuclear matrix association region" EXACT [] -synonym: "nuclear matrix attachment site" EXACT [] -synonym: "S/MAR" EXACT [] -synonym: "S/MAR element" RELATED [] -synonym: "scaffold attachment site" EXACT [] -synonym: "scaffold matrix attachment region" EXACT [] -synonym: "SMAR" EXACT [] -xref: http://en.wikipedia.org/wiki/Matrix_attachment_site "wiki" -is_a: SO:0000626 ! 
chromosomal_regulatory_element - -[Term] -id: SO:0000037 -name: locus_control_region -def: "A DNA region that includes DNAse hypersensitive sites located 5' to a gene that confers the high-level, position-independent, and copy number-dependent expression to that gene." [SO:ma] -synonym: "LCR" EXACT [] -synonym: "locus control element" RELATED [] -synonym: "locus control region" EXACT [] -xref: http://en.wikipedia.org/wiki/Locus_control_region "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000038 -name: match_set -def: "A collection of match parts." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000039 -name: match_part -def: "A part of a match, for example an hsp from blast is a match_part." [SO:ke] -subset: SOFA -synonym: "match part" EXACT [] -is_a: SO:0001410 ! experimental_feature -relationship: part_of SO:0000343 ! match - -[Term] -id: SO:0000040 -name: genomic_clone -def: "A clone of a DNA region of a genome." [SO:ma] -synonym: "genomic clone" EXACT [] -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000041 -name: sequence_operation -def: "An operation that can be applied to a sequence, that results in a change." [SO:ke] -synonym: "sequence operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000042 -name: pseudogene_attribute -def: "An attribute of a pseudogene (SO:0000336)." [SO:ma] -synonym: "pseudogene attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000043 -name: processed_pseudogene -def: "A pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promoters, but often including a polyA tail." [SO:xp] -comment: Please not the synonym R psi M uses the spelled out form of the greek letter. -synonym: "processed pseudogene" EXACT [] -synonym: "pseudogene by reverse transcription" RELATED [] -synonym: "R psi G" RELATED [] -synonym: "retropseudogene" EXACT [] -is_a: SO:0000336 ! 
pseudogene - -[Term] -id: SO:0000044 -name: pseudogene_by_unequal_crossing_over -def: "A pseudogene caused by unequal crossing over at recombination." [SO:ke] -synonym: "pseudogene by unequal crossing over" EXACT [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0000045 -name: delete -def: "To remove a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000046 -name: insert -def: "To insert a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000047 -name: invert -def: "To invert a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000048 -name: substitute -def: "To substitute a subsection of sequence for another." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000049 -name: translocate -def: "To translocate a subsection of sequence." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000050 -name: gene_part -def: "A part of a gene, that has no other route in the ontology back to region. This concept is necessary for logical inference as these parts must have the properties of region. It also allows us to associate all the parts of genes with a gene." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000051 -name: probe -def: "A DNA sequence used experimentally to detect the presence or absence of a complementary nucleic acid." [SO:ma] -xref: http://en.wikipedia.org/wiki/Hybridization_probe "wiki" -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000052 -name: assortment_derived_deficiency -synonym: "assortment-derived_deficiency" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000053 -name: sequence_variant_affecting_regulatory_region -def: "A sequence_variant_effect which changes the regulatory region of a gene." [SO:ke] -synonym: "mutation affecting regulatory region" RELATED [] -synonym: "sequence variant affecting regulatory region" EXACT [] -is_a: SO:1000132 ! 
sequence_variant_effect - -[Term] -id: SO:0000054 -name: aneuploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Aneuploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0000055 -name: hyperploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number as extra chromosomes are present." [SO:ke] -xref: http://en.wikipedia.org/wiki/Hyperploid "wiki" -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000056 -name: hypoploid -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number as some chromosomes are missing." [SO:ke] -xref: http://en.wikipedia.org/wiki/Hypoploid "wiki" -is_a: SO:0000054 ! aneuploid - -[Term] -id: SO:0000057 -name: operator -def: "A regulatory element of an operon to which activators or repressors bind thereby effecting translation of genes in that operon." [SO:ma] -subset: SOFA -synonym: "operator segment" EXACT [] -xref: http://en.wikipedia.org/wiki/Operator_(biology)#Operator "wiki" -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000058 -name: assortment_derived_aneuploid -synonym: "assortment-derived_aneuploid" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000059 -name: nuclease_binding_site -def: "A region of a molecule that binds to a nuclease." [SO:cb] -subset: SOFA -synonym: "nuclease binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0000060 -name: compound_chromosome_arm -comment: FLAG - this term is should probably be a part of rather than an is_a. -synonym: "compound chromosome arm" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:0000061 -name: restriction_enzyme_binding_site -def: "A region of a molecule that binds to a restriction enzyme." 
[SO:cb] -synonym: "restriction endonuclease binding site" EXACT [] -synonym: "restriction endonuclease recognition site" RELATED [] -synonym: "restriction enzyme binding site" EXACT [] -synonym: "restriction enzyme recognition site" RELATED [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000062 -name: deficient_intrachromosomal_transposition -def: "An intrachromosomal transposition whereby a translocation in which one of the four broken ends loses a segment before re-joining." [FB:reference_manual] -synonym: "deficient intrachromosomal transposition" EXACT [] -is_a: SO:1000041 ! intrachromosomal_transposition -intersection_of: SO:1000041 ! intrachromosomal_transposition -intersection_of: has_part SO:0000159 ! deletion - -[Term] -id: SO:0000063 -name: deficient_interchromosomal_transposition -def: "An interchromosomal transposition whereby a translocation in which one of the four broken ends loses a segment before re-joining." [SO:ke] -synonym: "deficient interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:0000064 -name: gene_by_transcript_attribute -comment: This classes of attributes was added by MA to allow the broad description of genes based on qualities of the transcript(s). A product of SO meeting 2004. -is_obsolete: true - -[Term] -id: SO:0000065 -name: free_chromosome_arm -def: "A chromosome structure variation whereby an arm exists as an individual chromosome element." [SO:ke] -synonym: "free chromosome arm" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000066 -name: gene_by_polyadenylation_attribute -is_obsolete: true - -[Term] -id: SO:0000067 -name: gene_to_gene_feature -synonym: "gene to gene feature" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000068 -name: overlapping -def: "An attribute describing a gene that has a sequence that overlaps the sequence of another gene." [SO:ke] -is_a: SO:0000067 ! 
gene_to_gene_feature - -[Term] -id: SO:0000069 -name: inside_intron -def: "An attribute to describe a gene when it is located within the intron of another gene." [SO:ke] -synonym: "inside intron" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000070 -name: inside_intron_antiparallel -def: "An attribute to describe a gene when it is located within the intron of another gene and on the opposite strand." [SO:ke] -synonym: "inside intron antiparallel" EXACT [] -is_a: SO:0000069 ! inside_intron - -[Term] -id: SO:0000071 -name: inside_intron_parallel -def: "An attribute to describe a gene when it is located within the intron of another gene and on the same strand." [SO:ke] -synonym: "inside intron parallel" EXACT [] -is_a: SO:0000069 ! inside_intron - -[Term] -id: SO:0000072 -name: end_overlapping_gene -is_obsolete: true - -[Term] -id: SO:0000073 -name: five_prime_three_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's 3' region." [SO:ke] -synonym: "five prime-three prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000074 -name: five_prime_five_prime_overlap -def: "An attribute to describe a gene when the five prime region overlaps with another gene's five prime region." [SO:ke] -synonym: "five prime-five prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000075 -name: three_prime_three_prime_overlap -def: "An attribute to describe a gene when the 3' region overlaps with another gene's 3' region." [SO:ke] -synonym: "three prime-three prime overlap" EXACT [] -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000076 -name: three_prime_five_prime_overlap -def: "An attribute to describe a gene when the 3' region overlaps with another gene's 5' region." [SO:ke] -synonym: "5' 3' overlap" EXACT [] -synonym: "three prime five prime overlap" EXACT [] -is_a: SO:0000068 ! 
overlapping - -[Term] -id: SO:0000077 -name: antisense -def: "A region sequence that is complementary to a sequence of messenger RNA." [SO:ke] -xref: http://en.wikipedia.org/wiki/Antisense "wiki" -is_a: SO:0000068 ! overlapping - -[Term] -id: SO:0000078 -name: polycistronic_transcript -def: "A transcript that is polycistronic." [SO:xp] -synonym: "polycistronic transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000079 -name: dicistronic_transcript -def: "A transcript that is dicistronic." [SO:ke] -synonym: "dicistronic transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000080 -name: operon_member -synonym: "operon member" EXACT [] -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0000081 -name: gene_array_member -synonym: "gene array member" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000082 -name: processed_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000083 -name: macronuclear_sequence -synonym: "macronuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000084 -name: micronuclear_sequence -synonym: "micronuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000085 -name: gene_by_genome_location -is_obsolete: true - -[Term] -id: SO:0000086 -name: gene_by_organelle_of_genome -is_obsolete: true - -[Term] -id: SO:0000087 -name: nuclear_gene -def: "A gene from nuclear sequence." [SO:xp] -synonym: "nuclear gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_gene "wiki" -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000088 -name: mt_gene -def: "A gene located in mitochondrial sequence." 
[SO:xp] -synonym: "mitochondrial gene" EXACT [] -synonym: "mt gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_gene "wiki" -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000089 -name: kinetoplast_gene -def: "A gene located in kinetoplast sequence." [SO:xp] -synonym: "kinetoplast gene" EXACT [] -intersection_of: SO:0000088 ! mt_gene -intersection_of: has_origin SO:0000741 ! kinetoplast - -[Term] -id: SO:0000090 -name: plastid_gene -def: "A gene from plastid sequence." [SO:xp] -synonym: "plastid gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000091 -name: apicoplast_gene -def: "A gene from apicoplast sequence." [SO:xp] -synonym: "apicoplast gene" EXACT [] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0000092 -name: ct_gene -def: "A gene from chloroplast sequence." [SO:xp] -synonym: "chloroplast gene" EXACT [] -synonym: "ct gene" EXACT [] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000093 -name: chromoplast_gene -def: "A gene from chromoplast_sequence." [SO:xp] -synonym: "chromoplast gene" EXACT [] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000094 -name: cyanelle_gene -def: "A gene from cyanelle sequence." [SO:xp] -synonym: "cyanelle gene" EXACT [] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000095 -name: leucoplast_gene -def: "A plastid gene from leucoplast sequence." [SO:xp] -synonym: "leucoplast gene" EXACT [] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000747 ! 
leucoplast_sequence - -[Term] -id: SO:0000096 -name: proplastid_gene -def: "A gene from proplastid sequence." [SO:ke] -synonym: "proplastid gene" EXACT [] -intersection_of: SO:0000090 ! plastid_gene -intersection_of: has_origin SO:0000748 ! proplastid_sequence - -[Term] -id: SO:0000097 -name: nucleomorph_gene -def: "A gene from nucleomorph sequence." [SO:xp] -synonym: "nucleomorph gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000098 -name: plasmid_gene -def: "A gene from plasmid sequence." [SO:xp] -synonym: "plasmid gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000749 ! plasmid_location - -[Term] -id: SO:0000099 -name: proviral_gene -def: "A gene from proviral sequence." [SO:xp] -synonym: "proviral gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_origin SO:0000751 ! proviral_location - -[Term] -id: SO:0000100 -name: endogenous_retroviral_gene -def: "A proviral gene with origin endogenous retrovirus." [SO:xp] -synonym: "endogenous retroviral gene" EXACT [] -intersection_of: SO:0000099 ! proviral_gene -intersection_of: has_origin SO:0000903 ! endogenous_retroviral_sequence - -[Term] -id: SO:0000101 -name: transposable_element -def: "A transposon or insertion sequence. An element that can insert in a variety of DNA sequences." [http://www.sci.sdsu.edu/~smaloy/Glossary/T.html] -subset: SOFA -synonym: "transposable element" EXACT [] -synonym: "transposon" EXACT [] -xref: http://en.wikipedia.org/wiki/Transposable_element "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000102 -name: expressed_sequence_match -def: "A match to an EST or cDNA sequence." [SO:ke] -subset: SOFA -synonym: "expressed sequence match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000103 -name: clone_insert_end -def: "The end of the clone insert." 
[SO:ke] -subset: SOFA -synonym: "clone insert end" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000104 -name: polypeptide -alt_id: SO:0000358 -def: "A sequence of amino acids linked by peptide bonds which may lack appreciable tertiary structure and may not be liable to irreversible denaturation." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The term 'protein' was merged with 'polypeptide'. Although 'protein' was a sequence_attribute and therefore meant to describe the quality rather than an actual feature, it was being used erroneously. It is replaced by 'peptidyl' as the polymer attribute. -subset: SOFA -synonym: "protein" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypeptide "wiki" -is_a: SO:0001411 ! biological_region -relationship: derives_from SO:0000316 ! CDS - -[Term] -id: SO:0000105 -name: chromosome_arm -def: "A region of the chromosome between the centromere and the telomere. Human chromosomes have two arms, the p arm (short) and the q arm (long) which are separated from each other by the centromere." [http://www.medterms.com/script/main/art.asp?articlekey=5152] -synonym: "chromosome arm" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000106 -name: non_capped_primary_transcript -is_obsolete: true - -[Term] -id: SO:0000107 -name: sequencing_primer -synonym: "sequencing primer" EXACT [] -is_a: SO:0000112 ! primer - -[Term] -id: SO:0000108 -name: mRNA_with_frameshift -def: "An mRNA with a frameshift." [SO:xp] -synonym: "frameshifted mRNA" EXACT [] -synonym: "mRNA with frameshift" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000109 -name: sequence_variant_obs -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." 
[SO:ke] -subset: SOFA -synonym: "mutation" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000110 -name: sequence_feature -def: "An extent of biological sequence." [SO:ke] -subset: SOFA -synonym: "located sequence feature" RELATED [] -synonym: "located_sequence_feature" EXACT [] -synonym: "sequence feature" EXACT [] -disjoint_from: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000111 -name: transposable_element_gene -def: "A gene encoded within a transposable element. For example gag, int, env and pol are the transposable element genes of the TY element in yeast." [SO:ke] -synonym: "transposable element gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: part_of SO:0000101 ! transposable_element -relationship: part_of SO:0000101 ! transposable_element - -[Term] -id: SO:0000112 -name: primer -def: "A short preexisting polynucleotide chain to which new deoxyribonucleotides can be added by DNA polymerase." [http://www.ornl.gov/TechResources/Human_Genome/publicat/primer2001/glossary.html] -subset: SOFA -synonym: "DNA primer" EXACT [] -synonym: "primer oligonucleotide" EXACT [] -synonym: "primer polynucleotide" EXACT [] -synonym: "primer sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Primer_(molecular_biology) "wiki" -is_a: SO:0000441 ! ss_oligo - -[Term] -id: SO:0000113 -name: proviral_region -def: "A viral sequence which has integrated into a host genome." [SO:ke] -subset: SOFA -synonym: "proviral region" EXACT [] -synonym: "proviral sequence" RELATED [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000114 -name: methylated_C -def: "A methylated deoxy-cytosine." [SO:ke] -subset: SOFA -synonym: "methylated C" EXACT [] -synonym: "methylated cytosine" EXACT [] -synonym: "methylated cytosine base" EXACT [] -synonym: "methylated cytosine residue" EXACT [] -is_a: SO:0000306 ! 
methylated_base_feature - -[Term] -id: SO:0000115 -name: transcript_feature -is_obsolete: true - -[Term] -id: SO:0000116 -name: edited -def: "An attribute describing a sequence that is modified by editing." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000117 -name: transcript_with_readthrough_stop_codon -is_obsolete: true - -[Term] -id: SO:0000118 -name: transcript_with_translational_frameshift -def: "A transcript with a translational frameshift." [SO:xp] -synonym: "transcript with translational frameshift" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000119 -name: regulated -def: "An attribute to describe a sequence that is regulated." [SO:ke] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000120 -name: protein_coding_primary_transcript -def: "A primary transcript that, at least in part, encodes one or more proteins." [SO:ke] -comment: May contain introns. -subset: SOFA -synonym: "pre mRNA" RELATED [] -synonym: "protein coding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000121 -name: forward_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "DNA forward primer" EXACT [] -synonym: "forward DNA primer" EXACT [] -synonym: "forward primer" EXACT [] -synonym: "forward primer oligo" EXACT [] -synonym: "forward primer oligonucleotide" EXACT [] -synonym: "forward primer polynucleotide" EXACT [] -synonym: "forward primer sequence" EXACT [] -intersection_of: SO:0000112 ! primer -intersection_of: has_quality SO:0001030 ! forward - -[Term] -id: SO:0000122 -name: RNA_sequence_secondary_structure -def: "A folded RNA sequence." [SO:ke] -synonym: "RNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! 
sequence_secondary_structure - -[Term] -id: SO:0000123 -name: transcriptionally_regulated -def: "An attribute describing a gene that is regulated at transcription." [SO:ma] -comment: By:. -synonym: "transcriptionally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -def: "Expressed in relatively constant amounts without regard to cellular environmental conditions such as the concentration of a particular substrate." [SO:ke] -synonym: "transcriptionally constitutive" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -def: "An inducer molecule is required for transcription to occur." [SO:ke] -synonym: "transcriptionally induced" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -def: "A repressor molecule is required for transcription to stop." [SO:ke] -synonym: "transcriptionally repressed" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000127 -name: silenced_gene -def: "A gene that is silenced." [SO:xp] -synonym: "silenced gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000893 ! silenced - -[Term] -id: SO:0000128 -name: gene_silenced_by_DNA_modification -def: "A gene that is silenced by DNA modification." [SO:xp] -synonym: "gene silenced by DNA modification" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000129 -name: gene_silenced_by_DNA_methylation -def: "A gene that is silenced by DNA methylation." [SO:xp] -synonym: "gene silenced by DNA methylation" EXACT [] -synonym: "methylation-silenced gene" EXACT [] -intersection_of: SO:0000128 ! gene_silenced_by_DNA_modification -intersection_of: has_quality SO:0000895 ! 
silenced_by_DNA_methylation - -[Term] -id: SO:0000130 -name: post_translationally_regulated -def: "An attribute describing a gene that is regulated after it has been translated." [SO:ke] -synonym: "post translationally regulated" EXACT [] -synonym: "post-translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000131 -name: translationally_regulated -def: "An attribute describing a gene that is regulated as it is translated." [SO:ke] -synonym: "translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "DNA reverse primer" EXACT [] -synonym: "reverse DNA primer" EXACT [] -synonym: "reverse primer" EXACT [] -synonym: "reverse primer oligo" EXACT [] -synonym: "reverse primer oligonucleotide" EXACT [] -synonym: "reverse primer sequence" EXACT [] -intersection_of: SO:0000112 ! primer -intersection_of: has_quality SO:0001031 ! reverse - -[Term] -id: SO:0000133 -name: epigenetically_modified -def: "This attribute describes a gene where heritable changes other than those in the DNA sequence occur. These changes include: modification to the DNA (such as DNA methylation, the covalent modification of cytosine), and post-translational modification of histones." [SO:ke] -synonym: "epigenetically modified" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000134 -name: imprinted -def: "Imprinted genes are epigenetically modified genes that are expressed monoallelically according to their parent of origin." [SO:ke] -xref: http:http\://en.wikipedia.org/wiki/Genomic_imprinting "wiki" -is_a: SO:0000119 ! regulated -is_a: SO:0000133 ! 
epigenetically_modified - -[Term] -id: SO:0000135 -name: maternally_imprinted -def: "The maternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "maternally imprinted" EXACT [] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -def: "The paternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "paternally imprinted" EXACT [] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -def: "Allelic exclusion is a process occurring in diploid organisms, where a gene is inactivated and not expressed in that cell." [SO:ke] -comment: Examples are x-inactivation and immunoglobulin formation. -synonym: "allelically excluded" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000138 -name: gene_rearranged_at_DNA_level -def: "An epigenetically modified gene, rearranged at the DNA level." [SO:xp] -synonym: "gene rearranged at DNA level" EXACT [] -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000904 ! rearranged_at_DNA_level - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -synonym: "DNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000145 -name: recoded_codon -def: "A codon that has been redefined at translation. The redefinition may be as a result of translational bypass, translational frameshifting or stop codon readthrough." [SO:xp] -synonym: "recoded codon" EXACT [] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000146 -name: capped -def: "An attribute describing when a sequence, usually an mRNA is capped by the addition of a modified guanine nucleotide at the 5' end." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000719 ! 
ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unavailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http\://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "yeast artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "bacterial artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000154 -name: PAC -def: "The P1-derived artificial chromosome are DNA constructs that are derived from the DNA of P1 bacteriophage. 
They can carry large amounts (about 100-300 kilobases) of other sequences for a variety of bioengineering purposes. It is one type of vector used to clone DNA fragments (100- to 300-kb insert size; average, 150 kb) in Escherichia coli cells." [http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. Drosophila melanogaster PACs carry an average insert size of 80 kb. The library represents a 6-fold coverage of the genome. -synonym: "P1" EXACT [] -synonym: "P1 artificial chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000155 -name: plasmid -def: "A self replicating, using the hosts cellular machinery, often circular nucleic acid molecule that is distinct from a chromosome in the organism." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "plasmid sequence" EXACT [] -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as a plasmid or packaged as a phage,since they retain the lambda cos sites." [SO:ma] -comment: Paper: vans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1):9-20. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cosmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Cosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma] -synonym: "phagemid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Phagemid "wiki" -is_a: SO:0000440 ! 
vector_replicon - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilizes the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource: mapping and analysis of 96 clones. Genomics 1996. -synonym: "fosmid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Fosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000159 -name: deletion -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_obsolete: true - -[Term] -id: SO:0000161 -name: methylated_A -def: "A modified RNA base in which adenine has been methylated." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinery. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! 
primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a place holder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000166 -name: enhancer_bound_by_factor -def: "An enhancer bound by a factor." 
[SO:xp] -synonym: "enhancer bound by factor" EXACT [] -intersection_of: SO:0000165 ! enhancer -intersection_of: has_quality SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." [SO:ke] -synonym: "pol I promoter" EXACT [] -synonym: "polymerase I promoter" EXACT [] -synonym: "RNA polymerase A promoter" EXACT [] -synonym: "RNApol I promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke] -synonym: "pol II promoter" RELATED [] -synonym: "polymerase II promoter" EXACT [] -synonym: "RNA polymerase B promoter" EXACT [] -synonym: "RNApol II promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter -relationship: has_part SO:0000174 ! TATA_box - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." 
[SO:ke] -synonym: "pol III promoter" EXACT [] -synonym: "polymerase III promoter" EXACT [] -synonym: "RNA polymerase C promoter" EXACT [] -synonym: "RNApol III promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter -relationship: has_part SO:0000174 ! TATA_box - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "CAAT box" EXACT [] -synonym: "CAAT signal" EXACT [] -synonym: "CAAT-box" EXACT [] -xref: http://en.wikipedia.org/wiki/CAAT_box "wiki" -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000173 -name: GC_rich_promoter_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "GC rich promoter region" EXACT [] -synonym: "GC-rich region" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "Goldstein-Hogness box" EXACT [] -synonym: "TATA box" EXACT [] -xref: http://en.wikipedia.org/wiki/TATA_box "wiki" -is_a: SO:0000235 ! 
TF_binding_site - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-10 signal" EXACT [] -synonym: "minus 10 signal" EXACT [] -synonym: "Pribnow box" EXACT [] -synonym: "Pribnow Schaller box" EXACT [] -synonym: "Pribnow-Schaller box" EXACT [] -xref: http://en.wikipedia.org/wiki/Pribnow_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000613 ! bacterial_RNApol_promoter - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-35 signal" EXACT [] -synonym: "minus 35 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000613 ! bacterial_RNApol_promoter - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! 
clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." [http://www.dddmag.com/Glossary.aspx#r] -synonym: "class I" RELATED [] -synonym: "class I transposon" EXACT [] -synonym: "retrotransposon element" EXACT [] -xref: http://en.wikipedia.org/wiki/Retrotransposon "wiki" -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." [SO:ke] -synonym: "class II" RELATED [] -synonym: "class II transposon" EXACT [] -synonym: "DNA transposon" EXACT [] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -synonym: "U2 intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! 
transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." [SO:ke] -synonym: "long terminal repeat retrotransposon" EXACT [] -synonym: "LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -synonym: "non LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -synonym: "5' intron" EXACT [] -synonym: "5' intron sequence" EXACT [] -synonym: "five prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000191 -name: interior_intron -synonym: "interior intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -synonym: "3' intron" EXACT [] -synonym: "3' intron sequence" RELATED [] -synonym: "three prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." 
[GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "LINE" EXACT [] -synonym: "LINE element" EXACT [] -synonym: "Long interspersed element" EXACT [] -synonym: "Long interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon (here, 'codon' is inclusive of the stop_codon)." [SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! 
exon - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke] -synonym: "translocated sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000201 -name: interior_exon -def: "An exon that is bounded by 5' and 3' splice sites." [PMID:10373547] -synonym: "interior exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The coding exon that is most 3-prime on a given transcript." [SO:ma] -synonym: "3' coding exon" RELATED [] -synonym: "three prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." [SO:ke] -synonym: "Short interspersed element" EXACT [] -synonym: "Short interspersed nuclear element" EXACT [] -synonym: "SINE element" EXACT [] -xref: http://en.wikipedia.org/wiki/Short_interspersed_nuclear_element "wiki" -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_variation -synonym: "simple sequence length polymorphism" RELATED [] -synonym: "simple sequence length variation" EXACT [] -synonym: "SSLP" RELATED [] -is_a: SO:0000248 ! sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "terminal inverted repeat element" EXACT [] -synonym: "TIR element" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -subset: SOFA -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253)." [SO:ke] -synonym: "tRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -def: "A primary transcript encoding alanyl tRNA." 
[SO:ke] -synonym: "alanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." [SO:ke] -synonym: "arginine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -synonym: "asparagine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -synonym: "aspartic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." [SO:ke] -synonym: "cysteine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." [SO:ke] -synonym: "glycine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." 
[SO:ke] -synonym: "histidine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke] -synonym: "isoleucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -synonym: "leucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -synonym: "lysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -synonym: "methionine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke] -synonym: "phenylalanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -synonym: "proline tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -synonym: "serine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -synonym: "threonine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -synonym: "tryptophan tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." [SO:ke] -synonym: "tyrosine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." [SO:ke] -synonym: "valine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear RNA (SO:0000274)." [SO:ke] -synonym: "snRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -synonym: "snoRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. 
It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds a TF complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The in-frame interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -synonym: "transcript attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterized by large modular imperfect long inverted repeats." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "foldback element" EXACT [] -synonym: "long inverted repeat element" RELATED [] -synonym: "LVR element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The sequences extending on either side of a specific region." 
[SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000240 -name: chromosome_variation -synonym: "chromosome variation" EXACT [] -is_a: SO:0001507 ! variant_collection -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000400 ! sequence_attribute -disjoint_from: SO:1000132 ! sequence_variant_effect -relationship: part_of SO:0001524 ! chromosomally_aberrant_genome - -[Term] -id: SO:0000241 -name: internal_UTR -def: "A UTR bordered by the terminal and initial codons of two CDSs in a polycistronic transcript. Every UTR is either 5', 3' or internal." [SO:cjm] -synonym: "internal UTR" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polycistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -synonym: "untranslated region polycistronic mRNA" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke] -synonym: "internal ribosomal entry sequence" EXACT [] -synonym: "internal ribosomal entry site" EXACT [] -synonym: "internal ribosome entry sequence" RELATED [] -synonym: "internal ribosome entry site" EXACT [] -synonym: "IRES" EXACT [] -xref: http://en.wikipedia.org/wiki/Internal_ribosome_entry_site "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_obsolete: true - -[Term] -id: SO:0000246 -name: polyadenylated -def: "A attribute describing the addition of a poly A tail to the 3' end of a mRNA molecule." [SO:ke] -is_a: SO:0000863 ! 
mRNA_attribute - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000248 -name: sequence_length_variation -synonym: "sequence length variation" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -synonym: "modified RNA base feature" EXACT [] -is_a: SO:0001236 ! base - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). 
Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000254 -name: alanyl_tRNA -def: "A tRNA sequence that has an alanine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "alanyl tRNA" EXACT [] -synonym: "alanyl-transfer ribonucleic acid" EXACT [] -synonym: "alanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000211 ! alanine_tRNA_primary_transcript - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -synonym: "rRNA small subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -def: "A tRNA sequence that has an asparagine anticodon, and a 3' asparagine binding region." [SO:ke] -synonym: "asparaginyl tRNA" EXACT [] -synonym: "asparaginyl-transfer ribonucleic acid" EXACT [] -synonym: "asparaginyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000213 ! asparagine_tRNA_primary_transcript - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -def: "A tRNA sequence that has an aspartic acid anticodon, and a 3' aspartic acid binding region." [SO:ke] -synonym: "aspartyl tRNA" EXACT [] -synonym: "aspartyl-transfer ribonucleic acid" EXACT [] -synonym: "aspartyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! 
tRNA -relationship: derives_from SO:0000214 ! aspartic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -def: "A tRNA sequence that has a cysteine anticodon, and a 3' cysteine binding region." [SO:ke] -synonym: "cysteinyl tRNA" EXACT [] -synonym: "cysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "cysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000215 ! cysteine_tRNA_primary_transcript - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -def: "A tRNA sequence that has a glutamine anticodon, and a 3' glutamine binding region." [SO:ke] -synonym: "glutaminyl tRNA" EXACT [] -synonym: "glutaminyl-transfer ribonucleic acid" EXACT [] -synonym: "glutaminyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000216 ! glutamic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -def: "A tRNA sequence that has a glutamic acid anticodon, and a 3' glutamic acid binding region." [SO:ke] -synonym: "glutamyl tRNA" EXACT [] -synonym: "glutamyl-transfer ribonucleic acid" EXACT [] -synonym: "glutamyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000217 ! glutamine_tRNA_primary_transcript - -[Term] -id: SO:0000261 -name: glycyl_tRNA -def: "A tRNA sequence that has a glycine anticodon, and a 3' glycine binding region." [SO:ke] -synonym: "glycyl tRNA" EXACT [] -synonym: "glycyl-transfer ribonucleic acid" RELATED [] -synonym: "glycyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000218 ! glycine_tRNA_primary_transcript - -[Term] -id: SO:0000262 -name: histidyl_tRNA -def: "A tRNA sequence that has a histidine anticodon, and a 3' histidine binding region." [SO:ke] -synonym: "histidyl tRNA" EXACT [] -synonym: "histidyl-transfer ribonucleic acid" EXACT [] -synonym: "histidyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000219 ! 
histidine_tRNA_primary_transcript - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -def: "A tRNA sequence that has an isoleucine anticodon, and a 3' isoleucine binding region." [SO:ke] -synonym: "isoleucyl tRNA" EXACT [] -synonym: "isoleucyl-transfer ribonucleic acid" EXACT [] -synonym: "isoleucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000220 ! isoleucine_tRNA_primary_transcript - -[Term] -id: SO:0000264 -name: leucyl_tRNA -def: "A tRNA sequence that has a leucine anticodon, and a 3' leucine binding region." [SO:ke] -synonym: "leucyl tRNA" EXACT [] -synonym: "leucyl-transfer ribonucleic acid" EXACT [] -synonym: "leucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000221 ! leucine_tRNA_primary_transcript - -[Term] -id: SO:0000265 -name: lysyl_tRNA -def: "A tRNA sequence that has a lysine anticodon, and a 3' lysine binding region." [SO:ke] -synonym: "lysyl tRNA" EXACT [] -synonym: "lysyl-transfer ribonucleic acid" EXACT [] -synonym: "lysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000222 ! lysine_tRNA_primary_transcript - -[Term] -id: SO:0000266 -name: methionyl_tRNA -def: "A tRNA sequence that has a methionine anticodon, and a 3' methionine binding region." [SO:ke] -synonym: "methionyl tRNA" EXACT [] -synonym: "methionyl-transfer ribonucleic acid" EXACT [] -synonym: "methionyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000223 ! methionine_tRNA_primary_transcript - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -def: "A tRNA sequence that has a phenylalanine anticodon, and a 3' phenylalanine binding region." [SO:ke] -synonym: "phenylalanyl tRNA" EXACT [] -synonym: "phenylalanyl-transfer ribonucleic acid" EXACT [] -synonym: "phenylalanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000224 ! 
phenylalanine_tRNA_primary_transcript - -[Term] -id: SO:0000268 -name: prolyl_tRNA -def: "A tRNA sequence that has a proline anticodon, and a 3' proline binding region." [SO:ke] -synonym: "prolyl tRNA" EXACT [] -synonym: "prolyl-transfer ribonucleic acid" EXACT [] -synonym: "prolyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000225 ! proline_tRNA_primary_transcript - -[Term] -id: SO:0000269 -name: seryl_tRNA -def: "A tRNA sequence that has a serine anticodon, and a 3' serine binding region." [SO:ke] -synonym: "seryl tRNA" EXACT [] -synonym: "seryl-transfer ribonucleic acid" RELATED [] -synonym: "seryl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000226 ! serine_tRNA_primary_transcript - -[Term] -id: SO:0000270 -name: threonyl_tRNA -def: "A tRNA sequence that has a threonine anticodon, and a 3' threonine binding region." [SO:ke] -synonym: "threonyl tRNA" EXACT [] -synonym: "threonyl-transfer ribonucleic acid" EXACT [] -synonym: "threonyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000227 ! threonine_tRNA_primary_transcript - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -def: "A tRNA sequence that has a tryptophan anticodon, and a 3' tryptophan binding region." [SO:ke] -synonym: "tryptophanyl tRNA" EXACT [] -synonym: "tryptophanyl-transfer ribonucleic acid" EXACT [] -synonym: "tryptophanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000228 ! tryptophan_tRNA_primary_transcript - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -def: "A tRNA sequence that has a tyrosine anticodon, and a 3' tyrosine binding region." [SO:ke] -synonym: "tyrosyl tRNA" EXACT [] -synonym: "tyrosyl-transfer ribonucleic acid" EXACT [] -synonym: "tyrosyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000229 ! 
tyrosine_tRNA_primary_transcript - -[Term] -id: SO:0000273 -name: valyl_tRNA -def: "A tRNA sequence that has a valine anticodon, and a 3' valine binding region." [SO:ke] -synonym: "valyl tRNA" EXACT [] -synonym: "valyl-transfer ribonucleic acid" EXACT [] -synonym: "valyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000230 ! valine_tRNA_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000231 ! snRNA_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. 
Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: derives_from SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000277 -name: bound_by_factor -def: "An attribute describing a sequence that is bound by another molecule." [SO:ke] -comment: Formerly called transcript_by_bound_factor. -synonym: "bound by factor" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000278 -name: transcript_bound_by_nucleic_acid -def: "A transcript that is bound by a nucleic acid." [SO:xp] -comment: Formerly called transcript_by_bound_nucleic_acid. -synonym: "transcript bound by nucleic acid" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000876 ! bound_by_nucleic_acid - -[Term] -id: SO:0000279 -name: transcript_bound_by_protein -def: "A transcript that is bound by a protein." [SO:xp] -comment: Formerly called transcript_by_bound_protein. -synonym: "transcript bound by protein" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000875 ! bound_by_protein - -[Term] -id: SO:0000280 -name: engineered_gene -def: "A gene that is engineered." [SO:xp] -synonym: "engineered gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000281 -name: engineered_foreign_gene -def: "A gene that is engineered and foreign." [SO:xp] -synonym: "engineered foreign gene" EXACT [] -intersection_of: SO:0000280 ! engineered_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000282 -name: mRNA_with_minus_1_frameshift -def: "An mRNA with a minus 1 frameshift." 
[SO:xp] -synonym: "mRNA with minus 1 frameshift" EXACT [] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000866 ! minus_1_frameshift - -[Term] -id: SO:0000283 -name: engineered_foreign_transposable_element_gene -def: "A transposable_element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign transposable element gene" EXACT [] -intersection_of: SO:0000111 ! transposable_element_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartite and interrupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000285 -name: foreign_gene -def: "A gene that is foreign." [SO:xp] -synonym: "foreign gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "direct terminal repeat" RELATED [] -synonym: "long terminal repeat" EXACT [] -synonym: "LTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Long_terminal_repeat "wiki" -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000287 -name: fusion_gene -def: "A gene that is a fusion." [SO:xp] -synonym: "fusion gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Fusion_gene "wiki" -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000806 ! fusion - -[Term] -id: SO:0000288 -name: engineered_fusion_gene -def: "A fusion gene that is engineered." [SO:xp] -synonym: "engineered fusion gene" EXACT [] -intersection_of: SO:0000287 ! fusion_gene -intersection_of: has_quality SO:0000783 ! 
engineered - -[Term] -id: SO:0000289 -name: microsatellite -def: "A repeat_region containing repeat_units (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite" EXACT [] -synonym: "dinucleotide repeat microsatellite feature" EXACT [] -synonym: "dinucleotide repeat microsatellite locus" EXACT [] -synonym: "dinucleotide repeat microsatellite marker" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite marker" RELATED [] -synonym: "rinucleotide repeat microsatellite" EXACT [] -synonym: "trinucleotide repeat microsatellite feature" EXACT [] -synonym: "trinucleotide repeat microsatellite locus" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_obsolete: true - -[Term] -id: SO:0000293 -name: engineered_foreign_repetitive_element -def: "A repetitive element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign repetitive element" EXACT [] -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! 
repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -synonym: "U12 intron" EXACT [] -synonym: "U12-dependent intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! replicon - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "D-loop" EXACT [] -synonym: "displacement loop" RELATED [] -xref: http://en.wikipedia.org/wiki/D_loop "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -synonym: "recombination feature" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000299 -name: specific_recombination_site -synonym: "specific recombination site" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -synonym: "recombination feature of rearranged gene" EXACT [] -is_a: SO:0000299 ! 
specific_recombination_site - -[Term] -id: SO:0000301 -name: vertebrate_immune_system_gene_recombination_feature -synonym: "vertebrate immune system gene recombination feature" EXACT [] -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene recombination feature" EXACT [] -synonym: "J-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interrupted palindrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_base -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001720 ! epigenetically_modified_region - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_base - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_obsolete: true - -[Term] -id: SO:0000309 -name: computed_feature -is_obsolete: true - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_obsolete: true - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to: -is_obsolete: true - -[Term] -id: SO:0000312 -name: experimentally_determined -def: "Attribute to describe a feature that has been experimentally verified." [SO:ke] -synonym: "experimentally determined" EXACT [] -is_a: SO:0000789 ! validated - -[Term] -id: SO:0000313 -name: stem_loop -alt_id: SO:0000019 -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "RNA_hairpin_loop" EXACT [] -synonym: "stem loop" EXACT [] -synonym: "stem-loop" EXACT [] -xref: http://en.wikipedia.org/wiki/Stem_loop "wiki" -is_a: SO:0000122 ! RNA_sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: TSS -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! 
mRNA_region - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cDNA clone" EXACT [] -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke] -synonym: "intronic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000321 -name: mRNA_with_plus_1_frameshift -def: "An mRNA with a plus 1 frameshift." [SO:ke] -synonym: "mRNA with plus 1 frameshift" EXACT [] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000868 ! plus_1_frameshift - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -synonym: "nuclease hypersensitive site" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "coding start" EXACT [] -synonym: "translation initiation site" EXACT [] -synonym: "translation start" RELATED [] -is_a: SO:0000851 ! 
CDS_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke] -synonym: "coding end" EXACT [] -synonym: "translation termination site" EXACT [] -synonym: "translation_end" EXACT [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray oligo" EXACT [] -synonym: "microarray oligonucleotide" EXACT [] -is_a: SO:0000051 ! probe - -[Term] -id: SO:0000329 -name: mRNA_with_plus_2_frameshift -def: "An mRNA with a plus 2 frameshift." [SO:xp] -synonym: "mRNA with plus 2 frameshift" EXACT [] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000869 ! plus_2_framshift - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000335 -name: mRNA_with_minus_2_frameshift -def: "A mRNA with a minus 2 frameshift." [SO:ke] -synonym: "mRNA with minus 2 frameshift" EXACT [] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000867 ! minus_2_frameshift - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." 
[http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITEs do not encode proteins." [http://www.pnas.org/cgi/content/full/97/18/10083] -synonym: "miniature inverted repeat transposable element" EXACT [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome which promotes recombination." [SO:rd] -synonym: "recombination hotspot" EXACT [] -xref: http://en.wikipedia.org/wiki/Recombination_hotspot "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! 
chromosome_part - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -synonym: "site specific recombination target region" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000346 -name: loxP_site -synonym: "Cre-recombination target region" RELATED [] -synonym: "loxP site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000348 -name: nucleic_acid -def: "An attribute describing a sequence consisting of nucleobases bound to repeating units. The forms found in nature are deoxyribonucleic acid (DNA), where the repeating units are 2-deoxy-D-ribose rings connected to a phosphate backbone, and ribonucleic acid (RNA), where the repeating units are D-ribose rings connected to a phosphate backbone." [CHEBI:33696, RSC:cb] -synonym: "nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleic_acid "wiki" -is_a: SO:0000443 ! 
polymer_attribute - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000350 -name: FRT_site -def: "An inversion site found on the Saccharomyces cerevisiae 2 micron plasmid." [SO:ma] -synonym: "FLP recombination target region" EXACT [] -synonym: "FRT site" EXACT [] -is_a: SO:0000948 ! inversion_site - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "An attribute to decide a sequence of nucleotides, nucleotide analogs, or amino acids that has been designed by an experimenter and which may, or may not, correspond with any natural sequence." [SO:ma] -synonym: "synthetic sequence" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000352 -name: DNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a 2-deoxy-D-ribose ring connected to a phosphate backbone." [RSC:cb] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000353 -name: sequence_assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -synonym: "group 1 intron homing endonuclease target region" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which is co-inherited as the result of the lack of historic recombination within it." [SO:ma] -synonym: "haplotype block" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000356 -name: RNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a D-ribose ring connected to a phosphate backbone." 
[RSC:cb] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: flanked -def: "An attribute describing a region that is bounded either side by a particular kind of region." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000359 -name: floxed -def: "An attribute describing sequence that is flanked by Lox-P sites." [SO:ke] -xref: http://en.wikipedia.org/wiki/Floxed "wiki" -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000361 -name: FRT_flanked -def: "An attribute to describe sequence that is flanked by the FLP recombinase recognition site, FRT." [SO:ke] -synonym: "FRT flanked" EXACT [] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000362 -name: invalidated_by_chimeric_cDNA -def: "A cDNA clone constructed from more than one mRNA. Usually an experimental artifact." [SO:ma] -synonym: "invalidated by chimeric cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000363 -name: floxed_gene -def: "A transgene that is floxed." [SO:xp] -synonym: "floxed gene" EXACT [] -intersection_of: SO:0000902 ! transgene -intersection_of: has_quality SO:0000359 ! floxed - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposable element." [SO:ke] -synonym: "transposable element flanking region" EXACT [] -is_a: SO:0000239 ! 
flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "A region encoding an integrase which acts at a site adjacent to it (attI_site) to insert DNA which must include but is not limited to an attC_site." [SO:as] -xref: http://en.wikipedia.org/wiki/Integron "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000367 -name: attI_site -def: "A region within an integron, adjacent to an integrase, at which site specific recombination involving an attC_site takes place." [SO:as] -synonym: "attI site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -is_obsolete: true - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/~smaloy/Glossary/C.html] -synonym: "conjugative transposon" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." 
[RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0000373 -name: recombinationally_inverted_gene -def: "A recombinationally rearranged gene by inversion." [SO:xp] -synonym: "recombinationally inverted gene" EXACT [] -intersection_of: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: has_quality SO:1000036 ! inversion - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -intersection_of: SO:0000372 ! enzymatic_RNA -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000375 -name: rRNA_5_8S -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 6S RNA represses expression from a sigma70-dependent promoter during stationary phase." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S RNA" EXACT [] -synonym: "RNA 6S" EXACT [] -xref: http://en.wikipedia.org/wiki/6S_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB RsmB RNA" EXACT [] -synonym: "CsrB-RsmB RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -synonym: "DsrA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/DsrA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -synonym: "GcvB RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/GcvB_RNA "wiki" -is_a: SO:0000378 ! DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http://rnaworld.bio.ku.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -intersection_of: SO:0000715 ! RNA_motif -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000381 -name: group_IIA_intron -synonym: "group IIA intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -synonym: "group IIB intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -synonym: "MicF RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MicF_RNA "wiki" -is_a: SO:0000644 ! antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. 
Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -synonym: "OxyS RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/OxyS_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterized activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. 
Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -synonym: "RprA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RprA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -synonym: "RRE RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -synonym: "spot-42 RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Spot_42_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesizes telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -def: "An attribute describes a quality of sequence." [SO:ke] -synonym: "sequence attribute" EXACT [] -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0000401 -name: gene_attribute -synonym: "gene attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_obsolete: true - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0005837 ! 
U14_snoRNA_primary_transcript - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron. Twintrons are group II or III introns, into which another group II or III intron has been transposed." [PMID:1899376, PMID:7823908] -xref: http://en.wikipedia.org/wiki/Twintron "wiki" -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! 
small_subunit_rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: Discrete. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A region of a molecule that binds to a protein." [SO:ke] -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000411 -name: rescue_region -def: "A region that rescues." [SO:xp] -synonym: "rescue fragment" EXACT [] -synonym: "rescue region" EXACT [] -synonym: "rescue segment" RELATED [] -intersection_of: SO:0000695 ! reagent -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000414 -name: invalidated_by_genomic_contamination -def: "An attribute to describe a feature that is invalidated due to genomic contamination." 
[SO:ke] -synonym: "invalidated by genomic contamination" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000415 -name: invalidated_by_genomic_polyA_primed_cDNA -def: "An attribute to describe a feature that is invalidated due to polyA priming." [SO:ke] -synonym: "invalidated by genomic polyA primed cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000416 -name: invalidated_by_partial_processing -def: "An attribute to describe a feature that is invalidated due to partial processing." [SO:ke] -synonym: "invalidated by partial processing" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000417 -name: polypeptide_domain -alt_id: BS:00012 -alt_id: BS:00134 -alt_id: SO:0001069 -def: "A structurally or functionally defined protein region. In proteins with multiple domains, the combination of the domains determines the function of the protein. A region which has been shown to recur throughout evolution." [EBIBS:GAR] -comment: Range. Old definition from before biosapiens: A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains. -subset: biosapiens -synonym: "ca_bind" RELATED BS [uniprot:feature_type] -synonym: "DNA_bind" RELATED BS [uniprot:feature_type] -synonym: "domain" BROAD BS [uniprot:feature_type] -synonym: "np_bind" RELATED BS [uniprot:feature_type] -synonym: "polypeptide domain" EXACT [] -synonym: "polypeptide_structural_domain" EXACT BS [] -synonym: "structural domain" BROAD BS [] -synonym: "zn_fing" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0001527 ! peptide_localization_signal -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -synonym: "5' TIR" EXACT [] -synonym: "five prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -synonym: "3' TIR" EXACT [] -synonym: "three prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -synonym: "U5 long terminal repeat region" EXACT [] -synonym: "U5 LTR region" EXACT [] -is_a: SO:0000848 ! 
LTR_component - -[Term] -id: SO:0000423 -name: R_LTR_region -synonym: "R long terminal repeat region" EXACT [] -synonym: "R LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000424 -name: U3_LTR_region -synonym: "U3 long terminal repeat region" EXACT [] -synonym: "U3 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000425 -name: five_prime_LTR -synonym: "5' long terminal repeat" EXACT [] -synonym: "5' LTR" EXACT [] -synonym: "five prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -synonym: "3' long terminal repeat" EXACT [] -synonym: "3' LTR" EXACT [] -synonym: "three prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -synonym: "R 5' long term repeat region" EXACT [] -synonym: "R five prime LTR region" EXACT [] -is_a: SO:0000423 ! R_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -synonym: "U5 5' long terminal repeat region" EXACT [] -synonym: "U5 five prime LTR region" EXACT [] -is_a: SO:0000422 ! U5_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -synonym: "U3 5' long term repeat region" EXACT [] -synonym: "U3 five prime LTR region" EXACT [] -is_a: SO:0000424 ! U3_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -synonym: "R 3' long terminal repeat region" EXACT [] -synonym: "R three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -synonym: "U3 3' long terminal repeat region" EXACT [] -synonym: "U3 three prime LTR region" EXACT [] -is_a: SO:0000849 ! 
three_prime_LTR_component - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -synonym: "U5 3' long terminal repeat region" EXACT [] -synonym: "U5 three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -synonym: "non LTR retrotransposon polymeric tract" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: target_site_duplication -def: "A sequence of the target DNA that is duplicated when a transposable element or phage inserts; usually found at each end the insertion." [http://www.koko.gov.my/CocoaBioTech/Glossaryt.html] -synonym: "target site duplication" EXACT [] -is_a: SO:0000314 ! direct_repeat -intersection_of: SO:0000314 ! direct_repeat -intersection_of: derives_from SO:0000101 ! transposable_element -relationship: derives_from SO:0000101 ! transposable_element - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." [SO:ke] -synonym: "LTR retrotransposon poly purine tract" RELATED [] -synonym: "RR tract" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -synonym: "inverted ring chromosome" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! 
ring_chromosome - -[Term] -id: SO:0000440 -name: vector_replicon -def: "A replicon that has been modified to act as a vector for foreign sequence." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "vector" EXACT [] -synonym: "vector replicon" EXACT [] -xref: http://en.wikipedia.org/wiki/Vector_(molecular_biology) "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000443 -name: polymer_attribute -def: "An attribute to describe the kind of biological sequence." [SO:ke] -synonym: "polymer attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -synonym: "three prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "5' nc exon" EXACT [] -synonym: "5' non coding exon" EXACT [] -synonym: "five prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." 
[SO:ke] -synonym: "UTR intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." [SO:ke] -synonym: "five prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -synonym: "three prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequency of these components." [SO:ma] -synonym: "random sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -synonym: "chromosome interband" RELATED [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000451 -name: gene_with_polyadenylated_mRNA -def: "A gene that encodes a polyadenylated mRNA." [SO:xp] -synonym: "gene with polyadenylated mRNA" EXACT [] -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000871 ! polyadenylated_mRNA - -[Term] -id: SO:0000452 -name: transgene_attribute -is_obsolete: true - -[Term] -id: SO:0000453 -name: chromosomal_transposition -def: "A chromosome structure variant whereby a region of a chromosome has been transferred to another position. Among interchromosomal rearrangements, the term transposition is reserved for that class in which the telomeres of the chromosomes involved are coupled (that is to say, form the two ends of a single DNA molecule) as in wild-type." [FB:reference_manual, SO:ke] -synonym: "chromosomal transposition" EXACT [] -synonym: "transposition" NARROW [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." 
[http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000455 -name: gene_with_mRNA_with_frameshift -def: "A gene that encodes an mRNA with a frameshift." [SO:xp] -synonym: "gene with mRNA with frameshift" EXACT [] -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000456 -name: recombinationally_rearranged_gene -def: "A gene that is recombinationally rearranged." [SO:ke] -synonym: "recombinationally rearranged gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000940 ! recombinationally_rearranged - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -def: "A chromosome duplication involving an insertion from another chromosome." [SO:ke] -synonym: "interchromosomal duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D gene" EXACT [] -synonym: "D-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000459 -name: gene_with_trans_spliced_transcript -def: "A gene with a transcript that is trans-spliced." [SO:xp] -synonym: "gene with trans spliced transcript" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000479 ! trans_spliced_transcript - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_segment -comment: I am using the term segment instead of gene here to avoid confusion with the region 'gene'. 
-synonym: "vertebrate immunoglobulin T cell receptor segment" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite deficiency" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000463 -name: encodes_alternately_spliced_transcripts -def: "A gene that encodes more than one transcript." [SO:ke] -synonym: "encodes alternately spliced transcripts" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome deletion whereby a chromosome is generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus duplication" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene" EXACT [] -synonym: "V-GENE" EXACT [] -synonym: "variable_gene" EXACT [] -is_a: SO:0000460 ! 
vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -def: "An attribute describing a gene sequence where the resulting protein is regulated by the stability of the resulting protein." [SO:ke] -synonym: "post translationally regulated by protein stability" EXACT [] -synonym: "post-translationally regulated by protein stability" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -def: "An attribute describing a gene sequence where the resulting protein is modified to regulate it." [SO:ke] -synonym: "post translationally regulated by protein modification" EXACT [] -synonym: "post-translationally regulated by protein modification" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene" EXACT [] -synonym: "J-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000471 -name: autoregulated -def: "The gene product is involved in its own transcriptional regulation." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -def: "The gene product is involved in its own transcriptional regulation where it decreases transcription." [SO:ke] -synonym: "negatively autoregulated" EXACT [] -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -def: "The gene product is involved in its own transcriptional regulation, where it increases transcription." [SO:ke] -synonym: "positively autoregulated" EXACT [] -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." [SO:ke] -synonym: "contig read" EXACT [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -def: "A gene that is polycistronic." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000478 -name: C_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C gene" EXACT [] -synonym: "C_GENE" EXACT [] -synonym: "constant gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000479 -name: trans_spliced_transcript -def: "A transcript that is trans-spliced." [SO:xp] -synonym: "trans spliced transcript" EXACT [] -synonym: "trans-spliced transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. 
A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly." [SO:ke] -synonym: "tiling path clone" EXACT [] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occurring at the termini of a DNA transposon." [SO:ke] -synonym: "terminal inverted repeat" EXACT [] -synonym: "TIR" EXACT [] -is_a: SO:0000294 ! inverted_repeat -relationship: part_of SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor gene cluster" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-CLUSTER" EXACT [] -synonym: "DJ J cluster" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-C-CLUSTER" EXACT [] -synonym: "VDJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-CLUSTER" EXACT [] -synonym: "VDJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-C-CLUSTER" RELATED [] -synonym: "VJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! 
C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-C-CLUSTER" EXACT [] -synonym: "VJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-CLUSTER" EXACT [] -synonym: "VJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -synonym: "D gene recombination feature" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-HEPTAMER" EXACT [] -synonym: "three prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-NOMAMER" EXACT [] -synonym: "three prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-SPACER" EXACT [] -synonym: "three prime D spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-HEPTAMER" EXACT [] -synonym: "five prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-NONAMER" EXACT [] -synonym: "five prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'-SPACER" EXACT [] -synonym: "five prime D spacer" EXACT [] -synonym: "five prime D-spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -synonym: "Hoogsteen base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Hoogsteen_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." [SO:ke] -synonym: "reverse Hoogsteen base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. 
-subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ C cluster" EXACT [] -synonym: "D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ cluster" EXACT [] -synonym: "D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J C cluster" EXACT [] -synonym: "D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000507 -name: pseudogenic_exon -def: "A non functional descendant of an exon, part of a pseudogene." [SO:ke] -comment: This is the analog of the exon of a functional gene. 
The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon -relationship: part_of SO:0000516 ! pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J cluster" EXACT [] -synonym: "D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J C cluster" EXACT [] -synonym: "D-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000510 -name: VD_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V_D_GENE" EXACT [] -synonym: "VD gene" EXACT [] -is_a: SO:0000936 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J C cluster" EXACT [] -synonym: "J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus aneuploid" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J cluster" EXACT [] -synonym: "J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J nonamer" EXACT [] -synonym: "J-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J heptamer" EXACT [] -synonym: "J-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -def: "A non functional descendant of a transcript, part of a pseudogene." [SO:ke] -comment: This is the analog of the transcript of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic transcript" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J spacer" EXACT [] -synonym: "J-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ cluster" EXACT [] -synonym: "V-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J cluster" EXACT [] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ C cluster" EXACT [] -synonym: "V-(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ cluster" EXACT [] -synonym: "V-(VDJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J cluster" EXACT [] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000574 ! 
VDJ_gene - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ C cluster" EXACT [] -synonym: "V-(VJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ cluster" EXACT [] -synonym: "V-(VJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J cluster" EXACT [] -synonym: "V-(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V cluster" EXACT [] -synonym: "V-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! 
V_gene - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ C cluster" EXACT [] -synonym: "V-D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ cluster" EXACT [] -synonym: "V-D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J C cluster" EXACT [] -synonym: "V-D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! 
DJ_gene - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J cluster" EXACT [] -synonym: "V-D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J C cluster" EXACT [] -synonym: "V-D-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J cluster" EXACT [] -synonym: "V-D-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V heptamer" EXACT [] -synonym: "V-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J cluster" EXACT [] -synonym: "V-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J C cluster" EXACT [] -synonym: "V-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V nonamer" EXACT [] -synonym: "V-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V spacer" EXACT [] -synonym: "V-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene recombination feature" EXACT [] -synonym: "V-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-C-CLUSTER" EXACT [] -synonym: "DJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-C-CLUSTER" EXACT [] -synonym: "DJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-C-CLUSTER" EXACT [] -synonym: "VDJ C cluster" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ C cluster" EXACT [] -synonym: "V-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." [http://www.pnas.org/cgi/content/full/100/11/6569] -synonym: "ISCR" RELATED [] -xref: http://en.wikipedia.org/wiki/Helitron "wiki" -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -synonym: "recoding pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -synonym: "designed sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." 
[FB:km] -synonym: "inversion derived bipartite duplication" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000548 -name: gene_with_edited_transcript -def: "A gene that encodes a transcript that is edited." [SO:xp] -synonym: "gene with edited transcript" EXACT [] -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000873 ! edited_transcript - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived duplication plus aneuploid" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -def: "A chromosome structural variation whereby either a chromosome exists in addition to the normal chromosome complement or is lacking." [SO:ke] -comment: Examples are Nullo-4, Haplo-4 and triplo-4 in Drosophila. -synonym: "aneuploid chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -synonym: "polyadenylation termination signal" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "Region in 5' UTR where ribosome assembles on mRNA." 
[SO:ke] -synonym: "five prime ribosome binding site" EXACT [] -synonym: "RBS" RELATED [] -synonym: "Shine Dalgarno sequence" EXACT [] -synonym: "Shine-Dalgarno sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Shine-Dalgarno_sequence "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA cleavage site" EXACT [] -synonym: "polyA site" EXACT [] -synonym: "polyadenylation site" RELATED [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "5' clip" RELATED [] -synonym: "five prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'RS" EXACT [] -synonym: "five prime D recombination signal sequence" EXACT [] -synonym: "five prime D-recombination signal sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "3'-clip" EXACT [] -synonym: "three prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C cluster" EXACT [] -synonym: "C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D cluster" EXACT [] -synonym: "D-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J cluster" EXACT [] -synonym: "D-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: "Seven nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or J-gene recombination feature of an immunoglobulin or T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "HEPTAMER" RELATED [] -synonym: "heptamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! 
vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -synonym: "nonamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000563 -name: vertebrate_immune_system_gene_recombination_spacer -synonym: "vertebrate immune system gene recombination spacer" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J C cluster" EXACT [] -synonym: "V-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J C cluster" EXACT [] -synonym: "V-(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J C cluster" EXACT [] -synonym: "V-(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -synonym: "inversion derived aneuploid chromosome" EXACT [] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promoter -synonym: "bidirectional promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000569 -name: retrotransposed -alt_id: SO:0100042 -def: "An attribute of a feature that occurred as the product of a reverse transcriptase mediated event." [SO:ke] -comment: GO:0003964 RNA-directed DNA polymerase activity. -xref: http://en.wikipedia.org/wiki/Retrotransposed "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-RS" EXACT [] -synonym: "three prime D recombination signal sequence" EXACT [] -synonym: "three_prime_D-recombination_signal_sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_encoding -synonym: "miRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000572 -name: DJ_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D-J-GENE" EXACT [] -synonym: "DJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000573 -name: rRNA_encoding -synonym: "rRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000574 -name: VDJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-D-J-GENE" EXACT [] -synonym: "VDJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000575 -name: scRNA_encoding -synonym: "scRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000576 -name: VJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-J-GENE" EXACT [] -synonym: "VJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_encoding -synonym: "snoRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." 
[SO:ma] -synonym: "edited transcript feature" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -synonym: "methylation guide snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." [SO:ke] -synonym: "rRNA cleavage snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "pre edited region" EXACT [] -synonym: "pre-edited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "A tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. TmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and Eukaryote nuclear genomes." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa RNA" RELATED [] -synonym: "ssrA" RELATED [] -xref: http://en.wikipedia.org/wiki/TmRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_encoding -synonym: "C/D box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." [SO:ke] -synonym: "10Sa RNA primary transcript" RELATED [] -synonym: "ssrA RNA primary transcript" RELATED [] -synonym: "tmRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyze their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -synonym: "SRP RNA primary transcript" EXACT [] -is_a: SO:0000483 ! 
nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000589 ! SRP_RNA_primary_transcript - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A tertiary structure in RNA where nucleotides in a loop form base pairs with a region of RNA downstream of the loop." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Pseudoknot "wiki" -is_a: SO:0000002 ! 
sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "classical pseudoknot" EXACT [] -synonym: "H pseudoknot" EXACT [] -synonym: "H-pseudoknot" EXACT [] -synonym: "H-type pseudoknot" EXACT [] -synonym: "hairpin-type pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000595 ! C_D_box_snoRNA_primary_transcript - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. 
Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "box H/ACA snoRNA" EXACT [] -synonym: "H ACA box snoRNA" EXACT [] -synonym: "H/ACA box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000596 ! H_ACA_box_snoRNA_primary_transcript - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." [SO:ke] -synonym: "C/D box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -synonym: "H ACA box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." [http://www.rna.ucla.edu/index.html] -is_obsolete: true - -[Term] -id: SO:0000598 -name: edited_by_C_insertion_and_dinucleotide_insertion -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000599 -name: edited_by_C_to_U_substitution -is_obsolete: true - -[Term] -id: SO:0000600 -name: edited_by_A_to_I_substitution -is_obsolete: true - -[Term] -id: SO:0000601 -name: edited_by_G_addition -is_obsolete: true - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." 
[http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing block" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing domain" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "unedited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_encoding -synonym: "H ACA box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -synonym: "oligo U tail" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000602 ! guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." 
[http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -synonym: "bacterial RNApol promoter" EXACT [] -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -synonym: "bacterial terminator" EXACT [] -is_a: SO:0000141 ! terminator -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -synonym: "terminator of type 2 RNApol III promoter" EXACT [] -is_a: SO:0000951 ! eukaryotic_terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -synonym: "RNApol III promoter type 1" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "RNApol III promoter type 2" EXACT [] -synonym: "tRNA promoter" RELATED [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence TGGCnnAGTGG." [SO:ke] -synonym: "A-box" EXACT [] -xref: http://en.wikipedia.org/wiki/A-box "wiki" -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000618 ! 
RNApol_III_promoter_type_2 - -[Term] -id: SO:0000620 -name: B_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence AGGTTCCAnnCC." [SO:ke] -synonym: "B-box" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -synonym: "RNApol III promoter type 3" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -def: "An RNA polymerase III type 1 promoter with consensus sequence CAnnCCn." [SO:ke] -synonym: "C-box" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000623 -name: snRNA_encoding -synonym: "snRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -synonym: "chromosomal regulatory element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000627 -name: insulator -def: "A transcriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! 
transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -synonym: "five prime open reading frame" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -def: "A start codon upstream of the ORF." [SO:ke] -synonym: "upstream AUG codon" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000631 -name: polycistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." [SO:ke] -synonym: "polycistronic primary transcript" EXACT [] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000632 -name: monocistronic_primary_transcript -def: "A primary transcript encoding for one gene product." [SO:ke] -synonym: "monocistronic primary transcript" EXACT [] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000633 -name: monocistronic_mRNA -def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd] -synonym: "monocistronic mRNA" EXACT [] -synonym: "monocistronic processed transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Monocistronic_mRNA "wiki" -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000634 -name: polycistronic_mRNA -def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd] -synonym: "polycistronic mRNA" EXACT [] -synonym: "polycistronic processed transcript" RELATED [] -xref: http://en.wikipedia.org/wiki/Polycistronic_mRNA "wiki" -intersection_of: SO:0000234 ! 
mRNA -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "A primary transcript that donates the spliced leader to other mRNA." [SO:ke] -synonym: "mini exon donor RNA" EXACT [] -synonym: "mini-exon donor RNA" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -synonym: "spliced leader RNA" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000635 ! mini_exon_donor_RNA - -[Term] -id: SO:0000637 -name: engineered_plasmid -def: "A plasmid that is engineered." [SO:xp] -synonym: "engineered plasmid" EXACT [] -synonym: "engineered plasmid gene" RELATED [] -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -synonym: "transcribed spacer region" EXACT [] -is_a: SO:0000838 ! rRNA_primary_transcript_region - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -synonym: "internal transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -synonym: "external transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -synonym: "tetranucleotide repeat microsatellite feature" EXACT [] -is_a: SO:0000289 ! 
microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_encoding -synonym: "SRP RNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro RNA primary transcript" EXACT [] -synonym: "miRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000648 -name: stRNA_primary_transcript -def: "A primary transcript encoding a small temporal mRNA (SO:0000649)." 
[SO:ke] -synonym: "small temporal RNA primary transcript" EXACT [] -synonym: "stRNA primary transcript" EXACT [] -is_a: SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000255 ! rRNA_small_subunit_primary_transcript - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilizes 5S rRNA until it is required for transcription." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000654 -name: maxicircle_gene -def: "A mitochondrial gene located in a maxicircle." [SO:xp] -synonym: "maxi-circle gene" EXACT [] -synonym: "maxicircle gene" EXACT [] -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000742 ! maxicircle - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000656 -name: stRNA_encoding -synonym: "stRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region -relationship: has_part SO:0000726 ! repeat_unit - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." 
[SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_encoding -synonym: "tmRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_obsolete: true - -[Term] -id: SO:0000661 -name: intron_attribute -is_obsolete: true - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000663 -name: tRNA_encoding -synonym: "tRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -synonym: "introgressed chromosome region" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000665 -name: monocistronic_transcript -def: "A transcript that is monocistronic." [SO:xp] -synonym: "monocistronic transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000666 -name: mobile_intron -def: "An intron (mitochondrial, chloroplast, nuclear or prokaryotic) that encodes a double strand sequence specific endonuclease allowing for mobility." [SO:ke] -synonym: "mobile intron" EXACT [] -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0000667 -name: insertion -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." 
[SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -synonym: "sequence rearrangement feature" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." [SO:ma] -synonym: "chromosome breakage sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -synonym: "internal eliminated sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -synonym: "macronucleus destined segment" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non canonical splice site" EXACT [] -synonym: "non-canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000678 -consider: SO:0000679 - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." 
[SO:ke] -synonym: "canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000676 -consider: SO:0000677 - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -synonym: "canonical 3' splice site" EXACT [] -synonym: "canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." [SO:ke] -synonym: "canonical 5' splice site" EXACT [] -synonym: "canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." [SO:ke] -synonym: "non canonical 3' splice site" RELATED [] -synonym: "non canonical three prime splice site" EXACT [] -synonym: "non-canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non canonical 5' splice site" EXACT [] -synonym: "non canonical five prime splice site" EXACT [] -synonym: "non-canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non ATG start codon" EXACT [] -synonym: "non canonical start codon" EXACT [] -synonym: "non-canonical start codon" EXACT [] -is_a: SO:0000318 ! start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -synonym: "aberrant processed transcript" EXACT [] -is_a: SO:0000673 ! 
transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -synonym: "exonic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000685 -name: DNAseI_hypersensitive_site -synonym: "DHS" EXACT [] -synonym: "DNAseI hypersensitive site" EXACT [] -is_a: SO:0000322 ! nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "A chromosomal translocation whereby the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements. This occurs for some translocations, particularly but not exclusively, reciprocal translocations." [SO:ma] -synonym: "translocation element" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! 
expressed_sequence_match - -[Term] -id: SO:0000690 -name: gene_with_polycistronic_transcript -def: "A gene that encodes a polycistronic transcript." [SO:xp] -synonym: "gene with polycistronic transcript" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000078 ! polycistronic_transcript - -[Term] -id: SO:0000691 -name: cleaved_initiator_methionine -alt_id: BS:00067 -def: "The initiator methionine that has been cleaved from a mature polypeptide sequence." [EBIBS:GAR] -subset: biosapiens -synonym: "cleaved initiator methionine" EXACT [] -synonym: "init_met" RELATED [uniprot:feature_type] -synonym: "initiator methionine" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000692 -name: gene_with_dicistronic_transcript -def: "A gene that encodes a dicistronic transcript." [SO:xp] -synonym: "gene with dicistronic transcript" EXACT [] -intersection_of: SO:0000690 ! gene_with_polycistronic_transcript -intersection_of: transcribed_to SO:0000079 ! dicistronic_transcript - -[Term] -id: SO:0000693 -name: gene_with_recoded_mRNA -def: "A gene that encodes an mRNA that is recoded." [SO:xp] -synonym: "gene with recoded mRNA" EXACT [] -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000881 ! recoded - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." 
[SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000697 -name: gene_with_stop_codon_read_through -def: "A gene that encodes a transcript with stop codon readthrough." [SO:xp] -synonym: "gene with stop codon read through" EXACT [] -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000698 -name: gene_with_stop_codon_redefined_as_pyrrolysine -def: "A gene encoding an mRNA that has the stop codon redefined as pyrrolysine." [SO:xp] -synonym: "gene with stop codon redefined as pyrrolysine" EXACT [] -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000884 ! stop_codon_redefined_as_pyrrolysine - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -synonym: "breakpoint" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000001 ! region - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." 
[SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjcent copies of a region (of length greater than 1)." [SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The 5' five prime splice site region of the donor RNA." [SO:ke] -comment: SL RNA contains a donor site. -synonym: "5 prime trans splice site" RELATED [] -synonym: "trans splice donor site" EXACT [] -synonym: "trans-splice donor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -synonym: "SL1 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -synonym: "SL2 acceptor site" EXACT [] -is_a: SO:0000706 ! 
trans_splice_acceptor_site - -[Term] -id: SO:0000710 -name: gene_with_stop_codon_redefined_as_selenocysteine -def: "A gene encoding an mRNA that has the stop codon redefined as selenocysteine." [SO:xp] -synonym: "gene with stop codon redefined as selenocysteine" EXACT [] -intersection_of: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: has_part SO:0000885 ! stop_codon_redefined_as_selenocysteine - -[Term] -id: SO:0000711 -name: gene_with_mRNA_recoded_by_translational_bypass -def: "A gene with mRNA recoded by translational bypass." [SO:xp] -synonym: "gene with mRNA recoded by translational bypass" EXACT [] -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:0000712 -name: gene_with_transcript_with_translational_frameshift -def: "A gene encoding a transcript that has a translational frameshift." [SO:xp] -synonym: "gene with transcript with translational frameshift" EXACT [] -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000713 -name: DNA_motif -def: "A motif that is active in the DNA form of the sequence." [SO:ke] -synonym: "DNA motif" EXACT [] -xref: http://en.wikipedia.org/wiki/DNA_motif "wiki" -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000716 -name: dicistronic_mRNA -def: "An mRNA that has the quality dicistronic." [SO:ke] -synonym: "dicistronic mRNA" EXACT [] -synonym: "dicistronic processed transcript" RELATED [] -intersection_of: SO:0000234 ! 
mRNA -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interrupted by one or more stop codons; usually identified through intergenomic sequence comparisons." [SGD:rb] -comment: Term requested by Rama from SGD. -synonym: "blocked reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000720 -name: foreign_transposable_element -def: "A transposable element that is foreign." [SO:ke] -comment: requested by Michael on 19 Nov 2004. -synonym: "foreign transposable element" EXACT [] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000721 -name: gene_with_dicistronic_primary_transcript -def: "A gene that encodes a dicistronic primary transcript." [SO:xp] -comment: Requested by Michael, 19 nov 2004. -synonym: "gene with dicistronic primary transcript" EXACT [] -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:1001197 ! 
dicistronic_primary_transcript - -[Term] -id: SO:0000722 -name: gene_with_dicistronic_mRNA -def: "A gene that encodes a polycistronic mRNA." [SO:xp] -comment: Requested by MA nov 19 2004. -synonym: "gene with dicistronic mRNA" EXACT [] -synonym: "gene with dicistronic processed transcript" EXACT [] -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:0000716 ! dicistronic_mRNA - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." [SO:ma] -synonym: "intervening DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/IDNA "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal - -[Term] -id: SO:0000726 -name: repeat_unit -def: "The simplest repeated component of a repeat region. 
A single repeat." [SO:ke] -comment: Added to comply with the feature table. A single repeat. -synonym: "repeat unit" EXACT [] -xref: http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -intersection_of: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000728 -name: intein -def: "A region of a peptide that is able to excise itself and rejoin the remaining portions with a peptide bond." [SO:ke] -comment: Intein-mediated protein splicing occurs after mRNA has been translated into a protein. -synonym: "protein intron" RELATED [] -xref: http://en.wikipedia.org/wiki/Intein "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000729 -name: intein_containing -def: "An attribute of protein-coding genes where the initial protein product contains an intein." [SO:ke] -synonym: "intein containing" EXACT [] -is_a: SO:0000010 ! protein_coding - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000731 -name: fragmentary -def: "An attribute to describe a feature that is incomplete." [SO:ke] -comment: Term added because of request by MO people. -synonym: "fragment" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000732 -name: predicted -def: "An attribute describing an unverified region." [SO:ke] -xref: http://en.wikipedia.org/wiki/Predicted "wiki" -is_a: SO:0000905 ! 
status - -[Term] -id: SO:0000733 -name: feature_attribute -def: "An attribute describing a located_sequence_feature." [SO:ke] -synonym: "feature attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000734 -name: exemplar_mRNA -def: "An exemplar is a representative cDNA sequence for each gene. The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: Added for the MO people. -synonym: "exemplar mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000864 ! exemplar - -[Term] -id: SO:0000735 -name: sequence_location -synonym: "sequence location" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_sequence -synonym: "organelle sequence" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "mitochondrial sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000738 -name: nuclear_sequence -synonym: "nuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -synonym: "nucleomorphic sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000740 -name: plastid_sequence -synonym: "plastid sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000741 -name: kinetoplast -alt_id: SO:0000826 -def: "A kinetoplast is an interlocked network of thousands of minicircles and tens of maxi circles, located near the base of the flagellum of some protozoan species." [PMID:8395055] -synonym: "kinetoplast_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Kinetoplast "wiki" -is_a: SO:0001260 ! 
sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0000742 ! maxicircle -intersection_of: has_part SO:0000980 ! minicircle - -[Term] -id: SO:0000742 -name: maxicircle -alt_id: SO:0000827 -def: "A maxicircle is a replicon, part of a kinetoplast, that contains open reading frames and replicates via a rolling circle method." [PMID:8395055] -synonym: "maxicircle_chromosome" EXACT [] -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000741 ! kinetoplast - -[Term] -id: SO:0000743 -name: apicoplast_sequence -synonym: "apicoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -synonym: "chromoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -synonym: "chloroplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -synonym: "cyanelle sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -synonym: "leucoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -synonym: "proplastid sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_location -synonym: "plasmid location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -synonym: "amplification origin" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_location -synonym: "proviral location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0005836 ! 
regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -def: "The region of sequence that has been inserted and is being propogated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000754 -name: lambda_vector -def: "The lambda bacteriophage is the vector for the linear lambda clone. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -synonym: "lambda vector" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000755 -name: plasmid_vector -synonym: "plasmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Plasmid_vector#Vectors "wiki" -is_a: SO:0000440 ! vector_replicon -intersection_of: SO:0001235 ! replicon -intersection_of: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template." [SO:ma] -xref: http://en.wikipedia.org/wiki/CDNA "wiki" -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -synonym: "single strand cDNA" EXACT [] -synonym: "single stranded cDNA" EXACT [] -synonym: "single-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -synonym: "double strand cDNA" RELATED [] -synonym: "double stranded cDNA" EXACT [] -synonym: "double-strand cDNA" RELATED [] -is_a: SO:0000756 ! 
cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_obsolete: true - -[Term] -id: SO:0000760 -name: YAC_clone -is_obsolete: true - -[Term] -id: SO:0000761 -name: phagemid_clone -is_obsolete: true - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000763 -name: fosmid_clone -is_obsolete: true - -[Term] -id: SO:0000764 -name: BAC_clone -is_obsolete: true - -[Term] -id: SO:0000765 -name: cosmid_clone -is_obsolete: true - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -def: "A tRNA sequence that has a pyrrolysine anticodon, and a 3' pyrrolysine binding region." [SO:ke] -synonym: "pyrrolysyl tRNA" EXACT [] -synonym: "pyrrolysyl-transfer ribonucleic acid" EXACT [] -synonym: "pyrrolysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0001178 ! pyrrolysine_tRNA_primary_transcript - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome." [SO:ma] -is_a: SO:0000155 ! plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria. Processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw, issn:1362-4962] -comment: Added in response to comment from Kelly Williams from Indiana. Nov 2005. -synonym: "tmRNA coding piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." 
[doi:10.1093/nar/gkh795, Indiana:kw] -comment: Added in response to Kelly Williams from Indiana. Date: Nov 2005. -synonym: "tmRNA acceptor piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000771 -name: QTL -def: "A quantitative trait locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." [http://rgd.mcw.edu/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005. -synonym: "quantitative trait locus" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000772 -name: genomic_island -def: "A genomic island is an integrated mobile genetic element, characterized by size (over 10 Kb). It that has features that suggest a foreign origin. These can include nucleotide distribution (oligonucleotides signature, CG content etc.) that differs from the bulk of the chromosome and/or genes suggesting DNA mobility." [Phigo:at, SO:ke] -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -synonym: "genomic island" EXACT [] -xref: http://en.wikipedia.org/wiki/Genomic_island "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "pathogenic island" EXACT [] -is_a: SO:0000772 ! 
genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands. -synonym: "metabolic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -def: "An adaptive island is a genomic island that provides an adaptive advantage to the host." [SO:ke] -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands. Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "adaptive island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands. Evolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA gene. John T. Sullivan and Clive W. Ronso PNAS 1998 Apr 28 95 (9) 5145-5149. -synonym: "symbiosis island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. 
Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000779 -name: engineered_episome -def: "An episome that is engineered." [SO:xp] -comment: Requested by Lynn Crosby Jan 2006. -synonym: "engineered episome" EXACT [] -intersection_of: SO:0000768 ! episome -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000780 -name: transposable_element_attribute -comment: Added by KE Jan 2006 to capture the kinds of attributes of TEs -is_obsolete: true - -[Term] -id: SO:0000781 -name: transgenic -def: "Attribute describing sequence that has been integrated with foreign sequence." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000782 -name: natural -def: "An attribute describing a feature that occurs in nature." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000783 -name: engineered -def: "An attribute to describe a region that was modified in vitro." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000784 -name: foreign -def: "An attribute to describe a region from another species." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000785 -name: cloned_region -comment: Added in response to Lynn Crosby. A clone insert may be composed of many cloned regions. -synonym: "cloned region" EXACT [] -synonym: "cloned segment" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000786 -name: reagent_attribute -comment: Added jan 2006 by KE. -synonym: "reagent attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000787 -name: clone_attribute -is_obsolete: true - -[Term] -id: SO:0000788 -name: cloned -is_obsolete: true - -[Term] -id: SO:0000789 -name: validated -def: "An attribute to describe a feature that has been proven." [SO:ke] -is_a: SO:0000905 ! 
status - -[Term] -id: SO:0000790 -name: invalidated -def: "An attribute describing a feature that is invalidated." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000791 -name: cloned_genomic -is_obsolete: true - -[Term] -id: SO:0000792 -name: cloned_cDNA -is_obsolete: true - -[Term] -id: SO:0000793 -name: engineered_DNA -is_obsolete: true - -[Term] -id: SO:0000794 -name: engineered_rescue_region -def: "A rescue region that is engineered." [SO:xp] -synonym: "engineered rescue fragment" EXACT [] -synonym: "engineered rescue region" EXACT [] -synonym: "engineered rescue segment" EXACT [] -intersection_of: SO:0000411 ! rescue_region -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000795 -name: rescue_mini_gene -def: "A mini_gene that rescues." [SO:xp] -synonym: "rescue mini gene" EXACT [] -synonym: "rescue mini-gene" EXACT [] -intersection_of: SO:0000815 ! mini_gene -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000796 -name: transgenic_transposable_element -def: "TE that has been modified in vitro, including insertion of DNA derived from a source other than the originating TE." [FB:mc] -comment: Modified as requested by Lynn - FB. May 2007. -synonym: "transgenic transposable element" EXACT [] -intersection_of: SO:0000101 ! transposable_element -intersection_of: derives_from SO:0000151 ! clone -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000797 -name: natural_transposable_element -def: "TE that exists (or existed) in nature." [FB:mc] -synonym: "natural transposable element" EXACT [] -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000782 ! natural - -[Term] -id: SO:0000798 -name: engineered_transposable_element -def: "TE that has been modified by manipulations in vitro." [FB:mc] -synonym: "engineered transposable element" EXACT [] -intersection_of: SO:0000101 ! 
transposable_element -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000799 -name: engineered_foreign_transposable_element -def: "A transposable_element that is engineered and foreign." [FB:mc] -synonym: "engineered foreign transposable element" EXACT [] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000800 -name: assortment_derived_duplication -def: "A multi-chromosome duplication aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment derived duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000801 -name: assortment_derived_deficiency_plus_duplication -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency and a duplication." [FB:gm] -synonym: "assortment derived deficiency plus duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000802 -name: assortment_derived_deficiency -def: "A multi-chromosome deficiency aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment-derived deficiency" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000803 -name: assortment_derived_aneuploid -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency or a duplication." [FB:gm] -synonym: "assortment derived aneuploid" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000804 -name: engineered_region -def: "A region that is engineered." [SO:xp] -synonym: "construct" EXACT [] -synonym: "engineered region" EXACT [] -synonym: "engineered sequence" EXACT [] -is_a: SO:0001409 ! biomaterial_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! 
engineered - -[Term] -id: SO:0000805 -name: engineered_foreign_region -def: "A region that is engineered and foreign." [SO:xp] -synonym: "engineered foreign region" EXACT [] -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000806 -name: fusion -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000807 -name: engineered_tag -def: "A tag that is engineered." [SO:xp] -synonym: "engineered tag" EXACT [] -intersection_of: SO:0000324 ! tag -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000808 -name: validated_cDNA_clone -def: "A cDNA clone that has been validated." [SO:xp] -synonym: "validated cDNA clone" EXACT [] -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000789 ! validated - -[Term] -id: SO:0000809 -name: invalidated_cDNA_clone -def: "A cDNA clone that is invalid." [SO:xp] -synonym: "invalidated cDNA clone" EXACT [] -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000790 ! invalidated - -[Term] -id: SO:0000810 -name: chimeric_cDNA_clone -def: "A cDNA clone invalidated because it is chimeric." [SO:xp] -synonym: "chimeric cDNA clone" EXACT [] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA - -[Term] -id: SO:0000811 -name: genomically_contaminated_cDNA_clone -def: "A cDNA clone invalidated by genomic contamination." [SO:xp] -synonym: "genomically contaminated cDNA clone" EXACT [] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000414 ! invalidated_by_genomic_contamination - -[Term] -id: SO:0000812 -name: polyA_primed_cDNA_clone -def: "A cDNA clone invalidated by polyA priming." [SO:xp] -synonym: "polyA primed cDNA clone" EXACT [] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000415 ! 
invalidated_by_genomic_polyA_primed_cDNA - -[Term] -id: SO:0000813 -name: partially_processed_cDNA_clone -def: "A cDNA invalidated clone by partial processing." [SO:xp] -synonym: "partially processed cDNA clone" EXACT [] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000416 ! invalidated_by_partial_processing - -[Term] -id: SO:0000814 -name: rescue -def: "An attribute describing a region's ability, when introduced to a mutant organism, to re-establish (rescue) a phenotype." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000815 -name: mini_gene -def: "By definition, minigenes are short open-reading frames (ORF), usually encoding approximately 9 to 20 amino acids, which are expressed in vivo (as distinct from being synthesized as peptide or protein ex vivo and subsequently injected). The in vivo synthesis confers a distinct advantage: the expressed sequences can enter both antigen presentation pathways, MHC I (inducing CD8+ T- cells, which are usually cytotoxic T-lymphocytes (CTL)) and MHC II (inducing CD4+ T-cells, usually 'T-helpers' (Th)); and can encounter B-cells, inducing antibody responses. Three main vector approaches have been used to deliver minigenes: viral vectors, bacterial vectors and plasmid DNA." [PMID:15992143] -synonym: "mini gene" EXACT [] -is_a: SO:0000236 ! ORF - -[Term] -id: SO:0000816 -name: rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "rescue gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000817 -name: wild_type -def: "An attribute describing sequence with the genotype found in nature and/or standard laboratory stock." [SO:ke] -synonym: "wild type" EXACT [] -xref: http://en.wikipedia.org/wiki/Wild_type "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000818 -name: wild_type_rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "wild type rescue gene" EXACT [] -is_a: SO:0000816 ! 
rescue_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000817 ! wild_type - -[Term] -id: SO:0000819 -name: mitochondrial_chromosome -def: "A chromosome originating in a mitochondria." [SO:xp] -synonym: "mitochondrial chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000820 -name: chloroplast_chromosome -def: "A chromosome originating in a chloroplast." [SO:xp] -synonym: "chloroplast chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000821 -name: chromoplast_chromosome -def: "A chromosome originating in a chromoplast." [SO:xp] -synonym: "chromoplast chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000822 -name: cyanelle_chromosome -def: "A chromosome originating in a cyanelle." [SO:xp] -synonym: "cyanelle chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000823 -name: leucoplast_chromosome -def: "A chromosome with origin in a leucoplast." [SO:xp] -synonym: "leucoplast chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000824 -name: macronuclear_chromosome -def: "A chromosome originating in a macronucleus." [SO:xp] -synonym: "macronuclear chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000083 ! macronuclear_sequence - -[Term] -id: SO:0000825 -name: micronuclear_chromosome -def: "A chromosome originating in a micronucleus." [SO:xp] -synonym: "micronuclear chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000084 ! 
micronuclear_sequence - -[Term] -id: SO:0000828 -name: nuclear_chromosome -def: "A chromosome originating in a nucleus." [SO:xp] -synonym: "nuclear chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000829 -name: nucleomorphic_chromosome -def: "A chromosome originating in a nucleomorph." [SO:xp] -synonym: "nucleomorphic chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000832 -name: promoter_region -def: "A region of sequence which is part of a promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -def: "A region of a mature transcript." 
[SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000838 -name: rRNA_primary_transcript_region -def: "A region of an rRNA primary transcript." [SO:ke] -comment: To allow transcribed_spacer_region to have a path to the root. -synonym: "rRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. 
-subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000840 -name: repeat_component -def: "A region of a repeated sequence." [SO:ke] -comment: A manufactured to group the parts of repeats, to give them an is_a path back to the root. -synonym: "repeat component" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000843 -name: bacterial_RNApol_promoter_region -def: "A region which is part of a bacterial RNA polymerase promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of bacterial_RNApol_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000844 -name: RNApol_II_promoter_region -def: "A region of sequence which is a promoter for RNA polymerase II." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_II_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000845 -name: RNApol_III_promoter_type_1_region -def: "A region of sequence which is a promoter for RNA polymerase III type 1." 
[SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_1 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000846 -name: RNApol_III_promoter_type_2_region -def: "A region of sequence which is a promoter for RNA polymerase III type 2." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_2 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000847 -name: tmRNA_region -def: "A region of a tmRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of tmRNA, thus giving them an is_a path back to the root. -synonym: "tmRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000584 ! tmRNA - -[Term] -id: SO:0000848 -name: LTR_component -synonym: "long term repeat component" EXACT [] -synonym: "LTR component" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000849 -name: three_prime_LTR_component -synonym: "3' long terminal repeat component" EXACT [] -synonym: "three prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000850 -name: five_prime_LTR_component -synonym: "5' long term repeat component" EXACT [] -synonym: "five prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000851 -name: CDS_region -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! 
exon - -[Term] -id: SO:0000853 -name: homologous_region -def: "A region that is homologous to another region." [SO:ke] -synonym: "homolog" EXACT [] -synonym: "homologous region" EXACT [] -synonym: "homologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Homology_(biology) "wiki" -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000857 ! homologous - -[Term] -id: SO:0000854 -name: paralogous_region -def: "A homologous_region that is paralogous to another region." [SO:ke] -comment: A term to be used in conjunction with the paralogous_to relationship. -synonym: "paralog" EXACT [] -synonym: "paralogous region" EXACT [] -synonym: "paralogue" EXACT [] -xref: http://en.wikipedia.org/wiki/Paralog#Paralogy "wiki" -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000859 ! paralogous - -[Term] -id: SO:0000855 -name: orthologous_region -def: "A homologous_region that is orthologous to another region." [SO:ke] -comment: This term should be used in conjunction with the similarity relationships defined in SO. -synonym: "ortholog" EXACT [] -synonym: "orthologous region" EXACT [] -synonym: "orthologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Ortholog#Orthology "wiki" -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000858 ! orthologous - -[Term] -id: SO:0000856 -name: conserved -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000857 -name: homologous -def: "Similarity due to common ancestry." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000858 -name: orthologous -def: "An attribute describing a kind of homology where divergence occured after a speciation event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000859 -name: paralogous -def: "An attribute describing a kind of homology where divergence occurred after a duplication event." [SO:ke] -is_a: SO:0000857 ! 
homologous - -[Term] -id: SO:0000860 -name: syntenic -def: "Attribute describing sequence regions occurring in same order on chromosome of different species." [SO:ke] -xref: http://en.wikipedia.org/wiki/Syntenic "wiki" -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000861 -name: capped_primary_transcript -def: "A primary transcript that is capped." [SO:xp] -synonym: "capped primary transcript" EXACT [] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000862 -name: capped_mRNA -def: "An mRNA that is capped." [SO:xp] -synonym: "capped mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000863 -name: mRNA_attribute -def: "An attribute describing an mRNA feature." [SO:ke] -synonym: "mRNA attribute" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000864 -name: exemplar -def: "An attribute describing a sequence is representative of a class of similar sequences." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000865 -name: frameshift -def: "An attribute describing a sequence that contains a mutation involving the deletion or insertion of one or more bases, where this number is not divisible by 3." [SO:ke] -xref: http://en.wikipedia.org/wiki/Frameshift "wiki" -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000866 -name: minus_1_frameshift -def: "A frameshift caused by deleting one base." [SO:ke] -synonym: "minus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000867 -name: minus_2_frameshift -def: "A frameshift caused by deleting two bases." [SO:ke] -synonym: "minus 2 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000868 -name: plus_1_frameshift -def: "A frameshift caused by inserting one base." 
[SO:ke] -synonym: "plus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000869 -name: plus_2_framshift -def: "A frameshift caused by inserting two bases." [SO:ke] -synonym: "plus 2 framshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000870 -name: trans_spliced -def: "An attribute describing transcript sequence that is created by splicing exons from diferent genes." [SO:ke] -synonym: "trans-spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000871 -name: polyadenylated_mRNA -def: "An mRNA that is polyadenylated." [SO:xp] -synonym: "polyadenylated mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000610 ! polyA_sequence -intersection_of: has_quality SO:0000246 ! polyadenylated - -[Term] -id: SO:0000872 -name: trans_spliced_mRNA -def: "An mRNA that is trans-spliced." [SO:xp] -synonym: "trans-spliced mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000636 ! spliced_leader_RNA -intersection_of: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000873 -name: edited_transcript -def: "A transcript that is edited." [SO:ke] -synonym: "edited transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: guided_by SO:0000602 ! guide_RNA -intersection_of: has_part SO:0000977 ! anchor_binding_site -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000874 -name: edited_transcript_by_A_to_I_substitution -def: "A transcript that has been edited by A to I substitution." [SO:ke] -synonym: "edited transcript by A to I substitution" EXACT [] -is_a: SO:0000873 ! edited_transcript - -[Term] -id: SO:0000875 -name: bound_by_protein -def: "An attribute describing a sequence that is bound by a protein." [SO:ke] -synonym: "bound by protein" EXACT [] -is_a: SO:0000277 ! 
bound_by_factor - -[Term] -id: SO:0000876 -name: bound_by_nucleic_acid -def: "An attribute describing a sequence that is bound by a nucleic acid." [SO:ke] -synonym: "bound by nucleic acid" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000877 -name: alternatively_spliced -def: "An attribute describing a situation where a gene may encode for more than 1 transcript." [SO:ke] -synonym: "alternatively spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000878 -name: monocistronic -def: "An attribute describing a sequence that contains the code for one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000879 -name: dicistronic -def: "An attribute describing a sequence that contains the code for two gene products." [SO:ke] -is_a: SO:0000880 ! polycistronic - -[Term] -id: SO:0000880 -name: polycistronic -def: "An attribute describing a sequence that contains the code for more than one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000881 -name: recoded -def: "An attribute describing an mRNA sequence that has been reprogrammed at translation, causing localized alterations." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000882 -name: codon_redefined -def: "An attribute describing the alteration of codon meaning." [SO:ke] -synonym: "codon redefined" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000883 -name: stop_codon_read_through -def: "A stop codon redefined to be a new amino acid." [SO:ke] -synonym: "stop codon read through" EXACT [] -synonym: "stop codon readthrough" RELATED [] -is_a: SO:0000145 ! recoded_codon - -[Term] -id: SO:0000884 -name: stop_codon_redefined_as_pyrrolysine -def: "A stop codon redefined to be the new amino acid, pyrrolysine." [SO:ke] -synonym: "stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000883 ! 
stop_codon_read_through - -[Term] -id: SO:0000885 -name: stop_codon_redefined_as_selenocysteine -def: "A stop codon redefined to be the new amino acid, selenocysteine." [SO:ke] -synonym: "stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000886 -name: recoded_by_translational_bypass -def: "Recoded mRNA where a block of nucleotides is not translated." [SO:ke] -synonym: "recoded by translational bypass" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000887 -name: translationally_frameshifted -def: "Recoding by frameshifting a particular site." [SO:ke] -synonym: "translationally frameshifted" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000888 -name: maternally_imprinted_gene -def: "A gene that is maternally_imprinted." [SO:xp] -synonym: "maternally imprinted gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000135 ! maternally_imprinted - -[Term] -id: SO:0000889 -name: paternally_imprinted_gene -def: "A gene that is paternally imprinted." [SO:xp] -synonym: "paternally imprinted gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000136 ! paternally_imprinted - -[Term] -id: SO:0000890 -name: post_translationally_regulated_gene -def: "A gene that is post translationally regulated." [SO:xp] -synonym: "post translationally regulated gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000891 -name: negatively_autoregulated_gene -def: "A gene that is negatively autoreguated." [SO:xp] -synonym: "negatively autoregulated gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000473 ! negatively_autoregulated - -[Term] -id: SO:0000892 -name: positively_autoregulated_gene -def: "A gene that is positively autoregulated." [SO:xp] -synonym: "positively autoregulated gene" EXACT [] -intersection_of: SO:0000704 ! 
gene -intersection_of: has_quality SO:0000475 ! positively_autoregulated - -[Term] -id: SO:0000893 -name: silenced -def: "An attribute describing an epigenetic process where a gene is inactivated at transcriptional or translational level." [SO:ke] -xref: http://en.wikipedia.org/wiki/Silenced "wiki" -is_a: SO:0000126 ! transcriptionally_repressed - -[Term] -id: SO:0000894 -name: silenced_by_DNA_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA modifications, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0000895 -name: silenced_by_DNA_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA methylation, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA methylation" EXACT [] -is_a: SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000896 -name: translationally_regulated_gene -def: "A gene that is translationally regulated." [SO:xp] -synonym: "translationally regulated gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000131 ! translationally_regulated - -[Term] -id: SO:0000897 -name: allelically_excluded_gene -def: "A gene that is allelically_excluded." [SO:xp] -synonym: "allelically excluded gene" EXACT [] -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000137 ! allelically_excluded - -[Term] -id: SO:0000898 -name: epigenetically_modified_gene -def: "A gene that is epigenetically modified." [SO:ke] -synonym: "epigenetically modified gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000899 -name: nuclear_mitochondrial -def: "An attribute describing a nuclear pseudogene of a mitochndrial gene." 
[SO:ke] -synonym: "nuclear mitochondrial" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000900 -name: processed -def: "An attribute describing a pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promotors, but often including a polyA tail." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000901 -name: unequally_crossed_over -def: "An attribute describing a pseudogene that was created by tandem duplication and unequal crossing over during recombination." [SO:ke] -synonym: "unequally crossed over" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000902 -name: transgene -def: "A transgene is a gene that has been transferred naturally or by any of a number of genetic engineering techniques from one organism to another." [SO:xp] -xref: http://en.wikipedia.org/wiki/Transgene "wiki" -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000903 -name: endogenous_retroviral_sequence -synonym: "endogenous retroviral sequence" EXACT [] -is_a: SO:0000751 ! proviral_location - -[Term] -id: SO:0000904 -name: rearranged_at_DNA_level -def: "An attribute to describe the sequence of a feature, where the DNA is rearranged." [SO:ke] -synonym: "rearranged at DNA level" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000905 -name: status -def: "An attribute describing the status of a feature, based on the available evidence." [SO:ke] -comment: This term is the hypernym of attributes and should not be annotated to. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000906 -name: independently_known -def: "Attribute to describe a feature that is independently known - not predicted." [SO:ke] -synonym: "independently known" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000907 -name: supported_by_sequence_similarity -def: "An attribute to describe a feature that has been predicted using sequence similarity techniques." 
[SO:ke] -synonym: "supported by sequence similarity" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000908 -name: supported_by_domain_match -def: "An attribute to describe a feature that has been predicted using sequence similarity of a known domain." [SO:ke] -synonym: "supported by domain match" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000909 -name: supported_by_EST_or_cDNA -def: "An attribute to describe a feature that has been predicted using sequence similarity to EST or cDNA data." [SO:ke] -synonym: "supported by EST or cDNA" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000910 -name: orphan -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000911 -name: predicted_by_ab_initio_computation -def: "An attribute describing a feature that is predicted by a computer program that did not rely on sequence similarity." [SO:ke] -synonym: "predicted by ab initio computation" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000912 -name: asx_turn -alt_id: BS:00203 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Aspartate or Asparagine (Asx), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0000913 -name: cloned_cDNA_insert -def: "A clone insert made from cDNA." [SO:xp] -synonym: "cloned cDNA insert" EXACT [] -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000914 -name: cloned_genomic_insert -def: "A clone insert made from genomic DNA." [SO:xp] -synonym: "cloned genomic insert" EXACT [] -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000915 -name: engineered_insert -def: "A clone insert that is engineered." 
[SO:xp] -synonym: "engineered insert" EXACT [] -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000916 -name: edit_operation -synonym: "edit operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000917 -name: insert_U -def: "An edit to insert a U." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "insert U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000918 -name: delete_U -def: "An edit to delete a uridine." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "delete U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000919 -name: substitute_A_to_I -def: "An edit to substitute an I for an A." [SO:ke] -synonym: "substitute A to I" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000920 -name: insert_C -def: "An edit to insert a cytidine." [SO:ke] -synonym: "insert C" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000921 -name: insert_dinucleotide -def: "An edit to insert a dinucleotide." [SO:ke] -synonym: "insert dinucleotide" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000922 -name: substitute_C_to_U -def: "An edit to substitute an U for a C." [SO:ke] -synonym: "substitute C to U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000923 -name: insert_G -def: "An edit to insert a G." [SO:ke] -synonym: "insert G" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000924 -name: insert_GC -def: "An edit to insert a GC dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. 
The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GC" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000925 -name: insert_GU -def: "An edit to insert a GU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000926 -name: insert_CU -def: "An edit to insert a CU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. 
Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert CU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000927 -name: insert_AU -def: "An edit to insert a AU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000928 -name: insert_AA -def: "An edit to insert a AA dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AA" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000929 -name: edited_mRNA -def: "An mRNA that is edited." [SO:xp] -synonym: "edited mRNA" EXACT [] -intersection_of: SO:0000873 ! 
edited_transcript -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000930 -name: guide_RNA_region -def: "A region of guide RNA." [SO:ma] -synonym: "guide RNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000602 ! guide_RNA - -[Term] -id: SO:0000931 -name: anchor_region -def: "A region of a guide_RNA that base-pairs to a target mRNA." [SO:jk] -synonym: "anchor region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000932 -name: pre_edited_mRNA -synonym: "pre-edited mRNA" EXACT [] -is_a: SO:0000120 ! protein_coding_primary_transcript - -[Term] -id: SO:0000933 -name: intermediate -def: "An attribute to describe a feature between stages of processing." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000934 -name: miRNA_target_site -def: "A miRNA target site is a binding site where the molecule is a micro RNA." [FB:cds] -synonym: "miRNA target site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000935 -name: edited_CDS -def: "A CDS that is edited." [SO:xp] -synonym: "edited CDS" EXACT [] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000936 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -synonym: "vertebrate immunoglobulin T cell receptor rearranged segment" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000937 -name: vertebrate_immune_system_feature -is_obsolete: true - -[Term] -id: SO:0000938 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor rearranged gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000939 -name: vertebrate_immune_system_gene_recombination_signal_feature -synonym: "vertebrate immune system gene recombination signal feature" EXACT [] -is_a: SO:0000301 ! 
vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000940 -name: recombinationally_rearranged -synonym: "recombinationally rearranged" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000941 -name: recombinationally_rearranged_vertebrate_immune_system_gene -def: "A recombinationally rearranged gene of the vertebrate immune system." [SO:xp] -synonym: "recombinationally rearranged vertebrate immune system gene" EXACT [] -is_a: SO:0000456 ! recombinationally_rearranged_gene - -[Term] -id: SO:0000942 -name: attP_site -def: "An integration/excision site of a phage chromosome at which a recombinase acts to insert the phage DNA at a cognate integration/excision site on a bacterial chromosome." [SO:as] -synonym: "attP site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0001042 ! phage_sequence - -[Term] -id: SO:0000943 -name: attB_site -def: "An integration/excision site of a bacterial chromosome at which a recombinase acts to insert foreign DNA containing a cognate integration/excision site." [SO:as] -synonym: "attB site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000944 -name: attL_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attB_site and the 3' portion of attP_site." [SO:as] -synonym: "attBP'" RELATED [] -synonym: "attL site" RELATED [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000945 -name: attR_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attP_site and the 3' portion of attB_site." [SO:as] -synonym: "attPB'" RELATED [] -synonym: "attR site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000946 -name: integration_excision_site -def: "A region specifically recognised by a recombinase, which inserts or removes another region marked by a distinct cognate integration/excision site." 
[SO:as] -synonym: "attachment site" RELATED [] -synonym: "integration excision site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000947 -name: resolution_site -def: "A region specifically recognised by a recombinase, which separates a physically contiguous circle of DNA into two physically separate circles." [SO:as] -synonym: "res site" EXACT [] -synonym: "resolution site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000948 -name: inversion_site -def: "A region specifically recognised by a recombinase, which inverts the region flanked by a pair of sites." [SO:ma] -comment: A target region for site-specific inversion of a DNA region and which carries binding sites for a site-specific recombinase and accessory proteins as well as the site for specific cleavage by the recombinase. -synonym: "inversion site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000949 -name: dif_site -def: "A site at which replicated bacterial circular chromosomes are decatenated by site specific resolvase." [SO:as] -synonym: "dif site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000950 -name: attC_site -def: "An attC site is a sequence required for the integration of a DNA of an integron." [SO:as] -synonym: "attC site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000951 -name: eukaryotic_terminator -synonym: "eukaryotic terminator" EXACT [] -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000952 -name: oriV -def: "An origin of vegetative replication in plasmids and phages." [SO:as] -synonym: "origin of vegetative replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000953 -name: oriC -def: "An origin of bacterial chromosome replication." [SO:as] -synonym: "origin of bacterial chromosome replication" EXACT [] -is_a: SO:0000296 ! 
origin_of_replication - -[Term] -id: SO:0000954 -name: DNA_chromosome -def: "Structural unit composed of a self-replicating, DNA molecule." [SO:ma] -synonym: "DNA chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0000955 -name: double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded DNA molecule." [SO:ma] -synonym: "double stranded DNA chromosome" EXACT [] -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000985 ! double - -[Term] -id: SO:0000956 -name: single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded DNA molecule." [SO:ma] -synonym: "single stranded DNA chromosome" EXACT [] -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000984 ! single - -[Term] -id: SO:0000957 -name: linear_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear DNA molecule." [SO:ma] -synonym: "linear double stranded DNA chromosome" EXACT [] -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000958 -name: circular_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular DNA molecule." [SO:ma] -synonym: "circular double stranded DNA chromosome" EXACT [] -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000959 -name: linear_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear DNA molecule." [SO:ma] -synonym: "linear single stranded DNA chromosome" EXACT [] -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! 
linear - -[Term] -id: SO:0000960 -name: circular_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded DNA chromosome" EXACT [] -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000961 -name: RNA_chromosome -def: "Structural unit composed of a self-replicating, RNA molecule." [SO:ma] -synonym: "RNA chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000356 ! RNA - -[Term] -id: SO:0000962 -name: single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded RNA molecule." [SO:ma] -synonym: "single stranded RNA chromosome" EXACT [] -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000984 ! single - -[Term] -id: SO:0000963 -name: linear_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear RNA molecule." [SO:ma] -synonym: "linear single stranded RNA chromosome" EXACT [] -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000964 -name: linear_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear RNA molecule." [SO:ma] -synonym: "linear double stranded RNA chromosome" EXACT [] -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000965 -name: double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded RNA molecule." [SO:ma] -synonym: "double stranded RNA chromosome" EXACT [] -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000985 ! 
double - -[Term] -id: SO:0000966 -name: circular_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded RNA chromosome" EXACT [] -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000967 -name: circular_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular RNA molecule." [SO:ma] -synonym: "circular double stranded RNA chromosome" EXACT [] -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000968 -name: sequence_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "sequence replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000969 -name: rolling_circle -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070581. -synonym: "rolling circle" EXACT [] -xref: http://en.wikipedia.org/wiki/Rolling_circle "wiki" -is_obsolete: true - -[Term] -id: SO:0000970 -name: theta_replication -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070582 -synonym: "theta replication" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000971 -name: DNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0006260. -synonym: "DNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000972 -name: RNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "RNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000973 -name: insertion_sequence -def: "A terminal_inverted_repeat_element that is bacterial and only encodes the functions required for its transposition between these inverted repeats." 
[SO:as] -synonym: "insertion sequence" EXACT [] -synonym: "IS" RELATED [] -xref: http://en.wikipedia.org/wiki/Insertion_sequence "wiki" -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000975 -name: minicircle_gene -synonym: "minicircle gene" EXACT [] -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000980 ! minicircle - -[Term] -id: SO:0000976 -name: cryptic -def: "A feature_attribute describing a feature that is not manifest under normal conditions." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000977 -name: anchor_binding_site -comment: Part of an edited transcript only. -synonym: "anchor binding site" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000978 -name: template_region -def: "A region of a guide_RNA that specifies the insertions and deletions of bases in the editing of a target mRNA." [SO:jk] -synonym: "information region" EXACT [] -synonym: "template region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000979 -name: gRNA_encoding -def: "A non-protein_coding gene that encodes a guide_RNA." [SO:ma] -synonym: "gRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000980 -name: minicircle -alt_id: SO:0000974 -def: "A minicircle is a replicon, part of a kinetoplast, that encodes for guide RNAs." [PMID:8395055] -synonym: "minicircle_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Minicircle "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000741 ! kinetoplast - -[Term] -id: SO:0000981 -name: rho_dependent_bacterial_terminator -synonym: "rho dependent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000982 -name: rho_independent_bacterial_terminator -synonym: "rho independent bacterial terminator" EXACT [] -is_a: SO:0000614 ! 
bacterial_terminator - -[Term] -id: SO:0000983 -name: strand_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "strand attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000984 -name: single -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000985 -name: double -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000986 -name: topology_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "topology attribute" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000987 -name: linear -def: "A quality of a nucleotide polymer that has a 3'-terminal residue and a 5'-terminal residue." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "two-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000988 ! circular - -[Term] -id: SO:0000988 -name: circular -def: "A quality of a nucleotide polymer that has no terminal nucleotide residues." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "zero-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000987 ! linear - -[Term] -id: SO:0000989 -name: class_II_RNA -def: "Small non-coding RNA (59-60 nt long) containing 5' and 3' ends that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -synonym: "class II RNA" EXACT [] -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000990 -name: class_I_RNA -def: "Small non-coding RNA (55-65 nt long) containing highly conserved 5' and 3' ends (16 and 8 nt, respectively) that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -comment: Requested by Karen Pilcher - Dictybase. song-Term Tracker-1574577. -synonym: "class I RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000991 -name: genomic_DNA -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "genomic DNA" EXACT [] -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000992 -name: BAC_cloned_genomic_insert -comment: Requested by Andy Schroder - Flybase Harvard, Nov 2006. -synonym: "BAC cloned genomic insert" EXACT [] -intersection_of: SO:0000914 ! cloned_genomic_insert -intersection_of: derives_from SO:0000153 ! BAC - -[Term] -id: SO:0000993 -name: consensus -comment: Term added Dec 06 to comply with mapping to MGED terms. It should be used to generate consensus regions. The specific cross product terms they require are consensus_region and consensus_mRNA. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000994 -name: consensus_region -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus region" EXACT [] -is_a: SO:0001410 ! experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000995 -name: consensus_mRNA -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000996 -name: predicted_gene -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "predicted gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000732 ! 
predicted - -[Term] -id: SO:0000997 -name: gene_fragment -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "gene fragment" EXACT [] -intersection_of: SO:0000842 ! gene_component_region -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0000998 -name: recursive_splice_site -def: "A recursive splice site is a splice site which subdivides a large intron. Recursive splicing is a mechanism that splices large introns by sub dividing the intron at non exonic elements and alternate exons." [http://www.genetics.org/cgi/content/full/170/2/661] -synonym: "recursive splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000999 -name: BAC_end -def: "A region of sequence from the end of a BAC clone that may provide a highly specific marker." [SO:ke] -comment: Requested by Keith Boroevich December, 2006. -synonym: "BAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000153 ! BAC - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." 
[RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001003 -name: solo_LTR -def: "A recombination product between the 2 LTR of the same element." [SO:ke] -comment: Requested by Hadi Quesneville January 2007. -synonym: "solo LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0001004 -name: low_complexity -synonym: "low complexity" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0001005 -name: low_complexity_region -synonym: "low complexity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001004 ! low_complexity - -[Term] -id: SO:0001006 -name: prophage -def: "A phage genome after it has established in the host genome in a latent/immune state either as a plasmid or as an integrated \"island\"." [GOC:jl] -xref: http://en.wikipedia.org/wiki/Prophage "wiki" -is_a: SO:0000113 ! proviral_region - -[Term] -id: SO:0001007 -name: cryptic_prophage -def: "A remnant of an integrated prophage in the host genome or an \"island\" in the host genome that includes phage like-genes." [GOC:jl] -comment: This is not cryptic in the same sense as a cryptic gene or cryptic splice site. -synonym: "cryptic prophage" EXACT [] -xref: http://ecoliwiki.net/colipedia/index.php/Category\:Cryptic_Prophage.w -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001008 -name: tetraloop -def: "A base-paired stem with loop of 4 non-hydrogen bonded nucleotides." [SO:ke] -xref: http://en.wikipedia.org/wiki/Tetraloop "wiki" -is_a: SO:0000313 ! stem_loop - -[Term] -id: SO:0001009 -name: DNA_constraint_sequence -def: "A double-stranded DNA used to control macromolecular structure and function." 
[http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au\]&dispmax=50] -synonym: "DNA constraint" EXACT [] -synonym: "DNA constraint sequence" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0001010 -name: i_motif -def: "A cytosine rich domain whereby strands associate both inter- and intramolecularly at moderately acidic pH." [PMID:9753739] -synonym: "i motif" EXACT [] -synonym: "short intercalated motif" EXACT [] -is_a: SO:0000142 ! DNA_sequence_secondary_structure - -[Term] -id: SO:0001011 -name: PNA_oligo -def: "Peptide nucleic acid, is a chemical not known to occur naturally but is artificially synthesized and used in some biological research and medical treatments. The PNA backbone is composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [SO:ke] -synonym: "peptide nucleic acid" EXACT [] -synonym: "PNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Peptide_nucleic_acid "wiki" -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001184 ! PNA - -[Term] -id: SO:0001012 -name: DNAzyme -def: "A DNA sequence with catalytic activity." [SO:cb] -comment: Added by request from Colin Batchelor. -synonym: "catalytic DNA" EXACT [] -synonym: "deoxyribozyme" RELATED [] -synonym: "DNA enzyme" EXACT [] -intersection_of: SO:0000696 ! oligo -intersection_of: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0001013 -name: MNP -def: "A multiple nucleotide polymorphism with alleles of common length > 1, for example AAA/TTT." [http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?rs=rs2067431] -synonym: "multiple nucleotide polymorphism" RELATED [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0001014 -name: intron_domain -comment: Requested by Colin Batchelor, Feb 2007. -synonym: "intron domain" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000188 ! 
intron - -[Term] -id: SO:0001015 -name: wobble_base_pair -def: "A type of non-canonical base pairing, most commonly between G and U, which is important for the secondary structure of RNAs. It has similar thermodynamic stability to the Watson-Crick pairing. Wobble base pairs only have two hydrogen bonds. Other wobble base pair possibilities are I-A, I-U and I-C." [PMID:11256617] -synonym: "wobble base pair" EXACT [] -synonym: "wobble pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Wobble_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0001016 -name: internal_guide_sequence -def: "A purine-rich sequence in the group I introns which determines the locations of the splice sites in group I intron splicing and has catalytic activity." [SO:cb] -synonym: "IGS" EXACT [] -synonym: "internal guide sequence" EXACT [] -is_a: SO:0001014 ! intron_domain -relationship: part_of SO:0000587 ! group_I_intron - -[Term] -id: SO:0001017 -name: silent_mutation -def: "A sequence variant that does not affect protein function. Silent mutations may occur in genic ( CDS, UTR, intron etc) and intergenic regions. Silent mutations may have affects on processes such as splicing and regulation." [SO:ke] -comment: Added in March 2007 in after meeting with pharmgkb. Although this term is in common usage, it is better to annotate with the most specific term possible, such as synonymous codon, intron variant etc. -synonym: "silent mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Silent_mutation "wiki" -is_a: SO:0001537 ! structural_variant - -[Term] -id: SO:0001018 -name: epitope -def: "A region of a macromolecule that is recognized by the immune system." [http://en.wikipedia.org/wiki/Epitope] -comment: Requested by Trish Whetzel. -xref: http://en.wikipedia.org/wiki/Epitope "wiki" -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." 
[SO:ke] -subset: SOFA -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0001020 -name: sequence_variant_affecting_copy_number -synonym: "mutation affecting copy number" EXACT [] -synonym: "sequence variant affecting copy number" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0001021 -name: chromosome_breakpoint -alt_id: SO:0001242 -synonym: "aberration breakpoint" EXACT [] -synonym: "aberration_junction" EXACT [] -synonym: "chromosome breakpoint" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0001022 -name: inversion_breakpoint -def: "The point within a chromosome where an inversion begins or ends." [SO:cb] -synonym: "inversion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001023 -name: allele -def: "An allele is one of a set of coexisting sequence variants of a gene." [SO:immuno_workshop] -synonym: "allelomorph" EXACT [] -xref: http://en.wikipedia.org/wiki/Allele "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000704 ! gene - -[Term] -id: SO:0001024 -name: haplotype -def: "A haplotype is one of a set of coexisting sequence variants of a haplotype block." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Haplotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000355 ! haplotype_block - -[Term] -id: SO:0001025 -name: polymorphic_sequence_variant -def: "A sequence variant that is segregating in one or more natural populations of a species." [SO:immuno_workshop] -synonym: "polymorphic sequence variant" EXACT [] -is_a: SO:0001023 ! allele - -[Term] -id: SO:0001026 -name: genome -def: "A genome is the sum of genetic material within a cell or virion." 
[SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genome "wiki" -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0001235 ! replicon - -[Term] -id: SO:0001027 -name: genotype -def: "A genotype is a variant genome, complete or incomplete." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0001026 ! genome - -[Term] -id: SO:0001028 -name: diplotype -def: "A diplotype is a pair of haplotypes from a given individual. It is a genotype where the phase is known." [SO:immuno_workshop] -is_a: SO:0001507 ! variant_collection - -[Term] -id: SO:0001029 -name: direction_attribute -synonym: "direction attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001030 -name: forward -def: "Forward is an attribute of the feature, where the feature is in the 5' to 3' direction." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001031 -name: reverse -def: "Reverse is an attribute of the feature, where the feature is in the 3' to 5' direction. Again could be applied to primer." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001032 -name: mitochondrial_DNA -comment: This terms is used by MO. -synonym: "mitochondrial DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_DNA "wiki" -intersection_of: SO:0000737 ! mitochondrial_sequence -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001033 -name: chloroplast_DNA -comment: This term is used by MO. -synonym: "chloroplast DNA" EXACT [] -intersection_of: SO:0000745 ! chloroplast_sequence -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001034 -name: mirtron -def: "A de-branched intron which mimics the structure of pre-miRNA and enters the miRNA processing pathway without Drosha mediated cleavage." [PMID:17589500, SO:ma] -comment: Ruby et al. 
Nature 448:83 describe a new class of miRNAs that are derived from de-branched introns. -is_a: SO:0001014 ! intron_domain - -[Term] -id: SO:0001035 -name: piRNA -def: "A small non coding RNA, part of a silencing system that prevents the spreading of selfish genetic elements." [SO:ke] -synonym: "piwi-associated RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/PiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001036 -name: arginyl_tRNA -def: "A tRNA sequence that has an arginine anticodon, and a 3' arginine binding region." [SO:ke] -synonym: "arginyl tRNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000212 ! arginine_tRNA_primary_transcript - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0001038 -name: extrachromosomal_mobile_genetic_element -def: "An MGE that is not integrated into the host chromosome." [SO:ke] -synonym: "extrachromosomal mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001040 -name: integrated_plasmid -def: "A plasmid sequence that is integrated within the host chromosome." [SO:ke] -synonym: "integrated plasmid" EXACT [] -intersection_of: SO:0001039 ! integrated_mobile_genetic_element -intersection_of: derives_from SO:0000155 ! 
plasmid - -[Term] -id: SO:0001041 -name: viral_sequence -def: "The region of nucleotide sequence of a virus, a submicroscopic particle that replicates by infecting a host cell." [SO:ke] -comment: The definitions of the children of this term were revised Decemeber 2007 after discussion on song-devel. The resulting definitions are slightly unweildy but hopefully more logically correct. -synonym: "viral sequence" EXACT [] -synonym: "virus sequence" EXACT [] -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0001042 -name: phage_sequence -def: "The nucleotide sequence of a virus that infects bacteria." [SO:ke] -synonym: "bacteriophage" EXACT [] -synonym: "phage" EXACT [] -synonym: "phage sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Bacteriophage "wiki" -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001043 -name: attCtn_site -def: "An attachment site located on a conjugative transposon and used for site-specific integration of a conjugative transposon." [Phigo:at] -synonym: "attCtn site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000371 ! conjugative_transposon - -[Term] -id: SO:0001044 -name: nuclear_mt_pseudogene -def: "A nuclear pseudogene of a mitochndrial gene." [SO:xp] -synonym: "nuclear mitochondrial pseudogene" EXACT [] -synonym: "nuclear mt pseudogene" EXACT [] -synonym: "NUMT" EXACT [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0001045 -name: cointegrated_plasmid -def: "A MGE region consisting of two fused plasmids resulting from a replicative transposition event." [phigo:at] -synonym: "cointegrated plasmid" EXACT [] -synonym: "cointegrated replicon" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0001046 -name: IRLinv_site -def: "Component of the inversion site located at the left of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRLinv site" EXACT [] -is_a: SO:0001048 ! 
inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001047 -name: IRRinv_site -def: "Component of the inversion site located at the right of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRRinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001048 -name: inversion_site_part -def: "A region located within an inversion site." [SO:ke] -comment: A term created to allow the parts of an inversion site have an is_a path back to the root. -synonym: "inversion site part" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0001049 -name: defective_conjugative_transposon -def: "An island that contains genes for integration/excision and the gene and site for the initiation of intercellular transfer by conjugation. It can be complemented for transfer by a conjugative transposon." [Phigo:ariane] -synonym: "defective conjugative transposon" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001050 -name: repeat_fragment -def: "A portion of a repeat, interrupted by the insertion of another element." [SO:ke] -comment: Requested by Chris Smith, and others at Flybase to help annotate nested repeats. -synonym: "repeat fragment" EXACT [] -is_a: SO:0000840 ! repeat_component -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001051 -name: nested_region -is_obsolete: true - -[Term] -id: SO:0001052 -name: nested_repeat -is_obsolete: true - -[Term] -id: SO:0001053 -name: nested_transposon -is_obsolete: true - -[Term] -id: SO:0001054 -name: transposon_fragment -synonym: "transposon fragment" EXACT [] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000731 ! 
fragmentary - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -subset: SOFA -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." [SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001057 -name: enhanceosome -is_obsolete: true - -[Term] -id: SO:0001058 -name: promoter_targeting_sequence -def: "A transcriptional_cis_regulatory_region that restricts the activity of a CRM to a single promoter and which functions only when both itself and an insulator are located between the CRM and the promoter." [SO:regcreative] -synonym: "promoter targeting sequence" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. -subset: SOFA -synonym: "partially characterised change in DNA sequence" EXACT [] -synonym: "partially_characterised_change_in_DNA_sequence" EXACT [] -synonym: "sequence alteration" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001060 -name: sequence_variant -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -synonym: "sequence variant" EXACT [] - -[Term] -id: SO:0001061 -name: propeptide_cleavage_site -alt_id: BS:00063 -def: "The propeptide_cleavage_site is the arginine/lysine boundary on a propeptide where cleavage occurs." [EBIBS:GAR] -comment: Discrete. 
-subset: biosapiens -synonym: "propeptide cleavage site" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0001062 -name: propeptide -alt_id: BS:00077 -def: "Part of a peptide chain which is cleaved off during the formation of the mature protein." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "propep" RELATED [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Propeptide "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature_peptide_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001064 -name: active_peptide -alt_id: BS:00076 -def: "Active peptides are proteins which are biologically active, released from a precursor molecule." [EBIBS:GAR, UniProt:curation_manual] -comment: Hormones, neuropeptides, antimicrobial peptides, are active peptides. They are typically short (<40 amino acids) in length. -subset: biosapiens -synonym: "active peptide" EXACT [] -synonym: "peptide" BROAD [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Peptide "wiki" -is_a: SO:0000419 ! mature_protein_region - -[Term] -id: SO:0001066 -name: compositionally_biased_region_of_peptide -alt_id: BS:00068 -def: "Polypeptide region that is rich in a particular amino acid or homopolymeric and greater than three residues in length." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "compbias" RELATED [uniprot:feature_type] -synonym: "compositional bias" RELATED [] -synonym: "compositionally biased" RELATED [] -synonym: "compositionally biased region of peptide" RELATED [] -synonym: "compositionally_biased_region" EXACT [] -is_a: SO:0000839 ! 
polypeptide_region - -[Term] -id: SO:0001067 -name: polypeptide_motif -alt_id: BS:00032 -def: "A sequence motif is a short (up to 20 amino acids) region of biological interest. Such motifs, although they are too short to constitute functional domains, share sequence similarities and are conserved in different proteins. They display a common function (protein-binding, subcellular location etc.)." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "motif" BROAD [uniprot:feature_type] -synonym: "polypeptide motif" EXACT [] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001068 -name: polypeptide_repeat -alt_id: BS:00070 -def: "A polypeptide_repeat is a single copy of an internal sequence repetition." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "polypeptide repeat" EXACT [] -synonym: "repeat" RELATED [uniprot:feature_type] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001070 -name: polypeptide_structural_region -alt_id: BS:00337 -def: "Region of polypeptide with a given structural property." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "polypeptide structural region" EXACT [] -synonym: "structural_region" RELATED [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001071 -name: membrane_structure -alt_id: BS:00128 -def: "Arrangement of the polypeptide with respect to the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "membrane_structure" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001072 -name: extramembrane_polypeptide_region -alt_id: BS:00154 -def: "Polypeptide region that is localized outside of a lipid bilayer." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "extramembrane" RELATED BS [] -synonym: "extramembrane polypeptide region" EXACT [] -synonym: "extramembrane_region" RELATED BS [] -synonym: "topo_dom" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! 
polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001073 -name: cytoplasmic_polypeptide_region -alt_id: BS:00145 -def: "Polypeptide region that is localized inside the cytoplasm." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "cytoplasm_location" EXACT BS [] -synonym: "cytoplasmic polypeptide region" EXACT [] -synonym: "inside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001074 -name: non_cytoplasmic_polypeptide_region -alt_id: BS:00144 -def: "Polypeptide region that is localized outside of a lipid bilayer and outside of the cytoplasm." [EBIBS:GAR, SO:cb] -comment: This could be inside an organelle within the cell. -subset: biosapiens -synonym: "non cytoplasmic polypeptide region" EXACT [] -synonym: "non_cytoplasm_location" EXACT BS [] -synonym: "outside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001075 -name: intramembrane_polypeptide_region -alt_id: BS:00156 -def: "Polypeptide region present in the lipid bilayer." [EBIBS:GAR] -subset: biosapiens -synonym: "intramembrane" RELATED BS [] -synonym: "intramembrane polypeptide region" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001076 -name: membrane_peptide_loop -alt_id: BS:00155 -def: "Polypeptide region localized within the lipid bilayer where both ends traverse the same membrane." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "membrane peptide loop" EXACT [] -synonym: "membrane_loop" RELATED BS [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001077 -name: transmembrane_polypeptide_region -alt_id: BS:00158 -def: "Polypeptide region traversing the lipid bilayer." 
[EBIBS:GAR, UniProt:curator_manual] -subset: biosapiens -synonym: "transmem" RELATED BS [uniprot:feature_type] -synonym: "transmembrane" RELATED BS [] -synonym: "transmembrane polypeptide region" EXACT [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001078 -name: polypeptide_secondary_structure -alt_id: BS:00003 -def: "A region of peptide with secondary structure has hydrogen bonding along the peptide chain that causes a defined conformation of the chain." [EBIBS:GAR] -comment: Biosapien term was secondary_structure. -subset: biosapiens -synonym: "2nary structure" RELATED BS [] -synonym: "polypeptide secondary structure" EXACT [] -synonym: "secondary structure" RELATED BS [] -synonym: "secondary structure region" RELATED BS [] -synonym: "secondary_structure" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Secondary_structure "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001079 -name: polypeptide_structural_motif -alt_id: BS:0000338 -def: "Motif is a three-dimensional structural element within the chain, which appears also in a variety of other molecules. Unlike a domain, a motif does not need to form a stable globular unit." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide structural motif" RELATED [] -synonym: "structural_motif" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Structural_motif "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001080 -name: coiled_coil -alt_id: BS:00041 -def: "A coiled coil is a structural motif in proteins, in which alpha-helices are coiled together like the strands of a rope." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "coiled" RELATED BS [uniprot:feature_type] -synonym: "coiled coil" EXACT [] -xref: http://en.wikipedia.org/wiki/Coiled_coil "wiki" -is_a: SO:0001079 ! 
polypeptide_structural_motif - -[Term] -id: SO:0001081 -name: helix_turn_helix -alt_id: BS:00147 -def: "A motif comprising two helices separated by a turn." [EBIBS:GAR] -subset: biosapiens -synonym: "helix turn helix" EXACT [] -synonym: "helix-turn-helix" EXACT [] -synonym: "HTH" RELATED BS [] -is_a: SO:0001079 ! polypeptide_structural_motif -relationship: has_part SO:0001114 ! peptide_helix -relationship: has_part SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001082 -name: polypeptide_sequencing_information -alt_id: BS:00125 -def: "Incompatibility in the sequence due to some experimental problem." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "sequencing_information" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0001083 -name: non_adjacent_residues -alt_id: BS:00182 -def: "Indicates that two consecutive residues in a fragment sequence are not consecutive in the full-length protein and that there are a number of unsequenced residues between them." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "non consecutive" EXACT [] -synonym: "non_cons" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001084 -name: non_terminal_residue -alt_id: BS:00072 -def: "The residue at an extremity of the sequence is not the terminal residue." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "non terminal" EXACT [] -synonym: "non_ter" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001085 -name: sequence_conflict -alt_id: BS:00069 -def: "Different sources report differing sequences." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "conflict" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! 
polypeptide_sequencing_information - -[Term] -id: SO:0001086 -name: sequence_uncertainty -alt_id: BS:00181 -def: "Describes the positions in a sequence where the authors are unsure about the sequence assignment." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "unsure" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001087 -name: cross_link -alt_id: BS:00178 -def: "Posttranslationally formed amino acid bonds." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "cross_link" EXACT [] -synonym: "crosslink" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001088 -name: disulfide_bond -alt_id: BS:00028 -def: "The covalent bond between sulfur atoms that binds two peptide chains or different parts of one peptide chain and is a structural determinant in many protein molecules." [EBIBS:GAR, UniProt:curation_manual] -comment: 2 discreet & joined. -subset: biosapiens -synonym: "disulfid" RELATED [] -synonym: "disulfide" RELATED [] -synonym: "disulfide bond" RELATED [] -synonym: "disulfide_bond" EXACT [] -synonym: "disulphide" EXACT [] -synonym: "disulphide bond" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001089 -name: post_translationally_modified_region -alt_id: BS:00052 -def: "A region where a transformation occurs in a protein after it has been synthesized. This which may regulate, stabilize, crosslink or introduce new chemical functionalities in the protein." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mod_res" EXACT [uniprot:feature_type] -synonym: "modified residue" EXACT [] -synonym: "post_translational_modification" EXACT [] -xref: http://en.wikipedia.org/wiki/Post_translational_modification "wiki" -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001090 -name: covalent_binding_site -alt_id: BS:00246 -def: "Binding involving a covalent bond." 
[EBIBS:GAR] -subset: biosapiens -synonym: "covalent_binding_site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001091 -name: non_covalent_binding_site -alt_id: BS:00029 -def: "Binding site for any chemical group (co-enzyme, prosthetic group, etc.)." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "binding" RELATED [uniprot:curation] -synonym: "binding site" RELATED [] -synonym: "non_covalent_binding_site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001092 -name: polypeptide_metal_contact -alt_id: BS:00027 -def: "Residue is part of a binding site for a metal ion." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "metal_binding" RELATED [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001093 -name: protein_protein_contact -alt_id: BS:00131 -def: "Residues involved in protein-protein interactions." [EBIBS:GAR, UniProt:Curation_manual] -subset: biosapiens -synonym: "protein protein contact" EXACT [] -synonym: "protein protein contact site" EXACT [] -synonym: "protein_protein_interaction" RELATED [] -xref: http://en.wikipedia.org/wiki/Protein_protein_interaction "wiki" -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001094 -name: polypeptide_calcium_ion_contact_site -alt_id: BS:00186 -def: "Residue involved in contact with calcium." [EBIBS:GAR] -subset: biosapiens -synonym: "ca bind" RELATED [] -synonym: "Ca_contact_site" EXACT [] -synonym: "polypeptide calcium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001095 -name: polypeptide_cobalt_ion_contact_site -alt_id: BS:00136 -def: "Residue involved in contact with cobalt." [EBIBS:GAR] -subset: biosapiens -synonym: "Co_contact_site" EXACT [] -synonym: "polypeptide cobalt ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001096 -name: polypeptide_copper_ion_contact_site -alt_id: BS:00146 -def: "Residue involved in contact with copper." [EBIBS:GAR] -subset: biosapiens -synonym: "Cu_contact_site" EXACT [] -synonym: "polypeptide copper ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001097 -name: polypeptide_iron_ion_contact_site -alt_id: BS:00137 -def: "Residue involved in contact with iron." [EBIBS:GAR] -subset: biosapiens -synonym: "Fe_contact_site" EXACT [] -synonym: "polypeptide iron ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001098 -name: polypeptide_magnesium_ion_contact_site -alt_id: BS:00187 -def: "Residue involved in contact with magnesium." [EBIBS:GAR] -subset: biosapiens -synonym: "Mg_contact_site" EXACT [] -synonym: "polypeptide magnesium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001099 -name: polypeptide_manganese_ion_contact_site -alt_id: BS:00140 -def: "Residue involved in contact with manganese." [EBIBS:GAR] -subset: biosapiens -synonym: "Mn_contact_site" EXACT [] -synonym: "polypeptide manganese ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001100 -name: polypeptide_molybdenum_ion_contact_site -alt_id: BS:00141 -def: "Residue involved in contact with molybdenum." [EBIBS:GAR] -subset: biosapiens -synonym: "Mo_contact_site" EXACT [] -synonym: "polypeptide molybdenum ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001101 -name: polypeptide_nickel_ion_contact_site -alt_id: BS:00142 -def: "Residue involved in contact with nickel." [EBIBS:GAR] -subset: biosapiens -synonym: "Ni_contact_site" EXACT [] -synonym: "polypeptide nickel ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001102 -name: polypeptide_tungsten_ion_contact_site -alt_id: BS:00143 -def: "Residue involved in contact with tungsten." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide tungsten ion contact site" EXACT [] -synonym: "W_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001103 -name: polypeptide_zinc_ion_contact_site -alt_id: BS:00185 -def: "Residue involved in contact with zinc." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide zinc ion contact site" EXACT [] -synonym: "Zn_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001104 -name: catalytic_residue -alt_id: BS:00026 -def: "Amino acid involved in the activity of an enzyme." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "act_site" RELATED [uniprot:feature_type] -synonym: "active site residue" EXACT [] -synonym: "catalytic residue" EXACT [] -is_a: SO:0001237 ! amino_acid -relationship: part_of SO:0100019 ! polypeptide_catalytic_motif - -[Term] -id: SO:0001105 -name: polypeptide_ligand_contact -alt_id: BS:00157 -def: "Residues which interact with a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide ligand contact" EXACT [] -synonym: "protein-ligand interaction" RELATED [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001106 -name: asx_motif -alt_id: BS:00202 -def: "A motif of five consecutive residues and two H-bonds in which: Residue(i) is Aspartate or Asparagine (Asx), side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3), main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx motif" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001107 -name: beta_bulge -alt_id: BS:00208 -def: "A motif of three residues within a beta-sheet in which the main chains of two consecutive residues are H-bonded to that of the third, and in which the dihedral angles are as follows: Residue(i): -140 degrees < phi(l) -20 degrees , -90 degrees < psi(l) < 40 degrees. Residue (i+1): -180 degrees < phi < -25 degrees or +120 degrees < phi < +180 degrees, +40 degrees < psi < +180 degrees or -180 degrees < psi < -120 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge" EXACT [] -xref: http://en.wikipedia.org/wiki/Beta_bulge "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001108 -name: beta_bulge_loop -alt_id: BS:00209 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds. Beta bulge loops often occur at the loop ends of beta-hairpins." [EBIBS:GAR, Http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001109 -name: beta_bulge_loop_five -alt_id: BS:00210 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+4), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+3), these loops have an RL nest at residues i+2 and i+3." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop five" EXACT [] -is_a: SO:0001108 ! 
beta_bulge_loop - -[Term] -id: SO:0001110 -name: beta_bulge_loop_six -alt_id: BS:00211 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+5), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+4), these loops have an RL nest at residues i+3 and i+4." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop six" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001111 -name: beta_strand -alt_id: BS:00042 -def: "A beta strand describes a single length of polypeptide chain that forms part of a beta sheet. A single continuous stretch of amino acids adopting an extended conformation of hydrogen bonds between the N-O and the C=O of another part of the peptide. This forms a secondary protein structure in which two or more extended polypeptide regions are hydrogen-bonded to one another in a planar array." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "strand" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Beta_sheet "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001112 -name: antiparallel_beta_strand -alt_id: BS:0000341 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (one running N-terminal to C-terminal and one running C-terminal to N-terminal). Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i) and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they form two mutual backbone hydrogen bonds to each other's flanking peptide groups; this is known as a close pair of hydrogen bonds. The peptide backbone dihedral angles (phi, psi) are about (-140 degrees, 135 degrees) in antiparallel sheets." 
[EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "antiparallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001113 -name: parallel_beta_strand -alt_id: BS:00151 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (both running N-terminal to C-terminal). This orientation is slightly less stable because it introduces nonplanarity in the inter-strand hydrogen bonding pattern. Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i)and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they do not hydrogen bond to each other; rather, one residue forms hydrogen bonds to the residues that flank the other (but not vice versa). For example, residue i may form hydrogen bonds to residues j - 1 and j + 1; this is known as a wide pair of hydrogen bonds. By contrast, residue j may hydrogen-bond to different residues altogether, or to none at all. The dihedral angles (phi, psi) are about (-120 degrees, 115 degrees) in parallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "parallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001114 -name: peptide_helix -alt_id: BS:00152 -def: "A helix is a secondary_structure conformation where the peptide backbone forms a coil." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "helix" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001115 -name: left_handed_peptide_helix -alt_id: BS:00222 -def: "A left handed helix is a region of peptide where the coiled conformation turns in an anticlockwise, left handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix-l" RELATED [] -synonym: "left handed helix" EXACT [] -is_a: SO:0001114 ! 
peptide_helix - -[Term] -id: SO:0001116 -name: right_handed_peptide_helix -alt_id: BS:0000339 -def: "A right handed helix is a region of peptide where the coiled conformation turns in a clockwise, right handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix" RELATED BS [] -synonym: "right handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001117 -name: alpha_helix -alt_id: BS:00040 -def: "The helix has 3.6 residues per turn which corersponds to a translation of 1.5 angstroms (= 0.15 nm) along the helical axis. Every backbone N-H group donates a hydrogen bond to the backbone C=O group of the amino acid four residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "a-helix" RELATED BS [] -synonym: "helix" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Alpha_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001118 -name: pi_helix -alt_id: BS:00153 -def: "The pi helix has 4.1 residues per turn and a translation of 1.15 (=0.115 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid five residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "pi helix" EXACT [] -xref: http://en.wikipedia.org/wiki/Pi_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001119 -name: three_ten_helix -alt_id: BS:0000340 -def: "The 3-10 helix has 3 residues per turn with a translation of 2.0 angstroms (=0.2 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid three residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "3(10) helix" EXACT [] -synonym: "3-10 helix" EXACT [] -synonym: "310 helix" EXACT [] -synonym: "three_ten_helix" EXACT [] -xref: http://en.wikipedia.org/wiki/310_helix "wiki" -is_a: SO:0001116 ! 
right_handed_peptide_helix - -[Term] -id: SO:0001120 -name: polypeptide_nest_motif -alt_id: BS:00223 -def: "A motif of two consecutive residues with dihedral angles. Nest should not have Proline as any residue. Nests frequently occur as parts of other motifs such as Schellman loops." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest" RELATED BS [] -synonym: "nest_motif" EXACT [] -synonym: "polypeptide nest motif" RELATED [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001121 -name: polypeptide_nest_left_right_motif -alt_id: BS:00224 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_left_right" EXACT [] -synonym: "nest_lr" EXACT [] -synonym: "polypeptide nest left right motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001122 -name: polypeptide_nest_right_left_motif -alt_id: BS:00225 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_right_left" EXACT [] -synonym: "nest_rl" EXACT [] -synonym: "polypeptide nest right left motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001123 -name: schellmann_loop -alt_id: BS:00226 -def: "A motif of six or seven consecutive residues that contains two H-bonds." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "paperclip" RELATED BS [] -synonym: "paperclip loop" RELATED [] -synonym: "schellmann_loop" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001124 -name: schellmann_loop_seven -alt_id: BS:00228 -def: "Wild type: A motif of seven consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+6), the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+5)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop seven" EXACT [] -synonym: "seven-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001125 -name: schellmann_loop_six -alt_id: BS:00227 -def: "Common Type: A motif of six consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+5) the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop six" EXACT [] -synonym: "six-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001126 -name: serine_threonine_motif -alt_id: BS:00229 -def: "A motif of five consecutive residues and two hydrogen bonds in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3) , the main-chain CO group of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine motif" EXACT [] -synonym: "st motif" EXACT [] -synonym: "st_motif" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001127 -name: serine_threonine_staple_motif -alt_id: BS:00230 -def: "A motif of four or five consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain OH of residue(i) is H-bonded to the main-chain CO of residue(i3) or (i4), Phi angles of residues(i1), (i2) and (i3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine threonine staple motif" EXACT [] -synonym: "st_staple" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001128 -name: polypeptide_turn_motif -alt_id: BS:00148 -def: "A reversal in the direction of the backbone of a protein that is stabilized by hydrogen bond between backbone NH and CO groups, involving no more than 4 amino acid residues." [EBIBS:GAR, uniprot:feature_type] -comment: Range. -subset: biosapiens -synonym: "turn" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001129 -name: asx_turn_left_handed_type_one -alt_id: BS:00206 -def: "Left handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type one" EXACT [] -synonym: "asx_turn_il" RELATED [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001130 -name: asx_turn_left_handed_type_two -alt_id: BS:00204 -def: "Left handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type two" EXACT [] -synonym: "asx_turn_iil" EXACT [] -is_a: SO:0000912 ! 
asx_turn - -[Term] -id: SO:0001131 -name: asx_turn_right_handed_type_two -alt_id: BS:00205 -def: "Right handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn right handed type two" EXACT [] -synonym: "asx_turn_iir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001132 -name: asx_turn_right_handed_type_one -alt_id: BS:00207 -def: "Right handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn type right handed type one" EXACT [] -synonym: "asx_turn_ir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001133 -name: beta_turn -alt_id: BS:00212 -def: "A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles of the second and third residues, which are the basis for sub-categorization." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001134 -name: beta_turn_left_handed_type_one -alt_id: BS:00215 -def: "Left handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles:- Residue(i+1): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees. 
Residue(i+2): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type one" EXACT [] -synonym: "beta_turn_il" EXACT [] -synonym: "type I' beta turn" EXACT [] -synonym: "type I' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001135 -name: beta_turn_left_handed_type_two -alt_id: BS:00213 -def: "Left handed type II: A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees > phi > -20 degrees, +80 degrees > psi > +180 degrees. Residue(i+2): +20 degrees > phi > +140 degrees, -40 degrees > psi > +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type two" EXACT [] -synonym: "beta_turn_iil" EXACT [] -synonym: "type II' beta turn" EXACT [] -synonym: "type II' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001136 -name: beta_turn_right_handed_type_one -alt_id: BS:00216 -def: "Right handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+2): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type one" EXACT [] -synonym: "beta_turn_ir" EXACT [] -synonym: "type I beta turn" EXACT [] -synonym: "type I turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001137 -name: beta_turn_right_handed_type_two -alt_id: BS:00214 -def: "Right handed type II:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, +80 degrees < psi < +180 degrees. Residue(i+2): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type two" EXACT [] -synonym: "beta_turn_iir" EXACT [] -synonym: "type II beta turn" EXACT [] -synonym: "type II turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001138 -name: gamma_turn -alt_id: BS:00219 -def: "Gamma turns, defined for 3 residues i,( i+1),( i+2) if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001139 -name: gamma_turn_classic -alt_id: BS:00220 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=75.0 - psi(i+1)=-64.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "classic gamma turn" EXACT [] -synonym: "gamma turn classic" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001140 -name: gamma_turn_inverse -alt_id: BS:00221 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=-79.0 - psi(i+1)=69.0." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn inverse" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001141 -name: serine_threonine_turn -alt_id: BS:00231 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine turn" EXACT [] -synonym: "st_turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001142 -name: st_turn_left_handed_type_one -alt_id: BS:00234 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type one" EXACT [] -synonym: "st_turn_il" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001143 -name: st_turn_left_handed_type_two -alt_id: BS:00232 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type two" EXACT [] -synonym: "st_turn_iil" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001144 -name: st_turn_right_handed_type_one -alt_id: BS:00235 -def: "The peptide twists in an clockwise, right handed manner. 
The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type one" EXACT [] -synonym: "st_turn_ir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001145 -name: st_turn_right_handed_type_two -alt_id: BS:00233 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type two" EXACT [] -synonym: "st_turn_iir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001146 -name: polypeptide_variation_site -alt_id: BS:00336 -def: "A site of sequence variation (alteration). Alternative sequence due to naturally occuring events such as polymorphisms and altermatve splicing or experimental methods such as site directed mutagenesis." [EBIBS:GAR, SO:ke] -comment: For example, was a substitution natural or mutated as part of an experiment? This term is added to merge the biosapiens term sequence_variations. -subset: biosapiens -synonym: "sequence_variations" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001147 -name: natural_variant_site -alt_id: BS:00071 -def: "Describes the natural sequence variants due to polymorphisms, disease-associated mutations, RNA editing and variations between strains, isolates or cultivars." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. 
-subset: biosapiens -synonym: "natural_variant" BROAD [] -synonym: "sequence variation" BROAD [] -synonym: "variant" BROAD [uniprot:feature_type] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001148 -name: mutated_variant_site -alt_id: BS:00036 -def: "Site which has been experimentally altered." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mutagen" EXACT BS [uniprot:feature_type] -synonym: "mutagenesis" EXACT [] -synonym: "mutated_site" EXACT [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001149 -name: alternate_sequence_site -alt_id: BS:00073 -alt_id: SO:0001065 -def: "Description of sequence variants produced by alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "alternative_sequence" EXACT [] -synonym: "isoform" NARROW [] -synonym: "sequence variation" NARROW [] -synonym: "var_seq" EXACT [uniprot:feature_type] -synonym: "varsplic" NARROW [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001150 -name: beta_turn_type_six -def: "A motif of four consecutive peptide resides of type VIa or type VIb and where the i+2 residue is cis-proline." [SO:cb] -subset: biosapiens -synonym: "beta turn type six" EXACT [] -synonym: "cis-proline loop" EXACT [] -synonym: "type VI beta turn" EXACT [] -synonym: "type VI turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001151 -name: beta_turn_type_six_a -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -90 degrees, psi ~ 0 degrees." 
[PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six a" EXACT [] -synonym: "type VIa beta turn" EXACT [] -synonym: "type VIa turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001152 -name: beta_turn_type_six_a_one -subset: biosapiens -synonym: "beta turn type six a one" EXACT [] -synonym: "type VIa1 beta turn" EXACT [] -synonym: "type VIa1 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001153 -name: beta_turn_type_six_a_two -subset: biosapiens -synonym: "beta turn type six a two" EXACT [] -synonym: "type VIa2 beta turn" EXACT [] -synonym: "type VIa2 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001154 -name: beta_turn_type_six_b -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -120 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -60 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six b" EXACT [] -synonym: "type VIb beta turn" EXACT [] -synonym: "type VIb turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001155 -name: beta_turn_type_eight -def: "A motif of four consecutive peptide residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ -30 degrees. Residue(i+2): phi ~ -120 degrees, psi ~ 120 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type eight" EXACT [] -synonym: "type VIII beta turn" EXACT [] -synonym: "type VIII turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001156 -name: DRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -10 and -60 relative to the TSS. Consensus sequence is WATCGATW." [PMID:12537576] -comment: This consensus sequence was identified computationally using the MEME algorithm within core promoter sequences from -60 to +40, with an E value of 1.7e-183. Tends to co-occur with Motif 7. Tends to not occur with DPE motif (SO:0000015) or motif 10. -synonym: "DRE motif" EXACT [] -synonym: "NDM4" EXACT [] -synonym: "WATCGATW_motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001157 -name: DMv4_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements with respect to the TSS (+1). Consensus sequence is YGGTCACACTR. Marked spatial preference within core promoter; tend to occur near the TSS, although not as tightly as INR (SO:0000014)." [PMID:16827941\:12537576] -synonym: "directional motif v4" EXACT [] -synonym: "DMv4" EXACT [] -synonym: "DMv4 motif" EXACT [] -synonym: "motif 1 element" EXACT [] -synonym: "promoter motif 1" EXACT [] -synonym: "YGGTCACATR" NARROW [] -is_a: SO:0000713 ! DNA_motif - -[Term] -id: SO:0001158 -name: E_box_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and +1 relative to the TSS. Consensus sequence is AWCAGCTGWT. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015)." [PMID:12537576\:16827941] -synonym: "AWCAGCTGWT" NARROW [] -synonym: "E box motif" EXACT [] -synonym: "generic E box motif" EXACT [] -synonym: "NDM5" RELATED [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001159 -name: DMv5_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -50 and -10 relative to the TSS. Consensus sequence is KTYRGTATWTTT. Tends to co-occur with DMv4 (SO:0001157) . Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v5" EXACT [] -synonym: "DMv5" EXACT [] -synonym: "DMv5 motif" EXACT [] -synonym: "KTYRGTATWTTT" NARROW [] -synonym: "promoter motif 6" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001160 -name: DMv3_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -30 and +15 relative to the TSS. Consensus sequence is KNNCAKCNCTRNY. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015) or MTE (0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v3" EXACT [] -synonym: "DMv3" EXACT [] -synonym: "DMv3 motif" EXACT [] -synonym: "KNNCAKCNCTRNY" NARROW [] -synonym: "promoter motif 7" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001161 -name: DMv2_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and -45 relative to the TSS. Consensus sequence is MKSYGGCARCGSYSS. Tends to co-occur with DMv3 (SO:0001160). Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v2" EXACT [] -synonym: "DMv2" EXACT [] -synonym: "DMv2 motif" EXACT [] -synonym: "MKSYGGCARCGSYSS" NARROW [] -synonym: "promoter motif 8" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001162 -name: MTE -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between +20 and +30 relative to the TSS. Consensus sequence is CSARCSSAACGS. Tends to co-occur with INR motif (SO:0000014). Tends to not occur with DPE motif (SO:0000015) or DMv5 (SO:0001159)." [PMID:12537576\:15231738] -synonym: "CSARCSSAACGS" NARROW [] -synonym: "motif ten element" EXACT [] -synonym: "motif_ten_element" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001163 -name: INR1_motif -def: "A promoter motif with consensus sequence TCATTCG." [PMID:16827941] -synonym: "directional motif p3" EXACT [] -synonym: "directional promoter motif 3" EXACT [] -synonym: "DMp3" EXACT [] -synonym: "INR1 motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001164 -name: DPE1_motif -def: "A promoter motif with consensus sequence CGGACGT." [PMID:16827941] -synonym: "directional motif 5" EXACT [] -synonym: "directional promoter motif 5" RELATED [] -synonym: "DMp5" EXACT [] -synonym: "DPE1 motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001165 -name: DMv1_motif -def: "A promoter motif with consensus sequence CARCCCT." [PMID:16827941] -synonym: "directional promoter motif v1" RELATED [] -synonym: "DMv1" RELATED [] -synonym: "DMv1 motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001166 -name: GAGA_motif -def: "A non directional promoter motif with consensus sequence GAGAGCG." [PMID:16827941] -synonym: "GAGA" EXACT [] -synonym: "GAGA motif" EXACT [] -synonym: "NDM1" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001167 -name: NDM2_motif -def: "A non directional promoter motif with consensus CGMYGYCR." [PMID:16827941] -synonym: "NDM2" EXACT [] -synonym: "NDM2 motif" EXACT [] -synonym: "non directional promoter motif 2" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001168 -name: NDM3_motif -def: "A non directional promoter motif with consensus sequence GAAAGCT." [PMID:16827941] -synonym: "NDM3" EXACT [] -synonym: "NDM3 motif" EXACT [] -synonym: "non directional motif 3" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001169 -name: ds_RNA_viral_sequence -def: "A ds_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded RNA." [SO:ke] -synonym: "double stranded RNA virus sequence" EXACT [] -synonym: "ds RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001170 -name: polinton -def: "A kind of DNA transposon that populates the genomes of protists, fungi, and animals, characterized by a unique set of proteins necessary for their transposition, including a protein-primed DNA polymerase B, retroviral integrase, cysteine protease, and ATPase. Polintons are characterized by 6-bp target site duplications, terminal-inverted repeats that are several hundred nucleotides long, and 5'-AG and TC-3' termini. Polintons exist as autonomous and nonautonomous elements." [PMID:16537396] -synonym: "maverick element" RELATED [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0001171 -name: rRNA_21S -def: "A component of the large ribosomal subunit in mitochondrial rRNA." [RSC:cb] -synonym: "21S LSU rRNA" EXACT [] -synonym: "21S ribosomal RNA" EXACT [] -synonym: "21S rRNA" EXACT [] -synonym: "rRNA 21S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001172 -name: tRNA_region -def: "A region of a tRNA." 
[RSC:cb] -synonym: "tRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000253 ! tRNA - -[Term] -id: SO:0001173 -name: anticodon_loop -def: "A sequence of seven nucleotide bases in tRNA which contains the anticodon. It has the sequence 5'-pyrimidine-purine-anticodon-modified purine-any base-3." [ISBN:0716719207] -synonym: "anti-codon loop" EXACT [] -synonym: "anticodon loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001174 -name: anticodon -def: "A sequence of three nucleotide bases in tRNA which recognizes a codon in mRNA." [RSC:cb] -synonym: "anti-codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Anticodon "wiki" -is_a: SO:0001172 ! tRNA_region -relationship: part_of SO:0001173 ! anticodon_loop - -[Term] -id: SO:0001175 -name: CCA_tail -def: "Base sequence at the 3' end of a tRNA. The 3'-hydroxyl group on the terminal adenosine is the attachment point for the amino acid." [ISBN:0716719207] -synonym: "CCA sequence" EXACT [] -synonym: "CCA tail" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001176 -name: DHU_loop -def: "Non-base-paired sequence of nucleotide bases in tRNA. It contains several dihydrouracil residues." [ISBN:071671920] -synonym: "D loop" RELATED [] -synonym: "DHU loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001177 -name: T_loop -def: "Non-base-paired sequence of three nucleotide bases in tRNA. It has sequence T-Psi-C." [ISBN:0716719207] -synonym: "T loop" EXACT [] -synonym: "TpsiC loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001178 -name: pyrrolysine_tRNA_primary_transcript -def: "A primary transcript encoding pyrrolysyl tRNA (SO:0000766)." [RSC:cb] -synonym: "pyrrolysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0001179 -name: U3_snoRNA -def: "U3 snoRNA is a member of the box C/D class of small nucleolar RNAs. 
The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -comment: The definition is most of the old definition for snoRNA (SO:0000275). -synonym: "small nucleolar RNA U3" EXACT [] -synonym: "snoRNA U3" EXACT [] -synonym: "U3 small nucleolar RNA" EXACT [] -synonym: "U3 snoRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Small_nucleolar_RNA_U3 "wiki" -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0001180 -name: AU_rich_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is rich in AUUUA pentamers. Messenger RNAs bearing multiple AU-rich elements are often unstable." [PMID:7892223] -synonym: "ARE" RELATED [] -synonym: "AU rich element" EXACT [] -synonym: "AU-rich element" EXACT [] -xref: http://en.wikipedia.org/wiki/AU-rich_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001181 -name: Bruno_response_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is bound by the Drosophila Bruno protein and its homologs." 
[PMID:10893231] -comment: Not to be confused with BRE_motif (SO:0000016), which binds transcription factor II B. -synonym: "BRE" RELATED [] -synonym: "Bruno response element" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001182 -name: iron_responsive_element -def: "A regulatory sequence found in the 5' and 3' UTRs of many mRNAs which encode iron-binding proteins. It has a hairpin structure and is recognized by trans-acting proteins known as iron-regulatory proteins." [PMID:3198610, PMID:8710843] -synonym: "IRE" EXACT [] -synonym: "iron responsive element" EXACT [] -xref: http://en.wikipedia.org/wiki/Iron_responsive_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0001183 -name: morpholino -def: "An attribute describing a sequence composed of nucleobases bound to a morpholino backbone. A morpholino backbone consists of morpholine (CHEBI:34856) rings connected by phosphorodiamidate linkages." [RSC:cb] -comment: Do not use this for feature annotation. Use morpholino_oligo (SO:0000034) instead. -xref: http://en.wikipedia.org/wiki/Morpholino "wiki" -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001184 -name: PNA -def: "An attribute describing a sequence composed of peptide nucleic acid (CHEBI:48021), a chemical consisting of nucleobases bound to a backbone composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [RSC:cb] -comment: Do not use this term for feature annotation. Use PNA_oligo (SO:0001011) instead. -synonym: "peptide nucleic acid" RELATED [] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001185 -name: enzymatic -def: "An attribute describing the sequence of a transcript that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. 
Use enzymatic_RNA (SO:0000372) instead. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001186 -name: ribozymic -def: "An attribute describing the sequence of a transcript that has catalytic activity even without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use ribozyme (SO:0000374) instead. -is_a: SO:0001185 ! enzymatic - -[Term] -id: SO:0001187 -name: pseudouridylation_guide_snoRNA -def: "A snoRNA that specifies the site of pseudouridylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA pseudouridylation guide activity (GO:0030558). -synonym: "pseudouridylation guide snoRNA" EXACT [] -is_a: SO:0000594 ! H_ACA_box_snoRNA - -[Term] -id: SO:0001188 -name: LNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of 'locked' deoxyribose rings connected to a phosphate backbone. The deoxyribose unit's conformation is 'locked' by a 2'-C,4'-C-oxymethylene link." [CHEBI:48010] -comment: Do not use this term for feature annotation. Use LNA_oligo (SO:0001189) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001189 -name: LNA_oligo -def: "An oligo composed of LNA residues." [RSC:cb] -synonym: "LNA oligo" EXACT [] -synonym: "locked nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Locked_nucleic_acid "wiki" -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001188 ! LNA - -[Term] -id: SO:0001190 -name: TNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of threose rings connected to a phosphate backbone." [CHEBI:48019] -comment: Do not use this term for feature annotation. Use TNA_oligo (SO:0001191) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001191 -name: TNA_oligo -def: "An oligo composed of TNA residues." 
[RSC:cb] -synonym: "threose nucleic acid" EXACT [] -synonym: "TNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Threose_nucleic_acid "wiki" -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001190 ! TNA - -[Term] -id: SO:0001192 -name: GNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of an acyclic three-carbon propylene glycol connected to a phosphate backbone. It has two enantiomeric forms, (R)-GNA and (S)-GNA." [CHEBI:48015] -comment: Do not use this term for feature annotation. Use GNA_oligo (SO:0001192) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001193 -name: GNA_oligo -def: "An oligo composed of GNA residues." [RSC:cb] -synonym: "glycerol nucleic acid" EXACT [] -synonym: "glycol nucleic acid" EXACT [] -synonym: "GNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Glycerol_nucleic_acid "wiki" -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001192 ! GNA - -[Term] -id: SO:0001194 -name: R_GNA -def: "An attribute describing a GNA sequence in the (R)-GNA enantiomer." [CHEBI:48016] -comment: Do not use this term for feature annotation. Use R_GNA_oligo (SO:0001195) instead. -synonym: "R GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001195 -name: R_GNA_oligo -def: "An oligo composed of (R)-GNA residues." [RSC:cb] -synonym: "(R)-glycerol nucleic acid" EXACT [] -synonym: "(R)-glycol nucleic acid" EXACT [] -synonym: "R GNA oligo" EXACT [] -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001194 ! R_GNA - -[Term] -id: SO:0001196 -name: S_GNA -def: "An attribute describing a GNA sequence in the (S)-GNA enantiomer." [CHEBI:48017] -comment: Do not use this term for feature annotation. Use S_GNA_oligo (SO:0001197) instead. -synonym: "S GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001197 -name: S_GNA_oligo -def: "An oligo composed of (S)-GNA residues." 
[RSC:cb] -synonym: "(S)-glycerol nucleic acid" EXACT [] -synonym: "(S)-glycol nucleic acid" EXACT [] -synonym: "S GNA oligo" EXACT [] -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001196 ! S_GNA - -[Term] -id: SO:0001198 -name: ds_DNA_viral_sequence -def: "A ds_DNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded DNA." [SO:ke] -synonym: "double stranded DNA virus" EXACT [] -synonym: "ds DNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001199 -name: ss_RNA_viral_sequence -def: "A ss_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as single stranded RNA." [SO:ke] -synonym: "single strand RNA virus" EXACT [] -synonym: "ss RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001200 -name: negative_sense_ssRNA_viral_sequence -def: "A negative_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that is complementary to mRNA and must be converted to positive sense RNA by RNA polymerase before translation." [SO:ke] -synonym: "negative sense single stranded RNA virus" RELATED [] -synonym: "negative sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001201 -name: positive_sense_ssRNA_viral_sequence -def: "A positive_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that can be immediately translated by the host." [SO:ke] -synonym: "positive sense single stranded RNA virus" RELATED [] -synonym: "positive sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001202 -name: ambisense_ssRNA_viral_sequence -def: "A ambisense_RNA_virus is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus with both messenger and anti messenger polarity." 
[SO:ke] -synonym: "ambisense single stranded RNA virus" EXACT [] -synonym: "ambisense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001203 -name: RNA_polymerase_promoter -def: "A region (DNA) to which RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "RNA polymerase promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0001204 -name: Phage_RNA_Polymerase_Promoter -def: "A region (DNA) to which Bacteriophage RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "Phage RNA Polymerase Promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0001205 -name: SP6_RNA_Polymerase_Promoter -def: "A region (DNA) to which the SP6 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "SP6 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001206 -name: T3_RNA_Polymerase_Promoter -def: "A DNA sequence to which the T3 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T3 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001207 -name: T7_RNA_Polymerase_Promoter -def: "A region (DNA) to which the T7 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T7 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001208 -name: five_prime_EST -def: "An EST read from the 5' end of a transcript that usually codes for a protein. These regions tend to be conserved across species and do not change much within a gene family." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "5' EST" EXACT [] -synonym: "five prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001209 -name: three_prime_EST -def: "An EST read from the 3' end of a transcript. They are more likely to fall within non-coding, or untranslated regions(UTRs)." 
[http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "3' EST" EXACT [] -synonym: "three prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001210 -name: translational_frameshift -def: "The region of mRNA (not divisible by 3 bases) that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "ribosomal frameshift" EXACT [] -synonym: "translational frameshift" EXACT [] -xref: http://en.wikipedia.org/wiki/Translational_frameshift "wiki" -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0001211 -name: plus_1_translational_frameshift -def: "The region of mRNA 1 base long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 1 ribosomal frameshift" EXACT [] -synonym: "plus 1 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001212 -name: plus_2_translational_frameshift -def: "The region of mRNA 2 bases long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 2 ribosomal frameshift" EXACT [] -synonym: "plus 2 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001213 -name: group_III_intron -def: "Group III introns are introns found in the mRNA of the plastids of euglenoid protists. They are spliced by a two step transesterification with bulged adenosine as initiating nucleophile." [PMID:11377794] -comment: GO:0000374. -synonym: "group III intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_III_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -def: "The maximal intersection of exon and UTR." 
[SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with a stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001216 -name: endonuclease_spliced_intron -def: "An intron that spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -synonym: "endonuclease spliced intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0001217 -name: protein_coding_gene -synonym: "protein coding gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000010 ! protein_coding - -[Term] -id: SO:0001218 -name: transgenic_insertion -def: "An insertion that derives from another organism, via the use of recombinant DNA technology." [SO:bm] -synonym: "transgenic insertion" EXACT [] -intersection_of: SO:0000667 ! insertion -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0001219 -name: retrogene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000569 ! retrotransposed - -[Term] -id: SO:0001220 -name: silenced_by_RNA_interference -def: "An attribute describing an epigenetic process where a gene is inactivated by RNA interference." [RSC:cb] -comment: RNA interference is GO:0016246. -synonym: "silenced by RNA interference" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001221 -name: silenced_by_histone_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by histone modification." [RSC:cb] -comment: Histone modification is GO:0016570. 
-synonym: "silenced by histone modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001222 -name: silenced_by_histone_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone methylation." [RSC:cb] -comment: Histone methylation is GO:0016571. -synonym: "silenced by histone methylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001223 -name: silenced_by_histone_deacetylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone deacetylation." [RSC:cb] -comment: Histone deacetylation is GO:0016573. -synonym: "silenced by histone deacetylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001224 -name: gene_silenced_by_RNA_interference -def: "A gene that is silenced by RNA interference." [SO:xp] -synonym: "gene silenced by RNA interference" EXACT [] -synonym: "RNA interference silenced gene" EXACT [] -synonym: "RNAi silenced gene" EXACT [] -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001220 ! silenced_by_RNA_interference - -[Term] -id: SO:0001225 -name: gene_silenced_by_histone_modification -def: "A gene that is silenced by histone modification." [SO:xp] -synonym: "gene silenced by histone modification" EXACT [] -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001226 -name: gene_silenced_by_histone_methylation -def: "A gene that is silenced by histone methylation." [SO:xp] -synonym: "gene silenced by histone methylation" EXACT [] -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001222 ! silenced_by_histone_methylation - -[Term] -id: SO:0001227 -name: gene_silenced_by_histone_deacetylation -def: "A gene that is silenced by histone deacetylation." 
[SO:xp] -synonym: "gene silenced by histone deacetylation" EXACT [] -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001223 ! silenced_by_histone_deacetylation - -[Term] -id: SO:0001228 -name: dihydrouridine -def: "A modified RNA base in which the 5,6-dihydrouracil is bound to the ribose ring." [RSC:cb] -synonym: "D" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Dihydrouridine "wiki" -xref: RNAMOD:051 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001229 -name: pseudouridine -def: "A modified RNA base in which the 5- position of the uracil is bound to the ribose ring instead of the 4- position." [RSC:cb] -comment: The free molecule is CHEBI:17802. -synonym: "Y" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Pseudouridine "wiki" -xref: RNAMOD:050 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001230 -name: inosine -def: "A modified RNA base in which hypoxanthine is bound to the ribose ring." [http://library.med.utah.edu/RNAmods/, RSC:cb] -comment: The free molecule is CHEBI:17596. -synonym: "I" RELATED [] -synonym: "RNAMOD:017" RELATED [] -xref: http://en.wikipedia.org/wiki/Inosine "wiki" -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001231 -name: seven_methylguanine -def: "A modified RNA base in which guanine is methylated at the 7- position." [RSC:cb] -comment: The free molecule is CHEBI:2274. -synonym: "7-methylguanine" EXACT [] -synonym: "seven methylguanine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001232 -name: ribothymidine -def: "A modified RNA base in which thymine is bound to the ribose ring." [RSC:cb] -comment: The free molecule is CHEBI:30832. -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001233 -name: methylinosine -def: "A modified RNA base in which methylhypoxanthine is bound to the ribose ring." [RSC:cb] -is_a: SO:0001274 ! 
modified_inosine - -[Term] -id: SO:0001234 -name: mobile -def: "An attribute describing a feature that has either intra-genome or intracellular mobility." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Mobile "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001235 -name: replicon -def: "A region containing at least one unique origin of replication and a unique termination site." [ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001237 -name: amino_acid -def: "A sequence feature that corresponds to a single amino acid residue in a polypeptide." [RSC:cb] -comment: Probably in the future this will cross reference to Chebi. -synonym: "amino acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Amino_acid "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0001238 -name: major_TSS -synonym: "major transcription start site" EXACT [] -synonym: "major TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001239 -name: minor_TSS -synonym: "minor TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001240 -name: TSS_region -def: "The region of a gene from the 5' most TSS to the 3' TSS." [BBOP:nw] -synonym: "TSS region" EXACT [] -is_a: SO:0000842 ! gene_component_region -relationship: has_part SO:0000315 ! TSS - -[Term] -id: SO:0001241 -name: encodes_alternate_transcription_start_sites -synonym: "encodes alternate transcription start sites" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0001243 -name: miRNA_primary_transcript_region -def: "A part of an miRNA primary_transcript." 
[SO:ke] -synonym: "miRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0001244 -name: pre_miRNA -def: "The 60-70 nucleotide region remain after Drosha processing of the primary transcript, that folds back upon itself to form a hairpin sructure." [SO:ke] -synonym: "pre-miRNA" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0001245 -name: miRNA_stem -def: "The stem of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA stem" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001246 -name: miRNA_loop -def: "The loop of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA loop" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001247 -name: synthetic_oligo -def: "An oligo composed of synthetic nucleotides." [SO:ke] -synonym: "synthetic oligo" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0001248 -name: assembly -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001249 -name: fragment_assembly -def: "A fragment assembly is a genome assembly that orders overlapping fragments of the genome based on landmark sequences. The base pair distance between the landmarks is known allowing additivity of lengths." [SO:ke] -synonym: "fragment assembly" EXACT [] -synonym: "physical map" EXACT [] -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0001250 -name: fingerprint_map -def: "A fingerprint_map is a physical map composed of restriction fragments." 
[SO:ke] -synonym: "BACmap" EXACT [] -synonym: "fingerprint map" EXACT [] -synonym: "FPC" EXACT [] -synonym: "FPCmap" EXACT [] -synonym: "restriction map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000412 ! restriction_fragment - -[Term] -id: SO:0001251 -name: STS_map -def: "An STS map is a physical map organized by the unique STS landmarks." [SO:ke] -synonym: "STS map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001252 -name: RH_map -def: "A radiation hybrid map is a physical map." [SO:ke] -synonym: "radiation hybrid map" EXACT [] -synonym: "RH map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001253 -name: sonicate_fragment -def: "A DNA fragment generated by sonication. Sonication is a technique used to sheer DNA into smaller fragments." [SO:ke] -synonym: "sonicate fragment" EXACT [] -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0001254 -name: polyploid -def: "A kind of chromosome variation where the chromosome complement is an exact multiple of the haploid number and is greater than the diploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Polyploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0001255 -name: autopolyploid -def: "A polyploid where the multiple chromosome set was derived from the same organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Autopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001256 -name: allopolyploid -def: "A polyploid where the multiple chromosome set was derived from a different organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Allopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001257 -name: homing_endonuclease_binding_site -def: "The binding site (recognition site) of a homing endonuclease. The binding site is typically large." 
[SO:ke] -synonym: "homing endonuclease binding site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0001258 -name: octamer_motif -def: "A sequence element characteristic of some RNA polymerase II promoters with sequence ATTGCAT that binds Pou-domain transcription factors." [GOC:dh, PMID:3095662] -comment: Nature. 1986 Oct 16-22;323(6089):640-3. -synonym: "octamer motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001259 -name: apicoplast_chromosome -def: "A chromosome originating in an apicoplast." [SO:xp] -synonym: "apicoplast chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0001260 -name: sequence_collection -def: "A collection of discontinuous sequences." [SO:ke] -synonym: "sequence collection" EXACT [] - -[Term] -id: SO:0001261 -name: overlapping_feature_set -def: "A continuous region of sequence composed of the overlapping of multiple sequence_features, which ultimately provides evidence for another sequence_feature." [SO:ke] -comment: This feature was requested by Nicole, tracker id 1911479. It is required to gather evidence together for annotation. An example would be overlapping ESTs that support an mRNA. -synonym: "overlapping feature set" EXACT [] -is_a: SO:0000703 ! experimental_result_region - -[Term] -id: SO:0001262 -name: overlapping_EST_set -def: "A continous experimental result region extending the length of multiple overlapping EST's." [SO:ke] -synonym: "overlapping EST set" EXACT [] -is_a: SO:0001261 ! overlapping_feature_set -relationship: has_part SO:0000345 ! EST - -[Term] -id: SO:0001263 -name: ncRNA_gene -synonym: "ncRNA gen" EXACT [] -synonym: "ncRNA gene" EXACT [] -synonym: "non-coding RNA gene" RELATED [] -is_a: SO:0000704 ! gene - -[Term] -id: SO:0001264 -name: gRNA_gene -synonym: "gRNA gene" EXACT [] -intersection_of: SO:0001263 ! 
ncRNA_gene -intersection_of: has_quality SO:0000979 ! gRNA_encoding - -[Term] -id: SO:0001265 -name: miRNA_gene -synonym: "miRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000571 ! miRNA_encoding - -[Term] -id: SO:0001266 -name: scRNA_gene -synonym: "scRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000575 ! scRNA_encoding - -[Term] -id: SO:0001267 -name: snoRNA_gene -synonym: "snoRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0001268 -name: snRNA_gene -synonym: "snRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0001263 ! ncRNA_gene - -[Term] -id: SO:0001269 -name: SRP_RNA_gene -synonym: "SRP RNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000642 ! SRP_RNA_encoding - -[Term] -id: SO:0001270 -name: stRNA_gene -synonym: "stRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000656 ! stRNA_encoding - -[Term] -id: SO:0001271 -name: tmRNA_gene -synonym: "tmRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000659 ! tmRNA_encoding - -[Term] -id: SO:0001272 -name: tRNA_gene -synonym: "tRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000663 ! tRNA_encoding - -[Term] -id: SO:0001273 -name: modified_adenosine -def: "A modified adenine is an adenine base feature that has been altered." [SO:ke] -synonym: "modified adenosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001274 -name: modified_inosine -def: "A modified inosine is an inosine base feature that has been altered." [SO:ke] -synonym: "modified inosine" EXACT [] -is_a: SO:0001230 ! 
inosine - -[Term] -id: SO:0001275 -name: modified_cytidine -def: "A modified cytidine is a cytidine base feature which has been altered." [SO:ke] -synonym: "modified cytidine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001276 -name: modified_guanosine -synonym: "modified guanosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001277 -name: modified_uridine -synonym: "modified uridine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001278 -name: one_methylinosine -def: "1-methylinosine is a modified insosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylinosine" EXACT [] -synonym: "m1I" EXACT RNAMOD [] -synonym: "one methylinosine" EXACT [] -xref: RNAMOD:018 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001279 -name: one_two_prime_O_dimethylinosine -def: "1,2'-O-dimethylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylinosine" EXACT [] -synonym: "m'Im" EXACT RNAMOD [] -synonym: "one two prime O dimethylinosine" EXACT [] -xref: RNAMOD:019 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001280 -name: two_prime_O_methylinosine -def: "2'-O-methylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylinosine" EXACT [] -synonym: "Im" EXACT RNAMOD [] -synonym: "two prime O methylinosine" EXACT [] -xref: RNAMOD:081 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001281 -name: three_methylcytidine -def: "3-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylcytidine" EXACT [] -synonym: "m3C" EXACT RNAMOD [] -synonym: "three methylcytidine" EXACT [] -xref: RNAMOD:020 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001282 -name: five_methylcytidine -def: "5-methylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylcytidine" EXACT [] -synonym: "five methylcytidine" EXACT [] -synonym: "m5C" EXACT RNAMOD [] -xref: RNAMOD:021 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001283 -name: two_prime_O_methylcytidine -def: "2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylcytidine" EXACT [] -synonym: "Cm" EXACT RNAMOD [] -synonym: "two prime O methylcytidine" EXACT [] -xref: RNAMOD:022 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001284 -name: two_thiocytidine -def: "2-thiocytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiocytidine" EXACT [] -synonym: "s2C" EXACT RNAMOD [] -synonym: "two thiocytidine" EXACT [] -xref: RNAMOD:023 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001285 -name: N4_acetylcytidine -def: "N4-acetylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4C" EXACT RNAMOD [] -synonym: "N4 acetylcytidine" EXACT [] -synonym: "N4-acetylcytidine" EXACT [] -xref: RNAMOD:024 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001286 -name: five_formylcytidine -def: "5-formylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formylcytidine" EXACT [] -synonym: "f5C" EXACT RNAMOD [] -synonym: "five formylcytidine" EXACT [] -xref: RNAMOD:025 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001287 -name: five_two_prime_O_dimethylcytidine -def: "5,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethylcytidine" EXACT [] -synonym: "five two prime O dimethylcytidine" EXACT [] -synonym: "m5Cm" EXACT RNAMOD [] -xref: RNAMOD:026 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001288 -name: N4_acetyl_2_prime_O_methylcytidine -def: "N4-acetyl-2'-O-methylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "ac4Cm" EXACT RNAMOD [] -synonym: "N4 acetyl 2 prime O methylcytidine" EXACT [] -synonym: "N4-acetyl-2'-O-methylcytidine" EXACT [] -xref: RNAMOD:027 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001289 -name: lysidine -def: "Lysidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "k2C" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Lysidine "wiki" -xref: RNAMOD:028 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001290 -name: N4_methylcytidine -def: "N4-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4C" EXACT RNAMOD [] -synonym: "N4 methylcytidine" EXACT [] -synonym: "N4-methylcytidine" EXACT [] -xref: RNAMOD:082 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001291 -name: N4_2_prime_O_dimethylcytidine -def: "N4,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4Cm" EXACT RNAMOD [] -synonym: "N4 2 prime O dimethylcytidine" EXACT [] -synonym: "N4,2'-O-dimethylcytidine" EXACT [] -xref: RNAMOD:083 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001292 -name: five_hydroxymethylcytidine -def: "5-hydroxymethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxymethylcytidine" EXACT [] -synonym: "five hydroxymethylcytidine" EXACT [] -synonym: "hm5C" EXACT RNAMOD [] -xref: RNAMOD:084 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001293 -name: five_formyl_two_prime_O_methylcytidine -def: "5-formyl-2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formyl-2'-O-methylcytidine" EXACT [] -synonym: "f5Cm" EXACT RNAMOD [] -synonym: "five formyl two prime O methylcytidine" EXACT [] -xref: RNAMOD:095 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001294 -name: N4_N4_2_prime_O_trimethylcytidine -def: "N4_N4_2_prime_O_trimethylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "m42Cm" EXACT RNAMOD [] -synonym: "N4,N4,2'-O-trimethylcytidine" EXACT [] -xref: RNAMOD:107 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001295 -name: one_methyladenosine -def: "1_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyladenosine" EXACT [] -synonym: "m1A" EXACT RNAMOD [] -synonym: "one methyladenosine" EXACT [] -xref: RNAMOD:001 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001296 -name: two_methyladenosine -def: "2_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methyladenosine" EXACT [] -synonym: "m2A" EXACT RNAMOD [] -synonym: "two methyladenosine" EXACT [] -xref: RNAMOD:002 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001297 -name: N6_methyladenosine -def: "N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6A" EXACT RNAMOD [] -synonym: "N6 methyladenosine" EXACT [] -synonym: "N6-methyladenosine" EXACT [] -xref: RNAMOD:003 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001298 -name: two_prime_O_methyladenosine -def: "2prime_O_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyladenosine" EXACT [] -synonym: "Am" EXACT RNAMOD [] -synonym: "two prime O methyladenosine" EXACT [] -xref: RNAMOD:004 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001299 -name: two_methylthio_N6_methyladenosine -def: "2_methylthio_N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-methyladenosine" EXACT [] -synonym: "ms2m6A" EXACT RNAMOD [] -synonym: "two methylthio N6 methyladenosine" EXACT [] -xref: RNAMOD:005 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001300 -name: N6_isopentenyladenosine -def: "N6_isopentenyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "i6A" EXACT RNAMOD [] -synonym: "N6 isopentenyladenosine" EXACT [] -synonym: "N6-isopentenyladenosine" EXACT [] -xref: RNAMOD:006 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001301 -name: two_methylthio_N6_isopentenyladenosine -def: "2_methylthio_N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-isopentenyladenosine" EXACT [] -synonym: "ms2i6A" EXACT RNAMOD [] -synonym: "two methylthio N6 isopentenyladenosine" EXACT [] -xref: RNAMOD:007 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001302 -name: N6_cis_hydroxyisopentenyl_adenosine -def: "N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "io6A" EXACT RNAMOD [] -synonym: "N6 cis hydroxyisopentenyl adenosine" EXACT [] -synonym: "N6-(cis-hydroxyisopentenyl)adenosine" EXACT [] -xref: RNAMOD:008 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001303 -name: two_methylthio_N6_cis_hydroxyisopentenyl_adenosine -def: "2_methylthio_N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-(cis-hydroxyisopentenyl) adenosine" EXACT [] -synonym: "ms2io6A" EXACT RNAMOD [] -synonym: "two methylthio N6 cis hydroxyisopentenyl adenosine" EXACT [] -xref: RNAMOD:009 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001304 -name: N6_glycinylcarbamoyladenosine -def: "N6_glycinylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "g6A" EXACT RNAMOD [] -synonym: "N6 glycinylcarbamoyladenosine" EXACT [] -synonym: "N6-glycinylcarbamoyladenosine" EXACT [] -xref: RNAMOD:010 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001305 -name: N6_threonylcarbamoyladenosine -def: "N6_threonylcarbamoyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-threonylcarbamoyladenosine" EXACT [] -synonym: "t6A" EXACT RNAMOD [] -xref: RNAMOD:011 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001306 -name: two_methylthio_N6_threonyl_carbamoyladenosine -def: "2_methylthio_N6_threonyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-threonyl carbamoyladenosine" EXACT [] -synonym: "ms2t6A" EXACT RNAMOD [] -synonym: "two methylthio N6 threonyl carbamoyladenosine" EXACT [] -xref: RNAMOD:012 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001307 -name: N6_methyl_N6_threonylcarbamoyladenosine -def: "N6_methyl_N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6t6A" EXACT RNAMOD [] -synonym: "N6 methyl N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-methyl-N6-threonylcarbamoyladenosine" EXACT [] -xref: RNAMOD:013 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001308 -name: N6_hydroxynorvalylcarbamoyladenosine -def: "N6_hydroxynorvalylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "hn6A" EXACT RNAMOD [] -synonym: "N6 hydroxynorvalylcarbamoyladenosine" EXACT [] -synonym: "N6-hydroxynorvalylcarbamoyladenosine" EXACT [] -xref: RNAMOD:014 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001309 -name: two_methylthio_N6_hydroxynorvalyl_carbamoyladenosine -def: "2_methylthio_N6_hydroxynorvalyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-hydroxynorvalyl carbamoyladenosine" EXACT [] -synonym: "ms2hn6A" EXACT RNAMOD [] -synonym: "two methylthio N6 hydroxynorvalyl carbamoyladenosine" EXACT [] -xref: RNAMOD:015 -is_a: SO:0001273 ! 
modified_adenosine - -[Term] -id: SO:0001310 -name: two_prime_O_ribosyladenosine_phosphate -def: "2prime_O_ribosyladenosine_phosphate is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosyladenosine (phosphate)" EXACT [] -synonym: "Ar(p)" EXACT RNAMOD [] -synonym: "two prime O ribosyladenosine phosphate" EXACT [] -xref: RNAMOD:016 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001311 -name: N6_N6_dimethyladenosine -def: "N6_N6_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62A" EXACT RNAMOD [] -synonym: "N6,N6-dimethyladenosine" EXACT [] -xref: RNAMOD:080 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001312 -name: N6_2_prime_O_dimethyladenosine -def: "N6_2prime_O_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6Am" EXACT RNAMOD [] -synonym: "N6 2 prime O dimethyladenosine" EXACT [] -synonym: "N6,2'-O-dimethyladenosine" EXACT [] -xref: RNAMOD:088 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001313 -name: N6_N6_2_prime_O_trimethyladenosine -def: "N6_N6_2prime_O_trimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62Am" EXACT RNAMOD [] -synonym: "N6,N6,2'-O-trimethyladenosine" EXACT [] -xref: RNAMOD:089 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001314 -name: one_two_prime_O_dimethyladenosine -def: "1,2'-O-dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethyladenosine" EXACT [] -synonym: "m1Am" EXACT RNAMOD [] -synonym: "one two prime O dimethyladenosine" EXACT [] -xref: RNAMOD:097 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001315 -name: N6_acetyladenosine -def: "N6_acetyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "ac6A" EXACT RNAMOD [] -synonym: "N6 acetyladenosine" EXACT [] -synonym: "N6-acetyladenosine" EXACT [] -xref: RNAMOD:102 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001316 -name: seven_deazaguanosine -def: "7-deazaguanosine is a moddified guanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-deazaguanosine" RELATED [] -synonym: "seven deazaguanosine" EXACT [] -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001317 -name: queuosine -def: "Queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "Q" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Queuosine "wiki" -xref: RNAMOD:043 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001318 -name: epoxyqueuosine -def: "Epoxyqueuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "eQ" EXACT RNAMOD [] -xref: RNAMOD:044 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001319 -name: galactosyl_queuosine -def: "Galactosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "galactosyl queuosine" EXACT [] -synonym: "galactosyl-queuosine" EXACT [] -synonym: "galQ" EXACT RNAMOD [] -xref: RNAMOD:045 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001320 -name: mannosyl_queuosine -def: "Mannosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "mannosyl queuosine" EXACT [] -synonym: "mannosyl-queuosine" EXACT [] -synonym: "manQ" EXACT RNAMOD [] -xref: RNAMOD:046 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001321 -name: seven_cyano_seven_deazaguanosine -def: "7_cyano_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-cyano-7-deazaguanosine" EXACT [] -synonym: "preQ0" EXACT RNAMOD [] -synonym: "seven cyano seven deazaguanosine" EXACT [] -xref: RNAMOD:047 -is_a: SO:0001316 ! 
seven_deazaguanosine - -[Term] -id: SO:0001322 -name: seven_aminomethyl_seven_deazaguanosine -def: "7_aminomethyl_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-aminomethyl-7-deazaguanosine" EXACT [] -synonym: "preQ1" EXACT RNAMOD [] -synonym: "seven aminomethyl seven deazaguanosine" EXACT [] -xref: RNAMOD:048 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001323 -name: archaeosine -def: "Archaeosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "G+" EXACT RNAMOD [] -xref: RNAMOD:049 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001324 -name: one_methylguanosine -def: "1_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylguanosine" EXACT [] -synonym: "m1G" EXACT RNAMOD [] -synonym: "one methylguanosine" EXACT [] -xref: RNAMOD:029 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001325 -name: N2_methylguanosine -def: "N2_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2G" EXACT RNAMOD [] -synonym: "N2 methylguanosine" EXACT [] -synonym: "N2-methylguanosine" EXACT [] -xref: RNAMOD:030 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001326 -name: seven_methylguanosine -def: "7_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "7-methylguanosine" EXACT [] -synonym: "m7G" EXACT RNAMOD [] -synonym: "seven methylguanosine" EXACT [] -xref: RNAMOD:031 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001327 -name: two_prime_O_methylguanosine -def: "2prime_O_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylguanosine" EXACT [] -synonym: "Gm" EXACT RNAMOD [] -synonym: "two prime O methylguanosine" EXACT [] -xref: RNAMOD:032 -is_a: SO:0001276 ! 
modified_guanosine - -[Term] -id: SO:0001328 -name: N2_N2_dimethylguanosine -def: "N2_N2_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22G" EXACT RNAMOD [] -synonym: "N2,N2-dimethylguanosine" EXACT [] -xref: RNAMOD:033 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001329 -name: N2_2_prime_O_dimethylguanosine -def: "N2_2prime_O_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2Gm" EXACT RNAMOD [] -synonym: "N2 2 prime O dimethylguanosine" EXACT [] -synonym: "N2,2'-O-dimethylguanosine" EXACT [] -xref: RNAMOD:034 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001330 -name: N2_N2_2_prime_O_trimethylguanosine -def: "N2_N2_2prime_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22Gmv" EXACT RNAMOD [] -synonym: "N2,N2,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:035 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001331 -name: two_prime_O_ribosylguanosine_phosphate -def: "2prime_O_ribosylguanosine_phosphate is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosylguanosine (phosphate)" EXACT [] -synonym: "Gr(p)" EXACT RNAMOD [] -synonym: "two prime O ribosylguanosine phosphate" EXACT [] -xref: RNAMOD:036 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001332 -name: wybutosine -def: "Wybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "yW" EXACT RNAMOD [] -xref: RNAMOD:037 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001333 -name: peroxywybutosine -def: "Peroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "o2yW" EXACT RNAMOD [] -xref: RNAMOD:038 -is_a: SO:0001276 ! 
modified_guanosine - -[Term] -id: SO:0001334 -name: hydroxywybutosine -def: "Hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW" EXACT RNAMOD [] -xref: RNAMOD:039 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001335 -name: undermodified_hydroxywybutosine -def: "Undermodified_hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW*" EXACT RNAMOD [] -synonym: "undermodified hydroxywybutosine" EXACT [] -xref: RNAMOD:040 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001336 -name: wyosine -def: "Wyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "IMG" EXACT RNAMOD [] -xref: RNAMOD:041 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001337 -name: methylwyosine -def: "Methylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mimG" EXACT RNAMOD [] -xref: RNAMOD:042 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001338 -name: N2_7_dimethylguanosine -def: "N2_7_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7G" EXACT RNAMOD [] -synonym: "N2 7 dimethylguanosine" EXACT [] -synonym: "N2,7-dimethylguanosine" EXACT [] -xref: RNAMOD:090 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001339 -name: N2_N2_7_trimethylguanosine -def: "N2_N2_7_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,2,7G" EXACT RNAMOD [] -synonym: "N2,N2,7-trimethylguanosine" EXACT [] -xref: RNAMOD:091 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001340 -name: one_two_prime_O_dimethylguanosine -def: "1_2prime_O_dimethylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylguanosine" EXACT [] -synonym: "m1Gm" EXACT RNAMOD [] -synonym: "one two prime O dimethylguanosine" EXACT [] -xref: RNAMOD:096 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001341 -name: four_demethylwyosine -def: "4_demethylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-demethylwyosine" EXACT [] -synonym: "four demethylwyosine" EXACT [] -synonym: "imG-14" EXACT RNAMOD [] -xref: RNAMOD:100 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001342 -name: isowyosine -def: "Isowyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "imG2" EXACT RNAMOD [] -xref: RNAMOD:101 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001343 -name: N2_7_2prirme_O_trimethylguanosine -def: "N2_7_2prirme_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7Gm" EXACT RNAMOD [] -synonym: "N2 7 2prirme O trimethylguanosine" EXACT [] -synonym: "N2,7,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:106 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001344 -name: five_methyluridine -def: "5_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methyluridine" EXACT [] -synonym: "five methyluridine" EXACT [] -synonym: "m5U" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/5-methyluridine "wiki" -xref: RNAMOD:052 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001345 -name: two_prime_O_methyluridine -def: "2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyluridine" EXACT [] -synonym: "two prime O methyluridine" EXACT [] -synonym: "Um" EXACT RNAMOD [] -xref: RNAMOD:053 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001346 -name: five_two_prime_O_dimethyluridine -def: "5_2_prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethyluridine" EXACT [] -synonym: "five two prime O dimethyluridine" EXACT [] -synonym: "m5Um" EXACT RNAMOD [] -xref: RNAMOD:054 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001347 -name: one_methylpseudouridine -def: "1_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylpseudouridine" EXACT [] -synonym: "m1Y" EXACT RNAMOD [] -synonym: "one methylpseudouridine" EXACT [] -xref: RNAMOD:055 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001348 -name: two_prime_O_methylpseudouridine -def: "2prime_O_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylpseudouridine" EXACT [] -synonym: "two prime O methylpseudouridine" EXACT [] -synonym: "Ym" EXACT RNAMOD [] -xref: RNAMOD:056 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001349 -name: two_thiouridine -def: "2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiouridine" EXACT [] -synonym: "s2U" EXACT RNAMOD [] -synonym: "two thiouridine" EXACT [] -xref: RNAMOD:057 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001350 -name: four_thiouridine -def: "4_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-thiouridine" EXACT [] -synonym: "four thiouridine" EXACT [] -synonym: "s4U" EXACT RNAMOD [] -xref: RNAMOD:058 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001351 -name: five_methyl_2_thiouridine -def: "5_methyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyl-2-thiouridine" EXACT [] -synonym: "five methyl 2 thiouridine" EXACT [] -synonym: "m5s2U" EXACT RNAMOD [] -xref: RNAMOD:059 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001352 -name: two_thio_two_prime_O_methyluridine -def: "2_thio_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thio-2'-O-methyluridine" EXACT [] -synonym: "s2Um" EXACT RNAMOD [] -synonym: "two thio two prime O methyluridine" EXACT [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001353 -name: three_three_amino_three_carboxypropyl_uridine -def: "3_3_amino_3_carboxypropyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-(3-amino-3-carboxypropyl)uridine" EXACT [] -synonym: "acp3U" EXACT RNAMOD [] -xref: RNAMOD:061 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001354 -name: five_hydroxyuridine -def: "5_hydroxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxyuridine" EXACT [] -synonym: "five hydroxyuridine" EXACT [] -synonym: "ho5U" EXACT RNAMOD [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001355 -name: five_methoxyuridine -def: "5_methoxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxyuridine" EXACT [] -synonym: "five methoxyuridine" EXACT [] -synonym: "mo5U" EXACT RNAMOD [] -xref: RNAMOD:063 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001356 -name: uridine_five_oxyacetic_acid -def: "Uridine_5_oxyacetic_acid is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "cmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid" EXACT [] -synonym: "uridine five oxyacetic acid" EXACT [] -xref: RNAMOD:064 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001357 -name: uridine_five_oxyacetic_acid_methyl_ester -def: "Uridine_5_oxyacetic_acid_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mcmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid methyl ester" EXACT [] -synonym: "uridine five oxyacetic acid methyl ester" EXACT [] -xref: RNAMOD:065 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001358 -name: five_carboxyhydroxymethyl_uridine -def: "5_carboxyhydroxymethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine" EXACT [] -synonym: "chm5U" EXACT RNAMOD [] -synonym: "five carboxyhydroxymethyl uridine" EXACT [] -xref: RNAMOD:066 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001359 -name: five_carboxyhydroxymethyl_uridine_methyl_ester -def: "5_carboxyhydroxymethyl_uridine_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine methyl ester" EXACT [] -synonym: "five carboxyhydroxymethyl uridine methyl ester" EXACT [] -synonym: "mchm5U" EXACT RNAMOD [] -xref: RNAMOD:067 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001360 -name: five_methoxycarbonylmethyluridine -def: "Five_methoxycarbonylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyluridine" EXACT [] -synonym: "five methoxycarbonylmethyluridine" EXACT [] -synonym: "mcm5U" EXACT RNAMOD [] -xref: RNAMOD:068 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001361 -name: five_methoxycarbonylmethyl_two_prime_O_methyluridine -def: "Five_methoxycarbonylmethyl_2_prime_O_methyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five methoxycarbonylmethyl two prime O methyluridine" EXACT [] -synonym: "mcm5Um" EXACT RNAMOD [] -xref: RNAMOD:069 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001362 -name: five_methoxycarbonylmethyl_two_thiouridine -def: "5_methoxycarbonylmethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2-thiouridine" EXACT [] -synonym: "five methoxycarbonylmethyl two thiouridine" EXACT [] -synonym: "mcm5s2U" EXACT RNAMOD [] -xref: RNAMOD:070 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001363 -name: five_aminomethyl_two_thiouridine -def: "5_aminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-aminomethyl-2-thiouridine" EXACT [] -synonym: "five aminomethyl two thiouridine" EXACT [] -synonym: "nm5s2U" EXACT RNAMOD [] -xref: RNAMOD:071 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001364 -name: five_methylaminomethyluridine -def: "5_methylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyluridine" EXACT [] -synonym: "five methylaminomethyluridine" EXACT [] -synonym: "mnm5U" EXACT RNAMOD [] -xref: RNAMOD:072 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001365 -name: five_methylaminomethyl_two_thiouridine -def: "5_methylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-thiouridine" EXACT [] -synonym: "five methylaminomethyl two thiouridine" EXACT [] -synonym: "mnm5s2U" EXACT RNAMOD [] -xref: RNAMOD:073 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001366 -name: five_methylaminomethyl_two_selenouridine -def: "5_methylaminomethyl_2_selenouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-selenouridine" EXACT [] -synonym: "five methylaminomethyl two selenouridine" EXACT [] -synonym: "mnm5se2U" EXACT RNAMOD [] -xref: RNAMOD:074 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001367 -name: five_carbamoylmethyluridine -def: "5_carbamoylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyluridine" EXACT [] -synonym: "five carbamoylmethyluridine" EXACT [] -synonym: "ncm5U" EXACT RNAMOD [] -xref: RNAMOD:075 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001368 -name: five_carbamoylmethyl_two_prime_O_methyluridine -def: "5_carbamoylmethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five carbamoylmethyl two prime O methyluridine" EXACT [] -synonym: "ncm5Um" EXACT RNAMOD [] -xref: RNAMOD:076 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001369 -name: five_carboxymethylaminomethyluridine -def: "5_carboxymethylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyluridine" EXACT [] -synonym: "cmnm5U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyluridine" EXACT [] -xref: RNAMOD:077 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001370 -name: five_carboxymethylaminomethyl_two_prime_O_methyluridine -def: "5_carboxymethylaminomethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl- 2'-O-methyluridine" EXACT [] -synonym: "cmnm5Um" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two prime O methyluridine" EXACT [] -xref: RNAMOD:078 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001371 -name: five_carboxymethylaminomethyl_two_thiouridine -def: "5_carboxymethylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl-2-thiouridine" EXACT [] -synonym: "cmnm5s2U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two thiouridine" EXACT [] -xref: RNAMOD:079 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001372 -name: three_methyluridine -def: "3_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methyluridine" EXACT [] -synonym: "m3U" EXACT RNAMOD [] -synonym: "three methyluridine" EXACT [] -xref: RNAMOD:085 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001373 -name: one_methyl_three_three_amino_three_carboxypropyl_pseudouridine -def: "1_methyl_3_3_amino_3_carboxypropyl_pseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyl-3-(3-amino-3-carboxypropyl) pseudouridine" EXACT [] -synonym: "m1acp3Y" EXACT RNAMOD [] -xref: RNAMOD:086 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001374 -name: five_carboxymethyluridine -def: "5_carboxymethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethyluridine" EXACT [] -synonym: "cm5U" EXACT RNAMOD [] -synonym: "five carboxymethyluridine" EXACT [] -xref: RNAMOD:087 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001375 -name: three_two_prime_O_dimethyluridine -def: "3_2prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3,2'-O-dimethyluridine" EXACT [] -synonym: "m3Um" EXACT RNAMOD [] -synonym: "three two prime O dimethyluridine" EXACT [] -xref: RNAMOD:092 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001376 -name: five_methyldihydrouridine -def: "5_methyldihydrouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyldihydrouridine" EXACT [] -synonym: "five methyldihydrouridine" EXACT [] -synonym: "m5D" EXACT RNAMOD [] -xref: RNAMOD:093 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001377 -name: three_methylpseudouridine -def: "3_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylpseudouridine" EXACT [] -synonym: "m3Y" EXACT RNAMOD [] -synonym: "three methylpseudouridine" EXACT [] -xref: RNAMOD:094 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001378 -name: five_taurinomethyluridine -def: "5_taurinomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyluridine" EXACT [] -synonym: "five taurinomethyluridine" EXACT [] -synonym: "tm5U" EXACT RNAMOD [] -xref: RNAMOD:098 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001379 -name: five_taurinomethyl_two_thiouridine -def: "5_taurinomethyl_2_thiouridineis a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyl-2-thiouridine" EXACT [] -synonym: "five taurinomethyl two thiouridine" EXACT [] -synonym: "tm5s2U" EXACT RNAMOD [] -xref: RNAMOD:099 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001380 -name: five_isopentenylaminomethyl_uridine -def: "5_isopentenylaminomethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)uridine" EXACT [] -synonym: "five isopentenylaminomethyl uridine" EXACT [] -synonym: "inm5U" EXACT RNAMOD [] -xref: RNAMOD:103 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001381 -name: five_isopentenylaminomethyl_two_thiouridine -def: "5_isopentenylaminomethyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2-thiouridine" EXACT [] -synonym: "five isopentenylaminomethyl two thiouridine" EXACT [] -synonym: "inm5s2U" EXACT RNAMOD [] -xref: RNAMOD:104 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001382 -name: five_isopentenylaminomethyl_two_prime_O_methyluridine -def: "5_isopentenylaminomethyl_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2'-O-methyluridine" EXACT [] -synonym: "five isopentenylaminomethyl two prime O methyluridine" EXACT [] -synonym: "inm5Um" EXACT RNAMOD [] -xref: RNAMOD:105 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001383 -name: histone_binding_site -def: "A region of a DNA molecule that is bound by a histone." [SO:ke] -synonym: "histone binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0001384 -name: CDS_fragment -synonym: "CDS fragment" EXACT [] -synonym: "incomplete CDS" EXACT [] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001385 -name: modified_amino_acid_feature -def: "A post translationally modified amino acid feature." [SO:ke] -synonym: "modified amino acid feature" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001386 -name: modified_glycine -def: "A post translationally modified glycine amino acid feature." [SO:ke] -synonym: "ModGly" EXACT AAMOD [] -synonym: "modified glycine" EXACT [] -xref: MOD:00908 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001387 -name: modified_L_alanine -def: "A post translationally modified alanine amino acid feature." [SO:ke] -synonym: "ModAla" EXACT AAMOD [] -synonym: "modified L alanine" EXACT [] -synonym: "modified L-alanine" EXACT [] -xref: MOD:00901 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001388 -name: modified_L_asparagine -def: "A post translationally modified asparagine amino acid feature." [SO:ke] -synonym: "ModAsn" EXACT AAMOD [] -synonym: "modified L asparagine" EXACT [] -synonym: "modified L-asparagine" EXACT [] -xref: MOD:00903 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001389 -name: modified_L_aspartic_acid -def: "A post translationally modified aspartic acid amino acid feature." [SO:ke] -synonym: "ModAsp" EXACT AAMOD [] -synonym: "modified L aspartic acid" EXACT [] -synonym: "modified L-aspartic acid" EXACT [] -xref: MOD:00904 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001390 -name: modified_L_cysteine -def: "A post translationally modified cysteine amino acid feature." [SO:ke] -synonym: "ModCys" EXACT AAMOD [] -synonym: "modified L cysteine" EXACT [] -synonym: "modified L-cysteine" EXACT [] -xref: MOD:00905 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001391 -name: modified_L_glutamic_acid -synonym: "ModGlu" EXACT AAMOD [] -synonym: "modified L glutamic acid" EXACT [] -synonym: "modified L-glutamic acid" EXACT [] -xref: MOD:00906 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001392 -name: modified_L_threonine -def: "A post translationally modified threonine amino acid feature." [SO:ke] -synonym: "modified L threonine" EXACT [] -synonym: "modified L-threonine" EXACT [] -synonym: "ModThr" EXACT AAMOD [] -xref: MOD:00917 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001393 -name: modified_L_tryptophan -def: "A post translationally modified tryptophan amino acid feature." [SO:ke] -synonym: "modified L tryptophan" EXACT [] -synonym: "modified L-tryptophan" EXACT [] -synonym: "ModTrp" EXACT AAMOD [] -xref: MOD:00918 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001394 -name: modified_L_glutamine -def: "A post translationally modified glutamine amino acid feature." 
[SO:ke] -synonym: "ModGln" EXACT [] -synonym: "modified L glutamine" EXACT [] -synonym: "modified L-glutamine" EXACT [] -xref: MOD:00907 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001395 -name: modified_L_methionine -def: "A post translationally modified methionine amino acid feature." [SO:ke] -synonym: "modified L methionine" EXACT [] -synonym: "modified L-methionine" EXACT [] -synonym: "ModMet" EXACT AAMOD [] -xref: MOD:00913 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001396 -name: modified_L_isoleucine -def: "A post translationally modified isoleucine amino acid feature." [SO:ke] -synonym: "modified L isoleucine" EXACT [] -synonym: "modified L-isoleucine" EXACT [] -synonym: "ModIle" EXACT AAMOD [] -xref: MOD:00910 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001397 -name: modified_L_phenylalanine -def: "A post translationally modified phenylalanine amino acid feature." [SO:ke] -synonym: "modified L phenylalanine" EXACT [] -synonym: "modified L-phenylalanine" EXACT [] -synonym: "ModPhe" EXACT AAMOD [] -xref: MOD:00914 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001398 -name: modified_L_histidine -def: "A post translationally modified histidie amino acid feature." [SO:ke] -synonym: "ModHis" EXACT [] -synonym: "modified L histidine" EXACT [] -synonym: "modified L-histidine" EXACT [] -xref: MOD:00909 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001399 -name: modified_L_serine -def: "A post translationally modified serine amino acid feature." [SO:ke] -synonym: "modified L serine" EXACT [] -synonym: "modified L-serine" EXACT [] -synonym: "MosSer" EXACT AAMOD [] -xref: MOD:00916 "http://www.psidev.info/index.php?q=node/104" -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001400 -name: modified_L_lysine -def: "A post translationally modified lysine amino acid feature." 
[SO:ke] -synonym: "modified L lysine" EXACT [] -synonym: "modified L-lysine" EXACT [] -synonym: "ModLys" EXACT AAMOD [] -xref: MOD:00912 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001401 -name: modified_L_leucine -def: "A post translationally modified leucine amino acid feature." [SO:ke] -synonym: "modified L leucine" EXACT [] -synonym: "modified L-leucine " EXACT [] -synonym: "ModLeu" EXACT AAMOD [] -xref: MOD:00911 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001402 -name: modified_L_selenocysteine -def: "A post translationally modified selenocysteine amino acid feature." [SO:ke] -synonym: "modified L selenocysteine" EXACT [] -synonym: "modified L-selenocysteine" EXACT [] -xref: MOD:01158 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001403 -name: modified_L_valine -def: "A post translationally modified valine amino acid feature." [SO:ke] -synonym: "modified L valine" EXACT [] -synonym: "modified L-valine" EXACT [] -synonym: "ModVal" EXACT AAMOD [] -xref: MOD:00920 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001404 -name: modified_L_proline -def: "A post translationally modified proline amino acid feature." [SO:ke] -synonym: "modified L proline" EXACT [] -synonym: "modified L-proline " EXACT [] -synonym: "ModPro" EXACT AAMOD [] -xref: MOD:00915 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001405 -name: modified_L_tyrosine -def: "A post translationally modified tyrosine amino acid feature." [SO:ke] -synonym: "modified L tyrosine" EXACT [] -synonym: "modified L-tyrosine" EXACT [] -synonym: "ModTry" EXACT AAMOD [] -xref: MOD:00919 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001406 -name: modified_L_arginine -def: "A post translationally modified arginine amino acid feature." [SO:ke] -synonym: "ModArg" EXACT AAMOD [] -synonym: "modified L arginine" EXACT [] -synonym: "modified L-arginine" EXACT [] -xref: MOD:00902 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001407 -name: peptidyl -def: "An attribute describing the nature of a proteinaceous polymer, where by the amino acid units are joined by peptide bonds." [SO:ke] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0001408 -name: cleaved_for_gpi_anchor_region -def: "The C-terminal residues of a polypeptide which are exchanged for a GPI-anchor." [EBI:rh] -synonym: "cleaved for gpi anchor region" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001409 -name: biomaterial_region -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001410 -name: experimental_feature -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -def: "A region that is defined according to its relations with other regions within the same sequence." [SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001413 -name: translocation_breakpoint -def: "The point within a chromosome where a translocation begins or ends." [SO:cb] -synonym: "translocation breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001414 -name: insertion_breakpoint -def: "The point within a chromosome where a insertion begins or ends." 
[SO:cb] -synonym: "insertion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001415 -name: deletion_breakpoint -def: "The point within a chromosome where a deletion begins or ends." [SO:cb] -synonym: "deletion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001416 -name: five_prime_flanking_region -def: "A flanking region located five prime of a specific region." [SO:chado] -synonym: "5' flanking region" RELATED [] -synonym: "five prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001417 -name: three_prime_flanking_region -def: "A flanking region located three prime of a specific region." [SO:chado] -synonym: "3' flanking region" RELATED [] -synonym: "three prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001418 -name: transcribed_fragment -def: "An experimental region, defined by a tiling array experiment to be transcribed at some level." [SO:ke] -comment: Term requested by the MODencode group. -synonym: "transcribed fragment" EXACT [] -synonym: "transfrag" RELATED [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001419 -name: cis_splice_site -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -def: "Primary transcript region bordering trans-splice junction." [SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001421 -name: splice_junction -def: "The boundary between an intron and an exon." [SO:ke] -synonym: "splice boundary" EXACT [] -synonym: "splice junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001422 -name: conformational_switch -def: "A region of a polypeptide, involved in the transition from one conformational state to another." 
[SO:ke] -comment: MM Young, K Kirshenbaum, KA Dill & S Highsmith. Predicting conformational switches in proteins. Protein Science, 1999, 8, 1752-64. K. Kirshenbaum, M.M. Young and S. Highsmith. Predicting Allosteric Switches in Myosins. Protein Science 8(9):1806-1815. 1999. -synonym: "polypeptide conformational switch" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001423 -name: dye_terminator_read -def: "A read produced by the dye terminator method of sequencing." [SO:ke] -synonym: "dye terminator read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001424 -name: pyrosequenced_read -def: "A read produced by pyrosequencing technology." [SO:ke] -comment: An example is a read produced by Roche 454 technology. -synonym: "pyorsequenced read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001425 -name: ligation_based_read -def: "A read produced by ligation based sequencing technologies." [SO:ke] -comment: An example of this kind of read is one produced by ABI SOLiD. -synonym: "ligation based read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001426 -name: polymerase_synthesis_read -def: "A read produced by the polymerase based sequence by synthesis method." [SO:ke] -comment: An example is a read produced by Illumina technology. -synonym: "polymerase synthesis read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001427 -name: cis_regulatory_frameshift_element -def: "A structural region in an RNA molecule which promotes ribosomal frameshifting of cis coding sequence." [RFAM:jd] -synonym: "cis regulatory frameshift element" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001428 -name: expressed_sequence_assembly -def: "A sequence assembly derived from expressed sequences." [SO:ke] -comment: From tracker [ 2372385 ] expressed_sequence_assembly. -synonym: "expressed sequence assembly" EXACT [] -is_a: SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0001429 -name: DNA_binding_site -def: "A region of a molecule that binds to DNA." [SO:ke] -synonym: "DNA binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001430 -name: polyA_junction -def: "The boundary between the UTR and the polyA sequence." [SO:ke] -synonym: "polyA junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001431 -name: cryptic_gene -def: "A gene that is not transcribed under normal conditions and is not critical to normal cellular functioning." [SO:ke] -synonym: "cryptic gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000976 ! cryptic - -[Term] -id: SO:0001432 -name: sequence_variant_affecting_polyadenylation -synonym: "mutation affecting polyadenylation" RELATED [] -synonym: "sequence variant affecting polyadenylation" EXACT [] -is_a: SO:1000070 ! sequence_variant_affecting_transcript_processing - -[Term] -id: SO:0001433 -name: three_prime_RACE_clone -def: "A three prime RACE (Rapid Amplification of cDNA Ends) clone is a cDNA clone copied from the 3' end of an mRNA (using a poly-dT primer to capture the polyA tail and a gene-specific or randomly primed 5' primer), and spliced into a vector for propagation in a suitable host." [modENCODE:nlw] -synonym: "3' RACE clone" RELATED [] -is_a: SO:0000317 ! cDNA_clone - -[Term] -id: SO:0001434 -name: cassette_pseudogene -def: "A cassette pseudogene is a kind of gene in an inactive form which may recombine at a telomeric locus to form a functional copy." [SO:ke] -comment: Requested by the Trypanosome community. -synonym: "cassette pseudogene" EXACT [] -synonym: "cassette type psedogene" RELATED [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0001435 -name: alanine -comment: A place holder for a cross product with chebi. -synonym: "A" EXACT aa1 [] -synonym: "Ala" EXACT aa3 [] -is_a: SO:0001237 ! 
amino_acid - -[Term] -id: SO:0001436 -name: valine -comment: A place holder for a cross product with chebi. -synonym: "V" EXACT aa1 [] -synonym: "Val" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001437 -name: leucine -comment: A place holder for a cross product with chebi. -synonym: "L" EXACT aa1 [] -synonym: "Leu" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001438 -name: isoleucine -comment: A place holder for a cross product with chebi. -synonym: "I" EXACT aa1 [] -synonym: "Ile" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001439 -name: proline -comment: A place holder for a cross product with chebi. -synonym: "P" EXACT aa1 [] -synonym: "Pro" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001440 -name: tryptophan -comment: A place holder for a cross product with chebi. -synonym: "Trp" EXACT aa3 [] -synonym: "W" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001441 -name: phenylalanine -comment: A place holder for a cross product with chebi. -synonym: "F" EXACT aa1 [] -synonym: "Phe" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001442 -name: methionine -comment: A place holder for a cross product with chebi. -synonym: "M" EXACT aa1 [] -synonym: "Met" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001443 -name: glycine -comment: A place holder for a cross product with chebi. -synonym: "G" EXACT aa1 [] -synonym: "Gly" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001444 -name: serine -comment: A place holder for a cross product with chebi. -synonym: "S" EXACT aa1 [] -synonym: "Ser" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001445 -name: threonine -comment: A place holder for a cross product with chebi. -synonym: "T" EXACT aa1 [] -synonym: "Thr" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001446 -name: tyrosine -comment: A place holder for a cross product with chebi. 
-synonym: "Tyr" EXACT aa3 [] -synonym: "Y" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001447 -name: cysteine -comment: A place holder for a cross product with chebi. -synonym: "C" EXACT aa1 [] -synonym: "Cys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001448 -name: glutamine -comment: A place holder for a cross product with chebi. -synonym: "Gln" EXACT aa3 [] -synonym: "Q" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001449 -name: asparagine -comment: A place holder for a cross product with chebi. -synonym: "Asn" EXACT aa3 [] -synonym: "N" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001450 -name: lysine -comment: A place holder for a cross product with chebi. -synonym: "K" EXACT aa1 [] -synonym: "Lys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001451 -name: arginine -comment: A place holder for a cross product with chebi. -synonym: "Arg" EXACT aa3 [] -synonym: "R" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001452 -name: histidine -comment: A place holder for a cross product with chebi. -synonym: "H" EXACT aa1 [] -synonym: "His" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001453 -name: aspartic_acid -comment: A place holder for a cross product with chebi. -synonym: "Asp" EXACT aa3 [] -synonym: "aspartic acid" EXACT [] -synonym: "D" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001454 -name: glutamic_acid -comment: A place holder for a cross product with chebi. -synonym: "E" EXACT aa1 [] -synonym: "Glu" EXACT aa3 [] -synonym: "glutamic acid" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001455 -name: selenocysteine -comment: A place holder for a cross product with chebi. -synonym: "Sec" EXACT aa3 [] -synonym: "U" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001456 -name: pyrrolysine -comment: A place holder for a cross product with chebi. 
-synonym: "O" EXACT aa1 [] -synonym: "Pyl" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001457 -name: transcribed_cluster -def: "A region defined by a set of transcribed sequences from the same gene or expressed pseudogene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "transcribed cluster" EXACT [] -synonym: "unigene cluster" RELATED [] -is_a: SO:0001410 ! experimental_feature -relationship: has_part SO:0000695 ! reagent - -[Term] -id: SO:0001458 -name: unigene_cluster -def: "A kind of transcribed_cluster defined by a set of transcribed sequences from the a unique gene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "unigene cluster" RELATED [] -is_a: SO:0001457 ! transcribed_cluster - -[Term] -id: SO:0001459 -name: CRISPR -def: "Clustered Palindromic Repeats interspersed with bacteriophage derived spacer sequences." [RFAM:jd] -synonym: "Clustered_Regularly_Interspaced_Short_Palindromic_Repeat" EXACT [] -synonym: "CRISPR element" EXACT [] -xref: http:en.wikipedia.org/wiki/CRISPR -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0001460 -name: insulator_binding_site -def: "A protein_binding_site located within an insulator." [SO:ke] -comment: See tracker ID 2060908. -synonym: "insulator binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -relationship: part_of SO:0000627 ! insulator - -[Term] -id: SO:0001461 -name: enhancer_binding_site -def: "A protein_binding_site located within an enhancer." [SO:ke] -synonym: "enhancer binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -relationship: part_of SO:0000165 ! enhancer - -[Term] -id: SO:0001462 -name: contig_collection -def: "A collection of contigs." [SO:ke] -comment: See tracker ID: 2138359. -synonym: "contig collection" EXACT [] -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001085 ! sequence_conflict -intersection_of: has_part SO:0000149 ! 
contig - -[Term] -id: SO:0001463 -name: lincRNA -def: "A multiexonic non-coding RNA transcribed by RNA polymerase II." [PMID:19182780, SO:ke] -synonym: "large intervening non-coding RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001464 -name: UST -def: "An EST spanning part or all of the untranslated regions of a protein-coding transcript." [SO:nlw] -synonym: "UTR sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001465 -name: three_prime_UST -def: "A UST located in the 3'UTR of a protein-coding transcript." [SO:nlw] -synonym: "3' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001466 -name: five_prime_UST -def: "An UST located in the 5'UTR of a protein-coding transcript." [SO:nlw] -synonym: "5' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001467 -name: RST -def: "A tag produced from a single sequencing read from a RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "RACE sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001468 -name: three_prime_RST -def: "A tag produced from a single sequencing read from a 3'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "3' RST" EXACT [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001469 -name: five_prime_RST -def: "A tag produced from a single sequencing read from a 5'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "5' RST" RELATED [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001470 -name: UST_match -def: "A match against an UST sequence." [SO:nlw] -synonym: "UST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001471 -name: RST_match -def: "A match against an RST sequence." [SO:nlw] -synonym: "RST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001472 -name: primer_match -def: "A nucleotide match to a primer sequence." [SO:nlw] -synonym: "primer match" EXACT [] -is_a: SO:0000347 ! 
nucleotide_match - -[Term] -id: SO:0001473 -name: miRNA_antiguide -def: "A region of the pri miRNA that basepairs with the guide to form the hairpin." [SO:ke] -synonym: "miRNA antiguide " EXACT [] -synonym: "miRNA passenger strand" EXACT [] -synonym: "miRNA star" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-05-27T03:35:43Z - -[Term] -id: SO:0001474 -name: trans_splice_junction -def: "The boundary between the spliced leader and the first exon of the mRNA." [SO:ke] -synonym: "trans-splice junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2009-07-13T04:50:49Z - -[Term] -id: SO:0001475 -name: outron -def: "A region of a primary transcript, that is removed via trans splicing." [PMID:16401417, SO:ke] -is_a: SO:0000835 ! primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-07-14T11:36:08Z - -[Term] -id: SO:0001476 -name: natural_plasmid -def: "A plasmid that occurs naturally." [SO:xp] -synonym: "natural plasmid" EXACT [] -is_a: SO:0000155 ! plasmid -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000782 ! natural -created_by: kareneilbeck -creation_date: 2009-09-01T03:43:06Z - -[Term] -id: SO:0001477 -name: gene_trap_construct -def: "A gene trap construct is a type of engineered plasmid which is designed to integrate into a genome and produce a fusion transcript between exons of the gene into which it inserts and a reporter element in the construct. Gene traps contain a splice acceptor, do not contain promoter elements for the reporter, and are mutagenic. Gene traps may be bicistronic with the second cassette containing a promoter driving an a selectable marker." [ZFIN:dh] -synonym: "gene trap construct" EXACT [] -is_a: SO:0000637 ! 
engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:49:09Z - -[Term] -id: SO:0001478 -name: promoter_trap_construct -def: "A promoter trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when inserted in close proximity to a promoter element. Promoter traps typically do not contain promoter elements and are mutagenic." [ZFIN:dh] -synonym: "promoter trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:52:01Z - -[Term] -id: SO:0001479 -name: enhancer_trap_construct -def: "An enhancer trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when the expression from a basic minimal promoter is enhanced by genomic enhancer elements. Enhancer traps contain promoter elements and are not usually mutagenic." [ZFIN:dh] -synonym: "enhancer trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:53:26Z - -[Term] -id: SO:0001480 -name: PAC_end -def: "A region of sequence from the end of a PAC clone that may provide a highly specific marker." [ZFIN:mh] -synonym: "PAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000154 ! PAC -created_by: kareneilbeck -creation_date: 2009-09-09T05:18:12Z - -[Term] -id: SO:0001481 -name: RAPD -def: "RAPD is a 'PCR product' where a sequence variant is identified through the use of PCR with random primers." [ZFIN:mh] -synonym: "Random Amplification Polymorphic DNA" EXACT [] -is_a: SO:0000006 ! PCR_product -created_by: kareneilbeck -creation_date: 2009-09-09T05:26:10Z - -[Term] -id: SO:0001482 -name: shadow_enhancer -synonym: "shadow enhancer" EXACT [] -is_a: SO:0000165 ! 
enhancer -created_by: kareneilbeck -creation_date: 2009-09-09T05:29:29Z - -[Term] -id: SO:0001483 -name: SNV -def: "SNVs are single base pair positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0001484 -name: X_element_combinatorial_repeat -def: "An X element combinatorial repeat is a repeat region located between the X element and the telomere or adjacent Y' element." [http://www.yeastgenome.org/help/glossary.html] -comment: X element combinatorial repeats contain Tbf1p binding sites,\nand possible functions include a role in telomerase-independent telomere\nmaintenance via recombination or as a barrier against transcriptional\nsilencing. These are usually present as a combination of one or more of\nseveral types of smaller elements (designated A, B, C, or D). This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "X element combinatorial repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T11:03:37Z - -[Term] -id: SO:0001485 -name: Y_prime_ element -def: "A Y' element is a repeat region (SO:0000657) located adjacent to telomeric repeats or X element combinatorial repeats, either as a single copy or tandem repeat of two to four copies." [http:http\://www.yeastgenome.org/help/glossary.html] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "Y' element" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! 
telomere -created_by: kareneilbeck -creation_date: 2009-11-10T12:08:57Z - -[Term] -id: SO:0001486 -name: standard_draft -def: "The status of a whole genome sequence, where the data is minimally filtered or un-filtered, from any number of sequencing platforms, and is assembled into contigs. Genome sequence of this quality may harbour regions of poor quality and can be relatively incomplete." [DOI:10.1126] -synonym: "standard draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:48:32Z - -[Term] -id: SO:0001487 -name: high_quality_draft -def: "The status of a whole genome sequence, where overall coverage represents at least 90 percent of the genome." [DOI:10.1126] -synonym: "high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:52:36Z - -[Term] -id: SO:0001488 -name: improved_high_quality_draft -def: "The status of a whole genome sequence, where additional work has been performed, using either manual or automated methods, such as gap resolution." [DOI:10.1126] -synonym: "improved high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:54:35Z - -[Term] -id: SO:0001489 -name: annotation_directed_improved_draft -def: "The status of a whole genome sequence,where annotation, and verification of coding regions has occurred." [DOI:10.1126] -synonym: "annotation directed improvement" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:57:10Z - -[Term] -id: SO:0001490 -name: noncontiguous_finished -def: "The status of a whole genome sequence, where the assembly is high quality, closure approaches have been successful for most gaps, misassemblies and low quality regions." [DOI:10.1126] -synonym: "non contiguous finished" EXACT [] -is_a: SO:0001499 ! 
whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:01:07Z - -[Term] -id: SO:0001491 -name: finished_genome -def: "The status of a whole genome sequence, with less than 1 error per 100,000 base pairs." [DOI:10.1126] -synonym: "finished" EXACT [] -synonym: "finished genome" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:04:43Z - -[Term] -id: SO:0001492 -name: intronic_regulatory_region -def: "A regulatory region that is part of an intron." [SO:ke] -synonym: "intronic regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000188 ! intron -created_by: kareneilbeck -creation_date: 2009-11-08T02:48:02Z - -[Term] -id: SO:0001493 -name: centromere_DNA_Element_I -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region composed of 8-11bp which enables binding by the centromere binding factor 1(Cbf1p)." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEI" EXACT [] -synonym: "Centromere DNA Element I" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000577 ! centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:47:23Z - -[Term] -id: SO:0001494 -name: centromere_DNA_Element_II -def: "A centromere DNA Element II (CDEII) is part a conserved region of the centromere, consisting of a consensus region that is AT-rich and ~ 75-100 bp in length." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEII" EXACT [] -synonym: "centromere DNA Element II" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000577 ! 
centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:51:26Z - -[Term] -id: SO:0001495 -name: centromere_DNA_Element_III -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region that consists of a 25-bp which enables binding by the centromere DNA binding factor 3 (CBF3) complex." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEIII" EXACT [] -synonym: "centromere DNA Element III" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000577 ! centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:54:47Z - -[Term] -id: SO:0001496 -name: telomeric_repeat -def: "The telomeric repeat is a repeat region, part of the chromosome, which in yeast, is a G-rich terminal sequence of the form (TG(1-3))n or more precisely ((TG)(1-6)TG(2-3))n." [PMID:8720065] -comment: The repeats are maintained by telomerase and there is generally 300 (+/-) 75 bp of TG(1-3) at a given end. Telomeric repeats function in completing chromosome replication and protecting the ends from degradation and end-to-end fusions. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880739. -synonym: "telomeric repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-09T06:00:42Z - -[Term] -id: SO:0001497 -name: X_element -def: "The X element is a conserved region, of the telomere, of ~475 bp that contains an ARS sequence and in most cases an Abf1p binding site." [http://www.yeastgenome.org/help/glossary.html#xelemcoresequence] -comment: Possible functions include roles in chromosomal segregation,\nmaintenance of chromosome stability, recombinational sequestering, or as a\nbarrier to transcriptional silencing. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "X element" RELATED [] -is_a: SO:0000330 ! 
conserved_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T10:56:54Z - -[Term] -id: SO:0001498 -name: YAC_end -def: "A region of sequence from the end of a YAC clone that may provide a highly specific marker." [SO:ke] -synonym: "YAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000152 ! YAC -created_by: kareneilbeck -creation_date: 2009-11-19T11:07:18Z - -[Term] -id: SO:0001499 -name: whole_genome_sequence_status -def: "The status of whole genome sequence." [DOI:10.1126] -comment: This terms and children were added to SO in response to tracker request by Patrick Chain. The paper Genome Project Standards in a New Era of Sequencing. Science October 9th 2009, addresses these terms. -synonym: "whole genome sequence status" EXACT [] -is_a: SO:0000905 ! status -created_by: kareneilbeck -creation_date: 2009-10-23T12:47:47Z - -[Term] -id: SO:0001500 -name: heritable_phenotypic_marker -def: "A biological_region characterized as a single heritable trait in a phenotype screen. The heritable phenotype may be mapped to a chromosome but generally has not been characterized to a specific gene locus." [JAX:hdene] -synonym: "heritable phenotypic marker" EXACT [] -synonym: "phenotypic marker" EXACT [] -is_a: SO:0001645 ! genetic_marker -created_by: kareneilbeck -creation_date: 2009-12-07T01:50:55Z - -[Term] -id: SO:0001501 -name: peptide_collection -def: "A collection of peptide sequences." [BBOP:nlw] -comment: Term requested via tracker ID: 2910829. -synonym: "peptide collection" EXACT [] -synonym: "peptide set" EXACT [] -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0000104 ! polypeptide -created_by: kareneilbeck -creation_date: 2009-12-11T10:58:58Z - -[Term] -id: SO:0001502 -name: high_identity_region -def: "An experimental feature with high sequence identity to another sequence." [SO:ke] -comment: Requested by tracker ID: 2902685. 
-synonym: "high identity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2009-12-11T11:06:05Z - -[Term] -id: SO:0001503 -name: processed_transcript -def: "A transcript for which no open reading frame has been identified and for which no other function has been determined." [MGI:hdeen] -comment: Ensembl and Vega also use this term name. Requested by Howard Deen of MGI. -synonym: "processed transcript" EXACT [] -is_a: SO:0000673 ! transcript -created_by: kareneilbeck -creation_date: 2009-12-21T05:37:14Z - -[Term] -id: SO:0001504 -name: assortment_derived_variation -def: "A chromosome variation derived from an event during meiosis." [SO:ke] -synonym: "assortment derived variation" RELATED [] -is_a: SO:0000240 ! chromosome_variation -created_by: kareneilbeck -creation_date: 2010-03-02T05:03:18Z - -[Term] -id: SO:0001505 -name: reference_genome -def: "A collection of sequences (often chromosomes) taken as the standard for a given organism and genome assembly." [SO:ke] -synonym: "reference genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:10:03Z - -[Term] -id: SO:0001506 -name: variant_genome -def: "A collection of sequences (often chromosomes) of an individual." [SO:ke] -synonym: "variant genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:11:25Z - -[Term] -id: SO:0001507 -name: variant_collection -def: "A collection of one or more sequences of an individual." [SO:ke] -synonym: "variant collection" RELATED [] -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0001059 ! sequence_alteration -created_by: kareneilbeck -creation_date: 2010-03-03T02:13:28Z - -[Term] -id: SO:0001508 -name: alteration_attribute -synonym: "alteration attribute" EXACT [] -is_a: SO:0000733 ! 
feature_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:53:23Z - -[Term] -id: SO:0001509 -name: chromosomal_variation_attribute -synonym: "chromosomal variation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:54:30Z - -[Term] -id: SO:0001510 -name: intrachromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:25Z - -[Term] -id: SO:0001511 -name: interchromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:43Z - -[Term] -id: SO:0001512 -name: insertion_attribute -def: "A quality of a chromosomal insertion,." [SO:ke] -synonym: "insertion attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:56Z - -[Term] -id: SO:0001513 -name: tandem -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:37Z - -[Term] -id: SO:0001514 -name: direct -def: "A quality of an insertion where the insert is not in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:49Z - -[Term] -id: SO:0001515 -name: inverted -def: "A quality of an insertion where the insert is in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:40Z - -[Term] -id: SO:0001516 -name: free -def: "The quality of a duplication where the new region exists independently of the original." [SO:ke] -is_a: SO:0001523 ! duplication_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:51Z - -[Term] -id: SO:0001517 -name: inversion_attribute -synonym: "inversion attribute" EXACT [] -is_a: SO:0001508 ! 
alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:10Z - -[Term] -id: SO:0001518 -name: pericentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:24Z - -[Term] -id: SO:0001519 -name: paracentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:35Z - -[Term] -id: SO:0001520 -name: translocaton_attribute -synonym: "translocation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:47Z - -[Term] -id: SO:0001521 -name: reciprocal -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:34Z - -[Term] -id: SO:0001522 -name: insertional -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:51Z - -[Term] -id: SO:0001523 -name: duplication_attribute -synonym: "duplication attribute" RELATED [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-05T01:56:33Z - -[Term] -id: SO:0001524 -name: chromosomally_aberrant_genome -synonym: "chromosomally aberrant genome" RELATED [] -is_a: SO:0001506 ! variant_genome -created_by: kareneilbeck -creation_date: 2010-03-05T02:21:00Z - -[Term] -id: SO:0001525 -name: assembly_error_correction -def: "A region of sequence where the final nucleotide assignment differs from the original assembly due to an improvement that replaces a mistake." [SO:ke] -synonym: "assembly error correction" RELATED [] -is_a: SO:0000413 ! sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:16:31Z - -[Term] -id: SO:0001526 -name: base_call_error_correction -def: "A region of sequence where the final nucleotide assignment is different from that given by the base caller due to an improvement that replaces a mistake." [SO:ke] -synonym: "base call error correction" RELATED [] -is_a: SO:0000413 ! 
sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:18:07Z - -[Term] -id: SO:0001527 -name: peptide_localization_signal -def: "A region of peptide sequence used to target the polypeptide molecule to a specific organelle." [SO:ke] -subset: SOFA -synonym: "localization signal" RELATED [] -synonym: "peptide localization signal" EXACT [] -is_a: SO:0000839 ! polypeptide_region -created_by: kareneilbeck -creation_date: 2010-03-11T02:15:05Z - -[Term] -id: SO:0001528 -name: nuclear_localization_signal -def: "A polypeptide region that targets a polypeptide to the nucleus." [SO:ke] -synonym: "NLS" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_localization_signal "wikipedia" -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:16:38Z - -[Term] -id: SO:0001529 -name: endosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the endosome." [SO:ke] -synonym: "endosomal localization signal" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:20:58Z - -[Term] -id: SO:0001530 -name: lysosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the lysosome." [SO:ke] -synonym: "lysosomal localization signal" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:24:10Z - -[Term] -id: SO:0001531 -name: nuclear_export_signal -def: "A polypeptide region that targets a polypeptide to he cytoplasm." [SO:ke] -synonym: "NES" EXACT [] -synonym: "nuclear export signal" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_export_signal -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:25:25Z - -[Term] -id: SO:0001532 -name: recombination_signal_sequence -def: "A region recognized by a recombinase." 
[SO:ke] -synonym: "recombination signal sequence" RELATED [] -xref: http://en.wikipedia.org/wiki/Recombination_Signal_Sequences "wikipedia" -is_a: SO:0000299 ! specific_recombination_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:16:47Z - -[Term] -id: SO:0001533 -name: cryptic_splice_site -def: "A splice site that is in part of the transcript not normally spliced. They occur via mutation or transcriptional error." [SO:ke] -synonym: "cryptic splice signal" RELATED [] -synonym: "cryptic splice site" EXACT [] -is_a: SO:0000162 ! splice_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:25:06Z - -[Term] -id: SO:0001534 -name: nuclear_rim_localization_signal -def: "A polypeptide region that targets a polypeptide to the nuclear rim." [SO:ke] -synonym: "nuclear rim localization signal" RELATED [] -xref: PMID:16027110 -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T03:31:30Z - -[Term] -id: SO:0001535 -name: p_element -def: "A P_element is a DNA transposon responsible for hybrid dysgenesis." [SO:ke] -synonym: "P element" RELATED [] -is_a: SO:0000182 ! DNA_transposon -created_by: kareneilbeck -creation_date: 2010-03-12T03:40:33Z - -[Term] -id: SO:0001536 -name: functional_variant -def: "A sequence variant which alters a biological process or function." [SO:ke] -synonym: "functional variant" EXACT [] -is_a: SO:0001060 ! sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:30:25Z - -[Term] -id: SO:0001537 -name: structural_variant -def: "A sequence variant that changes one or more sequence features." [SO:ke] -synonym: "structural variant" RELATED [] -is_a: SO:0001060 ! sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:31:01Z - -[Term] -id: SO:0001538 -name: transcript_function_variant -def: "A sequence variant which alters the functioning of a transcript." [SO:ke] -synonym: "transcript function variant" EXACT [] -is_a: SO:0001536 ! 
functional_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:32:58Z - -[Term] -id: SO:0001539 -name: translational_product_function_variant -def: "A sequence variant that affects the functioning of a translational product." [SO:ke] -synonym: "translational product variant" EXACT [] -is_a: SO:0001536 ! functional_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:46:15Z - -[Term] -id: SO:0001540 -name: level_of_transcript_variant -def: "A sequence variant which alters the level of a transcript." [SO:ke] -synonym: "level of transcript variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:47:07Z - -[Term] -id: SO:0001541 -name: decreased_transcript_level -def: "A sequence variant that increases the level of mature, spliced and processed RNA." [SO:ke] -synonym: "decreased transcript level" EXACT [] -is_a: SO:0001540 ! level_of_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:47:47Z - -[Term] -id: SO:0001542 -name: increased_transcript_level -def: "A sequence variant that increases the level of mature, spliced and processed RNA." [SO:ke] -synonym: "increased transcript level" EXACT [] -is_a: SO:0001540 ! level_of_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:48:17Z - -[Term] -id: SO:0001543 -name: transcript_processing_variant -def: "A sequence variant that affects the post transcriptional processing of a transcript." [SO:ke] -synonym: "transcript processing variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:48:48Z - -[Term] -id: SO:0001544 -name: editing_variant -def: "A variant that changes editing of a transcript." [SO:ke] -synonym: "editing variant" EXACT [] -is_a: SO:0001543 ! 
transcript_processing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:49:25Z - -[Term] -id: SO:0001545 -name: polyadenylation_variant -def: "A sequence variant that changes polyadenylation." [SO:ke] -synonym: "polyadenylation variant" EXACT [] -is_a: SO:0001543 ! transcript_processing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:49:40Z - -[Term] -id: SO:0001546 -name: transcript_stability_variant -def: "A variant that changes the stability of a transcript." [SO:ke] -synonym: "transcript stability variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:01Z - -[Term] -id: SO:0001547 -name: decrease_transcript_stability -def: "A sequence variant that decreases transcript stability." [SO:ke] -synonym: "decrease transcript stability" EXACT [] -is_a: SO:0001546 ! transcript_stability_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:23Z - -[Term] -id: SO:0001548 -name: increase_transcript_stability -def: "A sequence variant that increases transcript stability." [SO:ke] -synonym: "increase transcript stability" EXACT [] -is_a: SO:0001546 ! transcript_stability_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:39Z - -[Term] -id: SO:0001549 -name: transcription_variant -def: "A variant that changes alters the transcription of a transcript." [SO:ke] -synonym: "transcription variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:51:26Z - -[Term] -id: SO:0001550 -name: rate_of_transcription_variant -def: "A sequence variant that changes the rate of transcription." [SO:ke] -synonym: "rate of transcription variant" EXACT [] -is_a: SO:0001549 ! transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:51:50Z - -[Term] -id: SO:0001551 -name: increase_transcription_rate -def: "A sequence variant that increases the rate of transcription." 
[SO:ke] -synonym: "increase transcription rate" RELATED [] -is_a: SO:0001550 ! rate_of_transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:52:17Z - -[Term] -id: SO:0001552 -name: decrease_transcription_rate -def: "A sequence variant that decreases the rate of transcription." [SO:ke] -synonym: "decrease transcription rate" EXACT [] -is_a: SO:0001550 ! rate_of_transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:52:43Z - -[Term] -id: SO:0001553 -name: translational_product_level_variant -synonym: "translational product level variant" EXACT [] -is_a: SO:0001539 ! translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:53:32Z - -[Term] -id: SO:0001554 -name: polypeptide_function_variant -def: "A sequence variant which changes polypeptide functioning." [SO:ke] -synonym: "polypeptide function variant" EXACT [] -is_a: SO:0001539 ! translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:53:54Z - -[Term] -id: SO:0001555 -name: decreased_translational_product_level -def: "A sequence variant which decreases the translational product level." [SO:ke] -synonym: "decrease translational product level" EXACT [] -is_a: SO:0001553 ! translational_product_level_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:54:25Z - -[Term] -id: SO:0001556 -name: increased_translational_product_level -def: "A sequence variant which increases the translational product level." [SO:ke] -synonym: "increase translational product level" EXACT [] -is_a: SO:0001553 ! translational_product_level_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:55:25Z - -[Term] -id: SO:0001557 -name: polypeptide_gain_of_function -def: "A sequence variant which causes gain of polypeptide function." [SO:ke] -synonym: "polypeptide gain of function" EXACT [] -is_a: SO:0001554 ! 
polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:12Z - -[Term] -id: SO:0001558 -name: polypeptide_localization_variant -def: "A sequence variant which changes the localization of a polypeptide." [SO:ke] -synonym: "polypeptide localization variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:37Z - -[Term] -id: SO:0001559 -name: polypeptide_loss_of_function -def: "A sequence variant that causes the loss of a polypeptide function." [SO:ke] -synonym: "polypeptide loss of function" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:58Z - -[Term] -id: SO:0001560 -name: inactive_ligand_binding_site -def: "A sequence variant that causes the inactivation of a ligand binding site." [SO:ke] -synonym: "inactive ligand binding site" EXACT [] -is_a: SO:0001559 ! polypeptide_loss_of_function -created_by: kareneilbeck -creation_date: 2010-03-22T11:58:00Z - -[Term] -id: SO:0001561 -name: polypeptide_partial_loss_of_function -def: "A sequence variant that causes some but not all loss of polypeptide function." [SO:ke] -synonym: "polypeptide partial loss of function" EXACT [] -is_a: SO:0001559 ! polypeptide_loss_of_function -created_by: kareneilbeck -creation_date: 2010-03-22T11:58:32Z - -[Term] -id: SO:0001562 -name: polypeptide_post_translational_processing_variant -def: "A sequence variant that causes a change in post translational processing of the peptide." [SO:ke] -synonym: "polypeptide post translational processing variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:59:06Z - -[Term] -id: SO:0001563 -name: copy_number_change -def: "A sequence variant where copies of a feature (CNV) are either increased or decreased." [SO:ke] -synonym: "copy number change" EXACT [] -is_a: SO:0001537 ! 
structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:27:33Z - -[Term] -id: SO:0001564 -name: gene_structure_variant -def: "A sequence variant where the structure of the gene is changed." [SO:ke] -synonym: "gene structure variant" EXACT [] -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:01Z - -[Term] -id: SO:0001565 -name: gene_fusion -def: "A sequence variant whereby a two genes have become joined." [SO:ke] -synonym: "gene fusion" EXACT [] -is_a: SO:0001564 ! gene_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:28Z - -[Term] -id: SO:0001566 -name: regulatory_region_variant -def: "A sequence variant located with a regulatory region such as a promoter." [SO:ke] -comment: EBI term: Regulatory region variations - In regulatory region annotated by Ensembl. -synonym: "regulatory region variant" EXACT [] -synonym: "regulatory_region_" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:48Z - -[Term] -id: SO:0001567 -name: stop_retained_variant -def: "A sequence variant where at least one base in the terminator codon is changed, but the terminator remains." [SO:ke] -synonym: "stop retained variant" EXACT [] -is_a: SO:0001590 ! terminator_codon_variant -created_by: kareneilbeck -creation_date: 2010-04-19T05:02:30Z - -[Term] -id: SO:0001568 -name: splicing_variant -def: "A sequence variant that changes the process of splicing." [SO:ke] -synonym: "splicing variant" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:29:22Z - -[Term] -id: SO:0001569 -name: cryptic_splice_site_variant -def: "A sequence variant causing a new (functional) splice site." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "cryptic splice site activation" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:29:41Z - -[Term] -id: SO:0001570 -name: cryptic_splice_acceptor -def: "A sequence variant whereby a new splice site is created due to the activation of a new acceptor." [SO:ke] -synonym: "cryptic splice acceptor" EXACT [] -is_a: SO:0001569 ! cryptic_splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:30:11Z - -[Term] -id: SO:0001571 -name: cryptic_splice_donor -def: "A sequence variant whereby a new splice site is created due to the activation of a new donor." [SO:ke] -synonym: "cryptic splice donor" EXACT [] -is_a: SO:0001569 ! cryptic_splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:30:35Z - -[Term] -id: SO:0001572 -name: exon_loss -def: "A sequence variant whereby an exon is lost from the transcript." [SO:ke] -synonym: "exon loss" EXACT [] -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:09Z - -[Term] -id: SO:0001573 -name: intron_gain -def: "A sequence variant whereby an intron is gained by the processed transcript; usually a result of an alteration of the donor or acceptor." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "intron gain" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:25Z - -[Term] -id: SO:0001574 -name: splice_acceptor_variant -def: "A splice variant that changes the2 base region at the 3' end of an intron." [SO:ke] -synonym: "splice acceptor variant" EXACT [] -is_a: SO:0001629 ! 
splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:52Z - -[Term] -id: SO:0001575 -name: splice_donor_variant -def: "A splice variant that changes the2 base region at the 5' end of an intron." [SO:ke] -synonym: "splice donor variant" EXACT [] -is_a: SO:0001629 ! splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:32:10Z - -[Term] -id: SO:0001576 -name: transcript_variant -def: "A sequence variant that changes the structure of the transcript." [SO:ke] -synonym: "transcript variant" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:32:41Z - -[Term] -id: SO:0001577 -name: complex_change_in_transcript -def: "A transcript variant with a complex INDEL- Insertion or deletion that spans an exon/intron border or a coding sequence/UTR border." [http://uswest.ensembl.org/info/docs/variation/index.html] -comment: EBI term: Complex InDel - Insertion or deletion that spans an exon/intron border or a coding sequence/UTR border. -synonym: "complex_indel" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -synonym: "complext change in transcript" EXACT [] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:33:03Z - -[Term] -id: SO:0001578 -name: stop_lost -def: "A sequence variant where at least one base of the terminator codon (stop) is changed, resulting in an elongated transcript." [SO:ke] -comment: EBI term: Stop lost - In coding sequence, resulting in the loss of a stop codon. -synonym: "stop codon lost" EXACT [] -synonym: "stop lost" EXACT [] -synonym: "stop_lost" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001590 ! 
terminator_codon_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:46:42Z - -[Term] -id: SO:0001579 -name: transcript_sequence_variant -synonym: "transcript sequence variant" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001580 -name: coding_sequence_variant -def: "A sequence variant that changes the coding sequence." [SO:ke] -synonym: "coding sequence variant" EXACT [] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:34:36Z - -[Term] -id: SO:0001581 -name: codon_variant -def: "A sequence variant that changes at least one base in a codon." [SO:ke] -synonym: "coding variant" EXACT [] -synonym: "codon variant" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001580 ! coding_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:34:56Z - -[Term] -id: SO:0001582 -name: initiator_codon_change -def: "A codon variant that changes at least one base of the first codon of a transcript." [SO:ke] -synonym: "initiator codon change" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001581 ! codon_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:35:18Z - -[Term] -id: SO:0001583 -name: non_synonymous_codon -alt_id: SO:0001584 -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for a different amino acid." [SO:ke] -comment: EBI term: Non-synonymous SNPs. SNPs that are located in the coding sequence and result in an amino acid change in the encoded peptide sequence. 
-synonym: "missense" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "missense_codon" EXACT [] -synonym: "non synonymous codon" EXACT [] -synonym: "non_synonymous_coding" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -xref: http:http\://en.wikipedia.org/wiki/Missense_mutation "wiki" -is_a: SO:0001581 ! codon_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:35:49Z - -[Term] -id: SO:0001585 -name: conservative_missense_codon -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for a different but similar amino acid. These variants may or may not be deleterious." [SO:ke] -synonym: "conservative missense codon" EXACT [] -synonym: "neutral missense codon" RELATED [] -synonym: "quiet missense codon" RELATED [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001583 ! non_synonymous_codon -created_by: kareneilbeck -creation_date: 2010-03-22T02:36:40Z - -[Term] -id: SO:0001586 -name: non_conservative_missense_codon -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for an amino acid with different biochemical properties." [SO:ke] -synonym: "non conservative missense codon" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001583 ! non_synonymous_codon -created_by: kareneilbeck -creation_date: 2010-03-22T02:37:16Z - -[Term] -id: SO:0001587 -name: stop_gained -def: "A sequence variant whereby at least one base of a codon is changed, resulting in a premature stop codon, leading to a shortened transcript." [SO:ke] -comment: EBI term: Stop gained - In coding sequence, resulting in the gain of a stop codon (i.e. leading to a shortened peptide sequence). 
-synonym: "nonsense" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "nonsense codon" EXACT [] -synonym: "stop codon gained" RELATED [] -synonym: "stop gained" EXACT [] -synonym: "stop_gained" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001583 ! non_synonymous_codon -created_by: kareneilbeck -creation_date: 2010-03-22T02:37:52Z - -[Term] -id: SO:0001588 -name: synonymous_codon -def: "A sequence variant whereby a base of a codon is changed, but there is no resulting change to the encoded amino acid." [SO:ke] -comment: EBI term: Synonymous SNPs - In coding sequence, not resulting in an amino acid change (i.e. silent mutation).\nThis term is sometimes used synonomously with the more general term 'silent mutation', although a silent mutation may occur in non coding sequence. The best practice is to annotate to the most specific term. -synonym: "coding-synon" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "silent mutation" RELATED [] -synonym: "silent substitution" RELATED [] -synonym: "silent_mutation" RELATED [] -synonym: "synonymous codon" EXACT [] -synonym: "synonymous_coding" EXACT ebi_variants [] -xref: http://en.wikipedia.org/wiki/Silent_mutation "wiki" -xref: http://en.wikipedia.org/wiki/Synonymous_mutation -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001581 ! codon_variant -is_a: SO:1000132 ! sequence_variant_effect -created_by: kareneilbeck -creation_date: 2010-03-22T02:39:38Z - -[Term] -id: SO:0001589 -name: frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three." [SO:ke] -comment: EBI term:Frameshift variations - In coding sequence, resulting in a frameshift. 
-synonym: "frameshift variant" EXACT [] -synonym: "frameshift_" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "frameshift_coding" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001580 ! coding_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:40:19Z - -[Term] -id: SO:0001590 -name: terminator_codon_variant -def: "A sequence variant whereby at least one of the bases in the terminator codon is changed." [SO:ke] -synonym: "terminator codon variant" EXACT [] -is_a: SO:0001625 ! terminal_codon_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:40:37Z - -[Term] -id: SO:0001591 -name: frame_restoring_variant -def: "A sequence variant that reverts the sequence of a previous frameshift mutation back to the initial frame." [SO:ke] -synonym: "frame restoring variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:09Z - -[Term] -id: SO:0001592 -name: minus_1_frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, by shifting one base ahead." [http://arjournals.annualreviews.org/doi/pdf/10.1146/annurev.ge.08.120174.001535] -synonym: "-1 frameshift variant" EXACT [] -synonym: "minus 1 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:30Z - -[Term] -id: SO:0001593 -name: minus_2_frameshift_variant -synonym: "-2 frameshift variant" EXACT [] -synonym: "minus 2 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:52Z - -[Term] -id: SO:0001594 -name: plus_1_frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, by shifting one base backward." 
[http://arjournals.annualreviews.org/doi/pdf/10.1146/annurev.ge.08.120174.001535] -synonym: "+1 frameshift variant" EXACT [] -synonym: "plus 1 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:42:06Z - -[Term] -id: SO:0001595 -name: plus_2_frameshift variant -synonym: "+2 frameshift variant" EXACT [] -synonym: "plus 2 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:42:23Z - -[Term] -id: SO:0001596 -name: transcript_secondary_structure_variant -def: "A sequence variant within a transcript that changes the secondary structure of the RNA product." [SO:ke] -synonym: "transcript secondary structure variant" EXACT [] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:43:18Z - -[Term] -id: SO:0001597 -name: compensatory_transcript_secondary_structure_variant -def: "A secondary structure variant that compensate for the change made by a previous variant." [SO:ke] -synonym: "compensatory transcript secondary structure variant" EXACT [] -is_a: SO:0001596 ! transcript_secondary_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:43:54Z - -[Term] -id: SO:0001598 -name: translational_product_structure_variant -def: "A sequence variant within the transcript that changes the structure of the translational product." [SO:ke] -synonym: "translational product structure variant" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:44:17Z - -[Term] -id: SO:0001599 -name: 3D_polypeptide_structure_variant -def: "A sequence variant that changes the resulting polypeptide structure." [SO:ke] -synonym: "3D polypeptide structure variant" EXACT [] -is_a: SO:0001598 ! 
translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:44:46Z - -[Term] -id: SO:0001600 -name: complex_3D_structural_variant -def: "A sequence variant that changes the resulting polypeptide structure." [SO:ke] -synonym: "complex 3D structural variant" EXACT [] -is_a: SO:0001599 ! 3D_polypeptide_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:45:13Z - -[Term] -id: SO:0001601 -name: conformational_change_variant -def: "A sequence variant in the CDS region that causes a conformational change in the resulting polypeptide sequence." [SO:ke] -synonym: "conformational change variant" EXACT [] -is_a: SO:0001599 ! 3D_polypeptide_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:45:48Z - -[Term] -id: SO:0001602 -name: complex_change_of_translational_product_variant -synonym: "complex change of translational product variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:46:54Z - -[Term] -id: SO:0001603 -name: polypeptide_sequence_variant -def: "A sequence variant with in the CDS that causes a change in the resulting polypeptide sequence." [SO:ke] -synonym: "polypeptide sequence variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:13Z - -[Term] -id: SO:0001604 -name: amino_acid_deletion -def: "A sequence variant within a CDS resulting in the loss of an amino acid from the resulting polypeptide." [SO:ke] -synonym: "amino acid deletion" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:36Z - -[Term] -id: SO:0001605 -name: amino_acid_insertion -def: "A sequence variant within a CDS resulting in the gain of an amino acid to the resulting polypeptide." 
[SO:ke] -synonym: "amino acid insertion" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:56Z - -[Term] -id: SO:0001606 -name: amino_acid_substitution -def: "A sequence variant of a codon resulting in the substitution of one amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "amino acid substitution" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:48:17Z - -[Term] -id: SO:0001607 -name: conservative_amino_acid_substitution -def: "A sequence variant of a codon causing the substitution of a similar amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "conservative amino acid substitution" EXACT [] -is_a: SO:0001606 ! amino_acid_substitution -created_by: kareneilbeck -creation_date: 2010-03-22T02:48:57Z - -[Term] -id: SO:0001608 -name: non_conservative_amino_acid_substitution -def: "A sequence variant of a codon causing the substitution of a non conservative amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "non conservative amino acid substitution" EXACT [] -is_a: SO:0001606 ! amino_acid_substitution -created_by: kareneilbeck -creation_date: 2010-03-22T02:49:23Z - -[Term] -id: SO:0001609 -name: elongated_polypeptide -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence." [SO:ke] -synonym: "elongated polypeptide" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:49:52Z - -[Term] -id: SO:0001610 -name: elongated_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence at the C terminus." 
[SO:ke] -synonym: "elongated polypeptide C terminal" EXACT [] -is_a: SO:0001609 ! elongated_polypeptide -created_by: kareneilbeck -creation_date: 2010-03-22T02:50:20Z - -[Term] -id: SO:0001611 -name: elongated_polypeptide_N_terminal -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated polypeptide N terminal" EXACT [] -is_a: SO:0001609 ! elongated_polypeptide -created_by: kareneilbeck -creation_date: 2010-03-22T02:50:31Z - -[Term] -id: SO:0001612 -name: elongated_in_frame_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes in frame elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated in frame polypeptide C terminal" EXACT [] -is_a: SO:0001610 ! elongated_polypeptide_C_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:05Z - -[Term] -id: SO:0001613 -name: elongated_out_of_frame_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes out of frame elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated polypeptide out of frame C terminal" EXACT [] -is_a: SO:0001610 ! elongated_polypeptide_C_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:20Z - -[Term] -id: SO:0001614 -name: elongated_in_frame_polypeptide_N_terminal_elongation -def: "A sequence variant with in the CDS that causes in frame elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated in frame polypeptide N terminal" EXACT [] -is_a: SO:0001611 ! elongated_polypeptide_N_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:49Z - -[Term] -id: SO:0001615 -name: elongated_out_of_frame_polypeptide_N_terminal -def: "A sequence variant with in the CDS that causes out of frame elongation of the resulting polypeptide sequence at the N terminus." 
[SO:ke] -synonym: "elongated out of frame N terminal" EXACT [] -is_a: SO:0001611 ! elongated_polypeptide_N_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:52:05Z - -[Term] -id: SO:0001616 -name: polypeptide_fusion -def: "A sequence variant that causes a fusion of two polypeptide sequences." [SO:ke] -synonym: "polypeptide fusion" EXACT [] -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:52:43Z - -[Term] -id: SO:0001617 -name: polypeptide_truncation -def: "A sequence variant of the CD that causes a truncation of the resulting polypeptide." [SO:ke] -synonym: "polypeptide truncation" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:53:07Z - -[Term] -id: SO:0001618 -name: inactive_catalytic_site -def: "A sequence variant that causes the inactivation of a catalytic site." [SO:ke] -synonym: "inactive catalytic site" EXACT [] -is_a: SO:0001560 ! inactive_ligand_binding_site -created_by: kareneilbeck -creation_date: 2010-03-22T03:06:14Z - -[Term] -id: SO:0001619 -name: nc_transcript_variant -def: "A transcript variant of a non coding RNA gene." [SO:ke] -comment: Within non-coding gene - Located within a gene that does not code for a protein. -synonym: "nc transcript variant" EXACT [] -synonym: "non coding transcript variant" EXACT [] -synonym: "within_non_coding_gene" EXACT dbsnp [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:16:23Z - -[Term] -id: SO:0001620 -name: mature_miRNA_variant -def: "A transcript variant located with the sequence of the mature miRNA." [SO:ke] -comment: EBI term: Within mature miRNA - Located within a microRNA. 
-synonym: "mature miRNA variant" EXACT [] -synonym: "within_mature_miRNA" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001619 ! nc_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:16:58Z - -[Term] -id: SO:0001621 -name: NMD_transcript_variant -def: "A variant in a transcript that is the target of NMD." [SO:ke] -synonym: "NMD transcript variant" EXACT [] -synonym: "NMD_transcript" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:20:40Z - -[Term] -id: SO:0001622 -name: UTR_variant -def: "A transcript variant that is located within the UTR." [SO:ke] -synonym: "UTR variant" EXACT [] -synonym: "UTR_" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:22:58Z - -[Term] -id: SO:0001623 -name: 5_prime_UTR_variant -def: "A UTR variant of the 5' UTR." [SO:ke] -comment: EBI term: 5prime UTR variations - In 5prime UTR (untranslated region). -synonym: "5'UTR variant" EXACT [] -synonym: "5PRIME_UTR" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -synonym: "five prime UTR variant" EXACT [] -synonym: "untranslated-5" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001622 ! UTR_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:23:29Z - -[Term] -id: SO:0001624 -name: 3_prime_UTR_variant -def: "A UTR variant of the 3' UTR." [SO:ke] -comment: EBI term 3prime UTR variations - In 3prime UTR. -synonym: "3'UTR variant" EXACT [] -synonym: "3PRIME_UTR" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -synonym: "three prime UTR variant" EXACT [] -synonym: "untranslated-3" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001622 ! 
UTR_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:23:54Z - -[Term] -id: SO:0001625 -name: terminal_codon_variant -def: "A codon variant that changes at least one base of the last codon of the transcript." [SO:ke] -comment: The terminal codon may be the terminator, or in an incomplete transcript the last available codon. -synonym: "terminal codon variant" EXACT [] -is_a: SO:0001581 ! codon_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:49:55Z - -[Term] -id: SO:0001626 -name: incomplete_terminal_codon_variant -def: "A sequence variant where at least one base of the final codon of an incompletely annotated transcript is changed." [SO:ke] -comment: EBI term: Partial codon - Located within the final, incomplete codon of a transcript with a shortened coding sequence where the end is unknown. -synonym: "incomplete terminal codon variant" EXACT [] -synonym: "partial_codon" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001625 ! terminal_codon_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:51:15Z - -[Term] -id: SO:0001627 -name: intron_variant -def: "A transcript variant occurring within an intron." [SO:ke] -comment: EBI term: Intronic variations - In intron. -synonym: "intron variant" EXACT [] -synonym: "intron_" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "intronic" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:52:38Z - -[Term] -id: SO:0001628 -name: intergenic_variant -def: "A sequence variant located in the intergenic region, between genes." [SO:ke] -comment: EBI term Intergenic variations - More than 5 kb either upstream or downstream of a transcript. -synonym: "intergenic" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -synonym: "intergenic variant" EXACT [] -is_a: SO:0001537 ! 
structural_variant -created_by: kareneilbeck -creation_date: 2010-03-23T05:07:37Z - -[Term] -id: SO:0001629 -name: splice_site_variant -def: "A sequence variant that changes the first two or last two bases of an intron." [http://uswest.ensembl.org/info/docs/variation/index.html] -comment: EBI term - essential splice site - In the first 2 or the last 2 basepairs of an intron. -synonym: "essential_splice_site" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -synonym: "splice site variant" EXACT [] -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:42:00Z - -[Term] -id: SO:0001630 -name: splice_region_variant -def: "A sequence variant in which a change has occurred within the region of the splice site, either within 1-3 bases of the exon or 3-8 bases of the intron." [http://uswest.ensembl.org/info/docs/variation/index.html] -comment: EBI term: splice site - 1-3 bps into an exon or 3-8 bps into an intron. -synonym: "splice region variant" EXACT [] -synonym: "splice_region_variant" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:46:02Z - -[Term] -id: SO:0001631 -name: upstream_gene_variant -def: "A sequence variant located 5' of a gene." [SO:ke] -comment: Different groups annotate up and downstream to different lengths. The subtypes are specific and are backed up with cross references. -synonym: "upstream gene variant" EXACT [] -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:49:13Z - -[Term] -id: SO:0001632 -name: downstream_gene_variant -def: "A sequence variant located 3' of a gene." [SO:ke] -comment: Different groups annotate up and downstream to different lengths. The subtypes are specific and are backed up with cross references. -synonym: "downstream gene variant" EXACT [] -is_a: SO:0001537 ! 
structural_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:49:38Z - -[Term] -id: SO:0001633 -name: 5KB_downstream_variant -def: "A sequence variant located within 5 KB of the end of a gene." [SO:ke] -comment: EBI term Downstream variations - Within 5 kb downstream of the 3prime end of a transcript. -synonym: "5KB downstream variant" EXACT [] -synonym: "downstream" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -synonym: "within 5KB downstream" RELATED [] -is_a: SO:0001632 ! downstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:50:16Z - -[Term] -id: SO:0001634 -name: 500B_downstream_variant -def: "A sequence variant located within a half KB of the end of a gene." [SO:ke] -synonym: "500B downstream variant" EXACT [] -synonym: "near-gene-3" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001632 ! downstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:50:42Z - -[Term] -id: SO:0001635 -name: 5KB_upstream_variant -def: "A sequence variant located within 5KB 5' of a gene." [SO:ke] -comment: EBI term Upstream variations - Within 5 kb upstream of the 5prime end of a transcript. -synonym: "5kb upstream variant" EXACT [] -synonym: "upstream" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001631 ! upstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:51:06Z - -[Term] -id: SO:0001636 -name: 2KB_upstream_variant -def: "A sequence variant located within 2KB 5' of a gene." [SO:ke] -synonym: "2KB upstream variant" EXACT [] -synonym: "near-gene-5" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001631 ! upstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:51:22Z - -[Term] -id: SO:0001637 -name: rRNA_gene -def: "A gene that encodes for ribosomal RNA." [SO:ke] -synonym: "rRNA gene" EXACT [] -is_a: SO:0001263 ! 
ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:10:32Z - -[Term] -id: SO:0001638 -name: piRNA_gene -def: "A gene that encodes for an piwi associated RNA." [SO:ke] -synonym: "piRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:11:36Z - -[Term] -id: SO:0001639 -name: RNase_P_RNA_gene -def: "A gene that encodes an RNase P RNA." [SO:ke] -synonym: "RNase P RNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:13:23Z - -[Term] -id: SO:0001640 -name: RNase_MRP_RNA_gene -def: "A gene that encodes a RNase_MRP_RNA." [SO:ke] -synonym: "RNase MRP RNA gene" RELATED [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:13:58Z - -[Term] -id: SO:0001641 -name: lincRNA_gene -def: "A gene that encodes large intervening non-coding RNA." [SO:ke] -synonym: "lincRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:14:24Z - -[Term] -id: SO:0001642 -name: mathematically_defined_repeat -def: "A mathematically defined repeat (MDR) is a experimental feature that is determined by querying overlapping oligomers of length k against a database of shotgun sequence data and identifying regions in the query sequence that exceed a statistically determined threshold of repetitiveness." [SO:jestill] -comment: Mathematically defined repeat regions are determined without regard to the biological origin of the repetitive region. The repeat units of a MDR are the overlapping oligomers of size k that were used to for the query. Tools that can annotate mathematically defined repeats include Tallymer (Kurtz et al 2008, BMC Genomics: 517) and RePS (Wang et al, Genome Res 12(5): 824-831.). -synonym: "mathematically defined repeat" EXACT [] -is_a: SO:0001410 ! 
experimental_feature -created_by: kareneilbeck -creation_date: 2010-05-03T11:50:14Z - -[Term] -id: SO:0001643 -name: telomerase_RNA_gene -def: "A telomerase RNA gene is a non coding RNA gene the RNA product of which is a component of telomerase." [SO:ke] -synonym: "Telomerase RNA component" EXACT [] -synonym: "telomerase RNA gene" EXACT [] -synonym: "TERC" EXACT [] -xref: http:http\://en.wikipedia.org/wiki/Telomerase_RNA_component "wikipedia" -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-05-18T05:26:38Z - -[Term] -id: SO:0001644 -name: targeting_vector -def: "An engineered vector that is able to take part in homologous recombination in a host with the intent of introducing site specific genomic modifications." [MGD:tm, PMID:10354467] -synonym: "targeting vector" RELATED [] -is_a: SO:0000440 ! vector_replicon -intersection_of: SO:0000440 ! vector_replicon -intersection_of: has_part SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000783 ! engineered -created_by: kareneilbeck -creation_date: 2010-05-28T02:05:25Z - -[Term] -id: SO:0001645 -name: genetic_marker -def: "A measurable sequence feature that varies within a population." [SO:db] -synonym: "genetic marker" RELATED [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-05-28T02:33:07Z - -[Term] -id: SO:0001646 -name: DArT_marker -def: "A genetic marker, discovered using Diversity Arrays Technology (DArT) technology." [SO:ke] -synonym: "DArT marker" EXACT [] -is_a: SO:0001645 ! genetic_marker -created_by: kareneilbeck -creation_date: 2010-05-28T02:34:43Z - -[Term] -id: SO:0001700 -name: histone_modification -def: "Histone modification is a post translationally modified region whereby residues of the histone protein are modified by methylation, acetylation, phosphorylation, ubiquitination, sumoylation, citrullination, or ADP-ribosylation." 
[http:en.wikipedia.org/wiki/Histone] -synonym: "histone modification" EXACT [] -synonym: "histone modification site" RELATED [] -is_a: SO:0001089 ! post_translationally_modified_region -intersection_of: SO:0001089 ! post_translationally_modified_region -intersection_of: has_quality SO:0000133 ! epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-31T10:22:08Z - -[Term] -id: SO:0001701 -name: histone_methylation_site -def: "A histone modification site where the modification is the methylation of the residue." [SO:ke] -synonym: "histone methylation" EXACT [] -synonym: "histone methylation site" EXACT [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:02Z - -[Term] -id: SO:0001702 -name: histone_acetylation_site -def: "A histone modification where the modification is the acylation of the residue." [SO:ke] -synonym: "histone acetylation" EXACT [] -synonym: "histone acetylatoin site" EXACT [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:27Z - -[Term] -id: SO:0001703 -name: H3K9_acetylation_site -def: "A kind of histone modification site, whereby the 9th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 acetylation site" EXACT [] -synonym: "H3K9Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:05Z - -[Term] -id: SO:0001704 -name: H3K14_acetylation_site -def: "A kind of histone modification site, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K14 acetylation site" EXACT [] -synonym: "H3K14Ac" RELATED [] -is_a: SO:0001702 ! 
histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:53Z - -[Term] -id: SO:0001705 -name: H3K4_monomethylation_site -def: "A kind of histone modification, whereby the 4th residue (a lysine), from the start of the H3 protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 mono-methylation site" EXACT [] -synonym: "H3K4me1" RELATED [] -is_a: SO:0001734 ! H3K4_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:28:14Z - -[Term] -id: SO:0001706 -name: H3K4_trimethylation -def: "A kind of histone modification site, whereby the 4th residue (a lysine), from the start of the H3 protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 tri-methylation" EXACT [] -synonym: "H3K4me3" RELATED [] -is_a: SO:0001734 ! H3K4_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:29:12Z - -[Term] -id: SO:0001707 -name: H3K9_trimethylation_site -def: "A kind of histone modification site, whereby the 9th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 tri-methylation site" EXACT [] -synonym: "H3K9Me3" RELATED [] -is_a: SO:0001736 ! H3K9_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:30:34Z - -[Term] -id: SO:0001708 -name: H3K27_monomethylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2K27 mono-methylation site" EXACT [] -synonym: "H2K27Me1" RELATED [] -is_a: SO:0001732 ! H3K27_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:31:54Z - -[Term] -id: SO:0001709 -name: H3K27_trimethylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is tri-methylated." 
[http://en.wikipedia.org/wiki/Histone] -synonym: "H3K27 tri-methylation site" EXACT [] -synonym: "H3K27Me3" RELATED [] -is_a: SO:0001732 ! H3K27_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:32:41Z - -[Term] -id: SO:0001710 -name: H3K79_monomethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is mono- methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 mono-methylation site" EXACT [] -synonym: "H3K79me1" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:33:42Z - -[Term] -id: SO:0001711 -name: H3K79_dimethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is di-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 di-methylation site" EXACT [] -synonym: "H3K79Me2" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:34:39Z - -[Term] -id: SO:0001712 -name: H3K79_trimethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 tri-methylation site" EXACT [] -synonym: "H3K79Me3" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:35:30Z - -[Term] -id: SO:0001713 -name: H4K20_monomethylation_site -def: "A kind of histone modification site, whereby the 20th residue (a lysine), from the start of the H34histone protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H4K20 mono-methylation site" EXACT [] -synonym: "H4K20Me1" RELATED [] -is_a: SO:0001701 ! 
histone_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:36:43Z - -[Term] -id: SO:0001714 -name: H2BK5_monomethylation_site -def: "A kind of histone modification site, whereby the 5th residue (a lysine), from the start of the H2B protein is methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2BK5 mono-methylation site" EXACT [] -is_a: SO:0001701 ! histone_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:38:12Z - -[Term] -id: SO:0001715 -name: ISRE -def: "An ISRE is a transcriptional cis regulatory region, containing the consensus region: YAGTTTC(A/T)YTTTYCC, responsible for increased transcription via interferon binding." [http://genesdev.cshlp.org/content/2/4/383.abstrac] -comment: Term requested via tracker (2981725) by Alan Ruttenberg, April 2010. It has been described as both an enhancer and a promoter, so the parent is the more general term. -synonym: "interferon stimulated response element" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region -created_by: kareneilbeck -creation_date: 2010-04-05T11:15:08Z - -[Term] -id: SO:0001716 -name: histone_ubiqitination_site -def: "A histone modification site where ubiquitin may be added." [SO:ke] -synonym: "histone ubiquitination site" RELATED [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-04-13T10:12:18Z - -[Term] -id: SO:0001717 -name: H2B_ubiquitination_site -def: "A histone modification site on H2B where ubiquitin may be added." [SO:ke] -synonym: "H2BUbiq" RELATED [] -is_a: SO:0001716 ! histone_ubiqitination_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:13:28Z - -[Term] -id: SO:0001718 -name: H3K18_acetylation_site -def: "A kind of histone modification site, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K18 acetylation site" EXACT [] -synonym: "H3K18Ac" RELATED [] -is_a: SO:0001702 ! 
histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:39:35Z - -[Term] -id: SO:0001719 -name: H3K23_acylation site -def: "A kind of histone modification, whereby the 23rd residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K23 acylation site" RELATED [] -synonym: "H3K23Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:42:45Z - -[Term] -id: SO:0001720 -name: epigenetically_modified_region -def: "A biological region implicated in inherited changes caused by mechanisms other than changes in the underlying DNA sequence." [http://en.wikipedia.org/wiki/Epigenetics, SO:ke] -subset: SOFA -synonym: "epigenetically modified region" RELATED [] -is_a: SO:0001411 ! biological_region -intersection_of: SO:0001411 ! biological_region -intersection_of: has_quality SO:0000133 ! epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-27T12:02:29Z - -[Term] -id: SO:0001721 -name: H3K27_acylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K27 acylation site" EXACT [] -synonym: "H3K27Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:44:09Z - -[Term] -id: SO:0001722 -name: H3K36_monomethylation_site -def: "A kind of histone modification site, whereby the 36th residue (a lysine), from the start of the H3 histone protein is mono-methylated." [SO:ke] -synonym: "H3K36 mono-methylation site" EXACT [] -synonym: "H3K36. -synonym: "transcriptionally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -def: "Expressed in relatively constant amounts without regard to cellular environmental conditions such as the concentration of a particular substrate." 
[SO:ke] -synonym: "transcriptionally constitutive" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -def: "An inducer molecule is required for transcription to occur." [SO:ke] -synonym: "transcriptionally induced" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -def: "A repressor molecule is required for transcription to stop." [SO:ke] -synonym: "transcriptionally repressed" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000127 -name: silenced_gene -def: "A gene that is silenced." [SO:xp] -synonym: "silenced gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000893 ! silenced - -[Term] -id: SO:0000128 -name: gene_silenced_by_DNA_modification -def: "A gene that is silenced by DNA modification." [SO:xp] -synonym: "gene silenced by DNA modification" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000129 -name: gene_silenced_by_DNA_methylation -def: "A gene that is silenced by DNA methylation." [SO:xp] -synonym: "gene silenced by DNA methylation" EXACT [] -synonym: "methylation-silenced gene" EXACT [] -intersection_of: SO:0000128 ! gene_silenced_by_DNA_modification -intersection_of: has_quality SO:0000895 ! silenced_by_DNA_methylation - -[Term] -id: SO:0000130 -name: post_translationally_regulated -def: "An attribute describing a gene that is regulated after it has been translated." [SO:ke] -synonym: "post translationally regulated" EXACT [] -synonym: "post-translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000131 -name: translationally_regulated -def: "An attribute describing a gene that is regulated as it is translated." [SO:ke] -synonym: "translationally regulated" EXACT [] -is_a: SO:0000119 ! 
regulated - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "DNA reverse primer" EXACT [] -synonym: "reverse DNA primer" EXACT [] -synonym: "reverse primer" EXACT [] -synonym: "reverse primer oligo" EXACT [] -synonym: "reverse primer oligonucleotide" EXACT [] -synonym: "reverse primer sequence" EXACT [] -intersection_of: SO:0000112 ! primer -intersection_of: has_quality SO:0001031 ! reverse - -[Term] -id: SO:0000133 -name: epigenetically_modified -def: "This attribute describes a gene where heritable changes other than those in the DNA sequence occur. These changes include: modification to the DNA (such as DNA methylation, the covalent modification of cytosine), and post-translational modification of histones." [SO:ke] -synonym: "epigenetically modified" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000134 -name: imprinted -def: "Imprinted genes are epigenetically modified genes that are expressed monoallelically according to their parent of origin." [SO:ke] -xref: http:http\://en.wikipedia.org/wiki/Genomic_imprinting "wiki" -is_a: SO:0000119 ! regulated -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000135 -name: maternally_imprinted -def: "The maternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "maternally imprinted" EXACT [] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -def: "The paternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "paternally imprinted" EXACT [] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -def: "Allelic exclusion is a process occurring in diploid organisms, where a gene is inactivated and not expressed in that cell." 
[SO:ke] -comment: Examples are x-inactivation and immunoglobulin formation. -synonym: "allelically excluded" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000138 -name: gene_rearranged_at_DNA_level -def: "An epigenetically modified gene, rearranged at the DNA level." [SO:xp] -synonym: "gene rearranged at DNA level" EXACT [] -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000904 ! rearranged_at_DNA_level - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0001680 ! translation_regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -synonym: "DNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000145 -name: recoded_codon -def: "A codon that has been redefined at translation. The redefinition may be as a result of translational bypass, translational frameshifting or stop codon readthrough." [SO:xp] -synonym: "recoded codon" EXACT [] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000146 -name: capped -def: "An attribute describing when a sequence, usually an mRNA is capped by the addition of a modified guanine nucleotide at the 5' end." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unavailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! 
assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http\://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "yeast artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "bacterial artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000154 -name: PAC -def: "The P1-derived artificial chromosome are DNA constructs that are derived from the DNA of P1 bacteriophage. They can carry large amounts (about 100-300 kilobases) of other sequences for a variety of bioengineering purposes. It is one type of vector used to clone DNA fragments (100- to 300-kb insert size; average, 150 kb) in Escherichia coli cells." [http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. Drosophila melanogaster PACs carry an average insert size of 80 kb. The library represents a 6-fold coverage of the genome. -synonym: "P1" EXACT [] -synonym: "P1 artificial chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome "wiki" -is_a: SO:0000440 ! 
vector_replicon - -[Term] -id: SO:0000155 -name: plasmid -def: "A self replicating, using the hosts cellular machinery, often circular nucleic acid molecule that is distinct from a chromosome in the organism." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "plasmid sequence" EXACT [] -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as a plasmid or packaged as a phage,since they retain the lambda cos sites." [SO:ma] -comment: Paper: vans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1):9-20. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cosmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Cosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma] -synonym: "phagemid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Phagemid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilizes the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource: mapping and analysis of 96 clones. Genomics 1996. -synonym: "fosmid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Fosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000159 -name: deletion -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." 
[SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_obsolete: true - -[Term] -id: SO:0000161 -name: methylated_A -def: "A modified RNA base in which adenine has been methylated." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinery. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! 
cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a place holder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000166 -name: enhancer_bound_by_factor -def: "An enhancer bound by a factor." [SO:xp] -synonym: "enhancer bound by factor" EXACT [] -intersection_of: SO:0000165 ! enhancer -intersection_of: has_quality SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. 
-subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." [SO:ke] -synonym: "pol I promoter" EXACT [] -synonym: "polymerase I promoter" EXACT [] -synonym: "RNA polymerase A promoter" EXACT [] -synonym: "RNApol I promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke] -synonym: "pol II promoter" RELATED [] -synonym: "polymerase II promoter" EXACT [] -synonym: "RNA polymerase B promoter" EXACT [] -synonym: "RNApol II promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." [SO:ke] -synonym: "pol III promoter" EXACT [] -synonym: "polymerase III promoter" EXACT [] -synonym: "RNA polymerase C promoter" EXACT [] -synonym: "RNApol III promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "CAAT box" EXACT [] -synonym: "CAAT signal" EXACT [] -synonym: "CAAT-box" EXACT [] -xref: http://en.wikipedia.org/wiki/CAAT_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0000173 -name: GC_rich_promoter_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "GC rich promoter region" EXACT [] -synonym: "GC-rich region" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:16858867] -comment: Binds TBP. -synonym: "Goldstein-Hogness box" EXACT [] -synonym: "TATA box" EXACT [] -xref: http://en.wikipedia.org/wiki/TATA_box "wiki" -is_a: SO:0001660 ! core_promoter_element - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT. This region is associated with sigma factor 70." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-10 signal" EXACT [] -synonym: "minus 10 signal" EXACT [] -synonym: "Pribnow box" EXACT [] -synonym: "Pribnow Schaller box" EXACT [] -synonym: "Pribnow-Schaller box" EXACT [] -xref: http://en.wikipedia.org/wiki/Pribnow_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001671 ! bacterial_RNApol_promoter_sigma_70 - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA. This region is associated with sigma factor 70." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-35 signal" EXACT [] -synonym: "minus 35 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001671 ! bacterial_RNApol_promoter_sigma_70 - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." [http://www.dddmag.com/Glossary.aspx#r] -synonym: "class I" RELATED [] -synonym: "class I transposon" EXACT [] -synonym: "retrotransposon element" EXACT [] -xref: http://en.wikipedia.org/wiki/Retrotransposon "wiki" -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." [SO:ke] -synonym: "class II" RELATED [] -synonym: "class II transposon" EXACT [] -synonym: "DNA transposon" EXACT [] -is_a: SO:0000101 ! 
transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -synonym: "U2 intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." [SO:ke] -synonym: "long terminal repeat retrotransposon" EXACT [] -synonym: "LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! 
primary_transcript_region - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -synonym: "non LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -synonym: "5' intron" EXACT [] -synonym: "5' intron sequence" EXACT [] -synonym: "five prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000191 -name: interior_intron -synonym: "interior intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -synonym: "3' intron" EXACT [] -synonym: "3' intron sequence" RELATED [] -synonym: "three prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." [GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "LINE" EXACT [] -synonym: "LINE element" EXACT [] -synonym: "Long interspersed element" EXACT [] -synonym: "Long interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon (here, 'codon' is inclusive of the stop_codon)." [SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! 
exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke] -synonym: "translocated sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000201 -name: interior_exon -def: "An exon that is bounded by 5' and 3' splice sites." [PMID:10373547] -synonym: "interior exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The coding exon that is most 3-prime on a given transcript." [SO:ma] -synonym: "3' coding exon" RELATED [] -synonym: "three prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! 
mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." [SO:ke] -synonym: "Short interspersed element" EXACT [] -synonym: "Short interspersed nuclear element" EXACT [] -synonym: "SINE element" EXACT [] -xref: http://en.wikipedia.org/wiki/Short_interspersed_nuclear_element "wiki" -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_variation -synonym: "simple sequence length polymorphism" RELATED [] -synonym: "simple sequence length variation" EXACT [] -synonym: "SSLP" RELATED [] -is_a: SO:0000248 ! sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." 
[http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "terminal inverted repeat element" EXACT [] -synonym: "TIR element" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -subset: SOFA -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253)." [SO:ke] -synonym: "tRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -def: "A primary transcript encoding alanyl tRNA." [SO:ke] -synonym: "alanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." [SO:ke] -synonym: "arginine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -synonym: "asparagine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -synonym: "aspartic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." [SO:ke] -synonym: "cysteine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." 
[SO:ke] -synonym: "glutamic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." [SO:ke] -synonym: "glycine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." [SO:ke] -synonym: "histidine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke] -synonym: "isoleucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -synonym: "leucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -synonym: "lysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -synonym: "methionine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." 
[SO:ke] -synonym: "phenylalanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -synonym: "proline tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -synonym: "serine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -synonym: "threonine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -synonym: "tryptophan tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." [SO:ke] -synonym: "tyrosine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." [SO:ke] -synonym: "valine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear RNA (SO:0000274)." [SO:ke] -synonym: "snRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -synonym: "snoRNA primary transcript" EXACT [] -is_a: SO:0000483 ! 
nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds a Transcription Factor or Transcription Factor complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The in-frame interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma] -comment: The definition was modified by Rama. 
ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -synonym: "transcript attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterized by large modular imperfect long inverted repeats." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "foldback element" EXACT [] -synonym: "long inverted repeat element" RELATED [] -synonym: "LVR element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The sequences extending on either side of a specific region." [SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000240 -name: chromosome_variation -synonym: "chromosome variation" EXACT [] -is_a: SO:0001507 ! variant_collection -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000400 ! sequence_attribute -relationship: part_of SO:0001524 ! chromosomally_aberrant_genome - -[Term] -id: SO:0000241 -name: internal_UTR -def: "A UTR bordered by the terminal and initial codons of two CDSs in a polycistronic transcript. Every UTR is either 5', 3' or internal." [SO:cjm] -synonym: "internal UTR" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polycistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -synonym: "untranslated region polycistronic mRNA" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." 
[SO:ke] -synonym: "internal ribosomal entry sequence" EXACT [] -synonym: "internal ribosomal entry site" EXACT [] -synonym: "internal ribosome entry sequence" RELATED [] -synonym: "internal ribosome entry site" EXACT [] -synonym: "IRES" EXACT [] -xref: http://en.wikipedia.org/wiki/Internal_ribosome_entry_site "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_obsolete: true - -[Term] -id: SO:0000246 -name: polyadenylated -def: "A attribute describing the addition of a poly A tail to the 3' end of a mRNA molecule." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000248 -name: sequence_length_variation -synonym: "sequence length variation" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -synonym: "modified RNA base feature" EXACT [] -is_a: SO:0001236 ! base - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! 
ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000254 -name: alanyl_tRNA -def: "A tRNA sequence that has an alanine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "alanyl tRNA" EXACT [] -synonym: "alanyl-transfer ribonucleic acid" EXACT [] -synonym: "alanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000211 ! alanine_tRNA_primary_transcript - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -synonym: "rRNA small subunit primary transcript" EXACT [] -is_a: SO:0000209 ! 
rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -def: "A tRNA sequence that has an asparagine anticodon, and a 3' asparagine binding region." [SO:ke] -synonym: "asparaginyl tRNA" EXACT [] -synonym: "asparaginyl-transfer ribonucleic acid" EXACT [] -synonym: "asparaginyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000213 ! asparagine_tRNA_primary_transcript - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -def: "A tRNA sequence that has an aspartic acid anticodon, and a 3' aspartic acid binding region." [SO:ke] -synonym: "aspartyl tRNA" EXACT [] -synonym: "aspartyl-transfer ribonucleic acid" EXACT [] -synonym: "aspartyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000214 ! aspartic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -def: "A tRNA sequence that has a cysteine anticodon, and a 3' cysteine binding region." [SO:ke] -synonym: "cysteinyl tRNA" EXACT [] -synonym: "cysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "cysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000215 ! cysteine_tRNA_primary_transcript - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -def: "A tRNA sequence that has a glutamine anticodon, and a 3' glutamine binding region." [SO:ke] -synonym: "glutaminyl tRNA" EXACT [] -synonym: "glutaminyl-transfer ribonucleic acid" EXACT [] -synonym: "glutaminyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000216 ! glutamic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -def: "A tRNA sequence that has a glutamic acid anticodon, and a 3' glutamic acid binding region." [SO:ke] -synonym: "glutamyl tRNA" EXACT [] -synonym: "glutamyl-transfer ribonucleic acid" EXACT [] -synonym: "glutamyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000217 ! 
glutamine_tRNA_primary_transcript - -[Term] -id: SO:0000261 -name: glycyl_tRNA -def: "A tRNA sequence that has a glycine anticodon, and a 3' glycine binding region." [SO:ke] -synonym: "glycyl tRNA" EXACT [] -synonym: "glycyl-transfer ribonucleic acid" RELATED [] -synonym: "glycyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000218 ! glycine_tRNA_primary_transcript - -[Term] -id: SO:0000262 -name: histidyl_tRNA -def: "A tRNA sequence that has a histidine anticodon, and a 3' histidine binding region." [SO:ke] -synonym: "histidyl tRNA" EXACT [] -synonym: "histidyl-transfer ribonucleic acid" EXACT [] -synonym: "histidyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000219 ! histidine_tRNA_primary_transcript - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -def: "A tRNA sequence that has an isoleucine anticodon, and a 3' isoleucine binding region." [SO:ke] -synonym: "isoleucyl tRNA" EXACT [] -synonym: "isoleucyl-transfer ribonucleic acid" EXACT [] -synonym: "isoleucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000220 ! isoleucine_tRNA_primary_transcript - -[Term] -id: SO:0000264 -name: leucyl_tRNA -def: "A tRNA sequence that has a leucine anticodon, and a 3' leucine binding region." [SO:ke] -synonym: "leucyl tRNA" EXACT [] -synonym: "leucyl-transfer ribonucleic acid" EXACT [] -synonym: "leucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000221 ! leucine_tRNA_primary_transcript - -[Term] -id: SO:0000265 -name: lysyl_tRNA -def: "A tRNA sequence that has a lysine anticodon, and a 3' lysine binding region." [SO:ke] -synonym: "lysyl tRNA" EXACT [] -synonym: "lysyl-transfer ribonucleic acid" EXACT [] -synonym: "lysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000222 ! 
lysine_tRNA_primary_transcript - -[Term] -id: SO:0000266 -name: methionyl_tRNA -def: "A tRNA sequence that has a methionine anticodon, and a 3' methionine binding region." [SO:ke] -synonym: "methionyl tRNA" EXACT [] -synonym: "methionyl-transfer ribonucleic acid" EXACT [] -synonym: "methionyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000223 ! methionine_tRNA_primary_transcript - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -def: "A tRNA sequence that has a phenylalanine anticodon, and a 3' phenylalanine binding region." [SO:ke] -synonym: "phenylalanyl tRNA" EXACT [] -synonym: "phenylalanyl-transfer ribonucleic acid" EXACT [] -synonym: "phenylalanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000224 ! phenylalanine_tRNA_primary_transcript - -[Term] -id: SO:0000268 -name: prolyl_tRNA -def: "A tRNA sequence that has a proline anticodon, and a 3' proline binding region." [SO:ke] -synonym: "prolyl tRNA" EXACT [] -synonym: "prolyl-transfer ribonucleic acid" EXACT [] -synonym: "prolyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000225 ! proline_tRNA_primary_transcript - -[Term] -id: SO:0000269 -name: seryl_tRNA -def: "A tRNA sequence that has a serine anticodon, and a 3' serine binding region." [SO:ke] -synonym: "seryl tRNA" EXACT [] -synonym: "seryl-transfer ribonucleic acid" RELATED [] -synonym: "seryl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000226 ! serine_tRNA_primary_transcript - -[Term] -id: SO:0000270 -name: threonyl_tRNA -def: "A tRNA sequence that has a threonine anticodon, and a 3' threonine binding region." [SO:ke] -synonym: "threonyl tRNA" EXACT [] -synonym: "threonyl-transfer ribonucleic acid" EXACT [] -synonym: "threonyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000227 ! 
threonine_tRNA_primary_transcript - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -def: "A tRNA sequence that has a tryptophan anticodon, and a 3' tryptophan binding region." [SO:ke] -synonym: "tryptophanyl tRNA" EXACT [] -synonym: "tryptophanyl-transfer ribonucleic acid" EXACT [] -synonym: "tryptophanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000228 ! tryptophan_tRNA_primary_transcript - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -def: "A tRNA sequence that has a tyrosine anticodon, and a 3' tyrosine binding region." [SO:ke] -synonym: "tyrosyl tRNA" EXACT [] -synonym: "tyrosyl-transfer ribonucleic acid" EXACT [] -synonym: "tyrosyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000229 ! tyrosine_tRNA_primary_transcript - -[Term] -id: SO:0000273 -name: valyl_tRNA -def: "A tRNA sequence that has a valine anticodon, and a 3' valine binding region." [SO:ke] -synonym: "valyl tRNA" EXACT [] -synonym: "valyl-transfer ribonucleic acid" EXACT [] -synonym: "valyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000230 ! valine_tRNA_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000231 ! snRNA_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. 
They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: derives_from SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000277 -name: bound_by_factor -def: "An attribute describing a sequence that is bound by another molecule." [SO:ke] -comment: Formerly called transcript_by_bound_factor. -synonym: "bound by factor" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000278 -name: transcript_bound_by_nucleic_acid -def: "A transcript that is bound by a nucleic acid." [SO:xp] -comment: Formerly called transcript_by_bound_nucleic_acid. -synonym: "transcript bound by nucleic acid" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000876 ! bound_by_nucleic_acid - -[Term] -id: SO:0000279 -name: transcript_bound_by_protein -def: "A transcript that is bound by a protein." 
[SO:xp] -comment: Formerly called transcript_by_bound_protein. -synonym: "transcript bound by protein" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000875 ! bound_by_protein - -[Term] -id: SO:0000280 -name: engineered_gene -def: "A gene that is engineered." [SO:xp] -synonym: "engineered gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000281 -name: engineered_foreign_gene -def: "A gene that is engineered and foreign." [SO:xp] -synonym: "engineered foreign gene" EXACT [] -intersection_of: SO:0000280 ! engineered_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000282 -name: mRNA_with_minus_1_frameshift -def: "An mRNA with a minus 1 frameshift." [SO:xp] -synonym: "mRNA with minus 1 frameshift" EXACT [] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000866 ! minus_1_frameshift - -[Term] -id: SO:0000283 -name: engineered_foreign_transposable_element_gene -def: "A transposable_element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign transposable element gene" EXACT [] -intersection_of: SO:0000111 ! transposable_element_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartite and interrupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000285 -name: foreign_gene -def: "A gene that is foreign." [SO:xp] -synonym: "foreign gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "direct terminal repeat" RELATED [] -synonym: "long terminal repeat" EXACT [] -synonym: "LTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Long_terminal_repeat "wiki" -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000287 -name: fusion_gene -def: "A gene that is a fusion." [SO:xp] -synonym: "fusion gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Fusion_gene "wiki" -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000806 ! fusion - -[Term] -id: SO:0000288 -name: engineered_fusion_gene -def: "A fusion gene that is engineered." [SO:xp] -synonym: "engineered fusion gene" EXACT [] -intersection_of: SO:0000287 ! fusion_gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000289 -name: microsatellite -def: "A repeat_region containing repeat_units (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite" EXACT [] -synonym: "dinucleotide repeat microsatellite feature" EXACT [] -synonym: "dinucleotide repeat microsatellite locus" EXACT [] -synonym: "dinucleotide repeat microsatellite marker" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite marker" RELATED [] -synonym: "rinucleotide repeat microsatellite" EXACT [] -synonym: "trinucleotide repeat microsatellite feature" EXACT [] -synonym: "trinucleotide repeat microsatellite locus" EXACT [] -is_a: SO:0000289 ! 
microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_obsolete: true - -[Term] -id: SO:0000293 -name: engineered_foreign_repetitive_element -def: "A repetitive element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign repetitive element" EXACT [] -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -synonym: "U12 intron" EXACT [] -synonym: "U12-dependent intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! 
replicon - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "D-loop" EXACT [] -synonym: "displacement loop" RELATED [] -xref: http://en.wikipedia.org/wiki/D_loop "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -synonym: "recombination feature" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000299 -name: specific_recombination_site -synonym: "specific recombination site" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -synonym: "recombination feature of rearranged gene" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000301 -name: vertebrate_immune_system_gene_recombination_feature -synonym: "vertebrate immune system gene recombination feature" EXACT [] -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene recombination feature" EXACT [] -synonym: "J-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! 
primary_transcript_region - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interrupted palindrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_base -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001720 ! epigenetically_modified_region - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_base - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_obsolete: true - -[Term] -id: SO:0000309 -name: computed_feature -is_obsolete: true - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_obsolete: true - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to: -is_obsolete: true - -[Term] -id: SO:0000312 -name: experimentally_determined -def: "Attribute to describe a feature that has been experimentally verified." [SO:ke] -synonym: "experimentally determined" EXACT [] -is_a: SO:0000789 ! validated - -[Term] -id: SO:0000313 -name: stem_loop -alt_id: SO:0000019 -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "RNA_hairpin_loop" EXACT [] -synonym: "stem loop" EXACT [] -synonym: "stem-loop" EXACT [] -xref: http://en.wikipedia.org/wiki/Stem_loop "wiki" -is_a: SO:0000122 ! RNA_sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: TSS -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cDNA clone" EXACT [] -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." 
[SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke] -synonym: "intronic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000321 -name: mRNA_with_plus_1_frameshift -def: "An mRNA with a plus 1 frameshift." [SO:ke] -synonym: "mRNA with plus 1 frameshift" EXACT [] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000868 ! plus_1_frameshift - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -synonym: "nuclease hypersensitive site" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "coding start" EXACT [] -synonym: "translation initiation site" EXACT [] -synonym: "translation start" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! 
tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke] -synonym: "coding end" EXACT [] -synonym: "translation termination site" EXACT [] -synonym: "translation_end" EXACT [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray oligo" EXACT [] -synonym: "microarray oligonucleotide" EXACT [] -is_a: SO:0000051 ! probe - -[Term] -id: SO:0000329 -name: mRNA_with_plus_2_frameshift -def: "An mRNA with a plus 2 frameshift." [SO:xp] -synonym: "mRNA with plus 2 frameshift" EXACT [] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000869 ! plus_2_framshift - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." 
[SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000335 -name: mRNA_with_minus_2_frameshift -def: "A mRNA with a minus 2 frameshift." [SO:ke] -synonym: "mRNA with minus 2 frameshift" EXACT [] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000867 ! minus_2_frameshift - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITEs do not encode proteins." [http://www.pnas.org/cgi/content/full/97/18/10083] -synonym: "miniature inverted repeat transposable element" EXACT [] -is_a: SO:0000208 ! 
terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome which promotes recombination." [SO:rd] -synonym: "recombination hotspot" EXACT [] -xref: http://en.wikipedia.org/wiki/Recombination_hotspot "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -synonym: "site specific recombination target region" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000346 -name: loxP_site -synonym: "Cre-recombination target region" RELATED [] -synonym: "loxP site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000348 -name: nucleic_acid -def: "An attribute describing a sequence consisting of nucleobases bound to repeating units. The forms found in nature are deoxyribonucleic acid (DNA), where the repeating units are 2-deoxy-D-ribose rings connected to a phosphate backbone, and ribonucleic acid (RNA), where the repeating units are D-ribose rings connected to a phosphate backbone." [CHEBI:33696, RSC:cb] -synonym: "nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleic_acid "wiki" -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000350 -name: FRT_site -def: "An inversion site found on the Saccharomyces cerevisiae 2 micron plasmid." [SO:ma] -synonym: "FLP recombination target region" EXACT [] -synonym: "FRT site" EXACT [] -is_a: SO:0000948 ! inversion_site - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "An attribute to decide a sequence of nucleotides, nucleotide analogs, or amino acids that has been designed by an experimenter and which may, or may not, correspond with any natural sequence." [SO:ma] -synonym: "synthetic sequence" EXACT [] -is_a: SO:0000443 ! 
polymer_attribute - -[Term] -id: SO:0000352 -name: DNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a 2-deoxy-D-ribose ring connected to a phosphate backbone." [RSC:cb] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000353 -name: sequence_assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -synonym: "group 1 intron homing endonuclease target region" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which is co-inherited as the result of the lack of historic recombination within it." [SO:ma] -synonym: "haplotype block" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000356 -name: RNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a D-ribose ring connected to a phosphate backbone." [RSC:cb] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: flanked -def: "An attribute describing a region that is bounded either side by a particular kind of region." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000359 -name: floxed -def: "An attribute describing sequence that is flanked by Lox-P sites." [SO:ke] -xref: http://en.wikipedia.org/wiki/Floxed "wiki" -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." 
[http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000361 -name: FRT_flanked -def: "An attribute to describe sequence that is flanked by the FLP recombinase recognition site, FRT." [SO:ke] -synonym: "FRT flanked" EXACT [] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000362 -name: invalidated_by_chimeric_cDNA -def: "A cDNA clone constructed from more than one mRNA. Usually an experimental artifact." [SO:ma] -synonym: "invalidated by chimeric cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000363 -name: floxed_gene -def: "A transgene that is floxed." [SO:xp] -synonym: "floxed gene" EXACT [] -intersection_of: SO:0000902 ! transgene -intersection_of: has_quality SO:0000359 ! floxed - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposable element." [SO:ke] -synonym: "transposable element flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "A region encoding an integrase which acts at a site adjacent to it (attI_site) to insert DNA which must include but is not limited to an attC_site." [SO:as] -xref: http://en.wikipedia.org/wiki/Integron "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000367 -name: attI_site -def: "A region within an integron, adjacent to an integrase, at which site specific recombination involving an attC_site takes place." [SO:as] -synonym: "attI site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! 
integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -is_obsolete: true - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/~smaloy/Glossary/C.html] -synonym: "conjugative transposon" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0000373 -name: recombinationally_inverted_gene -def: "A recombinationally rearranged gene by inversion." [SO:xp] -synonym: "recombinationally inverted gene" EXACT [] -intersection_of: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: has_quality SO:1000036 ! inversion - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -intersection_of: SO:0000372 ! enzymatic_RNA -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000375 -name: rRNA_5_8S -def: "5_8S ribosomal RNA (5. 
8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 6S RNA represses expression from a sigma70-dependent promoter during stationary phase." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S RNA" EXACT [] -synonym: "RNA 6S" EXACT [] -xref: http://en.wikipedia.org/wiki/6S_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB RsmB RNA" EXACT [] -synonym: "CsrB-RsmB RNA" EXACT [] -is_a: SO:0000370 ! 
small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -synonym: "DsrA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/DsrA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -synonym: "GcvB RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/GcvB_RNA "wiki" -is_a: SO:0000378 ! DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http://rnaworld.bio.ku.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -intersection_of: SO:0000715 ! 
RNA_motif -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000381 -name: group_IIA_intron -synonym: "group IIA intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -synonym: "group IIB intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -synonym: "MicF RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MicF_RNA "wiki" -is_a: SO:0000644 ! antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -synonym: "OxyS RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/OxyS_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterized activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -synonym: "RprA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RprA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -synonym: "RRE RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -synonym: "spot-42 RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Spot_42_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesizes telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). 
Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." 
[http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -def: "An attribute describes a quality of sequence." [SO:ke] -synonym: "sequence attribute" EXACT [] -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000401 -name: gene_attribute -synonym: "gene attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_obsolete: true - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. 
In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0005837 ! U14_snoRNA_primary_transcript - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron. Twintrons are group II or III introns, into which another group II or III intron has been transposed." [PMID:1899376, PMID:7823908] -xref: http://en.wikipedia.org/wiki/Twintron "wiki" -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A biological_region of sequence that, in the molecule, interacts selectively and non-covalently with other molecules. A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: See GO:0005488 : binding. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with polypeptide molecules." [SO:ke] -comment: See GO:0042277 : peptide binding. -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000411 -name: rescue_region -def: "A region that rescues." 
[SO:xp] -synonym: "rescue fragment" EXACT [] -synonym: "rescue region" EXACT [] -synonym: "rescue segment" RELATED [] -intersection_of: SO:0000695 ! reagent -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000414 -name: invalidated_by_genomic_contamination -def: "An attribute to describe a feature that is invalidated due to genomic contamination." [SO:ke] -synonym: "invalidated by genomic contamination" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000415 -name: invalidated_by_genomic_polyA_primed_cDNA -def: "An attribute to describe a feature that is invalidated due to polyA priming." [SO:ke] -synonym: "invalidated by genomic polyA primed cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000416 -name: invalidated_by_partial_processing -def: "An attribute to describe a feature that is invalidated due to partial processing." [SO:ke] -synonym: "invalidated by partial processing" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000417 -name: polypeptide_domain -alt_id: BS:00012 -alt_id: BS:00134 -alt_id: SO:0001069 -def: "A structurally or functionally defined protein region. In proteins with multiple domains, the combination of the domains determines the function of the protein. A region which has been shown to recur throughout evolution." [EBIBS:GAR] -comment: Range. 
Old definition from before biosapiens: A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains. -subset: biosapiens -synonym: "ca_bind" RELATED BS [uniprot:feature_type] -synonym: "DNA_bind" RELATED BS [uniprot:feature_type] -synonym: "domain" BROAD BS [uniprot:feature_type] -synonym: "np_bind" RELATED BS [uniprot:feature_type] -synonym: "polypeptide domain" EXACT [] -synonym: "polypeptide_structural_domain" EXACT BS [] -synonym: "structural domain" BROAD BS [] -synonym: "zn_fing" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0001527 ! peptide_localization_signal -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. 
Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -synonym: "5' TIR" EXACT [] -synonym: "five prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -synonym: "3' TIR" EXACT [] -synonym: "three prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -synonym: "U5 long terminal repeat region" EXACT [] -synonym: "U5 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000423 -name: R_LTR_region -synonym: "R long terminal repeat region" EXACT [] -synonym: "R LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000424 -name: U3_LTR_region -synonym: "U3 long terminal repeat region" EXACT [] -synonym: "U3 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000425 -name: five_prime_LTR -synonym: "5' long terminal repeat" EXACT [] -synonym: "5' LTR" EXACT [] -synonym: "five prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -synonym: "3' long terminal repeat" EXACT [] -synonym: "3' LTR" EXACT [] -synonym: "three prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -synonym: "R 5' long term repeat region" EXACT [] -synonym: "R five prime LTR region" EXACT [] -is_a: SO:0000423 ! R_LTR_region -is_a: SO:0000850 ! 
five_prime_LTR_component - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -synonym: "U5 5' long terminal repeat region" EXACT [] -synonym: "U5 five prime LTR region" EXACT [] -is_a: SO:0000422 ! U5_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -synonym: "U3 5' long term repeat region" EXACT [] -synonym: "U3 five prime LTR region" EXACT [] -is_a: SO:0000424 ! U3_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -synonym: "R 3' long terminal repeat region" EXACT [] -synonym: "R three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -synonym: "U3 3' long terminal repeat region" EXACT [] -synonym: "U3 three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -synonym: "U5 3' long terminal repeat region" EXACT [] -synonym: "U5 three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -synonym: "non LTR retrotransposon polymeric tract" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: target_site_duplication -def: "A sequence of the target DNA that is duplicated when a transposable element or phage inserts; usually found at each end the insertion." [http://www.koko.gov.my/CocoaBioTech/Glossaryt.html] -synonym: "target site duplication" EXACT [] -is_a: SO:0000314 ! direct_repeat -intersection_of: SO:0000314 ! direct_repeat -intersection_of: derives_from SO:0000101 ! transposable_element -relationship: derives_from SO:0000101 ! 
transposable_element - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." [SO:ke] -synonym: "LTR retrotransposon poly purine tract" RELATED [] -synonym: "RR tract" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -synonym: "inverted ring chromosome" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! ring_chromosome - -[Term] -id: SO:0000440 -name: vector_replicon -def: "A replicon that has been modified to act as a vector for foreign sequence." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "vector" EXACT [] -synonym: "vector replicon" EXACT [] -xref: http://en.wikipedia.org/wiki/Vector_(molecular_biology) "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
-subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000443 -name: polymer_attribute -def: "An attribute to describe the kind of biological sequence." [SO:ke] -synonym: "polymer attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -synonym: "three prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "5' nc exon" EXACT [] -synonym: "5' non coding exon" EXACT [] -synonym: "five prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." [SO:ke] -synonym: "UTR intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." [SO:ke] -synonym: "five prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -synonym: "three prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequency of these components." [SO:ma] -synonym: "random sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -synonym: "chromosome interband" RELATED [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000451 -name: gene_with_polyadenylated_mRNA -def: "A gene that encodes a polyadenylated mRNA." 
[SO:xp] -synonym: "gene with polyadenylated mRNA" EXACT [] -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000871 ! polyadenylated_mRNA - -[Term] -id: SO:0000452 -name: transgene_attribute -is_obsolete: true - -[Term] -id: SO:0000453 -name: chromosomal_transposition -def: "A chromosome structure variant whereby a region of a chromosome has been transferred to another position. Among interchromosomal rearrangements, the term transposition is reserved for that class in which the telomeres of the chromosomes involved are coupled (that is to say, form the two ends of a single DNA molecule) as in wild-type." [FB:reference_manual, SO:ke] -synonym: "chromosomal transposition" EXACT [] -synonym: "transposition" NARROW [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000455 -name: gene_with_mRNA_with_frameshift -def: "A gene that encodes an mRNA with a frameshift." [SO:xp] -synonym: "gene with mRNA with frameshift" EXACT [] -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000456 -name: recombinationally_rearranged_gene -def: "A gene that is recombinationally rearranged." [SO:ke] -synonym: "recombinationally rearranged gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000940 ! recombinationally_rearranged - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -def: "A chromosome duplication involving an insertion from another chromosome." [SO:ke] -synonym: "interchromosomal duplication" EXACT [] -is_a: SO:1000037 ! 
chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D gene" EXACT [] -synonym: "D-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000459 -name: gene_with_trans_spliced_transcript -def: "A gene with a transcript that is trans-spliced." [SO:xp] -synonym: "gene with trans spliced transcript" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000479 ! trans_spliced_transcript - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_segment -comment: I am using the term segment instead of gene here to avoid confusion with the region 'gene'. -synonym: "vertebrate immunoglobulin T cell receptor segment" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite deficiency" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000463 -name: encodes_alternately_spliced_transcripts -def: "A gene that encodes more than one transcript." [SO:ke] -synonym: "encodes alternately spliced transcripts" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! 
pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome deletion whereby a chromosome is generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus duplication" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene" EXACT [] -synonym: "V-GENE" EXACT [] -synonym: "variable_gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -def: "An attribute describing a gene sequence where the resulting protein is regulated by the stability of the resulting protein." [SO:ke] -synonym: "post translationally regulated by protein stability" EXACT [] -synonym: "post-translationally regulated by protein stability" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -def: "An attribute describing a gene sequence where the resulting protein is modified to regulate it." [SO:ke] -synonym: "post translationally regulated by protein modification" EXACT [] -synonym: "post-translationally regulated by protein modification" EXACT [] -is_a: SO:0000130 ! 
post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene" EXACT [] -synonym: "J-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000471 -name: autoregulated -def: "The gene product is involved in its own transcriptional regulation." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -def: "The gene product is involved in its own transcriptional regulation where it decreases transcription." [SO:ke] -synonym: "negatively autoregulated" EXACT [] -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -def: "The gene product is involved in its own transcriptional regulation, where it increases transcription." [SO:ke] -synonym: "positively autoregulated" EXACT [] -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." [SO:ke] -synonym: "contig read" EXACT [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -def: "A gene that is polycistronic." 
[SO:ke] -is_obsolete: true - -[Term] -id: SO:0000478 -name: C_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C gene" EXACT [] -synonym: "C_GENE" EXACT [] -synonym: "constant gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000479 -name: trans_spliced_transcript -def: "A transcript that is trans-spliced." [SO:xp] -synonym: "trans spliced transcript" EXACT [] -synonym: "trans-spliced transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly." [SO:ke] -synonym: "tiling path clone" EXACT [] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occurring at the termini of a DNA transposon." [SO:ke] -synonym: "terminal inverted repeat" EXACT [] -synonym: "TIR" EXACT [] -is_a: SO:0000294 ! inverted_repeat -relationship: part_of SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor gene cluster" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." 
[SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-CLUSTER" EXACT [] -synonym: "DJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-C-CLUSTER" EXACT [] -synonym: "VDJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! 
VDJ_gene - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-CLUSTER" EXACT [] -synonym: "VDJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-C-CLUSTER" RELATED [] -synonym: "VJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-C-CLUSTER" EXACT [] -synonym: "VJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-CLUSTER" EXACT [] -synonym: "VJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000576 ! 
VJ_gene - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -synonym: "D gene recombination feature" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-HEPTAMER" EXACT [] -synonym: "three prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-NOMAMER" EXACT [] -synonym: "three prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-SPACER" EXACT [] -synonym: "three prime D spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-HEPTAMER" EXACT [] -synonym: "five prime D heptamer" EXACT [] -is_a: SO:0000561 ! 
heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-NONAMER" EXACT [] -synonym: "five prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'-SPACER" EXACT [] -synonym: "five prime D spacer" EXACT [] -synonym: "five prime D-spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -synonym: "Hoogsteen base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Hoogsteen_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." 
[SO:ke] -synonym: "reverse Hoogsteen base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ C cluster" EXACT [] -synonym: "D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ cluster" EXACT [] -synonym: "D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000572 ! 
DJ_gene - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J C cluster" EXACT [] -synonym: "D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000507 -name: pseudogenic_exon -def: "A non functional descendant of an exon, part of a pseudogene." [SO:ke] -comment: This is the analog of the exon of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon -relationship: part_of SO:0000516 ! pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J cluster" EXACT [] -synonym: "D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J C cluster" EXACT [] -synonym: "D-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000510 -name: VD_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V_D_GENE" EXACT [] -synonym: "VD gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J C cluster" EXACT [] -synonym: "J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus aneuploid" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J cluster" EXACT [] -synonym: "J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! 
J_gene - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J nonamer" EXACT [] -synonym: "J-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J heptamer" EXACT [] -synonym: "J-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -def: "A non functional descendant of a transcript, part of a pseudogene." [SO:ke] -comment: This is the analog of the transcript of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic transcript" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J spacer" EXACT [] -synonym: "J-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000302 ! 
J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ cluster" EXACT [] -synonym: "V-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J cluster" EXACT [] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ C cluster" EXACT [] -synonym: "V-(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ cluster" EXACT [] -synonym: "V-(VDJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J cluster" EXACT [] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ C cluster" EXACT [] -synonym: "V-(VJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ cluster" EXACT [] -synonym: "V-(VJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J cluster" EXACT [] -synonym: "V-(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V cluster" EXACT [] -synonym: "V-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ C cluster" EXACT [] -synonym: "V-D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ cluster" EXACT [] -synonym: "V-D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000572 ! 
DJ_gene - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J C cluster" EXACT [] -synonym: "V-D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J cluster" EXACT [] -synonym: "V-D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J C cluster" EXACT [] -synonym: "V-D-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! 
C_gene - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J cluster" EXACT [] -synonym: "V-D-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V heptamer" EXACT [] -synonym: "V-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J cluster" EXACT [] -synonym: "V-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J C cluster" EXACT [] -synonym: "V-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! 
C_gene - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V nonamer" EXACT [] -synonym: "V-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V spacer" EXACT [] -synonym: "V-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene recombination feature" EXACT [] -synonym: "V-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-C-CLUSTER" EXACT [] -synonym: "DJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-C-CLUSTER" EXACT [] -synonym: "DJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-C-CLUSTER" EXACT [] -synonym: "VDJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ C cluster" EXACT [] -synonym: "V-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." [http://www.pnas.org/cgi/content/full/100/11/6569] -synonym: "ISCR" RELATED [] -xref: http://en.wikipedia.org/wiki/Helitron "wiki" -is_a: SO:0000182 ! 
DNA_transposon - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -synonym: "recoding pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -synonym: "designed sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite duplication" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000548 -name: gene_with_edited_transcript -def: "A gene that encodes a transcript that is edited." [SO:xp] -synonym: "gene with edited transcript" EXACT [] -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000873 ! edited_transcript - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived duplication plus aneuploid" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -def: "A chromosome structural variation whereby either a chromosome exists in addition to the normal chromosome complement or is lacking." [SO:ke] -comment: Examples are Nullo-4, Haplo-4 and triplo-4 in Drosophila. -synonym: "aneuploid chromosome" EXACT [] -is_a: SO:1000183 ! 
chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -synonym: "polyadenylation termination signal" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "A region in the 5' UTR that pairs with the 16S rRNA during formation of the preinitiation complex." [SO:jh] -comment: Not found in Eukaryotic sequence. -synonym: "five prime ribosome binding site" EXACT [] -synonym: "RBS" RELATED [] -synonym: "Shine Dalgarno sequence" EXACT [] -synonym: "Shine-Dalgarno sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Shine-Dalgarno_sequence "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -alt_id: SO:0001430 -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation.\n The boundary between the UTR and the polyA sequence." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA cleavage site" EXACT [] -synonym: "polyA junction" EXACT [] -synonym: "polyA site" EXACT [] -synonym: "polyA_junction" EXACT [] -synonym: "polyadenylation site" RELATED [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "5' clip" RELATED [] -synonym: "five prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'RS" EXACT [] -synonym: "five prime D recombination signal sequence" EXACT [] -synonym: "five prime D-recombination signal sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "3'-clip" EXACT [] -synonym: "three prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C cluster" EXACT [] -synonym: "C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D cluster" EXACT [] -synonym: "D-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J cluster" EXACT [] -synonym: "D-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: "Seven nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or J-gene recombination feature of an immunoglobulin or T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "HEPTAMER" RELATED [] -synonym: "heptamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -synonym: "nonamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000563 -name: vertebrate_immune_system_gene_recombination_spacer -synonym: "vertebrate immune system gene recombination spacer" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J C cluster" EXACT [] -synonym: "V-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! 
DJ_gene - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J C cluster" EXACT [] -synonym: "V-(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J C cluster" EXACT [] -synonym: "V-(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -synonym: "inversion derived aneuploid chromosome" EXACT [] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promoter -synonym: "bidirectional promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000569 -name: retrotransposed -alt_id: SO:0100042 -def: "An attribute of a feature that occurred as the product of a reverse transcriptase mediated event." [SO:ke] -comment: GO:0003964 RNA-directed DNA polymerase activity. -xref: http://en.wikipedia.org/wiki/Retrotransposed "wiki" -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-RS" EXACT [] -synonym: "three prime D recombination signal sequence" EXACT [] -synonym: "three_prime_D-recombination_signal_sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_encoding -synonym: "miRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000572 -name: DJ_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D-J-GENE" EXACT [] -synonym: "DJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000573 -name: rRNA_encoding -synonym: "rRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000574 -name: VDJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-D-J-GENE" EXACT [] -synonym: "VDJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000575 -name: scRNA_encoding -synonym: "scRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000576 -name: VJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-J-GENE" EXACT [] -synonym: "VJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_encoding -synonym: "snoRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." [SO:ma] -synonym: "edited transcript feature" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -synonym: "methylation guide snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." [SO:ke] -synonym: "rRNA cleavage snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "pre edited region" EXACT [] -synonym: "pre-edited region" EXACT [] -is_a: SO:0000579 ! 
edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "A tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. TmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and Eukaryote nuclear genomes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa RNA" RELATED [] -synonym: "ssrA" RELATED [] -xref: http://en.wikipedia.org/wiki/TmRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_encoding -synonym: "C/D box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." [SO:ke] -synonym: "10Sa RNA primary transcript" RELATED [] -synonym: "ssrA RNA primary transcript" RELATED [] -synonym: "tmRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyze their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. 
-subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -synonym: "SRP RNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. 
In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000589 ! SRP_RNA_primary_transcript - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A tertiary structure in RNA where nucleotides in a loop form base pairs with a region of RNA downstream of the loop." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Pseudoknot "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "classical pseudoknot" EXACT [] -synonym: "H pseudoknot" EXACT [] -synonym: "H-pseudoknot" EXACT [] -synonym: "H-type pseudoknot" EXACT [] -synonym: "hairpin-type pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." 
[http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000595 ! C_D_box_snoRNA_primary_transcript - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "box H/ACA snoRNA" EXACT [] -synonym: "H ACA box snoRNA" EXACT [] -synonym: "H/ACA box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000596 ! H_ACA_box_snoRNA_primary_transcript - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." [SO:ke] -synonym: "C/D box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -synonym: "H ACA box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! 
snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." [http://www.rna.ucla.edu/index.html] -is_obsolete: true - -[Term] -id: SO:0000598 -name: edited_by_C_insertion_and_dinucleotide_insertion -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000599 -name: edited_by_C_to_U_substitution -is_obsolete: true - -[Term] -id: SO:0000600 -name: edited_by_A_to_I_substitution -is_obsolete: true - -[Term] -id: SO:0000601 -name: edited_by_G_addition -is_obsolete: true - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. 
Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing block" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing domain" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "unedited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_encoding -synonym: "H ACA box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! 
snoRNA_encoding - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -synonym: "oligo U tail" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000602 ! guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -synonym: "bacterial RNApol promoter" EXACT [] -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -synonym: "bacterial terminator" EXACT [] -is_a: SO:0000141 ! terminator -is_a: SO:0000752 ! 
gene_group_regulatory_region - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -synonym: "terminator of type 2 RNApol III promoter" EXACT [] -is_a: SO:0000951 ! eukaryotic_terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -synonym: "RNApol III promoter type 1" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "RNApol III promoter type 2" EXACT [] -synonym: "tRNA promoter" RELATED [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence TGGCnnAGTGG." [SO:ke] -comment: Binds TFIIIC. -synonym: "A-box" EXACT [] -xref: http://en.wikipedia.org/wiki/A-box "wiki" -is_a: SO:0001660 ! core_promoter_element - -[Term] -id: SO:0000620 -name: B_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence AGGTTCCAnnCC." [SO:ke] -comment: Binds TFIIIC. -synonym: "B-box" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -synonym: "RNApol III promoter type 3" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -def: "An RNA polymerase III type 1 promoter with consensus sequence CAnnCCn." [SO:ke] -synonym: "C-box" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000623 -name: snRNA_encoding -synonym: "snRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -synonym: "chromosomal regulatory element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000627 -name: insulator -def: "A transcriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -synonym: "five prime open reading frame" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -def: "A start codon upstream of the ORF." [SO:ke] -synonym: "upstream AUG codon" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000631 -name: polycistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." 
[SO:ke] -synonym: "polycistronic primary transcript" EXACT [] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000632 -name: monocistronic_primary_transcript -def: "A primary transcript encoding for one gene product." [SO:ke] -synonym: "monocistronic primary transcript" EXACT [] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000633 -name: monocistronic_mRNA -def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd] -synonym: "monocistronic mRNA" EXACT [] -synonym: "monocistronic processed transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Monocistronic_mRNA "wiki" -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000634 -name: polycistronic_mRNA -def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd] -synonym: "polycistronic mRNA" EXACT [] -synonym: "polycistronic processed transcript" RELATED [] -xref: http://en.wikipedia.org/wiki/Polycistronic_mRNA "wiki" -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "A primary transcript that donates the spliced leader to other mRNA." [SO:ke] -synonym: "mini exon donor RNA" EXACT [] -synonym: "mini-exon donor RNA" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -synonym: "spliced leader RNA" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000635 ! mini_exon_donor_RNA - -[Term] -id: SO:0000637 -name: engineered_plasmid -def: "A plasmid that is engineered." 
[SO:xp] -synonym: "engineered plasmid" EXACT [] -synonym: "engineered plasmid gene" RELATED [] -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -synonym: "transcribed spacer region" EXACT [] -is_a: SO:0000838 ! rRNA_primary_transcript_region - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -synonym: "internal transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -synonym: "external transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -synonym: "tetranucleotide repeat microsatellite feature" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_encoding -synonym: "SRP RNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." 
[SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro RNA primary transcript" EXACT [] -synonym: "miRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000648 -name: stRNA_primary_transcript -def: "A primary transcript encoding a small temporal mRNA (SO:0000649)." [SO:ke] -synonym: "small temporal RNA primary transcript" EXACT [] -synonym: "stRNA primary transcript" EXACT [] -is_a: SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." 
[SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000255 ! rRNA_small_subunit_primary_transcript - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilizes 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000654 -name: maxicircle_gene -def: "A mitochondrial gene located in a maxicircle." 
[SO:xp] -synonym: "maxi-circle gene" EXACT [] -synonym: "maxicircle gene" EXACT [] -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000742 ! maxicircle - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000656 -name: stRNA_encoding -synonym: "stRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: has_part SO:0000726 ! repeat_unit - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_encoding -synonym: "tmRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_obsolete: true - -[Term] -id: SO:0000661 -name: intron_attribute -is_obsolete: true - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! 
intron - -[Term] -id: SO:0000663 -name: tRNA_encoding -synonym: "tRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -synonym: "introgressed chromosome region" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000665 -name: monocistronic_transcript -def: "A transcript that is monocistronic." [SO:xp] -synonym: "monocistronic transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000666 -name: mobile_intron -def: "An intron (mitochondrial, chloroplast, nuclear or prokaryotic) that encodes a double strand sequence specific endonuclease allowing for mobility." [SO:ke] -synonym: "mobile intron" EXACT [] -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0000667 -name: insertion -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -synonym: "sequence rearrangement feature" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." [SO:ma] -synonym: "chromosome breakage sequence" EXACT [] -is_a: SO:0000669 ! 
sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -synonym: "internal eliminated sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -synonym: "macronucleus destined segment" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non canonical splice site" EXACT [] -synonym: "non-canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000678 -consider: SO:0000679 - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." [SO:ke] -synonym: "canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000676 -consider: SO:0000677 - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -synonym: "canonical 3' splice site" EXACT [] -synonym: "canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." [SO:ke] -synonym: "canonical 5' splice site" EXACT [] -synonym: "canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! 
five_prime_cis_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." [SO:ke] -synonym: "non canonical 3' splice site" RELATED [] -synonym: "non canonical three prime splice site" EXACT [] -synonym: "non-canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non canonical 5' splice site" EXACT [] -synonym: "non canonical five prime splice site" EXACT [] -synonym: "non-canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non ATG start codon" EXACT [] -synonym: "non canonical start codon" EXACT [] -synonym: "non-canonical start codon" EXACT [] -is_a: SO:0000318 ! start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -synonym: "aberrant processed transcript" EXACT [] -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -synonym: "exonic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! 
nuclease_binding_site - -[Term] -id: SO:0000685 -name: DNAseI_hypersensitive_site -synonym: "DHS" EXACT [] -synonym: "DNAseI hypersensitive site" EXACT [] -is_a: SO:0000322 ! nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "A chromosomal translocation whereby the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements. This occurs for some translocations, particularly but not exclusively, reciprocal translocations." [SO:ma] -synonym: "translocation element" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000690 -name: gene_with_polycistronic_transcript -def: "A gene that encodes a polycistronic transcript." [SO:xp] -synonym: "gene with polycistronic transcript" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000078 ! polycistronic_transcript - -[Term] -id: SO:0000691 -name: cleaved_initiator_methionine -alt_id: BS:00067 -def: "The initiator methionine that has been cleaved from a mature polypeptide sequence." [EBIBS:GAR] -subset: biosapiens -synonym: "cleaved initiator methionine" EXACT [] -synonym: "init_met" RELATED [uniprot:feature_type] -synonym: "initiator methionine" RELATED [] -is_a: SO:0100011 ! 
cleaved_peptide_region - -[Term] -id: SO:0000692 -name: gene_with_dicistronic_transcript -def: "A gene that encodes a dicistronic transcript." [SO:xp] -synonym: "gene with dicistronic transcript" EXACT [] -intersection_of: SO:0000690 ! gene_with_polycistronic_transcript -intersection_of: transcribed_to SO:0000079 ! dicistronic_transcript - -[Term] -id: SO:0000693 -name: gene_with_recoded_mRNA -def: "A gene that encodes an mRNA that is recoded." [SO:xp] -synonym: "gene with recoded mRNA" EXACT [] -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000881 ! recoded - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000697 -name: gene_with_stop_codon_read_through -def: "A gene that encodes a transcript with stop codon readthrough." [SO:xp] -synonym: "gene with stop codon read through" EXACT [] -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000698 -name: gene_with_stop_codon_redefined_as_pyrrolysine -def: "A gene encoding an mRNA that has the stop codon redefined as pyrrolysine." 
[SO:xp] -synonym: "gene with stop codon redefined as pyrrolysine" EXACT [] -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000884 ! stop_codon_redefined_as_pyrrolysine - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -synonym: "breakpoint" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000001 ! region - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! 
gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjcent copies of a region (of length greater than 1)." [SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The 5' five prime splice site region of the donor RNA." [SO:ke] -comment: SL RNA contains a donor site. -synonym: "5 prime trans splice site" RELATED [] -synonym: "trans splice donor site" EXACT [] -synonym: "trans-splice donor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -synonym: "SL1 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -synonym: "SL2 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000710 -name: gene_with_stop_codon_redefined_as_selenocysteine -def: "A gene encoding an mRNA that has the stop codon redefined as selenocysteine." [SO:xp] -synonym: "gene with stop codon redefined as selenocysteine" EXACT [] -intersection_of: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: has_part SO:0000885 ! stop_codon_redefined_as_selenocysteine - -[Term] -id: SO:0000711 -name: gene_with_mRNA_recoded_by_translational_bypass -def: "A gene with mRNA recoded by translational bypass." [SO:xp] -synonym: "gene with mRNA recoded by translational bypass" EXACT [] -intersection_of: SO:0001217 ! 
protein_coding_gene -intersection_of: has_quality SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:0000712 -name: gene_with_transcript_with_translational_frameshift -def: "A gene encoding a transcript that has a translational frameshift." [SO:xp] -synonym: "gene with transcript with translational frameshift" EXACT [] -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000713 -name: DNA_motif -def: "A motif that is active in the DNA form of the sequence." [SO:ke] -synonym: "DNA motif" EXACT [] -xref: http://en.wikipedia.org/wiki/DNA_motif "wiki" -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001683 ! sequence_motif - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000716 -name: dicistronic_mRNA -def: "An mRNA that has the quality dicistronic." [SO:ke] -synonym: "dicistronic mRNA" EXACT [] -synonym: "dicistronic processed transcript" RELATED [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interrupted by one or more stop codons; usually identified through intergenomic sequence comparisons." [SGD:rb] -comment: Term requested by Rama from SGD. -synonym: "blocked reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -synonym: "superscaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000720 -name: foreign_transposable_element -def: "A transposable element that is foreign." [SO:ke] -comment: requested by Michael on 19 Nov 2004. -synonym: "foreign transposable element" EXACT [] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000721 -name: gene_with_dicistronic_primary_transcript -def: "A gene that encodes a dicistronic primary transcript." [SO:xp] -comment: Requested by Michael, 19 nov 2004. -synonym: "gene with dicistronic primary transcript" EXACT [] -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:1001197 ! dicistronic_primary_transcript - -[Term] -id: SO:0000722 -name: gene_with_dicistronic_mRNA -def: "A gene that encodes a polycistronic mRNA." [SO:xp] -comment: Requested by MA nov 19 2004. -synonym: "gene with dicistronic mRNA" EXACT [] -synonym: "gene with dicistronic processed transcript" EXACT [] -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:0000716 ! 
dicistronic_mRNA - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." [SO:ma] -synonym: "intervening DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/IDNA "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal - -[Term] -id: SO:0000726 -name: repeat_unit -def: "The simplest repeated component of a repeat region. A single repeat." [SO:ke] -comment: Added to comply with the feature table. A single repeat. -synonym: "repeat unit" EXACT [] -xref: http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." 
[SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -intersection_of: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000728 -name: intein -def: "A region of a peptide that is able to excise itself and rejoin the remaining portions with a peptide bond." [SO:ke] -comment: Intein-mediated protein splicing occurs after mRNA has been translated into a protein. -synonym: "protein intron" RELATED [] -xref: http://en.wikipedia.org/wiki/Intein "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000729 -name: intein_containing -def: "An attribute of protein-coding genes where the initial protein product contains an intein." [SO:ke] -synonym: "intein containing" EXACT [] -is_a: SO:0000010 ! protein_coding - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000731 -name: fragmentary -def: "An attribute to describe a feature that is incomplete." [SO:ke] -comment: Term added because of request by MO people. -synonym: "fragment" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000732 -name: predicted -def: "An attribute describing an unverified region." [SO:ke] -xref: http://en.wikipedia.org/wiki/Predicted "wiki" -is_a: SO:0000905 ! status - -[Term] -id: SO:0000733 -name: feature_attribute -def: "An attribute describing a located_sequence_feature." [SO:ke] -synonym: "feature attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000734 -name: exemplar_mRNA -def: "An exemplar is a representative cDNA sequence for each gene. 
The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: Added for the MO people. -synonym: "exemplar mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000864 ! exemplar - -[Term] -id: SO:0000735 -name: sequence_location -synonym: "sequence location" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_sequence -synonym: "organelle sequence" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "mitochondrial sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000738 -name: nuclear_sequence -synonym: "nuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -synonym: "nucleomorphic sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000740 -name: plastid_sequence -synonym: "plastid sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000741 -name: kinetoplast -alt_id: SO:0000826 -def: "A kinetoplast is an interlocked network of thousands of minicircles and tens of maxi circles, located near the base of the flagellum of some protozoan species." [PMID:8395055] -synonym: "kinetoplast_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Kinetoplast "wiki" -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0000742 ! maxicircle -intersection_of: has_part SO:0000980 ! 
minicircle - -[Term] -id: SO:0000742 -name: maxicircle -alt_id: SO:0000827 -def: "A maxicircle is a replicon, part of a kinetoplast, that contains open reading frames and replicates via a rolling circle method." [PMID:8395055] -synonym: "maxicircle_chromosome" EXACT [] -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000741 ! kinetoplast - -[Term] -id: SO:0000743 -name: apicoplast_sequence -synonym: "apicoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -synonym: "chromoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -synonym: "chloroplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -synonym: "cyanelle sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -synonym: "leucoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -synonym: "proplastid sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_location -synonym: "plasmid location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -synonym: "amplification origin" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_location -synonym: "proviral location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region -relationship: member_of SO:0005855 ! 
gene_group - -[Term] -id: SO:0000753 -name: clone_insert -def: "The region of sequence that has been inserted and is being propogated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000754 -name: lambda_vector -def: "The lambda bacteriophage is the vector for the linear lambda clone. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -synonym: "lambda vector" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000755 -name: plasmid_vector -synonym: "plasmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Plasmid_vector#Vectors "wiki" -is_a: SO:0000440 ! vector_replicon -intersection_of: SO:0001235 ! replicon -intersection_of: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template." [SO:ma] -xref: http://en.wikipedia.org/wiki/CDNA "wiki" -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -synonym: "single strand cDNA" EXACT [] -synonym: "single stranded cDNA" EXACT [] -synonym: "single-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -synonym: "double strand cDNA" RELATED [] -synonym: "double stranded cDNA" EXACT [] -synonym: "double-strand cDNA" RELATED [] -is_a: SO:0000756 ! 
cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_obsolete: true - -[Term] -id: SO:0000760 -name: YAC_clone -is_obsolete: true - -[Term] -id: SO:0000761 -name: phagemid_clone -is_obsolete: true - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000763 -name: fosmid_clone -is_obsolete: true - -[Term] -id: SO:0000764 -name: BAC_clone -is_obsolete: true - -[Term] -id: SO:0000765 -name: cosmid_clone -is_obsolete: true - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -def: "A tRNA sequence that has a pyrrolysine anticodon, and a 3' pyrrolysine binding region." [SO:ke] -synonym: "pyrrolysyl tRNA" EXACT [] -synonym: "pyrrolysyl-transfer ribonucleic acid" EXACT [] -synonym: "pyrrolysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0001178 ! pyrrolysine_tRNA_primary_transcript - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome." [SO:ma] -is_a: SO:0000155 ! plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria. Processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw, issn:1362-4962] -comment: Added in response to comment from Kelly Williams from Indiana. Nov 2005. -synonym: "tmRNA coding piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." 
[doi:10.1093/nar/gkh795, Indiana:kw] -comment: Added in response to Kelly Williams from Indiana. Date: Nov 2005. -synonym: "tmRNA acceptor piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000771 -name: QTL -def: "A quantitative trait locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." [http://rgd.mcw.edu/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005. -synonym: "quantitative trait locus" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000772 -name: genomic_island -def: "A genomic island is an integrated mobile genetic element, characterized by size (over 10 Kb). It that has features that suggest a foreign origin. These can include nucleotide distribution (oligonucleotides signature, CG content etc.) that differs from the bulk of the chromosome and/or genes suggesting DNA mobility." [Phigo:at, SO:ke] -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -synonym: "genomic island" EXACT [] -xref: http://en.wikipedia.org/wiki/Genomic_island "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "pathogenic island" EXACT [] -is_a: SO:0000772 ! 
genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands. -synonym: "metabolic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -def: "An adaptive island is a genomic island that provides an adaptive advantage to the host." [SO:ke] -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands. Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "adaptive island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands. Evolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA gene. John T. Sullivan and Clive W. Ronso PNAS 1998 Apr 28 95 (9) 5145-5149. -synonym: "symbiosis island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. 
Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000779 -name: engineered_episome -def: "An episome that is engineered." [SO:xp] -comment: Requested by Lynn Crosby Jan 2006. -synonym: "engineered episome" EXACT [] -intersection_of: SO:0000768 ! episome -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000780 -name: transposable_element_attribute -comment: Added by KE Jan 2006 to capture the kinds of attributes of TEs -is_obsolete: true - -[Term] -id: SO:0000781 -name: transgenic -def: "Attribute describing sequence that has been integrated with foreign sequence." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000782 -name: natural -def: "An attribute describing a feature that occurs in nature." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000783 -name: engineered -def: "An attribute to describe a region that was modified in vitro." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000784 -name: foreign -def: "An attribute to describe a region from another species." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000785 -name: cloned_region -comment: Added in response to Lynn Crosby. A clone insert may be composed of many cloned regions. -synonym: "cloned region" EXACT [] -synonym: "cloned segment" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000786 -name: reagent_attribute -comment: Added jan 2006 by KE. -synonym: "reagent attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000787 -name: clone_attribute -is_obsolete: true - -[Term] -id: SO:0000788 -name: cloned -is_obsolete: true - -[Term] -id: SO:0000789 -name: validated -def: "An attribute to describe a feature that has been proven." [SO:ke] -is_a: SO:0000905 ! 
status - -[Term] -id: SO:0000790 -name: invalidated -def: "An attribute describing a feature that is invalidated." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000791 -name: cloned_genomic -is_obsolete: true - -[Term] -id: SO:0000792 -name: cloned_cDNA -is_obsolete: true - -[Term] -id: SO:0000793 -name: engineered_DNA -is_obsolete: true - -[Term] -id: SO:0000794 -name: engineered_rescue_region -def: "A rescue region that is engineered." [SO:xp] -synonym: "engineered rescue fragment" EXACT [] -synonym: "engineered rescue region" EXACT [] -synonym: "engineered rescue segment" EXACT [] -intersection_of: SO:0000411 ! rescue_region -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000795 -name: rescue_mini_gene -def: "A mini_gene that rescues." [SO:xp] -synonym: "rescue mini gene" EXACT [] -synonym: "rescue mini-gene" EXACT [] -intersection_of: SO:0000815 ! mini_gene -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000796 -name: transgenic_transposable_element -def: "TE that has been modified in vitro, including insertion of DNA derived from a source other than the originating TE." [FB:mc] -comment: Modified as requested by Lynn - FB. May 2007. -synonym: "transgenic transposable element" EXACT [] -intersection_of: SO:0000101 ! transposable_element -intersection_of: derives_from SO:0000151 ! clone -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000797 -name: natural_transposable_element -def: "TE that exists (or existed) in nature." [FB:mc] -synonym: "natural transposable element" EXACT [] -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000782 ! natural - -[Term] -id: SO:0000798 -name: engineered_transposable_element -def: "TE that has been modified by manipulations in vitro." [FB:mc] -synonym: "engineered transposable element" EXACT [] -intersection_of: SO:0000101 ! 
transposable_element -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000799 -name: engineered_foreign_transposable_element -def: "A transposable_element that is engineered and foreign." [FB:mc] -synonym: "engineered foreign transposable element" EXACT [] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000800 -name: assortment_derived_duplication -def: "A multi-chromosome duplication aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment derived duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000801 -name: assortment_derived_deficiency_plus_duplication -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency and a duplication." [FB:gm] -synonym: "assortment derived deficiency plus duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000802 -name: assortment_derived_deficiency -def: "A multi-chromosome deficiency aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment-derived deficiency" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000803 -name: assortment_derived_aneuploid -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency or a duplication." [FB:gm] -synonym: "assortment derived aneuploid" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000804 -name: engineered_region -def: "A region that is engineered." [SO:xp] -synonym: "construct" EXACT [] -synonym: "engineered region" EXACT [] -synonym: "engineered sequence" EXACT [] -is_a: SO:0001409 ! biomaterial_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! 
engineered - -[Term] -id: SO:0000805 -name: engineered_foreign_region -def: "A region that is engineered and foreign." [SO:xp] -synonym: "engineered foreign region" EXACT [] -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000806 -name: fusion -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000807 -name: engineered_tag -def: "A tag that is engineered." [SO:xp] -synonym: "engineered tag" EXACT [] -intersection_of: SO:0000324 ! tag -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000808 -name: validated_cDNA_clone -def: "A cDNA clone that has been validated." [SO:xp] -synonym: "validated cDNA clone" EXACT [] -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000789 ! validated - -[Term] -id: SO:0000809 -name: invalidated_cDNA_clone -def: "A cDNA clone that is invalid." [SO:xp] -synonym: "invalidated cDNA clone" EXACT [] -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000790 ! invalidated - -[Term] -id: SO:0000810 -name: chimeric_cDNA_clone -def: "A cDNA clone invalidated because it is chimeric." [SO:xp] -synonym: "chimeric cDNA clone" EXACT [] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA - -[Term] -id: SO:0000811 -name: genomically_contaminated_cDNA_clone -def: "A cDNA clone invalidated by genomic contamination." [SO:xp] -synonym: "genomically contaminated cDNA clone" EXACT [] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000414 ! invalidated_by_genomic_contamination - -[Term] -id: SO:0000812 -name: polyA_primed_cDNA_clone -def: "A cDNA clone invalidated by polyA priming." [SO:xp] -synonym: "polyA primed cDNA clone" EXACT [] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000415 ! 
invalidated_by_genomic_polyA_primed_cDNA - -[Term] -id: SO:0000813 -name: partially_processed_cDNA_clone -def: "A cDNA invalidated clone by partial processing." [SO:xp] -synonym: "partially processed cDNA clone" EXACT [] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000416 ! invalidated_by_partial_processing - -[Term] -id: SO:0000814 -name: rescue -def: "An attribute describing a region's ability, when introduced to a mutant organism, to re-establish (rescue) a phenotype." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000815 -name: mini_gene -def: "By definition, minigenes are short open-reading frames (ORF), usually encoding approximately 9 to 20 amino acids, which are expressed in vivo (as distinct from being synthesized as peptide or protein ex vivo and subsequently injected). The in vivo synthesis confers a distinct advantage: the expressed sequences can enter both antigen presentation pathways, MHC I (inducing CD8+ T- cells, which are usually cytotoxic T-lymphocytes (CTL)) and MHC II (inducing CD4+ T-cells, usually 'T-helpers' (Th)); and can encounter B-cells, inducing antibody responses. Three main vector approaches have been used to deliver minigenes: viral vectors, bacterial vectors and plasmid DNA." [PMID:15992143] -synonym: "mini gene" EXACT [] -is_a: SO:0000236 ! ORF - -[Term] -id: SO:0000816 -name: rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "rescue gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000817 -name: wild_type -def: "An attribute describing sequence with the genotype found in nature and/or standard laboratory stock." [SO:ke] -synonym: "wild type" EXACT [] -xref: http://en.wikipedia.org/wiki/Wild_type "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000818 -name: wild_type_rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "wild type rescue gene" EXACT [] -is_a: SO:0000816 ! 
rescue_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000817 ! wild_type - -[Term] -id: SO:0000819 -name: mitochondrial_chromosome -def: "A chromosome originating in a mitochondria." [SO:xp] -synonym: "mitochondrial chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000820 -name: chloroplast_chromosome -def: "A chromosome originating in a chloroplast." [SO:xp] -synonym: "chloroplast chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000821 -name: chromoplast_chromosome -def: "A chromosome originating in a chromoplast." [SO:xp] -synonym: "chromoplast chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000822 -name: cyanelle_chromosome -def: "A chromosome originating in a cyanelle." [SO:xp] -synonym: "cyanelle chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000823 -name: leucoplast_chromosome -def: "A chromosome with origin in a leucoplast." [SO:xp] -synonym: "leucoplast chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000824 -name: macronuclear_chromosome -def: "A chromosome originating in a macronucleus." [SO:xp] -synonym: "macronuclear chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000083 ! macronuclear_sequence - -[Term] -id: SO:0000825 -name: micronuclear_chromosome -def: "A chromosome originating in a micronucleus." [SO:xp] -synonym: "micronuclear chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000084 ! 
micronuclear_sequence - -[Term] -id: SO:0000828 -name: nuclear_chromosome -def: "A chromosome originating in a nucleus." [SO:xp] -synonym: "nuclear chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000829 -name: nucleomorphic_chromosome -def: "A chromosome originating in a nucleomorph." [SO:xp] -synonym: "nucleomorphic chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000832 -name: promoter_region -def: "A region of sequence which is part of a promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -def: "A region of a mature transcript." 
[SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000838 -name: rRNA_primary_transcript_region -def: "A region of an rRNA primary transcript." [SO:ke] -comment: To allow transcribed_spacer_region to have a path to the root. -synonym: "rRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. 
-subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000840 -name: repeat_component -def: "A region of a repeated sequence." [SO:ke] -comment: A manufactured to group the parts of repeats, to give them an is_a path back to the root. -synonym: "repeat component" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000843 -name: bacterial_RNApol_promoter_region -def: "A region which is part of a bacterial RNA polymerase promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of bacterial_RNApol_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000844 -name: RNApol_II_promoter_region -def: "A region of sequence which is a promoter for RNA polymerase II." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_II_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000845 -name: RNApol_III_promoter_type_1_region -def: "A region of sequence which is a promoter for RNA polymerase III type 1." 
[SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_1 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000846 -name: RNApol_III_promoter_type_2_region -def: "A region of sequence which is a promoter for RNA polymerase III type 2." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_2 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000847 -name: tmRNA_region -def: "A region of a tmRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of tmRNA, thus giving them an is_a path back to the root. -synonym: "tmRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000584 ! tmRNA - -[Term] -id: SO:0000848 -name: LTR_component -synonym: "long term repeat component" EXACT [] -synonym: "LTR component" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000849 -name: three_prime_LTR_component -synonym: "3' long terminal repeat component" EXACT [] -synonym: "three prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000850 -name: five_prime_LTR_component -synonym: "5' long term repeat component" EXACT [] -synonym: "five prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000851 -name: CDS_region -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! 
exon - -[Term] -id: SO:0000853 -name: homologous_region -def: "A region that is homologous to another region." [SO:ke] -synonym: "homolog" EXACT [] -synonym: "homologous region" EXACT [] -synonym: "homologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Homology_(biology) "wiki" -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000857 ! homologous - -[Term] -id: SO:0000854 -name: paralogous_region -def: "A homologous_region that is paralogous to another region." [SO:ke] -comment: A term to be used in conjunction with the paralogous_to relationship. -synonym: "paralog" EXACT [] -synonym: "paralogous region" EXACT [] -synonym: "paralogue" EXACT [] -xref: http://en.wikipedia.org/wiki/Paralog#Paralogy "wiki" -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000859 ! paralogous - -[Term] -id: SO:0000855 -name: orthologous_region -def: "A homologous_region that is orthologous to another region." [SO:ke] -comment: This term should be used in conjunction with the similarity relationships defined in SO. -synonym: "ortholog" EXACT [] -synonym: "orthologous region" EXACT [] -synonym: "orthologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Ortholog#Orthology "wiki" -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000858 ! orthologous - -[Term] -id: SO:0000856 -name: conserved -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000857 -name: homologous -def: "Similarity due to common ancestry." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000858 -name: orthologous -def: "An attribute describing a kind of homology where divergence occured after a speciation event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000859 -name: paralogous -def: "An attribute describing a kind of homology where divergence occurred after a duplication event." [SO:ke] -is_a: SO:0000857 ! 
homologous - -[Term] -id: SO:0000860 -name: syntenic -def: "Attribute describing sequence regions occurring in same order on chromosome of different species." [SO:ke] -xref: http://en.wikipedia.org/wiki/Syntenic "wiki" -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000861 -name: capped_primary_transcript -def: "A primary transcript that is capped." [SO:xp] -synonym: "capped primary transcript" EXACT [] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000862 -name: capped_mRNA -def: "An mRNA that is capped." [SO:xp] -synonym: "capped mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000863 -name: mRNA_attribute -def: "An attribute describing an mRNA feature." [SO:ke] -synonym: "mRNA attribute" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000864 -name: exemplar -def: "An attribute describing a sequence is representative of a class of similar sequences." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000865 -name: frameshift -def: "An attribute describing a sequence that contains a mutation involving the deletion or insertion of one or more bases, where this number is not divisible by 3." [SO:ke] -xref: http://en.wikipedia.org/wiki/Frameshift "wiki" -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000866 -name: minus_1_frameshift -def: "A frameshift caused by deleting one base." [SO:ke] -synonym: "minus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000867 -name: minus_2_frameshift -def: "A frameshift caused by deleting two bases." [SO:ke] -synonym: "minus 2 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000868 -name: plus_1_frameshift -def: "A frameshift caused by inserting one base." 
[SO:ke] -synonym: "plus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000869 -name: plus_2_framshift -def: "A frameshift caused by inserting two bases." [SO:ke] -synonym: "plus 2 framshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000870 -name: trans_spliced -def: "An attribute describing transcript sequence that is created by splicing exons from diferent genes." [SO:ke] -synonym: "trans-spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000871 -name: polyadenylated_mRNA -def: "An mRNA that is polyadenylated." [SO:xp] -synonym: "polyadenylated mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000610 ! polyA_sequence -intersection_of: has_quality SO:0000246 ! polyadenylated - -[Term] -id: SO:0000872 -name: trans_spliced_mRNA -def: "An mRNA that is trans-spliced." [SO:xp] -synonym: "trans-spliced mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000636 ! spliced_leader_RNA -intersection_of: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000873 -name: edited_transcript -def: "A transcript that is edited." [SO:ke] -synonym: "edited transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: guided_by SO:0000602 ! guide_RNA -intersection_of: has_part SO:0000977 ! anchor_binding_site -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000874 -name: edited_transcript_by_A_to_I_substitution -def: "A transcript that has been edited by A to I substitution." [SO:ke] -synonym: "edited transcript by A to I substitution" EXACT [] -is_a: SO:0000873 ! edited_transcript - -[Term] -id: SO:0000875 -name: bound_by_protein -def: "An attribute describing a sequence that is bound by a protein." [SO:ke] -synonym: "bound by protein" EXACT [] -is_a: SO:0000277 ! 
bound_by_factor - -[Term] -id: SO:0000876 -name: bound_by_nucleic_acid -def: "An attribute describing a sequence that is bound by a nucleic acid." [SO:ke] -synonym: "bound by nucleic acid" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000877 -name: alternatively_spliced -def: "An attribute describing a situation where a gene may encode for more than 1 transcript." [SO:ke] -synonym: "alternatively spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000878 -name: monocistronic -def: "An attribute describing a sequence that contains the code for one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000879 -name: dicistronic -def: "An attribute describing a sequence that contains the code for two gene products." [SO:ke] -is_a: SO:0000880 ! polycistronic - -[Term] -id: SO:0000880 -name: polycistronic -def: "An attribute describing a sequence that contains the code for more than one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000881 -name: recoded -def: "An attribute describing an mRNA sequence that has been reprogrammed at translation, causing localized alterations." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000882 -name: codon_redefined -def: "An attribute describing the alteration of codon meaning." [SO:ke] -synonym: "codon redefined" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000883 -name: stop_codon_read_through -def: "A stop codon redefined to be a new amino acid." [SO:ke] -synonym: "stop codon read through" EXACT [] -synonym: "stop codon readthrough" RELATED [] -is_a: SO:0000145 ! recoded_codon - -[Term] -id: SO:0000884 -name: stop_codon_redefined_as_pyrrolysine -def: "A stop codon redefined to be the new amino acid, pyrrolysine." [SO:ke] -synonym: "stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000883 ! 
stop_codon_read_through - -[Term] -id: SO:0000885 -name: stop_codon_redefined_as_selenocysteine -def: "A stop codon redefined to be the new amino acid, selenocysteine." [SO:ke] -synonym: "stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000886 -name: recoded_by_translational_bypass -def: "Recoded mRNA where a block of nucleotides is not translated." [SO:ke] -synonym: "recoded by translational bypass" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000887 -name: translationally_frameshifted -def: "Recoding by frameshifting a particular site." [SO:ke] -synonym: "translationally frameshifted" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000888 -name: maternally_imprinted_gene -def: "A gene that is maternally_imprinted." [SO:xp] -synonym: "maternally imprinted gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000135 ! maternally_imprinted - -[Term] -id: SO:0000889 -name: paternally_imprinted_gene -def: "A gene that is paternally imprinted." [SO:xp] -synonym: "paternally imprinted gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000136 ! paternally_imprinted - -[Term] -id: SO:0000890 -name: post_translationally_regulated_gene -def: "A gene that is post translationally regulated." [SO:xp] -synonym: "post translationally regulated gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000891 -name: negatively_autoregulated_gene -def: "A gene that is negatively autoreguated." [SO:xp] -synonym: "negatively autoregulated gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000473 ! negatively_autoregulated - -[Term] -id: SO:0000892 -name: positively_autoregulated_gene -def: "A gene that is positively autoregulated." [SO:xp] -synonym: "positively autoregulated gene" EXACT [] -intersection_of: SO:0000704 ! 
gene -intersection_of: has_quality SO:0000475 ! positively_autoregulated - -[Term] -id: SO:0000893 -name: silenced -def: "An attribute describing an epigenetic process where a gene is inactivated at transcriptional or translational level." [SO:ke] -xref: http://en.wikipedia.org/wiki/Silenced "wiki" -is_a: SO:0000126 ! transcriptionally_repressed - -[Term] -id: SO:0000894 -name: silenced_by_DNA_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA modifications, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0000895 -name: silenced_by_DNA_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA methylation, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA methylation" EXACT [] -is_a: SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000896 -name: translationally_regulated_gene -def: "A gene that is translationally regulated." [SO:xp] -synonym: "translationally regulated gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000131 ! translationally_regulated - -[Term] -id: SO:0000897 -name: allelically_excluded_gene -def: "A gene that is allelically_excluded." [SO:xp] -synonym: "allelically excluded gene" EXACT [] -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000137 ! allelically_excluded - -[Term] -id: SO:0000898 -name: epigenetically_modified_gene -def: "A gene that is epigenetically modified." [SO:ke] -synonym: "epigenetically modified gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000899 -name: nuclear_mitochondrial -def: "An attribute describing a nuclear pseudogene of a mitochndrial gene." 
[SO:ke] -synonym: "nuclear mitochondrial" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000900 -name: processed -def: "An attribute describing a pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promotors, but often including a polyA tail." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000901 -name: unequally_crossed_over -def: "An attribute describing a pseudogene that was created by tandem duplication and unequal crossing over during recombination." [SO:ke] -synonym: "unequally crossed over" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000902 -name: transgene -def: "A transgene is a gene that has been transferred naturally or by any of a number of genetic engineering techniques from one organism to another." [SO:xp] -xref: http://en.wikipedia.org/wiki/Transgene "wiki" -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000903 -name: endogenous_retroviral_sequence -synonym: "endogenous retroviral sequence" EXACT [] -is_a: SO:0000751 ! proviral_location - -[Term] -id: SO:0000904 -name: rearranged_at_DNA_level -def: "An attribute to describe the sequence of a feature, where the DNA is rearranged." [SO:ke] -synonym: "rearranged at DNA level" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000905 -name: status -def: "An attribute describing the status of a feature, based on the available evidence." [SO:ke] -comment: This term is the hypernym of attributes and should not be annotated to. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000906 -name: independently_known -def: "Attribute to describe a feature that is independently known - not predicted." [SO:ke] -synonym: "independently known" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000907 -name: supported_by_sequence_similarity -def: "An attribute to describe a feature that has been predicted using sequence similarity techniques." 
[SO:ke] -synonym: "supported by sequence similarity" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000908 -name: supported_by_domain_match -def: "An attribute to describe a feature that has been predicted using sequence similarity of a known domain." [SO:ke] -synonym: "supported by domain match" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000909 -name: supported_by_EST_or_cDNA -def: "An attribute to describe a feature that has been predicted using sequence similarity to EST or cDNA data." [SO:ke] -synonym: "supported by EST or cDNA" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000910 -name: orphan -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000911 -name: predicted_by_ab_initio_computation -def: "An attribute describing a feature that is predicted by a computer program that did not rely on sequence similarity." [SO:ke] -synonym: "predicted by ab initio computation" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000912 -name: asx_turn -alt_id: BS:00203 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Aspartate or Asparagine (Asx), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0000913 -name: cloned_cDNA_insert -def: "A clone insert made from cDNA." [SO:xp] -synonym: "cloned cDNA insert" EXACT [] -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000914 -name: cloned_genomic_insert -def: "A clone insert made from genomic DNA." [SO:xp] -synonym: "cloned genomic insert" EXACT [] -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000915 -name: engineered_insert -def: "A clone insert that is engineered." 
[SO:xp] -synonym: "engineered insert" EXACT [] -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000916 -name: edit_operation -synonym: "edit operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000917 -name: insert_U -def: "An edit to insert a U." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "insert U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000918 -name: delete_U -def: "An edit to delete a uridine." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "delete U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000919 -name: substitute_A_to_I -def: "An edit to substitute an I for an A." [SO:ke] -synonym: "substitute A to I" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000920 -name: insert_C -def: "An edit to insert a cytidine." [SO:ke] -synonym: "insert C" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000921 -name: insert_dinucleotide -def: "An edit to insert a dinucleotide." [SO:ke] -synonym: "insert dinucleotide" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000922 -name: substitute_C_to_U -def: "An edit to substitute an U for a C." [SO:ke] -synonym: "substitute C to U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000923 -name: insert_G -def: "An edit to insert a G." [SO:ke] -synonym: "insert G" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000924 -name: insert_GC -def: "An edit to insert a GC dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. 
The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GC" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000925 -name: insert_GU -def: "An edit to insert a GU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000926 -name: insert_CU -def: "An edit to insert a CU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. 
Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert CU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000927 -name: insert_AU -def: "An edit to insert a AU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000928 -name: insert_AA -def: "An edit to insert a AA dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AA" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000929 -name: edited_mRNA -def: "An mRNA that is edited." [SO:xp] -synonym: "edited mRNA" EXACT [] -intersection_of: SO:0000873 ! 
edited_transcript -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000930 -name: guide_RNA_region -def: "A region of guide RNA." [SO:ma] -synonym: "guide RNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000602 ! guide_RNA - -[Term] -id: SO:0000931 -name: anchor_region -def: "A region of a guide_RNA that base-pairs to a target mRNA." [SO:jk] -synonym: "anchor region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000932 -name: pre_edited_mRNA -synonym: "pre-edited mRNA" EXACT [] -is_a: SO:0000120 ! protein_coding_primary_transcript - -[Term] -id: SO:0000933 -name: intermediate -def: "An attribute to describe a feature between stages of processing." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000934 -name: miRNA_target_site -def: "A miRNA target site is a binding site where the molecule is a micro RNA." [FB:cds] -synonym: "miRNA target site" EXACT [] -is_a: SO:0001655 ! nucleotide_binding_site - -[Term] -id: SO:0000935 -name: edited_CDS -def: "A CDS that is edited." [SO:xp] -synonym: "edited CDS" EXACT [] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000936 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -synonym: "vertebrate immunoglobulin T cell receptor rearranged segment" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000937 -name: vertebrate_immune_system_feature -is_obsolete: true - -[Term] -id: SO:0000938 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor rearranged gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000939 -name: vertebrate_immune_system_gene_recombination_signal_feature -synonym: "vertebrate immune system gene recombination signal feature" EXACT [] -is_a: SO:0000301 ! 
vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000940 -name: recombinationally_rearranged -synonym: "recombinationally rearranged" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000941 -name: recombinationally_rearranged_vertebrate_immune_system_gene -def: "A recombinationally rearranged gene of the vertebrate immune system." [SO:xp] -synonym: "recombinationally rearranged vertebrate immune system gene" EXACT [] -is_a: SO:0000456 ! recombinationally_rearranged_gene - -[Term] -id: SO:0000942 -name: attP_site -def: "An integration/excision site of a phage chromosome at which a recombinase acts to insert the phage DNA at a cognate integration/excision site on a bacterial chromosome." [SO:as] -synonym: "attP site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0001042 ! phage_sequence - -[Term] -id: SO:0000943 -name: attB_site -def: "An integration/excision site of a bacterial chromosome at which a recombinase acts to insert foreign DNA containing a cognate integration/excision site." [SO:as] -synonym: "attB site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000944 -name: attL_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attB_site and the 3' portion of attP_site." [SO:as] -synonym: "attBP'" RELATED [] -synonym: "attL site" RELATED [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000945 -name: attR_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attP_site and the 3' portion of attB_site." [SO:as] -synonym: "attPB'" RELATED [] -synonym: "attR site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000946 -name: integration_excision_site -def: "A region specifically recognised by a recombinase, which inserts or removes another region marked by a distinct cognate integration/excision site." 
[SO:as] -synonym: "attachment site" RELATED [] -synonym: "integration excision site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000947 -name: resolution_site -def: "A region specifically recognised by a recombinase, which separates a physically contiguous circle of DNA into two physically separate circles." [SO:as] -synonym: "res site" EXACT [] -synonym: "resolution site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000948 -name: inversion_site -def: "A region specifically recognised by a recombinase, which inverts the region flanked by a pair of sites." [SO:ma] -comment: A target region for site-specific inversion of a DNA region and which carries binding sites for a site-specific recombinase and accessory proteins as well as the site for specific cleavage by the recombinase. -synonym: "inversion site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000949 -name: dif_site -def: "A site at which replicated bacterial circular chromosomes are decatenated by site specific resolvase." [SO:as] -synonym: "dif site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000950 -name: attC_site -def: "An attC site is a sequence required for the integration of a DNA of an integron." [SO:as] -synonym: "attC site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000951 -name: eukaryotic_terminator -synonym: "eukaryotic terminator" EXACT [] -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000952 -name: oriV -def: "An origin of vegetative replication in plasmids and phages." [SO:as] -synonym: "origin of vegetative replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000953 -name: oriC -def: "An origin of bacterial chromosome replication." [SO:as] -synonym: "origin of bacterial chromosome replication" EXACT [] -is_a: SO:0000296 ! 
origin_of_replication - -[Term] -id: SO:0000954 -name: DNA_chromosome -def: "Structural unit composed of a self-replicating, DNA molecule." [SO:ma] -synonym: "DNA chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0000955 -name: double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded DNA molecule." [SO:ma] -synonym: "double stranded DNA chromosome" EXACT [] -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000985 ! double - -[Term] -id: SO:0000956 -name: single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded DNA molecule." [SO:ma] -synonym: "single stranded DNA chromosome" EXACT [] -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000984 ! single - -[Term] -id: SO:0000957 -name: linear_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear DNA molecule." [SO:ma] -synonym: "linear double stranded DNA chromosome" EXACT [] -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000958 -name: circular_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular DNA molecule." [SO:ma] -synonym: "circular double stranded DNA chromosome" EXACT [] -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000959 -name: linear_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear DNA molecule." [SO:ma] -synonym: "linear single stranded DNA chromosome" EXACT [] -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! 
linear - -[Term] -id: SO:0000960 -name: circular_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded DNA chromosome" EXACT [] -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000961 -name: RNA_chromosome -def: "Structural unit composed of a self-replicating, RNA molecule." [SO:ma] -synonym: "RNA chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000356 ! RNA - -[Term] -id: SO:0000962 -name: single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded RNA molecule." [SO:ma] -synonym: "single stranded RNA chromosome" EXACT [] -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000984 ! single - -[Term] -id: SO:0000963 -name: linear_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear RNA molecule." [SO:ma] -synonym: "linear single stranded RNA chromosome" EXACT [] -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000964 -name: linear_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear RNA molecule." [SO:ma] -synonym: "linear double stranded RNA chromosome" EXACT [] -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000965 -name: double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded RNA molecule." [SO:ma] -synonym: "double stranded RNA chromosome" EXACT [] -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000985 ! 
double - -[Term] -id: SO:0000966 -name: circular_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded RNA chromosome" EXACT [] -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000967 -name: circular_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular RNA molecule." [SO:ma] -synonym: "circular double stranded RNA chromosome" EXACT [] -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000968 -name: sequence_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "sequence replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000969 -name: rolling_circle -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070581. -synonym: "rolling circle" EXACT [] -xref: http://en.wikipedia.org/wiki/Rolling_circle "wiki" -is_obsolete: true - -[Term] -id: SO:0000970 -name: theta_replication -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070582 -synonym: "theta replication" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000971 -name: DNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0006260. -synonym: "DNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000972 -name: RNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "RNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000973 -name: insertion_sequence -def: "A terminal_inverted_repeat_element that is bacterial and only encodes the functions required for its transposition between these inverted repeats." 
[SO:as] -synonym: "insertion sequence" EXACT [] -synonym: "IS" RELATED [] -xref: http://en.wikipedia.org/wiki/Insertion_sequence "wiki" -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000975 -name: minicircle_gene -synonym: "minicircle gene" EXACT [] -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000980 ! minicircle - -[Term] -id: SO:0000976 -name: cryptic -def: "A feature_attribute describing a feature that is not manifest under normal conditions." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000977 -name: anchor_binding_site -comment: Part of an edited transcript only. -synonym: "anchor binding site" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000978 -name: template_region -def: "A region of a guide_RNA that specifies the insertions and deletions of bases in the editing of a target mRNA." [SO:jk] -synonym: "information region" EXACT [] -synonym: "template region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000979 -name: gRNA_encoding -def: "A non-protein_coding gene that encodes a guide_RNA." [SO:ma] -synonym: "gRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000980 -name: minicircle -alt_id: SO:0000974 -def: "A minicircle is a replicon, part of a kinetoplast, that encodes for guide RNAs." [PMID:8395055] -synonym: "minicircle_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Minicircle "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000741 ! kinetoplast - -[Term] -id: SO:0000981 -name: rho_dependent_bacterial_terminator -synonym: "rho dependent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000982 -name: rho_independent_bacterial_terminator -synonym: "rho independent bacterial terminator" EXACT [] -is_a: SO:0000614 ! 
bacterial_terminator - -[Term] -id: SO:0000983 -name: strand_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "strand attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000984 -name: single -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000985 -name: double -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000986 -name: topology_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "topology attribute" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000987 -name: linear -def: "A quality of a nucleotide polymer that has a 3'-terminal residue and a 5'-terminal residue." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "two-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000988 ! circular - -[Term] -id: SO:0000988 -name: circular -def: "A quality of a nucleotide polymer that has no terminal nucleotide residues." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "zero-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000987 ! linear - -[Term] -id: SO:0000989 -name: class_II_RNA -def: "Small non-coding RNA (59-60 nt long) containing 5' and 3' ends that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -synonym: "class II RNA" EXACT [] -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000990 -name: class_I_RNA -def: "Small non-coding RNA (55-65 nt long) containing highly conserved 5' and 3' ends (16 and 8 nt, respectively) that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -comment: Requested by Karen Pilcher - Dictybase. song-Term Tracker-1574577. -synonym: "class I RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000991 -name: genomic_DNA -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "genomic DNA" EXACT [] -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000992 -name: BAC_cloned_genomic_insert -comment: Requested by Andy Schroder - Flybase Harvard, Nov 2006. -synonym: "BAC cloned genomic insert" EXACT [] -intersection_of: SO:0000914 ! cloned_genomic_insert -intersection_of: derives_from SO:0000153 ! BAC - -[Term] -id: SO:0000993 -name: consensus -comment: Term added Dec 06 to comply with mapping to MGED terms. It should be used to generate consensus regions. The specific cross product terms they require are consensus_region and consensus_mRNA. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000994 -name: consensus_region -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus region" EXACT [] -is_a: SO:0001410 ! experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000995 -name: consensus_mRNA -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000996 -name: predicted_gene -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "predicted gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000732 ! 
predicted - -[Term] -id: SO:0000997 -name: gene_fragment -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "gene fragment" EXACT [] -intersection_of: SO:0000842 ! gene_component_region -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0000998 -name: recursive_splice_site -def: "A recursive splice site is a splice site which subdivides a large intron. Recursive splicing is a mechanism that splices large introns by sub dividing the intron at non exonic elements and alternate exons." [http://www.genetics.org/cgi/content/full/170/2/661] -synonym: "recursive splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000999 -name: BAC_end -def: "A region of sequence from the end of a BAC clone that may provide a highly specific marker." [SO:ke] -comment: Requested by Keith Boroevich December, 2006. -synonym: "BAC end" EXACT [] -synonym: "BAC end sequence" EXACT [] -synonym: "BES" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000153 ! BAC - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." 
[RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001003 -name: solo_LTR -def: "A recombination product between the 2 LTR of the same element." [SO:ke] -comment: Requested by Hadi Quesneville January 2007. -synonym: "solo LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0001004 -name: low_complexity -synonym: "low complexity" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0001005 -name: low_complexity_region -synonym: "low complexity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001004 ! low_complexity - -[Term] -id: SO:0001006 -name: prophage -def: "A phage genome after it has established in the host genome in a latent/immune state either as a plasmid or as an integrated \"island\"." [GOC:jl] -xref: http://en.wikipedia.org/wiki/Prophage "wiki" -is_a: SO:0000113 ! proviral_region - -[Term] -id: SO:0001007 -name: cryptic_prophage -def: "A remnant of an integrated prophage in the host genome or an \"island\" in the host genome that includes phage like-genes." [GOC:jl] -comment: This is not cryptic in the same sense as a cryptic gene or cryptic splice site. -synonym: "cryptic prophage" EXACT [] -xref: http://ecoliwiki.net/colipedia/index.php/Category\:Cryptic_Prophage.w -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001008 -name: tetraloop -def: "A base-paired stem with loop of 4 non-hydrogen bonded nucleotides." [SO:ke] -xref: http://en.wikipedia.org/wiki/Tetraloop "wiki" -is_a: SO:0000313 ! stem_loop - -[Term] -id: SO:0001009 -name: DNA_constraint_sequence -def: "A double-stranded DNA used to control macromolecular structure and function." 
[http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au\]&dispmax=50] -synonym: "DNA constraint" EXACT [] -synonym: "DNA constraint sequence" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0001010 -name: i_motif -def: "A cytosine rich domain whereby strands associate both inter- and intramolecularly at moderately acidic pH." [PMID:9753739] -synonym: "i motif" EXACT [] -synonym: "short intercalated motif" EXACT [] -is_a: SO:0000142 ! DNA_sequence_secondary_structure - -[Term] -id: SO:0001011 -name: PNA_oligo -def: "Peptide nucleic acid, is a chemical not known to occur naturally but is artificially synthesized and used in some biological research and medical treatments. The PNA backbone is composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [SO:ke] -synonym: "peptide nucleic acid" EXACT [] -synonym: "PNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Peptide_nucleic_acid "wiki" -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001184 ! PNA - -[Term] -id: SO:0001012 -name: DNAzyme -def: "A DNA sequence with catalytic activity." [SO:cb] -comment: Added by request from Colin Batchelor. -synonym: "catalytic DNA" EXACT [] -synonym: "deoxyribozyme" RELATED [] -synonym: "DNA enzyme" EXACT [] -intersection_of: SO:0000696 ! oligo -intersection_of: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0001013 -name: MNP -def: "A multiple nucleotide polymorphism with alleles of common length > 1, for example AAA/TTT." [http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?rs=rs2067431] -synonym: "multiple nucleotide polymorphism" RELATED [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0001014 -name: intron_domain -comment: Requested by Colin Batchelor, Feb 2007. -synonym: "intron domain" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000188 ! 
intron - -[Term] -id: SO:0001015 -name: wobble_base_pair -def: "A type of non-canonical base pairing, most commonly between G and U, which is important for the secondary structure of RNAs. It has similar thermodynamic stability to the Watson-Crick pairing. Wobble base pairs only have two hydrogen bonds. Other wobble base pair possibilities are I-A, I-U and I-C." [PMID:11256617] -synonym: "wobble base pair" EXACT [] -synonym: "wobble pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Wobble_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0001016 -name: internal_guide_sequence -def: "A purine-rich sequence in the group I introns which determines the locations of the splice sites in group I intron splicing and has catalytic activity." [SO:cb] -synonym: "IGS" EXACT [] -synonym: "internal guide sequence" EXACT [] -is_a: SO:0001014 ! intron_domain -relationship: part_of SO:0000587 ! group_I_intron - -[Term] -id: SO:0001017 -name: silent_mutation -def: "A sequence variant that does not affect protein function. Silent mutations may occur in genic ( CDS, UTR, intron etc) and intergenic regions. Silent mutations may have affects on processes such as splicing and regulation." [SO:ke] -comment: Added in March 2007 in after meeting with pharmgkb. Although this term is in common usage, it is better to annotate with the most specific term possible, such as synonymous codon, intron variant etc. -synonym: "silent mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Silent_mutation "wiki" -is_a: SO:0001537 ! structural_variant - -[Term] -id: SO:0001018 -name: epitope -def: "A binding site that, in the molecule, interacts selectively and non-covalently with antibodies, B cells or T cells." [http://en.wikipedia.org/wiki/Epitope, SO:cb] -comment: Requested by Trish Whetzel. -xref: http://en.wikipedia.org/wiki/Epitope "wiki" -is_a: SO:0000409 ! 
binding_site - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -subset: SOFA -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0001020 -name: sequence_variant_affecting_copy_number -comment: OBSOLETE: This term was deleted as it conflated more than one term. The alteration is separate from the effect. -synonym: "mutation affecting copy number" EXACT [] -synonym: "sequence variant affecting copy number" EXACT [] -is_obsolete: true -replaced_by: SO:0001563 - -[Term] -id: SO:0001021 -name: chromosome_breakpoint -alt_id: SO:0001242 -synonym: "aberration breakpoint" EXACT [] -synonym: "aberration_junction" EXACT [] -synonym: "chromosome breakpoint" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0001022 -name: inversion_breakpoint -def: "The point within a chromosome where an inversion begins or ends." [SO:cb] -synonym: "inversion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001023 -name: allele -def: "An allele is one of a set of coexisting sequence variants of a gene." [SO:immuno_workshop] -synonym: "allelomorph" EXACT [] -xref: http://en.wikipedia.org/wiki/Allele "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000704 ! gene - -[Term] -id: SO:0001024 -name: haplotype -def: "A haplotype is one of a set of coexisting sequence variants of a haplotype block." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Haplotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000355 ! 
haplotype_block - -[Term] -id: SO:0001025 -name: polymorphic_sequence_variant -def: "A sequence variant that is segregating in one or more natural populations of a species." [SO:immuno_workshop] -synonym: "polymorphic sequence variant" EXACT [] -is_a: SO:0001023 ! allele - -[Term] -id: SO:0001026 -name: genome -def: "A genome is the sum of genetic material within a cell or virion." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genome "wiki" -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0001235 ! replicon - -[Term] -id: SO:0001027 -name: genotype -def: "A genotype is a variant genome, complete or incomplete." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0001026 ! genome - -[Term] -id: SO:0001028 -name: diplotype -def: "A diplotype is a pair of haplotypes from a given individual. It is a genotype where the phase is known." [SO:immuno_workshop] -is_a: SO:0001507 ! variant_collection - -[Term] -id: SO:0001029 -name: direction_attribute -synonym: "direction attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001030 -name: forward -def: "Forward is an attribute of the feature, where the feature is in the 5' to 3' direction." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001031 -name: reverse -def: "Reverse is an attribute of the feature, where the feature is in the 3' to 5' direction. Again could be applied to primer." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001032 -name: mitochondrial_DNA -comment: This terms is used by MO. -synonym: "mitochondrial DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_DNA "wiki" -intersection_of: SO:0000737 ! mitochondrial_sequence -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001033 -name: chloroplast_DNA -comment: This term is used by MO. 
-synonym: "chloroplast DNA" EXACT [] -intersection_of: SO:0000745 ! chloroplast_sequence -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001034 -name: mirtron -def: "A de-branched intron which mimics the structure of pre-miRNA and enters the miRNA processing pathway without Drosha mediated cleavage." [PMID:17589500, SO:ma] -comment: Ruby et al. Nature 448:83 describe a new class of miRNAs that are derived from de-branched introns. -is_a: SO:0001014 ! intron_domain - -[Term] -id: SO:0001035 -name: piRNA -def: "A small non coding RNA, part of a silencing system that prevents the spreading of selfish genetic elements." [SO:ke] -synonym: "piwi-associated RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/PiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001036 -name: arginyl_tRNA -def: "A tRNA sequence that has an arginine anticodon, and a 3' arginine binding region." [SO:ke] -synonym: "arginyl tRNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000212 ! arginine_tRNA_primary_transcript - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0001038 -name: extrachromosomal_mobile_genetic_element -def: "An MGE that is not integrated into the host chromosome." [SO:ke] -synonym: "extrachromosomal mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." 
[SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001040 -name: integrated_plasmid -def: "A plasmid sequence that is integrated within the host chromosome." [SO:ke] -synonym: "integrated plasmid" EXACT [] -intersection_of: SO:0001039 ! integrated_mobile_genetic_element -intersection_of: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0001041 -name: viral_sequence -def: "The region of nucleotide sequence of a virus, a submicroscopic particle that replicates by infecting a host cell." [SO:ke] -comment: The definitions of the children of this term were revised Decemeber 2007 after discussion on song-devel. The resulting definitions are slightly unweildy but hopefully more logically correct. -synonym: "viral sequence" EXACT [] -synonym: "virus sequence" EXACT [] -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0001042 -name: phage_sequence -def: "The nucleotide sequence of a virus that infects bacteria." [SO:ke] -synonym: "bacteriophage" EXACT [] -synonym: "phage" EXACT [] -synonym: "phage sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Bacteriophage "wiki" -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001043 -name: attCtn_site -def: "An attachment site located on a conjugative transposon and used for site-specific integration of a conjugative transposon." [Phigo:at] -synonym: "attCtn site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000371 ! conjugative_transposon - -[Term] -id: SO:0001044 -name: nuclear_mt_pseudogene -def: "A nuclear pseudogene of a mitochndrial gene." [SO:xp] -synonym: "nuclear mitochondrial pseudogene" EXACT [] -synonym: "nuclear mt pseudogene" EXACT [] -synonym: "NUMT" EXACT [] -is_a: SO:0000336 ! 
pseudogene - -[Term] -id: SO:0001045 -name: cointegrated_plasmid -def: "A MGE region consisting of two fused plasmids resulting from a replicative transposition event." [phigo:at] -synonym: "cointegrated plasmid" EXACT [] -synonym: "cointegrated replicon" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0001046 -name: IRLinv_site -def: "Component of the inversion site located at the left of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRLinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001047 -name: IRRinv_site -def: "Component of the inversion site located at the right of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRRinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001048 -name: inversion_site_part -def: "A region located within an inversion site." [SO:ke] -comment: A term created to allow the parts of an inversion site have an is_a path back to the root. -synonym: "inversion site part" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0001049 -name: defective_conjugative_transposon -def: "An island that contains genes for integration/excision and the gene and site for the initiation of intercellular transfer by conjugation. It can be complemented for transfer by a conjugative transposon." [Phigo:ariane] -synonym: "defective conjugative transposon" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001050 -name: repeat_fragment -def: "A portion of a repeat, interrupted by the insertion of another element." [SO:ke] -comment: Requested by Chris Smith, and others at Flybase to help annotate nested repeats. -synonym: "repeat fragment" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0001649 ! 
nested_repeat - -[Term] -id: SO:0001051 -name: nested_region -is_obsolete: true - -[Term] -id: SO:0001052 -name: nested_repeat -is_obsolete: true - -[Term] -id: SO:0001053 -name: nested_transposon -is_obsolete: true - -[Term] -id: SO:0001054 -name: transposon_fragment -def: "A portion of a transposon, interrupted by the insertion of another element." [SO:ke] -synonym: "transposon fragment" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0001648 ! nested_transposon - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -subset: SOFA -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." [SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001057 -name: enhanceosome -is_obsolete: true - -[Term] -id: SO:0001058 -name: promoter_targeting_sequence -def: "A transcriptional_cis_regulatory_region that restricts the activity of a CRM to a single promoter and which functions only when both itself and an insulator are located between the CRM and the promoter." [SO:regcreative] -synonym: "promoter targeting sequence" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. -subset: SOFA -synonym: "partially characterised change in DNA sequence" EXACT [] -synonym: "partially_characterised_change_in_DNA_sequence" EXACT [] -synonym: "sequence alteration" EXACT [] -is_a: SO:0000110 ! 
sequence_feature - -[Term] -id: SO:0001060 -name: sequence_variant -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -synonym: "sequence variant" EXACT [] - -[Term] -id: SO:0001061 -name: propeptide_cleavage_site -alt_id: BS:00063 -def: "The propeptide_cleavage_site is the arginine/lysine boundary on a propeptide where cleavage occurs." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "propeptide cleavage site" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0001062 -name: propeptide -alt_id: BS:00077 -def: "Part of a peptide chain which is cleaved off during the formation of the mature protein." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "propep" RELATED [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Propeptide "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature_peptide_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001064 -name: active_peptide -alt_id: BS:00076 -def: "Active peptides are proteins which are biologically active, released from a precursor molecule." [EBIBS:GAR, UniProt:curation_manual] -comment: Hormones, neuropeptides, antimicrobial peptides, are active peptides. They are typically short (<40 amino acids) in length. -subset: biosapiens -synonym: "active peptide" EXACT [] -synonym: "peptide" BROAD [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Peptide "wiki" -is_a: SO:0000419 ! 
mature_protein_region - -[Term] -id: SO:0001066 -name: compositionally_biased_region_of_peptide -alt_id: BS:00068 -def: "Polypeptide region that is rich in a particular amino acid or homopolymeric and greater than three residues in length." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "compbias" RELATED [uniprot:feature_type] -synonym: "compositional bias" RELATED [] -synonym: "compositionally biased" RELATED [] -synonym: "compositionally biased region of peptide" RELATED [] -synonym: "compositionally_biased_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001067 -name: polypeptide_motif -alt_id: BS:00032 -def: "A sequence motif is a short (up to 20 amino acids) region of biological interest. Such motifs, although they are too short to constitute functional domains, share sequence similarities and are conserved in different proteins. They display a common function (protein-binding, subcellular location etc.)." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "motif" BROAD [uniprot:feature_type] -synonym: "polypeptide motif" EXACT [] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001068 -name: polypeptide_repeat -alt_id: BS:00070 -def: "A polypeptide_repeat is a single copy of an internal sequence repetition." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "polypeptide repeat" EXACT [] -synonym: "repeat" RELATED [uniprot:feature_type] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001070 -name: polypeptide_structural_region -alt_id: BS:00337 -def: "Region of polypeptide with a given structural property." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "polypeptide structural region" EXACT [] -synonym: "structural_region" RELATED [] -is_a: SO:0000839 ! 
polypeptide_region - -[Term] -id: SO:0001071 -name: membrane_structure -alt_id: BS:00128 -def: "Arrangement of the polypeptide with respect to the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "membrane_structure" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001072 -name: extramembrane_polypeptide_region -alt_id: BS:00154 -def: "Polypeptide region that is localized outside of a lipid bilayer." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "extramembrane" RELATED BS [] -synonym: "extramembrane polypeptide region" EXACT [] -synonym: "extramembrane_region" RELATED BS [] -synonym: "topo_dom" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001073 -name: cytoplasmic_polypeptide_region -alt_id: BS:00145 -def: "Polypeptide region that is localized inside the cytoplasm." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "cytoplasm_location" EXACT BS [] -synonym: "cytoplasmic polypeptide region" EXACT [] -synonym: "inside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001074 -name: non_cytoplasmic_polypeptide_region -alt_id: BS:00144 -def: "Polypeptide region that is localized outside of a lipid bilayer and outside of the cytoplasm." [EBIBS:GAR, SO:cb] -comment: This could be inside an organelle within the cell. -subset: biosapiens -synonym: "non cytoplasmic polypeptide region" EXACT [] -synonym: "non_cytoplasm_location" EXACT BS [] -synonym: "outside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001075 -name: intramembrane_polypeptide_region -alt_id: BS:00156 -def: "Polypeptide region present in the lipid bilayer." [EBIBS:GAR] -subset: biosapiens -synonym: "intramembrane" RELATED BS [] -synonym: "intramembrane polypeptide region" EXACT [] -is_a: SO:0001070 ! 
polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001076 -name: membrane_peptide_loop -alt_id: BS:00155 -def: "Polypeptide region localized within the lipid bilayer where both ends traverse the same membrane." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "membrane peptide loop" EXACT [] -synonym: "membrane_loop" RELATED BS [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001077 -name: transmembrane_polypeptide_region -alt_id: BS:00158 -def: "Polypeptide region traversing the lipid bilayer." [EBIBS:GAR, UniProt:curator_manual] -subset: biosapiens -synonym: "transmem" RELATED BS [uniprot:feature_type] -synonym: "transmembrane" RELATED BS [] -synonym: "transmembrane polypeptide region" EXACT [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001078 -name: polypeptide_secondary_structure -alt_id: BS:00003 -def: "A region of peptide with secondary structure has hydrogen bonding along the peptide chain that causes a defined conformation of the chain." [EBIBS:GAR] -comment: Biosapien term was secondary_structure. -subset: biosapiens -synonym: "2nary structure" RELATED BS [] -synonym: "polypeptide secondary structure" EXACT [] -synonym: "secondary structure" RELATED BS [] -synonym: "secondary structure region" RELATED BS [] -synonym: "secondary_structure" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Secondary_structure "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001079 -name: polypeptide_structural_motif -alt_id: BS:0000338 -def: "Motif is a three-dimensional structural element within the chain, which appears also in a variety of other molecules. Unlike a domain, a motif does not need to form a stable globular unit." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide structural motif" RELATED [] -synonym: "structural_motif" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Structural_motif "wiki" -is_a: SO:0001070 ! 
polypeptide_structural_region - -[Term] -id: SO:0001080 -name: coiled_coil -alt_id: BS:00041 -def: "A coiled coil is a structural motif in proteins, in which alpha-helices are coiled together like the strands of a rope." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "coiled" RELATED BS [uniprot:feature_type] -synonym: "coiled coil" EXACT [] -xref: http://en.wikipedia.org/wiki/Coiled_coil "wiki" -is_a: SO:0001079 ! polypeptide_structural_motif - -[Term] -id: SO:0001081 -name: helix_turn_helix -alt_id: BS:00147 -def: "A motif comprising two helices separated by a turn." [EBIBS:GAR] -subset: biosapiens -synonym: "helix turn helix" EXACT [] -synonym: "helix-turn-helix" EXACT [] -synonym: "HTH" RELATED BS [] -is_a: SO:0001079 ! polypeptide_structural_motif -relationship: has_part SO:0001114 ! peptide_helix -relationship: has_part SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001082 -name: polypeptide_sequencing_information -alt_id: BS:00125 -def: "Incompatibility in the sequence due to some experimental problem." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "sequencing_information" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0001083 -name: non_adjacent_residues -alt_id: BS:00182 -def: "Indicates that two consecutive residues in a fragment sequence are not consecutive in the full-length protein and that there are a number of unsequenced residues between them." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "non consecutive" EXACT [] -synonym: "non_cons" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001084 -name: non_terminal_residue -alt_id: BS:00072 -def: "The residue at an extremity of the sequence is not the terminal residue." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "non terminal" EXACT [] -synonym: "non_ter" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! 
polypeptide_sequencing_information - -[Term] -id: SO:0001085 -name: sequence_conflict -alt_id: BS:00069 -def: "Different sources report differing sequences." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "conflict" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001086 -name: sequence_uncertainty -alt_id: BS:00181 -def: "Describes the positions in a sequence where the authors are unsure about the sequence assignment." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "unsure" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001087 -name: cross_link -alt_id: BS:00178 -def: "Posttranslationally formed amino acid bonds." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "cross_link" EXACT [] -synonym: "crosslink" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001088 -name: disulfide_bond -alt_id: BS:00028 -def: "The covalent bond between sulfur atoms that binds two peptide chains or different parts of one peptide chain and is a structural determinant in many protein molecules." [EBIBS:GAR, UniProt:curation_manual] -comment: 2 discreet & joined. -subset: biosapiens -synonym: "disulfid" RELATED [] -synonym: "disulfide" RELATED [] -synonym: "disulfide bond" RELATED [] -synonym: "disulfide_bond" EXACT [] -synonym: "disulphide" EXACT [] -synonym: "disulphide bond" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001089 -name: post_translationally_modified_region -alt_id: BS:00052 -def: "A region where a transformation occurs in a protein after it has been synthesized. This which may regulate, stabilize, crosslink or introduce new chemical functionalities in the protein." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. 
-subset: biosapiens -synonym: "mod_res" EXACT [uniprot:feature_type] -synonym: "modified residue" EXACT [] -synonym: "post_translational_modification" EXACT [] -xref: http://en.wikipedia.org/wiki/Post_translational_modification "wiki" -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001090 -name: covalent_binding_site -alt_id: BS:00246 -def: "Binding involving a covalent bond." [EBIBS:GAR] -subset: biosapiens -synonym: "covalent_binding_site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001091 -name: non_covalent_binding_site -alt_id: BS:00029 -def: "Binding site for any chemical group (co-enzyme, prosthetic group, etc.)." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "binding" RELATED [uniprot:curation] -synonym: "binding site" RELATED [] -synonym: "non_covalent_binding_site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001092 -name: polypeptide_metal_contact -alt_id: BS:00027 -def: "Residue is part of a binding site for a metal ion." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "metal_binding" RELATED [] -is_a: SO:0001656 ! metal_binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001093 -name: protein_protein_contact -alt_id: BS:00131 -def: "Residues involved in protein-protein interactions." [EBIBS:GAR, UniProt:Curation_manual] -subset: biosapiens -synonym: "protein protein contact" EXACT [] -synonym: "protein protein contact site" EXACT [] -synonym: "protein_protein_interaction" RELATED [] -xref: http://en.wikipedia.org/wiki/Protein_protein_interaction "wiki" -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001094 -name: polypeptide_calcium_ion_contact_site -alt_id: BS:00186 -def: "Residue involved in contact with calcium." [EBIBS:GAR] -subset: biosapiens -synonym: "ca bind" RELATED [] -synonym: "Ca_contact_site" EXACT [] -synonym: "polypeptide calcium ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001095 -name: polypeptide_cobalt_ion_contact_site -alt_id: BS:00136 -def: "Residue involved in contact with cobalt." [EBIBS:GAR] -subset: biosapiens -synonym: "Co_contact_site" EXACT [] -synonym: "polypeptide cobalt ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001096 -name: polypeptide_copper_ion_contact_site -alt_id: BS:00146 -def: "Residue involved in contact with copper." [EBIBS:GAR] -subset: biosapiens -synonym: "Cu_contact_site" EXACT [] -synonym: "polypeptide copper ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001097 -name: polypeptide_iron_ion_contact_site -alt_id: BS:00137 -def: "Residue involved in contact with iron." [EBIBS:GAR] -subset: biosapiens -synonym: "Fe_contact_site" EXACT [] -synonym: "polypeptide iron ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001098 -name: polypeptide_magnesium_ion_contact_site -alt_id: BS:00187 -def: "Residue involved in contact with magnesium." [EBIBS:GAR] -subset: biosapiens -synonym: "Mg_contact_site" EXACT [] -synonym: "polypeptide magnesium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001099 -name: polypeptide_manganese_ion_contact_site -alt_id: BS:00140 -def: "Residue involved in contact with manganese." [EBIBS:GAR] -subset: biosapiens -synonym: "Mn_contact_site" EXACT [] -synonym: "polypeptide manganese ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001100 -name: polypeptide_molybdenum_ion_contact_site -alt_id: BS:00141 -def: "Residue involved in contact with molybdenum." [EBIBS:GAR] -subset: biosapiens -synonym: "Mo_contact_site" EXACT [] -synonym: "polypeptide molybdenum ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001101 -name: polypeptide_nickel_ion_contact_site -alt_id: BS:00142 -def: "Residue involved in contact with nickel." [EBIBS:GAR] -subset: biosapiens -synonym: "Ni_contact_site" EXACT [] -synonym: "polypeptide nickel ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001102 -name: polypeptide_tungsten_ion_contact_site -alt_id: BS:00143 -def: "Residue involved in contact with tungsten." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide tungsten ion contact site" EXACT [] -synonym: "W_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001103 -name: polypeptide_zinc_ion_contact_site -alt_id: BS:00185 -def: "Residue involved in contact with zinc." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide zinc ion contact site" EXACT [] -synonym: "Zn_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001104 -name: catalytic_residue -alt_id: BS:00026 -def: "Amino acid involved in the activity of an enzyme." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "act_site" RELATED [uniprot:feature_type] -synonym: "active site residue" EXACT [] -synonym: "catalytic residue" EXACT [] -is_a: SO:0001237 ! amino_acid -relationship: part_of SO:0100019 ! polypeptide_catalytic_motif - -[Term] -id: SO:0001105 -name: polypeptide_ligand_contact -alt_id: BS:00157 -def: "Residues which interact with a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide ligand contact" EXACT [] -synonym: "protein-ligand interaction" RELATED [] -is_a: SO:0001657 ! ligand_binding_site -is_a: SO:0100002 ! 
molecular_contact_region - -[Term] -id: SO:0001106 -name: asx_motif -alt_id: BS:00202 -def: "A motif of five consecutive residues and two H-bonds in which: Residue(i) is Aspartate or Asparagine (Asx), side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3), main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001107 -name: beta_bulge -alt_id: BS:00208 -def: "A motif of three residues within a beta-sheet in which the main chains of two consecutive residues are H-bonded to that of the third, and in which the dihedral angles are as follows: Residue(i): -140 degrees < phi(l) -20 degrees , -90 degrees < psi(l) < 40 degrees. Residue (i+1): -180 degrees < phi < -25 degrees or +120 degrees < phi < +180 degrees, +40 degrees < psi < +180 degrees or -180 degrees < psi < -120 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge" EXACT [] -xref: http://en.wikipedia.org/wiki/Beta_bulge "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001108 -name: beta_bulge_loop -alt_id: BS:00209 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds. Beta bulge loops often occur at the loop ends of beta-hairpins." [EBIBS:GAR, Http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001109 -name: beta_bulge_loop_five -alt_id: BS:00210 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+4), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+3), these loops have an RL nest at residues i+2 and i+3." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop five" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001110 -name: beta_bulge_loop_six -alt_id: BS:00211 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+5), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+4), these loops have an RL nest at residues i+3 and i+4." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop six" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001111 -name: beta_strand -alt_id: BS:00042 -def: "A beta strand describes a single length of polypeptide chain that forms part of a beta sheet. A single continuous stretch of amino acids adopting an extended conformation of hydrogen bonds between the N-O and the C=O of another part of the peptide. This forms a secondary protein structure in which two or more extended polypeptide regions are hydrogen-bonded to one another in a planar array." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "strand" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Beta_sheet "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001112 -name: antiparallel_beta_strand -alt_id: BS:0000341 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (one running N-terminal to C-terminal and one running C-terminal to N-terminal). Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i) and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they form two mutual backbone hydrogen bonds to each other's flanking peptide groups; this is known as a close pair of hydrogen bonds. 
The peptide backbone dihedral angles (phi, psi) are about (-140 degrees, 135 degrees) in antiparallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "antiparallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001113 -name: parallel_beta_strand -alt_id: BS:00151 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (both running N-terminal to C-terminal). This orientation is slightly less stable because it introduces nonplanarity in the inter-strand hydrogen bonding pattern. Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i)and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they do not hydrogen bond to each other; rather, one residue forms hydrogen bonds to the residues that flank the other (but not vice versa). For example, residue i may form hydrogen bonds to residues j - 1 and j + 1; this is known as a wide pair of hydrogen bonds. By contrast, residue j may hydrogen-bond to different residues altogether, or to none at all. The dihedral angles (phi, psi) are about (-120 degrees, 115 degrees) in parallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "parallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001114 -name: peptide_helix -alt_id: BS:00152 -def: "A helix is a secondary_structure conformation where the peptide backbone forms a coil." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "helix" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001115 -name: left_handed_peptide_helix -alt_id: BS:00222 -def: "A left handed helix is a region of peptide where the coiled conformation turns in an anticlockwise, left handed screw." 
[EBIBS:GAR] -subset: biosapiens -synonym: "helix-l" RELATED [] -synonym: "left handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001116 -name: right_handed_peptide_helix -alt_id: BS:0000339 -def: "A right handed helix is a region of peptide where the coiled conformation turns in a clockwise, right handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix" RELATED BS [] -synonym: "right handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001117 -name: alpha_helix -alt_id: BS:00040 -def: "The helix has 3.6 residues per turn which corersponds to a translation of 1.5 angstroms (= 0.15 nm) along the helical axis. Every backbone N-H group donates a hydrogen bond to the backbone C=O group of the amino acid four residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "a-helix" RELATED BS [] -synonym: "helix" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Alpha_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001118 -name: pi_helix -alt_id: BS:00153 -def: "The pi helix has 4.1 residues per turn and a translation of 1.15 (=0.115 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid five residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "pi helix" EXACT [] -xref: http://en.wikipedia.org/wiki/Pi_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001119 -name: three_ten_helix -alt_id: BS:0000340 -def: "The 3-10 helix has 3 residues per turn with a translation of 2.0 angstroms (=0.2 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid three residues earlier." [EBIBS:GAR] -comment: Range. 
-subset: biosapiens -synonym: "3(10) helix" EXACT [] -synonym: "3-10 helix" EXACT [] -synonym: "310 helix" EXACT [] -synonym: "three_ten_helix" EXACT [] -xref: http://en.wikipedia.org/wiki/310_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001120 -name: polypeptide_nest_motif -alt_id: BS:00223 -def: "A motif of two consecutive residues with dihedral angles. Nest should not have Proline as any residue. Nests frequently occur as parts of other motifs such as Schellman loops." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest" RELATED BS [] -synonym: "nest_motif" EXACT [] -synonym: "polypeptide nest motif" RELATED [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001121 -name: polypeptide_nest_left_right_motif -alt_id: BS:00224 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_left_right" EXACT [] -synonym: "nest_lr" EXACT [] -synonym: "polypeptide nest left right motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001122 -name: polypeptide_nest_right_left_motif -alt_id: BS:00225 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_right_left" EXACT [] -synonym: "nest_rl" EXACT [] -synonym: "polypeptide nest right left motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001123 -name: schellmann_loop -alt_id: BS:00226 -def: "A motif of six or seven consecutive residues that contains two H-bonds." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "paperclip" RELATED BS [] -synonym: "paperclip loop" RELATED [] -synonym: "schellmann_loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001124 -name: schellmann_loop_seven -alt_id: BS:00228 -def: "Wild type: A motif of seven consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+6), the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+5)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop seven" EXACT [] -synonym: "seven-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001125 -name: schellmann_loop_six -alt_id: BS:00227 -def: "Common Type: A motif of six consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+5) the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop six" EXACT [] -synonym: "six-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001126 -name: serine_threonine_motif -alt_id: BS:00229 -def: "A motif of five consecutive residues and two hydrogen bonds in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3) , the main-chain CO group of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine motif" EXACT [] -synonym: "st motif" EXACT [] -synonym: "st_motif" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001127 -name: serine_threonine_staple_motif -alt_id: BS:00230 -def: "A motif of four or five consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain OH of residue(i) is H-bonded to the main-chain CO of residue(i3) or (i4), Phi angles of residues(i1), (i2) and (i3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine threonine staple motif" EXACT [] -synonym: "st_staple" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001128 -name: polypeptide_turn_motif -alt_id: BS:00148 -def: "A reversal in the direction of the backbone of a protein that is stabilized by hydrogen bond between backbone NH and CO groups, involving no more than 4 amino acid residues." [EBIBS:GAR, uniprot:feature_type] -comment: Range. -subset: biosapiens -synonym: "turn" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001129 -name: asx_turn_left_handed_type_one -alt_id: BS:00206 -def: "Left handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type one" EXACT [] -synonym: "asx_turn_il" RELATED [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001130 -name: asx_turn_left_handed_type_two -alt_id: BS:00204 -def: "Left handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type two" EXACT [] -synonym: "asx_turn_iil" EXACT [] -is_a: SO:0000912 ! 
asx_turn - -[Term] -id: SO:0001131 -name: asx_turn_right_handed_type_two -alt_id: BS:00205 -def: "Right handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn right handed type two" EXACT [] -synonym: "asx_turn_iir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001132 -name: asx_turn_right_handed_type_one -alt_id: BS:00207 -def: "Right handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn type right handed type one" EXACT [] -synonym: "asx_turn_ir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001133 -name: beta_turn -alt_id: BS:00212 -def: "A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles of the second and third residues, which are the basis for sub-categorization." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001134 -name: beta_turn_left_handed_type_one -alt_id: BS:00215 -def: "Left handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles:- Residue(i+1): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees. 
Residue(i+2): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type one" EXACT [] -synonym: "beta_turn_il" EXACT [] -synonym: "type I' beta turn" EXACT [] -synonym: "type I' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001135 -name: beta_turn_left_handed_type_two -alt_id: BS:00213 -def: "Left handed type II: A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees > phi > -20 degrees, +80 degrees > psi > +180 degrees. Residue(i+2): +20 degrees > phi > +140 degrees, -40 degrees > psi > +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type two" EXACT [] -synonym: "beta_turn_iil" EXACT [] -synonym: "type II' beta turn" EXACT [] -synonym: "type II' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001136 -name: beta_turn_right_handed_type_one -alt_id: BS:00216 -def: "Right handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+2): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type one" EXACT [] -synonym: "beta_turn_ir" EXACT [] -synonym: "type I beta turn" EXACT [] -synonym: "type I turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001137 -name: beta_turn_right_handed_type_two -alt_id: BS:00214 -def: "Right handed type II:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, +80 degrees < psi < +180 degrees. Residue(i+2): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type two" EXACT [] -synonym: "beta_turn_iir" EXACT [] -synonym: "type II beta turn" EXACT [] -synonym: "type II turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001138 -name: gamma_turn -alt_id: BS:00219 -def: "Gamma turns, defined for 3 residues i,( i+1),( i+2) if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001139 -name: gamma_turn_classic -alt_id: BS:00220 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=75.0 - psi(i+1)=-64.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "classic gamma turn" EXACT [] -synonym: "gamma turn classic" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001140 -name: gamma_turn_inverse -alt_id: BS:00221 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=-79.0 - psi(i+1)=69.0." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn inverse" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001141 -name: serine_threonine_turn -alt_id: BS:00231 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine turn" EXACT [] -synonym: "st_turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001142 -name: st_turn_left_handed_type_one -alt_id: BS:00234 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type one" EXACT [] -synonym: "st_turn_il" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001143 -name: st_turn_left_handed_type_two -alt_id: BS:00232 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type two" EXACT [] -synonym: "st_turn_iil" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001144 -name: st_turn_right_handed_type_one -alt_id: BS:00235 -def: "The peptide twists in an clockwise, right handed manner. 
The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type one" EXACT [] -synonym: "st_turn_ir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001145 -name: st_turn_right_handed_type_two -alt_id: BS:00233 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type two" EXACT [] -synonym: "st_turn_iir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001146 -name: polypeptide_variation_site -alt_id: BS:00336 -def: "A site of sequence variation (alteration). Alternative sequence due to naturally occuring events such as polymorphisms and altermatve splicing or experimental methods such as site directed mutagenesis." [EBIBS:GAR, SO:ke] -comment: For example, was a substitution natural or mutated as part of an experiment? This term is added to merge the biosapiens term sequence_variations. -subset: biosapiens -synonym: "sequence_variations" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001147 -name: natural_variant_site -alt_id: BS:00071 -def: "Describes the natural sequence variants due to polymorphisms, disease-associated mutations, RNA editing and variations between strains, isolates or cultivars." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. 
-subset: biosapiens -synonym: "natural_variant" BROAD [] -synonym: "sequence variation" BROAD [] -synonym: "variant" BROAD [uniprot:feature_type] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001148 -name: mutated_variant_site -alt_id: BS:00036 -def: "Site which has been experimentally altered." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mutagen" EXACT BS [uniprot:feature_type] -synonym: "mutagenesis" EXACT [] -synonym: "mutated_site" EXACT [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001149 -name: alternate_sequence_site -alt_id: BS:00073 -alt_id: SO:0001065 -def: "Description of sequence variants produced by alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "alternative_sequence" EXACT [] -synonym: "isoform" NARROW [] -synonym: "sequence variation" NARROW [] -synonym: "var_seq" EXACT [uniprot:feature_type] -synonym: "varsplic" NARROW [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001150 -name: beta_turn_type_six -def: "A motif of four consecutive peptide resides of type VIa or type VIb and where the i+2 residue is cis-proline." [SO:cb] -subset: biosapiens -synonym: "beta turn type six" EXACT [] -synonym: "cis-proline loop" EXACT [] -synonym: "type VI beta turn" EXACT [] -synonym: "type VI turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001151 -name: beta_turn_type_six_a -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -90 degrees, psi ~ 0 degrees." 
[PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six a" EXACT [] -synonym: "type VIa beta turn" EXACT [] -synonym: "type VIa turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001152 -name: beta_turn_type_six_a_one -subset: biosapiens -synonym: "beta turn type six a one" EXACT [] -synonym: "type VIa1 beta turn" EXACT [] -synonym: "type VIa1 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001153 -name: beta_turn_type_six_a_two -subset: biosapiens -synonym: "beta turn type six a two" EXACT [] -synonym: "type VIa2 beta turn" EXACT [] -synonym: "type VIa2 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001154 -name: beta_turn_type_six_b -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -120 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -60 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six b" EXACT [] -synonym: "type VIb beta turn" EXACT [] -synonym: "type VIb turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001155 -name: beta_turn_type_eight -def: "A motif of four consecutive peptide residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ -30 degrees. Residue(i+2): phi ~ -120 degrees, psi ~ 120 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type eight" EXACT [] -synonym: "type VIII beta turn" EXACT [] -synonym: "type VIII turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001156 -name: DRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -10 and -60 relative to the TSS. Consensus sequence is WATCGATW." [PMID:12537576] -comment: This consensus sequence was identified computationally using the MEME algorithm within core promoter sequences from -60 to +40, with an E value of 1.7e-183. Tends to co-occur with Motif 7. Tends to not occur with DPE motif (SO:0000015) or motif 10. -synonym: "DRE motif" EXACT [] -synonym: "NDM4" EXACT [] -synonym: "WATCGATW_motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001157 -name: DMv4_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements with respect to the TSS (+1). Consensus sequence is YGGTCACACTR. Marked spatial preference within core promoter; tend to occur near the TSS, although not as tightly as INR (SO:0000014)." [PMID:16827941\:12537576] -synonym: "directional motif v4" EXACT [] -synonym: "DMv4" EXACT [] -synonym: "DMv4 motif" EXACT [] -synonym: "motif 1 element" EXACT [] -synonym: "promoter motif 1" EXACT [] -synonym: "YGGTCACATR" NARROW [] -is_a: SO:0001659 ! promoter_element - -[Term] -id: SO:0001158 -name: E_box_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and +1 relative to the TSS. Consensus sequence is AWCAGCTGWT. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015)." [PMID:12537576\:16827941] -synonym: "AWCAGCTGWT" NARROW [] -synonym: "E box motif" EXACT [] -synonym: "generic E box motif" EXACT [] -synonym: "NDM5" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001159 -name: DMv5_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -50 and -10 relative to the TSS. Consensus sequence is KTYRGTATWTTT. Tends to co-occur with DMv4 (SO:0001157) . Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v5" EXACT [] -synonym: "DMv5" EXACT [] -synonym: "DMv5 motif" EXACT [] -synonym: "KTYRGTATWTTT" NARROW [] -synonym: "promoter motif 6" RELATED [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001160 -name: DMv3_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -30 and +15 relative to the TSS. Consensus sequence is KNNCAKCNCTRNY. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015) or MTE (0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v3" EXACT [] -synonym: "DMv3" EXACT [] -synonym: "DMv3 motif" EXACT [] -synonym: "KNNCAKCNCTRNY" NARROW [] -synonym: "promoter motif 7" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001161 -name: DMv2_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and -45 relative to the TSS. Consensus sequence is MKSYGGCARCGSYSS. Tends to co-occur with DMv3 (SO:0001160). Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v2" EXACT [] -synonym: "DMv2" EXACT [] -synonym: "DMv2 motif" EXACT [] -synonym: "MKSYGGCARCGSYSS" NARROW [] -synonym: "promoter motif 8" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001162 -name: MTE -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between +20 and +30 relative to the TSS. Consensus sequence is CSARCSSAACGS. Tends to co-occur with INR motif (SO:0000014). Tends to not occur with DPE motif (SO:0000015) or DMv5 (SO:0001159)." [PMID:12537576\:15231738, PMID:16858867] -synonym: "CSARCSSAACGS" NARROW [] -synonym: "motif ten element" EXACT [] -synonym: "motif_ten_element" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter - -[Term] -id: SO:0001163 -name: INR1_motif -def: "A promoter motif with consensus sequence TCATTCG." [PMID:16827941] -synonym: "directional motif p3" EXACT [] -synonym: "directional promoter motif 3" EXACT [] -synonym: "DMp3" EXACT [] -synonym: "INR1 motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001164 -name: DPE1_motif -def: "A promoter motif with consensus sequence CGGACGT." [PMID:16827941] -synonym: "directional motif 5" EXACT [] -synonym: "directional promoter motif 5" RELATED [] -synonym: "DMp5" EXACT [] -synonym: "DPE1 motif" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001165 -name: DMv1_motif -def: "A promoter motif with consensus sequence CARCCCT." [PMID:16827941] -synonym: "directional promoter motif v1" RELATED [] -synonym: "DMv1" RELATED [] -synonym: "DMv1 motif" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001166 -name: GAGA_motif -def: "A non directional promoter motif with consensus sequence GAGAGCG." [PMID:16827941] -synonym: "GAGA" EXACT [] -synonym: "GAGA motif" EXACT [] -synonym: "NDM1" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001167 -name: NDM2_motif -def: "A non directional promoter motif with consensus CGMYGYCR." [PMID:16827941] -synonym: "NDM2" EXACT [] -synonym: "NDM2 motif" EXACT [] -synonym: "non directional promoter motif 2" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001168 -name: NDM3_motif -def: "A non directional promoter motif with consensus sequence GAAAGCT." [PMID:16827941] -synonym: "NDM3" EXACT [] -synonym: "NDM3 motif" EXACT [] -synonym: "non directional motif 3" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001169 -name: ds_RNA_viral_sequence -def: "A ds_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded RNA." [SO:ke] -synonym: "double stranded RNA virus sequence" EXACT [] -synonym: "ds RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001170 -name: polinton -def: "A kind of DNA transposon that populates the genomes of protists, fungi, and animals, characterized by a unique set of proteins necessary for their transposition, including a protein-primed DNA polymerase B, retroviral integrase, cysteine protease, and ATPase. Polintons are characterized by 6-bp target site duplications, terminal-inverted repeats that are several hundred nucleotides long, and 5'-AG and TC-3' termini. Polintons exist as autonomous and nonautonomous elements." [PMID:16537396] -synonym: "maverick element" RELATED [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0001171 -name: rRNA_21S -def: "A component of the large ribosomal subunit in mitochondrial rRNA." [RSC:cb] -synonym: "21S LSU rRNA" EXACT [] -synonym: "21S ribosomal RNA" EXACT [] -synonym: "21S rRNA" EXACT [] -synonym: "rRNA 21S" EXACT [] -is_a: SO:0000651 ! 
large_subunit_rRNA - -[Term] -id: SO:0001172 -name: tRNA_region -def: "A region of a tRNA." [RSC:cb] -synonym: "tRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000253 ! tRNA - -[Term] -id: SO:0001173 -name: anticodon_loop -def: "A sequence of seven nucleotide bases in tRNA which contains the anticodon. It has the sequence 5'-pyrimidine-purine-anticodon-modified purine-any base-3." [ISBN:0716719207] -synonym: "anti-codon loop" EXACT [] -synonym: "anticodon loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001174 -name: anticodon -def: "A sequence of three nucleotide bases in tRNA which recognizes a codon in mRNA." [RSC:cb] -synonym: "anti-codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Anticodon "wiki" -is_a: SO:0001172 ! tRNA_region -relationship: part_of SO:0001173 ! anticodon_loop - -[Term] -id: SO:0001175 -name: CCA_tail -def: "Base sequence at the 3' end of a tRNA. The 3'-hydroxyl group on the terminal adenosine is the attachment point for the amino acid." [ISBN:0716719207] -synonym: "CCA sequence" EXACT [] -synonym: "CCA tail" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001176 -name: DHU_loop -def: "Non-base-paired sequence of nucleotide bases in tRNA. It contains several dihydrouracil residues." [ISBN:071671920] -synonym: "D loop" RELATED [] -synonym: "DHU loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001177 -name: T_loop -def: "Non-base-paired sequence of three nucleotide bases in tRNA. It has sequence T-Psi-C." [ISBN:0716719207] -synonym: "T loop" EXACT [] -synonym: "TpsiC loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001178 -name: pyrrolysine_tRNA_primary_transcript -def: "A primary transcript encoding pyrrolysyl tRNA (SO:0000766)." [RSC:cb] -synonym: "pyrrolysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0001179 -name: U3_snoRNA -def: "U3 snoRNA is a member of the box C/D class of small nucleolar RNAs. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -comment: The definition is most of the old definition for snoRNA (SO:0000275). -synonym: "small nucleolar RNA U3" EXACT [] -synonym: "snoRNA U3" EXACT [] -synonym: "U3 small nucleolar RNA" EXACT [] -synonym: "U3 snoRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Small_nucleolar_RNA_U3 "wiki" -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0001180 -name: AU_rich_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is rich in AUUUA pentamers. Messenger RNAs bearing multiple AU-rich elements are often unstable." [PMID:7892223] -synonym: "ARE" RELATED [] -synonym: "AU rich element" EXACT [] -synonym: "AU-rich element" EXACT [] -xref: http://en.wikipedia.org/wiki/AU-rich_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! 
three_prime_UTR - -[Term] -id: SO:0001181 -name: Bruno_response_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is bound by the Drosophila Bruno protein and its homologs." [PMID:10893231] -comment: Not to be confused with BRE_motif (SO:0000016), which binds transcription factor II B. -synonym: "BRE" RELATED [] -synonym: "Bruno response element" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001182 -name: iron_responsive_element -def: "A regulatory sequence found in the 5' and 3' UTRs of many mRNAs which encode iron-binding proteins. It has a hairpin structure and is recognized by trans-acting proteins known as iron-regulatory proteins." [PMID:3198610, PMID:8710843] -synonym: "IRE" EXACT [] -synonym: "iron responsive element" EXACT [] -xref: http://en.wikipedia.org/wiki/Iron_responsive_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0001183 -name: morpholino -def: "An attribute describing a sequence composed of nucleobases bound to a morpholino backbone. A morpholino backbone consists of morpholine (CHEBI:34856) rings connected by phosphorodiamidate linkages." [RSC:cb] -comment: Do not use this for feature annotation. Use morpholino_oligo (SO:0000034) instead. -xref: http://en.wikipedia.org/wiki/Morpholino "wiki" -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001184 -name: PNA -def: "An attribute describing a sequence composed of peptide nucleic acid (CHEBI:48021), a chemical consisting of nucleobases bound to a backbone composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [RSC:cb] -comment: Do not use this term for feature annotation. Use PNA_oligo (SO:0001011) instead. -synonym: "peptide nucleic acid" RELATED [] -is_a: SO:0000348 ! 
nucleic_acid - -[Term] -id: SO:0001185 -name: enzymatic -def: "An attribute describing the sequence of a transcript that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use enzymatic_RNA (SO:0000372) instead. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001186 -name: ribozymic -def: "An attribute describing the sequence of a transcript that has catalytic activity even without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use ribozyme (SO:0000374) instead. -is_a: SO:0001185 ! enzymatic - -[Term] -id: SO:0001187 -name: pseudouridylation_guide_snoRNA -def: "A snoRNA that specifies the site of pseudouridylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA pseudouridylation guide activity (GO:0030558). -synonym: "pseudouridylation guide snoRNA" EXACT [] -is_a: SO:0000594 ! H_ACA_box_snoRNA - -[Term] -id: SO:0001188 -name: LNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of 'locked' deoxyribose rings connected to a phosphate backbone. The deoxyribose unit's conformation is 'locked' by a 2'-C,4'-C-oxymethylene link." [CHEBI:48010] -comment: Do not use this term for feature annotation. Use LNA_oligo (SO:0001189) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001189 -name: LNA_oligo -def: "An oligo composed of LNA residues." [RSC:cb] -synonym: "LNA oligo" EXACT [] -synonym: "locked nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Locked_nucleic_acid "wiki" -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001188 ! LNA - -[Term] -id: SO:0001190 -name: TNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of threose rings connected to a phosphate backbone." 
[CHEBI:48019] -comment: Do not use this term for feature annotation. Use TNA_oligo (SO:0001191) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001191 -name: TNA_oligo -def: "An oligo composed of TNA residues." [RSC:cb] -synonym: "threose nucleic acid" EXACT [] -synonym: "TNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Threose_nucleic_acid "wiki" -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001190 ! TNA - -[Term] -id: SO:0001192 -name: GNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of an acyclic three-carbon propylene glycol connected to a phosphate backbone. It has two enantiomeric forms, (R)-GNA and (S)-GNA." [CHEBI:48015] -comment: Do not use this term for feature annotation. Use GNA_oligo (SO:0001192) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001193 -name: GNA_oligo -def: "An oligo composed of GNA residues." [RSC:cb] -synonym: "glycerol nucleic acid" EXACT [] -synonym: "glycol nucleic acid" EXACT [] -synonym: "GNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Glycerol_nucleic_acid "wiki" -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001192 ! GNA - -[Term] -id: SO:0001194 -name: R_GNA -def: "An attribute describing a GNA sequence in the (R)-GNA enantiomer." [CHEBI:48016] -comment: Do not use this term for feature annotation. Use R_GNA_oligo (SO:0001195) instead. -synonym: "R GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001195 -name: R_GNA_oligo -def: "An oligo composed of (R)-GNA residues." [RSC:cb] -synonym: "(R)-glycerol nucleic acid" EXACT [] -synonym: "(R)-glycol nucleic acid" EXACT [] -synonym: "R GNA oligo" EXACT [] -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001194 ! R_GNA - -[Term] -id: SO:0001196 -name: S_GNA -def: "An attribute describing a GNA sequence in the (S)-GNA enantiomer." 
[CHEBI:48017] -comment: Do not use this term for feature annotation. Use S_GNA_oligo (SO:0001197) instead. -synonym: "S GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001197 -name: S_GNA_oligo -def: "An oligo composed of (S)-GNA residues." [RSC:cb] -synonym: "(S)-glycerol nucleic acid" EXACT [] -synonym: "(S)-glycol nucleic acid" EXACT [] -synonym: "S GNA oligo" EXACT [] -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001196 ! S_GNA - -[Term] -id: SO:0001198 -name: ds_DNA_viral_sequence -def: "A ds_DNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded DNA." [SO:ke] -synonym: "double stranded DNA virus" EXACT [] -synonym: "ds DNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001199 -name: ss_RNA_viral_sequence -def: "A ss_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as single stranded RNA." [SO:ke] -synonym: "single strand RNA virus" EXACT [] -synonym: "ss RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001200 -name: negative_sense_ssRNA_viral_sequence -def: "A negative_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that is complementary to mRNA and must be converted to positive sense RNA by RNA polymerase before translation." [SO:ke] -synonym: "negative sense single stranded RNA virus" RELATED [] -synonym: "negative sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001201 -name: positive_sense_ssRNA_viral_sequence -def: "A positive_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that can be immediately translated by the host." [SO:ke] -synonym: "positive sense single stranded RNA virus" RELATED [] -synonym: "positive sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! 
ss_RNA_viral_sequence - -[Term] -id: SO:0001202 -name: ambisense_ssRNA_viral_sequence -def: "A ambisense_RNA_virus is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus with both messenger and anti messenger polarity." [SO:ke] -synonym: "ambisense single stranded RNA virus" EXACT [] -synonym: "ambisense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001203 -name: RNA_polymerase_promoter -def: "A region (DNA) to which RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "RNA polymerase promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0001204 -name: Phage_RNA_Polymerase_Promoter -def: "A region (DNA) to which Bacteriophage RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "Phage RNA Polymerase Promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0001205 -name: SP6_RNA_Polymerase_Promoter -def: "A region (DNA) to which the SP6 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "SP6 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001206 -name: T3_RNA_Polymerase_Promoter -def: "A DNA sequence to which the T3 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T3 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001207 -name: T7_RNA_Polymerase_Promoter -def: "A region (DNA) to which the T7 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T7 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001208 -name: five_prime_EST -def: "An EST read from the 5' end of a transcript that usually codes for a protein. These regions tend to be conserved across species and do not change much within a gene family." 
[http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "5' EST" EXACT [] -synonym: "five prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001209 -name: three_prime_EST -def: "An EST read from the 3' end of a transcript. They are more likely to fall within non-coding, or untranslated regions(UTRs)." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "3' EST" EXACT [] -synonym: "three prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001210 -name: translational_frameshift -def: "The region of mRNA (not divisible by 3 bases) that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "ribosomal frameshift" EXACT [] -synonym: "translational frameshift" EXACT [] -xref: http://en.wikipedia.org/wiki/Translational_frameshift "wiki" -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0001211 -name: plus_1_translational_frameshift -def: "The region of mRNA 1 base long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 1 ribosomal frameshift" EXACT [] -synonym: "plus 1 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001212 -name: plus_2_translational_frameshift -def: "The region of mRNA 2 bases long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 2 ribosomal frameshift" EXACT [] -synonym: "plus 2 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001213 -name: group_III_intron -def: "Group III introns are introns found in the mRNA of the plastids of euglenoid protists. They are spliced by a two step transesterification with bulged adenosine as initiating nucleophile." [PMID:11377794] -comment: GO:0000374. 
-synonym: "group III intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_III_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -def: "The maximal intersection of exon and UTR." [SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with a stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001216 -name: endonuclease_spliced_intron -def: "An intron that spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -synonym: "endonuclease spliced intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0001217 -name: protein_coding_gene -synonym: "protein coding gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000010 ! protein_coding - -[Term] -id: SO:0001218 -name: transgenic_insertion -def: "An insertion that derives from another organism, via the use of recombinant DNA technology." [SO:bm] -synonym: "transgenic insertion" EXACT [] -intersection_of: SO:0000667 ! insertion -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0001219 -name: retrogene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000569 ! retrotransposed - -[Term] -id: SO:0001220 -name: silenced_by_RNA_interference -def: "An attribute describing an epigenetic process where a gene is inactivated by RNA interference." [RSC:cb] -comment: RNA interference is GO:0016246. 
-synonym: "silenced by RNA interference" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001221 -name: silenced_by_histone_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by histone modification." [RSC:cb] -comment: Histone modification is GO:0016570. -synonym: "silenced by histone modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001222 -name: silenced_by_histone_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone methylation." [RSC:cb] -comment: Histone methylation is GO:0016571. -synonym: "silenced by histone methylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001223 -name: silenced_by_histone_deacetylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone deacetylation." [RSC:cb] -comment: Histone deacetylation is GO:0016573. -synonym: "silenced by histone deacetylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001224 -name: gene_silenced_by_RNA_interference -def: "A gene that is silenced by RNA interference." [SO:xp] -synonym: "gene silenced by RNA interference" EXACT [] -synonym: "RNA interference silenced gene" EXACT [] -synonym: "RNAi silenced gene" EXACT [] -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001220 ! silenced_by_RNA_interference - -[Term] -id: SO:0001225 -name: gene_silenced_by_histone_modification -def: "A gene that is silenced by histone modification." [SO:xp] -synonym: "gene silenced by histone modification" EXACT [] -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001226 -name: gene_silenced_by_histone_methylation -def: "A gene that is silenced by histone methylation." [SO:xp] -synonym: "gene silenced by histone methylation" EXACT [] -intersection_of: SO:0001225 ! 
gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001222 ! silenced_by_histone_methylation - -[Term] -id: SO:0001227 -name: gene_silenced_by_histone_deacetylation -def: "A gene that is silenced by histone deacetylation." [SO:xp] -synonym: "gene silenced by histone deacetylation" EXACT [] -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001223 ! silenced_by_histone_deacetylation - -[Term] -id: SO:0001228 -name: dihydrouridine -def: "A modified RNA base in which the 5,6-dihydrouracil is bound to the ribose ring." [RSC:cb] -synonym: "D" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Dihydrouridine "wiki" -xref: RNAMOD:051 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001229 -name: pseudouridine -def: "A modified RNA base in which the 5- position of the uracil is bound to the ribose ring instead of the 4- position." [RSC:cb] -comment: The free molecule is CHEBI:17802. -synonym: "Y" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Pseudouridine "wiki" -xref: RNAMOD:050 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001230 -name: inosine -def: "A modified RNA base in which hypoxanthine is bound to the ribose ring." [http://library.med.utah.edu/RNAmods/, RSC:cb] -comment: The free molecule is CHEBI:17596. -synonym: "I" RELATED [] -synonym: "RNAMOD:017" RELATED [] -xref: http://en.wikipedia.org/wiki/Inosine "wiki" -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001231 -name: seven_methylguanine -def: "A modified RNA base in which guanine is methylated at the 7- position." [RSC:cb] -comment: The free molecule is CHEBI:2274. -synonym: "7-methylguanine" EXACT [] -synonym: "seven methylguanine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001232 -name: ribothymidine -def: "A modified RNA base in which thymine is bound to the ribose ring." [RSC:cb] -comment: The free molecule is CHEBI:30832. -is_a: SO:0000250 ! 
modified_RNA_base_feature - -[Term] -id: SO:0001233 -name: methylinosine -def: "A modified RNA base in which methylhypoxanthine is bound to the ribose ring." [RSC:cb] -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001234 -name: mobile -def: "An attribute describing a feature that has either intra-genome or intracellular mobility." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Mobile "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001235 -name: replicon -def: "A region containing at least one unique origin of replication and a unique termination site." [ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001237 -name: amino_acid -def: "A sequence feature that corresponds to a single amino acid residue in a polypeptide." [RSC:cb] -comment: Probably in the future this will cross reference to Chebi. -synonym: "amino acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Amino_acid "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0001238 -name: major_TSS -synonym: "major transcription start site" EXACT [] -synonym: "major TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001239 -name: minor_TSS -synonym: "minor TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001240 -name: TSS_region -def: "The region of a gene from the 5' most TSS to the 3' TSS." [BBOP:nw] -synonym: "TSS region" EXACT [] -is_a: SO:0000842 ! gene_component_region -relationship: has_part SO:0000315 ! TSS - -[Term] -id: SO:0001241 -name: encodes_alternate_transcription_start_sites -synonym: "encodes alternate transcription start sites" EXACT [] -is_a: SO:0000401 ! 
gene_attribute - -[Term] -id: SO:0001243 -name: miRNA_primary_transcript_region -def: "A part of an miRNA primary_transcript." [SO:ke] -synonym: "miRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0001244 -name: pre_miRNA -def: "The 60-70 nucleotide region remain after Drosha processing of the primary transcript, that folds back upon itself to form a hairpin sructure." [SO:ke] -synonym: "pre-miRNA" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0001245 -name: miRNA_stem -def: "The stem of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA stem" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001246 -name: miRNA_loop -def: "The loop of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA loop" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001247 -name: synthetic_oligo -def: "An oligo composed of synthetic nucleotides." [SO:ke] -synonym: "synthetic oligo" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0001248 -name: assembly -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001249 -name: fragment_assembly -def: "A fragment assembly is a genome assembly that orders overlapping fragments of the genome based on landmark sequences. The base pair distance between the landmarks is known allowing additivity of lengths." [SO:ke] -synonym: "fragment assembly" EXACT [] -synonym: "physical map" EXACT [] -is_a: SO:0001248 ! 
assembly - -[Term] -id: SO:0001250 -name: fingerprint_map -def: "A fingerprint_map is a physical map composed of restriction fragments." [SO:ke] -synonym: "BACmap" EXACT [] -synonym: "fingerprint map" EXACT [] -synonym: "FPC" EXACT [] -synonym: "FPCmap" EXACT [] -synonym: "restriction map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000412 ! restriction_fragment - -[Term] -id: SO:0001251 -name: STS_map -def: "An STS map is a physical map organized by the unique STS landmarks." [SO:ke] -synonym: "STS map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001252 -name: RH_map -def: "A radiation hybrid map is a physical map." [SO:ke] -synonym: "radiation hybrid map" EXACT [] -synonym: "RH map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001253 -name: sonicate_fragment -def: "A DNA fragment generated by sonication. Sonication is a technique used to sheer DNA into smaller fragments." [SO:ke] -synonym: "sonicate fragment" EXACT [] -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0001254 -name: polyploid -def: "A kind of chromosome variation where the chromosome complement is an exact multiple of the haploid number and is greater than the diploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Polyploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0001255 -name: autopolyploid -def: "A polyploid where the multiple chromosome set was derived from the same organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Autopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001256 -name: allopolyploid -def: "A polyploid where the multiple chromosome set was derived from a different organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Allopolyploid "wiki" -is_a: SO:0001254 ! 
polyploid - -[Term] -id: SO:0001257 -name: homing_endonuclease_binding_site -def: "The binding site (recognition site) of a homing endonuclease. The binding site is typically large." [SO:ke] -synonym: "homing endonuclease binding site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0001258 -name: octamer_motif -def: "A sequence element characteristic of some RNA polymerase II promoters with sequence ATTGCAT that binds Pou-domain transcription factors." [GOC:dh, PMID:3095662] -comment: Nature. 1986 Oct 16-22;323(6089):640-3. -synonym: "octamer motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001259 -name: apicoplast_chromosome -def: "A chromosome originating in an apicoplast." [SO:xp] -synonym: "apicoplast chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0001260 -name: sequence_collection -def: "A collection of discontinuous sequences." [SO:ke] -synonym: "sequence collection" EXACT [] - -[Term] -id: SO:0001261 -name: overlapping_feature_set -def: "A continuous region of sequence composed of the overlapping of multiple sequence_features, which ultimately provides evidence for another sequence_feature." [SO:ke] -comment: This feature was requested by Nicole, tracker id 1911479. It is required to gather evidence together for annotation. An example would be overlapping ESTs that support an mRNA. -synonym: "overlapping feature set" EXACT [] -is_a: SO:0000703 ! experimental_result_region - -[Term] -id: SO:0001262 -name: overlapping_EST_set -def: "A continous experimental result region extending the length of multiple overlapping EST's." [SO:ke] -synonym: "overlapping EST set" EXACT [] -is_a: SO:0001261 ! overlapping_feature_set -relationship: has_part SO:0000345 ! 
EST - -[Term] -id: SO:0001263 -name: ncRNA_gene -synonym: "ncRNA gen" EXACT [] -synonym: "ncRNA gene" EXACT [] -synonym: "non-coding RNA gene" RELATED [] -is_a: SO:0000704 ! gene - -[Term] -id: SO:0001264 -name: gRNA_gene -synonym: "gRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000979 ! gRNA_encoding - -[Term] -id: SO:0001265 -name: miRNA_gene -synonym: "miRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000571 ! miRNA_encoding - -[Term] -id: SO:0001266 -name: scRNA_gene -synonym: "scRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000575 ! scRNA_encoding - -[Term] -id: SO:0001267 -name: snoRNA_gene -synonym: "snoRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0001268 -name: snRNA_gene -synonym: "snRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0001263 ! ncRNA_gene - -[Term] -id: SO:0001269 -name: SRP_RNA_gene -synonym: "SRP RNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000642 ! SRP_RNA_encoding - -[Term] -id: SO:0001270 -name: stRNA_gene -synonym: "stRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000656 ! stRNA_encoding - -[Term] -id: SO:0001271 -name: tmRNA_gene -synonym: "tmRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000659 ! tmRNA_encoding - -[Term] -id: SO:0001272 -name: tRNA_gene -synonym: "tRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000663 ! tRNA_encoding - -[Term] -id: SO:0001273 -name: modified_adenosine -def: "A modified adenine is an adenine base feature that has been altered." [SO:ke] -synonym: "modified adenosine" EXACT [] -is_a: SO:0000250 ! 
modified_RNA_base_feature - -[Term] -id: SO:0001274 -name: modified_inosine -def: "A modified inosine is an inosine base feature that has been altered." [SO:ke] -synonym: "modified inosine" EXACT [] -is_a: SO:0001230 ! inosine - -[Term] -id: SO:0001275 -name: modified_cytidine -def: "A modified cytidine is a cytidine base feature which has been altered." [SO:ke] -synonym: "modified cytidine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001276 -name: modified_guanosine -synonym: "modified guanosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001277 -name: modified_uridine -synonym: "modified uridine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001278 -name: one_methylinosine -def: "1-methylinosine is a modified insosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylinosine" EXACT [] -synonym: "m1I" EXACT RNAMOD [] -synonym: "one methylinosine" EXACT [] -xref: RNAMOD:018 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001279 -name: one_two_prime_O_dimethylinosine -def: "1,2'-O-dimethylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylinosine" EXACT [] -synonym: "m'Im" EXACT RNAMOD [] -synonym: "one two prime O dimethylinosine" EXACT [] -xref: RNAMOD:019 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001280 -name: two_prime_O_methylinosine -def: "2'-O-methylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylinosine" EXACT [] -synonym: "Im" EXACT RNAMOD [] -synonym: "two prime O methylinosine" EXACT [] -xref: RNAMOD:081 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001281 -name: three_methylcytidine -def: "3-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylcytidine" EXACT [] -synonym: "m3C" EXACT RNAMOD [] -synonym: "three methylcytidine" EXACT [] -xref: RNAMOD:020 -is_a: SO:0001275 ! 
modified_cytidine - -[Term] -id: SO:0001282 -name: five_methylcytidine -def: "5-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylcytidine" EXACT [] -synonym: "five methylcytidine" EXACT [] -synonym: "m5C" EXACT RNAMOD [] -xref: RNAMOD:021 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001283 -name: two_prime_O_methylcytidine -def: "2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylcytidine" EXACT [] -synonym: "Cm" EXACT RNAMOD [] -synonym: "two prime O methylcytidine" EXACT [] -xref: RNAMOD:022 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001284 -name: two_thiocytidine -def: "2-thiocytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiocytidine" EXACT [] -synonym: "s2C" EXACT RNAMOD [] -synonym: "two thiocytidine" EXACT [] -xref: RNAMOD:023 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001285 -name: N4_acetylcytidine -def: "N4-acetylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4C" EXACT RNAMOD [] -synonym: "N4 acetylcytidine" EXACT [] -synonym: "N4-acetylcytidine" EXACT [] -xref: RNAMOD:024 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001286 -name: five_formylcytidine -def: "5-formylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formylcytidine" EXACT [] -synonym: "f5C" EXACT RNAMOD [] -synonym: "five formylcytidine" EXACT [] -xref: RNAMOD:025 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001287 -name: five_two_prime_O_dimethylcytidine -def: "5,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethylcytidine" EXACT [] -synonym: "five two prime O dimethylcytidine" EXACT [] -synonym: "m5Cm" EXACT RNAMOD [] -xref: RNAMOD:026 -is_a: SO:0001275 ! 
modified_cytidine - -[Term] -id: SO:0001288 -name: N4_acetyl_2_prime_O_methylcytidine -def: "N4-acetyl-2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4Cm" EXACT RNAMOD [] -synonym: "N4 acetyl 2 prime O methylcytidine" EXACT [] -synonym: "N4-acetyl-2'-O-methylcytidine" EXACT [] -xref: RNAMOD:027 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001289 -name: lysidine -def: "Lysidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "k2C" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Lysidine "wiki" -xref: RNAMOD:028 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001290 -name: N4_methylcytidine -def: "N4-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4C" EXACT RNAMOD [] -synonym: "N4 methylcytidine" EXACT [] -synonym: "N4-methylcytidine" EXACT [] -xref: RNAMOD:082 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001291 -name: N4_2_prime_O_dimethylcytidine -def: "N4,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4Cm" EXACT RNAMOD [] -synonym: "N4 2 prime O dimethylcytidine" EXACT [] -synonym: "N4,2'-O-dimethylcytidine" EXACT [] -xref: RNAMOD:083 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001292 -name: five_hydroxymethylcytidine -def: "5-hydroxymethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxymethylcytidine" EXACT [] -synonym: "five hydroxymethylcytidine" EXACT [] -synonym: "hm5C" EXACT RNAMOD [] -xref: RNAMOD:084 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001293 -name: five_formyl_two_prime_O_methylcytidine -def: "5-formyl-2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formyl-2'-O-methylcytidine" EXACT [] -synonym: "f5Cm" EXACT RNAMOD [] -synonym: "five formyl two prime O methylcytidine" EXACT [] -xref: RNAMOD:095 -is_a: SO:0001275 ! 
modified_cytidine - -[Term] -id: SO:0001294 -name: N4_N4_2_prime_O_trimethylcytidine -def: "N4_N4_2_prime_O_trimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m42Cm" EXACT RNAMOD [] -synonym: "N4,N4,2'-O-trimethylcytidine" EXACT [] -xref: RNAMOD:107 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001295 -name: one_methyladenosine -def: "1_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyladenosine" EXACT [] -synonym: "m1A" EXACT RNAMOD [] -synonym: "one methyladenosine" EXACT [] -xref: RNAMOD:001 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001296 -name: two_methyladenosine -def: "2_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methyladenosine" EXACT [] -synonym: "m2A" EXACT RNAMOD [] -synonym: "two methyladenosine" EXACT [] -xref: RNAMOD:002 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001297 -name: N6_methyladenosine -def: "N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6A" EXACT RNAMOD [] -synonym: "N6 methyladenosine" EXACT [] -synonym: "N6-methyladenosine" EXACT [] -xref: RNAMOD:003 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001298 -name: two_prime_O_methyladenosine -def: "2prime_O_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyladenosine" EXACT [] -synonym: "Am" EXACT RNAMOD [] -synonym: "two prime O methyladenosine" EXACT [] -xref: RNAMOD:004 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001299 -name: two_methylthio_N6_methyladenosine -def: "2_methylthio_N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-methyladenosine" EXACT [] -synonym: "ms2m6A" EXACT RNAMOD [] -synonym: "two methylthio N6 methyladenosine" EXACT [] -xref: RNAMOD:005 -is_a: SO:0001273 ! 
modified_adenosine - -[Term] -id: SO:0001300 -name: N6_isopentenyladenosine -def: "N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "i6A" EXACT RNAMOD [] -synonym: "N6 isopentenyladenosine" EXACT [] -synonym: "N6-isopentenyladenosine" EXACT [] -xref: RNAMOD:006 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001301 -name: two_methylthio_N6_isopentenyladenosine -def: "2_methylthio_N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-isopentenyladenosine" EXACT [] -synonym: "ms2i6A" EXACT RNAMOD [] -synonym: "two methylthio N6 isopentenyladenosine" EXACT [] -xref: RNAMOD:007 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001302 -name: N6_cis_hydroxyisopentenyl_adenosine -def: "N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "io6A" EXACT RNAMOD [] -synonym: "N6 cis hydroxyisopentenyl adenosine" EXACT [] -synonym: "N6-(cis-hydroxyisopentenyl)adenosine" EXACT [] -xref: RNAMOD:008 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001303 -name: two_methylthio_N6_cis_hydroxyisopentenyl_adenosine -def: "2_methylthio_N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-(cis-hydroxyisopentenyl) adenosine" EXACT [] -synonym: "ms2io6A" EXACT RNAMOD [] -synonym: "two methylthio N6 cis hydroxyisopentenyl adenosine" EXACT [] -xref: RNAMOD:009 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001304 -name: N6_glycinylcarbamoyladenosine -def: "N6_glycinylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "g6A" EXACT RNAMOD [] -synonym: "N6 glycinylcarbamoyladenosine" EXACT [] -synonym: "N6-glycinylcarbamoyladenosine" EXACT [] -xref: RNAMOD:010 -is_a: SO:0001273 ! 
modified_adenosine - -[Term] -id: SO:0001305 -name: N6_threonylcarbamoyladenosine -def: "N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-threonylcarbamoyladenosine" EXACT [] -synonym: "t6A" EXACT RNAMOD [] -xref: RNAMOD:011 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001306 -name: two_methylthio_N6_threonyl_carbamoyladenosine -def: "2_methylthio_N6_threonyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-threonyl carbamoyladenosine" EXACT [] -synonym: "ms2t6A" EXACT RNAMOD [] -synonym: "two methylthio N6 threonyl carbamoyladenosine" EXACT [] -xref: RNAMOD:012 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001307 -name: N6_methyl_N6_threonylcarbamoyladenosine -def: "N6_methyl_N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6t6A" EXACT RNAMOD [] -synonym: "N6 methyl N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-methyl-N6-threonylcarbamoyladenosine" EXACT [] -xref: RNAMOD:013 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001308 -name: N6_hydroxynorvalylcarbamoyladenosine -def: "N6_hydroxynorvalylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "hn6A" EXACT RNAMOD [] -synonym: "N6 hydroxynorvalylcarbamoyladenosine" EXACT [] -synonym: "N6-hydroxynorvalylcarbamoyladenosine" EXACT [] -xref: RNAMOD:014 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001309 -name: two_methylthio_N6_hydroxynorvalyl_carbamoyladenosine -def: "2_methylthio_N6_hydroxynorvalyl_carbamoyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-hydroxynorvalyl carbamoyladenosine" EXACT [] -synonym: "ms2hn6A" EXACT RNAMOD [] -synonym: "two methylthio N6 hydroxynorvalyl carbamoyladenosine" EXACT [] -xref: RNAMOD:015 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001310 -name: two_prime_O_ribosyladenosine_phosphate -def: "2prime_O_ribosyladenosine_phosphate is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosyladenosine (phosphate)" EXACT [] -synonym: "Ar(p)" EXACT RNAMOD [] -synonym: "two prime O ribosyladenosine phosphate" EXACT [] -xref: RNAMOD:016 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001311 -name: N6_N6_dimethyladenosine -def: "N6_N6_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62A" EXACT RNAMOD [] -synonym: "N6,N6-dimethyladenosine" EXACT [] -xref: RNAMOD:080 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001312 -name: N6_2_prime_O_dimethyladenosine -def: "N6_2prime_O_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6Am" EXACT RNAMOD [] -synonym: "N6 2 prime O dimethyladenosine" EXACT [] -synonym: "N6,2'-O-dimethyladenosine" EXACT [] -xref: RNAMOD:088 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001313 -name: N6_N6_2_prime_O_trimethyladenosine -def: "N6_N6_2prime_O_trimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62Am" EXACT RNAMOD [] -synonym: "N6,N6,2'-O-trimethyladenosine" EXACT [] -xref: RNAMOD:089 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001314 -name: one_two_prime_O_dimethyladenosine -def: "1,2'-O-dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethyladenosine" EXACT [] -synonym: "m1Am" EXACT RNAMOD [] -synonym: "one two prime O dimethyladenosine" EXACT [] -xref: RNAMOD:097 -is_a: SO:0001273 ! 
modified_adenosine - -[Term] -id: SO:0001315 -name: N6_acetyladenosine -def: "N6_acetyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac6A" EXACT RNAMOD [] -synonym: "N6 acetyladenosine" EXACT [] -synonym: "N6-acetyladenosine" EXACT [] -xref: RNAMOD:102 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001316 -name: seven_deazaguanosine -def: "7-deazaguanosine is a moddified guanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-deazaguanosine" RELATED [] -synonym: "seven deazaguanosine" EXACT [] -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001317 -name: queuosine -def: "Queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "Q" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Queuosine "wiki" -xref: RNAMOD:043 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001318 -name: epoxyqueuosine -def: "Epoxyqueuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "eQ" EXACT RNAMOD [] -xref: RNAMOD:044 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001319 -name: galactosyl_queuosine -def: "Galactosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "galactosyl queuosine" EXACT [] -synonym: "galactosyl-queuosine" EXACT [] -synonym: "galQ" EXACT RNAMOD [] -xref: RNAMOD:045 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001320 -name: mannosyl_queuosine -def: "Mannosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "mannosyl queuosine" EXACT [] -synonym: "mannosyl-queuosine" EXACT [] -synonym: "manQ" EXACT RNAMOD [] -xref: RNAMOD:046 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001321 -name: seven_cyano_seven_deazaguanosine -def: "7_cyano_7_deazaguanosine is a modified 7-deazoguanosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "7-cyano-7-deazaguanosine" EXACT [] -synonym: "preQ0" EXACT RNAMOD [] -synonym: "seven cyano seven deazaguanosine" EXACT [] -xref: RNAMOD:047 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001322 -name: seven_aminomethyl_seven_deazaguanosine -def: "7_aminomethyl_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-aminomethyl-7-deazaguanosine" EXACT [] -synonym: "preQ1" EXACT RNAMOD [] -synonym: "seven aminomethyl seven deazaguanosine" EXACT [] -xref: RNAMOD:048 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001323 -name: archaeosine -def: "Archaeosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "G+" EXACT RNAMOD [] -xref: RNAMOD:049 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001324 -name: one_methylguanosine -def: "1_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylguanosine" EXACT [] -synonym: "m1G" EXACT RNAMOD [] -synonym: "one methylguanosine" EXACT [] -xref: RNAMOD:029 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001325 -name: N2_methylguanosine -def: "N2_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2G" EXACT RNAMOD [] -synonym: "N2 methylguanosine" EXACT [] -synonym: "N2-methylguanosine" EXACT [] -xref: RNAMOD:030 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001326 -name: seven_methylguanosine -def: "7_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "7-methylguanosine" EXACT [] -synonym: "m7G" EXACT RNAMOD [] -synonym: "seven methylguanosine" EXACT [] -xref: RNAMOD:031 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001327 -name: two_prime_O_methylguanosine -def: "2prime_O_methylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylguanosine" EXACT [] -synonym: "Gm" EXACT RNAMOD [] -synonym: "two prime O methylguanosine" EXACT [] -xref: RNAMOD:032 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001328 -name: N2_N2_dimethylguanosine -def: "N2_N2_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22G" EXACT RNAMOD [] -synonym: "N2,N2-dimethylguanosine" EXACT [] -xref: RNAMOD:033 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001329 -name: N2_2_prime_O_dimethylguanosine -def: "N2_2prime_O_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2Gm" EXACT RNAMOD [] -synonym: "N2 2 prime O dimethylguanosine" EXACT [] -synonym: "N2,2'-O-dimethylguanosine" EXACT [] -xref: RNAMOD:034 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001330 -name: N2_N2_2_prime_O_trimethylguanosine -def: "N2_N2_2prime_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22Gmv" EXACT RNAMOD [] -synonym: "N2,N2,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:035 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001331 -name: two_prime_O_ribosylguanosine_phosphate -def: "2prime_O_ribosylguanosine_phosphate is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosylguanosine (phosphate)" EXACT [] -synonym: "Gr(p)" EXACT RNAMOD [] -synonym: "two prime O ribosylguanosine phosphate" EXACT [] -xref: RNAMOD:036 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001332 -name: wybutosine -def: "Wybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "yW" EXACT RNAMOD [] -xref: RNAMOD:037 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001333 -name: peroxywybutosine -def: "Peroxywybutosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "o2yW" EXACT RNAMOD [] -xref: RNAMOD:038 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001334 -name: hydroxywybutosine -def: "Hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW" EXACT RNAMOD [] -xref: RNAMOD:039 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001335 -name: undermodified_hydroxywybutosine -def: "Undermodified_hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW*" EXACT RNAMOD [] -synonym: "undermodified hydroxywybutosine" EXACT [] -xref: RNAMOD:040 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001336 -name: wyosine -def: "Wyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "IMG" EXACT RNAMOD [] -xref: RNAMOD:041 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001337 -name: methylwyosine -def: "Methylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mimG" EXACT RNAMOD [] -xref: RNAMOD:042 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001338 -name: N2_7_dimethylguanosine -def: "N2_7_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7G" EXACT RNAMOD [] -synonym: "N2 7 dimethylguanosine" EXACT [] -synonym: "N2,7-dimethylguanosine" EXACT [] -xref: RNAMOD:090 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001339 -name: N2_N2_7_trimethylguanosine -def: "N2_N2_7_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,2,7G" EXACT RNAMOD [] -synonym: "N2,N2,7-trimethylguanosine" EXACT [] -xref: RNAMOD:091 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001340 -name: one_two_prime_O_dimethylguanosine -def: "1_2prime_O_dimethylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylguanosine" EXACT [] -synonym: "m1Gm" EXACT RNAMOD [] -synonym: "one two prime O dimethylguanosine" EXACT [] -xref: RNAMOD:096 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001341 -name: four_demethylwyosine -def: "4_demethylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-demethylwyosine" EXACT [] -synonym: "four demethylwyosine" EXACT [] -synonym: "imG-14" EXACT RNAMOD [] -xref: RNAMOD:100 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001342 -name: isowyosine -def: "Isowyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "imG2" EXACT RNAMOD [] -xref: RNAMOD:101 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001343 -name: N2_7_2prirme_O_trimethylguanosine -def: "N2_7_2prirme_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7Gm" EXACT RNAMOD [] -synonym: "N2 7 2prirme O trimethylguanosine" EXACT [] -synonym: "N2,7,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:106 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001344 -name: five_methyluridine -def: "5_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methyluridine" EXACT [] -synonym: "five methyluridine" EXACT [] -synonym: "m5U" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/5-methyluridine "wiki" -xref: RNAMOD:052 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001345 -name: two_prime_O_methyluridine -def: "2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyluridine" EXACT [] -synonym: "two prime O methyluridine" EXACT [] -synonym: "Um" EXACT RNAMOD [] -xref: RNAMOD:053 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001346 -name: five_two_prime_O_dimethyluridine -def: "5_2_prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethyluridine" EXACT [] -synonym: "five two prime O dimethyluridine" EXACT [] -synonym: "m5Um" EXACT RNAMOD [] -xref: RNAMOD:054 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001347 -name: one_methylpseudouridine -def: "1_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylpseudouridine" EXACT [] -synonym: "m1Y" EXACT RNAMOD [] -synonym: "one methylpseudouridine" EXACT [] -xref: RNAMOD:055 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001348 -name: two_prime_O_methylpseudouridine -def: "2prime_O_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylpseudouridine" EXACT [] -synonym: "two prime O methylpseudouridine" EXACT [] -synonym: "Ym" EXACT RNAMOD [] -xref: RNAMOD:056 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001349 -name: two_thiouridine -def: "2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiouridine" EXACT [] -synonym: "s2U" EXACT RNAMOD [] -synonym: "two thiouridine" EXACT [] -xref: RNAMOD:057 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001350 -name: four_thiouridine -def: "4_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-thiouridine" EXACT [] -synonym: "four thiouridine" EXACT [] -synonym: "s4U" EXACT RNAMOD [] -xref: RNAMOD:058 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001351 -name: five_methyl_2_thiouridine -def: "5_methyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyl-2-thiouridine" EXACT [] -synonym: "five methyl 2 thiouridine" EXACT [] -synonym: "m5s2U" EXACT RNAMOD [] -xref: RNAMOD:059 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001352 -name: two_thio_two_prime_O_methyluridine -def: "2_thio_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thio-2'-O-methyluridine" EXACT [] -synonym: "s2Um" EXACT RNAMOD [] -synonym: "two thio two prime O methyluridine" EXACT [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001353 -name: three_three_amino_three_carboxypropyl_uridine -def: "3_3_amino_3_carboxypropyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-(3-amino-3-carboxypropyl)uridine" EXACT [] -synonym: "acp3U" EXACT RNAMOD [] -xref: RNAMOD:061 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001354 -name: five_hydroxyuridine -def: "5_hydroxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxyuridine" EXACT [] -synonym: "five hydroxyuridine" EXACT [] -synonym: "ho5U" EXACT RNAMOD [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001355 -name: five_methoxyuridine -def: "5_methoxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxyuridine" EXACT [] -synonym: "five methoxyuridine" EXACT [] -synonym: "mo5U" EXACT RNAMOD [] -xref: RNAMOD:063 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001356 -name: uridine_five_oxyacetic_acid -def: "Uridine_5_oxyacetic_acid is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "cmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid" EXACT [] -synonym: "uridine five oxyacetic acid" EXACT [] -xref: RNAMOD:064 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001357 -name: uridine_five_oxyacetic_acid_methyl_ester -def: "Uridine_5_oxyacetic_acid_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mcmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid methyl ester" EXACT [] -synonym: "uridine five oxyacetic acid methyl ester" EXACT [] -xref: RNAMOD:065 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001358 -name: five_carboxyhydroxymethyl_uridine -def: "5_carboxyhydroxymethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine" EXACT [] -synonym: "chm5U" EXACT RNAMOD [] -synonym: "five carboxyhydroxymethyl uridine" EXACT [] -xref: RNAMOD:066 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001359 -name: five_carboxyhydroxymethyl_uridine_methyl_ester -def: "5_carboxyhydroxymethyl_uridine_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine methyl ester" EXACT [] -synonym: "five carboxyhydroxymethyl uridine methyl ester" EXACT [] -synonym: "mchm5U" EXACT RNAMOD [] -xref: RNAMOD:067 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001360 -name: five_methoxycarbonylmethyluridine -def: "Five_methoxycarbonylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyluridine" EXACT [] -synonym: "five methoxycarbonylmethyluridine" EXACT [] -synonym: "mcm5U" EXACT RNAMOD [] -xref: RNAMOD:068 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001361 -name: five_methoxycarbonylmethyl_two_prime_O_methyluridine -def: "Five_methoxycarbonylmethyl_2_prime_O_methyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five methoxycarbonylmethyl two prime O methyluridine" EXACT [] -synonym: "mcm5Um" EXACT RNAMOD [] -xref: RNAMOD:069 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001362 -name: five_methoxycarbonylmethyl_two_thiouridine -def: "5_methoxycarbonylmethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2-thiouridine" EXACT [] -synonym: "five methoxycarbonylmethyl two thiouridine" EXACT [] -synonym: "mcm5s2U" EXACT RNAMOD [] -xref: RNAMOD:070 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001363 -name: five_aminomethyl_two_thiouridine -def: "5_aminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-aminomethyl-2-thiouridine" EXACT [] -synonym: "five aminomethyl two thiouridine" EXACT [] -synonym: "nm5s2U" EXACT RNAMOD [] -xref: RNAMOD:071 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001364 -name: five_methylaminomethyluridine -def: "5_methylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyluridine" EXACT [] -synonym: "five methylaminomethyluridine" EXACT [] -synonym: "mnm5U" EXACT RNAMOD [] -xref: RNAMOD:072 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001365 -name: five_methylaminomethyl_two_thiouridine -def: "5_methylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-thiouridine" EXACT [] -synonym: "five methylaminomethyl two thiouridine" EXACT [] -synonym: "mnm5s2U" EXACT RNAMOD [] -xref: RNAMOD:073 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001366 -name: five_methylaminomethyl_two_selenouridine -def: "5_methylaminomethyl_2_selenouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-selenouridine" EXACT [] -synonym: "five methylaminomethyl two selenouridine" EXACT [] -synonym: "mnm5se2U" EXACT RNAMOD [] -xref: RNAMOD:074 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001367 -name: five_carbamoylmethyluridine -def: "5_carbamoylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyluridine" EXACT [] -synonym: "five carbamoylmethyluridine" EXACT [] -synonym: "ncm5U" EXACT RNAMOD [] -xref: RNAMOD:075 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001368 -name: five_carbamoylmethyl_two_prime_O_methyluridine -def: "5_carbamoylmethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five carbamoylmethyl two prime O methyluridine" EXACT [] -synonym: "ncm5Um" EXACT RNAMOD [] -xref: RNAMOD:076 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001369 -name: five_carboxymethylaminomethyluridine -def: "5_carboxymethylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyluridine" EXACT [] -synonym: "cmnm5U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyluridine" EXACT [] -xref: RNAMOD:077 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001370 -name: five_carboxymethylaminomethyl_two_prime_O_methyluridine -def: "5_carboxymethylaminomethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl- 2'-O-methyluridine" EXACT [] -synonym: "cmnm5Um" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two prime O methyluridine" EXACT [] -xref: RNAMOD:078 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001371 -name: five_carboxymethylaminomethyl_two_thiouridine -def: "5_carboxymethylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl-2-thiouridine" EXACT [] -synonym: "cmnm5s2U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two thiouridine" EXACT [] -xref: RNAMOD:079 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001372 -name: three_methyluridine -def: "3_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methyluridine" EXACT [] -synonym: "m3U" EXACT RNAMOD [] -synonym: "three methyluridine" EXACT [] -xref: RNAMOD:085 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001373 -name: one_methyl_three_three_amino_three_carboxypropyl_pseudouridine -def: "1_methyl_3_3_amino_3_carboxypropyl_pseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyl-3-(3-amino-3-carboxypropyl) pseudouridine" EXACT [] -synonym: "m1acp3Y" EXACT RNAMOD [] -xref: RNAMOD:086 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001374 -name: five_carboxymethyluridine -def: "5_carboxymethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethyluridine" EXACT [] -synonym: "cm5U" EXACT RNAMOD [] -synonym: "five carboxymethyluridine" EXACT [] -xref: RNAMOD:087 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001375 -name: three_two_prime_O_dimethyluridine -def: "3_2prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3,2'-O-dimethyluridine" EXACT [] -synonym: "m3Um" EXACT RNAMOD [] -synonym: "three two prime O dimethyluridine" EXACT [] -xref: RNAMOD:092 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001376 -name: five_methyldihydrouridine -def: "5_methyldihydrouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyldihydrouridine" EXACT [] -synonym: "five methyldihydrouridine" EXACT [] -synonym: "m5D" EXACT RNAMOD [] -xref: RNAMOD:093 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001377 -name: three_methylpseudouridine -def: "3_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylpseudouridine" EXACT [] -synonym: "m3Y" EXACT RNAMOD [] -synonym: "three methylpseudouridine" EXACT [] -xref: RNAMOD:094 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001378 -name: five_taurinomethyluridine -def: "5_taurinomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyluridine" EXACT [] -synonym: "five taurinomethyluridine" EXACT [] -synonym: "tm5U" EXACT RNAMOD [] -xref: RNAMOD:098 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001379 -name: five_taurinomethyl_two_thiouridine -def: "5_taurinomethyl_2_thiouridineis a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyl-2-thiouridine" EXACT [] -synonym: "five taurinomethyl two thiouridine" EXACT [] -synonym: "tm5s2U" EXACT RNAMOD [] -xref: RNAMOD:099 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001380 -name: five_isopentenylaminomethyl_uridine -def: "5_isopentenylaminomethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)uridine" EXACT [] -synonym: "five isopentenylaminomethyl uridine" EXACT [] -synonym: "inm5U" EXACT RNAMOD [] -xref: RNAMOD:103 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001381 -name: five_isopentenylaminomethyl_two_thiouridine -def: "5_isopentenylaminomethyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2-thiouridine" EXACT [] -synonym: "five isopentenylaminomethyl two thiouridine" EXACT [] -synonym: "inm5s2U" EXACT RNAMOD [] -xref: RNAMOD:104 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001382 -name: five_isopentenylaminomethyl_two_prime_O_methyluridine -def: "5_isopentenylaminomethyl_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2'-O-methyluridine" EXACT [] -synonym: "five isopentenylaminomethyl two prime O methyluridine" EXACT [] -synonym: "inm5Um" EXACT RNAMOD [] -xref: RNAMOD:105 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001383 -name: histone_binding_site -def: "A region of a DNA molecule that is bound by a histone." [SO:ke] -synonym: "histone binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site - -[Term] -id: SO:0001384 -name: CDS_fragment -synonym: "CDS fragment" EXACT [] -synonym: "incomplete CDS" EXACT [] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001385 -name: modified_amino_acid_feature -def: "A post translationally modified amino acid feature." [SO:ke] -synonym: "modified amino acid feature" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001386 -name: modified_glycine -def: "A post translationally modified glycine amino acid feature." [SO:ke] -synonym: "ModGly" EXACT AAMOD [] -synonym: "modified glycine" EXACT [] -xref: MOD:00908 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001387 -name: modified_L_alanine -def: "A post translationally modified alanine amino acid feature." [SO:ke] -synonym: "ModAla" EXACT AAMOD [] -synonym: "modified L alanine" EXACT [] -synonym: "modified L-alanine" EXACT [] -xref: MOD:00901 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001388 -name: modified_L_asparagine -def: "A post translationally modified asparagine amino acid feature." [SO:ke] -synonym: "ModAsn" EXACT AAMOD [] -synonym: "modified L asparagine" EXACT [] -synonym: "modified L-asparagine" EXACT [] -xref: MOD:00903 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001389 -name: modified_L_aspartic_acid -def: "A post translationally modified aspartic acid amino acid feature." [SO:ke] -synonym: "ModAsp" EXACT AAMOD [] -synonym: "modified L aspartic acid" EXACT [] -synonym: "modified L-aspartic acid" EXACT [] -xref: MOD:00904 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001390 -name: modified_L_cysteine -def: "A post translationally modified cysteine amino acid feature." [SO:ke] -synonym: "ModCys" EXACT AAMOD [] -synonym: "modified L cysteine" EXACT [] -synonym: "modified L-cysteine" EXACT [] -xref: MOD:00905 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001391 -name: modified_L_glutamic_acid -synonym: "ModGlu" EXACT AAMOD [] -synonym: "modified L glutamic acid" EXACT [] -synonym: "modified L-glutamic acid" EXACT [] -xref: MOD:00906 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001392 -name: modified_L_threonine -def: "A post translationally modified threonine amino acid feature." [SO:ke] -synonym: "modified L threonine" EXACT [] -synonym: "modified L-threonine" EXACT [] -synonym: "ModThr" EXACT AAMOD [] -xref: MOD:00917 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001393 -name: modified_L_tryptophan -def: "A post translationally modified tryptophan amino acid feature." [SO:ke] -synonym: "modified L tryptophan" EXACT [] -synonym: "modified L-tryptophan" EXACT [] -synonym: "ModTrp" EXACT AAMOD [] -xref: MOD:00918 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001394 -name: modified_L_glutamine -def: "A post translationally modified glutamine amino acid feature." 
[SO:ke] -synonym: "ModGln" EXACT [] -synonym: "modified L glutamine" EXACT [] -synonym: "modified L-glutamine" EXACT [] -xref: MOD:00907 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001395 -name: modified_L_methionine -def: "A post translationally modified methionine amino acid feature." [SO:ke] -synonym: "modified L methionine" EXACT [] -synonym: "modified L-methionine" EXACT [] -synonym: "ModMet" EXACT AAMOD [] -xref: MOD:00913 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001396 -name: modified_L_isoleucine -def: "A post translationally modified isoleucine amino acid feature." [SO:ke] -synonym: "modified L isoleucine" EXACT [] -synonym: "modified L-isoleucine" EXACT [] -synonym: "ModIle" EXACT AAMOD [] -xref: MOD:00910 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001397 -name: modified_L_phenylalanine -def: "A post translationally modified phenylalanine amino acid feature." [SO:ke] -synonym: "modified L phenylalanine" EXACT [] -synonym: "modified L-phenylalanine" EXACT [] -synonym: "ModPhe" EXACT AAMOD [] -xref: MOD:00914 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001398 -name: modified_L_histidine -def: "A post translationally modified histidie amino acid feature." [SO:ke] -synonym: "ModHis" EXACT [] -synonym: "modified L histidine" EXACT [] -synonym: "modified L-histidine" EXACT [] -xref: MOD:00909 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001399 -name: modified_L_serine -def: "A post translationally modified serine amino acid feature." [SO:ke] -synonym: "modified L serine" EXACT [] -synonym: "modified L-serine" EXACT [] -synonym: "MosSer" EXACT AAMOD [] -xref: MOD:00916 "http://www.psidev.info/index.php?q=node/104" -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001400 -name: modified_L_lysine -def: "A post translationally modified lysine amino acid feature." 
[SO:ke] -synonym: "modified L lysine" EXACT [] -synonym: "modified L-lysine" EXACT [] -synonym: "ModLys" EXACT AAMOD [] -xref: MOD:00912 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001401 -name: modified_L_leucine -def: "A post translationally modified leucine amino acid feature." [SO:ke] -synonym: "modified L leucine" EXACT [] -synonym: "modified L-leucine " EXACT [] -synonym: "ModLeu" EXACT AAMOD [] -xref: MOD:00911 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001402 -name: modified_L_selenocysteine -def: "A post translationally modified selenocysteine amino acid feature." [SO:ke] -synonym: "modified L selenocysteine" EXACT [] -synonym: "modified L-selenocysteine" EXACT [] -xref: MOD:01158 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001403 -name: modified_L_valine -def: "A post translationally modified valine amino acid feature." [SO:ke] -synonym: "modified L valine" EXACT [] -synonym: "modified L-valine" EXACT [] -synonym: "ModVal" EXACT AAMOD [] -xref: MOD:00920 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001404 -name: modified_L_proline -def: "A post translationally modified proline amino acid feature." [SO:ke] -synonym: "modified L proline" EXACT [] -synonym: "modified L-proline " EXACT [] -synonym: "ModPro" EXACT AAMOD [] -xref: MOD:00915 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001405 -name: modified_L_tyrosine -def: "A post translationally modified tyrosine amino acid feature." [SO:ke] -synonym: "modified L tyrosine" EXACT [] -synonym: "modified L-tyrosine" EXACT [] -synonym: "ModTry" EXACT AAMOD [] -xref: MOD:00919 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001406 -name: modified_L_arginine -def: "A post translationally modified arginine amino acid feature." [SO:ke] -synonym: "ModArg" EXACT AAMOD [] -synonym: "modified L arginine" EXACT [] -synonym: "modified L-arginine" EXACT [] -xref: MOD:00902 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001407 -name: peptidyl -def: "An attribute describing the nature of a proteinaceous polymer, where by the amino acid units are joined by peptide bonds." [SO:ke] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0001408 -name: cleaved_for_gpi_anchor_region -def: "The C-terminal residues of a polypeptide which are exchanged for a GPI-anchor." [EBI:rh] -synonym: "cleaved for gpi anchor region" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001409 -name: biomaterial_region -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001410 -name: experimental_feature -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -def: "A region that is defined according to its relations with other regions within the same sequence." [SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001413 -name: translocation_breakpoint -def: "The point within a chromosome where a translocation begins or ends." [SO:cb] -synonym: "translocation breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001414 -name: insertion_breakpoint -def: "The point within a chromosome where a insertion begins or ends." 
[SO:cb] -synonym: "insertion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001415 -name: deletion_breakpoint -def: "The point within a chromosome where a deletion begins or ends." [SO:cb] -synonym: "deletion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001416 -name: five_prime_flanking_region -def: "A flanking region located five prime of a specific region." [SO:chado] -synonym: "5' flanking region" RELATED [] -synonym: "five prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001417 -name: three_prime_flanking_region -def: "A flanking region located three prime of a specific region." [SO:chado] -synonym: "3' flanking region" RELATED [] -synonym: "three prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001418 -name: transcribed_fragment -def: "An experimental region, defined by a tiling array experiment to be transcribed at some level." [SO:ke] -comment: Term requested by the MODencode group. -synonym: "transcribed fragment" EXACT [] -synonym: "transfrag" RELATED [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001419 -name: cis_splice_site -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -def: "Primary transcript region bordering trans-splice junction." [SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001421 -name: splice_junction -def: "The boundary between an intron and an exon." [SO:ke] -synonym: "splice boundary" EXACT [] -synonym: "splice junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001422 -name: conformational_switch -def: "A region of a polypeptide, involved in the transition from one conformational state to another." 
[SO:ke] -comment: MM Young, K Kirshenbaum, KA Dill & S Highsmith. Predicting conformational switches in proteins. Protein Science, 1999, 8, 1752-64. K. Kirshenbaum, M.M. Young and S. Highsmith. Predicting Allosteric Switches in Myosins. Protein Science 8(9):1806-1815. 1999. -synonym: "polypeptide conformational switch" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001423 -name: dye_terminator_read -def: "A read produced by the dye terminator method of sequencing." [SO:ke] -synonym: "dye terminator read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001424 -name: pyrosequenced_read -def: "A read produced by pyrosequencing technology." [SO:ke] -comment: An example is a read produced by Roche 454 technology. -synonym: "pyorsequenced read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001425 -name: ligation_based_read -def: "A read produced by ligation based sequencing technologies." [SO:ke] -comment: An example of this kind of read is one produced by ABI SOLiD. -synonym: "ligation based read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001426 -name: polymerase_synthesis_read -def: "A read produced by the polymerase based sequence by synthesis method." [SO:ke] -comment: An example is a read produced by Illumina technology. -synonym: "polymerase synthesis read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001427 -name: cis_regulatory_frameshift_element -def: "A structural region in an RNA molecule which promotes ribosomal frameshifting of cis coding sequence." [RFAM:jd] -synonym: "cis regulatory frameshift element" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001428 -name: expressed_sequence_assembly -def: "A sequence assembly derived from expressed sequences." [SO:ke] -comment: From tracker [ 2372385 ] expressed_sequence_assembly. -synonym: "expressed sequence assembly" EXACT [] -is_a: SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0001429 -name: DNA_binding_site -def: "A region of a molecule that binds to DNA." [SO:ke] -synonym: "DNA binding site" EXACT [] -is_a: SO:0001655 ! nucleotide_binding_site - -[Term] -id: SO:0001431 -name: cryptic_gene -def: "A gene that is not transcribed under normal conditions and is not critical to normal cellular functioning." [SO:ke] -synonym: "cryptic gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000976 ! cryptic - -[Term] -id: SO:0001432 -name: sequence_variant_affecting_polyadenylation -comment: OBSOLETE: This term was deleted as it conflated more than one term. The alteration is separate from the effect. -synonym: "mutation affecting polyadenylation" RELATED [] -synonym: "sequence variant affecting polyadenylation" EXACT [] -is_obsolete: true -replaced_by: SO:0001545 - -[Term] -id: SO:0001433 -name: three_prime_RACE_clone -def: "A three prime RACE (Rapid Amplification of cDNA Ends) clone is a cDNA clone copied from the 3' end of an mRNA (using a poly-dT primer to capture the polyA tail and a gene-specific or randomly primed 5' primer), and spliced into a vector for propagation in a suitable host." [modENCODE:nlw] -synonym: "3' RACE clone" RELATED [] -is_a: SO:0000317 ! cDNA_clone - -[Term] -id: SO:0001434 -name: cassette_pseudogene -def: "A cassette pseudogene is a kind of gene in an inactive form which may recombine at a telomeric locus to form a functional copy." [SO:ke] -comment: Requested by the Trypanosome community. -synonym: "cassette pseudogene" EXACT [] -synonym: "cassette type psedogene" RELATED [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0001435 -name: alanine -comment: A place holder for a cross product with chebi. -synonym: "A" EXACT aa1 [] -synonym: "Ala" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001436 -name: valine -comment: A place holder for a cross product with chebi. 
-synonym: "V" EXACT aa1 [] -synonym: "Val" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001437 -name: leucine -comment: A place holder for a cross product with chebi. -synonym: "L" EXACT aa1 [] -synonym: "Leu" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001438 -name: isoleucine -comment: A place holder for a cross product with chebi. -synonym: "I" EXACT aa1 [] -synonym: "Ile" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001439 -name: proline -comment: A place holder for a cross product with chebi. -synonym: "P" EXACT aa1 [] -synonym: "Pro" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001440 -name: tryptophan -comment: A place holder for a cross product with chebi. -synonym: "Trp" EXACT aa3 [] -synonym: "W" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001441 -name: phenylalanine -comment: A place holder for a cross product with chebi. -synonym: "F" EXACT aa1 [] -synonym: "Phe" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001442 -name: methionine -comment: A place holder for a cross product with chebi. -synonym: "M" EXACT aa1 [] -synonym: "Met" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001443 -name: glycine -comment: A place holder for a cross product with chebi. -synonym: "G" EXACT aa1 [] -synonym: "Gly" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001444 -name: serine -comment: A place holder for a cross product with chebi. -synonym: "S" EXACT aa1 [] -synonym: "Ser" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001445 -name: threonine -comment: A place holder for a cross product with chebi. -synonym: "T" EXACT aa1 [] -synonym: "Thr" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001446 -name: tyrosine -comment: A place holder for a cross product with chebi. -synonym: "Tyr" EXACT aa3 [] -synonym: "Y" EXACT aa1 [] -is_a: SO:0001237 ! 
amino_acid - -[Term] -id: SO:0001447 -name: cysteine -comment: A place holder for a cross product with chebi. -synonym: "C" EXACT aa1 [] -synonym: "Cys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001448 -name: glutamine -comment: A place holder for a cross product with chebi. -synonym: "Gln" EXACT aa3 [] -synonym: "Q" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001449 -name: asparagine -comment: A place holder for a cross product with chebi. -synonym: "Asn" EXACT aa3 [] -synonym: "N" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001450 -name: lysine -comment: A place holder for a cross product with chebi. -synonym: "K" EXACT aa1 [] -synonym: "Lys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001451 -name: arginine -comment: A place holder for a cross product with chebi. -synonym: "Arg" EXACT aa3 [] -synonym: "R" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001452 -name: histidine -comment: A place holder for a cross product with chebi. -synonym: "H" EXACT aa1 [] -synonym: "His" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001453 -name: aspartic_acid -comment: A place holder for a cross product with chebi. -synonym: "Asp" EXACT aa3 [] -synonym: "aspartic acid" EXACT [] -synonym: "D" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001454 -name: glutamic_acid -comment: A place holder for a cross product with chebi. -synonym: "E" EXACT aa1 [] -synonym: "Glu" EXACT aa3 [] -synonym: "glutamic acid" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001455 -name: selenocysteine -comment: A place holder for a cross product with chebi. -synonym: "Sec" EXACT aa3 [] -synonym: "U" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001456 -name: pyrrolysine -comment: A place holder for a cross product with chebi. -synonym: "O" EXACT aa1 [] -synonym: "Pyl" EXACT aa3 [] -is_a: SO:0001237 ! 
amino_acid - -[Term] -id: SO:0001457 -name: transcribed_cluster -def: "A region defined by a set of transcribed sequences from the same gene or expressed pseudogene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "transcribed cluster" EXACT [] -synonym: "unigene cluster" RELATED [] -is_a: SO:0001410 ! experimental_feature -relationship: has_part SO:0000695 ! reagent - -[Term] -id: SO:0001458 -name: unigene_cluster -def: "A kind of transcribed_cluster defined by a set of transcribed sequences from the a unique gene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "unigene cluster" RELATED [] -is_a: SO:0001457 ! transcribed_cluster - -[Term] -id: SO:0001459 -name: CRISPR -def: "Clustered Palindromic Repeats interspersed with bacteriophage derived spacer sequences." [RFAM:jd] -synonym: "Clustered_Regularly_Interspaced_Short_Palindromic_Repeat" EXACT [] -synonym: "CRISPR element" EXACT [] -xref: http:en.wikipedia.org/wiki/CRISPR -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0001460 -name: insulator_binding_site -def: "A protein_binding_site located within an insulator." [SO:ke] -comment: See tracker ID 2060908. -synonym: "insulator binding site" RELATED [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -relationship: part_of SO:0000627 ! insulator - -[Term] -id: SO:0001461 -name: enhancer_binding_site -def: "A protein_binding_site located within an enhancer." [SO:ke] -synonym: "enhancer binding site" RELATED [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -relationship: part_of SO:0000165 ! enhancer - -[Term] -id: SO:0001462 -name: contig_collection -def: "A collection of contigs." [SO:ke] -comment: See tracker ID: 2138359. -synonym: "contig collection" EXACT [] -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001085 ! sequence_conflict -intersection_of: has_part SO:0000149 ! 
contig - -[Term] -id: SO:0001463 -name: lincRNA -def: "A multiexonic non-coding RNA transcribed by RNA polymerase II." [PMID:19182780, SO:ke] -synonym: "large intervening non-coding RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001464 -name: UST -def: "An EST spanning part or all of the untranslated regions of a protein-coding transcript." [SO:nlw] -synonym: "UTR sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001465 -name: three_prime_UST -def: "A UST located in the 3'UTR of a protein-coding transcript." [SO:nlw] -synonym: "3' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001466 -name: five_prime_UST -def: "An UST located in the 5'UTR of a protein-coding transcript." [SO:nlw] -synonym: "5' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001467 -name: RST -def: "A tag produced from a single sequencing read from a RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "RACE sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001468 -name: three_prime_RST -def: "A tag produced from a single sequencing read from a 3'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "3' RST" EXACT [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001469 -name: five_prime_RST -def: "A tag produced from a single sequencing read from a 5'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "5' RST" RELATED [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001470 -name: UST_match -def: "A match against an UST sequence." [SO:nlw] -synonym: "UST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001471 -name: RST_match -def: "A match against an RST sequence." [SO:nlw] -synonym: "RST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001472 -name: primer_match -def: "A nucleotide match to a primer sequence." [SO:nlw] -synonym: "primer match" EXACT [] -is_a: SO:0000347 ! 
nucleotide_match - -[Term] -id: SO:0001473 -name: miRNA_antiguide -def: "A region of the pri miRNA that basepairs with the guide to form the hairpin." [SO:ke] -synonym: "miRNA antiguide " EXACT [] -synonym: "miRNA passenger strand" EXACT [] -synonym: "miRNA star" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-05-27T03:35:43Z - -[Term] -id: SO:0001474 -name: trans_splice_junction -def: "The boundary between the spliced leader and the first exon of the mRNA." [SO:ke] -synonym: "trans-splice junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2009-07-13T04:50:49Z - -[Term] -id: SO:0001475 -name: outron -def: "A region of a primary transcript, that is removed via trans splicing." [PMID:16401417, SO:ke] -is_a: SO:0000835 ! primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-07-14T11:36:08Z - -[Term] -id: SO:0001476 -name: natural_plasmid -def: "A plasmid that occurs naturally." [SO:xp] -synonym: "natural plasmid" EXACT [] -is_a: SO:0000155 ! plasmid -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000782 ! natural -created_by: kareneilbeck -creation_date: 2009-09-01T03:43:06Z - -[Term] -id: SO:0001477 -name: gene_trap_construct -def: "A gene trap construct is a type of engineered plasmid which is designed to integrate into a genome and produce a fusion transcript between exons of the gene into which it inserts and a reporter element in the construct. Gene traps contain a splice acceptor, do not contain promoter elements for the reporter, and are mutagenic. Gene traps may be bicistronic with the second cassette containing a promoter driving an a selectable marker." [ZFIN:dh] -synonym: "gene trap construct" EXACT [] -is_a: SO:0000637 ! 
engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:49:09Z - -[Term] -id: SO:0001478 -name: promoter_trap_construct -def: "A promoter trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when inserted in close proximity to a promoter element. Promoter traps typically do not contain promoter elements and are mutagenic." [ZFIN:dh] -synonym: "promoter trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:52:01Z - -[Term] -id: SO:0001479 -name: enhancer_trap_construct -def: "An enhancer trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when the expression from a basic minimal promoter is enhanced by genomic enhancer elements. Enhancer traps contain promoter elements and are not usually mutagenic." [ZFIN:dh] -synonym: "enhancer trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:53:26Z - -[Term] -id: SO:0001480 -name: PAC_end -def: "A region of sequence from the end of a PAC clone that may provide a highly specific marker." [ZFIN:mh] -synonym: "PAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000154 ! PAC -created_by: kareneilbeck -creation_date: 2009-09-09T05:18:12Z - -[Term] -id: SO:0001481 -name: RAPD -def: "RAPD is a 'PCR product' where a sequence variant is identified through the use of PCR with random primers." [ZFIN:mh] -synonym: "Random Amplification Polymorphic DNA" EXACT [] -is_a: SO:0000006 ! PCR_product -created_by: kareneilbeck -creation_date: 2009-09-09T05:26:10Z - -[Term] -id: SO:0001482 -name: shadow_enhancer -synonym: "shadow enhancer" EXACT [] -is_a: SO:0000165 ! 
enhancer -created_by: kareneilbeck -creation_date: 2009-09-09T05:29:29Z - -[Term] -id: SO:0001483 -name: SNV -def: "SNVs are single base pair positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0001484 -name: X_element_combinatorial_repeat -def: "An X element combinatorial repeat is a repeat region located between the X element and the telomere or adjacent Y' element." [http://www.yeastgenome.org/help/glossary.html] -comment: X element combinatorial repeats contain Tbf1p binding sites,\nand possible functions include a role in telomerase-independent telomere\nmaintenance via recombination or as a barrier against transcriptional\nsilencing. These are usually present as a combination of one or more of\nseveral types of smaller elements (designated A, B, C, or D). This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "X element combinatorial repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T11:03:37Z - -[Term] -id: SO:0001485 -name: Y_prime_ element -def: "A Y' element is a repeat region (SO:0000657) located adjacent to telomeric repeats or X element combinatorial repeats, either as a single copy or tandem repeat of two to four copies." [http:http\://www.yeastgenome.org/help/glossary.html] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "Y' element" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! 
telomere -created_by: kareneilbeck -creation_date: 2009-11-10T12:08:57Z - -[Term] -id: SO:0001486 -name: standard_draft -def: "The status of a whole genome sequence, where the data is minimally filtered or un-filtered, from any number of sequencing platforms, and is assembled into contigs. Genome sequence of this quality may harbour regions of poor quality and can be relatively incomplete." [DOI:10.1126] -synonym: "standard draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:48:32Z - -[Term] -id: SO:0001487 -name: high_quality_draft -def: "The status of a whole genome sequence, where overall coverage represents at least 90 percent of the genome." [DOI:10.1126] -synonym: "high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:52:36Z - -[Term] -id: SO:0001488 -name: improved_high_quality_draft -def: "The status of a whole genome sequence, where additional work has been performed, using either manual or automated methods, such as gap resolution." [DOI:10.1126] -synonym: "improved high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:54:35Z - -[Term] -id: SO:0001489 -name: annotation_directed_improved_draft -def: "The status of a whole genome sequence,where annotation, and verification of coding regions has occurred." [DOI:10.1126] -synonym: "annotation directed improvement" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:57:10Z - -[Term] -id: SO:0001490 -name: noncontiguous_finished -def: "The status of a whole genome sequence, where the assembly is high quality, closure approaches have been successful for most gaps, misassemblies and low quality regions." [DOI:10.1126] -synonym: "non contiguous finished" EXACT [] -is_a: SO:0001499 ! 
whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:01:07Z - -[Term] -id: SO:0001491 -name: finished_genome -def: "The status of a whole genome sequence, with less than 1 error per 100,000 base pairs." [DOI:10.1126] -synonym: "finished" EXACT [] -synonym: "finished genome" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:04:43Z - -[Term] -id: SO:0001492 -name: intronic_regulatory_region -def: "A regulatory region that is part of an intron." [SO:ke] -synonym: "intronic regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000188 ! intron -created_by: kareneilbeck -creation_date: 2009-11-08T02:48:02Z - -[Term] -id: SO:0001493 -name: centromere_DNA_Element_I -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region composed of 8-11bp which enables binding by the centromere binding factor 1(Cbf1p)." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEI" EXACT [] -synonym: "Centromere DNA Element I" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000577 ! centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:47:23Z - -[Term] -id: SO:0001494 -name: centromere_DNA_Element_II -def: "A centromere DNA Element II (CDEII) is part a conserved region of the centromere, consisting of a consensus region that is AT-rich and ~ 75-100 bp in length." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEII" EXACT [] -synonym: "centromere DNA Element II" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000577 ! 
centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:51:26Z - -[Term] -id: SO:0001495 -name: centromere_DNA_Element_III -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region that consists of a 25-bp which enables binding by the centromere DNA binding factor 3 (CBF3) complex." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEIII" EXACT [] -synonym: "centromere DNA Element III" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000577 ! centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:54:47Z - -[Term] -id: SO:0001496 -name: telomeric_repeat -def: "The telomeric repeat is a repeat region, part of the chromosome, which in yeast, is a G-rich terminal sequence of the form (TG(1-3))n or more precisely ((TG)(1-6)TG(2-3))n." [PMID:8720065] -comment: The repeats are maintained by telomerase and there is generally 300 (+/-) 75 bp of TG(1-3) at a given end. Telomeric repeats function in completing chromosome replication and protecting the ends from degradation and end-to-end fusions. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880739. -synonym: "telomeric repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-09T06:00:42Z - -[Term] -id: SO:0001497 -name: X_element -def: "The X element is a conserved region, of the telomere, of ~475 bp that contains an ARS sequence and in most cases an Abf1p binding site." [http://www.yeastgenome.org/help/glossary.html#xelemcoresequence] -comment: Possible functions include roles in chromosomal segregation,\nmaintenance of chromosome stability, recombinational sequestering, or as a\nbarrier to transcriptional silencing. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "X element" RELATED [] -is_a: SO:0000330 ! 
conserved_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T10:56:54Z - -[Term] -id: SO:0001498 -name: YAC_end -def: "A region of sequence from the end of a YAC clone that may provide a highly specific marker." [SO:ke] -synonym: "YAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000152 ! YAC -created_by: kareneilbeck -creation_date: 2009-11-19T11:07:18Z - -[Term] -id: SO:0001499 -name: whole_genome_sequence_status -def: "The status of whole genome sequence." [DOI:10.1126] -comment: This terms and children were added to SO in response to tracker request by Patrick Chain. The paper Genome Project Standards in a New Era of Sequencing. Science October 9th 2009, addresses these terms. -synonym: "whole genome sequence status" EXACT [] -is_a: SO:0000905 ! status -created_by: kareneilbeck -creation_date: 2009-10-23T12:47:47Z - -[Term] -id: SO:0001500 -name: heritable_phenotypic_marker -def: "A biological_region characterized as a single heritable trait in a phenotype screen. The heritable phenotype may be mapped to a chromosome but generally has not been characterized to a specific gene locus." [JAX:hdene] -synonym: "heritable phenotypic marker" EXACT [] -synonym: "phenotypic marker" EXACT [] -is_a: SO:0001645 ! genetic_marker -created_by: kareneilbeck -creation_date: 2009-12-07T01:50:55Z - -[Term] -id: SO:0001501 -name: peptide_collection -def: "A collection of peptide sequences." [BBOP:nlw] -comment: Term requested via tracker ID: 2910829. -synonym: "peptide collection" EXACT [] -synonym: "peptide set" EXACT [] -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0000104 ! polypeptide -created_by: kareneilbeck -creation_date: 2009-12-11T10:58:58Z - -[Term] -id: SO:0001502 -name: high_identity_region -def: "An experimental feature with high sequence identity to another sequence." [SO:ke] -comment: Requested by tracker ID: 2902685. 
-synonym: "high identity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2009-12-11T11:06:05Z - -[Term] -id: SO:0001503 -name: processed_transcript -def: "A transcript for which no open reading frame has been identified and for which no other function has been determined." [MGI:hdeen] -comment: Ensembl and Vega also use this term name. Requested by Howard Deen of MGI. -synonym: "processed transcript" EXACT [] -is_a: SO:0000673 ! transcript -created_by: kareneilbeck -creation_date: 2009-12-21T05:37:14Z - -[Term] -id: SO:0001504 -name: assortment_derived_variation -def: "A chromosome variation derived from an event during meiosis." [SO:ke] -synonym: "assortment derived variation" RELATED [] -is_a: SO:0000240 ! chromosome_variation -created_by: kareneilbeck -creation_date: 2010-03-02T05:03:18Z - -[Term] -id: SO:0001505 -name: reference_genome -def: "A collection of sequences (often chromosomes) taken as the standard for a given organism and genome assembly." [SO:ke] -synonym: "reference genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:10:03Z - -[Term] -id: SO:0001506 -name: variant_genome -def: "A collection of sequences (often chromosomes) of an individual." [SO:ke] -synonym: "variant genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:11:25Z - -[Term] -id: SO:0001507 -name: variant_collection -def: "A collection of one or more sequences of an individual." [SO:ke] -synonym: "variant collection" RELATED [] -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0001059 ! sequence_alteration -created_by: kareneilbeck -creation_date: 2010-03-03T02:13:28Z - -[Term] -id: SO:0001508 -name: alteration_attribute -synonym: "alteration attribute" EXACT [] -is_a: SO:0000733 ! 
feature_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:53:23Z - -[Term] -id: SO:0001509 -name: chromosomal_variation_attribute -synonym: "chromosomal variation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:54:30Z - -[Term] -id: SO:0001510 -name: intrachromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:25Z - -[Term] -id: SO:0001511 -name: interchromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:43Z - -[Term] -id: SO:0001512 -name: insertion_attribute -def: "A quality of a chromosomal insertion,." [SO:ke] -synonym: "insertion attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:56Z - -[Term] -id: SO:0001513 -name: tandem -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:37Z - -[Term] -id: SO:0001514 -name: direct -def: "A quality of an insertion where the insert is not in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:49Z - -[Term] -id: SO:0001515 -name: inverted -def: "A quality of an insertion where the insert is in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:40Z - -[Term] -id: SO:0001516 -name: free -def: "The quality of a duplication where the new region exists independently of the original." [SO:ke] -is_a: SO:0001523 ! duplication_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:51Z - -[Term] -id: SO:0001517 -name: inversion_attribute -synonym: "inversion attribute" EXACT [] -is_a: SO:0001508 ! 
alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:10Z - -[Term] -id: SO:0001518 -name: pericentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:24Z - -[Term] -id: SO:0001519 -name: paracentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:35Z - -[Term] -id: SO:0001520 -name: translocaton_attribute -synonym: "translocation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:47Z - -[Term] -id: SO:0001521 -name: reciprocal -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:34Z - -[Term] -id: SO:0001522 -name: insertional -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:51Z - -[Term] -id: SO:0001523 -name: duplication_attribute -synonym: "duplication attribute" RELATED [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-05T01:56:33Z - -[Term] -id: SO:0001524 -name: chromosomally_aberrant_genome -synonym: "chromosomally aberrant genome" RELATED [] -is_a: SO:0001506 ! variant_genome -created_by: kareneilbeck -creation_date: 2010-03-05T02:21:00Z - -[Term] -id: SO:0001525 -name: assembly_error_correction -def: "A region of sequence where the final nucleotide assignment differs from the original assembly due to an improvement that replaces a mistake." [SO:ke] -synonym: "assembly error correction" RELATED [] -is_a: SO:0000413 ! sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:16:31Z - -[Term] -id: SO:0001526 -name: base_call_error_correction -def: "A region of sequence where the final nucleotide assignment is different from that given by the base caller due to an improvement that replaces a mistake." [SO:ke] -synonym: "base call error correction" RELATED [] -is_a: SO:0000413 ! 
sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:18:07Z - -[Term] -id: SO:0001527 -name: peptide_localization_signal -def: "A region of peptide sequence used to target the polypeptide molecule to a specific organelle." [SO:ke] -subset: SOFA -synonym: "localization signal" RELATED [] -synonym: "peptide localization signal" EXACT [] -is_a: SO:0000839 ! polypeptide_region -created_by: kareneilbeck -creation_date: 2010-03-11T02:15:05Z - -[Term] -id: SO:0001528 -name: nuclear_localization_signal -def: "A polypeptide region that targets a polypeptide to the nucleus." [SO:ke] -synonym: "NLS" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_localization_signal "wikipedia" -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:16:38Z - -[Term] -id: SO:0001529 -name: endosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the endosome." [SO:ke] -synonym: "endosomal localization signal" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:20:58Z - -[Term] -id: SO:0001530 -name: lysosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the lysosome." [SO:ke] -synonym: "lysosomal localization signal" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:24:10Z - -[Term] -id: SO:0001531 -name: nuclear_export_signal -def: "A polypeptide region that targets a polypeptide to he cytoplasm." [SO:ke] -synonym: "NES" EXACT [] -synonym: "nuclear export signal" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_export_signal -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:25:25Z - -[Term] -id: SO:0001532 -name: recombination_signal_sequence -def: "A region recognized by a recombinase." 
[SO:ke] -synonym: "recombination signal sequence" RELATED [] -xref: http://en.wikipedia.org/wiki/Recombination_Signal_Sequences "wikipedia" -is_a: SO:0000299 ! specific_recombination_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:16:47Z - -[Term] -id: SO:0001533 -name: cryptic_splice_site -def: "A splice site that is in part of the transcript not normally spliced. They occur via mutation or transcriptional error." [SO:ke] -synonym: "cryptic splice signal" RELATED [] -synonym: "cryptic splice site" EXACT [] -is_a: SO:0000162 ! splice_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:25:06Z - -[Term] -id: SO:0001534 -name: nuclear_rim_localization_signal -def: "A polypeptide region that targets a polypeptide to the nuclear rim." [SO:ke] -synonym: "nuclear rim localization signal" RELATED [] -xref: PMID:16027110 -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T03:31:30Z - -[Term] -id: SO:0001535 -name: p_element -def: "A P_element is a DNA transposon responsible for hybrid dysgenesis." [SO:ke] -synonym: "P element" RELATED [] -is_a: SO:0000182 ! DNA_transposon -created_by: kareneilbeck -creation_date: 2010-03-12T03:40:33Z - -[Term] -id: SO:0001536 -name: functional_variant -def: "A sequence variant which alters a biological process or function." [SO:ke] -synonym: "functional variant" EXACT [] -is_a: SO:0001060 ! sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:30:25Z - -[Term] -id: SO:0001537 -name: structural_variant -def: "A sequence variant that changes one or more sequence features." [SO:ke] -synonym: "structural variant" RELATED [] -is_a: SO:0001060 ! sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:31:01Z - -[Term] -id: SO:0001538 -name: transcript_function_variant -def: "A sequence variant which alters the functioning of a transcript." [SO:ke] -synonym: "transcript function variant" EXACT [] -is_a: SO:0001536 ! 
functional_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:32:58Z - -[Term] -id: SO:0001539 -name: translational_product_function_variant -def: "A sequence variant that affects the functioning of a translational product." [SO:ke] -synonym: "translational product variant" EXACT [] -is_a: SO:0001536 ! functional_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:46:15Z - -[Term] -id: SO:0001540 -name: level_of_transcript_variant -def: "A sequence variant which alters the level of a transcript." [SO:ke] -synonym: "level of transcript variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:47:07Z - -[Term] -id: SO:0001541 -name: decreased_transcript_level -def: "A sequence variant that increases the level of mature, spliced and processed RNA." [SO:ke] -synonym: "decreased transcript level" EXACT [] -is_a: SO:0001540 ! level_of_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:47:47Z - -[Term] -id: SO:0001542 -name: increased_transcript_level -def: "A sequence variant that increases the level of mature, spliced and processed RNA." [SO:ke] -synonym: "increased transcript level" EXACT [] -is_a: SO:0001540 ! level_of_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:48:17Z - -[Term] -id: SO:0001543 -name: transcript_processing_variant -def: "A sequence variant that affects the post transcriptional processing of a transcript." [SO:ke] -synonym: "transcript processing variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:48:48Z - -[Term] -id: SO:0001544 -name: editing_variant -def: "A variant that changes editing of a transcript." [SO:ke] -synonym: "editing variant" EXACT [] -is_a: SO:0001543 ! 
transcript_processing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:49:25Z - -[Term] -id: SO:0001545 -name: polyadenylation_variant -def: "A sequence variant that changes polyadenylation." [SO:ke] -synonym: "polyadenylation variant" EXACT [] -is_a: SO:0001543 ! transcript_processing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:49:40Z - -[Term] -id: SO:0001546 -name: transcript_stability_variant -def: "A variant that changes the stability of a transcript." [SO:ke] -synonym: "transcript stability variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:01Z - -[Term] -id: SO:0001547 -name: decrease_transcript_stability -def: "A sequence variant that decreases transcript stability." [SO:ke] -synonym: "decrease transcript stability" EXACT [] -is_a: SO:0001546 ! transcript_stability_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:23Z - -[Term] -id: SO:0001548 -name: increase_transcript_stability -def: "A sequence variant that increases transcript stability." [SO:ke] -synonym: "increase transcript stability" EXACT [] -is_a: SO:0001546 ! transcript_stability_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:39Z - -[Term] -id: SO:0001549 -name: transcription_variant -def: "A variant that changes alters the transcription of a transcript." [SO:ke] -synonym: "transcription variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:51:26Z - -[Term] -id: SO:0001550 -name: rate_of_transcription_variant -def: "A sequence variant that changes the rate of transcription." [SO:ke] -synonym: "rate of transcription variant" EXACT [] -is_a: SO:0001549 ! transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:51:50Z - -[Term] -id: SO:0001551 -name: increase_transcription_rate -def: "A sequence variant that increases the rate of transcription." 
[SO:ke] -synonym: "increase transcription rate" RELATED [] -is_a: SO:0001550 ! rate_of_transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:52:17Z - -[Term] -id: SO:0001552 -name: decrease_transcription_rate -def: "A sequence variant that decreases the rate of transcription." [SO:ke] -synonym: "decrease transcription rate" EXACT [] -is_a: SO:0001550 ! rate_of_transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:52:43Z - -[Term] -id: SO:0001553 -name: translational_product_level_variant -synonym: "translational product level variant" EXACT [] -is_a: SO:0001539 ! translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:53:32Z - -[Term] -id: SO:0001554 -name: polypeptide_function_variant -def: "A sequence variant which changes polypeptide functioning." [SO:ke] -synonym: "polypeptide function variant" EXACT [] -is_a: SO:0001539 ! translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:53:54Z - -[Term] -id: SO:0001555 -name: decreased_translational_product_level -def: "A sequence variant which decreases the translational product level." [SO:ke] -synonym: "decrease translational product level" EXACT [] -is_a: SO:0001553 ! translational_product_level_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:54:25Z - -[Term] -id: SO:0001556 -name: increased_translational_product_level -def: "A sequence variant which increases the translational product level." [SO:ke] -synonym: "increase translational product level" EXACT [] -is_a: SO:0001553 ! translational_product_level_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:55:25Z - -[Term] -id: SO:0001557 -name: polypeptide_gain_of_function -def: "A sequence variant which causes gain of polypeptide function." [SO:ke] -synonym: "polypeptide gain of function" EXACT [] -is_a: SO:0001554 ! 
polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:12Z - -[Term] -id: SO:0001558 -name: polypeptide_localization_variant -def: "A sequence variant which changes the localization of a polypeptide." [SO:ke] -synonym: "polypeptide localization variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:37Z - -[Term] -id: SO:0001559 -name: polypeptide_loss_of_function -def: "A sequence variant that causes the loss of a polypeptide function." [SO:ke] -synonym: "polypeptide loss of function" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:58Z - -[Term] -id: SO:0001560 -name: inactive_ligand_binding_site -def: "A sequence variant that causes the inactivation of a ligand binding site." [SO:ke] -synonym: "inactive ligand binding site" EXACT [] -is_a: SO:0001559 ! polypeptide_loss_of_function -created_by: kareneilbeck -creation_date: 2010-03-22T11:58:00Z - -[Term] -id: SO:0001561 -name: polypeptide_partial_loss_of_function -def: "A sequence variant that causes some but not all loss of polypeptide function." [SO:ke] -synonym: "polypeptide partial loss of function" EXACT [] -is_a: SO:0001559 ! polypeptide_loss_of_function -created_by: kareneilbeck -creation_date: 2010-03-22T11:58:32Z - -[Term] -id: SO:0001562 -name: polypeptide_post_translational_processing_variant -def: "A sequence variant that causes a change in post translational processing of the peptide." [SO:ke] -synonym: "polypeptide post translational processing variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:59:06Z - -[Term] -id: SO:0001563 -name: copy_number_change -def: "A sequence variant where copies of a feature (CNV) are either increased or decreased." [SO:ke] -synonym: "copy number change" EXACT [] -is_a: SO:0001537 ! 
structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:27:33Z - -[Term] -id: SO:0001564 -name: gene_structure_variant -def: "A sequence variant where the structure of the gene is changed." [SO:ke] -synonym: "gene structure variant" EXACT [] -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:01Z - -[Term] -id: SO:0001565 -name: gene_fusion -def: "A sequence variant whereby a two genes have become joined." [SO:ke] -synonym: "gene fusion" EXACT [] -is_a: SO:0001564 ! gene_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:28Z - -[Term] -id: SO:0001566 -name: regulatory_region_variant -def: "A sequence variant located with a regulatory region such as a promoter." [SO:ke] -comment: EBI term: Regulatory region variations - In regulatory region annotated by Ensembl. -synonym: "regulatory region variant" EXACT [] -synonym: "regulatory_region_" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:48Z - -[Term] -id: SO:0001567 -name: stop_retained_variant -def: "A sequence variant where at least one base in the terminator codon is changed, but the terminator remains." [SO:ke] -synonym: "stop retained variant" EXACT [] -is_a: SO:0001590 ! terminator_codon_variant -created_by: kareneilbeck -creation_date: 2010-04-19T05:02:30Z - -[Term] -id: SO:0001568 -name: splicing_variant -def: "A sequence variant that changes the process of splicing." [SO:ke] -synonym: "splicing variant" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:29:22Z - -[Term] -id: SO:0001569 -name: cryptic_splice_site_variant -def: "A sequence variant causing a new (functional) splice site." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "cryptic splice site activation" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:29:41Z - -[Term] -id: SO:0001570 -name: cryptic_splice_acceptor -def: "A sequence variant whereby a new splice site is created due to the activation of a new acceptor." [SO:ke] -synonym: "cryptic splice acceptor" EXACT [] -is_a: SO:0001569 ! cryptic_splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:30:11Z - -[Term] -id: SO:0001571 -name: cryptic_splice_donor -def: "A sequence variant whereby a new splice site is created due to the activation of a new donor." [SO:ke] -synonym: "cryptic splice donor" EXACT [] -is_a: SO:0001569 ! cryptic_splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:30:35Z - -[Term] -id: SO:0001572 -name: exon_loss -def: "A sequence variant whereby an exon is lost from the transcript." [SO:ke] -synonym: "exon loss" EXACT [] -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:09Z - -[Term] -id: SO:0001573 -name: intron_gain -def: "A sequence variant whereby an intron is gained by the processed transcript; usually a result of an alteration of the donor or acceptor." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "intron gain" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:25Z - -[Term] -id: SO:0001574 -name: splice_acceptor_variant -def: "A splice variant that changes the2 base region at the 3' end of an intron." [SO:ke] -synonym: "splice acceptor variant" EXACT [] -is_a: SO:0001629 ! 
splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:52Z - -[Term] -id: SO:0001575 -name: splice_donor_variant -def: "A splice variant that changes the2 base region at the 5' end of an intron." [SO:ke] -synonym: "splice donor variant" EXACT [] -is_a: SO:0001629 ! splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:32:10Z - -[Term] -id: SO:0001576 -name: transcript_variant -def: "A sequence variant that changes the structure of the transcript." [SO:ke] -synonym: "transcript variant" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:32:41Z - -[Term] -id: SO:0001577 -name: complex_change_in_transcript -def: "A transcript variant with a complex INDEL- Insertion or deletion that spans an exon/intron border or a coding sequence/UTR border." [http://uswest.ensembl.org/info/docs/variation/index.html] -comment: EBI term: Complex InDel - Insertion or deletion that spans an exon/intron border or a coding sequence/UTR border. -synonym: "complex_indel" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -synonym: "complext change in transcript" EXACT [] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:33:03Z - -[Term] -id: SO:0001578 -name: stop_lost -def: "A sequence variant where at least one base of the terminator codon (stop) is changed, resulting in an elongated transcript." [SO:ke] -comment: EBI term: Stop lost - In coding sequence, resulting in the loss of a stop codon. -synonym: "stop codon lost" EXACT [] -synonym: "stop lost" EXACT [] -synonym: "stop_lost" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001590 ! 
terminator_codon_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:46:42Z - -[Term] -id: SO:0001579 -name: transcript_sequence_variant -synonym: "transcript sequence variant" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001580 -name: coding_sequence_variant -def: "A sequence variant that changes the coding sequence." [SO:ke] -synonym: "coding sequence variant" EXACT [] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:34:36Z - -[Term] -id: SO:0001581 -name: codon_variant -def: "A sequence variant that changes at least one base in a codon." [SO:ke] -synonym: "coding variant" EXACT [] -synonym: "codon variant" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001580 ! coding_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:34:56Z - -[Term] -id: SO:0001582 -name: initiator_codon_change -def: "A codon variant that changes at least one base of the first codon of a transcript." [SO:ke] -synonym: "initiator codon change" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001581 ! codon_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:35:18Z - -[Term] -id: SO:0001583 -name: non_synonymous_codon -alt_id: SO:0001584 -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for a different amino acid." [SO:ke] -comment: EBI term: Non-synonymous SNPs. SNPs that are located in the coding sequence and result in an amino acid change in the encoded peptide sequence. 
-synonym: "missense" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "missense_codon" EXACT [] -synonym: "non synonymous codon" EXACT [] -synonym: "non_synonymous_coding" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -xref: http:http\://en.wikipedia.org/wiki/Missense_mutation "wiki" -is_a: SO:0001581 ! codon_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:35:49Z - -[Term] -id: SO:0001585 -name: conservative_missense_codon -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for a different but similar amino acid. These variants may or may not be deleterious." [SO:ke] -synonym: "conservative missense codon" EXACT [] -synonym: "neutral missense codon" RELATED [] -synonym: "quiet missense codon" RELATED [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001583 ! non_synonymous_codon -created_by: kareneilbeck -creation_date: 2010-03-22T02:36:40Z - -[Term] -id: SO:0001586 -name: non_conservative_missense_codon -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for an amino acid with different biochemical properties." [SO:ke] -synonym: "non conservative missense codon" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001583 ! non_synonymous_codon -created_by: kareneilbeck -creation_date: 2010-03-22T02:37:16Z - -[Term] -id: SO:0001587 -name: stop_gained -def: "A sequence variant whereby at least one base of a codon is changed, resulting in a premature stop codon, leading to a shortened transcript." [SO:ke] -comment: EBI term: Stop gained - In coding sequence, resulting in the gain of a stop codon (i.e. leading to a shortened peptide sequence). 
-synonym: "nonsense" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "nonsense codon" EXACT [] -synonym: "stop codon gained" RELATED [] -synonym: "stop gained" EXACT [] -synonym: "stop_gained" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001583 ! non_synonymous_codon -created_by: kareneilbeck -creation_date: 2010-03-22T02:37:52Z - -[Term] -id: SO:0001588 -name: synonymous_codon -def: "A sequence variant whereby a base of a codon is changed, but there is no resulting change to the encoded amino acid." [SO:ke] -comment: EBI term: Synonymous SNPs - In coding sequence, not resulting in an amino acid change (i.e. silent mutation).\nThis term is sometimes used synonomously with the more general term 'silent mutation', although a silent mutation may occur in non coding sequence. The best practice is to annotate to the most specific term. -synonym: "coding-synon" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "silent mutation" RELATED [] -synonym: "silent substitution" RELATED [] -synonym: "silent_mutation" RELATED [] -synonym: "synonymous codon" EXACT [] -synonym: "synonymous_coding" EXACT ebi_variants [] -xref: http://en.wikipedia.org/wiki/Silent_mutation "wiki" -xref: http://en.wikipedia.org/wiki/Synonymous_mutation -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001581 ! codon_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:39:38Z - -[Term] -id: SO:0001589 -name: frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three." [SO:ke] -comment: EBI term:Frameshift variations - In coding sequence, resulting in a frameshift. 
-synonym: "frameshift variant" EXACT [] -synonym: "frameshift_" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "frameshift_coding" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001580 ! coding_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:40:19Z - -[Term] -id: SO:0001590 -name: terminator_codon_variant -def: "A sequence variant whereby at least one of the bases in the terminator codon is changed." [SO:ke] -synonym: "terminator codon variant" EXACT [] -is_a: SO:0001625 ! terminal_codon_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:40:37Z - -[Term] -id: SO:0001591 -name: frame_restoring_variant -def: "A sequence variant that reverts the sequence of a previous frameshift mutation back to the initial frame." [SO:ke] -synonym: "frame restoring variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:09Z - -[Term] -id: SO:0001592 -name: minus_1_frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, by shifting one base ahead." [http://arjournals.annualreviews.org/doi/pdf/10.1146/annurev.ge.08.120174.001535] -synonym: "-1 frameshift variant" EXACT [] -synonym: "minus 1 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:30Z - -[Term] -id: SO:0001593 -name: minus_2_frameshift_variant -synonym: "-2 frameshift variant" EXACT [] -synonym: "minus 2 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:52Z - -[Term] -id: SO:0001594 -name: plus_1_frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, by shifting one base backward." 
[http://arjournals.annualreviews.org/doi/pdf/10.1146/annurev.ge.08.120174.001535] -synonym: "+1 frameshift variant" EXACT [] -synonym: "plus 1 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:42:06Z - -[Term] -id: SO:0001595 -name: plus_2_frameshift variant -synonym: "+2 frameshift variant" EXACT [] -synonym: "plus 2 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:42:23Z - -[Term] -id: SO:0001596 -name: transcript_secondary_structure_variant -def: "A sequence variant within a transcript that changes the secondary structure of the RNA product." [SO:ke] -synonym: "transcript secondary structure variant" EXACT [] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:43:18Z - -[Term] -id: SO:0001597 -name: compensatory_transcript_secondary_structure_variant -def: "A secondary structure variant that compensate for the change made by a previous variant." [SO:ke] -synonym: "compensatory transcript secondary structure variant" EXACT [] -is_a: SO:0001596 ! transcript_secondary_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:43:54Z - -[Term] -id: SO:0001598 -name: translational_product_structure_variant -def: "A sequence variant within the transcript that changes the structure of the translational product." [SO:ke] -synonym: "translational product structure variant" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:44:17Z - -[Term] -id: SO:0001599 -name: 3D_polypeptide_structure_variant -def: "A sequence variant that changes the resulting polypeptide structure." [SO:ke] -synonym: "3D polypeptide structure variant" EXACT [] -is_a: SO:0001598 ! 
translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:44:46Z - -[Term] -id: SO:0001600 -name: complex_3D_structural_variant -def: "A sequence variant that changes the resulting polypeptide structure." [SO:ke] -synonym: "complex 3D structural variant" EXACT [] -is_a: SO:0001599 ! 3D_polypeptide_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:45:13Z - -[Term] -id: SO:0001601 -name: conformational_change_variant -def: "A sequence variant in the CDS region that causes a conformational change in the resulting polypeptide sequence." [SO:ke] -synonym: "conformational change variant" EXACT [] -is_a: SO:0001599 ! 3D_polypeptide_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:45:48Z - -[Term] -id: SO:0001602 -name: complex_change_of_translational_product_variant -synonym: "complex change of translational product variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:46:54Z - -[Term] -id: SO:0001603 -name: polypeptide_sequence_variant -def: "A sequence variant with in the CDS that causes a change in the resulting polypeptide sequence." [SO:ke] -synonym: "polypeptide sequence variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:13Z - -[Term] -id: SO:0001604 -name: amino_acid_deletion -def: "A sequence variant within a CDS resulting in the loss of an amino acid from the resulting polypeptide." [SO:ke] -synonym: "amino acid deletion" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:36Z - -[Term] -id: SO:0001605 -name: amino_acid_insertion -def: "A sequence variant within a CDS resulting in the gain of an amino acid to the resulting polypeptide." 
[SO:ke] -synonym: "amino acid insertion" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:56Z - -[Term] -id: SO:0001606 -name: amino_acid_substitution -def: "A sequence variant of a codon resulting in the substitution of one amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "amino acid substitution" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:48:17Z - -[Term] -id: SO:0001607 -name: conservative_amino_acid_substitution -def: "A sequence variant of a codon causing the substitution of a similar amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "conservative amino acid substitution" EXACT [] -is_a: SO:0001606 ! amino_acid_substitution -created_by: kareneilbeck -creation_date: 2010-03-22T02:48:57Z - -[Term] -id: SO:0001608 -name: non_conservative_amino_acid_substitution -def: "A sequence variant of a codon causing the substitution of a non conservative amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "non conservative amino acid substitution" EXACT [] -is_a: SO:0001606 ! amino_acid_substitution -created_by: kareneilbeck -creation_date: 2010-03-22T02:49:23Z - -[Term] -id: SO:0001609 -name: elongated_polypeptide -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence." [SO:ke] -synonym: "elongated polypeptide" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:49:52Z - -[Term] -id: SO:0001610 -name: elongated_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence at the C terminus." 
[SO:ke] -synonym: "elongated polypeptide C terminal" EXACT [] -is_a: SO:0001609 ! elongated_polypeptide -created_by: kareneilbeck -creation_date: 2010-03-22T02:50:20Z - -[Term] -id: SO:0001611 -name: elongated_polypeptide_N_terminal -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated polypeptide N terminal" EXACT [] -is_a: SO:0001609 ! elongated_polypeptide -created_by: kareneilbeck -creation_date: 2010-03-22T02:50:31Z - -[Term] -id: SO:0001612 -name: elongated_in_frame_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes in frame elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated in frame polypeptide C terminal" EXACT [] -is_a: SO:0001610 ! elongated_polypeptide_C_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:05Z - -[Term] -id: SO:0001613 -name: elongated_out_of_frame_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes out of frame elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated polypeptide out of frame C terminal" EXACT [] -is_a: SO:0001610 ! elongated_polypeptide_C_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:20Z - -[Term] -id: SO:0001614 -name: elongated_in_frame_polypeptide_N_terminal_elongation -def: "A sequence variant with in the CDS that causes in frame elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated in frame polypeptide N terminal" EXACT [] -is_a: SO:0001611 ! elongated_polypeptide_N_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:49Z - -[Term] -id: SO:0001615 -name: elongated_out_of_frame_polypeptide_N_terminal -def: "A sequence variant with in the CDS that causes out of frame elongation of the resulting polypeptide sequence at the N terminus." 
[SO:ke] -synonym: "elongated out of frame N terminal" EXACT [] -is_a: SO:0001611 ! elongated_polypeptide_N_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:52:05Z - -[Term] -id: SO:0001616 -name: polypeptide_fusion -def: "A sequence variant that causes a fusion of two polypeptide sequences." [SO:ke] -synonym: "polypeptide fusion" EXACT [] -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:52:43Z - -[Term] -id: SO:0001617 -name: polypeptide_truncation -def: "A sequence variant of the CD that causes a truncation of the resulting polypeptide." [SO:ke] -synonym: "polypeptide truncation" EXACT [] -xref: http://www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:53:07Z - -[Term] -id: SO:0001618 -name: inactive_catalytic_site -def: "A sequence variant that causes the inactivation of a catalytic site." [SO:ke] -synonym: "inactive catalytic site" EXACT [] -is_a: SO:0001560 ! inactive_ligand_binding_site -created_by: kareneilbeck -creation_date: 2010-03-22T03:06:14Z - -[Term] -id: SO:0001619 -name: nc_transcript_variant -def: "A transcript variant of a non coding RNA gene." [SO:ke] -comment: Within non-coding gene - Located within a gene that does not code for a protein. -synonym: "nc transcript variant" EXACT [] -synonym: "non coding transcript variant" EXACT [] -synonym: "within_non_coding_gene" EXACT dbsnp [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:16:23Z - -[Term] -id: SO:0001620 -name: mature_miRNA_variant -def: "A transcript variant located with the sequence of the mature miRNA." [SO:ke] -comment: EBI term: Within mature miRNA - Located within a microRNA. 
-synonym: "mature miRNA variant" EXACT [] -synonym: "within_mature_miRNA" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001619 ! nc_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:16:58Z - -[Term] -id: SO:0001621 -name: NMD_transcript_variant -def: "A variant in a transcript that is the target of NMD." [SO:ke] -synonym: "NMD transcript variant" EXACT [] -synonym: "NMD_transcript" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:20:40Z - -[Term] -id: SO:0001622 -name: UTR_variant -def: "A transcript variant that is located within the UTR." [SO:ke] -synonym: "UTR variant" EXACT [] -synonym: "UTR_" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:22:58Z - -[Term] -id: SO:0001623 -name: 5_prime_UTR_variant -def: "A UTR variant of the 5' UTR." [SO:ke] -comment: EBI term: 5prime UTR variations - In 5prime UTR (untranslated region). -synonym: "5'UTR variant" EXACT [] -synonym: "5PRIME_UTR" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -synonym: "five prime UTR variant" EXACT [] -synonym: "untranslated-5" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001622 ! UTR_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:23:29Z - -[Term] -id: SO:0001624 -name: 3_prime_UTR_variant -def: "A UTR variant of the 3' UTR." [SO:ke] -comment: EBI term 3prime UTR variations - In 3prime UTR. -synonym: "3'UTR variant" EXACT [] -synonym: "3PRIME_UTR" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -synonym: "three prime UTR variant" EXACT [] -synonym: "untranslated-3" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001622 ! 
UTR_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:23:54Z - -[Term] -id: SO:0001625 -name: terminal_codon_variant -def: "A codon variant that changes at least one base of the last codon of the transcript." [SO:ke] -comment: The terminal codon may be the terminator, or in an incomplete transcript the last available codon. -synonym: "terminal codon variant" EXACT [] -is_a: SO:0001581 ! codon_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:49:55Z - -[Term] -id: SO:0001626 -name: incomplete_terminal_codon_variant -def: "A sequence variant where at least one base of the final codon of an incompletely annotated transcript is changed." [SO:ke] -comment: EBI term: Partial codon - Located within the final, incomplete codon of a transcript with a shortened coding sequence where the end is unknown. -synonym: "incomplete terminal codon variant" EXACT [] -synonym: "partial_codon" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001625 ! terminal_codon_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:51:15Z - -[Term] -id: SO:0001627 -name: intron_variant -def: "A transcript variant occurring within an intron." [SO:ke] -comment: EBI term: Intronic variations - In intron. -synonym: "intron variant" EXACT [] -synonym: "intron_" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "intronic" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:52:38Z - -[Term] -id: SO:0001628 -name: intergenic_variant -def: "A sequence variant located in the intergenic region, between genes." [SO:ke] -comment: EBI term Intergenic variations - More than 5 kb either upstream or downstream of a transcript. -synonym: "intergenic" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -synonym: "intergenic variant" EXACT [] -is_a: SO:0001537 ! 
structural_variant -created_by: kareneilbeck -creation_date: 2010-03-23T05:07:37Z - -[Term] -id: SO:0001629 -name: splice_site_variant -def: "A sequence variant that changes the first two or last two bases of an intron." [http://uswest.ensembl.org/info/docs/variation/index.html] -comment: EBI term - essential splice site - In the first 2 or the last 2 base pairs of an intron. -synonym: "essential_splice_site" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -synonym: "splice site variant" EXACT [] -is_a: SO:0001627 ! intron_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:42:00Z - -[Term] -id: SO:0001630 -name: splice_region_variant -def: "A sequence variant in which a change has occurred within the region of the splice site, either within 1-3 bases of the exon or 3-8 bases of the intron." [http://uswest.ensembl.org/info/docs/variation/index.html] -comment: EBI term: splice site - 1-3 bps into an exon or 3-8 bps into an intron. -synonym: "splice region variant" EXACT [] -synonym: "splice_region_variant" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:46:02Z - -[Term] -id: SO:0001631 -name: upstream_gene_variant -def: "A sequence variant located 5' of a gene." [SO:ke] -comment: Different groups annotate up and downstream to different lengths. The subtypes are specific and are backed up with cross references. -synonym: "upstream gene variant" EXACT [] -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:49:13Z - -[Term] -id: SO:0001632 -name: downstream_gene_variant -def: "A sequence variant located 3' of a gene." [SO:ke] -comment: Different groups annotate up and downstream to different lengths. The subtypes are specific and are backed up with cross references. -synonym: "downstream gene variant" EXACT [] -is_a: SO:0001537 ! 
structural_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:49:38Z - -[Term] -id: SO:0001633 -name: 5KB_downstream_variant -def: "A sequence variant located within 5 KB of the end of a gene." [SO:ke] -comment: EBI term Downstream variations - Within 5 kb downstream of the 3prime end of a transcript. -synonym: "5KB downstream variant" EXACT [] -synonym: "downstream" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -synonym: "within 5KB downstream" RELATED [] -is_a: SO:0001632 ! downstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:50:16Z - -[Term] -id: SO:0001634 -name: 500B_downstream_variant -def: "A sequence variant located within a half KB of the end of a gene." [SO:ke] -synonym: "500B downstream variant" EXACT [] -synonym: "near-gene-3" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001633 ! 5KB_downstream_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:50:42Z - -[Term] -id: SO:0001635 -name: 5KB_upstream_variant -def: "A sequence variant located within 5KB 5' of a gene." [SO:ke] -comment: EBI term Upstream variations - Within 5 kb upstream of the 5prime end of a transcript. -synonym: "5kb upstream variant" EXACT [] -synonym: "upstream" EXACT ebi_variants [http://uswest.ensembl.org/info/docs/variation/index.html] -is_a: SO:0001631 ! upstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:51:06Z - -[Term] -id: SO:0001636 -name: 2KB_upstream_variant -def: "A sequence variant located within 2KB 5' of a gene." [SO:ke] -synonym: "2KB upstream variant" EXACT [] -synonym: "near-gene-5" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001635 ! 5KB_upstream_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:51:22Z - -[Term] -id: SO:0001637 -name: rRNA_gene -def: "A gene that encodes for ribosomal RNA." [SO:ke] -synonym: "rRNA gene" EXACT [] -is_a: SO:0001263 ! 
ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:10:32Z - -[Term] -id: SO:0001638 -name: piRNA_gene -def: "A gene that encodes for an piwi associated RNA." [SO:ke] -synonym: "piRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:11:36Z - -[Term] -id: SO:0001639 -name: RNase_P_RNA_gene -def: "A gene that encodes an RNase P RNA." [SO:ke] -synonym: "RNase P RNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:13:23Z - -[Term] -id: SO:0001640 -name: RNase_MRP_RNA_gene -def: "A gene that encodes a RNase_MRP_RNA." [SO:ke] -synonym: "RNase MRP RNA gene" RELATED [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:13:58Z - -[Term] -id: SO:0001641 -name: lincRNA_gene -def: "A gene that encodes large intervening non-coding RNA." [SO:ke] -synonym: "lincRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:14:24Z - -[Term] -id: SO:0001642 -name: mathematically_defined_repeat -def: "A mathematically defined repeat (MDR) is a experimental feature that is determined by querying overlapping oligomers of length k against a database of shotgun sequence data and identifying regions in the query sequence that exceed a statistically determined threshold of repetitiveness." [SO:jestill] -comment: Mathematically defined repeat regions are determined without regard to the biological origin of the repetitive region. The repeat units of a MDR are the overlapping oligomers of size k that were used to for the query. Tools that can annotate mathematically defined repeats include Tallymer (Kurtz et al 2008, BMC Genomics: 517) and RePS (Wang et al, Genome Res 12(5): 824-831.). -synonym: "mathematically defined repeat" EXACT [] -is_a: SO:0001410 ! 
experimental_feature -created_by: kareneilbeck -creation_date: 2010-05-03T11:50:14Z - -[Term] -id: SO:0001643 -name: telomerase_RNA_gene -def: "A telomerase RNA gene is a non coding RNA gene the RNA product of which is a component of telomerase." [SO:ke] -synonym: "Telomerase RNA component" EXACT [] -synonym: "telomerase RNA gene" EXACT [] -synonym: "TERC" EXACT [] -xref: http:http\://en.wikipedia.org/wiki/Telomerase_RNA_component "wikipedia" -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-05-18T05:26:38Z - -[Term] -id: SO:0001644 -name: targeting_vector -def: "An engineered vector that is able to take part in homologous recombination in a host with the intent of introducing site specific genomic modifications." [MGD:tm, PMID:10354467] -synonym: "targeting vector" RELATED [] -is_a: SO:0000440 ! vector_replicon -intersection_of: SO:0000440 ! vector_replicon -intersection_of: has_part SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000783 ! engineered -created_by: kareneilbeck -creation_date: 2010-05-28T02:05:25Z - -[Term] -id: SO:0001645 -name: genetic_marker -def: "A measurable sequence feature that varies within a population." [SO:db] -synonym: "genetic marker" RELATED [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-05-28T02:33:07Z - -[Term] -id: SO:0001646 -name: DArT_marker -def: "A genetic marker, discovered using Diversity Arrays Technology (DArT) technology." [SO:ke] -synonym: "DArT marker" EXACT [] -is_a: SO:0001645 ! genetic_marker -created_by: kareneilbeck -creation_date: 2010-05-28T02:34:43Z - -[Term] -id: SO:0001647 -name: kozak_sequence -def: "A kind of ribosome entry site, specific to Eukaryotic organisms that overlaps part of both 5' UTR and CDS sequence." 
[SO:ke] -subset: SOFA -synonym: "kozak consensus" EXACT [] -synonym: "kozak consensus sequence" EXACT [] -synonym: "kozak sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Kozak_consensus_sequence "wikipedia" -is_a: SO:0000139 ! ribosome_entry_site -created_by: kareneilbeck -creation_date: 2010-06-07T03:12:20Z - -[Term] -id: SO:0001648 -name: nested_transposon -def: "A transposon that is disrupted by the insertion of another element." [SO:ke] -synonym: "nested transposon" EXACT [] -is_a: SO:0000101 ! transposable_element -created_by: kareneilbeck -creation_date: 2010-06-23T03:22:57Z - -[Term] -id: SO:0001649 -name: nested_repeat -def: "A repeat that is disrupted by the insertion of another element." [SO:ke] -synonym: "nested repeat" RELATED [] -is_a: SO:0000657 ! repeat_region -created_by: kareneilbeck -creation_date: 2010-06-23T03:24:55Z - -[Term] -id: SO:0001650 -name: inframe_variant -def: "A sequence variant which does not cause a disruption of the translational reading frame." [SO:ke] -synonym: "cds-indel" EXACT dbsnp [] -synonym: "inframe variant" EXACT [] -is_a: SO:0001580 ! coding_sequence_variant -created_by: kareneilbeck -creation_date: 2010-07-19T01:24:44Z - -[Term] -id: SO:0001651 -name: inframe_codon_gain -def: "A sequence variant which gains a codon, and does not cause a disruption of the translational reading frame." [SO:ke] -synonym: "inframe codon gain" RELATED [] -is_a: SO:0001650 ! inframe_variant -created_by: kareneilbeck -creation_date: 2010-07-19T01:29:08Z - -[Term] -id: SO:0001652 -name: inframe_codon_loss -def: "A sequence variant which loses a codon, and does not cause a disruption of the translational reading frame." [SO:ke] -synonym: "inframe codon loss" RELATED [] -is_a: SO:0001650 ! 
inframe_variant -created_by: kareneilbeck -creation_date: 2010-07-19T01:29:35Z - -[Term] -id: SO:0001653 -name: retinoic_acid_responsive_element -def: "A transcription factor binding site of variable direct repeats of the sequence PuGGTCA spaced by five nucleotides (DR5) found in the promoters of retinoic acid-responsive genes, to which retinoic acid receptors bind." [PMID:11327309, PMID:19917671] -synonym: "RARE" EXACT [] -synonym: "retinoic acid responsive element" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000167 ! promoter -created_by: kareneilbeck -creation_date: 2010-08-03T10:46:12Z - -[Term] -id: SO:0001654 -name: nucleotide_to_protein_binding_site -synonym: "nucleotide to protein binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:26:05Z - -[Term] -id: SO:0001655 -name: nucleotide_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with nucleotide residues." [SO:cb] -comment: See GO:0000166 : nucleotide binding. -synonym: "nucleotide binding site" EXACT [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:30:04Z - -[Term] -id: SO:0001656 -name: metal_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with metal ions." [SO:cb] -comment: See GO:0046872 : metal ion binding. -synonym: "metal binding site" RELATED [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:31:42Z - -[Term] -id: SO:0001657 -name: ligand_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with a small molecule such as a drug, or hormone." [SO:ke] -synonym: "ligand binding site" EXACT [] -is_a: SO:0000409 ! 
binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:32:58Z - -[Term] -id: SO:0001658 -name: nested_tandem_repeat -def: "An NTR is a nested repeat of two distinct tandem motifs interspersed with each other." [SO:AF] -comment: Tracker ID: 3052459. -synonym: "nested tandem repeat" EXACT [] -synonym: "NTR" EXACT [] -is_a: SO:0001649 ! nested_repeat -created_by: kareneilbeck -creation_date: 2010-08-26T09:36:16Z - -[Term] -id: SO:0001659 -name: promoter_element -synonym: "promoter element" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: overlaps SO:0000235 ! TF_binding_site -created_by: kareneilbeck -creation_date: 2010-10-01T11:48:32Z - -[Term] -id: SO:0001660 -name: core_promoter_element -synonym: "core promoter element" EXACT [] -synonym: "general transcription factor binding site" RELATED [] -is_a: SO:0001659 ! promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T11:49:03Z - -[Term] -id: SO:0001661 -name: RNA_polymerase_II_TATA_box -def: "A TATA box core promoter of a gene transcribed by RNA polymerase II." [PMID:16858867] -synonym: "RNA polymerase II TATA box" EXACT [] -is_a: SO:0000174 ! TATA_box -relationship: part_of SO:0001669 ! RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:42:12Z - -[Term] -id: SO:0001662 -name: RNA_polymerase_III_TATA_box -def: "A TATA box core promoter of a gene transcribed by RNA polymerase III." [SO:ke] -synonym: "RNA polymerase III TATA box" EXACT [] -is_a: SO:0000174 ! TATA_box -relationship: part_of SO:0000171 ! RNApol_III_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:43:16Z - -[Term] -id: SO:0001663 -name: BREd_motif -def: "A core TRNA polymerase II promoter element with consensus (G/A)T(T/G/A)(T/A)(G/T)(T/G)(T/G)." [PMID:16858867] -synonym: "BREd" EXACT [] -synonym: "BREd motif" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! 
RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:49:55Z - -[Term] -id: SO:0001664 -name: DCE -def: "A discontinuous core element of RNA polymerase II transcribed genes, situated downstream of the TSS. It is composed of three sub elements: SI, SII and SIII." [PMID:16858867] -synonym: "downstream core element" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:56:41Z - -[Term] -id: SO:0001665 -name: DCE_SI -def: "A sub element of the DCE core promoter element, with consensus sequence CTTC." [PMID:16858867, SO:ke] -synonym: "DCE SI" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:10Z - -[Term] -id: SO:0001666 -name: DCE_SII -def: "A sub element of the DCE core promoter element with consensus sequence CTGT." [PMID:16858867, SO:ke] -synonym: "DCE SII" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:30Z - -[Term] -id: SO:0001667 -name: DCE_SIII -def: "A sub element of the DCE core promoter element with consensus sequence AGC." [PMID:16858867, SO:ke] -synonym: "DCE SIII" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:44Z - -[Term] -id: SO:0001668 -name: proximal_promoter_element -synonym: "proximal promoter element" RELATED [] -synonym: "specific transcription factor binding site" RELATED [] -is_a: SO:0001678 ! regulatory_promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T03:10:23Z - -[Term] -id: SO:0001669 -name: RNApol_II_core_promoter -def: "The minimal portion of the promoter required to properly initiate transcription in RNA polymerase II transcribed genes." 
[PMID:16858867] -synonym: "RNApol II core promoter" EXACT [] -is_a: SO:0000170 ! RNApol_II_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T03:13:41Z - -[Term] -id: SO:0001670 -name: distal_promoter_element -synonym: "distal promoter element" RELATED [] -is_a: SO:0001678 ! regulatory_promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T03:21:08Z - -[Term] -id: SO:0001671 -name: bacterial_RNApol_promoter_sigma_70 -synonym: "bacterial RNA polymerase promoter sigma 70" EXACT [] -is_a: SO:0000613 ! bacterial_RNApol_promoter -created_by: kareneilbeck -creation_date: 2010-10-06T01:41:34Z - -[Term] -id: SO:0001672 -name: bacterial_RNApol_promoter_sigma54 -synonym: "bacterial RNA polymerase promoter sigma54" EXACT [] -is_a: SO:0000613 ! bacterial_RNApol_promoter -created_by: kareneilbeck -creation_date: 2010-10-06T01:42:37Z - -[Term] -id: SO:0001673 -name: minus_12_signal -def: "A conserved region about 12-bp upstream of the start point of bacterial transcription units, involved with sigma factor 54." [PMID:18331472] -synonym: "minus 12 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001672 ! bacterial_RNApol_promoter_sigma54 -created_by: kareneilbeck -creation_date: 2010-10-06T01:44:57Z - -[Term] -id: SO:0001674 -name: minus_24_signal -def: "A conserved region about 12-bp upstream of the start point of bacterial transcription units, involved with sigma factor 54." [PMID:18331472] -synonym: "minus 24 signal" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001672 ! bacterial_RNApol_promoter_sigma54 -created_by: kareneilbeck -creation_date: 2010-10-06T01:45:24Z - -[Term] -id: SO:0001675 -name: A_box_type_1 -def: "An A box within an RNA polymerase III type 1 promoter." [SO:ke] -comment: The A box can be found in the promoters of type 1 and type 2 (pol III) so sub-typing here allows the part of relationship of the subtypes to remain true. -synonym: "A box type 1" RELATED [] -is_a: SO:0000619 ! 
A_box -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 -created_by: kareneilbeck -creation_date: 2010-10-06T05:43:43Z - -[Term] -id: SO:0001676 -name: A_box_type_2 -def: "An A box within an RNA polymerase III type 2 promoter." [SO:ke] -comment: The A box can be found in the promoters of type 1 and type 2 (pol III) so sub-typing here allows the part of relationship of the subtypes to remain true. -synonym: "A box type 2" RELATED [] -is_a: SO:0000619 ! A_box -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 -created_by: kareneilbeck -creation_date: 2010-10-06T05:44:18Z - -[Term] -id: SO:0001677 -name: intermediate_element -def: "A core promoter region of RNA polymerase III type 1 promoters." [PMID:12381659] -synonym: "IE" EXACT [] -synonym: "intermediate element" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 -created_by: kareneilbeck -creation_date: 2010-10-06T05:52:03Z - -[Term] -id: SO:0001678 -name: regulatory_promoter_element -def: "A promoter element that is not part of the core promoter, but provides the promoter with a specific regulatory region." [PMID:12381659] -synonym: "regulatory promoter element" RELATED [] -is_a: SO:0001659 ! promoter_element -created_by: kareneilbeck -creation_date: 2010-10-07T04:39:48Z - -[Term] -id: SO:0001679 -name: transcription_regulatory_region -def: "A regulatory region that is involved in the control of the process of transcription." [SO:ke] -synonym: "transcription regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:49:35Z - -[Term] -id: SO:0001680 -name: translation_regulatory_region -def: "A regulatory region that is involved in the control of the process of translation." [SO:ke] -synonym: "translation regulatory region" EXACT [] -is_a: SO:0005836 ! 
regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:52:45Z - -[Term] -id: SO:0001681 -name: recombination_regulatory_region -def: "A regulatory region that is involved in the control of the process of recombination." [SO:ke] -synonym: "recombination regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:53:35Z - -[Term] -id: SO:0001682 -name: replication_regulatory_region -def: "A regulatory region that is involved in the control of the process of nucleotide replication." [SO:ke] -synonym: "replication regulatory region" RELATED [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:54:09Z - -[Term] -id: SO:0001683 -name: sequence_motif -def: "A sequence motif is a nucleotide or amino-acid sequence pattern that may have biological significance." [http://en.wikipedia.org/wiki/Sequence_motif] -synonym: "sequence motif" RELATED [] -xref: http://en.wikipedia.org/wiki/Sequence_motif "wikipedia" -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-10-14T04:13:22Z - -[Term] -id: SO:0001684 -name: experimental_feature_attribute -def: "An attribute of an experimentally derived feature." [SO:ke] -synonym: "experimental feature attribute" RELATED [] -is_a: SO:0000733 ! feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:22:23Z - -[Term] -id: SO:0001685 -name: score -def: "The score of an experimentally derived feature such as a p-value." [SO:ke] -is_a: SO:0001684 ! experimental_feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:23:16Z - -[Term] -id: SO:0001686 -name: quality_value -def: "An experimental feature attribute that defines the quality of the feature in a quantitative way, such as a phred quality score." [SO:ke] -synonym: "quality value" RELATED [] -is_a: SO:0001684 ! 
experimental_feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:24:11Z - -[Term] -id: SO:0001687 -name: restriction_enzyme_recognition_site -def: "The nucleotide region (usually a palindrome) that is recognized by a restriction enzyme. This may or may not be equal to the restriction enzyme binding site." [SO:ke] -synonym: "restriction endonuclease recognition site" EXACT [] -synonym: "restriction enzyme recognition site" EXACT [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-10-29T12:29:57Z - -[Term] -id: SO:0001688 -name: restriction_enzyme_cleavage_junction -def: "The boundary at which a restriction enzyme breaks the nucleotide sequence." [SO:ke] -synonym: "restriction enzyme cleavage junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2010-10-29T12:35:02Z - -[Term] -id: SO:0001689 -name: five_prime_restriction_enzyme_junction -def: "The restriction enzyme cleavage junction on the 5' strand of the nucleotide sequence." [SO:ke] -synonym: "5' restriction enzyme junction" EXACT [] -is_a: SO:0001694 ! single_strand_restriction_enzyme_cleavage_site -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:36:24Z - -[Term] -id: SO:0001690 -name: three_prime_restriction_enzyme_junction -synonym: "3' restriction enzyme junction" EXACT [] -is_a: SO:0001694 ! single_strand_restriction_enzyme_cleavage_site -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:37:52Z - -[Term] -id: SO:0001691 -name: blunt_end_restriction_enzyme_cleavage_site -synonym: "blunt end restriction enzyme cleavage site" EXACT [] -is_a: SO:0001687 ! 
restriction_enzyme_recognition_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:39:53Z - -[Term] -id: SO:0001692 -name: sticky_end_restriction_enzyme_cleavage_site -synonym: "sticky end restriction enzyme cleavage site" RELATED [] -is_a: SO:0001687 ! restriction_enzyme_recognition_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:40:50Z - -[Term] -id: SO:0001693 -name: blunt_end_restriction_enzyme_cleavage_junction -def: "A restriction enzyme cleavage site where both strands are cut at the same position." [SO:ke] -synonym: "blunt end restriction enzyme cleavage site" RELATED [] -is_a: SO:0001688 ! restriction_enzyme_cleavage_junction -relationship: part_of SO:0001691 ! blunt_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:43:14Z - -[Term] -id: SO:0001694 -name: single_strand_restriction_enzyme_cleavage_site -def: "A restriction enzyme cleavage site whereby only one strand is cut." [SO:ke] -synonym: "single strand restriction enzyme cleavage site" RELATED [] -is_a: SO:0001688 ! restriction_enzyme_cleavage_junction -created_by: kareneilbeck -creation_date: 2010-10-29T12:44:48Z - -[Term] -id: SO:0001695 -name: restriction_enzyme_single_strand_overhang -def: "A terminal region of DNA sequence where the end of the region is not blunt ended." [SO:ke] -synonym: "single strand overhang" EXACT [] -synonym: "sticky end" RELATED [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:48:35Z - -[Term] -id: SO:0001696 -name: experimentally_defined_binding_region -def: "A region that has been implicated in binding although the exact coordinates of binding may be unknown." [SO:ke] -synonym: "experimentally defined binding region" RELATED [] -is_a: SO:0001410 ! 
experimental_feature -created_by: kareneilbeck -creation_date: 2010-11-02T11:39:59Z - -[Term] -id: SO:0001697 -name: CHiP_seq_region -def: "A region of sequence identified by CHiP seq technology to contain a protein binding site." [SO:ke] -synonym: "CHiP seq region" RELATED [] -is_a: SO:0001696 ! experimentally_defined_binding_region -relationship: contains SO:0000410 ! protein_binding_site -created_by: kareneilbeck -creation_date: 2010-11-02T11:43:07Z - -[Term] -id: SO:0001700 -name: histone_modification -def: "Histone modification is a post translationally modified region whereby residues of the histone protein are modified by methylation, acetylation, phosphorylation, ubiquitination, sumoylation, citrullination, or ADP-ribosylation." [http:en.wikipedia.org/wiki/Histone] -synonym: "histone modification" EXACT [] -synonym: "histone modification site" RELATED [] -is_a: SO:0001089 ! post_translationally_modified_region -intersection_of: SO:0001089 ! post_translationally_modified_region -intersection_of: has_quality SO:0000133 ! epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-31T10:22:08Z - -[Term] -id: SO:0001701 -name: histone_methylation_site -def: "A histone modification site where the modification is the methylation of the residue." [SO:ke] -synonym: "histone methylation" EXACT [] -synonym: "histone methylation site" EXACT [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:02Z - -[Term] -id: SO:0001702 -name: histone_acetylation_site -def: "A histone modification where the modification is the acylation of the residue." [SO:ke] -synonym: "histone acetylation" EXACT [] -synonym: "histone acetylatoin site" EXACT [] -is_a: SO:0001700 ! 
histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:27Z - -[Term] -id: SO:0001703 -name: H3K9_acetylation_site -def: "A kind of histone modification site, whereby the 9th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 acetylation site" EXACT [] -synonym: "H3K9Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:05Z - -[Term] -id: SO:0001704 -name: H3K14_acetylation_site -def: "A kind of histone modification site, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K14 acetylation site" EXACT [] -synonym: "H3K14Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:53Z - -[Term] -id: SO:0001705 -name: H3K4_monomethylation_site -def: "A kind of histone modification, whereby the 4th residue (a lysine), from the start of the H3 protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 mono-methylation site" EXACT [] -synonym: "H3K4me1" RELATED [] -is_a: SO:0001734 ! H3K4_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:28:14Z - -[Term] -id: SO:0001706 -name: H3K4_trimethylation -def: "A kind of histone modification site, whereby the 4th residue (a lysine), from the start of the H3 protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 tri-methylation" EXACT [] -synonym: "H3K4me3" RELATED [] -is_a: SO:0001734 ! H3K4_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:29:12Z - -[Term] -id: SO:0001707 -name: H3K9_trimethylation_site -def: "A kind of histone modification site, whereby the 9th residue (a lysine), from the start of the H3 histone protein is tri-methylated." 
[http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 tri-methylation site" EXACT [] -synonym: "H3K9Me3" RELATED [] -is_a: SO:0001736 ! H3K9_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:30:34Z - -[Term] -id: SO:0001708 -name: H3K27_monomethylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2K27 mono-methylation site" EXACT [] -synonym: "H2K27Me1" RELATED [] -is_a: SO:0001732 ! H3K27_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:31:54Z - -[Term] -id: SO:0001709 -name: H3K27_trimethylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K27 tri-methylation site" EXACT [] -synonym: "H3K27Me3" RELATED [] -is_a: SO:0001732 ! H3K27_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:32:41Z - -[Term] -id: SO:0001710 -name: H3K79_monomethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is mono- methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 mono-methylation site" EXACT [] -synonym: "H3K79me1" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:33:42Z - -[Term] -id: SO:0001711 -name: H3K79_dimethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is di-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 di-methylation site" EXACT [] -synonym: "H3K79Me2" RELATED [] -is_a: SO:0001735 ! 
H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:34:39Z - -[Term] -id: SO:0001712 -name: H3K79_trimethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 tri-methylation site" EXACT [] -synonym: "H3K79Me3" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:35:30Z - -[Term] -id: SO:0001713 -name: H4K20_monomethylation_site -def: "A kind of histone modification site, whereby the 20th residue (a lysine), from the start of the H34histone protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H4K20 mono-methylation site" EXACT [] -synonym: "H4K20Me1" RELATED [] -is_a: SO:0001701 ! histone_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:36:43Z - -[Term] -id: SO:0001714 -name: H2BK5_monomethylation_site -def: "A kind of histone modification site, whereby the 5th residue (a lysine), from the start of the H2B protein is methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2BK5 mono-methylation site" EXACT [] -is_a: SO:0001701 ! histone_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:38:12Z - -[Term] -id: SO:0001715 -name: ISRE -def: "An ISRE is a transcriptional cis regulatory region, containing the consensus region: YAGTTTC(A/T)YTTTYCC, responsible for increased transcription via interferon binding." [http://genesdev.cshlp.org/content/2/4/383.abstrac] -comment: Term requested via tracker (2981725) by Alan Ruttenberg, April 2010. It has been described as both an enhancer and a promoter, so the parent is the more general term. -synonym: "interferon stimulated response element" EXACT [] -is_a: SO:0001055 ! 
transcriptional_cis_regulatory_region -created_by: kareneilbeck -creation_date: 2010-04-05T11:15:08Z - -[Term] -id: SO:0001716 -name: histone_ubiqitination_site -def: "A histone modification site where ubiquitin may be added." [SO:ke] -synonym: "histone ubiquitination site" RELATED [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-04-13T10:12:18Z - -[Term] -id: SO:0001717 -name: H2B_ubiquitination_site -def: "A histone modification site on H2B where ubiquitin may be added." [SO:ke] -synonym: "H2BUbiq" RELATED [] -is_a: SO:0001716 ! histone_ubiqitination_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:13:28Z - -[Term] -id: SO:0001718 -name: H3K18_acetylation_site -def: "A kind of histone modification site, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K18 acetylation site" EXACT [] -synonym: "H3K18Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:39:35Z - -[Term] -id: SO:0001719 -name: H3K23_acylation site -def: "A kind of histone modification, whereby the 23rd residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K23 acylation site" RELATED [] -synonym: "H3K23Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:42:45Z - -[Term] -id: SO:0001720 -name: epigenetically_modified_region -def: "A biological region implicated in inherited changes caused by mechanisms other than changes in the underlying DNA sequence." [http://en.wikipedia.org/wiki/Epigenetics, SO:ke] -subset: SOFA -synonym: "epigenetically modified region" RELATED [] -is_a: SO:0001411 ! biological_region -intersection_of: SO:0001411 ! biological_region -intersection_of: has_quality SO:0000133 ! 
epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-27T12:02:29Z - -[Term] -id: SO:0001721 -name: H3K27_acylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K27 acylation site" EXACT [] -synonym: "H3K27Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:44:09Z - -[Term] -id: SO:0001722 -name: H3K36_monomethylation_site -def: "A kind of histone modification site, whereby the 36th residue (a lysine), from the start of the H3 histone protein is mono-methylated." [SO:ke] -synonym: "H3K36 mono-methylation site" EXACT [] -synonym: "H3K36. -synonym: "transcriptionally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -def: "Expressed in relatively constant amounts without regard to cellular environmental conditions such as the concentration of a particular substrate." [SO:ke] -synonym: "transcriptionally constitutive" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -def: "An inducer molecule is required for transcription to occur." [SO:ke] -synonym: "transcriptionally induced" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -def: "A repressor molecule is required for transcription to stop." [SO:ke] -synonym: "transcriptionally repressed" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000127 -name: silenced_gene -def: "A gene that is silenced." [SO:xp] -synonym: "silenced gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000893 ! silenced -relationship: has_quality SO:0000893 ! 
silenced - -[Term] -id: SO:0000128 -name: gene_silenced_by_DNA_modification -def: "A gene that is silenced by DNA modification." [SO:xp] -synonym: "gene silenced by DNA modification" EXACT [] -is_a: SO:0000127 ! silenced_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000894 ! silenced_by_DNA_modification -relationship: has_quality SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000129 -name: gene_silenced_by_DNA_methylation -def: "A gene that is silenced by DNA methylation." [SO:xp] -synonym: "gene silenced by DNA methylation" EXACT [] -synonym: "methylation-silenced gene" EXACT [] -is_a: SO:0000128 ! gene_silenced_by_DNA_modification -intersection_of: SO:0000128 ! gene_silenced_by_DNA_modification -intersection_of: has_quality SO:0000895 ! silenced_by_DNA_methylation -relationship: has_quality SO:0000895 ! silenced_by_DNA_methylation - -[Term] -id: SO:0000130 -name: post_translationally_regulated -def: "An attribute describing a gene that is regulated after it has been translated." [SO:ke] -synonym: "post translationally regulated" EXACT [] -synonym: "post-translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000131 -name: translationally_regulated -def: "An attribute describing a gene that is regulated as it is translated." [SO:ke] -synonym: "translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "DNA reverse primer" EXACT [] -synonym: "reverse DNA primer" EXACT [] -synonym: "reverse primer" EXACT [] -synonym: "reverse primer oligo" EXACT [] -synonym: "reverse primer oligonucleotide" EXACT [] -synonym: "reverse primer sequence" EXACT [] -is_a: SO:0000112 ! primer -intersection_of: SO:0000112 ! 
primer -intersection_of: has_quality SO:0001031 ! reverse -relationship: has_quality SO:0001031 ! reverse - -[Term] -id: SO:0000133 -name: epigenetically_modified -def: "This attribute describes a gene where heritable changes other than those in the DNA sequence occur. These changes include: modification to the DNA (such as DNA methylation, the covalent modification of cytosine), and post-translational modification of histones." [SO:ke] -synonym: "epigenetically modified" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000134 -name: genomically_imprinted -def: "Imprinted genes are epigenetically modified genes that are expressed monoallelically according to their parent of origin." [SO:ke] -synonym: "genomically imprinted" EXACT [] -synonym: "imprinted" BROAD [] -xref: http:http://en.wikipedia.org/wiki/Genomic_imprinting "wiki" -is_a: SO:0000119 ! regulated -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000135 -name: maternally_imprinted -def: "The maternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "maternally imprinted" EXACT [] -is_a: SO:0000134 ! genomically_imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -def: "The paternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "paternally imprinted" EXACT [] -is_a: SO:0000134 ! genomically_imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -def: "Allelic exclusion is a process occurring in diploid organisms, where a gene is inactivated and not expressed in that cell." [SO:ke] -comment: Examples are x-inactivation and immunoglobulin formation. -synonym: "allelically excluded" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000138 -name: gene_rearranged_at_DNA_level -def: "An epigenetically modified gene, rearranged at the DNA level." [SO:xp] -synonym: "gene rearranged at DNA level" EXACT [] -is_a: SO:0000898 ! 
epigenetically_modified_gene -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000904 ! rearranged_at_DNA_level -relationship: has_quality SO:0000904 ! rearranged_at_DNA_level - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0001680 ! translation_regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -synonym: "DNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000145 -name: recoded_codon -def: "A codon that has been redefined at translation. 
The redefinition may be as a result of translational bypass, translational frameshifting or stop codon readthrough." [SO:xp] -synonym: "recoded codon" EXACT [] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000146 -name: capped -def: "An attribute describing when a sequence, usually an mRNA is capped by the addition of a modified guanine nucleotide at the 5' end." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0001876 ! partial_genomic_sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unavailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." 
[SO:ke] -subset: SOFA -xref: http:http://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "yeast artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "bacterial artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000154 -name: PAC -def: "The P1-derived artificial chromosome are DNA constructs that are derived from the DNA of P1 bacteriophage. They can carry large amounts (about 100-300 kilobases) of other sequences for a variety of bioengineering purposes. It is one type of vector used to clone DNA fragments (100- to 300-kb insert size; average, 150 kb) in Escherichia coli cells." [http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. Drosophila melanogaster PACs carry an average insert size of 80 kb. The library represents a 6-fold coverage of the genome. -synonym: "P1" EXACT [] -synonym: "P1 artificial chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000155 -name: plasmid -def: "A self replicating, using the hosts cellular machinery, often circular nucleic acid molecule that is distinct from a chromosome in the organism." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
-synonym: "plasmid sequence" EXACT [] -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as a plasmid or packaged as a phage,since they retain the lambda cos sites." [SO:ma] -comment: Paper: vans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1):9-20. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cosmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Cosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma] -synonym: "phagemid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Phagemid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilizes the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource: mapping and analysis of 96 clones. Genomics 1996. -synonym: "fosmid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Fosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000159 -name: deletion -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -xref: loinc:LA6692-3 "Deletion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. 
The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_obsolete: true - -[Term] -id: SO:0000161 -name: methylated_adenine -def: "A modified base in which adenine has been methylated." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -synonym: "methylated_A" EXACT [] -is_a: SO:0000306 ! methylated_DNA_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinery. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." 
[http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a place holder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000166 -name: enhancer_bound_by_factor -def: "An enhancer bound by a factor." [SO:xp] -synonym: "enhancer bound by factor" EXACT [] -is_a: SO:0000165 ! enhancer -intersection_of: SO:0000165 ! enhancer -intersection_of: has_quality SO:0000277 ! bound_by_factor -relationship: has_quality SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! 
transcriptional_cis_regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." [SO:ke] -synonym: "pol I promoter" EXACT [] -synonym: "polymerase I promoter" EXACT [] -synonym: "RNA polymerase A promoter" EXACT [] -synonym: "RNApol I promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke] -synonym: "pol II promoter" RELATED [] -synonym: "polymerase II promoter" EXACT [] -synonym: "RNA polymerase B promoter" EXACT [] -synonym: "RNApol II promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." [SO:ke] -synonym: "pol III promoter" EXACT [] -synonym: "polymerase III promoter" EXACT [] -synonym: "RNA polymerase C promoter" EXACT [] -synonym: "RNApol III promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "CAAT box" EXACT [] -synonym: "CAAT signal" EXACT [] -synonym: "CAAT-box" EXACT [] -xref: http://en.wikipedia.org/wiki/CAAT_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0000173 -name: GC_rich_promoter_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "GC rich promoter region" EXACT [] -synonym: "GC-rich region" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:16858867] -comment: Binds TBP. -synonym: "Goldstein-Hogness box" EXACT [] -synonym: "TATA box" EXACT [] -xref: http://en.wikipedia.org/wiki/TATA_box "wiki" -is_a: SO:0001660 ! core_promoter_element - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT. This region is associated with sigma factor 70." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-10 signal" EXACT [] -synonym: "minus 10 signal" EXACT [] -synonym: "Pribnow box" EXACT [] -synonym: "Pribnow Schaller box" EXACT [] -synonym: "Pribnow-Schaller box" EXACT [] -xref: http://en.wikipedia.org/wiki/Pribnow_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001671 ! bacterial_RNApol_promoter_sigma_70 -relationship: part_of SO:0001913 ! bacterial_RNApol_promoter_sigma_ecf - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA. 
This region is associated with sigma factor 70." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-35 signal" EXACT [] -synonym: "minus 35 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001671 ! bacterial_RNApol_promoter_sigma_70 -relationship: part_of SO:0001913 ! bacterial_RNApol_promoter_sigma_ecf - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." [http://www.dddmag.com/Glossary.aspx#r] -synonym: "class I" RELATED [] -synonym: "class I transposon" EXACT [] -synonym: "retrotransposon element" EXACT [] -xref: http://en.wikipedia.org/wiki/Retrotransposon "wiki" -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." 
[SO:ke] -synonym: "class II" RELATED [] -synonym: "class II transposon" EXACT [] -synonym: "DNA transposon" EXACT [] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -synonym: "U2 intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." [SO:ke] -synonym: "long terminal repeat retrotransposon" EXACT [] -synonym: "LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
-subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -synonym: "non LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -synonym: "5' intron" EXACT [] -synonym: "5' intron sequence" EXACT [] -synonym: "five prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000191 -name: interior_intron -synonym: "interior intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -synonym: "3' intron" EXACT [] -synonym: "3' intron sequence" RELATED [] -synonym: "three prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." [GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "LINE" EXACT [] -synonym: "LINE element" EXACT [] -synonym: "Long interspersed element" EXACT [] -synonym: "Long interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon (here, 'codon' is inclusive of the stop_codon)." 
[SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke] -subset: DBVAR -synonym: "transchr" RELATED [http://www.ncbi.nlm.nih.gov/dbvar/] -synonym: "translocated sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000201 -name: interior_exon -def: "An exon that is bounded by 5' and 3' splice sites." [PMID:10373547] -synonym: "interior exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The coding exon that is most 3-prime on a given transcript." [SO:ma] -synonym: "3' coding exon" RELATED [] -synonym: "three prime coding exon" EXACT [] -is_a: SO:0000195 ! 
coding_exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." [SO:ke] -synonym: "Short interspersed element" EXACT [] -synonym: "Short interspersed nuclear element" EXACT [] -synonym: "SINE element" EXACT [] -xref: http://en.wikipedia.org/wiki/Short_interspersed_nuclear_element "wiki" -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_variation -synonym: "simple sequence length polymorphism" RELATED [] -synonym: "simple sequence length variation" EXACT [] -synonym: "SSLP" RELATED [] -is_a: SO:0000248 ! 
sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "terminal inverted repeat element" EXACT [] -synonym: "TIR element" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -subset: SOFA -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253)." [SO:ke] -synonym: "tRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -def: "A primary transcript encoding alanyl tRNA." [SO:ke] -synonym: "alanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." [SO:ke] -synonym: "arginine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -synonym: "asparagine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -synonym: "aspartic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." 
[SO:ke] -synonym: "cysteine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." [SO:ke] -synonym: "glycine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." [SO:ke] -synonym: "histidine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke] -synonym: "isoleucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -synonym: "leucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -synonym: "lysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -synonym: "methionine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke] -synonym: "phenylalanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -synonym: "proline tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -synonym: "serine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -synonym: "threonine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -synonym: "tryptophan tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." [SO:ke] -synonym: "tyrosine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." [SO:ke] -synonym: "valine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear RNA (SO:0000274)." [SO:ke] -synonym: "snRNA primary transcript" EXACT [] -is_a: SO:0000483 ! 
nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -synonym: "snoRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a nucleotide molecule that binds a Transcription Factor or Transcription Factor complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -is_a: SO:0001679 ! 
transcription_regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The in-frame interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -synonym: "transcript attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterized by large modular imperfect long inverted repeats." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "foldback element" EXACT [] -synonym: "long inverted repeat element" RELATED [] -synonym: "LVR element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The sequences extending on either side of a specific region." [SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000240 -name: chromosome_variation -synonym: "chromosome variation" EXACT [] -is_a: SO:0001507 ! variant_collection -disjoint_from: SO:0000400 ! sequence_attribute -relationship: part_of SO:0001524 ! chromosomally_aberrant_genome - -[Term] -id: SO:0000241 -name: internal_UTR -def: "A UTR bordered by the terminal and initial codons of two CDSs in a polycistronic transcript. Every UTR is either 5', 3' or internal." [SO:cjm] -synonym: "internal UTR" EXACT [] -is_a: SO:0000203 ! 
UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polycistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -synonym: "untranslated region polycistronic mRNA" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke] -synonym: "internal ribosomal entry sequence" EXACT [] -synonym: "internal ribosomal entry site" EXACT [] -synonym: "internal ribosome entry sequence" RELATED [] -synonym: "internal ribosome entry site" EXACT [] -synonym: "IRES" EXACT [] -xref: http://en.wikipedia.org/wiki/Internal_ribosome_entry_site "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_obsolete: true - -[Term] -id: SO:0000246 -name: polyadenylated -def: "A attribute describing the addition of a poly A tail to the 3' end of a mRNA molecule." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000248 -name: sequence_length_variation -synonym: "sequence length variation" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -synonym: "modified RNA base feature" EXACT [] -is_a: SO:0001236 ! 
base - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000254 -name: alanyl_tRNA -def: "A tRNA sequence that has an alanine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "alanyl tRNA" EXACT [] -synonym: "alanyl-transfer ribonucleic acid" EXACT [] -synonym: "alanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000211 ! alanine_tRNA_primary_transcript - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -synonym: "rRNA small subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -def: "A tRNA sequence that has an asparagine anticodon, and a 3' asparagine binding region." [SO:ke] -synonym: "asparaginyl tRNA" EXACT [] -synonym: "asparaginyl-transfer ribonucleic acid" EXACT [] -synonym: "asparaginyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000213 ! asparagine_tRNA_primary_transcript - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -def: "A tRNA sequence that has an aspartic acid anticodon, and a 3' aspartic acid binding region." [SO:ke] -synonym: "aspartyl tRNA" EXACT [] -synonym: "aspartyl-transfer ribonucleic acid" EXACT [] -synonym: "aspartyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000214 ! aspartic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -def: "A tRNA sequence that has a cysteine anticodon, and a 3' cysteine binding region." [SO:ke] -synonym: "cysteinyl tRNA" EXACT [] -synonym: "cysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "cysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000215 ! cysteine_tRNA_primary_transcript - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -def: "A tRNA sequence that has a glutamine anticodon, and a 3' glutamine binding region." 
[SO:ke] -synonym: "glutaminyl tRNA" EXACT [] -synonym: "glutaminyl-transfer ribonucleic acid" EXACT [] -synonym: "glutaminyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000216 ! glutamic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -def: "A tRNA sequence that has a glutamic acid anticodon, and a 3' glutamic acid binding region." [SO:ke] -synonym: "glutamyl tRNA" EXACT [] -synonym: "glutamyl-transfer ribonucleic acid" EXACT [] -synonym: "glutamyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000217 ! glutamine_tRNA_primary_transcript - -[Term] -id: SO:0000261 -name: glycyl_tRNA -def: "A tRNA sequence that has a glycine anticodon, and a 3' glycine binding region." [SO:ke] -synonym: "glycyl tRNA" EXACT [] -synonym: "glycyl-transfer ribonucleic acid" RELATED [] -synonym: "glycyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000218 ! glycine_tRNA_primary_transcript - -[Term] -id: SO:0000262 -name: histidyl_tRNA -def: "A tRNA sequence that has a histidine anticodon, and a 3' histidine binding region." [SO:ke] -synonym: "histidyl tRNA" EXACT [] -synonym: "histidyl-transfer ribonucleic acid" EXACT [] -synonym: "histidyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000219 ! histidine_tRNA_primary_transcript - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -def: "A tRNA sequence that has an isoleucine anticodon, and a 3' isoleucine binding region." [SO:ke] -synonym: "isoleucyl tRNA" EXACT [] -synonym: "isoleucyl-transfer ribonucleic acid" EXACT [] -synonym: "isoleucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000220 ! isoleucine_tRNA_primary_transcript - -[Term] -id: SO:0000264 -name: leucyl_tRNA -def: "A tRNA sequence that has a leucine anticodon, and a 3' leucine binding region." 
[SO:ke] -synonym: "leucyl tRNA" EXACT [] -synonym: "leucyl-transfer ribonucleic acid" EXACT [] -synonym: "leucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000221 ! leucine_tRNA_primary_transcript - -[Term] -id: SO:0000265 -name: lysyl_tRNA -def: "A tRNA sequence that has a lysine anticodon, and a 3' lysine binding region." [SO:ke] -synonym: "lysyl tRNA" EXACT [] -synonym: "lysyl-transfer ribonucleic acid" EXACT [] -synonym: "lysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000222 ! lysine_tRNA_primary_transcript - -[Term] -id: SO:0000266 -name: methionyl_tRNA -def: "A tRNA sequence that has a methionine anticodon, and a 3' methionine binding region." [SO:ke] -synonym: "methionyl tRNA" EXACT [] -synonym: "methionyl-transfer ribonucleic acid" EXACT [] -synonym: "methionyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000223 ! methionine_tRNA_primary_transcript - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -def: "A tRNA sequence that has a phenylalanine anticodon, and a 3' phenylalanine binding region." [SO:ke] -synonym: "phenylalanyl tRNA" EXACT [] -synonym: "phenylalanyl-transfer ribonucleic acid" EXACT [] -synonym: "phenylalanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000224 ! phenylalanine_tRNA_primary_transcript - -[Term] -id: SO:0000268 -name: prolyl_tRNA -def: "A tRNA sequence that has a proline anticodon, and a 3' proline binding region." [SO:ke] -synonym: "prolyl tRNA" EXACT [] -synonym: "prolyl-transfer ribonucleic acid" EXACT [] -synonym: "prolyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000225 ! proline_tRNA_primary_transcript - -[Term] -id: SO:0000269 -name: seryl_tRNA -def: "A tRNA sequence that has a serine anticodon, and a 3' serine binding region." 
[SO:ke] -synonym: "seryl tRNA" EXACT [] -synonym: "seryl-transfer ribonucleic acid" RELATED [] -synonym: "seryl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000226 ! serine_tRNA_primary_transcript - -[Term] -id: SO:0000270 -name: threonyl_tRNA -def: "A tRNA sequence that has a threonine anticodon, and a 3' threonine binding region." [SO:ke] -synonym: "threonyl tRNA" EXACT [] -synonym: "threonyl-transfer ribonucleic acid" EXACT [] -synonym: "threonyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000227 ! threonine_tRNA_primary_transcript - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -def: "A tRNA sequence that has a tryptophan anticodon, and a 3' tryptophan binding region." [SO:ke] -synonym: "tryptophanyl tRNA" EXACT [] -synonym: "tryptophanyl-transfer ribonucleic acid" EXACT [] -synonym: "tryptophanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000228 ! tryptophan_tRNA_primary_transcript - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -def: "A tRNA sequence that has a tyrosine anticodon, and a 3' tyrosine binding region." [SO:ke] -synonym: "tyrosyl tRNA" EXACT [] -synonym: "tyrosyl-transfer ribonucleic acid" EXACT [] -synonym: "tyrosyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000229 ! tyrosine_tRNA_primary_transcript - -[Term] -id: SO:0000273 -name: valyl_tRNA -def: "A tRNA sequence that has a valine anticodon, and a 3' valine binding region." [SO:ke] -synonym: "valyl tRNA" EXACT [] -synonym: "valyl-transfer ribonucleic acid" EXACT [] -synonym: "valyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000230 ! valine_tRNA_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000231 ! snRNA_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: derives_from SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000277 -name: bound_by_factor -def: "An attribute describing a sequence that is bound by another molecule." 
[SO:ke] -comment: Formerly called transcript_by_bound_factor. -synonym: "bound by factor" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000278 -name: transcript_bound_by_nucleic_acid -def: "A transcript that is bound by a nucleic acid." [SO:xp] -comment: Formerly called transcript_by_bound_nucleic_acid. -synonym: "transcript bound by nucleic acid" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000876 ! bound_by_nucleic_acid -relationship: has_quality SO:0000876 ! bound_by_nucleic_acid - -[Term] -id: SO:0000279 -name: transcript_bound_by_protein -def: "A transcript that is bound by a protein." [SO:xp] -comment: Formerly called transcript_by_bound_protein. -synonym: "transcript bound by protein" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000875 ! bound_by_protein -relationship: has_quality SO:0000875 ! bound_by_protein - -[Term] -id: SO:0000280 -name: engineered_gene -def: "A gene that is engineered." [SO:xp] -synonym: "engineered gene" EXACT [] -is_a: SO:0000704 ! gene -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000281 -name: engineered_foreign_gene -def: "A gene that is engineered and foreign." [SO:xp] -synonym: "engineered foreign gene" EXACT [] -is_a: SO:0000280 ! engineered_gene -is_a: SO:0000285 ! foreign_gene -is_a: SO:0000805 ! engineered_foreign_region -intersection_of: SO:0000280 ! engineered_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000282 -name: mRNA_with_minus_1_frameshift -def: "An mRNA with a minus 1 frameshift." 
[SO:xp] -synonym: "mRNA with minus 1 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000866 ! minus_1_frameshift -relationship: has_quality SO:0000866 ! minus_1_frameshift - -[Term] -id: SO:0000283 -name: engineered_foreign_transposable_element_gene -def: "A transposable_element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign transposable element gene" EXACT [] -is_a: SO:0000111 ! transposable_element_gene -is_a: SO:0000281 ! engineered_foreign_gene -intersection_of: SO:0000111 ! transposable_element_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartite and interrupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000285 -name: foreign_gene -def: "A gene that is foreign." [SO:xp] -synonym: "foreign gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "direct terminal repeat" RELATED [] -synonym: "long terminal repeat" EXACT [] -synonym: "LTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Long_terminal_repeat "wiki" -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000287 -name: fusion_gene -def: "A gene that is a fusion." [SO:xp] -synonym: "fusion gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Fusion_gene "wiki" -is_a: SO:0000704 ! 
gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000806 ! fusion -relationship: has_quality SO:0000806 ! fusion - -[Term] -id: SO:0000288 -name: engineered_fusion_gene -def: "A fusion gene that is engineered." [SO:xp] -synonym: "engineered fusion gene" EXACT [] -is_a: SO:0000280 ! engineered_gene -is_a: SO:0000287 ! fusion_gene -intersection_of: SO:0000287 ! fusion_gene -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000289 -name: microsatellite -def: "A repeat_region containing repeat_units (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite" EXACT [] -synonym: "dinucleotide repeat microsatellite feature" EXACT [] -synonym: "dinucleotide repeat microsatellite locus" EXACT [] -synonym: "dinucleotide repeat microsatellite marker" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite marker" RELATED [] -synonym: "rinucleotide repeat microsatellite" EXACT [] -synonym: "trinucleotide repeat microsatellite feature" EXACT [] -synonym: "trinucleotide repeat microsatellite locus" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_obsolete: true - -[Term] -id: SO:0000293 -name: engineered_foreign_repetitive_element -def: "A repetitive element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign repetitive element" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000805 ! 
engineered_foreign_region -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -synonym: "U12 intron" EXACT [] -synonym: "U12-dependent intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! replicon - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "D-loop" EXACT [] -synonym: "displacement loop" RELATED [] -xref: http://en.wikipedia.org/wiki/D_loop "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -synonym: "recombination feature" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000299 -name: specific_recombination_site -synonym: "specific recombination site" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -synonym: "recombination feature of rearranged gene" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000301 -name: vertebrate_immune_system_gene_recombination_feature -synonym: "vertebrate immune system gene recombination feature" EXACT [] -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene recombination feature" EXACT [] -synonym: "J-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interrupted palindrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_DNA_base -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. 
-subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001236 ! base -is_a: SO:0001720 ! epigenetically_modified_region - -[Term] -id: SO:0000306 -name: methylated_DNA_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_DNA_base - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_obsolete: true - -[Term] -id: SO:0000309 -name: computed_feature -is_obsolete: true - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_obsolete: true - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to: -is_obsolete: true - -[Term] -id: SO:0000312 -name: experimentally_determined -def: "Attribute to describe a feature that has been experimentally verified." [SO:ke] -synonym: "experimentally determined" EXACT [] -is_a: SO:0000789 ! validated - -[Term] -id: SO:0000313 -name: stem_loop -alt_id: SO:0000019 -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "RNA_hairpin_loop" EXACT [] -synonym: "stem loop" EXACT [] -synonym: "stem-loop" EXACT [] -xref: http://en.wikipedia.org/wiki/Stem_loop "wiki" -is_a: SO:0000122 ! RNA_sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." 
[SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: TSS -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cDNA clone" EXACT [] -is_a: SO:0000151 ! clone -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000756 ! cDNA -relationship: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke] -synonym: "intronic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer -is_a: SO:0000841 ! 
spliceosomal_intron_region - -[Term] -id: SO:0000321 -name: mRNA_with_plus_1_frameshift -def: "An mRNA with a plus 1 frameshift." [SO:ke] -synonym: "mRNA with plus 1 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000868 ! plus_1_frameshift -relationship: has_quality SO:0000868 ! plus_1_frameshift - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -synonym: "nuclease hypersensitive site" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "coding start" EXACT [] -synonym: "translation initiation site" EXACT [] -synonym: "translation start" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "35S rRNA primary transcript" EXACT [] -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke] -synonym: "coding end" EXACT [] -synonym: "translation termination site" EXACT [] -synonym: "translation_end" EXACT [] -is_a: SO:0000851 ! 
CDS_region - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray oligo" EXACT [] -synonym: "microarray oligonucleotide" EXACT [] -is_a: SO:0000051 ! probe - -[Term] -id: SO:0000329 -name: mRNA_with_plus_2_frameshift -def: "An mRNA with a plus 2 frameshift." [SO:xp] -synonym: "mRNA with plus 2 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000869 ! plus_2_framshift -relationship: has_quality SO:0000869 ! plus_2_framshift - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! 
conserved_region - -[Term] -id: SO:0000335 -name: mRNA_with_minus_2_frameshift -def: "A mRNA with a minus 2 frameshift." [SO:ke] -synonym: "mRNA with minus 2 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000867 ! minus_2_frameshift -relationship: has_quality SO:0000867 ! minus_2_frameshift - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0001411 ! biological_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITEs do not encode proteins." [http://www.pnas.org/cgi/content/full/97/18/10083] -synonym: "miniature inverted repeat transposable element" EXACT [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome which promotes recombination." 
[SO:rd] -synonym: "recombination hotspot" EXACT [] -xref: http://en.wikipedia.org/wiki/Recombination_hotspot "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -synonym: "site specific recombination target region" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! 
tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000346 -name: loxP_site -synonym: "Cre-recombination target region" RELATED [] -synonym: "loxP site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000348 -name: nucleic_acid -def: "An attribute describing a sequence consisting of nucleobases bound to repeating units. The forms found in nature are deoxyribonucleic acid (DNA), where the repeating units are 2-deoxy-D-ribose rings connected to a phosphate backbone, and ribonucleic acid (RNA), where the repeating units are D-ribose rings connected to a phosphate backbone." [CHEBI:33696, RSC:cb] -synonym: "nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleic_acid "wiki" -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000350 -name: FRT_site -def: "An inversion site found on the Saccharomyces cerevisiae 2 micron plasmid." [SO:ma] -synonym: "FLP recombination target region" EXACT [] -synonym: "FRT site" EXACT [] -is_a: SO:0000948 ! inversion_site - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "An attribute to decide a sequence of nucleotides, nucleotide analogs, or amino acids that has been designed by an experimenter and which may, or may not, correspond with any natural sequence." [SO:ma] -synonym: "synthetic sequence" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000352 -name: DNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a 2-deoxy-D-ribose ring connected to a phosphate backbone." [RSC:cb] -is_a: SO:0000348 ! 
nucleic_acid - -[Term] -id: SO:0000353 -name: sequence_assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -def: "A region of intronic nucleotide sequence targeted by a nuclease enzyme." [SO:ke] -synonym: "group 1 intron homing endonuclease target region" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which is co-inherited as the result of the lack of historic recombination within it." [SO:ma] -synonym: "haplotype block" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000356 -name: RNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a D-ribose ring connected to a phosphate backbone." [RSC:cb] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: flanked -def: "An attribute describing a region that is bounded either side by a particular kind of region." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000359 -name: floxed -def: "An attribute describing sequence that is flanked by Lox-P sites." [SO:ke] -xref: http://en.wikipedia.org/wiki/Floxed "wiki" -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! 
CDS_region - -[Term] -id: SO:0000361 -name: FRT_flanked -def: "An attribute to describe sequence that is flanked by the FLP recombinase recognition site, FRT." [SO:ke] -synonym: "FRT flanked" EXACT [] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000362 -name: invalidated_by_chimeric_cDNA -def: "A cDNA clone constructed from more than one mRNA. Usually an experimental artifact." [SO:ma] -synonym: "invalidated by chimeric cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000363 -name: floxed_gene -def: "A transgene that is floxed." [SO:xp] -synonym: "floxed gene" EXACT [] -is_a: SO:0000902 ! transgene -intersection_of: SO:0000902 ! transgene -intersection_of: has_quality SO:0000359 ! floxed -relationship: has_quality SO:0000359 ! floxed - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposable element." [SO:ke] -synonym: "transposable element flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "A region encoding an integrase which acts at a site adjacent to it (attI_site) to insert DNA which must include but is not limited to an attC_site." [SO:as] -xref: http://en.wikipedia.org/wiki/Integron "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000367 -name: attI_site -def: "A region within an integron, adjacent to an integrase, at which site specific recombination involving an attC_site takes place." [SO:as] -synonym: "attI site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." 
[SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -is_obsolete: true - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/~smaloy/Glossary/C.html] -synonym: "conjugative transposon" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0001185 ! enzymatic -relationship: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0000373 -name: recombinationally_inverted_gene -def: "A recombinationally rearranged gene by inversion." [SO:xp] -synonym: "recombinationally inverted gene" EXACT [] -is_a: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: has_quality SO:1000036 ! inversion -relationship: has_quality SO:1000036 ! inversion - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -is_a: SO:0000372 ! enzymatic_RNA -intersection_of: SO:0000372 ! enzymatic_RNA -intersection_of: has_quality SO:0001186 ! ribozymic -relationship: has_quality SO:0001186 ! 
ribozymic - -[Term] -id: SO:0000375 -name: rRNA_5_8S -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 6S RNA represses expression from a sigma70-dependent promoter during stationary phase." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S RNA" EXACT [] -synonym: "RNA 6S" EXACT [] -xref: http://en.wikipedia.org/wiki/6S_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB RsmB RNA" EXACT [] -synonym: "CsrB-RsmB RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -synonym: "DsrA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/DsrA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -synonym: "GcvB RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/GcvB_RNA "wiki" -is_a: SO:0000378 ! DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." 
[PMID:2436805] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -is_a: SO:0000715 ! RNA_motif -intersection_of: SO:0000715 ! RNA_motif -intersection_of: has_quality SO:0001186 ! ribozymic -relationship: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000381 -name: group_IIA_intron -synonym: "group IIA intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -synonym: "group IIB intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -synonym: "MicF RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MicF_RNA "wiki" -is_a: SO:0000644 ! antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -synonym: "OxyS RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/OxyS_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterized activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. 
Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -synonym: "RprA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RprA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -synonym: "RRE RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -synonym: "spot-42 RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Spot_42_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesizes telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. 
There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -def: "An attribute describes a quality of sequence." [SO:ke] -synonym: "sequence attribute" EXACT [] - -[Term] -id: SO:0000401 -name: gene_attribute -synonym: "gene attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_obsolete: true - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0005837 ! U14_snoRNA_primary_transcript - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. 
The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron. Twintrons are group II or III introns, into which another group II or III intron has been transposed." [PMID:1899376, PMID:7823908] -xref: http://en.wikipedia.org/wiki/Twintron "wiki" -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A biological_region of sequence that, in the molecule, interacts selectively and non-covalently with other molecules. A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: See GO:0005488 : binding. 
-subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with polypeptide molecules." [SO:ke] -comment: See GO:0042277 : peptide binding. -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000411 -name: rescue_region -def: "A region that rescues." [SO:xp] -synonym: "rescue fragment" EXACT [] -synonym: "rescue region" EXACT [] -synonym: "rescue segment" RELATED [] -is_a: SO:0000695 ! reagent -intersection_of: SO:0000695 ! reagent -intersection_of: has_quality SO:0000814 ! rescue -relationship: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000414 -name: invalidated_by_genomic_contamination -def: "An attribute to describe a feature that is invalidated due to genomic contamination." [SO:ke] -synonym: "invalidated by genomic contamination" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000415 -name: invalidated_by_genomic_polyA_primed_cDNA -def: "An attribute to describe a feature that is invalidated due to polyA priming." [SO:ke] -synonym: "invalidated by genomic polyA primed cDNA" EXACT [] -is_a: SO:0000790 ! 
invalidated - -[Term] -id: SO:0000416 -name: invalidated_by_partial_processing -def: "An attribute to describe a feature that is invalidated due to partial processing." [SO:ke] -synonym: "invalidated by partial processing" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000417 -name: polypeptide_domain -alt_id: BS:00012 -alt_id: BS:00134 -alt_id: SO:0001069 -def: "A structurally or functionally defined protein region. In proteins with multiple domains, the combination of the domains determines the function of the protein. A region which has been shown to recur throughout evolution." [EBIBS:GAR] -comment: Range. Old definition from before biosapiens: A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains. -subset: biosapiens -synonym: "ca_bind" RELATED BS [uniprot:feature_type] -synonym: "DNA_bind" RELATED BS [uniprot:feature_type] -synonym: "domain" BROAD BS [uniprot:feature_type] -synonym: "np_bind" RELATED BS [uniprot:feature_type] -synonym: "polypeptide domain" EXACT [] -synonym: "polypeptide_structural_domain" EXACT BS [] -synonym: "structural domain" BROAD BS [] -synonym: "zn_fing" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. 
-subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0001527 ! peptide_localization_signal -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -synonym: "5' TIR" EXACT [] -synonym: "five prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -synonym: "3' TIR" EXACT [] -synonym: "three prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -synonym: "U5 long terminal repeat region" EXACT [] -synonym: "U5 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000423 -name: R_LTR_region -synonym: "R long terminal repeat region" EXACT [] -synonym: "R LTR region" EXACT [] -is_a: SO:0000848 ! 
LTR_component - -[Term] -id: SO:0000424 -name: U3_LTR_region -synonym: "U3 long terminal repeat region" EXACT [] -synonym: "U3 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000425 -name: five_prime_LTR -synonym: "5' long terminal repeat" EXACT [] -synonym: "5' LTR" EXACT [] -synonym: "five prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -synonym: "3' long terminal repeat" EXACT [] -synonym: "3' LTR" EXACT [] -synonym: "three prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -synonym: "R 5' long term repeat region" EXACT [] -synonym: "R five prime LTR region" EXACT [] -is_a: SO:0000423 ! R_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -synonym: "U5 5' long terminal repeat region" EXACT [] -synonym: "U5 five prime LTR region" EXACT [] -is_a: SO:0000422 ! U5_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -synonym: "U3 5' long term repeat region" EXACT [] -synonym: "U3 five prime LTR region" EXACT [] -is_a: SO:0000424 ! U3_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -synonym: "R 3' long terminal repeat region" EXACT [] -synonym: "R three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -synonym: "U3 3' long terminal repeat region" EXACT [] -synonym: "U3 three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -synonym: "U5 3' long terminal repeat region" EXACT [] -synonym: "U5 three prime LTR region" EXACT [] -is_a: SO:0000849 ! 
three_prime_LTR_component - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -synonym: "non LTR retrotransposon polymeric tract" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: target_site_duplication -def: "A sequence of the target DNA that is duplicated when a transposable element or phage inserts; usually found at each end the insertion." [http://www.koko.gov.my/CocoaBioTech/Glossaryt.html] -synonym: "target site duplication" EXACT [] -is_a: SO:0000314 ! direct_repeat -intersection_of: SO:0000314 ! direct_repeat -intersection_of: derives_from SO:0000101 ! transposable_element -relationship: derives_from SO:0000101 ! transposable_element - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." [SO:ke] -synonym: "LTR retrotransposon poly purine tract" RELATED [] -synonym: "RR tract" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -synonym: "inverted ring chromosome" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! ring_chromosome - -[Term] -id: SO:0000440 -name: vector_replicon -def: "A replicon that has been modified to act as a vector for foreign sequence." [SO:ma] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. -synonym: "vector" EXACT [] -synonym: "vector replicon" EXACT [] -xref: http://en.wikipedia.org/wiki/Vector_(molecular_biology) "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000443 -name: polymer_attribute -def: "An attribute to describe the kind of biological sequence." [SO:ke] -synonym: "polymer attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -synonym: "three prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "5' nc exon" EXACT [] -synonym: "5' non coding exon" EXACT [] -synonym: "five prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." [SO:ke] -synonym: "UTR intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." 
[SO:ke] -synonym: "five prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -synonym: "three prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequency of these components." [SO:ma] -synonym: "random sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -synonym: "chromosome interband" RELATED [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000451 -name: gene_with_polyadenylated_mRNA -def: "A gene that encodes a polyadenylated mRNA." [SO:xp] -synonym: "gene with polyadenylated mRNA" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000871 ! polyadenylated_mRNA -relationship: transcribed_to SO:0000871 ! polyadenylated_mRNA - -[Term] -id: SO:0000452 -name: transgene_attribute -is_obsolete: true - -[Term] -id: SO:0000453 -name: chromosomal_transposition -def: "A chromosome structure variant whereby a region of a chromosome has been transferred to another position. Among interchromosomal rearrangements, the term transposition is reserved for that class in which the telomeres of the chromosomes involved are coupled (that is to say, form the two ends of a single DNA molecule) as in wild-type." [FB:reference_manual, SO:ke] -synonym: "chromosomal transposition" EXACT [] -synonym: "transposition" NARROW [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." 
[http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000455 -name: gene_with_mRNA_with_frameshift -def: "A gene that encodes an mRNA with a frameshift." [SO:xp] -synonym: "gene with mRNA with frameshift" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000865 ! frameshift -relationship: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000456 -name: recombinationally_rearranged_gene -def: "A gene that is recombinationally rearranged." [SO:ke] -synonym: "recombinationally rearranged gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000940 ! recombinationally_rearranged -relationship: has_quality SO:0000940 ! recombinationally_rearranged - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -def: "A chromosome duplication involving an insertion from another chromosome." [SO:ke] -synonym: "interchromosomal duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene_segment -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D gene" EXACT [] -synonym: "D-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000459 -name: gene_with_trans_spliced_transcript -def: "A gene with a transcript that is trans-spliced." [SO:xp] -synonym: "gene with trans spliced transcript" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000479 ! trans_spliced_transcript -relationship: transcribed_to SO:0000479 ! 
trans_spliced_transcript - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_segment -comment: I am using the term segment instead of gene here to avoid confusion with the region 'gene'. -synonym: "vertebrate immunoglobulin T cell receptor segment" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite deficiency" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000463 -name: encodes_alternately_spliced_transcripts -def: "A gene that encodes more than one transcript." [SO:ke] -synonym: "encodes alternately spliced transcripts" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome deletion whereby a chromosome is generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus duplication" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! 
intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene_segment -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene" EXACT [] -synonym: "V-GENE" EXACT [] -synonym: "variable_gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -def: "An attribute describing a gene sequence where the resulting protein is regulated by the stability of the resulting protein." [SO:ke] -synonym: "post translationally regulated by protein stability" EXACT [] -synonym: "post-translationally regulated by protein stability" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -def: "An attribute describing a gene sequence where the resulting protein is modified to regulate it." [SO:ke] -synonym: "post translationally regulated by protein modification" EXACT [] -synonym: "post-translationally regulated by protein modification" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene_segment -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene" EXACT [] -synonym: "J-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000471 -name: autoregulated -def: "The gene product is involved in its own transcriptional regulation." 
[SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -def: "The gene product is involved in its own transcriptional regulation where it decreases transcription." [SO:ke] -synonym: "negatively autoregulated" EXACT [] -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -def: "The gene product is involved in its own transcriptional regulation, where it increases transcription." [SO:ke] -synonym: "positively autoregulated" EXACT [] -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." [SO:ke] -synonym: "contig read" EXACT [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -def: "A gene that is polycistronic." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000478 -name: C_gene_segment -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C gene" EXACT [] -synonym: "C_GENE" EXACT [] -synonym: "constant gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000479 -name: trans_spliced_transcript -def: "A transcript that is trans-spliced." 
[SO:xp] -synonym: "trans spliced transcript" EXACT [] -synonym: "trans-spliced transcript" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000870 ! trans_spliced -relationship: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly." [SO:ke] -synonym: "tiling path clone" EXACT [] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occurring at the termini of a DNA transposon." [SO:ke] -synonym: "terminal inverted repeat" EXACT [] -synonym: "TIR" EXACT [] -is_a: SO:0000294 ! inverted_repeat -relationship: part_of SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor gene cluster" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000202 ! 
three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-CLUSTER" EXACT [] -synonym: "DJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-C-CLUSTER" EXACT [] -synonym: "VDJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-CLUSTER" EXACT [] -synonym: "VDJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000574 ! 
VDJ_gene_segment - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-C-CLUSTER" RELATED [] -synonym: "VJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-C-CLUSTER" EXACT [] -synonym: "VJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-CLUSTER" EXACT [] -synonym: "VJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -synonym: "D gene recombination feature" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-HEPTAMER" EXACT [] -synonym: "three prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-NOMAMER" EXACT [] -synonym: "three prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-SPACER" EXACT [] -synonym: "three prime D spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-HEPTAMER" EXACT [] -synonym: "five prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-NONAMER" EXACT [] -synonym: "five prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'-SPACER" EXACT [] -synonym: "five prime D spacer" EXACT [] -synonym: "five prime D-spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -synonym: "Hoogsteen base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Hoogsteen_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." [SO:ke] -synonym: "reverse Hoogsteen base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. 
A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ C cluster" EXACT [] -synonym: "D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ cluster" EXACT [] -synonym: "D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J C cluster" EXACT [] -synonym: "D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000507 -name: pseudogenic_exon -def: "A non functional descendant of an exon, part of a pseudogene." [SO:ke] -comment: This is the analog of the exon of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon -relationship: part_of SO:0000516 ! pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J cluster" EXACT [] -synonym: "D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J C cluster" EXACT [] -synonym: "D-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! 
C_gene_segment - -[Term] -id: SO:0000510 -name: VD_gene_segment -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V_D_GENE" EXACT [] -synonym: "VD gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J C cluster" EXACT [] -synonym: "J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus aneuploid" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J cluster" EXACT [] -synonym: "J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J nonamer" EXACT [] -synonym: "J-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J heptamer" EXACT [] -synonym: "J-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -def: "A non functional descendant of a transcript, part of a pseudogene." [SO:ke] -comment: This is the analog of the transcript of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic transcript" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J spacer" EXACT [] -synonym: "J-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ cluster" EXACT [] -synonym: "V-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J cluster" EXACT [] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ C cluster" EXACT [] -synonym: "V-(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ cluster" EXACT [] -synonym: "V-(VDJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000574 ! 
VDJ_gene_segment - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J cluster" EXACT [] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ C cluster" EXACT [] -synonym: "V-(VJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ cluster" EXACT [] -synonym: "V-(VJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J cluster" EXACT [] -synonym: "V-(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V cluster" EXACT [] -synonym: "V-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ C cluster" EXACT [] -synonym: "V-D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ cluster" EXACT [] -synonym: "V-D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000572 ! 
DJ_gene_segment - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J C cluster" EXACT [] -synonym: "V-D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J cluster" EXACT [] -synonym: "V-D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J C cluster" EXACT [] -synonym: "V-D-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! 
C_gene_segment - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J cluster" EXACT [] -synonym: "V-D-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V heptamer" EXACT [] -synonym: "V-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J cluster" EXACT [] -synonym: "V-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J C cluster" EXACT [] -synonym: "V-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! 
J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V nonamer" EXACT [] -synonym: "V-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V spacer" EXACT [] -synonym: "V-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene recombination feature" EXACT [] -synonym: "V-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-C-CLUSTER" EXACT [] -synonym: "DJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! 
DJ_gene_segment - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-C-CLUSTER" EXACT [] -synonym: "DJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-C-CLUSTER" EXACT [] -synonym: "VDJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ C cluster" EXACT [] -synonym: "V-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." 
[http://www.pnas.org/cgi/content/full/100/11/6569] -synonym: "ISCR" RELATED [] -xref: http://en.wikipedia.org/wiki/Helitron "wiki" -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -synonym: "recoding pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -synonym: "designed sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite duplication" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000548 -name: gene_with_edited_transcript -def: "A gene that encodes a transcript that is edited." [SO:xp] -synonym: "gene with edited transcript" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000873 ! edited_transcript -relationship: transcribed_to SO:0000873 ! edited_transcript - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived duplication plus aneuploid" EXACT [] -is_a: SO:1000038 ! 
intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -def: "A chromosome structural variation whereby either a chromosome exists in addition to the normal chromosome complement or is lacking." [SO:ke] -comment: Examples are Nullo-4, Haplo-4 and triplo-4 in Drosophila. -synonym: "aneuploid chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -synonym: "polyadenylation termination signal" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "A region in the 5' UTR that pairs with the 16S rRNA during formation of the preinitiation complex." [SO:jh] -comment: Not found in Eukaryotic sequence. -synonym: "five prime ribosome binding site" EXACT [] -synonym: "RBS" RELATED [] -synonym: "Shine Dalgarno sequence" EXACT [] -synonym: "Shine-Dalgarno sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Shine-Dalgarno_sequence "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -alt_id: SO:0001430 -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation. The boundary between the UTR and the polyA sequence." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA cleavage site" EXACT [] -synonym: "polyA junction" EXACT [] -synonym: "polyA site" EXACT [] -synonym: "polyA_junction" EXACT [] -synonym: "polyadenylation site" RELATED [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! 
mature_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "5' clip" RELATED [] -synonym: "five prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'RS" EXACT [] -synonym: "five prime D recombination signal sequence" EXACT [] -synonym: "five prime D-recombination signal sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "3'-clip" EXACT [] -synonym: "three prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C cluster" EXACT [] -synonym: "C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D cluster" EXACT [] -synonym: "D-CLUSTER" EXACT [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J cluster" EXACT [] -synonym: "D-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: "Seven nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or J-gene recombination feature of an immunoglobulin or T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "HEPTAMER" RELATED [] -synonym: "heptamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -synonym: "nonamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000563 -name: vertebrate_immune_system_gene_recombination_spacer -synonym: "vertebrate immune system gene recombination spacer" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J C cluster" EXACT [] -synonym: "V-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J C cluster" EXACT [] -synonym: "V-(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J C cluster" EXACT [] -synonym: "V-(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -synonym: "inversion derived aneuploid chromosome" EXACT [] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promoter -synonym: "bidirectional promoter" EXACT [] -is_a: SO:0000167 ! 
promoter - -[Term] -id: SO:0000569 -name: retrotransposed -alt_id: SO:0100042 -def: "An attribute of a feature that occurred as the product of a reverse transcriptase mediated event." [SO:ke] -comment: GO:0003964 RNA-directed DNA polymerase activity. -xref: http://en.wikipedia.org/wiki/Retrotransposed "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-RS" EXACT [] -synonym: "three prime D recombination signal sequence" EXACT [] -synonym: "three_prime_D-recombination_signal_sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_encoding -synonym: "miRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000572 -name: DJ_gene_segment -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D-J-GENE" EXACT [] -synonym: "DJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000573 -name: rRNA_encoding -synonym: "rRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000574 -name: VDJ_gene_segment -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-D-J-GENE" EXACT [] -synonym: "VDJ gene" EXACT [] -is_a: SO:0000936 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000575 -name: scRNA_encoding -synonym: "scRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000576 -name: VJ_gene_segment -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-J-GENE" EXACT [] -synonym: "VJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_encoding -synonym: "snoRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." [SO:ma] -synonym: "edited transcript feature" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -synonym: "methylation guide snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." [SO:ke] -synonym: "rRNA cleavage snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "pre edited region" EXACT [] -synonym: "pre-edited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "A tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. TmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and Eukaryote nuclear genomes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa RNA" RELATED [] -synonym: "ssrA" RELATED [] -xref: http://en.wikipedia.org/wiki/TmRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_encoding -synonym: "C/D box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." [SO:ke] -synonym: "10Sa RNA primary transcript" RELATED [] -synonym: "ssrA RNA primary transcript" RELATED [] -synonym: "tmRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. 
They catalyze their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -is_a: SO:0000188 ! intron -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001186 ! ribozymic -relationship: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -synonym: "SRP RNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. 
Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000589 ! SRP_RNA_primary_transcript - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A tertiary structure in RNA where nucleotides in a loop form base pairs with a region of RNA downstream of the loop." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Pseudoknot "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "classical pseudoknot" EXACT [] -synonym: "H pseudoknot" EXACT [] -synonym: "H-pseudoknot" EXACT [] -synonym: "H-type pseudoknot" EXACT [] -synonym: "hairpin-type pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. 
This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000595 ! C_D_box_snoRNA_primary_transcript - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "box H/ACA snoRNA" EXACT [] -synonym: "H ACA box snoRNA" EXACT [] -synonym: "H/ACA box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000596 ! H_ACA_box_snoRNA_primary_transcript - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." [SO:ke] -synonym: "C/D box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! 
snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -synonym: "H ACA box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." [http://www.rna.ucla.edu/index.html] -is_obsolete: true - -[Term] -id: SO:0000598 -name: edited_by_C_insertion_and_dinucleotide_insertion -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000599 -name: edited_by_C_to_U_substitution -is_obsolete: true - -[Term] -id: SO:0000600 -name: edited_by_A_to_I_substitution -is_obsolete: true - -[Term] -id: SO:0000601 -name: edited_by_G_addition -is_obsolete: true - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. 
The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing block" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing domain" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "unedited region" EXACT [] -is_a: SO:0000579 ! 
edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_encoding -synonym: "H ACA box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -synonym: "oligo U tail" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000602 ! guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -synonym: "bacterial RNApol promoter" EXACT [] -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -synonym: "bacterial terminator" EXACT [] -is_a: SO:0000141 ! 
terminator -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -synonym: "terminator of type 2 RNApol III promoter" EXACT [] -is_a: SO:0000951 ! eukaryotic_terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -synonym: "RNApol III promoter type 1" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "RNApol III promoter type 2" EXACT [] -synonym: "tRNA promoter" RELATED [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence TGGCnnAGTGG." [SO:ke] -comment: Binds TFIIIC. -synonym: "A-box" EXACT [] -xref: http://en.wikipedia.org/wiki/A-box "wiki" -is_a: SO:0001660 ! core_promoter_element - -[Term] -id: SO:0000620 -name: B_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence AGGTTCCAnnCC." [SO:ke] -comment: Binds TFIIIC. -synonym: "B-box" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -synonym: "RNApol III promoter type 3" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -def: "An RNA polymerase III type 1 promoter with consensus sequence CAnnCCn." [SO:ke] -synonym: "C-box" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000623 -name: snRNA_encoding -synonym: "snRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -synonym: "chromosomal regulatory element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000627 -name: insulator -def: "A transcriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -synonym: "five prime open reading frame" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -def: "A start codon upstream of the ORF." [SO:ke] -synonym: "upstream AUG codon" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000631 -name: polycistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." [SO:ke] -synonym: "polycistronic primary transcript" EXACT [] -is_a: SO:0000078 ! 
polycistronic_transcript -is_a: SO:0000185 ! primary_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000880 ! polycistronic -relationship: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000632 -name: monocistronic_primary_transcript -def: "A primary transcript encoding for one gene product." [SO:ke] -synonym: "monocistronic primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript -is_a: SO:0000665 ! monocistronic_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000878 ! monocistronic -relationship: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000633 -name: monocistronic_mRNA -def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd] -synonym: "monocistronic mRNA" EXACT [] -synonym: "monocistronic processed transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Monocistronic_mRNA "wiki" -is_a: SO:0000234 ! mRNA -is_a: SO:0000665 ! monocistronic_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000878 ! monocistronic -relationship: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000634 -name: polycistronic_mRNA -def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd] -synonym: "polycistronic mRNA" EXACT [] -synonym: "polycistronic processed transcript" RELATED [] -xref: http://en.wikipedia.org/wiki/Polycistronic_mRNA "wiki" -is_a: SO:0000078 ! polycistronic_transcript -is_a: SO:0000234 ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000880 ! polycistronic -relationship: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "A primary transcript that donates the spliced leader to other mRNA." [SO:ke] -synonym: "mini exon donor RNA" EXACT [] -synonym: "mini-exon donor RNA" EXACT [] -is_a: SO:0000185 ! 
primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -synonym: "spliced leader RNA" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000635 ! mini_exon_donor_RNA - -[Term] -id: SO:0000637 -name: engineered_plasmid -def: "A plasmid that is engineered." [SO:xp] -synonym: "engineered plasmid" EXACT [] -synonym: "engineered plasmid gene" RELATED [] -is_a: SO:0000155 ! plasmid -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -synonym: "transcribed spacer region" EXACT [] -is_a: SO:0000838 ! rRNA_primary_transcript_region - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -synonym: "internal transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -synonym: "external transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -synonym: "tetranucleotide repeat microsatellite feature" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_encoding -synonym: "SRP RNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro RNA primary transcript" EXACT [] -synonym: "miRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000648 -name: stRNA_primary_transcript -def: "A primary transcript encoding a small temporal mRNA (SO:0000649)." [SO:ke] -synonym: "small temporal RNA primary transcript" EXACT [] -synonym: "stRNA primary transcript" EXACT [] -is_a: SO:0000647 ! 
miRNA_primary_transcript - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000255 ! rRNA_small_subunit_primary_transcript - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilizes 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! 
large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000654 -name: maxicircle_gene -def: "A mitochondrial gene located in a maxicircle." [SO:xp] -synonym: "maxi-circle gene" EXACT [] -synonym: "maxicircle gene" EXACT [] -is_a: SO:0000089 ! kinetoplast_gene -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000742 ! maxicircle -relationship: part_of SO:0000742 ! maxicircle - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000656 -name: stRNA_encoding -synonym: "stRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: has_part SO:0000726 ! repeat_unit - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! 
repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_encoding -synonym: "tmRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_obsolete: true - -[Term] -id: SO:0000661 -name: intron_attribute -is_obsolete: true - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000663 -name: tRNA_encoding -synonym: "tRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -synonym: "introgressed chromosome region" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000665 -name: monocistronic_transcript -def: "A transcript that is monocistronic." [SO:xp] -synonym: "monocistronic transcript" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000878 ! monocistronic -relationship: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000666 -name: mobile_intron -def: "An intron (mitochondrial, chloroplast, nuclear or prokaryotic) that encodes a double strand sequence specific endonuclease allowing for mobility." [SO:ke] -synonym: "mobile intron" EXACT [] -is_a: SO:0000188 ! intron -is_a: SO:0001037 ! mobile_genetic_element -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001234 ! mobile -relationship: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0000667 -name: insertion -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: DBVAR -subset: SOFA -synonym: "insertion" EXACT dbvar [http://www.ncbi.nlm.nih.gov/dbvar/] -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -xref: loinc:LA6687-3 "Insertion" -is_a: SO:0001059 ! 
sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -synonym: "sequence rearrangement feature" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." [SO:ma] -synonym: "chromosome breakage sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -synonym: "internal eliminated sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -synonym: "macronucleus destined segment" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non canonical splice site" EXACT [] -synonym: "non-canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000678 -consider: SO:0000679 - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." 
[SO:ke] -synonym: "canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000676 -consider: SO:0000677 - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -synonym: "canonical 3' splice site" EXACT [] -synonym: "canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." [SO:ke] -synonym: "canonical 5' splice site" EXACT [] -synonym: "canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." [SO:ke] -synonym: "non canonical 3' splice site" RELATED [] -synonym: "non canonical three prime splice site" EXACT [] -synonym: "non-canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non canonical 5' splice site" EXACT [] -synonym: "non canonical five prime splice site" EXACT [] -synonym: "non-canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non ATG start codon" EXACT [] -synonym: "non canonical start codon" EXACT [] -synonym: "non-canonical start codon" EXACT [] -is_a: SO:0000318 ! start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -synonym: "aberrant processed transcript" EXACT [] -is_a: SO:0000673 ! 
transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." [http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -synonym: "exonic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000685 -name: DNAseI_hypersensitive_site -synonym: "DHS" EXACT [] -synonym: "DNAseI hypersensitive site" EXACT [] -is_a: SO:0000322 ! nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "A chromosomal translocation whereby the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements. This occurs for some translocations, particularly but not exclusively, reciprocal translocations." [SO:ma] -synonym: "translocation element" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! 
expressed_sequence_match - -[Term] -id: SO:0000690 -name: gene_with_polycistronic_transcript -def: "A gene that encodes a polycistronic transcript." [SO:xp] -synonym: "gene with polycistronic transcript" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000078 ! polycistronic_transcript -relationship: transcribed_to SO:0000078 ! polycistronic_transcript - -[Term] -id: SO:0000691 -name: cleaved_initiator_methionine -alt_id: BS:00067 -def: "The initiator methionine that has been cleaved from a mature polypeptide sequence." [EBIBS:GAR] -subset: biosapiens -synonym: "cleaved initiator methionine" EXACT [] -synonym: "init_met" RELATED [uniprot:feature_type] -synonym: "initiator methionine" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000692 -name: gene_with_dicistronic_transcript -def: "A gene that encodes a dicistronic transcript." [SO:xp] -synonym: "gene with dicistronic transcript" EXACT [] -is_a: SO:0000690 ! gene_with_polycistronic_transcript -intersection_of: SO:0000690 ! gene_with_polycistronic_transcript -intersection_of: transcribed_to SO:0000079 ! dicistronic_transcript -relationship: transcribed_to SO:0000079 ! dicistronic_transcript - -[Term] -id: SO:0000693 -name: gene_with_recoded_mRNA -def: "A gene that encodes an mRNA that is recoded." [SO:xp] -synonym: "gene with recoded mRNA" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000881 ! recoded -relationship: has_quality SO:0000881 ! recoded - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! 
SNV - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000697 -name: gene_with_stop_codon_read_through -def: "A gene that encodes a transcript with stop codon readthrough." [SO:xp] -synonym: "gene with stop codon read through" EXACT [] -is_a: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000883 ! stop_codon_read_through -relationship: has_part SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000698 -name: gene_with_stop_codon_redefined_as_pyrrolysine -def: "A gene encoding an mRNA that has the stop codon redefined as pyrrolysine." [SO:xp] -synonym: "gene with stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000884 ! stop_codon_redefined_as_pyrrolysine -relationship: has_part SO:0000884 ! stop_codon_redefined_as_pyrrolysine - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -synonym: "breakpoint" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." 
[SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjcent copies of a region (of length greater than 1)." [SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The 5' five prime splice site region of the donor RNA." 
[SO:ke] -comment: SL RNA contains a donor site. -synonym: "5 prime trans splice site" RELATED [] -synonym: "trans splice donor site" EXACT [] -synonym: "trans-splice donor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -def: "A trans_splicing_acceptor_site which appends the 22nt SL1 RNA leader sequence to the 5' end of most mRNAs." [SO:nlw] -synonym: "SL1 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -def: "A trans_splicing_acceptor_site which appends the 22nt SL2 RNA leader sequence to the 5' end of mRNAs. SL2 acceptor sites occur in genes in internal segments of polycistronic transcripts." [SO:nlw] -synonym: "SL2 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000710 -name: gene_with_stop_codon_redefined_as_selenocysteine -def: "A gene encoding an mRNA that has the stop codon redefined as selenocysteine." [SO:xp] -synonym: "gene with stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: has_part SO:0000885 ! stop_codon_redefined_as_selenocysteine -relationship: has_part SO:0000885 ! stop_codon_redefined_as_selenocysteine - -[Term] -id: SO:0000711 -name: gene_with_mRNA_recoded_by_translational_bypass -def: "A gene with mRNA recoded by translational bypass." [SO:xp] -synonym: "gene with mRNA recoded by translational bypass" EXACT [] -is_a: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000886 ! recoded_by_translational_bypass -relationship: has_quality SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:0000712 -name: gene_with_transcript_with_translational_frameshift -def: "A gene encoding a transcript that has a translational frameshift." 
[SO:xp] -synonym: "gene with transcript with translational frameshift" EXACT [] -is_a: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_quality SO:0000887 ! translationally_frameshifted -relationship: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000713 -name: DNA_motif -def: "A motif that is active in the DNA form of the sequence." [SO:ke] -synonym: "DNA motif" EXACT [] -xref: http://en.wikipedia.org/wiki/DNA_motif "wiki" -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001683 ! sequence_motif - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000716 -name: dicistronic_mRNA -def: "An mRNA that has the quality dicistronic." [SO:ke] -synonym: "dicistronic mRNA" EXACT [] -synonym: "dicistronic processed transcript" RELATED [] -is_a: SO:0000079 ! dicistronic_transcript -is_a: SO:0000634 ! polycistronic_mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000879 ! dicistronic -relationship: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interrupted by one or more stop codons; usually identified through intergenomic sequence comparisons." [SGD:rb] -comment: Term requested by Rama from SGD. -synonym: "blocked reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -synonym: "superscaffold" RELATED [] -is_a: SO:0001876 ! partial_genomic_sequence_assembly - -[Term] -id: SO:0000720 -name: foreign_transposable_element -def: "A transposable element that is foreign." [SO:ke] -comment: requested by Michael on 19 Nov 2004. -synonym: "foreign transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000721 -name: gene_with_dicistronic_primary_transcript -def: "A gene that encodes a dicistronic primary transcript." [SO:xp] -comment: Requested by Michael, 19 nov 2004. -synonym: "gene with dicistronic primary transcript" EXACT [] -is_a: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:1001197 ! dicistronic_primary_transcript -relationship: transcribed_to SO:1001197 ! dicistronic_primary_transcript - -[Term] -id: SO:0000722 -name: gene_with_dicistronic_mRNA -def: "A gene that encodes a polycistronic mRNA." [SO:xp] -comment: Requested by MA nov 19 2004. -synonym: "gene with dicistronic mRNA" EXACT [] -synonym: "gene with dicistronic processed transcript" EXACT [] -is_a: SO:0000692 ! 
gene_with_dicistronic_transcript -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:0000716 ! dicistronic_mRNA -relationship: transcribed_to SO:0000716 ! dicistronic_mRNA - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." [SO:ma] -synonym: "intervening DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/IDNA "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal - -[Term] -id: SO:0000726 -name: repeat_unit -def: "The simplest repeated component of a repeat region. A single repeat." [SO:ke] -comment: Added to comply with the feature table. A single repeat. 
-synonym: "repeat unit" EXACT [] -xref: http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: has_part SO:0000235 ! TF_binding_site -relationship: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000728 -name: intein -def: "A region of a peptide that is able to excise itself and rejoin the remaining portions with a peptide bond." [SO:ke] -comment: Intein-mediated protein splicing occurs after mRNA has been translated into a protein. -synonym: "protein intron" RELATED [] -xref: http://en.wikipedia.org/wiki/Intein "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000729 -name: intein_containing -def: "An attribute of protein-coding genes where the initial protein product contains an intein." [SO:ke] -synonym: "intein containing" EXACT [] -is_a: SO:0000010 ! protein_coding - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000731 -name: fragmentary -def: "An attribute to describe a feature that is incomplete." [SO:ke] -comment: Term added because of request by MO people. -synonym: "fragment" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000732 -name: predicted -def: "An attribute describing an unverified region." [SO:ke] -xref: http://en.wikipedia.org/wiki/Predicted "wiki" -is_a: SO:0000905 ! 
status - -[Term] -id: SO:0000733 -name: feature_attribute -def: "An attribute describing a located_sequence_feature." [SO:ke] -synonym: "feature attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000734 -name: exemplar_mRNA -def: "An exemplar is a representative cDNA sequence for each gene. The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: Added for the MO people. -synonym: "exemplar mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000864 ! exemplar -relationship: has_quality SO:0000864 ! exemplar - -[Term] -id: SO:0000735 -name: sequence_location -synonym: "sequence location" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_sequence -synonym: "organelle sequence" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "mitochondrial sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000738 -name: nuclear_sequence -synonym: "nuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -synonym: "nucleomorphic sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000740 -name: plastid_sequence -synonym: "plastid sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000741 -name: kinetoplast -alt_id: SO:0000826 -def: "A kinetoplast is an interlocked network of thousands of minicircles and tens of maxi circles, located near the base of the flagellum of some protozoan species." 
[PMID:8395055] -synonym: "kinetoplast_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Kinetoplast "wiki" -is_a: SO:0001026 ! genome -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0000742 ! maxicircle -intersection_of: has_part SO:0000980 ! minicircle -relationship: has_part SO:0000742 ! maxicircle -relationship: has_part SO:0000980 ! minicircle - -[Term] -id: SO:0000742 -name: maxicircle -alt_id: SO:0000827 -def: "A maxicircle is a replicon, part of a kinetoplast, that contains open reading frames and replicates via a rolling circle method." [PMID:8395055] -synonym: "maxicircle_chromosome" EXACT [] -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000743 -name: apicoplast_sequence -synonym: "apicoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -synonym: "chromoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -synonym: "chloroplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -synonym: "cyanelle sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -synonym: "leucoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -synonym: "proplastid sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_location -synonym: "plasmid location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -synonym: "amplification origin" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_location -synonym: "proviral location" EXACT [] -is_a: SO:0000735 ! 
sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -def: "The region of sequence that has been inserted and is being propagated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000754 -name: lambda_vector -def: "The lambda bacteriophage is the vector for the linear lambda clone. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -synonym: "lambda vector" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000755 -name: plasmid_vector -synonym: "plasmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Plasmid_vector#Vectors "wiki" -is_a: SO:0000440 ! vector_replicon -intersection_of: SO:0001235 ! replicon -intersection_of: derives_from SO:0000155 ! plasmid -relationship: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template." [SO:ma] -synonym: "complementary DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/CDNA "wiki" -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -synonym: "single strand cDNA" EXACT [] -synonym: "single stranded cDNA" EXACT [] -synonym: "single-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -synonym: "double strand cDNA" RELATED [] -synonym: "double stranded cDNA" EXACT [] -synonym: "double-strand cDNA" RELATED [] -is_a: SO:0000756 ! 
cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_obsolete: true - -[Term] -id: SO:0000760 -name: YAC_clone -is_obsolete: true - -[Term] -id: SO:0000761 -name: phagemid_clone -is_obsolete: true - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000763 -name: fosmid_clone -is_obsolete: true - -[Term] -id: SO:0000764 -name: BAC_clone -is_obsolete: true - -[Term] -id: SO:0000765 -name: cosmid_clone -is_obsolete: true - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -def: "A tRNA sequence that has a pyrrolysine anticodon, and a 3' pyrrolysine binding region." [SO:ke] -synonym: "pyrrolysyl tRNA" EXACT [] -synonym: "pyrrolysyl-transfer ribonucleic acid" EXACT [] -synonym: "pyrrolysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0001178 ! pyrrolysine_tRNA_primary_transcript - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome." [SO:ma] -is_a: SO:0000155 ! plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria. Processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw, issn:1362-4962] -comment: Added in response to comment from Kelly Williams from Indiana. Nov 2005. -synonym: "tmRNA coding piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." 
[doi:10.1093/nar/gkh795, Indiana:kw] -comment: Added in response to Kelly Williams from Indiana. Date: Nov 2005. -synonym: "tmRNA acceptor piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000771 -name: QTL -def: "A quantitative trait locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." [http://rgd.mcw.edu/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005. -synonym: "quantitative trait locus" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000772 -name: genomic_island -def: "A genomic island is an integrated mobile genetic element, characterized by size (over 10 Kb). It that has features that suggest a foreign origin. These can include nucleotide distribution (oligonucleotides signature, CG content etc.) that differs from the bulk of the chromosome and/or genes suggesting DNA mobility." [Phigo:at, SO:ke] -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -synonym: "genomic island" EXACT [] -xref: http://en.wikipedia.org/wiki/Genomic_island "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "pathogenic island" EXACT [] -is_a: SO:0000772 ! 
genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands. -synonym: "metabolic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -def: "An adaptive island is a genomic island that provides an adaptive advantage to the host." [SO:ke] -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands. Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "adaptive island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands. Evolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA gene. John T. Sullivan and Clive W. Ronso PNAS 1998 Apr 28 95 (9) 5145-5149. -synonym: "symbiosis island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. 
Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000779 -name: engineered_episome -def: "An episome that is engineered." [SO:xp] -comment: Requested by Lynn Crosby Jan 2006. -synonym: "engineered episome" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -is_a: SO:0000768 ! episome -intersection_of: SO:0000768 ! episome -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000780 -name: transposable_element_attribute -comment: Added by KE Jan 2006 to capture the kinds of attributes of TEs -is_obsolete: true - -[Term] -id: SO:0000781 -name: transgenic -def: "Attribute describing sequence that has been integrated with foreign sequence." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000782 -name: natural -def: "An attribute describing a feature that occurs in nature." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000783 -name: engineered -def: "An attribute to describe a region that was modified in vitro." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000784 -name: foreign -def: "An attribute to describe a region from another species." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000785 -name: cloned_region -comment: Added in response to Lynn Crosby. A clone insert may be composed of many cloned regions. -synonym: "cloned region" EXACT [] -synonym: "cloned segment" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000786 -name: reagent_attribute -comment: Added jan 2006 by KE. 
-synonym: "reagent attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000787 -name: clone_attribute -is_obsolete: true - -[Term] -id: SO:0000788 -name: cloned -is_obsolete: true - -[Term] -id: SO:0000789 -name: validated -def: "An attribute to describe a feature that has been proven." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000790 -name: invalidated -def: "An attribute describing a feature that is invalidated." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000791 -name: cloned_genomic -is_obsolete: true - -[Term] -id: SO:0000792 -name: cloned_cDNA -is_obsolete: true - -[Term] -id: SO:0000793 -name: engineered_DNA -is_obsolete: true - -[Term] -id: SO:0000794 -name: engineered_rescue_region -def: "A rescue region that is engineered." [SO:xp] -synonym: "engineered rescue fragment" EXACT [] -synonym: "engineered rescue region" EXACT [] -synonym: "engineered rescue segment" EXACT [] -is_a: SO:0000411 ! rescue_region -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000411 ! rescue_region -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000795 -name: rescue_mini_gene -def: "A mini_gene that rescues." [SO:xp] -synonym: "rescue mini gene" EXACT [] -synonym: "rescue mini-gene" EXACT [] -is_a: SO:0000815 ! mini_gene -intersection_of: SO:0000815 ! mini_gene -intersection_of: has_quality SO:0000814 ! rescue -relationship: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000796 -name: transgenic_transposable_element -def: "TE that has been modified in vitro, including insertion of DNA derived from a source other than the originating TE." [FB:mc] -comment: Modified as requested by Lynn - FB. May 2007. -synonym: "transgenic transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: derives_from SO:0000151 ! clone -intersection_of: has_quality SO:0000781 ! 
transgenic -relationship: derives_from SO:0000151 ! clone -relationship: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000797 -name: natural_transposable_element -def: "TE that exists (or existed) in nature." [FB:mc] -synonym: "natural transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000782 ! natural -relationship: has_quality SO:0000782 ! natural - -[Term] -id: SO:0000798 -name: engineered_transposable_element -def: "TE that has been modified by manipulations in vitro." [FB:mc] -synonym: "engineered transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000799 -name: engineered_foreign_transposable_element -def: "A transposable_element that is engineered and foreign." [FB:mc] -synonym: "engineered foreign transposable element" EXACT [] -is_a: SO:0000720 ! foreign_transposable_element -is_a: SO:0000798 ! engineered_transposable_element -is_a: SO:0000805 ! engineered_foreign_region -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000800 -name: assortment_derived_duplication -def: "A multi-chromosome duplication aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment derived duplication" EXACT [] -is_a: SO:0001504 ! 
assortment_derived_variation - -[Term] -id: SO:0000801 -name: assortment_derived_deficiency_plus_duplication -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency and a duplication." [FB:gm] -synonym: "assortment derived deficiency plus duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000802 -name: assortment_derived_deficiency -def: "A multi-chromosome deficiency aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment-derived deficiency" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000803 -name: assortment_derived_aneuploid -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency or a duplication." [FB:gm] -synonym: "assortment derived aneuploid" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000804 -name: engineered_region -def: "A region that is engineered." [SO:xp] -synonym: "construct" EXACT [] -synonym: "engineered region" EXACT [] -synonym: "engineered sequence" EXACT [] -is_a: SO:0001409 ! biomaterial_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000805 -name: engineered_foreign_region -def: "A region that is engineered and foreign." [SO:xp] -synonym: "engineered foreign region" EXACT [] -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000806 -name: fusion -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000807 -name: engineered_tag -def: "A tag that is engineered." 
[SO:xp] -synonym: "engineered tag" EXACT [] -is_a: SO:0000324 ! tag -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000324 ! tag -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000808 -name: validated_cDNA_clone -def: "A cDNA clone that has been validated." [SO:xp] -synonym: "validated cDNA clone" EXACT [] -is_a: SO:0000317 ! cDNA_clone -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000789 ! validated -relationship: has_quality SO:0000789 ! validated - -[Term] -id: SO:0000809 -name: invalidated_cDNA_clone -def: "A cDNA clone that is invalid." [SO:xp] -synonym: "invalidated cDNA clone" EXACT [] -is_a: SO:0000317 ! cDNA_clone -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000790 ! invalidated -relationship: has_quality SO:0000790 ! invalidated - -[Term] -id: SO:0000810 -name: chimeric_cDNA_clone -def: "A cDNA clone invalidated because it is chimeric." [SO:xp] -synonym: "chimeric cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA -relationship: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA - -[Term] -id: SO:0000811 -name: genomically_contaminated_cDNA_clone -def: "A cDNA clone invalidated by genomic contamination." [SO:xp] -synonym: "genomically contaminated cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000414 ! invalidated_by_genomic_contamination -relationship: has_quality SO:0000414 ! invalidated_by_genomic_contamination - -[Term] -id: SO:0000812 -name: polyA_primed_cDNA_clone -def: "A cDNA clone invalidated by polyA priming." [SO:xp] -synonym: "polyA primed cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! 
invalidated_cDNA_clone -intersection_of: has_quality SO:0000415 ! invalidated_by_genomic_polyA_primed_cDNA -relationship: has_quality SO:0000415 ! invalidated_by_genomic_polyA_primed_cDNA - -[Term] -id: SO:0000813 -name: partially_processed_cDNA_clone -def: "A cDNA invalidated clone by partial processing." [SO:xp] -synonym: "partially processed cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000416 ! invalidated_by_partial_processing -relationship: has_quality SO:0000416 ! invalidated_by_partial_processing - -[Term] -id: SO:0000814 -name: rescue -def: "An attribute describing a region's ability, when introduced to a mutant organism, to re-establish (rescue) a phenotype." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000815 -name: mini_gene -def: "By definition, minigenes are short open-reading frames (ORF), usually encoding approximately 9 to 20 amino acids, which are expressed in vivo (as distinct from being synthesized as peptide or protein ex vivo and subsequently injected). The in vivo synthesis confers a distinct advantage: the expressed sequences can enter both antigen presentation pathways, MHC I (inducing CD8+ T- cells, which are usually cytotoxic T-lymphocytes (CTL)) and MHC II (inducing CD4+ T-cells, usually 'T-helpers' (Th)); and can encounter B-cells, inducing antibody responses. Three main vector approaches have been used to deliver minigenes: viral vectors, bacterial vectors and plasmid DNA." [PMID:15992143] -synonym: "mini gene" EXACT [] -is_a: SO:0000236 ! ORF - -[Term] -id: SO:0000816 -name: rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "rescue gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000814 ! rescue -relationship: has_quality SO:0000814 ! 
rescue - -[Term] -id: SO:0000817 -name: wild_type -def: "An attribute describing sequence with the genotype found in nature and/or standard laboratory stock." [SO:ke] -synonym: "wild type" EXACT [] -xref: http://en.wikipedia.org/wiki/Wild_type "wiki" -xref: loinc:LA9658-1 "wild type" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000818 -name: wild_type_rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "wild type rescue gene" EXACT [] -is_a: SO:0000816 ! rescue_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000817 ! wild_type -relationship: has_quality SO:0000817 ! wild_type - -[Term] -id: SO:0000819 -name: mitochondrial_chromosome -def: "A chromosome originating in a mitochondria." [SO:xp] -synonym: "mitochondrial chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence -relationship: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000820 -name: chloroplast_chromosome -def: "A chromosome originating in a chloroplast." [SO:xp] -synonym: "chloroplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000745 ! chloroplast_sequence -relationship: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000821 -name: chromoplast_chromosome -def: "A chromosome originating in a chromoplast." [SO:xp] -synonym: "chromoplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000744 ! chromoplast_sequence -relationship: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000822 -name: cyanelle_chromosome -def: "A chromosome originating in a cyanelle." [SO:xp] -synonym: "cyanelle chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000746 ! 
cyanelle_sequence -relationship: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000823 -name: leucoplast_chromosome -def: "A chromosome with origin in a leucoplast." [SO:xp] -synonym: "leucoplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000747 ! leucoplast_sequence -relationship: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000824 -name: macronuclear_chromosome -def: "A chromosome originating in a macronucleus." [SO:xp] -synonym: "macronuclear chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000083 ! macronuclear_sequence -relationship: has_origin SO:0000083 ! macronuclear_sequence - -[Term] -id: SO:0000825 -name: micronuclear_chromosome -def: "A chromosome originating in a micronucleus." [SO:xp] -synonym: "micronuclear chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000084 ! micronuclear_sequence -relationship: has_origin SO:0000084 ! micronuclear_sequence - -[Term] -id: SO:0000828 -name: nuclear_chromosome -def: "A chromosome originating in a nucleus." [SO:xp] -synonym: "nuclear chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000738 ! nuclear_sequence -relationship: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000829 -name: nucleomorphic_chromosome -def: "A chromosome originating in a nucleomorph." [SO:xp] -synonym: "nucleomorphic chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence -relationship: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." 
[SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000832 -name: promoter_region -def: "A region of sequence which is part of a promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -def: "A region of a mature transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! 
primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000838 -name: rRNA_primary_transcript_region -def: "A region of an rRNA primary transcript." [SO:ke] -comment: To allow transcribed_spacer_region to have a path to the root. -synonym: "rRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000840 -name: repeat_component -def: "A region of a repeated sequence." [SO:ke] -comment: A manufactured to group the parts of repeats, to give them an is_a path back to the root. -synonym: "repeat component" EXACT [] -is_a: SO:0001412 ! 
topologically_defined_region - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000843 -name: bacterial_RNApol_promoter_region -def: "A region which is part of a bacterial RNA polymerase promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of bacterial_RNApol_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000844 -name: RNApol_II_promoter_region -def: "A region of sequence which is a promoter for RNA polymerase II." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_II_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000845 -name: RNApol_III_promoter_type_1_region -def: "A region of sequence which is a promoter for RNA polymerase III type 1." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_1 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000846 -name: RNApol_III_promoter_type_2_region -def: "A region of sequence which is a promoter for RNA polymerase III type 2." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_2 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000847 -name: tmRNA_region -def: "A region of a tmRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of tmRNA, thus giving them an is_a path back to the root. 
-synonym: "tmRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000584 ! tmRNA - -[Term] -id: SO:0000848 -name: LTR_component -synonym: "long term repeat component" EXACT [] -synonym: "LTR component" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000849 -name: three_prime_LTR_component -synonym: "3' long terminal repeat component" EXACT [] -synonym: "three prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000850 -name: five_prime_LTR_component -synonym: "5' long term repeat component" EXACT [] -synonym: "five prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000851 -name: CDS_region -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0000853 -name: homologous_region -def: "A region that is homologous to another region." [SO:ke] -synonym: "homolog" EXACT [] -synonym: "homologous region" EXACT [] -synonym: "homologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Homology_(biology) "wiki" -is_a: SO:0000330 ! conserved_region -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000857 ! homologous -relationship: has_quality SO:0000857 ! homologous - -[Term] -id: SO:0000854 -name: paralogous_region -def: "A homologous_region that is paralogous to another region." [SO:ke] -comment: A term to be used in conjunction with the paralogous_to relationship. 
-synonym: "paralog" EXACT [] -synonym: "paralogous region" EXACT [] -synonym: "paralogue" EXACT [] -xref: http://en.wikipedia.org/wiki/Paralog#Paralogy "wiki" -is_a: SO:0000853 ! homologous_region -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000859 ! paralogous -relationship: has_quality SO:0000859 ! paralogous - -[Term] -id: SO:0000855 -name: orthologous_region -def: "A homologous_region that is orthologous to another region." [SO:ke] -comment: This term should be used in conjunction with the similarity relationships defined in SO. -synonym: "ortholog" EXACT [] -synonym: "orthologous region" EXACT [] -synonym: "orthologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Ortholog#Orthology "wiki" -is_a: SO:0000853 ! homologous_region -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000858 ! orthologous -relationship: has_quality SO:0000858 ! orthologous - -[Term] -id: SO:0000856 -name: conserved -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000857 -name: homologous -def: "Similarity due to common ancestry." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000858 -name: orthologous -def: "An attribute describing a kind of homology where divergence occured after a speciation event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000859 -name: paralogous -def: "An attribute describing a kind of homology where divergence occurred after a duplication event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000860 -name: syntenic -def: "Attribute describing sequence regions occurring in same order on chromosome of different species." [SO:ke] -xref: http://en.wikipedia.org/wiki/Syntenic "wiki" -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000861 -name: capped_primary_transcript -def: "A primary transcript that is capped." [SO:xp] -synonym: "capped primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript -intersection_of: SO:0000185 ! 
primary_transcript -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped -relationship: adjacent_to SO:0000581 ! cap -relationship: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000862 -name: capped_mRNA -def: "An mRNA that is capped." [SO:xp] -synonym: "capped mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped -relationship: adjacent_to SO:0000581 ! cap -relationship: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000863 -name: mRNA_attribute -def: "An attribute describing an mRNA feature." [SO:ke] -synonym: "mRNA attribute" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000864 -name: exemplar -def: "An attribute describing a sequence is representative of a class of similar sequences." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000865 -name: frameshift -def: "An attribute describing a sequence that contains a mutation involving the deletion or insertion of one or more bases, where this number is not divisible by 3." [SO:ke] -xref: http://en.wikipedia.org/wiki/Frameshift "wiki" -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000866 -name: minus_1_frameshift -def: "A frameshift caused by deleting one base." [SO:ke] -synonym: "minus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000867 -name: minus_2_frameshift -def: "A frameshift caused by deleting two bases." [SO:ke] -synonym: "minus 2 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000868 -name: plus_1_frameshift -def: "A frameshift caused by inserting one base." [SO:ke] -synonym: "plus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000869 -name: plus_2_framshift -def: "A frameshift caused by inserting two bases." [SO:ke] -synonym: "plus 2 framshift" EXACT [] -is_a: SO:0000865 ! 
frameshift - -[Term] -id: SO:0000870 -name: trans_spliced -def: "An attribute describing transcript sequence that is created by splicing exons from diferent genes." [SO:ke] -synonym: "trans-spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000871 -name: polyadenylated_mRNA -def: "An mRNA that is polyadenylated." [SO:xp] -synonym: "polyadenylated mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000610 ! polyA_sequence -intersection_of: has_quality SO:0000246 ! polyadenylated -relationship: adjacent_to SO:0000610 ! polyA_sequence -relationship: has_quality SO:0000246 ! polyadenylated - -[Term] -id: SO:0000872 -name: trans_spliced_mRNA -def: "An mRNA that is trans-spliced." [SO:xp] -synonym: "trans-spliced mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -is_a: SO:0000479 ! trans_spliced_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000636 ! spliced_leader_RNA -intersection_of: has_quality SO:0000870 ! trans_spliced -relationship: adjacent_to SO:0000636 ! spliced_leader_RNA -relationship: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000873 -name: edited_transcript -def: "A transcript that is edited." [SO:ke] -synonym: "edited transcript" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000116 ! edited -relationship: guided_by SO:0000602 ! guide_RNA -relationship: has_part SO:0000977 ! anchor_binding_site -relationship: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000874 -name: edited_transcript_by_A_to_I_substitution -def: "A transcript that has been edited by A to I substitution." [SO:ke] -synonym: "edited transcript by A to I substitution" EXACT [] -is_a: SO:0000873 ! edited_transcript - -[Term] -id: SO:0000875 -name: bound_by_protein -def: "An attribute describing a sequence that is bound by a protein." [SO:ke] -synonym: "bound by protein" EXACT [] -is_a: SO:0000277 ! 
bound_by_factor - -[Term] -id: SO:0000876 -name: bound_by_nucleic_acid -def: "An attribute describing a sequence that is bound by a nucleic acid." [SO:ke] -synonym: "bound by nucleic acid" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000877 -name: alternatively_spliced -def: "An attribute describing a situation where a gene may encode for more than 1 transcript." [SO:ke] -synonym: "alternatively spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000878 -name: monocistronic -def: "An attribute describing a sequence that contains the code for one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000879 -name: dicistronic -def: "An attribute describing a sequence that contains the code for two gene products." [SO:ke] -is_a: SO:0000880 ! polycistronic - -[Term] -id: SO:0000880 -name: polycistronic -def: "An attribute describing a sequence that contains the code for more than one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000881 -name: recoded -def: "An attribute describing an mRNA sequence that has been reprogrammed at translation, causing localized alterations." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000882 -name: codon_redefined -def: "An attribute describing the alteration of codon meaning." [SO:ke] -synonym: "codon redefined" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000883 -name: stop_codon_read_through -def: "A stop codon redefined to be a new amino acid." [SO:ke] -synonym: "stop codon read through" EXACT [] -synonym: "stop codon readthrough" RELATED [] -is_a: SO:0000145 ! recoded_codon - -[Term] -id: SO:0000884 -name: stop_codon_redefined_as_pyrrolysine -def: "A stop codon redefined to be the new amino acid, pyrrolysine." [SO:ke] -synonym: "stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000883 ! 
stop_codon_read_through - -[Term] -id: SO:0000885 -name: stop_codon_redefined_as_selenocysteine -def: "A stop codon redefined to be the new amino acid, selenocysteine." [SO:ke] -synonym: "stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000886 -name: recoded_by_translational_bypass -def: "Recoded mRNA where a block of nucleotides is not translated." [SO:ke] -synonym: "recoded by translational bypass" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000887 -name: translationally_frameshifted -def: "Recoding by frameshifting a particular site." [SO:ke] -synonym: "translationally frameshifted" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000888 -name: maternally_imprinted_gene -def: "A gene that is maternally_imprinted." [SO:xp] -synonym: "maternally imprinted gene" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000135 ! maternally_imprinted -relationship: has_quality SO:0000135 ! maternally_imprinted - -[Term] -id: SO:0000889 -name: paternally_imprinted_gene -def: "A gene that is paternally imprinted." [SO:xp] -synonym: "paternally imprinted gene" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000136 ! paternally_imprinted -relationship: has_quality SO:0000136 ! paternally_imprinted - -[Term] -id: SO:0000890 -name: post_translationally_regulated_gene -def: "A gene that is post translationally regulated." [SO:xp] -synonym: "post translationally regulated gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000130 ! post_translationally_regulated -relationship: has_quality SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000891 -name: negatively_autoregulated_gene -def: "A gene that is negatively autoreguated." 
[SO:xp] -synonym: "negatively autoregulated gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000473 ! negatively_autoregulated -relationship: has_quality SO:0000473 ! negatively_autoregulated - -[Term] -id: SO:0000892 -name: positively_autoregulated_gene -def: "A gene that is positively autoregulated." [SO:xp] -synonym: "positively autoregulated gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000475 ! positively_autoregulated -relationship: has_quality SO:0000475 ! positively_autoregulated - -[Term] -id: SO:0000893 -name: silenced -def: "An attribute describing an epigenetic process where a gene is inactivated at transcriptional or translational level." [SO:ke] -xref: http://en.wikipedia.org/wiki/Silenced "wiki" -is_a: SO:0000126 ! transcriptionally_repressed - -[Term] -id: SO:0000894 -name: silenced_by_DNA_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA modifications, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0000895 -name: silenced_by_DNA_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA methylation, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA methylation" EXACT [] -is_a: SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000896 -name: translationally_regulated_gene -def: "A gene that is translationally regulated." [SO:xp] -synonym: "translationally regulated gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000131 ! translationally_regulated -relationship: has_quality SO:0000131 ! translationally_regulated - -[Term] -id: SO:0000897 -name: allelically_excluded_gene -def: "A gene that is allelically_excluded." 
[SO:xp] -synonym: "allelically excluded gene" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000137 ! allelically_excluded -relationship: has_quality SO:0000137 ! allelically_excluded - -[Term] -id: SO:0000898 -name: epigenetically_modified_gene -def: "A gene that is epigenetically modified." [SO:ke] -synonym: "epigenetically modified gene" EXACT [] -is_a: SO:0000704 ! gene -is_a: SO:0001720 ! epigenetically_modified_region -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000133 ! epigenetically_modified -relationship: has_quality SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000899 -name: nuclear_mitochondrial -def: "An attribute describing a nuclear pseudogene of a mitochndrial gene." [SO:ke] -synonym: "nuclear mitochondrial" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000900 -name: processed -def: "An attribute describing a pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promotors, but often including a polyA tail." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000901 -name: unequally_crossed_over -def: "An attribute describing a pseudogene that was created by tandem duplication and unequal crossing over during recombination." [SO:ke] -synonym: "unequally crossed over" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000902 -name: transgene -def: "A transgene is a gene that has been transferred naturally or by any of a number of genetic engineering techniques from one organism to another." [SO:xp] -xref: http://en.wikipedia.org/wiki/Transgene "wiki" -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000781 ! transgenic -relationship: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000903 -name: endogenous_retroviral_sequence -synonym: "endogenous retroviral sequence" EXACT [] -is_a: SO:0000751 ! 
proviral_location - -[Term] -id: SO:0000904 -name: rearranged_at_DNA_level -def: "An attribute to describe the sequence of a feature, where the DNA is rearranged." [SO:ke] -synonym: "rearranged at DNA level" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000905 -name: status -def: "An attribute describing the status of a feature, based on the available evidence." [SO:ke] -comment: This term is the hypernym of attributes and should not be annotated to. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000906 -name: independently_known -def: "Attribute to describe a feature that is independently known - not predicted." [SO:ke] -synonym: "independently known" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000907 -name: supported_by_sequence_similarity -def: "An attribute to describe a feature that has been predicted using sequence similarity techniques." [SO:ke] -synonym: "supported by sequence similarity" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000908 -name: supported_by_domain_match -def: "An attribute to describe a feature that has been predicted using sequence similarity of a known domain." [SO:ke] -synonym: "supported by domain match" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000909 -name: supported_by_EST_or_cDNA -def: "An attribute to describe a feature that has been predicted using sequence similarity to EST or cDNA data." [SO:ke] -synonym: "supported by EST or cDNA" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000910 -name: orphan -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000911 -name: predicted_by_ab_initio_computation -def: "An attribute describing a feature that is predicted by a computer program that did not rely on sequence similarity." [SO:ke] -synonym: "predicted by ab initio computation" EXACT [] -is_a: SO:0000732 ! 
predicted - -[Term] -id: SO:0000912 -name: asx_turn -alt_id: BS:00203 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Aspartate or Asparagine (Asx), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0000913 -name: cloned_cDNA_insert -def: "A clone insert made from cDNA." [SO:xp] -synonym: "cloned cDNA insert" EXACT [] -is_a: SO:0000753 ! clone_insert -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000756 ! cDNA -relationship: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000914 -name: cloned_genomic_insert -def: "A clone insert made from genomic DNA." [SO:xp] -synonym: "cloned genomic insert" EXACT [] -is_a: SO:0000753 ! clone_insert -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000991 ! genomic_DNA -relationship: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000915 -name: engineered_insert -def: "A clone insert that is engineered." [SO:xp] -synonym: "engineered insert" EXACT [] -is_a: SO:0000753 ! clone_insert -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000916 -name: edit_operation -synonym: "edit operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000917 -name: insert_U -def: "An edit to insert a U." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "insert U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000918 -name: delete_U -def: "An edit to delete a uridine." 
[SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "delete U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000919 -name: substitute_A_to_I -def: "An edit to substitute an I for an A." [SO:ke] -synonym: "substitute A to I" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000920 -name: insert_C -def: "An edit to insert a cytidine." [SO:ke] -synonym: "insert C" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000921 -name: insert_dinucleotide -def: "An edit to insert a dinucleotide." [SO:ke] -synonym: "insert dinucleotide" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000922 -name: substitute_C_to_U -def: "An edit to substitute an U for a C." [SO:ke] -synonym: "substitute C to U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000923 -name: insert_G -def: "An edit to insert a G." [SO:ke] -synonym: "insert G" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000924 -name: insert_GC -def: "An edit to insert a GC dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GC" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000925 -name: insert_GU -def: "An edit to insert a GU dinucleotide." 
[SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000926 -name: insert_CU -def: "An edit to insert a CU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert CU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000927 -name: insert_AU -def: "An edit to insert a AU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. 
Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000928 -name: insert_AA -def: "An edit to insert a AA dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AA" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000929 -name: edited_mRNA -def: "An mRNA that is edited." [SO:xp] -synonym: "edited mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -is_a: SO:0000873 ! edited_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000116 ! edited -relationship: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000930 -name: guide_RNA_region -def: "A region of guide RNA." [SO:ma] -synonym: "guide RNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000602 ! guide_RNA - -[Term] -id: SO:0000931 -name: anchor_region -def: "A region of a guide_RNA that base-pairs to a target mRNA." [SO:jk] -synonym: "anchor region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000932 -name: pre_edited_mRNA -synonym: "pre-edited mRNA" EXACT [] -is_a: SO:0000120 ! 
protein_coding_primary_transcript - -[Term] -id: SO:0000933 -name: intermediate -def: "An attribute to describe a feature between stages of processing." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000934 -name: miRNA_target_site -def: "A miRNA target site is a binding site where the molecule is a micro RNA." [FB:cds] -synonym: "miRNA target site" EXACT [] -is_a: SO:0001655 ! nucleotide_binding_site - -[Term] -id: SO:0000935 -name: edited_CDS -def: "A CDS that is edited." [SO:xp] -synonym: "edited CDS" EXACT [] -is_a: SO:0000316 ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000116 ! edited -relationship: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000936 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -synonym: "vertebrate immunoglobulin T cell receptor rearranged segment" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000937 -name: vertebrate_immune_system_feature -is_obsolete: true - -[Term] -id: SO:0000938 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor rearranged gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000939 -name: vertebrate_immune_system_gene_recombination_signal_feature -synonym: "vertebrate immune system gene recombination signal feature" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000940 -name: recombinationally_rearranged -synonym: "recombinationally rearranged" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000941 -name: recombinationally_rearranged_vertebrate_immune_system_gene -def: "A recombinationally rearranged gene of the vertebrate immune system." [SO:xp] -synonym: "recombinationally rearranged vertebrate immune system gene" EXACT [] -is_a: SO:0000456 ! 
recombinationally_rearranged_gene - -[Term] -id: SO:0000942 -name: attP_site -def: "An integration/excision site of a phage chromosome at which a recombinase acts to insert the phage DNA at a cognate integration/excision site on a bacterial chromosome." [SO:as] -synonym: "attP site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0001042 ! phage_sequence - -[Term] -id: SO:0000943 -name: attB_site -def: "An integration/excision site of a bacterial chromosome at which a recombinase acts to insert foreign DNA containing a cognate integration/excision site." [SO:as] -synonym: "attB site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000944 -name: attL_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attB_site and the 3' portion of attP_site." [SO:as] -synonym: "attBP'" RELATED [] -synonym: "attL site" RELATED [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000945 -name: attR_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attP_site and the 3' portion of attB_site." [SO:as] -synonym: "attPB'" RELATED [] -synonym: "attR site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000946 -name: integration_excision_site -def: "A region specifically recognised by a recombinase, which inserts or removes another region marked by a distinct cognate integration/excision site." [SO:as] -synonym: "attachment site" RELATED [] -synonym: "integration excision site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000947 -name: resolution_site -def: "A region specifically recognised by a recombinase, which separates a physically contiguous circle of DNA into two physically separate circles." [SO:as] -synonym: "res site" EXACT [] -synonym: "resolution site" EXACT [] -is_a: SO:0000342 ! 
site_specific_recombination_target_region - -[Term] -id: SO:0000948 -name: inversion_site -def: "A region specifically recognised by a recombinase, which inverts the region flanked by a pair of sites." [SO:ma] -comment: A target region for site-specific inversion of a DNA region and which carries binding sites for a site-specific recombinase and accessory proteins as well as the site for specific cleavage by the recombinase. -synonym: "inversion site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000949 -name: dif_site -def: "A site at which replicated bacterial circular chromosomes are decatenated by site specific resolvase." [SO:as] -synonym: "dif site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000950 -name: attC_site -def: "An attC site is a sequence required for the integration of a DNA of an integron." [SO:as] -synonym: "attC site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000951 -name: eukaryotic_terminator -synonym: "eukaryotic terminator" EXACT [] -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000952 -name: oriV -def: "An origin of vegetative replication in plasmids and phages." [SO:as] -synonym: "origin of vegetative replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000953 -name: oriC -def: "An origin of bacterial chromosome replication." [SO:as] -synonym: "origin of bacterial chromosome replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000954 -name: DNA_chromosome -def: "Structural unit composed of a self-replicating, DNA molecule." [SO:ma] -synonym: "DNA chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000352 ! DNA -relationship: has_quality SO:0000352 ! 
DNA - -[Term] -id: SO:0000955 -name: double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded DNA molecule." [SO:ma] -synonym: "double stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! DNA_chromosome -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000985 ! double -relationship: has_quality SO:0000985 ! double - -[Term] -id: SO:0000956 -name: single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded DNA molecule." [SO:ma] -synonym: "single stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! DNA_chromosome -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000984 ! single -relationship: has_quality SO:0000984 ! single - -[Term] -id: SO:0000957 -name: linear_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear DNA molecule." [SO:ma] -synonym: "linear double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000958 -name: circular_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular DNA molecule." [SO:ma] -synonym: "circular double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000959 -name: linear_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear DNA molecule." [SO:ma] -synonym: "linear single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: SO:0000956 ! 
single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000960 -name: circular_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000961 -name: RNA_chromosome -def: "Structural unit composed of a self-replicating, RNA molecule." [SO:ma] -synonym: "RNA chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000356 ! RNA -relationship: has_quality SO:0000356 ! RNA - -[Term] -id: SO:0000962 -name: single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded RNA molecule." [SO:ma] -synonym: "single stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! RNA_chromosome -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000984 ! single -relationship: has_quality SO:0000984 ! single - -[Term] -id: SO:0000963 -name: linear_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear RNA molecule." [SO:ma] -synonym: "linear single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000964 -name: linear_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear RNA molecule." [SO:ma] -synonym: "linear double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! 
double_stranded_RNA_chromosome -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000965 -name: double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded RNA molecule." [SO:ma] -synonym: "double stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! RNA_chromosome -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000985 ! double -relationship: has_quality SO:0000985 ! double - -[Term] -id: SO:0000966 -name: circular_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000967 -name: circular_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular RNA molecule." [SO:ma] -synonym: "circular double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000968 -name: sequence_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "sequence replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000969 -name: rolling_circle -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070581. 
-synonym: "rolling circle" EXACT [] -xref: http://en.wikipedia.org/wiki/Rolling_circle "wiki" -is_obsolete: true - -[Term] -id: SO:0000970 -name: theta_replication -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070582 -synonym: "theta replication" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000971 -name: DNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0006260. -synonym: "DNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000972 -name: RNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "RNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000973 -name: insertion_sequence -def: "A terminal_inverted_repeat_element that is bacterial and only encodes the functions required for its transposition between these inverted repeats." [SO:as] -synonym: "insertion sequence" EXACT [] -synonym: "IS" RELATED [] -xref: http://en.wikipedia.org/wiki/Insertion_sequence "wiki" -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000975 -name: minicircle_gene -synonym: "minicircle gene" EXACT [] -is_a: SO:0000089 ! kinetoplast_gene -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000980 ! minicircle -relationship: part_of SO:0000980 ! minicircle - -[Term] -id: SO:0000976 -name: cryptic -def: "A feature_attribute describing a feature that is not manifest under normal conditions." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000977 -name: anchor_binding_site -comment: Part of an edited transcript only. -synonym: "anchor binding site" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000978 -name: template_region -def: "A region of a guide_RNA that specifies the insertions and deletions of bases in the editing of a target mRNA." 
[SO:jk] -synonym: "information region" EXACT [] -synonym: "template region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000979 -name: gRNA_encoding -def: "A non-protein_coding gene that encodes a guide_RNA." [SO:ma] -synonym: "gRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000980 -name: minicircle -alt_id: SO:0000974 -def: "A minicircle is a replicon, part of a kinetoplast, that encodes for guide RNAs." [PMID:8395055] -synonym: "minicircle_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Minicircle "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000981 -name: rho_dependent_bacterial_terminator -synonym: "rho dependent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000982 -name: rho_independent_bacterial_terminator -synonym: "rho independent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000983 -name: strand_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "strand attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000984 -name: single -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000985 -name: double -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000986 -name: topology_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "topology attribute" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000987 -name: linear -def: "A quality of a nucleotide polymer that has a 3'-terminal residue and a 5'-terminal residue." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. 
SO workshop, September 2006. -synonym: "two-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000988 ! circular - -[Term] -id: SO:0000988 -name: circular -def: "A quality of a nucleotide polymer that has no terminal nucleotide residues." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "zero-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute - -[Term] -id: SO:0000989 -name: class_II_RNA -def: "Small non-coding RNA (59-60 nt long) containing 5' and 3' ends that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -synonym: "class II RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000990 -name: class_I_RNA -def: "Small non-coding RNA (55-65 nt long) containing highly conserved 5' and 3' ends (16 and 8 nt, respectively) that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -comment: Requested by Karen Pilcher - Dictybase. song-Term Tracker-1574577. -synonym: "class I RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000991 -name: genomic_DNA -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "genomic DNA" EXACT [] -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000992 -name: BAC_cloned_genomic_insert -comment: Requested by Andy Schroder - Flybase Harvard, Nov 2006. -synonym: "BAC cloned genomic insert" EXACT [] -is_a: SO:0000914 ! cloned_genomic_insert -intersection_of: SO:0000914 ! cloned_genomic_insert -intersection_of: derives_from SO:0000153 ! BAC -relationship: derives_from SO:0000153 ! BAC - -[Term] -id: SO:0000993 -name: consensus -comment: Term added Dec 06 to comply with mapping to MGED terms. It should be used to generate consensus regions. 
The specific cross product terms they require are consensus_region and consensus_mRNA. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000994 -name: consensus_region -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus region" EXACT [] -is_a: SO:0001410 ! experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000993 ! consensus -relationship: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000995 -name: consensus_mRNA -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -is_a: SO:0000994 ! consensus_region -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000993 ! consensus -relationship: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000996 -name: predicted_gene -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "predicted gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000732 ! predicted -relationship: has_quality SO:0000732 ! predicted - -[Term] -id: SO:0000997 -name: gene_fragment -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "gene fragment" EXACT [] -is_a: SO:0000842 ! gene_component_region -intersection_of: SO:0000842 ! gene_component_region -intersection_of: has_quality SO:0000731 ! fragmentary -relationship: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0000998 -name: recursive_splice_site -def: "A recursive splice site is a splice site which subdivides a large intron. Recursive splicing is a mechanism that splices large introns by sub dividing the intron at non exonic elements and alternate exons." [http://www.genetics.org/cgi/content/full/170/2/661] -synonym: "recursive splice site" EXACT [] -is_a: SO:0001419 ! 
cis_splice_site - -[Term] -id: SO:0000999 -name: BAC_end -def: "A region of sequence from the end of a BAC clone that may provide a highly specific marker." [SO:ke] -comment: Requested by Keith Boroevich December, 2006. -synonym: "BAC end" EXACT [] -synonym: "BAC end sequence" EXACT [] -synonym: "BES" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000153 ! BAC - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001003 -name: solo_LTR -def: "A recombination product between the 2 LTR of the same element." [SO:ke] -comment: Requested by Hadi Quesneville January 2007. -synonym: "solo LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0001004 -name: low_complexity -synonym: "low complexity" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0001005 -name: low_complexity_region -synonym: "low complexity region" EXACT [] -is_a: SO:0001410 ! 
experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001004 ! low_complexity -relationship: has_quality SO:0001004 ! low_complexity - -[Term] -id: SO:0001006 -name: prophage -def: "A phage genome after it has established in the host genome in a latent/immune state either as a plasmid or as an integrated \"island\"." [GOC:jl] -xref: http://en.wikipedia.org/wiki/Prophage "wiki" -is_a: SO:0000113 ! proviral_region - -[Term] -id: SO:0001007 -name: cryptic_prophage -def: "A remnant of an integrated prophage in the host genome or an \"island\" in the host genome that includes phage like-genes." [GOC:jl] -comment: This is not cryptic in the same sense as a cryptic gene or cryptic splice site. -synonym: "cryptic prophage" EXACT [] -xref: http://ecoliwiki.net/colipedia/index.php/Category:Cryptic_Prophage.w -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001008 -name: tetraloop -def: "A base-paired stem with loop of 4 non-hydrogen bonded nucleotides." [SO:ke] -xref: http://en.wikipedia.org/wiki/Tetraloop "wiki" -is_a: SO:0000313 ! stem_loop - -[Term] -id: SO:0001009 -name: DNA_constraint_sequence -def: "A double-stranded DNA used to control macromolecular structure and function." [http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au\]&dispmax=50] -synonym: "DNA constraint" EXACT [] -synonym: "DNA constraint sequence" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0001010 -name: i_motif -def: "A cytosine rich domain whereby strands associate both inter- and intramolecularly at moderately acidic pH." [PMID:9753739] -synonym: "i motif" EXACT [] -synonym: "short intercalated motif" EXACT [] -is_a: SO:0000142 ! DNA_sequence_secondary_structure - -[Term] -id: SO:0001011 -name: PNA_oligo -def: "Peptide nucleic acid, is a chemical not known to occur naturally but is artificially synthesized and used in some biological research and medical treatments. 
The PNA backbone is composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [SO:ke] -synonym: "peptide nucleic acid" EXACT [] -synonym: "PNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Peptide_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001184 ! PNA -relationship: has_quality SO:0001184 ! PNA - -[Term] -id: SO:0001012 -name: DNAzyme -def: "A DNA sequence with catalytic activity." [SO:cb] -comment: Added by request from Colin Batchelor. -synonym: "catalytic DNA" EXACT [] -synonym: "deoxyribozyme" RELATED [] -synonym: "DNA enzyme" EXACT [] -is_a: SO:0000696 ! oligo -intersection_of: SO:0000696 ! oligo -intersection_of: has_quality SO:0001185 ! enzymatic -relationship: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0001013 -name: MNP -def: "A multiple nucleotide polymorphism with alleles of common length > 1, for example AAA/TTT." [http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?rs=rs2067431] -synonym: "multiple nucleotide polymorphism" RELATED [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0001014 -name: intron_domain -comment: Requested by Colin Batchelor, Feb 2007. -synonym: "intron domain" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000188 ! intron - -[Term] -id: SO:0001015 -name: wobble_base_pair -def: "A type of non-canonical base pairing, most commonly between G and U, which is important for the secondary structure of RNAs. It has similar thermodynamic stability to the Watson-Crick pairing. Wobble base pairs only have two hydrogen bonds. Other wobble base pair possibilities are I-A, I-U and I-C." [PMID:11256617] -synonym: "wobble base pair" EXACT [] -synonym: "wobble pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Wobble_base_pair "wiki" -is_a: SO:0000028 ! 
base_pair - -[Term] -id: SO:0001016 -name: internal_guide_sequence -def: "A purine-rich sequence in the group I introns which determines the locations of the splice sites in group I intron splicing and has catalytic activity." [SO:cb] -synonym: "IGS" EXACT [] -synonym: "internal guide sequence" EXACT [] -is_a: SO:0001014 ! intron_domain -relationship: part_of SO:0000587 ! group_I_intron - -[Term] -id: SO:0001017 -name: silent_mutation -def: "A sequence variant that does not affect protein function. Silent mutations may occur in genic ( CDS, UTR, intron etc) and intergenic regions. Silent mutations may have affects on processes such as splicing and regulation." [SO:ke] -comment: Added in March 2007 in after meeting with pharmgkb. Although this term is in common usage, it is better to annotate with the most specific term possible, such as synonymous codon, intron variant etc. -synonym: "silent mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Silent_mutation "wiki" -xref: loinc:LA6700-4 "Silent" -is_a: SO:0001878 ! feature_variant - -[Term] -id: SO:0001018 -name: epitope -def: "A binding site that, in the molecule, interacts selectively and non-covalently with antibodies, B cells or T cells." [http://en.wikipedia.org/wiki/Epitope, SO:cb] -comment: Requested by Trish Whetzel. -xref: http://en.wikipedia.org/wiki/Epitope "wiki" -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -subset: SOFA -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0001020 -name: sequence_variant_affecting_copy_number -comment: OBSOLETE: This term was deleted as it conflated more than one term. The alteration is separate from the effect. 
-synonym: "mutation affecting copy number" EXACT [] -synonym: "sequence variant affecting copy number" EXACT [] -is_obsolete: true -replaced_by: SO:0001563 - -[Term] -id: SO:0001021 -name: chromosome_breakpoint -alt_id: SO:0001242 -synonym: "aberration breakpoint" EXACT [] -synonym: "aberration_junction" EXACT [] -synonym: "chromosome breakpoint" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0001022 -name: inversion_breakpoint -def: "The point within a chromosome where an inversion begins or ends." [SO:cb] -synonym: "inversion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001023 -name: allele -def: "An allele is one of a set of coexisting sequence variants of a gene." [SO:immuno_workshop] -synonym: "allelomorph" EXACT [] -xref: http://en.wikipedia.org/wiki/Allele "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000704 ! gene - -[Term] -id: SO:0001024 -name: haplotype -def: "A haplotype is one of a set of coexisting sequence variants of a haplotype block." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Haplotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000355 ! haplotype_block - -[Term] -id: SO:0001025 -name: polymorphic_sequence_variant -def: "A sequence variant that is segregating in one or more natural populations of a species." [SO:immuno_workshop] -synonym: "polymorphic sequence variant" EXACT [] -is_a: SO:0001023 ! allele - -[Term] -id: SO:0001026 -name: genome -def: "A genome is the sum of genetic material within a cell or virion." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genome "wiki" -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0001235 ! replicon -relationship: has_part SO:0001235 ! replicon - -[Term] -id: SO:0001027 -name: genotype -def: "A genotype is a variant genome, complete or incomplete." 
[SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0001026 ! genome - -[Term] -id: SO:0001028 -name: diplotype -def: "A diplotype is a pair of haplotypes from a given individual. It is a genotype where the phase is known." [SO:immuno_workshop] -is_a: SO:0001507 ! variant_collection - -[Term] -id: SO:0001029 -name: direction_attribute -synonym: "direction attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001030 -name: forward -def: "Forward is an attribute of the feature, where the feature is in the 5' to 3' direction." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001031 -name: reverse -def: "Reverse is an attribute of the feature, where the feature is in the 3' to 5' direction. Again could be applied to primer." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001032 -name: mitochondrial_DNA -comment: This terms is used by MO. -synonym: "mitochondrial DNA" EXACT [] -synonym: "mtDNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_DNA "wiki" -is_a: SO:0000737 ! mitochondrial_sequence -intersection_of: SO:0000737 ! mitochondrial_sequence -intersection_of: has_quality SO:0000352 ! DNA -relationship: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001033 -name: chloroplast_DNA -comment: This term is used by MO. -synonym: "chloroplast DNA" EXACT [] -is_a: SO:0000745 ! chloroplast_sequence -intersection_of: SO:0000745 ! chloroplast_sequence -intersection_of: has_quality SO:0000352 ! DNA -relationship: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001034 -name: mirtron -def: "A de-branched intron which mimics the structure of pre-miRNA and enters the miRNA processing pathway without Drosha mediated cleavage." [PMID:17589500, SO:ma] -comment: Ruby et al. Nature 448:83 describe a new class of miRNAs that are derived from de-branched introns. -is_a: SO:0001014 ! 
intron_domain - -[Term] -id: SO:0001035 -name: piRNA -def: "A small non coding RNA, part of a silencing system that prevents the spreading of selfish genetic elements." [SO:ke] -synonym: "piwi-associated RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/PiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001036 -name: arginyl_tRNA -def: "A tRNA sequence that has an arginine anticodon, and a 3' arginine binding region." [SO:ke] -synonym: "arginyl tRNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000212 ! arginine_tRNA_primary_transcript - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001234 ! mobile -relationship: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0001038 -name: extrachromosomal_mobile_genetic_element -def: "An MGE that is not integrated into the host chromosome." [SO:ke] -synonym: "extrachromosomal mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001040 -name: integrated_plasmid -def: "A plasmid sequence that is integrated within the host chromosome." [SO:ke] -synonym: "integrated plasmid" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element -intersection_of: SO:0001039 ! integrated_mobile_genetic_element -intersection_of: derives_from SO:0000155 ! 
plasmid -relationship: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0001041 -name: viral_sequence -def: "The region of nucleotide sequence of a virus, a submicroscopic particle that replicates by infecting a host cell." [SO:ke] -comment: The definitions of the children of this term were revised Decemeber 2007 after discussion on song-devel. The resulting definitions are slightly unweildy but hopefully more logically correct. -synonym: "viral sequence" EXACT [] -synonym: "virus sequence" EXACT [] -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0001042 -name: phage_sequence -def: "The nucleotide sequence of a virus that infects bacteria." [SO:ke] -synonym: "bacteriophage" EXACT [] -synonym: "phage" EXACT [] -synonym: "phage sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Bacteriophage "wiki" -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001043 -name: attCtn_site -def: "An attachment site located on a conjugative transposon and used for site-specific integration of a conjugative transposon." [Phigo:at] -synonym: "attCtn site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000371 ! conjugative_transposon - -[Term] -id: SO:0001044 -name: nuclear_mt_pseudogene -def: "A nuclear pseudogene of either coding or non-coding mitochondria derived sequence." [SO:xp] -comment: Definition change requested by Val, 3172757. -synonym: "nuclear mitochondrial pseudogene" EXACT [] -synonym: "nuclear mt pseudogene" EXACT [] -synonym: "NUMT" EXACT [] -xref: http://en.wikipedia.org/wiki/Numt "wikipedia" -is_a: SO:0001760 ! non_processed_pseudogene - -[Term] -id: SO:0001045 -name: cointegrated_plasmid -def: "A MGE region consisting of two fused plasmids resulting from a replicative transposition event." [phigo:at] -synonym: "cointegrated plasmid" EXACT [] -synonym: "cointegrated replicon" EXACT [] -is_a: SO:0001039 ! 
integrated_mobile_genetic_element - -[Term] -id: SO:0001046 -name: IRLinv_site -def: "Component of the inversion site located at the left of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRLinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001047 -name: IRRinv_site -def: "Component of the inversion site located at the right of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRRinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001048 -name: inversion_site_part -def: "A region located within an inversion site." [SO:ke] -comment: A term created to allow the parts of an inversion site have an is_a path back to the root. -synonym: "inversion site part" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0001049 -name: defective_conjugative_transposon -def: "An island that contains genes for integration/excision and the gene and site for the initiation of intercellular transfer by conjugation. It can be complemented for transfer by a conjugative transposon." [Phigo:ariane] -synonym: "defective conjugative transposon" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001050 -name: repeat_fragment -def: "A portion of a repeat, interrupted by the insertion of another element." [SO:ke] -comment: Requested by Chris Smith, and others at Flybase to help annotate nested repeats. -synonym: "repeat fragment" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0001649 ! 
nested_repeat - -[Term] -id: SO:0001051 -name: nested_region -is_obsolete: true - -[Term] -id: SO:0001052 -name: nested_repeat -is_obsolete: true - -[Term] -id: SO:0001053 -name: nested_transposon -is_obsolete: true - -[Term] -id: SO:0001054 -name: transposon_fragment -def: "A portion of a transposon, interrupted by the insertion of another element." [SO:ke] -synonym: "transposon fragment" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0001648 ! nested_transposon - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -subset: SOFA -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." [SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001057 -name: enhanceosome -is_obsolete: true - -[Term] -id: SO:0001058 -name: promoter_targeting_sequence -def: "A transcriptional_cis_regulatory_region that restricts the activity of a CRM to a single promoter and which functions only when both itself and an insulator are located between the CRM and the promoter." [SO:regcreative] -synonym: "promoter targeting sequence" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. 
-subset: SOFA -synonym: "partially characterised change in DNA sequence" NARROW [] -synonym: "partially_characterised_change_in_DNA_sequence" NARROW [] -synonym: "sequence alteration" EXACT [] -synonym: "uncharacterised_change_in_nucleotide_sequence" NARROW [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001060 -name: sequence_variant -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -synonym: "sequence variant" EXACT [] - -[Term] -id: SO:0001061 -name: propeptide_cleavage_site -alt_id: BS:00063 -def: "The propeptide_cleavage_site is the arginine/lysine boundary on a propeptide where cleavage occurs." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "propeptide cleavage site" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0001062 -name: propeptide -alt_id: BS:00077 -def: "Part of a peptide chain which is cleaved off during the formation of the mature protein." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "propep" RELATED [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Propeptide "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001064 -name: active_peptide -alt_id: BS:00076 -def: "Active peptides are proteins which are biologically active, released from a precursor molecule." [EBIBS:GAR, UniProt:curation_manual] -comment: Hormones, neuropeptides, antimicrobial peptides, are active peptides. They are typically short (<40 amino acids) in length. 
-subset: biosapiens -synonym: "active peptide" EXACT [] -synonym: "peptide" BROAD [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Peptide "wiki" -is_a: SO:0000419 ! mature_protein_region - -[Term] -id: SO:0001066 -name: compositionally_biased_region_of_peptide -alt_id: BS:00068 -def: "Polypeptide region that is rich in a particular amino acid or homopolymeric and greater than three residues in length." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "compbias" RELATED [uniprot:feature_type] -synonym: "compositional bias" RELATED [] -synonym: "compositionally biased" RELATED [] -synonym: "compositionally biased region of peptide" RELATED [] -synonym: "compositionally_biased_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001067 -name: polypeptide_motif -alt_id: BS:00032 -def: "A sequence motif is a short (up to 20 amino acids) region of biological interest. Such motifs, although they are too short to constitute functional domains, share sequence similarities and are conserved in different proteins. They display a common function (protein-binding, subcellular location etc.)." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "motif" BROAD [uniprot:feature_type] -synonym: "polypeptide motif" EXACT [] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001068 -name: polypeptide_repeat -alt_id: BS:00070 -def: "A polypeptide_repeat is a single copy of an internal sequence repetition." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "polypeptide repeat" EXACT [] -synonym: "repeat" RELATED [uniprot:feature_type] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001070 -name: polypeptide_structural_region -alt_id: BS:00337 -def: "Region of polypeptide with a given structural property." [EBIBS:GAR, SO:cb] -comment: Range. 
-subset: biosapiens -synonym: "polypeptide structural region" EXACT [] -synonym: "structural_region" RELATED [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001071 -name: membrane_structure -alt_id: BS:00128 -def: "Arrangement of the polypeptide with respect to the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "membrane structure" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001072 -name: extramembrane_polypeptide_region -alt_id: BS:00154 -def: "Polypeptide region that is localized outside of a lipid bilayer." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "extramembrane" RELATED BS [] -synonym: "extramembrane polypeptide region" EXACT [] -synonym: "extramembrane_region" RELATED BS [] -synonym: "topo_dom" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001073 -name: cytoplasmic_polypeptide_region -alt_id: BS:00145 -def: "Polypeptide region that is localized inside the cytoplasm." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "cytoplasm_location" EXACT BS [] -synonym: "cytoplasmic polypeptide region" EXACT [] -synonym: "inside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001074 -name: non_cytoplasmic_polypeptide_region -alt_id: BS:00144 -def: "Polypeptide region that is localized outside of a lipid bilayer and outside of the cytoplasm." [EBIBS:GAR, SO:cb] -comment: This could be inside an organelle within the cell. -subset: biosapiens -synonym: "non cytoplasmic polypeptide region" EXACT [] -synonym: "non_cytoplasm_location" EXACT BS [] -synonym: "outside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001075 -name: intramembrane_polypeptide_region -alt_id: BS:00156 -def: "Polypeptide region present in the lipid bilayer." 
[EBIBS:GAR] -subset: biosapiens -synonym: "intramembrane" RELATED BS [] -synonym: "intramembrane polypeptide region" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001076 -name: membrane_peptide_loop -alt_id: BS:00155 -def: "Polypeptide region localized within the lipid bilayer where both ends traverse the same membrane." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "membrane peptide loop" EXACT [] -synonym: "membrane_loop" RELATED BS [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001077 -name: transmembrane_polypeptide_region -alt_id: BS:00158 -def: "Polypeptide region traversing the lipid bilayer." [EBIBS:GAR, UniProt:curator_manual] -subset: biosapiens -synonym: "transmem" RELATED BS [uniprot:feature_type] -synonym: "transmembrane" RELATED BS [] -synonym: "transmembrane polypeptide region" EXACT [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001078 -name: polypeptide_secondary_structure -alt_id: BS:00003 -def: "A region of peptide with secondary structure has hydrogen bonding along the peptide chain that causes a defined conformation of the chain." [EBIBS:GAR] -comment: Biosapien term was secondary_structure. -subset: biosapiens -synonym: "2nary structure" RELATED BS [] -synonym: "polypeptide secondary structure" EXACT [] -synonym: "secondary structure" RELATED BS [] -synonym: "secondary structure region" RELATED BS [] -synonym: "secondary_structure" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Secondary_structure "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001079 -name: polypeptide_structural_motif -alt_id: BS:0000338 -def: "Motif is a three-dimensional structural element within the chain, which appears also in a variety of other molecules. Unlike a domain, a motif does not need to form a stable globular unit." 
[EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide structural motif" RELATED [] -synonym: "structural_motif" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Structural_motif "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001080 -name: coiled_coil -alt_id: BS:00041 -def: "A coiled coil is a structural motif in proteins, in which alpha-helices are coiled together like the strands of a rope." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "coiled" RELATED BS [uniprot:feature_type] -synonym: "coiled coil" EXACT [] -xref: http://en.wikipedia.org/wiki/Coiled_coil "wiki" -is_a: SO:0001079 ! polypeptide_structural_motif - -[Term] -id: SO:0001081 -name: helix_turn_helix -alt_id: BS:00147 -def: "A motif comprising two helices separated by a turn." [EBIBS:GAR] -subset: biosapiens -synonym: "helix turn helix" EXACT [] -synonym: "helix-turn-helix" EXACT [] -synonym: "HTH" RELATED BS [] -is_a: SO:0001079 ! polypeptide_structural_motif -relationship: has_part SO:0001114 ! peptide_helix -relationship: has_part SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001082 -name: polypeptide_sequencing_information -alt_id: BS:00125 -def: "Incompatibility in the sequence due to some experimental problem." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "sequencing_information" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0001083 -name: non_adjacent_residues -alt_id: BS:00182 -def: "Indicates that two consecutive residues in a fragment sequence are not consecutive in the full-length protein and that there are a number of unsequenced residues between them." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "non consecutive" EXACT [] -synonym: "non_cons" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! 
polypeptide_sequencing_information - -[Term] -id: SO:0001084 -name: non_terminal_residue -alt_id: BS:00072 -def: "The residue at an extremity of the sequence is not the terminal residue." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "non terminal" EXACT [] -synonym: "non_ter" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001085 -name: sequence_conflict -alt_id: BS:00069 -def: "Different sources report differing sequences." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "conflict" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001086 -name: sequence_uncertainty -alt_id: BS:00181 -def: "Describes the positions in a sequence where the authors are unsure about the sequence assignment." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "unsure" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001087 -name: cross_link -alt_id: BS:00178 -def: "Posttranslationally formed amino acid bonds." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "cross link" EXACT [] -synonym: "crosslink" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001088 -name: disulfide_bond -alt_id: BS:00028 -def: "The covalent bond between sulfur atoms that binds two peptide chains or different parts of one peptide chain and is a structural determinant in many protein molecules." [EBIBS:GAR, UniProt:curation_manual] -comment: 2 discreet & joined. 
-subset: biosapiens -synonym: "disulfid" RELATED [] -synonym: "disulfide" RELATED [] -synonym: "disulfide bond" RELATED [] -synonym: "disulphide" EXACT [] -synonym: "disulphide bond" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001089 -name: post_translationally_modified_region -alt_id: BS:00052 -def: "A region where a transformation occurs in a protein after it has been synthesized. This which may regulate, stabilize, crosslink or introduce new chemical functionalities in the protein." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mod_res" EXACT [uniprot:feature_type] -synonym: "modified residue" EXACT [] -synonym: "post_translational_modification" EXACT [] -xref: http://en.wikipedia.org/wiki/Post_translational_modification "wiki" -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001090 -name: covalent_binding_site -alt_id: BS:00246 -def: "Binding involving a covalent bond." [EBIBS:GAR] -subset: biosapiens -synonym: "covalent binding site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001091 -name: non_covalent_binding_site -alt_id: BS:00029 -def: "Binding site for any chemical group (co-enzyme, prosthetic group, etc.)." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "binding" RELATED [uniprot:curation] -synonym: "binding site" RELATED [] -synonym: "non covalent binding site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001092 -name: polypeptide_metal_contact -alt_id: BS:00027 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with metal ions." [EBIBS:GAR, SO:cb, UniProt:curation_manual] -comment: Residue is part of a binding site for a metal ion. -subset: biosapiens -synonym: "metal_binding" RELATED [] -is_a: SO:0001656 ! metal_binding_site -is_a: SO:0100002 ! 
molecular_contact_region - -[Term] -id: SO:0001093 -name: protein_protein_contact -alt_id: BS:00131 -def: "A binding site that, in the protein molecule, interacts selectively and non-covalently with polypeptide residues." [EBIBS:GAR, UniProt:Curation_manual] -subset: biosapiens -synonym: "protein protein contact" EXACT [] -synonym: "protein protein contact site" EXACT [] -synonym: "protein_protein_interaction" RELATED [] -xref: http://en.wikipedia.org/wiki/Protein_protein_interaction "wiki" -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001094 -name: polypeptide_calcium_ion_contact_site -alt_id: BS:00186 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with calcium ions." [EBIBS:GAR] -comment: Residue involved in contact with calcium. -subset: biosapiens -synonym: "ca bind" RELATED [] -synonym: "Ca_contact_site" EXACT [] -synonym: "polypeptide calcium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001095 -name: polypeptide_cobalt_ion_contact_site -alt_id: BS:00136 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with cobalt ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Co_contact_site" EXACT [] -synonym: "polypeptide cobalt ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001096 -name: polypeptide_copper_ion_contact_site -alt_id: BS:00146 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with copper ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Cu_contact_site" EXACT [] -synonym: "polypeptide copper ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001097 -name: polypeptide_iron_ion_contact_site -alt_id: BS:00137 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with iron ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Fe_contact_site" EXACT [] -synonym: "polypeptide iron ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001098 -name: polypeptide_magnesium_ion_contact_site -alt_id: BS:00187 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with magnesium ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Mg_contact_site" EXACT [] -synonym: "polypeptide magnesium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001099 -name: polypeptide_manganese_ion_contact_site -alt_id: BS:00140 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with manganese ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Mn_contact_site" EXACT [] -synonym: "polypeptide manganese ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001100 -name: polypeptide_molybdenum_ion_contact_site -alt_id: BS:00141 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with molybdenum ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Mo_contact_site" EXACT [] -synonym: "polypeptide molybdenum ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001101 -name: polypeptide_nickel_ion_contact_site -alt_id: BS:00142 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with nickel ions." [EBIBS:GAR] -subset: biosapiens -synonym: "Ni_contact_site" EXACT [] -synonym: "polypeptide nickel ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001102 -name: polypeptide_tungsten_ion_contact_site -alt_id: BS:00143 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with tungsten ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "polypeptide tungsten ion contact site" EXACT [] -synonym: "W_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001103 -name: polypeptide_zinc_ion_contact_site -alt_id: BS:00185 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with zinc ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "polypeptide zinc ion contact site" EXACT [] -synonym: "Zn_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001104 -name: catalytic_residue -alt_id: BS:00026 -def: "Amino acid involved in the activity of an enzyme." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "act_site" RELATED [uniprot:feature_type] -synonym: "active site residue" EXACT [] -synonym: "catalytic residue" EXACT [] -is_a: SO:0001237 ! amino_acid -relationship: part_of SO:0100019 ! polypeptide_catalytic_motif - -[Term] -id: SO:0001105 -name: polypeptide_ligand_contact -alt_id: BS:00157 -def: "Residues which interact with a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide ligand contact" EXACT [] -synonym: "protein-ligand interaction" RELATED [] -is_a: SO:0001657 ! ligand_binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001106 -name: asx_motif -alt_id: BS:00202 -def: "A motif of five consecutive residues and two H-bonds in which: Residue(i) is Aspartate or Asparagine (Asx), side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3), main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001107 -name: beta_bulge -alt_id: BS:00208 -def: "A motif of three residues within a beta-sheet in which the main chains of two consecutive residues are H-bonded to that of the third, and in which the dihedral angles are as follows: Residue(i): -140 degrees < phi(l) -20 degrees , -90 degrees < psi(l) < 40 degrees. Residue (i+1): -180 degrees < phi < -25 degrees or +120 degrees < phi < +180 degrees, +40 degrees < psi < +180 degrees or -180 degrees < psi < -120 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge" EXACT [] -xref: http://en.wikipedia.org/wiki/Beta_bulge "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001108 -name: beta_bulge_loop -alt_id: BS:00209 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds. Beta bulge loops often occur at the loop ends of beta-hairpins." [EBIBS:GAR, Http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001109 -name: beta_bulge_loop_five -alt_id: BS:00210 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+4), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+3), these loops have an RL nest at residues i+2 and i+3." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop five" EXACT [] -is_a: SO:0001108 ! 
beta_bulge_loop - -[Term] -id: SO:0001110 -name: beta_bulge_loop_six -alt_id: BS:00211 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+5), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+4), these loops have an RL nest at residues i+3 and i+4." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop six" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001111 -name: beta_strand -alt_id: BS:00042 -def: "A beta strand describes a single length of polypeptide chain that forms part of a beta sheet. A single continuous stretch of amino acids adopting an extended conformation of hydrogen bonds between the N-O and the C=O of another part of the peptide. This forms a secondary protein structure in which two or more extended polypeptide regions are hydrogen-bonded to one another in a planar array." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "strand" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Beta_sheet "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001112 -name: antiparallel_beta_strand -alt_id: BS:0000341 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (one running N-terminal to C-terminal and one running C-terminal to N-terminal). Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i) and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they form two mutual backbone hydrogen bonds to each other's flanking peptide groups; this is known as a close pair of hydrogen bonds. The peptide backbone dihedral angles (phi, psi) are about (-140 degrees, 135 degrees) in antiparallel sheets." 
[EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "antiparallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001113 -name: parallel_beta_strand -alt_id: BS:00151 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (both running N-terminal to C-terminal). This orientation is slightly less stable because it introduces nonplanarity in the inter-strand hydrogen bonding pattern. Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i)and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they do not hydrogen bond to each other; rather, one residue forms hydrogen bonds to the residues that flank the other (but not vice versa). For example, residue i may form hydrogen bonds to residues j - 1 and j + 1; this is known as a wide pair of hydrogen bonds. By contrast, residue j may hydrogen-bond to different residues altogether, or to none at all. The dihedral angles (phi, psi) are about (-120 degrees, 115 degrees) in parallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "parallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001114 -name: peptide_helix -alt_id: BS:00152 -def: "A helix is a secondary_structure conformation where the peptide backbone forms a coil." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "helix" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001115 -name: left_handed_peptide_helix -alt_id: BS:00222 -def: "A left handed helix is a region of peptide where the coiled conformation turns in an anticlockwise, left handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix-l" RELATED [] -synonym: "left handed helix" EXACT [] -is_a: SO:0001114 ! 
peptide_helix - -[Term] -id: SO:0001116 -name: right_handed_peptide_helix -alt_id: BS:0000339 -def: "A right handed helix is a region of peptide where the coiled conformation turns in a clockwise, right handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix" RELATED BS [] -synonym: "right handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001117 -name: alpha_helix -alt_id: BS:00040 -def: "The helix has 3.6 residues per turn which corersponds to a translation of 1.5 angstroms (= 0.15 nm) along the helical axis. Every backbone N-H group donates a hydrogen bond to the backbone C=O group of the amino acid four residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "a-helix" RELATED BS [] -synonym: "helix" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Alpha_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001118 -name: pi_helix -alt_id: BS:00153 -def: "The pi helix has 4.1 residues per turn and a translation of 1.15 (=0.115 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid five residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "pi helix" EXACT [] -xref: http://en.wikipedia.org/wiki/Pi_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001119 -name: three_ten_helix -alt_id: BS:0000340 -def: "The 3-10 helix has 3 residues per turn with a translation of 2.0 angstroms (=0.2 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid three residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "3(10) helix" EXACT [] -synonym: "3-10 helix" EXACT [] -synonym: "310 helix" EXACT [] -synonym: "three ten helix" EXACT [] -xref: http://en.wikipedia.org/wiki/310_helix "wiki" -is_a: SO:0001116 ! 
right_handed_peptide_helix - -[Term] -id: SO:0001120 -name: polypeptide_nest_motif -alt_id: BS:00223 -def: "A motif of two consecutive residues with dihedral angles. Nest should not have Proline as any residue. Nests frequently occur as parts of other motifs such as Schellman loops." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest" RELATED BS [] -synonym: "nest_motif" EXACT [] -synonym: "polypeptide nest motif" RELATED [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001121 -name: polypeptide_nest_left_right_motif -alt_id: BS:00224 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_left_right" EXACT [] -synonym: "nest_lr" EXACT [] -synonym: "polypeptide nest left right motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001122 -name: polypeptide_nest_right_left_motif -alt_id: BS:00225 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_right_left" EXACT [] -synonym: "nest_rl" EXACT [] -synonym: "polypeptide nest right left motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001123 -name: schellmann_loop -alt_id: BS:00226 -def: "A motif of six or seven consecutive residues that contains two H-bonds." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "paperclip" RELATED BS [] -synonym: "paperclip loop" RELATED [] -synonym: "schellmann loop" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001124 -name: schellmann_loop_seven -alt_id: BS:00228 -def: "Wild type: A motif of seven consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+6), the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+5)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop seven" EXACT [] -synonym: "seven-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001125 -name: schellmann_loop_six -alt_id: BS:00227 -def: "Common Type: A motif of six consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+5) the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop six" EXACT [] -synonym: "six-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001126 -name: serine_threonine_motif -alt_id: BS:00229 -def: "A motif of five consecutive residues and two hydrogen bonds in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3) , the main-chain CO group of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine motif" EXACT [] -synonym: "st motif" EXACT [] -synonym: "st_motif" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001127 -name: serine_threonine_staple_motif -alt_id: BS:00230 -def: "A motif of four or five consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain OH of residue(i) is H-bonded to the main-chain CO of residue(i3) or (i4), Phi angles of residues(i1), (i2) and (i3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine threonine staple motif" EXACT [] -synonym: "st_staple" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001128 -name: polypeptide_turn_motif -alt_id: BS:00148 -def: "A reversal in the direction of the backbone of a protein that is stabilized by hydrogen bond between backbone NH and CO groups, involving no more than 4 amino acid residues." [EBIBS:GAR, uniprot:feature_type] -comment: Range. -subset: biosapiens -synonym: "turn" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001129 -name: asx_turn_left_handed_type_one -alt_id: BS:00206 -def: "Left handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type one" EXACT [] -synonym: "asx_turn_il" RELATED [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001130 -name: asx_turn_left_handed_type_two -alt_id: BS:00204 -def: "Left handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type two" EXACT [] -synonym: "asx_turn_iil" EXACT [] -is_a: SO:0000912 ! 
asx_turn - -[Term] -id: SO:0001131 -name: asx_turn_right_handed_type_two -alt_id: BS:00205 -def: "Right handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn right handed type two" EXACT [] -synonym: "asx_turn_iir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001132 -name: asx_turn_right_handed_type_one -alt_id: BS:00207 -def: "Right handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn type right handed type one" EXACT [] -synonym: "asx_turn_ir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001133 -name: beta_turn -alt_id: BS:00212 -def: "A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles of the second and third residues, which are the basis for sub-categorization." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001134 -name: beta_turn_left_handed_type_one -alt_id: BS:00215 -def: "Left handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles:- Residue(i+1): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees. 
Residue(i+2): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type one" EXACT [] -synonym: "beta_turn_il" EXACT [] -synonym: "type I' beta turn" EXACT [] -synonym: "type I' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001135 -name: beta_turn_left_handed_type_two -alt_id: BS:00213 -def: "Left handed type II: A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees > phi > -20 degrees, +80 degrees > psi > +180 degrees. Residue(i+2): +20 degrees > phi > +140 degrees, -40 degrees > psi > +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type two" EXACT [] -synonym: "beta_turn_iil" EXACT [] -synonym: "type II' beta turn" EXACT [] -synonym: "type II' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001136 -name: beta_turn_right_handed_type_one -alt_id: BS:00216 -def: "Right handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+2): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type one" EXACT [] -synonym: "beta_turn_ir" EXACT [] -synonym: "type I beta turn" EXACT [] -synonym: "type I turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001137 -name: beta_turn_right_handed_type_two -alt_id: BS:00214 -def: "Right handed type II:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, +80 degrees < psi < +180 degrees. Residue(i+2): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type two" EXACT [] -synonym: "beta_turn_iir" EXACT [] -synonym: "type II beta turn" EXACT [] -synonym: "type II turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001138 -name: gamma_turn -alt_id: BS:00219 -def: "Gamma turns, defined for 3 residues i,( i+1),( i+2) if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001139 -name: gamma_turn_classic -alt_id: BS:00220 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=75.0 - psi(i+1)=-64.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "classic gamma turn" EXACT [] -synonym: "gamma turn classic" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001140 -name: gamma_turn_inverse -alt_id: BS:00221 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=-79.0 - psi(i+1)=69.0." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn inverse" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001141 -name: serine_threonine_turn -alt_id: BS:00231 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine turn" EXACT [] -synonym: "st_turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001142 -name: st_turn_left_handed_type_one -alt_id: BS:00234 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type one" EXACT [] -synonym: "st_turn_il" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001143 -name: st_turn_left_handed_type_two -alt_id: BS:00232 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type two" EXACT [] -synonym: "st_turn_iil" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001144 -name: st_turn_right_handed_type_one -alt_id: BS:00235 -def: "The peptide twists in an clockwise, right handed manner. 
The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type one" EXACT [] -synonym: "st_turn_ir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001145 -name: st_turn_right_handed_type_two -alt_id: BS:00233 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type two" EXACT [] -synonym: "st_turn_iir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001146 -name: polypeptide_variation_site -alt_id: BS:00336 -def: "A site of sequence variation (alteration). Alternative sequence due to naturally occuring events such as polymorphisms and altermatve splicing or experimental methods such as site directed mutagenesis." [EBIBS:GAR, SO:ke] -comment: For example, was a substitution natural or mutated as part of an experiment? This term is added to merge the biosapiens term sequence_variations. -subset: biosapiens -synonym: "sequence_variations" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001147 -name: natural_variant_site -alt_id: BS:00071 -def: "Describes the natural sequence variants due to polymorphisms, disease-associated mutations, RNA editing and variations between strains, isolates or cultivars." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. 
-subset: biosapiens -synonym: "natural_variant" BROAD [] -synonym: "sequence variation" BROAD [] -synonym: "variant" BROAD [uniprot:feature_type] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001148 -name: mutated_variant_site -alt_id: BS:00036 -def: "Site which has been experimentally altered." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mutagen" EXACT BS [uniprot:feature_type] -synonym: "mutagenesis" EXACT [] -synonym: "mutated_site" EXACT [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001149 -name: alternate_sequence_site -alt_id: BS:00073 -alt_id: SO:0001065 -def: "Description of sequence variants produced by alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "alternative_sequence" EXACT [] -synonym: "isoform" NARROW [] -synonym: "sequence variation" NARROW [] -synonym: "var_seq" EXACT [uniprot:feature_type] -synonym: "varsplic" NARROW [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001150 -name: beta_turn_type_six -def: "A motif of four consecutive peptide resides of type VIa or type VIb and where the i+2 residue is cis-proline." [SO:cb] -subset: biosapiens -synonym: "beta turn type six" EXACT [] -synonym: "cis-proline loop" EXACT [] -synonym: "type VI beta turn" EXACT [] -synonym: "type VI turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001151 -name: beta_turn_type_six_a -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -90 degrees, psi ~ 0 degrees." 
[PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six a" EXACT [] -synonym: "type VIa beta turn" EXACT [] -synonym: "type VIa turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001152 -name: beta_turn_type_six_a_one -subset: biosapiens -synonym: "beta turn type six a one" EXACT [] -synonym: "type VIa1 beta turn" EXACT [] -synonym: "type VIa1 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001153 -name: beta_turn_type_six_a_two -subset: biosapiens -synonym: "beta turn type six a two" EXACT [] -synonym: "type VIa2 beta turn" EXACT [] -synonym: "type VIa2 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001154 -name: beta_turn_type_six_b -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -120 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -60 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six b" EXACT [] -synonym: "type VIb beta turn" EXACT [] -synonym: "type VIb turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001155 -name: beta_turn_type_eight -def: "A motif of four consecutive peptide residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ -30 degrees. Residue(i+2): phi ~ -120 degrees, psi ~ 120 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type eight" EXACT [] -synonym: "type VIII beta turn" EXACT [] -synonym: "type VIII turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001156 -name: DRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -10 and -60 relative to the TSS. Consensus sequence is WATCGATW." [PMID:12537576] -comment: This consensus sequence was identified computationally using the MEME algorithm within core promoter sequences from -60 to +40, with an E value of 1.7e-183. Tends to co-occur with Motif 7. Tends to not occur with DPE motif (SO:0000015) or motif 10. -synonym: "DRE motif" EXACT [] -synonym: "NDM4" EXACT [] -synonym: "WATCGATW_motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001157 -name: DMv4_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements with respect to the TSS (+1). Consensus sequence is YGGTCACACTR. Marked spatial preference within core promoter; tend to occur near the TSS, although not as tightly as INR (SO:0000014)." [PMID:16827941:12537576] -synonym: "directional motif v4" EXACT [] -synonym: "DMv4" EXACT [] -synonym: "DMv4 motif" EXACT [] -synonym: "motif 1 element" EXACT [] -synonym: "promoter motif 1" EXACT [] -synonym: "YGGTCACATR" NARROW [] -is_a: SO:0001659 ! promoter_element - -[Term] -id: SO:0001158 -name: E_box_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and +1 relative to the TSS. Consensus sequence is AWCAGCTGWT. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015)." [PMID:12537576:16827941] -synonym: "AWCAGCTGWT" NARROW [] -synonym: "E box motif" EXACT [] -synonym: "generic E box motif" EXACT [] -synonym: "NDM5" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001159 -name: DMv5_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -50 and -10 relative to the TSS. Consensus sequence is KTYRGTATWTTT. Tends to co-occur with DMv4 (SO:0001157) . Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576:16827941] -synonym: "directional motif v5" EXACT [] -synonym: "DMv5" EXACT [] -synonym: "DMv5 motif" EXACT [] -synonym: "KTYRGTATWTTT" NARROW [] -synonym: "promoter motif 6" RELATED [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001160 -name: DMv3_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -30 and +15 relative to the TSS. Consensus sequence is KNNCAKCNCTRNY. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015) or MTE (0001162)." [PMID:12537576:16827941] -synonym: "directional motif v3" EXACT [] -synonym: "DMv3" EXACT [] -synonym: "DMv3 motif" EXACT [] -synonym: "KNNCAKCNCTRNY" NARROW [] -synonym: "promoter motif 7" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001161 -name: DMv2_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and -45 relative to the TSS. Consensus sequence is MKSYGGCARCGSYSS. Tends to co-occur with DMv3 (SO:0001160). Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576:16827941] -synonym: "directional motif v2" EXACT [] -synonym: "DMv2" EXACT [] -synonym: "DMv2 motif" EXACT [] -synonym: "MKSYGGCARCGSYSS" NARROW [] -synonym: "promoter motif 8" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001162 -name: MTE -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between +20 and +30 relative to the TSS. Consensus sequence is CSARCSSAACGS. Tends to co-occur with INR motif (SO:0000014). Tends to not occur with DPE motif (SO:0000015) or DMv5 (SO:0001159)." [PMID:12537576:15231738, PMID:16858867] -synonym: "CSARCSSAACGS" NARROW [] -synonym: "motif ten element" EXACT [] -synonym: "motif_ten_element" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter - -[Term] -id: SO:0001163 -name: INR1_motif -def: "A promoter motif with consensus sequence TCATTCG." [PMID:16827941] -synonym: "directional motif p3" EXACT [] -synonym: "directional promoter motif 3" EXACT [] -synonym: "DMp3" EXACT [] -synonym: "INR1 motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001164 -name: DPE1_motif -def: "A promoter motif with consensus sequence CGGACGT." [PMID:16827941] -synonym: "directional motif 5" EXACT [] -synonym: "directional promoter motif 5" RELATED [] -synonym: "DMp5" EXACT [] -synonym: "DPE1 motif" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001165 -name: DMv1_motif -def: "A promoter motif with consensus sequence CARCCCT." [PMID:16827941] -synonym: "directional promoter motif v1" RELATED [] -synonym: "DMv1" RELATED [] -synonym: "DMv1 motif" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001166 -name: GAGA_motif -def: "A non directional promoter motif with consensus sequence GAGAGCG." [PMID:16827941] -synonym: "GAGA" EXACT [] -synonym: "GAGA motif" EXACT [] -synonym: "NDM1" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001167 -name: NDM2_motif -def: "A non directional promoter motif with consensus CGMYGYCR." [PMID:16827941] -synonym: "NDM2" EXACT [] -synonym: "NDM2 motif" EXACT [] -synonym: "non directional promoter motif 2" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001168 -name: NDM3_motif -def: "A non directional promoter motif with consensus sequence GAAAGCT." [PMID:16827941] -synonym: "NDM3" EXACT [] -synonym: "NDM3 motif" EXACT [] -synonym: "non directional motif 3" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001169 -name: ds_RNA_viral_sequence -def: "A ds_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded RNA." [SO:ke] -synonym: "double stranded RNA virus sequence" EXACT [] -synonym: "ds RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001170 -name: polinton -def: "A kind of DNA transposon that populates the genomes of protists, fungi, and animals, characterized by a unique set of proteins necessary for their transposition, including a protein-primed DNA polymerase B, retroviral integrase, cysteine protease, and ATPase. Polintons are characterized by 6-bp target site duplications, terminal-inverted repeats that are several hundred nucleotides long, and 5'-AG and TC-3' termini. Polintons exist as autonomous and nonautonomous elements." [PMID:16537396] -synonym: "maverick element" RELATED [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0001171 -name: rRNA_21S -def: "A component of the large ribosomal subunit in mitochondrial rRNA." [RSC:cb] -synonym: "21S LSU rRNA" EXACT [] -synonym: "21S ribosomal RNA" EXACT [] -synonym: "21S rRNA" EXACT [] -synonym: "rRNA 21S" EXACT [] -is_a: SO:0000651 ! 
large_subunit_rRNA - -[Term] -id: SO:0001172 -name: tRNA_region -def: "A region of a tRNA." [RSC:cb] -synonym: "tRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000253 ! tRNA - -[Term] -id: SO:0001173 -name: anticodon_loop -def: "A sequence of seven nucleotide bases in tRNA which contains the anticodon. It has the sequence 5'-pyrimidine-purine-anticodon-modified purine-any base-3." [ISBN:0716719207] -synonym: "anti-codon loop" EXACT [] -synonym: "anticodon loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001174 -name: anticodon -def: "A sequence of three nucleotide bases in tRNA which recognizes a codon in mRNA." [RSC:cb] -synonym: "anti-codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Anticodon "wiki" -is_a: SO:0001172 ! tRNA_region -relationship: part_of SO:0001173 ! anticodon_loop - -[Term] -id: SO:0001175 -name: CCA_tail -def: "Base sequence at the 3' end of a tRNA. The 3'-hydroxyl group on the terminal adenosine is the attachment point for the amino acid." [ISBN:0716719207] -synonym: "CCA sequence" EXACT [] -synonym: "CCA tail" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001176 -name: DHU_loop -def: "Non-base-paired sequence of nucleotide bases in tRNA. It contains several dihydrouracil residues." [ISBN:071671920] -synonym: "D loop" RELATED [] -synonym: "DHU loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001177 -name: T_loop -def: "Non-base-paired sequence of three nucleotide bases in tRNA. It has sequence T-Psi-C." [ISBN:0716719207] -synonym: "T loop" EXACT [] -synonym: "TpsiC loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001178 -name: pyrrolysine_tRNA_primary_transcript -def: "A primary transcript encoding pyrrolysyl tRNA (SO:0000766)." [RSC:cb] -synonym: "pyrrolysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0001179 -name: U3_snoRNA -def: "U3 snoRNA is a member of the box C/D class of small nucleolar RNAs. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -comment: The definition is most of the old definition for snoRNA (SO:0000275). -synonym: "small nucleolar RNA U3" EXACT [] -synonym: "snoRNA U3" EXACT [] -synonym: "U3 small nucleolar RNA" EXACT [] -synonym: "U3 snoRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Small_nucleolar_RNA_U3 "wiki" -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0001180 -name: AU_rich_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is rich in AUUUA pentamers. Messenger RNAs bearing multiple AU-rich elements are often unstable." [PMID:7892223] -synonym: "ARE" RELATED [] -synonym: "AU rich element" EXACT [] -synonym: "AU-rich element" EXACT [] -xref: http://en.wikipedia.org/wiki/AU-rich_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! 
three_prime_UTR - -[Term] -id: SO:0001181 -name: Bruno_response_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is bound by the Drosophila Bruno protein and its homologs." [PMID:10893231] -comment: Not to be confused with BRE_motif (SO:0000016), which binds transcription factor II B. -synonym: "BRE" RELATED [] -synonym: "Bruno response element" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001182 -name: iron_responsive_element -def: "A regulatory sequence found in the 5' and 3' UTRs of many mRNAs which encode iron-binding proteins. It has a hairpin structure and is recognized by trans-acting proteins known as iron-regulatory proteins." [PMID:3198610, PMID:8710843] -synonym: "IRE" EXACT [] -synonym: "iron responsive element" EXACT [] -xref: http://en.wikipedia.org/wiki/Iron_responsive_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0001183 -name: morpholino_backbone -def: "An attribute describing a sequence composed of nucleobases bound to a morpholino backbone. A morpholino backbone consists of morpholine (CHEBI:34856) rings connected by phosphorodiamidate linkages." [RSC:cb] -comment: Do not use this for feature annotation. Use morpholino_oligo (SO:0000034) instead. -synonym: "morpholino backbone" EXACT [] -xref: http://en.wikipedia.org/wiki/Morpholino "wiki" -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001184 -name: PNA -def: "An attribute describing a sequence composed of peptide nucleic acid (CHEBI:48021), a chemical consisting of nucleobases bound to a backbone composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [RSC:cb] -comment: Do not use this term for feature annotation. Use PNA_oligo (SO:0001011) instead. -synonym: "peptide nucleic acid" RELATED [] -is_a: SO:0000348 ! 
nucleic_acid - -[Term] -id: SO:0001185 -name: enzymatic -def: "An attribute describing the sequence of a transcript that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use enzymatic_RNA (SO:0000372) instead. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001186 -name: ribozymic -def: "An attribute describing the sequence of a transcript that has catalytic activity even without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use ribozyme (SO:0000374) instead. -is_a: SO:0001185 ! enzymatic - -[Term] -id: SO:0001187 -name: pseudouridylation_guide_snoRNA -def: "A snoRNA that specifies the site of pseudouridylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA pseudouridylation guide activity (GO:0030558). -synonym: "pseudouridylation guide snoRNA" EXACT [] -is_a: SO:0000594 ! H_ACA_box_snoRNA - -[Term] -id: SO:0001188 -name: LNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of 'locked' deoxyribose rings connected to a phosphate backbone. The deoxyribose unit's conformation is 'locked' by a 2'-C,4'-C-oxymethylene link." [CHEBI:48010] -comment: Do not use this term for feature annotation. Use LNA_oligo (SO:0001189) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001189 -name: LNA_oligo -def: "An oligo composed of LNA residues." [RSC:cb] -synonym: "LNA oligo" EXACT [] -synonym: "locked nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Locked_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001188 ! LNA -relationship: has_quality SO:0001188 ! 
LNA - -[Term] -id: SO:0001190 -name: TNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of threose rings connected to a phosphate backbone." [CHEBI:48019] -comment: Do not use this term for feature annotation. Use TNA_oligo (SO:0001191) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001191 -name: TNA_oligo -def: "An oligo composed of TNA residues." [RSC:cb] -synonym: "threose nucleic acid" EXACT [] -synonym: "TNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Threose_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001190 ! TNA -relationship: has_quality SO:0001190 ! TNA - -[Term] -id: SO:0001192 -name: GNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of an acyclic three-carbon propylene glycol connected to a phosphate backbone. It has two enantiomeric forms, (R)-GNA and (S)-GNA." [CHEBI:48015] -comment: Do not use this term for feature annotation. Use GNA_oligo (SO:0001192) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001193 -name: GNA_oligo -def: "An oligo composed of GNA residues." [RSC:cb] -synonym: "glycerol nucleic acid" EXACT [] -synonym: "glycol nucleic acid" EXACT [] -synonym: "GNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Glycerol_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001192 ! GNA -relationship: has_quality SO:0001192 ! GNA - -[Term] -id: SO:0001194 -name: R_GNA -def: "An attribute describing a GNA sequence in the (R)-GNA enantiomer." [CHEBI:48016] -comment: Do not use this term for feature annotation. Use R_GNA_oligo (SO:0001195) instead. -synonym: "R GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001195 -name: R_GNA_oligo -def: "An oligo composed of (R)-GNA residues." 
[RSC:cb] -synonym: "(R)-glycerol nucleic acid" EXACT [] -synonym: "(R)-glycol nucleic acid" EXACT [] -synonym: "R GNA oligo" EXACT [] -is_a: SO:0001193 ! GNA_oligo -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001194 ! R_GNA -relationship: has_quality SO:0001194 ! R_GNA - -[Term] -id: SO:0001196 -name: S_GNA -def: "An attribute describing a GNA sequence in the (S)-GNA enantiomer." [CHEBI:48017] -comment: Do not use this term for feature annotation. Use S_GNA_oligo (SO:0001197) instead. -synonym: "S GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001197 -name: S_GNA_oligo -def: "An oligo composed of (S)-GNA residues." [RSC:cb] -synonym: "(S)-glycerol nucleic acid" EXACT [] -synonym: "(S)-glycol nucleic acid" EXACT [] -synonym: "S GNA oligo" EXACT [] -is_a: SO:0001193 ! GNA_oligo -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001196 ! S_GNA -relationship: has_quality SO:0001196 ! S_GNA - -[Term] -id: SO:0001198 -name: ds_DNA_viral_sequence -def: "A ds_DNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded DNA." [SO:ke] -synonym: "double stranded DNA virus" EXACT [] -synonym: "ds DNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001199 -name: ss_RNA_viral_sequence -def: "A ss_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as single stranded RNA." [SO:ke] -synonym: "single strand RNA virus" EXACT [] -synonym: "ss RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001200 -name: negative_sense_ssRNA_viral_sequence -def: "A negative_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that is complementary to mRNA and must be converted to positive sense RNA by RNA polymerase before translation." 
[SO:ke] -synonym: "negative sense single stranded RNA virus" RELATED [] -synonym: "negative sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001201 -name: positive_sense_ssRNA_viral_sequence -def: "A positive_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that can be immediately translated by the host." [SO:ke] -synonym: "positive sense single stranded RNA virus" RELATED [] -synonym: "positive sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001202 -name: ambisense_ssRNA_viral_sequence -def: "A ambisense_RNA_virus is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus with both messenger and anti messenger polarity." [SO:ke] -synonym: "ambisense single stranded RNA virus" EXACT [] -synonym: "ambisense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001203 -name: RNA_polymerase_promoter -def: "A region (DNA) to which RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "RNA polymerase promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0001204 -name: Phage_RNA_Polymerase_Promoter -def: "A region (DNA) to which Bacteriophage RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "Phage RNA Polymerase Promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0001205 -name: SP6_RNA_Polymerase_Promoter -def: "A region (DNA) to which the SP6 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "SP6 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001206 -name: T3_RNA_Polymerase_Promoter -def: "A DNA sequence to which the T3 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T3 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! 
Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001207 -name: T7_RNA_Polymerase_Promoter -def: "A region (DNA) to which the T7 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T7 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001208 -name: five_prime_EST -def: "An EST read from the 5' end of a transcript that usually codes for a protein. These regions tend to be conserved across species and do not change much within a gene family." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "5' EST" EXACT [] -synonym: "five prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001209 -name: three_prime_EST -def: "An EST read from the 3' end of a transcript. They are more likely to fall within non-coding, or untranslated regions(UTRs)." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "3' EST" EXACT [] -synonym: "three prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001210 -name: translational_frameshift -def: "The region of mRNA (not divisible by 3 bases) that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "ribosomal frameshift" EXACT [] -synonym: "translational frameshift" EXACT [] -xref: http://en.wikipedia.org/wiki/Translational_frameshift "wiki" -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0001211 -name: plus_1_translational_frameshift -def: "The region of mRNA 1 base long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 1 ribosomal frameshift" EXACT [] -synonym: "plus 1 translational frameshift" EXACT [] -is_a: SO:0001210 ! 
translational_frameshift - -[Term] -id: SO:0001212 -name: plus_2_translational_frameshift -def: "The region of mRNA 2 bases long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 2 ribosomal frameshift" EXACT [] -synonym: "plus 2 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001213 -name: group_III_intron -def: "Group III introns are introns found in the mRNA of the plastids of euglenoid protists. They are spliced by a two step transesterification with bulged adenosine as initiating nucleophile." [PMID:11377794] -comment: GO:0000374. -synonym: "group III intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_III_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -def: "The maximal intersection of exon and UTR." [SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with a stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001216 -name: endonuclease_spliced_intron -def: "An intron that spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -synonym: "endonuclease spliced intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0001217 -name: protein_coding_gene -synonym: "protein coding gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000010 ! 
protein_coding -relationship: has_quality SO:0000010 ! protein_coding - -[Term] -id: SO:0001218 -name: transgenic_insertion -def: "An insertion that derives from another organism, via the use of recombinant DNA technology." [SO:bm] -synonym: "transgenic insertion" EXACT [] -is_a: SO:0000667 ! insertion -intersection_of: SO:0000667 ! insertion -intersection_of: has_quality SO:0000781 ! transgenic -relationship: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0001219 -name: retrogene -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000569 ! retrotransposed -relationship: has_quality SO:0000569 ! retrotransposed - -[Term] -id: SO:0001220 -name: silenced_by_RNA_interference -def: "An attribute describing an epigenetic process where a gene is inactivated by RNA interference." [RSC:cb] -comment: RNA interference is GO:0016246. -synonym: "silenced by RNA interference" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001221 -name: silenced_by_histone_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by histone modification." [RSC:cb] -comment: Histone modification is GO:0016570. -synonym: "silenced by histone modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001222 -name: silenced_by_histone_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone methylation." [RSC:cb] -comment: Histone methylation is GO:0016571. -synonym: "silenced by histone methylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001223 -name: silenced_by_histone_deacetylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone deacetylation." [RSC:cb] -comment: Histone deacetylation is GO:0016573. -synonym: "silenced by histone deacetylation" EXACT [] -is_a: SO:0001221 ! 
silenced_by_histone_modification - -[Term] -id: SO:0001224 -name: gene_silenced_by_RNA_interference -def: "A gene that is silenced by RNA interference." [SO:xp] -synonym: "gene silenced by RNA interference" EXACT [] -synonym: "RNA interference silenced gene" EXACT [] -synonym: "RNAi silenced gene" EXACT [] -is_a: SO:0000127 ! silenced_gene -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001220 ! silenced_by_RNA_interference -relationship: has_quality SO:0001220 ! silenced_by_RNA_interference - -[Term] -id: SO:0001225 -name: gene_silenced_by_histone_modification -def: "A gene that is silenced by histone modification." [SO:xp] -synonym: "gene silenced by histone modification" EXACT [] -is_a: SO:0000127 ! silenced_gene -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001221 ! silenced_by_histone_modification -relationship: has_quality SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001226 -name: gene_silenced_by_histone_methylation -def: "A gene that is silenced by histone methylation." [SO:xp] -synonym: "gene silenced by histone methylation" EXACT [] -is_a: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001222 ! silenced_by_histone_methylation -relationship: has_quality SO:0001222 ! silenced_by_histone_methylation - -[Term] -id: SO:0001227 -name: gene_silenced_by_histone_deacetylation -def: "A gene that is silenced by histone deacetylation." [SO:xp] -synonym: "gene silenced by histone deacetylation" EXACT [] -is_a: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001223 ! silenced_by_histone_deacetylation -relationship: has_quality SO:0001223 ! 
silenced_by_histone_deacetylation - -[Term] -id: SO:0001228 -name: dihydrouridine -def: "A modified RNA base in which the 5,6-dihydrouracil is bound to the ribose ring." [RSC:cb] -synonym: "D" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Dihydrouridine "wiki" -xref: RNAMOD:051 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001229 -name: pseudouridine -def: "A modified RNA base in which the 5- position of the uracil is bound to the ribose ring instead of the 4- position." [RSC:cb] -comment: The free molecule is CHEBI:17802. -synonym: "Y" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Pseudouridine "wiki" -xref: RNAMOD:050 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001230 -name: inosine -def: "A modified RNA base in which hypoxanthine is bound to the ribose ring." [http://library.med.utah.edu/RNAmods/, RSC:cb] -comment: The free molecule is CHEBI:17596. -synonym: "I" RELATED [] -synonym: "RNAMOD:017" RELATED [] -xref: http://en.wikipedia.org/wiki/Inosine "wiki" -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001231 -name: seven_methylguanine -def: "A modified RNA base in which guanine is methylated at the 7- position." [RSC:cb] -comment: The free molecule is CHEBI:2274. -synonym: "7-methylguanine" EXACT [] -synonym: "seven methylguanine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001232 -name: ribothymidine -def: "A modified RNA base in which thymine is bound to the ribose ring." [RSC:cb] -comment: The free molecule is CHEBI:30832. -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001233 -name: methylinosine -def: "A modified RNA base in which methylhypoxanthine is bound to the ribose ring." [RSC:cb] -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001234 -name: mobile -def: "An attribute describing a feature that has either intra-genome or intracellular mobility." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Mobile "wiki" -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0001235 -name: replicon -def: "A region containing at least one unique origin of replication and a unique termination site." [ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001237 -name: amino_acid -def: "A sequence feature that corresponds to a single amino acid residue in a polypeptide." [RSC:cb] -comment: Probably in the future this will cross reference to Chebi. -synonym: "amino acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Amino_acid "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0001238 -name: major_TSS -synonym: "major transcription start site" EXACT [] -synonym: "major TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001239 -name: minor_TSS -synonym: "minor TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001240 -name: TSS_region -def: "The region of a gene from the 5' most TSS to the 3' TSS." [BBOP:nw] -synonym: "TSS region" EXACT [] -is_a: SO:0000842 ! gene_component_region -relationship: has_part SO:0000315 ! TSS - -[Term] -id: SO:0001241 -name: encodes_alternate_transcription_start_sites -synonym: "encodes alternate transcription start sites" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0001243 -name: miRNA_primary_transcript_region -def: "A part of an miRNA primary_transcript." [SO:ke] -synonym: "miRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0001244 -name: pre_miRNA -def: "The 60-70 nucleotide region remain after Drosha processing of the primary transcript, that folds back upon itself to form a hairpin sructure." 
[SO:ke] -synonym: "pre-miRNA" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0001245 -name: miRNA_stem -def: "The stem of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA stem" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001246 -name: miRNA_loop -def: "The loop of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA loop" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001247 -name: synthetic_oligo -def: "An oligo composed of synthetic nucleotides." [SO:ke] -synonym: "synthetic oligo" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0001248 -name: assembly -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001249 -name: fragment_assembly -def: "A fragment assembly is a genome assembly that orders overlapping fragments of the genome based on landmark sequences. The base pair distance between the landmarks is known allowing additivity of lengths." [SO:ke] -synonym: "fragment assembly" EXACT [] -synonym: "physical map" EXACT [] -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0001250 -name: fingerprint_map -def: "A fingerprint_map is a physical map composed of restriction fragments." [SO:ke] -synonym: "BACmap" EXACT [] -synonym: "fingerprint map" EXACT [] -synonym: "FPC" EXACT [] -synonym: "FPCmap" EXACT [] -synonym: "restriction map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000412 ! 
restriction_fragment - -[Term] -id: SO:0001251 -name: STS_map -def: "An STS map is a physical map organized by the unique STS landmarks." [SO:ke] -synonym: "STS map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001252 -name: RH_map -def: "A radiation hybrid map is a physical map." [SO:ke] -synonym: "radiation hybrid map" EXACT [] -synonym: "RH map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001253 -name: sonicate_fragment -def: "A DNA fragment generated by sonication. Sonication is a technique used to sheer DNA into smaller fragments." [SO:ke] -synonym: "sonicate fragment" EXACT [] -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0001254 -name: polyploid -def: "A kind of chromosome variation where the chromosome complement is an exact multiple of the haploid number and is greater than the diploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Polyploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0001255 -name: autopolyploid -def: "A polyploid where the multiple chromosome set was derived from the same organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Autopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001256 -name: allopolyploid -def: "A polyploid where the multiple chromosome set was derived from a different organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Allopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001257 -name: homing_endonuclease_binding_site -def: "The binding site (recognition site) of a homing endonuclease. The binding site is typically large." [SO:ke] -synonym: "homing endonuclease binding site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0001258 -name: octamer_motif -def: "A sequence element characteristic of some RNA polymerase II promoters with sequence ATTGCAT that binds Pou-domain transcription factors." 
[GOC:dh, PMID:3095662] -comment: Nature. 1986 Oct 16-22;323(6089):640-3. -synonym: "octamer motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001259 -name: apicoplast_chromosome -def: "A chromosome originating in an apicoplast." [SO:xp] -synonym: "apicoplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000743 ! apicoplast_sequence -relationship: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0001260 -name: sequence_collection -def: "A collection of discontinuous sequences." [SO:ke] -synonym: "sequence collection" EXACT [] - -[Term] -id: SO:0001261 -name: overlapping_feature_set -def: "A continuous region of sequence composed of the overlapping of multiple sequence_features, which ultimately provides evidence for another sequence_feature." [SO:ke] -comment: This feature was requested by Nicole, tracker id 1911479. It is required to gather evidence together for annotation. An example would be overlapping ESTs that support an mRNA. -synonym: "overlapping feature set" EXACT [] -is_a: SO:0000703 ! experimental_result_region - -[Term] -id: SO:0001262 -name: overlapping_EST_set -def: "A continous experimental result region extending the length of multiple overlapping EST's." [SO:ke] -synonym: "overlapping EST set" EXACT [] -is_a: SO:0001261 ! overlapping_feature_set -relationship: has_part SO:0000345 ! EST - -[Term] -id: SO:0001263 -name: ncRNA_gene -synonym: "ncRNA gen" EXACT [] -synonym: "ncRNA gene" EXACT [] -synonym: "non-coding RNA gene" RELATED [] -is_a: SO:0000704 ! gene - -[Term] -id: SO:0001264 -name: gRNA_gene -synonym: "gRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000979 ! gRNA_encoding -relationship: has_quality SO:0000979 ! 
gRNA_encoding - -[Term] -id: SO:0001265 -name: miRNA_gene -synonym: "miRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000571 ! miRNA_encoding -relationship: has_quality SO:0000571 ! miRNA_encoding - -[Term] -id: SO:0001266 -name: scRNA_gene -synonym: "scRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000575 ! scRNA_encoding -relationship: has_quality SO:0000575 ! scRNA_encoding - -[Term] -id: SO:0001267 -name: snoRNA_gene -synonym: "snoRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000578 ! snoRNA_encoding -relationship: has_quality SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0001268 -name: snRNA_gene -synonym: "snRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0001263 ! ncRNA_gene - -[Term] -id: SO:0001269 -name: SRP_RNA_gene -synonym: "SRP RNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000642 ! SRP_RNA_encoding -relationship: has_quality SO:0000642 ! SRP_RNA_encoding - -[Term] -id: SO:0001270 -name: stRNA_gene -synonym: "stRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000656 ! stRNA_encoding -relationship: has_quality SO:0000656 ! stRNA_encoding - -[Term] -id: SO:0001271 -name: tmRNA_gene -synonym: "tmRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000659 ! tmRNA_encoding -relationship: has_quality SO:0000659 ! tmRNA_encoding - -[Term] -id: SO:0001272 -name: tRNA_gene -synonym: "tRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! 
ncRNA_gene -intersection_of: has_quality SO:0000663 ! tRNA_encoding -relationship: has_quality SO:0000663 ! tRNA_encoding - -[Term] -id: SO:0001273 -name: modified_adenosine -def: "A modified adenine is an adenine base feature that has been altered." [SO:ke] -synonym: "modified adenosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001274 -name: modified_inosine -def: "A modified inosine is an inosine base feature that has been altered." [SO:ke] -synonym: "modified inosine" EXACT [] -is_a: SO:0001230 ! inosine - -[Term] -id: SO:0001275 -name: modified_cytidine -def: "A modified cytidine is a cytidine base feature which has been altered." [SO:ke] -synonym: "modified cytidine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001276 -name: modified_guanosine -synonym: "modified guanosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001277 -name: modified_uridine -synonym: "modified uridine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001278 -name: one_methylinosine -def: "1-methylinosine is a modified insosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylinosine" EXACT [] -synonym: "m1I" EXACT RNAMOD [] -synonym: "one methylinosine" EXACT [] -xref: RNAMOD:018 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001279 -name: one_two_prime_O_dimethylinosine -def: "1,2'-O-dimethylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylinosine" EXACT [] -synonym: "m'Im" EXACT RNAMOD [] -synonym: "one two prime O dimethylinosine" EXACT [] -xref: RNAMOD:019 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001280 -name: two_prime_O_methylinosine -def: "2'-O-methylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylinosine" EXACT [] -synonym: "Im" EXACT RNAMOD [] -synonym: "two prime O methylinosine" EXACT [] -xref: RNAMOD:081 -is_a: SO:0001274 ! 
modified_inosine - -[Term] -id: SO:0001281 -name: three_methylcytidine -def: "3-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylcytidine" EXACT [] -synonym: "m3C" EXACT RNAMOD [] -synonym: "three methylcytidine" EXACT [] -xref: RNAMOD:020 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001282 -name: five_methylcytidine -def: "5-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylcytidine" EXACT [] -synonym: "five methylcytidine" EXACT [] -synonym: "m5C" EXACT RNAMOD [] -xref: RNAMOD:021 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001283 -name: two_prime_O_methylcytidine -def: "2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylcytidine" EXACT [] -synonym: "Cm" EXACT RNAMOD [] -synonym: "two prime O methylcytidine" EXACT [] -xref: RNAMOD:022 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001284 -name: two_thiocytidine -def: "2-thiocytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiocytidine" EXACT [] -synonym: "s2C" EXACT RNAMOD [] -synonym: "two thiocytidine" EXACT [] -xref: RNAMOD:023 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001285 -name: N4_acetylcytidine -def: "N4-acetylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4C" EXACT RNAMOD [] -synonym: "N4 acetylcytidine" EXACT [] -synonym: "N4-acetylcytidine" EXACT [] -xref: RNAMOD:024 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001286 -name: five_formylcytidine -def: "5-formylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formylcytidine" EXACT [] -synonym: "f5C" EXACT RNAMOD [] -synonym: "five formylcytidine" EXACT [] -xref: RNAMOD:025 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001287 -name: five_two_prime_O_dimethylcytidine -def: "5,2'-O-dimethylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethylcytidine" EXACT [] -synonym: "five two prime O dimethylcytidine" EXACT [] -synonym: "m5Cm" EXACT RNAMOD [] -xref: RNAMOD:026 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001288 -name: N4_acetyl_2_prime_O_methylcytidine -def: "N4-acetyl-2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4Cm" EXACT RNAMOD [] -synonym: "N4 acetyl 2 prime O methylcytidine" EXACT [] -synonym: "N4-acetyl-2'-O-methylcytidine" EXACT [] -xref: RNAMOD:027 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001289 -name: lysidine -def: "Lysidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "k2C" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Lysidine "wiki" -xref: RNAMOD:028 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001290 -name: N4_methylcytidine -def: "N4-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4C" EXACT RNAMOD [] -synonym: "N4 methylcytidine" EXACT [] -synonym: "N4-methylcytidine" EXACT [] -xref: RNAMOD:082 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001291 -name: N4_2_prime_O_dimethylcytidine -def: "N4,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4Cm" EXACT RNAMOD [] -synonym: "N4 2 prime O dimethylcytidine" EXACT [] -synonym: "N4,2'-O-dimethylcytidine" EXACT [] -xref: RNAMOD:083 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001292 -name: five_hydroxymethylcytidine -def: "5-hydroxymethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxymethylcytidine" EXACT [] -synonym: "five hydroxymethylcytidine" EXACT [] -synonym: "hm5C" EXACT RNAMOD [] -xref: RNAMOD:084 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001293 -name: five_formyl_two_prime_O_methylcytidine -def: "5-formyl-2'-O-methylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-formyl-2'-O-methylcytidine" EXACT [] -synonym: "f5Cm" EXACT RNAMOD [] -synonym: "five formyl two prime O methylcytidine" EXACT [] -xref: RNAMOD:095 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001294 -name: N4_N4_2_prime_O_trimethylcytidine -def: "N4_N4_2_prime_O_trimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m42Cm" EXACT RNAMOD [] -synonym: "N4,N4,2'-O-trimethylcytidine" EXACT [] -xref: RNAMOD:107 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001295 -name: one_methyladenosine -def: "1_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyladenosine" EXACT [] -synonym: "m1A" EXACT RNAMOD [] -synonym: "one methyladenosine" EXACT [] -xref: RNAMOD:001 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001296 -name: two_methyladenosine -def: "2_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methyladenosine" EXACT [] -synonym: "m2A" EXACT RNAMOD [] -synonym: "two methyladenosine" EXACT [] -xref: RNAMOD:002 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001297 -name: N6_methyladenosine -def: "N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6A" EXACT RNAMOD [] -synonym: "N6 methyladenosine" EXACT [] -synonym: "N6-methyladenosine" EXACT [] -xref: RNAMOD:003 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001298 -name: two_prime_O_methyladenosine -def: "2prime_O_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyladenosine" EXACT [] -synonym: "Am" EXACT RNAMOD [] -synonym: "two prime O methyladenosine" EXACT [] -xref: RNAMOD:004 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001299 -name: two_methylthio_N6_methyladenosine -def: "2_methylthio_N6_methyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-methyladenosine" EXACT [] -synonym: "ms2m6A" EXACT RNAMOD [] -synonym: "two methylthio N6 methyladenosine" EXACT [] -xref: RNAMOD:005 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001300 -name: N6_isopentenyladenosine -def: "N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "i6A" EXACT RNAMOD [] -synonym: "N6 isopentenyladenosine" EXACT [] -synonym: "N6-isopentenyladenosine" EXACT [] -xref: RNAMOD:006 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001301 -name: two_methylthio_N6_isopentenyladenosine -def: "2_methylthio_N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-isopentenyladenosine" EXACT [] -synonym: "ms2i6A" EXACT RNAMOD [] -synonym: "two methylthio N6 isopentenyladenosine" EXACT [] -xref: RNAMOD:007 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001302 -name: N6_cis_hydroxyisopentenyl_adenosine -def: "N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "io6A" EXACT RNAMOD [] -synonym: "N6 cis hydroxyisopentenyl adenosine" EXACT [] -synonym: "N6-(cis-hydroxyisopentenyl)adenosine" EXACT [] -xref: RNAMOD:008 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001303 -name: two_methylthio_N6_cis_hydroxyisopentenyl_adenosine -def: "2_methylthio_N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-(cis-hydroxyisopentenyl) adenosine" EXACT [] -synonym: "ms2io6A" EXACT RNAMOD [] -synonym: "two methylthio N6 cis hydroxyisopentenyl adenosine" EXACT [] -xref: RNAMOD:009 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001304 -name: N6_glycinylcarbamoyladenosine -def: "N6_glycinylcarbamoyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "g6A" EXACT RNAMOD [] -synonym: "N6 glycinylcarbamoyladenosine" EXACT [] -synonym: "N6-glycinylcarbamoyladenosine" EXACT [] -xref: RNAMOD:010 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001305 -name: N6_threonylcarbamoyladenosine -def: "N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-threonylcarbamoyladenosine" EXACT [] -synonym: "t6A" EXACT RNAMOD [] -xref: RNAMOD:011 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001306 -name: two_methylthio_N6_threonyl_carbamoyladenosine -def: "2_methylthio_N6_threonyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-threonyl carbamoyladenosine" EXACT [] -synonym: "ms2t6A" EXACT RNAMOD [] -synonym: "two methylthio N6 threonyl carbamoyladenosine" EXACT [] -xref: RNAMOD:012 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001307 -name: N6_methyl_N6_threonylcarbamoyladenosine -def: "N6_methyl_N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6t6A" EXACT RNAMOD [] -synonym: "N6 methyl N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-methyl-N6-threonylcarbamoyladenosine" EXACT [] -xref: RNAMOD:013 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001308 -name: N6_hydroxynorvalylcarbamoyladenosine -def: "N6_hydroxynorvalylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "hn6A" EXACT RNAMOD [] -synonym: "N6 hydroxynorvalylcarbamoyladenosine" EXACT [] -synonym: "N6-hydroxynorvalylcarbamoyladenosine" EXACT [] -xref: RNAMOD:014 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001309 -name: two_methylthio_N6_hydroxynorvalyl_carbamoyladenosine -def: "2_methylthio_N6_hydroxynorvalyl_carbamoyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-hydroxynorvalyl carbamoyladenosine" EXACT [] -synonym: "ms2hn6A" EXACT RNAMOD [] -synonym: "two methylthio N6 hydroxynorvalyl carbamoyladenosine" EXACT [] -xref: RNAMOD:015 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001310 -name: two_prime_O_ribosyladenosine_phosphate -def: "2prime_O_ribosyladenosine_phosphate is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosyladenosine (phosphate)" EXACT [] -synonym: "Ar(p)" EXACT RNAMOD [] -synonym: "two prime O ribosyladenosine phosphate" EXACT [] -xref: RNAMOD:016 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001311 -name: N6_N6_dimethyladenosine -def: "N6_N6_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62A" EXACT RNAMOD [] -synonym: "N6,N6-dimethyladenosine" EXACT [] -xref: RNAMOD:080 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001312 -name: N6_2_prime_O_dimethyladenosine -def: "N6_2prime_O_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6Am" EXACT RNAMOD [] -synonym: "N6 2 prime O dimethyladenosine" EXACT [] -synonym: "N6,2'-O-dimethyladenosine" EXACT [] -xref: RNAMOD:088 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001313 -name: N6_N6_2_prime_O_trimethyladenosine -def: "N6_N6_2prime_O_trimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62Am" EXACT RNAMOD [] -synonym: "N6,N6,2'-O-trimethyladenosine" EXACT [] -xref: RNAMOD:089 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001314 -name: one_two_prime_O_dimethyladenosine -def: "1,2'-O-dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethyladenosine" EXACT [] -synonym: "m1Am" EXACT RNAMOD [] -synonym: "one two prime O dimethyladenosine" EXACT [] -xref: RNAMOD:097 -is_a: SO:0001273 ! 
modified_adenosine - -[Term] -id: SO:0001315 -name: N6_acetyladenosine -def: "N6_acetyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac6A" EXACT RNAMOD [] -synonym: "N6 acetyladenosine" EXACT [] -synonym: "N6-acetyladenosine" EXACT [] -xref: RNAMOD:102 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001316 -name: seven_deazaguanosine -def: "7-deazaguanosine is a moddified guanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-deazaguanosine" RELATED [] -synonym: "seven deazaguanosine" EXACT [] -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001317 -name: queuosine -def: "Queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "Q" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Queuosine "wiki" -xref: RNAMOD:043 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001318 -name: epoxyqueuosine -def: "Epoxyqueuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "eQ" EXACT RNAMOD [] -xref: RNAMOD:044 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001319 -name: galactosyl_queuosine -def: "Galactosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "galactosyl queuosine" EXACT [] -synonym: "galactosyl-queuosine" EXACT [] -synonym: "galQ" EXACT RNAMOD [] -xref: RNAMOD:045 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001320 -name: mannosyl_queuosine -def: "Mannosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "mannosyl queuosine" EXACT [] -synonym: "mannosyl-queuosine" EXACT [] -synonym: "manQ" EXACT RNAMOD [] -xref: RNAMOD:046 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001321 -name: seven_cyano_seven_deazaguanosine -def: "7_cyano_7_deazaguanosine is a modified 7-deazoguanosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "7-cyano-7-deazaguanosine" EXACT [] -synonym: "preQ0" EXACT RNAMOD [] -synonym: "seven cyano seven deazaguanosine" EXACT [] -xref: RNAMOD:047 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001322 -name: seven_aminomethyl_seven_deazaguanosine -def: "7_aminomethyl_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-aminomethyl-7-deazaguanosine" EXACT [] -synonym: "preQ1" EXACT RNAMOD [] -synonym: "seven aminomethyl seven deazaguanosine" EXACT [] -xref: RNAMOD:048 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001323 -name: archaeosine -def: "Archaeosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "G+" EXACT RNAMOD [] -xref: RNAMOD:049 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001324 -name: one_methylguanosine -def: "1_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylguanosine" EXACT [] -synonym: "m1G" EXACT RNAMOD [] -synonym: "one methylguanosine" EXACT [] -xref: RNAMOD:029 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001325 -name: N2_methylguanosine -def: "N2_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2G" EXACT RNAMOD [] -synonym: "N2 methylguanosine" EXACT [] -synonym: "N2-methylguanosine" EXACT [] -xref: RNAMOD:030 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001326 -name: seven_methylguanosine -def: "7_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "7-methylguanosine" EXACT [] -synonym: "m7G" EXACT RNAMOD [] -synonym: "seven methylguanosine" EXACT [] -xref: RNAMOD:031 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001327 -name: two_prime_O_methylguanosine -def: "2prime_O_methylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylguanosine" EXACT [] -synonym: "Gm" EXACT RNAMOD [] -synonym: "two prime O methylguanosine" EXACT [] -xref: RNAMOD:032 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001328 -name: N2_N2_dimethylguanosine -def: "N2_N2_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22G" EXACT RNAMOD [] -synonym: "N2,N2-dimethylguanosine" EXACT [] -xref: RNAMOD:033 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001329 -name: N2_2_prime_O_dimethylguanosine -def: "N2_2prime_O_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2Gm" EXACT RNAMOD [] -synonym: "N2 2 prime O dimethylguanosine" EXACT [] -synonym: "N2,2'-O-dimethylguanosine" EXACT [] -xref: RNAMOD:034 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001330 -name: N2_N2_2_prime_O_trimethylguanosine -def: "N2_N2_2prime_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22Gmv" EXACT RNAMOD [] -synonym: "N2,N2,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:035 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001331 -name: two_prime_O_ribosylguanosine_phosphate -def: "2prime_O_ribosylguanosine_phosphate is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosylguanosine (phosphate)" EXACT [] -synonym: "Gr(p)" EXACT RNAMOD [] -synonym: "two prime O ribosylguanosine phosphate" EXACT [] -xref: RNAMOD:036 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001332 -name: wybutosine -def: "Wybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "yW" EXACT RNAMOD [] -xref: RNAMOD:037 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001333 -name: peroxywybutosine -def: "Peroxywybutosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "o2yW" EXACT RNAMOD [] -xref: RNAMOD:038 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001334 -name: hydroxywybutosine -def: "Hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW" EXACT RNAMOD [] -xref: RNAMOD:039 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001335 -name: undermodified_hydroxywybutosine -def: "Undermodified_hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW*" EXACT RNAMOD [] -synonym: "undermodified hydroxywybutosine" EXACT [] -xref: RNAMOD:040 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001336 -name: wyosine -def: "Wyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "IMG" EXACT RNAMOD [] -xref: RNAMOD:041 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001337 -name: methylwyosine -def: "Methylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mimG" EXACT RNAMOD [] -xref: RNAMOD:042 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001338 -name: N2_7_dimethylguanosine -def: "N2_7_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7G" EXACT RNAMOD [] -synonym: "N2 7 dimethylguanosine" EXACT [] -synonym: "N2,7-dimethylguanosine" EXACT [] -xref: RNAMOD:090 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001339 -name: N2_N2_7_trimethylguanosine -def: "N2_N2_7_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,2,7G" EXACT RNAMOD [] -synonym: "N2,N2,7-trimethylguanosine" EXACT [] -xref: RNAMOD:091 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001340 -name: one_two_prime_O_dimethylguanosine -def: "1_2prime_O_dimethylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylguanosine" EXACT [] -synonym: "m1Gm" EXACT RNAMOD [] -synonym: "one two prime O dimethylguanosine" EXACT [] -xref: RNAMOD:096 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001341 -name: four_demethylwyosine -def: "4_demethylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-demethylwyosine" EXACT [] -synonym: "four demethylwyosine" EXACT [] -synonym: "imG-14" EXACT RNAMOD [] -xref: RNAMOD:100 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001342 -name: isowyosine -def: "Isowyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "imG2" EXACT RNAMOD [] -xref: RNAMOD:101 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001343 -name: N2_7_2prirme_O_trimethylguanosine -def: "N2_7_2prirme_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7Gm" EXACT RNAMOD [] -synonym: "N2 7 2prirme O trimethylguanosine" EXACT [] -synonym: "N2,7,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:106 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001344 -name: five_methyluridine -def: "5_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methyluridine" EXACT [] -synonym: "five methyluridine" EXACT [] -synonym: "m5U" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/5-methyluridine "wiki" -xref: RNAMOD:052 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001345 -name: two_prime_O_methyluridine -def: "2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyluridine" EXACT [] -synonym: "two prime O methyluridine" EXACT [] -synonym: "Um" EXACT RNAMOD [] -xref: RNAMOD:053 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001346 -name: five_two_prime_O_dimethyluridine -def: "5_2_prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethyluridine" EXACT [] -synonym: "five two prime O dimethyluridine" EXACT [] -synonym: "m5Um" EXACT RNAMOD [] -xref: RNAMOD:054 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001347 -name: one_methylpseudouridine -def: "1_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylpseudouridine" EXACT [] -synonym: "m1Y" EXACT RNAMOD [] -synonym: "one methylpseudouridine" EXACT [] -xref: RNAMOD:055 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001348 -name: two_prime_O_methylpseudouridine -def: "2prime_O_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylpseudouridine" EXACT [] -synonym: "two prime O methylpseudouridine" EXACT [] -synonym: "Ym" EXACT RNAMOD [] -xref: RNAMOD:056 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001349 -name: two_thiouridine -def: "2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiouridine" EXACT [] -synonym: "s2U" EXACT RNAMOD [] -synonym: "two thiouridine" EXACT [] -xref: RNAMOD:057 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001350 -name: four_thiouridine -def: "4_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-thiouridine" EXACT [] -synonym: "four thiouridine" EXACT [] -synonym: "s4U" EXACT RNAMOD [] -xref: RNAMOD:058 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001351 -name: five_methyl_2_thiouridine -def: "5_methyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyl-2-thiouridine" EXACT [] -synonym: "five methyl 2 thiouridine" EXACT [] -synonym: "m5s2U" EXACT RNAMOD [] -xref: RNAMOD:059 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001352 -name: two_thio_two_prime_O_methyluridine -def: "2_thio_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thio-2'-O-methyluridine" EXACT [] -synonym: "s2Um" EXACT RNAMOD [] -synonym: "two thio two prime O methyluridine" EXACT [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001353 -name: three_three_amino_three_carboxypropyl_uridine -def: "3_3_amino_3_carboxypropyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-(3-amino-3-carboxypropyl)uridine" EXACT [] -synonym: "acp3U" EXACT RNAMOD [] -xref: RNAMOD:061 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001354 -name: five_hydroxyuridine -def: "5_hydroxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxyuridine" EXACT [] -synonym: "five hydroxyuridine" EXACT [] -synonym: "ho5U" EXACT RNAMOD [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001355 -name: five_methoxyuridine -def: "5_methoxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxyuridine" EXACT [] -synonym: "five methoxyuridine" EXACT [] -synonym: "mo5U" EXACT RNAMOD [] -xref: RNAMOD:063 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001356 -name: uridine_five_oxyacetic_acid -def: "Uridine_5_oxyacetic_acid is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "cmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid" EXACT [] -synonym: "uridine five oxyacetic acid" EXACT [] -xref: RNAMOD:064 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001357 -name: uridine_five_oxyacetic_acid_methyl_ester -def: "Uridine_5_oxyacetic_acid_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mcmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid methyl ester" EXACT [] -synonym: "uridine five oxyacetic acid methyl ester" EXACT [] -xref: RNAMOD:065 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001358 -name: five_carboxyhydroxymethyl_uridine -def: "5_carboxyhydroxymethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine" EXACT [] -synonym: "chm5U" EXACT RNAMOD [] -synonym: "five carboxyhydroxymethyl uridine" EXACT [] -xref: RNAMOD:066 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001359 -name: five_carboxyhydroxymethyl_uridine_methyl_ester -def: "5_carboxyhydroxymethyl_uridine_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine methyl ester" EXACT [] -synonym: "five carboxyhydroxymethyl uridine methyl ester" EXACT [] -synonym: "mchm5U" EXACT RNAMOD [] -xref: RNAMOD:067 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001360 -name: five_methoxycarbonylmethyluridine -def: "Five_methoxycarbonylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyluridine" EXACT [] -synonym: "five methoxycarbonylmethyluridine" EXACT [] -synonym: "mcm5U" EXACT RNAMOD [] -xref: RNAMOD:068 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001361 -name: five_methoxycarbonylmethyl_two_prime_O_methyluridine -def: "Five_methoxycarbonylmethyl_2_prime_O_methyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five methoxycarbonylmethyl two prime O methyluridine" EXACT [] -synonym: "mcm5Um" EXACT RNAMOD [] -xref: RNAMOD:069 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001362 -name: five_methoxycarbonylmethyl_two_thiouridine -def: "5_methoxycarbonylmethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2-thiouridine" EXACT [] -synonym: "five methoxycarbonylmethyl two thiouridine" EXACT [] -synonym: "mcm5s2U" EXACT RNAMOD [] -xref: RNAMOD:070 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001363 -name: five_aminomethyl_two_thiouridine -def: "5_aminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-aminomethyl-2-thiouridine" EXACT [] -synonym: "five aminomethyl two thiouridine" EXACT [] -synonym: "nm5s2U" EXACT RNAMOD [] -xref: RNAMOD:071 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001364 -name: five_methylaminomethyluridine -def: "5_methylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyluridine" EXACT [] -synonym: "five methylaminomethyluridine" EXACT [] -synonym: "mnm5U" EXACT RNAMOD [] -xref: RNAMOD:072 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001365 -name: five_methylaminomethyl_two_thiouridine -def: "5_methylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-thiouridine" EXACT [] -synonym: "five methylaminomethyl two thiouridine" EXACT [] -synonym: "mnm5s2U" EXACT RNAMOD [] -xref: RNAMOD:073 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001366 -name: five_methylaminomethyl_two_selenouridine -def: "5_methylaminomethyl_2_selenouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-selenouridine" EXACT [] -synonym: "five methylaminomethyl two selenouridine" EXACT [] -synonym: "mnm5se2U" EXACT RNAMOD [] -xref: RNAMOD:074 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001367 -name: five_carbamoylmethyluridine -def: "5_carbamoylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyluridine" EXACT [] -synonym: "five carbamoylmethyluridine" EXACT [] -synonym: "ncm5U" EXACT RNAMOD [] -xref: RNAMOD:075 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001368 -name: five_carbamoylmethyl_two_prime_O_methyluridine -def: "5_carbamoylmethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five carbamoylmethyl two prime O methyluridine" EXACT [] -synonym: "ncm5Um" EXACT RNAMOD [] -xref: RNAMOD:076 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001369 -name: five_carboxymethylaminomethyluridine -def: "5_carboxymethylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyluridine" EXACT [] -synonym: "cmnm5U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyluridine" EXACT [] -xref: RNAMOD:077 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001370 -name: five_carboxymethylaminomethyl_two_prime_O_methyluridine -def: "5_carboxymethylaminomethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl- 2'-O-methyluridine" EXACT [] -synonym: "cmnm5Um" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two prime O methyluridine" EXACT [] -xref: RNAMOD:078 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001371 -name: five_carboxymethylaminomethyl_two_thiouridine -def: "5_carboxymethylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl-2-thiouridine" EXACT [] -synonym: "cmnm5s2U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two thiouridine" EXACT [] -xref: RNAMOD:079 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001372 -name: three_methyluridine -def: "3_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methyluridine" EXACT [] -synonym: "m3U" EXACT RNAMOD [] -synonym: "three methyluridine" EXACT [] -xref: RNAMOD:085 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001373 -name: one_methyl_three_three_amino_three_carboxypropyl_pseudouridine -def: "1_methyl_3_3_amino_3_carboxypropyl_pseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyl-3-(3-amino-3-carboxypropyl) pseudouridine" EXACT [] -synonym: "m1acp3Y" EXACT RNAMOD [] -xref: RNAMOD:086 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001374 -name: five_carboxymethyluridine -def: "5_carboxymethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethyluridine" EXACT [] -synonym: "cm5U" EXACT RNAMOD [] -synonym: "five carboxymethyluridine" EXACT [] -xref: RNAMOD:087 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001375 -name: three_two_prime_O_dimethyluridine -def: "3_2prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3,2'-O-dimethyluridine" EXACT [] -synonym: "m3Um" EXACT RNAMOD [] -synonym: "three two prime O dimethyluridine" EXACT [] -xref: RNAMOD:092 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001376 -name: five_methyldihydrouridine -def: "5_methyldihydrouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyldihydrouridine" EXACT [] -synonym: "five methyldihydrouridine" EXACT [] -synonym: "m5D" EXACT RNAMOD [] -xref: RNAMOD:093 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001377 -name: three_methylpseudouridine -def: "3_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylpseudouridine" EXACT [] -synonym: "m3Y" EXACT RNAMOD [] -synonym: "three methylpseudouridine" EXACT [] -xref: RNAMOD:094 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001378 -name: five_taurinomethyluridine -def: "5_taurinomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyluridine" EXACT [] -synonym: "five taurinomethyluridine" EXACT [] -synonym: "tm5U" EXACT RNAMOD [] -xref: RNAMOD:098 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001379 -name: five_taurinomethyl_two_thiouridine -def: "5_taurinomethyl_2_thiouridineis a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyl-2-thiouridine" EXACT [] -synonym: "five taurinomethyl two thiouridine" EXACT [] -synonym: "tm5s2U" EXACT RNAMOD [] -xref: RNAMOD:099 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001380 -name: five_isopentenylaminomethyl_uridine -def: "5_isopentenylaminomethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)uridine" EXACT [] -synonym: "five isopentenylaminomethyl uridine" EXACT [] -synonym: "inm5U" EXACT RNAMOD [] -xref: RNAMOD:103 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001381 -name: five_isopentenylaminomethyl_two_thiouridine -def: "5_isopentenylaminomethyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2-thiouridine" EXACT [] -synonym: "five isopentenylaminomethyl two thiouridine" EXACT [] -synonym: "inm5s2U" EXACT RNAMOD [] -xref: RNAMOD:104 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001382 -name: five_isopentenylaminomethyl_two_prime_O_methyluridine -def: "5_isopentenylaminomethyl_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2'-O-methyluridine" EXACT [] -synonym: "five isopentenylaminomethyl two prime O methyluridine" EXACT [] -synonym: "inm5Um" EXACT RNAMOD [] -xref: RNAMOD:105 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001383 -name: histone_binding_site -def: "A binding site that, in the nucleotide molecule, interacts selectively and non-covalently with polypeptide residues of a histone." [SO:ke] -synonym: "histone binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site - -[Term] -id: SO:0001384 -name: CDS_fragment -synonym: "CDS fragment" EXACT [] -synonym: "incomplete CDS" EXACT [] -is_a: SO:0000316 ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000731 ! fragmentary -relationship: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001385 -name: modified_amino_acid_feature -def: "A post translationally modified amino acid feature." [SO:ke] -synonym: "modified amino acid feature" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001386 -name: modified_glycine -def: "A post translationally modified glycine amino acid feature." [SO:ke] -synonym: "ModGly" EXACT AAMOD [] -synonym: "modified glycine" EXACT [] -xref: MOD:00908 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001387 -name: modified_L_alanine -def: "A post translationally modified alanine amino acid feature." 
[SO:ke] -synonym: "ModAla" EXACT AAMOD [] -synonym: "modified L alanine" EXACT [] -synonym: "modified L-alanine" EXACT [] -xref: MOD:00901 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001388 -name: modified_L_asparagine -def: "A post translationally modified asparagine amino acid feature." [SO:ke] -synonym: "ModAsn" EXACT AAMOD [] -synonym: "modified L asparagine" EXACT [] -synonym: "modified L-asparagine" EXACT [] -xref: MOD:00903 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001389 -name: modified_L_aspartic_acid -def: "A post translationally modified aspartic acid amino acid feature." [SO:ke] -synonym: "ModAsp" EXACT AAMOD [] -synonym: "modified L aspartic acid" EXACT [] -synonym: "modified L-aspartic acid" EXACT [] -xref: MOD:00904 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001390 -name: modified_L_cysteine -def: "A post translationally modified cysteine amino acid feature." [SO:ke] -synonym: "ModCys" EXACT AAMOD [] -synonym: "modified L cysteine" EXACT [] -synonym: "modified L-cysteine" EXACT [] -xref: MOD:00905 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001391 -name: modified_L_glutamic_acid -synonym: "ModGlu" EXACT AAMOD [] -synonym: "modified L glutamic acid" EXACT [] -synonym: "modified L-glutamic acid" EXACT [] -xref: MOD:00906 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001392 -name: modified_L_threonine -def: "A post translationally modified threonine amino acid feature." [SO:ke] -synonym: "modified L threonine" EXACT [] -synonym: "modified L-threonine" EXACT [] -synonym: "ModThr" EXACT AAMOD [] -xref: MOD:00917 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001393 -name: modified_L_tryptophan -def: "A post translationally modified tryptophan amino acid feature." 
[SO:ke] -synonym: "modified L tryptophan" EXACT [] -synonym: "modified L-tryptophan" EXACT [] -synonym: "ModTrp" EXACT AAMOD [] -xref: MOD:00918 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001394 -name: modified_L_glutamine -def: "A post translationally modified glutamine amino acid feature." [SO:ke] -synonym: "ModGln" EXACT [] -synonym: "modified L glutamine" EXACT [] -synonym: "modified L-glutamine" EXACT [] -xref: MOD:00907 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001395 -name: modified_L_methionine -def: "A post translationally modified methionine amino acid feature." [SO:ke] -synonym: "modified L methionine" EXACT [] -synonym: "modified L-methionine" EXACT [] -synonym: "ModMet" EXACT AAMOD [] -xref: MOD:00913 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001396 -name: modified_L_isoleucine -def: "A post translationally modified isoleucine amino acid feature." [SO:ke] -synonym: "modified L isoleucine" EXACT [] -synonym: "modified L-isoleucine" EXACT [] -synonym: "ModIle" EXACT AAMOD [] -xref: MOD:00910 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001397 -name: modified_L_phenylalanine -def: "A post translationally modified phenylalanine amino acid feature." [SO:ke] -synonym: "modified L phenylalanine" EXACT [] -synonym: "modified L-phenylalanine" EXACT [] -synonym: "ModPhe" EXACT AAMOD [] -xref: MOD:00914 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001398 -name: modified_L_histidine -def: "A post translationally modified histidie amino acid feature." [SO:ke] -synonym: "ModHis" EXACT [] -synonym: "modified L histidine" EXACT [] -synonym: "modified L-histidine" EXACT [] -xref: MOD:00909 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001399 -name: modified_L_serine -def: "A post translationally modified serine amino acid feature." 
[SO:ke] -synonym: "modified L serine" EXACT [] -synonym: "modified L-serine" EXACT [] -synonym: "MosSer" EXACT AAMOD [] -xref: MOD:00916 "http://www.psidev.info/index.php?q=node/104" -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001400 -name: modified_L_lysine -def: "A post translationally modified lysine amino acid feature." [SO:ke] -synonym: "modified L lysine" EXACT [] -synonym: "modified L-lysine" EXACT [] -synonym: "ModLys" EXACT AAMOD [] -xref: MOD:00912 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001401 -name: modified_L_leucine -def: "A post translationally modified leucine amino acid feature." [SO:ke] -synonym: "modified L leucine" EXACT [] -synonym: "modified L-leucine " EXACT [] -synonym: "ModLeu" EXACT AAMOD [] -xref: MOD:00911 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001402 -name: modified_L_selenocysteine -def: "A post translationally modified selenocysteine amino acid feature." [SO:ke] -synonym: "modified L selenocysteine" EXACT [] -synonym: "modified L-selenocysteine" EXACT [] -xref: MOD:01158 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001403 -name: modified_L_valine -def: "A post translationally modified valine amino acid feature." [SO:ke] -synonym: "modified L valine" EXACT [] -synonym: "modified L-valine" EXACT [] -synonym: "ModVal" EXACT AAMOD [] -xref: MOD:00920 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001404 -name: modified_L_proline -def: "A post translationally modified proline amino acid feature." [SO:ke] -synonym: "modified L proline" EXACT [] -synonym: "modified L-proline " EXACT [] -synonym: "ModPro" EXACT AAMOD [] -xref: MOD:00915 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001405 -name: modified_L_tyrosine -def: "A post translationally modified tyrosine amino acid feature." 
[SO:ke] -synonym: "modified L tyrosine" EXACT [] -synonym: "modified L-tyrosine" EXACT [] -synonym: "ModTry" EXACT AAMOD [] -xref: MOD:00919 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001406 -name: modified_L_arginine -def: "A post translationally modified arginine amino acid feature." [SO:ke] -synonym: "ModArg" EXACT AAMOD [] -synonym: "modified L arginine" EXACT [] -synonym: "modified L-arginine" EXACT [] -xref: MOD:00902 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001407 -name: peptidyl -def: "An attribute describing the nature of a proteinaceous polymer, where by the amino acid units are joined by peptide bonds." [SO:ke] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0001408 -name: cleaved_for_gpi_anchor_region -def: "The C-terminal residues of a polypeptide which are exchanged for a GPI-anchor." [EBI:rh] -synonym: "cleaved for gpi anchor region" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001409 -name: biomaterial_region -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001410 -name: experimental_feature -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -def: "A region that is defined according to its relations with other regions within the same sequence." 
[SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001413 -name: translocation_breakpoint -def: "The point within a chromosome where a translocation begins or ends." [SO:cb] -synonym: "translocation breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001414 -name: insertion_breakpoint -def: "The point within a chromosome where a insertion begins or ends." [SO:cb] -synonym: "insertion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001415 -name: deletion_breakpoint -def: "The point within a chromosome where a deletion begins or ends." [SO:cb] -synonym: "deletion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001416 -name: five_prime_flanking_region -def: "A flanking region located five prime of a specific region." [SO:chado] -synonym: "5' flanking region" RELATED [] -synonym: "five prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001417 -name: three_prime_flanking_region -def: "A flanking region located three prime of a specific region." [SO:chado] -synonym: "3' flanking region" RELATED [] -synonym: "three prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001418 -name: transcribed_fragment -def: "An experimental region, defined by a tiling array experiment to be transcribed at some level." [SO:ke] -comment: Term requested by the MODencode group. -synonym: "transcribed fragment" EXACT [] -synonym: "transfrag" RELATED [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001419 -name: cis_splice_site -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -def: "Primary transcript region bordering trans-splice junction." 
[SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001421 -name: splice_junction -def: "The boundary between an intron and an exon." [SO:ke] -synonym: "splice boundary" EXACT [] -synonym: "splice junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001422 -name: conformational_switch -def: "A region of a polypeptide, involved in the transition from one conformational state to another." [SO:ke] -comment: MM Young, K Kirshenbaum, KA Dill & S Highsmith. Predicting conformational switches in proteins. Protein Science, 1999, 8, 1752-64. K. Kirshenbaum, M.M. Young and S. Highsmith. Predicting Allosteric Switches in Myosins. Protein Science 8(9):1806-1815. 1999. -synonym: "polypeptide conformational switch" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001423 -name: dye_terminator_read -def: "A read produced by the dye terminator method of sequencing." [SO:ke] -synonym: "dye terminator read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001424 -name: pyrosequenced_read -def: "A read produced by pyrosequencing technology." [SO:ke] -comment: An example is a read produced by Roche 454 technology. -synonym: "pyorsequenced read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001425 -name: ligation_based_read -def: "A read produced by ligation based sequencing technologies." [SO:ke] -comment: An example of this kind of read is one produced by ABI SOLiD. -synonym: "ligation based read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001426 -name: polymerase_synthesis_read -def: "A read produced by the polymerase based sequence by synthesis method." [SO:ke] -comment: An example is a read produced by Illumina technology. -synonym: "polymerase synthesis read" RELATED [] -is_a: SO:0000150 ! 
read - -[Term] -id: SO:0001427 -name: cis_regulatory_frameshift_element -def: "A structural region in an RNA molecule which promotes ribosomal frameshifting of cis coding sequence." [RFAM:jd] -synonym: "cis regulatory frameshift element" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001428 -name: expressed_sequence_assembly -def: "A sequence assembly derived from expressed sequences." [SO:ke] -comment: From tracker [ 2372385 ] expressed_sequence_assembly. -synonym: "expressed sequence assembly" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0001429 -name: DNA_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with DNA." [SO:ke] -synonym: "DNA binding site" EXACT [] -is_a: SO:0001655 ! nucleotide_binding_site - -[Term] -id: SO:0001431 -name: cryptic_gene -def: "A gene that is not transcribed under normal conditions and is not critical to normal cellular functioning." [SO:ke] -synonym: "cryptic gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000976 ! cryptic -relationship: has_quality SO:0000976 ! cryptic - -[Term] -id: SO:0001432 -name: sequence_variant_affecting_polyadenylation -comment: OBSOLETE: This term was deleted as it conflated more than one term. The alteration is separate from the effect. -synonym: "mutation affecting polyadenylation" RELATED [] -synonym: "sequence variant affecting polyadenylation" EXACT [] -is_obsolete: true -replaced_by: SO:0001545 - -[Term] -id: SO:0001433 -name: three_prime_RACE_clone -def: "A three prime RACE (Rapid Amplification of cDNA Ends) clone is a cDNA clone copied from the 3' end of an mRNA (using a poly-dT primer to capture the polyA tail and a gene-specific or randomly primed 5' primer), and spliced into a vector for propagation in a suitable host." [modENCODE:nlw] -synonym: "3' RACE clone" RELATED [] -is_a: SO:0000317 ! 
cDNA_clone - -[Term] -id: SO:0001434 -name: cassette_pseudogene -def: "A cassette pseudogene is a kind of gene in an inactive form which may recombine at a telomeric locus to form a functional copy." [SO:ke] -comment: Requested by the Trypanosome community. -synonym: "cassette pseudogene" EXACT [] -synonym: "cassette type psedogene" RELATED [] -is_a: SO:0001760 ! non_processed_pseudogene - -[Term] -id: SO:0001435 -name: alanine -comment: A place holder for a cross product with chebi. -synonym: "A" EXACT aa1 [] -synonym: "Ala" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001436 -name: valine -comment: A place holder for a cross product with chebi. -synonym: "V" EXACT aa1 [] -synonym: "Val" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001437 -name: leucine -comment: A place holder for a cross product with chebi. -synonym: "L" EXACT aa1 [] -synonym: "Leu" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001438 -name: isoleucine -comment: A place holder for a cross product with chebi. -synonym: "I" EXACT aa1 [] -synonym: "Ile" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001439 -name: proline -comment: A place holder for a cross product with chebi. -synonym: "P" EXACT aa1 [] -synonym: "Pro" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001440 -name: tryptophan -comment: A place holder for a cross product with chebi. -synonym: "Trp" EXACT aa3 [] -synonym: "W" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001441 -name: phenylalanine -comment: A place holder for a cross product with chebi. -synonym: "F" EXACT aa1 [] -synonym: "Phe" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001442 -name: methionine -comment: A place holder for a cross product with chebi. -synonym: "M" EXACT aa1 [] -synonym: "Met" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001443 -name: glycine -comment: A place holder for a cross product with chebi. 
-synonym: "G" EXACT aa1 [] -synonym: "Gly" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001444 -name: serine -comment: A place holder for a cross product with chebi. -synonym: "S" EXACT aa1 [] -synonym: "Ser" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001445 -name: threonine -comment: A place holder for a cross product with chebi. -synonym: "T" EXACT aa1 [] -synonym: "Thr" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001446 -name: tyrosine -comment: A place holder for a cross product with chebi. -synonym: "Tyr" EXACT aa3 [] -synonym: "Y" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001447 -name: cysteine -comment: A place holder for a cross product with chebi. -synonym: "C" EXACT aa1 [] -synonym: "Cys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001448 -name: glutamine -comment: A place holder for a cross product with chebi. -synonym: "Gln" EXACT aa3 [] -synonym: "Q" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001449 -name: asparagine -comment: A place holder for a cross product with chebi. -synonym: "Asn" EXACT aa3 [] -synonym: "N" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001450 -name: lysine -comment: A place holder for a cross product with chebi. -synonym: "K" EXACT aa1 [] -synonym: "Lys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001451 -name: arginine -comment: A place holder for a cross product with chebi. -synonym: "Arg" EXACT aa3 [] -synonym: "R" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001452 -name: histidine -comment: A place holder for a cross product with chebi. -synonym: "H" EXACT aa1 [] -synonym: "His" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001453 -name: aspartic_acid -comment: A place holder for a cross product with chebi. -synonym: "Asp" EXACT aa3 [] -synonym: "aspartic acid" EXACT [] -synonym: "D" EXACT aa1 [] -is_a: SO:0001237 ! 
amino_acid - -[Term] -id: SO:0001454 -name: glutamic_acid -comment: A place holder for a cross product with chebi. -synonym: "E" EXACT aa1 [] -synonym: "Glu" EXACT aa3 [] -synonym: "glutamic acid" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001455 -name: selenocysteine -comment: A place holder for a cross product with chebi. -synonym: "Sec" EXACT aa3 [] -synonym: "U" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001456 -name: pyrrolysine -comment: A place holder for a cross product with chebi. -synonym: "O" EXACT aa1 [] -synonym: "Pyl" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001457 -name: transcribed_cluster -def: "A region defined by a set of transcribed sequences from the same gene or expressed pseudogene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "transcribed cluster" EXACT [] -synonym: "unigene cluster" RELATED [] -is_a: SO:0001410 ! experimental_feature -relationship: has_part SO:0000695 ! reagent - -[Term] -id: SO:0001458 -name: unigene_cluster -def: "A kind of transcribed_cluster defined by a set of transcribed sequences from the a unique gene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "unigene cluster" RELATED [] -is_a: SO:0001457 ! transcribed_cluster - -[Term] -id: SO:0001459 -name: CRISPR -def: "Clustered Palindromic Repeats interspersed with bacteriophage derived spacer sequences." [RFAM:jd] -synonym: "Clustered_Regularly_Interspaced_Short_Palindromic_Repeat" EXACT [] -synonym: "CRISPR element" EXACT [] -xref: http:en.wikipedia.org/wiki/CRISPR -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0001460 -name: insulator_binding_site -def: "A binding site that, in an insulator region of a nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -comment: See tracker ID 2060908. -synonym: "insulator binding site" RELATED [] -is_a: SO:0001654 ! 
nucleotide_to_protein_binding_site -relationship: part_of SO:0000627 ! insulator - -[Term] -id: SO:0001461 -name: enhancer_binding_site -def: "A binding site that, in the enhancer region of a nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -synonym: "enhancer binding site" RELATED [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -relationship: part_of SO:0000165 ! enhancer - -[Term] -id: SO:0001462 -name: contig_collection -def: "A collection of contigs." [SO:ke] -comment: See tracker ID: 2138359. -synonym: "contig collection" EXACT [] -is_a: SO:0001085 ! sequence_conflict -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001085 ! sequence_conflict -intersection_of: has_part SO:0000149 ! contig -relationship: has_part SO:0000149 ! contig - -[Term] -id: SO:0001463 -name: lincRNA -def: "A multiexonic non-coding RNA transcribed by RNA polymerase II." [PMID:19182780, SO:ke] -synonym: "large intervening non-coding RNA" EXACT [] -synonym: "long intergenic non-coding RNA" EXACT [] -is_a: SO:0001877 ! lnc_RNA - -[Term] -id: SO:0001464 -name: UST -def: "An EST spanning part or all of the untranslated regions of a protein-coding transcript." [SO:nlw] -synonym: "UTR sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001465 -name: three_prime_UST -def: "A UST located in the 3'UTR of a protein-coding transcript." [SO:nlw] -synonym: "3' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001466 -name: five_prime_UST -def: "An UST located in the 5'UTR of a protein-coding transcript." [SO:nlw] -synonym: "5' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001467 -name: RST -def: "A tag produced from a single sequencing read from a RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "RACE sequence tag" EXACT [] -is_a: SO:0000345 ! 
EST - -[Term] -id: SO:0001468 -name: three_prime_RST -def: "A tag produced from a single sequencing read from a 3'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "3' RST" EXACT [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001469 -name: five_prime_RST -def: "A tag produced from a single sequencing read from a 5'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "5' RST" RELATED [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001470 -name: UST_match -def: "A match against an UST sequence." [SO:nlw] -synonym: "UST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001471 -name: RST_match -def: "A match against an RST sequence." [SO:nlw] -synonym: "RST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001472 -name: primer_match -def: "A nucleotide match to a primer sequence." [SO:nlw] -synonym: "primer match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0001473 -name: miRNA_antiguide -def: "A region of the pri miRNA that basepairs with the guide to form the hairpin." [SO:ke] -synonym: "miRNA antiguide " EXACT [] -synonym: "miRNA passenger strand" EXACT [] -synonym: "miRNA star" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-05-27T03:35:43Z - -[Term] -id: SO:0001474 -name: trans_splice_junction -def: "The boundary between the spliced leader and the first exon of the mRNA." [SO:ke] -synonym: "trans-splice junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2009-07-13T04:50:49Z - -[Term] -id: SO:0001475 -name: outron -def: "A region of a primary transcript, that is removed via trans splicing." [PMID:16401417, SO:ke] -is_a: SO:0000835 ! primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-07-14T11:36:08Z - -[Term] -id: SO:0001476 -name: natural_plasmid -def: "A plasmid that occurs naturally." 
[SO:xp] -synonym: "natural plasmid" EXACT [] -is_a: SO:0000155 ! plasmid -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000782 ! natural -relationship: has_quality SO:0000782 ! natural -created_by: kareneilbeck -creation_date: 2009-09-01T03:43:06Z - -[Term] -id: SO:0001477 -name: gene_trap_construct -def: "A gene trap construct is a type of engineered plasmid which is designed to integrate into a genome and produce a fusion transcript between exons of the gene into which it inserts and a reporter element in the construct. Gene traps contain a splice acceptor, do not contain promoter elements for the reporter, and are mutagenic. Gene traps may be bicistronic with the second cassette containing a promoter driving an a selectable marker." [ZFIN:dh] -synonym: "gene trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:49:09Z - -[Term] -id: SO:0001478 -name: promoter_trap_construct -def: "A promoter trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when inserted in close proximity to a promoter element. Promoter traps typically do not contain promoter elements and are mutagenic." [ZFIN:dh] -synonym: "promoter trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:52:01Z - -[Term] -id: SO:0001479 -name: enhancer_trap_construct -def: "An enhancer trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when the expression from a basic minimal promoter is enhanced by genomic enhancer elements. Enhancer traps contain promoter elements and are not usually mutagenic." [ZFIN:dh] -synonym: "enhancer trap construct" EXACT [] -is_a: SO:0000637 ! 
engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:53:26Z - -[Term] -id: SO:0001480 -name: PAC_end -def: "A region of sequence from the end of a PAC clone that may provide a highly specific marker." [ZFIN:mh] -synonym: "PAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000154 ! PAC -created_by: kareneilbeck -creation_date: 2009-09-09T05:18:12Z - -[Term] -id: SO:0001481 -name: RAPD -def: "RAPD is a 'PCR product' where a sequence variant is identified through the use of PCR with random primers." [ZFIN:mh] -synonym: "Random Amplification Polymorphic DNA" EXACT [] -is_a: SO:0000006 ! PCR_product -created_by: kareneilbeck -creation_date: 2009-09-09T05:26:10Z - -[Term] -id: SO:0001482 -name: shadow_enhancer -synonym: "shadow enhancer" EXACT [] -is_a: SO:0000165 ! enhancer -created_by: kareneilbeck -creation_date: 2009-09-09T05:29:29Z - -[Term] -id: SO:0001483 -name: SNV -def: "SNVs are single nucleotide positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0001484 -name: X_element_combinatorial_repeat -def: "An X element combinatorial repeat is a repeat region located between the X element and the telomere or adjacent Y' element." [http://www.yeastgenome.org/help/glossary.html] -comment: X element combinatorial repeats contain Tbf1p binding sites,\nand possible functions include a role in telomerase-independent telomere\nmaintenance via recombination or as a barrier against transcriptional\nsilencing. These are usually present as a combination of one or more of\nseveral types of smaller elements (designated A, B, C, or D). This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "X element combinatorial repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! 
telomere -created_by: kareneilbeck -creation_date: 2009-11-10T11:03:37Z - -[Term] -id: SO:0001485 -name: Y_prime_element -def: "A Y' element is a repeat region (SO:0000657) located adjacent to telomeric repeats or X element combinatorial repeats, either as a single copy or tandem repeat of two to four copies." [http:http://www.yeastgenome.org/help/glossary.html] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "Y' element" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T12:08:57Z - -[Term] -id: SO:0001486 -name: standard_draft -def: "The status of a whole genome sequence, where the data is minimally filtered or un-filtered, from any number of sequencing platforms, and is assembled into contigs. Genome sequence of this quality may harbour regions of poor quality and can be relatively incomplete." [DOI:10.1126] -synonym: "standard draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:48:32Z - -[Term] -id: SO:0001487 -name: high_quality_draft -def: "The status of a whole genome sequence, where overall coverage represents at least 90 percent of the genome." [DOI:10.1126] -synonym: "high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:52:36Z - -[Term] -id: SO:0001488 -name: improved_high_quality_draft -def: "The status of a whole genome sequence, where additional work has been performed, using either manual or automated methods, such as gap resolution." [DOI:10.1126] -synonym: "improved high quality draft" EXACT [] -is_a: SO:0001499 ! 
whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:54:35Z - -[Term] -id: SO:0001489 -name: annotation_directed_improved_draft -def: "The status of a whole genome sequence,where annotation, and verification of coding regions has occurred." [DOI:10.1126] -synonym: "annotation directed improvement" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:57:10Z - -[Term] -id: SO:0001490 -name: noncontiguous_finished -def: "The status of a whole genome sequence, where the assembly is high quality, closure approaches have been successful for most gaps, misassemblies and low quality regions." [DOI:10.1126] -synonym: "non contiguous finished" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:01:07Z - -[Term] -id: SO:0001491 -name: finished_genome -def: "The status of a whole genome sequence, with less than 1 error per 100,000 base pairs." [DOI:10.1126] -synonym: "finished" EXACT [] -synonym: "finished genome" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:04:43Z - -[Term] -id: SO:0001492 -name: intronic_regulatory_region -def: "A regulatory region that is part of an intron." [SO:ke] -synonym: "intronic regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000188 ! intron -created_by: kareneilbeck -creation_date: 2009-11-08T02:48:02Z - -[Term] -id: SO:0001493 -name: centromere_DNA_Element_I -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region composed of 8-11bp which enables binding by the centromere binding factor 1(Cbf1p)." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEI" EXACT [] -synonym: "Centromere DNA Element I" EXACT [] -is_a: SO:0000330 ! 
conserved_region -relationship: part_of SO:0001794 ! point_centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:47:23Z - -[Term] -id: SO:0001494 -name: centromere_DNA_Element_II -def: "A centromere DNA Element II (CDEII) is part a conserved region of the centromere, consisting of a consensus region that is AT-rich and ~ 75-100 bp in length." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEII" EXACT [] -synonym: "centromere DNA Element II" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0001794 ! point_centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:51:26Z - -[Term] -id: SO:0001495 -name: centromere_DNA_Element_III -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region that consists of a 25-bp which enables binding by the centromere DNA binding factor 3 (CBF3) complex." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEIII" EXACT [] -synonym: "centromere DNA Element III" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0001794 ! point_centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:54:47Z - -[Term] -id: SO:0001496 -name: telomeric_repeat -def: "The telomeric repeat is a repeat region, part of the chromosome, which in yeast, is a G-rich terminal sequence of the form (TG(1-3))n or more precisely ((TG)(1-6)TG(2-3))n." [PMID:8720065] -comment: The repeats are maintained by telomerase and there is generally 300 (+/-) 75 bp of TG(1-3) at a given end. Telomeric repeats function in completing chromosome replication and protecting the ends from degradation and end-to-end fusions. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880739. -synonym: "telomeric repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! 
telomere -created_by: kareneilbeck -creation_date: 2009-11-09T06:00:42Z - -[Term] -id: SO:0001497 -name: X_element -def: "The X element is a conserved region, of the telomere, of ~475 bp that contains an ARS sequence and in most cases an Abf1p binding site." [http://www.yeastgenome.org/help/glossary.html#xelemcoresequence] -comment: Possible functions include roles in chromosomal segregation,\nmaintenance of chromosome stability, recombinational sequestering, or as a\nbarrier to transcriptional silencing. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "X element" RELATED [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T10:56:54Z - -[Term] -id: SO:0001498 -name: YAC_end -def: "A region of sequence from the end of a YAC clone that may provide a highly specific marker." [SO:ke] -synonym: "YAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000152 ! YAC -created_by: kareneilbeck -creation_date: 2009-11-19T11:07:18Z - -[Term] -id: SO:0001499 -name: whole_genome_sequence_status -def: "The status of whole genome sequence." [DOI:10.1126] -comment: This terms and children were added to SO in response to tracker request by Patrick Chain. The paper Genome Project Standards in a New Era of Sequencing. Science October 9th 2009, addresses these terms. -synonym: "whole genome sequence status" EXACT [] -is_a: SO:0000905 ! status -created_by: kareneilbeck -creation_date: 2009-10-23T12:47:47Z - -[Term] -id: SO:0001500 -name: heritable_phenotypic_marker -def: "A biological_region characterized as a single heritable trait in a phenotype screen. The heritable phenotype may be mapped to a chromosome but generally has not been characterized to a specific gene locus." [JAX:hdene] -synonym: "heritable phenotypic marker" EXACT [] -synonym: "phenotypic marker" EXACT [] -is_a: SO:0001645 ! 
genetic_marker -created_by: kareneilbeck -creation_date: 2009-12-07T01:50:55Z - -[Term] -id: SO:0001501 -name: peptide_collection -def: "A collection of peptide sequences." [BBOP:nlw] -comment: Term requested via tracker ID: 2910829. -synonym: "peptide collection" EXACT [] -synonym: "peptide set" EXACT [] -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0000104 ! polypeptide -relationship: has_part SO:0000104 ! polypeptide -created_by: kareneilbeck -creation_date: 2009-12-11T10:58:58Z - -[Term] -id: SO:0001502 -name: high_identity_region -def: "An experimental feature with high sequence identity to another sequence." [SO:ke] -comment: Requested by tracker ID: 2902685. -synonym: "high identity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2009-12-11T11:06:05Z - -[Term] -id: SO:0001503 -name: processed_transcript -def: "A transcript for which no open reading frame has been identified and for which no other function has been determined." [MGI:hdeen] -comment: Ensembl and Vega also use this term name. Requested by Howard Deen of MGI. -synonym: "processed transcript" EXACT [] -is_a: SO:0000673 ! transcript -created_by: kareneilbeck -creation_date: 2009-12-21T05:37:14Z - -[Term] -id: SO:0001504 -name: assortment_derived_variation -def: "A chromosome variation derived from an event during meiosis." [SO:ke] -synonym: "assortment derived variation" RELATED [] -is_a: SO:0000240 ! chromosome_variation -created_by: kareneilbeck -creation_date: 2010-03-02T05:03:18Z - -[Term] -id: SO:0001505 -name: reference_genome -def: "A collection of sequences (often chromosomes) taken as the standard for a given organism and genome assembly." [SO:ke] -synonym: "reference genome" RELATED [] -is_a: SO:0001026 ! 
genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:10:03Z - -[Term] -id: SO:0001506 -name: variant_genome -def: "A collection of sequences (often chromosomes) of an individual." [SO:ke] -synonym: "variant genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:11:25Z - -[Term] -id: SO:0001507 -name: variant_collection -def: "A collection of one or more sequences of an individual." [SO:ke] -synonym: "variant collection" RELATED [] -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0001059 ! sequence_alteration -relationship: has_part SO:0001059 ! sequence_alteration -created_by: kareneilbeck -creation_date: 2010-03-03T02:13:28Z - -[Term] -id: SO:0001508 -name: alteration_attribute -synonym: "alteration attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:53:23Z - -[Term] -id: SO:0001509 -name: chromosomal_variation_attribute -synonym: "chromosomal variation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:54:30Z - -[Term] -id: SO:0001510 -name: intrachromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:25Z - -[Term] -id: SO:0001511 -name: interchromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:43Z - -[Term] -id: SO:0001512 -name: insertion_attribute -def: "A quality of a chromosomal insertion,." [SO:ke] -synonym: "insertion attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:56Z - -[Term] -id: SO:0001513 -name: tandem -is_a: SO:0001512 ! 
insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:37Z - -[Term] -id: SO:0001514 -name: direct -def: "A quality of an insertion where the insert is not in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:49Z - -[Term] -id: SO:0001515 -name: inverted -def: "A quality of an insertion where the insert is in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:40Z - -[Term] -id: SO:0001516 -name: free -def: "The quality of a duplication where the new region exists independently of the original." [SO:ke] -is_a: SO:0001523 ! duplication_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:51Z - -[Term] -id: SO:0001517 -name: inversion_attribute -synonym: "inversion attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:10Z - -[Term] -id: SO:0001518 -name: pericentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:24Z - -[Term] -id: SO:0001519 -name: paracentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:35Z - -[Term] -id: SO:0001520 -name: translocaton_attribute -synonym: "translocation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:47Z - -[Term] -id: SO:0001521 -name: reciprocal -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:34Z - -[Term] -id: SO:0001522 -name: insertional -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:51Z - -[Term] -id: SO:0001523 -name: duplication_attribute -synonym: "duplication attribute" RELATED [] -is_a: SO:0001508 ! 
alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-05T01:56:33Z - -[Term] -id: SO:0001524 -name: chromosomally_aberrant_genome -synonym: "chromosomally aberrant genome" RELATED [] -is_a: SO:0001506 ! variant_genome -created_by: kareneilbeck -creation_date: 2010-03-05T02:21:00Z - -[Term] -id: SO:0001525 -name: assembly_error_correction -def: "A region of sequence where the final nucleotide assignment differs from the original assembly due to an improvement that replaces a mistake." [SO:ke] -synonym: "assembly error correction" RELATED [] -is_a: SO:0000413 ! sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:16:31Z - -[Term] -id: SO:0001526 -name: base_call_error_correction -def: "A region of sequence where the final nucleotide assignment is different from that given by the base caller due to an improvement that replaces a mistake." [SO:ke] -synonym: "base call error correction" RELATED [] -is_a: SO:0000413 ! sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:18:07Z - -[Term] -id: SO:0001527 -name: peptide_localization_signal -def: "A region of peptide sequence used to target the polypeptide molecule to a specific organelle." [SO:ke] -subset: SOFA -synonym: "localization signal" RELATED [] -synonym: "peptide localization signal" EXACT [] -is_a: SO:0000839 ! polypeptide_region -created_by: kareneilbeck -creation_date: 2010-03-11T02:15:05Z - -[Term] -id: SO:0001528 -name: nuclear_localization_signal -def: "A polypeptide region that targets a polypeptide to the nucleus." [SO:ke] -synonym: "NLS" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_localization_signal "wikipedia" -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:16:38Z - -[Term] -id: SO:0001529 -name: endosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the endosome." [SO:ke] -synonym: "endosomal localization signal" EXACT [] -is_a: SO:0001527 ! 
peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:20:58Z - -[Term] -id: SO:0001530 -name: lysosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the lysosome." [SO:ke] -synonym: "lysosomal localization signal" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:24:10Z - -[Term] -id: SO:0001531 -name: nuclear_export_signal -def: "A polypeptide region that targets a polypeptide to he cytoplasm." [SO:ke] -synonym: "NES" EXACT [] -synonym: "nuclear export signal" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_export_signal -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:25:25Z - -[Term] -id: SO:0001532 -name: recombination_signal_sequence -def: "A region recognized by a recombinase." [SO:ke] -synonym: "recombination signal sequence" RELATED [] -xref: http://en.wikipedia.org/wiki/Recombination_Signal_Sequences "wikipedia" -is_a: SO:0000299 ! specific_recombination_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:16:47Z - -[Term] -id: SO:0001533 -name: cryptic_splice_site -def: "A splice site that is in part of the transcript not normally spliced. They occur via mutation or transcriptional error." [SO:ke] -synonym: "cryptic splice signal" RELATED [] -synonym: "cryptic splice site" EXACT [] -is_a: SO:0000162 ! splice_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:25:06Z - -[Term] -id: SO:0001534 -name: nuclear_rim_localization_signal -def: "A polypeptide region that targets a polypeptide to the nuclear rim." [SO:ke] -synonym: "nuclear rim localization signal" RELATED [] -xref: PMID:16027110 -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T03:31:30Z - -[Term] -id: SO:0001535 -name: p_element -def: "A P_element is a DNA transposon responsible for hybrid dysgenesis." 
[SO:ke] -synonym: "P element" RELATED [] -is_a: SO:0000182 ! DNA_transposon -created_by: kareneilbeck -creation_date: 2010-03-12T03:40:33Z - -[Term] -id: SO:0001536 -name: functional_variant -def: "A sequence variant in which the function of a gene product is altered with respect to a reference." [SO:ke] -synonym: "functional variant" EXACT [] -is_a: SO:0001060 ! sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:30:25Z - -[Term] -id: SO:0001537 -name: structural_variant -def: "A sequence variant that changes one or more sequence features." [SO:ke] -synonym: "structural variant" RELATED [] -is_a: SO:0001060 ! sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:31:01Z - -[Term] -id: SO:0001538 -name: transcript_function_variant -def: "A sequence variant which alters the functioning of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcript function variant" EXACT [] -is_a: SO:0001536 ! functional_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:32:58Z - -[Term] -id: SO:0001539 -name: translational_product_function_variant -def: "A sequence variant that affects the functioning of a translational product with respect to a reference sequence." [SO:ke] -synonym: "translational product variant" EXACT [] -is_a: SO:0001536 ! functional_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:46:15Z - -[Term] -id: SO:0001540 -name: level_of_transcript_variant -def: "A sequence variant which alters the level of a transcript." [SO:ke] -synonym: "level of transcript variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:47:07Z - -[Term] -id: SO:0001541 -name: decreased_transcript_level_variant -def: "A sequence variant that increases the level of mature, spliced and processed RNA with respect to a reference sequence." [SO:ke] -synonym: "decreased transcript level" EXACT [] -is_a: SO:0001540 ! 
level_of_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:47:47Z - -[Term] -id: SO:0001542 -name: increased_transcript_level_variant -def: "A sequence variant that increases the level of mature, spliced and processed RNA with respect to a reference sequence." [SO:ke] -synonym: "increased transcript level variant" EXACT [] -is_a: SO:0001540 ! level_of_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:48:17Z - -[Term] -id: SO:0001543 -name: transcript_processing_variant -def: "A sequence variant that affects the post transcriptional processing of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcript processing variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:48:48Z - -[Term] -id: SO:0001544 -name: editing_variant -def: "A transcript processing variant whereby the process of editing is disrupted with respect to the reference." [SO:ke] -synonym: "editing variant" EXACT [] -is_a: SO:0001543 ! transcript_processing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:49:25Z - -[Term] -id: SO:0001545 -name: polyadenylation_variant -def: "A sequence variant that changes polyadenylation with respect to a reference sequence." [SO:ke] -synonym: "polyadenylation variant" EXACT [] -is_a: SO:0001543 ! transcript_processing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:49:40Z - -[Term] -id: SO:0001546 -name: transcript_stability_variant -def: "A variant that changes the stability of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcript stability variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:01Z - -[Term] -id: SO:0001547 -name: decreased_transcript_stability_variant -def: "A sequence variant that decreases transcript stability with respect to a reference sequence." 
[SO:ke] -synonym: "decrease transcript stability variant" EXACT [] -is_a: SO:0001546 ! transcript_stability_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:23Z - -[Term] -id: SO:0001548 -name: increased_transcript_stability_variant -def: "A sequence variant that increases transcript stability with respect to a reference sequence." [SO:ke] -synonym: "increased transcript stability variant" EXACT [] -is_a: SO:0001546 ! transcript_stability_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:39Z - -[Term] -id: SO:0001549 -name: transcription_variant -def: "A variant that changes alters the transcription of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcription variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:51:26Z - -[Term] -id: SO:0001550 -name: rate_of_transcription_variant -def: "A sequence variant that changes the rate of transcription with respect to a reference sequence." [SO:ke] -synonym: "rate of transcription variant" EXACT [] -is_a: SO:0001549 ! transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:51:50Z - -[Term] -id: SO:0001551 -name: increased_transcription_rate_variant -def: "A sequence variant that increases the rate of transcription with respect to a reference sequence." [SO:ke] -synonym: "increased transcription rate variant" EXACT [] -is_a: SO:0001550 ! rate_of_transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:52:17Z - -[Term] -id: SO:0001552 -name: decreased_transcription_rate_variant -def: "A sequence variant that decreases the rate of transcription with respect to a reference sequence." [SO:ke] -synonym: "decreased transcription rate variant" EXACT [] -is_a: SO:0001550 ! 
rate_of_transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:52:43Z - -[Term] -id: SO:0001553 -name: translational_product_level_variant -def: "A functional variant that changes the translational product level with respect to a reference sequence." [SO:ke] -synonym: "translational product level variant" EXACT [] -is_a: SO:0001539 ! translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:53:32Z - -[Term] -id: SO:0001554 -name: polypeptide_function_variant -def: "A sequence variant which changes polypeptide functioning with respect to a reference sequence." [SO:ke] -synonym: "polypeptide function variant" EXACT [] -is_a: SO:0001539 ! translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:53:54Z - -[Term] -id: SO:0001555 -name: decreased_translational_product_level -def: "A sequence variant which decreases the translational product level with respect to a reference sequence." [SO:ke] -synonym: "decrease translational product level" EXACT [] -is_a: SO:0001553 ! translational_product_level_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:54:25Z - -[Term] -id: SO:0001556 -name: increased_translational_product_level -def: "A sequence variant which increases the translational product level with respect to a reference sequence." [SO:ke] -synonym: "increase translational product level" EXACT [] -is_a: SO:0001553 ! translational_product_level_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:55:25Z - -[Term] -id: SO:0001557 -name: polypeptide_gain_of_function_variant -def: "A sequence variant which causes gain of polypeptide function with respect to a reference sequence." [SO:ke] -synonym: "polypeptide gain of function variant" EXACT [] -is_a: SO:0001554 ! 
polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:12Z - -[Term] -id: SO:0001558 -name: polypeptide_localization_variant -def: "A sequence variant which changes the localization of a polypeptide with respect to a reference sequence." [SO:ke] -synonym: "polypeptide localization variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:37Z - -[Term] -id: SO:0001559 -name: polypeptide_loss_of_function_variant -def: "A sequence variant that causes the loss of a polypeptide function with respect to a reference sequence." [SO:ke] -synonym: "polypeptide loss of function variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:58Z - -[Term] -id: SO:0001560 -name: inactive_ligand_binding_site -def: "A sequence variant that causes the inactivation of a ligand binding site with respect to a reference sequence." [SO:ke] -synonym: "inactive ligand binding site" EXACT [] -is_a: SO:0001559 ! polypeptide_loss_of_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:58:00Z - -[Term] -id: SO:0001561 -name: polypeptide_partial_loss_of_function -def: "A sequence variant that causes some but not all loss of polypeptide function with respect to a reference sequence." [SO:ke] -synonym: "polypeptide partial loss of function" EXACT [] -is_a: SO:0001559 ! polypeptide_loss_of_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:58:32Z - -[Term] -id: SO:0001562 -name: polypeptide_post_translational_processing_variant -def: "A sequence variant that causes a change in post translational processing of the peptide with respect to a reference sequence." [SO:ke] -synonym: "polypeptide post translational processing variant" EXACT [] -is_a: SO:0001554 ! 
polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:59:06Z - -[Term] -id: SO:0001563 -name: copy_number_change -def: "A sequence variant where copies of a feature (CNV) are either increased or decreased." [SO:ke] -synonym: "copy number change" EXACT [] -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:27:33Z - -[Term] -id: SO:0001564 -name: gene_variant -def: "A sequence variant where the structure of the gene is changed." [SO:ke] -synonym: "gene structure variant" EXACT [] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:01Z - -[Term] -id: SO:0001565 -name: gene_fusion -def: "A sequence variant whereby a two genes have become joined." [SO:ke] -synonym: "gene fusion" EXACT [] -is_a: SO:0001564 ! gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:28Z - -[Term] -id: SO:0001566 -name: regulatory_region_variant -def: "A sequence variant located within a regulatory region." [SO:ke] -comment: EBI term: Regulatory region variations - In regulatory region annotated by Ensembl. -synonym: "regulatory region variant" EXACT [] -synonym: "regulatory_region_" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:48Z - -[Term] -id: SO:0001567 -name: stop_retained_variant -def: "A sequence variant where at least one base in the terminator codon is changed, but the terminator remains." [SO:ke] -synonym: "stop retained variant" EXACT [] -is_a: SO:0001590 ! terminator_codon_variant -is_a: SO:0001819 ! synonymous_variant -created_by: kareneilbeck -creation_date: 2010-04-19T05:02:30Z - -[Term] -id: SO:0001568 -name: splicing_variant -def: "A sequence variant that changes the process of splicing." [SO:ke] -synonym: "splicing variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001564 ! 
gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:29:22Z - -[Term] -id: SO:0001569 -name: cryptic_splice_site_variant -def: "A sequence variant causing a new (functional) splice site." [EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "cryptic splice site activation" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:29:41Z - -[Term] -id: SO:0001570 -name: cryptic_splice_acceptor -def: "A sequence variant whereby a new splice site is created due to the activation of a new acceptor." [SO:ke] -synonym: "cryptic splice acceptor" EXACT [] -is_a: SO:0001569 ! cryptic_splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:30:11Z - -[Term] -id: SO:0001571 -name: cryptic_splice_donor -def: "A sequence variant whereby a new splice site is created due to the activation of a new donor." [SO:ke] -synonym: "cryptic splice donor" EXACT [] -is_a: SO:0001569 ! cryptic_splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:30:35Z - -[Term] -id: SO:0001572 -name: exon_loss -def: "A sequence variant whereby an exon is lost from the transcript." [SO:ke] -synonym: "exon loss" EXACT [] -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:09Z - -[Term] -id: SO:0001573 -name: intron_gain -def: "A sequence variant whereby an intron is gained by the processed transcript; usually a result of an alteration of the donor or acceptor." [EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "intron gain" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:25Z - -[Term] -id: SO:0001574 -name: splice_acceptor_variant -def: "A splice variant that changes the 2 base region at the 3' end of an intron." 
[SO:ke] -synonym: "splice acceptor variant" EXACT [] -is_a: SO:0001629 ! splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:52Z - -[Term] -id: SO:0001575 -name: splice_donor_variant -def: "A splice variant that changes the2 base region at the 5' end of an intron." [SO:ke] -synonym: "splice donor variant" EXACT [] -is_a: SO:0001629 ! splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:32:10Z - -[Term] -id: SO:0001576 -name: transcript_variant -def: "A sequence variant that changes the structure of the transcript." [SO:ke] -synonym: "transcript variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001564 ! gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:32:41Z - -[Term] -id: SO:0001577 -name: complex_transcript_variant -def: "A transcript variant with a complex INDEL- Insertion or deletion that spans an exon/intron border or a coding sequence/UTR border." [http://ensembl.org/info/docs/variation/index.html] -comment: EBI term: Complex InDel - Insertion or deletion that spans an exon/intron border or a coding sequence/UTR border. -synonym: "complex transcript variant" EXACT [] -synonym: "complex_indel" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "complext change in transcript" EXACT [] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:33:03Z - -[Term] -id: SO:0001578 -name: stop_lost -def: "A sequence variant where at least one base of the terminator codon (stop) is changed, resulting in an elongated transcript." [SO:ke] -comment: EBI term: Stop lost - In coding sequence, resulting in the loss of a stop codon. -synonym: "stop codon lost" EXACT [] -synonym: "stop lost" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001590 ! terminator_codon_variant -is_a: SO:0001650 ! inframe_variant -is_a: SO:0001907 ! 
feature_elongation -created_by: kareneilbeck -creation_date: 2010-03-23T03:46:42Z - -[Term] -id: SO:0001579 -name: transcript_sequence_variant -synonym: "transcript sequence variant" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001580 -name: coding_sequence_variant -alt_id: SO:0001581 -def: "A sequence variant that changes the coding sequence." [SO:ke] -synonym: "coding sequence variant" EXACT [] -synonym: "coding variant" EXACT [] -synonym: "codon variant" EXACT [] -synonym: "codon_variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001791 ! exon_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:34:36Z - -[Term] -id: SO:0001582 -name: initiator_codon_variant -def: "A codon variant that changes at least one base of the first codon of a transcript." [SO:ke] -synonym: "initiatior codon variant" EXACT [] -synonym: "initiator codon change" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -xref: loinc:LA6695-6 "Initiating Methionine" -is_a: SO:0001650 ! inframe_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:35:18Z - -[Term] -id: SO:0001583 -name: missense_variant -alt_id: SO:0001584 -alt_id: SO:0001783 -def: "A sequence variant, that changes one or more bases, resulting in a different amino acid sequence but where the length is preserved." [EBI:fc, EBI:gr, SO:ke] -comment: EBI term: Non-synonymous SNPs. SNPs that are located in the coding sequence and result in an amino acid change in the encoded peptide sequence. A change that causes a non_synonymous_codon can be more than 3 bases - for example 4 base substitution. 
-synonym: "missense" EXACT [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "missense codon" EXACT [] -synonym: "non synonymous codon" EXACT [] -synonym: "non synonymous variant" EXACT [] -synonym: "non_synonymous_coding" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -xref: http://en.wikipedia.org/wiki/Missense_mutation -xref: loinc:LA6698-0 "Missense" -is_a: SO:0001650 ! inframe_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:35:49Z - -[Term] -id: SO:0001585 -name: conservative_missense_variant -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for a different but similar amino acid. These variants may or may not be deleterious." [SO:ke] -synonym: "conservative missense codon" EXACT [] -synonym: "conservative missense variant" EXACT [] -synonym: "neutral missense codon" RELATED [] -synonym: "quiet missense codon" RELATED [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001583 ! missense_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:36:40Z - -[Term] -id: SO:0001586 -name: non_conservative_missense_variant -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for an amino acid with different biochemical properties." [SO:ke] -synonym: "non conservative missense codon" EXACT [] -synonym: "non conservative missense variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001583 ! missense_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:37:16Z - -[Term] -id: SO:0001587 -name: stop_gained -def: "A sequence variant whereby at least one base of a codon is changed, resulting in a premature stop codon, leading to a shortened transcript." [SO:ke] -comment: EBI term: Stop gained - In coding sequence, resulting in the gain of a stop codon (i.e. 
leading to a shortened peptide sequence). -synonym: "nonsense" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "nonsense codon" EXACT [] -synonym: "stop codon gained" RELATED [] -synonym: "stop gained" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -xref: loinc:LA6699-8 "Nonsense" -is_a: SO:0001650 ! inframe_variant -is_a: SO:0001906 ! feature_truncation -created_by: kareneilbeck -creation_date: 2010-03-22T02:37:52Z - -[Term] -id: SO:0001589 -name: frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three." [SO:ke] -comment: EBI term:Frameshift variations - In coding sequence, resulting in a frameshift. -synonym: "frameshift variant" EXACT [] -synonym: "frameshift_" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "frameshift_coding" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -xref: loinc:LA6694-9 "Frameshift" -is_a: SO:0001818 ! protein_altering_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:40:19Z - -[Term] -id: SO:0001590 -name: terminator_codon_variant -alt_id: SO:0001625 -def: "A sequence variant whereby at least one of the bases in the terminator codon is changed." [SO:ke] -comment: The terminal codon may be the terminator, or in an incomplete transcript the last available codon. -synonym: "terminal codon variant" EXACT [] -synonym: "terminal_codon_variant" EXACT [] -synonym: "terminator codon variant" EXACT [] -xref: loinc:LA6700-2 "Stop Codon Mutation" -is_a: SO:0001580 ! coding_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:40:37Z - -[Term] -id: SO:0001591 -name: frame_restoring_variant -def: "A sequence variant that reverts the sequence of a previous frameshift mutation back to the initial frame." [SO:ke] -synonym: "frame restoring variant" EXACT [] -is_a: SO:0001589 ! 
frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:09Z - -[Term] -id: SO:0001592 -name: minus_1_frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, by shifting one base ahead." [http://arjournals.annualreviews.org/doi/pdf/10.1146/annurev.ge.08.120174.001535] -synonym: "-1 frameshift variant" EXACT [] -synonym: "minus 1 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:30Z - -[Term] -id: SO:0001593 -name: minus_2_frameshift_variant -synonym: "-2 frameshift variant" EXACT [] -synonym: "minus 2 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:52Z - -[Term] -id: SO:0001594 -name: plus_1_frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, by shifting one base backward." [http://arjournals.annualreviews.org/doi/pdf/10.1146/annurev.ge.08.120174.001535] -synonym: "+1 frameshift variant" EXACT [] -synonym: "plus 1 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:42:06Z - -[Term] -id: SO:0001595 -name: plus_2_frameshift_variant -synonym: "+2 frameshift variant" EXACT [] -synonym: "plus 2 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:42:23Z - -[Term] -id: SO:0001596 -name: transcript_secondary_structure_variant -def: "A sequence variant within a transcript that changes the secondary structure of the RNA product." [SO:ke] -synonym: "transcript secondary structure variant" EXACT [] -is_a: SO:0001576 ! 
transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:43:18Z - -[Term] -id: SO:0001597 -name: compensatory_transcript_secondary_structure_variant -def: "A secondary structure variant that compensate for the change made by a previous variant." [SO:ke] -synonym: "compensatory transcript secondary structure variant" EXACT [] -is_a: SO:0001596 ! transcript_secondary_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:43:54Z - -[Term] -id: SO:0001598 -name: translational_product_structure_variant -def: "A sequence variant within the transcript that changes the structure of the translational product." [SO:ke] -synonym: "translational product structure variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001564 ! gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:44:17Z - -[Term] -id: SO:0001599 -name: 3D_polypeptide_structure_variant -def: "A sequence variant that changes the resulting polypeptide structure." [SO:ke] -synonym: "3D polypeptide structure variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:44:46Z - -[Term] -id: SO:0001600 -name: complex_3D_structural_variant -def: "A sequence variant that changes the resulting polypeptide structure." [SO:ke] -synonym: "complex 3D structural variant" EXACT [] -is_a: SO:0001599 ! 3D_polypeptide_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:45:13Z - -[Term] -id: SO:0001601 -name: conformational_change_variant -def: "A sequence variant in the CDS region that causes a conformational change in the resulting polypeptide sequence." [SO:ke] -synonym: "conformational change variant" EXACT [] -is_a: SO:0001599 ! 
3D_polypeptide_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:45:48Z - -[Term] -id: SO:0001602 -name: complex_change_of_translational_product_variant -synonym: "complex change of translational product variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:46:54Z - -[Term] -id: SO:0001603 -name: polypeptide_sequence_variant -def: "A sequence variant with in the CDS that causes a change in the resulting polypeptide sequence." [SO:ke] -synonym: "polypeptide sequence variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:13Z - -[Term] -id: SO:0001604 -name: amino_acid_deletion -def: "A sequence variant within a CDS resulting in the loss of an amino acid from the resulting polypeptide." [SO:ke] -synonym: "amino acid deletion" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:36Z - -[Term] -id: SO:0001605 -name: amino_acid_insertion -def: "A sequence variant within a CDS resulting in the gain of an amino acid to the resulting polypeptide." [SO:ke] -synonym: "amino acid insertion" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:56Z - -[Term] -id: SO:0001606 -name: amino_acid_substitution -def: "A sequence variant of a codon resulting in the substitution of one amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "amino acid substitution" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! 
polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:48:17Z - -[Term] -id: SO:0001607 -name: conservative_amino_acid_substitution -def: "A sequence variant of a codon causing the substitution of a similar amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "conservative amino acid substitution" EXACT [] -is_a: SO:0001606 ! amino_acid_substitution -created_by: kareneilbeck -creation_date: 2010-03-22T02:48:57Z - -[Term] -id: SO:0001608 -name: non_conservative_amino_acid_substitution -def: "A sequence variant of a codon causing the substitution of a non conservative amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "non conservative amino acid substitution" EXACT [] -is_a: SO:0001606 ! amino_acid_substitution -created_by: kareneilbeck -creation_date: 2010-03-22T02:49:23Z - -[Term] -id: SO:0001609 -name: elongated_polypeptide -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence." [SO:ke] -synonym: "elongated polypeptide" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:49:52Z - -[Term] -id: SO:0001610 -name: elongated_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated polypeptide C terminal" EXACT [] -is_a: SO:0001609 ! elongated_polypeptide -created_by: kareneilbeck -creation_date: 2010-03-22T02:50:20Z - -[Term] -id: SO:0001611 -name: elongated_polypeptide_N_terminal -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated polypeptide N terminal" EXACT [] -is_a: SO:0001609 ! 
elongated_polypeptide -created_by: kareneilbeck -creation_date: 2010-03-22T02:50:31Z - -[Term] -id: SO:0001612 -name: elongated_in_frame_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes in frame elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated in frame polypeptide C terminal" EXACT [] -is_a: SO:0001610 ! elongated_polypeptide_C_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:05Z - -[Term] -id: SO:0001613 -name: elongated_out_of_frame_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes out of frame elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated polypeptide out of frame C terminal" EXACT [] -is_a: SO:0001610 ! elongated_polypeptide_C_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:20Z - -[Term] -id: SO:0001614 -name: elongated_in_frame_polypeptide_N_terminal_elongation -def: "A sequence variant with in the CDS that causes in frame elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated in frame polypeptide N terminal" EXACT [] -is_a: SO:0001611 ! elongated_polypeptide_N_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:49Z - -[Term] -id: SO:0001615 -name: elongated_out_of_frame_polypeptide_N_terminal -def: "A sequence variant with in the CDS that causes out of frame elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated out of frame N terminal" EXACT [] -is_a: SO:0001611 ! elongated_polypeptide_N_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:52:05Z - -[Term] -id: SO:0001616 -name: polypeptide_fusion -def: "A sequence variant that causes a fusion of two polypeptide sequences." [SO:ke] -synonym: "polypeptide fusion" EXACT [] -is_a: SO:0001603 ! 
polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:52:43Z - -[Term] -id: SO:0001617 -name: polypeptide_truncation -def: "A sequence variant of the CD that causes a truncation of the resulting polypeptide." [SO:ke] -synonym: "polypeptide truncation" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:53:07Z - -[Term] -id: SO:0001618 -name: inactive_catalytic_site -def: "A sequence variant that causes the inactivation of a catalytic site with respect to a reference sequence." [SO:ke] -synonym: "inactive catalytic site" EXACT [] -is_a: SO:0001560 ! inactive_ligand_binding_site -created_by: kareneilbeck -creation_date: 2010-03-22T03:06:14Z - -[Term] -id: SO:0001619 -name: nc_transcript_variant -def: "A transcript variant of a non coding RNA gene." [SO:ke] -comment: Within non-coding gene - Located within a gene that does not code for a protein. -synonym: "nc transcript variant" EXACT [] -synonym: "non coding transcript variant" EXACT [] -synonym: "within_non_coding_gene" EXACT dbsnp [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:16:23Z - -[Term] -id: SO:0001620 -name: mature_miRNA_variant -def: "A transcript variant located with the sequence of the mature miRNA." [SO:ke] -comment: EBI term: Within mature miRNA - Located within a microRNA. -synonym: "mature miRNA variant" EXACT [] -synonym: "within_mature_miRNA" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001619 ! nc_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:16:58Z - -[Term] -id: SO:0001621 -name: NMD_transcript_variant -def: "A variant in a transcript that is the target of NMD." 
[SO:ke] -synonym: "NMD transcript variant" EXACT [] -synonym: "NMD_transcript" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:20:40Z - -[Term] -id: SO:0001622 -name: UTR_variant -def: "A transcript variant that is located within the UTR." [SO:ke] -synonym: "UTR variant" EXACT [] -synonym: "UTR_" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:22:58Z - -[Term] -id: SO:0001623 -name: 5_prime_UTR_variant -def: "A UTR variant of the 5' UTR." [SO:ke] -comment: EBI term: 5prime UTR variations - In 5prime UTR (untranslated region). -synonym: "5'UTR variant" EXACT [] -synonym: "5PRIME_UTR" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "five prime UTR variant" EXACT [] -synonym: "untranslated-5" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001622 ! UTR_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:23:29Z - -[Term] -id: SO:0001624 -name: 3_prime_UTR_variant -def: "A UTR variant of the 3' UTR." [SO:ke] -comment: EBI term 3prime UTR variations - In 3prime UTR. -synonym: "3'UTR variant" EXACT [] -synonym: "3PRIME_UTR" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "three prime UTR variant" EXACT [] -synonym: "untranslated-3" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001622 ! UTR_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:23:54Z - -[Term] -id: SO:0001626 -name: incomplete_terminal_codon_variant -def: "A sequence variant where at least one base of the final codon of an incompletely annotated transcript is changed." [SO:ke] -comment: EBI term: Partial codon - Located within the final, incomplete codon of a transcript with a shortened coding sequence where the end is unknown. 
-synonym: "incomplete terminal codon variant" EXACT [] -synonym: "partial_codon" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001590 ! terminator_codon_variant -is_a: SO:0001650 ! inframe_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:51:15Z - -[Term] -id: SO:0001627 -name: intron_variant -def: "A transcript variant occurring within an intron." [SO:ke] -comment: EBI term: Intronic variations - In intron. -synonym: "intron variant" EXACT [] -synonym: "intron_" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "intronic" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:52:38Z - -[Term] -id: SO:0001628 -name: intergenic_variant -def: "A sequence variant located in the intergenic region, between genes." [SO:ke] -comment: EBI term Intergenic variations - More than 5 kb either upstream or downstream of a transcript. -synonym: "intergenic" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "intergenic variant" EXACT [] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-23T05:07:37Z - -[Term] -id: SO:0001629 -name: splice_site_variant -def: "A sequence variant that changes the first two or last two bases of an intron, or the 5th base from the start of the intron in the orientation of the transcript." [http://ensembl.org/info/docs/variation/index.html] -comment: EBI term - essential splice site - In the first 2 or the last 2 base pairs of an intron. The 5th base is on the donor (5') side of the intron. Updated to b in line with Cancer Genome Project at the Sanger. -synonym: "essential_splice_site" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "splice site variant" EXACT [] -is_a: SO:0001627 ! 
intron_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:42:00Z - -[Term] -id: SO:0001630 -name: splice_region_variant -def: "A sequence variant in which a change has occurred within the region of the splice site, either within 1-3 bases of the exon or 3-8 bases of the intron." [http://ensembl.org/info/docs/variation/index.html] -comment: EBI term: splice site - 1-3 bps into an exon or 3-8 bps into an intron. -synonym: "splice region variant" EXACT [] -synonym: "splice_region_variant" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:46:02Z - -[Term] -id: SO:0001631 -name: upstream_gene_variant -def: "A sequence variant located 5' of a gene." [SO:ke] -comment: Different groups annotate up and downstream to different lengths. The subtypes are specific and are backed up with cross references. -synonym: "upstream gene variant" EXACT [] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:49:13Z - -[Term] -id: SO:0001632 -name: downstream_gene_variant -def: "A sequence variant located 3' of a gene." [SO:ke] -comment: Different groups annotate up and downstream to different lengths. The subtypes are specific and are backed up with cross references. -synonym: "downstream gene variant" EXACT [] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:49:38Z - -[Term] -id: SO:0001633 -name: 5KB_downstream_variant -def: "A sequence variant located within 5 KB of the end of a gene." [SO:ke] -comment: EBI term Downstream variations - Within 5 kb downstream of the 3prime end of a transcript. -synonym: "5KB downstream variant" EXACT [] -synonym: "downstream" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "within 5KB downstream" RELATED [] -is_a: SO:0001632 ! 
downstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:50:16Z - -[Term] -id: SO:0001634 -name: 500B_downstream_variant -def: "A sequence variant located within a half KB of the end of a gene." [SO:ke] -synonym: "500B downstream variant" EXACT [] -synonym: "near-gene-3" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001633 ! 5KB_downstream_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:50:42Z - -[Term] -id: SO:0001635 -name: 5KB_upstream_variant -def: "A sequence variant located within 5KB 5' of a gene." [SO:ke] -comment: EBI term Upstream variations - Within 5 kb upstream of the 5prime end of a transcript. -synonym: "5kb upstream variant" EXACT [] -synonym: "upstream" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001631 ! upstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:51:06Z - -[Term] -id: SO:0001636 -name: 2KB_upstream_variant -def: "A sequence variant located within 2KB 5' of a gene." [SO:ke] -synonym: "2KB upstream variant" EXACT [] -synonym: "near-gene-5" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001635 ! 5KB_upstream_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:51:22Z - -[Term] -id: SO:0001637 -name: rRNA_gene -def: "A gene that encodes for ribosomal RNA." [SO:ke] -synonym: "rRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:10:32Z - -[Term] -id: SO:0001638 -name: piRNA_gene -def: "A gene that encodes for an piwi associated RNA." [SO:ke] -synonym: "piRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:11:36Z - -[Term] -id: SO:0001639 -name: RNase_P_RNA_gene -def: "A gene that encodes an RNase P RNA." [SO:ke] -synonym: "RNase P RNA gene" EXACT [] -is_a: SO:0001263 ! 
ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:13:23Z - -[Term] -id: SO:0001640 -name: RNase_MRP_RNA_gene -def: "A gene that encodes a RNase_MRP_RNA." [SO:ke] -synonym: "RNase MRP RNA gene" RELATED [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:13:58Z - -[Term] -id: SO:0001641 -name: lincRNA_gene -def: "A gene that encodes large intervening non-coding RNA." [SO:ke] -synonym: "lincRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:14:24Z - -[Term] -id: SO:0001642 -name: mathematically_defined_repeat -def: "A mathematically defined repeat (MDR) is a experimental feature that is determined by querying overlapping oligomers of length k against a database of shotgun sequence data and identifying regions in the query sequence that exceed a statistically determined threshold of repetitiveness." [SO:jestill] -comment: Mathematically defined repeat regions are determined without regard to the biological origin of the repetitive region. The repeat units of a MDR are the overlapping oligomers of size k that were used to for the query. Tools that can annotate mathematically defined repeats include Tallymer (Kurtz et al 2008, BMC Genomics: 517) and RePS (Wang et al, Genome Res 12(5): 824-831.). -synonym: "mathematically defined repeat" EXACT [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2010-05-03T11:50:14Z - -[Term] -id: SO:0001643 -name: telomerase_RNA_gene -def: "A telomerase RNA gene is a non coding RNA gene the RNA product of which is a component of telomerase." [SO:ke] -synonym: "Telomerase RNA component" EXACT [] -synonym: "telomerase RNA gene" EXACT [] -synonym: "TERC" EXACT [] -xref: http:http://en.wikipedia.org/wiki/Telomerase_RNA_component "wikipedia" -is_a: SO:0001263 ! 
ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-05-18T05:26:38Z - -[Term] -id: SO:0001644 -name: targeting_vector -def: "An engineered vector that is able to take part in homologous recombination in a host with the intent of introducing site specific genomic modifications." [MGD:tm, PMID:10354467] -synonym: "targeting vector" RELATED [] -is_a: SO:0000440 ! vector_replicon -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000440 ! vector_replicon -intersection_of: has_part SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_part SO:0000853 ! homologous_region -relationship: has_quality SO:0000783 ! engineered -created_by: kareneilbeck -creation_date: 2010-05-28T02:05:25Z - -[Term] -id: SO:0001645 -name: genetic_marker -def: "A measurable sequence feature that varies within a population." [SO:db] -synonym: "genetic marker" RELATED [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-05-28T02:33:07Z - -[Term] -id: SO:0001646 -name: DArT_marker -def: "A genetic marker, discovered using Diversity Arrays Technology (DArT) technology." [SO:ke] -synonym: "DArT marker" EXACT [] -is_a: SO:0001645 ! genetic_marker -created_by: kareneilbeck -creation_date: 2010-05-28T02:34:43Z - -[Term] -id: SO:0001647 -name: kozak_sequence -def: "A kind of ribosome entry site, specific to Eukaryotic organisms that overlaps part of both 5' UTR and CDS sequence." [SO:ke] -subset: SOFA -synonym: "kozak consensus" EXACT [] -synonym: "kozak consensus sequence" EXACT [] -synonym: "kozak sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Kozak_consensus_sequence "wikipedia" -is_a: SO:0000139 ! ribosome_entry_site -created_by: kareneilbeck -creation_date: 2010-06-07T03:12:20Z - -[Term] -id: SO:0001648 -name: nested_transposon -def: "A transposon that is disrupted by the insertion of another element." [SO:ke] -synonym: "nested transposon" EXACT [] -is_a: SO:0000101 ! 
transposable_element -created_by: kareneilbeck -creation_date: 2010-06-23T03:22:57Z - -[Term] -id: SO:0001649 -name: nested_repeat -def: "A repeat that is disrupted by the insertion of another element." [SO:ke] -synonym: "nested repeat" RELATED [] -is_a: SO:0000657 ! repeat_region -created_by: kareneilbeck -creation_date: 2010-06-23T03:24:55Z - -[Term] -id: SO:0001650 -name: inframe_variant -def: "A sequence variant which does not cause a disruption of the translational reading frame." [SO:ke] -synonym: "cds-indel" EXACT dbsnp [] -synonym: "inframe variant" EXACT [] -is_a: SO:0001818 ! protein_altering_variant -created_by: kareneilbeck -creation_date: 2010-07-19T01:24:44Z - -[Term] -id: SO:0001653 -name: retinoic_acid_responsive_element -def: "A transcription factor binding site of variable direct repeats of the sequence PuGGTCA spaced by five nucleotides (DR5) found in the promoters of retinoic acid-responsive genes, to which retinoic acid receptors bind." [PMID:11327309, PMID:19917671] -synonym: "RARE" EXACT [] -synonym: "retinoic acid responsive element" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000167 ! promoter -created_by: kareneilbeck -creation_date: 2010-08-03T10:46:12Z - -[Term] -id: SO:0001654 -name: nucleotide_to_protein_binding_site -def: "A binding site that, in the nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -subset: SOFA -synonym: "nucleotide to protein binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:26:05Z - -[Term] -id: SO:0001655 -name: nucleotide_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with nucleotide residues." [SO:cb] -comment: See GO:0000166 : nucleotide binding. -synonym: "nucleotide binding site" EXACT [] -is_a: SO:0000409 ! 
binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:30:04Z - -[Term] -id: SO:0001656 -name: metal_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with metal ions." [SO:cb] -comment: See GO:0046872 : metal ion binding. -synonym: "metal binding site" RELATED [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:31:42Z - -[Term] -id: SO:0001657 -name: ligand_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with a small molecule such as a drug, or hormone." [SO:ke] -synonym: "ligand binding site" EXACT [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:32:58Z - -[Term] -id: SO:0001658 -name: nested_tandem_repeat -def: "An NTR is a nested repeat of two distinct tandem motifs interspersed with each other." [SO:AF] -comment: Tracker ID: 3052459. -synonym: "nested tandem repeat" EXACT [] -synonym: "NTR" EXACT [] -is_a: SO:0001649 ! nested_repeat -created_by: kareneilbeck -creation_date: 2010-08-26T09:36:16Z - -[Term] -id: SO:0001659 -name: promoter_element -synonym: "promoter element" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: overlaps SO:0000235 ! TF_binding_site -created_by: kareneilbeck -creation_date: 2010-10-01T11:48:32Z - -[Term] -id: SO:0001660 -name: core_promoter_element -synonym: "core promoter element" EXACT [] -synonym: "general transcription factor binding site" RELATED [] -is_a: SO:0001659 ! promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T11:49:03Z - -[Term] -id: SO:0001661 -name: RNA_polymerase_II_TATA_box -def: "A TATA box core promoter of a gene transcribed by RNA polymerase II." [PMID:16858867] -synonym: "RNA polymerase II TATA box" EXACT [] -is_a: SO:0000174 ! TATA_box -relationship: part_of SO:0001669 ! 
RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:42:12Z - -[Term] -id: SO:0001662 -name: RNA_polymerase_III_TATA_box -def: "A TATA box core promoter of a gene transcribed by RNA polymerase III." [SO:ke] -synonym: "RNA polymerase III TATA box" EXACT [] -is_a: SO:0000174 ! TATA_box -relationship: part_of SO:0000171 ! RNApol_III_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:43:16Z - -[Term] -id: SO:0001663 -name: BREd_motif -def: "A core TRNA polymerase II promoter element with consensus (G/A)T(T/G/A)(T/A)(G/T)(T/G)(T/G)." [PMID:16858867] -synonym: "BREd" EXACT [] -synonym: "BREd motif" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:49:55Z - -[Term] -id: SO:0001664 -name: DCE -def: "A discontinuous core element of RNA polymerase II transcribed genes, situated downstream of the TSS. It is composed of three sub elements: SI, SII and SIII." [PMID:16858867] -synonym: "downstream core element" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:56:41Z - -[Term] -id: SO:0001665 -name: DCE_SI -def: "A sub element of the DCE core promoter element, with consensus sequence CTTC." [PMID:16858867, SO:ke] -synonym: "DCE SI" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:10Z - -[Term] -id: SO:0001666 -name: DCE_SII -def: "A sub element of the DCE core promoter element with consensus sequence CTGT." [PMID:16858867, SO:ke] -synonym: "DCE SII" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:30Z - -[Term] -id: SO:0001667 -name: DCE_SIII -def: "A sub element of the DCE core promoter element with consensus sequence AGC." 
[PMID:16858867, SO:ke] -synonym: "DCE SIII" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:44Z - -[Term] -id: SO:0001668 -name: proximal_promoter_element -synonym: "proximal promoter element" RELATED [] -synonym: "specific transcription factor binding site" RELATED [] -is_a: SO:0001678 ! regulatory_promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T03:10:23Z - -[Term] -id: SO:0001669 -name: RNApol_II_core_promoter -def: "The minimal portion of the promoter required to properly initiate transcription in RNA polymerase II transcribed genes." [PMID:16858867] -synonym: "RNApol II core promoter" EXACT [] -is_a: SO:0000170 ! RNApol_II_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T03:13:41Z - -[Term] -id: SO:0001670 -name: distal_promoter_element -synonym: "distal promoter element" RELATED [] -is_a: SO:0001678 ! regulatory_promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T03:21:08Z - -[Term] -id: SO:0001671 -name: bacterial_RNApol_promoter_sigma_70 -synonym: "bacterial RNA polymerase promoter sigma 70" EXACT [] -is_a: SO:0000613 ! bacterial_RNApol_promoter -created_by: kareneilbeck -creation_date: 2010-10-06T01:41:34Z - -[Term] -id: SO:0001672 -name: bacterial_RNApol_promoter_sigma54 -synonym: "bacterial RNA polymerase promoter sigma54" EXACT [] -is_a: SO:0000613 ! bacterial_RNApol_promoter -created_by: kareneilbeck -creation_date: 2010-10-06T01:42:37Z - -[Term] -id: SO:0001673 -name: minus_12_signal -def: "A conserved region about 12-bp upstream of the start point of bacterial transcription units, involved with sigma factor 54." [PMID:18331472] -synonym: "minus 12 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001672 ! 
bacterial_RNApol_promoter_sigma54 -created_by: kareneilbeck -creation_date: 2010-10-06T01:44:57Z - -[Term] -id: SO:0001674 -name: minus_24_signal -def: "A conserved region about 12-bp upstream of the start point of bacterial transcription units, involved with sigma factor 54." [PMID:18331472] -synonym: "minus 24 signal" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001672 ! bacterial_RNApol_promoter_sigma54 -created_by: kareneilbeck -creation_date: 2010-10-06T01:45:24Z - -[Term] -id: SO:0001675 -name: A_box_type_1 -def: "An A box within an RNA polymerase III type 1 promoter." [SO:ke] -comment: The A box can be found in the promoters of type 1 and type 2 (pol III) so sub-typing here allows the part of relationship of the subtypes to remain true. -synonym: "A box type 1" RELATED [] -is_a: SO:0000619 ! A_box -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 -created_by: kareneilbeck -creation_date: 2010-10-06T05:43:43Z - -[Term] -id: SO:0001676 -name: A_box_type_2 -def: "An A box within an RNA polymerase III type 2 promoter." [SO:ke] -comment: The A box can be found in the promoters of type 1 and type 2 (pol III) so sub-typing here allows the part of relationship of the subtypes to remain true. -synonym: "A box type 2" RELATED [] -is_a: SO:0000619 ! A_box -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 -created_by: kareneilbeck -creation_date: 2010-10-06T05:44:18Z - -[Term] -id: SO:0001677 -name: intermediate_element -def: "A core promoter region of RNA polymerase III type 1 promoters." [PMID:12381659] -synonym: "IE" EXACT [] -synonym: "intermediate element" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 -created_by: kareneilbeck -creation_date: 2010-10-06T05:52:03Z - -[Term] -id: SO:0001678 -name: regulatory_promoter_element -def: "A promoter element that is not part of the core promoter, but provides the promoter with a specific regulatory region." 
[PMID:12381659] -synonym: "regulatory promoter element" RELATED [] -is_a: SO:0001659 ! promoter_element -created_by: kareneilbeck -creation_date: 2010-10-07T04:39:48Z - -[Term] -id: SO:0001679 -name: transcription_regulatory_region -def: "A regulatory region that is involved in the control of the process of transcription." [SO:ke] -subset: SOFA -synonym: "transcription regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:49:35Z - -[Term] -id: SO:0001680 -name: translation_regulatory_region -def: "A regulatory region that is involved in the control of the process of translation." [SO:ke] -synonym: "translation regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:52:45Z - -[Term] -id: SO:0001681 -name: recombination_regulatory_region -def: "A regulatory region that is involved in the control of the process of recombination." [SO:ke] -synonym: "recombination regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:53:35Z - -[Term] -id: SO:0001682 -name: replication_regulatory_region -def: "A regulatory region that is involved in the control of the process of nucleotide replication." [SO:ke] -synonym: "replication regulatory region" RELATED [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:54:09Z - -[Term] -id: SO:0001683 -name: sequence_motif -def: "A sequence motif is a nucleotide or amino-acid sequence pattern that may have biological significance." [http://en.wikipedia.org/wiki/Sequence_motif] -subset: SOFA -synonym: "sequence motif" RELATED [] -xref: http://en.wikipedia.org/wiki/Sequence_motif "wikipedia" -is_a: SO:0001411 ! 
biological_region -created_by: kareneilbeck -creation_date: 2010-10-14T04:13:22Z - -[Term] -id: SO:0001684 -name: experimental_feature_attribute -def: "An attribute of an experimentally derived feature." [SO:ke] -synonym: "experimental feature attribute" RELATED [] -is_a: SO:0000733 ! feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:22:23Z - -[Term] -id: SO:0001685 -name: score -def: "The score of an experimentally derived feature such as a p-value." [SO:ke] -is_a: SO:0001684 ! experimental_feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:23:16Z - -[Term] -id: SO:0001686 -name: quality_value -def: "An experimental feature attribute that defines the quality of the feature in a quantitative way, such as a phred quality score." [SO:ke] -synonym: "quality value" RELATED [] -is_a: SO:0001684 ! experimental_feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:24:11Z - -[Term] -id: SO:0001687 -name: restriction_enzyme_recognition_site -def: "The nucleotide region (usually a palindrome) that is recognized by a restriction enzyme. This may or may not be equal to the restriction enzyme binding site." [SO:ke] -synonym: "restriction endonuclease recognition site" EXACT [] -synonym: "restriction enzyme recognition site" EXACT [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-10-29T12:29:57Z - -[Term] -id: SO:0001688 -name: restriction_enzyme_cleavage_junction -def: "The boundary at which a restriction enzyme breaks the nucleotide sequence." [SO:ke] -synonym: "restriction enzyme cleavage junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2010-10-29T12:35:02Z - -[Term] -id: SO:0001689 -name: five_prime_restriction_enzyme_junction -def: "The restriction enzyme cleavage junction on the 5' strand of the nucleotide sequence." [SO:ke] -synonym: "5' restriction enzyme junction" EXACT [] -is_a: SO:0001694 ! 
single_strand_restriction_enzyme_cleavage_site -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:36:24Z - -[Term] -id: SO:0001690 -name: three_prime_restriction_enzyme_junction -synonym: "3' restriction enzyme junction" EXACT [] -is_a: SO:0001694 ! single_strand_restriction_enzyme_cleavage_site -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:37:52Z - -[Term] -id: SO:0001691 -name: blunt_end_restriction_enzyme_cleavage_site -synonym: "blunt end restriction enzyme cleavage site" EXACT [] -is_a: SO:0001687 ! restriction_enzyme_recognition_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:39:53Z - -[Term] -id: SO:0001692 -name: sticky_end_restriction_enzyme_cleavage_site -synonym: "sticky end restriction enzyme cleavage site" RELATED [] -is_a: SO:0001687 ! restriction_enzyme_recognition_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:40:50Z - -[Term] -id: SO:0001693 -name: blunt_end_restriction_enzyme_cleavage_junction -def: "A restriction enzyme cleavage site where both strands are cut at the same position." [SO:ke] -synonym: "blunt end restriction enzyme cleavage site" RELATED [] -is_a: SO:0001688 ! restriction_enzyme_cleavage_junction -relationship: part_of SO:0001691 ! blunt_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:43:14Z - -[Term] -id: SO:0001694 -name: single_strand_restriction_enzyme_cleavage_site -def: "A restriction enzyme cleavage site whereby only one strand is cut." [SO:ke] -synonym: "single strand restriction enzyme cleavage site" RELATED [] -is_a: SO:0001688 ! 
restriction_enzyme_cleavage_junction -created_by: kareneilbeck -creation_date: 2010-10-29T12:44:48Z - -[Term] -id: SO:0001695 -name: restriction_enzyme_single_strand_overhang -def: "A terminal region of DNA sequence where the end of the region is not blunt ended." [SO:ke] -synonym: "single strand overhang" EXACT [] -synonym: "sticky end" RELATED [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:48:35Z - -[Term] -id: SO:0001696 -name: experimentally_defined_binding_region -def: "A region that has been implicated in binding although the exact coordinates of binding may be unknown." [SO:ke] -synonym: "experimentally defined binding region" RELATED [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2010-11-02T11:39:59Z - -[Term] -id: SO:0001697 -name: ChIP_seq_region -def: "A region of sequence identified by CHiP seq technology to contain a protein binding site." [SO:ke] -synonym: "ChIP seq region" RELATED [] -is_a: SO:0001696 ! experimentally_defined_binding_region -relationship: contains SO:0000410 ! protein_binding_site -created_by: kareneilbeck -creation_date: 2010-11-02T11:43:07Z - -[Term] -id: SO:0001698 -name: ASPE_primer -def: "\"A primer containing an SNV at the 3' end for accurate genotyping." [http://www.ncbi.nlm.nih.gov/pubmed/11252801] -synonym: "allele specific primer extension primer" EXACT [] -synonym: "ASPE primer" EXACT [] -is_a: SO:0000112 ! primer -created_by: kareneilbeck -creation_date: 2010-11-11T03:25:21Z - -[Term] -id: SO:0001699 -name: dCAPS_primer -def: "A primer with one or more mis-matches to the DNA template corresponding to a position within a restriction enzyme recognition site." [http://www.ncbi.nlm.nih.gov/pubmed/9628033] -synonym: "dCAPS primer" EXACT [] -synonym: "derived cleaved amplified polymorphic primer" EXACT [] -is_a: SO:0000112 ! 
primer -created_by: kareneilbeck -creation_date: 2010-11-11T03:27:09Z - -[Term] -id: SO:0001700 -name: histone_modification -def: "Histone modification is a post translationally modified region whereby residues of the histone protein are modified by methylation, acetylation, phosphorylation, ubiquitination, sumoylation, citrullination, or ADP-ribosylation." [http:en.wikipedia.org/wiki/Histone] -synonym: "histone modification" EXACT [] -synonym: "histone modification site" RELATED [] -is_a: SO:0001089 ! post_translationally_modified_region -is_a: SO:0001720 ! epigenetically_modified_region -intersection_of: SO:0001089 ! post_translationally_modified_region -intersection_of: has_quality SO:0000133 ! epigenetically_modified -relationship: has_quality SO:0000133 ! epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-31T10:22:08Z - -[Term] -id: SO:0001701 -name: histone_methylation_site -def: "A histone modification site where the modification is the methylation of the residue." [SO:ke] -synonym: "histone methylation" EXACT [] -synonym: "histone methylation site" EXACT [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:02Z - -[Term] -id: SO:0001702 -name: histone_acetylation_site -def: "A histone modification where the modification is the acylation of the residue." [SO:ke] -synonym: "histone acetylation" EXACT [] -synonym: "histone acetylatoin site" EXACT [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:27Z - -[Term] -id: SO:0001703 -name: H3K9_acetylation_site -def: "A kind of histone modification site, whereby the 9th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 acetylation site" EXACT [] -synonym: "H3K9Ac" RELATED [] -is_a: SO:0001702 ! 
histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:05Z - -[Term] -id: SO:0001704 -name: H3K14_acetylation_site -def: "A kind of histone modification site, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K14 acetylation site" EXACT [] -synonym: "H3K14Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:53Z - -[Term] -id: SO:0001705 -name: H3K4_monomethylation_site -def: "A kind of histone modification, whereby the 4th residue (a lysine), from the start of the H3 protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 mono-methylation site" EXACT [] -synonym: "H3K4me1" RELATED [] -is_a: SO:0001734 ! H3K4_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:28:14Z - -[Term] -id: SO:0001706 -name: H3K4_trimethylation -def: "A kind of histone modification site, whereby the 4th residue (a lysine), from the start of the H3 protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 tri-methylation" EXACT [] -synonym: "H3K4me3" RELATED [] -is_a: SO:0001734 ! H3K4_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:29:12Z - -[Term] -id: SO:0001707 -name: H3K9_trimethylation_site -def: "A kind of histone modification site, whereby the 9th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 tri-methylation site" EXACT [] -synonym: "H3K9Me3" RELATED [] -is_a: SO:0001736 ! H3K9_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:30:34Z - -[Term] -id: SO:0001708 -name: H3K27_monomethylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is mono-methylated." 
[http://en.wikipedia.org/wiki/Histone] -synonym: "H2K27 mono-methylation site" EXACT [] -synonym: "H2K27Me1" RELATED [] -is_a: SO:0001732 ! H3K27_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:31:54Z - -[Term] -id: SO:0001709 -name: H3K27_trimethylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K27 tri-methylation site" EXACT [] -synonym: "H3K27Me3" RELATED [] -is_a: SO:0001732 ! H3K27_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:32:41Z - -[Term] -id: SO:0001710 -name: H3K79_monomethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is mono- methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 mono-methylation site" EXACT [] -synonym: "H3K79me1" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:33:42Z - -[Term] -id: SO:0001711 -name: H3K79_dimethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is di-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 di-methylation site" EXACT [] -synonym: "H3K79Me2" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:34:39Z - -[Term] -id: SO:0001712 -name: H3K79_trimethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 tri-methylation site" EXACT [] -synonym: "H3K79Me3" RELATED [] -is_a: SO:0001735 ! 
H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:35:30Z - -[Term] -id: SO:0001713 -name: H4K20_monomethylation_site -def: "A kind of histone modification site, whereby the 20th residue (a lysine), from the start of the H34histone protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H4K20 mono-methylation site" EXACT [] -synonym: "H4K20Me1" RELATED [] -is_a: SO:0001701 ! histone_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:36:43Z - -[Term] -id: SO:0001714 -name: H2BK5_monomethylation_site -def: "A kind of histone modification site, whereby the 5th residue (a lysine), from the start of the H2B protein is methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2BK5 mono-methylation site" EXACT [] -is_a: SO:0001701 ! histone_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:38:12Z - -[Term] -id: SO:0001715 -name: ISRE -def: "An ISRE is a transcriptional cis regulatory region, containing the consensus region: YAGTTTC(A/T)YTTTYCC, responsible for increased transcription via interferon binding." [http://genesdev.cshlp.org/content/2/4/383.abstrac] -comment: Term requested via tracker (2981725) by Alan Ruttenberg, April 2010. It has been described as both an enhancer and a promoter, so the parent is the more general term. -synonym: "interferon stimulated response element" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region -created_by: kareneilbeck -creation_date: 2010-04-05T11:15:08Z - -[Term] -id: SO:0001716 -name: histone_ubiqitination_site -def: "A histone modification site where ubiquitin may be added." [SO:ke] -synonym: "histone ubiquitination site" RELATED [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-04-13T10:12:18Z - -[Term] -id: SO:0001717 -name: H2B_ubiquitination_site -def: "A histone modification site on H2B where ubiquitin may be added." 
[SO:ke] -synonym: "H2BUbiq" RELATED [] -is_a: SO:0001716 ! histone_ubiqitination_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:13:28Z - -[Term] -id: SO:0001718 -name: H3K18_acetylation_site -def: "A kind of histone modification site, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K18 acetylation site" EXACT [] -synonym: "H3K18Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:39:35Z - -[Term] -id: SO:0001719 -name: H3K23_acylation_site -def: "A kind of histone modification, whereby the 23rd residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K23 acylation site" RELATED [] -synonym: "H3K23Ac" RELATED [] -is_a: SO:0001702 ! histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:42:45Z - -[Term] -id: SO:0001720 -name: epigenetically_modified_region -def: "A biological region implicated in inherited changes caused by mechanisms other than changes in the underlying DNA sequence." [http://en.wikipedia.org/wiki/Epigenetics, SO:ke] -subset: SOFA -synonym: "epigenetically modified region" RELATED [] -is_a: SO:0001411 ! biological_region -intersection_of: SO:0001411 ! biological_region -intersection_of: has_quality SO:0000133 ! epigenetically_modified -relationship: has_quality SO:0000133 ! epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-27T12:02:29Z - -[Term] -id: SO:0001721 -name: H3K27_acylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K27 acylation site" EXACT [] -synonym: "H3K27Ac" RELATED [] -is_a: SO:0001702 ! 
histone_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:44:09Z - -[Term] -id: SO:0001722 -name: H3K36_monomethylation_site -def: "A kind of histone modification site, whereby the 36th residue (a lysine), from the start of the H3 histone protein is mono-methylated." [SO:ke] -synonym: "H3K36 mono-methylation site" EXACT [] -synonym: "H3K36. -synonym: "transcriptionally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -def: "Expressed in relatively constant amounts without regard to cellular environmental conditions such as the concentration of a particular substrate." [SO:ke] -synonym: "transcriptionally constitutive" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -def: "An inducer molecule is required for transcription to occur." [SO:ke] -synonym: "transcriptionally induced" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -def: "A repressor molecule is required for transcription to stop." [SO:ke] -synonym: "transcriptionally repressed" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000127 -name: silenced_gene -def: "A gene that is silenced." [SO:xp] -synonym: "silenced gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000893 ! silenced -relationship: has_quality SO:0000893 ! silenced - -[Term] -id: SO:0000128 -name: gene_silenced_by_DNA_modification -def: "A gene that is silenced by DNA modification." [SO:xp] -synonym: "gene silenced by DNA modification" EXACT [] -is_a: SO:0000127 ! silenced_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000894 ! silenced_by_DNA_modification -relationship: has_quality SO:0000894 ! 
silenced_by_DNA_modification - -[Term] -id: SO:0000129 -name: gene_silenced_by_DNA_methylation -def: "A gene that is silenced by DNA methylation." [SO:xp] -synonym: "gene silenced by DNA methylation" EXACT [] -synonym: "methylation-silenced gene" EXACT [] -is_a: SO:0000128 ! gene_silenced_by_DNA_modification -intersection_of: SO:0000128 ! gene_silenced_by_DNA_modification -intersection_of: has_quality SO:0000895 ! silenced_by_DNA_methylation -relationship: has_quality SO:0000895 ! silenced_by_DNA_methylation - -[Term] -id: SO:0000130 -name: post_translationally_regulated -def: "An attribute describing a gene that is regulated after it has been translated." [SO:ke] -synonym: "post translationally regulated" EXACT [] -synonym: "post-translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000131 -name: translationally_regulated -def: "An attribute describing a gene that is regulated as it is translated." [SO:ke] -synonym: "translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "DNA reverse primer" EXACT [] -synonym: "reverse DNA primer" EXACT [] -synonym: "reverse primer" EXACT [] -synonym: "reverse primer oligo" EXACT [] -synonym: "reverse primer oligonucleotide" EXACT [] -synonym: "reverse primer sequence" EXACT [] -is_a: SO:0000112 ! primer -intersection_of: SO:0000112 ! primer -intersection_of: has_quality SO:0001031 ! reverse -relationship: has_quality SO:0001031 ! reverse - -[Term] -id: SO:0000133 -name: epigenetically_modified -def: "This attribute describes a gene where heritable changes other than those in the DNA sequence occur. 
These changes include: modification to the DNA (such as DNA methylation, the covalent modification of cytosine), and post-translational modification of histones." [SO:ke] -synonym: "epigenetically modified" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000134 -name: genomically_imprinted -def: "Imprinted genes are epigenetically modified genes that are expressed monoallelically according to their parent of origin." [SO:ke] -synonym: "genomically imprinted" EXACT [] -synonym: "imprinted" BROAD [] -xref: http:http://en.wikipedia.org/wiki/Genomic_imprinting "wiki" -is_a: SO:0000119 ! regulated -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000135 -name: maternally_imprinted -def: "The maternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "maternally imprinted" EXACT [] -is_a: SO:0000134 ! genomically_imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -def: "The paternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "paternally imprinted" EXACT [] -is_a: SO:0000134 ! genomically_imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -def: "Allelic exclusion is a process occurring in diploid organisms, where a gene is inactivated and not expressed in that cell." [SO:ke] -comment: Examples are x-inactivation and immunoglobulin formation. -synonym: "allelically excluded" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000138 -name: gene_rearranged_at_DNA_level -def: "An epigenetically modified gene, rearranged at the DNA level." [SO:xp] -synonym: "gene rearranged at DNA level" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000904 ! rearranged_at_DNA_level -relationship: has_quality SO:0000904 ! 
rearranged_at_DNA_level - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0001680 ! translation_regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -synonym: "DNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000145 -name: recoded_codon -def: "A codon that has been redefined at translation. The redefinition may be as a result of translational bypass, translational frameshifting or stop codon readthrough." [SO:xp] -synonym: "recoded codon" EXACT [] -is_a: SO:0000360 ! 
codon - -[Term] -id: SO:0000146 -name: capped -def: "An attribute describing when a sequence, usually an mRNA is capped by the addition of a modified guanine nucleotide at the 5' end." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0001876 ! partial_genomic_sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unavailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! 
reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "yeast artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "bacterial artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000154 -name: PAC -def: "The P1-derived artificial chromosome are DNA constructs that are derived from the DNA of P1 bacteriophage. They can carry large amounts (about 100-300 kilobases) of other sequences for a variety of bioengineering purposes. It is one type of vector used to clone DNA fragments (100- to 300-kb insert size; average, 150 kb) in Escherichia coli cells." [http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. Drosophila melanogaster PACs carry an average insert size of 80 kb. The library represents a 6-fold coverage of the genome. -synonym: "P1" EXACT [] -synonym: "P1 artificial chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000155 -name: plasmid -def: "A self replicating, using the hosts cellular machinery, often circular nucleic acid molecule that is distinct from a chromosome in the organism." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "plasmid sequence" EXACT [] -is_a: SO:0001235 ! 
replicon - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as a plasmid or packaged as a phage,since they retain the lambda cos sites." [SO:ma] -comment: Paper: vans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1):9-20. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cosmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Cosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma] -synonym: "phagemid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Phagemid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilizes the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource: mapping and analysis of 96 clones. Genomics 1996. -synonym: "fosmid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Fosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000159 -name: deletion -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -xref: loinc:LA6692-3 "Deletion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the from the viral DNA. 
Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_obsolete: true - -[Term] -id: SO:0000161 -name: methylated_adenine -def: "A modified base in which adenine has been methylated." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -synonym: "methylated_A" EXACT [] -is_a: SO:0000306 ! methylated_DNA_base_feature -is_a: SO:0001962 ! modified_adenine - -[Term] -id: SO:0000162 -name: splice_site -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinery. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! 
cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a place holder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000166 -name: enhancer_bound_by_factor -def: "An enhancer bound by a factor." [SO:xp] -synonym: "enhancer bound by factor" EXACT [] -is_a: SO:0000165 ! enhancer -intersection_of: SO:0000165 ! enhancer -intersection_of: has_quality SO:0000277 ! bound_by_factor -relationship: has_quality SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." 
[SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." [SO:ke] -synonym: "pol I promoter" EXACT [] -synonym: "polymerase I promoter" EXACT [] -synonym: "RNA polymerase A promoter" EXACT [] -synonym: "RNApol I promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke] -synonym: "pol II promoter" RELATED [] -synonym: "polymerase II promoter" EXACT [] -synonym: "RNA polymerase B promoter" EXACT [] -synonym: "RNApol II promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." [SO:ke] -synonym: "pol III promoter" EXACT [] -synonym: "polymerase III promoter" EXACT [] -synonym: "RNA polymerase C promoter" EXACT [] -synonym: "RNApol III promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "CAAT box" EXACT [] -synonym: "CAAT signal" EXACT [] -synonym: "CAAT-box" EXACT [] -xref: http://en.wikipedia.org/wiki/CAAT_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000173 -name: GC_rich_promoter_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "GC rich promoter region" EXACT [] -synonym: "GC-rich region" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:16858867] -comment: Binds TBP. -synonym: "Goldstein-Hogness box" EXACT [] -synonym: "TATA box" EXACT [] -xref: http://en.wikipedia.org/wiki/TATA_box "wiki" -is_a: SO:0001660 ! core_promoter_element - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT. This region is associated with sigma factor 70." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-10 signal" EXACT [] -synonym: "minus 10 signal" EXACT [] -synonym: "Pribnow box" EXACT [] -synonym: "Pribnow Schaller box" EXACT [] -synonym: "Pribnow-Schaller box" EXACT [] -xref: http://en.wikipedia.org/wiki/Pribnow_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001671 ! bacterial_RNApol_promoter_sigma_70 -relationship: part_of SO:0001913 ! bacterial_RNApol_promoter_sigma_ecf - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA. This region is associated with sigma factor 70." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-35 signal" EXACT [] -synonym: "minus 35 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001671 ! 
bacterial_RNApol_promoter_sigma_70 -relationship: part_of SO:0001913 ! bacterial_RNApol_promoter_sigma_ecf - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." [http://www.dddmag.com/Glossary.aspx#r] -synonym: "class I" RELATED [] -synonym: "class I transposon" EXACT [] -synonym: "retrotransposon element" EXACT [] -xref: http://en.wikipedia.org/wiki/Retrotransposon "wiki" -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." [SO:ke] -synonym: "class II" RELATED [] -synonym: "class II transposon" EXACT [] -synonym: "DNA transposon" EXACT [] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." 
[SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -synonym: "U2 intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." [SO:ke] -synonym: "long terminal repeat retrotransposon" EXACT [] -synonym: "LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -synonym: "non LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! 
retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -synonym: "5' intron" EXACT [] -synonym: "5' intron sequence" EXACT [] -synonym: "five prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000191 -name: interior_intron -synonym: "interior intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -synonym: "3' intron" EXACT [] -synonym: "3' intron sequence" RELATED [] -synonym: "three prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." [GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "LINE" EXACT [] -synonym: "LINE element" EXACT [] -synonym: "Long interspersed element" EXACT [] -synonym: "Long interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon (here, 'codon' is inclusive of the stop_codon)." [SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -def: "The sequence of the five_prime_coding_exon that codes for protein." 
[SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke] -subset: DBVAR -synonym: "transchr" RELATED [http://www.ncbi.nlm.nih.gov/dbvar/] -synonym: "translocated sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000201 -name: interior_exon -def: "An exon that is bounded by 5' and 3' splice sites." [PMID:10373547] -synonym: "interior exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The coding exon that is most 3-prime on a given transcript." [SO:ma] -synonym: "3' coding exon" RELATED [] -synonym: "three prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! 
mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." [SO:ke] -synonym: "Short interspersed element" EXACT [] -synonym: "Short interspersed nuclear element" EXACT [] -synonym: "SINE element" EXACT [] -xref: http://en.wikipedia.org/wiki/Short_interspersed_nuclear_element "wiki" -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_variation -synonym: "simple sequence length polymorphism" RELATED [] -synonym: "simple sequence length variation" EXACT [] -synonym: "SSLP" RELATED [] -is_a: SO:0000248 ! sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." 
[http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "terminal inverted repeat element" EXACT [] -synonym: "TIR element" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -subset: SOFA -synonym: "" RELATED [] -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253)." [SO:ke] -synonym: "tRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -def: "A primary transcript encoding alanyl tRNA." [SO:ke] -synonym: "alanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." [SO:ke] -synonym: "arginine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -synonym: "asparagine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -synonym: "aspartic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." [SO:ke] -synonym: "cysteine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." [SO:ke] -synonym: "glycine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." [SO:ke] -synonym: "histidine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke] -synonym: "isoleucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -synonym: "leucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -synonym: "lysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -synonym: "methionine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke] -synonym: "phenylalanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -synonym: "proline tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -synonym: "serine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -synonym: "threonine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -synonym: "tryptophan tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." [SO:ke] -synonym: "tyrosine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." [SO:ke] -synonym: "valine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear RNA (SO:0000274)." [SO:ke] -synonym: "snRNA primary transcript" EXACT [] -is_a: SO:0000483 ! 
nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -synonym: "snoRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a nucleotide molecule that binds a Transcription Factor or Transcription Factor complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -is_a: SO:0001679 ! 
transcription_regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The in-frame interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -synonym: "transcript attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterized by large modular imperfect long inverted repeats." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "foldback element" EXACT [] -synonym: "long inverted repeat element" RELATED [] -synonym: "LVR element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The sequences extending on either side of a specific region." [SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000240 -name: chromosome_variation -synonym: "chromosome variation" EXACT [] -is_a: SO:0001507 ! variant_collection -disjoint_from: SO:0000400 ! sequence_attribute -relationship: part_of SO:0001524 ! chromosomally_aberrant_genome - -[Term] -id: SO:0000241 -name: internal_UTR -def: "A UTR bordered by the terminal and initial codons of two CDSs in a polycistronic transcript. Every UTR is either 5', 3' or internal." [SO:cjm] -synonym: "internal UTR" EXACT [] -is_a: SO:0000203 ! 
UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polycistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -synonym: "untranslated region polycistronic mRNA" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke] -synonym: "internal ribosomal entry sequence" EXACT [] -synonym: "internal ribosomal entry site" EXACT [] -synonym: "internal ribosome entry sequence" RELATED [] -synonym: "internal ribosome entry site" EXACT [] -synonym: "IRES" EXACT [] -xref: http://en.wikipedia.org/wiki/Internal_ribosome_entry_site "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_obsolete: true - -[Term] -id: SO:0000246 -name: polyadenylated -def: "A attribute describing the addition of a poly A tail to the 3' end of a mRNA molecule." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000248 -name: sequence_length_variation -synonym: "sequence length variation" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -synonym: "modified RNA base feature" EXACT [] -is_a: SO:0001236 ! 
base - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000254 -name: alanyl_tRNA -def: "A tRNA sequence that has an alanine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "alanyl tRNA" EXACT [] -synonym: "alanyl-transfer ribonucleic acid" EXACT [] -synonym: "alanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000211 ! alanine_tRNA_primary_transcript - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -synonym: "rRNA small subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -def: "A tRNA sequence that has an asparagine anticodon, and a 3' asparagine binding region." [SO:ke] -synonym: "asparaginyl tRNA" EXACT [] -synonym: "asparaginyl-transfer ribonucleic acid" EXACT [] -synonym: "asparaginyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000213 ! asparagine_tRNA_primary_transcript - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -def: "A tRNA sequence that has an aspartic acid anticodon, and a 3' aspartic acid binding region." [SO:ke] -synonym: "aspartyl tRNA" EXACT [] -synonym: "aspartyl-transfer ribonucleic acid" EXACT [] -synonym: "aspartyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000214 ! aspartic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -def: "A tRNA sequence that has a cysteine anticodon, and a 3' cysteine binding region." [SO:ke] -synonym: "cysteinyl tRNA" EXACT [] -synonym: "cysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "cysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000215 ! cysteine_tRNA_primary_transcript - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -def: "A tRNA sequence that has a glutamine anticodon, and a 3' glutamine binding region." 
[SO:ke] -synonym: "glutaminyl tRNA" EXACT [] -synonym: "glutaminyl-transfer ribonucleic acid" EXACT [] -synonym: "glutaminyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000216 ! glutamic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -def: "A tRNA sequence that has a glutamic acid anticodon, and a 3' glutamic acid binding region." [SO:ke] -synonym: "glutamyl tRNA" EXACT [] -synonym: "glutamyl-transfer ribonucleic acid" EXACT [] -synonym: "glutamyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000217 ! glutamine_tRNA_primary_transcript - -[Term] -id: SO:0000261 -name: glycyl_tRNA -def: "A tRNA sequence that has a glycine anticodon, and a 3' glycine binding region." [SO:ke] -synonym: "glycyl tRNA" EXACT [] -synonym: "glycyl-transfer ribonucleic acid" RELATED [] -synonym: "glycyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000218 ! glycine_tRNA_primary_transcript - -[Term] -id: SO:0000262 -name: histidyl_tRNA -def: "A tRNA sequence that has a histidine anticodon, and a 3' histidine binding region." [SO:ke] -synonym: "histidyl tRNA" EXACT [] -synonym: "histidyl-transfer ribonucleic acid" EXACT [] -synonym: "histidyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000219 ! histidine_tRNA_primary_transcript - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -def: "A tRNA sequence that has an isoleucine anticodon, and a 3' isoleucine binding region." [SO:ke] -synonym: "isoleucyl tRNA" EXACT [] -synonym: "isoleucyl-transfer ribonucleic acid" EXACT [] -synonym: "isoleucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000220 ! isoleucine_tRNA_primary_transcript - -[Term] -id: SO:0000264 -name: leucyl_tRNA -def: "A tRNA sequence that has a leucine anticodon, and a 3' leucine binding region." 
[SO:ke] -synonym: "leucyl tRNA" EXACT [] -synonym: "leucyl-transfer ribonucleic acid" EXACT [] -synonym: "leucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000221 ! leucine_tRNA_primary_transcript - -[Term] -id: SO:0000265 -name: lysyl_tRNA -def: "A tRNA sequence that has a lysine anticodon, and a 3' lysine binding region." [SO:ke] -synonym: "lysyl tRNA" EXACT [] -synonym: "lysyl-transfer ribonucleic acid" EXACT [] -synonym: "lysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000222 ! lysine_tRNA_primary_transcript - -[Term] -id: SO:0000266 -name: methionyl_tRNA -def: "A tRNA sequence that has a methionine anticodon, and a 3' methionine binding region." [SO:ke] -synonym: "methionyl tRNA" EXACT [] -synonym: "methionyl-transfer ribonucleic acid" EXACT [] -synonym: "methionyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000223 ! methionine_tRNA_primary_transcript - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -def: "A tRNA sequence that has a phenylalanine anticodon, and a 3' phenylalanine binding region." [SO:ke] -synonym: "phenylalanyl tRNA" EXACT [] -synonym: "phenylalanyl-transfer ribonucleic acid" EXACT [] -synonym: "phenylalanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000224 ! phenylalanine_tRNA_primary_transcript - -[Term] -id: SO:0000268 -name: prolyl_tRNA -def: "A tRNA sequence that has a proline anticodon, and a 3' proline binding region." [SO:ke] -synonym: "prolyl tRNA" EXACT [] -synonym: "prolyl-transfer ribonucleic acid" EXACT [] -synonym: "prolyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000225 ! proline_tRNA_primary_transcript - -[Term] -id: SO:0000269 -name: seryl_tRNA -def: "A tRNA sequence that has a serine anticodon, and a 3' serine binding region." 
[SO:ke] -synonym: "seryl tRNA" EXACT [] -synonym: "seryl-transfer ribonucleic acid" RELATED [] -synonym: "seryl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000226 ! serine_tRNA_primary_transcript - -[Term] -id: SO:0000270 -name: threonyl_tRNA -def: "A tRNA sequence that has a threonine anticodon, and a 3' threonine binding region." [SO:ke] -synonym: "threonyl tRNA" EXACT [] -synonym: "threonyl-transfer ribonucleic acid" EXACT [] -synonym: "threonyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000227 ! threonine_tRNA_primary_transcript - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -def: "A tRNA sequence that has a tryptophan anticodon, and a 3' tryptophan binding region." [SO:ke] -synonym: "tryptophanyl tRNA" EXACT [] -synonym: "tryptophanyl-transfer ribonucleic acid" EXACT [] -synonym: "tryptophanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000228 ! tryptophan_tRNA_primary_transcript - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -def: "A tRNA sequence that has a tyrosine anticodon, and a 3' tyrosine binding region." [SO:ke] -synonym: "tyrosyl tRNA" EXACT [] -synonym: "tyrosyl-transfer ribonucleic acid" EXACT [] -synonym: "tyrosyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000229 ! tyrosine_tRNA_primary_transcript - -[Term] -id: SO:0000273 -name: valyl_tRNA -def: "A tRNA sequence that has a valine anticodon, and a 3' valine binding region." [SO:ke] -synonym: "valyl tRNA" EXACT [] -synonym: "valyl-transfer ribonucleic acid" EXACT [] -synonym: "valyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000230 ! valine_tRNA_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000231 ! snRNA_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: derives_from SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000277 -name: bound_by_factor -def: "An attribute describing a sequence that is bound by another molecule." 
[SO:ke] -comment: Formerly called transcript_by_bound_factor. -synonym: "bound by factor" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000278 -name: transcript_bound_by_nucleic_acid -def: "A transcript that is bound by a nucleic acid." [SO:xp] -comment: Formerly called transcript_by_bound_nucleic_acid. -synonym: "transcript bound by nucleic acid" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000876 ! bound_by_nucleic_acid -relationship: has_quality SO:0000876 ! bound_by_nucleic_acid - -[Term] -id: SO:0000279 -name: transcript_bound_by_protein -def: "A transcript that is bound by a protein." [SO:xp] -comment: Formerly called transcript_by_bound_protein. -synonym: "transcript bound by protein" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000875 ! bound_by_protein -relationship: has_quality SO:0000875 ! bound_by_protein - -[Term] -id: SO:0000280 -name: engineered_gene -def: "A gene that is engineered." [SO:xp] -synonym: "engineered gene" EXACT [] -is_a: SO:0000704 ! gene -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000281 -name: engineered_foreign_gene -def: "A gene that is engineered and foreign." [SO:xp] -synonym: "engineered foreign gene" EXACT [] -is_a: SO:0000280 ! engineered_gene -is_a: SO:0000285 ! foreign_gene -is_a: SO:0000805 ! engineered_foreign_region -intersection_of: SO:0000280 ! engineered_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000282 -name: mRNA_with_minus_1_frameshift -def: "An mRNA with a minus 1 frameshift." 
[SO:xp] -synonym: "mRNA with minus 1 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000866 ! minus_1_frameshift -relationship: has_quality SO:0000866 ! minus_1_frameshift - -[Term] -id: SO:0000283 -name: engineered_foreign_transposable_element_gene -def: "A transposable_element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign transposable element gene" EXACT [] -is_a: SO:0000111 ! transposable_element_gene -is_a: SO:0000281 ! engineered_foreign_gene -intersection_of: SO:0000111 ! transposable_element_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartite and interrupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000285 -name: foreign_gene -def: "A gene that is foreign." [SO:xp] -synonym: "foreign gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "direct terminal repeat" RELATED [] -synonym: "long terminal repeat" EXACT [] -synonym: "LTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Long_terminal_repeat "wiki" -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000287 -name: fusion_gene -def: "A gene that is a fusion." [SO:xp] -synonym: "fusion gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Fusion_gene "wiki" -is_a: SO:0000704 ! 
gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000806 ! fusion -relationship: has_quality SO:0000806 ! fusion - -[Term] -id: SO:0000288 -name: engineered_fusion_gene -def: "A fusion gene that is engineered." [SO:xp] -synonym: "engineered fusion gene" EXACT [] -is_a: SO:0000280 ! engineered_gene -is_a: SO:0000287 ! fusion_gene -intersection_of: SO:0000287 ! fusion_gene -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000289 -name: microsatellite -def: "A repeat_region containing repeat_units of 2 to 10 bp repeated in tandem." [http://www.informatics.jax.org/silver/glossary.shtml, NCBI:th] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite" EXACT [] -synonym: "dinucleotide repeat microsatellite feature" EXACT [] -synonym: "dinucleotide repeat microsatellite locus" EXACT [] -synonym: "dinucleotide repeat microsatellite marker" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite marker" RELATED [] -synonym: "rinucleotide repeat microsatellite" EXACT [] -synonym: "trinucleotide repeat microsatellite feature" EXACT [] -synonym: "trinucleotide repeat microsatellite locus" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_obsolete: true - -[Term] -id: SO:0000293 -name: engineered_foreign_repetitive_element -def: "A repetitive element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign repetitive element" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000805 ! 
engineered_foreign_region -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -synonym: "U12 intron" EXACT [] -synonym: "U12-dependent intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! replicon - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "D-loop" EXACT [] -synonym: "displacement loop" RELATED [] -xref: http://en.wikipedia.org/wiki/D_loop "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -synonym: "recombination feature" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000299 -name: specific_recombination_site -synonym: "specific recombination site" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -synonym: "recombination feature of rearranged gene" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000301 -name: vertebrate_immune_system_gene_recombination_feature -synonym: "vertebrate immune system gene recombination feature" EXACT [] -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene recombination feature" EXACT [] -synonym: "J-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interrupted palindrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_DNA_base -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. 
-subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001236 ! base -is_a: SO:0001720 ! epigenetically_modified_region - -[Term] -id: SO:0000306 -name: methylated_DNA_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_DNA_base - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_obsolete: true - -[Term] -id: SO:0000309 -name: computed_feature -is_obsolete: true - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_obsolete: true - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to: -is_obsolete: true - -[Term] -id: SO:0000312 -name: experimentally_determined -def: "Attribute to describe a feature that has been experimentally verified." [SO:ke] -synonym: "experimentally determined" EXACT [] -is_a: SO:0000789 ! validated - -[Term] -id: SO:0000313 -name: stem_loop -alt_id: SO:0000019 -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "RNA_hairpin_loop" EXACT [] -synonym: "stem loop" EXACT [] -synonym: "stem-loop" EXACT [] -xref: http://en.wikipedia.org/wiki/Stem_loop "wiki" -is_a: SO:0000122 ! RNA_sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." 
[SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: TSS -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -synonym: "transcription_start_site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -synonym: "coding_sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cDNA clone" EXACT [] -is_a: SO:0000151 ! clone -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000756 ! cDNA -relationship: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." 
[SO:ke] -synonym: "intronic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000321 -name: mRNA_with_plus_1_frameshift -def: "An mRNA with a plus 1 frameshift." [SO:ke] -synonym: "mRNA with plus 1 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000868 ! plus_1_frameshift -relationship: has_quality SO:0000868 ! plus_1_frameshift - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -synonym: "nuclease hypersensitive site" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "coding start" EXACT [] -synonym: "translation initiation site" EXACT [] -synonym: "translation start" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -subset: SOFA -synonym: "35S rRNA primary transcript" EXACT [] -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." 
[SO:ke] -synonym: "coding end" EXACT [] -synonym: "translation termination site" EXACT [] -synonym: "translation_end" EXACT [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray oligo" EXACT [] -synonym: "microarray oligonucleotide" EXACT [] -is_a: SO:0000051 ! probe - -[Term] -id: SO:0000329 -name: mRNA_with_plus_2_frameshift -def: "An mRNA with a plus 2 frameshift." [SO:xp] -synonym: "mRNA with plus 2 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000869 ! plus_2_framshift -relationship: has_quality SO:0000869 ! plus_2_framshift - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." 
[SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000335 -name: mRNA_with_minus_2_frameshift -def: "A mRNA with a minus 2 frameshift." [SO:ke] -synonym: "mRNA with minus 2 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000867 ! minus_2_frameshift -relationship: has_quality SO:0000867 ! minus_2_frameshift - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0001411 ! biological_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITEs do not encode proteins." [http://www.pnas.org/cgi/content/full/97/18/10083] -synonym: "miniature inverted repeat transposable element" EXACT [] -is_a: SO:0000208 ! 
terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome which promotes recombination." [SO:rd] -synonym: "recombination hotspot" EXACT [] -xref: http://en.wikipedia.org/wiki/Recombination_hotspot "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -synonym: "site specific recombination target region" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000346 -name: loxP_site -synonym: "Cre-recombination target region" RELATED [] -synonym: "loxP site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000348 -name: nucleic_acid -def: "An attribute describing a sequence consisting of nucleobases bound to repeating units. The forms found in nature are deoxyribonucleic acid (DNA), where the repeating units are 2-deoxy-D-ribose rings connected to a phosphate backbone, and ribonucleic acid (RNA), where the repeating units are D-ribose rings connected to a phosphate backbone." [CHEBI:33696, RSC:cb] -synonym: "nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleic_acid "wiki" -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000350 -name: FRT_site -def: "An inversion site found on the Saccharomyces cerevisiae 2 micron plasmid." [SO:ma] -synonym: "FLP recombination target region" EXACT [] -synonym: "FRT site" EXACT [] -is_a: SO:0000948 ! inversion_site - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "An attribute to decide a sequence of nucleotides, nucleotide analogs, or amino acids that has been designed by an experimenter and which may, or may not, correspond with any natural sequence." [SO:ma] -synonym: "synthetic sequence" EXACT [] -is_a: SO:0000443 ! 
polymer_attribute - -[Term] -id: SO:0000352 -name: DNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a 2-deoxy-D-ribose ring connected to a phosphate backbone." [RSC:cb] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000353 -name: sequence_assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -def: "A region of intronic nucleotide sequence targeted by a nuclease enzyme." [SO:ke] -synonym: "group 1 intron homing endonuclease target region" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which is co-inherited as the result of the lack of historic recombination within it." [SO:ma] -synonym: "haplotype block" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000356 -name: RNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a D-ribose ring connected to a phosphate backbone." [RSC:cb] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: flanked -def: "An attribute describing a region that is bounded either side by a particular kind of region." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000359 -name: floxed -def: "An attribute describing sequence that is flanked by Lox-P sites." [SO:ke] -xref: http://en.wikipedia.org/wiki/Floxed "wiki" -is_a: SO:0000357 ! 
flanked - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000361 -name: FRT_flanked -def: "An attribute to describe sequence that is flanked by the FLP recombinase recognition site, FRT." [SO:ke] -synonym: "FRT flanked" EXACT [] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000362 -name: invalidated_by_chimeric_cDNA -def: "A cDNA clone constructed from more than one mRNA. Usually an experimental artifact." [SO:ma] -synonym: "invalidated by chimeric cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000363 -name: floxed_gene -def: "A transgene that is floxed." [SO:xp] -synonym: "floxed gene" EXACT [] -is_a: SO:0000902 ! transgene -intersection_of: SO:0000902 ! transgene -intersection_of: has_quality SO:0000359 ! floxed -relationship: has_quality SO:0000359 ! floxed - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposable element." [SO:ke] -synonym: "transposable element flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "A region encoding an integrase which acts at a site adjacent to it (attI_site) to insert DNA which must include but is not limited to an attC_site." [SO:as] -xref: http://en.wikipedia.org/wiki/Integron "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! 
junction - -[Term] -id: SO:0000367 -name: attI_site -def: "A region within an integron, adjacent to an integrase, at which site specific recombination involving an attC_site takes place." [SO:as] -synonym: "attI site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -is_obsolete: true - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/~smaloy/Glossary/C.html] -synonym: "conjugative transposon" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0001185 ! enzymatic -relationship: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0000373 -name: recombinationally_inverted_gene -def: "A recombinationally rearranged gene by inversion." [SO:xp] -synonym: "recombinationally inverted gene" EXACT [] -is_a: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: SO:0000456 ! 
recombinationally_rearranged_gene -intersection_of: has_quality SO:1000036 ! inversion -relationship: has_quality SO:1000036 ! inversion - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -is_a: SO:0000372 ! enzymatic_RNA -intersection_of: SO:0000372 ! enzymatic_RNA -intersection_of: has_quality SO:0001186 ! ribozymic -relationship: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000375 -name: rRNA_5_8S -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 6S RNA represses expression from a sigma70-dependent promoter during stationary phase." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S RNA" EXACT [] -synonym: "RNA 6S" EXACT [] -xref: http://en.wikipedia.org/wiki/6S_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. 
The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB RsmB RNA" EXACT [] -synonym: "CsrB-RsmB RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -synonym: "DsrA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/DsrA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -synonym: "GcvB RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/GcvB_RNA "wiki" -is_a: SO:0000378 ! 
DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [PMID:2436805] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -is_a: SO:0000715 ! RNA_motif -intersection_of: SO:0000715 ! RNA_motif -intersection_of: has_quality SO:0001186 ! ribozymic -relationship: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000381 -name: group_IIA_intron -synonym: "group IIA intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -synonym: "group IIB intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -synonym: "MicF RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MicF_RNA "wiki" -is_a: SO:0000644 ! antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -synonym: "OxyS RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/OxyS_RNA "wiki" -is_a: SO:0000370 ! 
small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterized activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! 
ncRNA - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -synonym: "RprA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RprA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -synonym: "RRE RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -synonym: "spot-42 RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Spot_42_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesizes telomeric DNA." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. 
The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -def: "An attribute describes a quality of sequence." [SO:ke] -synonym: "sequence attribute" EXACT [] - -[Term] -id: SO:0000401 -name: gene_attribute -synonym: "gene attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_obsolete: true - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0005837 ! U14_snoRNA_primary_transcript - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron. Twintrons are group II or III introns, into which another group II or III intron has been transposed." [PMID:1899376, PMID:7823908] -xref: http://en.wikipedia.org/wiki/Twintron "wiki" -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A biological_region of sequence that, in the molecule, interacts selectively and non-covalently with other molecules. A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. 
It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: See GO:0005488 : binding. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with polypeptide molecules." [SO:ke] -comment: See GO:0042277 : peptide binding. -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000411 -name: rescue_region -def: "A region that rescues." [SO:xp] -synonym: "rescue fragment" EXACT [] -synonym: "rescue region" EXACT [] -synonym: "rescue segment" RELATED [] -is_a: SO:0000695 ! reagent -intersection_of: SO:0000695 ! reagent -intersection_of: has_quality SO:0000814 ! rescue -relationship: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000414 -name: invalidated_by_genomic_contamination -def: "An attribute to describe a feature that is invalidated due to genomic contamination." [SO:ke] -synonym: "invalidated by genomic contamination" EXACT [] -is_a: SO:0000790 ! 
invalidated - -[Term] -id: SO:0000415 -name: invalidated_by_genomic_polyA_primed_cDNA -def: "An attribute to describe a feature that is invalidated due to polyA priming." [SO:ke] -synonym: "invalidated by genomic polyA primed cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000416 -name: invalidated_by_partial_processing -def: "An attribute to describe a feature that is invalidated due to partial processing." [SO:ke] -synonym: "invalidated by partial processing" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000417 -name: polypeptide_domain -alt_id: BS:00012 -alt_id: BS:00134 -alt_id: SO:0001069 -def: "A structurally or functionally defined protein region. In proteins with multiple domains, the combination of the domains determines the function of the protein. A region which has been shown to recur throughout evolution." [EBIBS:GAR] -comment: Range. Old definition from before biosapiens: A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains. -subset: biosapiens -synonym: "domain" BROAD BS [uniprot:feature_type] -synonym: "polypeptide domain" EXACT [] -synonym: "polypeptide_structural_domain" EXACT BS [] -synonym: "structural domain" BROAD BS [] -is_a: SO:0001070 ! polypeptide_structural_region -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. 
-subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0001527 ! peptide_localization_signal -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -synonym: "5' TIR" EXACT [] -synonym: "five prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -synonym: "3' TIR" EXACT [] -synonym: "three prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -synonym: "U5 long terminal repeat region" EXACT [] -synonym: "U5 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000423 -name: R_LTR_region -synonym: "R long terminal repeat region" EXACT [] -synonym: "R LTR region" EXACT [] -is_a: SO:0000848 ! 
LTR_component - -[Term] -id: SO:0000424 -name: U3_LTR_region -synonym: "U3 long terminal repeat region" EXACT [] -synonym: "U3 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000425 -name: five_prime_LTR -synonym: "5' long terminal repeat" EXACT [] -synonym: "5' LTR" EXACT [] -synonym: "five prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -synonym: "3' long terminal repeat" EXACT [] -synonym: "3' LTR" EXACT [] -synonym: "three prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -synonym: "R 5' long term repeat region" EXACT [] -synonym: "R five prime LTR region" EXACT [] -is_a: SO:0000423 ! R_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -synonym: "U5 5' long terminal repeat region" EXACT [] -synonym: "U5 five prime LTR region" EXACT [] -is_a: SO:0000422 ! U5_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -synonym: "U3 5' long term repeat region" EXACT [] -synonym: "U3 five prime LTR region" EXACT [] -is_a: SO:0000424 ! U3_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -synonym: "R 3' long terminal repeat region" EXACT [] -synonym: "R three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -synonym: "U3 3' long terminal repeat region" EXACT [] -synonym: "U3 three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -synonym: "U5 3' long terminal repeat region" EXACT [] -synonym: "U5 three prime LTR region" EXACT [] -is_a: SO:0000849 ! 
three_prime_LTR_component - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -synonym: "non LTR retrotransposon polymeric tract" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: target_site_duplication -def: "A sequence of the target DNA that is duplicated when a transposable element or phage inserts; usually found at each end the insertion." [http://www.koko.gov.my/CocoaBioTech/Glossaryt.html] -synonym: "target site duplication" EXACT [] -is_a: SO:0000314 ! direct_repeat -intersection_of: SO:0000314 ! direct_repeat -intersection_of: derives_from SO:0000101 ! transposable_element -relationship: derives_from SO:0000101 ! transposable_element - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." [SO:ke] -synonym: "LTR retrotransposon poly purine tract" RELATED [] -synonym: "RR tract" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -synonym: "inverted ring chromosome" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! ring_chromosome - -[Term] -id: SO:0000440 -name: vector_replicon -def: "A replicon that has been modified to act as a vector for foreign sequence." [SO:ma] -comment: This term is mapped to MGED. 
Do not obsolete without consulting MGED ontology. -synonym: "vector" EXACT [] -synonym: "vector replicon" EXACT [] -xref: http://en.wikipedia.org/wiki/Vector_(molecular_biology) "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000443 -name: polymer_attribute -def: "An attribute to describe the kind of biological sequence." [SO:ke] -synonym: "polymer attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -synonym: "three prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "5' nc exon" EXACT [] -synonym: "5' non coding exon" EXACT [] -synonym: "five prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." [SO:ke] -synonym: "UTR intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." 
[SO:ke] -synonym: "five prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -synonym: "three prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequency of these components." [SO:ma] -synonym: "random sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -synonym: "chromosome interband" RELATED [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000451 -name: gene_with_polyadenylated_mRNA -def: "A gene that encodes a polyadenylated mRNA." [SO:xp] -synonym: "gene with polyadenylated mRNA" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000871 ! polyadenylated_mRNA -relationship: transcribed_to SO:0000871 ! polyadenylated_mRNA - -[Term] -id: SO:0000452 -name: transgene_attribute -is_obsolete: true - -[Term] -id: SO:0000453 -name: chromosomal_transposition -def: "A chromosome structure variant whereby a region of a chromosome has been transferred to another position. Among interchromosomal rearrangements, the term transposition is reserved for that class in which the telomeres of the chromosomes involved are coupled (that is to say, form the two ends of a single DNA molecule) as in wild-type." [FB:reference_manual, SO:ke] -synonym: "chromosomal transposition" EXACT [] -synonym: "transposition" NARROW [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." 
[http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000455 -name: gene_with_mRNA_with_frameshift -def: "A gene that encodes an mRNA with a frameshift." [SO:xp] -synonym: "gene with mRNA with frameshift" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000865 ! frameshift -relationship: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000456 -name: recombinationally_rearranged_gene -def: "A gene that is recombinationally rearranged." [SO:ke] -synonym: "recombinationally rearranged gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000940 ! recombinationally_rearranged -relationship: has_quality SO:0000940 ! recombinationally_rearranged - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -def: "A chromosome duplication involving an insertion from another chromosome." [SO:ke] -synonym: "interchromosomal duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene_segment -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D gene" EXACT [] -synonym: "D-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000459 -name: gene_with_trans_spliced_transcript -def: "A gene with a transcript that is trans-spliced." [SO:xp] -synonym: "gene with trans spliced transcript" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000479 ! trans_spliced_transcript -relationship: transcribed_to SO:0000479 ! 
trans_spliced_transcript - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_segment -comment: I am using the term segment instead of gene here to avoid confusion with the region 'gene'. -synonym: "vertebrate immunoglobulin T cell receptor segment" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite deficiency" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000463 -name: encodes_alternately_spliced_transcripts -def: "A gene that encodes more than one transcript." [SO:ke] -synonym: "encodes alternately spliced transcripts" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome deletion whereby a chromosome is generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus duplication" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! 
intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene_segment -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene" EXACT [] -synonym: "V-GENE" EXACT [] -synonym: "variable_gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -def: "An attribute describing a gene sequence where the resulting protein is regulated by the stability of the resulting protein." [SO:ke] -synonym: "post translationally regulated by protein stability" EXACT [] -synonym: "post-translationally regulated by protein stability" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -def: "An attribute describing a gene sequence where the resulting protein is modified to regulate it." [SO:ke] -synonym: "post translationally regulated by protein modification" EXACT [] -synonym: "post-translationally regulated by protein modification" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene_segment -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene" EXACT [] -synonym: "J-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000471 -name: autoregulated -def: "The gene product is involved in its own transcriptional regulation." 
[SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -def: "The gene product is involved in its own transcriptional regulation where it decreases transcription." [SO:ke] -synonym: "negatively autoregulated" EXACT [] -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -def: "The gene product is involved in its own transcriptional regulation, where it increases transcription." [SO:ke] -synonym: "positively autoregulated" EXACT [] -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." [SO:ke] -synonym: "contig read" EXACT [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -def: "A gene that is polycistronic." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000478 -name: C_gene_segment -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C gene" EXACT [] -synonym: "C_GENE" EXACT [] -synonym: "constant gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000479 -name: trans_spliced_transcript -def: "A transcript that is trans-spliced." 
[SO:xp] -synonym: "trans spliced transcript" EXACT [] -synonym: "trans-spliced transcript" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000870 ! trans_spliced -relationship: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly." [SO:ke] -synonym: "tiling path clone" EXACT [] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occurring at the termini of a DNA transposon." [SO:ke] -synonym: "terminal inverted repeat" EXACT [] -synonym: "TIR" EXACT [] -is_a: SO:0000294 ! inverted_repeat -relationship: part_of SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor gene cluster" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000202 ! 
three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-CLUSTER" EXACT [] -synonym: "DJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-C-CLUSTER" EXACT [] -synonym: "VDJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-CLUSTER" EXACT [] -synonym: "VDJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000574 ! 
VDJ_gene_segment - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-C-CLUSTER" RELATED [] -synonym: "VJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-C-CLUSTER" EXACT [] -synonym: "VJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-CLUSTER" EXACT [] -synonym: "VJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -synonym: "D gene recombination feature" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-HEPTAMER" EXACT [] -synonym: "three prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-NOMAMER" EXACT [] -synonym: "three prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-SPACER" EXACT [] -synonym: "three prime D spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-HEPTAMER" EXACT [] -synonym: "five prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-NONAMER" EXACT [] -synonym: "five prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'-SPACER" EXACT [] -synonym: "five prime D spacer" EXACT [] -synonym: "five prime D-spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -synonym: "Hoogsteen base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Hoogsteen_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." [SO:ke] -synonym: "reverse Hoogsteen base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. 
A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ C cluster" EXACT [] -synonym: "D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ cluster" EXACT [] -synonym: "D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J C cluster" EXACT [] -synonym: "D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000507 -name: pseudogenic_exon -def: "A non functional descendant of an exon, part of a pseudogene." [SO:ke] -comment: This is the analog of the exon of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon -relationship: part_of SO:0000516 ! pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J cluster" EXACT [] -synonym: "D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J C cluster" EXACT [] -synonym: "D-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! 
C_gene_segment - -[Term] -id: SO:0000510 -name: VD_gene_segment -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V_D_GENE" EXACT [] -synonym: "VD gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J C cluster" EXACT [] -synonym: "J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus aneuploid" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J cluster" EXACT [] -synonym: "J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J nonamer" EXACT [] -synonym: "J-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J heptamer" EXACT [] -synonym: "J-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -def: "A non functional descendant of a transcript, part of a pseudogene." [SO:ke] -comment: This is the analog of the transcript of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic transcript" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J spacer" EXACT [] -synonym: "J-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ cluster" EXACT [] -synonym: "V-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J cluster" EXACT [] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ C cluster" EXACT [] -synonym: "V-(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ cluster" EXACT [] -synonym: "V-(VDJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000574 ! 
VDJ_gene_segment - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J cluster" EXACT [] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ C cluster" EXACT [] -synonym: "V-(VJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ cluster" EXACT [] -synonym: "V-(VJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J cluster" EXACT [] -synonym: "V-(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V cluster" EXACT [] -synonym: "V-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ C cluster" EXACT [] -synonym: "V-D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ cluster" EXACT [] -synonym: "V-D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000572 ! 
DJ_gene_segment - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J C cluster" EXACT [] -synonym: "V-D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J cluster" EXACT [] -synonym: "V-D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J C cluster" EXACT [] -synonym: "V-D-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! 
C_gene_segment - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J cluster" EXACT [] -synonym: "V-D-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V heptamer" EXACT [] -synonym: "V-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J cluster" EXACT [] -synonym: "V-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J C cluster" EXACT [] -synonym: "V-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! 
J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V nonamer" EXACT [] -synonym: "V-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V spacer" EXACT [] -synonym: "V-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene recombination feature" EXACT [] -synonym: "V-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-C-CLUSTER" EXACT [] -synonym: "DJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! 
DJ_gene_segment - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-C-CLUSTER" EXACT [] -synonym: "DJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-C-CLUSTER" EXACT [] -synonym: "VDJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ C cluster" EXACT [] -synonym: "V-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." 
[http://www.pnas.org/cgi/content/full/100/11/6569] -synonym: "ISCR" RELATED [] -xref: http://en.wikipedia.org/wiki/Helitron "wiki" -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -synonym: "recoding pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -synonym: "designed sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite duplication" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000548 -name: gene_with_edited_transcript -def: "A gene that encodes a transcript that is edited." [SO:xp] -synonym: "gene with edited transcript" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000873 ! edited_transcript -relationship: transcribed_to SO:0000873 ! edited_transcript - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived duplication plus aneuploid" EXACT [] -is_a: SO:1000038 ! 
intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -def: "A chromosome structural variation whereby either a chromosome exists in addition to the normal chromosome complement or is lacking." [SO:ke] -comment: Examples are Nullo-4, Haplo-4 and triplo-4 in Drosophila. -synonym: "aneuploid chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -synonym: "polyadenylation termination signal" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "A region in the 5' UTR that pairs with the 16S rRNA during formation of the preinitiation complex." [SO:jh] -comment: Not found in Eukaryotic sequence. -synonym: "five prime ribosome binding site" EXACT [] -synonym: "RBS" RELATED [] -synonym: "Shine Dalgarno sequence" EXACT [] -synonym: "Shine-Dalgarno sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Shine-Dalgarno_sequence "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -alt_id: SO:0001430 -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation. The boundary between the UTR and the polyA sequence." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA cleavage site" EXACT [] -synonym: "polyA junction" EXACT [] -synonym: "polyA site" EXACT [] -synonym: "polyA_junction" EXACT [] -synonym: "polyadenylation site" RELATED [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! 
mature_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "5' clip" RELATED [] -synonym: "five prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'RS" EXACT [] -synonym: "five prime D recombination signal sequence" EXACT [] -synonym: "five prime D-recombination signal sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "3'-clip" EXACT [] -synonym: "three prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C cluster" EXACT [] -synonym: "C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D cluster" EXACT [] -synonym: "D-CLUSTER" EXACT [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J cluster" EXACT [] -synonym: "D-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: "Seven nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or J-gene recombination feature of an immunoglobulin or T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "HEPTAMER" RELATED [] -synonym: "heptamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -synonym: "nonamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000563 -name: vertebrate_immune_system_gene_recombination_spacer -synonym: "vertebrate immune system gene recombination spacer" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J C cluster" EXACT [] -synonym: "V-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J C cluster" EXACT [] -synonym: "V-(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J C cluster" EXACT [] -synonym: "V-(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -synonym: "inversion derived aneuploid chromosome" EXACT [] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promoter -synonym: "bidirectional promoter" EXACT [] -is_a: SO:0000167 ! 
promoter - -[Term] -id: SO:0000569 -name: retrotransposed -alt_id: SO:0100042 -def: "An attribute of a feature that occurred as the product of a reverse transcriptase mediated event." [SO:ke] -comment: GO:0003964 RNA-directed DNA polymerase activity. -xref: http://en.wikipedia.org/wiki/Retrotransposed "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-RS" EXACT [] -synonym: "three prime D recombination signal sequence" EXACT [] -synonym: "three_prime_D-recombination_signal_sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_encoding -synonym: "miRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000572 -name: DJ_gene_segment -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D-J-GENE" EXACT [] -synonym: "DJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000573 -name: rRNA_encoding -synonym: "rRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000574 -name: VDJ_gene_segment -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-D-J-GENE" EXACT [] -synonym: "VDJ gene" EXACT [] -is_a: SO:0000936 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000575 -name: scRNA_encoding -synonym: "scRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000576 -name: VJ_gene_segment -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-J-GENE" EXACT [] -synonym: "VJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_encoding -synonym: "snoRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." [SO:ma] -synonym: "edited transcript feature" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -synonym: "methylation guide snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." [SO:ke] -synonym: "rRNA cleavage snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "pre edited region" EXACT [] -synonym: "pre-edited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "A tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. TmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and Eukaryote nuclear genomes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa RNA" RELATED [] -synonym: "ssrA" RELATED [] -xref: http://en.wikipedia.org/wiki/TmRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_encoding -synonym: "C/D box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." [SO:ke] -synonym: "10Sa RNA primary transcript" RELATED [] -synonym: "ssrA RNA primary transcript" RELATED [] -synonym: "tmRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. 
They catalyze their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -is_a: SO:0000188 ! intron -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001186 ! ribozymic -relationship: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -synonym: "SRP RNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. 
Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000589 ! SRP_RNA_primary_transcript - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A tertiary structure in RNA where nucleotides in a loop form base pairs with a region of RNA downstream of the loop." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Pseudoknot "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "classical pseudoknot" EXACT [] -synonym: "H pseudoknot" EXACT [] -synonym: "H-pseudoknot" EXACT [] -synonym: "H-type pseudoknot" EXACT [] -synonym: "hairpin-type pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. 
This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000595 ! C_D_box_snoRNA_primary_transcript - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "box H/ACA snoRNA" EXACT [] -synonym: "H ACA box snoRNA" EXACT [] -synonym: "H/ACA box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000596 ! H_ACA_box_snoRNA_primary_transcript - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." [SO:ke] -synonym: "C/D box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! 
snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -synonym: "H ACA box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." [http://www.rna.ucla.edu/index.html] -is_obsolete: true - -[Term] -id: SO:0000598 -name: edited_by_C_insertion_and_dinucleotide_insertion -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000599 -name: edited_by_C_to_U_substitution -is_obsolete: true - -[Term] -id: SO:0000600 -name: edited_by_A_to_I_substitution -is_obsolete: true - -[Term] -id: SO:0000601 -name: edited_by_G_addition -is_obsolete: true - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. 
The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing block" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing domain" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "unedited region" EXACT [] -is_a: SO:0000579 ! 
edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_encoding -synonym: "H ACA box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -synonym: "oligo U tail" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000602 ! guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -synonym: "bacterial RNApol promoter" EXACT [] -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -synonym: "bacterial terminator" EXACT [] -is_a: SO:0000141 ! 
terminator -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -synonym: "terminator of type 2 RNApol III promoter" EXACT [] -is_a: SO:0000951 ! eukaryotic_terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -synonym: "RNApol III promoter type 1" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "RNApol III promoter type 2" EXACT [] -synonym: "tRNA promoter" RELATED [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence TGGCnnAGTGG." [SO:ke] -comment: Binds TFIIIC. -synonym: "A-box" EXACT [] -xref: http://en.wikipedia.org/wiki/A-box "wiki" -is_a: SO:0001660 ! core_promoter_element - -[Term] -id: SO:0000620 -name: B_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence AGGTTCCAnnCC." [SO:ke] -comment: Binds TFIIIC. -synonym: "B-box" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -synonym: "RNApol III promoter type 3" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -def: "An RNA polymerase III type 1 promoter with consensus sequence CAnnCCn." [SO:ke] -synonym: "C-box" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000623 -name: snRNA_encoding -synonym: "snRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -synonym: "chromosomal regulatory element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000627 -name: insulator -def: "A transcriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -synonym: "five prime open reading frame" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -def: "A start codon upstream of the ORF." [SO:ke] -synonym: "upstream AUG codon" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000631 -name: polycistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." [SO:ke] -synonym: "polycistronic primary transcript" EXACT [] -is_a: SO:0000078 ! 
polycistronic_transcript -is_a: SO:0000185 ! primary_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000880 ! polycistronic -relationship: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000632 -name: monocistronic_primary_transcript -def: "A primary transcript encoding for one gene product." [SO:ke] -synonym: "monocistronic primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript -is_a: SO:0000665 ! monocistronic_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000878 ! monocistronic -relationship: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000633 -name: monocistronic_mRNA -def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd] -synonym: "monocistronic mRNA" EXACT [] -synonym: "monocistronic processed transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Monocistronic_mRNA "wiki" -is_a: SO:0000234 ! mRNA -is_a: SO:0000665 ! monocistronic_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000878 ! monocistronic -relationship: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000634 -name: polycistronic_mRNA -def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd] -synonym: "polycistronic mRNA" EXACT [] -synonym: "polycistronic processed transcript" RELATED [] -xref: http://en.wikipedia.org/wiki/Polycistronic_mRNA "wiki" -is_a: SO:0000078 ! polycistronic_transcript -is_a: SO:0000234 ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000880 ! polycistronic -relationship: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "A primary transcript that donates the spliced leader to other mRNA." [SO:ke] -synonym: "mini exon donor RNA" EXACT [] -synonym: "mini-exon donor RNA" EXACT [] -is_a: SO:0000185 ! 
primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -synonym: "spliced leader RNA" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000635 ! mini_exon_donor_RNA - -[Term] -id: SO:0000637 -name: engineered_plasmid -def: "A plasmid that is engineered." [SO:xp] -synonym: "engineered plasmid" EXACT [] -synonym: "engineered plasmid gene" RELATED [] -is_a: SO:0000155 ! plasmid -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -synonym: "transcribed spacer region" EXACT [] -is_a: SO:0000838 ! rRNA_primary_transcript_region - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -synonym: "internal transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -synonym: "external transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -synonym: "tetranucleotide repeat microsatellite feature" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_encoding -synonym: "SRP RNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro RNA primary transcript" EXACT [] -synonym: "miRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000648 -name: stRNA_primary_transcript -def: "A primary transcript encoding a small temporal mRNA (SO:0000649)." [SO:ke] -synonym: "small temporal RNA primary transcript" EXACT [] -synonym: "stRNA primary transcript" EXACT [] -is_a: SO:0000647 ! 
miRNA_primary_transcript - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000255 ! rRNA_small_subunit_primary_transcript - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilizes 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! 
large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000654 -name: maxicircle_gene -def: "A mitochondrial gene located in a maxicircle." [SO:xp] -synonym: "maxi-circle gene" EXACT [] -synonym: "maxicircle gene" EXACT [] -is_a: SO:0000089 ! kinetoplast_gene -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000742 ! maxicircle -relationship: part_of SO:0000742 ! maxicircle - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000656 -name: stRNA_encoding -synonym: "stRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: has_part SO:0000726 ! repeat_unit - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! 
repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_encoding -synonym: "tmRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_obsolete: true - -[Term] -id: SO:0000661 -name: intron_attribute -is_obsolete: true - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000663 -name: tRNA_encoding -synonym: "tRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -synonym: "introgressed chromosome region" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000665 -name: monocistronic_transcript -def: "A transcript that is monocistronic." [SO:xp] -synonym: "monocistronic transcript" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000878 ! monocistronic -relationship: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000666 -name: mobile_intron -def: "An intron (mitochondrial, chloroplast, nuclear or prokaryotic) that encodes a double strand sequence specific endonuclease allowing for mobility." [SO:ke] -synonym: "mobile intron" EXACT [] -is_a: SO:0000188 ! intron -is_a: SO:0001037 ! mobile_genetic_element -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001234 ! mobile -relationship: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0000667 -name: insertion -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: DBVAR -subset: SOFA -synonym: "insertion" EXACT dbvar [http://www.ncbi.nlm.nih.gov/dbvar/] -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -xref: loinc:LA6687-3 "Insertion" -is_a: SO:0001059 ! 
sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -synonym: "sequence rearrangement feature" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." [SO:ma] -synonym: "chromosome breakage sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -synonym: "internal eliminated sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -synonym: "macronucleus destined segment" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non canonical splice site" EXACT [] -synonym: "non-canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000678 -consider: SO:0000679 - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." 
[SO:ke] -synonym: "canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000676 -consider: SO:0000677 - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -synonym: "canonical 3' splice site" EXACT [] -synonym: "canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." [SO:ke] -synonym: "canonical 5' splice site" EXACT [] -synonym: "canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." [SO:ke] -synonym: "non canonical 3' splice site" RELATED [] -synonym: "non canonical three prime splice site" EXACT [] -synonym: "non-canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non canonical 5' splice site" EXACT [] -synonym: "non canonical five prime splice site" EXACT [] -synonym: "non-canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non ATG start codon" EXACT [] -synonym: "non canonical start codon" EXACT [] -synonym: "non-canonical start codon" EXACT [] -is_a: SO:0000318 ! start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -synonym: "aberrant processed transcript" EXACT [] -is_a: SO:0000673 ! 
transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." [http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -synonym: "exonic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000685 -name: DNAseI_hypersensitive_site -synonym: "DHS" EXACT [] -synonym: "DNAseI hypersensitive site" EXACT [] -is_a: SO:0000322 ! nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "A chromosomal translocation whereby the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements. This occurs for some translocations, particularly but not exclusively, reciprocal translocations." [SO:ma] -synonym: "translocation element" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! 
expressed_sequence_match - -[Term] -id: SO:0000690 -name: gene_with_polycistronic_transcript -def: "A gene that encodes a polycistronic transcript." [SO:xp] -synonym: "gene with polycistronic transcript" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000078 ! polycistronic_transcript -relationship: transcribed_to SO:0000078 ! polycistronic_transcript - -[Term] -id: SO:0000691 -name: cleaved_initiator_methionine -alt_id: BS:00067 -def: "The initiator methionine that has been cleaved from a mature polypeptide sequence." [EBIBS:GAR] -subset: biosapiens -synonym: "cleaved initiator methionine" EXACT [] -synonym: "init_met" RELATED [uniprot:feature_type] -synonym: "initiator methionine" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000692 -name: gene_with_dicistronic_transcript -def: "A gene that encodes a dicistronic transcript." [SO:xp] -synonym: "gene with dicistronic transcript" EXACT [] -is_a: SO:0000690 ! gene_with_polycistronic_transcript -intersection_of: SO:0000690 ! gene_with_polycistronic_transcript -intersection_of: transcribed_to SO:0000079 ! dicistronic_transcript -relationship: transcribed_to SO:0000079 ! dicistronic_transcript - -[Term] -id: SO:0000693 -name: gene_with_recoded_mRNA -def: "A gene that encodes an mRNA that is recoded." [SO:xp] -synonym: "gene with recoded mRNA" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000881 ! recoded -relationship: has_quality SO:0000881 ! recoded - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! 
SNV - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000697 -name: gene_with_stop_codon_read_through -def: "A gene that encodes a transcript with stop codon readthrough." [SO:xp] -synonym: "gene with stop codon read through" EXACT [] -is_a: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000883 ! stop_codon_read_through -relationship: has_part SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000698 -name: gene_with_stop_codon_redefined_as_pyrrolysine -def: "A gene encoding an mRNA that has the stop codon redefined as pyrrolysine." [SO:xp] -synonym: "gene with stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000884 ! stop_codon_redefined_as_pyrrolysine -relationship: has_part SO:0000884 ! stop_codon_redefined_as_pyrrolysine - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -synonym: "breakpoint" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." 
[SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjcent copies of a region (of length greater than 1)." [SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The 5' five prime splice site region of the donor RNA." 
[SO:ke] -comment: SL RNA contains a donor site. -synonym: "5 prime trans splice site" RELATED [] -synonym: "trans splice donor site" EXACT [] -synonym: "trans-splice donor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -def: "A trans_splicing_acceptor_site which appends the 22nt SL1 RNA leader sequence to the 5' end of most mRNAs." [SO:nlw] -synonym: "SL1 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -def: "A trans_splicing_acceptor_site which appends the 22nt SL2 RNA leader sequence to the 5' end of mRNAs. SL2 acceptor sites occur in genes in internal segments of polycistronic transcripts." [SO:nlw] -synonym: "SL2 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000710 -name: gene_with_stop_codon_redefined_as_selenocysteine -def: "A gene encoding an mRNA that has the stop codon redefined as selenocysteine." [SO:xp] -synonym: "gene with stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: has_part SO:0000885 ! stop_codon_redefined_as_selenocysteine -relationship: has_part SO:0000885 ! stop_codon_redefined_as_selenocysteine - -[Term] -id: SO:0000711 -name: gene_with_mRNA_recoded_by_translational_bypass -def: "A gene with mRNA recoded by translational bypass." [SO:xp] -synonym: "gene with mRNA recoded by translational bypass" EXACT [] -is_a: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000886 ! recoded_by_translational_bypass -relationship: has_quality SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:0000712 -name: gene_with_transcript_with_translational_frameshift -def: "A gene encoding a transcript that has a translational frameshift." 
[SO:xp] -synonym: "gene with transcript with translational frameshift" EXACT [] -is_a: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_quality SO:0000887 ! translationally_frameshifted -relationship: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000713 -name: DNA_motif -def: "A motif that is active in the DNA form of the sequence." [SO:ke] -synonym: "DNA motif" EXACT [] -xref: http://en.wikipedia.org/wiki/DNA_motif "wiki" -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001683 ! sequence_motif - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000716 -name: dicistronic_mRNA -def: "An mRNA that has the quality dicistronic." [SO:ke] -synonym: "dicistronic mRNA" EXACT [] -synonym: "dicistronic processed transcript" RELATED [] -is_a: SO:0000079 ! dicistronic_transcript -is_a: SO:0000634 ! polycistronic_mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000879 ! dicistronic -relationship: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interrupted by one or more stop codons; usually identified through intergenomic sequence comparisons." [SGD:rb] -comment: Term requested by Rama from SGD. -synonym: "blocked reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -synonym: "superscaffold" RELATED [] -is_a: SO:0001876 ! partial_genomic_sequence_assembly - -[Term] -id: SO:0000720 -name: foreign_transposable_element -def: "A transposable element that is foreign." [SO:ke] -comment: requested by Michael on 19 Nov 2004. -synonym: "foreign transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000721 -name: gene_with_dicistronic_primary_transcript -def: "A gene that encodes a dicistronic primary transcript." [SO:xp] -comment: Requested by Michael, 19 nov 2004. -synonym: "gene with dicistronic primary transcript" EXACT [] -is_a: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:1001197 ! dicistronic_primary_transcript -relationship: transcribed_to SO:1001197 ! dicistronic_primary_transcript - -[Term] -id: SO:0000722 -name: gene_with_dicistronic_mRNA -def: "A gene that encodes a polycistronic mRNA." [SO:xp] -comment: Requested by MA nov 19 2004. -synonym: "gene with dicistronic mRNA" EXACT [] -synonym: "gene with dicistronic processed transcript" EXACT [] -is_a: SO:0000692 ! 
gene_with_dicistronic_transcript -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:0000716 ! dicistronic_mRNA -relationship: transcribed_to SO:0000716 ! dicistronic_mRNA - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." [SO:ma] -synonym: "intervening DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/IDNA "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal - -[Term] -id: SO:0000726 -name: repeat_unit -def: "The simplest repeated component of a repeat region. A single repeat." [SO:ke] -comment: Added to comply with the feature table. A single repeat. 
-synonym: "repeat unit" EXACT [] -xref: http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory region where transcription factor binding sites clustered to regulate various aspects of transcription activities. (CRMs can be located a few kb to hundred kb upstream of the basal promoter, in the coding sequence, within introns, or in the downstream 3'UTR sequences, as well as on different chromosome). A single gene can be regulated by multiple CRMs to give precise control of its spatial and temporal expression. CRMs function as nodes in large, intertwined regulatory network." [PMID:19660565, SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -synonym: "transcription factor module" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: has_part SO:0000235 ! TF_binding_site -relationship: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000728 -name: intein -def: "A region of a peptide that is able to excise itself and rejoin the remaining portions with a peptide bond." [SO:ke] -comment: Intein-mediated protein splicing occurs after mRNA has been translated into a protein. -synonym: "protein intron" RELATED [] -xref: http://en.wikipedia.org/wiki/Intein "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000729 -name: intein_containing -def: "An attribute of protein-coding genes where the initial protein product contains an intein." [SO:ke] -synonym: "intein containing" EXACT [] -is_a: SO:0000010 ! protein_coding - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! 
sequence_assembly - -[Term] -id: SO:0000731 -name: fragmentary -def: "An attribute to describe a feature that is incomplete." [SO:ke] -comment: Term added because of request by MO people. -synonym: "fragment" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000732 -name: predicted -def: "An attribute describing an unverified region." [SO:ke] -xref: http://en.wikipedia.org/wiki/Predicted "wiki" -is_a: SO:0000905 ! status - -[Term] -id: SO:0000733 -name: feature_attribute -def: "An attribute describing a located_sequence_feature." [SO:ke] -synonym: "feature attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000734 -name: exemplar_mRNA -def: "An exemplar is a representative cDNA sequence for each gene. The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: Added for the MO people. -synonym: "exemplar mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000864 ! exemplar -relationship: has_quality SO:0000864 ! exemplar - -[Term] -id: SO:0000735 -name: sequence_location -synonym: "sequence location" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_sequence -synonym: "organelle sequence" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "mitochondrial sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000738 -name: nuclear_sequence -synonym: "nuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -synonym: "nucleomorphic sequence" EXACT [] -is_a: SO:0000736 ! 
organelle_sequence - -[Term] -id: SO:0000740 -name: plastid_sequence -synonym: "plastid sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000741 -name: kinetoplast -alt_id: SO:0000826 -def: "A kinetoplast is an interlocked network of thousands of minicircles and tens of maxi circles, located near the base of the flagellum of some protozoan species." [PMID:8395055] -synonym: "kinetoplast_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Kinetoplast "wiki" -is_a: SO:0001026 ! genome -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0000742 ! maxicircle -intersection_of: has_part SO:0000980 ! minicircle -relationship: has_part SO:0000742 ! maxicircle -relationship: has_part SO:0000980 ! minicircle - -[Term] -id: SO:0000742 -name: maxicircle -alt_id: SO:0000827 -def: "A maxicircle is a replicon, part of a kinetoplast, that contains open reading frames and replicates via a rolling circle method." [PMID:8395055] -synonym: "maxicircle_chromosome" EXACT [] -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000743 -name: apicoplast_sequence -synonym: "apicoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -synonym: "chromoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -synonym: "chloroplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -synonym: "cyanelle sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -synonym: "leucoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -synonym: "proplastid sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_location -synonym: "plasmid location" EXACT [] -is_a: SO:0000735 ! 
sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -synonym: "amplification origin" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_location -synonym: "proviral location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -def: "The region of sequence that has been inserted and is being propagated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000754 -name: lambda_vector -def: "The lambda bacteriophage is the vector for the linear lambda clone. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -synonym: "lambda vector" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000755 -name: plasmid_vector -synonym: "plasmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Plasmid_vector#Vectors "wiki" -is_a: SO:0000440 ! vector_replicon -intersection_of: SO:0001235 ! replicon -intersection_of: derives_from SO:0000155 ! plasmid -relationship: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template." [SO:ma] -synonym: "complementary DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/CDNA "wiki" -is_a: SO:0000352 ! 
DNA - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -synonym: "single strand cDNA" EXACT [] -synonym: "single stranded cDNA" EXACT [] -synonym: "single-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -synonym: "double strand cDNA" RELATED [] -synonym: "double stranded cDNA" EXACT [] -synonym: "double-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_obsolete: true - -[Term] -id: SO:0000760 -name: YAC_clone -is_obsolete: true - -[Term] -id: SO:0000761 -name: phagemid_clone -is_obsolete: true - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000763 -name: fosmid_clone -is_obsolete: true - -[Term] -id: SO:0000764 -name: BAC_clone -is_obsolete: true - -[Term] -id: SO:0000765 -name: cosmid_clone -is_obsolete: true - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -def: "A tRNA sequence that has a pyrrolysine anticodon, and a 3' pyrrolysine binding region." [SO:ke] -synonym: "pyrrolysyl tRNA" EXACT [] -synonym: "pyrrolysyl-transfer ribonucleic acid" EXACT [] -synonym: "pyrrolysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0001178 ! pyrrolysine_tRNA_primary_transcript - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome." [SO:ma] -is_a: SO:0000155 ! plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria. Processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw, issn:1362-4962] -comment: Added in response to comment from Kelly Williams from Indiana. Nov 2005. 
-synonym: "tmRNA coding piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw] -comment: Added in response to Kelly Williams from Indiana. Date: Nov 2005. -synonym: "tmRNA acceptor piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000771 -name: QTL -def: "A quantitative trait locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." [http://rgd.mcw.edu/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005. -synonym: "quantitative trait locus" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000772 -name: genomic_island -def: "A genomic island is an integrated mobile genetic element, characterized by size (over 10 Kb). It that has features that suggest a foreign origin. These can include nucleotide distribution (oligonucleotides signature, CG content etc.) that differs from the bulk of the chromosome and/or genes suggesting DNA mobility." [Phigo:at, SO:ke] -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -synonym: "genomic island" EXACT [] -xref: http://en.wikipedia.org/wiki/Genomic_island "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. 
They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "pathogenic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands. -synonym: "metabolic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -def: "An adaptive island is a genomic island that provides an adaptive advantage to the host." [SO:ke] -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands. Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "adaptive island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands. Evolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA gene. John T. Sullivan and Clive W. Ronso PNAS 1998 Apr 28 95 (9) 5145-5149. -synonym: "symbiosis island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendent of an rRNA." 
[SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000779 -name: engineered_episome -def: "An episome that is engineered." [SO:xp] -comment: Requested by Lynn Crosby Jan 2006. -synonym: "engineered episome" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -is_a: SO:0000768 ! episome -intersection_of: SO:0000768 ! episome -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000780 -name: transposable_element_attribute -comment: Added by KE Jan 2006 to capture the kinds of attributes of TEs -is_obsolete: true - -[Term] -id: SO:0000781 -name: transgenic -def: "Attribute describing sequence that has been integrated with foreign sequence." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000782 -name: natural -def: "An attribute describing a feature that occurs in nature." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000783 -name: engineered -def: "An attribute to describe a region that was modified in vitro." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000784 -name: foreign -def: "An attribute to describe a region from another species." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000785 -name: cloned_region -comment: Added in response to Lynn Crosby. A clone insert may be composed of many cloned regions. 
-synonym: "cloned region" EXACT [] -synonym: "cloned segment" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000786 -name: reagent_attribute -comment: Added jan 2006 by KE. -synonym: "reagent attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000787 -name: clone_attribute -is_obsolete: true - -[Term] -id: SO:0000788 -name: cloned -is_obsolete: true - -[Term] -id: SO:0000789 -name: validated -def: "An attribute to describe a feature that has been proven." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000790 -name: invalidated -def: "An attribute describing a feature that is invalidated." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000791 -name: cloned_genomic -is_obsolete: true - -[Term] -id: SO:0000792 -name: cloned_cDNA -is_obsolete: true - -[Term] -id: SO:0000793 -name: engineered_DNA -is_obsolete: true - -[Term] -id: SO:0000794 -name: engineered_rescue_region -def: "A rescue region that is engineered." [SO:xp] -synonym: "engineered rescue fragment" EXACT [] -synonym: "engineered rescue region" EXACT [] -synonym: "engineered rescue segment" EXACT [] -is_a: SO:0000411 ! rescue_region -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000411 ! rescue_region -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000795 -name: rescue_mini_gene -def: "A mini_gene that rescues." [SO:xp] -synonym: "rescue mini gene" EXACT [] -synonym: "rescue mini-gene" EXACT [] -is_a: SO:0000815 ! mini_gene -intersection_of: SO:0000815 ! mini_gene -intersection_of: has_quality SO:0000814 ! rescue -relationship: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000796 -name: transgenic_transposable_element -def: "TE that has been modified in vitro, including insertion of DNA derived from a source other than the originating TE." [FB:mc] -comment: Modified as requested by Lynn - FB. May 2007. 
-synonym: "transgenic transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: derives_from SO:0000151 ! clone -intersection_of: has_quality SO:0000781 ! transgenic -relationship: derives_from SO:0000151 ! clone -relationship: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000797 -name: natural_transposable_element -def: "TE that exists (or existed) in nature." [FB:mc] -synonym: "natural transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000782 ! natural -relationship: has_quality SO:0000782 ! natural - -[Term] -id: SO:0000798 -name: engineered_transposable_element -def: "TE that has been modified by manipulations in vitro." [FB:mc] -synonym: "engineered transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000799 -name: engineered_foreign_transposable_element -def: "A transposable_element that is engineered and foreign." [FB:mc] -synonym: "engineered foreign transposable element" EXACT [] -is_a: SO:0000720 ! foreign_transposable_element -is_a: SO:0000798 ! engineered_transposable_element -is_a: SO:0000805 ! engineered_foreign_region -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000800 -name: assortment_derived_duplication -def: "A multi-chromosome duplication aberration generated by reassortment of other aberration components." 
[FB:gm] -synonym: "assortment derived duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000801 -name: assortment_derived_deficiency_plus_duplication -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency and a duplication." [FB:gm] -synonym: "assortment derived deficiency plus duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000802 -name: assortment_derived_deficiency -def: "A multi-chromosome deficiency aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment-derived deficiency" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000803 -name: assortment_derived_aneuploid -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency or a duplication." [FB:gm] -synonym: "assortment derived aneuploid" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000804 -name: engineered_region -def: "A region that is engineered." [SO:xp] -synonym: "construct" EXACT [] -synonym: "engineered region" EXACT [] -synonym: "engineered sequence" EXACT [] -is_a: SO:0001409 ! biomaterial_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000805 -name: engineered_foreign_region -def: "A region that is engineered and foreign." [SO:xp] -synonym: "engineered foreign region" EXACT [] -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000806 -name: fusion -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000807 -name: engineered_tag -def: "A tag that is engineered." [SO:xp] -synonym: "engineered tag" EXACT [] -is_a: SO:0000324 ! tag -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000324 ! tag -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000808 -name: validated_cDNA_clone -def: "A cDNA clone that has been validated." [SO:xp] -synonym: "validated cDNA clone" EXACT [] -is_a: SO:0000317 ! cDNA_clone -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000789 ! validated -relationship: has_quality SO:0000789 ! validated - -[Term] -id: SO:0000809 -name: invalidated_cDNA_clone -def: "A cDNA clone that is invalid." [SO:xp] -synonym: "invalidated cDNA clone" EXACT [] -is_a: SO:0000317 ! cDNA_clone -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000790 ! invalidated -relationship: has_quality SO:0000790 ! invalidated - -[Term] -id: SO:0000810 -name: chimeric_cDNA_clone -def: "A cDNA clone invalidated because it is chimeric." [SO:xp] -synonym: "chimeric cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA -relationship: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA - -[Term] -id: SO:0000811 -name: genomically_contaminated_cDNA_clone -def: "A cDNA clone invalidated by genomic contamination." [SO:xp] -synonym: "genomically contaminated cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000414 ! invalidated_by_genomic_contamination -relationship: has_quality SO:0000414 ! invalidated_by_genomic_contamination - -[Term] -id: SO:0000812 -name: polyA_primed_cDNA_clone -def: "A cDNA clone invalidated by polyA priming." 
[SO:xp] -synonym: "polyA primed cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000415 ! invalidated_by_genomic_polyA_primed_cDNA -relationship: has_quality SO:0000415 ! invalidated_by_genomic_polyA_primed_cDNA - -[Term] -id: SO:0000813 -name: partially_processed_cDNA_clone -def: "A cDNA invalidated clone by partial processing." [SO:xp] -synonym: "partially processed cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000416 ! invalidated_by_partial_processing -relationship: has_quality SO:0000416 ! invalidated_by_partial_processing - -[Term] -id: SO:0000814 -name: rescue -def: "An attribute describing a region's ability, when introduced to a mutant organism, to re-establish (rescue) a phenotype." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000815 -name: mini_gene -def: "By definition, minigenes are short open-reading frames (ORF), usually encoding approximately 9 to 20 amino acids, which are expressed in vivo (as distinct from being synthesized as peptide or protein ex vivo and subsequently injected). The in vivo synthesis confers a distinct advantage: the expressed sequences can enter both antigen presentation pathways, MHC I (inducing CD8+ T- cells, which are usually cytotoxic T-lymphocytes (CTL)) and MHC II (inducing CD4+ T-cells, usually 'T-helpers' (Th)); and can encounter B-cells, inducing antibody responses. Three main vector approaches have been used to deliver minigenes: viral vectors, bacterial vectors and plasmid DNA." [PMID:15992143] -synonym: "mini gene" EXACT [] -is_a: SO:0000236 ! ORF - -[Term] -id: SO:0000816 -name: rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "rescue gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000814 ! 
rescue -relationship: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000817 -name: wild_type -def: "An attribute describing sequence with the genotype found in nature and/or standard laboratory stock." [SO:ke] -synonym: "wild type" EXACT [] -xref: http://en.wikipedia.org/wiki/Wild_type "wiki" -xref: loinc:LA9658-1 "wild type" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000818 -name: wild_type_rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "wild type rescue gene" EXACT [] -is_a: SO:0000816 ! rescue_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000817 ! wild_type -relationship: has_quality SO:0000817 ! wild_type - -[Term] -id: SO:0000819 -name: mitochondrial_chromosome -def: "A chromosome originating in a mitochondria." [SO:xp] -synonym: "mitochondrial chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence -relationship: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000820 -name: chloroplast_chromosome -def: "A chromosome originating in a chloroplast." [SO:xp] -synonym: "chloroplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000745 ! chloroplast_sequence -relationship: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000821 -name: chromoplast_chromosome -def: "A chromosome originating in a chromoplast." [SO:xp] -synonym: "chromoplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000744 ! chromoplast_sequence -relationship: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000822 -name: cyanelle_chromosome -def: "A chromosome originating in a cyanelle." [SO:xp] -synonym: "cyanelle chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! 
chromosome -intersection_of: has_origin SO:0000746 ! cyanelle_sequence -relationship: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000823 -name: leucoplast_chromosome -def: "A chromosome with origin in a leucoplast." [SO:xp] -synonym: "leucoplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000747 ! leucoplast_sequence -relationship: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000824 -name: macronuclear_chromosome -def: "A chromosome originating in a macronucleus." [SO:xp] -synonym: "macronuclear chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000083 ! macronuclear_sequence -relationship: has_origin SO:0000083 ! macronuclear_sequence - -[Term] -id: SO:0000825 -name: micronuclear_chromosome -def: "A chromosome originating in a micronucleus." [SO:xp] -synonym: "micronuclear chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000084 ! micronuclear_sequence -relationship: has_origin SO:0000084 ! micronuclear_sequence - -[Term] -id: SO:0000828 -name: nuclear_chromosome -def: "A chromosome originating in a nucleus." [SO:xp] -synonym: "nuclear chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000738 ! nuclear_sequence -relationship: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000829 -name: nucleomorphic_chromosome -def: "A chromosome originating in a nucleomorph." [SO:xp] -synonym: "nucleomorphic chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence -relationship: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." 
[SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000832 -name: promoter_region -def: "A region of sequence which is part of a promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -def: "A region of a mature transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! 
primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000838 -name: rRNA_primary_transcript_region -def: "A region of an rRNA primary transcript." [SO:ke] -comment: To allow transcribed_spacer_region to have a path to the root. -synonym: "rRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000840 -name: repeat_component -def: "A region of a repeated sequence." [SO:ke] -comment: A manufactured to group the parts of repeats, to give them an is_a path back to the root. -synonym: "repeat component" EXACT [] -is_a: SO:0001412 ! 
topologically_defined_region - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000843 -name: bacterial_RNApol_promoter_region -def: "A region which is part of a bacterial RNA polymerase promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of bacterial_RNApol_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000844 -name: RNApol_II_promoter_region -def: "A region of sequence which is a promoter for RNA polymerase II." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_II_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000845 -name: RNApol_III_promoter_type_1_region -def: "A region of sequence which is a promoter for RNA polymerase III type 1." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_1 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000846 -name: RNApol_III_promoter_type_2_region -def: "A region of sequence which is a promoter for RNA polymerase III type 2." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_2 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000847 -name: tmRNA_region -def: "A region of a tmRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of tmRNA, thus giving them an is_a path back to the root. 
-synonym: "tmRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000584 ! tmRNA - -[Term] -id: SO:0000848 -name: LTR_component -synonym: "long term repeat component" EXACT [] -synonym: "LTR component" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000849 -name: three_prime_LTR_component -synonym: "3' long terminal repeat component" EXACT [] -synonym: "three prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000850 -name: five_prime_LTR_component -synonym: "5' long term repeat component" EXACT [] -synonym: "five prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000851 -name: CDS_region -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0000853 -name: homologous_region -def: "A region that is homologous to another region." [SO:ke] -synonym: "homolog" EXACT [] -synonym: "homologous region" EXACT [] -synonym: "homologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Homology_(biology) "wiki" -is_a: SO:0000330 ! conserved_region -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000857 ! homologous -relationship: has_quality SO:0000857 ! homologous - -[Term] -id: SO:0000854 -name: paralogous_region -def: "A homologous_region that is paralogous to another region." [SO:ke] -comment: A term to be used in conjunction with the paralogous_to relationship. 
-synonym: "paralog" EXACT [] -synonym: "paralogous region" EXACT [] -synonym: "paralogue" EXACT [] -xref: http://en.wikipedia.org/wiki/Paralog#Paralogy "wiki" -is_a: SO:0000853 ! homologous_region -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000859 ! paralogous -relationship: has_quality SO:0000859 ! paralogous - -[Term] -id: SO:0000855 -name: orthologous_region -def: "A homologous_region that is orthologous to another region." [SO:ke] -comment: This term should be used in conjunction with the similarity relationships defined in SO. -synonym: "ortholog" EXACT [] -synonym: "orthologous region" EXACT [] -synonym: "orthologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Ortholog#Orthology "wiki" -is_a: SO:0000853 ! homologous_region -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000858 ! orthologous -relationship: has_quality SO:0000858 ! orthologous - -[Term] -id: SO:0000856 -name: conserved -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000857 -name: homologous -def: "Similarity due to common ancestry." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000858 -name: orthologous -def: "An attribute describing a kind of homology where divergence occured after a speciation event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000859 -name: paralogous -def: "An attribute describing a kind of homology where divergence occurred after a duplication event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000860 -name: syntenic -def: "Attribute describing sequence regions occurring in same order on chromosome of different species." [SO:ke] -xref: http://en.wikipedia.org/wiki/Syntenic "wiki" -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000861 -name: capped_primary_transcript -def: "A primary transcript that is capped." [SO:xp] -synonym: "capped primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript -intersection_of: SO:0000185 ! 
primary_transcript -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped -relationship: adjacent_to SO:0000581 ! cap -relationship: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000862 -name: capped_mRNA -def: "An mRNA that is capped." [SO:xp] -synonym: "capped mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped -relationship: adjacent_to SO:0000581 ! cap -relationship: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000863 -name: mRNA_attribute -def: "An attribute describing an mRNA feature." [SO:ke] -synonym: "mRNA attribute" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000864 -name: exemplar -def: "An attribute describing a sequence is representative of a class of similar sequences." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000865 -name: frameshift -def: "An attribute describing a sequence that contains a mutation involving the deletion or insertion of one or more bases, where this number is not divisible by 3." [SO:ke] -xref: http://en.wikipedia.org/wiki/Frameshift "wiki" -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000866 -name: minus_1_frameshift -def: "A frameshift caused by deleting one base." [SO:ke] -synonym: "minus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000867 -name: minus_2_frameshift -def: "A frameshift caused by deleting two bases." [SO:ke] -synonym: "minus 2 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000868 -name: plus_1_frameshift -def: "A frameshift caused by inserting one base." [SO:ke] -synonym: "plus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000869 -name: plus_2_framshift -def: "A frameshift caused by inserting two bases." [SO:ke] -synonym: "plus 2 framshift" EXACT [] -is_a: SO:0000865 ! 
frameshift - -[Term] -id: SO:0000870 -name: trans_spliced -def: "An attribute describing transcript sequence that is created by splicing exons from diferent genes." [SO:ke] -synonym: "trans-spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000871 -name: polyadenylated_mRNA -def: "An mRNA that is polyadenylated." [SO:xp] -synonym: "polyadenylated mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000610 ! polyA_sequence -intersection_of: has_quality SO:0000246 ! polyadenylated -relationship: adjacent_to SO:0000610 ! polyA_sequence -relationship: has_quality SO:0000246 ! polyadenylated - -[Term] -id: SO:0000872 -name: trans_spliced_mRNA -def: "An mRNA that is trans-spliced." [SO:xp] -synonym: "trans-spliced mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -is_a: SO:0000479 ! trans_spliced_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000636 ! spliced_leader_RNA -intersection_of: has_quality SO:0000870 ! trans_spliced -relationship: adjacent_to SO:0000636 ! spliced_leader_RNA -relationship: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000873 -name: edited_transcript -def: "A transcript that is edited." [SO:ke] -synonym: "edited transcript" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000116 ! edited -relationship: guided_by SO:0000602 ! guide_RNA -relationship: has_part SO:0000977 ! anchor_binding_site -relationship: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000874 -name: edited_transcript_by_A_to_I_substitution -def: "A transcript that has been edited by A to I substitution." [SO:ke] -synonym: "edited transcript by A to I substitution" EXACT [] -is_a: SO:0000873 ! edited_transcript - -[Term] -id: SO:0000875 -name: bound_by_protein -def: "An attribute describing a sequence that is bound by a protein." [SO:ke] -synonym: "bound by protein" EXACT [] -is_a: SO:0000277 ! 
bound_by_factor - -[Term] -id: SO:0000876 -name: bound_by_nucleic_acid -def: "An attribute describing a sequence that is bound by a nucleic acid." [SO:ke] -synonym: "bound by nucleic acid" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000877 -name: alternatively_spliced -def: "An attribute describing a situation where a gene may encode for more than 1 transcript." [SO:ke] -synonym: "alternatively spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000878 -name: monocistronic -def: "An attribute describing a sequence that contains the code for one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000879 -name: dicistronic -def: "An attribute describing a sequence that contains the code for two gene products." [SO:ke] -is_a: SO:0000880 ! polycistronic - -[Term] -id: SO:0000880 -name: polycistronic -def: "An attribute describing a sequence that contains the code for more than one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000881 -name: recoded -def: "An attribute describing an mRNA sequence that has been reprogrammed at translation, causing localized alterations." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000882 -name: codon_redefined -def: "An attribute describing the alteration of codon meaning." [SO:ke] -synonym: "codon redefined" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000883 -name: stop_codon_read_through -def: "A stop codon redefined to be a new amino acid." [SO:ke] -synonym: "stop codon read through" EXACT [] -synonym: "stop codon readthrough" RELATED [] -is_a: SO:0000145 ! recoded_codon - -[Term] -id: SO:0000884 -name: stop_codon_redefined_as_pyrrolysine -def: "A stop codon redefined to be the new amino acid, pyrrolysine." [SO:ke] -synonym: "stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000883 ! 
stop_codon_read_through - -[Term] -id: SO:0000885 -name: stop_codon_redefined_as_selenocysteine -def: "A stop codon redefined to be the new amino acid, selenocysteine." [SO:ke] -synonym: "stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000886 -name: recoded_by_translational_bypass -def: "Recoded mRNA where a block of nucleotides is not translated." [SO:ke] -synonym: "recoded by translational bypass" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000887 -name: translationally_frameshifted -def: "Recoding by frameshifting a particular site." [SO:ke] -synonym: "translationally frameshifted" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000888 -name: maternally_imprinted_gene -def: "A gene that is maternally_imprinted." [SO:xp] -synonym: "maternally imprinted gene" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000135 ! maternally_imprinted -relationship: has_quality SO:0000135 ! maternally_imprinted - -[Term] -id: SO:0000889 -name: paternally_imprinted_gene -def: "A gene that is paternally imprinted." [SO:xp] -synonym: "paternally imprinted gene" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000136 ! paternally_imprinted -relationship: has_quality SO:0000136 ! paternally_imprinted - -[Term] -id: SO:0000890 -name: post_translationally_regulated_gene -def: "A gene that is post translationally regulated." [SO:xp] -synonym: "post translationally regulated gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000130 ! post_translationally_regulated -relationship: has_quality SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000891 -name: negatively_autoregulated_gene -def: "A gene that is negatively autoreguated." 
[SO:xp] -synonym: "negatively autoregulated gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000473 ! negatively_autoregulated -relationship: has_quality SO:0000473 ! negatively_autoregulated - -[Term] -id: SO:0000892 -name: positively_autoregulated_gene -def: "A gene that is positively autoregulated." [SO:xp] -synonym: "positively autoregulated gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000475 ! positively_autoregulated -relationship: has_quality SO:0000475 ! positively_autoregulated - -[Term] -id: SO:0000893 -name: silenced -def: "An attribute describing an epigenetic process where a gene is inactivated at transcriptional or translational level." [SO:ke] -xref: http://en.wikipedia.org/wiki/Silenced "wiki" -is_a: SO:0000126 ! transcriptionally_repressed - -[Term] -id: SO:0000894 -name: silenced_by_DNA_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA modifications, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0000895 -name: silenced_by_DNA_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA methylation, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA methylation" EXACT [] -is_a: SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000896 -name: translationally_regulated_gene -def: "A gene that is translationally regulated." [SO:xp] -synonym: "translationally regulated gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000131 ! translationally_regulated -relationship: has_quality SO:0000131 ! translationally_regulated - -[Term] -id: SO:0000897 -name: allelically_excluded_gene -def: "A gene that is allelically_excluded." 
[SO:xp] -synonym: "allelically excluded gene" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000137 ! allelically_excluded -relationship: has_quality SO:0000137 ! allelically_excluded - -[Term] -id: SO:0000898 -name: epigenetically_modified_gene -def: "A gene that is epigenetically modified." [SO:ke] -synonym: "epigenetically modified gene" EXACT [] -is_a: SO:0000704 ! gene -is_a: SO:0001720 ! epigenetically_modified_region -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000133 ! epigenetically_modified -relationship: has_quality SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000899 -name: nuclear_mitochondrial -def: "An attribute describing a nuclear pseudogene of a mitochndrial gene." [SO:ke] -synonym: "nuclear mitochondrial" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000900 -name: processed -def: "An attribute describing a pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promotors, but often including a polyA tail." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000901 -name: unequally_crossed_over -def: "An attribute describing a pseudogene that was created by tandem duplication and unequal crossing over during recombination." [SO:ke] -synonym: "unequally crossed over" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000902 -name: transgene -def: "A transgene is a gene that has been transferred naturally or by any of a number of genetic engineering techniques from one organism to another." [SO:xp] -xref: http://en.wikipedia.org/wiki/Transgene "wiki" -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000781 ! transgenic -relationship: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000903 -name: endogenous_retroviral_sequence -synonym: "endogenous retroviral sequence" EXACT [] -is_a: SO:0000751 ! 
proviral_location - -[Term] -id: SO:0000904 -name: rearranged_at_DNA_level -def: "An attribute to describe the sequence of a feature, where the DNA is rearranged." [SO:ke] -synonym: "rearranged at DNA level" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000905 -name: status -def: "An attribute describing the status of a feature, based on the available evidence." [SO:ke] -comment: This term is the hypernym of attributes and should not be annotated to. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000906 -name: independently_known -def: "Attribute to describe a feature that is independently known - not predicted." [SO:ke] -synonym: "independently known" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000907 -name: supported_by_sequence_similarity -def: "An attribute to describe a feature that has been predicted using sequence similarity techniques." [SO:ke] -synonym: "supported by sequence similarity" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000908 -name: supported_by_domain_match -def: "An attribute to describe a feature that has been predicted using sequence similarity of a known domain." [SO:ke] -synonym: "supported by domain match" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000909 -name: supported_by_EST_or_cDNA -def: "An attribute to describe a feature that has been predicted using sequence similarity to EST or cDNA data." [SO:ke] -synonym: "supported by EST or cDNA" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000910 -name: orphan -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000911 -name: predicted_by_ab_initio_computation -def: "An attribute describing a feature that is predicted by a computer program that did not rely on sequence similarity." [SO:ke] -synonym: "predicted by ab initio computation" EXACT [] -is_a: SO:0000732 ! 
predicted - -[Term] -id: SO:0000912 -name: asx_turn -alt_id: BS:00203 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Aspartate or Asparagine (Asx), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0000913 -name: cloned_cDNA_insert -def: "A clone insert made from cDNA." [SO:xp] -synonym: "cloned cDNA insert" EXACT [] -is_a: SO:0000753 ! clone_insert -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000756 ! cDNA -relationship: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000914 -name: cloned_genomic_insert -def: "A clone insert made from genomic DNA." [SO:xp] -synonym: "cloned genomic insert" EXACT [] -is_a: SO:0000753 ! clone_insert -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000991 ! genomic_DNA -relationship: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000915 -name: engineered_insert -def: "A clone insert that is engineered." [SO:xp] -synonym: "engineered insert" EXACT [] -is_a: SO:0000753 ! clone_insert -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000916 -name: edit_operation -synonym: "edit operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000917 -name: insert_U -def: "An edit to insert a U." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "insert U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000918 -name: delete_U -def: "An edit to delete a uridine." 
[SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "delete U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000919 -name: substitute_A_to_I -def: "An edit to substitute an I for an A." [SO:ke] -synonym: "substitute A to I" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000920 -name: insert_C -def: "An edit to insert a cytidine." [SO:ke] -synonym: "insert C" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000921 -name: insert_dinucleotide -def: "An edit to insert a dinucleotide." [SO:ke] -synonym: "insert dinucleotide" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000922 -name: substitute_C_to_U -def: "An edit to substitute an U for a C." [SO:ke] -synonym: "substitute C to U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000923 -name: insert_G -def: "An edit to insert a G." [SO:ke] -synonym: "insert G" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000924 -name: insert_GC -def: "An edit to insert a GC dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GC" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000925 -name: insert_GU -def: "An edit to insert a GU dinucleotide." 
[SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000926 -name: insert_CU -def: "An edit to insert a CU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert CU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000927 -name: insert_AU -def: "An edit to insert a AU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. 
Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000928 -name: insert_AA -def: "An edit to insert a AA dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AA" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000929 -name: edited_mRNA -def: "An mRNA that is edited." [SO:xp] -synonym: "edited mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -is_a: SO:0000873 ! edited_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000116 ! edited -relationship: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000930 -name: guide_RNA_region -def: "A region of guide RNA." [SO:ma] -synonym: "guide RNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000602 ! guide_RNA - -[Term] -id: SO:0000931 -name: anchor_region -def: "A region of a guide_RNA that base-pairs to a target mRNA." [SO:jk] -synonym: "anchor region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000932 -name: pre_edited_mRNA -synonym: "pre-edited mRNA" EXACT [] -is_a: SO:0000120 ! 
protein_coding_primary_transcript - -[Term] -id: SO:0000933 -name: intermediate -def: "An attribute to describe a feature between stages of processing." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000934 -name: miRNA_target_site -def: "A miRNA target site is a binding site where the molecule is a micro RNA." [FB:cds] -synonym: "miRNA target site" EXACT [] -is_a: SO:0001655 ! nucleotide_binding_site - -[Term] -id: SO:0000935 -name: edited_CDS -def: "A CDS that is edited." [SO:xp] -synonym: "edited CDS" EXACT [] -is_a: SO:0000316 ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000116 ! edited -relationship: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000936 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -synonym: "vertebrate immunoglobulin T cell receptor rearranged segment" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000937 -name: vertebrate_immune_system_feature -is_obsolete: true - -[Term] -id: SO:0000938 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor rearranged gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000939 -name: vertebrate_immune_system_gene_recombination_signal_feature -synonym: "vertebrate immune system gene recombination signal feature" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000940 -name: recombinationally_rearranged -synonym: "recombinationally rearranged" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000941 -name: recombinationally_rearranged_vertebrate_immune_system_gene -def: "A recombinationally rearranged gene of the vertebrate immune system." [SO:xp] -synonym: "recombinationally rearranged vertebrate immune system gene" EXACT [] -is_a: SO:0000456 ! 
recombinationally_rearranged_gene - -[Term] -id: SO:0000942 -name: attP_site -def: "An integration/excision site of a phage chromosome at which a recombinase acts to insert the phage DNA at a cognate integration/excision site on a bacterial chromosome." [SO:as] -synonym: "attP site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0001042 ! phage_sequence - -[Term] -id: SO:0000943 -name: attB_site -def: "An integration/excision site of a bacterial chromosome at which a recombinase acts to insert foreign DNA containing a cognate integration/excision site." [SO:as] -synonym: "attB site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000944 -name: attL_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attB_site and the 3' portion of attP_site." [SO:as] -synonym: "attBP'" RELATED [] -synonym: "attL site" RELATED [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000945 -name: attR_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attP_site and the 3' portion of attB_site." [SO:as] -synonym: "attPB'" RELATED [] -synonym: "attR site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000946 -name: integration_excision_site -def: "A region specifically recognised by a recombinase, which inserts or removes another region marked by a distinct cognate integration/excision site." [SO:as] -synonym: "attachment site" RELATED [] -synonym: "integration excision site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000947 -name: resolution_site -def: "A region specifically recognised by a recombinase, which separates a physically contiguous circle of DNA into two physically separate circles." [SO:as] -synonym: "res site" EXACT [] -synonym: "resolution site" EXACT [] -is_a: SO:0000342 ! 
site_specific_recombination_target_region - -[Term] -id: SO:0000948 -name: inversion_site -def: "A region specifically recognised by a recombinase, which inverts the region flanked by a pair of sites." [SO:ma] -comment: A target region for site-specific inversion of a DNA region and which carries binding sites for a site-specific recombinase and accessory proteins as well as the site for specific cleavage by the recombinase. -synonym: "inversion site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000949 -name: dif_site -def: "A site at which replicated bacterial circular chromosomes are decatenated by site specific resolvase." [SO:as] -synonym: "dif site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000950 -name: attC_site -def: "An attC site is a sequence required for the integration of a DNA of an integron." [SO:as] -synonym: "attC site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000951 -name: eukaryotic_terminator -synonym: "eukaryotic terminator" EXACT [] -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000952 -name: oriV -def: "An origin of vegetative replication in plasmids and phages." [SO:as] -synonym: "origin of vegetative replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000953 -name: oriC -def: "An origin of bacterial chromosome replication." [SO:as] -synonym: "origin of bacterial chromosome replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000954 -name: DNA_chromosome -def: "Structural unit composed of a self-replicating, DNA molecule." [SO:ma] -synonym: "DNA chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000352 ! DNA -relationship: has_quality SO:0000352 ! 
DNA - -[Term] -id: SO:0000955 -name: double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded DNA molecule." [SO:ma] -synonym: "double stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! DNA_chromosome -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000985 ! double -relationship: has_quality SO:0000985 ! double - -[Term] -id: SO:0000956 -name: single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded DNA molecule." [SO:ma] -synonym: "single stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! DNA_chromosome -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000984 ! single -relationship: has_quality SO:0000984 ! single - -[Term] -id: SO:0000957 -name: linear_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear DNA molecule." [SO:ma] -synonym: "linear double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000958 -name: circular_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular DNA molecule." [SO:ma] -synonym: "circular double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000959 -name: linear_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear DNA molecule." [SO:ma] -synonym: "linear single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: SO:0000956 ! 
single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000960 -name: circular_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000961 -name: RNA_chromosome -def: "Structural unit composed of a self-replicating, RNA molecule." [SO:ma] -synonym: "RNA chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000356 ! RNA -relationship: has_quality SO:0000356 ! RNA - -[Term] -id: SO:0000962 -name: single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded RNA molecule." [SO:ma] -synonym: "single stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! RNA_chromosome -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000984 ! single -relationship: has_quality SO:0000984 ! single - -[Term] -id: SO:0000963 -name: linear_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear RNA molecule." [SO:ma] -synonym: "linear single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000964 -name: linear_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear RNA molecule." [SO:ma] -synonym: "linear double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! 
double_stranded_RNA_chromosome -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000965 -name: double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded RNA molecule." [SO:ma] -synonym: "double stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! RNA_chromosome -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000985 ! double -relationship: has_quality SO:0000985 ! double - -[Term] -id: SO:0000966 -name: circular_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000967 -name: circular_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular RNA molecule." [SO:ma] -synonym: "circular double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000968 -name: sequence_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "sequence replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000969 -name: rolling_circle -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070581. 
-synonym: "rolling circle" EXACT [] -xref: http://en.wikipedia.org/wiki/Rolling_circle "wiki" -is_obsolete: true - -[Term] -id: SO:0000970 -name: theta_replication -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070582 -synonym: "theta replication" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000971 -name: DNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0006260. -synonym: "DNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000972 -name: RNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "RNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000973 -name: insertion_sequence -def: "A terminal_inverted_repeat_element that is bacterial and only encodes the functions required for its transposition between these inverted repeats." [SO:as] -synonym: "insertion sequence" EXACT [] -synonym: "IS" RELATED [] -xref: http://en.wikipedia.org/wiki/Insertion_sequence "wiki" -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000975 -name: minicircle_gene -synonym: "minicircle gene" EXACT [] -is_a: SO:0000089 ! kinetoplast_gene -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000980 ! minicircle -relationship: part_of SO:0000980 ! minicircle - -[Term] -id: SO:0000976 -name: cryptic -def: "A feature_attribute describing a feature that is not manifest under normal conditions." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000977 -name: anchor_binding_site -comment: Part of an edited transcript only. -synonym: "anchor binding site" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000978 -name: template_region -def: "A region of a guide_RNA that specifies the insertions and deletions of bases in the editing of a target mRNA." 
[SO:jk] -synonym: "information region" EXACT [] -synonym: "template region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000979 -name: gRNA_encoding -def: "A non-protein_coding gene that encodes a guide_RNA." [SO:ma] -synonym: "gRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000980 -name: minicircle -alt_id: SO:0000974 -def: "A minicircle is a replicon, part of a kinetoplast, that encodes for guide RNAs." [PMID:8395055] -synonym: "minicircle_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Minicircle "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000981 -name: rho_dependent_bacterial_terminator -synonym: "rho dependent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000982 -name: rho_independent_bacterial_terminator -synonym: "rho independent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000983 -name: strand_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "strand attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000984 -name: single -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000985 -name: double -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000986 -name: topology_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "topology attribute" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000987 -name: linear -def: "A quality of a nucleotide polymer that has a 3'-terminal residue and a 5'-terminal residue." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. 
SO workshop, September 2006. -synonym: "two-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000988 ! circular - -[Term] -id: SO:0000988 -name: circular -def: "A quality of a nucleotide polymer that has no terminal nucleotide residues." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "zero-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute - -[Term] -id: SO:0000989 -name: class_II_RNA -def: "Small non-coding RNA (59-60 nt long) containing 5' and 3' ends that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -synonym: "class II RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000990 -name: class_I_RNA -def: "Small non-coding RNA (55-65 nt long) containing highly conserved 5' and 3' ends (16 and 8 nt, respectively) that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -comment: Requested by Karen Pilcher - Dictybase. song-Term Tracker-1574577. -synonym: "class I RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000991 -name: genomic_DNA -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "genomic DNA" EXACT [] -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000992 -name: BAC_cloned_genomic_insert -comment: Requested by Andy Schroder - Flybase Harvard, Nov 2006. -synonym: "BAC cloned genomic insert" EXACT [] -is_a: SO:0000914 ! cloned_genomic_insert -intersection_of: SO:0000914 ! cloned_genomic_insert -intersection_of: derives_from SO:0000153 ! BAC -relationship: derives_from SO:0000153 ! BAC - -[Term] -id: SO:0000993 -name: consensus -comment: Term added Dec 06 to comply with mapping to MGED terms. It should be used to generate consensus regions. 
The specific cross product terms they require are consensus_region and consensus_mRNA. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000994 -name: consensus_region -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus region" EXACT [] -is_a: SO:0001410 ! experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000993 ! consensus -relationship: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000995 -name: consensus_mRNA -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -is_a: SO:0000994 ! consensus_region -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000993 ! consensus -relationship: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000996 -name: predicted_gene -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "predicted gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000732 ! predicted -relationship: has_quality SO:0000732 ! predicted - -[Term] -id: SO:0000997 -name: gene_fragment -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "gene fragment" EXACT [] -is_a: SO:0000842 ! gene_component_region -intersection_of: SO:0000842 ! gene_component_region -intersection_of: has_quality SO:0000731 ! fragmentary -relationship: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0000998 -name: recursive_splice_site -def: "A recursive splice site is a splice site which subdivides a large intron. Recursive splicing is a mechanism that splices large introns by sub dividing the intron at non exonic elements and alternate exons." [http://www.genetics.org/cgi/content/full/170/2/661] -synonym: "recursive splice site" EXACT [] -is_a: SO:0001419 ! 
cis_splice_site - -[Term] -id: SO:0000999 -name: BAC_end -def: "A region of sequence from the end of a BAC clone that may provide a highly specific marker." [SO:ke] -comment: Requested by Keith Boroevich December, 2006. -synonym: "BAC end" EXACT [] -synonym: "BAC end sequence" EXACT [] -synonym: "BES" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000153 ! BAC - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001003 -name: solo_LTR -def: "A recombination product between the 2 LTR of the same element." [SO:ke] -comment: Requested by Hadi Quesneville January 2007. -synonym: "solo LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0001004 -name: low_complexity -synonym: "low complexity" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0001005 -name: low_complexity_region -synonym: "low complexity region" EXACT [] -is_a: SO:0001410 ! 
experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001004 ! low_complexity -relationship: has_quality SO:0001004 ! low_complexity - -[Term] -id: SO:0001006 -name: prophage -def: "A phage genome after it has established in the host genome in a latent/immune state either as a plasmid or as an integrated \"island\"." [GOC:jl] -xref: http://en.wikipedia.org/wiki/Prophage "wiki" -is_a: SO:0000113 ! proviral_region - -[Term] -id: SO:0001007 -name: cryptic_prophage -def: "A remnant of an integrated prophage in the host genome or an \"island\" in the host genome that includes phage like-genes." [GOC:jl] -comment: This is not cryptic in the same sense as a cryptic gene or cryptic splice site. -synonym: "cryptic prophage" EXACT [] -xref: http://ecoliwiki.net/colipedia/index.php/Category:Cryptic_Prophage.w -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001008 -name: tetraloop -def: "A base-paired stem with loop of 4 non-hydrogen bonded nucleotides." [SO:ke] -xref: http://en.wikipedia.org/wiki/Tetraloop "wiki" -is_a: SO:0000313 ! stem_loop - -[Term] -id: SO:0001009 -name: DNA_constraint_sequence -def: "A double-stranded DNA used to control macromolecular structure and function." [http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au\]&dispmax=50] -synonym: "DNA constraint" EXACT [] -synonym: "DNA constraint sequence" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0001010 -name: i_motif -def: "A cytosine rich domain whereby strands associate both inter- and intramolecularly at moderately acidic pH." [PMID:9753739] -synonym: "i motif" EXACT [] -synonym: "short intercalated motif" EXACT [] -is_a: SO:0000142 ! DNA_sequence_secondary_structure - -[Term] -id: SO:0001011 -name: PNA_oligo -def: "Peptide nucleic acid, is a chemical not known to occur naturally but is artificially synthesized and used in some biological research and medical treatments. 
The PNA backbone is composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [SO:ke] -synonym: "peptide nucleic acid" EXACT [] -synonym: "PNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Peptide_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001184 ! PNA -relationship: has_quality SO:0001184 ! PNA - -[Term] -id: SO:0001012 -name: DNAzyme -def: "A DNA sequence with catalytic activity." [SO:cb] -comment: Added by request from Colin Batchelor. -synonym: "catalytic DNA" EXACT [] -synonym: "deoxyribozyme" RELATED [] -synonym: "DNA enzyme" EXACT [] -is_a: SO:0000696 ! oligo -intersection_of: SO:0000696 ! oligo -intersection_of: has_quality SO:0001185 ! enzymatic -relationship: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0001013 -name: MNP -def: "A multiple nucleotide polymorphism with alleles of common length > 1, for example AAA/TTT." [http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?rs=rs2067431] -synonym: "multiple nucleotide polymorphism" RELATED [] -is_a: SO:0002007 ! MNV - -[Term] -id: SO:0001014 -name: intron_domain -comment: Requested by Colin Batchelor, Feb 2007. -synonym: "intron domain" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000188 ! intron - -[Term] -id: SO:0001015 -name: wobble_base_pair -def: "A type of non-canonical base pairing, most commonly between G and U, which is important for the secondary structure of RNAs. It has similar thermodynamic stability to the Watson-Crick pairing. Wobble base pairs only have two hydrogen bonds. Other wobble base pair possibilities are I-A, I-U and I-C." [PMID:11256617] -synonym: "wobble base pair" EXACT [] -synonym: "wobble pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Wobble_base_pair "wiki" -is_a: SO:0000028 ! 
base_pair - -[Term] -id: SO:0001016 -name: internal_guide_sequence -def: "A purine-rich sequence in the group I introns which determines the locations of the splice sites in group I intron splicing and has catalytic activity." [SO:cb] -synonym: "IGS" EXACT [] -synonym: "internal guide sequence" EXACT [] -is_a: SO:0001014 ! intron_domain -relationship: part_of SO:0000587 ! group_I_intron - -[Term] -id: SO:0001017 -name: silent_mutation -def: "A sequence variant that does not affect protein function. Silent mutations may occur in genic ( CDS, UTR, intron etc) and intergenic regions. Silent mutations may have affects on processes such as splicing and regulation." [SO:ke] -comment: Added in March 2007 in after meeting with pharmgkb. Although this term is in common usage, it is better to annotate with the most specific term possible, such as synonymous codon, intron variant etc. -synonym: "silent mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Silent_mutation "wiki" -xref: loinc:LA6700-4 "Silent" -is_a: SO:0001878 ! feature_variant - -[Term] -id: SO:0001018 -name: epitope -def: "A binding site that, in the molecule, interacts selectively and non-covalently with antibodies, B cells or T cells." [http://en.wikipedia.org/wiki/Epitope, SO:cb] -comment: Requested by Trish Whetzel. -xref: http://en.wikipedia.org/wiki/Epitope "wiki" -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -subset: SOFA -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0001020 -name: sequence_variant_affecting_copy_number -comment: OBSOLETE: This term was deleted as it conflated more than one term. The alteration is separate from the effect. 
-synonym: "mutation affecting copy number" EXACT [] -synonym: "sequence variant affecting copy number" EXACT [] -is_obsolete: true -replaced_by: SO:0001563 - -[Term] -id: SO:0001021 -name: chromosome_breakpoint -alt_id: SO:0001242 -synonym: "aberration breakpoint" EXACT [] -synonym: "aberration_junction" EXACT [] -synonym: "chromosome breakpoint" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0001022 -name: inversion_breakpoint -def: "The point within a chromosome where an inversion begins or ends." [SO:cb] -synonym: "inversion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001023 -name: allele -def: "An allele is one of a set of coexisting sequence variants of a gene." [SO:immuno_workshop] -synonym: "allelomorph" EXACT [] -xref: http://en.wikipedia.org/wiki/Allele "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000704 ! gene - -[Term] -id: SO:0001024 -name: haplotype -def: "A haplotype is one of a set of coexisting sequence variants of a haplotype block." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Haplotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000355 ! haplotype_block - -[Term] -id: SO:0001025 -name: polymorphic_sequence_variant -def: "A sequence variant that is segregating in one or more natural populations of a species." [SO:immuno_workshop] -synonym: "polymorphic sequence variant" EXACT [] -is_a: SO:0001023 ! allele - -[Term] -id: SO:0001026 -name: genome -def: "A genome is the sum of genetic material within a cell or virion." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genome "wiki" -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0001235 ! replicon -relationship: has_part SO:0001235 ! replicon - -[Term] -id: SO:0001027 -name: genotype -def: "A genotype is a variant genome, complete or incomplete." 
[SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0001026 ! genome - -[Term] -id: SO:0001028 -name: diplotype -def: "A diplotype is a pair of haplotypes from a given individual. It is a genotype where the phase is known." [SO:immuno_workshop] -is_a: SO:0001507 ! variant_collection - -[Term] -id: SO:0001029 -name: direction_attribute -synonym: "direction attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001030 -name: forward -def: "Forward is an attribute of the feature, where the feature is in the 5' to 3' direction." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001031 -name: reverse -def: "Reverse is an attribute of the feature, where the feature is in the 3' to 5' direction. Again could be applied to primer." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001032 -name: mitochondrial_DNA -comment: This terms is used by MO. -synonym: "mitochondrial DNA" EXACT [] -synonym: "mtDNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_DNA "wiki" -is_a: SO:0000737 ! mitochondrial_sequence -intersection_of: SO:0000737 ! mitochondrial_sequence -intersection_of: has_quality SO:0000352 ! DNA -relationship: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001033 -name: chloroplast_DNA -comment: This term is used by MO. -synonym: "chloroplast DNA" EXACT [] -is_a: SO:0000745 ! chloroplast_sequence -intersection_of: SO:0000745 ! chloroplast_sequence -intersection_of: has_quality SO:0000352 ! DNA -relationship: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001034 -name: mirtron -def: "A de-branched intron which mimics the structure of pre-miRNA and enters the miRNA processing pathway without Drosha mediated cleavage." [PMID:17589500, SO:ma] -comment: Ruby et al. Nature 448:83 describe a new class of miRNAs that are derived from de-branched introns. -is_a: SO:0001014 ! 
intron_domain - -[Term] -id: SO:0001035 -name: piRNA -def: "A small non coding RNA, part of a silencing system that prevents the spreading of selfish genetic elements." [SO:ke] -synonym: "piwi-associated RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/PiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001036 -name: arginyl_tRNA -def: "A tRNA sequence that has an arginine anticodon, and a 3' arginine binding region." [SO:ke] -synonym: "arginyl tRNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000212 ! arginine_tRNA_primary_transcript - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001234 ! mobile -relationship: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0001038 -name: extrachromosomal_mobile_genetic_element -def: "An MGE that is not integrated into the host chromosome." [SO:ke] -synonym: "extrachromosomal mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001040 -name: integrated_plasmid -def: "A plasmid sequence that is integrated within the host chromosome." [SO:ke] -synonym: "integrated plasmid" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element -intersection_of: SO:0001039 ! integrated_mobile_genetic_element -intersection_of: derives_from SO:0000155 ! 
plasmid -relationship: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0001041 -name: viral_sequence -def: "The region of nucleotide sequence of a virus, a submicroscopic particle that replicates by infecting a host cell." [SO:ke] -comment: The definitions of the children of this term were revised Decemeber 2007 after discussion on song-devel. The resulting definitions are slightly unweildy but hopefully more logically correct. -synonym: "viral sequence" EXACT [] -synonym: "virus sequence" EXACT [] -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0001042 -name: phage_sequence -def: "The nucleotide sequence of a virus that infects bacteria." [SO:ke] -synonym: "bacteriophage" EXACT [] -synonym: "phage" EXACT [] -synonym: "phage sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Bacteriophage "wiki" -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001043 -name: attCtn_site -def: "An attachment site located on a conjugative transposon and used for site-specific integration of a conjugative transposon." [Phigo:at] -synonym: "attCtn site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000371 ! conjugative_transposon - -[Term] -id: SO:0001044 -name: nuclear_mt_pseudogene -def: "A nuclear pseudogene of either coding or non-coding mitochondria derived sequence." [SO:xp] -comment: Definition change requested by Val, 3172757. -synonym: "nuclear mitochondrial pseudogene" EXACT [] -synonym: "nuclear mt pseudogene" EXACT [] -synonym: "NUMT" EXACT [] -xref: http://en.wikipedia.org/wiki/Numt "wikipedia" -is_a: SO:0001760 ! non_processed_pseudogene - -[Term] -id: SO:0001045 -name: cointegrated_plasmid -def: "A MGE region consisting of two fused plasmids resulting from a replicative transposition event." [phigo:at] -synonym: "cointegrated plasmid" EXACT [] -synonym: "cointegrated replicon" EXACT [] -is_a: SO:0001039 ! 
integrated_mobile_genetic_element - -[Term] -id: SO:0001046 -name: IRLinv_site -def: "Component of the inversion site located at the left of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRLinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001047 -name: IRRinv_site -def: "Component of the inversion site located at the right of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRRinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001048 -name: inversion_site_part -def: "A region located within an inversion site." [SO:ke] -comment: A term created to allow the parts of an inversion site have an is_a path back to the root. -synonym: "inversion site part" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0001049 -name: defective_conjugative_transposon -def: "An island that contains genes for integration/excision and the gene and site for the initiation of intercellular transfer by conjugation. It can be complemented for transfer by a conjugative transposon." [Phigo:ariane] -synonym: "defective conjugative transposon" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001050 -name: repeat_fragment -def: "A portion of a repeat, interrupted by the insertion of another element." [SO:ke] -comment: Requested by Chris Smith, and others at Flybase to help annotate nested repeats. -synonym: "repeat fragment" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0001649 ! 
nested_repeat - -[Term] -id: SO:0001051 -name: nested_region -is_obsolete: true - -[Term] -id: SO:0001052 -name: nested_repeat -is_obsolete: true - -[Term] -id: SO:0001053 -name: nested_transposon -is_obsolete: true - -[Term] -id: SO:0001054 -name: transposon_fragment -def: "A portion of a transposon, interrupted by the insertion of another element." [SO:ke] -synonym: "transposon fragment" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0001648 ! nested_transposon - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [PMID:9679020, SO:regcreative] -subset: SOFA -synonym: "transcription-control region" EXACT [] -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." [SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001057 -name: enhanceosome -is_obsolete: true - -[Term] -id: SO:0001058 -name: promoter_targeting_sequence -def: "A transcriptional_cis_regulatory_region that restricts the activity of a CRM to a single promoter and which functions only when both itself and an insulator are located between the CRM and the promoter." [SO:regcreative] -synonym: "promoter targeting sequence" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. 
-subset: SOFA -synonym: "partially characterised change in DNA sequence" NARROW [] -synonym: "partially_characterised_change_in_DNA_sequence" NARROW [] -synonym: "sequence alteration" EXACT [] -synonym: "sequence variation" RELATED [] -synonym: "uncharacterised_change_in_nucleotide_sequence" NARROW [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001060 -name: sequence_variant -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -synonym: "sequence variant" EXACT [] - -[Term] -id: SO:0001061 -name: propeptide_cleavage_site -alt_id: BS:00063 -def: "The propeptide_cleavage_site is the arginine/lysine boundary on a propeptide where cleavage occurs." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "propeptide cleavage site" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0001062 -name: propeptide -alt_id: BS:00077 -def: "Part of a peptide chain which is cleaved off during the formation of the mature protein." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "propep" RELATED [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Propeptide "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001064 -name: active_peptide -alt_id: BS:00076 -def: "Active peptides are proteins which are biologically active, released from a precursor molecule." [EBIBS:GAR, UniProt:curation_manual] -comment: Hormones, neuropeptides, antimicrobial peptides, are active peptides. They are typically short (<40 amino acids) in length. 
-subset: biosapiens -synonym: "active peptide" EXACT [] -synonym: "peptide" BROAD [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Peptide "wiki" -is_a: SO:0000419 ! mature_protein_region - -[Term] -id: SO:0001066 -name: compositionally_biased_region_of_peptide -alt_id: BS:00068 -def: "Polypeptide region that is rich in a particular amino acid or homopolymeric and greater than three residues in length." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "compbias" RELATED [uniprot:feature_type] -synonym: "compositional bias" RELATED [] -synonym: "compositionally biased" RELATED [] -synonym: "compositionally biased region of peptide" RELATED [] -synonym: "compositionally_biased_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001067 -name: polypeptide_motif -alt_id: BS:00032 -def: "A sequence motif is a short (up to 20 amino acids) region of biological interest. Such motifs, although they are too short to constitute functional domains, share sequence similarities and are conserved in different proteins. They display a common function (protein-binding, subcellular location etc.)." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "motif" BROAD [uniprot:feature_type] -synonym: "polypeptide motif" EXACT [] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001068 -name: polypeptide_repeat -alt_id: BS:00070 -def: "A polypeptide_repeat is a single copy of an internal sequence repetition." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "polypeptide repeat" EXACT [] -synonym: "repeat" RELATED [uniprot:feature_type] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001070 -name: polypeptide_structural_region -alt_id: BS:00337 -def: "Region of polypeptide with a given structural property." [EBIBS:GAR, SO:cb] -comment: Range. 
-subset: biosapiens -synonym: "polypeptide structural region" EXACT [] -synonym: "structural_region" RELATED [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001071 -name: membrane_structure -alt_id: BS:00128 -def: "Arrangement of the polypeptide with respect to the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "membrane structure" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001072 -name: extramembrane_polypeptide_region -alt_id: BS:00154 -def: "Polypeptide region that is localized outside of a lipid bilayer." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "extramembrane" RELATED BS [] -synonym: "extramembrane polypeptide region" EXACT [] -synonym: "extramembrane_region" RELATED BS [] -synonym: "topo_dom" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001073 -name: cytoplasmic_polypeptide_region -alt_id: BS:00145 -def: "Polypeptide region that is localized inside the cytoplasm." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "cytoplasm_location" EXACT BS [] -synonym: "cytoplasmic polypeptide region" EXACT [] -synonym: "inside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001074 -name: non_cytoplasmic_polypeptide_region -alt_id: BS:00144 -def: "Polypeptide region that is localized outside of a lipid bilayer and outside of the cytoplasm." [EBIBS:GAR, SO:cb] -comment: This could be inside an organelle within the cell. -subset: biosapiens -synonym: "non cytoplasmic polypeptide region" EXACT [] -synonym: "non_cytoplasm_location" EXACT BS [] -synonym: "outside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001075 -name: intramembrane_polypeptide_region -alt_id: BS:00156 -def: "Polypeptide region present in the lipid bilayer." 
[EBIBS:GAR] -subset: biosapiens -synonym: "intramembrane" RELATED BS [] -synonym: "intramembrane polypeptide region" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001076 -name: membrane_peptide_loop -alt_id: BS:00155 -def: "Polypeptide region localized within the lipid bilayer where both ends traverse the same membrane." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "membrane peptide loop" EXACT [] -synonym: "membrane_loop" RELATED BS [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001077 -name: transmembrane_polypeptide_region -alt_id: BS:00158 -def: "Polypeptide region traversing the lipid bilayer." [EBIBS:GAR, UniProt:curator_manual] -subset: biosapiens -synonym: "transmem" RELATED BS [uniprot:feature_type] -synonym: "transmembrane" RELATED BS [] -synonym: "transmembrane polypeptide region" EXACT [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001078 -name: polypeptide_secondary_structure -alt_id: BS:00003 -def: "A region of peptide with secondary structure has hydrogen bonding along the peptide chain that causes a defined conformation of the chain." [EBIBS:GAR] -comment: Biosapien term was secondary_structure. -subset: biosapiens -synonym: "2nary structure" RELATED BS [] -synonym: "polypeptide secondary structure" EXACT [] -synonym: "secondary structure" RELATED BS [] -synonym: "secondary structure region" RELATED BS [] -synonym: "secondary_structure" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Secondary_structure "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001079 -name: polypeptide_structural_motif -alt_id: BS:0000338 -def: "Motif is a three-dimensional structural element within the chain, which appears also in a variety of other molecules. Unlike a domain, a motif does not need to form a stable globular unit." 
[EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide structural motif" RELATED [] -synonym: "structural_motif" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Structural_motif "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001080 -name: coiled_coil -alt_id: BS:00041 -def: "A coiled coil is a structural motif in proteins, in which alpha-helices are coiled together like the strands of a rope." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "coiled" RELATED BS [uniprot:feature_type] -synonym: "coiled coil" EXACT [] -xref: http://en.wikipedia.org/wiki/Coiled_coil "wiki" -is_a: SO:0001079 ! polypeptide_structural_motif - -[Term] -id: SO:0001081 -name: helix_turn_helix -alt_id: BS:00147 -def: "A motif comprising two helices separated by a turn." [EBIBS:GAR] -subset: biosapiens -synonym: "helix turn helix" EXACT [] -synonym: "helix-turn-helix" EXACT [] -synonym: "HTH" RELATED BS [] -is_a: SO:0001079 ! polypeptide_structural_motif -relationship: has_part SO:0001114 ! peptide_helix -relationship: has_part SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001082 -name: polypeptide_sequencing_information -alt_id: BS:00125 -def: "Incompatibility in the sequence due to some experimental problem." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "sequencing_information" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0001083 -name: non_adjacent_residues -alt_id: BS:00182 -def: "Indicates that two consecutive residues in a fragment sequence are not consecutive in the full-length protein and that there are a number of unsequenced residues between them." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "non consecutive" EXACT [] -synonym: "non_cons" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! 
polypeptide_sequencing_information - -[Term] -id: SO:0001084 -name: non_terminal_residue -alt_id: BS:00072 -def: "The residue at an extremity of the sequence is not the terminal residue." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "non terminal" EXACT [] -synonym: "non_ter" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001085 -name: sequence_conflict -alt_id: BS:00069 -def: "Different sources report differing sequences." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "conflict" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001086 -name: sequence_uncertainty -alt_id: BS:00181 -def: "Describes the positions in a sequence where the authors are unsure about the sequence assignment." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "unsure" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001087 -name: cross_link -alt_id: BS:00178 -def: "Posttranslationally formed amino acid bonds." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "cross link" EXACT [] -synonym: "crosslink" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001088 -name: disulfide_bond -alt_id: BS:00028 -def: "The covalent bond between sulfur atoms that binds two peptide chains or different parts of one peptide chain and is a structural determinant in many protein molecules." [EBIBS:GAR, UniProt:curation_manual] -comment: 2 discreet & joined. 
-subset: biosapiens -synonym: "disulfid" RELATED [] -synonym: "disulfide" RELATED [] -synonym: "disulfide bond" RELATED [] -synonym: "disulphide" EXACT [] -synonym: "disulphide bond" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001089 -name: post_translationally_modified_region -alt_id: BS:00052 -def: "A region where a transformation occurs in a protein after it has been synthesized. This which may regulate, stabilize, crosslink or introduce new chemical functionalities in the protein." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mod_res" EXACT [uniprot:feature_type] -synonym: "modified residue" EXACT [] -synonym: "post_translational_modification" EXACT [] -xref: http://en.wikipedia.org/wiki/Post_translational_modification "wiki" -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001090 -name: covalent_binding_site -alt_id: BS:00246 -def: "Binding involving a covalent bond." [EBIBS:GAR] -subset: biosapiens -synonym: "covalent binding site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001091 -name: non_covalent_binding_site -alt_id: BS:00029 -def: "Binding site for any chemical group (co-enzyme, prosthetic group, etc.)." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "binding" RELATED [uniprot:curation] -synonym: "binding site" RELATED [] -synonym: "non covalent binding site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001092 -name: polypeptide_metal_contact -alt_id: BS:00027 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with metal ions." [EBIBS:GAR, SO:cb, UniProt:curation_manual] -comment: Residue is part of a binding site for a metal ion. -subset: biosapiens -synonym: "metal_binding" RELATED [] -is_a: SO:0001656 ! metal_binding_site -is_a: SO:0100002 ! 
molecular_contact_region - -[Term] -id: SO:0001093 -name: protein_protein_contact -alt_id: BS:00131 -def: "A binding site that, in the protein molecule, interacts selectively and non-covalently with polypeptide residues." [EBIBS:GAR, UniProt:Curation_manual] -subset: biosapiens -synonym: "protein protein contact" EXACT [] -synonym: "protein protein contact site" EXACT [] -synonym: "protein_protein_interaction" RELATED [] -xref: http://en.wikipedia.org/wiki/Protein_protein_interaction "wiki" -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001094 -name: polypeptide_calcium_ion_contact_site -alt_id: BS:00186 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with calcium ions." [EBIBS:GAR] -comment: Residue involved in contact with calcium. -subset: biosapiens -synonym: "ca bind" RELATED [] -synonym: "ca_bind" EXACT BS [uniprot:feature_type] -synonym: "Ca_contact_site" EXACT [] -synonym: "polypeptide calcium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001095 -name: polypeptide_cobalt_ion_contact_site -alt_id: BS:00136 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with cobalt ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Co_contact_site" EXACT [] -synonym: "polypeptide cobalt ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001096 -name: polypeptide_copper_ion_contact_site -alt_id: BS:00146 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with copper ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Cu_contact_site" EXACT [] -synonym: "polypeptide copper ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001097 -name: polypeptide_iron_ion_contact_site -alt_id: BS:00137 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with iron ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Fe_contact_site" EXACT [] -synonym: "polypeptide iron ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001098 -name: polypeptide_magnesium_ion_contact_site -alt_id: BS:00187 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with magnesium ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Mg_contact_site" EXACT [] -synonym: "polypeptide magnesium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001099 -name: polypeptide_manganese_ion_contact_site -alt_id: BS:00140 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with manganese ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Mn_contact_site" EXACT [] -synonym: "polypeptide manganese ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001100 -name: polypeptide_molybdenum_ion_contact_site -alt_id: BS:00141 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with molybdenum ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Mo_contact_site" EXACT [] -synonym: "polypeptide molybdenum ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001101 -name: polypeptide_nickel_ion_contact_site -alt_id: BS:00142 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with nickel ions." [EBIBS:GAR] -subset: biosapiens -synonym: "Ni_contact_site" EXACT [] -synonym: "polypeptide nickel ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001102 -name: polypeptide_tungsten_ion_contact_site -alt_id: BS:00143 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with tungsten ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "polypeptide tungsten ion contact site" EXACT [] -synonym: "W_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001103 -name: polypeptide_zinc_ion_contact_site -alt_id: BS:00185 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with zinc ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "polypeptide zinc ion contact site" EXACT [] -synonym: "Zn_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001104 -name: catalytic_residue -alt_id: BS:00026 -def: "Amino acid involved in the activity of an enzyme." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "act_site" RELATED [uniprot:feature_type] -synonym: "active site residue" EXACT [] -synonym: "catalytic residue" EXACT [] -is_a: SO:0001237 ! amino_acid -relationship: part_of SO:0100019 ! polypeptide_catalytic_motif - -[Term] -id: SO:0001105 -name: polypeptide_ligand_contact -alt_id: BS:00157 -def: "Residues which interact with a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide ligand contact" EXACT [] -synonym: "protein-ligand interaction" RELATED [] -is_a: SO:0001657 ! ligand_binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001106 -name: asx_motif -alt_id: BS:00202 -def: "A motif of five consecutive residues and two H-bonds in which: Residue(i) is Aspartate or Asparagine (Asx), side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3), main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001107 -name: beta_bulge -alt_id: BS:00208 -def: "A motif of three residues within a beta-sheet in which the main chains of two consecutive residues are H-bonded to that of the third, and in which the dihedral angles are as follows: Residue(i): -140 degrees < phi(l) -20 degrees , -90 degrees < psi(l) < 40 degrees. Residue (i+1): -180 degrees < phi < -25 degrees or +120 degrees < phi < +180 degrees, +40 degrees < psi < +180 degrees or -180 degrees < psi < -120 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge" EXACT [] -xref: http://en.wikipedia.org/wiki/Beta_bulge "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001108 -name: beta_bulge_loop -alt_id: BS:00209 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds. Beta bulge loops often occur at the loop ends of beta-hairpins." [EBIBS:GAR, Http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001109 -name: beta_bulge_loop_five -alt_id: BS:00210 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+4), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+3), these loops have an RL nest at residues i+2 and i+3." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop five" EXACT [] -is_a: SO:0001108 ! 
beta_bulge_loop - -[Term] -id: SO:0001110 -name: beta_bulge_loop_six -alt_id: BS:00211 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+5), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+4), these loops have an RL nest at residues i+3 and i+4." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop six" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001111 -name: beta_strand -alt_id: BS:00042 -def: "A beta strand describes a single length of polypeptide chain that forms part of a beta sheet. A single continuous stretch of amino acids adopting an extended conformation of hydrogen bonds between the N-O and the C=O of another part of the peptide. This forms a secondary protein structure in which two or more extended polypeptide regions are hydrogen-bonded to one another in a planar array." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "strand" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Beta_sheet "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001112 -name: antiparallel_beta_strand -alt_id: BS:0000341 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (one running N-terminal to C-terminal and one running C-terminal to N-terminal). Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i) and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they form two mutual backbone hydrogen bonds to each other's flanking peptide groups; this is known as a close pair of hydrogen bonds. The peptide backbone dihedral angles (phi, psi) are about (-140 degrees, 135 degrees) in antiparallel sheets." 
[EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "antiparallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001113 -name: parallel_beta_strand -alt_id: BS:00151 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (both running N-terminal to C-terminal). This orientation is slightly less stable because it introduces nonplanarity in the inter-strand hydrogen bonding pattern. Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i)and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they do not hydrogen bond to each other; rather, one residue forms hydrogen bonds to the residues that flank the other (but not vice versa). For example, residue i may form hydrogen bonds to residues j - 1 and j + 1; this is known as a wide pair of hydrogen bonds. By contrast, residue j may hydrogen-bond to different residues altogether, or to none at all. The dihedral angles (phi, psi) are about (-120 degrees, 115 degrees) in parallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "parallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001114 -name: peptide_helix -alt_id: BS:00152 -def: "A helix is a secondary_structure conformation where the peptide backbone forms a coil." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "helix" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001115 -name: left_handed_peptide_helix -alt_id: BS:00222 -def: "A left handed helix is a region of peptide where the coiled conformation turns in an anticlockwise, left handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix-l" RELATED [] -synonym: "left handed helix" EXACT [] -is_a: SO:0001114 ! 
peptide_helix - -[Term] -id: SO:0001116 -name: right_handed_peptide_helix -alt_id: BS:0000339 -def: "A right handed helix is a region of peptide where the coiled conformation turns in a clockwise, right handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix" RELATED BS [] -synonym: "right handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001117 -name: alpha_helix -alt_id: BS:00040 -def: "The helix has 3.6 residues per turn which corersponds to a translation of 1.5 angstroms (= 0.15 nm) along the helical axis. Every backbone N-H group donates a hydrogen bond to the backbone C=O group of the amino acid four residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "a-helix" RELATED BS [] -synonym: "helix" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Alpha_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001118 -name: pi_helix -alt_id: BS:00153 -def: "The pi helix has 4.1 residues per turn and a translation of 1.15 (=0.115 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid five residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "pi helix" EXACT [] -xref: http://en.wikipedia.org/wiki/Pi_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001119 -name: three_ten_helix -alt_id: BS:0000340 -def: "The 3-10 helix has 3 residues per turn with a translation of 2.0 angstroms (=0.2 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid three residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "3(10) helix" EXACT [] -synonym: "3-10 helix" EXACT [] -synonym: "310 helix" EXACT [] -synonym: "three ten helix" EXACT [] -xref: http://en.wikipedia.org/wiki/310_helix "wiki" -is_a: SO:0001116 ! 
right_handed_peptide_helix - -[Term] -id: SO:0001120 -name: polypeptide_nest_motif -alt_id: BS:00223 -def: "A motif of two consecutive residues with dihedral angles. Nest should not have Proline as any residue. Nests frequently occur as parts of other motifs such as Schellman loops." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest" RELATED BS [] -synonym: "nest_motif" EXACT [] -synonym: "polypeptide nest motif" RELATED [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001121 -name: polypeptide_nest_left_right_motif -alt_id: BS:00224 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_left_right" EXACT [] -synonym: "nest_lr" EXACT [] -synonym: "polypeptide nest left right motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001122 -name: polypeptide_nest_right_left_motif -alt_id: BS:00225 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_right_left" EXACT [] -synonym: "nest_rl" EXACT [] -synonym: "polypeptide nest right left motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001123 -name: schellmann_loop -alt_id: BS:00226 -def: "A motif of six or seven consecutive residues that contains two H-bonds." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "paperclip" RELATED BS [] -synonym: "paperclip loop" RELATED [] -synonym: "schellmann loop" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001124 -name: schellmann_loop_seven -alt_id: BS:00228 -def: "Wild type: A motif of seven consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+6), the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+5)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop seven" EXACT [] -synonym: "seven-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001125 -name: schellmann_loop_six -alt_id: BS:00227 -def: "Common Type: A motif of six consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+5) the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop six" EXACT [] -synonym: "six-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001126 -name: serine_threonine_motif -alt_id: BS:00229 -def: "A motif of five consecutive residues and two hydrogen bonds in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3) , the main-chain CO group of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine motif" EXACT [] -synonym: "st motif" EXACT [] -synonym: "st_motif" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001127 -name: serine_threonine_staple_motif -alt_id: BS:00230 -def: "A motif of four or five consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain OH of residue(i) is H-bonded to the main-chain CO of residue(i3) or (i4), Phi angles of residues(i1), (i2) and (i3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine threonine staple motif" EXACT [] -synonym: "st_staple" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001128 -name: polypeptide_turn_motif -alt_id: BS:00148 -def: "A reversal in the direction of the backbone of a protein that is stabilized by hydrogen bond between backbone NH and CO groups, involving no more than 4 amino acid residues." [EBIBS:GAR, uniprot:feature_type] -comment: Range. -subset: biosapiens -synonym: "turn" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001129 -name: asx_turn_left_handed_type_one -alt_id: BS:00206 -def: "Left handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type one" EXACT [] -synonym: "asx_turn_il" RELATED [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001130 -name: asx_turn_left_handed_type_two -alt_id: BS:00204 -def: "Left handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type two" EXACT [] -synonym: "asx_turn_iil" EXACT [] -is_a: SO:0000912 ! 
asx_turn - -[Term] -id: SO:0001131 -name: asx_turn_right_handed_type_two -alt_id: BS:00205 -def: "Right handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn right handed type two" EXACT [] -synonym: "asx_turn_iir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001132 -name: asx_turn_right_handed_type_one -alt_id: BS:00207 -def: "Right handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn type right handed type one" EXACT [] -synonym: "asx_turn_ir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001133 -name: beta_turn -alt_id: BS:00212 -def: "A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles of the second and third residues, which are the basis for sub-categorization." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001134 -name: beta_turn_left_handed_type_one -alt_id: BS:00215 -def: "Left handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles:- Residue(i+1): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees. 
Residue(i+2): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type one" EXACT [] -synonym: "beta_turn_il" EXACT [] -synonym: "type I' beta turn" EXACT [] -synonym: "type I' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001135 -name: beta_turn_left_handed_type_two -alt_id: BS:00213 -def: "Left handed type II: A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees > phi > -20 degrees, +80 degrees > psi > +180 degrees. Residue(i+2): +20 degrees > phi > +140 degrees, -40 degrees > psi > +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type two" EXACT [] -synonym: "beta_turn_iil" EXACT [] -synonym: "type II' beta turn" EXACT [] -synonym: "type II' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001136 -name: beta_turn_right_handed_type_one -alt_id: BS:00216 -def: "Right handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+2): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type one" EXACT [] -synonym: "beta_turn_ir" EXACT [] -synonym: "type I beta turn" EXACT [] -synonym: "type I turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001137 -name: beta_turn_right_handed_type_two -alt_id: BS:00214 -def: "Right handed type II:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, +80 degrees < psi < +180 degrees. Residue(i+2): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type two" EXACT [] -synonym: "beta_turn_iir" EXACT [] -synonym: "type II beta turn" EXACT [] -synonym: "type II turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001138 -name: gamma_turn -alt_id: BS:00219 -def: "Gamma turns, defined for 3 residues i,( i+1),( i+2) if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001139 -name: gamma_turn_classic -alt_id: BS:00220 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=75.0 - psi(i+1)=-64.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "classic gamma turn" EXACT [] -synonym: "gamma turn classic" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001140 -name: gamma_turn_inverse -alt_id: BS:00221 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=-79.0 - psi(i+1)=69.0." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn inverse" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001141 -name: serine_threonine_turn -alt_id: BS:00231 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine turn" EXACT [] -synonym: "st_turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001142 -name: st_turn_left_handed_type_one -alt_id: BS:00234 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type one" EXACT [] -synonym: "st_turn_il" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001143 -name: st_turn_left_handed_type_two -alt_id: BS:00232 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type two" EXACT [] -synonym: "st_turn_iil" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001144 -name: st_turn_right_handed_type_one -alt_id: BS:00235 -def: "The peptide twists in an clockwise, right handed manner. 
The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type one" EXACT [] -synonym: "st_turn_ir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001145 -name: st_turn_right_handed_type_two -alt_id: BS:00233 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type two" EXACT [] -synonym: "st_turn_iir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001146 -name: polypeptide_variation_site -alt_id: BS:00336 -def: "A site of sequence variation (alteration). Alternative sequence due to naturally occuring events such as polymorphisms and altermatve splicing or experimental methods such as site directed mutagenesis." [EBIBS:GAR, SO:ke] -comment: For example, was a substitution natural or mutated as part of an experiment? This term is added to merge the biosapiens term sequence_variations. -subset: biosapiens -synonym: "sequence_variations" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001147 -name: natural_variant_site -alt_id: BS:00071 -def: "Describes the natural sequence variants due to polymorphisms, disease-associated mutations, RNA editing and variations between strains, isolates or cultivars." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. 
-subset: biosapiens -synonym: "natural_variant" BROAD [] -synonym: "sequence variation" BROAD [] -synonym: "variant" BROAD [uniprot:feature_type] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001148 -name: mutated_variant_site -alt_id: BS:00036 -def: "Site which has been experimentally altered." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mutagen" EXACT BS [uniprot:feature_type] -synonym: "mutagenesis" EXACT [] -synonym: "mutated_site" EXACT [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001149 -name: alternate_sequence_site -alt_id: BS:00073 -alt_id: SO:0001065 -def: "Description of sequence variants produced by alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "alternative_sequence" EXACT [] -synonym: "isoform" NARROW [] -synonym: "sequence variation" NARROW [] -synonym: "var_seq" EXACT [uniprot:feature_type] -synonym: "varsplic" NARROW [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001150 -name: beta_turn_type_six -def: "A motif of four consecutive peptide resides of type VIa or type VIb and where the i+2 residue is cis-proline." [SO:cb] -subset: biosapiens -synonym: "beta turn type six" EXACT [] -synonym: "cis-proline loop" EXACT [] -synonym: "type VI beta turn" EXACT [] -synonym: "type VI turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001151 -name: beta_turn_type_six_a -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -90 degrees, psi ~ 0 degrees." 
[PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six a" EXACT [] -synonym: "type VIa beta turn" EXACT [] -synonym: "type VIa turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001152 -name: beta_turn_type_six_a_one -subset: biosapiens -synonym: "beta turn type six a one" EXACT [] -synonym: "type VIa1 beta turn" EXACT [] -synonym: "type VIa1 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001153 -name: beta_turn_type_six_a_two -subset: biosapiens -synonym: "beta turn type six a two" EXACT [] -synonym: "type VIa2 beta turn" EXACT [] -synonym: "type VIa2 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001154 -name: beta_turn_type_six_b -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -120 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -60 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six b" EXACT [] -synonym: "type VIb beta turn" EXACT [] -synonym: "type VIb turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001155 -name: beta_turn_type_eight -def: "A motif of four consecutive peptide residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ -30 degrees. Residue(i+2): phi ~ -120 degrees, psi ~ 120 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type eight" EXACT [] -synonym: "type VIII beta turn" EXACT [] -synonym: "type VIII turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001156 -name: DRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -10 and -60 relative to the TSS. Consensus sequence is WATCGATW." [PMID:12537576] -comment: This consensus sequence was identified computationally using the MEME algorithm within core promoter sequences from -60 to +40, with an E value of 1.7e-183. Tends to co-occur with Motif 7. Tends to not occur with DPE motif (SO:0000015) or motif 10. -synonym: "DRE motif" EXACT [] -synonym: "NDM4" EXACT [] -synonym: "WATCGATW_motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001157 -name: DMv4_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements with respect to the TSS (+1). Consensus sequence is YGGTCACACTR. Marked spatial preference within core promoter; tend to occur near the TSS, although not as tightly as INR (SO:0000014)." [PMID:16827941:12537576] -synonym: "directional motif v4" EXACT [] -synonym: "DMv4" EXACT [] -synonym: "DMv4 motif" EXACT [] -synonym: "motif 1 element" EXACT [] -synonym: "promoter motif 1" EXACT [] -synonym: "YGGTCACATR" NARROW [] -is_a: SO:0001659 ! promoter_element - -[Term] -id: SO:0001158 -name: E_box_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and +1 relative to the TSS. Consensus sequence is AWCAGCTGWT. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015)." [PMID:12537576:16827941] -synonym: "AWCAGCTGWT" NARROW [] -synonym: "E box motif" EXACT [] -synonym: "generic E box motif" EXACT [] -synonym: "NDM5" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001159 -name: DMv5_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -50 and -10 relative to the TSS. Consensus sequence is KTYRGTATWTTT. Tends to co-occur with DMv4 (SO:0001157) . Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576:16827941] -synonym: "directional motif v5" EXACT [] -synonym: "DMv5" EXACT [] -synonym: "DMv5 motif" EXACT [] -synonym: "KTYRGTATWTTT" NARROW [] -synonym: "promoter motif 6" RELATED [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001160 -name: DMv3_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -30 and +15 relative to the TSS. Consensus sequence is KNNCAKCNCTRNY. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015) or MTE (0001162)." [PMID:12537576:16827941] -synonym: "directional motif v3" EXACT [] -synonym: "DMv3" EXACT [] -synonym: "DMv3 motif" EXACT [] -synonym: "KNNCAKCNCTRNY" NARROW [] -synonym: "promoter motif 7" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001161 -name: DMv2_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and -45 relative to the TSS. Consensus sequence is MKSYGGCARCGSYSS. Tends to co-occur with DMv3 (SO:0001160). Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576:16827941] -synonym: "directional motif v2" EXACT [] -synonym: "DMv2" EXACT [] -synonym: "DMv2 motif" EXACT [] -synonym: "MKSYGGCARCGSYSS" NARROW [] -synonym: "promoter motif 8" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001162 -name: MTE -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between +20 and +30 relative to the TSS. Consensus sequence is CSARCSSAACGS. Tends to co-occur with INR motif (SO:0000014). Tends to not occur with DPE motif (SO:0000015) or DMv5 (SO:0001159)." [PMID:12537576:15231738, PMID:16858867] -synonym: "CSARCSSAACGS" NARROW [] -synonym: "motif ten element" EXACT [] -synonym: "motif_ten_element" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter - -[Term] -id: SO:0001163 -name: INR1_motif -def: "A promoter motif with consensus sequence TCATTCG." [PMID:16827941] -synonym: "directional motif p3" EXACT [] -synonym: "directional promoter motif 3" EXACT [] -synonym: "DMp3" EXACT [] -synonym: "INR1 motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001164 -name: DPE1_motif -def: "A promoter motif with consensus sequence CGGACGT." [PMID:16827941] -synonym: "directional motif 5" EXACT [] -synonym: "directional promoter motif 5" RELATED [] -synonym: "DMp5" EXACT [] -synonym: "DPE1 motif" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001165 -name: DMv1_motif -def: "A promoter motif with consensus sequence CARCCCT." [PMID:16827941] -synonym: "directional promoter motif v1" RELATED [] -synonym: "DMv1" RELATED [] -synonym: "DMv1 motif" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001166 -name: GAGA_motif -def: "A non directional promoter motif with consensus sequence GAGAGCG." [PMID:16827941] -synonym: "GAGA" EXACT [] -synonym: "GAGA motif" EXACT [] -synonym: "NDM1" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001167 -name: NDM2_motif -def: "A non directional promoter motif with consensus CGMYGYCR." [PMID:16827941] -synonym: "NDM2" EXACT [] -synonym: "NDM2 motif" EXACT [] -synonym: "non directional promoter motif 2" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001168 -name: NDM3_motif -def: "A non directional promoter motif with consensus sequence GAAAGCT." [PMID:16827941] -synonym: "NDM3" EXACT [] -synonym: "NDM3 motif" EXACT [] -synonym: "non directional motif 3" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001169 -name: ds_RNA_viral_sequence -def: "A ds_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded RNA." [SO:ke] -synonym: "double stranded RNA virus sequence" EXACT [] -synonym: "ds RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001170 -name: polinton -def: "A kind of DNA transposon that populates the genomes of protists, fungi, and animals, characterized by a unique set of proteins necessary for their transposition, including a protein-primed DNA polymerase B, retroviral integrase, cysteine protease, and ATPase. Polintons are characterized by 6-bp target site duplications, terminal-inverted repeats that are several hundred nucleotides long, and 5'-AG and TC-3' termini. Polintons exist as autonomous and nonautonomous elements." [PMID:16537396] -synonym: "maverick element" RELATED [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0001171 -name: rRNA_21S -def: "A component of the large ribosomal subunit in mitochondrial rRNA." [RSC:cb] -synonym: "21S LSU rRNA" EXACT [] -synonym: "21S ribosomal RNA" EXACT [] -synonym: "21S rRNA" EXACT [] -synonym: "rRNA 21S" EXACT [] -is_a: SO:0000651 ! 
large_subunit_rRNA - -[Term] -id: SO:0001172 -name: tRNA_region -def: "A region of a tRNA." [RSC:cb] -synonym: "tRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000253 ! tRNA - -[Term] -id: SO:0001173 -name: anticodon_loop -def: "A sequence of seven nucleotide bases in tRNA which contains the anticodon. It has the sequence 5'-pyrimidine-purine-anticodon-modified purine-any base-3." [ISBN:0716719207] -synonym: "anti-codon loop" EXACT [] -synonym: "anticodon loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001174 -name: anticodon -def: "A sequence of three nucleotide bases in tRNA which recognizes a codon in mRNA." [RSC:cb] -synonym: "anti-codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Anticodon "wiki" -is_a: SO:0001172 ! tRNA_region -relationship: part_of SO:0001173 ! anticodon_loop - -[Term] -id: SO:0001175 -name: CCA_tail -def: "Base sequence at the 3' end of a tRNA. The 3'-hydroxyl group on the terminal adenosine is the attachment point for the amino acid." [ISBN:0716719207] -synonym: "CCA sequence" EXACT [] -synonym: "CCA tail" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001176 -name: DHU_loop -def: "Non-base-paired sequence of nucleotide bases in tRNA. It contains several dihydrouracil residues." [ISBN:071671920] -synonym: "D loop" RELATED [] -synonym: "DHU loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001177 -name: T_loop -def: "Non-base-paired sequence of three nucleotide bases in tRNA. It has sequence T-Psi-C." [ISBN:0716719207] -synonym: "T loop" EXACT [] -synonym: "TpsiC loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001178 -name: pyrrolysine_tRNA_primary_transcript -def: "A primary transcript encoding pyrrolysyl tRNA (SO:0000766)." [RSC:cb] -synonym: "pyrrolysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0001179 -name: U3_snoRNA -def: "U3 snoRNA is a member of the box C/D class of small nucleolar RNAs. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -comment: The definition is most of the old definition for snoRNA (SO:0000275). -synonym: "small nucleolar RNA U3" EXACT [] -synonym: "snoRNA U3" EXACT [] -synonym: "U3 small nucleolar RNA" EXACT [] -synonym: "U3 snoRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Small_nucleolar_RNA_U3 "wiki" -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0001180 -name: AU_rich_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is rich in AUUUA pentamers. Messenger RNAs bearing multiple AU-rich elements are often unstable." [PMID:7892223] -synonym: "ARE" RELATED [] -synonym: "AU rich element" EXACT [] -synonym: "AU-rich element" EXACT [] -xref: http://en.wikipedia.org/wiki/AU-rich_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! 
three_prime_UTR - -[Term] -id: SO:0001181 -name: Bruno_response_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is bound by the Drosophila Bruno protein and its homologs." [PMID:10893231] -comment: Not to be confused with BRE_motif (SO:0000016), which binds transcription factor II B. -synonym: "BRE" RELATED [] -synonym: "Bruno response element" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001182 -name: iron_responsive_element -def: "A regulatory sequence found in the 5' and 3' UTRs of many mRNAs which encode iron-binding proteins. It has a hairpin structure and is recognized by trans-acting proteins known as iron-regulatory proteins." [PMID:3198610, PMID:8710843] -synonym: "IRE" EXACT [] -synonym: "iron responsive element" EXACT [] -xref: http://en.wikipedia.org/wiki/Iron_responsive_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0001183 -name: morpholino_backbone -def: "An attribute describing a sequence composed of nucleobases bound to a morpholino backbone. A morpholino backbone consists of morpholine (CHEBI:34856) rings connected by phosphorodiamidate linkages." [RSC:cb] -comment: Do not use this for feature annotation. Use morpholino_oligo (SO:0000034) instead. -synonym: "morpholino backbone" EXACT [] -xref: http://en.wikipedia.org/wiki/Morpholino "wiki" -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001184 -name: PNA -def: "An attribute describing a sequence composed of peptide nucleic acid (CHEBI:48021), a chemical consisting of nucleobases bound to a backbone composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [RSC:cb] -comment: Do not use this term for feature annotation. Use PNA_oligo (SO:0001011) instead. -synonym: "peptide nucleic acid" RELATED [] -is_a: SO:0000348 ! 
nucleic_acid - -[Term] -id: SO:0001185 -name: enzymatic -def: "An attribute describing the sequence of a transcript that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use enzymatic_RNA (SO:0000372) instead. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001186 -name: ribozymic -def: "An attribute describing the sequence of a transcript that has catalytic activity even without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use ribozyme (SO:0000374) instead. -is_a: SO:0001185 ! enzymatic - -[Term] -id: SO:0001187 -name: pseudouridylation_guide_snoRNA -def: "A snoRNA that specifies the site of pseudouridylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA pseudouridylation guide activity (GO:0030558). -synonym: "pseudouridylation guide snoRNA" EXACT [] -is_a: SO:0000594 ! H_ACA_box_snoRNA - -[Term] -id: SO:0001188 -name: LNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of 'locked' deoxyribose rings connected to a phosphate backbone. The deoxyribose unit's conformation is 'locked' by a 2'-C,4'-C-oxymethylene link." [CHEBI:48010] -comment: Do not use this term for feature annotation. Use LNA_oligo (SO:0001189) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001189 -name: LNA_oligo -def: "An oligo composed of LNA residues." [RSC:cb] -synonym: "LNA oligo" EXACT [] -synonym: "locked nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Locked_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001188 ! LNA -relationship: has_quality SO:0001188 ! 
LNA - -[Term] -id: SO:0001190 -name: TNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of threose rings connected to a phosphate backbone." [CHEBI:48019] -comment: Do not use this term for feature annotation. Use TNA_oligo (SO:0001191) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001191 -name: TNA_oligo -def: "An oligo composed of TNA residues." [RSC:cb] -synonym: "threose nucleic acid" EXACT [] -synonym: "TNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Threose_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001190 ! TNA -relationship: has_quality SO:0001190 ! TNA - -[Term] -id: SO:0001192 -name: GNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of an acyclic three-carbon propylene glycol connected to a phosphate backbone. It has two enantiomeric forms, (R)-GNA and (S)-GNA." [CHEBI:48015] -comment: Do not use this term for feature annotation. Use GNA_oligo (SO:0001192) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001193 -name: GNA_oligo -def: "An oligo composed of GNA residues." [RSC:cb] -synonym: "glycerol nucleic acid" EXACT [] -synonym: "glycol nucleic acid" EXACT [] -synonym: "GNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Glycerol_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001192 ! GNA -relationship: has_quality SO:0001192 ! GNA - -[Term] -id: SO:0001194 -name: R_GNA -def: "An attribute describing a GNA sequence in the (R)-GNA enantiomer." [CHEBI:48016] -comment: Do not use this term for feature annotation. Use R_GNA_oligo (SO:0001195) instead. -synonym: "R GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001195 -name: R_GNA_oligo -def: "An oligo composed of (R)-GNA residues." 
[RSC:cb] -synonym: "(R)-glycerol nucleic acid" EXACT [] -synonym: "(R)-glycol nucleic acid" EXACT [] -synonym: "R GNA oligo" EXACT [] -is_a: SO:0001193 ! GNA_oligo -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001194 ! R_GNA -relationship: has_quality SO:0001194 ! R_GNA - -[Term] -id: SO:0001196 -name: S_GNA -def: "An attribute describing a GNA sequence in the (S)-GNA enantiomer." [CHEBI:48017] -comment: Do not use this term for feature annotation. Use S_GNA_oligo (SO:0001197) instead. -synonym: "S GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001197 -name: S_GNA_oligo -def: "An oligo composed of (S)-GNA residues." [RSC:cb] -synonym: "(S)-glycerol nucleic acid" EXACT [] -synonym: "(S)-glycol nucleic acid" EXACT [] -synonym: "S GNA oligo" EXACT [] -is_a: SO:0001193 ! GNA_oligo -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001196 ! S_GNA -relationship: has_quality SO:0001196 ! S_GNA - -[Term] -id: SO:0001198 -name: ds_DNA_viral_sequence -def: "A ds_DNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded DNA." [SO:ke] -synonym: "double stranded DNA virus" EXACT [] -synonym: "ds DNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001199 -name: ss_RNA_viral_sequence -def: "A ss_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as single stranded RNA." [SO:ke] -synonym: "single strand RNA virus" EXACT [] -synonym: "ss RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001200 -name: negative_sense_ssRNA_viral_sequence -def: "A negative_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that is complementary to mRNA and must be converted to positive sense RNA by RNA polymerase before translation." 
[SO:ke] -synonym: "negative sense single stranded RNA virus" RELATED [] -synonym: "negative sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001201 -name: positive_sense_ssRNA_viral_sequence -def: "A positive_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that can be immediately translated by the host." [SO:ke] -synonym: "positive sense single stranded RNA virus" RELATED [] -synonym: "positive sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001202 -name: ambisense_ssRNA_viral_sequence -def: "A ambisense_RNA_virus is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus with both messenger and anti messenger polarity." [SO:ke] -synonym: "ambisense single stranded RNA virus" EXACT [] -synonym: "ambisense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001203 -name: RNA_polymerase_promoter -def: "A region (DNA) to which RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "RNA polymerase promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0001204 -name: Phage_RNA_Polymerase_Promoter -def: "A region (DNA) to which Bacteriophage RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "Phage RNA Polymerase Promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0001205 -name: SP6_RNA_Polymerase_Promoter -def: "A region (DNA) to which the SP6 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "SP6 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001206 -name: T3_RNA_Polymerase_Promoter -def: "A DNA sequence to which the T3 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T3 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! 
Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001207 -name: T7_RNA_Polymerase_Promoter -def: "A region (DNA) to which the T7 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T7 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001208 -name: five_prime_EST -def: "An EST read from the 5' end of a transcript that usually codes for a protein. These regions tend to be conserved across species and do not change much within a gene family." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "5' EST" EXACT [] -synonym: "five prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001209 -name: three_prime_EST -def: "An EST read from the 3' end of a transcript. They are more likely to fall within non-coding, or untranslated regions(UTRs)." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "3' EST" EXACT [] -synonym: "three prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001210 -name: translational_frameshift -def: "The region of mRNA (not divisible by 3 bases) that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "ribosomal frameshift" EXACT [] -synonym: "translational frameshift" EXACT [] -xref: http://en.wikipedia.org/wiki/Translational_frameshift "wiki" -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0001211 -name: plus_1_translational_frameshift -def: "The region of mRNA 1 base long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 1 ribosomal frameshift" EXACT [] -synonym: "plus 1 translational frameshift" EXACT [] -is_a: SO:0001210 ! 
translational_frameshift - -[Term] -id: SO:0001212 -name: plus_2_translational_frameshift -def: "The region of mRNA 2 bases long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 2 ribosomal frameshift" EXACT [] -synonym: "plus 2 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001213 -name: group_III_intron -def: "Group III introns are introns found in the mRNA of the plastids of euglenoid protists. They are spliced by a two step transesterification with bulged adenosine as initiating nucleophile." [PMID:11377794] -comment: GO:0000374. -synonym: "group III intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_III_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -def: "The maximal intersection of exon and UTR." [SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with a stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001216 -name: endonuclease_spliced_intron -def: "An intron that spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -synonym: "endonuclease spliced intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0001217 -name: protein_coding_gene -synonym: "protein coding gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000010 ! 
protein_coding -relationship: has_quality SO:0000010 ! protein_coding - -[Term] -id: SO:0001218 -name: transgenic_insertion -def: "An insertion that derives from another organism, via the use of recombinant DNA technology." [SO:bm] -synonym: "transgenic insertion" EXACT [] -is_a: SO:0000667 ! insertion -intersection_of: SO:0000667 ! insertion -intersection_of: has_quality SO:0000781 ! transgenic -relationship: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0001219 -name: retrogene -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000569 ! retrotransposed -relationship: has_quality SO:0000569 ! retrotransposed - -[Term] -id: SO:0001220 -name: silenced_by_RNA_interference -def: "An attribute describing an epigenetic process where a gene is inactivated by RNA interference." [RSC:cb] -comment: RNA interference is GO:0016246. -synonym: "silenced by RNA interference" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001221 -name: silenced_by_histone_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by histone modification." [RSC:cb] -comment: Histone modification is GO:0016570. -synonym: "silenced by histone modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001222 -name: silenced_by_histone_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone methylation." [RSC:cb] -comment: Histone methylation is GO:0016571. -synonym: "silenced by histone methylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001223 -name: silenced_by_histone_deacetylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone deacetylation." [RSC:cb] -comment: Histone deacetylation is GO:0016573. -synonym: "silenced by histone deacetylation" EXACT [] -is_a: SO:0001221 ! 
silenced_by_histone_modification - -[Term] -id: SO:0001224 -name: gene_silenced_by_RNA_interference -def: "A gene that is silenced by RNA interference." [SO:xp] -synonym: "gene silenced by RNA interference" EXACT [] -synonym: "RNA interference silenced gene" EXACT [] -synonym: "RNAi silenced gene" EXACT [] -is_a: SO:0000127 ! silenced_gene -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001220 ! silenced_by_RNA_interference -relationship: has_quality SO:0001220 ! silenced_by_RNA_interference - -[Term] -id: SO:0001225 -name: gene_silenced_by_histone_modification -def: "A gene that is silenced by histone modification." [SO:xp] -synonym: "gene silenced by histone modification" EXACT [] -is_a: SO:0000127 ! silenced_gene -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001221 ! silenced_by_histone_modification -relationship: has_quality SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001226 -name: gene_silenced_by_histone_methylation -def: "A gene that is silenced by histone methylation." [SO:xp] -synonym: "gene silenced by histone methylation" EXACT [] -is_a: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001222 ! silenced_by_histone_methylation -relationship: has_quality SO:0001222 ! silenced_by_histone_methylation - -[Term] -id: SO:0001227 -name: gene_silenced_by_histone_deacetylation -def: "A gene that is silenced by histone deacetylation." [SO:xp] -synonym: "gene silenced by histone deacetylation" EXACT [] -is_a: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001223 ! silenced_by_histone_deacetylation -relationship: has_quality SO:0001223 ! 
silenced_by_histone_deacetylation - -[Term] -id: SO:0001228 -name: dihydrouridine -def: "A modified RNA base in which the 5,6-dihydrouracil is bound to the ribose ring." [RSC:cb] -synonym: "D" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Dihydrouridine "wiki" -xref: RNAMOD:051 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001229 -name: pseudouridine -def: "A modified RNA base in which the 5- position of the uracil is bound to the ribose ring instead of the 4- position." [RSC:cb] -comment: The free molecule is CHEBI:17802. -synonym: "Y" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Pseudouridine "wiki" -xref: RNAMOD:050 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001230 -name: inosine -def: "A modified RNA base in which hypoxanthine is bound to the ribose ring." [http://library.med.utah.edu/RNAmods/, RSC:cb] -comment: The free molecule is CHEBI:17596. -synonym: "I" RELATED [] -synonym: "RNAMOD:017" RELATED [] -xref: http://en.wikipedia.org/wiki/Inosine "wiki" -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001231 -name: seven_methylguanine -def: "A modified RNA base in which guanine is methylated at the 7- position." [RSC:cb] -comment: The free molecule is CHEBI:2274. -synonym: "7-methylguanine" EXACT [] -synonym: "seven methylguanine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001232 -name: ribothymidine -def: "A modified RNA base in which thymine is bound to the ribose ring." [RSC:cb] -comment: The free molecule is CHEBI:30832. -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001233 -name: methylinosine -def: "A modified RNA base in which methylhypoxanthine is bound to the ribose ring." [RSC:cb] -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001234 -name: mobile -def: "An attribute describing a feature that has either intra-genome or intracellular mobility." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Mobile "wiki" -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0001235 -name: replicon -def: "A region containing at least one unique origin of replication and a unique termination site." [ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001237 -name: amino_acid -def: "A sequence feature that corresponds to a single amino acid residue in a polypeptide." [RSC:cb] -comment: Probably in the future this will cross reference to Chebi. -synonym: "amino acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Amino_acid "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0001238 -name: major_TSS -synonym: "major transcription start site" EXACT [] -synonym: "major TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001239 -name: minor_TSS -synonym: "minor TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001240 -name: TSS_region -def: "The region of a gene from the 5' most TSS to the 3' TSS." [BBOP:nw] -synonym: "TSS region" EXACT [] -is_a: SO:0000842 ! gene_component_region -relationship: has_part SO:0000315 ! TSS - -[Term] -id: SO:0001241 -name: encodes_alternate_transcription_start_sites -synonym: "encodes alternate transcription start sites" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0001243 -name: miRNA_primary_transcript_region -def: "A part of an miRNA primary_transcript." [SO:ke] -synonym: "miRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0001244 -name: pre_miRNA -def: "The 60-70 nucleotide region remain after Drosha processing of the primary transcript, that folds back upon itself to form a hairpin sructure." 
[SO:ke] -synonym: "pre-miRNA" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0001245 -name: miRNA_stem -def: "The stem of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA stem" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001246 -name: miRNA_loop -def: "The loop of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA loop" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001247 -name: synthetic_oligo -def: "An oligo composed of synthetic nucleotides." [SO:ke] -synonym: "synthetic oligo" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0001248 -name: assembly -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001249 -name: fragment_assembly -def: "A fragment assembly is a genome assembly that orders overlapping fragments of the genome based on landmark sequences. The base pair distance between the landmarks is known allowing additivity of lengths." [SO:ke] -synonym: "fragment assembly" EXACT [] -synonym: "physical map" EXACT [] -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0001250 -name: fingerprint_map -def: "A fingerprint_map is a physical map composed of restriction fragments." [SO:ke] -synonym: "BACmap" EXACT [] -synonym: "fingerprint map" EXACT [] -synonym: "FPC" EXACT [] -synonym: "FPCmap" EXACT [] -synonym: "restriction map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000412 ! 
restriction_fragment - -[Term] -id: SO:0001251 -name: STS_map -def: "An STS map is a physical map organized by the unique STS landmarks." [SO:ke] -synonym: "STS map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001252 -name: RH_map -def: "A radiation hybrid map is a physical map." [SO:ke] -synonym: "radiation hybrid map" EXACT [] -synonym: "RH map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001253 -name: sonicate_fragment -def: "A DNA fragment generated by sonication. Sonication is a technique used to sheer DNA into smaller fragments." [SO:ke] -synonym: "sonicate fragment" EXACT [] -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0001254 -name: polyploid -def: "A kind of chromosome variation where the chromosome complement is an exact multiple of the haploid number and is greater than the diploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Polyploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0001255 -name: autopolyploid -def: "A polyploid where the multiple chromosome set was derived from the same organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Autopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001256 -name: allopolyploid -def: "A polyploid where the multiple chromosome set was derived from a different organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Allopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001257 -name: homing_endonuclease_binding_site -def: "The binding site (recognition site) of a homing endonuclease. The binding site is typically large." [SO:ke] -synonym: "homing endonuclease binding site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0001258 -name: octamer_motif -def: "A sequence element characteristic of some RNA polymerase II promoters with sequence ATTGCAT that binds Pou-domain transcription factors." 
[GOC:dh, PMID:3095662] -comment: Nature. 1986 Oct 16-22;323(6089):640-3. -synonym: "octamer motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001259 -name: apicoplast_chromosome -def: "A chromosome originating in an apicoplast." [SO:xp] -synonym: "apicoplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000743 ! apicoplast_sequence -relationship: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0001260 -name: sequence_collection -def: "A collection of discontinuous sequences." [SO:ke] -synonym: "sequence collection" EXACT [] - -[Term] -id: SO:0001261 -name: overlapping_feature_set -def: "A continuous region of sequence composed of the overlapping of multiple sequence_features, which ultimately provides evidence for another sequence_feature." [SO:ke] -comment: This feature was requested by Nicole, tracker id 1911479. It is required to gather evidence together for annotation. An example would be overlapping ESTs that support an mRNA. -synonym: "overlapping feature set" EXACT [] -is_a: SO:0000703 ! experimental_result_region - -[Term] -id: SO:0001262 -name: overlapping_EST_set -def: "A continous experimental result region extending the length of multiple overlapping EST's." [SO:ke] -synonym: "overlapping EST set" EXACT [] -is_a: SO:0001261 ! overlapping_feature_set -relationship: has_part SO:0000345 ! EST - -[Term] -id: SO:0001263 -name: ncRNA_gene -synonym: "ncRNA gen" EXACT [] -synonym: "ncRNA gene" EXACT [] -synonym: "non-coding RNA gene" RELATED [] -is_a: SO:0000704 ! gene - -[Term] -id: SO:0001264 -name: gRNA_gene -synonym: "gRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000979 ! gRNA_encoding -relationship: has_quality SO:0000979 ! 
gRNA_encoding - -[Term] -id: SO:0001265 -name: miRNA_gene -synonym: "miRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000571 ! miRNA_encoding -relationship: has_quality SO:0000571 ! miRNA_encoding - -[Term] -id: SO:0001266 -name: scRNA_gene -synonym: "scRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000575 ! scRNA_encoding -relationship: has_quality SO:0000575 ! scRNA_encoding - -[Term] -id: SO:0001267 -name: snoRNA_gene -synonym: "snoRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000578 ! snoRNA_encoding -relationship: has_quality SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0001268 -name: snRNA_gene -synonym: "snRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0001263 ! ncRNA_gene - -[Term] -id: SO:0001269 -name: SRP_RNA_gene -synonym: "SRP RNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000642 ! SRP_RNA_encoding -relationship: has_quality SO:0000642 ! SRP_RNA_encoding - -[Term] -id: SO:0001270 -name: stRNA_gene -synonym: "stRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000656 ! stRNA_encoding -relationship: has_quality SO:0000656 ! stRNA_encoding - -[Term] -id: SO:0001271 -name: tmRNA_gene -synonym: "tmRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000659 ! tmRNA_encoding -relationship: has_quality SO:0000659 ! tmRNA_encoding - -[Term] -id: SO:0001272 -name: tRNA_gene -synonym: "tRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! 
ncRNA_gene -intersection_of: has_quality SO:0000663 ! tRNA_encoding -relationship: has_quality SO:0000663 ! tRNA_encoding - -[Term] -id: SO:0001273 -name: modified_adenosine -def: "A modified adenine is an adenine base feature that has been altered." [SO:ke] -synonym: "modified adenosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001274 -name: modified_inosine -def: "A modified inosine is an inosine base feature that has been altered." [SO:ke] -synonym: "modified inosine" EXACT [] -is_a: SO:0001230 ! inosine - -[Term] -id: SO:0001275 -name: modified_cytidine -def: "A modified cytidine is a cytidine base feature which has been altered." [SO:ke] -synonym: "modified cytidine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001276 -name: modified_guanosine -synonym: "modified guanosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001277 -name: modified_uridine -synonym: "modified uridine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001278 -name: one_methylinosine -def: "1-methylinosine is a modified insosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylinosine" EXACT [] -synonym: "m1I" EXACT RNAMOD [] -synonym: "one methylinosine" EXACT [] -xref: RNAMOD:018 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001279 -name: one_two_prime_O_dimethylinosine -def: "1,2'-O-dimethylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylinosine" EXACT [] -synonym: "m'Im" EXACT RNAMOD [] -synonym: "one two prime O dimethylinosine" EXACT [] -xref: RNAMOD:019 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001280 -name: two_prime_O_methylinosine -def: "2'-O-methylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylinosine" EXACT [] -synonym: "Im" EXACT RNAMOD [] -synonym: "two prime O methylinosine" EXACT [] -xref: RNAMOD:081 -is_a: SO:0001274 ! 
modified_inosine - -[Term] -id: SO:0001281 -name: three_methylcytidine -def: "3-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylcytidine" EXACT [] -synonym: "m3C" EXACT RNAMOD [] -synonym: "three methylcytidine" EXACT [] -xref: RNAMOD:020 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001282 -name: five_methylcytidine -def: "5-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylcytidine" EXACT [] -synonym: "five methylcytidine" EXACT [] -synonym: "m5C" EXACT RNAMOD [] -xref: RNAMOD:021 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001283 -name: two_prime_O_methylcytidine -def: "2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylcytidine" EXACT [] -synonym: "Cm" EXACT RNAMOD [] -synonym: "two prime O methylcytidine" EXACT [] -xref: RNAMOD:022 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001284 -name: two_thiocytidine -def: "2-thiocytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiocytidine" EXACT [] -synonym: "s2C" EXACT RNAMOD [] -synonym: "two thiocytidine" EXACT [] -xref: RNAMOD:023 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001285 -name: N4_acetylcytidine -def: "N4-acetylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4C" EXACT RNAMOD [] -synonym: "N4 acetylcytidine" EXACT [] -synonym: "N4-acetylcytidine" EXACT [] -xref: RNAMOD:024 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001286 -name: five_formylcytidine -def: "5-formylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formylcytidine" EXACT [] -synonym: "f5C" EXACT RNAMOD [] -synonym: "five formylcytidine" EXACT [] -xref: RNAMOD:025 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001287 -name: five_two_prime_O_dimethylcytidine -def: "5,2'-O-dimethylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethylcytidine" EXACT [] -synonym: "five two prime O dimethylcytidine" EXACT [] -synonym: "m5Cm" EXACT RNAMOD [] -xref: RNAMOD:026 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001288 -name: N4_acetyl_2_prime_O_methylcytidine -def: "N4-acetyl-2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4Cm" EXACT RNAMOD [] -synonym: "N4 acetyl 2 prime O methylcytidine" EXACT [] -synonym: "N4-acetyl-2'-O-methylcytidine" EXACT [] -xref: RNAMOD:027 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001289 -name: lysidine -def: "Lysidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "k2C" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Lysidine "wiki" -xref: RNAMOD:028 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001290 -name: N4_methylcytidine -def: "N4-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4C" EXACT RNAMOD [] -synonym: "N4 methylcytidine" EXACT [] -synonym: "N4-methylcytidine" EXACT [] -xref: RNAMOD:082 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001291 -name: N4_2_prime_O_dimethylcytidine -def: "N4,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4Cm" EXACT RNAMOD [] -synonym: "N4 2 prime O dimethylcytidine" EXACT [] -synonym: "N4,2'-O-dimethylcytidine" EXACT [] -xref: RNAMOD:083 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001292 -name: five_hydroxymethylcytidine -def: "5-hydroxymethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxymethylcytidine" EXACT [] -synonym: "five hydroxymethylcytidine" EXACT [] -synonym: "hm5C" EXACT RNAMOD [] -xref: RNAMOD:084 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001293 -name: five_formyl_two_prime_O_methylcytidine -def: "5-formyl-2'-O-methylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-formyl-2'-O-methylcytidine" EXACT [] -synonym: "f5Cm" EXACT RNAMOD [] -synonym: "five formyl two prime O methylcytidine" EXACT [] -xref: RNAMOD:095 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001294 -name: N4_N4_2_prime_O_trimethylcytidine -def: "N4_N4_2_prime_O_trimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m42Cm" EXACT RNAMOD [] -synonym: "N4,N4,2'-O-trimethylcytidine" EXACT [] -xref: RNAMOD:107 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001295 -name: one_methyladenosine -def: "1_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyladenosine" EXACT [] -synonym: "m1A" EXACT RNAMOD [] -synonym: "one methyladenosine" EXACT [] -xref: RNAMOD:001 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001296 -name: two_methyladenosine -def: "2_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methyladenosine" EXACT [] -synonym: "m2A" EXACT RNAMOD [] -synonym: "two methyladenosine" EXACT [] -xref: RNAMOD:002 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001297 -name: N6_methyladenosine -def: "N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6A" EXACT RNAMOD [] -synonym: "N6 methyladenosine" EXACT [] -synonym: "N6-methyladenosine" EXACT [] -xref: RNAMOD:003 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001298 -name: two_prime_O_methyladenosine -def: "2prime_O_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyladenosine" EXACT [] -synonym: "Am" EXACT RNAMOD [] -synonym: "two prime O methyladenosine" EXACT [] -xref: RNAMOD:004 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001299 -name: two_methylthio_N6_methyladenosine -def: "2_methylthio_N6_methyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-methyladenosine" EXACT [] -synonym: "ms2m6A" EXACT RNAMOD [] -synonym: "two methylthio N6 methyladenosine" EXACT [] -xref: RNAMOD:005 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001300 -name: N6_isopentenyladenosine -def: "N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "i6A" EXACT RNAMOD [] -synonym: "N6 isopentenyladenosine" EXACT [] -synonym: "N6-isopentenyladenosine" EXACT [] -xref: RNAMOD:006 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001301 -name: two_methylthio_N6_isopentenyladenosine -def: "2_methylthio_N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-isopentenyladenosine" EXACT [] -synonym: "ms2i6A" EXACT RNAMOD [] -synonym: "two methylthio N6 isopentenyladenosine" EXACT [] -xref: RNAMOD:007 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001302 -name: N6_cis_hydroxyisopentenyl_adenosine -def: "N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "io6A" EXACT RNAMOD [] -synonym: "N6 cis hydroxyisopentenyl adenosine" EXACT [] -synonym: "N6-(cis-hydroxyisopentenyl)adenosine" EXACT [] -xref: RNAMOD:008 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001303 -name: two_methylthio_N6_cis_hydroxyisopentenyl_adenosine -def: "2_methylthio_N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-(cis-hydroxyisopentenyl) adenosine" EXACT [] -synonym: "ms2io6A" EXACT RNAMOD [] -synonym: "two methylthio N6 cis hydroxyisopentenyl adenosine" EXACT [] -xref: RNAMOD:009 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001304 -name: N6_glycinylcarbamoyladenosine -def: "N6_glycinylcarbamoyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "g6A" EXACT RNAMOD [] -synonym: "N6 glycinylcarbamoyladenosine" EXACT [] -synonym: "N6-glycinylcarbamoyladenosine" EXACT [] -xref: RNAMOD:010 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001305 -name: N6_threonylcarbamoyladenosine -def: "N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-threonylcarbamoyladenosine" EXACT [] -synonym: "t6A" EXACT RNAMOD [] -xref: RNAMOD:011 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001306 -name: two_methylthio_N6_threonyl_carbamoyladenosine -def: "2_methylthio_N6_threonyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-threonyl carbamoyladenosine" EXACT [] -synonym: "ms2t6A" EXACT RNAMOD [] -synonym: "two methylthio N6 threonyl carbamoyladenosine" EXACT [] -xref: RNAMOD:012 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001307 -name: N6_methyl_N6_threonylcarbamoyladenosine -def: "N6_methyl_N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6t6A" EXACT RNAMOD [] -synonym: "N6 methyl N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-methyl-N6-threonylcarbamoyladenosine" EXACT [] -xref: RNAMOD:013 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001308 -name: N6_hydroxynorvalylcarbamoyladenosine -def: "N6_hydroxynorvalylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "hn6A" EXACT RNAMOD [] -synonym: "N6 hydroxynorvalylcarbamoyladenosine" EXACT [] -synonym: "N6-hydroxynorvalylcarbamoyladenosine" EXACT [] -xref: RNAMOD:014 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001309 -name: two_methylthio_N6_hydroxynorvalyl_carbamoyladenosine -def: "2_methylthio_N6_hydroxynorvalyl_carbamoyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-hydroxynorvalyl carbamoyladenosine" EXACT [] -synonym: "ms2hn6A" EXACT RNAMOD [] -synonym: "two methylthio N6 hydroxynorvalyl carbamoyladenosine" EXACT [] -xref: RNAMOD:015 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001310 -name: two_prime_O_ribosyladenosine_phosphate -def: "2prime_O_ribosyladenosine_phosphate is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosyladenosine (phosphate)" EXACT [] -synonym: "Ar(p)" EXACT RNAMOD [] -synonym: "two prime O ribosyladenosine phosphate" EXACT [] -xref: RNAMOD:016 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001311 -name: N6_N6_dimethyladenosine -def: "N6_N6_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62A" EXACT RNAMOD [] -synonym: "N6,N6-dimethyladenosine" EXACT [] -xref: RNAMOD:080 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001312 -name: N6_2_prime_O_dimethyladenosine -def: "N6_2prime_O_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6Am" EXACT RNAMOD [] -synonym: "N6 2 prime O dimethyladenosine" EXACT [] -synonym: "N6,2'-O-dimethyladenosine" EXACT [] -xref: RNAMOD:088 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001313 -name: N6_N6_2_prime_O_trimethyladenosine -def: "N6_N6_2prime_O_trimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62Am" EXACT RNAMOD [] -synonym: "N6,N6,2'-O-trimethyladenosine" EXACT [] -xref: RNAMOD:089 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001314 -name: one_two_prime_O_dimethyladenosine -def: "1,2'-O-dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethyladenosine" EXACT [] -synonym: "m1Am" EXACT RNAMOD [] -synonym: "one two prime O dimethyladenosine" EXACT [] -xref: RNAMOD:097 -is_a: SO:0001273 ! 
modified_adenosine - -[Term] -id: SO:0001315 -name: N6_acetyladenosine -def: "N6_acetyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac6A" EXACT RNAMOD [] -synonym: "N6 acetyladenosine" EXACT [] -synonym: "N6-acetyladenosine" EXACT [] -xref: RNAMOD:102 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001316 -name: seven_deazaguanosine -def: "7-deazaguanosine is a moddified guanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-deazaguanosine" RELATED [] -synonym: "seven deazaguanosine" EXACT [] -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001317 -name: queuosine -def: "Queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "Q" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Queuosine "wiki" -xref: RNAMOD:043 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001318 -name: epoxyqueuosine -def: "Epoxyqueuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "eQ" EXACT RNAMOD [] -xref: RNAMOD:044 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001319 -name: galactosyl_queuosine -def: "Galactosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "galactosyl queuosine" EXACT [] -synonym: "galactosyl-queuosine" EXACT [] -synonym: "galQ" EXACT RNAMOD [] -xref: RNAMOD:045 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001320 -name: mannosyl_queuosine -def: "Mannosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "mannosyl queuosine" EXACT [] -synonym: "mannosyl-queuosine" EXACT [] -synonym: "manQ" EXACT RNAMOD [] -xref: RNAMOD:046 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001321 -name: seven_cyano_seven_deazaguanosine -def: "7_cyano_7_deazaguanosine is a modified 7-deazoguanosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "7-cyano-7-deazaguanosine" EXACT [] -synonym: "preQ0" EXACT RNAMOD [] -synonym: "seven cyano seven deazaguanosine" EXACT [] -xref: RNAMOD:047 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001322 -name: seven_aminomethyl_seven_deazaguanosine -def: "7_aminomethyl_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-aminomethyl-7-deazaguanosine" EXACT [] -synonym: "preQ1" EXACT RNAMOD [] -synonym: "seven aminomethyl seven deazaguanosine" EXACT [] -xref: RNAMOD:048 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001323 -name: archaeosine -def: "Archaeosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "G+" EXACT RNAMOD [] -xref: RNAMOD:049 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001324 -name: one_methylguanosine -def: "1_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylguanosine" EXACT [] -synonym: "m1G" EXACT RNAMOD [] -synonym: "one methylguanosine" EXACT [] -xref: RNAMOD:029 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001325 -name: N2_methylguanosine -def: "N2_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2G" EXACT RNAMOD [] -synonym: "N2 methylguanosine" EXACT [] -synonym: "N2-methylguanosine" EXACT [] -xref: RNAMOD:030 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001326 -name: seven_methylguanosine -def: "7_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "7-methylguanosine" EXACT [] -synonym: "m7G" EXACT RNAMOD [] -synonym: "seven methylguanosine" EXACT [] -xref: RNAMOD:031 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001327 -name: two_prime_O_methylguanosine -def: "2prime_O_methylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylguanosine" EXACT [] -synonym: "Gm" EXACT RNAMOD [] -synonym: "two prime O methylguanosine" EXACT [] -xref: RNAMOD:032 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001328 -name: N2_N2_dimethylguanosine -def: "N2_N2_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22G" EXACT RNAMOD [] -synonym: "N2,N2-dimethylguanosine" EXACT [] -xref: RNAMOD:033 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001329 -name: N2_2_prime_O_dimethylguanosine -def: "N2_2prime_O_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2Gm" EXACT RNAMOD [] -synonym: "N2 2 prime O dimethylguanosine" EXACT [] -synonym: "N2,2'-O-dimethylguanosine" EXACT [] -xref: RNAMOD:034 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001330 -name: N2_N2_2_prime_O_trimethylguanosine -def: "N2_N2_2prime_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22Gmv" EXACT RNAMOD [] -synonym: "N2,N2,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:035 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001331 -name: two_prime_O_ribosylguanosine_phosphate -def: "2prime_O_ribosylguanosine_phosphate is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosylguanosine (phosphate)" EXACT [] -synonym: "Gr(p)" EXACT RNAMOD [] -synonym: "two prime O ribosylguanosine phosphate" EXACT [] -xref: RNAMOD:036 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001332 -name: wybutosine -def: "Wybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "yW" EXACT RNAMOD [] -xref: RNAMOD:037 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001333 -name: peroxywybutosine -def: "Peroxywybutosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "o2yW" EXACT RNAMOD [] -xref: RNAMOD:038 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001334 -name: hydroxywybutosine -def: "Hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW" EXACT RNAMOD [] -xref: RNAMOD:039 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001335 -name: undermodified_hydroxywybutosine -def: "Undermodified_hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW*" EXACT RNAMOD [] -synonym: "undermodified hydroxywybutosine" EXACT [] -xref: RNAMOD:040 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001336 -name: wyosine -def: "Wyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "IMG" EXACT RNAMOD [] -xref: RNAMOD:041 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001337 -name: methylwyosine -def: "Methylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mimG" EXACT RNAMOD [] -xref: RNAMOD:042 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001338 -name: N2_7_dimethylguanosine -def: "N2_7_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7G" EXACT RNAMOD [] -synonym: "N2 7 dimethylguanosine" EXACT [] -synonym: "N2,7-dimethylguanosine" EXACT [] -xref: RNAMOD:090 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001339 -name: N2_N2_7_trimethylguanosine -def: "N2_N2_7_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,2,7G" EXACT RNAMOD [] -synonym: "N2,N2,7-trimethylguanosine" EXACT [] -xref: RNAMOD:091 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001340 -name: one_two_prime_O_dimethylguanosine -def: "1_2prime_O_dimethylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylguanosine" EXACT [] -synonym: "m1Gm" EXACT RNAMOD [] -synonym: "one two prime O dimethylguanosine" EXACT [] -xref: RNAMOD:096 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001341 -name: four_demethylwyosine -def: "4_demethylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-demethylwyosine" EXACT [] -synonym: "four demethylwyosine" EXACT [] -synonym: "imG-14" EXACT RNAMOD [] -xref: RNAMOD:100 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001342 -name: isowyosine -def: "Isowyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "imG2" EXACT RNAMOD [] -xref: RNAMOD:101 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001343 -name: N2_7_2prirme_O_trimethylguanosine -def: "N2_7_2prirme_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7Gm" EXACT RNAMOD [] -synonym: "N2 7 2prirme O trimethylguanosine" EXACT [] -synonym: "N2,7,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:106 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001344 -name: five_methyluridine -def: "5_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methyluridine" EXACT [] -synonym: "five methyluridine" EXACT [] -synonym: "m5U" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/5-methyluridine "wiki" -xref: RNAMOD:052 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001345 -name: two_prime_O_methyluridine -def: "2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyluridine" EXACT [] -synonym: "two prime O methyluridine" EXACT [] -synonym: "Um" EXACT RNAMOD [] -xref: RNAMOD:053 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001346 -name: five_two_prime_O_dimethyluridine -def: "5_2_prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethyluridine" EXACT [] -synonym: "five two prime O dimethyluridine" EXACT [] -synonym: "m5Um" EXACT RNAMOD [] -xref: RNAMOD:054 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001347 -name: one_methylpseudouridine -def: "1_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylpseudouridine" EXACT [] -synonym: "m1Y" EXACT RNAMOD [] -synonym: "one methylpseudouridine" EXACT [] -xref: RNAMOD:055 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001348 -name: two_prime_O_methylpseudouridine -def: "2prime_O_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylpseudouridine" EXACT [] -synonym: "two prime O methylpseudouridine" EXACT [] -synonym: "Ym" EXACT RNAMOD [] -xref: RNAMOD:056 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001349 -name: two_thiouridine -def: "2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiouridine" EXACT [] -synonym: "s2U" EXACT RNAMOD [] -synonym: "two thiouridine" EXACT [] -xref: RNAMOD:057 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001350 -name: four_thiouridine -def: "4_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-thiouridine" EXACT [] -synonym: "four thiouridine" EXACT [] -synonym: "s4U" EXACT RNAMOD [] -xref: RNAMOD:058 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001351 -name: five_methyl_2_thiouridine -def: "5_methyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyl-2-thiouridine" EXACT [] -synonym: "five methyl 2 thiouridine" EXACT [] -synonym: "m5s2U" EXACT RNAMOD [] -xref: RNAMOD:059 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001352 -name: two_thio_two_prime_O_methyluridine -def: "2_thio_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thio-2'-O-methyluridine" EXACT [] -synonym: "s2Um" EXACT RNAMOD [] -synonym: "two thio two prime O methyluridine" EXACT [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001353 -name: three_three_amino_three_carboxypropyl_uridine -def: "3_3_amino_3_carboxypropyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-(3-amino-3-carboxypropyl)uridine" EXACT [] -synonym: "acp3U" EXACT RNAMOD [] -xref: RNAMOD:061 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001354 -name: five_hydroxyuridine -def: "5_hydroxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxyuridine" EXACT [] -synonym: "five hydroxyuridine" EXACT [] -synonym: "ho5U" EXACT RNAMOD [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001355 -name: five_methoxyuridine -def: "5_methoxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxyuridine" EXACT [] -synonym: "five methoxyuridine" EXACT [] -synonym: "mo5U" EXACT RNAMOD [] -xref: RNAMOD:063 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001356 -name: uridine_five_oxyacetic_acid -def: "Uridine_5_oxyacetic_acid is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "cmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid" EXACT [] -synonym: "uridine five oxyacetic acid" EXACT [] -xref: RNAMOD:064 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001357 -name: uridine_five_oxyacetic_acid_methyl_ester -def: "Uridine_5_oxyacetic_acid_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mcmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid methyl ester" EXACT [] -synonym: "uridine five oxyacetic acid methyl ester" EXACT [] -xref: RNAMOD:065 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001358 -name: five_carboxyhydroxymethyl_uridine -def: "5_carboxyhydroxymethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine" EXACT [] -synonym: "chm5U" EXACT RNAMOD [] -synonym: "five carboxyhydroxymethyl uridine" EXACT [] -xref: RNAMOD:066 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001359 -name: five_carboxyhydroxymethyl_uridine_methyl_ester -def: "5_carboxyhydroxymethyl_uridine_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine methyl ester" EXACT [] -synonym: "five carboxyhydroxymethyl uridine methyl ester" EXACT [] -synonym: "mchm5U" EXACT RNAMOD [] -xref: RNAMOD:067 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001360 -name: five_methoxycarbonylmethyluridine -def: "Five_methoxycarbonylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyluridine" EXACT [] -synonym: "five methoxycarbonylmethyluridine" EXACT [] -synonym: "mcm5U" EXACT RNAMOD [] -xref: RNAMOD:068 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001361 -name: five_methoxycarbonylmethyl_two_prime_O_methyluridine -def: "Five_methoxycarbonylmethyl_2_prime_O_methyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five methoxycarbonylmethyl two prime O methyluridine" EXACT [] -synonym: "mcm5Um" EXACT RNAMOD [] -xref: RNAMOD:069 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001362 -name: five_methoxycarbonylmethyl_two_thiouridine -def: "5_methoxycarbonylmethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2-thiouridine" EXACT [] -synonym: "five methoxycarbonylmethyl two thiouridine" EXACT [] -synonym: "mcm5s2U" EXACT RNAMOD [] -xref: RNAMOD:070 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001363 -name: five_aminomethyl_two_thiouridine -def: "5_aminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-aminomethyl-2-thiouridine" EXACT [] -synonym: "five aminomethyl two thiouridine" EXACT [] -synonym: "nm5s2U" EXACT RNAMOD [] -xref: RNAMOD:071 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001364 -name: five_methylaminomethyluridine -def: "5_methylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyluridine" EXACT [] -synonym: "five methylaminomethyluridine" EXACT [] -synonym: "mnm5U" EXACT RNAMOD [] -xref: RNAMOD:072 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001365 -name: five_methylaminomethyl_two_thiouridine -def: "5_methylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-thiouridine" EXACT [] -synonym: "five methylaminomethyl two thiouridine" EXACT [] -synonym: "mnm5s2U" EXACT RNAMOD [] -xref: RNAMOD:073 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001366 -name: five_methylaminomethyl_two_selenouridine -def: "5_methylaminomethyl_2_selenouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-selenouridine" EXACT [] -synonym: "five methylaminomethyl two selenouridine" EXACT [] -synonym: "mnm5se2U" EXACT RNAMOD [] -xref: RNAMOD:074 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001367 -name: five_carbamoylmethyluridine -def: "5_carbamoylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyluridine" EXACT [] -synonym: "five carbamoylmethyluridine" EXACT [] -synonym: "ncm5U" EXACT RNAMOD [] -xref: RNAMOD:075 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001368 -name: five_carbamoylmethyl_two_prime_O_methyluridine -def: "5_carbamoylmethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five carbamoylmethyl two prime O methyluridine" EXACT [] -synonym: "ncm5Um" EXACT RNAMOD [] -xref: RNAMOD:076 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001369 -name: five_carboxymethylaminomethyluridine -def: "5_carboxymethylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyluridine" EXACT [] -synonym: "cmnm5U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyluridine" EXACT [] -xref: RNAMOD:077 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001370 -name: five_carboxymethylaminomethyl_two_prime_O_methyluridine -def: "5_carboxymethylaminomethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl- 2'-O-methyluridine" EXACT [] -synonym: "cmnm5Um" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two prime O methyluridine" EXACT [] -xref: RNAMOD:078 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001371 -name: five_carboxymethylaminomethyl_two_thiouridine -def: "5_carboxymethylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl-2-thiouridine" EXACT [] -synonym: "cmnm5s2U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two thiouridine" EXACT [] -xref: RNAMOD:079 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001372 -name: three_methyluridine -def: "3_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methyluridine" EXACT [] -synonym: "m3U" EXACT RNAMOD [] -synonym: "three methyluridine" EXACT [] -xref: RNAMOD:085 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001373 -name: one_methyl_three_three_amino_three_carboxypropyl_pseudouridine -def: "1_methyl_3_3_amino_3_carboxypropyl_pseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyl-3-(3-amino-3-carboxypropyl) pseudouridine" EXACT [] -synonym: "m1acp3Y" EXACT RNAMOD [] -xref: RNAMOD:086 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001374 -name: five_carboxymethyluridine -def: "5_carboxymethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethyluridine" EXACT [] -synonym: "cm5U" EXACT RNAMOD [] -synonym: "five carboxymethyluridine" EXACT [] -xref: RNAMOD:087 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001375 -name: three_two_prime_O_dimethyluridine -def: "3_2prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3,2'-O-dimethyluridine" EXACT [] -synonym: "m3Um" EXACT RNAMOD [] -synonym: "three two prime O dimethyluridine" EXACT [] -xref: RNAMOD:092 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001376 -name: five_methyldihydrouridine -def: "5_methyldihydrouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyldihydrouridine" EXACT [] -synonym: "five methyldihydrouridine" EXACT [] -synonym: "m5D" EXACT RNAMOD [] -xref: RNAMOD:093 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001377 -name: three_methylpseudouridine -def: "3_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylpseudouridine" EXACT [] -synonym: "m3Y" EXACT RNAMOD [] -synonym: "three methylpseudouridine" EXACT [] -xref: RNAMOD:094 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001378 -name: five_taurinomethyluridine -def: "5_taurinomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyluridine" EXACT [] -synonym: "five taurinomethyluridine" EXACT [] -synonym: "tm5U" EXACT RNAMOD [] -xref: RNAMOD:098 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001379 -name: five_taurinomethyl_two_thiouridine -def: "5_taurinomethyl_2_thiouridineis a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyl-2-thiouridine" EXACT [] -synonym: "five taurinomethyl two thiouridine" EXACT [] -synonym: "tm5s2U" EXACT RNAMOD [] -xref: RNAMOD:099 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001380 -name: five_isopentenylaminomethyl_uridine -def: "5_isopentenylaminomethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)uridine" EXACT [] -synonym: "five isopentenylaminomethyl uridine" EXACT [] -synonym: "inm5U" EXACT RNAMOD [] -xref: RNAMOD:103 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001381 -name: five_isopentenylaminomethyl_two_thiouridine -def: "5_isopentenylaminomethyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2-thiouridine" EXACT [] -synonym: "five isopentenylaminomethyl two thiouridine" EXACT [] -synonym: "inm5s2U" EXACT RNAMOD [] -xref: RNAMOD:104 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001382 -name: five_isopentenylaminomethyl_two_prime_O_methyluridine -def: "5_isopentenylaminomethyl_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2'-O-methyluridine" EXACT [] -synonym: "five isopentenylaminomethyl two prime O methyluridine" EXACT [] -synonym: "inm5Um" EXACT RNAMOD [] -xref: RNAMOD:105 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001383 -name: histone_binding_site -def: "A binding site that, in the nucleotide molecule, interacts selectively and non-covalently with polypeptide residues of a histone." [SO:ke] -synonym: "histone binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site - -[Term] -id: SO:0001384 -name: CDS_fragment -synonym: "CDS fragment" EXACT [] -synonym: "incomplete CDS" EXACT [] -is_a: SO:0000316 ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000731 ! fragmentary -relationship: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001385 -name: modified_amino_acid_feature -def: "A post translationally modified amino acid feature." [SO:ke] -synonym: "modified amino acid feature" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001386 -name: modified_glycine -def: "A post translationally modified glycine amino acid feature." [SO:ke] -synonym: "ModGly" EXACT AAMOD [] -synonym: "modified glycine" EXACT [] -xref: MOD:00908 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001387 -name: modified_L_alanine -def: "A post translationally modified alanine amino acid feature." 
[SO:ke] -synonym: "ModAla" EXACT AAMOD [] -synonym: "modified L alanine" EXACT [] -synonym: "modified L-alanine" EXACT [] -xref: MOD:00901 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001388 -name: modified_L_asparagine -def: "A post translationally modified asparagine amino acid feature." [SO:ke] -synonym: "ModAsn" EXACT AAMOD [] -synonym: "modified L asparagine" EXACT [] -synonym: "modified L-asparagine" EXACT [] -xref: MOD:00903 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001389 -name: modified_L_aspartic_acid -def: "A post translationally modified aspartic acid amino acid feature." [SO:ke] -synonym: "ModAsp" EXACT AAMOD [] -synonym: "modified L aspartic acid" EXACT [] -synonym: "modified L-aspartic acid" EXACT [] -xref: MOD:00904 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001390 -name: modified_L_cysteine -def: "A post translationally modified cysteine amino acid feature." [SO:ke] -synonym: "ModCys" EXACT AAMOD [] -synonym: "modified L cysteine" EXACT [] -synonym: "modified L-cysteine" EXACT [] -xref: MOD:00905 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001391 -name: modified_L_glutamic_acid -synonym: "ModGlu" EXACT AAMOD [] -synonym: "modified L glutamic acid" EXACT [] -synonym: "modified L-glutamic acid" EXACT [] -xref: MOD:00906 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001392 -name: modified_L_threonine -def: "A post translationally modified threonine amino acid feature." [SO:ke] -synonym: "modified L threonine" EXACT [] -synonym: "modified L-threonine" EXACT [] -synonym: "ModThr" EXACT AAMOD [] -xref: MOD:00917 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001393 -name: modified_L_tryptophan -def: "A post translationally modified tryptophan amino acid feature." 
[SO:ke] -synonym: "modified L tryptophan" EXACT [] -synonym: "modified L-tryptophan" EXACT [] -synonym: "ModTrp" EXACT AAMOD [] -xref: MOD:00918 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001394 -name: modified_L_glutamine -def: "A post translationally modified glutamine amino acid feature." [SO:ke] -synonym: "ModGln" EXACT [] -synonym: "modified L glutamine" EXACT [] -synonym: "modified L-glutamine" EXACT [] -xref: MOD:00907 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001395 -name: modified_L_methionine -def: "A post translationally modified methionine amino acid feature." [SO:ke] -synonym: "modified L methionine" EXACT [] -synonym: "modified L-methionine" EXACT [] -synonym: "ModMet" EXACT AAMOD [] -xref: MOD:00913 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001396 -name: modified_L_isoleucine -def: "A post translationally modified isoleucine amino acid feature." [SO:ke] -synonym: "modified L isoleucine" EXACT [] -synonym: "modified L-isoleucine" EXACT [] -synonym: "ModIle" EXACT AAMOD [] -xref: MOD:00910 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001397 -name: modified_L_phenylalanine -def: "A post translationally modified phenylalanine amino acid feature." [SO:ke] -synonym: "modified L phenylalanine" EXACT [] -synonym: "modified L-phenylalanine" EXACT [] -synonym: "ModPhe" EXACT AAMOD [] -xref: MOD:00914 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001398 -name: modified_L_histidine -def: "A post translationally modified histidie amino acid feature." [SO:ke] -synonym: "ModHis" EXACT [] -synonym: "modified L histidine" EXACT [] -synonym: "modified L-histidine" EXACT [] -xref: MOD:00909 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001399 -name: modified_L_serine -def: "A post translationally modified serine amino acid feature." 
[SO:ke] -synonym: "modified L serine" EXACT [] -synonym: "modified L-serine" EXACT [] -synonym: "MosSer" EXACT AAMOD [] -xref: MOD:00916 "http://www.psidev.info/index.php?q=node/104" -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001400 -name: modified_L_lysine -def: "A post translationally modified lysine amino acid feature." [SO:ke] -synonym: "modified L lysine" EXACT [] -synonym: "modified L-lysine" EXACT [] -synonym: "ModLys" EXACT AAMOD [] -xref: MOD:00912 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001401 -name: modified_L_leucine -def: "A post translationally modified leucine amino acid feature." [SO:ke] -synonym: "modified L leucine" EXACT [] -synonym: "modified L-leucine " EXACT [] -synonym: "ModLeu" EXACT AAMOD [] -xref: MOD:00911 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001402 -name: modified_L_selenocysteine -def: "A post translationally modified selenocysteine amino acid feature." [SO:ke] -synonym: "modified L selenocysteine" EXACT [] -synonym: "modified L-selenocysteine" EXACT [] -xref: MOD:01158 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001403 -name: modified_L_valine -def: "A post translationally modified valine amino acid feature." [SO:ke] -synonym: "modified L valine" EXACT [] -synonym: "modified L-valine" EXACT [] -synonym: "ModVal" EXACT AAMOD [] -xref: MOD:00920 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001404 -name: modified_L_proline -def: "A post translationally modified proline amino acid feature." [SO:ke] -synonym: "modified L proline" EXACT [] -synonym: "modified L-proline " EXACT [] -synonym: "ModPro" EXACT AAMOD [] -xref: MOD:00915 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001405 -name: modified_L_tyrosine -def: "A post translationally modified tyrosine amino acid feature." 
[SO:ke] -synonym: "modified L tyrosine" EXACT [] -synonym: "modified L-tyrosine" EXACT [] -synonym: "ModTry" EXACT AAMOD [] -xref: MOD:00919 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001406 -name: modified_L_arginine -def: "A post translationally modified arginine amino acid feature." [SO:ke] -synonym: "ModArg" EXACT AAMOD [] -synonym: "modified L arginine" EXACT [] -synonym: "modified L-arginine" EXACT [] -xref: MOD:00902 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001407 -name: peptidyl -def: "An attribute describing the nature of a proteinaceous polymer, where by the amino acid units are joined by peptide bonds." [SO:ke] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0001408 -name: cleaved_for_gpi_anchor_region -def: "The C-terminal residues of a polypeptide which are exchanged for a GPI-anchor." [EBI:rh] -synonym: "cleaved for gpi anchor region" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001409 -name: biomaterial_region -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001410 -name: experimental_feature -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -def: "A region that is defined according to its relations with other regions within the same sequence." 
[SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001413 -name: translocation_breakpoint -def: "The point within a chromosome where a translocation begins or ends." [SO:cb] -synonym: "translocation breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001414 -name: insertion_breakpoint -def: "The point within a chromosome where a insertion begins or ends." [SO:cb] -synonym: "insertion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001415 -name: deletion_breakpoint -def: "The point within a chromosome where a deletion begins or ends." [SO:cb] -synonym: "deletion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001416 -name: five_prime_flanking_region -def: "A flanking region located five prime of a specific region." [SO:chado] -synonym: "5' flanking region" RELATED [] -synonym: "five prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001417 -name: three_prime_flanking_region -def: "A flanking region located three prime of a specific region." [SO:chado] -synonym: "3' flanking region" RELATED [] -synonym: "three prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001418 -name: transcribed_fragment -def: "An experimental region, defined by a tiling array experiment to be transcribed at some level." [SO:ke] -comment: Term requested by the MODencode group. -synonym: "transcribed fragment" EXACT [] -synonym: "transfrag" RELATED [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001419 -name: cis_splice_site -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -def: "Primary transcript region bordering trans-splice junction." 
[SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001421 -name: splice_junction -def: "The boundary between an intron and an exon." [SO:ke] -synonym: "splice boundary" EXACT [] -synonym: "splice junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001422 -name: conformational_switch -def: "A region of a polypeptide, involved in the transition from one conformational state to another." [SO:ke] -comment: MM Young, K Kirshenbaum, KA Dill & S Highsmith. Predicting conformational switches in proteins. Protein Science, 1999, 8, 1752-64. K. Kirshenbaum, M.M. Young and S. Highsmith. Predicting Allosteric Switches in Myosins. Protein Science 8(9):1806-1815. 1999. -synonym: "polypeptide conformational switch" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001423 -name: dye_terminator_read -def: "A read produced by the dye terminator method of sequencing." [SO:ke] -synonym: "dye terminator read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001424 -name: pyrosequenced_read -def: "A read produced by pyrosequencing technology." [SO:ke] -comment: An example is a read produced by Roche 454 technology. -synonym: "pyorsequenced read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001425 -name: ligation_based_read -def: "A read produced by ligation based sequencing technologies." [SO:ke] -comment: An example of this kind of read is one produced by ABI SOLiD. -synonym: "ligation based read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001426 -name: polymerase_synthesis_read -def: "A read produced by the polymerase based sequence by synthesis method." [SO:ke] -comment: An example is a read produced by Illumina technology. -synonym: "polymerase synthesis read" RELATED [] -is_a: SO:0000150 ! 
read - -[Term] -id: SO:0001427 -name: cis_regulatory_frameshift_element -def: "A structural region in an RNA molecule which promotes ribosomal frameshifting of cis coding sequence." [RFAM:jd] -synonym: "cis regulatory frameshift element" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001428 -name: expressed_sequence_assembly -def: "A sequence assembly derived from expressed sequences." [SO:ke] -comment: From tracker [ 2372385 ] expressed_sequence_assembly. -synonym: "expressed sequence assembly" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0001429 -name: DNA_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with DNA." [SO:ke] -synonym: "DNA binding site" EXACT [] -is_a: SO:0001655 ! nucleotide_binding_site - -[Term] -id: SO:0001431 -name: cryptic_gene -def: "A gene that is not transcribed under normal conditions and is not critical to normal cellular functioning." [SO:ke] -synonym: "cryptic gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000976 ! cryptic -relationship: has_quality SO:0000976 ! cryptic - -[Term] -id: SO:0001432 -name: sequence_variant_affecting_polyadenylation -comment: OBSOLETE: This term was deleted as it conflated more than one term. The alteration is separate from the effect. -synonym: "mutation affecting polyadenylation" RELATED [] -synonym: "sequence variant affecting polyadenylation" EXACT [] -is_obsolete: true -replaced_by: SO:0001545 - -[Term] -id: SO:0001433 -name: three_prime_RACE_clone -def: "A three prime RACE (Rapid Amplification of cDNA Ends) clone is a cDNA clone copied from the 3' end of an mRNA (using a poly-dT primer to capture the polyA tail and a gene-specific or randomly primed 5' primer), and spliced into a vector for propagation in a suitable host." [modENCODE:nlw] -synonym: "3' RACE clone" RELATED [] -is_a: SO:0000317 ! 
cDNA_clone - -[Term] -id: SO:0001434 -name: cassette_pseudogene -def: "A cassette pseudogene is a kind of gene in an inactive form which may recombine at a telomeric locus to form a functional copy." [SO:ke] -comment: Requested by the Trypanosome community. -synonym: "cassette pseudogene" EXACT [] -synonym: "cassette type psedogene" RELATED [] -is_a: SO:0001760 ! non_processed_pseudogene - -[Term] -id: SO:0001435 -name: alanine -comment: A place holder for a cross product with chebi. -synonym: "A" EXACT aa1 [] -synonym: "Ala" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001436 -name: valine -comment: A place holder for a cross product with chebi. -synonym: "V" EXACT aa1 [] -synonym: "Val" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001437 -name: leucine -comment: A place holder for a cross product with chebi. -synonym: "L" EXACT aa1 [] -synonym: "Leu" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001438 -name: isoleucine -comment: A place holder for a cross product with chebi. -synonym: "I" EXACT aa1 [] -synonym: "Ile" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001439 -name: proline -comment: A place holder for a cross product with chebi. -synonym: "P" EXACT aa1 [] -synonym: "Pro" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001440 -name: tryptophan -comment: A place holder for a cross product with chebi. -synonym: "Trp" EXACT aa3 [] -synonym: "W" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001441 -name: phenylalanine -comment: A place holder for a cross product with chebi. -synonym: "F" EXACT aa1 [] -synonym: "Phe" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001442 -name: methionine -comment: A place holder for a cross product with chebi. -synonym: "M" EXACT aa1 [] -synonym: "Met" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001443 -name: glycine -comment: A place holder for a cross product with chebi. 
-synonym: "G" EXACT aa1 [] -synonym: "Gly" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001444 -name: serine -comment: A place holder for a cross product with chebi. -synonym: "S" EXACT aa1 [] -synonym: "Ser" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001445 -name: threonine -comment: A place holder for a cross product with chebi. -synonym: "T" EXACT aa1 [] -synonym: "Thr" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001446 -name: tyrosine -comment: A place holder for a cross product with chebi. -synonym: "Tyr" EXACT aa3 [] -synonym: "Y" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001447 -name: cysteine -comment: A place holder for a cross product with chebi. -synonym: "C" EXACT aa1 [] -synonym: "Cys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001448 -name: glutamine -comment: A place holder for a cross product with chebi. -synonym: "Gln" EXACT aa3 [] -synonym: "Q" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001449 -name: asparagine -comment: A place holder for a cross product with chebi. -synonym: "Asn" EXACT aa3 [] -synonym: "N" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001450 -name: lysine -comment: A place holder for a cross product with chebi. -synonym: "K" EXACT aa1 [] -synonym: "Lys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001451 -name: arginine -comment: A place holder for a cross product with chebi. -synonym: "Arg" EXACT aa3 [] -synonym: "R" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001452 -name: histidine -comment: A place holder for a cross product with chebi. -synonym: "H" EXACT aa1 [] -synonym: "His" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001453 -name: aspartic_acid -comment: A place holder for a cross product with chebi. -synonym: "Asp" EXACT aa3 [] -synonym: "aspartic acid" EXACT [] -synonym: "D" EXACT aa1 [] -is_a: SO:0001237 ! 
amino_acid - -[Term] -id: SO:0001454 -name: glutamic_acid -comment: A place holder for a cross product with chebi. -synonym: "E" EXACT aa1 [] -synonym: "Glu" EXACT aa3 [] -synonym: "glutamic acid" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001455 -name: selenocysteine -comment: A place holder for a cross product with chebi. -synonym: "Sec" EXACT aa3 [] -synonym: "U" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001456 -name: pyrrolysine -comment: A place holder for a cross product with chebi. -synonym: "O" EXACT aa1 [] -synonym: "Pyl" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001457 -name: transcribed_cluster -def: "A region defined by a set of transcribed sequences from the same gene or expressed pseudogene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "transcribed cluster" EXACT [] -synonym: "unigene cluster" RELATED [] -is_a: SO:0001410 ! experimental_feature -relationship: has_part SO:0000695 ! reagent - -[Term] -id: SO:0001458 -name: unigene_cluster -def: "A kind of transcribed_cluster defined by a set of transcribed sequences from the a unique gene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "unigene cluster" RELATED [] -is_a: SO:0001457 ! transcribed_cluster - -[Term] -id: SO:0001459 -name: CRISPR -def: "Clustered Palindromic Repeats interspersed with bacteriophage derived spacer sequences." [RFAM:jd] -synonym: "Clustered_Regularly_Interspaced_Short_Palindromic_Repeat" EXACT [] -synonym: "CRISPR element" EXACT [] -xref: http:en.wikipedia.org/wiki/CRISPR -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0001460 -name: insulator_binding_site -def: "A binding site that, in an insulator region of a nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -comment: See tracker ID 2060908. -synonym: "insulator binding site" RELATED [] -is_a: SO:0001654 ! 
nucleotide_to_protein_binding_site -relationship: part_of SO:0000627 ! insulator - -[Term] -id: SO:0001461 -name: enhancer_binding_site -def: "A binding site that, in the enhancer region of a nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -synonym: "enhancer binding site" RELATED [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -relationship: part_of SO:0000165 ! enhancer - -[Term] -id: SO:0001462 -name: contig_collection -def: "A collection of contigs." [SO:ke] -comment: See tracker ID: 2138359. -synonym: "contig collection" EXACT [] -is_a: SO:0001085 ! sequence_conflict -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001085 ! sequence_conflict -intersection_of: has_part SO:0000149 ! contig -relationship: has_part SO:0000149 ! contig - -[Term] -id: SO:0001463 -name: lincRNA -def: "A multiexonic non-coding RNA transcribed by RNA polymerase II." [PMID:19182780, SO:ke] -synonym: "large intervening non-coding RNA" EXACT [] -synonym: "long intergenic non-coding RNA" EXACT [] -is_a: SO:0001877 ! lnc_RNA - -[Term] -id: SO:0001464 -name: UST -def: "An EST spanning part or all of the untranslated regions of a protein-coding transcript." [SO:nlw] -synonym: "UTR sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001465 -name: three_prime_UST -def: "A UST located in the 3'UTR of a protein-coding transcript." [SO:nlw] -synonym: "3' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001466 -name: five_prime_UST -def: "An UST located in the 5'UTR of a protein-coding transcript." [SO:nlw] -synonym: "5' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001467 -name: RST -def: "A tag produced from a single sequencing read from a RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "RACE sequence tag" EXACT [] -is_a: SO:0000345 ! 
EST - -[Term] -id: SO:0001468 -name: three_prime_RST -def: "A tag produced from a single sequencing read from a 3'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "3' RST" EXACT [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001469 -name: five_prime_RST -def: "A tag produced from a single sequencing read from a 5'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "5' RST" RELATED [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001470 -name: UST_match -def: "A match against an UST sequence." [SO:nlw] -synonym: "UST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001471 -name: RST_match -def: "A match against an RST sequence." [SO:nlw] -synonym: "RST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001472 -name: primer_match -def: "A nucleotide match to a primer sequence." [SO:nlw] -synonym: "primer match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0001473 -name: miRNA_antiguide -def: "A region of the pri miRNA that basepairs with the guide to form the hairpin." [SO:ke] -synonym: "miRNA antiguide " EXACT [] -synonym: "miRNA passenger strand" EXACT [] -synonym: "miRNA star" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-05-27T03:35:43Z - -[Term] -id: SO:0001474 -name: trans_splice_junction -def: "The boundary between the spliced leader and the first exon of the mRNA." [SO:ke] -synonym: "trans-splice junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2009-07-13T04:50:49Z - -[Term] -id: SO:0001475 -name: outron -def: "A region of a primary transcript, that is removed via trans splicing." [PMID:16401417, SO:ke] -is_a: SO:0000835 ! primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-07-14T11:36:08Z - -[Term] -id: SO:0001476 -name: natural_plasmid -def: "A plasmid that occurs naturally." 
[SO:xp] -synonym: "natural plasmid" EXACT [] -is_a: SO:0000155 ! plasmid -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000782 ! natural -relationship: has_quality SO:0000782 ! natural -created_by: kareneilbeck -creation_date: 2009-09-01T03:43:06Z - -[Term] -id: SO:0001477 -name: gene_trap_construct -def: "A gene trap construct is a type of engineered plasmid which is designed to integrate into a genome and produce a fusion transcript between exons of the gene into which it inserts and a reporter element in the construct. Gene traps contain a splice acceptor, do not contain promoter elements for the reporter, and are mutagenic. Gene traps may be bicistronic with the second cassette containing a promoter driving an a selectable marker." [ZFIN:dh] -synonym: "gene trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:49:09Z - -[Term] -id: SO:0001478 -name: promoter_trap_construct -def: "A promoter trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when inserted in close proximity to a promoter element. Promoter traps typically do not contain promoter elements and are mutagenic." [ZFIN:dh] -synonym: "promoter trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:52:01Z - -[Term] -id: SO:0001479 -name: enhancer_trap_construct -def: "An enhancer trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when the expression from a basic minimal promoter is enhanced by genomic enhancer elements. Enhancer traps contain promoter elements and are not usually mutagenic." [ZFIN:dh] -synonym: "enhancer trap construct" EXACT [] -is_a: SO:0000637 ! 
engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:53:26Z - -[Term] -id: SO:0001480 -name: PAC_end -def: "A region of sequence from the end of a PAC clone that may provide a highly specific marker." [ZFIN:mh] -synonym: "PAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000154 ! PAC -created_by: kareneilbeck -creation_date: 2009-09-09T05:18:12Z - -[Term] -id: SO:0001481 -name: RAPD -def: "RAPD is a 'PCR product' where a sequence variant is identified through the use of PCR with random primers." [ZFIN:mh] -synonym: "Random Amplification Polymorphic DNA" EXACT [] -is_a: SO:0000006 ! PCR_product -created_by: kareneilbeck -creation_date: 2009-09-09T05:26:10Z - -[Term] -id: SO:0001482 -name: shadow_enhancer -synonym: "shadow enhancer" EXACT [] -is_a: SO:0000165 ! enhancer -created_by: kareneilbeck -creation_date: 2009-09-09T05:29:29Z - -[Term] -id: SO:0001483 -name: SNV -def: "SNVs are single nucleotide positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0001484 -name: X_element_combinatorial_repeat -def: "An X element combinatorial repeat is a repeat region located between the X element and the telomere or adjacent Y' element." [http://www.yeastgenome.org/help/glossary.html] -comment: X element combinatorial repeats contain Tbf1p binding sites,\nand possible functions include a role in telomerase-independent telomere\nmaintenance via recombination or as a barrier against transcriptional\nsilencing. These are usually present as a combination of one or more of\nseveral types of smaller elements (designated A, B, C, or D). This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "X element combinatorial repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! 
telomere -created_by: kareneilbeck -creation_date: 2009-11-10T11:03:37Z - -[Term] -id: SO:0001485 -name: Y_prime_element -def: "A Y' element is a repeat region (SO:0000657) located adjacent to telomeric repeats or X element combinatorial repeats, either as a single copy or tandem repeat of two to four copies." [http:http://www.yeastgenome.org/help/glossary.html] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "Y' element" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T12:08:57Z - -[Term] -id: SO:0001486 -name: standard_draft -def: "The status of a whole genome sequence, where the data is minimally filtered or un-filtered, from any number of sequencing platforms, and is assembled into contigs. Genome sequence of this quality may harbour regions of poor quality and can be relatively incomplete." [DOI:10.1126] -synonym: "standard draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:48:32Z - -[Term] -id: SO:0001487 -name: high_quality_draft -def: "The status of a whole genome sequence, where overall coverage represents at least 90 percent of the genome." [DOI:10.1126] -synonym: "high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:52:36Z - -[Term] -id: SO:0001488 -name: improved_high_quality_draft -def: "The status of a whole genome sequence, where additional work has been performed, using either manual or automated methods, such as gap resolution." [DOI:10.1126] -synonym: "improved high quality draft" EXACT [] -is_a: SO:0001499 ! 
whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:54:35Z - -[Term] -id: SO:0001489 -name: annotation_directed_improved_draft -def: "The status of a whole genome sequence,where annotation, and verification of coding regions has occurred." [DOI:10.1126] -synonym: "annotation directed improvement" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:57:10Z - -[Term] -id: SO:0001490 -name: noncontiguous_finished -def: "The status of a whole genome sequence, where the assembly is high quality, closure approaches have been successful for most gaps, misassemblies and low quality regions." [DOI:10.1126] -synonym: "non contiguous finished" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:01:07Z - -[Term] -id: SO:0001491 -name: finished_genome -def: "The status of a whole genome sequence, with less than 1 error per 100,000 base pairs." [DOI:10.1126] -synonym: "finished" EXACT [] -synonym: "finished genome" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:04:43Z - -[Term] -id: SO:0001492 -name: intronic_regulatory_region -def: "A regulatory region that is part of an intron." [SO:ke] -synonym: "intronic regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000188 ! intron -created_by: kareneilbeck -creation_date: 2009-11-08T02:48:02Z - -[Term] -id: SO:0001493 -name: centromere_DNA_Element_I -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region composed of 8-11bp which enables binding by the centromere binding factor 1(Cbf1p)." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEI" EXACT [] -synonym: "Centromere DNA Element I" EXACT [] -is_a: SO:0000330 ! 
conserved_region -relationship: part_of SO:0001794 ! point_centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:47:23Z - -[Term] -id: SO:0001494 -name: centromere_DNA_Element_II -def: "A centromere DNA Element II (CDEII) is part a conserved region of the centromere, consisting of a consensus region that is AT-rich and ~ 75-100 bp in length." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEII" EXACT [] -synonym: "centromere DNA Element II" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0001794 ! point_centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:51:26Z - -[Term] -id: SO:0001495 -name: centromere_DNA_Element_III -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region that consists of a 25-bp which enables binding by the centromere DNA binding factor 3 (CBF3) complex." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEIII" EXACT [] -synonym: "centromere DNA Element III" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0001794 ! point_centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:54:47Z - -[Term] -id: SO:0001496 -name: telomeric_repeat -def: "The telomeric repeat is a repeat region, part of the chromosome, which in yeast, is a G-rich terminal sequence of the form (TG(1-3))n or more precisely ((TG)(1-6)TG(2-3))n." [PMID:8720065] -comment: The repeats are maintained by telomerase and there is generally 300 (+/-) 75 bp of TG(1-3) at a given end. Telomeric repeats function in completing chromosome replication and protecting the ends from degradation and end-to-end fusions. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880739. -synonym: "telomeric repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! 
telomere -created_by: kareneilbeck -creation_date: 2009-11-09T06:00:42Z - -[Term] -id: SO:0001497 -name: X_element -def: "The X element is a conserved region, of the telomere, of ~475 bp that contains an ARS sequence and in most cases an Abf1p binding site." [http://www.yeastgenome.org/help/glossary.html#xelemcoresequence] -comment: Possible functions include roles in chromosomal segregation,\nmaintenance of chromosome stability, recombinational sequestering, or as a\nbarrier to transcriptional silencing. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "X element" RELATED [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T10:56:54Z - -[Term] -id: SO:0001498 -name: YAC_end -def: "A region of sequence from the end of a YAC clone that may provide a highly specific marker." [SO:ke] -synonym: "YAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000152 ! YAC -created_by: kareneilbeck -creation_date: 2009-11-19T11:07:18Z - -[Term] -id: SO:0001499 -name: whole_genome_sequence_status -def: "The status of whole genome sequence." [DOI:10.1126] -comment: This terms and children were added to SO in response to tracker request by Patrick Chain. The paper Genome Project Standards in a New Era of Sequencing. Science October 9th 2009, addresses these terms. -synonym: "whole genome sequence status" EXACT [] -is_a: SO:0000905 ! status -created_by: kareneilbeck -creation_date: 2009-10-23T12:47:47Z - -[Term] -id: SO:0001500 -name: heritable_phenotypic_marker -def: "A biological_region characterized as a single heritable trait in a phenotype screen. The heritable phenotype may be mapped to a chromosome but generally has not been characterized to a specific gene locus." [JAX:hdene] -synonym: "heritable phenotypic marker" EXACT [] -synonym: "phenotypic marker" EXACT [] -is_a: SO:0001645 ! 
genetic_marker -created_by: kareneilbeck -creation_date: 2009-12-07T01:50:55Z - -[Term] -id: SO:0001501 -name: peptide_collection -def: "A collection of peptide sequences." [BBOP:nlw] -comment: Term requested via tracker ID: 2910829. -synonym: "peptide collection" EXACT [] -synonym: "peptide set" EXACT [] -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0000104 ! polypeptide -relationship: has_part SO:0000104 ! polypeptide -created_by: kareneilbeck -creation_date: 2009-12-11T10:58:58Z - -[Term] -id: SO:0001502 -name: high_identity_region -def: "An experimental feature with high sequence identity to another sequence." [SO:ke] -comment: Requested by tracker ID: 2902685. -synonym: "high identity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2009-12-11T11:06:05Z - -[Term] -id: SO:0001503 -name: processed_transcript -def: "A transcript for which no open reading frame has been identified and for which no other function has been determined." [MGI:hdeen] -comment: Ensembl and Vega also use this term name. Requested by Howard Deen of MGI. -synonym: "processed transcript" EXACT [] -is_a: SO:0000673 ! transcript -created_by: kareneilbeck -creation_date: 2009-12-21T05:37:14Z - -[Term] -id: SO:0001504 -name: assortment_derived_variation -def: "A chromosome variation derived from an event during meiosis." [SO:ke] -synonym: "assortment derived variation" RELATED [] -is_a: SO:0000240 ! chromosome_variation -created_by: kareneilbeck -creation_date: 2010-03-02T05:03:18Z - -[Term] -id: SO:0001505 -name: reference_genome -def: "A collection of sequences (often chromosomes) taken as the standard for a given organism and genome assembly." [SO:ke] -synonym: "reference genome" RELATED [] -is_a: SO:0001026 ! 
genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:10:03Z - -[Term] -id: SO:0001506 -name: variant_genome -def: "A collection of sequences (often chromosomes) of an individual." [SO:ke] -synonym: "variant genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:11:25Z - -[Term] -id: SO:0001507 -name: variant_collection -def: "A collection of one or more sequences of an individual." [SO:ke] -synonym: "variant collection" RELATED [] -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0001059 ! sequence_alteration -relationship: has_part SO:0001059 ! sequence_alteration -created_by: kareneilbeck -creation_date: 2010-03-03T02:13:28Z - -[Term] -id: SO:0001508 -name: alteration_attribute -synonym: "alteration attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:53:23Z - -[Term] -id: SO:0001509 -name: chromosomal_variation_attribute -synonym: "chromosomal variation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:54:30Z - -[Term] -id: SO:0001510 -name: intrachromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:25Z - -[Term] -id: SO:0001511 -name: interchromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:43Z - -[Term] -id: SO:0001512 -name: insertion_attribute -def: "A quality of a chromosomal insertion,." [SO:ke] -synonym: "insertion attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:56Z - -[Term] -id: SO:0001513 -name: tandem -is_a: SO:0001512 ! 
insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:37Z - -[Term] -id: SO:0001514 -name: direct -def: "A quality of an insertion where the insert is not in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:49Z - -[Term] -id: SO:0001515 -name: inverted -def: "A quality of an insertion where the insert is in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:40Z - -[Term] -id: SO:0001516 -name: free -def: "The quality of a duplication where the new region exists independently of the original." [SO:ke] -is_a: SO:0001523 ! duplication_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:51Z - -[Term] -id: SO:0001517 -name: inversion_attribute -synonym: "inversion attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:10Z - -[Term] -id: SO:0001518 -name: pericentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:24Z - -[Term] -id: SO:0001519 -name: paracentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:35Z - -[Term] -id: SO:0001520 -name: translocaton_attribute -synonym: "translocation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:47Z - -[Term] -id: SO:0001521 -name: reciprocal -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:34Z - -[Term] -id: SO:0001522 -name: insertional -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:51Z - -[Term] -id: SO:0001523 -name: duplication_attribute -synonym: "duplication attribute" RELATED [] -is_a: SO:0001508 ! 
alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-05T01:56:33Z - -[Term] -id: SO:0001524 -name: chromosomally_aberrant_genome -synonym: "chromosomally aberrant genome" RELATED [] -is_a: SO:0001506 ! variant_genome -created_by: kareneilbeck -creation_date: 2010-03-05T02:21:00Z - -[Term] -id: SO:0001525 -name: assembly_error_correction -def: "A region of sequence where the final nucleotide assignment differs from the original assembly due to an improvement that replaces a mistake." [SO:ke] -synonym: "assembly error correction" RELATED [] -is_a: SO:0000413 ! sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:16:31Z - -[Term] -id: SO:0001526 -name: base_call_error_correction -def: "A region of sequence where the final nucleotide assignment is different from that given by the base caller due to an improvement that replaces a mistake." [SO:ke] -synonym: "base call error correction" RELATED [] -is_a: SO:0000413 ! sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:18:07Z - -[Term] -id: SO:0001527 -name: peptide_localization_signal -def: "A region of peptide sequence used to target the polypeptide molecule to a specific organelle." [SO:ke] -subset: SOFA -synonym: "localization signal" RELATED [] -synonym: "peptide localization signal" EXACT [] -is_a: SO:0000839 ! polypeptide_region -created_by: kareneilbeck -creation_date: 2010-03-11T02:15:05Z - -[Term] -id: SO:0001528 -name: nuclear_localization_signal -def: "A polypeptide region that targets a polypeptide to the nucleus." [SO:ke] -synonym: "NLS" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_localization_signal "wikipedia" -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:16:38Z - -[Term] -id: SO:0001529 -name: endosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the endosome." [SO:ke] -synonym: "endosomal localization signal" EXACT [] -is_a: SO:0001527 ! 
peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:20:58Z - -[Term] -id: SO:0001530 -name: lysosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the lysosome." [SO:ke] -synonym: "lysosomal localization signal" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:24:10Z - -[Term] -id: SO:0001531 -name: nuclear_export_signal -def: "A polypeptide region that targets a polypeptide to he cytoplasm." [SO:ke] -synonym: "NES" EXACT [] -synonym: "nuclear export signal" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_export_signal -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:25:25Z - -[Term] -id: SO:0001532 -name: recombination_signal_sequence -def: "A region recognized by a recombinase." [SO:ke] -synonym: "recombination signal sequence" RELATED [] -xref: http://en.wikipedia.org/wiki/Recombination_Signal_Sequences "wikipedia" -is_a: SO:0000299 ! specific_recombination_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:16:47Z - -[Term] -id: SO:0001533 -name: cryptic_splice_site -def: "A splice site that is in part of the transcript not normally spliced. They occur via mutation or transcriptional error." [SO:ke] -synonym: "cryptic splice signal" RELATED [] -synonym: "cryptic splice site" EXACT [] -is_a: SO:0000162 ! splice_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:25:06Z - -[Term] -id: SO:0001534 -name: nuclear_rim_localization_signal -def: "A polypeptide region that targets a polypeptide to the nuclear rim." [SO:ke] -synonym: "nuclear rim localization signal" RELATED [] -xref: PMID:16027110 -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T03:31:30Z - -[Term] -id: SO:0001535 -name: p_element -def: "A P_element is a DNA transposon responsible for hybrid dysgenesis." 
[SO:ke] -synonym: "P element" RELATED [] -is_a: SO:0000182 ! DNA_transposon -created_by: kareneilbeck -creation_date: 2010-03-12T03:40:33Z - -[Term] -id: SO:0001536 -name: functional_variant -def: "A sequence variant in which the function of a gene product is altered with respect to a reference." [SO:ke] -synonym: "functional variant" EXACT [] -is_a: SO:0001060 ! sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:30:25Z - -[Term] -id: SO:0001537 -name: structural_variant -def: "A sequence variant that changes one or more sequence features." [SO:ke] -synonym: "structural variant" RELATED [] -is_a: SO:0001060 ! sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:31:01Z - -[Term] -id: SO:0001538 -name: transcript_function_variant -def: "A sequence variant which alters the functioning of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcript function variant" EXACT [] -is_a: SO:0001536 ! functional_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:32:58Z - -[Term] -id: SO:0001539 -name: translational_product_function_variant -def: "A sequence variant that affects the functioning of a translational product with respect to a reference sequence." [SO:ke] -synonym: "translational product variant" EXACT [] -is_a: SO:0001536 ! functional_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:46:15Z - -[Term] -id: SO:0001540 -name: level_of_transcript_variant -def: "A sequence variant which alters the level of a transcript." [SO:ke] -synonym: "level of transcript variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:47:07Z - -[Term] -id: SO:0001541 -name: decreased_transcript_level_variant -def: "A sequence variant that increases the level of mature, spliced and processed RNA with respect to a reference sequence." [SO:ke] -synonym: "decreased transcript level" EXACT [] -is_a: SO:0001540 ! 
level_of_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:47:47Z - -[Term] -id: SO:0001542 -name: increased_transcript_level_variant -def: "A sequence variant that increases the level of mature, spliced and processed RNA with respect to a reference sequence." [SO:ke] -synonym: "increased transcript level variant" EXACT [] -is_a: SO:0001540 ! level_of_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:48:17Z - -[Term] -id: SO:0001543 -name: transcript_processing_variant -def: "A sequence variant that affects the post transcriptional processing of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcript processing variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:48:48Z - -[Term] -id: SO:0001544 -name: editing_variant -def: "A transcript processing variant whereby the process of editing is disrupted with respect to the reference." [SO:ke] -synonym: "editing variant" EXACT [] -is_a: SO:0001543 ! transcript_processing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:49:25Z - -[Term] -id: SO:0001545 -name: polyadenylation_variant -def: "A sequence variant that changes polyadenylation with respect to a reference sequence." [SO:ke] -synonym: "polyadenylation variant" EXACT [] -is_a: SO:0001543 ! transcript_processing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:49:40Z - -[Term] -id: SO:0001546 -name: transcript_stability_variant -def: "A variant that changes the stability of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcript stability variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:01Z - -[Term] -id: SO:0001547 -name: decreased_transcript_stability_variant -def: "A sequence variant that decreases transcript stability with respect to a reference sequence." 
[SO:ke] -synonym: "decrease transcript stability variant" EXACT [] -is_a: SO:0001546 ! transcript_stability_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:23Z - -[Term] -id: SO:0001548 -name: increased_transcript_stability_variant -def: "A sequence variant that increases transcript stability with respect to a reference sequence." [SO:ke] -synonym: "increased transcript stability variant" EXACT [] -is_a: SO:0001546 ! transcript_stability_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:39Z - -[Term] -id: SO:0001549 -name: transcription_variant -def: "A variant that changes alters the transcription of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcription variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:51:26Z - -[Term] -id: SO:0001550 -name: rate_of_transcription_variant -def: "A sequence variant that changes the rate of transcription with respect to a reference sequence." [SO:ke] -synonym: "rate of transcription variant" EXACT [] -is_a: SO:0001549 ! transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:51:50Z - -[Term] -id: SO:0001551 -name: increased_transcription_rate_variant -def: "A sequence variant that increases the rate of transcription with respect to a reference sequence." [SO:ke] -synonym: "increased transcription rate variant" EXACT [] -is_a: SO:0001550 ! rate_of_transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:52:17Z - -[Term] -id: SO:0001552 -name: decreased_transcription_rate_variant -def: "A sequence variant that decreases the rate of transcription with respect to a reference sequence." [SO:ke] -synonym: "decreased transcription rate variant" EXACT [] -is_a: SO:0001550 ! 
rate_of_transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:52:43Z - -[Term] -id: SO:0001553 -name: translational_product_level_variant -def: "A functional variant that changes the translational product level with respect to a reference sequence." [SO:ke] -synonym: "translational product level variant" EXACT [] -is_a: SO:0001539 ! translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:53:32Z - -[Term] -id: SO:0001554 -name: polypeptide_function_variant -def: "A sequence variant which changes polypeptide functioning with respect to a reference sequence." [SO:ke] -synonym: "polypeptide function variant" EXACT [] -is_a: SO:0001539 ! translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:53:54Z - -[Term] -id: SO:0001555 -name: decreased_translational_product_level -def: "A sequence variant which decreases the translational product level with respect to a reference sequence." [SO:ke] -synonym: "decrease translational product level" EXACT [] -is_a: SO:0001553 ! translational_product_level_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:54:25Z - -[Term] -id: SO:0001556 -name: increased_translational_product_level -def: "A sequence variant which increases the translational product level with respect to a reference sequence." [SO:ke] -synonym: "increase translational product level" EXACT [] -is_a: SO:0001553 ! translational_product_level_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:55:25Z - -[Term] -id: SO:0001557 -name: polypeptide_gain_of_function_variant -def: "A sequence variant which causes gain of polypeptide function with respect to a reference sequence." [SO:ke] -synonym: "polypeptide gain of function variant" EXACT [] -is_a: SO:0001554 ! 
polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:12Z - -[Term] -id: SO:0001558 -name: polypeptide_localization_variant -def: "A sequence variant which changes the localization of a polypeptide with respect to a reference sequence." [SO:ke] -synonym: "polypeptide localization variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:37Z - -[Term] -id: SO:0001559 -name: polypeptide_loss_of_function_variant -def: "A sequence variant that causes the loss of a polypeptide function with respect to a reference sequence." [SO:ke] -synonym: "polypeptide loss of function variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:58Z - -[Term] -id: SO:0001560 -name: inactive_ligand_binding_site -def: "A sequence variant that causes the inactivation of a ligand binding site with respect to a reference sequence." [SO:ke] -synonym: "inactive ligand binding site" EXACT [] -is_a: SO:0001559 ! polypeptide_loss_of_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:58:00Z - -[Term] -id: SO:0001561 -name: polypeptide_partial_loss_of_function -def: "A sequence variant that causes some but not all loss of polypeptide function with respect to a reference sequence." [SO:ke] -synonym: "polypeptide partial loss of function" EXACT [] -is_a: SO:0001559 ! polypeptide_loss_of_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:58:32Z - -[Term] -id: SO:0001562 -name: polypeptide_post_translational_processing_variant -def: "A sequence variant that causes a change in post translational processing of the peptide with respect to a reference sequence." [SO:ke] -synonym: "polypeptide post translational processing variant" EXACT [] -is_a: SO:0001554 ! 
polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:59:06Z - -[Term] -id: SO:0001563 -name: copy_number_change -def: "A sequence variant where copies of a feature (CNV) are either increased or decreased." [SO:ke] -synonym: "copy number change" EXACT [] -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:27:33Z - -[Term] -id: SO:0001564 -name: gene_variant -def: "A sequence variant where the structure of the gene is changed." [SO:ke] -synonym: "gene structure variant" EXACT [] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:01Z - -[Term] -id: SO:0001565 -name: gene_fusion -def: "A sequence variant whereby a two genes have become joined." [SO:ke] -synonym: "gene fusion" EXACT [] -is_a: SO:0001564 ! gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:28Z - -[Term] -id: SO:0001566 -name: regulatory_region_variant -def: "A sequence variant located within a regulatory region." [SO:ke] -comment: EBI term: Regulatory region variations - In regulatory region annotated by Ensembl. -synonym: "regulatory region variant" EXACT [] -synonym: "regulatory_region_" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:48Z - -[Term] -id: SO:0001567 -name: stop_retained_variant -def: "A sequence variant where at least one base in the terminator codon is changed, but the terminator remains." [SO:ke] -synonym: "stop retained variant" EXACT [] -is_a: SO:0001590 ! terminator_codon_variant -is_a: SO:0001819 ! synonymous_variant -created_by: kareneilbeck -creation_date: 2010-04-19T05:02:30Z - -[Term] -id: SO:0001568 -name: splicing_variant -def: "A sequence variant that changes the process of splicing." [SO:ke] -synonym: "splicing variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001564 ! 
gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:29:22Z - -[Term] -id: SO:0001569 -name: cryptic_splice_site_variant -def: "A sequence variant causing a new (functional) splice site." [EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "cryptic splice site activation" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001568 ! splicing_variant -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:29:41Z - -[Term] -id: SO:0001570 -name: cryptic_splice_acceptor -def: "A sequence variant whereby a new splice site is created due to the activation of a new acceptor." [SO:ke] -synonym: "cryptic splice acceptor" EXACT [] -is_a: SO:0001569 ! cryptic_splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:30:11Z - -[Term] -id: SO:0001571 -name: cryptic_splice_donor -def: "A sequence variant whereby a new splice site is created due to the activation of a new donor." [SO:ke] -synonym: "cryptic splice donor" EXACT [] -is_a: SO:0001569 ! cryptic_splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:30:35Z - -[Term] -id: SO:0001572 -name: exon_loss_variant -def: "A sequence variant whereby an exon is lost from the transcript." [SO:ke] -synonym: "exon loss" EXACT [] -is_a: SO:0001568 ! splicing_variant -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:09Z - -[Term] -id: SO:0001573 -name: intron_gain -def: "A sequence variant whereby an intron is gained by the processed transcript; usually a result of an alteration of the donor or acceptor." [EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "intron gain" EXACT [] -synonym: "intron gain variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001568 ! splicing_variant -is_a: SO:0001576 ! 
transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:25Z - -[Term] -id: SO:0001574 -name: splice_acceptor_variant -def: "A splice variant that changes the 2 base region at the 3' end of an intron." [SO:ke] -synonym: "splice acceptor variant" EXACT [] -is_a: SO:0001629 ! splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:52Z - -[Term] -id: SO:0001575 -name: splice_donor_variant -def: "A splice variant that changes the 2 base pair region at the 5' end of an intron." [SO:ke] -synonym: "splice donor variant" EXACT [] -is_a: SO:0001629 ! splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:32:10Z - -[Term] -id: SO:0001576 -name: transcript_variant -def: "A sequence variant that changes the structure of the transcript." [SO:ke] -synonym: "transcript variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001564 ! gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:32:41Z - -[Term] -id: SO:0001577 -name: complex_transcript_variant -def: "A transcript variant with a complex INDEL- Insertion or deletion that spans an exon/intron border or a coding sequence/UTR border." [http://ensembl.org/info/docs/variation/index.html] -comment: EBI term: Complex InDel - Insertion or deletion that spans an exon/intron border or a coding sequence/UTR border. -synonym: "complex transcript variant" EXACT [] -synonym: "complex_indel" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "complext change in transcript" EXACT [] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:33:03Z - -[Term] -id: SO:0001578 -name: stop_lost -def: "A sequence variant where at least one base of the terminator codon (stop) is changed, resulting in an elongated transcript." [SO:ke] -comment: EBI term: Stop lost - In coding sequence, resulting in the loss of a stop codon. 
-synonym: "stop codon lost" EXACT [] -synonym: "stop lost" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001590 ! terminator_codon_variant -is_a: SO:0001907 ! feature_elongation -is_a: SO:0001992 ! nonsynonymous_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:46:42Z - -[Term] -id: SO:0001579 -name: transcript_sequence_variant -synonym: "transcript sequence variant" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001580 -name: coding_sequence_variant -alt_id: SO:0001581 -def: "A sequence variant that changes the coding sequence." [SO:ke] -synonym: "coding sequence variant" EXACT [] -synonym: "coding variant" EXACT [] -synonym: "codon variant" EXACT [] -synonym: "codon_variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001791 ! exon_variant -is_a: SO:0001968 ! coding_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:34:36Z - -[Term] -id: SO:0001582 -name: initiator_codon_variant -def: "A codon variant that changes at least one base of the first codon of a transcript." [SO:ke] -synonym: "initiatior codon variant" EXACT [] -synonym: "initiator codon change" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -xref: loinc:LA6695-6 "Initiating Methionine" -is_a: SO:0001992 ! nonsynonymous_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:35:18Z - -[Term] -id: SO:0001583 -name: missense_variant -alt_id: SO:0001584 -alt_id: SO:0001783 -def: "A sequence variant, that changes one or more bases, resulting in a different amino acid sequence but where the length is preserved." [EBI:fc, EBI:gr, SO:ke] -comment: EBI term: Non-synonymous SNPs. SNPs that are located in the coding sequence and result in an amino acid change in the encoded peptide sequence. A change that causes a non_synonymous_codon can be more than 3 bases - for example 4 base substitution. 
-synonym: "missense" EXACT [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "missense codon" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -xref: http://en.wikipedia.org/wiki/Missense_mutation -xref: loinc:LA6698-0 "Missense" -is_a: SO:0001992 ! nonsynonymous_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:35:49Z - -[Term] -id: SO:0001585 -name: conservative_missense_variant -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for a different but similar amino acid. These variants may or may not be deleterious." [SO:ke] -synonym: "conservative missense codon" EXACT [] -synonym: "conservative missense variant" EXACT [] -synonym: "neutral missense codon" RELATED [] -synonym: "quiet missense codon" RELATED [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001583 ! missense_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:36:40Z - -[Term] -id: SO:0001586 -name: non_conservative_missense_variant -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for an amino acid with different biochemical properties." [SO:ke] -synonym: "non conservative missense codon" EXACT [] -synonym: "non conservative missense variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001583 ! missense_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:37:16Z - -[Term] -id: SO:0001587 -name: stop_gained -def: "A sequence variant whereby at least one base of a codon is changed, resulting in a premature stop codon, leading to a shortened transcript." [SO:ke] -comment: EBI term: Stop gained - In coding sequence, resulting in the gain of a stop codon (i.e. leading to a shortened peptide sequence). 
-synonym: "nonsense" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "nonsense codon" EXACT [] -synonym: "stop codon gained" RELATED [] -synonym: "stop gained" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -xref: loinc:LA6699-8 "Nonsense" -is_a: SO:0001906 ! feature_truncation -is_a: SO:0001992 ! nonsynonymous_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:37:52Z - -[Term] -id: SO:0001589 -name: frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three." [SO:ke] -comment: EBI term:Frameshift variations - In coding sequence, resulting in a frameshift. -synonym: "frameshift variant" EXACT [] -synonym: "frameshift_" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "frameshift_coding" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -xref: loinc:LA6694-9 "Frameshift" -is_a: SO:0001818 ! protein_altering_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:40:19Z - -[Term] -id: SO:0001590 -name: terminator_codon_variant -alt_id: SO:0001625 -def: "A sequence variant whereby at least one of the bases in the terminator codon is changed." [SO:ke] -comment: The terminal codon may be the terminator, or in an incomplete transcript the last available codon. -synonym: "terminal codon variant" EXACT [] -synonym: "terminal_codon_variant" EXACT [] -synonym: "terminator codon variant" EXACT [] -xref: loinc:LA6700-2 "Stop Codon Mutation" -is_a: SO:0001580 ! coding_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:40:37Z - -[Term] -id: SO:0001591 -name: frame_restoring_variant -def: "A sequence variant that reverts the sequence of a previous frameshift mutation back to the initial frame." [SO:ke] -synonym: "frame restoring variant" EXACT [] -is_a: SO:0001589 ! 
frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:09Z - -[Term] -id: SO:0001592 -name: minus_1_frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, by shifting one base ahead." [http://arjournals.annualreviews.org/doi/pdf/10.1146/annurev.ge.08.120174.001535] -synonym: "-1 frameshift variant" EXACT [] -synonym: "minus 1 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:30Z - -[Term] -id: SO:0001593 -name: minus_2_frameshift_variant -synonym: "-2 frameshift variant" EXACT [] -synonym: "minus 2 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:52Z - -[Term] -id: SO:0001594 -name: plus_1_frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, by shifting one base backward." [http://arjournals.annualreviews.org/doi/pdf/10.1146/annurev.ge.08.120174.001535] -synonym: "+1 frameshift variant" EXACT [] -synonym: "plus 1 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:42:06Z - -[Term] -id: SO:0001595 -name: plus_2_frameshift_variant -synonym: "+2 frameshift variant" EXACT [] -synonym: "plus 2 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:42:23Z - -[Term] -id: SO:0001596 -name: transcript_secondary_structure_variant -def: "A sequence variant within a transcript that changes the secondary structure of the RNA product." [SO:ke] -synonym: "transcript secondary structure variant" EXACT [] -is_a: SO:0001576 ! 
transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:43:18Z - -[Term] -id: SO:0001597 -name: compensatory_transcript_secondary_structure_variant -def: "A secondary structure variant that compensate for the change made by a previous variant." [SO:ke] -synonym: "compensatory transcript secondary structure variant" EXACT [] -is_a: SO:0001596 ! transcript_secondary_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:43:54Z - -[Term] -id: SO:0001598 -name: translational_product_structure_variant -def: "A sequence variant within the transcript that changes the structure of the translational product." [SO:ke] -synonym: "translational product structure variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001564 ! gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:44:17Z - -[Term] -id: SO:0001599 -name: 3D_polypeptide_structure_variant -def: "A sequence variant that changes the resulting polypeptide structure." [SO:ke] -synonym: "3D polypeptide structure variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:44:46Z - -[Term] -id: SO:0001600 -name: complex_3D_structural_variant -def: "A sequence variant that changes the resulting polypeptide structure." [SO:ke] -synonym: "complex 3D structural variant" EXACT [] -is_a: SO:0001599 ! 3D_polypeptide_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:45:13Z - -[Term] -id: SO:0001601 -name: conformational_change_variant -def: "A sequence variant in the CDS region that causes a conformational change in the resulting polypeptide sequence." [SO:ke] -synonym: "conformational change variant" EXACT [] -is_a: SO:0001599 ! 
3D_polypeptide_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:45:48Z - -[Term] -id: SO:0001602 -name: complex_change_of_translational_product_variant -synonym: "complex change of translational product variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:46:54Z - -[Term] -id: SO:0001603 -name: polypeptide_sequence_variant -def: "A sequence variant with in the CDS that causes a change in the resulting polypeptide sequence." [SO:ke] -synonym: "polypeptide sequence variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:13Z - -[Term] -id: SO:0001604 -name: amino_acid_deletion -def: "A sequence variant within a CDS resulting in the loss of an amino acid from the resulting polypeptide." [SO:ke] -synonym: "amino acid deletion" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:36Z - -[Term] -id: SO:0001605 -name: amino_acid_insertion -def: "A sequence variant within a CDS resulting in the gain of an amino acid to the resulting polypeptide." [SO:ke] -synonym: "amino acid insertion" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:56Z - -[Term] -id: SO:0001606 -name: amino_acid_substitution -def: "A sequence variant of a codon resulting in the substitution of one amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "amino acid substitution" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! 
polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:48:17Z - -[Term] -id: SO:0001607 -name: conservative_amino_acid_substitution -def: "A sequence variant of a codon causing the substitution of a similar amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "conservative amino acid substitution" EXACT [] -is_a: SO:0001606 ! amino_acid_substitution -created_by: kareneilbeck -creation_date: 2010-03-22T02:48:57Z - -[Term] -id: SO:0001608 -name: non_conservative_amino_acid_substitution -def: "A sequence variant of a codon causing the substitution of a non conservative amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "non conservative amino acid substitution" EXACT [] -is_a: SO:0001606 ! amino_acid_substitution -created_by: kareneilbeck -creation_date: 2010-03-22T02:49:23Z - -[Term] -id: SO:0001609 -name: elongated_polypeptide -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence." [SO:ke] -synonym: "elongated polypeptide" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:49:52Z - -[Term] -id: SO:0001610 -name: elongated_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated polypeptide C terminal" EXACT [] -is_a: SO:0001609 ! elongated_polypeptide -created_by: kareneilbeck -creation_date: 2010-03-22T02:50:20Z - -[Term] -id: SO:0001611 -name: elongated_polypeptide_N_terminal -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated polypeptide N terminal" EXACT [] -is_a: SO:0001609 ! 
elongated_polypeptide -created_by: kareneilbeck -creation_date: 2010-03-22T02:50:31Z - -[Term] -id: SO:0001612 -name: elongated_in_frame_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes in frame elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated in frame polypeptide C terminal" EXACT [] -is_a: SO:0001610 ! elongated_polypeptide_C_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:05Z - -[Term] -id: SO:0001613 -name: elongated_out_of_frame_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes out of frame elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated polypeptide out of frame C terminal" EXACT [] -is_a: SO:0001610 ! elongated_polypeptide_C_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:20Z - -[Term] -id: SO:0001614 -name: elongated_in_frame_polypeptide_N_terminal_elongation -def: "A sequence variant with in the CDS that causes in frame elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated in frame polypeptide N terminal" EXACT [] -is_a: SO:0001611 ! elongated_polypeptide_N_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:49Z - -[Term] -id: SO:0001615 -name: elongated_out_of_frame_polypeptide_N_terminal -def: "A sequence variant with in the CDS that causes out of frame elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated out of frame N terminal" EXACT [] -is_a: SO:0001611 ! elongated_polypeptide_N_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:52:05Z - -[Term] -id: SO:0001616 -name: polypeptide_fusion -def: "A sequence variant that causes a fusion of two polypeptide sequences." [SO:ke] -synonym: "polypeptide fusion" EXACT [] -is_a: SO:0001603 ! 
polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:52:43Z - -[Term] -id: SO:0001617 -name: polypeptide_truncation -def: "A sequence variant of the CD that causes a truncation of the resulting polypeptide." [SO:ke] -synonym: "polypeptide truncation" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:53:07Z - -[Term] -id: SO:0001618 -name: inactive_catalytic_site -def: "A sequence variant that causes the inactivation of a catalytic site with respect to a reference sequence." [SO:ke] -synonym: "inactive catalytic site" EXACT [] -is_a: SO:0001560 ! inactive_ligand_binding_site -created_by: kareneilbeck -creation_date: 2010-03-22T03:06:14Z - -[Term] -id: SO:0001619 -name: non_coding_transcript_variant -def: "A transcript variant of a non coding RNA gene." [SO:ke] -comment: Within non-coding gene - Located within a gene that does not code for a protein. -synonym: "nc transcript variant" EXACT [] -synonym: "non coding transcript variant" EXACT [] -synonym: "within_non_coding_gene" EXACT dbsnp [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:16:23Z - -[Term] -id: SO:0001620 -name: mature_miRNA_variant -def: "A transcript variant located with the sequence of the mature miRNA." [SO:ke] -comment: EBI term: Within mature miRNA - Located within a microRNA. -synonym: "mature miRNA variant" EXACT [] -synonym: "within_mature_miRNA" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001619 ! non_coding_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:16:58Z - -[Term] -id: SO:0001621 -name: NMD_transcript_variant -def: "A variant in a transcript that is the target of NMD." 
[SO:ke] -synonym: "NMD transcript variant" EXACT [] -synonym: "NMD_transcript" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:20:40Z - -[Term] -id: SO:0001622 -name: UTR_variant -def: "A transcript variant that is located within the UTR." [SO:ke] -synonym: "UTR variant" EXACT [] -synonym: "UTR_" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001791 ! exon_variant -is_a: SO:0001968 ! coding_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:22:58Z - -[Term] -id: SO:0001623 -name: 5_prime_UTR_variant -def: "A UTR variant of the 5' UTR." [SO:ke] -comment: EBI term: 5prime UTR variations - In 5prime UTR (untranslated region). -synonym: "5'UTR variant" EXACT [] -synonym: "5PRIME_UTR" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "five prime UTR variant" EXACT [] -synonym: "untranslated-5" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001622 ! UTR_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:23:29Z - -[Term] -id: SO:0001624 -name: 3_prime_UTR_variant -def: "A UTR variant of the 3' UTR." [SO:ke] -comment: EBI term 3prime UTR variations - In 3prime UTR. -synonym: "3'UTR variant" EXACT [] -synonym: "3PRIME_UTR" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "three prime UTR variant" EXACT [] -synonym: "untranslated-3" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001622 ! UTR_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:23:54Z - -[Term] -id: SO:0001626 -name: incomplete_terminal_codon_variant -def: "A sequence variant where at least one base of the final codon of an incompletely annotated transcript is changed." 
[SO:ke] -comment: EBI term: Partial codon - Located within the final, incomplete codon of a transcript with a shortened coding sequence where the end is unknown. -synonym: "incomplete terminal codon variant" EXACT [] -synonym: "partial_codon" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001590 ! terminator_codon_variant -is_a: SO:0001650 ! inframe_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:51:15Z - -[Term] -id: SO:0001627 -name: intron_variant -def: "A transcript variant occurring within an intron." [SO:ke] -comment: EBI term: Intronic variations - In intron. -synonym: "intron variant" EXACT [] -synonym: "intron_" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "intronic" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:52:38Z - -[Term] -id: SO:0001628 -name: intergenic_variant -def: "A sequence variant located in the intergenic region, between genes." [SO:ke] -comment: EBI term Intergenic variations - More than 5 kb either upstream or downstream of a transcript. -synonym: "intergenic" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "intergenic variant" EXACT [] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-23T05:07:37Z - -[Term] -id: SO:0001629 -name: splice_site_variant -def: "A sequence variant that changes the first two or last two bases of an intron, or the 5th base from the start of the intron in the orientation of the transcript." [http://ensembl.org/info/docs/variation/index.html] -comment: EBI term - essential splice site - In the first 2 or the last 2 base pairs of an intron. The 5th base is on the donor (5') side of the intron. Updated to b in line with Cancer Genome Project at the Sanger. 
-synonym: "essential_splice_site" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "splice site variant" EXACT [] -is_a: SO:0001568 ! splicing_variant -is_a: SO:0001969 ! coding_transcript_intron_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:42:00Z - -[Term] -id: SO:0001630 -name: splice_region_variant -def: "A sequence variant in which a change has occurred within the region of the splice site, either within 1-3 bases of the exon or 3-8 bases of the intron." [http://ensembl.org/info/docs/variation/index.html] -comment: EBI term: splice site - 1-3 bps into an exon or 3-8 bps into an intron. -synonym: "splice region variant" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001568 ! splicing_variant -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:46:02Z - -[Term] -id: SO:0001631 -name: upstream_gene_variant -def: "A sequence variant located 5' of a gene." [SO:ke] -comment: Different groups annotate up and downstream to different lengths. The subtypes are specific and are backed up with cross references. -synonym: "upstream gene variant" EXACT [] -is_a: SO:0001628 ! intergenic_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:49:13Z - -[Term] -id: SO:0001632 -name: downstream_gene_variant -def: "A sequence variant located 3' of a gene." [SO:ke] -comment: Different groups annotate up and downstream to different lengths. The subtypes are specific and are backed up with cross references. -synonym: "downstream gene variant" EXACT [] -is_a: SO:0001628 ! intergenic_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:49:38Z - -[Term] -id: SO:0001633 -name: 5KB_downstream_variant -def: "A sequence variant located within 5 KB of the end of a gene." [SO:ke] -comment: EBI term Downstream variations - Within 5 kb downstream of the 3prime end of a transcript. 
-synonym: "5KB downstream variant" EXACT [] -synonym: "downstream" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "within 5KB downstream" RELATED [] -is_a: SO:0001632 ! downstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:50:16Z - -[Term] -id: SO:0001634 -name: 500B_downstream_variant -def: "A sequence variant located within a half KB of the end of a gene." [SO:ke] -synonym: "500B downstream variant" EXACT [] -synonym: "near-gene-3" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001633 ! 5KB_downstream_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:50:42Z - -[Term] -id: SO:0001635 -name: 5KB_upstream_variant -def: "A sequence variant located within 5KB 5' of a gene." [SO:ke] -comment: EBI term Upstream variations - Within 5 kb upstream of the 5prime end of a transcript. -synonym: "5kb upstream variant" EXACT [] -synonym: "upstream" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001631 ! upstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:51:06Z - -[Term] -id: SO:0001636 -name: 2KB_upstream_variant -def: "A sequence variant located within 2KB 5' of a gene." [SO:ke] -synonym: "2KB upstream variant" EXACT [] -synonym: "near-gene-5" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001635 ! 5KB_upstream_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:51:22Z - -[Term] -id: SO:0001637 -name: rRNA_gene -def: "A gene that encodes for ribosomal RNA." [SO:ke] -synonym: "rDNA" EXACT [] -synonym: "rRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:10:32Z - -[Term] -id: SO:0001638 -name: piRNA_gene -def: "A gene that encodes for an piwi associated RNA." [SO:ke] -synonym: "piRNA gene" EXACT [] -is_a: SO:0001263 ! 
ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:11:36Z - -[Term] -id: SO:0001639 -name: RNase_P_RNA_gene -def: "A gene that encodes an RNase P RNA." [SO:ke] -synonym: "RNase P RNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:13:23Z - -[Term] -id: SO:0001640 -name: RNase_MRP_RNA_gene -def: "A gene that encodes a RNase_MRP_RNA." [SO:ke] -synonym: "RNase MRP RNA gene" RELATED [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:13:58Z - -[Term] -id: SO:0001641 -name: lincRNA_gene -def: "A gene that encodes large intervening non-coding RNA." [SO:ke] -synonym: "lincRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:14:24Z - -[Term] -id: SO:0001642 -name: mathematically_defined_repeat -def: "A mathematically defined repeat (MDR) is a experimental feature that is determined by querying overlapping oligomers of length k against a database of shotgun sequence data and identifying regions in the query sequence that exceed a statistically determined threshold of repetitiveness." [SO:jestill] -comment: Mathematically defined repeat regions are determined without regard to the biological origin of the repetitive region. The repeat units of a MDR are the overlapping oligomers of size k that were used to for the query. Tools that can annotate mathematically defined repeats include Tallymer (Kurtz et al 2008, BMC Genomics: 517) and RePS (Wang et al, Genome Res 12(5): 824-831.). -synonym: "mathematically defined repeat" EXACT [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2010-05-03T11:50:14Z - -[Term] -id: SO:0001643 -name: telomerase_RNA_gene -def: "A telomerase RNA gene is a non coding RNA gene the RNA product of which is a component of telomerase." 
[SO:ke] -synonym: "Telomerase RNA component" EXACT [] -synonym: "telomerase RNA gene" EXACT [] -synonym: "TERC" EXACT [] -xref: http:http://en.wikipedia.org/wiki/Telomerase_RNA_component "wikipedia" -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-05-18T05:26:38Z - -[Term] -id: SO:0001644 -name: targeting_vector -def: "An engineered vector that is able to take part in homologous recombination in a host with the intent of introducing site specific genomic modifications." [MGD:tm, PMID:10354467] -synonym: "targeting vector" RELATED [] -is_a: SO:0000440 ! vector_replicon -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000440 ! vector_replicon -intersection_of: has_part SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_part SO:0000853 ! homologous_region -relationship: has_quality SO:0000783 ! engineered -created_by: kareneilbeck -creation_date: 2010-05-28T02:05:25Z - -[Term] -id: SO:0001645 -name: genetic_marker -def: "A measurable sequence feature that varies within a population." [SO:db] -synonym: "genetic marker" RELATED [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-05-28T02:33:07Z - -[Term] -id: SO:0001646 -name: DArT_marker -def: "A genetic marker, discovered using Diversity Arrays Technology (DArT) technology." [SO:ke] -synonym: "DArT marker" EXACT [] -is_a: SO:0001645 ! genetic_marker -created_by: kareneilbeck -creation_date: 2010-05-28T02:34:43Z - -[Term] -id: SO:0001647 -name: kozak_sequence -def: "A kind of ribosome entry site, specific to Eukaryotic organisms that overlaps part of both 5' UTR and CDS sequence." [SO:ke] -subset: SOFA -synonym: "kozak consensus" EXACT [] -synonym: "kozak consensus sequence" EXACT [] -synonym: "kozak sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Kozak_consensus_sequence "wikipedia" -is_a: SO:0000139 ! 
ribosome_entry_site -created_by: kareneilbeck -creation_date: 2010-06-07T03:12:20Z - -[Term] -id: SO:0001648 -name: nested_transposon -def: "A transposon that is disrupted by the insertion of another element." [SO:ke] -synonym: "nested transposon" EXACT [] -is_a: SO:0000101 ! transposable_element -created_by: kareneilbeck -creation_date: 2010-06-23T03:22:57Z - -[Term] -id: SO:0001649 -name: nested_repeat -def: "A repeat that is disrupted by the insertion of another element." [SO:ke] -synonym: "nested repeat" RELATED [] -is_a: SO:0000657 ! repeat_region -created_by: kareneilbeck -creation_date: 2010-06-23T03:24:55Z - -[Term] -id: SO:0001650 -name: inframe_variant -def: "A sequence variant which does not cause a disruption of the translational reading frame." [SO:ke] -synonym: "cds-indel" EXACT dbsnp [] -synonym: "inframe variant" EXACT [] -is_a: SO:0001818 ! protein_altering_variant -created_by: kareneilbeck -creation_date: 2010-07-19T01:24:44Z - -[Term] -id: SO:0001653 -name: retinoic_acid_responsive_element -def: "A transcription factor binding site of variable direct repeats of the sequence PuGGTCA spaced by five nucleotides (DR5) found in the promoters of retinoic acid-responsive genes, to which retinoic acid receptors bind." [PMID:11327309, PMID:19917671] -synonym: "RARE" EXACT [] -synonym: "retinoic acid responsive element" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000167 ! promoter -created_by: kareneilbeck -creation_date: 2010-08-03T10:46:12Z - -[Term] -id: SO:0001654 -name: nucleotide_to_protein_binding_site -def: "A binding site that, in the nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -subset: SOFA -synonym: "nucleotide to protein binding site" RELATED [] -is_a: SO:0000410 ! 
protein_binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:26:05Z - -[Term] -id: SO:0001655 -name: nucleotide_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with nucleotide residues." [SO:cb] -comment: See GO:0000166 : nucleotide binding. -synonym: "np_bind" EXACT BS [uniprot:feature] -synonym: "nucleotide binding site" EXACT [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:30:04Z - -[Term] -id: SO:0001656 -name: metal_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with metal ions." [SO:cb] -comment: See GO:0046872 : metal ion binding. -synonym: "metal binding site" RELATED [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:31:42Z - -[Term] -id: SO:0001657 -name: ligand_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with a small molecule such as a drug, or hormone." [SO:ke] -synonym: "ligand binding site" EXACT [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:32:58Z - -[Term] -id: SO:0001658 -name: nested_tandem_repeat -def: "An NTR is a nested repeat of two distinct tandem motifs interspersed with each other." [SO:AF] -comment: Tracker ID: 3052459. -synonym: "nested tandem repeat" EXACT [] -synonym: "NTR" EXACT [] -is_a: SO:0001649 ! nested_repeat -created_by: kareneilbeck -creation_date: 2010-08-26T09:36:16Z - -[Term] -id: SO:0001659 -name: promoter_element -synonym: "promoter element" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: overlaps SO:0000235 ! TF_binding_site -created_by: kareneilbeck -creation_date: 2010-10-01T11:48:32Z - -[Term] -id: SO:0001660 -name: core_promoter_element -synonym: "core promoter element" EXACT [] -synonym: "general transcription factor binding site" RELATED [] -is_a: SO:0001659 ! 
promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T11:49:03Z - -[Term] -id: SO:0001661 -name: RNA_polymerase_II_TATA_box -def: "A TATA box core promoter of a gene transcribed by RNA polymerase II." [PMID:16858867] -synonym: "RNA polymerase II TATA box" EXACT [] -is_a: SO:0000174 ! TATA_box -relationship: part_of SO:0001669 ! RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:42:12Z - -[Term] -id: SO:0001662 -name: RNA_polymerase_III_TATA_box -def: "A TATA box core promoter of a gene transcribed by RNA polymerase III." [SO:ke] -synonym: "RNA polymerase III TATA box" EXACT [] -is_a: SO:0000174 ! TATA_box -relationship: part_of SO:0000171 ! RNApol_III_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:43:16Z - -[Term] -id: SO:0001663 -name: BREd_motif -def: "A core TRNA polymerase II promoter element with consensus (G/A)T(T/G/A)(T/A)(G/T)(T/G)(T/G)." [PMID:16858867] -synonym: "BREd" EXACT [] -synonym: "BREd motif" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:49:55Z - -[Term] -id: SO:0001664 -name: DCE -def: "A discontinuous core element of RNA polymerase II transcribed genes, situated downstream of the TSS. It is composed of three sub elements: SI, SII and SIII." [PMID:16858867] -synonym: "downstream core element" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:56:41Z - -[Term] -id: SO:0001665 -name: DCE_SI -def: "A sub element of the DCE core promoter element, with consensus sequence CTTC." [PMID:16858867, SO:ke] -synonym: "DCE SI" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! 
DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:10Z - -[Term] -id: SO:0001666 -name: DCE_SII -def: "A sub element of the DCE core promoter element with consensus sequence CTGT." [PMID:16858867, SO:ke] -synonym: "DCE SII" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:30Z - -[Term] -id: SO:0001667 -name: DCE_SIII -def: "A sub element of the DCE core promoter element with consensus sequence AGC." [PMID:16858867, SO:ke] -synonym: "DCE SIII" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:44Z - -[Term] -id: SO:0001668 -name: proximal_promoter_element -def: "DNA segment that ranges from about -250 to -40 relative to +1 of RNA transcription start site, where sequence specific DNA-binding transcription factors binds, such as Sp1, CTF (CCAAT-binding transcription factor), and CBF (CCAAT-box binding factor)." [PMID:12515390, PMID:9679020, SO:ml] -synonym: "proximal promoter element" RELATED [] -synonym: "specific transcription factor binding site" RELATED [] -is_a: SO:0001678 ! regulatory_promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T03:10:23Z - -[Term] -id: SO:0001669 -name: RNApol_II_core_promoter -def: "The minimal portion of the promoter required to properly initiate transcription in RNA polymerase II transcribed genes." [PMID:16858867] -synonym: "RNApol II core promoter" EXACT [] -is_a: SO:0000170 ! RNApol_II_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T03:13:41Z - -[Term] -id: SO:0001670 -name: distal_promoter_element -synonym: "distal promoter element" RELATED [] -is_a: SO:0001678 ! regulatory_promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T03:21:08Z - -[Term] -id: SO:0001671 -name: bacterial_RNApol_promoter_sigma_70 -synonym: "bacterial RNA polymerase promoter sigma 70" EXACT [] -is_a: SO:0000613 ! 
bacterial_RNApol_promoter -created_by: kareneilbeck -creation_date: 2010-10-06T01:41:34Z - -[Term] -id: SO:0001672 -name: bacterial_RNApol_promoter_sigma54 -synonym: "bacterial RNA polymerase promoter sigma54" EXACT [] -is_a: SO:0000613 ! bacterial_RNApol_promoter -created_by: kareneilbeck -creation_date: 2010-10-06T01:42:37Z - -[Term] -id: SO:0001673 -name: minus_12_signal -def: "A conserved region about 12-bp upstream of the start point of bacterial transcription units, involved with sigma factor 54." [PMID:18331472] -synonym: "minus 12 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001672 ! bacterial_RNApol_promoter_sigma54 -created_by: kareneilbeck -creation_date: 2010-10-06T01:44:57Z - -[Term] -id: SO:0001674 -name: minus_24_signal -def: "A conserved region about 12-bp upstream of the start point of bacterial transcription units, involved with sigma factor 54." [PMID:18331472] -synonym: "minus 24 signal" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001672 ! bacterial_RNApol_promoter_sigma54 -created_by: kareneilbeck -creation_date: 2010-10-06T01:45:24Z - -[Term] -id: SO:0001675 -name: A_box_type_1 -def: "An A box within an RNA polymerase III type 1 promoter." [SO:ke] -comment: The A box can be found in the promoters of type 1 and type 2 (pol III) so sub-typing here allows the part of relationship of the subtypes to remain true. -synonym: "A box type 1" RELATED [] -is_a: SO:0000619 ! A_box -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 -created_by: kareneilbeck -creation_date: 2010-10-06T05:43:43Z - -[Term] -id: SO:0001676 -name: A_box_type_2 -def: "An A box within an RNA polymerase III type 2 promoter." [SO:ke] -comment: The A box can be found in the promoters of type 1 and type 2 (pol III) so sub-typing here allows the part of relationship of the subtypes to remain true. -synonym: "A box type 2" RELATED [] -is_a: SO:0000619 ! A_box -relationship: part_of SO:0000618 ! 
RNApol_III_promoter_type_2 -created_by: kareneilbeck -creation_date: 2010-10-06T05:44:18Z - -[Term] -id: SO:0001677 -name: intermediate_element -def: "A core promoter region of RNA polymerase III type 1 promoters." [PMID:12381659] -synonym: "IE" EXACT [] -synonym: "intermediate element" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 -created_by: kareneilbeck -creation_date: 2010-10-06T05:52:03Z - -[Term] -id: SO:0001678 -name: regulatory_promoter_element -def: "A promoter element that is not part of the core promoter, but provides the promoter with a specific regulatory region." [PMID:12381659] -synonym: "regulatory promoter element" RELATED [] -is_a: SO:0001659 ! promoter_element -created_by: kareneilbeck -creation_date: 2010-10-07T04:39:48Z - -[Term] -id: SO:0001679 -name: transcription_regulatory_region -def: "A regulatory region that is involved in the control of the process of transcription." [SO:ke] -subset: SOFA -synonym: "transcription regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:49:35Z - -[Term] -id: SO:0001680 -name: translation_regulatory_region -def: "A regulatory region that is involved in the control of the process of translation." [SO:ke] -synonym: "translation regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:52:45Z - -[Term] -id: SO:0001681 -name: recombination_regulatory_region -def: "A regulatory region that is involved in the control of the process of recombination." [SO:ke] -synonym: "recombination regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:53:35Z - -[Term] -id: SO:0001682 -name: replication_regulatory_region -def: "A regulatory region that is involved in the control of the process of nucleotide replication." 
[SO:ke] -synonym: "replication regulatory region" RELATED [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:54:09Z - -[Term] -id: SO:0001683 -name: sequence_motif -def: "A sequence motif is a nucleotide or amino-acid sequence pattern that may have biological significance." [http://en.wikipedia.org/wiki/Sequence_motif] -subset: SOFA -synonym: "sequence motif" RELATED [] -xref: http://en.wikipedia.org/wiki/Sequence_motif "wikipedia" -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-10-14T04:13:22Z - -[Term] -id: SO:0001684 -name: experimental_feature_attribute -def: "An attribute of an experimentally derived feature." [SO:ke] -synonym: "experimental feature attribute" RELATED [] -is_a: SO:0000733 ! feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:22:23Z - -[Term] -id: SO:0001685 -name: score -def: "The score of an experimentally derived feature such as a p-value." [SO:ke] -is_a: SO:0001684 ! experimental_feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:23:16Z - -[Term] -id: SO:0001686 -name: quality_value -def: "An experimental feature attribute that defines the quality of the feature in a quantitative way, such as a phred quality score." [SO:ke] -synonym: "quality value" RELATED [] -is_a: SO:0001684 ! experimental_feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:24:11Z - -[Term] -id: SO:0001687 -name: restriction_enzyme_recognition_site -def: "The nucleotide region (usually a palindrome) that is recognized by a restriction enzyme. This may or may not be equal to the restriction enzyme binding site." [SO:ke] -synonym: "restriction endonuclease recognition site" EXACT [] -synonym: "restriction enzyme recognition site" EXACT [] -is_a: SO:0001954 ! 
restriction_enzyme_region -created_by: kareneilbeck -creation_date: 2010-10-29T12:29:57Z - -[Term] -id: SO:0001688 -name: restriction_enzyme_cleavage_junction -def: "The boundary at which a restriction enzyme breaks the nucleotide sequence." [SO:ke] -synonym: "restriction enzyme cleavage junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2010-10-29T12:35:02Z - -[Term] -id: SO:0001689 -name: five_prime_restriction_enzyme_junction -def: "The restriction enzyme cleavage junction on the 5' strand of the nucleotide sequence." [SO:ke] -synonym: "5' restriction enzyme junction" EXACT [] -is_a: SO:0001694 ! single_strand_restriction_enzyme_cleavage_site -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:36:24Z - -[Term] -id: SO:0001690 -name: three_prime_restriction_enzyme_junction -synonym: "3' restriction enzyme junction" EXACT [] -is_a: SO:0001694 ! single_strand_restriction_enzyme_cleavage_site -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:37:52Z - -[Term] -id: SO:0001691 -name: blunt_end_restriction_enzyme_cleavage_site -synonym: "blunt end restriction enzyme cleavage site" EXACT [] -is_a: SO:0001687 ! restriction_enzyme_recognition_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:39:53Z - -[Term] -id: SO:0001692 -name: sticky_end_restriction_enzyme_cleavage_site -synonym: "sticky end restriction enzyme cleavage site" RELATED [] -is_a: SO:0001687 ! restriction_enzyme_recognition_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:40:50Z - -[Term] -id: SO:0001693 -name: blunt_end_restriction_enzyme_cleavage_junction -def: "A restriction enzyme cleavage site where both strands are cut at the same position." [SO:ke] -synonym: "blunt end restriction enzyme cleavage site" RELATED [] -is_a: SO:0001688 ! 
restriction_enzyme_cleavage_junction -relationship: part_of SO:0001691 ! blunt_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:43:14Z - -[Term] -id: SO:0001694 -name: single_strand_restriction_enzyme_cleavage_site -def: "A restriction enzyme cleavage site whereby only one strand is cut." [SO:ke] -synonym: "single strand restriction enzyme cleavage site" RELATED [] -is_a: SO:0001688 ! restriction_enzyme_cleavage_junction -created_by: kareneilbeck -creation_date: 2010-10-29T12:44:48Z - -[Term] -id: SO:0001695 -name: restriction_enzyme_single_strand_overhang -def: "A terminal region of DNA sequence where the end of the region is not blunt ended." [SO:ke] -synonym: "single strand overhang" EXACT [] -synonym: "sticky end" RELATED [] -is_a: SO:0001954 ! restriction_enzyme_region -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:48:35Z - -[Term] -id: SO:0001696 -name: experimentally_defined_binding_region -def: "A region that has been implicated in binding although the exact coordinates of binding may be unknown." [SO:ke] -synonym: "experimentally defined binding region" RELATED [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2010-11-02T11:39:59Z - -[Term] -id: SO:0001697 -name: ChIP_seq_region -def: "A region of sequence identified by CHiP seq technology to contain a protein binding site." [SO:ke] -synonym: "ChIP seq region" RELATED [] -is_a: SO:0001696 ! experimentally_defined_binding_region -relationship: contains SO:0000410 ! protein_binding_site -created_by: kareneilbeck -creation_date: 2010-11-02T11:43:07Z - -[Term] -id: SO:0001698 -name: ASPE_primer -def: "\"A primer containing an SNV at the 3' end for accurate genotyping." [http://www.ncbi.nlm.nih.gov/pubmed/11252801] -synonym: "allele specific primer extension primer" EXACT [] -synonym: "ASPE primer" EXACT [] -is_a: SO:0000112 ! 
primer -created_by: kareneilbeck -creation_date: 2010-11-11T03:25:21Z - -[Term] -id: SO:0001699 -name: dCAPS_primer -def: "A primer with one or more mis-matches to the DNA template corresponding to a position within a restriction enzyme recognition site." [http://www.ncbi.nlm.nih.gov/pubmed/9628033] -synonym: "dCAPS primer" EXACT [] -synonym: "derived cleaved amplified polymorphic primer" EXACT [] -is_a: SO:0000112 ! primer -created_by: kareneilbeck -creation_date: 2010-11-11T03:27:09Z - -[Term] -id: SO:0001700 -name: histone_modification -def: "Histone modification is a post translationally modified region whereby residues of the histone protein are modified by methylation, acetylation, phosphorylation, ubiquitination, sumoylation, citrullination, or ADP-ribosylation." [http:en.wikipedia.org/wiki/Histone] -synonym: "histone modification" EXACT [] -synonym: "histone modification site" RELATED [] -is_a: SO:0001089 ! post_translationally_modified_region -is_a: SO:0001720 ! epigenetically_modified_region -intersection_of: SO:0001089 ! post_translationally_modified_region -intersection_of: has_quality SO:0000133 ! epigenetically_modified -relationship: has_quality SO:0000133 ! epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-31T10:22:08Z - -[Term] -id: SO:0001701 -name: histone_methylation_site -def: "A histone modification site where the modification is the methylation of the residue." [SO:ke] -synonym: "histone methylation" EXACT [] -synonym: "histone methylation site" EXACT [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:02Z - -[Term] -id: SO:0001702 -name: histone_acetylation_site -def: "A histone modification where the modification is the acylation of the residue." [SO:ke] -synonym: "histone acetylation" EXACT [] -synonym: "histone acetylation site" EXACT [] -is_a: SO:0001700 ! 
histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:27Z - -[Term] -id: SO:0001703 -name: H3K9_acetylation_site -def: "A kind of histone modification site, whereby the 9th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 acetylation site" EXACT [] -synonym: "H3K9Ac" RELATED [] -is_a: SO:0001973 ! histone_3_acetylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:05Z - -[Term] -id: SO:0001704 -name: H3K14_acetylation_site -def: "A kind of histone modification site, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K14 acetylation site" EXACT [] -synonym: "H3K14Ac" RELATED [] -is_a: SO:0001973 ! histone_3_acetylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:53Z - -[Term] -id: SO:0001705 -name: H3K4_monomethylation_site -def: "A kind of histone modification, whereby the 4th residue (a lysine), from the start of the H3 protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 mono-methylation site" EXACT [] -synonym: "H3K4me1" RELATED [] -is_a: SO:0001734 ! H3K4_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:28:14Z - -[Term] -id: SO:0001706 -name: H3K4_trimethylation -def: "A kind of histone modification site, whereby the 4th residue (a lysine), from the start of the H3 protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 tri-methylation" EXACT [] -synonym: "H3K4me3" RELATED [] -is_a: SO:0001734 ! H3K4_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:29:12Z - -[Term] -id: SO:0001707 -name: H3K9_trimethylation_site -def: "A kind of histone modification site, whereby the 9th residue (a lysine), from the start of the H3 histone protein is tri-methylated." 
[http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 tri-methylation site" EXACT [] -synonym: "H3K9Me3" RELATED [] -is_a: SO:0001736 ! H3K9_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:30:34Z - -[Term] -id: SO:0001708 -name: H3K27_monomethylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2K27 mono-methylation site" EXACT [] -synonym: "H2K27Me1" RELATED [] -is_a: SO:0001732 ! H3K27_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:31:54Z - -[Term] -id: SO:0001709 -name: H3K27_trimethylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K27 tri-methylation site" EXACT [] -synonym: "H3K27Me3" RELATED [] -is_a: SO:0001732 ! H3K27_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:32:41Z - -[Term] -id: SO:0001710 -name: H3K79_monomethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is mono- methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 mono-methylation site" EXACT [] -synonym: "H3K79me1" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:33:42Z - -[Term] -id: SO:0001711 -name: H3K79_dimethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is di-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 di-methylation site" EXACT [] -synonym: "H3K79Me2" RELATED [] -is_a: SO:0001735 ! 
H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:34:39Z - -[Term] -id: SO:0001712 -name: H3K79_trimethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 tri-methylation site" EXACT [] -synonym: "H3K79Me3" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:35:30Z - -[Term] -id: SO:0001713 -name: H4K20_monomethylation_site -def: "A kind of histone modification site, whereby the 20th residue (a lysine), from the start of the H4histone protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H4K20 mono-methylation site" EXACT [] -synonym: "H4K20Me1" RELATED [] -is_a: SO:0001701 ! histone_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:36:43Z - -[Term] -id: SO:0001714 -name: H2BK5_monomethylation_site -def: "A kind of histone modification site, whereby the 5th residue (a lysine), from the start of the H2B protein is methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2BK5 mono-methylation site" EXACT [] -is_a: SO:0001701 ! histone_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:38:12Z - -[Term] -id: SO:0001715 -name: ISRE -def: "An ISRE is a transcriptional cis regulatory region, containing the consensus region: YAGTTTC(A/T)YTTTYCC, responsible for increased transcription via interferon binding." [http://genesdev.cshlp.org/content/2/4/383.abstrac] -comment: Term requested via tracker (2981725) by Alan Ruttenberg, April 2010. It has been described as both an enhancer and a promoter, so the parent is the more general term. -synonym: "interferon stimulated response element" EXACT [] -is_a: SO:0001055 ! 
transcriptional_cis_regulatory_region -created_by: kareneilbeck -creation_date: 2010-04-05T11:15:08Z - -[Term] -id: SO:0001716 -name: histone_ubiqitination_site -def: "A histone modification site where ubiquitin may be added." [SO:ke] -synonym: "histone ubiquitination site" RELATED [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-04-13T10:12:18Z - -[Term] -id: SO:0001717 -name: H2B_ubiquitination_site -def: "A histone modification site on H2B where ubiquitin may be added." [SO:ke] -synonym: "H2BUbiq" RELATED [] -is_a: SO:0001716 ! histone_ubiqitination_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:13:28Z - -[Term] -id: SO:0001718 -name: H3K18_acetylation_site -def: "A kind of histone modification site, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K18 acetylation site" EXACT [] -synonym: "H3K18Ac" RELATED [] -is_a: SO:0001973 ! histone_3_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:39:35Z - -[Term] -id: SO:0001719 -name: H3K23_acylation_site -def: "A kind of histone modification, whereby the 23rd residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K23 acylation site" RELATED [] -synonym: "H3K23Ac" RELATED [] -is_a: SO:0001973 ! histone_3_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:42:45Z - -[Term] -id: SO:0001720 -name: epigenetically_modified_region -def: "A biological region implicated in inherited changes caused by mechanisms other than changes in the underlying DNA sequence." [http://en.wikipedia.org/wiki/Epigenetics, SO:ke] -subset: SOFA -synonym: "epigenetically modified region" RELATED [] -is_a: SO:0001411 ! biological_region -intersection_of: SO:0001411 ! biological_region -intersection_of: has_quality SO:0000133 ! epigenetically_modified -relationship: has_quality SO:0000133 ! 
epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-27T12:02:29Z - -[Term] -id: SO:0001721 -name: H3K27_acylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K27 acylation site" EXACT [] -synonym: "H3K27Ac" RELATED [] -is_a: SO:0001973 ! histone_3_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:44:09Z - -[Term] -id: SO:0001722 -name: H3K36_monomethylation_site -def: "A kind of histone modification site, whereby the 36th residue (a lysine), from the start of the H3 histone protein is mono-methylated." [SO:ke] -synonym: "H3K36 mono-methylation site" EXACT [] -synonym: "H3K36. -synonym: "transcriptionally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -def: "Expressed in relatively constant amounts without regard to cellular environmental conditions such as the concentration of a particular substrate." [SO:ke] -synonym: "transcriptionally constitutive" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -def: "An inducer molecule is required for transcription to occur." [SO:ke] -synonym: "transcriptionally induced" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -def: "A repressor molecule is required for transcription to stop." [SO:ke] -synonym: "transcriptionally repressed" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000127 -name: silenced_gene -def: "A gene that is silenced." [SO:xp] -synonym: "silenced gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000893 ! silenced -relationship: has_quality SO:0000893 ! 
silenced - -[Term] -id: SO:0000128 -name: gene_silenced_by_DNA_modification -def: "A gene that is silenced by DNA modification." [SO:xp] -synonym: "gene silenced by DNA modification" EXACT [] -is_a: SO:0000127 ! silenced_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000894 ! silenced_by_DNA_modification -relationship: has_quality SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000129 -name: gene_silenced_by_DNA_methylation -def: "A gene that is silenced by DNA methylation." [SO:xp] -synonym: "gene silenced by DNA methylation" EXACT [] -synonym: "methylation-silenced gene" EXACT [] -is_a: SO:0000128 ! gene_silenced_by_DNA_modification -intersection_of: SO:0000128 ! gene_silenced_by_DNA_modification -intersection_of: has_quality SO:0000895 ! silenced_by_DNA_methylation -relationship: has_quality SO:0000895 ! silenced_by_DNA_methylation - -[Term] -id: SO:0000130 -name: post_translationally_regulated -def: "An attribute describing a gene that is regulated after it has been translated." [SO:ke] -synonym: "post translationally regulated" EXACT [] -synonym: "post-translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000131 -name: translationally_regulated -def: "An attribute describing a gene that is regulated as it is translated." [SO:ke] -synonym: "translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "DNA reverse primer" EXACT [] -synonym: "reverse DNA primer" EXACT [] -synonym: "reverse primer" EXACT [] -synonym: "reverse primer oligo" EXACT [] -synonym: "reverse primer oligonucleotide" EXACT [] -synonym: "reverse primer sequence" EXACT [] -is_a: SO:0000112 ! primer -intersection_of: SO:0000112 ! 
primer -intersection_of: has_quality SO:0001031 ! reverse -relationship: has_quality SO:0001031 ! reverse - -[Term] -id: SO:0000133 -name: epigenetically_modified -def: "This attribute describes a gene where heritable changes other than those in the DNA sequence occur. These changes include: modification to the DNA (such as DNA methylation, the covalent modification of cytosine), and post-translational modification of histones." [SO:ke] -synonym: "epigenetically modified" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000134 -name: genomically_imprinted -def: "Imprinted genes are epigenetically modified genes that are expressed monoallelically according to their parent of origin." [SO:ke] -synonym: "genomically imprinted" EXACT [] -synonym: "imprinted" BROAD [] -xref: http:http://en.wikipedia.org/wiki/Genomic_imprinting "wiki" -is_a: SO:0000119 ! regulated -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000135 -name: maternally_imprinted -def: "The maternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "maternally imprinted" EXACT [] -is_a: SO:0000134 ! genomically_imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -def: "The paternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "paternally imprinted" EXACT [] -is_a: SO:0000134 ! genomically_imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -def: "Allelic exclusion is a process occurring in diploid organisms, where a gene is inactivated and not expressed in that cell." [SO:ke] -comment: Examples are x-inactivation and immunoglobulin formation. -synonym: "allelically excluded" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000138 -name: gene_rearranged_at_DNA_level -def: "An epigenetically modified gene, rearranged at the DNA level." [SO:xp] -synonym: "gene rearranged at DNA level" EXACT [] -is_a: SO:0000898 ! 
epigenetically_modified_gene -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000904 ! rearranged_at_DNA_level -relationship: has_quality SO:0000904 ! rearranged_at_DNA_level - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0001680 ! translation_regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -synonym: "DNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000145 -name: recoded_codon -def: "A codon that has been redefined at translation. 
The redefinition may be as a result of translational bypass, translational frameshifting or stop codon readthrough." [SO:xp] -synonym: "recoded codon" EXACT [] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000146 -name: capped -def: "An attribute describing when a sequence, usually an mRNA is capped by the addition of a modified guanine nucleotide at the 5' end." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0001876 ! partial_genomic_sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unavailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." 
[SO:ke] -subset: SOFA -xref: http:http://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "yeast artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "bacterial artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000154 -name: PAC -def: "The P1-derived artificial chromosome are DNA constructs that are derived from the DNA of P1 bacteriophage. They can carry large amounts (about 100-300 kilobases) of other sequences for a variety of bioengineering purposes. It is one type of vector used to clone DNA fragments (100- to 300-kb insert size; average, 150 kb) in Escherichia coli cells." [http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. Drosophila melanogaster PACs carry an average insert size of 80 kb. The library represents a 6-fold coverage of the genome. -synonym: "P1" EXACT [] -synonym: "P1 artificial chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000155 -name: plasmid -def: "A self replicating, using the hosts cellular machinery, often circular nucleic acid molecule that is distinct from a chromosome in the organism." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
-synonym: "plasmid sequence" EXACT [] -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as a plasmid or packaged as a phage,since they retain the lambda cos sites." [SO:ma] -comment: Paper: vans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1):9-20. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cosmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Cosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma] -synonym: "phagemid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Phagemid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilizes the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource: mapping and analysis of 96 clones. Genomics 1996. -synonym: "fosmid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Fosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000159 -name: deletion -alt_id: SO:1000033 -def: "The point at which one or more contiguous nucleotides were excised." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -synonym: "nucleotide deletion" EXACT [] -synonym: "nucleotide_deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -xref: loinc:LA6692-3 "Deletion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. 
The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_obsolete: true - -[Term] -id: SO:0000161 -name: methylated_adenine -def: "A modified base in which adenine has been methylated." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -synonym: "methylated_A" EXACT [] -is_a: SO:0000306 ! methylated_DNA_base_feature -is_a: SO:0001962 ! modified_adenine - -[Term] -id: SO:0000162 -name: splice_site -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinery. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." 
[http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a place holder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000166 -name: enhancer_bound_by_factor -def: "An enhancer bound by a factor." [SO:xp] -synonym: "enhancer bound by factor" EXACT [] -is_a: SO:0000165 ! enhancer -intersection_of: SO:0000165 ! enhancer -intersection_of: has_quality SO:0000277 ! bound_by_factor -relationship: has_quality SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! 
transcriptional_cis_regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." [SO:ke] -synonym: "pol I promoter" EXACT [] -synonym: "polymerase I promoter" EXACT [] -synonym: "RNA polymerase A promoter" EXACT [] -synonym: "RNApol I promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke] -synonym: "pol II promoter" RELATED [] -synonym: "polymerase II promoter" EXACT [] -synonym: "RNA polymerase B promoter" EXACT [] -synonym: "RNApol II promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." [SO:ke] -synonym: "pol III promoter" EXACT [] -synonym: "polymerase III promoter" EXACT [] -synonym: "RNA polymerase C promoter" EXACT [] -synonym: "RNApol III promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "CAAT box" EXACT [] -synonym: "CAAT signal" EXACT [] -synonym: "CAAT-box" EXACT [] -xref: http://en.wikipedia.org/wiki/CAAT_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0000173 -name: GC_rich_promoter_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "GC rich promoter region" EXACT [] -synonym: "GC-rich region" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:16858867] -comment: Binds TBP. -synonym: "Goldstein-Hogness box" EXACT [] -synonym: "TATA box" EXACT [] -xref: http://en.wikipedia.org/wiki/TATA_box "wiki" -is_a: SO:0001660 ! core_promoter_element - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT. This region is associated with sigma factor 70." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-10 signal" EXACT [] -synonym: "minus 10 signal" EXACT [] -synonym: "Pribnow box" EXACT [] -synonym: "Pribnow Schaller box" EXACT [] -synonym: "Pribnow-Schaller box" EXACT [] -xref: http://en.wikipedia.org/wiki/Pribnow_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001671 ! bacterial_RNApol_promoter_sigma_70 -relationship: part_of SO:0001913 ! bacterial_RNApol_promoter_sigma_ecf - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA. 
This region is associated with sigma factor 70." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-35 signal" EXACT [] -synonym: "minus 35 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001671 ! bacterial_RNApol_promoter_sigma_70 -relationship: part_of SO:0001913 ! bacterial_RNApol_promoter_sigma_ecf - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." [http://www.dddmag.com/Glossary.aspx#r] -synonym: "class I" RELATED [] -synonym: "class I transposon" EXACT [] -synonym: "retrotransposon element" EXACT [] -xref: http://en.wikipedia.org/wiki/Retrotransposon "wiki" -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." 
[SO:ke] -synonym: "class II" RELATED [] -synonym: "class II transposon" EXACT [] -synonym: "DNA transposon" EXACT [] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." [SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -synonym: "U2 intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." [SO:ke] -synonym: "long terminal repeat retrotransposon" EXACT [] -synonym: "LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A region of a primary transcript that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
-subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -synonym: "non LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -synonym: "5' intron" EXACT [] -synonym: "5' intron sequence" EXACT [] -synonym: "five prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000191 -name: interior_intron -synonym: "interior intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -synonym: "3' intron" EXACT [] -synonym: "3' intron sequence" RELATED [] -synonym: "three prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." [GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "LINE" EXACT [] -synonym: "LINE element" EXACT [] -synonym: "Long interspersed element" EXACT [] -synonym: "Long interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon (here, 'codon' is inclusive of the stop_codon)." 
[SO:ke] -subset: SOFA -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding_exon_coding_region -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke] -subset: DBVAR -synonym: "transchr" RELATED [http://www.ncbi.nlm.nih.gov/dbvar/] -synonym: "translocated sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -subset: SOFA -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000201 -name: interior_exon -def: "An exon that is bounded by 5' and 3' splice sites." [PMID:10373547] -synonym: "interior exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The coding exon that is most 3-prime on a given transcript." [SO:ma] -synonym: "3' coding exon" RELATED [] -synonym: "three prime coding exon" EXACT [] -is_a: SO:0000195 ! 
coding_exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." [SO:ke] -synonym: "Short interspersed element" EXACT [] -synonym: "Short interspersed nuclear element" EXACT [] -synonym: "SINE element" EXACT [] -xref: http://en.wikipedia.org/wiki/Short_interspersed_nuclear_element "wiki" -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_variation -synonym: "simple sequence length polymorphism" RELATED [] -synonym: "simple sequence length variation" EXACT [] -synonym: "SSLP" RELATED [] -is_a: SO:0000248 ! 
sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "terminal inverted repeat element" EXACT [] -synonym: "TIR element" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." [SO:ke] -subset: SOFA -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253)." [SO:ke] -synonym: "tRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -def: "A primary transcript encoding alanyl tRNA." [SO:ke] -synonym: "alanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." [SO:ke] -synonym: "arginine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -synonym: "asparagine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -synonym: "aspartic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." 
[SO:ke] -synonym: "cysteine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." [SO:ke] -synonym: "glycine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." [SO:ke] -synonym: "histidine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke] -synonym: "isoleucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -synonym: "leucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -synonym: "lysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -synonym: "methionine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke] -synonym: "phenylalanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -synonym: "proline tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -synonym: "serine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -synonym: "threonine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -synonym: "tryptophan tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." [SO:ke] -synonym: "tyrosine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." [SO:ke] -synonym: "valine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear RNA (SO:0000274)." [SO:ke] -synonym: "snRNA primary transcript" EXACT [] -is_a: SO:0000483 ! 
nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -synonym: "snoRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a nucleotide molecule that binds a Transcription Factor or Transcription Factor complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -is_a: SO:0001679 ! 
transcription_regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The in-frame interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -synonym: "transcript attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterized by large modular imperfect long inverted repeats." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "foldback element" EXACT [] -synonym: "long inverted repeat element" RELATED [] -synonym: "LVR element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The sequences extending on either side of a specific region." [SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000240 -name: chromosome_variation -synonym: "chromosome variation" EXACT [] -is_a: SO:0001507 ! variant_collection -disjoint_from: SO:0000400 ! sequence_attribute -relationship: part_of SO:0001524 ! chromosomally_aberrant_genome - -[Term] -id: SO:0000241 -name: internal_UTR -def: "A UTR bordered by the terminal and initial codons of two CDSs in a polycistronic transcript. Every UTR is either 5', 3' or internal." [SO:cjm] -synonym: "internal UTR" EXACT [] -is_a: SO:0000203 ! 
UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polycistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -synonym: "untranslated region polycistronic mRNA" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke] -synonym: "internal ribosomal entry sequence" EXACT [] -synonym: "internal ribosomal entry site" EXACT [] -synonym: "internal ribosome entry sequence" RELATED [] -synonym: "internal ribosome entry site" EXACT [] -synonym: "IRES" EXACT [] -xref: http://en.wikipedia.org/wiki/Internal_ribosome_entry_site "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_obsolete: true - -[Term] -id: SO:0000246 -name: polyadenylated -def: "A attribute describing the addition of a poly A tail to the 3' end of a mRNA molecule." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000248 -name: sequence_length_variation -synonym: "sequence length variation" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -synonym: "modified RNA base feature" EXACT [] -is_a: SO:0001236 ! 
base - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000254 -name: alanyl_tRNA -def: "A tRNA sequence that has an alanine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "alanyl tRNA" EXACT [] -synonym: "alanyl-transfer ribonucleic acid" EXACT [] -synonym: "alanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000211 ! alanine_tRNA_primary_transcript - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -synonym: "rRNA small subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -def: "A tRNA sequence that has an asparagine anticodon, and a 3' asparagine binding region." [SO:ke] -synonym: "asparaginyl tRNA" EXACT [] -synonym: "asparaginyl-transfer ribonucleic acid" EXACT [] -synonym: "asparaginyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000213 ! asparagine_tRNA_primary_transcript - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -def: "A tRNA sequence that has an aspartic acid anticodon, and a 3' aspartic acid binding region." [SO:ke] -synonym: "aspartyl tRNA" EXACT [] -synonym: "aspartyl-transfer ribonucleic acid" EXACT [] -synonym: "aspartyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000214 ! aspartic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -def: "A tRNA sequence that has a cysteine anticodon, and a 3' cysteine binding region." [SO:ke] -synonym: "cysteinyl tRNA" EXACT [] -synonym: "cysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "cysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000215 ! cysteine_tRNA_primary_transcript - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -def: "A tRNA sequence that has a glutamine anticodon, and a 3' glutamine binding region." 
[SO:ke] -synonym: "glutaminyl tRNA" EXACT [] -synonym: "glutaminyl-transfer ribonucleic acid" EXACT [] -synonym: "glutaminyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000216 ! glutamic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -def: "A tRNA sequence that has a glutamic acid anticodon, and a 3' glutamic acid binding region." [SO:ke] -synonym: "glutamyl tRNA" EXACT [] -synonym: "glutamyl-transfer ribonucleic acid" EXACT [] -synonym: "glutamyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000217 ! glutamine_tRNA_primary_transcript - -[Term] -id: SO:0000261 -name: glycyl_tRNA -def: "A tRNA sequence that has a glycine anticodon, and a 3' glycine binding region." [SO:ke] -synonym: "glycyl tRNA" EXACT [] -synonym: "glycyl-transfer ribonucleic acid" RELATED [] -synonym: "glycyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000218 ! glycine_tRNA_primary_transcript - -[Term] -id: SO:0000262 -name: histidyl_tRNA -def: "A tRNA sequence that has a histidine anticodon, and a 3' histidine binding region." [SO:ke] -synonym: "histidyl tRNA" EXACT [] -synonym: "histidyl-transfer ribonucleic acid" EXACT [] -synonym: "histidyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000219 ! histidine_tRNA_primary_transcript - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -def: "A tRNA sequence that has an isoleucine anticodon, and a 3' isoleucine binding region." [SO:ke] -synonym: "isoleucyl tRNA" EXACT [] -synonym: "isoleucyl-transfer ribonucleic acid" EXACT [] -synonym: "isoleucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000220 ! isoleucine_tRNA_primary_transcript - -[Term] -id: SO:0000264 -name: leucyl_tRNA -def: "A tRNA sequence that has a leucine anticodon, and a 3' leucine binding region." 
[SO:ke] -synonym: "leucyl tRNA" EXACT [] -synonym: "leucyl-transfer ribonucleic acid" EXACT [] -synonym: "leucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000221 ! leucine_tRNA_primary_transcript - -[Term] -id: SO:0000265 -name: lysyl_tRNA -def: "A tRNA sequence that has a lysine anticodon, and a 3' lysine binding region." [SO:ke] -synonym: "lysyl tRNA" EXACT [] -synonym: "lysyl-transfer ribonucleic acid" EXACT [] -synonym: "lysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000222 ! lysine_tRNA_primary_transcript - -[Term] -id: SO:0000266 -name: methionyl_tRNA -def: "A tRNA sequence that has a methionine anticodon, and a 3' methionine binding region." [SO:ke] -synonym: "methionyl tRNA" EXACT [] -synonym: "methionyl-transfer ribonucleic acid" EXACT [] -synonym: "methionyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000223 ! methionine_tRNA_primary_transcript - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -def: "A tRNA sequence that has a phenylalanine anticodon, and a 3' phenylalanine binding region." [SO:ke] -synonym: "phenylalanyl tRNA" EXACT [] -synonym: "phenylalanyl-transfer ribonucleic acid" EXACT [] -synonym: "phenylalanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000224 ! phenylalanine_tRNA_primary_transcript - -[Term] -id: SO:0000268 -name: prolyl_tRNA -def: "A tRNA sequence that has a proline anticodon, and a 3' proline binding region." [SO:ke] -synonym: "prolyl tRNA" EXACT [] -synonym: "prolyl-transfer ribonucleic acid" EXACT [] -synonym: "prolyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000225 ! proline_tRNA_primary_transcript - -[Term] -id: SO:0000269 -name: seryl_tRNA -def: "A tRNA sequence that has a serine anticodon, and a 3' serine binding region." 
[SO:ke] -synonym: "seryl tRNA" EXACT [] -synonym: "seryl-transfer ribonucleic acid" RELATED [] -synonym: "seryl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000226 ! serine_tRNA_primary_transcript - -[Term] -id: SO:0000270 -name: threonyl_tRNA -def: "A tRNA sequence that has a threonine anticodon, and a 3' threonine binding region." [SO:ke] -synonym: "threonyl tRNA" EXACT [] -synonym: "threonyl-transfer ribonucleic acid" EXACT [] -synonym: "threonyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000227 ! threonine_tRNA_primary_transcript - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -def: "A tRNA sequence that has a tryptophan anticodon, and a 3' tryptophan binding region." [SO:ke] -synonym: "tryptophanyl tRNA" EXACT [] -synonym: "tryptophanyl-transfer ribonucleic acid" EXACT [] -synonym: "tryptophanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000228 ! tryptophan_tRNA_primary_transcript - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -def: "A tRNA sequence that has a tyrosine anticodon, and a 3' tyrosine binding region." [SO:ke] -synonym: "tyrosyl tRNA" EXACT [] -synonym: "tyrosyl-transfer ribonucleic acid" EXACT [] -synonym: "tyrosyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000229 ! tyrosine_tRNA_primary_transcript - -[Term] -id: SO:0000273 -name: valyl_tRNA -def: "A tRNA sequence that has a valine anticodon, and a 3' valine binding region." [SO:ke] -synonym: "valyl tRNA" EXACT [] -synonym: "valyl-transfer ribonucleic acid" EXACT [] -synonym: "valyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000230 ! valine_tRNA_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000231 ! snRNA_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -alt_id: SO:0000649 -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene (or the product of other non coding RNA genes. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (usually via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:11081512, PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -synonym: "small temporal RNA" EXACT [] -synonym: "stRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: part_of SO:0001244 ! 
pre_miRNA - -[Term] -id: SO:0000277 -name: bound_by_factor -def: "An attribute describing a sequence that is bound by another molecule." [SO:ke] -comment: Formerly called transcript_by_bound_factor. -synonym: "bound by factor" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000278 -name: transcript_bound_by_nucleic_acid -def: "A transcript that is bound by a nucleic acid." [SO:xp] -comment: Formerly called transcript_by_bound_nucleic_acid. -synonym: "transcript bound by nucleic acid" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000876 ! bound_by_nucleic_acid -relationship: has_quality SO:0000876 ! bound_by_nucleic_acid - -[Term] -id: SO:0000279 -name: transcript_bound_by_protein -def: "A transcript that is bound by a protein." [SO:xp] -comment: Formerly called transcript_by_bound_protein. -synonym: "transcript bound by protein" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000875 ! bound_by_protein -relationship: has_quality SO:0000875 ! bound_by_protein - -[Term] -id: SO:0000280 -name: engineered_gene -def: "A gene that is engineered." [SO:xp] -synonym: "engineered gene" EXACT [] -is_a: SO:0000704 ! gene -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000281 -name: engineered_foreign_gene -def: "A gene that is engineered and foreign." [SO:xp] -synonym: "engineered foreign gene" EXACT [] -is_a: SO:0000280 ! engineered_gene -is_a: SO:0000285 ! foreign_gene -is_a: SO:0000805 ! engineered_foreign_region -intersection_of: SO:0000280 ! engineered_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! 
foreign - -[Term] -id: SO:0000282 -name: mRNA_with_minus_1_frameshift -def: "An mRNA with a minus 1 frameshift." [SO:xp] -synonym: "mRNA with minus 1 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000866 ! minus_1_frameshift -relationship: has_quality SO:0000866 ! minus_1_frameshift - -[Term] -id: SO:0000283 -name: engineered_foreign_transposable_element_gene -def: "A transposable_element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign transposable element gene" EXACT [] -is_a: SO:0000111 ! transposable_element_gene -is_a: SO:0000281 ! engineered_foreign_gene -intersection_of: SO:0000111 ! transposable_element_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartite and interrupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000285 -name: foreign_gene -def: "A gene that is foreign." [SO:xp] -synonym: "foreign gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "direct terminal repeat" RELATED [] -synonym: "long terminal repeat" EXACT [] -synonym: "LTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Long_terminal_repeat "wiki" -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000287 -name: fusion_gene -def: "A gene that is a fusion." 
[SO:xp] -synonym: "fusion gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Fusion_gene "wiki" -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000806 ! fusion -relationship: has_quality SO:0000806 ! fusion - -[Term] -id: SO:0000288 -name: engineered_fusion_gene -def: "A fusion gene that is engineered." [SO:xp] -synonym: "engineered fusion gene" EXACT [] -is_a: SO:0000280 ! engineered_gene -is_a: SO:0000287 ! fusion_gene -intersection_of: SO:0000287 ! fusion_gene -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000289 -name: microsatellite -def: "A repeat_region containing repeat_units of 2 to 10 bp repeated in tandem." [http://www.informatics.jax.org/silver/glossary.shtml, NCBI:th] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite" EXACT [] -synonym: "dinucleotide repeat microsatellite feature" EXACT [] -synonym: "dinucleotide repeat microsatellite locus" EXACT [] -synonym: "dinucleotide repeat microsatellite marker" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite marker" RELATED [] -synonym: "rinucleotide repeat microsatellite" EXACT [] -synonym: "trinucleotide repeat microsatellite feature" EXACT [] -synonym: "trinucleotide repeat microsatellite locus" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_obsolete: true - -[Term] -id: SO:0000293 -name: engineered_foreign_repetitive_element -def: "A repetitive element that is engineered and foreign." 
[SO:xp] -synonym: "engineered foreign repetitive element" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000805 ! engineered_foreign_region -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -synonym: "U12 intron" EXACT [] -synonym: "U12-dependent intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! 
replicon - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "D-loop" EXACT [] -synonym: "displacement loop" RELATED [] -xref: http://en.wikipedia.org/wiki/D_loop "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -synonym: "recombination feature" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000299 -name: specific_recombination_site -synonym: "specific recombination site" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -synonym: "recombination feature of rearranged gene" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000301 -name: vertebrate_immune_system_gene_recombination_feature -synonym: "vertebrate immune system gene recombination feature" EXACT [] -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene recombination feature" EXACT [] -synonym: "J-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! 
primary_transcript_region - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interrupted palindrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_DNA_base -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001236 ! base -is_a: SO:0001720 ! epigenetically_modified_region - -[Term] -id: SO:0000306 -name: methylated_DNA_base_feature -def: "A nucleotide modified by methylation." [SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_DNA_base - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_obsolete: true - -[Term] -id: SO:0000309 -name: computed_feature -is_obsolete: true - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_obsolete: true - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to: -is_obsolete: true - -[Term] -id: SO:0000312 -name: experimentally_determined -def: "Attribute to describe a feature that has been experimentally verified." [SO:ke] -synonym: "experimentally determined" EXACT [] -is_a: SO:0000789 ! 
validated - -[Term] -id: SO:0000313 -name: stem_loop -alt_id: SO:0000019 -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "RNA_hairpin_loop" EXACT [] -synonym: "stem loop" EXACT [] -synonym: "stem-loop" EXACT [] -xref: http://en.wikipedia.org/wiki/Stem_loop "wiki" -is_a: SO:0000122 ! RNA_sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: TSS -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." [SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -synonym: "transcription_start_site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -synonym: "coding_sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cDNA clone" EXACT [] -is_a: SO:0000151 ! clone -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000756 ! cDNA -relationship: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." 
[SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke] -synonym: "intronic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer -relationship: part_of SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000321 -name: mRNA_with_plus_1_frameshift -def: "An mRNA with a plus 1 frameshift." [SO:ke] -synonym: "mRNA with plus 1 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000868 ! plus_1_frameshift -relationship: has_quality SO:0000868 ! plus_1_frameshift - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -synonym: "nuclease hypersensitive site" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "coding start" EXACT [] -synonym: "translation initiation site" EXACT [] -synonym: "translation start" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." 
[SO:ke] -subset: SOFA -synonym: "35S rRNA primary transcript" EXACT [] -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke] -synonym: "coding end" EXACT [] -synonym: "translation termination site" EXACT [] -synonym: "translation_end" EXACT [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray oligo" EXACT [] -synonym: "microarray oligonucleotide" EXACT [] -is_a: SO:0000051 ! probe - -[Term] -id: SO:0000329 -name: mRNA_with_plus_2_frameshift -def: "An mRNA with a plus 2 frameshift." [SO:xp] -synonym: "mRNA with plus 2 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000869 ! plus_2_framshift -relationship: has_quality SO:0000869 ! plus_2_framshift - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! 
tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000335 -name: mRNA_with_minus_2_frameshift -def: "A mRNA with a minus 2 frameshift." [SO:ke] -synonym: "mRNA with minus 2 frameshift" EXACT [] -is_a: SO:0000108 ! mRNA_with_frameshift -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000867 ! minus_2_frameshift -relationship: has_quality SO:0000867 ! minus_2_frameshift - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0001411 ! biological_region -relationship: non_functional_homolog_of SO:0000704 ! 
gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITEs do not encode proteins." [http://www.pnas.org/cgi/content/full/97/18/10083] -synonym: "miniature inverted repeat transposable element" EXACT [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome which promotes recombination." [SO:rd] -synonym: "recombination hotspot" EXACT [] -xref: http://en.wikipedia.org/wiki/Recombination_hotspot "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -synonym: "site specific recombination target region" EXACT [] -is_a: SO:0000299 ! 
specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0000165 ! enhancer -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000346 -name: loxP_site -synonym: "Cre-recombination target region" RELATED [] -synonym: "loxP site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000348 -name: nucleic_acid -def: "An attribute describing a sequence consisting of nucleobases bound to repeating units. The forms found in nature are deoxyribonucleic acid (DNA), where the repeating units are 2-deoxy-D-ribose rings connected to a phosphate backbone, and ribonucleic acid (RNA), where the repeating units are D-ribose rings connected to a phosphate backbone." [CHEBI:33696, RSC:cb] -synonym: "nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleic_acid "wiki" -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! 
match - -[Term] -id: SO:0000350 -name: FRT_site -def: "An inversion site found on the Saccharomyces cerevisiae 2 micron plasmid." [SO:ma] -synonym: "FLP recombination target region" EXACT [] -synonym: "FRT site" EXACT [] -is_a: SO:0000948 ! inversion_site - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "An attribute to decide a sequence of nucleotides, nucleotide analogs, or amino acids that has been designed by an experimenter and which may, or may not, correspond with any natural sequence." [SO:ma] -synonym: "synthetic sequence" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000352 -name: DNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a 2-deoxy-D-ribose ring connected to a phosphate backbone." [RSC:cb] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000353 -name: sequence_assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -def: "A region of intronic nucleotide sequence targeted by a nuclease enzyme." [SO:ke] -synonym: "group 1 intron homing endonuclease target region" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which is co-inherited as the result of the lack of historic recombination within it." [SO:ma] -synonym: "haplotype block" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000356 -name: RNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a D-ribose ring connected to a phosphate backbone." [RSC:cb] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
-is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: flanked -def: "An attribute describing a region that is bounded either side by a particular kind of region." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000359 -name: floxed -def: "An attribute describing sequence that is flanked by Lox-P sites." [SO:ke] -xref: http://en.wikipedia.org/wiki/Floxed "wiki" -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000361 -name: FRT_flanked -def: "An attribute to describe sequence that is flanked by the FLP recombinase recognition site, FRT." [SO:ke] -synonym: "FRT flanked" EXACT [] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000362 -name: invalidated_by_chimeric_cDNA -def: "A cDNA clone constructed from more than one mRNA. Usually an experimental artifact." [SO:ma] -synonym: "invalidated by chimeric cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000363 -name: floxed_gene -def: "A transgene that is floxed." [SO:xp] -synonym: "floxed gene" EXACT [] -is_a: SO:0000902 ! transgene -intersection_of: SO:0000902 ! transgene -intersection_of: has_quality SO:0000359 ! floxed -relationship: has_quality SO:0000359 ! floxed - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposable element." [SO:ke] -synonym: "transposable element flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "A region encoding an integrase which acts at a site adjacent to it (attI_site) to insert DNA which must include but is not limited to an attC_site." 
[SO:as] -xref: http://en.wikipedia.org/wiki/Integron "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000367 -name: attI_site -def: "A region within an integron, adjacent to an integrase, at which site specific recombination involving an attC_site takes place." [SO:as] -synonym: "attI site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -is_obsolete: true - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/~smaloy/Glossary/C.html] -synonym: "conjugative transposon" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0001185 ! 
enzymatic -relationship: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0000373 -name: recombinationally_inverted_gene -def: "A recombinationally rearranged gene by inversion." [SO:xp] -synonym: "recombinationally inverted gene" EXACT [] -is_a: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: has_quality SO:1000036 ! inversion -relationship: has_quality SO:1000036 ! inversion - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -is_a: SO:0000372 ! enzymatic_RNA -intersection_of: SO:0000372 ! enzymatic_RNA -intersection_of: has_quality SO:0001186 ! ribozymic -relationship: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000375 -name: rRNA_5_8S -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 6S RNA represses expression from a sigma70-dependent promoter during stationary phase." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S RNA" EXACT [] -synonym: "RNA 6S" EXACT [] -xref: http://en.wikipedia.org/wiki/6S_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB RsmB RNA" EXACT [] -synonym: "CsrB-RsmB RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -synonym: "DsrA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/DsrA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -synonym: "GcvB RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/GcvB_RNA "wiki" -is_a: SO:0000378 ! DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [PMID:2436805] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -is_a: SO:0000715 ! RNA_motif -intersection_of: SO:0000715 ! RNA_motif -intersection_of: has_quality SO:0001186 ! ribozymic -relationship: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000381 -name: group_IIA_intron -synonym: "group IIA intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -synonym: "group IIB intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -synonym: "MicF RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MicF_RNA "wiki" -is_a: SO:0000644 ! antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. 
Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -synonym: "OxyS RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/OxyS_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterized activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. 
Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -synonym: "RprA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RprA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -synonym: "RRE RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -synonym: "spot-42 RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Spot_42_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesizes telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." 
[http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." [PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! 
snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -def: "An attribute describes a quality of sequence." [SO:ke] -synonym: "sequence attribute" EXACT [] - -[Term] -id: SO:0000401 -name: gene_attribute -synonym: "gene attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_obsolete: true - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. -subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0005837 ! U14_snoRNA_primary_transcript - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. 
The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron. Twintrons are group II or III introns, into which another group II or III intron has been transposed." [PMID:1899376, PMID:7823908] -xref: http://en.wikipedia.org/wiki/Twintron "wiki" -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." 
[SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A biological_region of sequence that, in the molecule, interacts selectively and non-covalently with other molecules. A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: See GO:0005488 : binding. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with polypeptide molecules." [SO:ke] -comment: See GO:0042277 : peptide binding. -subset: SOFA -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000411 -name: rescue_region -def: "A region that rescues." [SO:xp] -synonym: "rescue fragment" EXACT [] -synonym: "rescue region" EXACT [] -synonym: "rescue segment" RELATED [] -is_a: SO:0000695 ! reagent -intersection_of: SO:0000695 ! reagent -intersection_of: has_quality SO:0000814 ! rescue -relationship: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! 
remark - -[Term] -id: SO:0000414 -name: invalidated_by_genomic_contamination -def: "An attribute to describe a feature that is invalidated due to genomic contamination." [SO:ke] -synonym: "invalidated by genomic contamination" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000415 -name: invalidated_by_genomic_polyA_primed_cDNA -def: "An attribute to describe a feature that is invalidated due to polyA priming." [SO:ke] -synonym: "invalidated by genomic polyA primed cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000416 -name: invalidated_by_partial_processing -def: "An attribute to describe a feature that is invalidated due to partial processing." [SO:ke] -synonym: "invalidated by partial processing" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000417 -name: polypeptide_domain -alt_id: BS:00012 -alt_id: BS:00134 -alt_id: SO:0001069 -def: "A structurally or functionally defined protein region. In proteins with multiple domains, the combination of the domains determines the function of the protein. A region which has been shown to recur throughout evolution." [EBIBS:GAR] -comment: Range. Old definition from before biosapiens: A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains. -subset: biosapiens -synonym: "domain" BROAD BS [uniprot:feature_type] -synonym: "polypeptide domain" EXACT [] -synonym: "polypeptide_structural_domain" EXACT BS [] -synonym: "structural domain" BROAD BS [] -is_a: SO:0001070 ! polypeptide_structural_region -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0001527 ! peptide_localization_signal -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -synonym: "5' TIR" EXACT [] -synonym: "five prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -synonym: "3' TIR" EXACT [] -synonym: "three prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -synonym: "U5 long terminal repeat region" EXACT [] -synonym: "U5 LTR region" EXACT [] -is_a: SO:0000848 ! 
LTR_component - -[Term] -id: SO:0000423 -name: R_LTR_region -synonym: "R long terminal repeat region" EXACT [] -synonym: "R LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000424 -name: U3_LTR_region -synonym: "U3 long terminal repeat region" EXACT [] -synonym: "U3 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000425 -name: five_prime_LTR -synonym: "5' long terminal repeat" EXACT [] -synonym: "5' LTR" EXACT [] -synonym: "five prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -synonym: "3' long terminal repeat" EXACT [] -synonym: "3' LTR" EXACT [] -synonym: "three prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -synonym: "R 5' long term repeat region" EXACT [] -synonym: "R five prime LTR region" EXACT [] -is_a: SO:0000423 ! R_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -synonym: "U5 5' long terminal repeat region" EXACT [] -synonym: "U5 five prime LTR region" EXACT [] -is_a: SO:0000422 ! U5_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -synonym: "U3 5' long term repeat region" EXACT [] -synonym: "U3 five prime LTR region" EXACT [] -is_a: SO:0000424 ! U3_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -synonym: "R 3' long terminal repeat region" EXACT [] -synonym: "R three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -synonym: "U3 3' long terminal repeat region" EXACT [] -synonym: "U3 three prime LTR region" EXACT [] -is_a: SO:0000849 ! 
three_prime_LTR_component - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -synonym: "U5 3' long terminal repeat region" EXACT [] -synonym: "U5 three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -synonym: "non LTR retrotransposon polymeric tract" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: target_site_duplication -def: "A sequence of the target DNA that is duplicated when a transposable element or phage inserts; usually found at each end the insertion." [http://www.koko.gov.my/CocoaBioTech/Glossaryt.html] -synonym: "target site duplication" EXACT [] -is_a: SO:0000314 ! direct_repeat -intersection_of: SO:0000314 ! direct_repeat -intersection_of: derives_from SO:0000101 ! transposable_element -relationship: derives_from SO:0000101 ! transposable_element - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." [SO:ke] -synonym: "LTR retrotransposon poly purine tract" RELATED [] -synonym: "RR tract" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -synonym: "inverted ring chromosome" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! 
ring_chromosome - -[Term] -id: SO:0000440 -name: vector_replicon -def: "A replicon that has been modified to act as a vector for foreign sequence." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "vector" EXACT [] -synonym: "vector replicon" EXACT [] -xref: http://en.wikipedia.org/wiki/Vector_(molecular_biology) "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000443 -name: polymer_attribute -def: "An attribute to describe the kind of biological sequence." [SO:ke] -synonym: "polymer attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -synonym: "three prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "5' nc exon" EXACT [] -synonym: "5' non coding exon" EXACT [] -synonym: "five prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." 
[SO:ke] -synonym: "UTR intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." [SO:ke] -synonym: "five prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -synonym: "three prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequency of these components." [SO:ma] -synonym: "random sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -synonym: "chromosome interband" RELATED [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000451 -name: gene_with_polyadenylated_mRNA -def: "A gene that encodes a polyadenylated mRNA." [SO:xp] -synonym: "gene with polyadenylated mRNA" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000871 ! polyadenylated_mRNA -relationship: transcribed_to SO:0000871 ! polyadenylated_mRNA - -[Term] -id: SO:0000452 -name: transgene_attribute -is_obsolete: true - -[Term] -id: SO:0000453 -name: chromosomal_transposition -def: "A chromosome structure variant whereby a region of a chromosome has been transferred to another position. Among interchromosomal rearrangements, the term transposition is reserved for that class in which the telomeres of the chromosomes involved are coupled (that is to say, form the two ends of a single DNA molecule) as in wild-type." [FB:reference_manual, SO:ke] -synonym: "chromosomal transposition" EXACT [] -synonym: "transposition" NARROW [] -is_a: SO:1000183 ! 
chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000455 -name: gene_with_mRNA_with_frameshift -def: "A gene that encodes an mRNA with a frameshift." [SO:xp] -synonym: "gene with mRNA with frameshift" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000865 ! frameshift -relationship: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000456 -name: recombinationally_rearranged_gene -def: "A gene that is recombinationally rearranged." [SO:ke] -synonym: "recombinationally rearranged gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000940 ! recombinationally_rearranged -relationship: has_quality SO:0000940 ! recombinationally_rearranged - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -def: "A chromosome duplication involving an insertion from another chromosome." [SO:ke] -synonym: "interchromosomal duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene_segment -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D gene" EXACT [] -synonym: "D-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000459 -name: gene_with_trans_spliced_transcript -def: "A gene with a transcript that is trans-spliced." [SO:xp] -synonym: "gene with trans spliced transcript" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! 
gene -intersection_of: transcribed_to SO:0000479 ! trans_spliced_transcript -relationship: transcribed_to SO:0000479 ! trans_spliced_transcript - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_segment -comment: I am using the term segment instead of gene here to avoid confusion with the region 'gene'. -synonym: "vertebrate immunoglobulin T cell receptor segment" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite deficiency" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendant of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000463 -name: encodes_alternately_spliced_transcripts -def: "A gene that encodes more than one transcript." [SO:ke] -synonym: "encodes alternately spliced transcripts" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome deletion whereby a chromosome is generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus duplication" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! 
intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene_segment -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene" EXACT [] -synonym: "V-GENE" EXACT [] -synonym: "variable_gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -def: "An attribute describing a gene sequence where the resulting protein is regulated by the stability of the resulting protein." [SO:ke] -synonym: "post translationally regulated by protein stability" EXACT [] -synonym: "post-translationally regulated by protein stability" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -def: "An attribute describing a gene sequence where the resulting protein is modified to regulate it." [SO:ke] -synonym: "post translationally regulated by protein modification" EXACT [] -synonym: "post-translationally regulated by protein modification" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene_segment -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene" EXACT [] -synonym: "J-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000471 -name: autoregulated -def: "The gene product is involved in its own transcriptional regulation." 
[SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -def: "The gene product is involved in its own transcriptional regulation where it decreases transcription." [SO:ke] -synonym: "negatively autoregulated" EXACT [] -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." [SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -def: "The gene product is involved in its own transcriptional regulation, where it increases transcription." [SO:ke] -synonym: "positively autoregulated" EXACT [] -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." [SO:ke] -synonym: "contig read" EXACT [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -def: "A gene that is polycistronic." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000478 -name: C_gene_segment -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C gene" EXACT [] -synonym: "C_GENE" EXACT [] -synonym: "constant gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000479 -name: trans_spliced_transcript -def: "A transcript that is trans-spliced." 
[SO:xp] -synonym: "trans spliced transcript" EXACT [] -synonym: "trans-spliced transcript" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000870 ! trans_spliced -relationship: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly." [SO:ke] -synonym: "tiling path clone" EXACT [] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occurring at the termini of a DNA transposon." [SO:ke] -synonym: "terminal inverted repeat" EXACT [] -synonym: "TIR" EXACT [] -is_a: SO:0000294 ! inverted_repeat -relationship: part_of SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor gene cluster" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000202 ! 
three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-CLUSTER" EXACT [] -synonym: "DJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-C-CLUSTER" EXACT [] -synonym: "VDJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-CLUSTER" EXACT [] -synonym: "VDJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000574 ! 
VDJ_gene_segment - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-C-CLUSTER" RELATED [] -synonym: "VJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-C-CLUSTER" EXACT [] -synonym: "VJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-CLUSTER" EXACT [] -synonym: "VJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -synonym: "D gene recombination feature" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-HEPTAMER" EXACT [] -synonym: "three prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-NOMAMER" EXACT [] -synonym: "three prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-SPACER" EXACT [] -synonym: "three prime D spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-HEPTAMER" EXACT [] -synonym: "five prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-NONAMER" EXACT [] -synonym: "five prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'-SPACER" EXACT [] -synonym: "five prime D spacer" EXACT [] -synonym: "five prime D-spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -synonym: "Hoogsteen base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Hoogsteen_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." [SO:ke] -synonym: "reverse Hoogsteen base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. 
A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ C cluster" EXACT [] -synonym: "D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ cluster" EXACT [] -synonym: "D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J C cluster" EXACT [] -synonym: "D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000507 -name: pseudogenic_exon -def: "A non functional descendant of an exon, part of a pseudogene." [SO:ke] -comment: This is the analog of the exon of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon -relationship: part_of SO:0000516 ! pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J cluster" EXACT [] -synonym: "D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J C cluster" EXACT [] -synonym: "D-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! 
C_gene_segment - -[Term] -id: SO:0000510 -name: VD_gene_segment -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V_D_GENE" EXACT [] -synonym: "VD gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J C cluster" EXACT [] -synonym: "J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosomal deletion whereby a chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus aneuploid" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J cluster" EXACT [] -synonym: "J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J nonamer" EXACT [] -synonym: "J-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J heptamer" EXACT [] -synonym: "J-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -def: "A non functional descendant of a transcript, part of a pseudogene." [SO:ke] -comment: This is the analog of the transcript of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutations. -synonym: "pseudogenic transcript" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J spacer" EXACT [] -synonym: "J-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ cluster" EXACT [] -synonym: "V-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J cluster" EXACT [] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ C cluster" EXACT [] -synonym: "V-(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ cluster" EXACT [] -synonym: "V-(VDJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000574 ! 
VDJ_gene_segment - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J cluster" EXACT [] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ C cluster" EXACT [] -synonym: "V-(VJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ cluster" EXACT [] -synonym: "V-(VJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J cluster" EXACT [] -synonym: "V-(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V cluster" EXACT [] -synonym: "V-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ C cluster" EXACT [] -synonym: "V-D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ cluster" EXACT [] -synonym: "V-D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000572 ! 
DJ_gene_segment - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J C cluster" EXACT [] -synonym: "V-D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J cluster" EXACT [] -synonym: "V-D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J C cluster" EXACT [] -synonym: "V-D-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! 
C_gene_segment - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J cluster" EXACT [] -synonym: "V-D-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V heptamer" EXACT [] -synonym: "V-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J cluster" EXACT [] -synonym: "V-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J C cluster" EXACT [] -synonym: "V-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! 
J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V nonamer" EXACT [] -synonym: "V-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V spacer" EXACT [] -synonym: "V-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene recombination feature" EXACT [] -synonym: "V-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-C-CLUSTER" EXACT [] -synonym: "DJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! 
DJ_gene_segment - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-C-CLUSTER" EXACT [] -synonym: "DJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-C-CLUSTER" EXACT [] -synonym: "VDJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ C cluster" EXACT [] -synonym: "V-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." 
[http://www.pnas.org/cgi/content/full/100/11/6569] -synonym: "ISCR" RELATED [] -xref: http://en.wikipedia.org/wiki/Helitron "wiki" -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -synonym: "recoding pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -synonym: "designed sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite duplication" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000548 -name: gene_with_edited_transcript -def: "A gene that encodes a transcript that is edited." [SO:xp] -synonym: "gene with edited transcript" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000873 ! edited_transcript -relationship: transcribed_to SO:0000873 ! edited_transcript - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived duplication plus aneuploid" EXACT [] -is_a: SO:1000038 ! 
intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -def: "A chromosome structural variation whereby either a chromosome exists in addition to the normal chromosome complement or is lacking." [SO:ke] -comment: Examples are Nullo-4, Haplo-4 and triplo-4 in Drosophila. -synonym: "aneuploid chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -synonym: "polyadenylation termination signal" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "A region in the 5' UTR that pairs with the 16S rRNA during formation of the preinitiation complex." [SO:jh] -comment: Not found in Eukaryotic sequence. -synonym: "five prime ribosome binding site" EXACT [] -synonym: "RBS" RELATED [] -synonym: "Shine Dalgarno sequence" EXACT [] -synonym: "Shine-Dalgarno sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Shine-Dalgarno_sequence "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -alt_id: SO:0001430 -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation. The boundary between the UTR and the polyA sequence." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA cleavage site" EXACT [] -synonym: "polyA junction" EXACT [] -synonym: "polyA site" EXACT [] -synonym: "polyA_junction" EXACT [] -synonym: "polyadenylation site" RELATED [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! 
mature_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "5' clip" RELATED [] -synonym: "five prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'RS" EXACT [] -synonym: "five prime D recombination signal sequence" EXACT [] -synonym: "five prime D-recombination signal sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "3'-clip" EXACT [] -synonym: "three prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C cluster" EXACT [] -synonym: "C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000478 ! C_gene_segment - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D cluster" EXACT [] -synonym: "D-CLUSTER" EXACT [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J cluster" EXACT [] -synonym: "D-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: "Seven nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or J-gene recombination feature of an immunoglobulin or T-cell receptor gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "HEPTAMER" RELATED [] -synonym: "heptamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -synonym: "nonamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000563 -name: vertebrate_immune_system_gene_recombination_spacer -synonym: "vertebrate immune system gene recombination spacer" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J C cluster" EXACT [] -synonym: "V-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000572 ! DJ_gene_segment - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J C cluster" EXACT [] -synonym: "V-(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000574 ! VDJ_gene_segment - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J C cluster" EXACT [] -synonym: "V-(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene_segment -relationship: has_part SO:0000470 ! J_gene_segment -relationship: has_part SO:0000478 ! C_gene_segment -relationship: has_part SO:0000576 ! VJ_gene_segment - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -synonym: "inversion derived aneuploid chromosome" EXACT [] -is_a: SO:0000550 ! 
aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promoter -def: "An unregulated promoter that allows continuous expression." [SO:ke] -synonym: "bidirectional promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0000569 -name: retrotransposed -alt_id: SO:0100042 -def: "An attribute of a feature that occurred as the product of a reverse transcriptase mediated event." [SO:ke] -comment: GO:0003964 RNA-directed DNA polymerase activity. -xref: http://en.wikipedia.org/wiki/Retrotransposed "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-RS" EXACT [] -synonym: "three prime D recombination signal sequence" EXACT [] -synonym: "three_prime_D-recombination_signal_sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_encoding -synonym: "miRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000572 -name: DJ_gene_segment -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D-J-GENE" EXACT [] -synonym: "DJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000573 -name: rRNA_encoding -synonym: "rRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000574 -name: VDJ_gene_segment -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." 
[http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-D-J-GENE" EXACT [] -synonym: "VDJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000575 -name: scRNA_encoding -synonym: "scRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000576 -name: VJ_gene_segment -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://www.imgt.org/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-J-GENE" EXACT [] -synonym: "VJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_encoding -synonym: "snoRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." [SO:ma] -synonym: "edited transcript feature" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -synonym: "methylation guide snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." [SO:ke] -synonym: "rRNA cleavage snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "pre edited region" EXACT [] -synonym: "pre-edited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "A tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. TmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and Eukaryote nuclear genomes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa RNA" RELATED [] -synonym: "ssrA" RELATED [] -xref: http://en.wikipedia.org/wiki/TmRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_encoding -synonym: "C/D box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." [SO:ke] -synonym: "10Sa RNA primary transcript" RELATED [] -synonym: "ssrA RNA primary transcript" RELATED [] -synonym: "tmRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. 
They catalyze their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -is_a: SO:0000188 ! intron -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001186 ! ribozymic -relationship: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -synonym: "SRP RNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. 
Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000589 ! SRP_RNA_primary_transcript - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A tertiary structure in RNA where nucleotides in a loop form base pairs with a region of RNA downstream of the loop." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Pseudoknot "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "classical pseudoknot" EXACT [] -synonym: "H pseudoknot" EXACT [] -synonym: "H-pseudoknot" EXACT [] -synonym: "H-type pseudoknot" EXACT [] -synonym: "hairpin-type pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. 
This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -subset: SOFA -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000595 ! C_D_box_snoRNA_primary_transcript - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "box H/ACA snoRNA" EXACT [] -synonym: "H ACA box snoRNA" EXACT [] -synonym: "H/ACA box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000596 ! H_ACA_box_snoRNA_primary_transcript - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." [SO:ke] -synonym: "C/D box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! 
snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -synonym: "H ACA box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." [http://www.rna.ucla.edu/index.html] -is_obsolete: true - -[Term] -id: SO:0000598 -name: edited_by_C_insertion_and_dinucleotide_insertion -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000599 -name: edited_by_C_to_U_substitution -is_obsolete: true - -[Term] -id: SO:0000600 -name: edited_by_A_to_I_substitution -is_obsolete: true - -[Term] -id: SO:0000601 -name: edited_by_G_addition -is_obsolete: true - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. 
The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing block" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing domain" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "unedited region" EXACT [] -is_a: SO:0000579 ! 
edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_encoding -synonym: "H ACA box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -synonym: "oligo U tail" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000602 ! guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -synonym: "bacterial RNApol promoter" EXACT [] -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -synonym: "bacterial terminator" EXACT [] -is_a: SO:0000141 ! 
terminator -is_a: SO:0000752 ! gene_group_regulatory_region - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -synonym: "terminator of type 2 RNApol III promoter" EXACT [] -is_a: SO:0000951 ! eukaryotic_terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -synonym: "RNApol III promoter type 1" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "RNApol III promoter type 2" EXACT [] -synonym: "tRNA promoter" RELATED [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence TGGCnnAGTGG." [SO:ke] -comment: Binds TFIIIC. -synonym: "A-box" EXACT [] -xref: http://en.wikipedia.org/wiki/A-box "wiki" -is_a: SO:0001660 ! core_promoter_element - -[Term] -id: SO:0000620 -name: B_box -def: "A variably distant linear promoter region recognized by TFIIIC, with consensus sequence AGGTTCCAnnCC." [SO:ke] -comment: Binds TFIIIC. -synonym: "B-box" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -synonym: "RNApol III promoter type 3" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -def: "An RNA polymerase III type 1 promoter with consensus sequence CAnnCCn." [SO:ke] -synonym: "C-box" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000623 -name: snRNA_encoding -synonym: "snRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "A regulatory region which upon binding of transcription factors, suppresses the transcription of the gene or genes it controls." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -synonym: "chromosomal regulatory element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000627 -name: insulator -def: "A transcriptional cis regulatory region that when located between a CRM and a gene's promoter prevents the CRM from modulating that gene's expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -synonym: "five prime open reading frame" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -def: "A start codon upstream of the ORF." [SO:ke] -synonym: "upstream AUG codon" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000631 -name: polycistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." [SO:ke] -synonym: "polycistronic primary transcript" EXACT [] -is_a: SO:0000078 ! 
polycistronic_transcript -is_a: SO:0000185 ! primary_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000880 ! polycistronic -relationship: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000632 -name: monocistronic_primary_transcript -def: "A primary transcript encoding for one gene product." [SO:ke] -synonym: "monocistronic primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript -is_a: SO:0000665 ! monocistronic_transcript -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000878 ! monocistronic -relationship: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000633 -name: monocistronic_mRNA -def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd] -synonym: "monocistronic mRNA" EXACT [] -synonym: "monocistronic processed transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Monocistronic_mRNA "wiki" -is_a: SO:0000234 ! mRNA -is_a: SO:0000665 ! monocistronic_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000878 ! monocistronic -relationship: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000634 -name: polycistronic_mRNA -def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd] -synonym: "polycistronic mRNA" EXACT [] -synonym: "polycistronic processed transcript" RELATED [] -xref: http://en.wikipedia.org/wiki/Polycistronic_mRNA "wiki" -is_a: SO:0000078 ! polycistronic_transcript -is_a: SO:0000234 ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000880 ! polycistronic -relationship: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "A primary transcript that donates the spliced leader to other mRNA." [SO:ke] -synonym: "mini exon donor RNA" EXACT [] -synonym: "mini-exon donor RNA" EXACT [] -is_a: SO:0000185 ! 
primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -synonym: "spliced leader RNA" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000635 ! mini_exon_donor_RNA - -[Term] -id: SO:0000637 -name: engineered_plasmid -def: "A plasmid that is engineered." [SO:xp] -synonym: "engineered plasmid" EXACT [] -synonym: "engineered plasmid gene" RELATED [] -is_a: SO:0000155 ! plasmid -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -synonym: "transcribed spacer region" EXACT [] -is_a: SO:0000838 ! rRNA_primary_transcript_region - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -synonym: "internal transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -synonym: "external transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -synonym: "tetranucleotide repeat microsatellite feature" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_encoding -synonym: "SRP RNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." [SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. siRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -alt_id: SO:0000648 -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro RNA primary transcript" EXACT [] -synonym: "miRNA primary transcript" EXACT [] -synonym: "small temporal RNA primary transcript" EXACT [] -synonym: "stRNA primary transcript" EXACT [] -synonym: "stRNA_primary_transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript -relationship: has_part SO:0001244 ! 
pre_miRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000255 ! rRNA_small_subunit_primary_transcript - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilizes 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! 
large_subunit_rRNA - -[Term] -id: SO:0000654 -name: maxicircle_gene -def: "A mitochondrial gene located in a maxicircle." [SO:xp] -synonym: "maxi-circle gene" EXACT [] -synonym: "maxicircle gene" EXACT [] -is_a: SO:0000089 ! kinetoplast_gene -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000742 ! maxicircle -relationship: part_of SO:0000742 ! maxicircle - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000656 -name: stRNA_encoding -synonym: "stRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: has_part SO:0000726 ! repeat_unit - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_encoding -synonym: "tmRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_obsolete: true - -[Term] -id: SO:0000661 -name: intron_attribute -is_obsolete: true - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." 
[SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000663 -name: tRNA_encoding -synonym: "tRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -synonym: "introgressed chromosome region" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000665 -name: monocistronic_transcript -def: "A transcript that is monocistronic." [SO:xp] -synonym: "monocistronic transcript" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000878 ! monocistronic -relationship: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000666 -name: mobile_intron -def: "An intron (mitochondrial, chloroplast, nuclear or prokaryotic) that encodes a double strand sequence specific endonuclease allowing for mobility." [SO:ke] -synonym: "mobile intron" EXACT [] -is_a: SO:0000188 ! intron -is_a: SO:0001037 ! mobile_genetic_element -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001234 ! mobile -relationship: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0000667 -name: insertion -alt_id: SO:1000034 -def: "The sequence of one or more nucleotides added between two adjacent nucleotides in the sequence." [SO:ke] -subset: DBVAR -subset: SOFA -synonym: "insertion" EXACT dbvar [http://www.ncbi.nlm.nih.gov/dbvar/] -synonym: "nucleotide insertion" EXACT [] -synonym: "nucleotide_insertion" EXACT [] -xref: loinc:LA6687-3 "Insertion" -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -synonym: "sequence rearrangement feature" EXACT [] -is_a: SO:0000298 ! 
recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." [SO:ma] -synonym: "chromosome breakage sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -synonym: "internal eliminated sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -synonym: "macronucleus destined segment" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non canonical splice site" EXACT [] -synonym: "non-canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000678 -consider: SO:0000679 - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." [SO:ke] -synonym: "canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000676 -consider: SO:0000677 - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -synonym: "canonical 3' splice site" EXACT [] -synonym: "canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! 
three_prime_cis_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." [SO:ke] -synonym: "canonical 5' splice site" EXACT [] -synonym: "canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." [SO:ke] -synonym: "non canonical 3' splice site" RELATED [] -synonym: "non canonical three prime splice site" EXACT [] -synonym: "non-canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non canonical 5' splice site" EXACT [] -synonym: "non canonical five prime splice site" EXACT [] -synonym: "non-canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non ATG start codon" EXACT [] -synonym: "non canonical start codon" EXACT [] -synonym: "non-canonical start codon" EXACT [] -is_a: SO:0000318 ! start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -synonym: "aberrant processed transcript" EXACT [] -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." 
[http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -synonym: "exonic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000685 -name: DNAseI_hypersensitive_site -synonym: "DHS" EXACT [] -synonym: "DNAseI hypersensitive site" EXACT [] -is_a: SO:0000322 ! nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "A chromosomal translocation whereby the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements. This occurs for some translocations, particularly but not exclusively, reciprocal translocations." [SO:ma] -synonym: "translocation element" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000690 -name: gene_with_polycistronic_transcript -def: "A gene that encodes a polycistronic transcript." [SO:xp] -synonym: "gene with polycistronic transcript" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! 
gene -intersection_of: transcribed_to SO:0000078 ! polycistronic_transcript -relationship: transcribed_to SO:0000078 ! polycistronic_transcript - -[Term] -id: SO:0000691 -name: cleaved_initiator_methionine -alt_id: BS:00067 -def: "The initiator methionine that has been cleaved from a mature polypeptide sequence." [EBIBS:GAR] -subset: biosapiens -synonym: "cleaved initiator methionine" EXACT [] -synonym: "init_met" RELATED [uniprot:feature_type] -synonym: "initiator methionine" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000692 -name: gene_with_dicistronic_transcript -def: "A gene that encodes a dicistronic transcript." [SO:xp] -synonym: "gene with dicistronic transcript" EXACT [] -is_a: SO:0000690 ! gene_with_polycistronic_transcript -intersection_of: SO:0000690 ! gene_with_polycistronic_transcript -intersection_of: transcribed_to SO:0000079 ! dicistronic_transcript -relationship: transcribed_to SO:0000079 ! dicistronic_transcript - -[Term] -id: SO:0000693 -name: gene_with_recoded_mRNA -def: "A gene that encodes an mRNA that is recoded." [SO:xp] -synonym: "gene with recoded mRNA" EXACT [] -is_a: SO:0001217 ! protein_coding_gene -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000881 ! recoded -relationship: has_quality SO:0000881 ! recoded - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives exist in normal individuals in some population(s), wherein the least frequent variant has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:0001483 ! SNV - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! 
biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000697 -name: gene_with_stop_codon_read_through -def: "A gene that encodes a transcript with stop codon readthrough." [SO:xp] -synonym: "gene with stop codon read through" EXACT [] -is_a: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000883 ! stop_codon_read_through -relationship: has_part SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000698 -name: gene_with_stop_codon_redefined_as_pyrrolysine -def: "A gene encoding an mRNA that has the stop codon redefined as pyrrolysine." [SO:xp] -synonym: "gene with stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000884 ! stop_codon_redefined_as_pyrrolysine -relationship: has_part SO:0000884 ! stop_codon_redefined_as_pyrrolysine - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -synonym: "breakpoint" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! 
sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjacent copies of a region (of length greater than 1)." [SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The 5' five prime splice site region of the donor RNA." [SO:ke] -comment: SL RNA contains a donor site. 
-synonym: "5 prime trans splice site" RELATED [] -synonym: "trans splice donor site" EXACT [] -synonym: "trans-splice donor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -def: "A trans_splicing_acceptor_site which appends the 22nt SL1 RNA leader sequence to the 5' end of most mRNAs." [SO:nlw] -synonym: "SL1 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -def: "A trans_splicing_acceptor_site which appends the 22nt SL2 RNA leader sequence to the 5' end of mRNAs. SL2 acceptor sites occur in genes in internal segments of polycistronic transcripts." [SO:nlw] -synonym: "SL2 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000710 -name: gene_with_stop_codon_redefined_as_selenocysteine -def: "A gene encoding an mRNA that has the stop codon redefined as selenocysteine." [SO:xp] -synonym: "gene with stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: has_part SO:0000885 ! stop_codon_redefined_as_selenocysteine -relationship: has_part SO:0000885 ! stop_codon_redefined_as_selenocysteine - -[Term] -id: SO:0000711 -name: gene_with_mRNA_recoded_by_translational_bypass -def: "A gene with mRNA recoded by translational bypass." [SO:xp] -synonym: "gene with mRNA recoded by translational bypass" EXACT [] -is_a: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000886 ! recoded_by_translational_bypass -relationship: has_quality SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:0000712 -name: gene_with_transcript_with_translational_frameshift -def: "A gene encoding a transcript that has a translational frameshift." 
[SO:xp] -synonym: "gene with transcript with translational frameshift" EXACT [] -is_a: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_quality SO:0000887 ! translationally_frameshifted -relationship: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000713 -name: DNA_motif -def: "A motif that is active in the DNA form of the sequence." [SO:ke] -synonym: "DNA motif" EXACT [] -xref: http://en.wikipedia.org/wiki/DNA_motif "wiki" -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001683 ! sequence_motif - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -subset: SOFA -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000716 -name: dicistronic_mRNA -def: "An mRNA that has the quality dicistronic." [SO:ke] -synonym: "dicistronic mRNA" EXACT [] -synonym: "dicistronic processed transcript" RELATED [] -is_a: SO:0000079 ! dicistronic_transcript -is_a: SO:0000634 ! polycistronic_mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000879 ! dicistronic -relationship: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interrupted by one or more stop codons; usually identified through inter-genomic sequence comparisons." [SGD:rb] -comment: Term requested by Rama from SGD. -synonym: "blocked reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -synonym: "superscaffold" RELATED [] -is_a: SO:0001876 ! partial_genomic_sequence_assembly - -[Term] -id: SO:0000720 -name: foreign_transposable_element -def: "A transposable element that is foreign." [SO:ke] -comment: requested by Michael on 19 Nov 2004. -synonym: "foreign transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000721 -name: gene_with_dicistronic_primary_transcript -def: "A gene that encodes a dicistronic primary transcript." [SO:xp] -comment: Requested by Michael, 19 nov 2004. -synonym: "gene with dicistronic primary transcript" EXACT [] -is_a: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:1001197 ! dicistronic_primary_transcript -relationship: transcribed_to SO:1001197 ! dicistronic_primary_transcript - -[Term] -id: SO:0000722 -name: gene_with_dicistronic_mRNA -def: "A gene that encodes a polycistronic mRNA." [SO:xp] -comment: Requested by MA nov 19 2004. 
-synonym: "gene with dicistronic mRNA" EXACT [] -synonym: "gene with dicistronic processed transcript" EXACT [] -is_a: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:0000716 ! dicistronic_mRNA -relationship: transcribed_to SO:0000716 ! dicistronic_mRNA - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." [SO:ma] -synonym: "intervening DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/IDNA "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the EMBL, DDBJ, GenBank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal - -[Term] -id: SO:0000726 -name: repeat_unit -def: "The simplest repeated component of a repeat region. A single repeat." [SO:ke] -comment: Added to comply with the feature table. 
A single repeat. -synonym: "repeat unit" EXACT [] -xref: http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory region where transcription factor binding sites clustered to regulate various aspects of transcription activities. (CRMs can be located a few kb to hundred kb upstream of the basal promoter, in the coding sequence, within introns, or in the downstream 3'UTR sequences, as well as on different chromosome). A single gene can be regulated by multiple CRMs to give precise control of its spatial and temporal expression. CRMs function as nodes in large, intertwined regulatory network." [PMID:19660565, SO:SG] -comment: Requested by Stephen Grossmann Dec 2004. -subset: SOFA -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -synonym: "transcription factor module" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: SO:0001055 ! transcriptional_cis_regulatory_region -intersection_of: has_part SO:0000235 ! TF_binding_site -relationship: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000728 -name: intein -def: "A region of a peptide that is able to excise itself and rejoin the remaining portions with a peptide bond." [SO:ke] -comment: Intein-mediated protein splicing occurs after mRNA has been translated into a protein. -synonym: "protein intron" RELATED [] -xref: http://en.wikipedia.org/wiki/Intein "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000729 -name: intein_containing -def: "An attribute of protein-coding genes where the initial protein product contains an intein." [SO:ke] -synonym: "intein containing" EXACT [] -is_a: SO:0000010 ! protein_coding - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! 
assembly_component -relationship: part_of SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000731 -name: fragmentary -def: "An attribute to describe a feature that is incomplete." [SO:ke] -comment: Term added because of request by MO people. -synonym: "fragment" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000732 -name: predicted -def: "An attribute describing an unverified region." [SO:ke] -xref: http://en.wikipedia.org/wiki/Predicted "wiki" -is_a: SO:0000905 ! status - -[Term] -id: SO:0000733 -name: feature_attribute -def: "An attribute describing a located_sequence_feature." [SO:ke] -synonym: "feature attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000734 -name: exemplar_mRNA -def: "An exemplar is a representative cDNA sequence for each gene. The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: Added for the MO people. -synonym: "exemplar mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000864 ! exemplar -relationship: has_quality SO:0000864 ! exemplar - -[Term] -id: SO:0000735 -name: sequence_location -synonym: "sequence location" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_sequence -synonym: "organelle sequence" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "mitochondrial sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000738 -name: nuclear_sequence -synonym: "nuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -synonym: "nucleomorphic sequence" EXACT [] -is_a: SO:0000736 ! 
organelle_sequence - -[Term] -id: SO:0000740 -name: plastid_sequence -synonym: "plastid sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000741 -name: kinetoplast -alt_id: SO:0000826 -def: "A kinetoplast is an interlocked network of thousands of minicircles and tens of maxicircles, located near the base of the flagellum of some protozoan species." [PMID:8395055] -synonym: "kinetoplast_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Kinetoplast "wiki" -is_a: SO:0001026 ! genome -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0000742 ! maxicircle -intersection_of: has_part SO:0000980 ! minicircle -relationship: has_part SO:0000742 ! maxicircle -relationship: has_part SO:0000980 ! minicircle - -[Term] -id: SO:0000742 -name: maxicircle -alt_id: SO:0000827 -def: "A maxicircle is a replicon, part of a kinetoplast, that contains open reading frames and replicates via a rolling circle method." [PMID:8395055] -synonym: "maxicircle_chromosome" EXACT [] -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000743 -name: apicoplast_sequence -synonym: "apicoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -synonym: "chromoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -synonym: "chloroplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -synonym: "cyanelle sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -synonym: "leucoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -synonym: "proplastid sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_location -synonym: "plasmid location" EXACT [] -is_a: SO:0000735 ! 
sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -synonym: "amplification origin" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_location -synonym: "proviral location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -def: "The region of sequence that has been inserted and is being propagated by the clone." [SO:ke] -subset: SOFA -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000754 -name: lambda_vector -def: "The lambda bacteriophage is the vector for the linear lambda clone. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -synonym: "lambda vector" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000755 -name: plasmid_vector -synonym: "plasmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Plasmid_vector#Vectors "wiki" -is_a: SO:0000440 ! vector_replicon -intersection_of: SO:0001235 ! replicon -intersection_of: derives_from SO:0000155 ! plasmid -relationship: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template." [SO:ma] -synonym: "complementary DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/CDNA "wiki" -is_a: SO:0000352 ! 
DNA - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -synonym: "single strand cDNA" EXACT [] -synonym: "single stranded cDNA" EXACT [] -synonym: "single-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -synonym: "double strand cDNA" RELATED [] -synonym: "double stranded cDNA" EXACT [] -synonym: "double-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_obsolete: true - -[Term] -id: SO:0000760 -name: YAC_clone -is_obsolete: true - -[Term] -id: SO:0000761 -name: phagemid_clone -is_obsolete: true - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000763 -name: fosmid_clone -is_obsolete: true - -[Term] -id: SO:0000764 -name: BAC_clone -is_obsolete: true - -[Term] -id: SO:0000765 -name: cosmid_clone -is_obsolete: true - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -def: "A tRNA sequence that has a pyrrolysine anticodon, and a 3' pyrrolysine binding region." [SO:ke] -synonym: "pyrrolysyl tRNA" EXACT [] -synonym: "pyrrolysyl-transfer ribonucleic acid" EXACT [] -synonym: "pyrrolysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0001178 ! pyrrolysine_tRNA_primary_transcript - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome." [SO:ma] -is_a: SO:0000155 ! plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria. Processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw, issn:1362-4962] -comment: Added in response to comment from Kelly Williams from Indiana. Nov 2005. 
-synonym: "tmRNA coding piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw] -comment: Added in response to Kelly Williams from Indiana. Date: Nov 2005. -synonym: "tmRNA acceptor piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000771 -name: QTL -def: "A quantitative trait locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." [http://rgd.mcw.edu/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005. -synonym: "quantitative trait locus" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000772 -name: genomic_island -def: "A genomic island is an integrated mobile genetic element, characterized by size (over 10 Kb). It that has features that suggest a foreign origin. These can include nucleotide distribution (oligonucleotides signature, CG content etc.) that differs from the bulk of the chromosome and/or genes suggesting DNA mobility." [Phigo:at, SO:ke] -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -synonym: "genomic island" EXACT [] -xref: http://en.wikipedia.org/wiki/Genomic_island "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. 
They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "pathogenic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands. -synonym: "metabolic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -def: "An adaptive island is a genomic island that provides an adaptive advantage to the host." [SO:ke] -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands. Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "adaptive island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands. Evolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA gene. John T. Sullivan and Clive W. Ronso PNAS 1998 Apr 28 95 (9) 5145-5149. -synonym: "symbiosis island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendant of an rRNA." 
[SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000779 -name: engineered_episome -def: "An episome that is engineered." [SO:xp] -comment: Requested by Lynn Crosby Jan 2006. -synonym: "engineered episome" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -is_a: SO:0000768 ! episome -intersection_of: SO:0000768 ! episome -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000780 -name: transposable_element_attribute -comment: Added by KE Jan 2006 to capture the kinds of attributes of TEs -is_obsolete: true - -[Term] -id: SO:0000781 -name: transgenic -def: "Attribute describing sequence that has been integrated with foreign sequence." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000782 -name: natural -def: "An attribute describing a feature that occurs in nature." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000783 -name: engineered -def: "An attribute to describe a region that was modified in vitro." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000784 -name: foreign -def: "An attribute to describe a region from another species." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000785 -name: cloned_region -comment: Added in response to Lynn Crosby. A clone insert may be composed of many cloned regions. 
-synonym: "cloned region" EXACT [] -synonym: "cloned segment" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000786 -name: reagent_attribute -comment: Added jan 2006 by KE. -synonym: "reagent attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000787 -name: clone_attribute -is_obsolete: true - -[Term] -id: SO:0000788 -name: cloned -is_obsolete: true - -[Term] -id: SO:0000789 -name: validated -def: "An attribute to describe a feature that has been proven." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000790 -name: invalidated -def: "An attribute describing a feature that is invalidated." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000791 -name: cloned_genomic -is_obsolete: true - -[Term] -id: SO:0000792 -name: cloned_cDNA -is_obsolete: true - -[Term] -id: SO:0000793 -name: engineered_DNA -is_obsolete: true - -[Term] -id: SO:0000794 -name: engineered_rescue_region -def: "A rescue region that is engineered." [SO:xp] -synonym: "engineered rescue fragment" EXACT [] -synonym: "engineered rescue region" EXACT [] -synonym: "engineered rescue segment" EXACT [] -is_a: SO:0000411 ! rescue_region -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000411 ! rescue_region -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000795 -name: rescue_mini_gene -def: "A mini_gene that rescues." [SO:xp] -synonym: "rescue mini gene" EXACT [] -synonym: "rescue mini-gene" EXACT [] -is_a: SO:0000815 ! mini_gene -intersection_of: SO:0000815 ! mini_gene -intersection_of: has_quality SO:0000814 ! rescue -relationship: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000796 -name: transgenic_transposable_element -def: "TE that has been modified in vitro, including insertion of DNA derived from a source other than the originating TE." [FB:mc] -comment: Modified as requested by Lynn - FB. May 2007. 
-synonym: "transgenic transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: derives_from SO:0000151 ! clone -intersection_of: has_quality SO:0000781 ! transgenic -relationship: derives_from SO:0000151 ! clone -relationship: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000797 -name: natural_transposable_element -def: "TE that exists (or existed) in nature." [FB:mc] -synonym: "natural transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000782 ! natural -relationship: has_quality SO:0000782 ! natural - -[Term] -id: SO:0000798 -name: engineered_transposable_element -def: "TE that has been modified by manipulations in vitro." [FB:mc] -synonym: "engineered transposable element" EXACT [] -is_a: SO:0000101 ! transposable_element -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000799 -name: engineered_foreign_transposable_element -def: "A transposable_element that is engineered and foreign." [FB:mc] -synonym: "engineered foreign transposable element" EXACT [] -is_a: SO:0000720 ! foreign_transposable_element -is_a: SO:0000798 ! engineered_transposable_element -is_a: SO:0000805 ! engineered_foreign_region -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000800 -name: assortment_derived_duplication -def: "A multi-chromosome duplication aberration generated by reassortment of other aberration components." 
[FB:gm] -synonym: "assortment derived duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000801 -name: assortment_derived_deficiency_plus_duplication -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency and a duplication." [FB:gm] -synonym: "assortment derived deficiency plus duplication" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000802 -name: assortment_derived_deficiency -def: "A multi-chromosome deficiency aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment-derived deficiency" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000803 -name: assortment_derived_aneuploid -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency or a duplication." [FB:gm] -synonym: "assortment derived aneuploid" EXACT [] -is_a: SO:0001504 ! assortment_derived_variation - -[Term] -id: SO:0000804 -name: engineered_region -def: "A region that is engineered." [SO:xp] -synonym: "construct" EXACT [] -synonym: "engineered region" EXACT [] -synonym: "engineered sequence" EXACT [] -is_a: SO:0001409 ! biomaterial_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000805 -name: engineered_foreign_region -def: "A region that is engineered and foreign." [SO:xp] -synonym: "engineered foreign region" EXACT [] -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign -relationship: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000806 -name: fusion -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000807 -name: engineered_tag -def: "A tag that is engineered." [SO:xp] -synonym: "engineered tag" EXACT [] -is_a: SO:0000324 ! tag -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000324 ! tag -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000808 -name: validated_cDNA_clone -def: "A cDNA clone that has been validated." [SO:xp] -synonym: "validated cDNA clone" EXACT [] -is_a: SO:0000317 ! cDNA_clone -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000789 ! validated -relationship: has_quality SO:0000789 ! validated - -[Term] -id: SO:0000809 -name: invalidated_cDNA_clone -def: "A cDNA clone that is invalid." [SO:xp] -synonym: "invalidated cDNA clone" EXACT [] -is_a: SO:0000317 ! cDNA_clone -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000790 ! invalidated -relationship: has_quality SO:0000790 ! invalidated - -[Term] -id: SO:0000810 -name: chimeric_cDNA_clone -def: "A cDNA clone invalidated because it is chimeric." [SO:xp] -synonym: "chimeric cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA -relationship: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA - -[Term] -id: SO:0000811 -name: genomically_contaminated_cDNA_clone -def: "A cDNA clone invalidated by genomic contamination." [SO:xp] -synonym: "genomically contaminated cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000414 ! invalidated_by_genomic_contamination -relationship: has_quality SO:0000414 ! invalidated_by_genomic_contamination - -[Term] -id: SO:0000812 -name: polyA_primed_cDNA_clone -def: "A cDNA clone invalidated by polyA priming." 
[SO:xp] -synonym: "polyA primed cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000415 ! invalidated_by_genomic_polyA_primed_cDNA -relationship: has_quality SO:0000415 ! invalidated_by_genomic_polyA_primed_cDNA - -[Term] -id: SO:0000813 -name: partially_processed_cDNA_clone -def: "A cDNA invalidated clone by partial processing." [SO:xp] -synonym: "partially processed cDNA clone" EXACT [] -is_a: SO:0000809 ! invalidated_cDNA_clone -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000416 ! invalidated_by_partial_processing -relationship: has_quality SO:0000416 ! invalidated_by_partial_processing - -[Term] -id: SO:0000814 -name: rescue -def: "An attribute describing a region's ability, when introduced to a mutant organism, to re-establish (rescue) a phenotype." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000815 -name: mini_gene -def: "By definition, minigenes are short open-reading frames (ORF), usually encoding approximately 9 to 20 amino acids, which are expressed in vivo (as distinct from being synthesized as peptide or protein ex vivo and subsequently injected). The in vivo synthesis confers a distinct advantage: the expressed sequences can enter both antigen presentation pathways, MHC I (inducing CD8+ T- cells, which are usually cytotoxic T-lymphocytes (CTL)) and MHC II (inducing CD4+ T-cells, usually 'T-helpers' (Th)); and can encounter B-cells, inducing antibody responses. Three main vector approaches have been used to deliver minigenes: viral vectors, bacterial vectors and plasmid DNA." [PMID:15992143] -synonym: "mini gene" EXACT [] -is_a: SO:0000236 ! ORF - -[Term] -id: SO:0000816 -name: rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "rescue gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000814 ! 
rescue -relationship: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000817 -name: wild_type -def: "An attribute describing sequence with the genotype found in nature and/or standard laboratory stock." [SO:ke] -synonym: "wild type" EXACT [] -xref: http://en.wikipedia.org/wiki/Wild_type "wiki" -xref: loinc:LA9658-1 "wild type" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000818 -name: wild_type_rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "wild type rescue gene" EXACT [] -is_a: SO:0000816 ! rescue_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000817 ! wild_type -relationship: has_quality SO:0000817 ! wild_type - -[Term] -id: SO:0000819 -name: mitochondrial_chromosome -def: "A chromosome originating in a mitochondria." [SO:xp] -synonym: "mitochondrial chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence -relationship: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000820 -name: chloroplast_chromosome -def: "A chromosome originating in a chloroplast." [SO:xp] -synonym: "chloroplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000745 ! chloroplast_sequence -relationship: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000821 -name: chromoplast_chromosome -def: "A chromosome originating in a chromoplast." [SO:xp] -synonym: "chromoplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000744 ! chromoplast_sequence -relationship: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000822 -name: cyanelle_chromosome -def: "A chromosome originating in a cyanelle." [SO:xp] -synonym: "cyanelle chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! 
chromosome -intersection_of: has_origin SO:0000746 ! cyanelle_sequence -relationship: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000823 -name: leucoplast_chromosome -def: "A chromosome with origin in a leucoplast." [SO:xp] -synonym: "leucoplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000747 ! leucoplast_sequence -relationship: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000824 -name: macronuclear_chromosome -def: "A chromosome originating in a macronucleus." [SO:xp] -synonym: "macronuclear chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000083 ! macronuclear_sequence -relationship: has_origin SO:0000083 ! macronuclear_sequence - -[Term] -id: SO:0000825 -name: micronuclear_chromosome -def: "A chromosome originating in a micronucleus." [SO:xp] -synonym: "micronuclear chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000084 ! micronuclear_sequence -relationship: has_origin SO:0000084 ! micronuclear_sequence - -[Term] -id: SO:0000828 -name: nuclear_chromosome -def: "A chromosome originating in a nucleus." [SO:xp] -synonym: "nuclear chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000738 ! nuclear_sequence -relationship: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000829 -name: nucleomorphic_chromosome -def: "A chromosome originating in a nucleomorph." [SO:xp] -synonym: "nucleomorphic chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence -relationship: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." 
[SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000832 -name: promoter_region -def: "A region of sequence which is part of a promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -def: "A region of a mature transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! 
primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000838 -name: rRNA_primary_transcript_region -def: "A region of an rRNA primary transcript." [SO:ke] -comment: To allow transcribed_spacer_region to have a path to the root. -synonym: "rRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000840 -name: repeat_component -def: "A region of a repeated sequence." [SO:ke] -comment: A manufactured to group the parts of repeats, to give them an is_a path back to the root. -synonym: "repeat component" EXACT [] -is_a: SO:0001412 ! 
topologically_defined_region - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000843 -name: bacterial_RNApol_promoter_region -def: "A region which is part of a bacterial RNA polymerase promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of bacterial_RNApol_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000844 -name: RNApol_II_promoter_region -def: "A region of sequence which is a promoter for RNA polymerase II." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_II_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000845 -name: RNApol_III_promoter_type_1_region -def: "A region of sequence which is a promoter for RNA polymerase III type 1." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_1 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000846 -name: RNApol_III_promoter_type_2_region -def: "A region of sequence which is a promoter for RNA polymerase III type 2." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_2 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000847 -name: tmRNA_region -def: "A region of a tmRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of tmRNA, thus giving them an is_a path back to the root. 
-synonym: "tmRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000584 ! tmRNA - -[Term] -id: SO:0000848 -name: LTR_component -synonym: "long term repeat component" EXACT [] -synonym: "LTR component" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000849 -name: three_prime_LTR_component -synonym: "3' long terminal repeat component" EXACT [] -synonym: "three prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000850 -name: five_prime_LTR_component -synonym: "5' long term repeat component" EXACT [] -synonym: "five prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000851 -name: CDS_region -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -subset: SOFA -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0000853 -name: homologous_region -def: "A region that is homologous to another region." [SO:ke] -synonym: "homolog" EXACT [] -synonym: "homologous region" EXACT [] -synonym: "homologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Homology_(biology) "wiki" -is_a: SO:0000330 ! conserved_region -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000857 ! homologous -relationship: has_quality SO:0000857 ! homologous - -[Term] -id: SO:0000854 -name: paralogous_region -def: "A homologous_region that is paralogous to another region." [SO:ke] -comment: A term to be used in conjunction with the paralogous_to relationship. 
-synonym: "paralog" EXACT [] -synonym: "paralogous region" EXACT [] -synonym: "paralogue" EXACT [] -xref: http://en.wikipedia.org/wiki/Paralog#Paralogy "wiki" -is_a: SO:0000853 ! homologous_region -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000859 ! paralogous -relationship: has_quality SO:0000859 ! paralogous - -[Term] -id: SO:0000855 -name: orthologous_region -def: "A homologous_region that is orthologous to another region." [SO:ke] -comment: This term should be used in conjunction with the similarity relationships defined in SO. -synonym: "ortholog" EXACT [] -synonym: "orthologous region" EXACT [] -synonym: "orthologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Ortholog#Orthology "wiki" -is_a: SO:0000853 ! homologous_region -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000858 ! orthologous -relationship: has_quality SO:0000858 ! orthologous - -[Term] -id: SO:0000856 -name: conserved -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000857 -name: homologous -def: "Similarity due to common ancestry." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000858 -name: orthologous -def: "An attribute describing a kind of homology where divergence occurred after a speciation event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000859 -name: paralogous -def: "An attribute describing a kind of homology where divergence occurred after a duplication event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000860 -name: syntenic -def: "Attribute describing sequence regions occurring in same order on chromosome of different species." [SO:ke] -xref: http://en.wikipedia.org/wiki/Syntenic "wiki" -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000861 -name: capped_primary_transcript -def: "A primary transcript that is capped." [SO:xp] -synonym: "capped primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript -intersection_of: SO:0000185 ! 
primary_transcript -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped -relationship: adjacent_to SO:0000581 ! cap -relationship: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000862 -name: capped_mRNA -def: "An mRNA that is capped." [SO:xp] -synonym: "capped mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped -relationship: adjacent_to SO:0000581 ! cap -relationship: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000863 -name: mRNA_attribute -def: "An attribute describing an mRNA feature." [SO:ke] -synonym: "mRNA attribute" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000864 -name: exemplar -def: "An attribute describing a sequence is representative of a class of similar sequences." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000865 -name: frameshift -def: "An attribute describing a sequence that contains a mutation involving the deletion or insertion of one or more bases, where this number is not divisible by 3." [SO:ke] -xref: http://en.wikipedia.org/wiki/Frameshift "wiki" -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000866 -name: minus_1_frameshift -def: "A frameshift caused by deleting one base." [SO:ke] -synonym: "minus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000867 -name: minus_2_frameshift -def: "A frameshift caused by deleting two bases." [SO:ke] -synonym: "minus 2 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000868 -name: plus_1_frameshift -def: "A frameshift caused by inserting one base." [SO:ke] -synonym: "plus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000869 -name: plus_2_framshift -def: "A frameshift caused by inserting two bases." [SO:ke] -synonym: "plus 2 framshift" EXACT [] -is_a: SO:0000865 ! 
frameshift - -[Term] -id: SO:0000870 -name: trans_spliced -def: "An attribute describing transcript sequence that is created by splicing exons from diferent genes." [SO:ke] -synonym: "trans-spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000871 -name: polyadenylated_mRNA -def: "An mRNA that is polyadenylated." [SO:xp] -synonym: "polyadenylated mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000610 ! polyA_sequence -intersection_of: has_quality SO:0000246 ! polyadenylated -relationship: adjacent_to SO:0000610 ! polyA_sequence -relationship: has_quality SO:0000246 ! polyadenylated - -[Term] -id: SO:0000872 -name: trans_spliced_mRNA -def: "An mRNA that is trans-spliced." [SO:xp] -synonym: "trans-spliced mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -is_a: SO:0000479 ! trans_spliced_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000636 ! spliced_leader_RNA -intersection_of: has_quality SO:0000870 ! trans_spliced -relationship: adjacent_to SO:0000636 ! spliced_leader_RNA -relationship: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000873 -name: edited_transcript -def: "A transcript that is edited." [SO:ke] -synonym: "edited transcript" EXACT [] -is_a: SO:0000673 ! transcript -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000116 ! edited -relationship: guided_by SO:0000602 ! guide_RNA -relationship: has_part SO:0000977 ! anchor_binding_site -relationship: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000874 -name: edited_transcript_by_A_to_I_substitution -def: "A transcript that has been edited by A to I substitution." [SO:ke] -synonym: "edited transcript by A to I substitution" EXACT [] -is_a: SO:0000873 ! edited_transcript - -[Term] -id: SO:0000875 -name: bound_by_protein -def: "An attribute describing a sequence that is bound by a protein." [SO:ke] -synonym: "bound by protein" EXACT [] -is_a: SO:0000277 ! 
bound_by_factor - -[Term] -id: SO:0000876 -name: bound_by_nucleic_acid -def: "An attribute describing a sequence that is bound by a nucleic acid." [SO:ke] -synonym: "bound by nucleic acid" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000877 -name: alternatively_spliced -def: "An attribute describing a situation where a gene may encode for more than 1 transcript." [SO:ke] -synonym: "alternatively spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000878 -name: monocistronic -def: "An attribute describing a sequence that contains the code for one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000879 -name: dicistronic -def: "An attribute describing a sequence that contains the code for two gene products." [SO:ke] -is_a: SO:0000880 ! polycistronic - -[Term] -id: SO:0000880 -name: polycistronic -def: "An attribute describing a sequence that contains the code for more than one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000881 -name: recoded -def: "An attribute describing an mRNA sequence that has been reprogrammed at translation, causing localized alterations." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000882 -name: codon_redefined -def: "An attribute describing the alteration of codon meaning." [SO:ke] -synonym: "codon redefined" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000883 -name: stop_codon_read_through -def: "A stop codon redefined to be a new amino acid." [SO:ke] -synonym: "stop codon read through" EXACT [] -synonym: "stop codon readthrough" RELATED [] -is_a: SO:0000145 ! recoded_codon - -[Term] -id: SO:0000884 -name: stop_codon_redefined_as_pyrrolysine -def: "A stop codon redefined to be the new amino acid, pyrrolysine." [SO:ke] -synonym: "stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000883 ! 
stop_codon_read_through - -[Term] -id: SO:0000885 -name: stop_codon_redefined_as_selenocysteine -def: "A stop codon redefined to be the new amino acid, selenocysteine." [SO:ke] -synonym: "stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000886 -name: recoded_by_translational_bypass -def: "Recoded mRNA where a block of nucleotides is not translated." [SO:ke] -synonym: "recoded by translational bypass" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000887 -name: translationally_frameshifted -def: "Recoding by frameshifting a particular site." [SO:ke] -synonym: "translationally frameshifted" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000888 -name: maternally_imprinted_gene -def: "A gene that is maternally_imprinted." [SO:xp] -synonym: "maternally imprinted gene" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000135 ! maternally_imprinted -relationship: has_quality SO:0000135 ! maternally_imprinted - -[Term] -id: SO:0000889 -name: paternally_imprinted_gene -def: "A gene that is paternally imprinted." [SO:xp] -synonym: "paternally imprinted gene" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000136 ! paternally_imprinted -relationship: has_quality SO:0000136 ! paternally_imprinted - -[Term] -id: SO:0000890 -name: post_translationally_regulated_gene -def: "A gene that is post translationally regulated." [SO:xp] -synonym: "post translationally regulated gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000130 ! post_translationally_regulated -relationship: has_quality SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000891 -name: negatively_autoregulated_gene -def: "A gene that is negatively autoreguated." 
[SO:xp] -synonym: "negatively autoregulated gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000473 ! negatively_autoregulated -relationship: has_quality SO:0000473 ! negatively_autoregulated - -[Term] -id: SO:0000892 -name: positively_autoregulated_gene -def: "A gene that is positively autoregulated." [SO:xp] -synonym: "positively autoregulated gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000475 ! positively_autoregulated -relationship: has_quality SO:0000475 ! positively_autoregulated - -[Term] -id: SO:0000893 -name: silenced -def: "An attribute describing an epigenetic process where a gene is inactivated at transcriptional or translational level." [SO:ke] -xref: http://en.wikipedia.org/wiki/Silenced "wiki" -is_a: SO:0000126 ! transcriptionally_repressed - -[Term] -id: SO:0000894 -name: silenced_by_DNA_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA modifications, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0000895 -name: silenced_by_DNA_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA methylation, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA methylation" EXACT [] -is_a: SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000896 -name: translationally_regulated_gene -def: "A gene that is translationally regulated." [SO:xp] -synonym: "translationally regulated gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000131 ! translationally_regulated -relationship: has_quality SO:0000131 ! translationally_regulated - -[Term] -id: SO:0000897 -name: allelically_excluded_gene -def: "A gene that is allelically_excluded." 
[SO:xp] -synonym: "allelically excluded gene" EXACT [] -is_a: SO:0000898 ! epigenetically_modified_gene -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000137 ! allelically_excluded -relationship: has_quality SO:0000137 ! allelically_excluded - -[Term] -id: SO:0000898 -name: epigenetically_modified_gene -def: "A gene that is epigenetically modified." [SO:ke] -synonym: "epigenetically modified gene" EXACT [] -is_a: SO:0000704 ! gene -is_a: SO:0001720 ! epigenetically_modified_region -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000133 ! epigenetically_modified -relationship: has_quality SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000899 -name: nuclear_mitochondrial -def: "An attribute describing a nuclear pseudogene of a mitochndrial gene." [SO:ke] -synonym: "nuclear mitochondrial" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000900 -name: processed -def: "An attribute describing a pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promotors, but often including a polyA tail." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000901 -name: unequally_crossed_over -def: "An attribute describing a pseudogene that was created by tandem duplication and unequal crossing over during recombination." [SO:ke] -synonym: "unequally crossed over" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000902 -name: transgene -def: "A transgene is a gene that has been transferred naturally or by any of a number of genetic engineering techniques from one organism to another." [SO:xp] -xref: http://en.wikipedia.org/wiki/Transgene "wiki" -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000781 ! transgenic -relationship: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000903 -name: endogenous_retroviral_sequence -synonym: "endogenous retroviral sequence" EXACT [] -is_a: SO:0000751 ! 
proviral_location - -[Term] -id: SO:0000904 -name: rearranged_at_DNA_level -def: "An attribute to describe the sequence of a feature, where the DNA is rearranged." [SO:ke] -synonym: "rearranged at DNA level" EXACT [] -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000905 -name: status -def: "An attribute describing the status of a feature, based on the available evidence." [SO:ke] -comment: This term is the hypernym of attributes and should not be annotated to. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000906 -name: independently_known -def: "Attribute to describe a feature that is independently known - not predicted." [SO:ke] -synonym: "independently known" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000907 -name: supported_by_sequence_similarity -def: "An attribute to describe a feature that has been predicted using sequence similarity techniques." [SO:ke] -synonym: "supported by sequence similarity" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000908 -name: supported_by_domain_match -def: "An attribute to describe a feature that has been predicted using sequence similarity of a known domain." [SO:ke] -synonym: "supported by domain match" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000909 -name: supported_by_EST_or_cDNA -def: "An attribute to describe a feature that has been predicted using sequence similarity to EST or cDNA data." [SO:ke] -synonym: "supported by EST or cDNA" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000910 -name: orphan -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000911 -name: predicted_by_ab_initio_computation -def: "An attribute describing a feature that is predicted by a computer program that did not rely on sequence similarity." [SO:ke] -synonym: "predicted by ab initio computation" EXACT [] -is_a: SO:0000732 ! 
predicted - -[Term] -id: SO:0000912 -name: asx_turn -alt_id: BS:00203 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Aspartate or Asparagine (Asx), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0000913 -name: cloned_cDNA_insert -def: "A clone insert made from cDNA." [SO:xp] -synonym: "cloned cDNA insert" EXACT [] -is_a: SO:0000753 ! clone_insert -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000756 ! cDNA -relationship: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000914 -name: cloned_genomic_insert -def: "A clone insert made from genomic DNA." [SO:xp] -synonym: "cloned genomic insert" EXACT [] -is_a: SO:0000753 ! clone_insert -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000991 ! genomic_DNA -relationship: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000915 -name: engineered_insert -def: "A clone insert that is engineered." [SO:xp] -synonym: "engineered insert" EXACT [] -is_a: SO:0000753 ! clone_insert -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000916 -name: edit_operation -synonym: "edit operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000917 -name: insert_U -def: "An edit to insert a U." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "insert U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000918 -name: delete_U -def: "An edit to delete a uridine." 
[SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "delete U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000919 -name: substitute_A_to_I -def: "An edit to substitute an I for an A." [SO:ke] -synonym: "substitute A to I" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000920 -name: insert_C -def: "An edit to insert a cytidine." [SO:ke] -synonym: "insert C" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000921 -name: insert_dinucleotide -def: "An edit to insert a dinucleotide." [SO:ke] -synonym: "insert dinucleotide" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000922 -name: substitute_C_to_U -def: "An edit to substitute an U for a C." [SO:ke] -synonym: "substitute C to U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000923 -name: insert_G -def: "An edit to insert a G." [SO:ke] -synonym: "insert G" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000924 -name: insert_GC -def: "An edit to insert a GC dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GC" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000925 -name: insert_GU -def: "An edit to insert a GU dinucleotide." 
[SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000926 -name: insert_CU -def: "An edit to insert a CU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert CU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000927 -name: insert_AU -def: "An edit to insert a AU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. 
Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000928 -name: insert_AA -def: "An edit to insert a AA dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AA" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000929 -name: edited_mRNA -def: "An mRNA that is edited." [SO:xp] -synonym: "edited mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -is_a: SO:0000873 ! edited_transcript -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000116 ! edited -relationship: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000930 -name: guide_RNA_region -def: "A region of guide RNA." [SO:ma] -synonym: "guide RNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000602 ! guide_RNA - -[Term] -id: SO:0000931 -name: anchor_region -def: "A region of a guide_RNA that base-pairs to a target mRNA." [SO:jk] -synonym: "anchor region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000932 -name: pre_edited_mRNA -synonym: "pre-edited mRNA" EXACT [] -is_a: SO:0000120 ! 
protein_coding_primary_transcript - -[Term] -id: SO:0000933 -name: intermediate -def: "An attribute to describe a feature between stages of processing." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000934 -name: miRNA_target_site -def: "A miRNA target site is a binding site where the molecule is a micro RNA." [FB:cds] -synonym: "miRNA target site" EXACT [] -is_a: SO:0001655 ! nucleotide_binding_site - -[Term] -id: SO:0000935 -name: edited_CDS -def: "A CDS that is edited." [SO:xp] -synonym: "edited CDS" EXACT [] -is_a: SO:0000316 ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000116 ! edited -relationship: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000936 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -synonym: "vertebrate immunoglobulin T cell receptor rearranged segment" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000937 -name: vertebrate_immune_system_feature -is_obsolete: true - -[Term] -id: SO:0000938 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor rearranged gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000939 -name: vertebrate_immune_system_gene_recombination_signal_feature -synonym: "vertebrate immune system gene recombination signal feature" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000940 -name: recombinationally_rearranged -synonym: "recombinationally rearranged" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000941 -name: recombinationally_rearranged_vertebrate_immune_system_gene -def: "A recombinationally rearranged gene of the vertebrate immune system." [SO:xp] -synonym: "recombinationally rearranged vertebrate immune system gene" EXACT [] -is_a: SO:0000456 ! 
recombinationally_rearranged_gene - -[Term] -id: SO:0000942 -name: attP_site -def: "An integration/excision site of a phage chromosome at which a recombinase acts to insert the phage DNA at a cognate integration/excision site on a bacterial chromosome." [SO:as] -synonym: "attP site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0001042 ! phage_sequence - -[Term] -id: SO:0000943 -name: attB_site -def: "An integration/excision site of a bacterial chromosome at which a recombinase acts to insert foreign DNA containing a cognate integration/excision site." [SO:as] -synonym: "attB site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000944 -name: attL_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attB_site and the 3' portion of attP_site." [SO:as] -synonym: "attBP'" RELATED [] -synonym: "attL site" RELATED [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000945 -name: attR_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attP_site and the 3' portion of attB_site." [SO:as] -synonym: "attPB'" RELATED [] -synonym: "attR site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000946 -name: integration_excision_site -def: "A region specifically recognised by a recombinase, which inserts or removes another region marked by a distinct cognate integration/excision site." [SO:as] -synonym: "attachment site" RELATED [] -synonym: "integration excision site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000947 -name: resolution_site -def: "A region specifically recognized by a recombinase, which separates a physically contiguous circle of DNA into two physically separate circles." [SO:as] -synonym: "res site" EXACT [] -synonym: "resolution site" EXACT [] -is_a: SO:0000342 ! 
site_specific_recombination_target_region - -[Term] -id: SO:0000948 -name: inversion_site -def: "A region specifically recognised by a recombinase, which inverts the region flanked by a pair of sites." [SO:ma] -comment: A target region for site-specific inversion of a DNA region and which carries binding sites for a site-specific recombinase and accessory proteins as well as the site for specific cleavage by the recombinase. -synonym: "inversion site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000949 -name: dif_site -def: "A site at which replicated bacterial circular chromosomes are decatenated by site specific resolvase." [SO:as] -synonym: "dif site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000950 -name: attC_site -def: "An attC site is a sequence required for the integration of a DNA of an integron." [SO:as] -synonym: "attC site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000951 -name: eukaryotic_terminator -synonym: "eukaryotic terminator" EXACT [] -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000952 -name: oriV -def: "An origin of vegetative replication in plasmids and phages." [SO:as] -synonym: "origin of vegetative replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000953 -name: oriC -def: "An origin of bacterial chromosome replication." [SO:as] -synonym: "origin of bacterial chromosome replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000954 -name: DNA_chromosome -def: "Structural unit composed of a self-replicating, DNA molecule." [SO:ma] -synonym: "DNA chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000352 ! DNA -relationship: has_quality SO:0000352 ! 
DNA - -[Term] -id: SO:0000955 -name: double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded DNA molecule." [SO:ma] -synonym: "double stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! DNA_chromosome -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000985 ! double -relationship: has_quality SO:0000985 ! double - -[Term] -id: SO:0000956 -name: single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded DNA molecule." [SO:ma] -synonym: "single stranded DNA chromosome" EXACT [] -is_a: SO:0000954 ! DNA_chromosome -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000984 ! single -relationship: has_quality SO:0000984 ! single - -[Term] -id: SO:0000957 -name: linear_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear DNA molecule." [SO:ma] -synonym: "linear double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000958 -name: circular_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular DNA molecule." [SO:ma] -synonym: "circular double stranded DNA chromosome" EXACT [] -is_a: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000959 -name: linear_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear DNA molecule." [SO:ma] -synonym: "linear single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: SO:0000956 ! 
single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000960 -name: circular_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded DNA chromosome" EXACT [] -is_a: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000961 -name: RNA_chromosome -def: "Structural unit composed of a self-replicating, RNA molecule." [SO:ma] -synonym: "RNA chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000356 ! RNA -relationship: has_quality SO:0000356 ! RNA - -[Term] -id: SO:0000962 -name: single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded RNA molecule." [SO:ma] -synonym: "single stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! RNA_chromosome -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000984 ! single -relationship: has_quality SO:0000984 ! single - -[Term] -id: SO:0000963 -name: linear_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear RNA molecule." [SO:ma] -synonym: "linear single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000964 -name: linear_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear RNA molecule." [SO:ma] -synonym: "linear double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! 
double_stranded_RNA_chromosome -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear -relationship: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000965 -name: double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded RNA molecule." [SO:ma] -synonym: "double stranded RNA chromosome" EXACT [] -is_a: SO:0000961 ! RNA_chromosome -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000985 ! double -relationship: has_quality SO:0000985 ! double - -[Term] -id: SO:0000966 -name: circular_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded RNA chromosome" EXACT [] -is_a: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000967 -name: circular_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular RNA molecule." [SO:ma] -synonym: "circular double stranded RNA chromosome" EXACT [] -is_a: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular -relationship: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000968 -name: sequence_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "sequence replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000969 -name: rolling_circle -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070581. 
-synonym: "rolling circle" EXACT [] -xref: http://en.wikipedia.org/wiki/Rolling_circle "wiki" -is_obsolete: true - -[Term] -id: SO:0000970 -name: theta_replication -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070582 -synonym: "theta replication" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000971 -name: DNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0006260. -synonym: "DNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000972 -name: RNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "RNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000973 -name: insertion_sequence -def: "A terminal_inverted_repeat_element that is bacterial and only encodes the functions required for its transposition between these inverted repeats." [SO:as] -synonym: "insertion sequence" EXACT [] -synonym: "IS" RELATED [] -xref: http://en.wikipedia.org/wiki/Insertion_sequence "wiki" -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000975 -name: minicircle_gene -synonym: "minicircle gene" EXACT [] -is_a: SO:0000089 ! kinetoplast_gene -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000980 ! minicircle -relationship: part_of SO:0000980 ! minicircle - -[Term] -id: SO:0000976 -name: cryptic -def: "A feature_attribute describing a feature that is not manifest under normal conditions." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000977 -name: anchor_binding_site -comment: Part of an edited transcript only. -synonym: "anchor binding site" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000978 -name: template_region -def: "A region of a guide_RNA that specifies the insertions and deletions of bases in the editing of a target mRNA." 
[SO:jk] -synonym: "information region" EXACT [] -synonym: "template region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000979 -name: gRNA_encoding -def: "A non-protein_coding gene that encodes a guide_RNA." [SO:ma] -synonym: "gRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000980 -name: minicircle -alt_id: SO:0000974 -def: "A minicircle is a replicon, part of a kinetoplast, that encodes for guide RNAs." [PMID:8395055] -synonym: "minicircle_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Minicircle "wiki" -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000981 -name: rho_dependent_bacterial_terminator -synonym: "rho dependent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000982 -name: rho_independent_bacterial_terminator -synonym: "rho independent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000983 -name: strand_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "strand attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000984 -name: single -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000985 -name: double -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000986 -name: topology_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "topology attribute" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000987 -name: linear -def: "A quality of a nucleotide polymer that has a 3'-terminal residue and a 5'-terminal residue." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. 
SO workshop, September 2006. -synonym: "two-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000988 ! circular - -[Term] -id: SO:0000988 -name: circular -def: "A quality of a nucleotide polymer that has no terminal nucleotide residues." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "zero-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute - -[Term] -id: SO:0000989 -name: class_II_RNA -def: "Small non-coding RNA (59-60 nt long) containing 5' and 3' ends that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -synonym: "class II RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000990 -name: class_I_RNA -def: "Small non-coding RNA (55-65 nt long) containing highly conserved 5' and 3' ends (16 and 8 nt, respectively) that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -comment: Requested by Karen Pilcher - Dictybase. song-Term Tracker-1574577. -synonym: "class I RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000991 -name: genomic_DNA -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "genomic DNA" EXACT [] -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000992 -name: BAC_cloned_genomic_insert -comment: Requested by Andy Schroder - Flybase Harvard, Nov 2006. -synonym: "BAC cloned genomic insert" EXACT [] -is_a: SO:0000914 ! cloned_genomic_insert -intersection_of: SO:0000914 ! cloned_genomic_insert -intersection_of: derives_from SO:0000153 ! BAC -relationship: derives_from SO:0000153 ! BAC - -[Term] -id: SO:0000993 -name: consensus -comment: Term added Dec 06 to comply with mapping to MGED terms. It should be used to generate consensus regions. 
The specific cross product terms they require are consensus_region and consensus_mRNA. -is_a: SO:0000905 ! status - -[Term] -id: SO:0000994 -name: consensus_region -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus region" EXACT [] -is_a: SO:0001410 ! experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000993 ! consensus -relationship: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000995 -name: consensus_mRNA -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus mRNA" EXACT [] -is_a: SO:0000234 ! mRNA -is_a: SO:0000994 ! consensus_region -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000993 ! consensus -relationship: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000996 -name: predicted_gene -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "predicted gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000732 ! predicted -relationship: has_quality SO:0000732 ! predicted - -[Term] -id: SO:0000997 -name: gene_fragment -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "gene fragment" EXACT [] -is_a: SO:0000842 ! gene_component_region -intersection_of: SO:0000842 ! gene_component_region -intersection_of: has_quality SO:0000731 ! fragmentary -relationship: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0000998 -name: recursive_splice_site -def: "A recursive splice site is a splice site which subdivides a large intron. Recursive splicing is a mechanism that splices large introns by sub dividing the intron at non exonic elements and alternate exons." [http://www.genetics.org/cgi/content/full/170/2/661] -synonym: "recursive splice site" EXACT [] -is_a: SO:0001419 ! 
cis_splice_site - -[Term] -id: SO:0000999 -name: BAC_end -def: "A region of sequence from the end of a BAC clone that may provide a highly specific marker." [SO:ke] -comment: Requested by Keith Boroevich December, 2006. -synonym: "BAC end" EXACT [] -synonym: "BAC end sequence" EXACT [] -synonym: "BES" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000153 ! BAC - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001003 -name: solo_LTR -def: "A recombination product between the 2 LTR of the same element." [SO:ke] -comment: Requested by Hadi Quesneville January 2007. -synonym: "solo LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0001004 -name: low_complexity -synonym: "low complexity" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0001005 -name: low_complexity_region -synonym: "low complexity region" EXACT [] -is_a: SO:0001410 ! 
experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001004 ! low_complexity -relationship: has_quality SO:0001004 ! low_complexity - -[Term] -id: SO:0001006 -name: prophage -def: "A phage genome after it has established in the host genome in a latent/immune state either as a plasmid or as an integrated \"island\"." [GOC:jl] -xref: http://en.wikipedia.org/wiki/Prophage "wiki" -is_a: SO:0000113 ! proviral_region - -[Term] -id: SO:0001007 -name: cryptic_prophage -def: "A remnant of an integrated prophage in the host genome or an \"island\" in the host genome that includes phage like-genes." [GOC:jl] -comment: This is not cryptic in the same sense as a cryptic gene or cryptic splice site. -synonym: "cryptic prophage" EXACT [] -xref: http://ecoliwiki.net/colipedia/index.php/Category:Cryptic_Prophage.w -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001008 -name: tetraloop -def: "A base-paired stem with loop of 4 non-hydrogen bonded nucleotides." [SO:ke] -xref: http://en.wikipedia.org/wiki/Tetraloop "wiki" -is_a: SO:0000313 ! stem_loop - -[Term] -id: SO:0001009 -name: DNA_constraint_sequence -def: "A double-stranded DNA used to control macromolecular structure and function." [http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au\]&dispmax=50] -synonym: "DNA constraint" EXACT [] -synonym: "DNA constraint sequence" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0001010 -name: i_motif -def: "A cytosine rich domain whereby strands associate both inter- and intramolecularly at moderately acidic pH." [PMID:9753739] -synonym: "i motif" EXACT [] -synonym: "short intercalated motif" EXACT [] -is_a: SO:0000142 ! DNA_sequence_secondary_structure - -[Term] -id: SO:0001011 -name: PNA_oligo -def: "Peptide nucleic acid, is a chemical not known to occur naturally but is artificially synthesized and used in some biological research and medical treatments. 
The PNA backbone is composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [SO:ke] -synonym: "peptide nucleic acid" EXACT [] -synonym: "PNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Peptide_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001184 ! PNA -relationship: has_quality SO:0001184 ! PNA - -[Term] -id: SO:0001012 -name: DNAzyme -def: "A DNA sequence with catalytic activity." [SO:cb] -comment: Added by request from Colin Batchelor. -synonym: "catalytic DNA" EXACT [] -synonym: "deoxyribozyme" RELATED [] -synonym: "DNA enzyme" EXACT [] -is_a: SO:0000696 ! oligo -intersection_of: SO:0000696 ! oligo -intersection_of: has_quality SO:0001185 ! enzymatic -relationship: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0001013 -name: MNP -def: "A multiple nucleotide polymorphism with alleles of common length > 1, for example AAA/TTT." [http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?rs=rs2067431] -synonym: "multiple nucleotide polymorphism" RELATED [] -is_a: SO:0002007 ! MNV - -[Term] -id: SO:0001014 -name: intron_domain -comment: Requested by Colin Batchelor, Feb 2007. -synonym: "intron domain" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000188 ! intron - -[Term] -id: SO:0001015 -name: wobble_base_pair -def: "A type of non-canonical base pairing, most commonly between G and U, which is important for the secondary structure of RNAs. It has similar thermodynamic stability to the Watson-Crick pairing. Wobble base pairs only have two hydrogen bonds. Other wobble base pair possibilities are I-A, I-U and I-C." [PMID:11256617] -synonym: "wobble base pair" EXACT [] -synonym: "wobble pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Wobble_base_pair "wiki" -is_a: SO:0000028 ! 
base_pair - -[Term] -id: SO:0001016 -name: internal_guide_sequence -def: "A purine-rich sequence in the group I introns which determines the locations of the splice sites in group I intron splicing and has catalytic activity." [SO:cb] -synonym: "IGS" EXACT [] -synonym: "internal guide sequence" EXACT [] -is_a: SO:0001014 ! intron_domain -relationship: part_of SO:0000587 ! group_I_intron - -[Term] -id: SO:0001017 -name: silent_mutation -def: "A sequence variant that does not affect protein function. Silent mutations may occur in genic ( CDS, UTR, intron etc) and intergenic regions. Silent mutations may have affects on processes such as splicing and regulation." [SO:ke] -comment: Added in March 2007 in after meeting with PharmGKB. Although this term is in common usage, it is better to annotate with the most specific term possible, such as synonymous codon, intron variant etc. -synonym: "silent mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Silent_mutation "wiki" -xref: loinc:LA6700-4 "Silent" -is_a: SO:0001878 ! feature_variant - -[Term] -id: SO:0001018 -name: epitope -def: "A binding site that, in the molecule, interacts selectively and non-covalently with antibodies, B cells or T cells." [http://en.wikipedia.org/wiki/Epitope, SO:cb] -comment: Requested by Trish Whetzel. -xref: http://en.wikipedia.org/wiki/Epitope "wiki" -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -subset: SOFA -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0001020 -name: sequence_variant_affecting_copy_number -comment: OBSOLETE: This term was deleted as it conflated more than one term. The alteration is separate from the effect. 
-synonym: "mutation affecting copy number" EXACT [] -synonym: "sequence variant affecting copy number" EXACT [] -is_obsolete: true -replaced_by: SO:0001563 - -[Term] -id: SO:0001021 -name: chromosome_breakpoint -alt_id: SO:0001242 -synonym: "aberration breakpoint" EXACT [] -synonym: "aberration_junction" EXACT [] -synonym: "chromosome breakpoint" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0001022 -name: inversion_breakpoint -def: "The point within a chromosome where an inversion begins or ends." [SO:cb] -synonym: "inversion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001023 -name: allele -def: "An allele is one of a set of coexisting sequence variants of a gene." [SO:immuno_workshop] -synonym: "allelomorph" EXACT [] -xref: http://en.wikipedia.org/wiki/Allele "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000704 ! gene - -[Term] -id: SO:0001024 -name: haplotype -def: "A haplotype is one of a set of coexisting sequence variants of a haplotype block." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Haplotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0000355 ! haplotype_block - -[Term] -id: SO:0001025 -name: polymorphic_sequence_variant -def: "A sequence variant that is segregating in one or more natural populations of a species." [SO:immuno_workshop] -synonym: "polymorphic sequence variant" EXACT [] -is_a: SO:0001023 ! allele - -[Term] -id: SO:0001026 -name: genome -def: "A genome is the sum of genetic material within a cell or virion." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genome "wiki" -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0001235 ! replicon -relationship: has_part SO:0001235 ! replicon - -[Term] -id: SO:0001027 -name: genotype -def: "A genotype is a variant genome, complete or incomplete." 
[SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genotype "wiki" -is_a: SO:0001507 ! variant_collection -relationship: variant_of SO:0001026 ! genome - -[Term] -id: SO:0001028 -name: diplotype -def: "A diplotype is a pair of haplotypes from a given individual. It is a genotype where the phase is known." [SO:immuno_workshop] -is_a: SO:0001507 ! variant_collection - -[Term] -id: SO:0001029 -name: direction_attribute -synonym: "direction attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001030 -name: forward -def: "Forward is an attribute of the feature, where the feature is in the 5' to 3' direction." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001031 -name: reverse -def: "Reverse is an attribute of the feature, where the feature is in the 3' to 5' direction. Again could be applied to primer." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001032 -name: mitochondrial_DNA -comment: This terms is used by MO. -synonym: "mitochondrial DNA" EXACT [] -synonym: "mtDNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_DNA "wiki" -is_a: SO:0000737 ! mitochondrial_sequence -intersection_of: SO:0000737 ! mitochondrial_sequence -intersection_of: has_quality SO:0000352 ! DNA -relationship: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001033 -name: chloroplast_DNA -comment: This term is used by MO. -synonym: "chloroplast DNA" EXACT [] -is_a: SO:0000745 ! chloroplast_sequence -intersection_of: SO:0000745 ! chloroplast_sequence -intersection_of: has_quality SO:0000352 ! DNA -relationship: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001034 -name: miRtron -def: "A de-branched intron which mimics the structure of pre-miRNA and enters the miRNA processing pathway without Drosha mediated cleavage." [PMID:17589500, SO:ma] -comment: Ruby et al. Nature 448:83 describe a new class of miRNAs that are derived from de-branched introns. -is_a: SO:0001014 ! 
intron_domain -relationship: has_part SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001035 -name: piRNA -def: "A small non coding RNA, part of a silencing system that prevents the spreading of selfish genetic elements." [SO:ke] -synonym: "piwi-associated RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/PiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001036 -name: arginyl_tRNA -def: "A tRNA sequence that has an arginine anticodon, and a 3' arginine binding region." [SO:ke] -synonym: "arginyl tRNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000212 ! arginine_tRNA_primary_transcript - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular mobility, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -subset: SOFA -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001234 ! mobile -relationship: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0001038 -name: extrachromosomal_mobile_genetic_element -def: "An MGE that is not integrated into the host chromosome." [SO:ke] -synonym: "extrachromosomal mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -subset: SOFA -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001040 -name: integrated_plasmid -def: "A plasmid sequence that is integrated within the host chromosome." [SO:ke] -synonym: "integrated plasmid" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element -intersection_of: SO:0001039 ! 
integrated_mobile_genetic_element -intersection_of: derives_from SO:0000155 ! plasmid -relationship: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0001041 -name: viral_sequence -def: "The region of nucleotide sequence of a virus, a submicroscopic particle that replicates by infecting a host cell." [SO:ke] -comment: The definitions of the children of this term were revised Decemeber 2007 after discussion on song-devel. The resulting definitions are slightly unweildy but hopefully more logically correct. -synonym: "viral sequence" EXACT [] -synonym: "virus sequence" EXACT [] -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0001042 -name: phage_sequence -def: "The nucleotide sequence of a virus that infects bacteria." [SO:ke] -synonym: "bacteriophage" EXACT [] -synonym: "phage" EXACT [] -synonym: "phage sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Bacteriophage "wiki" -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001043 -name: attCtn_site -def: "An attachment site located on a conjugative transposon and used for site-specific integration of a conjugative transposon." [Phigo:at] -synonym: "attCtn site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000371 ! conjugative_transposon - -[Term] -id: SO:0001044 -name: nuclear_mt_pseudogene -def: "A nuclear pseudogene of either coding or non-coding mitochondria derived sequence." [SO:xp] -comment: Definition change requested by Val, 3172757. -synonym: "nuclear mitochondrial pseudogene" EXACT [] -synonym: "nuclear mt pseudogene" EXACT [] -synonym: "NUMT" EXACT [] -xref: http://en.wikipedia.org/wiki/Numt "wikipedia" -is_a: SO:0001760 ! non_processed_pseudogene - -[Term] -id: SO:0001045 -name: cointegrated_plasmid -def: "A MGE region consisting of two fused plasmids resulting from a replicative transposition event." 
[phigo:at] -synonym: "cointegrated plasmid" EXACT [] -synonym: "cointegrated replicon" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0001046 -name: IRLinv_site -def: "Component of the inversion site located at the left of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRLinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001047 -name: IRRinv_site -def: "Component of the inversion site located at the right of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRRinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001048 -name: inversion_site_part -def: "A region located within an inversion site." [SO:ke] -comment: A term created to allow the parts of an inversion site have an is_a path back to the root. -synonym: "inversion site part" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0001049 -name: defective_conjugative_transposon -def: "An island that contains genes for integration/excision and the gene and site for the initiation of intercellular transfer by conjugation. It can be complemented for transfer by a conjugative transposon." [Phigo:ariane] -synonym: "defective conjugative transposon" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001050 -name: repeat_fragment -def: "A portion of a repeat, interrupted by the insertion of another element." [SO:ke] -comment: Requested by Chris Smith, and others at Flybase to help annotate nested repeats. -synonym: "repeat fragment" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0001649 ! 
nested_repeat - -[Term] -id: SO:0001051 -name: nested_region -is_obsolete: true - -[Term] -id: SO:0001052 -name: nested_repeat -is_obsolete: true - -[Term] -id: SO:0001053 -name: nested_transposon -is_obsolete: true - -[Term] -id: SO:0001054 -name: transposon_fragment -def: "A portion of a transposon, interrupted by the insertion of another element." [SO:ke] -synonym: "transposon fragment" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0001648 ! nested_transposon - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [PMID:9679020, SO:regcreative] -subset: SOFA -synonym: "transcription-control region" EXACT [] -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." [SO:ke] -subset: SOFA -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001057 -name: enhanceosome -is_obsolete: true - -[Term] -id: SO:0001058 -name: promoter_targeting_sequence -def: "A transcriptional_cis_regulatory_region that restricts the activity of a CRM to a single promoter and which functions only when both itself and an insulator are located between the CRM and the promoter." [SO:regcreative] -synonym: "promoter targeting sequence" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -alt_id: SO:1000004 -alt_id: SO:1000007 -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -comment: Merged with partially characterized change in nucleotide sequence. 
-subset: SOFA -synonym: "partially characterised change in DNA sequence" NARROW [] -synonym: "partially_characterised_change_in_DNA_sequence" NARROW [] -synonym: "sequence alteration" EXACT [] -synonym: "sequence variation" RELATED [] -synonym: "uncharacterised_change_in_nucleotide_sequence" NARROW [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001060 -name: sequence_variant -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -synonym: "ANNOVAR:unknown" RELATED VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "sequence variant" EXACT [] -synonym: "VAAST:sequence_variant" EXACT VAR [] - -[Term] -id: SO:0001061 -name: propeptide_cleavage_site -alt_id: BS:00063 -def: "The propeptide_cleavage_site is the arginine/lysine boundary on a propeptide where cleavage occurs." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "propeptide cleavage site" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0001062 -name: propeptide -alt_id: BS:00077 -def: "Part of a peptide chain which is cleaved off during the formation of the mature protein." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "propep" RELATED [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Propeptide "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -subset: SOFA -synonym: "immature peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001064 -name: active_peptide -alt_id: BS:00076 -def: "Active peptides are proteins which are biologically active, released from a precursor molecule." 
[EBIBS:GAR, UniProt:curation_manual] -comment: Hormones, neuropeptides, antimicrobial peptides, are active peptides. They are typically short (<40 amino acids) in length. -subset: biosapiens -synonym: "active peptide" EXACT [] -synonym: "peptide" BROAD [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Peptide "wiki" -is_a: SO:0000419 ! mature_protein_region - -[Term] -id: SO:0001066 -name: compositionally_biased_region_of_peptide -alt_id: BS:00068 -def: "Polypeptide region that is rich in a particular amino acid or homopolymeric and greater than three residues in length." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "compbias" RELATED [uniprot:feature_type] -synonym: "compositional bias" RELATED [] -synonym: "compositionally biased" RELATED [] -synonym: "compositionally biased region of peptide" RELATED [] -synonym: "compositionally_biased_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001067 -name: polypeptide_motif -alt_id: BS:00032 -def: "A sequence motif is a short (up to 20 amino acids) region of biological interest. Such motifs, although they are too short to constitute functional domains, share sequence similarities and are conserved in different proteins. They display a common function (protein-binding, subcellular location etc.)." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "motif" BROAD [uniprot:feature_type] -synonym: "polypeptide motif" EXACT [] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001068 -name: polypeptide_repeat -alt_id: BS:00070 -def: "A polypeptide_repeat is a single copy of an internal sequence repetition." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "polypeptide repeat" EXACT [] -synonym: "repeat" RELATED [uniprot:feature_type] -is_a: SO:0100021 ! 
polypeptide_conserved_region - -[Term] -id: SO:0001070 -name: polypeptide_structural_region -alt_id: BS:00337 -def: "Region of polypeptide with a given structural property." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "polypeptide structural region" EXACT [] -synonym: "structural_region" RELATED [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001071 -name: membrane_structure -alt_id: BS:00128 -def: "Arrangement of the polypeptide with respect to the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "membrane structure" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001072 -name: extramembrane_polypeptide_region -alt_id: BS:00154 -def: "Polypeptide region that is localized outside of a lipid bilayer." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "extramembrane" RELATED BS [] -synonym: "extramembrane polypeptide region" EXACT [] -synonym: "extramembrane_region" RELATED BS [] -synonym: "topo_dom" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001073 -name: cytoplasmic_polypeptide_region -alt_id: BS:00145 -def: "Polypeptide region that is localized inside the cytoplasm." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "cytoplasm_location" EXACT BS [] -synonym: "cytoplasmic polypeptide region" EXACT [] -synonym: "inside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001074 -name: non_cytoplasmic_polypeptide_region -alt_id: BS:00144 -def: "Polypeptide region that is localized outside of a lipid bilayer and outside of the cytoplasm." [EBIBS:GAR, SO:cb] -comment: This could be inside an organelle within the cell. -subset: biosapiens -synonym: "non cytoplasmic polypeptide region" EXACT [] -synonym: "non_cytoplasm_location" EXACT BS [] -synonym: "outside" RELATED BS [] -is_a: SO:0001072 ! 
extramembrane_polypeptide_region - -[Term] -id: SO:0001075 -name: intramembrane_polypeptide_region -alt_id: BS:00156 -def: "Polypeptide region present in the lipid bilayer." [EBIBS:GAR] -subset: biosapiens -synonym: "intramembrane" RELATED BS [] -synonym: "intramembrane polypeptide region" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001076 -name: membrane_peptide_loop -alt_id: BS:00155 -def: "Polypeptide region localized within the lipid bilayer where both ends traverse the same membrane." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "membrane peptide loop" EXACT [] -synonym: "membrane_loop" RELATED BS [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001077 -name: transmembrane_polypeptide_region -alt_id: BS:00158 -def: "Polypeptide region traversing the lipid bilayer." [EBIBS:GAR, UniProt:curator_manual] -subset: biosapiens -synonym: "transmem" RELATED BS [uniprot:feature_type] -synonym: "transmembrane" RELATED BS [] -synonym: "transmembrane polypeptide region" EXACT [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001078 -name: polypeptide_secondary_structure -alt_id: BS:00003 -def: "A region of peptide with secondary structure has hydrogen bonding along the peptide chain that causes a defined conformation of the chain." [EBIBS:GAR] -comment: Biosapien term was secondary_structure. -subset: biosapiens -synonym: "2nary structure" RELATED BS [] -synonym: "polypeptide secondary structure" EXACT [] -synonym: "secondary structure" RELATED BS [] -synonym: "secondary structure region" RELATED BS [] -synonym: "secondary_structure" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Secondary_structure "wiki" -is_a: SO:0001070 ! 
polypeptide_structural_region - -[Term] -id: SO:0001079 -name: polypeptide_structural_motif -alt_id: BS:0000338 -def: "Motif is a three-dimensional structural element within the chain, which appears also in a variety of other molecules. Unlike a domain, a motif does not need to form a stable globular unit." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide structural motif" RELATED [] -synonym: "structural_motif" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Structural_motif "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001080 -name: coiled_coil -alt_id: BS:00041 -def: "A coiled coil is a structural motif in proteins, in which alpha-helices are coiled together like the strands of a rope." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "coiled" RELATED BS [uniprot:feature_type] -synonym: "coiled coil" EXACT [] -xref: http://en.wikipedia.org/wiki/Coiled_coil "wiki" -is_a: SO:0001079 ! polypeptide_structural_motif - -[Term] -id: SO:0001081 -name: helix_turn_helix -alt_id: BS:00147 -def: "A motif comprising two helices separated by a turn." [EBIBS:GAR] -subset: biosapiens -synonym: "helix turn helix" EXACT [] -synonym: "helix-turn-helix" EXACT [] -synonym: "HTH" RELATED BS [] -is_a: SO:0001079 ! polypeptide_structural_motif -relationship: has_part SO:0001114 ! peptide_helix -relationship: has_part SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001082 -name: polypeptide_sequencing_information -alt_id: BS:00125 -def: "Incompatibility in the sequence due to some experimental problem." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "sequencing_information" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0001083 -name: non_adjacent_residues -alt_id: BS:00182 -def: "Indicates that two consecutive residues in a fragment sequence are not consecutive in the full-length protein and that there are a number of unsequenced residues between them." 
[EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "non consecutive" EXACT [] -synonym: "non_cons" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001084 -name: non_terminal_residue -alt_id: BS:00072 -def: "The residue at an extremity of the sequence is not the terminal residue." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "non terminal" EXACT [] -synonym: "non_ter" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001085 -name: sequence_conflict -alt_id: BS:00069 -def: "Different sources report differing sequences." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "conflict" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001086 -name: sequence_uncertainty -alt_id: BS:00181 -def: "Describes the positions in a sequence where the authors are unsure about the sequence assignment." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "unsure" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001087 -name: cross_link -alt_id: BS:00178 -def: "Posttranslationally formed amino acid bonds." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "cross link" EXACT [] -synonym: "crosslink" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001088 -name: disulfide_bond -alt_id: BS:00028 -def: "The covalent bond between sulfur atoms that binds two peptide chains or different parts of one peptide chain and is a structural determinant in many protein molecules." [EBIBS:GAR, UniProt:curation_manual] -comment: 2 discreet & joined. 
-subset: biosapiens -synonym: "disulfid" RELATED [] -synonym: "disulfide" RELATED [] -synonym: "disulfide bond" RELATED [] -synonym: "disulphide" EXACT [] -synonym: "disulphide bond" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001089 -name: post_translationally_modified_region -alt_id: BS:00052 -def: "A region where a transformation occurs in a protein after it has been synthesized. This which may regulate, stabilize, crosslink or introduce new chemical functionalities in the protein." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mod_res" EXACT [uniprot:feature_type] -synonym: "modified residue" EXACT [] -synonym: "post_translational_modification" EXACT [] -xref: http://en.wikipedia.org/wiki/Post_translational_modification "wiki" -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001090 -name: covalent_binding_site -alt_id: BS:00246 -def: "Binding involving a covalent bond." [EBIBS:GAR] -subset: biosapiens -synonym: "covalent binding site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001091 -name: non_covalent_binding_site -alt_id: BS:00029 -def: "Binding site for any chemical group (co-enzyme, prosthetic group, etc.)." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "binding" RELATED [uniprot:curation] -synonym: "binding site" RELATED [] -synonym: "non covalent binding site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001092 -name: polypeptide_metal_contact -alt_id: BS:00027 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with metal ions." [EBIBS:GAR, SO:cb, UniProt:curation_manual] -comment: Residue is part of a binding site for a metal ion. -subset: biosapiens -synonym: "metal_binding" RELATED [] -is_a: SO:0001656 ! metal_binding_site -is_a: SO:0100002 ! 
molecular_contact_region - -[Term] -id: SO:0001093 -name: protein_protein_contact -alt_id: BS:00131 -def: "A binding site that, in the protein molecule, interacts selectively and non-covalently with polypeptide residues." [EBIBS:GAR, UniProt:Curation_manual] -subset: biosapiens -synonym: "protein protein contact" EXACT [] -synonym: "protein protein contact site" EXACT [] -synonym: "protein_protein_interaction" RELATED [] -xref: http://en.wikipedia.org/wiki/Protein_protein_interaction "wiki" -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001094 -name: polypeptide_calcium_ion_contact_site -alt_id: BS:00186 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with calcium ions." [EBIBS:GAR] -comment: Residue involved in contact with calcium. -subset: biosapiens -synonym: "ca bind" RELATED [] -synonym: "ca_bind" EXACT BS [uniprot:feature_type] -synonym: "Ca_contact_site" EXACT [] -synonym: "polypeptide calcium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001095 -name: polypeptide_cobalt_ion_contact_site -alt_id: BS:00136 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with cobalt ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Co_contact_site" EXACT [] -synonym: "polypeptide cobalt ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001096 -name: polypeptide_copper_ion_contact_site -alt_id: BS:00146 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with copper ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Cu_contact_site" EXACT [] -synonym: "polypeptide copper ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001097 -name: polypeptide_iron_ion_contact_site -alt_id: BS:00137 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with iron ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Fe_contact_site" EXACT [] -synonym: "polypeptide iron ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001098 -name: polypeptide_magnesium_ion_contact_site -alt_id: BS:00187 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with magnesium ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Mg_contact_site" EXACT [] -synonym: "polypeptide magnesium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001099 -name: polypeptide_manganese_ion_contact_site -alt_id: BS:00140 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with manganese ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Mn_contact_site" EXACT [] -synonym: "polypeptide manganese ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001100 -name: polypeptide_molybdenum_ion_contact_site -alt_id: BS:00141 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with molybdenum ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "Mo_contact_site" EXACT [] -synonym: "polypeptide molybdenum ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001101 -name: polypeptide_nickel_ion_contact_site -alt_id: BS:00142 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with nickel ions." [EBIBS:GAR] -subset: biosapiens -synonym: "Ni_contact_site" EXACT [] -synonym: "polypeptide nickel ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001102 -name: polypeptide_tungsten_ion_contact_site -alt_id: BS:00143 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with tungsten ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "polypeptide tungsten ion contact site" EXACT [] -synonym: "W_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001103 -name: polypeptide_zinc_ion_contact_site -alt_id: BS:00185 -def: "A binding site that, in the polypeptide molecule, interacts selectively and non-covalently with zinc ions." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "polypeptide zinc ion contact site" EXACT [] -synonym: "Zn_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001104 -name: catalytic_residue -alt_id: BS:00026 -def: "Amino acid involved in the activity of an enzyme." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "act_site" RELATED [uniprot:feature_type] -synonym: "active site residue" EXACT [] -synonym: "catalytic residue" EXACT [] -is_a: SO:0001237 ! amino_acid -relationship: part_of SO:0100019 ! polypeptide_catalytic_motif - -[Term] -id: SO:0001105 -name: polypeptide_ligand_contact -alt_id: BS:00157 -def: "Residues which interact with a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide ligand contact" EXACT [] -synonym: "protein-ligand interaction" RELATED [] -is_a: SO:0001657 ! ligand_binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001106 -name: asx_motif -alt_id: BS:00202 -def: "A motif of five consecutive residues and two H-bonds in which: Residue(i) is Aspartate or Asparagine (Asx), side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3), main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001107 -name: beta_bulge -alt_id: BS:00208 -def: "A motif of three residues within a beta-sheet in which the main chains of two consecutive residues are H-bonded to that of the third, and in which the dihedral angles are as follows: Residue(i): -140 degrees < phi(l) -20 degrees , -90 degrees < psi(l) < 40 degrees. Residue (i+1): -180 degrees < phi < -25 degrees or +120 degrees < phi < +180 degrees, +40 degrees < psi < +180 degrees or -180 degrees < psi < -120 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge" EXACT [] -xref: http://en.wikipedia.org/wiki/Beta_bulge "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001108 -name: beta_bulge_loop -alt_id: BS:00209 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds. Beta bulge loops often occur at the loop ends of beta-hairpins." [EBIBS:GAR, Http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001109 -name: beta_bulge_loop_five -alt_id: BS:00210 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+4), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+3), these loops have an RL nest at residues i+2 and i+3." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop five" EXACT [] -is_a: SO:0001108 ! 
beta_bulge_loop - -[Term] -id: SO:0001110 -name: beta_bulge_loop_six -alt_id: BS:00211 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+5), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+4), these loops have an RL nest at residues i+3 and i+4." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop six" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001111 -name: beta_strand -alt_id: BS:00042 -def: "A beta strand describes a single length of polypeptide chain that forms part of a beta sheet. A single continuous stretch of amino acids adopting an extended conformation of hydrogen bonds between the N-O and the C=O of another part of the peptide. This forms a secondary protein structure in which two or more extended polypeptide regions are hydrogen-bonded to one another in a planar array." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "strand" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Beta_sheet "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001112 -name: antiparallel_beta_strand -alt_id: BS:0000341 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (one running N-terminal to C-terminal and one running C-terminal to N-terminal). Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i) and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they form two mutual backbone hydrogen bonds to each other's flanking peptide groups; this is known as a close pair of hydrogen bonds. The peptide backbone dihedral angles (phi, psi) are about (-140 degrees, 135 degrees) in antiparallel sheets." 
[EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "antiparallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001113 -name: parallel_beta_strand -alt_id: BS:00151 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (both running N-terminal to C-terminal). This orientation is slightly less stable because it introduces nonplanarity in the inter-strand hydrogen bonding pattern. Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i)and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they do not hydrogen bond to each other; rather, one residue forms hydrogen bonds to the residues that flank the other (but not vice versa). For example, residue i may form hydrogen bonds to residues j - 1 and j + 1; this is known as a wide pair of hydrogen bonds. By contrast, residue j may hydrogen-bond to different residues altogether, or to none at all. The dihedral angles (phi, psi) are about (-120 degrees, 115 degrees) in parallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "parallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001114 -name: peptide_helix -alt_id: BS:00152 -def: "A helix is a secondary_structure conformation where the peptide backbone forms a coil." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "helix" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001115 -name: left_handed_peptide_helix -alt_id: BS:00222 -def: "A left handed helix is a region of peptide where the coiled conformation turns in an anticlockwise, left handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix-l" RELATED [] -synonym: "left handed helix" EXACT [] -is_a: SO:0001114 ! 
peptide_helix - -[Term] -id: SO:0001116 -name: right_handed_peptide_helix -alt_id: BS:0000339 -def: "A right handed helix is a region of peptide where the coiled conformation turns in a clockwise, right handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix" RELATED BS [] -synonym: "right handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001117 -name: alpha_helix -alt_id: BS:00040 -def: "The helix has 3.6 residues per turn which corresponds to a translation of 1.5 angstroms (= 0.15 nm) along the helical axis. Every backbone N-H group donates a hydrogen bond to the backbone C=O group of the amino acid four residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "a-helix" RELATED BS [] -synonym: "helix" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Alpha_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001118 -name: pi_helix -alt_id: BS:00153 -def: "The pi helix has 4.1 residues per turn and a translation of 1.15 (=0.115 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid five residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "pi helix" EXACT [] -xref: http://en.wikipedia.org/wiki/Pi_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001119 -name: three_ten_helix -alt_id: BS:0000340 -def: "The 3-10 helix has 3 residues per turn with a translation of 2.0 angstroms (=0.2 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid three residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "3(10) helix" EXACT [] -synonym: "3-10 helix" EXACT [] -synonym: "310 helix" EXACT [] -synonym: "three ten helix" EXACT [] -xref: http://en.wikipedia.org/wiki/310_helix "wiki" -is_a: SO:0001116 ! 
right_handed_peptide_helix - -[Term] -id: SO:0001120 -name: polypeptide_nest_motif -alt_id: BS:00223 -def: "A motif of two consecutive residues with dihedral angles. Nest should not have Proline as any residue. Nests frequently occur as parts of other motifs such as Schellman loops." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest" RELATED BS [] -synonym: "nest_motif" EXACT [] -synonym: "polypeptide nest motif" RELATED [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001121 -name: polypeptide_nest_left_right_motif -alt_id: BS:00224 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_left_right" EXACT [] -synonym: "nest_lr" EXACT [] -synonym: "polypeptide nest left right motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001122 -name: polypeptide_nest_right_left_motif -alt_id: BS:00225 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_right_left" EXACT [] -synonym: "nest_rl" EXACT [] -synonym: "polypeptide nest right left motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001123 -name: schellmann_loop -alt_id: BS:00226 -def: "A motif of six or seven consecutive residues that contains two H-bonds." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "paperclip" RELATED BS [] -synonym: "paperclip loop" RELATED [] -synonym: "schellmann loop" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001124 -name: schellmann_loop_seven -alt_id: BS:00228 -def: "Wild type: A motif of seven consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+6), the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+5)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop seven" EXACT [] -synonym: "seven-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001125 -name: schellmann_loop_six -alt_id: BS:00227 -def: "Common Type: A motif of six consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+5) the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop six" EXACT [] -synonym: "six-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001126 -name: serine_threonine_motif -alt_id: BS:00229 -def: "A motif of five consecutive residues and two hydrogen bonds in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3) , the main-chain CO group of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine motif" EXACT [] -synonym: "st motif" EXACT [] -synonym: "st_motif" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001127 -name: serine_threonine_staple_motif -alt_id: BS:00230 -def: "A motif of four or five consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain OH of residue(i) is H-bonded to the main-chain CO of residue(i3) or (i4), Phi angles of residues(i1), (i2) and (i3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine threonine staple motif" EXACT [] -synonym: "st_staple" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001128 -name: polypeptide_turn_motif -alt_id: BS:00148 -def: "A reversal in the direction of the backbone of a protein that is stabilized by hydrogen bond between backbone NH and CO groups, involving no more than 4 amino acid residues." [EBIBS:GAR, uniprot:feature_type] -comment: Range. -subset: biosapiens -synonym: "turn" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001129 -name: asx_turn_left_handed_type_one -alt_id: BS:00206 -def: "Left handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type one" EXACT [] -synonym: "asx_turn_il" RELATED [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001130 -name: asx_turn_left_handed_type_two -alt_id: BS:00204 -def: "Left handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type two" EXACT [] -synonym: "asx_turn_iil" EXACT [] -is_a: SO:0000912 ! 
asx_turn - -[Term] -id: SO:0001131 -name: asx_turn_right_handed_type_two -alt_id: BS:00205 -def: "Right handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn right handed type two" EXACT [] -synonym: "asx_turn_iir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001132 -name: asx_turn_right_handed_type_one -alt_id: BS:00207 -def: "Right handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn type right handed type one" EXACT [] -synonym: "asx_turn_ir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001133 -name: beta_turn -alt_id: BS:00212 -def: "A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles of the second and third residues, which are the basis for sub-categorization." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001134 -name: beta_turn_left_handed_type_one -alt_id: BS:00215 -def: "Left handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles:- Residue(i+1): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees. 
Residue(i+2): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type one" EXACT [] -synonym: "beta_turn_il" EXACT [] -synonym: "type I' beta turn" EXACT [] -synonym: "type I' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001135 -name: beta_turn_left_handed_type_two -alt_id: BS:00213 -def: "Left handed type II: A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees > phi > -20 degrees, +80 degrees > psi > +180 degrees. Residue(i+2): +20 degrees > phi > +140 degrees, -40 degrees > psi > +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type two" EXACT [] -synonym: "beta_turn_iil" EXACT [] -synonym: "type II' beta turn" EXACT [] -synonym: "type II' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001136 -name: beta_turn_right_handed_type_one -alt_id: BS:00216 -def: "Right handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+2): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type one" EXACT [] -synonym: "beta_turn_ir" EXACT [] -synonym: "type I beta turn" EXACT [] -synonym: "type I turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001137 -name: beta_turn_right_handed_type_two -alt_id: BS:00214 -def: "Right handed type II:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, +80 degrees < psi < +180 degrees. Residue(i+2): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type two" EXACT [] -synonym: "beta_turn_iir" EXACT [] -synonym: "type II beta turn" EXACT [] -synonym: "type II turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001138 -name: gamma_turn -alt_id: BS:00219 -def: "Gamma turns, defined for 3 residues i,( i+1),( i+2) if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001139 -name: gamma_turn_classic -alt_id: BS:00220 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=75.0 - psi(i+1)=-64.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "classic gamma turn" EXACT [] -synonym: "gamma turn classic" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001140 -name: gamma_turn_inverse -alt_id: BS:00221 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=-79.0 - psi(i+1)=69.0." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn inverse" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001141 -name: serine_threonine_turn -alt_id: BS:00231 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine turn" EXACT [] -synonym: "st_turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001142 -name: st_turn_left_handed_type_one -alt_id: BS:00234 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type one" EXACT [] -synonym: "st_turn_il" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001143 -name: st_turn_left_handed_type_two -alt_id: BS:00232 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type two" EXACT [] -synonym: "st_turn_iil" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001144 -name: st_turn_right_handed_type_one -alt_id: BS:00235 -def: "The peptide twists in an clockwise, right handed manner. 
The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type one" EXACT [] -synonym: "st_turn_ir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001145 -name: st_turn_right_handed_type_two -alt_id: BS:00233 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type two" EXACT [] -synonym: "st_turn_iir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001146 -name: polypeptide_variation_site -alt_id: BS:00336 -def: "A site of sequence variation (alteration). Alternative sequence due to naturally occurring events such as polymorphisms and alternative splicing or experimental methods such as site directed mutagenesis." [EBIBS:GAR, SO:ke] -comment: For example, was a substitution natural or mutated as part of an experiment? This term is added to merge the biosapiens term sequence_variations. -subset: biosapiens -synonym: "sequence_variations" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001147 -name: natural_variant_site -alt_id: BS:00071 -def: "Describes the natural sequence variants due to polymorphisms, disease-associated mutations, RNA editing and variations between strains, isolates or cultivars." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. 
-subset: biosapiens -synonym: "natural_variant" BROAD [] -synonym: "sequence variation" BROAD [] -synonym: "variant" BROAD [uniprot:feature_type] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001148 -name: mutated_variant_site -alt_id: BS:00036 -def: "Site which has been experimentally altered." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mutagen" EXACT BS [uniprot:feature_type] -synonym: "mutagenesis" EXACT [] -synonym: "mutated_site" EXACT [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001149 -name: alternate_sequence_site -alt_id: BS:00073 -alt_id: SO:0001065 -def: "Description of sequence variants produced by alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "alternative_sequence" EXACT [] -synonym: "isoform" NARROW [] -synonym: "sequence variation" NARROW [] -synonym: "var_seq" EXACT [uniprot:feature_type] -synonym: "varsplic" NARROW [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001150 -name: beta_turn_type_six -def: "A motif of four consecutive peptide resides of type VIa or type VIb and where the i+2 residue is cis-proline." [SO:cb] -subset: biosapiens -synonym: "beta turn type six" EXACT [] -synonym: "cis-proline loop" EXACT [] -synonym: "type VI beta turn" EXACT [] -synonym: "type VI turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001151 -name: beta_turn_type_six_a -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -90 degrees, psi ~ 0 degrees." 
[PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six a" EXACT [] -synonym: "type VIa beta turn" EXACT [] -synonym: "type VIa turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001152 -name: beta_turn_type_six_a_one -subset: biosapiens -synonym: "beta turn type six a one" EXACT [] -synonym: "type VIa1 beta turn" EXACT [] -synonym: "type VIa1 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001153 -name: beta_turn_type_six_a_two -subset: biosapiens -synonym: "beta turn type six a two" EXACT [] -synonym: "type VIa2 beta turn" EXACT [] -synonym: "type VIa2 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001154 -name: beta_turn_type_six_b -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -120 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -60 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six b" EXACT [] -synonym: "type VIb beta turn" EXACT [] -synonym: "type VIb turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001155 -name: beta_turn_type_eight -def: "A motif of four consecutive peptide residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ -30 degrees. Residue(i+2): phi ~ -120 degrees, psi ~ 120 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type eight" EXACT [] -synonym: "type VIII beta turn" EXACT [] -synonym: "type VIII turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001156 -name: DRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -10 and -60 relative to the TSS. Consensus sequence is WATCGATW." [PMID:12537576] -comment: This consensus sequence was identified computationally using the MEME algorithm within core promoter sequences from -60 to +40, with an E value of 1.7e-183. Tends to co-occur with Motif 7. Tends to not occur with DPE motif (SO:0000015) or motif 10. -synonym: "DRE motif" EXACT [] -synonym: "NDM4" EXACT [] -synonym: "WATCGATW_motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001157 -name: DMv4_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements with respect to the TSS (+1). Consensus sequence is YGGTCACACTR. Marked spatial preference within core promoter; tend to occur near the TSS, although not as tightly as INR (SO:0000014)." [PMID:16827941:12537576] -synonym: "directional motif v4" EXACT [] -synonym: "DMv4" EXACT [] -synonym: "DMv4 motif" EXACT [] -synonym: "motif 1 element" EXACT [] -synonym: "promoter motif 1" EXACT [] -synonym: "YGGTCACATR" NARROW [] -is_a: SO:0001659 ! promoter_element - -[Term] -id: SO:0001158 -name: E_box_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and +1 relative to the TSS. Consensus sequence is AWCAGCTGWT. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015)." [PMID:12537576:16827941] -synonym: "AWCAGCTGWT" NARROW [] -synonym: "E box motif" EXACT [] -synonym: "generic E box motif" EXACT [] -synonym: "NDM5" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001159 -name: DMv5_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -50 and -10 relative to the TSS. Consensus sequence is KTYRGTATWTTT. Tends to co-occur with DMv4 (SO:0001157) . Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576:16827941] -synonym: "directional motif v5" EXACT [] -synonym: "DMv5" EXACT [] -synonym: "DMv5 motif" EXACT [] -synonym: "KTYRGTATWTTT" NARROW [] -synonym: "promoter motif 6" RELATED [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001160 -name: DMv3_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -30 and +15 relative to the TSS. Consensus sequence is KNNCAKCNCTRNY. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015) or MTE (0001162)." [PMID:12537576:16827941] -synonym: "directional motif v3" EXACT [] -synonym: "DMv3" EXACT [] -synonym: "DMv3 motif" EXACT [] -synonym: "KNNCAKCNCTRNY" NARROW [] -synonym: "promoter motif 7" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001161 -name: DMv2_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and -45 relative to the TSS. Consensus sequence is MKSYGGCARCGSYSS. Tends to co-occur with DMv3 (SO:0001160). Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576:16827941] -synonym: "directional motif v2" EXACT [] -synonym: "DMv2" EXACT [] -synonym: "DMv2 motif" EXACT [] -synonym: "MKSYGGCARCGSYSS" NARROW [] -synonym: "promoter motif 8" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001162 -name: MTE -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between +20 and +30 relative to the TSS. Consensus sequence is CSARCSSAACGS. Tends to co-occur with INR motif (SO:0000014). Tends to not occur with DPE motif (SO:0000015) or DMv5 (SO:0001159)." [PMID:12537576:15231738, PMID:16858867] -synonym: "CSARCSSAACGS" NARROW [] -synonym: "motif ten element" EXACT [] -synonym: "motif_ten_element" EXACT [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter - -[Term] -id: SO:0001163 -name: INR1_motif -def: "A promoter motif with consensus sequence TCATTCG." [PMID:16827941] -synonym: "directional motif p3" EXACT [] -synonym: "directional promoter motif 3" EXACT [] -synonym: "DMp3" EXACT [] -synonym: "INR1 motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001164 -name: DPE1_motif -def: "A promoter motif with consensus sequence CGGACGT." [PMID:16827941] -synonym: "directional motif 5" EXACT [] -synonym: "directional promoter motif 5" RELATED [] -synonym: "DMp5" EXACT [] -synonym: "DPE1 motif" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001165 -name: DMv1_motif -def: "A promoter motif with consensus sequence CARCCCT." [PMID:16827941] -synonym: "directional promoter motif v1" RELATED [] -synonym: "DMv1" RELATED [] -synonym: "DMv1 motif" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001166 -name: GAGA_motif -def: "A non directional promoter motif with consensus sequence GAGAGCG." [PMID:16827941] -synonym: "GAGA" EXACT [] -synonym: "GAGA motif" EXACT [] -synonym: "NDM1" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001167 -name: NDM2_motif -def: "A non directional promoter motif with consensus CGMYGYCR." [PMID:16827941] -synonym: "NDM2" EXACT [] -synonym: "NDM2 motif" EXACT [] -synonym: "non directional promoter motif 2" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001168 -name: NDM3_motif -def: "A non directional promoter motif with consensus sequence GAAAGCT." [PMID:16827941] -synonym: "NDM3" EXACT [] -synonym: "NDM3 motif" EXACT [] -synonym: "non directional motif 3" EXACT [] -is_a: SO:0001659 ! promoter_element -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001169 -name: ds_RNA_viral_sequence -def: "A ds_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded RNA." [SO:ke] -synonym: "double stranded RNA virus sequence" EXACT [] -synonym: "ds RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001170 -name: polinton -def: "A kind of DNA transposon that populates the genomes of protists, fungi, and animals, characterized by a unique set of proteins necessary for their transposition, including a protein-primed DNA polymerase B, retroviral integrase, cysteine protease, and ATPase. Polintons are characterized by 6-bp target site duplications, terminal-inverted repeats that are several hundred nucleotides long, and 5'-AG and TC-3' termini. Polintons exist as autonomous and nonautonomous elements." [PMID:16537396] -synonym: "maverick element" RELATED [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0001171 -name: rRNA_21S -def: "A component of the large ribosomal subunit in mitochondrial rRNA." [RSC:cb] -synonym: "21S LSU rRNA" EXACT [] -synonym: "21S ribosomal RNA" EXACT [] -synonym: "21S rRNA" EXACT [] -synonym: "rRNA 21S" EXACT [] -is_a: SO:0000651 ! 
large_subunit_rRNA - -[Term] -id: SO:0001172 -name: tRNA_region -def: "A region of a tRNA." [RSC:cb] -synonym: "tRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000253 ! tRNA - -[Term] -id: SO:0001173 -name: anticodon_loop -def: "A sequence of seven nucleotide bases in tRNA which contains the anticodon. It has the sequence 5'-pyrimidine-purine-anticodon-modified purine-any base-3." [ISBN:0716719207] -synonym: "anti-codon loop" EXACT [] -synonym: "anticodon loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001174 -name: anticodon -def: "A sequence of three nucleotide bases in tRNA which recognizes a codon in mRNA." [RSC:cb] -synonym: "anti-codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Anticodon "wiki" -is_a: SO:0001172 ! tRNA_region -relationship: part_of SO:0001173 ! anticodon_loop - -[Term] -id: SO:0001175 -name: CCA_tail -def: "Base sequence at the 3' end of a tRNA. The 3'-hydroxyl group on the terminal adenosine is the attachment point for the amino acid." [ISBN:0716719207] -synonym: "CCA sequence" EXACT [] -synonym: "CCA tail" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001176 -name: DHU_loop -def: "Non-base-paired sequence of nucleotide bases in tRNA. It contains several dihydrouracil residues." [ISBN:071671920] -synonym: "D loop" RELATED [] -synonym: "DHU loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001177 -name: T_loop -def: "Non-base-paired sequence of three nucleotide bases in tRNA. It has sequence T-Psi-C." [ISBN:0716719207] -synonym: "T loop" EXACT [] -synonym: "TpsiC loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001178 -name: pyrrolysine_tRNA_primary_transcript -def: "A primary transcript encoding pyrrolysyl tRNA (SO:0000766)." [RSC:cb] -synonym: "pyrrolysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0001179 -name: U3_snoRNA -def: "U3 snoRNA is a member of the box C/D class of small nucleolar RNAs. The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -comment: The definition is most of the old definition for snoRNA (SO:0000275). -synonym: "small nucleolar RNA U3" EXACT [] -synonym: "snoRNA U3" EXACT [] -synonym: "U3 small nucleolar RNA" EXACT [] -synonym: "U3 snoRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Small_nucleolar_RNA_U3 "wiki" -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0001180 -name: AU_rich_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is rich in AUUUA pentamers. Messenger RNAs bearing multiple AU-rich elements are often unstable." [PMID:7892223] -synonym: "ARE" RELATED [] -synonym: "AU rich element" EXACT [] -synonym: "AU-rich element" EXACT [] -xref: http://en.wikipedia.org/wiki/AU-rich_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! 
three_prime_UTR - -[Term] -id: SO:0001181 -name: Bruno_response_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is bound by the Drosophila Bruno protein and its homologs." [PMID:10893231] -comment: Not to be confused with BRE_motif (SO:0000016), which binds transcription factor II B. -synonym: "BRE" RELATED [] -synonym: "Bruno response element" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001182 -name: iron_responsive_element -def: "A regulatory sequence found in the 5' and 3' UTRs of many mRNAs which encode iron-binding proteins. It has a hairpin structure and is recognized by trans-acting proteins known as iron-regulatory proteins." [PMID:3198610, PMID:8710843] -synonym: "IRE" EXACT [] -synonym: "iron responsive element" EXACT [] -xref: http://en.wikipedia.org/wiki/Iron_responsive_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0001183 -name: morpholino_backbone -def: "An attribute describing a sequence composed of nucleobases bound to a morpholino backbone. A morpholino backbone consists of morpholine (CHEBI:34856) rings connected by phosphorodiamidate linkages." [RSC:cb] -comment: Do not use this for feature annotation. Use morpholino_oligo (SO:0000034) instead. -synonym: "morpholino backbone" EXACT [] -xref: http://en.wikipedia.org/wiki/Morpholino "wiki" -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001184 -name: PNA -def: "An attribute describing a sequence composed of peptide nucleic acid (CHEBI:48021), a chemical consisting of nucleobases bound to a backbone composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [RSC:cb] -comment: Do not use this term for feature annotation. Use PNA_oligo (SO:0001011) instead. -synonym: "peptide nucleic acid" RELATED [] -is_a: SO:0000348 ! 
nucleic_acid - -[Term] -id: SO:0001185 -name: enzymatic -def: "An attribute describing the sequence of a transcript that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use enzymatic_RNA (SO:0000372) instead. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001186 -name: ribozymic -def: "An attribute describing the sequence of a transcript that has catalytic activity even without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use ribozyme (SO:0000374) instead. -is_a: SO:0001185 ! enzymatic - -[Term] -id: SO:0001187 -name: pseudouridylation_guide_snoRNA -def: "A snoRNA that specifies the site of pseudouridylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA pseudouridylation guide activity (GO:0030558). -synonym: "pseudouridylation guide snoRNA" EXACT [] -is_a: SO:0000594 ! H_ACA_box_snoRNA - -[Term] -id: SO:0001188 -name: LNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of 'locked' deoxyribose rings connected to a phosphate backbone. The deoxyribose unit's conformation is 'locked' by a 2'-C,4'-C-oxymethylene link." [CHEBI:48010] -comment: Do not use this term for feature annotation. Use LNA_oligo (SO:0001189) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001189 -name: LNA_oligo -def: "An oligo composed of LNA residues." [RSC:cb] -synonym: "LNA oligo" EXACT [] -synonym: "locked nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Locked_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001188 ! LNA -relationship: has_quality SO:0001188 ! 
LNA - -[Term] -id: SO:0001190 -name: TNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of threose rings connected to a phosphate backbone." [CHEBI:48019] -comment: Do not use this term for feature annotation. Use TNA_oligo (SO:0001191) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001191 -name: TNA_oligo -def: "An oligo composed of TNA residues." [RSC:cb] -synonym: "threose nucleic acid" EXACT [] -synonym: "TNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Threose_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001190 ! TNA -relationship: has_quality SO:0001190 ! TNA - -[Term] -id: SO:0001192 -name: GNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of an acyclic three-carbon propylene glycol connected to a phosphate backbone. It has two enantiomeric forms, (R)-GNA and (S)-GNA." [CHEBI:48015] -comment: Do not use this term for feature annotation. Use GNA_oligo (SO:0001192) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001193 -name: GNA_oligo -def: "An oligo composed of GNA residues." [RSC:cb] -synonym: "glycerol nucleic acid" EXACT [] -synonym: "glycol nucleic acid" EXACT [] -synonym: "GNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Glycerol_nucleic_acid "wiki" -is_a: SO:0001247 ! synthetic_oligo -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001192 ! GNA -relationship: has_quality SO:0001192 ! GNA - -[Term] -id: SO:0001194 -name: R_GNA -def: "An attribute describing a GNA sequence in the (R)-GNA enantiomer." [CHEBI:48016] -comment: Do not use this term for feature annotation. Use R_GNA_oligo (SO:0001195) instead. -synonym: "R GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001195 -name: R_GNA_oligo -def: "An oligo composed of (R)-GNA residues." 
[RSC:cb] -synonym: "(R)-glycerol nucleic acid" EXACT [] -synonym: "(R)-glycol nucleic acid" EXACT [] -synonym: "R GNA oligo" EXACT [] -is_a: SO:0001193 ! GNA_oligo -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001194 ! R_GNA -relationship: has_quality SO:0001194 ! R_GNA - -[Term] -id: SO:0001196 -name: S_GNA -def: "An attribute describing a GNA sequence in the (S)-GNA enantiomer." [CHEBI:48017] -comment: Do not use this term for feature annotation. Use S_GNA_oligo (SO:0001197) instead. -synonym: "S GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001197 -name: S_GNA_oligo -def: "An oligo composed of (S)-GNA residues." [RSC:cb] -synonym: "(S)-glycerol nucleic acid" EXACT [] -synonym: "(S)-glycol nucleic acid" EXACT [] -synonym: "S GNA oligo" EXACT [] -is_a: SO:0001193 ! GNA_oligo -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001196 ! S_GNA -relationship: has_quality SO:0001196 ! S_GNA - -[Term] -id: SO:0001198 -name: ds_DNA_viral_sequence -def: "A ds_DNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded DNA." [SO:ke] -synonym: "double stranded DNA virus" EXACT [] -synonym: "ds DNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001199 -name: ss_RNA_viral_sequence -def: "A ss_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as single stranded RNA." [SO:ke] -synonym: "single strand RNA virus" EXACT [] -synonym: "ss RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001200 -name: negative_sense_ssRNA_viral_sequence -def: "A negative_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that is complementary to mRNA and must be converted to positive sense RNA by RNA polymerase before translation." 
[SO:ke] -synonym: "negative sense single stranded RNA virus" RELATED [] -synonym: "negative sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001201 -name: positive_sense_ssRNA_viral_sequence -def: "A positive_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that can be immediately translated by the host." [SO:ke] -synonym: "positive sense single stranded RNA virus" RELATED [] -synonym: "positive sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001202 -name: ambisense_ssRNA_viral_sequence -def: "A ambisense_RNA_virus is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus with both messenger and anti messenger polarity." [SO:ke] -synonym: "ambisense single stranded RNA virus" EXACT [] -synonym: "ambisense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001203 -name: RNA_polymerase_promoter -def: "A region (DNA) to which RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "RNA polymerase promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0001204 -name: Phage_RNA_Polymerase_Promoter -def: "A region (DNA) to which Bacteriophage RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "Phage RNA Polymerase Promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0001205 -name: SP6_RNA_Polymerase_Promoter -def: "A region (DNA) to which the SP6 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "SP6 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001206 -name: T3_RNA_Polymerase_Promoter -def: "A DNA sequence to which the T3 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T3 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! 
Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001207 -name: T7_RNA_Polymerase_Promoter -def: "A region (DNA) to which the T7 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T7 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001208 -name: five_prime_EST -def: "An EST read from the 5' end of a transcript that usually codes for a protein. These regions tend to be conserved across species and do not change much within a gene family." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "5' EST" EXACT [] -synonym: "five prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001209 -name: three_prime_EST -def: "An EST read from the 3' end of a transcript. They are more likely to fall within non-coding, or untranslated regions(UTRs)." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "3' EST" EXACT [] -synonym: "three prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001210 -name: translational_frameshift -def: "The region of mRNA (not divisible by 3 bases) that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "ribosomal frameshift" EXACT [] -synonym: "translational frameshift" EXACT [] -xref: http://en.wikipedia.org/wiki/Translational_frameshift "wiki" -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0001211 -name: plus_1_translational_frameshift -def: "The region of mRNA 1 base long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 1 ribosomal frameshift" EXACT [] -synonym: "plus 1 translational frameshift" EXACT [] -is_a: SO:0001210 ! 
translational_frameshift - -[Term] -id: SO:0001212 -name: plus_2_translational_frameshift -def: "The region of mRNA 2 bases long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 2 ribosomal frameshift" EXACT [] -synonym: "plus 2 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001213 -name: group_III_intron -def: "Group III introns are introns found in the mRNA of the plastids of euglenoid protists. They are spliced by a two step transesterification with bulged adenosine as initiating nucleophile." [PMID:11377794] -comment: GO:0000374. -synonym: "group III intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_III_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -def: "The maximal intersection of exon and UTR." [SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with a stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -subset: SOFA -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001216 -name: endonuclease_spliced_intron -def: "An intron that spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -synonym: "endonuclease spliced intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0001217 -name: protein_coding_gene -synonym: "protein coding gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000010 ! 
protein_coding -relationship: has_quality SO:0000010 ! protein_coding - -[Term] -id: SO:0001218 -name: transgenic_insertion -def: "An insertion that derives from another organism, via the use of recombinant DNA technology." [SO:bm] -synonym: "transgenic insertion" EXACT [] -is_a: SO:0000667 ! insertion -intersection_of: SO:0000667 ! insertion -intersection_of: has_quality SO:0000781 ! transgenic -relationship: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0001219 -name: retrogene -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000569 ! retrotransposed -relationship: has_quality SO:0000569 ! retrotransposed - -[Term] -id: SO:0001220 -name: silenced_by_RNA_interference -def: "An attribute describing an epigenetic process where a gene is inactivated by RNA interference." [RSC:cb] -comment: RNA interference is GO:0016246. -synonym: "silenced by RNA interference" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001221 -name: silenced_by_histone_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by histone modification." [RSC:cb] -comment: Histone modification is GO:0016570. -synonym: "silenced by histone modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001222 -name: silenced_by_histone_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone methylation." [RSC:cb] -comment: Histone methylation is GO:0016571. -synonym: "silenced by histone methylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001223 -name: silenced_by_histone_deacetylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone deacetylation." [RSC:cb] -comment: Histone deacetylation is GO:0016573. -synonym: "silenced by histone deacetylation" EXACT [] -is_a: SO:0001221 ! 
silenced_by_histone_modification - -[Term] -id: SO:0001224 -name: gene_silenced_by_RNA_interference -def: "A gene that is silenced by RNA interference." [SO:xp] -synonym: "gene silenced by RNA interference" EXACT [] -synonym: "RNA interference silenced gene" EXACT [] -synonym: "RNAi silenced gene" EXACT [] -is_a: SO:0000127 ! silenced_gene -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001220 ! silenced_by_RNA_interference -relationship: has_quality SO:0001220 ! silenced_by_RNA_interference - -[Term] -id: SO:0001225 -name: gene_silenced_by_histone_modification -def: "A gene that is silenced by histone modification." [SO:xp] -synonym: "gene silenced by histone modification" EXACT [] -is_a: SO:0000127 ! silenced_gene -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001221 ! silenced_by_histone_modification -relationship: has_quality SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001226 -name: gene_silenced_by_histone_methylation -def: "A gene that is silenced by histone methylation." [SO:xp] -synonym: "gene silenced by histone methylation" EXACT [] -is_a: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001222 ! silenced_by_histone_methylation -relationship: has_quality SO:0001222 ! silenced_by_histone_methylation - -[Term] -id: SO:0001227 -name: gene_silenced_by_histone_deacetylation -def: "A gene that is silenced by histone deacetylation." [SO:xp] -synonym: "gene silenced by histone deacetylation" EXACT [] -is_a: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001223 ! silenced_by_histone_deacetylation -relationship: has_quality SO:0001223 ! 
silenced_by_histone_deacetylation - -[Term] -id: SO:0001228 -name: dihydrouridine -def: "A modified RNA base in which the 5,6-dihydrouracil is bound to the ribose ring." [RSC:cb] -synonym: "D" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Dihydrouridine "wiki" -xref: RNAMOD:051 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001229 -name: pseudouridine -def: "A modified RNA base in which the 5- position of the uracil is bound to the ribose ring instead of the 4- position." [RSC:cb] -comment: The free molecule is CHEBI:17802. -synonym: "Y" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Pseudouridine "wiki" -xref: RNAMOD:050 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001230 -name: inosine -def: "A modified RNA base in which hypoxanthine is bound to the ribose ring." [http://library.med.utah.edu/RNAmods/, RSC:cb] -comment: The free molecule is CHEBI:17596. -synonym: "I" RELATED [] -synonym: "RNAMOD:017" RELATED [] -xref: http://en.wikipedia.org/wiki/Inosine "wiki" -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001231 -name: seven_methylguanine -def: "A modified RNA base in which guanine is methylated at the 7- position." [RSC:cb] -comment: The free molecule is CHEBI:2274. -synonym: "7-methylguanine" EXACT [] -synonym: "seven methylguanine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001232 -name: ribothymidine -def: "A modified RNA base in which thymine is bound to the ribose ring." [RSC:cb] -comment: The free molecule is CHEBI:30832. -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001233 -name: methylinosine -def: "A modified RNA base in which methylhypoxanthine is bound to the ribose ring." [RSC:cb] -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001234 -name: mobile -def: "An attribute describing a feature that has either intra-genome or intracellular mobility." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Mobile "wiki" -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0001235 -name: replicon -def: "A region containing at least one unique origin of replication and a unique termination site." [ISBN:0716719207] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001237 -name: amino_acid -def: "A sequence feature that corresponds to a single amino acid residue in a polypeptide." [RSC:cb] -comment: Probably in the future this will cross reference to Chebi. -synonym: "amino acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Amino_acid "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0001238 -name: major_TSS -synonym: "major transcription start site" EXACT [] -synonym: "major TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001239 -name: minor_TSS -synonym: "minor TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001240 -name: TSS_region -def: "The region of a gene from the 5' most TSS to the 3' TSS." [BBOP:nw] -synonym: "TSS region" EXACT [] -is_a: SO:0000842 ! gene_component_region -relationship: has_part SO:0000315 ! TSS - -[Term] -id: SO:0001241 -name: encodes_alternate_transcription_start_sites -synonym: "encodes alternate transcription start sites" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0001243 -name: miRNA_primary_transcript_region -def: "A part of an miRNA primary_transcript." [SO:ke] -synonym: "miRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! 
primary_transcript_region - -[Term] -id: SO:0001244 -name: pre_miRNA -def: "The 60-70 nucleotide region remain after Drosha processing of the primary transcript, that folds back upon itself to form a hairpin structure." [SO:ke] -synonym: "pre-miRNA" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region - -[Term] -id: SO:0001245 -name: miRNA_stem -def: "The stem of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA stem" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001246 -name: miRNA_loop -def: "The loop of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA loop" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001247 -name: synthetic_oligo -def: "An oligo composed of synthetic nucleotides." [SO:ke] -synonym: "synthetic oligo" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0001248 -name: assembly -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001249 -name: fragment_assembly -def: "A fragment assembly is a genome assembly that orders overlapping fragments of the genome based on landmark sequences. The base pair distance between the landmarks is known allowing additivity of lengths." [SO:ke] -synonym: "fragment assembly" EXACT [] -synonym: "physical map" EXACT [] -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0001250 -name: fingerprint_map -def: "A fingerprint_map is a physical map composed of restriction fragments." [SO:ke] -synonym: "BACmap" EXACT [] -synonym: "fingerprint map" EXACT [] -synonym: "FPC" EXACT [] -synonym: "FPCmap" EXACT [] -synonym: "restriction map" EXACT [] -is_a: SO:0001249 ! 
fragment_assembly -relationship: has_part SO:0000412 ! restriction_fragment - -[Term] -id: SO:0001251 -name: STS_map -def: "An STS map is a physical map organized by the unique STS landmarks." [SO:ke] -synonym: "STS map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001252 -name: RH_map -def: "A radiation hybrid map is a physical map." [SO:ke] -synonym: "radiation hybrid map" EXACT [] -synonym: "RH map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001253 -name: sonicate_fragment -def: "A DNA fragment generated by sonication. Sonication is a technique used to sheer DNA into smaller fragments." [SO:ke] -synonym: "sonicate fragment" EXACT [] -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0001254 -name: polyploid -def: "A kind of chromosome variation where the chromosome complement is an exact multiple of the haploid number and is greater than the diploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Polyploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0001255 -name: autopolyploid -def: "A polyploid where the multiple chromosome set was derived from the same organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Autopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001256 -name: allopolyploid -def: "A polyploid where the multiple chromosome set was derived from a different organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Allopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001257 -name: homing_endonuclease_binding_site -def: "The binding site (recognition site) of a homing endonuclease. The binding site is typically large." [SO:ke] -synonym: "homing endonuclease binding site" EXACT [] -is_a: SO:0000059 ! 
nuclease_binding_site - -[Term] -id: SO:0001258 -name: octamer_motif -def: "A sequence element characteristic of some RNA polymerase II promoters with sequence ATTGCAT that binds Pou-domain transcription factors." [GOC:dh, PMID:3095662] -comment: Nature. 1986 Oct 16-22;323(6089):640-3. -synonym: "octamer motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001259 -name: apicoplast_chromosome -def: "A chromosome originating in an apicoplast." [SO:xp] -synonym: "apicoplast chromosome" EXACT [] -is_a: SO:0000340 ! chromosome -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000743 ! apicoplast_sequence -relationship: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0001260 -name: sequence_collection -def: "A collection of discontinuous sequences." [SO:ke] -synonym: "sequence collection" EXACT [] - -[Term] -id: SO:0001261 -name: overlapping_feature_set -def: "A continuous region of sequence composed of the overlapping of multiple sequence_features, which ultimately provides evidence for another sequence_feature." [SO:ke] -comment: This feature was requested by Nicole, tracker id 1911479. It is required to gather evidence together for annotation. An example would be overlapping ESTs that support an mRNA. -synonym: "overlapping feature set" EXACT [] -is_a: SO:0000703 ! experimental_result_region - -[Term] -id: SO:0001262 -name: overlapping_EST_set -def: "A continous experimental result region extending the length of multiple overlapping EST's." [SO:ke] -synonym: "overlapping EST set" EXACT [] -is_a: SO:0001261 ! overlapping_feature_set -relationship: has_part SO:0000345 ! EST - -[Term] -id: SO:0001263 -name: ncRNA_gene -synonym: "ncRNA gen" EXACT [] -synonym: "ncRNA gene" EXACT [] -synonym: "non-coding RNA gene" RELATED [] -is_a: SO:0000704 ! gene - -[Term] -id: SO:0001264 -name: gRNA_gene -synonym: "gRNA gene" EXACT [] -is_a: SO:0001263 ! 
ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000979 ! gRNA_encoding -relationship: has_quality SO:0000979 ! gRNA_encoding - -[Term] -id: SO:0001265 -name: miRNA_gene -alt_id: SO:0001270 -synonym: "miRNA gene" EXACT [] -synonym: "stRNA gene" EXACT [] -synonym: "stRNA_gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000571 ! miRNA_encoding -intersection_of: has_quality SO:0000656 ! stRNA_encoding -relationship: has_quality SO:0000571 ! miRNA_encoding -relationship: has_quality SO:0000656 ! stRNA_encoding - -[Term] -id: SO:0001266 -name: scRNA_gene -synonym: "scRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000575 ! scRNA_encoding -relationship: has_quality SO:0000575 ! scRNA_encoding - -[Term] -id: SO:0001267 -name: snoRNA_gene -synonym: "snoRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000578 ! snoRNA_encoding -relationship: has_quality SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0001268 -name: snRNA_gene -synonym: "snRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0001263 ! ncRNA_gene -relationship: has_quality SO:0001263 ! ncRNA_gene - -[Term] -id: SO:0001269 -name: SRP_RNA_gene -synonym: "SRP RNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000642 ! SRP_RNA_encoding -relationship: has_quality SO:0000642 ! SRP_RNA_encoding - -[Term] -id: SO:0001271 -name: tmRNA_gene -synonym: "tmRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000659 ! tmRNA_encoding -relationship: has_quality SO:0000659 ! 
tmRNA_encoding - -[Term] -id: SO:0001272 -name: tRNA_gene -synonym: "tRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000663 ! tRNA_encoding -relationship: has_quality SO:0000663 ! tRNA_encoding - -[Term] -id: SO:0001273 -name: modified_adenosine -def: "A modified adenine is an adenine base feature that has been altered." [SO:ke] -synonym: "modified adenosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001274 -name: modified_inosine -def: "A modified inosine is an inosine base feature that has been altered." [SO:ke] -synonym: "modified inosine" EXACT [] -is_a: SO:0001230 ! inosine - -[Term] -id: SO:0001275 -name: modified_cytidine -def: "A modified cytidine is a cytidine base feature which has been altered." [SO:ke] -synonym: "modified cytidine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001276 -name: modified_guanosine -synonym: "modified guanosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001277 -name: modified_uridine -synonym: "modified uridine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001278 -name: one_methylinosine -def: "1-methylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylinosine" EXACT [] -synonym: "m1I" EXACT RNAMOD [] -synonym: "one methylinosine" EXACT [] -xref: RNAMOD:018 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001279 -name: one_two_prime_O_dimethylinosine -def: "1,2'-O-dimethylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylinosine" EXACT [] -synonym: "m'Im" EXACT RNAMOD [] -synonym: "one two prime O dimethylinosine" EXACT [] -xref: RNAMOD:019 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001280 -name: two_prime_O_methylinosine -def: "2'-O-methylinosine is a modified inosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylinosine" EXACT [] -synonym: "Im" EXACT RNAMOD [] -synonym: "two prime O methylinosine" EXACT [] -xref: RNAMOD:081 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001281 -name: three_methylcytidine -def: "3-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylcytidine" EXACT [] -synonym: "m3C" EXACT RNAMOD [] -synonym: "three methylcytidine" EXACT [] -xref: RNAMOD:020 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001282 -name: five_methylcytidine -def: "5-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylcytidine" EXACT [] -synonym: "five methylcytidine" EXACT [] -synonym: "m5C" EXACT RNAMOD [] -xref: RNAMOD:021 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001283 -name: two_prime_O_methylcytidine -def: "2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylcytidine" EXACT [] -synonym: "Cm" EXACT RNAMOD [] -synonym: "two prime O methylcytidine" EXACT [] -xref: RNAMOD:022 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001284 -name: two_thiocytidine -def: "2-thiocytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiocytidine" EXACT [] -synonym: "s2C" EXACT RNAMOD [] -synonym: "two thiocytidine" EXACT [] -xref: RNAMOD:023 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001285 -name: N4_acetylcytidine -def: "N4-acetylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4C" EXACT RNAMOD [] -synonym: "N4 acetylcytidine" EXACT [] -synonym: "N4-acetylcytidine" EXACT [] -xref: RNAMOD:024 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001286 -name: five_formylcytidine -def: "5-formylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-formylcytidine" EXACT [] -synonym: "f5C" EXACT RNAMOD [] -synonym: "five formylcytidine" EXACT [] -xref: RNAMOD:025 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001287 -name: five_two_prime_O_dimethylcytidine -def: "5,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethylcytidine" EXACT [] -synonym: "five two prime O dimethylcytidine" EXACT [] -synonym: "m5Cm" EXACT RNAMOD [] -xref: RNAMOD:026 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001288 -name: N4_acetyl_2_prime_O_methylcytidine -def: "N4-acetyl-2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4Cm" EXACT RNAMOD [] -synonym: "N4 acetyl 2 prime O methylcytidine" EXACT [] -synonym: "N4-acetyl-2'-O-methylcytidine" EXACT [] -xref: RNAMOD:027 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001289 -name: lysidine -def: "Lysidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "k2C" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Lysidine "wiki" -xref: RNAMOD:028 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001290 -name: N4_methylcytidine -def: "N4-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4C" EXACT RNAMOD [] -synonym: "N4 methylcytidine" EXACT [] -synonym: "N4-methylcytidine" EXACT [] -xref: RNAMOD:082 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001291 -name: N4_2_prime_O_dimethylcytidine -def: "N4,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4Cm" EXACT RNAMOD [] -synonym: "N4 2 prime O dimethylcytidine" EXACT [] -synonym: "N4,2'-O-dimethylcytidine" EXACT [] -xref: RNAMOD:083 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001292 -name: five_hydroxymethylcytidine -def: "5-hydroxymethylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxymethylcytidine" EXACT [] -synonym: "five hydroxymethylcytidine" EXACT [] -synonym: "hm5C" EXACT RNAMOD [] -xref: RNAMOD:084 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001293 -name: five_formyl_two_prime_O_methylcytidine -def: "5-formyl-2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formyl-2'-O-methylcytidine" EXACT [] -synonym: "f5Cm" EXACT RNAMOD [] -synonym: "five formyl two prime O methylcytidine" EXACT [] -xref: RNAMOD:095 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001294 -name: N4_N4_2_prime_O_trimethylcytidine -def: "N4_N4_2_prime_O_trimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m42Cm" EXACT RNAMOD [] -synonym: "N4,N4,2'-O-trimethylcytidine" EXACT [] -xref: RNAMOD:107 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001295 -name: one_methyladenosine -def: "1_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyladenosine" EXACT [] -synonym: "m1A" EXACT RNAMOD [] -synonym: "one methyladenosine" EXACT [] -xref: RNAMOD:001 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001296 -name: two_methyladenosine -def: "2_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methyladenosine" EXACT [] -synonym: "m2A" EXACT RNAMOD [] -synonym: "two methyladenosine" EXACT [] -xref: RNAMOD:002 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001297 -name: N6_methyladenosine -def: "N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6A" EXACT RNAMOD [] -synonym: "N6 methyladenosine" EXACT [] -synonym: "N6-methyladenosine" EXACT [] -xref: RNAMOD:003 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001298 -name: two_prime_O_methyladenosine -def: "2prime_O_methyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyladenosine" EXACT [] -synonym: "Am" EXACT RNAMOD [] -synonym: "two prime O methyladenosine" EXACT [] -xref: RNAMOD:004 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001299 -name: two_methylthio_N6_methyladenosine -def: "2_methylthio_N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-methyladenosine" EXACT [] -synonym: "ms2m6A" EXACT RNAMOD [] -synonym: "two methylthio N6 methyladenosine" EXACT [] -xref: RNAMOD:005 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001300 -name: N6_isopentenyladenosine -def: "N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "i6A" EXACT RNAMOD [] -synonym: "N6 isopentenyladenosine" EXACT [] -synonym: "N6-isopentenyladenosine" EXACT [] -xref: RNAMOD:006 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001301 -name: two_methylthio_N6_isopentenyladenosine -def: "2_methylthio_N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-isopentenyladenosine" EXACT [] -synonym: "ms2i6A" EXACT RNAMOD [] -synonym: "two methylthio N6 isopentenyladenosine" EXACT [] -xref: RNAMOD:007 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001302 -name: N6_cis_hydroxyisopentenyl_adenosine -def: "N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "io6A" EXACT RNAMOD [] -synonym: "N6 cis hydroxyisopentenyl adenosine" EXACT [] -synonym: "N6-(cis-hydroxyisopentenyl)adenosine" EXACT [] -xref: RNAMOD:008 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001303 -name: two_methylthio_N6_cis_hydroxyisopentenyl_adenosine -def: "2_methylthio_N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-(cis-hydroxyisopentenyl) adenosine" EXACT [] -synonym: "ms2io6A" EXACT RNAMOD [] -synonym: "two methylthio N6 cis hydroxyisopentenyl adenosine" EXACT [] -xref: RNAMOD:009 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001304 -name: N6_glycinylcarbamoyladenosine -def: "N6_glycinylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "g6A" EXACT RNAMOD [] -synonym: "N6 glycinylcarbamoyladenosine" EXACT [] -synonym: "N6-glycinylcarbamoyladenosine" EXACT [] -xref: RNAMOD:010 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001305 -name: N6_threonylcarbamoyladenosine -def: "N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-threonylcarbamoyladenosine" EXACT [] -synonym: "t6A" EXACT RNAMOD [] -xref: RNAMOD:011 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001306 -name: two_methylthio_N6_threonyl_carbamoyladenosine -def: "2_methylthio_N6_threonyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-threonyl carbamoyladenosine" EXACT [] -synonym: "ms2t6A" EXACT RNAMOD [] -synonym: "two methylthio N6 threonyl carbamoyladenosine" EXACT [] -xref: RNAMOD:012 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001307 -name: N6_methyl_N6_threonylcarbamoyladenosine -def: "N6_methyl_N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6t6A" EXACT RNAMOD [] -synonym: "N6 methyl N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-methyl-N6-threonylcarbamoyladenosine" EXACT [] -xref: RNAMOD:013 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001308 -name: N6_hydroxynorvalylcarbamoyladenosine -def: "N6_hydroxynorvalylcarbamoyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "hn6A" EXACT RNAMOD [] -synonym: "N6 hydroxynorvalylcarbamoyladenosine" EXACT [] -synonym: "N6-hydroxynorvalylcarbamoyladenosine" EXACT [] -xref: RNAMOD:014 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001309 -name: two_methylthio_N6_hydroxynorvalyl_carbamoyladenosine -def: "2_methylthio_N6_hydroxynorvalyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-hydroxynorvalyl carbamoyladenosine" EXACT [] -synonym: "ms2hn6A" EXACT RNAMOD [] -synonym: "two methylthio N6 hydroxynorvalyl carbamoyladenosine" EXACT [] -xref: RNAMOD:015 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001310 -name: two_prime_O_ribosyladenosine_phosphate -def: "2prime_O_ribosyladenosine_phosphate is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosyladenosine (phosphate)" EXACT [] -synonym: "Ar(p)" EXACT RNAMOD [] -synonym: "two prime O ribosyladenosine phosphate" EXACT [] -xref: RNAMOD:016 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001311 -name: N6_N6_dimethyladenosine -def: "N6_N6_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62A" EXACT RNAMOD [] -synonym: "N6,N6-dimethyladenosine" EXACT [] -xref: RNAMOD:080 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001312 -name: N6_2_prime_O_dimethyladenosine -def: "N6_2prime_O_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6Am" EXACT RNAMOD [] -synonym: "N6 2 prime O dimethyladenosine" EXACT [] -synonym: "N6,2'-O-dimethyladenosine" EXACT [] -xref: RNAMOD:088 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001313 -name: N6_N6_2_prime_O_trimethyladenosine -def: "N6_N6_2prime_O_trimethyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "m62Am" EXACT RNAMOD [] -synonym: "N6,N6,2'-O-trimethyladenosine" EXACT [] -xref: RNAMOD:089 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001314 -name: one_two_prime_O_dimethyladenosine -def: "1,2'-O-dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethyladenosine" EXACT [] -synonym: "m1Am" EXACT RNAMOD [] -synonym: "one two prime O dimethyladenosine" EXACT [] -xref: RNAMOD:097 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001315 -name: N6_acetyladenosine -def: "N6_acetyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac6A" EXACT RNAMOD [] -synonym: "N6 acetyladenosine" EXACT [] -synonym: "N6-acetyladenosine" EXACT [] -xref: RNAMOD:102 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001316 -name: seven_deazaguanosine -def: "7-deazaguanosine is a modified guanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-deazaguanosine" RELATED [] -synonym: "seven deazaguanosine" EXACT [] -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001317 -name: queuosine -def: "Queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "Q" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Queuosine "wiki" -xref: RNAMOD:043 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001318 -name: epoxyqueuosine -def: "Epoxyqueuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "eQ" EXACT RNAMOD [] -xref: RNAMOD:044 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001319 -name: galactosyl_queuosine -def: "Galactosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "galactosyl queuosine" EXACT [] -synonym: "galactosyl-queuosine" EXACT [] -synonym: "galQ" EXACT RNAMOD [] -xref: RNAMOD:045 -is_a: SO:0001316 ! 
seven_deazaguanosine - -[Term] -id: SO:0001320 -name: mannosyl_queuosine -def: "Mannosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "mannosyl queuosine" EXACT [] -synonym: "mannosyl-queuosine" EXACT [] -synonym: "manQ" EXACT RNAMOD [] -xref: RNAMOD:046 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001321 -name: seven_cyano_seven_deazaguanosine -def: "7_cyano_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-cyano-7-deazaguanosine" EXACT [] -synonym: "preQ0" EXACT RNAMOD [] -synonym: "seven cyano seven deazaguanosine" EXACT [] -xref: RNAMOD:047 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001322 -name: seven_aminomethyl_seven_deazaguanosine -def: "7_aminomethyl_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-aminomethyl-7-deazaguanosine" EXACT [] -synonym: "preQ1" EXACT RNAMOD [] -synonym: "seven aminomethyl seven deazaguanosine" EXACT [] -xref: RNAMOD:048 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001323 -name: archaeosine -def: "Archaeosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "G+" EXACT RNAMOD [] -xref: RNAMOD:049 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001324 -name: one_methylguanosine -def: "1_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylguanosine" EXACT [] -synonym: "m1G" EXACT RNAMOD [] -synonym: "one methylguanosine" EXACT [] -xref: RNAMOD:029 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001325 -name: N2_methylguanosine -def: "N2_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2G" EXACT RNAMOD [] -synonym: "N2 methylguanosine" EXACT [] -synonym: "N2-methylguanosine" EXACT [] -xref: RNAMOD:030 -is_a: SO:0001276 ! 
modified_guanosine - -[Term] -id: SO:0001326 -name: seven_methylguanosine -def: "7_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "7-methylguanosine" EXACT [] -synonym: "m7G" EXACT RNAMOD [] -synonym: "seven methylguanosine" EXACT [] -xref: RNAMOD:031 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001327 -name: two_prime_O_methylguanosine -def: "2prime_O_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylguanosine" EXACT [] -synonym: "Gm" EXACT RNAMOD [] -synonym: "two prime O methylguanosine" EXACT [] -xref: RNAMOD:032 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001328 -name: N2_N2_dimethylguanosine -def: "N2_N2_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22G" EXACT RNAMOD [] -synonym: "N2,N2-dimethylguanosine" EXACT [] -xref: RNAMOD:033 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001329 -name: N2_2_prime_O_dimethylguanosine -def: "N2_2prime_O_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2Gm" EXACT RNAMOD [] -synonym: "N2 2 prime O dimethylguanosine" EXACT [] -synonym: "N2,2'-O-dimethylguanosine" EXACT [] -xref: RNAMOD:034 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001330 -name: N2_N2_2_prime_O_trimethylguanosine -def: "N2_N2_2prime_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22Gmv" EXACT RNAMOD [] -synonym: "N2,N2,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:035 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001331 -name: two_prime_O_ribosylguanosine_phosphate -def: "2prime_O_ribosylguanosine_phosphate is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosylguanosine (phosphate)" EXACT [] -synonym: "Gr(p)" EXACT RNAMOD [] -synonym: "two prime O ribosylguanosine phosphate" EXACT [] -xref: RNAMOD:036 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001332 -name: wybutosine -def: "Wybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "yW" EXACT RNAMOD [] -xref: RNAMOD:037 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001333 -name: peroxywybutosine -def: "Peroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "o2yW" EXACT RNAMOD [] -xref: RNAMOD:038 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001334 -name: hydroxywybutosine -def: "Hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW" EXACT RNAMOD [] -xref: RNAMOD:039 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001335 -name: undermodified_hydroxywybutosine -def: "Undermodified_hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW*" EXACT RNAMOD [] -synonym: "undermodified hydroxywybutosine" EXACT [] -xref: RNAMOD:040 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001336 -name: wyosine -def: "Wyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "IMG" EXACT RNAMOD [] -xref: RNAMOD:041 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001337 -name: methylwyosine -def: "Methylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mimG" EXACT RNAMOD [] -xref: RNAMOD:042 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001338 -name: N2_7_dimethylguanosine -def: "N2_7_dimethylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "m2,7G" EXACT RNAMOD [] -synonym: "N2 7 dimethylguanosine" EXACT [] -synonym: "N2,7-dimethylguanosine" EXACT [] -xref: RNAMOD:090 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001339 -name: N2_N2_7_trimethylguanosine -def: "N2_N2_7_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,2,7G" EXACT RNAMOD [] -synonym: "N2,N2,7-trimethylguanosine" EXACT [] -xref: RNAMOD:091 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001340 -name: one_two_prime_O_dimethylguanosine -def: "1_2prime_O_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylguanosine" EXACT [] -synonym: "m1Gm" EXACT RNAMOD [] -synonym: "one two prime O dimethylguanosine" EXACT [] -xref: RNAMOD:096 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001341 -name: four_demethylwyosine -def: "4_demethylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-demethylwyosine" EXACT [] -synonym: "four demethylwyosine" EXACT [] -synonym: "imG-14" EXACT RNAMOD [] -xref: RNAMOD:100 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001342 -name: isowyosine -def: "Isowyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "imG2" EXACT RNAMOD [] -xref: RNAMOD:101 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001343 -name: N2_7_2prirme_O_trimethylguanosine -def: "N2_7_2prirme_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7Gm" EXACT RNAMOD [] -synonym: "N2 7 2prirme O trimethylguanosine" EXACT [] -synonym: "N2,7,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:106 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001344 -name: five_methyluridine -def: "5_methyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyluridine" EXACT [] -synonym: "five methyluridine" EXACT [] -synonym: "m5U" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/5-methyluridine "wiki" -xref: RNAMOD:052 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001345 -name: two_prime_O_methyluridine -def: "2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyluridine" EXACT [] -synonym: "two prime O methyluridine" EXACT [] -synonym: "Um" EXACT RNAMOD [] -xref: RNAMOD:053 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001346 -name: five_two_prime_O_dimethyluridine -def: "5_2_prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethyluridine" EXACT [] -synonym: "five two prime O dimethyluridine" EXACT [] -synonym: "m5Um" EXACT RNAMOD [] -xref: RNAMOD:054 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001347 -name: one_methylpseudouridine -def: "1_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylpseudouridine" EXACT [] -synonym: "m1Y" EXACT RNAMOD [] -synonym: "one methylpseudouridine" EXACT [] -xref: RNAMOD:055 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001348 -name: two_prime_O_methylpseudouridine -def: "2prime_O_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylpseudouridine" EXACT [] -synonym: "two prime O methylpseudouridine" EXACT [] -synonym: "Ym" EXACT RNAMOD [] -xref: RNAMOD:056 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001349 -name: two_thiouridine -def: "2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiouridine" EXACT [] -synonym: "s2U" EXACT RNAMOD [] -synonym: "two thiouridine" EXACT [] -xref: RNAMOD:057 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001350 -name: four_thiouridine -def: "4_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-thiouridine" EXACT [] -synonym: "four thiouridine" EXACT [] -synonym: "s4U" EXACT RNAMOD [] -xref: RNAMOD:058 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001351 -name: five_methyl_2_thiouridine -def: "5_methyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methyl-2-thiouridine" EXACT [] -synonym: "five methyl 2 thiouridine" EXACT [] -synonym: "m5s2U" EXACT RNAMOD [] -xref: RNAMOD:059 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001352 -name: two_thio_two_prime_O_methyluridine -def: "2_thio_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thio-2'-O-methyluridine" EXACT [] -synonym: "s2Um" EXACT RNAMOD [] -synonym: "two thio two prime O methyluridine" EXACT [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001353 -name: three_three_amino_three_carboxypropyl_uridine -def: "3_3_amino_3_carboxypropyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-(3-amino-3-carboxypropyl)uridine" EXACT [] -synonym: "acp3U" EXACT RNAMOD [] -xref: RNAMOD:061 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001354 -name: five_hydroxyuridine -def: "5_hydroxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxyuridine" EXACT [] -synonym: "five hydroxyuridine" EXACT [] -synonym: "ho5U" EXACT RNAMOD [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001355 -name: five_methoxyuridine -def: "5_methoxyuridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxyuridine" EXACT [] -synonym: "five methoxyuridine" EXACT [] -synonym: "mo5U" EXACT RNAMOD [] -xref: RNAMOD:063 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001356 -name: uridine_five_oxyacetic_acid -def: "Uridine_5_oxyacetic_acid is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "cmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid" EXACT [] -synonym: "uridine five oxyacetic acid" EXACT [] -xref: RNAMOD:064 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001357 -name: uridine_five_oxyacetic_acid_methyl_ester -def: "Uridine_5_oxyacetic_acid_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mcmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid methyl ester" EXACT [] -synonym: "uridine five oxyacetic acid methyl ester" EXACT [] -xref: RNAMOD:065 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001358 -name: five_carboxyhydroxymethyl_uridine -def: "5_carboxyhydroxymethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine" EXACT [] -synonym: "chm5U" EXACT RNAMOD [] -synonym: "five carboxyhydroxymethyl uridine" EXACT [] -xref: RNAMOD:066 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001359 -name: five_carboxyhydroxymethyl_uridine_methyl_ester -def: "5_carboxyhydroxymethyl_uridine_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine methyl ester" EXACT [] -synonym: "five carboxyhydroxymethyl uridine methyl ester" EXACT [] -synonym: "mchm5U" EXACT RNAMOD [] -xref: RNAMOD:067 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001360 -name: five_methoxycarbonylmethyluridine -def: "Five_methoxycarbonylmethyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyluridine" EXACT [] -synonym: "five methoxycarbonylmethyluridine" EXACT [] -synonym: "mcm5U" EXACT RNAMOD [] -xref: RNAMOD:068 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001361 -name: five_methoxycarbonylmethyl_two_prime_O_methyluridine -def: "Five_methoxycarbonylmethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five methoxycarbonylmethyl two prime O methyluridine" EXACT [] -synonym: "mcm5Um" EXACT RNAMOD [] -xref: RNAMOD:069 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001362 -name: five_methoxycarbonylmethyl_two_thiouridine -def: "5_methoxycarbonylmethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2-thiouridine" EXACT [] -synonym: "five methoxycarbonylmethyl two thiouridine" EXACT [] -synonym: "mcm5s2U" EXACT RNAMOD [] -xref: RNAMOD:070 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001363 -name: five_aminomethyl_two_thiouridine -def: "5_aminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-aminomethyl-2-thiouridine" EXACT [] -synonym: "five aminomethyl two thiouridine" EXACT [] -synonym: "nm5s2U" EXACT RNAMOD [] -xref: RNAMOD:071 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001364 -name: five_methylaminomethyluridine -def: "5_methylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyluridine" EXACT [] -synonym: "five methylaminomethyluridine" EXACT [] -synonym: "mnm5U" EXACT RNAMOD [] -xref: RNAMOD:072 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001365 -name: five_methylaminomethyl_two_thiouridine -def: "5_methylaminomethyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-thiouridine" EXACT [] -synonym: "five methylaminomethyl two thiouridine" EXACT [] -synonym: "mnm5s2U" EXACT RNAMOD [] -xref: RNAMOD:073 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001366 -name: five_methylaminomethyl_two_selenouridine -def: "5_methylaminomethyl_2_selenouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-selenouridine" EXACT [] -synonym: "five methylaminomethyl two selenouridine" EXACT [] -synonym: "mnm5se2U" EXACT RNAMOD [] -xref: RNAMOD:074 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001367 -name: five_carbamoylmethyluridine -def: "5_carbamoylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyluridine" EXACT [] -synonym: "five carbamoylmethyluridine" EXACT [] -synonym: "ncm5U" EXACT RNAMOD [] -xref: RNAMOD:075 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001368 -name: five_carbamoylmethyl_two_prime_O_methyluridine -def: "5_carbamoylmethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five carbamoylmethyl two prime O methyluridine" EXACT [] -synonym: "ncm5Um" EXACT RNAMOD [] -xref: RNAMOD:076 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001369 -name: five_carboxymethylaminomethyluridine -def: "5_carboxymethylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyluridine" EXACT [] -synonym: "cmnm5U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyluridine" EXACT [] -xref: RNAMOD:077 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001370 -name: five_carboxymethylaminomethyl_two_prime_O_methyluridine -def: "5_carboxymethylaminomethyl_2_prime_O_methyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl- 2'-O-methyluridine" EXACT [] -synonym: "cmnm5Um" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two prime O methyluridine" EXACT [] -xref: RNAMOD:078 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001371 -name: five_carboxymethylaminomethyl_two_thiouridine -def: "5_carboxymethylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl-2-thiouridine" EXACT [] -synonym: "cmnm5s2U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two thiouridine" EXACT [] -xref: RNAMOD:079 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001372 -name: three_methyluridine -def: "3_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methyluridine" EXACT [] -synonym: "m3U" EXACT RNAMOD [] -synonym: "three methyluridine" EXACT [] -xref: RNAMOD:085 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001373 -name: one_methyl_three_three_amino_three_carboxypropyl_pseudouridine -def: "1_methyl_3_3_amino_3_carboxypropyl_pseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyl-3-(3-amino-3-carboxypropyl) pseudouridine" EXACT [] -synonym: "m1acp3Y" EXACT RNAMOD [] -xref: RNAMOD:086 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001374 -name: five_carboxymethyluridine -def: "5_carboxymethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethyluridine" EXACT [] -synonym: "cm5U" EXACT RNAMOD [] -synonym: "five carboxymethyluridine" EXACT [] -xref: RNAMOD:087 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001375 -name: three_two_prime_O_dimethyluridine -def: "3_2prime_O_dimethyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "3,2'-O-dimethyluridine" EXACT [] -synonym: "m3Um" EXACT RNAMOD [] -synonym: "three two prime O dimethyluridine" EXACT [] -xref: RNAMOD:092 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001376 -name: five_methyldihydrouridine -def: "5_methyldihydrouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methyldihydrouridine" EXACT [] -synonym: "five methyldihydrouridine" EXACT [] -synonym: "m5D" EXACT RNAMOD [] -xref: RNAMOD:093 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001377 -name: three_methylpseudouridine -def: "3_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylpseudouridine" EXACT [] -synonym: "m3Y" EXACT RNAMOD [] -synonym: "three methylpseudouridine" EXACT [] -xref: RNAMOD:094 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001378 -name: five_taurinomethyluridine -def: "5_taurinomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyluridine" EXACT [] -synonym: "five taurinomethyluridine" EXACT [] -synonym: "tm5U" EXACT RNAMOD [] -xref: RNAMOD:098 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001379 -name: five_taurinomethyl_two_thiouridine -def: "5_taurinomethyl_2_thiouridineis a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyl-2-thiouridine" EXACT [] -synonym: "five taurinomethyl two thiouridine" EXACT [] -synonym: "tm5s2U" EXACT RNAMOD [] -xref: RNAMOD:099 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001380 -name: five_isopentenylaminomethyl_uridine -def: "5_isopentenylaminomethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)uridine" EXACT [] -synonym: "five isopentenylaminomethyl uridine" EXACT [] -synonym: "inm5U" EXACT RNAMOD [] -xref: RNAMOD:103 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001381 -name: five_isopentenylaminomethyl_two_thiouridine -def: "5_isopentenylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2-thiouridine" EXACT [] -synonym: "five isopentenylaminomethyl two thiouridine" EXACT [] -synonym: "inm5s2U" EXACT RNAMOD [] -xref: RNAMOD:104 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001382 -name: five_isopentenylaminomethyl_two_prime_O_methyluridine -def: "5_isopentenylaminomethyl_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2'-O-methyluridine" EXACT [] -synonym: "five isopentenylaminomethyl two prime O methyluridine" EXACT [] -synonym: "inm5Um" EXACT RNAMOD [] -xref: RNAMOD:105 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001383 -name: histone_binding_site -def: "A binding site that, in the nucleotide molecule, interacts selectively and non-covalently with polypeptide residues of a histone." [SO:ke] -synonym: "histone binding site" EXACT [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site - -[Term] -id: SO:0001384 -name: CDS_fragment -synonym: "CDS fragment" EXACT [] -synonym: "incomplete CDS" EXACT [] -is_a: SO:0000316 ! CDS -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000731 ! fragmentary -relationship: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001385 -name: modified_amino_acid_feature -def: "A post translationally modified amino acid feature." [SO:ke] -synonym: "modified amino acid feature" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001386 -name: modified_glycine -def: "A post translationally modified glycine amino acid feature." [SO:ke] -synonym: "ModGly" EXACT AAMOD [] -synonym: "modified glycine" EXACT [] -xref: MOD:00908 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001387 -name: modified_L_alanine -def: "A post translationally modified alanine amino acid feature." [SO:ke] -synonym: "ModAla" EXACT AAMOD [] -synonym: "modified L alanine" EXACT [] -synonym: "modified L-alanine" EXACT [] -xref: MOD:00901 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001388 -name: modified_L_asparagine -def: "A post translationally modified asparagine amino acid feature." [SO:ke] -synonym: "ModAsn" EXACT AAMOD [] -synonym: "modified L asparagine" EXACT [] -synonym: "modified L-asparagine" EXACT [] -xref: MOD:00903 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001389 -name: modified_L_aspartic_acid -def: "A post translationally modified aspartic acid amino acid feature." [SO:ke] -synonym: "ModAsp" EXACT AAMOD [] -synonym: "modified L aspartic acid" EXACT [] -synonym: "modified L-aspartic acid" EXACT [] -xref: MOD:00904 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001390 -name: modified_L_cysteine -def: "A post translationally modified cysteine amino acid feature." [SO:ke] -synonym: "ModCys" EXACT AAMOD [] -synonym: "modified L cysteine" EXACT [] -synonym: "modified L-cysteine" EXACT [] -xref: MOD:00905 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001391 -name: modified_L_glutamic_acid -synonym: "ModGlu" EXACT AAMOD [] -synonym: "modified L glutamic acid" EXACT [] -synonym: "modified L-glutamic acid" EXACT [] -xref: MOD:00906 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001392 -name: modified_L_threonine -def: "A post translationally modified threonine amino acid feature." [SO:ke] -synonym: "modified L threonine" EXACT [] -synonym: "modified L-threonine" EXACT [] -synonym: "ModThr" EXACT AAMOD [] -xref: MOD:00917 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001393 -name: modified_L_tryptophan -def: "A post translationally modified tryptophan amino acid feature." 
[SO:ke] -synonym: "modified L tryptophan" EXACT [] -synonym: "modified L-tryptophan" EXACT [] -synonym: "ModTrp" EXACT AAMOD [] -xref: MOD:00918 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001394 -name: modified_L_glutamine -def: "A post translationally modified glutamine amino acid feature." [SO:ke] -synonym: "ModGln" EXACT [] -synonym: "modified L glutamine" EXACT [] -synonym: "modified L-glutamine" EXACT [] -xref: MOD:00907 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001395 -name: modified_L_methionine -def: "A post translationally modified methionine amino acid feature." [SO:ke] -synonym: "modified L methionine" EXACT [] -synonym: "modified L-methionine" EXACT [] -synonym: "ModMet" EXACT AAMOD [] -xref: MOD:00913 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001396 -name: modified_L_isoleucine -def: "A post translationally modified isoleucine amino acid feature." [SO:ke] -synonym: "modified L isoleucine" EXACT [] -synonym: "modified L-isoleucine" EXACT [] -synonym: "ModIle" EXACT AAMOD [] -xref: MOD:00910 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001397 -name: modified_L_phenylalanine -def: "A post translationally modified phenylalanine amino acid feature." [SO:ke] -synonym: "modified L phenylalanine" EXACT [] -synonym: "modified L-phenylalanine" EXACT [] -synonym: "ModPhe" EXACT AAMOD [] -xref: MOD:00914 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001398 -name: modified_L_histidine -def: "A post translationally modified histidine amino acid feature." [SO:ke] -synonym: "ModHis" EXACT [] -synonym: "modified L histidine" EXACT [] -synonym: "modified L-histidine" EXACT [] -xref: MOD:00909 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001399 -name: modified_L_serine -def: "A post translationally modified serine amino acid feature." 
[SO:ke] -synonym: "modified L serine" EXACT [] -synonym: "modified L-serine" EXACT [] -synonym: "MosSer" EXACT AAMOD [] -xref: MOD:00916 "http://www.psidev.info/index.php?q=node/104" -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001400 -name: modified_L_lysine -def: "A post translationally modified lysine amino acid feature." [SO:ke] -synonym: "modified L lysine" EXACT [] -synonym: "modified L-lysine" EXACT [] -synonym: "ModLys" EXACT AAMOD [] -xref: MOD:00912 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001401 -name: modified_L_leucine -def: "A post translationally modified leucine amino acid feature." [SO:ke] -synonym: "modified L leucine" EXACT [] -synonym: "modified L-leucine " EXACT [] -synonym: "ModLeu" EXACT AAMOD [] -xref: MOD:00911 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001402 -name: modified_L_selenocysteine -def: "A post translationally modified selenocysteine amino acid feature." [SO:ke] -synonym: "modified L selenocysteine" EXACT [] -synonym: "modified L-selenocysteine" EXACT [] -xref: MOD:01158 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001403 -name: modified_L_valine -def: "A post translationally modified valine amino acid feature." [SO:ke] -synonym: "modified L valine" EXACT [] -synonym: "modified L-valine" EXACT [] -synonym: "ModVal" EXACT AAMOD [] -xref: MOD:00920 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001404 -name: modified_L_proline -def: "A post translationally modified proline amino acid feature." [SO:ke] -synonym: "modified L proline" EXACT [] -synonym: "modified L-proline " EXACT [] -synonym: "ModPro" EXACT AAMOD [] -xref: MOD:00915 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001405 -name: modified_L_tyrosine -def: "A post translationally modified tyrosine amino acid feature." 
[SO:ke] -synonym: "modified L tyrosine" EXACT [] -synonym: "modified L-tyrosine" EXACT [] -synonym: "ModTry" EXACT AAMOD [] -xref: MOD:00919 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001406 -name: modified_L_arginine -def: "A post translationally modified arginine amino acid feature." [SO:ke] -synonym: "ModArg" EXACT AAMOD [] -synonym: "modified L arginine" EXACT [] -synonym: "modified L-arginine" EXACT [] -xref: MOD:00902 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001407 -name: peptidyl -def: "An attribute describing the nature of a proteinaceous polymer, where by the amino acid units are joined by peptide bonds." [SO:ke] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0001408 -name: cleaved_for_gpi_anchor_region -def: "The C-terminal residues of a polypeptide which are exchanged for a GPI-anchor." [EBI:rh] -synonym: "cleaved for gpi anchor region" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001409 -name: biomaterial_region -def: "A region which is intended for use in an experiment." [SO:cb] -subset: SOFA -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001410 -name: experimental_feature -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -subset: SOFA -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -subset: SOFA -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -def: "A region that is defined according to its relations with other regions within the same sequence." 
[SO:cb] -subset: SOFA -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001413 -name: translocation_breakpoint -def: "The point within a chromosome where a translocation begins or ends." [SO:cb] -synonym: "translocation breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001414 -name: insertion_breakpoint -def: "The point within a chromosome where a insertion begins or ends." [SO:cb] -synonym: "insertion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001415 -name: deletion_breakpoint -def: "The point within a chromosome where a deletion begins or ends." [SO:cb] -synonym: "deletion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001416 -name: five_prime_flanking_region -def: "A flanking region located five prime of a specific region." [SO:chado] -synonym: "5' flanking region" RELATED [] -synonym: "five prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001417 -name: three_prime_flanking_region -def: "A flanking region located three prime of a specific region." [SO:chado] -synonym: "3' flanking region" RELATED [] -synonym: "three prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001418 -name: transcribed_fragment -def: "An experimental region, defined by a tiling array experiment to be transcribed at some level." [SO:ke] -comment: Term requested by the MODencode group. -synonym: "transcribed fragment" EXACT [] -synonym: "transfrag" RELATED [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001419 -name: cis_splice_site -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -subset: SOFA -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -def: "Primary transcript region bordering trans-splice junction." 
[SO:ke] -subset: SOFA -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001421 -name: splice_junction -def: "The boundary between an intron and an exon." [SO:ke] -synonym: "splice boundary" EXACT [] -synonym: "splice junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001422 -name: conformational_switch -def: "A region of a polypeptide, involved in the transition from one conformational state to another." [SO:ke] -comment: MM Young, K Kirshenbaum, KA Dill & S Highsmith. Predicting conformational switches in proteins. Protein Science, 1999, 8, 1752-64. K. Kirshenbaum, M.M. Young and S. Highsmith. Predicting Allosteric Switches in Myosins. Protein Science 8(9):1806-1815. 1999. -synonym: "polypeptide conformational switch" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001423 -name: dye_terminator_read -def: "A read produced by the dye terminator method of sequencing." [SO:ke] -synonym: "dye terminator read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001424 -name: pyrosequenced_read -def: "A read produced by pyrosequencing technology." [SO:ke] -comment: An example is a read produced by Roche 454 technology. -synonym: "pyorsequenced read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001425 -name: ligation_based_read -def: "A read produced by ligation based sequencing technologies." [SO:ke] -comment: An example of this kind of read is one produced by ABI SOLiD. -synonym: "ligation based read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001426 -name: polymerase_synthesis_read -def: "A read produced by the polymerase based sequence by synthesis method." [SO:ke] -comment: An example is a read produced by Illumina technology. -synonym: "polymerase synthesis read" RELATED [] -is_a: SO:0000150 ! 
read - -[Term] -id: SO:0001427 -name: cis_regulatory_frameshift_element -def: "A structural region in an RNA molecule which promotes ribosomal frameshifting of cis coding sequence." [RFAM:jd] -synonym: "cis regulatory frameshift element" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region - -[Term] -id: SO:0001428 -name: expressed_sequence_assembly -def: "A sequence assembly derived from expressed sequences." [SO:ke] -comment: From tracker [ 2372385 ] expressed_sequence_assembly. -synonym: "expressed sequence assembly" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0001429 -name: DNA_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with DNA." [SO:ke] -synonym: "DNA binding site" EXACT [] -is_a: SO:0001655 ! nucleotide_binding_site - -[Term] -id: SO:0001431 -name: cryptic_gene -def: "A gene that is not transcribed under normal conditions and is not critical to normal cellular functioning." [SO:ke] -synonym: "cryptic gene" EXACT [] -is_a: SO:0000704 ! gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000976 ! cryptic -relationship: has_quality SO:0000976 ! cryptic - -[Term] -id: SO:0001432 -name: sequence_variant_affecting_polyadenylation -comment: OBSOLETE: This term was deleted as it conflated more than one term. The alteration is separate from the effect. -synonym: "mutation affecting polyadenylation" RELATED [] -synonym: "sequence variant affecting polyadenylation" EXACT [] -is_obsolete: true -replaced_by: SO:0001545 - -[Term] -id: SO:0001433 -name: three_prime_RACE_clone -def: "A three prime RACE (Rapid Amplification of cDNA Ends) clone is a cDNA clone copied from the 3' end of an mRNA (using a poly-dT primer to capture the polyA tail and a gene-specific or randomly primed 5' primer), and spliced into a vector for propagation in a suitable host." [modENCODE:nlw] -synonym: "3' RACE clone" RELATED [] -is_a: SO:0000317 ! 
cDNA_clone - -[Term] -id: SO:0001434 -name: cassette_pseudogene -def: "A cassette pseudogene is a kind of gene in an inactive form which may recombine at a telomeric locus to form a functional copy." [SO:ke] -comment: Requested by the Trypanosome community. -synonym: "cassette pseudogene" EXACT [] -synonym: "cassette type psedogene" RELATED [] -is_a: SO:0001760 ! non_processed_pseudogene - -[Term] -id: SO:0001435 -name: alanine -comment: A place holder for a cross product with chebi. -synonym: "A" EXACT aa1 [] -synonym: "Ala" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001436 -name: valine -comment: A place holder for a cross product with chebi. -synonym: "V" EXACT aa1 [] -synonym: "Val" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001437 -name: leucine -comment: A place holder for a cross product with chebi. -synonym: "L" EXACT aa1 [] -synonym: "Leu" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001438 -name: isoleucine -comment: A place holder for a cross product with chebi. -synonym: "I" EXACT aa1 [] -synonym: "Ile" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001439 -name: proline -comment: A place holder for a cross product with chebi. -synonym: "P" EXACT aa1 [] -synonym: "Pro" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001440 -name: tryptophan -comment: A place holder for a cross product with chebi. -synonym: "Trp" EXACT aa3 [] -synonym: "W" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001441 -name: phenylalanine -comment: A place holder for a cross product with chebi. -synonym: "F" EXACT aa1 [] -synonym: "Phe" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001442 -name: methionine -comment: A place holder for a cross product with chebi. -synonym: "M" EXACT aa1 [] -synonym: "Met" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001443 -name: glycine -comment: A place holder for a cross product with chebi. 
-synonym: "G" EXACT aa1 [] -synonym: "Gly" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001444 -name: serine -comment: A place holder for a cross product with chebi. -synonym: "S" EXACT aa1 [] -synonym: "Ser" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001445 -name: threonine -comment: A place holder for a cross product with chebi. -synonym: "T" EXACT aa1 [] -synonym: "Thr" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001446 -name: tyrosine -comment: A place holder for a cross product with chebi. -synonym: "Tyr" EXACT aa3 [] -synonym: "Y" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001447 -name: cysteine -comment: A place holder for a cross product with chebi. -synonym: "C" EXACT aa1 [] -synonym: "Cys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001448 -name: glutamine -comment: A place holder for a cross product with chebi. -synonym: "Gln" EXACT aa3 [] -synonym: "Q" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001449 -name: asparagine -comment: A place holder for a cross product with chebi. -synonym: "Asn" EXACT aa3 [] -synonym: "N" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001450 -name: lysine -comment: A place holder for a cross product with chebi. -synonym: "K" EXACT aa1 [] -synonym: "Lys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001451 -name: arginine -comment: A place holder for a cross product with chebi. -synonym: "Arg" EXACT aa3 [] -synonym: "R" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001452 -name: histidine -comment: A place holder for a cross product with chebi. -synonym: "H" EXACT aa1 [] -synonym: "His" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001453 -name: aspartic_acid -comment: A place holder for a cross product with chebi. -synonym: "Asp" EXACT aa3 [] -synonym: "aspartic acid" EXACT [] -synonym: "D" EXACT aa1 [] -is_a: SO:0001237 ! 
amino_acid - -[Term] -id: SO:0001454 -name: glutamic_acid -comment: A place holder for a cross product with chebi. -synonym: "E" EXACT aa1 [] -synonym: "Glu" EXACT aa3 [] -synonym: "glutamic acid" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001455 -name: selenocysteine -comment: A place holder for a cross product with chebi. -synonym: "Sec" EXACT aa3 [] -synonym: "U" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001456 -name: pyrrolysine -comment: A place holder for a cross product with chebi. -synonym: "O" EXACT aa1 [] -synonym: "Pyl" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001457 -name: transcribed_cluster -def: "A region defined by a set of transcribed sequences from the same gene or expressed pseudogene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "transcribed cluster" EXACT [] -synonym: "unigene cluster" RELATED [] -is_a: SO:0001410 ! experimental_feature -relationship: has_part SO:0000695 ! reagent - -[Term] -id: SO:0001458 -name: unigene_cluster -def: "A kind of transcribed_cluster defined by a set of transcribed sequences from the a unique gene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "unigene cluster" RELATED [] -is_a: SO:0001457 ! transcribed_cluster - -[Term] -id: SO:0001459 -name: CRISPR -def: "Clustered Palindromic Repeats interspersed with bacteriophage derived spacer sequences." [RFAM:jd] -synonym: "Clustered_Regularly_Interspaced_Short_Palindromic_Repeat" EXACT [] -synonym: "CRISPR element" EXACT [] -xref: http:en.wikipedia.org/wiki/CRISPR -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0001460 -name: insulator_binding_site -def: "A binding site that, in an insulator region of a nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -comment: See tracker ID 2060908. -synonym: "insulator binding site" RELATED [] -is_a: SO:0001654 ! 
nucleotide_to_protein_binding_site -relationship: part_of SO:0000627 ! insulator - -[Term] -id: SO:0001461 -name: enhancer_binding_site -def: "A binding site that, in the enhancer region of a nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -synonym: "enhancer binding site" RELATED [] -is_a: SO:0001654 ! nucleotide_to_protein_binding_site -relationship: part_of SO:0000165 ! enhancer - -[Term] -id: SO:0001462 -name: contig_collection -def: "A collection of contigs." [SO:ke] -comment: See tracker ID: 2138359. -synonym: "contig collection" EXACT [] -is_a: SO:0001085 ! sequence_conflict -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001085 ! sequence_conflict -intersection_of: has_part SO:0000149 ! contig -relationship: has_part SO:0000149 ! contig - -[Term] -id: SO:0001463 -name: lincRNA -def: "A multiexonic non-coding RNA transcribed by RNA polymerase II." [PMID:19182780, SO:ke] -synonym: "large intervening non-coding RNA" EXACT [] -synonym: "long intergenic non-coding RNA" EXACT [] -is_a: SO:0001877 ! lnc_RNA - -[Term] -id: SO:0001464 -name: UST -def: "An EST spanning part or all of the untranslated regions of a protein-coding transcript." [SO:nlw] -synonym: "UTR sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001465 -name: three_prime_UST -def: "A UST located in the 3'UTR of a protein-coding transcript." [SO:nlw] -synonym: "3' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001466 -name: five_prime_UST -def: "An UST located in the 5'UTR of a protein-coding transcript." [SO:nlw] -synonym: "5' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001467 -name: RST -def: "A tag produced from a single sequencing read from a RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "RACE sequence tag" EXACT [] -is_a: SO:0000345 ! 
EST - -[Term] -id: SO:0001468 -name: three_prime_RST -def: "A tag produced from a single sequencing read from a 3'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "3' RST" EXACT [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001469 -name: five_prime_RST -def: "A tag produced from a single sequencing read from a 5'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "5' RST" RELATED [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001470 -name: UST_match -def: "A match against an UST sequence." [SO:nlw] -synonym: "UST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001471 -name: RST_match -def: "A match against an RST sequence." [SO:nlw] -synonym: "RST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001472 -name: primer_match -def: "A nucleotide match to a primer sequence." [SO:nlw] -synonym: "primer match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0001473 -name: miRNA_antiguide -def: "A region of the pri miRNA that base pairs with the guide to form the hairpin." [SO:ke] -synonym: "miRNA antiguide " EXACT [] -synonym: "miRNA passenger strand" EXACT [] -synonym: "miRNA star" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-05-27T03:35:43Z - -[Term] -id: SO:0001474 -name: trans_splice_junction -def: "The boundary between the spliced leader and the first exon of the mRNA." [SO:ke] -synonym: "trans-splice junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2009-07-13T04:50:49Z - -[Term] -id: SO:0001475 -name: outron -def: "A region of a primary transcript, that is removed via trans splicing." [PMID:16401417, SO:ke] -is_a: SO:0000835 ! primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-07-14T11:36:08Z - -[Term] -id: SO:0001476 -name: natural_plasmid -def: "A plasmid that occurs naturally." 
[SO:xp] -synonym: "natural plasmid" EXACT [] -is_a: SO:0000155 ! plasmid -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000782 ! natural -relationship: has_quality SO:0000782 ! natural -created_by: kareneilbeck -creation_date: 2009-09-01T03:43:06Z - -[Term] -id: SO:0001477 -name: gene_trap_construct -def: "A gene trap construct is a type of engineered plasmid which is designed to integrate into a genome and produce a fusion transcript between exons of the gene into which it inserts and a reporter element in the construct. Gene traps contain a splice acceptor, do not contain promoter elements for the reporter, and are mutagenic. Gene traps may be bicistronic with the second cassette containing a promoter driving an a selectable marker." [ZFIN:dh] -synonym: "gene trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:49:09Z - -[Term] -id: SO:0001478 -name: promoter_trap_construct -def: "A promoter trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when inserted in close proximity to a promoter element. Promoter traps typically do not contain promoter elements and are mutagenic." [ZFIN:dh] -synonym: "promoter trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:52:01Z - -[Term] -id: SO:0001479 -name: enhancer_trap_construct -def: "An enhancer trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when the expression from a basic minimal promoter is enhanced by genomic enhancer elements. Enhancer traps contain promoter elements and are not usually mutagenic." [ZFIN:dh] -synonym: "enhancer trap construct" EXACT [] -is_a: SO:0000637 ! 
engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:53:26Z - -[Term] -id: SO:0001480 -name: PAC_end -def: "A region of sequence from the end of a PAC clone that may provide a highly specific marker." [ZFIN:mh] -synonym: "PAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000154 ! PAC -created_by: kareneilbeck -creation_date: 2009-09-09T05:18:12Z - -[Term] -id: SO:0001481 -name: RAPD -def: "RAPD is a 'PCR product' where a sequence variant is identified through the use of PCR with random primers." [ZFIN:mh] -synonym: "Random Amplification Polymorphic DNA" EXACT [] -is_a: SO:0000006 ! PCR_product -created_by: kareneilbeck -creation_date: 2009-09-09T05:26:10Z - -[Term] -id: SO:0001482 -name: shadow_enhancer -synonym: "shadow enhancer" EXACT [] -is_a: SO:0000165 ! enhancer -created_by: kareneilbeck -creation_date: 2009-09-09T05:29:29Z - -[Term] -id: SO:0001483 -name: SNV -def: "SNVs are single nucleotide positions in genomic DNA at which different sequence alternatives exist." [SO:bm] -subset: SOFA -synonym: "single nucleotide variant" EXACT [] -is_a: SO:1000002 ! substitution -created_by: kareneilbeck -creation_date: 2009-10-08T11:37:49Z - -[Term] -id: SO:0001484 -name: X_element_combinatorial_repeat -def: "An X element combinatorial repeat is a repeat region located between the X element and the telomere or adjacent Y' element." [http://www.yeastgenome.org/help/glossary.html] -comment: X element combinatorial repeats contain Tbf1p binding sites,\nand possible functions include a role in telomerase-independent telomere\nmaintenance via recombination or as a barrier against transcriptional\nsilencing. These are usually present as a combination of one or more of\nseveral types of smaller elements (designated A, B, C, or D). This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "X element combinatorial repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! 
telomere -created_by: kareneilbeck -creation_date: 2009-11-10T11:03:37Z - -[Term] -id: SO:0001485 -name: Y_prime_element -def: "A Y' element is a repeat region (SO:0000657) located adjacent to telomeric repeats or X element combinatorial repeats, either as a single copy or tandem repeat of two to four copies." [http:http://www.yeastgenome.org/help/glossary.html] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. -synonym: "Y' element" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T12:08:57Z - -[Term] -id: SO:0001486 -name: standard_draft -def: "The status of a whole genome sequence, where the data is minimally filtered or un-filtered, from any number of sequencing platforms, and is assembled into contigs. Genome sequence of this quality may harbour regions of poor quality and can be relatively incomplete." [DOI:10.1126] -synonym: "standard draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:48:32Z - -[Term] -id: SO:0001487 -name: high_quality_draft -def: "The status of a whole genome sequence, where overall coverage represents at least 90 percent of the genome." [DOI:10.1126] -synonym: "high quality draft" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:52:36Z - -[Term] -id: SO:0001488 -name: improved_high_quality_draft -def: "The status of a whole genome sequence, where additional work has been performed, using either manual or automated methods, such as gap resolution." [DOI:10.1126] -synonym: "improved high quality draft" EXACT [] -is_a: SO:0001499 ! 
whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:54:35Z - -[Term] -id: SO:0001489 -name: annotation_directed_improved_draft -def: "The status of a whole genome sequence,where annotation, and verification of coding regions has occurred." [DOI:10.1126] -synonym: "annotation directed improvement" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T12:57:10Z - -[Term] -id: SO:0001490 -name: noncontiguous_finished -def: "The status of a whole genome sequence, where the assembly is high quality, closure approaches have been successful for most gaps, misassemblies and low quality regions." [DOI:10.1126] -synonym: "non contiguous finished" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:01:07Z - -[Term] -id: SO:0001491 -name: finished_genome -def: "The status of a whole genome sequence, with less than 1 error per 100,000 base pairs." [DOI:10.1126] -synonym: "finished" EXACT [] -synonym: "finished genome" EXACT [] -is_a: SO:0001499 ! whole_genome_sequence_status -created_by: kareneilbeck -creation_date: 2009-10-23T01:04:43Z - -[Term] -id: SO:0001492 -name: intronic_regulatory_region -def: "A regulatory region that is part of an intron." [SO:ke] -synonym: "intronic regulatory region" EXACT [] -is_a: SO:0001679 ! transcription_regulatory_region -relationship: part_of SO:0000188 ! intron -created_by: kareneilbeck -creation_date: 2009-11-08T02:48:02Z - -[Term] -id: SO:0001493 -name: centromere_DNA_Element_I -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region composed of 8-11bp which enables binding by the centromere binding factor 1(Cbf1p)." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEI" EXACT [] -synonym: "Centromere DNA Element I" EXACT [] -is_a: SO:0000330 ! 
conserved_region -relationship: part_of SO:0001794 ! point_centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:47:23Z - -[Term] -id: SO:0001494 -name: centromere_DNA_Element_II -def: "A centromere DNA Element II (CDEII) is part a conserved region of the centromere, consisting of a consensus region that is AT-rich and ~ 75-100 bp in length." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEII" EXACT [] -synonym: "centromere DNA Element II" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0001794 ! point_centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:51:26Z - -[Term] -id: SO:0001495 -name: centromere_DNA_Element_III -def: "A centromere DNA Element I (CDEI) is a conserved region, part of the centromere, consisting of a consensus region that consists of a 25-bp which enables binding by the centromere DNA binding factor 3 (CBF3) complex." [PMID:11222754] -comment: This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880699. -synonym: "CDEIII" EXACT [] -synonym: "centromere DNA Element III" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0001794 ! point_centromere -created_by: kareneilbeck -creation_date: 2009-11-09T05:54:47Z - -[Term] -id: SO:0001496 -name: telomeric_repeat -def: "The telomeric repeat is a repeat region, part of the chromosome, which in yeast, is a G-rich terminal sequence of the form (TG(1-3))n or more precisely ((TG)(1-6)TG(2-3))n." [PMID:8720065] -comment: The repeats are maintained by telomerase and there is generally 300 (+/-) 75 bp of TG(1-3) at a given end. Telomeric repeats function in completing chromosome replication and protecting the ends from degradation and end-to-end fusions. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880739. -synonym: "telomeric repeat" EXACT [] -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000624 ! 
telomere -created_by: kareneilbeck -creation_date: 2009-11-09T06:00:42Z - -[Term] -id: SO:0001497 -name: X_element -def: "The X element is a conserved region, of the telomere, of ~475 bp that contains an ARS sequence and in most cases an Abf1p binding site." [http://www.yeastgenome.org/help/glossary.html#xelemcoresequence, PMID:7785338, PMID:8005434] -comment: Possible functions include roles in chromosomal segregation,\nmaintenance of chromosome stability, recombinational sequestering, or as a\nbarrier to transcriptional silencing. This term was requested 2009-10-16 by Michel Dumontier, tracker id 2880747. \n\nFrom Janos Demeter: The only region shared by all chromosome ends, the X element core sequence is a small conserved element (~475 bp) that contains an ARS sequence and in most cases an Abf1p binding site. Between these is a GC-rich region nearly identical to the meiosis-specific regulatory sequence URS1. -synonym: "X element" RELATED [] -synonym: "X element core sequence" EXACT [] -is_a: SO:0000330 ! conserved_region -relationship: part_of SO:0000624 ! telomere -created_by: kareneilbeck -creation_date: 2009-11-10T10:56:54Z - -[Term] -id: SO:0001498 -name: YAC_end -def: "A region of sequence from the end of a YAC clone that may provide a highly specific marker." [SO:ke] -synonym: "YAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000152 ! YAC -created_by: kareneilbeck -creation_date: 2009-11-19T11:07:18Z - -[Term] -id: SO:0001499 -name: whole_genome_sequence_status -def: "The status of whole genome sequence." [DOI:10.1126] -comment: This terms and children were added to SO in response to tracker request by Patrick Chain. The paper Genome Project Standards in a New Era of Sequencing. Science October 9th 2009, addresses these terms. -synonym: "whole genome sequence status" EXACT [] -is_a: SO:0000905 ! 
status -created_by: kareneilbeck -creation_date: 2009-10-23T12:47:47Z - -[Term] -id: SO:0001500 -name: heritable_phenotypic_marker -def: "A biological_region characterized as a single heritable trait in a phenotype screen. The heritable phenotype may be mapped to a chromosome but generally has not been characterized to a specific gene locus." [JAX:hdene] -synonym: "heritable phenotypic marker" EXACT [] -synonym: "phenotypic marker" EXACT [] -is_a: SO:0001645 ! genetic_marker -created_by: kareneilbeck -creation_date: 2009-12-07T01:50:55Z - -[Term] -id: SO:0001501 -name: peptide_collection -def: "A collection of peptide sequences." [BBOP:nlw] -comment: Term requested via tracker ID: 2910829. -synonym: "peptide collection" EXACT [] -synonym: "peptide set" EXACT [] -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0000104 ! polypeptide -relationship: has_part SO:0000104 ! polypeptide -created_by: kareneilbeck -creation_date: 2009-12-11T10:58:58Z - -[Term] -id: SO:0001502 -name: high_identity_region -def: "An experimental feature with high sequence identity to another sequence." [SO:ke] -comment: Requested by tracker ID: 2902685. -synonym: "high identity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2009-12-11T11:06:05Z - -[Term] -id: SO:0001503 -name: processed_transcript -def: "A transcript for which no open reading frame has been identified and for which no other function has been determined." [MGI:hdeen] -comment: Ensembl and Vega also use this term name. Requested by Howard Deen of MGI. -synonym: "processed transcript" EXACT [] -is_a: SO:0000673 ! transcript -created_by: kareneilbeck -creation_date: 2009-12-21T05:37:14Z - -[Term] -id: SO:0001504 -name: assortment_derived_variation -def: "A chromosome variation derived from an event during meiosis." [SO:ke] -synonym: "assortment derived variation" RELATED [] -is_a: SO:0000240 ! 
chromosome_variation -created_by: kareneilbeck -creation_date: 2010-03-02T05:03:18Z - -[Term] -id: SO:0001505 -name: reference_genome -def: "A collection of sequences (often chromosomes) taken as the standard for a given organism and genome assembly." [SO:ke] -synonym: "reference genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:10:03Z - -[Term] -id: SO:0001506 -name: variant_genome -def: "A collection of sequences (often chromosomes) of an individual." [SO:ke] -synonym: "variant genome" RELATED [] -is_a: SO:0001026 ! genome -created_by: kareneilbeck -creation_date: 2010-03-03T02:11:25Z - -[Term] -id: SO:0001507 -name: variant_collection -def: "A collection of one or more sequences of an individual." [SO:ke] -synonym: "variant collection" RELATED [] -is_a: SO:0001260 ! sequence_collection -intersection_of: SO:0001260 ! sequence_collection -intersection_of: has_part SO:0001059 ! sequence_alteration -relationship: has_part SO:0001059 ! sequence_alteration -created_by: kareneilbeck -creation_date: 2010-03-03T02:13:28Z - -[Term] -id: SO:0001508 -name: alteration_attribute -synonym: "alteration attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:53:23Z - -[Term] -id: SO:0001509 -name: chromosomal_variation_attribute -synonym: "chromosomal variation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:54:30Z - -[Term] -id: SO:0001510 -name: intrachromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:25Z - -[Term] -id: SO:0001511 -name: interchromosomal -is_a: SO:0001509 ! chromosomal_variation_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:43Z - -[Term] -id: SO:0001512 -name: insertion_attribute -def: "A quality of a chromosomal insertion,." 
[SO:ke] -synonym: "insertion attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:55:56Z - -[Term] -id: SO:0001513 -name: tandem -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:37Z - -[Term] -id: SO:0001514 -name: direct -def: "A quality of an insertion where the insert is not in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:56:49Z - -[Term] -id: SO:0001515 -name: inverted -def: "A quality of an insertion where the insert is in a cytologically inverted orientation." [SO:ke] -is_a: SO:0001512 ! insertion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:40Z - -[Term] -id: SO:0001516 -name: free -def: "The quality of a duplication where the new region exists independently of the original." [SO:ke] -is_a: SO:0001523 ! duplication_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:57:51Z - -[Term] -id: SO:0001517 -name: inversion_attribute -synonym: "inversion attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:10Z - -[Term] -id: SO:0001518 -name: pericentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:24Z - -[Term] -id: SO:0001519 -name: paracentric -is_a: SO:0001517 ! inversion_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:35Z - -[Term] -id: SO:0001520 -name: translocaton_attribute -synonym: "translocation attribute" EXACT [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:58:47Z - -[Term] -id: SO:0001521 -name: reciprocal -is_a: SO:0001520 ! translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:34Z - -[Term] -id: SO:0001522 -name: insertional -is_a: SO:0001520 ! 
translocaton_attribute -created_by: kareneilbeck -creation_date: 2010-03-04T02:59:51Z - -[Term] -id: SO:0001523 -name: duplication_attribute -synonym: "duplication attribute" RELATED [] -is_a: SO:0001508 ! alteration_attribute -created_by: kareneilbeck -creation_date: 2010-03-05T01:56:33Z - -[Term] -id: SO:0001524 -name: chromosomally_aberrant_genome -synonym: "chromosomally aberrant genome" RELATED [] -is_a: SO:0001506 ! variant_genome -created_by: kareneilbeck -creation_date: 2010-03-05T02:21:00Z - -[Term] -id: SO:0001525 -name: assembly_error_correction -def: "A region of sequence where the final nucleotide assignment differs from the original assembly due to an improvement that replaces a mistake." [SO:ke] -synonym: "assembly error correction" RELATED [] -is_a: SO:0000413 ! sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:16:31Z - -[Term] -id: SO:0001526 -name: base_call_error_correction -def: "A region of sequence where the final nucleotide assignment is different from that given by the base caller due to an improvement that replaces a mistake." [SO:ke] -synonym: "base call error correction" RELATED [] -is_a: SO:0000413 ! sequence_difference -created_by: kareneilbeck -creation_date: 2010-03-09T02:18:07Z - -[Term] -id: SO:0001527 -name: peptide_localization_signal -def: "A region of peptide sequence used to target the polypeptide molecule to a specific organelle." [SO:ke] -subset: SOFA -synonym: "localization signal" RELATED [] -synonym: "peptide localization signal" EXACT [] -is_a: SO:0000839 ! polypeptide_region -created_by: kareneilbeck -creation_date: 2010-03-11T02:15:05Z - -[Term] -id: SO:0001528 -name: nuclear_localization_signal -def: "A polypeptide region that targets a polypeptide to the nucleus." [SO:ke] -synonym: "NLS" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_localization_signal "wikipedia" -is_a: SO:0001527 ! 
peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:16:38Z - -[Term] -id: SO:0001529 -name: endosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the endosome." [SO:ke] -synonym: "endosomal localization signal" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:20:58Z - -[Term] -id: SO:0001530 -name: lysosomal_localization_signal -def: "A polypeptide region that targets a polypeptide to the lysosome." [SO:ke] -synonym: "lysosomal localization signal" EXACT [] -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:24:10Z - -[Term] -id: SO:0001531 -name: nuclear_export_signal -def: "A polypeptide region that targets a polypeptide to he cytoplasm." [SO:ke] -synonym: "NES" EXACT [] -synonym: "nuclear export signal" EXACT [] -xref: http://en.wikipedia.org/wiki/Nuclear_export_signal -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T02:25:25Z - -[Term] -id: SO:0001532 -name: recombination_signal_sequence -def: "A region recognized by a recombinase." [SO:ke] -synonym: "recombination signal sequence" RELATED [] -xref: http://en.wikipedia.org/wiki/Recombination_Signal_Sequences "wikipedia" -is_a: SO:0000299 ! specific_recombination_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:16:47Z - -[Term] -id: SO:0001533 -name: cryptic_splice_site -def: "A splice site that is in part of the transcript not normally spliced. They occur via mutation or transcriptional error." [SO:ke] -synonym: "cryptic splice signal" RELATED [] -synonym: "cryptic splice site" EXACT [] -is_a: SO:0000162 ! splice_site -created_by: kareneilbeck -creation_date: 2010-03-11T03:25:06Z - -[Term] -id: SO:0001534 -name: nuclear_rim_localization_signal -def: "A polypeptide region that targets a polypeptide to the nuclear rim." 
[SO:ke] -synonym: "nuclear rim localization signal" RELATED [] -xref: PMID:16027110 -is_a: SO:0001527 ! peptide_localization_signal -created_by: kareneilbeck -creation_date: 2010-03-11T03:31:30Z - -[Term] -id: SO:0001535 -name: p_element -def: "A P_element is a DNA transposon responsible for hybrid dysgenesis." [SO:ke] -synonym: "P element" RELATED [] -is_a: SO:0000182 ! DNA_transposon -created_by: kareneilbeck -creation_date: 2010-03-12T03:40:33Z - -[Term] -id: SO:0001536 -name: functional_variant -def: "A sequence variant in which the function of a gene product is altered with respect to a reference." [SO:ke] -synonym: "functional variant" EXACT [] -is_a: SO:0001060 ! sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:30:25Z - -[Term] -id: SO:0001537 -name: structural_variant -def: "A sequence variant that changes one or more sequence features." [SO:ke] -synonym: "structural variant" RELATED [] -is_a: SO:0001060 ! sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:31:01Z - -[Term] -id: SO:0001538 -name: transcript_function_variant -def: "A sequence variant which alters the functioning of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcript function variant" EXACT [] -is_a: SO:0001536 ! functional_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:32:58Z - -[Term] -id: SO:0001539 -name: translational_product_function_variant -def: "A sequence variant that affects the functioning of a translational product with respect to a reference sequence." [SO:ke] -synonym: "translational product variant" EXACT [] -is_a: SO:0001536 ! functional_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:46:15Z - -[Term] -id: SO:0001540 -name: level_of_transcript_variant -def: "A sequence variant which alters the level of a transcript." [SO:ke] -synonym: "level of transcript variant" EXACT [] -is_a: SO:0001538 ! 
transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:47:07Z - -[Term] -id: SO:0001541 -name: decreased_transcript_level_variant -def: "A sequence variant that increases the level of mature, spliced and processed RNA with respect to a reference sequence." [SO:ke] -synonym: "decreased transcript level" EXACT [] -is_a: SO:0001540 ! level_of_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:47:47Z - -[Term] -id: SO:0001542 -name: increased_transcript_level_variant -def: "A sequence variant that increases the level of mature, spliced and processed RNA with respect to a reference sequence." [SO:ke] -synonym: "increased transcript level variant" EXACT [] -is_a: SO:0001540 ! level_of_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:48:17Z - -[Term] -id: SO:0001543 -name: transcript_processing_variant -def: "A sequence variant that affects the post transcriptional processing of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcript processing variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:48:48Z - -[Term] -id: SO:0001544 -name: editing_variant -def: "A transcript processing variant whereby the process of editing is disrupted with respect to the reference." [SO:ke] -synonym: "editing variant" EXACT [] -is_a: SO:0001543 ! transcript_processing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:49:25Z - -[Term] -id: SO:0001545 -name: polyadenylation_variant -def: "A sequence variant that changes polyadenylation with respect to a reference sequence." [SO:ke] -synonym: "polyadenylation variant" EXACT [] -is_a: SO:0001543 ! transcript_processing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:49:40Z - -[Term] -id: SO:0001546 -name: transcript_stability_variant -def: "A variant that changes the stability of a transcript with respect to a reference sequence." 
[SO:ke] -synonym: "transcript stability variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:01Z - -[Term] -id: SO:0001547 -name: decreased_transcript_stability_variant -def: "A sequence variant that decreases transcript stability with respect to a reference sequence." [SO:ke] -synonym: "decrease transcript stability variant" EXACT [] -is_a: SO:0001546 ! transcript_stability_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:23Z - -[Term] -id: SO:0001548 -name: increased_transcript_stability_variant -def: "A sequence variant that increases transcript stability with respect to a reference sequence." [SO:ke] -synonym: "increased transcript stability variant" EXACT [] -is_a: SO:0001546 ! transcript_stability_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:50:39Z - -[Term] -id: SO:0001549 -name: transcription_variant -def: "A variant that changes alters the transcription of a transcript with respect to a reference sequence." [SO:ke] -synonym: "transcription variant" EXACT [] -is_a: SO:0001538 ! transcript_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:51:26Z - -[Term] -id: SO:0001550 -name: rate_of_transcription_variant -def: "A sequence variant that changes the rate of transcription with respect to a reference sequence." [SO:ke] -synonym: "rate of transcription variant" EXACT [] -is_a: SO:0001549 ! transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:51:50Z - -[Term] -id: SO:0001551 -name: increased_transcription_rate_variant -def: "A sequence variant that increases the rate of transcription with respect to a reference sequence." [SO:ke] -synonym: "increased transcription rate variant" EXACT [] -is_a: SO:0001550 ! 
rate_of_transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:52:17Z - -[Term] -id: SO:0001552 -name: decreased_transcription_rate_variant -def: "A sequence variant that decreases the rate of transcription with respect to a reference sequence." [SO:ke] -synonym: "decreased transcription rate variant" EXACT [] -is_a: SO:0001550 ! rate_of_transcription_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:52:43Z - -[Term] -id: SO:0001553 -name: translational_product_level_variant -def: "A functional variant that changes the translational product level with respect to a reference sequence." [SO:ke] -synonym: "translational product level variant" EXACT [] -is_a: SO:0001539 ! translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:53:32Z - -[Term] -id: SO:0001554 -name: polypeptide_function_variant -def: "A sequence variant which changes polypeptide functioning with respect to a reference sequence." [SO:ke] -synonym: "polypeptide function variant" EXACT [] -is_a: SO:0001539 ! translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:53:54Z - -[Term] -id: SO:0001555 -name: decreased_translational_product_level -def: "A sequence variant which decreases the translational product level with respect to a reference sequence." [SO:ke] -synonym: "decrease translational product level" EXACT [] -is_a: SO:0001553 ! translational_product_level_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:54:25Z - -[Term] -id: SO:0001556 -name: increased_translational_product_level -def: "A sequence variant which increases the translational product level with respect to a reference sequence." [SO:ke] -synonym: "increase translational product level" EXACT [] -is_a: SO:0001553 ! 
translational_product_level_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:55:25Z - -[Term] -id: SO:0001557 -name: polypeptide_gain_of_function_variant -def: "A sequence variant which causes gain of polypeptide function with respect to a reference sequence." [SO:ke] -synonym: "polypeptide gain of function variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:12Z - -[Term] -id: SO:0001558 -name: polypeptide_localization_variant -def: "A sequence variant which changes the localization of a polypeptide with respect to a reference sequence." [SO:ke] -synonym: "polypeptide localization variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:37Z - -[Term] -id: SO:0001559 -name: polypeptide_loss_of_function_variant -def: "A sequence variant that causes the loss of a polypeptide function with respect to a reference sequence." [SO:ke] -synonym: "polypeptide loss of function variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:56:58Z - -[Term] -id: SO:0001560 -name: inactive_ligand_binding_site -def: "A sequence variant that causes the inactivation of a ligand binding site with respect to a reference sequence." [SO:ke] -synonym: "inactive ligand binding site" EXACT [] -is_a: SO:0001559 ! polypeptide_loss_of_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:58:00Z - -[Term] -id: SO:0001561 -name: polypeptide_partial_loss_of_function -def: "A sequence variant that causes some but not all loss of polypeptide function with respect to a reference sequence." [SO:ke] -synonym: "polypeptide partial loss of function" EXACT [] -is_a: SO:0001559 ! 
polypeptide_loss_of_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:58:32Z - -[Term] -id: SO:0001562 -name: polypeptide_post_translational_processing_variant -def: "A sequence variant that causes a change in post translational processing of the peptide with respect to a reference sequence." [SO:ke] -synonym: "polypeptide post translational processing variant" EXACT [] -is_a: SO:0001554 ! polypeptide_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T11:59:06Z - -[Term] -id: SO:0001563 -name: copy_number_change -def: "A sequence variant where copies of a feature (CNV) are either increased or decreased." [SO:ke] -synonym: "copy number change" EXACT [] -is_a: SO:0001537 ! structural_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:27:33Z - -[Term] -id: SO:0001564 -name: gene_variant -def: "A sequence variant where the structure of the gene is changed." [SO:ke] -synonym: "gene structure variant" EXACT [] -synonym: "snpEff:GENE" EXACT VAR [] -synonym: "VAAST:gene_variant" EXACT VAR [] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:01Z - -[Term] -id: SO:0001565 -name: gene_fusion -def: "A sequence variant whereby a two genes have become joined." [SO:ke] -synonym: "gene fusion" EXACT [] -is_a: SO:0001564 ! gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:28Z - -[Term] -id: SO:0001566 -name: regulatory_region_variant -def: "A sequence variant located within a regulatory region." [SO:ke] -comment: EBI term: Regulatory region variations - In regulatory region annotated by Ensembl. -synonym: "regulatory region variant" EXACT [] -synonym: "regulatory_region_" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001878 ! 
feature_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:28:48Z - -[Term] -id: SO:0001567 -name: stop_retained_variant -def: "A sequence variant where at least one base in the terminator codon is changed, but the terminator remains." [SO:ke] -synonym: "snpEff:SYNONYMOUS_STOP" EXACT VAR [] -synonym: "stop retained variant" EXACT [] -synonym: "VAAST:stop_retained" EXACT VAR [] -is_a: SO:0001590 ! terminator_codon_variant -is_a: SO:0001819 ! synonymous_variant -created_by: kareneilbeck -creation_date: 2010-04-19T05:02:30Z - -[Term] -id: SO:0001568 -name: splicing_variant -def: "A sequence variant that changes the process of splicing." [SO:ke] -synonym: "splicing variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:29:22Z - -[Term] -id: SO:0001569 -name: cryptic_splice_site_variant -def: "A sequence variant causing a new (functional) splice site." [EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "cryptic splice site activation" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:29:41Z - -[Term] -id: SO:0001570 -name: cryptic_splice_acceptor -def: "A sequence variant whereby a new splice site is created due to the activation of a new acceptor." [SO:ke] -synonym: "cryptic splice acceptor" EXACT [] -is_a: SO:0001569 ! cryptic_splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:30:11Z - -[Term] -id: SO:0001571 -name: cryptic_splice_donor -def: "A sequence variant whereby a new splice site is created due to the activation of a new donor." [SO:ke] -synonym: "cryptic splice donor" EXACT [] -is_a: SO:0001569 ! 
cryptic_splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:30:35Z - -[Term] -id: SO:0001572 -name: exon_loss_variant -def: "A sequence variant whereby an exon is lost from the transcript." [SO:ke] -synonym: "exon loss" EXACT [] -synonym: "snpEff:EXON_DELETED" EXACT VAR [] -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:09Z - -[Term] -id: SO:0001573 -name: intron_gain_variant -def: "A sequence variant whereby an intron is gained by the processed transcript; usually a result of an alteration of the donor or acceptor." [EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "intron gain" EXACT [] -synonym: "intron gain variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:25Z - -[Term] -id: SO:0001574 -name: splice_acceptor_variant -def: "A splice variant that changes the 2 base region at the 3' end of an intron." [SO:ke] -synonym: "snpEff:SPLICE_SITE_ACCEPTOR" EXACT VAR [] -synonym: "splice acceptor variant" EXACT [] -synonym: "VAAST:splice_acceptor_variant" EXACT VAR [] -is_a: SO:0001629 ! splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:31:52Z - -[Term] -id: SO:0001575 -name: splice_donor_variant -def: "A splice variant that changes the 2 base pair region at the 5' end of an intron." [SO:ke] -synonym: "snpEff:SPLICE_SITE_DONOR" EXACT VAR [] -synonym: "splice donor variant" EXACT [] -synonym: "VAAST:splice_donor_variant" EXACT VAR [] -is_a: SO:0001629 ! splice_site_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:32:10Z - -[Term] -id: SO:0001576 -name: transcript_variant -def: "A sequence variant that changes the structure of the transcript." 
[SO:ke] -synonym: "snpEff:TRANSCRIPT" EXACT VAR [] -synonym: "transcript variant" EXACT [] -synonym: "VAAST:transcript_variant" EXACT VAR [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001564 ! gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:32:41Z - -[Term] -id: SO:0001577 -name: complex_transcript_variant -def: "A transcript variant with a complex INDEL- Insertion or deletion that spans an exon/intron border or a coding sequence/UTR border." [http://ensembl.org/info/docs/variation/index.html] -comment: EBI term: Complex InDel - Insertion or deletion that spans an exon/intron border or a coding sequence/UTR border. -synonym: "complex transcript variant" EXACT [] -synonym: "complex_indel" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "complext change in transcript" EXACT [] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:33:03Z - -[Term] -id: SO:0001578 -name: stop_lost -def: "A sequence variant where at least one base of the terminator codon (stop) is changed, resulting in an elongated transcript." [SO:ke] -comment: EBI term: Stop lost - In coding sequence, resulting in the loss of a stop codon. -synonym: "ANNOVAR:stoploss" EXACT VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "snpEff:STOP_LOST" EXACT VAR [] -synonym: "stop codon lost" EXACT [] -synonym: "stop lost" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "VAAST:stop_lost" EXACT VAR [] -is_a: SO:0001590 ! terminator_codon_variant -is_a: SO:0001907 ! feature_elongation -is_a: SO:0001992 ! 
nonsynonymous_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:46:42Z - -[Term] -id: SO:0001579 -name: transcript_sequence_variant -synonym: "transcript sequence variant" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001580 -name: coding_sequence_variant -alt_id: SO:0001581 -def: "A sequence variant that changes the coding sequence." [SO:ke] -synonym: "coding sequence variant" EXACT [] -synonym: "coding variant" EXACT [] -synonym: "codon variant" EXACT [] -synonym: "codon_variant" EXACT [] -synonym: "snpEff:CDS" EXACT VAR [] -synonym: "snpEff:CODON_CHANGE" RELATED VAR [] -synonym: "VAAST:coding_sequence_variant" EXACT VAR [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001791 ! exon_variant -is_a: SO:0001968 ! coding_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:34:36Z - -[Term] -id: SO:0001582 -name: initiator_codon_variant -def: "A codon variant that changes at least one base of the first codon of a transcript." [SO:ke] -comment: This is being used to annotate changes to the first codon of a transcript, when the first annotated codon is not to methionine. A variant is predicted to change the first amino acid of a translation irrespective of the fact that the underlying codon is an AUG. As such for transcripts with an incomplete CDS (sequence does not start with an AUG), it is still called. -synonym: "initiatior codon variant" EXACT [] -synonym: "initiator codon change" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -xref: loinc:LA6695-6 "Initiating Methionine" -is_a: SO:0001580 ! coding_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:35:18Z - -[Term] -id: SO:0001583 -name: missense_variant -alt_id: SO:0001584 -alt_id: SO:0001783 -def: "A sequence variant, that changes one or more bases, resulting in a different amino acid sequence but where the length is preserved." [EBI:fc, EBI:gr, SO:ke] -comment: EBI term: Non-synonymous SNPs. 
SNPs that are located in the coding sequence and result in an amino acid change in the encoded peptide sequence. A change that causes a non_synonymous_codon can be more than 3 bases - for example 4 base substitution. -synonym: "ANNOVAR:nonsynonymous SNV" RELATED VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "missense" EXACT [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "missense codon" EXACT [] -synonym: "snpEff:NON_SYSNONYMOUS_CODING" EXACT VAR [] -synonym: "VAAST:non_synonymous_codon" RELATED VAR [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -xref: http://en.wikipedia.org/wiki/Missense_mutation -xref: loinc:LA6698-0 "Missense" -is_a: SO:0001992 ! nonsynonymous_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:35:49Z - -[Term] -id: SO:0001585 -name: conservative_missense_variant -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for a different but similar amino acid. These variants may or may not be deleterious." [SO:ke] -synonym: "conservative missense codon" EXACT [] -synonym: "conservative missense variant" EXACT [] -synonym: "neutral missense codon" RELATED [] -synonym: "quiet missense codon" RELATED [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001583 ! missense_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:36:40Z - -[Term] -id: SO:0001586 -name: non_conservative_missense_variant -def: "A sequence variant whereby at least one base of a codon is changed resulting in a codon that encodes for an amino acid with different biochemical properties." [SO:ke] -synonym: "non conservative missense codon" EXACT [] -synonym: "non conservative missense variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001583 ! 
missense_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:37:16Z - -[Term] -id: SO:0001587 -name: stop_gained -def: "A sequence variant whereby at least one base of a codon is changed, resulting in a premature stop codon, leading to a shortened transcript." [SO:ke] -comment: EBI term: Stop gained - In coding sequence, resulting in the gain of a stop codon (i.e. leading to a shortened peptide sequence). -synonym: "ANNOVAR:stopgain" EXACT VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "nonsense" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "nonsense codon" EXACT [] -synonym: "snpEFF:STOP_GAINED" EXACT VAR [] -synonym: "stop codon gained" RELATED [] -synonym: "stop gained" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "VAAST:stop_gained" EXACT VAR [] -xref: loinc:LA6699-8 "Nonsense" -is_a: SO:0001906 ! feature_truncation -is_a: SO:0001992 ! nonsynonymous_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:37:52Z - -[Term] -id: SO:0001589 -name: frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three." [SO:ke] -comment: EBI term:Frameshift variations - In coding sequence, resulting in a frameshift. -synonym: "ANNOVAR:frameshift block substitution" RELATED VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "frameshift variant" EXACT [] -synonym: "frameshift_" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "frameshift_coding" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "snpEff:FRAME_SHIFT" EXACT VAR [] -synonym: "VAAST:frameshift_variant" EXACT VAR [] -xref: loinc:LA6694-9 "Frameshift" -is_a: SO:0001818 ! 
protein_altering_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:40:19Z - -[Term] -id: SO:0001590 -name: terminator_codon_variant -alt_id: SO:0001625 -def: "A sequence variant whereby at least one of the bases in the terminator codon is changed." [SO:ke] -comment: The terminal codon may be the terminator, or in an incomplete transcript the last available codon. -synonym: "terminal codon variant" EXACT [] -synonym: "terminal_codon_variant" EXACT [] -synonym: "terminator codon variant" EXACT [] -xref: loinc:LA6700-2 "Stop Codon Mutation" -is_a: SO:0001580 ! coding_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:40:37Z - -[Term] -id: SO:0001591 -name: frame_restoring_variant -def: "A sequence variant that reverts the sequence of a previous frameshift mutation back to the initial frame." [SO:ke] -synonym: "frame restoring variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:09Z - -[Term] -id: SO:0001592 -name: minus_1_frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, by shifting one base ahead." [http://arjournals.annualreviews.org/doi/pdf/10.1146/annurev.ge.08.120174.001535] -synonym: "-1 frameshift variant" EXACT [] -synonym: "minus 1 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:30Z - -[Term] -id: SO:0001593 -name: minus_2_frameshift_variant -synonym: "-2 frameshift variant" EXACT [] -synonym: "minus 2 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:41:52Z - -[Term] -id: SO:0001594 -name: plus_1_frameshift_variant -def: "A sequence variant which causes a disruption of the translational reading frame, by shifting one base backward." 
[http://arjournals.annualreviews.org/doi/pdf/10.1146/annurev.ge.08.120174.001535] -synonym: "+1 frameshift variant" EXACT [] -synonym: "plus 1 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:42:06Z - -[Term] -id: SO:0001595 -name: plus_2_frameshift_variant -synonym: "+2 frameshift variant" EXACT [] -synonym: "plus 2 frameshift variant" EXACT [] -is_a: SO:0001589 ! frameshift_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:42:23Z - -[Term] -id: SO:0001596 -name: transcript_secondary_structure_variant -def: "A sequence variant within a transcript that changes the secondary structure of the RNA product." [SO:ke] -synonym: "transcript secondary structure variant" EXACT [] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:43:18Z - -[Term] -id: SO:0001597 -name: compensatory_transcript_secondary_structure_variant -def: "A secondary structure variant that compensate for the change made by a previous variant." [SO:ke] -synonym: "compensatory transcript secondary structure variant" EXACT [] -is_a: SO:0001596 ! transcript_secondary_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:43:54Z - -[Term] -id: SO:0001598 -name: translational_product_structure_variant -def: "A sequence variant within the transcript that changes the structure of the translational product." [SO:ke] -synonym: "translational product structure variant" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001564 ! gene_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:44:17Z - -[Term] -id: SO:0001599 -name: 3D_polypeptide_structure_variant -def: "A sequence variant that changes the resulting polypeptide structure." [SO:ke] -synonym: "3D polypeptide structure variant" EXACT [] -is_a: SO:0001539 ! 
translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:44:46Z - -[Term] -id: SO:0001600 -name: complex_3D_structural_variant -def: "A sequence variant that changes the resulting polypeptide structure." [SO:ke] -synonym: "complex 3D structural variant" EXACT [] -is_a: SO:0001599 ! 3D_polypeptide_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:45:13Z - -[Term] -id: SO:0001601 -name: conformational_change_variant -def: "A sequence variant in the CDS region that causes a conformational change in the resulting polypeptide sequence." [SO:ke] -synonym: "conformational change variant" EXACT [] -is_a: SO:0001599 ! 3D_polypeptide_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:45:48Z - -[Term] -id: SO:0001602 -name: complex_change_of_translational_product_variant -synonym: "complex change of translational product variant" EXACT [] -is_a: SO:0001539 ! translational_product_function_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:46:54Z - -[Term] -id: SO:0001603 -name: polypeptide_sequence_variant -def: "A sequence variant with in the CDS that causes a change in the resulting polypeptide sequence." [SO:ke] -synonym: "polypeptide sequence variant" EXACT [] -is_a: SO:0001598 ! translational_product_structure_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:13Z - -[Term] -id: SO:0001604 -name: amino_acid_deletion -def: "A sequence variant within a CDS resulting in the loss of an amino acid from the resulting polypeptide." [SO:ke] -synonym: "amino acid deletion" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:36Z - -[Term] -id: SO:0001605 -name: amino_acid_insertion -def: "A sequence variant within a CDS resulting in the gain of an amino acid to the resulting polypeptide." 
[SO:ke] -synonym: "amino acid insertion" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:47:56Z - -[Term] -id: SO:0001606 -name: amino_acid_substitution -def: "A sequence variant of a codon resulting in the substitution of one amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "amino acid substitution" EXACT [] -synonym: "VAAST:amino_acid_substitution" EXACT VAR [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:48:17Z - -[Term] -id: SO:0001607 -name: conservative_amino_acid_substitution -def: "A sequence variant of a codon causing the substitution of a similar amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "conservative amino acid substitution" EXACT [] -is_a: SO:0001606 ! amino_acid_substitution -created_by: kareneilbeck -creation_date: 2010-03-22T02:48:57Z - -[Term] -id: SO:0001608 -name: non_conservative_amino_acid_substitution -def: "A sequence variant of a codon causing the substitution of a non conservative amino acid for another in the resulting polypeptide." [SO:ke] -synonym: "non conservative amino acid substitution" EXACT [] -is_a: SO:0001606 ! amino_acid_substitution -created_by: kareneilbeck -creation_date: 2010-03-22T02:49:23Z - -[Term] -id: SO:0001609 -name: elongated_polypeptide -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence." [SO:ke] -synonym: "elongated polypeptide" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! 
polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:49:52Z - -[Term] -id: SO:0001610 -name: elongated_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated polypeptide C terminal" EXACT [] -is_a: SO:0001609 ! elongated_polypeptide -created_by: kareneilbeck -creation_date: 2010-03-22T02:50:20Z - -[Term] -id: SO:0001611 -name: elongated_polypeptide_N_terminal -def: "A sequence variant with in the CDS that causes elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated polypeptide N terminal" EXACT [] -is_a: SO:0001609 ! elongated_polypeptide -created_by: kareneilbeck -creation_date: 2010-03-22T02:50:31Z - -[Term] -id: SO:0001612 -name: elongated_in_frame_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes in frame elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated in frame polypeptide C terminal" EXACT [] -is_a: SO:0001610 ! elongated_polypeptide_C_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:05Z - -[Term] -id: SO:0001613 -name: elongated_out_of_frame_polypeptide_C_terminal -def: "A sequence variant with in the CDS that causes out of frame elongation of the resulting polypeptide sequence at the C terminus." [SO:ke] -synonym: "elongated polypeptide out of frame C terminal" EXACT [] -is_a: SO:0001610 ! elongated_polypeptide_C_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:20Z - -[Term] -id: SO:0001614 -name: elongated_in_frame_polypeptide_N_terminal_elongation -def: "A sequence variant with in the CDS that causes in frame elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated in frame polypeptide N terminal" EXACT [] -is_a: SO:0001611 ! 
elongated_polypeptide_N_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:51:49Z - -[Term] -id: SO:0001615 -name: elongated_out_of_frame_polypeptide_N_terminal -def: "A sequence variant with in the CDS that causes out of frame elongation of the resulting polypeptide sequence at the N terminus." [SO:ke] -synonym: "elongated out of frame N terminal" EXACT [] -is_a: SO:0001611 ! elongated_polypeptide_N_terminal -created_by: kareneilbeck -creation_date: 2010-03-22T02:52:05Z - -[Term] -id: SO:0001616 -name: polypeptide_fusion -def: "A sequence variant that causes a fusion of two polypeptide sequences." [SO:ke] -synonym: "polypeptide fusion" EXACT [] -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:52:43Z - -[Term] -id: SO:0001617 -name: polypeptide_truncation -def: "A sequence variant of the CD that causes a truncation of the resulting polypeptide." [SO:ke] -synonym: "polypeptide truncation" EXACT [] -xref: EBI:www.ebi.ac.uk/mutations/recommendations/mutevent.html -is_a: SO:0001603 ! polypeptide_sequence_variant -created_by: kareneilbeck -creation_date: 2010-03-22T02:53:07Z - -[Term] -id: SO:0001618 -name: inactive_catalytic_site -def: "A sequence variant that causes the inactivation of a catalytic site with respect to a reference sequence." [SO:ke] -synonym: "inactive catalytic site" EXACT [] -is_a: SO:0001560 ! inactive_ligand_binding_site -created_by: kareneilbeck -creation_date: 2010-03-22T03:06:14Z - -[Term] -id: SO:0001619 -name: non_coding_transcript_variant -def: "A transcript variant of a non coding RNA gene." [SO:ke] -comment: Within non-coding gene - Located within a gene that does not code for a protein. 
-synonym: "ANNOVAR:ncRNA" RELATED VAR [http:http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "nc transcript variant" EXACT [] -synonym: "non coding transcript variant" EXACT [] -synonym: "within_non_coding_gene" EXACT dbsnp [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:16:23Z - -[Term] -id: SO:0001620 -name: mature_miRNA_variant -def: "A transcript variant located with the sequence of the mature miRNA." [SO:ke] -comment: EBI term: Within mature miRNA - Located within a microRNA. -synonym: "mature miRNA variant" EXACT [] -synonym: "within_mature_miRNA" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001619 ! non_coding_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:16:58Z - -[Term] -id: SO:0001621 -name: NMD_transcript_variant -def: "A variant in a transcript that is the target of NMD." [SO:ke] -synonym: "NMD transcript variant" EXACT [] -synonym: "NMD_transcript" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:20:40Z - -[Term] -id: SO:0001622 -name: UTR_variant -def: "A transcript variant that is located within the UTR." [SO:ke] -synonym: "UTR variant" EXACT [] -synonym: "UTR_" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001791 ! exon_variant -is_a: SO:0001968 ! coding_transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:22:58Z - -[Term] -id: SO:0001623 -name: 5_prime_UTR_variant -def: "A UTR variant of the 5' UTR." [SO:ke] -comment: EBI term: 5prime UTR variations - In 5prime UTR (untranslated region). 
-synonym: "5'UTR variant" EXACT [] -synonym: "5PRIME_UTR" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "ANNOVAR:UTR5" RELATED VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "five prime UTR variant" EXACT [] -synonym: "snpEff:UTR_5_PRIME" EXACT VAR [] -synonym: "untranslated-5" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "VAAST:five_prime_UTR_variant" EXACT VAR [] -is_a: SO:0001622 ! UTR_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:23:29Z - -[Term] -id: SO:0001624 -name: 3_prime_UTR_variant -def: "A UTR variant of the 3' UTR." [SO:ke] -comment: EBI term 3prime UTR variations - In 3prime UTR. -synonym: "3'UTR variant" EXACT [] -synonym: "3PRIME_UTR" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "ANNOVAR:UTR3" RELATED VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "snpEff:UTR_3_PRIME" EXACT VAR [] -synonym: "three prime UTR variant" EXACT [] -synonym: "untranslated-3" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "VAAST:three_prime_UTR_variant" EXACT VAR [] -is_a: SO:0001622 ! UTR_variant -created_by: kareneilbeck -creation_date: 2010-03-23T11:23:54Z - -[Term] -id: SO:0001626 -name: incomplete_terminal_codon_variant -def: "A sequence variant where at least one base of the final codon of an incompletely annotated transcript is changed." [SO:ke] -comment: EBI term: Partial codon - Located within the final, incomplete codon of a transcript with a shortened coding sequence where the end is unknown. -synonym: "incomplete terminal codon variant" EXACT [] -synonym: "partial_codon" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001590 ! terminator_codon_variant -is_a: SO:0001650 ! 
inframe_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:51:15Z - -[Term] -id: SO:0001627 -name: intron_variant -def: "A transcript variant occurring within an intron." [SO:ke] -comment: EBI term: Intronic variations - In intron. -synonym: "ANNOVAR:intronic" RELATED VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "intron variant" EXACT [] -synonym: "intron_" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -synonym: "intronic" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "snpEff:INTRON" EXACT VAR [] -synonym: "VAAST:intron_variant" EXACT VAR [] -is_a: SO:0001576 ! transcript_variant -created_by: kareneilbeck -creation_date: 2010-03-23T03:52:38Z - -[Term] -id: SO:0001628 -name: intergenic_variant -def: "A sequence variant located in the intergenic region, between genes." [SO:ke] -comment: EBI term Intergenic variations - More than 5 kb either upstream or downstream of a transcript. -synonym: "ANNOVAR:intergenic" RELATED VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "intergenic" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "intergenic variant" EXACT [] -synonym: "snpEff:INTERGENIC" EXACT VAR [] -is_a: SO:0001878 ! feature_variant -created_by: kareneilbeck -creation_date: 2010-03-23T05:07:37Z - -[Term] -id: SO:0001629 -name: splice_site_variant -def: "A sequence variant that changes the first two or last two bases of an intron, or the 5th base from the start of the intron in the orientation of the transcript." [http://ensembl.org/info/docs/variation/index.html] -comment: EBI term - essential splice site - In the first 2 or the last 2 base pairs of an intron. The 5th base is on the donor (5') side of the intron. Updated to b in line with Cancer Genome Project at the Sanger. 
-synonym: "essential_splice_site" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "splice site variant" EXACT [] -is_a: SO:0001568 ! splicing_variant -is_a: SO:0001627 ! intron_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:42:00Z - -[Term] -id: SO:0001630 -name: splice_region_variant -def: "A sequence variant in which a change has occurred within the region of the splice site, either within 1-3 bases of the exon or 3-8 bases of the intron." [http://ensembl.org/info/docs/variation/index.html] -comment: EBI term: splice site - 1-3 bps into an exon or 3-8 bps into an intron. -synonym: "ANNOVAR:splicing" RELATED VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "splice region variant" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "VAAST:splice_region_variant" EXACT VAR [] -is_a: SO:0001568 ! splicing_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:46:02Z - -[Term] -id: SO:0001631 -name: upstream_gene_variant -def: "A sequence variant located 5' of a gene." [SO:ke] -comment: Different groups annotate up and downstream to different lengths. The subtypes are specific and are backed up with cross references. -synonym: "ANNOVAR:upstream" RELATED VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "snpEff:UPSTREAM" EXACT VAR [] -synonym: "upstream gene variant" EXACT [] -is_a: SO:0001628 ! intergenic_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:49:13Z - -[Term] -id: SO:0001632 -name: downstream_gene_variant -def: "A sequence variant located 3' of a gene." [SO:ke] -comment: Different groups annotate up and downstream to different lengths. The subtypes are specific and are backed up with cross references. 
-synonym: "ANNOVAR:downstream" RELATED VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "downstream gene variant" EXACT [] -synonym: "snpEff:DOWNSTREAM" EXACT VAR [] -is_a: SO:0001628 ! intergenic_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:49:38Z - -[Term] -id: SO:0001633 -name: 5KB_downstream_variant -def: "A sequence variant located within 5 KB of the end of a gene." [SO:ke] -comment: EBI term Downstream variations - Within 5 kb downstream of the 3prime end of a transcript. -synonym: "5KB downstream variant" EXACT [] -synonym: "downstream" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -synonym: "within 5KB downstream" RELATED [] -is_a: SO:0001632 ! downstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:50:16Z - -[Term] -id: SO:0001634 -name: 500B_downstream_variant -def: "A sequence variant located within a half KB of the end of a gene." [SO:ke] -synonym: "500B downstream variant" EXACT [] -synonym: "near-gene-3" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001633 ! 5KB_downstream_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:50:42Z - -[Term] -id: SO:0001635 -name: 5KB_upstream_variant -def: "A sequence variant located within 5KB 5' of a gene." [SO:ke] -comment: EBI term Upstream variations - Within 5 kb upstream of the 5prime end of a transcript. -synonym: "5kb upstream variant" EXACT [] -synonym: "upstream" EXACT ebi_variants [http://ensembl.org/info/docs/variation/index.html] -is_a: SO:0001631 ! upstream_gene_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:51:06Z - -[Term] -id: SO:0001636 -name: 2KB_upstream_variant -def: "A sequence variant located within 2KB 5' of a gene." [SO:ke] -synonym: "2KB upstream variant" EXACT [] -synonym: "near-gene-5" EXACT dbsnp [ftp://ftp.ncbi.nih.gov/snp/specs/docsum_3.1.xsd] -is_a: SO:0001635 ! 
5KB_upstream_variant -created_by: kareneilbeck -creation_date: 2010-03-24T09:51:22Z - -[Term] -id: SO:0001637 -name: rRNA_gene -def: "A gene that encodes for ribosomal RNA." [SO:ke] -synonym: "rDNA" EXACT [] -synonym: "rRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:10:32Z - -[Term] -id: SO:0001638 -name: piRNA_gene -def: "A gene that encodes for an piwi associated RNA." [SO:ke] -synonym: "piRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:11:36Z - -[Term] -id: SO:0001639 -name: RNase_P_RNA_gene -def: "A gene that encodes an RNase P RNA." [SO:ke] -synonym: "RNase P RNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:13:23Z - -[Term] -id: SO:0001640 -name: RNase_MRP_RNA_gene -def: "A gene that encodes a RNase_MRP_RNA." [SO:ke] -synonym: "RNase MRP RNA gene" RELATED [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:13:58Z - -[Term] -id: SO:0001641 -name: lincRNA_gene -def: "A gene that encodes large intervening non-coding RNA." [SO:ke] -synonym: "lincRNA gene" EXACT [] -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-04-21T10:14:24Z - -[Term] -id: SO:0001642 -name: mathematically_defined_repeat -def: "A mathematically defined repeat (MDR) is a experimental feature that is determined by querying overlapping oligomers of length k against a database of shotgun sequence data and identifying regions in the query sequence that exceed a statistically determined threshold of repetitiveness." [SO:jestill] -comment: Mathematically defined repeat regions are determined without regard to the biological origin of the repetitive region. The repeat units of a MDR are the overlapping oligomers of size k that were used to for the query. 
Tools that can annotate mathematically defined repeats include Tallymer (Kurtz et al 2008, BMC Genomics: 517) and RePS (Wang et al, Genome Res 12(5): 824-831.). -synonym: "mathematically defined repeat" EXACT [] -is_a: SO:0001410 ! experimental_feature -created_by: kareneilbeck -creation_date: 2010-05-03T11:50:14Z - -[Term] -id: SO:0001643 -name: telomerase_RNA_gene -def: "A telomerase RNA gene is a non coding RNA gene the RNA product of which is a component of telomerase." [SO:ke] -synonym: "Telomerase RNA component" EXACT [] -synonym: "telomerase RNA gene" EXACT [] -synonym: "TERC" EXACT [] -xref: http:http://en.wikipedia.org/wiki/Telomerase_RNA_component "wikipedia" -is_a: SO:0001263 ! ncRNA_gene -created_by: kareneilbeck -creation_date: 2010-05-18T05:26:38Z - -[Term] -id: SO:0001644 -name: targeting_vector -def: "An engineered vector that is able to take part in homologous recombination in a host with the intent of introducing site specific genomic modifications." [MGD:tm, PMID:10354467] -synonym: "targeting vector" RELATED [] -is_a: SO:0000440 ! vector_replicon -is_a: SO:0000804 ! engineered_region -intersection_of: SO:0000440 ! vector_replicon -intersection_of: has_part SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000783 ! engineered -relationship: has_part SO:0000853 ! homologous_region -relationship: has_quality SO:0000783 ! engineered -created_by: kareneilbeck -creation_date: 2010-05-28T02:05:25Z - -[Term] -id: SO:0001645 -name: genetic_marker -def: "A measurable sequence feature that varies within a population." [SO:db] -synonym: "genetic marker" RELATED [] -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-05-28T02:33:07Z - -[Term] -id: SO:0001646 -name: DArT_marker -def: "A genetic marker, discovered using Diversity Arrays Technology (DArT) technology." [SO:ke] -synonym: "DArT marker" EXACT [] -is_a: SO:0001645 ! 
genetic_marker -created_by: kareneilbeck -creation_date: 2010-05-28T02:34:43Z - -[Term] -id: SO:0001647 -name: kozak_sequence -def: "A kind of ribosome entry site, specific to Eukaryotic organisms that overlaps part of both 5' UTR and CDS sequence." [SO:ke] -subset: SOFA -synonym: "kozak consensus" EXACT [] -synonym: "kozak consensus sequence" EXACT [] -synonym: "kozak sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Kozak_consensus_sequence "wikipedia" -is_a: SO:0000139 ! ribosome_entry_site -created_by: kareneilbeck -creation_date: 2010-06-07T03:12:20Z - -[Term] -id: SO:0001648 -name: nested_transposon -def: "A transposon that is disrupted by the insertion of another element." [SO:ke] -synonym: "nested transposon" EXACT [] -is_a: SO:0000101 ! transposable_element -created_by: kareneilbeck -creation_date: 2010-06-23T03:22:57Z - -[Term] -id: SO:0001649 -name: nested_repeat -def: "A repeat that is disrupted by the insertion of another element." [SO:ke] -synonym: "nested repeat" RELATED [] -is_a: SO:0000657 ! repeat_region -created_by: kareneilbeck -creation_date: 2010-06-23T03:24:55Z - -[Term] -id: SO:0001650 -name: inframe_variant -def: "A sequence variant which does not cause a disruption of the translational reading frame." [SO:ke] -synonym: "ANNOVAR:nonframeshift block substitution" RELATED VAR [http://www.openbioinformatics.org/annovar/annovar_download.html] -synonym: "cds-indel" EXACT dbsnp [] -synonym: "inframe variant" EXACT [] -synonym: "VAAST:inframe_variant" EXACT VAR [] -is_a: SO:0001818 ! protein_altering_variant -created_by: kareneilbeck -creation_date: 2010-07-19T01:24:44Z - -[Term] -id: SO:0001653 -name: retinoic_acid_responsive_element -def: "A transcription factor binding site of variable direct repeats of the sequence PuGGTCA spaced by five nucleotides (DR5) found in the promoters of retinoic acid-responsive genes, to which retinoic acid receptors bind." 
[PMID:11327309, PMID:19917671] -synonym: "RARE" EXACT [] -synonym: "retinoic acid responsive element" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000167 ! promoter -created_by: kareneilbeck -creation_date: 2010-08-03T10:46:12Z - -[Term] -id: SO:0001654 -name: nucleotide_to_protein_binding_site -def: "A binding site that, in the nucleotide molecule, interacts selectively and non-covalently with polypeptide residues." [SO:ke] -subset: SOFA -synonym: "nucleotide to protein binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:26:05Z - -[Term] -id: SO:0001655 -name: nucleotide_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with nucleotide residues." [SO:cb] -comment: See GO:0000166 : nucleotide binding. -synonym: "np_bind" EXACT BS [uniprot:feature] -synonym: "nucleotide binding site" EXACT [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:30:04Z - -[Term] -id: SO:0001656 -name: metal_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with metal ions." [SO:cb] -comment: See GO:0046872 : metal ion binding. -synonym: "metal binding site" RELATED [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:31:42Z - -[Term] -id: SO:0001657 -name: ligand_binding_site -def: "A binding site that, in the molecule, interacts selectively and non-covalently with a small molecule such as a drug, or hormone." [SO:ke] -synonym: "ligand binding site" EXACT [] -is_a: SO:0000409 ! binding_site -created_by: kareneilbeck -creation_date: 2010-08-03T12:32:58Z - -[Term] -id: SO:0001658 -name: nested_tandem_repeat -def: "An NTR is a nested repeat of two distinct tandem motifs interspersed with each other." [SO:AF] -comment: Tracker ID: 3052459. -synonym: "nested tandem repeat" EXACT [] -synonym: "NTR" EXACT [] -is_a: SO:0001649 ! 
nested_repeat -created_by: kareneilbeck -creation_date: 2010-08-26T09:36:16Z - -[Term] -id: SO:0001659 -name: promoter_element -synonym: "promoter element" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: overlaps SO:0000235 ! TF_binding_site -created_by: kareneilbeck -creation_date: 2010-10-01T11:48:32Z - -[Term] -id: SO:0001660 -name: core_promoter_element -synonym: "core promoter element" EXACT [] -synonym: "general transcription factor binding site" RELATED [] -is_a: SO:0001659 ! promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T11:49:03Z - -[Term] -id: SO:0001661 -name: RNA_polymerase_II_TATA_box -def: "A TATA box core promoter of a gene transcribed by RNA polymerase II." [PMID:16858867] -synonym: "RNA polymerase II TATA box" EXACT [] -is_a: SO:0000174 ! TATA_box -relationship: part_of SO:0001669 ! RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:42:12Z - -[Term] -id: SO:0001662 -name: RNA_polymerase_III_TATA_box -def: "A TATA box core promoter of a gene transcribed by RNA polymerase III." [SO:ke] -synonym: "RNA polymerase III TATA box" EXACT [] -is_a: SO:0000174 ! TATA_box -relationship: part_of SO:0000171 ! RNApol_III_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:43:16Z - -[Term] -id: SO:0001663 -name: BREd_motif -def: "A core TRNA polymerase II promoter element with consensus (G/A)T(T/G/A)(T/A)(G/T)(T/G)(T/G)." [PMID:16858867] -synonym: "BREd" EXACT [] -synonym: "BREd motif" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:49:55Z - -[Term] -id: SO:0001664 -name: DCE -def: "A discontinuous core element of RNA polymerase II transcribed genes, situated downstream of the TSS. It is composed of three sub elements: SI, SII and SIII." [PMID:16858867] -synonym: "downstream core element" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0001669 ! 
RNApol_II_core_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T02:56:41Z - -[Term] -id: SO:0001665 -name: DCE_SI -def: "A sub element of the DCE core promoter element, with consensus sequence CTTC." [PMID:16858867, SO:ke] -synonym: "DCE SI" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:10Z - -[Term] -id: SO:0001666 -name: DCE_SII -def: "A sub element of the DCE core promoter element with consensus sequence CTGT." [PMID:16858867, SO:ke] -synonym: "DCE SII" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:30Z - -[Term] -id: SO:0001667 -name: DCE_SIII -def: "A sub element of the DCE core promoter element with consensus sequence AGC." [PMID:16858867, SO:ke] -synonym: "DCE SIII" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001664 ! DCE -created_by: kareneilbeck -creation_date: 2010-10-01T03:00:44Z - -[Term] -id: SO:0001668 -name: proximal_promoter_element -def: "DNA segment that ranges from about -250 to -40 relative to +1 of RNA transcription start site, where sequence specific DNA-binding transcription factors binds, such as Sp1, CTF (CCAAT-binding transcription factor), and CBF (CCAAT-box binding factor)." [PMID:12515390, PMID:9679020, SO:ml] -synonym: "proximal promoter element" RELATED [] -synonym: "specific transcription factor binding site" RELATED [] -is_a: SO:0001678 ! regulatory_promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T03:10:23Z - -[Term] -id: SO:0001669 -name: RNApol_II_core_promoter -def: "The minimal portion of the promoter required to properly initiate transcription in RNA polymerase II transcribed genes." [PMID:16858867] -synonym: "RNApol II core promoter" EXACT [] -is_a: SO:0000170 ! 
RNApol_II_promoter -created_by: kareneilbeck -creation_date: 2010-10-01T03:13:41Z - -[Term] -id: SO:0001670 -name: distal_promoter_element -synonym: "distal promoter element" RELATED [] -is_a: SO:0001678 ! regulatory_promoter_element -created_by: kareneilbeck -creation_date: 2010-10-01T03:21:08Z - -[Term] -id: SO:0001671 -name: bacterial_RNApol_promoter_sigma_70 -synonym: "bacterial RNA polymerase promoter sigma 70" EXACT [] -is_a: SO:0000613 ! bacterial_RNApol_promoter -created_by: kareneilbeck -creation_date: 2010-10-06T01:41:34Z - -[Term] -id: SO:0001672 -name: bacterial_RNApol_promoter_sigma54 -synonym: "bacterial RNA polymerase promoter sigma54" EXACT [] -is_a: SO:0000613 ! bacterial_RNApol_promoter -created_by: kareneilbeck -creation_date: 2010-10-06T01:42:37Z - -[Term] -id: SO:0001673 -name: minus_12_signal -def: "A conserved region about 12-bp upstream of the start point of bacterial transcription units, involved with sigma factor 54." [PMID:18331472] -synonym: "minus 12 signal" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001672 ! bacterial_RNApol_promoter_sigma54 -created_by: kareneilbeck -creation_date: 2010-10-06T01:44:57Z - -[Term] -id: SO:0001674 -name: minus_24_signal -def: "A conserved region about 12-bp upstream of the start point of bacterial transcription units, involved with sigma factor 54." [PMID:18331472] -synonym: "minus 24 signal" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0001672 ! bacterial_RNApol_promoter_sigma54 -created_by: kareneilbeck -creation_date: 2010-10-06T01:45:24Z - -[Term] -id: SO:0001675 -name: A_box_type_1 -def: "An A box within an RNA polymerase III type 1 promoter." [SO:ke] -comment: The A box can be found in the promoters of type 1 and type 2 (pol III) so sub-typing here allows the part of relationship of the subtypes to remain true. -synonym: "A box type 1" RELATED [] -is_a: SO:0000619 ! A_box -relationship: part_of SO:0000617 ! 
RNApol_III_promoter_type_1 -created_by: kareneilbeck -creation_date: 2010-10-06T05:43:43Z - -[Term] -id: SO:0001676 -name: A_box_type_2 -def: "An A box within an RNA polymerase III type 2 promoter." [SO:ke] -comment: The A box can be found in the promoters of type 1 and type 2 (pol III) so sub-typing here allows the part of relationship of the subtypes to remain true. -synonym: "A box type 2" RELATED [] -is_a: SO:0000619 ! A_box -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 -created_by: kareneilbeck -creation_date: 2010-10-06T05:44:18Z - -[Term] -id: SO:0001677 -name: intermediate_element -def: "A core promoter region of RNA polymerase III type 1 promoters." [PMID:12381659] -synonym: "IE" EXACT [] -synonym: "intermediate element" RELATED [] -is_a: SO:0001660 ! core_promoter_element -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 -created_by: kareneilbeck -creation_date: 2010-10-06T05:52:03Z - -[Term] -id: SO:0001678 -name: regulatory_promoter_element -def: "A promoter element that is not part of the core promoter, but provides the promoter with a specific regulatory region." [PMID:12381659] -synonym: "regulatory promoter element" RELATED [] -is_a: SO:0001659 ! promoter_element -created_by: kareneilbeck -creation_date: 2010-10-07T04:39:48Z - -[Term] -id: SO:0001679 -name: transcription_regulatory_region -def: "A regulatory region that is involved in the control of the process of transcription." [SO:ke] -subset: SOFA -synonym: "transcription regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:49:35Z - -[Term] -id: SO:0001680 -name: translation_regulatory_region -def: "A regulatory region that is involved in the control of the process of translation." [SO:ke] -synonym: "translation regulatory region" EXACT [] -is_a: SO:0005836 ! 
regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:52:45Z - -[Term] -id: SO:0001681 -name: recombination_regulatory_region -def: "A regulatory region that is involved in the control of the process of recombination." [SO:ke] -synonym: "recombination regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:53:35Z - -[Term] -id: SO:0001682 -name: replication_regulatory_region -def: "A regulatory region that is involved in the control of the process of nucleotide replication." [SO:ke] -synonym: "replication regulatory region" RELATED [] -is_a: SO:0005836 ! regulatory_region -created_by: kareneilbeck -creation_date: 2010-10-12T03:54:09Z - -[Term] -id: SO:0001683 -name: sequence_motif -def: "A sequence motif is a nucleotide or amino-acid sequence pattern that may have biological significance." [http://en.wikipedia.org/wiki/Sequence_motif] -subset: SOFA -synonym: "sequence motif" RELATED [] -xref: http://en.wikipedia.org/wiki/Sequence_motif "wikipedia" -is_a: SO:0001411 ! biological_region -created_by: kareneilbeck -creation_date: 2010-10-14T04:13:22Z - -[Term] -id: SO:0001684 -name: experimental_feature_attribute -def: "An attribute of an experimentally derived feature." [SO:ke] -synonym: "experimental feature attribute" RELATED [] -is_a: SO:0000733 ! feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:22:23Z - -[Term] -id: SO:0001685 -name: score -def: "The score of an experimentally derived feature such as a p-value." [SO:ke] -is_a: SO:0001684 ! experimental_feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:23:16Z - -[Term] -id: SO:0001686 -name: quality_value -def: "An experimental feature attribute that defines the quality of the feature in a quantitative way, such as a phred quality score." [SO:ke] -synonym: "quality value" RELATED [] -is_a: SO:0001684 ! 
experimental_feature_attribute -created_by: kareneilbeck -creation_date: 2010-10-28T02:24:11Z - -[Term] -id: SO:0001687 -name: restriction_enzyme_recognition_site -def: "The nucleotide region (usually a palindrome) that is recognized by a restriction enzyme. This may or may not be equal to the restriction enzyme binding site." [SO:ke] -synonym: "restriction endonuclease recognition site" EXACT [] -synonym: "restriction enzyme recognition site" EXACT [] -is_a: SO:0001954 ! restriction_enzyme_region -created_by: kareneilbeck -creation_date: 2010-10-29T12:29:57Z - -[Term] -id: SO:0001688 -name: restriction_enzyme_cleavage_junction -def: "The boundary at which a restriction enzyme breaks the nucleotide sequence." [SO:ke] -synonym: "restriction enzyme cleavage junction" EXACT [] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2010-10-29T12:35:02Z - -[Term] -id: SO:0001689 -name: five_prime_restriction_enzyme_junction -def: "The restriction enzyme cleavage junction on the 5' strand of the nucleotide sequence." [SO:ke] -synonym: "5' restriction enzyme junction" EXACT [] -is_a: SO:0001694 ! single_strand_restriction_enzyme_cleavage_site -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:36:24Z - -[Term] -id: SO:0001690 -name: three_prime_restriction_enzyme_junction -synonym: "3' restriction enzyme junction" EXACT [] -is_a: SO:0001694 ! single_strand_restriction_enzyme_cleavage_site -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:37:52Z - -[Term] -id: SO:0001691 -name: blunt_end_restriction_enzyme_cleavage_site -synonym: "blunt end restriction enzyme cleavage site" EXACT [] -is_a: SO:0001687 ! 
restriction_enzyme_recognition_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:39:53Z - -[Term] -id: SO:0001692 -name: sticky_end_restriction_enzyme_cleavage_site -synonym: "sticky end restriction enzyme cleavage site" RELATED [] -is_a: SO:0001687 ! restriction_enzyme_recognition_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:40:50Z - -[Term] -id: SO:0001693 -name: blunt_end_restriction_enzyme_cleavage_junction -def: "A restriction enzyme cleavage site where both strands are cut at the same position." [SO:ke] -synonym: "blunt end restriction enzyme cleavage site" RELATED [] -is_a: SO:0001688 ! restriction_enzyme_cleavage_junction -relationship: part_of SO:0001691 ! blunt_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:43:14Z - -[Term] -id: SO:0001694 -name: single_strand_restriction_enzyme_cleavage_site -def: "A restriction enzyme cleavage site whereby only one strand is cut." [SO:ke] -synonym: "single strand restriction enzyme cleavage site" RELATED [] -is_a: SO:0001688 ! restriction_enzyme_cleavage_junction -created_by: kareneilbeck -creation_date: 2010-10-29T12:44:48Z - -[Term] -id: SO:0001695 -name: restriction_enzyme_single_strand_overhang -def: "A terminal region of DNA sequence where the end of the region is not blunt ended." [SO:ke] -synonym: "single strand overhang" EXACT [] -synonym: "sticky end" RELATED [] -is_a: SO:0001954 ! restriction_enzyme_region -relationship: part_of SO:0001692 ! sticky_end_restriction_enzyme_cleavage_site -created_by: kareneilbeck -creation_date: 2010-10-29T12:48:35Z - -[Term] -id: SO:0001696 -name: experimentally_defined_binding_region -def: "A region that has been implicated in binding although the exact coordinates of binding may be unknown." [SO:ke] -synonym: "experimentally defined binding region" RELATED [] -is_a: SO:0001410 ! 
experimental_feature -created_by: kareneilbeck -creation_date: 2010-11-02T11:39:59Z - -[Term] -id: SO:0001697 -name: ChIP_seq_region -def: "A region of sequence identified by CHiP seq technology to contain a protein binding site." [SO:ke] -synonym: "ChIP seq region" RELATED [] -is_a: SO:0001696 ! experimentally_defined_binding_region -relationship: contains SO:0000410 ! protein_binding_site -created_by: kareneilbeck -creation_date: 2010-11-02T11:43:07Z - -[Term] -id: SO:0001698 -name: ASPE_primer -def: "\"A primer containing an SNV at the 3' end for accurate genotyping." [http://www.ncbi.nlm.nih.gov/pubmed/11252801] -synonym: "allele specific primer extension primer" EXACT [] -synonym: "ASPE primer" EXACT [] -is_a: SO:0000112 ! primer -created_by: kareneilbeck -creation_date: 2010-11-11T03:25:21Z - -[Term] -id: SO:0001699 -name: dCAPS_primer -def: "A primer with one or more mismatches to the DNA template corresponding to a position within a restriction enzyme recognition site." [http://www.ncbi.nlm.nih.gov/pubmed/9628033] -synonym: "dCAPS primer" EXACT [] -synonym: "derived cleaved amplified polymorphic primer" EXACT [] -is_a: SO:0000112 ! primer -created_by: kareneilbeck -creation_date: 2010-11-11T03:27:09Z - -[Term] -id: SO:0001700 -name: histone_modification -def: "Histone modification is a post translationally modified region whereby residues of the histone protein are modified by methylation, acetylation, phosphorylation, ubiquitination, sumoylation, citrullination, or ADP-ribosylation." [http:en.wikipedia.org/wiki/Histone] -synonym: "histone modification" EXACT [] -synonym: "histone modification site" RELATED [] -is_a: SO:0001089 ! post_translationally_modified_region -is_a: SO:0001720 ! epigenetically_modified_region -intersection_of: SO:0001089 ! post_translationally_modified_region -intersection_of: has_quality SO:0000133 ! epigenetically_modified -relationship: has_quality SO:0000133 ! 
epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-31T10:22:08Z - -[Term] -id: SO:0001701 -name: histone_methylation_site -def: "A histone modification site where the modification is the methylation of the residue." [SO:ke] -synonym: "histone methylation" EXACT [] -synonym: "histone methylation site" EXACT [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:02Z - -[Term] -id: SO:0001702 -name: histone_acetylation_site -def: "A histone modification where the modification is the acylation of the residue." [SO:ke] -synonym: "histone acetylation" EXACT [] -synonym: "histone acetylation site" EXACT [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-03-31T10:23:27Z - -[Term] -id: SO:0001703 -name: H3K9_acetylation_site -def: "A kind of histone modification site, whereby the 9th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 acetylation site" EXACT [] -synonym: "H3K9Ac" RELATED [] -is_a: SO:0001973 ! histone_3_acetylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:05Z - -[Term] -id: SO:0001704 -name: H3K14_acetylation_site -def: "A kind of histone modification site, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K14 acetylation site" EXACT [] -synonym: "H3K14Ac" RELATED [] -is_a: SO:0001973 ! histone_3_acetylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:25:53Z - -[Term] -id: SO:0001705 -name: H3K4_monomethylation_site -def: "A kind of histone modification, whereby the 4th residue (a lysine), from the start of the H3 protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 mono-methylation site" EXACT [] -synonym: "H3K4me1" RELATED [] -is_a: SO:0001734 ! 
H3K4_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:28:14Z - -[Term] -id: SO:0001706 -name: H3K4_trimethylation -def: "A kind of histone modification site, whereby the 4th residue (a lysine), from the start of the H3 protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K4 tri-methylation" EXACT [] -synonym: "H3K4me3" RELATED [] -is_a: SO:0001734 ! H3K4_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:29:12Z - -[Term] -id: SO:0001707 -name: H3K9_trimethylation_site -def: "A kind of histone modification site, whereby the 9th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K9 tri-methylation site" EXACT [] -synonym: "H3K9Me3" RELATED [] -is_a: SO:0001736 ! H3K9_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:30:34Z - -[Term] -id: SO:0001708 -name: H3K27_monomethylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2K27 mono-methylation site" EXACT [] -synonym: "H2K27Me1" RELATED [] -is_a: SO:0001732 ! H3K27_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:31:54Z - -[Term] -id: SO:0001709 -name: H3K27_trimethylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K27 tri-methylation site" EXACT [] -synonym: "H3K27Me3" RELATED [] -is_a: SO:0001732 ! H3K27_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:32:41Z - -[Term] -id: SO:0001710 -name: H3K79_monomethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is mono- methylated." 
[http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 mono-methylation site" EXACT [] -synonym: "H3K79me1" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:33:42Z - -[Term] -id: SO:0001711 -name: H3K79_dimethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is di-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 di-methylation site" EXACT [] -synonym: "H3K79Me2" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:34:39Z - -[Term] -id: SO:0001712 -name: H3K79_trimethylation_site -def: "A kind of histone modification site, whereby the 79th residue (a lysine), from the start of the H3 histone protein is tri-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H3K79 tri-methylation site" EXACT [] -synonym: "H3K79Me3" RELATED [] -is_a: SO:0001735 ! H3K79_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:35:30Z - -[Term] -id: SO:0001713 -name: H4K20_monomethylation_site -def: "A kind of histone modification site, whereby the 20th residue (a lysine), from the start of the H4histone protein is mono-methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H4K20 mono-methylation site" EXACT [] -synonym: "H4K20Me1" RELATED [] -is_a: SO:0001701 ! histone_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:36:43Z - -[Term] -id: SO:0001714 -name: H2BK5_monomethylation_site -def: "A kind of histone modification site, whereby the 5th residue (a lysine), from the start of the H2B protein is methylated." [http://en.wikipedia.org/wiki/Histone] -synonym: "H2BK5 mono-methylation site" EXACT [] -is_a: SO:0001701 ! 
histone_methylation_site -created_by: kareneilbeck -creation_date: 2010-03-31T10:38:12Z - -[Term] -id: SO:0001715 -name: ISRE -def: "An ISRE is a transcriptional cis regulatory region, containing the consensus region: YAGTTTC(A/T)YTTTYCC, responsible for increased transcription via interferon binding." [http://genesdev.cshlp.org/content/2/4/383.abstrac] -comment: Term requested via tracker (2981725) by Alan Ruttenberg, April 2010. It has been described as both an enhancer and a promoter, so the parent is the more general term. -synonym: "interferon stimulated response element" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region -created_by: kareneilbeck -creation_date: 2010-04-05T11:15:08Z - -[Term] -id: SO:0001716 -name: histone_ubiqitination_site -def: "A histone modification site where ubiquitin may be added." [SO:ke] -synonym: "histone ubiquitination site" RELATED [] -is_a: SO:0001700 ! histone_modification -created_by: kareneilbeck -creation_date: 2010-04-13T10:12:18Z - -[Term] -id: SO:0001717 -name: H2B_ubiquitination_site -def: "A histone modification site on H2B where ubiquitin may be added." [SO:ke] -synonym: "H2BUbiq" RELATED [] -is_a: SO:0001716 ! histone_ubiqitination_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:13:28Z - -[Term] -id: SO:0001718 -name: H3K18_acetylation_site -def: "A kind of histone modification site, whereby the 14th residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K18 acetylation site" EXACT [] -synonym: "H3K18Ac" RELATED [] -is_a: SO:0001973 ! histone_3_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:39:35Z - -[Term] -id: SO:0001719 -name: H3K23_acylation_site -def: "A kind of histone modification, whereby the 23rd residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K23 acylation site" RELATED [] -synonym: "H3K23Ac" RELATED [] -is_a: SO:0001973 ! 
histone_3_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:42:45Z - -[Term] -id: SO:0001720 -name: epigenetically_modified_region -def: "A biological region implicated in inherited changes caused by mechanisms other than changes in the underlying DNA sequence." [http://en.wikipedia.org/wiki/Epigenetics, SO:ke] -subset: SOFA -synonym: "epigenetically modified region" RELATED [] -is_a: SO:0001411 ! biological_region -intersection_of: SO:0001411 ! biological_region -intersection_of: has_quality SO:0000133 ! epigenetically_modified -relationship: has_quality SO:0000133 ! epigenetically_modified -created_by: kareneilbeck -creation_date: 2010-03-27T12:02:29Z - -[Term] -id: SO:0001721 -name: H3K27_acylation_site -def: "A kind of histone modification site, whereby the 27th residue (a lysine), from the start of the H3 histone protein is acylated." [SO:ke] -synonym: "H3K27 acylation site" EXACT [] -synonym: "H3K27Ac" RELATED [] -is_a: SO:0001973 ! histone_3_acetylation_site -created_by: kareneilbeck -creation_date: 2010-04-13T10:44:09Z - -[Term] -id: SO:0001722 -name: H3K36_monomethylation_site -def: "A kind of histone modification site, whereby the 36th residue (a lysine), from the start of the H3 histone protein is mono-methylated." [SO:ke] -synonym: "H3K36 mono-methylation site" EXACT [] -synonym: "H3K36. -synonym: "transcriptionally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000124 -name: transcriptionally_constitutive -def: "Expressed in relatively constant amounts without regard to cellular environmental conditions such as the concentration of a particular substrate." [SO:ke] -synonym: "transcriptionally constitutive" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000125 -name: transcriptionally_induced -def: "An inducer molecule is required for transcription to occur." [SO:ke] -synonym: "transcriptionally induced" EXACT [] -is_a: SO:0000123 ! 
transcriptionally_regulated - -[Term] -id: SO:0000126 -name: transcriptionally_repressed -def: "A repressor molecule is required for transcription to stop." [SO:ke] -synonym: "transcriptionally repressed" EXACT [] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000127 -name: silenced_gene -def: "A gene that is silenced." [SO:xp] -synonym: "silenced gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000893 ! silenced - -[Term] -id: SO:0000128 -name: gene_silenced_by_DNA_modification -def: "A gene that is silenced by DNA modification." [SO:xp] -synonym: "gene silenced by DNA modification" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000129 -name: gene_silenced_by_DNA_methylation -def: "A gene that is silenced by DNA methylation." [SO:xp] -synonym: "gene silenced by DNA methylation" EXACT [] -synonym: "methylation-silenced gene" EXACT [] -intersection_of: SO:0000128 ! gene_silenced_by_DNA_modification -intersection_of: has_quality SO:0000895 ! silenced_by_DNA_methylation - -[Term] -id: SO:0000130 -name: post_translationally_regulated -def: "An attribute describing a gene that is regulated after it has been translated." [SO:ke] -synonym: "post translationally regulated" EXACT [] -synonym: "post-translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000131 -name: translationally_regulated -def: "An attribute describing a gene that is regulated as it is translated." [SO:ke] -synonym: "translationally regulated" EXACT [] -is_a: SO:0000119 ! regulated - -[Term] -id: SO:0000132 -name: reverse_primer -def: "A single stranded oligo used for polymerase chain reaction." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. 
-synonym: "DNA reverse primer" EXACT [] -synonym: "reverse DNA primer" EXACT [] -synonym: "reverse primer" EXACT [] -synonym: "reverse primer oligo" EXACT [] -synonym: "reverse primer oligonucleotide" EXACT [] -synonym: "reverse primer sequence" EXACT [] -intersection_of: SO:0000112 ! primer -intersection_of: has_quality SO:0001031 ! reverse - -[Term] -id: SO:0000133 -name: epigenetically_modified -def: "This attribute describes a gene where heritable changes other than those in the DNA sequence occur. These changes include: modification to the DNA (such as DNA methylation, the covalent modification of cytosine), and post-translational modification of histones." [SO:ke] -synonym: "epigenetically modified" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000134 -name: imprinted -def: "Imprinted genes are epigenetically modified genes that are expressed monoallelically according to their parent of origin." [SO:ke] -xref: http:http\://en.wikipedia.org/wiki/Genomic_imprinting "wiki" -is_a: SO:0000119 ! regulated -is_a: SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000135 -name: maternally_imprinted -def: "The maternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "maternally imprinted" EXACT [] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000136 -name: paternally_imprinted -def: "The paternal copy of the gene is modified, rendering it transcriptionally silent." [SO:ke] -synonym: "paternally imprinted" EXACT [] -is_a: SO:0000134 ! imprinted - -[Term] -id: SO:0000137 -name: allelically_excluded -def: "Allelic exclusion is a process occuring in diploid organisms, where a gene is inactivated and not expressed in that cell." [SO:ke] -comment: Exapmles are x-innactivation and immunoglobulin formation. -synonym: "allelically excluded" EXACT [] -is_a: SO:0000133 ! 
epigenetically_modified - -[Term] -id: SO:0000138 -name: gene_rearranged_at_DNA_level -def: "An epigenetically modified gene, rearranged at the DNA level." [SO:xp] -synonym: "gene rearranged at DNA level" EXACT [] -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000904 ! rearranged_at_DNA_level - -[Term] -id: SO:0000139 -name: ribosome_entry_site -def: "Region in mRNA where ribosome assembles." [SO:ke] -subset: SOFA -synonym: "ribosome entry site" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000140 -name: attenuator -def: "A sequence segment located within the five prime end of an mRNA that causes premature termination of translation." [SO:as] -subset: SOFA -synonym: "attenuator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Attenuator "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000141 -name: terminator -def: "The sequence of DNA located either at the end of the transcript that causes RNA polymerase to terminate transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "terminator sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Terminator_(genetics) "wiki" -is_a: SO:0005836 ! regulatory_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000142 -name: DNA_sequence_secondary_structure -def: "A folded DNA sequence." [SO:ke] -synonym: "DNA sequence secondary structure" EXACT [] -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000143 -name: assembly_component -def: "A region of known length which may be used to manufacture a longer region." [SO:ke] -subset: SOFA -synonym: "assembly component" EXACT [] -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000144 -name: primary_transcript_attribute -is_obsolete: true - -[Term] -id: SO:0000145 -name: recoded_codon -def: "A codon that has been redefined at translation. The redefinition may be as a result of translational bypass, translational frameshifting or stop codon readthrough." [SO:xp] -synonym: "recoded codon" EXACT [] -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000146 -name: capped -def: "An attribute describing when a sequence, usually an mRNA is capped by the addition of a modified guanine nucleotide at the 5' end." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000147 -name: exon -def: "A region of the transcript sequence within a gene which is not removed from the primary RNA transcript by RNA splicing." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Exon "wiki" -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000148 -name: supercontig -def: "One or more contigs that have been ordered and oriented using end-read information. Contains gaps that are filled with N's." [SO:ls] -subset: SOFA -synonym: "scaffold" RELATED [] -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000719 ! ultracontig - -[Term] -id: SO:0000149 -name: contig -def: "A contiguous sequence derived from sequence assembly. Has no gaps, but may contain N's from unvailable bases." [SO:ls] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Contig "wiki" -is_a: SO:0000143 ! assembly_component -is_a: SO:0000353 ! sequence_assembly -relationship: part_of SO:0000148 ! supercontig - -[Term] -id: SO:0000150 -name: read -def: "A sequence obtained from a single sequencing experiment. Typically a read is produced when a base calling program interprets information from a chromatogram trace file produced from a sequencing machine." [SO:rd] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000149 ! 
contig - -[Term] -id: SO:0000151 -name: clone -def: "A piece of DNA that has been inserted in a vector so that it can be propagated in a host bacterium or some other organism." [SO:ke] -subset: SOFA -xref: http:http\://en.wikipedia.org/wiki/Clone_(genetics) "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000152 -name: YAC -def: "Yeast Artificial Chromosome, a vector constructed from the telomeric, centromeric, and replication origin sequences needed for replication in yeast cells." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "yeast artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000153 -name: BAC -def: "Bacterial Artificial Chromosome, a cloning vector that can be propagated as mini-chromosomes in a bacterial host." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "bacterial artificial chromosome" EXACT [] -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000154 -name: PAC -def: "The P1-derived artificial chromosome are DNA constructs that are derived from the DNA of P1 bacteriophage. They can carry large amounts (about 100-300 kilobases) of other sequences for a variety of bioengineering purposes. It is one type of vector used to clone DNA fragments (100- to 300-kb insert size; average, 150 kb) in Escherichia coli cells." [http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. Drosophila melanogaster PACs carry an average insert size of 80 kb. The library represents a 6-fold coverage of the genome. -synonym: "P1" EXACT [] -synonym: "P1 artificial chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/P1-derived_artificial_chromosome "wiki" -is_a: SO:0000440 ! 
vector_replicon - -[Term] -id: SO:0000155 -name: plasmid -def: "A self replicating, using the hosts cellular machinery, often circular nucleic acid molecule that is distinct from a chromosome in the organism." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "plasmid sequence" EXACT [] -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0000156 -name: cosmid -def: "A cloning vector that is a hybrid of lambda phages and a plasmid that can be propagated as a plasmid or packaged as a phage,since they retain the lambda cos sites." [SO:ma] -comment: Paper: vans GA et al. High efficiency vectors for cosmid microcloning and genomic analysis. Gene 1989; 79(1):9-20. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cosmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Cosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000157 -name: phagemid -def: "A plasmid which carries within its sequence a bacteriophage replication origin. When the host bacterium is infected with \"helper\" phage, a phagemid is replicated along with the phage DNA and packaged into phage capsids." [SO:ma] -synonym: "phagemid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Phagemid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000158 -name: fosmid -def: "A cloning vector that utilises the E. coli F factor." [SO:ma] -comment: Birren BW et al. A human chromosome 22 fosmid resource: mapping and analysis of 96 clones. Genomics 1996. -synonym: "fosmid vector" RELATED [] -xref: http://en.wikipedia.org/wiki/Fosmid "wiki" -is_a: SO:0000440 ! vector_replicon - -[Term] -id: SO:0000159 -name: deletion -def: "The point at which a deletion occured." [SO:ke] -subset: SOFA -synonym: "deleted_sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! 
biological_region - -[Term] -id: SO:0000160 -name: lambda_clone -def: "A linear clone derived from lambda bacteriophage. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -is_obsolete: true - -[Term] -id: SO:0000161 -name: methylated_A -def: "A modified RNA base in which adenine has been methylated." [SO:ke] -subset: SOFA -synonym: "methylated A" EXACT [] -synonym: "methylated adenine" EXACT [] -synonym: "methylated adenine base" EXACT [] -synonym: "methylated adenine residue" EXACT [] -is_a: SO:0000306 ! methylated_base_feature - -[Term] -id: SO:0000162 -name: splice_site -def: "Consensus region of primary transcript bordering junction of splicing. A region that overlaps exactly 2 base and adjacent_to splice_junction." [SO:cjm, SO:ke] -comment: With spliceosomal introns, the splice sites bind the spliceosomal machinary. -subset: SOFA -synonym: "splice site" EXACT [] -xref: http://en.wikipedia.org/wiki/Splice_site "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000163 -name: five_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 5' edge of the intron. A splice_site that is downstream_adjacent_to exon and starts intron." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "5' splice site" EXACT [] -synonym: "donor" RELATED [] -synonym: "donor splice site" EXACT [] -synonym: "five prime splice site" EXACT [] -synonym: "splice donor site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000164 -name: three_prime_cis_splice_site -def: "Intronic 2 bp region bordering the exon, at the 3' edge of the intron. A splice_site that is upstream_adjacent_to exon and finishes intron." 
[http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html, SO:cjm, SO:ke] -subset: SOFA -synonym: "3' splice site" RELATED [] -synonym: "acceptor" RELATED [] -synonym: "acceptor splice site" EXACT [] -synonym: "splice acceptor site" EXACT [] -synonym: "three prime splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000165 -name: enhancer -def: "A cis-acting sequence that increases the utilization of (some) eukaryotic promoters, and can function in either orientation and in any location (upstream or downstream) relative to the promoter." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: An enhancer may participate in an enhanceosome GO:0034206. A protein-DNA complex formed by the association of a distinct set of general and specific transcription factors with a region of enhancer DNA. The cooperative assembly of an enhanceosome confers specificity of transcriptional regulation. This comment is a placeholder should we start to make cross products with GO. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Enhancer_(genetics) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000166 -name: enhancer_bound_by_factor -def: "An enhancer bound by a factor." [SO:xp] -synonym: "enhancer bound by factor" EXACT [] -intersection_of: SO:0000165 ! enhancer -intersection_of: has_quality SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000167 -name: promoter -def: "A regulatory_region composed of the TSS(s) and binding sites for TF_complexes of the basal transcription machinery." [SO:regcreative] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. The region on a DNA molecule involved in RNA polymerase binding to initiate transcription. -subset: SOFA -synonym: "promoter sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Promoter "wiki" -is_a: SO:0001055 ! 
transcriptional_cis_regulatory_region - -[Term] -id: SO:0000168 -name: restriction_enzyme_cut_site -def: "A specific nucleotide sequence of DNA at or near which a particular restriction enzyme cuts the DNA." [SO:ma] -is_obsolete: true - -[Term] -id: SO:0000169 -name: RNApol_I_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase I binds, to begin transcription." [SO:ke] -synonym: "pol I promoter" EXACT [] -synonym: "polymerase I promoter" EXACT [] -synonym: "RNA polymerase A promoter" EXACT [] -synonym: "RNApol I promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000170 -name: RNApol_II_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase II binds, to begin transcription." [SO:ke] -synonym: "pol II promoter" RELATED [] -synonym: "polymerase II promoter" EXACT [] -synonym: "RNA polymerase B promoter" EXACT [] -synonym: "RNApol II promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter -relationship: has_part SO:0000174 ! TATA_box - -[Term] -id: SO:0000171 -name: RNApol_III_promoter -def: "A DNA sequence in eukaryotic DNA to which RNA polymerase III binds, to begin transcription." [SO:ke] -synonym: "pol III promoter" EXACT [] -synonym: "polymerase III promoter" EXACT [] -synonym: "RNA polymerase C promoter" EXACT [] -synonym: "RNApol III promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter -relationship: has_part SO:0000174 ! TATA_box - -[Term] -id: SO:0000172 -name: CAAT_signal -def: "Part of a conserved sequence located about 75-bp upstream of the start point of eukaryotic transcription units which may be involved in RNA polymerase binding; consensus=GG(C|T)CAATCT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "CAAT box" EXACT [] -synonym: "CAAT signal" EXACT [] -synonym: "CAAT-box" EXACT [] -xref: http://en.wikipedia.org/wiki/CAAT_box "wiki" -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0000173 -name: GC_rich_promoter_region -def: "A conserved GC-rich region located upstream of the start point of eukaryotic transcription units which may occur in multiple copies or in either orientation; consensus=GGGCGG." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "GC rich promoter region" EXACT [] -synonym: "GC-rich region" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0000174 -name: TATA_box -def: "A conserved AT-rich septamer found about 25-bp before the start point of many eukaryotic RNA polymerase II transcript units; may be involved in positioning the enzyme for correct initiation; consensus=TATA(A|T)A(A|T)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "Goldstein-Hogness box" EXACT [] -synonym: "TATA box" EXACT [] -xref: http://en.wikipedia.org/wiki/TATA_box "wiki" -is_a: SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000175 -name: minus_10_signal -def: "A conserved region about 10-bp upstream of the start point of bacterial transcription units which may be involved in binding RNA polymerase; consensus=TAtAaT." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-10 signal" EXACT [] -synonym: "minus 10 signal" EXACT [] -synonym: "Pribnow box" EXACT [] -synonym: "Pribnow Schaller box" EXACT [] -synonym: "Pribnow-Schaller box" EXACT [] -xref: http://en.wikipedia.org/wiki/Pribnow_box "wiki" -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000613 ! bacterial_RNApol_promoter - -[Term] -id: SO:0000176 -name: minus_35_signal -def: "A conserved hexamer about 35-bp upstream of the start point of bacterial transcription units; consensus=TTGACa or TGTTGACA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "-35 signal" EXACT [] -synonym: "minus 35 signal" EXACT [] -is_a: SO:0000713 ! 
DNA_motif -relationship: part_of SO:0000613 ! bacterial_RNApol_promoter - -[Term] -id: SO:0000177 -name: cross_genome_match -def: "A nucleotide match against a sequence from another organism." [SO:ma] -subset: SOFA -synonym: "cross genome match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000178 -name: operon -def: "A group of contiguous genes transcribed as a single (polycistronic) mRNA from a single regulatory region." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Operon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0000179 -name: clone_insert_start -def: "The start of the clone insert." [SO:ke] -subset: SOFA -synonym: "clone insert start" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000180 -name: retrotransposon -def: "A transposable element that is incorporated into a chromosome by a mechanism that requires reverse transcriptase." [http://www.dddmag.com/Glossary.aspx#r] -synonym: "class I" RELATED [] -synonym: "class I transposon" EXACT [] -synonym: "retrotransposon element" EXACT [] -xref: http://en.wikipedia.org/wiki/Retrotransposon "wiki" -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000181 -name: translated_nucleotide_match -def: "A match against a translated sequence." [SO:ke] -subset: SOFA -synonym: "translated nucleotide match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0000182 -name: DNA_transposon -def: "A transposon where the mechanism of transposition is via a DNA intermediate." [SO:ke] -synonym: "class II" RELATED [] -synonym: "class II transposon" EXACT [] -synonym: "DNA transposon" EXACT [] -is_a: SO:0000101 ! transposable_element - -[Term] -id: SO:0000183 -name: non_transcribed_region -def: "A region of the gene which is not transcribed." 
[SO:ke] -subset: SOFA -synonym: "non transcribed region" EXACT [] -synonym: "non-transcribed sequence" EXACT [] -synonym: "nontranscribed region" EXACT [] -synonym: "nontranscribed sequence" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Term] -id: SO:0000184 -name: U2_intron -def: "A major type of spliceosomal intron spliced by the U2 spliceosome, that includes U1, U2, U4/U6 and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AG or AT-AG 5' and 3' boundaries. -synonym: "U2 intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000185 -name: primary_transcript -def: "A transcript that in its initial state requires modification to be functional." [SO:ma] -subset: SOFA -synonym: "precursor RNA" EXACT [] -synonym: "primary transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Primary_transcript "wiki" -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000186 -name: LTR_retrotransposon -def: "A retrotransposon flanked by long terminal repeat sequences." [SO:ke] -synonym: "long terminal repeat retrotransposon" EXACT [] -synonym: "LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! retrotransposon - -[Term] -id: SO:0000187 -name: repeat_family -def: "A group of characterized repeat sequences." [SO:ke] -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000188 -name: intron -def: "A segment of DNA that is transcribed, but removed from within the transcript by splicing together the sequences (exons) on either side of it." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Intron "wiki" -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000189 -name: non_LTR_retrotransposon -def: "A retrotransposon without long terminal repeat sequences." [SO:ke] -synonym: "non LTR retrotransposon" EXACT [] -is_a: SO:0000180 ! 
retrotransposon - -[Term] -id: SO:0000190 -name: five_prime_intron -synonym: "5' intron" EXACT [] -synonym: "5' intron sequence" EXACT [] -synonym: "five prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000191 -name: interior_intron -synonym: "interior intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000192 -name: three_prime_intron -synonym: "3' intron" EXACT [] -synonym: "3' intron sequence" RELATED [] -synonym: "three prime intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000193 -name: RFLP_fragment -def: "A DNA fragment used as a reagent to detect the polymorphic genomic loci by hybridizing against the genomic DNA digested with a given restriction enzyme." [GOC:pj] -subset: SOFA -synonym: "restriction fragment length polymorphism" EXACT [] -synonym: "RFLP" EXACT [] -synonym: "RFLP fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment_length_polymorphism "wiki" -is_a: SO:0000412 ! restriction_fragment - -[Term] -id: SO:0000194 -name: LINE_element -def: "A dispersed repeat family with many copies, each from 1 to 6 kb long. New elements are generated by retroposition of a transcribed copy. Typically the LINE contains 2 ORF's one of which is reverse transcriptase, and 3'and 5' direct repeats." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -synonym: "LINE" EXACT [] -synonym: "LINE element" EXACT [] -synonym: "Long interspersed element" EXACT [] -synonym: "Long interspersed nuclear element" EXACT [] -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000195 -name: coding_exon -def: "An exon whereby at least one base is part of a codon (here, 'codon'is inclusive of the stop_codon)." [SO:ke] -synonym: "coding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000196 -name: five_prime_coding_exon_coding_region -def: "The sequence of the five_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "five prime exon coding region" EXACT [] -is_a: SO:0001215 ! 
coding_region_of_exon -relationship: part_of SO:0000200 ! five_prime_coding_exon - -[Term] -id: SO:0000197 -name: three_prime_coding exon_coding_region -def: "The sequence of the three_prime_coding_exon that codes for protein." [SO:cjm] -subset: SOFA -synonym: "three prime exon coding region" EXACT [] -is_a: SO:0001215 ! coding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000198 -name: noncoding_exon -def: "An exon that does not contain any codons." [SO:ke] -subset: SOFA -synonym: "noncoding exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000199 -name: translocation -def: "A region of nucleotide sequence that has translocated to a new position." [SO:ke] -synonym: "translocated sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:0000200 -name: five_prime_coding_exon -def: "The 5' most coding exon." [SO:ke] -synonym: "5' coding exon" EXACT [] -synonym: "five prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000201 -name: interior_exon -def: "An exon that is bounded by 5' and 3' splice sites." [PMID:10373547] -synonym: "interior exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0000202 -name: three_prime_coding_exon -def: "The coding exon that is most 3-prime on a given transcript." [SO:ma] -synonym: "3' coding exon" RELATED [] -synonym: "three prime coding exon" EXACT [] -is_a: SO:0000195 ! coding_exon - -[Term] -id: SO:0000203 -name: UTR -def: "Messenger RNA sequences that are untranslated and lie five prime or three prime to sequences which are translated." [SO:ke] -subset: SOFA -synonym: "untranslated region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000204 -name: five_prime_UTR -def: "A region at the 5' end of a mature transcript (preceding the initiation codon) that is not translated into a protein." 
[http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "5' UTR" EXACT [] -synonym: "five prime UTR" EXACT [] -synonym: "five_prime_untranslated_region" EXACT [] -xref: http://en.wikipedia.org/wiki/5'_UTR "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000205 -name: three_prime_UTR -def: "A region at the 3' end of a mature transcript (following the stop codon) that is not translated into a protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "three prime untranslated region" EXACT [] -synonym: "three prime UTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Three_prime_untranslated_region "wiki" -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000206 -name: SINE_element -def: "A repetitive element, a few hundred base pairs long, that is dispersed throughout the genome. A common human SINE is the Alu element." [SO:ke] -synonym: "Short interspersed element" EXACT [] -synonym: "Short interspersed nuclear element" EXACT [] -synonym: "SINE element" EXACT [] -xref: http://en.wikipedia.org/wiki/Short_interspersed_nuclear_element "wiki" -is_a: SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000207 -name: simple_sequence_length_variation -synonym: "simple sequence length polymorphism" RELATED [] -synonym: "simple sequence length variation" EXACT [] -synonym: "SSLP" RELATED [] -is_a: SO:0000248 ! sequence_length_variation - -[Term] -id: SO:0000208 -name: terminal_inverted_repeat_element -def: "A DNA transposable element defined as having termini with perfect, or nearly perfect short inverted repeats, generally 10 - 40 nucleotides long." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "terminal inverted repeat element" EXACT [] -synonym: "TIR element" EXACT [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000209 -name: rRNA_primary_transcript -def: "A primary transcript encoding a ribosomal RNA." 
[SO:ke] -synonym: "ribosomal RNA primary transcript" EXACT [] -synonym: "rRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000210 -name: tRNA_primary_transcript -def: "A primary transcript encoding a transfer RNA (SO:0000253)." [SO:ke] -synonym: "tRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000211 -name: alanine_tRNA_primary_transcript -def: "A primary transcript encoding alanyl tRNA." [SO:ke] -synonym: "alanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000212 -name: arginine_tRNA_primary_transcript -def: "A primary transcript encoding arginyl tRNA (SO:0000255)." [SO:ke] -synonym: "arginine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000213 -name: asparagine_tRNA_primary_transcript -def: "A primary transcript encoding asparaginyl tRNA (SO:0000256)." [SO:ke] -synonym: "asparagine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000214 -name: aspartic_acid_tRNA_primary_transcript -def: "A primary transcript encoding aspartyl tRNA (SO:0000257)." [SO:ke] -synonym: "aspartic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000215 -name: cysteine_tRNA_primary_transcript -def: "A primary transcript encoding cysteinyl tRNA (SO:0000258)." [SO:ke] -synonym: "cysteine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000216 -name: glutamic_acid_tRNA_primary_transcript -def: "A primary transcript encoding glutaminyl tRNA (SO:0000260)." [SO:ke] -synonym: "glutamic acid tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000217 -name: glutamine_tRNA_primary_transcript -def: "A primary transcript encoding glutamyl tRNA (SO:0000260)." 
[SO:ke] -synonym: "glutamine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000218 -name: glycine_tRNA_primary_transcript -def: "A primary transcript encoding glycyl tRNA (SO:0000263)." [SO:ke] -synonym: "glycine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000219 -name: histidine_tRNA_primary_transcript -def: "A primary transcript encoding histidyl tRNA (SO:0000262)." [SO:ke] -synonym: "histidine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000220 -name: isoleucine_tRNA_primary_transcript -def: "A primary transcript encoding isoleucyl tRNA (SO:0000263)." [SO:ke] -synonym: "isoleucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000221 -name: leucine_tRNA_primary_transcript -def: "A primary transcript encoding leucyl tRNA (SO:0000264)." [SO:ke] -synonym: "leucine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000222 -name: lysine_tRNA_primary_transcript -def: "A primary transcript encoding lysyl tRNA (SO:0000265)." [SO:ke] -synonym: "lysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000223 -name: methionine_tRNA_primary_transcript -def: "A primary transcript encoding methionyl tRNA (SO:0000266)." [SO:ke] -synonym: "methionine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000224 -name: phenylalanine_tRNA_primary_transcript -def: "A primary transcript encoding phenylalanyl tRNA (SO:0000267)." [SO:ke] -synonym: "phenylalanine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000225 -name: proline_tRNA_primary_transcript -def: "A primary transcript encoding prolyl tRNA (SO:0000268)." [SO:ke] -synonym: "proline tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! 
tRNA_primary_transcript - -[Term] -id: SO:0000226 -name: serine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -synonym: "serine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000227 -name: threonine_tRNA_primary_transcript -def: "A primary transcript encoding threonyl tRNA (SO:000270)." [SO:ke] -synonym: "threonine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000228 -name: tryptophan_tRNA_primary_transcript -def: "A primary transcript encoding tryptophanyl tRNA (SO:000271)." [SO:ke] -synonym: "tryptophan tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000229 -name: tyrosine_tRNA_primary_transcript -def: "A primary transcript encoding tyrosyl tRNA (SO:000272)." [SO:ke] -synonym: "tyrosine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000230 -name: valine_tRNA_primary_transcript -def: "A primary transcript encoding valyl tRNA (SO:000273)." [SO:ke] -synonym: "valine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000231 -name: snRNA_primary_transcript -def: "A primary transcript encoding a small nuclear RNA (SO:0000274)." [SO:ke] -synonym: "snRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000232 -name: snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar mRNA (SO:0000275)." [SO:ke] -synonym: "snoRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000233 -name: mature_transcript -def: "A transcript which has undergone the necessary modifications, if any, for its function. In eukaryotes this includes, for example, processing of introns, cleavage, base modification, and modifications to the 5' and/or the 3' ends, other than addition of bases. 
In bacteria functional mRNAs are usually not modified." [SO:ke] -comment: A processed transcript cannot contain introns. -subset: SOFA -synonym: "mature transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Mature_transcript "wiki" -is_a: SO:0000673 ! transcript -relationship: derives_from SO:0000185 ! primary_transcript - -[Term] -id: SO:0000234 -name: mRNA -def: "Messenger RNA is the intermediate molecule between DNA and protein. It includes UTR and coding sequences. It does not contain introns." [SO:ma] -comment: An mRNA does not contain introns as it is a processed_transcript. The equivalent kind of primary_transcript is protein_coding_primary_transcript (SO:0000120) which may contain introns. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "messenger RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000235 -name: TF_binding_site -def: "A region of a molecule that binds a TF complex [GO:0005667]." [SO:ke] -subset: SOFA -synonym: "TF binding site" EXACT [] -synonym: "transcription factor binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000236 -name: ORF -def: "The inframe interval between the stop codons of a reading frame which when read as sequential triplets, has the potential of encoding a sequential string of amino acids. TER(NNN)nTER." [SGD:rb, SO:ma] -comment: The definition was modified by Rama. ORF is defined by the sequence, whereas the CDS is defined according to whether a polypeptide is made. This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "open reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000237 -name: transcript_attribute -synonym: "transcript attribute" EXACT [] -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000238 -name: foldback_element -def: "A transposable element with extensive secondary structure, characterised by large modular imperfect long inverted repeats." [http://www.genetics.org/cgi/reprint/156/4/1983.pdf] -synonym: "foldback element" EXACT [] -synonym: "long inverted repeat element" RELATED [] -synonym: "LVR element" RELATED [] -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000239 -name: flanking_region -def: "The sequences extending on either side of a specific region." [SO:ke] -subset: SOFA -synonym: "flanking region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000240 -name: chromosome_variation -synonym: "chromosome variation" EXACT [] -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000400 ! sequence_attribute -disjoint_from: SO:0001060 ! sequence_variant -disjoint_from: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0000241 -name: internal_UTR -def: "A UTR bordered by the terminal and initial codons of two CDSs in a polycistronic transcript. Every UTR is either 5', 3' or internal." [SO:cjm] -synonym: "internal UTR" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000242 -name: untranslated_region_polycistronic_mRNA -def: "The untranslated sequence separating the 'cistrons' of multicistronic mRNA." [SO:ke] -synonym: "untranslated region polycistronic mRNA" EXACT [] -is_a: SO:0000203 ! UTR - -[Term] -id: SO:0000243 -name: internal_ribosome_entry_site -def: "Sequence element that recruits a ribosomal subunit to internal mRNA for translation initiation." [SO:ke] -synonym: "internal ribosomal entry sequence" EXACT [] -synonym: "internal ribosomal entry site" EXACT [] -synonym: "internal ribosome entry sequence" RELATED [] -synonym: "internal ribosome entry site" EXACT [] -synonym: "IRES" EXACT [] -xref: http://en.wikipedia.org/wiki/Internal_ribosome_entry_site "wiki" -is_a: SO:0000139 ! 
ribosome_entry_site - -[Term] -id: SO:0000244 -name: four_cutter_restriction_site -synonym: "4-cutter_restriction_site" RELATED [] -synonym: "four-cutter_restriction_sit" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000245 -name: mRNA_by_polyadenylation_status -is_obsolete: true - -[Term] -id: SO:0000246 -name: polyadenylated -def: "A attribute describing the addition of a poly A tail to the 3' end of a mRNA molecule." [SO:ke] -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000247 -name: mRNA_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000248 -name: sequence_length_variation -synonym: "sequence length variation" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000249 -name: six_cutter_restriction_site -synonym: "6-cutter_restriction_site" RELATED [] -synonym: "six-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000250 -name: modified_RNA_base_feature -def: "A post_transcriptionally modified base." [SO:ke] -synonym: "modified RNA base feature" EXACT [] -is_a: SO:0001236 ! base - -[Term] -id: SO:0000251 -name: eight_cutter_restriction_site -synonym: "8-cutter_restriction_site" RELATED [] -synonym: "eight-cutter_restriction_site" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000252 -name: rRNA -def: "RNA that comprises part of a ribosome, and that can provide both structural scaffolding and catalytic activity." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, ISBN:0198506732] -subset: SOFA -synonym: "ribosomal ribonucleic acid" EXACT [] -synonym: "ribosomal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000253 -name: tRNA -def: "Transfer RNA (tRNA) molecules are approximately 80 nucleotides in length. Their secondary structure includes four short double-helical elements and three loops (D, anti-codon, and T loops). 
Further hydrogen bonds mediate the characteristic L-shaped molecular structure. Transfer RNAs have two regions of fundamental functional importance: the anti-codon, which is responsible for specific mRNA codon recognition, and the 3' end, to which the tRNA's corresponding amino acid is attached (by aminoacyl-tRNA synthetases). Transfer RNAs cope with the degeneracy of the genetic code in two manners: having more than one tRNA (with a specific anti-codon) for a particular amino acid; and 'wobble' base-pairing, i.e. permitting non-standard base-pairing at the 3rd anti-codon position." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00005, ISBN:0198506732] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "transfer ribonucleic acid" RELATED [] -synonym: "transfer RNA" RELATED [] -xref: http://en.wikipedia.org/wiki/TRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0000254 -name: alanyl_tRNA -def: "A tRNA sequence that has an alanine anticodon, and a 3' alanine binding region." [SO:ke] -synonym: "alanyl tRNA" EXACT [] -synonym: "alanyl-transfer ribonucleic acid" EXACT [] -synonym: "alanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000211 ! alanine_tRNA_primary_transcript - -[Term] -id: SO:0000255 -name: rRNA_small_subunit_primary_transcript -def: "A primary transcript encoding a small ribosomal subunit RNA." [SO:ke] -synonym: "rRNA small subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000256 -name: asparaginyl_tRNA -def: "A tRNA sequence that has an asparagine anticodon, and a 3' asparagine binding region." [SO:ke] -synonym: "asparaginyl tRNA" EXACT [] -synonym: "asparaginyl-transfer ribonucleic acid" EXACT [] -synonym: "asparaginyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000213 ! 
asparagine_tRNA_primary_transcript - -[Term] -id: SO:0000257 -name: aspartyl_tRNA -def: "A tRNA sequence that has an aspartic acid anticodon, and a 3' aspartic acid binding region." [SO:ke] -synonym: "aspartyl tRNA" EXACT [] -synonym: "aspartyl-transfer ribonucleic acid" EXACT [] -synonym: "aspartyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000214 ! aspartic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000258 -name: cysteinyl_tRNA -def: "A tRNA sequence that has a cysteine anticodon, and a 3' cysteine binding region." [SO:ke] -synonym: "cysteinyl tRNA" EXACT [] -synonym: "cysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "cysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000215 ! cysteine_tRNA_primary_transcript - -[Term] -id: SO:0000259 -name: glutaminyl_tRNA -def: "A tRNA sequence that has a glutamine anticodon, and a 3' glutamine binding region." [SO:ke] -synonym: "glutaminyl tRNA" EXACT [] -synonym: "glutaminyl-transfer ribonucleic acid" EXACT [] -synonym: "glutaminyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000216 ! glutamic_acid_tRNA_primary_transcript - -[Term] -id: SO:0000260 -name: glutamyl_tRNA -def: "A tRNA sequence that has a glutamic acid anticodon, and a 3' glutamic acid binding region." [SO:ke] -synonym: "glutamyl tRNA" EXACT [] -synonym: "glutamyl-transfer ribonucleic acid" EXACT [] -synonym: "glutamyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000217 ! glutamine_tRNA_primary_transcript - -[Term] -id: SO:0000261 -name: glycyl_tRNA -def: "A tRNA sequence that has a glycine anticodon, and a 3' glycine binding region." [SO:ke] -synonym: "glycyl tRNA" EXACT [] -synonym: "glycyl-transfer ribonucleic acid" RELATED [] -synonym: "glycyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000218 ! 
glycine_tRNA_primary_transcript - -[Term] -id: SO:0000262 -name: histidyl_tRNA -def: "A tRNA sequence that has a histidine anticodon, and a 3' histidine binding region." [SO:ke] -synonym: "histidyl tRNA" EXACT [] -synonym: "histidyl-transfer ribonucleic acid" EXACT [] -synonym: "histidyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000219 ! histidine_tRNA_primary_transcript - -[Term] -id: SO:0000263 -name: isoleucyl_tRNA -def: "A tRNA sequence that has an isoleucine anticodon, and a 3' isoleucine binding region." [SO:ke] -synonym: "isoleucyl tRNA" EXACT [] -synonym: "isoleucyl-transfer ribonucleic acid" EXACT [] -synonym: "isoleucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000220 ! isoleucine_tRNA_primary_transcript - -[Term] -id: SO:0000264 -name: leucyl_tRNA -def: "A tRNA sequence that has a leucine anticodon, and a 3' leucine binding region." [SO:ke] -synonym: "leucyl tRNA" EXACT [] -synonym: "leucyl-transfer ribonucleic acid" EXACT [] -synonym: "leucyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000221 ! leucine_tRNA_primary_transcript - -[Term] -id: SO:0000265 -name: lysyl_tRNA -def: "A tRNA sequence that has a lysine anticodon, and a 3' lysine binding region." [SO:ke] -synonym: "lysyl tRNA" EXACT [] -synonym: "lysyl-transfer ribonucleic acid" EXACT [] -synonym: "lysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000222 ! lysine_tRNA_primary_transcript - -[Term] -id: SO:0000266 -name: methionyl_tRNA -def: "A tRNA sequence that has a methionine anticodon, and a 3' methionine binding region." [SO:ke] -synonym: "methionyl tRNA" EXACT [] -synonym: "methionyl-transfer ribonucleic acid" EXACT [] -synonym: "methionyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000223 ! 
methionine_tRNA_primary_transcript - -[Term] -id: SO:0000267 -name: phenylalanyl_tRNA -def: "A tRNA sequence that has a phenylalanine anticodon, and a 3' phenylalanine binding region." [SO:ke] -synonym: "phenylalanyl tRNA" EXACT [] -synonym: "phenylalanyl-transfer ribonucleic acid" EXACT [] -synonym: "phenylalanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000224 ! phenylalanine_tRNA_primary_transcript - -[Term] -id: SO:0000268 -name: prolyl_tRNA -def: "A tRNA sequence that has a proline anticodon, and a 3' proline binding region." [SO:ke] -synonym: "prolyl tRNA" EXACT [] -synonym: "prolyl-transfer ribonucleic acid" EXACT [] -synonym: "prolyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000225 ! proline_tRNA_primary_transcript - -[Term] -id: SO:0000269 -name: seryl_tRNA -def: "A tRNA sequence that has a serine anticodon, and a 3' serine binding region." [SO:ke] -synonym: "seryl tRNA" EXACT [] -synonym: "seryl-transfer ribonucleic acid" RELATED [] -synonym: "seryl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000226 ! serine_tRNA_primary_transcript - -[Term] -id: SO:0000270 -name: threonyl_tRNA -def: "A tRNA sequence that has a threonine anticodon, and a 3' threonine binding region." [SO:ke] -synonym: "threonyl tRNA" EXACT [] -synonym: "threonyl-transfer ribonucleic acid" EXACT [] -synonym: "threonyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000227 ! threonine_tRNA_primary_transcript - -[Term] -id: SO:0000271 -name: tryptophanyl_tRNA -def: "A tRNA sequence that has a tryptophan anticodon, and a 3' tryptophan binding region." [SO:ke] -synonym: "tryptophanyl tRNA" EXACT [] -synonym: "tryptophanyl-transfer ribonucleic acid" EXACT [] -synonym: "tryptophanyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000228 ! 
tryptophan_tRNA_primary_transcript - -[Term] -id: SO:0000272 -name: tyrosyl_tRNA -def: "A tRNA sequence that has a tyrosine anticodon, and a 3' tyrosine binding region." [SO:ke] -synonym: "tyrosyl tRNA" EXACT [] -synonym: "tyrosyl-transfer ribonucleic acid" EXACT [] -synonym: "tyrosyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000229 ! tyrosine_tRNA_primary_transcript - -[Term] -id: SO:0000273 -name: valyl_tRNA -def: "A tRNA sequence that has a valine anticodon, and a 3' valine binding region." [SO:ke] -synonym: "valyl tRNA" EXACT [] -synonym: "valyl-transfer ribonucleic acid" EXACT [] -synonym: "valyl-transfer RNA" RELATED [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000230 ! valine_tRNA_primary_transcript - -[Term] -id: SO:0000274 -name: snRNA -def: "A small nuclear RNA molecule involved in pre-mRNA splicing and processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, PMID:11733745, WB:ems] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "small nuclear RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000231 ! snRNA_primary_transcript - -[Term] -id: SO:0000275 -name: snoRNA -def: "A snoRNA (small nucleolar RNA) is any one of a class of small RNAs that are associated with the eukaryotic nucleus as components of small nucleolar ribonucleoproteins. They participate in the processing or modifications of many RNAs, mostly ribosomal RNAs (rRNAs) though snoRNAs are also known to target other classes of RNA, including spliceosomal RNAs, tRNAs, and mRNAs via a stretch of sequence that is complementary to a sequence in the targeted RNA." [GOC:kgc] -subset: SOFA -synonym: "small nucleolar RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SnoRNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000232 ! 
snoRNA_primary_transcript - -[Term] -id: SO:0000276 -name: miRNA -def: "Small, ~22-nt, RNA molecule that is the endogenous transcript of a miRNA gene. Micro RNAs are produced from precursor molecules (SO:0000647) that can form local hairpin structures, which ordinarily are processed (via the Dicer pathway) such that a single miRNA molecule accumulates from one arm of a hairpin precursor molecule. Micro RNAs may trigger the cleavage of their target molecules or act as translational repressors." [PMID:12592000] -subset: SOFA -synonym: "micro RNA" EXACT [] -synonym: "microRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MiRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA -relationship: derives_from SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000277 -name: bound_by_factor -def: "An attribute describing a sequence that is bound by another molecule." [SO:ke] -comment: Formerly called transcript_by_bound_factor. -synonym: "bound by factor" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000278 -name: transcript_bound_by_nucleic_acid -def: "A transcript that is bound by a nucleic acid." [SO:xp] -comment: Formerly called transcript_by_bound_nucleic_acid. -synonym: "transcript bound by nucleic acid" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000876 ! bound_by_nucleic_acid - -[Term] -id: SO:0000279 -name: transcript_bound_by_protein -def: "A transcript that is bound by a protein." [SO:xp] -comment: Formerly called transcript_by_bound_protein. -synonym: "transcript bound by protein" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000875 ! bound_by_protein - -[Term] -id: SO:0000280 -name: engineered_gene -def: "A gene that is engineered." [SO:xp] -synonym: "engineered gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000783 ! 
engineered - -[Term] -id: SO:0000281 -name: engineered_foreign_gene -def: "A gene that is engineered and foreign." [SO:xp] -synonym: "engineered foreign gene" EXACT [] -intersection_of: SO:0000280 ! engineered_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000282 -name: mRNA_with_minus_1_frameshift -def: "An mRNA with a minus 1 frameshift." [SO:xp] -synonym: "mRNA with minus 1 frameshift" EXACT [] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000866 ! minus_1_frameshift - -[Term] -id: SO:0000283 -name: engineered_foreign_transposable_element_gene -def: "A transposible_element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign transposable element gene" EXACT [] -intersection_of: SO:0000111 ! transposable_element_gene -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000284 -name: type_I_enzyme_restriction_site -def: "The recognition site is bipartite and interrupted." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000285 -name: foreign_gene -def: "A gene that is foreign." [SO:xp] -synonym: "foreign gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000286 -name: long_terminal_repeat -def: "A sequence directly repeated at both ends of a defined sequence, of the sort typically found in retroviruses." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "direct terminal repeat" RELATED [] -synonym: "long terminal repeat" EXACT [] -synonym: "LTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Long_terminal_repeat "wiki" -is_a: SO:0000657 ! repeat_region -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000287 -name: fusion_gene -def: "A gene that is a fusion." 
[SO:xp] -synonym: "fusion gene" EXACT [] -xref: http://en.wikipedia.org/wiki/Fusion_gene "wiki" -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000806 ! fusion - -[Term] -id: SO:0000288 -name: engineered_fusion_gene -def: "A fusion gene that is engineered." [SO:xp] -synonym: "engineered fusion gene" EXACT [] -intersection_of: SO:0000287 ! fusion_gene -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000289 -name: microsatellite -def: "A repeat_region containing repeat_units (2 to 4 bp) that is repeated multiple times in tandem." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -synonym: "microsatellite locus" EXACT [] -synonym: "microsatellite marker" EXACT [] -synonym: "VNTR" EXACT [] -xref: http://en.wikipedia.org/wiki/Microsatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000290 -name: dinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite" EXACT [] -synonym: "dinucleotide repeat microsatellite feature" EXACT [] -synonym: "dinucleotide repeat microsatellite locus" EXACT [] -synonym: "dinucleotide repeat microsatellite marker" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000291 -name: trinucleotide_repeat_microsatellite_feature -synonym: "dinucleotide repeat microsatellite marker" RELATED [] -synonym: "rinucleotide repeat microsatellite" EXACT [] -synonym: "trinucleotide repeat microsatellite feature" EXACT [] -synonym: "trinucleotide repeat microsatellite locus" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000292 -name: repetitive_element -is_obsolete: true - -[Term] -id: SO:0000293 -name: engineered_foreign_repetitive_element -def: "A repetitive element that is engineered and foreign." [SO:xp] -synonym: "engineered foreign repetitive element" EXACT [] -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! 
foreign - -[Term] -id: SO:0000294 -name: inverted_repeat -def: "The sequence is complementarily repeated on the opposite strand. It is a palindrome, and it may, or may not be hyphenated. Examples: GCTGATCAGC, or GCTGA-----TCAGC." [SO:ke] -subset: SOFA -synonym: "inverted repeat" EXACT [] -synonym: "inverted repeat sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Inverted_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000295 -name: U12_intron -def: "A type of spliceosomal intron spliced by the U12 spliceosome, that includes U11, U12, U4atac/U6atac and U5 snRNAs." [PMID:9428511] -comment: May have either GT-AC or AT-AC 5' and 3' boundaries. -synonym: "U12 intron" EXACT [] -synonym: "U12-dependent intron" EXACT [] -is_a: SO:0000662 ! spliceosomal_intron - -[Term] -id: SO:0000296 -name: origin_of_replication -def: "The origin of replication; starting site for duplication of a nucleic acid molecule to give two identical copies." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "ori" EXACT [] -synonym: "origin of replication" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_replication "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0001235 ! replicon - -[Term] -id: SO:0000297 -name: D_loop -def: "Displacement loop; a region within mitochondrial DNA in which a short stretch of RNA is paired with one strand of DNA, displacing the original partner DNA strand in this region; also used to describe the displacement of a region of one strand of duplex DNA by a single stranded invader in the reaction catalyzed by RecA protein." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "D-loop" EXACT [] -synonym: "displacement loop" RELATED [] -xref: http://en.wikipedia.org/wiki/D_loop "wiki" -is_a: SO:0000296 ! 
origin_of_replication - -[Term] -id: SO:0000298 -name: recombination_feature -synonym: "recombination feature" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000299 -name: specific_recombination_site -synonym: "specific recombination site" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000300 -name: recombination_feature_of_rearranged_gene -synonym: "recombination feature of rearranged gene" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000301 -name: vertebrate_immune_system_gene_recombination_feature -synonym: "vertebrate immune system gene recombination feature" EXACT [] -is_a: SO:0000300 ! recombination_feature_of_rearranged_gene - -[Term] -id: SO:0000302 -name: J_gene_recombination_feature -def: "Recombination signal including J-heptamer, J-spacer and J-nonamer in 5' of J-region of a J-gene or J-sequence." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene recombination feature" EXACT [] -synonym: "J-RS" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000303 -name: clip -def: "Part of the primary transcript that is clipped off during processing." [SO:ke] -subset: SOFA -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000304 -name: type_II_enzyme_restriction_site -def: "The recognition site is either palindromic, partially palindromic or an interrupted palindrome. Cleavage occurs within the recognition site." [http://www.promega.com] -is_obsolete: true - -[Term] -id: SO:0000305 -name: modified_base_site -def: "A modified nucleotide, i.e. a nucleotide other than A, T, C. G or (in RNA) U." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Modified base:. -subset: SOFA -synonym: "modified base site" EXACT [] -is_a: SO:0001236 ! base - -[Term] -id: SO:0000306 -name: methylated_base_feature -def: "A nucleotide modified by methylation." 
[SO:ke] -subset: SOFA -synonym: "methylated base feature" EXACT [] -is_a: SO:0000305 ! modified_base_site - -[Term] -id: SO:0000307 -name: CpG_island -def: "Regions of a few hundred to a few thousand bases in vertebrate genomes that are relatively GC and CpG rich; they are typically unmethylated and often found near the 5' ends of genes." [SO:rd] -subset: SOFA -synonym: "CG island" EXACT [] -synonym: "CpG island" EXACT [] -xref: http://en.wikipedia.org/wiki/CpG_island "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000308 -name: sequence_feature_locating_method -is_obsolete: true - -[Term] -id: SO:0000309 -name: computed_feature -is_obsolete: true - -[Term] -id: SO:0000310 -name: predicted_ab_initio_computation -is_obsolete: true - -[Term] -id: SO:0000311 -name: computed_feature_by_similarity -def: "." [SO:ma] -comment: similar to: -is_obsolete: true - -[Term] -id: SO:0000312 -name: experimentally_determined -def: "Attribute to describe a feature that has been experiemntally verified." [SO:ke] -synonym: "experimentally determined" EXACT [] -is_a: SO:0000789 ! validated - -[Term] -id: SO:0000313 -name: stem_loop -alt_id: SO:0000019 -def: "A double-helical region of nucleic acid formed by base-pairing between adjacent (inverted) complementary sequences." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "RNA_hairpin_loop" EXACT [] -synonym: "stem loop" EXACT [] -synonym: "stem-loop" EXACT [] -xref: http://en.wikipedia.org/wiki/Stem_loop "wiki" -is_a: SO:0000122 ! RNA_sequence_secondary_structure - -[Term] -id: SO:0000314 -name: direct_repeat -def: "A repeat where the same sequence is repeated in the same direction. Example: GCTGA-----GCTGA." [SO:ke] -subset: SOFA -synonym: "direct repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Direct_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000315 -name: TSS -def: "The first base where RNA polymerase begins to synthesize the RNA transcript." 
[SO:ke] -subset: SOFA -synonym: "transcription start site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000316 -name: CDS -def: "A contiguous sequence which begins with, and includes, a start codon and ends with, and includes, a stop codon." [SO:ma] -subset: SOFA -synonym: "coding sequence" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000317 -name: cDNA_clone -def: "Complementary DNA; A piece of DNA copied from an mRNA and spliced into a vector for propagation in a suitable host." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "cDNA clone" EXACT [] -intersection_of: SO:0000151 ! clone -intersection_of: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000318 -name: start_codon -def: "First codon to be translated by a ribosome." [SO:ke] -subset: SOFA -synonym: "initiation codon" EXACT [] -synonym: "start codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Start_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000319 -name: stop_codon -def: "In mRNA, a set of three nucleotides that indicates the end of information for protein synthesis." [SO:ke] -subset: SOFA -synonym: "stop codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Stop_codon "wiki" -is_a: SO:0000360 ! codon - -[Term] -id: SO:0000320 -name: intronic_splice_enhancer -def: "Sequences within the intron that modulate splice site selection for some introns." [SO:ke] -synonym: "intronic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000321 -name: mRNA_with_plus_1_frameshift -def: "An mRNA with a plus 1 frameshift." [SO:ke] -synonym: "mRNA with plus 1 frameshift" EXACT [] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000868 ! 
plus_1_frameshift - -[Term] -id: SO:0000322 -name: nuclease_hypersensitive_site -synonym: "nuclease hypersensitive site" EXACT [] -is_a: SO:0000684 ! nuclease_sensitive_site - -[Term] -id: SO:0000323 -name: coding_start -def: "The first base to be translated into protein." [SO:ke] -synonym: "coding start" EXACT [] -synonym: "translation initiation site" EXACT [] -synonym: "translation start" RELATED [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000324 -name: tag -def: "A nucleotide sequence that may be used to identify a larger sequence." [SO:ke] -subset: SOFA -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000325 -name: rRNA_large_subunit_primary_transcript -def: "A primary transcript encoding a large ribosomal subunit RNA." [SO:ke] -synonym: "rRNA large subunit primary transcript" EXACT [] -is_a: SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000326 -name: SAGE_tag -def: "A short diagnostic sequence tag, serial analysis of gene expression (SAGE), that allows the quantitative and simultaneous analysis of a large number of transcripts." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=7570003&dopt=Abstract] -subset: SOFA -synonym: "SAGE tag" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000327 -name: coding_end -def: "The last base to be translated into protein. It does not include the stop codon." [SO:ke] -synonym: "coding end" EXACT [] -synonym: "translation termination site" EXACT [] -synonym: "translation_end" EXACT [] -is_a: SO:0000851 ! CDS_region - -[Term] -id: SO:0000328 -name: microarray_oligo -synonym: "microarray oligo" EXACT [] -synonym: "microarray oligonucleotide" EXACT [] -is_a: SO:0000051 ! probe - -[Term] -id: SO:0000329 -name: mRNA_with_plus_2_frameshift -def: "An mRNA with a plus 2 frameshift." [SO:xp] -synonym: "mRNA with plus 2 frameshift" EXACT [] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000869 ! 
plus_2_framshift - -[Term] -id: SO:0000330 -name: conserved_region -def: "Region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "conserved region" EXACT [] -xref: http://en.wikipedia.org/wiki/Conserved_region "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000331 -name: STS -def: "Short (typically a few hundred base pairs) DNA sequence that has a single occurrence in a genome and whose location and base sequence are known." [http://www.biospace.com] -subset: SOFA -synonym: "sequence tag site" EXACT [] -is_a: SO:0000324 ! tag - -[Term] -id: SO:0000332 -name: coding_conserved_region -def: "Coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "coding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000333 -name: exon_junction -def: "The boundary between two exons in a processed transcript." [SO:ke] -subset: SOFA -synonym: "exon junction" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000233 ! mature_transcript - -[Term] -id: SO:0000334 -name: nc_conserved_region -def: "Non-coding region of sequence similarity by descent from a common ancestor." [SO:ke] -subset: SOFA -synonym: "nc conserved region" EXACT [] -synonym: "noncoding conserved region" EXACT [] -is_a: SO:0000330 ! conserved_region - -[Term] -id: SO:0000335 -name: mRNA_with_minus_2_frameshift -def: "A mRNA with a minus 2 frameshift." [SO:ke] -synonym: "mRNA with minus 2 frameshift" EXACT [] -intersection_of: SO:0000108 ! mRNA_with_frameshift -intersection_of: has_quality SO:0000867 ! minus_2_frameshift - -[Term] -id: SO:0000336 -name: pseudogene -def: "A sequence that closely resembles a known functional gene, at another locus within a genome, that is non-functional as a consequence of (usually several) mutations that prevent either its transcription or translation (or both). 
In general, pseudogenes result from either reverse transcription of a transcript of their \"normal\" paralog (SO:0000043) (in which case the pseudogene typically lacks introns and includes a poly(A) tail) or from recombination (SO:0000044) (in which case the pseudogene is typically a tandem duplication of its \"normal\" paralog)." [http://www.ucl.ac.uk/~ucbhjow/b241/glossary.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Pseudogene "wiki" -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000704 ! gene - -[Term] -id: SO:0000337 -name: RNAi_reagent -def: "A double stranded RNA duplex, at least 20bp long, used experimentally to inhibit gene function by RNA interference." [SO:rd] -subset: SOFA -synonym: "RNAi reagent" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0000338 -name: MITE -def: "A highly repetitive and short (100-500 base pair) transposable element with terminal inverted repeats (TIR) and target site duplication (TSD). MITEs do not encode proteins." [http://www.pnas.org/cgi/content/full/97/18/10083] -synonym: "miniature inverted repeat transposable element" EXACT [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000339 -name: recombination_hotspot -def: "A region in a genome which promotes recombination." [SO:rd] -synonym: "recombination hotspot" EXACT [] -xref: http://en.wikipedia.org/wiki/Recombination_hotspot "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000340 -name: chromosome -def: "Structural unit composed of a nucleic acid molecule which controls its own replication through the interaction of specific proteins at one or more origins of replication." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Chromosome "wiki" -is_a: SO:0001235 ! 
replicon - -[Term] -id: SO:0000341 -name: chromosome_band -def: "A cytologically distinguishable feature of a chromosome, often made visible by staining, and usually alternating light and dark." [SO:ma] -subset: SOFA -synonym: "chromosome band" EXACT [] -synonym: "cytoband" EXACT [] -synonym: "cytological band" EXACT [] -xref: http://en.wikipedia.org/wiki/Cytological_band "wiki" -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000342 -name: site_specific_recombination_target_region -synonym: "site specific recombination target region" EXACT [] -is_a: SO:0000299 ! specific_recombination_site - -[Term] -id: SO:0000343 -name: match -def: "A region of sequence, aligned to another sequence with some statistical significance, using an algorithm such as BLAST or SIM4." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000344 -name: splice_enhancer -def: "Region of a transcript that regulates splicing." [SO:ke] -subset: SOFA -synonym: "splice enhancer" EXACT [] -is_a: SO:0001056 ! splicing_regulatory_region - -[Term] -id: SO:0000345 -name: EST -def: "A tag produced from a single sequencing read from a cDNA clone or PCR product; typically a few hundred base pairs long." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "expressed sequence tag" EXACT [] -is_a: SO:0000324 ! tag -relationship: derives_from SO:0000234 ! mRNA - -[Term] -id: SO:0000346 -name: loxP_site -synonym: "Cre-recombination target region" RELATED [] -synonym: "loxP site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000347 -name: nucleotide_match -def: "A match against a nucleotide sequence." [SO:ke] -subset: SOFA -synonym: "nucleotide match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000348 -name: nucleic_acid -def: "An attribute describing a sequence consisting of nucleobases bound to repeating units. 
The forms found in nature are deoxyribonucleic acid (DNA), where the repeating units are 2-deoxy-D-ribose rings connected to a phosphate backbone, and ribonucleic acid (RNA), where the repeating units are D-ribose rings connected to a phosphate backbone." [CHEBI:33696, RSC:cb] -synonym: "nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleic_acid "wiki" -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000349 -name: protein_match -def: "A match against a protein sequence." [SO:ke] -subset: SOFA -synonym: "protein match" EXACT [] -is_a: SO:0000343 ! match - -[Term] -id: SO:0000350 -name: FRT_site -def: "An inversion site found on the Saccharomyces cerevisiae 2 micron plasmid." [SO:ma] -synonym: "FLP recombination target region" EXACT [] -synonym: "FRT site" EXACT [] -is_a: SO:0000948 ! inversion_site - -[Term] -id: SO:0000351 -name: synthetic_sequence -def: "An attribute to decide a sequence of nucleotides, nucleotide analogs, or amino acids that has been designed by an experimenter and which may, or may not, correspond with any natural sequence." [SO:ma] -synonym: "synthetic sequence" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000352 -name: DNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a 2-deoxy-D-ribose ring connected to a phosphate backbone." [RSC:cb] -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000353 -name: sequence_assembly -def: "A sequence of nucleotides that has been algorithmically derived from an alignment of two or more different sequences." [SO:ma] -subset: SOFA -synonym: "sequence assembly" EXACT [] -xref: http://en.wikipedia.org/wiki/Sequence_assembly "wiki" -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0000354 -name: group_1_intron_homing_endonuclease_target_region -synonym: "group 1 intron homing endonuclease target region" EXACT [] -is_a: SO:0000684 ! 
nuclease_sensitive_site - -[Term] -id: SO:0000355 -name: haplotype_block -def: "A region of the genome which is co-inherited as the result of the lack of historic recombination within it." [SO:ma] -synonym: "haplotype block" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000356 -name: RNA -def: "An attribute describing a sequence consisting of nucleobases bound to a repeating unit made of a D-ribose ring connected to a phosphate backbone." [RSC:cb] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0000357 -name: flanked -def: "An attribute describing a region that is bounded either side by a paricular kind of region." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000359 -name: floxed -def: "An attribute describing sequence that is flanked by Lox-P sites." [SO:ke] -xref: http://en.wikipedia.org/wiki/Floxed "wiki" -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000360 -name: codon -def: "A set of (usually) three nucleotide bases in a DNA or RNA sequence, which together code for a unique amino acid or the termination of translation and are contained within the CDS." [http://www.everythingbio.com/glos/definition.php?word=codon, SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Codon "wiki" -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000361 -name: FRT_flanked -def: "An attribute to describe sequence that is flanked by the FLP recombinase recognition site, FRT." [SO:ke] -synonym: "FRT flanked" EXACT [] -is_a: SO:0000357 ! flanked - -[Term] -id: SO:0000362 -name: invalidated_by_chimeric_cDNA -def: "A cDNA clone constructed from more than one mRNA. Usually an experimental artifact." [SO:ma] -synonym: "invalidated by chimeric cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000363 -name: floxed_gene -def: "A transgene that is floxed." [SO:xp] -synonym: "floxed gene" EXACT [] -intersection_of: SO:0000902 ! 
transgene -intersection_of: has_quality SO:0000359 ! floxed - -[Term] -id: SO:0000364 -name: transposable_element_flanking_region -def: "The region of sequence surrounding a transposible element." [SO:ke] -synonym: "transposable element flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0000365 -name: integron -def: "A region encoding an integrase which acts at a site adjacent to it (attI_site) to insert DNA which must include but is not limited to an attC_site." [SO:as] -xref: http://en.wikipedia.org/wiki/Integron "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000366 -name: insertion_site -def: "The junction where an insertion occurred." [SO:ke] -subset: SOFA -synonym: "insertion site" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000367 -name: attI_site -def: "A region within an integron, adjacent to an integrase, at which site specific recombination involving an attC_site takes place." [SO:as] -synonym: "attI site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000368 -name: transposable_element_insertion_site -def: "The junction in a genome where a transposable_element has inserted." [SO:ke] -subset: SOFA -synonym: "transposable element insertion site" EXACT [] -is_a: SO:0000366 ! insertion_site - -[Term] -id: SO:0000369 -name: integrase_coding_region -is_obsolete: true - -[Term] -id: SO:0000370 -name: small_regulatory_ncRNA -def: "A non-coding RNA, usually with a specific secondary structure, that acts to regulate gene expression." [SO:ma] -subset: SOFA -synonym: "small regulatory ncRNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000371 -name: conjugative_transposon -def: "A transposon that encodes function required for conjugation." [http://www.sci.sdsu.edu/~smaloy/Glossary/C.html] -synonym: "conjugative transposon" EXACT [] -is_a: SO:0000182 ! 
DNA_transposon - -[Term] -id: SO:0000372 -name: enzymatic_RNA -def: "An RNA sequence that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: This was moved to be a child of transcript (SO:0000673) because some enzymatic RNA regions are part of primary transcripts and some are part of processed transcripts. -subset: SOFA -synonym: "enzymatic RNA" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0000373 -name: recombinationally_inverted_gene -def: "A recombinationally rearranged gene by inversion." [SO:xp] -synonym: "recombinationally inverted gene" EXACT [] -intersection_of: SO:0000456 ! recombinationally_rearranged_gene -intersection_of: has_quality SO:1000036 ! inversion - -[Term] -id: SO:0000374 -name: ribozyme -def: "An RNA with catalytic activity." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Ribozyme "wiki" -intersection_of: SO:0000372 ! enzymatic_RNA -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000375 -name: rRNA_5_8S -def: "5_8S ribosomal RNA (5. 8S rRNA) is a component of the large subunit of the eukaryotic ribosome. It is transcribed by RNA polymerase I as part of the 45S precursor that also contains 18S and 28S rRNA. Functionally, it is thought that 5.8S rRNA may be involved in ribosome translocation. It is also known to form covalent linkage to the p53 tumour suppressor protein. 5_8S rRNA is also found in archaea." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00002] -subset: SOFA -synonym: "5.8S LSU rRNA" EXACT [] -synonym: "5.8S ribosomal RNA" EXACT [] -synonym: "5.8S rRNA" EXACT [] -synonym: "rRNA 5 8S" EXACT [] -xref: http://en.wikipedia.org/wiki/5.8S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000376 -name: RNA_6S -def: "A small (184-nt in E. coli) RNA that forms a hairpin type structure. 6S RNA associates with RNA polymerase in a highly specific manner. 
6S RNA represses expression from a sigma70-dependent promoter during stationary phase." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00013] -synonym: "6S RNA" EXACT [] -synonym: "RNA 6S" EXACT [] -xref: http://en.wikipedia.org/wiki/6S_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000377 -name: CsrB_RsmB_RNA -def: "An enterobacterial RNA that binds the CsrA protein. The CsrB RNAs contain a conserved motif CAGGXXG that is found in up to 18 copies and has been suggested to bind CsrA. The Csr regulatory system has a strong negative regulatory effect on glycogen biosynthesis, glyconeogenesis and glycogen catabolism and a positive regulatory effect on glycolysis. In other bacteria such as Erwinia caratovara the RsmA protein has been shown to regulate the production of virulence determinants, such extracellular enzymes. RsmA binds to RsmB regulatory RNA which is also a member of this family." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00018] -synonym: "CsrB RsmB RNA" EXACT [] -synonym: "CsrB-RsmB RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000378 -name: DsrA_RNA -def: "DsrA RNA regulates both transcription, by overcoming transcriptional silencing by the nucleoid-associated H-NS protein, and translation, by promoting efficient translation of the stress sigma factor, RpoS. These two activities of DsrA can be separated by mutation: the first of three stem-loops of the 85 nucleotide RNA is necessary for RpoS translation but not for anti-H-NS action, while the second stem-loop is essential for antisilencing and less critical for RpoS translation. The third stem-loop, which behaves as a transcription terminator, can be substituted by the trp transcription terminator without loss of either DsrA function. 
The sequence of the first stem-loop of DsrA is complementary with the upstream leader portion of RpoS messenger RNA, suggesting that pairing of DsrA with the RpoS message might be important for translational regulation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00014] -synonym: "DsrA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/DsrA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000379 -name: GcvB_RNA -def: "A small untranslated RNA involved in expression of the dipeptide and oligopeptide transport systems in Escherichia coli." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00022] -synonym: "GcvB RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/GcvB_RNA "wiki" -is_a: SO:0000378 ! DsrA_RNA - -[Term] -id: SO:0000380 -name: hammerhead_ribozyme -def: "A small catalytic RNA motif that catalyzes self-cleavage reaction. Its name comes from its secondary structure which resembles a carpenter's hammer. The hammerhead ribozyme is involved in the replication of some viroid and some satellite RNAs." [http://rnaworld.bio.ku.edu/class/RNA/RNA00/RNA_World_3.html] -subset: SOFA -synonym: "hammerhead ribozyme" EXACT [] -xref: http://en.wikipedia.org/wiki/Hammerhead_ribozyme "wiki" -intersection_of: SO:0000715 ! RNA_motif -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000381 -name: group_IIA_intron -synonym: "group IIA intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000382 -name: group_IIB_intron -synonym: "group IIB intron" EXACT [] -is_a: SO:0000603 ! group_II_intron - -[Term] -id: SO:0000383 -name: MicF_RNA -def: "A non-translated 93 nt antisense RNA that binds its target ompF mRNA and regulates ompF expression by inhibiting translation and inducing degradation of the message." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00033] -synonym: "MicF RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/MicF_RNA "wiki" -is_a: SO:0000644 ! 
antisense_RNA - -[Term] -id: SO:0000384 -name: OxyS_RNA -def: "A small untranslated RNA which is induced in response to oxidative stress in Escherichia coli. Acts as a global regulator to activate or repress the expression of as many as 40 genes, including the fhlA-encoded transcriptional activator and the rpoS-encoded sigma(s) subunit of RNA polymerase. OxyS is bound by the Hfq protein, that increases the OxyS RNA interaction with its target messages." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00035] -synonym: "OxyS RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/OxyS_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000385 -name: RNase_MRP_RNA -def: "The RNA molecule essential for the catalytic activity of RNase MRP, an enzymatically active ribonucleoprotein with two distinct roles in eukaryotes. In mitochondria it plays a direct role in the initiation of mitochondrial DNA replication. In the nucleus it is involved in precursor rRNA processing, where it cleaves the internal transcribed spacer 1 between 18S and 5.8S rRNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00030] -subset: SOFA -synonym: "RNase MRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000386 -name: RNase_P_RNA -def: "The RNA component of Ribonuclease P (RNase P), a ubiquitous endoribonuclease, found in archaea, bacteria and eukarya as well as chloroplasts and mitochondria. Its best characterised activity is the generation of mature 5 prime ends of tRNAs by cleaving the 5 prime leader elements of precursor-tRNAs. Cellular RNase Ps are ribonucleoproteins. RNA from bacterial RNase Ps retains its catalytic activity in the absence of the protein subunit, i.e. it is a ribozyme. Isolated eukaryotic and archaeal RNase P RNA has not been shown to retain its catalytic function, but is still essential for the catalytic activity of the holoenzyme. 
Although the archaeal and eukaryotic holoenzymes have a much greater protein content than the bacterial ones, the RNA cores from all the three lineages are homologous. Helices corresponding to P1, P2, P3, P4, and P10/11 are common to all cellular RNase P RNAs. Yet, there is considerable sequence variation, particularly among the eukaryotic RNAs." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00010] -subset: SOFA -synonym: "RNase P RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000387 -name: RprA_RNA -def: "Translational regulation of the stationary phase sigma factor RpoS is mediated by the formation of a double-stranded RNA stem-loop structure in the upstream region of the rpoS messenger RNA, occluding the translation initiation site. Clones carrying rprA (RpoS regulator RNA) increased the translation of RpoS. The rprA gene encodes a 106 nucleotide regulatory RNA. As with DsrA Rfam:RF00014, RprA is predicted to form three stem-loops. Thus, at least two small RNAs, DsrA and RprA, participate in the positive regulation of RpoS translation. Unlike DsrA, RprA does not have an extensive region of complementarity to the RpoS leader, leaving its mechanism of action unclear. RprA is non-essential." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00034] -synonym: "RprA RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RprA_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000388 -name: RRE_RNA -def: "The Rev response element (RRE) is encoded within the HIV-env gene. Rev is an essential regulatory protein of HIV that binds an internal loop of the RRE leading, encouraging further Rev-RRE binding. This RNP complex is critical for mRNA export and hence for expression of the HIV structural proteins." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00036] -synonym: "RRE RNA" EXACT [] -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000389 -name: spot_42_RNA -def: "A 109-nucleotide RNA of E. 
coli that seems to have a regulatory role on the galactose operon. Changes in Spot 42 levels are implicated in affecting DNA polymerase I levels." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00021] -synonym: "spot-42 RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Spot_42_RNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000390 -name: telomerase_RNA -def: "The RNA component of telomerase, a reverse transcriptase that synthesises telomeric DNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00025] -subset: SOFA -synonym: "telomerase RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomerase_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000391 -name: U1_snRNA -def: "U1 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). Its 5' end forms complementary base pairs with the 5' splice junction, thus defining the 5' donor site of an intron. There are significant differences in sequence and secondary structure between metazoan and yeast U1 snRNAs, the latter being much longer (568 nucleotides as compared to 164 nucleotides in human). Nevertheless, secondary structure predictions suggest that all U1 snRNAs share a 'common core' consisting of helices I, II, the proximal region of III, and IV." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00003] -subset: SOFA -synonym: "small nuclear RNA U1" EXACT [RSC:cb] -synonym: "snRNA U1" EXACT [RSC:cb] -synonym: "U1 small nuclear RNA" EXACT [RSC:cb] -synonym: "U1 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U1_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000392 -name: U2_snRNA -def: "U2 is a small nuclear RNA (snRNA) component of the spliceosome (involved in pre-mRNA splicing). 
Complementary binding between U2 snRNA (in an area lying towards the 5' end but 3' to hairpin I) and the branchpoint sequence (BPS) of the intron results in the bulging out of an unpaired adenine, on the BPS, which initiates a nucleophilic attack at the intronic 5' splice site, thus starting the first of two transesterification reactions that mediate splicing." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00004] -subset: SOFA -synonym: "small nuclear RNA U2" EXACT [RSC:CB] -synonym: "snRNA U2" EXACT [RSC:CB] -synonym: "U2 small nuclear RNA" EXACT [RSC:CB] -synonym: "U2 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U2_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000393 -name: U4_snRNA -def: "U4 small nuclear RNA (U4 snRNA) is a component of the major U2-dependent spliceosome. It forms a duplex with U6, and with each splicing round, it is displaced from U6 (and the spliceosome) in an ATP-dependent manner, allowing U6 to refold and create the active site for splicing catalysis. A recycling process involving protein Prp24 re-anneals U4 and U6." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U4" EXACT [RSC:cb] -synonym: "snRNA U4" EXACT [RSC:cb] -synonym: "U4 small nuclear RNA" EXACT [RSC:cb] -synonym: "U4 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U4_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000394 -name: U4atac_snRNA -def: "An snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U6atac_snRNA (SO:0000397)." [PMID:=12409455] -subset: SOFA -synonym: "small nuclear RNA U4atac" EXACT [RSC:cb] -synonym: "snRNA U4atac" EXACT [RSC:cb] -synonym: "U4atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U4atac snRNA" EXACT [] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000395 -name: U5_snRNA -def: "U5 RNA is a component of both types of known spliceosome. 
The precise function of this molecule is unknown, though it is known that the 5' loop is required for splice site selection and p220 binding, and that both the 3' stem-loop and the Sm site are important for Sm protein binding and cap methylation." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00020] -subset: SOFA -synonym: "small nuclear RNA U5" EXACT [RSC:cb] -synonym: "snRNA U5" EXACT [RSC:cb] -synonym: "U5 small nuclear RNA" EXACT [RSC:cb] -synonym: "U5 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U5_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000396 -name: U6_snRNA -def: "U6 snRNA is a component of the spliceosome which is involved in splicing pre-mRNA. The putative secondary structure consensus base pairing is confined to a short 5' stem loop, but U6 snRNA is thought to form extensive base-pair interactions with U4 snRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00015] -subset: SOFA -synonym: "small nuclear RNA U6" EXACT [RSC:cb] -synonym: "snRNA U6" EXACT [RSC:cb] -synonym: "U6 small nuclear RNA" EXACT [RSC:cb] -synonym: "U6 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U6_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000397 -name: U6atac_snRNA -def: "U6atac_snRNA is an snRNA required for the splicing of the minor U12-dependent class of eukaryotic nuclear introns. It forms a base paired complex with U4atac_snRNA (SO:0000394)." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=retrieve&db=pubmed&list_uids=12409455&dopt=Abstract] -subset: SOFA -synonym: "snRNA U6atac" EXACT [RSC:cb] -synonym: "U6atac small nuclear RNA" EXACT [RSC:cb] -synonym: "U6atac snRNA" EXACT [RSC:cb] -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000398 -name: U11_snRNA -def: "U11 snRNA plays a role in splicing of the minor U12-dependent class of eukaryotic nuclear introns, similar to U1 snRNA in the major class spliceosome it base pairs to the conserved 5' splice site sequence." 
[PMID:9622129] -subset: SOFA -synonym: "small nuclear RNA U11" EXACT [RSC:cb] -synonym: "snRNA U11" EXACT [RSC:cb] -synonym: "U11 small nuclear RNA" EXACT [RSC:cb] -synonym: "U11 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U11_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000399 -name: U12_snRNA -def: "The U12 small nuclear (snRNA), together with U4atac/U6atac, U5, and U11 snRNAs and associated proteins, forms a spliceosome that cleaves a divergent class of low-abundance pre-mRNA introns." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00007] -subset: SOFA -synonym: "small nuclear RNA U12" EXACT [RSC:cb] -synonym: "snRNA U12" EXACT [RSC:cb] -synonym: "U12 small nuclear RNA" EXACT [RSC:cb] -synonym: "U12 snRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/U12_snRNA "wiki" -is_a: SO:0000274 ! snRNA - -[Term] -id: SO:0000400 -name: sequence_attribute -def: "An attribute describes a quality of sequence." [SO:ke] -synonym: "sequence attribute" EXACT [] -disjoint_from: SO:0000110 ! sequence_feature -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:0001060 ! sequence_variant -disjoint_from: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0000401 -name: gene_attribute -synonym: "gene attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000402 -name: enhancer_attribute -is_obsolete: true - -[Term] -id: SO:0000403 -name: U14_snoRNA -alt_id: SO:0005839 -def: "U14 small nucleolar RNA (U14 snoRNA) is required for early cleavages of eukaryotic precursor rRNAs. In yeasts, this molecule possess a stem-loop region (known as the Y-domain) which is essential for function. A similar structure, but with a different consensus sequence, is found in plants, but is absent in vertebrates." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00016, PMID:2551119] -comment: An evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA. 
-subset: SOFA -synonym: "small nucleolar RNA U14" EXACT [] -synonym: "snoRNA U14" EXACT [] -synonym: "U14 small nucleolar RNA" EXACT [] -synonym: "U14 snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0005837 ! U14_snoRNA_primary_transcript - -[Term] -id: SO:0000404 -name: vault_RNA -def: "A family of RNAs are found as part of the enigmatic vault ribonucleoprotein complex. The complex consists of a major vault protein (MVP), two minor vault proteins (VPARP and TEP1), and several small untranslated RNA molecules. It has been suggested that the vault complex is involved in drug resistance." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00006] -subset: SOFA -synonym: "vault RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Vault_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000405 -name: Y_RNA -def: "Y RNAs are components of the Ro ribonucleoprotein particle (Ro RNP), in association with Ro60 and La proteins. The Y RNAs and Ro60 and La proteins are well conserved, but the function of the Ro RNP is not known. In humans the RNA component can be one of four small RNAs: hY1, hY3, hY4 and hY5. These small RNAs are predicted to fold into a conserved secondary structure containing three stem structures. The largest of the four, hY1, contains an additional hairpin." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00019] -subset: SOFA -synonym: "Y RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Y_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000406 -name: twintron -def: "An intron within an intron. Twintrons are group II or III introns, into which another group II or III intron has been transposed." [PMID:1899376, PMID:7823908] -xref: http://en.wikipedia.org/wiki/Twintron "wiki" -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000407 -name: rRNA_18S -def: "A large polynucleotide in eukaryotes, which functions as the small subunit of the ribosome." 
[SO:ke] -subset: SOFA -synonym: "18S ribosomal RNA" EXACT [] -synonym: "18S rRNA" EXACT [] -synonym: "rRNA 18S" EXACT [] -xref: http://en.wikipedia.org/wiki/18S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0000408 -name: site -def: "The interbase position where something (eg an aberration) occurred." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000409 -name: binding_site -alt_id: BS:00033 -def: "A region on the surface of a molecule that may interact with another molecule. When applied to polypeptides: Amino acids involved in binding or interactions. It can also apply to an amino acid bond which is represented by the positions of the two flanking amino acids." [EBIBS:GAR, SO:ke] -comment: Discrete. -subset: biosapiens -subset: SOFA -synonym: "binding_or_interaction_site" EXACT [] -synonym: "site" RELATED [] -xref: http://en.wikipedia.org/wiki/Binding_site "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000410 -name: protein_binding_site -def: "A region of a molecule that binds to a protein." [SO:ke] -synonym: "protein binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000411 -name: rescue_region -def: "A region that rescues." [SO:xp] -synonym: "rescue fragment" EXACT [] -synonym: "rescue region" EXACT [] -synonym: "rescue segment" RELATED [] -intersection_of: SO:0000695 ! reagent -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000412 -name: restriction_fragment -def: "A region of polynucleotide sequence produced by digestion with a restriction endonuclease." [SO:ke] -subset: SOFA -synonym: "restriction fragment" EXACT [] -xref: http://en.wikipedia.org/wiki/Restriction_fragment "wiki" -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0000413 -name: sequence_difference -def: "A region where the sequence differs from that of a specified sequence." [SO:ke] -subset: SOFA -synonym: "sequence difference" EXACT [] -is_a: SO:0000700 ! 
remark - -[Term] -id: SO:0000414 -name: invalidated_by_genomic_contamination -def: "An attribute to describe a feature that is invalidated due to genomic contamination." [SO:ke] -synonym: "invalidated by genomic contamination" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000415 -name: invalidated_by_genomic_polyA_primed_cDNA -def: "An attribute to describe a feature that is invalidated due to polyA priming." [SO:ke] -synonym: "invalidated by genomic polyA primed cDNA" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000416 -name: invalidated_by_partial_processing -def: "An attribute to describe a feature that is invalidated due to partial processing." [SO:ke] -synonym: "invalidated by partial processing" EXACT [] -is_a: SO:0000790 ! invalidated - -[Term] -id: SO:0000417 -name: polypeptide_domain -alt_id: BS:00012 -alt_id: BS:00134 -alt_id: SO:0001069 -def: "A structurally or functionally defined protein region. In proteins with multiple domains, the combination of the domains determines the function of the protein. A region which has been shown to recur throughout evolution." [EBIBS:GAR] -comment: Range. Old definition from before biosapiens: A region of a single polypeptide chain that folds into an independent unit and exhibits biological activity. A polypeptide chain may have multiple domains. -subset: biosapiens -synonym: "ca_bind" RELATED BS [uniprot:feature_type] -synonym: "DNA_bind" RELATED BS [uniprot:feature_type] -synonym: "domain" BROAD BS [uniprot:feature_type] -synonym: "np_bind" RELATED BS [uniprot:feature_type] -synonym: "polypeptide domain" EXACT [] -synonym: "polypeptide_structural_domain" EXACT BS [] -synonym: "structural domain" BROAD BS [] -synonym: "zn_fing" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! polypeptide_structural_region -is_a: SO:0100021 ! 
polypeptide_conserved_region - -[Term] -id: SO:0000418 -name: signal_peptide -alt_id: BS:00159 -def: "The signal_peptide is a short region of the peptide located at the N-terminus that directs the protein to be secreted or part of membrane components." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Old def before biosapiens:The sequence for an N-terminal domain of a secreted protein; this domain is involved in attaching nascent polypeptide to the membrane leader sequence. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [uniprot:feature_type] -synonym: "signal peptide" EXACT [] -synonym: "signal peptide coding sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Signal_peptide "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000419 -name: mature_protein_region -alt_id: BS:00149 -def: "The polypeptide sequence that remains when the cleaved peptide regions have been cleaved from the immature peptide." [EBIBS:GAR, http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html, SO:cb] -comment: This term mature peptide, merged with the biosapiens term mature protein region and took that to be the new name. Old def: The coding sequence for the mature or final peptide or protein product following post-translational modification. -subset: biosapiens -subset: SOFA -synonym: "chain" RELATED [uniprot:feature_type] -synonym: "mature peptide" RELATED [] -synonym: "mature protein region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0000420 -name: five_prime_terminal_inverted_repeat -synonym: "5' TIR" EXACT [] -synonym: "five prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! terminal_inverted_repeat - -[Term] -id: SO:0000421 -name: three_prime_terminal_inverted_repeat -synonym: "3' TIR" EXACT [] -synonym: "three prime terminal inverted repeat" EXACT [] -is_a: SO:0000481 ! 
terminal_inverted_repeat - -[Term] -id: SO:0000422 -name: U5_LTR_region -synonym: "U5 long terminal repeat region" EXACT [] -synonym: "U5 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000423 -name: R_LTR_region -synonym: "R long terminal repeat region" EXACT [] -synonym: "R LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000424 -name: U3_LTR_region -synonym: "U3 long terminal repeat region" EXACT [] -synonym: "U3 LTR region" EXACT [] -is_a: SO:0000848 ! LTR_component - -[Term] -id: SO:0000425 -name: five_prime_LTR -synonym: "5' long terminal repeat" EXACT [] -synonym: "5' LTR" EXACT [] -synonym: "five prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000426 -name: three_prime_LTR -synonym: "3' long terminal repeat" EXACT [] -synonym: "3' LTR" EXACT [] -synonym: "three prime LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0000427 -name: R_five_prime_LTR_region -synonym: "R 5' long term repeat region" EXACT [] -synonym: "R five prime LTR region" EXACT [] -is_a: SO:0000423 ! R_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000428 -name: U5_five_prime_LTR_region -synonym: "U5 5' long terminal repeat region" EXACT [] -synonym: "U5 five prime LTR region" EXACT [] -is_a: SO:0000422 ! U5_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000429 -name: U3_five_prime_LTR_region -synonym: "U3 5' long term repeat region" EXACT [] -synonym: "U3 five prime LTR region" EXACT [] -is_a: SO:0000424 ! U3_LTR_region -is_a: SO:0000850 ! five_prime_LTR_component - -[Term] -id: SO:0000430 -name: R_three_prime_LTR_region -synonym: "R 3' long terminal repeat region" EXACT [] -synonym: "R three prime LTR region" EXACT [] -is_a: SO:0000849 ! 
three_prime_LTR_component - -[Term] -id: SO:0000431 -name: U3_three_prime_LTR_region -synonym: "U3 3' long terminal repeat region" EXACT [] -synonym: "U3 three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000432 -name: U5_three_prime_LTR_region -synonym: "U5 3' long terminal repeat region" EXACT [] -synonym: "U5 three prime LTR region" EXACT [] -is_a: SO:0000849 ! three_prime_LTR_component - -[Term] -id: SO:0000433 -name: non_LTR_retrotransposon_polymeric_tract -def: "A polymeric tract, such as poly(dA), within a non_LTR_retrotransposon." [SO:ke] -synonym: "non LTR retrotransposon polymeric tract" EXACT [] -is_a: SO:0000657 ! repeat_region -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000189 ! non_LTR_retrotransposon - -[Term] -id: SO:0000434 -name: target_site_duplication -def: "A sequence of the target DNA that is duplicated when a transposable element or phage inserts; usually found at each end the insertion." [http://www.koko.gov.my/CocoaBioTech/Glossaryt.html] -synonym: "target site duplication" EXACT [] -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0000435 -name: RR_tract -def: "A polypurine tract within an LTR_retrotransposon." [SO:ke] -synonym: "LTR retrotransposon poly purine tract" RELATED [] -synonym: "RR tract" EXACT [] -is_a: SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0000436 -name: ARS -def: "A sequence that can autonomously replicate, as a plasmid, when transformed into a bacterial host." [SO:ma] -subset: SOFA -synonym: "autonomously replicating sequence" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000437 -name: assortment_derived_duplication -is_obsolete: true - -[Term] -id: SO:0000438 -name: gene_not_polyadenylated -is_obsolete: true - -[Term] -id: SO:0000439 -name: inverted_ring_chromosome -synonym: "inverted ring chromosome" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000045 ! 
ring_chromosome - -[Term] -id: SO:0000440 -name: vector_replicon -def: "A replicon that has been modified to act as a vector for foreign sequence." [SO:ma] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "vector" EXACT [] -synonym: "vector replicon" EXACT [] -xref: http://en.wikipedia.org/wiki/Vector_(molecular_biology) "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000441 -name: ss_oligo -def: "A single stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "single strand oligo" EXACT [] -synonym: "single strand oligonucleotide" EXACT [] -synonym: "single stranded oligonucleotide" EXACT [] -synonym: "ss oligo" EXACT [] -synonym: "ss oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000442 -name: ds_oligo -def: "A double stranded oligonucleotide." [SO:ke] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "double stranded oligonucleotide" EXACT [] -synonym: "ds oligo" EXACT [] -synonym: "ds-oligonucleotide" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0000443 -name: polymer_attribute -def: "An attribute to describe the kind of biological sequence." [SO:ke] -synonym: "polymer attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000444 -name: three_prime_noncoding_exon -def: "Non-coding exon in the 3' UTR." [SO:ke] -synonym: "three prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000445 -name: five_prime_noncoding_exon -def: "Non-coding exon in the 5' UTR." [SO:ke] -synonym: "5' nc exon" EXACT [] -synonym: "5' non coding exon" EXACT [] -synonym: "five prime noncoding exon" EXACT [] -is_a: SO:0000198 ! noncoding_exon - -[Term] -id: SO:0000446 -name: UTR_intron -def: "Intron located in the untranslated region." 
[SO:ke] -synonym: "UTR intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0000447 -name: five_prime_UTR_intron -def: "An intron located in the 5' UTR." [SO:ke] -synonym: "five prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000448 -name: three_prime_UTR_intron -def: "An intron located in the 3' UTR." [SO:ke] -synonym: "three prime UTR intron" EXACT [] -is_a: SO:0000446 ! UTR_intron - -[Term] -id: SO:0000449 -name: random_sequence -def: "A sequence of nucleotides or amino acids which, by design, has a \"random\" order of components, given a predetermined input frequency of these components." [SO:ma] -synonym: "random sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000450 -name: interband -def: "A light region between two darkly staining bands in a polytene chromosome." [SO:ma] -synonym: "chromosome interband" RELATED [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000451 -name: gene_with_polyadenylated_mRNA -def: "A gene that encodes a polyadenylated mRNA." [SO:xp] -synonym: "gene with polyadenylated mRNA" EXACT [] -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000871 ! polyadenylated_mRNA - -[Term] -id: SO:0000452 -name: transgene_attribute -is_obsolete: true - -[Term] -id: SO:0000453 -name: transposition -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000454 -name: rasiRNA -def: "A small, 17-28-nt, small interfering RNA derived from transcripts of repetitive elements." [http://www.developmentalcell.com/content/article/abstract?uid=PIIS1534580703002284] -subset: SOFA -synonym: "repeat associated small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/RasiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000455 -name: gene_with_mRNA_with_frameshift -def: "A gene that encodes an mRNA with a frameshift." [SO:xp] -synonym: "gene with mRNA with frameshift" EXACT [] -intersection_of: SO:0001217 ! 
protein_coding_gene -intersection_of: has_quality SO:0000865 ! frameshift - -[Term] -id: SO:0000456 -name: recombinationally_rearranged_gene -def: "A gene that is recombinationally rearranged." [SO:ke] -synonym: "recombinationally rearranged gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000940 ! recombinationally_rearranged - -[Term] -id: SO:0000457 -name: interchromosomal_duplication -def: "A chromosome duplication involving an insertion from another chromosome." [SO:ke] -synonym: "interchromosomal duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:0000458 -name: D_gene -def: "Germline genomic DNA including D-region with 5' UTR and 3' UTR, also designated as D-segment." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D gene" EXACT [] -synonym: "D-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000459 -name: gene_with_trans_spliced_transcript -def: "A gene with a transcript that is trans-spliced." [SO:xp] -synonym: "gene with trans spliced transcript" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000479 ! trans_spliced_transcript - -[Term] -id: SO:0000460 -name: vertebrate_immunoglobulin_T_cell_receptor_segment -comment: I am using the term segment instead of gene here to avoid confusion with the region 'gene'. -synonym: "vertebrate immunoglobulin T cell receptor segment" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000461 -name: inversion_derived_bipartite_deficiency -def: "A chromosome generated by recombination between two inversions; has a deficiency at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite deficiency" EXACT [] -is_a: SO:1000029 ! 
chromosomal_deletion - -[Term] -id: SO:0000462 -name: pseudogenic_region -def: "A non-functional descendent of a functional entity." [SO:cjm] -subset: SOFA -synonym: "pseudogenic region" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000463 -name: encodes_alternately_spliced_transcripts -def: "A gene that encodes more than one transcript." [SO:ke] -synonym: "encodes alternately spliced transcripts" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0000464 -name: decayed_exon -def: "A non-functional descendant of an exon." [SO:ke] -subset: SOFA -synonym: "decayed exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! exon - -[Term] -id: SO:0000465 -name: inversion_derived_deficiency_plus_duplication -def: "A chromosome generated by recombination between two inversions; there is a deficiency at one end of the inversion and a duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus duplication" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000466 -name: V_gene -def: "Germline genomic DNA including L-part1, V-intron and V-exon, with the 5' UTR and 3' UTR." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene" EXACT [] -synonym: "V-GENE" EXACT [] -synonym: "variable_gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000467 -name: post_translationally_regulated_by_protein_stability -def: "An attribute describing a gene sequence where the resulting protein is regulated by the stability of the resulting protein." [SO:ke] -synonym: "post translationally regulated by protein stability" EXACT [] -synonym: "post-translationally regulated by protein stability" EXACT [] -is_a: SO:0000130 ! 
post_translationally_regulated - -[Term] -id: SO:0000468 -name: golden_path_fragment -def: "One of the pieces of sequence that make up a golden path." [SO:rd] -subset: SOFA -synonym: "golden path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000688 ! golden_path - -[Term] -id: SO:0000469 -name: post_translationally_regulated_by_protein_modification -def: "An attribute describing a gene sequence where the resulting protein is modified to regulate it." [SO:ke] -synonym: "post translationally regulated by protein modification" EXACT [] -synonym: "post-translationally regulated by protein modification" EXACT [] -is_a: SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000470 -name: J_gene -def: "Germline genomic DNA of an immunoglobulin/T-cell receptor gene including J-region with 5' UTR (SO:0000204) and 3' UTR (SO:0000205), also designated as J-segment." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J gene" EXACT [] -synonym: "J-GENE" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000471 -name: autoregulated -def: "The gene product is involved in its own transcriptional regulation." [SO:ke] -is_a: SO:0000123 ! transcriptionally_regulated - -[Term] -id: SO:0000472 -name: tiling_path -def: "A set of regions which overlap with minimal polymorphism to form a linear sequence." [SO:cjm] -subset: SOFA -synonym: "tiling path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000473 -name: negatively_autoregulated -def: "The gene product is involved in its own transcriptional regulation where it decreases transcription." [SO:ke] -synonym: "negatively autoregulated" EXACT [] -is_a: SO:0000126 ! transcriptionally_repressed -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000474 -name: tiling_path_fragment -def: "A piece of sequence that makes up a tiling_path (SO:0000472)." 
[SO:ke] -subset: SOFA -synonym: "tiling path fragment" EXACT [] -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000472 ! tiling_path - -[Term] -id: SO:0000475 -name: positively_autoregulated -def: "The gene product is involved in its own transcriptional regulation, where it increases transcription." [SO:ke] -synonym: "positively autoregulated" EXACT [] -is_a: SO:0000125 ! transcriptionally_induced -is_a: SO:0000471 ! autoregulated - -[Term] -id: SO:0000476 -name: contig_read -def: "A DNA sequencer read which is part of a contig." [SO:ke] -synonym: "contig read" EXACT [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0000477 -name: polycistronic_gene -def: "A gene that is polycistronic." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000478 -name: C_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including C-region (and introns if present) with 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "C gene" EXACT [] -synonym: "C_GENE" EXACT [] -synonym: "constant gene" EXACT [] -is_a: SO:0000460 ! vertebrate_immunoglobulin_T_cell_receptor_segment - -[Term] -id: SO:0000479 -name: trans_spliced_transcript -def: "A transcript that is trans-spliced." [SO:xp] -synonym: "trans spliced transcript" EXACT [] -synonym: "trans-spliced transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000480 -name: tiling_path_clone -def: "A clone which is part of a tiling path. A tiling path is a set of sequencing substrates, typically clones, which have been selected in order to efficiently cover a region of the genome in preparation for sequencing and assembly." [SO:ke] -synonym: "tiling path clone" EXACT [] -is_a: SO:0000151 ! clone -is_a: SO:0000474 ! tiling_path_fragment - -[Term] -id: SO:0000481 -name: terminal_inverted_repeat -def: "An inverted repeat (SO:0000294) occuring at the termini of a DNA transposon." 
[SO:ke] -synonym: "terminal inverted repeat" EXACT [] -synonym: "TIR" EXACT [] -is_a: SO:0000294 ! inverted_repeat -relationship: part_of SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000482 -name: vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor gene cluster" EXACT [] -synonym: "vertebrate_immunoglobulin/T-cell receptor gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000483 -name: nc_primary_transcript -def: "A primary transcript that is never translated into a protein." [SO:ke] -subset: SOFA -synonym: "nc primary transcript" EXACT [] -synonym: "noncoding primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000484 -name: three_prime_coding_exon_noncoding_region -def: "The sequence of the 3' exon that is not coding." [SO:ke] -subset: SOFA -synonym: "three prime coding exon noncoding region" EXACT [] -synonym: "three_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000202 ! three_prime_coding_exon - -[Term] -id: SO:0000485 -name: DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene, and one J-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-CLUSTER" EXACT [] -synonym: "DJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000486 -name: five_prime_coding_exon_noncoding_region -def: "The sequence of the 5' exon preceding the start codon." [SO:ke] -subset: SOFA -synonym: "five prime coding exon noncoding region" EXACT [] -synonym: "five_prime_exon_noncoding_region" EXACT [] -is_a: SO:0001214 ! noncoding_region_of_exon -relationship: part_of SO:0000200 ! 
five_prime_coding_exon - -[Term] -id: SO:0000487 -name: VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-C-CLUSTER" EXACT [] -synonym: "VDJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000488 -name: VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one J-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-J-CLUSTER" EXACT [] -synonym: "VDJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000489 -name: VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-C-CLUSTER" RELATED [] -synonym: "VJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000490 -name: VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-C-CLUSTER" EXACT [] -synonym: "VJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! 
J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000491 -name: VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VJ-gene and one J-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VJ)-J-CLUSTER" EXACT [] -synonym: "VJ J cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000492 -name: D_gene_recombination_feature -synonym: "D gene recombination feature" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000493 -name: three_prime_D_heptamer -def: "7 nucleotide recombination site like CACAGTG, part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-HEPTAMER" EXACT [] -synonym: "three prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000494 -name: three_prime_D_nonamer -def: "A 9 nucleotide recombination site (e.g. ACAAAAACC), part of a 3' D-recombination signal sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-NOMAMER" EXACT [] -synonym: "three prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000495 -name: three_prime_D_spacer -def: "A 12 or 23 nucleotide spacer between the 3'D-HEPTAMER and 3'D-NONAMER of a 3'D-RS." 
[http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-SPACER" EXACT [] -synonym: "three prime D spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000570 ! three_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000496 -name: five_prime_D_heptamer -def: "7 nucleotide recombination site (e.g. CACTGTG), part of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-HEPTAMER" EXACT [] -synonym: "five prime D heptamer" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000497 -name: five_prime_D_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a five_prime_D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'D-NONAMER" EXACT [] -synonym: "five prime D nonamer" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000556 ! five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000498 -name: five_prime_D_spacer -def: "12 or 23 nucleotide spacer between the 5' D-heptamer (SO:0000496) and 5' D-nonamer (SO:0000497) of a 5' D-recombination signal sequence (SO:0000556) of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'-SPACER" EXACT [] -synonym: "five prime D spacer" EXACT [] -synonym: "five prime D-spacer" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000556 ! 
five_prime_D_recombination_signal_sequence - -[Term] -id: SO:0000499 -name: virtual_sequence -def: "A continuous piece of sequence similar to the 'virtual contig' concept of the Ensembl database." [SO:ke] -subset: SOFA -synonym: "virtual sequence" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000500 -name: Hoogsteen_base_pair -def: "A type of non-canonical base-pairing. This is less energetically favourable than watson crick base pairing. Hoogsteen GC base pairs only have two hydrogen bonds." [PMID:12177293] -synonym: "Hoogsteen base pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Hoogsteen_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000501 -name: reverse_Hoogsteen_base_pair -def: "A type of non-canonical base-pairing." [SO:ke] -synonym: "reverse Hoogsteen base pair" EXACT [] -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0000502 -name: transcribed_region -def: "A region of sequence that is transcribed. This region may cover the transcript of a gene, it may emcompas the sequence covered by all of the transcripts of a alternately spliced gene, or it may cover the region transcribed by a polycistronic transcript. A gene may have 1 or more transcribed regions and a transcribed_region may belong to one or more genes." [SO:ke] -comment: This concept cam about as a direct result of the SO meeting August 2004.nThe exact nature of the relationship between transcribed_region and gene is still up for discussion. We are going with 'associated_with' for the time being. -subset: SOFA -is_obsolete: true - -[Term] -id: SO:0000503 -name: alternately_spliced_gene_encodeing_one_transcript -is_obsolete: true - -[Term] -id: SO:0000504 -name: D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene and one C-gene." 
[http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ C cluster" EXACT [] -synonym: "D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000505 -name: D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene and one DJ-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ cluster" EXACT [] -synonym: "D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000506 -name: D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J C cluster" EXACT [] -synonym: "D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000507 -name: pseudogenic_exon -def: "A non functional descendant of an exon, part of a pseudogene." [SO:ke] -comment: This is the analog of the exon of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutatations. -synonym: "pseudogenic exon" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000147 ! 
exon -relationship: part_of SO:0000516 ! pseudogenic_transcript - -[Term] -id: SO:0000508 -name: D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one D-gene, one DJ-gene, and one J-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D DJ J cluster" EXACT [] -synonym: "D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000509 -name: D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene, one J-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J C cluster" EXACT [] -synonym: "D-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000510 -name: VD_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including L-part1, V-intron and V-D-exon, with the 5' UTR (SO:0000204) and 3' UTR (SO:0000205)." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V_D_GENE" EXACT [] -synonym: "VD gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000511 -name: J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one J-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J C cluster" EXACT [] -synonym: "J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! 
C_gene - -[Term] -id: SO:0000512 -name: inversion_derived_deficiency_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a deficiency at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived deficiency plus aneuploid" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion - -[Term] -id: SO:0000513 -name: J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one J-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J cluster" EXACT [] -synonym: "J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000514 -name: J_nonamer -def: "9 nucleotide recombination site (e.g. GGTTTTTGT), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J nonamer" EXACT [] -synonym: "J-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000515 -name: J_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J heptamer" EXACT [] -synonym: "J-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000516 -name: pseudogenic_transcript -def: "A non functional descendant of a transcript, part of a pseudogene." [SO:ke] -comment: This is the analog of the transcript of a functional gene. The term was requested by Rama - SGD to allow the annotation of the parts of a pseudogene. 
Non-functional is defined as either its transcription or translation (or both) are prevented due to one or more mutatations. -synonym: "pseudogenic transcript" EXACT [] -is_a: SO:0000462 ! pseudogenic_region -relationship: non_functional_homolog_of SO:0000673 ! transcript -relationship: part_of SO:0000336 ! pseudogene - -[Term] -id: SO:0000517 -name: J_spacer -def: "12 or 23 nucleotide spacer between the J-nonamer and the J-heptamer of a J-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "J spacer" EXACT [] -synonym: "J-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000302 ! J_gene_recombination_feature - -[Term] -id: SO:0000518 -name: V_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one DJ-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ cluster" EXACT [] -synonym: "V-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000519 -name: V_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one J-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J cluster" EXACT [] -synonym: "V-(DJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000520 -name: V_VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one C-gene." 
[http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ C cluster" EXACT [] -synonym: "V-(VDJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000521 -name: V_VDJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VDJ-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ cluster" EXACT [] -synonym: "V-(VDJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000522 -name: V_VDJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene and one J-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J cluster" EXACT [] -synonym: "V-(VDJ)-J-CLUSTER" RELATED [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000523 -name: V_VJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ C cluster" EXACT [] -synonym: "V-(VJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! 
VJ_gene - -[Term] -id: SO:0000524 -name: V_VJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene and one VJ-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ cluster" EXACT [] -synonym: "V-(VJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000525 -name: V_VJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene and one J-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J cluster" EXACT [] -synonym: "V-(VJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000526 -name: V_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one V-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V cluster" EXACT [] -synonym: "V-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene - -[Term] -id: SO:0000527 -name: V_D_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ C cluster" EXACT [] -synonym: "V-D-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000478 ! 
C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000528 -name: V_D_DJ_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ cluster" EXACT [] -synonym: "V-D-(DJ)-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000529 -name: V_D_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J C cluster" EXACT [] -synonym: "V-D-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000530 -name: V_D_DJ_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one D-gene, one DJ-gene and one J-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D DJ J cluster" EXACT [] -synonym: "V-D-(DJ)-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000572 ! 
DJ_gene - -[Term] -id: SO:0000531 -name: V_D_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J C cluster" EXACT [] -synonym: "V-D-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000532 -name: V_D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one D-gene and one J-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V D J cluster" EXACT [] -synonym: "V-D-J-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000533 -name: V_heptamer -def: "7 nucleotide recombination site (e.g. CACAGTG), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V heptamer" EXACT [] -synonym: "V-HEPTAMER" EXACT [] -is_a: SO:0000561 ! heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000534 -name: V_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene and one J-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J cluster" EXACT [] -synonym: "V-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! 
V_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000535 -name: V_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one V-gene, one J-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V J C cluster" EXACT [] -synonym: "V-J-C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000536 -name: V_nonamer -def: "9 nucleotide recombination site (e.g. ACAAAAACC), part of V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V nonamer" EXACT [] -synonym: "V-NONAMER" EXACT [] -is_a: SO:0000562 ! nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000537 -name: V_spacer -def: "12 or 23 nucleotide spacer between the V-heptamer and the V-nonamer of a V-gene recombination feature of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V spacer" EXACT [] -synonym: "V-SPACER" EXACT [] -is_a: SO:0000563 ! vertebrate_immune_system_gene_recombination_spacer -relationship: part_of SO:0000538 ! V_gene_recombination_feature - -[Term] -id: SO:0000538 -name: V_gene_recombination_feature -def: "Recombination signal including V-heptamer, V-spacer and V-nonamer in 3' of V-region of a V-gene or V-sequence of an immunoglobulin/T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V gene recombination feature" EXACT [] -synonym: "V-RS" EXACT [] -is_a: SO:0000939 ! 
vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000539 -name: DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one DJ-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-C-CLUSTER" EXACT [] -synonym: "DJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000540 -name: DJ_J_C_cluster -def: "Genomic DNA in rearranged configuration including at least one D-J-GENE, one J-GENE and one C-GENE." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(DJ)-J-C-CLUSTER" EXACT [] -synonym: "DJ J C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000541 -name: VDJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one VDJ-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "(VDJ)-C-CLUSTER" EXACT [] -synonym: "VDJ C cluster" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000542 -name: V_DJ_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ C cluster" EXACT [] -synonym: "V-(DJ)-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! 
V_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000543 -name: alternately_spliced_gene_encoding_greater_than_one_transcript -is_obsolete: true - -[Term] -id: SO:0000544 -name: helitron -def: "A rolling circle transposon. Autonomous helitrons encode a 5'-to-3' DNA helicase and nuclease/ligase similar to those encoded by known rolling-circle replicons." [http://www.pnas.org/cgi/content/full/100/11/6569] -synonym: "ISCR" RELATED [] -xref: http://en.wikipedia.org/wiki/Helitron "wiki" -is_a: SO:0000182 ! DNA_transposon - -[Term] -id: SO:0000545 -name: recoding_pseudoknot -def: "The pseudoknots involved in recoding are unique in that, as they play their role as a structure, they are immediately unfolded and their now linear sequence serves as a template for decoding." [http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=33937] -synonym: "recoding pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot -relationship: part_of SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:0000546 -name: designed_sequence -synonym: "designed sequence" EXACT [] -is_a: SO:0000351 ! synthetic_sequence - -[Term] -id: SO:0000547 -name: inversion_derived_bipartite_duplication -def: "A chromosome generated by recombination between two inversions; there is a duplication at each end of the inversion." [FB:km] -synonym: "inversion derived bipartite duplication" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000548 -name: gene_with_edited_transcript -def: "A gene that encodes a transcript that is edited." [SO:xp] -synonym: "gene with edited transcript" EXACT [] -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: transcribed_to SO:0000873 ! 
edited_transcript - -[Term] -id: SO:0000549 -name: inversion_derived_duplication_plus_aneuploid -def: "A chromosome generated by recombination between two inversions; has a duplication at one end and presumed to have a deficiency or duplication at the other end of the inversion." [FB:km] -synonym: "inversion derived duplication plus aneuploid" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:0000550 -name: aneuploid_chromosome -synonym: "aneuploid chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:0000551 -name: polyA_signal_sequence -def: "The recognition sequence necessary for endonuclease cleavage of an RNA transcript that is followed by polyadenylation; consensus=AATAAA." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "poly(A) signal" EXACT [] -synonym: "polyA signal sequence" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0000552 -name: Shine_Dalgarno_sequence -def: "Region in 5' UTR where ribosome assembles on mRNA." [SO:ke] -synonym: "five prime ribosome binding site" EXACT [] -synonym: "RBS" RELATED [] -synonym: "Shine Dalgarno sequence" EXACT [] -synonym: "Shine-Dalgarno sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Shine-Dalgarno_sequence "wiki" -is_a: SO:0000139 ! ribosome_entry_site - -[Term] -id: SO:0000553 -name: polyA_site -def: "The site on an RNA transcript to which will be added adenine residues by post-transcriptional polyadenylation." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "polyA site" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR -relationship: part_of SO:0000233 ! 
mature_transcript - -[Term] -id: SO:0000554 -name: assortment_derived_deficiency_plus_duplication -is_obsolete: true - -[Term] -id: SO:0000555 -name: five_prime_clip -def: "5' most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "5' clip" RELATED [] -synonym: "five prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000556 -name: five_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 5' D-nonamer (SO:0000497), 5' D-spacer (SO:0000498), and 5' D-heptamer (SO:0000396) in 5' of the D-region of a D-gene, or in 5' of the D-region of DJ-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "5'RS" EXACT [] -synonym: "five prime D recombination signal sequence" EXACT [] -synonym: "five prime D-recombination signal sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000557 -name: three_prime_clip -def: "3'-most region of a precursor transcript that is clipped off during processing." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "3'-clip" EXACT [] -synonym: "three prime clip" EXACT [] -is_a: SO:0000303 ! clip - -[Term] -id: SO:0000558 -name: C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene including more than one C-gene." [http://imgt.cines.fr/ligmb/LIGMlect?query=7] -synonym: "C cluster" EXACT [] -synonym: "C-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000478 ! C_gene - -[Term] -id: SO:0000559 -name: D_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including more than one D-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D cluster" EXACT [] -synonym: "D-CLUSTER" EXACT [] -is_a: SO:0000482 ! 
vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene - -[Term] -id: SO:0000560 -name: D_J_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in germline configuration including at least one D-gene and one J-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D J cluster" EXACT [] -synonym: "D-J-CLUSTER" EXACT [] -is_a: SO:0000482 ! vertebrate_immunoglobulin_T_cell_receptor_gene_cluster -relationship: has_part SO:0000458 ! D_gene -relationship: has_part SO:0000470 ! J_gene - -[Term] -id: SO:0000561 -name: heptamer_of_recombination_feature_of_vertebrate_immune_system_gene -def: "Seven nucleotide recombination site (e.g. CACAGTG), part of V-gene, D-gene or J-gene recombination feature of an immunoglobulin or T-cell receptor gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "HEPTAMER" RELATED [] -synonym: "heptamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000562 -name: nonamer_of_recombination_feature_of_vertebrate_immune_system_gene -synonym: "nonamer of recombination feature of vertebrate immune system gene" EXACT [] -is_a: SO:0000939 ! vertebrate_immune_system_gene_recombination_signal_feature - -[Term] -id: SO:0000563 -name: vertebrate_immune_system_gene_recombination_spacer -synonym: "vertebrate immune system gene recombination spacer" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000564 -name: V_DJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one DJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V DJ J C cluster" EXACT [] -synonym: "V-(DJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000572 ! DJ_gene - -[Term] -id: SO:0000565 -name: V_VDJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VDJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VDJ J C cluster" EXACT [] -synonym: "V-(VDJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000574 ! VDJ_gene - -[Term] -id: SO:0000566 -name: V_VJ_J_C_cluster -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in rearranged configuration including at least one V-gene, one VJ-gene, one J-gene and one C-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V VJ J C cluster" EXACT [] -synonym: "V-(VJ)-J-C-CLUSTER" EXACT [] -is_a: SO:0000938 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -relationship: has_part SO:0000466 ! V_gene -relationship: has_part SO:0000470 ! J_gene -relationship: has_part SO:0000478 ! C_gene -relationship: has_part SO:0000576 ! VJ_gene - -[Term] -id: SO:0000567 -name: inversion_derived_aneuploid_chromosome -def: "A chromosome may be generated by recombination between two inverversions; presumed to have a deficiency or duplication at each end of the inversion." [FB:km] -synonym: "inversion derived aneuploid chromosome" EXACT [] -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:0000568 -name: bidirectional_promoter -synonym: "bidirectional promoter" EXACT [] -is_a: SO:0000167 ! 
promoter - -[Term] -id: SO:0000569 -name: retrotransposed -alt_id: SO:0100042 -def: "An attribute of a feature that occured as the product of a reverse transcriptase mediated event." [SO:ke] -comment: GO:0003964 RNA-directed DNA polymerase activity. -xref: http://en.wikipedia.org/wiki/Retrotransposed "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000570 -name: three_prime_D_recombination_signal_sequence -def: "Recombination signal of an immunoglobulin/T-cell receptor gene, including the 3' D-heptamer (SO:0000493), 3' D-spacer, and 3' D-nonamer (SO:0000494) in 3' of the D-region of a D-gene." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "3'D-RS" EXACT [] -synonym: "three prime D recombination signal sequence" EXACT [] -synonym: "three_prime_D-recombination_signal_sequence" EXACT [] -is_a: SO:0000492 ! D_gene_recombination_feature - -[Term] -id: SO:0000571 -name: miRNA_encoding -synonym: "miRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000572 -name: DJ_gene -def: "Genomic DNA of immunoglobulin/T-cell receptor gene in partially rearranged genomic DNA including D-J-region with 5' UTR and 3' UTR, also designated as D-J-segment." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "D-J-GENE" EXACT [] -synonym: "DJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000573 -name: rRNA_encoding -synonym: "rRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000574 -name: VDJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-D-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-D-J-GENE" EXACT [] -synonym: "VDJ gene" EXACT [] -is_a: SO:0000936 ! 
vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000575 -name: scRNA_encoding -synonym: "scRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000576 -name: VJ_gene -def: "Rearranged genomic DNA of immunoglobulin/T-cell receptor gene including L-part1, V-intron and V-J-exon, with the 5'UTR (SO:0000204) and 3'UTR (SO:0000205)." [http://imgt.cines.fr/cgi-bin/IMGTlect.jv?query=7#] -synonym: "V-J-GENE" EXACT [] -synonym: "VJ gene" EXACT [] -is_a: SO:0000936 ! vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment - -[Term] -id: SO:0000577 -name: centromere -def: "A region of chromosome where the spindle fibers attach during mitosis and meiosis." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Centromere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000578 -name: snoRNA_encoding -synonym: "snoRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000579 -name: edited_transcript_feature -def: "A locatable feature on a transcript that is edited." [SO:ma] -synonym: "edited transcript feature" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000580 -name: methylation_guide_snoRNA_primary_transcript -def: "A primary transcript encoding a methylation guide small nucleolar RNA." [SO:ke] -synonym: "methylation guide snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000581 -name: cap -def: "A structure consisting of a 7-methylguanosine in 5'-5' triphosphate linkage with the first nucleotide of an mRNA. It is added post-transcriptionally, and is not encoded in the DNA." [http://seqcore.brcf.med.umich.edu/doc/educ/dnapr/mbglossary/mbgloss.html] -subset: SOFA -xref: http://en.wikipedia.org/wiki/5%27_cap "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000582 -name: rRNA_cleavage_snoRNA_primary_transcript -def: "A primary transcript encoding an rRNA cleavage snoRNA." 
[SO:ke] -synonym: "rRNA cleavage snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000583 -name: pre_edited_region -def: "The region of a transcript that will be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "pre edited region" EXACT [] -synonym: "pre-edited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000584 -name: tmRNA -def: "A tmRNA liberates a mRNA from a stalled ribosome. To accomplish this part of the tmRNA is used as a reading frame that ends in a translation stop signal. The broken mRNA is replaced in the ribosome by the tmRNA and translation of the tmRNA leads to addition of a proteolysis tag to the incomplete protein enabling recognition by a protease. Recently a number of permuted tmRNAs genes have been found encoded in two parts. TmRNAs have been identified in eubacteria and some chloroplasts but are absent from archeal and eukaryote nuclear genomes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00023] -synonym: "10Sa RNA" RELATED [] -synonym: "ssrA" RELATED [] -xref: http://en.wikipedia.org/wiki/TmRNA "wiki" -is_a: SO:0000370 ! small_regulatory_ncRNA - -[Term] -id: SO:0000585 -name: C_D_box_snoRNA_encoding -synonym: "C/D box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0000586 -name: tmRNA_primary_transcript -def: "A primary transcript encoding a tmRNA (SO:0000584)." [SO:ke] -synonym: "10Sa RNA primary transcript" RELATED [] -synonym: "ssrA RNA primary transcript" RELATED [] -synonym: "tmRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000587 -name: group_I_intron -def: "Group I catalytic introns are large self-splicing ribozymes. They catalyse their own excision from mRNA, tRNA and rRNA precursors in a wide range of organisms. The core secondary structure consists of 9 paired regions (P1-P9). 
These fold to essentially two domains, the P4-P6 domain (formed from the stacking of P5, P4, P6 and P6a helices) and the P3-P9 domain (formed from the P8, P3, P7 and P9 helices). Group I catalytic introns often have long ORFs inserted in loop regions." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00028] -comment: GO:0000372. -subset: SOFA -synonym: "group I intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_I_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000588 -name: autocatalytically_spliced_intron -def: "A self spliced intron." [SO:ke] -subset: SOFA -synonym: "autocatalytically spliced intron" EXACT [] -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001186 ! ribozymic - -[Term] -id: SO:0000589 -name: SRP_RNA_primary_transcript -def: "A primary transcript encoding a signal recognition particle RNA." [SO:ke] -synonym: "SRP RNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000590 -name: SRP_RNA -def: "The signal recognition particle (SRP) is a universally conserved ribonucleoprotein. It is involved in the co-translational targeting of proteins to membranes. The eukaryotic SRP consists of a 300-nucleotide 7S RNA and six proteins: SRPs 72, 68, 54, 19, 14, and 9. Archaeal SRP consists of a 7S RNA and homologues of the eukaryotic SRP19 and SRP54 proteins. In most eubacteria, the SRP consists of a 4.5S RNA and the Ffh protein (a homologue of the eukaryotic SRP54 protein). Eukaryotic and archaeal 7S RNAs have very similar secondary structures, with eight helical elements. These fold into the Alu and S domains, separated by a long linker region. Eubacterial SRP is generally a simpler structure, with the M domain of Ffh bound to a region of the 4.5S RNA that corresponds to helix 8 of the eukaryotic and archaeal SRP S domain. Some Gram-positive bacteria (e.g. Bacillus subtilis), however, have a larger SRP RNA that also has an Alu domain. 
The Alu domain is thought to mediate the peptide chain elongation retardation function of the SRP. The universally conserved helix which interacts with the SRP54/Ffh M domain mediates signal sequence recognition. In eukaryotes and archaea, the SRP19-helix 6 complex is thought to be involved in SRP assembly and stabilizes helix 8 for SRP54 binding." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00017] -subset: SOFA -synonym: "7S RNA" RELATED [] -synonym: "signal recognition particle RNA" RELATED [] -synonym: "SRP RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000589 ! SRP_RNA_primary_transcript - -[Term] -id: SO:0000591 -name: pseudoknot -def: "A tertiary structure in RNA where nucleotides in a loop form base pairs with a region of RNA downstream of the loop." [RSC:cb] -xref: http://en.wikipedia.org/wiki/Pseudoknot "wiki" -is_a: SO:0000002 ! sequence_secondary_structure - -[Term] -id: SO:0000592 -name: H_pseudoknot -def: "A pseudoknot which contains two stems and at least two loops." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=10334330&dopt=Abstract] -synonym: "classical pseudoknot" EXACT [] -synonym: "H pseudoknot" EXACT [] -synonym: "H-pseudoknot" EXACT [] -synonym: "H-type pseudoknot" EXACT [] -synonym: "hairpin-type pseudoknot" EXACT [] -is_a: SO:0000591 ! pseudoknot - -[Term] -id: SO:0000593 -name: C_D_box_snoRNA -def: "Most box C/D snoRNAs also contain long (>10 nt) sequences complementary to rRNA. Boxes C and D, as well as boxes C' and D', are usually located in close proximity, and form a structure known as the box C/D motif. This motif is important for snoRNA stability, processing, nucleolar targeting and function. A small number of box C/D snoRNAs are involved in rRNA processing; most, however, are known or predicted to serve as guide RNAs in ribose methylation of rRNA. 
Targeting involves direct base pairing of the snoRNA at the rRNA site to be modified and selection of a rRNA nucleotide a fixed distance from box D or D'." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "box C/D snoRNA" EXACT [] -synonym: "C D box snoRNA" EXACT [] -synonym: "C/D box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000595 ! C_D_box_snoRNA_primary_transcript - -[Term] -id: SO:0000594 -name: H_ACA_box_snoRNA -def: "Members of the box H/ACA family contain an ACA triplet, exactly 3 nt upstream from the 3' end and an H-box in a hinge region that links two structurally similar functional domains of the molecule. Both boxes are important for snoRNA biosynthesis and function. A few box H/ACA snoRNAs are involved in rRNA processing; most others are known or predicted to participate in selection of uridine nucleosides in rRNA to be converted to pseudouridines. Site selection is mediated by direct base pairing of the snoRNA with rRNA through one or both targeting domains." [http://www.bio.umass.edu/biochem/rna-sequence/Yeast_snoRNA_Database/snoRNA_DataBase.html] -synonym: "box H/ACA snoRNA" EXACT [] -synonym: "H ACA box snoRNA" EXACT [] -synonym: "H/ACA box snoRNA" EXACT [] -is_a: SO:0000275 ! snoRNA -relationship: derives_from SO:0000596 ! H_ACA_box_snoRNA_primary_transcript - -[Term] -id: SO:0000595 -name: C_D_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box C/D family." [SO:ke] -synonym: "C/D box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0000596 -name: H_ACA_box_snoRNA_primary_transcript -def: "A primary transcript encoding a small nucleolar RNA of the box H/ACA family." [SO:ke] -synonym: "H ACA box snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! 
snoRNA_primary_transcript - -[Term] -id: SO:0000597 -name: transcript_edited_by_U_insertion/deletion -def: "The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa." [http://www.rna.ucla.edu/index.html] -is_obsolete: true - -[Term] -id: SO:0000598 -name: edited_by_C_insertion_and_dinucleotide_insertion -synonym: "transcript_edited_by_C-insertion_and_dinucleotide_insertion" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000599 -name: edited_by_C_to_U_substitution -is_obsolete: true - -[Term] -id: SO:0000600 -name: edited_by_A_to_I_substitution -is_obsolete: true - -[Term] -id: SO:0000601 -name: edited_by_G_addition -is_obsolete: true - -[Term] -id: SO:0000602 -name: guide_RNA -def: "A short 3'-uridylated RNA that can form a duplex (except for its post-transcriptionally added oligo_U tail (SO:0000609)) with a stretch of mature edited mRNA." [http://www.rna.ucla.edu/index.html] -subset: SOFA -synonym: "gRNA" EXACT [] -synonym: "guide RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Guide_RNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000603 -name: group_II_intron -def: "Group II introns are found in rRNA, tRNA and mRNA of organelles in fungi, plants and protists, and also in mRNA in bacteria. They are large self-splicing ribozymes and have 6 structural domains (usually designated dI to dVI). A subset of group II introns also encode essential splicing proteins in intronic ORFs. The length of these introns can therefore be up to 3kb. Splicing occurs in almost identical fashion to nuclear pre-mRNA splicing with two transesterification steps. The 2' hydroxyl of a bulged adenosine in domain VI attacks the 5' splice site, followed by nucleophilic attack on the 3' splice site by the 3' OH of the upstream exon. 
Protein machinery is required for splicing in vivo, and long range intron-intron and intron-exon interactions are important for splice site positioning. Group II introns are further sub-classified into groups IIA and IIB which differ in splice site consensus, distance of bulged A from 3' splice site, some tertiary interactions, and intronic ORF phylogeny." [http://www.sanger.ac.uk/Software/Rfam/browse/index.shtml] -comment: GO:0000373. -subset: SOFA -synonym: "group II intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_II_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0000604 -name: editing_block -def: "Edited mRNA sequence mediated by a single guide RNA (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing block" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000605 -name: intergenic_region -def: "A region containing or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the chromosome." [SO:cjm] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -subset: SOFA -synonym: "intergenic region" EXACT [] -xref: http://en.wikipedia.org/wiki/Intergenic_region "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000606 -name: editing_domain -def: "Edited mRNA sequence mediated by two or more overlapping guide RNAs (SO:0000602)." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "editing domain" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000607 -name: unedited_region -def: "The region of an edited transcript that will not be edited." [http://dna.kdna.ucla.edu/rna/index.aspx] -synonym: "unedited region" EXACT [] -is_a: SO:0000579 ! edited_transcript_feature - -[Term] -id: SO:0000608 -name: H_ACA_box_snoRNA_encoding -synonym: "H ACA box snoRNA encoding" EXACT [] -is_a: SO:0000578 ! 
snoRNA_encoding - -[Term] -id: SO:0000609 -name: oligo_U_tail -def: "The string of non-encoded U's at the 3' end of a guide RNA (SO:0000602)." [http://www.rna.ucla.edu/] -synonym: "oligo U tail" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000602 ! guide_RNA - -[Term] -id: SO:0000610 -name: polyA_sequence -def: "Sequence of about 100 nucleotides of A added to the 3' end of most eukaryotic mRNAs." [SO:ke] -subset: SOFA -synonym: "polyA sequence" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: adjacent_to SO:0000234 ! mRNA - -[Term] -id: SO:0000611 -name: branch_site -def: "A pyrimidine rich sequence near the 3' end of an intron to which the 5'end becomes covalently bound during nuclear splicing. The resulting structure resembles a lariat." [SO:ke] -subset: SOFA -synonym: "branch point" EXACT [] -synonym: "branch site" EXACT [] -synonym: "branch_point" EXACT [] -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000612 -name: polypyrimidine_tract -def: "The polypyrimidine tract is one of the cis-acting sequence elements directing intron removal in pre-mRNA splicing." [http://nar.oupjournals.org/cgi/content/full/25/4/888] -subset: SOFA -synonym: "polypyrimidine tract" EXACT [] -xref: http://en.wikipedia.org/wiki/Polypyrimidine_tract "wiki" -is_a: SO:0000841 ! spliceosomal_intron_region - -[Term] -id: SO:0000613 -name: bacterial_RNApol_promoter -def: "A DNA sequence to which bacterial RNA polymerase binds, to begin transcription." [SO:ke] -synonym: "bacterial RNApol promoter" EXACT [] -is_a: SO:0000752 ! gene_group_regulatory_region -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0000614 -name: bacterial_terminator -def: "A terminator signal for bacterial transcription." [SO:ke] -synonym: "bacterial terminator" EXACT [] -is_a: SO:0000141 ! terminator -is_a: SO:0000752 ! 
gene_group_regulatory_region - -[Term] -id: SO:0000615 -name: terminator_of_type_2_RNApol_III_promoter -def: "A terminator signal for RNA polymerase III transcription." [SO:ke] -synonym: "terminator of type 2 RNApol III promoter" EXACT [] -is_a: SO:0000951 ! eukaryotic_terminator - -[Term] -id: SO:0000616 -name: transcription_end_site -def: "The base where transcription ends." [SO:ke] -subset: SOFA -synonym: "transcription end site" EXACT [] -is_a: SO:0000835 ! primary_transcript_region - -[Term] -id: SO:0000617 -name: RNApol_III_promoter_type_1 -synonym: "RNApol III promoter type 1" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000618 -name: RNApol_III_promoter_type_2 -synonym: "RNApol III promoter type 2" EXACT [] -synonym: "tRNA promoter" RELATED [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000619 -name: A_box -def: "A variably distant linear promoter region recognised by TFIIIC, with consensus sequence TGGCnnAGTGG." [SO:ke] -synonym: "A-box" EXACT [] -xref: http://en.wikipedia.org/wiki/A-box "wiki" -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000620 -name: B_box -def: "A variably distant linear promoter region recognised by TFIIIC, with consensus sequence AGGTTCCAnnCC." [SO:ke] -synonym: "B-box" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000618 ! RNApol_III_promoter_type_2 - -[Term] -id: SO:0000621 -name: RNApol_III_promoter_type_3 -synonym: "RNApol III promoter type 3" EXACT [] -is_a: SO:0000171 ! RNApol_III_promoter - -[Term] -id: SO:0000622 -name: C_box -def: "An RNA polymerase III type 1 promoter with consensus sequence CAnnCCn." [SO:ke] -synonym: "C-box" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000617 ! RNApol_III_promoter_type_1 - -[Term] -id: SO:0000623 -name: snRNA_encoding -synonym: "snRNA encoding" EXACT [] -is_a: SO:0000011 ! 
non_protein_coding - -[Term] -id: SO:0000624 -name: telomere -def: "A specific structure at the end of a linear chromosome, required for the integrity and maintenance of the end." [SO:ma] -subset: SOFA -synonym: "telomeric DNA" EXACT [] -synonym: "telomeric sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Telomere "wiki" -is_a: SO:0000628 ! chromosomal_structural_element - -[Term] -id: SO:0000625 -name: silencer -def: "A regulatory region which upon binding of transcription factors, suppress the transcription of the gene or genes they control." [SO:ke] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Silencer_(DNA) "wiki" -is_a: SO:0000727 ! CRM - -[Term] -id: SO:0000626 -name: chromosomal_regulatory_element -synonym: "chromosomal regulatory element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000627 -name: insulator -def: "A trancriptional cis regulatory region that when located between a CM and a gene's promoter prevents the CRM from modulating that genes expression." [SO:regcreative] -subset: SOFA -synonym: "insulator element" EXACT [] -xref: http://en.wikipedia.org/wiki/Insulator_(genetics) "wiki" -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0000628 -name: chromosomal_structural_element -subset: SOFA -synonym: "chromosomal structural element" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000629 -name: five_prime_open_reading_frame -synonym: "five prime open reading frame" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000204 ! five_prime_UTR - -[Term] -id: SO:0000630 -name: upstream_AUG_codon -def: "A start codon upstream of the ORF." [SO:ke] -synonym: "upstream AUG codon" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0000631 -name: polycistronic_primary_transcript -def: "A primary transcript encoding for more than one gene product." 
[SO:ke] -synonym: "polycistronic primary transcript" EXACT [] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000632 -name: monocistronic_primary_transcript -def: "A primary transcript encoding for one gene product." [SO:ke] -synonym: "monocistronic primary transcript" EXACT [] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000633 -name: monocistronic_mRNA -def: "An mRNA with either a single protein product, or for which the regions encoding all its protein products overlap." [SO:rd] -synonym: "monocistronic mRNA" EXACT [] -synonym: "monocistronic processed transcript" EXACT [] -xref: http://en.wikipedia.org/wiki/Monocistronic_mRNA "wiki" -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000634 -name: polycistronic_mRNA -def: "An mRNA that encodes multiple proteins from at least two non-overlapping regions." [SO:rd] -synonym: "polycistronic mRNA" EXACT [] -synonym: "polycistronic processed transcript" RELATED [] -xref: http://en.wikipedia.org/wiki/Polycistronic_mRNA "wiki" -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000880 ! polycistronic - -[Term] -id: SO:0000635 -name: mini_exon_donor_RNA -def: "A primary transcript that donates the spliced leader to other mRNA." [SO:ke] -synonym: "mini exon donor RNA" EXACT [] -synonym: "mini-exon donor RNA" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000636 -name: spliced_leader_RNA -synonym: "mini-exon" RELATED [] -synonym: "spliced leader RNA" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000635 ! mini_exon_donor_RNA - -[Term] -id: SO:0000637 -name: engineered_plasmid -def: "A plasmid that is engineered." 
[SO:xp] -synonym: "engineered plasmid" EXACT [] -synonym: "engineered plasmid gene" RELATED [] -intersection_of: SO:0000155 ! plasmid -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000638 -name: transcribed_spacer_region -def: "Part of an rRNA transcription unit that is transcribed but discarded during maturation, not giving rise to any part of rRNA." [http://oregonstate.edu/instruction/bb492/general/glossary.html] -synonym: "transcribed spacer region" EXACT [] -is_a: SO:0000838 ! rRNA_primary_transcript_region - -[Term] -id: SO:0000639 -name: internal_transcribed_spacer_region -def: "Non-coding regions of DNA sequence that separate genes coding for the 28S, 5.8S, and 18S ribosomal RNAs." [SO:ke] -synonym: "internal transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000640 -name: external_transcribed_spacer_region -def: "Non-coding regions of DNA that precede the sequence that codes for the ribosomal RNA." [SO:ke] -synonym: "external transcribed spacer region" EXACT [] -is_a: SO:0000638 ! transcribed_spacer_region - -[Term] -id: SO:0000641 -name: tetranucleotide_repeat_microsatellite_feature -synonym: "tetranucleotide repeat microsatellite feature" EXACT [] -is_a: SO:0000289 ! microsatellite - -[Term] -id: SO:0000642 -name: SRP_RNA_encoding -synonym: "SRP RNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000643 -name: minisatellite -def: "A repeat region containing tandemly repeated sequences having a unit length of 10 to 40 bp." [http://www.informatics.jax.org/silver/glossary.shtml] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Minisatellite "wiki" -is_a: SO:0000005 ! satellite_DNA - -[Term] -id: SO:0000644 -name: antisense_RNA -def: "Antisense RNA is RNA that is transcribed from the coding, rather than the template, strand of DNA. It is therefore complementary to mRNA." 
[SO:ke] -subset: SOFA -synonym: "antisense RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Antisense_RNA "wiki" -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000645 ! antisense_primary_transcript - -[Term] -id: SO:0000645 -name: antisense_primary_transcript -def: "The reverse complement of the primary transcript." [SO:ke] -subset: SOFA -synonym: "antisense primary transcript" EXACT [] -is_a: SO:0000185 ! primary_transcript - -[Term] -id: SO:0000646 -name: siRNA -def: "A small RNA molecule that is the product of a longer exogenous or endogenous dsRNA, which is either a bimolecular duplex or very long hairpin, processed (via the Dicer pathway) such that numerous siRNAs accumulate from both strands of the dsRNA. SRNAs trigger the cleavage of their target molecules." [PMID:12592000] -subset: SOFA -synonym: "small interfering RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/SiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000647 -name: miRNA_primary_transcript -def: "A primary transcript encoding a micro RNA." [SO:ke] -synonym: "micro RNA primary transcript" EXACT [] -synonym: "miRNA primary transcript" EXACT [] -is_a: SO:0000483 ! nc_primary_transcript - -[Term] -id: SO:0000648 -name: stRNA_primary_transcript -def: "A primary transcript encoding a small temporal mRNA (SO:0000649)." [SO:ke] -synonym: "small temporal RNA primary transcript" EXACT [] -synonym: "stRNA primary transcript" EXACT [] -is_a: SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0000649 -name: stRNA -def: "Non-coding RNAs of about 21 nucleotides in length that regulate temporal development; first discovered in C. elegans." [PMID:11081512] -subset: SOFA -synonym: "small temporal RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/StRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000650 -name: small_subunit_rRNA -def: "Ribosomal RNA transcript that structures the small subunit of the ribosome." 
[SO:ke] -subset: SOFA -synonym: "small subunit rRNA" EXACT [] -synonym: "SSU RNA" EXACT [RSC:cb] -synonym: "SSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000255 ! rRNA_small_subunit_primary_transcript - -[Term] -id: SO:0000651 -name: large_subunit_rRNA -def: "Ribosomal RNA transcript that structures the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "large subunit rRNA" EXACT [] -synonym: "LSU RNA" EXACT [RSC:cb] -synonym: "LSU rRNA" EXACT [RSC:cb] -is_a: SO:0000252 ! rRNA -relationship: derives_from SO:0000325 ! rRNA_large_subunit_primary_transcript - -[Term] -id: SO:0000652 -name: rRNA_5S -def: "5S ribosomal RNA (5S rRNA) is a component of the large ribosomal subunit in both prokaryotes and eukaryotes. In eukaryotes, it is synthesised by RNA polymerase III (the other eukaryotic rRNAs are cleaved from a 45S precursor synthesised by RNA polymerase I). In Xenopus oocytes, it has been shown that fingers 4-7 of the nine-zinc finger transcription factor TFIIIA can bind to the central region of 5S RNA. Thus, in addition to positively regulating 5S rRNA transcription, TFIIIA also stabilises 5S rRNA until it is required for transcription." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00001] -subset: SOFA -synonym: "5S LSU rRNA" EXACT [] -synonym: "5S ribosomal RNA" EXACT [] -synonym: "5S rRNA" EXACT [] -synonym: "rRNA 5S" EXACT [] -xref: http://en.wikipedia.org/wiki/5S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000653 -name: rRNA_28S -def: "A component of the large ribosomal subunit." [SO:ke] -subset: SOFA -synonym: "28S LSU rRNA" EXACT [] -synonym: "28S ribosomal RNA" EXACT [] -synonym: "28S rRNA" EXACT [] -synonym: "rRNA 28S" EXACT [] -xref: http://en.wikipedia.org/wiki/28S_ribosomal_RNA "wiki" -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0000654 -name: maxicircle_gene -def: "A mitochondrial gene located in a maxicircle." 
[SO:xp] -synonym: "maxi-circle gene" EXACT [] -synonym: "maxicircle gene" EXACT [] -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000742 ! maxicircle - -[Term] -id: SO:0000655 -name: ncRNA -def: "An RNA transcript that does not encode for a protein rather the RNA molecule is the gene product." [SO:ke] -comment: A ncRNA is a processed_transcript, so it may not contain parts such as transcribed_spacer_regions that are removed in the act of processing. For the corresponding primary_transcripts, please see term SO:0000483 nc_primary_transcript. -subset: SOFA -synonym: "noncoding RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/NcRNA "wiki" -is_a: SO:0000233 ! mature_transcript - -[Term] -id: SO:0000656 -name: stRNA_encoding -synonym: "stRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000657 -name: repeat_region -def: "A region of sequence containing one or more repeat units." [SO:ke] -subset: SOFA -synonym: "repeat region" EXACT [] -is_a: SO:0001412 ! topologically_defined_region -relationship: has_part SO:0000726 ! repeat_unit - -[Term] -id: SO:0000658 -name: dispersed_repeat -def: "A repeat that is located at dispersed sites in the genome." [SO:ke] -subset: SOFA -synonym: "dispersed repeat" EXACT [] -synonym: "interspersed repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Interspersed_repeat "wiki" -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000659 -name: tmRNA_encoding -synonym: "tmRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000660 -name: DNA_invertase_target_sequence -is_obsolete: true - -[Term] -id: SO:0000661 -name: intron_attribute -is_obsolete: true - -[Term] -id: SO:0000662 -name: spliceosomal_intron -def: "An intron which is spliced by the spliceosome." [SO:ke] -comment: GO:0000398. -subset: SOFA -synonym: "spliceosomal intron" EXACT [] -is_a: SO:0000188 ! 
intron - -[Term] -id: SO:0000663 -name: tRNA_encoding -synonym: "tRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000664 -name: introgressed_chromosome_region -synonym: "introgressed chromosome region" EXACT [] -is_a: SO:0000830 ! chromosome_part - -[Term] -id: SO:0000665 -name: monocistronic_transcript -def: "A transcript that is monocistronic." [SO:xp] -synonym: "monocistronic transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000878 ! monocistronic - -[Term] -id: SO:0000666 -name: mobile_intron -def: "An intron (mitochondrial, chloroplast, nuclear or prokaryotic) that encodes a double strand sequence specific endonuclease allowing for mobility." [SO:ke] -synonym: "mobile intron" EXACT [] -intersection_of: SO:0000188 ! intron -intersection_of: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0000667 -name: insertion -def: "A region of sequence that has been inserted." [SO:ke] -subset: SOFA -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000668 -name: EST_match -def: "A match against an EST sequence." [SO:ke] -subset: SOFA -synonym: "EST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000669 -name: sequence_rearrangement_feature -synonym: "sequence rearrangement feature" EXACT [] -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000670 -name: chromosome_breakage_sequence -def: "A sequence within the micronuclear DNA of ciliates at which chromosome breakage and telomere addition occurs during nuclear differentiation." [SO:ma] -synonym: "chromosome breakage sequence" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000671 -name: internal_eliminated_sequence -def: "A sequence eliminated from the genome of ciliates during nuclear differentiation." [SO:ma] -synonym: "internal eliminated sequence" EXACT [] -is_a: SO:0000669 ! 
sequence_rearrangement_feature - -[Term] -id: SO:0000672 -name: macronucleus_destined_segment -def: "A sequence that is conserved, although rearranged relative to the micronucleus, in the macronucleus of a ciliate genome." [SO:ma] -synonym: "macronucleus destined segment" EXACT [] -is_a: SO:0000669 ! sequence_rearrangement_feature - -[Term] -id: SO:0000673 -name: transcript -def: "An RNA synthesized on a DNA or RNA template by an RNA polymerase." [SO:ma] -subset: SOFA -xref: http://en.wikipedia.org/wiki/RNA "wiki" -is_a: SO:0000831 ! gene_member_region - -[Term] -id: SO:0000674 -name: non_canonical_splice_site -def: "A splice site where the donor and acceptor sites differ from the canonical form." [SO:ke] -synonym: "non canonical splice site" EXACT [] -synonym: "non-canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000678 -consider: SO:0000679 - -[Term] -id: SO:0000675 -name: canonical_splice_site -def: "The major class of splice site with dinucleotides GT and AG for donor and acceptor sites, respectively." [SO:ke] -synonym: "canonical splice site" EXACT [] -is_obsolete: true -consider: SO:0000676 -consider: SO:0000677 - -[Term] -id: SO:0000676 -name: canonical_three_prime_splice_site -def: "The canonical 3' splice site has the sequence \"AG\"." [SO:ke] -synonym: "canonical 3' splice site" EXACT [] -synonym: "canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000677 -name: canonical_five_prime_splice_site -def: "The canonical 5' splice site has the sequence \"GT\"." [SO:ke] -synonym: "canonical 5' splice site" EXACT [] -synonym: "canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000678 -name: non_canonical_three_prime_splice_site -def: "A 3' splice site that does not have the sequence \"AG\"." 
[SO:ke] -synonym: "non canonical 3' splice site" RELATED [] -synonym: "non canonical three prime splice site" EXACT [] -synonym: "non-canonical three prime splice site" EXACT [] -is_a: SO:0000164 ! three_prime_cis_splice_site - -[Term] -id: SO:0000679 -name: non_canonical_five_prime_splice_site -def: "A 5' splice site which does not have the sequence \"GT\"." [SO:ke] -synonym: "non canonical 5' splice site" EXACT [] -synonym: "non canonical five prime splice site" EXACT [] -synonym: "non-canonical five prime splice site" EXACT [] -is_a: SO:0000163 ! five_prime_cis_splice_site - -[Term] -id: SO:0000680 -name: non_canonical_start_codon -def: "A start codon that is not the usual AUG sequence." [SO:ke] -synonym: "non ATG start codon" EXACT [] -synonym: "non canonical start codon" EXACT [] -synonym: "non-canonical start codon" EXACT [] -is_a: SO:0000318 ! start_codon - -[Term] -id: SO:0000681 -name: aberrant_processed_transcript -def: "A transcript that has been processed \"incorrectly\", for example by the failure of splicing of one or more exons." [SO:ke] -synonym: "aberrant processed transcript" EXACT [] -is_a: SO:0000673 ! transcript - -[Term] -id: SO:0000682 -name: splicing_feature -is_obsolete: true - -[Term] -id: SO:0000683 -name: exonic_splice_enhancer -def: "Exonic splicing enhancers (ESEs) facilitate exon definition by assisting in the recruitment of splicing factors to the adjacent intron." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12403462&dopt=Abstract] -synonym: "exonic splice enhancer" EXACT [] -is_a: SO:0000344 ! splice_enhancer - -[Term] -id: SO:0000684 -name: nuclease_sensitive_site -def: "A region of nucleotide sequence targeted by a nuclease enzyme." [SO:ma] -subset: SOFA -synonym: "nuclease sensitive site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0000685 -name: DNAseI_hypersensitive_site -synonym: "DHS" EXACT [] -synonym: "DNAseI hypersensitive site" EXACT [] -is_a: SO:0000322 ! 
nuclease_hypersensitive_site - -[Term] -id: SO:0000686 -name: translocation_element -def: "For some translocations, particularly but not exclusively, reciprocal translocations, the chromosomes carrying non-homologous centromeres may be recovered independently. These chromosomes are described as translocation elements." [SO:ma] -synonym: "translocation element" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:0000687 -name: deletion_junction -def: "The space between two bases in a sequence which marks the position where a deletion has occurred." [SO:ke] -subset: SOFA -synonym: "deletion junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0000688 -name: golden_path -def: "A set of subregions selected from sequence contigs which when concatenated form a nonredundant linear sequence." [SO:ls] -subset: SOFA -synonym: "golden path" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000689 -name: cDNA_match -def: "A match against cDNA sequence." [SO:ke] -subset: SOFA -synonym: "cDNA match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0000690 -name: gene_with_polycistronic_transcript -def: "A gene that encodes a polycistronic transcript." [SO:xp] -synonym: "gene with polycistronic transcript" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: transcribed_to SO:0000078 ! polycistronic_transcript - -[Term] -id: SO:0000691 -name: cleaved_initiator_methionine -alt_id: BS:00067 -def: "The initiator methionine that has been cleaved from a mature polypeptide sequence." [EBIBS:GAR] -subset: biosapiens -synonym: "cleaved initiator methionine" EXACT [] -synonym: "init_met" RELATED [uniprot:feature_type] -synonym: "initiator methionine" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000692 -name: gene_with_dicistronic_transcript -def: "A gene that encodes a dicistronic transcript." 
[SO:xp] -synonym: "gene with dicistronic transcript" EXACT [] -intersection_of: SO:0000690 ! gene_with_polycistronic_transcript -intersection_of: transcribed_to SO:0000079 ! dicistronic_transcript - -[Term] -id: SO:0000693 -name: gene_with_recoded_mRNA -def: "A gene that encodes an mRNA that is recoded." [SO:xp] -synonym: "gene with recoded mRNA" EXACT [] -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000881 ! recoded - -[Term] -id: SO:0000694 -name: SNP -def: "SNPs are single base pair positions in genomic DNA at which different sequence alternatives (alleles) exist in normal individuals in some population(s), wherein the least frequent allele has an abundance of 1% or greater." [SO:cb] -subset: SOFA -synonym: "single nucleotide polymorphism" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:0000695 -name: reagent -def: "A sequence used in experiment." [SO:ke] -comment: Requested by Lynn Crosby, jan 2006. -subset: SOFA -is_a: SO:0001409 ! biomaterial_region - -[Term] -id: SO:0000696 -name: oligo -def: "A short oligonucleotide sequence, of length on the order of 10's of bases; either single or double stranded." [SO:ma] -subset: SOFA -synonym: "oligonucleotide" EXACT [] -xref: http://en.wikipedia.org/wiki/Oligonucleotide "wiki" -is_a: SO:0000695 ! reagent - -[Term] -id: SO:0000697 -name: gene_with_stop_codon_read_through -def: "A gene that encodes a transcript with stop codon readthrough." [SO:xp] -synonym: "gene with stop codon read through" EXACT [] -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000698 -name: gene_with_stop_codon_redefined_as_pyrrolysine -def: "A gene encoding an mRNA that has the stop codon redefined as pyrrolysine." [SO:xp] -synonym: "gene with stop codon redefined as pyrrolysine" EXACT [] -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_part SO:0000884 ! 
stop_codon_redefined_as_pyrrolysine - -[Term] -id: SO:0000699 -name: junction -def: "A sequence_feature with an extent of zero." [SO:ke] -comment: A junction is a boundary between regions. A boundary has an extent of zero. -subset: SOFA -synonym: "boundary" EXACT [] -is_a: SO:0000110 ! sequence_feature -disjoint_from: SO:0000001 ! region - -[Term] -id: SO:0000700 -name: remark -def: "A comment about the sequence." [SO:ke] -subset: SOFA -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000701 -name: possible_base_call_error -def: "A region of sequence where the validity of the base calling is questionable." [SO:ke] -subset: SOFA -synonym: "possible base call error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000702 -name: possible_assembly_error -def: "A region of sequence where there may have been an error in the assembly." [SO:ke] -subset: SOFA -synonym: "possible assembly error" EXACT [] -is_a: SO:0000413 ! sequence_difference - -[Term] -id: SO:0000703 -name: experimental_result_region -def: "A region of sequence implicated in an experimental result." [SO:ke] -subset: SOFA -synonym: "experimental result region" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0000704 -name: gene -def: "A region (or regions) that includes all of the sequence elements necessary to encode a functional transcript. A gene may include regulatory regions, transcribed regions and/or other functional sequence regions." [SO:immuno_workshop] -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. A gene may be considered as a unit of inheritance. -subset: SOFA -xref: http://en.wikipedia.org/wiki/Gene "wiki" -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000705 -name: tandem_repeat -def: "Two or more adjcent copies of a region (of length greater than 1)." 
[SO:ke] -subset: SOFA -synonym: "tandem repeat" EXACT [] -xref: http://en.wikipedia.org/wiki/Tandem_repeat "wiki" -xref: http://www.sci.sdsu.edu/~smaloy/Glossary/T.html -is_a: SO:0000657 ! repeat_region - -[Term] -id: SO:0000706 -name: trans_splice_acceptor_site -def: "The 3' splice site of the acceptor primary transcript." [SO:ke] -comment: This region contains a polypyridine tract and AG dinucleotide in some organisms and is UUUCAG in C. elegans. -subset: SOFA -synonym: "3' trans splice site" RELATED [] -synonym: "trans splice acceptor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000707 -name: trans_splice_donor_site -def: "The 5' five prime splice site region of the donor RNA." [SO:ke] -comment: SL RNA contains a donor site. -synonym: "5 prime trans splice site" RELATED [] -synonym: "trans splice donor site" EXACT [] -synonym: "trans-splice donor site" EXACT [] -is_a: SO:0001420 ! trans_splice_site - -[Term] -id: SO:0000708 -name: SL1_acceptor_site -synonym: "SL1 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000709 -name: SL2_acceptor_site -synonym: "SL2 acceptor site" EXACT [] -is_a: SO:0000706 ! trans_splice_acceptor_site - -[Term] -id: SO:0000710 -name: gene_with_stop_codon_redefined_as_selenocysteine -def: "A gene encoding an mRNA that has the stop codon redefined as selenocysteine." [SO:xp] -synonym: "gene with stop codon redefined as selenocysteine" EXACT [] -intersection_of: SO:0000697 ! gene_with_stop_codon_read_through -intersection_of: has_part SO:0000885 ! stop_codon_redefined_as_selenocysteine - -[Term] -id: SO:0000711 -name: gene_with_mRNA_recoded_by_translational_bypass -def: "A gene with mRNA recoded by translational bypass." [SO:xp] -synonym: "gene with mRNA recoded by translational bypass" EXACT [] -intersection_of: SO:0001217 ! protein_coding_gene -intersection_of: has_quality SO:0000886 ! 
recoded_by_translational_bypass - -[Term] -id: SO:0000712 -name: gene_with_transcript_with_translational_frameshift -def: "A gene encoding a transcript that has a translational frameshift." [SO:xp] -synonym: "gene with transcript with translational frameshift" EXACT [] -intersection_of: SO:0000693 ! gene_with_recoded_mRNA -intersection_of: has_quality SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:0000713 -name: DNA_motif -def: "A motif that is active in the DNA form of the sequence." [SO:ke] -synonym: "DNA motif" EXACT [] -xref: http://en.wikipedia.org/wiki/DNA_motif "wiki" -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000714 -name: nucleotide_motif -def: "A region of nucleotide sequence corresponding to a known motif." [SO:ke] -subset: SOFA -synonym: "nucleotide motif" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0000715 -name: RNA_motif -def: "A motif that is active in RNA sequence." [SO:ke] -synonym: "RNA motif" EXACT [] -is_a: SO:0000714 ! nucleotide_motif - -[Term] -id: SO:0000716 -name: dicistronic_mRNA -def: "An mRNA that has the quality dicistronic." [SO:ke] -synonym: "dicistronic mRNA" EXACT [] -synonym: "dicistronic processed transcript" RELATED [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:0000717 -name: reading_frame -def: "A nucleic acid sequence that when read as sequential triplets, has the potential of encoding a sequential string of amino acids. It need not contain the start or stop codon." [SGD:rb] -comment: This term was added after a request by SGD. August 2004. Modified after SO meeting in Cambridge to not include start or stop. -subset: SOFA -synonym: "reading frame" EXACT [] -xref: http://en.wikipedia.org/wiki/Reading_frame "wiki" -is_a: SO:0001410 ! 
experimental_feature - -[Term] -id: SO:0000718 -name: blocked_reading_frame -def: "A reading_frame that is interrupted by one or more stop codons; usually identified through intergenomic sequence comparisons." [SGD:rb] -comment: Term requested by Rama from SGD. -synonym: "blocked reading frame" EXACT [] -is_a: SO:0000717 ! reading_frame - -[Term] -id: SO:0000719 -name: ultracontig -def: "An ordered and oriented set of scaffolds based on somewhat weaker sets of inferential evidence such as one set of mate pair reads together with supporting evidence from ESTs or location of markers from SNP or microsatellite maps, or cytogenetic localization of contained markers." [FB:WG] -subset: SOFA -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000720 -name: foreign_transposable_element -def: "A transposable element that is foreign." [SO:ke] -comment: requested by Michael on 19 Nov 2004. -synonym: "foreign transposable element" EXACT [] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000721 -name: gene_with_dicistronic_primary_transcript -def: "A gene that encodes a dicistronic primary transcript." [SO:xp] -comment: Requested by Michael, 19 nov 2004. -synonym: "gene with dicistronic primary transcript" EXACT [] -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:1001197 ! dicistronic_primary_transcript - -[Term] -id: SO:0000722 -name: gene_with_dicistronic_mRNA -def: "A gene that encodes a polycistronic mRNA." [SO:xp] -comment: Requested by MA nov 19 2004. -synonym: "gene with dicistronic mRNA" EXACT [] -synonym: "gene with dicistronic processed transcript" EXACT [] -intersection_of: SO:0000692 ! gene_with_dicistronic_transcript -intersection_of: transcribed_to SO:0000716 ! dicistronic_mRNA - -[Term] -id: SO:0000723 -name: iDNA -def: "Genomic sequence removed from the genome, as a normal event, by a process of recombination." 
[SO:ma] -synonym: "intervening DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/IDNA "wiki" -is_a: SO:0000298 ! recombination_feature - -[Term] -id: SO:0000724 -name: oriT -def: "A region of a DNA molecule where transfer is initiated during the process of conjugation or mobilization." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -subset: SOFA -synonym: "origin of transfer" EXACT [] -xref: http://en.wikipedia.org/wiki/Origin_of_transfer "wiki" -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000725 -name: transit_peptide -alt_id: BS:00055 -def: "The transit_peptide is a short region at the N-terminus of the peptide that directs the protein to an organelle (chloroplast, mitochondrion, microbody or cyanelle)." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -comment: Added to bring SO inline with the embl ddbj genbank feature table. Old definition before biosapiens: The coding sequence for an N-terminal domain of a nuclear-encoded organellar protein. This domain is involved in post translational import of the protein into the organelle. -subset: biosapiens -subset: SOFA -synonym: "signal" RELATED [] -synonym: "transit" RELATED [uniprot:feature_type] -synonym: "transit peptide" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000726 -name: repeat_unit -def: "The simplest repeated component of a repeat region. A single repeat." [SO:ke] -comment: Added to comply with the feature table. A single repeat. -synonym: "repeat unit" EXACT [] -xref: http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000727 -name: CRM -def: "A regulatory_region where more than 1 TF_binding_site together are regulatorily active." [SO:SG] -comment: Requested by Stepen Grossmann Dec 2004. -synonym: "cis regulatory module" EXACT [] -synonym: "TF module" EXACT [] -intersection_of: SO:0001055 ! 
transcriptional_cis_regulatory_region -intersection_of: has_part SO:0000235 ! TF_binding_site - -[Term] -id: SO:0000728 -name: intein -def: "A region of a peptide that is able to excise itself and rejoin the remaining portions with a peptide bond." [SO:ke] -comment: Intein-mediated protein splicing occurs after mRNA has been translated into a protein. -synonym: "protein intron" RELATED [] -xref: http://en.wikipedia.org/wiki/Intein "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0000729 -name: intein_containing -def: "An attribute of protein-coding genes where the initial protein product contains an intein." [SO:ke] -synonym: "intein containing" EXACT [] -is_a: SO:0000010 ! protein_coding - -[Term] -id: SO:0000730 -name: gap -def: "A gap in the sequence of known length. The unknown bases are filled in with N's." [SO:ke] -subset: SOFA -is_a: SO:0000143 ! assembly_component -relationship: part_of SO:0000353 ! sequence_assembly - -[Term] -id: SO:0000731 -name: fragmentary -def: "An attribute to describe a feature that is incomplete." [SO:ke] -comment: Term added because of request by MO people. -synonym: "fragment" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000732 -name: predicted -def: "An attribute describing an unverified region." [SO:ke] -xref: http://en.wikipedia.org/wiki/Predicted "wiki" -is_a: SO:0000905 ! status - -[Term] -id: SO:0000733 -name: feature_attribute -def: "An attribute describing a located_sequence_feature." [SO:ke] -synonym: "feature attribute" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000734 -name: exemplar_mRNA -def: "An exemplar is a representative cDNA sequence for each gene. The exemplar approach is a method that usually involves some initial clustering into gene groups and the subsequent selection of a representative from each gene group." [http://mged.sourceforge.net/ontologies/MGEDontology.php] -comment: Added for the MO people. 
-synonym: "exemplar mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000864 ! exemplar - -[Term] -id: SO:0000735 -name: sequence_location -synonym: "sequence location" EXACT [] -is_a: SO:0000400 ! sequence_attribute - -[Term] -id: SO:0000736 -name: organelle_sequence -synonym: "organelle sequence" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000737 -name: mitochondrial_sequence -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "mitochondrial sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000738 -name: nuclear_sequence -synonym: "nuclear sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000739 -name: nucleomorphic_sequence -synonym: "nucleomorphic sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000740 -name: plastid_sequence -synonym: "plastid sequence" EXACT [] -is_a: SO:0000736 ! organelle_sequence - -[Term] -id: SO:0000741 -name: kinetoplast -alt_id: SO:0000826 -def: "A kinetoplast is an interlocked network of thousands of minicircles and tens of maxi circles, located near the base of the flagellum of some protozoan species." [PMID:8395055] -synonym: "kinetoplast_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Kinetoplast "wiki" -is_a: SO:0001260 ! sequence_collection - -[Term] -id: SO:0000742 -name: maxicircle -alt_id: SO:0000827 -def: "A maxicircle is a replicon, part of a kinetoplast, that contains open reading frames and replicates via a rolling circle method." [PMID:8395055] -synonym: "maxicircle_chromosome" EXACT [] -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000741 ! kinetoplast - -[Term] -id: SO:0000743 -name: apicoplast_sequence -synonym: "apicoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000744 -name: chromoplast_sequence -synonym: "chromoplast sequence" EXACT [] -is_a: SO:0000740 ! 
plastid_sequence - -[Term] -id: SO:0000745 -name: chloroplast_sequence -synonym: "chloroplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000746 -name: cyanelle_sequence -synonym: "cyanelle sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000747 -name: leucoplast_sequence -synonym: "leucoplast sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000748 -name: proplastid_sequence -synonym: "proplastid sequence" EXACT [] -is_a: SO:0000740 ! plastid_sequence - -[Term] -id: SO:0000749 -name: plasmid_location -synonym: "plasmid location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000750 -name: amplification_origin -def: "An origin_of_replication that is used for the amplification of a chromosomal nucleic acid sequence." [SO:ma] -synonym: "amplification origin" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000751 -name: proviral_location -synonym: "proviral location" EXACT [] -is_a: SO:0000735 ! sequence_location - -[Term] -id: SO:0000752 -name: gene_group_regulatory_region -subset: SOFA -synonym: "gene group regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region -relationship: member_of SO:0005855 ! gene_group - -[Term] -id: SO:0000753 -name: clone_insert -def: "The region of sequence that has been inserted and is being propogated by the clone." [SO:ke] -synonym: "clone insert" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000151 ! clone - -[Term] -id: SO:0000754 -name: lambda_vector -def: "The lambda bacteriophage is the vector for the linear lambda clone. The genes involved in the lysogenic pathway are removed from the from the viral DNA. Up to 25 kb of foreign DNA can then be inserted into the lambda genome." [ISBN:0-1767-2380-8] -synonym: "lambda vector" EXACT [] -is_a: SO:0000440 ! 
vector_replicon - -[Term] -id: SO:0000755 -name: plasmid_vector -synonym: "plasmid vector" EXACT [] -xref: http://en.wikipedia.org/wiki/Plasmid_vector#Vectors "wiki" -is_a: SO:0000440 ! vector_replicon -intersection_of: SO:0001235 ! replicon -intersection_of: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0000756 -name: cDNA -def: "DNA synthesized by reverse transcriptase using RNA as a template." [SO:ma] -xref: http://en.wikipedia.org/wiki/CDNA "wiki" -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000757 -name: single_stranded_cDNA -synonym: "single strand cDNA" EXACT [] -synonym: "single stranded cDNA" EXACT [] -synonym: "single-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000758 -name: double_stranded_cDNA -synonym: "double strand cDNA" RELATED [] -synonym: "double stranded cDNA" EXACT [] -synonym: "double-strand cDNA" RELATED [] -is_a: SO:0000756 ! cDNA - -[Term] -id: SO:0000759 -name: plasmid_clone -is_obsolete: true - -[Term] -id: SO:0000760 -name: YAC_clone -is_obsolete: true - -[Term] -id: SO:0000761 -name: phagemid_clone -is_obsolete: true - -[Term] -id: SO:0000762 -name: PAC_clone -synonym: "P1_clone" RELATED [] -is_obsolete: true - -[Term] -id: SO:0000763 -name: fosmid_clone -is_obsolete: true - -[Term] -id: SO:0000764 -name: BAC_clone -is_obsolete: true - -[Term] -id: SO:0000765 -name: cosmid_clone -is_obsolete: true - -[Term] -id: SO:0000766 -name: pyrrolysyl_tRNA -def: "A tRNA sequence that has a pyrrolysine anticodon, and a 3' pyrrolysine binding region." [SO:ke] -synonym: "pyrrolysyl tRNA" EXACT [] -synonym: "pyrrolysyl-transfer ribonucleic acid" EXACT [] -synonym: "pyrrolysyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0001178 ! pyrrolysine_tRNA_primary_transcript - -[Term] -id: SO:0000767 -name: clone_insert_start -is_obsolete: true - -[Term] -id: SO:0000768 -name: episome -def: "A plasmid that may integrate with a chromosome." [SO:ma] -is_a: SO:0000155 ! 
plasmid - -[Term] -id: SO:0000769 -name: tmRNA_coding_piece -def: "The region of a two-piece tmRNA that bears the reading frame encoding the proteolysis tag. The tmRNA gene undergoes circular permutation in some groups of bacteria. Processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw, issn:1362-4962] -comment: Added in response to comment from Kelly Williams from Indiana. Nov 2005. -synonym: "tmRNA coding piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000770 -name: tmRNA_acceptor_piece -def: "The acceptor region of a two-piece tmRNA that when mature is charged at its 3' end with alanine. The tmRNA gene undergoes circular permutation in some groups of bacteria; processing of the transcripts from such a gene leaves the mature tmRNA in two pieces, base-paired together." [doi:10.1093/nar/gkh795, Indiana:kw] -comment: Added in response to Kelly Williams from Indiana. Date: Nov 2005. -synonym: "tmRNA acceptor piece" EXACT [] -is_a: SO:0000847 ! tmRNA_region - -[Term] -id: SO:0000771 -name: QTL -def: "A quantitative trait locus (QTL) is a polymorphic locus which contains alleles that differentially affect the expression of a continuously distributed phenotypic trait. Usually it is a marker described by statistical association to quantitative variation in the particular phenotypic trait that is thought to be controlled by the cumulative action of alleles at multiple loci." [http://rgd.mcw.edu/tu/qtls/] -comment: Added in respose to request by Simon Twigger November 14th 2005. -synonym: "quantitative trait locus" EXACT [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0000772 -name: genomic_island -def: "A genomic island is an integrated mobile genetic element, characterized by size (over 10 Kb). It that has features that suggest a foreign origin. These can include nucleotide distribution (oligonucleotides signature, CG content etc.) 
that differs from the bulk of the chromosome and/or genes suggesting DNA mobility." [Phigo:at, SO:ke] -comment: Genomic islands are transmissible elements characterized by large size (>10kb). -synonym: "genomic island" EXACT [] -xref: http://en.wikipedia.org/wiki/Genomic_island "wiki" -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0000773 -name: pathogenic_island -def: "Mobile genetic elements that contribute to rapid changes in virulence potential. They are present on the genomes of pathogenic strains but absent from the genomes of non pathogenic members of the same or related species." [SO:ke] -comment: Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "pathogenic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000774 -name: metabolic_island -def: "A transmissible element containing genes involved in metabolism, analogous to the pathogenicity islands of gram negative bacteria." [SO:ke] -comment: Genes for phenolic compound degradation in Pseudomonas putida are found on metabolic islands. -synonym: "metabolic island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000775 -name: adaptive_island -def: "An adaptive island is a genomic island that provides an adaptive advantage to the host." [SO:ke] -comment: The iron-uptake ability of many pathogens are conveyed by adaptive islands. Nature Reviews Microbiology 2, 414-424 (2004); doi:10.1038 micro 884 GENOMIC ISLANDS IN PATHOGENIC AND ENVIRONMENTAL MICROORGANISMS Ulrich Dobrindt, Bianca Hochhut, Ute Hentschel & Jorg Hacker. -synonym: "adaptive island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000776 -name: symbiosis_island -def: "A transmissible element containing genes involved in symbiosis, analogous to the pathogenicity islands of gram negative bacteria." 
[SO:ke] -comment: Nitrogen fixation in Rhizobiaceae species is encoded by symbiosis islands. Evolution of rhizobia by acquisition of a 500-kb symbiosis island that integrates into a phe-tRNA gene. John T. Sullivan and Clive W. Ronso PNAS 1998 Apr 28 95 (9) 5145-5149. -synonym: "symbiosis island" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0000777 -name: pseudogenic_rRNA -def: "A non functional descendent of an rRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic rRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic rRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000778 -name: pseudogenic_tRNA -def: "A non functional descendent of a tRNA." [SO:ke] -comment: Added Jan 2006 to allow the annotation of the pseudogenic tRNA by flybase. Non-functional is defined as its transcription is prevented due to one or more mutatations. -subset: SOFA -synonym: "pseudogenic tRNA" EXACT [] -is_a: SO:0000462 ! pseudogenic_region - -[Term] -id: SO:0000779 -name: engineered_episome -def: "An episome that is engineered." [SO:xp] -comment: Requested by Lynn Crosby Jan 2006. -synonym: "engineered episome" EXACT [] -intersection_of: SO:0000768 ! episome -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000780 -name: transposable_element_attribute -comment: Added by KE Jan 2006 to capture the kinds of attributes of TEs -is_obsolete: true - -[Term] -id: SO:0000781 -name: transgenic -def: "Attribute describing sequence that has been integrated with foreign sequence." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000782 -name: natural -def: "An attribute describing a feature that occurs in nature." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000783 -name: engineered -def: "An attribute to describe a region that was modified in vitro." [SO:ke] -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0000784 -name: foreign -def: "An attribute to describe a region from another species." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000785 -name: cloned_region -comment: Added in response to Lynn Crosby. A clone insert may be composed of many cloned regions. -synonym: "cloned region" EXACT [] -synonym: "cloned segment" EXACT [] -is_a: SO:0000695 ! reagent -relationship: part_of SO:0000753 ! clone_insert - -[Term] -id: SO:0000786 -name: reagent_attribute -comment: Added jan 2006 by KE. -synonym: "reagent attribute" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000787 -name: clone_attribute -is_obsolete: true - -[Term] -id: SO:0000788 -name: cloned -is_obsolete: true - -[Term] -id: SO:0000789 -name: validated -def: "An attribute to describe a feature that has been proven." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000790 -name: invalidated -def: "An attribute describing a feature that is invalidated." [SO:ke] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000791 -name: cloned_genomic -is_obsolete: true - -[Term] -id: SO:0000792 -name: cloned_cDNA -is_obsolete: true - -[Term] -id: SO:0000793 -name: engineered_DNA -is_obsolete: true - -[Term] -id: SO:0000794 -name: engineered_rescue_region -def: "A rescue region that is engineered." [SO:xp] -synonym: "engineered rescue fragment" EXACT [] -synonym: "engineered rescue region" EXACT [] -synonym: "engineered rescue segment" EXACT [] -intersection_of: SO:0000411 ! rescue_region -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000795 -name: rescue_mini_gene -def: "A mini_gene that rescues." [SO:xp] -synonym: "rescue mini gene" EXACT [] -synonym: "rescue mini-gene" EXACT [] -intersection_of: SO:0000815 ! mini_gene -intersection_of: has_quality SO:0000814 ! 
rescue - -[Term] -id: SO:0000796 -name: transgenic_transposable_element -def: "TE that has been modified in vitro, including insertion of DNA derived from a source other than the originating TE." [FB:mc] -comment: Modified as requested by Lynn - FB. May 2007. -synonym: "transgenic transposable element" EXACT [] -intersection_of: SO:0000101 ! transposable_element -intersection_of: derives_from SO:0000151 ! clone -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000797 -name: natural_transposable_element -def: "TE that exists (or existed) in nature." [FB:mc] -synonym: "natural transposable element" EXACT [] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000782 ! natural - -[Term] -id: SO:0000798 -name: engineered_transposable_element -def: "TE that has been modified by manipulations in vitro." [FB:mc] -synonym: "engineered transposable element" EXACT [] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000799 -name: engineered_foreign_transposable_element -def: "A transposable_element that is engineered and foreign." [FB:mc] -synonym: "engineered foreign transposable element" EXACT [] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000800 -name: assortment_derived_duplication -def: "A multi-chromosome duplication aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment derived duplication" EXACT [] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000801 -name: assortment_derived_deficiency_plus_duplication -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency and a duplication." [FB:gm] -synonym: "assortment derived deficiency plus duplication" EXACT [] -is_a: SO:0000240 ! 
chromosome_variation - -[Term] -id: SO:0000802 -name: assortment_derived_deficiency -def: "A multi-chromosome deficiency aberration generated by reassortment of other aberration components." [FB:gm] -synonym: "assortment-derived deficiency" EXACT [] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000803 -name: assortment_derived_aneuploid -def: "A multi-chromosome aberration generated by reassortment of other aberration components; presumed to have a deficiency or a duplication." [FB:gm] -synonym: "assortment derived aneuploid" EXACT [] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:0000804 -name: engineered_region -def: "A region that is engineered." [SO:xp] -synonym: "engineered region" EXACT [] -synonym: "engineered sequence" EXACT [] -is_a: SO:0001409 ! biomaterial_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000805 -name: engineered_foreign_region -def: "A region that is engineered and foreign." [SO:xp] -synonym: "engineered foreign region" EXACT [] -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000783 ! engineered -intersection_of: has_quality SO:0000784 ! foreign - -[Term] -id: SO:0000806 -name: fusion -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000807 -name: engineered_tag -def: "A tag that is engineered." [SO:xp] -synonym: "engineered tag" EXACT [] -intersection_of: SO:0000324 ! tag -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000808 -name: validated_cDNA_clone -def: "A cDNA clone that has been validated." [SO:xp] -synonym: "validated cDNA clone" EXACT [] -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000789 ! validated - -[Term] -id: SO:0000809 -name: invalidated_cDNA_clone -def: "A cDNA clone that is invalid." [SO:xp] -synonym: "invalidated cDNA clone" EXACT [] -intersection_of: SO:0000317 ! cDNA_clone -intersection_of: has_quality SO:0000790 ! 
invalidated - -[Term] -id: SO:0000810 -name: chimeric_cDNA_clone -def: "A cDNA clone invalidated because it is chimeric." [SO:xp] -synonym: "chimeric cDNA clone" EXACT [] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000362 ! invalidated_by_chimeric_cDNA - -[Term] -id: SO:0000811 -name: genomically_contaminated_cDNA_clone -def: "A cDNA clone invalidated by genomic contamination." [SO:xp] -synonym: "genomically contaminated cDNA clone" EXACT [] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000414 ! invalidated_by_genomic_contamination - -[Term] -id: SO:0000812 -name: polyA_primed_cDNA_clone -def: "A cDNA clone invalidated by polyA priming." [SO:xp] -synonym: "polyA primed cDNA clone" EXACT [] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000415 ! invalidated_by_genomic_polyA_primed_cDNA - -[Term] -id: SO:0000813 -name: partially_processed_cDNA_clone -def: "A cDNA invalidated clone by partial processing." [SO:xp] -synonym: "partially processed cDNA clone" EXACT [] -intersection_of: SO:0000809 ! invalidated_cDNA_clone -intersection_of: has_quality SO:0000416 ! invalidated_by_partial_processing - -[Term] -id: SO:0000814 -name: rescue -def: "An attribute describing a region's ability, when introduced to a mutant organism, to re-establish (rescue) a phenotype." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000815 -name: mini_gene -def: "By definition, minigenes are short open-reading frames (ORF), usually encoding approximately 9 to 20 amino acids, which are expressed in vivo (as distinct from being synthesized as peptide or protein ex vivo and subsequently injected). 
The in vivo synthesis confers a distinct advantage: the expressed sequences can enter both antigen presentation pathways, MHC I (inducing CD8+ T- cells, which are usually cytotoxic T-lymphocytes (CTL)) and MHC II (inducing CD4+ T-cells, usually 'T-helpers' (Th)); and can encounter B-cells, inducing antibody responses. Three main vector approaches have been used to deliver minigenes: viral vectors, bacterial vectors and plasmid DNA." [PMID:15992143] -synonym: "mini gene" EXACT [] -is_a: SO:0000236 ! ORF - -[Term] -id: SO:0000816 -name: rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "rescue gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000814 ! rescue - -[Term] -id: SO:0000817 -name: wild_type -def: "An attribute describing sequence with the genotype found in nature and/or standard laboratory stock." [SO:ke] -synonym: "wild type" EXACT [] -xref: http://en.wikipedia.org/wiki/Wild_type "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000818 -name: wild_type_rescue_gene -def: "A gene that rescues." [SO:xp] -synonym: "wild type rescue gene" EXACT [] -is_a: SO:0000816 ! rescue_gene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000817 ! wild_type - -[Term] -id: SO:0000819 -name: mitochondrial_chromosome -def: "A chromosome originating in a mitochondria." [SO:xp] -synonym: "mitochondrial chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000737 ! mitochondrial_sequence - -[Term] -id: SO:0000820 -name: chloroplast_chromosome -def: "A chromosome originating in a chloroplast." [SO:xp] -synonym: "chloroplast chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000745 ! chloroplast_sequence - -[Term] -id: SO:0000821 -name: chromoplast_chromosome -def: "A chromosome originating in a chromoplast." [SO:xp] -synonym: "chromoplast chromosome" EXACT [] -intersection_of: SO:0000340 ! 
chromosome -intersection_of: has_origin SO:0000744 ! chromoplast_sequence - -[Term] -id: SO:0000822 -name: cyanelle_chromosome -def: "A chromosome originating in a cyanelle." [SO:xp] -synonym: "cyanelle chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000746 ! cyanelle_sequence - -[Term] -id: SO:0000823 -name: leucoplast_chromosome -def: "A chromosome with origin in a leucoplast." [SO:xp] -synonym: "leucoplast chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000747 ! leucoplast_sequence - -[Term] -id: SO:0000824 -name: macronuclear_chromosome -def: "A chromosome originating in a macronucleus." [SO:xp] -synonym: "macronuclear chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000083 ! macronuclear_sequence - -[Term] -id: SO:0000825 -name: micronuclear_chromosome -def: "A chromosome originating in a micronucleus." [SO:xp] -synonym: "micronuclear chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000084 ! micronuclear_sequence - -[Term] -id: SO:0000828 -name: nuclear_chromosome -def: "A chromosome originating in a nucleus." [SO:xp] -synonym: "nuclear chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000738 ! nuclear_sequence - -[Term] -id: SO:0000829 -name: nucleomorphic_chromosome -def: "A chromosome originating in a nucleomorph." [SO:xp] -synonym: "nucleomorphic chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000739 ! nucleomorphic_sequence - -[Term] -id: SO:0000830 -name: chromosome_part -def: "A region of a chromosome." [SO:ke] -comment: This is a manufactured term, that serves the purpose of allow the parts of a chromosome to have an is_a path to the root. -subset: SOFA -synonym: "chromosome part" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000340 ! 
chromosome - -[Term] -id: SO:0000831 -name: gene_member_region -def: "A region of a gene." [SO:ke] -comment: A manufactured term used to allow the parts of a gene to have an is_a path to the root. -subset: SOFA -synonym: "gene member region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: member_of SO:0000704 ! gene - -[Term] -id: SO:0000832 -name: promoter_region -def: "A region of sequence which is part of a promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000833 -name: transcript_region -def: "A region of a transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "transcript region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000673 ! transcript - -[Term] -id: SO:0000834 -name: mature_transcript_region -def: "A region of a mature transcript." [SO:ke] -comment: A manufactured term to collect together the parts of a mature transcript and give them an is_a path to the root. -subset: SOFA -synonym: "mature transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000835 -name: primary_transcript_region -def: "A part of a primary transcript." [SO:ke] -comment: This term was added to provide a grouping term for the region parts of primary_transcript, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "primary transcript region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000185 ! primary_transcript - -[Term] -id: SO:0000836 -name: mRNA_region -def: "A region of an mRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of mRNA, thus giving them an is_a path back to the root. -subset: SOFA -synonym: "mRNA region" EXACT [] -is_a: SO:0000834 ! 
mature_transcript_region -relationship: part_of SO:0000234 ! mRNA - -[Term] -id: SO:0000837 -name: UTR_region -def: "A region of UTR." [SO:ke] -comment: A region of UTR. This term is a grouping term to allow the parts of UTR to have an is_a path to the root. -subset: SOFA -synonym: "UTR region" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0000838 -name: rRNA_primary_transcript_region -def: "A region of an rRNA primary transcript." [SO:ke] -comment: To allow transcribed_spacer_region to have a path to the root. -synonym: "rRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000209 ! rRNA_primary_transcript - -[Term] -id: SO:0000839 -name: polypeptide_region -alt_id: BS:00124 -alt_id: BS:00331 -def: "Biological sequence region that can be assigned to a specific subsequence of a polypeptide." [SO:GAR, SO:ke] -comment: Added to allow the polypeptide regions to have is_a paths back to the root. -subset: biosapiens -subset: SOFA -synonym: "positional" RELATED [] -synonym: "positional polypeptide feature" RELATED [] -synonym: "region" NARROW [uniprot:feature_type] -synonym: "region or site annotation" RELATED [] -synonym: "site" NARROW [uniprot:feature_type] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0000840 -name: repeat_component -def: "A region of a repeated sequence." [SO:ke] -comment: A manufactured to group the parts of repeats, to give them an is_a path back to the root. -synonym: "repeat component" EXACT [] -is_a: SO:0001412 ! topologically_defined_region - -[Term] -id: SO:0000841 -name: spliceosomal_intron_region -def: "A region within an intron." [SO:ke] -comment: A terms added to allow the parts of introns to have is_a paths to the root. -subset: SOFA -synonym: "spliceosomal intron region" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000662 ! 
spliceosomal_intron - -[Term] -id: SO:0000842 -name: gene_component_region -subset: SOFA -synonym: "gene component region" EXACT [] -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000704 ! gene - -[Term] -id: SO:0000843 -name: bacterial_RNApol_promoter_region -def: "A region which is part of a bacterial RNA polymerase promoter." [SO:ke] -comment: This is a manufactured term to allow the parts of bacterial_RNApol_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000844 -name: RNApol_II_promoter_region -def: "A region of sequence which is a promoter for RNA polymerase II." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_II_promoter to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000845 -name: RNApol_III_promoter_type_1_region -def: "A region of sequence which is a promoter for RNA polymerase III type 1." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_1 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000846 -name: RNApol_III_promoter_type_2_region -def: "A region of sequence which is a promoter for RNA polymerase III type 2." [SO:ke] -comment: This is a manufactured term to allow the parts of RNApol_III_promoter_type_2 to have an is_a path back to the root. -is_obsolete: true - -[Term] -id: SO:0000847 -name: tmRNA_region -def: "A region of a tmRNA." [SO:cb] -comment: This term was added to provide a grouping term for the region parts of tmRNA, thus giving them an is_a path back to the root. -synonym: "tmRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000584 ! tmRNA - -[Term] -id: SO:0000848 -name: LTR_component -synonym: "long term repeat component" EXACT [] -synonym: "LTR component" EXACT [] -is_a: SO:0000840 ! repeat_component -relationship: part_of SO:0000286 ! 
long_terminal_repeat - -[Term] -id: SO:0000849 -name: three_prime_LTR_component -synonym: "3' long terminal repeat component" EXACT [] -synonym: "three prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000426 ! three_prime_LTR - -[Term] -id: SO:0000850 -name: five_prime_LTR_component -synonym: "5' long term repeat component" EXACT [] -synonym: "five prime LTR component" EXACT [] -is_a: SO:0000848 ! LTR_component -relationship: part_of SO:0000425 ! five_prime_LTR - -[Term] -id: SO:0000851 -name: CDS_region -def: "A region of a CDS." [SO:cb] -subset: SOFA -synonym: "CDS region" EXACT [] -is_a: SO:0000836 ! mRNA_region -relationship: part_of SO:0000316 ! CDS - -[Term] -id: SO:0000852 -name: exon_region -def: "A region of an exon." [RSC:cb] -synonym: "exon region" EXACT [] -is_a: SO:0000833 ! transcript_region -relationship: part_of SO:0000147 ! exon - -[Term] -id: SO:0000853 -name: homologous_region -def: "A region that is homologous to another region." [SO:ke] -synonym: "homolog" EXACT [] -synonym: "homologous region" EXACT [] -synonym: "homologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Homology_(biology) "wiki" -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000857 ! homologous - -[Term] -id: SO:0000854 -name: paralogous_region -def: "A homologous_region that is paralogous to another region." [SO:ke] -comment: A term to be used in conjunction with the paralogous_to relationship. -synonym: "paralog" EXACT [] -synonym: "paralogous region" EXACT [] -synonym: "paralogue" EXACT [] -xref: http://en.wikipedia.org/wiki/Paralog#Paralogy "wiki" -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000859 ! paralogous - -[Term] -id: SO:0000855 -name: orthologous_region -def: "A homologous_region that is orthologous to another region." [SO:ke] -comment: This term should be used in conjunction with the similarity relationships defined in SO. 
-synonym: "ortholog" EXACT [] -synonym: "orthologous region" EXACT [] -synonym: "orthologue" EXACT [] -xref: http://en.wikipedia.org/wiki/Ortholog#Orthology "wiki" -intersection_of: SO:0000853 ! homologous_region -intersection_of: has_quality SO:0000858 ! orthologous - -[Term] -id: SO:0000856 -name: conserved -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000857 -name: homologous -def: "Similarity due to common ancestry." [SO:ke] -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000858 -name: orthologous -def: "An attribute describing a kind of homology where divergence occured after a speciation event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000859 -name: paralogous -def: "An attribute describing a kind of homology where divergence occurred after a duplication event." [SO:ke] -is_a: SO:0000857 ! homologous - -[Term] -id: SO:0000860 -name: syntenic -def: "Attribute describing sequence regions occurring in same order on chromosome of different species." [SO:ke] -xref: http://en.wikipedia.org/wiki/Syntenic "wiki" -is_a: SO:0000856 ! conserved - -[Term] -id: SO:0000861 -name: capped_primary_transcript -def: "A primary transcript that is capped." [SO:xp] -synonym: "capped primary transcript" EXACT [] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000862 -name: capped_mRNA -def: "An mRNA that is capped." [SO:xp] -synonym: "capped mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000581 ! cap -intersection_of: has_quality SO:0000146 ! capped - -[Term] -id: SO:0000863 -name: mRNA_attribute -def: "An attribute describing an mRNA feature." [SO:ke] -synonym: "mRNA attribute" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000864 -name: exemplar -def: "An attribute describing a sequence is representative of a class of similar sequences." [SO:ke] -is_a: SO:0000863 ! 
mRNA_attribute - -[Term] -id: SO:0000865 -name: frameshift -def: "An attribute describing a sequence that contains a mutation involving the deletion or insertion of one or more bases, where this number is not divisible by 3." [SO:ke] -xref: http://en.wikipedia.org/wiki/Frameshift "wiki" -is_a: SO:0000863 ! mRNA_attribute - -[Term] -id: SO:0000866 -name: minus_1_frameshift -def: "A frameshift caused by deleting one base." [SO:ke] -synonym: "minus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000867 -name: minus_2_frameshift -def: "A frameshift caused by deleting two bases." [SO:ke] -synonym: "minus 2 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000868 -name: plus_1_frameshift -def: "A frameshift caused by inserting one base." [SO:ke] -synonym: "plus 1 frameshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000869 -name: plus_2_framshift -def: "A frameshift caused by inserting two bases." [SO:ke] -synonym: "plus 2 framshift" EXACT [] -is_a: SO:0000865 ! frameshift - -[Term] -id: SO:0000870 -name: trans_spliced -def: "An attribute describing transcript sequence that is created by splicing exons from diferent genes." [SO:ke] -synonym: "trans-spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000871 -name: polyadenylated_mRNA -def: "An mRNA that is polyadenylated." [SO:xp] -synonym: "polyadenylated mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000610 ! polyA_sequence -intersection_of: has_quality SO:0000246 ! polyadenylated - -[Term] -id: SO:0000872 -name: trans_spliced_mRNA -def: "An mRNA that is trans-spliced." [SO:xp] -synonym: "trans-spliced mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: adjacent_to SO:0000636 ! spliced_leader_RNA -intersection_of: has_quality SO:0000870 ! trans_spliced - -[Term] -id: SO:0000873 -name: edited_transcript -def: "A transcript that is edited." 
[SO:ke] -synonym: "edited transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: guided_by SO:0000602 ! guide_RNA -intersection_of: has_part SO:0000977 ! anchor_binding_site -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000874 -name: edited_transcript_by_A_to_I_substitution -def: "A transcript that has been edited by A to I substitution." [SO:ke] -synonym: "edited transcript by A to I substitution" EXACT [] -is_a: SO:0000873 ! edited_transcript - -[Term] -id: SO:0000875 -name: bound_by_protein -def: "An attribute describing a sequence that is bound by a protein." [SO:ke] -synonym: "bound by protein" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000876 -name: bound_by_nucleic_acid -def: "An attribute describing a sequence that is bound by a nucleic acid." [SO:ke] -synonym: "bound by nucleic acid" EXACT [] -is_a: SO:0000277 ! bound_by_factor - -[Term] -id: SO:0000877 -name: alternatively_spliced -def: "An attribute describing a situation where a gene may encode for more than 1 transcript." [SO:ke] -synonym: "alternatively spliced" EXACT [] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000878 -name: monocistronic -def: "An attribute describing a sequence that contains the code for one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000879 -name: dicistronic -def: "An attribute describing a sequence that contains the code for two gene products." [SO:ke] -is_a: SO:0000880 ! polycistronic - -[Term] -id: SO:0000880 -name: polycistronic -def: "An attribute describing a sequence that contains the code for more than one gene product." [SO:ke] -is_a: SO:0000237 ! transcript_attribute - -[Term] -id: SO:0000881 -name: recoded -def: "An attribute describing an mRNA sequence that has been reprogrammed at translation, causing localized alterations." [SO:ke] -is_a: SO:0000863 ! 
mRNA_attribute - -[Term] -id: SO:0000882 -name: codon_redefined -def: "An attribute describing the alteration of codon meaning." [SO:ke] -synonym: "codon redefined" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000883 -name: stop_codon_read_through -def: "A stop codon redefined to be a new amino acid." [SO:ke] -synonym: "stop codon read through" EXACT [] -synonym: "stop codon readthrough" RELATED [] -is_a: SO:0000145 ! recoded_codon - -[Term] -id: SO:0000884 -name: stop_codon_redefined_as_pyrrolysine -def: "A stop codon redefined to be the new amino acid, pyrrolysine." [SO:ke] -synonym: "stop codon redefined as pyrrolysine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000885 -name: stop_codon_redefined_as_selenocysteine -def: "A stop codon redefined to be the new amino acid, selenocysteine." [SO:ke] -synonym: "stop codon redefined as selenocysteine" EXACT [] -is_a: SO:0000883 ! stop_codon_read_through - -[Term] -id: SO:0000886 -name: recoded_by_translational_bypass -def: "Recoded mRNA where a block of nucleotides is not translated." [SO:ke] -synonym: "recoded by translational bypass" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000887 -name: translationally_frameshifted -def: "Recoding by frameshifting a particular site." [SO:ke] -synonym: "translationally frameshifted" EXACT [] -is_a: SO:0000881 ! recoded - -[Term] -id: SO:0000888 -name: maternally_imprinted_gene -def: "A gene that is maternally_imprinted." [SO:xp] -synonym: "maternally imprinted gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000135 ! maternally_imprinted - -[Term] -id: SO:0000889 -name: paternally_imprinted_gene -def: "A gene that is paternally imprinted." [SO:xp] -synonym: "paternally imprinted gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000136 ! 
paternally_imprinted - -[Term] -id: SO:0000890 -name: post_translationally_regulated_gene -def: "A gene that is post translationally regulated." [SO:xp] -synonym: "post translationally regulated gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000130 ! post_translationally_regulated - -[Term] -id: SO:0000891 -name: negatively_autoregulated_gene -def: "A gene that is negatively autoreguated." [SO:xp] -synonym: "negatively autoregulated gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000473 ! negatively_autoregulated - -[Term] -id: SO:0000892 -name: positively_autoregulated_gene -def: "A gene that is positively autoregulated." [SO:xp] -synonym: "positively autoregulated gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000475 ! positively_autoregulated - -[Term] -id: SO:0000893 -name: silenced -def: "An attribute describing an epigenetic process where a gene is inactivated at transcriptional or translational level." [SO:ke] -xref: http://en.wikipedia.org/wiki/Silenced "wiki" -is_a: SO:0000126 ! transcriptionally_repressed - -[Term] -id: SO:0000894 -name: silenced_by_DNA_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA modifications, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA modification" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0000895 -name: silenced_by_DNA_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by DNA methylation, resulting in repression of transcription." [SO:ke] -synonym: "silenced by DNA methylation" EXACT [] -is_a: SO:0000894 ! silenced_by_DNA_modification - -[Term] -id: SO:0000896 -name: translationally_regulated_gene -def: "A gene that is translationally regulated." [SO:xp] -synonym: "translationally regulated gene" EXACT [] -intersection_of: SO:0000704 ! 
gene -intersection_of: has_quality SO:0000131 ! translationally_regulated - -[Term] -id: SO:0000897 -name: allelically_excluded_gene -def: "A gene that is allelically_excluded." [SO:xp] -synonym: "allelically excluded gene" EXACT [] -intersection_of: SO:0000898 ! epigenetically_modified_gene -intersection_of: has_quality SO:0000137 ! allelically_excluded - -[Term] -id: SO:0000898 -name: epigenetically_modified_gene -def: "A gene that is epigenetically modified." [SO:ke] -synonym: "epigenetically modified gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000133 ! epigenetically_modified - -[Term] -id: SO:0000899 -name: nuclear_mitochondrial -def: "An attribute describing a nuclear pseudogene of a mitochndrial gene." [SO:ke] -synonym: "nuclear mitochondrial" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000900 -name: processed -def: "An attribute describing a pseudogene where by an mRNA was retrotransposed. The mRNA sequence is transcribed back into the genome, lacking introns and promotors, but often including a polyA tail." [SO:ke] -is_obsolete: true - -[Term] -id: SO:0000901 -name: unequally_crossed_over -def: "An attribute describing a pseudogene that was created by tandem duplication and unequal crossing over during recombination." [SO:ke] -synonym: "unequally crossed over" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000902 -name: transgene -def: "A gene that is transgenic." [SO:xp] -xref: http://en.wikipedia.org/wiki/Transgene "wiki" -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0000903 -name: endogenous_retroviral_sequence -synonym: "endogenous retroviral sequence" EXACT [] -is_a: SO:0000751 ! proviral_location - -[Term] -id: SO:0000904 -name: rearranged_at_DNA_level -def: "An attribute to describe the sequence of a feature, where the DNA is rearranged." [SO:ke] -synonym: "rearranged at DNA level" EXACT [] -is_a: SO:0000133 ! 
epigenetically_modified - -[Term] -id: SO:0000905 -name: status -def: "An attribute describing the status of a feature, based on the available evidence." [SO:ke] -comment: This term is the hypernym of attributes and should not be annotated to. -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000906 -name: independently_known -def: "Attribute to describe a feature that is independently known - not predicted." [SO:ke] -synonym: "independently known" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0000907 -name: supported_by_sequence_similarity -def: "An attribute to describe a feature that has been predicted using sequence similarity techniques." [SO:ke] -synonym: "supported by sequence similarity" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000908 -name: supported_by_domain_match -def: "An attribute to describe a feature that has been predicted using sequence similarity of a known domain." [SO:ke] -synonym: "supported by domain match" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000909 -name: supported_by_EST_or_cDNA -def: "An attribute to describe a feature that has been predicted using sequence similarity to EST or cDNA data." [SO:ke] -synonym: "supported by EST or cDNA" EXACT [] -is_a: SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:0000910 -name: orphan -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000911 -name: predicted_by_ab_initio_computation -def: "An attribute describing a feature that is predicted by a computer program that did not rely on sequence similarity." [SO:ke] -synonym: "predicted by ab initio computation" EXACT [] -is_a: SO:0000732 ! predicted - -[Term] -id: SO:0000912 -name: asx_turn -alt_id: BS:00203 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Aspartate or Asparagine (Asx), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." 
[http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0000913 -name: cloned_cDNA_insert -def: "A clone insert made from cDNA." [SO:xp] -synonym: "cloned cDNA insert" EXACT [] -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000756 ! cDNA - -[Term] -id: SO:0000914 -name: cloned_genomic_insert -def: "A clone insert made from genomic DNA." [SO:xp] -synonym: "cloned genomic insert" EXACT [] -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000991 ! genomic_DNA - -[Term] -id: SO:0000915 -name: engineered_insert -def: "A clone insert that is engineered." [SO:xp] -synonym: "engineered insert" EXACT [] -intersection_of: SO:0000753 ! clone_insert -intersection_of: has_quality SO:0000783 ! engineered - -[Term] -id: SO:0000916 -name: edit_operation -synonym: "edit operation" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000917 -name: insert_U -def: "An edit to insert a U." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "insert U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000918 -name: delete_U -def: "An edit to delete a uridine." [SO:ke] -comment: The insertion and deletion of uridine (U) residues, usually within coding regions of mRNA transcripts of cryptogenes in the mitochondrial genome of kinetoplastid protozoa. -synonym: "delete U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000919 -name: substitute_A_to_I -def: "An edit to substitute an I for an A." [SO:ke] -synonym: "substitute A to I" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000920 -name: insert_C -def: "An edit to insert a cytidine." [SO:ke] -synonym: "insert C" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000921 -name: insert_dinucleotide -def: "An edit to insert a dinucleotide." 
[SO:ke] -synonym: "insert dinucleotide" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000922 -name: substitute_C_to_U -def: "An edit to substitute an U for a C." [SO:ke] -synonym: "substitute C to U" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000923 -name: insert_G -def: "An edit to insert a G." [SO:ke] -synonym: "insert G" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000924 -name: insert_GC -def: "An edit to insert a GC dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert GC" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000925 -name: insert_GU -def: "An edit to insert a GU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. 
-synonym: "insert GU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000926 -name: insert_CU -def: "An edit to insert a CU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert CU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000927 -name: insert_AU -def: "An edit to insert a AU dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AU" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000928 -name: insert_AA -def: "An edit to insert a AA dinucleotide." [SO:ke] -comment: The type of RNA editing found in the mitochondria of Myxomycota, characterized by the insertion of mono- and dinucleotides in RNAs relative to their mtDNA template and in addition, C to U base conversion. 
The most common mononucleotide insertion is cytidine, although a number of uridine mononucleotides are inserted at specific sites. Adenine and guanine have not been observed in mononucleotide insertions. Five different dinucleotide insertions have been observed, GC, GU, CU, AU and AA. Both mono- and dinucleotide insertions create open reading frames in mRNA and contribute to highly conserved structural features of rRNAs and tRNAs. -synonym: "insert AA" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000929 -name: edited_mRNA -def: "An mRNA that is edited." [SO:xp] -synonym: "edited mRNA" EXACT [] -intersection_of: SO:0000873 ! edited_transcript -intersection_of: has_quality SO:0000116 ! edited - -[Term] -id: SO:0000930 -name: guide_RNA_region -def: "A region of guide RNA." [SO:ma] -synonym: "guide RNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000602 ! guide_RNA - -[Term] -id: SO:0000931 -name: anchor_region -def: "A region of a guide_RNA that base-pairs to a target mRNA." [SO:jk] -synonym: "anchor region" EXACT [] -is_a: SO:0000930 ! guide_RNA_region - -[Term] -id: SO:0000932 -name: pre_edited_mRNA -synonym: "pre-edited mRNA" EXACT [] -is_a: SO:0000120 ! protein_coding_primary_transcript - -[Term] -id: SO:0000933 -name: intermediate -def: "An attribute to describe a feature between stages of processing." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000934 -name: miRNA_target_site -def: "A miRNA target site is a binding site where the molecule is a micro RNA." [FB:cds] -synonym: "miRNA target site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0000935 -name: edited_CDS -def: "A CDS that is edited." [SO:xp] -synonym: "edited CDS" EXACT [] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000116 ! 
edited - -[Term] -id: SO:0000936 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_segment -synonym: "vertebrate immunoglobulin T cell receptor rearranged segment" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000937 -name: vertebrate_immune_system_feature -is_obsolete: true - -[Term] -id: SO:0000938 -name: vertebrate_immunoglobulin_T_cell_receptor_rearranged_gene_cluster -synonym: "vertebrate immunoglobulin T cell receptor rearranged gene cluster" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000939 -name: vertebrate_immune_system_gene_recombination_signal_feature -synonym: "vertebrate immune system gene recombination signal feature" EXACT [] -is_a: SO:0000301 ! vertebrate_immune_system_gene_recombination_feature - -[Term] -id: SO:0000940 -name: recombinationally_rearranged -synonym: "recombinationally rearranged" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000941 -name: recombinationally_rearranged_vertebrate_immune_system_gene -def: "A recombinationally rearranged gene of the vertebrate immune system." [SO:xp] -synonym: "recombinationally rearranged vertebrate immune system gene" EXACT [] -is_a: SO:0000456 ! recombinationally_rearranged_gene - -[Term] -id: SO:0000942 -name: attP_site -def: "An integration/excision site of a phage chromosome at which a recombinase acts to insert the phage DNA at a cognate integration/excision site on a bacterial chromosome." [SO:as] -synonym: "attP site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0001042 ! phage_sequence - -[Term] -id: SO:0000943 -name: attB_site -def: "An integration/excision site of a bacterial chromosome at which a recombinase acts to insert foreign DNA containing a cognate integration/excision site." [SO:as] -synonym: "attB site" EXACT [] -is_a: SO:0000946 ! 
integration_excision_site - -[Term] -id: SO:0000944 -name: attL_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attB_site and the 3' portion of attP_site." [SO:as] -synonym: "attBP'" RELATED [] -synonym: "attL site" RELATED [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000945 -name: attR_site -def: "A region that results from recombination between attP_site and attB_site, composed of the 5' portion of attP_site and the 3' portion of attB_site." [SO:as] -synonym: "attPB'" RELATED [] -synonym: "attR site" EXACT [] -is_a: SO:0000946 ! integration_excision_site - -[Term] -id: SO:0000946 -name: integration_excision_site -def: "A region specifically recognised by a recombinase, which inserts or removes another region marked by a distinct cognate integration/excision site." [SO:as] -synonym: "attachment site" RELATED [] -synonym: "integration excision site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000947 -name: resolution_site -def: "A region specifically recognised by a recombinase, which separates a physically contiguous circle of DNA into two physically separate circles." [SO:as] -synonym: "res site" EXACT [] -synonym: "resolution site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000948 -name: inversion_site -def: "A region specifically recognised by a recombinase, which inverts the region flanked by a pair of sites." [SO:ma] -comment: A target region for site-specific inversion of a DNA region and which carries binding sites for a site-specific recombinase and accessory proteins as well as the site for specific cleavage by the recombinase. -synonym: "inversion site" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0000949 -name: dif_site -def: "A site at which replicated bacterial circular chromosomes are decatenated by site specific resolvase." 
[SO:as] -synonym: "dif site" EXACT [] -is_a: SO:0000947 ! resolution_site - -[Term] -id: SO:0000950 -name: attC_site -def: "An attC site is a sequence required for the integration of a DNA of an integron." [SO:as] -synonym: "attC site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000365 ! integron - -[Term] -id: SO:0000951 -name: eukaryotic_terminator -synonym: "eukaryotic terminator" EXACT [] -is_a: SO:0000141 ! terminator - -[Term] -id: SO:0000952 -name: oriV -def: "An origin of vegetative replication in plasmids and phages." [SO:as] -synonym: "origin of vegetative replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000953 -name: oriC -def: "An origin of bacterial chromosome replication." [SO:as] -synonym: "origin of bacterial chromosome replication" EXACT [] -is_a: SO:0000296 ! origin_of_replication - -[Term] -id: SO:0000954 -name: DNA_chromosome -def: "Structural unit composed of a self-replicating, DNA molecule." [SO:ma] -synonym: "DNA chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0000955 -name: double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded DNA molecule." [SO:ma] -synonym: "double stranded DNA chromosome" EXACT [] -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000985 ! double - -[Term] -id: SO:0000956 -name: single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded DNA molecule." [SO:ma] -synonym: "single stranded DNA chromosome" EXACT [] -intersection_of: SO:0000954 ! DNA_chromosome -intersection_of: has_quality SO:0000984 ! single - -[Term] -id: SO:0000957 -name: linear_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear DNA molecule." 
[SO:ma] -synonym: "linear double stranded DNA chromosome" EXACT [] -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000958 -name: circular_double_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular DNA molecule." [SO:ma] -synonym: "circular double stranded DNA chromosome" EXACT [] -intersection_of: SO:0000955 ! double_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000959 -name: linear_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear DNA molecule." [SO:ma] -synonym: "linear single stranded DNA chromosome" EXACT [] -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000960 -name: circular_single_stranded_DNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded DNA chromosome" EXACT [] -intersection_of: SO:0000956 ! single_stranded_DNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000961 -name: RNA_chromosome -def: "Structural unit composed of a self-replicating, RNA molecule." [SO:ma] -synonym: "RNA chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_quality SO:0000356 ! RNA - -[Term] -id: SO:0000962 -name: single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded RNA molecule." [SO:ma] -synonym: "single stranded RNA chromosome" EXACT [] -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000984 ! single - -[Term] -id: SO:0000963 -name: linear_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, linear RNA molecule." 
[SO:ma] -synonym: "linear single stranded RNA chromosome" EXACT [] -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000964 -name: linear_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, linear RNA molecule." [SO:ma] -synonym: "linear double stranded RNA chromosome" EXACT [] -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000987 ! linear - -[Term] -id: SO:0000965 -name: double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded RNA molecule." [SO:ma] -synonym: "double stranded RNA chromosome" EXACT [] -intersection_of: SO:0000961 ! RNA_chromosome -intersection_of: has_quality SO:0000985 ! double - -[Term] -id: SO:0000966 -name: circular_single_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, single-stranded, circular DNA molecule." [SO:ma] -synonym: "circular single stranded RNA chromosome" EXACT [] -intersection_of: SO:0000962 ! single_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000967 -name: circular_double_stranded_RNA_chromosome -def: "Structural unit composed of a self-replicating, double-stranded, circular RNA molecule." [SO:ma] -synonym: "circular double stranded RNA chromosome" EXACT [] -intersection_of: SO:0000965 ! double_stranded_RNA_chromosome -intersection_of: has_quality SO:0000988 ! circular - -[Term] -id: SO:0000968 -name: sequence_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "sequence replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000969 -name: rolling_circle -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070581. 
-synonym: "rolling circle" EXACT [] -xref: http://en.wikipedia.org/wiki/Rolling_circle "wiki" -is_obsolete: true - -[Term] -id: SO:0000970 -name: theta_replication -comment: This has been obsoleted as it represents a process. replaced_by: GO:0070582 -synonym: "theta replication" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000971 -name: DNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0006260. -synonym: "DNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000972 -name: RNA_replication_mode -comment: This has been obsoleted as it represents a process. replaced_by: GO:0034961. -synonym: "RNA replication mode" EXACT [] -is_obsolete: true - -[Term] -id: SO:0000973 -name: insertion_sequence -def: "A terminal_inverted_repeat_element that is bacterial and only encodes the functions required for its transposition between these inverted repeats." [SO:as] -synonym: "insertion sequence" EXACT [] -synonym: "IS" RELATED [] -xref: http://en.wikipedia.org/wiki/Insertion_sequence "wiki" -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0000975 -name: minicircle_gene -synonym: "minicircle gene" EXACT [] -intersection_of: SO:0000089 ! kinetoplast_gene -intersection_of: part_of SO:0000980 ! minicircle - -[Term] -id: SO:0000976 -name: cryptic -def: "A feature_attribute describing a feature that is not manifest under normal conditions." [SO:ke] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000977 -name: anchor_binding_site -comment: Part of an edited transcript only. -synonym: "anchor binding site" EXACT [] -is_a: SO:0000833 ! transcript_region - -[Term] -id: SO:0000978 -name: template_region -def: "A region of a guide_RNA that specifies the insertions and deletions of bases in the editing of a target mRNA." [SO:jk] -synonym: "information region" EXACT [] -synonym: "template region" EXACT [] -is_a: SO:0000930 ! 
guide_RNA_region - -[Term] -id: SO:0000979 -name: gRNA_encoding -def: "A non-protein_coding gene that encodes a guide_RNA." [SO:ma] -synonym: "gRNA encoding" EXACT [] -is_a: SO:0000011 ! non_protein_coding - -[Term] -id: SO:0000980 -name: minicircle -alt_id: SO:0000974 -def: "A minicircle is a replicon, part of a kinetoplast, that encodes for guide RNAs." [PMID:8395055] -synonym: "minicircle_chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Minicircle "wiki" -is_a: SO:0001235 ! replicon -relationship: part_of SO:0000741 ! kinetoplast - -[Term] -id: SO:0000981 -name: rho_dependent_bacterial_terminator -synonym: "rho dependent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000982 -name: rho_independent_bacterial_terminator -synonym: "rho independent bacterial terminator" EXACT [] -is_a: SO:0000614 ! bacterial_terminator - -[Term] -id: SO:0000983 -name: strand_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "strand attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0000984 -name: single -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000985 -name: double -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -is_a: SO:0000983 ! strand_attribute - -[Term] -id: SO:0000986 -name: topology_attribute -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "topology attribute" EXACT [] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0000987 -name: linear -def: "A quality of a nucleotide polymer that has a 3'-terminal residue and a 5'-terminal residue." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "two-ended" RELATED [] -is_a: SO:0000986 ! 
topology_attribute -disjoint_from: SO:0000988 ! circular - -[Term] -id: SO:0000988 -name: circular -def: "A quality of a nucleotide polymer that has no terminal nucleotide residues." [SO:cb] -comment: Attributes added to describe the different kinds of replicon. SO workshop, September 2006. -synonym: "zero-ended" RELATED [] -is_a: SO:0000986 ! topology_attribute -disjoint_from: SO:0000987 ! linear - -[Term] -id: SO:0000989 -name: class_II_RNA -def: "Small non-coding RNA (59-60 nt long) containing 5' and 3' ends that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -synonym: "class II RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000990 -name: class_I_RNA -def: "Small non-coding RNA (55-65 nt long) containing highly conserved 5' and 3' ends (16 and 8 nt, respectively) that are predicted to come together to form a stem structure. Identified in the social amoeba Dictyostelium discoideum and localized in the cytoplasm." [PMID:15333696] -comment: Requested by Karen Pilcher - Dictybase. song-Term Tracker-1574577. -synonym: "class I RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0000991 -name: genomic_DNA -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "genomic DNA" EXACT [] -is_a: SO:0000352 ! DNA - -[Term] -id: SO:0000992 -name: BAC_cloned_genomic_insert -comment: Requested by Andy Schroder - Flybase Harvard, Nov 2006. -synonym: "BAC cloned genomic insert" EXACT [] -intersection_of: SO:0000914 ! cloned_genomic_insert -intersection_of: derives_from SO:0000153 ! BAC - -[Term] -id: SO:0000993 -name: consensus -comment: Term added Dec 06 to comply with mapping to MGED terms.It should be used to generate consensus regions. The specific cross product terms they require are consensus_region and consensus_mRNA. -is_a: SO:0000905 ! 
status - -[Term] -id: SO:0000994 -name: consensus_region -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus region" EXACT [] -is_a: SO:0001410 ! experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000995 -name: consensus_mRNA -comment: DO not obsolete without considering MGED mapping. -synonym: "consensus mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000993 ! consensus - -[Term] -id: SO:0000996 -name: predicted_gene -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "predicted gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000732 ! predicted - -[Term] -id: SO:0000997 -name: gene_fragment -comment: This term is mapped to MGED. Do not obsolete without consulting MGED ontology. -synonym: "gene fragment" EXACT [] -intersection_of: SO:0000842 ! gene_component_region -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0000998 -name: recursive_splice_site -def: "A recursive splice site is a splice site which subdivides a large intron. Recursive splicing is a mechanism that splices large introns by sub dividing the intron at non exonic elements and alternate exons." [http://www.genetics.org/cgi/content/full/170/2/661] -synonym: "recursive splice site" EXACT [] -is_a: SO:0001419 ! cis_splice_site - -[Term] -id: SO:0000999 -name: BAC_end -def: "A region of sequence from the end of a BAC clone that may provide a highly specific marker." [SO:ke] -comment: Requested by Keith Boroevich December, 2006. -synonym: "BAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000153 ! BAC - -[Term] -id: SO:0001000 -name: rRNA_16S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the small subunit of the ribosome." 
[SO:ke] -subset: SOFA -synonym: "16S ribosomal RNA" EXACT [] -synonym: "16S rRNA" RELATED [] -synonym: "16S SSU RNA" EXACT [] -synonym: "rRNA 16S" EXACT [] -xref: http://en.wikipedia.org/wiki/16S_ribosomal_RNA "wiki" -is_a: SO:0000650 ! small_subunit_rRNA - -[Term] -id: SO:0001001 -name: rRNA_23S -def: "A large polynucleotide in Bacteria and Archaea, which functions as the large subunit of the ribosome." [SO:ke] -subset: SOFA -synonym: "23S LSU rRNA" EXACT [] -synonym: "23S ribosomal RNA" RELATED [] -synonym: "23S rRNA" EXACT [] -synonym: "rRNA 23S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001002 -name: rRNA_25S -def: "A large polynucleotide which functions as part of the large subunit of the ribosome in some eukaryotes." [RSC:cb] -subset: SOFA -synonym: "25S LSU rRNA" EXACT [] -synonym: "25S ribosomal RNA" EXACT [] -synonym: "25S rRNA" EXACT [] -synonym: "rRNA 25S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001003 -name: solo_LTR -def: "A recombination product between the 2 LTR of the same element." [SO:ke] -comment: Requested by Hadi Quesneville January 2007. -synonym: "solo LTR" EXACT [] -is_a: SO:0000286 ! long_terminal_repeat - -[Term] -id: SO:0001004 -name: low_complexity -synonym: "low complexity" EXACT [] -is_a: SO:0000905 ! status - -[Term] -id: SO:0001005 -name: low_complexity_region -synonym: "low complexity region" EXACT [] -is_a: SO:0001410 ! experimental_feature -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001004 ! low_complexity - -[Term] -id: SO:0001006 -name: prophage -def: "A phage genome after it has established in the host genome in a latent/immune state either as a plasmid or as an integrated \"island\"." [GOC:jl] -xref: http://en.wikipedia.org/wiki/Prophage "wiki" -is_a: SO:0000113 ! 
proviral_region - -[Term] -id: SO:0001007 -name: cryptic_prophage -def: "A remnant of an integrated prophage in the host genome or an \"island\" in the host genome that includes phage like-genes." [GOC:jl] -comment: This is not cryptic in the same sense as a cryptic gene or cryptic splice site. -synonym: "cryptic prophage" EXACT [] -xref: http://ecoliwiki.net/colipedia/index.php/Category\:Cryptic_Prophage.w dbxref -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001008 -name: tetraloop -def: "A base-paired stem with loop of 4 non-hydrogen bonded nucleotides." [SO:ke] -xref: http://en.wikipedia.org/wiki/Tetraloop "wiki" -is_a: SO:0000313 ! stem_loop - -[Term] -id: SO:0001009 -name: DNA_constraint_sequence -def: "A double-stranded DNA used to control macromolecular structure and function." [http:/www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=search&db=pubmed&term=SILVERMAN+SK[au\]&dispmax=50] -synonym: "DNA constraint" EXACT [] -synonym: "DNA constraint sequence" EXACT [] -is_a: SO:0000442 ! ds_oligo - -[Term] -id: SO:0001010 -name: i_motif -def: "A cytosine rich domain whereby strands associate both inter- and intramolecularly at moderately acidic pH." [PMID:9753739] -synonym: "i motif" EXACT [] -synonym: "short intercalated motif" EXACT [] -is_a: SO:0000142 ! DNA_sequence_secondary_structure - -[Term] -id: SO:0001011 -name: PNA_oligo -def: "Peptide nucleic acid, is a chemical not known to occur naturally but is artificially synthesized and used in some biological research and medical treatments. The PNA backbone is composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [SO:ke] -synonym: "peptide nucleic acid" EXACT [] -synonym: "PNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Peptide_nucleic_acid "wiki" -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001184 ! 
PNA - -[Term] -id: SO:0001012 -name: DNAzyme -def: "A DNA sequence with catalytic activity." [SO:cb] -comment: Added by request from Colin Batchelor. -synonym: "catalytic DNA" EXACT [] -synonym: "deoxyribozyme" RELATED [] -synonym: "DNA enzyme" EXACT [] -intersection_of: SO:0000696 ! oligo -intersection_of: has_quality SO:0001185 ! enzymatic - -[Term] -id: SO:0001013 -name: MNP -def: "A multiple nucleotide polymorphism with alleles of common length > 1, for example AAA/TTT." [http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?rs=rs2067431] -synonym: "multiple nucleotide polymorphism" RELATED [] -is_a: SO:1000005 ! complex_substitution - -[Term] -id: SO:0001014 -name: intron_domain -comment: Requested by Colin Batchelor, Feb 2007. -synonym: "intron domain" EXACT [] -is_a: SO:0000835 ! primary_transcript_region -relationship: part_of SO:0000188 ! intron - -[Term] -id: SO:0001015 -name: wobble_base_pair -def: "A type of non-canonical base pairing, most commonly between G and U, which is important for the secondary structure of RNAs. It has similar thermodynamic stability to the Watson-Crick pairing. Wobble base pairs only have two hydrogen bonds. Other wobble base pair possibilities are I-A, I-U and I-C." [PMID:11256617] -synonym: "wobble base pair" EXACT [] -synonym: "wobble pair" EXACT [] -xref: http://en.wikipedia.org/wiki/Wobble_base_pair "wiki" -is_a: SO:0000028 ! base_pair - -[Term] -id: SO:0001016 -name: internal_guide_sequence -def: "A purine-rich sequence in the group I introns which determines the locations of the splice sites in group I intron splicing and has catalytic activity." [SO:cb] -synonym: "IGS" EXACT [] -synonym: "internal guide sequence" EXACT [] -is_a: SO:0001014 ! intron_domain -relationship: part_of SO:0000587 ! group_I_intron - -[Term] -id: SO:0001017 -name: silent_mutation -comment: Added in March 2007 in after meeting with pharmgkb. -synonym: "silent mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Silent_mutation "wiki" -is_a: SO:1000132 ! 
sequence_variant_effect - -[Term] -id: SO:0001018 -name: epitope -def: "A region of a macromolecule that is recognized by the immune system." [http://en.wikipedia.org/wiki/Epitope] -comment: Requested by Trish Whetzel. -xref: http://en.wikipedia.org/wiki/Epitope "wiki" -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001019 -name: copy_number_variation -def: "A variation that increases or decreases the copy number of a given region." [SO:ke] -synonym: "CNP" EXACT [] -synonym: "CNV" EXACT [] -synonym: "copy number polymorphism" EXACT [] -synonym: "copy number variation" EXACT [] -xref: http://en.wikipedia.org/wiki/Copy_number_variation "wiki" -is_a: SO:0001060 ! sequence_variant - -[Term] -id: SO:0001020 -name: sequence_variant_affecting_copy_number -synonym: "mutation affecting copy number" EXACT [] -synonym: "sequence variant affecting copy number" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:0001021 -name: chromosome_breakpoint -alt_id: SO:0001242 -synonym: "aberration breakpoint" EXACT [] -synonym: "aberration_junction" EXACT [] -synonym: "chromosome breakpoint" EXACT [] -is_a: SO:0000699 ! junction -relationship: part_of SO:0000340 ! chromosome - -[Term] -id: SO:0001022 -name: inversion_breakpoint -def: "The point within a chromosome where an inversion begins or ends." [SO:cb] -synonym: "inversion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001023 -name: allele -def: "An allele is one of a set of coexisting sequence variants of a gene." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Allele "wiki" -is_a: SO:0001060 ! sequence_variant -relationship: variant_of SO:0000704 ! gene - -[Term] -id: SO:0001024 -name: haplotype -def: "A haplotype is one of a set of coexisting sequence variants of a haplotype block." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Haplotype "wiki" -is_a: SO:0001060 ! sequence_variant -relationship: variant_of SO:0000355 ! 
haplotype_block - -[Term] -id: SO:0001025 -name: polymorphic_sequence_variant -def: "A sequence variant that is segregating in one or more natural populations of a species." [SO:immuno_workshop] -synonym: "polymorphic sequence variant" EXACT [] -is_a: SO:0001060 ! sequence_variant - -[Term] -id: SO:0001026 -name: genome -def: "A genome is the sum of genetic material within a cell or virion." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genome "wiki" -is_a: SO:0001260 ! sequence_collection - -[Term] -id: SO:0001027 -name: genotype -def: "A genotype is a variant genome, complete or incomplete." [SO:immuno_workshop] -xref: http://en.wikipedia.org/wiki/Genotype "wiki" -is_a: SO:0001060 ! sequence_variant -relationship: variant_of SO:0001026 ! genome - -[Term] -id: SO:0001028 -name: diplotype -def: "A diplotype is a pair of haplotypes from a given individual. It is a genotype where the phase is known." [SO:immuno_workshop] -is_a: SO:0001027 ! genotype - -[Term] -id: SO:0001029 -name: direction_attribute -synonym: "direction attribute" EXACT [] -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001030 -name: forward -def: "Forward is an attribute of the feature, where the feature is in the 5' to 3' direction." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001031 -name: reverse -def: "Reverse is an attribute of the feature, where the feature is in the 3' to 5' direction. Again could be applied to primer." [SO:ke] -is_a: SO:0001029 ! direction_attribute - -[Term] -id: SO:0001032 -name: mitochondrial_DNA -comment: This terms is used by MO. -synonym: "mitochondrial DNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Mitochondrial_DNA "wiki" -intersection_of: SO:0000737 ! mitochondrial_sequence -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001033 -name: chloroplast_DNA -comment: This term is used by MO. -synonym: "chloroplast DNA" EXACT [] -intersection_of: SO:0000745 ! 
chloroplast_sequence -intersection_of: has_quality SO:0000352 ! DNA - -[Term] -id: SO:0001034 -name: mirtron -def: "A debranched intron which mimics the structure of pre-miRNA and enters the miRNA processing pathway without Drosha mediated cleavage." [PMID:17589500, SO:ma] -comment: Ruby et al. Nature 448:83 desribe a new class of miRNAs that are derived from debranched introns. -is_a: SO:0001014 ! intron_domain - -[Term] -id: SO:0001035 -name: piRNA -def: "A small non coding RNA, part of a silencing system that prevents the spreading of selfish genetic elements." [SO:ke] -synonym: "piwi-associated RNA" EXACT [] -xref: http://en.wikipedia.org/wiki/PiRNA "wiki" -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001036 -name: arginyl_tRNA -def: "A tRNA sequence that has an arginine anticodon, and a 3' arginine binding region." [SO:ke] -synonym: "arginyl tRNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0000212 ! arginine_tRNA_primary_transcript - -[Term] -id: SO:0001037 -name: mobile_genetic_element -def: "A nucleotide region with either intra-genome or intracellular moblity, of varying length, which often carry the information necessary for transfer and recombination with the host genome." [PMID:14681355] -synonym: "MGE" EXACT [] -synonym: "mobile genetic element" EXACT [] -xref: http://en.wikipedia.org/wiki/Mobile_genetic_element "wiki" -is_a: SO:0001411 ! biological_region -intersection_of: SO:0000001 ! region -intersection_of: has_quality SO:0001234 ! mobile - -[Term] -id: SO:0001038 -name: extrachromosomal_mobile_genetic_element -def: "An MGE that is not integrated into the host chromosome." [SO:ke] -synonym: "extrachromosomal mobile genetic element" EXACT [] -is_a: SO:0001037 ! mobile_genetic_element - -[Term] -id: SO:0001039 -name: integrated_mobile_genetic_element -def: "An MGE that is integrated into the host chromosome." [SO:ke] -synonym: "integrated mobile genetic element" EXACT [] -is_a: SO:0001037 ! 
mobile_genetic_element - -[Term] -id: SO:0001040 -name: integrated_plasmid -def: "A plasmid sequence that is integrated within the host chromosome." [SO:ke] -synonym: "integrated plasmid" EXACT [] -intersection_of: SO:0001039 ! integrated_mobile_genetic_element -intersection_of: derives_from SO:0000155 ! plasmid - -[Term] -id: SO:0001041 -name: viral_sequence -def: "The region of nucleotide sequence of a virus, a submicroscopic particle that replicates by infecting a host cell." [SO:ke] -comment: The definitions of the children of this term were revised Decemeber 2007 after discussion on song-devel. The resulting definitions are slightly unweildy but hopefully more logically correct. -synonym: "viral sequence" EXACT [] -synonym: "virus sequence" EXACT [] -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -is_a: SO:0001235 ! replicon - -[Term] -id: SO:0001042 -name: phage_sequence -def: "The nucleotide sequence of a virus that infects bacteria." [SO:ke] -synonym: "bacteriophage" EXACT [] -synonym: "phage" EXACT [] -synonym: "phage sequence" EXACT [] -xref: http://en.wikipedia.org/wiki/Bacteriophage "wiki" -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001043 -name: attCtn_site -def: "An attachment site located on a conjugative transposon and used for site-specific integration of a conjugative transposon." [Phigo:at] -synonym: "attCtn site" EXACT [] -is_a: SO:0000946 ! integration_excision_site -relationship: part_of SO:0000371 ! conjugative_transposon - -[Term] -id: SO:0001044 -name: nuclear_mt_pseudogene -def: "A nuclear pseudogene of a mitochndrial gene." [SO:xp] -synonym: "nuclear mitochondrial pseudogene" EXACT [] -synonym: "nuclear mt pseudogene" EXACT [] -synonym: "NUMT" EXACT [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0001045 -name: cointegrated_plasmid -def: "A MGE region consisting of two fused plasmids resulting from a replicative transposition event." 
[phigo:at] -synonym: "cointegrated plasmid" EXACT [] -synonym: "cointegrated replicon" EXACT [] -is_a: SO:0001039 ! integrated_mobile_genetic_element - -[Term] -id: SO:0001046 -name: IRLinv_site -def: "Component of the inversion site located at the left of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRLinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001047 -name: IRRinv_site -def: "Component of the inversion site located at the right of a region susceptible to site-specific inversion." [Phigo:at] -synonym: "IRRinv site" EXACT [] -is_a: SO:0001048 ! inversion_site_part -relationship: part_of SO:0000948 ! inversion_site - -[Term] -id: SO:0001048 -name: inversion_site_part -def: "A region located within an inversion site." [SO:ke] -comment: A term created to allow the parts of an inversion site have an is_a path back to the root. -synonym: "inversion site part" EXACT [] -is_a: SO:0000342 ! site_specific_recombination_target_region - -[Term] -id: SO:0001049 -name: defective_conjugative_transposon -def: "An island that contains genes for integration/excision and the gene and site for the initiation of intercellular transfer by conjugation. It can be complemented for transfer by a conjugative transposon." [Phigo:ariane] -synonym: "defective conjugative transposon" EXACT [] -is_a: SO:0000772 ! genomic_island - -[Term] -id: SO:0001050 -name: repeat_fragment -def: "A portion of a repeat, interrupted by the insertion of another element." [SO:ke] -comment: Requested by Chris Smith, and others at Flybase to help annotate nested repeats. -synonym: "repeat fragment" EXACT [] -is_a: SO:0000840 ! repeat_component -intersection_of: SO:0000657 ! repeat_region -intersection_of: has_quality SO:0000731 ! 
fragmentary - -[Term] -id: SO:0001051 -name: nested_region -is_obsolete: true - -[Term] -id: SO:0001052 -name: nested_repeat -is_obsolete: true - -[Term] -id: SO:0001053 -name: nested_transposon -is_obsolete: true - -[Term] -id: SO:0001054 -name: transposon_fragment -synonym: "transposon fragment" EXACT [] -intersection_of: SO:0000101 ! transposable_element -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001055 -name: transcriptional_cis_regulatory_region -def: "A regulatory_region that modulates the transcription of a gene or genes." [SO:regcreative] -synonym: "transcriptional cis regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001056 -name: splicing_regulatory_region -def: "A regulatory_region that modulates splicing." [SO:ke] -synonym: "splicing regulatory region" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001057 -name: enhanceosome -is_obsolete: true - -[Term] -id: SO:0001058 -name: promoter_targeting_sequence -def: "A transcriptional_cis_regulatory_region that restricts the activity of a CRM to a single promoter and which functions only when both itself and an insulator are located between the CRM and the promoter." [SO:regcreative] -synonym: "promoter targeting sequence" EXACT [] -is_a: SO:0001055 ! transcriptional_cis_regulatory_region - -[Term] -id: SO:0001059 -name: sequence_alteration -def: "A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence." [SO:ke] -synonym: "sequence alteration" EXACT [] -is_a: SO:0000110 ! sequence_feature - -[Term] -id: SO:0001060 -name: sequence_variant -def: "A sequence_variant is a non exact copy of a sequence_feature or genome exhibiting one or more sequence_alteration." [SO:ke] -synonym: "sequence variant" EXACT [] - -[Term] -id: SO:0001061 -name: propeptide_cleavage_site -alt_id: BS:00063 -def: "The propeptide_cleavage_site is the arginine/lysine boundary on a propeptide where cleavage occurs." 
[EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "propeptide cleavage site" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0001062 ! propeptide - -[Term] -id: SO:0001062 -name: propeptide -alt_id: BS:00077 -def: "Part of a peptide chain which is cleaved off during the formation of the mature protein." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "propep" RELATED [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Propeptide "wiki" -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001063 -name: immature_peptide_region -alt_id: BS:00129 -def: "An immature_peptide_region is the extent of the peptide after it has been translated and before any processing occurs." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "immature_peptide_region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001064 -name: active_peptide -alt_id: BS:00076 -def: "Active peptides are proteins which are biologically active, released from a precursor molecule." [EBIBS:GAR, UniProt:curation_manual] -comment: Hormones, neuropeptides, antimicrobial peptides, are active peptides. They are typically short (<40 amino acids) in length. -subset: biosapiens -synonym: "active peptide" EXACT [] -synonym: "peptide" BROAD [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Peptide "wiki" -is_a: SO:0000419 ! mature_protein_region - -[Term] -id: SO:0001066 -name: compositionally_biased_region_of_peptide -alt_id: BS:00068 -def: "Polypeptide region that is rich in a particular amino acid or homopolymeric and greater than three residues in length." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "compbias" RELATED [uniprot:feature_type] -synonym: "compositional bias" RELATED [] -synonym: "compositionally biased" RELATED [] -synonym: "compositionally biased region of peptide" RELATED [] -synonym: "compositionally_biased_region" EXACT [] -is_a: SO:0000839 ! 
polypeptide_region - -[Term] -id: SO:0001067 -name: polypeptide_motif -alt_id: BS:00032 -def: "A sequence motif is a short (up to 20 amino acids) region of biological interest. Such motifs, although they are too short to constitute functional domains, share sequence similarities and are conserved in different proteins. They display a common function (protein-binding, subcellular location etc.)." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "motif" BROAD [uniprot:feature_type] -synonym: "polypeptide motif" EXACT [] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001068 -name: polypeptide_repeat -alt_id: BS:00070 -def: "A polypeptide_repeat is a single copy of an internal sequence repetition." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "polypeptide repeat" EXACT [] -synonym: "repeat" RELATED [uniprot:feature_type] -is_a: SO:0100021 ! polypeptide_conserved_region - -[Term] -id: SO:0001070 -name: polypeptide_structural_region -alt_id: BS:00337 -def: "Region of polypeptide with a given structural property." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "polypeptide structural region" EXACT [] -synonym: "structural_region" RELATED [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001071 -name: membrane_structure -alt_id: BS:00128 -def: "Arrangement of the polypeptide with respect to the lipid bilayer." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "membrane_structure" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001072 -name: extramembrane_polypeptide_region -alt_id: BS:00154 -def: "Polypeptide region that is localized outside of a lipid bilayer." [EBIBS:GAR, SO:cb] -comment: Range. -subset: biosapiens -synonym: "extramembrane" RELATED BS [] -synonym: "extramembrane polypeptide region" EXACT [] -synonym: "extramembrane_region" RELATED BS [] -synonym: "topo_dom" RELATED BS [uniprot:feature_type] -is_a: SO:0001070 ! 
polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001073 -name: cytoplasmic_polypeptide_region -alt_id: BS:00145 -def: "Polypeptide region that is localized inside the cytoplasm." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "cytoplasm_location" EXACT BS [] -synonym: "cytoplasmic polypeptide region" EXACT [] -synonym: "inside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001074 -name: non_cytoplasmic_polypeptide_region -alt_id: BS:00144 -def: "Polypeptide region that is localized outside of a lipid bilayer and outside of the cytoplasm." [EBIBS:GAR, SO:cb] -comment: This could be inside an organelle within the cell. -subset: biosapiens -synonym: "non cytoplasmic polypeptide region" EXACT [] -synonym: "non_cytoplasm_location" EXACT BS [] -synonym: "outside" RELATED BS [] -is_a: SO:0001072 ! extramembrane_polypeptide_region - -[Term] -id: SO:0001075 -name: intramembrane_polypeptide_region -alt_id: BS:00156 -def: "Polypeptide region present in the lipid bilayer." [EBIBS:GAR] -subset: biosapiens -synonym: "intramembrane" RELATED BS [] -synonym: "intramembrane polypeptide region" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region -relationship: part_of SO:0001071 ! membrane_structure - -[Term] -id: SO:0001076 -name: membrane_peptide_loop -alt_id: BS:00155 -def: "Polypeptide region localized within the lipid bilayer where both ends traverse the same membrane." [EBIBS:GAR, SO:cb] -subset: biosapiens -synonym: "membrane peptide loop" EXACT [] -synonym: "membrane_loop" RELATED BS [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001077 -name: transmembrane_polypeptide_region -alt_id: BS:00158 -def: "Polypeptide region traversing the lipid bilayer." 
[EBIBS:GAR, UniProt:curator_manual] -subset: biosapiens -synonym: "transmem" RELATED BS [uniprot:feature_type] -synonym: "transmembrane" RELATED BS [] -synonym: "transmembrane polypeptide region" EXACT [] -is_a: SO:0001075 ! intramembrane_polypeptide_region - -[Term] -id: SO:0001078 -name: polypeptide_secondary_structure -alt_id: BS:00003 -def: "A region of peptide with secondary structure has hydrogen bonding along the peptide chain that causes a defined conformation of the chain." [EBIBS:GAR] -comment: Biosapien term was secondary_structure. -subset: biosapiens -synonym: "2nary structure" RELATED BS [] -synonym: "polypeptide secondary structure" EXACT [] -synonym: "secondary structure" RELATED BS [] -synonym: "secondary structure region" RELATED BS [] -synonym: "secondary_structure" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Secondary_structure "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001079 -name: polypeptide_structural_motif -alt_id: BS:0000338 -def: "Motif is a three-dimensional structural element within the chain, which appears also in a variety of other molecules. Unlike a domain, a motif does not need to form a stable globular unit." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide structural motif" RELATED [] -synonym: "structural_motif" RELATED BS [] -xref: http://en.wikipedia.org/wiki/Structural_motif "wiki" -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0001080 -name: coiled_coil -alt_id: BS:00041 -def: "A coiled coil is a structural motif in proteins, in which alpha-helices are coiled together like the strands of a rope." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "coiled" RELATED BS [uniprot:feature_type] -synonym: "coiled coil" EXACT [] -xref: http://en.wikipedia.org/wiki/Coiled_coil "wiki" -is_a: SO:0001079 ! 
polypeptide_structural_motif - -[Term] -id: SO:0001081 -name: helix_turn_helix -alt_id: BS:00147 -def: "A motif comprising two helices separated by a turn." [EBIBS:GAR] -subset: biosapiens -synonym: "helix turn helix" EXACT [] -synonym: "helix-turn-helix" EXACT [] -synonym: "HTH" RELATED BS [] -is_a: SO:0001079 ! polypeptide_structural_motif -relationship: has_part SO:0001114 ! peptide_helix -relationship: has_part SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001082 -name: polypeptide_sequencing_information -alt_id: BS:00125 -def: "Incompatibility in the sequence due to some experimental problem." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "sequencing_information" EXACT [] -is_a: SO:0000700 ! remark - -[Term] -id: SO:0001083 -name: non_adjacent_residues -alt_id: BS:00182 -def: "Indicates that two consecutive residues in a fragment sequence are not consecutive in the full-length protein and that there are a number of unsequenced residues between them." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "non consecutive" EXACT [] -synonym: "non_cons" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001084 -name: non_terminal_residue -alt_id: BS:00072 -def: "The residue at an extremity of the sequence is not the terminal residue." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "non terminal" EXACT [] -synonym: "non_ter" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001085 -name: sequence_conflict -alt_id: BS:00069 -def: "Different sources report differing sequences." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "conflict" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! 
polypeptide_sequencing_information - -[Term] -id: SO:0001086 -name: sequence_uncertainty -alt_id: BS:00181 -def: "Describes the positions in a sequence where the authors are unsure about the sequence assignment." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "unsure" EXACT [uniprot:feature_type] -is_a: SO:0001082 ! polypeptide_sequencing_information - -[Term] -id: SO:0001087 -name: cross_link -alt_id: BS:00178 -def: "Posttranslationally formed amino acid bonds." [EBIBS:GAR, UniProt:curation_manual] -subset: biosapiens -synonym: "cross_link" EXACT [] -synonym: "crosslink" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001088 -name: disulfide_bond -alt_id: BS:00028 -def: "The covalent bond between sulfur atoms that binds two peptide chains or different parts of one peptide chain and is a structural determinant in many protein molecules." [EBIBS:GAR, UniProt:curation_manual] -comment: 2 discreet & joined. -subset: biosapiens -synonym: "disulfid" RELATED [] -synonym: "disulfide" RELATED [] -synonym: "disulfide bond" RELATED [] -synonym: "disulfide_bond" EXACT [] -synonym: "disulphide" EXACT [] -synonym: "disulphide bond" RELATED [] -is_obsolete: true - -[Term] -id: SO:0001089 -name: post_translationally_modified_region -alt_id: BS:00052 -def: "A region where a transformation occurs in a protein after it has been synthesized. This which may regulate, stabilize, crosslink or introduce new chemical functionalities in the protein." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mod_res" EXACT [uniprot:feature_type] -synonym: "modified residue" EXACT [] -synonym: "post_translational_modification" EXACT [] -xref: http://en.wikipedia.org/wiki/Post_translational_modification "wiki" -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001090 -name: covalent_binding_site -alt_id: BS:00246 -def: "Binding involving a covalent bond." 
[EBIBS:GAR] -subset: biosapiens -synonym: "covalent_binding_site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001091 -name: non_covalent_binding_site -alt_id: BS:00029 -def: "Binding site for any chemical group (co-enzyme, prosthetic group, etc.)." [EBIBS:GAR] -comment: Discrete. -subset: biosapiens -synonym: "binding" RELATED [uniprot:curation] -synonym: "binding site" RELATED [] -synonym: "non_covalent_binding_site" EXACT [] -is_obsolete: true - -[Term] -id: SO:0001092 -name: polypeptide_metal_contact -alt_id: BS:00027 -def: "Residue is part of a binding site for a metal ion." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "metal_binding" RELATED [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001093 -name: protein_protein_contact -alt_id: BS:00131 -def: "Residues involved in protein-protein interactions." [EBIBS:GAR, UniProt:Curation_manual] -subset: biosapiens -synonym: "protein protein contact" EXACT [] -synonym: "protein protein contact site" EXACT [] -synonym: "protein_protein_interaction" RELATED [] -xref: http://en.wikipedia.org/wiki/Protein_protein_interaction "wiki" -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001094 -name: polypeptide_calcium_ion_contact_site -alt_id: BS:00186 -def: "Residue involved in contact with calcium." [EBIBS:GAR] -subset: biosapiens -synonym: "ca bind" RELATED [] -synonym: "Ca_contact_site" EXACT [] -synonym: "polypeptide calcium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001095 -name: polypeptide_cobalt_ion_contact_site -alt_id: BS:00136 -def: "Residue involved in contact with cobalt." [EBIBS:GAR] -subset: biosapiens -synonym: "Co_contact_site" EXACT [] -synonym: "polypeptide cobalt ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001096 -name: polypeptide_copper_ion_contact_site -alt_id: BS:00146 -def: "Residue involved in contact with copper." [EBIBS:GAR] -subset: biosapiens -synonym: "Cu_contact_site" EXACT [] -synonym: "polypeptide copper ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001097 -name: polypeptide_iron_ion_contact_site -alt_id: BS:00137 -def: "Residue involved in contact with iron." [EBIBS:GAR] -subset: biosapiens -synonym: "Fe_contact_site" EXACT [] -synonym: "polypeptide iron ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001098 -name: polypeptide_magnesium_ion_contact_site -alt_id: BS:00187 -def: "Residue involved in contact with magnesium." [EBIBS:GAR] -subset: biosapiens -synonym: "Mg_contact_site" EXACT [] -synonym: "polypeptide magnesium ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001099 -name: polypeptide_manganese_ion_contact_site -alt_id: BS:00140 -def: "Residue involved in contact with manganese." [EBIBS:GAR] -subset: biosapiens -synonym: "Mn_contact_site" EXACT [] -synonym: "polypeptide manganese ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001100 -name: polypeptide_molybdenum_ion_contact_site -alt_id: BS:00141 -def: "Residue involved in contact with molybdenum." [EBIBS:GAR] -subset: biosapiens -synonym: "Mo_contact_site" EXACT [] -synonym: "polypeptide molybdenum ion contact site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001101 -name: polypeptide_nickel_ion_contact_site -alt_id: BS:00142 -def: "Residue involved in contact with nickel." [EBIBS:GAR] -subset: biosapiens -synonym: "Ni_contact_site" EXACT [] -synonym: "polypeptide nickel ion contact site" EXACT [] -is_a: SO:0001092 ! 
polypeptide_metal_contact - -[Term] -id: SO:0001102 -name: polypeptide_tungsten_ion_contact_site -alt_id: BS:00143 -def: "Residue involved in contact with tungsten." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide tungsten ion contact site" EXACT [] -synonym: "W_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001103 -name: polypeptide_zinc_ion_contact_site -alt_id: BS:00185 -def: "Residue involved in contact with zinc." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide zinc ion contact site" EXACT [] -synonym: "Zn_contact_site" EXACT [] -is_a: SO:0001092 ! polypeptide_metal_contact - -[Term] -id: SO:0001104 -name: catalytic_residue -alt_id: BS:00026 -def: "Amino acid involved in the activity of an enzyme." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "act_site" RELATED [uniprot:feature_type] -synonym: "active site residue" EXACT [] -synonym: "catalytic residue" EXACT [] -is_a: SO:0001237 ! amino_acid -relationship: part_of SO:0100019 ! polypeptide_catalytic_motif - -[Term] -id: SO:0001105 -name: polypeptide_ligand_contact -alt_id: BS:00157 -def: "Residues which interact with a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide ligand contact" EXACT [] -synonym: "protein-ligand interaction" RELATED [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0001106 -name: asx_motif -alt_id: BS:00202 -def: "A motif of five consecutive residues and two H-bonds in which: Residue(i) is Aspartate or Asparagine (Asx), side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3), main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx motif" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001107 -name: beta_bulge -alt_id: BS:00208 -def: "A motif of three residues within a beta-sheet in which the main chains of two consecutive residues are H-bonded to that of the third, and in which the dihedral angles are as follows: Residue(i): -140 degrees < phi(l) -20 degrees , -90 degrees < psi(l) < 40 degrees. Residue (i+1): -180 degrees < phi < -25 degrees or +120 degrees < phi < +180 degrees, +40 degrees < psi < +180 degrees or -180 degrees < psi < -120 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge" EXACT [] -xref: http://en.wikipedia.org/wiki/Beta_bulge "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001108 -name: beta_bulge_loop -alt_id: BS:00209 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds. Beta bulge loops often occur at the loop ends of beta-hairpins." [EBIBS:GAR, Http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001109 -name: beta_bulge_loop_five -alt_id: BS:00210 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+4), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+3), these loops have an RL nest at residues i+2 and i+3." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop five" EXACT [] -is_a: SO:0001108 ! 
beta_bulge_loop - -[Term] -id: SO:0001110 -name: beta_bulge_loop_six -alt_id: BS:00211 -def: "A motif of three residues within a beta-sheet consisting of two H-bonds in which: the main-chain NH of residue(i) is H-bonded to the main-chain CO of residue(i+5), the main-chain CO of residue i is H-bonded to the main-chain NH of residue(i+4), these loops have an RL nest at residues i+3 and i+4." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta bulge loop six" EXACT [] -is_a: SO:0001108 ! beta_bulge_loop - -[Term] -id: SO:0001111 -name: beta_strand -alt_id: BS:00042 -def: "A beta strand describes a single length of polypeptide chain that forms part of a beta sheet. A single continuous stretch of amino acids adopting an extended conformation of hydrogen bonds between the N-O and the C=O of another part of the peptide. This forms a secondary protein structure in which two or more extended polypeptide regions are hydrogen-bonded to one another in a planar array." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "strand" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Beta_sheet "wiki" -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001112 -name: antiparallel_beta_strand -alt_id: BS:0000341 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (one running N-terminal to C-terminal and one running C-terminal to N-terminal). Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i) and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they form two mutual backbone hydrogen bonds to each other's flanking peptide groups; this is known as a close pair of hydrogen bonds. The peptide backbone dihedral angles (phi, psi) are about (-140 degrees, 135 degrees) in antiparallel sheets." 
[EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "antiparallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001113 -name: parallel_beta_strand -alt_id: BS:00151 -def: "A peptide region which hydrogen bonded to another region of peptide running in the oposite direction (both running N-terminal to C-terminal). This orientation is slightly less stable because it introduces nonplanarity in the inter-strand hydrogen bonding pattern. Hydrogen bonding occurs between every other C=O from one strand to every other N-H on the adjacent strand. In this case, if two atoms C-alpha (i)and C-alpha (j) are adjacent in two hydrogen-bonded beta strands, then they do not hydrogen bond to each other; rather, one residue forms hydrogen bonds to the residues that flank the other (but not vice versa). For example, residue i may form hydrogen bonds to residues j - 1 and j + 1; this is known as a wide pair of hydrogen bonds. By contrast, residue j may hydrogen-bond to different residues altogether, or to none at all. The dihedral angles (phi, psi) are about (-120 degrees, 115 degrees) in parallel sheets." [EBIBS:GAR, UniProt:curation_manual] -comment: Range. -subset: biosapiens -synonym: "parallel beta strand" EXACT [] -is_a: SO:0001111 ! beta_strand - -[Term] -id: SO:0001114 -name: peptide_helix -alt_id: BS:00152 -def: "A helix is a secondary_structure conformation where the peptide backbone forms a coil." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "helix" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001115 -name: left_handed_peptide_helix -alt_id: BS:00222 -def: "A left handed helix is a region of peptide where the coiled conformation turns in an anticlockwise, left handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix-l" RELATED [] -synonym: "left handed helix" EXACT [] -is_a: SO:0001114 ! 
peptide_helix - -[Term] -id: SO:0001116 -name: right_handed_peptide_helix -alt_id: BS:0000339 -def: "A right handed helix is a region of peptide where the coiled conformation turns in a clockwise, right handed screw." [EBIBS:GAR] -subset: biosapiens -synonym: "helix" RELATED BS [] -synonym: "right handed helix" EXACT [] -is_a: SO:0001114 ! peptide_helix - -[Term] -id: SO:0001117 -name: alpha_helix -alt_id: BS:00040 -def: "The helix has 3.6 residues per turn which corersponds to a translation of 1.5 angstroms (= 0.15 nm) along the helical axis. Every backbone N-H group donates a hydrogen bond to the backbone C=O group of the amino acid four residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "a-helix" RELATED BS [] -synonym: "helix" RELATED BS [uniprot:feature_type] -xref: http://en.wikipedia.org/wiki/Alpha_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001118 -name: pi_helix -alt_id: BS:00153 -def: "The pi helix has 4.1 residues per turn and a translation of 1.15 (=0.115 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid five residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "pi helix" EXACT [] -xref: http://en.wikipedia.org/wiki/Pi_helix "wiki" -is_a: SO:0001116 ! right_handed_peptide_helix - -[Term] -id: SO:0001119 -name: three_ten_helix -alt_id: BS:0000340 -def: "The 3-10 helix has 3 residues per turn with a translation of 2.0 angstroms (=0.2 nm) along the helical axis. The N-H group of an amino acid forms a hydrogen bond with the C=O group of the amino acid three residues earlier." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "3(10) helix" EXACT [] -synonym: "3-10 helix" EXACT [] -synonym: "310 helix" EXACT [] -synonym: "three_ten_helix" EXACT [] -xref: http://en.wikipedia.org/wiki/310_helix "wiki" -is_a: SO:0001116 ! 
right_handed_peptide_helix - -[Term] -id: SO:0001120 -name: polypeptide_nest_motif -alt_id: BS:00223 -def: "A motif of two consecutive residues with dihedral angles. Nest should not have Proline as any residue. Nests frequently occur as parts of other motifs such as Schellman loops." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest" RELATED BS [] -synonym: "nest_motif" EXACT [] -synonym: "polypeptide nest motif" RELATED [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001121 -name: polypeptide_nest_left_right_motif -alt_id: BS:00224 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_left_right" EXACT [] -synonym: "nest_lr" EXACT [] -synonym: "polypeptide nest left right motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001122 -name: polypeptide_nest_right_left_motif -alt_id: BS:00225 -def: "A motif of two consecutive residues with dihedral angles: Residue(i): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "nest_right_left" EXACT [] -synonym: "nest_rl" EXACT [] -synonym: "polypeptide nest right left motif" EXACT [] -is_a: SO:0001120 ! polypeptide_nest_motif - -[Term] -id: SO:0001123 -name: schellmann_loop -alt_id: BS:00226 -def: "A motif of six or seven consecutive residues that contains two H-bonds." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "paperclip" RELATED BS [] -synonym: "paperclip loop" RELATED [] -synonym: "schellmann_loop" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001124 -name: schellmann_loop_seven -alt_id: BS:00228 -def: "Wild type: A motif of seven consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+6), the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+5)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop seven" EXACT [] -synonym: "seven-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001125 -name: schellmann_loop_six -alt_id: BS:00227 -def: "Common Type: A motif of six consecutive residues that contains two H-bonds in which: the main-chain CO of residue(i) is H-bonded to the main-chain NH of residue(i+5) the main-chain CO of residue(i+1) is H-bonded to the main-chain NH of residue(i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "schellmann loop six" EXACT [] -synonym: "six-residue schellmann loop" EXACT [] -is_a: SO:0001123 ! schellmann_loop - -[Term] -id: SO:0001126 -name: serine_threonine_motif -alt_id: BS:00229 -def: "A motif of five consecutive residues and two hydrogen bonds in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2) or (i+3) , the main-chain CO group of residue(i) is H-bonded to the main-chain NH of residue(i+3) or (i+4)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine motif" EXACT [] -synonym: "st motif" EXACT [] -synonym: "st_motif" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0001127 -name: serine_threonine_staple_motif -alt_id: BS:00230 -def: "A motif of four or five consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain OH of residue(i) is H-bonded to the main-chain CO of residue(i3) or (i4), Phi angles of residues(i1), (i2) and (i3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine threonine staple motif" EXACT [] -synonym: "st_staple" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001128 -name: polypeptide_turn_motif -alt_id: BS:00148 -def: "A reversal in the direction of the backbone of a protein that is stabilized by hydrogen bond between backbone NH and CO groups, involving no more than 4 amino acid residues." [EBIBS:GAR, uniprot:feature_type] -comment: Range. -subset: biosapiens -synonym: "turn" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0001129 -name: asx_turn_left_handed_type_one -alt_id: BS:00206 -def: "Left handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type one" EXACT [] -synonym: "asx_turn_il" RELATED [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001130 -name: asx_turn_left_handed_type_two -alt_id: BS:00204 -def: "Left handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn left handed type two" EXACT [] -synonym: "asx_turn_iil" EXACT [] -is_a: SO:0000912 ! 
asx_turn - -[Term] -id: SO:0001131 -name: asx_turn_right_handed_type_two -alt_id: BS:00205 -def: "Right handed type II (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, +80 degrees < psi +120 degrees < +180 degrees. Residue(i+1): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn right handed type two" EXACT [] -synonym: "asx_turn_iir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001132 -name: asx_turn_right_handed_type_one -alt_id: BS:00207 -def: "Right handed type I (dihedral angles):- Residue(i): -140 degrees < chi (1) -120 degrees < -20 degrees, -90 degrees < psi +120 degrees < +40 degrees. Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "asx turn type right handed type one" EXACT [] -synonym: "asx_turn_ir" EXACT [] -is_a: SO:0000912 ! asx_turn - -[Term] -id: SO:0001133 -name: beta_turn -alt_id: BS:00212 -def: "A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles of the second and third residues, which are the basis for sub-categorization." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001134 -name: beta_turn_left_handed_type_one -alt_id: BS:00215 -def: "Left handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles:- Residue(i+1): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees. 
Residue(i+2): -140 degrees > phi > -20 degrees, -90 degrees > psi > +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type one" EXACT [] -synonym: "beta_turn_il" EXACT [] -synonym: "type I' beta turn" EXACT [] -synonym: "type I' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001135 -name: beta_turn_left_handed_type_two -alt_id: BS:00213 -def: "Left handed type II: A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees > phi > -20 degrees, +80 degrees > psi > +180 degrees. Residue(i+2): +20 degrees > phi > +140 degrees, -40 degrees > psi > +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn left handed type two" EXACT [] -synonym: "beta_turn_iil" EXACT [] -synonym: "type II' beta turn" EXACT [] -synonym: "type II' turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001136 -name: beta_turn_right_handed_type_one -alt_id: BS:00216 -def: "Right handed type I:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees. Residue(i+2): -140 degrees < phi < -20 degrees, -90 degrees < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type one" EXACT [] -synonym: "beta_turn_ir" EXACT [] -synonym: "type I beta turn" EXACT [] -synonym: "type I turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001137 -name: beta_turn_right_handed_type_two -alt_id: BS:00214 -def: "Right handed type II:A motif of four consecutive residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth. It is characterized by the dihedral angles: Residue(i+1): -140 degrees < phi < -20 degrees, +80 degrees < psi < +180 degrees. Residue(i+2): +20 degrees < phi < +140 degrees, -40 degrees < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "beta turn right handed type two" EXACT [] -synonym: "beta_turn_iir" EXACT [] -synonym: "type II beta turn" EXACT [] -synonym: "type II turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001138 -name: gamma_turn -alt_id: BS:00219 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001139 -name: gamma_turn_classic -alt_id: BS:00220 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=75.0 - psi(i+1)=-64.0." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "classic gamma turn" EXACT [] -synonym: "gamma turn classic" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001140 -name: gamma_turn_inverse -alt_id: BS:00221 -def: "Gamma turns, defined for 3 residues i, i+1, i+2 if a hydrogen bond exists between residues i and i+2 and the phi and psi angles of residue i+1 fall within 40 degrees: phi(i+1)=-79.0 - psi(i+1)=69.0." 
[EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "gamma turn inverse" EXACT [] -is_a: SO:0001138 ! gamma_turn - -[Term] -id: SO:0001141 -name: serine_threonine_turn -alt_id: BS:00231 -def: "A motif of three consecutive residues and one H-bond in which: residue(i) is Serine (S) or Threonine (T), the side-chain O of residue(i) is H-bonded to the main-chain NH of residue(i+2)." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "serine/threonine turn" EXACT [] -synonym: "st_turn" EXACT [] -is_a: SO:0001128 ! polypeptide_turn_motif - -[Term] -id: SO:0001142 -name: st_turn_left_handed_type_one -alt_id: BS:00234 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type one" EXACT [] -synonym: "st_turn_il" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001143 -name: st_turn_left_handed_type_two -alt_id: BS:00232 -def: "The peptide twists in an anticlockwise, left handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn left handed type two" EXACT [] -synonym: "st_turn_iil" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001144 -name: st_turn_right_handed_type_one -alt_id: BS:00235 -def: "The peptide twists in an clockwise, right handed manner. 
The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, -90 degrees psi +120 degrees < +40 degrees, residue(i+1): -140 degrees < phi < -20 degrees, -90 < psi < +40 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type one" EXACT [] -synonym: "st_turn_ir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001145 -name: st_turn_right_handed_type_two -alt_id: BS:00233 -def: "The peptide twists in an clockwise, right handed manner. The dihedral angles for this turn are: Residue(i): -140 degrees < chi(1) -120 degrees < -20 degrees, +80 degrees psi +120 degrees < +180 degrees, residue(i+1): +20 degrees < phi < +140 degrees, -40 < psi < +90 degrees." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "st turn right handed type two" EXACT [] -synonym: "st_turn_iir" EXACT [] -is_a: SO:0001141 ! serine_threonine_turn - -[Term] -id: SO:0001146 -name: polypeptide_variation_site -alt_id: BS:00336 -def: "A site of sequence variation (alteration). Alternative sequence due to naturally occuring events such as polymorphisms and altermatve splicing or experimental methods such as site directed mutagenesis." [EBIBS:GAR, SO:ke] -comment: For example, was a substitution natural or mutated as part of an experiment? This term is added to merge the biosapiens term sequence_variations. -subset: biosapiens -synonym: "sequence_variations" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0001147 -name: natural_variant_site -alt_id: BS:00071 -def: "Describes the natural sequence variants due to polymorphisms, disease-associated mutations, RNA editing and variations between strains, isolates or cultivars." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. 
-subset: biosapiens -synonym: "natural_variant" BROAD [] -synonym: "sequence variation" BROAD [] -synonym: "variant" BROAD [uniprot:feature_type] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001148 -name: mutated_variant_site -alt_id: BS:00036 -def: "Site which has been experimentally altered." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "mutagen" EXACT BS [uniprot:feature_type] -synonym: "mutagenesis" EXACT [] -synonym: "mutated_site" EXACT [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001149 -name: alternate_sequence_site -alt_id: BS:00073 -alt_id: SO:0001065 -def: "Description of sequence variants produced by alternative splicing, alternative promoter usage, alternative initiation and ribosomal frameshifting." [EBIBS:GAR, UniProt:curation_manual] -comment: Discrete. -subset: biosapiens -synonym: "alternative_sequence" EXACT [] -synonym: "isoform" NARROW [] -synonym: "sequence variation" NARROW [] -synonym: "var_seq" EXACT [uniprot:feature_type] -synonym: "varsplic" NARROW [] -is_a: SO:0001146 ! polypeptide_variation_site - -[Term] -id: SO:0001150 -name: beta_turn_type_six -def: "A motif of four consecutive peptide resides of type VIa or type VIb and where the i+2 residue is cis-proline." [SO:cb] -subset: biosapiens -synonym: "beta turn type six" EXACT [] -synonym: "cis-proline loop" EXACT [] -synonym: "type VI beta turn" EXACT [] -synonym: "type VI turn" EXACT [] -is_a: SO:0001133 ! beta_turn - -[Term] -id: SO:0001151 -name: beta_turn_type_six_a -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -90 degrees, psi ~ 0 degrees." 
[PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six a" EXACT [] -synonym: "type VIa beta turn" EXACT [] -synonym: "type VIa turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001152 -name: beta_turn_type_six_a_one -subset: biosapiens -synonym: "beta turn type six a one" EXACT [] -synonym: "type VIa1 beta turn" EXACT [] -synonym: "type VIa1 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001153 -name: beta_turn_type_six_a_two -subset: biosapiens -synonym: "beta turn type six a two" EXACT [] -synonym: "type VIa2 beta turn" EXACT [] -synonym: "type VIa2 turn" EXACT [] -is_a: SO:0001151 ! beta_turn_type_six_a - -[Term] -id: SO:0001154 -name: beta_turn_type_six_b -def: "A motif of four consecutive peptide residues, of which the i+2 residue is proline, and that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -120 degrees, psi ~ 120 degrees. Residue(i+2): phi ~ -60 degrees, psi ~ 0 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type six b" EXACT [] -synonym: "type VIb beta turn" EXACT [] -synonym: "type VIb turn" EXACT [] -is_a: SO:0001150 ! beta_turn_type_six - -[Term] -id: SO:0001155 -name: beta_turn_type_eight -def: "A motif of four consecutive peptide residues that may contain one H-bond, which, if present, is between the main-chain CO of the first residue and the main-chain NH of the fourth and is characterized by the dihedral angles: Residue(i+1): phi ~ -60 degrees, psi ~ -30 degrees. Residue(i+2): phi ~ -120 degrees, psi ~ 120 degrees." [PMID:2371257, SO:cb] -subset: biosapiens -synonym: "beta turn type eight" EXACT [] -synonym: "type VIII beta turn" EXACT [] -synonym: "type VIII turn" EXACT [] -is_a: SO:0001133 ! 
beta_turn - -[Term] -id: SO:0001156 -name: DRE_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -10 and -60 relative to the TSS. Consensus sequence is WATCGATW." [PMID:12537576] -comment: This consensus sequence was identified computationally using the MEME algorithm within core promoter sequences from -60 to +40, with an E value of 1.7e-183. Tends to co-occur with Motif 7. Tends to not occur with DPE motif (SO:0000015) or motif 10. -synonym: "DRE motif" EXACT [] -synonym: "NDM4" EXACT [] -synonym: "WATCGATW_motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001157 -name: DMv4_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, located immediately upstream of some TATA box elements with respect to the TSS (+1). Consensus sequence is YGGTCACACTR. Marked spatial preference within core promoter; tend to occur near the TSS, although not as tightly as INR (SO:0000014)." [PMID:16827941\:12537576] -synonym: "directional motif v4" EXACT [] -synonym: "DMv4" EXACT [] -synonym: "DMv4 motif" EXACT [] -synonym: "motif 1 element" EXACT [] -synonym: "promoter motif 1" EXACT [] -synonym: "YGGTCACATR" NARROW [] -is_a: SO:0000713 ! DNA_motif - -[Term] -id: SO:0001158 -name: E_box_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and +1 relative to the TSS. Consensus sequence is AWCAGCTGWT. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015)." [PMID:12537576\:16827941] -synonym: "AWCAGCTGWT" NARROW [] -synonym: "E box motif" EXACT [] -synonym: "generic E box motif" EXACT [] -synonym: "NDM5" RELATED [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001159 -name: DMv5_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -50 and -10 relative to the TSS. Consensus sequence is KTYRGTATWTTT. Tends to co-occur with DMv4 (SO:0001157) . Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v5" EXACT [] -synonym: "DMv5" EXACT [] -synonym: "DMv5 motif" EXACT [] -synonym: "KTYRGTATWTTT" NARROW [] -synonym: "promoter motif 6" RELATED [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001160 -name: DMv3_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -30 and +15 relative to the TSS. Consensus sequence is KNNCAKCNCTRNY. Tends to co-occur with DMv2 (SO:0001161). Tends to not occur with DPE motif (SO:0000015) or MTE (0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v3" EXACT [] -synonym: "DMv3" EXACT [] -synonym: "DMv3 motif" EXACT [] -synonym: "KNNCAKCNCTRNY" NARROW [] -synonym: "promoter motif 7" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001161 -name: DMv2_motif -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between -60 and -45 relative to the TSS. Consensus sequence is MKSYGGCARCGSYSS. Tends to co-occur with DMv3 (SO:0001160). Tends to not occur with DPE motif (SO:0000015) or MTE (SO:0001162)." [PMID:12537576\:16827941] -synonym: "directional motif v2" EXACT [] -synonym: "DMv2" EXACT [] -synonym: "DMv2 motif" EXACT [] -synonym: "MKSYGGCARCGSYSS" NARROW [] -synonym: "promoter motif 8" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001162 -name: MTE -def: "A sequence element characteristic of some RNA polymerase II promoters, usually located between +20 and +30 relative to the TSS. Consensus sequence is CSARCSSAACGS. Tends to co-occur with INR motif (SO:0000014). Tends to not occur with DPE motif (SO:0000015) or DMv5 (SO:0001159)." [PMID:12537576\:15231738] -synonym: "CSARCSSAACGS" NARROW [] -synonym: "motif ten element" EXACT [] -synonym: "motif_ten_element" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001163 -name: INR1_motif -def: "A promoter motif with consensus sequence TCATTCG." [PMID:16827941] -synonym: "directional motif p3" EXACT [] -synonym: "directional promoter motif 3" EXACT [] -synonym: "DMp3" EXACT [] -synonym: "INR1 motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001164 -name: DPE1_motif -def: "A promoter motif with consensus sequence CGGACGT." [PMID:16827941] -synonym: "directional motif 5" EXACT [] -synonym: "directional promoter motif 5" RELATED [] -synonym: "DMp5" EXACT [] -synonym: "DPE1 motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001165 -name: DMv1_motif -def: "A promoter motif with consensus sequence CARCCCT." [PMID:16827941] -synonym: "directional promoter motif v1" RELATED [] -synonym: "DMv1" RELATED [] -synonym: "DMv1 motif" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001166 -name: GAGA_motif -def: "A non directional promoter motif with consensus sequence GAGAGCG." [PMID:16827941] -synonym: "GAGA" EXACT [] -synonym: "GAGA motif" EXACT [] -synonym: "NDM1" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! 
RNApol_II_promoter - -[Term] -id: SO:0001167 -name: NDM2_motif -def: "A non directional promoter motif with consensus CGMYGYCR." [PMID:16827941] -synonym: "NDM2" EXACT [] -synonym: "NDM2 motif" EXACT [] -synonym: "non directional promoter motif 2" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001168 -name: NDM3_motif -def: "A non directional promoter motif with consensus sequence GAAAGCT." [PMID:16827941] -synonym: "NDM3" EXACT [] -synonym: "NDM3 motif" EXACT [] -synonym: "non directional motif 3" EXACT [] -is_a: SO:0000713 ! DNA_motif -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001169 -name: ds_RNA_viral_sequence -def: "A ds_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded RNA." [SO:ke] -synonym: "double stranded RNA virus sequence" EXACT [] -synonym: "ds RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001170 -name: polinton -def: "A kind of DNA transposon that populates the genomes of protists, fungi, and animals, characterized by a unique set of proteins necessary for their transposition, including a protein-primed DNA polymerase B, retroviral integrase, cysteine protease, and ATPase. Polintons are characterized by 6-bp target site duplications, terminal-inverted repeats that are several hundred nucleotides long, and 5'-AG and TC-3' termini. Polintons exist as autonomous and nonautonomous elements." [PMID:16537396] -synonym: "maverick element" RELATED [] -is_a: SO:0000208 ! terminal_inverted_repeat_element - -[Term] -id: SO:0001171 -name: rRNA_21S -def: "A component of the large ribosomal subunit in mitochondrial rRNA." [RSC:cb] -synonym: "21S LSU rRNA" EXACT [] -synonym: "21S ribosomal RNA" EXACT [] -synonym: "21S rRNA" EXACT [] -synonym: "rRNA 21S" EXACT [] -is_a: SO:0000651 ! large_subunit_rRNA - -[Term] -id: SO:0001172 -name: tRNA_region -def: "A region of a tRNA." 
[RSC:cb] -synonym: "tRNA region" EXACT [] -is_a: SO:0000834 ! mature_transcript_region -relationship: part_of SO:0000253 ! tRNA - -[Term] -id: SO:0001173 -name: anticodon_loop -def: "A sequence of seven nucleotide bases in tRNA which contains the anticodon. It has the sequence 5'-pyrimidine-purine-anticodon-modified purine-any base-3." [ISBN:0716719207] -synonym: "anti-codon loop" EXACT [] -synonym: "anticodon loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001174 -name: anticodon -def: "A sequence of three nucleotide bases in tRNA which recognizes a codon in mRNA." [RSC:cb] -synonym: "anti-codon" EXACT [] -xref: http://en.wikipedia.org/wiki/Anticodon "wiki" -is_a: SO:0001172 ! tRNA_region -relationship: part_of SO:0001173 ! anticodon_loop - -[Term] -id: SO:0001175 -name: CCA_tail -def: "Base sequence at the 3' end of a tRNA. The 3'-hydroxyl group on the terminal adenosine is the attachment point for the amino acid." [ISBN:0716719207] -synonym: "CCA sequence" EXACT [] -synonym: "CCA tail" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001176 -name: DHU_loop -def: "Non-base-paired sequence of nucleotide bases in tRNA. It contains several dihydrouracil residues." [ISBN:071671920] -synonym: "D loop" RELATED [] -synonym: "DHU loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001177 -name: T_loop -def: "Non-base-paired sequence of three nucleotide bases in tRNA. It has sequence T-Psi-C." [ISBN:0716719207] -synonym: "T loop" EXACT [] -synonym: "TpsiC loop" EXACT [] -is_a: SO:0001172 ! tRNA_region - -[Term] -id: SO:0001178 -name: pyrrolysine_tRNA_primary_transcript -def: "A primary transcript encoding pyrrolysyl tRNA (SO:0000766)." [RSC:cb] -synonym: "pyrrolysine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0001179 -name: U3_snoRNA -def: "U3 snoRNA is a member of the box C/D class of small nucleolar RNAs. 
The U3 snoRNA secondary structure is characterised by a small 5' domain (with boxes A and A'), and a larger 3' domain (with boxes B, C, C', and D), the two domains being linked by a single-stranded hinge. Boxes B and C form the B/C motif, which appears to be exclusive to U3 snoRNAs, and boxes C' and D form the C'/D motif. The latter is functionally similar to the C/D motifs found in other snoRNAs. The 5' domain and the hinge region act as a pre-rRNA-binding domain. The 3' domain has conserved protein-binding sites. Both the box B/C and box C'/D motifs are sufficient for nuclear retention of U3 snoRNA. The box C'/D motif is also necessary for nucleolar localization, stability and hypermethylation of U3 snoRNA. Both box B/C and C'/D motifs are involved in specific protein interactions and are necessary for the rRNA processing functions of U3 snoRNA." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00012] -comment: The definition is most of the old definition for snoRNA (SO:0000275). -synonym: "small nucleolar RNA U3" EXACT [] -synonym: "snoRNA U3" EXACT [] -synonym: "U3 small nucleolar RNA" EXACT [] -synonym: "U3 snoRNA" EXACT [] -xref: http://en.wikipedia.org/wiki/Small_nucleolar_RNA_U3 "wiki" -is_a: SO:0000593 ! C_D_box_snoRNA - -[Term] -id: SO:0001180 -name: AU_rich_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is rich in AUUUA pentamers. Messenger RNAs bearing multiple AU-rich elements are often unstable." [PMID:7892223] -synonym: "ARE" RELATED [] -synonym: "AU rich element" EXACT [] -synonym: "AU-rich element" EXACT [] -xref: http://en.wikipedia.org/wiki/AU-rich_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001181 -name: Bruno_response_element -def: "A cis-acting element found in the 3' UTR of some mRNA which is bound by the Drosophila Bruno protein and its homologs." 
[PMID:10893231] -comment: Not to be confused with BRE_motif (SO:0000016), which binds transcription factor II B. -synonym: "BRE" RELATED [] -synonym: "Bruno response element" EXACT [] -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000205 ! three_prime_UTR - -[Term] -id: SO:0001182 -name: iron_responsive_element -def: "A regulatory sequence found in the 5' and 3' UTRs of many mRNAs which encode iron-binding proteins. It has a hairpin structure and is recognized by trans-acting proteins known as iron-regulatory proteins." [PMID:3198610, PMID:8710843] -synonym: "IRE" EXACT [] -synonym: "iron responsive element" EXACT [] -xref: http://en.wikipedia.org/wiki/Iron_responsive_element "wiki" -is_a: SO:0000837 ! UTR_region -relationship: part_of SO:0000203 ! UTR - -[Term] -id: SO:0001183 -name: morpholino -def: "An attribute describing a sequence composed of nucleobases bound to a morpholino backbone. A morpholino backbone consists of morpholine (CHEBI:34856) rings connected by phosphorodiamidate linkages." [RSC:cb] -comment: Do not use this for feature annotation. Use morpholino_oligo (SO:0000034) instead. -xref: http://en.wikipedia.org/wiki/Morpholino "wiki" -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001184 -name: PNA -def: "An attribute describing a sequence composed of peptide nucleic acid (CHEBI:48021), a chemical consisting of nucleobases bound to a backbone composed of repeating N-(2-aminoethyl)-glycine units linked by peptide bonds. The purine and pyrimidine bases are linked to the backbone by methylene carbonyl bonds." [RSC:cb] -comment: Do not use this term for feature annotation. Use PNA_oligo (SO:0001011) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001185 -name: enzymatic -def: "An attribute describing the sequence of a transcript that has catalytic activity with or without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use enzymatic_RNA (SO:0000372) instead. -is_a: SO:0000733 ! 
feature_attribute - -[Term] -id: SO:0001186 -name: ribozymic -def: "An attribute describing the sequence of a transcript that has catalytic activity even without an associated ribonucleoprotein." [RSC:cb] -comment: Do not use this for feature annotation. Use ribozyme (SO:0000374) instead. -is_a: SO:0001185 ! enzymatic - -[Term] -id: SO:0001187 -name: pseudouridylation_guide_snoRNA -def: "A snoRNA that specifies the site of pseudouridylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA pseudouridylation guide activity (GO:0030558). -synonym: "pseudouridylation guide snoRNA" EXACT [] -is_a: SO:0000594 ! H_ACA_box_snoRNA - -[Term] -id: SO:0001188 -name: LNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of 'locked' deoxyribose rings connected to a phosphate backbone. The deoxyribose unit's conformation is 'locked' by a 2'-C,4'-C-oxymethylene link." [CHEBI:48010] -comment: Do not use this term for feature annotation. Use LNA_oligo (SO:0001189) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001189 -name: LNA_oligo -def: "An oligo composed of LNA residues." [RSC:cb] -synonym: "LNA oligo" EXACT [] -synonym: "locked nucleic acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Locked_nucleic_acid "wiki" -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001188 ! LNA - -[Term] -id: SO:0001190 -name: TNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of threose rings connected to a phosphate backbone." [CHEBI:48019] -comment: Do not use this term for feature annotation. Use TNA_oligo (SO:0001191) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001191 -name: TNA_oligo -def: "An oligo composed of TNA residues." 
[RSC:cb] -synonym: "threose nucleic acid" EXACT [] -synonym: "TNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Threose_nucleic_acid "wiki" -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001190 ! TNA - -[Term] -id: SO:0001192 -name: GNA -def: "An attribute describing a sequence consisting of nucleobases attached to a repeating unit made of an acyclic three-carbon propylene glycol connected to a phosphate backbone. It has two enantiomeric forms, (R)-GNA and (S)-GNA." [CHEBI:48015] -comment: Do not use this term for feature annotation. Use GNA_oligo (SO:0001192) instead. -is_a: SO:0000348 ! nucleic_acid - -[Term] -id: SO:0001193 -name: GNA_oligo -def: "An oligo composed of GNA residues." [RSC:cb] -synonym: "glycerol nucleic acid" EXACT [] -synonym: "glycol nucleic acid" EXACT [] -synonym: "GNA oligo" EXACT [] -xref: http://en.wikipedia.org/wiki/Glycerol_nucleic_acid "wiki" -intersection_of: SO:0001247 ! synthetic_oligo -intersection_of: has_quality SO:0001192 ! GNA - -[Term] -id: SO:0001194 -name: R_GNA -def: "An attribute describing a GNA sequence in the (R)-GNA enantiomer." [CHEBI:48016] -comment: Do not use this term for feature annotation. Use R_GNA_oligo (SO:0001195) instead. -synonym: "R GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001195 -name: R_GNA_oligo -def: "An oligo composed of (R)-GNA residues." [RSC:cb] -synonym: "(R)-glycerol nucleic acid" EXACT [] -synonym: "(R)-glycol nucleic acid" EXACT [] -synonym: "R GNA oligo" EXACT [] -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001194 ! R_GNA - -[Term] -id: SO:0001196 -name: S_GNA -def: "An attribute describing a GNA sequence in the (S)-GNA enantiomer." [CHEBI:48017] -comment: Do not use this term for feature annotation. Use S_GNA_oligo (SO:0001197) instead. -synonym: "S GNA" EXACT [] -is_a: SO:0001192 ! GNA - -[Term] -id: SO:0001197 -name: S_GNA_oligo -def: "An oligo composed of (S)-GNA residues." 
[RSC:cb] -synonym: "(S)-glycerol nucleic acid" EXACT [] -synonym: "(S)-glycol nucleic acid" EXACT [] -synonym: "S GNA oligo" EXACT [] -intersection_of: SO:0001193 ! GNA_oligo -intersection_of: has_quality SO:0001196 ! S_GNA - -[Term] -id: SO:0001198 -name: ds_DNA_viral_sequence -def: "A ds_DNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as double stranded DNA." [SO:ke] -synonym: "double stranded DNA virus" EXACT [] -synonym: "ds DNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001199 -name: ss_RNA_viral_sequence -def: "A ss_RNA_viral_sequence is a viral_sequence that is the sequence of a virus that exists as single stranded RNA." [SO:ke] -synonym: "single strand RNA virus" EXACT [] -synonym: "ss RNA viral sequence" EXACT [] -is_a: SO:0001041 ! viral_sequence - -[Term] -id: SO:0001200 -name: negative_sense_ssRNA_viral_sequence -def: "A negative_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that is complementary to mRNA and must be converted to positive sense RNA by RNA polymerase before translation." [SO:ke] -synonym: "negative sense single stranded RNA virus" RELATED [] -synonym: "negative sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001201 -name: positive_sense_ssRNA_viral_sequence -def: "A positive_sense_RNA_viral_sequence is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus that can be immediately translated by the host." [SO:ke] -synonym: "positive sense single stranded RNA virus" RELATED [] -synonym: "positive sense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001202 -name: ambisense_ssRNA_viral_sequence -def: "A ambisense_RNA_virus is a ss_RNA_viral_sequence that is the sequence of a single stranded RNA virus with both messenger and anti messenger polarity." 
[SO:ke] -synonym: "ambisense single stranded RNA virus" EXACT [] -synonym: "ambisense ssRNA viral sequence" EXACT [] -is_a: SO:0001199 ! ss_RNA_viral_sequence - -[Term] -id: SO:0001203 -name: RNA_polymerase_promoter -def: "A region (DNA) to which RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "RNA polymerase promoter" EXACT [] -is_a: SO:0000167 ! promoter - -[Term] -id: SO:0001204 -name: Phage_RNA_Polymerase_Promoter -def: "A region (DNA) to which Bacteriophage RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "Phage RNA Polymerase Promoter" EXACT [] -is_a: SO:0001203 ! RNA_polymerase_promoter - -[Term] -id: SO:0001205 -name: SP6_RNA_Polymerase_Promoter -def: "A region (DNA) to which the SP6 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "SP6 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001206 -name: T3_RNA_Polymerase_Promoter -def: "A DNA sequence to which the T3 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T3 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001207 -name: T7_RNA_Polymerase_Promoter -def: "A region (DNA) to which the T7 RNA polymerase binds, to begin transcription." [xenbase:jb] -synonym: "T7 RNA Polymerase Promoter" EXACT [] -is_a: SO:0001204 ! Phage_RNA_Polymerase_Promoter - -[Term] -id: SO:0001208 -name: five_prime_EST -def: "An EST read from the 5' end of a transcript that usually codes for a protein. These regions tend to be conserved across species and do not change much within a gene family." [http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "5' EST" EXACT [] -synonym: "five prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001209 -name: three_prime_EST -def: "An EST read from the 3' end of a transcript. They are more likely to fall within non-coding, or untranslated regions(UTRs)." 
[http://www.ncbi.nlm.nih.gov/About/primer/est.html] -synonym: "3' EST" EXACT [] -synonym: "three prime EST" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001210 -name: translational_frameshift -def: "The region of mRNA (not divisible by 3 bases) that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "ribosomal frameshift" EXACT [] -synonym: "translational frameshift" EXACT [] -xref: http://en.wikipedia.org/wiki/Translational_frameshift "wiki" -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:0001211 -name: plus_1_translational_frameshift -def: "The region of mRNA 1 base long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 1 ribosomal frameshift" EXACT [] -synonym: "plus 1 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001212 -name: plus_2_translational_frameshift -def: "The region of mRNA 2 bases long that is skipped during the process of translational frameshifting (GO:0006452), causing the reading frame to be different." [SO:ke] -synonym: "plus 2 ribosomal frameshift" EXACT [] -synonym: "plus 2 translational frameshift" EXACT [] -is_a: SO:0001210 ! translational_frameshift - -[Term] -id: SO:0001213 -name: group_III_intron -def: "Group III introns are introns found in the mRNA of the plastids of euglenoid protists. They are spliced by a two step transesterification with bulged adenosine as initiating nucleophile." [PMID:11377794] -comment: GO:0000374. -synonym: "group III intron" EXACT [] -xref: http://en.wikipedia.org/wiki/Group_III_intron "wiki" -is_a: SO:0000588 ! autocatalytically_spliced_intron - -[Term] -id: SO:0001214 -name: noncoding_region_of_exon -def: "The maximal intersection of exon and UTR." 
[SO:ke] -comment: An exon either containing but not starting with a start codon or containing but not ending with astop codon will be partially coding and partially non coding. -synonym: "noncoding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001215 -name: coding_region_of_exon -def: "The region of an exon that encodes for protein sequence." [SO:ke] -comment: An exon containing either a start or stop codon will be partially coding and partially non coding. -synonym: "coding region of exon" EXACT [] -is_a: SO:0000852 ! exon_region - -[Term] -id: SO:0001216 -name: endonuclease_spliced_intron -def: "An intron that spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -synonym: "endonuclease spliced intron" EXACT [] -is_a: SO:0000188 ! intron - -[Term] -id: SO:0001217 -name: protein_coding_gene -synonym: "protein coding gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000010 ! protein_coding - -[Term] -id: SO:0001218 -name: transgenic_insertion -synonym: "transgenic insertion" EXACT [] -intersection_of: SO:0000667 ! insertion -intersection_of: has_quality SO:0000781 ! transgenic - -[Term] -id: SO:0001219 -name: retrogene -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000569 ! retrotransposed - -[Term] -id: SO:0001220 -name: silenced_by_RNA_interference -def: "An attribute describing an epigenetic process where a gene is inactivated by RNA interference." [RSC:cb] -comment: RNA interference is GO:0016246. -synonym: "silenced by RNA interference" EXACT [] -is_a: SO:0000893 ! silenced - -[Term] -id: SO:0001221 -name: silenced_by_histone_modification -def: "An attribute describing an epigenetic process where a gene is inactivated by histone modification." [RSC:cb] -comment: Histone modification is GO:0016570. -synonym: "silenced by histone modification" EXACT [] -is_a: SO:0000893 ! 
silenced - -[Term] -id: SO:0001222 -name: silenced_by_histone_methylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone methylation." [RSC:cb] -comment: Histone methylation is GO:0016571. -synonym: "silenced by histone methylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001223 -name: silenced_by_histone_deacetylation -def: "An attribute describing an epigenetic process where a gene is inactivated by histone deacetylation." [RSC:cb] -comment: Histone deacetylation is GO:0016573. -synonym: "silenced by histone deacetylation" EXACT [] -is_a: SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001224 -name: gene_silenced_by_RNA_interference -def: "A gene that is silenced by RNA interference." [SO:xp] -synonym: "gene silenced by RNA interference" EXACT [] -synonym: "RNA interference silenced gene" EXACT [] -synonym: "RNAi silenced gene" EXACT [] -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001220 ! silenced_by_RNA_interference - -[Term] -id: SO:0001225 -name: gene_silenced_by_histone_modification -def: "A gene that is silenced by histone modification." [SO:xp] -synonym: "gene silenced by histone modification" EXACT [] -intersection_of: SO:0000127 ! silenced_gene -intersection_of: has_quality SO:0001221 ! silenced_by_histone_modification - -[Term] -id: SO:0001226 -name: gene_silenced_by_histone_methylation -def: "A gene that is silenced by histone methylation." [SO:xp] -synonym: "gene silenced by histone methylation" EXACT [] -intersection_of: SO:0001225 ! gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001222 ! silenced_by_histone_methylation - -[Term] -id: SO:0001227 -name: gene_silenced_by_histone_deacetylation -def: "A gene that is silenced by histone deacetylation." [SO:xp] -synonym: "gene silenced by histone deacetylation" EXACT [] -intersection_of: SO:0001225 ! 
gene_silenced_by_histone_modification -intersection_of: has_quality SO:0001223 ! silenced_by_histone_deacetylation - -[Term] -id: SO:0001228 -name: dihydrouridine -def: "A modified RNA base in which the 5,6-dihydrouracil is bound to the ribose ring." [RSC:cb] -synonym: "D" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Dihydrouridine "wiki" -xref: RNAMOD:051 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001229 -name: pseudouridine -def: "A modified RNA base in which the 5- position of the uracil is bound to the ribose ring instead of the 4- position." [RSC:cb] -comment: The free molecule is CHEBI:17802. -synonym: "Y" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Pseudouridine "wiki" -xref: RNAMOD:050 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001230 -name: inosine -def: "A modified RNA base in which hypoxanthine is bound to the ribose ring." [http://library.med.utah.edu/RNAmods/, RSC:cb] -comment: The free molecule is CHEBI:17596. -synonym: "I" RELATED [] -synonym: "RNAMOD:017" RELATED [] -xref: http://en.wikipedia.org/wiki/Inosine "wiki" -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001231 -name: seven_methylguanine -def: "A modified RNA base in which guanine is methylated at the 7- position." [RSC:cb] -comment: The free molecule is CHEBI:2274. -synonym: "7-methylguanine" EXACT [] -synonym: "seven methylguanine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001232 -name: ribothymidine -def: "A modified RNA base in which thymine is bound to the ribose ring." [RSC:cb] -comment: The free molecule is CHEBI:30832. -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001233 -name: methylinosine -def: "A modified RNA base in which methylhypoxanthine is bound to the ribose ring." [RSC:cb] -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001234 -name: mobile -def: "An attribute describing a feature that has either intra-genome or intracellular mobility." 
[RSC:cb] -xref: http://en.wikipedia.org/wiki/Mobile "wiki" -is_a: SO:0000733 ! feature_attribute - -[Term] -id: SO:0001235 -name: replicon -def: "A region containing at least one unique origin of replication and a unique termination site." [ISBN:0716719207] -xref: http://en.wikipedia.org/wiki/Replicon_(genetics) "wiki" -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0001236 -name: base -def: "A base is a sequence feature that corresponds to a single unit of a nucleotide polymer." [SO:ke] -xref: http://en.wikipedia.org/wiki/Nucleobase "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000001 ! region - -[Term] -id: SO:0001237 -name: amino_acid -def: "A sequence feature that corresponds to a single amino acid residue in a polypeptide." [RSC:cb] -comment: Probably in the future this will cross reference to Chebi. -synonym: "amino acid" EXACT [] -xref: http://en.wikipedia.org/wiki/Amino_acid "wiki" -is_a: SO:0001411 ! biological_region -relationship: part_of SO:0000104 ! polypeptide - -[Term] -id: SO:0001238 -name: major_TSS -synonym: "major transcription start site" EXACT [] -synonym: "major TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001239 -name: minor_TSS -synonym: "minor TSS" EXACT [] -is_a: SO:0000315 ! TSS - -[Term] -id: SO:0001240 -name: TSS_region -def: "The region of a gene from the 5' most TSS to the 3' TSS." [BBOP:nw] -synonym: "TSS region" EXACT [] -is_a: SO:0000842 ! gene_component_region -relationship: has_part SO:0000315 ! TSS - -[Term] -id: SO:0001241 -name: encodes_alternate_transcription_start_sites -synonym: "encodes alternate transcription start sites" EXACT [] -is_a: SO:0000401 ! gene_attribute - -[Term] -id: SO:0001243 -name: miRNA_primary_transcript_region -def: "A part of an miRNA primary_transcript." [SO:ke] -synonym: "miRNA primary transcript region" EXACT [] -is_a: SO:0000835 ! 
primary_transcript_region - -[Term] -id: SO:0001244 -name: pre_miRNA -def: "The 60-70 nucleotide region remain after Drosha processing of the primary transcript, that folds back upon itself to form a hairpin sructure." [SO:ke] -synonym: "pre-miRNA" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0000647 ! miRNA_primary_transcript - -[Term] -id: SO:0001245 -name: miRNA_stem -def: "The stem of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA stem" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001246 -name: miRNA_loop -def: "The loop of the hairpin loop formed by folding of the pre-miRNA." [SO:ke] -synonym: "miRNA loop" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -relationship: part_of SO:0001244 ! pre_miRNA - -[Term] -id: SO:0001247 -name: synthetic_oligo -def: "An oligo composed of synthetic nucleotides." [SO:ke] -synonym: "synthetic oligo" EXACT [] -is_a: SO:0000696 ! oligo - -[Term] -id: SO:0001248 -name: assembly -def: "A region of the genome of known length that is composed by ordering and aligning two or more different regions." [SO:ke] -xref: http://en.wikipedia.org/wiki/Genome_assembly#Genome_assembly "wiki" -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001249 -name: fragment_assembly -def: "A fragment assembly is a genome assembly that orders overlapping fragments of the genome based on landmark sequences. The base pair distance between the landmarks is known allowing additivity of lengths." [SO:ke] -synonym: "fragment assembly" EXACT [] -synonym: "physical map" EXACT [] -is_a: SO:0001248 ! assembly - -[Term] -id: SO:0001250 -name: fingerprint_map -def: "A fingerprint_map is a physical map composed of restriction fragments." 
[SO:ke] -synonym: "BACmap" EXACT [] -synonym: "fingerprint map" EXACT [] -synonym: "FPC" EXACT [] -synonym: "FPCmap" EXACT [] -synonym: "restriction map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000412 ! restriction_fragment - -[Term] -id: SO:0001251 -name: STS_map -def: "An STS map is a physical map organized by the unique STS landmarks." [SO:ke] -synonym: "STS map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001252 -name: RH_map -def: "A radiation hybrid map is a physical map." [SO:ke] -synonym: "radiation hybrid map" EXACT [] -synonym: "RH map" EXACT [] -is_a: SO:0001249 ! fragment_assembly -relationship: has_part SO:0000331 ! STS - -[Term] -id: SO:0001253 -name: sonicate_fragment -def: "A DNA fragment generated by sonication. Sonication is a technique used to sheer DNA into smaller fragments." [SO:ke] -synonym: "sonicate fragment" EXACT [] -is_a: SO:0000143 ! assembly_component - -[Term] -id: SO:0001254 -name: polyploid -def: "A kind of chromosome variation where the chromosome complement is an exact multiple of the haploid number and is greater than the diploid number." [SO:ke] -xref: http://en.wikipedia.org/wiki/Polyploid "wiki" -is_a: SO:1000182 ! chromosome_number_variation - -[Term] -id: SO:0001255 -name: autopolyploid -def: "A polyploid where the multiple chromosome set was derived from the same organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Autopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001256 -name: allopolyploid -def: "A polyploid where the multiple chromosome set was derived from a different organism." [SO:ke] -xref: http://en.wikipedia.org/wiki/Allopolyploid "wiki" -is_a: SO:0001254 ! polyploid - -[Term] -id: SO:0001257 -name: homing_endonuclease_binding_site -def: "The binding site (recognition site) of a homing endonuclease. The binding site is typically large." 
[SO:ke] -synonym: "homing endonuclease binding site" EXACT [] -is_a: SO:0000059 ! nuclease_binding_site - -[Term] -id: SO:0001258 -name: octamer_motif -def: "A sequence element characteristic of some RNA polymerase II promoters with sequence ATTGCAT that binds Pou-domain transcription factors." [GOC:dh, PMID:3095662] -comment: Nature. 1986 Oct 16-22;323(6089):640-3. -synonym: "octamer motif" EXACT [] -is_a: SO:0000235 ! TF_binding_site -relationship: part_of SO:0000170 ! RNApol_II_promoter - -[Term] -id: SO:0001259 -name: apicoplast_chromosome -def: "A chromosome originating in an apicoplast." [SO:xp] -synonym: "apicoplast chromosome" EXACT [] -intersection_of: SO:0000340 ! chromosome -intersection_of: has_origin SO:0000743 ! apicoplast_sequence - -[Term] -id: SO:0001260 -name: sequence_collection -def: "A collection of discontinuous sequences." [SO:ke] -synonym: "sequence collection" EXACT [] - -[Term] -id: SO:0001261 -name: overlapping_feature_set -def: "A continuous region of sequence composed of the overlapping of multiple sequence_features, which ultimately provides evidence for another sequence_feature." [SO:ke] -comment: This feature was requested by nicole, tracker id 1911479. It is required to gather evidense together for annotation. An example would be overlapping ESTs that support an mRNA. -synonym: "overlapping feature set" EXACT [] -is_a: SO:0000703 ! experimental_result_region - -[Term] -id: SO:0001262 -name: overlapping_EST_set -def: "A continous experimental result region extending the length of multiple overlapping EST's." [SO:ke] -synonym: "overlapping EST set" EXACT [] -is_a: SO:0001261 ! overlapping_feature_set -relationship: has_part SO:0000345 ! EST - -[Term] -id: SO:0001263 -name: ncRNA_gene -synonym: "ncRNA gen" EXACT [] -synonym: "ncRNA gene" EXACT [] -synonym: "non-coding RNA gene" RELATED [] -is_a: SO:0000704 ! gene - -[Term] -id: SO:0001264 -name: gRNA_gene -synonym: "gRNA gene" EXACT [] -intersection_of: SO:0001263 ! 
ncRNA_gene -intersection_of: has_quality SO:0000979 ! gRNA_encoding - -[Term] -id: SO:0001265 -name: miRNA_gene -synonym: "miRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000571 ! miRNA_encoding - -[Term] -id: SO:0001266 -name: scRNA_gene -synonym: "scRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000575 ! scRNA_encoding - -[Term] -id: SO:0001267 -name: snoRNA_gene -synonym: "snoRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000578 ! snoRNA_encoding - -[Term] -id: SO:0001268 -name: snRNA_gene -synonym: "snRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0001263 ! ncRNA_gene - -[Term] -id: SO:0001269 -name: SRP_RNA_gene -synonym: "SRP RNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000642 ! SRP_RNA_encoding - -[Term] -id: SO:0001270 -name: stRNA_gene -synonym: "stRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000656 ! stRNA_encoding - -[Term] -id: SO:0001271 -name: tmRNA_gene -synonym: "tmRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000659 ! tmRNA_encoding - -[Term] -id: SO:0001272 -name: tRNA_gene -synonym: "tRNA gene" EXACT [] -intersection_of: SO:0001263 ! ncRNA_gene -intersection_of: has_quality SO:0000663 ! tRNA_encoding - -[Term] -id: SO:0001273 -name: modified_adenosine -def: "A modified adenine is an adenine base feature that has been altered." [SO:ke] -synonym: "modified adenosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001274 -name: modified_inosine -def: "A modified inosine is an inosine base feature that has been altered." [SO:ke] -synonym: "modified inosine" EXACT [] -is_a: SO:0001230 ! 
inosine - -[Term] -id: SO:0001275 -name: modified_cytidine -def: "A modified cytidine is a cytidine base feature which has been altered." [SO:ke] -synonym: "modified cytidine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001276 -name: modified_guanosine -synonym: "modified guanosine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001277 -name: modified_uridine -synonym: "modified uridine" EXACT [] -is_a: SO:0000250 ! modified_RNA_base_feature - -[Term] -id: SO:0001278 -name: one_methylinosine -def: "1-methylinosine is a modified insosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylinosine" EXACT [] -synonym: "m1I" EXACT RNAMOD [] -synonym: "one methylinosine" EXACT [] -xref: RNAMOD:018 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001279 -name: one_two_prime_O_dimethylinosine -def: "1,2'-O-dimethylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylinosine" EXACT [] -synonym: "m'Im" EXACT RNAMOD [] -synonym: "one two prime O dimethylinosine" EXACT [] -xref: RNAMOD:019 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001280 -name: two_prime_O_methylinosine -def: "2'-O-methylinosine is a modified inosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylinosine" EXACT [] -synonym: "Im" EXACT RNAMOD [] -synonym: "two prime O methylinosine" EXACT [] -xref: RNAMOD:081 -is_a: SO:0001274 ! modified_inosine - -[Term] -id: SO:0001281 -name: three_methylcytidine -def: "3-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylcytidine" EXACT [] -synonym: "m3C" EXACT RNAMOD [] -synonym: "three methylcytidine" EXACT [] -xref: RNAMOD:020 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001282 -name: five_methylcytidine -def: "5-methylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylcytidine" EXACT [] -synonym: "five methylcytidine" EXACT [] -synonym: "m5C" EXACT RNAMOD [] -xref: RNAMOD:021 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001283 -name: two_prime_O_methylcytidine -def: "2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylcytidine" EXACT [] -synonym: "Cm" EXACT RNAMOD [] -synonym: "two prime O methylcytidine" EXACT [] -xref: RNAMOD:022 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001284 -name: two_thiocytidine -def: "2-thiocytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiocytidine" EXACT [] -synonym: "s2C" EXACT RNAMOD [] -synonym: "two thiocytidine" EXACT [] -xref: RNAMOD:023 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001285 -name: N4_acetylcytidine -def: "N4-acetylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "ac4C" EXACT RNAMOD [] -synonym: "N4 acetylcytidine" EXACT [] -synonym: "N4-acetylcytidine" EXACT [] -xref: RNAMOD:024 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001286 -name: five_formylcytidine -def: "5-formylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formylcytidine" EXACT [] -synonym: "f5C" EXACT RNAMOD [] -synonym: "five formylcytidine" EXACT [] -xref: RNAMOD:025 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001287 -name: five_two_prime_O_dimethylcytidine -def: "5,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethylcytidine" EXACT [] -synonym: "five two prime O dimethylcytidine" EXACT [] -synonym: "m5Cm" EXACT RNAMOD [] -xref: RNAMOD:026 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001288 -name: N4_acetyl_2_prime_O_methylcytidine -def: "N4-acetyl-2'-O-methylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "ac4Cm" EXACT RNAMOD [] -synonym: "N4 acetyl 2 prime O methylcytidine" EXACT [] -synonym: "N4-acetyl-2'-O-methylcytidine" EXACT [] -xref: RNAMOD:027 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001289 -name: lysidine -def: "Lysidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "k2C" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Lysidine "wiki" -xref: RNAMOD:028 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001290 -name: N4_methylcytidine -def: "N4-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4C" EXACT RNAMOD [] -synonym: "N4 methylcytidine" EXACT [] -synonym: "N4-methylcytidine" EXACT [] -xref: RNAMOD:082 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001291 -name: N4_2_prime_O_dimethylcytidine -def: "N4,2'-O-dimethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "m4Cm" EXACT RNAMOD [] -synonym: "N4 2 prime O dimethylcytidine" EXACT [] -synonym: "N4,2'-O-dimethylcytidine" EXACT [] -xref: RNAMOD:083 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001292 -name: five_hydroxymethylcytidine -def: "5-hydroxymethylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxymethylcytidine" EXACT [] -synonym: "five hydroxymethylcytidine" EXACT [] -synonym: "hm5C" EXACT RNAMOD [] -xref: RNAMOD:084 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001293 -name: five_formyl_two_prime_O_methylcytidine -def: "5-formyl-2'-O-methylcytidine is a modified cytidine." [http://library.med.utah.edu/RNAmods/] -synonym: "5-formyl-2'-O-methylcytidine" EXACT [] -synonym: "f5Cm" EXACT RNAMOD [] -synonym: "five formyl two prime O methylcytidine" EXACT [] -xref: RNAMOD:095 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001294 -name: N4_N4_2_prime_O_trimethylcytidine -def: "N4_N4_2_prime_O_trimethylcytidine is a modified cytidine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "m42Cm" EXACT RNAMOD [] -synonym: "N4,N4,2'-O-trimethylcytidine" EXACT [] -xref: RNAMOD:107 -is_a: SO:0001275 ! modified_cytidine - -[Term] -id: SO:0001295 -name: one_methyladenosine -def: "1_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyladenosine" EXACT [] -synonym: "m1A" EXACT RNAMOD [] -synonym: "one methyladenosine" EXACT [] -xref: RNAMOD:001 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001296 -name: two_methyladenosine -def: "2_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methyladenosine" EXACT [] -synonym: "m2A" EXACT RNAMOD [] -synonym: "two methyladenosine" EXACT [] -xref: RNAMOD:002 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001297 -name: N6_methyladenosine -def: "N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6A" EXACT RNAMOD [] -synonym: "N6 methyladenosine" EXACT [] -synonym: "N6-methyladenosine" EXACT [] -xref: RNAMOD:003 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001298 -name: two_prime_O_methyladenosine -def: "2prime_O_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyladenosine" EXACT [] -synonym: "Am" EXACT RNAMOD [] -synonym: "two prime O methyladenosine" EXACT [] -xref: RNAMOD:004 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001299 -name: two_methylthio_N6_methyladenosine -def: "2_methylthio_N6_methyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-methyladenosine" EXACT [] -synonym: "ms2m6A" EXACT RNAMOD [] -synonym: "two methylthio N6 methyladenosine" EXACT [] -xref: RNAMOD:005 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001300 -name: N6_isopentenyladenosine -def: "N6_isopentenyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "i6A" EXACT RNAMOD [] -synonym: "N6 isopentenyladenosine" EXACT [] -synonym: "N6-isopentenyladenosine" EXACT [] -xref: RNAMOD:006 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001301 -name: two_methylthio_N6_isopentenyladenosine -def: "2_methylthio_N6_isopentenyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-isopentenyladenosine" EXACT [] -synonym: "ms2i6A" EXACT RNAMOD [] -synonym: "two methylthio N6 isopentenyladenosine" EXACT [] -xref: RNAMOD:007 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001302 -name: N6_cis_hydroxyisopentenyl_adenosine -def: "N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "io6A" EXACT RNAMOD [] -synonym: "N6 cis hydroxyisopentenyl adenosine" EXACT [] -synonym: "N6-(cis-hydroxyisopentenyl)adenosine" EXACT [] -xref: RNAMOD:008 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001303 -name: two_methylthio_N6_cis_hydroxyisopentenyl_adenosine -def: "2_methylthio_N6_cis_hydroxyisopentenyl_adenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-(cis-hydroxyisopentenyl) adenosine" EXACT [] -synonym: "ms2io6A" EXACT RNAMOD [] -synonym: "two methylthio N6 cis hydroxyisopentenyl adenosine" EXACT [] -xref: RNAMOD:009 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001304 -name: N6_glycinylcarbamoyladenosine -def: "N6_glycinylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "g6A" EXACT RNAMOD [] -synonym: "N6 glycinylcarbamoyladenosine" EXACT [] -synonym: "N6-glycinylcarbamoyladenosine" EXACT [] -xref: RNAMOD:010 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001305 -name: N6_threonylcarbamoyladenosine -def: "N6_threonylcarbamoyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-threonylcarbamoyladenosine" EXACT [] -synonym: "t6A" EXACT RNAMOD [] -xref: RNAMOD:011 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001306 -name: two_methylthio_N6_threonyl_carbamoyladenosine -def: "2_methylthio_N6_threonyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-threonyl carbamoyladenosine" EXACT [] -synonym: "ms2t6A" EXACT RNAMOD [] -synonym: "two methylthio N6 threonyl carbamoyladenosine" EXACT [] -xref: RNAMOD:012 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001307 -name: N6_methyl_N6_threonylcarbamoyladenosine -def: "N6_methyl_N6_threonylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6t6A" EXACT RNAMOD [] -synonym: "N6 methyl N6 threonylcarbamoyladenosine" EXACT [] -synonym: "N6-methyl-N6-threonylcarbamoyladenosine" EXACT [] -xref: RNAMOD:013 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001308 -name: N6_hydroxynorvalylcarbamoyladenosine -def: "N6_hydroxynorvalylcarbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "hn6A" EXACT RNAMOD [] -synonym: "N6 hydroxynorvalylcarbamoyladenosine" EXACT [] -synonym: "N6-hydroxynorvalylcarbamoyladenosine" EXACT [] -xref: RNAMOD:014 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001309 -name: two_methylthio_N6_hydroxynorvalyl_carbamoyladenosine -def: "2_methylthio_N6_hydroxynorvalyl_carbamoyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2-methylthio-N6-hydroxynorvalyl carbamoyladenosine" EXACT [] -synonym: "ms2hn6A" EXACT RNAMOD [] -synonym: "two methylthio N6 hydroxynorvalyl carbamoyladenosine" EXACT [] -xref: RNAMOD:015 -is_a: SO:0001273 ! 
modified_adenosine - -[Term] -id: SO:0001310 -name: two_prime_O_ribosyladenosine_phosphate -def: "2prime_O_ribosyladenosine_phosphate is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosyladenosine (phosphate)" EXACT [] -synonym: "Ar(p)" EXACT RNAMOD [] -synonym: "two prime O ribosyladenosine phosphate" EXACT [] -xref: RNAMOD:016 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001311 -name: N6_N6_dimethyladenosine -def: "N6_N6_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62A" EXACT RNAMOD [] -synonym: "N6,N6-dimethyladenosine" EXACT [] -xref: RNAMOD:080 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001312 -name: N6_2_prime_O_dimethyladenosine -def: "N6_2prime_O_dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m6Am" EXACT RNAMOD [] -synonym: "N6 2 prime O dimethyladenosine" EXACT [] -synonym: "N6,2'-O-dimethyladenosine" EXACT [] -xref: RNAMOD:088 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001313 -name: N6_N6_2_prime_O_trimethyladenosine -def: "N6_N6_2prime_O_trimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "m62Am" EXACT RNAMOD [] -synonym: "N6,N6,2'-O-trimethyladenosine" EXACT [] -xref: RNAMOD:089 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001314 -name: one_two_prime_O_dimethyladenosine -def: "1,2'-O-dimethyladenosine is a modified adenosine." [http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethyladenosine" EXACT [] -synonym: "m1Am" EXACT RNAMOD [] -synonym: "one two prime O dimethyladenosine" EXACT [] -xref: RNAMOD:097 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001315 -name: N6_acetyladenosine -def: "N6_acetyladenosine is a modified adenosine." 
[http://library.med.utah.edu/RNAmods/] -synonym: "ac6A" EXACT RNAMOD [] -synonym: "N6 acetyladenosine" EXACT [] -synonym: "N6-acetyladenosine" EXACT [] -xref: RNAMOD:102 -is_a: SO:0001273 ! modified_adenosine - -[Term] -id: SO:0001316 -name: seven_deazaguanosine -def: "7-deazaguanosine is a moddified guanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-deazaguanosine" RELATED [] -synonym: "seven deazaguanosine" EXACT [] -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001317 -name: queuosine -def: "Queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "Q" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/Queuosine "wiki" -xref: RNAMOD:043 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001318 -name: epoxyqueuosine -def: "Epoxyqueuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "eQ" EXACT RNAMOD [] -xref: RNAMOD:044 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001319 -name: galactosyl_queuosine -def: "Galactosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "galactosyl queuosine" EXACT [] -synonym: "galactosyl-queuosine" EXACT [] -synonym: "galQ" EXACT RNAMOD [] -xref: RNAMOD:045 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001320 -name: mannosyl_queuosine -def: "Mannosyl_queuosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "mannosyl queuosine" EXACT [] -synonym: "mannosyl-queuosine" EXACT [] -synonym: "manQ" EXACT RNAMOD [] -xref: RNAMOD:046 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001321 -name: seven_cyano_seven_deazaguanosine -def: "7_cyano_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-cyano-7-deazaguanosine" EXACT [] -synonym: "preQ0" EXACT RNAMOD [] -synonym: "seven cyano seven deazaguanosine" EXACT [] -xref: RNAMOD:047 -is_a: SO:0001316 ! 
seven_deazaguanosine - -[Term] -id: SO:0001322 -name: seven_aminomethyl_seven_deazaguanosine -def: "7_aminomethyl_7_deazaguanosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "7-aminomethyl-7-deazaguanosine" EXACT [] -synonym: "preQ1" EXACT RNAMOD [] -synonym: "seven aminomethyl seven deazaguanosine" EXACT [] -xref: RNAMOD:048 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001323 -name: archaeosine -def: "Archaeosine is a modified 7-deazoguanosine." [http://library.med.utah.edu/RNAmods/] -synonym: "G+" EXACT RNAMOD [] -xref: RNAMOD:049 -is_a: SO:0001316 ! seven_deazaguanosine - -[Term] -id: SO:0001324 -name: one_methylguanosine -def: "1_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylguanosine" EXACT [] -synonym: "m1G" EXACT RNAMOD [] -synonym: "one methylguanosine" EXACT [] -xref: RNAMOD:029 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001325 -name: N2_methylguanosine -def: "N2_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2G" EXACT RNAMOD [] -synonym: "N2 methylguanosine" EXACT [] -synonym: "N2-methylguanosine" EXACT [] -xref: RNAMOD:030 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001326 -name: seven_methylguanosine -def: "7_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "7-methylguanosine" EXACT [] -synonym: "m7G" EXACT RNAMOD [] -synonym: "seven methylguanosine" EXACT [] -xref: RNAMOD:031 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001327 -name: two_prime_O_methylguanosine -def: "2prime_O_methylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylguanosine" EXACT [] -synonym: "Gm" EXACT RNAMOD [] -synonym: "two prime O methylguanosine" EXACT [] -xref: RNAMOD:032 -is_a: SO:0001276 ! 
modified_guanosine - -[Term] -id: SO:0001328 -name: N2_N2_dimethylguanosine -def: "N2_N2_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22G" EXACT RNAMOD [] -synonym: "N2,N2-dimethylguanosine" EXACT [] -xref: RNAMOD:033 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001329 -name: N2_2_prime_O_dimethylguanosine -def: "N2_2prime_O_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2Gm" EXACT RNAMOD [] -synonym: "N2 2 prime O dimethylguanosine" EXACT [] -synonym: "N2,2'-O-dimethylguanosine" EXACT [] -xref: RNAMOD:034 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001330 -name: N2_N2_2_prime_O_trimethylguanosine -def: "N2_N2_2prime_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m22Gmv" EXACT RNAMOD [] -synonym: "N2,N2,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:035 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001331 -name: two_prime_O_ribosylguanosine_phosphate -def: "2prime_O_ribosylguanosine_phosphate is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-ribosylguanosine (phosphate)" EXACT [] -synonym: "Gr(p)" EXACT RNAMOD [] -synonym: "two prime O ribosylguanosine phosphate" EXACT [] -xref: RNAMOD:036 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001332 -name: wybutosine -def: "Wybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "yW" EXACT RNAMOD [] -xref: RNAMOD:037 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001333 -name: peroxywybutosine -def: "Peroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "o2yW" EXACT RNAMOD [] -xref: RNAMOD:038 -is_a: SO:0001276 ! 
modified_guanosine - -[Term] -id: SO:0001334 -name: hydroxywybutosine -def: "Hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW" EXACT RNAMOD [] -xref: RNAMOD:039 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001335 -name: undermodified_hydroxywybutosine -def: "Undermodified_hydroxywybutosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "OHyW*" EXACT RNAMOD [] -synonym: "undermodified hydroxywybutosine" EXACT [] -xref: RNAMOD:040 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001336 -name: wyosine -def: "Wyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "IMG" EXACT RNAMOD [] -xref: RNAMOD:041 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001337 -name: methylwyosine -def: "Methylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mimG" EXACT RNAMOD [] -xref: RNAMOD:042 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001338 -name: N2_7_dimethylguanosine -def: "N2_7_dimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7G" EXACT RNAMOD [] -synonym: "N2 7 dimethylguanosine" EXACT [] -synonym: "N2,7-dimethylguanosine" EXACT [] -xref: RNAMOD:090 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001339 -name: N2_N2_7_trimethylguanosine -def: "N2_N2_7_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,2,7G" EXACT RNAMOD [] -synonym: "N2,N2,7-trimethylguanosine" EXACT [] -xref: RNAMOD:091 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001340 -name: one_two_prime_O_dimethylguanosine -def: "1_2prime_O_dimethylguanosine is a modified guanosine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "1,2'-O-dimethylguanosine" EXACT [] -synonym: "m1Gm" EXACT RNAMOD [] -synonym: "one two prime O dimethylguanosine" EXACT [] -xref: RNAMOD:096 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001341 -name: four_demethylwyosine -def: "4_demethylwyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-demethylwyosine" EXACT [] -synonym: "four demethylwyosine" EXACT [] -synonym: "imG-14" EXACT RNAMOD [] -xref: RNAMOD:100 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001342 -name: isowyosine -def: "Isowyosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "imG2" EXACT RNAMOD [] -xref: RNAMOD:101 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001343 -name: N2_7_2prirme_O_trimethylguanosine -def: "N2_7_2prirme_O_trimethylguanosine is a modified guanosine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "m2,7Gm" EXACT RNAMOD [] -synonym: "N2 7 2prirme O trimethylguanosine" EXACT [] -synonym: "N2,7,2'-O-trimethylguanosine" EXACT [] -xref: RNAMOD:106 -is_a: SO:0001276 ! modified_guanosine - -[Term] -id: SO:0001344 -name: five_methyluridine -def: "5_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methyluridine" EXACT [] -synonym: "five methyluridine" EXACT [] -synonym: "m5U" EXACT RNAMOD [] -xref: http://en.wikipedia.org/wiki/5-methyluridine "wiki" -xref: RNAMOD:052 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001345 -name: two_prime_O_methyluridine -def: "2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methyluridine" EXACT [] -synonym: "two prime O methyluridine" EXACT [] -synonym: "Um" EXACT RNAMOD [] -xref: RNAMOD:053 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001346 -name: five_two_prime_O_dimethyluridine -def: "5_2_prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5,2'-O-dimethyluridine" EXACT [] -synonym: "five two prime O dimethyluridine" EXACT [] -synonym: "m5Um" EXACT RNAMOD [] -xref: RNAMOD:054 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001347 -name: one_methylpseudouridine -def: "1_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methylpseudouridine" EXACT [] -synonym: "m1Y" EXACT RNAMOD [] -synonym: "one methylpseudouridine" EXACT [] -xref: RNAMOD:055 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001348 -name: two_prime_O_methylpseudouridine -def: "2prime_O_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2'-O-methylpseudouridine" EXACT [] -synonym: "two prime O methylpseudouridine" EXACT [] -synonym: "Ym" EXACT RNAMOD [] -xref: RNAMOD:056 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001349 -name: two_thiouridine -def: "2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thiouridine" EXACT [] -synonym: "s2U" EXACT RNAMOD [] -synonym: "two thiouridine" EXACT [] -xref: RNAMOD:057 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001350 -name: four_thiouridine -def: "4_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "4-thiouridine" EXACT [] -synonym: "four thiouridine" EXACT [] -synonym: "s4U" EXACT RNAMOD [] -xref: RNAMOD:058 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001351 -name: five_methyl_2_thiouridine -def: "5_methyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyl-2-thiouridine" EXACT [] -synonym: "five methyl 2 thiouridine" EXACT [] -synonym: "m5s2U" EXACT RNAMOD [] -xref: RNAMOD:059 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001352 -name: two_thio_two_prime_O_methyluridine -def: "2_thio_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "2-thio-2'-O-methyluridine" EXACT [] -synonym: "s2Um" EXACT RNAMOD [] -synonym: "two thio two prime O methyluridine" EXACT [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001353 -name: three_three_amino_three_carboxypropyl_uridine -def: "3_3_amino_3_carboxypropyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-(3-amino-3-carboxypropyl)uridine" EXACT [] -synonym: "acp3U" EXACT RNAMOD [] -xref: RNAMOD:061 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001354 -name: five_hydroxyuridine -def: "5_hydroxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-hydroxyuridine" EXACT [] -synonym: "five hydroxyuridine" EXACT [] -synonym: "ho5U" EXACT RNAMOD [] -xref: RNAMOD:060 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001355 -name: five_methoxyuridine -def: "5_methoxyuridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxyuridine" EXACT [] -synonym: "five methoxyuridine" EXACT [] -synonym: "mo5U" EXACT RNAMOD [] -xref: RNAMOD:063 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001356 -name: uridine_five_oxyacetic_acid -def: "Uridine_5_oxyacetic_acid is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "cmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid" EXACT [] -synonym: "uridine five oxyacetic acid" EXACT [] -xref: RNAMOD:064 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001357 -name: uridine_five_oxyacetic_acid_methyl_ester -def: "Uridine_5_oxyacetic_acid_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "mcmo5U" EXACT RNAMOD [] -synonym: "uridine 5-oxyacetic acid methyl ester" EXACT [] -synonym: "uridine five oxyacetic acid methyl ester" EXACT [] -xref: RNAMOD:065 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001358 -name: five_carboxyhydroxymethyl_uridine -def: "5_carboxyhydroxymethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine" EXACT [] -synonym: "chm5U" EXACT RNAMOD [] -synonym: "five carboxyhydroxymethyl uridine" EXACT [] -xref: RNAMOD:066 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001359 -name: five_carboxyhydroxymethyl_uridine_methyl_ester -def: "5_carboxyhydroxymethyl_uridine_methyl_ester is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(carboxyhydroxymethyl)uridine methyl ester" EXACT [] -synonym: "five carboxyhydroxymethyl uridine methyl ester" EXACT [] -synonym: "mchm5U" EXACT RNAMOD [] -xref: RNAMOD:067 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001360 -name: five_methoxycarbonylmethyluridine -def: "Five_methoxycarbonylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyluridine" EXACT [] -synonym: "five methoxycarbonylmethyluridine" EXACT [] -synonym: "mcm5U" EXACT RNAMOD [] -xref: RNAMOD:068 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001361 -name: five_methoxycarbonylmethyl_two_prime_O_methyluridine -def: "Five_methoxycarbonylmethyl_2_prime_O_methyluridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five methoxycarbonylmethyl two prime O methyluridine" EXACT [] -synonym: "mcm5Um" EXACT RNAMOD [] -xref: RNAMOD:069 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001362 -name: five_methoxycarbonylmethyl_two_thiouridine -def: "5_methoxycarbonylmethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methoxycarbonylmethyl-2-thiouridine" EXACT [] -synonym: "five methoxycarbonylmethyl two thiouridine" EXACT [] -synonym: "mcm5s2U" EXACT RNAMOD [] -xref: RNAMOD:070 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001363 -name: five_aminomethyl_two_thiouridine -def: "5_aminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-aminomethyl-2-thiouridine" EXACT [] -synonym: "five aminomethyl two thiouridine" EXACT [] -synonym: "nm5s2U" EXACT RNAMOD [] -xref: RNAMOD:071 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001364 -name: five_methylaminomethyluridine -def: "5_methylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyluridine" EXACT [] -synonym: "five methylaminomethyluridine" EXACT [] -synonym: "mnm5U" EXACT RNAMOD [] -xref: RNAMOD:072 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001365 -name: five_methylaminomethyl_two_thiouridine -def: "5_methylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-thiouridine" EXACT [] -synonym: "five methylaminomethyl two thiouridine" EXACT [] -synonym: "mnm5s2U" EXACT RNAMOD [] -xref: RNAMOD:073 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001366 -name: five_methylaminomethyl_two_selenouridine -def: "5_methylaminomethyl_2_selenouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methylaminomethyl-2-selenouridine" EXACT [] -synonym: "five methylaminomethyl two selenouridine" EXACT [] -synonym: "mnm5se2U" EXACT RNAMOD [] -xref: RNAMOD:074 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001367 -name: five_carbamoylmethyluridine -def: "5_carbamoylmethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyluridine" EXACT [] -synonym: "five carbamoylmethyluridine" EXACT [] -synonym: "ncm5U" EXACT RNAMOD [] -xref: RNAMOD:075 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001368 -name: five_carbamoylmethyl_two_prime_O_methyluridine -def: "5_carbamoylmethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carbamoylmethyl-2'-O-methyluridine" EXACT [] -synonym: "five carbamoylmethyl two prime O methyluridine" EXACT [] -synonym: "ncm5Um" EXACT RNAMOD [] -xref: RNAMOD:076 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001369 -name: five_carboxymethylaminomethyluridine -def: "5_carboxymethylaminomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyluridine" EXACT [] -synonym: "cmnm5U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyluridine" EXACT [] -xref: RNAMOD:077 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001370 -name: five_carboxymethylaminomethyl_two_prime_O_methyluridine -def: "5_carboxymethylaminomethyl_2_prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl- 2'-O-methyluridine" EXACT [] -synonym: "cmnm5Um" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two prime O methyluridine" EXACT [] -xref: RNAMOD:078 -is_a: SO:0001277 ! 
modified_uridine - -[Term] -id: SO:0001371 -name: five_carboxymethylaminomethyl_two_thiouridine -def: "5_carboxymethylaminomethyl_2_thiouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethylaminomethyl-2-thiouridine" EXACT [] -synonym: "cmnm5s2U" EXACT RNAMOD [] -synonym: "five carboxymethylaminomethyl two thiouridine" EXACT [] -xref: RNAMOD:079 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001372 -name: three_methyluridine -def: "3_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methyluridine" EXACT [] -synonym: "m3U" EXACT RNAMOD [] -synonym: "three methyluridine" EXACT [] -xref: RNAMOD:085 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001373 -name: one_methyl_three_three_amino_three_carboxypropyl_pseudouridine -def: "1_methyl_3_3_amino_3_carboxypropyl_pseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "1-methyl-3-(3-amino-3-carboxypropyl) pseudouridine" EXACT [] -synonym: "m1acp3Y" EXACT RNAMOD [] -xref: RNAMOD:086 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001374 -name: five_carboxymethyluridine -def: "5_carboxymethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-carboxymethyluridine" EXACT [] -synonym: "cm5U" EXACT RNAMOD [] -synonym: "five carboxymethyluridine" EXACT [] -xref: RNAMOD:087 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001375 -name: three_two_prime_O_dimethyluridine -def: "3_2prime_O_dimethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3,2'-O-dimethyluridine" EXACT [] -synonym: "m3Um" EXACT RNAMOD [] -synonym: "three two prime O dimethyluridine" EXACT [] -xref: RNAMOD:092 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001376 -name: five_methyldihydrouridine -def: "5_methyldihydrouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-methyldihydrouridine" EXACT [] -synonym: "five methyldihydrouridine" EXACT [] -synonym: "m5D" EXACT RNAMOD [] -xref: RNAMOD:093 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001377 -name: three_methylpseudouridine -def: "3_methylpseudouridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "3-methylpseudouridine" EXACT [] -synonym: "m3Y" EXACT RNAMOD [] -synonym: "three methylpseudouridine" EXACT [] -xref: RNAMOD:094 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001378 -name: five_taurinomethyluridine -def: "5_taurinomethyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyluridine" EXACT [] -synonym: "five taurinomethyluridine" EXACT [] -synonym: "tm5U" EXACT RNAMOD [] -xref: RNAMOD:098 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001379 -name: five_taurinomethyl_two_thiouridine -def: "5_taurinomethyl_2_thiouridineis a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-taurinomethyl-2-thiouridine" EXACT [] -synonym: "five taurinomethyl two thiouridine" EXACT [] -synonym: "tm5s2U" EXACT RNAMOD [] -xref: RNAMOD:099 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001380 -name: five_isopentenylaminomethyl_uridine -def: "5_isopentenylaminomethyl_uridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)uridine" EXACT [] -synonym: "five isopentenylaminomethyl uridine" EXACT [] -synonym: "inm5U" EXACT RNAMOD [] -xref: RNAMOD:103 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001381 -name: five_isopentenylaminomethyl_two_thiouridine -def: "5_isopentenylaminomethyl_2_thiouridine is a modified uridine base feature." 
[http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2-thiouridine" EXACT [] -synonym: "five isopentenylaminomethyl two thiouridine" EXACT [] -synonym: "inm5s2U" EXACT RNAMOD [] -xref: RNAMOD:104 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001382 -name: five_isopentenylaminomethyl_two_prime_O_methyluridine -def: "5_isopentenylaminomethyl_2prime_O_methyluridine is a modified uridine base feature." [http://library.med.utah.edu/RNAmods/] -synonym: "5-(isopentenylaminomethyl)- 2'-O-methyluridine" EXACT [] -synonym: "five isopentenylaminomethyl two prime O methyluridine" EXACT [] -synonym: "inm5Um" EXACT RNAMOD [] -xref: RNAMOD:105 -is_a: SO:0001277 ! modified_uridine - -[Term] -id: SO:0001383 -name: histone_binding_site -def: "A region of a DNA molecule that is bound by a histone." [SO:ke] -synonym: "histone binding site" EXACT [] -is_a: SO:0000410 ! protein_binding_site - -[Term] -id: SO:0001384 -name: CDS_fragment -synonym: "CDS fragment" EXACT [] -synonym: "incomplete CDS" EXACT [] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000731 ! fragmentary - -[Term] -id: SO:0001385 -name: modified_amino_acid_feature -def: "A post translationally modified amino acid feature." [SO:ke] -synonym: "modified amino acid feature" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001386 -name: modified_glycine -def: "A post translationally modified glycine amino acid feature." [SO:ke] -synonym: "ModGly" EXACT AAMOD [] -synonym: "modified glycine" EXACT [] -xref: MOD:00908 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001387 -name: modified_L_alanine -def: "A post translationally modified alanine amino acid feature." [SO:ke] -synonym: "ModAla" EXACT AAMOD [] -synonym: "modified L alanine" EXACT [] -synonym: "modified L-alanine" EXACT [] -xref: MOD:00901 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001388 -name: modified_L_asparagine -def: "A post translationally modified asparagine amino acid feature." [SO:ke] -synonym: "ModAsn" EXACT AAMOD [] -synonym: "modified L asparagine" EXACT [] -synonym: "modified L-asparagine" EXACT [] -xref: MOD:00903 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001389 -name: modified_L_aspartic_acid -def: "A post translationally modified aspartic acid amino acid feature." [SO:ke] -synonym: "ModAsp" EXACT AAMOD [] -synonym: "modified L aspartic acid" EXACT [] -synonym: "modified L-aspartic acid" EXACT [] -xref: MOD:00904 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001390 -name: modified_L_cysteine -def: "A post translationally modified cysteine amino acid feature." [SO:ke] -synonym: "ModCys" EXACT AAMOD [] -synonym: "modified L cysteine" EXACT [] -synonym: "modified L-cysteine" EXACT [] -xref: MOD:00905 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001391 -name: modified_L_glutamic_acid -synonym: "ModGlu" EXACT AAMOD [] -synonym: "modified L glutamic acid" EXACT [] -synonym: "modified L-glutamic acid" EXACT [] -xref: MOD:00906 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001392 -name: modified_L_threonine -def: "A post translationally modified threonine amino acid feature." [SO:ke] -synonym: "modified L threonine" EXACT [] -synonym: "modified L-threonine" EXACT [] -synonym: "ModThr" EXACT AAMOD [] -xref: MOD:00917 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001393 -name: modified_L_tryptophan -def: "A post translationally modified tryptophan amino acid feature." [SO:ke] -synonym: "modified L tryptophan" EXACT [] -synonym: "modified L-tryptophan" EXACT [] -synonym: "ModTrp" EXACT AAMOD [] -xref: MOD:00918 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001394 -name: modified_L_glutamine -def: "A post translationally modified glutamine amino acid feature." 
[SO:ke] -synonym: "ModGln" EXACT [] -synonym: "modified L glutamine" EXACT [] -synonym: "modified L-glutamine" EXACT [] -xref: MOD:00907 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001395 -name: modified_L_methionine -def: "A post translationally modified methionine amino acid feature." [SO:ke] -synonym: "modified L methionine" EXACT [] -synonym: "modified L-methionine" EXACT [] -synonym: "ModMet" EXACT AAMOD [] -xref: MOD:00913 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001396 -name: modified_L_isoleucine -def: "A post translationally modified isoleucine amino acid feature." [SO:ke] -synonym: "modified L isoleucine" EXACT [] -synonym: "modified L-isoleucine" EXACT [] -synonym: "ModIle" EXACT AAMOD [] -xref: MOD:00910 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001397 -name: modified_L_phenylalanine -def: "A post translationally modified phenylalanine amino acid feature." [SO:ke] -synonym: "modified L phenylalanine" EXACT [] -synonym: "modified L-phenylalanine" EXACT [] -synonym: "ModPhe" EXACT AAMOD [] -xref: MOD:00914 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001398 -name: modified_L_histidine -def: "A post translationally modified histidie amino acid feature." [SO:ke] -synonym: "ModHis" EXACT [] -synonym: "modified L histidine" EXACT [] -synonym: "modified L-histidine" EXACT [] -xref: MOD:00909 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001399 -name: modified_L_serine -def: "A post translationally modified serine amino acid feature." [SO:ke] -synonym: "modified L serine" EXACT [] -synonym: "modified L-serine" EXACT [] -synonym: "MosSer" EXACT AAMOD [] -xref: MOD:00916 "http://www.psidev.info/index.php?q=node/104" -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001400 -name: modified_L_lysine -def: "A post translationally modified lysine amino acid feature." 
[SO:ke] -synonym: "modified L lysine" EXACT [] -synonym: "modified L-lysine" EXACT [] -synonym: "ModLys" EXACT AAMOD [] -xref: MOD:00912 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001401 -name: modified_L_leucine -def: "A post translationally modified leucine amino acid feature." [SO:ke] -synonym: "modified L leucine" EXACT [] -synonym: "modified L-leucine " EXACT [] -synonym: "ModLeu" EXACT AAMOD [] -xref: MOD:00911 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001402 -name: modified_L_selenocysteine -def: "A post translationally modified selenocysteine amino acid feature." [SO:ke] -synonym: "modified L selenocysteine" EXACT [] -synonym: "modified L-selenocysteine" EXACT [] -xref: MOD:01158 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001403 -name: modified_L_valine -def: "A post translationally modified valine amino acid feature." [SO:ke] -synonym: "modified L valine" EXACT [] -synonym: "modified L-valine" EXACT [] -synonym: "ModVal" EXACT AAMOD [] -xref: MOD:00920 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001404 -name: modified_L_proline -def: "A post translationally modified proline amino acid feature." [SO:ke] -synonym: "modified L proline" EXACT [] -synonym: "modified L-proline " EXACT [] -synonym: "ModPro" EXACT AAMOD [] -xref: MOD:00915 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001405 -name: modified_L_tyrosine -def: "A post translationally modified tyrosine amino acid feature." [SO:ke] -synonym: "modified L tyrosine" EXACT [] -synonym: "modified L-tyrosine" EXACT [] -synonym: "ModTry" EXACT AAMOD [] -xref: MOD:00919 -is_a: SO:0001385 ! modified_amino_acid_feature - -[Term] -id: SO:0001406 -name: modified_L_arginine -def: "A post translationally modified arginine amino acid feature." [SO:ke] -synonym: "ModArg" EXACT AAMOD [] -synonym: "modified L arginine" EXACT [] -synonym: "modified L-arginine" EXACT [] -xref: MOD:00902 -is_a: SO:0001385 ! 
modified_amino_acid_feature - -[Term] -id: SO:0001407 -name: peptidyl -def: "An attribute describing the nature of a proteinaceous polymer, where by the amino acid units are joined by peptide bonds." [SO:ke] -is_a: SO:0000443 ! polymer_attribute - -[Term] -id: SO:0001408 -name: cleaved_for_gpi_anchor_region -def: "The C-terminal residues of a polypeptide which are exchanged for a GPI-anchor." [EBI:rh] -synonym: "cleaved for gpi anchor region" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0001409 -name: biomaterial_region -def: "A region which is intended for use in an experiment." [SO:cb] -synonym: "biomaterial region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001410 -name: experimental_feature -def: "A region which is the result of some arbitrary experimental procedure. The procedure may be carried out with biological material or inside a computer." [SO:cb] -synonym: "analysis feature" RELATED [] -synonym: "experimental output artefact" EXACT [] -synonym: "experimental_output_artefact" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001411 -name: biological_region -def: "A region defined by its disposition to be involved in a biological process." [SO:cb] -synonym: "biological region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001412 -name: topologically_defined_region -def: "A region that is defined according to its relations with other regions within the same sequence." [SO:cb] -synonym: "topologically defined region" EXACT [] -is_a: SO:0000001 ! region - -[Term] -id: SO:0001413 -name: translocation_breakpoint -def: "The point within a chromosome where a translocation begins or ends." [SO:cb] -synonym: "translocation breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001414 -name: insertion_breakpoint -def: "The point within a chromosome where a insertion begins or ends." [SO:cb] -synonym: "insertion breakpoint" EXACT [] -is_a: SO:0001021 ! 
chromosome_breakpoint - -[Term] -id: SO:0001415 -name: deletion_breakpoint -def: "The point within a chromosome where a deletion begins or ends." [SO:cb] -synonym: "deletion breakpoint" EXACT [] -is_a: SO:0001021 ! chromosome_breakpoint - -[Term] -id: SO:0001416 -name: five_prime_flanking_region -def: "A flanking region located five prime of a specific region." [SO:chado] -synonym: "5' flanking region" RELATED [] -synonym: "five prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001417 -name: three_prime_flanking_region -def: "A flanking region located three prime of a specific region." [SO:chado] -synonym: "3' flanking region" RELATED [] -synonym: "three prime flanking region" EXACT [] -is_a: SO:0000239 ! flanking_region - -[Term] -id: SO:0001418 -name: transcribed_fragment -def: "An experimental region, defined by a tiling array experiment to be transcribed at some level." [SO:ke] -comment: Term requested by the MODencode group. -synonym: "transcribed fragment" EXACT [] -synonym: "transfrag" RELATED [] -is_a: SO:0001410 ! experimental_feature - -[Term] -id: SO:0001419 -name: cis_splice_site -def: "Intronic 2 bp region bordering exon. A splice_site that adjacent_to exon and overlaps intron." [SO:cjm, SO:ke] -synonym: "cis splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001420 -name: trans_splice_site -def: "Primary transcript region bordering trans-splice junction." [SO:ke] -synonym: "trans splice site" EXACT [] -is_a: SO:0000162 ! splice_site - -[Term] -id: SO:0001421 -name: splice_junction -def: "The boundary between an intron and an exon." [SO:ke] -synonym: "splice boundary" EXACT [] -synonym: "splice junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001422 -name: conformational_switch -def: "A region of a polypeptide, involved in the transition from one conformational state to another." [SO:ke] -comment: MM Young, K Kirshenbaum, KA Dill & S Highsmith. 
Predicting conformational switches in proteins. Protein Science, 1999, 8, 1752-64. K. Kirshenbaum, M.M. Young and S. Highsmith. Predicting Allosteric Switches in Myosins. Protein Science 8(9):1806-1815. 1999. -synonym: "polypeptide conformational switch" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0001423 -name: dye_terminator_read -def: "A read produced by the dye terminator method of sequencing." [SO:ke] -synonym: "dye terminator read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001424 -name: pyrosequenced_read -def: "A read produced by pyrosequencing technology." [SO:ke] -comment: An example is a read produced by Roche 454 technology. -synonym: "pyorsequenced read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001425 -name: ligation_based_read -def: "A read produced by ligation based sequencing technologies." [SO:ke] -comment: An example of this kind of read is one produced by ABI SOLiD. -synonym: "ligation based read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001426 -name: polymerase_synthesis_read -def: "A read produced by the polymerase based sequence by synthesis method." [SO:ke] -comment: An example is a read produced by Illumina technology. -synonym: "polymerase synthesis read" RELATED [] -is_a: SO:0000150 ! read - -[Term] -id: SO:0001427 -name: cis_regulatory_frameshift_element -def: "A structural region in an RNA molecule which promotes ribosomal frameshifting of cis coding sequence." [RFAM:jd] -synonym: "cis regulatory frameshift element" EXACT [] -is_a: SO:0005836 ! regulatory_region - -[Term] -id: SO:0001428 -name: expressed_sequence_assembly -def: "A sequence assembly derived from expressed sequences." [SO:ke] -comment: From tracker [ 2372385 ] expressed_sequence_assembly. -synonym: "expressed sequence assembly" EXACT [] -is_a: SO:0000353 ! sequence_assembly - -[Term] -id: SO:0001429 -name: DNA_binding_site -def: "A region of a molecule that binds to DNA." 
[SO:ke] -synonym: "DNA binding site" EXACT [] -is_a: SO:0000409 ! binding_site - -[Term] -id: SO:0001430 -name: polyA_junction -def: "The boundary between the UTR and the polyA sequence." [SO:ke] -synonym: "polyA junction" EXACT [] -is_a: SO:0000699 ! junction - -[Term] -id: SO:0001431 -name: cryptic_gene -def: "A gene that is not transcribed under normal conditions and is not critical to normal cellular functioning." [SO:ke] -synonym: "cryptic gene" EXACT [] -intersection_of: SO:0000704 ! gene -intersection_of: has_quality SO:0000976 ! cryptic - -[Term] -id: SO:0001432 -name: sequence_variant_affecting_polyadenylation -synonym: "mutation affecting polyadenylation" RELATED [] -synonym: "sequence variant affecting polyadenylation" EXACT [] -is_a: SO:1000070 ! sequence_variant_affecting_transcript_processing - -[Term] -id: SO:0001433 -name: three_prime_RACE_clone -def: "A three prime RACE (Rapid Amplification of cDNA Ends) clone is a cDNA clone copied from the 3' end of an mRNA (using a poly-dT primer to capture the polyA tail and a gene-specific or randomly primed 5' primer), and spliced into a vector for propagation in a suitable host." [modENCODE:nlw] -synonym: "3' RACE clone" RELATED [] -is_a: SO:0000317 ! cDNA_clone - -[Term] -id: SO:0001434 -name: cassette_pseudogene -def: "A cassette pseudogene is a kind of gene in an innactive form which may recombine at a telomeric locus to form a functional copy." [SO:ke] -comment: Requested by the trypanosome community. -synonym: "cassette pseudogene" EXACT [] -synonym: "cassette type psedogene" RELATED [] -is_a: SO:0000336 ! pseudogene - -[Term] -id: SO:0001435 -name: alanine -comment: A place holder for a cross product with chebi. -synonym: "A" EXACT aa1 [] -synonym: "Ala" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001436 -name: valine -comment: A place holder for a cross product with chebi. -synonym: "V" EXACT aa1 [] -synonym: "Val" EXACT aa3 [] -is_a: SO:0001237 ! 
amino_acid - -[Term] -id: SO:0001437 -name: leucine -comment: A place holder for a cross product with chebi. -synonym: "L" EXACT aa1 [] -synonym: "Leu" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001438 -name: isoleucine -comment: A place holder for a cross product with chebi. -synonym: "I" EXACT aa1 [] -synonym: "Ile" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001439 -name: proline -comment: A place holder for a cross product with chebi. -synonym: "P" EXACT aa1 [] -synonym: "Pro" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001440 -name: tryptophan -comment: A place holder for a cross product with chebi. -synonym: "Trp" EXACT aa3 [] -synonym: "W" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001441 -name: phenylalanine -comment: A place holder for a cross product with chebi. -synonym: "F" EXACT aa1 [] -synonym: "Phe" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001442 -name: methionine -comment: A place holder for a cross product with chebi. -synonym: "M" EXACT aa1 [] -synonym: "Met" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001443 -name: glycine -comment: A place holder for a cross product with chebi. -synonym: "G" EXACT aa1 [] -synonym: "Gly" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001444 -name: serine -comment: A place holder for a cross product with chebi. -synonym: "S" EXACT aa1 [] -synonym: "Ser" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001445 -name: threonine -comment: A place holder for a cross product with chebi. -synonym: "T" EXACT aa1 [] -synonym: "Thr" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001446 -name: tyrosine -comment: A place holder for a cross product with chebi. -synonym: "Tyr" EXACT aa3 [] -synonym: "Y" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001447 -name: cysteine -comment: A place holder for a cross product with chebi. 
-synonym: "C" EXACT aa1 [] -synonym: "Cys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001448 -name: glutamine -comment: A place holder for a cross product with chebi. -synonym: "Gln" EXACT aa3 [] -synonym: "Q" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001449 -name: asparagine -comment: A place holder for a cross product with chebi. -synonym: "Asn" EXACT aa3 [] -synonym: "N" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001450 -name: lysine -comment: A place holder for a cross product with chebi. -synonym: "K" EXACT aa1 [] -synonym: "Lys" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001451 -name: argenine -comment: A place holder for a cross product with chebi. -synonym: "Arg" EXACT aa3 [] -synonym: "R" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001452 -name: histidine -comment: A place holder for a cross product with chebi. -synonym: "H" EXACT aa1 [] -synonym: "His" EXACT aa3 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001453 -name: aspartic_acid -comment: A place holder for a cross product with chebi. -synonym: "Asp" EXACT aa3 [] -synonym: "aspartic acid" EXACT [] -synonym: "D" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001454 -name: glutamic_acid -comment: A place holder for a cross product with chebi. -synonym: "E" EXACT aa1 [] -synonym: "Glu" EXACT aa3 [] -synonym: "glutamic acid" EXACT [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001455 -name: selenocysteine -comment: A place holder for a cross product with chebi. -synonym: "Sec" EXACT aa3 [] -synonym: "U" EXACT aa1 [] -is_a: SO:0001237 ! amino_acid - -[Term] -id: SO:0001456 -name: pyrrolysine -comment: A place holder for a cross product with chebi. -synonym: "O" EXACT aa1 [] -synonym: "Pyl" EXACT aa3 [] -is_a: SO:0001237 ! 
amino_acid - -[Term] -id: SO:0001457 -name: transcribed_cluster -def: "A region defined by a set of transcribed sequences from the same gene or expressed pseudogene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "transcribed cluster" EXACT [] -synonym: "unigene cluster" RELATED [] -is_a: SO:0001410 ! experimental_feature -relationship: has_part SO:0000695 ! reagent - -[Term] -id: SO:0001458 -name: unigene_cluster -def: "A kind of transcribed_cluster defined by a set of transcribed sequences from the a unique gene." [SO:ke] -comment: This term was requested by Jeff Bowes, using the tracker, ID = 2594157. -synonym: "unigene cluster" RELATED [] -is_a: SO:0001457 ! transcribed_cluster - -[Term] -id: SO:0001459 -name: CRISPR -def: "Clustered Palindromic Repeats interspersed with bacteriophage derived spacer sequences." [RFAM:jd] -synonym: "Clustered_Regularly_Interspaced_Short_Palindromic_Repeat" EXACT [] -synonym: "CRISPR element" EXACT [] -xref: http:en.wikipedia.org/wiki/CRISPR -is_a: SO:0000314 ! direct_repeat - -[Term] -id: SO:0001460 -name: insulator_binding_site -def: "A protein_binding_site located within an insulator." [SO:ke] -comment: See tracker ID 2060908. -synonym: "insulator binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -relationship: part_of SO:0000627 ! insulator - -[Term] -id: SO:0001461 -name: enhancer_binding_site -def: "A protein_binding_site located within an enhancer." [SO:ke] -synonym: "enhancer binding site" RELATED [] -is_a: SO:0000410 ! protein_binding_site -relationship: part_of SO:0000165 ! enhancer - -[Term] -id: SO:0001462 -name: contig_collection -def: "A collection of contigs." [SO:ke] -comment: See tracker ID: 2138359. -synonym: "contig collection" EXACT [] -is_a: SO:0001260 ! sequence_collection - -[Term] -id: SO:0001463 -name: lincRNA -def: "A multiexonic non-coding RNA transcribed by RNA polymerase II." 
[PMID:19182780, SO:ke] -synonym: "large intervening non-coding RNA" EXACT [] -is_a: SO:0000655 ! ncRNA - -[Term] -id: SO:0001464 -name: UST -def: "An EST spanning part or all of the untranslated regions of a protein-coding transcript." [SO:nlw] -synonym: "UTR sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001465 -name: three_prime_UST -def: "A UST located in the 3'UTR of a protein-coding transcript." [SO:nlw] -synonym: "3' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001466 -name: five_prime_UST -def: "An UST located in the 5'UTR of a protein-coding transcript." [SO:nlw] -synonym: "5' UST" RELATED [] -is_a: SO:0001464 ! UST - -[Term] -id: SO:0001467 -name: RST -def: "A tag produced from a single sequencing read from a RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "RACE sequence tag" EXACT [] -is_a: SO:0000345 ! EST - -[Term] -id: SO:0001468 -name: three_prime_RST -def: "A tag produced from a single sequencing read from a 3'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "3' RST" EXACT [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001469 -name: five_prime_RST -def: "A tag produced from a single sequencing read from a 5'-RACE product; typically a few hundred base pairs long." [SO:nlw] -synonym: "5' RST" RELATED [] -is_a: SO:0001467 ! RST - -[Term] -id: SO:0001470 -name: UST_match -def: "A match against an UST sequence." [SO:nlw] -synonym: "UST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001471 -name: RST_match -def: "A match against an RST sequence." [SO:nlw] -synonym: "RST match" EXACT [] -is_a: SO:0000102 ! expressed_sequence_match - -[Term] -id: SO:0001472 -name: primer_match -def: "A nucleotide match to a primer sequence." [SO:nlw] -synonym: "primer match" EXACT [] -is_a: SO:0000347 ! nucleotide_match - -[Term] -id: SO:0001473 -name: miRNA_antiguide -def: "A region of the pri miRNA that basepairs with the guide to form the hairpin." 
[SO:ke] -synonym: "miRNA antiguide " EXACT [] -synonym: "miRNA passenger strand" EXACT [] -synonym: "miRNA star" EXACT [] -is_a: SO:0001243 ! miRNA_primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-05-27T03:35:43Z - -[Term] -id: SO:0001474 -name: trans_splice_junction -def: "The boundary between the spliced leader and the first exon of the mRNA." [SO:ke] -is_a: SO:0000699 ! junction -created_by: kareneilbeck -creation_date: 2009-07-13T04:50:49Z - -[Term] -id: SO:0001475 -name: outron -def: "A region of a primary transcript, that is removed via trans splicing." [PMID:16401417, SO:ke] -is_a: SO:0000835 ! primary_transcript_region -created_by: kareneilbeck -creation_date: 2009-07-14T11:36:08Z - -[Term] -id: SO:0001476 -name: natural_plasmid -def: "A plasmid that occurs naturally." [SO:xp] -synonym: "natural plasmid" EXACT [] -is_a: SO:0000155 ! plasmid -is_a: SO:0001038 ! extrachromosomal_mobile_genetic_element -intersection_of: has_quality SO:0000782 ! natural -created_by: kareneilbeck -creation_date: 2009-09-01T03:43:06Z - -[Term] -id: SO:0001477 -name: gene_trap_construct -def: "A gene trap construct is a type of engineered plasmid which is designed to integrate into a genome and produce a fusion transcript between exons of the gene into which it inserts and a reporter element in the construct. Gene traps contain a splice acceptor, do not contain promoter elements for the reporter, and are mutagenic. Gene traps may be bicistronic with the second cassette containing a promoter driving an a selectable marker." [ZFIN:dh] -synonym: "gene trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:49:09Z - -[Term] -id: SO:0001478 -name: promoter_trap_construct -def: "A promoter trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when inserted in close proximity to a promoter element. 
Promoter traps typically do not contain promoter elements and are mutagenic." [ZFIN:dh] -synonym: "promoter trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:52:01Z - -[Term] -id: SO:0001479 -name: enhancer_trap_construct -def: "An enhancer trap construct is a type of engineered plasmid which is designed to integrate into a genome and express a reporter when the expression from a basic minimal promoter is enhanced by genomic enhancer elements. Enhancer traps contain promoter elements and are not usually mutagenic." [ZFIN:dh] -synonym: "enhancer trap construct" EXACT [] -is_a: SO:0000637 ! engineered_plasmid -created_by: kareneilbeck -creation_date: 2009-09-01T03:53:26Z - -[Term] -id: SO:0001480 -name: PAC_end -def: "A region of sequence from the end of a PAC clone that may provide a highly specific marker." [ZFIN:mh] -synonym: "PAC end" EXACT [] -is_a: SO:0000150 ! read -relationship: part_of SO:0000154 ! PAC -created_by: kareneilbeck -creation_date: 2009-09-09T05:18:12Z - -[Term] -id: SO:0001481 -name: RAPD -def: "RAPD is a 'PCR product' where a sequence variant is identified through the use of PCR with random primers." [ZFIN:mh] -synonym: "Random Amplification Polymorphic DNA" EXACT [] -is_a: SO:0000006 ! PCR_product -created_by: kareneilbeck -creation_date: 2009-09-09T05:26:10Z - -[Term] -id: SO:0001482 -name: shadow_enhancer -is_a: SO:0000165 ! enhancer -created_by: kareneilbeck -creation_date: 2009-09-09T05:29:29Z - -[Term] -id: SO:0005836 -name: regulatory_region -def: "A DNA sequence that controls the expression of a gene." [SO:ke] -subset: SOFA -synonym: "regulatory region" EXACT [] -xref: http://en.wikipedia.org/wiki/Regulatory_region "wiki" -is_a: SO:0000831 ! 
gene_member_region - -[Term] -id: SO:0005837 -name: U14_snoRNA_primary_transcript -def: "The primary transcript of an evolutionarily conserved eukaryotic low molecular weight RNA capable of intermolecular hybridization with both homologous and heterologous 18S rRNA." [PMID:2251119] -synonym: "4.5S snRNA primary transcript" EXACT [] -synonym: "U14 snoRNA primary transcript" EXACT [] -is_a: SO:0000232 ! snoRNA_primary_transcript - -[Term] -id: SO:0005841 -name: methylation_guide_snoRNA -def: "A snoRNA that specifies the site of 2'-O-ribose methylation in an RNA molecule by base pairing with a short sequence around the target residue." [GOC:mah, PMID:12457565] -comment: Has RNA 2'-O-ribose methylation guide activity (GO:0030561). -synonym: "methylation guide snoRNA" EXACT [] -is_a: SO:0000593 ! C_D_box_snoRNA -relationship: derives_from SO:0000580 ! methylation_guide_snoRNA_primary_transcript - -[Term] -id: SO:0005843 -name: rRNA_cleavage_RNA -def: "An ncRNA that is part of a ribonucleoprotein that cleaves the primary pre-rRNA transcript in the process of producing mature rRNA molecules." [GOC:kgc] -synonym: "rRNA cleavage RNA" EXACT [] -is_a: SO:0000655 ! ncRNA -relationship: derives_from SO:0000582 ! rRNA_cleavage_snoRNA_primary_transcript - -[Term] -id: SO:0005845 -name: exon_of_single_exon_gene -def: "An exon that is the only exon in a gene." [RSC:cb] -synonym: "exon of single exon gene" EXACT [] -synonym: "single_exon" RELATED [] -synonym: "singleton exon" EXACT [] -is_a: SO:0000147 ! exon - -[Term] -id: SO:0005847 -name: cassette_array_member -synonym: "cassette array member" EXACT [] -is_a: SO:0005848 ! gene_cassette_member - -[Term] -id: SO:0005848 -name: gene_cassette_member -synonym: "gene cassette member" EXACT [] -is_a: SO:0000081 ! gene_array_member - -[Term] -id: SO:0005849 -name: gene_subarray_member -synonym: "gene subarray member" EXACT [] -is_a: SO:0000081 ! 
gene_array_member - -[Term] -id: SO:0005850 -name: primer_binding_site -def: "Non-covalent primer binding site for initiation of replication, transcription, or reverse transcription." [http://www.ebi.ac.uk/embl/Documentation/FT_definitions/feature_table.html] -synonym: "primer binding site" EXACT [] -xref: http://en.wikipedia.org/wiki/Primer_binding_site "wiki" -is_a: SO:0000409 ! binding_site -relationship: part_of SO:0000186 ! LTR_retrotransposon - -[Term] -id: SO:0005851 -name: gene_array -def: "An array includes two or more genes, or two or more gene subarrays, contiguously arranged where the individual genes, or subarrays, are either identical in sequence, or essentially so." [SO:ma] -comment: This would include, for example, a cluster of genes each encoding the major ribosomal RNAs and a cluster of histone gene subarrays. -synonym: "gene array" EXACT [] -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0005852 -name: gene_subarray -def: "A subarray is, by defintition, a member of a gene array (SO:0005851); the members of a subarray may differ substantially in sequence, but are closely related in function." [SO:ma] -comment: This would include, for example, a cluster of genes encoding different histones. -synonym: "gene subarray" EXACT [] -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:0005853 -name: gene_cassette -def: "A gene that can be substituted for a related gene at a different site in the genome." [SGD:se] -comment: This would include, for example, the mating type gene cassettes of S. cerevisiae. Gene cassettes usually exist as linear sequences as part of a larger DNA molecule, such as a chromosome or plasmid. -synonym: "gene cassette" EXACT [] -xref: http://en.wikipedia.org/wiki/Gene_cassette "wiki" -is_a: SO:0000704 ! gene - -[Term] -id: SO:0005854 -name: gene_cassette_array -def: "An array of non-functional genes whose members, when captured by recombination form functional genes." 
[SO:ma] -comment: This would include, for example, the arrays of non-functional VSG genes of Trypanosomes. -synonym: "gene cassette array" EXACT [] -is_a: SO:0005855 ! gene_group -relationship: has_part SO:0005853 ! gene_cassette - -[Term] -id: SO:0005855 -name: gene_group -def: "A collection of related genes." [SO:ma] -subset: SOFA -synonym: "gene group" EXACT [] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:0005856 -name: selenocysteine_tRNA_primary_transcript -def: "A primary transcript encoding seryl tRNA (SO:000269)." [SO:ke] -synonym: "selenocysteine tRNA primary transcript" EXACT [] -is_a: SO:0000210 ! tRNA_primary_transcript - -[Term] -id: SO:0005857 -name: selenocysteinyl_tRNA -def: "A tRNA sequence that has a selenocysteine anticodon, and a 3' selenocysteine binding region." [SO:ke] -synonym: "selenocysteinyl tRNA" EXACT [] -synonym: "selenocysteinyl-transfer ribonucleic acid" EXACT [] -synonym: "selenocysteinyl-transfer RNA" EXACT [] -is_a: SO:0000253 ! tRNA -relationship: derives_from SO:0005856 ! selenocysteine_tRNA_primary_transcript - -[Term] -id: SO:0005858 -name: syntenic_region -def: "A region in which two or more pairs of homologous markers occur on the same chromosome in two or more species." [http://www.informatics.jax.org/silverbook/glossary.shtml] -synonym: "syntenic region" EXACT [] -intersection_of: SO:0000330 ! conserved_region -intersection_of: has_quality SO:0000860 ! syntenic - -[Term] -id: SO:0100001 -name: biochemical_region_of_peptide -def: "A region of a peptide that is involved in a biochemical function." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "biochemical motif" EXACT [] -synonym: "biochemical region of peptide" EXACT [] -synonym: "biochemical_region" RELATED [] -is_a: SO:0001067 ! polypeptide_motif - -[Term] -id: SO:0100002 -name: molecular_contact_region -def: "A region that is involved a contact with another molecule." [EBIBS:GAR] -comment: Range. 
-subset: biosapiens -synonym: "molecular contact region" RELATED [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0100003 -name: intrinsically_unstructured_polypeptide_region -def: "A region of polypeptide chain with high conformational flexibility." [EBIBS:GAR] -subset: biosapiens -synonym: "disordered region" RELATED BS [] -synonym: "intrinsically unstructured polypeptide region" EXACT [] -is_a: SO:0001070 ! polypeptide_structural_region - -[Term] -id: SO:0100004 -name: catmat_left_handed_three -def: "A motif of 3 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -75 bounds -100 to -50, res i+1: psi 140 bounds 110 to 170. An extra restriction of the length of the O to O distance would be useful, that it be less than 5 Angstrom. More precisely these two oxygens are the main chain carbonyl oxygen atoms of residues i-1 and i+1." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "catmat-3l" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100005 -name: catmat_left_handed_four -def: "A motif of 4 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i psi -10 bounds -50 to 30, res i+1: phi -90 bounds -120 to -60, res i+1: psi -10 bounds -50 to 30, res i+2: phi -75 bounds -100 to -50, res i+2: psi 140 bounds 110 to 170. The extra restriction of the length of the O to O distance is similar, that it be less than 5 Angstrom. In this case these two Oxygen atoms are the main chain carbonyl oxygen atoms of residues i-1 and i+2." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "catmat-4l" EXACT [] -is_a: SO:0001078 ! 
polypeptide_secondary_structure - -[Term] -id: SO:0100006 -name: catmat_right_handed_three -def: "A motif of 3 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -75 bounds -100 to -50, res i+1: psi 140 bounds 110 to 170. An extra restriction of the length of the O to O distance would be useful, that it be less than 5 Angstrom. More precisely these two oxygens are the main chain carbonyl oxygen atoms of residues i-1 and i+1." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "catmat-3r" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100007 -name: catmat_right_handed_four -def: "A motif of 4 consecutive residues with dihedral angles as follows: res i: phi -90 bounds -120 to -60, res i: psi -10 bounds -50 to 30, res i+1: phi -90 bounds -120 to -60, res i+1: psi -10 bounds -50 to 30, res i+2: phi -75 bounds -100 to -50, res i+2: psi 140 bounds 110 to 170. The extra restriction of the length of the O to O distance is similar, that it be less than 5 Angstrom. In this case these two Oxygen atoms are the main chain carbonyl oxygen atoms of residues i-1 and i+2." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "catmat-4r" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100008 -name: alpha_beta_motif -def: "A motif of five consecutive residues and two H-bonds in which: H-bond between CO of residue(i) and NH of residue(i+4), H-bond between CO of residue(i) and NH of residue(i+3),Phi angles of residues(i+1), (i+2) and (i+3) are negative." [EBIBS:GAR, http://www.ebi.ac.uk/msd-srv/msdmotif/] -subset: biosapiens -synonym: "alpha beta motif" EXACT [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100009 -name: lipoprotein_signal_peptide -def: "A peptide that acts as a signal for both membrane translocation and lipid attachment in prokaryotes." 
[EBIBS:GAR] -subset: biosapiens -synonym: "lipoprotein signal peptide" EXACT [] -synonym: "prokaryotic membrane lipoprotein lipid attachment site" EXACT [] -is_a: SO:0100011 ! cleaved_peptide_region - -[Term] -id: SO:0100010 -name: no_output -def: "An experimental region wherean analysis has been run and not produced any annotation." [EBIBS:GAR] -subset: biosapiens -synonym: "no output" EXACT BS [] -is_a: SO:0000703 ! experimental_result_region - -[Term] -id: SO:0100011 -name: cleaved_peptide_region -def: "The cleaved_peptide_regon is the a region of peptide sequence that is cleaved during maturation." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "cleaved peptide region" EXACT [] -is_a: SO:0000839 ! polypeptide_region -relationship: part_of SO:0001063 ! immature_peptide_region - -[Term] -id: SO:0100012 -name: peptide_coil -def: "Irregular, unstructured regions of a protein's backbone, as distinct from the regular region (namely alpha helix and beta strand - characterised by specific patterns of main-chain hydrogen bonds)." [EBIBS:GAR] -subset: biosapiens -synonym: "coil" RELATED BS [] -synonym: "peptide coil" EXACT [] -synonym: "random coil" RELATED BS [] -is_a: SO:0001078 ! polypeptide_secondary_structure - -[Term] -id: SO:0100013 -name: hydrophobic_region_of_peptide -def: "Hydrophobic regions are regions with a low affinity for water." [EBIBS:GAR] -comment: Range. -subset: biosapiens -synonym: "hydropathic" RELATED [] -synonym: "hydrophobic region of peptide" RELATED [] -synonym: "hydrophobic_region" EXACT [] -synonym: "hydrophobicity" RELATED [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:0100014 -name: n_terminal_region -def: "The amino-terminal positively-charged region of a signal peptide (approx 1-5 aa)." [EBIBS:GAR] -subset: biosapiens -synonym: "N-region" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0000418 ! 
signal_peptide - -[Term] -id: SO:0100015 -name: c_terminal_region -def: "The more polar, carboxy-terminal region of the signal peptide (approx 3-7 aa)." [EBIBS:GAR] -subset: biosapiens -synonym: "C-region" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0000418 ! signal_peptide - -[Term] -id: SO:0100016 -name: central_hydrophobic_region_of_signal_peptide -def: "The central, hydrophobic region of the signal peptide (approx 7-15 aa)." [EBIBS:GAR] -subset: biosapiens -synonym: "central hydrophobic region of signal peptide" EXACT [] -synonym: "central_hydrophobic_region" RELATED [] -synonym: "H-region" RELATED [] -is_a: SO:0100011 ! cleaved_peptide_region -relationship: part_of SO:0000418 ! signal_peptide - -[Term] -id: SO:0100017 -name: polypeptide_conserved_motif -def: "A conserved motif is a short (up to 20 amino acids) region of biological interest that is conserved in different proteins. They may or may not have functional or structural significance within the proteins in which they are found." [EBIBS:GAR] -subset: biosapiens -synonym: "motif" RELATED [] -is_a: SO:0001067 ! polypeptide_motif - -[Term] -id: SO:0100018 -name: polypeptide_binding_motif -def: "A polypeptide binding motif is a short (up to 20 amino acids) polypeptide region of biological interest that contains one or more amino acids experimentally shown to bind to a ligand." [EBIBS:GAR] -subset: biosapiens -synonym: "binding" RELATED [uniprot:feature_type] -synonym: "polypeptide binding motif" EXACT [] -is_a: SO:0100001 ! biochemical_region_of_peptide - -[Term] -id: SO:0100019 -name: polypeptide_catalytic_motif -def: "A polypeptide catalytic motif is a short (up to 20 amino acids) polypeptide region that contains one or more active site residues." [EBIBS:GAR] -subset: biosapiens -synonym: "catalytic_motif" RELATED [] -synonym: "polypeptide catalytic motif" EXACT [] -is_a: SO:0100001 ! 
biochemical_region_of_peptide - -[Term] -id: SO:0100020 -name: polypeptide_DNA_contact -def: "Residues involved in interactions with DNA." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide DNA contact" EXACT [] -is_a: SO:0000409 ! binding_site -is_a: SO:0100002 ! molecular_contact_region - -[Term] -id: SO:0100021 -name: polypeptide_conserved_region -def: "A subsection of sequence with biological interest that is conserved in different proteins. They may or may not have functional or structural significance within the proteins in which they are found." [EBIBS:GAR] -subset: biosapiens -synonym: "polypeptide conserved region" EXACT [] -is_a: SO:0000839 ! polypeptide_region - -[Term] -id: SO:1000002 -name: substitution -def: "Any change in genomic DNA caused by a single event." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1000004 -name: partially_characterised_change_in_DNA_sequence -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "partially characterised change in DNA sequence" EXACT [] -is_a: SO:1000007 ! uncharacterised_change_in_nucleotide_sequence - -[Term] -id: SO:1000005 -name: complex_substitution -def: "When no simple or well defined DNA mutation event describes the observed DNA change, the keyword \"complex\" should be used. Usually there are multiple equally plausible explanations for the change." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -synonym: "complex substitution" EXACT [] -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000007 -name: uncharacterised_change_in_nucleotide_sequence -def: "The nature of the mutation event is either uncharacterised or only partially characterised." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "uncharacterised change in nucleotide sequence" EXACT [] -is_a: SO:0001059 ! sequence_alteration - -[Term] -id: SO:1000008 -name: point_mutation -def: "A single nucleotide change which has occurred at the same position of a corresponding nucleotide in a reference sequence." [SO:immuno_workshop] -subset: SOFA -synonym: "point mutation" EXACT [] -xref: http://en.wikipedia.org/wiki/Point_mutation "wiki" -is_a: SO:1000002 ! substitution - -[Term] -id: SO:1000009 -name: transition -def: "Change of a pyrimidine nucleotide, C or T, into an other pyrimidine nucleotide, or change of a purine nucleotide, A or G, into an other purine nucleotide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -is_a: SO:1000008 ! point_mutation - -[Term] -id: SO:1000010 -name: pyrimidine_transition -def: "A substitution of a pyrimidine, C or T, for another pyrimidine." [SO:ke] -synonym: "pyrimidine transition" EXACT [] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000011 -name: C_to_T_transition -def: "A transition of a cytidine to a thymine." [SO:ke] -synonym: "C to T transition" EXACT [] -is_a: SO:1000010 ! pyrimidine_transition - -[Term] -id: SO:1000012 -name: C_to_T_transition_at_pCpG_site -def: "The transition of cytidine to thymine occurring at a pCpG site as a consequence of the spontaneous deamination of 5'-methylcytidine." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "C to T transition at pCpG site" EXACT [] -is_a: SO:1000011 ! C_to_T_transition - -[Term] -id: SO:1000013 -name: T_to_C_transition -synonym: "T to C transition" EXACT [] -is_a: SO:1000010 ! pyrimidine_transition - -[Term] -id: SO:1000014 -name: purine_transition -def: "A substitution of a purine, A or G, for another purine." [SO:ke] -synonym: "purine transition" EXACT [] -is_a: SO:1000009 ! transition - -[Term] -id: SO:1000015 -name: A_to_G_transition -def: "A transition of an adenine to a guanine." 
[SO:ke] -synonym: "A to G transition" EXACT [] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000016 -name: G_to_A_transition -def: "A transition of a guanine to an adenine." [SO:ke] -synonym: "G to A transition" EXACT [] -is_a: SO:1000014 ! purine_transition - -[Term] -id: SO:1000017 -name: transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G, or vice versa." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -xref: http://en.wikipedia.org/wiki/Transversion "wiki" -is_a: SO:1000008 ! point_mutation - -[Term] -id: SO:1000018 -name: pyrimidine_to_purine_transversion -def: "Change of a pyrimidine nucleotide, C or T, into a purine nucleotide, A or G." [SO:ke] -synonym: "pyrimidine to purine transversion" EXACT [] -is_a: SO:1000017 ! transversion - -[Term] -id: SO:1000019 -name: C_to_A_transversion -def: "A transversion from cytidine to adenine." [SO:ke] -synonym: "C to A transversion" EXACT [] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000020 -name: C_to_G_transversion -synonym: "C to G transversion" EXACT [] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000021 -name: T_to_A_transversion -def: "A transversion from T to A." [SO:ke] -synonym: "T to A transversion" EXACT [] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000022 -name: T_to_G_transversion -def: "A transversion from T to G." [SO:ke] -synonym: "T to G transversion" EXACT [] -is_a: SO:1000018 ! pyrimidine_to_purine_transversion - -[Term] -id: SO:1000023 -name: purine_to_pyrimidine_transversion -def: "Change of a purine nucleotide, A or G , into a pyrimidine nucleotide C or T." [SO:ke] -synonym: "purine to pyrimidine transversion" EXACT [] -is_a: SO:1000017 ! transversion - -[Term] -id: SO:1000024 -name: A_to_C_transversion -def: "A transversion from adenine to cytidine." [SO:ke] -synonym: "A to C transversion" EXACT [] -is_a: SO:1000023 ! 
purine_to_pyrimidine_transversion - -[Term] -id: SO:1000025 -name: A_to_T_transversion -def: "A transversion from adenine to thymine." [SO:ke] -synonym: "A to T transversion" EXACT [] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000026 -name: G_to_C_transversion -def: "A transversion from guanine to cytidine." [SO:ke] -synonym: "G to C transversion" EXACT [] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000027 -name: G_to_T_transversion -def: "A transversion from guanine to thymine." [SO:ke] -synonym: "G to T transversion" EXACT [] -is_a: SO:1000023 ! purine_to_pyrimidine_transversion - -[Term] -id: SO:1000028 -name: intrachromosomal_mutation -synonym: "intrachromosomal mutation" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000029 -name: chromosomal_deletion -def: "An incomplete chromosome." [SO:ke] -synonym: "(bacteria)&Dgr;" RELATED [] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(fungi)D" RELATED [] -synonym: "chromosomal deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Chromosomal_deletion "wiki" -is_a: SO:0000550 ! aneuploid_chromosome -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000030 -name: chromosomal_inversion -synonym: "(bacteria)IN" RELATED [] -synonym: "(Drosophila)In" RELATED [] -synonym: "(fungi)In" RELATED [] -synonym: "chromosomal inversion" EXACT [] -xref: http://en.wikipedia.org/wiki/Chromosomal_inversion "wiki" -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000031 -name: interchromosomal_mutation -synonym: "interchromosomal mutation" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000032 -name: indel -def: "A hybrid term (insertion/deletion) to describe sequence length change when the direction of the change is unspecified." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -xref: http://en.wikipedia.org/wiki/Indel "wiki" -is_a: SO:0001059 ! 
sequence_alteration - -[Term] -id: SO:1000033 -name: nucleotide_deletion -def: "One or more continuous nucleotides are excised from the sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "nucleotide deletion" EXACT [] -xref: http://en.wikipedia.org/wiki/Nucleotide_deletion "wiki" -is_a: SO:1000032 ! indel - -[Term] -id: SO:1000034 -name: nucleotide_insertion -def: "One or more nucleotides are added between two adjacent nucleotides in the sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "nucleotide insertion" EXACT [] -is_a: SO:1000032 ! indel - -[Term] -id: SO:1000035 -name: nucleotide_duplication -def: "One or more nucleotides are added between two adjacent nucleotides in the sequence; the inserted sequence derives from, or is identical in sequence to, nucleotides adjacent to insertion point." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "nucleotide duplication" EXACT [] -is_a: SO:1000034 ! nucleotide_insertion - -[Term] -id: SO:1000036 -name: inversion -def: "A continuous nucleotide sequence is inverted in the same position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -subset: SOFA -is_a: SO:0001059 ! sequence_alteration -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1000037 -name: chromosomal_duplication -def: "An extra chromosome." [SO:ke] -synonym: "(Drosophila)Dp" RELATED [] -synonym: "(fungi)Dp" RELATED [] -synonym: "chromosomal duplication" EXACT [] -xref: http://en.wikipedia.org/wiki/Chromosomal_duplication "wiki" -is_a: SO:0000550 ! aneuploid_chromosome - -[Term] -id: SO:1000038 -name: intrachromosomal_duplication -synonym: "intrachromosomal duplication" EXACT [] -is_a: SO:1000028 ! intrachromosomal_mutation -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000039 -name: direct_tandem_duplication -synonym: "direct tandem duplication" EXACT [] -is_a: SO:1000173 ! 
tandem_duplication - -[Term] -id: SO:1000040 -name: inverted_tandem_duplication -synonym: "inverted tandem duplication" EXACT [] -is_a: SO:1000173 ! tandem_duplication - -[Term] -id: SO:1000041 -name: intrachromosomal_transposition -synonym: "(Drosophila)Tp" RELATED [] -synonym: "intrachromosomal transposition" EXACT [] -is_a: SO:0000453 ! transposition -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:1000042 -name: compound_chromosome -synonym: "compound chromosome" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000043 -name: Robertsonian_fusion -synonym: "Robertsonian fusion" EXACT [] -xref: http://en.wikipedia.org/wiki/Robertsonian_fusion "wiki" -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000044 -name: chromosomal_translocation -synonym: "(Drosophila)T" RELATED [] -synonym: "(fungi)T" RELATED [] -synonym: "chromosomal translocation" EXACT [] -xref: http://en.wikipedia.org/wiki/Chromosomal_translocation "wiki" -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000045 -name: ring_chromosome -synonym: "(Drosophila)R" RELATED [] -synonym: "(fungi)C" RELATED [] -synonym: "ring chromosome" EXACT [] -xref: http://en.wikipedia.org/wiki/Ring_chromosome "wiki" -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000046 -name: pericentric_inversion -synonym: "pericentric inversion" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000047 -name: paracentric_inversion -synonym: "paracentric inversion" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000048 -name: reciprocal_chromosomal_translocation -synonym: "reciprocal chromosomal translocation" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000049 -name: sequence_variation_affecting_transcript -def: "Any change in mature, spliced and processed, RNA that results from a change in the corresponding DNA sequence." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting transcript" EXACT [] -synonym: "sequence variation affecting transcript" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:1000050 -name: sequence_variant_causing_no_change_in_transcript -def: "No effect on the state of the RNA." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing no change in transcript" RELATED [] -synonym: "sequence variant causing no change in transcript" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000052 -name: sequence_variation_affecting_complex_change_in_transcript -synonym: "mutation affecting complex change in transcript" EXACT [] -synonym: "sequence variation affecting complex change in transcript" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000054 -name: sequence_variation_affecting_coding_sequence -def: "Any of the amino acid coding triplets of a gene are affected by the DNA mutation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting coding sequence" EXACT [] -synonym: "sequence variation affecting coding sequence" RELATED [] -is_a: SO:1000079 ! sequence_variation_affecting_transcript_sequence - -[Term] -id: SO:1000055 -name: sequence_variant_causing_initiator_codon_change_in_transcript -def: "The DNA mutation changes, usually destroys, the first coding triplet of a gene. Usually prevents translation although another initiator codon may be used." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing initiator codon change in transcript" RELATED [] -synonym: "sequence variant causing initiator codon change in transcript" EXACT [] -is_a: SO:1000056 ! 
sequence_variant_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000056 -name: sequence_variant_causing_amino_acid_coding_codon_change_in_transcript -def: "The DNA mutation affects the amino acid coding sequence of a gene; this region includes both the initiator and terminator codons." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutaton causing amino acid coding codon change in transcript" RELATED [] -synonym: "sequence variant causing amino acid coding codon change in transcript" EXACT [] -is_a: SO:1000054 ! sequence_variation_affecting_coding_sequence - -[Term] -id: SO:1000057 -name: sequence_variant_causing_synonymous_codon_change_in_transcript -def: "The changed codon has the same translation product as the original codon." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing synonymous codon change in transcript" RELATED [] -synonym: "sequence variant causing synonymous codon change in transcript" EXACT [] -is_a: SO:1000056 ! sequence_variant_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000058 -name: sequence_variant_causing_non_synonymous_codon_change_in_transcript -def: "A DNA point mutation that causes a substitution of an amino acid by an other." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing non synonymous codon change in transcript" RELATED [] -synonym: "non-synonymous codon change in transcript" EXACT [] -synonym: "sequence variant causing non synonymous codon change in transcript" EXACT [] -is_a: SO:1000056 ! sequence_variant_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000059 -name: sequence_variant_causing_missense_codon_change_in_transcript -def: "The nucleotide change in the codon leads to a new codon coding for a new amino acid." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing missense codon change in transcript" RELATED [] -synonym: "sequence variant causing missense codon change in transcript" EXACT [] -is_a: SO:1000058 ! sequence_variant_causing_non_synonymous_codon_change_in_transcript - -[Term] -id: SO:1000060 -name: sequence_variant_causing_conservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change does not change the gross properties (size, charge, hydrophobicity) of the amino acid at that position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -synonym: "mutation causing conservative missense codon change in transcript" RELATED [] -synonym: "sequence variant causing conservative missense codon change in transcript" EXACT [] -is_a: SO:1000059 ! sequence_variant_causing_missense_codon_change_in_transcript - -[Term] -id: SO:1000061 -name: sequence_variant_causing_nonconservative_missense_codon_change_in_transcript -def: "The amino acid change following from the codon change changes the gross properties (size, charge, hydrophobicity) of the amino acid in that position." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The exact rules need to be stated, a common set of rules can be derived from e.g. BLOSUM62 amino acid distance matrix. -synonym: "mutation causing nonconservative missense codon change in transcript" RELATED [] -synonym: "sequence variant causing nonconservative missense codon change in transcript" EXACT [] -is_a: SO:1000059 ! sequence_variant_causing_missense_codon_change_in_transcript - -[Term] -id: SO:1000062 -name: sequence_variant_causing_nonsense_codon_change_in_transcript -def: "The nucleotide change in the codon triplet creates a terminator codon." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing nonsense codon change in transcript" RELATED [] -synonym: "sequence variant causing nonsense codon change in transcript" EXACT [] -is_a: SO:1000056 ! sequence_variant_causing_amino_acid_coding_codon_change_in_transcript - -[Term] -id: SO:1000063 -name: sequence_variant_causing_terminator_codon_change_in_transcript -def: "The nucleotide change in the codon triplet changes the stop codon, causing an elongated transcript sequence." [SO:ke] -synonym: "mutation causing terminator codon change in transcript" RELATED [] -synonym: "sequence variant causing terminator codon change in transcript" EXACT [] -is_a: SO:1000054 ! sequence_variation_affecting_coding_sequence - -[Term] -id: SO:1000064 -name: sequence_variation_affecting_reading_frame -def: "An umbrella term for terms describing an effect of a sequence variation on the frame of translation." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting reading frame" EXACT [] -synonym: "sequence variation affecting reading frame" RELATED [] -is_a: SO:1000054 ! sequence_variation_affecting_coding_sequence - -[Term] -id: SO:1000065 -name: frameshift_sequence_variation -def: "A mutation causing a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three." [SO:ke] -synonym: "frameshift mutation" EXACT [] -synonym: "frameshift sequence variation" RELATED [] -synonym: "out of frame mutation" RELATED [] -xref: http://en.wikipedia.org/wiki/Frameshift_mutation "wiki" -is_a: SO:1000064 ! sequence_variation_affecting_reading_frame - -[Term] -id: SO:1000066 -name: sequence_variant_causing_plus_1_frameshift_mutation -def: "A mutation causing a disruption of the translational reading frame, due to the insertion of a nucleotide." 
[SO:ke] -synonym: "plus 1 frameshift mutation" EXACT [] -synonym: "sequence variant causing plus 1 frameshift mutation" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000067 -name: sequence_variant_causing_minus_1_frameshift -def: "A mutation causing a disruption of the translational reading frame, due to the deletion of a nucleotide." [SO:ke] -synonym: "minus 1 frameshift mutation" EXACT [] -synonym: "sequence variant causing minus 1 frameshift" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000068 -name: sequence_variant_causing_plus_2_frameshift -def: "A mutation causing a disruption of the translational reading frame, due to the insertion of two nucleotides." [SO:ke] -synonym: "plus 2 frameshift mutation" EXACT [] -synonym: "sequence variant causing plus 2 frameshift" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000069 -name: sequence_variant_causing_minus_2_frameshift -def: "A mutation causing a disruption of the translational reading frame, due to the deletion of two nucleotides." [SO:ke] -synonym: "minus 2 frameshift mutation" EXACT [] -synonym: "sequence variant causing minus 2 frameshift" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000070 -name: sequence_variant_affecting_transcript_processing -def: "Sequence variant affects the way in which the primary transcriptional product is processed to form the mature transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting transcript processing" RELATED [] -synonym: "sequence variant affecting transcript processing" EXACT [] -is_a: SO:1000079 ! 
sequence_variation_affecting_transcript_sequence - -[Term] -id: SO:1000071 -name: sequence_variant_affecting_splicing -def: "A sequence_variant_effect where the way in which the primary transcriptional product is processed to form the mature transcript, specifically by the removal (splicing) of intron sequences is changed." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting splicing" RELATED [] -synonym: "sequence variant affecting splicing" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:1000072 -name: sequence_variant_affecting_splice_donor -def: "A sequence_variant_effect that changes the splice donor sequence." [SO:ke] -synonym: "mutation affecting splice donor" RELATED [] -synonym: "sequence variant affecting splice donor" RELATED [] -synonym: "splice donor mutation" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000073 -name: sequence_variant_affecting_splice_acceptor -def: "A sequence_variant_effect that changes the splice acceptor sequence." [SO:ke] -synonym: "mutation affecting splicing" RELATED [] -synonym: "sequence variant affecting splice acceptor" RELATED [] -synonym: "splice acceptor mutation" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000074 -name: sequence_variant_causing_cryptic_splice_activation -def: "A sequence variant causing a new (functional) splice site." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: A cryptic splice site is only used when the natural splice site has been disrupted by a sequence alteration. -synonym: "cryptic splice activator sequence variant" EXACT [] -synonym: "mutation causing cryptic splice activator" RELATED [] -synonym: "sequence variant causing cryptic splice activator" EXACT [] -is_a: SO:1000071 ! 
sequence_variant_affecting_splicing - -[Term] -id: SO:1000075 -name: sequence_variant_affecting_editing -def: "Sequence variant affects the editing of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting editing" RELATED [] -synonym: "sequence variant affecting editing" EXACT [] -is_a: SO:1000070 ! sequence_variant_affecting_transcript_processing - -[Term] -id: SO:1000076 -name: sequence_variant_affecting_transcription -def: "Mutation affects the process of transcription, its initiation, progression or termination." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting transcription" RELATED [] -synonym: "sequence variant affecting transcription" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000078 -name: sequence_variant_decreasing_rate_of_transcription -def: "A sequence variation that decreases the rate a which transcription of the sequence occurs." [SO:ke] -synonym: "mutation decreasing rate of transcription" RELATED [] -synonym: "sequence variation decreasing rate of transcription" EXACT [] -is_a: SO:1000081 ! sequence_variant_affecting_rate_of_transcription - -[Term] -id: SO:1000079 -name: sequence_variation_affecting_transcript_sequence -synonym: "mutation affecting transcript sequence" EXACT [] -synonym: "sequence variation affecting transcript sequence" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000080 -name: sequence_variant_increasing_rate_of_transcription -synonym: "mutation increasing rate of transcription" RELATED [] -synonym: "sequence variation increasing rate of transcription" EXACT [] -is_a: SO:1000081 ! sequence_variant_affecting_rate_of_transcription - -[Term] -id: SO:1000081 -name: sequence_variant_affecting_rate_of_transcription -def: "A mutation that alters the rate a which transcription of the sequence occurs." 
[SO:ke] -synonym: "mutation affecting rate of transcription" RELATED [] -synonym: "sequence variant affecting rate of transcription" EXACT [] -is_a: SO:1000076 ! sequence_variant_affecting_transcription - -[Term] -id: SO:1000082 -name: sequence variant_affecting_transcript_stability -def: "Sequence variant affects the stability of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting transcript stability" RELATED [] -synonym: "sequence variant affecting transcript stability" EXACT [] -is_a: SO:1000079 ! sequence_variation_affecting_transcript_sequence - -[Term] -id: SO:1000083 -name: sequence_variant_increasing_transcript_stability -def: "Sequence variant increases the stability (half-life) of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation increasing transcript stability" RELATED [] -synonym: "sequence variant increasing transcript stability" EXACT [] -is_a: SO:1000082 ! sequence variant_affecting_transcript_stability - -[Term] -id: SO:1000084 -name: sequence_variant_decreasing_transcript_stability -def: "Sequence variant decreases the stability (half-life) of the transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation decreasing transcript stability" RELATED [] -synonym: "sequence variant decreasing transcript stability" EXACT [] -is_a: SO:1000082 ! sequence variant_affecting_transcript_stability - -[Term] -id: SO:1000085 -name: sequence_variation_affecting_level_of_transcript -def: "A sequence variation that causes a change in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -synonym: "mutation affecting level of transcript" RELATED [] -synonym: "sequence variation affecting level of transcript" EXACT [] -is_a: SO:1000049 ! 
sequence_variation_affecting_transcript - -[Term] -id: SO:1000086 -name: sequence_variation_decreasing_level_of_transcript -def: "A sequence variation that causes a decrease in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -synonym: "mutation decreasing level of transcript" EXACT [] -synonym: "sequence variation decreasing level of transcript" RELATED [] -is_a: SO:1000085 ! sequence_variation_affecting_level_of_transcript - -[Term] -id: SO:1000087 -name: sequence_variation_increasing_level_of_transcript -def: "A sequence_variation that causes an increase in the level of mature, spliced and processed RNA, resulting from a change in the corresponding DNA sequence." [SO:ke] -synonym: "mutation increasing level of transcript" EXACT [] -synonym: "sequence variation increasing level of transcript" EXACT [] -is_a: SO:1000085 ! sequence_variation_affecting_level_of_transcript - -[Term] -id: SO:1000088 -name: sequence_variant_affecting_translational_product -def: "Mutation causes a change in primary translation product of a transcript." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation affecting translational product" RELATED [] -synonym: "sequence variant affecting translational product" EXACT [] -is_a: SO:1000132 ! sequence_variant_effect - -[Term] -id: SO:1000089 -name: sequence_variant_causing_no_change_of_translational_product -def: "The sequence variant at RNA level does not lead to any change in polypeptide." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing no change of translational product" RELATED [] -synonym: "sequence variant causing no change of translational product" EXACT [] -is_a: SO:1000088 ! 
sequence_variant_affecting_translational_product - -[Term] -id: SO:1000090 -name: sequence_variant_causing_uncharacterised_change_of_translational_product -def: "A sequence variant causing an uncharacterized change of translational product." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing uncharacterised change of translational product" RELATED [] -synonym: "sequence variant causing uncharacterised change of translational product" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000091 -name: sequence_variant_causing_partially_characterised_change_of_translational_product -def: "A sequence variant causing a partially uncharacterised change in translational product." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -comment: The nature of the mutation event is only partially characterised. -synonym: "mutation causing partially characterised change of translational product" RELATED [] -synonym: "sequence variant causing partially characterised change of translational product" EXACT [] -is_a: SO:1000090 ! sequence_variant_causing_uncharacterised_change_of_translational_product - -[Term] -id: SO:1000092 -name: sequence_variant_causing_complex_change_of_translational_product -def: "Any sequence variant effect that is known at nucleotide level but cannot be explained by using other key terms." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing complex change of translational product" RELATED [] -synonym: "sequence variant causing complex change of translational product" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000093 -name: sequence_variant_causing_amino_acid_substitution -def: "The replacement of a single amino acid by another." 
[http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing amino acid substitution" RELATED [] -synonym: "sequence variant causing amino acid substitution" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000094 -name: sequence_variant_causing_conservative_amino_acid_substitution -synonym: "mutation causing conservative amino acid substitution" RELATED [] -synonym: "sequence variant causing conservative amino acid substitution" EXACT [] -is_a: SO:1000093 ! sequence_variant_causing_amino_acid_substitution - -[Term] -id: SO:1000095 -name: sequence_variant_causing_nonconservative_amino_acid_substitution -synonym: "mutation causing nonconservative amino acid substitution" RELATED [] -synonym: "sequence variant causing nonconservative amino acid substitution" EXACT [] -is_a: SO:1000093 ! sequence_variant_causing_amino_acid_substitution - -[Term] -id: SO:1000096 -name: sequence_variant_causing_amino_acid_insertion -def: "The insertion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing amino acid insertion" RELATED [] -synonym: "sequence variant causing amino acid insertion" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000097 -name: sequence_variant_causing_amino_acid_deletion -def: "The deletion of one or more amino acids from the polypeptide, without affecting the surrounding sequence." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing amino acid deletion" RELATED [] -synonym: "sequence variant causing amino acid deletion" EXACT [] -is_a: SO:1000105 ! 
sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000098 -name: sequence_variant_causing_polypeptide_truncation -def: "The translational product is truncated at its C-terminus, usually a result of a nonsense codon change in transcript (SO:1000062)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing polypeptide truncation" RELATED [] -synonym: "sequence variant causing polypeptide truncation" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000099 -name: sequence_variant_causing_polypeptide_elongation -def: "The extension of the translational product at either (or both) the N-terminus and/or the C-terminus." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing polypeptide elongation" RELATED [] -synonym: "sequence variant causing polypeptide elongation" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000100 -name: mutation_causing_polypeptide_N_terminal_elongation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing polypeptide N terminal elongation" EXACT [] -synonym: "polypeptide N-terminal elongation" EXACT [] -is_a: SO:1000099 ! sequence_variant_causing_polypeptide_elongation - -[Term] -id: SO:1000101 -name: mutation_causing_polypeptide_C_terminal_elongation -def: "." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing polypeptide C terminal elongation" EXACT [] -synonym: "polypeptide C-terminal elongation" EXACT [] -is_a: SO:1000099 ! sequence_variant_causing_polypeptide_elongation - -[Term] -id: SO:1000102 -name: sequence_variant_affecting_level_of_translational_product -synonym: "mutation affecting level of translational product" RELATED [] -synonym: "sequence variant affecting level of translational product" EXACT [] -is_a: SO:1000088 ! 
sequence_variant_affecting_translational_product - -[Term] -id: SO:1000103 -name: sequence_variant_decreasing_level_of_translation_product -synonym: "mutationdecreasing level of translation product" RELATED [] -synonym: "sequence variant decreasing level of translation product" EXACT [] -is_a: SO:1000102 ! sequence_variant_affecting_level_of_translational_product - -[Term] -id: SO:1000104 -name: sequence_variant_increasing_level_of_translation_product -synonym: "mutationt increasing level of translation product" RELATED [] -synonym: "sequence variant increasing level of translation product" EXACT [] -is_a: SO:1000102 ! sequence_variant_affecting_level_of_translational_product - -[Term] -id: SO:1000105 -name: sequence_variant_affecting_polypeptide_amino_acid_sequence -synonym: "mutation affecting polypeptide amino acid sequence" RELATED [] -synonym: "sequence variant affecting polypeptide amino acid sequence" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000106 -name: mutation_causing_inframe_polypeptide_N_terminal_elongation -synonym: "inframe polypeptide N-terminal elongation" EXACT [] -synonym: "mutation causing inframe polypeptide N terminal elongation" EXACT [] -is_a: SO:1000100 ! mutation_causing_polypeptide_N_terminal_elongation - -[Term] -id: SO:1000107 -name: mutation_causing_out_of_frame_polypeptide_N_terminal_elongation -synonym: "mutation causing out of frame polypeptide N terminal elongation" EXACT [] -synonym: "out of frame polypeptide N-terminal elongation" EXACT [] -is_a: SO:1000100 ! mutation_causing_polypeptide_N_terminal_elongation - -[Term] -id: SO:1000108 -name: mutaton_causing_inframe_polypeptide_C_terminal_elongation -synonym: "inframe_polypeptide C-terminal elongation" EXACT [] -synonym: "mutaton causing inframe polypeptide C terminal elongation" EXACT [] -is_a: SO:1000101 ! 
mutation_causing_polypeptide_C_terminal_elongation - -[Term] -id: SO:1000109 -name: mutation_causing_out_of_frame_polypeptide_C_terminal_elongation -synonym: "mutation causing out of frame polypeptide C terminal elongation" EXACT [] -synonym: "out of frame polypeptide C-terminal elongation" EXACT [] -is_a: SO:1000101 ! mutation_causing_polypeptide_C_terminal_elongation - -[Term] -id: SO:1000110 -name: frame_restoring_sequence_variant -def: "A mutation that reverts the sequence of a previous frameshift mutation back to the initial frame." [SO:ke] -synonym: "frame restoring mutation" EXACT [] -synonym: "frame restoring sequence variant" EXACT [] -is_a: SO:1000065 ! frameshift_sequence_variation - -[Term] -id: SO:1000111 -name: sequence_variant_affecting_3D_structure_of_polypeptide -def: "A mutation that changes the amino acid sequence of the peptide in such a way that it changes the 3D structure of the molecule." [SO:ke] -synonym: "mutation affecting 3D structure of polypeptide" RELATED [] -synonym: "sequence variant affecting 3D structure of polypeptide" EXACT [] -synonym: "sequence variant affecting 3D-structure of polypeptide" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000112 -name: sequence_variant_causing_no_3D_structural_change -synonym: "mutation causing no 3D structural change" RELATED [] -synonym: "sequence variant causing no 3D structural change" EXACT [] -is_a: SO:1000111 ! sequence_variant_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000113 -name: sequence_variant_causing_uncharacterised_3D_structural_change -synonym: "mutation causing uncharacterised 3D structural change" RELATED [] -synonym: "sequence variant causing uncharacterised 3D structural change" EXACT [] -is_a: SO:1000111 ! 
sequence_variant_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000114 -name: sequence_variant_causing_partially_characterised_3D_structural_change -synonym: "mutation causing partially characterised 3D structural change" RELATED [] -synonym: "sequence variant causing partially characterised 3D structural change" EXACT [] -is_a: SO:1000113 ! sequence_variant_causing_uncharacterised_3D_structural_change - -[Term] -id: SO:1000115 -name: sequence_variant_causing_complex_3D_structural_change -synonym: "mutation causing complex 3D structural change" RELATED [] -synonym: "sequence variant causing complex 3D structural change" EXACT [] -is_a: SO:1000111 ! sequence_variant_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000116 -name: sequence_variant_causing_conformational_change -synonym: "mutation causing conformational change" RELATED [] -synonym: "sequence variant causing conformational change" EXACT [] -is_a: SO:1000111 ! sequence_variant_affecting_3D_structure_of_polypeptide - -[Term] -id: SO:1000117 -name: sequence_variant_affecting_polypeptide_function -synonym: "mutation affecting polypeptide function" RELATED [] -synonym: "sequence variant affecting polypeptide function" EXACT [] -is_a: SO:1000088 ! sequence_variant_affecting_translational_product - -[Term] -id: SO:1000118 -name: sequence_variant_causing_loss_of_function_of_polypeptide -synonym: "loss of function of polypeptide" RELATED [] -synonym: "mutation causing loss of function of polypeptide" RELATED [] -synonym: "sequence variant causing loss of function of polypeptide" EXACT [] -is_a: SO:1000117 ! sequence_variant_affecting_polypeptide_function - -[Term] -id: SO:1000119 -name: sequence_variant_causing_inactive_ligand_binding_site -synonym: "mutation causing inactive ligand binding site" RELATED [] -synonym: "sequence variant causing inactive ligand binding site" EXACT [] -is_a: SO:1000118 ! 
sequence_variant_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000120 -name: sequence_variant_causing_inactive_catalytic_site -synonym: "mutation causing inactive catalytic site" RELATED [] -synonym: "sequence variant causing inactive catalytic site" EXACT [] -is_a: SO:1000119 ! sequence_variant_causing_inactive_ligand_binding_site - -[Term] -id: SO:1000121 -name: sequence_variant_causing_polypeptide_localization_change -synonym: "mutation causing polypeptide localization change" RELATED [] -synonym: "sequence variant causing polypeptide localization change" EXACT [] -is_a: SO:1000117 ! sequence_variant_affecting_polypeptide_function - -[Term] -id: SO:1000122 -name: sequence_variant_causing_polypeptide_post_translational_processing_change -synonym: "mutation causing polypeptide post translational processing change" RELATED [] -synonym: "polypeptide post-translational processing affected" EXACT [] -synonym: "sequence variant causing polypeptide post translational processing change" EXACT [] -is_a: SO:1000117 ! sequence_variant_affecting_polypeptide_function -is_a: SO:1000118 ! sequence_variant_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000123 -name: polypeptide_post_translational_processing_affected -synonym: "polypeptide_post-translational_processing_affected" RELATED [] -is_obsolete: true - -[Term] -id: SO:1000124 -name: sequence_variant_causing_partial_loss_of_function_of_polypeptide -synonym: "mutation causing partial loss of function of polypeptide" RELATED [] -synonym: "partial loss of function of polypeptide" EXACT [] -synonym: "sequence variant causing partial loss of function of polypeptide" EXACT [] -is_a: SO:1000118 ! 
sequence_variant_causing_loss_of_function_of_polypeptide - -[Term] -id: SO:1000125 -name: sequence_variant_causing_gain_of_function_of_polypeptide -synonym: "gain of function of polypeptide" EXACT [] -synonym: "mutation causing gain of function of polypeptide" RELATED [] -synonym: "sequence variant causing gain of function of polypeptide" EXACT [] -is_a: SO:1000117 ! sequence_variant_affecting_polypeptide_function - -[Term] -id: SO:1000126 -name: sequence_variant_affecting_transcript_secondary_structure -def: "A sequence variant that affects the secondary structure (folding) of the RNA transcript molecule." [SO:ke] -synonym: "mutation affecting transcript secondary structure" RELATED [] -synonym: "sequence variant affecting transcript secondary structure" EXACT [] -is_a: SO:1000079 ! sequence_variation_affecting_transcript_sequence - -[Term] -id: SO:1000127 -name: sequence_variant_causing_compensatory_transcript_secondary_structure_mutation -synonym: "mutation causing compensatory transcript secondary structure mutation" RELATED [] -synonym: "sequence variant causing compensatory transcript secondary structure mutation" EXACT [] -is_a: SO:1000126 ! sequence_variant_affecting_transcript_secondary_structure - -[Term] -id: SO:1000132 -name: sequence_variant_effect -def: "The effect of a change in nucleotide sequence." [SO:ke] -comment: Updated after discussion with Peter Taschner - Feb 09. -synonym: "sequence variant effect" RELATED [] -disjoint_from: SO:0000240 ! chromosome_variation -disjoint_from: SO:0000400 ! sequence_attribute - -[Term] -id: SO:1000134 -name: sequence_variant_causing_polypeptide_fusion -synonym: "mutation causing polypeptide fusion" RELATED [] -synonym: "sequence variant causing polypeptide fusion" EXACT [] -is_a: SO:1000105 ! sequence_variant_affecting_polypeptide_amino_acid_sequence - -[Term] -id: SO:1000136 -name: autosynaptic_chromosome -synonym: "(Drosophila)A" RELATED [] -synonym: "autosynaptic chromosome" EXACT [] -is_a: SO:1000183 ! 
chromosome_structure_variation - -[Term] -id: SO:1000138 -name: homo_compound_chromosome -synonym: "homo compound chromosome" EXACT [] -synonym: "homo-compound chromosome" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:1000140 -name: hetero_compound_chromosome -synonym: "hetero compound chromosome" EXACT [] -synonym: "hetero-compound chromosome" EXACT [] -is_a: SO:1000042 ! compound_chromosome - -[Term] -id: SO:1000141 -name: chromosome_fission -synonym: "chromosome fission" EXACT [] -is_a: SO:1000028 ! intrachromosomal_mutation - -[Term] -id: SO:1000142 -name: dexstrosynaptic_chromosome -synonym: "dexstrosynaptic chromosome" EXACT [] -is_a: SO:1000136 ! autosynaptic_chromosome - -[Term] -id: SO:1000143 -name: laevosynaptic_chromosome -synonym: "laevosynaptic chromosome" EXACT [] -is_a: SO:1000136 ! autosynaptic_chromosome - -[Term] -id: SO:1000144 -name: free_duplication -synonym: "free duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000145 -name: free_ring_duplication -synonym: "(Drosophila)R" RELATED [] -synonym: "free ring duplication" EXACT [] -is_a: SO:1000045 ! ring_chromosome -is_a: SO:1000144 ! free_duplication - -[Term] -id: SO:1000146 -name: complex_chromosomal_mutation -synonym: "complex chromosomal mutation" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000147 -name: deficient_translocation -def: "A translocation in which one of the four broken ends loses a segment before re-joining." [FB:reference_manual] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfT" RELATED [] -synonym: "deficient translocation" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000044 ! 
chromosomal_translocation - -[Term] -id: SO:1000148 -name: inversion_cum_translocation -def: "The first two breaks are in the same chromosome, and the region between them is rejoined in inverted order to the other side of the first break, such that both sides of break one are present on the same chromosome. The remaining free ends are joined as a translocation with those resulting from the third break." [FB:reference_manual] -synonym: "(Drosophila)InT" RELATED [] -synonym: "(Drosophila)T" RELATED [] -synonym: "inversion cum translocation" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000149 -name: bipartite_duplication -def: "The (large) region between the first two breaks listed is lost, and the two flanking segments (one of them centric) are joined as a translocation to the free ends resulting from the third break." [FB:reference_manual] -synonym: "(Drosophila)bDp" RELATED [] -synonym: "bipartite duplication" EXACT [] -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000150 -name: cyclic_translocation -def: "Three breaks in three different chromosomes. The centric segment resulting from the first break listed is joined to the acentric segment resulting from the second, rather than the third." [FB:reference_manual] -synonym: "cyclic translocation" EXACT [] -is_a: SO:1000044 ! chromosomal_translocation - -[Term] -id: SO:1000151 -name: bipartite_inversion -def: "Three breaks in the same chromosome; both central segments are inverted in place (i.e., they are not transposed)." [FB:reference_manual] -synonym: "(Drosophila)bIn" RELATED [] -synonym: "bipartite inversion" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000152 -name: uninverted_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." 
[FB:reference_manual] -synonym: "(Drosophila)eDp" RELATED [] -synonym: "uninverted insertional duplication" EXACT [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000153 -name: inverted_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)iDp" RELATED [] -synonym: "inverted insertional duplication" EXACT [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000154 -name: insertional_duplication -def: "A chromosome duplication involving the insertion of a duplicated region." [SO:ke] -synonym: "(Drosophila)Dpp" RELATED [] -synonym: "insertional duplication" EXACT [] -is_a: SO:1000037 ! chromosomal_duplication - -[Term] -id: SO:1000155 -name: interchromosomal_transposition -synonym: "(Drosophila)Tp" RELATED [] -synonym: "interchromosomal transposition" EXACT [] -is_a: SO:0000453 ! transposition -is_a: SO:1000031 ! interchromosomal_mutation - -[Term] -id: SO:1000156 -name: inverted_interchromosomal_transposition -synonym: "(Drosophila)iTp" RELATED [] -synonym: "inverted interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000157 -name: uninverted_interchromosomal_transposition -synonym: "(Drosophila)eTp" RELATED [] -synonym: "uninverted interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000158 -name: inverted_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically inverted orientation with respect to its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)iTp" RELATED [] -synonym: "inverted intrachromosomal transposition" EXACT [] -is_a: SO:1000030 ! chromosomal_inversion -is_a: SO:1000041 ! 
intrachromosomal_transposition - -[Term] -id: SO:1000159 -name: uninverted_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the insertion is in cytologically the same orientation as its flanking segments." [FB:reference_manual] -synonym: "(Drosophila)eTp" RELATED [] -synonym: "uninverted intrachromosomal transposition" EXACT [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000160 -name: unoriented_insertional_duplication -def: "A copy of the segment between the first two breaks listed is inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [FB:reference_manual] -synonym: "(Drosophila)uDp" RELATED [] -synonym: "unoriented insertional duplication" EXACT [] -is_a: SO:1000154 ! insertional_duplication - -[Term] -id: SO:1000161 -name: unorientated_interchromosomal_transposition -synonym: "(Drosophila)uTp" RELATED [] -synonym: "unorientated interchromosomal transposition" EXACT [] -is_a: SO:1000155 ! interchromosomal_transposition - -[Term] -id: SO:1000162 -name: unorientated_intrachromosomal_transposition -def: "The segment between the first two breaks listed is removed and inserted at the third break; the orientation of the insertion with respect to its flanking segments is not recorded." [FB:reference_manual] -synonym: "(Drosophila)uTp" RELATED [] -synonym: "unorientated intrachromosomal transposition" EXACT [] -is_a: SO:1000041 ! intrachromosomal_transposition - -[Term] -id: SO:1000170 -name: uncharacterised_chromosomal_mutation -synonym: "uncharacterised chromosomal mutation" EXACT [] -is_a: SO:1000183 ! chromosome_structure_variation - -[Term] -id: SO:1000171 -name: deficient_inversion -def: "Three breaks in the same chromosome; one central region lost, the other inverted." 
[FB:reference_manual] -synonym: "(Drosophila)Df" RELATED [] -synonym: "(Drosophila)DfIn" RELATED [] -synonym: "deficient inversion" EXACT [] -is_a: SO:1000029 ! chromosomal_deletion -is_a: SO:1000030 ! chromosomal_inversion - -[Term] -id: SO:1000173 -name: tandem_duplication -synonym: "tandem duplication" EXACT [] -is_a: SO:1000038 ! intrachromosomal_duplication - -[Term] -id: SO:1000175 -name: partially_characterised_chromosomal_mutation -synonym: "partially characterised chromosomal mutation" EXACT [] -is_a: SO:1000170 ! uncharacterised_chromosomal_mutation - -[Term] -id: SO:1000177 -name: sequence_variant_causing_uncharacterised_change_in_transcript -def: "The nature of the mutation event is either uncharacterised or only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing uncharacterised change in transcript" RELATED [] -synonym: "sequence variant causing uncharacterised change in transcript" EXACT [] -is_a: SO:1000049 ! sequence_variation_affecting_transcript - -[Term] -id: SO:1000179 -name: sequence_variant_causing_partially_characterised_change_in_transcript -def: "The nature of the mutation event is only partially characterised." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causing partially characterised change in transcript" RELATED [] -synonym: "sequence variant causing partially characterised change in transcript" EXACT [] -is_a: SO:1000177 ! sequence_variant_causing_uncharacterised_change_in_transcript - -[Term] -id: SO:1000180 -name: sequence_variant_affecting_gene_structure -def: "A sequence_variant_effect that changes the gene structure." [SO:ke] -synonym: "mutation affecting gene structure" RELATED [] -synonym: "sequence variant affecting gene structure" EXACT [] -is_a: SO:1000132 ! 
sequence_variant_effect - -[Term] -id: SO:1000181 -name: sequence_variant_causing_gene_fusion -def: "A sequence_variant_effect that changes the gene structure by causing a fusion to another gene." [SO:ke] -synonym: "mutation causing gene fusion" RELATED [] -synonym: "sequence variant causing gene fusion" EXACT [] -is_a: SO:1000180 ! sequence_variant_affecting_gene_structure - -[Term] -id: SO:1000182 -name: chromosome_number_variation -def: "A kind of chromosome variation where the chromosome complement is not an exact multiple of the haploid number." [SO:ke] -synonym: "chromosome number variation" EXACT [] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:1000183 -name: chromosome_structure_variation -synonym: "chromosome structure variation" EXACT [] -is_a: SO:0000240 ! chromosome_variation - -[Term] -id: SO:1000184 -name: sequence_variant_causes_exon_loss -def: "A sequence variant affecting splicing and causes an exon loss." [SO:ke] -synonym: "mutation causes exon loss" RELATED [] -synonym: "sequence variant causes exon loss" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000185 -name: sequence_variant_causes_intron_gain -def: "A sequence variant effect, causing an intron to be gained by the processed transcript; usually a result of a donor acceptor mutation (SO:1000072)." [http://www.ebi.ac.uk/mutations/recommendations/mutevent.html] -synonym: "mutation causes intron gain" RELATED [] -synonym: "sequence variant causes intron gain" EXACT [] -is_a: SO:1000071 ! sequence_variant_affecting_splicing - -[Term] -id: SO:1000186 -name: sequence_variant_causing_cryptic_splice_donor_activation -synonym: "sequence variant causing cryptic splice donor activation" EXACT [] -is_a: SO:1000074 ! 
sequence_variant_causing_cryptic_splice_activation - -[Term] -id: SO:1001186 -name: sequence_variant_causing_cryptic_splice_acceptor_activation -synonym: "sequence variant causing cryptic splice acceptor activation" EXACT [] -is_a: SO:1000074 ! sequence_variant_causing_cryptic_splice_activation - -[Term] -id: SO:1001187 -name: alternatively_spliced_transcript -def: "A transcript that is alternatively spliced." [SO:xp] -synonym: "alternatively spliced transcript" EXACT [] -intersection_of: SO:0000673 ! transcript -intersection_of: has_quality SO:0000877 ! alternatively_spliced - -[Term] -id: SO:1001188 -name: encodes_1_polypeptide -def: "A gene that is alternately spliced, but encodes only one polypeptide." [SO:ke] -synonym: "encodes 1 polypeptide" EXACT [] -is_a: SO:0000463 ! encodes_alternately_spliced_transcripts - -[Term] -id: SO:1001189 -name: encodes_greater_than_1_polypeptide -def: "A gene that is alternately spliced, and encodes more than one polypeptide." [SO:ke] -synonym: "encodes greater than 1 polypeptide" EXACT [] -is_a: SO:0000463 ! encodes_alternately_spliced_transcripts - -[Term] -id: SO:1001190 -name: encodes_different_polypeptides_different_stop -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences, but use different stop codons." [SO:ke] -synonym: "encodes different polypeptides different stop" EXACT [] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001191 -name: encodes_overlapping_peptides_different_start -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences, but use different start codons." [SO:ke] -synonym: "encodes overlapping peptides different start" EXACT [] -is_a: SO:1001195 ! 
encodes_overlapping_peptides - -[Term] -id: SO:1001192 -name: encodes_disjoint_polypeptides -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that do not have overlapping peptide sequences." [SO:ke] -synonym: "encodes disjoint polypeptides" EXACT [] -is_a: SO:1001189 ! encodes_greater_than_1_polypeptide - -[Term] -id: SO:1001193 -name: encodes_overlapping_polypeptides_different_start_and_stop -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences, but use different start and stop codons." [SO:ke] -synonym: "encodes overlapping polypeptides different start and stop" EXACT [] -is_a: SO:1001195 ! encodes_overlapping_peptides - -[Term] -id: SO:1001194 -name: alternatively_spliced_gene_encoding_greater_than_1_polypeptide_coding_regions_overlapping -is_obsolete: true - -[Term] -id: SO:1001195 -name: encodes_overlapping_peptides -def: "A gene that is alternately spliced, and encodes more than one polypeptide, that have overlapping peptide sequences." [SO:ke] -synonym: "encodes overlapping peptides" EXACT [] -is_a: SO:1001189 ! encodes_greater_than_1_polypeptide - -[Term] -id: SO:1001196 -name: cryptogene -def: "A maxicircle gene so extensively edited that it cannot be matched to its edited mRNA sequence." [SO:ma] -intersection_of: SO:0000654 ! maxicircle_gene -intersection_of: has_quality SO:0000976 ! cryptic - -[Term] -id: SO:1001197 -name: dicistronic_primary_transcript -def: "A primary transcript that has the quality dicistronic." [SO:xp] -synonym: "dicistronic primary transcript" EXACT [] -intersection_of: SO:0000185 ! primary_transcript -intersection_of: has_quality SO:0000879 ! dicistronic - -[Term] -id: SO:1001217 -name: member_of_regulon -synonym: "member of regulon" EXACT [] -is_a: SO:0000081 ! 
gene_array_member - -[Term] -id: SO:1001244 -name: alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non_overlapping -synonym: "alternatively_spliced_transcript_encoding_greater_than_1_polypeptide_different_start_codon_different_stop_codon_coding_regions_non-overlapping" RELATED [] -is_obsolete: true - -[Term] -id: SO:1001246 -name: CDS_independently_known -def: "A CDS with the evidence status of being independently known." [SO:xp] -synonym: "CDS independently known" EXACT [] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000906 ! independently_known - -[Term] -id: SO:1001247 -name: orphan_CDS -def: "A CDS whose predicted amino acid sequence is unsupported by any experimental evidence or by any match with any other known sequence." [SO:ma] -synonym: "orphan CDS" EXACT [] -intersection_of: SO:1001254 ! CDS_predicted -intersection_of: has_origin SO:0000910 ! orphan - -[Term] -id: SO:1001249 -name: CDS_supported_by_domain_match_data -def: "A CDS that is supported by domain similarity." [SO:xp] -synonym: "CDS supported by domain match data" EXACT [] -intersection_of: SO:1001251 ! CDS_supported_by_sequence_similarity_data -intersection_of: has_quality SO:0000908 ! supported_by_domain_match - -[Term] -id: SO:1001251 -name: CDS_supported_by_sequence_similarity_data -def: "A CDS that is supported by sequence similarity data." [SO:xp] -synonym: "CDS supported by sequence similarity data" EXACT [] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000907 ! supported_by_sequence_similarity - -[Term] -id: SO:1001254 -name: CDS_predicted -def: "A CDS that is predicted." [SO:ke] -synonym: "CDS predicted" EXACT [] -intersection_of: SO:0000316 ! CDS -intersection_of: has_quality SO:0000732 ! 
predicted - -[Term] -id: SO:1001255 -name: status_of_coding_sequence -is_obsolete: true - -[Term] -id: SO:1001259 -name: CDS_supported_by_EST_or_cDNA_data -def: "A CDS that is supported by similarity to EST or cDNA data." [SO:xp] -synonym: "CDS supported by EST or cDNA data" EXACT [] -intersection_of: SO:1001251 ! CDS_supported_by_sequence_similarity_data -intersection_of: has_quality SO:0000909 ! supported_by_EST_or_cDNA - -[Term] -id: SO:1001260 -name: internal_Shine_Dalgarno_sequence -def: "A Shine-Dalgarno sequence that stimulates recoding through interactions with the anti-Shine-Dalgarno in the RNA of small ribosomal subunits of translating ribosomes. The signal is only operative in Bacteria." [PMID:12519954, SO:ke] -synonym: "internal Shine Dalgarno sequence" EXACT [] -synonym: "internal Shine-Dalgarno sequence" EXACT [] -is_a: SO:0000243 ! internal_ribosome_entry_site -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001261 -name: recoded_mRNA -def: "The sequence of a mature mRNA transcript, modified before translation or during translation, usually by special cis-acting signals." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract] -synonym: "recoded mRNA" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000881 ! recoded - -[Term] -id: SO:1001262 -name: minus_1_translationally_frameshifted -def: "An attribute describing a translational frameshift of -1." [SO:ke] -synonym: "minus 1 translationally frameshifted" EXACT [] -is_a: SO:0000887 ! translationally_frameshifted - -[Term] -id: SO:1001263 -name: plus_1_translationally_frameshifted -def: "An attribute describing a translational frameshift of +1." [SO:ke] -synonym: "plus 1 translationally frameshifted" EXACT [] -is_a: SO:0000887 ! 
translationally_frameshifted - -[Term] -id: SO:1001264 -name: mRNA_recoded_by_translational_bypass -def: "A recoded_mRNA where translation was suspended at a particular codon and resumed at a particular non-overlapping downstream codon." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8811194&dopt=Abstract] -synonym: "mRNA recoded by translational bypass" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000886 ! recoded_by_translational_bypass - -[Term] -id: SO:1001265 -name: mRNA_recoded_by_codon_redefinition -def: "A recoded_mRNA that was modified by an alteration of codon meaning." [SO:ma] -synonym: "mRNA recoded by codon redefinition" EXACT [] -intersection_of: SO:0000234 ! mRNA -intersection_of: has_quality SO:0000882 ! codon_redefined - -[Term] -id: SO:1001266 -name: stop_codon_redefinition_as_selenocysteine -is_obsolete: true - -[Term] -id: SO:1001267 -name: stop_codon_readthrough -is_obsolete: true - -[Term] -id: SO:1001268 -name: recoding_stimulatory_region -def: "A site in an mRNA sequence that stimulates the recoding of a region in the same mRNA." [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=12519954&dopt=Abstract] -synonym: "recoding stimulatory region" EXACT [] -synonym: "recoding stimulatory signal" EXACT [] -is_a: SO:0000836 ! mRNA_region - -[Term] -id: SO:1001269 -name: four_bp_start_codon -def: "A non-canonical start codon with 4 base pairs." [SO:ke] -synonym: "4bp start codon" EXACT [] -synonym: "four bp start codon" EXACT [] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001270 -name: stop_codon_redefinition_as_pyrrolysine -is_obsolete: true - -[Term] -id: SO:1001271 -name: archaeal_intron -def: "An intron characteristic of Archaeal tRNA and rRNA genes, where intron transcript generates a bulge-helix-bulge motif that is recognised by a splicing endoribonuclease." 
[PMID:9301331, SO:ma] -comment: Intron characteristic of tRNA genes; splices by an endonuclease-ligase mediated mechanism. -synonym: "archaeal intron" EXACT [] -is_a: SO:0001216 ! endonuclease_spliced_intron - -[Term] -id: SO:1001272 -name: tRNA_intron -def: "An intron found in tRNA that is spliced via endonucleolytic cleavage and ligation rather than transesterification." [SO:ke] -comment: Could be a cross product with Gene ontology, GO:0006388. -synonym: "pre-tRNA intron" EXACT [] -synonym: "tRNA intron" EXACT [] -is_a: SO:0001216 ! endonuclease_spliced_intron - -[Term] -id: SO:1001273 -name: CTG_start_codon -def: "A non-canonical start codon of sequence CTG." [SO:ke] -synonym: "CTG start codon" EXACT [] -is_a: SO:0000680 ! non_canonical_start_codon - -[Term] -id: SO:1001274 -name: SECIS_element -def: "The incorporation of selenocysteine into a protein sequence is directed by an in-frame UGA codon (usually a stop codon) within the coding region of the mRNA. Selenoprotein mRNAs contain a conserved secondary structure in the 3' UTR that is required for the distinction of UGA stop from UGA selenocysteine. The selenocysteine insertion sequence (SECIS) is around 60 nt in length and adopts a hairpin structure which is sufficiently well-defined and conserved to act as a computational screen for selenoprotein genes." [http://www.sanger.ac.uk/cgi-bin/Rfam/getacc?RF00031] -synonym: "SECIS element" EXACT [] -xref: http://en.wikipedia.org/wiki/SECIS_element "wiki" -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001275 -name: retron -def: "Sequence coding for a short, single-stranded, DNA sequence via a retrotransposed RNA intermediate; characteristic of some microbial genomes." [SO:ma] -is_a: SO:0001411 ! biological_region - -[Term] -id: SO:1001277 -name: three_prime_recoding_site -def: "The recoding stimulatory signal located downstream of the recoding site." [SO:ke] -synonym: "three prime recoding site" EXACT [] -is_a: SO:1001268 ! 
recoding_stimulatory_region - -[Term] -id: SO:1001279 -name: three_prime_stem_loop_structure -def: "A recoding stimulatory region, the stem-loop secondary structural element is downstream of the redefined region." [PMID:12519954, SO:ke] -synonym: "three prime stem loop structure" EXACT [] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001280 -name: five_prime_recoding_site -def: "The recoding stimulatory signal located upstream of the recoding site." [SO:ke] -synonym: "five prime recoding site" EXACT [] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:1001281 -name: flanking_three_prime_quadruplet_recoding_signal -def: "Four base pair sequence immediately downstream of the redefined region. The redefined region is a frameshift site. The quadruplet is 2 overlapping codons." [PMID:12519954, SO:ke] -synonym: "flanking three prime quadruplet recoding signal" EXACT [] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001282 -name: UAG_stop_codon_signal -def: "A stop codon signal for a UAG stop codon redefinition." [SO:ke] -synonym: "UAG stop codon signal" EXACT [] -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001283 -name: UAA_stop_codon_signal -def: "A stop codon signal for a UAA stop codon redefinition." [SO:ke] -synonym: "UAA stop codon signal" EXACT [] -is_a: SO:1001288 ! stop_codon_signal - -[Term] -id: SO:1001284 -name: regulon -def: "A group of genes, whether linked as a cluster or not, that respond to a common regulatory signal." [ISBN:0198506732] -subset: SOFA -xref: http://en.wikipedia.org/wiki/Regulon "wiki" -is_a: SO:0005855 ! gene_group - -[Term] -id: SO:1001285 -name: UGA_stop_codon_signal -def: "A stop codon signal for a UGA stop codon redefinition." [SO:ke] -synonym: "UGA stop codon signal" EXACT [] -is_a: SO:1001288 ! 
stop_codon_signal - -[Term] -id: SO:1001286 -name: three_prime_repeat_recoding_signal -def: "A recoding stimulatory signal, downstream sequence important for recoding that contains repetitive elements." [PMID:12519954, SO:ke] -synonym: "three prime repeat recoding signal" EXACT [] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001287 -name: distant_three_prime_recoding_signal -def: "A recoding signal that is found many hundreds of nucleotides 3' of a redefined stop codon." [http://www.ncbi.nlm.nih.gov\:80/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=8709208&dopt=Abstract] -synonym: "distant three prime recoding signal" EXACT [] -is_a: SO:1001277 ! three_prime_recoding_site - -[Term] -id: SO:1001288 -name: stop_codon_signal -def: "A recoding stimulatory signal that is a stop codon and has effect on efficiency of recoding." [PMID:12519954, SO:ke] -comment: This term does not include the stop codons that are redefined. An example would be a stop codon that partially overlapped a frame shifting site would be an example stimulatory signal. -synonym: "stop codon signal" EXACT [] -is_a: SO:1001268 ! recoding_stimulatory_region - -[Term] -id: SO:2000061 -name: databank_entry -def: "The sequence referred to by an entry in a databank such as Genbank or SwissProt." [SO:ke] -subset: SOFA -synonym: "accession" RELATED [] -synonym: "databank entry" EXACT [] -is_a: SO:0000695 ! reagent - -[Term] -id: SO:3000000 -name: gene_segment -def: "A gene component region which acts as a recombinational unit of a gene whose functional form is generated through somatic recombination." [GOC:add] -comment: Requested by tracker 2021594, July 2008, by Alex. -synonym: "gene segment" EXACT [] -is_a: SO:0000842 ! gene_component_region - -[Typedef] -id: adjacent_to -name: adjacent_to -def: "A geometric operator, specified in Egenhofer 1989. Two features meet if they share a junction on the sequence." 
[SO:ke] -subset: SOFA - -[Typedef] -id: associated_with -name: associated_with -comment: This relationship is vague and up for discussion. - -[Typedef] -id: complete_evidence_for_feature -name: complete_evidence_for_feature -def: "B is complete_evidence_for_feature A if the extent (5' and 3' boundaries) and internal boundaries of B fully support the extent and internal boundaries of A." [SO:ke] -comment: If A is a feature with multiple regions such as a multi exon transcript, the supporting EST evidence is complete if each of the regions is supported by an equivalent region in B. Also there must be no extra regions in B that are not represented in A. This relationship was requested by jeltje on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: derives_from -name: derives_from -subset: SOFA -is_transitive: true - -[Typedef] -id: edited_from -name: edited_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:45Z - -[Typedef] -id: edited_to -name: edited_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:19:11Z - -[Typedef] -id: evidence_for_feature -name: evidence_for_feature -def: "B is evidence_for_feature A, if an instance of B supports the existence of A." [SO:ke] -comment: This relationship was requested by nlw on the SO term tracker. The thread for the discussion is available can be accessed via tracker ID:1917222. -is_transitive: true - -[Typedef] -id: exemplar_of -name: exemplar_of -def: "X is exemplar of Y if X is the best evidence for Y." [SO:ke] -comment: Tracker id: 2594157. 
- -[Typedef] -id: genome_of -name: genome_of - -[Typedef] -id: guided_by -name: guided_by -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:04Z - -[Typedef] -id: guides -name: guides -created_by: kareneilbeck -creation_date: 2009-08-19T02:27:24Z - -[Typedef] -id: has_genome_location -name: has_genome_location -is_obsolete: true - -[Typedef] -id: has_intergral_part -name: has_integral_part -def: "X has_integral_part Y if and only if: X has_part Y and Y part_of X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: mRNA has_integral_part CDS. -created_by: kareneilbeck -creation_date: 2009-08-19T12:01:46Z - -[Typedef] -id: has_origin -name: has_origin - -[Typedef] -id: has_part -name: has_part -def: "Inverse of part_of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: operon has_part gene. - -[Typedef] -id: has_quality -name: has_quality -comment: The relationship between a feature and an attribute. - -[Typedef] -id: homologous_to -name: homologous_to -subset: SOFA -is_symmetric: true -is_a: similar_to ! similar_to - -[Typedef] -id: integral_part_of -name: integral_part_of -def: "X integral_part_of Y if and only if: X part_of Y and Y has_part X." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: exon integral_part_of transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:03:28Z - -[Typedef] -id: member_of -name: member_of -comment: A subtype of part_of. Inverse is collection_of. Winston, M, Chaffin, R, Herrmann: A taxonomy of part-whole relations. Cognitive Science 1987, 11:417-444. -subset: SOFA -is_transitive: true - -[Typedef] -id: non_functional_homolog_of -name: non_functional_homolog_of -def: "A relationship between a pseudogenic feature and its functional ancestor." [SO:ke] -subset: SOFA -is_a: homologous_to ! homologous_to - -[Typedef] -id: orthologous_to -name: orthologous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! 
homologous_to - -[Typedef] -id: paralogous_to -name: paralogous_to -subset: SOFA -is_symmetric: true -is_a: homologous_to ! homologous_to - -[Typedef] -id: part_of -name: part_of -def: "X part_of Y if X is a subregion of Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: amino_acid part_of polypeptide. -subset: SOFA -is_transitive: true - -[Typedef] -id: partial_evidence_for_feature -name: partial_evidence_for_feature -def: "B is partial_evidence_for_feature A if the extent of B supports part_of but not all of A." [SO:ke] -is_a: evidence_for_feature ! evidence_for_feature - -[Typedef] -id: position_of -name: position_of - -[Typedef] -id: processed_from -name: processed_from -def: "Inverse of processed_into." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA processed_from miRNA_primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:14:00Z - -[Typedef] -id: processed_into -name: processed_into -def: "X is processed_into Y if a region X is modified to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: miRNA_primary_transcript processed into miRNA. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:15:02Z - -[Typedef] -id: recombined_from -name: recombined_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:21:03Z - -[Typedef] -id: recombined_to -name: recombined_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:20:07Z - -[Typedef] -id: regulated_by -name: regulated_by -is_obsolete: true - -[Typedef] -id: sequence_of -name: sequence_of - -[Typedef] -id: similar_to -name: similar_to -subset: SOFA -is_symmetric: true - -[Typedef] -id: trans_spliced_from -name: trans_spliced_from -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:14Z - -[Typedef] -id: trans_spliced_to -name: trans_spliced_to -created_by: kareneilbeck -creation_date: 2009-08-19T02:22:00Z - -[Typedef] -id: transcribed_from -name: transcribed_from -def: "X is transcribed_from Y if X is synthesized from template Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: primary_transcript transcribed_from gene. -created_by: kareneilbeck -creation_date: 2009-08-19T12:05:39Z - -[Typedef] -id: transcribed_to -name: transcribed_to -def: "Inverse of transcribed_from." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: gene transcribed_to primary_transcript. -created_by: kareneilbeck -creation_date: 2009-08-19T12:08:24Z - -[Typedef] -id: translates_to -name: translates_to -def: "Inverse of translation _of." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: codon translates_to amino_acid. -created_by: kareneilbeck -creation_date: 2009-08-19T12:11:53Z - -[Typedef] -id: translation_of -name: translation_of -def: "X is translation of Y if X is translated by ribosome to create Y." [http://precedings.nature.com/documents/3495/version/1] -comment: Example: Polypeptide translation_of CDS. 
-created_by: kareneilbeck -creation_date: 2009-08-19T12:09:59Z - -[Typedef] -id: variant_of -name: variant_of -def: "A' is a variant (mutation) of A = definition every instance of A' is either an immediate mutation of some instance of A, or there is a chain of immediate mutation processes linking A' to some instance of A." [SO:immuno_workshop] -comment: Added to SO during the immunology workshop, June 2007. This relationship was approved by Barry Smith. - diff --git a/annotation/README.md b/annotation/README.md index 4534f4445..ec51a6859 100644 --- a/annotation/README.md +++ b/annotation/README.md @@ -7,21 +7,14 @@ # The NBIS annotation toolkit.
### (c) The NBIS Genome Annotation Platform -#### [NBIS](NBIS) -Perl library - -#### [LsfTemplates](LsfTemplates) +#### [lsf_templates](lsf_templates) It contains batch scripts to launch different job on lsf scheduler. -#### [Tools](Tools) +#### [tools](tools) The most important. All convenient tools. -[>>All the scripts are gathered here<<](Tools/bin/) -[Prerequisite installation information](https://github.com/NBISweden/GAAS/tree/master/annotation/Tools/Util/gff) - -#### [WebApollo](WebApollo) -All the development related to webapollo +[>>All the scripts are gathered here<<](../bin/) #### [data_test](data_test) Contains data used to test the different development -#### [CheatSheet](CheatSheet) +#### [knowledge](knowledge) diff --git a/annotation/Tools/Abinitio/Augustus/augustus_filter_training_set.pl b/annotation/Tools/Abinitio/Augustus/augustus_filter_training_set.pl deleted file mode 100644 index 45f3594e1..000000000 --- a/annotation/Tools/Abinitio/Augustus/augustus_filter_training_set.pl +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use Getopt::Long; -use Scalar::Util qw(openhandle); -use Bio:: -use Time::Piece; -use Time::Seconds; - -my $usage = qq{ -perl my_script.pl - Getting help: - [--help] - - Input: - [--gff filename] - The name of the file to read. - - [--genome filename] - The genome sequence to read. - - Ouput: - [--outfile filename] - The name of the output file. 
By default the output is the - standard output -}; - -my $outfile = undef; -my $gff = undef; -my $genome = undef; -my $help; - -GetOptions( - "help" => \$help, - "gff=s" => \$gff, - "genome=s" => \$genome, - "outfile=s" => \$outfile); - -# Print Help and exit -if ($help) { - print $usage; - exit(0); -} - -if ($outfile) { - open(STDOUT, ">$outfile") or die("Cannot open $outfile"); -} - -# Create protein dataset - -run_blast(gff,genome); - - -#my $gffio = Bio::Tools::GFF->new(-file => $gff, -gff_version => 3); - - - - - - - -sub run_blast { - - my $gff = shift; - my $genome = shift; - - print $gff ; - -} - - -# -------------- - -sub msg { - my $t = localtime; - my $line = "[".$t->hms."] @_\n"; - print LOG $line if openhandle(\*LOG); - print STDERR $line unless $quiet; -} - -sub runcmd { - msg("Running:", @_); - system(@_)==0 or err("Could not run command:", @_); -} - -sub err { - $quiet=0; - msg(@_); - exit(2); -} - - diff --git a/annotation/Tools/Abinitio/snap_train.sh b/annotation/Tools/Abinitio/snap_train.sh deleted file mode 100755 index b726e3c7b..000000000 --- a/annotation/Tools/Abinitio/snap_train.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -NAME=$1 - -if [ -z "$NAME" ] -then - echo "Must provide a name!" -else - fathom -categorize 1000 genome.ann genome.dna - fathom -export 1000 -plus uni.ann uni.dna - forge export.ann export.dna - hmm-assembler.pl $NAME . 
> $NAME.hmm -fi - diff --git a/annotation/Tools/ComparativeGenomic/kraken_statMap.pl b/annotation/Tools/ComparativeGenomic/kraken_statMap.pl deleted file mode 100755 index 362f0e693..000000000 --- a/annotation/Tools/ComparativeGenomic/kraken_statMap.pl +++ /dev/null @@ -1,744 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Carp; -use File::Basename; -use Getopt::Long; -use Statistics::R; -use IO::File; -use Pod::Usage; -use List::MoreUtils qw(uniq); -use Clone 'clone'; -use NBIS::GFF3::Omniscient; -use NBIS::Plot::R qw(:Ok); - -##### -# What we call parial gene (containing "_partial_part-" in the ID) ? -# This gene has been seen as patial: During a lift-over a gene can be detected on 2 several contigs. -# (full kraken file => features with kraken attribute to TRUE are on contig of the target genome (Transfert annotation on), the others (kraken attribute to FALSE) are on the reference genome to liftfover (where annotations are taken to try to liftover) ) -##### -# -# TODO: add tag kraken_cn (for copy number) with nb of mapping. 1 if only one liftover. -# Ask Manfred why some region map at different location. -# - -my $usage = qq{ -######################################################## -# NBIS 2018 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $outfile = undef; -my $gff = undef; -my $valueK = undef; -my $verbose = undef; -my $kraken_tag = "Kraken_mapped"; -my $help= 0; - -if ( !GetOptions( - "help|h" => \$help, - "gff=s" => \$gff, - "threshold|t=i" => \$valueK, - "verbose|v" => \$verbose, - "outfile|output|out|o=s" => \$outfile)) - -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -# Print Help and exit -if ($help) { - pod2usage( { -message => "$usage", - -verbose => 2, - -exitval => 2 } ); -} - -if ( ! 
(defined($gff)) ){ - pod2usage( { - -message => "\nAt least 1 parameter is mandatory:\nInput reference gtf file (--f)\n\n". - "$usage\n", - -verbose => 0, - -exitval => 2 } ); -} - -## Manage output file -my $gffout; -my $outReport = IO::File->new(); -if ($outfile) { - $outfile=~ s/.gff//g; - - open(my $fh, '>', $outfile.".gff") or die "Could not open file '$outfile' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); - - $outReport->open($outfile."_report.txt", 'w') or die "Could not open file '$outfile'_report.txt $!"; -} -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); - - $outReport->fdopen( fileno(STDOUT), 'w' ) or die "Could not open file STDOUT $!"; -} - -## -----Manage plot output file----- -my $ostreamPlotFile = new IO::File; -my $pathPlotFile="geneMapped.txt"; -my $pathOutPlot="geneMapped_plot.pdf"; -if ($outfile) { - $pathPlotFile=$outfile."-geneMapped.txt"; - $pathOutPlot=$outfile."-geneMapped_plot.pdf"; -} -$ostreamPlotFile->open($pathPlotFile, 'w' ) or - croak( - sprintf( "Can not open '%s' for writing %s", $pathPlotFile, $! 
) - ); - -# Message -my $messageValue; -if ( defined($valueK) ){ - $messageValue = "You choose to keep in output only genes mapped over $valueK percent.\n" -}else{ - $messageValue = "We will keep all the mapped features.\n"; - $valueK=0; -} -$messageValue.="The kraken attribute tag that will be used is: ".$kraken_tag."\n"; - -#print info -if ($outfile) { - print $outReport $messageValue; - }else{print $messageValue;} - - ##################### - # MAIN # - ##################### - - -###################### -### Parse GFF input # -# checks are deactivated except _remove_orphan_l1 -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ - input => $gff, - no_check => 1, - no_check_skip => qw(_remove_orphan_l1) - }); - -#track stats -my $nbOriginalGene = nb_feature_level1($hash_omniscient); #total gene at the beginning -my $nbRecordMap=0; #total original gene that map before check the threshold -my $nbOriginalL0Map=0; #total map before check the threshold -my $nbOriginal_multiMap_seqdif=0; #number of gene that have multimap on seq location before check the threshold -my $nbOriginal_total_multiMap_seqdif=0; #total of multimap on different seq before check the threshold -my $nbOriginal_multiMap_sameseq=0; #number of gene that have multimap on same seq after check the threshold -my $nbOriginal_total_multiMap_sameseq=0; #total of multimap on different seq after check the threshold - -#my $nbMapL1=0; #total gene that map after check the threshold -#my $nbMapL1Uniq=0; -my $nbGeneIdUniqMap=0; -my $nb_multiMap_seqdif=0; #number of gene that have multimap on different seq after check the threshold -my $nb_total_multiMap_seqdif=0; #total of multimap on different seq after check the threshold -my $nb_total_multiMap_seqdif_bothcase=0; -my $nb_total_multiMap_sameseq_bothcase=0; -my $nb_multiMap_sameseq=0; #number of gene that have multimap on same seq after check the threshold -my $nb_total_multiMap_sameseq=0; #total of multimap on different seq after check the threshold 
-my $bothCase=0; - -# track errors: -my $KrakenFakeGene=0; - - -my %mappedPercentPerGene; #Keep information for R plot -my %n_omniscient; -my $nb_noCaseL3=0; -my $new_omniscient=\%n_omniscient; -my $list_uID_new_omniscient; -my $loop=0; - -################# -# == LEVEL level1 -################ -foreach my $primary_tag_key_level1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... - foreach my $id_tag_key_level1 (keys %{$hash_omniscient->{'level1'}{$primary_tag_key_level1}}){ - - ######################################## - # Prepare hash in case of muli mapping # - ######################################## - my %listOfProperHash; - my %listHashWithTrue; - my $gene_feature = $hash_omniscient->{'level1'}{$primary_tag_key_level1}{$id_tag_key_level1}; - my $l1_original_id = $id_tag_key_level1; - ################# - # == LEVEL 1 == # - ################# - $listOfProperHash{$gene_feature->seq_id()}{'level1'}{$primary_tag_key_level1}{$id_tag_key_level1}=$gene_feature; - # write down if kraken_mapped=true - if($gene_feature->has_tag($kraken_tag)){ - if( lc($gene_feature->_tag_value($kraken_tag)) eq "true"){ - $listHashWithTrue{$gene_feature->seq_id()}++; - } - } - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_key_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... 
- if ( exists_keys($hash_omniscient, ('level2',$primary_tag_key_level2,$id_tag_key_level1) ) ){ - foreach my $feature_level2 ( @{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1}}) { - push(@{$listOfProperHash{$feature_level2->seq_id()}{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1}},$feature_level2) ; - my $level2_ID = lc($feature_level2->_tag_value('ID')); - # write down if kraken_mapped=true - if($feature_level2->has_tag($kraken_tag)){ - if( lc($feature_level2->_tag_value($kraken_tag)) eq "true"){ - $listHashWithTrue{$feature_level2->seq_id()}++; - } - } - ################# - # == LEVEL 3 == # - ################# - foreach my $primary_tag_l3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_l3 = cds or exon or start_codon or utr etc... - if ( exists_keys( $hash_omniscient, ('level3', $primary_tag_l3, $level2_ID) ) ){ - foreach my $feature_level3 ( @{$hash_omniscient->{'level3'}{$primary_tag_l3}{$level2_ID}}) { - push(@{$listOfProperHash{$feature_level3->seq_id()}{'level3'}{$primary_tag_l3}{$level2_ID}}, $feature_level3); - # write down if kraken_mapped=true - if($feature_level3->has_tag($kraken_tag)){ - if( lc($feature_level3->_tag_value($kraken_tag)) eq "true"){ - $listHashWithTrue{$feature_level3->seq_id()}++; - - } - } - } - } - } - } - } - } - - #count multi map - my $nbMapTrueHere = keys %listHashWithTrue; - if($nbMapTrueHere > 1){ - $nbOriginal_multiMap_seqdif++; - $nbOriginal_total_multiMap_seqdif+=$nbMapTrueHere; - } - - - my $sucessMapL0=0; - my $sucessMapL1OusideScope=0; - my $firstL0Map="yes"; - - - # A record contains 1 or several LEVEL 0 - ################ - # == LEVEL 0 - ################ - foreach my $seqid_key (keys %listOfProperHash){ - - #if it contains a feature mapped we continue - if(exists_keys(\%listHashWithTrue,$seqid_key)){ - - #keep track of statistics - if($firstL0Map){ # first record of - $nbRecordMap++; - $firstL0Map=undef; - } - $nbOriginalL0Map++; - - #The HAsh is made only by feature with TRUE 
(because the false one are not collected because different) - my $hash = clone($listOfProperHash{$seqid_key}); # We have to clone it otherwise the "merge_omniscients" function can change the id and brakes the original data from hash_omniscient that is used to retrieve the original size of the feature - if(! exists_keys($hash,('level3'))){ - $nb_noCaseL3++; - } - - my ($hash_omniscient_clean, $hash_mRNAGeneLink_clean) = slurp_gff3_file_JD({ input => $hash - }); - - if($verbose){ - print "\nA proper hash:\n"; - print_omniscient($hash_omniscient_clean, $gffout); - print "\n"; - } - - ################################################################################### - # NOW we call deal properly with each proper hash containing only mapped features - ## ################################################################################ - ################ - # == LEVEL 1 - ################ - my $sucessMapL1=0; - foreach my $primary_tag_key_level1 (keys %{$hash_omniscient_clean->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... - foreach my $id_tag_key_level1 (keys %{$hash_omniscient_clean->{'level1'}{$primary_tag_key_level1}}){ - - - $gene_feature = $hash_omniscient_clean->{'level1'}{$primary_tag_key_level1}{$id_tag_key_level1}; - my @ListmrnaNoMatch; - print "level1 feature:\n".$gene_feature->gff_string."\n" if $verbose; - - ################ - # == LEVEL 2 - ################ - - my $sucessMapL2=0; - foreach my $primary_tag_key_level2 (keys %{$hash_omniscient_clean->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... 
- - if ( exists_keys($hash_omniscient_clean, ('level2',$primary_tag_key_level2,$id_tag_key_level1) ) ){ - foreach my $feature_level2 ( @{$hash_omniscient_clean->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1}}) { - print "level2 feature:\n".$feature_level2->gff_string."\n" if $verbose; - - my $percentMatch=0; - - my $level2_ID = lc($feature_level2->_tag_value('ID')); - - ################################################### - # == LEVEL 3 - # We will look the size of mapped features - # Feature can be exon ,cds or utr in that order - ################################################### - - my $refListFetaureL3 = takeOneListLevel3From1idLevel2($hash_omniscient_clean, $level2_ID); - if(! $refListFetaureL3 ){ # No l3 found we clean this l2 feature - my @listF2; push (@listF2, $feature_level2); - remove_omniscient_elements_from_level2_feature_list ($hash_omniscient_clean, \@listF2); - if (! exists_keys($hash_omniscient_clean,('level1',$primary_tag_key_level1,$id_tag_key_level1))){ - $KrakenFakeGene++; - } - next; - } - - my $matchSize = 0; - my $matchFeatureExample = undef; - - foreach my $feature (@{$refListFetaureL3}){ - print "level3 feature:\n".$feature->gff_string."\n" if $verbose; - my $end=$feature->end(); - my $start=$feature->start(); - - my $mapping_state = undef; - if($feature->has_tag($kraken_tag)){ - $mapping_state = lc($feature->_tag_value($kraken_tag)); - } - else{ print "error !! No $kraken_tag attribute found for the feature".$feature->gff_string()."\n";} - - if( $mapping_state eq "true"){ - $matchSize+=($end-$start)+1; - $matchFeatureExample=$feature; - } - elsif(! $mapping_state eq "false"){ - print "error !! 
We don't understand the $kraken_tag attribute value found for the feature".$feature->gff_string()."\n Indeed, we expect false or true.\n"; - } - } - - - #compute the total sie (has to be compute against the original hash) - my $totalSize=0; - $totalSize = compute_total_size($hash_omniscient, $l1_original_id, $matchFeatureExample); - #print "totalSize=$totalSize \n"; - #print "matchSize $matchSize\n"; - - #compute the MATCH. A MATCH can be over 100% because we compute the size of the original feature l3 against the new feature l3. The new feature l3 (i.e exon) could have been strenghten to fit a new size/map of feature l2. - $percentMatch=($matchSize*100)/$totalSize; - print "$id_tag_key_level1 / $level2_ID maps at ".$percentMatch." percent.\n" if $verbose; - #if($percentMatch > 100){ - # print $id_tag_key_level1."\n";exit; - #} - - ####### - # Add information to gff - ######## - if ($percentMatch >= $valueK) { - $sucessMapL2++; - #We print gene only if a percentage match value is superior to the threshold fixed (No threshold equal everything = 0) - $percentMatch = sprintf('%.2f', $percentMatch); - - manage_gene_label($gene_feature, $percentMatch, $kraken_tag); # add info to level1 (gene) feature - - create_or_replace_tag($feature_level2,$kraken_tag,$percentMatch."%"); # add info to level2 (mRNA) feature - create_or_replace_tag($feature_level2,'description',"Mapped at ".$percentMatch."%"); # add info to level2 (mRNA) feature - - #save best value for gene - if(!exists($mappedPercentPerGene{$l1_original_id})){ # case where it doesn t exist - $mappedPercentPerGene{$l1_original_id}=$percentMatch; - $nbGeneIdUniqMap++; - } - elsif($mappedPercentPerGene{$l1_original_id} < $percentMatch){ # case where it exists but better value to save - $mappedPercentPerGene{$l1_original_id}=$percentMatch; - } - } - # Do not pass the threshold we have to remove this l2 feature - else{ - my @listF2; push (@listF2, $feature_level2); - remove_omniscient_elements_from_level2_feature_list 
($hash_omniscient_clean, \@listF2); - if (! exists_keys($hash_omniscient_clean,('level1',$primary_tag_key_level1,$id_tag_key_level1))){ - $KrakenFakeGene++; - } - next; - } - } - } - } - if($sucessMapL2){ - $sucessMapL1++; - } - } - } - if ($sucessMapL1){ # We have a result over the threshold to save - $sucessMapL0++; - $sucessMapL1OusideScope=$sucessMapL1; - #$nbMapL1Uniq++; - #$nbMapL1 +=$sucessMapL1; - #save the result by appending the result hash and take care of duplicated names - if($loop == 0){ - $new_omniscient = $hash_omniscient_clean; - $loop++; - } - elsif($loop == 1){ - ($new_omniscient, $list_uID_new_omniscient) = merge_omniscients($new_omniscient, $hash_omniscient_clean); - $loop++; - } - else{ - ($new_omniscient, $list_uID_new_omniscient) = merge_omniscients($new_omniscient, $hash_omniscient_clean, $list_uID_new_omniscient); - } - - #keep track of successful multimap (same sequences) > Cases saved with different GeneID in new_omniscient () - if($sucessMapL1 > 1){ - $nb_multiMap_sameseq++; - $nb_total_multiMap_sameseq+=$sucessMapL1; - } - } - } - } - #keep track of successful multimap (different sequences) => Cases saved with different GeneID in new_omniscient - if($nbMapTrueHere > 1 and $sucessMapL0 > 1){ - if( $sucessMapL1OusideScope > 1){ - $bothCase++; print "Both case:\nNb multi map seq diff=$sucessMapL0\nNb multi map same seq =$sucessMapL1OusideScope\n" if $verbose; - $nb_total_multiMap_seqdif_bothcase+=$sucessMapL0; - $nb_total_multiMap_sameseq_bothcase+=$sucessMapL1OusideScope; - $nb_multiMap_sameseq--; - $nb_total_multiMap_sameseq-=$sucessMapL1OusideScope; - } - else{ - $nb_multiMap_seqdif++; - $nb_total_multiMap_seqdif+=$sucessMapL0; - } - } - } -} -print "Calcul of mapped percentage length finished !\n"; - - -###################### -# Check if nothing mapped -my $nbKey = keys %mappedPercentPerGene; -if ($nbKey == 0){ - print "No succefully mapped feature found!\n"; -} - -############### -# print the value per gene in a temporary file for R 
plot -foreach my $key (keys %mappedPercentPerGene){ - print $ostreamPlotFile "$mappedPercentPerGene{$key}\n"; -} - - -######## -#print GFF the selected features (over the choosen treshold) -######## -print_omniscient($new_omniscient, $gffout); - -my $nbEndGene = nb_feature_level1($new_omniscient); -my $total_multi_map = $nbEndGene - $nbOriginalL0Map; -#my $nbGeneMapped = $nbOriginalL0Map - ($nbOriginal_total_multiMap_seqdif - $nbOriginal_multiMap_seqdif); # Total of gene mapped - -my $messageEnd; -$messageEnd.= "\nTo resume:\n==========\n\n"; -$messageEnd.= "The original file contained $nbOriginalGene genes\n\n"; - -$messageEnd.= "Before filtering:\nWe have $nbRecordMap mapped genes for a total of $nbOriginalL0Map maps.\n$nbOriginal_multiMap_seqdif genes have several maps on different sequences for a total of $nbOriginal_total_multiMap_seqdif maps.\n"; -$messageEnd.= "/!\\ Multi map on same sequence not taken into account\n\n"; - -$messageEnd.= "After filtering:\nWe have $nbGeneIdUniqMap mapped genes for a total of $nbEndGene maps.(Over the $valueK % match threshold).\n"; -my $nbGeneOneMap=$nbGeneIdUniqMap - $nb_multiMap_seqdif - $nb_multiMap_sameseq - $bothCase; -$messageEnd.= "We have $nbGeneOneMap genes that map to only one location.\n"; -$messageEnd.= "We have $nb_multiMap_seqdif genes that map on different sequences for a total of $nb_total_multiMap_seqdif maps \n"; -$messageEnd.= "We have $nb_multiMap_sameseq genes that map on different location of the same sequences for a total of $nb_total_multiMap_sameseq maps \n"; -$messageEnd.= "We have $bothCase genes that map on different location of the same sequences and on different sequences for a total of $nb_total_multiMap_sameseq_bothcase maps on same sequence and $nb_total_multiMap_seqdif_bothcase maps on different sequences\n"; -#report error if necessary -if( $nb_noCaseL3 ){ - $messageEnd.= "About potential problem met:\nWe found $nb_noCaseL3 cases where a l1/l2 feature mapped to true but do not have any 
feature l3. As we are using feature l3 to perform the analysis we have skiped them.\n"; - $messageEnd.= "When it was the case for all the l2 of one recored we have removed the record (l1): number of such case: $KrakenFakeGene.\n"; -} -$messageEnd.= "\n"; - -#print info -if ($outfile) { -print $outReport $messageEnd; -}else{print $messageEnd;} - - -############# -#PLOT -############# -my $messagePlot; -if ($nbGeneIdUniqMap){ - # Create the legend - my $nbOfGeneSelected = $nbGeneIdUniqMap; - # parse file name to remove extension - my ($file1,$dir1,$ext1) = fileparse($gff, qr/\.[^.]*/); - my $legend=$nbOfGeneSelected." genes selected from ".$file1; - - my @listTuple=([$pathPlotFile,$legend]); - my $R_command=rcc_density_one_row_per_file(\@listTuple,"histogram","Percentage of gene length mapped","10","",$pathOutPlot); # create the R command - execute_R_command($R_command); - - my $messagePlot; - $messagePlot = "Plot done in the pdf file named $pathOutPlot\n"; -} -else{ - $messagePlot = "Cannot perform any plot without data.\n"; -} - -#print info -if ($outfile) { - print $outReport $messagePlot; -} -else{print $messagePlot;} - -# Delete temporary file -unlink "$pathPlotFile"; - -#END -print "We finished !! Bye Bye.\n"; - - -####################################################################################################################### - #################### - # methods # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - - -# We have the l1 id from hash omniscient and the omniscient, and we are looking for the proper feature l2 and its subfeture l3. -# When this function is called, a new_omniscient containing only one isoform has been newly created. -# To be sure to retrieve the proper l2 from which the current l2 has been created, -# We look a the transcript_id attribute rather than the ID, because only this attribute is stayed un-modified. 
-# -sub compute_total_size{ - my ($hash_omniscient, $l1_original_id, $feature_l3)=@_; - - my $l2_transcipt_id = lc($feature_l3->_tag_value('transcript_id')); - my $total_size=0; - - - foreach my $primary_tag_key_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - if ( exists_keys($hash_omniscient, ('level2',$primary_tag_key_level2, $l1_original_id) ) ){ - my $found=undef; - foreach my $feature_level2 ( @{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$l1_original_id}}) { - if($l2_transcipt_id eq lc($feature_level2->_tag_value('transcript_id'))){ - my $l2_original_id = lc($feature_level2->_tag_value('ID')); - foreach my $primary_tag_l3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_l3 = cds or exon or start_codon or utr etc... - if( lc($primary_tag_l3) eq lc( $feature_l3->primary_tag() ) ){ - if ( exists_keys( $hash_omniscient, ('level3', $primary_tag_l3, $l2_original_id) ) ){ - foreach my $feature_level3 ( @{$hash_omniscient->{'level3'}{$primary_tag_l3}{$l2_original_id}}){ - $total_size+=($feature_level3->end - $feature_level3->start)+1; - } - } - } - } - $found =1; - last; #No need to continue the loop if the proper l2 has been already found - } - } - if(! 
$found){ - print "l2_transcipt_id $l2_transcipt_id not found in hash_omniscient\n"; - } - } - - } - if($total_size == 0){ - print "Something went wrong, total_size is 0 while we exepect a positive value.\n"; - } - return $total_size; -} - -# Feature we look at are in the order exon ,cds or utr -# -sub takeOneListLevel3From1idLevel2 { - my ($hash_omniscient, $level2_ID)=@_; - - my $refListFetaureL3=undef; - my $refListExon=undef; - my $refListCDS=undef; - my $refListUTR=undef; - my $get_one_true=undef; - - if ( exists_keys($hash_omniscient, ('level3','exon',$level2_ID) ) ){ - $refListFetaureL3 = $hash_omniscient->{'level3'}{'exon'}{$level2_ID}; - $refListExon = $hash_omniscient->{'level3'}{'exon'}{$level2_ID}; - - #get if one exon mapped otherwise we have to use CDS instead - foreach my $feature (@{$refListFetaureL3}){ - if($feature->has_tag($kraken_tag)){ - if (lc($feature->_tag_value($kraken_tag)) eq "true"){ - $get_one_true = 1; - } - } - } - } - if ( exists_keys($hash_omniscient,('level3','cds',$level2_ID) ) and ! $get_one_true){ - $refListFetaureL3 = $hash_omniscient->{'level3'}{'cds'}{$level2_ID}; - $refListCDS = $hash_omniscient->{'level3'}{'cds'}{$level2_ID}; - - #get if one cds mapped otherwise we have to use UTR instead - foreach my $feature (@{$refListFetaureL3}){ - if($feature->has_tag($kraken_tag)){ - if (lc($feature->_tag_value($kraken_tag)) eq "true"){ - $get_one_true = 1; - } - } - } - } - if(! 
$get_one_true){ - my $match=undef; - foreach my $tag (keys %{$hash_omniscient->{'level3'}}){ - if($hash_omniscient->{'level3'}{$tag}{$level2_ID}){ - $match="yes"; - if($tag =~ "utr"){ - $refListFetaureL3 = $hash_omniscient->{'level3'}{$tag}{$level2_ID}; - $refListUTR = $hash_omniscient->{'level3'}{$tag}{$level2_ID}; - - #get if one cds mapped otherwise we have to use UTR instead - foreach my $feature (@{$refListFetaureL3}){ - if($feature->has_tag($kraken_tag)){ - if (lc($feature->_tag_value($kraken_tag)) eq "true"){ - $get_one_true = 1; - } - } - } - } - } - } - } - - if(! $get_one_true){ - if ($refListExon){ - $refListFetaureL3 = $refListExon; - } - elsif($refListCDS){ - $refListFetaureL3 = $refListCDS; - } - elsif($refListUTR){ - $refListFetaureL3 = $refListUTR; - } - else{ - print "No feature level3 expected found for ".$level2_ID." level2 ! (Probalby an error from kraken that have added a fake l1 and consequently a fake l2. So we will remove the case.)\n"; - } - } - return $refListFetaureL3; -} - -sub manage_gene_label{ - - my ($gene_feature, $percentMatch, $kraken_tag)=@_; - if (! $gene_feature->has_tag($kraken_tag)){ # No kraken_mapped attribute - label_by_value($gene_feature, $percentMatch, $kraken_tag); - } - else{ # kraken_mapped tag exists, check if we have to change it - my @values = $gene_feature->get_tag_values($kraken_tag); - my $alreadyMap = lc(shift @values) ; - if ($alreadyMap eq "false" or $alreadyMap eq "true"){ - label_by_value($gene_feature, $percentMatch, $kraken_tag); - } - elsif ( ($alreadyMap ne 'full') and (( $percentMatch != 0 ) and ($alreadyMap eq 'none')) ){ # if the existing tag is full or the new tag we want to add is none ($percentMatch == 0), we skip it. 
- create_or_replace_tag($gene_feature,$kraken_tag,'partial'); # add info to gene feature - } - } -} - -sub label_by_value{ - my ($gene_feature, $percentMatch, $kraken_tag)=@_; - - if($percentMatch == 100){ - create_or_replace_tag($gene_feature,$kraken_tag,'full'); # add info to gene feature - } - elsif ($percentMatch != 0){ - create_or_replace_tag($gene_feature,$kraken_tag,'partial'); # add info to gene feature - } - else{ - create_or_replace_tag($gene_feature,$kraken_tag,'none'); # add info to gene feature - } -} - -sub plotR { - my ($pathIn, $pathOut, $title)=@_; - - my $R = Statistics::R->new() or die "Problem with R : $!\n"; - -#R command -$R->send( - qq` - listValues=as.matrix(read.table("$pathIn", sep="\t", he=F)) - legendToDisplay=paste("Number of value used : ",length(listValues)) - - pdf("$pathOut") - hist(listValues, breaks=seq(0,100,5), xlab="Percentage of cds lift-over", ylab="Number", main="$title") - dev.off()` - ); - -# Close the bridge -$R->stopR(); - -# Delete temporary file -# unlink "$pathIn"; -} - - -__END__ - -=head1 NAME - -kraken_statMap.pl - -The script take a gff file as input. It will analyse the kraken_mapped attributes to calculate the mapped percentage of each mRNA. -/!\ The script handles chimeric files (i.e containg gene part mapped on the template genome and others on the de-novo one) -/!\/!\ If the file is complete (containing kraken_mapped="TRUE" and kraken_mapped="FALSE" attributes), the script calcul the real percentage lentgh that has been mapped. Else the calcul is only based on feature with kraken_mapped="TRUE" attributes. So in this case the result most of time will be 100%, execpt for cases where several piecies are mapped at different area of the de-novo genome. -According to a threshold (0 by default), gene with a mapping percentage over that value will be reported. -A plot nammed geneMapped_plot.pdf is performed to visualize the result. 
- -=head1 SYNOPSIS - - ./kraken_statMap.pl -gtf=infile.gff [ -o outfile ] - ./kraken_statMap.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<-gff> - -Input gff file that will be read. - -=item B<--threshold> or B<-t> - -Gene mapping percentage over which a gene must be reported. By default the value is 0. - -=item B<--verbose> or B<-v> - -Verbose information. - -=item B<-o> , B<--output> , B<--out> or B<--outfile> - -Output GFF file. If no output file is specified, the output will be -written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Converter/b2go2gff.pl b/annotation/Tools/Converter/b2go2gff.pl deleted file mode 100755 index 7b65666e5..000000000 --- a/annotation/Tools/Converter/b2go2gff.pl +++ /dev/null @@ -1,109 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use Getopt::Long; -use Bio::Tools::GFF; - - -my $usage = qq{ -perl my_script.pl - Getting help: - [--help] - - Input: - [--gff filename] - The name of the gff file to read. - - [--b2go filename] - The name of the Blast2Go annotation file to read - - Ouput: - [--outfile filename] - The name of the output file. 
-}; - -my $outfile = undef; -my $gff = undef; -my $b2go = undef; -my $help; - -GetOptions( - "help" => \$help, - "gff=s" => \$gff, - "b2go=s" => \$b2go, - "outfile=s" => \$outfile); - -# Print Help and exit -if ($help) { - print $usage; - exit(0); -} - -if ($outfile) { - open(STDOUT, ">$outfile") or die("Cannot open $outfile"); -} - - -#### Read the annotation file - -my %lookup = read_annotation_file($b2go); - - -### Parse GFF input file and add annotations - -open(GFF, "<$gff") || die("Can't open $gff."); - -my $gffio = Bio::Tools::GFF->new(-file => $gff, -gff_version => 3); -my $gffout = Bio::Tools::GFF->new(-fh => $outfile, -gff_version => 3); - - -while( my $feature = $gffio->next_feature()) { - - if ($feature->primary_tag =~ /mRNA/) { - - my @values = $feature->get_tag_values('ID'); - my $id = shift @values; - - my $annotation = $lookup{$id} ; - - if (defined $annotation) { - $feature->remove_tag('Description') if ($feature->has_tag('Description')); - $feature->add_tag_value('Description','Predicted: ' . 
$annotation); - print $feature->gff_string($gffout) , "\n"; - } else { - print $feature->gff_string($gffout) , "\n"; - } - - } else { - print $feature->gff_string($gffout) , "\n"; - } - -} - -$gffio->close(); - - - -sub read_annotation_file(file) { - - my $file = shift; - my %lookup = {}; - - open (my $IN, '<', $file) or die "FATAL: Can't open file: $file for reading.\n$!\n"; - - while (<$IN>) { - chomp; - my $line = $_; - - my ($id,$go,$name) = split("\t", $line); - $lookup{$id} = $name ; - } - - close ($IN); - - return %lookup; - -} - - - diff --git a/annotation/Tools/Converter/b2go2table.pl b/annotation/Tools/Converter/b2go2table.pl deleted file mode 100755 index c9cc1850b..000000000 --- a/annotation/Tools/Converter/b2go2table.pl +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use Getopt::Long; -use Bio::Tools::GFF; - - -my $usage = qq{ -perl my_script.pl - Getting help: - [--help] - - Input: - - [--b2go filename] - The name of the Blast2Go annotation file to read - - Ouput: - [--outfile filename] - The name of the output file. -}; - -my $outfile = undef; -my $b2go = undef; -my $help; - -GetOptions( - "help" => \$help, - "b2go=s" => \$b2go, - "outfile=s" => \$outfile); - -# Print Help and exit -if ($help) { - print $usage; - exit(0); -} - -if ($outfile) { - open(STDOUT, ">$outfile") or die("Cannot open $outfile"); -} - - -my %lookup = {}; - -open (my $IN, '<', $b2go) or die "FATAL: Can't open file: $b2go for reading.\n$!\n"; - -while (<$IN>) { - chomp; - my $line = $_; - my ($id,$go,$name) = split("\t", $line); - if (exists $lookup{$id}) { - $lookup{$id} .= "," . $go ; - } else { - $lookup{$id} = $go ; - } - -} - -foreach my $transcript (keys %lookup) { - - my $go_terms = $lookup{$transcript}; - - print $transcript . "\t" . $go_terms . 
"\n"; -} - -close ($IN); - - - - diff --git a/annotation/Tools/Converter/bed2gff.pl b/annotation/Tools/Converter/bed2gff.pl deleted file mode 100755 index 257285fba..000000000 --- a/annotation/Tools/Converter/bed2gff.pl +++ /dev/null @@ -1,263 +0,0 @@ -#!/usr/bin/env perl - -## NBIS 2015 -## jacques.dainat@nbis.se -# BED format described here: https://genome.ucsc.edu/FAQ/FAQformat.html#format1 - -use strict; -use Pod::Usage; -use Getopt::Long; -use Bio::Tools::GFF; -use Data::Dumper; - -my $usage = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -/!\\Only first 6 column inplemented... if your bed file contains more columns and you need their information... you need to finish the implementation }; - -my $outfile = undef; -my $bed = undef; -my $help; - -if( !GetOptions( - "help" => \$help, - "bed=s" => \$bed, - "outfile|output|o|out|gff=s" => \$outfile)) -{ - pod2usage( { -message => "$header\nFailed to parse command line.\n", - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) {-message => "$header", - -verbose => 2, - -exitval => 2 } ); -} - -if ( ! (defined($bed)) ){ - pod2usage( { - -message => "$header\nMissing the --bed argument.\n", - -verbose => 0, - -exitval => 1 } ); -} - -## Manage output file -my $gffout; -if ($outfile) { -open(my $fh, '>', $outfile) or die "Could not open file '$outfile' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3); -} -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - -# Ask for specific GFF information -print "Some information needed in a GFF3 file dont exist in a BED file... as we cannot guess is, please fill the following information:\n\n", -"Enter a source (3rd field in a gff file). Example: Cufflinks,Maker,Augustus,etc. 
[default: data]\n"; -my $source_tag = ; -chomp $source_tag; -if ($source_tag eq '') {$source_tag = 'data';} -if ($source_tag =~ /\s/) {die("Can't have whitespace in $source_tag\n") } - -print "What is the data type ? Example: gene,mRNA,CDS,etc. [default: match]\n"; -my $primary_tag = ; -chomp $primary_tag; -if ($primary_tag eq '') {$primary_tag = 'match';} -if ($primary_tag =~ /\s/) {die("Can't have whitespace in $primary_tag\n") } - - -### Read bed input file. -open my $fh, $bed or die "Could not open $bed: $!"; - - - - - - - ####################### - # MAIN # - ####################### - -my %bedOmniscent; -my $UniqID=0; -my $cpt_warning=0; -while( my $line = <$fh>) { - chomp $line; - - if ($line =~ /#/){next;} #skip commented lines - - if (! $line =~ /\t/) {die("$line <> is not a tabulated format !\n") } - else{ - - my @fields = split /\t/, $line; - if ($#fields == 0){ - if($cpt_warning == 0){ - print "This file doesnt look tabulated. BAD BOY ! So I will try to continue with space sparator.\n"; - $cpt_warning++; - } - @fields = split /\s/, $line; - } - my $fieldNumber=$#fields+1; - if($fieldNumber < 3 or $fieldNumber >12){ - print "Problem with that line:\n$line\nA bed file has at least three required fields ! 9 others fields are optional. So, a maximum of 12 fields is allowed !", - "\n Your line contain $fieldNumber fields. Check the sanity of your file. BYE.\n";exit; - } - if($fieldNumber >= 7){ - print "sorry I have to implement to take in account all the optional fields !\n";exit; - } - my $cptField=0; - $UniqID++; - foreach my $field (@fields){ - $cptField++; - - ########## - #MANDATORY fields - ########### - - # 1 chrom - The name of the chromosome (e.g. chr3, chrY, chr2_random) or scaffold (e.g. scaffold10671). - if($cptField == 1){ - $bedOmniscent{$UniqID}{'chrom'}=$field; - } - - # 2 chromStart - The starting position of the feature in the chromosome or scaffold. The first base in a chromosome is numbered 0. 
- if($cptField == 2){ - $bedOmniscent{$UniqID}{'chromStart'}=$field; - } - - # 3 chromEnd - The ending position of the feature in the chromosome or scaffold. The chromEnd base is not included in the display of the feature. For example, the first 100 bases of a chromosome are defined as chromStart=0, chromEnd=100, and span the bases numbered 0-99. - if($cptField == 3){ - $bedOmniscent{$UniqID}{'chromEnd'}=$field; - } - - ########## - # OPTIONAL fields - ########## - - # 4 name - Defines the name of the BED line. This label is displayed to the left of the BED line in the Genome Browser window when the track is open to full display mode or directly to the left of the item in pack mode. - if($cptField == 4){ - $bedOmniscent{$UniqID}{'name'}=$field; - } - - # 5 score - A score between 0 and 1000. If the track line useScore attribute is set to 1 for this annotation data set, the score value will determine the level of gray in which this feature is displayed (higher numbers = darker gray). - if($cptField == 5){ - $bedOmniscent{$UniqID}{'score'}=$field; - } - - # 6 strand - Defines the strand - either '+' or '-'. - if($cptField == 6){ - $bedOmniscent{$UniqID}{'strand'}=$field; - } - -###==== I STOPPED HERE the implemetation (need to spend time on) - - # 7 thickStart - The starting position at which the feature is drawn thickly (for example, the start codon in gene displays). When there is no thick part, thickStart and thickEnd are usually set to the chromStart position. - if($cptField == 7){ - $bedOmniscent{$UniqID}{'thickStart'}=$field; - } - - # 8 thickEnd - The ending position at which the feature is drawn thickly (for example, the stop codon in gene displays). - if($cptField == 8){ - $bedOmniscent{$UniqID}{'thickEnd'}=$field; - } - - # 9 itemRgb - An RGB value of the form R,G,B (e.g. 255,0,0). If the track line itemRgb attribute is set to "On", this RBG value will determine the display color of the data contained in this BED line. 
NOTE: It is recommended that a simple color scheme (eight colors or less) be used with this attribute to avoid overwhelming the color resources of the Genome Browser and your Internet browser. - if($cptField == 9){ - $bedOmniscent{$UniqID}{'itemRgb'}=$field; - } - - # 10 blockCount - The number of blocks (exons) in the BED line. - if($cptField == 10){ - $bedOmniscent{$UniqID}{'blockCount'}=$field; - } - - # 11 blockSizes - A comma-separated list of the block sizes. The number of items in this list should correspond to blockCount. - if($cptField == 11){ - $bedOmniscent{$UniqID}{'blockSizes'}=$field; - } - - # 12 blockStarts - A comma-separated list of block starts. All of the blockStart positions should be calculated relative to chromStart. The number of items in this list should correspond to blockCount. - if($cptField == 12){ - $bedOmniscent{$UniqID}{'blockStarts'}=$field; - } - } - } -} - - -### -# MANAGE BED OMNISCIEN FOR OUTPUT -foreach my $id (keys %bedOmniscent){ -# foreach my $key (keys %{$bedOmniscent{$id}}){ - - my $seq_id=$bedOmniscent{$id}{'chrom'}; - #my $source_tag; #fill at the beginning - #my $primary_tag; #fill at the beginning - my $start=$bedOmniscent{$id}{'chromStart'}; - my $end=$bedOmniscent{$id}{'chromEnd'}; - my $frame; - - my $score; - if(exists($bedOmniscent{$UniqID}{'score'})){ - $score=$bedOmniscent{$UniqID}{'score'}; - } - - my $strand; - if(exists($bedOmniscent{$UniqID}{'strand'})){ - $strand=$bedOmniscent{$UniqID}{'strand'}; - } - - my $feature = Bio::SeqFeature::Generic->new(-seq_id => $seq_id, - -source_tag => $source_tag, - -primary_tag => $primary_tag, - -start => $start, -end => $end , - -frame => $frame , - -strand =>$strand, - tag => {'ID' => $id} - ) ; - - if(exists($bedOmniscent{$id}{'name'})){ - $feature->add_tag_value('Name',$bedOmniscent{$id}{'name'}); - } - - $gffout->write_feature($feature); -} - - -close $fh; - - -__END__ - -=head1 NAME - -bed2gff.pl - -The script take a bed file as input, and will translate it in gff 
format. /!\ Not implemented in it's totality... - -=head1 SYNOPSIS - - ./bed2gff.pl --bed=infile.bed [ -o outfile ] - -=head1 OPTIONS - -=over 8 - -=item B<--bed> - -Input bed file that will be convert. - -=item B<-o> , B<--output> , B<--out> , B<--outfile> or B<--gff> - -Output GFF file. If no output file is specified, the output will be -written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Converter/cegma_gff2bed.pl b/annotation/Tools/Converter/cegma_gff2bed.pl deleted file mode 100755 index 24decacba..000000000 --- a/annotation/Tools/Converter/cegma_gff2bed.pl +++ /dev/null @@ -1,133 +0,0 @@ -#!/usr/bin/env perl - -## NBIS 2015 (www.nbis.se) -## jacques.dainat@nbis.se - -use strict; -use Getopt::Long; -use Bio::Tools::GFF; -use Pod::Usage; - -my $usage = qq{ -perl my_script.pl - Getting help: - [--help] - - Input: - [--gff filename] - The name of the cegma gff file to convert. - - Ouput: - [--outfile filename] - The name of the output file (A bed file). - - At least the input cegma gff file is mandatory: - Usage: script.pl --gff infile.gff [--outfile outfile.bed] - - - This script allows to convert a gff file from cegma output to a bed file. -}; - -my $outfile = undef; -my $gff = undef; -my $attributes = undef ; -my $help; - -GetOptions( - "help" => \$help, - "gff=s" => \$gff, - "outfile=s" => \$outfile); - -# Print Help and exit -if ($help) { - print $usage; - exit(0); -} - -if ( ! (defined($gff)) ){ - print $usage; - exit(0); -} - -## Manage output file -my $fh; -if ($outfile) { -open($fh, '>', $outfile) or die "Could not open file '$outfile' $!"; -} -else{ - $fh=\*STDOUT; -} - -### Read gff input file -open(my $fhIn, '<:encoding(UTF-8)', $gff) - or die "Could not open file '$gff' $!"; - -## Read line by line and save in hash with geneName as key. Allows to merge feature belonging to the same gene. 
-my %hash; -while (my $row = <$fhIn>) { - chomp $row; - my @tab = split( /\s/,$row); - if (lc($tab[2]) eq "exon"){ - if(exists($hash{$tab[8]})){ - push (@{$hash{$tab[8]}}, $row) - } - else{$hash{$tab[8]}=[$row]} - } -} -$fhIn->close(); - - -my $nbGene=keys %hash; -print "$nbGene genes read.\n"; - -# foreach gene we have a list of feature -foreach my $key (keys %hash){ - my @tabValues=@{$hash{$key}}; - my $nbValue=0; - my @listSizeStart; - #foreach fetaure of a gene - foreach my $value (@tabValues){ - #cut the gff feature - my ($chr,$tool,$type,$start,$stop,$score,$dir,$col8,$name) = split(/\s/,$value); - my $size=$stop-$start; - #In the case where we have only one feature - if ($#tabValues == 0){ - print $fh "$chr\t$start\t$stop\t$name\t$score\t$dir\t$start\t$stop\t0\t1\t$size,\t0\n"; - } - #In the case where we have several features we save information - elsif ($nbValue!=$#tabValues){ - push(@listSizeStart,[$size,$start]); - $nbValue++; - } - #In the case where we have several features we manage information kept and manage them to print the good final bed feature. - else{ - push(@listSizeStart,[$size,$start]); #save information needed of the last feature of the gene - my (@listSorted)=sortByPos(@listSizeStart); #sort the infromation - my $final_sizeList; my $final_startList; - my $cpt=0;my $originStart; - #foreach information save we will create correct strings for the output - foreach my $tabDuoRef (@listSorted){ - my ($res_size,$res_start)=@{$tabDuoRef}; - if($cpt==0){ $originStart=$res_start; $cpt++;} - my $start_corrected=$res_start-$originStart; - $final_sizeList.="$res_size,"; - $final_startList.="$start_corrected,"; - } - #print result in case where gene has several features - print $fh "$chr\t$start\t$stop\t$name\t$score\t$dir\t$start\t$stop\t0\t1\t$final_sizeList\t$final_startList\n"; - } - - } -} -$fh->close(); - - - - -#This function sort a tab of tab according to the second value of the sub-tab ... 
-sub sortByPos { - my (@featureList) = @_; - - my @featureListSorted = ( sort ({ $a->[1] <=> $b->[1] } @featureList)); - return @featureListSorted; -} diff --git a/annotation/Tools/Converter/embl2gff.pl b/annotation/Tools/Converter/embl2gff.pl deleted file mode 100755 index aaa772a79..000000000 --- a/annotation/Tools/Converter/embl2gff.pl +++ /dev/null @@ -1,177 +0,0 @@ -#!/usr/local/bin/perl -w - -## NBIS 2015 -## jacques.dainat@nbis.se - -## TO DO => Deal With sequences. Write the DNA sequence of the "source" primary tag within the output gff3 - - -use strict; -use Pod::Usage; -use Getopt::Long; -use Bio::Tools::GFF; -use Data::Dumper; -use Bio::SeqIO; - -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $outfile = undef; -my $embl = undef; -my $primaryTags = undef; -my $discard = undef; -my $keep = undef; -my $help; - -if( !GetOptions( - "help" => \$help, - "embl=s" => \$embl, - "ptag|t=s" => \$primaryTags, - "d|s" => \$discard, - "k" => \$keep, - "outfile|output|o|out|gff=s" => \$outfile)) -{ - pod2usage( { -message => "Failed to parse command line\n$header", - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ( ! (defined($embl)) ){ - pod2usage( { - -message => "$header\nMissing the --embl argument", - -verbose => 0, - -exitval => 1 } ); -} - -################## -# MANAGE OPTION # -if($discard and $keep){ - print "Cannot discard and keep the same primary tag. 
You have to choose if you want to discard it or to keep it.\n"; -} - -### If primaryTags given, parse them: - -my @listprimaryTags; -if ($primaryTags){ - @listprimaryTags= split(/,/, $primaryTags); - - if($discard){ - print "We will not keep the following primary tag:\n"; - foreach my $tag (@listprimaryTags){ - print $tag,"\n"; - } - } - elsif($keep){ # Attribute we have to replace by a new name - print "We will keep only the following primary tag:\n"; - foreach my $tag (@listprimaryTags){ - print $tag,"\n"; - } - } - else{print "You gave a list of primary tag wihtout telling me what you want I do with. Discard them or keep only them ?\n";} -} - - -################## -# MANAGE OUTPUT # -my $gff_out; -if ($outfile) { -open(my $fh, '>', $outfile) or die "Could not open file '$outfile' $!"; - $gff_out= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3); -} -else{ - $gff_out = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - -### Read gb input file. -my $embl_in = Bio::SeqIO->new(-file => $embl, -format => 'embl'); - - -### MAIN ### - -while( my $seq_obj = $embl_in->next_seq) { - - for my $feat_obj ($seq_obj->get_SeqFeatures) { - my $skipit=undef; - - # In case we should discard some - if($discard){ - - foreach my $pTag (@listprimaryTags){ - if(lc($pTag) eq lc($feat_obj->primary_tag)){ - $skipit=1;last; - } - } - } - # In case we should keep only some - elsif($keep){ - my $skipit=1; - foreach my $pTag (@listprimaryTags){ - if(lc($pTag) eq lc($feat_obj->primary_tag)){ - $skipit=undef;last; - } - } - - } - - if(! $skipit){ - $gff_out->write_feature($feat_obj); - } - } -} - -__END__ - -=head1 NAME - -embl2gff.pl - -The script take a EMBL file as input, and will translate it in gff format. - -=head1 SYNOPSIS - - ./embl2gff.pl --embl infile.embl [ -o outfile ] - -=head1 OPTIONS - -=over 8 - -=item B<--embl> - -Input EMBL file that will be read - -=item B<-primary_tag>, B<--pt>, B<-t> - -List of "primary tag". Useful to discard or keep specific features. 
-The tags have to be separated by a coma. - -=item B<-d> - -Means that primary tags provided by the option "prinary_tag" will be discarded. - -=item B<-d> - -Means that only primary tags provided by the option "prinary_tag" will be kept. - -=item B<-o> , B<--output> , B<--out> , B<--outfile> or B<--gff> - -Output GFF file. If no output file is specified, the output will be -written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Converter/gff2bed.pl b/annotation/Tools/Converter/gff2bed.pl deleted file mode 100755 index d92e78d9a..000000000 --- a/annotation/Tools/Converter/gff2bed.pl +++ /dev/null @@ -1,190 +0,0 @@ -#!/usr/bin/env perl - -## NBIS 2015 (www.nbis.se) -## jacques.dainat@nbis.se - -use strict; -use warnings; -use Getopt::Long; -use NBIS::GFF3::Omniscient; -use Bio::Tools::GFF; -use Pod::Usage; - -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $outfile = undef; -my $gff = undef; -my $attributes = undef ; -my $help; - -if( !GetOptions( - "help|h" => \$help, - "gff=s" => \$gff, - "outfile|output|out|o=s" => \$outfile)) -{ - pod2usage( { -message => "Failed to parse command line.", - -verbose => 1, - -exitval => 1 } ); -} -# Print Help and exit -if ($help) { - pod2usage( { -message => "$header", - -verbose => 2, - -exitval => 2 } ); -} - -if ( ! 
(defined($gff)) ){ - pod2usage( { - -message => "$header\nAt least 1 parameters is mandatory.\n", - -verbose => 0, - -exitval => 1 } ); -} - -## Manage output file -my $bedout; -if ($outfile) { -open(my $fh, '>', $outfile) or die "Could not open file '$outfile' $!"; - #$bedout= Bio::FeatureIO->new(-fh => $fh, -format => 'bed' ); - $bedout=$fh; -} -else{ - #$bedout = Bio::FeatureIO->new(-fh => \*STDOUT, -format => 'bed'); - $bedout=\*STDOUT ; -} - -### Parse GTF input file -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $gff - }); -# END parsing - - -# NOT USED BECAUSE in Bio::FeatureIO::bed: -# my $block_count = ''; #not implemented, used for sub features -# my $block_sizes = ''; #not implemented, used for sub features -# my $block_starts = ''; #not implemented, used for sub feature -# ################# -# # == LEVEL 1 == # -# ################# -# foreach my $tag_l1 (keys %{$hash_omniscient->{'level1'}}){ # tag_l1 = gene or repeat etc... -# foreach my $id_l1 (keys %{$hash_omniscient->{'level1'}{$tag_l1}}){ - -# #my feature is a Bio::SeqFeature::Generic -# my $feature_l1=$hash_omniscient->{'level1'}{$tag_l1}{$id_l1}; - -# #create a new Bio::SeqFeature::Annotated object; -# my $newObj = Bio::SeqFeature::Annotated->new(); -# #initialize this object with the contents of another feature -# $newObj->from_feature($feature_l1); -# #print the new object -# print $bedout->write_feature($newObj); -# } -# } - - -################# -# == LEVEL 1 == # -################# -foreach my $tag_l1 (keys %{$hash_omniscient->{'level1'}}){ # tag_l1 = gene or repeat etc... 
- foreach my $id_l1 (keys %{$hash_omniscient->{'level1'}{$tag_l1}}){ - - my $feature_l1=$hash_omniscient->{'level1'}{$tag_l1}{$id_l1}; - my $size_l1 = $feature_l1->end - $feature_l1->start; - - print $bedout $feature_l1->seq_id."\t".$feature_l1->start."\t".$feature_l1->end."\t".$feature_l1->primary_tag."\t".$feature_l1->score."\t".$feature_l1->strand."\t".$feature_l1->start."\t".$feature_l1->end."\t0\t1\t".$size_l1."\t0\n"; - - ################# - # == LEVEL 2 == # - ################# - foreach my $tag_l2 (keys %{$hash_omniscient->{'level2'}}){ # tag_l2 = mrna or mirna or ncrna or trna etc... - if ( exists ($hash_omniscient->{'level2'}{$tag_l2}{$id_l1} ) ){ - foreach my $feature_l2 ( @{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}}) { - my $size_l2 = $feature_l2->end - $feature_l2->start; - print $bedout $feature_l2->seq_id."\t".$feature_l2->start."\t".$feature_l2->end."\t".$feature_l2->primary_tag."\t".$feature_l2->score."\t".$feature_l2->strand."\t".$feature_l2->start."\t".$feature_l2->end."\t0\t1\t".$size_l2."\t0\n"; - - ################# - # == LEVEL 3 == # - ################# - my $level2_ID = lc( $feature_l2->_tag_value('ID') ); - - foreach my $tag_l3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... 
- if ( exists ($hash_omniscient->{'level3'}{$tag_l3}{$level2_ID} ) ){ - - my $first_feature_l3; - my $last_feature_l3; - my $final_sizeList; - my $final_startList; - - my $originStart; - my $cpt = 0 ; - my $nb_feat = 0 ; - my $score = 0 ; - - foreach my $feature_l3 ( sort { $a->start <=> $b->start } @{$hash_omniscient->{'level3'}{$tag_l3}{$level2_ID}}) { - - my $size_l3 = $feature_l3->end - $feature_l3->start; - - - if($cpt==0){ - $first_feature_l3 = $feature_l3; - $originStart = $feature_l3->start; - - $final_sizeList.="$size_l3"; - $final_startList.="0"; - $cpt++; - } - else{ - my $start_corrected= $feature_l3->start - $originStart; - $final_sizeList.=",$size_l3"; - $final_startList.=",$start_corrected"; - } - $last_feature_l3 = $feature_l3; - $nb_feat++; - $score += $feature_l3->score; - } - $score = $score/$nb_feat; - print $bedout $first_feature_l3->seq_id."\t".$first_feature_l3->start."\t".$last_feature_l3->end."\t".$first_feature_l3->primary_tag."\t".$score."\t".$first_feature_l3->strand."\t".$first_feature_l3->start."\t".$last_feature_l3->end."\t0\t1\t".$final_sizeList."\t".$final_startList."\n"; - } - } - } - } - } - } -} - -__END__ - -=head1 NAME - -gff2bed.pl - -The script take a gff3 file as input and convert it into bed. - -=head1 SYNOPSIS - - ./gff2bed.pl --gff file.gff [ -o outfile ] - ./gff2bed.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff> - -Input GFF3 file that will be read - -=item B<--outfile>, B<--out>, B<--output>, or B<-o> - -File where will be written the result. If no output file is specified, the output will be written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. 
- -=back - -=cut diff --git a/annotation/Tools/Converter/gff2gtf.pl b/annotation/Tools/Converter/gff2gtf.pl deleted file mode 100755 index e31bc9a75..000000000 --- a/annotation/Tools/Converter/gff2gtf.pl +++ /dev/null @@ -1,286 +0,0 @@ -#!/usr/bin/env perl - -## NBIS 2015 -## jacques.dainat@nbis.se - -use strict; -use warning; -use Pod::Usage; -use Getopt::Long; -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient; - -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $outfile = undef; -my $gff = undef; -my $att = undef; -my $nf =undef; -my $help; - -if( !GetOptions( - "help" => \$help, - "gff|in=s" => \$gff, - "a|att" => \$att, - "nf" => \$nf, - "outfile|output|o|out|gtf=s" => \$outfile)) -{ - pod2usage( { -message => "Failed to parse command line.", - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -message => "$header", - -verbose => 2, - -exitval => 2 } ); -} - -if ( ! (defined($gff)) ){ - pod2usage( { - -message => "$header\nAt least 1 parameters is mandatory.\n", - -verbose => 0, - -exitval => 1 } ); -} - -## Manage output file -my $gtf_out; -if ($outfile) { -open(my $fh, '>', $outfile) or die "Could not open file '$outfile' $!"; - $gtf_out = Bio::Tools::GFF->new(-fh => $fh, -gff_version => 2.5); -} -else{ - $gtf_out = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 2.5); -} - -###################### -### Parse GFF input # -### Read gff input file. -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $gff - }); - -if(! 
$att){ - print_omniscient($hash_omniscient, $gtf_out); -} -else{ # rebuild gene_id and transcript_id feature; - - my $gene_id=undef; - ################# - # == LEVEL 1 == # - ################# - foreach my $primary_tag_key_level1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... - foreach my $id_tag_key_level1 (keys %{$hash_omniscient->{'level1'}{$primary_tag_key_level1}}){ - my $feature_level1=$hash_omniscient->{'level1'}{$primary_tag_key_level1}{$id_tag_key_level1}; - - # Gene ID level1 - my $gene_id_att=undef; - if($feature_level1->has_tag('gene_id')){ - $gene_id_att=$feature_level1->_tag_value('gene_id'); - } - - my $transcript_id=undef; - my $level3_gene_id=undef; - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_key_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - - if ( exists ($hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1} ) ){ - foreach my $feature_level2 ( @{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1}}) { - - - - # Gene ID level2 - my $gene_id_mrna_att=undef; - if($feature_level2->has_tag('gene_id')){ - $gene_id_mrna_att=$feature_level2->_tag_value('gene_id'); - } - - my $transcript_id_mrna_att=undef; - if($feature_level2->has_tag('transcript_id')){ - $transcript_id_mrna_att=$feature_level2->_tag_value('transcript_id'); - } - - # get gff3 feature (ID) - my $level2_ID = lc($feature_level2->_tag_value('ID')); - - my $level3_transcript_id=undef; - ################# - # == LEVEL 3 == # - ################# - - ############ - # Go through one time to check if gene_id and transcript_id are present and save them - foreach my $primary_tag_key_level3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... 
- if ( exists ($hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID} ) ){ - foreach my $feature_level3 ( @{$hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID}}) { - - #Get level3 gene_id - if(! $level3_gene_id){ - if($feature_level3->has_tag('gene_id')){ - $level3_gene_id=$feature_level3->_tag_value('gene_id'); - } - } - - #Get level3 transcript_id - if(! $level3_transcript_id){ - if($feature_level3->has_tag('transcript_id')){ - $level3_transcript_id=$feature_level3->_tag_value('transcript_id'); - } - } - if($level3_gene_id and $level3_transcript_id){last;} - } - } - if($level3_gene_id and $level3_transcript_id){last;} - } - - ################# - # CHOOSE the gene_id. We take the first from level1 to level3. - if($gene_id_att){ - $gene_id=$gene_id_att; - } - elsif($gene_id_mrna_att){ - $gene_id=$gene_id_mrna_att - } - elsif($level3_gene_id){ - $gene_id=$level3_gene_id; - } - else{ # We didn't find any gene_id we will the ID of level1 as gene_id. - $gene_id=$feature_level1->_tag_value('ID'); - } - - ################# - # CHOOSE the transcript_id. We take the first from level2 to level3. - if($transcript_id_mrna_att){ - $transcript_id=$transcript_id_mrna_att; - } - elsif($level3_transcript_id){ - $transcript_id=$level3_transcript_id; - } - else{ # We didn't find any gene_id we will the ID of level2 as transcript_id. - $transcript_id=$feature_level2->_tag_value('ID'); - } - - ############## - # Second pass of level3 features - # Add gene_id and transcript_id to level3 feature that don't have this information - foreach my $primary_tag_key_level3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... - if ( exists ($hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID} ) ){ - foreach my $feature_level3 ( @{$hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID}}) { - - #Check add gene_id - if(! 
$feature_level3->has_tag('gene_id')) { - $feature_level3->add_tag_value('gene_id', $gene_id); - } - elsif($feature_level3->_tag_value('gene_id') ne $gene_id) { #gene_id different, we replace it. - warn("We replace the transcript_id ".$feature_level3->_tag_value('gene_id')." by ".$gene_id.". Is it normal ?\n");exit; - $feature_level3->add_tag_value('gene_id', $gene_id); - } - #Check add transcript_id - if(! $feature_level3->has_tag('transcript_id')){ - $feature_level3->add_tag_value('transcript_id', $transcript_id); - } - elsif($feature_level3->_tag_value('transcript_id') ne $transcript_id){ #transcript_id different, we replace it. - warn("We replace the transcript_id ".$feature_level3->_tag_value('transcript_id')." by ".$transcript_id.". Is it normal ?\n");exit; - $feature_level3->add_tag_value('transcript_id', $transcript_id); - } - } - } - } - - ## add level2 missing information gene_id - if(! $feature_level2->has_tag('gene_id')) { - $feature_level2->add_tag_value('gene_id', $gene_id); - } - elsif($feature_level2->_tag_value('gene_id') ne $gene_id) { #gene_id different, we replace it. - warn("We replace the transcript_id ".$feature_level2->_tag_value('gene_id')." by ".$gene_id.". Is it normal ?\n");exit; - $feature_level2->add_tag_value('gene_id', $gene_id); - } - # add level2 missing information transcript_id - if(! $feature_level2->has_tag('transcript_id')){ - $feature_level2->add_tag_value('transcript_id', $transcript_id); - } - elsif($feature_level2->_tag_value('transcript_id') ne $transcript_id){ #gene_id transcript_id, we replace it. - warn("We replace the transcript_id ".$feature_level2->_tag_value('transcript_id')." by ".$transcript_id.". Is it normal ?\n");exit; - $feature_level2->add_tag_value('transcript_id', $transcript_id); - } - } - } - } - ## add level1 missing information gene_id - if(! 
$feature_level1->has_tag('gene_id')) { - $feature_level1->add_tag_value('gene_id', $gene_id); - } - elsif($feature_level1->_tag_value('gene_id') ne $gene_id) { #gene_id different, we replace it. - warn("We replace the transcript_id ".$feature_level1->_tag_value('gene_id')." by ".$gene_id.". Is it normal ?\n");exit; - $feature_level1->add_tag_value('gene_id', $gene_id); - } - } - } - # print results - print_omniscient($hash_omniscient, $gtf_out); -} - -if($nf){ - if($outfile){ - `cp $outfile tmp`; - `sed 's/ / /g' tmp > tmp2`; - `sed 's/ ;/;/g' tmp2 > $outfile`; - `rm tmp tmp2`; - } - else{print "!! option nf can be used only to write result in a file. Doesn't work with STDOUT\n";} -} -else{ - print "\nKeep in mind that the current format of attibutes/values of the 9th column is like that:\n". - "attribute1 value1 ; attribute2 value2\nSome tools (i.e Kraken) struggle with the space between and <;>. If you want to remove it relaunch the script using the option.\n"; -} - -print "Bye Bye\n"; - -__END__ - -=head1 NAME - -gff32gtf.pl - -The script take a gff file as input, and will translate it in gtf format. -Keep in mind that some bioperl versions forget to add the header (##gff-version 2) in the output. Check the output to add it if missing, it will avoid you troubles during your downstream analyses. - -=head1 SYNOPSIS - - ./gff2gtf.pl --gff=infile.gff [ -o outfile ] - -=head1 OPTIONS - -=over 8 - -=item B<--gff> or B<--in> - -Input GFF file that will be read - -=item B<--att> or B<-a> - -With this option, attributes "gene_id" and "transcript_id" will be created when they are missing. - -=item B<--nf> - -With this option, attibutes/values of the 9th column are written "attribute1 value1; attribute2 value2" instead of "attribute1 value1 ; attribute2 value2". (The difference is the space before the semilicon) - -=item B<-o> , B<--output> , B<--out> , B<--outfile> or B<--gtf> - -Output GTF file. If no output file is specified, the output will be -written to STDOUT. 
- -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Converter/gff2table.pl b/annotation/Tools/Converter/gff2table.pl deleted file mode 100755 index ba9edfd80..000000000 --- a/annotation/Tools/Converter/gff2table.pl +++ /dev/null @@ -1,100 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use Getopt::Long; -use Scalar::Util qw(openhandle); -use Bio::Tools::GFF; -use Time::Piece; -use Time::Seconds; - -my $usage = qq{ -perl my_script.pl - Getting help: - [--help] - - Input: - [--gff filename] - The name of the file to read. - Ouput: - [--outfile filename] - The name of the output file. By default the output is the - standard output -}; - -my $outfile = undef; -my $gff = undef; -my $help; - -GetOptions( - "help" => \$help, - "gff=s" => \$gff, - "outfile=s" => \$outfile); - -# Print Help and exit -if ($help) { - print $usage; - exit(0); -} - -if ($outfile) { - open(STDOUT, ">$outfile") or die("Cannot open $outfile"); -} - -open(GFF, "<$gff") || die("Can't open $gff."); - -my $gffio = Bio::Tools::GFF->new(-file => $gff, -gff_version => 3); -my $gffout = Bio::Tools::GFF->new(-gff_version => 3); - - -while( my $feature = $gffio->next_feature()) { - - if ( $feature->primary_tag =~ /gene/ or $feature->primary_tag =~ /mRNA/ ) { - - my $description = "" ; - my $dbxref = "" ; - - if ($feature->primary_tag eq "mRNA") { - - if ($feature->has_tag('description') ) { - my @values = $feature->get_tag_values('description'); - $description = join(",",@values) ; - } - - if ($feature->has_tag('Dbxref') ) { - my @values = $feature->get_tag_values('Dbxref') ; - $dbxref = join(",",@values) ; - } - - } - - my @id_values = $feature->get_tag_values('ID'); - my $id = shift @id_values; - - - print $feature->primary_tag . "\t" . $id . "\t" . $feature->seq_id . "\t" . $feature->start . "\t" . $feature->end . "\t" . $feature->strand . "\t" . $description . "\t" . $dbxref . 
"\n" ; - - } - -} - -$gffio->close(); - - -# -------------- - -sub msg { - my $t = localtime; - my $line = "[".$t->hms."] @_\n"; -} - -sub runcmd { - msg("Running:", @_); - system(@_)==0 or err("Could not run command:", @_); -} - -sub err { - msg(@_); - exit(2); -} - - diff --git a/annotation/Tools/Converter/gff2zff.pl b/annotation/Tools/Converter/gff2zff.pl deleted file mode 100755 index 27f9df9cb..000000000 --- a/annotation/Tools/Converter/gff2zff.pl +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/env perl -use strict; - -my @exons; -my $gene_count = 0; -my $current_seq = ""; -while(my $line = ) -{ - if($line =~ m/^###/) - { - flush(\@exons); - next; - } - my @fields = split(/\t/, $line); - if($fields[2] eq "mRNA") - { - flush(\@exons); - } - elsif($fields[2] eq "exon") - { - if ($fields[0] ne $current_seq) - { - $current_seq = $fields[0]; - printf(">%s\n", $current_seq); - } - push(@exons, \@fields); - } -} -flush(); - -sub flush -{ - my $num_exons = scalar(@exons); - return if($num_exons == 0); - - my $group = sprintf("%s.%d", $exons[0]->[0], $gene_count); - $gene_count++; - - if($num_exons == 1) - { - my($start, $end) = ($exons[0]->[3], $exons[0]->[4]); - if($exons[0]->[6] eq "-") - { - ($start, $end) = ($exons[0]->[4], $exons[0]->[3]); - } - printf("Esngl\t%lu\t%lu\t%s\n", $start, $end, $group); - } - else - { - @exons = reverse(@exons) if($exons[0]->[6] eq "-"); - for(my $i = 0; $i < $num_exons; $i++) - { - my $exon_type = "Exon"; - if($i == 0) - { - $exon_type = "Einit"; - } - elsif($i == $num_exons - 1) - { - $exon_type = "Eterm"; - } - - my($start, $end) = ($exons[$i]->[3], $exons[$i]->[4]); - if($exons[0]->[6] eq "-") - { - ($start, $end) = ($exons[$i]->[4], $exons[$i]->[3]); - } - - printf("%s\t%lu\t%lu\t%s\n", $exon_type, $start, $end, $group); - } - } - @exons = (); -} diff --git a/annotation/Tools/Converter/gff3_sp_to_tabulated.pl b/annotation/Tools/Converter/gff3_sp_to_tabulated.pl deleted file mode 100755 index 95cabd1ef..000000000 --- 
a/annotation/Tools/Converter/gff3_sp_to_tabulated.pl +++ /dev/null @@ -1,215 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Carp; -use Clone 'clone'; -use Getopt::Long; -use Pod::Usage; -use List::MoreUtils qw(uniq); -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient; - -my $header = qq{ -######################################################## -# NBIS 2019 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $gff = undef; -my $help= 0; -my $primaryTag=undef; -my $attributes=undef; -my $opt_merge = undef; -my $opt_comonTag=undef; -my $opt_output=undef; -my $add = undef; -my $cp = undef; - -if ( !GetOptions( - "help|h" => \$help, - 'c|ct=s' => \$opt_comonTag, - "gff|f=s" => \$gff, - 'ml|merge_loci!' => \$opt_merge, - "output|outfile|out|o=s" => \$opt_output)) - -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ( ! (defined($gff)) ){ - pod2usage( { - -message => "$header\nAt least 1 parameter is mandatory:\nInput reference gff file (--gff) \n\n", - -verbose => 0, - -exitval => 2 } ); -} - -# Manage Output -my $ostream = IO::File->new(); -if(defined($opt_output)) -{ -$ostream->open( $opt_output, 'w' ) or - croak( - sprintf( "Can not open '%s' for reading: %s", $opt_output, $! ) ); -} -else{ - $ostream->fdopen( fileno(STDOUT), 'w' ) or - croak( sprintf( "Can not open STDOUT for writing: %s", $! 
) ); -} - - - ##################### - # MAIN # - ##################### - -my $cpt_tag=1; -my %tag_to_number; -my %number_to_tag; -my $content; -###################### -### Parse GFF input # -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ - input => $gff, - locus_tag => $opt_comonTag, - merge_loci => $opt_merge - }); -print ("GFF3 file parsed\n"); - - -foreach my $tag_l1 (keys %{$hash_omniscient->{'level1'}}){ - foreach my $id_l1 (keys %{$hash_omniscient->{'level1'}{$tag_l1}}){ - - my $feature_l1=$hash_omniscient->{'level1'}{$tag_l1}{$id_l1}; - - manage_attributes($feature_l1); - - ################# - # == LEVEL 2 == # - ################# - foreach my $tag_l2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - - if ( exists ($hash_omniscient->{'level2'}{$tag_l2}{$id_l1} ) ){ - foreach my $feature_l2 ( @{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}}) { - - manage_attributes($feature_l2); - ################# - # == LEVEL 3 == # - ################# - my $level2_ID = lc($feature_l2->_tag_value('ID')); - - foreach my $tag_l3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... 
- if ( exists ($hash_omniscient->{'level3'}{$tag_l3}{$level2_ID} ) ){ - foreach my $feature_l3 ( @{$hash_omniscient->{'level3'}{$tag_l3}{$level2_ID}}) { - manage_attributes($feature_l3); - } - } - } - } - } - } - } -} - - -print $ostream "seq_id\tsource_tag\tprimary_tag\tstart\tend\tscore\tstrand\tframe"; -foreach my $key (sort { $a <=> $b } keys %number_to_tag){ - print $ostream "\t".$number_to_tag{$key}; -} -print $ostream "\n".$content; -####################################################################################################################### - #################### - # methods # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - -sub manage_attributes{ - my ($feature)=@_; - $content .= $feature->seq_id."\t".$feature->source_tag."\t".$feature->primary_tag."\t".$feature->start."\t".$feature->end."\t".$feature->score."\t".$feature->strand."\t".$feature->frame; - my @tag_list = $feature->get_all_tags(); - my %tag_hash; - foreach my $tag (@tag_list) { - $tag_hash{$tag}++; - } - - foreach my $key (sort { $a <=> $b } keys %number_to_tag){ - my $sorted_tag = $number_to_tag{$key}; - if( exists_keys(\%tag_hash,($sorted_tag))){ - my @values = $feature->get_tag_values($sorted_tag); - $content .= "\t".join(", ", @values); - delete $tag_hash{$sorted_tag}; - } - } - - foreach my $tag ( keys %tag_hash ) { - $tag_to_number{$tag} = $cpt_tag; - $number_to_tag{$cpt_tag} = $tag; - my @values = $feature->get_tag_values($tag); - $content .= "\t".join(", ", @values); - $cpt_tag++; - } - $content .= "\n"; -} - - - - -__END__ - -=head1 NAME - -gff3_sp_to_tabulated.pl - -The script take a gff3 file as input and writte a tabulated file. -Attribute's tag from the 9th column becomes title. 
- -=head1 SYNOPSIS - - ./gff3_sp_to_tabulated.pl -gff file.gff [ -o outfile ] - ./gff3_sp_to_tabulated.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff> or B<-f> - -Input GFF3 file that will be read (and sorted) - -=item B<-c> or B<--ct> - -When the gff file provided is not correcly formated and features are linked to each other by a comon tag (by default locus_tag), this tag can be provided to parse the file correctly. - -=item B<--ml> or B<--merge_loci> - -Merge loci parameter, default deactivated. You turn on the parameter if you want to merge loci into one locus when they overlap. -(at CDS level for mRNA, at exon level for other level2 features. Strand has to be the same). Prokaryote can have overlaping loci so it should not use it for prokaryote annotation. -In eukaryote, loci rarely overlap. Overlaps could be due to error in the file, mRNA can be merged under the same parent gene if you acticate the option. - -=item B<-o> , B<--output> , B<--out> or B<--outfile> - -Output GFF file. If no output file is specified, the output will be -written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Converter/gxf_to_gff3.pl b/annotation/Tools/Converter/gxf_to_gff3.pl deleted file mode 100755 index 6d8e0236a..000000000 --- a/annotation/Tools/Converter/gxf_to_gff3.pl +++ /dev/null @@ -1,184 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Pod::Usage; -use Getopt::Long; -use NBIS::GFF3::Omniscient; -use Bio::Tools::GFF; - -my $header = qq{ -######################################################## -# NBIS 2018 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. 
# -######################################################## -}; - -my $start_run = time(); -my $opt_gfffile; -my $opt_merge = undef; -my $opt_comonTag=undef; -my $opt_verbose = 1; -my $opt_no_check = undef; -my $opt_output; -my $opt_expose_feature_levels = undef; -my $opt_help = 0; -my $opt_version_input = undef; -my $opt_version_output = 3; - -# OPTION MANAGMENT -my @copyARGV=@ARGV; -if ( !GetOptions( 'g|gff=s' => \$opt_gfffile, - 'c|ct=s' => \$opt_comonTag, - 'v=i' => \$opt_verbose, - 'o|output=s' => \$opt_output, - 'efl|expose!' => \$opt_expose_feature_levels, - 'nc|no_check!' => \$opt_no_check, - 'gff_version_input|gvi=f' => \$opt_version_input, - 'gff_version_output|gvo=f' => \$opt_version_output, - 'ml|merge_loci!' => \$opt_merge, - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if (! defined($opt_gfffile) ){ - pod2usage( { - -message => "$header\nAt least 1 parameter is mandatory:\nInput reference gff file (-g).\n\n". - "Ouptut is optional. 
Look at the help documentation to know more.\n", - -verbose => 0, - -exitval => 1 } ); -} - -#print perl version -print "-------------------------------------------------------------------------------\n"; -print "This script is being run by perl ".$^V."\n"; -print "Bioperl location being used: ".substr($INC{"Bio/Tools/GFF.pm"}, 0 , -12)."\n"; -print "-------------------------------------------------------------------------------\n"; -############################# -# check version input value # -check_version($opt_version_input); -#check_version($opt_version_output); - -###################### -# Manage output file # -my $gffout; -if ($opt_output) { - open(my $fh, '>', $opt_output) or die "Could not open file '$opt_output' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => $opt_version_output ); - } -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => $opt_version_output); -} - - ##################### - # MAIN # - ##################### - -###################### -### Parse GFF input # -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ - input => $opt_gfffile, - locus_tag => $opt_comonTag, - gff_version => $opt_version_input, - verbose => $opt_verbose, - merge_loci => $opt_merge, - no_check => $opt_no_check, - expose_feature_levels => $opt_expose_feature_levels - }); -print ("GFF3 file parsed\n"); - -### -# Print result - -print_omniscient($hash_omniscient, $gffout); #print gene modified - -my $end_run = time(); -my $run_time = $end_run - $start_run; -print "usage: $0 @copyARGV\n"; -print "Job done in $run_time seconds\n"; - - -sub check_version{ - my ($version) = @_; - if($version and ($version != 1 and $version != 2 and $version != 3)){ - print "Gff version accepted is 1,2 or 3. $version is not a correct value.\n"; - exit; - } -} - -__END__ -=head1 NAME - -gff3_IO.pl - -This script read and print a gff file. 
It will be read by GFF3::Omniscient parser that will look for duplicate features, duplicate IDs and will print the features sorted. -The result is written to the specified output file, or to STDOUT. - -=head1 SYNOPSIS - - ./gxf_to_gff3.pl -g infile.gff [ -o outfile ] - ./gxf_to_gff3.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<-g>, B<--gff> or B<-ref> - -Input GFF3 file that will be read (and sorted) - -=item B<-c> or B<--ct> - -When the gff file provided is not correcly formated and features are linked to each other by a comon tag (by default locus_tag), this tag can be provided to parse the file correctly. - -=item B<--efl> or B<--expose> - -If you want to see, add or modified the feature relationships you will have to use this option. -It will copy past in you working directory the json files used to define the relation between feature types and their level organisation. -Typical level organisation: Level1 => gene; Level2 => mRNA; level3 => exon,cds,utrs -If you get warning from the Omniscient parser that a feature relationship is not defined, you can provide information about it within the exposed json files. -Indeed, if the json files exists in your working directory, they will be used by default. - -=item B<--ml> or B<--merge_loci> - -Merge loci parameter, default deactivated. You turn on the parameter if you want to merge loci into one locus when they overlap. -(at CDS level for mRNA, at exon level for other level2 features. Strand has to be the same). Prokaryote can have overlaping loci so it should not use it for prokaryote annotation. -In eukaryote, loci rarely overlap. Overlaps could be due to error in the file, mRNA can be merged under the same parent gene if you acticate the option. - -=item B<-v> - -Verbose option. To modify vefbosity. Default 1. 0 is quiet, 2 and 3 are increasing verbosity. - -=item B<--nc> or B<--no_check> - -To deacticate all check that can be performed by the parser (e.g fixing UTR, exon, coordinates etc...) 
- -=item B<-o> or B<--output> - -Output GFF file. If no output file is specified, the output will be -written to STDOUT. - -=item B<--gvi> or B<--gff_version_input> - -If you don't want to use the autodection of the gff/gft version you give as input, you can force the tool to use the parser of the gff version you decide to use: 1,2,2.5 or 3. Remind: 2.5 is suposed to be gtf. - -=item B<--gvo> or B<--gff_version_output> - -If you don't want to use the autodection of the gff/gft version you give as input, you can force the tool to use the parser of the gff version you decide to use: 1,2,2.5 or 3. Remind: 2.5 is suposed to be gtf. - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Converter/ips2genomic.pl b/annotation/Tools/Converter/ips2genomic.pl deleted file mode 100755 index b9d44eeb9..000000000 --- a/annotation/Tools/Converter/ips2genomic.pl +++ /dev/null @@ -1,210 +0,0 @@ -#!/usr/bin/env perl - -#----------------------------------------------------------------------- -# Reads mRNA and CDS data from a GFF file and then maps the output from -# InterProScan to genomic locations given InterProScan results in TSV -# format. 
-# - -use strict; -use warnings; -use Getopt::Long; -use IO::File; - -my $usage = qq{ -perl my_script.pl - Getting help: - [--help] - List of all available databases - [--gff_file] - A GFF file with gene models (reference data) - [--ips_file] - An Interpro TSV formatted annotation -}; - -my $gff_file = undef; -my $ips_file = undef; -my $help; - -GetOptions( - "help" => \$help, - "gff_file=s" => \$gff_file, - "ips_file=s" => \$ips_file); - -sub parse_gff_line -{ - my ($line) = @_; - - my @fields = split( /\t/, $line ); - - my %entry; - - $entry{'seqid'} = $fields[0]; - $entry{'type'} = $fields[2]; - $entry{'start'} = $fields[3]; - $entry{'end'} = $fields[4]; - $entry{'strand'} = $fields[6]; - - $fields[8] =~ /ID=([^;]+)/; - $entry{'id'} = $1; - - $fields[8] =~ /Parent=([^;]+)/; - $entry{'parent'} = $1; - - return \%entry; -} - -my $gff_in = IO::File->new($gff_file) - or die( sprintf( "Unable to open GFF file '%s' for reading:\n%s\n", - $gff_file, $! ) ); - -my $ips_in = IO::File->new($ips_file) - or die( sprintf( "Unable to open " - . "InterProScan result file '%s' for reading:\n%s\n", - $ips_file, $! ) ); - -my %mRNA; - -print( STDERR ">> Reading GFF file...\n" ); -while ( my $line = $gff_in->getline() ) { - chomp($line); - - my $entry = parse_gff_line($line); - - # Only care about the 'mRNA' and 'CDS' entries. Hook all CDS - # entries up to the corresponding mRNA. 
- if ( $entry->{'type'} eq 'mRNA' ) { - $mRNA{ $entry->{'id'} } = $entry; - } - elsif ( $entry->{'type'} eq 'CDS' ) { - if ( exists( $mRNA{ $entry->{'parent'} } ) ) { - push( @{ $mRNA{ $entry->{'parent'} }{'CDS'} }, $entry ); - } - else { - die( sprintf( "No parent '%s' for CDS '%s'", - $entry->{'parent'}, $entry->{'id'} ) ); - } - } -} - -$gff_in->close(); - -print( STDERR ">> Calculating CDS coordinates...\n" ); -foreach my $mRNA_entry ( values(%mRNA) ) { - my $cds_length_sum = 0; - - $mRNA_entry->{'CDS'} = [ sort { $a->{'start'} <=> $b->{'start'} } - @{ $mRNA_entry->{'CDS'} } ]; - - foreach my $cds_entry ( @{ $mRNA_entry->{'CDS'} } ) { - my $cds_length = $cds_entry->{'end'} - $cds_entry->{'start'} + 1; - - $cds_entry->{'CDS_start'} = $cds_length_sum + 1; - $cds_entry->{'CDS_end'} = $cds_length_sum + $cds_length; - - $cds_length_sum += $cds_length; - } -} - -print( STDERR ">> Processing InterProScan results...\n" ); -while ( my $line = $ips_in->getline() ) { - chomp($line); - - my @ips_fields = split( /\t/, $line ); - - my $mRNA_id = $ips_fields[0]; - if ( !exists( $mRNA{$mRNA_id} ) ) { - warn( - sprintf( "mRNA ID '%s' not found in GFF file, skipping\n", $mRNA_id ) ); - next; - } - - my $mRNA_entry = $mRNA{$mRNA_id}; - - my $hit_cds_start = 3 * ( $ips_fields[6] - 1 ) + 1; - my $hit_cds_end = 3 * ( $ips_fields[7] - 1 ) + 1; - - my $hit_start; - my $hit_end; - my @hit_coords; # List of "match_part" coordinates. - - # Go through the CDS entries for this mRNA until we've found the - # correct one for both hit start and hit end. - foreach my $cds_entry ( @{ $mRNA_entry->{'CDS'} } ) { - if ( $hit_cds_start >= $cds_entry->{'CDS_start'} - && $hit_cds_start <= $cds_entry->{'CDS_end'} ) - { - # Start of hit is in this CDS. - $hit_start = $cds_entry->{'start'} + - ( $hit_cds_start - $cds_entry->{'CDS_start'} ); - - # This "match_part" starts part way into this CDS. Its end - # is still unknown. 
- @hit_coords = ( [ $hit_start, undef ] ); - } - elsif ( defined($hit_start) ) { - - # CDS for hit start has been found already and this CDS is - # part of the hit, so this "match_part" starts at the start - # of the CDS. Its end is still unknown. - push( @hit_coords, [ $cds_entry->{'start'}, undef ] ); - } - - if ( $hit_cds_end >= $cds_entry->{'CDS_start'} - && $hit_cds_end <= $cds_entry->{'CDS_end'} ) - { - # End of hit is in this CDS. - $hit_end = - $cds_entry->{'start'} + ( $hit_cds_end - $cds_entry->{'CDS_start'} ); - - # Complete the last "match_part" by filling in the hit end, - # which is part way into this CDS. - $hit_coords[-1][1] = $hit_end; - - last; # Done with this protein match. - } - elsif ( defined($hit_start) ) { - - # This CDS is part of the hit, so this "match_part" ends at - # the end of the CDS. - $hit_coords[-1][1] = $cds_entry->{'end'}; - } - } ## end foreach my $cds_entry ( @{ ...}) - - if ( !defined($hit_start) || !defined($hit_end) ) { - die( sprintf( "\nProtein match falls outside of CDS for %s::%s on %s\n", - $ips_fields[3], $ips_fields[4], $mRNA_id ) ); - } - - my $feature_id = sprintf( "%s:%s:%s", - $mRNA_entry->{'id'}, $ips_fields[3], - $ips_fields[4] ); - - # Output the "protein_match" feature. - printf( "%s\tinterproscan\tprotein_match\t%d\t%d\t%g\t%s\t.\t" - . "ID=%s;" - . "Name=%s:%s;" - . "description=%s\n", - $mRNA_entry->{'seqid'}, $hit_coords[0][0], - $hit_coords[-1][1], 0, - $mRNA_entry->{'strand'}, $feature_id, - $ips_fields[3], $ips_fields[4], - $ips_fields[5] ); - - # Output the "match_part" features. - my $count = 0; - foreach my $coords (@hit_coords) { - printf( "%s\tinterproscan\tmatch_part\t%d\t%d\t%g\t%s\t.\t" - . "ID=%s:%d;" - . "Name=%s:%s;" - . "Parent=%s\n", - $mRNA_entry->{'seqid'}, $coords->[0], $coords->[1], - $ips_fields[8], $mRNA_entry->{'strand'}, $feature_id, - ++$count, $ips_fields[3], $ips_fields[4], - $feature_id ); - } - -} ## end while ( my $line = $ips_in...) 
- -$ips_in->close(); - diff --git a/annotation/Tools/Converter/jgi2gff3.pl b/annotation/Tools/Converter/jgi2gff3.pl deleted file mode 100755 index de0168594..000000000 --- a/annotation/Tools/Converter/jgi2gff3.pl +++ /dev/null @@ -1,249 +0,0 @@ -#!/usr/bin/env perl -# jgi2gff.pl ; from gtf2gff.pl -# d.gilbert; 2006 - update 2007 for stop_codon insert to CDS - -use strict; - -=item input jgi gff - -note not same as gtf; stop_codon is contained in last CDS - -scaffold_1 JGI exon 102936 103037 . + . name "estExt_fgenesh1_pg.C_10005"; transcriptId 219910 -scaffold_1 JGI CDS 102936 103037 . + 0 name "estExt_fgenesh1_pg.C_10005"; proteinId 219910; exonNumber 1 -scaffold_1 JGI start_codon 102936 102938 . + 0 name "estExt_fgenesh1_pg.C_10005" -scaffold_1 JGI exon 103597 103794 . + . name "estExt_fgenesh1_pg.C_10005"; transcriptId 219910 -scaffold_1 JGI CDS 103597 103794 . + 0 name "estExt_fgenesh1_pg.C_10005"; proteinId 219910; exonNumber 2 -scaffold_1 JGI exon 104011 104369 . + . name "estExt_fgenesh1_pg.C_10005"; transcriptId 219910 -scaffold_1 JGI CDS 104011 104331 . + 0 name "estExt_fgenesh1_pg.C_10005"; proteinId 219910; exonNumber 3 -scaffold_1 JGI stop_codon 104329 104331 . + 0 name "estExt_fgenesh1_pg.C_10005" - -to: -scaffold_1 JGI gene 59340 60199 . + . ID=fgenesh1_pg.C_scaffold_1000003;trI -D=Dappu1_FM5_93892 -scaffold_1 JGI mRNA 59340 60199 . + . ID=Dappu1_FM5_93892;Parent=fgenesh1_p -g.C_scaffold_1000003 -scaffold_1 JGI exon 59340 59370 . + . Parent=Dappu1_FM5_93892;ni=15 -scaffold_1 JGI CDS 59340 59370 . + 0 Parent=Dappu1_FM5_93892;ni=16 -scaffold_1 JGI exon 59491 59620 . + . Parent=Dappu1_FM5_93892;ni=17 -scaffold_1 JGI CDS 59491 59620 . + 1 Parent=Dappu1_FM5_93892;ni=18 -scaffold_1 JGI exon 59944 60199 . + . Parent=Dappu1_FM5_93892;ni=19 -scaffold_1 JGI CDS 59944 60199 . 
+ 2 Parent=Dappu1_FM5_93892;ni=20 - - -=cut - - -my $idprefix="JGI_V11_"; # Fixme: option - -my $suf=".gff"; -my %renameft = ( -'5UTR' => 'five_prime_UTR', -'3UTR' => 'three_prime_UTR', -); -my %dropft = ( -'start_codon' => 1,'stop_codon' => 1, # these are all subsumed by CDS/UTR ? -); -my %renamea = ( -#'gene_id' => 'Parent', -#'name' => 'Name', -'transcriptId' => 'Parent', # to gene -'proteinId' => 'Parent', # to mRNA -'exonNumber' => 'ni', -'transcript_id' => 'Parent', -); -my %dropa = ( -#'transcript_id' => 1, -'gene_id' => 1, -'name' => 1, # not for exons; move to gene ID -); - -my ($gid,$tid,$lgid,$ltid, $llgid, $xid, $gname, $lgname, @gv); -my ($stopb,$stope,$stopo)=(0,0,0); - -die "usage: jgi2gff.pl files.jgi-gff : convert to files.gff version 3\n" - if(!@ARGV || $ARGV[0] =~ /^\-/); - -my ($gidc,$tidc) = (0) x 10; - -foreach my $in (@ARGV) { - my $out= $in; $out =~ s/.gz$//; - unless( $out =~ s/\.\w+$/$suf/ && $out ne $in) { $out.= $suf; } - - my $ok= ($in =~ /\.gz$/) ? open(IN,"gunzip -c $in|") : open(IN,$in); - die "open $in" unless($ok); - rename($out,"$out.old") if(-e $out); - open(OUT,">$out") or die "write $out"; - - $xid=0; # exon-id; helpful - print OUT "##gff-version 3\n"; - while(){ - if(/^#/ && !/##gff/) { print OUT; next; } - next unless(/^\w/); - chomp; - my @v=split"\t"; - - my $isrev= ($v[6] eq '-'); - $gidc++ if(($isrev and $v[2] eq 'stop_codon') or (!$isrev and $v[2] eq 'start_codon') ) ; - - ($stopb,$stope,$stopo)= ($v[3],$v[4],$v[6]) if($v[2] eq 'stop_codon'); - next if ($dropft{$v[2]}); # 07apr: need to add stop_codon loc to last CDS **** - - $v[2]=$renameft{$v[2]} || $v[2]; - my @at=split( /\s*;\s*/, $v[8]); - my @an=(); - $tid= $gid= 0; # $gidc; #?? 
- foreach (@at) { - my ($k,$v)= split " ",$_,2; - $v=~ s/"//g; - if($k eq 'name') { - $gid= $v; - if($gid =~ s/[^\w\.-]/_/g){ $gname= $v; } # $k= "Name"; - else { $gname=""; } - # keep this one, name is dropped; BUT only for gene entry, not exons - } - $tid= $v= $idprefix .$v if($k eq 'transcriptId'); - $tid= $v= $idprefix .$v if($k eq 'proteinId'); # not always same? - $k= $renamea{$k} || $k; - push(@an, "$k=$v") unless( $dropa{$k}); - } - $v[8]= join(";",@an); - if(!$gid and !$tid) { $gid= $tid= $gidc; } - elsif (!$gid) { $gid= $tid; } - - # print OUT join("\t",@v),"\n"; # save gene models; adjust stop_codon - printGene($lgid, $ltid, \@gv, $lgname) if($tid ne $ltid) ; - - push(@gv, \@v); - ($lgid,$ltid,$lgname)=($gid,$tid,$gname); - } - - printGene($lgid, $ltid, \@gv, $lgname) ; - - close(OUT); close(IN); -} - -sub printGene { - my($gid, $tid, $rgv, $gname)= @_; - return unless (@gv); - - my ($tb,$te)=(0,0); - my $isrev= ($stopo eq '-' || $stopo < 0); - foreach my $g (@gv) { - -# not for jgi gff -# if($$g[2] eq 'CDS' && $isrev && $stope == $$g[3]-1) { $$g[3]= $stopb; } -# elsif($$g[2] eq 'CDS' && !$isrev && $stopb == $$g[4]+1) { $$g[4]= $stope; } - -# if($$g[8] !~ m/;ni=/ && ($$g[2] eq 'CDS' || $$g[2] eq 'exon')) { -# $xid++; $$g[8] =~ s/$/;ni=$xid/ ; -# } - - # also need to add gene, mRNA lines .... 
- $tb= $$g[3] if($tb > $$g[3] || $te==0); - $te= $$g[4] if($te < $$g[4]); - } - - my @tr= @{$gv[0]}; - $tr[2]= "mRNA"; - $tr[5]= "."; # score - $tr[7]= "."; # phase - $tr[3]= $tb; $tr[4]= $te; - $tr[8] =~ s/Parent=/Parent=$gid;ID=/; - $tr[8] =~ s/;ni=\w+//; - my $tr= join("\t",@tr); - my $gn=""; - if($gid ne $llgid) { - $tr[8] =~ s/ID=/trID=/; - $tr[8] =~ s/Parent=/ID=/; - $tr[8] =~ s/$/;Name=$gname/ if($gname); - $tr[2]= "gene"; - $gn= join("\t",@tr); - } - $llgid= $gid; - - print OUT join("\n", $gn, $tr, map { join("\t",@$_);} @gv),"\n" ; - @gv=(); ($stopb,$stope,$stopo)=(0,0,0); -} - - - -=item GTF - - ref: http://mblab.wustl.edu/GTF2.html - ===== GTF stop_codon is outside of CDS exon, but before UTR ** ====== - - chr1 UCSC start_codon 914833 914835 0 + . gene_id "NM_152486"; transcript_id "NM_152486.a"; - chr1 UCSC CDS 914833 914983 0 + 0 gene_id "NM_152486"; transcript_id "NM_152486.a"; - chr1 UCSC CDS 916667 916829 0 + 2 gene_id "NM_152486"; transcript_id "NM_152486.a"; - chr1 UCSC CDS 917659 917774 0 + 1 gene_id "NM_152486"; transcript_id "NM_152486.a"; - chr1 UCSC CDS 917933 918011 0 + 2 gene_id "NM_152486"; transcript_id "NM_152486.a"; - chr1 UCSC CDS 918082 918581 0 + 1 gene_id "NM_152486"; transcript_id "NM_152486.a"; - chr1 UCSC CDS 918776 918900 0 + 2 gene_id "NM_152486"; transcript_id "NM_152486.a"; - chr1 UCSC CDS 919221 919331 0 + 0 gene_id "NM_152486"; transcript_id "NM_152486.a"; - chr1 UCSC CDS 919431 919673 0 + 0 gene_id "NM_152486"; transcript_id "NM_152486.a"; - chr1 UCSC stop_codon 919674 919676 0 + . gene_id "NM_152486"; transcript_id "NM_152486.a"; - chr1 UCSC 3UTR 919677 920104 0 + . gene_id "NM_152486"; transcript_id "NM_152486.a"; - chr1 UCSC 5UTR 936110 936216 0 + . gene_id "NM_198317"; transcript_id "NM_198317.a"; - - .... - AB000123 Twinscan CDS 193817 194022 . - 2 gene_id "AB000123.1"; transcript_id "AB00123.1.2"; - AB000123 Twinscan CDS 199645 199752 . 
- 2 gene_id "AB000123.1"; transcript_id "AB00123.1.2"; - AB000123 Twinscan CDS 200369 200508 . - 1 gene_id "AB000123.1"; transcript_id "AB00123.1.2"; - AB000123 Twinscan CDS 215991 216028 . - 0 gene_id "AB000123.1"; transcript_id "AB00123.1.2"; - AB000123 Twinscan start_codon 216026 216028 . - . gene_id "AB000123.1"; transcript_id "AB00123.1.2"; - AB000123 Twinscan stop_codon 193814 193816 . - . gene_id "AB000123.1"; transcript_id "AB00123.1.2"; - - -=cut - - -## this isn't helpful; use plain perl -__END__ -use strict; -use Bio::Tools::GFF; -use Bio::FeatureIO; -use Getopt::Long; - -my ($output,$input,$format,$type,$help,$cutoff,$sourcetag,$comp, - $gffver,$match,$quiet); -$format = 'gtf'; # by default -$gffver = 3; -# GTF, is also known as GFF v2.5 - -GetOptions( - 'i|input:s' => \$input, - 'o|output:s' => \$output, - 'f|format:s' => \$format, - 'v|version:i'=> \$gffver, - 'q|quiet' => \$quiet, - 'h|help' => sub{ exec('perldoc',$0); exit(0) }, - ); - -# if no input is provided STDIN will be used -my $parser = new Bio::Tools::GFF(-gff_version => 2.5, -file => $input); -#my $parser = new Bio::FeatureIO(-format => $format, -file => $input); - -my $out; -if( defined $output ) { - $out = new Bio::Tools::GFF(-gff_version => $gffver, -file => ">$output"); - #$out = new Bio::FeatureIO(-format => 'gff', version => $gffver, -file => ">$output"); -} else { - $out = new Bio::Tools::GFF(-gff_version => $gffver); # STDOUT - #$out = new Bio::FeatureIO(-format => 'gff', version => $gffver); # STDOUT -} - -while( my $result = $parser->next_feature ) { - $out->write_feature($result); - } -__END__ - - -set dp=dana -set species=ananassae -set dpid=${dp}_caf051209 -set scd=$sc/${dp}3 - -$gbl/bin/lu_bulk_load_gff.pl -create -java $gbl/lib/java \ --data lucene-dana_caf1annot \ -$scd/*trna*.gff $scd/*nscan*gff.gz >& log.lu.dana_caf051209 - diff --git a/annotation/Tools/Converter/makergff2evm.pl b/annotation/Tools/Converter/makergff2evm.pl deleted file mode 100755 index 
36c878ae0..000000000 --- a/annotation/Tools/Converter/makergff2evm.pl +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env perl - -# A filter for Uniprot and RefSeq fasta files that makes the fasta -# headers a bit more terse. Reads from STDIN, writes to STDOUT. -# - -use strict; -use warnings; - -my $first_line = ; -chomp($first_line); - - -my %parsers = ( - 'null' => sub { - my ($line) = @_; - return $line; - }, - - 'est' => sub { - my ($line) = @_; - - $line =~ s/match_part/EST_match/g ; - - return $line; - - }, - 'protein' => sub { - my ($line) = @_; - - $line =~ s/match_part/nucleotide_to_protein_match/g ; - - return $line; - } ); - -my $parser = 'null'; - -if ( $first_line =~ /^.*est2genome.*/ ) { $parser = 'est'; } -elsif ( $first_line =~ /^.*Cufflinks.*/ ) { $parser = 'est'; } -elsif ( $first_line =~ /^.*protein2genome.*/ ) { $parser = 'protein'; } - -print( $parsers{$parser}($first_line), "\n" ); - -while ( my $line = ) { - chomp($line); - next unless ($line =~ /match_part/ ); - print $parsers{$parser}($line), "\n"; -} diff --git a/annotation/Tools/Converter/mfannot2gff.pl b/annotation/Tools/Converter/mfannot2gff.pl deleted file mode 100755 index 73d15c0c7..000000000 --- a/annotation/Tools/Converter/mfannot2gff.pl +++ /dev/null @@ -1,232 +0,0 @@ -#!/usr/bin/perl - -# Convert Mfannot output file to GFF3 format -# kbseah@mpi-bremen.de 2015-04-01 -# modified by jacques dainat 2017-11: jacques.dainat@nbis.se - -use strict; -use warnings; -use Getopt::Long; -use Pod::Usage; -use Data::Dumper; - -my $mfannot_file; -my $gff_file; -my %startend_hash; # Stores start and end positions of each feature reported -#my %end_hash; # Stores end positions of each feature reported -my %contig_hash; # Stores contig each feature falls on -my %gencode_hash; - -GetOptions( - 'mfannot|m|i=s' => \$mfannot_file, - 'gff|g|o=s' => \$gff_file, - 'help|h' => sub { pod2usage( -exitstatus=>2, -verbose=>2 ); }, - 'man' => sub { pod2usage(-exitstatus=>0, -verbose=>2); } -) or pod2usage ( 
-exitstatus=>2, -verbose=>2 ); - -if (!defined $mfannot_file) { - pod2usage( -message=>"Insufficient options supplied", -exitstatus=>2 ); -} - -## MAIN ############################################################## - -read_mfannot($mfannot_file); -write_gff($gff_file); - -## SUBROUTINES ####################################################### - -sub usage { - print STDERR "Convert Mfannot Masterfile to GFF3 format\n"; - print STDERR "\n"; - print STDERR "Usage: perl mfannot2gff.pl -m input.new -g output.gff \n"; - print STDERR "\n"; - exit(); -} - -sub read_mfannot { - my $current_contig; # Track the current contig - my $current_genetic_code; # Track current genetic code - my $current_pos=1; # Track current position - #my $current_feature; # Track current feature - #my $current_startend; # Track current feature start/end - #my $current_leftright; - my $current_comment; # Track current commentfield - my $writeflag=0; - my $previousSplit1=""; - my $previousSplit2=""; - - open(INPUT, "<", "$_[0]") or die ("$!\n"); - # Open Mfannot file for reading - while () { - chomp; - if ($_ =~ /^>(.*) gc=(\d+)/) { - # If a header line, update the current contig and genetic code - ($current_contig, $current_genetic_code) = ($1, $2); - $current_pos=1; # Reset the position counter - $gencode_hash{$current_contig} = $current_genetic_code; - } - elsif ($_ =~ /^\s+(\d+)\s+([ATCGatcgNn]+)/) { - # If line is a numbered sequence line - my ($pos_begin,$seqline) = ($1, $2); # Sequence position - $current_pos = length($seqline) + $pos_begin - 1; - } - elsif ($_ =~ /^;+\s+G-(\w.*)/) { - # If line is a feature boundary, save that information - my @splitline = split /\s/, $1; - #push (@{$contig_hash{$current_contig}}, substr($splitline[0],2)); - $contig_hash{$current_contig}{$splitline[0]} = 1; - - - if ($splitline[1] eq "<==" && $splitline[2] eq "start" ) { - if (defined $startend_hash{$splitline[0]}{"start"}) { - - if ($previousSplit1 eq $splitline[1] and $previousSplit2 eq $splitline[2]){ 
#keep the first key and the second value - my $i = keys %{$startend_hash{$splitline[0]}{"start"}}; - $startend_hash{$splitline[0]}{"start"}{$i-1} = $current_pos; - print STDERR "11 - Feature ". $splitline[0]. " already defined. Please manually verify in $mfannot_file\n"; - next; - } - - my $i = keys %{$startend_hash{$splitline[0]}{"start"}}; - $startend_hash{$splitline[0]}{"start"}{$i} = $current_pos; - print STDERR "1 - Feature ". $splitline[0]. " already defined. Please manually verify in $mfannot_file\n"; - } - else { $startend_hash{$splitline[0]}{"start"}{0} = $current_pos; } - } - elsif ($splitline[1] eq "==>" && $splitline[2] eq "end" ) { - if (defined $startend_hash{$splitline[0]}{"end"}{0}) { - - if ($previousSplit1 eq $splitline[1] and $previousSplit2 eq $splitline[2]){ #keep the first key and the second value - my $i = keys %{$startend_hash{$splitline[0]}{"end"}}; - $startend_hash{$splitline[0]}{"end"}{$i-1} = $current_pos; - print STDERR "22 - Feature ". $splitline[0]. " already defined. Please manually verify in $mfannot_file\n"; - next; - } - - my $i = keys %{$startend_hash{$splitline[0]}{"end"}}; - $startend_hash{$splitline[0]}{"end"}{$i} = $current_pos; - print STDERR "2 - Feature ". $splitline[0]. " already defined. Please manually verify in $mfannot_file\n"; - } - else { $startend_hash{$splitline[0]}{"end"}{0} = $current_pos; } - - } - elsif ($splitline[1] eq "==>" && $splitline[2] eq "start") { - if (defined $startend_hash{$splitline[0]}{"start"}{0}) { - - if ($previousSplit1 eq $splitline[1] and $previousSplit2 eq $splitline[2]){ - print STDERR "3 - Feature ". $splitline[0]. " already defined. Please manually verify in $mfannot_file\n"; - next; - } #keep the first key and the first value - - my $i = keys %{$startend_hash{$splitline[0]}{"start"}}; - $startend_hash{$splitline[0]}{"start"}{$i} = $current_pos + 1; - print STDERR "3 - Feature ". $splitline[0]. " already defined. 
Please manually verify in $mfannot_file\n"; - } - else { $startend_hash{$splitline[0]}{"start"}{0} = $current_pos + 1; } - } - elsif ($splitline[1] eq "<==" && $splitline[2] eq "end") { - if (defined $startend_hash{$splitline[0]}{"end"}{0}) { - - if ($previousSplit1 eq $splitline[1] and $previousSplit2 eq $splitline[2]){ - print STDERR "44 - Feature ". $splitline[0]. " already defined. Please manually verify in $mfannot_file\n"; - next; - } #keep the first key and the first val - - my $i = keys %{$startend_hash{$splitline[0]}{"end"}}; - $startend_hash{$splitline[0]}{"end"}{$i} = $current_pos + 1; - print STDERR "4 - Feature ". $splitline[0]. " already defined. Please manually verify in $mfannot_file\n"; - } - else { $startend_hash{$splitline[0]}{"end"}{0} = $current_pos + 1; } - } - else { print STDERR "Exception to possible combination of feature boundaries and directions: $_ \n"; } - $previousSplit1=$splitline[1]; - $previousSplit2=$splitline[2]; - } - } - close(INPUT); -} - -sub write_gff { - open(GFF, ">", "$_[0]") or die ("$!\n"); - print GFF "##gff-version 3\n"; # header line - foreach my $thecontig (keys %contig_hash) { - foreach my $thefeature (keys %{$contig_hash{$thecontig}}) { - - foreach my $featureNB (keys %{$startend_hash{$thefeature}{"start"}}) { - my $featuretype; - if ($thefeature =~ /^rnl/ | $thefeature =~ /^rns/) { $featuretype="rRNA"; } - elsif ($thefeature =~ /^trn/) { $featuretype = "tRNA"; } - else {$featuretype="CDS";} - my $featuredir; - my $frame; - my $start; - my $end; - if ($startend_hash{$thefeature}{"end"}{$featureNB} < $startend_hash{$thefeature}{"start"}{$featureNB}) { - $featuredir = "-"; - $start = $startend_hash{$thefeature}{"end"}{$featureNB}; - $end = $startend_hash{$thefeature}{"start"}{$featureNB}; - } else { - $featuredir="+"; - $start = $startend_hash{$thefeature}{"start"}{$featureNB}; - $end = $startend_hash{$thefeature}{"end"}{$featureNB}; - } - if ($featuretype eq "CDS") { $frame="0"; } else { $frame = "."; } - my 
@gff3_line = ($thecontig, - "mfannot", - $featuretype, - $start, - $end, - ".", - $featuredir, - $frame, - "ID=$thefeature;Name=$thefeature;transl_table=$gencode_hash{$thecontig};gene=$thefeature" - ); - print GFF join ("\t", @gff3_line)."\n"; - } - } - } - close (GFF); -} - -=head1 NAME - -mfannot2gff.pl - Convert MFannot Masterfile to GFF3 format - -=head1 SYNOPSIS - -perl mfannot2gff.pl -m -g -perl mfannot2gff.pl --help - -=head1 DESCRIPTION - -Conversion utility for MFannot "masterfile" annotation produced by the MFannot -pipeline (http://megasun.bch.umontreal.ca/RNAweasel/). Reports GFF3 format. If -more than one instance of a gene annotation (e.g. more than one ORF annotated -as "nad10"), then you will have to manually verify the MFannot file and give -them distinguishing names before running this script again. - -=head1 COPYRIGHT AND LICENSE - -Copyright (C) 2015, Brandon Seah (kbseah@mpi-bremen.de) -... GPL-3 ... -modified by jacques dainat 2017-11 - -=head1 OPTIONS - -=over 8 - -=item B<-m> or B<-i> or B<--mfannot> - -The mfannot input file - -=item B<-g> or B<-o> or B<--gff> - -the gff output file - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Deprecated/gff3_sq_augustusTest_to_properTrack.pl b/annotation/Tools/Deprecated/gff3_sq_augustusTest_to_properTrack.pl deleted file mode 100755 index 1d7d0176f..000000000 --- a/annotation/Tools/Deprecated/gff3_sq_augustusTest_to_properTrack.pl +++ /dev/null @@ -1,125 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Getopt::Long; -use Pod::Usage; -use IO::File; -use List::MoreUtils qw(uniq); -use File::Basename; -use Bio::Tools::GFF; - -my $header = qq{ -######################################################## -# NBIS 2014 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. 
# -######################################################## -}; - -my $gff = undef; -my $opt_output = undef; - -my $help= 0; - -if ( !GetOptions( - "help|h" => \$help, - 'o|output=s' => \$opt_output, - "gff|f=s" => \$gff)) - -{ - pod2usage( { -message => "Failed to parse command line", - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 1, - -exitval => 0, - -message => "$header \n" } ); -} - -if ( ! (defined($gff)) ){ - pod2usage( { - -message => "$header\nAt least 1 parameter is mandatory:\nInput reference gff file (--gff) \n\n", - -verbose => 0, - -exitval => 2 } ); -} - -#### OUT -my $gffout; -if ($opt_output) { - ##### Stream out - open($gffout, '>', $opt_output) or die "Could not open file '$opt_output' $!"; - } -else{ - open($gffout, '>&', STDOUT,) or die "Could not open file '$opt_output' $!"; -} - - ##################### - # MAIN # - ##################### - - - -###################### -### Parse GFF input # -print "Reading file $gff\n"; -my $fh1; -if ($gff) { - open($fh1, '<', $gff) or die "Could not open file '$gff' $!"; -} -### END Parse GFF input # -######################### - -while( my $line = <$fh1>) { - my @list = split(/\s/,$line); - my $header = $list[0]; - my @header_parts = split(/_/,$header); - my @positions = split(/-/,$header_parts[1]); - my $start = $positions[0]; - if($list[1] ne "database"){ - print $gffout $header_parts[0]."\t".$list[1]."\t".$list[2]."\t".($start+$list[3])."\t".($start+$list[4])."\t".$list[5]."\t".$list[6]."\t".$list[7]."\t".$list[8]."\n"; - } -} - - - -print "Done\n"; - - - - -__END__ - -=head1 NAME - -gff3_sq_augustusTest_to_properTrack.pl - -The script take a gff3 file from Augustus as input. It has to have been preproceed by gxf_to_gff.pl first. -It will recreate proper coordinate to visualise the gff file into a browser. 
- -=head1 SYNOPSIS - - ./gff3_sq_augustusTest_to_properTrack.pl -gff file.gff [ -o outfile ] - ./gff3_sq_augustusTest_to_properTrack.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff> or B<-f> - -Input GFF3 file that will be read (and sorted) - -=item B<--output> or B<-o> - -File where will be written the result. If no output file is specified, the output will be written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Deprecated/gff_filter_by_mrnaBlastValue.pl b/annotation/Tools/Deprecated/gff_filter_by_mrnaBlastValue.pl deleted file mode 100755 index 2e57fdcb0..000000000 --- a/annotation/Tools/Deprecated/gff_filter_by_mrnaBlastValue.pl +++ /dev/null @@ -1,235 +0,0 @@ -#!/usr/bin/perl - -### -# Original develloped by Marc Hoeppner -# Modified by Jacques Dainat -# 2015/03 -### - -use Carp; -use strict; -use Getopt::Long; -use Scalar::Util qw(openhandle); -use Bio::Tools::GFF; -use Time::Piece; -use Time::Seconds; -use Data::Dumper; -use lib $ENV{ANDREASCODE}; -use Private::Bio::IO::GFF; -use URI::Escape; - -my $usage = qq{ -perl my_script.pl - Getting help: - [--help] - - Input: - [--gff filename] - The name of the file to read. - - [--blast filename] - The list of the all-vs-all blast file (outfmt 6, blastp) - - Ouput: - [--outfile filename] - The name of the output file. By default the output is the - standard output -}; - -my $outfile = undef; -my $gff = undef; -my $blast = undef; -my $help; - -GetOptions( "help" => \$help, - "gff=s" => \$gff, - "blast=s" => \$blast, - "outfile=s" => \$outfile ); - -# Print Help and exit -if ($help) { - print $usage; - exit(0); -} - -# Open Input gff3 file # -my $ref_istream = IO::File->new(); -$ref_istream->open( $gff, 'r' ) or - croak( sprintf( "Can not open '%s' for reading: %s", $gff, $! 
) ); -my $ref_in = Private::Bio::IO::GFF->new( istream => $ref_istream ); - -# Open Output files # -my $ostream = IO::File->new(); -if ($outfile) { - $ostream->open( $outfile, 'w' ) or - croak( sprintf( "Can not open '%s' for writing %s", $outfile, $! ) ); -} -else { - $ostream->fdopen( fileno(STDOUT), 'w' ) or - croak( sprintf( "Can not open STDOUT for writing: %s", $! ) ); -} -my $out = Private::Bio::IO::GFF->new( ostream => $ostream ); - -#### MAIN #### - -# Read killlist # -my $killlist = parse_blast($blast); - -# Read mRNA feature from the GFF file. -my @mRNA; -while ( my $feature = $ref_in->read_feature() ) { - if ( lc( $feature->feature_type() ) eq 'mrna' ) { - push( @mRNA, $feature ); - } -} - -# Unlink all mRNA specified by the kill-list from their (gene-) parents. -foreach my $mRNA_feature (@mRNA) { - my $id = $mRNA_feature->get_attribute('ID'); - if ( exists( $killlist->{$id} ) ) { - $mRNA_feature->remove_parent(); - } -} - -# Write the remaining things to output -my %genes_written; -foreach my $mRNA_feature (@mRNA) { - my $id = $mRNA_feature->get_attribute('ID'); - if ( defined( $mRNA_feature->parent() ) ) { - my $gene_id = $mRNA_feature->parent()->get_attribute('ID'); - if ( !exists( $killlist->{$id} ) && - !exists( $genes_written{$gene_id} ) ) - { - $out->write_feature_group( $mRNA_feature->parent() ); - $genes_written{$gene_id} = 1; - } - } -} - -# -------------- - -sub parse_blast -{ - my %answer; - my %moreThanOneTest; - my %duo_answer; - my %hashAns; - my $infile = shift; - my $cpt2 = 0; - # This is one way to open a file... - open( my $IN, '<', $infile ) or - die "FATAL: Can't open BLAST file: $infile for reading.\n$!\n"; - - # Streaming the file, line by line - while (<$IN>) { - chomp; - my $line = $_; - - my @elements = split( "\t", $line ); - - my ( $query, $target, $score ) = @elements[ 0 .. 
2 ]; - - # Matches that we need to remove - if ( $query ne $target and $score > 80.0 ) { - ####### <<<<<<<<<<<<<<<<<<<< HERE THE BLAST VALUE CONSIDERED - my $id = "$query$target"; - my $idInver = "$target$query"; - - if ( ( !exists( $hashAns{$id} ) ) && ( !exists( $hashAns{$idInver} ) ) ) - { # avoid redundance info - $hashAns{$id}++; - $cpt2++; - - # keep the 2 ids We will then remove one randomly - $duo_answer{$target} = [ $target, $query ]; - - $moreThanOneTest{$target}++; - $moreThanOneTest{$query}++ - ; # Allows to detect mRNA present more than 1 times - # (In this case they will be selected in priority - # during step 3) - } - - } - } ## end while (<$IN>) - - # We should close the file to make sure that the transaction - # finishes cleanly. - close($IN); - - #print "$cpt2\n"; - - # Detect case to remove absolutely to select in a tuple this one if - # the other we can keep it - my %caseToAvoid; - my $cpt = 0; - foreach my $key ( keys %moreThanOneTest ) { - - if ( $moreThanOneTest{$key} > 1 ) { - $caseToAvoid{$key}++; - my $valueUnEsc = uri_unescape($key); - $answer{$valueUnEsc}++; # name from blast must be unescape - $cpt++; - } - } - #print "We will remove $cpt\n"; - - ## Step3 - my $cptCount = 0; - my $removed = 0; - - # We will keep one of the tuple - foreach my $key ( keys %duo_answer ) { - my ( $val1, $val2 ) = @{ $duo_answer{$key} }; - if ( ( !exists( $caseToAvoid{$val1} ) ) and - ( !exists( $caseToAvoid{$val2} ) ) ) - { # case remove one randomly - my $valueUnEsc = uri_unescape($val1); - $answer{$valueUnEsc}++; # name from blast must be unescape - $cptCount++; - } - } - - #print "We will removed $cptCount more.\n"; - my $nbremove = keys %answer; - print "$nbremove gene will be removed !\n"; - - return \%answer; -} ## end sub parse_blast - - -=head1 NAME - -gff_filter_by_mrnaBlastValue.pl -ancient name gff_filter_by_mrna_id.pl -The script aims to remove from a gff file all the mRNA that have a similarity over THRESHOLD with another mRNA. 
This is typically useful when creating a list of MRNA to use to train abinitio gene finder. -A reciprocal blast of the sequences need to have been performed prior to the use of this script in order to get the blastp input file. - -=head1 SYNOPSIS - - ./gff_filter_by_mrnaBlastValue.pl --gff=infile.gff3 --blast blastfile --outfile outFile - ./gff_filter_by_mrnaBlastValue.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff> - -Input GFF3 file correponding to gene build. - -=item B<--blast> - -The list of the all-vs-all blast file (outfmt 6, blastp) - -=item B<--outfile> - -The name of the output file. By default the output is the standard output. - -=item B<--help> or B<-h> - -Display this helpful text. - -=back - -=cut \ No newline at end of file diff --git a/annotation/Tools/Deprecated/gff_sq_get_stats.pl b/annotation/Tools/Deprecated/gff_sq_get_stats.pl deleted file mode 100755 index e5302a91d..000000000 --- a/annotation/Tools/Deprecated/gff_sq_get_stats.pl +++ /dev/null @@ -1,115 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use Getopt::Long; -use Scalar::Util qw(openhandle); -use Time::Piece; -use Time::Seconds; -use List::MoreUtils 'any'; - -use Bio::Tools::GFF; - - -my $usage = qq{ -perl my_script.pl - Getting help: - [--help] - - Input: - [--infile filename] - The name of the GFF/GTF file to read. 
- -}; - -my $outfile = undef; -my $infile = undef; -my $help; - -GetOptions( - "help" => \$help, - "infile=s" => \$infile, - "outfile=s" => \$outfile); - -# Print Help and exit -if ($help) { - print $usage; - exit(0); -} - -if ($outfile) { - open(STDOUT, ">$outfile") or die("Cannot open $outfile"); -} - -# Here we store all the numbers -my %data_store = ( 'exons' => 0 , 'mRNAs' => 0 , 'genes' => 0 , 'coding_nt' => 0 ); - -# We want to find out what sort of file we are presented with: -my $gff_file; -my $is_gtf = 0; - -if ($infile =~ /\.gff/) { - $gff_file = Bio::Tools::GFF->new(-file =>$infile , -gff_version => 3); -} elsif ($infile =~ /\.gtf/) { - $is_gtf = 1; - $gff_file = Bio::Tools::GFF->new(-file => $infile , -gff_version => 2.5 ); -} else { - die "Sorry, no clue what file format this is (need extension .gtf or .gff)\n"; -} - -my @transcript_ids = () ; -my @gene_ids = (); - -while(my $feature = $gff_file->next_feature()) { - - if ($feature->primary_tag eq "exon" ) { - $data_store{'exons'} += 1 ; - } elsif ($feature->primary_tag =~ /[mstsno]RNA/ or $feature->primary_tag eq "transcript") { - $data_store{'mRNAs'} += 1 ; - } elsif($feature->primary_tag eq "gene") { - $data_store{'genes'} += 1; - } elsif ($is_gtf == 1) { - - my @tvalues = $feature->get_tag_values('transcript_id'); - my $transcript_id = shift @tvalues; - - unless (my ($matched) = grep $_ eq $transcript_id, @transcript_ids) { - push(@transcript_ids,$transcript_id); - $data_store{'mRNAs'} += 1; - } - - my @gvalues = $feature->get_tag_values('gene_id'); - my $gene_id = shift @gvalues ; - - unless (my ($matched) = grep $_ eq $gene_id, @gene_ids) { - push(@gene_ids,$gene_id); - $data_store{'genes'} += 1; - } - - } -} - -$gff_file->close(); - -print "#genes: $data_store{'genes'}\n"; -print "#transcripts: $data_store{'mRNAs'}\n"; -print "#exons: $data_store{'exons'}\n" ; - -# -------------- - -sub msg { - my $t = localtime; - my $line = "[".$t->hms."] @_\n"; - print LOG $line if openhandle(\*LOG); -} - 
-sub runcmd { - msg("Running:", @_); - system(@_)==0 or err("Could not run command:", @_); -} - -sub err { - msg(@_); - exit(2); -} - - diff --git a/annotation/Tools/Deprecated/gff_sq_loadTranscriptGOtoGeneLevel.pl b/annotation/Tools/Deprecated/gff_sq_loadTranscriptGOtoGeneLevel.pl deleted file mode 100755 index 3bdddd001..000000000 --- a/annotation/Tools/Deprecated/gff_sq_loadTranscriptGOtoGeneLevel.pl +++ /dev/null @@ -1,547 +0,0 @@ -#!/usr/bin/env perl - -## TO DO: -## Need to build UTR features from the difference -## between CDS and exon features. -use Carp; -use strict; -use Getopt::Long; -use Pod::Usage; -use List::MoreUtils qw(uniq); -use lib $ENV{ANDREASCODE}; -use Private::Bio::IO::GFF; - -my $usage = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## - -That script lift-over the transcript GO term to the gene level. (duplicates are removed) - -Usage: perl my_script.pl --gff Infile [--out outfile] - Getting help: - [--help] - - Input: - [--gff filename] - The name of the gff3 file to work with. - - Ouput: - [--out filename] - The name of the output file (A GFF file). - -}; - -my $outfile = undef; -my $gff = undef; -my $valueK = undef; -my $attributes = undef ; -my $help= 0; - -if ( !GetOptions( - "help|h" => \$help, - "gff=s" => \$gff, - "value|v=i" => \$valueK, - "outfile|out|o=s" => \$outfile)) - -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 2, - -exitval => 0, - -message => "$usage\n" } ); -} - -if ( ! (defined($gff)) ){ - pod2usage( { - -message => "\nAt least 1 parameter is mandatory:\nInput reference gtf file (--f)\n\n". 
- "$usage\n", - -verbose => 0, - -exitval => 2 } ); -} - -if ( defined($valueK) ){ - print "You choose to keep in output only genes mapped over $valueK percent.\n" -} - -## Manage output file -my $ostreamPlotFile = IO::File->new(); - -my $pathPlotFile="geneMapped.txt"; -my $pathOutPlot="geneMapped_plot.pdf"; -$ostreamPlotFile->open($pathPlotFile, 'w' ) or - croak( - sprintf( "Can not open '%s' for writing %s", $pathPlotFile, $! ) - ); - -my $ostream = IO::File->new(); -if ($outfile) { - $ostream->open( $outfile, 'w' ) or - croak( sprintf( "Can not open '%s' for writing %s", $outfile, $! ) ); -} -else{ - $ostream->fdopen( fileno(STDOUT), 'w' ) or - croak( sprintf( "Can not open STDOUT for writing: %s", $! ) ); -} - -my $output = Private::Bio::IO::GFF->new( ostream => $ostream ); - -### Parse GFF input file and add annotations -# Manage input gff3 file -my $ref_istream = IO::File->new(); -$ref_istream->open( $gff, 'r' ) or - croak( - sprintf( "Can not open '%s' for reading: %s", $gff, $! ) ); -my $ref_in = Private::Bio::IO::GFF->new(istream => $ref_istream); -# declaration hashes reference -my $ref_genes; my $refmRNA;my $refexon; my $refcds; my $refUTR5; my $refUTR3; my $refUTR; my $reftRNA; my $refRepeat; my $refRepeatMatch_part; my $refpieceStudied; - - -#Parse GFF -($ref_genes,$refmRNA,$refexon, $refcds, $refUTR5, $refUTR3, $refUTR, $reftRNA, $refRepeat, $refRepeatMatch_part, $refpieceStudied) = parseGFF ($ref_in, $gff); -print "Parsing FINISH\n"; - -foreach my $geneName (keys %$ref_genes){ # For each gene - printGene($geneName); -} - - -####################################################################################################################### - #################### - # methods # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - - -# method dedicated to the Exon,UTR,CDS printing -sub brickToprintHash{ - my ($featureList)=@_; - foreach my $feature (@$featureList){ - #print feature - 
$output->write_feature($feature); - } -} - -# method allowing to print all gene information (mRNAs,CDSs,Exons,UTRs...) -sub printGene { - my ($geneName) = @_; - - # print gene - my $geneFeature=$ref_genes->{$geneName}; - #get GO term - my @listGO; - my @transcriptList=@{$refmRNA->{$geneName}}; - foreach my $transcriptFeature (@transcriptList){ # For each transcript - $geneFeature->add_attribute('Ontology_term',$transcriptFeature->get_attribute('Ontology_term')); - } - $output->write_feature($geneFeature); - - # print mRNA - foreach my $transcriptFeature (@transcriptList){ # For each transcript - $output->write_feature($transcriptFeature); - - #print other feature (CDS,UTR,EXON) - my $transcriptName=$transcriptFeature->get_attribute('ID'); - #Get exonhash of the current mRNA studied - if(exists $refexon->{$transcriptName}){ # If exon are compacted some mRNA could not have exons ... - my @exonList=@{$refexon->{$transcriptName}}; - brickToprintHash(\@exonList); - } - #Get cdshash of the current mRNA studied - if (exists $refcds->{$transcriptName}){ - my @cdsList=@{$refcds->{$transcriptName}}; - brickToprintHash(\@cdsList); - } - #Get UTR5hash of the current mRNA studied if exist - if (exists $refUTR5->{$transcriptName}){ - my @UTR5List=@{$refUTR5->{$transcriptName}}; - brickToprintHash(\@UTR5List); - } - #Get UTR3hash of the current mRNA studied if exist - if (exists $refUTR3->{$transcriptName}){ - my @UTR3List=@{$refUTR3->{$transcriptName}}; - brickToprintHash(\@UTR3List); - } - #Get UTR3hash of the current mRNA studied if exist - if (exists $refUTR->{$transcriptName}){ - my @UTRHash=@{$refUTR->{$transcriptName}}; - brickToprintHash(\@UTRHash); - } - } -} - -sub addDataToHashOfHash { - my ($hashOfHash,$key,$data) = @_; - - #Test if exon hash already exists - if (exists $hashOfHash->{$key}){ - # put hash in hash - push (@{$hashOfHash->{$key}} , $data); - } - else{ - $hashOfHash->{$key} = [$data]; - } -} - -# method to parse GFF3 files -# take in account features 
gens,mRNA,tRNA,exon,CDS,three_prime_UTR and five_prime_UTR -sub parseGFF { - my($file_in,$fileName) = @_; - print( "Reading features from $fileName...\n"); - # counter statement - my $countFeatures = 0; my $mRNAcount=0; my $tRNAcount=0; my $exonCount=0; my %CDScount; my %UTRcount; my $nbExonExpanded=0; - my %UTR3count; my $sizeUTR3=0; my %UTR5count; my $sizeUTR5=0; my %UTRbothSideCount; my %UTR3SideCount; my %UTR5SideCount; my %UTRanyideCount; - # hash for duplication check statement - my %geneDupli; my %mRNAdupli; my %tRNAdupli; my %CDSdupli; my %exonDupli; my %UTRdupli; my %UTR3dupli; my %UTR5dupli; - # repeat variables statement - my $sizeRepeatMasker=0; my %RepeatMaskerDupli; my $sizeRepeatRunner=0; my %RepeatRunnerDupli; my %RepeatMatchPartDupli; - # hash of data (from feature) statement - my %genes; my %mRNAHash; my %exonHash; my %cdsHash; my %UTR5Hash; my %UTR3Hash; my %UTRHash; my %tRNAHash; my %repeatHash; my %repeatMatch_part; my %pieceStudied; - # various variable statement - my $duplicate="no"; my $exonExpandable=0; my $exonBelongsToMultipleParent=0; - - # read file and decompose it - while (my $feature = $file_in->read_feature() ) { - $countFeatures++; - my $type = $feature->feature_type(); - my $ID = $feature->get_attribute('ID'); - my $seqname=$feature->seqname(); - my $start=$feature->start(); - my $end=$feature->end(); - my $strand=$feature->strand(); - - #################################################### - ########## Manage feature WITHOUT parent ########### - #################################################### - if(! 
($feature->has_parent())){ - print "$type $feature\n"; - if ( lc($type) eq 'gene' ) { - $geneDupli{$ID}++; - if($geneDupli{$ID} == 1){ - $genes{$ID} = $feature; - my $pieceStudiedName="$seqname.$strand"; - push( @{ $pieceStudied{$pieceStudiedName}}, $ID ); - } - next(); - } - - if ( (lc ($feature->source()) =~ m/repeat/) and (lc ($type) eq "match")){ - $RepeatMaskerDupli{$ID}++; - if ( $RepeatMaskerDupli{$ID} == 1) { - $repeatHash{$ID} = $feature; - $sizeRepeatMasker=$sizeRepeatMasker+($feature->end() - $feature->start()); - } - next(); - } - if ( (lc ($feature->source()) =~ m/repeatrunner/) and (lc ($type) eq "protein_match")){ - $RepeatRunnerDupli{$ID}++; - if($RepeatRunnerDupli{$ID} == 1){ - $repeatHash{$ID} = $feature; - $sizeRepeatRunner=$sizeRepeatRunner+($feature->end() - $feature->start()); - } - next(); - } - printf( STDERR "Feature $type not yet taken in account...\n"); - } - - - ################################################ - ########## Manage feature WITH parent ########## - ################################################ - - else { # IF NOT a GENE FEATURE - my $parentID; my @Parent; - # Manage Attributes - my $uniqParent="yes"; - my $attributes=$feature->attributes(); - if (ref($feature->get_attribute('Parent')) eq 'ARRAY'){ - @Parent = @{$feature->get_attribute('Parent')}; - $parentID=$Parent[0]; #If several parent ID We take only the first one ! - $uniqParent="no"; - if(lc($type) ne "exon") { # check if multiple feature other than exon. It is not till implemented - print "STOP - Your file contains feature $type with multiple parents. Sorry but currently the script manage multiple parent only for exon !\n"; - print "This Warning means you cannot use the \"expand\" option. Consequently you cannot use the \"id\" option that use also the expand option.\n"; exit; } - } - else {$parentID= $feature->get_attribute('Parent');} - - # all the following attributes must have parentID - if ($parentID eq ""){ - print "No Parent attributes found for $ID ! 
It is mandatory...";exit; - } - - if ((lc($type) eq 'mrna') || (lc($type) eq 'transcript')){ - if ( !defined( $genes{$parentID}) ) { - printf( STDERR "ID ".$parentID." dont exists. Gene should be read before ...\n"); exit(); - } - $mRNAcount++;$mRNAdupli{$ID}++; - if($mRNAdupli{$ID} == 1){ - addDataToHashOfHash(\%mRNAHash,$parentID,$feature); - } - next(); - } - elsif(lc ($type) eq "trna"){ - $tRNAcount++;$tRNAdupli{$ID}++; - if($tRNAdupli{$ID} == 1){ - addDataToHashOfHash(\%tRNAHash,$parentID,$feature); - } - next(); - } - elsif (lc($type) eq "exon"){ - if ($uniqParent eq "no"){ #<<<<<<<<<<<<<<<< Expand exon >>>>>>>>>>>>>>> - $exonCount++;$exonDupli{$ID}++; - if($exonDupli{$ID} == 1){ - # for each parent of the exon, create an exon (even if only one...) - foreach my $mRNA_ID (@Parent){ - $nbExonExpanded++; - my $featureSaved=$feature->copy(); - - # Remove all parents with other IDs than $mRNA_ID from $feature. - foreach my $parent_to_remove ( @{ $featureSaved->parents() } ) { - if ( $parent_to_remove->get_attribute('ID') ne $mRNA_ID ) { - $featureSaved->remove_parent($parent_to_remove); - } - } - - #change ID to be Uniq - $featureSaved->set_attribute('ID',"$ID-$mRNA_ID"); - addDataToHashOfHash(\%exonHash,$mRNA_ID,$featureSaved); - } - $nbExonExpanded--; - } - } # end option expand - else{ # case where no expand option - if($uniqParent eq "no"){ $exonExpandable=$exonExpandable+$#Parent; $exonBelongsToMultipleParent++; } - $exonCount++;$exonDupli{$ID}++; - if($exonDupli{$ID} == 1){ - addDataToHashOfHash(\%exonHash,$parentID,$feature); - } - } - next(); - } - elsif (lc($type) eq "cds"){ # /!\ CDS feature is described by several features that have the same ID - my $createdID="$seqname.$start.$end.$ID"; - $CDScount{$ID}++;$CDSdupli{$createdID}++; - if($CDSdupli{$createdID} == 1){ - addDataToHashOfHash(\%cdsHash,$parentID,$feature); - } - next(); - } - elsif (lc($type) eq "five_prime_utr"){ # /!\ CDS feature is described by several features that have the same ID - my 
$createdID="$seqname.$start.$end.$ID"; - $UTR5count{$parentID}++;$UTR5dupli{$createdID}++; - if($UTR5dupli{$createdID} == 1){ - addDataToHashOfHash(\%UTR5Hash,$parentID,$feature); - $sizeUTR5=$sizeUTR5+($feature->end() - $feature->start()); - my $mRNAfeature=$feature->parent(); - my $geneID=$mRNAfeature->get_attribute('Parent'); - $UTR5SideCount{$geneID}++;$UTRanyideCount{$geneID}++; - if (exists ($UTR3count{$parentID})){ - $UTRbothSideCount{$geneID}++; - } - } - next(); - } - elsif (lc($type) eq "three_prime_utr"){ # /!\ CDS feature is described by several features that have the same ID - my $createdID="$seqname.$start.$end.$ID"; - $UTR3count{$parentID}++;$UTR3dupli{$createdID}++; - if($UTR3dupli{$createdID} == 1){ - addDataToHashOfHash(\%UTR3Hash,$parentID,$feature); - $sizeUTR3=$sizeUTR3+($feature->end() - $feature->start()); - my $mRNAfeature=$feature->parent(); - my $geneID=$mRNAfeature->get_attribute('Parent'); - $UTR3SideCount{$geneID}++;$UTRanyideCount{$geneID}++; - if (exists ($UTR5count{$parentID})){ - $UTRbothSideCount{$geneID}++; - } - } - next(); - } - elsif (lc($type) eq "utr"){ # /!\ CDS feature is described by several features that have the same ID - my $createdID="$seqname.$start.$end.$ID"; - $UTRcount{$parentID}++;$UTRdupli{$createdID}++; - if($UTRdupli{$createdID} == 1){ - addDataToHashOfHash(\%UTRHash,$parentID,$feature); - } - next(); - } - elsif( (lc ($feature->source()) =~ m/repeat/) and (lc ($type) eq "match_part")){ - $RepeatMatchPartDupli{$ID}++; - if($RepeatMatchPartDupli{$ID} == 1){ - addDataToHashOfHash(\%repeatMatch_part,$parentID,$feature); - } - next(); - } - else{ - printf( STDERR "Feature $type not yet taken in account...\n"); -# exit(); - } - } - } - - $ref_istream->close(); - - # Display information for the user: - my $stringPrint; - $stringPrint = "Read $countFeatures features.\n\n" ; - $stringPrint .= "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n"; - $stringPrint .= "vvvvvvvv Checking of duplicated features 
vvvvvvvv\n\n"; -# About duplicate # - my $nbDupli=0; - foreach my $id ( keys %geneDupli ){ - if ( $geneDupli{$id} != "1"){ - $nbDupli=$nbDupli+($geneDupli{$id}-1);$duplicate="yes"; - } - } - $stringPrint .= "Gene => $nbDupli duplicated (ID analyzed)\n"; - $nbDupli=0; - foreach my $id ( keys %mRNAdupli ){ - if ( $mRNAdupli{$id} != "1"){ - $nbDupli=$nbDupli+($mRNAdupli{$id}-1);$duplicate="yes"; - } - } - $stringPrint .= "mRNA => $nbDupli duplicated (ID analyzed)\n"; - $nbDupli=0; - foreach my $id ( keys %tRNAdupli ){ - if ( $tRNAdupli{$id} != "1"){ - $nbDupli=$nbDupli+($tRNAdupli{$id}-1);$duplicate="yes"; - } - } - $stringPrint .= "tRNA => $nbDupli duplicated (ID analyzed)\n"; - $nbDupli=0; - foreach my $id ( keys %exonDupli ){ - if ( $exonDupli{$id} != "1"){ - $nbDupli=$nbDupli+($exonDupli{$id}-1);$duplicate="yes"; - } - } - $stringPrint .= "Exon => $nbDupli duplicated (ID analyzed)\n"; - $nbDupli=0; - foreach my $id ( keys %CDSdupli ){ - if ( $CDSdupli{$id} != "1"){ - $nbDupli=$nbDupli+($CDSdupli{$id}-1);$duplicate="yes"; - } - } - $stringPrint .= "CDS => $nbDupli duplicated (seqname().start().end().ID analyzed)\n"; - $nbDupli=0; - foreach my $id ( keys %UTRdupli ){ - if ( $UTRdupli{$id} != "1"){ - $nbDupli=$nbDupli+($UTRdupli{$id}-1);$duplicate="yes"; - } - } - $stringPrint .= "UTR => $nbDupli duplicated (seqname().start().end().ID analyzed)\n"; - $nbDupli=0; - foreach my $id ( keys %UTR3dupli ){ - if ( $UTR3dupli{$id} != "1"){ - $nbDupli=$nbDupli+($UTR3dupli{$id}-1);$duplicate="yes"; - } - } - $stringPrint .= "UTR3 => $nbDupli duplicated (seqname().start().end().ID analyzed)\n"; - $nbDupli=0; - foreach my $id ( keys %UTR5dupli ){ - if ( $UTR5dupli{$id} != "1"){ - $nbDupli=$nbDupli+($UTR5dupli{$id}-1);$duplicate="yes"; - } - } - $stringPrint .= "UTR5 => $nbDupli duplicated (seqname().start().end().ID analyzed)\n"; - $nbDupli=0; - foreach my $id ( keys %RepeatMaskerDupli ){ - if ( $RepeatMaskerDupli{$id} != "1"){ - 
$nbDupli=$nbDupli+($RepeatMaskerDupli{$id}-1);$duplicate="yes"; - } - } - $stringPrint .= "RepeatMasker => $nbDupli duplicated (ID analyzed)\n"; - $nbDupli=0; - foreach my $id ( keys %RepeatRunnerDupli ){ - if ( $RepeatRunnerDupli{$id} != "1"){ - $nbDupli=$nbDupli+($RepeatRunnerDupli{$id}-1);$duplicate="yes"; - } - } - $stringPrint .= "RepeatRunner => $nbDupli duplicated (ID analyzed)\n"; - $nbDupli=0; - foreach my $id ( keys %RepeatMatchPartDupli ){ - if ( $RepeatMatchPartDupli{$id} != "1"){ - $nbDupli=$nbDupli+($RepeatMatchPartDupli{$id}-1);$duplicate="yes"; - } - } - $stringPrint .= "RepeatMatchpPart => $nbDupli duplicated (ID analyzed)\n"; - if ($duplicate eq "yes"){ - $stringPrint .= "##################################\n# Achthung /\\ Attention /\\ Be carefull => ID duplicate found ! #\n". - "# Duplicated features have been removed (Keep only one per ID)\n##################################\n\n"; - } - else {$stringPrint .= "##################################\n# Congratulation no duplicated ID #\n##################################\n";} - $stringPrint .= "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\n"; - -# STATISTICS -$stringPrint .= "vvvvvvvvvvvvvvvvvvvvvvvvvvvv\n"; -$stringPrint .= "vvvvvvvv STATISTICS vvvvvvvv\n\n"; -# GENE # - $stringPrint .= "Gene information:\n"; - my $countGeneUniq = keys (%geneDupli); - $stringPrint .= "Read $countGeneUniq total uniq gene\n" ; - my $nbrmRNAkey = keys (%mRNAHash); - $stringPrint .= "read gene from mRNA: $nbrmRNAkey\n" ; - my $nbtRNAkey = keys (%tRNAHash); - $stringPrint .= "read gene from tRNA: $nbtRNAkey\n\n" ; -# mRNA # - $stringPrint .= "mRNA information:\n"; - my $nbtotalRNA= ($tRNAcount+$mRNAcount); - my $nbmRNAisoform=($mRNAcount+$tRNAcount)-$countGeneUniq; - $stringPrint .= "Read $mRNAcount mRNA. 
\n"; - $stringPrint .= "Read $tRNAcount tRNA.\n"; - $stringPrint .= "Total RNA= $nbtotalRNA rna\n$nbmRNAisoform mRNA isoforms.\n"; - my $nbrExonKey = keys (%exonHash); - my $nbRNAwithoutExonDueToMutipleParentalExon=$nbtotalRNA-$nbrExonKey; - my $verifSize=$nbRNAwithoutExonDueToMutipleParentalExon+$nbrExonKey; - $stringPrint .= "Nb mRNA key from exons: $nbrExonKey. $nbRNAwithoutExonDueToMutipleParentalExon mRNA without exon du to multiple parents. Total is $verifSize. (Must be the same value as Total RNA)\n\n"; -# EXON # - $stringPrint .= "exon information:\nRead $exonCount exon \n"; - $stringPrint .= "\n"; -# CDS # - my $nbCDS = keys (%CDScount); - $stringPrint .= "CDS information:\nRead $nbCDS CDS\n" ; - my $nbCDSkey = keys (%CDSdupli); - $stringPrint .= "Read $nbCDSkey CDS exon\n\n" ; -# UTR # - my $nbUTR5exon = keys (%UTR5dupli); my $nbUTR5 = keys (%UTR5count); - my $nbUTR3exon = keys (%UTR3dupli); my $nbUTR3 = keys (%UTR3count); - my $nbUTRexon = keys (%UTRdupli); my $nbUTR = keys (%UTRcount); - my $totalUTR=$nbUTR5+$nbUTR3+$nbUTR; - my $UTRbothSideNb= keys (%UTRbothSideCount); - my $UTR3SideNB= keys %UTR3SideCount; - my $UTR5SideNB= keys %UTR5SideCount; - my $UTRanySideNB= keys %UTRanyideCount; - $stringPrint .= "UTR information:\nRead $totalUTR UTR exon\n...Read $nbUTR3 UTR3 <=> $nbUTR3exon exon <=> $sizeUTR3 bp length\n...Read $nbUTR5 UTR5 <=> $nbUTR5exon exon <=> $sizeUTR5 bp length\n". - "...Read $nbUTR UTR <=> $nbUTRexon exon (whithout more details if come from 3 or 5 prime)\n". 
- "...Nb gene that have both UTR: $UTRbothSideNb\n...Nb gene that have 3' UTR: $UTR3SideNB\n...Nb gene that have 5' UTR: $UTR5SideNB\n...Nb gene that have at least one UTR: $UTRanySideNB\n\n" ; -# Repeat # - $stringPrint .= "Repeat information:\n"; - my $nbRepeatMaskerFeatureUniq = keys (%RepeatMaskerDupli); - my $nbRepeatRunnerFeatureUniq = keys (%RepeatRunnerDupli); - my $nbRepeatFeat = $nbRepeatRunnerFeatureUniq+$nbRepeatMaskerFeatureUniq; - my $nbRepeatMatch = keys (%RepeatMatchPartDupli); - $stringPrint .= "Read $nbRepeatFeat repeat and $nbRepeatMatch match_part\nWe have $nbRepeatMaskerFeatureUniq repeat features from repeatmasker corresponding to $sizeRepeatMasker bases\n"; - $stringPrint .= "We have $nbRepeatRunnerFeatureUniq repeat features from repeatrunner corresponding to $sizeRepeatRunner bases\n\n"; - $stringPrint .= "Exons expansion added $nbExonExpanded new exons\n\n"; - - # display - print "$stringPrint"; - - return \%genes,\%mRNAHash,\%exonHash,\%cdsHash,\%UTR5Hash,\%UTR3Hash, \%UTRHash, \%tRNAHash, \%repeatHash, \%repeatMatch_part, \%pieceStudied; -} - - - -__END__ - diff --git a/annotation/Tools/Deprecated/maker_checkFusionSplitBetweenTwoBuilds.pl b/annotation/Tools/Deprecated/maker_checkFusionSplitBetweenTwoBuilds.pl deleted file mode 100755 index 1b2ca0762..000000000 --- a/annotation/Tools/Deprecated/maker_checkFusionSplitBetweenTwoBuilds.pl +++ /dev/null @@ -1,1261 +0,0 @@ -#!/usr/bin/env perl - -################################################ -# maker_checkFusionSplitBetweenTwoBuilds.pl v1 # -# Jacques Dainat 10/2014 # -# Jacques.dainat@nbis.se # -################################################ - -use strict; -use warnings; -use Data::Dumper; -use Carp; -use Getopt::Long; -use IO::File; -use Pod::Usage; - -use FindBin qw( $Bin ); -use lib "$Bin/../../lib/perl"; - -use Bio::Tools::GFF; - -my $opt_expand="no";my $nbexpand; -my $opt_reffile; -my $opt_tarfile; -my $opt_dirRes; -my $opt_help = 0; - -if ( !GetOptions( 'f|ref|reffile=s' => 
\$opt_reffile, - 't|tar|tarfile=s' => \$opt_tarfile, - 'o|out|output=s' => \$opt_dirRes, - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 0 } ); -} - -if ( !( defined($opt_reffile) && defined($opt_tarfile) && defined($opt_dirRes) ) ) { - pod2usage( { - -message => "Must specify 3 parameters:\nReference data gff3 file (--ref) " . - "\nTargeted gff3 file (--tar)\nOuput directory (--out)", - -verbose => 0, - -exitval => 2 } ); -} - -##################### -# Manage Input File # -##################### -my $ref_istream = IO::File->new(); -my $add_istream = IO::File->new(); - -$ref_istream->open( $opt_reffile, 'r' ) or - croak( - sprintf( "Can not open '%s' for reading: %s", $opt_reffile, $! ) ); -$add_istream->open( $opt_tarfile, 'r' ) or - croak( - sprintf( "Can not open '%s' for reading: %s", $opt_tarfile, $! ) ); - -my $ref_in = Bio::Tools::GFF->new( -fh => $ref_istream , -gff_version => 3); -my $add_in = Bio::Tools::GFF->new( -fh => $add_istream , -gff_version => 3); -######################### -# END Manage Input File # -######################### -################################# -# Manage Ouput Directory / File # -################################# -if (-d $opt_dirRes){ - print "The output directory choosen already exists. 
Please give me another Name.\n";exit(); -} -my $outDir=""; -if ($opt_dirRes =~ /$\//){ - $outDir=$opt_dirRes; -}else{$outDir="$opt_dirRes/";} -unless(mkdir $outDir) { - die "Unable to create $outDir"; -} -my $ostreamResume = IO::File->new(); -my $ostream = IO::File->new(); -my $ostream2 = IO::File->new(); -my $ostream3 = IO::File->new(); -my $ostream4 = IO::File->new(); -my $ostream5 = IO::File->new(); -my $ostream6 = IO::File->new(); -my $ostream7 = IO::File->new(); -my $ostream8 = IO::File->new(); - -my $opt_outputCluster=$outDir."ouputClusterRef.gff"; -my $opt_outputMerge=$outDir."ouputSplitMergeRef.gff"; -my $opt_outputCluster2=$outDir."ouputClusterTar.gff"; -my $opt_outputMerge2=$outDir."ouputSplitMergeTar.gff"; -my $opt_ouputGeneTarMergedInRef=$outDir."ouputGeneTarMergedInRef.gff"; -my $opt_ouputGeneRefMergedInTar=$outDir."ouputGeneRefMergedInTar.gff"; -my $opt_ouputGeneTarSplitInRef=$outDir."ouputGeneTarSplitInRef.gff"; -my $opt_ouputGeneRefSplitInTar=$outDir."ouputGeneRefSplitInTar.gff"; -my $resumeFile=$outDir."resume.txt"; -$ostreamResume->open( $resumeFile, 'w' ) or - croak( - sprintf( "Can not open '%s' for writing %s", $resumeFile, $! ) - ); -$ostream->open( $opt_outputCluster, 'w' ) or - croak( - sprintf( "Can not open '%s' for writing %s", $opt_outputCluster, $! ) - ); -$ostream2->open( $opt_outputMerge, 'w' ) or - croak( - sprintf( "Can not open '%s' for writing %s", $opt_outputMerge, $! ) - ); -$ostream3->open( $opt_outputCluster2, 'w' ) or - croak( - sprintf( "Can not open '%s' for writing %s", $opt_outputCluster2, $! ) - ); -$ostream4->open( $opt_outputMerge2, 'w' ) or - croak( - sprintf( "Can not open '%s' for writing %s", $opt_outputMerge2, $! ) - ); -$ostream5->open( $opt_ouputGeneTarMergedInRef, 'w' ) or - croak( - sprintf( "Can not open '%s' for writing %s", $opt_ouputGeneTarMergedInRef, $! ) - ); -$ostream6->open( $opt_ouputGeneRefMergedInTar, 'w' ) or - croak( - sprintf( "Can not open '%s' for writing %s", $opt_ouputGeneRefMergedInTar, $! 
) - ); -$ostream7->open( $opt_ouputGeneTarSplitInRef, 'w' ) or - croak( - sprintf( "Can not open '%s' for writing %s", $opt_ouputGeneTarSplitInRef, $! ) - ); -$ostream8->open( $opt_ouputGeneRefSplitInTar, 'w' ) or - croak( - sprintf( "Can not open '%s' for writing %s", $opt_ouputGeneRefSplitInTar, $! ) - ); - -my $outputCluster = $ostream ; -my $outputMerge = $ostream2 ; -my $outputCluster2 = $ostream3 ; -my $outputMerge2 = $ostream4 ; -my $ouputGeneTarMergedInRef = $ostream5 ; -my $ouputGeneRefMergedInTar = $ostream6 ; -my $ouputGeneTarSplitInRef = $ostream7 ; -my $ouputGeneRefSplitInTar = $ostream8 ; - -print $outputCluster "##gff-version 3\n"; -print $outputMerge "##gff-version 3\n"; -print $outputCluster2 "##gff-version 3\n"; -print $outputMerge2 "##gff-version 3\n"; -print $ouputGeneTarMergedInRef "##gff-version 3\n"; -print $ouputGeneRefMergedInTar "##gff-version 3\n"; -print $ouputGeneTarSplitInRef "##gff-version 3\n"; -print $ouputGeneRefSplitInTar "##gff-version 3\n"; -##################################### -# END Manage Ouput Directory / File # -##################################### - -# declaration hashes reference -# data from file one -my $ref_genes; my $ref_trnagenes; my $refmRNA;my $refexon; my $refcds; my $refUTR5; my $refUTR3; my $reftRNA; my $refpieceStudied; -# data from file 2 -my $tar_genes; my $tar_trnagenes; my $tarmRNA;my $tarexon; my $tarcds; my $tarUTR5; my $tarUTR3; my $tartRNA; my $tarpieceStudied; -# data created by mixing data from file1 and file2 -my $mix_genes; my $mix_trnagenes; my $mixmRNA;my $mixexon; my $mixcds; my $mixUTR5; my $mixUTR3; my $mixtRNA; my $mixpieceStudied; - -#Parse GFF genes,\%mRNA,\%exon,\%cds,\%UTR5,\%UTR3,\%tRNA; -($ref_genes,$ref_trnagenes,$refmRNA,$refexon, $refcds, $refUTR5, $refUTR3, $reftRNA, $refpieceStudied) = parseGFF($ref_in,$opt_reffile, $opt_expand); -($tar_genes,$tar_trnagenes,$tarmRNA,$tarexon, $tarcds, $tarUTR5, $tarUTR3, $tartRNA, $tarpieceStudied) = parseGFF($add_in,$opt_tarfile, $opt_expand); 
-print("Parsing Finished\n"); - -sortByPos($refexon); -sortByPos($tarexon); -sortByPos($refcds); -sortByPos($tarcds); -sortByPos($refUTR5); -sortByPos($tarUTR5); -sortByPos($refUTR3); -sortByPos($tarUTR3); -print("Sort Finished\n"); - -my $countGeneA=0; -my $countGeneB=0; -my $nbGeneFromContigSpecificRef=0; -my $nbSpecificToTar=0; -my %refpieceStudiedHash=%$refpieceStudied; -my %tarpieceStudiedHash=%$tarpieceStudied; -my @listContigsBoth;my @listContigsA;my @listContigsB; - - - ############################################################## - # A) Manage scaffold (stranded) annotated only by one method # print genes and remove them from hashes - ############################################################## - -######################################################### -# A.1) Manage gene from contig annotated only in file 1 # contig direction took in account -######################################################### -my $method1=0; -foreach my $ContigName (keys %refpieceStudiedHash) { - if (! exists $tarpieceStudiedHash{$ContigName}){ - $method1++; - my $gene=$refpieceStudiedHash{$ContigName}; - foreach my $geneName (@{$gene}) { - $nbGeneFromContigSpecificRef++; - $countGeneA++; - } - my $tmpContigName = $ContigName; - $tmpContigName =~ s/[+-]//g; - push(@listContigsA, $tmpContigName); - delete $refpieceStudiedHash{$ContigName}; - } -} -my @listContigsAUniq = Array_Unique(@listContigsA); -my $nbContigAUniq=$#listContigsAUniq+1; -print("$nbContigAUniq contigs annotated only in the reference build\n"); -#^^^^^^^^^^^^^^^^^^^ END ^^^^^^^^^^^^^^^^^^^ - -######################################################### -# A.2) Manage gene from contig annotated only in file 2 # contig direction took in account -######################################################### -my $method2=0; -foreach my $ContigName (keys %tarpieceStudiedHash) { - if (! 
exists $refpieceStudiedHash{$ContigName}){ - $method2++; - my $gene=$tarpieceStudiedHash{$ContigName}; - foreach my $geneName (@{$gene}) { - $nbSpecificToTar++; - $countGeneB++; - } - my $tmpContigName = $ContigName; - $tmpContigName =~ s/[+-]//g; - push(@listContigsB, $tmpContigName); - delete $tarpieceStudiedHash{$ContigName}; - } -} -my @listContigsBUniq = Array_Unique(@listContigsB); -my $nbContigBUniq=$#listContigsBUniq+1; -print("$nbContigBUniq contigs annotated only in the target build\n"); -#^^^^^^^^^^^^^^^^^^^ END ^^^^^^^^^^^^^^^^^^^ - - ############################################################################ - # B) Manage scaffold (stranded) which contain annotation from both methods # - ############################################################################ -my $countSingleGeneB=0; -my $OverlapingB=0; -my $OverlapingA=0; -my $nbNoOverlapingA=0; -my $nbNoOverlapingB=0; -my %toStretch; -my %clusterCase; -my %fusionORsplit; -my $fusion; -my $split; -print "Now we are analyzing the contigs containing annotations from both builds:\n"; -foreach my $ContigName (keys %refpieceStudiedHash) { -# print("Study of ContigName $ContigName\n"); - my $tmpContigName = $ContigName; - $tmpContigName =~ s/[+-]//g; - push(@listContigsBoth, $tmpContigName); - #for each gene A to B - my @tempTabName = @{$refpieceStudiedHash{$ContigName}}; # use of temporary variable to be sure to loop over all element. - foreach my $GeneName (@tempTabName) { - $countGeneA++; - #print("\nStudy overlap of gene $GeneName\n"); - - ###### Test if gene already studied (started by another gene but due to overlap it is already studied) - my $geneAlreadyStudied="yes"; - foreach my $gene (@{$refpieceStudiedHash{$ContigName}}){ - if ($gene eq $GeneName){$geneAlreadyStudied="no";last;} - } - if ($geneAlreadyStudied eq "yes"){next;}# print "$GeneName already analyzed because overlap other chunck: NEXT \n"; - ##### End test if already studied. 
If not, we continue - - else - { - # Declare table which I will work with - my @ListRefOverlapAtotest;my @ListOverlapAtested;my @ListNoOverlapA; my @ListOverlapAtestnoneed; my @ListPerfectOverlapA; - my @ListRefOverlapBtotest;my @ListOverlapBtested;my @ListNoOverlapB; my @ListOverlapBtestnoneed; my @ListPerfectOverlapB; - - my @LinkTocurrentGeneFeature=$ref_genes->{$GeneName}; #### >>>>>> BASE OF THE FEATURE TESTED FOR OVERLAP !!! CURRENTLY WE CHECK THE GENE FEATURE ! - my $firstRound=0; - - push (@ListRefOverlapAtotest, @LinkTocurrentGeneFeature); - - while ($#ListRefOverlapAtotest != -1 or $#ListRefOverlapBtotest != -1){ - - $firstRound++; - if ($#ListRefOverlapAtotest != -1){ - # test every A - my ($ListRefOverlapBtotest, $ListOverlapBtestnoneed, $ListOverlapAtested, $ListNoOverlapA, $ListPerfectOverlapA) = retrieveAllOverlap( $firstRound, $refpieceStudiedHash{$ContigName}, $tarpieceStudiedHash{$ContigName}, $tar_genes, \@ListRefOverlapAtotest, \@ListOverlapAtested, \@ListNoOverlapA, \@ListPerfectOverlapA, \@ListOverlapBtestnoneed,1); - @ListRefOverlapBtotest = @$ListRefOverlapBtotest; - # print "Nb overlap B to test = $#ListRefOverlapBtotest\n"; - @ListOverlapBtestnoneed = @$ListOverlapBtestnoneed; - @ListRefOverlapAtotest = (); - # print Dumper($ListOverlapAtested); - # foreach my $i (@{$ListOverlapAtested->[0]}){print "\nPPPPP $i\n"; } - # print "\nPPPPP @{$ListOverlapAtested->[0]}\n"; - @ListOverlapAtested = @$ListOverlapAtested; - @ListPerfectOverlapA = @$ListPerfectOverlapA; - @ListNoOverlapA = @$ListNoOverlapA; - - # print "ListOverlapAtested A $#ListRefOverlapBtotest, $#ListOverlapBtestnoneed, $#ListOverlapAtested, $#ListNoOverlapA, $#ListPerfectOverlapA\n"; - next(); #stop here and avoid test B - } - # print "\nPPPPP1 $ListOverlapAtested[0]\n"; - if ($#ListRefOverlapBtotest != -1){ - # test every B - #print "List size overlap B to test = $#ListRefOverlapBtotest $firstRound \n"; - - my ($ListRefOverlapAtotest, $ListOverlapAtestnoneed, 
$ListOverlapBtested, $ListNoOverlapB, $ListPerfectOverlapB) = retrieveAllOverlap( $firstRound, $tarpieceStudiedHash{$ContigName}, $refpieceStudiedHash{$ContigName}, $ref_genes, \@ListRefOverlapBtotest, \@ListOverlapBtested,\@ListNoOverlapB, \@ListPerfectOverlapB, \@ListOverlapAtestnoneed,0); - @ListRefOverlapAtotest = @$ListRefOverlapAtotest; - @ListOverlapAtestnoneed = @$ListOverlapAtestnoneed; - - @ListRefOverlapBtotest = (); - @ListOverlapBtested = @$ListOverlapBtested; - @ListPerfectOverlapB = @$ListPerfectOverlapB; - @ListNoOverlapB = @$ListNoOverlapB; - # print "ListOverlapBtested B $#ListOverlapBtested\n"; - # print "\nPPPPP2 @{$ListOverlapAtested[0]}\n"; - next(); #stop here - } - } - $nbNoOverlapingA=$nbNoOverlapingA+$#ListNoOverlapA+1; - $OverlapingA=$OverlapingA+$#ListOverlapAtested+$#ListOverlapAtestnoneed+$#ListPerfectOverlapA+3; - - $nbNoOverlapingB=$nbNoOverlapingB+$#ListNoOverlapB+1; - $OverlapingB=$OverlapingB+$#ListOverlapBtested+$#ListOverlapBtestnoneed+$#ListPerfectOverlapA+3; - - - my $nbFragment = ($#ListOverlapAtested+$#ListOverlapBtested+2); - #print ("\nOVERLAP step 1 END\n"); - if ( $nbFragment < 2 ){ - ################################## - # Manage single gene from file 1 # - ################################## - #HEre can be printed => Single gene from file A and PerfectMatch from A or B depending to an iption like my $contigFusionA = "ok"; < /!\ > - #^^^^^^^^^^^^ END ^^^^^^^^^^^^ - } - elsif ( $nbFragment == 2){ #case can be stretched - #print "ListOverlapAtested @ListOverlapAtested ListOverlapBtested @ListOverlapBtested"; - push ( @{ $toStretch{$GeneName} }, [@ListOverlapAtested],[@ListOverlapBtested]); - } - elsif ($nbFragment == 3){ - #print "FUSION (ref->tar)/SPLIT(tar->ref) case\n"; - push ( @{ $fusionORsplit{$GeneName} }, [[@ListOverlapAtested],[@ListOverlapBtested]]); - # Fusion in target Build - if ($#ListOverlapAtested > $#ListOverlapBtested){ - $fusion++; - } else{$split++;} # Split in the target Build - } - else{ # Cluster case 
(more than 3 segments) - # print "CLUSTER CASE"; - push ( @{ $clusterCase{$GeneName} }, [[@ListOverlapAtested],[@ListOverlapBtested]]); - } - - } - } - -################################## -# Manage single gene from file 2 # -################################## -my @ListSingleB=@{$tarpieceStudiedHash{$ContigName}}; -$countSingleGeneB= $countSingleGeneB + $#ListSingleB+1; -## END ## -} - - -################# -# Display results - -my $nbrSplitMergeToManage = keys (%fusionORsplit); -my $nbrClusterToManage = keys (%clusterCase); -my $nbrStretchingCaseToManage = keys (%toStretch); - -my $totalSpecificToA=$nbGeneFromContigSpecificRef+$nbNoOverlapingA; -my $totalA=$OverlapingA+$totalSpecificToA; - -my $totalSpecificToB=$nbSpecificToTar+$nbNoOverlapingB+$countSingleGeneB; -my $totalB=$OverlapingB+$totalSpecificToB; - -my @listContigsUniq = Array_Unique(@listContigsBoth); -my $nbContigUniq=$#listContigsUniq+1; - -my $resultToPrint=""; -$resultToPrint.= "\n\n######### RESULTS #########:\n\n"; -$resultToPrint.= "File1 ($opt_reffile):\n"; -$resultToPrint.= "We studied ($nbContigUniq) contigs and $countGeneA genes\n"; -$resultToPrint.= "=> $totalSpecificToA genes are nonoverlapping (i.e unique to this gene build => $nbGeneFromContigSpecificRef are on contig(s) only annotated in this gene build and $nbNoOverlapingA are on contigs also annotated by the second gene build.)\n"; -$resultToPrint.= "=> $OverlapingA genes overlap genes from file 2\n"; -$resultToPrint.= "Total gene checked : $totalA \n\n"; -$resultToPrint.= "File2 ($opt_tarfile):\n"; -$resultToPrint.= "=> $totalSpecificToB genes are nonoverlapping (i.e unique to this gene build => $nbSpecificToTar are on contig(s) only annotated in this gene build and $countSingleGeneB are on contigs also annotated by the first gene build.)\n"; -$resultToPrint.= "=> $OverlapingB genes overlap genes from file 1\n"; -$resultToPrint.= "Total gene checked = $totalB\n\n"; -$resultToPrint.= "Results:\n"; -$resultToPrint.= "Number of SPLIT/MERGE 
case : $nbrSplitMergeToManage\n"; -my $nbGeneImplicatedF=$fusion*2; -my $nbGeneImplicatedS=$split*2; -$resultToPrint.= "More precisely: $fusion cases of fusion in the target build detected. (corresponding to $nbGeneImplicatedF genes from reference build implicated) \n"; -$resultToPrint.= " $split cases of split in the target build detected. (corresponding to $nbGeneImplicatedS genes from target build implicated) \n"; -$resultToPrint.= "Number of CLUSTER case : $nbrClusterToManage\n"; -$resultToPrint.= "=> Result are written in gff3 format in $outDir directory\n\n"; -print $resultToPrint; -print $ostreamResume "$resultToPrint"; - -################################ -# B.1) Manage case to stretch # -################################ - -# Nothing to do - -################################## -# B.2) Manage split/fusion case # -################################## -for my $geneKey (keys %fusionORsplit){ - my @A_geneList = @{ @{ $ { $fusionORsplit{$geneKey} } [0] } [0]}; - my @B_geneList = @{ @{ $ { $fusionORsplit{$geneKey} } [0] } [1]}; - # print all gene A - for my $geneInfo (@A_geneList){ - my $geneName = @{$geneInfo}[0]; - printgenes($outputMerge, $geneName, $ref_genes, $refmRNA, $refexon, $refcds, $refUTR5, $refUTR3, $reftRNA) ; - } - # print all gene B - for my $geneInfo (@B_geneList){ - my $geneName = @{$geneInfo}[0]; - printgenes($outputMerge2, $geneName, $tar_genes, $tarmRNA, $tarexon, $tarcds, $tarUTR5, $tarUTR3, $tartRNA) ; - } - #Print Independently Fusion or Split - if ($#A_geneList > $#B_geneList){ - #Print ref genes merged in target build - for my $geneInfo (@A_geneList){ - my $geneName = @{$geneInfo}[0]; - printgenes($ouputGeneRefMergedInTar, $geneName, $ref_genes, $refmRNA, $refexon, $refcds, $refUTR5, $refUTR3, $reftRNA) ; - } - # print result of gene fusion in target build - for my $geneInfo (@B_geneList){ - my $geneName = @{$geneInfo}[0]; - printgenes($ouputGeneTarSplitInRef, $geneName, $tar_genes, $tarmRNA, $tarexon, $tarcds, $tarUTR5, $tarUTR3, $tartRNA) 
; - } - } - else{ - #Print target genes merged in ref build - for my $geneInfo (@A_geneList){ - my $geneName = @{$geneInfo}[0]; - printgenes($ouputGeneTarMergedInRef, $geneName, $ref_genes, $refmRNA, $refexon, $refcds, $refUTR5, $refUTR3, $reftRNA) ; - } - # print result of gene fusion in ref build - for my $geneInfo (@B_geneList){ - my $geneName = @{$geneInfo}[0]; - printgenes($ouputGeneRefSplitInTar, $geneName, $tar_genes, $tarmRNA, $tarexon, $tarcds, $tarUTR5, $tarUTR3, $tartRNA) ; - } - } -} - -############################# -# B.2) Manage Cluster case # -############################# -for my $geneKey (keys %clusterCase){ -# print "/!\\WARNING/!\\ Cluster case ! We keep the genes intact but you have to manage manualy this case:\n"; -# print "$geneKey\n"; - my @A_geneList = @{ @{ $ { $clusterCase{$geneKey} } [0] } [0]}; - my @B_geneList = @{ @{ $ { $clusterCase{$geneKey} } [0] } [1]}; - # print all gene A - for my $geneInfo (@A_geneList){ - my $geneName = @{$geneInfo}[0]; - printgenes($outputCluster, $geneName, $ref_genes, $refmRNA, $refexon, $refcds, $refUTR5, $refUTR3, $reftRNA) ; - } - # print all gene B - for my $geneInfo (@B_geneList){ - my $geneName = @{$geneInfo}[0]; - printgenes($outputCluster2, $geneName, $tar_genes, $tarmRNA, $tarexon, $tarcds, $tarUTR5, $tarUTR3, $tartRNA) ; - } -} - -####################################################################################################################### - #################### - # methods # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - -sub Array_Unique -{ - my @List = @_; - my %FutureList; - foreach(@List) - { - $FutureList{$_} = 1; # remove doublons - } - return (keys(%FutureList)); -} - -sub retrieveAllOverlap { - my ($firstRound, $pieceStudiedHashA, $pieceStudiedHashB, $genesHashB, $ListA_Overlap_totest, $ListA_Overlap_tostretch, $ListA_NoOverlap, $ListA_PerfectOverlap, $ListOverlaptestnoneedB,$ok) = @_; - my @newListrefOverlaptotestB; -# my 
$sizeListrefOverlaptotestA = @$ListrefOverlaptotestA; my $sizeListOverlaptestedA = @$ListOverlaptestedA; my $sizeListNoOverlapA = @$ListNoOverlapA; my $sizeListOverlaptestnoneedB = @$ListOverlaptestnoneedB; my $sizeListPerfectOverlapA = @$ListPerfectOverlapA; -# print "firstRound $firstRound\tListrefOverlaptotestA $sizeListrefOverlaptotestA\tListOverlaptestedA $sizeListOverlaptestedA\tListNoOverlapA $sizeListNoOverlapA\tListOverlaptestnoneedB $sizeListOverlaptestnoneedB\tListPerfectOverlapA $sizeListPerfectOverlapA\n"; -# print "Begin retrieveAllOverlap\n"; - for my $linkToGene (@{$ListA_Overlap_totest}) { - #print "Link = @{$linkToGene} \n"; - my $startA=@{$linkToGene}[4]; - my $endA=@{$linkToGene}[5]; - my $wecanremove="no";my @listGneToRemove; - my $Overlaped="no"; - my $PerfectOverlap="no"; - - for my $geneOppo (@{$pieceStudiedHashB}){ # allow to work only on gene on the contig - my $startB=$genesHashB->{$geneOppo}[4]; - my $endB=$genesHashB->{$geneOppo}[5]; - - my $resuOverlap = testOverlap($startA, $endA, $startB, $endB); ## <====== Call Overlap method - if ($resuOverlap eq "perfectOverlap"){ - push (@{$ListA_PerfectOverlap}, [@{$linkToGene}] ); # keep reference - $wecanremove="yes"; - push (@listGneToRemove, $geneOppo); ################## <=============== Change to keep the possible fusion ?? 
- $Overlaped="yes";$PerfectOverlap="yes"; - #print "OVERLAP FOUND => perfect Overlap\n"; - last; - } - elsif ($resuOverlap eq "noNeedToverify"){ - # print "geneOppo @{$genesHash->{$geneOppo}}\n"; - push (@{$ListOverlaptestnoneedB}, [ @{$genesHashB->{$geneOppo}} ] ); # save features and remove from HashGeneral - $wecanremove="yes"; # Need to be out of the loop for remove the gene because affect the loop - push (@listGneToRemove, $geneOppo); - $Overlaped="yes"; - #print "OVERLAP FOUND => No NeedToverify\n"; - } - elsif ($resuOverlap eq "needToVerify"){ - # print "geneOppo $geneOppo\n"; - push (@newListrefOverlaptotestB, $genesHashB->{$geneOppo} ); - $Overlaped="yes"; - #print "OVERLAP FOUND => needToVerify $genesHashB->{$geneOppo}\n"; - } - } - - if($Overlaped eq "no"){ - if ($firstRound == "1"){ - #print "NO Overlap\n"; - push (@{$ListA_NoOverlap}, [@{$linkToGene}]); - } - else{ - #print "No More Overlap\n"; - push (@{$ListA_Overlap_tostretch}, [@{$linkToGene}]); - } - } - elsif ($PerfectOverlap ne "yes"){ - #print "test overlap stretched @{$linkToGene}"; - push (@{$ListA_Overlap_tostretch}, [@{$linkToGene}]); - } - - if($wecanremove eq "yes") { - for my $gene (@listGneToRemove){ - removeElementInList($pieceStudiedHashB, $gene); - } - } - # REMOVE - # Need to be deleted to not re-use it for retrieve overlap => Because we will found again the same // The removed one will be display through @ListA_Overlap_tested - removeElementInList($pieceStudiedHashA, $linkToGene->[0]); - } - return (\@newListrefOverlaptotestB, $ListOverlaptestnoneedB, $ListA_Overlap_tostretch, $ListA_NoOverlap, $ListA_PerfectOverlap); -} - -# This method allows to shift on left all number key of a hash -sub deleteFirstElementAndReorganise { - my ($refHash) = @_; - - my %tmpHash=%$refHash; - my $lastPos= keys %tmpHash; - - delete $refHash->{$lastPos}; - foreach my $key (keys %{$refHash}){ - $refHash->{$key} = $tmpHash{$key+1}; - } -} - -sub firstExonRemovedAffectedCDS{ - my ($exon,$cds)=@_; - my 
$exonStart=$exon->[4]; - my $exonEnd=$exon->[5]; - my $cdsStart=$cds->[4]; - my $cdsEnd=$cds->[5]; - my $affected=""; - if (($cdsStart >= $exonStart) and ($cdsEnd >= $exonEnd)) { - $affected="no"; - } - else{$affected="yes";} -return $affected; -} - -sub lastExonRemovedAffectedCDS{ - my ($exon,$cds)=@_; - my $exonStart=$exon->[4]; - my $exonEnd=$exon->[5]; - my $cdsStart=$cds->[4]; - my $cdsEnd=$cds->[5]; - my $affected=""; - if (($cdsStart <= $exonStart) and ($cdsEnd <= $exonEnd)) { - $affected="no"; - } - else{$affected="yes";} -return $affected; -} - -sub exonFinishByCDS{ - my ($exon,$cds)=@_; - my $exonEnd=$exon->[5]; - my $cdsEnd=$cds->[5]; - my $finishByCDS=""; - if ( $exonEnd == $cdsEnd ) { - $finishByCDS="yes"; - } - else{$finishByCDS="no";} -return $finishByCDS; -} - -sub exonStartByCDS{ - my ($exon,$cds, $strand)=@_; - my $exonStart; - my $cdsStart; - - if ($strand eq "+"){ - $exonStart=$exon->[4]; - $cdsStart=$cds->[4]; - } - else{ - $exonStart=$exon->[5]; - $cdsStart=$cds->[5]; - } - my $startByCDS=""; - if ( $exonStart == $cdsStart ) { - $startByCDS="yes"; - } - else{$startByCDS="no";} -return $startByCDS; -} - -sub featureOnExon{ # start or end should be common !. 
- my ($feature1,$feature2)=@_; - my $feature1Start=$feature1->[4]; - my $feature1End=$feature1->[5]; - my $feature2Start=$feature2->[4]; - my $feature2End=$feature2->[5]; - my $onExon=""; - if (($feature1Start == $feature2Start) or ($feature1End == $feature2End)) { - $onExon="yes"; - } - else{$onExon="no";} -return $onExon; -} - -sub chooseLongerCDSfrommRNAandPrint{ - my ($output, $A_mRNAname, $geneA, $B_mRNAname, $geneB)= @_; - my $CDSsizeA=CDSsize($A_mRNAname,"ref"); - my $CDSsizeB=CDSsize($B_mRNAname,"tar"); - if ($CDSsizeA >= $CDSsizeB){ - printgenes($output, $geneA, $ref_genes, $refmRNA, $refexon, $refcds, $refUTR5, $refUTR3, $reftRNA); #<========== # HEre can be printed <== - } - else{ - printgenes($output, $geneB, $tar_genes, $tarmRNA, $tarexon, $tarcds, $tarUTR5, $tarUTR3, $reftRNA) #<========== # HEre can be printed <== - } -} - -sub keepmRNAofTheLongestCDS{ - my ($mRNAHash,$type) = @_; - my $CDSsize=0; - my $mRNA=""; - foreach my $mRNAlistInfo (keys %{$mRNAHash} ){ # For each mRNA - #get mRNA name - my $mRNAname= $mRNAHash->{$mRNAlistInfo}[0]; - my $mRNAnameSize=CDSsize($mRNAname,$type); - - if ($mRNAnameSize > $CDSsize){ - $mRNA=$mRNAname; - } - } - return $mRNA; -} - -sub CDSsize { - my ($mRNAname,$WhichCDS) = @_; - - my $CDS="";my $CDSsize=0; - if ($WhichCDS eq "ref"){$CDS=$refcds;}else{$CDS=$tarcds;} - my %hashCDs=@{${$CDS}{$mRNAname}}; - - foreach my $key (keys %hashCDs){ - my $chunckSize=$hashCDs{$key}[5]-$hashCDs{$key}[4]; - $CDSsize=$CDSsize+$chunckSize; - } - return $CDSsize; -} - -sub removeElementInList { - my ($hash1, $element_omitted) = @_; - - @{$hash1}= grep { $_ ne $element_omitted } @{$hash1}; #remove element of the list -# print "What I delete ? $hash2->{$element_omitted}"; -# delete $hash2->{$element_omitted}; # remove tuple in hash -# print "Now I deleted verification:"; -# if (! 
exists $hash2->{$element_omitted}){ -# print "DELETION OK \n"; -# } -# else {print "DELETION ERROR \n";} -} - -sub testOverlap { - my ($startA, $endA, $startB, $endB) = @_; - if($startA == $startB and $endA == $endB){ #overlap perfect ---- - return "perfectOverlap"; # ---- - } - elsif($startA <= $startB and $endA >= $endB){# No need to verify -------- - return "noNeedToverify"; # -- - } - elsif (($startA >= $startB and $startA <= $endB) or ($endA >= $startB and $endA <= $endB)) { # --- --- --- - return "needToVerify"; # ------- ---- ------ - } - else{ - return "noOverlap"; - } -} - -sub printNewGene{ - my($output, $A_gene, $A_Longest_mRNA, $A_exon, $A_CDS, $A_UTR, $side)= @_; - print "\nSTART print BRICK\n"; - my $geneName = $A_gene->[0]; - my $geneID = "new_$A_gene->[0]"; - $A_Longest_mRNA->[0] =~ /.*(-mRNA-.*)/ ; - my $mRNAName = "$geneID$1"; - - brickToprintTabNewName($output, $A_gene, $geneID, $geneID); - brickToprintTabNewName($output, $A_Longest_mRNA, $mRNAName, $geneID); - brickToprintHashNewName($output, $A_exon, $mRNAName); - brickToprintHashNewName($output, $A_CDS, $mRNAName); - - if($side eq "right"){ # print UTR5 fron B and UTR3 from A - if (exists $refUTR5->{$geneName}){ #refUTR is getting as variable reaching fron everywhere - my %hashUTR5=@{$tarUTR5->{$geneName}}; - brickToprintHashNewName($output, \%hashUTR5, $mRNAName); - } - brickToprintHashNewName($output, $A_UTR, $mRNAName); - } - elsif($side eq "left"){ - brickToprintHash($output, $A_UTR); - if (exists $refUTR3->{$geneName}){ - my %hashUTR3=@{$refUTR3->{$geneName}}; - brickToprintHashNewName($output, \%hashUTR3, $mRNAName); - } - } - else{ - #UTR5 and UTR3 data are in the same hash - brickToprintHashNewName($output, $A_UTR, $mRNAName); - } - print "END print BRICK\n\n"; -} - -sub brickToprintTab{ - my ($output, $refTab)=@_; - - my $cpt=0; - foreach my $Element (@{$refTab}) { - if (!($cpt==0 or $cpt==9)){print $output "$Element\t";} - if ($cpt == 9){ - foreach my $element (@{$Element}){ - print 
$output "$element->[0]=$element->[1];"; - } - print $output "\n" - } - $cpt++; - } -} - -sub brickToprintTabNewName{ - my ($output, $refTab, $ID, $Parent)=@_; - - my $cpt=0; - foreach my $Element (@{$refTab}) { - if (!($cpt==0 or $cpt==9)){print $output "$Element\t";} - if ($cpt == 9){ - foreach my $element (@{$Element}){ - if($element->[0] eq "ID"){ - print $output "ID=$ID;"; - } - elsif($element->[0] eq "Parent"){ - print $output "Parent=$Parent;"; - } - elsif($element->[0] eq "Name"){ - print $output "Name=$ID;"; - } - else{ - print $output "$element->[0]=$element->[1];"; - } - } - print $output "\n" - } - $cpt++; - } -} - -sub brickToprintHashNewName{ - my ($output, $refEle, $prefix)=@_; - - my %refEle = %$refEle; - my $nbEle = keys %refEle; - for (my $cptEl = 1 ; $cptEl <= $nbEle ; $cptEl++) { - my @tabEle=@{$refEle{$cptEl}}; - my $cpt=0; - # # print exon info - foreach my $Element (@tabEle) { - if (!($cpt==0 or $cpt==9)){print $output "$Element\t";} - if ($cpt == 9){ - foreach my $element (@{$Element}){ - if($element->[0] eq "ID"){ - $element->[1] =~ /.+-mRNA-[0-9]*(:.*)/ ; - print $output "ID=$prefix$1;old_name=$element->[1];"; - } - elsif($element->[0] eq "Parent"){ - print $output "Parent=$prefix;old_parent=$element->[1];"; - } - else{ - print $output "$element->[0]=$element->[1];"; - } - } - print $output "\n" - } - $cpt++; - } - } -} - -# print in sorted -sub brickToprintHash{ - my ($output, $refEle)=@_; - - my %refEle = %$refEle; - my $nbEle = keys %refEle; - for (my $cptEl = 1 ; $cptEl <= $nbEle ; $cptEl++) { - my @tabEle=@{$refEle{$cptEl}}; - my $cpt=0; - # # print exon info - foreach my $Element (@tabEle) { - if (!($cpt==0 or $cpt==9)){print $output "$Element\t";} - if ($cpt == 9){ - foreach my $element (@{$Element}){ - print $output "$element->[0]=$element->[1];"; - } - print $output "\n" - } - $cpt++; - } - } -} - -#This function use printgene -sub printgenes { - my ($output, $gene, $OriginalGenesHash, $mRNA, $exon, $cds, $UTR5, $UTR3, $tRNA) = @_; 
- my %OriginalGenesHashOk = %$OriginalGenesHash; - - #if $gene is list/hash of gene => print all of them - if (ref $gene eq 'ARRAY') { - foreach my $geneName (@{$gene}) { - my $current_geneTab = $OriginalGenesHashOk{$geneName}; - printgene($output, $current_geneTab, $mRNA, $exon, $cds, $UTR5, $UTR3, $tRNA); - } - } - #Print one gene if is just one gene name - else{ - my $current_geneTab = $OriginalGenesHashOk{$gene}; - printgene($output, $current_geneTab, $mRNA, $exon, $cds, $UTR5, $UTR3, $tRNA); - } -} - -sub printgene { - my ($output, $genetab, $mRNA, $exon, $cds, $UTR5, $UTR3, $tRNA) = @_; - my $geneName=$genetab->[0]; - #print gene tab - brickToprintTab($output, $genetab); - # Get hash of hash of mRNA - if(exists $mRNA->{$geneName}){ - my %mRNAHash=@{$mRNA->{$geneName}}; - # For each mRNA known - foreach my $mRNAnum (keys %mRNAHash) { - #Get mRNAhash of the current gene studied - my $current_mRNAHash=$mRNAHash{$mRNAnum}; - brickToprintTab($output, $current_mRNAHash); - my $mRNAname=@{$current_mRNAHash}[0]; - #Get exonhash of the current mRNA studied - if(exists $exon->{$mRNAname}){ # If exon are compacted some mRNA could not have exons ... 
- my %exonHash=@{$exon->{$mRNAname}}; - brickToprintHash($output, \%exonHash); - } - #Get cdshash of the current mRNA studied - my %cdsHash=@{$cds->{$mRNAname}}; - brickToprintHash($output, \%cdsHash); - #Get UTR5hash of the current mRNA studied if exist - if (exists $UTR5->{$mRNAname}){ - my %UTR5Hash=@{$UTR5->{$mRNAname}}; - brickToprintHash($output, \%UTR5Hash); - } - #Get UTR3hash of the current mRNA studied if exist - if (exists $UTR3->{$mRNAname}){ - my %UTR3Hash=@{$UTR3->{$mRNAname}}; - brickToprintHash($output, \%UTR3Hash); - } - } - } # tRNA - elsif(exists $tRNA->{$geneName}){ - my %tRNAHash=@{$tRNA->{$geneName}}; - foreach my $tRNAnum (keys %tRNAHash) { - my $current_tRNAHash=$tRNAHash{$tRNAnum}; - brickToprintTab($output, $current_tRNAHash); - my $tRNAname=@{$current_tRNAHash}[0]; - - my %exonHash=@{$exon->{$tRNAname}}; - brickToprintHash($output, \%exonHash); - #Get cdshash of the current tRNA studied - if (exists $cds->{$tRNAname}){ - my %cdsHash=@{$cds->{$tRNAname}}; - brickToprintHash($output, \%cdsHash); - } - #Get UTR5hash of the current tRNA studied if exist - if (exists $UTR5->{$tRNAname}){ - my %UTR5Hash=@{$UTR5->{$tRNAname}}; - brickToprintHash($output, \%UTR5Hash); - } - #Get UTR3hash of the current tRNA studied if exist - if (exists $UTR3->{$tRNAname}){ - my %UTR3Hash=@{$UTR3->{$tRNAname}}; - brickToprintHash($output, \%UTR3Hash); - } - } - } -} - -#This function sort hash to give key in order from one to ... 
-sub sortByPos { - my ($hashRef) = @_; - foreach my $key (keys %{$hashRef}) { - my %hashDeep = @{ ${$hashRef} {$key} }; - - my $cpt=1; - my %hashtmp; - foreach my $kDeep ( sort ({ $hashDeep{$a}[4] <=> $hashDeep{$b}[4] } keys %hashDeep)){ - @{ $hashtmp{$cpt} } = @{$hashDeep{$kDeep}}; - $cpt++; - } - @{ ${$hashRef} {$key} } = %hashtmp ; - } -} - -sub parseGFF { - - my($file_in,$fileName, $opt_expand) = @_; - print( "Reading features from $fileName...\n"); - - my %genes; my %trnagenes; my %mRNA;my %exon; my %cds; my %UTR5; my %UTR3; my %tRNA; my %nctpss; my %ncfpss; my %pieceStudied; - my $cptgenes; my $cptmRNA;my $cptexon; my $cptcds; my $cptUTR5; my $cptUTR3; my $cpttRNA; my $cptgenetRNA; my $cptnctpss; my $cptncfpss; - - # read file and decompose it - while (my $feature = $file_in->next_feature() ) { - my $seqname = $feature->seq_id();#print "$seqname \n"; - my $source = $feature->source_tag();#print "source =$source \n"; - my $type = $feature->primary_tag();#print "type= $type \n"; - #Manage feature position // shoud be always sorted - my $start = $feature->start(); - my $end = $feature->end(); - if ($start > $end){my $tmp=$start; $start=$end; $end=$tmp;} - my $score = $feature->score();if (! defined $score){$score = ".";} - my $strand = $feature->strand();if ($strand eq "1"){$strand="+";}elsif($strand eq "0"){$strand="-";}#print "strand= $strand \n"; - my $frame = $feature->frame(); if ( ! 
defined $frame){$frame = ".";} #print "frame= $frame"; - - my @ID = $feature->get_tag_values('ID'); - my $ID=$ID[0];#print "ID= $ID\n"; - my $ParentID; - - my @groups; - my @tags = $feature->get_all_tags(); - foreach my $tag (@tags){ - my @tagValue = $feature->get_tag_values($tag); - push (@groups, [$tag, $tagValue[0]]); - if ($tag eq "Parent"){$ParentID=$tagValue[0];} - } - - if ( $type eq 'gene' ) { - if ($ID =~ m/trnascan/){ - $cptgenetRNA++; - push( @{ $trnagenes{$ID}}, ($ID,$seqname,$source,$type,$start,$end,$score,$strand,$frame,\@groups) ); - } - else{ - my $pieceStudiedName="$seqname$strand"; - push( @{ $pieceStudied{$pieceStudiedName}}, $ID ); - push( @{ $genes{$ID}}, ($ID,$seqname,$source,$type,$start,$end,$score,$strand,$frame,\@groups) ); - $cptgenes++; - } - } - else { -# $output->write_feature($feature); - if ($type eq 'mRNA'){ - my %mRNAsInfo; $cptmRNA++; - if ( !defined( $genes{$ParentID}) ) { - printf( STDERR "mRNA -> gene parent ".$ParentID." dont exists. Line= $ID, $seqname,$source,$type,$start,$end,$score,$strand,$frame,\@groups\n"); exit(); - } - if (exists $mRNA{$ParentID}){ - # get the hash - my %mRNAsInfo=@{ $mRNA{$ParentID}}; - #nb element in hash - my $nbr = keys (%mRNAsInfo); - # printf ("\nthere is $nbr keys described\n"); - # add value in hash - push ( @{ $mRNAsInfo{$nbr+1} }, ($ID,$seqname,$source,$type,$start,$end,$score,$strand,$frame,\@groups) ); - # put hash in hash - @{ $mRNA{$ParentID} } = %mRNAsInfo ; - } - else{ - @{ $mRNAsInfo{1} }= ($ID,$seqname,$source,$type,$start,$end,$score,$strand,$frame,\@groups); - push ( @{ $mRNA{$ParentID} }, %mRNAsInfo); - - } - - } - - elsif ($type eq "exon"){ - $cptexon++; -#<<<<<<<<<<<<<<<< Expand exon If seceral parents >>>>>>>>>>>>>>> - #We will expand exon if necessary - my @Parent = $feature->get_tag_values('Parent'); - my $sizeParent=$#Parent; - # for each parent of the exon, create an exon (even if only one...) 
- my $nbmRNAexpanded=0; - foreach my $mRNA_ID (@Parent){ - $nbmRNAexpanded++; - my @groupsR; - if ($sizeParent != 0){ # If this exon need to be expand - if ($nbmRNAexpanded != 1){ #dont count the original exon - $nbexpand++; - } - # print "expand mRNA $mRNA_ID\n"; - @tags = $feature->get_all_tags(); - foreach my $tag (@tags){ - if ($tag eq "Parent"){ - push (@groupsR, [$tag, $mRNA_ID]); - # print " test1=$mRNA_ID\n"; - } - elsif($tag eq "ID"){ # Change name of expanded exon tin order to have a unique identifier/ID - my @tagValue = $feature->get_tag_values($tag); - push (@groupsR, [$tag, "$tagValue[0]]-$mRNA_ID"]); - } - else{ - my @tagValue = $feature->get_tag_values($tag); - push (@groupsR, [$tag, $tagValue[0]]); - # print "tagvalue = $tagValue[0] " - } - - } - } - else {@groupsR=@groups;} # If this exon cannot be expand - - #Now save result in Hash - my %exonsInfo=(); - #Test if exon hash already exists - if (exists $exon{$mRNA_ID}){ - # get the hash - my %exonsInfo=@{ $exon{$mRNA_ID}}; - #nb element in hash - my $nbr = keys (%exonsInfo); - # add value in hash - push ( @{ $exonsInfo{$nbr+1} }, ($ID,$seqname,$source,$type,$start,$end,$score,$strand,$frame,\@groupsR) ); - # put hash in hash - @{ $exon{$mRNA_ID} } = %exonsInfo ; - } - else{ - @{ $exonsInfo{1} }= ($ID,$seqname,$source,$type,$start,$end,$score,$strand,$frame,\@groupsR); - push ( @{ $exon{$mRNA_ID} }, %exonsInfo); - } - } # end foreach (even if only one loop) - } - - elsif ($type eq "CDS"){ - $cptcds++; - my %cdsInfo; - my $mRNA_ID=$ParentID; - #Test if cds hash already exists - if (exists $cds{$mRNA_ID}){ - # get the hash - my %cdsInfo=@{ $cds{$mRNA_ID}}; - #nb element in hash - my $nbr = keys (%cdsInfo); - # printf ("\nthere is $nbr keys described\n"); - # add value in hash - push ( @{ $cdsInfo{$nbr+1} }, ($ID,$seqname,$source,$type,$start,$end,$score,$strand,$frame,\@groups) ); - # put hash in hash - @{ $cds{$mRNA_ID} } = %cdsInfo ; - } - else{ - @{ $cdsInfo{1} }= 
($ID,$seqname,$source,$type,$start,$end,$score,$strand,$frame,\@groups); - push ( @{ $cds{$mRNA_ID} }, %cdsInfo); - } - - } - - elsif ($type eq "five_prime_UTR"){ - $cptUTR5++; - my %UTR5Info; - my $mRNA_ID=$ParentID; - if (exists $UTR5{$mRNA_ID}){ - # get the hash - my %UTR5Info=@{ $UTR5{$mRNA_ID}}; - #nb element in hash - my $nbr = keys (%UTR5Info); - # add value in hash - push ( @{ $UTR5Info{$nbr+1} }, ($ID,$seqname,$source,$type,$start,$end,$score,$strand,$frame,\@groups) ); - # put hash in hash - @{ $UTR5{$mRNA_ID} } = %UTR5Info ; - } - else{ - @{ $UTR5Info{1} }= ($ID,$seqname,$source,$type,$start,$end,$score,$strand,$frame,\@groups); - push ( @{ $UTR5{$mRNA_ID} }, %UTR5Info); - } - } - - elsif ($type eq "three_prime_UTR"){ - $cptUTR3++; - my %UTR3Info; - my $mRNA_ID=$ParentID; - if (exists $UTR3{$mRNA_ID}){ - # get the hash - my %UTR3Info=@{ $UTR3{$mRNA_ID}}; - #nb element in hash - my $nbr = keys (%UTR3Info); - # add value in hash - push ( @{ $UTR3Info{$nbr+1} }, ($ID,$seqname,$source,$type,$start,$end,$score,$strand,$frame,\@groups) ); - # put hash in hash - @{ $UTR3{$mRNA_ID} } = %UTR3Info ; - } - else{ - @{ $UTR3Info{1} }= ($ID,$seqname,$source,$type,$start,$end,$score,$strand,$frame,\@groups); - push ( @{ $UTR3{$mRNA_ID} }, %UTR3Info); - } - } - - elsif($type eq "tRNA"){ - $cpttRNA++; - my %tRNAsInfo; - my $geneID = $ParentID; - if ( !defined( $trnagenes{$geneID}) ) { - printf( STDERR "tRNA -> geneID ".$geneID." dont exists. 
Line= $ID, $seqname,$source,$type,$start,$end,$score,$strand,$frame,\@groups\n"); exit(); - } - if (exists $tRNA{$geneID}){ - # get the hash - my %tRNAsInfo=@{$tRNA{$geneID}}; - #nb element in hash - my $nbr = keys (%tRNAsInfo); - # add value in hash - push ( @{ $tRNAsInfo{$nbr+1} }, ($ID,$seqname,$source,$type,$start,$end,$score,$strand,$frame,\@groups) ); - # put hash in hash - @{ $tRNA{$geneID} } = %tRNAsInfo ; - } - else{ - @{ $tRNAsInfo{1} }= ($ID,$seqname,$source,$type,$start,$end,$score,$strand,$frame,\@groups); - push ( @{ $tRNA{$geneID} }, %tRNAsInfo); - - } - } - elsif($type eq "non_canonical_three_prime_splice_site"){ - my %spliceInfo;$cptnctpss++; - if (exists $nctpss{$ParentID}){ - # get the hash - my %spliceInfo=@{$nctpss{$ParentID}}; - #nb element in hash - my $nbr = keys (%spliceInfo); - # add value in hash - push ( @{ $spliceInfo{$nbr+1} }, ($ID,$seqname,$source,$type,$start,$end,$score,$strand,$frame,\@groups) ); - # put hash in hash - @{ $nctpss{$ParentID} } = %spliceInfo ; - } - else{ - @{ $spliceInfo{1} }= ($ID,$seqname,$source,$type,$start,$end,$score,$strand,$frame,\@groups); - push ( @{ $nctpss{$ParentID} }, %spliceInfo); - - } - } - elsif($type =~ m/non_canonical_five_/){ - my %spliceInfo;$cptncfpss++; - if (exists $ncfpss{$ParentID}){ - # get the hash - my %spliceInfo=@{$ncfpss{$ParentID}}; - #nb element in hash - my $nbr = keys (%spliceInfo); - # add value in hash - push ( @{ $spliceInfo{$nbr+1} }, ($ID,$seqname,$source,$type,$start,$end,$score,$strand,$frame,\@groups) ); - # put hash in hash - @{ $ncfpss{$ParentID} } = %spliceInfo ; - } - else{ - @{ $spliceInfo{1} }= ($ID,$seqname,$source,$type,$start,$end,$score,$strand,$frame,\@groups); - push ( @{ $ncfpss{$ParentID} }, %spliceInfo); - - } - } - - else{ - printf( STDERR "Feature skipped: $type... 
Use option\n"); - exit(); - } - } - } - - $ref_istream->close(); - printf( "Read %d genes.(tRNA included if present)\n", $cptgenes ); - my $nbGeneFrommRNA = keys (%mRNA); - printf( STDERR "Read genes $nbGeneFrommRNA (tRNA excluded if present)\n"); - my $nbmRNA = keys (%exon); - printf( STDERR "Read mRNAs $nbmRNA\n"); - -# if ($opt_expand eq "yes"){ print "$nbexpand exons added during the expansion\n";} - - return \%genes, \%trnagenes, \%mRNA, \%exon, \%cds, \%UTR5, \%UTR3, \%tRNA, \%pieceStudied; -} - - -__END__ - -=head1 NAME - -maker_checkFusionSplitBetweenTwoBuilds.pl - Compare two gene build in GFF3 format in order -to detect the gene fron build 1 (--ref file) that are split or fused in the gene build 2 (--tar file). The result is written in the file "outputSplitMergeRef" (For gene involved from build1) and -"outputSplitMergeTar" (For gene involved from build2) in the specified output directory. In more, complex cases -where more than 2 genes of each build are overlapping, we sort them in a separate file call Cluster as well written in the specidied output directory. - -=head1 SYNOPSIS - - ./maker_checkFusionSplitBetweenTwoBuilds.pl --ref=infile --tar=infile --output=outDirectory - ./maker_checkFusionSplitBetweenTwoBuilds.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--ref>, B<--reffile> or B<-f> - -Input GFF3 file correponding to gene build 1. - -=item B<--tar>, B<--tarfile> or B<-t> - -Input GFF3 file corresponding to gene build 2. - -=item B<--out>, B<--output> or B<-o> - -Output directory where diffrent output files will be written. - -=item B<--help> or B<-h> - -Display this helpful text. 
- -=back - -=cut diff --git a/annotation/Tools/Deprecated/maker_gff3manager_JD_v8.pl b/annotation/Tools/Deprecated/maker_gff3manager_JD_v8.pl deleted file mode 100755 index a59947128..000000000 --- a/annotation/Tools/Deprecated/maker_gff3manager_JD_v8.pl +++ /dev/null @@ -1,1070 +0,0 @@ -#!/usr/bin/env perl - -########################################### -# gff manager v8 - Jacques Dainat 11/2014 # -########################################### - -#libraries -use strict; -use warnings; -use Carp; -use Time::Piece; -use Time::Seconds; -use POSIX qw(strftime); -use Getopt::Long; -use IO::File; -use Pod::Usage; -use List::MoreUtils qw(uniq); -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient; -# END libraries - -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -# PARAMETERS - OPTION -my $opt_reffile; -my $opt_output; -my $opt_BlastFile; -my $opt_InterproFile; -my $opt_name; -my $opt_nameU; -my $optFillFrame; -my $optForceFillFrame; -my $opt_genomeSize; -my $opt_removeUTR; -my $opt_removemRNAduplicated; -my $opt_help = 0; -# END PARAMETERS - OPTION - -# for ID name -my $nbGeneName; -my $nbmRNAname; -my $nbCDSname; -my $nbExonName; -my $nbOTHERName; -my $nbUTRName; -my $nbRepeatName; -# END ID name - -# FOR FUNCTIONS BLAST# -my %nameBlast; -my %geneNameBlast; -my %mRNANameBlast; -my %mRNAproduct; -my %geneNameGiven; -my %duplicateNameGiven; -my $nbDuplicateNameGiven=0; -my $nbDuplicateName=0; -my $nbNamedGene=0; -my $nbGeneNameInBlast=0; -# END FOR FUNCTION BLAST# - -# FOR FUNCTIONS INTERPRO# -my %TotalTerm; -my %finalID; -my %GeneAssociatedToTerm; -my %mRNAAssociatedToTerm; -my %functionData; -my %functionDataAdded; -my %functionOutput; -my %functionStreamOutput; -my %geneWithoutFunction; -my %geneWithFunction; -my $nbmRNAwithoutFunction=0; -my $nbmRNAwithFunction=0; -my 
$nbGeneWithGOterm=0; -my $nbTotalGOterm=0; -# END FOR FUNCTION INTERPRO# - -# OPTION MANAGMENT -my @copyARGV=@ARGV; -if ( !GetOptions( 'f|ref|reffile|gff|gff3=s' => \$opt_reffile, - 'b|blast=s' => \$opt_BlastFile, - 'i|interpro=s' => \$opt_InterproFile, - 'id=s' => \$opt_name, - 'idau=s' => \$opt_nameU, - 'g|gs=s' => \$opt_genomeSize, - 'gf=i' => \$nbGeneName, - 'mf=i' => \$nbmRNAname, - 'cf=i' => \$nbCDSname, - 'ef=i' => \$nbExonName, - 'uf=i' => \$nbUTRName, - 'of=i' => \$nbOTHERName, - 'rf=i' => \$nbRepeatName, - 'ff' => \$optFillFrame, - 'o|output=s' => \$opt_output, - - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ( ! (defined($opt_reffile)) ){ - pod2usage( { - -message => "$header\nAt least 1 parameter is mandatory:\nInput reference gff file (--f)\n\n". - "Many optional parameters are available. Look at the help documentation to know more.\n", - -verbose => 0, - -exitval => 1 } ); -} - -# counters for ids initialisation -if (! $nbGeneName){$nbGeneName=1}; -if (! $nbmRNAname){$nbmRNAname=1}; -if (! $nbCDSname){$nbCDSname=1}; -if (! $nbExonName){$nbExonName=1}; -if (! $nbUTRName){$nbUTRName=1}; -if (! $nbOTHERName){$nbOTHERName=1}; -if (! $nbRepeatName){$nbRepeatName=1}; - - -################################################# -####### START Manage files (input output) ####### -################################################# - -my $streamBlast = IO::File->new(); -my $streamInter = IO::File->new(); - -# Manage Blast File -if (defined $opt_BlastFile){ -$streamBlast->open( $opt_BlastFile, 'r' ) or - croak( - sprintf( "Can not open '%s' for reading: %s", $opt_BlastFile, $! ) ); -} - -# Manage Interpro file -if (defined $opt_InterproFile){ -$streamInter->open( $opt_InterproFile, 'r' ) or - croak( - sprintf( "Can not open '%s' for reading: %s", $opt_InterproFile, $! 
) ); -} - -########################## -##### Manage Output ###### -my @outputTab; - -if (defined($opt_output) ) { - if (-f $opt_output){ - print "Cannot create a directory with the name $opt_output because a file with this name already exists.\n";exit(); - } - if (-d $opt_output){ - print "The output directory choosen already exists. Please geve me another Name.\n";exit(); - } - #### Case 1 => option ouput option onlyStat - mkdir $opt_output; - - my $ostreamReport=IO::File->new(">".$opt_output."/report.txt" ) or - croak( sprintf( "Can not open '%s' for writing %s", $opt_output."/report.txt", $! )); - push (@outputTab, $ostreamReport); - - #### Case 2 => option ouput NO option onlyStat - my $ostreamCoding=Bio::Tools::GFF->new(-file => ">".$opt_output."/AllFeatures.gff", -gff_version => 3 ) or - croak(sprintf( "Can not open '%s' for writing %s", $opt_output."AllFeatures.gff", $! )); - push (@outputTab, $ostreamCoding); - - my $ostreamNormalGene=Bio::Tools::GFF->new(-file => ">".$opt_output."/codingGeneFeatures.gff", -gff_version => 3 ) or - croak( sprintf( "Can not open '%s' for writing %s", $opt_output."/codingGeneFeatures.gff", $! )); - push (@outputTab, $ostreamNormalGene); - - my $ostreamOtherRNAGene=Bio::Tools::GFF->new(-file => ">".$opt_output."/otherRNAfeatures.gff", -gff_version => 3 ) or - croak(sprintf( "Can not open '%s' for writing %s", $opt_output."/otherRNAfeatures.gff", $! )); - push (@outputTab, $ostreamOtherRNAGene); - - my $ostreamRepeats=Bio::Tools::GFF->new(-file => ">".$opt_output."/repeatsFeatures.gff", -gff_version => 3 )or - croak( sprintf( "Can not open '%s' for writing %s", $opt_output."/repeatsFeatures.gff", $! )); - push (@outputTab, $ostreamRepeats); - -} -### Case 3 => No output option => everithing will be display on screen. -### Case 4 => If option onlyStat provided the script will stop before writting results. -else { - my $ostreamReport = \*STDOUT or die ( sprintf( "Can not open '%s' for writing %s", "STDOUT", $! 
)); - push (@outputTab, $ostreamReport); - - my $ostream = IO::File->new(); - $ostream->fdopen( fileno(STDOUT), 'w' ) or croak( sprintf( "Can not open STDOUT for writing: %s", $! ) ); - my $outputGFF = Bio::Tools::GFF->new( -fh => $ostream, -gff_version => 3) or croak( sprintf( "Can not open STDOUT for writing: %s", $! ) ); - - #my $outputGFF = Bio::Tools::GFF->new( \*STDOUT, -gff_version => 3 ) or - #croak( sprintf( "Can not open STDOUT for writing: %s", $! ) ); - push (@outputTab, $outputGFF); - push (@outputTab, $outputGFF); - push (@outputTab, $outputGFF); - push (@outputTab, $outputGFF); - push (@outputTab, $outputGFF); ### Creation of a list of output stream <= In this case every time the same ! Because it for display to the screen -} - -############################################### -####### END Manage files (input output) ####### -############################################### -#my $stringPrint = strftime "%m/%d/%Y at %Hh%Mm%Ss", localtime; -my $stringPrint = strftime "%m/%d/%Y", localtime; - -$stringPrint .= "\nusage: $0 @copyARGV\n". - "vvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n". - "vvvvvvvv OPTION INFO vvvvvvvv\n\n". - "->We will calculate statistics about the input file\n". - "->The feature will be sorted before to print them\n"; - -my $prefixName; -if ($opt_name){ - $prefixName=$opt_name; - $stringPrint .= "->IDs will be changed using $opt_name as prefix.\nIn the case of discontinuous features (i.e. a single feature that exists over multiple genomic locations) the same ID may appear on multiple lines.". - " All lines that share an ID collectively represent a signle feature.\n"; - $stringPrint .= "-> Exon will be expanded even if not asked to avoid loss of multiple parent during exon renaming.\n" -} -if ($opt_nameU){ - $stringPrint .= "->IDs will be changed using $opt_nameU as prefix. 
Id of features that share an ID collectively will be change in different and uniq ID.\n"; - $prefixName=$opt_nameU; -} -if($optFillFrame or $optForceFillFrame){ - $stringPrint .= "->CDS frame will be fill\n"; - $stringPrint .= "-> Exon will be expanded even if not asked to avoid loss of multiple parent during exon renaming.\n" -} -$stringPrint .= "\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\n"; - -# Display -$outputTab[0]->print($stringPrint); -if($opt_output){ print_time("$stringPrint");} # When ostreamReport is a file we have to also display on screen - - - - # +------------------------------------------------------+ - # |+----------------------------------------------------+| - # || MAIN || - # |+----------------------------------------------------+| - # +------------------------------------------------------+ - -###################### -### Parse GFF input # -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $opt_reffile - }); -print_time("Parsing Finished\n\n"); -### END Parse GFF input # -######################### - -#Print directly what has been read -#my $stat = gff3_statistics($hash_omniscient, $opt_genomeSize); -# foreach my $info (@$stat){ -# $outputTab[0]->print("$info"); -# } - -################################ -# MANAGE FUNCTIONAL INPUT FILE # - -# Manage Blast File -if (defined $opt_BlastFile){ - parseAnnieBlast($streamBlast,$opt_BlastFile); -} - -# Manage Interpro File -if (defined $opt_InterproFile){ - parseAnnieInterpro($streamInter,$opt_InterproFile); - - # create streamOutput - if($opt_output){ - foreach my $type (keys %functionData){ - my $ostreamFunct = IO::File->new(); - $ostreamFunct->open( $opt_output."/$type.txt", 'w' ) or - croak( - sprintf( "Can not open '%s' for writing %s", $opt_output."/$type.txt", $! 
) - ); - $functionStreamOutput{$type}=$ostreamFunct; - } - } -} -# END MANAGE FUNCTIONAL INPUT FILE # -#################################### - -################################# -# GO THROUGH OMISCIENT # Will create - -my %omniscient_gene; -my %omniscient_other; -my %omniscient_repeat; -my @list_geneID_l1; -my @list_OtherRnaID_l1; -my @list_repeatID_l1; -################# -# create list by 3 type of feature (gene, trna, repeats). Allows to create different outputs - -# level 1 -foreach my $primary_tag_level1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_level1 = gene or repeat etc... - foreach my $id_level1 (keys %{$hash_omniscient->{'level1'}{$primary_tag_level1}}){ - if($primary_tag_level1 =~ /repeat/){ - push(@list_repeatID_l1, $id_level1) - } - else{ - # get one level2 feature to check wich level1 feature it is - foreach my $primary_tag_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_level2 = mrna or mirna or ncrna or trna etc... - if ( exists ($hash_omniscient->{'level2'}{$primary_tag_level2}{$id_level1} ) ){ - my $one_feat=@{$hash_omniscient->{'level2'}{$primary_tag_level2}{$id_level1}}[0]; - if(lc($one_feat->primary_tag) eq "mrna"){ - push(@list_geneID_l1, $id_level1); - last; - } - else{ - push(@list_OtherRnaID_l1, $id_level1); - last; - } - } - } - } - } -} - -########################## -# create sub omniscients -my %hash_of_omniscient; -if(@list_geneID_l1){ - fill_omniscient_from_other_omniscient_level1_id(\@list_geneID_l1, $hash_omniscient, \%omniscient_gene); - $hash_of_omniscient{'Coding_Gene'}=\%omniscient_gene; -} -if(@list_OtherRnaID_l1){ - fill_omniscient_from_other_omniscient_level1_id(\@list_OtherRnaID_l1, $hash_omniscient, \%omniscient_other); - $hash_of_omniscient{'Non_Coding_Gene'}=\%omniscient_other; -} -if(@list_repeatID_l1){ - fill_omniscient_from_other_omniscient_level1_id(\@list_repeatID_l1, $hash_omniscient, \%omniscient_repeat); - $hash_of_omniscient{'Repeat'}=\%omniscient_repeat; -} - -############## -# STATISTICS # 
-foreach my $key_hash (keys %hash_of_omniscient){ - $outputTab[0]->print("Information about $key_hash\n"); - if($opt_output){print "Information about $key_hash\n";} # When ostreamReport is a file we have to also display on screen - my $hash_ref = $hash_of_omniscient{$key_hash}; - my $stat; - my $distri; - if($opt_genomeSize){ - ($stat, $distri) = gff3_statistics($hash_ref, $opt_genomeSize); - } - else{ - ($stat, $distri) = gff3_statistics($hash_ref); - } - - #print statistics - foreach my $infoList (@$stat){ - foreach my $info (@$infoList){ - $outputTab[0]->print("$info"); - if($opt_output){print_time(print "$info");} # When ostreamReport is a file we have to also display on screen - } - $outputTab[0]->print("\n"); - if($opt_output){print "\n";} # When ostreamReport is a file we have to also display on screen - } -} - -# END STATISTICS # -################## - -################### -#Fil frame is asked -if($optFillFrame){ - print_time( "fill frame information\n"); - foreach my $key_hash (keys %hash_of_omniscient){ - my $hash_ref = $hash_of_omniscient{$key_hash}; - fil_cds_frame($key_hash); - } -} - -########################### -# change FUNCTIONAL information if asked for -if ($opt_BlastFile || $opt_InterproFile ){#|| $opt_BlastFile || $opt_InterproFile){ - print_time( "load FUNCTIONAL information\n" ); - my $hash_ref = $hash_of_omniscient{'Coding_Gene'}; - - ################# - # == LEVEL 1 == # - ################# - foreach my $primary_tag_level1 (keys %{$hash_ref ->{'level1'}}){ # primary_tag_level1 = gene or repeat etc... 
- foreach my $id_level1 (keys %{$hash_ref ->{'level1'}{$primary_tag_level1}}){ - - my $feature_level1=$hash_ref->{'level1'}{$primary_tag_level1}{$id_level1}; - # Clean NAME attribute - if($feature_level1->has_tag('Name')){ - $feature_level1->remove_tag('Name'); - } - - #Manage Name if otpion setting - if( $opt_BlastFile ){ - if (exists ($geneNameBlast{$id_level1})){ - create_or_replace_tag($feature_level1, 'Name', $geneNameBlast{$id_level1}); - $nbNamedGene++; - - # Check name duplicated given - my $nameClean=$geneNameBlast{$id_level1}; - $nameClean =~ s/_([2-9]{1}[0-9]*|[0-9]{2,})*$//; - - my $nameToCompare; - if(exists ($nameBlast{$nameClean})){ # We check that is really a name where we added the suffix _1 - $nameToCompare=$nameClean; - } - else{$nameToCompare=$geneNameBlast{$id_level1};} # it was already a gene_name like BLABLA_12 - - if(exists ($geneNameGiven{$nameToCompare})){ - $nbDuplicateNameGiven++; # track total - $duplicateNameGiven{$nameToCompare}++; # track diversity - } - else{$geneNameGiven{$nameToCompare}++;} # first time we have given this name - } - } - - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_key_level2 (keys %{$hash_ref->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... 
- - if ( exists_keys ($hash_ref, ('level2', $primary_tag_key_level2, $id_level1) ) ){ - foreach my $feature_level2 ( @{$hash_ref->{'level2'}{$primary_tag_key_level2}{$id_level1}}) { - - my $level2_ID = lc($feature_level2->_tag_value('ID')); - # Clean NAME attribute - if($feature_level2->has_tag('Name')){ - $feature_level2->remove_tag('Name'); - } - - #Manage Name if option set - if($opt_BlastFile){ - if (exists ($mRNANameBlast{$level2_ID})){ - my $mRNABlastName=$mRNANameBlast{$level2_ID}; - create_or_replace_tag($feature_level2, 'Name', $mRNABlastName); - } - my $productData=printProductFunct($level2_ID); - if ($productData ne ""){ - create_or_replace_tag($feature_level2, 'product', $productData); - } - else { - create_or_replace_tag($feature_level2, 'product', "hypothetical protein"); - } #Case where the protein is not known - } - - # print function if option - if($opt_InterproFile){ - my $parentID=$feature_level2->_tag_value('Parent'); - - if (addFunctions($feature_level2, $opt_output)){ - $nbmRNAwithFunction++;$geneWithFunction{$parentID}++; - if(exists ($geneWithoutFunction{$parentID})){ - delete $geneWithoutFunction{$parentID}; - } - } - else{ - $nbmRNAwithoutFunction++; - if(! 
exists ($geneWithFunction{$parentID})){ - $geneWithoutFunction{$parentID}++; - } - } - } - } - } - } - } - } -} - - -########################### -# change names if asked for -if ($opt_nameU || $opt_name ){#|| $opt_BlastFile || $opt_InterproFile){ - print_time( "load NAME information\n"); - foreach my $key_hash (keys %hash_of_omniscient){ - my $hash_ref = $hash_of_omniscient{$key_hash}; - - my %hash_sortBySeq; - foreach my $tag_level1 ( keys %{$hash_ref->{'level1'}}){ - foreach my $level1_id ( keys %{$hash_ref->{'level1'}{$tag_level1}}){ - my $position=$hash_ref->{'level1'}{$tag_level1}{$level1_id}->seq_id; - push (@{$hash_sortBySeq{$position}{$tag_level1}}, $hash_ref->{'level1'}{$tag_level1}{$level1_id}); - } - } - - ################# - # == LEVEL 1 == # - ################# - #Read by seqId to sort properly the output by seq ID - foreach my $seqid (sort alphaNum keys %hash_sortBySeq){ # loop over all the feature level1 - - foreach my $primary_tag_level1 (sort {$a cmp $b} keys %{$hash_sortBySeq{$seqid}}){ - - foreach my $feature_level1 ( sort {$a->start <=> $b->start} @{$hash_sortBySeq{$seqid}{$primary_tag_level1}}){ - my $level1_ID=$feature_level1->_tag_value('ID'); - my $id_level1 = lc($level1_ID); - my $newID_level1=undef; - #print_time( "Next gene $id_level1\n"); - - #keep track of Maker ID - if($opt_BlastFile){#In that case the name given by Maker is removed from ID and from Name. 
We have to kee a track - create_or_replace_tag($feature_level1, 'makerName', $level1_ID); - } - - if(lc($primary_tag_level1) =~ /repeat/ ){ - $newID_level1 = manageID($prefixName,$nbRepeatName,'R'); - $nbRepeatName++; - create_or_replace_tag($feature_level1, 'ID', $newID_level1); - } - else{ - $newID_level1 = manageID($prefixName,$nbGeneName,'G'); - $nbGeneName++; - create_or_replace_tag($feature_level1, 'ID', $newID_level1); - } - - $finalID{$feature_level1->_tag_value('ID')}=$newID_level1; - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_key_level2 (keys %{$hash_ref->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - - if ( exists_keys ($hash_ref, ('level2', $primary_tag_key_level2, $id_level1) ) ){ - foreach my $feature_level2 ( @{$hash_ref->{'level2'}{$primary_tag_key_level2}{$id_level1}}) { - - my $level2_ID = $feature_level2->_tag_value('ID'); - my $newID_level2=undef; - - #keep track of Maker ID - if($opt_InterproFile){#In that case the name given by Maker is removed from ID and from Name. We have to kee a track - create_or_replace_tag($feature_level2, 'makerName', $level2_ID); - } - - if(lc($feature_level2) =~ /repeat/ ){ - print "What should we do ? implement something. L1 and l2 repeats will have same name ...\n";exit; - } - else{ - $newID_level2 = manageID($prefixName,$nbmRNAname,"T"); - $nbmRNAname++; - create_or_replace_tag($feature_level2, 'ID', $newID_level2); - create_or_replace_tag($feature_level2, 'Parent', $newID_level1); - } - - $finalID{$level2_ID}=$newID_level2; - ################# - # == LEVEL 3 == # - ################# - - foreach my $primary_tag_level3 (keys %{$hash_ref->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... 
- - if ( exists_keys ($hash_ref,('level3',$primary_tag_level3, lc($level2_ID)) ) ){ - - foreach my $feature_level3 ( @{$hash_ref->{'level3'}{$primary_tag_level3}{lc($level2_ID)}}) { - - #keep track of Maker ID - my $level3_ID = $feature_level3->_tag_value('ID'); - if($opt_InterproFile){#In that case the name given by Maker is removed from ID and from Name. We have to kee a track - create_or_replace_tag($feature_level3, 'makerName', $level3_ID); - } - - my $newID_level3 =""; - if($primary_tag_level3 =~ /exon/ ){ - $newID_level3 = manageID($prefixName,$nbExonName,'E'); - $nbExonName++; - create_or_replace_tag($feature_level3, 'ID', $newID_level3); - create_or_replace_tag($feature_level3, 'Parent', $newID_level2); - - } - elsif($primary_tag_level3 =~ /cds/){ - $newID_level3 = manageID($prefixName,$nbCDSname,'C'); - if($opt_nameU){$nbCDSname++;} - create_or_replace_tag($feature_level3, 'ID', $newID_level3); - create_or_replace_tag($feature_level3, 'Parent', $newID_level2); - } - - elsif($primary_tag_level3 =~ /utr/){ - $newID_level3 = manageID($prefixName,$nbUTRName,'U'); - if($opt_nameU){$nbUTRName++;} - create_or_replace_tag($feature_level3, 'ID', $newID_level3); - create_or_replace_tag($feature_level3, 'Parent', $newID_level2); - } - else{ - $newID_level3 = manageID($prefixName,$nbOTHERName,'O'); - $nbOTHERName++; - create_or_replace_tag($feature_level3, 'ID', $newID_level3); - create_or_replace_tag($feature_level3, 'Parent', $newID_level2); - } - $finalID{$level3_ID}=$newID_level3; - } - #save the new l3 into the new l2 id name - $hash_ref->{'level3'}{$primary_tag_level3}{lc($newID_level2)} = delete $hash_ref->{'level3'}{$primary_tag_level3}{lc($level2_ID)} # delete command return the value before deteling it, so we just transfert the value - } - if ($opt_name and $primary_tag_level3 =~ /utr/){$nbUTRName++;} # with this option we increment UTR name only for each UTR - if ($opt_name and $primary_tag_level3 =~ /cds/){$nbCDSname++;} # with this option we increment cds 
name only for each cds - } - } - if($newID_level1){ - $hash_ref->{'level2'}{$primary_tag_key_level2}{lc($newID_level1)} = delete $hash_ref->{'level2'}{$primary_tag_key_level2}{$id_level1}; # modify the id key of the hash. The delete command return the value before deteling it, so we just transfert the value - } - } - } - - if($newID_level1){ - $hash_ref->{'level1'}{$primary_tag_level1}{lc($newID_level1)} = delete $hash_ref->{'level1'}{$primary_tag_level1}{$id_level1}; # modify the id key of the hash. The delete command return the value before deteling it, so we just transfert the value - } - } - } - } - } -} - -########################### -# RESULT PRINTING -########################### - -############################## -# print FUNCITONAL INFORMATION - -# first table name\tfunction -if($opt_output){ - foreach my $function_type (keys %functionOutput){ - my $streamOutput=$functionStreamOutput{$function_type}; - foreach my $ID (keys %{$functionOutput{$function_type}}){ - - if ($opt_nameU || $opt_name ){ - print $streamOutput $finalID{$ID}."\t".$functionOutput{$function_type}{$ID}."\n"; - } - else{ - print $streamOutput $ID."\t".$functionOutput{$function_type}{$ID}."\n"; - } - } - } -} - - -# NOW summerize -$stringPrint =""; # reinitialise (use at the beginning) -if ($opt_InterproFile){ - #print INFO - my $lineB= "___________________________________________________________________________________________________"; - $stringPrint .= " ".$lineB."\n"; - $stringPrint .= "| | Nb Total term | Nb mRNA with term | Nb mRNA updated by term | Nb gene updated by term |\n"; - $stringPrint .= "| | in Annie File | in Annie File | in our annotation file | in our annotation file |\n"; - $stringPrint .= "|".$lineB."|\n"; - - foreach my $type (keys %functionData){ - my $total_type = $TotalTerm{$type}; - my $mRNA_type_Annie = $functionDataAdded{$type}; - my $mRNA_type = keys %{$mRNAAssociatedToTerm{$type}}; - my $gene_type = keys %{$GeneAssociatedToTerm{$type}}; - $stringPrint .= 
"|".sizedPrint(" $type",10)."|".sizedPrint($total_type,15)."|".sizedPrint($mRNA_type_Annie,20)."|".sizedPrint($mRNA_type,25)."|".sizedPrint($gene_type,25)."|\n|".$lineB."|\n"; - } - - #RESUME TOTAL OF FUNCTION ATTACHED - my $listOfFunction; - foreach my $funct (keys %functionData){ - $listOfFunction.="$funct,"; - } - chop $listOfFunction; - $stringPrint .= "nb mRNA without Functional annotation ($listOfFunction) = $nbmRNAwithoutFunction\n"; - $stringPrint .= "nb mRNA with Functional annotation ($listOfFunction) = $nbmRNAwithFunction\n"; - my $nbGeneWithoutFunction= keys %geneWithoutFunction; - $stringPrint .= "nb gene without Functional annotation ($listOfFunction) = $nbGeneWithoutFunction\n"; - my $nbGeneWithFunction= keys %geneWithFunction; - $stringPrint .= "nb gene with Functional annotation ($listOfFunction) = $nbGeneWithFunction\n"; - -} - -if($opt_BlastFile){ - my $nbGeneDuplicated=keys %duplicateNameGiven; - $nbDuplicateNameGiven=$nbDuplicateNameGiven+$nbGeneDuplicated; # Until now we have counted only name in more, now we add the original name. - $stringPrint .= "$nbGeneNameInBlast gene names have been retrieved in the blast file. $nbNamedGene gene names have been successfully inferred.\n". 
- "Among them there are $nbGeneDuplicated names that are shared at least per two genes for a total of $nbDuplicateNameGiven genes.\n"; - # "We have $nbDuplicateName gene names duplicated ($nbDuplicateNameGiven - $nbGeneDuplicated)."; - - if($opt_output){ - my $duplicatedNameOut=IO::File->new(">".$opt_output."/duplicatedNameFromBlast.txt" ); - foreach my $name (sort { $duplicateNameGiven{$b} <=> $duplicateNameGiven{$a} } keys %duplicateNameGiven){ - print $duplicatedNameOut "$name\t".($duplicateNameGiven{$name}+1)."\n"; - } - } -} - - -# Display -$outputTab[0]->print("$stringPrint"); -if(defined $opt_output){print_time( "$stringPrint" ) ;} - -#################### -# PRINT IN FILES -#################### -#print step -printf("Writing result\n"); -if($opt_output){ - #print gene (mRNA) - print_omniscient(\%omniscient_gene, $outputTab[2]); - #print other RNA gene - print_omniscient(\%omniscient_other, $outputTab[3]); - #print repeat - print_omniscient(\%omniscient_repeat, $outputTab[4]); -} -else{ - #print gene (mRNA) - print_omniscient(\%omniscient_gene, $outputTab[1]); - #print other RNA gene - print_omniscient(\%omniscient_other, $outputTab[1]); - #print repeat - print_omniscient(\%omniscient_repeat, $outputTab[1]); -} - ######################### - ######### END ########### - ######################### -####################################################################################################################### - #################### - # methods # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - -# print with time -sub print_time{ - my $t = localtime; - my $line = "[".$t->hms."] @_\n"; - print $line; -} - -# each mRNA of a gene has its proper gene name. Most often is the same, and annie added a number at the end. To provide only one gene name, we remove this number and then remove duplicate name (case insensitive). -# If it stay at the end of the process more than one name, they will be concatenated together. 
-# It remove redundancy intra name. -sub manageGeneNameBlast{ - my ($geneName)=@_; - foreach my $element (keys %$geneName){ - my @tab=@{$geneName->{$element}}; - - my %seen; - my @unique; - for my $w (@tab) { # remove duplicate in list case insensitive - $w =~ s/_[0-9]+$// ; - next if $seen{lc($w)}++; - push(@unique, $w); - } - - my $finalName=""; - my $cpt=0; - foreach my $name (@unique){ #if several name we will concatenate them together - if ($cpt == 0){ - $finalName .="$name"; - } - else{$finalName .="_$name"} - } - $geneName->{$element}=$finalName; - $nameBlast{lc($finalName)}++; - } -} - -# creates gene ID correctly formated (PREFIX,TYPE,NUMBER) like HOMSAPG00000000001 for a Homo sapiens gene. -sub manageID{ - my ($prefix,$nbName,$type)=@_; - my $result=""; - my $numberNum=11; - my $GoodNum=""; - for (my $i=0; $i<$numberNum-length($nbName); $i++){ - $GoodNum.="0"; - } - $GoodNum.=$nbName; - $result="$prefix$type$GoodNum"; - - return $result; -} - -# Create String containing the product information associated to the mRNA -sub printProductFunct{ - my ($refname)=@_; - my $String=""; - my $first="yes"; - if (exists $mRNAproduct{$refname}){ - foreach my $element (@{$mRNAproduct{$refname}}) - { - if($first eq "yes"){ - $String.="$element"; - $first="no"; - } - else{$String.=",$element";} - } - } - return $String; -} - -sub addFunctions{ - my ($feature, $opt_output)=@_; - - my $functionAdded=undef; - my $ID=lc($feature->_tag_value('ID')); - foreach my $function_type (keys %functionData){ - - - if(exists ($functionData{$function_type}{$ID})){ - $functionAdded="true"; - - my $data_list; - - if(lc($function_type) eq "go"){ - foreach my $data (@{$functionData{$function_type}{$ID}}){ - $feature->add_tag_value('Ontology_term', $data); - $data_list.="$data,"; - $functionDataAdded{$function_type}++; - } - } - else{ - foreach my $data (@{$functionData{$function_type}{$ID}}){ - $feature->add_tag_value('Dbxref', $data); - $data_list.="$data,"; - 
$functionDataAdded{$function_type}++; - } - } - - if ($opt_output){ - my $ID = $feature->_tag_value('ID'); - chop $data_list; - $functionOutput{$function_type}{$ID}=$data_list; - } - } - } - return $functionAdded; -} - -# method to par annie blast file -sub parseAnnieBlast { - my($file_in,$fileName) = @_; - print( "Reading features from $fileName...\n"); - my %geneName; my %linkBmRNAandGene; - my $geneID =""; - my $nameGene=""; - - #FIRST PARSE THE FILE - while( my $line = <$file_in>) { - next if $line =~ /^\s*$/; - my @values = split(/\s/, $line); - - if ($values[1] eq "name"){ - $geneID=$values[0]; - $nameGene=$values[2]; - $nameGene=~ s/\n//g; - push ( @{ $geneName{lc($geneID)} }, lc($nameGene) ); - } - elsif ($values[1] eq "product"){ - my $mRNAID=$values[0]; - @values = split(/\sproduct\s/, $line); - my $product=$values[1]; - $product=~ s/\n//g; - push ( @{ $mRNAproduct{lc($mRNAID)} }, $product ); - if ($nameGene ne ""){ - push( @{ $linkBmRNAandGene{lc($geneID)}}, lc($mRNAID)); # save mRNA name for each gene name - $geneID =""; - $nameGene=""; - } - } - else {print "/!\\ Achtung !! something strange in this file ... line is: $line";} - } - - # secondly Manage NAME (If several) - manageGeneNameBlast(\%geneName); # Remove redundancy to have only one name for each gene - - #Then CLEAN NAMES REDUNDANCY inter gene - my %geneNewNameUsed; - foreach my $geneID (keys %geneName){ - - $nbGeneNameInBlast++; - - my @mRNAList=@{$linkBmRNAandGene{$geneID}}; - my $String = $geneName{$geneID}; -# print "$String\n"; - if (! 
exists( $geneNewNameUsed{$String})){ - $geneNewNameUsed{$String}++; - $geneNameBlast{$geneID}=$String; - # link name to mRNA and and isoform name _1 _2 _3 if several mRNA - my $cptmRNA=1; - if ($#mRNAList != 0) { - foreach my $mRNA (@mRNAList){ - $mRNANameBlast{$mRNA}=$String."_iso".$cptmRNA; - $cptmRNA++; - } - } - else{$mRNANameBlast{$mRNAList[0]}=$String;} - } - else{ #in case where name was already used, we will modified it by addind a number like "_2" - $nbDuplicateName++; - $geneNewNameUsed{$String}++; - my $nbFound=$geneNewNameUsed{$String}; - $String.="_$nbFound"; - $geneNewNameUsed{$String}++; - $geneNameBlast{$geneID}=$String; - # link name to mRNA and and isoform name _1 _2 _3 if several mRNA - my $cptmRNA=1; - if ($#mRNAList != 0) { - foreach my $mRNA (@mRNAList){ - $mRNANameBlast{$mRNA}=$String."_iso".$cptmRNA; - $cptmRNA++; - } - } - else{$mRNANameBlast{$mRNAList[0]}=$String;} - } - } -} - -# method to par annie Interpro file -sub parseAnnieInterpro { - my($file_in,$fileName) = @_; - print( "Reading features from $fileName...\n"); - - while( my $line = <$file_in>) { - my @values = split(/\t/, $line); - my $mRNAID=lc($values[0]); - if ((lc($values[1]) eq "db_xref") || (lc($values[1]) eq "dbxref")){ - my $data=$values[2]; - $data=~ s/\n//g; - $data=~ s/\s//g; #remove space - my @element = split(/:/,$data); - my $typeEl = $element[0]; - @element = split(/\|/,$data); #cut at character | - - foreach my $oneEl (@element){ - $TotalTerm{$typeEl}++; - push ( @{$functionData{$typeEl}{$mRNAID}} , $oneEl ); - if ( exists $hash_mRNAGeneLink->{$mRNAID}){ ## check if exists among our current gff annotation file analyzed - $mRNAAssociatedToTerm{$typeEl}{$mRNAID}++; - $GeneAssociatedToTerm{$typeEl}{$hash_mRNAGeneLink->{$mRNAID}}++; - } - } - } - } -} - -sub sizedPrint{ - my ($term,$size) = @_; - my $result; my $sizeTerm=length($term); - if ($sizeTerm > $size ){ - $result=substr($term, 0,$size); - return $result; - } - else{ - my $nbBlanc=$size-$sizeTerm; - 
$result=$term; - for (my $i = 0; $i < $nbBlanc; $i++){ - $result.=" "; - } - return $result; - } -} - -#Sorting mixed strings => Sorting alphabetically first, then numerically -# how to use: my @y = sort by_number @x; -sub alphaNum { - my ( $alet , $anum ) = $a =~ /([^\d]+)(\d+)/; - my ( $blet , $bnum ) = $b =~ /([^\d]+)(\d+)/; - ( $alet || "a" ) cmp ( $blet || "a" ) or ( $anum || 0 ) <=> ( $bnum || 0 ) -} - -__END__ - -=head1 NAME - -gff3manager_JD.pl - -The script take a gff3 file as input. - -Without option the script only sort the data. - -With corresponding parameters, it can add functional annotations from output files ->The blast against Prot Database file from annie allows to fill the field NAME for gene and PRODUCT for mRNA. ->The blast against Interpro Database tsv file from annie allows to fill the DBXREF field with pfam, tigr, interpro and GO terms data. -The script expand exons sharing multiple mRNA (Parent attributes contains multiple parental mRNA). One exon by parental mRNA will be created. -With the option the script will change all the ID field by an Uniq ID created from the given prefix, a letter to specify the kind of feature (G,T,C,E,U), and the feature number. - -The result is written to the specified output file, or to STDOUT. -Remark: If there is duplicate in the file they will be removed in the output. In that case you should be informed. - -=head1 SYNOPSIS - - ./gff3manager_JD.pl -f=infile.gff [ -b annie_blast_infile -i annie_interpro_infile.tsv -x annie_interpro_infile.xml -e --id ABCDEF [-gf 20] -s -utr -utrr 10 --output outfile ] - ./gff3manager_JD.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<-f>, B<--reffile>,B<-ref> , B<--gff> or B<--gff3> - -Input GFF3 file that will be read (and sorted) - -=item B<-b> or B<--blast> - -Input annie blast file that will be used to complement the features read from -the first file (specified with B<--ref>). 
- -=item B<-i> or B<--interpro> - -Input annie interpro file (.tsv) that will be used to complement the features read from -the first file (specified with B<--ref>). - -=item B<-g> or B<--gs> - -This option inform about the genome size in oder to compute more statistics. You can give the size in Nucleotide or directly the fasta file. - -=item B<-id> - -This option will changed the id name. It will create from id prefix (usually 6 letters) given as input, uniq IDs like prefixE00000000001. Where E mean exon. Instead E we can have C for CDS, G for gene, T for mRNA, U for Utr. -In the case of discontinuous features (i.e. a single feature that exists over multiple genomic locations) the same ID may appear on multiple lines. All lines that share an ID collectively represent a signle feature. - -=item B<-idau> - -This option (id all uniq) is similar to -id option but Id of features that share an ID collectively will be change by different and uniq ID. - -=item B<-gf> - -Usefull only if -id is used. -This option is used to define the number that will be used to begin to number the gene id (gf for "gene from"). By default begin by 1. - -=item B<-mf> - -Usefull only if -id is used. -This option is used to define the number that will be used to begin to number the mRNA id (mf for "mRNA from"). By default begin by 1. - -=item B<-cf> - -Useful only if -id is used. -This option is used to define the number that will be used to begin to number the CDS id (cf for "CDS from"). By default begin by 1. - -=item B<-ef> - -Useful only if -id is used. -This option is used to define the number that will be used to begin to number the exon id (ef for "Exon from"). By default begin by 1. - -=item B<-uf> - -Useful only if -id is used. -This option is used to define the number that will be used to begin to number the UTR id (uf for "UTR from"). By default begin by 1. - -=item B<-rf> - -Useful only if -id is used. 
-This option is used to define the number that will be used to begin to number the repeat id (rf for "Repeat from"). By default begin by 1. - -=item B<-ff> - -ff means fill frame. -This option is used to add the CDS frame. If frames already exist, the script overwrite them. - -=item B<-o> or B<--output> - -Output GFF file. If no output file is specified, the output will be -written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Dev/perl/gff2embl.pl b/annotation/Tools/Dev/perl/gff2embl.pl deleted file mode 100755 index 8dac753da..000000000 --- a/annotation/Tools/Dev/perl/gff2embl.pl +++ /dev/null @@ -1,183 +0,0 @@ -#!/usr/bin/env perl - -## NBIS 2015 -## jacques.dainat@nbis.se - -use strict; -use warnings; -use Pod::Usage; -use Getopt::Long; -use POSIX qw(strftime); -use Bio::SeqIO; -use Data::Dumper; -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient; -use Bio::DB::Fasta; - -my $usage = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## - -Usage: perl my_script.pl --gff Infile [--out outfile] - Getting help: - [--help] - - Input: - [--gff filename] - The name of the GFF file to convert. - - [--fasta filename] - fasta file name. - - Ouput: - [--out filename] - The name of the output file (A EMBL file). -}; - -my $outfile = undef; -my $gff = undef; -my $file_fasta=undef; -my $help; - -if( !GetOptions( - "help" => \$help, - "gff|in=s" => \$gff, - "fasta|fa=s" => \$file_fasta, - "outfile|output|o|out=s" => \$outfile)) -{ - pod2usage( { -message => "Failed to parse command line\n$usage", - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - print $usage; - exit(0); -} - -if ( ! (defined($gff)) or ! 
(defined($file_fasta)) ){ - pod2usage( { - -message => "Missing the --gff argument\n$usage", - -verbose => 0, - -exitval => 2 } ); -} - -## Manage output file -my $embl_out; -if ($outfile) { -open(my $fh, '>', $outfile) or die "Could not open file '$outfile' $!"; - $embl_out = Bio::SeqIO->new(-fh => $fh, -format => 'embl'); -} -else{ - $embl_out = Bio::SeqIO->new(-fh => \*STDOUT, -format => 'embl'); -} - -###################### -### Parse GFF input # -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $gff - }); -print ("GFF3 file parsed\n"); - -my $hash_by_group=group_features_from_omniscient($hash_omniscient); -print ("GFF3 data grouped\n"); -### MAIN ### - -### -# or for GMT formatted appropriately for your locale: -my $datestring = uc(strftime "%e-%h-%g", gmtime); - -#################### -# index the genome # -my $db = Bio::DB::Fasta->new($file_fasta); -print ("Genome fasta parsed\n"); - -#Foreach sequence (sequence = Contig/scaffold/chromosome) -foreach my $seq_id (keys %{$hash_by_group} ){ - - #create a sequence_object - my $seqObject = Bio::Seq::RichSeq->new(-seq => $db->seq($seq_id)); - - # Add creation date - $seqObject->add_date($datestring); - # Add last modification date - $seqObject->add_date($datestring); - - foreach my $gene_grouped(keys %{$hash_by_group->{$seq_id}}){ - - foreach my $feature (@{$hash_by_group->{$seq_id}{$gene_grouped}}){ - # $location->add_sub_Location($feature->location); -# print $feature->location; -# my $genef = Bio::SeqFeature::Generic->new(-location =>$location, -primary_tag => 'CDS'); - $seqObject->add_SeqFeature($feature); - - # print ref($seqObject); exit; #Check object type - # print Dumper($seqObject);exit; - } - } - # print Dumper($seqObject); - $embl_out->write_seq($seqObject); -} - -#my $db->seq( $seqid) -#while( my $feature = $gtfio->next_feature) { -# $embl_out->write_seq($seq) -#} - -__END__ - -It is probably easiest to just group things and make a split location. 
-You will have the most control over the objects you create. - -my %genes; -while( my $f = $gff->next_feature ) { - my ($group) = $feature->get_tag_values('Group'); # substitute group -with whatever you have in the group field - push @{$gene{$group}}, $feature; -} -# get a Bio::Seq object called $seq somehow, either by reading in a -fasta sequence file, etc... -while( my ($gene,$features) = each %genes ) { - my $location = Bio::Location::Split->new(); - for my $f ( @$features ) { - $location->add_sub_Location($f->location); - } - my $genef = Bio::SeqFeature::Generic->new(-location =>$location, --primary_tag => 'CDS'); - $seq->add_SeqFeature($genef); -} -my $seqio = Bio::SeqIO->new(-format => 'genbank'); -$seqio->write_seq($seq); - -=head1 NAME - -gb2embl.pl - -The script take a EMBL file as input, and will translate it in Genbank format. - -=head1 SYNOPSIS - - ./embl2gb.pl --embl=infile.gff [ -o outfile ] - -=head1 OPTIONS - -=over 8 - -=item B<--embl> - -Input EMBL file that will be read - -=item B<-o> , B<--output> , B<--out> , B<--outfile> or B<--gff> - -Output GFF file. If no output file is specified, the output will be -written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. 
- -=back - -=cut diff --git a/annotation/Tools/Dev/perl/gff3_plotGO_mapTree.pl b/annotation/Tools/Dev/perl/gff3_plotGO_mapTree.pl deleted file mode 100755 index c7ddbf30b..000000000 --- a/annotation/Tools/Dev/perl/gff3_plotGO_mapTree.pl +++ /dev/null @@ -1,293 +0,0 @@ -#!/usr/bin/env perl - -##################################################################### -# maker_checkFusionSplitBetweenTwoBuilds v1 - Jacques Dainat 10/2014 # -##################################################################### - -use strict; -use warnings; -use POSIX qw(strftime); -use Data::Dumper; -use Carp; -use Getopt::Long; -use IO::File; -use Pod::Usage; -use Statistics::R; -use NBIS::GFF3::Omniscient; -use Bio::OntologyIO; -use Bio::Tools::GFF; - -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - - -my $opt_reffile; -my $opt_obo=undef; -my $opt_output=undef; -my $opt_help = 0; -my $DefaultUTRnb=5; - -my @copyARGV=@ARGV; -if ( !GetOptions( 'f|gff|ref|reffile=s' => \$opt_reffile, - 'obo=s' => \$opt_obo, - 'o|out|output=s' => \$opt_output, - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ( ! defined($opt_reffile ) or ! $opt_output or ! $opt_obo) { - pod2usage( { - -message => "$header\nMust specify at least 2 parameters:\nReference data gff3 file (--gff)\nOne UTR option (3, 5 , both, plot)", - -verbose => 0, - -exitval => 1 } ); -} - -# ##################### -# # Manage Input File # -# ##################### - my $ref_istream = Bio::Tools::GFF->new(-file => $opt_reffile, -gff_version => 3 ) or - croak(sprintf( "Can not open '%s' for writing %s", $opt_reffile, $! 
)); - -# ######################### -# # END Manage Input File # -# ######################### -# ####################### -# # START Manage Option # -# ####################### -if (-f $opt_output){ - print "The output directory choosen already exists. Please geve me another Name.\n";exit(); -} - -my $ostreamReport = \*STDOUT or die ( sprintf( "Can not open '%s' for writing %s", "STDOUT", $! )); - -my $string1 = strftime "%m/%d/%Y at %Hh%Mm%Ss", localtime; -$string1 .= "\n\nusage: $0 @copyARGV\n\n"; - -print $ostreamReport $string1; - -# ##################################### -# # END Manage OPTION -# ##################################### -# print "parse obo file\n"; -# my $parser = Bio::OntologyIO->new -# ( -format => "obo", -# -file => $opt_obo); - -# my %hash_onto; -# while(my $ont = $parser->next_ontology()) { -# #print Dumper($ont); -# my $ontoName= $ont->name(); -# print "read ontology ",$ont->name()," with ", -# scalar($ont->get_root_terms)," root terms, and ", -# scalar($ont->get_all_terms)," total terms, and ", -# scalar($ont->get_leaf_terms)," leaf terms\n"; -# $hash_onto{$ontoName} = $ont; -# } -# my $hash_bioprocess = $hash_onto{'biological_process'}; -# my @term = $hash_bioprocess->find_terms(-identifier => "GO:0071428"); -# #print Dumper($term); -# print $term[0]->identifier(), "\n"; -# print $term[0]->name(), "\n"; -# # print $term[0]->description(), "\n"; -# # print $term->definition(), "\n"; -# # print $term->is_obsolete(), "\n"; -# # print $term->comment(), "\n"; -# exit; - ####################### - # MAIN # -# >>>>>>>>>>>>>>>>>>>>>>>>> ####################### <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - - -# ################################# -# # Manage Ouput Directory / File # -# ################################# - - -###################### -### Parse GFF input # -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $opt_reffile - }); -print("Parsing Finished\n\n"); -### END Parse GFF input # -######################### - 
-########################### -# get GO terms information -########################### -my %GOdist; -foreach my $tag_l1 (keys %{$hash_omniscient->{'level1'}}){ - foreach my $id_l1 (keys %{$hash_omniscient->{'level1'}{$tag_l1}}){ - - ################# - # == LEVEL 2 == # - ################# - foreach my $tag_l2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - if ( exists ($hash_omniscient->{'level2'}{$tag_l2}{$id_l1} ) ){ - foreach my $feature_level2 ( @{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}}) { - if($feature_level2->has_tag('Ontology_term')){ - my @GOterms=$feature_level2->get_tag_values('Ontology_term'); - foreach my $GOterm (@GOterms){ - $GOdist{$GOterm}++; - } - } - else{ - $GOdist{"none"}++; - } - } - } - } - } -} - - -##################### -# Plot distribution - - - - -my $txtFile; -my $outPlot; - -$txtFile = $opt_output.".txt"; -$outPlot = $opt_output.".pdf"; - -#print file thtat will be read by R -open(FH, ">".$txtFile) || die "Erreur E/S:$!\n"; -my $firstLine="yes"; -foreach my $GO_type (keys %GOdist) { - if($firstLine){ - print FH $GO_type."\t".0; - $firstLine=undef; - }else{ - print FH "\n".$GO_type."\t".0; - } -} -close FH; -exit; -my $R = Statistics::R->new() or die "Problem with R : $!\n"; - -#R command -#$R->run(q`install.packages( "treemap" );`); -#$R->run(q`library(treemap) `); - - $R->send( - qq` - #install library if needed - if(!require(treemap)){ - chooseCRANmirror() - options("repos") - install.packages("treemap") - library(treemap) - } - library(treemap)` - ); - - - -# listValues=as.matrix(read.table("$txtFile", sep="\t", he=F)) ##///!!!\\\\\ -# legendToDisplay=paste("Number of value used : ",length(listValues)) -# listValueMoreThan <- listValues[listValues[,1]>5,] - -# pdf("$outPlot") -# plot(listValues[,2]~listValues[,1], xlab="Contig size", ylab="Frequency", main="Size distribution of $utr_type") -# dev.off() - -# pdf("$outPlotOver") -# 
plot(listValueMoreThan[,2]~listValueMoreThan[,1], xlab="Contig size", ylab="Frequency", main="Size distribution of $utr_type over 5") -# dev.off()` -# ); - -# Close the bridge -$R->stopR(); - -# Delete temporary file -unlink "$txtFile"; - - - - - -####################################################################################################################### - #################### - # methods # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - -__DATA__ - - -__END__ - - -=head1 NAME - -maker_manageUTR.pl - Detect the genes containing too much UTR's exon according to a choosen threshold. -If no UTR option (3, 5, 3 and 5, both) is given the threshold will be not used. -option 3 and 5 together is different of "both". In the first case the gene is discarded if either the 3' or the 5' UTR contains more exon than the threshold given. -In the second case, will be discarded only the genes where the addition of UTR's exon of both side is over the threshold given. - -=head1 SYNOPSIS - - ./maker_manageUTR.pl --ref=infile --three --five -p --out=outFile - ./maker_manageUTR.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff>, B<--ref>, B<--reffile> or B<-f> - -Input GFF3 file correponding to gene build. - -=item B<-n>, B<--nb> or B<--number> - -Threshold of exon's number of the UTR. Over or equal to this threshold, the UTR will be discarded. Default value is 5. - -=item B<-3>, B<--three> or B<--tree_prime_utr> - -The threshold of the option will be applied on the 3'UTR. - -=item B<-5>, B<--five> or B<--five_prime_utr> - -The threshold of the option will be applied on the 5'UTR. - -=item B<-b>, B<--both> or B<--bs> - -The threshold of the option will be applied on genes where the number of UTR exon (3' and 5' additioned) is over it. - -=item B<--p>, B<--plot> or B<-o> - -Allows to create an histogram in pdf of UTR sizes distribution. 
- -=item B<--out>, B<--output> or B<-o> - -Output gff3 file where the gene incriminated will be write. - -=item B<--help> or B<-h> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/EnsEMBL/chromosome_chunk.pl b/annotation/Tools/EnsEMBL/chromosome_chunk.pl deleted file mode 100755 index 21bdeaf4b..000000000 --- a/annotation/Tools/EnsEMBL/chromosome_chunk.pl +++ /dev/null @@ -1,88 +0,0 @@ -#!/usr/local/env/perl -use strict; -use warnings; - -=head1 NAME - -chunk_chromosome.pl - -=head1 SYNOPSIS - -chunk_chromosome.pl - -=head1 DESCRIPTION - -Split a chromosome assembly into chunks (50kb by default). - -=head1 OPTIONS - - -fasta_file the fasta file with chromosome sequences - -chunk_file the fasta file of chunks that will be created [default .chunk] - -agp_file the name of the agp file that will be created [default .agp] - -size the size of the chunks, in bases [default 50000] - -help displays this documentation with PERLDOC - -=cut - -use Getopt::Long; -use Bio::Seq; -use Bio::SeqIO; - -my $help; -my $fasta_file; -my $chunk_file; -my $agp_file; -my $chunk_size; - -&GetOptions( - 'fasta_file=s' => \$fasta_file, - 'chunk_file:s' => \$chunk_file, - 'agp_file:s' => \$agp_file, - 'size:i' => \$chunk_size, - 'h|help' => \$help, -) or ($help = 1); - -if(! 
$fasta_file || !-e $fasta_file) { - print STDERR "Fasta file not given or doesn't exist\n"; -} - -if ($help) { - exec('perldoc', $0); -} - -$chunk_file = "$fasta_file.chunk" unless $chunk_file; -$agp_file = "$fasta_file.agp" unless $agp_file; -$chunk_size = 50000 unless $chunk_size; - -my $seq_in = new Bio::SeqIO(-format=>'Fasta', -file=>$fasta_file); -my $seq_out = new Bio::SeqIO(-format=>'Fasta', -file=>">$chunk_file"); -my $assembly; -my $assembly_index = 1; -while ( my $seq = $seq_in->next_seq ) { - my $name = $seq->id; - my $chunk_index = 1; - my $obj_seq = $seq->seq; - - while (length($obj_seq) > $chunk_size) { - my $chunk = substr($obj_seq, 0, $chunk_size, ''); - my $new_seq = new Bio::Seq(-id=>"$name\_$chunk_index", -seq=>$chunk); - $seq_out->write_seq($new_seq); - my $obj_start = 1 + (($chunk_index-1) * $chunk_size); - my $obj_end = $obj_start + $new_seq->length - 1; - my @assembly = ($name, $obj_start, $obj_end, $assembly_index, 'O', "$name\_$chunk_index", 1, $new_seq->length, '+'); - $assembly .= join("\t", @assembly)."\n"; - $chunk_index++; - $assembly_index++; - } - my $new_seq = new Bio::Seq(-id=>"$name\_$chunk_index", -seq=>$obj_seq); - $seq_out->write_seq($new_seq); - my $obj_start = 1 + (($chunk_index-1) * $chunk_size); - my $obj_end = $obj_start + $new_seq->length - 1; - my @assembly = ($name, $obj_start, $obj_end, $assembly_index, 'O', "$name\_$chunk_index", 1, $new_seq->length, '+'); - $assembly .= join("\t", @assembly)."\n"; - $assembly_index++; -} - -open(AGP, ">$agp_file"); -print AGP $assembly; -close(AGP); diff --git a/annotation/Tools/EnsEMBL/clone_database.ksh b/annotation/Tools/EnsEMBL/clone_database.ksh deleted file mode 100755 index 50d2d6687..000000000 --- a/annotation/Tools/EnsEMBL/clone_database.ksh +++ /dev/null @@ -1,165 +0,0 @@ -#!/bin/ksh -# $Id: clone_database.ksh,v 1.2 2013-11-06 16:55:45 ak4 Exp $ - -self="$0" -self_base="${self##*/}" - -function parse_dbarg -{ - # Arguments: - # $1 a name of a parameter to be used for 
output. - # $2 is a database specification on the form "database@host:port". - # - # This function parses the database specification (the ":port" part is - # optional) and returns a string which can be eval'ed and which then - # sets the '.dbname', '.dbhost' and '.dbport' fields of the compound - # variable whose name is given as the first argument. - - print $2 | tr '@:' ' ' | - awk -vparam="$1" ' - NF == 3 { - printf("%s=( dbname='%s' dbhost='%s' typeset -i dbport=%d )", - param, $1, $2, $3); - } - NF == 2 { - printf("%s=( dbname='%s' dbhost='%s' typeset -i dbport=3306 )", - param, $1, $2); - }' -} - -function usage -{ - cat < /dev/null 2>&1") -end - -require 'fileutils' -require 'net/ftp' - -species = ARGV.shift -version = ARGV.shift -pathVersion="" -nbis_path="/projects/references/genomes" - -abort "You should really remember to provide a species_name and optionaly a release!" if species.nil? - -raise "This does not look like a valid species name (must be lower case scientific name with underscore)" unless species.match(/[a-z]*_[a-z]*/) -if version.nil? - puts "No EnsEMBL release provided, we will take the current release as default" - pathVersion ="current" -else - pathVersion="release-"+version -end - -$ftp=Net::FTP.new -#connect to Ensembl vertebrates ftp -$ftp.connect("ftp.ensembl.org") -$ftp.passive = true -$ftp.login - -# Check for fastx toolkit -fasta_loaded = false -cufflinks_loaded = false - -unless command?("fasta_formatter") - puts "Couldn't find fasta_formatter in your PATH" -# command("module load fastx/0.0.13") -# fasta_loaded = true - raise "You must load the fastx module !" -end - -unless command?("gffread -x") - puts "Couldn't find cufflinks in your PATH, trying to load it" -# system("module load cufflinks/2.2.1") -# cufflinks_loaded = true - raise "You must load the cufflinks module !" 
-end - -found="no" -fetched_path="" -puts "Downloading genome for #{species}, release #{pathVersion} on ensembl FTP server" -#dirExists = ftp.ChangeRemoteDir("/pub/#{pathVersion}/fasta/#{species}/dna/") -begin - if pathVersion == "current" - fetched_path="/pub/#{pathVersion}_fasta/#{species}/dna/" - $ftp.chdir("/pub/#{pathVersion}_fasta/#{species}/dna/") - else - fetched_path="/pub/#{pathVersion}/fasta/#{species}/dna/" - $ftp.chdir("/pub/#{pathVersion}/fasta/#{species}/dna/") - end -rescue - puts "Not found on ensembl FTP server !!" -else - found="yes" -end - -if found=="no" - puts "Downloading genome for #{species}, release #{pathVersion} on ensemblgenomes FTP server" - $ftp.close - #connect to the other Ensembl ftp server - $ftp.connect("ftp.ensemblgenomes.org") - $ftp.passive = true - $ftp.login - begin - puts "Try on Plants..." - fetched_path="/pub/plants/#{pathVersion}/fasta/#{species}/dna/" - $ftp.chdir("/pub/plants/#{pathVersion}/fasta/#{species}/dna/") - rescue - puts "Not found on plants !!" - else - found="yes" - end - if found=="no" - begin - puts "Try on metazoa..." - fetched_path="/pub/metazoa/#{pathVersion}/fasta/#{species}/dna/" - $ftp.chdir("/pub/metazoa/#{pathVersion}/fasta/#{species}/dna/") - rescue - puts "Not found on metazoa !!" - else - found="yes" - end - end - if found=="no" - begin - puts "Try on fungi..." - fetched_path="/pub/fungi/#{pathVersion}/fasta/#{species}/dna/" - $ftp.chdir("/pub/fungi/#{pathVersion}/fasta/#{species}/dna/") - rescue - puts "Not found on fungi !!" - else - found="yes" - end - end - if found=="no" - begin - puts "Try on protists..." - fetched_path="/pub/protists/#{pathVersion}/fasta/#{species}/dna/" - $ftp.chdir("/pub/protists/#{pathVersion}/fasta/#{species}/dna/") - rescue - puts "Not found on protists !!" - else - found="yes" - end - end - if found=="no" - begin - puts "Try on bacteria..." 
- fetched_path="/pub/bacteria/#{pathVersion}/fasta/#{species}/dna/" - $ftp.chdir("/pub/bacteria/#{pathVersion}/fasta/#{species}/dna/") - rescue - puts "Not found on bacteria !!" - else - found="yes" - end - end -end - -if found == "no" - raise "Something went wrong. We check all Ensembl database and this species name was not found (Check spelling!)" -end - - -releaseName=String($ftp.pwd).split('/')[2] -pathToWrite="" -if File.directory?(nbis_path) - pathToWrite="#{nbis_path}/#{species}/EnsEMBL/#{releaseName}"; - -else - puts "apparently we are not on the nbis annotation cluster" - pathToWrite="#{species}_#{releaseName}" -end - -# check presence of directories -if File.directory?(pathToWrite) - raise "The directory #{pathToWrite} exists ! Apparently you already download that genome release." -end - -puts "I will save result here: #{pathToWrite}" - - FileUtils.mkdir_p("#{pathToWrite}") - FileUtils.cd("#{pathToWrite}") - file = $ftp.nlst.find{|f| f.include?("dna.primary_assembly.fa.gz") } - - file = $ftp.nlst.find{|f| f.include?("dna.toplevel.fa.gz")} if file.nil? or file.length < 1 - - puts "Downloading genome sequence: #{file}" - $ftp.get(file,File.basename(file)) # Download the file - puts "Extracting genomes" - #system("rm *.fa")unless Dir.entries(Dir.getwd).select{|e| e.include?(".fa")}.empty? 
#remove .fa file if exists - system("gunzip #{file}") - file = Dir.entries(Dir.getwd).find{|f| f.include?(".fa")} - puts "Cleaning genome #{file}" - system("sed 's/ dna.*//g' #{file} > #{species}.fa.tmp") # Remove the rubbish ensembl encodes in the FASTA headers - system("rm #{file}") - puts "Formatting genome" - system("/sw/bioinfo/fastx-0.0.13/fasta_formatter -i #{species}.fa.tmp -o tmp.fa -w 80") # Ensure that FASTA lines are of equal length - system("rm *fa.tmp") - system("mv tmp.fa #{species}.fa") - system("rm *toplevel*") - system("rm *.tmp") - -begin - puts "Downloading GTF for #{species}" - #prepare GTF path - gtfPath=fetched_path.gsub(/\/fasta\//,"/gtf/") - gtfPath=gtfPath.gsub(/\/dna\//,"/") - gtfPath=gtfPath.gsub(/_fasta\//,"_gtf/") - puts "FTP gtf path ckecked: #{gtfPath}" - $ftp.chdir("#{gtfPath}") - file = $ftp.nlst.find{|f| f.include?("gtf.gz")} - raise "No GTF file was found for this species" if file.nil? - $ftp.get(file,File.basename(file)) - system("gunzip *.gz") - system("mv *.gtf #{species}.gtf") - warn "Reformatting GTF file" - system("gffread /projects/references/genomes/#{species}/EnsEMBL/#{releaseName}/#{species}.gtf -L -F -g /projects/references/genomes/#{species}/EnsEMBL/#{releaseName}/#{species}.fa -T -o tmp.gtf") # Make EnsEMBL gtf fully compatible with Cufflinks and Tophat - system("mv tmp.gtf #{species}.gtf") - system("sed 's/gene_name \"[A-Za-z0-9_]*\"\; //g' #{species}.gtf > #{species}.no_names.gtf") # Create a copy without gene_name attribute (needed for cufflinks package) -rescue - raise "Something went wrong. Maybe no GTF file exists for that species." -end - -$ftp.close - -puts "All Done!" 
- - -# Unload modules only if we had to specifically load them for this script - -#system("module unload cufflinks/2.1.1") if cufflinks_loaded -#system("module unload fastx/0.0.13") if fasta_loaded diff --git a/annotation/Tools/EnsEMBL/rakefile_create_project.rb b/annotation/Tools/EnsEMBL/rakefile_create_project.rb deleted file mode 100755 index f4674dd92..000000000 --- a/annotation/Tools/EnsEMBL/rakefile_create_project.rb +++ /dev/null @@ -1,722 +0,0 @@ -#!/usr/bin/ruby -# Loads a genome assembly into a new EnsEMBL database -# = PREREQS: -# - contig2chromosome AGP file (.contigs.agp) -# - Contig fasta file -# = OPTIONAL: -# - Supercontig2contig AGP file (.supercontigs.agp) -# = NOTE: -# Additional analyses need to be added to this script as -# needed! (last part of the setup) -# = USAGE -# 0. Check the settings in this script, especially the analyses/rules - -# 3. rake submission:create genome=my_genome_name -# 4. Manually edit configs/pipeline-configs/modules/Bio/EnsEMBL/Pipelines/Config/BatchQueue.pm -# 5. rake sanity_check genome=my_genome_name -# 6. submit with rulemanager.pl - -GENOME = ENV['genome'] ||= 'test' -CONTIGS = FileList["*contigs.fa"][0] -CONTIG_AGP_FILE = FileList['*contigs.agp'][0] - -raise "Contig Fasta file is missing" unless CONTIGS -raise "Contig AGP file is missing!" unless CONTIG_AGP_FILE - -# The supercontig is optional, let's account for that. -supercontig_file = FileList['*supercontig*.agp'] -supercontig_file.length > 0 ? SUPERCONTIG_AGP_FILE = supercontig_file : SUPERCONTIG_AGP_FILE = nil - -## TO DO LIST ### - -# Run Tests and actual analyses (?) 
-# Figure out how to set PERL5LIB from script - -################################ -# CONFIGURATION OF ENVIRONMENT # -################################ - -MYSQL_ROOT = "ensembl_admin" -MYSQL_ROOT_PW = "annotationadmin" -MYSQL_USER = "ensembl_user" -MYSQL_PW = "annotation" -MYSQL_PORT = 3306 -MYSQL_HOST = ENV['host'] || 'bdb' - -ENSEMBL_PATH = "/sw/ensembl/ensembl-live" - -DUMP_PATH = "dump/" - -################################ -# PRODUCTION DATABASE ########## -################################ - -# The Production DB is a fully finished database from which we can steal meta data -# Ideally, we use human for this. - -PRODUCTION_DB="homo_sapiens_core_74_37" - -################################ -# LOCATION OF REFERENCE FILES ## -################################ - -protein_db = "/projects/references/databases/uniprot/reference_proteomes/caenorhabditis_elegans.fa" -uniprot_db = "/projects/references/databases/uniprot/uniprot_sprot.clean" - -################################ -# LOCATION OF BINARIES########## -################################ - -repeat_masker = "/sw/bioinfo/RepeatMasker/RepeatMasker" -pmatch = "pmatch" - -def command?(command) - system("which #{ command} > /dev/null 2>&1") -end - -raise "Missing RepeatMasker binary at #{repeat_masker}" unless command?(repeat_masker) - -################################ -# Quick API for short checks ### -################################ - -require 'active_record' - -module EnsemblDB - - include ActiveRecord - - class DBConnection < ActiveRecord::Base - - self.abstract_class = true - self.pluralize_table_names = false - - def self.connect(args={}) - establish_connection( - :adapter => 'mysql', - :host => MYSQL_HOST, - :database => "ensembl_#{GENOME}", - :username => args[:username] ||= MYSQL_USER, - :password => args[:password] ||= MYSQL_PW - ) - end - end - - class InputIdAnalysis < DBConnection - self.primary_key = 'input_id' - end - -end - -###################################### -# CONFIGURATION OF ANALYSES/RULES #### 
-###################################### - - -analyses = [ - - "[SubmitContig]" , - "input_id_type=CONTIG" , - "module=Dummy", - "" , - "[repeatmask]" , - "db=repbase" , - "db_version=0129" , - "db_file=repbase" , - "program=RepeatMasker" , - "program_version=3.1.8" , - "program_file=#{repeat_masker}" , - "parameters=-nolow -species nematoda -s" , - "module=RepeatMasker" , - "gff_source=RepeatMasker" , - "gff_feature=repeat" , - "input_id_type=CONTIG" , - "" , - "[genscan]", - "db=HumanIso.smat", - "db_file=/sw/bioinfo/genscan/HumanIso.smat", - "program_file=/sw/bioinfo/genscan/genscan", - "module=Genscan", - "input_id_type=CONTIG", - "", - "[trnascan]", - "db=trna", - "program_file=/sw/bioinfo/tRNAscan-1.3.1/bin/tRNAscan-SE", - "module=tRNAscan_SE", - "parameters=-G", - "input_id_type=CONTIG", - "", - "[uniprot]" , - "db=uniprot", - "db_file=#{uniprot_db}" , - "program=blastall" , - "program_file=blastall", - "module=BlastGenscanPep", - "parameters=", - "input_id_type=CONTIG", - "", - "[blast_wait]", - "module=Accumulator", - "input_id_type=ACCUMULATOR", - "", - -] - -rules = [ - "[repeatmask]" , - "condition=SubmitContig", - "", - "[trnascan]", - "condition=SubmitContig", - "", - "[genscan]", - "condition=SubmitContig", - "", - "[uniprot]" , - "condition=genscan" , - "" , - "[blast_wait]", - "condition=uniprot", - "", - -] - -############### -# Directories # -############### - -directory 'log' -directory 'dump' -directory 'analyses_and_rules' -directory 'configs/pipeline-configs/modules/Bio/EnsEMBL/' -directory 'output' - -################ -# RAKE targets # -################ - -# Rake targets for :environment -db_create_log = "log/sql.#{GENOME}.sql" -db_coretable_log = "log/sql.coretables.#{GENOME}.sql" -db_pipelinetable_log = "log/sql.pipelinetables.#{GENOME}.sql" - -# Rake targets for :assembly -db_chromosome_level_log = "log/perl.assembly.chromosome.#{GENOME}.log" -db_supercontig_level_log = "log/perl.assembly.supercontig.#{GENOME}.log" -db_contig_level_log = 
"log/perl.assembly.contig.#{GENOME}.log" -db_chromosome2contig_log = "log/perl.assembly.chromosome2contig.map.#{GENOME}.log" -db_supercontig2contig_log = "log/perl.assembly.supercontig2contig.map.#{GENOME}.log" -db_set_attributes_log = "log/perl.assembly.set_attributes.#{GENOME}.log" -db_set_toplevel_log = "log/perl.assembly.set_toplevel.#{GENOME}.log" - -dumped_sequences_log = "log/dumped_seq.toplevel.#{GENOME}.log" -dumped_database = "dump/database.#{GENOME}.dump" -load_genewise_data = "log/mysql.load_genewise.log" - - -##################################### -# START OF PIPELINE! ################ -##################################### - -desc 'Creates the blank database and tables' -namespace :environment do - - file db_create_log => ['log', CONTIGS ] do - warn "---------------------------------------------------" - warn "Initializing EnsEMBL database ensembl_#{GENOME}" - warn "Initializing analysis database genewise_#{GENOME}" - warn "---------------------------------------------------" - - # 1. Drop DBs if exists - system("mysql -u#{MYSQL_ROOT} -h#{MYSQL_HOST} -e 'DROP DATABASE IF EXISTS ensembl_#{GENOME}'") - system("mysql -u#{MYSQL_ROOT} -h#{MYSQL_HOST} -e 'DROP DATABASE IF EXISTS genewise_#{GENOME}'") - - # 2. Create DBs - system("mysql -u#{MYSQL_ROOT} -h#{MYSQL_HOST} -e 'CREATE DATABASE ensembl_#{GENOME}; GRANT SELECT,INSERT,UPDATE,DELETE ON ensembl_#{GENOME}.* TO ensembl_user;' > #{db_create_log}") - system("mysql -u#{MYSQL_ROOT} -h#{MYSQL_HOST} -e 'CREATE DATABASE genewise_#{GENOME}; GRANT SELECT,INSERT,UPDATE,DELETE ON genewise_#{GENOME}.* TO ensembl_user;' >> #{db_create_log}") - - end - - file db_coretable_log => db_create_log do - - warn "---------------------------------------------------" - warn "Loading CORE tables into database ensembl_#{GENOME}" - warn "---------------------------------------------------" - - # 1. 
Load CORE tables - system("mysql -D ensembl_#{GENOME} -u#{MYSQL_USER} -p#{MYSQL_PW} -h #{MYSQL_HOST} < #{ENSEMBL_PATH}/ensembl/sql/table.sql > #{db_coretable_log}") - system("mysql -D genewise_#{GENOME} -u#{MYSQL_USER} -p#{MYSQL_PW} -h #{MYSQL_HOST} < #{ENSEMBL_PATH}/ensembl/sql/table.sql >> #{db_coretable_log}") - - end - - file db_pipelinetable_log => db_coretable_log do - - warn "-------------------------------------------------------" - warn "Loading pipeline tables into database ensembl_#{GENOME}" - warn "-------------------------------------------------------" - - # 1. Load Pipeline tables - system("mysql -D ensembl_#{GENOME} -u#{MYSQL_USER} -p#{MYSQL_PW} -h #{MYSQL_HOST} < #{ENSEMBL_PATH}/ensembl-pipeline/sql/table.sql > #{db_pipelinetable_log}") - system("mysql -D genewise_#{GENOME} -u#{MYSQL_USER} -p#{MYSQL_PW} -h #{MYSQL_HOST} < #{ENSEMBL_PATH}/ensembl-pipeline/sql/table.sql >> #{db_pipelinetable_log}") - - # Fix Engine of job table - system("mysql -u#{MYSQL_ROOT} -h#{MYSQL_HOST} -D ensembl_#{GENOME} -e 'ALTER TABLE job_status ENGINE=MyISAM;'") - system("mysql -u#{MYSQL_ROOT} -h#{MYSQL_HOST} -D genewise_#{GENOME} -e 'ALTER TABLE job_status ENGINE=MyISAM;'") - - # Fix the table schema to be compatible with Apollo - system("mysql -u#{MYSQL_USER} -p#{MYSQL_PW} -h#{MYSQL_HOST} -D ensembl_#{GENOME} -e 'create view gene_stable_id AS select gene_id,stable_id,version,created_date,modified_date FROM gene;create view transcript_stable_id AS SELECT transcript_id,stable_id,version,created_date,modified_date FROM transcript;create view exon_stable_id AS SELECT exon_id,stable_id,version,created_date,modified_date FROM exon;create view translation_stable_id AS SELECT translation_id,stable_id,version,created_date,modified_date FROM translation;'") - - # 2. 
Report back the schema version of EnsEMBL used for initialization - warn "-------------------------------------" - warn " Schema version used for this Db is: " - system("mysql -D ensembl_#{GENOME} -u#{MYSQL_USER} -p#{MYSQL_PW} -h #{MYSQL_HOST} -e 'select meta_value from meta where meta_key = \"schema_version\";'") - - - end - - desc 'Creates a new EnsEMBL database' - task :db => db_pipelinetable_log - -end - -######################################## -# LOAD ASSEMBLY INTO DB ################ -######################################## - -desc 'Loads the assembly files into the DB' -namespace :assembly do - - file db_chromosome_level_log => db_pipelinetable_log do - warn "---------------------------------------------------" - warn "Loading chromosomes into database ensembl_#{GENOME}" - warn "---------------------------------------------------" - - system("perl #{ENSEMBL_PATH}/ensembl-pipeline/scripts/load_seq_region.pl -dbhost #{MYSQL_HOST} -dbuser #{MYSQL_USER} -dbport #{MYSQL_PORT} -dbname ensembl_#{GENOME} -dbpass #{MYSQL_PW} -coord_system_name chromosome -coord_system_version v#{GENOME} -rank 1 -default_version -agp_file #{CONTIG_AGP_FILE} > #{db_chromosome_level_log}") - end - - file db_supercontig_level_log => db_chromosome_level_log do - - if SUPERCONTIG_AGP_FILE - warn "---------------------------------------------------" - warn "Loading supercontigs into database ensembl_#{GENOME}" - warn "---------------------------------------------------" - - system("perl #{ENSEMBL_PATH}/ensembl-pipeline/scripts/load_seq_region.pl -dbhost #{MYSQL_HOST} -dbuser #{MYSQL_USER} -dbport #{MYSQL_PORT} -dbname ensembl_#{GENOME} -dbpass #{MYSQL_PW} -coord_system_name supercontig -coord_system_version v#{GENOME} -rank 2 -default_version -agp_file #{SUPERCONTIG_AGP_FILE} > #{db_supercontig_level_log}") - else - system("echo 'Nothing to do' > #{db_supercontig_level_log}") - end - - end - - file db_contig_level_log => db_chromosome_level_log do - warn 
"---------------------------------------------------" - warn "Loading contigs into database ensembl_#{GENOME}" - warn "---------------------------------------------------" - - SUPERCONTIG_AGP_FILE ? seq_region_id = 3 : seq_region_id = 2 - system("perl #{ENSEMBL_PATH}/ensembl-pipeline/scripts/load_seq_region.pl -dbhost #{MYSQL_HOST} -dbuser #{MYSQL_USER} -dbport #{MYSQL_PORT} -dbname ensembl_#{GENOME} -dbpass #{MYSQL_PW} -coord_system_name contig -coord_system_version v#{GENOME} -rank #{seq_region_id} -sequence_level -default_version -fasta_file #{CONTIGS} > #{db_contig_level_log} 2> /dev/null") - end - - file db_chromosome2contig_log => db_contig_level_log do - - warn "-----------------------------------------------------------------" - warn "Loading chromosome2contig mapping into database ensembl_#{GENOME}" - warn "-----------------------------------------------------------------" - - system("perl #{ENSEMBL_PATH}/ensembl-pipeline/scripts/load_agp.pl -dbhost #{MYSQL_HOST} -dbuser #{MYSQL_USER} -dbport #{MYSQL_PORT} -dbname ensembl_#{GENOME} -dbpass #{MYSQL_PW} -assembled_name chromosome -component_name contig -agp_file #{CONTIG_AGP_FILE} > #{db_chromosome2contig_log}") - end - - file db_supercontig2contig_log => db_chromosome2contig_log do - - if SUPERCONTIG_AGP_FILE - - warn "------------------------------------------------------------------" - warn "Loading supercontig2contig mapping into database ensembl_#{GENOME}" - warn "------------------------------------------------------------------" - - system("perl #{ENSEMBL_PATH}/ensembl-pipeline/scripts/load_agp.pl -dbhost #{MYSQL_HOST} -dbuser #{MYSQL_USER} -dbport #{MYSQL_PORT} -dbname ensembl_#{GENOME} -dbpass #{MYSQL_PW} -assembled_name supercontig -component_name contig -agp_file #{SUPERCONTIG_AGP_FILE} > #{db_supercontig2contig_log}") - - else - system("echo 'Nothing to do' > #{db_supercontig2contig_log}") - end - - end - - file db_set_attributes_log => [ 'dump', db_supercontig2contig_log ] do - warn 
"-------------------------------------------------------------" - warn "Setting attributes for assembly in database ensembl_#{GENOME}" - warn "-------------------------------------------------------------" - - system("perl #{ENSEMBL_PATH}/ensembl-production/scripts/production_database/populate_production_db_tables.pl -d ensembl_#{GENOME} -h #{MYSQL_HOST} -u #{MYSQL_USER} -p #{MYSQL_PW} -dumppath #{DUMP_PATH} -md #{PRODUCTION_DB} -mh #{MYSQL_HOST} -mu #{MYSQL_USER} -mp #{MYSQL_PW} > #{db_set_attributes_log}") - - end - - file db_set_toplevel_log => db_set_attributes_log do - warn "----------------------------------------------------------------------" - warn "Setting toplevel attributes for assembly in database ensembl_#{GENOME}" - warn "----------------------------------------------------------------------" - - system("perl #{ENSEMBL_PATH}/ensembl-pipeline/scripts/set_toplevel.pl -dbhost #{MYSQL_HOST} -dbuser #{MYSQL_USER} -dbport #{MYSQL_PORT} -dbname ensembl_#{GENOME} -dbpass #{MYSQL_PW} > #{db_set_toplevel_log}") - - end - - file dumped_sequences_log => db_set_toplevel_log do - warn "-------------------------------------------------------------" - warn "Dumping toplevel sequences from assembly in ensembl_#{GENOME}" - warn "-------------------------------------------------------------" - - system("perl #{ENSEMBL_PATH}/ensembl-analysis/scripts/sequence_dump.pl -dbhost #{MYSQL_HOST} -dbuser #{MYSQL_USER} -dbport #{MYSQL_PORT} -dbname ensembl_#{GENOME} -dbpass #{MYSQL_PW} -coord_system_name chromosome -output_dir dump > #{dumped_sequences_log}") - - end - - file dumped_database => dumped_sequences_log do - warn "--------------------------------------------------" - warn "Dumping database ensembl_#{GENOME} (safe-keeping!)" - warn "--------------------------------------------------" - - system("mysqldump --opt -u#{MYSQL_USER} -p#{MYSQL_PW} -h #{MYSQL_HOST} ensembl_#{GENOME} > #{dumped_database}") - end - - file load_genewise_data => dumped_database do - warn 
"----------------------------------------------------" - warn " Loading dumped data into gene wise for replication" - warn "----------------------------------------------------" - - system("mysql -D genewise_#{GENOME} -u#{MYSQL_USER} -p#{MYSQL_PW} -h #{MYSQL_HOST} < #{dumped_database} > #{load_genewise_data}") - end - - desc 'Loads assembly into Database' - task :load => load_genewise_data - -end - - -############################### -# Generate basic config files # -############################### - -analysis_file = "configs/pipeline-confings/modules/Bio/EnsEMBL/Analysis/RunnableDB.pm" -pipeline_file = "configs/pipeline-confings/modules/Bio/EnsEMBL/Pipeline/Analysis.pm" - -namespace :configs do - - file analysis_file => [ 'configs/pipeline-configs/modules/Bio/EnsEMBL/' , "assembly:load" ] do - warn "---------------------------------------------------" - warn "Copying the analysis config files from CVS checkout" - warn "---------------------------------------------------" - - system("cp -R #{ENSEMBL_PATH}/ensembl-pipeline/modules/Bio/EnsEMBL/Pipeline configs/pipeline-configs/modules/Bio/EnsEMBL/") - end - - file pipeline_file => analysis_file do - warn "---------------------------------------------------" - warn "Copying the pipeline config files from CVS checkout" - warn "---------------------------------------------------" - - system("cp -R #{ENSEMBL_PATH}/ensembl-analysis/modules/Bio/EnsEMBL/Analysis configs/pipeline-configs/modules/Bio/EnsEMBL/") - end - - task :create => pipeline_file do - warn "Exporting updated PERL5LIB" - ENV['PERL5LIB'] = "#{ENV['PERL5LIB']}:/#{Dir.getwd}/configs/pipeline-configs/modules" - end - -end - -############################################# -# INITIALIZING ANALYSES ##################### -############################################# - -config_file = "analyses_and_rules/contig_ana.conf" -analysis2db_log = "log/analysis.contig2db.#{GENOME}.log" - -# Analyses define which software should be run -# and on which sequence type (Contig, 
Chromosome, etc) - -desc 'Analyses to be performed on the assembly' -namespace :analysis do - - file config_file => [ 'analyses_and_rules' , "configs:create" ] do - - f = File.new(config_file, "w+") - - f.puts analyses.join("\n") - - f.close - - end - - file analysis2db_log => config_file do - warn "-------------------------------------------------------------" - warn "Adding analyses into DB ensembl_#{GENOME}" - warn "-------------------------------------------------------------" - system("perl #{ENSEMBL_PATH}/ensembl-pipeline/scripts/analysis_setup.pl -dbhost #{MYSQL_HOST} -dbuser #{MYSQL_USER} -dbport #{MYSQL_PORT} -dbname ensembl_#{GENOME} -dbpass #{MYSQL_PW} -read -file #{config_file} > #{analysis2db_log}") - - end - - task :define => analysis2db_log - -end - -ana_rule = "analyses_and_rules/rules.conf" -input_ids_log = "log/rules.make_input_ids.log" - -############################### -# Generate rules for analyses # -############################### - -# Rules define the hierarchy of analyses -# Names correspond to the analysis objects -namespace :rules do - - file ana_rule => "analysis:define" do - warn "---------------------------" - warn "Creating rules for analyses" - warn "---------------------------" - - f = File.new(ana_rule,"w+") - - f.puts rules.join("\n") - - f.close - - system("perl #{ENSEMBL_PATH}/ensembl-pipeline/scripts/rule_setup.pl -dbhost #{MYSQL_HOST} -dbuser #{MYSQL_USER} -dbport #{MYSQL_PORT} -dbname ensembl_#{GENOME} -dbpass #{MYSQL_PW} -read -file #{ana_rule}") - - end - - file input_ids_log => ana_rule do - - warn "---------------------------------" - warn "Generating input ids for analysis" - warn "---------------------------------" - - system("perl #{ENSEMBL_PATH}/ensembl-pipeline/scripts/make_input_ids -dbhost #{MYSQL_HOST} -dbuser #{MYSQL_USER} -dbport #{MYSQL_PORT} -dbname ensembl_#{GENOME} -dbpass #{MYSQL_PW} -logic_name SubmitContig -slice -coord_system contig > #{input_ids_log}") - system("perl 
#{ENSEMBL_PATH}/ensembl-pipeline/scripts/make_input_ids -dbhost #{MYSQL_HOST} -dbuser #{MYSQL_USER} -dbport #{MYSQL_PORT} -dbname ensembl_#{GENOME} -dbpass #{MYSQL_PW} -logic_name SubmitChromosome -slice -coord_system chromosome >> #{input_ids_log}") - system("perl #{ENSEMBL_PATH}/ensembl-pipeline/scripts/make_input_ids -dbhost #{MYSQL_HOST} -dbuser #{MYSQL_USER} -dbport #{MYSQL_PORT} -dbname ensembl_#{GENOME} -dbpass #{MYSQL_PW} -logic_name SubmitGenome -slice -coord_system chromosome >> #{input_ids_log}") - system("perl #{ENSEMBL_PATH}/ensembl-pipeline/scripts/make_input_ids -dbhost #{MYSQL_HOST} -dbuser #{MYSQL_USER} -dbport #{MYSQL_PORT} -dbname ensembl_#{GENOME} -dbpass #{MYSQL_PW} -logic_name SubmitSlice -slice -coord_system chromosome -slice_size 1000000 >> #{input_ids_log}") - - end - - task :create => input_ids_log - -end - -analysis_database_pm_file = "#{Dir.getwd}/configs/pipeline-configs/modules/Bio/EnsEMBL/Analysis/Config/Databases.pm" -update_analysis_database_pm_log = "log/submission.update_databasepm.log" -analysis_general_pm_file = "#{Dir.getwd}/configs/pipeline-configs/modules/Bio/EnsEMBL/Analysis/Config/General.pm" -pipeline_batch_queue_file = "#{Dir.getwd}/configs/pipeline-configs/modules/Bio/EnsEMBL/Pipeline/Config/BatchQueue.pm" -pipeline_general_pm_file = "#{Dir.getwd}/configs/pipeline-configs/modules/Bio/EnsEMBL/Pipeline/Config/General.pm" -analysis_pmatch_file = "#{Dir.getwd}/configs/pipeline-configs/modules/Bio/EnsEMBL/Analysis/Config/GeneBuild/Pmatch.pm" -analysis_blast_config_file = "#{Dir.getwd}/configs/pipeline-configs/modules/Bio/EnsEMBL/Analysis/Config/Blast.pm" -analysis_minigenewise_config_file = "#{Dir.getwd}/configs/pipeline-configs/modules/Bio/EnsEMBL/Analysis/Config/GeneBuild/BlastMiniGenewise.pm" -analysis_killlist_config_file = "#{Dir.getwd}/configs/pipeline-configs/modules/Bio/EnsEMBL/Analysis/Config/GeneBuild/KillListFilter.pm" - -################################## -# PREPARE PROJECT FOR SUBMISSION # 
-################################## - -namespace :submission do - - - file analysis_database_pm_file => input_ids_log do - warn "-------------------------------------------------" - warn "Creating a copy of Databases.pm for this analysis" - warn "-------------------------------------------------" - - system("cp #{ENSEMBL_PATH}/ensembl-analysis/modules/Bio/EnsEMBL/Analysis/Config/Databases.pm.example #{analysis_database_pm_file}") - - end - - # The Database.pm file requires information on our database, needs updating" - file update_analysis_database_pm_log => analysis_database_pm_file do - warn "--------------------------------------------" - warn "Updating Databases.pm file for this analysis" - warn "--------------------------------------------" - - ref_db = false - gw_db = false - - file = File.open(analysis_database_pm_file) - output = File.new(update_analysis_database_pm_log, "w+") - while ( line = file.gets) - - ref_db = true if line.match(/^(\s+)REFERENCE_DB.*$/) - ref_db = false if line.match(/^(\s+)\},$/) - gw_db = true if line.match(/^(\s+)GENEWISE_DB.*$/) - gw_db = false if line.match(/^(\s+)\},$/) - - if line.include?("-dbname") - output.puts line.gsub(/\'\'/, "\'ensembl_#{GENOME}\'") if ref_db - output.puts line.gsub(/\'\'/, "\'genewise_#{GENOME}\'") if gw_db - elsif line.include?("-host") - output.puts line.gsub(/\'\'/, "\'#{MYSQL_HOST}\'") if ref_db or gw_db - elsif line.include?("-port") - output.puts line.gsub(/\'\'/, "\'#{MYSQL_PORT}\'") if ref_db or gw_db - elsif line.include?("-user") - output.puts line.gsub(/\'\'/, "\'#{MYSQL_USER}\'") if ref_db or gw_db - elsif line.include?("-pass") - output.puts line.gsub(/\'\'/, "\'#{MYSQL_PW}\'") if ref_db or gw_db - else - output.puts line - end - - end - output.close - file.close - - system("cp #{update_analysis_database_pm_log} #{analysis_database_pm_file}") - end - - file analysis_general_pm_file => update_analysis_database_pm_log do - warn "------------------------------------------" - warn "Updating 
General.pm file for this analysis" - warn "------------------------------------------" - - system("cp #{analysis_general_pm_file}.example #{analysis_general_pm_file}") - - end - - file pipeline_batch_queue_file => analysis_general_pm_file do - warn "---------------------------------------------" - warn "Updating BatchQueue.pm file for this Pipeline" - warn "---------------------------------------------" - - system("cp #{pipeline_batch_queue_file}.example #{pipeline_batch_queue_file}") - end - - file pipeline_general_pm_file => pipeline_batch_queue_file do - warn "---------------------------------------------" - warn "Updating General.pm file for this Pipeline" - warn "---------------------------------------------" - - system("cp #{pipeline_general_pm_file}.example #{pipeline_general_pm_file}") - - end - - file analysis_pmatch_file => pipeline_general_pm_file do - warn "---------------------------------------------" - warn "Updating Pmatch.pm file for this analysis" - warn "---------------------------------------------" - - system("cp #{analysis_pmatch_file}.example #{analysis_pmatch_file}") - - end - - file analysis_blast_config_file => analysis_pmatch_file do - warn "---------------------------------------------" - warn "Updating Blast.pm file for this analysis" - warn "---------------------------------------------" - - system("cp #{analysis_blast_config_file}.example #{analysis_blast_config_file}") - end - - file analysis_minigenewise_config_file => analysis_blast_config_file do - warn "----------------------------------------------------" - warn "Updating BlastMiniGenewise.pm file for this analysis" - warn "----------------------------------------------------" - - system("cp #{analysis_minigenewise_config_file}.example #{analysis_minigenewise_config_file}") - - end - - file analysis_killlist_config_file => analysis_minigenewise_config_file do - warn "----------------------------------------------------" - warn "Updating KillListFilter.pm file for this analysis" 
- warn "----------------------------------------------------" - - system("cp #{analysis_killlist_config_file}.example #{analysis_killlist_config_file}") - end - - task :create => [ "output" , analysis_killlist_config_file ] - -end - - -task :synchronise_dbs => "submission:create" do - - warn "--------------------------------------" - warn " Synchronizing databases" - warn "--------------------------------------" - - tables = [ "analysis" , "assembly", "assembly_exception", "coord_system", "seq_region", "seq_region_attrib", "meta", "attrib_type", "seq_region_synonym" ] - - tables.each do |table| - system("mysqldump --no-create-db -u#{MYSQL_USER} -p#{MYSQL_PW} -h#{MYSQL_HOST} ensembl_#{GENOME} #{table} > dump/#{table}.sql") - system("mysql -D genewise_#{GENOME} -u#{MYSQL_USER} -p#{MYSQL_PW} -h#{MYSQL_HOST} < dump/#{table}.sql") - end - - warn "--------------------------------------" - warn " Done synching tables " - warn "--------------------------------------" - -end - - - -############################# -### STARTS THE PIPELINE ##### -############################# - -desc 'Start the pipeline' -task :build => "synchronise_dbs" do - - warn "**********************************************************" - warn "Everything has been created, now begins the manual labor!" - warn "1. Edit BatchQueue.pm to include all necessary analyses and configure environment" - warn "2. Edit Pmatch.pm to add protein file and database info" - warn "3. Rinse an repeat for all other analyses (need to add to script!)" - warn "Run the test_RunnableDB script to check data" - warn "Run the pipeline!" 
-end - - - -task :pathfile do - path = "PERL5LIB=$PERL5LIB:#{Dir.getwd}/configs/pipeline-configs/modules:#{ENSEMBL_PATH}/ensembl-pipeline/scripts:#{ENSEMBL_PATH}/ensembl-killlist/modules:/usr/bin/tRNAscan-SE" - f = File.new("update_path.sh","w+") - f.puts path - f.close -end - - -############################# -# Database queries ########## -############################# - -task :analysis_ids do - EnsemblDB::DBConnection.connect - analyses_input_ids = EnsemblDB::InputIdAnalysis.find(:all).group_by{|i| i.input_id_type } - analyses_input_ids.each do |typ,input_ids| - puts "#{typ}" - input_ids[0..12].each {|i| puts "\t#{i.input_id}"} - end -end - -task :sanity_check do - system("perl #{ENSEMBL_PATH}/ensembl-pipeline/scripts/pipeline_sanity.pl -dbname ensembl_#{GENOME} -dbuser #{MYSQL_USER} -dbpass #{MYSQL_PW} -dbhost #{MYSQL_HOST}") -end - -task :perl_lib do - # Create a PERL5LIB source - f = File.new("perl5lib.sh","w+") - f.puts "PERL5LIB=#{ENSEMBL_PATH}/bioperl-live:#{ENSEMBL_PATH}/ensembl-pipeline/modules:#{ENSEMBL_PATH}/ensembl-pipeline/scripts:#{ENSEMBL_PATH}/ensembl-analysis/modules:#{ENSEMBL_PATH}/ensembl/modules:#{Dir.getwd}/configs/pipeline-configs/modules:/usr/bin/tRNAscan-SE:#{Dir.getwd}/configs/pipeline-configs/modules/Bio/EnsEMBL/Analysis/Runnable:" - f.puts "export PERL5LIB" - f.puts "echo $PERL5LIB" - f.puts "BLASTMAT=/references/software/util/BLOSSUM62" - f.puts "export BLASTMAT" - f.close - -end - -############################# -# Cleaning up and resetting # -############################# - -desc "Resetting the entire analysis" -task :clean do - system("rm -R analyses_and_rules") - system("rm -R log") - system("rm -R dump") - system("rm -R configs") - system("rm -R output*") -end diff --git a/annotation/Tools/EnsEMBL/tmp b/annotation/Tools/EnsEMBL/tmp deleted file mode 100755 index f5114ab7a..000000000 --- a/annotation/Tools/EnsEMBL/tmp +++ /dev/null @@ -1,165 +0,0 @@ -#!/bin/ksh -# $Id: clone_database.ksh,v 1.2 2013-11-06 16:55:45 ak4 Exp $ - 
-self="$0" -self_base="${self##*/}" - -function parse_dbarg -{ - # Arguments: - # $1 a name of a parameter to be used for output. - # $2 is a database specification on the form "database@host:port". - # - # This function parses the database specification (the ":port" part is - # optional) and returns a string which can be eval'ed and which then - # sets the '.dbname', '.dbhost' and '.dbport' fields of the compound - # variable whose name is given as the first argument. - - print $2 | tr '@:' ' ' | - awk -vparam="$1" ' - NF == 3 { - printf("%s=( dbname='%s' dbhost='%s' typeset -i dbport=%d )", - param, $1, $2, $3); - } - NF == 2 { - printf("%s=( dbname='%s' dbhost='%s' typeset -i dbport=3306 )", - param, $1, $2); - }' -} - -function usage -{ - cat < \$help, - "hints=s" => \$hints, - "species=s" => \$species, - "genome=s" => \$genome, - "outdir=s" => \$outdir); - -# Print Help and exit -if ($help) { - print $usage; - exit(0); -} - -# .. Check that all binaries are available in $PATH - -my @tools = ( "augustus" ); # List of tools to check for! -foreach my $exe (@tools) { check_bin($exe) == 1 or die "Missing executable $exe in PATH"; } - -my $augustus_config_pathj = $ENV{'AUGUSTUS_CONFIG_PATH'} or die "AUGUSTUS_CONFIG_PATH is not set, aborting." ; - -# .. Create output directory - -if (-d $outdir ) { - die "Output directory $outdir exists. Please remove and try again"; -} else { - msg("Creating output directory $outdir"); - runcmd("mkdir -p $outdir") -} - -# .. set up log file - -my $logfile = "$outdir/augustus.log"; -msg("Writing log to: $logfile"); -open LOG, '>', $logfile or err("Can't open logfile"); - -# .. 
load grid module (courtesy of Brian Haas) - -my $grid_computing_module = "BilsGridRunner"; -my $perl_lib_repo = "$FindBin::Bin/../PerlLibAdaptors"; -msg("-importing module: $grid_computing_module\n"); -require "$grid_computing_module.pm" or die "Error, could not import perl module at run-time: $grid_computing_module"; - -my $grid_computing_method = $grid_computing_module . "::run_on_grid" or die "Failed to initialize GRID module\n"; - -# .. Read genome fasta file. -my $inseq = Bio::SeqIO->new(-file => "<$genome", -format => 'fasta'); - -# .. and create chunks -msg("Creating chunks for GRID\n"); - -my $counter = 10000; -my $seq; - -while( $seq = $inseq->next_seq() ) { - $counter += 1; - my $outfile = $outdir . "/seq_" . $counter . ".fa"; - my $cmd = "augustus --species=$species --hintsfile=$hints --alternatives-from-evidence=true --gff3=on --extrinsicCfgFile=/references/software/augustus/config/extrinsic/extrinsic.E.cfg --uniqueGeneId=true $outfile > $outfile.augustus" ; - push(@cmds,$cmd); - my $seq_out = Bio::SeqIO->new(-file => ">$outfile", -format => 'fasta'); - $seq_out->write_seq($seq); -} - -# Submit job chunks to grid - -chomp(@cmds); # Remove empty indices -&$grid_computing_method(@cmds); - - -# ..Postprocessing here, like merging of output files - -# -------------------- - -sub msg { - my $t = localtime; - my $line = "[".$t->hms."] @_\n"; - print LOG $line if openhandle(\*LOG); - print STDERR $line unless $quiet; -} - -# -------------------- - -sub runcmd { - msg("Running:", @_); - system(@_)==0 or err("Could not run command:", @_); -} - -# -------------------- - -sub check_bin { - length(`which @_`) > 0 ? 
return 1 : return 0; -} - -#---------------------------------------------------------------------- - -sub err { - $quiet=0; - msg(@_); - exit(2); -} diff --git a/annotation/Tools/Grid/blat2grid.pl b/annotation/Tools/Grid/blat2grid.pl deleted file mode 100644 index 3c522fbbd..000000000 --- a/annotation/Tools/Grid/blat2grid.pl +++ /dev/null @@ -1,173 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Getopt::Long; -use Scalar::Util qw(openhandle); -use Time::Piece; -use Time::Seconds; -use FindBin; -use lib ("$FindBin::Bin/PerlLib", "$FindBin::Bin/PerlLibAdaptors"); -use File::Basename; -use Bio::SeqIO; -use Cwd; -use Carp; -no strict qw(subs refs); - - -my $usage = qq{ -perl my_script.pl - Getting help: - [--help] - - Input: - [--fasta filename] - The name of the protein file to read. - [--genome filename] - The name of the genome file to align to. - Ouput: - [--outdir name] - The name of the output directory. - -}; - -my $scipio_outfile = "scipio.merged.gff"; -my $outdir = undef; -my $genome = undef; -my $fasta = undef; -my $chunk_size = 500; # Partition size of fasta input -my @chunks = (); # Holds chunks, partitioning the fasta input (so we don't send 50.000 jobs to the farm... -my @cmds = (); # Stores the commands to send to farm -my $quiet; -my $help; - -GetOptions( - "help" => \$help, - "fasta=s" => \$fasta, - "genome=s" => \$genome, - "chunk_size=i" => \$chunk_size, - "outdir=s" => \$outdir); - -# Print Help and exit -if ($help) { - print $usage; - exit(0); -} - -# .. Check that all binaries are available in $PATH - -my @tools = ("blat" ); -foreach my $exe (@tools) { check_bin($exe) == 1 or die "Missing executable $exe in PATH"; } - -# .. Create output directory - -if (-d $outdir ) { - die "Output directory $outdir exists. Please remove and try again"; -} else { - msg("Creating output directory $outdir"); - runcmd("mkdir -p $outdir") -} - -# .. 
set up log file - -my $logfile = "$outdir/scipio2grid.log"; -msg("Writing log to: $logfile"); -open LOG, '>', $logfile or err("Can't open logfile"); - -# .. load grid module (courtesy of Brian Haas) - -my $grid_computing_module = "BilsGridRunner"; -my $perl_lib_repo = "$FindBin::Bin/../PerlLibAdaptors"; -msg("-importing module: $grid_computing_module\n"); -require "$grid_computing_module.pm" or die "Error, could not import perl module at run-time: $grid_computing_module"; - -my $grid_computing_method = $grid_computing_module . "::run_on_grid" or die "Failed to initialize GRID module\n"; - -# .. Read protein fasta file. -my $inseq = Bio::SeqIO->new(-file => "<$fasta", -format => 'fasta'); - -# .. and create chunks -msg("Creating chunks for GRID\n"); - -my @seqarray = (); -my $counter = 0; -my $chunk_counter = 1; - -my $seq; - -while( $seq = $inseq->next_seq() ) { - $counter += 1; - push(@seqarray,$seq); - - if ($counter == $chunk_size) { - my $outfile = $outdir . "/chunk_" . $chunk_counter . ".fa"; - write_chunk($outfile,@seqarray); - @seqarray = (); - $chunk_counter += 1; - $counter = 0; - } -} -my $outfile = $outdir . "/chunk_" . $chunk_counter . ".fa"; # Clunky, the last chunk is <= chunk_size... 
-write_chunk($outfile,@seqarray); - -# Push all jobs into the command list - -for (my $i=1;$i<=$chunk_counter;$i++) { - my $cmd = "blat -minIdentity=98 -minScore=80 $genome $outdir/chunk_$i.fa $outdir/chunk_$i.psl"; - push(@cmds,$cmd); -} - -# Submit job chunks to grid -chomp(@cmds); # Remove empty indices -&$grid_computing_method(@cmds); - - -# Merging the outputs -msg("Merging outputs from chunks"); - -my @files = <$outdir/*.psl>; - -foreach my $file (@files) { - system("cat $file >> $outdir/blat.merged"); -} - -msg("Finished BLAT grid run."); - -# -------------------- - -sub write_chunk { - my $outfile = shift; - my @seqs = @_; - my $seq_out = Bio::SeqIO->new(-file => ">$outfile", -format => 'fasta'); - foreach my $seq (@seqs) {$seq_out->write_seq($seq)}; -} - -# -------------------- - -sub msg { - my $t = localtime; - my $line = "[".$t->hms."] @_\n"; - print LOG $line if openhandle(\*LOG); - print STDERR $line unless $quiet; -} - -# -------------------- - -sub runcmd { - msg("Running:", @_); - system(@_)==0 or err("Could not run command:", @_); -} - -# -------------------- - -sub check_bin { - length(`which @_`) > 0 ? 
return 1 : return 0; -} - -#---------------------------------------------------------------------- - -sub err { - $quiet=0; - msg(@_); - exit(2); -} diff --git a/annotation/Tools/Grid/exonerate2grid.pl b/annotation/Tools/Grid/exonerate2grid.pl deleted file mode 100644 index e500ee6cb..000000000 --- a/annotation/Tools/Grid/exonerate2grid.pl +++ /dev/null @@ -1,167 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Getopt::Long; -use Scalar::Util qw(openhandle); -use Time::Piece; -use Time::Seconds; -use FindBin; -use lib ("$FindBin::Bin/PerlLib", "$FindBin::Bin/PerlLibAdaptors"); -use File::Basename; -use Bio::SeqIO; -use Cwd; -use Carp; -use Bio::SeqFeature::Generic; -use Bio::Tools::GFF; -no strict qw(subs refs); - - -my $usage = qq{ -perl my_script.pl - Getting help: - [--help] - - Input: - [--genome filename] - The name of the protein file to read. - [--blast filename] - Blast-output (tblastn) - - Ouput: - [--outdir name] - The name of the output directory. - -}; - -my $grid_computing_module = "BilsGridRunner"; -my $gff_formatter = Bio::Tools::GFF->new(-gff_version => 3); - -my $outdir = undef; -my $genome = undef; -my $blast = undef; -my @cmds = (); # Stores the commands to send to farm -my $quiet; -my @annotations = (); # Stores Rfama annotations as hashes -my $help; - -GetOptions( - "help" => \$help, - "blast=s" => \$blast, - "genome=s" => \$fasta, - "outdir=s" => \$outdir); - -# Print Help and exit -if ($help) { - print $usage; - exit(0); -} - -# .. Check that all binaries are available in $PATH - -my @tools = ( "exonerate" ); # List of tools to check for! -foreach my $exe (@tools) { check_bin($exe) == 1 or die "Missing executable $exe in PATH"; } - -# .. Create output directory - -if (-d $outdir ) { - die "Output directory $outdir exists. Please remove and try again"; -} else { - msg("Creating output directory $outdir"); - runcmd("mkdir -p $outdir") -} - -# .. 
set up log file - -my $logfile = "$outdir/exonerate_search.log"; -msg("Writing log to: $logfile"); -open LOG, '>', $logfile or err("Can't open logfile"); - -# .. load grid module (courtesy of Brian Haas) - -my $perl_lib_repo = "$FindBin::Bin/../PerlLibAdaptors"; -msg("-importing module: $grid_computing_module\n"); -require "$grid_computing_module.pm" or die "Error, could not import perl module at run-time: $grid_computing_module"; - -my $grid_computing_method = $grid_computing_module . "::run_on_grid" or die "Failed to initialize GRID module\n"; - -# .. Read genome fasta file. -my $inseq = Bio::SeqIO->new(-file => "<$genome", -format => 'fasta'); - -# .. and create chunks -msg("Creating chunks for grid\n"); - -my $seq; - -my $seq_counter = 0; - -while( $seq = $inseq->next_seq() ) { - $seq_counter += 1; - my $outfile = $outdir . "/" . $seq->display_id . ".fasta" ; # We could also use the display_id, but this can cause trouble with special characters - my $seq_out = Bio::SeqIO->new(-file => ">$outfile" , -format => 'fasta'); - $seq_out->write_seq($seq); - my $command = "exonerate --showtargetgff --refine region --model protein2genome --percent 60 $proteins $outfile > $outfile.exonerate 2> /dev/null" ; - push(@cmds,$command); -} - -# Submit job chunks to grid - -msg("Sending $seq_counter jobs to LSF grid\n"); - -chomp(@cmds); # Remove empty indices -&$grid_computing_method(@cmds); - -# ..Postprocessing here, merging of output and printing gff - -msg("Merging output and writing GFF file"); - -my @files = <$outdir/*.exonerate>; - -my $outfile = $outdir . 
"/exonerate_annotations.gff"; -open (my $OUT, '>', $outfile) or die "FATAL: Can't open file: $outfile for reading.\n$!\n"; - -foreach my $file (@files) { - - open (my $IN, '<', $file) or die "FATAL: Can't open file: $file for reading.\n$!\n"; - - while (<$IN>) { - chomp; - my $line = $_; - next if ($line =~ /^#.*/); # Skipping comment lines - - print $OUT $line , "\n"; - - } -} - -close($OUT); - -# -------------------- - -sub msg { - my $t = localtime; - my $line = "[".$t->hms."] @_\n"; - print LOG $line if openhandle(\*LOG); - print STDERR $line unless $quiet; -} - -# -------------------- - -sub runcmd { - msg("Running:", @_); - system(@_)==0 or err("Could not run command:", @_); -} - -# -------------------- - -sub check_bin { - length(`which @_`) > 0 ? return 1 : return 0; -} - -#---------------------------------------------------------------------- - -sub err { - $quiet=0; - msg(@_); - exit(2); -} diff --git a/annotation/Tools/Grid/interpro2grid.pl b/annotation/Tools/Grid/interpro2grid.pl deleted file mode 100644 index 68e922bc6..000000000 --- a/annotation/Tools/Grid/interpro2grid.pl +++ /dev/null @@ -1,182 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Getopt::Long; -use Scalar::Util qw(openhandle); -use Time::Piece; -use Time::Seconds; -use FindBin; -use lib ("$FindBin::Bin/PerlLib", "$FindBin::Bin/PerlLibAdaptors"); -use File::Basename; -use Bio::SeqIO; -use Cwd; -use Carp; -no strict qw(subs refs); - - -my $usage = qq{ -perl my_script.pl - Getting help: - [--help] - - Input: - [--fasta filename] - The name of the protein file to read. - Ouput: - [--outdir name] - The name of the output directory. - -}; - -my $outdir = undef; -my $genome = undef; -my $fasta = undef; -my $chunk_size = 10; # Partition size of fasta input -my @chunks = (); # Holds chunks, partitioning the fasta input (so we don't send 50.000 jobs to the farm... 
-my @cmds = (); # Stores the commands to send to farm -my $quiet; -my $help; - -GetOptions( - "help" => \$help, - "fasta=s" => \$fasta, - "chunk_size=i" => \$chunk_size, - "outdir=s" => \$outdir); - -# Print Help and exit -if ($help) { - print $usage; - exit(0); -} - -# .. Check that all binaries are available in $PATH - -my $interproscan = "/sw/bioinfo/interproscan-5.3-46.0/interproscan.sh" ; - -if (-f $interproscan ) { - msg ("Found interproscan at $interproscan"); -} else { - die "Could not find Interproscan at the default location." ; -} - -# .. Create output directory - -if (-d $outdir ) { - die "Output directory $outdir exists. Please remove and try again"; -} else { - msg("Creating output directory $outdir"); - runcmd("mkdir -p $outdir") -} - -# .. set up log file - -my $logfile = "$outdir/generic.log"; -msg("Writing log to: $logfile"); -open LOG, '>', $logfile or err("Can't open logfile"); - -# .. load grid module (courtesy of Brian Haas) - -my $grid_computing_module = "BilsGridRunner"; -my $perl_lib_repo = "$FindBin::Bin/../PerlLibAdaptors"; -msg("-importing module: $grid_computing_module\n"); -require "$grid_computing_module.pm" or die "Error, could not import perl module at run-time: $grid_computing_module"; - -my $grid_computing_method = $grid_computing_module . "::run_on_grid" or die "Failed to initialize GRID module\n"; - - -#### HERE YOU READ YOUR FILE TO BE CHUNKED AND RUN ON GRID! #### -#### Example below: Read a FASTA file, split into smaller sub-files -#### and analyse with e.g. blast or whatever via grid-submission - -# .. Read e.g. protein fasta file. -my $inseq = Bio::SeqIO->new(-file => "<$fasta", -format => 'fasta'); - -# .. 
and create chunks -msg("Creating chunks for GRID\n"); - -my @seqarray = (); # Stores entries for a given chunk to be printed later -my $counter = 0; -my $chunk_counter = 1; - -my $seq; - -while( $seq = $inseq->next_seq() ) { - $counter += 1; - push(@seqarray,$seq); - - if ($counter == $chunk_size) { - my $outfile = $outdir . "/chunk_" . $chunk_counter . ".fa"; - write_chunk($outfile,@seqarray); - @seqarray = (); - $chunk_counter += 1; - $counter = 0; - } -} -my $outfile = $outdir . "/chunk_" . $chunk_counter . ".fa"; # Clunky, the last chunk is <= chunk_size... -write_chunk($outfile,@seqarray); - -# Push all jobs into the command list - -for (my $i=1;$i<=$chunk_counter;$i++) { - - my $command = $interproscan . " -i $outdir/chunk_$i.fa -d $outdir" ; - push(@cmds,$command); -} - -# Submit job chunks to grid - -chomp(@cmds); # Remove empty indices -&$grid_computing_method(@cmds); - - -# Merging the outputs -msg("Merging outputs from chunks"); - -my @files = <$outdir/*.tsv>; - -foreach my $file (@files) { - system("cat $file >> $outdir/interprosan.merged.tsv"); -} - -msg("Finished with InterProScan"); - - -# -------------------- - -sub write_chunk { - my $outfile = shift; - my @seqs = @_; - my $seq_out = Bio::SeqIO->new(-file => ">$outfile", -format => 'fasta'); - foreach my $seq (@seqs) {$seq_out->write_seq($seq)}; -} - -# -------------------- - -sub msg { - my $t = localtime; - my $line = "[".$t->hms."] @_\n"; - print LOG $line if openhandle(\*LOG); - print STDERR $line unless $quiet; -} - -# -------------------- - -sub runcmd { - msg("Running:", @_); - system(@_)==0 or err("Could not run command:", @_); -} - -# -------------------- - -sub check_bin { - length(`which @_`) > 0 ? 
return 1 : return 0; -} - -#---------------------------------------------------------------------- - -sub err { - $quiet=0; - msg(@_); - exit(2); -} diff --git a/annotation/Tools/Grid/scipio2grid.pl b/annotation/Tools/Grid/scipio2grid.pl deleted file mode 100644 index 87ebca93a..000000000 --- a/annotation/Tools/Grid/scipio2grid.pl +++ /dev/null @@ -1,177 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Getopt::Long; -use Scalar::Util qw(openhandle); -use Time::Piece; -use Time::Seconds; -use FindBin; -use lib ("$FindBin::Bin/PerlLib", "$FindBin::Bin/PerlLibAdaptors"); -use File::Basename; -use Bio::SeqIO; -use Cwd; -use Carp; -no strict qw(subs refs); - - -my $usage = qq{ -perl my_script.pl - Getting help: - [--help] - - Input: - [--protein filename] - The name of the protein file to read. - [--genome filename] - The name of the genome file to align to. - Ouput: - [--outdir name] - The name of the output directory. - -}; - -my $scipio_outfile = "scipio.merged.gff"; -my $outdir = undef; -my $genome = undef; -my $protein = undef; -my $chunk_size = 10; # Partition size of fasta input -my @chunks = (); # Holds chunks, partitioning the fasta input (so we don't send 50.000 jobs to the farm... -my @cmds = (); # Stores the commands to send to farm -my $quiet; -my $help; - -GetOptions( - "help" => \$help, - "protein=s" => \$protein, - "genome=s" => \$genome, - "chunk_size=i" => \$chunk_size, - "outdir=s" => \$outdir); - -# Print Help and exit -if ($help) { - print $usage; - exit(0); -} - -# .. Check that all binaries are available in $PATH - -my @tools = ( "scipio.pl" , "blat" ); -foreach my $exe (@tools) { check_bin($exe) == 1 or die "Missing executable $exe in PATH"; } - -# .. Create output directory - -if (-d $outdir ) { - die "Output directory $outdir exists. Please remove and try again"; -} else { - msg("Creating output directory $outdir"); - runcmd("mkdir -p $outdir") -} - -# .. 
set up log file - -my $logfile = "$outdir/scipio2grid.log"; -msg("Writing log to: $logfile"); -open LOG, '>', $logfile or err("Can't open logfile"); - -# .. load grid module (courtesy of Brian Haas) - -my $grid_computing_module = "BilsGridRunner"; -my $perl_lib_repo = "$FindBin::Bin/../PerlLibAdaptors"; -msg("-importing module: $grid_computing_module\n"); -require "$grid_computing_module.pm" or die "Error, could not import perl module at run-time: $grid_computing_module"; - -my $grid_computing_method = $grid_computing_module . "::run_on_grid" or die "Failed to initialize GRID module\n"; - -# .. Read protein fasta file. -my $inseq = Bio::SeqIO->new(-file => "<$genome", -format => 'fasta'); - -# .. and create chunks -msg("Creating chunks for GRID\n"); - -my @seqarray = (); -my $counter = 0; -my $chunk_counter = 1; - -my $seq; - -while( $seq = $inseq->next_seq() ) { - $counter += 1; - push(@seqarray,$seq); - - if ($counter == $chunk_size) { - my $outfile = $outdir . "/chunk_" . $chunk_counter . ".fa"; - write_chunk($outfile,@seqarray); - @seqarray = (); - $chunk_counter += 1; - $counter = 0; - } -} -my $outfile = $outdir . "/chunk_" . $chunk_counter . ".fa"; # Clunky, the last chunk is <= chunk_size... 
-write_chunk($outfile,@seqarray); - -# Push all jobs into the command list - -for (my $i=1;$i<=$chunk_counter;$i++) { - my $scipio_cmd = "scipio.pl $outdir/chunk_$i.fa $protein > $outdir/chunk_$i.scipio"; - push(@cmds,$scipio_cmd); -} - -# Submit job chunks to grid -chomp(@cmds); # Remove empty indices -&$grid_computing_method(@cmds); - - -# Merging the outputs -msg("Merging outputs from chunks"); - -my @files = <$outdir/*.scipio>; - -foreach my $file (@files) { - system("cat $file >> $outdir/scipio.merged"); -} - -system("yaml2gff.1.4.pl $outdir/scipio.merged > $scipio_outfile 2> /dev/null"); - -msg("Finished scipio grid run."); - -# -------------------- - -sub write_chunk { - my $outfile = shift; - my @seqs = @_; - my $seq_out = Bio::SeqIO->new(-file => ">$outfile", -format => 'fasta'); - foreach my $seq (@seqs) {$seq_out->write_seq($seq)}; -} - -# -------------------- - -sub msg { - my $t = localtime; - my $line = "[".$t->hms."] @_\n"; - print LOG $line if openhandle(\*LOG); - print STDERR $line unless $quiet; -} - -# -------------------- - -sub runcmd { - msg("Running:", @_); - system(@_)==0 or err("Could not run command:", @_); -} - -# -------------------- - -sub check_bin { - length(`which @_`) > 0 ? return 1 : return 0; -} - -#---------------------------------------------------------------------- - -sub err { - $quiet=0; - msg(@_); - exit(0); -} - -exit 0; diff --git a/annotation/Tools/Maker/gmod_gff3_preprocessor.pl b/annotation/Tools/Maker/gmod_gff3_preprocessor.pl deleted file mode 100644 index ec112ee99..000000000 --- a/annotation/Tools/Maker/gmod_gff3_preprocessor.pl +++ /dev/null @@ -1,335 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -use Bio::FeatureIO; -use Getopt::Long; -use FileHandle; -use lib '/home/cain/cvs_stuff/schema/chado/lib'; -use lib '/home/scott/cvs_stuff/schema/chado/lib'; -use Bio::GMOD::DB::Adapter; -use Bio::GMOD::Config; -use Bio::GMOD::DB::Config; - -=head1 NAME - -$0 - Prepares a GFF3 file for bulk loading into a chado database. 
- -=head1 SYNOPSIS - - % gmod_gff_preprocessor [options] --gfffile - -=head1 COMMAND-LINE OPTIONS - - --gfffile The file containing GFF3 (optional, can read - from stdin) - --outfile The name kernel that will be used for naming result files - --splitfile Split the files into more managable chunks, providing - an argument to control splitting - --onlysplit Split the files and then quit (ie, don't sort) - --nosplit Don't split the files (ie, only sort) - --hasrefseq Set this if the file contains a reference sequence line - (Only needed if not splitting files) - --dbprofile Specify a gmod.conf profile name (otherwise use default) - --inheritance_tiers How many levels of inheritance do you expect tis file - to have (default: 3) - -=head1 DESCRIPTION - - -splitfile -- Just setting this flag to 1 will cause the file to be split -by reference sequence. If you provide an optional argument, it will be -further split according to these rules: - - source=1 Splits files according to the value in the source column - source=a,b,c Puts lines with sources that match (via regular expression) - 'a', 'b', or 'c' in a separate file - type=a,b,c Puts lines with types that match 'a', 'b', or 'c' in a - separate file - -For example, if you wanted all of your analysis results to go in a separate -file, you could indicate '--splitfile type=match', and all cDNA_match, -EST_match and cross_genome_match features would go into separate files -(separate by reference sequence). - -inheritence_tiers -- The number of levels of inheritance this file has. -For example, if the file has "central dogma" genes in it (gene/mRNA/ -exon,polypeptide), then it has 3. Up to 4 is supported but the higher -the number, the more slowly it performs. If you don't know, 3 is a -reasonable guess. - -=head2 FASTA sequence - -If the GFF3 file contains FASTA sequence at the end, the sequence -will be placed in a separate file with the extention '.fasta'. 
This -fasta file can be loaded separately after the split and/or sorted -GFF3 files are loaded, using the command: - - gmod_bulk_load_gff3.pl -g - -=head1 AUTHOR - -Scott Cain Ecain@cshl.orgE - -Copyright (c) 2006-2007 - -This library is free software; you can redistribute it and/or modify -it under the same terms as Perl itself. - -=cut - -my (@GFFFILE, $OUTFILE, $SPLITFILE,$ONLYSPLIT,$NOSPLIT,$HASREFSEQ, - $DBPROFILE, $INHERITANCE_TIERS); - -GetOptions( - 'gfffile=s' => \@GFFFILE, - 'outfile=s' => \$OUTFILE, - 'splitfile=s' => \$SPLITFILE, - 'onlysplit' => \$ONLYSPLIT, - 'nosplit' => \$NOSPLIT, - 'hasrefseq' => \$HASREFSEQ, - 'dbprofile=s' => \$DBPROFILE, - 'inheritance_tiers=i' => \$INHERITANCE_TIERS, -) or ( system( 'pod2text', $0 ), exit -1 ); - -@GFFFILE = split(/,/,join(',',@GFFFILE)); - -$DBPROFILE ||='default'; - -$INHERITANCE_TIERS ||= 3; - -my ($split_on_source, $split_on_type, $split_on_ref); -if ($SPLITFILE) { - if ($SPLITFILE and $SPLITFILE !~ /=/ and $SPLITFILE == 1) { - $split_on_ref = 1; - } - else { - my @splits = split /;/, $SPLITFILE; - for (@splits) { - my ($tag, $value) = split /=/; - $value =~ s/,/|/g; - if ($tag eq 'source') { - $split_on_source = $value; - } - elsif ($tag eq 'type') { - $split_on_type = $value; - } - else { - die "unsupported splitfile tag: $tag\n"; - } - } - } -} - - -my %has_ref_seq; -my @gfffiles; -for my $GFFFILE (@GFFFILE) { - $GFFFILE ||='-'; - $OUTFILE ||="$GFFFILE.out.gff3"; - my $FASTA = "$OUTFILE.fasta"; - - if ($SPLITFILE && !$NOSPLIT) { - - open GFFIN, "<", $GFFFILE or die "couldn't open $GFFFILE for reading: $!"; - open FASTA, ">", $FASTA or die " couldn't open $FASTA for writing: $!"; - - my $fasta_flag = 0; - my %files; - while ( ) { - if (/^##FASTA/) { - $fasta_flag = 1; - print FASTA; - next; - } elsif ($fasta_flag) { - print FASTA; - next; - } - next if /^#/; - my @la = split /\t/; - - (warn "ignored gff line: $_" && next) if (scalar @la != 9); - - my $has_ref_seq; - chomp $la[8]; - if ( $la[8] =~ 
/ID=([^;]+);*.*$/ ) { - my $id = $1; - if ( $id eq $la[0] ) { - $has_ref_seq = $id; - } - } - - if ( $split_on_source && $split_on_source eq 1 ) { - my $source = $la[1]; - my $filename = "$la[0].$la[1].$OUTFILE"; - unless ( defined $files{ $filename } ) { - $files{ $filename } = new FileHandle $filename, "w"; - push @gfffiles, $filename; - } - $files{ $filename }->print( $_ ); - - push @{$has_ref_seq{ $filename }}, $has_ref_seq if $has_ref_seq; - } - elsif ( $split_on_source && $la[1] =~ /$split_on_source/) { - my $filename = "$la[0].source.$OUTFILE"; - unless ( defined $files{ $filename } ) { - $files{ $filename } = new FileHandle $filename, "w"; - push @gfffiles, $filename; - } - $files{ $filename }->print( $_ ); - - push @{$has_ref_seq{ $filename }}, $has_ref_seq if $has_ref_seq; - } - elsif ( $split_on_type && $la[2] =~ /$split_on_type/) { - my $filename = $la[0].'.type.'.$OUTFILE; - unless ( defined $files{ $filename } ) { - $files{ $filename } = new FileHandle $filename, "w"; - push @gfffiles, $filename; - } - $files{ $filename }->print( $_ ); - - push @{$has_ref_seq{ $filename }}, $has_ref_seq if $has_ref_seq; - } - else { - my $filename = $la[0].'.'.$OUTFILE; - unless ( defined $files{ $filename } ) { - $files{ $filename } = new FileHandle $filename, "w"; - push @gfffiles, $filename; - } - $files{ $filename }->print( $_ ); - - push @{$has_ref_seq{ $filename }}, $has_ref_seq if $has_ref_seq; - } - } - - for my $key (keys %files) { - $files{$key}->close; - } - } - else { - push @gfffiles, $GFFFILE; - push @{ $has_ref_seq{ $GFFFILE } }, $GFFFILE if $HASREFSEQ; - } - -} -exit(0) if $ONLYSPLIT; - -my $gmod_conf = Bio::GMOD::Config->new(); -my $db_conf = Bio::GMOD::DB::Config->new($gmod_conf, $DBPROFILE); -my $db = Bio::GMOD::DB::Adapter->new( - dbuser => $db_conf->user, - dbpass => $db_conf->password || '', - dbhost => $db_conf->host, - dbport => $db_conf->port, - dbname => $db_conf->name, - notransact => 1, - skipinit => 1, - ); - -$db->sorter_create_table; 
-for my $gfffile (@gfffiles) { - $db->sorter_delete_from_table; - - my $outfile = $gfffile.'.sorted'; - my $fasta = "$outfile.fasta"; - - open IN, "<", $gfffile or die "couldn't open $gfffile for reading: $!\n"; - - my $fasta_flag = 0; - print STDERR "Sorting the contents of $gfffile ...\n"; - while( ) { - if (/^##FASTA/) { - $fasta_flag = 1; - open FASTA, - ">", $fasta or die "couldn't open $fasta for writing: $!\n"; - - print FASTA "##FASTA\n"; - print FASTA; - next; - } - elsif ($fasta_flag) { - print FASTA; - next; - } - my $line = $_; - my @line_array = split /\t/, $line; - - if ($line =~ /^#/ or scalar @line_array != 9) { - next; - } - - my $refseq = $line_array[0]; - - my ($id, @parents,@derives_froms); - if ( $line_array[8] =~ /ID=([^;]+);*.*$/ ) { - $id = $1; - chomp $id; - } - if ( $line_array[8] =~ /Parent=([^;]+);*.*$/ ) { - @parents = split /,/, $1; - } - if ( $line_array[8] =~ /Derives_from=([^;]+);*.*$/ ) { - @derives_froms = split /,/, $1; - } - - if (@parents > 0 || @derives_froms > 0) { - for my $parent ( (@parents,@derives_froms) ) { - chomp $parent; - $db->sorter_insert_line($refseq, $id, $parent, $line); - } - } - elsif ($id) { - $db->sorter_insert_line($refseq, $id, undef, $line); - } - else { - $db->sorter_insert_line($refseq, undef, undef, $line); - } - } - close IN; - close FASTA if $fasta_flag; - - print STDERR "Writing sorted contents to $outfile ...\n"; - open OUT,">", $outfile or die "couldn't open $outfile for writing: $!\n"; - -#to print: -# -get ref seqs (refseq == id) -# -get things with no parent - - print OUT "##gff-version 3\n"; - - my @refseqs = $db->sorter_get_refseqs(); - for my $refseq (@refseqs) { - print OUT $refseq; #already has the line feed - } - - my @no_parents = $db->sorter_get_no_parents(); - for my $no_parent (@no_parents) { - print OUT $no_parent; - } - @no_parents = ''; - - if ($INHERITANCE_TIERS >= 2) { - my @second_tiers = $db->sorter_get_second_tier(); - for my $second_tier (@second_tiers) { - print OUT 
$second_tier; - } - } - - if ($INHERITANCE_TIERS >= 3) { - my @third_tiers = $db->sorter_get_third_tier(); - for my $third_tier (@third_tiers) { - print OUT $third_tier; - } - } - -#yes, four tiers can happen, like transposible_element->te_gene->mRNA->exon - if ($INHERITANCE_TIERS >= 4) { - my @forth_tiers = $db->sorter_get_fourth_tier(); - for my $fourth_tier (@forth_tiers) { - print OUT $fourth_tier; - } - } - - close OUT; - -} - diff --git a/annotation/Tools/Maker/maker_select_models_by_AED_score.pl b/annotation/Tools/Maker/maker_select_models_by_AED_score.pl deleted file mode 100755 index a2575614a..000000000 --- a/annotation/Tools/Maker/maker_select_models_by_AED_score.pl +++ /dev/null @@ -1,226 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Getopt::Long; -use POSIX qw(strftime); -use List::MoreUtils qw(uniq); -use Pod::Usage; -use Bio::Tools::GFF; -use IO::File; -use NBIS::GFF3::Omniscient; - -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $opt_output= undef; -my $opt_score = undef; -my $opt_test = undef; -my $opt_gff = undef; -my $opt_help; - -# OPTION MANAGMENT -my @copyARGV=@ARGV; -if ( !GetOptions( 'f|ref|reffile|gff=s' => \$opt_gff, - 's|score|v=f' => \$opt_score, - 't|test=s' => \$opt_test, - 'o|output=s' => \$opt_output, - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ( ! $opt_gff or ! defined($opt_score) or ! $opt_test ){ - pod2usage( { - -message => "$header\nAt least 3 parameters are mandatory:\n1) Input reference gff file (--f)\n". 
- "2) score use for filtering (between 0 and 1) with option --v\n3) test to apply (> < = >= <=) with the option -t. And don't forget to quote you parameter if it contains the character < or >.\n\n", - -verbose => 0, - -exitval => 1 } ); -} - -############### -# Test options -if($opt_score < 0 or $opt_score > 1){ - print "The value of the score option is Wrong: $opt_score.\n We want a value between 0 and 1.";exit; -} -if($opt_test ne "<" and $opt_test ne ">" and $opt_test ne "<=" and $opt_test ne ">=" and $opt_test ne "="){ - print "The test to apply is Wrong: $opt_test.\nWe want something among this list: <,>,<=,>= or =.";exit; -} - -############### -# Manage Output - -# FOR REPORT -my $ostreamReport = \*STDOUT or die ( sprintf( "Can not open '%s' for writing %s", "STDOUT", $! )); - -## FOR GFF FILE -my $ostream_ok = IO::File->new(); -my $ostream_discarded = IO::File->new(); -if($opt_output){ - print $opt_output; - $opt_output =~ s/\.gff$//g; - print "after ".$opt_output; - my $out_ok = "$opt_output".".gff"; - my $out_discarded = $opt_output."_discarded.gff"; - - if(-f $out_ok or -f $out_discarded){ - print "File already exist.\n";exit; - } - $ostream_ok->open( $out_ok, 'w' ) or croak( sprintf( "Can not open '%s' for reading: %s", $opt_output, $! ) ); - $ostream_discarded->open( $out_discarded, 'w' ) or croak( sprintf( "Can not open '%s' for reading: %s", $opt_output, $! ) ); -} -else{ - $ostream_ok->fdopen( fileno(STDOUT), 'w' ) or - croak( sprintf( "Can not open STDOUT for writing: %s", $! ) ); - $ostream_discarded->fdopen( fileno(STDOUT), 'w' ) or - croak( sprintf( "Can not open STDOUT for writing: %s", $! ) ); -} -my $gffout_ok = Bio::Tools::GFF->new( -fh => $ostream_ok , -gff_version => 3) or croak( sprintf( "Can not open STDOUT for writing: %s", $! ) ); -my $gffout_discarded = Bio::Tools::GFF->new( -fh => $ostream_discarded , -gff_version => 3) or croak( sprintf( "Can not open STDOUT for writing: %s", $! 
) ); - - - - ####################### - # MAIN # -# >>>>>>>>>>>>>>>>>>>>>>>>> ####################### <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -# start with some interesting information -my $stringPrint = strftime "%m/%d/%Y at %Hh%Mm%Ss", localtime; -$stringPrint .= "\nusage: $0 @copyARGV\n"; - -###################### -### Parse GFF input # -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ - input => $opt_gff, - verbose => 1 - }); -print("Parsing Finished\n\n"); -### END Parse GFF input # -######################### - - -########################### -# Main compute -my @listIDl2discarded; -my @listIDl2ok; -foreach my $tag_l1 (keys %{$hash_omniscient->{'level1'}}){ - foreach my $id_l1 (keys %{$hash_omniscient->{'level1'}{$tag_l1}}){ - - ################# - # == LEVEL 2 == # - ################# - foreach my $tag_l2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - if ( exists ($hash_omniscient->{'level2'}{$tag_l2}{$id_l1} ) ){ - foreach my $feature_level2 ( @{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}}) { - if($feature_level2->has_tag('_AED')){ - my $AED_score=$feature_level2->_tag_value('_AED'); - my $id_level2=lc($feature_level2->_tag_value('ID')); - - if ($opt_test eq ">"){ - if ($AED_score > $opt_score){ - push @listIDl2ok, $id_level2 ; - }else{push @listIDl2discarded, $id_level2 ;} - } - if ($opt_test eq "<"){ - if ($AED_score < $opt_score){ - push @listIDl2ok, $id_level2 ; - }else{push @listIDl2discarded, $id_level2 ;} - } - if ($opt_test eq "="){ - if ($AED_score == $opt_score){ - push @listIDl2ok, $id_level2 ; - }else{push @listIDl2discarded, $id_level2 ;} - } - if ($opt_test eq "<="){ - if ($AED_score <= $opt_score){ - push @listIDl2ok, $id_level2 ; - }else{push @listIDl2discarded, $id_level2 ;} - } - if ($opt_test eq ">="){ - if ($AED_score >= $opt_score){ - push @listIDl2ok, $id_level2 ; - }else{push @listIDl2discarded, $id_level2 ;} - } - } - else{ - print "WARNING: _AED attribute not found 
for feature ".$ostreamReport->write_feature($feature_level2); - } - } - } - } - } -} - -# remove duplicate in case several option tends to give the same case -if(@listIDl2ok){ - my $sizeList= @listIDl2ok; - $stringPrint.= "$sizeList RNA(s) that reach your quality request. ($opt_test $opt_score)\n"; - my @listIDl2okUniq = uniq(@listIDl2ok); - my $omniscient_ok = create_omniscient_from_idlevel2list($hash_omniscient, $hash_mRNAGeneLink, \@listIDl2okUniq); - print_omniscient($omniscient_ok, $gffout_ok); -} -if(@listIDl2discarded){ - my $sizeList= @listIDl2discarded; - $stringPrint.= "$sizeList RNA(s) discarded because don't reach your quality request. ($opt_test $opt_score)\n"; - my @listIDl2discardedUniq = uniq(@listIDl2discarded); - my $omniscient_discarded = create_omniscient_from_idlevel2list($hash_omniscient, $hash_mRNAGeneLink, \@listIDl2discarded); - print_omniscient($omniscient_discarded, $gffout_discarded); -} - -#Print Info Output -print $ostreamReport $stringPrint; - - -=head1 NAME - -maker_select_models_by_AED_score.pl - -The script take a gff3 file as input. - - -The result is written to the specified output file, or to STDOUT. -Remark: If there is duplicate in the file they will be removed in the output. In that case you should be informed. - -=head1 SYNOPSIS - - ./maker_select_models_by_AED_score.pl -f infile.gff -v 1 -t = [ --output outfile ] - ./maker_select_models_by_AED_score.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<-f>, B<--reffile>, B<--gff> or B<-ref> - -Input GFF3 file that will be read - -=item B<-v>, B<--score> or B<-s> - -Score use for filtering (between 0 and 1) with option. -Can be a float. - -=item B<-t> or B<--test> -Test to apply (> < = >= <=). If you us one of these two character >, <, please don't forget to quote you parameter liket that "<=". Else your terminal will complain. - -=item B<-o> or B<--output> - -Output GFF file. If no output file is specified, the output will be -written to STDOUT. 
- -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Maker/maker_split_file.sh b/annotation/Tools/Maker/maker_split_file.sh deleted file mode 100755 index 3dc0bbe2d..000000000 --- a/annotation/Tools/Maker/maker_split_file.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -file=$1 -script="~/git/code/Tools/Maker/split_gff_by_source.pl" -basename=$(basename $file .gff) - -echo "Splitting file $file" -mkdir -p annotation/$basename - -perl $script --input $file -d annotation/$basename - -echo "Converting Maker file to GTF" -/sw/bioinfo/cufflinks-2.1.1/gffread -o annotation/$basename/maker.gtf -T -F annotation/$basename/maker.gff - -echo "All done!" diff --git a/annotation/Tools/Maker/split_gff_by_source.pl b/annotation/Tools/Maker/split_gff_by_source.pl deleted file mode 100755 index fcf2a8e22..000000000 --- a/annotation/Tools/Maker/split_gff_by_source.pl +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; - -use Getopt::Long qw(:config no_ignore_case bundling); -use IO::File; -use File::Basename; - -my $EXPECTED_NUM_COLUMNS = 9; -my $in = \*STDIN; -my $out_dir; - -parse_options(); -split_gff3(); - -sub parse_options { - my $help; - my $infile; - GetOptions("i|input=s" => \$infile, - "d|output_directory=s" => \$out_dir, - "h|help" => \$help); - print_usage() if $help; - die "Missing required output directory (-d) option\n" if !$out_dir; - $in = new IO::File($infile) or die "Error reading gff3: $!\n" - if $infile; -} - -sub print_usage { - my $progname = basename($0); - die << "END"; -usage: $progname - [-i|--input ] - -d|--output_directory - [-h|--help] - - i: input GFF3 [default: stdin] - d: directory to write split fasta files into -END -} - -sub split_gff3 { - my %data = (); - - while (my $line = <$in>) { - chomp $line; - my @tokens = split /\t+/, $line; - next if scalar(@tokens) != $EXPECTED_NUM_COLUMNS; - next if $tokens[1] eq "."; - push @{$data{$tokens[1]}}, \@tokens; - } - - my 
@sources = keys(%data); - - for (my $i = 0; $i < scalar(@sources); ++$i) { - my $source = $sources[$i]; - my $out = new IO::File(">$out_dir/$source.gff") or - die "Error writing $out_dir/$source.gff: $!\n"; - print $out "##gff-version 3\n"; - foreach my $gff (@{$data{$source}}) { - print $out join("\t", @{$gff}), "\n"; - } - $out->close(); - } -} - diff --git a/annotation/Tools/NCBI/CleanTaxonomicTreeFromNCBI.pl b/annotation/Tools/NCBI/CleanTaxonomicTreeFromNCBI.pl deleted file mode 100644 index 1b2423ef4..000000000 --- a/annotation/Tools/NCBI/CleanTaxonomicTreeFromNCBI.pl +++ /dev/null @@ -1,217 +0,0 @@ -#!/usr/bin/perl - -use strict; -use warnings; -use Bio::DB::Taxonomy; -use Bio::TreeIO; -use Bio::Tree::NodeNHX; -use Getopt::Long; -use IO::File; -use Pod::Usage; - -#VERIABLE DECLARATION - -my $opt_tree; -my $opt_help; -my $opt_output; -my $nbProt=0; - -# OPTION MANAGMENT -if ( !GetOptions( 't=s' => \$opt_tree, - 'o|output=s' => \$opt_output, - 'h|help!' => \$opt_help ) ) - -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 0 } ); -} - -if ( ! ( (defined($opt_tree)) ) ){ - pod2usage( { - -message => "\nAt least 1 parameter is mandatory:\nInput reference file (--t)\n\n", - -verbose => 0, - -exitval => 2 } ); -} - -#my $out = new Bio::TreeIO->new(-file => '>'.$opt_output.'.svg', -# -format => 'svggraph'); - - -#Get Tree in File -my $treeString; -open(FIC,$opt_tree) or die "Couldn't open the file $opt_tree\n"; -while( my $line = ) { - $line =~ s/\n//g; - $treeString.=$line; -} - -# Analyse the TREE string -my $newTString=$treeString; -my $currentTString=$treeString; -my $sizeTreeBefore=0; -my $sizeCurrentTree=length($treeString); -my $newStringPart1; my $newStringPart2; -my $positionLastP; my $positionBeforeLastP; - -while( ! 
($sizeTreeBefore == $sizeCurrentTree)){ - - #parcours de l'abre - my $endPosition=$sizeCurrentTree-1; - my $nbClosedP=0; my $comaBetween="no"; - - while($endPosition != 0){ - my $char = substr $currentTString, $endPosition, 1; - - if (($char eq ",") && ($nbClosedP>=1)){ - $comaBetween="yes";print "We found >>>, \n"; - } - - if($char eq ")"){ - print "We found >>>) \n"; - $nbClosedP++; - if($nbClosedP == 1){$positionLastP=$endPosition;} - - if($nbClosedP > 1){ - - #define position of two last closed parenthesis - if($nbClosedP == 2){$positionBeforeLastP=$endPosition;} - if($nbClosedP > 2){$positionLastP=$positionBeforeLastP; $positionBeforeLastP=$endPosition;} - - ### Unitary Test (XXXX)blabla - print "Unitary Test comaBetween $comaBetween\n"; - my $currentinfo=substr $currentTString, $positionLastP; - my $positionUniq=checkUniqParenthesis($positionLastP-1, $currentTString); #return "(" - if ($positionUniq != -1) { - print "We have to remove \n"; - my $positionEndCut=getPositionEndCut($positionLastP+1,$currentTString); - ### remove opening parenthesis part - my $newStringPartA=substr $currentTString, 0, $positionUniq; - my $newStringPartB=substr $currentTString, $positionUniq+1; - my $currentTStringMinusPO="$newStringPartA$newStringPartB"; - ### remove closing parenthesis part - $newStringPart1=substr $currentTStringMinusPO, 0, $positionLastP-1; -# print "A=>newStringPart1 $newStringPart1\n"; - $newStringPart2=substr $currentTStringMinusPO, $positionEndCut-1; -# print "A=>newStringPart2 $newStringPart2\n"; - $newTString="$newStringPart1$newStringPart2"; - $comaBetween="no";last; - } - - ### CHECK opening parenthesis // get position - if ($comaBetween eq "no"){ - print "Second Test\n"; - my $posiotionFirstOP=checkOpeningParenthesis($positionLastP, $currentTString); - my $posiotionAfterFirstOP=checkOpeningParenthesis($positionBeforeLastP, $currentTString); - - ### IF closeing parenthesis are in consecutive position - if(($posiotionAfterFirstOP) == 
($posiotionFirstOP+1)){ - #### remove opening parenthesis part // comsequences shift position to minus 1 in the newTString -# print "CONSECUTIVE POSITION FOR OPENING PARENTHESIS => \n"; - $newStringPart1=substr $currentTString, 0, $posiotionFirstOP; - #print "B=>newStringPart1 $newStringPart1\n"; - my $posiotionGetEnd=$posiotionFirstOP+1; - $newStringPart2=substr $currentTString, $posiotionGetEnd; - #print "B=>newStringPart2 $newStringPart2\n"; - my $sizeBeforenewString=length($newTString); - $newTString="$newStringPart1$newStringPart2"; # create the new string - my $sizeAfter=length($newTString); - - #### remove closed parenthesis part of the new string - $newStringPart1=substr $newTString, 0, $positionLastP-1; - print "C=>newStringPart1 $newStringPart1\n"; - my $positionEndCut=getPositionEndCut($positionLastP,$newTString); - $newStringPart2=substr $newTString, $positionEndCut; - $newTString="$newStringPart1$newStringPart2"; - print "C=>newStringPart2 $newStringPart2\n"; - $sizeAfter=length($newTString); -# print "REMOVING closing parenthesis =>new Size = $sizeAfter \n\n"; - $comaBetween="no";last; - } - } - } - $comaBetween="no"; - } - $endPosition--; - } - $sizeTreeBefore=$sizeCurrentTree; # print "sizeTreeBefore $sizeTreeBefore\n"; - $sizeCurrentTree=length($newTString); # print "sizeCurrentTree $sizeCurrentTree - print"\nEND OF ROUND !!$newTString\n\n"; - $currentTString=$newTString; -} - -### final case / last round: -### Unitary Test (XXXX)blabla -my $positionUniq=checkUniqParenthesis($positionBeforeLastP-1, $currentTString); #return "(" -if ($positionUniq != -1) { - my $positionEndCut=getPositionEndCut($positionBeforeLastP+1,$currentTString); - my $newStringPartA=substr $currentTString, 0, $positionUniq; - my $newStringPartB=substr $currentTString, $positionUniq+1; - my $currentTStringMinusPO="$newStringPartA$newStringPartB"; - $newStringPart1=substr $currentTStringMinusPO, 0, $positionBeforeLastP-1; - $newStringPart2=substr $currentTStringMinusPO, 
$positionEndCut-1; - $newTString="$newStringPart1$newStringPart2"; -} - -print "\nfinalTree= $newTString\n"; - -#open(my $fh, '>', "treeCleanResult.txt") or die "Could not open file 'treeCleanResult' $!"; -#print $fh "$finalTree\n"; -#close $fh; - - - -##### METHODS ####### - -sub getPositionEndCut{ - my ($endPosition,$treeString)=@_; -# print "getPositionEndCut $endPosition\n"; - while($endPosition < length($treeString)){ - my $char = substr $treeString, $endPosition, 1; - print "ENDCUT char = $char \n"; - if($char eq ',' || $char eq ')' || $char eq ';'){ - return $endPosition; - } - $endPosition++; - } -} - -sub checkOpeningParenthesis{ - my ($positionClosedP, $treeString)=@_; - my $endPosition=$positionClosedP; - my $positionOP=0; - my $nbPopened=0; my $nbPclosed=0; - while($endPosition >= 0){ - my $char = substr $treeString, $endPosition, 1; -# print "char = $char $endPosition <= $nbPopened $nbPclosed\n"; - if($char eq '('){$nbPopened++;} - if($char eq ')'){$nbPclosed++;} - if($nbPopened eq $nbPclosed){ - $positionOP=$endPosition; - return $positionOP; - } - $endPosition--; - } - print "No symetric open parenthesis !\n";exit; -} - -sub checkUniqParenthesis{ - my ($endPosition, $treeString)=@_; - print "checkUniqParenthesis $endPosition\n"; - my $nbPclosed=0; - while($endPosition >= 0){ - my $char = substr $treeString, $endPosition, 1; - print "charUniq = $char $endPosition\n"; - if( ( $char eq ')') || ($char eq ",") ){ return -1;} - if($char eq '('){ - return $endPosition; - } - $endPosition--; - } -} - diff --git a/annotation/Tools/Util/.gitignore b/annotation/Tools/Util/.gitignore deleted file mode 100644 index 5509140f2..000000000 --- a/annotation/Tools/Util/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.DS_Store diff --git a/annotation/Tools/Util/busco_compare.pl b/annotation/Tools/Util/busco_compare.pl deleted file mode 100755 index 9250f39ed..000000000 --- a/annotation/Tools/Util/busco_compare.pl +++ /dev/null @@ -1,366 +0,0 @@ -#!/usr/bin/env perl - 
-################################################### -# Jacques Dainat 01/2018 # -# National Bioinformatics Infrastructure Sweden # -# jacques.dainat@nbis.se # -################################################### - -use strict; -use warnings; -use Carp; -use Clone 'clone'; -use Pod::Usage; -use Getopt::Long; -use IO::File ; -use List::Util 'first'; -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient; - -my $start_run = time(); - -my $folderIn1=undef; -my $folderIn2=undef; -my $outfolder=undef; -my $verbose=undef; -my $opt_help = 0; - - -Getopt::Long::Configure ('bundling'); -if ( !GetOptions ('f1=s' => \$folderIn1, - "f2=s" => \$folderIn2, - 'o|output=s' => \$outfolder, - 'v|verbose!' => \$verbose, - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 0 } ); -} - -if ( !defined($folderIn1) or !defined($folderIn2) ){ - pod2usage( { -message => 'at least 2 parameters are mandatory: --f1 and --f2', - -verbose => 1, - -exitval => 1 } ); -} - -# Manage input folder1 -my $fh1; -$folderIn1 = remove_slash_path_folder($folderIn1); -opendir(DIR, "$folderIn1") or die "Unable to read Directory : $!"; -my @files_table1 = grep(/^full_table_/,readdir(DIR)); -my $path1=$folderIn1."/".$files_table1[0]; -open($fh1, '<', $path1) or die "Could not open file '$path1' $!"; - -#Manage input folder2 -my $fh2; -$folderIn2 = remove_slash_path_folder($folderIn2); -opendir(DIR, "$folderIn2") or die "Unable to read Directory $folderIn2 : $!";; -my @files_table2 = grep(/^full_table_/,readdir(DIR)); -my $path2=$folderIn2."/".$files_table2[0]; -open($fh2, '<', $path2) or die "Could not open file '$path2' $!"; - - -#Manage output folder -if ($outfolder) { - $outfolder = remove_slash_path_folder($outfolder); - if(! 
-d $outfolder ){ - mkdir $outfolder; - } - else{ - print "$outfolder output folder already exists !\n"; exit; - } -} - -# Manage Output gff files -my $gffout_complete; -my $gffout_fragmented; -my $gffout_duplicated; -my %gff_out; -if ($outfolder) { - my $outfile="f1_complete.gff"; - open(my $fh, '>', $outfolder."/".$outfile) or die "Could not open file '$outfile' $!"; - $gffout_complete= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); - - $outfile="f1_fragmented.gff"; - open(my $fh2, '>', $outfolder."/".$outfile) or die "Could not open file '$outfile' $!"; - $gffout_fragmented= Bio::Tools::GFF->new(-fh => $fh2, -gff_version => 3 ); - - $outfile="f1_duplicated.gff"; - open(my $fh3, '>', $outfolder."/".$outfile) or die "Could not open file '$outfile' $!"; - $gffout_duplicated= Bio::Tools::GFF->new(-fh => $fh3, -gff_version => 3 ); -} -else{ - $gffout_complete = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3 ); - $gffout_fragmented = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3 ); - $gffout_duplicated = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3 ); -} -$gff_out{'complete'}=$gffout_complete; -$gff_out{'fragmented'}=$gffout_fragmented; -$gff_out{'duplicated'}=$gffout_duplicated; - - -############################################################# -# MAIN -############################################################# - -#Read busco1 file -my %busco1; -while( my $line = <$fh1>) { - - if( $line =~ m/^\w+\s{1}Complete/){ - my @list = split(/\s/,$line); - $busco1{'complete'}{$list[0]}=$line; - } - if( $line =~ m/^\w+\s{1}Missing/){ - my @list = split(/\s/,$line); - $busco1{'missing'}{$list[0]}=$line; - } - if( $line =~ m/^\w+\s{1}Fragmented/){ - my @list = split(/\s/,$line); - $busco1{'fragmented'}{$list[0]}=$line; - } - if( $line =~ m/^\w+\s{1}Duplicated/){ - my @list = split(/\s/,$line); - $busco1{'duplicated'}{$list[0]}=$line; - } -} - -#Read busco2 file -my %busco2; -while( my $line = <$fh2>) { - - if( $line =~ m/^\w+\s{1}Complete/){ 
- my @list = split(/\s/,$line); - $busco2{'complete'}{$list[0]}=$line; - } - if( $line =~ m/^\w+\s{1}Missing/){ - my @list = split(/\s/,$line); - $busco2{'missing'}{$list[0]}=$line; - } - if( $line =~ m/^\w+\s{1}Fragmented/){ - my @list = split(/\s/,$line); - $busco2{'fragmented'}{$list[0]}=$line; - } - if( $line =~ m/^\w+\s{1}Duplicated/){ - my @list = split(/\s/,$line); - $busco2{'duplicated'}{$list[0]}=$line; - } -} - -my %hashCases; -my %streamOutputs; -#compare busco1 and busco2 -foreach my $type1 (keys %busco1){ - foreach my $id1 (keys %{$busco1{$type1}} ){ - - foreach my $type2 (keys %busco2){ - if($type1 ne $type2){ - if(exists_keys (\%busco2,($type2,$id1) ) ){ - - my $name=$type1."2".$type2; - $hashCases{$id1}=$name; - # create streamOutput - if($outfolder){ - if (! exists_keys (\%streamOutputs,($name)) ){ - my $ostream = IO::File->new(); - $ostream->open( $outfolder."/$name.txt", 'w' ) or croak( sprintf( "Can not open '%s' for writing %s", $outfolder."/$name.txt", $! ) ); - $streamOutputs{$name}=$ostream; - } - my $streamOut=$streamOutputs{$name}; - print $streamOut $busco1{$type1}{$id1}; - } - else{ - print "$id1 was $type1 and it is now $type2\n"; - } - } - } - } - if(! 
exists_keys(\%hashCases,($id1) ) ){ - $hashCases{$id1}=$type1."2".$type1; - } - } -} - -#extract gff from folder1 -my %f_omniscient; -my $full_omniscient=\%f_omniscient; -my $loop = 0; -my $list_uID_new_omniscient=undef; -my $augustus_gff_folder=$folderIn1."/augustus_output/predicted_genes"; - -if (-d $augustus_gff_folder){ - opendir(DH, $augustus_gff_folder); - my @files = readdir(DH); - - my %track_found; - my @list_cases=("complete","fragmented","duplicated"); - foreach my $type (@list_cases){ - print "extract gff for $type cases\n" if $verbose; - foreach my $id (keys %{$busco1{$type}}){ - my @list = split(/\s/,$busco1{$type}{$id}); - my $seqId = $list[2]; - my $start = $list[3]; - my $end = $list[4]; - - my @matches = grep { /\Q$id/ } @files; - if( @matches){ - foreach my $match (@matches){ - my $path = $augustus_gff_folder."/".$match; - if (-f $path ){ - my $found=undef; - print $path."\n" if $verbose; - my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $path - }); - if (!keys %{$hash_omniscient}){ - print "No gene found for $path\n";exit; - } - - my @listIDl1ToRemove; - if( exists_keys ($hash_omniscient,('level1','gene'))){ - foreach my $id_l1 (keys %{$hash_omniscient->{'level1'}{'gene'}}){ - my $feature = $hash_omniscient->{'level1'}{'gene'}{$id_l1}; - if ($feature->seq_id() eq $seqId and $feature->start == $start and $feature->end == $end){ - $found=1; - $track_found{$type}{$id}++; - - #Add the OG name to the feature, to be displayed in WA - foreach my $tag_l2 (keys %{$hash_omniscient->{'level2'}}){ - if( exists_keys($hash_omniscient,('level2', $tag_l2, $id_l1))){ - foreach my $feature_l2 ( @{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}} ){ - my $value=$id."-".$hashCases{$id}; - $feature_l2->add_tag_value('description', $value); - } - } - } - } - else{push(@listIDl1ToRemove,$id_l1);} - } - - if ($found){ - if(@listIDl1ToRemove){ - print "lets remove those supernumary annotation: @listIDl1ToRemove \n" if $verbose; - 
remove_omniscient_elements_from_level1_id_list($hash_omniscient, \@listIDl1ToRemove); - } - - if($loop == 0){ - $full_omniscient = clone($hash_omniscient); - $loop++; - } - elsif($loop == 1){ - $full_omniscient, $list_uID_new_omniscient = merge_omniscients($full_omniscient, $hash_omniscient); - $loop++; - } - else{ - $full_omniscient, $list_uID_new_omniscient = merge_omniscients($full_omniscient, $hash_omniscient, $list_uID_new_omniscient); - } - } - else{ - print "No annotation as described in the tsv file found in the gff file $path\n" if $verbose; - } - } - else{ - print "No annotation in the file $path, lets look the next one.\n" if $verbose; - } - } - else{ - print "A) file $id not found among augustus gff output\n" if $verbose; - } - } - } - else{ - print "file $id not found among augustus gff output\n" if $verbose; - } - if(! exists_keys(\%track_found,($type,$id))){ - print "WARNING After reading all the files related to id $id we didn't found any annotation matching its described in the tsv file.\n"; - } - } - my $out = $gff_out{$type}; - print_omniscient($full_omniscient, $out); - %$full_omniscient = (); # empty hash - $list_uID_new_omniscient=undef; #Empty Id used; - my $nb = keys %{$track_found{$type}}; - $loop = 0; - print "We found $nb annotations from $type busco\n"; - } - -} -else{ print "$augustus_gff_folder folder doesn't exits\n"; exit;} - - - - -##Last round -my $end_run = time(); -my $run_time = $end_run - $start_run; -print "Job done in $run_time seconds\n"; -####################################################################################################################### - #################### - # methods # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - -sub remove_slash_path_folder{ - my ($folder_path)=@_; - if ( $folder_path =~ /\/$/){ - return $folder_path = substr $folder_path, 0, -1; - } - else{ - return $folder_path; - } -} - -__END__ - - -=head1 NAME - -busco_compare.pl - -Will 
look at the results from two different runs of busco in order to look at the differentces. -The script look at the complete,fragmented and duplicated genes (not the missing ones) from the 1st run that will be classified differently in the second run. -the script also extracts the annotation of the complete,fragmented and duplicated genes from the 1st run in gff. -Loading the gff tracks on webapollo and looking for BUSCO group classified differently allows to catch easily the locus with potential problems. - -=head1 SYNOPSIS - - busco_compare.pl --f1 --f2 [-o ] - busco_compare.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--f1> - -STRING: Input busco folder1 - -=item B<--f2> - -STRING: Input busco folder2 - -=item B<-v> or B<--verbose> - -For displaying extra information - -=item B<-o> or B<--output> - -STRING: Output folder. - -=item B<--help> or B<-h> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/fasta/fasta-splitter.pl b/annotation/Tools/Util/fasta/fasta-splitter.pl deleted file mode 100755 index 977160d89..000000000 --- a/annotation/Tools/Util/fasta/fasta-splitter.pl +++ /dev/null @@ -1,280 +0,0 @@ -#!/usr/bin/env perl -# -# FASTA Splitter - a script for partitioning a FASTA file into pieces -# -# Version 0.2.0 (February 14, 2014) -# -# Copyright (c) 2012-2014 Kirill Kryukov -# -# This software is provided 'as-is', without any express or implied -# warranty. In no event will the authors be held liable for any damages -# arising from the use of this software. -# -# Permission is granted to anyone to use this software for any purpose, -# including commercial applications, and to alter it and redistribute it -# freely, subject to the following restrictions: -# -# 1. The origin of this software must not be misrepresented; you must not -# claim that you wrote the original software. If you use this software -# in a product, an acknowledgment in the product documentation would be -# appreciated but is not required. -# 2. 
Altered source versions must be plainly marked as such, and must not be -# misrepresented as being the original software. -# 3. This notice may not be removed or altered from any source distribution. -# - -use Getopt::Long; -use strict; -$| = 1; - -my $start_time = time; - -my ($opt_n_parts,$opt_part_size,$opt_measure,$opt_line_len,$opt_eol,$opt_version,$opt_help); -GetOptions("n-parts=i" => \$opt_n_parts, - "part-size=i" => \$opt_part_size, - "measure=s" => \$opt_measure, - "line-length=i" => \$opt_line_len, - "eol=s" => \$opt_eol, - "version" => \$opt_version, - "help" => \$opt_help) -or die "Can't parse command line arguments\n"; - -if ($opt_version) { show_version(); } -if ($opt_help) { show_help(); } - -if (!defined($opt_n_parts) and !defined($opt_part_size)) -{ - if ($opt_version || $opt_help) { exit(0); } - else { die "Splitting method is not specified\nUse -h for help\n"; } -} -if (!@ARGV) { die "File for splitting is not specified\n"; } - -if (defined($opt_n_parts) and $opt_n_parts <= 0) { die "Non-positive number of parts\n"; } -if (defined($opt_part_size) and $opt_part_size <= 0) { die "Non-positive part size\n"; } -if (defined($opt_measure) and $opt_measure ne 'all' and $opt_measure ne 'seq' and $opt_measure ne 'count') { die "Unknown value of --measure option\n"; } -if (defined($opt_eol) and $opt_eol ne 'dos' and $opt_eol ne 'mac' and $opt_eol ne 'unix') { die "Unknown value of --eol option\n"; } - -my $n_parts = defined($opt_n_parts) ? $opt_n_parts : 0; -my $part_size = defined($opt_part_size) ? $opt_part_size : 0; -my $line_len = (defined($opt_line_len) and $opt_line_len >= 0) ? $opt_line_len : 60; -my $eol = defined($opt_eol) ? (($opt_eol eq 'dos') ? "\x0D\x0A" : ($opt_eol eq 'mac') ? "\x0D" : "\x0A") : "\x0A"; -my $eol_len = length($eol); -my $measure = defined($opt_measure) ? (($opt_measure eq 'count') ? 0 : ($opt_measure eq 'seq') ? 
1 : 2) : 2; -my @part_start = (); -my ($base,$ext,$num_len,$total_size); -my ($OUT,$name,$data,$written_total,$written_this_part,$part_end,$part); - -foreach my $infile (@ARGV) { split_file($infile); } - -my $end_time = time; -my $elapsed_time = $end_time - $start_time; -print "All done, $elapsed_time second", (($elapsed_time==1)?'':'s'), " elapsed\n"; - -sub split_file -{ - my ($infile) = @_; - if (!-e $infile or !-f $infile) { print "Can't find file \"$infile\"\n"; return; } - print $infile; - - ($base,$ext) = ($infile,''); - if ($base =~ /[\/\\]([^\/\\]+)$/) { $base = $1; } - if ($base =~ /^(.+?)(\.(fasta|faa|fna|fa))$/i) { ($base,$ext) = ($1,$2); } - - @part_start = (); - my ($n_seq,$total_seq_len,$n_parts_found) = (0,0,0); - - if ($part_size) - { - ($n_seq,$total_seq_len,$total_size,$n_parts_found) = get_file_size_and_part_boundaries($infile); - if (!$n_parts) { print ": $n_seq sequences, $total_seq_len bp"; } - print ' => ', ($n_parts ? 'extracting' : 'dividing into'), ' ', $n_parts_found, ' part', ($n_parts_found > 1 ? 's' : ''), - " of <= $part_size ", ($measure ? (($measure > 1) ? 
'bytes' : 'bp') : 'sequences'), "\n"; - open(my $IN,'<',$infile) or die "Error: Can't open file \"$infile\"\n"; - binmode $IN; - $num_len = length($n_parts_found); - $OUT = undef; - my ($out_file,$part,$si,$buffer) = (undef,0,-1,''); - while (<$IN>) - { - $_ =~ s/[\x0D\x0A]+$//; - if (substr($_,0,1) eq '>') - { - if ($OUT) - { - if ($line_len == 0) { if ($si > 1) { print $OUT $eol; } } - elsif ($buffer ne '') { print $OUT $buffer, $eol; $buffer = ''; } - } - $si++; - if ($si >= $part_start[$part+1]) - { - if ($OUT) { close $OUT; } - $part++; - if ($part > $n_parts_found) { last; } - $out_file = sprintf("%s.part-%0*d%s",$base,$num_len,$part,$ext); - open($OUT,'>',$out_file) or die "Can't create output file \"$out_file\"\n"; - binmode $OUT; - } - print $OUT $_, $eol; - next; - } - if (!$line_len) { print $OUT $_; } - else - { - $buffer .= $_; - while (length($buffer) >= $line_len) { print $OUT substr($buffer,0,$line_len,''), $eol; } - } - } - close $IN; - if ($OUT) - { - if (!$line_len) { if ($si > 1) { print $OUT $eol; } } - elsif ($buffer ne '') { print $OUT $buffer, $eol; $buffer = ''; } - close $OUT; - } - } - else - { - ($n_seq,$total_seq_len,$total_size) = get_file_size($infile); - print ": $n_seq sequences, $total_seq_len bp => dividing into $n_parts part", ($n_parts > 1 ? 's' : ''), " "; - open(my $IN,'<',$infile) or die "Error: Can't open file \"$infile\"\n"; - binmode $IN; - $num_len = length($n_parts); - ($OUT,$name,$data,$written_total,$written_this_part,$part_end,$part) = (undef,undef,'',0,0,int($total_size / $n_parts),1); - while(<$IN>) - { - $_ =~ s/[\x0D\x0A]+$//; - if (substr($_,0,1) eq '>') - { - if (defined $name) { dump_seq(); } - $name = $_; $data = ''; next; - } - $data .= $_; - } - if (defined $name) { dump_seq(); } - close $IN; - if ($OUT) { close $OUT; } - print " OK\n"; - } -} - -sub dump_seq -{ - my $slen = length($data); - my $seq_size = ($measure == 0) ? 1 : ($measure == 1) ? $slen : $slen + length($name) + $eol_len*(1 + ($line_len ? 
int(($slen+$line_len-1)/$line_len) : 1)); - my $new_written_total = $written_total + $seq_size; - if ( !$OUT or - ($written_this_part and ($new_written_total > $part_end) and ($new_written_total - $part_end > $part_end - $written_total)) ) - { - if ($OUT) { close $OUT; $part++; } - my $part_file = $base; - if ($part_file !~ /\.part-\d+$/) { $part_file .= '.part'; } - $part_file .= sprintf("-%0*d%s",$num_len,$part,$ext); - open($OUT,'>',$part_file) or die "Error: Can't create file \"$part_file\"\n"; - binmode $OUT; - $part_end = int($total_size / $n_parts * $part) + 1; - $written_this_part = 0; - print "."; - } - print $OUT $name, $eol; - if ($line_len) { for (my $s=0; $s<$slen; $s+=$line_len) { print $OUT substr($data,$s,$line_len), $eol; } } - else { print $OUT $data, $eol; } - $written_this_part += $seq_size; - $written_total += $seq_size; -} - -sub get_file_size_and_part_boundaries -{ - my ($file) = @_; - open(my $IN,'<',$file) or die "Error: Can't open file \"$file\"\n"; - binmode $IN; - my ($nseq,$total_seq_length,$total_size,$n_parts_found,$this_part_size,$nlen,$slen,$stop) = (0,0,0,1,0,0,0,0); - $part_start[1] = 0; - while (<$IN>) - { - $_ =~ s/[\x0D\x0A]+$//; - my $len = length($_); - if (substr($_,0,1) eq '>') - { - if ($nlen) - { - my $seq_size = seq_size($nlen,$slen); - if ($part_size and $this_part_size and ($this_part_size + $seq_size > $part_size)) - { - if ($n_parts and $n_parts_found == $n_parts) { $stop = 1; last; } - else { $this_part_size = $seq_size; $n_parts_found++; $part_start[$n_parts_found] = $nseq; } - } - else { $this_part_size += $seq_size; } - $nseq++; $total_seq_length += $slen; $total_size += $seq_size; - } - ($nlen,$slen) = ($len,0); next; - } - if ($nlen) { $slen += $len; } - } - if ($nlen and !$stop) - { - my $seq_size = seq_size($nlen,$slen); - if ($part_size and $this_part_size and ($this_part_size + $seq_size > $part_size)) - { - if ($n_parts and $n_parts_found == $n_parts) { $stop = 1; } - else { $this_part_size = $seq_size; 
$n_parts_found++; $part_start[$n_parts_found] = $nseq; } - } - if (!$stop) { $nseq++; $total_seq_length += $slen; $total_size += $seq_size; } - } - close $IN; - $part_start[$n_parts_found+1] = $nseq; - return ($nseq,$total_seq_length,$total_size,$n_parts_found); -} - -sub get_file_size -{ - my ($file) = @_; - open(my $IN,'<',$file) or die "Error: Can't open file \"$file\"\n"; - binmode $IN; - my ($nseq,$total_seq_length,$total_size,$nlen,$slen) = (0,0,0,0,0); - while (<$IN>) - { - $_ =~ s/[\x0D\x0A]+$//; - my $len = length($_); - if (substr($_,0,1) eq '>') - { - if ($nlen) { $nseq++; $total_seq_length += $slen; $total_size += seq_size($nlen,$slen); } - ($nlen,$slen) = ($len,0); next; - } - if ($nlen) { $slen += $len; } - } - if ($nlen) { $nseq++; $total_seq_length += $slen; $total_size += seq_size($nlen,$slen); } - close $IN; - return ($nseq,$total_seq_length,$total_size); -} - -sub seq_size -{ - my ($nlen,$slen) = @_; - return ($measure == 0) ? 1 : - ($measure == 1) ? $slen : - $slen + $nlen + $eol_len*(1 + ($line_len ? int(($slen+$line_len-1)/$line_len) : 1)); -} - -sub show_version -{ - print q{FASTA Splitter 0.2.0 (February 14, 2014) -Copyright (c) 2012-2014 Kirill Kryukov -}; -} - -sub show_help() -{ - print q{Usage: fasta-splitter.pl [options] ... -Options: - --n-parts - Divide into parts - --part-size - Divide into parts of size - --measure (all|seq|count) - Specify whether all data, sequence length, or - number of sequences is used for determining part - sizes ('all' by default). - --line-length - Set output sequence line length, 0 for single line - (default: 60). - --eol (dos|mac|unix) - Choose end-of-line character ('unix' by default). - --version - Show version. - --help - Show help. 
-}; -} diff --git a/annotation/Tools/Util/fasta/fasta2snap_filter.pl b/annotation/Tools/Util/fasta/fasta2snap_filter.pl deleted file mode 100755 index 91e8de7d1..000000000 --- a/annotation/Tools/Util/fasta/fasta2snap_filter.pl +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env perl -use strict; -use Bio::SeqIO; - -my $filterfile = $ARGV[0]; -my $seqs_to_keep = {}; -open(my $FILTER, "<", $filterfile) or die("Error: could not open '$filterfile' $!"); -while(<$FILTER>) -{ - chomp(); - $seqs_to_keep->{$_} = 1; -} -close($FILTER); - -my $seqs = {}; -my $loader = Bio::SeqIO->new(-fh => \*STDIN, -format => 'Fasta'); -while(my $seq = $loader->next_seq) -{ - $seqs->{$seq->id} = $seq; -} - -my @keys = sort(keys(%$seqs)); -my $writer = Bio::SeqIO->new( -fh => \*STDOUT, -format => 'Fasta'); -foreach my $seqid(@keys) -{ - $writer->write_seq($seqs->{$seqid}) if($seqs_to_keep->{$seqid}); -} diff --git a/annotation/Tools/Util/fasta/fasta_size_select.rb b/annotation/Tools/Util/fasta/fasta_size_select.rb deleted file mode 100755 index be6934e91..000000000 --- a/annotation/Tools/Util/fasta/fasta_size_select.rb +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/ruby -# == NAME -# fasta_size_select.rb -# -# == USAGE -# ./fasta_size_select.rb [ -h | --help ] -# [ -i | --infile ] | [ -s | --size ] -# == DESCRIPTION -# A script that uses BioRuby to find fasta entries above -# -# == OPTIONS -# -h,--help:: Show help -# -i,--infile=INFILE:: a Fasta file -# -s,--size=SIZE:: Minimum size to keep - -# -# == EXPERT OPTIONS -# -# == AUTHOR -# Marc Hoeppner, marc.hoeppner@web.de - -require 'rdoc/usage' -require 'optparse' -require 'ostruct' -require 'logger' -require 'bio' - -### Define modules and classes here - -### Get the script arguments and open relevant files -options = OpenStruct.new() -opts = OptionParser.new() -opts.on("-h","--help","Display the usage information") {RDoc::usage} -opts.on("-i","--infile", "=INFILE","Input") {|argument| options.infile = argument } -opts.on("-s","--size", "=SIZE","Min 
Size") {|argument| options.size = argument.to_i } - -opts.parse! rescue RDoc::usage('usage') - -log = Logger.new(File.new('this_script.log', File::WRONLY | File::TRUNC | File::CREAT)) -log_level = Logger::INFO # or: DEBUG, WARN, FATAL, UNKNOWN - -log.info('Script this_script.rb started') -log.info('Options:') -log.info(options.to_yaml) - -ff = Bio::FastaFormat.open(options.infile) - -ff.each_entry do | entry| - - next if entry.naseq.length < options.size - - puts entry.to_s - -end diff --git a/annotation/Tools/Util/fasta/fasta_trim_assembly.rb b/annotation/Tools/Util/fasta/fasta_trim_assembly.rb deleted file mode 100755 index c92b34efd..000000000 --- a/annotation/Tools/Util/fasta/fasta_trim_assembly.rb +++ /dev/null @@ -1,15 +0,0 @@ -require 'bio' - -fasta = Bio::FastaFormat.open(ARGV.shift,"r") - -fasta.each_entry do |entry| - - leading = entry.naseq.slice(/^n*/).length - trailing = entry.naseq.reverse.slice(/^n*/).length - length = entry.nalen - - - - puts entry.naseq.subseq(leading+1,length-(trailing)).to_fasta(entry.definition,80) - -end diff --git a/annotation/Tools/Util/fasta/splitMultiFasta.pl b/annotation/Tools/Util/fasta/splitMultiFasta.pl deleted file mode 100755 index ded7a14d9..000000000 --- a/annotation/Tools/Util/fasta/splitMultiFasta.pl +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/perl -#split_fasta.pl version 1.0 -#This script accepts a file consisting of multiple FASTA formatted sequence records. -#It splits the file into multiple new files, each consisting of a subset of the original records. -# -#There are three command line options: -# -#-i input file. -#-o output file prefix. This script will append numbers to this prefix name so that each created file is unique. -#-n the number of sequences to place in each output file. -# -#Example usage: -# -#perl split_fasta.pl -i sample_in.txt -o new_sequences -n 100 -# -#Written by Paul Stothard, Canadian Bioinformatics Help Desk. 
-# -#stothard@ualberta.ca - -use strict; -use warnings; - -#Command line processing. -use Getopt::Long; - -my $inputFile; -my $outputFile; -my $numberToCopy; - -Getopt::Long::Configure ('bundling'); -GetOptions ('i|input_file=s' => \$inputFile, - 'o|output_file_prefix=s' => \$outputFile, - 'n|number=i' => \$numberToCopy); - -if(!defined($inputFile)) { - die ("Usage: split_fasta.pl -i -o -n \n"); -} - -if(!defined($outputFile)) { - die ("Usage: split_fasta.pl -i -o -n \n"); -} - -if(!defined($numberToCopy)) { - die ("Usage: split_fasta.pl -i -o -n \n"); -} - -if ($numberToCopy <= 0) { - die ("-n value must be greater than 0.\n"); -} - -#count the number of sequences in the file -#read each record from the input file - -my $seqCount = 0; -my $fileCount = 0; -my $seqThisFile = 0; - -open (OUTFILE, ">" . $outputFile . "_" . $fileCount) or die ("Cannot open file for output: $!"); - -open (SEQFILE, $inputFile) or die( "Cannot open file : $!" ); -$/ = ">"; - -while (my $sequenceEntry = ) { - - if ($sequenceEntry =~ m/^\s*>/){ - next; - } - - my $sequenceTitle = ""; - if ($sequenceEntry =~ m/^([^\n]+)/){ - $sequenceTitle = $1; - } - else { - $sequenceTitle = "No title was found!"; - } - - $sequenceEntry =~ s/^[^\n]+//; - $sequenceEntry =~ s/[^A-Za-z]//g; - - #write record to file - print (OUTFILE ">$sequenceTitle\n"); - print (OUTFILE "$sequenceEntry\n"); - $seqCount++; - $seqThisFile++; - - if ($seqThisFile == $numberToCopy) { - $fileCount++; - $seqThisFile = 0; - close (OUTFILE) or die( "Cannot close file : $!"); - open (OUTFILE, ">" . $outputFile . "_" . 
$fileCount) or die ("Cannot open file for output: $!"); - } - -}#end of while loop - -close (SEQFILE) or die( "Cannot close file : $!"); - -close (OUTFILE) or die( "Cannot close file : $!"); \ No newline at end of file diff --git a/annotation/Tools/Util/gff/README.md b/annotation/Tools/Util/gff/README.md deleted file mode 100644 index bef6a5b17..000000000 --- a/annotation/Tools/Util/gff/README.md +++ /dev/null @@ -1,101 +0,0 @@ -

gff toolkit

---------------------------- - -Bench of tool to handle gff3 files. -To know more about gff3 format it's over there => https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md - -################### -# I) PREREQUISITE # -################### - -Once you cloned the git repositpry you have to configure your installation as explained below: - -## 1) Most of scripts use the NBIS librairy which is located here: GAAS/annotation/NBIS -Consequently, in order to use those scripts, you must add the library location to your path like that: - - export PERL5LIB=$PERL5LIB:/pathTo/GAAS/annotation - -## 2) Bioperl must as well be installed as well as sepcific perl modules often not included by default (Moose,Clone) - -### 2.1) Uppmax user (Swedish research cluster)=> This will be easy for you, It's a lucky day ;) -Just load the needed modules by executing these commands:
- - module load bioinfo-tools - module load BioPerl/1.6.922 - module load perl_modules/5.18.4 # This module contains the Moose and Clone librairy - -P.S: Instead to use the perl_modules/5.18.4 module you may also install your own local perl librairy by following these instructions: http://www.uppmax.uu.se/support/faq/software-faq/installing-local-perl-packages/
- -### 2.2) For non-uppmax user please follow these instructions: - -During the below processes, if you encountered right problem, use the "super user" command to do the installation as administrator. - - e.g: sudo cpanm bioperl - -In that case you will be prompt to type the super user paswword. Obviously you need to have that privilege. - -#### 2.2.1) Bioperl - -The easiest way to have bioperl is to clone the bioperl-live project here => https://github.com/bioperl/bioperl-live -and add the path to it within your PERL5LIB path. - - export PERL5LIB=$PERL5LIB:/pathTo/bioperl-live - -Otherwise you should be able to install bioperl using your favorite package manager (cpan, cpanm, etc). - - e.g: cpanm bioperl - -#### 2.2.2) Other mandatory modules -You must install other libraries (e.g Moose and Clone). -You can install them by using your favorite package manager (cpan, cpanm, etc). - - e.g: cpanm Clone - e.g: cpanm Moose - e.g: cpanm Graph::Directed - e.g: cpanm LWP::UserAgent - e.g: cpanm Statistics::R - e.g: cpanm JSON - e.g: cpanm Sort::Naturally - -################################################# -# II) Script name and classificaiton by prefix # - -_**As most as possible we will try to name the script with understandable names. -For that purpose we try to use a controled vocabulary**_ - -## A) gff vs gff3 prefix - -Script not prefixed by gff3 but only with gff means that they havn't be checked or are not compatible with the gff3 standard. In other term, it means that a file not following the gff3 standards might not work with the script prefixed by gff3. Lot of modifcation could be post process if your file don't follow the gff3 standart. We will develop that in the part 3 of this readme. - - -## B) \_sq\_ AND \_sp\_ - -### B.1) \_sq\_ => Means SEQUENTIAL - -The gff file is read and processed from top to the end. This is memory efficient !! -But in other hand it hard to create complex script. 
Moreover, If data are not written sorted (e.g an exon of a gene located in the middle of the descritpion of another gene) some troubles could occur. - -### B.2) \_sp\_ => Means SLURP - -The gff file will be saved in memory before to process it. This is handle by the slurp_gff3_file_JD method. It has a memory cost. So if your gff3 files are over Gb size and your computer do not have enough ram memory, it might crash. -That approach allows to peform more complicated task and more efficiency. Moreover, it allows to fix/correct, in the limit of the possibilities given by the format, the issues present in the gff you give in input. See part 3 for more information about it. - - -################################################# -# III) What does the SLURP method for you => GFF3 Standardization for a full GFF3 compliant to any tool !!! -######### -**_This method create a hash structure containing all the data in memory. We call it OMNISCIENT.
-The OMNISCNIENT structure is a three levels structure :_** - -$omniscient{level1}{tag_l1}{level1_id} = feature <= tag could be gene,etc
-$omniscient{level2}{tag_l2}{idY} = @featureList <= tag could be mRNA,rRNA,tRNA,etc. idY is a level1_id (know as Parent attribute within the level2 feature). The @featureList is a list to be able to manage isoform cases.
-$omniscient{level3}{tag_l3}{idZ} = @featureList <= tag could be exon,cds,utr3,utr5,etc. idZ is the ID of a level2 feature (know as Parent attribute within the level3 feature). The @featureList is a list to be able to put all the feature of a same tag together.
- - -It creates an ID attribute if missing
-It check for duplicated features (same position, same ID, same Parent)
-It expand level3 features (e.g. exon) sharing multiple mRNA (Parent attributes contains multiple parental mRNA). One exon by parental mRNA will be created.
-If a level 2 feature doesn t have parent feature but has the attribute we create the level1 feature.
-If a feature doesn t have the parent attribute we create the attribute !! But not the feature in the case of a parent of a level 3 feature. ( Could be implemented )
- -INFO:Access to element of an omniscient is most of time from level1 to level3. Consequently if a level3 feature don't have any parent, it will not be printed.
diff --git a/annotation/Tools/Util/gff/gff3_sp_Prokka_inferNameFromAttributes.pl b/annotation/Tools/Util/gff/gff3_sp_Prokka_inferNameFromAttributes.pl deleted file mode 100755 index a42c5a210..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_Prokka_inferNameFromAttributes.pl +++ /dev/null @@ -1,182 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Carp; -use Clone 'clone'; -use Getopt::Long; -use Pod::Usage; -use List::MoreUtils qw(uniq); -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient; - -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $gff = undef; -my $help= 0; -my $force=undef; -my $outfile=undef; - -if ( !GetOptions( - "help|h" => \$help, - "gff|f=s" => \$gff, - "force" => \$force, - "output|outfile|out|o=s" => \$outfile)) - -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ( ! 
(defined($gff)) ){ - pod2usage( { - -message => "$header\nAt least 1 parameter is mandatory:\nInput reference gff file (--gff) \n\n", - -verbose => 0, - -exitval => 2 } ); -} - -my $gffout; -if ($outfile) { - $outfile=~ s/.gff//g; - open(my $fh, '>', $outfile.".gff") or die "Could not open file '$outfile' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); -} -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - - - - ##################### - # MAIN # - ##################### - - -###################### -### Parse GFF input # -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $gff - }); -print ("GFF3 file parsed\n"); - -my $nbNameAdded=0; - -foreach my $tag (keys %{$hash_omniscient->{'level1'}}){ - foreach my $id (keys %{$hash_omniscient->{'level1'}{$tag}}){ - - my $feature=$hash_omniscient->{'level1'}{$tag}{$id}; - - if($feature->has_tag('name')){ - my $name=$feature->_tag_value('name'); - create_or_replace_tag($feature,'Name', $name); - $feature->remove_tag('name'); - } - - #Name already contained in the gene attribute. - if($feature->has_tag('gene')){ - # we get the Name - my $name=$feature->_tag_value('gene'); - - # If no attribute Name or if we have to replace it - if(! $feature->has_tag('Name') or ($force)){ - create_or_replace_tag($feature,'Name', $name); - $nbNameAdded++; - } - elsif($feature->has_tag('Name') and ( ! $force)){ - print "Feature contains already an attribute Name. You can force it replacement by using the option --force\n"; - } - print "Name found in gene attribute = $name\n"; - }# Name not found in gene attribute. So we try to get the name included in the inference attribute. 
- elsif($feature->has_tag('inference')){ - my @inferenceAtt=$feature->get_tag_values('inference'); - if ($#inferenceAtt > 0){ - - - #foreach my $val(@inferenceAtt){ - # print "ok".$val."\n"; - #} - my @tab = split /\|/,$inferenceAtt[$#inferenceAtt]; # split the last value by the character | - my $name = $tab[$#tab]; - - # SKIP case - if($name =~ /protein motif:Pfam:/i){ - next; - } - if($name =~ /protein motif:CLUSTERS:/i){ - next; - } - if($name =~ /similar to AA sequence:UniProtKB:/i){ - next; - } - # ELSE name contains the Uniprot header of the protein coming from "--proteins" option ( Fasta file of trusted proteins to first annotate from ). - - - if(! $feature->has_tag('Name') or ($force)){ - create_or_replace_tag($feature,'Name', $name); - $nbNameAdded++; - } - elsif($feature->has_tag('Name') and ( ! $force)){ - print "Feature contains already an attribute Name. You can force it replacement by using the option --force\n"; - } - print "My Name get in inference attribute = $name\n"; - } - #else{ - # print "We skip: ".$feature->gff_string."\n"; - #} - } - } -} -print "We added $nbNameAdded Name attributes\n"; - -print_omniscient($hash_omniscient, $gffout); #print gene modified -__END__ - -=head1 NAME - -gff3_addProkkaNameFromInferenceAttribute.pl - -The script take a gff3 file as input. - -The script give basic statistics of a gff file. -Remark: identical feature from level1 or level2 with identical ID will be merged as well as their subsequent features (Level2 or level3). - -=head1 SYNOPSIS - - ./gff3_addProkkaNameFromInferenceAttribute.pl -gff file.gff [ -o outfile ] - ./gff3_addProkkaNameFromInferenceAttribute.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff> or B<-f> - -Input GFF3 file that will be read (and sorted) - -=item B<--force> - -If Name attribute already exists, they will be replaced if a new one is found - -=item B<-o> , B<--output> , B<--out> or B<--outfile> - -Output GFF file. 
If no output file is specified, the output will be -written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_add_introns.pl b/annotation/Tools/Util/gff/gff3_sp_add_introns.pl deleted file mode 100755 index c81f58e17..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_add_introns.pl +++ /dev/null @@ -1,228 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use POSIX qw(strftime); -use List::MoreUtils qw(natatime);; -use Carp; -use Getopt::Long; -use Pod::Usage; -use Clone 'clone'; -use NBIS::GFF3::Omniscient; -use Bio::Tools::GFF; - -my $header = qq{ -######################################################## -# NBIS 2016 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - - -my $intronID = 1; - -my $opt_file; -my $opt_output=undef; -my $opt_help = 0; - -my @copyARGV=@ARGV; -if ( !GetOptions( 'f|gff|ref|reffile=s' => \$opt_file, - 'o|out|output=s' => \$opt_output, - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ( ! 
defined( $opt_file) ) { - pod2usage( { - -message => "$header\nMust specify at least 1 parameters:\nReference data gff3 file (--gff)\n", - -verbose => 0, - -exitval => 1 } ); -} - -# ####################### -# # START Manage Option # -# ####################### - -my $gffout; -if ($opt_output) { - $opt_output=~ s/.gff//g; - open(my $fh, '>', $opt_output.".gff") or die "Could not open file '$opt_output' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); - } -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - - -# ##################################### -# # END Manage OPTION -# ##################################### - - - - ####################### - # MAIN # -# >>>>>>>>>>>>>>>>>>>>>>>>> ####################### <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -#PART 1 -################################### -# Read input gff3 files one by one and save value in hash of list - - - ###################### - ### Parse GFF input # - my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $opt_file - }); - print("Parsing Finished\n\n"); - ### END Parse GFF input # - ######################### - - #print statistics - my ($stat, $distri) = gff3_statistics($hash_omniscient); - #print statistics - foreach my $infoList (@$stat){ - foreach my $info (@$infoList){ - print "$info"; - } - print "\n"; - } - - - ###################### - ### Parse GFF input # - # get nb of each feature in omniscient; - foreach my $tag_l2 (keys %{$hash_omniscient->{'level2'}}){ - foreach my $id_l1 (keys %{$hash_omniscient->{'level2'}{$tag_l2}}){ - my $one_f2 = $hash_omniscient->{'level2'}{$tag_l2}{$id_l1}[0]; - - ####################### - #get feature1 and info - my $feature_l1=undef; - my $tag_l1; - foreach my $tag_level1 (keys %{$hash_omniscient->{'level1'}}){ - if (exists ($hash_omniscient->{'level1'}{$tag_level1}{$id_l1})){ - $feature_l1=$hash_omniscient->{'level1'}{$tag_level1}{$id_l1}; - $tag_l1=$tag_level1; - last; - } - } - if(! 
$feature_l1){print "Problem ! We didnt retrieve the level1 feature with id $id_l1\n";exit;} - - ##### - # get all level2 - my $All_l2_single=1; - foreach my $feature_l2 ( @{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}} ){ - - my @introns=(); - my $feature_example; - - ###### - #get all level3 - my $id_l2=lc($feature_l2->_tag_value('ID')); - - if(exists_keys($hash_omniscient, ('level3','exon',$id_l2) ) ){ - - my $counterL3=-1; - #Initialize intron to 0 to avoid error during printing results - my $indexLast = $#{$hash_omniscient->{'level3'}{'exon'}{$id_l2}}; - - my @sortedList = sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{'exon'}{$id_l2}}; - - foreach my $feature_l3 ( @sortedList ){ - - #count number feature of tag_l3 type - $counterL3++; - - ################ - #Manage Introns# - # from the second intron to the last (from index 1 to last index of the table sortedList) - # We go inside this loop only if we have more than 1 feature. - if($counterL3 > 0 and $counterL3 <= $indexLast){ - my $intronStart = $sortedList[$counterL3-1]->end+1; - my $intronEnd = $sortedList[$counterL3]->start-1; - push @introns, ($intronStart, $intronEnd); - $feature_example=clone($sortedList[$counterL3]); - } - }# END FOREACH L3 - } - - #Now add introns features - if(@introns){ - my $it = natatime 2, @introns; - while (my @tuple = $it->()) { - my $intron_feature = clone($feature_example); - $intron_feature->primary_tag('intron'); - my $ID='intron_added-'.$intronID; - $intronID++; - create_or_replace_tag($intron_feature,'ID', $ID); #modify ID to replace by parent value - $intron_feature->start($tuple[0]); - $intron_feature->end($tuple[1]); - push (@{$hash_omniscient->{"level3"}{'intron'}{lc($id_l2)}}, $intron_feature); - } - } - } - } - } - -print_omniscient($hash_omniscient, $gffout); #print gene modified - - ######################### - ######### END ########### - ######################### - - 
-####################################################################################################################### - #################### - # methods # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - - - -__END__ - - -=head1 NAME - -gff3_sp_manage_introns.pl - This script - -=head1 SYNOPSIS - - ./gff3_sp_manage_introns.pl --gff=infile --out=outFile - ./gff3_sp_manage_introns.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff>, B<-f>, B<--ref> or B<-reffile> - -Input GFF3 file correponding to gene build. - -=item B<--out>, B<--output> or B<-o> - -Output gff3 file where the gene incriminated will be write. - -=item B<--help> or B<-h> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_add_start_and_stop.pl b/annotation/Tools/Util/gff/gff3_sp_add_start_and_stop.pl deleted file mode 100755 index b0550fa34..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_add_start_and_stop.pl +++ /dev/null @@ -1,426 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use POSIX qw(strftime); -use List::MoreUtils qw(natatime);; -use Carp; -use Getopt::Long; -use Pod::Usage; -use Bio::DB::FASTA; -use Bio::Tools::CodonTable; -use Clone 'clone'; -use NBIS::GFF3::Omniscient; - -my $header = qq{ -######################################################## -# NBIS 2019 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - - -my $start_id = 1; -my $stop_id = 1; - -my $opt_file=undef; -my $file_fasta=undef; -my $codon_table_id=1; -my $opt_output=undef; -my $verbose=undef; -my $opt_help = 0; - -my @copyARGV=@ARGV; -if ( !GetOptions( 'i|g|gff=s' => \$opt_file, - "fasta|fa|f=s" => \$file_fasta, - "table|codon|ct=i" => \$codon_table_id, - 'o|out|output=s' => \$opt_output, - 'v!' => \$verbose, - 'h|help!' 
=> \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if(! $opt_file or ! $file_fasta ) { - pod2usage( { - -message => "$header\nMust specify at least 2 parameters:\nA gff file (--gff) and a fasta file (--fasta) \n", - -verbose => 0, - -exitval => 1 } ); -} - -# ####################### -# # START Manage Option # -# ####################### - -my $gffout; -if ($opt_output) { - open(my $fh, '>', $opt_output) or die "Could not open file '$opt_output' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); - } -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - -if($codon_table_id<0 and $codon_table_id>25){ - print "$codon_table_id codon table is not a correct value. It should be between 0 and 25 (0,23 and 25 might be problematic !)\n"; -} -else{ - print "We will use the codon table $codon_table_id. If it is not what you want please stop the tool and use the --table option. 
\n"; -} -my $codon_table = Bio::Tools::CodonTable->new( -id => $codon_table_id); -# ##################################### -# # END Manage OPTION -# ##################################### - - - - ####################### - # MAIN # -# >>>>>>>>>>>>>>>>>>>>>>>>> ####################### <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -#PART 1 -################################### -# Read input gff3 files one by one and save value in hash of list - - -###################### -### Parse GFF input # -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $opt_file - }); -print("Parsing Finished\n\n"); -### END Parse GFF input # -######################### - -#################### -# index the genome # -my $db = Bio::DB::Fasta->new($file_fasta); -print ("Genome fasta parsed\n"); - -my $counter_start_missing = 0; -my $counter_start_added = 0; -my $counter_end_missing = 0; -my $counter_end_added = 0; - -###################### -### Parse GFF input # -# get nb of each feature in omniscient; -foreach my $tag_l2 (keys %{$hash_omniscient->{'level2'}}){ - foreach my $id_l1 (keys %{$hash_omniscient->{'level2'}{$tag_l2}}){ - foreach my $feature_l2 ( @{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}} ){ - - # get level2 id - my $id_level2 = lc($feature_l2->_tag_value('ID')); - - ############################## - #If it's a mRNA = have CDS. 
# - if ( exists ($hash_omniscient->{'level3'}{'cds'}{$id_level2} ) ){ - - ############## - # Manage CDS # - my @cds_feature_list = sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{'cds'}{$id_level2}}; # be sure that list is sorted - my $cds_dna_seq = concatenate_feature_list(\@cds_feature_list); - print "sequence: $cds_dna_seq\n" if ($verbose); - #create the cds object - my $cds_obj = Bio::Seq->new(-seq => $cds_dna_seq, -alphabet => 'dna' ); - #Reverse the object depending on strand - my $strand="+"; - if ($feature_l2->strand == -1 or $feature_l2->strand eq "-"){ - $cds_obj = $cds_obj->revcom(); - $strand = "-"; - print "feature on minus strand\n" if ($verbose); - } - - #------------------------- - # START CASE - #------------------------- - if ( exists ($hash_omniscient->{'level3'}{'start_codon'}{$id_level2} ) ){ - print "start_codon already exists for $id_level2\n" if ($verbose); - } - else{ - - my $first_codon = substr( $cds_obj->seq, 0, 3 ); - print "first_codon = $first_codon \n" if ($verbose); - - if ($codon_table->is_start_codon($first_codon)) { - $counter_start_added++; - print "first_codon is a start codon \n" if ($verbose); - # create start feature - my $start_feature = clone($cds_feature_list[0]); - $start_feature->primary_tag('start_codon'); - my $ID='start_added-'.$start_id; - $start_id++; - create_or_replace_tag($start_feature,'ID', $ID); #modify ID to replace by parent val - - - if($strand eq "+"){ - #set start position of the start codon - $start_feature->start($cds_feature_list[0]->start()); - - #set stop position of the start codon - my $step=3; - my $cpt=0; - my $size = $cds_feature_list[$cpt]->end()-$cds_feature_list[$cpt]->start()+1; - while($size < 3){ - - my $start_feature_new = clone( $start_feature ); - $start_feature_new->end($cds_feature_list[$cpt]->start()+$size-1); - my $ID='start_added-'.$start_id; - $start_id++; - create_or_replace_tag($start_feature_new,'ID', $ID); #modify ID to replace by parent val - push 
@{$hash_omniscient->{'level3'}{'start_codon'}{$id_level2}}, $start_feature_new; - - $cpt++; - $step-=$size; - $start_feature->start($cds_feature_list[$cpt]->start()); - $size += $size + $cds_feature_list[$cpt]->end()-$cds_feature_list[$cpt]->start()+1; - } - $start_feature->end($cds_feature_list[$cpt]->start()+$step-1); - } - else{ - #set start position of the start codon - $start_feature->end($cds_feature_list[$#cds_feature_list]->end()); - - #set stop position of the start codon - my $step=3; - my $cpt=$#cds_feature_list; - my $size=$cds_feature_list[$cpt]->end()-$cds_feature_list[$cpt]->start()+1; - while($size < 3){ - - my $start_feature_new = clone( $start_feature ); - $start_feature_new->start($cds_feature_list[$cpt]->end()-$size+1); - my $ID='start_added-'.$start_id; - $start_id++; - create_or_replace_tag($start_feature_new,'ID', $ID); #modify ID to replace by parent val - push @{$hash_omniscient->{'level3'}{'start_codon'}{$id_level2}}, $start_feature_new; - - $cpt--; - $step-=$size; - $start_feature->end($cds_feature_list[$cpt]->end()); - $size += $size + $cds_feature_list[$cpt]->end()-$cds_feature_list[$cpt]->start()+1; - } - print $cds_feature_list[$cpt]->end()."\n"; - $start_feature->start($cds_feature_list[$cpt]->end()-$step+1); - } - push @{$hash_omniscient->{'level3'}{'start_codon'}{$id_level2}}, $start_feature; - } - else{ - $counter_start_missing++; - } - } - - #------------------------- - # STOP CASE - #------------------------- - if ( exists ($hash_omniscient->{'level3'}{'stop_codon'}{$id_level2} ) ){ - print "stop_codon already exists for $id_level2\n" if ($verbose); - } - else{ - my $last_codon = substr( $cds_obj->seq, -3 ); - print "last_codon = $last_codon \n" if ($verbose); - - if ( $codon_table->is_ter_codon( $last_codon )){ - $counter_end_added++; - print "last codon is a stop codon \n" if ($verbose); - # create stop feature - my $stop_feature = clone($cds_feature_list[0]); - $stop_feature->primary_tag('stop_codon'); - my 
$ID='stop_added-'.$stop_id; - $stop_id++; - create_or_replace_tag($stop_feature,'ID', $ID); #modify ID to replace by parent value - - if($strand eq "+"){ - - # set start position of the stop codon - $stop_feature->end($cds_feature_list[$#cds_feature_list]->end()); - - #set stop position of the stop codon - my $step=3; - my $cpt=$#cds_feature_list; - my $size=$cds_feature_list[$cpt]->end()-$cds_feature_list[$cpt]->start()+1; - while($size < 3){ - - my $stop_feature_new = clone( $stop_feature ); - $stop_feature_new->start($cds_feature_list[$cpt]->end()-$size+1); - my $ID='start_added-'.$start_id; - $start_id++; - create_or_replace_tag($stop_feature_new,'ID', $ID); #modify ID to replace by parent val - push @{$hash_omniscient->{'level3'}{'stop_codon'}{$id_level2}}, $stop_feature_new; - - $cpt--; - $step-=$size; - $stop_feature->end($cds_feature_list[$cpt]->end()); - $size += $size + $cds_feature_list[$cpt]->end()-$cds_feature_list[$cpt]->start()+1; - } - #print $cds_feature_list[$cpt]->end()."\n"; - $stop_feature->start($cds_feature_list[$cpt]->end()-$step+1); - } - else{ - #set start position of the stop codon - $stop_feature->start($cds_feature_list[0]->start()); - - #set stop position of the stop codon - my $step=3; - my $cpt=0; - my $size = $cds_feature_list[$cpt]->end()-$cds_feature_list[$cpt]->start()+1; - while($size < 3){ - - my $stop_feature_new = clone( $stop_feature ); - $stop_feature_new->end($cds_feature_list[$cpt]->start()+$size-1); - my $ID='start_added-'.$start_id; - $start_id++; - create_or_replace_tag($stop_feature_new,'ID', $ID); #modify ID to replace by parent val - push @{$hash_omniscient->{'level3'}{'stop_codon'}{$id_level2}}, $stop_feature_new; - - $cpt++; - $step-=$size; - $stop_feature->start($cds_feature_list[$cpt]->start()); - $size += $size + $cds_feature_list[$cpt]->end()-$cds_feature_list[$cpt]->start()+1; - } - $stop_feature->end($cds_feature_list[$cpt]->start()+$step-1); - } - push 
@{$hash_omniscient->{'level3'}{'stop_codon'}{$id_level2}}, $stop_feature; - } - else{ - $counter_end_missing++; - } - } - } - } - } -} - -print_omniscient($hash_omniscient, $gffout); #print gene modified -print "$counter_start_added start codon added and $counter_start_missing CDS do not start by a start codon\n"; -print "$counter_end_added stop codon added and $counter_end_missing CDS do not end by a stop codon \n"; -print "bye bye\n"; - - ######################### - ######### END ########### - ######################### - -####################################################################################################################### - #################### - # methods # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - -sub concatenate_feature_list{ - - my ($feature_list) = @_; - - my $seq = ""; - foreach my $feature (@$feature_list) { - my $start=$feature->start(); - my $end=$feature->end(); - my $seqid=$feature->seq_id(); - $seq .= $db->seq( $seqid, $start, $end ); - } - return $seq; -} - -__END__ -EXAMPLE NORMAL -##gff-version 3 -Pcoprophilum_scaf_9 . contig 1 1302582 . . . ID=Pcoprophilum_scaf_9;Name=Pcoprophilum_scaf_9 -Pcoprophilum_scaf_9 maker gene 189352 192747 . + . ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.0;Name=genemark-Pcoprophilum_scaf_9-processed-gene-2.0 -Pcoprophilum_scaf_9 maker mRNA 189352 192747 . + . ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.0;Name=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1;_AED=0.00;_eAED=0.00;_QI=398|1|1|1|0.5|0.33|3|343|825 -Pcoprophilum_scaf_9 maker exon 189352 189520 . + . ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1:exon:96;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1 -Pcoprophilum_scaf_9 maker exon 189643 189922 . + . 
ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1:exon:97;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1 -Pcoprophilum_scaf_9 maker exon 189978 192747 . + . ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1:exon:98;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1 -Pcoprophilum_scaf_9 maker five_prime_UTR 189352 189520 . + . ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1:five_prime_utr;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1 -Pcoprophilum_scaf_9 maker five_prime_UTR 189643 189871 . + . ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1:five_prime_utr;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1 -Pcoprophilum_scaf_9 maker CDS 189872 189922 . + 0 ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1 -Pcoprophilum_scaf_9 maker CDS 189978 192404 . + 0 ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1 -Pcoprophilum_scaf_9 maker three_prime_UTR 192405 192747 . + . ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1:three_prime_utr;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1 -Pcoprophilum_scaf_9 maker gene 197438 198714 . - . ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.3;Name=genemark-Pcoprophilum_scaf_9-processed-gene-2.3 -Pcoprophilum_scaf_9 maker mRNA 197438 198714 . - . ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.3;Name=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1;_AED=0.00;_eAED=0.00;_QI=208|1|1|1|1|1|2|259|211 -Pcoprophilum_scaf_9 maker exon 197438 198116 . - . ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1:exon:141;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1 -Pcoprophilum_scaf_9 maker exon 198291 198714 . - . 
ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1:exon:140;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1 -Pcoprophilum_scaf_9 maker five_prime_UTR 198507 198714 . - . ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1:five_prime_utr;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1 -Pcoprophilum_scaf_9 maker CDS 198291 198506 . - 0 ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1 -Pcoprophilum_scaf_9 maker CDS 197697 198116 . - 0 ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1 -Pcoprophilum_scaf_9 maker three_prime_UTR 197438 197696 . - . ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1:three_prime_utr;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1 -EXAMPLE WITH SPREADED START AND STOP -##gff-version 3 -Pcoprophilum_scaf_9 maker gene 189352 192747 . + . ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.0;Name=genemark-Pcoprophilum_scaf_9-processed-gene-2.0 -Pcoprophilum_scaf_9 maker mRNA 189352 192747 . + . ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.0;Name=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1;_AED=0.00;_eAED=0.00;_QI=398|1|1|1|0.5|0.33|3|343|825 -Pcoprophilum_scaf_9 maker exon 189352 189520 . + . ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1:exon:96;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1 -Pcoprophilum_scaf_9 maker exon 189643 189922 . + . ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1:exon:97;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1 -Pcoprophilum_scaf_9 maker exon 189978 192747 . + . ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1:exon:98;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1 -Pcoprophilum_scaf_9 maker five_prime_UTR 189352 189520 . + . 
ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1:five_prime_utr;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1 -Pcoprophilum_scaf_9 maker five_prime_UTR 189643 189871 . + . ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1:five_prime_utr;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1 -Pcoprophilum_scaf_9 maker CDS 189872 189873 . + 0 ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1 -Pcoprophilum_scaf_9 maker CDS 189874 189922 . + 0 ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1 -Pcoprophilum_scaf_9 maker CDS 189978 192402 . + 0 ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1 -Pcoprophilum_scaf_9 maker CDS 192403 192404 . + 0 ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1 -Pcoprophilum_scaf_9 maker three_prime_UTR 192405 192747 . + . ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1:three_prime_utr;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.0-mRNA-1 -Pcoprophilum_scaf_9 maker gene 197438 198714 . - . ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.3;Name=genemark-Pcoprophilum_scaf_9-processed-gene-2.3 -Pcoprophilum_scaf_9 maker mRNA 197438 198714 . - . ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.3;Name=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1;_AED=0.00;_eAED=0.00;_QI=208|1|1|1|1|1|2|259|211 -Pcoprophilum_scaf_9 maker exon 197438 198116 . - . ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1:exon:141;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1 -Pcoprophilum_scaf_9 maker exon 198291 198714 . - . 
ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1:exon:140;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1 -Pcoprophilum_scaf_9 maker five_prime_UTR 198507 198714 . - . ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1:five_prime_utr;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1 -Pcoprophilum_scaf_9 maker CDS 198505 198506 . - 0 ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1 -Pcoprophilum_scaf_9 maker CDS 198291 198504 . - 0 ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1 -Pcoprophilum_scaf_9 maker CDS 197699 198116 . - 0 ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1 -Pcoprophilum_scaf_9 maker CDS 197697 197698 . - 0 ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1 -Pcoprophilum_scaf_9 maker three_prime_UTR 197438 197696 . - . ID=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1:three_prime_utr;Parent=genemark-Pcoprophilum_scaf_9-processed-gene-2.3-mRNA-1 - - -=head1 NAME - -gff3_sp_add_start_and_stop.pl.pl - This script adds start and stop codons when a CDS feature exists. The script looks at the sequence and check the presence of start and stop codon. -The script works even if the start or stop codon are split over several CDS features. - - - -=head1 SYNOPSIS - - ./gff3_sp_add_start_and_stop.pl.pl --gff infile.gff --fasta genome.fa --out outfile.gff - ./gff3_sp_add_start_and_stop.pl.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff>, B<-i> or B<-g> - -Input GFF file. - -=item B<--fasta>, B<--fa> or B<-f> - -Input fasta file. Needed to check that CDS sequences start by start codon and stop by stop codon. - -=item B<--ct>, B<--codon> or B<--table> - -Codon table to use. 1 By default. 
- -=item B<--out>, B<--output> or B<-o> - -Output gff file updated - -=item B<-v> - -Verbose for debugging purpose. - -=item B<--help> or B<-h> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_alignment_output_style.pl b/annotation/Tools/Util/gff/gff3_sp_alignment_output_style.pl deleted file mode 100755 index 426095a6e..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_alignment_output_style.pl +++ /dev/null @@ -1,126 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Pod::Usage; -use Getopt::Long; -use NBIS::GFF3::Omniscient; -use Bio::Tools::GFF; - -my $header = qq{ -######################################################## -# NBIS 2019 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $start_run = time(); -my $opt_gfffile; -my $opt_comonTag=undef; -my $opt_verbose=undef; -my $opt_deep=undef; -my $opt_output; -my $opt_help = 0; - -# OPTION MANAGMENT -if ( !GetOptions( 'g|gff=s' => \$opt_gfffile, - 'c|ct=s' => \$opt_comonTag, - 'v' => \$opt_verbose, - 'd' => \$opt_deep, - 'o|output=s' => \$opt_output, - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2 } ); -} - -if (! defined($opt_gfffile) ){ - pod2usage( { - -message => "\nAt least 1 parameter is mandatory:\nInput reference gff file (-g).\n\n". - "Ouptut is optional. 
Look at the help documentation to know more.\n", - -verbose => 0, - -exitval => 1 } ); -} - -###################### -# Manage output file # - -my $gffout; -if ($opt_output) { - $opt_output=~ s/.gff//g; - open(my $fh, '>', $opt_output.".gff") or die "Could not open file '$opt_output' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); - } -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - - ##################### - # MAIN # - ##################### - -###################### -### Parse GFF input # -if($opt_verbose and $opt_deep) {$opt_verbose = 2 ;} -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ - input => $opt_gfffile, - locus_tag => $opt_comonTag, - verbose => $opt_verbose - }); -print ("GFF3 file parsed\n"); - -### -# Print result - -print_omniscient_as_match($hash_omniscient, $gffout); #print gene modified - -my $end_run = time(); -my $run_time = $end_run - $start_run; -print "Job done in $run_time seconds\n"; -__END__ - -=head1 NAME - -gff3_sp_alignment_output_style.pl - -This script take a normal gff3 annotation format file and convert it to gff3 alignment format. It means it add a structure of match / match_part as relationship between the different features. - -=head1 SYNOPSIS - - ./gff3_sp_alignment_output_style.pl -g infile.gff [ -o outfile ] - ./gff3_sp_alignment_output_style --help - -=head1 OPTIONS - -=over 8 - -=item B<-g>, B<--gff> or B<-ref> - -Input GFF3 file that will be read (and sorted) - -=item B<-c> or B<--ct> - -When the gff file provided is not correcly formated and features are linked to each other by a comon tag (by default locus_tag), this tag can be provided to parse the file correctly. - -=item B<-v> - -Verbose option to see the warning messages when parsing the gff file. - -=item B<-o> or B<--output> - -Output GFF file. If no output file is specified, the output will be -written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. 
- -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_clipN_seqExtremities_and_fixCoordinates.pl b/annotation/Tools/Util/gff/gff3_sp_clipN_seqExtremities_and_fixCoordinates.pl deleted file mode 100755 index b2068c807..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_clipN_seqExtremities_and_fixCoordinates.pl +++ /dev/null @@ -1,280 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Pod::Usage; -use Getopt::Long; -use Bio::SeqIO ; -use Bio::DB::Fasta; -use Bio::Tools::GFF; -use File::Basename; -use NBIS::GFF3::Omniscient; - -my $header = qq{ -######################################################## -# NBIS 2017 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $start_run = time(); -my $opt_gfffile; -my $opt_fastafile; -my $opt_output_fasta; -my $opt_output_gff; -my $opt_help; -my $width = 60; # line length printed - -# OPTION MANAGMENT -my @copyARGV=@ARGV; -if ( !GetOptions( 'g|gff=s' => \$opt_gfffile, - 'f|fa|fasta=s' => \$opt_fastafile, - 'of=s' => \$opt_output_fasta, - 'og=s' => \$opt_output_gff, - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => "$header\nFailed to parse command line", - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header \n" } ); -} - -if ( (! (defined($opt_gfffile)) ) or (! (defined($opt_fastafile)) ) ){ - pod2usage( { - -message => "\nAt least 2 parametes are mandatory:\nInput reference gff file (-g); Input reference fasta file (-f)\n\n". - "Output is optional. 
Look at the help documentation to know more.\n", - -verbose => 0, - -exitval => 2 } ); -} - - -my $ostream; -if ($opt_output_fasta) { - $opt_output_fasta=~ s/.fasta//g; - $opt_output_fasta=~ s/.fa//g; - open(my $fh, '>', $opt_output_fasta.".fa") or die "Could not open file '$opt_output_fasta' $!"; - $ostream= Bio::SeqIO->new(-fh => $fh, -format => 'Fasta' ); -} -else{ - $ostream = Bio::SeqIO->new(-fh => \*STDOUT, -format => 'Fasta'); -} - -my $gffout; -if ($opt_output_gff) { - my($opt_output_gff, $dirs, $suffix) = fileparse($opt_output_gff, (".gff",".gff1",".gff2",".gff3",".gtf",".gtf1",".gtf2",".gtf3",".txt")); #remove extension - open(my $fh, '>', $opt_output_gff.".gff3") or die "Could not open file '$opt_output_gff' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); - } -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - -##### MAIN #### -#### read gff file and save info in memory -###################### -### Parse GFF input # -print "Reading file $opt_gfffile\n"; -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $opt_gfffile - }); -print "Parsing Finished\n"; -### END Parse GFF input # -######################### - -my $hash_l1_grouped = group_l1IDs_from_omniscient($hash_omniscient); - -#### read fasta -my $nbFastaSeq=0; -my $db = Bio::DB::Fasta->new($opt_fastafile); -my @ids = $db->get_all_primary_ids; -my %allIDs; # save ID in lower case to avoid cast problems -foreach my $id (@ids ){$allIDs{lc($id)}=$id;} - - -my $cpt_Nleft=0; -my $cpt_Nright=0; -my $cpt_Nboth=0; - -foreach my $seq_id (@ids ){ - my $seqObject = $db->get_Seq_by_id($seq_id); - my $seq = $seqObject->seq; - - my @letters = split (//,$seq); - - ################ - # look at N at the beginning of the sequence - my $nb_N_start = 0; - foreach my $letter (@letters){ - - if ( lc($letter) eq 'n'){ - $nb_N_start++ - } - else{ - last; - } - } - - #start by N, let's remove them - if ($nb_N_start != 0){ - $seq = substr $seq, 
$nb_N_start; - shift_annotation($hash_omniscient, $hash_l1_grouped->{$seq_id}, $nb_N_start); - } - - - #################### - # look at N at the end of the sequence - my $nb_N_end = 0; - foreach my $letter (reverse (@letters ) ){ - if ( lc($letter) eq 'n'){ - $nb_N_end++ - } - else{ - last; - } - } - - ############## - # CLIP Ns - #start by N, let's remove them - if ($nb_N_end != 0){ - $seq = substr $seq, 0, -$nb_N_end; # -0 will remove nothing at the end - } - - if ($nb_N_end != 0 or $nb_N_start != 0){ - #create sequence object - my $header = $db->header($seq_id); - my $seqObj = Bio::Seq->new( '-format' => 'fasta' , -seq => $seq, -id => $header ); - - # print sequence object - $ostream->write_seq($seqObj); - } - else{ - $ostream->write_seq($seqObject); #original object not modified - } - - ##################### - #Handle counter to resume information - if($nb_N_end != 0){ - $cpt_Nright++; - } - if($nb_N_start != 0){ - $cpt_Nleft++; - } - if($nb_N_end != 0 and $nb_N_start != 0){ - $cpt_Nboth++; - } -} - -# print annotation whith shifter location -print_omniscient($hash_omniscient, $gffout); #print gene modified - -print "We found $cpt_Nleft sequence(s) starting with N\n"; -print "We found $cpt_Nright sequence(s) ending with N\n"; -print "We found $cpt_Nboth sequence(s) having N both extremities\n"; - -my $end_run = time(); -my $run_time = $end_run - $start_run; -print "Job done in $run_time seconds\n"; - - -######################################################################################## - -sub shift_annotation{ - my ($hash_omniscient, $list_id_l1, $nb_N_start) =@_; - - #Handle annotation - foreach my $primary_tag_l1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_l1 = gene or repeat etc... 
- - foreach my $id_tag_key_level1_raw (@$list_id_l1){ - my $id_tag_key_level1 = lc($id_tag_key_level1_raw); - if(exists ($hash_omniscient->{'level1'}{$primary_tag_l1}{$id_tag_key_level1})){ - - my $feature_level1 = $hash_omniscient->{'level1'}{$primary_tag_l1}{$id_tag_key_level1}; - # Shift position - $feature_level1->start($feature_level1->start-$nb_N_start); - $feature_level1->end($feature_level1->end-$nb_N_start); - - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_l2 ( keys %{$hash_omniscient->{'level2'}}){ # primary_tag_l2 = mrna or mirna or ncrna or trna etc... - - if ( exists_keys( $hash_omniscient, ('level2', $primary_tag_l2, $id_tag_key_level1) ) ){ - foreach my $feature_level2 ( @{$hash_omniscient->{'level2'}{$primary_tag_l2}{$id_tag_key_level1}}) { - $feature_level2; - # Shift position - $feature_level2->start($feature_level2->start-$nb_N_start); - $feature_level2->end($feature_level2->end-$nb_N_start); - - - ################# - # == LEVEL 3 == # - ################# - my $level2_ID = lc($feature_level2->_tag_value('ID')); - - ############ - # THEN ALL THE REST - foreach my $primary_tag_l3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_l3 = cds or exon or start_codon or utr etc... - if ( exists_keys( $hash_omniscient, ('level3', $primary_tag_l3, $level2_ID) ) ){ - foreach my $feature_level3 (@{$hash_omniscient->{'level3'}{$primary_tag_l3}{$level2_ID}}) { - - # Shift position - $feature_level3->start($feature_level3->start-$nb_N_start); - $feature_level3->end($feature_level3->end-$nb_N_start); - } - } - } - } - } - } - } - } - } -} - -__END__ - -=head1 NAME - -This script aim to clip the N's extremities of the sequences. 
The annotation from the sequence clipped are modified accrodingly to stau consistent - -=head1 SYNOPSIS - - ./script.pl -g=infile.gff -f=infile.fasta [ -o outfile ] - ./script.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<-g>, B<--gff> or B<-ref> - -Input GFF3 file that will be read (and sorted) - -=item B<-f> or B<--fasta> - -Input fasta file. - -=item B<--of> - -Output fixed fasta file. If no output file is specified, the output will be -written to STDOUT. - -=item B<--og> - -Output fixed GFF file. If no output file is specified, the output will be -written to STDOUT - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_clip_UTRs.pl b/annotation/Tools/Util/gff/gff3_sp_clip_UTRs.pl deleted file mode 100755 index 40018ffa6..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_clip_UTRs.pl +++ /dev/null @@ -1,668 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Carp; -use Clone 'clone'; -use Getopt::Long; -use Pod::Usage; -use List::MoreUtils qw(uniq); -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient; - -my $header = qq{ -######################################################## -# NBIS 2019 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $outfile = undef; -my $gff = undef; -my $verbose = undef; -my $help= 0; - -if ( !GetOptions( - "help|h" => \$help, - "gff|g=s" => \$gff, - "verbose|v!" => \$verbose, - "output|outfile|out|o=s" => \$outfile)) - -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if (! 
$gff){ - pod2usage( { - -message => "\nAt least 1 files is mandatory:\n --gff file1\n\n", - -verbose => 0, - -exitval => 2 } ); -} - -###################### -# Manage output file # -my $gffout; -if ($outfile) { - $outfile=~ s/.gff//g; -open(my $fh, '>', $outfile.".gff") or die "Could not open file '$outfile' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); -} -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - - - ####################### - # MAIN # -# >>>>>>>>>>>>>>>>>>>>>>>>> ####################### <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - - -my $resume_case=undef; - -###################### -### Parse GFF input # -my ($omniscient, $mRNAGeneLink) = slurp_gff3_file_JD({ input => $gff - }); -print ("$gff GFF3 file parsed\n"); -info_omniscient($omniscient); - - -# Quick stat hash before complement -my %quick_stat1; -foreach my $level ( ('level1', 'level2') ){ - foreach my $tag (keys %{$omniscient->{$level}}) { - my $nb_tag = keys %{$omniscient->{$level}{$tag}}; - $quick_stat1{$level}{$tag} = $nb_tag; - } -} - -######## -# Sort the genes to loop over them from the left to right. -my $sortBySeq = gather_and_sort_l1_location_by_seq_id($omniscient); - - -foreach my $locusID ( keys %{$sortBySeq}){ # tag_l1 = gene or repeat etc... - - foreach my $tag_l1 ( keys %{$sortBySeq->{$locusID}} ) { - - # Go through location from left to right ### !! - while ( @{$sortBySeq->{$locusID}{$tag_l1}} ){ - - my $location = shift @{$sortBySeq->{$locusID}{$tag_l1}}; - my $id_l1_left = $location->[0]; - - my $overlap=1; - while($overlap){ - - # Go through location from left to right ### !! 
- my $continue = 0; - while ( defined($continue) ){ - - # Next location - my $location2 = @{$sortBySeq->{$locusID}{$tag_l1}}[$continue]; - my $id_l1_right = $location2->[0]; - - #if overlap - if ( ($location->[1] <= $location2->[2]) and ($location->[2] >= $location2->[1])){ - print "$id_l1_left and $id_l1_right overlaps\n" if($verbose); - _check_gene_overlap_at_UTR($omniscient , $location, $location2, $verbose); #If contains UTR - - $continue++; - } - else{ - $overlap=undef; - last; - } - } - } - } - } -} - print "We fixed $resume_case case where feature has been merged within the same locus\n" if($verbose >= 1 and $resume_case); - -######## -# Print results -print_omniscient($omniscient, $gffout); - -####################################################################################################################### - #################### - # methods # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - -sub _check_gene_overlap_at_UTR{ - my ($hash_omniscient, $location, $location2, $verbose)=@_; - - my $gene_id = $location->[0]; - my $gene_id2 = $location2->[0]; - - #One has to have UTR - if( l1_has_l3_type($omniscient, $gene_id, 'utr', 1) or l1_has_l3_type($omniscient, $gene_id2, 'utr', 1) ){ - print "At least one of them has UTR\n" if($verbose); - - if( l1_has_l3_type($omniscient, $gene_id, 'cds') and l1_has_l3_type($omniscient, $gene_id2, 'cds') ){ - #collect extrem cds positions. 
- my ($gene1_cds_start, $gene1_cds_end) = get_most_right_left_cds_positions($omniscient, $gene_id); - my ($gene2_cds_start, $gene2_cds_end) = get_most_right_left_cds_positions($omniscient, $gene_id2); - #print "gene1_cds_start $gene1_cds_start gene1_cds_end $gene1_cds_end gene2_cds_start $gene2_cds_start gene2_cds_end $gene2_cds_end\n"; - - if ( ($gene1_cds_start <= $gene2_cds_end) and ($gene1_cds_end >= $gene2_cds_start)){ - print "overlap within the CDS we don't touch them...\n" if($verbose); - } - else{ - #if single exon gene is modified it can be on different strand otherwise we don't touch the UTR - #if( and is_single_exon_gene() or is_single_exon_gene()) - #print "let's go with $gene_id $gene_id2\n"; - } - } - else{ - print "At least one of them do not have CDS we don't touch them2...\n" if($verbose); - } - # my $utr_mrna1 = undef; - # my $utr_mrna2 = undef; - # print "lets go: $gene_id, $gene_id2 \n"; - - - # foreach my $l2_type (keys %{$hash_omniscient->{'level2'}} ){ - # if(exists_keys($hash_omniscient,('level2', $l2_type, $gene_id))){ - # foreach my $mrna_feature (@{$hash_omniscient->{'level2'}{$l2_type}{$gene_id}}){ - # my $righ_utrs = undef; - # my $sorted_cds = get_cds_from_l2($hash_omniscient, $mrna_feature); - # if ($sorted_cds){ #this l2 has a cds - # $righ_utrs = _get_right_utrs($hash_omniscient, $mrna_feature, $gene1_cds_end); - # } - - - # foreach my $l2_type_B (keys %{$hash_omniscient->{'level2'}} ){ - # if(exists_keys($hash_omniscient,('level2', $l2_type_B, $gene_id2))){ - # foreach my $mrna_feature_B (@{$hash_omniscient->{'level2'}{$l2_type}{$gene_id2}}){ - # my $sorted_cds_B = get_cds_from_l2($hash_omniscient, $mrna_feature_B); - # if ($sorted_cds_B){ #this l2 has a cds - # if($sorted_cds_B[0]->start <= $sorted_cds[$#sorted_cds]->end and $sorted_cds_B[$#sorted_cds_B]->end >= $sorted_cds[0]->start){ - # print "Both CDS overlap, we will not touch their UTRs\n"; - # } - # my $left_utrs = _get_left_utrs($hash_omniscient, $mrna_feature_B, 
$gene2_cds_start); - # if($righ_utrs and $left_utrs){ - # #-----HERE BOTH HAVE UTR----- - # my $length_utr_M1 = $righ_utrs->[$#righ_utrs]->end - $righ_utrs->[0]->start +1; - # my $length_utr_M2 = $left_utrs->[$#left_utrs]->end - $left_utrs->[0]->start + 1; - # my $separting_point = undef; - # if($length_utr_M1 > $length_utr_M2){ - # $separting_point = $M2_utr_left_start; - # } - # #print "($length_utr_M1 > $dist_between_M1_and_M2 and $length_utr_M2 > $dist_between_M1_and_M2)\n" if $verbose; - # # If $dist_between_M1_and_M2 = 1 we cannot share one nucleotide between two utr. Both should have it - # if($length_utr_M1 > $dist_between_M1_and_M2 and $length_utr_M2 > $dist_between_M1_and_M2 and $dist_between_M1_and_M2 > 1) { - # $separting_point = $M1_cds_end + int($dist_between_M1_and_M2 / 2) + 1; - # #print "separting_point $separting_point $M2_cds_start - $M1_cds_end - 1 = $dist_between_M1_and_M2 ".int($dist_between_M1_and_M2 / 2)."\n" if $verbose; - # } - # #_shrink_utr_right($separting_point) - # #_shrink_utr_left($separting_point) - # } - # elsif($righ_utrs){ - # #-----HERE left gene has UTR to his right to check - # my $separting_point = $sorted_cds_B->[0]->start; - # #_shrink_utr_right($separting_point) - # } - # elsif($left_utrs){ - # #-----HERE right gene has UTR to his left to check - # my $separting_point = $sorted_cds->[$#$sorted_cds]->end; - # #_shrink_utr_left($separting_point) - # } - # else{ - # print "None of the two features have CDS" if ($verbose); - # } - # } - # } - # } - # } - # } - # } - - } -} - - -__END__ - -############################################################ -#check if UTR right of model left is overlaping, and fix it. -sub _control_utr_from_model_left{ - my ($l2_feature_M1, $M1_cds_start, $M1_cds_end, $l2_feature_M2, $M2_cds_start, $M2_cds_end, $omniscient, $verbose)=@_; - - #################################### - #peculiar case CDS1 and CD2 overlap. 
We dont touch that case - if ($M1_cds_start <= $M2_cds_end and $M1_cds_end >= $M2_cds_start){ - print "peculiar case CDS1 and CD2 overlap. We dont touch that case\n"; - return; - } - - my $l2_M1_id = lc($l2_feature_M1->_tag_value('ID')); - my $l2_M2_id = lc($l2_feature_M2->_tag_value('ID')); - - ############################## - # Look at utr of the M1erence - my ($M1_utr_left_start, $M1_utr_left_end, $M1_utr_right_start, $M1_utr_right_end) = _get_utrs_extremities($l2_M1_id, $M1_cds_end, $omniscient); - if(! $M1_utr_right_start){return;} #No UTR at right of model1, nothing to modify - - ########################### - # Look at utr of the M2 - my ($M2_utr_left_start, $M2_utr_left_end, $M2_utr_right_start, $M2_utr_right_end) = _get_utrs_extremities($l2_M2_id, $M2_cds_end, $omniscient); - - - ############################## - # CHECK RIGHT UTR OF MODEL 1 # - ############################## - my $utr_redefined=undef; - - if ($M2_utr_left_start){ # UTR OVERLAP UTR. As we know M1_mrna has utr in his right (otherwise "return") if we have UTR in M2 left, it is sure they overlap beetween them - my $length_utr_M1 = $M1_utr_right_end - $M1_utr_right_start +1; - my $length_utr_M2 = $M2_utr_left_end - $M2_utr_left_start + 1; - my $dist_between_M1_and_M2 = $M2_cds_start - $M1_cds_end - 1; # /!\ CALCUL DIFFERENT - - my $separting_point = undef; - if($length_utr_M1 > $length_utr_M2){ - $separting_point = $M2_utr_left_start; - } - #print "($length_utr_M1 > $dist_between_M1_and_M2 and $length_utr_M2 > $dist_between_M1_and_M2)\n" if $verbose; - # If $dist_between_M1_and_M2 = 1 we cannot share one nucleotide between two utr. 
Both should have it - if($length_utr_M1 > $dist_between_M1_and_M2 and $length_utr_M2 > $dist_between_M1_and_M2 and $dist_between_M1_and_M2 > 1) { - $separting_point = $M1_cds_end + int($dist_between_M1_and_M2 / 2) + 1; - #print "separting_point $separting_point $M2_cds_start - $M1_cds_end - 1 = $dist_between_M1_and_M2 ".int($dist_between_M1_and_M2 / 2)."\n" if $verbose; - } - - if($separting_point){ - print "lets shrink the UTR M1 (separting_point=$separting_point): \n"; - foreach my $l3_type (keys %{$omniscient->{'level3'}} ){ - if ($l3_type =~ 'utr' or $l3_type =~ 'exon'){ #lets shrink it - if(exists_keys($omniscient,('level3', $l3_type, lc($l2_M1_id)))){ - my @new_list_of_feature3; - foreach my $feature_l3 ( sort {$a->start <=> $b->start} @{$omniscient->{'level3'}{$l3_type}{lc($l2_M1_id)}} ){ - if($feature_l3->start() >= $separting_point){ #feature is completely in the cds we remove it - print "we throw the feature case1A ".$feature_l3->gff_string."\n" if $verbose; - } - elsif ($feature_l3->start() <= $separting_point and $feature_l3->end() >= $separting_point){ - print "we modfify the feature case1: ".$feature_l3->gff_string."\n" if $verbose; - $feature_l3->end($separting_point-1); - print "after modification ".$feature_l3->gff_string."\n" if $verbose; - push(@new_list_of_feature3, $feature_l3); - } - else{ #feature not concerned we keep it - push(@new_list_of_feature3, $feature_l3); - } - } - if (scalar @new_list_of_feature3 > 0){ - @{$omniscient->{'level3'}{$l3_type}{lc($l2_M1_id)}}=@new_list_of_feature3; - } - else{ - print "delete it\n"; - delete $omniscient->{'level3'}{$l3_type}{lc($l2_M1_id)}; - #clean_from_l3($omniscient, $l3_type, $l2_feature_M1); - } - } - } - } - $utr_redefined=1; - } - } - - elsif ($M1_utr_right_start <= $M2_cds_end and $M1_utr_right_end >= $M2_cds_start){ #overlap CDS - print "shrink_right utr of model 1\n"; - foreach my $l3_type (keys %{$omniscient->{'level3'}} ){ - if ($l3_type =~ 'utr' or $l3_type =~ 'exon'){ #lets shrink it - 
if(exists_keys($omniscient,('level3', $l3_type, lc($l2_M1_id)))){ - my @new_list_of_feature3; - foreach my $feature_l3 ( sort {$a->start <=> $b->start} @{$omniscient->{'level3'}{$l3_type}{lc($l2_M1_id)}} ){ - if($feature_l3->start() >= $M2_cds_start){ #feature is completely in the cds we remove it - print "we throw the feature case1B ".$feature_l3->gff_string."\n" if $verbose; - } - elsif ($feature_l3->start() <= $M2_cds_end and $feature_l3->end() >= $M2_cds_start){ - print "we modfify the feature case1: ".$feature_l3->gff_string."\n" if $verbose; - $feature_l3->end($M2_cds_start-1); - push(@new_list_of_feature3, $feature_l3); - } - else{ #feature not concerned we keep it - push(@new_list_of_feature3, $feature_l3); - } - } - if (scalar @new_list_of_feature3 > 0){ - @{$omniscient->{'level3'}{$l3_type}{lc($l2_M1_id)}}=@new_list_of_feature3; - } - else{ - print "delete it\n"; - clean_from_l3($omniscient, $l3_type, $l2_feature_M1); - } - } - } - } - $utr_redefined=1; - } - else{ #utr left de not overlap => continue - print "Interesting, right utr overlap but not in UTR. Probably several features where overlaping and this one where more right than the other already studied \n" if $verbose; - } - - - ####################################### - # control sanity l2 and l1 location after having modified l3 location - if($utr_redefined){ - check_mrna_positions($l2_feature_M1, $omniscient->{'level3'}{'exon'}{lc($l2_M1_id)}, $verbose); - # print "mrna_id2 $mrna_id2".$mrna_feature2_clean->gff_string."\n"; - # check gene feature extremities - my $l1_M1_id = $l2_feature_M1->_tag_value('Parent'); - foreach my $tag_level1 (keys %{$omniscient->{'level1'}}){ - if(exists_keys($omniscient,('level1', $tag_level1, lc($l1_M1_id)))){ - check_level1_positions($omniscient, $omniscient->{'level1'}{$tag_level1}{lc($l1_M1_id)}, $verbose); - } - } - } -} - - - -############################################################ -#check if UTR left of model right is overlaping, and fix it. 
-sub _control_utr_from_model_right{ - my ($l2_feature_M1, $M1_cds_start, $M1_cds_end, $l2_feature_M2, $M2_cds_start, $M2_cds_end, $omniscient, $verbose)=@_; - - print "_control_utr_left_from_model_right\n"; - - #################################### - #peculiar case CDS1 and CD2 overlap. We dont touch that case - if ($M1_cds_start <= $M2_cds_end and $M1_cds_end >= $M2_cds_start){ - print "peculiar case CDS1 and CD2 overlap. We dont touch that case\n"; - return; - } - - my $l2_M1_id = lc($l2_feature_M1->_tag_value('ID')); - my $l2_M2_id = lc($l2_feature_M2->_tag_value('ID')); - - ############################## - # Look at utr of the M1 - my ($M1_utr_left_start, $M1_utr_left_end, $M1_utr_right_start, $M1_utr_right_end) = _get_utrs_extremities($l2_M1_id, $M1_cds_end, $omniscient); - - ########################### - # Look at utr of the M2 - my ($M2_utr_left_start, $M2_utr_left_end, $M2_utr_right_start, $M2_utr_right_end) = _get_utrs_extremities($l2_M2_id, $M2_cds_end, $omniscient); - if(! $M2_utr_left_start){return;} #No UTR at left of model2, nothing to modify - - ############################## - # CHECK RIGHT UTR OF MODEL 1 # - ############################## - my $utr_redefined=undef; - - ###############analysis cases - if ($M1_utr_right_start){ # UTR OVERLAP UTR. As we know M1_mrna has utr in his right (otherwise "return") if we have UTR in M2 left, it is sure they overlap beetween them - print "case1 ?\n"; - my $length_utr_M1 = $M1_utr_right_end - $M1_utr_right_start +1; - my $length_utr_M2 = $M2_utr_left_end - $M2_utr_left_start + 1; - my $dist_between_M1_and_M2 = $M2_cds_start - $M1_cds_end - 1; # /!\ CALCUL DIFFERENT - - my $separting_point = undef; - if($length_utr_M2 > $length_utr_M1){ - $separting_point = $M1_utr_right_end; - } - #print "($length_utr_M1 > $dist_between_M1_and_M2 and $length_utr_M2 > $dist_between_M1_and_M2)\n" if $verbose; - # If $dist_between_M1_and_M2 = 1 we cannot share one nucleotide between two utr. 
Both should have it - if($length_utr_M1 > $dist_between_M1_and_M2 and $length_utr_M2 > $dist_between_M1_and_M2 and $dist_between_M1_and_M2 > 1) { - $separting_point = $M1_cds_end + int($dist_between_M1_and_M2 / 2) + 1; - #print "separting_point $separting_point\n" if $verbose; - } - - if($separting_point){ - print "lets shrink the UTR M1: \n"; - foreach my $l3_type (keys %{$omniscient->{'level3'}} ){ - if ($l3_type =~ 'utr' or $l3_type =~ 'exon'){ #lets shrink it - if(exists_keys($omniscient,('level3', $l3_type, lc($l2_M2_id)))){ - my @new_list_of_feature3; - foreach my $feature_l3 ( sort {$a->start <=> $b->start} @{$omniscient->{'level3'}{$l3_type}{lc($l2_M2_id)}} ){ - if($feature_l3->end() <= $separting_point){ - print "we throw the feature caseX1 ".$feature_l3->gff_string."\n" if $verbose; - } - elsif ($feature_l3->start() <= $separting_point and $feature_l3->end() >= $separting_point){ - print "we modfify the feature caseX1: ".$feature_l3->gff_string."\n" if $verbose; - $feature_l3->start($separting_point+1); - push(@new_list_of_feature3, $feature_l3); - } - else{ #feature not concerned we keep it - push(@new_list_of_feature3, $feature_l3); - } - } - if (scalar @new_list_of_feature3 > 0){ - @{$omniscient->{'level3'}{$l3_type}{lc($l2_M2_id)}}=@new_list_of_feature3; - } - else{ - print "delete it\n"; - clean_from_l3($omniscient, $l3_type, $l2_feature_M2); - } - } - } - } - $utr_redefined=1; - } - } - elsif ($M2_utr_left_start <= $M1_cds_end and $M2_utr_left_end >= $M1_cds_start){ #overlap CDS - print "case2 ?\n"; - #shrink_left_utr of model 2 - foreach my $l3_type (keys %{$omniscient->{'level3'}} ){ - if ($l3_type =~ 'utr' or $l3_type =~ 'exon'){ #lets shrink it - if(exists_keys($omniscient,('level3', $l3_type, lc($l2_M2_id)))){ - my @new_list_of_feature3; - foreach my $feature_l3 ( sort {$a->start <=> $b->start} @{$omniscient->{'level3'}{$l3_type}{lc($l2_M2_id)}} ){ - print "$l3_type ".$feature_l3->start()."<= $M1_cds_end and ".$feature_l3->end()." 
>= $M1_cds_end \n"; - if($feature_l3->end() <= $M1_cds_end){ - print "we throw the feature case1C ".$feature_l3->gff_string."\n" if $verbose; - } - elsif ($feature_l3->start() <= $M1_cds_end and $feature_l3->end() >= $M1_cds_end){ - print "we modfify the feature case1: ".$feature_l3->gff_string."\n" if $verbose; - $feature_l3->start($M1_cds_end+1); - push(@new_list_of_feature3, $feature_l3); - } - else{ #feature not concerned we keep it - push(@new_list_of_feature3, $feature_l3); - } - } - if (scalar @new_list_of_feature3 > 0){ - @{$omniscient->{'level3'}{$l3_type}{lc($l2_M2_id)}}=@new_list_of_feature3; - } - else{ - print "delete it\n"; - clean_from_l3($omniscient, $l3_type, $l2_feature_M2); - } - } - } - } - $utr_redefined=1; - } - else{ #utr left de not overlap => continue - print "Interesting2, right utr overlap but not in UTR. Probably several features where overlaping and this one where more right than the other already studied \n" if $verbose; - } - - ####################################### - # control sanity l2 and l1 location after having modified l3 location - if($utr_redefined){ - check_mrna_positions($l2_feature_M2, $omniscient->{'level3'}{'exon'}{lc($l2_M2_id)}, $verbose); - # print "mrna_id2 $mrna_id2".$mrna_feature2_clean->gff_string."\n"; - # check gene feature extremities - my $l1_M2_id = $l2_feature_M2->_tag_value('Parent'); - foreach my $tag_level1 (keys %{$omniscient->{'level1'}}){ - if(exists_keys($omniscient,('level1', $tag_level1, lc($l1_M2_id)))){ - check_level1_positions($omniscient, $omniscient->{'level1'}{$tag_level1}{lc($l1_M2_id)}, $verbose); - } - } - } -} - -sub _get_cds_location{ - my ($l2_feature, $omniscient)=@_; - - my $cds_start = undef; - my $cds_end = undef; - my $l2_id = $l2_feature->_tag_value('ID'); - if(exists_keys($omniscient,('level3', 'cds', lc($l2_id)))){ - sort {$a->start <=> $b->start} @{$omniscient->{'level3'}{'cds'}{lc($l2_id)}}; - $cds_start = $omniscient->{'level3'}{'cds'}{lc($l2_id)}[0]->start; #first element of 
the array - $cds_end = $omniscient->{'level3'}{'cds'}{lc($l2_id)}[$#{$omniscient->{'level3'}{'cds'}{lc($l2_id)}}]->end; #last element of the array - } - else{ - print "unextpected the feature".$l2_feature->gff_string." doesnt have cds while it has a utr...!\n"; exit; - } - return $cds_start, $cds_end; -} - -sub _get_utrs_extremities{ - my ($l2_id, $cds_end, $omniscient)=@_; - my $verbose = undef; - my $utr_left_start=undef; - my $utr_left_end=undef; - my $utr_right_start=undef; - my $utr_right_end=undef; - - foreach my $l3_type (keys %{$omniscient->{'level3'}} ){ - print "$l3_type\n" if $verbose; - if ($l3_type =~ 'utr'){ - - if(exists_keys($omniscient,('level3', $l3_type, lc($l2_id)))){ - print "exists_keys $l3_type for $l2_id\n" if $verbose; - sort {$a->start <=> $b->start} @{$omniscient->{'level3'}{$l3_type}{lc($l2_id)}}; - - my $utr_start = $omniscient->{'level3'}{$l3_type}{lc($l2_id)}[0]->start; #first element of the array - my $utr_end = $omniscient->{'level3'}{$l3_type}{lc($l2_id)}[$#{$omniscient->{'level3'}{$l3_type}{lc($l2_id)}}]->end; #last element of the array - - #check with utr is that 5' or 3' - if($utr_start > $cds_end ){ - $utr_right_start = $utr_start ; - $utr_right_end = $utr_end; - } - else{ - $utr_left_start = $utr_start ; - $utr_left_end = $utr_end; - } - } - } - } - return $utr_left_start, $utr_left_end, $utr_right_start, $utr_right_end; -} - -sub clean_from_l3{ - my ($omniscient, $l3_type, $l2_feature) = @_; - - my $l2_id = lc($l2_feature->_tag_value('ID')); - - #Clean l3 - delete $omniscient->{'level3'}{$l3_type}{l2_id}; - - #clean l2 - foreach my $l3_type (keys %{$omniscient->{'level3'}} ){ - if(exists_keys($omniscient,('level3', $l3_type, $l2_id))){ - return; - } - } - my $l2_parent = lc($l2_feature->_tag_value('Parent')); - foreach my $l2_type (keys %{$omniscient->{'level2'}} ){ - if(exists_keys($omniscient,('level2', $l2_type, $l2_parent))){ - print Dumper($omniscient->{'level2'}{$l2_type}{$l2_parent}); - foreach my $feature 
(@{$omniscient->{'level2'}{$l2_type}{$l2_parent}}){ - my $id = lc($feature->_tag_value('ID')); - if ($id eq $l2_id){ - print "removing $l2_id\n"; - delete $omniscient->{'level2'}{$l2_type}{$l2_parent}; - #remove l1 - if ($#{$omniscient->{'level2'}{$l2_type}{$l2_parent}} == 0){ - foreach my $tag (keys %{$omniscient->{'level1'}}){ - if(exists_keys($omniscient,('level1', $tag, $l2_parent))){ - delete $omniscient->{'level1'}{$tag}{$l2_parent}; - } - } - } - } - } - } - } -} - -sub _get_right_utrs{ - my ($hash_omniscient, $l2_feature, $cds_end) = @_; - - my $l2_id = lc($l2_feature->_tag_value('ID')); - - foreach my $tag_l3 (keys %{$omniscient->{'level3'}}){ - if($tag_l3 =~ "utr"){ - if (exists_keys ($omniscient, ('level2', $tag_l3, $l2_id) ) ){ - my @sorted_utr = sort {$a->start <=> $b->start} @{$omniscient->{'level3'}{$tag_l3}{$l2_id}}; - if ($sorted_utr[0]->start > $cds_end) - return \@sorted_utr; - } - } - } - } - return undef; -} - -sub _get_left_utrs{ - my ($hash_omniscient, $l2_feature, $cds_start) = @_; - - my $l2_id = lc($l2_feature->_tag_value('ID')); - - foreach my $tag_l3 (keys %{$omniscient->{'level3'}}){ - if($tag_l3 =~ "utr"){ - if (exists_keys ($omniscient, ('level2', $tag_l3, $l2_id) ) ){ - my @sorted_utr = sort {$a->start <=> $b->start} @{$omniscient->{'level3'}{$tag_l3}{$l2_id}}; - if ($sorted_utr[$#sorted_utr]->end < $cds_start) - return \@sorted_utr; - } - } - } - } - return undef; -} - -__END__ - -=head1 NAME - -gff3_sp_clip_UTRs.pl - -This script focuses on UTR and it aims at cleaning overpredicted UTRs (e.g when annotating with RNAseq unstranded in Fungi). It will clip the left/right UTRs to avoid overlaps with other UTR/cds. -The only case where a UTR overlaping with something is not clipped, it is when the CDS of the reference mRNA is overlaping the CDS of the neighbor mRNA investigated. 
- - -=head1 SYNOPSIS - - ./gff3_sp_clip_UTRs.pl --gff annotation.gff --out=outFile - ./gff3_sp_clip_UTRs.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff> or B<-g> - -Input GFF3 file(s) used as M1erence. - - -=item B<--out>, B<--output>, B<--outfile> or B<-o> - -Output gff3 containing the M1erence annotation with all the non-overlapping newly added genes from addfiles.gff. - -=item B<--help> or B<-h> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_complement_annotations.pl b/annotation/Tools/Util/gff/gff3_sp_complement_annotations.pl deleted file mode 100755 index 27b6a1df3..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_complement_annotations.pl +++ /dev/null @@ -1,203 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Carp; -use Getopt::Long; -use Pod::Usage; -use List::MoreUtils qw(uniq); -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient; - -my $header = qq{ -######################################################## -# NBIS 2016 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $start_run = time(); -my $outfile = undef; -my @opt_files; -my $ref = undef; -my $size_min = 0; -my $help= 0; - -# OPTION MANAGMENT -my @copyARGV=@ARGV; -if ( !GetOptions( - "help|h" => \$help, - "ref|r|i=s" => \$ref, - "add|a=s" => \@opt_files, - "size_min|s=i" => \$size_min, - "output|outfile|out|o=s" => \$outfile)) - -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if (! $ref or ! 
@opt_files ){ - pod2usage( { - -message => "\nAt least 2 files are mandatory:\n --ref file1 --add file2\n\n", - -verbose => 0, - -exitval => 2 } ); -} - -###################### -# Manage output file # -my $gffout; -if ($outfile) { -open(my $fh, '>', $outfile) or die "Could not open file '$outfile' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); -} -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - - - ##################### - # MAIN # - ##################### - - -###################### -### Parse GFF input # - -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $ref - }); -print ("$ref GFF3 file parsed\n"); -info_omniscient($hash_omniscient); - -#Add the features of the other file in the first omniscient. It takes care of name to not have duplicates -foreach my $next_file (@opt_files){ - my ($hash_omniscient2, $hash_mRNAGeneLink2) = slurp_gff3_file_JD({ input => $next_file - }); - print ("$next_file GFF3 file parsed\n"); - info_omniscient($hash_omniscient2); - - ################################ - # First rename ID to be sure to not add feature with ID already used - rename_ID_existing_in_omniscient($hash_omniscient, $hash_omniscient2); - print ("\n$next_file IDs checked and fixed.\n"); - - - # Quick stat hash before complement - my %quick_stat1; - foreach my $level ( ('level1', 'level2') ){ - foreach my $tag (keys %{$hash_omniscient->{$level}}) { - my $nb_tag = keys %{$hash_omniscient->{$level}{$tag}}; - $quick_stat1{$level}{$tag} = $nb_tag; - } - } - - ####### COMPLEMENT ####### - complement_omniscients($hash_omniscient, $hash_omniscient2, $size_min); - print ("\nComplement done !\n"); - - - #RESUME COMPLEMENT - my $complemented=undef; - # Quick stat hash after complement - my %quick_stat2; - foreach my $level ( ('level1', 'level2') ){ - foreach my $tag (keys %{$hash_omniscient->{$level}}) { - my $nb_tag = keys %{$hash_omniscient->{$level}{$tag}}; - $quick_stat2{$level}{$tag} = $nb_tag; - } 
- } - - #About tag from hash1 added which exist in hash2 - foreach my $level ( ('level1', 'level2') ){ - foreach my $tag (keys %{$quick_stat1{$level}}){ - if ($quick_stat1{$level}{$tag} != $quick_stat2{$level}{$tag} ){ - print "We added ".($quick_stat2{$level}{$tag}-$quick_stat1{$level}{$tag})." $tag(s)\n"; - $complemented=1; - } - } - } - #About tag from hash2 added which dont exist in hash1 - foreach my $level ( ('level1', 'level2') ){ - foreach my $tag (keys %{$quick_stat2{$level}}){ - if (! exists $quick_stat1{$level}{$tag} ){ - print "We added ".$quick_stat2{$level}{$tag}." $tag(s)\n"; - $complemented=1; - } - } - } - #If nothing added - if(! $complemented){ - print "\nNothing has been added\n"; - } - else{ - print "\nNow the data contains:\n"; - info_omniscient($hash_omniscient); - } -} - -######## -# Print results -print_omniscient($hash_omniscient, $gffout); - -#END -print "usage: $0 @copyARGV\n"; -my $end_run = time(); -my $run_time = $end_run - $start_run; -print "Job done in $run_time seconds\n"; -__END__ - -=head1 NAME - -gff3_sp_complement_annotations.pl - -This script allow to complement a reference annotation with other annotations. -A l1 feature from the addfile.gff that does not overlap a l1 feature from the reference annotation will be added. -A l1 feature from the addfile.gff without a CDS that overlaps a l1 feature with a CDS from the reference annotation will be added. -A l1 feature from the addfile.gff with a CDS that overlaps a l1 feature without a CDS from the reference annotation will be added. -A l1 feature from the addfile.gff with a CDS that overlaps a l1 feature with a CDS from the reference annotation will be added only if the CDSs don't overlap. -A l1 feature from the addfile.gff without a CDS that overlaps a l1 feature without a CDS from the reference annotation will be added only if none of the l3 features overlap. 
-/!\ It is sufficiant that only one isoform is overlapping to prevent the whole gene (l1 feature) from the addfile.gff to be added in the output. - - -=head1 SYNOPSIS - - ./gff3_sp_complement_annotations.pl --ref annotation_ref.gff --add=addfile1.gff --add=addfile2.gff --out=outFile - ./gff3_sp_complement_annotations.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--ref>, B<-r> or B<-i> - -Input GFF3 file(s) used as reference. - -=item B<--add> or B<-a> - -Annotation(s) file you would like to use to complement the reference annotation. You can specify as much file you want like so: -a addfile1 -a addfile2 -a addfile3 -/!\ The order you provide these files matter. Once the reference file has been complemented by file1, this new annotation becomes the new reference that will be complemented by file2 etc. -/!\ The result with -a addfile1 -a addfile2 will differ to the result from -a addfile2 -a addfile1. So, be aware of what you want if you use several addfiles. - -=item B<--size_min> or B<-s> - -Option to keep the non-overlping gene only if the CDS size (in nucleotide) is over the minimum size defined. Default = 0 that means all of them are kept. - -=item B<--out>, B<--output>, B<--outfile> or B<-o> - -Output gff3 containing the reference annotation with all the non-overlapping newly added genes from addfiles.gff. - -=item B<--help> or B<-h> - -Display this helpful text. 
- -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_ensembl_output_style.pl b/annotation/Tools/Util/gff/gff3_sp_ensembl_output_style.pl deleted file mode 100755 index c9b73495c..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_ensembl_output_style.pl +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Pod::Usage; -use Getopt::Long; -use NBIS::GFF3::Omniscient; -use Bio::Tools::GFF; - -my $header = qq{ -######################################################## -# NBIS 2017 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $start_run = time(); -my $opt_gfffile; -my $opt_comonTag=undef; -my $opt_verbose=undef; -my $opt_output; -my $opt_help = 0; - -# OPTION MANAGMENT -if ( !GetOptions( 'g|gff=s' => \$opt_gfffile, - 'c|ct=s' => \$opt_comonTag, - 'v' => \$opt_verbose, - 'o|output=s' => \$opt_output, - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2 } ); -} - -if (! defined($opt_gfffile) ){ - pod2usage( { - -message => "\nAt least 1 parameter is mandatory:\nInput reference gff file (-g).\n\n". - "Ouptut is optional. 
Look at the help documentation to know more.\n", - -verbose => 0, - -exitval => 1 } ); -} - -###################### -# Manage output file # - -my $gffout; -if ($opt_output) { - $opt_output=~ s/.gff//g; - open(my $fh, '>', $opt_output.".gff") or die "Could not open file '$opt_output' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); - } -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - - ##################### - # MAIN # - ##################### - -###################### -### Parse GFF input # -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ - input => $opt_gfffile, - locus_tag => $opt_comonTag, - verbose => $opt_verbose - }); -print ("GFF3 file parsed\n"); - - -####################### -# Convert FULL standard gff3 to ensembl gff type -convert_omniscient_to_ensembl_style($hash_omniscient); - -### -# Print result -print_omniscient($hash_omniscient, $gffout); #print gene modified - -my $end_run = time(); -my $run_time = $end_run - $start_run; -print "Job done in $run_time seconds\n"; -__END__ - -=head1 NAME - -gff3_sp_alignment_output_style.pl - -This script take a normal gff3 annotation format file and convert it to gff3 ensembl format. - -=head1 SYNOPSIS - - ./gff3_sp_ensembl_output_style.pl -g infile.gff [ -o outfile ] - ./gff3_sp_ensembl_output_style --help - -=head1 OPTIONS - -=over 8 - -=item B<-g>, B<--gff> or B<-ref> - -Input GFF3 file that will be read (and sorted) - -=item B<-c> or B<--ct> - -When the gff file provided is not correcly formated and features are linked to each other by a comon tag (by default locus_tag), this tag can be provided to parse the input file correctly. - -=item B<-v> - -Verbose option to see the warning messages when parsing the gff file. - -=item B<-o> or B<--output> - -Output GFF file. If no output file is specified, the output will be -written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. 
- -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_extract_attributes.pl b/annotation/Tools/Util/gff/gff3_sp_extract_attributes.pl deleted file mode 100755 index 1e75973e0..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_extract_attributes.pl +++ /dev/null @@ -1,314 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Carp; -use Clone 'clone'; -use Getopt::Long; -use Pod::Usage; -use IO::File; -use List::MoreUtils qw(uniq); -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient; - -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my %handlers; -my $gff = undef; -my $one_tsv = undef; -my $help= 0; -my $primaryTag=undef; -my $attributes=undef; -my $outfile=undef; -my $outInOne=undef; -my $doNotReportEmptyCase=undef; - -if ( !GetOptions( - "help|h" => \$help, - "gff|f=s" => \$gff, - "d!" => \$doNotReportEmptyCase, - "m|merge!" => \$one_tsv, - "p|t|l=s" => \$primaryTag, - "attributes|a|att=s" => \$attributes, - "output|outfile|out|o=s" => \$outfile)) - -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ( ! (defined($gff)) ){ - pod2usage( { - -message => "$header\nAt least 1 parameter is mandatory:\nInput reference gff file (--gff) \n\n", - -verbose => 0, - -exitval => 2 } ); -} - -# If one output file we can create it here -if($one_tsv){ - if ($outfile) { - open($outInOne, '>', $outfile) or die "Could not open file $outfile $!"; - } - else{ - $outInOne->fdopen( fileno(STDOUT), 'w' ); - } -} - -# Manage $primaryTag -my @ptagList; -if(! 
$primaryTag or $primaryTag eq "all"){ - print "We will work on attributes from all features\n"; - push(@ptagList, "all"); -}elsif($primaryTag =~/^level[123]$/){ - print "We will work on attributes from all the $primaryTag features\n"; - push(@ptagList, $primaryTag); -}else{ - @ptagList= split(/,/, $primaryTag); - foreach my $tag (@ptagList){ - if($tag =~/^level[123]$/){ - print "We will work on attributes from all the $tag features\n"; - } - else{ - print "We will work on attributes from $tag feature.\n"; - } - } -} - -# Manage attributes if given -### If attributes given, parse them: -my @attListOk; -if ($attributes){ - my @attList = split(/,/, $attributes); # split at comma as separated value - - foreach my $attribute (@attList){ - push @attListOk, $attribute; - print "$attribute attribute will be processed.\n"; - - } - print "\n"; -} - - - ##################### - # MAIN # - ##################### - - -###################### -### Parse GFF input # -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $gff - }); -print ("GFF3 file parsed\n"); - - -foreach my $tag_l1 (keys %{$hash_omniscient->{'level1'}}){ - foreach my $id_l1 (keys %{$hash_omniscient->{'level1'}{$tag_l1}}){ - - my $feature_l1=$hash_omniscient->{'level1'}{$tag_l1}{$id_l1}; - - manage_attributes($feature_l1, 'level1', \@ptagList,\@attListOk); - - ################# - # == LEVEL 2 == # - ################# - foreach my $tag_l2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - - if ( exists ($hash_omniscient->{'level2'}{$tag_l2}{$id_l1} ) ){ - foreach my $feature_l2 ( @{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}}) { - - manage_attributes($feature_l2,'level2',, \@ptagList,\@attListOk); - ################# - # == LEVEL 3 == # - ################# - my $level2_ID = lc($feature_l2->_tag_value('ID')); - - foreach my $tag_l3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... 
- if ( exists ($hash_omniscient->{'level3'}{$tag_l3}{$level2_ID} ) ){ - foreach my $feature_l3 ( @{$hash_omniscient->{'level3'}{$tag_l3}{$level2_ID}}) { - manage_attributes($feature_l3, 'level3', \@ptagList,\@attListOk); - } - } - } - } - } - } - } -} -#print "We added $nbNameAdded Name attributes\n"; - - -####################################################################################################################### - #################### - # methods # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - -sub manage_attributes{ - my ($feature, $level, $ptagList, $attListOk)=@_; - - my $primary_tag=$feature->primary_tag; - - # check primary tag (feature type) to handle - foreach my $ptag (@$ptagList){ - - if($ptag eq "all"){ - tag_from_list($feature,$attListOk); - } - elsif(lc($ptag) eq $level){ - tag_from_list($feature,$attListOk); - } - elsif(lc($ptag) eq lc($primary_tag) ){ - tag_from_list($feature,$attListOk); - } - } -} - -sub tag_from_list{ - my ($feature, $attListOk)=@_; - - my $tags_string = undef; - foreach my $att ( @{$attListOk} ){ - - # create handler if needed (on the fly) - if (! $one_tsv){ - if(! exists ( $handlers{$att} ) ) { - my $out = IO::File->new(); - if ($outfile) { - $outfile=~ s/.gff//g; - open($out, '>', $outfile."_".$att.".txt") or die "Could not open file '$outfile'_'$att.txt' $!"; - } - else{ - $out->fdopen( fileno(STDOUT), 'w' ); - } - $handlers{$att}=$out; - } - } - - - if ($feature->has_tag($att)){ - - # get values of the attribute - my @values = $feature->get_tag_values($att); - - # print values of one attribute per file - if (! $one_tsv){ - my $out = $handlers{$att}; - print $out join(",", @values), "\n"; - } - else{ # put everything in one tsv - $tags_string .= join(",", @values)."\t"; - } - } - else{ - if (! $one_tsv){ - my $out = $handlers{$att}; - print $out ".\n" if (! $doNotReportEmptyCase); - } - else{ # put everything in one tsv - if (! 
$doNotReportEmptyCase){ - $tags_string .= ".\t"; - } - else{ - $tags_string .= "\t"; - } - } - } - } - if($tags_string){ - chop $tags_string; - print $outInOne $tags_string."\n"; - } -} - - -__END__ - - -# while( my $feature = $gffio->next_feature()) { - -# #manage handler -# my $source_tag = lc($feature->source_tag); -# if(! exists ( $handlers{$source_tag} ) ) { - -# open(my $fh, '>', $splitedData_dir."/".$source_tag.".gff") or die "Could not open file '$source_tag' $!"; -# my $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); -# $handlers{$source_tag}=$gffout; -# } - -# my $gffout = $handlers{$source_tag}; -# $gffout->write_feature($feature); -# } - -=head1 NAME - -gff3_extract_attributes.pl - -The script take a gff3 file as input. - -The script allows to extract choosen attributes of all or specific feature types. -The 9th column of a gff/gtf file contains a list of attributes. An attribute (gff3) is like that tag=value - -=head1 SYNOPSIS - - ./gff3_extract_attributes.pl -gff file.gff -att locus_tag,product,name -p level2,cds,exon [ -o outfile ] - ./gff3_extract_attributes.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff> or B<-f> - -Input GFF3 file that will be read (and sorted) - -=item B<-p>, B<-t> or B<-l> - -primary tag option, case insensitive, list. Allow to specied the feature types that will be handled. -You can specified a specific feature by given its primary tag name (column 3) as: cds, Gene, MrNa -You can specify directly all the feature of a particular level: - level2=mRNA,ncRNA,tRNA,etc - level3=CDS,exon,UTR,etc -By default all feature are taking in account. fill the option by the value "all" will have the same behaviour. - -=item B<--attributes>, B<--att>, B<-a> - -Attributes specified, will be extracted from the feature type specified by the option p (primary tag). List of attributes must be coma separated. -/!\\ You must use "" if name contains spaces. 
- -=item B<--merge> or B<-m> - -By default the values of each attribute tag is writen in its dedicated file. To write the values of all tags in only one file use this option. - -=item B<-d> -By default when an attribute is not found for a feature, a dot (.) is reported. If you don't want anything to be printed in such case use this option. - -=item B<-o> , B<--output> , B<--out> or B<--outfile> - -Output GFF file. If no output file is specified, the output will be -written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_extract_sequences.pl b/annotation/Tools/Util/gff/gff3_sp_extract_sequences.pl deleted file mode 100755 index be27ebb72..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_extract_sequences.pl +++ /dev/null @@ -1,687 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Pod::Usage; -use Clone 'clone'; -use Getopt::Long; -use Sort::Naturally; -use Bio::SeqIO; -use Bio::DB::Fasta; -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient; - -my $header = qq{ -######################################################## -# NBIS 2016 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. 
# -######################################################## -}; - -my $DONOTREVCOMP = undef; -my $start_run = time(); -my $codonTable=1; -my $opt_gfffile; -my $opt_fastafile; -my $opt_output; -my $opt_AA=undef; -my $opt_help = 0; -my $opt_full=undef; -my $opt_split=undef; -my $opt_extremity_only=undef; -my $opt_upstreamRegion=undef; -my $opt_downRegion=undef; -my $opt_cdna=undef; -my $opt_OFS=undef; -my $opt_type = 'cds'; -my $opt_cleanFinalStop=undef; -my $opt_cleanInternalStop=undef; -my $quiet = undef; - -# OPTION MANAGMENT -my @copyARGV=@ARGV; -if ( !GetOptions( 'g|gff=s' => \$opt_gfffile, - 'f|fa|fasta=s' => \$opt_fastafile, - 't=s' => \$opt_type, - 'ofs=s' => \$opt_OFS, - 'protein|p|aa' => \$opt_AA, - 'cdna' => \$opt_cdna, - 'cfs' => \$opt_cleanFinalStop, - 'cis' => \$opt_cleanInternalStop, - 'full!' => \$opt_full, - 'split!' => \$opt_split, - 'eo!' => \$opt_extremity_only, - 'dnrc!' => \$DONOTREVCOMP, - 'table|codon|ct=i' => \$codonTable, - 'up|5|five|upstream=i' => \$opt_upstreamRegion, - 'do|3|three|down|downstream=i' => \$opt_downRegion, - 'o|output=s' => \$opt_output, - 'q|quiet!' => \$quiet, - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => "$header\nFailed to parse command line", - -verbose => 1, - -exitval => 1 } ); -} -# shortcut for cdna -if($opt_cdna){$opt_type="exon";} - -# Print Help and exit -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header \n" } ); -} - -if ( (! (defined($opt_gfffile)) ) or (! (defined($opt_fastafile)) ) ){ - pod2usage( { - -message => "\nAt least 2 parametes are mandatory:\nInput reference gff file (-g); Input reference fasta file (-f)\n\n". - "Output is optional. Look at the help documentation to know more.\n", - -verbose => 0, - -exitval => 2 } ); -} - -if( $opt_full and $opt_split) -{print "Options --full and --split cannot be used concomitantly. 
Please read the help\n"; exit;} - -my $ostream; -if ($opt_output) { - open(my $fh, '>', $opt_output) or die "Could not open file '$opt_output' $!"; - $ostream= Bio::SeqIO->new(-fh => $fh, -format => 'Fasta' ); -} -else{ - $ostream = Bio::SeqIO->new(-fh => \*STDOUT, -format => 'Fasta'); -} - -print "We will extract the $opt_type sequences.\n"; -$opt_type=lc($opt_type); - -if($codonTable<0 and $codonTable>25){ - print "$codonTable codon table is not a correct value. It should be between 0 and 25 (0,23 and 25 can be problematic !)\n"; -} - -my $OFS=" "; -if($opt_OFS){ - $OFS = $opt_OFS; -} - -##### MAIN #### -#### read gff file and save info in memory -###################### -### Parse GFF input # -print "Reading file $opt_gfffile\n"; -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $opt_gfffile - }); -print "Parsing Finished\n"; -### END Parse GFF input # -######################### - -my $hash_l1_grouped = group_l1features_from_omniscient($hash_omniscient); - -#### read fasta -my $nbFastaSeq=0; -my $db = Bio::DB::Fasta->new($opt_fastafile); -my @ids = $db->get_all_primary_ids; -my %allIDs; # save ID in lower case to avoid cast problems -foreach my $id (@ids ){$allIDs{lc($id)}=$id;} - - -print ("Genome fasta parsed\n"); - -foreach my $seqname (sort { (($a =~ /(\d+)$/)[0] || 0) <=> (($b =~ /(\d+)$/)[0] || 0) } keys %{$hash_l1_grouped}) { - - foreach my $feature_l1 ( sort { ncmp ($a->start.$a->end.$a->_tag_value('ID'), $b->start.$b->end.$b->_tag_value('ID') ) } @{$hash_l1_grouped->{$seqname}}) { - - my $id_l1=$feature_l1->_tag_value('ID'); - my $name=undef; - - if ($feature_l1->has_tag('Name')){ - $name = $feature_l1->_tag_value('Name'); - } - elsif($feature_l1->has_tag('gene')){ - $name = $feature_l1->_tag_value('gene'); - } - - if( $opt_type eq lc($feature_l1->primary_tag()) or $opt_type eq "l1" or $opt_type eq "level1" ){ - - #Handle Header - my $id_seq = clean_string($id_l1); - my $description=""; - if($name){ - 
$description.=clean_tag("name=").clean_string($name).$OFS.clean_tag("seq_id=").clean_string($seqname).$OFS.clean_tag("type=").clean_string($opt_type); - } - else{ - $description.=clean_tag("seq_id=").clean_string($seqname).$OFS.clean_tag("type=").clean_string($opt_type); - } - - my @ListSeq=($feature_l1); - extract_sequences(\@ListSeq, $db, $id_seq, $description, $opt_full, $opt_upstreamRegion, $opt_downRegion, $opt_split, $opt_extremity_only, 'level1'); - } - - ################# - # == LEVEL 2 == # - ################# - foreach my $ptag_l2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - - if ( exists ($hash_omniscient->{'level2'}{$ptag_l2}{lc($id_l1)} ) ){ - foreach my $feature_l2 ( @{$hash_omniscient->{'level2'}{$ptag_l2}{lc($id_l1)}}) { - - #For Header - my $id_l2 = $feature_l2->_tag_value('ID'); - if ($feature_l2->has_tag('Name') and ! $name){ - $name = $feature_l2->_tag_value('Name'); - } - elsif($feature_l2->has_tag('gene') and ! 
$name){ - $name = $feature_l2->_tag_value('gene'); - } - - #Handle Header - my $id_seq = clean_string($id_l2); - my $description=clean_tag("gene=").clean_string($id_l1); - if($name){ - $description.=$OFS.clean_tag("name=").clean_string($name); - } - - $description.=$OFS.clean_tag("seq_id=").clean_string($seqname).$OFS.clean_tag("type=").clean_string($opt_type); - - if( $opt_type eq $ptag_l2 or $opt_type eq "l2" or $opt_type eq "level2" ){ - my @ListSeq=($feature_l2); - extract_sequences(\@ListSeq, $db, $id_seq, $description, $opt_full, $opt_upstreamRegion, $opt_downRegion, $opt_split, $opt_extremity_only, 'level2'); - } - - ################# - # == LEVEL 3 == # - ################# - foreach my $ptag_l3 (keys %{$hash_omniscient->{'level3'}}){ - if ( exists ($hash_omniscient->{'level3'}{$ptag_l3}{lc($id_l2)} ) ){ - - if( $opt_type eq $ptag_l3 or $opt_type eq "l3" or $opt_type eq "level3" ){ - extract_sequences(\@{$hash_omniscient->{'level3'}{$ptag_l3}{lc($id_l2)}}, $db, $id_seq, $description, $opt_full, $opt_upstreamRegion, $opt_downRegion, $opt_split, $opt_extremity_only, 'level3'); - } - } - } - } - } - } - } -} - -#END -print "usage: $0 @copyARGV\n"; - -if($opt_upstreamRegion and $opt_downRegion){ - print "$nbFastaSeq $opt_type converted in fasta with $opt_upstreamRegion upstream nucleotides and $opt_downRegion downstream nucleotides.\n"; -} -elsif($opt_upstreamRegion){ - print "$nbFastaSeq $opt_type converted in fasta with $opt_upstreamRegion upstream nucleotides.\n"; -} -elsif($opt_downRegion){ - print "$nbFastaSeq $opt_type converted in fasta with $opt_downRegion downstream nucleotides.\n"; -} -else{ - print "$nbFastaSeq $opt_type converted in fasta.\n"; -} - -my $end_run = time(); -my $run_time = $end_run - $start_run; -print "Job done in $run_time seconds\n"; - -####################################################################################################################### - #################### - # METHODS # - ################ - ############## - 
############ - ########## - ######## - ###### - #### - ## - -sub clean_string{ - my ($string) = @_; - - my $replaceBy = "_"; - if($OFS eq "_"){$replaceBy = "-";} - - if($string =~ m/\Q$OFS/){ - if ($OFS eq " "){ - print "The string <$string> contains spaces while is is used as Output Field Separator (OFS) to create fasta header, so we have quoted it (\"string\").\n". - "If you want to keep the string/header intact, please chose another OFS using the option --ofs\n" if ! $quiet; - $string="\"".$string."\""; - } - else{ - print "The fasta header has been modified !! Indeed, the string <$string> contains the Output Field Separator (OFS) <$OFS> used to build the header, so we replace it by <$replaceBy>.". - "If you want to keep the string/header intact, please chose another OFS using the option --ofs\n" if ! $quiet; - eval "\$string =~ tr/\Q$OFS\E/\Q$replaceBy\E/"; - } - } - return $string -} - -sub clean_tag{ - my ($string) = @_; - - my $replaceBy = "_"; - if($OFS eq "="){$replaceBy = ":";} - - if($string =~ m/\Q$OFS/){ - eval "\$string =~ tr/\Q$OFS\E/\Q$replaceBy\E/"; - } - return $string -} - -sub extract_sequences{ - my($feature_list, $db, $id_seq, $description, $opt_full, $opt_upstreamRegion, $opt_downRegion, $opt_split, $opt_extremity_only, $level )=@_; - - #sort the list - my @sortedList = sort {$a->start <=> $b->start} @$feature_list; - my $seq_id = $sortedList[0]->seq_id; - #set strand, check if need to be reverse complement - my $minus = undef; - if($sortedList[0]->strand eq "-1" or $sortedList[0]->strand eq "-"){ $minus = 1; } - - - # ------ Full sequence with introns ------ - if($opt_full){ - my $start = $sortedList[0]->start; - my $end = $sortedList[$#sortedList]->end; - my $info = ""; my $right_piece = ""; my $left_piece = ""; my $sequence = ""; - - # take and append the left piece if asked for - if ( ( $opt_upstreamRegion and ! 
$minus ) or ( $opt_downRegion and $minus ) ){ - ($left_piece, $info) = get_left_extremity($db, $seq_id, $opt_upstreamRegion, $opt_downRegion, $minus, $start, $end, $info); - } - - # take and append the right piece if asked for - if ( ( $opt_downRegion and !$minus ) or ( $opt_upstreamRegion and $minus ) ){ - ($right_piece, $info) = get_right_extremity($db, $seq_id, $opt_upstreamRegion, $opt_downRegion, $minus, $start, $end, $info); - } - - # append only extremities - if($opt_extremity_only){ - $sequence = $left_piece.$right_piece; - } - else{ # append extremity to main sequence even if empty - $sequence = get_sequence($db, $seq_id, $start, $end); - $sequence = $left_piece.$sequence.$right_piece; - } - - # create object - my $seqObj = create_seqObj($sequence, $id_seq, $description, $minus, $info); - # print object - print_seqObj($ostream, $seqObj, $opt_AA, $codonTable); - } - # -------------------------------------- - - - # ------ all pieces independantly ------ - elsif($opt_split){ - - foreach my $feature ( @sortedList ){ - my $start = $feature->start; - my $end = $feature->end; - my $info = ""; my $right_piece = ""; my $left_piece = ""; my $sequence = ""; - - # take and append the left piece if asked for - if ( ( $opt_upstreamRegion and ! 
$minus ) or ( $opt_downRegion and $minus ) ){ - ($left_piece, $info) = get_left_extremity($db, $seq_id, $opt_upstreamRegion, $opt_downRegion, $minus, $start, $end, $info); - } - - # take and append the right piece if asked for - if ( ( $opt_downRegion and !$minus ) or ( $opt_upstreamRegion and $minus ) ){ - ($right_piece, $info) = get_right_extremity($db, $seq_id, $opt_upstreamRegion, $opt_downRegion, $minus, $start, $end, $info); - } - - # append only extremities - if($opt_extremity_only){ - $sequence = $left_piece.$right_piece; - } - else{ # append extremity to main sequence even if empty - $sequence = get_sequence($db, $seq_id, $start, $end); - $sequence = $left_piece.$sequence.$right_piece; - } - - my $seqObj = undef; - if($level eq 'level3' ){ #update header's id information - my $id_l3 = $feature->_tag_value('ID'); - my $updated_description="transcript=".$id_seq.$OFS.$description; - #create object - $seqObj = create_seqObj($sequence, $id_l3, $updated_description, $minus, $info); - } - else{ - $seqObj = create_seqObj($sequence, $id_seq, $description, $minus, $info); - } - - #print object - print_seqObj($ostream, $seqObj, $opt_AA, $codonTable); - } - } - # -------------------------------------- - - - # ------ Collapse spreaded features ------ - else{ - my $sequence="";my $info = ""; - - # create sequence part 1 - foreach my $feature ( @sortedList ){ - $sequence .= get_sequence($db, $feature->seq_id, $feature->start, $feature->end); - } - - # update sequence with extremities if option - if($opt_upstreamRegion or $opt_downRegion){ - my $start = $sortedList[0]->start; - my $end = $sortedList[$#sortedList]->end; - my $right_piece = ""; my $left_piece = ""; - - # take and append the left piece if asked for - if ( ( $opt_upstreamRegion and ! 
$minus ) or ( $opt_downRegion and $minus ) ){ - ($left_piece, $info) = get_left_extremity($db, $seq_id, $opt_upstreamRegion, $opt_downRegion, $minus, $start, $end, $info); - } - - # take and append the right piece if asked for - if ( ( $opt_downRegion and !$minus ) or ( $opt_upstreamRegion and $minus ) ){ - ($right_piece, $info) = get_right_extremity($db, $seq_id, $opt_upstreamRegion, $opt_downRegion, $minus, $start, $end, $info); - } - - # append only extremities - if($opt_extremity_only){ - $sequence = $left_piece.$right_piece; - } - else{ # append extremity to main sequence even if empty - $sequence = $left_piece.$sequence.$right_piece; - } - } - - #create object - my $seqObj = create_seqObj($sequence, $id_seq, $description, $minus, $info); - #print object - print_seqObj($ostream, $seqObj, $opt_AA, $codonTable); - } - # -------------------------------------- -} - - -# Get left extremity regardless if it is 5' or 3' -sub get_left_extremity{ - - my ($db, $seq_id, $opt_upstreamRegion, $opt_downRegion, $minus, $start, $end, $info)=@_; - - if( $info ne ""){$info.=$OFS;} - - my $new_start = undef; - - if ( $minus ){ - $new_start = $start-$opt_downRegion; - # add info left it is 3' - if($new_start < 0){$info.=clean_tag("3'extra=").($start-1)."nt";} - else{$info.=clean_tag("3'extra=").$opt_downRegion."nt";} - } - else{ - - $new_start=$start-$opt_upstreamRegion; - # add info left it is 5' - if($new_start < 0){$info.=clean_tag("5'extra=").($start-1)."nt";} - else{$info.=clean_tag("5'extra=").$opt_upstreamRegion."nt";} - } - - # extract the chunck - my $sequence = ""; - if ($new_start > $start){ # Deal with neagtive value for $opt_upstreamRegion, $opt_downRegion (e.g when trying to extract the start and stop codons from a CDS or splice sites of intron feature) - $sequence = get_sequence($db, $seq_id, $start, $new_start-1); - } - else{ # Majority of cases, positive value for $opt_upstreamRegion, $opt_downRegion - $sequence = get_sequence($db, $seq_id, $new_start, $start-1); 
- } - - return $sequence, $info; -} - - -# Get right extremity regardless if it is 5' or 3' -sub get_right_extremity{ - my ($db, $seq_id, $opt_upstreamRegion, $opt_downRegion, $minus, $start, $end, $info)=@_; - - if( $info ne ""){$info.=$OFS;} - - my $new_end= undef; - - if ( $minus ){ - $new_end = $end+$opt_upstreamRegion; - if($end > $db->length($seq_id) ){ $info.=clean_tag("5'extra=").($db->length($seq_id)-$end)."nt" ;} - else{$info.=clean_tag("5'extra=").$opt_upstreamRegion."nt";} - } - else{ - $new_end = $end+$opt_downRegion; - # add info right it is 3' - if($new_end > $db->length($seq_id) ){$info.=clean_tag("3'extra=").$db->length($seq_id)-$end."nt" ;} - else{$info.=clean_tag("3'extra=").$opt_downRegion."nt";} - } - - # extract the chunck - my $sequence = ""; - if ($new_end < $end){ # Deal with neagtive value for $opt_upstreamRegion, $opt_downRegion (e.g when trying to extract the start and stop codons from a CDS or splice sites of intron feature) - $sequence = get_sequence($db, $seq_id, $new_end+1, $end); - } - else{ # Majority of cases, positive value for $opt_upstreamRegion, $opt_downRegion - $sequence = get_sequence($db, $seq_id, $end+1, $new_end); - } - - return $sequence, $info; -} - - -# -sub create_seqObj{ - my ($sequence, $id_seq, $description, $minus, $info)=@_; - - my $seqObj = Bio::Seq->new( '-format' => 'fasta' , -seq => $sequence); - - #check if need to be reverse complement - $seqObj=$seqObj->revcom if $minus and !$DONOTREVCOMP; - - # build description - if($info){ - $description.=$OFS.$info; - } - - # fill object with id and description - $seqObj->id($id_seq); - $seqObj->description($description); - - return $seqObj; -} - - -# extract the sequence from the DB -sub get_sequence{ - my ($db, $seq_id, $start, $end) = @_; - - my $sequence=""; - my $seq_id_correct = undef; - if( exists $allIDs{lc($seq_id)}){ - - $seq_id_correct = $allIDs{lc($seq_id)}; - - $sequence = $db->subseq($seq_id_correct, $start, $end); - - if($sequence eq ""){ - warn 
"Problem ! no sequence extracted for - $seq_id !\n"; exit; - } - if( length($sequence) != abs($end-$start+1) ){ - my $wholeSeq = $db->subseq($seq_id_correct); - $wholeSeq = length($wholeSeq); - warn "Problem ! The size of the sequence extracted ".length($sequence)." is different than the specified span: ".abs($end-$start+1). - ".\nThat often occurs when the fasta file does not correspond to the annotation file. Or the index file comes from another fasta file which had the same name and haven't been removed.\n". - "As last possibility your gff contains location errors (Already encountered for a Maker annotation)\n", - "Supplement information: seq_id=$seq_id ; seq_id_correct=$seq_id_correct ; start=$start ; end=$end ; $seq_id sequence length: $wholeSeq )\n"; - } - } - else{ - warn "Problem ! ID $seq_id not found !\n"; - } - - return $sequence; -} - -# Print the sequence object -sub print_seqObj{ - my($ostream, $seqObj, $opt_AA, $codonTable) = @_; - - $nbFastaSeq++; - - if($opt_AA){ #translate if asked - my $transObj = $seqObj->translate(-CODONTABLE_ID => $codonTable); - - if($opt_cleanFinalStop and $opt_cleanInternalStop){ #this case is needed to be able to remove two final stop codon in a raw when the bothotpion are activated. 
- my $lastChar = substr $transObj->seq(),-1,1; - my $cleanedSeq=$transObj->seq(); - if ($lastChar eq "*"){ # if last char is a stop we remove it - chop $cleanedSeq; - } - $cleanedSeq =~ tr/*/X/; #X = Any / unknown Amino Acid - $transObj->seq($cleanedSeq); - } - - elsif($opt_cleanFinalStop){ - my $lastChar = substr $transObj->seq(),-1,1; - - if ($lastChar eq "*"){ # if last char is a stop we remove it - my $cleanedSeq=$transObj->seq(); - chop $cleanedSeq; - $transObj->seq($cleanedSeq); - } - } - - elsif($opt_cleanInternalStop){ - my $lastChar = substr $transObj->seq(),-1,1; - - my $seqMinus1=$transObj->seq(); - chop $seqMinus1; - $seqMinus1 =~ tr/*/X/; #X = Any / unknown Amino Acid - my $cleanedSeq=$seqMinus1.$lastChar; - $transObj->seq($cleanedSeq); - } - - $ostream->write_seq($transObj); - } - else{ - $ostream->write_seq($seqObj); - } -} - - -__END__ - -=head1 NAME - -gff3_extract_sequences.pl - -This script extract sequence in fasta format from gff file. You can extract the fasta of any kind of feature define by the 3th column in the gff file. -The result is written to the specified output file, or to STDOUT. - -The Header are formated like that: ->mRNA_ID gene=gene_ID name=NAME seq_id=Chromosome_ID type=cds 5'extra=VALUE - ^ <----------------------------v------------------------------------> - ID description (Where the OFS can be modified) - -/!\The ID will be the gene_ID extracting gene. -Name is optional and will be written only if the Name attribute exists in th gff. -type will be the feature type extracted. -5'extra or 3'extra is otpional, according to the use of the upstream and downstream options. - -=head1 SYNOPSIS - - ./gff3_extract_sequences.pl -g=infile.gff -f=infile.fasta [ -o outfile ] - ./gff3_extract_sequences.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<-g>, B<--gff> or B<-ref> - -Input GFF3 file that will be read (and sorted) - -=item B<-f> or B<--fasta> - -Input fasta file. 
- -=item B<-dnrc> -dnrc means `do not reverse complemt`, by default if a feature is indicated on the minus strand, the tool will reverse complement the extrated sequence. -You can deactivate the behavior by using this option. - -=item B<-t> - -Define the feature you want to extract the sequnece from. By deafault it's 'cds'. Most common choice are: gene,mrna,exon,cds,trna,three_prime_utr,five_prime_utr. -When you chose exon (or cds,utr,etc.), all the exon related to a same L2 feature are attached together before to extract the exon. (It doesnt provide one sequence by exon !!) - -=item B<-p>, B<--protein> or B<--aa> - -Will translate the extracted sequence in Amino acid. By default the codon table used is the 1 (Standard). See codon table option for more options. - -=item B<--codon>, B<--table> or B<--ct> - -Allow to choose another type of codon table for the translation. - -=item B<--eo> - -Called ´extremity only', this option allows the extracttion of adjacent parts of a feature. This option has to be activated with -u and/or -p option. -/!\ using -u and -p together builds a chimeric sequence which will be the concatenation of the left and right extremities of a feature. - -=item B<--split> - -By default, all level3 features (exon, cds, utr) collectively linled to a level2 feature (rna, mRNA) are merge together to shape an entire feature -(e.g. several cds pieces can be merged to create the CDS in its whole). -If you wish to extract all the subfetures independantly activate tge --split option. - -=item B<--full> - -This option allows dealing with multifeature like cds or exon, to extract the full sequence from start extremity to the end extremity, i.e with introns. -Use of that option with exon will give the same result as extract the mrna sequence (-t mRNA) and corresponds to the cdna*. 
-(To actually extract an mRNA as it is defined biologicaly you need to use the -t exon option wihtout the --full option and wihtout the --split option) -Use of that option on cds will give the cdna* wihtout the untraslated sequences. -*Not a real cdna because it is not reversed - -=item B<-u>, B<--up>, B<-5>, B<--five> or B<-upstream> - -Integer. It will take that number of nucleotide in more at the 5' extremity. -/!\ You must activate the option "--full" if you with to extract only the most upstream part of certain feature (exon,cds,utr) -otherwise you will extract each upstream parts of the subfeatures (e.g many cds parts may be needed to shape a cds in its whole). - -=item B<-d>, B<--do>, B<-3>, B<--three>, B<-down> or B<-downstream> - -Integer. It will take that number of nucleotide in more at the 3' extremity. -/!\ You must activate the option "--full" if you with to extract only the most downstream part of certain feature (exon,cds,utr) -otherwise you will extract each downstream parts of the subfeatures (e.g many cds parts may be needed to shape a cds in its whole). - -=item B<--cdna> - -This extract the cdna* sequence (i.e transcribed sequence (devoid of introns, but containing untranslated exons)). It corresponds to extract the exons sequences. -*Not a real cdna because it is not reversed - -=item B<--ofs> - -Output Fields Separator for the description field. By default it's a space < > but can be modified by any String or character using this option. - -=item B<--cis> - -The Clean Internal Stop option allows replacing the translation of the stop codons present among the sequence that is represented by the <*> character by . Indeed the <*> character can be disturbing for many programs (e.g interproscan) - -=item B<--cfs> - -The Clean Final Stop option allows removing the translation of the final stop codons that is represented by the <*> character. 
This character can be disturbing for many programs (e.g interproscan) - -=item B<-o> or B<--output> - -Output fasta file. If no output file is specified, the output will be -written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_filter_by_ORF_size.pl b/annotation/Tools/Util/gff/gff3_sp_filter_by_ORF_size.pl deleted file mode 100755 index 4f5659d0a..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_filter_by_ORF_size.pl +++ /dev/null @@ -1,288 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Carp; -use POSIX qw(strftime); -use Getopt::Long; -use Pod::Usage; -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient; - -my $start_run = time(); -my $header = qq{ -######################################################## -# NBIS 2018 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $codonTableId=1; -my $PROT_LENGTH = 100; -my $file_fasta=undef; -my $outfile = undef; -my $verbose = undef; -my $opt_test = undef; -my $gff = undef; -my $help= 0; - -my @copyARGV=@ARGV; -Getopt::Long::Configure ('bundling'); -if ( !GetOptions( - "help|h" => \$help, - "g|gff=s" => \$gff, - 't|test=s' => \$opt_test, - "size|s=i" => \$PROT_LENGTH, - "table|codon|ct=i" => \$codonTableId, - "v!" => \$verbose, - "output|outfile|out|o=s" => \$outfile)) - -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ( ! 
(defined($gff)) ){ - pod2usage( { - -message => "$header\nAt least 1 parameter is mandatory:\n Input reference gff file (--gff)\n\n", - -verbose => 0, - -exitval => 1 } ); -} - -###################### -# Option check -if($codonTableId<0 and $codonTableId>25){ - print "$codonTableId codon table is not a correct value. It should be between 0 and 25 (0,23 and 25 can be problematic !)\n"; -} -if($opt_test){ - if($opt_test ne "<" and $opt_test ne ">" and $opt_test ne "<=" and $opt_test ne ">=" and $opt_test ne "=" and $opt_test ne "=="){ - print "The test to apply is Wrong: $opt_test.\nWe want something among this list: <,>,<=,>=,== or =.";exit; - } -} -else{ - $opt_test = ">"; -} - -###################### -# Manage output file # -my $gffout_pass; -my $gffout_notpass; -if ($outfile) { - $outfile=~ s/.gff//g; - open(my $fh, '>', $outfile.$opt_test.$PROT_LENGTH.".gff") or die "Could not open file '$outfile' $!"; - $gffout_pass= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); -} -else{ - $gffout_pass= Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3 ); -} - -if ($outfile) { - $outfile=~ s/.gff//g; - open(my $fh, '>', $outfile."_NOT_".$opt_test.$PROT_LENGTH.".gff") or die "Could not open file '$outfile' $!"; - $gffout_notpass= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); -} -else{ - $gffout_notpass= Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3 ); -} - -# print usage performed -my $stringPrint = strftime "%m/%d/%Y at %Hh%Mm%Ss", localtime; -$stringPrint = "Launched the ".$stringPrint."\nusage: $0 @copyARGV\n"; -$stringPrint .= "We are filtering the gene with protein size $opt_test $PROT_LENGTH\n"; -print $stringPrint; - - ##################### - # MAIN # - ##################### - -###################### -### Parse GFF input # -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $gff - }); -print ("GFF3 file parsed\n"); - -my @good_gene_list; -my @bad_gene_list; -my $number_pass; -foreach my $primary_tag_l1 (keys 
%{$hash_omniscient->{'level1'}}){ # primary_tag_l1 = gene or repeat etc... - foreach my $gene_id_l1 (keys %{$hash_omniscient->{'level1'}{$primary_tag_l1}}){ - my $gene_feature=$hash_omniscient->{'level1'}{$primary_tag_l1}{$gene_id_l1}; - print "Study gene $gene_id_l1\n" if($verbose); - - foreach my $primary_tag_l2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - my $there_is_cds=undef; - my $one_pass=undef; - if ( exists_keys( $hash_omniscient, ('level2', $primary_tag_l2, $gene_id_l1) ) ){ - foreach my $level2_feature ( @{$hash_omniscient->{'level2'}{$primary_tag_l2}{$gene_id_l1}}) { - - # get level2 id - my $id_level2 = lc($level2_feature->_tag_value('ID')); - - ############################## - #If it's a mRNA = have CDS. # - if ( exists ($hash_omniscient->{'level3'}{'cds'}{$id_level2} ) ){ - $there_is_cds="true"; - - ############## - # Manage CDS # - my $cds_size=0; - foreach my $cds (@{$hash_omniscient->{'level3'}{'cds'}{$id_level2}}){ - $cds_size+= ($cds->end() - $cds->start() + 1); - } - $cds_size = ($cds_size - 3) / 3; # Remove the stop codon and divide by 3 to get Amnino acid - - if(test_size($cds_size, $PROT_LENGTH, $opt_test) ){ - $one_pass="true"; - } - } - } - if($there_is_cds){ - if($one_pass){ - push(@good_gene_list, $gene_id_l1); - $number_pass++; - } - else{ - push(@bad_gene_list, $gene_id_l1); - } - } - else{ - print "No cds for $gene_id_l1\n" if ($verbose); - push(@good_gene_list, $gene_id_l1); - } - } - } - } -} - -#resume - -my $number_notpass=$#bad_gene_list+1; -print_omniscient_from_level1_id_list($hash_omniscient, \@good_gene_list, $gffout_pass); #print intact gene to the file -print_omniscient_from_level1_id_list($hash_omniscient, \@bad_gene_list, $gffout_notpass); #print intact gene to the file - -print "$number_pass genes passed the test.\n"; -print "$number_notpass genes didn't pass the test.\n"; - -# END -my $end_run = time(); -my $run_time = $end_run - $start_run; -print "Job done 
in $run_time seconds\n"; -####################################################################################################################### - #################### - # METHODS # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - -sub test_size{ - my ($size, $PROT_LENGTH, $operator) = @_; - - if ($operator eq ">"){ - if ($size > $PROT_LENGTH){ - return "true"; - } - } - if ($operator eq "<"){ - if ($size < $PROT_LENGTH){ - return "true"; - } - } - if ($operator eq "=" or $operator eq "=="){ - if ($size == $PROT_LENGTH){ - return "true"; - } - } - if ($operator eq "<="){ - if ($size <= $PROT_LENGTH){ - return "true"; - } - } - if ($operator eq ">="){ - if ($size >= $PROT_LENGTH){ - return "true"; - } - } - -return undef; -} - -#check if reference exists in hash. Deep infinite : hash{a} or hash{a}{b} or hash{a}{b}{c}, etc. -# usage example: exists_keys($hash_omniscient,('level3','cds',$level2_ID) -sub exists_keys { - my ($hash, $key, @keys) = @_; - - if (ref $hash eq 'HASH' && exists $hash->{$key}) { - if (@keys) { - return exists_keys($hash->{$key}, @keys); - } - return 1; - } - return ''; -} - -__END__ - - - - -=head1 NAME - -gff3_sp_filter_by_ORF_size.pl - -The script reads a gff annotation file, and create two output files, one contains the gene models with ORF passing the test, the other contains the rest. -By default the test is "> 100" that means all gene models that have ORF longer than 100 Amino acids, will pass the test. - -=head1 SYNOPSIS - - ./gff3_sp_filter_by_ORF_size.pl --gff infile.gff [ -o outfile ] - ./gff3_sp_filter_by_ORF_size.pl -h - -=head1 OPTIONS - -=over 8 - -=item B<-g> or B<--gff> - -Annotation file - -=item B<-s> or B<--size> - -ORF size to apply the test. Default 100. - -=item B<--ct> or B<--table> or B<--codon> - -This option allows specifying the codon table to use - It expects an integer (1 by default = standard) - -=item B<-t> or B<--test> -Test to apply (> < = >= <=). 
If you us one of these two character >, <, please don't forget to quote you parameter liket that "<=". Else your terminal will complain. -By default it will be ">" - -=item B<-v> - -Verbose. Useful for debugging purpose. Bolean - -=item B<-o> or B<--out> or B<--output> or B<--outfile> - -Output GFF file. If no output file is specified, the output will be -written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_filter_by_locus_distance.pl b/annotation/Tools/Util/gff/gff3_sp_filter_by_locus_distance.pl deleted file mode 100755 index 6bddc98ea..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_filter_by_locus_distance.pl +++ /dev/null @@ -1,281 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Carp; -use File::Basename; -use Getopt::Long; -use Pod::Usage; -use List::MoreUtils qw(uniq); -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient; - -my $header = qq{ -######################################################## -# NBIS 2019 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $outfile = undef; -my $gff = undef; -my $add_flag=undef; -my $opt_dist=500; -my $verbose = undef; -my $help= 0; - -my @copyARGV=@ARGV; -if ( !GetOptions( - "help|h" => \$help, - "gff=s" => \$gff, - "add_flag|af!" => \$add_flag, - "d|dist=i" => \$opt_dist, - "v!" => \$verbose, - "output|outfile|out|o=s" => \$outfile)) - -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ( ! 
defined($gff) ){ - pod2usage( { - -message => "$header\nAt least 1 parameter is mandatory:\nInput reference gff file (--gff)\n\n", - -verbose => 0, - -exitval => 1 } ); -} - -###################### -# Manage output file # -my $gffout; -if ($outfile) { - open(my $fh, '>', $outfile) or die "Could not open file '$outfile' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); -} -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - - ##################### - # MAIN # - ##################### - - -###################### -### Parse GFF input # -my ($omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $gff - }); -print ("GFF3 file parsed\n"); - -#counters -my $geneCounter_skip=0; -my $geneCounter_ok=0; -my $total=0; -my @gene_id_ok; - -my $sortBySeq = gather_and_sort_l1_location_by_seq_id($omniscient); - -foreach my $locusID ( keys %{$sortBySeq}){ # tag_l1 = gene or repeat etc... - - foreach my $tag_l1 ( keys %{$sortBySeq->{$locusID}} ) { - - # Go through location from left to right ### !! - while ( @{$sortBySeq->{$locusID}{$tag_l1}} ){ - $total++; - - #location A - my $location = shift @{$sortBySeq->{$locusID}{$tag_l1}};# This location will be updated on the fly - my $id_l1 = $location->[0]; - #print "id_l1 $id_l1\n"; - - my $continue = 1; - my $overlap = 0; - my $jump = undef; - #loop to look at potential set of overlaping genes otherwise go through only once - while ( $continue ){ - - # Next location - my $location2 = @{$sortBySeq->{$locusID}{$tag_l1}}[0]; - my $id2_l1 = $location2->[0]; - my $dist = $location2->[1] - $location->[2] + 1; - print "distance $id_l1 - id2_l1 = $dist\n" if ($verbose); - - ############################ - #deal with overlap - if ( ($location->[1] <= $location2->[2]) and ($location->[2] >= $location2->[1])){ - - if( ! 
$overlap){ - - foreach my $tag_level1 (keys %{$omniscient->{'level1'}}){ - if (exists_keys($omniscient, ('level1', $tag_level1, lc($id_l1) ) ) ){ - my $level1_feature = $omniscient->{'level1'}{$tag_level1}{lc($id_l1)}; - add_info($level1_feature, 'O', $verbose); - } - } - } - - $overlap=1; - - foreach my $tag_level1 (keys %{$omniscient->{'level1'}}){ - if (exists_keys($omniscient, ('level1', $tag_level1, lc($id2_l1) ) ) ){ - my $level1_feature = $omniscient->{'level1'}{$tag_level1}{lc($id2_l1)}; - add_info($level1_feature, 'O', $verbose); - } - } - - if($location2->[2] < $location->[2]){ - my $tothrow = shift @{$sortBySeq->{$locusID}{$tag_l1}};# Throw location B. We still need to use location A to check the left extremity of the next locus - #location A ------------------------- -------------------------- - #location B --------- - $total++; - next; - } - else{ - # We need to use the location B to check the left extremity of the next locus - #location A ------------------------- -------------------------- - #location B ------------------------ - $jump = 1; - last; - } - - } - # - ############################ - $continue = 0; - - - print "after overlap check\n"; - # locus distance is under minimum distance - if( $dist < $opt_dist) { - print "$dist < $opt_dist\n"; - - foreach my $tag_level1 (keys %{$omniscient->{'level1'}}){ - if (exists_keys($omniscient, ('level1', $tag_level1, lc($id_l1) ) ) ){ - my $level1_feature = $omniscient->{'level1'}{$tag_level1}{lc($id_l1)}; - add_info($level1_feature, 'R'.$dist, $verbose); - } - } - - foreach my $tag_level1 (keys %{$omniscient->{'level1'}}){ - if (exists_keys($omniscient, ('level1', $tag_level1, lc($id2_l1) ) ) ){ - my $level1_feature = $omniscient->{'level1'}{$tag_level1}{lc($id2_l1)}; - add_info($level1_feature, 'L'.$dist, $verbose); - } - } - } - - # distance with next is ok but we have to check what was the result with the previous locus - else{ - foreach my $tag_level1 (keys %{$omniscient->{'level1'}}){ - if 
(exists_keys($omniscient, ('level1', $tag_level1, lc($id_l1) ) ) ){ - my $level1_feature = $omniscient->{'level1'}{$tag_level1}{lc($id_l1)}; - if(! $level1_feature->has_tag('low_dist')){ - $geneCounter_ok ++; - push @gene_id_ok, lc($id_l1); - } - } - } - } - } - } - } -} - -if($add_flag){ - print_omniscient($omniscient, $gffout); #print result -} -else{ - print_omniscient_from_level1_id_list ($omniscient, \@gene_id_ok, $gffout); #print result -} - -#END -my $string_to_print="usage: $0 @copyARGV\n". - "Results:\n". - "Total number investigated: $total\n". - "Number of skipped loci: $geneCounter_skip\n". - "Number of loci with distance to the surrounding loci over $opt_dist: $geneCounter_ok \n"; -print $string_to_print; -print "Bye Bye.\n"; -####################################################################################################################### - #################### - # METHODS # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - - -sub add_info{ - my ($feature, $value, $verbose)=@_; - - if($feature->has_tag('low_dist')){ - $feature->add_tag_value('low_dist', $value); - print $feature->_tag_value('ID')." add $value\n" if ($verbose); - } - else{ - create_or_replace_tag($feature, 'low_dist', $value); - $geneCounter_skip++; - print $feature->_tag_value('ID')." create $value\n" if ($verbose); - } - -} -__END__ - -=head1 NAME - -gff3_sp_filter_by_locus_distance.pl - - -The script aims to remove or flag loci that are too close to each other. -Close loci are important to remove when training abinitio tools in order to train intergenic region properly. Indeed if intergenic region (surrouneded part of a locus) contain part of another locus, the training on intergenic part will be biased. 
- -=head1 SYNOPSIS - - ./gff3_sp_filter_by_locus_distance.pl -gff=infile.gff [ -o outfile ] - ./gff3_sp_filter_by_locus_distance.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<-gff> - -Input GFF3 file that will be read - -=item B<--dist> or B<-d> - -The minimum inter-loci distance to allow. No default (will not apply -filter by default). - -=item B<--add> or B<--add_flag> - -Instead of filter the result into two output files, write only one and add the flag in the gff.(tag = Lvalue or tag = Rvalue where L is left and R right and the value is the distance with accordingle the left or right locus) - -=item B<-o> , B<--output> , B<--out> or B<--outfile> - -Output GFF file. If no output file is specified, the output will be -written to STDOUT. - -=item B<-v> - -Verbose option, make it easier to follow what is going on for debugging purpose. - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_filter_by_mrnaBlastValue.pl b/annotation/Tools/Util/gff/gff3_sp_filter_by_mrnaBlastValue.pl deleted file mode 100755 index bcbf4c6f6..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_filter_by_mrnaBlastValue.pl +++ /dev/null @@ -1,223 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Carp; -use Pod::Usage; -use Getopt::Long; -use Scalar::Util qw(openhandle); -use Time::Piece; -use Time::Seconds; -use URI::Escape; -use NBIS::GFF3::Omniscient; - -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# Please cite NBIS (www.nbis.se) when using this tool. 
# -######################################################## -}; - -my $outfile = undef; -my $gff = undef; -my $blast = undef; -my $help; - -if ( !GetOptions( "help" => \$help, - "gff=s" => \$gff, - "blast=s" => \$blast, - "outfile=s" => \$outfile )) - -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ( ! (defined($gff)) or !(defined($blast)) ){ - pod2usage( { - -message => "$header\nAt least 2 parameter is mandatory:\nInput reference gff file (--gff) and Input blast file (--blast)\n\n", - -verbose => 0, - -exitval => 1 } ); -} - - -# Open Input gff3 file # -my $ref_in = Bio::Tools::GFF->new(-file =>$gff , -gff_version => 3); - -# Open Output files # -my $out; -if ($outfile) { - open(my $fh, '>', $outfile) or die "Could not open file '$outfile' $!"; - $out= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); -} -else { - $out = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - - -#### MAIN #### - -# Read killlist # -my $killlist = parse_blast($blast); - -### Parse GFF input # -print ("Parse file $gff\n"); -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $gff - }); -print ("$gff file parsed\n"); - -# Remove all mRNA specified by the kill-list from their (gene-) parents. 
-remove_omniscient_elements_from_level2_ID_list ($hash_omniscient, $killlist); - -# Write the remaining things to output -print_omniscient($hash_omniscient, $out); #print gene modified in file - - ######################### - ######### END ########### - ######################### -####################################################################################################################### - #################### - # methods # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - -sub parse_blast -{ - my @answer; - my %moreThanOneTest; - my %duo_answer; - my %hashAns; - my $infile = shift; - my $cpt2 = 0; - # This is one way to open a file... - open( my $IN, '<', $infile ) or - die "FATAL: Can't open BLAST file: $infile for reading.\n$!\n"; - - # Streaming the file, line by line - while (<$IN>) { - chomp; - my $line = $_; - - my @elements = split( "\t", $line ); - - my ( $query, $target, $score ) = @elements[ 0 .. 2 ]; - - # Matches that we need to remove - if ( $query ne $target and $score > 80.0 ) { - ####### <<<<<<<<<<<<<<<<<<<< HERE THE BLAST VALUE CONSIDERED - my $id = "$query$target"; - my $idInver = "$target$query"; - - if ( ( !exists( $hashAns{$id} ) ) && ( !exists( $hashAns{$idInver} ) ) ) - { # avoid redundance info - $hashAns{$id}++; - $cpt2++; - - # keep the 2 ids We will then remove one randomly - $duo_answer{$target} = [ $target, $query ]; - - $moreThanOneTest{$target}++; - $moreThanOneTest{$query}++ - ; # Allows to detect mRNA present more than 1 times - # (In this case they will be selected in priority - # during step 3) - } - - } - } ## end while (<$IN>) - - # We should close the file to make sure that the transaction - # finishes cleanly. 
- close($IN); - - #print "$cpt2\n"; - - # Detect case to remove absolutely to select in a tuple this one if - # the other we can keep it - my %caseToAvoid; - my $cpt = 0; - foreach my $key ( keys %moreThanOneTest ) { - - if ( $moreThanOneTest{$key} > 1 ) { - $caseToAvoid{$key}++; - my $valueUnEsc = uri_unescape($key); - push (@answer, $valueUnEsc); # name from blast must be unescape - $cpt++; - } - } - #print "We will remove $cpt\n"; - - ## Step3 - my $cptCount = 0; - my $removed = 0; - - # We will keep one of the tuple - foreach my $key ( keys %duo_answer ) { - my ( $val1, $val2 ) = @{ $duo_answer{$key} }; - if ( ( !exists( $caseToAvoid{$val1} ) ) and - ( !exists( $caseToAvoid{$val2} ) ) ) - { # case remove one randomly - my $valueUnEsc = uri_unescape($val1); - push (@answer, $valueUnEsc); # name from blast must be unescape - $cptCount++; - } - } - - #print "We will removed $cptCount more.\n"; - my $nbremove = @answer; - print "$nbremove gene will be removed !\n"; - - return \@answer; -} ## end sub parse_blast - - -=head1 NAME - -gff_filter_by_mrnaBlastValue.pl -ancient name gff_filter_by_mrna_id.pl gff_filter_by_mrnaBlastValue.pl -The script aims to remove from a gff file all the sequence that have a similarity over THRESHOLD with another sequence (will keep only one). -This is typically useful when creating a list of mRNA to use to train abinitio gene finder. -A reciprocal blast of the sequences need to have been performed prior to the use of this script in order to get the blastp input file. - -=head1 SYNOPSIS - - ./gff_filter_by_mrnaBlastValue.pl --gff=infile.gff3 --blast blastfile --outfile outFile - ./gff_filter_by_mrnaBlastValue.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff> - -Input GFF3 file correponding to gene build. - -=item B<--blast> - -The list of the all-vs-all blast file (outfmt 6, blastp) - -=item B<--outfile> - -The name of the output file. By default the output is the standard output. 
- -=item B<--help> or B<-h> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_filter_incomplete_gene_coding_models.pl b/annotation/Tools/Util/gff/gff3_sp_filter_incomplete_gene_coding_models.pl deleted file mode 100755 index 84ce2ecb9..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_filter_incomplete_gene_coding_models.pl +++ /dev/null @@ -1,374 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Carp; -use File::Basename; -use Getopt::Long; -use Pod::Usage; -use List::MoreUtils qw(uniq); -use Bio::Tools::GFF; -use Bio::DB::Fasta; -use Bio::SeqIO; -use NBIS::GFF3::Omniscient; - -my $header = qq{ -######################################################## -# NBIS 2019 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $outfile = undef; -my $gff = undef; -my $file_fasta=undef; -my $codonTableId=1; -my $skip_start_check=undef; -my $skip_stop_check=undef; -my $add_flag=undef; -my $verbose = undef; -my $help= 0; - -my @copyARGV=@ARGV; -if ( !GetOptions( - "help|h" => \$help, - "gff=s" => \$gff, - "fasta|fa|f=s" => \$file_fasta, - "table|codon|ct=i" => \$codonTableId, - "add_flag|af!" => \$add_flag, - "skip_start_check|sstartc!" => \$skip_start_check, - "skip_stop_check|sstopc!" => \$skip_stop_check, - "v!" => \$verbose, - "output|outfile|out|o=s" => \$outfile)) - -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ( ! 
(defined($gff)) or !(defined($file_fasta)) ){ - pod2usage( { - -message => "$header\nAt least 2 parameter is mandatory:\nInput reference gff file (--gff) and Input fasta file (--fasta)\n\n", - -verbose => 0, - -exitval => 1 } ); -} - -my $codonTable; -if($codonTableId<0 and $codonTableId>25){ - print "$codonTableId codon table is not a correct value. It should be between 0 and 25 (0,23 and 25 can be problematic !)\n"; -} -else{ - $codonTable = Bio::Tools::CodonTable->new( -id => $codonTableId); -} - -###################### -# Manage output file # -my $gffout; -my $gffout_incomplete; -if ($outfile) { - my ($filename,$path,$ext) = fileparse($outfile,qr/\.[^.]*/); - my $outputname = $path.$filename.$ext; - open(my $fh, '>', $outputname) or die "Could not open file '$outputname' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); - $outputname = $path.$filename."_incomplete".$ext; - open(my $fh, '>', $outputname) or die "Could not open file '$outputname' $!"; - $gffout_incomplete= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); -} -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); - $gffout_incomplete = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - - ##################### - # MAIN # - ##################### - - -###################### -### Parse GFF input # -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $gff - }); -print ("GFF3 file parsed\n"); - - -#################### -# index the genome # -my $nbFastaSeq=0; -my $db = Bio::DB::Fasta->new($file_fasta); -my @ids = $db->get_all_primary_ids; -my %allIDs; # save ID in lower case to avoid cast problems -foreach my $id (@ids ){$allIDs{lc($id)}=$id;} -print ("Genome fasta parsed\n"); -#################### - -#counters -my %mrnaCounter={1=>0, 2=>0, 3=>0}; -my $geneCounter=0; -my %omniscient_incomplete; -my @incomplete_mRNA; - - -foreach my $primary_tag_key_level1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_key_level1 = gene 
or repeat etc... - foreach my $gene_id (keys %{$hash_omniscient->{'level1'}{$primary_tag_key_level1}}){ - my $gene_feature = $hash_omniscient->{'level1'}{$primary_tag_key_level1}{$gene_id}; - my $strand = $gene_feature->strand(); - print "gene_id = $gene_id\n" if $verbose; - - my @level1_list=(); - my @level2_list=(); - my @level3_list=(); - - my $ncGene=1; - foreach my $primary_tag_key_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - if ( exists_keys( $hash_omniscient, ('level2', $primary_tag_key_level2, $gene_id) ) ){ - - my $geneInc=undef; - foreach my $level2_feature ( @{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$gene_id}}) { - my $start_missing=undef; - my $stop_missing=undef; - - # get level2 id - my $level2_ID = lc($level2_feature->_tag_value('ID')); - - if ( exists_keys( $hash_omniscient, ('level3', 'cds', $level2_ID) ) ){ - $ncGene=undef; - - my $seqobj = extract_cds(\@{$hash_omniscient->{'level3'}{'cds'}{$level2_ID}}, $db); - - #------------- check start ------------- - if (! $skip_start_check){ - my $start_codon = $seqobj->subseq(1,3); - if(! $codonTable->is_start_codon( $start_codon )){ - print "start= $start_codon is not a valid start codon\n" if ($verbose); - $start_missing="true"; - if($add_flag){ - create_or_replace_tag($level2_feature, 'incomplete', '1'); - } - } - } - #------------- check stop -------------- - if (! $skip_stop_check){ - my $seqlength = length($seqobj->seq()); - my $stop_codon = $seqobj->subseq($seqlength - 2, $seqlength) ; - - if(! 
$codonTable->is_ter_codon( $stop_codon )){ - print "stop= $stop_codon is not a valid stop codon\n" if ($verbose); - $stop_missing="true"; - if($add_flag){ - if($start_missing){ - create_or_replace_tag($level2_feature, 'incomplete', '3'); - } - else{ - create_or_replace_tag($level2_feature, 'incomplete', '2'); - } - } - } - } - } - else{ #No CDS - print "Not a coding rna (no CDS) we skip it"; - } - if($start_missing or $stop_missing){ - #Keep track counter - if ($start_missing and $stop_missing) { - $mrnaCounter{'3'}++; - } - elsif($start_missing){ - $mrnaCounter{'1'}++; - } - else{ - $mrnaCounter{'2'}++; - } - $geneInc="true"; - print "$level2_ID\n"; - if(! $add_flag){ - push(@incomplete_mRNA, $level2_ID); # will be removed at the end - push(@level2_list, $level2_feature); # will be appended to omniscient_incomplete - foreach my $primary_tag_l3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... - if ( exists ($hash_omniscient->{'level3'}{$primary_tag_l3}{$level2_ID} ) ){ - push(@level3_list, @{$hash_omniscient->{'level3'}{$primary_tag_l3}{$level2_ID}}) - } - } - } - } - } - if($geneInc){ - $geneCounter++; - #Save the mRNA and parent and child features - if(! $add_flag){ - @level1_list=($gene_feature); - append_omniscient(\%omniscient_incomplete, \@level1_list, \@level2_list, \@level3_list); - } - } - } - } - #after checking all mRNA of a gene - if($ncGene){ - print "This is a non coding gene (no cds to any of its RNAs)"; - } - } -} - - -#END -my $string_to_print="usage: $0 @copyARGV\n"; -$string_to_print .="Results:\n"; - -if ($geneCounter) { - $string_to_print .="Number of gene affected: $geneCounter\n"; - $string_to_print .="There are ".$mrnaCounter{3}." mRNAs without start and stop codons.\n"; - $string_to_print .="There are ".$mrnaCounter{2}." mRNAs without stop codons.\n"; - $string_to_print .="There are ".$mrnaCounter{1}." 
mRNAs without start codons.\n"; -} -else{ - $string_to_print .="No gene with incomplete mRNA!\n"; -} -print $string_to_print; - -if(! $add_flag){ - #clean for printing - if (@incomplete_mRNA){ - _check_all_level2_positions(\%omniscient_incomplete,0); # review all the feature L2 to adjust their start and stop according to the extrem start and stop from L3 sub features. - _check_all_level1_positions(\%omniscient_incomplete,0); - - remove_omniscient_elements_from_level2_ID_list($hash_omniscient, \@incomplete_mRNA); - _check_all_level2_positions($hash_omniscient,0); # review all the feature L2 to adjust their start and stop according to the extrem start and stop from L3 sub features. - _check_all_level1_positions($hash_omniscient,0); # Check the start and end of level1 feature based on all features level2. - } -} - -print_omniscient($hash_omniscient, $gffout); #print result - -if(@incomplete_mRNA){ - print "Now print incomplete models:\n"; - print_omniscient(\%omniscient_incomplete, $gffout_incomplete); #print result -} - -print "Bye Bye.\n"; -####################################################################################################################### - #################### - # METHODS # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - -sub extract_cds{ - my($feature_list, $db)=@_; - - my @sortedList = sort {$a->start <=> $b->start} @$feature_list; - my $sequence=""; - foreach my $feature ( @sortedList ){ - $sequence .= get_sequence($db, $feature->seq_id, $feature->start, $feature->end); - } - - #create sequence object - my $seq = Bio::Seq->new( '-format' => 'fasta' , -seq => $sequence); - - #check if need to be reverse complement - if($sortedList[0]->strand eq "-1" or $sortedList[0]->strand eq "-"){ - $seq=$seq->revcom; - } - return $seq; -} - -sub get_sequence{ - my ($db, $seq_id, $start, $end) = @_; - - my $sequence=""; - my $seq_id_correct = undef; - if( exists $allIDs{lc($seq_id)}){ - - $seq_id_correct = 
$allIDs{lc($seq_id)}; - - $sequence = $db->subseq($seq_id_correct, $start, $end); - - if($sequence eq ""){ - warn "Problem ! no sequence extracted for - $seq_id !\n"; exit; - } - if(length($sequence) != ($end-$start+1)){ - my $wholeSeq = $db->subseq($seq_id_correct); - $wholeSeq = length($wholeSeq); - warn "Problem ! The size of the sequence extracted ".length($sequence)." is different than the specified span: ".($end-$start+1).".\nThat often occurs when the fasta file does not correspond to the annotation file. Or the index file comes from another fasta file which had the same name and haven't been removed.\n". - "As last possibility your gff contains location errors (Already encountered for a Maker annotation)\nSupplement information: seq_id=$seq_id ; seq_id_correct=$seq_id_correct ; start=$start ; end=$end ; $seq_id sequence length: $wholeSeq )\n"; - } - } - else{ - warn "Problem ! ID $seq_id not found !\n"; - } - - return $sequence; -} - -__END__ - -=head1 NAME - -gff3_sp_filter_incomplete_gene_coding_models.pl - - -The script aims to remove incomplete gene models. An incomplete gene coding model is a gene coding with start and/or stop codon missing in its cds. -You can modify the behavior using the skip_start_check or skip_stop_check options. - -=head1 SYNOPSIS - - ./gff3_sp_filter_incomplete_gene_coding_models.pl -gff=infile.gff --fasta genome.fa [ -o outfile ] - ./gff3_sp_filter_incomplete_gene_coding_models.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<-gff> - -Input GFF3 file that will be read - -=item B<-fa> or B<--fasta> - -Genome fasta file -The name of the fasta file containing the genome to work with. - -=item B<--ct> or B<--table> or B<--codon> - -This option allows specifying the codon table to use - It expects an integer (1 by default = standard) - -=item B<--ad> or B<--add_flag> - -Instead of filter the result into two output files, write only one and add the flag in the gff.(tag = inclomplete, value = 1, 2, 3. 
1=start missing; 2=stop missing; 3=both) - -=item B<--skip_start_check> or B<--sstartc> - -Gene model must have a start codon. Activated by default. - -=item B<--skip_stop_check> or B<--sstopc> - -Gene model must have a stop codon. Activated by default. - -=item B<-o> , B<--output> , B<--out> or B<--outfile> - -Output GFF file. If no output file is specified, the output will be -written to STDOUT. - -=item B<-v> - -Verbose option, make it easier to follow what is going on for debugging purpose. - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_fix_cds_frame.pl b/annotation/Tools/Util/gff/gff3_sp_fix_cds_frame.pl deleted file mode 100755 index fe19c12b7..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_fix_cds_frame.pl +++ /dev/null @@ -1,146 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Pod::Usage; -use Getopt::Long; -use Bio::Tools::GFF; -use Bio::DB::Fasta; -use NBIS::GFF3::Omniscient; - - -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $start_run = time(); -my $opt_fasta = undef; -my $opt_gfffile; -my $opt_verbose; -my $codonTable=0; -my $opt_output; -my $opt_help = 0; - -# OPTION MANAGMENT -if ( !GetOptions( 'g|gff=s' => \$opt_gfffile, - 'o|output=s' => \$opt_output, - "fasta|fa=s" => \$opt_fasta, - "v|vebose!" => \$opt_verbose, - "table|codon|ct=i" => \$codonTable, - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($opt_help) { - pod2usage( { -verbose => 1, - -exitval => 0, - -message => "$header \n" } ); -} - -if (! defined($opt_gfffile) or ! 
defined($opt_fasta)){ - pod2usage( { - -message => "$header\nAt least 2 parameters are mandatory:\nInput reference gff file (-g) and Input fasta file (--fasta).\n\n". - "Ouptut is optional. Look at the help documentation to know more.\n", - -verbose => 0, - -exitval => 1 } ); -} - -###################### -# Manage output file # - -my $gffout; -if ($opt_output) { - $opt_output=~ s/.gff//g; - open(my $fh, '>', $opt_output.".gff") or die "Could not open file '$opt_output' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); - } -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - -if($codonTable<0 and $codonTable>25){ - print "$codonTable codon table is not a correct value. It should be between 0 and 25 (0,23 and 25 can be problematic !)\n"; -} -else{ - print "We will use the codon table ".$codonTable.". If it is not what you want please stop the tool and use the --table option. \n"; -} - - ##################### - # MAIN # - ##################### - -#################### -# index the genome # -my $db = Bio::DB::Fasta->new($opt_fasta); -print ("Genome fasta parsed\n"); - -###################### -### Parse GFF input # -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $opt_gfffile - }); -print ("GFF3 file parsed\n"); - -### -# Fix frame -fil_cds_frame($hash_omniscient, $db, $opt_verbose); - -### -# Print result -print_omniscient($hash_omniscient, $gffout); #print gene modified - -my $end_run = time(); -my $run_time = $end_run - $start_run; -print "Job done in $run_time seconds\n"; -__END__ - -=head1 NAME - -gff3_fix_cds_frame.pl - -This script will fix the cds phases. -The result is written to the specified output file, or to STDOUT. 
- -=head1 SYNOPSIS - - ./gff3_fix_cds_frame.pl -g infile.gff -f fasta[ -o outfile ] - ./gff3_fix_cds_frame.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<-g>, B<--gff> or B<-ref> - -Input GFF3 file - -=item B<-fa> or B<--fasta> - -Genome fasta file - -=item B<--ct>, B<--codon> or B<--table> - -Codon table to use. 0 By default. - -=item B<-v> or B<--verbose> - -Add verbosity - -=item B<-o> or B<--output> - -Output GFF file. If no output file is specified, the output will be -written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_fix_features_locations_duplicated.pl b/annotation/Tools/Util/gff/gff3_sp_fix_features_locations_duplicated.pl deleted file mode 100755 index 25fad3787..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_fix_features_locations_duplicated.pl +++ /dev/null @@ -1,604 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Carp; -use Getopt::Long; -use POSIX qw(strftime); -use Pod::Usage; -use File::Basename; -use List::MoreUtils qw(uniq); -use NBIS::GFF3::Omniscient; - -my $usage = qq{ -######################################################## -# NBIS 2018 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $outfile = undef; -my $ref = undef; -my $verbose = undef; -my $help= 0; - -my @copyARGV=@ARGV; -if ( !GetOptions( - "help|h" => \$help, - "f|file|gff3|gff=s" => \$ref, - "v" => \$verbose, - "output|outfile|out|o=s" => \$outfile)) - -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 2, - -exitval => 0, - -message => "$usage\n" } ); -} - -if ( ! (defined($ref)) ){ - pod2usage( { - -message => "\nAt least 1 parameters is mandatory:\n". 
- "$usage\n", - -verbose => 0, - -exitval => 2 } ); -} - -###################### -# Manage output file # -my $gffout; -my $reportout; -if ($outfile) { - my ($filename,$path,$ext) = fileparse($outfile,qr/\.[^.]*/); - $reportout=IO::File->new(">".$path.$filename."_report.txt" ) or croak( sprintf( "Can not open '%s' for writing %s", $filename."_report.txt", $! )); - - open(my $fh, '>', $outfile) or die "Could not open file '$filename' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); -} -else{ - $reportout = \*STDOUT or die ( sprintf( "Can not open '%s' for writing %s", "STDOUT", $! )); - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} -# END Manage Ouput Directory / File # -##################################### - - -my $string1 = strftime "%m/%d/%Y at %Hh%Mm%Ss", localtime; -$string1 .= "\n\nusage: $0 @copyARGV\n\n"; -print $reportout $string1; -if($outfile){print $string1;} - - ###################### - # MAIN # - ###################### - -my $nb_case1=0; -my $nb_case2aa=0; -my $nb_case2a=0; -my $nb_case2b=0; -my $nb_case3=0; -my $nb_gene_removed=0; - -### Parse GFF input # -print ("Parse file $ref\n"); -my ($omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $ref - }); -print ("$ref file parsed\n"); - -# sort by seq id -my $hash_sortBySeq = gather_and_sort_l1_location_by_seq_id_and_strand($omniscient); - -#find overlap -my %checked_l1; -foreach my $seqid (keys %{$hash_sortBySeq}){ # loop over all the feature level1 - - if( exists_keys($hash_sortBySeq,($seqid ) ) ){ - foreach my $tag (keys %{$hash_sortBySeq->{$seqid}}){ - - foreach my $location ( @{$hash_sortBySeq->{$seqid}{$tag}}){ - my $gene_feature_id = lc($location->[0]); - - if (! 
exists_keys($omniscient, ('level1',$tag,$gene_feature_id) ) ){ next;} #feature has been removed from previous check - $checked_l1{$gene_feature_id}{$gene_feature_id}++; # to not check agaisnt himself - my $gene_feature = $omniscient->{'level1'}{$tag}{$gene_feature_id}; - - ################################################# - # START Take care of isoforms with duplicated location: - my @L2_list_to_remove = (); - foreach my $l2_type (keys %{$omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - - if(exists_keys($omniscient,('level2', $l2_type, $gene_feature_id)) and scalar @{$omniscient->{'level2'}{$l2_type}{$gene_feature_id}} > 1){ # more than one l2 feature of that type - #print "More than 2 mRNA let's check them\n" if $verbose; - - my %checked; - foreach my $l2_1 (sort {$b->_tag_value('ID') cmp $a->_tag_value('ID')} @{$omniscient->{'level2'}{$l2_type}{$gene_feature_id}}){ - my $id_l2_1 = lc($l2_1->_tag_value('ID')); - $checked{$id_l2_1}{$id_l2_1}++; - - foreach my $l2_2 (sort {$b cmp $a} @{$omniscient->{'level2'}{$l2_type}{$gene_feature_id}}){ - my $id_l2_2 = lc($l2_2->_tag_value('ID')); - - # If not itself and not already checked (A -> B is the same as B -> A), and A or B already removed and must now be skiped (skipme key) - if( ! 
exists_keys(\%checked, ( $id_l2_1 , $id_l2_2 ) ) ){ # - $checked{$id_l2_1 }{$id_l2_2}++; - $checked{$id_l2_2}{$id_l2_1 }++; - - #check their position are identical - if($l2_2->start().$l2_2->end() eq $l2_1->start().$l2_1->end()){ - - if(exists_keys($omniscient,('level3', 'exon', $id_l2_1))){ - if(exists_keys($omniscient,('level3', 'exon', $id_l2_2))){ - if(scalar @{$omniscient->{'level3'}{'exon'}{$id_l2_1}} == scalar @{$omniscient->{'level3'}{'exon'}{$id_l2_2}}){ - - #Check their subfeature are identicals - if(featuresList_identik(\@{$omniscient->{'level3'}{'exon'}{$id_l2_1}}, \@{$omniscient->{'level3'}{'exon'}{$id_l2_2}}, $verbose )){ - print "case1: $id_l2_2 and $id_l2_1 have same exon list\n" if ($verbose); - - my $size_cds1 = cds_size($omniscient, $id_l2_1); - my $size_cds2 = cds_size($omniscient, $id_l2_2); - if($size_cds1 >= $size_cds2 ){ - push(@L2_list_to_remove, $id_l2_2); - print "case1: push1\n" if $verbose; - } - elsif($size_cds1 < $size_cds2){ - push(@L2_list_to_remove, $id_l2_1); - print "case1: push2\n" if $verbose; - } - elsif($size_cds1){ - push(@L2_list_to_remove, $id_l2_2); - print "case1: push3\n" if $verbose; - } - else{ - push(@L2_list_to_remove, $id_l2_1); - print "case1: push4\n" if $verbose; - } - } - } - } - } - } - } - } - } - } - } - - if(@L2_list_to_remove){ - my @L2_list_to_remove_filtered = uniq(@L2_list_to_remove); - $nb_case1 = $nb_case1 + scalar @L2_list_to_remove_filtered; - print "case1 (removing mRNA isoform identic ): ".join(",", @L2_list_to_remove_filtered)."\n"; - remove_omniscient_elements_from_level2_ID_list($omniscient, \@L2_list_to_remove_filtered); - } - # END Take care of isoforms with duplicated location: - ####################################################### - - - - ####################################################### - # START Take care of other gene with duplicated location - # - #foreach my $gene_feature_id2 (@sorted_genefeature_ids){ - foreach my $location2 (@{$hash_sortBySeq->{$seqid}{$tag}}){ - my 
$gene_feature_id2 = lc($location2->[0]); - - if (! exists_keys(\%checked_l1,($gene_feature_id,$gene_feature_id2 ) ) ){ - #print $gene_feature_id.":".$hash_sortBySeq->{$seqid}{'level1'}{$tag}{$gene_feature_id}[1]." $gene_feature_id2:".$hash_sortBySeq->{$seqid}{'level1'}{$tag}{$gene_feature_id2}[1]."\n"; - if ( $location2->[1] > $location->[1] ) { last; } # start of gene2 is over start of gene 1 we could stop to loop... no need to look at following genes in the list - if (! exists_keys($omniscient, ('level1',$tag,$gene_feature_id2) ) ){ next;} #feature has been removed from previous check - - $checked_l1{$gene_feature_id }{$gene_feature_id2}++; - $checked_l1{$gene_feature_id2 }{$gene_feature_id}++; - my @L2_list_to_remove = (); - my $gene_feature2 = $omniscient->{'level1'}{$tag}{$gene_feature_id2}; - - #The two genes overlap - if( ($gene_feature2->start <= $gene_feature->end() ) and ($gene_feature2->end >= $gene_feature->start) ){ - print "$gene_feature_id and $gene_feature_id2 overlap\n" if $verbose; - # Loop over the L2 from the first gene feature - foreach my $l2_type (keys %{$omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... 
- if ( exists ($omniscient->{'level2'}{$l2_type}{$gene_feature_id} ) ){ - - foreach my $l2_1 (sort {$b->_tag_value('ID') cmp $a->_tag_value('ID')} @{$omniscient->{'level2'}{$l2_type}{$gene_feature_id}}){ - my $id_l2_1 = lc($l2_1->_tag_value('ID')); - - # Loop over the L2 from the second gene feature - if ( exists ($omniscient->{'level2'}{$l2_type}{$gene_feature_id2} ) ){ - - my $keep = 1; - foreach my $l2_2 (sort {$b->_tag_value('ID') cmp $a->_tag_value('ID')} @{$omniscient->{'level2'}{$l2_type}{$gene_feature_id2}}){ - my $id_l2_2 = lc($l2_2->_tag_value('ID')); - - #check their position are identical - if($l2_2->start().$l2_2->end() eq $l2_1->start().$l2_1->end()){ - - if(exists_keys($omniscient,('level3', 'exon', $id_l2_1))){ - if(exists_keys($omniscient,('level3', 'exon', $id_l2_2))){ - if(scalar @{$omniscient->{'level3'}{'exon'}{$id_l2_1}} == scalar @{$omniscient->{'level3'}{'exon'}{$id_l2_2}}){ - - #EXON identicals - if(featuresList_identik(\@{$omniscient->{'level3'}{'exon'}{$id_l2_1}}, \@{$omniscient->{'level3'}{'exon'}{$id_l2_2}}, $verbose )){ - #EXON CDS - print "case2: $id_l2_2 and $id_l2_1 have same exon list\n" if $verbose; - if ( ! exists_keys($omniscient, ('level3','cds',$id_l2_1)) and ! 
exists_keys($omniscient, ('level3','cds',$id_l2_2) ) ) { - print "case2aa: $id_l2_2 and $id_l2_1 have no CDS\n" if $verbose; - $nb_case2aa++; - push(@L2_list_to_remove, $id_l2_2); - } - else{ - if(featuresList_identik(\@{$omniscient->{'level3'}{'cds'}{$id_l2_1}}, \@{$omniscient->{'level3'}{'cds'}{$id_l2_2}}, $verbose )){ - print "case2: $id_l2_2 and $id_l2_1 have same CDS list\n" if $verbose; - $nb_case2a++; - #identik because no CDS, we could remove one randaomly - - my $size_cds1 = cds_size($omniscient, $id_l2_1); - my $size_cds2 = cds_size($omniscient, $id_l2_2); - if($size_cds1 >= $size_cds2 ){ - push(@L2_list_to_remove, $id_l2_2); - print "case2: push1 $size_cds1 $size_cds2\n" if $verbose; - } - elsif($size_cds1 < $size_cds2){ - push(@L2_list_to_remove, $id_l2_1); - print "case2: push2\n" if $verbose; - } - elsif($size_cds1){ - push(@L2_list_to_remove, $id_l2_2); - print "case2: push3\n" if $verbose; - } - else{ - push(@L2_list_to_remove, $id_l2_1); - print "case2: push4\n" if $verbose; - } - } - - # CDS are not identic Let's reshape UTRS - else{ - $nb_case2b++; - reshape_the_2_gene_models($omniscient, $gene_feature_id, $gene_feature_id2, $verbose); - print "case2-A (Exon structure identic from different genes, but CDS different, Let's reshape the UTRs to make them different.): $id_l2_1 <=> $id_l2_2\n"; - } - } - } - } - } - } - } - } - } - } - } - } - if(@L2_list_to_remove){ - print "case2 (removing mRNA identic from different genes: ".join(",", @L2_list_to_remove)."\n"; - remove_omniscient_elements_from_level2_ID_list($omniscient, \@L2_list_to_remove); - if (! exists_keys($omniscient, ('level1',$tag,$gene_feature_id2) ) or ! exists_keys($omniscient, ('level1',$tag,$gene_feature_id) ) ){ $nb_gene_removed++;} - } - } - - - # Not identik at exon level but identik a gene level. Whan arriving at this particular case, it means that the CDS do not overlap. - # We have to shrink the UTR and reshape gene and mRNA extremities. 
- if (exists_keys($omniscient, ('level1',$tag,$gene_feature_id2) ) and exists_keys($omniscient, ('level1',$tag,$gene_feature_id) ) ){ - - if( ($gene_feature2->start == $gene_feature->start) and ($gene_feature2->end == $gene_feature->end) ){ - print "case3 (reshaping genes): $gene_feature_id2 and $gene_feature_id have same location \n"; - $nb_case3++; - - reshape_the_2_gene_models($omniscient, $gene_feature_id, $gene_feature_id2, $verbose); - } - } - } - } - } - } - } -} - -my $string_print = "\nWe found $nb_case1 cases where isoforms have identical exon structures (we removed duplicates by keeping the one with longest CDS).\n"; -$string_print .= "We found $nb_case2aa cases where l2 from different gene identifier have identical exon but no CDS at all (we removed one duplicate).\n"; -$string_print .= "We found $nb_case2a cases where l2 from different gene identifier have identical exon and CDS structures (we removed duplicates by keeping the one with longest CDS).\n"; -$string_print .= "We found $nb_case2b cases where l2 from different gene identifier have identical exon structures (we reshaped UTRs to modify gene locations).\n"; -$string_print .= "Whe removed $nb_gene_removed genes because no more l2 were linked to them.\n"; -$string_print .= "We found $nb_case3 cases where 2 genes have same location while CDS are differents. 
In that case we modified the gene locations by clipping UTRs.\n"; -print_omniscient($omniscient, $gffout); #print gene modified - -print $reportout $string_print; -if($outfile){print $string_print;} - -####################################################################################################################### - #################### - # METHODS # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - - -sub reshape_the_2_gene_models{ - my ($omniscient, $gene_feature_id, $gene_feature_id2, $verbose)=@_; - - my $extrem_cds_start = get_extrem_cds_start($omniscient, $gene_feature_id); - my $extrem_cds_start2 = get_extrem_cds_start($omniscient, $gene_feature_id2); - my $extrem_cds_end = get_extrem_cds_end($omniscient, $gene_feature_id); - my $extrem_cds_end2 = get_extrem_cds_end($omniscient, $gene_feature_id2); - - if($extrem_cds_start < $extrem_cds_start2){ - print "remove after $gene_feature_id and before $gene_feature_id2\n" if $verbose; - #take care of gene_feature_id - # remodelate exon list - remodelate_exon_list_right($omniscient, $gene_feature_id, $extrem_cds_end); - remodelate_exon_list_left($omniscient, $gene_feature_id2, $extrem_cds_end); #+1 to avoid creating overlaping feature - } - else{ - print "remove before $gene_feature_id and after $gene_feature_id2\n" if $verbose; - remodelate_exon_list_right($omniscient, $gene_feature_id2, $extrem_cds_end2); - remodelate_exon_list_left($omniscient, $gene_feature_id, $extrem_cds_end2); - } - handle_l3_features($omniscient, $gene_feature_id2); - check_record_positions($omniscient, $gene_feature_id2); - handle_l3_features($omniscient, $gene_feature_id); - check_record_positions($omniscient, $gene_feature_id); -} - -# We will remodelate the l3 features -sub handle_l3_features{ - my ($omniscient, $id_l1)=@_; - - # Remove all level3 feature execept exon - my @tag_list=('exon'); - my $l2_id_list= get_all_id_l2($omniscient, $id_l1); - my %hash_cds_positions; # keep track 
of start - stop - foreach my $l2_id_x (@$l2_id_list){ - my ($cds_start, $cds_end) = get_cds_positions($omniscient, $id_l1, $l2_id_x); - $hash_cds_positions{$l2_id_x} = [$cds_start, $cds_end]; - } - #remove all l3 feature except exon - remove_tuple_from_omniscient($l2_id_list, $omniscient, 'level3', 'false', \@tag_list); - foreach my $l2_id_x (@$l2_id_list){ - my $cds_start = $hash_cds_positions{$l2_id_x}[0]; - my $cds_end = $hash_cds_positions{$l2_id_x}[1]; - - my ($utr5_list, $cds_list, $utr3) = modelate_utr_and_cds_features_from_exon_features_and_cds_start_stop($omniscient->{'level3'}{'exon'}{$l2_id_x}, $cds_start, $cds_end); - my @level1_list; - my @level2_list; - my @level3_list=(@$cds_list, @$utr5_list, @$utr3); - append_omniscient($omniscient, \@level1_list, \@level2_list, \@level3_list); - } - -} - -sub get_all_id_l2{ - my ($omniscient, $id_l1)=@_; - my @result; - - foreach my $l2_type (keys %{$omniscient->{'level2'}}){ # primary_tag_l2 = mrna or mirna or ncrna or trna etc... - if ( exists ($omniscient->{'level2'}{$l2_type}{$id_l1} ) ){ - foreach my $feature_level2 ( @{$omniscient->{'level2'}{$l2_type}{$id_l1}}) { - my $level2_ID = lc($feature_level2->_tag_value('ID')); - push( @result, $level2_ID); - } - } - } - return \@result; -} - - -sub remodelate_exon_list_right{ - my ($omniscient, $id_l1, $limit)=@_; - - foreach my $l2_type (keys %{$omniscient->{'level2'}}){ # primary_tag_l2 = mrna or mirna or ncrna or trna etc... 
- if ( exists ($omniscient->{'level2'}{$l2_type}{$id_l1} ) ){ - foreach my $feature_level2 ( @{$omniscient->{'level2'}{$l2_type}{$id_l1}}) { - my $level2_ID = lc($feature_level2->_tag_value('ID')); - - if ( exists ($omniscient->{'level3'}{'exon'}{$level2_ID} ) ){ - my $mustModifyList=undef; - my @listok; - foreach my $feature_level3 ( @{$omniscient->{'level3'}{'exon'}{$level2_ID}}) { - if ($feature_level3->start() >= $limit){ - $mustModifyList="yes"; - } - elsif ($feature_level3->end() > $limit){ - $feature_level3->end($limit); - push(@listok, $feature_level3); - } - else{ - push(@listok, $feature_level3); - } - } - if($mustModifyList){ # at least one feature has been removed from list. Save the new list - @{$omniscient->{'level3'}{'exon'}{$level2_ID} } = @listok; - } - } - } - } - } -} - -sub remodelate_exon_list_left{ - my ($omniscient, $id_l1, $limit)=@_; - - foreach my $l2_type (keys %{$omniscient->{'level2'}}){ # primary_tag_l2 = mrna or mirna or ncrna or trna etc... - if ( exists ($omniscient->{'level2'}{$l2_type}{$id_l1} ) ){ - foreach my $feature_level2 ( @{$omniscient->{'level2'}{$l2_type}{$id_l1}}) { - my $level2_ID = lc($feature_level2->_tag_value('ID')); - - if ( exists ($omniscient->{'level3'}{'exon'}{$level2_ID} ) ){ - my $mustModifyList=undef; - my @listok; - foreach my $feature_level3 ( @{$omniscient->{'level3'}{'exon'}{$level2_ID}}) { - if ($feature_level3->end() <= $limit){ - $mustModifyList="yes"; - } - elsif ($feature_level3->start() < $limit){ - $feature_level3->start($limit); - push(@listok, $feature_level3); - } - else{ - push(@listok, $feature_level3); - } - } - if($mustModifyList){ # at least one feature has been removed from list. Save the new list - @{$omniscient->{'level3'}{'exon'}{$level2_ID} } = @listok; - } - } - } - } - } -} - -sub get_cds_positions{ - my ($omniscient, $id_l1, $id_l2)=@_; - my $start=0; - my $end=0; - - foreach my $l2_type (keys %{$omniscient->{'level2'}}){ # primary_tag_l2 = mrna or mirna or ncrna or trna etc... 
- if ( exists_keys ($omniscient, ('level2', $l2_type, $id_l1) ) ){ - - if ( exists_keys ($omniscient, ('level3', 'cds', $id_l2 ) ) ){ - my @sorted_cds = sort { $a->start() <=> $b->start() } @{$omniscient->{'level3'}{'cds'}{$id_l2}}; - $start = @{$omniscient->{'level3'}{'cds'}{$id_l2}}[0]->start; - $end = @{$omniscient->{'level3'}{'cds'}{$id_l2}}[$#{$omniscient->{'level3'}{'cds'}{$id_l2}}]->end; - - } - else{ - print "WARNING $id_l2 do not exists\n"; - } - } - } - return $start,$end; -} - -sub get_extrem_cds_start{ - my ($omniscient, $id_l1)=@_; - my $result=100000000000; - - foreach my $l2_type (keys %{$omniscient->{'level2'}}){ # primary_tag_l2 = mrna or mirna or ncrna or trna etc... - if ( exists ($omniscient->{'level2'}{$l2_type}{$id_l1} ) ){ - foreach my $feature_level2 ( @{$omniscient->{'level2'}{$l2_type}{$id_l1}}) { - my $level2_ID = lc($feature_level2->_tag_value('ID')); - if ( exists ($omniscient->{'level3'}{'cds'}{$level2_ID} ) ){ - foreach my $feature_level3 ( @{$omniscient->{'level3'}{'cds'}{$level2_ID}}) { - if ($feature_level3->start() < $result){ - $result = $feature_level3->start(); - } - } - } - } - } - } - return $result; -} - -sub get_extrem_cds_end{ - my ($omniscient, $id_l1)=@_; - my $result=0; - - foreach my $l2_type (keys %{$omniscient->{'level2'}}){ # primary_tag_l2 = mrna or mirna or ncrna or trna etc... 
- if ( exists ($omniscient->{'level2'}{$l2_type}{$id_l1} ) ){ - foreach my $feature_level2 ( @{$omniscient->{'level2'}{$l2_type}{$id_l1}}) { - my $level2_ID = lc($feature_level2->_tag_value('ID')); - - if ( exists ($omniscient->{'level3'}{'cds'}{$level2_ID} ) ){ - foreach my $feature_level3 ( @{$omniscient->{'level3'}{'cds'}{$level2_ID}}) { - if ($feature_level3->end() > $result){ - $result = $feature_level3->end(); - } - } - } - } - } - } - - return $result; -} - -sub cds_size{ - my ($omniscient, $id_l2)=@_; - my $size=undef; - - if ( exists_keys($omniscient, ('level3', 'cds', lc($id_l2)))){ - foreach my $l3 ( @{$omniscient->{'level3'}{'cds'}{lc($id_l2)}} ){ - $size+= $l3->end - $l3->start +1; - } - } - return $size; -} - -__END__ - -=head1 NAME - -gff3_sp_fix_features_locations_duplicated.pl - Check a gff3 annotation file to -find cases where differents gene features have CDS that overlap. In this case the gene features will be merged in only one. -One gene is choosen as reference, and the mRNA from the other gene will be linked to it. So, it creates isoforms. - -=head1 SYNOPSIS - - ./gff3_sp_fix_features_locations_duplicated.pl -f infile [-o outfile] - ./gff3_sp_fix_features_locations_duplicated.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<-f>, B<--file>, B<--gff3> or B<--gff> - -Input GFF3 file. - -=item B<-o>, B<--out>, B<--output> or B<--outfile> - -Output file. If none given, will be display in standard output. - -=item B<--help> or B<-h> - -Display this helpful text. - -=back - -=cut -Test case for first part: -@001900F|arrow|arrow maker gene 5082 6945 . + . ID=ACAOBTG00000034334;makerName=maker-@001900F|arrow|arrow-exonerate_est2genome-gene-0.0 -@001900F|arrow|arrow maker mRNA 5082 6945 5456 + . ID=ACAOBTM00000062562;Parent=ACAOBTG00000034334;_AED=0.22;_QI=61|1|1|1|0|0|2|575|165;_eAED=0.22;makerName=maker-@001900F|arrow|arrow-exonerate_est2genome-gene-0.0-mRNA-1;product=hypothetical protein -@001900F|arrow|arrow maker exon 5082 5815 . + . 
ID=ACAOBTE00000370675;Parent=ACAOBTM00000062562;makerName=maker-@001900F|arrow|arrow-exonerate_est2genome-gene-0.0-mRNA-1:1 -@001900F|arrow|arrow maker exon 6546 6945 . + . ID=ACAOBTE00000370676;Parent=ACAOBTM00000062562;makerName=maker-@001900F|arrow|arrow-exonerate_est2genome-gene-0.0-mRNA-1:2 -@001900F|arrow|arrow maker CDS 5143 5640 . + 0 ID=ACAOBTC00000063258;Parent=ACAOBTM00000062562;makerName=maker-@001900F|arrow|arrow-exonerate_est2genome-gene-0.0-mRNA-1:cds -@001900F|arrow|arrow maker five_prime_UTR 5082 5142 . + . ID=ACAOBTF00000063257;Parent=ACAOBTM00000062562;makerName=maker-@001900F|arrow|arrow-exonerate_est2genome-gene-0.0-mRNA-1:five_prime_utr -@001900F|arrow|arrow maker three_prime_UTR 5641 5815 . + . ID=ACAOBTT00000063257;Parent=ACAOBTM00000062562;makerName=maker-@001900F|arrow|arrow-exonerate_est2genome-gene-0.0-mRNA-1:three_prime_utr -@001900F|arrow|arrow maker three_prime_UTR 6546 6945 . + . ID=ACAOBTT00000063257;Parent=ACAOBTM00000062562;makerName=maker-@001900F|arrow|arrow-exonerate_est2genome-gene-0.0-mRNA-1:three_prime_utr -@001900F|arrow|arrow maker mRNA 5082 6945 . + . ID=ACAOBTM00000062561;Parent=ACAOBTG00000034334;_AED=0.22;_QI=722|1|1|1|0|0.5|2|28|127;_eAED=0.22;makerName=augustus_masked-@001900F|arrow|arrow-processed-gene-0.22-mRNA-1;product=hypothetical protein -@001900F|arrow|arrow maker exon 5082 5815 . + . ID=ACAOBTE00000370673;Parent=ACAOBTM00000062561;makerName=augustus_masked-@001900F|arrow|arrow-processed-gene-0.22-mRNA-1:1 -@001900F|arrow|arrow maker exon 6546 6945 . + . ID=ACAOBTE00000370674;Parent=ACAOBTM00000062561;makerName=augustus_masked-@001900F|arrow|arrow-processed-gene-0.22-mRNA-1:2 -@001900F|arrow|arrow maker CDS 5804 5815 . + 0 ID=ACAOBTC00000063257;Parent=ACAOBTM00000062561;makerName=augustus_masked-@001900F|arrow|arrow-processed-gene-0.22-mRNA-1:cds -@001900F|arrow|arrow maker CDS 6546 6917 . 
+ 0 ID=ACAOBTC00000063257;Parent=ACAOBTM00000062561;makerName=IDmodified-cds-30904 -@001900F|arrow|arrow maker five_prime_UTR 5082 5803 . + . ID=ACAOBTF00000063256;Parent=ACAOBTM00000062561;makerName=augustus_masked-@001900F|arrow|arrow-processed-gene-0.22-mRNA-1:five_prime_utr -@001900F|arrow|arrow maker three_prime_UTR 6918 6945 . + . ID=ACAOBTT00000063256;Parent=ACAOBTM00000062561;makerName=augustus_masked-@001900F|arrow|arrow-processed-gene-0.22-mRNA-1:three_prime_utr - - -Test case for second part: -@001900F|arrow|arrow maker gene 5082 6945 . + . ID=ACAOBTG00000034334;makerName=maker-@001900F|arrow|arrow-exonerate_est2genome-gene-0.0 -@001900F|arrow|arrow maker mRNA 5082 6945 5456 + . ID=ACAOBTM00000062562;Parent=ACAOBTG00000034334;_AED=0.22;_QI=61|1|1|1|0|0|2|575|165;_eAED=0.22;makerName=maker-@001900F|arrow|arrow-exonerate_est2genome-gene-0.0-mRNA-1;product=hypothetical protein -@001900F|arrow|arrow maker exon 5082 5815 . + . ID=ACAOBTE00000370675;Parent=ACAOBTM00000062562;makerName=maker-@001900F|arrow|arrow-exonerate_est2genome-gene-0.0-mRNA-1:1 -@001900F|arrow|arrow maker exon 6546 6945 . + . ID=ACAOBTE00000370676;Parent=ACAOBTM00000062562;makerName=maker-@001900F|arrow|arrow-exonerate_est2genome-gene-0.0-mRNA-1:2 -@001900F|arrow|arrow maker CDS 5143 5640 . + 0 ID=ACAOBTC00000063258;Parent=ACAOBTM00000062562;makerName=maker-@001900F|arrow|arrow-exonerate_est2genome-gene-0.0-mRNA-1:cds -@001900F|arrow|arrow maker five_prime_UTR 5082 5142 . + . ID=ACAOBTF00000063257;Parent=ACAOBTM00000062562;makerName=maker-@001900F|arrow|arrow-exonerate_est2genome-gene-0.0-mRNA-1:five_prime_utr -@001900F|arrow|arrow maker three_prime_UTR 5641 5815 . + . ID=ACAOBTT00000063257;Parent=ACAOBTM00000062562;makerName=maker-@001900F|arrow|arrow-exonerate_est2genome-gene-0.0-mRNA-1:three_prime_utr -@001900F|arrow|arrow maker three_prime_UTR 6546 6945 . + . 
ID=ACAOBTT00000063257;Parent=ACAOBTM00000062562;makerName=maker-@001900F|arrow|arrow-exonerate_est2genome-gene-0.0-mRNA-1:three_prime_utr -@001900F|arrow|arrow maker gene 5082 6945 . + . ID=ACAOBTG00000034333;makerName=augustus_masked-@001900F|arrow|arrow-processed-gene-0.22 -@001900F|arrow|arrow maker mRNA 5082 6945 . + . ID=ACAOBTM00000062561;Parent=ACAOBTG00000034333;_AED=0.22;_QI=722|1|1|1|0|0.5|2|28|127;_eAED=0.22;makerName=augustus_masked-@001900F|arrow|arrow-processed-gene-0.22-mRNA-1;product=hypothetical protein -@001900F|arrow|arrow maker exon 5082 5815 . + . ID=ACAOBTE00000370673;Parent=ACAOBTM00000062561;makerName=augustus_masked-@001900F|arrow|arrow-processed-gene-0.22-mRNA-1:1 -@001900F|arrow|arrow maker exon 6546 6945 . + . ID=ACAOBTE00000370674;Parent=ACAOBTM00000062561;makerName=augustus_masked-@001900F|arrow|arrow-processed-gene-0.22-mRNA-1:2 -@001900F|arrow|arrow maker CDS 5804 5815 . + 0 ID=ACAOBTC00000063257;Parent=ACAOBTM00000062561;makerName=augustus_masked-@001900F|arrow|arrow-processed-gene-0.22-mRNA-1:cds -@001900F|arrow|arrow maker CDS 6546 6917 . + 0 ID=ACAOBTC00000063257;Parent=ACAOBTM00000062561;makerName=IDmodified-cds-30904 -@001900F|arrow|arrow maker five_prime_UTR 5082 5803 . + . ID=ACAOBTF00000063256;Parent=ACAOBTM00000062561;makerName=augustus_masked-@001900F|arrow|arrow-processed-gene-0.22-mRNA-1:five_prime_utr -@001900F|arrow|arrow maker three_prime_UTR 6918 6945 . + . 
ID=ACAOBTT00000063256;Parent=ACAOBTM00000062561;makerName=augustus_masked-@001900F|arrow|arrow-processed-gene-0.22-mRNA-1:three_prime_utr diff --git a/annotation/Tools/Util/gff/gff3_sp_fix_fusion.pl b/annotation/Tools/Util/gff/gff3_sp_fix_fusion.pl deleted file mode 100755 index 3c0264e58..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_fix_fusion.pl +++ /dev/null @@ -1,1445 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Carp; -use Clone 'clone'; -use File::Basename; -use Getopt::Long; -use Statistics::R; -use Pod::Usage; -use List::MoreUtils qw(uniq); -use Bio::Tools::GFF; -use Bio::DB::Fasta; -use Bio::SeqIO; -use Bio::Tools::CodonTable; -use NBIS::GFF3::Omniscient; -use NBIS::Plot::R qw(:Ok); - -my $start_run = time(); -my $startP=time; -my $SIZE_OPT=21; -my $PREFIX_CPT_EXON=1; -my $PREFIX_CPT_MRNA=1; - -my $header = qq{ -######################################################## -# NBIS 2019 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $outfile = undef; -my $gff = undef; -my $file_fasta=undef; -my $opt_codonTableID=0; -my $stranded=undef; -my $threshold=undef; -my $verbose=undef; -my $help= 0; - -my @copyARGV=@ARGV; -if ( !GetOptions( - "help|h" => \$help, - "gff=s" => \$gff, - "fasta|fa=s" => \$file_fasta, - "stranded|s" => \$stranded, - "table|codon|ct=i" => \$opt_codonTableID, - "verbose|v" => \$verbose, - "threshold|t=i" => \$threshold, - "output|outfile|out|o=s" => \$outfile)) - -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ( ! 
(defined($gff)) or !(defined($file_fasta)) ){ - pod2usage( { - -message => "$header\nAt least 2 parameter is mandatory:\nInput reference gff file (--gff) and Input fasta file (--fasta)\n\n", - -verbose => 0, - -exitval => 1 } ); -} - -###################### -# Manage output file # -my $gffout; -my $gffout2; -my $gffout3; -my $gffout4; -if ($outfile) { - $outfile=~ s/.gff//g; -open(my $fh, '>', $outfile."-intact.gff") or die "Could not open file '$outfile' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); -open(my $fh2, '>', $outfile."-only_modified.gff") or die "Could not open file '$outfile' $!"; - $gffout2= Bio::Tools::GFF->new(-fh => $fh2, -gff_version => 3 ); -open(my $fh3, '>', $outfile."-all.gff") or die "Could not open file '$outfile' $!"; - $gffout3= Bio::Tools::GFF->new(-fh => $fh3, -gff_version => 3 ); -open($gffout4, '>', $outfile."-report.txt") or die "Could not open file '$outfile' $!"; -} -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); - $gffout2 = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); - $gffout3 = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); - $gffout4 = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - -if($opt_codonTableID<0 and $opt_codonTableID>25){ - print "$opt_codonTableID codon table is not a correct value. It should be between 0 and 25 (0,23 and 25 can be problematic !)\n"; -} -else{ - print "We will use the codon table ".$opt_codonTableID.". If it is not what you want please stop the tool and use the --table option. \n"; -} - -if(!$threshold){ - $threshold=100; -} -print "Minimum protein length taken in account = $threshold AA\n"; - -if($stranded){ - $stranded=1; - print "You say that annotation has been done using stranded RNA. So, most probable fusion will be between close gene in same direction. We will focuse on that !\n"; -} -else{ print "You didn't use the option stranded. 
We will look for fusion in all strand (+ and -)!\n";} - - ##################### - # MAIN # - ##################### - - -###################### -### Parse GFF input # -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $gff - }); -print ("GFF3 file parsed\n"); - -#################### -# index the genome # -my $db = Bio::DB::Fasta->new($file_fasta); -print ("Genome fasta parsed\n"); - -#################### -my $pseudo_threshold=70; -#counters -my $counter_case21=0; -my $geneCounter=0; -my $mRNACounter=0; -my $mRNACounter_fixed=0; -my $special_or_partial_mRNA=0; - -my %omniscient_modified_gene; -my @intact_gene_list; - -# create the hash temp -my %tmpOmniscientR; -my $tmpOmniscient=\%tmpOmniscientR; -my @mRNAlistToTakeCareR; -my $mRNAlistToTakeCare=\@mRNAlistToTakeCareR; - -# manage progression bar variables -my $TotalFeatureL1 = nb_feature_level1($hash_omniscient); -my $featureChecked = 0; -local $| = 1; # Or use IO::Handle; STDOUT->autoflush; Use to print progression bar - -foreach my $primary_tag_key_level1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... - foreach my $gene_id (keys %{$hash_omniscient->{'level1'}{$primary_tag_key_level1}}){ - - #Display progression - $featureChecked++; - if ((10 - (time - $startP)) < 0) { - my $done = ($featureChecked*100)/$TotalFeatureL1; - $done = sprintf ('%.0f', $done); - if($verbose) { print "Progress : $done %"; } - else{ print "\rProgress : $done %"; } - $startP= time; - } - - my $gene_feature=$hash_omniscient->{'level1'}{$primary_tag_key_level1}{$gene_id}; - my $oneMRNAmodified=undef; - my $mrna_pseudo=0; - my @list_mrna_pseudo; - my $one_level2_modified; # check if one of the level2 feature will be modified - my $number_mrna=0; - - # COPY gene and subfeatures in tmpOmniscient. 
- %$tmpOmniscient = (); # empty the hash - @$mRNAlistToTakeCare = (); # empty the list - my @tmpListID=($gene_id); - fill_omniscient_from_other_omniscient_level1_id(\@tmpListID,$hash_omniscient,$tmpOmniscient); - - foreach my $primary_tag_key_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - if (exists_keys($hash_omniscient, ('level2', $primary_tag_key_level2, $gene_id)) ){ # check if they have mRNA avoiding autovivifcation - foreach my $level2_feature ( @{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$gene_id}}) { - - $PREFIX_CPT_MRNA=1; - - # get multiple info - $number_mrna=$#{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$gene_id}}+1; - my $id_level2 = lc($level2_feature->_tag_value('ID')); - push (@$mRNAlistToTakeCare,$id_level2); - - ############################## - #If UTR3 # - my $oneRoundAgain="yes"; - my $nbNewUTR3gene=0; - if ( exists_keys($hash_omniscient, ('level3', 'three_prime_utr', $id_level2)) ){ - - while($oneRoundAgain){ - if($verbose) {print "\nNew round three_prime_utr\n";} - my ($breakRound, $nbNewUTRgene, $mRNAlistToTakeCare) = take_care_utr('three_prime_utr', $tmpOmniscient, $mRNAlistToTakeCare, $stranded, $gffout); - $oneRoundAgain = $breakRound; - $nbNewUTR3gene = $nbNewUTR3gene+$nbNewUTRgene; - } - } - ############################## - #If UTR5 # - $oneRoundAgain="yes"; - my $nbNewUTR5gene=0; - if ( exists_keys($hash_omniscient, ('level3', 'five_prime_utr', $id_level2)) ){ - - while($oneRoundAgain){ - if($verbose) { print "\nNew round five_prime_utr\n";} - my ($breakRound, $nbNewUTRgene, $mRNAlistToTakeCare) = take_care_utr('five_prime_utr', $tmpOmniscient, $mRNAlistToTakeCare, $stranded, $gffout); - $oneRoundAgain = $breakRound; - $nbNewUTR5gene = $nbNewUTR5gene+$nbNewUTRgene; - } - } - ########################## - #If UTR not well defined # - if ( exists ($hash_omniscient->{'level3'}{'utr'}{$id_level2} ) ){ - print "Sorry but we need to know which utr it is 
... 5 or 3 ?\n";exit; - } - - ############# - # CHECK AFTER ALL UTR ANALIZED - my $totalNewUTRgene=$nbNewUTR3gene+$nbNewUTR5gene; - if($totalNewUTRgene > 0){ - $oneMRNAmodified="yes"; - $mRNACounter_fixed++; # Count only mRNA modified - } - @$mRNAlistToTakeCare = (); # empty the list - } # End foreach mRNA - } - if($oneMRNAmodified){ - $geneCounter++; - $mRNACounter=$mRNACounter+$number_mrna; #add all the mRNA if at least one modified - #save remodelate gene name - - merge_omniscients(\%omniscient_modified_gene, $tmpOmniscient); - } - else{push(@intact_gene_list, $gene_id);} - } - } -} -# end progreesion bar -if($verbose) { print "Progress : 100 %\n"; } -else{print "\rProgress : 100 %\n"; } - -### -# Fix frame -fil_cds_frame(\%omniscient_modified_gene, $db, $opt_codonTableID); -fil_cds_frame($hash_omniscient, $db, $opt_codonTableID); - - -##################################### -# Manage modified gene to be sure they not overlap already existing gene. If yes => we give the same gene ID and remove one. -print "Managing spurious labelling at gene level\n"; -# 1) create a hash omniscient intact -my %hash_omniscient_intactR; -my $hash_omniscient_intact=\%hash_omniscient_intactR; -fill_omniscient_from_other_omniscient_level1_id(\@intact_gene_list,$hash_omniscient,$hash_omniscient_intact); -delete $hash_omniscient->{$_} for (keys %{$hash_omniscient}); - -# 2) print the intact one -print "print intact...\n"; -print_omniscient($hash_omniscient_intact, $gffout); #print intact gene to the file - -# 3) Sort by seq_id - review all newly created gene -my $hash_sortBySeq = gather_and_sort_l1_by_seq_id_and_strand($hash_omniscient_intact); -my $overlap=0; -foreach my $tag_l1 (keys %{$omniscient_modified_gene{'level1'}} ){ # primary_tag_key_level1 = gene or repeat etc... 
- foreach my $id_l1 (keys %{$omniscient_modified_gene{'level1'}{$tag_l1}} ) { - my $geneFeature = $omniscient_modified_gene{'level1'}{$tag_l1}{$id_l1}; - if (find_overlap_between_geneFeature_and_sortBySeqId($geneFeature, \%omniscient_modified_gene, $hash_omniscient_intact, $hash_sortBySeq) ){ - $overlap++ - } - } -} - -# 4) special case where two newly created gene from to different gene are overlapping -# Be careful If you by testing 2 identical omniscient, the method could remove element haven't yet been loop over. So check the gene exists before to analyse it ! -$hash_sortBySeq = gather_and_sort_l1_by_seq_id_and_strand(\%omniscient_modified_gene); -foreach my $tag_l1 (keys %{$omniscient_modified_gene{'level1'}} ){ # primary_tag_key_level1 = gene or repeat etc... - foreach my $id_l1 (keys %{$omniscient_modified_gene{'level1'}{$tag_l1}} ) { - - if( exists_keys( \%omniscient_modified_gene, ('level1', $tag_l1, $id_l1 ) ) ) { - my $geneFeature = $omniscient_modified_gene{'level1'}{$tag_l1}{$id_l1}; - if (find_overlap_between_geneFeature_and_sortBySeqId($geneFeature, \%omniscient_modified_gene, \%omniscient_modified_gene, $hash_sortBySeq) ){ - $overlap++ - } - } - } -} - -# 5) Print modified genes -print "print modified...\n"; -print_omniscient(\%omniscient_modified_gene, $gffout2); #print gene modified in file - -# 6) Print all together -merge_omniscients_fuse_l1duplicates($hash_omniscient_intact, \%omniscient_modified_gene); -print "print all together...\n"; -print_omniscient($hash_omniscient_intact, $gffout3); - -if ($overlap and $verbose){print "We found $overlap case gene overlapping at CDS level wihout the same ID, we fixed them.\n";} -# End manage overlaping name -##################################### - -#END -my $string_to_print="usage: $0 @copyARGV\n"; -$string_to_print .="Results:\n"; -$string_to_print .="$geneCounter genes affected and $mRNACounter_fixed mRNA.\n"; -my $end_run = time(); -my $run_time = $end_run - $start_run; -$string_to_print .= "Job done 
in $run_time seconds\n"; - -if($outfile){ - print $gffout4 $string_to_print -} -print $string_to_print; -print "Bye Bye.\n"; -####################################################################################################################### - #################### - # METHODS # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - -sub merge_omniscients_fuse_l1duplicates { - my ($hash_omniscient1, $hash_omniscient2)=@_; - - # == LEVEL 1 == # - - foreach my $tag_l1 (keys %{$hash_omniscient2->{'level1'}}){ - foreach my $id_l1_2 (keys %{$hash_omniscient2->{'level1'}{$tag_l1}}){ - - if ( ! exists_keys ( $hash_omniscient1, ('level1', $tag_l1, $id_l1_2 ) ) ){ - my $feature = $hash_omniscient2->{'level1'}{$tag_l1}{lc($id_l1_2)}; - $hash_omniscient1->{'level1'}{$tag_l1}{$id_l1_2} = $hash_omniscient2->{'level1'}{$tag_l1}{$id_l1_2}; # save feature level1 - } - - # == LEVEL 2 == # - - foreach my $tag_l2 (keys %{$hash_omniscient2->{'level2'}}){ - if (exists_keys ($hash_omniscient2, ('level2', $tag_l2, $id_l1_2) ) ){ - foreach my $feature_l2 ( @{$hash_omniscient2->{'level2'}{$tag_l2}{$id_l1_2}}) { - - my $id_l2 = lc($feature_l2->_tag_value('ID')); - - # == LEVEL 3 == # - - foreach my $tag_l3 (keys %{$hash_omniscient2->{'level3'}}){ - - if (exists_keys ($hash_omniscient2, ('level3', $tag_l3, $id_l2) ) ){ - $hash_omniscient1->{'level3'}{$tag_l3}{$id_l2} = delete $hash_omniscient2->{'level3'}{$tag_l3}{$id_l2}; # save @l3 - } - } - } - - if ( ! exists_keys ( $hash_omniscient1, ('level2', $tag_l2, $id_l1_2 ) ) ){ - $hash_omniscient1->{'level2'}{$tag_l2}{$id_l1_2} = delete $hash_omniscient2->{'level2'}{$tag_l2}{$id_l1_2}; # save @l2 - } - else{ - push @{$hash_omniscient1->{'level2'}{$tag_l2}{$id_l1_2}}, @{$hash_omniscient2->{'level2'}{$tag_l2}{$id_l1_2}}; - } - } - } - } - } - return $hash_omniscient1; -} - -# @Purpose: The hash of reference will be the Hash target (HashT). The nkept name will come from the hash of reference. 
-# When an overlap is found, the ID/parent are fixed and we return 1 as a success ! -# @input: 4 => object(gene feature), hash(omniscient), hash(omniscient), hash(sortBySeq) -# @output: 1 => undef || integer(1) -sub find_overlap_between_geneFeature_and_sortBySeqId { - my ($geneFeature, $hash_source, $hashT, $hashT_sortBySeq )=@_; - - my $tag = lc($geneFeature->primary_tag); - my $seqid = $geneFeature->seq_id; - my $strand = $geneFeature->strand; - my $gene_idS = $geneFeature->_tag_value('ID'); - - #find overlap - my $total_overlap=0; - my $nb_feat_overlap=0; - my @ListOverlapingGene=(); - foreach my $gene_featureT ( @{$hashT_sortBySeq->{"$seqid$strand"}{$tag}}){ - - my $gene_idT = $gene_featureT->_tag_value('ID'); - - - if($gene_idT eq $gene_idS){ next;} # avoid to compare same feature if we are checking same omniscient - - my ($start1,$end1) = get_most_right_left_cds_positions($hashT,$gene_idT); # look at CDS because we want only ioverlapinng CDS - my ($start2,$end2) = get_most_right_left_cds_positions($hash_source,$gene_idS); # look at CDS becaus ewe want only ioverlapinng CDS - - if( ($start2 <= $end1) and ($end2 >= $start1) ){ #feature overlap considering extrem start and extrem stop. It's just to optimise the next step. Avoid to do the next step every time. So at the end, that test (current one) could be removed - # Even if true, they do not necessarly overlap on the spreded features - #now check at each CDS feature independently - if (_two_features_overlap_two_hashes($hash_source,$gene_idS, $hashT, $gene_idT)){ - #print "These two features overlap without same id ! :\n".$geneFeature->gff_string."\n".$gene_featureT->gff_string."\n"; - $nb_feat_overlap++; - - push(@ListOverlapingGene, $gene_featureT); - } - } - } - - # Now manage name if some feature overlap - if( $nb_feat_overlap > 0){ - my $reference_feature = shift(@ListOverlapingGene); - push(@ListOverlapingGene, $geneFeature); - #print "$nb_feat_overlap overlapping feature found ! 
We will treat them now:\n"; - #print "We decided to keep that one: ".$reference_feature->gff_string."\n"; - - my $gene_id_ref = $reference_feature->_tag_value('ID'); - - #change level2 parent for feature of level2 that have a feature of level1 in $ListToRemove list - foreach my $featureToRemove (@ListOverlapingGene){ - - my $gene_id_to_remove = lc($featureToRemove->_tag_value('ID')); - - ####### - #which hash the feature come from ? - my $currentHash=undef; - foreach my $tag_l1 (keys %{$hash_source->{'level1'}} ){ # primary_tag_key_level1 = gene or repeat etc... - if($hash_source->{'level1'}{$tag_l1}{$gene_id_to_remove} ){ - $currentHash = $hash_source; - } - } - if(! $currentHash){$currentHash = $hashT;} - # ok now hash is choosen - ################ - - foreach my $tag_level2 (keys %{$currentHash->{'level2'}}){ - - if (exists_keys($currentHash, ('level2',$tag_level2,$gene_id_to_remove)) ){ # check if they have cds avoiding autovivification. - - my @list_tmp_features = @{$currentHash->{'level2'}{$tag_level2}{$gene_id_to_remove}}; # As we will remove element of the list we cannot loop over it directly, we have to save the list in a temporary list; - foreach my $level2_feature (@list_tmp_features){ #replace Parent of each feature level2 by the new level1 reference - # Change parent feature - create_or_replace_tag($level2_feature,'Parent',$gene_id_ref); - - #add it in other list - push (@{$currentHash->{'level2'}{$tag_level2}{lc($gene_id_ref)}},$level2_feature); - - #remove mRNA from list <= not mandatory - my $mrna_id_to_remove = $level2_feature->_tag_value('ID'); - my @tag_list=('all'); - my @id_list=($gene_id_to_remove);my @id_list2=($mrna_id_to_remove); - - remove_element_from_omniscient(\@id_list, \@id_list2, $currentHash, 'level2', 'false', \@tag_list); - - } - } - } - - foreach my $tag_level1 (keys %{$currentHash->{'level1'}}){ # remove the old feature level1 now - my $new_l1_feature = clone($reference_feature); - delete 
$currentHash->{'level1'}{$tag_level1}{$gene_id_to_remove}; # delete level1 - $currentHash->{'level1'}{$tag_level1}{lc($gene_id_ref)} = $new_l1_feature; - } - - } #END FEATURE TO HANDLE - ### - # check end and start of the new feature - my $gene_id=lc($reference_feature->_tag_value('ID')); - check_gene_positions($hashT, $gene_id); - return 1; - } - else{return undef;} -} - -# @Purpose: Check if two genes have at least one mRNA isoform which overlap at cds level. -# @input: 4 => hash(omniscient), string(gene identifier), hash(omniscient), string(gene identifier) -# @output: 1 => undef || string(yes) -sub _two_features_overlap_two_hashes{ - my ($hash1, $gene_id1, $hash2, $gene_id2)=@_; - my $resu=undef; - - #check full CDS for each mRNA - foreach my $mrna_feature (@{$hash1->{'level2'}{'mrna'}{lc($gene_id1)}}){ - foreach my $mrna_feature2 (@{$hash2->{'level2'}{'mrna'}{lc($gene_id2)}}){ - - my $mrna_id1 = $mrna_feature->_tag_value('ID'); - my $mrna_id2 = $mrna_feature2->_tag_value('ID'); - - #check all cds pieces - foreach my $cds_feature1 (@{$hash1->{'level3'}{'cds'}{lc($mrna_id1)}}){ - foreach my $cds_feature2 (@{$hash2->{'level3'}{'cds'}{lc($mrna_id2)}}){ - - if(($cds_feature2->start <= $cds_feature1->end) and ($cds_feature2->end >= $cds_feature1->start )){ # they overlap - $resu="yes";last; - } - } - if($resu){last;} - } - if($resu){last;} - } - if($resu){last;} - } - return $resu; -} - -sub take_care_utr{ - - my ($utr_tag, $tmpOmniscient, $mRNAlistToTakeCare, $stranded, $gffout)=@_; - - my $oneRoundAgain=undef; - my $nbNewUTRgene=0; - - foreach my $primary_tag_key_level1 (keys %{$tmpOmniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... 
- foreach my $gene_id (sort keys %{$tmpOmniscient->{'level1'}{$primary_tag_key_level1}}){ - - my $gene_feature=$tmpOmniscient->{'level1'}{$primary_tag_key_level1}{$gene_id}; - my $gene_id = lc($gene_feature->_tag_value('ID')); - #print "\ntake care utr GeneID = $gene_id\n"; - - foreach my $primary_tag_key_level2 (sort keys %{$tmpOmniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - if (exists_keys($tmpOmniscient, ('level2', $primary_tag_key_level2, $gene_id)) ){ # check if they have mRNA avoiding autovivifcation - foreach my $level2_feature ( @{$tmpOmniscient->{'level2'}{$primary_tag_key_level2}{$gene_id}}) { - - my $id_level2=lc($level2_feature->_tag_value('ID')); - foreach my $mRNAtoTakeCare (@{$mRNAlistToTakeCare}){ - - if($mRNAtoTakeCare eq $id_level2){ # ok is among the list of those to analyze - if($verbose) { print "id_level2 -- $id_level2 ***** to take_care -- $mRNAtoTakeCare \n";} - if(exists ($tmpOmniscient->{'level3'}{$utr_tag}) and exists ($tmpOmniscient->{'level3'}{$utr_tag}{$id_level2}) ){ - - ################################################## - # extract the concatenated exon and cds sequence # - my $oppDir=undef; - my $original_strand=$level2_feature->strand; - - ############### - # Manage UTRS # - my @utr_feature_list = sort {$a->start <=> $b->start} @{$tmpOmniscient->{'level3'}{$utr_tag}{$id_level2}}; # be sure that list is sorted - my ($utrExtremStart, $utr_seq, $utrExtremEnd) = concatenate_feature_list(\@utr_feature_list); - - #If UTR shorter than the minimum DNA size expected, we skip it => WE SAVE TIME - if(length($utr_seq) < ($threshold*3) ){ - next; - } - - #create the utr object - my $utr_obj = Bio::Seq->new(-seq => $utr_seq, -alphabet => 'dna' ); - - #Reverse complement according to strand - if ($original_strand == -1 or $original_strand eq "-"){ - $utr_obj = $utr_obj->revcom(); - } - - # get the revcomp - my $opposite_utr_obj = $utr_obj->revcom(); - - - my $longest_ORF_prot_obj; - my $orf_utr_region; 
- ################################# - # Get the longest ORF positive ## record ORF = start, end (half-open), length, and frame - my ($longest_ORF_prot_obj_p, $orf_utr_region_p) = translate_JD($utr_obj, - -orf => 'longest', - -codontable => $opt_codonTableID); - ######################################## - # Get the longest ORF opposite strand ## record ORF = start, end (half-open), length, and frame - my $length_longest_ORF_prot_obj_n=0; - my $longest_ORF_prot_obj_n; - my $orf_utr_region_n; - - if(! $stranded){ - ($longest_ORF_prot_obj_n, $orf_utr_region_n) = translate_JD($opposite_utr_obj, - -orf => 'longest', - -codontable => $opt_codonTableID); - $length_longest_ORF_prot_obj_n = $longest_ORF_prot_obj_n->length(); - } - - ################# - # Choose the best - if($longest_ORF_prot_obj_p->length() >= $length_longest_ORF_prot_obj_n){ - $longest_ORF_prot_obj= $longest_ORF_prot_obj_p; - $orf_utr_region= $orf_utr_region_p; - } - else{ - $longest_ORF_prot_obj= $longest_ORF_prot_obj_n; - $orf_utr_region= $orf_utr_region_n; - $oppDir=1; - #my @cds_feature_list = sort {$a->start <=> $b->start} @{$tmpOmniscient->{'level3'}{'cds'}{$id_level2}}; # be sure that list is sorted - #($cdsExtremStart, $cds_dna_seq, $cdsExtremEnd) = concatenate_feature_list($cds_feature_list); # we have to change these value because it was not predicted as same direction as mRNA - } - - - ######################## - # prediction is longer than threshold# - if($longest_ORF_prot_obj->length() > $threshold){ - - if($verbose) {print "Longer AA in utr = ".$longest_ORF_prot_obj->length()."\n".$longest_ORF_prot_obj->seq."\n";} - - my @exons_features = sort {$a->start <=> $b->start} @{$tmpOmniscient->{'level3'}{'exon'}{$id_level2}};# be sure that list is sorted - my ($exonExtremStart, $mrna_seq, $exonExtremEnd) = concatenate_feature_list(\@exons_features); - - my @cds_feature_list = sort {$a->start <=> $b->start} @{$tmpOmniscient->{'level3'}{'cds'}{$id_level2}}; # be sure that list is sorted - my 
($cdsExtremStart, $cds_dna_seq, $cdsExtremEnd) = concatenate_feature_list(\@cds_feature_list); - - # set real start and stop to orf - my $realORFstart; - my $realORFend; - #print "mRNA length: ".length($mrna_seq)." UTR length: ".length($utr_seq)."\n"; - #print "start in UTR piece ".$orf_utr_region->[0]." end ".$orf_utr_region->[1]."\n"; - - #################################### - # Recreate position of start in mRNA positive strand - my $startUTRinMRNA=length($mrna_seq) - length($utr_seq); - if ($utr_tag eq 'three_prime_utr' ){ - if($original_strand == 1 or $original_strand eq "+" ){ - if(! $oppDir){ - $orf_utr_region->[0]=$orf_utr_region->[0]+($startUTRinMRNA); - } - else{ #opposite direction - $orf_utr_region->[0]=length($mrna_seq) - $orf_utr_region->[1]; - } - } - else{ #minus strand - if(! $oppDir){ - $orf_utr_region->[0]=length($utr_seq) - $orf_utr_region->[1]; #flip position - } - } - } - elsif ($utr_tag eq 'five_prime_utr'){ - if($original_strand == 1 or $original_strand eq "+"){ - if($oppDir){ - $orf_utr_region->[0]=length($utr_seq) - $orf_utr_region->[1]; - } - } - else{ #minus strand - if(! 
$oppDir){ - $orf_utr_region->[0]=(length($utr_seq) - $orf_utr_region->[1])+($startUTRinMRNA); - } - else{ #opposite direction - $orf_utr_region->[0]=$orf_utr_region->[0]+($startUTRinMRNA); - } - } - } - - #calcul the real start end stop of utr in genome - ($realORFstart, $realORFend) = calcul_real_orf_end_and_start($orf_utr_region, \@exons_features); - - #save the real start and stop - $orf_utr_region->[0]=$realORFstart; - $orf_utr_region->[1]=$realORFend; - - # Now manage splitting the old gene to obtain two genes - $mRNAlistToTakeCare = split_gene_model($tmpOmniscient, $gene_feature, $level2_feature, \@exons_features, \@cds_feature_list, $cdsExtremStart, $cdsExtremEnd, $realORFstart, $realORFend, $oppDir, $mRNAlistToTakeCare, $gffout); - - $oneRoundAgain="yes"; - $nbNewUTRgene++; - } # We predict something in UTR - else{ if($verbose) { print "Nothing predicted over threshold :". $longest_ORF_prot_obj->length()." ! Next\n";} } - } # End there is UTR - else{ if($verbose) {print "There is no UTR ! Next\n";} } - } - #else{print "Not among the list mRNAtoTakeCare. Next \n";} - } - } - } - } - } - } - return $oneRoundAgain, $nbNewUTRgene, $mRNAlistToTakeCare; -} - -############ -# P.S: when a gene is newly created, it has a new name even if it overlap at CDS level an another gene that is not part of the current temporary omniscient studied. So, an extra step at the end will catch and fix those kind of cases. 
-sub split_gene_model{ - - my ($tmpOmniscient, $gene_feature, $level2_feature, $exons_features, $cds_feature_list, $cdsExtremStart, $cdsExtremEnd, $realORFstart, $realORFend, $oppDir, $mRNAlistToTakeCare, $gffout)=@_; - - my $gene_id = $gene_feature->_tag_value('ID'); - my $id_level2 = lc($level2_feature->_tag_value('ID')); - my $newcontainerUsed=0; - - ###################### - # Recreate exon list # - my $bolean_original_is_first; - my $first_end; - my $second_start; - #if new prediction after on the sequence - if($realORFstart >= $cdsExtremEnd){ - $bolean_original_is_first="true"; - $first_end=$cdsExtremEnd; - $second_start=$realORFstart; - }else{ # ($realORFend < $cdsExtremStart) - $bolean_original_is_first="false"; - $first_end=$realORFend; - $second_start=$cdsExtremStart; - } - my ($newOrignal_exon_list, $newPred_exon_list) = create_two_exon_lists($tmpOmniscient, $exons_features, $first_end, $second_start, $bolean_original_is_first, $oppDir); - - #################################### - # Remodelate ancient gene - #################################### - if($verbose) { print "Remodelate ancient gene\n"; } - ############################################################# - # Remove all level3 feature execept cds - my @tag_list=('cds'); - my @l2_id_list=($id_level2); - remove_tuple_from_omniscient(\@l2_id_list, $tmpOmniscient, 'level3', 'false', \@tag_list); - - - ############# - # Recreate original exon - @{$tmpOmniscient->{'level3'}{'exon'}{$id_level2}}=@$newOrignal_exon_list; - - ######### - #RE-SHAPE last/first exon if less than 3 nucleotides (1 or 2 must be romved) when the CDS finish 1 or 2 nuclotide before... because cannot be defined as UTR - shape_exon_extremity($newOrignal_exon_list,$cds_feature_list); - - ######## - # calcul utr - if($verbose) { print "Remodelate ancient gene ($gene_id)".$gene_feature->start." 
".$gene_feature->end."\n";} - - my ($original_utr5_list, $variable_not_needed, $original_utr3_list) = modelate_utr_and_cds_features_from_exon_features_and_cds_start_stop($newOrignal_exon_list, $cdsExtremStart, $cdsExtremEnd); - @{$tmpOmniscient->{'level3'}{'five_prime_utr'}{$id_level2}}=@$original_utr5_list; - @{$tmpOmniscient->{'level3'}{'three_prime_utr'}{$id_level2}}=@$original_utr3_list; - - - #### - # Check existance - my ($new_gene, $new_mrna, $overlaping_gene_ft, $overlaping_mrna_ft) = must_be_a_new_gene_new_mrna($tmpOmniscient, $cds_feature_list, $newOrignal_exon_list); - - - if ($new_mrna){ - ######### - #RE-SHAPE mrna extremities - check_mrna_positions($level2_feature, $newOrignal_exon_list); - - } - else{ - if($verbose) { print "*** remove IT *** because exon and CDS IDENTIK ! $id_level2 \n"; } - my @l2_feature_list=($level2_feature); - remove_omniscient_elements_from_level2_feature_list($tmpOmniscient, \@l2_feature_list); - } - - ######### - #RE-SHAPE gene extremities - check_gene_positions($tmpOmniscient, $gene_id); - - ################################### - # Remodelate New Prediction - ################################### - if($verbose) { print "Remodelate New Prediction\n"; } - # If newPred_exon_list list is empty we skipt the new gene modeling part - #if(!@$newPred_exon_list){ - # next; - #} - ############################################### - # modelate level3 features for new prediction # - my ($new_pred_utr5_list, $new_pred_cds_list, $new_pred_utr3_list) = modelate_utr_and_cds_features_from_exon_features_and_cds_start_stop($newPred_exon_list, $realORFstart, $realORFend); - - #################################### - #RE-SHAPE last/first exon if less than 3 nucleotides (1 or 2 must be removed) when the CDS finish 1 or 2 nuclotide before... 
because cannot be defined as UTR - if(shape_exon_extremity($newPred_exon_list, $new_pred_cds_list)){ - #we reshaped the exon, it means that the UTR are not correct anymore, we have to recalculate them - ($new_pred_utr5_list, $new_pred_cds_list, $new_pred_utr3_list) = modelate_utr_and_cds_features_from_exon_features_and_cds_start_stop($newPred_exon_list, $realORFstart, $realORFend); - } - - my @level1_list; - my @level2_list; - my @level3_list; - my $transcript_id = $newPred_exon_list->[0]->_tag_value('Parent'); - ############################################# - # Modelate gene features for new prediction # - - # $containerUsed exist when we already use the gene container. So in the case where we have only one mRNA, the split will give 2 mRNA. One is linked to the original gene container (done before) - # The second must be linked to a new gene container. So, even if must_be_a_new_gene method say no, we must create it because the original one has been already used. - ($new_gene, $new_mrna, $overlaping_gene_ft, $overlaping_mrna_ft) = must_be_a_new_gene_new_mrna($tmpOmniscient, $new_pred_cds_list, $newPred_exon_list); - if ( $new_gene ){ - $newcontainerUsed++; - $gene_id = take_care_gene_id($gene_id, $tmpOmniscient); - my $new_gene_feature = Bio::SeqFeature::Generic->new(-seq_id => $newPred_exon_list->[0]->seq_id, -source_tag => $newPred_exon_list->[0]->source_tag, -primary_tag => 'gene' , -start => $newPred_exon_list->[0]->start, -end => $newPred_exon_list->[$#{$newPred_exon_list}]->end, -frame => $newPred_exon_list->[0]->frame, -strand => $newPred_exon_list->[0]->strand , -tag => { 'ID' => $gene_id }) ; - @level1_list=($new_gene_feature); - #print "create_a_new_gene for ".$transcript_id." !!!! - ".$new_gene_feature->gff_string."\n"; - - } - else{ #the new mRNA still overlap an isoform. 
So we keep the link with the original gene - - # change gene ID - $gene_id = $overlaping_gene_ft->_tag_value('ID'); - #print "We use $gene_id\n"; - check_gene_positions($tmpOmniscient, $gene_id); - @level1_list=($overlaping_gene_ft); - } - - ############################################# - # Modelate mRNA features for new prediction # - if ( $new_mrna ){ - my $new_mRNA_feature = Bio::SeqFeature::Generic->new(-seq_id => $newPred_exon_list->[0]->seq_id, -source_tag => $newPred_exon_list->[0]->source_tag, -primary_tag => $level2_feature->primary_tag() , -start => $newPred_exon_list->[0]->start, -end => $newPred_exon_list->[$#{$newPred_exon_list}]->end, -frame => $newPred_exon_list->[0]->frame, -strand => $newPred_exon_list->[0]->strand , -tag => { 'ID' => $transcript_id , 'Parent' => $gene_id }) ; - push (@$mRNAlistToTakeCare, lc($transcript_id)); - @level2_list=($new_mRNA_feature); - - @level3_list=(@$newPred_exon_list, @$new_pred_cds_list, @$new_pred_utr5_list, @$new_pred_utr3_list); - - #Save the gene (not necesserely new) and mRNA feature (necesseraly new) - append_omniscient($tmpOmniscient, \@level1_list, \@level2_list, \@level3_list); - - #Now we have the new transcript we can test the gene end and start - check_gene_positions($tmpOmniscient, $gene_id); - } - else{ - if($verbose){print "*** Not creating mRNA *** because exon and CDS IDENTIK ! \n";} - } - - return $mRNAlistToTakeCare; -} - - -#create an Uniq gene ID -sub take_care_gene_id{ - - my ($gene_id, $tmpOmniscient) = @_; - - #clean geneid if necessary - $gene_id =~ /^(new[0-9]+_)?(.*)$/; - my $clean_id=$2; - - #count current gene number - should be one if first analysis - my $primary_tag_key_general; - my $numberOfNewGene=1; - - foreach my $primary_tag_key_level1 (keys %{$tmpOmniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... 
- foreach my $gene_id_from_hash (keys %{$tmpOmniscient->{'level1'}{$primary_tag_key_level1}}){ - if($gene_id_from_hash =~ /(new[1-9]+_)/){ - $numberOfNewGene++; - } - } - #primary tag key containg gene name has been found. No need to see the others. - $primary_tag_key_general=$primary_tag_key_level1; - last; - } - - # From tmpOmniscient: Between ( new1_geneA, new2_geneA, new3_geneA). It happens that new2_geneA has been deleted. In that case we try to create new3_geneA but as already exists we try new2_geneA (--) - # If new2_geneA also already exist, it means that it exist in hash_omniscient. so we will try decrementing $numberGeneIDToCheck until 1; Then we will try incrementing $numberGeneIDToCheck over new3_geneA (in other term we try new4_geneA ) - my $testok=undef; - my $nbToadd=-1; - my $numberGeneIDToCheck=$numberOfNewGene; - my $new_id; - while (! $testok){ - my $newGenePrefix="new".$numberGeneIDToCheck."_"; - $new_id="$newGenePrefix$clean_id"; - - if((! defined ($tmpOmniscient->{'level1'}{$primary_tag_key_general}{lc($new_id)})) and (! defined ($hash_omniscient->{'level1'}{$primary_tag_key_general}{lc($new_id)}))){ - $testok=1; - } - else{ - if($numberGeneIDToCheck == 1){ - $nbToadd=1;$numberGeneIDToCheck=$numberOfNewGene; - } - $numberGeneIDToCheck=$numberGeneIDToCheck+$nbToadd;} - } - #print "old_gene_id --- $gene_id ***** new_gene_id --- $new_id\n"; - - return $new_id; -} - -#create an Uniq mRNA ID -sub take_care_mrna_id { - - my ($tmpOmniscient, $mRNA_id) = @_; - - #clean geneid if necessary - $mRNA_id =~ /^(new[0-9]+_)?(.*)$/; - my $clean_id=$2; - - #count current gene number - should be one if first analysis - my %id_to_avoid; - - foreach my $primary_tag_key_level1 (keys %{$tmpOmniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... 
- foreach my $gene_id_from_hash (keys %{$tmpOmniscient->{'level1'}{$primary_tag_key_level1}}){ - - foreach my $primary_tag_key_level2 (keys %{$tmpOmniscient->{'level2'}}){ # primary_tag_key_level1 = gene or repeat etc... - if( exists_keys($tmpOmniscient, ('level2', $primary_tag_key_level2, $gene_id_from_hash)) ){ - foreach my $featureL2 (@{$tmpOmniscient->{'level2'}{$primary_tag_key_level2}{$gene_id_from_hash}}){ - my $mrna_id_from_hash=$featureL2->_tag_value('ID'); - if($mrna_id_from_hash =~ /(new[1-9]+_)/){ - $id_to_avoid{lc($mrna_id_from_hash)}; - $PREFIX_CPT_MRNA++; - } - } - } - } - } - } - - my $testok=undef; - my $nbToadd=-1; - my $numberMRNA_IDToCheck=$PREFIX_CPT_MRNA; - my $new_id; - while (! $testok){ - my $newPrefix="new".$numberMRNA_IDToCheck."_"; - $new_id="$newPrefix$clean_id"; - - if( (! defined ($hash_mRNAGeneLink->{lc($new_id)})) and (! defined ($id_to_avoid{lc($new_id)})) ) { - $testok=1; - } - else{ - if($numberMRNA_IDToCheck == 1){ - $nbToadd=1;$numberMRNA_IDToCheck=$PREFIX_CPT_MRNA; - } - $numberMRNA_IDToCheck=$numberMRNA_IDToCheck+$nbToadd;} - } - #print "old_mrna_id --- $mRNA_id ***** new_mrna_id --- $new_id\n"; - - return $new_id; -} - -#As based on a Uniq mRNA ID, this will create a Uniq ID; -#PREFIX_CPT_EXON allows to kepp track of name already given during a exon list spliting -sub take_care_level3_id { - - my ($tmpOmniscient, $feature) = @_; - - #clean geneid if necessary - my $level3_id = $feature->_tag_value('ID'); - $level3_id =~ /^(new[0-9]+_)?(.*)$/; - my $clean_id=$2; - my $newPrefix="new".$PREFIX_CPT_EXON."_"; - my $new_id="$newPrefix$clean_id"; - - my $primary_tag = lc($feature->primary_tag); - - while(ID_exists_at_level3($tmpOmniscient, $new_id, $primary_tag )){ - $PREFIX_CPT_EXON++; - $new_id =~ /^(new[0-9]+_)?(.*)$/; - my $clean_id=$2; - my $newPrefix="new".$PREFIX_CPT_EXON."_"; - $new_id="$newPrefix$clean_id"; - } - return $new_id; -} - -#return undef if the ID is not existing in tmpOmniscient -sub ID_exists_at_level3{ - - my 
($tmpOmniscient, $ID, $primary_tag ) = @_; - - foreach my $level2_ID (keys %{$tmpOmniscient->{'level3'}{$primary_tag}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... - - foreach my $feature_l3 ( @{$tmpOmniscient->{'level3'}{$primary_tag}{$level2_ID}}) { - my $existingID = $feature_l3->_tag_value('ID'); - if ($existingID eq $ID){ - return 1; - } - } - } - return undef; -} - -# Yes if mRNA doesnt overlap an other existing isoform -# mRNA "true" true mean no overlap at CDS level -sub must_be_a_new_gene_new_mrna{ - my ($omniscient, $new_pred_cds_list, $newPred_exon_list)=@_; - - my $overlaping_mrna_ft=undef; - my $overlaping_gene_ft=undef; - my $Need_new_gene="true"; - my $Need_new_mRNA="true"; - my $strand=$new_pred_cds_list->[0]->strand; - - foreach my $primary_tag_key_level1 (keys %{$omniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... - foreach my $gene_id_from_hash (keys %{$omniscient->{'level1'}{$primary_tag_key_level1}}){ - my $gene_feature= $omniscient->{'level1'}{$primary_tag_key_level1}{$gene_id_from_hash}; - - if($strand eq $gene_feature->strand){ - foreach my $primary_tag_key_level2 (keys %{$omniscient->{'level2'}}){ # primary_tag_key_level1 = gene or repeat etc... 
- if( exists_keys($omniscient, ('level2', $primary_tag_key_level2, $gene_id_from_hash)) ){ - - foreach my $featureL2 (@{$omniscient->{'level2'}{$primary_tag_key_level2}{$gene_id_from_hash}}){ - - # get level2 id - my $featureL2_id = lc($featureL2->_tag_value('ID')); - my $featureL2_original_id=lc($newPred_exon_list->[0]->_tag_value('Parent')); - - if($featureL2_id ne $featureL2_original_id){ - - #Now check if overlap - my @cds_feature_list = @{$omniscient->{'level3'}{'cds'}{$featureL2_id}}; - my @exon_feature_list = @{$omniscient->{'level3'}{'exon'}{$featureL2_id}}; - - my $overlap_cds = featuresList_overlap(\@cds_feature_list, $new_pred_cds_list); - if(defined ($overlap_cds)){ #If CDS overlap - $Need_new_gene=undef; - $overlaping_gene_ft=$gene_feature; - #print "CDS Overlap entre $featureL2_id and $featureL2_original_id !\n"; - if(featuresList_identik(\@cds_feature_list, $new_pred_cds_list)){ - #print "cds identik !\n"; - if(featuresList_identik(\@exon_feature_list, $newPred_exon_list)){ - if($verbose) { print "RNA identik BETWEEN $featureL2_id and $featureL2_original_id \n"; } - $Need_new_mRNA=undef; - $overlaping_mrna_ft=$featureL2_id; - last; - } - } - } - } - } - } - } - } - } - } - - return $Need_new_gene, $Need_new_mRNA, $overlaping_gene_ft, $overlaping_mrna_ft; -} - -#remove small remaining pieces in the of UTR in the exon shape -sub shape_exon_extremity{ - #exon_features is a sorted list - #cds_features is a sorted list - my $modified=undef; - my ($exon_features,$cds_features)=@_; - - #test between first exon and first cds - if( (abs($cds_features->[0]->start - $exon_features->[0]->start) < 3) and (abs($cds_features->[0]->start - $exon_features->[0]->start) > 0) ){ #We have to shape the exon start. 
We don't want a non multiple of 3 inferior to 3 - $exon_features->[0]->start($cds_features->[0]->start); - $modified=1; - } - #test between last exon and last cds - if(abs($exon_features->[$#{ $exon_features }]->end - $cds_features->[$#{ $cds_features }]->end ) < 3){ #We have to shape the exon end - $exon_features->[$#{ $exon_features }]->end($cds_features->[$#{ $cds_features }]->end); - $modified=1; - } - return $modified; -} - -sub calcul_real_orf_end_and_start{ - #exons_features is sorted - my ($orf_cds_region, $exons_features)=@_; - - my $realORFstart; - my $realORFend; - - my $orf_start=$orf_cds_region->[0]; # get start to begin - my $orf_length=$orf_cds_region->[2]; # get lentgh to map - - my $first="yes"; - my $total_exon_length=0; - my $total_exon_length_previous_round=0; - my $mapped_length=0; - my $mapped_length_total=0; - my $the_rest_to_map=0; - - foreach my $exon_feature (@$exons_features){ - # Allows to follow the path on mRNA - my $exon_length=($exon_feature->end - $exon_feature->start)+1; - $total_exon_length_previous_round=$total_exon_length; - $total_exon_length=$total_exon_length+$exon_length; - # Allows to follow the path on the CDS - $mapped_length_total=$mapped_length_total+$mapped_length; - $the_rest_to_map=$orf_length-$mapped_length_total; - # exon overlap CDS - if($total_exon_length >= $orf_start){ #they begin to overlap - - if($first eq "yes"){ - # $realORFstart=$exon_feature->start+($orf_start - 1); - $realORFstart=$exon_feature->start+($orf_start - $total_exon_length_previous_round ); - my $end_part_of_exon=$exon_feature->start- $realORFstart + 1; - if($end_part_of_exon >= $orf_length){ #exon ============================================ - $realORFend=$realORFstart+$orf_length-1; #cds ========================= - last; - } - $mapped_length=$exon_feature->end - $realORFstart + 1; - $first="no"; - } - else{ - $mapped_length=$exon_feature->end - $exon_feature->start + 1; - } - } - #exon are over the end of cds => we finish at this round - 
if($total_exon_length >= ($orf_start+$orf_length) ){ #exon ============================================ - if($realORFstart > $exon_feature->start){ #cds ========================= - $realORFend=$realORFstart+$the_rest_to_map - 1 ; - last; - }else{ - $realORFend=$exon_feature->start + $the_rest_to_map - 1 ; - last; - } - } - } -return $realORFstart, $realORFend; -} - -sub change_strand{ - my ($feature)=@_; - - if($feature->strand eq "-" or $feature->strand eq "-1"){ - $feature->strand('+'); - }else{$feature->strand('-');} -} - -# The exons containing the original cds keep their parent names. The exon containing the new cds will have a new parent name. -sub create_two_exon_lists { - # orignalFirst == true if original gene is first on the prediction - my ($tmpOmniscient, $exons_features, $firstEnd, $secondStart, $orignalFirst, $oppDir)=@_; - my @list_exon_originalPred; - my @list_exon_newPred; - #print "firstEnd $firstEnd, secondStart $secondStart, $orignalFirst, $oppDir\n"; - $PREFIX_CPT_EXON=1; - - my $value = $exons_features->[0]->_tag_value('Parent'); - my $NewParentName = take_care_mrna_id($tmpOmniscient, $value); - - foreach my $exon_feature (@$exons_features){ #for each exon - if(two_positions_on_feature($exon_feature,$firstEnd,$secondStart)){ # We have to split the exon_feature - my $duplicated_exon_feature=clone($exon_feature);#create a copy of the feature - - $exon_feature->end($secondStart-1); - $duplicated_exon_feature->start($secondStart); - - if($orignalFirst eq "true"){ - - push( @list_exon_originalPred, $exon_feature); - - my $value = take_care_level3_id($tmpOmniscient,$duplicated_exon_feature); - create_or_replace_tag($duplicated_exon_feature,'ID', $value); - create_or_replace_tag($duplicated_exon_feature,'Parent', $NewParentName); - if($oppDir){ - change_strand($duplicated_exon_feature); - } - push( @list_exon_newPred, $duplicated_exon_feature); - next; - }else{ #original pred after - $duplicated_exon_feature->start($secondStart-1); - push( 
@list_exon_originalPred, $duplicated_exon_feature); - - my $value = take_care_level3_id($tmpOmniscient, $exon_feature); - create_or_replace_tag($exon_feature,'ID', $value); - - create_or_replace_tag($exon_feature,'Parent', $NewParentName); - if($oppDir){ - change_strand($exon_feature); - } - push( @list_exon_newPred, $exon_feature); - next; - } - } - if(! (($exon_feature->end <= $secondStart) and ($exon_feature->start >= $firstEnd))){ # avoid exon between CDSs - if ($exon_feature->end <= $secondStart) { - if ($orignalFirst eq "true"){ - push( @list_exon_originalPred, $exon_feature); - }else{ - my $duplicated_exon_feature=clone($exon_feature);#create a copy of the feature - my $value = take_care_level3_id($tmpOmniscient, $duplicated_exon_feature); - create_or_replace_tag($duplicated_exon_feature,'ID', $value); - create_or_replace_tag($duplicated_exon_feature,'Parent', $NewParentName); - if($oppDir){ - change_strand($duplicated_exon_feature); - } - push( @list_exon_newPred, $duplicated_exon_feature); - } - } - if ($exon_feature->start >= $firstEnd) { - if($orignalFirst eq "true"){ - my $duplicated_exon_feature=clone($exon_feature);#create a copy of the feature - my $value = take_care_level3_id($tmpOmniscient, $duplicated_exon_feature); - create_or_replace_tag($duplicated_exon_feature,'ID', $value); - create_or_replace_tag($duplicated_exon_feature,'Parent', $NewParentName); - if($oppDir){ - change_strand($duplicated_exon_feature); - } - push( @list_exon_newPred, $duplicated_exon_feature); - } - else{ - push( @list_exon_originalPred, $exon_feature); - } - } - } - if(($exon_feature->end <= $secondStart) and ($exon_feature->start >= $firstEnd)){ # Exon between CDSs - if ($orignalFirst eq "true"){ - push( @list_exon_originalPred, $exon_feature); - }else{ - my $duplicated_exon_feature=clone($exon_feature);#create a copy of the feature - my $value = take_care_level3_id($tmpOmniscient, $duplicated_exon_feature); - create_or_replace_tag($duplicated_exon_feature,'ID', $value); 
- create_or_replace_tag($duplicated_exon_feature,'Parent', $NewParentName); - if($oppDir){ - change_strand($duplicated_exon_feature); - } - push( @list_exon_newPred, $duplicated_exon_feature); - } - } - } - my @list_exon_originalPred_sorted = sort {$a->start <=> $b->start} @list_exon_originalPred; - my @list_exon_newPred_sorted = sort {$a->start <=> $b->start} @list_exon_newPred; - # print "list1: @list_exon_originalPred_sorted\n"; - # foreach my $u (@list_exon_originalPred_sorted){ - # print $u->gff_string."\n"; - # } - # print "list2: @list_exon_newPred_sorted\n"; - # foreach my $u (@list_exon_newPred_sorted){ - # print $u->gff_string."\n"; - # } - return \@list_exon_originalPred_sorted, \@list_exon_newPred_sorted; -} - -#Check if feature overlap one position -sub position_on_feature { - - my ($feature,$position)=@_; - - my $isOnSameExon=undef; - if ( ($position >= $feature->start and $position <= $feature->end)){ - $isOnSameExon="true"; - } - return $isOnSameExon; -} - -#Check if feature overlap two positions (start and stop) -sub two_positions_on_feature { - - my ($feature,$position1,$position2)=@_; - - my $areOnSameExon=undef; - if ( ($position1 >= $feature->start and $position1 <= $feature->end) and ($position2 >= $feature->start and $position2 <= $feature->end) ){ - $areOnSameExon="true"; - } - return $areOnSameExon; -} - -# We do not use the official translate function from the PrimarySeqI object/lib because we want to keep track to the ORF positions too. So we have modified it consequently. 
-sub translate_JD { - my ($self,@args) = @_; - my ($terminator, $unknown, $frame, $codonTableId, $complete, - $complete_codons, $throw, $codonTable, $orf, $start_codon, $offset); - - ## new API with named parameters, post 1.5.1 - if ($args[0] && $args[0] =~ /^-[A-Z]+/i) { - ($terminator, $unknown, $frame, $codonTableId, $complete, - $complete_codons, $throw,$codonTable, $orf, $start_codon, $offset) = - $self->_rearrange([qw(TERMINATOR - UNKNOWN - FRAME - CODONTABLE_ID - COMPLETE - COMPLETE_CODONS - THROW - CODONTABLE - ORF - START - OFFSET)], @args); - ## old API, 1.5.1 and preceding versions - } else { - ($terminator, $unknown, $frame, $codonTableId, - $complete, $throw, $codonTable, $offset) = @args; - } - - ## Initialize termination codon, unknown codon, codon table id, frame - $terminator = '*' unless (defined($terminator) and $terminator ne ''); - $unknown = "X" unless (defined($unknown) and $unknown ne ''); - $frame = 0 unless (defined($frame) and $frame ne ''); - $codonTableId = 1 unless (defined($codonTableId) and $codonTableId ne ''); - $complete_codons ||= $complete || 0; - - ## Get a CodonTable, error if custom CodonTable is invalid - if ($codonTable) { - $self->throw("Need a Bio::Tools::CodonTable object, not ". $codonTable) - unless $codonTable->isa('Bio::Tools::CodonTable'); - } else { - - # shouldn't this be cached? Seems wasteful to have a new instance - # every time... 
- $codonTable = Bio::Tools::CodonTable->new( -id => $codonTableId); - } - - ## Error if alphabet is "protein" - $self->throw("Can't translate an amino acid sequence.") if - ($self->alphabet =~ /protein/i); - - ## Error if -start parameter isn't a valid codon - if ($start_codon) { - $self->throw("Invalid start codon: $start_codon.") if - ( $start_codon !~ /^[A-Z]{3}$/i ); - } - - my $seq; - - if ($offset) { - $self->throw("Offset must be 1, 2, or 3.") if - ( $offset !~ /^[123]$/ ); - my ($start, $end) = ($offset, $self->length); - ($seq) = $self->subseq($start, $end); - } else { - ($seq) = $self->seq(); - } - - ## ignore frame if an ORF is supposed to be found - my $orf_region; - if ( $orf ) { - ($orf_region) = $self->_find_orfs_nucleotide($seq, $codonTable, $start_codon, $orf eq 'longest' ? 0 : 'first_only' ); - $seq = $self->_orf_sequence( $seq, $orf_region ); - } else { - ## use frame, error if frame is not 0, 1 or 2 - $self->throw("Valid values for frame are 0, 1, or 2, not $frame.") - unless ($frame == 0 or $frame == 1 or $frame == 2); - $seq = substr($seq,$frame); - } - - ## Translate it - my $output = $codonTable->translate($seq, $complete_codons); - # Use user-input terminator/unknown - $output =~ s/\*/$terminator/g; - $output =~ s/X/$unknown/g; - - ## Only if we are expecting to translate a complete coding region - if ($complete) { - my $id = $self->display_id; - # remove the terminator character - if( substr($output,-1,1) eq $terminator ) { - chop $output; - } else { - $throw && $self->throw("Seq [$id]: Not using a valid terminator codon!"); - $self->warn("Seq [$id]: Not using a valid terminator codon!"); - } - # test if there are terminator characters inside the protein sequence! 
- if ($output =~ /\Q$terminator\E/) { - $id ||= ''; - $throw && $self->throw("Seq [$id]: Terminator codon inside CDS!"); - $self->warn("Seq [$id]: Terminator codon inside CDS!"); - } - # if the initiator codon is not ATG, the amino acid needs to be changed to M - if ( substr($output,0,1) ne 'M' ) { - if ($codonTable->is_start_codon(substr($seq, 0, 3)) ) { - $output = 'M'. substr($output,1); - } elsif ($throw) { - $self->throw("Seq [$id]: Not using a valid initiator codon!"); - } else { - $self->warn("Seq [$id]: Not using a valid initiator codon!"); - } - } - } - - my $seqclass; - if ($self->can_call_new()) { - $seqclass = ref($self); - } else { - $seqclass = 'Bio::PrimarySeq'; - $self->_attempt_to_load_Seq(); - } - my $out = $seqclass->new( '-seq' => $output, - '-display_id' => $self->display_id, - '-accession_number' => $self->accession_number, - # is there anything wrong with retaining the - # description? - '-desc' => $self->desc(), - '-alphabet' => 'protein', - '-verbose' => $self->verbose - ); - return $out, $orf_region; -} - -sub concatenate_feature_list{ - - my ($feature_list) = @_; - - my $seq = ""; - my $ExtremStart=1000000000000; - my $ExtremEnd=0; - - foreach my $feature (@$feature_list) { - my $start=$feature->start(); - my $end=$feature->end(); - my $seqid=$feature->seq_id(); - $seq .= $db->seq( $seqid, $start, $end ); - - if ($start < $ExtremStart){ - $ExtremStart=$start; - } - if($end > $ExtremEnd){ - $ExtremEnd=$end; - } - } - return $ExtremStart, $seq, $ExtremEnd; -} - -__END__ - -=head1 NAME - -gff3_fixFusion.pl - -The script take a gff3 file as input. - -The script looks for other ORF in UTRs (UTR3 and UTR5) of each gene model described in the gff file. -Several ouput files will be written if you specify an output. One will contain the gene not modified (intact), -one the gene models fixed. 
- -=head1 SYNOPSIS - - ./gff3_fixLongestORF.pl -gff=infile.gff --fasta genome.fa [ -o outfile ] - ./gff3_fixLongestORF.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<-gff> - -Input GFF3 file that will be read (and sorted) - -=item B<-fa> or B<--fasta> - -Genome fasta file -The name of the fasta file containing the genome to work with. - -=item B<--ct>, B<--codon> or B<--table> - -Codon table to use. 0 By default. - -=item B<-t> or B<--threshold> - -This is the minimum length of new protein predicted that will be taken in account. -By default this value is 100 AA. - -=item B<-s> or B<--stranded> - -By default we predict protein in UTR3 and UTR5 and in both direction. The fusion assumed can be between gene in same direction and in opposite direction. -If RNAseq data used during the annotation was stranded, only fusion of close genes oriented in same direction are expected. In that case this option should be activated. -When activated, we will try to predict protein in UTR3 and UTR5 only in the same orientation than the gene investigated. - -=item B<-v> or B<--verbose> - -Output verbose information. - -=item B<-o> , B<--output> , B<--out> or B<--outfile> - -Output GFF file. If no output file is specified, the output will be -written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_fix_longest_ORF.pl b/annotation/Tools/Util/gff/gff3_sp_fix_longest_ORF.pl deleted file mode 100755 index 1b69f2bc5..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_fix_longest_ORF.pl +++ /dev/null @@ -1,1390 +0,0 @@ -#!/usr/bin/env perl - -## if IUPAC: -## We consider a stop only if we are sure it is one -## CDS can contains putative stop codon (but not sure stop one like YAA that can be TAA or CAA). 
-## We consider a start even if is not sure like AYG that can be ATG or ACG - -##TO DO -## Consider longest ORF wihtout checking start (can be incomplete) <= otpion to check start - -use strict; -use warnings; -use Carp; -use Clone 'clone'; -use File::Basename; -use Getopt::Long; -use Statistics::R; -use Pod::Usage; -use List::MoreUtils qw(uniq); -use Bio::Tools::GFF; -use Bio::DB::Fasta; -use Bio::SeqIO; -use NBIS::GFF3::Omniscient; -use NBIS::Plot::R qw(:Ok); - -my $SIZE_OPT=21; - -my $header = qq{ -######################################################## -# NBIS 2019 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $outfile = undef; -my $gff = undef; -my $model_to_test = undef; -my $file_fasta=undef; -my $split_opt=undef; -my $codonTable=0; -my $verbose = undef; -my $help= 0; - -my @copyARGV=@ARGV; -if ( !GetOptions( - "help|h" => \$help, - "gff=s" => \$gff, - "fasta|fa|f=s" => \$file_fasta, - "split|s" => \$split_opt, - "table|codon|ct=i" => \$codonTable, - "m|model=s" => \$model_to_test, - "v!" => \$verbose, - "output|outfile|out|o=s" => \$outfile)) - -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ( ! (defined($gff)) or !(defined($file_fasta)) ){ - pod2usage( { - -message => "$header\nAt least 2 parameter is mandatory:\nInput reference gff file (--gff) and Input fasta file (--fasta)\n\n", - -verbose => 0, - -exitval => 1 } ); -} - -if($codonTable<0 and $codonTable>25){ - print "$codonTable codon table is not a correct value. It should be between 0 and 25 (0,23 and 25 can be problematic !)\n"; -} -else{ - print "We will use the codon table ".$codonTable.". If it is not what you want please stop the tool and use the --table option. 
\n"; -} - -###################### -# Manage output file # - -my $fh_error = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); - -my $gffout; my $gffout2; my $gffout3; my $report; #my $gffout4; - -if ($outfile) { - $outfile=~ s/.gff//g; -open(my $fh, '>', $outfile."-intact.gff") or die "Could not open file '$outfile' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); -open(my $fh2, '>', $outfile."-only_modified.gff") or die "Could not open file '$outfile' $!"; - $gffout2= Bio::Tools::GFF->new(-fh => $fh2, -gff_version => 3 ); -open(my $fh3, '>', $outfile."-all.gff") or die "Could not open file '$outfile' $!"; - $gffout3= Bio::Tools::GFF->new(-fh => $fh3, -gff_version => 3 ); -open($report, '>', $outfile."-report.txt") or die "Could not open file '$outfile' $!"; -#open(my $fh3, '>', $outfile."-pseudogenes.gff") or die "Could not open file '$outfile' $!"; -# $gffout4= Bio::Tools::GFF->new(-fh => $fh3, -gff_version => 3 ); -} -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); - $gffout2 = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); - $gffout3 = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -# $gffout4 = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - -my %ListModel; -if(!($model_to_test)){ - $ListModel{1}=0; - $ListModel{2}=0; - $ListModel{3}=0; - $ListModel{4}=0; - $ListModel{5}=0; -}else{ - my @fields= split(',', $model_to_test); - foreach my $field (@fields){ - if($field =~ m/^[012345]$/){ - $ListModel{$field}=0; - }else{ - print "This model $field is not known. 
Must be an Integer !\n";exit; - } - } -} - - ##################### - # MAIN # - ##################### - - -###################### -### Parse GFF input # -my ($hash_omniscient, $hash_mRNAGeneLink) =slurp_gff3_file_JD({ input => $gff - }); -print ("GFF3 file parsed\n"); - - -#################### -# index the genome # -my $db = Bio::DB::Fasta->new($file_fasta); -print ("Genome fasta parsed\n"); - -#################### -my $pseudo_threshold=70; -#counters -my $counter_case21=0; -my $geneCounter=0; -my $mRNACounter=0; -my $mRNACounter_fixed=0; -#my $mrna_pseudo_suspected=0; -#my $gene_pseudo_suspected=0; -#my $mrna_pseudo_removed=0; -#my $gene_pseudo_removed=0; -my $special_or_partial_mRNA=0; - -my %omniscient_modified_gene; -#my %omniscient_pseudogene; -my @modified_gene_list; -my @intact_gene_list; - -foreach my $primary_tag_key_level1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... - foreach my $gene_id_tag_key (keys %{$hash_omniscient->{'level1'}{$primary_tag_key_level1}}){ - my $gene_feature=$hash_omniscient->{'level1'}{$primary_tag_key_level1}{$gene_id_tag_key}; - - my $one_ORFmodified="no"; - #my $mrna_pseudo=0; - #my @list_mrna_pseudo; - my $number_mrna=0; - - foreach my $primary_tag_key_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - if ( exists_keys( $hash_omniscient, ('level2', $primary_tag_key_level2, $gene_id_tag_key) ) ){ - foreach my $level2_feature ( @{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$gene_id_tag_key}}) { - - my $ORFmodified="no"; - $number_mrna=$#{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$gene_id_tag_key}}+1; - - # get level2 id - my $id_level2 = lc($level2_feature->_tag_value('ID')); - - ############################## - #If it's a mRNA = have CDS. 
# - if ( exists ($hash_omniscient->{'level3'}{'cds'}{$id_level2} ) ){ - - ############## - # Manage CDS # - my @cds_feature_list = sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{'cds'}{$id_level2}}; # be sure that list is sorted - my ($cdsExtremStart, $cds_dna_seq, $cdsExtremEnd) = concatenate_feature_list(\@cds_feature_list); - #create the cds object - my $cds_obj = Bio::Seq->new(-seq => $cds_dna_seq, -alphabet => 'dna' ); - #Reverse the object depending on strand - if ($level2_feature->strand == -1 or $level2_feature->strand eq "-"){ - $cds_obj = $cds_obj->revcom(); - } - #translate cds in protein - my $original_prot_obj = $cds_obj->translate(-codontable_id => $codonTable) ; #codontable_id by default=0 strict M as start codon. IUPAC => STOP codon even if not sure ... - my $cds_prot=$original_prot_obj->seq; - #print $original_prot_obj->seq."\n"; - my $originalProt_size=length($cds_prot); - - ################################################ - # mRNA: extract the concatenated exon sequence # - my @exons_features = sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{'exon'}{$id_level2}}; - my ($exonExtremStart, $mrna_seq, $exonExtremEnd) = concatenate_feature_list(\@exons_features); - #create the mrna object - my $mrna_obj = Bio::Seq->new(-seq => $mrna_seq, -alphabet => 'dna' ); - - #Reverse complement according to strand - if ($level2_feature->strand == -1 or $level2_feature->strand eq "-"){ - $mrna_obj = $mrna_obj->revcom(); - } - - ####################### - # Get the longest ORF ## record ORF = start, end (half-open), length, and frame - my ($longest_ORF_prot_obj, $orf_cds_region) = translate_JD($mrna_obj, - -orf => 'longest', - -codontable_id => $codonTable); - # print Dumper($orf_cds_region)."\n"; - # set real start and stop to orf - my $realORFstart; - my $realORFend; - # change the start for negative strand - if ($level2_feature->strand == -1 or $level2_feature->strand eq "-"){ - $orf_cds_region->[0]=(length($mrna_seq) - 
$orf_cds_region->[1]); - } - #calcul the real start end stop of cds in genome - # print Dumper($orf_cds_region)."\n".$mrna_obj->seq."\n"; - ($realORFstart, $realORFend) = calcul_real_orf_end_and_start($orf_cds_region, \@exons_features); - # print "$id_level2 $realORFstart $realORFend\n"; - #save the real start and stop - $orf_cds_region->[0]=$realORFstart; - $orf_cds_region->[1]=$realORFend; - - ############# - # Tests # - ############# - - ######################## - # prediction is longer # - print $id_level2." - size before: ".$originalProt_size." size after: ".$longest_ORF_prot_obj->length()."\n" if $verbose; - #print $original_prot_obj->seq."\n"; - if($longest_ORF_prot_obj->length() > $originalProt_size){ - - #Model1 ############################################### - # sequence original is part of new prediction # - if (index($longest_ORF_prot_obj->seq,$cds_prot) != -1){ - if ( exists($ListModel{1}) ){ - if(!(($longest_ORF_prot_obj->seq =~ m/^X/) and ($longest_ORF_prot_obj->length() < $originalProt_size+$SIZE_OPT))){ #avoid case of ambigous methionine (Written X) -> Need to be over 21 AA to decide ok is longer and can be a M - - $ListModel{1}++; print "Model 1: gene=$gene_id_tag_key mRNA=$id_level2\n" if ($verbose); - print "original:$cds_prot\nnew:". 
$longest_ORF_prot_obj->seq."\n" if $verbose; - modify_gene_model($hash_omniscient, \%omniscient_modified_gene, $gene_feature, $gene_id_tag_key, $level2_feature, $id_level2, \@exons_features, \@cds_feature_list, $cdsExtremStart, $cdsExtremEnd, $realORFstart, $realORFend, 'model1', $gffout); - $ORFmodified="yes"; - - } - } - } - ################################################# - # protein original and predicted are different - else{ - - ######################################### - #Model2 # Prediction don't overlap original CDS # - if( ($realORFend < $cdsExtremStart) or ($realORFstart > $cdsExtremEnd) ){ - my $model; - - if( exists($ListModel{2}) ){ - $ListModel{2}++; print "Model 2: gene=$gene_id_tag_key mRNA=$id_level2\n" if ($verbose); - $model=1; - if($split_opt){ - split_gene_model(\@intact_gene_list, $hash_omniscient, \%omniscient_modified_gene, $gene_feature, $gene_id_tag_key, $level2_feature, $id_level2, \@exons_features, \@cds_feature_list, $cdsExtremStart, $cdsExtremEnd, $realORFstart, $realORFend, 'model2', $gffout); - } - else{ - modify_gene_model($hash_omniscient, \%omniscient_modified_gene, $gene_feature, $gene_id_tag_key, $level2_feature, $id_level2, \@exons_features, \@cds_feature_list, $cdsExtremStart, $cdsExtremEnd, $realORFstart, $realORFend, 'model2', $gffout); - } - $ORFmodified="yes"; - } - } # End they don't overlap - - ######################################### - # Prediction Overlap original CDS # - else { # They overlap - #Model3 ############### - # original protein and predicted one are different; the predicted one is longest, they overlap each other. 
- if( exists($ListModel{3}) ){ - $ListModel{3}++; print "Model 3: gene=$gene_id_tag_key mRNA=$id_level2\n" if ($verbose); - - modify_gene_model($hash_omniscient, \%omniscient_modified_gene, $gene_feature, $gene_id_tag_key, $level2_feature, $id_level2, \@exons_features, \@cds_feature_list, $cdsExtremStart, $cdsExtremEnd, $realORFstart, $realORFend, 'model3', $gffout); - $ORFmodified="yes"; - } - } - } - }# End prediction longer - - ########################### - # The real ORF looks to be shorter than the one originaly described ! Selenocysteine ? pseudogene ? Or just case where prediction begin by L instead of M (correct !) or begin by XXXXXX - elsif($longest_ORF_prot_obj->length() < $originalProt_size){ - - #Model4 ############### - # /!\ Here we compare the CDS traduction (traduct in IUPAC) against longest CDS in mRNA IUPAC modified to take in account for stops codon only those that are trustable only (TGA, TAR...). - if( exists($ListModel{4}) ){ - $ListModel{4}++; print "Model 4: gene=$gene_id_tag_key mRNA=$id_level2\n" if ($verbose); - - print "Original: ".$original_prot_obj->seq."\n" if $verbose; - print "longestl: ".$longest_ORF_prot_obj->seq."\n" if $verbose; - # contains stop codon but not at the last position - if( (index($original_prot_obj->seq, '*') != -1 ) and (index($original_prot_obj->seq, '*') != length($original_prot_obj->seq)-1) ){ - print "Original sequence contains premature stop codon.\n"; - ## Pseudogene THRESHOLD ## - # my $threshold_size=(length($original_prot_obj->seq)*$pseudo_threshold)/100; #70% of the original size - # if(length($longest_ORF_prot_obj->seq) < $threshold_size){ # inferior to threshold choosen, we suspect it to be a pseudogene - #print Dumper($original_prot_obj); - #print Dumper($longest_ORF_prot_obj); - # $mrna_pseudo++; - # push(@list_mrna_pseudo, $id_level2); - # } - # else{ - #remodelate a shorter gene - modify_gene_model($hash_omniscient, \%omniscient_modified_gene, $gene_feature, $gene_id_tag_key, $level2_feature, 
$id_level2, \@exons_features, \@cds_feature_list, $cdsExtremStart, $cdsExtremEnd, $realORFstart, $realORFend, 'model4', $gffout); - $ORFmodified="yes"; - # } - }# Doesn't contain stop in the middle of the sequence - else{$special_or_partial_mRNA++;} - } - } - ########################### - # The real ORF is same size but check if +1 or +2 bp shit that give same number of AA but give frame shifts - elsif( (index($original_prot_obj->seq, '*') != -1 ) and (index($original_prot_obj->seq, '*') != length($original_prot_obj->seq)-1) ){ - if( exists($ListModel{5}) ){ - $ListModel{5}++; print "Model 5: gene=$gene_id_tag_key mRNA=$id_level2\n" if ($verbose); - print "my CDS was containing stop codon => model5\n" if $verbose; - modify_gene_model($hash_omniscient, \%omniscient_modified_gene, $gene_feature, $gene_id_tag_key, $level2_feature, $id_level2, \@exons_features, \@cds_feature_list, $cdsExtremStart, $cdsExtremEnd, $realORFstart, $realORFend, 'model3', $gffout); - $ORFmodified="yes"; - } - } - - - } # End there is a CDS - if($ORFmodified eq "yes"){ - $one_ORFmodified="yes"; - $mRNACounter_fixed++; # Count only mRNA modified - } - } # End foreach mRNA - } - # if($mrna_pseudo > 0){ - # all mRNA are pseudogene, we change the gene status to pseudogenes. - # if($mrna_pseudo == $number_mrna){ - # $mrna_pseudo_suspected=$mrna_pseudo_suspected+$number_mrna; - # $gene_pseudo_suspected++; - # $gene_feature->primary_tag('pseudogene'); - #transfert the gene and sub-feature to the omniscient_pseudogene hash - # my @level1_list=($gene_id_tag_key); - # fill_omniscient_from_other_omniscient_level1_id(\@level1_list, $hash_omniscient, \%omniscient_pseudogene); # If already exists in omniscient_modified_gene, it will be replaced by the modified one - # } - #only some of the isoform are pseudo... 
we remove them - # else{ - # $mrna_pseudo_removed=$mrna_pseudo_removed+$mrna_pseudo; - # $gene_pseudo_removed++; - # my @tag_list=('all'); - # my @id_list=($gene_id_tag_key); - # remove_element_from_omniscient(\@id_list, \@list_mrna_pseudo, $hash_omniscient, 'level2', 'false', \@tag_list); - # remove_tuple_from_omniscient(\@list_mrna_pseudo, $hash_omniscient, 'level3', 'false', \@tag_list); - # remove_element_from_omniscient(\@id_list, \@list_mrna_pseudo, \%omniscient_modified_gene, 'level2', 'false', \@tag_list); - # remove_tuple_from_omniscient(\@list_mrna_pseudo, \%omniscient_modified_gene, 'level3', 'false', \@tag_list); - # print "@list_mrna_pseudo has been removed because are isoform containing stop codon\n"; - # } - # } - } - - if($one_ORFmodified eq "yes"){ - $geneCounter++; - $mRNACounter=$mRNACounter+$number_mrna; #add all the mRNA if at least one modified - #save remodelate gene name - push(@modified_gene_list, $gene_id_tag_key); - } - else{push(@intact_gene_list, $gene_id_tag_key);} - - } -} - -########### -# Fix frame -fil_cds_frame(\%omniscient_modified_gene, $db, $codonTable); -#fil_cds_frame(\%omniscient_pseudogene); -fil_cds_frame($hash_omniscient, $db, $codonTable); - -#Clean omniscient_modified_gene of duplicated/identical genes and isoforms -print "removing duplicates\n" if $verbose; -_check_overlap_name_diff(\%omniscient_modified_gene, undef, $verbose); -_check_identical_isoforms(\%omniscient_modified_gene, undef, $verbose); - -######## -# Print results -print "print intact...\n"; -print_omniscient_from_level1_id_list($hash_omniscient, \@intact_gene_list, $gffout); #print intact gene to the file - -print "print modified...\n"; -print_omniscient(\%omniscient_modified_gene, $gffout2); #print gene modified in file - -# create a hash containing everything -print "print all with name of overlaping features resolved...\n"; -my $hash_all = subsample_omniscient_from_level1_id_list($hash_omniscient, \@intact_gene_list); -merge_omniscients( $hash_all, 
\%omniscient_modified_gene); -_check_overlap_name_diff($hash_all, undef, $verbose); -_check_identical_isoforms($hash_all, undef, $verbose); -print_omniscient($hash_all, $gffout3); #print gene modified in file - -#print_omniscient(\%omniscient_pseudogene, $gffout4); #print putative pseudogene in file - -#END -my $string_to_print="usage: $0 @copyARGV\nCodon table used:".$codonTable."\n"; -$string_to_print .="Results:\n"; -$string_to_print .= "$geneCounter genes has been modified. These gene has $mRNACounter mRNA, and among them $mRNACounter_fixed had their ORF fixed.\n"; -if (exists ($ListModel{1})){ - $string_to_print .= "$ListModel{1} model1: Prediction(s) contains the orignal prediction but is longer.\n"; -} -if (exists ($ListModel{2})){ - $string_to_print .= "$ListModel{2} model2: Longest ORF found non-overlaping the original one."; - if ($split_opt){ - $string_to_print .= " Thus, sequences have been split en two different genes (Consequently $ListModel{2} new genes has been created"; - } - $string_to_print .= "\n"; -} -if (exists ($ListModel{3})){ - $string_to_print .= "$ListModel{3} model3: sequences have been re-shaped/re-modeled (Longest ORF found overlaping the original one but doesn't contain it.)\n"; -} -if (exists ($ListModel{4})){ - my $withStop=$ListModel{4}-$special_or_partial_mRNA; - #my $withStop_butstillgene=$ListModel{4}-($mrna_pseudo_suspected+$mrna_pseudo_removed)-$special_or_partial_mRNA; - $string_to_print .="$ListModel{4} model4: The new prediction was shorter than the original:\n". - "Among them, $withStop are shorter due to the presence of stop codon. We have remodelated them (to avoid it do not select model4)". - "Among them, $special_or_partial_mRNA were partials (begining or finishing by NNNN or XXXX). The prediction is probably shorter because use an earlier start codon.\n". - - # " The threshold to declare them as a pseudogene (comparing to the original size) is $pseudo_threshold percent.\n". 
- # "According to this threshold, we change the gene status (primary_tag) of $gene_pseudo_suspected genes (corresponding to $mrna_pseudo_suspected mRNA) to pseudogene.\n". - # "According to this threshold, we suspect $gene_pseudo_suspected genes to be pseudogenes (corresponding to $mrna_pseudo_suspected mRNA). So they habe been reported in a secpific output file.\n". - # "$withStop_butstillgene mRNA(s) containing stop but over this treshold has been re-modelate.\n". - " They have been remodeleted.\n"; - - # "Moreover, $mrna_pseudo_removed putative pseudo mRNA isoforms have been removed because the gene has as well non-pseudo mRNA.\n"; -} -if (exists ($ListModel{5})){ - $string_to_print .= "$ListModel{5} model5: The prediction contained stop codons. \n"; -} - -$string_to_print .="\n/!\\Remind:\n L and M are AA are possible start codons for standard codon table.\nParticular case: If we have a triplet as WTG, AYG, RTG, RTR or ATK it will be seen as a possible Methionine codon start (it's a X aa)\n"; -#"An arbitrary choisce has been done: The longer translate can begin by a L only if it's longer by 21 AA than the longer translate beginning by M. 
It's happened $counter_case21 times here.\n"; - -print $string_to_print; -if($outfile){ - print $report $string_to_print -} -print "Bye Bye.\n"; - - -####################################################################################################################### - #################### - # METHODS # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## -sub modify_gene_model{ - - my ($hash_omniscient, $omniscient_modified_gene, $gene_feature, $gene_id_tag_key, $level2_feature, $id_level2, $exons_features, $cds_feature_list, $cdsExtremStart, $cdsExtremEnd, $realORFstart, $realORFend, $model, $gffout)=@_; - - ############################################### - # create CDS for new prediction # - my ($new_pred_utr5_list, $new_pred_cds_list, $new_pred_utr3_list) = modelate_utr_and_cds_features_from_exon_features_and_cds_start_stop($exons_features, $realORFstart, $realORFend); - - ######### - #RE-SHAPE last/first exon if less than 3 nucleotides (1 or 2 must be romved) when the CDS finish 1 or 2 nuclotide before... 
because cannot be defined as UTR - shape_exon_extremity($exons_features, $new_pred_cds_list); - - # Create UTR - my $variable_not_needed; - ($new_pred_utr5_list, $variable_not_needed, $new_pred_utr3_list) = modelate_utr_and_cds_features_from_exon_features_and_cds_start_stop($exons_features, $realORFstart, $realORFend); - - ############################################################# - # Remove ancient cds - my @tag_list=('exon'); - my @id_list=($id_level2); - remove_tuple_from_omniscient(\@id_list, $hash_omniscient, 'level3', 'false', \@tag_list); - - #################### - # Add new CDS/UTRs - foreach my $cds_feature (@$new_pred_cds_list){ - push (@{$hash_omniscient->{'level3'}{'cds'}{$id_level2}}, $cds_feature); - } - foreach my $utr5_feature (@$new_pred_utr5_list){ - push (@{$hash_omniscient->{'level3'}{'five_prime_utr'}{$id_level2}}, $utr5_feature); - } - foreach my $utr3_feature (@$new_pred_utr3_list){ - push (@{$hash_omniscient->{'level3'}{'three_prime_utr'}{$id_level2}}, $utr3_feature); - } - $level2_feature->add_tag_value('orfix', $model); - - check_start_end_of_mrna_feature($level2_feature, $exons_features); - check_start_end_of_gene_feature($hash_omniscient, $gene_id_tag_key); - - #transfert the gene and sub-feature to the omniscient_modified_gene hash - my @level1_list=($gene_id_tag_key); - fill_omniscient_from_other_omniscient_level1_id(\@level1_list, $hash_omniscient, $omniscient_modified_gene); # If already exists in omniscient_modified_gene, it will be replaced by the modified one - -} -############ /!\ -# P.S: To be perfect, when a gene is newly created, we should verify if it is not created where another one has already been created. If yes, the should be linked together !! 
-############ -sub split_gene_model{ - - my ($intact_gene_list, $hash_omniscient, $omniscient_modified_gene, $gene_feature, $gene_id_tag_key, $level2_feature, $id_level2, $exons_features, $cds_feature_list, $cdsExtremStart, $cdsExtremEnd, $realORFstart, $realORFend, $model, $gffout)=@_; - - my $numberOfNewGene=1; - - my @values=$gene_feature->get_tag_values('ID'); - my $realGeneName=shift(@values); - - ###################### - # Recreate exon list # - my $bolean_original_is_first; - my $first_end; - my $second_start; - #if new prediction after on the sequence - if($realORFstart >= $cdsExtremEnd){ - $bolean_original_is_first="true"; - $first_end=$cdsExtremEnd; - $second_start=$realORFstart; - } - else{ # ($realORFend < $cdsExtremStart) - $bolean_original_is_first="false"; - $first_end=$realORFend; - $second_start=$cdsExtremStart; - } - my ($newOrignal_exon_list, $newPred_exon_list) = create_two_exon_lists($exons_features,$first_end,$second_start,$bolean_original_is_first); - - #################################### - # Remodelate ancient gene - #################################### - - ############################################################# - # Remove all level3 feature execept cds - my @tag_list=('cds'); - my @id_list=($id_level2); - remove_tuple_from_omniscient(\@id_list, $hash_omniscient, 'level3', 'false', \@tag_list); - ############# - # Recreate original exon - @{$hash_omniscient->{'level3'}{'exon'}{$id_level2}}=@$newOrignal_exon_list; - - ######### - #RE-SHAPE last/first exon if less than 3 nucleotides (1 or 2 must be romved) when the CDS finish 1 or 2 nuclotide before... 
because cannot be defined as UTR - shape_exon_extremity($newOrignal_exon_list,$cds_feature_list); - - ######## - # calcul utr - my ($original_utr5_list, $variable_not_needed, $original_utr3_list) = modelate_utr_and_cds_features_from_exon_features_and_cds_start_stop($newOrignal_exon_list, $cdsExtremStart, $cdsExtremEnd); - - ######### - #RE-SHAPE mrna extremities - check_start_end_of_mrna_feature($level2_feature, $newOrignal_exon_list); - $level2_feature->add_tag_value('orfix',$model); - ######### - #RE-SHAPE gene model - if (must_be_a_new_gene($hash_omniscient, $gene_id_tag_key, $id_level2, $level2_feature)){ - ## create a new gene - my $new_gene_id="new_".$realGeneName."-".$numberOfNewGene; - $numberOfNewGene++; - my $new_gene_feature = Bio::SeqFeature::Generic->new(-seq_id => $level2_feature->seq_id, -source_tag => $level2_feature->source_tag, -primary_tag => 'gene' , -start => $level2_feature->start, -end => $level2_feature->end, -frame => $level2_feature->frame, -strand => $level2_feature->strand , -tag => { 'ID' => $new_gene_id }) ; - create_or_replace_tag($level2_feature,'Parent',$new_gene_id); - - # append new gene in omniscient_modified_gene - my @level1_list=($new_gene_feature); - my @level2_list=($level2_feature); - my @level3_list=(@$newOrignal_exon_list, @$cds_feature_list, @$original_utr5_list, @$original_utr3_list); - append_omniscient($omniscient_modified_gene, \@level1_list, \@level2_list, \@level3_list); - } - else{ # keep the original gene model that we modified - # check shape of original gene - check_start_end_of_gene_feature($hash_omniscient, $gene_id_tag_key); - - #include UTRS - if ( @$original_utr5_list ){ - $hash_omniscient->{'level3'}{$original_utr5_list->[0]->primary_tag()}{$id_level2}=[@$original_utr5_list]; - } - if ( @$original_utr3_list ){ - $hash_omniscient->{'level3'}{$original_utr3_list->[0]->primary_tag()}{$id_level2}=[@$original_utr3_list]; - } - - # append gene modified in omniscient_modified_gene - my 
@level1_list=($gene_id_tag_key); - fill_omniscient_from_other_omniscient_level1_id(\@level1_list, $hash_omniscient, $omniscient_modified_gene); # If already exists in omniscient_modified_gene, it will be replaced by the modified one - } - - ################################### - # Remodelate New Prediction - ################################### - - ############################################### - # Create CDS # - my ($new_pred_utr5_list, $new_pred_cds_list, $new_pred_utr3_list) = modelate_utr_and_cds_features_from_exon_features_and_cds_start_stop($newPred_exon_list, $realORFstart, $realORFend); - - #################################### - #RE-SHAPE last/first exon if less than 3 nucleotides (1 or 2 must be romved) when the CDS finish 1 or 2 nuclotide before... because cannot be defined as UTR - shape_exon_extremity($newPred_exon_list, $new_pred_cds_list); - - #create UTR - ($new_pred_utr5_list, $variable_not_needed, $new_pred_utr3_list) = modelate_utr_and_cds_features_from_exon_features_and_cds_start_stop($newPred_exon_list, $realORFstart, $realORFend); - - ###################################################### - # Modelate gene and mRNA features for new prediction # - @values = $newPred_exon_list->[0]->get_tag_values('Parent'); - my $transcript_id = shift @values; - my $new_mRNA_feature = Bio::SeqFeature::Generic->new(-seq_id => $newPred_exon_list->[0]->seq_id, -source_tag => $newPred_exon_list->[0]->source_tag, -primary_tag => 'mRNA' , -start => $newPred_exon_list->[0]->start, -end => $newPred_exon_list->[$#{$newPred_exon_list}]->end, -frame => $newPred_exon_list->[0]->frame, -strand => $newPred_exon_list->[0]->strand , -tag => { 'ID' => $transcript_id , 'Parent' => $realGeneName }) ; - - my @level1_list; - my @level2_list; - my @level3_list; - - #$numberOfNewGene == 1 mean we already use the gene container. So in the case where we have oly one mRNA, the split will give 2 mRNA. 
One is linked to the original gene container (done before) - # The second must be linked to a new gene container. So, even if must_be_a_new_gene method say no, we must create it because the original one has been already used. - my $create_a_new_gene=must_be_a_new_gene($hash_omniscient, $gene_id_tag_key, $transcript_id, $new_mRNA_feature); - if ( ($#{$hash_omniscient->{'level2'}{'mrna'}{$gene_id_tag_key}} == 0) and $numberOfNewGene == 1){ $create_a_new_gene="true";} - if ( $create_a_new_gene ){ - my $new_gene_id="new_".$realGeneName."-".$numberOfNewGene; - create_or_replace_tag($new_mRNA_feature, 'Parent', $new_gene_id); - my $new_gene_feature = Bio::SeqFeature::Generic->new(-seq_id => $newPred_exon_list->[0]->seq_id, -source_tag => $newPred_exon_list->[0]->source_tag, -primary_tag => 'gene' , -start => $newPred_exon_list->[0]->start, -end => $newPred_exon_list->[$#{$newPred_exon_list}]->end, -frame => $newPred_exon_list->[0]->frame, -strand => $newPred_exon_list->[0]->strand , -tag => { 'ID' => $new_gene_id } , 'orfix' => $model) ; - @level1_list=($new_gene_feature); - @level2_list=($new_mRNA_feature); - } - else{ #the new mRNA still overlap an isoform. So we keep the link with the original gene - # append new gene in omniscient_modified_gene - check_start_end_of_gene_feature($hash_omniscient, $gene_id_tag_key); - @level1_list=($gene_feature); - @level2_list=($new_mRNA_feature); - } - @level3_list=(@$newPred_exon_list, @$new_pred_cds_list, @$new_pred_utr5_list, @$new_pred_utr3_list); - append_omniscient($omniscient_modified_gene, \@level1_list, \@level2_list, \@level3_list); # If already exists , no replacement - - if ($numberOfNewGene > 1){ - #remove the mRNA from original omnicient (because the two mRNAs form the splited one are no linked to the original gene - # but are now linked to newly created gene features). The same for all linked level 3 features, so we remove them. 
- my @tag_list=('all'); - my @id_list=($id_level2); - - remove_tuple_from_omniscient(\@id_list, $hash_omniscient, 'level3', 'false', \@tag_list); - @id_list=($gene_id_tag_key);my @id_list2=($id_level2); - remove_element_from_omniscient(\@id_list, \@id_list2, $hash_omniscient, 'level2', 'false', \@tag_list); - #reshape end and start - check_start_end_of_gene_feature($hash_omniscient, $gene_id_tag_key); - push(@{$intact_gene_list}, $gene_id_tag_key); - } -} - -# Yes if mRNA doesnt overlap an other existing isoform -sub must_be_a_new_gene{ - my ($hash_omniscient, $gene_id, $id_level2, $level2_feature)=@_; - - my $result="true"; - my @list_mrna=@{$hash_omniscient->{'level2'}{'mrna'}{$gene_id}}; - - if($#list_mrna > 0){ #more than only one mrna - foreach my $mrna (@list_mrna){ - # get level2 id - my @values = $mrna->get_tag_values('ID'); - my $mrna_id = lc(shift @values); - if(lc($id_level2) ne lc($mrna_id)){ # we dont check mrna against itself - #Now check if overlap - if( ($level2_feature->start <= $mrna->end) and ($level2_feature->end >= $mrna->start) ){ # if it overlaps - $result=undef;last; - } - } - } - } - else{$result=undef;} #only one mRNA - - return $result; -} - -sub shape_exon_extremity{ - #exon_features is a sorted list - #cds_features is a sorted list - - my ($exon_features,$cds_features)=@_; - - #test between first exon and first cds - if( (abs($cds_features->[0]->start - $exon_features->[0]->start) < 3) and (abs($cds_features->[0]->start - $exon_features->[0]->start) > 0) ){ #We have to shape the exon start. 
We don't want a non multiple of 3 inferior to 3 - - $exon_features->[0]->start($cds_features->[0]->start); -# print "start reshaped\n"; - } - #test between last exon and last cds - if(abs($exon_features->[$#{ $exon_features }]->end - $cds_features->[$#{ $cds_features }]->end ) < 3){ #We have to shape the exon end - $exon_features->[$#{ $exon_features }]->end($cds_features->[$#{ $cds_features }]->end); -# print "end reshaped\n"; - } -} - -sub calcul_real_orf_end_and_start{ - #exons_features is sorted - my ($orf_cds_region, $exons_features)=@_; - - my $realORFstart; - my $realORFend; - - my $orf_start=$orf_cds_region->[0]; # get start to begin - my $orf_length=$orf_cds_region->[2]; # get start to begin - - my $first="yes"; - my $total_exon_length=0; - my $total_exon_length_previous_round=0; - my $mapped_length=0; - my $mapped_length_total=0; - my $the_rest_to_map=0; - - foreach my $exon_feature (@$exons_features){ - # Allows to follow the path on mRNA - my $exon_length=($exon_feature->end - $exon_feature->start)+1; - $total_exon_length_previous_round=$total_exon_length; - $total_exon_length=$total_exon_length+$exon_length; - # Allows to follow the path on the CDS - $mapped_length_total=$mapped_length_total+$mapped_length; - $the_rest_to_map=$orf_length-$mapped_length_total; - # exon overlap CDS - if($total_exon_length >= $orf_start){ #they begin to overlap - if($first eq "yes"){ - # $realORFstart=$exon_feature->start+($orf_start - 1); - $realORFstart=$exon_feature->start+($orf_start - $total_exon_length_previous_round ); - my $end_part_of_exon=$exon_feature->start- $realORFstart + 1; - if($end_part_of_exon >= $orf_length){ #exon ============================================ - $realORFend=$realORFstart+$orf_length-1; #cds ========================= - last; - } - $mapped_length=$exon_feature->end - $realORFstart + 1; - $first="no"; - } - else{ - $mapped_length=$exon_feature->end - $exon_feature->start + 1; - } - } - #exon are over the end of cds => we finish at this 
round - if($total_exon_length >= ($orf_start+$orf_length) ){ #exon ============================================ - if($realORFstart > $exon_feature->start){ #cds ========================= - $realORFend=$realORFstart+$the_rest_to_map - 1 ; - last; - }else{ - $realORFend=$exon_feature->start + $the_rest_to_map - 1 ; - last; - } - } - } -return $realORFstart, $realORFend; -} - -# Check the start and end of mRNA and gene feature; -sub check_start_end_of_mrna_feature{ - - my ($mRNA_feature, $exon_list)=@_; - - ###### - #Modify mRNA start-end based on exon features - my $exonStart=$exon_list->[0]->start; - my $exonEnd=$exon_list->[$#{$exon_list}]->end; - if ($mRNA_feature->start != $exonStart){ - $mRNA_feature->start($exonStart); - } - elsif($mRNA_feature->end != $exonEnd){ - $mRNA_feature->end($exonEnd); - } -} - -# Check the start and end of gene feature based on its mRNA; -sub check_start_end_of_gene_feature{ - - my ($hash_omniscient, $gene_id)=@_; - - ##### - #Modify gene start-end (have to check size of each mRNA) - my $geneExtremStart=1000000000000; - my $geneExtremEnd=0; - foreach my $primary_tag_key_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - foreach my $mrna_feature ( @{$hash_omniscient->{'level2'}{'mrna'}{$gene_id}}) { - my $start=$mrna_feature->start(); - my $end=$mrna_feature->end(); - - if ($start < $geneExtremStart){ - $geneExtremStart=$start; - } - if($end > $geneExtremEnd){ - $geneExtremEnd=$end; - } - } - } - my $gene_feature=$hash_omniscient->{'level1'}{'gene'}{$gene_id}; - if ($gene_feature->start != $geneExtremStart){ - $gene_feature->start($geneExtremStart); - } - elsif($gene_feature->end != $geneExtremEnd){ - $gene_feature->end($geneExtremEnd); - } -} - - -# The exons containing the original cds keep their parent names. The exon containing the new cds will have a new parent name. 
-sub create_two_exon_lists { - # orignalFirst == true if original gene is first on the prediction - my ($exons_features,$firstEnd, $secondStart, $orignalFirst)=@_; - my @list_exon_originalPred; - my @list_exon_newPred; - - foreach my $exon_feature (@$exons_features){ #for each exon -# print "start:".$exon_feature->start." end:".$exon_feature->end."\n"; - if(two_positions_on_feature($exon_feature,$firstEnd,$secondStart)){ # We have to split the exon_feature P.S: We will loss sequence between the two positions -# print "both on feature\n"; - my $duplicated_exon_feature=clone($exon_feature);#create a copy of the feature - #manage original exon - $exon_feature->end($firstEnd); - $duplicated_exon_feature->start($secondStart); - - if($orignalFirst eq "true"){ - push( @list_exon_originalPred, $exon_feature); - - my @values = $duplicated_exon_feature->get_tag_values('ID'); - my $value = $values[0]; - create_or_replace_tag($duplicated_exon_feature,'ID', 'new_'.$value); - @values = $duplicated_exon_feature->get_tag_values('Parent'); - $value = $values[0]; - create_or_replace_tag($duplicated_exon_feature,'Parent', 'new_'.$value); - push( @list_exon_newPred, $duplicated_exon_feature); - next; - }else{ #original pred after - push( @list_exon_originalPred, $duplicated_exon_feature); - - my @values = $exon_feature->get_tag_values('ID'); - my $value = $values[0]; - create_or_replace_tag($exon_feature,'ID', 'new_'.$value); - @values = $exon_feature->get_tag_values('Parent'); - $value = $values[0]; - create_or_replace_tag($exon_feature,'Parent', 'new_'.$value); - push( @list_exon_newPred, $exon_feature); - next; - } - } - if(! 
(($exon_feature->end <= $secondStart) and ($exon_feature->start >= $firstEnd))){ # We remove it because exon between CDSs - if ($exon_feature->end <= $secondStart) { - if ($orignalFirst eq "true"){ - push( @list_exon_originalPred, $exon_feature); - }else{ - my $duplicated_exon_feature=clone($exon_feature);#create a copy of the feature - my @values = $duplicated_exon_feature->get_tag_values('ID'); - my $value = $values[0]; - create_or_replace_tag($duplicated_exon_feature,'ID', 'new_'.$value); - @values = $duplicated_exon_feature->get_tag_values('Parent'); - $value = $values[0]; - create_or_replace_tag($duplicated_exon_feature,'Parent', 'new_'.$value); - push( @list_exon_newPred, $duplicated_exon_feature); - } - } - if ($exon_feature->start >= $firstEnd) { - if($orignalFirst eq "true"){ - my $duplicated_exon_feature=clone($exon_feature);#create a copy of the feature - my @values = $duplicated_exon_feature->get_tag_values('ID'); - my $value = $values[0]; - create_or_replace_tag($duplicated_exon_feature,'ID', 'new_'.$value); - @values = $duplicated_exon_feature->get_tag_values('Parent'); - $value = $values[0]; - create_or_replace_tag($duplicated_exon_feature,'Parent', 'new_'.$value); - push( @list_exon_newPred, $duplicated_exon_feature); - }else{ - push( @list_exon_originalPred, $exon_feature); - } - } - } - } - my @list_exon_originalPred_sorted = sort {$a->start <=> $b->start} @list_exon_originalPred; - my @list_exon_newPred_sorted = sort {$a->start <=> $b->start} @list_exon_newPred; - - return \@list_exon_originalPred_sorted, \@list_exon_newPred_sorted; -} - -sub position_on_feature { - - my ($feature,$position)=@_; - - my $isOnSameExon=undef; - if ( ($position >= $feature->start and $position <= $feature->end)){ - $isOnSameExon="true"; - } - return $isOnSameExon; -} - -sub two_positions_on_feature { - - my ($feature,$position1,$position2)=@_; - - my $areOnSameExon=undef; - if ( ($position1 >= $feature->start and $position1 <= $feature->end) and ($position2 >= 
$feature->start and $position2 <= $feature->end) ){ - $areOnSameExon="true"; - } - return $areOnSameExon; -} - -sub translate_JD { - my ($self,@args) = @_; - my ($terminator, $unknown, $frame, $codonTableId, $complete, - $complete_codons, $throw, $codonTable, $orf, $start_codon, $no_start_by_aa, $offset); - - ## new API with named parameters, post 1.5.1 - if ($args[0] && $args[0] =~ /^-[A-Z]+/i) { - ($terminator, $unknown, $frame, $codonTableId, $complete, - $complete_codons, $throw,$codonTable, $orf, $start_codon, $no_start_by_aa, $offset) = - $self->_rearrange([qw(TERMINATOR - UNKNOWN - FRAME - CODONTABLE_ID - COMPLETE - COMPLETE_CODONS - THROW - CODONTABLE - ORF - START - NOSTARTBYAA - OFFSET)], @args); - ## old API, 1.5.1 and preceding versions - } else { - ($terminator, $unknown, $frame, $codonTableId, - $complete, $throw, $codonTable, $offset) = @args; - } - - ## Initialize termination codon, unknown codon, codon table id, frame - $terminator = '*' unless (defined($terminator) and $terminator ne ''); - $unknown = "X" unless (defined($unknown) and $unknown ne ''); - $frame = 0 unless (defined($frame) and $frame ne ''); - $codonTableId = 1 unless (defined($codonTableId) and $codonTableId ne ''); - $complete_codons ||= $complete || 0; - - ## Get a CodonTable, error if custom CodonTable is invalid - if ($codonTable) { - $self->throw("Need a Bio::Tools::CodonTable object, not ". $codonTable) - unless $codonTable->isa('Bio::Tools::CodonTable'); - } else { - - # shouldn't this be cached? Seems wasteful to have a new instance - # every time... 
- $codonTable = Bio::Tools::CodonTable->new( -id => $codonTableId); - } - - ## Error if alphabet is "protein" - $self->throw("Can't translate an amino acid sequence.") if - ($self->alphabet =~ /protein/i); - - ## Error if -start parameter isn't a valid codon - if ($start_codon) { - $self->throw("Invalid start codon: $start_codon.") if - ( $start_codon !~ /^[A-Z]{3}$/i ); - } - - my $seq; - - if ($offset) { - $self->throw("Offset must be 1, 2, or 3.") if - ( $offset !~ /^[123]$/ ); - my ($start, $end) = ($offset, $self->length); - ($seq) = $self->subseq($start, $end); - } else { - ($seq) = $self->seq(); - } - - ## ignore frame if an ORF is supposed to be found - my $orf_region; - if ( $orf ) { - ($orf_region) = _find_orfs_nucleotide_JD( $self, $seq, $codonTable, $start_codon, $no_start_by_aa, $orf eq 'longest' ? 0 : 'first_only' ); - $seq = $self->_orf_sequence( $seq, $orf_region ); - } else { - ## use frame, error if frame is not 0, 1 or 2 - $self->throw("Valid values for frame are 0, 1, or 2, not $frame.") - unless ($frame == 0 or $frame == 1 or $frame == 2); - $seq = substr($seq,$frame); - } - - ## Translate it - my $output = $codonTable->translate($seq, $complete_codons); - # Use user-input terminator/unknown - $output =~ s/\*/$terminator/g; - $output =~ s/X/$unknown/g; - - ## Only if we are expecting to translate a complete coding region - if ($complete) { - my $id = $self->display_id; - # remove the terminator character - if( substr($output,-1,1) eq $terminator ) { - chop $output; - } else { - $throw && $self->throw("Seq [$id]: Not using a valid terminator codon!"); - $self->warn("Seq [$id]: Not using a valid terminator codon!"); - } - # test if there are terminator characters inside the protein sequence! 
- if ($output =~ /\Q$terminator\E/) { - $id ||= ''; - $throw && $self->throw("Seq [$id]: Terminator codon inside CDS!"); - $self->warn("Seq [$id]: Terminator codon inside CDS!"); - } - # if the initiator codon is not ATG, the amino acid needs to be changed to M - if ( substr($output,0,1) ne 'M' ) { - if ($codonTable->is_start_codon(substr($seq, 0, 3)) ) { - $output = 'M'. substr($output,1); - } elsif ($throw) { - $self->throw("Seq [$id]: Not using a valid initiator codon!"); - } else { - $self->warn("Seq [$id]: Not using a valid initiator codon!"); - } - } - } - - my $seqclass; - if ($self->can_call_new()) { - $seqclass = ref($self); - } else { - $seqclass = 'Bio::PrimarySeq'; - $self->_attempt_to_load_Seq(); - } - my $out = $seqclass->new( '-seq' => $output, - '-display_id' => $self->display_id, - '-accession_number' => $self->accession_number, - # is there anything wrong with retaining the - # description? - '-desc' => $self->desc(), - '-alphabet' => 'protein', - '-verbose' => $self->verbose - ); - return $out, $orf_region; -} - -sub concatenate_feature_list{ - - my ($feature_list) = @_; - - my $seq = ""; - my $ExtremStart=1000000000000; - my $ExtremEnd=0; - - foreach my $feature (@$feature_list) { - my $start=$feature->start(); - my $end=$feature->end(); - my $seqid=$feature->seq_id(); - $seq .= $db->seq( $seqid, $start, $end ); - - if ($start < $ExtremStart){ - $ExtremStart=$start; - } - if($end > $ExtremEnd){ - $ExtremEnd=$end; - } - } - return $ExtremStart, $seq, $ExtremEnd; -} - -sub _find_orfs_nucleotide_JD { - my ( $self, $sequence, $codon_table, $start_codon, $no_start_by_aa, $first_only ) = @_; - $sequence = uc $sequence; - $start_codon = uc $start_codon if $start_codon; - - my $is_start = $start_codon - ? 
sub { shift eq $start_codon } - : sub { $codon_table->is_start_codon( shift ) }; - - # stores the begin index of the currently-running ORF in each - # reading frame - my @current_orf_start = (-1,-1,-1); - - #< stores coordinates of longest observed orf (so far) in each - # reading frame - my @orfs; - - # go through each base of the sequence, and each reading frame for each base - my $seqlen = CORE::length $sequence; - for( my $j = 0; $j <= $seqlen-3; $j++ ) { - my $frame = $j % 3; - - my $this_codon = substr( $sequence, $j, 3 ); - my $AA = $codon_table->translate($this_codon); - - # if in an orf and this is either a stop codon or the last in-frame codon in the string - if ( $current_orf_start[$frame] >= 0 ) { - if ( $codon_table->is_ter_codon( $this_codon ) ||( my $is_last_codon_in_frame = ($j >= $seqlen-5)) ) { - # record ORF start, end (half-open), length, and frame - my @this_orf = ( $current_orf_start[$frame], $j+3, undef, $frame ); - my $this_orf_length = $this_orf[2] = ( $this_orf[1] - $this_orf[0] ); - - $self->warn( "Translating partial ORF " - .$self->_truncate_seq( $self->_orf_sequence( $sequence,\@ this_orf )) - .' from end of nucleotide sequence' - ) - if $first_only && $is_last_codon_in_frame; - - return\@ this_orf if $first_only; - push @orfs,\@ this_orf; - $current_orf_start[$frame] = -1; - } - } - # if this is a start codon - elsif ($is_start->($this_codon)) { - if($no_start_by_aa){ - - if($AA ne $no_start_by_aa){ - $current_orf_start[$frame] = $j; - } - } - else{ - $current_orf_start[$frame] = $j; - } - } - } - - return sort { $b->[2] <=> $a->[2] } @orfs; -} - -# L1: LocusID->level->typeFeature->ID->[ID,start,end] -# LocusID->level->typeFeature->Parent->[ID,start,end] -# @Purpose: When two feature overlap at level3, and are the same type level 2 they have to be merged under the same level 1 feature. 
-# @input: 2 => hash, integer for verbosity -# @output: 0 -sub _check_overlap_name_diff{ - my ($omniscient, $mRNAGeneLink, $verbose) = @_; - my $resume_case=undef; - - my $sortBySeq = gather_and_sort_l1_location_by_seq_id_and_strand($omniscient); - - foreach my $locusID ( keys %{$sortBySeq}){ # tag_l1 = gene or repeat etc... - - foreach my $tag_l1 ( keys %{$sortBySeq->{$locusID}} ) { - - # Go through location from left to right ### !! - while ( @{$sortBySeq->{$locusID}{$tag_l1}} ){ - - my $location = shift @{$sortBySeq->{$locusID}{$tag_l1}};# This location will be updated on the fly - - my $id_l1 = $location->[0]; - - # Go through location from left to right ### !! - foreach my $location_to_check ( @{$sortBySeq->{$locusID}{$tag_l1}} ) { - my $id2_l1 = $location_to_check->[0]; - - #If location_to_check start if over the end of the reference location, we stop - if($location_to_check->[1] > $location->[2]) {last;} - - my ($location, $overlap) = location_overlap_update($location, $location_to_check); # location is updated on the fly, and the newly modified location is the one that will be used at the next loop - - # Let's check at Gene LEVEL - if($overlap){ - - #let's check at CDS level - if(check_gene_overlap_at_CDSthenEXON($omniscient, $omniscient , lc($id_l1), lc($id2_l1) )){ #If contains CDS it has to overlap at CDS level to be merged, otherwise any type of feature level3 overlaping is sufficient to decide to merge the level1 together - #they overlap in the CDS we should give them the same name - $resume_case++; - - print "$id_l1 and $id2_l1 same locus. We merge them together. Below the corresponding feature groups in their whole.\n" if ($verbose >= 3); - print "$id_l1 and $id2_l1 same locus. We merge them together. 
Below the corresponding feature groups in their whole.\n"; - print_omniscient_from_level1_id_list($omniscient, [lc($id_l1),lc($id2_l1)], $fh_error ) if ($verbose >= 3); - # remove the level1 of the ovelaping one - delete $omniscient->{'level1'}{$tag_l1}{lc($id2_l1)}; - # remove the level2 to level1 link stored into the mRNAGeneLink hash. The new links will be added just later after the check to see if we keep the level2 feature or not (we remove it when identical) - foreach my $l2_type (%{$omniscient->{'level2'}}){ - if(exists_keys($omniscient,('level2', $l2_type, lc($id2_l1) ))){ - foreach my $feature_l2 (@{$omniscient->{'level2'}{$l2_type}{lc($id2_l1)}}){ - delete $mRNAGeneLink->{lc($feature_l2->_tag_value('ID'))}; - } - } - } - - # Let's change the parent of all the L2 features - foreach my $l2_type (%{$omniscient->{'level2'}} ){ - - if(exists_keys($omniscient,('level2', $l2_type, lc($id2_l1) ))){ - ############################### - # REMOVE THE IDENTICAL ISOFORMS - - # first list uniqs - my $list_of_uniqs = keep_only_uniq_from_list2($omniscient, $omniscient->{'level2'}{$l2_type}{lc($id_l1)}, $omniscient->{'level2'}{$l2_type}{lc($id2_l1)}, $verbose); # remove if identical l2 exists - - - #Now manage the rest - foreach my $feature_l2 (@{$list_of_uniqs}){ - - create_or_replace_tag($feature_l2,'Parent', $id_l1); #change the parent - # Add the corrected feature to its new L2 bucket - push (@{$omniscient->{'level2'}{$l2_type}{lc($id_l1)}}, $feature_l2); - - # Attach the new parent into the mRNAGeneLink hash - $mRNAGeneLink->{lc($feature_l2->_tag_value('ID'))}=$feature_l2->_tag_value('Parent'); - - } - # remove the old l2 key - delete $omniscient->{'level2'}{$l2_type}{lc($id2_l1)}; - } - } - check_level1_positions($omniscient, $omniscient->{'level1'}{$tag_l1}{lc($id_l1)}, 0); - - #Update the location on the fly - if($omniscient->{'level1'}{$tag_l1}{lc($id_l1)}->end > $sortBySeq->{$locusID}{'level1'}{$tag_l1}{lc($id_l1)}[2]){ - 
$sortBySeq->{$locusID}{'level1'}{$tag_l1}{lc($id_l1)}[2] = $omniscient->{'level1'}{$tag_l1}{lc($id_l1)}->end; - #print "This one Now !!\n";exit; - } - } - } - } - } - } - } - print "We fixed $resume_case case where feature has been merged within the same locus\n" if($verbose and $resume_case); -} - - -# @Purpose: When too feature l2 isoform are identical, we remove one -# @input: 2 => hash, integer for verbosity -# @output: 0 -sub _check_identical_isoforms{ - my ($omniscient, $mRNAGeneLink, $verbose) = @_; - my $resume_case=undef; - - # Go through oall l2 feature - foreach my $l2_type (keys %{$omniscient->{'level2'}}){ - foreach my $id2_l1 (keys %{$omniscient->{'level2'}{$l2_type}}){ - # If more than 1 related to level1 - - if(exists_keys($omniscient,('level2', $l2_type, $id2_l1)) and scalar @{$omniscient->{'level2'}{$l2_type}{$id2_l1}} > 1){ # more than one l2 feature of that type - - my @L2_list_to_remove; - my %checked; - foreach my $feature2 (sort {$b->_tag_value('ID') cmp $a->_tag_value('ID')} @{$omniscient->{'level2'}{$l2_type}{$id2_l1}}){ - $checked{lc($feature2->_tag_value('ID'))}{lc($feature2->_tag_value('ID'))}++; - - my $keep = 1; - foreach my $feature1 (sort {$b cmp $a} @{$omniscient->{'level2'}{$l2_type}{$id2_l1}}){ - - # If not itself and not already checked (A -> B is the same as B -> A), and A or B already removed and must now be skiped (skipme key) - if( (! exists_keys(\%checked, (lc($feature2->_tag_value('ID')), "skipme"))) and (! exists_keys(\%checked, (lc($feature1->_tag_value('ID')), "skipme"))) and ! 
exists_keys(\%checked, ( lc($feature2->_tag_value('ID')), lc($feature1->_tag_value('ID')) ) ) ){ # - $checked{lc($feature2->_tag_value('ID'))}{lc($feature1->_tag_value('ID'))}++; - $checked{lc($feature1->_tag_value('ID'))}{lc($feature2->_tag_value('ID'))}++; - - #check their position are identical - if($feature1->start().$feature1->end() eq $feature2->start().$feature2->end()){ - - #Check their subfeature are identicals - if(l2_identical($omniscient, $feature1, $feature2, $verbose )){ - $keep = undef; - last; - } - } - } - } - # We dont keep the l2 feature so we have to remove all related features and itself - if(! $keep){ - $resume_case++; - print "Lets remove isoform ".$feature2->_tag_value('ID')."\n" if ($verbose >= 2); - $checked{lc($feature2->_tag_value('ID'))}{"skipme"}++;# will be removed later do not check anymore this one - - foreach my $tag (keys %{$omniscient->{'level3'}}){ - if(exists_keys($omniscient, ('level3', $tag, lc($feature2->_tag_value('ID'))))){ - delete $omniscient->{'level3'}{$tag}{lc($feature2->_tag_value('ID'))}; - } - } - #Has to be removed once we finished to go through the l2 list - my $ID_to_remove = lc($feature2->_tag_value('ID')); - push(@L2_list_to_remove,$ID_to_remove); - delete $mRNAGeneLink->{$ID_to_remove}; - } - } - - #L2 has to be removed from List - my @newL2List; - foreach my $feature ( @{$omniscient->{'level2'}{$l2_type}{$id2_l1}} ){ - my $keep = 1; - foreach my $id_l2 (@L2_list_to_remove){ - if( lc($feature->_tag_value('ID')) eq lc($id_l2) ){ - $keep = undef; - } - } - if($keep){ - push (@newL2List,$feature) - } - } - @{$omniscient->{'level2'}{$l2_type}{$id2_l1}}=@newL2List; - - } - } - } - print "We removed $resume_case cases where gene where identical.\n" if($verbose and $resume_case); -} - -# Sort by locusID !!!! 
-# L1 => LocusID->level->typeFeature->ID =[ID,start,end] -# L2 and L3 => LocusID->level->typeFeature->Parent->ID = [ID,start,end] -# -# -sub _sort_by_seq{ - my ($omniscient) = @_; - - my %hash_sortBySeq; - - foreach my $tag_level1 (keys %{$omniscient->{'level1'}}){ - foreach my $level1_id (keys %{$omniscient->{'level1'}{$tag_level1}}){ - my $level1_feature = $omniscient->{'level1'}{$tag_level1}{$level1_id}; - my $ID = $level1_feature->_tag_value('ID'); - my $strand="+"; - if($level1_feature->strand != 1){$strand = "-";} - my $position_l1=$level1_feature->seq_id."".$strand; - - $hash_sortBySeq{$position_l1}{"level1"}{$tag_level1}{$level1_id} = [$ID, int($level1_feature->start), int($level1_feature->end)]; - } - } - return \%hash_sortBySeq; -} - -__END__ - -=head1 NAME - -gff3_fixLongestORF.pl - -The script take a gff3 file as input. - -The script looks for other ORF in each gene model described in the gff file. -Several ouput files will be written if you specify an output. One will contain the gene not modified (intact), -one the gene models fixed, one contains the putative pseudogene detected (As they are just putatuve, they are also present among the intacts ), and a last a report of the results. -Pseudogene particularity: If gene contains mRNA models goods and mRNA that look like a pseudogene, the pseudogene one will be removed. - - -=head1 SYNOPSIS - - ./gff3_fixLongestORF.pl -gff=infile.gff --fasta genome.fa [ -o outfile ] - ./gff3_fixLongestORF.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<-gff> - -Input GFF3 file that will be read (and sorted) - -=item B<-fa> or B<--fasta> - -Genome fasta file -The name of the fasta file containing the genome to work with. - - -=item B<--ct>, B<--codon> or B<--table> - -Codon table to use. 0 By default. - -=item B<-m> or B<--model> - -Kind of ORF fix Model you want. By default all are used. 
To define specific model writte: --model 1,4 -Model1 = sequence original is part of new prediction; the predicted one is longest -Model2 = sequence original predicted are different; the predicted one is longest, they don't overlap each other. -Model3 = original protein and predicted one are different; the predicted one is longest, they overlap each other. -Model4 = The prediction is shorter... /!\ -Model5 = The prediction is same size but not correct frame (+1 or +2 bp gives frame shift). The predicted... /!\ - -=item B<-s> or B<--split> - -This option is usefull for Model2. Indeed when the prediction is non overlaping the original cds, it is possible to split the gene into two different genes. By default we don't split it. -We keep the longest. If you want to split it type: -s - -=item B<-o> , B<--output> , B<--out> or B<--outfile> - -Output GFF file. If no output file is specified, the output will be -written to STDOUT. - -=item B<-v> - -verbose - -=item B<-h> or B<--help> - -Display this helpful text. 
- -=back - -=cut - - -MODEL5 example (extra C at the begining of the CDS): -sequence= CATGTTCAAACGTCTCGAAAATATAGCCGTCCAATCATCCTCCTTCCCCCAGGCAATCTCCTTGATCCAGCAAAACCACCTCTCTCCAAAACTCTTCTTTGATCCCCAGACCTACTCCAAGATCTTCCAGAAACTCTCCCTCAAAGACCAATACCCTCGTTCCTCCCAGTCCCTATGCATCATAGACTACCACGTAGGCTTCACGCCCTTCTCCTACTTCCTCCATAAGGAGCTACACCCTGCACATCACGTCATCTTCCCCGATAGTGTCGCTGCCAACAAGTTCTGGACCCAGATGCTACTGAAGGACCCCGACTGTAAGGACATGGTCATAGACGAGACTCAGGCAAACACAGTGCTAAAGCACAATTTCCTCAATAGATCGCTGGAATTGGGCCACGTCGTTGCAGTAGAACAGACAGACCTAACTAAGGTCAACGACTCGATACTATTGACCGGTAACTTCGTCGATACTTCCGGCGGGGACTCTCTACGGATCTTACTCTTCTTTAATCAGATGAAAACTTCCGTCTTTCAGTATAATAACGTCAAGTTCTTGGCGTGGCTGCCCGCCTCGGAGTCTCTGAAGTTCATAGGACCGATTGGATCGAGGCATAGACGGTCCAATGCGCTGATGACCAACCTATTTGCCAACGTTGACGTGGTAGCGTACTCTAATTATGGCAAGAAGAAGAGCGTTTCCCGAGTCTTGGACGAATATAAGGACGCTGTCAAGCTACCACAGATTCCTGGACAGAAAGACGTATGTTTGATGGAATTTCAGTCGAACTATTCCAAATACGACATTAAATTTCCTGACGAATTGCATTTGATCATACACAAGATGTTGATATCGCCCAGCAATAAGTTGATTGACAATCTTCATTTGCTTGGGCCCGGTGCAAAGGAGTATTTGGAGACCAAGTTGGATCCCGAGCTGTTACAGAAGCCTGCGCCGAACATTACGGAGCAGGAGTTTATAGATATCAGCGAGGCGTATTATTACTGGCCGTTCAAGCCTAACGTTCACTTGGAGACGTATTTAGGAGATCCTCCGGAGGAGGAGTAG -GFF = -y922_scaffold13 . gene 1 1068 . + . ID=DEKNAAG101268;Name=DEKNAAG101268 -y922_scaffold13 . mRNA 1 1068 . + . ID=DEKNAAT101273;Parent=DEKNAAG101268;Name=DEKNAAT101273;description=Predicted: mitochondrial rna polymerase specificity factor -y922_scaffold13 . exon 1 1068 . + . ID=DEKNAAE101408;Parent=DEKNAAT101273;Name=DEKNAAE101408 -y922_scaffold13 . CDS 1 1068 . 
+ 0 ID=DEKNAAC101407;Parent=DEKNAAT101273;Name=DEKNAAC101407 diff --git a/annotation/Tools/Util/gff/gff3_sp_fix_overlaping_genes.pl b/annotation/Tools/Util/gff/gff3_sp_fix_overlaping_genes.pl deleted file mode 100755 index c47419c0c..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_fix_overlaping_genes.pl +++ /dev/null @@ -1,353 +0,0 @@ -#!/usr/bin/env perl - -## merge gene feature if they have CDSs that overlap -# 2015-07 - -use strict; -use warnings; -use Carp; -use Getopt::Long; -use Pod::Usage; -use List::MoreUtils qw(uniq); -use NBIS::GFF3::Omniscient; - -my $usage = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $outfile = undef; -my $ref = undef; -my $help= 0; - -if ( !GetOptions( - "help|h" => \$help, - "f|file|gff3|gff=s" => \$ref, - "output|outfile|out|o=s" => \$outfile)) - -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 2, - -exitval => 0, - -message => "$usage\n" } ); -} - -if ( ! (defined($ref)) ){ - pod2usage( { - -message => "\nAt least 1 parameters is mandatory:\n". 
- "$usage\n", - -verbose => 0, - -exitval => 2 } ); -} - -###################### -# Manage output file # -my $gffout; -if ($outfile) { - $outfile=~ s/.gff//g; -open(my $fh, '>', $outfile.".gff") or die "Could not open file '$outfile' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); -} -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - -##################################### -# END Manage Ouput Directory / File # -##################################### -my $error_found=undef; -### Parse GFF input # -print ("Parse file $ref\n"); -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $ref - }); -print ("$ref file parsed\n"); - -# sort by seq id -my %hash_sortBySeq; -foreach my $tag_level1 (keys %{$hash_omniscient->{'level1'}}){ - foreach my $level1_id (keys %{$hash_omniscient->{'level1'}{$tag_level1}}){ - if (exists_keys($hash_omniscient, ('level2','mrna',$level1_id)) ){ # check if they have mRNA avoiding autovivifcation - my @mrna_values = $hash_omniscient->{'level2'}{'mrna'}{$level1_id}[0]->get_tag_values('ID'); - my $mrna_id = shift @mrna_values; - if (exists_keys($hash_omniscient, ('level3','cds',lc($mrna_id))) ){ # check if they have cds avoiding autovivification. Allow to skip tRNA. 
- my $position=$hash_omniscient->{'level1'}{$tag_level1}{$level1_id}->seq_id."".$hash_omniscient->{'level1'}{$tag_level1}{$level1_id}->strand; - push (@{$hash_sortBySeq{$tag_level1}{$position}}, $hash_omniscient->{'level1'}{$tag_level1}{$level1_id}); - } - } - } -} - -my $total_overlap=0; -#find overlap -my %feature_studied; -foreach my $tag (keys %hash_sortBySeq){ # loop over all the feature level1 - - foreach my $seqid (keys %{$hash_sortBySeq{$tag}}){ - - foreach my $gene_feature ( @{$hash_sortBySeq{$tag}{$seqid}}){ - my @values = $gene_feature->get_tag_values('ID'); - my $gene_id = shift @values; - $feature_studied{$gene_id}++; - my @ListOverlapingGene=(); - my $nb_feat_overlap=0; - my ($start1,$end1) = get_longest_cds_start_end($hash_omniscient,$gene_id); # look at CDS because we want only ioverlapinng CDS - - foreach my $gene_feature2 ( @{$hash_sortBySeq{$tag}{$seqid}}){ # loop over all the level1 feature except the one we are already focusing on - my @values2 = $gene_feature2->get_tag_values('ID'); - my $gene_id2 = shift @values2; - - if(! exists($feature_studied{$gene_id2}) ){ #we compare different feature - my ($start2,$end2) = get_longest_cds_start_end($hash_omniscient,$gene_id2); # look at CDS becaus ewe want only ioverlapinng CDS - - if( ($start2 <= $end1) and ($end2 >= $start1) ){ #feature overlap considering extrem start and extrem stop. It's just to optimise the next step. Avoid to do the next step every time. So at the end, that test (current one) could be removed - - #now check at each CDS feature independently - if (two_features_overlap($hash_omniscient,$gene_id, $gene_id2)){ - print "These two features overlap without same id ! 
:\n".$gene_feature->gff_string."\n".$gene_feature2->gff_string."\n"; - $error_found="yes"; - $nb_feat_overlap++; - $total_overlap++; - $feature_studied{$gene_id2}++; - push(@ListOverlapingGene, $gene_feature2); - } - } - } - } - - # Now manage name if some feature overlap - if( $nb_feat_overlap > 0){ - push(@ListOverlapingGene, $gene_feature); - print "$nb_feat_overlap overlapping feature found ! We will treat them now:\n"; - my ($reference_feature, $ListToRemove)=take_one_as_reference(\@ListOverlapingGene); - print "We decided to keep that one: ".$reference_feature->gff_string."\n"; - - my $gene_id_ref = $reference_feature->_tag_value('ID'); - - #change level2 parent for feature of level2 that have a feature of level1 in $ListToRemove list - foreach my $featureToRemove (@$ListToRemove){ - - my @values_to_remove = $featureToRemove->get_tag_values('ID'); - my $gene_id_to_remove = lc(shift @values_to_remove); - - foreach my $tag_level2 (keys %{$hash_omniscient->{'level2'}}){ - - if (exists_keys($hash_omniscient, ('level2',$tag_level2,$gene_id_to_remove)) ){ # check if they have cds avoiding autovivification. 
- - my @list_tmp_features = @{$hash_omniscient->{'level2'}{$tag_level2}{$gene_id_to_remove}}; # As we will remove element of the list we cannot loop over it directly, we have to save the list in a temporary list; - foreach my $level2_feature (@list_tmp_features){ #replace Parent of each feature level2 by the new level1 reference - # Change parent feature - create_or_replace_tag($level2_feature,'Parent',$gene_id_ref); - - #add it in other list - push (@{$hash_omniscient->{'level2'}{$tag_level2}{lc($gene_id_ref)}},$level2_feature); - - #remove mRNA from list <= not mandatory - my @mrna_values_to_remove = $level2_feature->get_tag_values('ID'); - my $mrna_id_to_remove = lc(shift @mrna_values_to_remove); - - my @tag_list=('all'); - my @id_list=($gene_id_to_remove);my @id_list2=($mrna_id_to_remove); - - remove_element_from_omniscient(\@id_list, \@id_list2, $hash_omniscient, 'level2', 'false', \@tag_list); - - } - } - } - foreach my $tag_level1 (keys %{$hash_omniscient->{'level1'}}){ # remove the old feature level1 now - delete $hash_omniscient->{'level1'}{$tag_level1}{$gene_id_to_remove}; # delete level1 - } - } #END FEATURE TO HANDLE - ### - # check end and start of the new feature - my $gene_id=lc($reference_feature->_tag_value('ID')); - check_gene_positions($hash_omniscient, $gene_id); - print "\n\n"; - } - } - } -} - -if(! 
$error_found){ - print "No gene overlaping with different name has been found !\n"; -}else{ - print "$total_overlap genes overlap\n"; -} -print_omniscient($hash_omniscient, $gffout); #print gene modified -print "END\n"; - -####################################################################################################################### - #################### - # METHODS # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - -sub take_one_as_reference{ - my ($ListOverlapingGene)=@_; - - my $reference_feature=undef; - foreach my $feature (@$ListOverlapingGene){ - # case of the crow project (We developped this script for this project first of all) - if ($feature->has_tag('oId')){ #check again that part please - if ($feature->has_tag('Name')){ - my @values_ref = $feature->get_tag_values('Name'); - my $id = shift @values_ref; - if ($id !~ /"{2,}?/){ # If there is a name - $reference_feature=$feature;last; - } - } - if(! $reference_feature){ - $reference_feature=$feature; - } - } - #fix_fusion case - if ($feature->has_tag('ID')){ - my $id_current= $feature->_tag_value('ID'); - if($id_current =~ /^[^new]/){ - $reference_feature=$feature; - if ($feature->has_tag('Name')){ - my $name_current= $feature->_tag_value('Name'); - if(($name_current =~ /^[^new]/) and (! index($name_current, 'NO NAME ASSIGNED') != -1 )) { - $reference_feature=$feature;last; - } - elsif($name_current =~ /^[^new]/){ - $reference_feature=$feature; - } - else{$reference_feature=undef;} #If "NO NAME ASSIGNED" we don't keep it to try another - } - } - } - } - - # so get it randomly - if(! 
$reference_feature){ - $reference_feature=shift(@$ListOverlapingGene); - } - else{ - my @values_ref = $reference_feature->get_tag_values('ID'); - my $id_ref = shift @values_ref; - my @new_list; - foreach my $feature (@$ListOverlapingGene){ - my @values = $feature->get_tag_values('ID'); - my $id = shift @values; - if($id_ref ne $id){ - push(@new_list, $feature); - } - } - $ListOverlapingGene=\@new_list; - } - -return $reference_feature, $ListOverlapingGene; -} - -sub get_longest_cds_start_end{ - my ($hash_omniscient,$gene_id)=@_; - my $resu_start=100000000000; - my $resu_end=0; - - #check full CDS for each mRNA - foreach my $mrna_feature (@{$hash_omniscient->{'level2'}{'mrna'}{lc($gene_id)}}){ - my @values = $mrna_feature->get_tag_values('ID'); - my $mrna_id = shift @values; - my $extrem_start=100000000000; - my $extrem_end=0; - - #check all cds pieces - foreach my $cds_feature (@{$hash_omniscient->{'level3'}{'cds'}{lc($mrna_id)}}){ - if ($cds_feature->start < $extrem_start){ - $extrem_start=$cds_feature->start; - } - if($cds_feature->end > $extrem_end){ - $extrem_end=$cds_feature->end ; - } - } - - if($extrem_start < $resu_start){ - $resu_start=$extrem_start; - } - if($extrem_end > $resu_end){ - $resu_end=$extrem_end; - } - } - return $resu_start,$resu_end; -} - -#Check if two genes have at least one mRNA isoform which overlap at cds level. 
-sub two_features_overlap{ - my ($hash_omniscient,$gene_id, $gene_id2)=@_; - my $resu=undef; - - #check full CDS for each mRNA - foreach my $mrna_feature (@{$hash_omniscient->{'level2'}{'mrna'}{lc($gene_id)}}){ - foreach my $mrna_feature2 (@{$hash_omniscient->{'level2'}{'mrna'}{lc($gene_id2)}}){ - - my @values1 = $mrna_feature->get_tag_values('ID'); - my $mrna_id1 = shift @values1; - - my @values2 = $mrna_feature2->get_tag_values('ID'); - my $mrna_id2 = shift @values2; - - #check all cds pieces - foreach my $cds_feature1 (@{$hash_omniscient->{'level3'}{'cds'}{lc($mrna_id1)}}){ - foreach my $cds_feature2 (@{$hash_omniscient->{'level3'}{'cds'}{lc($mrna_id2)}}){ - - if(($cds_feature2->start <= $cds_feature1->end) and ($cds_feature2->end >= $cds_feature1->start )){ # they overlap - $resu="yes";last; - } - } - if($resu){last;} - } - if($resu){last;} - } - if($resu){last;} - } - return $resu; -} - -__END__ - - -=head1 NAME - -gff3_sp_fix_overlaping_genes.pl - Check a gff3 annotation file to -find cases where differents gene features have CDS that overlap. In this case the gene features will be merged in only one. -One gene is choosen as reference, and the mRNA from the other gene will be linked to it. So, it creates isoforms. - -=head1 SYNOPSIS - - ./gff3_sp_fix_overlaping_genes.pl -f infile [-o outfile] - ./gff3_sp_fix_overlaping_genes.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<-f>, B<--file>, B<--gff3> or B<--gff> - -Input GFF3 file. - -=item B<-o>, B<--out>, B<--output> or B<--outfile> - -Output file. If none given, will be display in standard output. - -=item B<--help> or B<-h> - -Display this helpful text. 
- -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_fix_small_exon_from_extremities.pl b/annotation/Tools/Util/gff/gff3_sp_fix_small_exon_from_extremities.pl deleted file mode 100755 index fcb7f1540..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_fix_small_exon_from_extremities.pl +++ /dev/null @@ -1,337 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Carp; -use Clone 'clone'; -use File::Basename; -use Getopt::Long; -use Statistics::R; -use Pod::Usage; -use List::MoreUtils qw(uniq); -use Bio::Tools::GFF; -use Bio::DB::Fasta; -use Bio::SeqIO; -use NBIS::GFF3::Omniscient; - -my $header = qq{ -######################################################## -# NBIS 2018 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $outfile = undef; -my $gff = undef; -my $file_fasta=undef; -my $codonTableId=1; -my $SIZE_OPT=15; -my $verbose = undef; -my $help= 0; - -my @copyARGV=@ARGV; -if ( !GetOptions( - "help|h" => \$help, - "gff=s" => \$gff, - "fasta|fa|f=s" => \$file_fasta, - "table|codon|ct=i" => \$codonTableId, - "size|s=i" => \$SIZE_OPT, - "v!" => \$verbose, - "output|outfile|out|o=s" => \$outfile)) - -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ( ! (defined($gff)) or !(defined($file_fasta)) ){ - pod2usage( { - -message => "$header\nAt least 2 parameter is mandatory:\nInput reference gff file (--gff) and Input fasta file (--fasta)\n\n", - -verbose => 0, - -exitval => 1 } ); -} - -if($codonTableId<0 and $codonTableId>25){ - print "$codonTableId codon table is not a correct value. 
It should be between 0 and 25 (0,23 and 25 can be problematic !)\n"; -} - -###################### -# Manage output file # -my $gffout; -#my $gffout4; -if ($outfile) { -open(my $fh, '>', $outfile) or die "Could not open file '$outfile' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); -} -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - - ##################### - # MAIN # - ##################### - - -###################### -### Parse GFF input # -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $gff - }); -print ("GFF3 file parsed\n"); - - -#################### -# index the genome # -my $db = Bio::DB::Fasta->new($file_fasta); -print ("Genome fasta parsed\n"); - -#################### - -#counters -my $exonCounter=0; -my $mrnaCounter=0; -my $geneCounter=0; - - -foreach my $primary_tag_key_level1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... - foreach my $gene_id (keys %{$hash_omniscient->{'level1'}{$primary_tag_key_level1}}){ - - my $gene_feature = $hash_omniscient->{'level1'}{$primary_tag_key_level1}{$gene_id}; - my $strand = $gene_feature->strand(); - print "gene_id = $gene_id\n" if $verbose; - - foreach my $primary_tag_key_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... 
- if ( exists_keys( $hash_omniscient, ('level2', $primary_tag_key_level2, $gene_id) ) ){ - my $rnaFix=undef; - foreach my $level2_feature ( @{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$gene_id}}) { - - # get level2 id - my $level2_ID = lc($level2_feature->_tag_value('ID')); - - my $exonFix=undef; - if ( exists_keys( $hash_omniscient, ('level3', 'exon', $level2_ID) ) ){ - my @exon_sorted = sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{'exon'}{$level2_ID}}; - - my $number_exon=$#{$hash_omniscient->{'level3'}{'exon'}{$level2_ID}}+1; - - ##################### - #start with left exon - my $left_exon = $exon_sorted[0]; - my $exon_size = ($left_exon->end - $left_exon->start +1); - - if($exon_size < $SIZE_OPT){ - - my $original_exon_start = $left_exon->start; - my $new_exon_start = $left_exon->start-($SIZE_OPT - $exon_size ); - - #modify the exon start - $left_exon->start($new_exon_start); - $exonCounter++; - $exonFix=1; - - print "left_exon start fixed\n" if $verbose; - - #take care of CDS if needed - if ( exists_keys( $hash_omniscient, ('level3', 'cds', $level2_ID) ) ){ - my @cds_sorted = sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{'cds'}{$level2_ID}}; - - #Check if the exon modification could affect the CDS - if($original_exon_start == $cds_sorted[0]->start()){ - - my $original_cds_start = $original_exon_start; - - #get the sequence - my $sequence = $db->seq( $gene_feature->seq_id() ); - #get codon table - my $codonTable = Bio::Tools::CodonTable->new( -id => $codonTableId); - - #extract the codon - my $this_codon = substr( $sequence, $original_cds_start-1, 3); - - if($strand eq "+" or $strand == "1"){ - #Check if it is not terminal codon, otherwise we have to extend the CDS. - - if(! 
$codonTable->is_start_codon( $this_codon )){ - print "first exon plus strand : this is not a start codon\n";exit; - } - - } - if($strand eq "-" or $strand == "-1"){ - #reverse complement - my $seqobj = Bio::Seq->new(-seq => $this_codon); - $this_codon = $seqobj->revcom()->seq; - - #Check if it is not terminal codon, otherwise we have to extend the CDS. - if(! $codonTable->is_ter_codon( $this_codon )){ - print "first exon minus strand : this is not a terminal codon\n";exit; - } - } - } - } - - } - ################ - #then right exon - if($number_exon > 1){ - - my $right_exon = $exon_sorted[$#exon_sorted]; - my $exon_size = ($right_exon->end - $right_exon->start +1); - - if($exon_size < $SIZE_OPT){ - - my $original_exon_end = $right_exon->end; - my $new_exon_end = $right_exon->end+($SIZE_OPT - $exon_size ); - - #modify the exon end - $right_exon->end($new_exon_end); - $exonCounter++; - $exonFix=1; - - print "right_exon end fixed\n" if $verbose; - - #take care of CDS if needed - if ( exists_keys( $hash_omniscient, ('level3', 'cds', $level2_ID) ) ){ - my @cds_sorted = sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{'cds'}{$level2_ID}}; - - #Check if the exon modification could affect the CDS - if($original_exon_end == $cds_sorted[$#cds_sorted]->end()){ - - my $original_cds_end = $original_exon_end; - - #get the sequence - my $sequence = $db->seq( $gene_feature->seq_id() ); - #get codon table - my $codonTable = Bio::Tools::CodonTable->new( -id => $codonTableId); - - #extract the codon - my $this_codon = substr( $sequence, $original_cds_end-3, 3); - - if($strand eq "+" or $strand == "1"){ - print "last plus strand\n" if $verbose; - #Check if it is not terminal codon, otherwise we have to extend the CDS. - - if(! 
$codonTable->is_ter_codon( $this_codon )){ - - print "last exon plus strand : $this_codon is not a stop codon\n";exit; - } - - } - if($strand eq "-" or $strand == "-1"){ - print "last minus strand\n" if $verbose; - - #reverse complement - my $seqobj = Bio::Seq->new(-seq => $this_codon); - $this_codon = $seqobj->revcom()->seq; - - #Check if it is not terminal codon, otherwise we have to extend the CDS. - if(! $codonTable->is_start_codon( $this_codon )){ - print "last exon minus strand : $this_codon is not a start codon\n";exit; - } - } - } - } - } - } - } - if($exonFix){ - $mrnaCounter++; - } - } - if($rnaFix){ - $geneCounter++; - } - } - } - } -} - -_check_all_level2_positions($hash_omniscient,0); # review all the feature L2 to adjust their start and stop according to the extrem start and stop from L3 sub features. -_check_all_level1_positions($hash_omniscient,0); # Check the start and end of level1 feature based on all features level2. - -#END -my $string_to_print="usage: $0 @copyARGV\n"; -$string_to_print .="Results:\n"; -$string_to_print .="nb gene affected: $geneCounter\n"; -$string_to_print .="nb rna affected: $mrnaCounter\n"; -$string_to_print .="nb exon affected: $exonCounter\n"; -print $string_to_print; - -print_omniscient($hash_omniscient, $gffout); #print result - -print "Bye Bye.\n"; -####################################################################################################################### - #################### - # METHODS # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - -__END__ -if ( !GetOptions( - - "table|codon|ct=i" => \$codonTableId, - -=head1 NAME - -gff3_fix_small_exon_from_extremities.pl - - -The script aims to extend the small exons to make them longer. -When submitting annotation to ENA they expect exon size of 15 nt minimum. Currently we extend only the exon from extremities, otherwise we risk to break the predicted ORF. -/!\ Script under development. 
When we extend an exon and the CDS has to be extended too (because is was a partial CDS), we exit; - - -=head1 SYNOPSIS - - ./gff3_fix_small_exon_from_extremities.pl -gff=infile.gff --fasta genome.fa [ -o outfile ] - ./gff3_fix_small_exon_from_extremities.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<-gff> - -Input GFF3 file that will be read - -=item B<-fa> or B<--fasta> - -Genome fasta file -The name of the fasta file containing the genome to work with. - -=item B<--ct> or B<--table> or B<--codon> - -This option allows specifying the codon table to use - It expects an integer (1 by default = standard) - -=item B<--size> or B<-s> - -Minimum exon size accepted in nucleotide. All exon below this size will be extended to this size. Default value = 15. - -=item B<-o> , B<--output> , B<--out> or B<--outfile> - -Output GFF file. If no output file is specified, the output will be -written to STDOUT. - -=item B<-v> - -Verbose option, make it easier to follow what is going on for debugging purpose. - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_flag_short_intron.pl b/annotation/Tools/Util/gff/gff3_sp_flag_short_intron.pl deleted file mode 100755 index 5f01f042f..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_flag_short_intron.pl +++ /dev/null @@ -1,268 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use POSIX qw(strftime); -use File::Basename; -use Carp; -use Getopt::Long; -use IO::File; -use Pod::Usage; -use NBIS::GFF3::Omniscient; -use Bio::Tools::GFF; - -my $header = qq{ -######################################################## -# NBIS 2019 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. 
# -######################################################## -}; - -my $opt_file; -my $opt_output=undef; -my $verbose=undef; -my $Xsize=10; -my $opt_help = 0; - -my @copyARGV=@ARGV; -if ( !GetOptions( 'f|gff|ref|reffile=s' => \$opt_file, - 'o|out|output=s' => \$opt_output, - 'v|verbose!' => \$verbose, - 'i|intron_size=i' => \$Xsize, - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} -if ( ! defined($opt_file) ) { - pod2usage( { - -message => "$header\nMust specify at least 1 parameters:\nReference data gff3 file (--gff)\n", - -verbose => 0, - -exitval => 1 } ); -} - -# ####################### -# # START Manage Option # -# ####################### -my $gffout; -my $ostreamReport; -if (defined($opt_output) ) { - my ($filename,$path,$ext) = fileparse($opt_output,qr/\.[^.]*/); - $ostreamReport=IO::File->new(">".$path.$filename."_report.txt" ) or croak( sprintf( "Can not open '%s' for writing %s", $filename."_report.txt", $! )); - - open(my $fh, '>', $opt_output) or die "Could not open file $opt_output $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); -} -else{ - $ostreamReport = \*STDOUT or die ( sprintf( "Can not open '%s' for writing %s", "STDOUT", $! 
)); - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} -my $string1 = strftime "%m/%d/%Y at %Hh%Mm%Ss", localtime; -$string1 .= "\n\nusage: $0 @copyARGV\n\n"; - -print $ostreamReport $string1; -if($opt_output){print $string1;} - - ####################### - # MAIN # -# >>>>>>>>>>>>>>>>>>>>>>>>> ####################### <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -###################### -### Parse GFF input # -print "Reading ".$opt_file,"\n"; -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $opt_file - }); -print("Parsing Finished\n\n"); -### END Parse GFF input # -######################### - -my $nb_cases=0; -my $tag = "pseudo"; -###################### -### Parse GFF input # -foreach my $tag_l1 (keys %{$hash_omniscient->{'level1'}}){ - foreach my $id_l1 (keys %{$hash_omniscient->{'level1'}{$tag_l1}}){ - my $shortest_intron=10000000000; - foreach my $tag_l2 (keys %{$hash_omniscient->{'level2'}}){ - if (exists_keys($hash_omniscient,('level2',$tag_l2,$id_l1) ) ){ - # #MATCH CASE - We ahve to count the L2 match features - # if($tag_l2 =~ "match"){ - # my $counterL2_match=-1; - # foreach my $feature_l2 (@{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}}){ - - # my @sortedList = sort {$a->start <=> $b->start} @{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}}; - # my $indexLastL2 = $#{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}}; - # $counterL2_match++; - - # if($counterL2_match > 0 and $counterL2_match <= $indexLastL2){ - # my $intronSize = $sortedList[$counterL2_match]->start - $sortedList[$counterL2_match-1]->end; - # $shortest_intron = $intronSize if($intronSize < $shortest_intron) - # } - # } - # } - # else{ - foreach my $feature_l2 (@{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}}){ - my $level2_ID = lc($feature_l2->_tag_value('ID')); - - # if ( exists_keys($hash_omniscient,('level3','exon',$level2_ID) ) ){ - # my $counterL3=-1; - # my $indexLast = $#{$hash_omniscient->{'level3'}{'exon'}{$level2_ID}}; - # my @sortedList = sort 
{$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{'exon'}{$level2_ID}}; - # foreach my $feature_l3 ( @sortedList ){ - # #count number feature of tag_l3 type - # $counterL3++; - # #Manage Introns## from the second intron to the last (from index 1 to last index of the table sortedList) ## We go inside this loop only if we have more than 1 feature. - # if($counterL3 > 0 and $counterL3 <= $indexLast){ - # my $intronSize = $sortedList[$counterL3]->start - $sortedList[$counterL3-1]->end; - # $shortest_intron = $intronSize if($intronSize < $shortest_intron) - # } - # } - # } - # else{ - if ( exists_keys($hash_omniscient,('level3','cds',$level2_ID)) ){ - my $counterL3=-1; - my $indexLast = $#{$hash_omniscient->{'level3'}{'cds'}{$level2_ID}}; - my @sortedList = sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{'cds'}{$level2_ID}}; - foreach my $feature_l3 ( @sortedList ){ - #count number feature of tag_l3 type - $counterL3++; - #Manage Introns## from the second intron to the last (from index 1 to last index of the table sortedList) ## We go inside this loop only if we have more than 1 feature. - if($counterL3 > 0 and $counterL3 <= $indexLast){ - my $intronSize = $sortedList[$counterL3]->start - $sortedList[$counterL3-1]->end; - $shortest_intron = $intronSize if($intronSize < $shortest_intron) - } - } - } - # foreach my $tag_l3 (keys %{$hash_omniscient->{'level3'}}){ - # if (index(lc($tag_l3), 'utr') != -1) { - # if ( exists_keys($hash_omniscient,('level3',$tag_l3,$level2_ID)) ){ - # my $counterL3=-1; - # my $indexLast = $#{$hash_omniscient->{'level3'}{$tag_l3}{$level2_ID}}; - # my @sortedList = sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{$tag_l3}{$level2_ID}}; - # foreach my $feature_l3 ( @sortedList ){ - # #count number feature of tag_l3 type - # $counterL3++; - # #Manage Introns## from the second intron to the last (from index 1 to last index of the table sortedList) ## We go inside this loop only if we have more than 1 feature. 
- # if($counterL3 > 0 and $counterL3 <= $indexLast){ - # my $intronSize = $sortedList[$counterL3]->start - $sortedList[$counterL3-1]->end; - # $shortest_intron = $intronSize if($intronSize < $shortest_intron) - # } - # } - # } - # } - # } - #} - #} - } - } - } - print "Shortest intron for $id_l1:".$shortest_intron."\n" if($shortest_intron != 10000000000 and $verbose); - if ($shortest_intron < $Xsize){ - print "flag the gene $id_l1\n"; - $nb_cases++; - - my $feature_l1 = $hash_omniscient->{'level1'}{$tag_l1}{$id_l1}; - $feature_l1->add_tag_value($tag, $shortest_intron); - if($feature_l1->has_tag('product') ){ - $feature_l1->add_tag_value('note', $feature_l1->get_tag_values('product')); - $feature_l1->remove_tag('product'); - } - foreach my $tag_l2 (keys %{$hash_omniscient->{'level2'}}){ - if (exists_keys ($hash_omniscient, ('level2', $tag_l2, $id_l1) ) ) { - foreach my $feature_l2 (@{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}}){ - my $level2_ID = lc($feature_l2->_tag_value('ID')); - $feature_l2->add_tag_value($tag, $shortest_intron); - if($feature_l2->has_tag('product') ){ - $feature_l2->add_tag_value('note', $feature_l2->get_tag_values('product')); - $feature_l2->remove_tag('product'); - } - - foreach my $tag_l3 (keys %{$hash_omniscient->{'level3'}}){ - if ( exists_keys($hash_omniscient, ('level3', $tag_l3, $level2_ID) ) ){ - foreach my $feature_l3 (@{$hash_omniscient->{'level3'}{$tag_l3}{$level2_ID}}){ - $feature_l3->add_tag_value($tag, $shortest_intron); - if($feature_l3->has_tag('product') ){ - $feature_l3->add_tag_value('note', $feature_l3->get_tag_values('product')); - $feature_l3->remove_tag('product'); - } - } - } - } - } - } - } - } - } -} - -my $toprint = "We found $nb_cases cases where introns were < $Xsize, we flagged them with the attribute $tag. 
The value of this tag is size of the shortest intron found in this gene.\n"; -print $ostreamReport $toprint; -if($opt_output){print $toprint;} -print_omniscient($hash_omniscient, $gffout); #print gene modified - ######################### - ######### END ########### - ######################### - - -####################################################################################################################### - #################### - # methods # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - - - -__END__ - - -=head1 NAME - -gff3_sp_flag_short_introns.pl - This script will flag the short introns with the attribute pseudo. Is is usefull to avoid ERROR when submiting the -data to EBI. (Typical EBI error message: ********ERROR: Intron usually expected to be at least 10 nt long. Please check the accuracy) - -=head1 SYNOPSIS - - ./gff3_sp_flag_short_introns.pl --gff infile --out outFile - ./gff3_sp_flag_short_introns.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff>, B<-f>, B<--ref> or B<-reffile> - -Input GFF3 file correponding to gene build. - -=item B<--intron_size> or B<-i> - -Minimum intron size, default 10. All genes with an intron < of this size will be flagged with the pseudo attribute (the value will be the size of the smallest intron found within the incriminated gene) - -=item B<--out>, B<--output> or B<-o> - -Output gff3 file where the result will be printed. - -=item B<-v> - -Bolean. Verbose for debugging purpose. - -=item B<--help> or B<-h> - -Display this helpful text. 
- -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_functional_statistics.pl b/annotation/Tools/Util/gff/gff3_sp_functional_statistics.pl deleted file mode 100755 index abf5de2b2..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_functional_statistics.pl +++ /dev/null @@ -1,418 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Getopt::Long; -use Pod::Usage; -use Statistics::R; -use IO::File; -use List::MoreUtils qw(uniq); -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient; - -my $header = qq{ -######################################################## -# NBIS 2019 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $gff = undef; -my $opt_output = undef; -my $opt_genomeSize = undef; -my $help= 0; - -if ( !GetOptions( - "help|h" => \$help, - 'g|gs=s' => \$opt_genomeSize, - 'o|output=s' => \$opt_output, - "gff|f=s" => \$gff)) - -{ - pod2usage( { -message => "Failed to parse command line", - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 1, - -exitval => 0, - -message => "$header \n" } ); -} - -if ( ! (defined($gff)) ){ - pod2usage( { - -message => "$header\nAt least 1 parameter is mandatory:\nInput reference gff file (--gff) \n\n", - -verbose => 0, - -exitval => 2 } ); -} - -#### IN / OUT -my $out; -if ($opt_output) { - - if (-f $opt_output){ - print "Cannot create a directory with the name $opt_output because a file with this name already exists.\n";exit(); - } - if (-d $opt_output){ - print "The output directory choosen already exists. Please geve me another Name.\n";exit(); - } - mkdir $opt_output; - - $out=IO::File->new(">".$opt_output."/report.txt" ) or croak( sprintf( "Can not open '%s' for writing %s", $opt_output."/report.txt", $! 
)); - } -else{ - $out = IO::File->new(); - $out->fdopen( fileno(STDOUT), 'w' ); -} - - - ##################### - # MAIN # - ##################### - -###################### -### Parse GFF input # -print "Reading file $gff\n"; -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $gff - }); -print "Parsing Finished\n"; -### END Parse GFF input # -######################### - -############################################################### -### Print Statistics structural first -############################################################### -#check number of level1 -my $nbLevel1 = 0; -foreach my $tag_l1 (keys %{$hash_omniscient->{'level1'}}){ - $nbLevel1 += keys %{$hash_omniscient->{'level1'}{$tag_l1}}; -} - -#chech number of level2 -my $nbLevel2 = keys %$hash_mRNAGeneLink; - -############## -# STATISTICS # -my $stat; -my $distri; -if($opt_genomeSize){ - ($stat, $distri) = gff3_statistics($hash_omniscient, $opt_genomeSize); -} -else{ - ($stat, $distri) = gff3_statistics($hash_omniscient); -} - -#print statistics -foreach my $infoList (@$stat){ - foreach my $info (@$infoList){ - print $out "$info"; - } - print $out "\n"; -} - -############################################################### -### Print Statistics function -###############################################################: -my %names_l1; -my $name_l1_nb=undef; -my %names_l2; -my $name_l2_nb=undef; -my %products; -my $product_l2_nb=undef; -my %descriptions; -my $description_l2_nb=undef; -my %ontology_terms; -my $ontology_term_l2_nb=undef; - -my %DB_omni_mrna; -my %DB_omni_gene; - -my $nbmRNAwithFunction = 0; -my $nbGeneWithFunction = 0; -my $nbGeneWithProduct = 0; -my $nbGeneWithDescription = 0; -my $total_nb_l1 = 0; -my $total_nb_l2 = 0; - - ################# - # == LEVEL 1 == # - ################# -foreach my $primary_tag_key_level1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... 
- foreach my $gene_id_tag_key (keys %{$hash_omniscient->{'level1'}{$primary_tag_key_level1}}){ - $total_nb_l1++; - my $l1_has_function=undef; - my $l1_has_product=undef; - my $l1_has_description=undef; - my $gene_feature=$hash_omniscient->{'level1'}{$primary_tag_key_level1}{$gene_id_tag_key}; - my $id_gene=$gene_feature->_tag_value('ID'); - - #Check For NAME - if($gene_feature->has_tag('Name') ){ - my $value = $gene_feature->_tag_value('Name'); - $names_l1{$value}++; - $name_l1_nb++; - #print "l1 has tag name with value:".$value."\n"; - } - - - - foreach my $primary_tag_key_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - if ( exists_keys( $hash_omniscient, ('level2', $primary_tag_key_level2, $gene_id_tag_key) ) ){ - foreach my $level2_feature ( @{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$gene_id_tag_key}}) { - $total_nb_l2++; - my $l2_has_function=undef; - my $l2_has_product=undef; - my $l2_has_description=undef; - my $id_mrna=$level2_feature->_tag_value('ID'); - - #Check For NAME - if($level2_feature->has_tag('Name') ){ - my $value = $level2_feature->_tag_value('Name'); - $names_l2{$value}++; - $name_l2_nb++; - #print "l2 has tag name with value:".$value."\n"; - } - - #Check For product - if($level2_feature->has_tag('product') ){ - my $value = $level2_feature->_tag_value('product'); - if ($value ne "hypothetical protein"){ - $products{$value}++; - $product_l2_nb++; - $l2_has_product=1; - #print "l2 has tag product with value:".$value."\n"; - } - } - - - #Check For description - if($level2_feature->has_tag('description') ){ - my $value = $level2_feature->_tag_value('description'); - if ($value ne "hypothetical protein"){ - $descriptions{$value}++; - $description_l2_nb++; - $l2_has_description=1; - #print "l2 has tag descritpion with value:".$value."\n"; - } - } - - #Check For Ontology_term - if($level2_feature->has_tag('Ontology_term') ){ - my @values = 
$level2_feature->get_tag_values('Ontology_term'); - foreach my $tuple (@values){ - my ($type,$value) = split /:/,$tuple; - $ontology_terms{$value}++; - $ontology_term_l2_nb++; - $l2_has_function=1; - push @{$DB_omni_mrna{'Ontology_term'}{$id_mrna}}, $value; - $DB_omni_gene{'Ontology_term'}{$id_gene}++; - #print "l2 has tag ontology_term with value:".$value."\n"; - } - } - - #Check For Dbxref - if($level2_feature->has_tag('Dbxref') ){ - my @values = $level2_feature->get_tag_values('Dbxref'); - foreach my $tuple (@values){ - my ($type,$value) = split /:/,$tuple; - push @{$DB_omni_mrna{$type}{$id_mrna}}, $value; - $DB_omni_gene{$type}{$id_gene}++; - $l2_has_function=1; - } - } - - if($l2_has_function){ - $nbmRNAwithFunction++; - $l1_has_function=1; - } - if($l2_has_product){ - $l1_has_product=1; - } - if($l2_has_description){ - $l1_has_description=1; - } - } - } - } - if($l1_has_function){ - $nbGeneWithFunction++; - } - if($l1_has_product){ - $nbGeneWithProduct++; - } - if($l1_has_description){ - $nbGeneWithDescription++; - } - } -} - -#print result per type within dedicated file when output provided -# create streamOutput -if($opt_output){ - foreach my $type (keys %DB_omni_mrna){ - my $ostreamFunct = IO::File->new(); - $ostreamFunct->open( $opt_output."/$type.txt", 'w' ) or - croak( - sprintf( "Can not open '%s' for writing %s", $opt_output."/$type.txt", $! 
) - ); - foreach my $seq_id (keys %{$DB_omni_mrna{$type}}){ - print $ostreamFunct $seq_id."\t".join( ',', @{$DB_omni_mrna{$type}{$seq_id}} )."\n"; - } - } -} - -my $nbmRNAwithoutFunction= $total_nb_l2 - $nbmRNAwithFunction; -my $nbGeneWithoutFunction= $total_nb_l1 - $nbGeneWithFunction; -my $nbGeneWithoutProduct= $total_nb_l1 - $nbGeneWithProduct; - -my $listOfFunction; -foreach my $funct (sort keys %DB_omni_mrna){ - $listOfFunction.="$funct,"; -} -chop $listOfFunction; - -# NOW summerize -my $stringPrint=undef; -my $lineB= "_______________________________________________________________________________________________________"; -$stringPrint .= " ".$lineB."\n"; -$stringPrint .= "|".sizedPrint(" ",25)."|".sizedPrint("Nb term linked to mRNA",25)."|".sizedPrint("Nb mRNA with term",25)."|".sizedPrint("Nb gene with term",25)."|\n"; -$stringPrint .= "|".$lineB."|\n"; - -foreach my $type (sort keys %DB_omni_mrna){ - my $total_term_mRNA=0; - foreach my $id_l2 (keys %{$DB_omni_mrna{$type}} ){ - $total_term_mRNA+=scalar @{$DB_omni_mrna{$type}{$id_l2}}; - } - my $nbmRNA_with_term = keys %{$DB_omni_mrna{$type}}; - my $nbGenewith_term = keys %{$DB_omni_gene{$type}}; - - my $mRNA_type =0; #keys %{$mRNAAssociatedToTerm{$type}}; - my $gene_type =0; #keys %{$GeneAssociatedToTerm{$type}}; - $stringPrint .= "|".sizedPrint(" $type",25)."|".sizedPrint($total_term_mRNA,25)."|".sizedPrint($nbmRNA_with_term,25)."|".sizedPrint($nbGenewith_term,25)."|\n|".$lineB."|\n"; - } - - - -$stringPrint .= "\nnb mRNA without Functional annotation ($listOfFunction) = $nbmRNAwithoutFunction (remind: total mRNA = $total_nb_l2)\n". - "nb mRNA with Functional annotation ($listOfFunction) = $nbmRNAwithFunction (remind: total mRNA = $total_nb_l2)\n". - "nb gene without Functional annotation ($listOfFunction) = $nbGeneWithoutFunction (remind: total gene = $total_nb_l1)\n". 
- "nb gene with Functional annotation ($listOfFunction) = $nbGeneWithFunction (remind: total gene = $total_nb_l1)\n\n"; - - -#-----name------ -if ($name_l1_nb){ - $stringPrint .= "We found $name_l1_nb genes with attribute. (remind: total gene = $total_nb_l1)\n"; -} -else{$stringPrint .= "No gene with attribute found.\n";} -if ($name_l2_nb){ - $stringPrint .= "We found $name_l2_nb mRNAs with attribute. They probably have the same names as their parent genes. (remind: total mRNA = $total_nb_l2)\n"; -} -else{$stringPrint .= "No mRNA with attribute found.\n";} - -#-----description------ -if ($nbGeneWithDescription){ - $stringPrint .= "We found $nbGeneWithDescription genes with attribute.\n"; -} -else{$stringPrint .= "No gene with attribute found.\n";} -if ($description_l2_nb){ - $stringPrint .= "We have $description_l2_nb mRNAs with attribute.\n"; -} -else{$stringPrint .= "No mRNA with attribute found.\n";} - -#-----product------ -if($nbGeneWithProduct){ - $stringPrint .= "We found $nbGeneWithProduct genes with attribute.\n"; -} -else{$stringPrint .= "No gene with attribute found.\n";} -if ($product_l2_nb){ - $stringPrint .= "We have $product_l2_nb mRNAs with attribute.\n"; -} -else{$stringPrint .= "No mRNA with attribute found.\n";} - - -print $out $stringPrint; -# END STATISTICS # -################## -print "Bye Bye.\n"; -####################################################################################################################### - #################### - # METHODS # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - - -sub sizedPrint{ - my ($term,$size) = @_; - my $result; - my $sizeTerm = ($term) ? 
length($term) : 0; - if ($sizeTerm > $size ){ - $result=substr($term, 0,$size); - return $result; - } - else{ - my $nbBlanc=$size-$sizeTerm; - - my $float = $nbBlanc/2; - my $nbBlanc_before = sprintf "%.0f", $float; - my $nbBlanc_after = $nbBlanc - $nbBlanc_before; - - $result=""; - for (my $i = 0; $i < $nbBlanc_before; $i++){ - $result.=" "; - } - $result.=$term; - for (my $i = 0; $i < $nbBlanc_after; $i++){ - $result.=" "; - } - return $result; - } -} - - -__END__ - -=head1 NAME - -gff3_statistics.pl - -The script take a gff3 file as input. - -The script give basic statistics of a gff file. -Remark: identical feature from level1 or level2 with identical ID will be merged as well as their subsequent features (Level2 or level3). - -=head1 SYNOPSIS - - ./gff3_statistics.pl --gff file.gff [ -o outfile ] - ./gff3_statistics.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff> or B<-f> - -Input GFF3 file that will be read (and sorted) - -=item B<--gs> or B<-g> - -This option inform about the genome size in oder to compute more statistics. You can give the size in Nucleotide or directly the fasta file. - - -=item B<--output> or B<-o> - -File where will be written the result. If no output file is specified, the output will be written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. 
- -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_infer_function_colocalisation.pl b/annotation/Tools/Util/gff/gff3_sp_infer_function_colocalisation.pl deleted file mode 100755 index 872f879fb..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_infer_function_colocalisation.pl +++ /dev/null @@ -1,1200 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Getopt::Long; -use File::Basename; -use Statistics::R; -use Pod::Usage; -use Bio::Tools::GFF; -use List::MoreUtils qw(uniq); -use NBIS::GFF3::Omniscient; - -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $output = undef; -my $ref = undef; -my $tar = undef; -my $inv = undef; -my $_dblr = undef; -my $_ablt= undef; -my $liftOver=undef; -my $overlapT = undef; -my $featureType=undef; -my $checkMultiOverlap=undef; -my $help= 0; - -if ( !GetOptions( - "help|h" => \$help, - "ref|r=s" => \$ref, - "mapped|m|tar=s" => \$tar, - "inv|inverse|oppposite" => \$inv, - "value|threshold|overlap=i" => \$overlapT, - "t|transfert|lift" => \$liftOver, - "feature=s" => \$featureType, - "dblr" => \$_dblr, - "ablt" => \$_ablt, - "cmo" => \$checkMultiOverlap, - "outdir|out|o=s" => \$output)) - -{ - pod2usage( { -message => "$header"."Failed to parse command line.", - -verbose => 1, - -exitval => 1} ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -message => "$header", - -verbose => 2, - -exitval => 2 } ); -} - -if ( ! (defined($ref)) or ! 
(defined($tar)) ){ - pod2usage( { - -message => "$header\nAt least 2 parameters are mandatory.\n", - -verbose => 0, - -exitval => 1 } ); -} - -###################### -# Manage output file # -my($filename_tar, $dirs, $suffix) = fileparse($tar,qr/\.[^.]*/); -my($filename_ref, $dirs, $suffix) = fileparse($ref,qr/\.[^.]*/); - -my $gffout; -my $outReport; - -if ($output) { - $output=~ s/.gff//g; - if (-d $output ){ - print "Directory $output already exists.\n";exit; - } - else{ - mkdir $output; - - my $out=">".$output."/".$filename_tar."_FuncLiftOn_".$filename_ref.".gff"; - open(my $fh, $out) or die "Could not open file '$out' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); - - my $out=$output."/".$filename_tar."_FuncLiftOn_".$filename_ref."-report.txt"; - open($outReport, '>', $out) or die "Could not open file '$out' $!"; - } -} -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - -################## -# Manage overlap threshold -if (! $overlapT ){ - $overlapT = 20; -} -print "We will consider gene with overlap over $overlapT\n"; - -################## -# Manage feature Type (Level2) -if(! $featureType){ - $featureType='mRNA'; -} -$featureType=lc($featureType); -##################################### -# END Manage Options # -##################################### - -########### -# DEFINE NBIS PATTERN OF NAMES -########### -my $nbis_suffix_p=qr/_([0-9]*(_iso[0-9]+)?$|iso[0-9]+$)/o; -my $nbis_suffix_d=qr/_partial_part.*/; - - ##################### - # MAIN # - ##################### -############ -# Parse GFF reference # -print ("Parse file $ref\n"); -my ($refhash_omniscient, $refhash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $ref - }); -print ("$ref file parsed\n"); - -############## -# Manage gene name reference -my %hash_geneName; -my %hash_geneNameLab; -if(! 
$_dblr){ - # save gene names - foreach my $tag_level1 (keys %{$refhash_omniscient->{'level1'}}){ - foreach my $geneID (keys %{$refhash_omniscient->{'level1'}{$tag_level1}} ) { - my $gene_feature = $refhash_omniscient->{'level1'}{$tag_level1}{$geneID}; - my $tag=undef; - if($gene_feature->has_tag('Name')){ - $tag='Name'; - } - elsif($gene_feature->has_tag('gene_name')){ - $tag='gene_name'; - } - if($tag){ - my @tmp=$gene_feature->get_tag_values($tag); - my $name=lc(shift @tmp); - $hash_geneNameLab{$name}++; - $name =~ s/$nbis_suffix_p//; # We remove what has been added by NBIS during gene name annotation - $hash_geneName{$name}++; - } - } - } -} - -############ -# Parse GFF target # -print ("Parse file $tar\n"); -my ($tarhash_omniscient, $tarhash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $tar - }); -print ("$tar file parsed\n"); - -#count level1 in reference -my $count_ref_level1=0; -foreach my $tag_level1 (keys %{$refhash_omniscient->{'level1'}}) { - my $nbKeys= keys %{$refhash_omniscient->{'level1'}{$tag_level1}}; - $count_ref_level1=$count_ref_level1+ $nbKeys; -} - -#count level1 in tar -my $count_tar_level1=0; -foreach my $tag_level1 (keys %{$tarhash_omniscient->{'level1'}}) { - my $nbKeys= keys %{$tarhash_omniscient->{'level1'}{$tag_level1}}; - $count_tar_level1=$count_tar_level1+ $nbKeys; -} - -############# -# sort by seq id and featuretype -print ("Sort files by seqid\n"); -my $refhash_sortBySeq = sort_by_seq($refhash_omniscient, $featureType) ; -my $tarhash_sortBySeq = sort_by_seq($tarhash_omniscient, $featureType) ; -print ("Sorting files terminated\n"); - -#count level1 in ref sort by seq -my $count_ref_level1_sortBySeq=0; -foreach my $tag_level1 (keys %{$refhash_sortBySeq}) { - foreach my $Contig (keys %{$refhash_sortBySeq->{$tag_level1}}) { - my $nbKeys= @{$refhash_sortBySeq->{$tag_level1}{$Contig}}; - $count_ref_level1_sortBySeq=$count_ref_level1_sortBySeq + $nbKeys; - } -} - -#count level1 in tar sort by seq -my $count_tar_level1_sortBySeq=0; 
-foreach my $tag_level1 (keys %{$tarhash_sortBySeq}) { - foreach my $Contig (keys %{$tarhash_sortBySeq->{$tag_level1}}) { - my $nbKeys= @{$tarhash_sortBySeq->{$tag_level1}{$Contig}}; - $count_tar_level1_sortBySeq=$count_tar_level1_sortBySeq + $nbKeys; - } -} - -print "Now we are analyzing the contigs containing annotations from both annotation builds:\n"; -my $OverlapingB=0; -my $OverlapingA=0; -my $nbNoOverlapingA=0; -my $nbTotalClusterCaseA=0; -my $nbTotalClusterCaseB=0; -my %clusterCase; -my %o_one2one; -my %o_one2many; -my %o_many2one; -my %o_many2many; -my %split_omniscient; -my %fusion_omniscient; - -###### -# LOOP over all the contig -foreach my $tag_level1 (keys %{$refhash_sortBySeq}) { - foreach my $ContigName (keys %{$refhash_sortBySeq->{$tag_level1}}) { - - ###### - # LOOP over all gene feature - # USE A COPY - use of of temporary variable to be sure to loop over all element. - my @copyContig = @{$refhash_sortBySeq->{$tag_level1}{$ContigName}}; # - foreach my $copyGene (@copyContig) { - - my @copyGeneNameList=$copyGene->get_tag_values('ID'); - my $GeneName=lc(shift @copyGeneNameList); - #print "\nStudy of ".$GeneName.":\n"; - - ###### Test if gene already studied (started by another gene but due to overlap it is already studied) - my $letStudyThatGene=undef; - foreach my $gene_feature (@{$refhash_sortBySeq->{$tag_level1}{$ContigName}}){ - my @GeneNameList=$gene_feature->get_tag_values('ID'); - my $geneNameOriginal=lc(shift @GeneNameList); - if ($GeneName eq $geneNameOriginal){$letStudyThatGene="yes";} # already studied if we cannot find it among refhash_sortBySeq - } - ##### End test if already studied. 
If not, we continue - - - if ($letStudyThatGene){ - - # Declare table which I will work with - my @ListRefOverlapAtotest_list;my @ListOverlapAtested_list;my @ListNoOverlapA_list; my @ListOverlapAtestnoneed_list; - my @ListRefOverlapBtotest_list;my @ListOverlapBtested_list;my @ListNoOverlapB_list; my @ListOverlapBtestnoneed_list; - - my $ListBtotest=\@ListRefOverlapBtotest_list; my $ListOverlapAtested=\@ListOverlapAtested_list; my $ListNoOverlapA=\@ListNoOverlapA_list; - my $ListOverlapBtested=\@ListOverlapBtested_list; my $ListNoOverlapB=\@ListNoOverlapB_list; - - #### Initialize list of gene to test - my @LinkTocurrentGeneFeature; - push (@LinkTocurrentGeneFeature, $copyGene); #### >>>>>> BASE OF THE FEATURE TESTED FOR OVERLAP !!! CURRENTLY WE CHECK THE GENE FEATURE ! - my $ListAtotest=\@LinkTocurrentGeneFeature; - - - my $lap=0; - while (@{$ListAtotest} != 0 or @{$ListBtotest} != 0 ){ - $lap++; - - ######### - # TEST A side - if (@{$ListAtotest} != 0){ - #print "START START START TEST AAAAAAAA\n"; - my ($ListToTestX, $ListOverlapTested, $ListNoOverlapTested) = retrieveAllOverlap( $lap, $ContigName, - $refhash_sortBySeq, $tarhash_sortBySeq, $ListAtotest, $ListOverlapAtested, $ListNoOverlapA,1, - $refhash_omniscient, $tarhash_omniscient, $featureType); - - $ListBtotest = $ListToTestX; - $ListOverlapAtested = $ListOverlapTested; - $ListNoOverlapA = $ListNoOverlapTested; - - # Reinitialisation empty - my @list_empty; - $ListAtotest = \@list_empty; - - #print "END END END TEST AAAAAAAAA\n"; - next(); #stop here and avoid test B - } - - ########### - # TEST B side - if (@{$ListBtotest} != 0){ - # test every B - #print "START TEST BBBBBBBBBBBBBBBB\n"; - my ($ListToTestX, $ListOverlapTested, $ListNoOverlapTested) = retrieveAllOverlap( $lap, $ContigName, - $tarhash_sortBySeq, $refhash_sortBySeq, $ListBtotest, $ListOverlapBtested,$ListNoOverlapB,2, - $tarhash_omniscient, $refhash_omniscient, $featureType); - $ListAtotest = $ListToTestX; - $ListOverlapBtested = 
$ListOverlapTested; - $ListNoOverlapB = $ListNoOverlapTested; - - # Reinitialisation empty - my @list_empty; - $ListBtotest =\@list_empty; - - #print "END TEST BBBBBBBBBBBBBBBBBB\n"; - next(); #stop here - } - } - - $nbNoOverlapingA=$nbNoOverlapingA+@{$ListNoOverlapA}; - my $nbOverlpInA=@{$ListOverlapAtested}; - my @OverlapInA=(@{$ListOverlapAtested}); - $OverlapingA+=$nbOverlpInA; - - my $nbOverlpInB=@{$ListOverlapBtested}; - my @OverlapInB=(@{$ListOverlapBtested}); - $OverlapingB+=$nbOverlpInB; - - my $nbFragment = $nbOverlpInA+$nbOverlpInB; - - ################################## - # Manage different cases # - ################################## - #print ("OVERLAP step 1 END frgt nb:$nbFragment\n"); - if ( $nbFragment < 2 ){ - #HEre can be printed => Single gene from file A and PerfectMatch from A or B depending to an iption like my $contigFusionA = "ok"; < /!\ > - } - elsif ( $nbFragment == 2){ #case can be stretched - @{$o_one2one{$GeneName}} = (@OverlapInA,@OverlapInB); - } - else{ # $nbFragment > 2) - if (@OverlapInA == 1){ - @{$o_one2many{$GeneName}} = (@OverlapInA,[@OverlapInB]); - } - elsif(@OverlapInB == 1){ - @{ $o_many2one{$GeneName}} = ([@OverlapInA],@OverlapInB); - } # Split in the target Build - else{ # - - @{ $o_many2many{$GeneName}} = ([@OverlapInA],[@OverlapInB]); - $nbTotalClusterCaseA=$nbTotalClusterCaseA+$nbOverlpInA; - $nbTotalClusterCaseB=$nbTotalClusterCaseB+$nbOverlpInB; - } - } - } - } - } -} -print "Now managing overlaping cases found\n"; -############################## -#Manage overlap one2one -my $nb_one2one = keys (%o_one2one); -#print "We have $nb_one2one cases one 2 one\n\n"; -my $nbLifted_one2one=0; -my $nbNameChanged_one2one=0; -my $nbNewName_one2one=0; -my $overlapOK=0; - -if ($liftOver){ - ($overlapOK, $nbNameChanged_one2one, $nbNewName_one2one) = manage_one2one(\%o_one2one,$refhash_omniscient,$tarhash_omniscient,$overlapT, \%hash_geneName); -} -$nbLifted_one2one=$nbNameChanged_one2one + $nbNewName_one2one; - 
-############################## -#Manage overlap one2many -my $nb_one2many=0; -my $nb_fusion_ok=0; -my $nb_fusion_notV=0; -my $nbLifted_one2many=0; -my $nbNameChanged_one2many=0; -my $nbNewName_one2many=0; -my $overlapOK_one2many=0; -my %tmp_o_one2one; - -$nb_one2many = keys (%o_one2many); -#print "We have $nb_one2many cases one2many\n\n"; -#Check if same gene that is split in 2 -foreach my $key (keys %o_one2many){ - - my $geneA_feature = $o_one2many{$key}[0]; - my @geneB = @{$o_one2many{$key}[1]}; - -# print $geneA_feature->gff_string."\n"; -# print $geneB[0]->gff_string."\n"; -# print $geneB[1]->gff_string."\n\n"; - - # prepare in case we have to save the feature - my $geneA_name = lc($geneA_feature->_tag_value('ID')); - my @IDlist_A=($geneA_name); - - my $sameGene=check_feature_same_names(\%hash_geneName, \@geneB, 'Bs'); - - if($sameGene eq "none"){# GENE DIFFERENT in B (cannot really compare because one is missing) - only one long in A - $nb_fusion_notV++; - fill_omniscient_from_other_omniscient_level1_id(\@IDlist_A, $refhash_omniscient, \%fusion_omniscient); - } - elsif(! $sameGene){ # GENE DIFFERENT in B - only one long in A - $nb_fusion_ok++; - fill_omniscient_from_other_omniscient_level1_id(\@IDlist_A, $refhash_omniscient, \%fusion_omniscient); - } - elsif($liftOver){ # Gene are the same / Option lift given / we try to lift name from one gene choosen randomly - @{$tmp_o_one2one{$geneA_name}} = (($geneA_feature),($geneB[0])); - } - - - ### force try to change name for better tuple - if(((! 
$sameGene) or ($sameGene eq "none") ) and ($checkMultiOverlap) and ($liftOver)){ - - my $overlap_percent_1 = testOverlaplevel3($geneA_feature, $geneB[0], $refhash_omniscient, $tarhash_omniscient, $featureType); - my $overlap_percent_2 = testOverlaplevel3($geneA_feature, $geneB[1], $refhash_omniscient, $tarhash_omniscient, $featureType); - - ## We don't consider if both have 100% overlap - my $bestFeature = undef; - if ($overlap_percent_1 > $overlap_percent_2){ - $bestFeature=$geneB[0]; - } - elsif($overlap_percent_1 < $overlap_percent_2){ - $bestFeature=$geneB[1]; - } - - if ($bestFeature){ - @{$tmp_o_one2one{$geneA_name}} = (($geneA_feature),($bestFeature)); - } - } -} -($overlapOK_one2many, $nbNameChanged_one2many, $nbNewName_one2many) = manage_one2one(\%tmp_o_one2one,$refhash_omniscient,$tarhash_omniscient,$overlapT, \%hash_geneName); -$nbLifted_one2many = $nbNameChanged_one2many + $nbNewName_one2many; - -############################## -#Manage overlap many2one -my $nb_many2one=0; -my $nb_split_ok=0; -my $nb_split_notV=0; -my $nbLifted_many2one=0; -my $nbNameChanged_many2one=0; -my $nbNewName_many2one=0; -my $overlapOK_many2one=0; -my %tmp_o_one2one; - -$nb_many2one = keys (%o_many2one); -#print "We have $nb_many2one cases many2one\n"; - -foreach my $key (keys %o_many2one){ - - my @geneA = @{$o_many2one{$key}[0]}; - my $geneB_feature = $o_many2one{$key}[1]; - - my $sameGene=check_feature_same_names(\%hash_geneName, \@geneA, 'As'); - - # prepare in case we have to save the feature - my $geneA0_name=lc($geneA[0]->_tag_value('ID')); - my $geneA1_name=lc($geneA[1]->_tag_value('ID')); - my @IDlist=($geneA0_name, $geneA1_name); - - if(! 
$sameGene){ # NAME DIFFERENT in A - only one long in B - #print "Looks like $geneA0_name and $geneA1_name should be only one gene which has been split.\n"; - fill_omniscient_from_other_omniscient_level1_id(\@IDlist, $refhash_omniscient, \%split_omniscient); - } - elsif($sameGene eq "none"){ # NAME DIFFERENT in A (cannot really compare because one is missing) - only one long in B - $nb_split_notV++; - fill_omniscient_from_other_omniscient_level1_id(\@IDlist, $refhash_omniscient, \%split_omniscient); - } - else{$nb_split_ok++; - if ($liftOver) { #same gene we take one randomly to use the name - @{$tmp_o_one2one{$geneA[0]}} = ($geneA[0],$geneB_feature); - @{$tmp_o_one2one{$geneA[1]}} = ($geneA[1],$geneB_feature); - } - } - - ### force try to change name for better tuple - if(((! $sameGene) or ($sameGene eq "none") ) and ($checkMultiOverlap) and ($liftOver)){ - @{$tmp_o_one2one{$geneA[0]}} = (($geneA[0]),($geneB_feature)); - @{$tmp_o_one2one{$geneA[1]}} = (($geneA[1]),($geneB_feature)); - - #print $geneA[0]->gff_string."\n"; - #print $geneA[1]->gff_string."\n"; - #print $geneB_feature->gff_string."\n\n"; - #print $bestFeature->gff_string."\n"; - #print "$overlap_percent_1 $overlap_percent_2 \n\n"; - } -} - -($overlapOK_many2one, $nbNameChanged_many2one, $nbNewName_many2one) = manage_one2one(\%tmp_o_one2one,$refhash_omniscient,$tarhash_omniscient,$overlapT, \%hash_geneName); -$nbLifted_many2one=$nbNameChanged_many2one, $nbNewName_many2one; - -############################## -#Manage overlap many2many -my $nb_many2many = 0; -my $nbLifted_many2many=0; -my $nbNameChanged_many2many=0; -my $nbNewName_many2many=0; -my $overlapOK_many2many=0; -my %tmp_o_one2one; - -$nb_many2many = keys (%o_many2many); - -if( ($checkMultiOverlap) and ($liftOver) ){ - foreach my $key (keys %o_many2many){ - my @List_geneA_feature = @{$o_many2many{$key}[0]}; - my @List_geneB_feature = @{$o_many2many{$key}[1]}; - - foreach my $featureA (@List_geneA_feature){ - my $best_overlap_featureB=undef; - my 
$best_overlap_percent=-1; - foreach my $featureB (@List_geneB_feature){ - if(defined(testOverlaplevel3($featureA, $featureB, $refhash_omniscient, $tarhash_omniscient, $featureType))){ - my $overlap_percent = testOverlaplevel3($featureA, $featureB, $refhash_omniscient, $tarhash_omniscient, $featureType); - - if($overlap_percent > $best_overlap_percent){ - $best_overlap_percent=$overlap_percent; - $best_overlap_featureB=$featureB; - } - } - } - - #Save the best overlaping gene B to Manage downstream - if($best_overlap_featureB){ - @{$tmp_o_one2one{lc($featureA->_tag_value('ID'))}} = (($featureA),($best_overlap_featureB)); - } - } - } -} - -( $overlapOK_many2many, $nbNameChanged_many2many, $nbNewName_many2many ) = manage_one2one(\%tmp_o_one2one,$refhash_omniscient,$tarhash_omniscient,$overlapT, \%hash_geneName); -$nbLifted_many2many=$nbNameChanged_many2many + $nbNewName_many2many; - -################# -# Display results -my $totalA=$OverlapingA+$nbNoOverlapingA; - -my $resultToPrint.= "\n\n######### RESULTS #########:\n\n"; -$resultToPrint.= "File1 ($ref):\nTotal gene=$count_ref_level1\n". - "Total gene Sort by seq $count_ref_level1_sortBySeq\n". - "Total gene checked : $totalA \n". - " => $OverlapingA genes overlap genes from file 2\n". - " => $nbNoOverlapingA genes seem to be \"orphan\"\n\n"; - -$resultToPrint.= "File2 ($tar):\nTotal gene = $count_tar_level1\nTotal gene Sort by seq $count_tar_level1_sortBySeq\n"; -$resultToPrint.= "Total gene checked = $OverlapingB\n"; -$resultToPrint.= "=> $OverlapingB genes overlap genes from file 1\n\n"; - -$resultToPrint.= "Further details:\n================\n\n"; -$resultToPrint.= "$nb_one2one one2one cases. (A one2one case is 1 gene from $ref overlaping 1 genes of $tar. \n"; -if($liftOver){ - $resultToPrint.= "Among the one2one cases $overlapOK genes overlap correctly. We lifted annotation for $nbLifted_one2one of these cases. Among them:\nWe modified previous annotation for $nbNameChanged_one2one case(s).\n". 
- "We newly annotated $nbNewName_one2one previously unannotated case(s).\n"; - -} - -$resultToPrint.= "\n$nb_one2many one2many (putative fusion) cases. (A fusion case is 1 gene from $ref overlaping >=2 genes of $tar. \n". - " => $nb_fusion_ok of them are clearly right and reported in output (If we suppose that Target is true)($tar genes have identical names).\n". - " => $nb_fusion_notV of them cannot be checked (do not have any name).\n"; -if($checkMultiOverlap or $liftOver){ - $resultToPrint.= "=> Among the one2many cases $overlapOK_one2many genes overlap correctly. We lifted annotation for $nbLifted_one2many of these cases. Among them:\nWe modified previous annotation(s) for $nbNameChanged_one2many case(s).\n". - "We newly annotated $nbNewName_one2many previously unannotated case(s).\n"; -} - -$resultToPrint.= "\n$nb_many2one many2one (putative split) cases. (A split case is >=2 genes from $ref overlaping 1 gene of $tar. \n". - " => $nb_split_ok of them are clearly right and reported in output ($ref genes have identical names).\n". - " => $nb_split_notV of them cannot be checked (do not have any name)\n"; #The others have to be checked because can be close duplicate/paralog.\n\n"; -if($checkMultiOverlap or $liftOver){ - $resultToPrint.= "=> Among the many2one cases $overlapOK_many2one genes overlap correctly. We lifted annotation for $nbLifted_many2one of these cases. Among them:\nWe modified previous annotation(s) for $nbNameChanged_many2one case(s).\n". - "We newly annotated $nbNewName_many2one previously unannotated case(s).\n"; -} - -$resultToPrint.= "\nNumber many2many : $nb_many2many\n"; -$resultToPrint.= "Among the many2many cases there is a total of $nbTotalClusterCaseA genes from $ref and $nbTotalClusterCaseB from $tar\n"; -if($checkMultiOverlap){ - $resultToPrint.= "=> Among the many2many cases $overlapOK_many2many genes overlap correctly. We lifted annotation for $nbLifted_many2many of these cases. 
Among them:\nWe modified previous annotation(s) for $nbNameChanged_many2many case(s).\n". - "We newly annotated $nbNewName_many2many previously unannotated case(s).\n"; -} - -$resultToPrint.= "\n!! Split and fusion results don't take in account possible rearrangement that could occur independently during the evolution of each genome investigated.\n". - "!! Please consider that Split, Fusion and Cluster cases an also be mostly symply independant gene overlaping (Common in prokaryotes). <= Don't forget that using the option \"all\" you compare any kind of features\n"; - - -$resultToPrint.="\nCommand line executed:"; -foreach (@ARGV) { $resultToPrint.= "$_ " };$resultToPrint.="\n"; -## OUTPUT -if($output){ - - $resultToPrint.= "=> Result are written in gff3 format in $output directory\n\n"; - print $outReport $resultToPrint; - - if($nb_one2one != 0){ - print "results with one2one cases\n"; - print_omniscient($refhash_omniscient, $gffout); - } - if($nb_one2many != 0) { - print "one2many cases\n"; - open(my $fh, '>', $output."/".$filename_tar."_FuncLiftOn_".$filename_ref."_fusion.gff") or die "Could not open file '$output' $!"; - my $gffout_fusion= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); - print_omniscient(\%fusion_omniscient, $gffout_fusion); - } - if($nb_many2one != 0) { - print "many2one cases\n"; - open(my $fh, '>', $output."/".$filename_tar."_FuncLiftOn_".$filename_ref."_split.gff") or die "Could not open file '$output' $!"; - my $gffout_split= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); - print_omniscient(\%split_omniscient, $gffout_split); - } -} - -print $resultToPrint; -####################################################################################################################### - #################### - # METHODS # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - - -sub retrieveAllOverlap { - my ($lap, $ContigName, $pieceStudiedHashA, $pieceStudiedHashB, $List_geneRef_to_test, 
$ListA_tested, $ListA_NoOverlap, $ok, $ref_omniscient, $oppo_omniscient, $featureType) = @_; - my @newListToTest; - my @listGeneToRemove; - - foreach my $geneRef_f (@{$List_geneRef_to_test}) { - - my $startA = $geneRef_f->start; - my $endA = $geneRef_f->end; - my $Overlaped_FinalAnswer=undef; # default value - - # Now test of all the opposite pieces - foreach my $tagB (keys %{$pieceStudiedHashB}){ # allow to work only on gene on the contig - foreach my $geneOppo_f( @{$pieceStudiedHashB->{$tagB}{$ContigName}}){ # allow to work only on gene on the contig - - my $startB=$geneOppo_f->start; - my $endB=$geneOppo_f->end; - - my $resuOverlap = testOverlap($startA, $endA, $startB, $endB); ## <====== Call Overlap method - - if ($resuOverlap){ - - if(defined(testOverlaplevel3($geneRef_f, $geneOppo_f, $ref_omniscient, $oppo_omniscient, $featureType))){ - - my @tmp=$geneOppo_f->get_tag_values('ID'); - my $name_geneOppo=lc(shift @tmp); - - # check if doublon - my $tmp_list = pushIfNotExit(\@newListToTest, $geneOppo_f); - @newListToTest = @{$tmp_list}; - - $Overlaped_FinalAnswer="yes"; - #print "OVERLAP FOUND => No NeedToverify\n"; - } - } - } - } - ## END ALL OPPO CHECKED - - if(! 
$Overlaped_FinalAnswer){ - if ($lap == "1"){ - #print "NO Overlap\n"; - push (@{$ListA_NoOverlap}, $geneRef_f); - } - else{ - #print "No More Overlap\n"; - $ListA_tested = pushIfNotExit($ListA_tested, $geneRef_f); - } - } - else{ - $ListA_tested = pushIfNotExit($ListA_tested, $geneRef_f); - } - - # REMOVE - # Need to be deleted to not re-use it for retrieve overlap => Because we will found again the same // The removed one will be display through @ListA_Overlap_tested - removeElementInList($pieceStudiedHashA, $geneRef_f, $ContigName); - } - - return (\@newListToTest, $ListA_tested, $ListA_NoOverlap); -} - -sub testOverlaplevel3{ - - my ($geneA_feature, $geneB_feature, $refhash_omniscient, $tarhash_omniscient, $featureType)=@_; - my $overlap=undef; - - my $name_geneA=lc($geneA_feature->_tag_value('ID')); - my $name_geneB=lc($geneB_feature->_tag_value('ID')); - - if($featureType eq "all"){ - - foreach my $tag_level2_A (keys %{$refhash_omniscient->{'level2'}}){ - if (exists $refhash_omniscient->{'level2'}{$tag_level2_A}{$name_geneA}){ - - foreach my $feature_level2_A (@{$refhash_omniscient->{'level2'}{$tag_level2_A}{$name_geneA}}){ - my $name_feature_level2_A=lc($feature_level2_A->_tag_value('ID')); - - foreach my $tag_level2_B (keys %{$tarhash_omniscient->{'level2'}}){ - if (exists $tarhash_omniscient->{'level2'}{$tag_level2_B}{$name_geneB}){ - - foreach my $feature_level2_B (@{$tarhash_omniscient->{'level2'}{$tag_level2_B}{$name_geneB}}){ - my $name_feature_level2_B=lc($feature_level2_B->_tag_value('ID')); - - my $ref_list_to_checkA=undef; - if(exists($refhash_omniscient->{'level3'}{'exon'}{$name_feature_level2_A})){ - $ref_list_to_checkA=$refhash_omniscient->{'level3'}{'exon'}{$name_feature_level2_A}; - } - elsif(exists($refhash_omniscient->{'level3'}{'cds'}{$name_feature_level2_A})){ - $ref_list_to_checkA=$refhash_omniscient->{'level3'}{'cds'}{$name_feature_level2_A} - } - - my $ref_list_to_checkB=undef; - 
if(exists($tarhash_omniscient->{'level3'}{'exon'}{$name_feature_level2_B})){ - $ref_list_to_checkB=$tarhash_omniscient->{'level3'}{'exon'}{$name_feature_level2_B}; - } - elsif(exists($tarhash_omniscient->{'level3'}{'cds'}{$name_feature_level2_B})){ - $ref_list_to_checkB=$tarhash_omniscient->{'level3'}{'cds'}{$name_feature_level2_B}; - } - - if($ref_list_to_checkA and $ref_list_to_checkB){ # we found at least exon or cds for both - if(defined(featuresList_overlap($ref_list_to_checkA, $ref_list_to_checkB))){ - $overlap=featuresList_overlap($ref_list_to_checkA, $ref_list_to_checkB); - } - } - } - } - } - } - } - } - } - else{ - #if (exists_keys($refhash_omniscient, ('level2', $featureType, $name_geneA)) ){ - if (exists $refhash_omniscient->{'level2'}{$featureType}{$name_geneA}){ - - foreach my $feature_level2_A (@{$refhash_omniscient->{'level2'}{$featureType}{$name_geneA}}){ - my $name_feature_level2_A=lc($feature_level2_A->_tag_value('ID')); - - # if (exists_keys($refhash_omniscient, ('level2', $featureType, $name_geneB)) ){ - if (exists $tarhash_omniscient->{'level2'}{$featureType}{$name_geneB}){ - - foreach my $feature_level2_B (@{$tarhash_omniscient->{'level2'}{$featureType}{$name_geneB}}){ - my $name_feature_level2_B=lc($feature_level2_B->_tag_value('ID')); - - my $ref_list_to_checkA=undef; - if(exists($refhash_omniscient->{'level3'}{'exon'}{$name_feature_level2_A})){ - $ref_list_to_checkA=$refhash_omniscient->{'level3'}{'exon'}{$name_feature_level2_A}; - } - elsif(exists($refhash_omniscient->{'level3'}{'cds'}{$name_feature_level2_A})){ - $ref_list_to_checkA=$refhash_omniscient->{'level3'}{'cds'}{$name_feature_level2_A} - } - - my $ref_list_to_checkB=undef; - if(exists($tarhash_omniscient->{'level3'}{'exon'}{$name_feature_level2_B})){ - $ref_list_to_checkB=$tarhash_omniscient->{'level3'}{'exon'}{$name_feature_level2_B}; - } - elsif(exists($tarhash_omniscient->{'level3'}{'cds'}{$name_feature_level2_B})){ - 
$ref_list_to_checkB=$tarhash_omniscient->{'level3'}{'cds'}{$name_feature_level2_B}; - } - - if($ref_list_to_checkA and $ref_list_to_checkB){ # we found at least exon or cds for both - if(defined(featuresList_overlap($ref_list_to_checkA, $ref_list_to_checkB))){ - $overlap=featuresList_overlap($ref_list_to_checkA, $ref_list_to_checkB); - } - } - } - } - } - } - } - return $overlap; -} - -sub pushIfNotExit{ - - my ($list, $feature)=@_; - - my @listEx=@$list; - - my @tmp = $feature->get_tag_values('ID'); - my $feature_id = shift @tmp; - - foreach my $f (@listEx){ - - my @tmp = $f->get_tag_values('ID'); - my $f_id = shift @tmp; - - if ($feature_id eq $f_id){ - return $list; - } - } - - push (@listEx, $feature); - return \@listEx; -} - -sub removeElementInList { - my ($hash_sortbyseq, $feature, $ContigName) = @_; - - foreach my $tagB (keys %{$hash_sortbyseq}){ # allow to work only on gene on the contig - if(exists ($hash_sortbyseq->{$tagB}{$ContigName})){ # allow to work only on gene on the contig - # print "size before ".$#{$hash_sortbyseq->{$tagB}{$ContigName}}."\n"; - @{$hash_sortbyseq->{$tagB}{$ContigName}}= grep { $_ ne $feature } @{$hash_sortbyseq->{$tagB}{$ContigName}}; #remove element of the list - # print "size after ".$#{$hash_sortbyseq->{$tagB}{$ContigName}}."\n"; - my @List=($feature); - } - else{print "problem when deleting...!\n";} - } -} - -sub sort_by_seq{ - my ($hash_omniscient, $featureType)=@_; - - my %hash_sortBySeq; - - foreach my $tag_level1 (keys %{$hash_omniscient->{'level1'}}){ - foreach my $level1_id (keys %{$hash_omniscient->{'level1'}{$tag_level1}}){ - my $level1_feature = $hash_omniscient->{'level1'}{$tag_level1}{$level1_id}; - - # foreach my $tag_level2 (keys %{$hash_omniscient->{'level2'}}){ - if (exists_keys($hash_omniscient, ('level2', $featureType, $level1_id)) ){ # check if they have mRNA avoiding autovivifcation - my $firstFeature=$hash_omniscient->{'level2'}{$featureType}{$level1_id}[0]; - - if($firstFeature->has_tag('ID')){ - my 
@mrna_values = $firstFeature->get_tag_values('ID'); - my $mrna_id = shift @mrna_values; - } - else{print "tag missing\n";exit;} - - my $position=$level1_feature->seq_id."".$level1_feature->strand; - push (@{$hash_sortBySeq{$tag_level1}{$position}}, $level1_feature); - } - # } - } - } - return \%hash_sortBySeq; -} - -sub testOverlap { - my ($startA, $endA, $startB, $endB) = @_; - - my $overlap = undef; - - if($startA <= $endB and $endA >= $startB){ - $overlap="yes"; - } - - return $overlap; -} - -sub manage_one2one{ - my ($o_one2one,$refhash_omniscient,$reftar_omniscient, $overlapT, $hash_geneName)=@_; - - my $nameChanged=0; - my $newName=0; - my $overlapOK=0; - - foreach my $key (keys %{$o_one2one}){ - - my $geneA_feature = $o_one2one->{$key}[0]; - my $geneB_feature = $o_one2one->{$key}[1]; - - #print "\n\nCheck the gene ".$geneA_feature->gff_string."\nagainst ".$geneB_feature->gff_string." \n\n"; - my $best_overlap=0; - - my $name_geneA=lc($geneA_feature->_tag_value('ID')); - my $name_geneB=lc($geneB_feature->_tag_value('ID')); - - foreach my $tag_level2_A (keys %{$refhash_omniscient->{'level2'}}){ - - if (exists $refhash_omniscient->{'level2'}{$tag_level2_A}{$name_geneA}){ - - foreach my $feature_level2_A (@{$refhash_omniscient->{'level2'}{$tag_level2_A}{$name_geneA}}){ - - my $name_feature_level2_A=lc($feature_level2_A->_tag_value('ID')); - - foreach my $tag_level2_B (keys %{$tarhash_omniscient->{'level2'}}){ - if (exists $tarhash_omniscient->{'level2'}{$tag_level2_B}{$name_geneB}){ - foreach my $feature_level2_B (@{$tarhash_omniscient->{'level2'}{$tag_level2_B}{$name_geneB}}){ - my $name_feature_level2_B=lc($feature_level2_B->_tag_value('ID')); - - my $ref_list_to_checkA=undef; - if(exists($refhash_omniscient->{'level3'}{'exon'}{$name_feature_level2_A})){ - $ref_list_to_checkA=$refhash_omniscient->{'level3'}{'exon'}{$name_feature_level2_A}; - } - elsif(exists($refhash_omniscient->{'level3'}{'cds'}{$name_feature_level2_A})){ - 
$ref_list_to_checkA=$refhash_omniscient->{'level3'}{'cds'}{$name_feature_level2_A} - } - - my $ref_list_to_checkB=undef; - if(exists($tarhash_omniscient->{'level3'}{'exon'}{$name_feature_level2_B})){ - $ref_list_to_checkB=$tarhash_omniscient->{'level3'}{'exon'}{$name_feature_level2_B}; - } - elsif(exists($tarhash_omniscient->{'level3'}{'cds'}{$name_feature_level2_B})){ - $ref_list_to_checkB=$tarhash_omniscient->{'level3'}{'cds'}{$name_feature_level2_B}; - } - - my $overlap_percent=0; - if(! $inv){ - if($ref_list_to_checkA and $ref_list_to_checkB){ - $overlap_percent = featuresList_overlap($ref_list_to_checkA, $ref_list_to_checkB); - } - } - else{ - if($ref_list_to_checkA and $ref_list_to_checkB){ - $overlap_percent = featuresList_overlap($ref_list_to_checkB, $ref_list_to_checkA); - } - } - - if($overlap_percent > $best_overlap){ - $best_overlap=$overlap_percent; - } - } - } - } - } - } - } - ####### - #NOW CHECK IF WE ARE UNDER THE THRESHOLD TO CONSIDER IT - #print "My best overlap is: $best_overlap\n"; - if($best_overlap >= $overlapT){ - $overlapOK++; - my ($nameChanged_u, $newName_u) = liftGeneName($geneA_feature, $geneB_feature, $refhash_omniscient, $hash_geneName); - $nameChanged=$nameChanged+$nameChanged_u; - $newName=$newName+$newName_u; - } - } -return $overlapOK, $nameChanged, $newName; -} - -sub liftGeneName{ - my ($geneA_feature, $geneB_feature, $refhash_omniscient, $hash_geneName)=@_; - my $nameChanged=0; - my $newName=0; - - my $id_geneA = lc($geneA_feature->_tag_value('ID')); - my $id_geneB = lc($geneB_feature->_tag_value('ID')); - - my @list_features=($geneA_feature, $geneB_feature); - - foreach my $tag_level1 (keys %{$refhash_omniscient->{'level1'}}){ - if (exists $refhash_omniscient->{'level1'}{$tag_level1}{$id_geneA}){ - my $featureA = $refhash_omniscient->{'level1'}{$tag_level1}{$id_geneA}; - - ####################### - # MANAGE NAME (ONLY IF DIFERENT) - my $same_gene = check_feature_same_names($hash_geneName, \@list_features, 'AB'); - if((! 
$same_gene) or ($same_gene eq "none")){ - my $tag=undef; - if($geneB_feature->has_tag('Name')){ - $tag='Name'; - } - elsif($geneB_feature->has_tag('gene_name')){ - $tag='gene_name'; - } - elsif($geneB_feature->has_tag('gene_Name')){ - $tag='gene_Name'; - } - else{print "No tag Name or gene_name for geneB feature !!".$geneB_feature->gff_string."\n";} - - if($tag){ - my $name_featureA="Name Absent"; - if ($geneA_feature->has_tag('Name')){ - $name_featureA = uc($geneA_feature->_tag_value('Name')); - } - my $name_featureB=uc($geneB_feature->_tag_value($tag)); - - ##### - # We take add label if necessary if not deactivated - if (! $_dblr){ - if(exists ($hash_geneNameLab{$name_featureB}) ){ # Name already - my $cpt_label=1; - my $nameB=$name_featureB."_".$cpt_label; - while($hash_geneNameLab{$nameB}){ - $cpt_label++; - $nameB=$name_featureB."_".$cpt_label; - } - } - } - - #################### - # Display info old vs new name - #print "\nP1311_101 gene: ".$geneA_feature->gff_string."\n"; #print "K12 gene: ".$geneB_feature->gff_string."\n"; - if($output){ - print $outReport "old name = $name_featureA <=> New name = $name_featureB\n"; - }else{print "old name = $name_featureA <=> New name = $name_featureB\n";} - - create_or_replace_tag($geneA_feature, 'Name', $name_featureB); - - # Track modified gene names - if($name_featureA eq "Name Absent"){ - $newName=1 - }else{$nameChanged=1;} - - ################ - ## take care of "description" attribute - my $new_description=get_attribute_value($refhash_omniscient, $geneB_feature, $id_geneB, 'description'); - # Now we try to transfert description on genefeature A - if($geneA_feature->has_tag('description') and !$new_description){ - $geneA_feature->remove_tag('description'); - } - elsif($new_description){ - create_or_replace_tag($geneA_feature, 'description', $new_description); - } - - ################ - ## take care of "product" attribute - my $new_product=get_attribute_value($refhash_omniscient, $geneB_feature, $id_geneB, 
'product'); - # Now we try to transfert product on genefeature A - if($geneA_feature->has_tag('product') and !$new_product){ - $geneA_feature->remove_tag('product'); - } - elsif($new_product){ - create_or_replace_tag($geneA_feature, 'product', $new_product); - } - - ######### - #change now info of all mRNA of geneA - foreach my $tag_level2 (keys %{$refhash_omniscient->{'level2'}}){ - if (exists $refhash_omniscient->{'level2'}{$tag_level2}{$id_geneA}){ - foreach my $feature (@{$refhash_omniscient->{'level2'}{$tag_level2}{$id_geneA}}){ - create_or_replace_tag($feature, 'Name', $name_featureB); - - # Now we try to transfert description on feature level2 - if($feature->has_tag('description') and !$new_description){ - $feature->remove_tag('description'); - } - elsif($new_description){ - create_or_replace_tag($feature, 'description', $new_description); - } - - # Now we try to transfert product on feature level2 - if($feature->has_tag('product') and !$new_product){ - $feature->remove_tag('product'); - } - elsif($new_product){ - create_or_replace_tag($feature, 'product', $new_product); - } - } - } - } - } - } - ########################### - # ADD ORTHOLOGY INFORMATION at gene level - my $name_geneB=$geneB_feature->_tag_value('ID'); - $name_geneB =~ s/$nbis_suffix_d//; - create_or_replace_tag($geneA_feature, 'orthology', $name_geneB); - } - } - -return $nameChanged, $newName; -} - -sub get_attribute_value{ - - my ($refhash_omniscient, $gene_feature, $id_gene, $attribute)=@_; - - my $value=undef; - - #first we try to get if from genefeature - if($gene_feature->has_tag($attribute)){ #get it at gene level - $value=$gene_feature->_tag_value($attribute); - } - else{ #Not found at gene level, we try to find it at level2 - foreach my $tag_level2 (keys %{$refhash_omniscient->{'level2'}}){ - if (exists $refhash_omniscient->{'level2'}{$tag_level2}{$id_gene}){ - foreach my $feature (@{$refhash_omniscient->{'level2'}{$tag_level2}{$id_gene}}){ - if($feature->has_tag($attribute)){ - 
$value=$gene_feature->_tag_value($attribute); - return $value; - } - } - } - } - } - return $value; -} - -#check name of a list of features -sub check_feature_same_names{ - - my ($hash_geneName, $ListFeatures, $type)= @_; - my $sameName="yes"; - my $nameBefore; - my $gene_cpt=0; - - foreach my $gene_feature (@{$ListFeatures}){ - my $tag=undef; - if($gene_feature->has_tag('Name')){ - $tag='Name'; - } - elsif($gene_feature->has_tag('gene_name')){ - $tag='gene_name'; - } - else{ - # print "Warning: No name found for ".$gene_feature->gff_string."\n"; - $sameName="none";last; - } - - if($tag){ - my @tmp=$gene_feature->get_tag_values($tag); - my $name_gene_feature=lc(shift @tmp); - $gene_cpt++; - - #### - # - my $typeCheck; - if ($type ne 'AB'){ - $typeCheck = $type; - } - else{ - if($gene_cpt == 1){$typeCheck = 'As';} else {$typeCheck = 'Bs';} - } - #### - # MANAGE IF Name on target comes from nbis functional annotation. We have to remove _1 _2 etc (Not DEFAULT behavior) - if($_ablt and $typeCheck eq "Bs") # We remove what has been added by NBIS during gene name annotation - { - $name_gene_feature =~ s/$nbis_suffix_p//; - } - #### - # MANAGE IF Name on reference comes from nbis functional annotation. We have to remove _1 _2 etc (DEFAULT behavior) - if(! $_dblr and $typeCheck eq "As") # We remove what has been added by NBIS during gene name annotation - { - $name_gene_feature =~ s/$nbis_suffix_p//; - } - - if ($gene_cpt >= 2){ - if ($nameBefore ne $name_gene_feature){ # the two names are different ! - $sameName=undef; - } - } - $nameBefore=$name_gene_feature; - } - } - return $sameName; -} -=head1 NAME - -infer_function_from_synteny.pl - -From 2 gffs file from the same assembly, the tool will lift functional information from a target file (--tar file) on the reference file (--ref file). -So the tool the detect the genes from target that overlap, according to the threshold choosen, the genes of the reference file. 
-Thus, one2one,one2many (fusion), many2one (split) and many2many cases are detected. -Function for one2one cases are liftover, and other cases are reported in corresponding output file. - -=head1 SYNOPSIS - - ./infer_function_from_synteny.pl --ref=infile --tar=infile [Options] - ./infer_function_from_synteny.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--ref>, B<--reffile> or B<-f> - -Input GFF3 file correponding to the reference file where function will be written on. - -=item B<--tar>, B<--mapped> or B<--m> - -Input GFF3 file corresponding to the target file containing function that will be lift over on the reference. - -=item B<--inv>, B<--inverse> or B<--oppposite> - -By default the ref file genes are tested against the tar file genes for the overlaping percentage. To do the opposite just set this parameter (Do not correspond to a shift between reference and target files). - -=item B<--transfert>, B<--lift> or B<-t> - -Lift the function of genes from target file to gene of reference file is they overlap on2one with length percentage over "value" parameter. -Particular cases: - one2many => if the many genes from tar have the same name we lift it to the ref according to the overlap percentage value threshold. - - many2one => if the many genes from ref have the same name we try to lift the ref name according to the overlap percentage value threshold. - -=item B<--cmo> - -cno = checkMultiOverlap => When this option is activated, multi-overlaping genes (one2many, many2one (cases not takken in account by the --lift option), and many2many) will be checked. -The most overlaping couple of gene will be taken to try to lift the name according to the overlap percentage value threshold. -If a gene has the same overlapping value within several couple we do not lift-over the function because we cannot define wich one is the real 'ortholog' one. - -=item B<--value>, B<--threshold> or B<--overlap> - -Define the percentage of overlaping to use to consider genes as ortholog. 
Usefull only if "lift" option activated. -/!\\ Dont forget your kraken data set has already been selected by an overlapping filer. So if the previous kraken scipt you choose to keep gene mapped over 20 percent; -Here choose a value of 20 will mean you accept to consider the gene as ortholog even if 20% of 20% is mapped (~4%). -Exon features are considered. If there is no exon, cds will be used instead. - -=item B<--dblr> - -Deactivate NBIS Label Reference. By default to compare names from two files we remove a potential label _(0-9)* at the end of geme names in the reference file because they can have been added by NBIS during the functional annotation process. -If we lift a new name to the reference annotation, we first check that the name is already existing elsewhere in the annotation. If it exists, we also add the labbel according to the number of gene with that name. -If you don't want to take in account the labbel, you can deactivate the behaviour by calling that otpion (--dbl). - -=item B<--ablt> - -Activate NBIS Label Target. By default we don't look for label in target file. Most of time has not been annotated by NBIS. But in case where the target file is also annotated by NBIS, -it possible to take in account the possible label at the end of gene names by activating the option. - -=item B<--feature> - -The script checks the overlap using the cds or exon of level2 features. By default we are considering only mRNA as level2 feature. If you want to change the level2 feature type you can set up that option and defining the new feature to consider. --feature tRNA. -An option "all" has to be fully implemented. - -=item B<--out>, B<--output> or B<-o> - -Output directory where diffrent output files will be written. - -=item B<--help> or B<-h> - -Getting help. -Display the full information. 
- -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_keep_longest_isoform.pl b/annotation/Tools/Util/gff/gff3_sp_keep_longest_isoform.pl deleted file mode 100755 index 9e8570681..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_keep_longest_isoform.pl +++ /dev/null @@ -1,143 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Getopt::Long; -use Pod::Usage; -use IO::File; -use List::MoreUtils qw(uniq); -use File::Basename; -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient; - -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $gff = undef; -my $opt_output = undef; -my $opt_genomeSize = undef; -my $opt_plot = undef; -my $help= 0; - -if ( !GetOptions( - "help|h" => \$help, - 'o|output=s' => \$opt_output, - "gff|f=s" => \$gff)) - -{ - pod2usage( { -message => "Failed to parse command line", - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 1, - -exitval => 0, - -message => "$header \n" } ); -} - -if ( ! 
(defined($gff)) ){ - pod2usage( { - -message => "$header\nAt least 1 parameter is mandatory:\nInput reference gff file (--gff) \n\n", - -verbose => 0, - -exitval => 2 } ); -} - -#### OUT -my $gffout; -if ($opt_output) { - open(my $fh, '>', $opt_output) or die "Could not open file '$opt_output' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); - } -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - - ##################### - # MAIN # - ##################### - - - -###################### -### Parse GFF input # -print "Reading file $gff\n"; -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $gff - }); -print "Parsing Finished\n"; -### END Parse GFF input # -######################### - -#check number of level1 -my $nbLevel1 = 0; -foreach my $tag_l1 (keys %{$hash_omniscient->{'level1'}}){ - $nbLevel1 += keys %{$hash_omniscient->{'level1'}{$tag_l1}}; -} -#chech number of level2 -my $nbLevel2 = keys %$hash_mRNAGeneLink; - -#Check if we have isoforms -if($nbLevel1 != $nbLevel2){ - - - #create list of level2 where we kept only level2 that have cds and only the longest isoform ! - my $list_id_l2 = get_longest_cds_level2($hash_omniscient); - - # create a new omniscient with only one mRNA isoform per gene - my $omniscientNew = create_omniscient_from_idlevel2list($hash_omniscient, $hash_mRNAGeneLink, $list_id_l2); - - # print omniscientNew containing only the longest isoform per gene - print_omniscient($omniscientNew, $gffout); - print $nbLevel2 - $nbLevel1." isoforms removed ! \n"; -} -else{ - print "Nothing to do... this file doesn't contain any isoform !\n"; - print_omniscient($hash_omniscient, $gffout); -} - -# END STATISTICS # -################## -print "Done\n"; - - - - -__END__ - -=head1 NAME - -gff3_sp_keep_longest_isoform.pl - -The script take a gff3 file as input. - -The script give basic statistics of a gff file. 
-Remark: identical feature from level1 or level2 with identical ID will be merged as well as their subsequent features (Level2 or level3). - -=head1 SYNOPSIS - - ./gff3_sp_keep_longest_isoform.pl -gff file.gff [ -o outfile ] - ./gff3_sp_keep_longest_isoform.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff> or B<-f> - -Input GFF3 file that will be read (and sorted) - -=item B<--output> or B<-o> - -File where will be written the result. If no output file is specified, the output will be written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_list_short_introns.pl b/annotation/Tools/Util/gff/gff3_sp_list_short_introns.pl deleted file mode 100755 index bb129e89d..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_list_short_introns.pl +++ /dev/null @@ -1,245 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use POSIX qw(strftime); -use List::MoreUtils qw(natatime);; -use Carp; -use Getopt::Long; -use Pod::Usage; -use Clone 'clone'; -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient; - -my $header = qq{ -######################################################## -# NBIS 2018 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - - - - -my $opt_file; -my $INTRON_LENGTH = 10; -my $opt_output=undef; -my $opt_help = 0; - -my @copyARGV=@ARGV; -if ( !GetOptions( 'f|gff|ref|reffile=s' => \$opt_file, - 'o|out|output=s' => \$opt_output, - "size|s=i" => \$INTRON_LENGTH, - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ( ! 
defined( $opt_file) ) { - pod2usage( { - -message => "$header\nMust specify at least 1 parameters:\nReference data gff3 file (--gff)\n", - -verbose => 0, - -exitval => 1 } ); -} - -# ####################### -# # START Manage Option # -# ####################### -my $fh; -if ($opt_output) { - open($fh, '>', $opt_output) or die "Could not open file '$opt_output' $!"; - } -else{ - $fh = *STDOUT; -} - - -# ##################################### -# # END Manage OPTION -# ##################################### - - - - ####################### - # MAIN # -# >>>>>>>>>>>>>>>>>>>>>>>>> ####################### <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -#PART 1 -################################### -# Read input gff3 files one by one and save value in hash of list - - - ###################### - ### Parse GFF input # - my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $opt_file - }); - print("Parsing Finished\n\n"); - ### END Parse GFF input # - ######################### - - #print statistics - my ($stat, $distri) = gff3_statistics($hash_omniscient); - #print statistics - foreach my $infoList (@$stat){ - foreach my $info (@$infoList){ - print "$info"; - } - print "\n"; - } - - print $fh "List introns inferior to $INTRON_LENGTH nucleotides:\n\n"; - print $fh "Seq_id\tGene_name\tintron_start\tintron_size\n"; - -my $total_intron = 0; -my %total_gene; -my %result; - - ###################### - ### Parse GFF input # - # get nb of each feature in omniscient; - foreach my $tag_l2 (keys %{$hash_omniscient->{'level2'}}){ - foreach my $id_l1 (keys %{$hash_omniscient->{'level2'}{$tag_l2}}){ - my $one_f2 = $hash_omniscient->{'level2'}{$tag_l2}{$id_l1}[0]; - - ####################### - #get feature1 and info - my $feature_l1=undef; - my $tag_l1; - foreach my $tag_level1 (keys %{$hash_omniscient->{'level1'}}){ - if (exists ($hash_omniscient->{'level1'}{$tag_level1}{$id_l1})){ - $feature_l1=$hash_omniscient->{'level1'}{$tag_level1}{$id_l1}; - $tag_l1=$tag_level1; - last; - } - } - 
if(! $feature_l1){print "Problem ! We didnt retrieve the level1 feature with id $id_l1\n";exit;} - - ##### - # get all level2 - my $All_l2_single=1; - foreach my $feature_l2 ( @{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}} ){ - - my @introns=(); - my $feature_example; - - ###### - #get all level3 - my $id_l2=lc($feature_l2->_tag_value('ID')); - - if(exists_keys($hash_omniscient, ('level3','exon',$id_l2) ) ){ - - my $counterL3=-1; - #Initialize intron to 0 to avoid error during printing results - my $indexLast = $#{$hash_omniscient->{'level3'}{'exon'}{$id_l2}}; - - my @sortedList = sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{'exon'}{$id_l2}}; - - foreach my $feature_l3 ( @sortedList ){ - - #count number feature of tag_l3 type - $counterL3++; - - ################ - #Manage Introns# - # from the second intron to the last (from index 1 to last index of the table sortedList) - # We go inside this loop only if we have more than 1 feature. - if($counterL3 > 0 and $counterL3 <= $indexLast){ - my $intronStart = $sortedList[$counterL3-1]->end+1; - my $intronEnd = $sortedList[$counterL3]->start-1; - my $intron_size = ($intronEnd - $intronStart + 1); - if ($intron_size < $INTRON_LENGTH){ - my $seqid = $feature_l1->seq_id(); - - $total_intron++; - $total_gene{$id_l1}++; - $result{$seqid}{$total_intron} = "$seqid\t$id_l1\t$intronStart\t$intron_size\n"; - - } - } - }# END FOREACH L3 - } - } - } - } -foreach my $seqid (keys %result){ - foreach my $cpt (keys $result{$seqid}){ - print $fh $result{$seqid}{$cpt}; - } -} - -my $gene_number = keys %total_gene; -print $fh "\n$total_intron introns found for $gene_number uniq genes\n"; - ######################### - ######### END ########### - ######################### - - -####################################################################################################################### - #################### - # methods # - ################ - ############## - ############ - ########## - ######## - ###### - #### - 
## - - - -__END__ - -my $opt_file; -my $INTRON_LENGTH = 10; -my $opt_output=undef; -my $opt_help = 0; - -my @copyARGV=@ARGV; -if ( !GetOptions( 'f|gff|ref|reffile=s' => \$opt_file, - 'o|out|output=s' => \$opt_output, - "size|s=i" => \$INTRON_LENGTH, - 'h|help!' => \$opt_help ) ) - -=head1 NAME - -gff3_sp_list_short_introns.pl -The script aims to list all the introns inferior to a certain size. Introns are calculated on the fly from exons. (intron feature will not be used) - -=head1 SYNOPSIS - - ./gff3_sp_list_short_introns.pl --gff=infile --out=outFile - ./gff3_sp_list_short_introns.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff>, B<-f>, B<--ref> or B<-reffile> - -Input GFF3 file correponding to gene build. - -=item B<--size> or B<-s> - -Minimum intron size accepted in nucleotide. All introns under this size will be reported. Default value = 10. - -=item B<--out>, B<--output> or B<-o> - -Output gff3 file where the gene incriminated will be write. - -=item B<--help> or B<-h> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_load_function_from_protein_align.pl b/annotation/Tools/Util/gff/gff3_sp_load_function_from_protein_align.pl deleted file mode 100755 index 9ba4c6f00..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_load_function_from_protein_align.pl +++ /dev/null @@ -1,1298 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Carp; -use Try::Tiny; -use File::Basename; -use POSIX qw(strftime); -use Bio::DB::Taxonomy; -use Getopt::Long; -use Bio::DB::Fasta; -use Pod::Usage; -use List::MoreUtils qw(uniq); -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient; - - -my $usage = qq{ -######################################################## -# NBIS 2017 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -#The cases are exclusive, one result could not be part of several cases. 
-my %cases_explanation = ( - -1 => "No protein alignement overlap the gene model.", - 0 => "There is protein alignment that overlap the gene model but none goes over the threshold defined.", - 1 => "There is protein alignment that overlap the gene model, the overlap is over the threshold defined, the protein is from one of the species defined, and the PE value is as defined. P.S: Case only possible when both options sp and pe are activated.", - 21 => "There is protein alignment that overlap the gene model, the overlap is over the threshold defined, the PE value is as defined.", - 22 => "There is protein alignment that overlap the gene model, the overlap is over the threshold defined, the protein is from one of the species defined.", - 3 => "There is protein alignment that overlap the gene model, the overlap is over the threshold defined." -); - -my $opt_output = undef; -my $annotation_gff = undef; -my $protein_gff = undef; -my $protein_fasta = undef; -my $valueK = 50; -my $opt_test = "="; -my $attributes = undef ; -my $whole_sequence_opt = undef ; -my $priority_opt = "pe"; -my $sort_method_by_species = undef ; -my $sort_method_by_pe = undef ; -my $verbose = undef; -my $method_opt = "replace"; -my $help = 0; - - -my @copyARGV=@ARGV; -if ( !GetOptions( - "help|h" => \$help, - "annotation|a=s" => \$annotation_gff, - "pgff=s" => \$protein_gff, - "sp:s" => \$sort_method_by_species, - 'test=s' => \$opt_test, - 'pe:i' => \$sort_method_by_pe, - 'priority|p=s' => \$priority_opt, - "fasta|pfasta=s" => \$protein_fasta, - "w" => \$whole_sequence_opt, - "value|threshold=i" => \$valueK, - 'method|m:s' => \$method_opt, - "verbose|v" => \$verbose, - "output|out|o=s" => \$opt_output)) - -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$usage\n" } ); -} - -if ( ! 
($annotation_gff and $protein_gff and $protein_fasta) ){ - pod2usage( { - -message => "\nAt least 3 parameters are mandatory:\nAnnotation gff file (-a), Protein gff file (--pgff), Protein fasta file (--pfasta)\n\n". - "$usage\n", - -verbose => 0, - -exitval => 2 } ); -} - -# +------------------------------------------------------+ -# |+----------------------------------------------------+| -# || Manage OPTIONS || -# |+----------------------------------------------------+| -# +------------------------------------------------------+ - - - -#Manage test option -if($opt_test ne "<" and $opt_test ne ">" and $opt_test ne "<=" and $opt_test ne ">=" and $opt_test ne "="){ - print "The test to apply is Wrong: $opt_test.\nWe want something among this list: <,>,<=,>= or =.";exit; -} -if(defined ($sort_method_by_pe)){ - if ($sort_method_by_pe){ - if(! (5 >= $sort_method_by_pe and $sort_method_by_pe >= 1) ){ - print "The value of the Protein Existence value is Wrong: $sort_method_by_pe.\n We want a value between 1 and 5.";exit; - } - } - else{ - $sort_method_by_pe=1; - } -} - -########################## -##### Manage Output ###### - -## Manage screen output -our $screen_out = undef; - -our @outputTab; - -if (defined($opt_output) ) { - if (-f $opt_output){ - print "Cannot create a directory with the name $opt_output because a file with this name already exists.\n";exit(); - } - if (-d $opt_output){ - print "The output directory choosen already exists. Please geve me another Name.\n";exit(); - } - - #To continue to display on screen - $screen_out = \*STDOUT or die ( sprintf( "Can not open '%s' for writing %s", "STDOUT", $! )); - - #create the folder - mkdir $opt_output; - - my $outfile_gff = $annotation_gff; - $outfile_gff =~ s/.gff//g; - $outfile_gff = $outfile_gff."_updated.gff"; - - #0 txt - my $ostreamReport = IO::File->new(">".$opt_output."/report.txt" ) or - croak( sprintf( "Can not open '%s' for writing %s", $opt_output."/report.txt", $! 
)); - push (@outputTab, $ostreamReport); - #1 txt - my $ostreamFAadded = IO::File->new(">".$opt_output."/function_added.txt" ) or - croak( sprintf( "Can not open '%s' for writing %s", $opt_output."/function_added.txt", $! )); - push (@outputTab, $ostreamFAadded); - _print("Gene ID\tmRNA ID\tGene name\tProduct\n", 1); - #2 gff - my $ostreamCoding = Bio::Tools::GFF->new(-file => ">".$opt_output."/".$outfile_gff, -gff_version => 3 ) or - croak(sprintf( "Can not open '%s' for writing %s", $opt_output."/".$outfile_gff, $! )); - push (@outputTab, $ostreamCoding); - -} -else{ - my $ostreamReport = \*STDOUT ; - push (@outputTab, $ostreamReport); - my $ostreamFAadded = \*STDOUT ; - push (@outputTab, $ostreamFAadded); - my $ostream = IO::File->new(); - $ostream->fdopen( fileno(STDOUT), 'w' ) or croak( sprintf( "Can not open STDOUT for writing: %s", $! ) ); - my $outputGFF = Bio::Tools::GFF->new( -fh => $ostream, -gff_version => 3) or croak( sprintf( "Can not open STDOUT for writing: %s", $! ) ); - push (@outputTab, $outputGFF); -} - -# Manage species names -my $db = Bio::DB::Taxonomy->new(-source => 'entrez'); - -if(defined($sort_method_by_species) ){ - if($sort_method_by_species){ - my %hash; - my @listValue= split /,/, $sort_method_by_species; - my $counter=1; - foreach my $element (@listValue ){ - $element =~ s/^\s+|\s+$//g; #removing leading and trailing white spaces - my @taxonids = $db->get_taxonids($element); - if(@taxonids){ - $hash{$counter++}=$element; - } - else{ - _print("/!\\ species <$element> is unknown in the NCBI taxonomy DB, we skip it !\n",0); - } - } - $sort_method_by_species = \%hash; - } - else{ - $sort_method_by_species = _taxid_ref_sorted(); - } - print "Priority in this order will be used for selecting the referential protein form matching proteins:\n"; - foreach my $priority (sort {$a <=> $b} keys %{$sort_method_by_species}){ - _print( $priority." 
- ".$sort_method_by_species->{$priority}."\n",0); - } -} - -#Manage priority -if ($priority_opt ne "pe" and $priority_opt ne "sp"){ - print "Priority option with value $priority_opt doesn't exist. Please select pe or sp. (i.e help)\n";exit; -} - -#Manage method -if ($method_opt eq "replace"){ - print "We will add or replace the product and Name values when a protein maps properly.\n"; -} -elsif($method_opt eq "add"){ - print "We will add the lfp_product and lfp_name values when a protein maps properly.\n"; -} -elsif($method_opt eq "complete"){ - print "We will add the product and Name values when a protein maps properly and no product and/or Name value exists.\n"; -} -else{print "Method option must be replace, add or complete. Please check the help for more information. (replace by default)\n";exit;} - - - -my $stringPrint = strftime "%m/%d/%Y at %Hh%Mm%Ss", localtime; -$stringPrint .= "\nusage: $0 @copyARGV\n"; -_print($stringPrint, 0); - - -# +------------------------------------------------------+ -# |+----------------------------------------------------+| -# || MAIN || -# |+----------------------------------------------------+| -# +------------------------------------------------------+ - -###################### -### Parse GFF input # -_print( "Parsing file $annotation_gff\n",0); -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $annotation_gff - }); -_print( "Done\nParsing file $protein_gff\n",0); -my ($prot_omniscient, $prot_mRNAGeneLink) = slurp_gff3_file_JD({ input => $protein_gff - }); -_print( "Done\n",0); - -########################### -#### Parse protein fasta # -_print( "Parsing the protein fasta file $protein_fasta\n",0); -my $nbFastaSeq=0; -my $db_prot = Bio::DB::Fasta->new($protein_fasta); -my @long_ids_prot = $db_prot->get_all_primary_ids; -my %allIDs_prot; # save ID in lower case to avoid cast problems -foreach my $long_id (@long_ids_prot){ - #uniprot case long_id=>sp|Q5R8S7|PPIA_PONPY short_id=Q5R8S7 - my $short_id = 
_take_clean_id($long_id); - $allIDs_prot{lc($short_id)}=$long_id; -} -_print( "Done\n",0); - -my $omniscient1_sorted = gather_and_sort_l1_location_by_seq_id_and_strand($hash_omniscient); -my $omniscient2_sorted = gather_and_sort_l1_location_by_seq_id_and_strand($prot_omniscient); - -my %cases; - -foreach my $locusID ( keys %{$omniscient1_sorted}){ # tag_l1 = protein_match match .... - foreach my $tag_l1 ( keys %{$omniscient1_sorted->{$locusID}} ) { - - # Go through location from left to right ### !! - my @aligns; - my $selected=undef; - while ( @{$omniscient1_sorted->{$locusID}{$tag_l1}} ){ - - my $location = shift @{$omniscient1_sorted->{$locusID}{$tag_l1}};# This location will be updated on the fly - my $id1_l1 = lc($location->[0]); - - if( exists_keys($omniscient2_sorted, ($locusID ) ) ) { - - foreach my $tag2_l1 ( keys %{$omniscient2_sorted->{$locusID}} ) { - - while ( @{$omniscient2_sorted->{$locusID}{$tag2_l1}} ){ - - my $location2 = shift @{$omniscient2_sorted->{$locusID}{$tag2_l1}};# This location will be updated on the fly - my $id2_l1 = lc($location2->[0]); - - #If location_to_check start if over the end of the reference location, we stop - if($location2->[1] > $location->[2]) {last;} - #If location_to_check end if inferior to the start of the reference location, we continue next - if($location2->[2] < $location->[1]) {next;} - - # Let's check at Gene LEVEL - if( location_overlap($location, $location2) ){ #location overlap at gene level check now level3 - - ################################### - # let's check at the deeper level # - my $prot_tag = $prot_omniscient->{'level1'}{$tag2_l1}{$id2_l1}->_tag_value('Name'); - my $align = check_gene_overlap_gffAlign($hash_omniscient, $prot_omniscient, $id1_l1, $id2_l1, $prot_tag); #If contains CDS it has to overlap at CDS level to be merged, otherwise any type of feature level3 overlaping is sufficient to decide to merge the level1 together - #print Dumper( $align); - if(@{$align} ){ #check is not empty - # 
@list_res = ($gene_id2, $w_overlap12, $w_overlap12_abs, $w_overlap21, $w_overlap21_abs, $overlap12, $overlap12_abs, $overlap21, $overlap21_abs); - # align contains: level1 id of the protein into the gff file (gene_id2), - ## overlap percent whole gene model against protein (w_overlap12), - # absolute overlap percent whole gene model against the protein ($w_overlap12_abs). Absolute means we check the cigar annotation of the protein to check the shift in the reading frame and take in account the few nucleotide in more or in less - ## overlap percent of the protein against the whole gene model ($w_overlap21), - # absolute overlap percent of the protein against the whole gene model ($w_overlap21_abs), - ## overlap percent cds part of the gene model against protein (overlap12), - # absolute overlap percent cds part of the gene model against the protein ($overlap12_abs). Absolute means we check the cigar annotation of the protein to check the shift in the reading frame and take in account the few nucleotide in more or in less - ## overlap percent of the protein against the cds part of the gene model ($overlap21), - # absolute overlap percent of the protein against the cds part of the gene model ($overlap21_abs), - # absolute overlap percent of the protein against the whole gene model but rationalized by the total length (prot+gene-overlap) $w_overlap_JD_abs - # absolute overlap percent of the protein against the coding part of the gene model but rationalized by the total length (prot+gene-overlap) $overlap_JD_abs - push @aligns, [@{$align}]; - } - } - } - } - } - -# +------------------------------------------------------+ -# |+----------------------------------------------------+| -# || NOW WE HAVE ALL THE ALIGNEMENT VALUE || -# |+----------------------------------------------------+| -# +------------------------------------------------------+ - # Let's filter them - #We have at least one prot aligned to this gene model - my @aligns_filtered; - if(@aligns){ - - # Manage 
option coding sequence or whole sequence - my $col = 6; - if($whole_sequence_opt){$col = 5;} - - #Sort results and filter by the overlap value threshold - foreach my $result ( sort {$b->[$col] <=> $a->[$col] } @aligns ){ - - # filter by value Threshold - if($result->[$col] > $valueK){ - push @aligns_filtered, $result; - } - } - - #print @aligns_filtered results - if ($verbose){ - _print( "\n\nprotein aligned to the gene $id1_l1 over the threshold $valueK:\n", 0); - foreach my $result ( @aligns_filtered){ - if($result->[$col] > $valueK){ - _print( "@$result\n", 0 ); - } - } - print "\n"; - } - - if(@aligns_filtered){ - - ######################################### - # 1) filter by pe and specific species - if($sort_method_by_pe and $sort_method_by_species){ - _print( "get_result_sort_method_by_pe_and_species case 1 !\n",0) if ($verbose); - $selected = get_result_sort_method_by_pe_and_species(\@aligns_filtered, $col, $sort_method_by_pe, $opt_test, $sort_method_by_species); - if($selected){$cases{1}++;} - } - - ##################################################### - # 2.1) filter by pe or if need be by specific species - if(!$selected and $priority_opt eq "pe"){ - - # filter by protein existence value - if(! $selected and $sort_method_by_pe){ - _print( "pe case 2.1.1!\n", 0) if ($verbose); - $selected = get_result_sort_method_by_pe(\@aligns_filtered, $col, $sort_method_by_pe, $opt_test); - if($selected){$cases{211}++;$cases{21}++;} - } - - # filter by specific species - if(! $selected and $sort_method_by_species){ - _print( "sort_method_by_species case 2.1.2!\n", 0) if ($verbose); - $selected = get_result_sort_method_by_species($sort_method_by_species, \@aligns_filtered, $col); - if($selected){$cases{212}++;$cases{22}++;} - } - } - ##################################################### - # 2.2) filter by specific species or if need be by pe - elsif(! $selected and $priority_opt eq "sp"){ - ######################################### - # filter by specific species - if(! 
$selected and $sort_method_by_species){ - _print( "sort_method_by_species case 2.2.1!\n", 0) if ($verbose); - $selected = get_result_sort_method_by_species($sort_method_by_species, \@aligns_filtered, $col); - if($selected){$cases{221}++;$cases{22}++;} - } - - ######################################### - # filter by protein existence value - if(! $selected and $sort_method_by_pe){ - _print( "pe case 2.2.2!\n", 0) if ($verbose); - $selected = get_result_sort_method_by_pe(\@aligns_filtered, $col, $sort_method_by_pe, $opt_test); - if($selected){$cases{222}++;$cases{21}++;} - } - } - - ######################################### - # 3) Take the first = the best overlap value - if(! $selected){ - _print( "Normal case 3!\n", 0 ) if ($verbose); - # read from best value to the lowest one - $selected = $aligns_filtered[0]; - $cases{3}++; - } - - _print( "Protein selected = $selected\n",0) if ($verbose); - - -# +------------------------------------------------------+ -# |+----------------------------------------------------+| -# || NOW WE ATTACH THE INFORMATION FOUND || -# |+----------------------------------------------------+| -# +------------------------------------------------------+ - - #Modify l1 - my $name_added=1; - my $geneName = _get_gn($selected); - my $feature_l1 = $hash_omniscient->{'level1'}{$tag_l1}{$id1_l1}; - if( $method_opt ne "add"){ - if( ! ($method_opt eq "complete" and $feature_l1->has_tag('Name') ) ){ - create_or_replace_tag($feature_l1, 'Name', $geneName); - }else{$name_added=0;} - } - else{ - create_or_replace_tag($feature_l1, 'lfp_name', $geneName); - } - - #Now Modify l2 - my $product_added=1; - my $product = _get_function($selected); - foreach my $tag_l2 (keys %{$hash_omniscient->{'level2'}}){ - if( exists_keys($hash_omniscient,('level2', $tag_l2, $id1_l1))){ - foreach my $feature_l2 (@{$hash_omniscient->{'level2'}{$tag_l2}{$id1_l1}}){ - #Modify each l2 - if( $method_opt ne "add"){ - if( ! 
($method_opt eq "complete" and $feature_l1->has_tag('product') ) ){ - create_or_replace_tag($feature_l2, 'product', $product); - }else{$product_added=0;} - } - else{ - create_or_replace_tag($feature_l2, 'lfp_product', $product); - } - - if($name_added or $product_added){ - if($name_added and $product_added){ - _print($id1_l1."\t".$feature_l2->_tag_value('ID')."\t".$geneName."\t".$product."\n", 1); - } - elsif($name_added){ - _print($id1_l1."\t".$feature_l2->_tag_value('ID')."\t".$geneName."\t-\n", 1); - } - else{ - _print($id1_l1."\t".$feature_l2->_tag_value('ID')."\t-\t".$product."\n", 1); - } - } - } - } - } - - - - } - else{ - $cases{0}++; - _print( "No protein overlap over the threshold $valueK for this gene model: $id1_l1\n", 0) if ($verbose); - } - } - else{ - $cases{-1}++; - _print( "No protein aligned to this gene model: $id1_l1\n", 0) if ($verbose); - } - } - } - -} - -###################### -##### Print the result -print_omniscient($hash_omniscient, $outputTab[2]); - -########################################## -#### SUMMERIZING########################## -if(keys %cases){_print( "\nThe liftover of function from proteins to the gene models has been done as following:\n",0);} -foreach my $key (keys %cases){ - if(!$verbose and ($key == 211 or $key == 212 or $key == 221 or $key == 222) ) { - _print( "we have $cases{$key} cases $key\n",0); - } - else{ - _print( "we have $cases{$key} cases $key\n",0); - } -} -_print( "\nLet's remind the different cases:\n",0); -foreach my $key (sort{$a <=> $b} keys %cases_explanation){ - _print( "$key => $cases_explanation{$key}\n",0); -} - -####################################################################################################################### - #################### - # METHODS # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - -############################ -## PROTEIN PRIORETIZATION - -#1) keep score as calculated and select the best - -#2) by ProteinExistence 
value (take the best score in better or equal value of the PE expected) -# 1. Experimental evidence at protein level -# 2. Experimental evidence at transcript level -# 3. Protein inferred from homology -# 4. Protein predicted -# 5. Protein uncertain - -#3) Taking into account a specific species following an order of choices -# It's an arbitrary choice -sub _taxid_ref_sorted{ - - my %sorted = ( 1 => 'Homo sapiens', - 2 => 'Mus musculus', - 3 => 'Drosophila melanogaster', - 4 => 'Caenorhabditis elegans', - 5 => 'Arabidopsis thaliana' - ); - - return \%sorted; -} - -sub get_result_sort_method_by_pe_and_species{ - my ($aligns, $col, $sort_method_by_pe, $opt_test, $sort_method_by_species)=@_; - - my $selected = undef; - - my $counter=1; - while(! $selected){ - - if(! exists ($sort_method_by_species->{$counter}) ){last;} - - # read from best value to the lowest one - foreach my $result ( @{$aligns} ){ - - my $species = _get_species($result); - - if(lc($species) eq lc($sort_method_by_species->{$counter}) ){ - - my $pe = _get_pe($result); - - if ($opt_test eq ">"){ - if ($pe > $sort_method_by_pe){ - $selected=$result;last; - } - } - if ($opt_test eq "<"){ - if ($pe < $sort_method_by_pe){ - $selected=$result;last; - } - } - if ($opt_test eq "="){ - if ($pe == $sort_method_by_pe){ - $selected=$result;last; - } - } - if ($opt_test eq "<="){ - if ($pe <= $sort_method_by_pe){ - $selected=$result;last; - } - } - if ($opt_test eq ">="){ - if ($pe >= $sort_method_by_pe){ - $selected=$result;last; - } - } - } - } - $counter++; - } - - return $selected; - -} - -sub get_result_sort_method_by_species{ - my ($sort_method_by_species, $aligns, $col) = @_; - - my $selected = undef; - - my $found_sp=undef; - my $counter=1; - while(! $selected){ - - if(! 
exists ($sort_method_by_species->{$counter}) ){last;} - - # read from best value to the lowest one - foreach my $result ( @{$aligns} ){ - - my $species = _get_species($result); - #print "<$species> vs >$sort_method_by_species->{$counter}<\n"; - - if(lc($species) eq lc($sort_method_by_species->{$counter}) ){ - $selected=$result;last; - } - } - $counter++; - } - - return $selected; -} - -sub get_result_sort_method_by_pe{ - my ($aligns, $col, $sort_method_by_pe, $opt_tes ) = @_; - - my $selected = undef; - - # read from best value to the lowest one - foreach my $result ( @{$aligns} ){ - - my $pe = _get_pe($result); - - if ($opt_test eq ">"){ - if ($pe > $sort_method_by_pe){ - $selected=$result;last; - } - } - if ($opt_test eq "<"){ - if ($pe < $sort_method_by_pe){ - $selected=$result;last; - } - } - if ($opt_test eq "="){ - if ($pe == $sort_method_by_pe){ - $selected=$result;last; - } - } - if ($opt_test eq "<="){ - if ($pe <= $sort_method_by_pe){ - $selected=$result;last; - } - } - if ($opt_test eq ">="){ - if ($pe >= $sort_method_by_pe){ - $selected=$result;last; - } - } - } - - return $selected; -} - -#get function -sub _get_function{ - my ($self)=@_; - my $header = $self->[$#{$self}]; - my $egal = index($header, '='); - my $function = substr $header, 0, $egal-2; - - $function =~ s/^\s+|\s+$//g; #removing leading and trailing white spaces - - return $function ; -} - -#get species -sub _get_species{ - my ($self)=@_; - - my $species = undef; - - my $header = $self->[$#{$self}]; - - my $egal = index($header, '='); - my $abb = substr $header, $egal-2, 2; - my $clipped = substr $header, $egal+1; - - while (lc($abb) ne "os") { - $egal = index($clipped, '='); - if($egal == -1){last;} - $abb = substr $clipped, $egal-2, 2; - $clipped = substr $clipped, $egal+1; - } - if($egal == -1){ warn("No species name found in this fasta header: $self");return $species;} - $egal = index($clipped, '='); - if($egal != -1){ - $species = substr $clipped, 0, $egal-2; - } - else{ - $species = 
$clipped; - } - $species =~ s/^\s+|\s+$//g; #removing leading and trailing white spaces - - return $species; -} - -#get gene name -sub _get_gn{ - my ($self)=@_; - - my $geneName = undef; - - my $header = $self->[$#{$self}]; - - my $egal = index($header, '='); - my $abb = substr $header, $egal-2, 2; - my $clipped = substr $header, $egal+1; - - while (lc($abb) ne "gn") { - $egal = index($clipped, '='); - if($egal == -1){last;} #no = character found - - $abb = substr $clipped, $egal-2, 2; - $clipped = substr $clipped, $egal+1; - } - if($egal == -1){ warn("No gene name found in this fasta header: $self");return $geneName;} - $egal = index($clipped, '='); - if($egal != -1){ - $geneName = substr $clipped, 0, $egal-2; - } - else{ - $geneName = $clipped; - } - - $geneName =~ s/^\s+|\s+$//g; #removing leading and trailing white spaces - - return $geneName ; -} - -#get ProteinExistence -sub _get_pe{ - my ($self)=@_; - - my $pe = undef; - - my $header = $self->[$#{$self}]; - - my $egal = index($header, '='); - my $abb = substr $header, $egal-2, 2; - my $clipped = substr $header, $egal+1; - - while (lc($abb) ne "pe") { - $egal = index($clipped, '='); - if($egal == -1){last;} #no = character found - - $abb = substr $clipped, $egal-2, 2; - $clipped = substr $clipped, $egal+1; - } - if($egal == -1){ warn("No pe found in this fasta header: $self"); return $pe; } - $egal = index($clipped, '='); - if($egal != -1){ - $pe = substr $clipped, 0, $egal-2; - } - else{ - $pe = $clipped; - } - - $pe =~ s/^\s+|\s+$//g; #removing leading and trailing white spaces - - return $pe ; -} - -#get sequence version -sub _get_sv{ - my ($self)=@_; - - my $sv = undef; - - my $header = $self->[$#{$self}]; - - my $egal = index($header, '='); - my $abb = substr $header, $egal-2, 2; - my $clipped = substr $header, $egal+1; - - while (lc($abb) ne "sv") { - $egal = index($clipped, '='); - if($egal == -1){last;} #no = character found - - $abb = substr $clipped, $egal-2, 2; - $clipped = substr $clipped, 
$egal+1; - } - if($egal == -1){ warn("No sv found in this fasta header: $self"); return $sv; } - $egal = index($clipped, '='); - if($egal != -1){ - $sv = substr $clipped, 0, $egal-2; - } - else{ - $sv = $clipped; - } - - $sv =~ s/^\s+|\s+$//g; #removing leading and trailing white spaces - - return $sv ; -} - -sub _get_sequence{ - my ($db, $seq_id) = @_; - - my $sequence = ""; - my $descritpion = ""; - my $seq_id_correct = _take_clean_id($seq_id); - if( exists $allIDs_prot{lc($seq_id_correct)}){ - - my $seq_id_original= $allIDs_prot{lc($seq_id_correct)}; - - $sequence = $db->subseq($seq_id_original); - $descritpion = (split(/\s+/, $db->header($seq_id_original), 2))[1]; #take header and remove the first element wihch is the seq_id_original - - if($sequence eq ""){ - warn "Problem ! no sequence extracted for - $seq_id_correct !\n"; exit; - } - } - else{ - warn "Problem ! protein ID $seq_id_correct not found into the protein fasta file!\n"; - } - - return length($sequence), $seq_id_correct, $descritpion; -} - -sub _take_clean_id { - my ($id) = @_ ; - - my $correct_id=$id; - - if($correct_id =~ m/\|/){ - my @tmp = split /\|/, $correct_id; - $correct_id = $tmp[1]; - } - if($correct_id =~ m/\./){ - my @tmp = split /\./, $correct_id; - $correct_id = $tmp[0]; - } - if($correct_id =~ m/\-/){ - my @tmp = split /\-/, $correct_id; - $correct_id = $tmp[0]; - } - return $correct_id; -} - -#Check if two kind of L2 overlap at l3 -sub check_gene_overlap_gffAlign{ - my ($hash_omniscient, $prot_omniscient, $gene_id, $gene_id2, $prot_tag)=@_; - -# my $overlap12=undef; - my $overlap12_abs=undef - -# my $w_overlap12=undef; - my $w_overlap12_abs=undef - -# my $overlap21=undef; - my $overlap21_abs=undef; - -# my $w_overlap21=undef; - my $w_overlap21_abs=undef; - - my $w_overlap_JD_abs = -1; - my $overlap_JD_abs = -1; - my @list_res; - - - #print "IN $gene_id, $gene_id2\n"; - foreach my $l2_type (keys %{$hash_omniscient->{'level2'}} ){ - - if(exists_keys($hash_omniscient,('level2', 
$l2_type, lc($gene_id)))){ - - foreach my $mrna_feature ( sort {$a->start <=> $b->start } @{$hash_omniscient->{'level2'}{$l2_type}{lc($gene_id)}}){ # from here bothe feature level2 are the same type - my $mrna_id = $mrna_feature->_tag_value('ID'); - - - #hash_omniscient contains match - foreach my $l1_type (keys %{$prot_omniscient->{'level1'}} ){ - - #check full CDS for each mRNA - if(exists_keys($prot_omniscient,('level1', $l1_type, lc($gene_id2)))){ - - #calcul lenght2 - my $lenght2=0; - foreach my $tag_l2 (keys %{$prot_omniscient->{'level2'}}){ - - if(exists_keys($prot_omniscient,('level2', $tag_l2, lc($gene_id2)))){ - foreach my $feature2 (@{$prot_omniscient->{'level2'}{$tag_l2}{lc($gene_id2)}}){ - #print$feature2->end." - ".$feature2->start."\n"; - #print $feature2->end - $feature2->start." + 1\n"; - $lenght2 = $lenght2 + ($feature2->end - $feature2->start + 1); - } - } - } - #print "lenght protein: $lenght2\n"; - - - my $w_overlap=0; - my $w_abs_overlap=0; - my $w_lenght1=0; - #################################### - # CALCUL ONTO THE WHOLE GENE MODEL # - my @list_tag_l3=('exon'); - if(! exists_keys( $hash_omniscient, ('level3','exon'))){ - warn "No exon found into the annoation file for feature $gene_id, we will use all the other l3 features\n"; - foreach my $tag_l3 (keys %{$hash_omniscient->{'level3'}}){ - push @list_tag_l3,$tag_l3; - } - } - - foreach my $tag_l3 ( @list_tag_l3 ){ - - if(exists_keys($hash_omniscient,('level3', $tag_l3, lc($mrna_id)))){ - foreach my $feature1 ( sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{$tag_l3}{lc($mrna_id)}}){ - - #print "annot location: ".$feature1->start." 
".$feature1->end."\n"; - $w_lenght1 = $w_lenght1 + ($feature1->end - $feature1->start + 1); - #print $feature1->gff_string."\n"; - - foreach my $tag_l2 (keys %{$prot_omniscient->{'level2'}}){ - - if(exists_keys($prot_omniscient,('level2', $tag_l2, lc($gene_id2)))){ - foreach my $feature2 (sort {$a->start <=> $b->start } @{$prot_omniscient->{'level2'}{$tag_l2}{lc($gene_id2)}}){ - #print " ".$feature2->gff_string."\n"; - if(($feature2->start <= $feature1->end) and ($feature2->end >= $feature1->start )){ # they overlap - - if($feature2->start > $feature1->end) {last;} - if($feature2->end < $feature1->start) {next;} - #print "prot location: ".$feature2->start." ".$feature2->end."\n"; - my $start = $feature2->start > $feature1->start ? $feature2->start : $feature1->start; - - my $end = $feature2->end < $feature1->end ? $feature2->end : $feature1->end; - - my $chunck_abs_overlap += get_absolute_match($feature2, $start, $end); - $w_abs_overlap += $chunck_abs_overlap; - #print "chunck_abs_overlap = $chunck_abs_overlap\n"; - #print "chunck_overlap= ".($end - $start + 1)."\n"; - $w_overlap += ($end - $start + 1); - - } - } - } - } - } - #print "gene_id $gene_id, gene_id2 $gene_id2 <=> w_overlap = $w_overlap - w_abs_overlap = $w_abs_overlap\n"; - } - } - - #1 -> 2 - #$w_overlap12 = sprintf "%.1f", ($w_overlap*100/$w_lenght1); - $w_overlap12_abs = sprintf "%.1f", ($w_abs_overlap*100/$w_lenght1); - - - my $overlap=0; - my $abs_overlap=0; - my $lenght1=0; - # ########################################################################## - # # CALCUL ONTO THE CODING SEQUENCE PART OF THE GENE MODEL ONLY (SKIP UTR) # - if(exists_keys( $hash_omniscient, ('level3','cds',lc($mrna_id) ))){ - - foreach my $feature1 ( sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{'cds'}{lc($mrna_id)}}){ - - # #print "annot location: ".$feature1->start." 
".$feature1->end."\n"; - $lenght1 = $lenght1 + ($feature1->end - $feature1->start + 1); - # print $feature1->gff_string."\n"; - - foreach my $tag_l2 (keys %{$prot_omniscient->{'level2'}}){ - - if(exists_keys($prot_omniscient,('level2', $tag_l2, lc($gene_id2)))){ - foreach my $feature2 (sort {$a->start <=> $b->start } @{$prot_omniscient->{'level2'}{$tag_l2}{lc($gene_id2)}}){ - - if(($feature2->start <= $feature1->end) and ($feature2->end >= $feature1->start )){ # they overlap - - if($feature2->start > $feature1->end) {last;} - if($feature2->end < $feature1->start) {next;} - - # print "We overlap !\n"; - #print "prot location: ".$feature2->start." ".$feature2->end."\n"; - my $start = $feature2->start > $feature1->start ? $feature2->start : $feature1->start; - - my $end = $feature2->end < $feature1->end ? $feature2->end : $feature1->end; - - my $chunck_abs_overlap += get_absolute_match($feature2, $start, $end); - $abs_overlap += $chunck_abs_overlap; - #print "chunck_abs_overlap = $chunck_abs_overlap\n"; - #print "chunck_overlap= ".($end - $start + 1)."\n"; - $overlap += ($end - $start + 1); - - } - #print "overlap = $overlap - abs_overlap = $abs_overlap\n"; - } - } - } - } - #1 -> 2 - #$overlap12 = sprintf "%.1f", ($overlap*100/$lenght1); - $overlap12_abs = sprintf "%.1f", ($abs_overlap*100/$lenght1); - } - - ########################### - # CORRECT THE 21 value by the real length of the protein (Not the whole original protein is aligned into the protein gff file) - try{ - my ($protLenSeqOriginal, $proteinName, $descritpion) = _get_sequence($db_prot, $prot_tag); - my $lenght_corrected = $protLenSeqOriginal*3; - #2 -> 1 whole sequence - #$w_overlap21 = sprintf "%.1f", ($w_overlap*100/$lenght_corrected); - $w_overlap21_abs = sprintf "%.1f", ($w_abs_overlap*100/$lenght_corrected); - #2 -> 1 coding sequence - #$overlap21 = sprintf "%.1f", ($overlap*100/$lenght_corrected); - $overlap21_abs = sprintf "%.1f", ($abs_overlap*100/$lenght_corrected); - - $overlap_JD_abs = sprintf 
"%.1f", ( $abs_overlap*100/($lenght_corrected+$lenght1-$abs_overlap)); - $w_overlap_JD_abs = sprintf "%.1f", ( $abs_overlap*100/($lenght_corrected+$w_lenght1-$abs_overlap)); - #@list_res = ($gene_id2, $w_overlap12, $w_overlap12_abs, $w_overlap21, $w_overlap21_abs, $overlap12, $overlap12_abs, $overlap21, $overlap21_abs, $w_overlap_JD_abs, $overlap_JD_abs , $proteinName, $descritpion); - @list_res = ($gene_id2, $w_overlap12_abs, $w_overlap21_abs, $overlap12_abs, $overlap21_abs, $w_overlap_JD_abs, $overlap_JD_abs , $proteinName, $descritpion); - } - catch{ - print "We cannot check the real protein length, let's continue without this one: $prot_tag\n"; - #2 -> 1 whole sequence - #$w_overlap21 = sprintf "%.1f", ($w_overlap*100/$lenght2); - #$w_overlap21_abs = sprintf "%.1f", ($w_abs_overlap*100/$lenght2); - #2 -> 1 coding sequence - #$overlap21 = sprintf "%.1f", ($overlap*100/$lenght2); - #$overlap21_abs = sprintf "%.1f", ($abs_overlap*100/$lenght2); - }; - } - } - } - } - } - #print "overlap12 $overlap12 and overlap21 $overlap21\n"; - return \@list_res; -} - -# Protein could well align with the genome sequene, but less well with the gene model -# So we re-compute the protein overlap according only to the gene model -sub get_absolute_match{ - my ($feature, $start, $end)=@_; - my $absMatch=0; - my $nuc_polish=0; - - # We first need to check that the GAP feature is present among the protein attributes - if(! 
$feature->has_tag('Gap')){ - warn "I cannot calculate the absolute match because the tag Gap is absent !\n"; - } - else{ - - # Parse the GAP attribute - my $gap = $feature->_tag_value('Gap'); - my @gap = split/ /, $gap; # Split gap by space - - - my $nuc_left=abs($feature->start - $start+1); - ############################################ - #Have to re-compute the GAP tag from left # Case where protein match is longer than the overlap with the gene model (left side) - if($nuc_left > 0){ - my @newGap; - - foreach my $gap (@gap){ - - #compute how long is the piece in nucleotide - my $nuc = nuc_gap_val($gap); - - #if nucleotide to shrink is over the size of the piece we skip the piece, and compute the size to shrink left - if ($nuc_left >= $nuc){ - $nuc_left=$nuc_left-$nuc; - next; - } - #if nucleotide to shrink is under the size of the piece we recalculate the piece - elsif ($nuc_left != 0 and $nuc_left < $nuc){ - my $newGapNucPiece = $nuc - $nuc_left; - my $letter = substr $gap, 0, 1; - - #has to be modulo3 - my $AAval; - if($gap =~ /^M/ or $gap =~ /^D/){ - $nuc_polish += $newGapNucPiece % 3; - $AAval = int($newGapNucPiece/3); - } - #avoid case wehre we remove a lot and the piece doesnt exist anymore - if($AAval != 0){ - my $gap_val_ok = $letter.$AAval; - push @newGap, $gap_val_ok - } - } - else{ - push @newGap, $gap - } - } - @gap = @newGap; - } - - my $nuc_right=abs($feature->end - $end+1); - ############################################ - #Have to re-compute the GAP tag from right # Case where protein match is longer than the overlap with the gene model (right side) - if($nuc_right > 0){ - my @newGap; - - foreach my $gap (@gap){ - - #compute how long is the piece in nucleotide - my $nuc = nuc_gap_val($gap); - - #if nucleotide to shrink is over the size of the piece we skip the piece, and compute the size to shrink left - if ($nuc_right >= $nuc){ - $nuc_right=$nuc_right-$nuc; - next; - } - #if nucleotide to shrink is under the size of the piece we recalculate the 
piece - elsif ($nuc_right != 0 and $nuc_right < $nuc){ - my $newGapNucPiece = $nuc - $nuc_right; - my $letter = substr $gap, 0, 1; - - #has to be modulo3 - my $AAval; - if($gap =~ /^M/ or $gap =~ /^D/){ - $nuc_polish += $newGapNucPiece % 3; - $AAval = int($newGapNucPiece/3); - } - #avoid case wehre we remove a lot and the piece doesnt exist anymore - if($AAval != 0){ - my $gap_val_ok = $letter.$AAval; - push @newGap, $gap_val_ok - } - } - else{ - push @newGap, $gap - } - } - @gap = @newGap; - } - - my ($match_size, $nuc_polish) = calcul_match_gap(\@gap, $nuc_polish); - - $absMatch+=$match_size; - #print "match_size = $match_size\n"; - #my $plus= int(abs($feature->end - $end)/3); - #my $modPlus = int(abs($feature->end - $end) % 3); - #print "modPlus=".$modMinus." plus".$minus."\n"; - - } - - return $absMatch*3+$nuc_polish; -} - -#@Output : 2 , Match in AA, nuc_polish in nucleotide -sub calcul_match_gap{ - my ($gap, $nuc_polish)=@_; - - my $match_size=0; - - foreach my $gap (@{$gap}){ - - #get value every time it was a match - if($gap =~ /^M/){ #MATCH - M1 in a protein space is actually an amino acid match (matches 3 bp in nucleotide space) - substr($gap, 0, 1) = ""; - $match_size += $gap; - } - elsif($gap =~ /^D/){ # deletion = insert a gap into the target (delete from reference) - D1 is an amino acid deletion (3bp in nucleotide space) - substr($gap, 0, 1) = ""; - $nuc_polish -= $gap; - } - elsif($gap =~ /^I/){ # insert a gap into the reference sequence - I1 is an amino acid insertion (3bp in nucleotide space) - substr($gap, 0, 1) = ""; - $nuc_polish -= ($gap*3); - } - elsif($gap =~ /^R/){# frameshift reverse in the reference sequence - F and R therefore allow for single bp movement either to the left or right within amino acid space. 
Sometime this happens in Exonerate where it appears as a slightly shifted codon (codons look stacked ), but it also happens when an amino acid is split across a splice site (1st part of a codon is on one exon and second part on the next exon). - substr($gap, 0, 1) = ""; - $nuc_polish -= $gap; - } - elsif($gap =~ /^F/){# frameshift forward in the reference sequence - substr($gap, 0, 1) = ""; - $nuc_polish -= $gap; - } - else{ - warn "Cannot interpret this CIGAR substring: $gap !\n"; - } - } - - return $match_size, $nuc_polish; - -} - -sub nuc_gap_val{ - my ($gap) = @_; - - my $nuc=0; - if($gap =~ /^M/){ #MATCH - M1 in a protein space is actually an amino acid match (matches 3 bp in nucleotide space) - $nuc = substr $gap, 1; - $nuc=$nuc*3; - } - elsif($gap =~ /^D/){ # deletion = insert a gap into the target (delete from reference) - D1 is an amino acid deletion (3bp in nucleotide space) - $nuc = substr $gap, 1; - } - elsif($gap =~ /^I/){ # insert a gap into the reference sequence - I1 is an amino acid insertion (3bp in nucleotide space) - $nuc = substr $gap, 1; - $nuc=$nuc*3; - } - elsif($gap =~ /^F/){ # frameshift forward in the reference sequence - F and R therefore allow for single bp movement either to the left or right within amino acid space. Sometime this happens in Exonerate where it appears as a slightly shifted codon (codons look stacked ), but it also happens when an amino acid is split across a splice site (1st part of a codon is on one exon and second part on the next exon). - $nuc = substr $gap, 1; - } - elsif($gap =~ /^R/){ # frameshift reverse in the reference sequence - - $nuc = substr $gap, 1; - } - else{ - warn "Cannot interpret this CIGAR substring: $gap !\n"; - } - - return $nuc; -} - -sub _print{ - my ($mesage, $optionType) = @_; - - if(! 
$optionType and $optionType != 0){ - print $screen_out $mesage; - } - else{ - if($screen_out){ - print $screen_out $mesage; - } - $outputTab[$optionType]->print($mesage); - } -} - -__END__ - -=head1 NAME - -script.pl - -The script take an annotation in gff format, a protein alignment in gff format and a protein fasta file as input. It checks if protein alignement overlap gene models, and will load the gene name and/or the function to the gene model according to the user requirements. -The script apllies these following steps: -For each gene model structure it take the proteins aligned against, and sort them by an overlaping score. The best coming first. -Then it filters them by applying the overlaping score threshold. -1) If you activated the PE and the species filtering, we will try to find the best protein that follows the defined requirement. -2.1) If you activated the PE filtering or the precedent filtering (1) didn't succeed, we take the best protein according to the PE requirement. -2.2) If you activated the species filtering or the precedent filtering (1) didn't succeed, we take the best protein according to the list of prioritized species defined. -3) If no option or the precedent filtering (1,2.1,2.2)didn't succeed, the best protein will be selected. -You can flip the 2.1 and 2.2 test using the priority option. - - -=head1 SYNOPSIS - - ./script.pl -a annotation.gff --pgff protein.gff --pfasta protein.fasta [ -o outfile ] - ./script.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<-a> or B<--annotation> - -Input gff file of an annotation. - -=item B<-pgff> - -Input alignment gff file of protein. - -=item B<-pfasta> - -Input protein fasta file where the extra information will be retrieved for each aligned protein. - -=item B<-m> or B<--method> - -Rule to apply to lift function when a protein map properly. -1) replace => replace or add the product and Name attribute's values. -2) complete => add the product and Name attribute's values only if doesn't exist. 
-3) add => add the lfp_product and lfp_name attributes with the corresponding values - -=item B<--value>, B<--threshold> or B<-t> - -Gene mapping percentage over which a gene must be reported. By default the value is 50. - -=item B<-w> - -Compute the overlap score based on the whole annotation sequence. By default we use only the coding sequence part. - -=item B<--pe> - -Protein existence value. We will take the best overlap score protein according to the PE expected -1. Experimental evidence at protein level -2. Experimental evidence at transcript level -3. Protein inferred from homology -4. Protein predicted -5. Protein uncertain - -=item B<--test> - -Test to apply (> < = >= <=). If you us one of these two character >, <, please don't forget to quote you parameter liket that "<=". Else your terminal will complain. - -=item B<--sp> - -Species, between the set of the best protein aligned we try first to take the one that follow the species prioritization defined. There is a default one, but you can define you own (quoted and coma separated value)like that: "mus Musculus, Homo Sapiens" from the most important to the less important. In that case Mus will be taken first even if a better overlaping one exist for human. -If none of them is found we take anyway the best overlapping one. - -=item B<-p> or B<--priority> - -By default the priority is PE test before species test when both are applied. You can flip these two test by activating this option like this: -p species - -=item B<-v> - -Be verbose. - -=item B<-o> , B<--output> or B<--out> - -Output GFF file. If no output file is specified, the output will be -written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. 
- -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_manage_IDs.pl b/annotation/Tools/Util/gff/gff3_sp_manage_IDs.pl deleted file mode 100755 index 3570e9c0c..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_manage_IDs.pl +++ /dev/null @@ -1,319 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Getopt::Long; -use Pod::Usage; -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient; - -my $header = qq{ -######################################################## -# NBIS 2016 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $gff = undef; -my $help= 0; -my @opt_tag=(); -my $outfile=undef; -my $prefix=undef; -my $nbIDstart=1; - -if ( !GetOptions( - "help|h" => \$help, - "gff|f=s" => \$gff, - "nb=i" => \$nbIDstart, - "prefix=s" => \$prefix, - "p|t|l=s" => \@opt_tag, - "output|outfile|out|o=s" => \$outfile)) - -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ( ! (defined($gff)) ){ - pod2usage( { - -message => "$header\nAt least 1 parameter is mandatory:\nInput reference gff file (--gff) \n\n", - -verbose => 0, - -exitval => 2 } ); -} - -my $gffout; -if ($outfile) { - $outfile=~ s/.gff//g; - open(my $fh, '>', $outfile.".gff") or die "Could not open file '$outfile' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); -} -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - -# Manage $primaryTag -my %ptagList; -if(! 
@opt_tag){ - print "We will work on attributes from all features\n"; - $ptagList{'level1'}++; - $ptagList{'level2'}++; - $ptagList{'level3'}++; -} -else{ - foreach my $tag (@opt_tag){ - if($tag eq ""){next;} - if($tag eq "all"){ - print "We will work on attributes from all features\n"; - $ptagList{'level1'}++; - $ptagList{'level2'}++; - $ptagList{'level3'}++; - } - else{ - print "We will work on attributes from all the $tag features\n"; - $ptagList{lc($tag)}++; - } - } -} - - - - - ##################### - # MAIN # - ##################### - -my %keepTrack; -my %tag_hash; -my @tagLetter_list; - -###################### -### Parse GFF input # -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $gff - }); -print ("GFF3 file parsed\n"); - -# sort by seq id -my %hash_sortBySeq; -foreach my $tag_level1 ( keys %{$hash_omniscient->{'level1'}}){ - foreach my $level1_id ( keys %{$hash_omniscient->{'level1'}{$tag_level1}}){ - my $position=$hash_omniscient->{'level1'}{$tag_level1}{$level1_id}->seq_id; - push (@{$hash_sortBySeq{$position}{$tag_level1}}, $hash_omniscient->{'level1'}{$tag_level1}{$level1_id}); - } -} - -#Read by seqId to sort properly for ID naming -foreach my $seqid (sort alphaNum keys %hash_sortBySeq){ # loop over all the feature level1 - - foreach my $tag_l1 (sort {$a cmp $b} keys %{$hash_sortBySeq{$seqid}}){ - - foreach my $feature_l1 ( sort {$a->start <=> $b->start} @{$hash_sortBySeq{$seqid}{$tag_l1}}){ - my $id_l1 = lc($feature_l1->_tag_value('ID')); - my $l1_ID_modified=undef; - - if(exists ($ptagList{$tag_l1}) or exists ($ptagList{'level1'}) ){ - if(! 
exists_keys(\%keepTrack,($tag_l1))){$keepTrack{$tag_l1}=$nbIDstart;} - manage_attributes($feature_l1,\%keepTrack, $prefix); - $keepTrack{$tag_l1}++; - $l1_ID_modified=$feature_l1->_tag_value('ID'); - $hash_omniscient->{'level1'}{$tag_l1}{lc($l1_ID_modified)} = delete $hash_omniscient->{'level1'}{$tag_l1}{$id_l1}; - } - - ################# - # == LEVEL 2 == # - ################# - foreach my $tag_l2 (sort {$a cmp $b} keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - - if ( exists ($hash_omniscient->{'level2'}{$tag_l2}{$id_l1} ) ){ - foreach my $feature_l2 ( sort {$a->start <=> $b->start} @{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}}) { - - my $l2_ID_modified=undef; - my $level2_ID = lc($feature_l2->_tag_value('ID')); - - if(exists ($ptagList{$tag_l2}) or exists ($ptagList{'level2'}) ){ - if(! exists_keys(\%keepTrack,($tag_l2))){$keepTrack{$tag_l2}=$nbIDstart;} - manage_attributes($feature_l2,\%keepTrack, $prefix); - $keepTrack{$tag_l2}++; - $l2_ID_modified=$feature_l2->_tag_value('ID'); - } - - #Modify parent if necessary - if($l1_ID_modified){ - create_or_replace_tag($feature_l2,'Parent', $l1_ID_modified); - } - - ################# - # == LEVEL 3 == # - ################# - foreach my $tag_l3 (sort {$a cmp $b} keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... - - if ( exists_keys($hash_omniscient, ('level3', $tag_l3 , $level2_ID) ) ){ - - foreach my $feature_l3 ( sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{$tag_l3}{$level2_ID}}) { - - if(exists ($ptagList{$tag_l3}) or exists ($ptagList{'level3'}) ){ - if(! 
exists_keys(\%keepTrack,($tag_l3))){$keepTrack{$tag_l3}=$nbIDstart;} - manage_attributes($feature_l3,\%keepTrack, $prefix); - $keepTrack{$tag_l3}++; - } - - #Modify parent if necessary - if($l2_ID_modified){ - create_or_replace_tag($feature_l3,'Parent', $l2_ID_modified); - } - - } - - if($l2_ID_modified){ - $hash_omniscient->{'level3'}{$tag_l3}{lc($l2_ID_modified)} = delete $hash_omniscient->{'level3'}{$tag_l3}{$level2_ID}; - } - } - } - } - if($l1_ID_modified){ - $hash_omniscient->{'level2'}{$tag_l2}{lc($l1_ID_modified)} = delete $hash_omniscient->{'level2'}{$tag_l2}{$id_l1}; - } - } - } - } - } -} - -# Print results -print_omniscient($hash_omniscient, $gffout); #print gene modified - - -####################################################################################################################### - #################### - # methods # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - -sub manage_attributes{ - my ($feature, $keepTrack, $prefix)=@_; - - my $primary_tag = lc($feature->primary_tag); - - if ($prefix){ - - my $nbName = $keepTrack->{$primary_tag}; - - my $numberNum=11; - my $GoodNum=""; - for (my $i=0; $i<$numberNum-length($nbName); $i++){ - $GoodNum.="0"; - } - $GoodNum.=$nbName; - - my $abb = uc(select_abb($feature)); - - my $result="$prefix$abb$GoodNum"; - create_or_replace_tag($feature,'ID', $result); - } - else{ - create_or_replace_tag($feature,'ID', $primary_tag."-".$keepTrack->{$primary_tag}); - } -} - -#Select the proper abbreviation for the tag -sub select_abb{ - my ($feature)=@_; - - # get the tag - my $primary_tag = lc($feature->primary_tag); - - if(! 
exists_keys (\%tag_hash,( $primary_tag ))) { - - my $cpt=1; - my $letter = uc(substr($primary_tag, 0, $cpt)); - - while( grep( /^\Q$letter\E$/, @tagLetter_list) ) { # to avoid duplicate - $cpt++; - $letter = uc(substr($primary_tag, 0, $cpt)); - } - $tag_hash{$primary_tag}=$letter; - push(@tagLetter_list, $letter) - } - return $tag_hash{$primary_tag} -} - -#Sorting mixed strings => Sorting alphabetically first, then numerically -# how to use: my @y = sort by_number @x; -sub alphaNum { - my ( $alet , $anum ) = $a =~ /([^\d]+)(\d+)/; - my ( $blet , $bnum ) = $b =~ /([^\d]+)(\d+)/; - ( $alet || "a" ) cmp ( $blet || "a" ) or ( $anum || 0 ) <=> ( $bnum || 0 ) -} - -__END__ - -=head1 NAME - -gff3_manageIDs.pl - -The script take a gff3 file as input and will go through all feature to overwrite the uniq ID. -By default the ID is build as follow: - primary_tag(i.e. 3rd column)-Number. -If you provide a specific prefix the ID is build as follow (Ensembl like format ENSG00000000022): - $prefix.$letterCode.0*.Number where the number of 0 i adapted in order to have 11 digits - -By default the numbering start to 1, but you can decide to change this value using the --nb option. -The $letterCode is generated on the fly to be uniq. By defaut it used the first letter of the feature type (3rd colum). If two feature types -start with the same letter, the second one meet will have the two first letter as $letterCode (and so one). - - -=head1 SYNOPSIS - - ./gff3_manageIDs.pl -gff file.gff -p level2 -p cds -p exon [ -o outfile ] - ./gff3_manageIDs.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff> or B<-f> - -Input GFF3 file that will be read (and sorted) - -=item B<--prefix> - -String. Add a specific prefix to the ID - -=item B<-p>, B<-t> or B<-l> - -primary tag option, case insensitive, list. Allow to specied the feature types that will be handled. 
-You can specified a specific feature by given its primary tag name (column 3) as: cds, Gene, MrNa -You can specify directly all the feature of a particular level: - level2=mRNA,ncRNA,tRNA,etc - level3=CDS,exon,UTR,etc -By default all feature are taken into account. fill the option by the value "all" will have the same behaviour. - -=item B<--nb> - -Integer. Start numbering to this value. - -=item B<-o> , B<--output> , B<--out> or B<--outfile> - -Output GFF file. If no output file is specified, the output will be -written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_manage_UTRs.pl b/annotation/Tools/Util/gff/gff3_sp_manage_UTRs.pl deleted file mode 100755 index 34d70851e..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_manage_UTRs.pl +++ /dev/null @@ -1,534 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use POSIX qw(strftime); -use Carp; -use IO::File; -use Pod::Usage; -use Getopt::Long qw(:config no_auto_abbrev); -use Statistics::R; -use NBIS::GFF3::Omniscient; -use Bio::Tools::GFF; - -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - - -my $opt_reffile; -my $opt_plot; -my $opt_nbUTR; -my $opt_bst=undef; -my $opt_utr3=undef; -my $opt_utr5=undef; -my $opt_output=undef; -my $opt_help = 0; -my $DefaultUTRnb=5; - -my @copyARGV=@ARGV; -print "ARG @copyARGV\n"; -if ( !GetOptions( 'f|gff|ref|reffile=s' => \$opt_reffile, - 'n|t|nb|number=i' => \$opt_nbUTR, - '3|three|three_prime_utr!' => \$opt_utr3, - '5|five|five_prime_utr!' => \$opt_utr5, - 'b|both|bs!' => \$opt_bst, - 'o|out|output=s' => \$opt_output, - 'p|plot!' => \$opt_plot, - 'h|help!' 
=> \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ( ! defined($opt_reffile ) or ! ($opt_utr3 or $opt_utr5 or $opt_bst or $opt_plot) ) { - pod2usage( { - -message => "$header\nMust specify at least 2 parameters:\nReference data gff3 file (--gff)\nOne UTR option (3, 5 , both, plot)", - -verbose => 0, - -exitval => 1 } ); -} - -# ####################### -# # START Manage Option # -# ####################### - -if (defined($opt_output) ) { - if (-d $opt_output){ - print "The output directory choosen already exists. Please geve me another Name.\n";exit(); - } - else{ - mkdir $opt_output; - } -} - -my $ostreamReport; -if (defined($opt_output) ) { - $ostreamReport=IO::File->new(">".$opt_output."/report.txt" ) or croak( sprintf( "Can not open '%s' for writing %s", $opt_output."/report.txt", $! )); -} -else{ - $ostreamReport = \*STDOUT or die ( sprintf( "Can not open '%s' for writing %s", "STDOUT", $! )); -} -my $string1 = strftime "%m/%d/%Y at %Hh%Mm%Ss", localtime; -$string1 .= "\n\nusage: $0 @copyARGV\n\n"; - -if (! $opt_nbUTR){ - $opt_nbUTR=$DefaultUTRnb; -}elsif(!($opt_utr3 or $opt_utr5 or $opt_bst)){$string1 .= "The value $opt_nbUTR of the parameter will no be taken into account. Indeed no UTRs option called. 
(three, five, both).\n";} -if($opt_utr3 or $opt_utr5 or $opt_bst){ - $string1 .= "Genes with more than $opt_nbUTR UTRs will be reported.\n"; -} - - -print $ostreamReport $string1; -if($opt_output){print $string1;} -# ##################################### -# # END Manage OPTION -# ##################################### - - - - ####################### - # MAIN # -# >>>>>>>>>>>>>>>>>>>>>>>>> ####################### <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - - -# ################################# -# # Manage Ouput Directory / File # -# ################################# - -my $ostreamUTR; -my $ostreamUTRdiscarded; - -if (defined($opt_output) ) { - - my $file_in=$opt_reffile; - $file_in =~ s/.gff.*//g; - - #manage name output - my $utr_type_under=undef; - my $utr_type_over=undef; - # case no filter so we don't create discarded file output. - if (! ($opt_utr3 or $opt_utr5 or $opt_bst)){ - - $utr_type_under = $file_in; - $utr_type_over = $file_in; - my $nameUTRok=$opt_output."/".$file_in.".gff"; - open(my $fhUTRok, '>', $nameUTRok) or die "Could not open file '$nameUTRok' $!"; - $ostreamUTR = Bio::Tools::GFF->new(-fh => $fhUTRok, -gff_version => 3); - } - else{ # case with filter so we create discarded file output and a of file output. 
- if ($opt_utr3){ - $utr_type_under = $file_in."_UTR3_under".$opt_nbUTR; - $utr_type_over = $file_in."_UTR3_overORequal".$opt_nbUTR; - } - if ($opt_utr5){ - if($utr_type_under){ - $utr_type_under.="_and_UTR5_under".$opt_nbUTR; - $utr_type_over.="_and_UTR5_overORequal".$opt_nbUTR; - } - else{ - $utr_type_under=$file_in."_UTR5_under".$opt_nbUTR; - $utr_type_over=$file_in."_UTR5_overORequal".$opt_nbUTR; - } - } - if ($opt_bst){ - if($utr_type_under){ - $utr_type_under.="_and_bothSides_under".$opt_nbUTR; - $utr_type_over.="_and_bothSides_overORequal".$opt_nbUTR; - } - else{ - $utr_type_under=$file_in."_bothSides_under".$opt_nbUTR; - $utr_type_over=$file_in."_bothSides_overORequal".$opt_nbUTR; - } - } - - my $nameUTRok=$opt_output."/".$utr_type_under.".gff"; - open(my $fhUTRok, '>', $nameUTRok) or die "Could not open file '$nameUTRok' $!"; - $ostreamUTR = Bio::Tools::GFF->new(-fh => $fhUTRok, -gff_version => 3); - - my $nameUTRdiscarded=$opt_output."/".$utr_type_over.".gff"; - open(my $fhUTRdiscarded, '>', $nameUTRdiscarded) or die "Could not open file '$nameUTRdiscarded' $!"; - $ostreamUTRdiscarded = Bio::Tools::GFF->new(-fh => $fhUTRdiscarded, -gff_version => 3) ; - } -} -else { # No output provided, we print everything on screen - my $ostream = IO::File->new(); - $ostream->fdopen( fileno(STDOUT), 'w' ) or croak( sprintf( "Can not open STDOUT for writing: %s", $! ) ); - $ostreamUTR = Bio::Tools::GFF->new( -fh => $ostream, -gff_version => 3) or croak( sprintf( "Can not open STDOUT for writing: %s", $! ) ); - my $ostream_d = IO::File->new(); - $ostream_d->fdopen( fileno(STDOUT), 'w' ) or croak( sprintf( "Can not open STDOUT for writing: %s", $! ) ); - $ostreamUTRdiscarded = Bio::Tools::GFF->new( -fh => $ostream, -gff_version => 3 ) or croak( sprintf( "Can not open STDOUT for writing: %s", $! 
) ); -} - -###################### -### Parse GFF input # -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $opt_reffile - }); -print("Parsing Finished\n\n"); -### END Parse GFF input # -######################### - -my %UTRdistribution; -my %UTRbymRNA; -my %UTRoverview; -# ######################################################### -# # A.1) Assign utr side if they are not -# ######################################################### - -foreach my $tag_l3 ( keys %{$hash_omniscient->{'level3'}} ) { - if($tag_l3 =~ /utr/){ - if ($tag_l3 ne 'three_prime_utr' and $tag_l3 ne 'five_prime_utr') { - - foreach my $id_l2 ( keys %{$hash_omniscient->{'level3'}{$tag_l3}} ){ - - my $geneID = $hash_mRNAGeneLink->{$id_l2}; - my $feature_l2 = get_feature_l2_from_id_l2_l1($hash_omniscient, $id_l2, $geneID); - my $strand = $feature_l2->strand(); - my $cds_feature_example = $hash_omniscient->{'level3'}{'cds'}{$id_l2}[0]; #if utr exists, cds should exists - - foreach my $feature_l3 ( @{$hash_omniscient->{'level3'}{$tag_l3}{$id_l2}}){ - if($feature_l3->start <= $cds_feature_example->start){ - if ($strand eq "+" or $strand eq "1"){ - $feature_l3->primary_tag= "five_prime_utr"; - } - else{$feature_l3->primary_tag= "three_prime_utr";} - } - else{ - if ($strand eq "+" or $strand eq "1"){ - $feature_l3->primary_tag= "three_prime_utr"; - } - else{ - $feature_l3->primary_tag= "five_prime_utr"; - } - } - } - } - } - } -} - -# ######################################################### -# # A.1) Count utr exon by side and total -# ######################################################### - -foreach my $tag_l2 (keys %{$hash_omniscient->{'level2'}}) { - foreach my $id_l1 (keys %{$hash_omniscient->{'level2'}{$tag_l2}}) { - foreach my $feature_l2 (@{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}}) { - - my $has_an_utr=undef; - my $id_l2= lc($feature_l2->_tag_value('ID')); - - foreach my $tag_l3 (keys %{$hash_omniscient->{'level3'}}) { - if($tag_l3 =~ /utr/){ - if(exists 
($hash_omniscient->{'level3'}{$tag_l3}{$id_l2})){ - $has_an_utr="yes"; - - my $nbUTR = $#{$hash_omniscient->{'level3'}{$tag_l3}{$id_l2}}+1; #nb utrs - $UTRdistribution{$tag_l3}{$nbUTR}++; - $UTRbymRNA{$tag_l3}{$id_l2}=$nbUTR; - $UTRoverview{$tag_l3}++; - if(! exists $UTRbymRNA{'both'}{$id_l2}){ - $UTRbymRNA{'both'}{$id_l2}=$nbUTR; - }else{$UTRbymRNA{'both'}{$id_l2}+=$nbUTR;} - - # print_omniscient_from_level1_id_list($hash_omniscient,[$geneID],$utr_gff{$tag_l3}); - } - } - } - if(!$has_an_utr){ - $UTRbymRNA{'both'}{$id_l2}=0; - } - } - } -} - -########################### -# compute if on of UTR option called -########################### - -########################### -# Overview of UTRs -if($opt_utr3 or $opt_utr5 or $opt_bst){ - # print preliminary results - my $stringPrint=""; - foreach my $key (keys %UTRoverview) { - $stringPrint.="There is $UTRoverview{$key} $key\n"; - my $total=0; - foreach my $value ( sort {$b <=> $a} keys %{$UTRdistribution{$key}}){ - if($value >= $opt_nbUTR){ - $total+=$UTRdistribution{$key}{$value}; - } - else{last;} - } - $stringPrint.= "There is $total $key cases that have over or equal $opt_nbUTR exons.\n"; - } - - ########################### - # Main compute - my @listIDl2discarded; - my @listIDlok; - my %geneName; - my %geneName_ok; - foreach my $tag (keys %UTRbymRNA) { - foreach my $id_level2 (keys %{$UTRbymRNA{$tag}}){ - - # case only opt_utr3 - if ($opt_utr3 and $tag eq "three_prime_utr"){ - if ($UTRbymRNA{$tag}{$id_level2} >= $opt_nbUTR){ - push @listIDl2discarded, $id_level2 ; - $geneName{$hash_mRNAGeneLink->{$id_level2}}++; - } - else{ - push @listIDlok, $id_level2 ; - $geneName_ok{$hash_mRNAGeneLink->{$id_level2}}++; - } - } - # case only opt_utr5 - if ($opt_utr5 and $tag eq "five_prime_utr"){ - if ($UTRbymRNA{$tag}{$id_level2} >= $opt_nbUTR){ - push @listIDl2discarded, $id_level2 ; - $geneName{$hash_mRNAGeneLink->{$id_level2}}++; - } - else{ - push @listIDlok, $id_level2; - $geneName_ok{$hash_mRNAGeneLink->{$id_level2}}++; - } 
- } ### REMOVE OPTION BOTH ? - # case both side together (when added) should be over $opt_nbUTR) - if ($opt_bst and $tag eq "both"){ - if ($UTRbymRNA{$tag}{$id_level2} >= $opt_nbUTR){ - push @listIDl2discarded, $id_level2; - $geneName{$hash_mRNAGeneLink->{$id_level2}}++; - } - else{ - push @listIDlok, $id_level2 ; - $geneName_ok{$hash_mRNAGeneLink->{$id_level2}}++; - } - } - # case both side independant (side 3 and and5 should be over $opt_nbUTR) - - # case no option print all so put all in @listIDlok - if(! $opt_utr3 and ! $opt_utr5 and ! $opt_bst) { # in case where no option, We do by default side3 side5 idependant. On sufficiant to discard the mRNA - push @listIDlok, $id_level2 ; - $geneName_ok{$hash_mRNAGeneLink->{$id_level2}}++; - } - } - } - - # remove duplicate in case several option tends to give the same case - if(@listIDl2discarded){ - my $sizeList= @listIDl2discarded; - my $nbGene = keys %geneName; - $stringPrint.= "$sizeList RNA discarded from $nbGene genes\n"; - my @listIDl2discardedUniq = uniq(@listIDl2discarded); - my $omniscient_discarded = create_omniscient_from_idlevel2list($hash_omniscient, $hash_mRNAGeneLink, \@listIDl2discarded); - print_omniscient($omniscient_discarded, $ostreamUTRdiscarded); - } - if(@listIDlok){ - my $sizeList= @listIDlok; - my $nbGeneOk = keys %geneName_ok; - $stringPrint.= "$sizeList RNA from $nbGeneOk gene that reach your request.\n"; - my @listIDlokUniq = uniq(@listIDlok); - my $omniscient_ok = create_omniscient_from_idlevel2list($hash_omniscient, $hash_mRNAGeneLink, \@listIDlokUniq); - print_omniscient($omniscient_ok, $ostreamUTR); - } - if(@listIDl2discarded and @listIDlok){ - my $union=0; - foreach my $name (keys %geneName){ - if (exists ($geneName_ok{$name}) ){ - $union++; - } - } - $stringPrint.= "$union genes have RNA isoform that reach you request and RNA discarded.\n"; - } - - #Print Info OUtput - print $ostreamReport $stringPrint; - if($opt_output){ - print $stringPrint; - } -} - -############################ -# 
Plot distribution if asked -if ($opt_plot){ - - foreach my $utr_type (keys %UTRdistribution) { - - my $txtFile; - my $outPlot; - my $txtFileOver; - my $outPlotOver; - if($opt_output){ - if($opt_utr3 or $opt_utr5 or $opt_bst){ - $txtFileOver = $opt_output."/".$utr_type."_overORequal".$opt_nbUTR.".txt"; - $outPlotOver = $opt_output."/".$utr_type."_overORequal".$opt_nbUTR.".pdf"; - $txtFile = $opt_output."/".$utr_type."_under".$opt_nbUTR.".txt"; - $outPlot = $opt_output."/".$utr_type."_under".$opt_nbUTR.".pdf"; - } - else{ - $txtFile = $opt_output."/".$utr_type.".txt"; - $outPlot = $opt_output."/".$utr_type.".pdf"; - } - }else{ - $txtFile = $utr_type.".txt"; - $outPlot = $utr_type.".pdf"; - if($opt_utr3 or $opt_utr5 or $opt_bst){ - $txtFileOver = $utr_type."_over".$opt_nbUTR."txt"; - $outPlotOver = $utr_type."_over".$opt_nbUTR."pdf"; - } - } - #print file thtat will be read by R - open(FH, ">".$txtFile) || die "Erreur E/S:$!\n"; - if($opt_utr3 or $opt_utr5 or $opt_bst){ - open(FH_filter, ">".$txtFileOver) || die "Erreur E/S:$!\n"; - } - my $firstLine="yes"; - my $firstLineOver="yes"; - foreach my $value (keys %{$UTRdistribution{$utr_type}}) { - - if($opt_utr3 or $opt_utr5 or $opt_bst){ #we have a filter - if($value >= $opt_nbUTR){ #print utr over threshold - if($firstLineOver){ - print FH_filter $value."\t".$UTRdistribution{$utr_type}{$value}; - $firstLineOver=undef; - } - else{ - print FH_filter "\n".$value."\t".$UTRdistribution{$utr_type}{$value}; - } - } - else{ #print utr under threshold - if($firstLine){ - print FH $value."\t".$UTRdistribution{$utr_type}{$value}; - $firstLine=undef; - } - else{ - print FH "\n".$value."\t".$UTRdistribution{$utr_type}{$value}; - } - } - - } - else{ # no filter we print everything - if($firstLine){ - print FH $value."\t".$UTRdistribution{$utr_type}{$value}; - $firstLine=undef; - } - else{ - print FH "\n".$value."\t".$UTRdistribution{$utr_type}{$value}; - } - } - } - close FH; - - my $R = Statistics::R->new() or die "Problem with R : 
$!\n"; - - #R command - $R->send( - qq` - listValues=as.matrix(read.table("$txtFile", sep="\t", he=F)) ##///!!!\\\\\ - legendToDisplay=paste("Number of value used : ",length(listValues)) - listValueMoreThan <- listValues[listValues[,1]>5,] - - pdf("$outPlot") - plot(listValues[,2]~listValues[,1], xlab="Contig size", ylab="Frequency", main="Size distribution of $utr_type") - dev.off() - - pdf("$outPlotOver") - plot(listValueMoreThan[,2]~listValueMoreThan[,1], xlab="Contig size", ylab="Frequency", main="Size distribution of $utr_type over 5") - dev.off()` - ); - - # Close the bridge - $R->stopR(); - - # Delete temporary file - unlink "$txtFile"; - if($opt_utr3 or $opt_utr5 or $opt_bst){ - unlink "$txtFileOver"; - } - } - -} - -####################################################################################################################### - #################### - # methods # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - -sub uniq { - my %seen; - grep !$seen{$_}++, @_; -} - -__END__ - - -=head1 NAME - -maker_manageUTR.pl - Detect the genes containing too much UTR's exon according to a choosen threshold. -If no UTR option (3, 5, 3 and 5, both) is given the threshold will be not used. -option 3 and 5 together is different of "both". In the first case the gene is discarded if either the 3' or the 5' UTR contains more exon than the threshold given. -In the second case, will be discarded only the genes where the addition of UTR's exon of both side is over the threshold given. - -=head1 SYNOPSIS - - ./maker_manageUTR.pl --ref=infile --three --five -p --out=outFile - ./maker_manageUTR.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff>, B<--ref>, B<--reffile> or B<-f> - -Input GFF3 file correponding to gene build. - -=item B<-n>, B<-t>, B<--nb> or B<--number> - -Threshold of exon's number of the UTR. Over or equal to this threshold, the UTR will be discarded. Default value is 5. 
- -=item B<-3>, B<--three> or B<--tree_prime_utr> - -The threshold of the option will be applied on the 3'UTR. - -=item B<-5>, B<--five> or B<--five_prime_utr> - -The threshold of the option will be applied on the 5'UTR. - -=item B<-b>, B<--both> or B<--bs> - -The threshold of the option will be applied on genes where the number of UTR exon (3' and 5' additioned) is over it. - -=item B<--p>, B<--plot> or B<-o> - -Allows to create an histogram in pdf of UTR sizes distribution. - -=item B<--out>, B<--output> or B<-o> - -Output gff3 file where the gene incriminated will be write. - -=item B<--help> or B<-h> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_manage_attributes.pl b/annotation/Tools/Util/gff/gff3_sp_manage_attributes.pl deleted file mode 100755 index 05da64e80..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_manage_attributes.pl +++ /dev/null @@ -1,316 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Carp; -use Clone 'clone'; -use Getopt::Long; -use Pod::Usage; -use List::MoreUtils qw(uniq); -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient; - -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $gff = undef; -my $help= 0; -my $primaryTag=undef; -my $attributes=undef; -my $outfile=undef; -my $add = undef; -my $cp = undef; - -if ( !GetOptions( - "help|h" => \$help, - "gff|f=s" => \$gff, - "add" => \$add, - "cp" => \$cp, - "p|type|l=s" => \$primaryTag, - "tag|att=s" => \$attributes, - "output|outfile|out|o=s" => \$outfile)) - -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ( ! 
(defined($gff)) ){ - pod2usage( { - -message => "$header\nAt least 1 parameter is mandatory:\nInput reference gff file (--gff) \n\n", - -verbose => 0, - -exitval => 2 } ); -} - -my $gffout; -if ($outfile) { - $outfile=~ s/.gff//g; - open(my $fh, '>', $outfile.".gff") or die "Could not open file '$outfile' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); -} -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - -# Manage $primaryTag -my @ptagList; -if(! $primaryTag or $primaryTag eq "all"){ - print "We will work on attributes from all features\n"; - push(@ptagList, "all"); -}elsif($primaryTag =~/^level[123]$/){ - print "We will work on attributes from all the $primaryTag features\n"; - push(@ptagList, $primaryTag); -}else{ - @ptagList= split(/,/, $primaryTag); - foreach my $tag (@ptagList){ - if($tag =~/^level[123]$/){ - print "We will work on attributes from all the $tag features\n"; - } - else{ - print "We will work on attributes from $tag feature.\n"; - } - } -} - -# Manage attributes if given -### If attributes given, parse them: -my %attListOk; -my @attListPair; -if ($attributes){ - - if ($attributes eq "all_attributes"){ - if($add){ - print "You cannot use the all_attributes value with the add option. Please change the parameters !\n";exit; - } - print "All attributes will be removed except ID and Parent attributes !\n"; - $attListOk{"all_attributes"}++; - } - else{ - @attListPair= split(/,/, $attributes); - my $nbAtt=$#attListPair+1; - - foreach my $attributeTuple (@attListPair){ - my @attList= split(/\//, $attributeTuple); - if($#attList == 0){ # Attribute alone - #check for ID attribute - if(lc($attList[0]) eq "id" and ! $add){print "It's forbidden to remove the ID attribute in a gff3 file !\n";exit;} - #check for Parent attribute - if(lc($attList[0]) eq "parent" and ! 
$add){ - foreach my $tag (@ptagList){ - if($tag ne "gene" and $tag ne "level1"){ - print "It's forbidden to remove the $attList[0] attribute to a $tag feature in a gff3 file !\n"; - exit; - } - } - } - $attListOk{$attList[0]}="null"; - if($add){ - print "$attList[0] attribute will be added. The value will be empty.\n"; - } - else{ - print "$attList[0] attribute will be removed.\n"; - } - } - else{ # Attribute will be replaced/copied with a new tag name - $attListOk{$attList[0]}=$attList[1]; - print "$attList[0] attribute will be replaced by $attList[1].\n"; - } - } - } - print "\n"; -} - - - ##################### - # MAIN # - ##################### - - -###################### -### Parse GFF input # -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $gff - }); -print ("GFF3 file parsed\n"); - - -foreach my $tag_l1 (keys %{$hash_omniscient->{'level1'}}){ - foreach my $id_l1 (keys %{$hash_omniscient->{'level1'}{$tag_l1}}){ - - my $feature_l1=$hash_omniscient->{'level1'}{$tag_l1}{$id_l1}; - - manage_attributes($feature_l1, 'level1', \@ptagList,\%attListOk); - - ################# - # == LEVEL 2 == # - ################# - foreach my $tag_l2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - - if ( exists ($hash_omniscient->{'level2'}{$tag_l2}{$id_l1} ) ){ - foreach my $feature_l2 ( @{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}}) { - - manage_attributes($feature_l2,'level2',, \@ptagList,\%attListOk); - ################# - # == LEVEL 3 == # - ################# - my $level2_ID = lc($feature_l2->_tag_value('ID')); - - foreach my $tag_l3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... 
- if ( exists ($hash_omniscient->{'level3'}{$tag_l3}{$level2_ID} ) ){ - foreach my $feature_l3 ( @{$hash_omniscient->{'level3'}{$tag_l3}{$level2_ID}}) { - manage_attributes($feature_l3, 'level3', \@ptagList,\%attListOk); - } - } - } - } - } - } - } -} -#print "We added $nbNameAdded Name attributes\n"; - -print_omniscient($hash_omniscient, $gffout); #print gene modified - - -####################################################################################################################### - #################### - # methods # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - -sub manage_attributes{ - my ($feature, $level, $ptagList, $attListOk)=@_; - - my $primary_tag=$feature->primary_tag; - - # check primary tag (feature type) to handle - foreach my $ptag (@$ptagList){ - - if($ptag eq "all"){ - remove_tag_from_list($feature,$attListOk); - } - elsif(lc($ptag) eq $level){ - remove_tag_from_list($feature,$attListOk); - } - elsif(lc($ptag) eq lc($primary_tag) ){ - remove_tag_from_list($feature,$attListOk); - } - } -} - -sub remove_tag_from_list{ - my ($feature, $attListOk)=@_; - - if (exists ($attListOk{"all_attributes"} ) ){ # all attributes removed except ID and Parent - my @list_att = $feature->get_all_tags; - foreach my $tag (@list_att){ - if(lc($tag) ne "id" and lc($tag) ne "parent"){ - $feature->remove_tag($tag); - } - } - } - else{ - foreach my $att (keys %{$attListOk}){ - - if ($feature->has_tag($att)){ - - if ($attListOk{$att} eq "null" ){ # the attribute name is kept inctact - $feature->remove_tag($att); - } - else{ # We replace the attribute name - - my @values=$feature->get_tag_values($att); - my $newAttributeName=$attListOk{$att}; - create_or_replace_tag($feature,$newAttributeName, @values); - if(! 
$cp){ - $feature->remove_tag($att); #remove old attribute if it is not the cp option - } - } - } - - elsif($add){ - if ($attListOk{$att} eq "null" ){ # the attribute name is kept inctact - create_or_replace_tag($feature,$att,'empty'); - } - } - } - } -} - - -__END__ - -=head1 NAME - -gff3_manageAttributes.pl - -The script take a gff3 file as input. - -The script allows to remove choosen attributes to choosen features. - -=head1 SYNOPSIS - - ./gff3_manageAttributes.pl -gff file.gff -att locus_tag,product,name/NewName -p level2,cds,exon [ -o outfile ] - ./gff3_manageAttributes.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff> or B<-f> - -Input GFF3 file that will be read (and sorted) - -=item B<-p>, B<--type> or B<-l> - -primary tag option, case insensitive, list. Allow to specied the feature types that will be handled. -You can specified a specific feature by given its primary tag name (column 3) as: cds, Gene, MrNa -You can specify directly all the feature of a particular level: - level2=mRNA,ncRNA,tRNA,etc - level3=CDS,exon,UTR,etc -By default all feature are taking in account. fill the option by the value "all" will have the same behaviour. - -=item B<--tag>, B<--att> - -Attributes with the tag specified will be removed from the feature type specified by the option p (primary tag). List of tag must be coma separated. -/!\\ You must use "" if name contains spaces. -Instead to remove an attribute, you can replace its Tag by a new Tag using this formulation tagName/newTagName. -To remove all attributes non mandatory (only ID and Parent are mandatory) you can use the option with parameter. - -=item B<--add> - -Attribute specified will be added if doesn't exist. The value will be 'empty'. - -=item B<--cp> - -When attributes specied are with this form: tagName/newTagName. By using this parameter, the tag will not be modified but duplicated with the new -tagName. - -=item B<-o> , B<--output> , B<--out> or B<--outfile> - -Output GFF file. 
If no output file is specified, the output will be -written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_manage_functional_annotation.pl b/annotation/Tools/Util/gff/gff3_sp_manage_functional_annotation.pl deleted file mode 100755 index f8087c0ed..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_manage_functional_annotation.pl +++ /dev/null @@ -1,1145 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use File::Basename; -use Carp; -use Time::Piece; -use Time::Seconds; -use POSIX qw(strftime); -use Getopt::Long; -use IO::File; -use Pod::Usage; -use List::MoreUtils qw(uniq); -use Bio::DB::Fasta; -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient; - -my $header = qq{ -######################################################## -# NBIS 2019 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -# PARAMETERS - OPTION -my $opt_reffile; -my $opt_output; -my $opt_BlastFile; -my $opt_InterproFile; -my $opt_name=undef; -my $opt_nameU; -my $opt_verbose=undef; -my $opt_help = 0; -my $opt_blastEvalue=1e-6; -my $opt_dataBase = undef; -my $opt_pe = 5; -my %numbering; -my $nbIDstart=1; -my $prefixName=undef; -my %tag_hash; -my @tag_list; -# END PARAMETERS - OPTION - -# FOR FUNCTIONS BLAST# -my %nameBlast; -my %geneNameBlast; -my %mRNANameBlast; -my %mRNAUniprotIDFromBlast; -my %mRNAproduct; -my %geneNameGiven; -my %duplicateNameGiven; -my $nbDuplicateNameGiven=0; -my $nbDuplicateName=0; -my $nbNamedGene=0; -my $nbGeneNameInBlast=0; -# END FOR FUNCTION BLAST# - -# FOR FUNCTIONS INTERPRO# -my %TotalTerm; -my %finalID; -my %GeneAssociatedToTerm; -my %mRNAAssociatedToTerm; -my %functionData; -my %functionDataAdded; -my %functionOutput; -my %functionStreamOutput; -my %geneWithoutFunction; -my %geneWithFunction; -my $nbmRNAwithoutFunction=0; -my $nbmRNAwithFunction=0; -my 
$nbGeneWithGOterm=0; -my $nbTotalGOterm=0; -# END FOR FUNCTION INTERPRO# - -# OPTION MANAGMENT -my @copyARGV=@ARGV; -if ( !GetOptions( 'f|ref|reffile|gff|gff3=s' => \$opt_reffile, - 'b|blast=s' => \$opt_BlastFile, - 'd|db=s' => \$opt_dataBase, - 'be|blast_evalue=i' => \$opt_blastEvalue, - 'pe=i' => \$opt_pe, - 'i|interpro=s' => \$opt_InterproFile, - 'id=s' => \$opt_name, - 'idau=s' => \$opt_nameU, - 'nb=i' => \$nbIDstart, - 'o|output=s' => \$opt_output, - 'v' => \$opt_verbose, - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ( ! (defined($opt_reffile)) ){ - pod2usage( { - -message => "$header\nAt least 1 parameter is mandatory:\nInput reference gff file (--f)\n\n". - "Many optional parameters are available. Look at the help documentation to know more.\n", - -verbose => 0, - -exitval => 1 } ); -} - -################################################# -####### START Manage files (input output) ####### -################################################# - -if($opt_pe>5 or $opt_pe<1){ - print "Error the Protein Existence (PE) value must be between 1 and 5\n";exit; -} - -my $streamBlast = IO::File->new(); -my $streamInter = IO::File->new(); - -# Manage Blast File -if (defined $opt_BlastFile){ - if (! $opt_dataBase){ - print "To use the blast output we also need the fasta of the database used for the blast (--db)\n";exit; - } - $streamBlast->open( $opt_BlastFile, 'r' ) or croak( sprintf( "Can not open '%s' for reading: %s", $opt_BlastFile, $! ) ); -} - -# Manage Interpro file -if (defined $opt_InterproFile){ - $streamInter->open( $opt_InterproFile, 'r' ) or croak( sprintf( "Can not open '%s' for reading: %s", $opt_InterproFile, $! 
) ); -} - -########################## -##### Manage Output ###### -my $ostreamReport; -my $ostreamGFF; -my $ostreamLog; -if (defined($opt_output) ) { - if (-f $opt_output){ - print "Cannot create a directory with the name $opt_output because a file with this name already exists.\n";exit(); - } - if (-d $opt_output){ - print "The output directory choosen already exists. Please geve me another Name.\n";exit(); - } - mkdir $opt_output; - - $ostreamReport=IO::File->new(">".$opt_output."/report.txt" ) or - croak( sprintf( "Can not open '%s' for writing %s", $opt_output."/report.txt", $! )); - - my $file_out_name = fileparse($opt_reffile); - $ostreamGFF=Bio::Tools::GFF->new(-file => ">$opt_output/$file_out_name", -gff_version => 3 ) or - croak(sprintf( "Can not open '%s' for writing %s", $opt_output."/".$opt_reffile, $! )); - - $ostreamLog=IO::File->new(">".$opt_output."/error.txt" ) or - croak( sprintf( "Can not open '%s' for writing %s", $opt_output."/log.txt", $! )); -} -else { - $ostreamReport = \*STDOUT or die ( sprintf( "Can not open '%s' for writing %s", "STDOUT", $! )); - $ostreamLog = \*STDOUT or die ( sprintf( "Can not open '%s' for writing %s", "STDOUT", $! )); - $ostreamGFF = Bio::Tools::GFF->new( -fh => \*STDOUT, -gff_version => 3) or croak( sprintf( "Can not open STDOUT for writing: %s", $! ) ); -} - -############################################### -####### END Manage files (input output) ####### -############################################### -#my $stringPrint = strftime "%m/%d/%Y at %Hh%Mm%Ss", localtime; -my $stringPrint = strftime "%m/%d/%Y", localtime; -$stringPrint .= "\nusage: $0 @copyARGV\n"; -if ($opt_name){ - $prefixName=$opt_name; - $stringPrint .= "->IDs are changed using <$opt_name> as prefix.\nIn the case of discontinuous features (i.e. a single feature that exists over multiple genomic locations) the same ID may appear on multiple lines.". 
- " All lines that share an ID collectively represent a signle feature.\n"; -} -if ($opt_nameU){ - $stringPrint .= "->IDs will be changed using <$opt_nameU> as prefix. Features that shared an ID collectively (e.g. CDS, UTRs, etc...) will now have each an uniq ID.\n"; - $prefixName=$opt_nameU; -} - - - -# Display -$ostreamReport->print($stringPrint); -if($opt_output){ print_time("$stringPrint");} # When ostreamReport is a file we have to also display on screen - - - - # +------------------------------------------------------+ - # |+----------------------------------------------------+| - # || MAIN || - # |+----------------------------------------------------+| - # +------------------------------------------------------+ - -###################### -### Parse GFF input # -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $opt_reffile - }); -print_time("Parsing Finished\n"); -### END Parse GFF input # -######################### - -#Print directly what has been read -my ($stat, $distri) = gff3_statistics($hash_omniscient); -$ostreamReport->print("Statistics:\n==========\n"); -if($opt_output){print "Statistics:\n==========\n";} # When ostreamReport is a file we have to also display on screen -foreach my $infoList (@$stat){ - foreach my $info (@$infoList){ - $ostreamReport->print("$info"); - if($opt_output){print "$info";} # When ostreamReport is a file we have to also display on screen - } - $ostreamReport->print("\n"); - if($opt_output){print "\n";} # When ostreamReport is a file we have to also display on screen -} - -################################ -# MANAGE FUNCTIONAL INPUT FILE # - -##################### -# Manage Blast File # -my $db; -my %allIDs; -if (defined $opt_BlastFile){ - # read fasta file and save info in memory - print ("look at the fasta database\n"); - $db = Bio::DB::Fasta->new($opt_dataBase); - # save ID in lower case to avoid cast problems - my @ids = $db->get_all_primary_ids; - foreach my $id (@ids ){$allIDs{lc($id)}=$id;} - 
print_time("Parsing Finished\n\n"); - - # parse blast output - print( "Reading features from $opt_BlastFile...\n"); - parse_blast($streamBlast, $opt_blastEvalue, $hash_mRNAGeneLink); -} - -######################## -# Manage Interpro File # -if (defined $opt_InterproFile){ - parse_interpro_tsv($streamInter,$opt_InterproFile); - - # create streamOutput - if($opt_output){ - foreach my $type (keys %functionData){ - my $ostreamFunct = IO::File->new(); - $ostreamFunct->open( $opt_output."/$type.txt", 'w' ) or - croak( - sprintf( "Can not open '%s' for writing %s", $opt_output."/$type.txt", $! ) - ); - $functionStreamOutput{$type}=$ostreamFunct; - } - } -} -# END MANAGE FUNCTIONAL INPUT FILE # -#################################### - -########################### -# change FUNCTIONAL information if asked for -if ($opt_BlastFile || $opt_InterproFile ){#|| $opt_BlastFile || $opt_InterproFile){ - print_time( "load FUNCTIONAL information\n" ); - - ################# - # == LEVEL 1 == # - ################# - foreach my $primary_tag_level1 (keys %{$hash_omniscient ->{'level1'}}){ # primary_tag_level1 = gene or repeat etc... 
- foreach my $id_level1 (keys %{$hash_omniscient ->{'level1'}{$primary_tag_level1}}){ - - my $feature_level1=$hash_omniscient->{'level1'}{$primary_tag_level1}{$id_level1}; - # Clean NAME attribute - if($feature_level1->has_tag('Name')){ - $feature_level1->remove_tag('Name'); - } - - #Manage Name if otpion setting - if( $opt_BlastFile ){ - if (exists ($geneNameBlast{$id_level1})){ - create_or_replace_tag($feature_level1, 'Name', $geneNameBlast{$id_level1}); - $nbNamedGene++; - - # Check name duplicated given - my $nameClean=$geneNameBlast{$id_level1}; - $nameClean =~ s/_([2-9]{1}[0-9]*|[0-9]{2,})*$//; - - my $nameToCompare; - if(exists ($nameBlast{$nameClean})){ # We check that is really a name where we added the suffix _1 - $nameToCompare=$nameClean; - } - else{$nameToCompare=$geneNameBlast{$id_level1};} # it was already a gene_name like BLABLA_12 - - if(exists ($geneNameGiven{$nameToCompare})){ - $nbDuplicateNameGiven++; # track total - $duplicateNameGiven{$nameToCompare}++; # track diversity - } - else{$geneNameGiven{$nameToCompare}++;} # first time we have given this name - } - } - - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_key_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... 
- - if ( exists_keys ($hash_omniscient, ('level2', $primary_tag_key_level2, $id_level1) ) ){ - foreach my $feature_level2 ( @{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_level1}}) { - - my $level2_ID = lc($feature_level2->_tag_value('ID')); - # Clean NAME attribute - if($feature_level2->has_tag('Name')){ - $feature_level2->remove_tag('Name'); - } - - #Manage Name if option set - if($opt_BlastFile){ - # add gene Name - if (exists ($mRNANameBlast{$level2_ID})){ - my $mRNABlastName=$mRNANameBlast{$level2_ID}; - create_or_replace_tag($feature_level2, 'Name', $mRNABlastName); - } - my $productData=printProductFunct($level2_ID); - - #add UniprotID attribute - if (exists ($mRNAUniprotIDFromBlast{$level2_ID})){ - my $mRNAUniprotID=$mRNAUniprotIDFromBlast{$level2_ID}; - create_or_replace_tag($feature_level2, 'uniprot_id', $mRNAUniprotID); - } - - #add product attribute - if ($productData ne ""){ - if($feature_level2->has_tag('pseudo')){ - create_or_replace_tag($feature_level2, 'Note', "product:$productData"); - } - else{ - create_or_replace_tag($feature_level2, 'product', $productData); - } - } - else { - if($feature_level2->has_tag('pseudo')){ - create_or_replace_tag($feature_level2, 'Note', "product:hypothetical protein"); - } - else{ - create_or_replace_tag($feature_level2, 'product', "hypothetical protein"); - } - - } #Case where the protein is not known - } - - # print function if option - if($opt_InterproFile){ - my $parentID=$feature_level2->_tag_value('Parent'); - - if (addFunctions($feature_level2, $opt_output)){ - $nbmRNAwithFunction++;$geneWithFunction{$parentID}++; - if(exists ($geneWithoutFunction{$parentID})){ - delete $geneWithoutFunction{$parentID}; - } - } - else{ - $nbmRNAwithoutFunction++; - if(! 
exists ($geneWithFunction{$parentID})){ - $geneWithoutFunction{$parentID}++; - } - } - } - } - } - } - } - } -} - - -########################### -# change names if asked for -if ($opt_nameU || $opt_name ){#|| $opt_BlastFile || $opt_InterproFile){ - print_time("load new IDs"); - - my %hash_sortBySeq; - foreach my $tag_level1 ( keys %{$hash_omniscient->{'level1'}}){ - foreach my $level1_id ( keys %{$hash_omniscient->{'level1'}{$tag_level1}}){ - my $position=$hash_omniscient->{'level1'}{$tag_level1}{$level1_id}->seq_id; - push (@{$hash_sortBySeq{$position}{$tag_level1}}, $hash_omniscient->{'level1'}{$tag_level1}{$level1_id}); - } - } - - ################# - # == LEVEL 1 == # - ################# - #Read by seqId to sort properly the output by seq ID - foreach my $seqid (sort alphaNum keys %hash_sortBySeq){ # loop over all the feature level1 - - foreach my $primary_tag_level1 (sort {$a cmp $b} keys %{$hash_sortBySeq{$seqid}}){ - - foreach my $feature_level1 ( sort {$a->start <=> $b->start} @{$hash_sortBySeq{$seqid}{$primary_tag_level1}}){ - my $level1_ID=$feature_level1->_tag_value('ID'); - my $id_level1 = lc($level1_ID); - my $newID_level1=undef; - #print_time( "Next gene $id_level1\n"); - - #keep track of Maker ID - if($opt_BlastFile){#In that case the name given by Maker is removed from ID and from Name. We have to kee a track - create_or_replace_tag($feature_level1, 'makerName', $level1_ID); - } - - my $letter_tag = get_letter_tag($primary_tag_level1); - - if(! exists_keys(\%numbering,($letter_tag ))){$numbering{$letter_tag }=$nbIDstart;} - $newID_level1 = manageID($prefixName, $numbering{$letter_tag }, $letter_tag ); - $numbering{$letter_tag }++; - create_or_replace_tag($feature_level1, 'ID', $newID_level1); - - $finalID{$feature_level1->_tag_value('ID')}=$newID_level1; - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_level2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_level2 = mrna or mirna or ncrna or trna etc... 
- - if ( exists_keys ($hash_omniscient, ('level2', $primary_tag_level2, $id_level1) ) ){ - foreach my $feature_level2 ( @{$hash_omniscient->{'level2'}{$primary_tag_level2}{$id_level1}}) { - - my $level2_ID = $feature_level2->_tag_value('ID'); - my $newID_level2=undef; - - #keep track of Maker ID - if($opt_InterproFile){#In that case the name given by Maker is removed from ID and from Name. We have to kee a track - create_or_replace_tag($feature_level2, 'makerName', $level2_ID); - } - - my $letter_tag = get_letter_tag($primary_tag_level2); - if(! exists_keys(\%numbering,($letter_tag))){$numbering{$letter_tag}=$nbIDstart;} - $newID_level2 = manageID($prefixName, $numbering{$letter_tag},$letter_tag); - $numbering{$letter_tag}++; - create_or_replace_tag($feature_level2, 'ID', $newID_level2); - create_or_replace_tag($feature_level2, 'Parent', $newID_level1); - - $finalID{$level2_ID}=$newID_level2; - ################# - # == LEVEL 3 == # - ################# - - foreach my $primary_tag_level3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... - - if ( exists_keys ($hash_omniscient,('level3',$primary_tag_level3, lc($level2_ID)) ) ){ - - foreach my $feature_level3 ( @{$hash_omniscient->{'level3'}{$primary_tag_level3}{lc($level2_ID)}}) { - - #keep track of Maker ID - my $level3_ID = $feature_level3->_tag_value('ID'); - if($opt_InterproFile){#In that case the name given by Maker is removed from ID and from Name. We have to kee a track - create_or_replace_tag($feature_level3, 'makerName', $level3_ID); - } - - my $letter_tag = get_letter_tag($primary_tag_level3); - if(! 
exists_keys(\%numbering,($letter_tag))){$numbering{$letter_tag}=$nbIDstart;} - my $newID_level3 = manageID($prefixName, $numbering{$letter_tag},$letter_tag); - if( $primary_tag_level3 =~ /cds/ or $primary_tag_level3 =~ /utr/ ) { - if($opt_nameU){ - $numbering{$letter_tag}++; - } - } - else{ - $numbering{$letter_tag}++; - } - create_or_replace_tag($feature_level3, 'ID', $newID_level3); - create_or_replace_tag($feature_level3, 'Parent', $newID_level2); - - $finalID{$level3_ID}=$newID_level3; - } - #save the new l3 into the new l2 id name - $hash_omniscient->{'level3'}{$primary_tag_level3}{lc($newID_level2)} = delete $hash_omniscient->{'level3'}{$primary_tag_level3}{lc($level2_ID)} # delete command return the value before deteling it, so we just transfert the value - } - if ($opt_name and ( $primary_tag_level3 =~ /cds/ or $primary_tag_level3 =~ /utr/ ) ){ - my $letter_tag = get_letter_tag($primary_tag_level3); - $numbering{$letter_tag}++; - } # with this option we increment UTR name only for each UTR (cds also) - - } - } - if($newID_level1){ - $hash_omniscient->{'level2'}{$primary_tag_level2}{lc($newID_level1)} = delete $hash_omniscient->{'level2'}{$primary_tag_level2}{$id_level1}; # modify the id key of the hash. The delete command return the value before deteling it, so we just transfert the value - } - } - } - - if($newID_level1){ - $hash_omniscient->{'level1'}{$primary_tag_level1}{lc($newID_level1)} = delete $hash_omniscient->{'level1'}{$primary_tag_level1}{$id_level1}; # modify the id key of the hash. 
The delete command return the value before deteling it, so we just transfert the value - } - } - } - } -} - -########################### -# RESULT PRINTING -########################### - -############################## -# print FUNCTIONAL INFORMATION - -# first table name\tfunction -if($opt_output){ - foreach my $function_type (keys %functionOutput){ - my $streamOutput=$functionStreamOutput{$function_type}; - foreach my $ID (keys %{$functionOutput{$function_type}}){ - - if ($opt_nameU || $opt_name ){ - print $streamOutput $finalID{$ID}."\t".$functionOutput{$function_type}{$ID}."\n"; - } - else{ - print $streamOutput $ID."\t".$functionOutput{$function_type}{$ID}."\n"; - } - } - } -} - - -# NOW summerize -$stringPrint =""; # reinitialise (use at the beginning) -if ($opt_InterproFile){ - #print INFO - my $lineB= "_________________________________________________________________________________________________________________________________"; - $stringPrint .= " ".$lineB."\n"; - $stringPrint .= "| | Nb Total term | Nb mRNA with term | Nb mRNA updated by term | Nb gene updated by term |\n"; - $stringPrint .= "| | in raw File | in raw File | in our annotation file | in our annotation file |\n"; - $stringPrint .= "|".$lineB."|\n"; - - foreach my $type (sort keys %functionData){ - my $total_type = $TotalTerm{$type}; - my $mRNA_type_raw = $functionDataAdded{$type}; - my $mRNA_type = keys %{$mRNAAssociatedToTerm{$type}}; - my $gene_type = keys %{$GeneAssociatedToTerm{$type}}; - $stringPrint .= "|".sizedPrint(" $type",25)."|".sizedPrint($total_type,25)."|".sizedPrint($mRNA_type_raw,25)."|".sizedPrint($mRNA_type,25)."|".sizedPrint($gene_type,25)."|\n|".$lineB."|\n"; - } - - #RESUME TOTAL OF FUNCTION ATTACHED - my $listOfFunction; - foreach my $funct (sort keys %functionData){ - $listOfFunction.="$funct,"; - } - chop $listOfFunction; - my $nbGeneWithoutFunction= keys %geneWithoutFunction; - my $nbGeneWithFunction= keys %geneWithFunction; - $stringPrint .= "nb mRNA without 
Functional annotation ($listOfFunction) = $nbmRNAwithoutFunction\n". - "nb mRNA with Functional annotation ($listOfFunction) = $nbmRNAwithFunction\n". - "nb gene without Functional annotation ($listOfFunction) = $nbGeneWithoutFunction\n". - "nb gene with Functional annotation ($listOfFunction) = $nbGeneWithFunction\n"; -} - -if($opt_BlastFile){ - my $nbGeneDuplicated=keys %duplicateNameGiven; - $nbDuplicateNameGiven=$nbDuplicateNameGiven+$nbGeneDuplicated; # Until now we have counted only name in more, now we add the original name. - $stringPrint .= "$nbGeneNameInBlast gene names have been retrieved in the blast file. $nbNamedGene gene names have been successfully inferred.\n". - "Among them there are $nbGeneDuplicated names that are shared at least per two genes for a total of $nbDuplicateNameGiven genes.\n"; - # "We have $nbDuplicateName gene names duplicated ($nbDuplicateNameGiven - $nbGeneDuplicated)."; - - #Lets keep track the duplicated names - if($opt_output){ - my $duplicatedNameOut=IO::File->new(">".$opt_output."/duplicatedNameFromBlast.txt" ); - foreach my $name (sort { $duplicateNameGiven{$b} <=> $duplicateNameGiven{$a} } keys %duplicateNameGiven){ - print $duplicatedNameOut "$name\t".($duplicateNameGiven{$name}+1)."\n"; - } - } -} - -if($opt_name or $opt_nameU){ - $stringPrint .= "\nList of Letter use to create the uniq ID:\n"; - foreach my $tag ( keys %tag_hash){ - $stringPrint .= "$tag => $tag_hash{$tag}\n"; - } - $stringPrint .= "\n"; -} - -# Display -$ostreamReport->print("$stringPrint"); -if(defined $opt_output){print_time( "$stringPrint" ) ;} - -#################### -# PRINT IN FILES -#################### -print_time("Writing result..."); -print_omniscient($hash_omniscient, $ostreamGFF); - - ######################### - ######### END ########### - ######################### -####################################################################################################################### - #################### - # methods # - ################ - 
############## - ############ - ########## - ######## - ###### - #### - ## - - -#create or take the uniq letter TAG -sub get_letter_tag{ - my ($tag)=@_; - - $tag = lc($tag); - if(! exists_keys (\%tag_hash,( $tag ))) { - - my $substringLength=1; - my $letter = uc(substr($tag, 0, $substringLength)); - - while( grep( /^\Q$letter\E$/, @tag_list) ) { # to avoid duplicate - $substringLength++; - $letter = uc(substr($tag, 0, $substringLength)); - } - $tag_hash{ $tag }=uc($letter); - push(@tag_list, $letter) - } - return $tag_hash{ $tag }; -} - -# print with time -sub print_time{ - my $t = localtime; - my $line = "[".$t->hms."] @_\n"; - print $line; -} - -# each mRNA of a gene has its proper gene name. Most often is the same, and annie added a number at the end. To provide only one gene name, we remove this number and then remove duplicate name (case insensitive). -# If it stay at the end of the process more than one name, they will be concatenated together. -# It removes redundancy intra name. -sub manageGeneNameBlast{ - my ($geneName)=@_; - foreach my $element (keys %$geneName){ - my @tab=@{$geneName->{$element}}; - - my %seen; - my @unique; - for my $w (@tab) { # remove duplicate in list case insensitive - $w =~ s/_[0-9]+$// ; - next if $seen{lc($w)}++; - push(@unique, $w); - } - - my $finalName=""; - my $cpt=0; - foreach my $name (@unique){ #if several name we will concatenate them together - if ($cpt == 0){ - $finalName .="$name"; - } - else{$finalName .="_$name"} - } - $geneName->{$element}=$finalName; - $nameBlast{lc($finalName)}++; - } -} - -# creates gene ID correctly formated (PREFIX,TYPE,NUMBER) like HOMSAPG00000000001 for a Homo sapiens gene. 
-sub manageID{ - my ($prefix,$nbName,$type)=@_; - my $result=""; - my $numberNum=11; - my $GoodNum=""; - for (my $i=0; $i<$numberNum-length($nbName); $i++){ - $GoodNum.="0"; - } - $GoodNum.=$nbName; - $result="$prefix$type$GoodNum"; - - return $result; -} - -# Create String containing the product information associated to the mRNA -sub printProductFunct{ - my ($refname)=@_; - my $String=""; - my $first="yes"; - if (exists $mRNAproduct{$refname}){ - foreach my $element (@{$mRNAproduct{$refname}}) - { - if($first eq "yes"){ - $String.="$element"; - $first="no"; - } - else{$String.=",$element";} - } - } - return $String; -} - -sub addFunctions{ - my ($feature, $opt_output)=@_; - - my $functionAdded=undef; - my $ID=lc($feature->_tag_value('ID')); - foreach my $function_type (keys %functionData){ - - - if(exists ($functionData{$function_type}{$ID})){ - $functionAdded="true"; - - my $data_list; - - if(lc($function_type) eq "go"){ - foreach my $data (@{$functionData{$function_type}{$ID}}){ - $feature->add_tag_value('Ontology_term', $data); - $data_list.="$data,"; - $functionDataAdded{$function_type}++; - } - } - else{ - foreach my $data (@{$functionData{$function_type}{$ID}}){ - $feature->add_tag_value('Dbxref', $data); - $data_list.="$data,"; - $functionDataAdded{$function_type}++; - } - } - - if ($opt_output){ - my $ID = $feature->_tag_value('ID'); - chop $data_list; - $functionOutput{$function_type}{$ID}=$data_list; - } - } - } - return $functionAdded; -} - -# method to parse blast file -sub parse_blast { - my($file_in, $opt_blastEvalue, $hash_mRNAGeneLink) = @_; - -################################################################################# -####### Step 1 : CATCH all candidates (the better candidate for each mRNA)####### (with a gene name) - - my %candidates; - - while( my $line = <$file_in>) { - my @values = split(/\t/, $line); - my $l2_name = lc($values[0]); - my $prot_name = $values[1]; - my @prot_name_sliced = split(/\|/, $values[1]); - my $uniprot_id = 
$prot_name_sliced[1]; - print "uniprot_id: ".$uniprot_id."\n" if($opt_verbose); - my $evalue = $values[10]; - print "Evalue: ".$evalue."\n" if($opt_verbose); - - #if does not exist fill it if over the minimum evalue - if (! exists_keys(\%candidates,($l2_name)) or @{$candidates{$l2_name}}> 3 ){ # the second one means we saved an error message as candidates we still have to try to find a proper one - if( $evalue <= $opt_blastEvalue ) { - my $protID_correct=undef; - if( exists $allIDs{lc($prot_name)}){ - $protID_correct = $allIDs{lc($prot_name)}; - my $header = $db->header( $protID_correct ); - - if ($header =~ m/GN=/){ - if($header =~ /PE=([1-5])\s/){ - if($1 <= $opt_pe){ - $candidates{$l2_name}=[$header, $evalue, $uniprot_id]; - } - } - else{$ostreamLog->print("No Protein Existence (PE) information in this header: $header\n")if($opt_verbose or $opt_output); } - } - else{ - $ostreamLog->print( "No gene name (GN=) in this header $header\n") if($opt_verbose or $opt_output); - $candidates{$l2_name}=["error", $evalue, $prot_name."-".$l2_name]; - } - } - else{ - $ostreamLog->print( "ERROR $prot_name not found among the db! You probably didn't give to me the same fasta file than the one used for the blast. 
(l2=$l2_name)\n" ) if($opt_verbose or $opt_output); - $candidates{$l2_name}=["error", $evalue, $prot_name."-".$l2_name]; - } - } - } - elsif( $evalue < $candidates{$l2_name}[1] ) { # better evalue for this record - my $protID_correct=undef; - if( exists $allIDs{lc($prot_name)}){ - $protID_correct = $allIDs{lc($prot_name)}; - my $header = $db->header( $protID_correct ); - - if ($header =~ m/GN=/){ - if($header =~ /PE=([1-5])\s/){ - if($1 <= $opt_pe){ - $candidates{$l2_name}=[$header, $evalue, $uniprot_id]; - } - } - else{ $ostreamLog->print( "No Protein Existence (PE) information in this header: $header\n") if($opt_verbose or $opt_output); } - } - else{ $ostreamLog->print("No gene name (GN=) in this header $header\n") if($opt_verbose or $opt_output); } - } - else{ $ostreamLog->print( "ERROR $prot_name not found among the db! You probably didn't give to me the same fasta file than the one used for the blast. (l2=$l2_name)\n") if($opt_verbose or $opt_output);} - } - } - - my $nb_desc = keys %candidates; - $ostreamLog->print( "We have $nb_desc description candidates.\n") if($opt_verbose or $opt_output); - -################################################## -####### Step 2 : go through all candidates ####### report gene name for each mRNA - - my %geneName; - my %linkBmRNAandGene; - - foreach my $l2 (keys %candidates){ - if( $candidates{$l2}[0] eq "error" ){ - $ostreamLog->print( "error nothing found for $candidates{$l2}[2]\n") if($opt_verbose or $opt_output); next; - } - - #Save uniprot id of the best match - print "save for $l2 ".$candidates{$l2}[2]."\n" if($opt_verbose); - $mRNAUniprotIDFromBlast{$l2} = $candidates{$l2}[2]; - print "save for $l2 ".$candidates{$l2}[2]."\n"; - my $header = $candidates{$l2}[0]; - print "header: ".$header."\n" if($opt_verbose); - - if ($header =~ m/(^[^\s]+)\s(.+?(?= \w{2}=))(.+)/){ - my $protID = $1; - my $description = $2; - my $theRest = $3; - $theRest =~ s/\n//g; - $theRest =~ s/\r//g; - my $nameGene = undef; - push ( @{ 
$mRNAproduct{$l2} }, $description ); - - #deal with the rest - my %hash_rest; - my $tuple=undef; - while ($theRest){ - ($theRest, $tuple) = stringCatcher($theRest); - my ($type,$value) = split /=/,$tuple; - #print "$protID: type:$type --- value:$value\n"; - $hash_rest{lc($type)}=$value; - } - - if(exists($hash_rest{"gn"})){ - $nameGene=$hash_rest{"gn"}; - - if(exists_keys ($hash_mRNAGeneLink,($l2)) ){ - my $geneID = $hash_mRNAGeneLink->{$l2}; - #print "push $geneID $nameGene\n"; - push ( @{ $geneName{lc($geneID)} }, lc($nameGene) ); - push( @{ $linkBmRNAandGene{lc($geneID)}}, lc($l2)); # save mRNA name for each gene name - } - else{ $ostreamLog->print( "No parent found for $l2 (defined in the blast file) in hash_mRNAGeneLink (created by the gff file).\n") if($opt_verbose or $opt_output); } - } - else{ $ostreamLog->print( "Header from the db fasta file doesn't match the regular expression: $header\n") if($opt_verbose or $opt_output); } - } - } - - #################################################### - ####### Step 3 : Manage NAME final gene name ####### several isoforms could have different gene name reported. So we have to keep that information in some way to report only one STRING to gene name attribute of the gene feature. - ################# Remove redundancy to have only one name for each gene - - manageGeneNameBlast(\%geneName); - - - ########################################################## - ####### Step 4 : CLEAN NAMES REDUNDANCY inter gene ####### - - my %geneNewNameUsed; - foreach my $geneID (keys %geneName){ - $nbGeneNameInBlast++; - - my @mRNAList=@{$linkBmRNAandGene{$geneID}}; - my $String = $geneName{$geneID}; - # print "$String\n"; - if (! 
exists( $geneNewNameUsed{$String})){ - $geneNewNameUsed{$String}++; - $geneNameBlast{$geneID}=$String; - # link name to mRNA and and isoform name _1 _2 _3 if several mRNA - my $cptmRNA=1; - if ($#mRNAList != 0) { - foreach my $mRNA (@mRNAList){ - $mRNANameBlast{$mRNA}=$String."_iso".$cptmRNA; - $cptmRNA++; - } - } - else{$mRNANameBlast{$mRNAList[0]}=$String;} - } - else{ #in case where name was already used, we will modified it by addind a number like "_2" - $nbDuplicateName++; - $geneNewNameUsed{$String}++; - my $nbFound=$geneNewNameUsed{$String}; - $String.="_$nbFound"; - $geneNewNameUsed{$String}++; - $geneNameBlast{$geneID}=$String; - # link name to mRNA and and isoform name _1 _2 _3 if several mRNA - my $cptmRNA=1; - if ($#mRNAList != 0) { - foreach my $mRNA (@mRNAList){ - $mRNANameBlast{$mRNA}=$String."_iso".$cptmRNA; - $cptmRNA++; - } - } - else{$mRNANameBlast{$mRNAList[0]}=$String;} - } - } -} - -#uniprotHeader string spliter -sub stringCatcher{ - my($String) = @_; - my $newString=undef; - - if ( $String =~ m/(\w{2}=.+?(?= \w{2}=))(.+)/ ) { - $newString = substr $String, length($1)+1; - return ($newString, $1); - } - else{ return (undef, $String); } -} - -# method to parse Interpro file -sub parse_interpro_tsv { - my($file_in,$fileName) = @_; - print( "Reading features from $fileName...\n"); - - while( my $line = <$file_in>) { - - my @values = split(/\t/, $line); - my $sizeList = @values; - my $mRNAID=lc($values[0]); - - #Check for the specific DB - my $db_name=$values[3]; - my $db_value=$values[4]; - my $db_tuple=$db_name.":".$db_value; - print "Specific dB: ".$db_tuple."\n" if($opt_verbose); - - if (! 
grep( /^\Q$db_tuple\E$/, @{$functionData{$db_name}{$mRNAID}} ) ) { #to avoid duplicate - $TotalTerm{$db_name}++; - push ( @{$functionData{$db_name}{$mRNAID}} , $db_tuple ); - if ( exists $hash_mRNAGeneLink->{$mRNAID}){ ## check if exists among our current gff annotation file analyzed - $mRNAAssociatedToTerm{$db_name}{$mRNAID}++; - $GeneAssociatedToTerm{$db_name}{$hash_mRNAGeneLink->{$mRNAID}}++; - } - } - - #check for interpro - if( $sizeList>11 ){ - my $db_name="InterPro"; - my $interpro_value=$values[11]; - $interpro_value=~ s/\n//g; - my $interpro_tuple = "InterPro:".$interpro_value; - print "interpro dB: ".$interpro_tuple."\n" if($opt_verbose); - - if (! grep( /^\Q$interpro_tuple\E$/, @{$functionData{$db_name}{$mRNAID}} ) ) { #to avoid duplicate - $TotalTerm{$db_name}++; - push ( @{$functionData{$db_name}{$mRNAID}} , $interpro_tuple ); - if ( exists $hash_mRNAGeneLink->{$mRNAID}){ ## check if exists among our current gff annotation file analyzed - $mRNAAssociatedToTerm{$db_name}{$mRNAID}++; - $GeneAssociatedToTerm{$db_name}{$hash_mRNAGeneLink->{$mRNAID}}++; - } - } - } - - #check for GO - if( $sizeList>13 ){ - my $db_name="GO"; - my $go_flat_list = $values[13]; - $go_flat_list=~ s/\n//g; - my @go_list = split(/\|/,$go_flat_list); #cut at character | - foreach my $go_tuple (@go_list){ - print "GO term: ".$go_tuple."\n" if($opt_verbose); - - if (! 
grep( /^\Q$go_tuple\E$/, @{$functionData{$db_name}{$mRNAID}} ) ) { #to avoid duplicate - $TotalTerm{$db_name}++; - push ( @{$functionData{$db_name}{$mRNAID}} , $go_tuple ); - if ( exists $hash_mRNAGeneLink->{$mRNAID}){ ## check if exists among our current gff annotation file analyzed - $mRNAAssociatedToTerm{$db_name}{$mRNAID}++; - $GeneAssociatedToTerm{$db_name}{$hash_mRNAGeneLink->{$mRNAID}}++; - } - } - } - } - - #check for pathway - if( $sizeList>14 ){ - my $pathway_flat_list = $values[14]; - $pathway_flat_list=~ s/\n//g; - $pathway_flat_list=~ s/ //g; - my @pathway_list = split(/\|/,$pathway_flat_list); #cut at character | - foreach my $pathway_tuple (@pathway_list){ - my @tuple = split(/:/,$pathway_tuple); #cut at character : - my $db_name = $tuple[0]; - print "pathway info: ".$pathway_tuple."\n" if($opt_verbose); - - if (! grep( /^\Q$pathway_tuple\E$/, @{$functionData{$db_name}{$mRNAID}} ) ) { # to avoid duplicate - $TotalTerm{$db_name}++; - push ( @{$functionData{$db_name}{$mRNAID}} , $pathway_tuple ); - if ( exists $hash_mRNAGeneLink->{$mRNAID}){ ## check if exists among our current gff annotation file analyzed - $mRNAAssociatedToTerm{$db_name}{$mRNAID}++; - $GeneAssociatedToTerm{$db_name}{$hash_mRNAGeneLink->{$mRNAID}}++; - } - } - } - } - } -} - -sub sizedPrint{ - my ($term,$size) = @_; - my $result; - my $sizeTerm = ($term) ? 
length($term) : 0; - if ($sizeTerm > $size ){ - $result=substr($term, 0,$size); - return $result; - } - else{ - my $nbBlanc=$size-$sizeTerm; - $result=$term; - for (my $i = 0; $i < $nbBlanc; $i++){ - $result.=" "; - } - return $result; - } -} - -#Sorting mixed strings => Sorting alphabetically first, then numerically -# how to use: my @y = sort by_number @x; -sub alphaNum { - my ( $alet , $anum ) = $a =~ /([^\d]+)(\d+)/; - my ( $blet , $bnum ) = $b =~ /([^\d]+)(\d+)/; - ( $alet || "a" ) cmp ( $blet || "a" ) or ( $anum || 0 ) <=> ( $bnum || 0 ) -} - -__END__ - -=head1 NAME - -gff3_sp_manage_functional_annotation.pl - -The script take a gff3 file as input and blast and/or interpro output in order to attach functional annotation to corresponding features within the gff file. ->The blast against Protein Database (outfmt 6) allows to fill the field/attribute NAME for gene and PRODUCT for mRNA. ->The Interpro result (.tsv) file allows to fill the DBXREF field/attribute with pfam, tigr, interpro, GO, KEGG, etc... terms data. -With the option the script will change all the ID field by an Uniq ID created from the given prefix, a letter to specify the kind of feature (G,T,C,E,U), and the feature number. -The result is written to the specified output file, or to STDOUT. - -About the TSV format from interproscan: -======================================= - -The TSV format presents the match data in columns as follows: - -1.Protein Accession (e.g. P51587) -2.Sequence MD5 digest (e.g. 14086411a2cdf1c4cba63020e1622579) -3.Sequence Length (e.g. 3418) -4.Analysis (e.g. Pfam / PRINTS / Gene3D) -5.Signature Accession (e.g. PF09103 / G3DSA:2.40.50.140) -6.Signature Description (e.g. BRCA2 repeat profile) -7.Start location -8.Stop location -9.Score - is the e-value (or score) of the match reported by member database method (e.g. 3.1E-52) -10.Status - is the status of the match (T: true) -11.Date - is the date of the run -12.(InterPro annotations - accession (e.g. 
IPR002093) - optional column; only displayed if -iprlookup option is switched on) -13.(InterPro annotations - description (e.g. BRCA2 repeat) - optional column; only displayed if -iprlookup option is switched on) -14.(GO annotations (e.g. GO:0005515) - optional column; only displayed if --goterms option is switched on) -15.(Pathways annotations (e.g. REACT_71) - optional column; only displayed if --pathways option is switched on) - -P.S: The 9th column contains most of time e-value, but can contain also score (e.g Prosite). To understand the difference: https://myhits.isb-sib.ch/cgi-bin/help?doc=scores.html - -About the outfmt 6 from blast: -============================== - - 1. qseqid query (e.g., gene) sequence id - 2. sseqid subject (e.g., reference genome) sequence id - 3. pident percentage of identical matches - 4. length alignment length - 5. mismatch number of mismatches - 6. gapopen number of gap openings - 7. qstart start of alignment in query - 8. qend end of alignment in query - 9. sstart start of alignment in subject - 10. send end of alignment in subject - 11. evalue expect value - 12. bitscore bit score - -Currently the best e-value win... That means another hit with a lower e-value ( but still over the defined threshold anyway) even if it has a better PE value - will not be reported. - -=head1 SYNOPSIS - - ./gff3_sp_manage_functional_annotation.pl -f=infile.gff [ -b blast_infile --db uniprot.fasta -i interpro_infile.tsv --id ABCDEF --output outfile ] - ./gff3_sp_manage_functional_annotation.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<-f>, B<--reffile>,B<-ref> , B<--gff> or B<--gff3> - -Input GFF3 file that will be read (and sorted) - -=item B<-b> or B<--blast> - -Input blast ( outfmt 6 = tabular )file that will be used to complement the features read from -the first file (specified with B<--ref>). - -=item B<-d> or B<--db> - -The fasta file that has been used as DB for the blast. 
Gene names and products/descriptions will be fished from this file. - -=item B<--be> or B<--blast_evalue> - - Maximum e-value to keep the annotation from the blast file. By default 1e-6. - -=item B<--pe> - -The PE (protein existence) in the uniprot header indicates the type of evidence that supports the existence of the protein. -You can decide until which protein existence level you want to consider to lift the finctional information. Default 5. - -1. Experimental evidence at protein level -2. Experimental evidence at transcript level -3. Protein inferred from homology -4. Protein predicted -5. Protein uncertain - -=item B<-i> or B<--interpro> - -Input interpro file (.tsv) that will be used to complement the features read from -the first file (specified with B<--ref>). - -=item B<-id> - -This option will changed the id name. It will create from id prefix (usually 6 letters) given as input, uniq IDs like prefixE00000000001. Where E mean exon. Instead E we can have C for CDS, G for gene, T for mRNA, U for Utr. -In the case of discontinuous features (i.e. a single feature that exists over multiple genomic locations) the same ID may appear on multiple lines. All lines that share an ID collectively represent a signle feature. - -=item B<-idau> - -This option (id all uniq) is similar to -id option but Id of features that share an ID collectively will be change by different and uniq ID. - -=item B<-nb> - -Usefull only if -id is used. -This option is used to define the number that will be used to begin the numbering. By default begin by 1. - -=item B<-o> or B<--output> - -Output GFF file. If no output file is specified, the output will be -written to STDOUT. - -=item B<-v> - -Verbose (bolean). For debug purpose. - -=item B<-h> or B<--help> - -Display this helpful text. 
- -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_manage_introns.pl b/annotation/Tools/Util/gff/gff3_sp_manage_introns.pl deleted file mode 100755 index f481e4ead..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_manage_introns.pl +++ /dev/null @@ -1,369 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use POSIX qw(strftime); -use File::Basename; -use Carp; -use Getopt::Long; -use IO::File; -use Pod::Usage; -use Statistics::R; -use Bio::Tools::GFF; -use NBIS::CheckModule qw(:Ok); -use NBIS::Plot::R qw(:Ok); -use NBIS::GFF3::Omniscient; - -my $header = qq{ -######################################################## -# NBIS 2016 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - - -my @opt_files; -my $opt_output=undef; -my $opt_breaks; -my $Xpercent=1; -my $opt_help = 0; - -my @copyARGV=@ARGV; -if ( !GetOptions( 'f|gff|ref|reffile=s' => \@opt_files, - 'o|out|output=s' => \$opt_output, - 'w|window|b|break|breaks=i' => \$opt_breaks, - 'x|p=f' => \$Xpercent, - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} -if ( ! ( $#opt_files >= 0) ) { - pod2usage( { - -message => "$header\nMust specify at least 1 parameters:\nReference data gff3 file (--gff)\n", - -verbose => 0, - -exitval => 1 } ); -} - -# ####################### -# # START Manage Option # -# ####################### - -if (defined($opt_output) ) { - if (-d $opt_output){ - print "The output directory choosen already exists. Please geve me another Name.\n";exit(); - } - else{ - mkdir $opt_output; - } -} - -my $ostreamReport; -if (defined($opt_output) ) { - $ostreamReport=IO::File->new(">".$opt_output."/report.txt" ) or croak( sprintf( "Can not open '%s' for writing %s", $opt_output."/report.txt", $! 
)); -} -else{ - $ostreamReport = \*STDOUT or die ( sprintf( "Can not open '%s' for writing %s", "STDOUT", $! )); -} -my $string1 = strftime "%m/%d/%Y at %Hh%Mm%Ss", localtime; -$string1 .= "\n\nusage: $0 @copyARGV\n\n"; - -print $ostreamReport $string1; -if($opt_output){print $string1;} - - -############################# -####### Manage R option ##### -############################# - -#Choose breaks value: -if(! $opt_breaks){ - $opt_breaks="1000"; -} - -############################# -####### Manage output ####### -############################# -my $outputPDF_prefix; -if (defined($opt_output) ) { - if (-f $opt_output){ - print "Cannot create a directory with the name $opt_output because a file with this name already exists.\n";exit(); - } - $outputPDF_prefix=$opt_output."/intronPlot_"; -} -else{ - $outputPDF_prefix="intronPlot_"; -} - -# Check R is available. If not we try to load it through Module software -if ( system("R --version 1>/dev/null 2>/dev/null") == 0 ) { - print "R is available. We can continue\n"; -} -else { - print "R is not loaded. We try to load it.\n"; - if(module_software_installed){ - module_load("R"); - } - else{ - print "Module tool doesn't exists. 
We cannot load R through it."; - } -} - -# ##################################### -# # END Manage OPTION -# ##################################### - - - - ####################### - # MAIN # -# >>>>>>>>>>>>>>>>>>>>>>>>> ####################### <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -#PART 1 -################################### -# Read input gff3 files one by one and save value in hash of list - -my %introns; -foreach my $file (@opt_files){ - - print "Reading ".$file,"\n"; - - # parse file name te remove extension - my ($file1,$dir1,$ext1) = fileparse($file, qr/\.[^.]*/); - - ###################### - ### Parse GFF input # - my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $file - }); - print("Parsing Finished\n\n"); - ### END Parse GFF input # - ######################### - - #print statistics - my ($stat, $distri) = gff3_statistics($hash_omniscient); - - #print statistics - foreach my $infoList (@$stat){ - foreach my $info (@$infoList){ - print $ostreamReport "$info"; - } - print $ostreamReport "\n"; - } - - - ###################### - ### Parse GFF input # - # get nb of each feature in omniscient; - foreach my $tag_l2 (keys %{$hash_omniscient->{'level2'}}){ - foreach my $id_l1 (keys %{$hash_omniscient->{'level2'}{$tag_l2}}){ - my $one_f2 = $hash_omniscient->{'level2'}{$tag_l2}{$id_l1}[0]; - - ####################### - #get feature1 and info - my $feature_l1=undef; - my $tag_l1; - foreach my $tag_level1 (keys %{$hash_omniscient->{'level1'}}){ - if (exists ($hash_omniscient->{'level1'}{$tag_level1}{$id_l1})){ - $feature_l1=$hash_omniscient->{'level1'}{$tag_level1}{$id_l1}; - $tag_l1=$tag_level1; - last; - } - } - if(! $feature_l1){print "Problem ! 
We didnt retrieve the level1 feature with id $id_l1\n";exit;} - - ##### - # get all level2 - my $All_l2_single=1; - my $counterL2_match=-1; - foreach my $feature_l2 ( @{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}} ){ - - #MATCH CASE - We ahve to count the L2 match features - if($tag_l2 =~ "match"){ - my @sortedList = sort {$a->start <=> $b->start} @{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}}; - my $indexLastL2 = $#{$hash_omniscient->{'level2'}{$tag_l2}{$id_l1}}; - $counterL2_match++; - - if($counterL2_match > 0 and $counterL2_match <= $indexLastL2){ - my $intronSize = $sortedList[$counterL2_match]->start - $sortedList[$counterL2_match-1]->end; - push @{$introns{$tag_l2}}, $intronSize; - } - } - - ###### - #get all level3 - my $id_l2=lc($feature_l2->_tag_value('ID')); - - foreach my $tag_l3 ( keys %{$hash_omniscient->{'level3'}} ){ - - if(exists_keys($hash_omniscient,('level3',$tag_l3, $id_l2))){ - - my $counterL3=-1; - #Initialize intron to 0 to avoid error during printing results - my $indexLast = $#{$hash_omniscient->{'level3'}{$tag_l3}{$id_l2}}; - - my @sortedList = sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{$tag_l3}{$id_l2}}; - - foreach my $feature_l3 ( @sortedList ){ - - #count number feature of tag_l3 type - $counterL3++; - - ################ - #Manage Introns# - # from the second intron to the last (from index 1 to last index of the table sortedList) - # We go inside this loop only if we have more than 1 feature. 
- if($counterL3 > 0 and $counterL3 <= $indexLast){ - my $intronSize = $sortedList[$counterL3]->start - $sortedList[$counterL3-1]->end; - push @{$introns{$tag_l3}}, $intronSize; - } - }# END FOREACH L3 - } - } - } - } - } -} - -# PART 2 - -foreach my $tag (keys %introns){ - ############################### - my $biggest_value=0; - my $pathIntron="tmp_intron_".$tag.".txt"; - my @sorted_intron = (sort { $a <=> $b } @{$introns{$tag}}); - ######################### - # Write value in tmp files - - # Manage Output - my $ostreamAED = IO::File->new(); - $ostreamAED->open( $pathIntron, 'w' ) or - croak( - sprintf( "Can not open '%s' for writing %s", $pathIntron, $! ) - ); - foreach my $value ( @sorted_intron ){ - print $ostreamAED "$value\n"; - if($value > $biggest_value){ - $biggest_value=$value; - } - } - $ostreamAED->close(); - - - # Part 3 - ######################################### - #Calcul longest after remove X percent # - my $lastIndex = $#sorted_intron; - my $nbValueToRemove = int(($Xpercent*($lastIndex+1))/100); - my $resu = $sorted_intron[$lastIndex-$nbValueToRemove]; - - my $stringPrint = "Introns in feature $tag: Removing $Xpercent percent of the highest values ($nbValueToRemove values) gives you $resu bp as the longest intron in $tag. 
It's a good choice for MAKER ;-) \n"; - - print $ostreamReport $stringPrint; - if($opt_output){print $stringPrint;} - - - - # Part 4 - ######### - # PLOT # - #chose output file name - my $outputPDF=$outputPDF_prefix.$tag.".pdf"; - #Choose a main title: - my $title="Intron distribution in $tag"; - #choose x title - my $xlab="size bp"; - - ## check using R - my $R = Statistics::R->new() or die "Problem with R : $!\n"; - - #calculate the breaks - my $breaks_ok=int($biggest_value/$opt_breaks); - - #R command - $R->run(qq` - - listValues1=as.matrix(read.table("$pathIntron", sep="\t", he=F)) - pdf("$outputPDF") - hist1<-hist(listValues1,breaks=$breaks_ok,main="$title", xlab="$xlab") - plot(hist1\$mids,hist1\$counts) - mylims <- par("usr") - # Add Title second plot - title(main="$title")` - ); - - # Close the bridge - $R->stopR(); -} - - -# remove temporary files -unlink $pathIntron; - - ######################### - ######### END ########### - ######################### - - -####################################################################################################################### - #################### - # methods # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - - - -__END__ - - -=head1 NAME - -gff3_sp_manage_introns.pl - This script give some information about introns (longest, shortest size mean ...) using the statistic method, -then plot all the intron size values to get an overview of the introns size distribution. -It gives you as well the value of the longest intron after removing X percent(s) of the longest (removing potential biais / false positive). - -=head1 SYNOPSIS - - ./gff3_sp_manage_introns.pl --gff=infile --out=outFile - ./gff3_sp_manage_introns.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff>, B<-f>, B<--ref> or B<-reffile> - -Input GFF3 file correponding to gene build. 
You can use several input files by doing: -f file1 -f file2 -f file3 - -=item B<-w>, B<--window>, B<--break>, B<--breaks> or B<-b> - -It the number of break used within the histogram plot. By default it's 1000. You can modify the value to get something more or less precise. - -=item B<-x>, B<--p> - -Allows to modify the X values to calculate the percentage of the longest introns to remove. By default the value is 1 (We remove 1 percent of the longest). - -=item B<--out>, B<--output> or B<-o> - -Output gff3 file where the gene incriminated will be write. - -=item B<--help> or B<-h> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_merge_annotations.pl b/annotation/Tools/Util/gff/gff3_sp_merge_annotations.pl deleted file mode 100755 index 8906f444c..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_merge_annotations.pl +++ /dev/null @@ -1,132 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Carp; -use Getopt::Long; -use Pod::Usage; -use List::MoreUtils qw(uniq); -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient; - -my $header = qq{ -######################################################## -# NBIS 2016 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $outfile = undef; -my @opt_files; -my $file2 = undef; -my $help= 0; - -if ( !GetOptions( - "help|h" => \$help, - "gff|f=s" => \@opt_files, - "output|outfile|out|o=s" => \$outfile)) - -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ( ! 
@opt_files or (@opt_files and ($#opt_files < 1) ) ){ - pod2usage( { - -message => "\nAt least 2 files are mandatory:\n --gff file1 --gff file2\n\n", - -verbose => 0, - -exitval => 2 } ); -} - -###################### -# Manage output file # -my $gffout; -if ($outfile) { - $outfile=~ s/.gff//g; -open(my $fh, '>', $outfile.".gff") or die "Could not open file '$outfile' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); -} -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - - - ##################### - # MAIN # - ##################### - - -###################### -### Parse GFF input # - -my $file1 = shift @opt_files; -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $file1 - }); -print ("$file1 GFF3 file parsed\n"); -info_omniscient($hash_omniscient); - -#Add the features of the other file in the first omniscient. It takes care of name to not have duplicates -foreach my $next_file (@opt_files){ - my ($hash_omniscient2, $hash_mRNAGeneLink2) = slurp_gff3_file_JD({ input => $next_file - }); - print ("$next_file GFF3 file parsed\n"); - info_omniscient($hash_omniscient2); - - #merge annotation taking care of Uniq name. Does not look if mRNA are identic or so one, it will be handle later. - merge_omniscients($hash_omniscient, $hash_omniscient2); - print ("\n$next_file added we now have:\n"); - info_omniscient($hash_omniscient); -} - -# Now all the feature are in the same omniscient -# We have to check the omniscient to merge overlaping genes together and remove the identical ones -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $hash_omniscient, - merge_loci => 1 - }); -print ("\nfinal result:\n"); -info_omniscient($hash_omniscient); - -######## -# Print results -print_omniscient($hash_omniscient, $gffout); - -__END__ - -=head1 NAME - -gff3_sp_merge_annotations.pl - -This script merge different gff annotation files in gff format in one. 
It uses the NBIS GXF HANDLER that takes care of duplicated names and fixes other oddities met in those files. - -=head1 SYNOPSIS - - ./gff3_sp_merge_annotations.pl --gff=infile1 --gff=infile2 --out=outFile - ./gff3_sp_merge_annotations.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff> or B<-f> - -Input GFF3 file(s). You can specify as much file you want like so: -f file1 -f file2 -f file3 - -=item B<--out>, B<--output> or B<-o> - -Output gff3 file where the gene incriminated will be write. - -=item B<--help> or B<-h> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_splitByLevel2Feature.pl b/annotation/Tools/Util/gff/gff3_sp_splitByLevel2Feature.pl deleted file mode 100755 index f845400d9..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_splitByLevel2Feature.pl +++ /dev/null @@ -1,196 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Pod::Usage; -use Getopt::Long; -use NBIS::GFF3::Omniscient; -use Bio::Tools::GFF; - -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $start_run = time(); -my $opt_gfffile; -my $opt_output; -my $opt_help = 0; - -# OPTION MANAGMENT -if ( !GetOptions( 'g|gff=s' => \$opt_gfffile, - 'o|output=s' => \$opt_output, - - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($opt_help) { - pod2usage( { -verbose => 1, - -exitval => 0, - -message => "$header \n" } ); -} - -if (! defined($opt_gfffile) ){ - pod2usage( { - -message => "$header\nAt least 1 parameter is mandatory:\nInput reference gff file (-g).\n\n". - "Ouptut is optional. 
Look at the help documentation to know more.\n", - -verbose => 0, - -exitval => 1 } ); -} - -###################### -# Manage output file # - -my $gffout; -if ($opt_output) { - $opt_output=~ s/.gff//g; - } -else{ - print "Default output name: split_result\n"; - $opt_output="split_result"; -} - -if (-d $opt_output){ - print "The output directory choosen already exists. Please give me another Name.\n";exit(); -} -mkdir $opt_output; - - ##################### - # MAIN # - ##################### - -###################### -### Parse GFF input # -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $opt_gfffile - }); -print ("GFF3 file parsed\n"); - - -my %handlers; -my $gffout; -################# -# == LEVEL 1 == # -foreach my $tag_l1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc... - foreach my $key_l1 (keys %{$hash_omniscient->{'level1'}{$tag_l1}}){ - - ################# - # == LEVEL 2 == # - my $level1_printed=undef; - foreach my $tag_l2 (keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - - if ( exists ($hash_omniscient->{'level2'}{$tag_l2}{$key_l1} ) ){ - foreach my $feature_level2 ( @{$hash_omniscient->{'level2'}{$tag_l2}{$key_l1}}) { - #manage handler - if(! exists ( $handlers{$tag_l2} ) ) { - open(my $fh, '>', $opt_output."/".$tag_l2.".gff") or die "Could not open file '$tag_l2' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); - $handlers{$tag_l2}=$gffout; - } - $gffout = $handlers{$tag_l2}; - - ################# - # == LEVEL 1 == # - if(! 
$level1_printed){ - $gffout->write_feature($hash_omniscient->{'level1'}{$tag_l1}{$key_l1}); # print feature - $level1_printed=1; - } - - ################# - # == LEVEL 2 == # - $gffout->write_feature($feature_level2); - - ################# - # == LEVEL 3 == # - my $level2_ID = lc($feature_level2->_tag_value('ID')); - - ########### - # Before tss - if ( exists_keys($hash_omniscient,('level3','tss',$level2_ID)) ){ - foreach my $feature_level3 ( @{$hash_omniscient->{'level3'}{'tss'}{$level2_ID}}) { - $gffout->write_feature($feature_level3); - } - } - - ###### - # FIRST EXON - if ( exists_keys($hash_omniscient,('level3','exon',$level2_ID)) ){ - foreach my $feature_level3 ( @{$hash_omniscient->{'level3'}{'exon'}{$level2_ID}}) { - $gffout->write_feature($feature_level3); - } - } - ########### - # SECOND CDS - if ( exists_keys($hash_omniscient,('level3','cds',$level2_ID)) ){ - foreach my $feature_level3 ( @{$hash_omniscient->{'level3'}{'cds'}{$level2_ID}}) { - $gffout->write_feature($feature_level3); - } - } - - ########### - # Last tts - if ( exists_keys($hash_omniscient,('level3','tts',$level2_ID)) ){ - foreach my $feature_level3 ( @{$hash_omniscient->{'level3'}{'tts'}{$level2_ID}}) { - $gffout->write_feature($feature_level3); - } - } - - ########### - # The rest - foreach my $primary_tag_key_level3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... 
- if( ($primary_tag_key_level3 ne 'cds') and ($primary_tag_key_level3 ne 'exon') and ($primary_tag_key_level3 ne 'tss') and ($primary_tag_key_level3 ne 'tts')){ - if ( exists ($hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID} ) ){ - foreach my $feature_level3 ( @{$hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID}}) { - $gffout->write_feature($feature_level3); - } - } - } - } - } - } - } - } -} - - -my $end_run = time(); -my $run_time = $end_run - $start_run; -print "Job done in $run_time seconds\n"; -__END__ - -=head1 NAME - -gff3_fix_cds_frame.pl - -The script will split the gff input file into different files according to the different Level2 feature that it contains. - -=head1 SYNOPSIS - - ./gff3_sp_splitByLevel2Feature.pl -g infile.gff [ -o outfolder ] - ./gff3_sp_splitByLevel2Feature.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<-g>, B<--gff> or B<-ref> - -Input GFF3 file that will be read (and sorted) - -=item B<-o> or B<--output> - -Output folder. If no output folder provided, the default name will be . - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_statistics.pl b/annotation/Tools/Util/gff/gff3_sp_statistics.pl deleted file mode 100755 index 19a64af43..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_statistics.pl +++ /dev/null @@ -1,283 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Getopt::Long; -use Pod::Usage; -use Statistics::R; -use IO::File; -use List::MoreUtils qw(uniq); -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient; -use NBIS::CheckModule qw(:Ok); -use NBIS::Plot::R qw(:Ok); - -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. 
# -######################################################## -}; - -my $gff = undef; -my $opt_output = undef; -my $opt_genomeSize = undef; -my $opt_plot = undef; -my $help= 0; - -if ( !GetOptions( - "help|h" => \$help, - 'o|output=s' => \$opt_output, - 'd|p' => \$opt_plot, - 'g|gs=s' => \$opt_genomeSize, - "gff|f=s" => \$gff)) - -{ - pod2usage( { -message => "Failed to parse command line", - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 1, - -exitval => 0, - -message => "$header \n" } ); -} - -if ( ! (defined($gff)) ){ - pod2usage( { - -message => "$header\nAt least 1 parameter is mandatory:\nInput reference gff file (--gff) \n\n", - -verbose => 0, - -exitval => 2 } ); -} - -#### IN / OUT -my $out = IO::File->new(); -if ($opt_output) { - - if (-f $opt_output){ - print "Cannot create a directory with the name $opt_output because a file with this name already exists.\n";exit(); - } - if (-d $opt_output){ - print "The output directory choosen already exists. Please geve me another Name.\n";exit(); - } - - open($out, '>', $opt_output) or die "Could not open file '$opt_output' $!"; - } -else{ - $out->fdopen( fileno(STDOUT), 'w' ); -} - -#Manage plot folder output -if($opt_plot){ - if ($opt_output){ - $opt_plot = $opt_output."_distribution_plots"; - } - else{ - $opt_plot = "distribution_plots"; - - if (-f $opt_plot){ - print "Cannot create a directory with the name $opt_plot because a file with this name already exists.\n";exit(); - } - if (-d $opt_plot){ - print "The default output directory $opt_plot use to save the distribution plots already exists. Please give me another folder name.\n";exit(); - } - } - - # Check R is available. If not we try to load it through Module software - if ( system("R --version 1>/dev/null 2>/dev/null") == 0 ) { - print "R is available. We can continue\n"; - } - else { - print "R is not loaded. 
We try to load it.\n"; - if(module_software_installed){ - module_load("R"); - } - else{ - print "Module tool doesn't exists. We cannot load R through it."; - } - } -} - - ##################### - # MAIN # - ##################### - - - -###################### -### Parse GFF input # -print "Reading file $gff\n"; -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ - input => $gff, - verbose => 1 - }); -print "Parsing Finished\n"; -### END Parse GFF input # -######################### - -#check number of level1 -my $nbLevel1 = 0; -foreach my $tag_l1 (keys %{$hash_omniscient->{'level1'}}){ - $nbLevel1 += keys %{$hash_omniscient->{'level1'}{$tag_l1}}; -} - -#chech number of level2 -my $nbLevel2 = keys %$hash_mRNAGeneLink; - -############## -# STATISTICS # -my $stat; -my $distri; -if($opt_genomeSize){ - ($stat, $distri) = gff3_statistics($hash_omniscient, $opt_genomeSize); -} -else{ - ($stat, $distri) = gff3_statistics($hash_omniscient); -} - -#print statistics -foreach my $infoList (@$stat){ - foreach my $info (@$infoList){ - print $out "$info"; - } - print $out "\n"; -} - -#Check if we have isoforms -if($nbLevel1 != $nbLevel2){ - - #print distribution before removing isoforms - if($opt_plot){ - print_distribution($opt_plot, "with_isoforms", $distri); - } - - print $out "\nApparently we have isoforms : Number of level1 features: $nbLevel1 / Number of level2 features: $nbLevel2\n"; - print $out "We will proceed to the statistics analysis using only the mRNA with the longest cds\n"; - - #create list of level2 where we kept only level2 that have cds and only the longest isoform ! 
- my $list_id_l2 = get_longest_cds_level2($hash_omniscient); - - # create a new omniscient with only one mRNA isoform per gene - my $omniscientNew = create_omniscient_from_idlevel2list($hash_omniscient, $hash_mRNAGeneLink, $list_id_l2); - - # print stats - my $stat; - my $distri; - if($opt_genomeSize){ - ($stat, $distri) = gff3_statistics($omniscientNew, $opt_genomeSize); - }else{ - ($stat, $distri) = gff3_statistics($omniscientNew); - } - - #print statistics - foreach my $infoList (@$stat){ - foreach my $info (@$infoList){ - print $out "$info"; - } - print $out "\n"; - } - - #print distribution after having removed the isoforms - if($opt_plot){ - print_distribution($opt_plot, "without_isoforms", $distri); - } -} -else{ #No isoforms - if($opt_plot){ - print_distribution($opt_plot, "without_isoforms", $distri); - } -} - -# END STATISTICS # -################## -print "Bye Bye.\n"; -####################################################################################################################### - #################### - # METHODS # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - -sub print_distribution{ - my ($folder, $subfolder, $distri)=@_; - - foreach my $type (keys %{$distri} ) { - - foreach my $level (keys %{$distri->{$type}} ) { - foreach my $tag ( keys %{$distri->{$type}{$level}} ) { - if( exists_keys ($distri,($type, $level, $tag, 'whole') ) ){ - - if(! -d $folder){ - mkdir $folder; - } - - if(! -d $folder."/".$subfolder){ - mkdir $folder."/".$subfolder; - } - - my $outputPDF = $folder."/".$subfolder."/".$type."Class_".$tag.".pdf"; - - #CREATE THE R COMMAND - my $nbValues = @{$distri->{$type}{$level}{$tag}{'whole'}}; - my $R_command = rcc_plot_from_list($distri->{$type}{$level}{$tag}{'whole'}, undef, "histogram", "$tag"." 
size (nt)", "Number of $tag", "Distribution of $tag sizes\nMade with $nbValues $tag"."s", $outputPDF); - #EXECUTE THE R COMMAND - execute_R_command($R_command); - } - - if( exists_keys ($distri,($type, $level, $tag, 'piece') ) ){ - } - - } - } - } -} - -__END__ - -=head1 NAME - -gff3_statistics.pl - -The script take a gff3 file as input. - -The script give basic statistics of a gff file. -Remark: identical feature from level1 or level2 with identical ID will be merged as well as their subsequent features (Level2 or level3). - -=head1 SYNOPSIS - - ./gff3_statistics.pl --gff file.gff [ -o outfile ] - ./gff3_statistics.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff> or B<-f> - -Input GFF3 file that will be read (and sorted) - -=item B<--gs> or B<-g> - -This option inform about the genome size in oder to compute more statistics. You can give the size in Nucleotide or directly the fasta file. - -=item B<-d> or B<-p> - -When this option is used, an histogram of distribution of the features will be printed in pdf files. (d means distribution, p means plot). - -=item B<--output> or B<-o> - -File where will be written the result. If no output file is specified, the output will be written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sp_webApollo_compliant.pl b/annotation/Tools/Util/gff/gff3_sp_webApollo_compliant.pl deleted file mode 100755 index 1ea5d0e1f..000000000 --- a/annotation/Tools/Util/gff/gff3_sp_webApollo_compliant.pl +++ /dev/null @@ -1,112 +0,0 @@ -#!/usr/bin/env perl - -my $header = qq{ -######################################################## -# NBIS 2018 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. 
# -######################################################## -}; - - -use strict; -use warnings; -use Pod::Usage; -use Getopt::Long; -use NBIS::GFF3::Omniscient; -use Bio::Tools::GFF; - -my $start_run = time(); -my $opt_gfffile; -my $opt_output; -my $opt_help = 0; - -# OPTION MANAGMENT -if ( !GetOptions( 'g|gff=s' => \$opt_gfffile, - 'o|output=s' => \$opt_output, - - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2 } ); -} - -if (! defined($opt_gfffile) ){ - pod2usage( { - -message => "\nAt least 1 parameter is mandatory:\nInput reference gff file (-g).\n\n". - "Ouptut is optional. Look at the help documentation to know more.\n", - -verbose => 0, - -exitval => 1 } ); -} - -###################### -# Manage output file # - -my $gffout; -if ($opt_output) { - $opt_output=~ s/.gff//g; - open(my $fh, '>', $opt_output.".gff") or die "Could not open file '$opt_output' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); - } -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - - ##################### - # MAIN # - ##################### - -###################### -### Parse GFF input # -my ($hash_omniscient, $hash_mRNAGeneLink) = slurp_gff3_file_JD({ input => $opt_gfffile - }); -print ("GFF3 file parsed\n"); - -######## -# Transform thing needed for webapollo. -webapollo_compliant($hash_omniscient); - -############# -# Print result -print_omniscient($hash_omniscient, $gffout); #print gene modified - -my $end_run = time(); -my $run_time = $end_run - $start_run; -print "Job done in $run_time seconds\n"; -__END__ - -=head1 NAME - -gff3_webApollo_compliant.pl - -This script aim to remove useless/problematic information for webapollo, change some feeaturee type to avoid problem whem loading them into webapollo, and optimize some attribute for a nice displaying. 
-=head1 SYNOPSIS - - ./gff3_webApollo_compliant.pl -g infile.gff [ -o outfile ] - ./gff3_webApollo_compliant.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<-g>, B<--gff> or B<-ref> - -Input GFF3 file that will be read (and sorted) - -=item B<-o> or B<--output> - -Output GFF file. If no output file is specified, the output will be -written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sq_add_hash_tag.pl b/annotation/Tools/Util/gff/gff3_sq_add_hash_tag.pl deleted file mode 100755 index ebf6b4245..000000000 --- a/annotation/Tools/Util/gff/gff3_sq_add_hash_tag.pl +++ /dev/null @@ -1,183 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Carp; -use Pod::Usage; -use Getopt::Long; -use IO::File ; -use Bio::Tools::GFF; - -my $header = qq{ -######################################################## -# NBIS 2016 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $start_run = time(); -my $inputFile=undef; -my $outfile=undef; -my $opt_help = 0; -my $interval=1; - -Getopt::Long::Configure ('bundling'); -if ( !GetOptions ('file|input|gff=s' => \$inputFile, - 'i|interval=i' => \$interval, - 'o|output=s' => \$outfile, - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 1, - -exitval => 0, - -message => "$header \n" } ); -} - -if ((!defined($inputFile)) ){ - pod2usage( { -message => "$header\nAt least 1 parameter is mandatory: -i", - -verbose => 0, - -exitval => 2 } ); -} - -if (( $interval > 2 or $interval < 1) ){ - pod2usage( { -message => 'interval must be 1 or 2. 
Have a look to the help to know more', - -verbose => 1, - -exitval => 1 } ); -} - -my $ostream = IO::File->new(); - -# Manage input fasta file -my $ref_in = Bio::Tools::GFF->new(-file => $inputFile, -gff_version => 3); - -# Manage Output -my $gffout; -if ($outfile) { - $outfile=~ s/.gff//g; - open(my $fh, '>', $outfile.".gff") or die "Could not open file '$outfile' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); - -} -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} -my $gffXtra=$gffout->{"_filehandle"}; #to add extra lines to gff!! - -#time to calcul progression -my $startP=time; -my $nbLine=`wc -l < $inputFile`; -$nbLine =~ s/ //g; -chomp $nbLine; -print "$nbLine line to process...\n"; - -my $line_cpt=0; -my $count=0; -my $nextGroup=0; -my @bucket=(); -my $before=""; -my $actual=""; -while (my $feature = $ref_in->next_feature() ) { - $line_cpt++; - - #What do we follow - - if ($interval eq "1"){ #per sequence - - $actual=lc($feature->seq_id); - if( ($actual ne $before) and ($actual ne "" and $before ne "") ) { - _write_bucket(\@bucket, $gffout); - $count++; - $nextGroup=0; - @bucket=(); - } - push (@bucket,$feature); - $before=lc($feature->seq_id); - } - - - if ($interval eq "2"){ #per feature group - $actual=lc($feature->primary_tag); - if ( ($actual ne $before) and ($before ne "") and ($actual eq "gene" or $actual eq "expressed_sequence_match" or $actual eq "match") ) { - _write_bucket(\@bucket, $gffout); - $count++; - $nextGroup=0; - @bucket=(); - } - push (@bucket,$feature); - $before=lc($feature->primary_tag); - } - - #Display progression - if ((30 - (time - $startP)) < 0) { - my $done = ($line_cpt*100)/$nbLine; - $done = sprintf ('%.0f', $done); - print "\rProgression : $done % processed.\n"; - $startP= time; - } -} - -##Last round - _write_bucket(\@bucket, $gffout); -$count++; - -if($count > 0){ - print "$count line added !\n"; -} -else{print "No line added !\n";} -my $end_run = time(); -my $run_time = 
$end_run - $start_run; -print "Job done in $run_time seconds\n"; - - -sub _write_bucket{ - my($bucket, $gffout)=@_; - foreach my $feature (@$bucket){ - $gffout->write_feature($feature); - } - - # Get the filehandle - print $gffXtra "###\n"; -} - -__END__ - -=head1 NAME - -gff3_remove_redundant_entries.pl - -remove redundant entries: same seq_id,source_tag,start,stop. - -=head1 SYNOPSIS - - gff3_remove_redundant_entries.pl -i [-o ] - gff3_remove_redundant_entries.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff>, B<--file> or B<--input> - -STRING: Input gff file that will be read. - -=item B<-i> or B<--interval> - -Integer: 1 or 2. 1 will add ### after each new sequence (column1 of the gff), while 2 will add the ### after each group of feature (gene). -By default the value is 1. - -=item B<-o> or B<--output> - -STRING: Output file. If no output file is specified, the output will be written to STDOUT. The result is in tabulate format. - -=item B<--help> or B<-h> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sq_add_locus_tag.pl b/annotation/Tools/Util/gff/gff3_sq_add_locus_tag.pl deleted file mode 100755 index 9585e8e09..000000000 --- a/annotation/Tools/Util/gff/gff3_sq_add_locus_tag.pl +++ /dev/null @@ -1,243 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Carp; -use Pod::Usage; -use Getopt::Long; -use IO::File ; -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient; - -my $start_run = time(); -my $header = qq{ -######################################################## -# NBIS 2018 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. 
# -######################################################## -}; - -my $inputFile=undef; -my $outfile=undef; -my $outformat=undef; -my $primaryTag=undef; -my $opt_help = 0; -my $locus_tag="locus"; -my $quiet = undef; -my $locus_cpt=1; -my $tag_in=undef; - -Getopt::Long::Configure ('bundling'); -if ( !GetOptions ('file|input|gff=s' => \$inputFile, - 'to|lo=s' => \$locus_tag, - 'ti|li=s' => \$tag_in, - "p|type|l=s" => \$primaryTag, - 'o|output=s' => \$outfile, - 'of=i' => \$outformat, - 'q|quiet!' => \$quiet, - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ((!defined($inputFile)) ){ - pod2usage( { -message => "$header\nAt least 1 parameter is mandatory: -i", - -verbose => 0, - -exitval => 1 } ); -} - -my $ostream = IO::File->new(); - -# Manage input fasta file -my $format = select_gff_format($inputFile); -my $ref_in = Bio::Tools::GFF->new(-file => $inputFile, -gff_version => $format); - -# Manage Output -if(! $outformat){ - $outformat=$format; -} - -# Manage Output -my $gffout; -if ($outfile) { - $outfile=~ s/.gff//g; - open(my $fh, '>', $outfile.".gff") or die "Could not open file '$outfile' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => $outformat ); - -} -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => $outformat ); -} - -#define the locus tag -if(! $locus_tag){ - $locus_tag="locus_tag"; -} - -# Manage $primaryTag -my @ptagList; -my ($LEVEL1, $LEVEL2, $LEVEL3) = load_levels(); -if(! $primaryTag){ - print "We will work on attributes from all Level1 features.\n"; - push(@ptagList, "all"); -} -else{ - @ptagList= split(/,/, $primaryTag); - foreach my $tag (@ptagList){ - if (exists($LEVEL1->{lc($tag)}) ){ - print "We will work on attributes from <$tag> feature.\n"; - } - else{ - print "<$tag> feature is not a level1 feature. 
Current accepted value are:\n"; - foreach my $key ( keys %{$LEVEL1}){ - print $key." "; - } - print "\n"; exit; - } - } -} - -#time to calcul progression -my $startP=time; -my $nbLine=`wc -l < $inputFile`; -$nbLine =~ s/ //g; -chomp $nbLine; -print "$nbLine line to process...\n"; - -my $line_cpt=0; -my $locus=undef; -while (my $feature = $ref_in->next_feature() ) { - $line_cpt++; - - my $ptag = lc($feature->primary_tag()); - - if ( exists($LEVEL1->{ $ptag }) ){ - - # initialize locus_tag - if ( grep( /^$ptag/, @ptagList ) or grep( /^all/, @ptagList ) ) { - - # if locus_tag has to be the value of an existing attribute. - if( $tag_in){ - if( $feature->has_tag($tag_in)){ - $locus = $feature->_tag_value($tag_in); - } - else{ - print "No attribute $tag_in for the following feature:\n".$feature->gff_string()."\n" if (! $quiet); - $locus = $locus_tag.$locus_cpt;$locus_cpt++; - print "We will use the created locus_tag value: $locus instead to name the locus!\n" if (! $quiet); - } - } - else{ - $locus = $locus_tag.$locus_cpt;$locus_cpt++; - } - } - else{ - $locus=undef; - } - # if level1 and part of those to provide locus_tag - if($locus){ - create_or_replace_tag($feature,$locus_tag, $locus); - } - } - else{ - # if not level 1 and we have to spread locus_tag to sub feature. 
- if($locus){ - create_or_replace_tag($feature,$locus_tag, $locus); - } - } - - $gffout->write_feature($feature); - - #Display progression - if ((30 - (time - $startP)) < 0) { - my $done = ($line_cpt*100)/$nbLine; - $done = sprintf ('%.0f', $done); - print "\rProgression : $done % processed.\n"; - $startP= time; - } -} - - -##Last round -my $end_run = time(); -my $run_time = $end_run - $start_run; -print "Job done in $run_time seconds\n"; - - -####################################################################################################################### - #################### - # methods # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - -__END__ - -=head1 NAME - -gff3_add_locus_tag.pl - -Add a shared locus tag per record. A record is all features linked by each other -by parent/children relationship (e.g Gene,mRNA,exon, CDS). - - -=head1 SYNOPSIS - - gff3_add_locus_tag.pl --gff [-o ] - gff3_add_locus_tag.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff>, B<--file> or B<--input> - -STRING: Input gff file that will be read. - -=item B<-p>, B<--type> or B<-l> - -Primary tag option, case insensitive, list. Allow to specied the Level1 feature types that will be handled. -By default all feature Level1 are taken into account. - -=item B<--lo> or B<--to> - -Locus tag output, by defaut it will be called locus_tag, but using this option you can specied the name of this attribute. - -=item B<--li> or B<--ti> - -Tag input, by default the value of the locus tag attribute will be locusX where X is an incremented number. -You can use the values of an existing attribute instead e.g the ID value: --li ID. - -=item B<--of> - -Output format, if no ouput format is given, the same as the input one detected will be used. -Otherwise you can force to have a gff version 1 or 2 or 3 by giving the corresponding number. - -=item B<-o> or B<--output> - -STRING: Output file. 
If no output file is specified, the output will be written to STDOUT. The result is in tabulate format. - -=item B<-q> or B<--quiet> - -To remove verbosity. - -=item B<--help> or B<-h> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sq_create_stable_id.pl b/annotation/Tools/Util/gff/gff3_sq_create_stable_id.pl deleted file mode 100755 index 38a4c6038..000000000 --- a/annotation/Tools/Util/gff/gff3_sq_create_stable_id.pl +++ /dev/null @@ -1,229 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Getopt::Long; -use Bio::Tools::GFF; - - -my $usage = qq{ -perl my_script.pl - Getting help: - [--help] - - Input: - [--gff filename] - The name of the gff file to read. - - [--clear_names] - Flag to remove the 'Name' attribute from gene and mRNA features - [--id_trunk ] - 6-character base for all IDs (e.g. HOMSAP for Homo sapiens) - [--clean ] - Set to anything to remove Maker quality scores from GFF file - [--ccount ] - Set the cdna counter to something other than 0 - [--gcount ] - Set the gene counter to something other than 0 - [--exount ] - Set the exon counter to something other than 0 - [--tcount ] - Set the transcript counter to something other than 0 - - Ouput: - [--outfile filename] - The name of the output file. 
-}; - -my $outfile = undef; -my $gff = undef; -my $clear_names; - -my $gcount = undef; -my $tcount = undef; -my $ecount = undef; -my $ccount = undef; - -my $clean = undef; -my $id_trunk = undef; -my $current_gene = undef; -my %transcript_hash ; -my %lookup ; - -my $help; - -GetOptions( - "help" => \$help, - "gff=s" => \$gff, - "clear_names" => \$clear_names, - "gcount=i" => \$gcount, - "tcount=i" => \$tcount, - "ecount=i" => \$ecount, - "ccount=i" => \$ccount, - "clean=s" => \$clean, - "id_trunk=s" => \$id_trunk, - "outfile=s" => \$outfile); - -# Print Help and exit -if ($help) { - print $usage; - exit(0); -} - -if ($outfile) { - open(STDOUT, ">$outfile") or die("Cannot open $outfile"); -} - -### Create ID trunk - -my $gcounter = $gcount || 100000; -my $tcounter = $tcount || 100000; -my $ecounter = $ecount || 100000; -my $ccounter = $ccount || 100000; -my $ucounter = 100000; - -### Parse GFF input file and add annotations - -open(GFF, "<$gff") || die("Can't open $gff."); - -my $gffio = Bio::Tools::GFF->new(-file => $gff, -gff_version => 3); -my $gffout = Bio::Tools::GFF->new(-gff_version => 3); - -while( my $feature = $gffio->next_feature()) { - - my $old_id = ($feature->get_tag_values('ID'))[0]; - - if ($feature->has_tag('Name') and defined ($clear_names) ) { - $feature->remove_tag('Name'); - } - - my @parents = undef; - unless ($feature->primary_tag =~/gene/) { - @parents = $feature->get_tag_values('Parent'); - } - - if ($feature->primary_tag =~ /gene/) { - $gcounter += 1; - my $new_id = $id_trunk . "G" . $gcounter ; - $current_gene = $new_id; - $feature->remove_tag('ID'); - $feature->add_tag_value('ID',$new_id); - - $feature->add_tag_value("oId",$old_id); - - } elsif ($feature->primary_tag =~ /mRNA/ or $feature->primary_tag =~ /RNA/) { - $tcounter += 1; - my $new_id = $id_trunk . "T" . 
$tcounter ; - - $transcript_hash{$old_id} = $new_id; - - $feature->remove_tag('Parent'); - $feature->add_tag_value('Parent',$current_gene); - - $feature->remove_tag('ID'); - $feature->add_tag_value('ID',$new_id); - - $feature->add_tag_value('oId',$old_id); - - if (defined $clean) { - $feature->remove_tag('_AED') if ($feature->has_tag('_AED')); - $feature->remove_tag('_eAED') if ($feature->has_tag('_eAED')); - $feature->remove_tag('_QI') if ($feature->has_tag('_QI')); - } - - - - } elsif ($feature->primary_tag =~ /exon/) { - $ecounter += 1; - my $new_id = $id_trunk . "E" . $ecounter ; - - # Parents can be > 1, so we need to look up all new IDs - my @new_parents = (); - - foreach my $parent (@parents) { - - my $this_mapped_id = $transcript_hash{$parent} ; - push(@new_parents,$this_mapped_id); - - } - - $feature->remove_tag('Parent'); - $feature->add_tag_value('Parent',@new_parents); - - $feature->remove_tag('ID'); - $feature->add_tag_value('ID',$new_id); - - } elsif ($feature->primary_tag =~ /CDS/ ) { - - # CDS features are one-id-multiple-location features, so only one ID. - # $ccounter += 1; - # my $new_id = $id_trunk . "C" . $ccounter ; - - my $new_id = "" ; - - if (exists $lookup{$old_id}) { - $new_id = $lookup{$old_id}; - } else { - $ccounter += 1; - $new_id = $id_trunk . "C" . $ccounter ; - $lookup{$old_id} = $new_id; - } - - my $parent = shift @parents; - - my $current_transcript = $transcript_hash{$parent}; - - $feature->remove_tag('Parent'); - $feature->add_tag_value('Parent',$current_transcript); - - $feature->remove_tag('ID'); - $feature->add_tag_value('ID',$new_id); - - } elsif ($feature->primary_tag =~ /utr/ or $feature->primary_tag =~ /UTR/ ) { - - my $new_id = ""; - - if (exists $lookup{$old_id}) { - $new_id = $lookup{$old_id}; - } else { - $ucounter += 1; - $new_id = $id_trunk . "U" . 
$ucounter ; - $lookup{$old_id} = $new_id; - } - - my $parent = shift @parents; - - my $current_transcript = $transcript_hash{$parent}; - - $feature->remove_tag('ID'); - $feature->add_tag_value('ID',$new_id); - - - $feature->remove_tag('Parent'); - $feature->add_tag_value('Parent',$current_transcript); - - $feature->add_tag_value('OId',$old_id); - - } elsif ($feature->primary_tag =~ /stop_codon_read_through/ ) { - - my $parent = shift @parents; - my $parent_id = $lookup{$parent}; - - $feature->remove_tag('Parent'); - $feature->add_tag_value('Parent',$parent_id); - } elsif ($feature->primary_tag =~ /non_canonical/ ) { - - my $parent = shift @parents ; - - my $parent_id = $transcript_hash{$parent} ; - - $feature->remove_tag('Parent'); - $feature->add_tag_value('Parent',$parent_id); - - } - - - print $feature->gff_string($gffout) , "\n"; -} - -$gffio->close(); diff --git a/annotation/Tools/Util/gff/gff3_sq_keep_annotation_from_fastaSeq.pl b/annotation/Tools/Util/gff/gff3_sq_keep_annotation_from_fastaSeq.pl deleted file mode 100755 index e7ca7c212..000000000 --- a/annotation/Tools/Util/gff/gff3_sq_keep_annotation_from_fastaSeq.pl +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Carp; -use Pod::Usage; -use Getopt::Long; -use Bio::DB::Fasta; -use IO::File ; -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient qw(exists_keys); - -my $header = qq{ -######################################################## -# NBIS 2019 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $start_run = time(); -my $opt_gfffile=undef; -my $verbose=undef; -my $opt_fastafile=undef; -my $outfile=undef; -my $opt_help = 0; - - -Getopt::Long::Configure ('bundling'); -if ( !GetOptions ('file|input|gff=s' => \$opt_gfffile, - 'f|fasta=s' => \$opt_fastafile, - 'o|output=s' => \$outfile, - 'v|verbose!' => \$verbose, - 'h|help!' 
=> \$opt_help ) ) -{ - pod2usage( { -message => "$header\nFailed to parse command line", - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header \n" } ); -} - -if ((!defined($opt_gfffile)) ){ - pod2usage( { -message => 'at least 2 parameters are mandatory', - -verbose => 0, - -exitval => 2 } ); -} - -my $ostream = IO::File->new(); - -# Manage input fasta file -my $ref_in = Bio::Tools::GFF->new(-file => $opt_gfffile, -gff_version => 3); - -# Manage Output -my $gffout; -if ($outfile) { - open(my $fh, '>', $outfile) or die "Could not open file '$outfile' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); -} -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - - -#### read fasta -my $nbFastaSeq=0; -my $db = Bio::DB::Fasta->new($opt_fastafile); -print ("Genome fasta parsed\n"); - -#time to calcul progression -my $startP=time; -my $cpt_removed=0; -my %seqNameSeen; -my $cpt_kept=0; -while (my $feature = $ref_in->next_feature() ) { - - if($db->seq($feature->seq_id)){ - $gffout->write_feature($feature); - # to count number of sequence with annotation - if(! exists_keys(\%seqNameSeen, ($feature->seq_id))){ - $seqNameSeen{$feature->seq_id}++; - } - $cpt_kept++; - } - else{ - print "SequenceID ".$feature->seq_id." is absent from the fasta file\n" if($verbose); - $cpt_removed++; - } -} - -print "We removed $cpt_removed annotations.\n"; -my $nbSeqWithAnnotation = scalar keys %seqNameSeen; -print "We kept $cpt_kept annotations that are linked to $nbSeqWithAnnotation sequences.\n"; -my $end_run = time(); -my $run_time = $end_run - $start_run; -print "Job done in $run_time seconds\n"; - -__END__ - -=head1 NAME - -gff3_sq_keep_annotation_from_fastaSeq.pl - -This script is a kind of annotation filter by sequence name. It goes through the gff annotation features and remove those that are not linked to a sequence from the fasta file provided. 
-The match between sequence name in the fasta file and the 1st column of the gff3 file is case sensitive. - -=head1 SYNOPSIS - - gff3_sq_keep_annotation_from_fastaSeq.pl --gff --fasta [-o ] - gff3_sq_keep_annotation_from_fastaSeq.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff>, B<--file> or B<--input> - -STRING: Input gff file. - -=item B<-f> or B<--fasta> - -STRING: fasta file. - -=item B<-v> or B<--verbose> - -For verbosity - -=item B<-o> or B<--output> - -STRING: Output file. If no output file is specified, the output will be written to STDOUT. The result is in tabulate format. - -=item B<--help> or B<-h> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sq_list_attributes.pl b/annotation/Tools/Util/gff/gff3_sq_list_attributes.pl deleted file mode 100755 index 86de44f73..000000000 --- a/annotation/Tools/Util/gff/gff3_sq_list_attributes.pl +++ /dev/null @@ -1,226 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Carp; -use Clone 'clone'; -use Getopt::Long; -use Pod::Usage; -use IO::File; -use List::MoreUtils qw(uniq); -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient qw(select_gff_format exists_keys); - -my $start_run = time(); -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my %handlers; -my $gff = undef; -my $help= 0; -my $primaryTag=undef; -my $outfile=undef; - -if ( !GetOptions( - "help|h" => \$help, - "gff|f=s" => \$gff, - "p|t|l=s" => \$primaryTag, - "output|outfile|out|o=s" => \$outfile)) - -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ( ! 
(defined($gff)) ){ - pod2usage( { - -message => "$header\nAt least 1 parameter is mandatory:\nInput reference gff file (--gff) \n\n", - -verbose => 0, - -exitval => 2 } ); -} - -# Manage $primaryTag -my @ptagList; -if(! $primaryTag or $primaryTag eq "all"){ - print "We will work on attributes from all features\n"; - push(@ptagList, "all"); -} -else{ - @ptagList= split(/,/, $primaryTag); - foreach my $tag (@ptagList){ - print "We will work on attributes from $tag feature.\n"; - } -} - -# Manage input fasta file -my $format = select_gff_format($gff); -my $ref_in = Bio::Tools::GFF->new(-file => $gff, -gff_version => $format); - - - ##################### - # MAIN # - ##################### - -my %all_attributes; -my %attributes_per_level; -###################### -### Parse GFF input # - -#time to calcul progression -my $startP=time; -my $nbLine=`wc -l < $gff`; -$nbLine =~ s/ //g; -chomp $nbLine; -print "$nbLine line to process...\n"; - -my $geneName=undef; -my $line_cpt=0; -while (my $feature = $ref_in->next_feature() ) { - $line_cpt++; - - - manage_attributes($feature, \@ptagList, \%all_attributes, \%attributes_per_level); - - #Display progression - if ((30 - (time - $startP)) < 0) { - my $done = ($line_cpt*100)/$nbLine; - $done = sprintf ('%.0f', $done); - print "\rProgression : $done % processed.\n"; - $startP= time; - } -} - -#print "We added $nbNameAdded Name attributes\n"; -my $out = IO::File->new(); -if ($outfile) { - open($out, '>', $outfile) or die "Could not open file $outfile.txt $!"; -} -else{ - $out->fdopen( fileno(STDOUT), 'w' ); -} - -# Print information by feature -my $nbFeat = scalar keys %attributes_per_level; -print $out "\nWe met ".$nbFeat." different feature types."; -foreach my $feature_type ( sort keys %attributes_per_level){ - my $nbAtt = scalar keys %{$attributes_per_level{$feature_type}}; - print $out "\nHere the list of all the attributes tags met for the feature type <".$feature_type."> (".$nbAtt." 
attributes):\n"; - foreach my $attribute ( sort keys %{$attributes_per_level{$feature_type}} ){ - print $out $attribute."\n"; - } -} - -# Print Global information -my $nbAtt = scalar keys %all_attributes; -print $out "\nHere the list of all the attributes tags met (".$nbAtt." attributes):\n"; -foreach my $attribute ( sort keys %all_attributes){ - print $out $attribute."\n"; -} - - -##Last round -my $end_run = time(); -my $run_time = $end_run - $start_run; -print $out "\nJob done in $run_time seconds\n"; - -####################################################################################################################### - #################### - # methods # - ################ - ############## - ############ - ########## - ######## - ###### - #### - ## - -sub manage_attributes{ - my ($feature, $ptagList, $all_attributes, $attributes_per_level)=@_; - - my $primary_tag=$feature->primary_tag; - - # check primary tag (feature type) to handle - foreach my $ptag (@$ptagList){ - - if($ptag eq "all"){ - tag_from_list($feature,$all_attributes, $attributes_per_level); - } - elsif(lc($ptag) eq lc($primary_tag) ){ - tag_from_list($feature,$all_attributes, $attributes_per_level); - } - } -} - -sub tag_from_list{ - my ($feature, $all_attributes, $attributes_per_level)=@_; - - foreach my $tag ($feature->get_all_tags) { - # create handler if needed (on the fly) - if(! exists_keys( $all_attributes,($tag) ) ) { - $all_attributes{$tag}++; - } - if(! exists_keys ( $attributes_per_level,($feature->primary_tag,$tag) ) ) { - $attributes_per_level{$feature->primary_tag}{$tag}++; - } - - } -} - - -__END__ - - -=head1 NAME - -gff3_sp_list_attributes.pl - -The script take a gff3 file as input. - -The script give information about attribute tags used within you file. 
- -=head1 SYNOPSIS - - ./gff3_sp_list_attributes.pl -gff file.gff -p level2,cds,exon [ -o outfile ] - ./gff3_sp_list_attributes.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff> or B<-f> - -Input GFF3 file that will be read (and sorted) - -=item B<-p>, B<-t> or B<-l> - -primary tag option, case insensitive, list. Allow to specied the feature types that will be handled. -You can specified a specific feature by given its primary tag name (column 3) as: cds, Gene, MrNa -You can specify directly all the feature of a particular level: - level2=mRNA,ncRNA,tRNA,etc - level3=CDS,exon,UTR,etc -By default all feature are taking in account. fill the option by the value "all" will have the same behaviour. - -=item B<-o> , B<--output> , B<--out> or B<--outfile> - -Output GFF file. If no output file is specified, the output will be -written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sq_manage_ID.pl b/annotation/Tools/Util/gff/gff3_sq_manage_ID.pl deleted file mode 100755 index 47a7b537c..000000000 --- a/annotation/Tools/Util/gff/gff3_sq_manage_ID.pl +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env perl - -################################################### -# Jacques Dainat 01/2016 # -# Bioinformatics Infrastructure for Life Sciences # -# jacques.dainat@nbis.se # -################################################### - -use strict; -use warnings; -use Carp; -use Pod::Usage; -use Getopt::Long; -use IO::File ; -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient qw(select_gff_format create_or_replace_tag exists_keys); - -my $start_run = time(); -my $inputFile=undef; -my $outfile=undef; -my $outformat=undef; -my $opt_help = 0; - -Getopt::Long::Configure ('bundling'); -if ( !GetOptions ('file|input|gff|i=s' => \$inputFile, - 'of=i' => \$outformat, - 'o|output=s' => \$outfile, - 'h|help!' 
=> \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 0 } ); -} - -if ((!defined($inputFile)) ){ - pod2usage( { -message => 'at least 1 parameter is mandatory: -i', - -verbose => 1, - -exitval => 1 } ); -} - - -# Manage input fasta file -my $format = select_gff_format($inputFile); -my $ref_in = Bio::Tools::GFF->new(-file => $inputFile, -gff_version => $format); - - -# Manage Output -if(! $outformat){ - $outformat=$format; -} - -my $gffout; -if ($outfile) { - $outfile=~ s/.gff//g; - open(my $fh, '>', $outfile.".gff") or die "Could not open file '$outfile' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => $outformat ); - -} -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => $outformat); -} - -#time to calcul progression -my $startP=time; -my $nbLine=`wc -l < $inputFile`; -$nbLine =~ s/ //g; -chomp $nbLine; -print "$nbLine line to process...\n"; - -my $line_cpt=0; -my %hash_IDs; -my %featCount; -my %mapID; -while (my $feature = $ref_in->next_feature() ) { - $line_cpt++; - - _uniq_ID ($feature, \%hash_IDs, \%featCount, \%mapID); - - if($feature->has_tag('Parent')){ - my $parent = lc($feature->_tag_value('Parent')); - if(! exists($mapID{$parent})){ - print "How is it possible ? 
This parent hasn't been seen before\n"; - } - create_or_replace_tag($feature,'Parent', $mapID{$parent}); - } - - $gffout->write_feature($feature); - - ##################### - #Display progression - if ((30 - (time - $startP)) < 0) { - my $done = ($line_cpt*100)/$nbLine; - $done = sprintf ('%.0f', $done); - print "\rProgression : $done % processed.\n"; - $startP= time; - } -} - -##Last round -my $end_run = time(); -my $run_time = $end_run - $start_run; -print "Job done in $run_time seconds\n"; - - - -sub _uniq_ID{ - my ($feature, $hash_IDs, $miscCount, $mapID) = @_; - - - my $key=lc($feature->primary_tag); - $miscCount->{$key}++; - my $id = $key."-".$miscCount->{$key}; - - while( exists_keys($hash_IDs, ($id) ) ){ #loop until we found an uniq tag - $miscCount->{$key}++; - $id = $key."-".$miscCount->{$key}; - } - - #push the new ID - $hash_IDs->{$id}++; - $mapID->{lc($feature->_tag_value('ID'))} = $id; - - # modify the feature ID with the correct one chosen - create_or_replace_tag($feature,'ID', $id); #modify ID to replace by parent value -} - -__END__ - -=head1 NAME - -gff3_sq_manage_ID.pl - -change IDs to give uniq one. This script is sequential, it means it will works great even if 2 different group of feature an parent that have the same ID. At the end they will have different IDs. - -=head1 SYNOPSIS - - gff3_sq_manage_ID.pl --gff [-o ] - gff3_sq_manage_ID.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff>, B<--file> or B<--input> - -STRING: Input gff(1 or 2 or 3) or gtf file that will be read. - -=item B<--of> - -Output format, if no ouput format is given, the same as the input one detected will be used. Otherwise you can force to have a gff version 1 or 2 or 3 by giving the corresponding number. - -=item B<-o> or B<--output> - -STRING: Output file. If no output file is specified, the output will be written to STDOUT. The result is in tabulate format. - -=item B<--help> or B<-h> - -Display this helpful text. 
- -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sq_manage_ID_cufflinks.pl b/annotation/Tools/Util/gff/gff3_sq_manage_ID_cufflinks.pl deleted file mode 100755 index bf5fe9780..000000000 --- a/annotation/Tools/Util/gff/gff3_sq_manage_ID_cufflinks.pl +++ /dev/null @@ -1,152 +0,0 @@ -#!/usr/bin/env perl - -################################################### -# Jacques Dainat 01/2016 # -# Bioinformatics Infrastructure for Life Sciences # -# jacques.dainat@nbis.se # -################################################### - -use strict; -use warnings; -use Carp; -use Pod::Usage; -use Getopt::Long; -use IO::File ; -use Bio::Tools::GFF; -use NBIS::GFF3::Omniscient qw(select_gff_format create_or_replace_tag); - -my $start_run = time(); - -my $inputFile=undef; -my $outfile=undef; -my $outformat=undef; -my $opt_help = 0; - -Getopt::Long::Configure ('bundling'); -if ( !GetOptions ('file|input|gff|i=s' => \$inputFile, - 'of=i' => \$outformat, - 'o|output=s' => \$outfile, - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 0 } ); -} - -if ((!defined($inputFile)) ){ - pod2usage( { -message => 'at least 1 parameter is mandatory: -i', - -verbose => 1, - -exitval => 1 } ); -} - - -# Manage input fasta file -my $format = select_gff_format($inputFile); -my $ref_in = Bio::Tools::GFF->new(-file => $inputFile, -gff_version => $format); - - -# Manage Output -if(! 
$outformat){ - $outformat=$format; -} - -my $gffout; -if ($outfile) { - $outfile=~ s/.gff//g; - open(my $fh, '>', $outfile.".gff") or die "Could not open file '$outfile' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => $outformat ); - -} -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => $outformat); -} - -#time to calcul progression -my $startP=time; -my $nbLine=`wc -l < $inputFile`; -$nbLine =~ s/ //g; -chomp $nbLine; -print "$nbLine line to process...\n"; - -my $line_cpt=0; -my $previousGeneID=""; -my $gene_id=0; -my $previousTranscriptID=""; -my $transcript_id=0; -while (my $feature = $ref_in->next_feature() ) { - $line_cpt++; - - if($feature->has_tag('gene_id')){ - my $parent = lc($feature->_tag_value('gene_id')); - if($parent ne $previousGeneID){ - $gene_id++; - $previousGeneID=$parent; - } - create_or_replace_tag($feature,'gene_id', $gene_id); - } - if($feature->has_tag('transcript_id')){ - my $parent = lc($feature->_tag_value('transcript_id')); - if($parent ne $previousTranscriptID){ - $transcript_id++; - $previousTranscriptID=$parent; - } - create_or_replace_tag($feature,'transcript_id', $gene_id); - } - - $gffout->write_feature($feature); - - ##################### - #Display progression - if ((30 - (time - $startP)) < 0) { - my $done = ($line_cpt*100)/$nbLine; - $done = sprintf ('%.0f', $done); - print "\rProgression : $done % processed.\n"; - $startP= time; - } -} - -##Last round -my $end_run = time(); -my $run_time = $end_run - $start_run; -print "Job done in $run_time seconds\n"; - -__END__ - -=head1 NAME - -gff3_sq_manage_ID.pl - -change IDs to give uniq one. This script is sequential, it means it will works great even if 2 different group of feature an parent that have the same ID. At the end they will have different IDs. 
- -=head1 SYNOPSIS - - gff3_sq_manage_ID.pl --gff [-o ] - gff3_sq_manage_ID.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff>, B<--file> or B<--input> - -STRING: Input gff(1 or 2 or 3) or gtf file that will be read. - -=item B<--of> - -Output format, if no ouput format is given, the same as the input one detected will be used. Otherwise you can force to have a gff version 1 or 2 or 3 by giving the corresponding number. - -=item B<-o> or B<--output> - -STRING: Output file. If no output file is specified, the output will be written to STDOUT. The result is in tabulate format. - -=item B<--help> or B<-h> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sq_mask.pl b/annotation/Tools/Util/gff/gff3_sq_mask.pl deleted file mode 100755 index 71aded448..000000000 --- a/annotation/Tools/Util/gff/gff3_sq_mask.pl +++ /dev/null @@ -1,175 +0,0 @@ -#!/usr/bin/env perl - -#### -# Jacques Dainat 2015/03 -# jacques.dainat@nbis.se -#### -use strict; -use warnings; -use Pod::Usage; -use Getopt::Long; -use Bio::SeqIO ; -use IO::File ; -use Bio::Tools::GFF; - -my $start_run = time(); -my $opt_HardMask; -my $opt_SoftMask; -my $opt_gfffile; -my $opt_fastafile; -my $opt_output; -my $opt_help = 0; - -# Character for hardMask -my $hardMaskChar; -my $width = 60; # line length printed - -# OPTION MANAGMENT -if ( !GetOptions( 'g|gff=s' => \$opt_gfffile, - 'f|fa|fasta=s' => \$opt_fastafile, - 'hm:s' => \$opt_HardMask, - 'sm' => \$opt_SoftMask, - 'o|output=s' => \$opt_output, - - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 0 } ); -} - -if ( (! (defined($opt_gfffile)) ) || (! (defined($opt_fastafile)) ) || ( (! defined($opt_HardMask) && (! 
defined($opt_SoftMask))) ) ){ - pod2usage( { - -message => "\nAt least 3 parametes are mandatory:\nInput reference gff file (-g); Input reference fasta file (-f); Mask type (-hm for hard mask or -sm for soft mask)\n\n". - "Ouptut is optional. Look at the help documentation to know more.\n", - -verbose => 0, - -exitval => 2 } ); -} - -if (defined ($opt_HardMask) && defined ($opt_SoftMask)){ - print "It is not possible to HardMask and SoftMask at the same time. Choose only one the options and try again !\n"; exit(); -} - -my $ostream = IO::File->new(); -if (defined($opt_output) ) { - $ostream->open( $opt_output, 'w' ) -} -else{ - $ostream->fdopen( fileno(STDOUT), 'w' ) or - croak( sprintf( "Can not open STDOUT for writing: %s", $! ) ); -} - -if (defined( $opt_HardMask)){ - print "You choose to Hard Mask the genome.\n"; - if (! $opt_HardMask){ - $hardMaskChar = "n"; - } - elsif(length($opt_HardMask) == 1){ - $hardMaskChar = $opt_HardMask; - } - else{print "$opt_HardMask cannot be used to Mask. 
A character is mandatory.\n";exit;} - print "Charcater uses for Mask: $hardMaskChar\n"; -} -if (defined( $opt_HardMask)){ - print "You choose to Soft Mask the genome.\n"; -} -##### MAIN #### - -#### read gff file and save info in memory -my %gff; my $nbLineRead=0; - -# Manage input fasta file -my $gff_in = Bio::Tools::GFF->new(-file => $opt_gfffile, -gff_version => 3); - - -print( "Reading features from $opt_gfffile...\n"); - while (my $feature = $gff_in->next_feature()) { - my $seqname=$feature->seq_id(); - my $start=$feature->start(); - my $end=$feature->end(); - push @{$gff{uc $seqname}},"$start $end"; - $nbLineRead++; - } -close gff_in; -print "$nbLineRead lines read\n"; - -#### read fasta -my $nbFastaSeq=0; -my $nucl_masked=0; -my $inFasta = Bio::SeqIO->new(-file => "$opt_fastafile" , '-format' => 'Fasta'); - -while ($_=$inFasta->next_seq()) { - my $seqname = $_->id; - my $sequence = $_->seq; - - foreach (@{$gff{uc $seqname}}) { - my ($start,$end) = split; - if ($opt_SoftMask){ - my $strinTolo = substr($sequence,$start-1,$end+1-$start); - substr($sequence,$start-1,$end+1-$start) = lc $strinTolo; - } - else{ - substr($sequence,$start-1,$end+1-$start) = $hardMaskChar x ($end+1-$start); - } - $nucl_masked=$nucl_masked+($end-$start+1); - } - - print $ostream ">$seqname\n"; - for (my $i=0;$i, B<--gff> or B<-ref> - -Input GFF3 file that will be read (and sorted) - -=item B<-f> or B<--fasta> - -Input fasta file that will be masked - -=item B<-sm> - -SoftMask option =>Sequences masked will be in lowercase - -=item B<-hm> - -HardMask option => Sequences masked will be replaced by a character. By default the character used is 'n'. But you are allowed to speceify any character of your choice. To use 'z' instead of 'n' type: -hm z - -=item B<-o> or B<--output> - -Output GFF file. If no output file is specified, the output will be -written to STDOUT. - -=item B<-h> or B<--help> - -Display this helpful text. 
- -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sq_remove_redundant_entries.pl b/annotation/Tools/Util/gff/gff3_sq_remove_redundant_entries.pl deleted file mode 100755 index b7947887c..000000000 --- a/annotation/Tools/Util/gff/gff3_sq_remove_redundant_entries.pl +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Carp; -use Pod::Usage; -use Getopt::Long; -use IO::File ; -use Bio::Tools::GFF; - -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; -my $start_run = time(); -my $inputFile; -my $outfile; -my $opt_help = 0; - -Getopt::Long::Configure ('bundling'); -if ( !GetOptions ('i|file|input|gff=s' => \$inputFile, - 'o|output=s' => \$outfile, - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -# Print Help and exit -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ((!defined($inputFile)) ){ - pod2usage( { -message => 'at least 1 parameter is mandatory: -i', - -verbose => 0, - -exitval => 2 } ); -} - -my $ostream = IO::File->new(); - -# Manage input fasta file -my $ref_in = Bio::Tools::GFF->new(-file => $inputFile, -gff_version => 3); - -# Manage Output -my $gffout; -if ($outfile) { - $outfile=~ s/.gff//g; -open(my $fh, '>', $outfile.".gff") or die "Could not open file '$outfile' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); -} -else{ - $gffout = Bio::Tools::GFF->new(-fh => \*STDOUT, -gff_version => 3); -} - - -#time to calcul progression -my $startP=time; -my $nbLine=`wc -l < $inputFile`; -$nbLine =~ s/ //g; -chomp $nbLine; -print "$nbLine line to process...\n"; -my $line_cpt=0; - -my $count=0; -my %check; # keep track of signature seen - -while (my $feature = 
$ref_in->next_feature() ) { - $line_cpt++; - - my $position=lc($feature->seq_id)."".lc($feature->primary_tag)."".$feature->start()."".$feature->end(); #uniq position - - if(exists ($check{$position} ) ){ - $count++; - next; - } - else{ - $gffout->write_feature($feature); - $check{$position}++; - } - - #Display progression - if ((30 - (time - $startP)) < 0) { - my $done = ($line_cpt*100)/$nbLine; - $done = sprintf ('%.0f', $done); - print "Progression : $done % processed.\n"; - $startP= time; - } -} - -if($count > 0){ - print "$count entries removed !\n"; -} -else{print "No entry removed !\n";} -my $end_run = time(); -my $run_time = $end_run - $start_run; -print "Job done in $run_time seconds\n"; - -__END__ - -=head1 NAME - -gff3_remove_redundant_entries.pl - -remove redundant entries: same seq_id,primary_tag,start,stop. - -=head1 SYNOPSIS - - gff3_remove_redundant_entries.pl -i [-o ] - gff3_remove_redundant_entries.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<-i>, B<--gff>, B<--file> or B<--input> - -STRING: Input gff file that will be read. - - -=item B<-o> or B<--output> - -STRING: Output file. If no output file is specified, the output will be written to STDOUT. The result is in tabulate format. - -=item B<--help> or B<-h> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sq_repeats_analyzer.pl b/annotation/Tools/Util/gff/gff3_sq_repeats_analyzer.pl deleted file mode 100755 index 0a2f4d7d1..000000000 --- a/annotation/Tools/Util/gff/gff3_sq_repeats_analyzer.pl +++ /dev/null @@ -1,206 +0,0 @@ -#!/usr/bin/env perl - - -use strict; -use warnings; -use Carp; -use Pod::Usage; -use Getopt::Long; -use IO::File ; -use Bio::SeqIO; -use Bio::Tools::GFF; - -my $header = qq{ -######################################################## -# NBIS 2019 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. 
# -######################################################## -}; - -my $start_run = time(); -my @inputFile; -my $outputFile; -my $genome; -my $opt_help = 0; - -Getopt::Long::Configure ('bundling'); -if ( !GetOptions ('i|file|input|gff=s' => \@inputFile, - 'o|output=s' => \$outputFile, - 'g|genome=s' => \$genome, - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if (! @inputFile ){ - pod2usage( { -message => "$header\nAt least 1 input file is mandatory", - -verbose => 0, - -exitval => 1 } ); -} - -my $ostream = IO::File->new(); - -# Manage Output -if(defined($outputFile)) -{ -$ostream->open( $outputFile, 'w' ) or - croak( - sprintf( "Can not open '%s' for reading: %s", $outputFile, $! ) ); -} -else{ - $ostream->fdopen( fileno(STDOUT), 'w' ) or - croak( sprintf( "Can not open STDOUT for writing: %s", $! ) ); -} - -#check genome size -my $genomeSize=undef; - if($genome){ - if( $genome =~ /^[0-9]+$/){ #check if it's a number - $genomeSize=$genome; - } - elsif($genome){ - my $seqio = Bio::SeqIO->new(-file => $genome, '-format' => 'Fasta'); - while(my $seq = $seqio->next_seq) { - my $string = $seq->seq; - $genomeSize += length($string); - } - } - printf("%-45s%d%s", "Total sequence length", $genomeSize,"\n"); - } - -#time to calcul progression -my $type_count; -my $type_bp; -my %check; #track the repeat already annotated to not. 
Allow to skip already read repeats - -foreach my $file (@inputFile){ -# Manage input fasta file - print "Reading $file\n"; - my $ref_in = Bio::Tools::GFF->new(-file => $file, -gff_version => 3); - - my $startP=time; - my $nbLine=`wc -l < $file`; - $nbLine =~ s/ //g; - chomp $nbLine; - print "$nbLine line to process...\n"; - my $line_cpt=0; - - local $| = 1; # Or use IO::Handle; STDOUT->autoflush; Use to print progression bar - while (my $feature = $ref_in->next_feature() ) { - $line_cpt++; - my $type = lc($feature->primary_tag); - ## repeatMasker or repeatRunner - if (($type eq 'match') or ($type eq 'protein_match')){ - - my $position=$feature->seq_id."".$feature->start()."".$feature->end(); #uniq position - if(exists ($check{$position} ) ){next;} - else{ - - my $nameAtt=$feature->_tag_value('Name'); - my $genus=(split ":", (split /\|/, (split /\s+/,$nameAtt)[0])[-1])[-1]; - $type_count->{$genus}++; - $type_bp->{$genus}+=($feature->end()-$feature->start())+1; - $check{$position}++; - } - } - - #Display progression - if ((30 - (time - $startP)) < 0) { - my $done = ($line_cpt*100)/$nbLine; - $done = sprintf ('%.0f', $done); - print "\rProgress : $done %"; - $startP= time; - } - } - print "\rProgress : 100 %\n"; -} - -my $totalNumber=0; -my $totalSize=0; - -if(defined($genomeSize)){ -print $ostream "Repeat type\tNumber\tSize total (kb)\tSize mean (bp)\t% of the genome\t/!\\Results are rounding to two decimal places \n"; - foreach my $gnx (sort {$a cmp $b} keys(%$type_count)) { - my $Sitotal=sprintf("%0.2f",($type_bp->{$gnx}/1000)); - my $SizeMean=sprintf("%0.2f",($type_bp->{$gnx}/$type_count->{$gnx})); - my $xGenome=sprintf("%0.2f",($type_bp->{$gnx}/$genomeSize)*100); - print $ostream $gnx,"\t",$type_count->{$gnx},"\t",$Sitotal,"\t",$SizeMean,"\t",$xGenome,"\n"; - - $totalNumber=$totalNumber+$type_count->{$gnx}; - $totalSize=$totalSize+$type_bp->{$gnx}; - - } -} -else{ - print $ostream "Repeat type\tNumber\tSize total (kb)\tSize mean (bp)\t/!\\Results are rounding to 
two decimal places \n"; - foreach my $gnx (sort {$a cmp $b} keys(%$type_count)) { - my $Sitotal=sprintf("%0.2f",($type_bp->{$gnx}/1000)); - my $SizeMean=sprintf("%0.2f",($type_bp->{$gnx}/$type_count->{$gnx})); - print $ostream $gnx,"\t",$type_count->{$gnx},"\t",$Sitotal,"\t",$SizeMean,"\n"; - - $totalNumber=$totalNumber+$type_count->{$gnx}; - $totalSize=$totalSize+$type_bp->{$gnx}; - - } -} - -my $goodTotalSize=sprintf("%0.2f",($totalSize/1000)); -my $goodTotalSizeMean=sprintf("%0.2f",($totalSize/$totalNumber)); - -if(defined($genomeSize)){ - my $goodxGenome=sprintf("%0.2f",($totalSize/$genomeSize)*100); - print $ostream "Total\t",$totalNumber,"\t",$goodTotalSize,"\t",$goodTotalSizeMean,"\t",$goodxGenome,"\n"; -} -else{ - print $ostream "Total\t",$totalNumber,"\t",$goodTotalSize,"\t",$goodTotalSizeMean,"\n"; -} - -my $end_run = time(); -my $run_time = $end_run - $start_run; -print "Job done in $run_time seconds\n"; - -__END__ - -=head1 NAME - -gffRepeat_analyzer.pl - -The script allows to generate a tabulated format report of repeats annotated from a gff file containing repeats (type must be math or protein_match). - -=head1 SYNOPSIS - - gff3_sq_repeats_analyzer.pl -i [-g -o ] - gff3_sq_repeats_analyzer.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<-i>, B<--gff>, B<--file> or B<--input> - -STRING: Input gff file that will be read. Several files can be processed at once: -i file1 -i file2 - -=item B<-g>, B<--genome> - -That input is design to know the genome size in order to calculate the percentage of the genome represented by each kind of repeats. -You can provide an INTEGER or the genome in fasta format. If you provide the fasta, the genome size will be calculated on the fly. - -=item B<-o> or B<--output> - -STRING: Output file. If no output file is specified, the output will be written to STDOUT. The result is in tabulate format. - -=item B<--help> or B<-h> - -Display this helpful text. 
- -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sq_rfam_analyzer.pl b/annotation/Tools/Util/gff/gff3_sq_rfam_analyzer.pl deleted file mode 100755 index 561eef1a8..000000000 --- a/annotation/Tools/Util/gff/gff3_sq_rfam_analyzer.pl +++ /dev/null @@ -1,212 +0,0 @@ -#!/usr/bin/env perl - -use Carp; -use strict; -use warnings; -use Pod::Usage; -use Getopt::Long; -use IO::File ; -use Bio::SeqIO; -use Bio::Tools::GFF; - -my $header = qq{ -######################################################## -# NBIS 2018 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $start_run = time(); -my @inputFile; -my $outputFile; -my $genome; -my $opt_help = 0; - -Getopt::Long::Configure ('bundling'); -if ( !GetOptions ('i|file|input|gff=s' => \@inputFile, - 'o|output=s' => \$outputFile, - 'g|genome=s' => \$genome, - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => "$header\nFailed to parse command line", - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header \n" } ); -} - -if (! @inputFile ){ - pod2usage( { -message => 'at least 1 input file is mandatory', - -verbose => 0, - -exitval => 2 } ); -} - -# Manage Output -my $ostream = IO::File->new(); -if(defined($outputFile)) -{ -$ostream->open( $outputFile, 'w' ) or - croak( - sprintf( "Can not open '%s' for reading: %s", $outputFile, $! ) ); -} -else{ - $ostream->fdopen( fileno(STDOUT), 'w' ) or - croak( sprintf( "Can not open STDOUT for writing: %s", $! 
) ); -} - -#check genome size -my $genomeSize=undef; - if($genome){ - if( $genome =~ /^[0-9]+$/){ #check if it's a number - $genomeSize=$genome; - } - elsif($genome){ - my $seqio = Bio::SeqIO->new(-file => $genome, '-format' => 'Fasta'); - while(my $seq = $seqio->next_seq) { - my $string = $seq->seq; - $genomeSize += length($string); - } - } - printf("%-45s%d%s", "Total sequence length", $genomeSize,"\n"); - } - -#time to calcul progression -my $type_count; -my $type_bp; -my %check; #track the repeat already annotated to not. Allow to skip already read repeats - -foreach my $file (@inputFile){ -# Manage input fasta file - print "Reading $file\n"; - my $ref_in = Bio::Tools::GFF->new(-file => $file, -gff_version => 3); - - my $startP=time; - my $nbLine=`wc -l < $file`; - $nbLine =~ s/ //g; - chomp $nbLine; - print "$nbLine line to process...\n"; - my $line_cpt=0; - - local $| = 1; # Or use IO::Handle; STDOUT->autoflush; Use to print progression bar - while (my $feature = $ref_in->next_feature() ) { - $line_cpt++; - my $type = lc($feature->primary_tag); - ## repeatMasker or repeatRunner - if (($type eq 'ncrna') or ($type eq 'nc_rna')){ - - my $ID = lc($feature->_tag_value('ID')); - my $position=$feature->seq_id."".$feature->start()."".$feature->end(); #uniq position - my $genus=$feature->_tag_value('rfam-id'); - - if(exists ($check{$ID}) ) { - if(! 
exists($check{$ID}{$position} ) ){ - $type_bp->{$genus}+=($feature->end()-$feature->start())+1; - $check{$ID}{$position}++; - } - } - else{ - $type_count->{$genus}++; - $type_bp->{$genus}+=($feature->end()-$feature->start())+1; - $check{$ID}{$position}++; - } - } - - #Display progression - if ((30 - (time - $startP)) < 0) { - my $done = ($line_cpt*100)/$nbLine; - $done = sprintf ('%.0f', $done); - print "\rProgress : $done %"; - $startP= time; - } - } - print "\rProgress : 100 %\n"; -} - -my $totalNumber=0; -my $totalSize=0; - -if(defined($genomeSize)){ -print $ostream "ncRNA type\tNumber\tSize total (kb)\tSize mean (bp)\t% of the genome\t/!\\Results are rounding to two decimal places \n"; - foreach my $gnx (sort {$a cmp $b} keys(%$type_count)) { - my $Sitotal=sprintf("%0.2f",($type_bp->{$gnx}/1000)); - my $SizeMean=sprintf("%0.2f",($type_bp->{$gnx}/$type_count->{$gnx})); - my $xGenome=sprintf("%0.2f",($type_bp->{$gnx}/$genomeSize)*100); - print $ostream $gnx,"\t",$type_count->{$gnx},"\t",$Sitotal,"\t",$SizeMean,"\t",$xGenome,"\n"; - - $totalNumber=$totalNumber+$type_count->{$gnx}; - $totalSize=$totalSize+$type_bp->{$gnx}; - - } -} -else{ - print $ostream "ncRNA type\tNumber\tSize total (kb)\tSize mean (bp)\t/!\\Results are rounding to two decimal places \n"; - foreach my $gnx (sort {$a cmp $b} keys(%$type_count)) { - my $Sitotal=sprintf("%0.2f",($type_bp->{$gnx}/1000)); - my $SizeMean=sprintf("%0.2f",($type_bp->{$gnx}/$type_count->{$gnx})); - print $ostream $gnx,"\t",$type_count->{$gnx},"\t",$Sitotal,"\t",$SizeMean,"\n"; - - $totalNumber=$totalNumber+$type_count->{$gnx}; - $totalSize=$totalSize+$type_bp->{$gnx}; - - } -} - -my $goodTotalSize=sprintf("%0.2f",($totalSize/1000)); -my $goodTotalSizeMean=sprintf("%0.2f",($totalSize/$totalNumber)); - -if(defined($genomeSize)){ - my $goodxGenome=sprintf("%0.2f",($totalSize/$genomeSize)*100); - print $ostream "Total\t",$totalNumber,"\t",$goodTotalSize,"\t",$goodTotalSizeMean,"\t",$goodxGenome,"\n"; -} -else{ - print 
$ostream "Total\t",$totalNumber,"\t",$goodTotalSize,"\t",$goodTotalSizeMean,"\n"; -} - -my $end_run = time(); -my $run_time = $end_run - $start_run; -print "Job done in $run_time seconds\n"; - -__END__ - -=head1 NAME - -gff3_sq_rfam_analyzer.pl - -The script allows to generate a tabulated format report of rfam-id annotated from a gff file containing rfam result (type of the 3rd column must be ncRNA or nc_RNA - not case sensitive. And the 9th column must contain the rfam-id attribute). e.g: - -ScG6Pog_82 Rfam ncRNA 737595 737663 20.7 + 0 ID=RF00134_ScG6Pog_82_737595;Name=RF00134_ScG6Pog_82_737595;evalue=0.45;gc-content=0.28;model_end=1;model_start=1;rfam-acc=RF00134;rfam-id=snoZ196 -ScG6Pog_82 Rfam ncRNA 305023 305103 20.8 + 0 ID=RF00227_ScG6Pog_82_305023;Name=RF00227_ScG6Pog_82_305023;evalue=0.35;gc-content=0.31;model_end=1;model_start=1;rfam-acc=RF00227;rfam-id=FIE3 - -=head1 SYNOPSIS - - gff3_sq_rfam_analyzer.pl -i [-g -o ] - gff3_sq_rfam_analyzer.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<-i>, B<--gff>, B<--file> or B<--input> - -STRING: Input gff file that will be read. Several files can be processed at once: -i file1 -i file2 - -=item B<-g>, B<--genome> - -That input is design to know the genome size in order to calculate the percentage of the genome represented by each kind of rfam-id. -You can provide an INTEGER or the genome in fasta format. If you provide the fasta, the genome size will be calculated on the fly. - -=item B<-o> or B<--output> - -STRING: Output file. If no output file is specified, the output will be written to STDOUT. The result is in tabulate format. - -=item B<--help> or B<-h> - -Display this helpful text. 
- -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sq_split.pl b/annotation/Tools/Util/gff/gff3_sq_split.pl deleted file mode 100755 index 09d40ef45..000000000 --- a/annotation/Tools/Util/gff/gff3_sq_split.pl +++ /dev/null @@ -1,149 +0,0 @@ -#!/usr/bin/env perl - -################################################### -# Jacques Dainat 01/2016 # -# National Bioinformatics Infrastructure Sweden # -# jacques.dainat@nbis.se # -################################################### - - -use strict; -use warnings; -use Carp; -use Pod::Usage; -use Getopt::Long; -use IO::File ; -use Bio::Tools::GFF; - -my $start_run = time(); - -my $inputFile=undef; -my $outfolder=undef; -my $opt_help = 0; -my $interval=10; -my $feature_type="gene"; - -Getopt::Long::Configure ('bundling'); -if ( !GetOptions ('file|input|gff=s' => \$inputFile, - 'ft|feature_type=s' => \$feature_type, - 'i|interval=i' => \$interval, - 'o|output=s' => \$outfolder, - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 0 } ); -} - -if ( !(defined($inputFile)) or !(defined($outfolder)) ){ - pod2usage( { -message => 'at least 2 parameters are mandatory: -i inputFile and -o $outfolder', - -verbose => 1, - -exitval => 1 } ); -} - -# Manage input fasta file -my $ref_in = Bio::Tools::GFF->new(-file => $inputFile, -gff_version => 3); - -# Manage Output -if (-d $outfolder) { - print "The output directory <$outfolder> already exists.\n";exit; -} -else{ - print "Creating the $outfolder folder\n"; - mkdir $outfolder; -} - -print "I will split the file into files containing $interval group of feature. 
The top feature of the group of feature is currenlty defined by <$feature_type>.\n"; - -#time to calcul progression -my $startP=time; -my $nbLine=`wc -l < $inputFile`; -$nbLine =~ s/ //g; -chomp $nbLine; -print "$nbLine line to process...\n"; -my $line_cpt=0; - -#my $fh=undef; -my $count_feature=0; -my $count_file=1; -my $file_name=$inputFile; -$file_name=~ s/.gff//g; -$file_name=~ s/.gff3//g; -my $gffout; -open(my $fh, '>', $outfolder."/".$file_name."_".$count_file.".gff") or die "Could not open file $file_name.'_'.$count_file.'.gff' $!"; -$gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); - -while (my $feature = $ref_in->next_feature() ) { - $line_cpt++; - - #What do we follow - if($feature->primary_tag eq $feature_type){ - if($count_feature == $interval){ - close $fh; - $count_file++; - open(my $fh, '>', $outfolder."/".$file_name."_".$count_file.".gff") or die "Could not open file $file_name.'_'.$count_file.'.gff' $!"; - $gffout= Bio::Tools::GFF->new(-fh => $fh, -gff_version => 3 ); - $count_feature=0; - } - $count_feature++; - } - $gffout->write_feature($feature); - #print $fh $feature->gff_string()."\n"; - - #Display progression - if ((30 - (time - $startP)) < 0) { - my $done = ($line_cpt*100)/$nbLine; - $done = sprintf ('%.0f', $done); - print "\rProgression : $done % processed.\n"; - $startP= time; - } -} -close $fh; - -my $end_run = time(); -my $run_time = $end_run - $start_run; -print "Job done in $run_time seconds\n"; - -__END__ - -=head1 NAME - -gff3_sq_split.pl - -split gff3 file into several files. -By default we create files containing 1000 genes and all sub-features associated. GFF3 input file must be sequential. - -=head1 SYNOPSIS - - gff3_sq_split.pl -i -o - gff3_sq_split.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<--gff>, B<--file> or B<--input> - -STRING: Input gff file that will be read. - -=item B<-i> or B<--interval> -Integer. Number of group of feature to include in each file. 1000 by default. 
- -=item B<--ft> or B<--feature_type> -The top feature of the feature group. By default "gene". - -=item B<-o> or B<--output> - -STRING: Output file. If no output file is specified, the output will be written to STDOUT. The result is in tabulate format. - -=item B<--help> or B<-h> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/gff/gff3_sq_stat_basic.pl b/annotation/Tools/Util/gff/gff3_sq_stat_basic.pl deleted file mode 100755 index 367bb9221..000000000 --- a/annotation/Tools/Util/gff/gff3_sq_stat_basic.pl +++ /dev/null @@ -1,194 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use Carp; -use warnings; -use Pod::Usage; -use Getopt::Long; -use IO::File ; -use Bio::SeqIO; -use Bio::Tools::GFF; - -my $header = qq{ -######################################################## -# NBIS 2019 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -my $start_run = time(); -my @inputFile; -my $outputFile; -my $genome; -my $opt_help = 0; - -Getopt::Long::Configure ('bundling'); -if ( !GetOptions ('i|file|input|gff=s' => \@inputFile, - 'o|output=s' => \$outputFile, - 'g|genome=s' => \$genome, - 'h|help!' => \$opt_help ) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if (! @inputFile ){ - pod2usage( { -message => "$header\nAt least 1 input file is mandatory", - -verbose => 0, - -exitval => 1 } ); -} - -my $ostream = IO::File->new(); - -# Manage Output -if(defined($outputFile)) -{ -$ostream->open( $outputFile, 'w' ) or - croak( - sprintf( "Can not open '%s' for reading: %s", $outputFile, $! ) ); -} -else{ - $ostream->fdopen( fileno(STDOUT), 'w' ) or - croak( sprintf( "Can not open STDOUT for writing: %s", $! 
) ); -} - -#check genome size -my $genomeSize=undef; - if($genome){ - if( $genome =~ /^[0-9]+$/){ #check if it's a number - $genomeSize=$genome; - } - elsif($genome){ - my $seqio = Bio::SeqIO->new(-file => $genome, '-format' => 'Fasta'); - while(my $seq = $seqio->next_seq) { - my $string = $seq->seq; - $genomeSize += length($string); - } - } - printf("%-45s%d%s", "Total sequence length", $genomeSize,"\n"); - } - -#time to calcul progression -my $type_count; -my $type_bp; -my %check; #track the repeat already annotated to not. Allow to skip already read repeats - -foreach my $file (@inputFile){ -# Manage input fasta file - print "Reading $file\n"; - my $ref_in = Bio::Tools::GFF->new(-file => $file, -gff_version => 3); - - my $startP=time; - my $nbLine=`wc -l < $file`; - $nbLine =~ s/ //g; - chomp $nbLine; - print "$nbLine line to process...\n"; - my $line_cpt=0; - - local $| = 1; # Or use IO::Handle; STDOUT->autoflush; Use to print progression bar - while (my $feature = $ref_in->next_feature() ) { - $line_cpt++; - my $type = lc($feature->primary_tag); - $type_count->{$type}++; - $type_bp->{$type}+=($feature->end()-$feature->start())+1; - - #Display progression - if ((30 - (time - $startP)) < 0) { - my $done = ($line_cpt*100)/$nbLine; - $done = sprintf ('%.0f', $done); - print "\rProgress : $done %"; - $startP= time; - } - } - print "\rProgress : 100 %\n"; -} - -my $totalNumber=0; -my $totalSize=0; - -if(defined($genomeSize)){ -print $ostream "Type (3rd column)\tNumber\tSize total (kb)\tSize mean (bp)\t% of the genome\t/!\\Results are rounding to two decimal places \n"; - foreach my $gnx (sort {$a cmp $b} keys(%$type_count)) { - my $Sitotal=sprintf("%0.2f",($type_bp->{$gnx}/1000)); - my $SizeMean=sprintf("%0.2f",($type_bp->{$gnx}/$type_count->{$gnx})); - my $xGenome=sprintf("%0.2f",($type_bp->{$gnx}/$genomeSize)*100); - print $ostream $gnx,"\t",$type_count->{$gnx},"\t",$Sitotal,"\t",$SizeMean,"\t",$xGenome,"\n"; - - $totalNumber=$totalNumber+$type_count->{$gnx}; - 
$totalSize=$totalSize+$type_bp->{$gnx}; - - } -} -else{ - print $ostream "Type (3rd column)\tNumber\tSize total (kb)\tSize mean (bp)\t/!\\Results are rounding to two decimal places \n"; - foreach my $gnx (sort {$a cmp $b} keys(%$type_count)) { - my $Sitotal=sprintf("%0.2f",($type_bp->{$gnx}/1000)); - my $SizeMean=sprintf("%0.2f",($type_bp->{$gnx}/$type_count->{$gnx})); - print $ostream $gnx,"\t",$type_count->{$gnx},"\t",$Sitotal,"\t",$SizeMean,"\n"; - - $totalNumber=$totalNumber+$type_count->{$gnx}; - $totalSize=$totalSize+$type_bp->{$gnx}; - - } -} - -my $goodTotalSize=sprintf("%0.2f",($totalSize/1000)); -my $goodTotalSizeMean=sprintf("%0.2f",($totalSize/$totalNumber)); - -if(defined($genomeSize)){ - my $goodxGenome=sprintf("%0.2f",($totalSize/$genomeSize)*100); - print $ostream "Total\t",$totalNumber,"\t",$goodTotalSize,"\t",$goodTotalSizeMean,"\t",$goodxGenome,"\n"; -} -else{ - print $ostream "Total\t",$totalNumber,"\t",$goodTotalSize,"\t",$goodTotalSizeMean,"\n"; -} - -my $end_run = time(); -my $run_time = $end_run - $start_run; -print "Job done in $run_time seconds\n"; - -__END__ - -=head1 NAME - -gff3_sq_stat_basic.pl - -The script allows to generate a tabulated format report of feature types annotated from a gff file. - -=head1 SYNOPSIS - - gff3_sq_stat_basic.pl -i [-g -o ] - gff3_sq_stat_basic.pl --help - -=head1 OPTIONS - -=over 8 - -=item B<-i>, B<--gff>, B<--file> or B<--input> - -STRING: Input gff file that will be read. Several files can be processed at once: -i file1 -i file2 - -=item B<-g>, B<--genome> - -That input is design to know the genome size in order to calculate the percentage of the genome represented by each kind of feature type. -You can provide an INTEGER or the genome in fasta format. If you provide the fasta, the genome size will be calculated on the fly. - -=item B<-o> or B<--output> - -STRING: Output file. If no output file is specified, the output will be written to STDOUT. The result is in tabulate format. 
- -=item B<--help> or B<-h> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Util/rfam_scan-11.0.pl b/annotation/Tools/Util/rfam_scan-11.0.pl deleted file mode 100644 index 9655dac32..000000000 --- a/annotation/Tools/Util/rfam_scan-11.0.pl +++ /dev/null @@ -1,698 +0,0 @@ -#!/usr/bin/env perl - -use warnings; -use strict; -use Getopt::Long; -use File::Copy; - -#use Bio::SearchIO; -use Bio::SeqIO; -#use Bio::FeatureIO; -use Bio::SeqFeature::Generic; -use Bio::Tools::GFF; -use IO::File; - -my( $local, - $global, - $blastdb, - $thresh, - $evalueThresh, - $noclean, - $help, - $outfile, - $nobig, - @exclude, - $verbose, - ); - -my $VERSION = '1.0'; -my $blastcut = 0.01; -my $outputfmt = 'gff'; -my $filter = 'ncbi'; -my $starttime = `date`; -chomp $starttime; -my $cmdline = $0.' '.join( ' ', @ARGV ); - -&GetOptions( "local" => \$local, - "global" => \$global, - "blastdb=s" => \$blastdb, - "t=s" => \$thresh, - "e=s" => \$evalueThresh, - "bt=s" => \$blastcut, - "f=s" => \$outputfmt, - "h" => \$help, - "nobig" => \$nobig, - "exclude=s@" => \@exclude, - "noclean" => \$noclean, - "filter=s" => \$filter, - "o=s" => \$outfile, - "v" => \$verbose, - ); - -my $cmfile = shift; -my $fafile = shift; - -if( $help or not $fafile ) { - &help(); - exit(1); -} - -sub help { - print STDERR < cm_file fasta_file - Options - -h : show this help - -o : write the output to - -blastdb : use Rfam blast database for speed - (otherwise do full slow CM search) - - Expert options - -t : specify cutoff in bits - --bt : specify blast evalue cutoff - --local : perform local mode search - --global : perform global mode search - --nobig : skip the large ribosomal RNAs - --exclude [acc] : exlude family [acc] from the search - --filter [ncbi|wu] : use wublast/ncbiblast (default ncbi) - -EOF -} - -if( $global or $local ) { - print STDERR < 1, - "RF00028" => 1, - "RF00029" => 1, - ); -} -foreach my $acc ( @exclude ) { - $exclude{$acc} = 1; -} - -print STDERR "read fasta file\n" if( 
$verbose ); -my $seqs; -my $in = Bio::SeqIO -> new( -file => $fafile, - -format => 'Fasta' ); -while( my $seq = $in->next_seq() ) { - $seqs->{ $seq->id() } = $seq; -} - -print STDERR "read CM library\n" if( $verbose ); -my $cmfh = IO::File->new( $cmfile ); -my $cm = read_cm_library( $cmfh ); - -my ($features, $cmsearchOut, @cmsearchOut); -if( $blastdb ) { - print STDERR "run blast pre-filter\n" if( $verbose ); - my $resfile = run_blast_pre_filter(); - print STDERR "parse blast results\n" if( $verbose ); - my $results = parse_blast_table( $resfile ); - - print STDERR "run infernal search\n" if( $verbose ); - ($features, $cmsearchOut) = run_multi_infernal_search( $cmfile, $results ); - @cmsearchOut = @{$cmsearchOut}; -} -else { - print STDERR "run infernal search\n" if( $verbose ); - my ($resfile, $cmsfile) = run_infernal_search( $cmfile, $fafile ); - print STDERR "parse infernal results\n" if( $verbose ); - push(@cmsearchOut, $cmsfile); - $features = parse_infernal_table( $resfile ); -} - -my $outfh = \*STDOUT; -if( $outfile ) { - $outfh = IO::File->new( ">$outfile" ); -} - -my %counts; -if( $outputfmt eq 'gff' ) { - my $endtime = `date`; - chomp $endtime; -{ - print $outfh <score <=> $a->score } @{$features} ) { - my ($rfamid) = $f->get_tag_values('rfam-id'); - $counts{$rfamid}++; - $f->add_tag_value( 'id', $rfamid.'.'.$counts{$rfamid} ); - $f->gff_format( Bio::Tools::GFF->new(-gff_version => 3) ); - print $outfh $f->gff_string, "\n"; - } -} -elsif($outputfmt eq 'align'){ - foreach my $outfile (@cmsearchOut){ - open(UT, "< $outfile"); - my $outStr = join('', ); - print $outfh $outStr; - close(UT); - } -} - -cleanup() if( !$noclean ); - -exit; - - -#### - -sub run_multi_infernal_search { - my $cmfile = shift; - my $results = shift; - - my (@f, @cmsearchOut); - my $count=0; - # loop over families from blast results - foreach my $acc ( keys %{$results} ) { - if( exists $exclude{$acc} ) { - next; # skip anything we're asked to - } - $count++; - - my $out = 
Bio::SeqIO->new( -file => ">/tmp/$$.seq", - -format => 'Fasta' ); - - my $rfamid; - foreach my $seqid ( keys %{ $results->{$acc} } ) { - foreach my $hit ( @{ $results->{$acc}->{$seqid} } ) { - $rfamid = $hit->{-id}; - my( $start, $end ) = ( $hit->{-start}, - $hit->{-end}, - ); - print "WARNING: start>end in [$seqid/$start-$end]!\n" if $start>$end; - my $newseq = $seqs->{$seqid}->trunc( $start, $end ); - $newseq->revcom() if $start>$end; - $newseq->display_id( "$seqid/$start-$end" ); - $out->write_seq( $newseq ); - - print STDERR "searching [$seqid/$start-$end] with [$rfamid]\n" if( $verbose ); - } - } - $out->close; - die "FATAL: can't find a file I've written in /tmp [$$.seq]" if( not -s "/tmp/$$.seq" ); - - my $cmfile = get_cm_from_id( $cmfh, $rfamid ); - my ($resfile, $cmsfile) = run_infernal_search( $cmfile, "/tmp/$$.seq", $rfamid ); - my $feat = parse_infernal_table( $resfile ); - copy($cmsfile, $cmsfile . '.' . $count); - push( @f, @{$feat} ); - push(@cmsearchOut, $cmsfile . '.' . 
$count); - } - return (\@f, \@cmsearchOut); -} - - -sub cleanup { - unlink "/tmp/$$.res"; - unlink "/tmp/$$.blast"; - unlink "/tmp/$$.seq"; - unlink "/tmp/$$.cm"; -} - - -sub run_blast_pre_filter { - my $blastcmd; - if( $filter =~ /ncbi/ ) { - $blastcmd = "blastall -p blastn -i $fafile -d $blastdb -e $blastcut -W7 -F F -b 1000000 -v 1000000 -m 8"; - } - elsif( $filter =~ /wu/ ) { - $blastcmd = "wublastn $blastdb $fafile -e $blastcut W=7 B=1000000 V=1000000 -hspmax 0 -gspmax 0 -kap -mformat 2"; - } - my $outfile = "/tmp/$$.blast"; - system "$blastcmd > $outfile" and die "FATAL: failed to run blast [$blastcmd]\n"; - return $outfile; -} - - -sub run_infernal_search { - my $cmfile = shift; - my $fafile = shift; - my $rfamid = shift; - my $options = ""; - - if( defined $thresh ) { - $options = " -T $thresh "; - } - elsif ( defined $evalueThresh ){ - $options = " -E $evalueThresh "; - } - else { - $options = " --ga "; - } - - if( $global ) { - $options .= " -g"; - } - elsif( $local ) { - # default in infernal 1.0 - } - elsif( $cm->{$rfamid}->{-options} ) { - $options .= " ".$cm->{$rfamid}->{-options}; - } - -# system "cmsearch $options $cmfile $fafile > /tmp/$$.res" and die; - system "cmsearch --tabfile /tmp/$$.res $options $cmfile $fafile > /tmp/$$.cmsearch" and die; -# system "cat /tmp/$$.res"; - return ("/tmp/$$.res", "/tmp/$$.cmsearch"); -} - - - -sub parse_infernal_table { - my $file = shift; - my $fh = IO::File->new( $file ); - my @f; - my $rfamid; - while(<$fh>) { - if( /^\#\s+CM:\s+(\S+)/ ) { - $rfamid = $1; - } - next if( /^\#/ ); - my( $model, $seqid, $start, $end, $modst, $moden, $bits, $evalue, $gc ); - if( (( $model, $seqid, $start, $end, $modst, $moden, $bits, $evalue, $gc ) = - /^\s*(\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\d+)$/) || - (( $seqid, $start, $end, $modst, $moden, $bits, $evalue, $gc ) = - /^\s*(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\d+)$/) ) { - - my $strand = 1; - if( $end < $start ) { - ( $start, 
$end ) = ( $end, $start ); - $strand = -1; - } - - # recalculate start and end if name/start-end - if( my( $n,$s,$e) = $seqid =~ /^(\S+)\/(\d+)-(\d+)/ ) { - $seqid = $n; - if( $s > $e ) { - ( $s, $e ) = ( $e, $s ); - $strand = 0-$strand; - } - $start += ($s-1); - $end += ($s-1); - } - - my %tags = ( 'rfam-id' => $rfamid, - 'rfam-acc' => ($cm->{$rfamid}->{-accession} || 'unknown'), - 'model_start' => $modst, - 'model_end' => $moden, - 'gc-content' => $gc, - ); - - if( $evalue =~ /[0-9]/ ) { - $tags{'evalue'} = $evalue; - } - - my $f = Bio::SeqFeature::Generic->new( -seq_id => $seqid, - -start => $start, - -end => $end, - -strand => $strand, - -primary_tag => 'similarity', - -source_tag => 'Rfam', - -score => $bits, - -tag => \%tags, - ); - push( @f, $f ); - } - } - return \@f; -} - - -sub parse_infernal_results { - my $file = shift; - my $parser = Bio::SearchIO->new( -file => $file, - -format => 'infernal1' - ); - - my @f; - while( my $res = $parser->next_result ) { - my $rfamid = $res->query_name; - my $rfamacc = $cm->{$rfamid}->{-accession} || 'unknown'; - - foreach my $hit ( sort { $b->score <=> $a->score } $res->hits ) { - foreach my $hsp ( sort { $b->score <=> $a->score } $hit->hsps ) { - my( $name, $start, $end ) = - ( $hit->name, $hsp->start('hit'), $hsp->end('hit') ); - - my $strand = 1; - if( $hsp->strand('query') != $hsp->strand('hit') ) { - $strand = -1; - } - - # recalculate start and end if name/start-end - if( my( $n,$s,$e) = $name =~ /^(\S+)\/(\d+)-(\d+)/ ) { - $name = $n; - if( $s > $e ) { - ( $s, $e ) = ( $e, $s ); - $strand = 0-$strand; - } - $start += ($s-1); - $end += ($s-1); - } - - my $f = Bio::SeqFeature::Generic->new( -seq_id => $name, - -start => $start, - -end => $end, - -strand => $strand, - -primary_tag => 'similarity', - -source_tag => 'Rfam', - -score => $hsp->score, - -tag => { 'rfam-id' => $rfamid, - 'rfam-acc' => $rfamacc, - 'model_start' => $hsp->start('query'), - 'model_end' => $hsp->end('query'), - }, - ); - push( @f, $f ); - } - 
} - } - return \@f; -} - - -sub parse_blast_table { - my $blastfile = shift; - my $hits = {}; - my $fh = IO::File->new(); - # sort hits so they go in to add_non_overlapping_hit in coordinate - # order - $fh->open( "sort -k7n $blastfile |" ); - while(<$fh>) { - next if( /^\#/ ); - my @col = split( /\s+/, $_ ); - - my( $qname, $hname, $start, $end ); - - if( $filter =~ /ncbi/ ) { - ( $qname, $hname, $start, $end ) = ( $col[0], - $col[1], - $col[6], - $col[7] ); - } - elsif( $filter =~ /wu/ ) { - ( $qname, $hname, $start, $end ) = ( $col[0], - $col[1], - $col[17], - $col[18] ); - } - - my( $rfamacc, $rfamid ) = split( ';', $hname ); - my $length = $seqs->{$qname}->length; - my $win = $cm->{$rfamid}->{-length}; - - print "WARNING: uninitialised values (start,end,win):[$start,$end,$win] in blast output [$_]!\n" if (not defined $start or not defined $end or not defined $win); - $start -= $win; - $end += $win; - $start = 1 if( $start < 1 ); - $end = $length if( $end > $length ); - - my $newhit = { -acc => $rfamacc, - -id => $rfamid, - -seqid => $qname, - -start => $start, - -end => $end, - }; - - add_non_overlapping_hit( $hits, $newhit ); - } - - return $hits; -} - - -sub parse_blast { - my $blastfile = shift; - my $hits = {}; - my $searchin = Bio::SearchIO->new( '-file' => $blastfile, - '-format' => 'Blast' ); - - while( my $result = $searchin->next_result() ) { - my $qname = $result->query_name; - while( my $hit = $result->next_hit() ) { - my( $rfamacc, $rfamid ) = split( ';', $hit->name ); - while( my $hsp = $hit->next_hsp() ) { - my( $start, $end ) = ( $hsp->start('query'), - $hsp->end('query') ); - my $length = $seqs->{$qname}->length; - my $win = $cm->{$rfamid}->{-length}; - - $start -= $win; - $end += $win; - $start = 1 if( $start < 1 ); - $end = $length if( $end > $length ); - - my $newhit = { -acc => $rfamacc, - -id => $rfamid, - -seqid => $qname, - -start => $start, - -end => $end, - }; - - add_non_overlapping_hit( $hits, $newhit ); - } - } - } - return $hits; 
-} - - -sub add_non_overlapping_hit { - # this doesn't do what it is meant to, because the HSPs come at - # us in order of score, not sorted by coordinates. - # -- fixed by pre-sorting blast table hits - my $hits = shift; - my $newhit = shift; - my( $acc, $seqid, $start, $end ) = ( $newhit->{-acc}, - $newhit->{-seqid}, - $newhit->{-start}, - $newhit->{-end} ); - my $already; - if( exists $hits->{$acc}->{$seqid} ) { - foreach my $se ( #sort { $a->{-start} <=> $b->{-start} } - @{ $hits->{$acc}->{$seqid} } ) { - if( $se->{-start} >= $start and $se->{-start} <= $end ) { - $se->{-start} = $start; - $already = 1; - } - if( $se->{-end} >= $start and $se->{-end} <= $end ) { - $se->{-end} = $end; - $already = 1; - } - if( $se->{-start} <= $start and $se->{-end} >= $end ) { - $already = 1; - } - } - } - - return 0 if( $already ); - - push( @{ $hits->{$acc}->{$seqid} }, $newhit ); - return $hits; -} - - -sub read_cm_library { - my $fh = shift; - my $off = 0; - my %cm; - my $name; - while(<$fh>) { - if( /^NAME\s+(\S+)/ ) { - $name = $1; - $cm{$name}->{-offset} = $off; - } - if( /^CLEN\s+(\d+)/ ) { - $cm{$name}->{-length} = $1; - } - if(( /^ACCESSION\s+(\S+)/ ) || (/^ACC\s+(\S+)/)) { - $cm{$name}->{-accession} = $1; - } - if( /^SCOM\s+cmsearch\s+(.*)\s+\S+\s+\S+/ ) { - my $options = ''; - my @opts = split( /\s+/, $1 ); - while( my $opt = shift @opts ) { - # don't want to propagate some cmsearch options from the Rfam.cm file - if( $opt =~ /^-Z$/ ) { - shift @opts; - } - elsif( $opt =~ /^-E$/ ) { - shift @opts; - } - elsif( $opt =~ /^--forward/){ - shift @opts; - } - elsif( $opt =~ /--toponly/ ) { - } - else { - $options .= $opt." 
"; - } - } - $cm{$name}->{-options} = $options; - } - if( /^\/\// ) { - $off = tell($fh); - } - } - return \%cm; -} - -sub get_cm_from_id { - my $fh = shift; - my $id = shift; - my $outfile = "/tmp/$$.cm"; - seek($fh,$cm->{$id}->{-offset},0); - open( F, ">$outfile" ) or die "FATAL: Failed to retrieve CM [$id] from handle [$fh]\n"; - while(<$fh>) { - print F $_; - last if( /^\/\// ); - } - return $outfile; -} - -###### - - - -=head1 NAME - -rfam_scan.pl - search a nucleotide fasta sequence against the Rfam -library of covariance models. - -=head1 VERSION - -This is version 1.0.4 of rfam_scan.pl. It has been tested with Perl -5.6.1, Rfam 11.0, Bioperl 1.5.2/1.6 and INFERNAL 1.0. It should work with -versions higher than these, except where file formats change! - -=head1 REQUIREMENTS - - - this script - - - Perl 5.6 or higher (and maybe lower) - - - The Rfam database (downloadable from - ftp://ftp.sanger.ac.uk/pub/databases/Rfam) - - - INFERNAL software v1.0 and up (from http://infernal.janelia.edu/) - - - NCBI BLAST binaries (from http://www.ncbi.nlm.nih.gov/Ftp/) - - - Bioperl (from http://bio.perl.org/) - -The Bioperl modules directory must be in your perl library path, and -the INFERNAL and BLAST binaries must be in your executable path. - -You also need to be able to read and write to /tmp on your machine. - -=head1 HOW TO INSTALL RFAM LOCALLY - -1. Get the Rfam database from - ftp://ftp.sanger.ac.uk/pub/databases/Rfam/. In particular you need - the files Rfam.fasta and Rfam.cm - -2. Unzip them if necessary - $ gunzip Rfam*.gz - -3. Grab and install INFERNAL, NCBI BLAST and Bioperl, and make sure - your paths etc are set up correctly. - -=head1 SEARCHING YOUR SEQUENCE AGAINST RFAM - -The INFERNAL user manual has information about how to search sequences -using covariance models. This is very compute intensive. This script -provides some hacks to speed up the process. - -Run rfam_scan.pl -h to get a list of options. 
- -=head1 THINGS TO NOTE - -It is important that every sequence in your input fasta file has a -unique name. - -This script can take a long while to run on big sequences, -particularly if your sequence looks anything like a ribosomal RNA. -You will want to test on something small and sensible first. -Ribosomal RNAs should be relatively easy to find using things like -BLAST, so you can omit the SSU and LSU rRNAs from Rfam searches (along -with group I and group II catalytic introns) with the --nobig option. - -=head1 BUGS - - - Full cmsearch (no --blastdb option) doesn't use the global/local - state of the model. Curated thresholds may therefore not be - meaningful. - - Many options are not rigorously tested. - - Error messages are uninformative. - - The documentation is inadequate. - -=head1 HISTORY -v1.0.4 2012-08-13 - - Rfam 11.0 - minor edit to accommodate change to CM formatting - -V1.0.3 - -Rfam 10.1 - -v1.0.2 - -Rfam 10.0 - -v1.0 2010-02-28 - - New style command line - - Use Infernal 1.0 - - Remove most of bioperl reliance (slow parsing of blast/infernal files) - - Use either wu or ncbi blast - - Output only in GFF format - - Use global/local state of the model and Rfam cmsearch options from CM - SCOM line - - Don't need Rfam.thr file - -v0.3 2007-06-20 - - New parser (bioperl style) for INFERNAL 0.81 output - - Probably need Bioperl 1.5, and extra Bio::SearchIO::infernal - - Using WUBLAST, instead of NCBI - -v0.2 2005-02-22 - - add --slow option - - 'aln' format option gives you cmsearch style alignments - - fix -o output option - -v0.1 2003-11-19 - - first effort at something useful - - return Rfam hits as tab delimited or gff format - - -=head1 CONTACT - -Copyright (c) 2003-2006 Genome Research Ltd -Copyright (c) 2007-2010 Sam Griffiths-Jones - -Please contact rfam@sanger.ac.uk for help. 
- -=cut diff --git a/annotation/Tools/Util/scaffold2AGP.pl b/annotation/Tools/Util/scaffold2AGP.pl deleted file mode 100755 index 3393852a6..000000000 --- a/annotation/Tools/Util/scaffold2AGP.pl +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/env perl -# -# Creates a AGP-file needed by e.g. EMBL for a scaffolded assembly -# -# By Henrik Lantz, NBIS/Uppsala University, Sweden - -# usage: perl scaffold2AGP.pl -i scaffoldfile.fasta -o scaffoldfile.agp - -use warnings; -use strict; -use Bio::SeqIO ; -use Getopt::Long; - -my $infile; -my $outfile; -my $contigcount=0; -my $usage = "\nUsage: perl scaffold2AGP.pl -i -o \n"; -$usage .= "Contigs will be saved in \"contigs.fasta\"\n"; - -GetOptions( - 'i=s' => \$infile, # scaffoldfile - 'o=s' => \$outfile); - -die "\nPlease provide filename(s)\n$usage\n" unless $infile; - -open (AGP_FILE, ">$outfile") or - die "\nPlease provide filename(s)\n$usage"; - - -my $inseq = Bio::SeqIO->new('-file' => "<$infile", - '-format' => 'Fasta' ); - -my $outseq = Bio::SeqIO->new( - -file => ">contigs.fasta", - -format => 'fasta', - ); - -#Read scaffolded FASTA-file -while (my $seq_obj = $inseq->next_seq ) { - my $scaffold = $seq_obj->id; - my $sequence = $seq_obj->seq; - my $start=1; - my $oldsum; - my $newsum; - my $count=0; - my $rounded; - - next if ($scaffold =~ /^contig/i); - foreach my $substring_sequence (split /(N{20,})/i, $sequence){ - my $type; - my $substring_length = length($substring_sequence); - $count++; - $oldsum=$start; - $newsum=$oldsum+$substring_length-1; - - if ($substring_sequence !~ m/^N+$/i){ - $type="W"; - $contigcount++; - $rounded=sprintf("%05s", $contigcount); - my $contig_obj = Bio::Seq->new(-seq => "$substring_sequence", - -display_id => "contig$rounded", - -alphabet => "dna" ); - $outseq->write_seq($contig_obj); - } - elsif ($substring_sequence =~ m/^N+$/i){ - $type="N"; - } - $start += $substring_length; - if ($type eq "W"){ - print AGP_FILE 
"$scaffold\t$oldsum\t$newsum\t$count\t$type\tcontig$rounded\t1\t$substring_length\t+\n"; - } - if ($type eq "N"){ - print AGP_FILE "$scaffold\t$oldsum\t$newsum\t$count\t$type\t$substring_length\tscaffold\tyes\tpaired-ends\n"; - } - } -} - -close AGP_FILE; diff --git a/annotation/Tools/Util/screen_mito.pl b/annotation/Tools/Util/screen_mito.pl deleted file mode 100755 index 781c620e8..000000000 --- a/annotation/Tools/Util/screen_mito.pl +++ /dev/null @@ -1,169 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use IO::File; -use File::Basename; -use Getopt::Long; -use Pod::Usage; - -my @copyARGV=@ARGV; - -my $opt_output = undef; -my $tabfile = undef; -my $help= undef; - -my $header = qq{ -######################################################## -# NBIS 2018 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - -if ( !GetOptions("tab=s" => \$tabfile, - "o|out=s" => \$opt_output, - "h|help" => \$help) ) -{ - pod2usage( { -message => 'Failed to parse command line', - -verbose => 1, - -exitval => 1 } ); -} - -if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header\n" } ); -} - -if ( ! defined( $tabfile) ) { - pod2usage( { - -message => "$header\nMust specify at least 1 file. \n", - -verbose => 0, - -exitval => 1 } ); -} - -##### Stream in 1 -my $fh1; -if ($tabfile) { - open($fh1, '<', $tabfile) or die "Could not open file '$tabfile' $!"; -} - - -my %info; - -while( my $line = <$fh1>) { - - if( $line =~ m/^#/){ next; } - - my @list = split(/\s/,$line); - my $ID = $list[0]; - my $start = $list[9]; - my $end = $list[10]; - #print $start." 
".$end."\n"; - push (@{$info{$ID}}, [$start, $end]); - -} - -my %omni; -foreach my $contig (keys %info){ - - my @uniq_list; - #print "contig: ".$contig."\n"; - - my $prev_start = -1; - my $prev_end = -1; - my $start = -1; - my $end = -1; - my $printed=undef; - - foreach my $tuple (sort {$a->[0] <=> $b->[0] } @{$info{$contig}}){ - - $start = @$tuple[0]; - $end = @$tuple[1]; - - if ( ($prev_start <= $end) and ($prev_end >= $start) ){ #it overlaps or are consecutive - #print "it overlaps\n"; - if ($end > $prev_end){ - $prev_end = $end; - } - } - elsif($start > $prev_end){ - if($prev_start != -1){ - push (@uniq_list, [$prev_start,$prev_end]); - #print "I push the tuple [$prev_start,$prev_end]\n"; - } - $prev_start = $start ; - $prev_end = $end ; - } - - } - # Deal with the last round - push (@uniq_list, [$prev_start,$prev_end]); - #print "I push the last tuple [$prev_start,$prev_end]\n"; - - push (@{$omni{$contig}}, @uniq_list) -} - - #calculate bp incremented non-overlaping hit size - my %size; - foreach my $contig (keys %omni){ - foreach my $tuple ( @{$omni{$contig}} ){ - $size{$contig}+=(@$tuple[1]-@$tuple[0]+1); - } - } - - print "SequenceID\tNumber_of_Hit\tTotal_bp\n"; - # sort by number of non-overlaping hits - foreach my $contig (sort { @{$omni{$a}} <=> @{$omni{$b}} } keys %omni){ - print $contig."\t".@{$omni{$contig}}."\t".$size{$contig}."\n"; - - } - - -__END__ - - -=head1 NAME - -Based on a balst output ( -outfmt '6 qseqid staxids bitscore std sscinames sskingdoms stitle') the script aims to tell you how many non-overlaping hit has been found -by sequence. One hit may roughly be considered as one gene. It gives also the total size of those hits in bp. -The script aims to help determining the contigs from an assembly which are mitochondrial. An assembly graph could be helpful to check if the suspicious (those that might be mitochondrial) contigs sounds to be circular -as expected for a mitochondrial genome. 
- -=head1 SYNOPSIS - - ./screen_mito.pl --tab=infile -o=outFile - ./screen_mito.pl --help - -Mitochondrial genome size (from wikipedia) - -Genome Type Kingdom Introns Size Shape Description -1 Animal No 11–28kbp Circular Single molecule -2 Fungi, Plant, Protista Yes 19–1000kbp Circular Single molecule -3 Fungi, Plant, Protista No 20–1000kbp Circular Large molecule and small plasmid like structures -4 Protista No 1–200kbp Circular Heterogeneous group of molecules -5 Fungi, Plant, Protista No 1–200kbp Linear Homogeneous group of molecules -6 Protista No 1–200kbp Linear Heterogeneous group of molecules - - - -=head1 OPTIONS - -=over 8 - -=item B<--tab> - -Input tabulated blast file -outfmt '6 qseqid staxids bitscore std sscinames sskingdoms stitle' - -=item B<--out>, B<--output> or B<-o> - -The output will be the EMBL file with the record "headers" modified - -=item B<--help> or B<-h> - -Display this helpful text. - -=back - -=cut diff --git a/annotation/Tools/Wrapper/run_blast2go.pl b/annotation/Tools/Wrapper/run_blast2go.pl deleted file mode 100644 index b7222edee..000000000 --- a/annotation/Tools/Wrapper/run_blast2go.pl +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use Env qw (@B2G4PIPEPATH); -use Getopt::Long; -use Scalar::Util qw(openhandle); -use Time::Piece; -use Time::Seconds; - -my $usage = qq{ -perl run_blast2go.pl - Getting help: - [--help] - - Input: - [--blast filename] - The name of the blast file (-m 7) - [--ipr file] - The name of the iprscan xml file - Ouput: - [--output filename] - The name of the output file. - - Misc: - [--mem amount] - Specify the memory to be used by Blast2Go (e.g. 
800m, 10G) - Default: 1G - -}; - -my $blast = undef; -my $ipr = undef; -my $output = undef; -my $mem = "1G"; - -my $help; - -GetOptions( - "help" => \$help, - "blast=s" => \$blast, - "ipr=s" => \$ipr, - "mem=s" => \$mem, - "output=s" => \$output); - -# Print Help and exit -if ($help) { - print $usage; - exit(0); -} - - -if (-f "@B2G4PIPEPATH/b2gPipe.properties") { - print "Found Blast2Go installation, starting analysis\n"; -} else { - die "Couldn't find the Blast2Go binary - make sure you load the relevant module!\n" -} - -# $B2G4PIPE/*:$B2G4PIPE/ext/*: - -runcmd("java -Xmx$mem -cp @B2G4PIPEPATH/*:@B2G4PIPEPATH/ext/* es.blast2go.prog.B2GAnnotPipe -in $blast -out $output -prop @B2G4PIPEPATH/b2gPipe.properties -ips $ipr -annot"); - - -sub msg { - my $t = localtime; - my $line = "[".$t->hms."] @_\n"; -} - -sub runcmd { - msg("Running:", @_); - system(@_)==0 or err("Could not run command:", @_); -} - -sub err { - msg(@_); - exit(2); -} - - diff --git a/annotation/Tools/Wrapper/run_interproscan.pl b/annotation/Tools/Wrapper/run_interproscan.pl deleted file mode 100755 index e24120ee0..000000000 --- a/annotation/Tools/Wrapper/run_interproscan.pl +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use Getopt::Long; -use Scalar::Util qw(openhandle); -use Time::Piece; -use Time::Seconds; - -my $usage = qq{ -perl my_script.pl - Getting help: - [--help] - - Input: - [--infile filename] - The name of the file to read. - [--type biotype] - Indicate whether input sequences are protein (p) or nucleotide (n). Default is n, nucleotide. - Ouput: - [--outdir foldername] - The name of the output folder. 
-}; - -my $outdir = undef; -my $infile = undef; -my $type = "n"; -my $quiet = undef; -my $help; - -GetOptions( - "help" => \$help, - "type=s" => \$type, - "infile=s" => \$infile, - "outdir=s" => \$outdir); - -# Print Help and exit -if ($help) { - print $usage; - exit(0); -} - -defined $outdir or die "Must specify a directory (--outdir)\n"; -defined $infile or die "Must specify an input fasta file (--infile)\n"; - -# Create outdir if it does not consist - -if (-d $outdir) { - msg("Outdir found!"); -} elsif (-f $outdir . "/" . $infile . ".gff3") { - msg("This folder already contains an InterProScan analysis. Aborting!") and die; -} else { - runcmd("mkdir $outdir"); - msg("Outdir created!"); -} - -msg("Starting InterProScan"); - -# Check whether sequence is nucleotide or protein - -# Run InterProScan - -runcmd("/sw/bioinfo/interproscan-5.2-45.0/interproscan.sh -appl PfamA-27.0,TIGRFAM-13.0,ProDom-2006.1 -d $outdir -i $infile -f GFF3,XML -iprlookup -ms 40 -T /tmp -t $type"); - -# -------------- - -sub msg { - my $t = localtime; - my $line = "[".$t->hms."] @_\n"; - print LOG $line if openhandle(\*LOG); - print STDERR $line unless $quiet; -} - -sub runcmd { - msg("Running:", @_); - system(@_)==0 or err("Could not run command:", @_); -} - -sub err { - $quiet=0; - msg(@_); - exit(2); -} - - diff --git a/annotation/Tools/bin/AGP2chromosome.pl b/annotation/Tools/bin/AGP2chromosome.pl deleted file mode 120000 index 9c4f303db..000000000 --- a/annotation/Tools/bin/AGP2chromosome.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/AGP2chromosome.pl \ No newline at end of file diff --git a/annotation/Tools/bin/CleanTaxonomicTreeFromNCBI.pl b/annotation/Tools/bin/CleanTaxonomicTreeFromNCBI.pl deleted file mode 120000 index 82ad475cc..000000000 --- a/annotation/Tools/bin/CleanTaxonomicTreeFromNCBI.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/NCBI/CleanTaxonomicTreeFromNCBI.pl \ No newline at end of file diff --git a/annotation/Tools/bin/README.md b/annotation/Tools/bin/README.md 
deleted file mode 100644 index e3f90fc63..000000000 --- a/annotation/Tools/bin/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# BIN folder - -Here is gathered all scripts of the annotation part of the repository. - -Many scripts ( especially those related to gff) need dependancies (NBIS library, bioperl, etc). -Please read the [Prerequisite installation information](https://github.com/NBISweden/GAAS/tree/master/annotation/Tools/Util/gff) and/or the [home page](https://github.com/NBISweden/GAAS). diff --git a/annotation/Tools/bin/add_track.rb b/annotation/Tools/bin/add_track.rb deleted file mode 120000 index 2cf0079e0..000000000 --- a/annotation/Tools/bin/add_track.rb +++ /dev/null @@ -1 +0,0 @@ -../..//WebApollo/apollo1/add_track.rb \ No newline at end of file diff --git a/annotation/Tools/bin/apollo_track_helper.rb b/annotation/Tools/bin/apollo_track_helper.rb deleted file mode 120000 index 3cc0ded2d..000000000 --- a/annotation/Tools/bin/apollo_track_helper.rb +++ /dev/null @@ -1 +0,0 @@ -../..//WebApollo/apollo2/apollo_track_helper.rb \ No newline at end of file diff --git a/annotation/Tools/bin/augustus2grid.pl b/annotation/Tools/bin/augustus2grid.pl deleted file mode 120000 index 5afd18d0f..000000000 --- a/annotation/Tools/bin/augustus2grid.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Grid/augustus2grid.pl \ No newline at end of file diff --git a/annotation/Tools/bin/augustus_create_hints.rb b/annotation/Tools/bin/augustus_create_hints.rb deleted file mode 120000 index 7594caa58..000000000 --- a/annotation/Tools/bin/augustus_create_hints.rb +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Abinitio/Augustus/augustus_create_hints.rb \ No newline at end of file diff --git a/annotation/Tools/bin/augustus_filter_training_set.pl b/annotation/Tools/bin/augustus_filter_training_set.pl deleted file mode 120000 index b93fc0679..000000000 --- a/annotation/Tools/bin/augustus_filter_training_set.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Abinitio/Augustus/augustus_filter_training_set.pl 
\ No newline at end of file diff --git a/annotation/Tools/bin/b2go2gff.pl b/annotation/Tools/bin/b2go2gff.pl deleted file mode 120000 index 8d3d49353..000000000 --- a/annotation/Tools/bin/b2go2gff.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Converter/b2go2gff.pl \ No newline at end of file diff --git a/annotation/Tools/bin/b2go2table.pl b/annotation/Tools/bin/b2go2table.pl deleted file mode 120000 index 3a9a8df1f..000000000 --- a/annotation/Tools/bin/b2go2table.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Converter/b2go2table.pl \ No newline at end of file diff --git a/annotation/Tools/bin/bam2bigwig.sh b/annotation/Tools/bin/bam2bigwig.sh deleted file mode 120000 index fc6bfa3dd..000000000 --- a/annotation/Tools/bin/bam2bigwig.sh +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Converter/bam2bigwig.sh \ No newline at end of file diff --git a/annotation/Tools/bin/bam_to_wiggle.py b/annotation/Tools/bin/bam_to_wiggle.py deleted file mode 120000 index b4db2615c..000000000 --- a/annotation/Tools/bin/bam_to_wiggle.py +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Converter/bam_to_wiggle.py \ No newline at end of file diff --git a/annotation/Tools/bin/bed2gff.pl b/annotation/Tools/bin/bed2gff.pl deleted file mode 120000 index 7e6912be7..000000000 --- a/annotation/Tools/bin/bed2gff.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Converter/bed2gff.pl \ No newline at end of file diff --git a/annotation/Tools/bin/bed2wiggle.rb b/annotation/Tools/bin/bed2wiggle.rb deleted file mode 120000 index 0abdec716..000000000 --- a/annotation/Tools/bin/bed2wiggle.rb +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Converter/bed2wiggle.rb \ No newline at end of file diff --git a/annotation/Tools/bin/bed_create_random_feature.pl b/annotation/Tools/bin/bed_create_random_feature.pl deleted file mode 120000 index 1d9c2a4d8..000000000 --- a/annotation/Tools/bin/bed_create_random_feature.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/bed_create_random_feature.pl \ No newline at end of file diff --git 
a/annotation/Tools/bin/blastp2grid.pl b/annotation/Tools/bin/blastp2grid.pl deleted file mode 120000 index 5dfe79ece..000000000 --- a/annotation/Tools/bin/blastp2grid.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Grid/blastp2grid.pl \ No newline at end of file diff --git a/annotation/Tools/bin/blat2grid.pl b/annotation/Tools/bin/blat2grid.pl deleted file mode 120000 index 2038fff42..000000000 --- a/annotation/Tools/bin/blat2grid.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Grid/blat2grid.pl \ No newline at end of file diff --git a/annotation/Tools/bin/build_template_WA_1_0_X.rb b/annotation/Tools/bin/build_template_WA_1_0_X.rb deleted file mode 120000 index eaeeb0423..000000000 --- a/annotation/Tools/bin/build_template_WA_1_0_X.rb +++ /dev/null @@ -1 +0,0 @@ -../..//WebApollo/apollo1/build_template_WA_1_0_X.rb \ No newline at end of file diff --git a/annotation/Tools/bin/busco_compare.pl b/annotation/Tools/bin/busco_compare.pl deleted file mode 120000 index a77909392..000000000 --- a/annotation/Tools/bin/busco_compare.pl +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Util/busco_compare.pl \ No newline at end of file diff --git a/annotation/Tools/bin/cegma_gff2bed.pl b/annotation/Tools/bin/cegma_gff2bed.pl deleted file mode 120000 index 23f98dfc6..000000000 --- a/annotation/Tools/bin/cegma_gff2bed.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Converter/cegma_gff2bed.pl \ No newline at end of file diff --git a/annotation/Tools/bin/chromosome_chunk.pl b/annotation/Tools/bin/chromosome_chunk.pl deleted file mode 120000 index ef540c858..000000000 --- a/annotation/Tools/bin/chromosome_chunk.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/EnsEMBL/chromosome_chunk.pl \ No newline at end of file diff --git a/annotation/Tools/bin/cp_all_links.sh b/annotation/Tools/bin/cp_all_links.sh deleted file mode 120000 index b256538b4..000000000 --- a/annotation/Tools/bin/cp_all_links.sh +++ /dev/null @@ -1 +0,0 @@ -../../Tools/System/cp_all_links.sh \ No newline at end of file diff --git 
a/annotation/Tools/bin/create_annotation_project.pl b/annotation/Tools/bin/create_annotation_project.pl deleted file mode 120000 index 0bc630a98..000000000 --- a/annotation/Tools/bin/create_annotation_project.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Manager/create_annotation_project.pl \ No newline at end of file diff --git a/annotation/Tools/bin/create_annotation_project.rb b/annotation/Tools/bin/create_annotation_project.rb deleted file mode 120000 index 38065ed2c..000000000 --- a/annotation/Tools/bin/create_annotation_project.rb +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Manager/create_annotation_project.rb \ No newline at end of file diff --git a/annotation/Tools/bin/create_delivery_dir.sh b/annotation/Tools/bin/create_delivery_dir.sh deleted file mode 120000 index 01ac66d20..000000000 --- a/annotation/Tools/bin/create_delivery_dir.sh +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Manager/create_delivery_dir.sh \ No newline at end of file diff --git a/annotation/Tools/bin/create_preautomated_report.pl b/annotation/Tools/bin/create_preautomated_report.pl deleted file mode 120000 index cdc348f6a..000000000 --- a/annotation/Tools/bin/create_preautomated_report.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Manager/create_preautomated_report.pl \ No newline at end of file diff --git a/annotation/Tools/bin/cufflinks2hints.rb b/annotation/Tools/bin/cufflinks2hints.rb deleted file mode 120000 index 225276ca8..000000000 --- a/annotation/Tools/bin/cufflinks2hints.rb +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Converter/cufflinks2hints.rb \ No newline at end of file diff --git a/annotation/Tools/bin/embl2gb.pl b/annotation/Tools/bin/embl2gb.pl deleted file mode 120000 index 0f9f003df..000000000 --- a/annotation/Tools/bin/embl2gb.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Converter/embl2gb.pl \ No newline at end of file diff --git a/annotation/Tools/bin/embl2gff.pl b/annotation/Tools/bin/embl2gff.pl deleted file mode 120000 index 7e6e581ac..000000000 --- 
a/annotation/Tools/bin/embl2gff.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Converter/embl2gff.pl \ No newline at end of file diff --git a/annotation/Tools/bin/eugene2maker.rb b/annotation/Tools/bin/eugene2maker.rb deleted file mode 120000 index 8a6d7299d..000000000 --- a/annotation/Tools/bin/eugene2maker.rb +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Converter/eugene2maker.rb \ No newline at end of file diff --git a/annotation/Tools/bin/exonerate2grid.pl b/annotation/Tools/bin/exonerate2grid.pl deleted file mode 120000 index 81043ba2f..000000000 --- a/annotation/Tools/bin/exonerate2grid.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Grid/exonerate2grid.pl \ No newline at end of file diff --git a/annotation/Tools/bin/fasta-splitter.pl b/annotation/Tools/bin/fasta-splitter.pl deleted file mode 120000 index e1d15c275..000000000 --- a/annotation/Tools/bin/fasta-splitter.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/fasta/fasta-splitter.pl \ No newline at end of file diff --git a/annotation/Tools/bin/fasta2snap_filter.pl b/annotation/Tools/bin/fasta2snap_filter.pl deleted file mode 120000 index 6c38f5d64..000000000 --- a/annotation/Tools/bin/fasta2snap_filter.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/fasta/fasta2snap_filter.pl \ No newline at end of file diff --git a/annotation/Tools/bin/fasta_bionano_filter_not_scaffolded_part.pl b/annotation/Tools/bin/fasta_bionano_filter_not_scaffolded_part.pl deleted file mode 120000 index 31b604f32..000000000 --- a/annotation/Tools/bin/fasta_bionano_filter_not_scaffolded_part.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/fasta/fasta_bionano_filter_not_scaffolded_part.pl \ No newline at end of file diff --git a/annotation/Tools/bin/fasta_checkProteins.pl b/annotation/Tools/bin/fasta_checkProteins.pl deleted file mode 120000 index 45b7c0ed8..000000000 --- a/annotation/Tools/bin/fasta_checkProteins.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/fasta/fasta_checkProteins.pl \ No newline at end of file diff --git 
a/annotation/Tools/bin/fasta_cleaner.pl b/annotation/Tools/bin/fasta_cleaner.pl deleted file mode 120000 index 762170a39..000000000 --- a/annotation/Tools/bin/fasta_cleaner.pl +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Util/fasta/fasta_cleaner.pl \ No newline at end of file diff --git a/annotation/Tools/bin/fasta_domain_extractor.pl b/annotation/Tools/bin/fasta_domain_extractor.pl deleted file mode 120000 index 570403e10..000000000 --- a/annotation/Tools/bin/fasta_domain_extractor.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/fasta/fasta_domain_extractor.pl \ No newline at end of file diff --git a/annotation/Tools/bin/fasta_extractFaFromMultiFa.sh b/annotation/Tools/bin/fasta_extractFaFromMultiFa.sh deleted file mode 120000 index bb8a042ca..000000000 --- a/annotation/Tools/bin/fasta_extractFaFromMultiFa.sh +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/fasta/fasta_extractFaFromMultiFa.sh \ No newline at end of file diff --git a/annotation/Tools/bin/fasta_extract_sequence_from_OG.pl b/annotation/Tools/bin/fasta_extract_sequence_from_OG.pl deleted file mode 120000 index 22de0bc1a..000000000 --- a/annotation/Tools/bin/fasta_extract_sequence_from_OG.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/fasta/fasta_extract_sequence_from_OG.pl \ No newline at end of file diff --git a/annotation/Tools/bin/fasta_extract_sequence_from_id.pl b/annotation/Tools/bin/fasta_extract_sequence_from_id.pl deleted file mode 120000 index cdd88140f..000000000 --- a/annotation/Tools/bin/fasta_extract_sequence_from_id.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/fasta/fasta_extract_sequence_from_id.pl \ No newline at end of file diff --git a/annotation/Tools/bin/fasta_filter_by_accession.rb b/annotation/Tools/bin/fasta_filter_by_accession.rb deleted file mode 120000 index dc287774c..000000000 --- a/annotation/Tools/bin/fasta_filter_by_accession.rb +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Util/fasta/fasta_filter_by_accession.rb \ No newline at end of file diff --git 
a/annotation/Tools/bin/fasta_filter_by_size.pl b/annotation/Tools/bin/fasta_filter_by_size.pl deleted file mode 120000 index 7a1088292..000000000 --- a/annotation/Tools/bin/fasta_filter_by_size.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/fasta/fasta_filter_by_size.pl \ No newline at end of file diff --git a/annotation/Tools/bin/fasta_get_longestORF.pl b/annotation/Tools/bin/fasta_get_longestORF.pl deleted file mode 120000 index b25cdc047..000000000 --- a/annotation/Tools/bin/fasta_get_longestORF.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/fasta/fasta_get_longestORF.pl \ No newline at end of file diff --git a/annotation/Tools/bin/fasta_removeFromFasta1_intersection_withFasta2ID.pl b/annotation/Tools/bin/fasta_removeFromFasta1_intersection_withFasta2ID.pl deleted file mode 120000 index 8d81beb24..000000000 --- a/annotation/Tools/bin/fasta_removeFromFasta1_intersection_withFasta2ID.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/fasta/fasta_removeFromFasta1_intersection_withFasta2ID.pl \ No newline at end of file diff --git a/annotation/Tools/bin/fasta_removeSeqFromIDlist.pl b/annotation/Tools/bin/fasta_removeSeqFromIDlist.pl deleted file mode 120000 index 43976a7c5..000000000 --- a/annotation/Tools/bin/fasta_removeSeqFromIDlist.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/fasta/fasta_removeSeqFromIDlist.pl \ No newline at end of file diff --git a/annotation/Tools/bin/fasta_size_select.rb b/annotation/Tools/bin/fasta_size_select.rb deleted file mode 120000 index f8eb0ccc5..000000000 --- a/annotation/Tools/bin/fasta_size_select.rb +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Util/fasta/fasta_size_select.rb \ No newline at end of file diff --git a/annotation/Tools/bin/fasta_splitMultFastaFile.sh b/annotation/Tools/bin/fasta_splitMultFastaFile.sh deleted file mode 120000 index 1fabdf6db..000000000 --- a/annotation/Tools/bin/fasta_splitMultFastaFile.sh +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/fasta/fasta_splitMultFastaFile.sh \ No newline at end of file 
diff --git a/annotation/Tools/bin/fasta_spliter_overlap.pl b/annotation/Tools/bin/fasta_spliter_overlap.pl deleted file mode 120000 index 34d94aa0e..000000000 --- a/annotation/Tools/bin/fasta_spliter_overlap.pl +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Util/fasta/fasta_spliter_overlap.pl \ No newline at end of file diff --git a/annotation/Tools/bin/fasta_statisticsAndPlot.pl b/annotation/Tools/bin/fasta_statisticsAndPlot.pl deleted file mode 120000 index 1a36c11c3..000000000 --- a/annotation/Tools/bin/fasta_statisticsAndPlot.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/fasta/fasta_statisticsAndPlot.pl \ No newline at end of file diff --git a/annotation/Tools/bin/fasta_trim_assembly.rb b/annotation/Tools/bin/fasta_trim_assembly.rb deleted file mode 120000 index ab4758cb1..000000000 --- a/annotation/Tools/bin/fasta_trim_assembly.rb +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Util/fasta/fasta_trim_assembly.rb \ No newline at end of file diff --git a/annotation/Tools/bin/fastq_check_sync_pair1_pair2.pl b/annotation/Tools/bin/fastq_check_sync_pair1_pair2.pl deleted file mode 120000 index 93484bf8e..000000000 --- a/annotation/Tools/bin/fastq_check_sync_pair1_pair2.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/fastq/fastq_check_sync_pair1_pair2.pl \ No newline at end of file diff --git a/annotation/Tools/bin/fastq_deinterleave_bash.pl b/annotation/Tools/bin/fastq_deinterleave_bash.pl deleted file mode 120000 index 73038dd9e..000000000 --- a/annotation/Tools/bin/fastq_deinterleave_bash.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/fastq/fastq_deinterleave_bash.pl \ No newline at end of file diff --git a/annotation/Tools/bin/fastq_guessMyFormat.pl b/annotation/Tools/bin/fastq_guessMyFormat.pl deleted file mode 120000 index c4dfba108..000000000 --- a/annotation/Tools/bin/fastq_guessMyFormat.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/fastq/fastq_guessMyFormat.pl \ No newline at end of file diff --git a/annotation/Tools/bin/fastq_interleave.py 
b/annotation/Tools/bin/fastq_interleave.py deleted file mode 120000 index 02c3ff885..000000000 --- a/annotation/Tools/bin/fastq_interleave.py +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/fastq/fastq_interleave.py \ No newline at end of file diff --git a/annotation/Tools/bin/fastq_pairfq_lite.pl b/annotation/Tools/bin/fastq_pairfq_lite.pl deleted file mode 120000 index 0958ccb8e..000000000 --- a/annotation/Tools/bin/fastq_pairfq_lite.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/fastq/fastq_pairfq_lite.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gb2embl.pl b/annotation/Tools/bin/gb2embl.pl deleted file mode 120000 index 8ea11334e..000000000 --- a/annotation/Tools/bin/gb2embl.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Converter/gb2embl.pl \ No newline at end of file diff --git a/annotation/Tools/bin/get_ensembl_genome_JD.rb b/annotation/Tools/bin/get_ensembl_genome_JD.rb deleted file mode 120000 index a838f8221..000000000 --- a/annotation/Tools/bin/get_ensembl_genome_JD.rb +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/EnsEMBL/get_ensembl_genome_JD.rb \ No newline at end of file diff --git a/annotation/Tools/bin/gff2bed.pl b/annotation/Tools/bin/gff2bed.pl deleted file mode 120000 index c38d92579..000000000 --- a/annotation/Tools/bin/gff2bed.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Converter/gff2bed.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff2embl.pl b/annotation/Tools/bin/gff2embl.pl deleted file mode 120000 index 1074e3696..000000000 --- a/annotation/Tools/bin/gff2embl.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Dev/perl/gff2embl.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff2gtf.pl b/annotation/Tools/bin/gff2gtf.pl deleted file mode 120000 index 8823159b1..000000000 --- a/annotation/Tools/bin/gff2gtf.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Converter/gff2gtf.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff2table.pl b/annotation/Tools/bin/gff2table.pl deleted file mode 
120000 index f587fd923..000000000 --- a/annotation/Tools/bin/gff2table.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Converter/gff2table.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff2zff.pl b/annotation/Tools/bin/gff2zff.pl deleted file mode 120000 index cbb325876..000000000 --- a/annotation/Tools/bin/gff2zff.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Converter/gff2zff.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_plotGO_mapTree.pl b/annotation/Tools/bin/gff3_plotGO_mapTree.pl deleted file mode 120000 index c6307a508..000000000 --- a/annotation/Tools/bin/gff3_plotGO_mapTree.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Dev/perl/gff3_plotGO_mapTree.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_Prokka_inferNameFromAttributes.pl b/annotation/Tools/bin/gff3_sp_Prokka_inferNameFromAttributes.pl deleted file mode 120000 index e14f228af..000000000 --- a/annotation/Tools/bin/gff3_sp_Prokka_inferNameFromAttributes.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_Prokka_inferNameFromAttributes.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_add_introns.pl b/annotation/Tools/bin/gff3_sp_add_introns.pl deleted file mode 120000 index ee2c92dfd..000000000 --- a/annotation/Tools/bin/gff3_sp_add_introns.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_add_introns.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_add_start_and_stop.pl b/annotation/Tools/bin/gff3_sp_add_start_and_stop.pl deleted file mode 120000 index 59b22aac0..000000000 --- a/annotation/Tools/bin/gff3_sp_add_start_and_stop.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_add_start_and_stop.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_alignment_output_style.pl b/annotation/Tools/bin/gff3_sp_alignment_output_style.pl deleted file mode 120000 index a16bc407f..000000000 --- a/annotation/Tools/bin/gff3_sp_alignment_output_style.pl +++ 
/dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_alignment_output_style.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_clipN_seqExtremities_and_fixCoordinates.pl b/annotation/Tools/bin/gff3_sp_clipN_seqExtremities_and_fixCoordinates.pl deleted file mode 120000 index 9e5a6078a..000000000 --- a/annotation/Tools/bin/gff3_sp_clipN_seqExtremities_and_fixCoordinates.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_clipN_seqExtremities_and_fixCoordinates.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_clip_UTRs.pl b/annotation/Tools/bin/gff3_sp_clip_UTRs.pl deleted file mode 120000 index 1c080adb8..000000000 --- a/annotation/Tools/bin/gff3_sp_clip_UTRs.pl +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Util/gff/gff3_sp_clip_UTRs.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_complement_annotations.pl b/annotation/Tools/bin/gff3_sp_complement_annotations.pl deleted file mode 120000 index fc241185b..000000000 --- a/annotation/Tools/bin/gff3_sp_complement_annotations.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_complement_annotations.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_ensembl_output_style.pl b/annotation/Tools/bin/gff3_sp_ensembl_output_style.pl deleted file mode 120000 index cb4e745e3..000000000 --- a/annotation/Tools/bin/gff3_sp_ensembl_output_style.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_ensembl_output_style.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_extract_attributes.pl b/annotation/Tools/bin/gff3_sp_extract_attributes.pl deleted file mode 120000 index 74f7c4569..000000000 --- a/annotation/Tools/bin/gff3_sp_extract_attributes.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_extract_attributes.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_extract_sequences.pl b/annotation/Tools/bin/gff3_sp_extract_sequences.pl deleted file mode 120000 index 
1c11edb7b..000000000 --- a/annotation/Tools/bin/gff3_sp_extract_sequences.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_extract_sequences.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_filter_by_ORF_size.pl b/annotation/Tools/bin/gff3_sp_filter_by_ORF_size.pl deleted file mode 120000 index ba0609d80..000000000 --- a/annotation/Tools/bin/gff3_sp_filter_by_ORF_size.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_filter_by_ORF_size.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_filter_by_locus_distance.pl b/annotation/Tools/bin/gff3_sp_filter_by_locus_distance.pl deleted file mode 120000 index a1db2daa0..000000000 --- a/annotation/Tools/bin/gff3_sp_filter_by_locus_distance.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_filter_by_locus_distance.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_filter_by_mrnaBlastValue.pl b/annotation/Tools/bin/gff3_sp_filter_by_mrnaBlastValue.pl deleted file mode 120000 index c80404a91..000000000 --- a/annotation/Tools/bin/gff3_sp_filter_by_mrnaBlastValue.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_filter_by_mrnaBlastValue.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_filter_incomplete_gene_coding_models.pl b/annotation/Tools/bin/gff3_sp_filter_incomplete_gene_coding_models.pl deleted file mode 120000 index 2e82d7b69..000000000 --- a/annotation/Tools/bin/gff3_sp_filter_incomplete_gene_coding_models.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_filter_incomplete_gene_coding_models.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_fix_cds_frame.pl b/annotation/Tools/bin/gff3_sp_fix_cds_frame.pl deleted file mode 120000 index 36cbc59fd..000000000 --- a/annotation/Tools/bin/gff3_sp_fix_cds_frame.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_fix_cds_frame.pl \ No newline at end of file diff --git 
a/annotation/Tools/bin/gff3_sp_fix_features_locations_duplicated.pl b/annotation/Tools/bin/gff3_sp_fix_features_locations_duplicated.pl deleted file mode 120000 index a3b11db37..000000000 --- a/annotation/Tools/bin/gff3_sp_fix_features_locations_duplicated.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_fix_features_locations_duplicated.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_fix_fusion.pl b/annotation/Tools/bin/gff3_sp_fix_fusion.pl deleted file mode 120000 index d48a75b9f..000000000 --- a/annotation/Tools/bin/gff3_sp_fix_fusion.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_fix_fusion.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_fix_longest_ORF.pl b/annotation/Tools/bin/gff3_sp_fix_longest_ORF.pl deleted file mode 120000 index 60febe54c..000000000 --- a/annotation/Tools/bin/gff3_sp_fix_longest_ORF.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_fix_longest_ORF.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_fix_overlaping_genes.pl b/annotation/Tools/bin/gff3_sp_fix_overlaping_genes.pl deleted file mode 120000 index ac2249395..000000000 --- a/annotation/Tools/bin/gff3_sp_fix_overlaping_genes.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_fix_overlaping_genes.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_fix_small_exon_from_extremities.pl b/annotation/Tools/bin/gff3_sp_fix_small_exon_from_extremities.pl deleted file mode 120000 index 4004f1459..000000000 --- a/annotation/Tools/bin/gff3_sp_fix_small_exon_from_extremities.pl +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Util/gff/gff3_sp_fix_small_exon_from_extremities.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_flag_short_intron.pl b/annotation/Tools/bin/gff3_sp_flag_short_intron.pl deleted file mode 120000 index 2c8aeef6b..000000000 --- a/annotation/Tools/bin/gff3_sp_flag_short_intron.pl +++ /dev/null @@ -1 +0,0 @@ 
-../../Tools/Util/gff/gff3_sp_flag_short_intron.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_functional_statistics.pl b/annotation/Tools/bin/gff3_sp_functional_statistics.pl deleted file mode 120000 index 4cfa8f222..000000000 --- a/annotation/Tools/bin/gff3_sp_functional_statistics.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_functional_statistics.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_infer_function_colocalisation.pl b/annotation/Tools/bin/gff3_sp_infer_function_colocalisation.pl deleted file mode 120000 index 2fb023958..000000000 --- a/annotation/Tools/bin/gff3_sp_infer_function_colocalisation.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_infer_function_colocalisation.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_keep_longest_isoform.pl b/annotation/Tools/bin/gff3_sp_keep_longest_isoform.pl deleted file mode 120000 index 366ffc196..000000000 --- a/annotation/Tools/bin/gff3_sp_keep_longest_isoform.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_keep_longest_isoform.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_list_short_introns.pl b/annotation/Tools/bin/gff3_sp_list_short_introns.pl deleted file mode 120000 index 98c9d4fe6..000000000 --- a/annotation/Tools/bin/gff3_sp_list_short_introns.pl +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Util/gff/gff3_sp_list_short_introns.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_load_function_from_protein_align.pl b/annotation/Tools/bin/gff3_sp_load_function_from_protein_align.pl deleted file mode 120000 index 7b22cc286..000000000 --- a/annotation/Tools/bin/gff3_sp_load_function_from_protein_align.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_load_function_from_protein_align.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_manage_IDs.pl b/annotation/Tools/bin/gff3_sp_manage_IDs.pl deleted file mode 120000 
index 81da61da4..000000000 --- a/annotation/Tools/bin/gff3_sp_manage_IDs.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_manage_IDs.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_manage_UTRs.pl b/annotation/Tools/bin/gff3_sp_manage_UTRs.pl deleted file mode 120000 index c7420103f..000000000 --- a/annotation/Tools/bin/gff3_sp_manage_UTRs.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_manage_UTRs.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_manage_attributes.pl b/annotation/Tools/bin/gff3_sp_manage_attributes.pl deleted file mode 120000 index 4adf34206..000000000 --- a/annotation/Tools/bin/gff3_sp_manage_attributes.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_manage_attributes.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_manage_functional_annotation.pl b/annotation/Tools/bin/gff3_sp_manage_functional_annotation.pl deleted file mode 120000 index ebf92d0ee..000000000 --- a/annotation/Tools/bin/gff3_sp_manage_functional_annotation.pl +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Util/gff/gff3_sp_manage_functional_annotation.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_manage_introns.pl b/annotation/Tools/bin/gff3_sp_manage_introns.pl deleted file mode 120000 index 5d76498b8..000000000 --- a/annotation/Tools/bin/gff3_sp_manage_introns.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_manage_introns.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_merge_annotations.pl b/annotation/Tools/bin/gff3_sp_merge_annotations.pl deleted file mode 120000 index 780a7bdf7..000000000 --- a/annotation/Tools/bin/gff3_sp_merge_annotations.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_merge_annotations.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_splitByLevel2Feature.pl b/annotation/Tools/bin/gff3_sp_splitByLevel2Feature.pl deleted file mode 120000 index 
39feec0e0..000000000 --- a/annotation/Tools/bin/gff3_sp_splitByLevel2Feature.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_splitByLevel2Feature.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_statistics.pl b/annotation/Tools/bin/gff3_sp_statistics.pl deleted file mode 120000 index 7e2bf4cdf..000000000 --- a/annotation/Tools/bin/gff3_sp_statistics.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_statistics.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_to_tabulated.pl b/annotation/Tools/bin/gff3_sp_to_tabulated.pl deleted file mode 120000 index eea18e7c1..000000000 --- a/annotation/Tools/bin/gff3_sp_to_tabulated.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Converter/gff3_sp_to_tabulated.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sp_webApollo_compliant.pl b/annotation/Tools/bin/gff3_sp_webApollo_compliant.pl deleted file mode 120000 index d514ee92d..000000000 --- a/annotation/Tools/bin/gff3_sp_webApollo_compliant.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sp_webApollo_compliant.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sq_add_hash_tag.pl b/annotation/Tools/bin/gff3_sq_add_hash_tag.pl deleted file mode 120000 index 113eb2a14..000000000 --- a/annotation/Tools/bin/gff3_sq_add_hash_tag.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sq_add_hash_tag.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sq_add_locus_tag.pl b/annotation/Tools/bin/gff3_sq_add_locus_tag.pl deleted file mode 120000 index 6c6e0b7e0..000000000 --- a/annotation/Tools/bin/gff3_sq_add_locus_tag.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sq_add_locus_tag.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sq_create_stable_id.pl b/annotation/Tools/bin/gff3_sq_create_stable_id.pl deleted file mode 120000 index 53562128e..000000000 --- a/annotation/Tools/bin/gff3_sq_create_stable_id.pl +++ /dev/null 
@@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sq_create_stable_id.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sq_keep_annotation_from_fastaSeq.pl b/annotation/Tools/bin/gff3_sq_keep_annotation_from_fastaSeq.pl deleted file mode 120000 index 664af1971..000000000 --- a/annotation/Tools/bin/gff3_sq_keep_annotation_from_fastaSeq.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sq_keep_annotation_from_fastaSeq.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sq_list_attributes.pl b/annotation/Tools/bin/gff3_sq_list_attributes.pl deleted file mode 120000 index b2dd71fae..000000000 --- a/annotation/Tools/bin/gff3_sq_list_attributes.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sq_list_attributes.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sq_manage_ID.pl b/annotation/Tools/bin/gff3_sq_manage_ID.pl deleted file mode 120000 index 38f247ab9..000000000 --- a/annotation/Tools/bin/gff3_sq_manage_ID.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sq_manage_ID.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sq_manage_ID_cufflinks.pl b/annotation/Tools/bin/gff3_sq_manage_ID_cufflinks.pl deleted file mode 120000 index df7007ac5..000000000 --- a/annotation/Tools/bin/gff3_sq_manage_ID_cufflinks.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sq_manage_ID_cufflinks.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sq_mask.pl b/annotation/Tools/bin/gff3_sq_mask.pl deleted file mode 120000 index ab9e424aa..000000000 --- a/annotation/Tools/bin/gff3_sq_mask.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sq_mask.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sq_remove_redundant_entries.pl b/annotation/Tools/bin/gff3_sq_remove_redundant_entries.pl deleted file mode 120000 index a51227e6f..000000000 --- a/annotation/Tools/bin/gff3_sq_remove_redundant_entries.pl +++ /dev/null @@ -1 +0,0 @@ 
-../../Tools/Util/gff/gff3_sq_remove_redundant_entries.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sq_repeats_analyzer.pl b/annotation/Tools/bin/gff3_sq_repeats_analyzer.pl deleted file mode 120000 index b184bf956..000000000 --- a/annotation/Tools/bin/gff3_sq_repeats_analyzer.pl +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Util/gff/gff3_sq_repeats_analyzer.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sq_rfam_analyzer.pl b/annotation/Tools/bin/gff3_sq_rfam_analyzer.pl deleted file mode 120000 index 929f433e8..000000000 --- a/annotation/Tools/bin/gff3_sq_rfam_analyzer.pl +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Util/gff/gff3_sq_rfam_analyzer.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sq_split.pl b/annotation/Tools/bin/gff3_sq_split.pl deleted file mode 120000 index 152f70e6f..000000000 --- a/annotation/Tools/bin/gff3_sq_split.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sq_split.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gff3_sq_stat_basic.pl b/annotation/Tools/bin/gff3_sq_stat_basic.pl deleted file mode 120000 index 41340b6ef..000000000 --- a/annotation/Tools/bin/gff3_sq_stat_basic.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/gff/gff3_sq_stat_basic.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gmod_gff3_preprocessor.pl b/annotation/Tools/bin/gmod_gff3_preprocessor.pl deleted file mode 120000 index 71a65f2df..000000000 --- a/annotation/Tools/bin/gmod_gff3_preprocessor.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Maker/gmod_gff3_preprocessor.pl \ No newline at end of file diff --git a/annotation/Tools/bin/gxf_to_gff3.pl b/annotation/Tools/bin/gxf_to_gff3.pl deleted file mode 120000 index ec85bd405..000000000 --- a/annotation/Tools/bin/gxf_to_gff3.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Converter/gxf_to_gff3.pl \ No newline at end of file diff --git a/annotation/Tools/bin/info_BioPerlVersion.sh 
b/annotation/Tools/bin/info_BioPerlVersion.sh deleted file mode 120000 index 6f80c09cd..000000000 --- a/annotation/Tools/bin/info_BioPerlVersion.sh +++ /dev/null @@ -1 +0,0 @@ -../../Tools/System/info_BioPerlVersion.sh \ No newline at end of file diff --git a/annotation/Tools/bin/info_find_perl_module.sh b/annotation/Tools/bin/info_find_perl_module.sh deleted file mode 120000 index 9d145c724..000000000 --- a/annotation/Tools/bin/info_find_perl_module.sh +++ /dev/null @@ -1 +0,0 @@ -../../Tools/System/info_find_perl_module.sh \ No newline at end of file diff --git a/annotation/Tools/bin/install_WA_2_0_X.rb b/annotation/Tools/bin/install_WA_2_0_X.rb deleted file mode 120000 index 79b4d003c..000000000 --- a/annotation/Tools/bin/install_WA_2_0_X.rb +++ /dev/null @@ -1 +0,0 @@ -../..//WebApollo/apollo2/install_WA_2_0_X.rb \ No newline at end of file diff --git a/annotation/Tools/bin/interpro2grid.pl b/annotation/Tools/bin/interpro2grid.pl deleted file mode 120000 index d300ae6cf..000000000 --- a/annotation/Tools/bin/interpro2grid.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Grid/interpro2grid.pl \ No newline at end of file diff --git a/annotation/Tools/bin/ips2genomic.pl b/annotation/Tools/bin/ips2genomic.pl deleted file mode 120000 index e7264afa3..000000000 --- a/annotation/Tools/bin/ips2genomic.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Converter/ips2genomic.pl \ No newline at end of file diff --git a/annotation/Tools/bin/jgi2gff3.pl b/annotation/Tools/bin/jgi2gff3.pl deleted file mode 120000 index 44ac72117..000000000 --- a/annotation/Tools/bin/jgi2gff3.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Converter/jgi2gff3.pl \ No newline at end of file diff --git a/annotation/Tools/bin/junctions2hints.pl b/annotation/Tools/bin/junctions2hints.pl deleted file mode 120000 index edabe9755..000000000 --- a/annotation/Tools/bin/junctions2hints.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Abinitio/Augustus/junctions2hints.pl \ No newline at end of file diff --git 
a/annotation/Tools/bin/kraken_statMap.pl b/annotation/Tools/bin/kraken_statMap.pl deleted file mode 120000 index 704a7750c..000000000 --- a/annotation/Tools/bin/kraken_statMap.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/ComparativeGenomic/kraken_statMap.pl \ No newline at end of file diff --git a/annotation/Tools/bin/lastz2grid.pl b/annotation/Tools/bin/lastz2grid.pl deleted file mode 120000 index 6345ebd76..000000000 --- a/annotation/Tools/bin/lastz2grid.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Grid/lastz2grid.pl \ No newline at end of file diff --git a/annotation/Tools/bin/maker_AEDmeanInGffFile.sh b/annotation/Tools/bin/maker_AEDmeanInGffFile.sh deleted file mode 120000 index a5f4d6275..000000000 --- a/annotation/Tools/bin/maker_AEDmeanInGffFile.sh +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Maker/maker_AEDmeanInGffFile.sh \ No newline at end of file diff --git a/annotation/Tools/bin/maker_AEDplot.pl b/annotation/Tools/bin/maker_AEDplot.pl deleted file mode 120000 index 76aa87e8b..000000000 --- a/annotation/Tools/bin/maker_AEDplot.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Maker/maker_AEDplot.pl \ No newline at end of file diff --git a/annotation/Tools/bin/maker_check_progress.sh b/annotation/Tools/bin/maker_check_progress.sh deleted file mode 120000 index 12a048b0b..000000000 --- a/annotation/Tools/bin/maker_check_progress.sh +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Maker/maker_check_progress.sh \ No newline at end of file diff --git a/annotation/Tools/bin/maker_check_progress_deeply.sh b/annotation/Tools/bin/maker_check_progress_deeply.sh deleted file mode 120000 index 3f02a03d1..000000000 --- a/annotation/Tools/bin/maker_check_progress_deeply.sh +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Maker/maker_check_progress_deeply.sh \ No newline at end of file diff --git a/annotation/Tools/bin/maker_getRidOfContig_v2.pl b/annotation/Tools/bin/maker_getRidOfContig_v2.pl deleted file mode 120000 index 98b8fe6c5..000000000 --- 
a/annotation/Tools/bin/maker_getRidOfContig_v2.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Maker/maker_getRidOfContig_v2.pl \ No newline at end of file diff --git a/annotation/Tools/bin/maker_get_rm_genome.pl b/annotation/Tools/bin/maker_get_rm_genome.pl deleted file mode 120000 index 1989b9039..000000000 --- a/annotation/Tools/bin/maker_get_rm_genome.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Maker/maker_get_rm_genome.pl \ No newline at end of file diff --git a/annotation/Tools/bin/maker_merge_outputs_from_datastore.pl b/annotation/Tools/bin/maker_merge_outputs_from_datastore.pl deleted file mode 120000 index a8d01c043..000000000 --- a/annotation/Tools/bin/maker_merge_outputs_from_datastore.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Maker/maker_merge_outputs_from_datastore.pl \ No newline at end of file diff --git a/annotation/Tools/bin/maker_merge_outputs_from_index.pl b/annotation/Tools/bin/maker_merge_outputs_from_index.pl deleted file mode 120000 index 1d004584e..000000000 --- a/annotation/Tools/bin/maker_merge_outputs_from_index.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Maker/maker_merge_outputs_from_index.pl \ No newline at end of file diff --git a/annotation/Tools/bin/maker_moveResultsSafely.sh b/annotation/Tools/bin/maker_moveResultsSafely.sh deleted file mode 120000 index 5ffc3d4ff..000000000 --- a/annotation/Tools/bin/maker_moveResultsSafely.sh +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Maker/maker_moveResultsSafely.sh \ No newline at end of file diff --git a/annotation/Tools/bin/maker_select_models_by_AED_score.pl b/annotation/Tools/bin/maker_select_models_by_AED_score.pl deleted file mode 120000 index 1a3a45127..000000000 --- a/annotation/Tools/bin/maker_select_models_by_AED_score.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Maker/maker_select_models_by_AED_score.pl \ No newline at end of file diff --git a/annotation/Tools/bin/maker_split_file.sh b/annotation/Tools/bin/maker_split_file.sh deleted file mode 120000 index c2db4c74a..000000000 --- 
a/annotation/Tools/bin/maker_split_file.sh +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Maker/maker_split_file.sh \ No newline at end of file diff --git a/annotation/Tools/bin/makergff2evm.pl b/annotation/Tools/bin/makergff2evm.pl deleted file mode 120000 index cafc1994e..000000000 --- a/annotation/Tools/bin/makergff2evm.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Converter/makergff2evm.pl \ No newline at end of file diff --git a/annotation/Tools/bin/manage_backup.sh b/annotation/Tools/bin/manage_backup.sh deleted file mode 120000 index 54cc699da..000000000 --- a/annotation/Tools/bin/manage_backup.sh +++ /dev/null @@ -1 +0,0 @@ -../../Tools/System/manage_backup.sh \ No newline at end of file diff --git a/annotation/Tools/bin/manage_species.rb b/annotation/Tools/bin/manage_species.rb deleted file mode 120000 index 15f26bc9c..000000000 --- a/annotation/Tools/bin/manage_species.rb +++ /dev/null @@ -1 +0,0 @@ -../..//WebApollo/apollo2/manage_species.rb \ No newline at end of file diff --git a/annotation/Tools/bin/mfannot2gff.pl b/annotation/Tools/bin/mfannot2gff.pl deleted file mode 120000 index acdaa1f44..000000000 --- a/annotation/Tools/bin/mfannot2gff.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Converter/mfannot2gff.pl \ No newline at end of file diff --git a/annotation/Tools/bin/multiplotAll.R b/annotation/Tools/bin/multiplotAll.R deleted file mode 120000 index 39fba66e6..000000000 --- a/annotation/Tools/bin/multiplotAll.R +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/R/multiplotAll.R \ No newline at end of file diff --git a/annotation/Tools/bin/ncbi_get_genome_tree.pl b/annotation/Tools/bin/ncbi_get_genome_tree.pl deleted file mode 120000 index fac2cfe9d..000000000 --- a/annotation/Tools/bin/ncbi_get_genome_tree.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/NCBI/ncbi_get_genome_tree.pl \ No newline at end of file diff --git a/annotation/Tools/bin/ncbi_get_reference_data.pl b/annotation/Tools/bin/ncbi_get_reference_data.pl deleted file mode 120000 index 
25387ed46..000000000 --- a/annotation/Tools/bin/ncbi_get_reference_data.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/NCBI/ncbi_get_reference_data.pl \ No newline at end of file diff --git a/annotation/Tools/bin/ncbi_get_sequence_from_list.pl b/annotation/Tools/bin/ncbi_get_sequence_from_list.pl deleted file mode 120000 index bd2477d48..000000000 --- a/annotation/Tools/bin/ncbi_get_sequence_from_list.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/NCBI/ncbi_get_sequence_from_list.pl \ No newline at end of file diff --git a/annotation/Tools/bin/new_species_WA_1_0_X.rb b/annotation/Tools/bin/new_species_WA_1_0_X.rb deleted file mode 120000 index ad12266f6..000000000 --- a/annotation/Tools/bin/new_species_WA_1_0_X.rb +++ /dev/null @@ -1 +0,0 @@ -../..//WebApollo/apollo1/new_species_WA_1_0_X.rb \ No newline at end of file diff --git a/annotation/Tools/bin/orthomcl_analyzeOG.pl b/annotation/Tools/bin/orthomcl_analyzeOG.pl deleted file mode 120000 index 8f8a5302b..000000000 --- a/annotation/Tools/bin/orthomcl_analyzeOG.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/ComparativeGenomic/orthomcl_analyzeOG.pl \ No newline at end of file diff --git a/annotation/Tools/bin/pasa_create_chunks.rb b/annotation/Tools/bin/pasa_create_chunks.rb deleted file mode 120000 index b54a3d5ea..000000000 --- a/annotation/Tools/bin/pasa_create_chunks.rb +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Util/pasa/pasa_create_chunks.rb \ No newline at end of file diff --git a/annotation/Tools/bin/pasa_find_duplicates.rb b/annotation/Tools/bin/pasa_find_duplicates.rb deleted file mode 120000 index 82a7f9df6..000000000 --- a/annotation/Tools/bin/pasa_find_duplicates.rb +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Util/pasa/pasa_find_duplicates.rb \ No newline at end of file diff --git a/annotation/Tools/bin/pfam2grid.pl b/annotation/Tools/bin/pfam2grid.pl deleted file mode 120000 index b2b0b35c7..000000000 --- a/annotation/Tools/bin/pfam2grid.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Grid/pfam2grid.pl \ No newline 
at end of file diff --git a/annotation/Tools/bin/prepare_matrice_by_window_v2.pl b/annotation/Tools/bin/prepare_matrice_by_window_v2.pl deleted file mode 120000 index cd9b2449f..000000000 --- a/annotation/Tools/bin/prepare_matrice_by_window_v2.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/ComparativeGenomic/prepare_matrice_by_window_v2.pl \ No newline at end of file diff --git a/annotation/Tools/bin/protein2hints.rb b/annotation/Tools/bin/protein2hints.rb deleted file mode 120000 index 70d8f6c2b..000000000 --- a/annotation/Tools/bin/protein2hints.rb +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Abinitio/Augustus/protein2hints.rb \ No newline at end of file diff --git a/annotation/Tools/bin/rakefile_create_project.rb b/annotation/Tools/bin/rakefile_create_project.rb deleted file mode 120000 index 56e4e0443..000000000 --- a/annotation/Tools/bin/rakefile_create_project.rb +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/EnsEMBL/rakefile_create_project.rb \ No newline at end of file diff --git a/annotation/Tools/bin/refresh_list.sh b/annotation/Tools/bin/refresh_list.sh deleted file mode 100755 index a486d2f1b..000000000 --- a/annotation/Tools/bin/refresh_list.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash - -#While looking at all script within the repo we skip all Deprecated folder and what it contains -for i in $(find ../.. -not \( -path */Deprecated -prune \) -name '*.pl' -o -name '*.sh' -o -name '*.py' -o -name '*.r' -o -name '*.R' -o -name '*.rb');do - - name=$(basename $i) - - #Populate scripts - #if script does not exist, create a link - if [[ ! -f $name ]];then - ln -s $i - fi - - #remove script no existing anymore - # test if file exists (test actual file, not symbolic link) - for j in *;do - if [ ! 
-e "$j" ] ; then - echo "$j link broken" - # code if the symlink is broken - unlink $j - fi - done - - -done diff --git a/annotation/Tools/bin/removeIsoforms.sh b/annotation/Tools/bin/removeIsoforms.sh deleted file mode 120000 index 6c77be46a..000000000 --- a/annotation/Tools/bin/removeIsoforms.sh +++ /dev/null @@ -1 +0,0 @@ -../../Tools/EnsEMBL/removeIsoforms.sh \ No newline at end of file diff --git a/annotation/Tools/bin/reveal_hidden_characters.pl b/annotation/Tools/bin/reveal_hidden_characters.pl deleted file mode 120000 index f5746c89c..000000000 --- a/annotation/Tools/bin/reveal_hidden_characters.pl +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/System/reveal_hidden_characters.pl \ No newline at end of file diff --git a/annotation/Tools/bin/rfam2apollo.rb b/annotation/Tools/bin/rfam2apollo.rb deleted file mode 120000 index 8f220f6d1..000000000 --- a/annotation/Tools/bin/rfam2apollo.rb +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Converter/rfam2apollo.rb \ No newline at end of file diff --git a/annotation/Tools/bin/rfam2grid.pl b/annotation/Tools/bin/rfam2grid.pl deleted file mode 120000 index 3fc8057f7..000000000 --- a/annotation/Tools/bin/rfam2grid.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Grid/rfam2grid.pl \ No newline at end of file diff --git a/annotation/Tools/bin/rfam_scan-11.0.pl b/annotation/Tools/bin/rfam_scan-11.0.pl deleted file mode 120000 index 7261df859..000000000 --- a/annotation/Tools/bin/rfam_scan-11.0.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/rfam_scan-11.0.pl \ No newline at end of file diff --git a/annotation/Tools/bin/rs_PlotDensityAEDFromMakerAnnotationFiles.R b/annotation/Tools/bin/rs_PlotDensityAEDFromMakerAnnotationFiles.R deleted file mode 120000 index 279357f16..000000000 --- a/annotation/Tools/bin/rs_PlotDensityAEDFromMakerAnnotationFiles.R +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Maker/rs_PlotDensityAEDFromMakerAnnotationFiles.R \ No newline at end of file diff --git a/annotation/Tools/bin/rs_PlotDensityOneRowPerFile.R 
b/annotation/Tools/bin/rs_PlotDensityOneRowPerFile.R deleted file mode 120000 index 666c7e794..000000000 --- a/annotation/Tools/bin/rs_PlotDensityOneRowPerFile.R +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Maker/rs_PlotDensityOneRowPerFile.R \ No newline at end of file diff --git a/annotation/Tools/bin/run_blast2go.pl b/annotation/Tools/bin/run_blast2go.pl deleted file mode 120000 index 81067ffe0..000000000 --- a/annotation/Tools/bin/run_blast2go.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Wrapper/run_blast2go.pl \ No newline at end of file diff --git a/annotation/Tools/bin/run_interproscan.pl b/annotation/Tools/bin/run_interproscan.pl deleted file mode 120000 index 3b050d5c5..000000000 --- a/annotation/Tools/bin/run_interproscan.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Wrapper/run_interproscan.pl \ No newline at end of file diff --git a/annotation/Tools/bin/scaffold2AGP.pl b/annotation/Tools/bin/scaffold2AGP.pl deleted file mode 120000 index ea588d25d..000000000 --- a/annotation/Tools/bin/scaffold2AGP.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/scaffold2AGP.pl \ No newline at end of file diff --git a/annotation/Tools/bin/scipio2grid.pl b/annotation/Tools/bin/scipio2grid.pl deleted file mode 120000 index 59ede17d7..000000000 --- a/annotation/Tools/bin/scipio2grid.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Grid/scipio2grid.pl \ No newline at end of file diff --git a/annotation/Tools/bin/screen_mito.pl b/annotation/Tools/bin/screen_mito.pl deleted file mode 120000 index c73f71d5d..000000000 --- a/annotation/Tools/bin/screen_mito.pl +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Util/screen_mito.pl \ No newline at end of file diff --git a/annotation/Tools/bin/screen_mito_tblastn.pl b/annotation/Tools/bin/screen_mito_tblastn.pl deleted file mode 120000 index 15d8bacdd..000000000 --- a/annotation/Tools/bin/screen_mito_tblastn.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/screen_mito_tblastn.pl \ No newline at end of file diff --git 
a/annotation/Tools/bin/setup_rakefile.rb b/annotation/Tools/bin/setup_rakefile.rb deleted file mode 120000 index 2e1e85e61..000000000 --- a/annotation/Tools/bin/setup_rakefile.rb +++ /dev/null @@ -1 +0,0 @@ -../..//WebApollo/apollo1/setup_rakefile.rb \ No newline at end of file diff --git a/annotation/Tools/bin/snap_train.sh b/annotation/Tools/bin/snap_train.sh deleted file mode 120000 index 8f8bf4c40..000000000 --- a/annotation/Tools/bin/snap_train.sh +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Abinitio/snap_train.sh \ No newline at end of file diff --git a/annotation/Tools/bin/splitMultiFasta.pl b/annotation/Tools/bin/splitMultiFasta.pl deleted file mode 120000 index 2768aaa92..000000000 --- a/annotation/Tools/bin/splitMultiFasta.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/fasta/splitMultiFasta.pl \ No newline at end of file diff --git a/annotation/Tools/bin/split_gff_by_source.pl b/annotation/Tools/bin/split_gff_by_source.pl deleted file mode 120000 index 41ed9f3b1..000000000 --- a/annotation/Tools/bin/split_gff_by_source.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Maker/split_gff_by_source.pl \ No newline at end of file diff --git a/annotation/Tools/bin/sr_AllResu_AllIntervalPlotMean.R b/annotation/Tools/bin/sr_AllResu_AllIntervalPlotMean.R deleted file mode 120000 index b99994841..000000000 --- a/annotation/Tools/bin/sr_AllResu_AllIntervalPlotMean.R +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/R/sr_AllResu_AllIntervalPlotMean.R \ No newline at end of file diff --git a/annotation/Tools/bin/sr_MadeGraphAndRegByInterval.R b/annotation/Tools/bin/sr_MadeGraphAndRegByInterval.R deleted file mode 120000 index da89ff8c1..000000000 --- a/annotation/Tools/bin/sr_MadeGraphAndRegByInterval.R +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/R/sr_MadeGraphAndRegByInterval.R \ No newline at end of file diff --git a/annotation/Tools/bin/sr_Mean2col_AllIntervalPlotMean.R b/annotation/Tools/bin/sr_Mean2col_AllIntervalPlotMean.R deleted file mode 120000 index 
889cbabbe..000000000 --- a/annotation/Tools/bin/sr_Mean2col_AllIntervalPlotMean.R +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/R/sr_Mean2col_AllIntervalPlotMean.R \ No newline at end of file diff --git a/annotation/Tools/bin/sr_Mean2col_PlotPoints.R b/annotation/Tools/bin/sr_Mean2col_PlotPoints.R deleted file mode 120000 index 238426f65..000000000 --- a/annotation/Tools/bin/sr_Mean2col_PlotPoints.R +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/R/sr_Mean2col_PlotPoints.R \ No newline at end of file diff --git a/annotation/Tools/bin/sr_Mean2col_PlotPointsAndRegs.R b/annotation/Tools/bin/sr_Mean2col_PlotPointsAndRegs.R deleted file mode 120000 index 05d294e93..000000000 --- a/annotation/Tools/bin/sr_Mean2col_PlotPointsAndRegs.R +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Util/R/sr_Mean2col_PlotPointsAndRegs.R \ No newline at end of file diff --git a/annotation/Tools/bin/sync_dat_and_embl.pl b/annotation/Tools/bin/sync_dat_and_embl.pl deleted file mode 120000 index c0a19e615..000000000 --- a/annotation/Tools/bin/sync_dat_and_embl.pl +++ /dev/null @@ -1 +0,0 @@ -../..//Tools/Util/sync_dat_and_embl.pl \ No newline at end of file diff --git a/annotation/Tools/bin/sync_user_db.rb b/annotation/Tools/bin/sync_user_db.rb deleted file mode 120000 index 3208aefe6..000000000 --- a/annotation/Tools/bin/sync_user_db.rb +++ /dev/null @@ -1 +0,0 @@ -../..//WebApollo/apollo1/sync_user_db.rb \ No newline at end of file diff --git a/annotation/Tools/bin/synplot.R b/annotation/Tools/bin/synplot.R deleted file mode 120000 index 4a974f126..000000000 --- a/annotation/Tools/bin/synplot.R +++ /dev/null @@ -1 +0,0 @@ -../../Tools/ComparativeGenomic/synplot/synplot.R \ No newline at end of file diff --git a/annotation/Tools/bin/synplot.pl b/annotation/Tools/bin/synplot.pl deleted file mode 120000 index 24e597085..000000000 --- a/annotation/Tools/bin/synplot.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/ComparativeGenomic/synplot/synplot.pl \ No newline at end of file diff --git 
a/annotation/Tools/bin/tblastn2grid.pl b/annotation/Tools/bin/tblastn2grid.pl deleted file mode 120000 index 4f072cce9..000000000 --- a/annotation/Tools/bin/tblastn2grid.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Grid/tblastn2grid.pl \ No newline at end of file diff --git a/annotation/Tools/bin/tester.sh b/annotation/Tools/bin/tester.sh deleted file mode 120000 index e36ada939..000000000 --- a/annotation/Tools/bin/tester.sh +++ /dev/null @@ -1 +0,0 @@ -../../NBIS/GFF3/test/test_gff_syntax/tester.sh \ No newline at end of file diff --git a/annotation/Tools/bin/transplant.rb b/annotation/Tools/bin/transplant.rb deleted file mode 120000 index 54d6dbaed..000000000 --- a/annotation/Tools/bin/transplant.rb +++ /dev/null @@ -1 +0,0 @@ -../..//WebApollo/apollo1/transplant.rb \ No newline at end of file diff --git a/annotation/Tools/bin/transposonPSI2grid.pl b/annotation/Tools/bin/transposonPSI2grid.pl deleted file mode 120000 index 2d6a007d3..000000000 --- a/annotation/Tools/bin/transposonPSI2grid.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Grid/transposonPSI2grid.pl \ No newline at end of file diff --git a/annotation/Tools/bin/trnascan2grid.pl b/annotation/Tools/bin/trnascan2grid.pl deleted file mode 120000 index a38079378..000000000 --- a/annotation/Tools/bin/trnascan2grid.pl +++ /dev/null @@ -1 +0,0 @@ -../../Tools/Grid/trnascan2grid.pl \ No newline at end of file diff --git a/annotation/Tools/bin/unlink_all.sh b/annotation/Tools/bin/unlink_all.sh deleted file mode 120000 index 7203c3675..000000000 --- a/annotation/Tools/bin/unlink_all.sh +++ /dev/null @@ -1 +0,0 @@ -../../Tools/System/unlink_all.sh \ No newline at end of file diff --git a/annotation/Tools/test/genome/pcoprophilum_scaf_1.fa b/annotation/Tools/test/genome/pcoprophilum_scaf_1.fa deleted file mode 100644 index b25cb44f1..000000000 --- a/annotation/Tools/test/genome/pcoprophilum_scaf_1.fa +++ /dev/null @@ -1,91473 +0,0 @@ ->Pcoprophilum_scaf_1 -CGTGAGAGGATCTGTGAGCTCCACTCCGCTGCTCATTGGGGCTATAGGCGTATCCACCAG 
-AGATATCCATGGATCTCTATCTCTACAATCCGGTATACGATTAAGAAGGAACATGAAAGG -CGTGCTGGCGTTTCGAAGCCTCGCTCTGGTCGACCAAGGAAGCTAGATACTACTGATAAA -GTACGTCTACTGGATGCTATCTCAGAGAATCCACGTATTACCCACGAAGATCTACTTGCT -GAGGTCTCTTATAAAGTCAAAATCGACTCAATTCGACGCCTCTTAAATACTGAGAACCTC -CGGAAGTGGAGATACTCTTAGAGACCTTACCTAACGGAGGAACACGCGCTAAAACGCCTC -CGTTGGGCTACCCGATACCGTCACTTTACAAAGGAGGACTGGGCCCGGGTTTTCTGGTCT -GATGAATGTACAGTGGAACGTGGGATTGGTGTACGTCGAGAATGGACCTTTGTCCGTCCA -AAAGACCAGCCGAAAGAGGGACAATGCCAAGGGTTACCACACCGAGGGAAATAAGTAAAG -CAGATGTTTTGGGCTACTTTCTCAGGGGCTACCCGTCGGACTGGGCTTATCCCGCTCTTT -GGGAACCCAGAGGCTGAGCGTACAGGTATCACAGGCCTTGTTATTGAAGAGTTATACCGG -CGGATCCTACCTACCTTGATCTTGAACGAAGGGGCTATTTTTCAGCATGATAATGTGCCT -ACGCATACTGTGTACGTTGTTCGGAATGCTTTATCTGAAATGAATATCGAGGTTATAGAA -TGGCCACTACACTCACCTGATTTAAATCCAATTGAAAACCTCTGGGCGTTGTTGAAAGTA -AAGATATACGAGCTTCGTCCCGATCTCCTACATATGGGTAACAATGATATAATAAAGGAA -ATATTGGTGGCCACAGCACAGCAGGCATGGGATGAGCTAGAGCTACGGCACCTAGAGCAT -CTCTCCGAGACTATGCCGCACCGGGTGGAAGCGATTATCGAGAGCCAGGGGTGGTATACT -GCATACTAGATTCCATAGTATGAATTCATGGAGGTTTTGACCGGCAGTAAATTATATAAT -CAAGGGGGTGTGGGGCCAGTGTTGTTAACACTTTTGCGTAGTCACTGTATTCGGTTATTT -TCATTAGGTCGTGCATGGTTGCTCCGCATAGCGGTAATTACATTACGTTGGTGTAAGCTA -GATTAGAAGAGGTAACTCCGAGGAGCCCGCAGTGGGTGCTGTACCACGTGAGATTGTATA -TATCCATCTATCCAAATGCATAGTAGACCTAGTGTATATCAGCAATACTGAAAGATACTC -TTATCAAAATCAAATCTTGTAAAGTAGTGGTGCACTCATTATGATCCATTTCGATCTATG -CCCGACCTAAGCCTGTCCACCGGTCCGTTCACAAGCGCCGATAAAACAATAGCCATAGAA -TACATATGTATGTCGTGAGAGAAGCTCTCTAAAGATCAGGAATATGCACGCTGAAAAAAA -TTGCAGCACATGGGTCAAGAAGACTTTGGTAAAGCCAGACGAAGATATCAAAGTGACTCT -CCGTGAAGAAAATTCGTTGCGTTCGTGAGCCATCACATCGGATCAGAAATTGACGAAGCC -AGTCCCGAAAAGGCAGGCTCAAAAATCTCTTGTTGGAACCTCAATATCAAGACAATGTCA -CAAAATCAGCAGGTTCCAACTGACATCCTAGGATTGCCACTCTTACTCGCTGGAAGCAGG -AGCTTCCGCCGAGCTAGGACCAGGGCCGCTTGcaccgccacgaccacgtccgccacgacc -gcctcgtcttccaccacgtccacgaccCTCGGGGGCACGACCTTCGCCATTCCGGCCCTC -AGGAGCACGCTTAGCATCATTCCGGCCTCGGCCACGGTTTCCCTGTCCGTGACGGCGGCG -TCCAGCAGACTTGGTACCGTTCTCACCCTGAGTCGAAACATCAGCACCAACCGAGCCATT 
-ATTCAACCCCGCATCAGCGCTAGCCGGGGTAGCCTGAAGGCCAGTGTCGGTCTCATCAGG -GTTGCGAGGAAGTTTGACCCATTCTCCGCTCTCATCGGAAGTAGGAGCCCAGGTTTGCTC -CGCGGTAGAGTTTCCAGCATCAGAAACCATAGTTTGCGCAGGAGGAACCACCTGATCAGT -TTGGGAAGCTGCAGGCTCAGCCTGAGCGCCGAACGAGCTATCCTGAAGCTCGGTCAATCC -AGCATTGGCCATGGTGGGGTCGGTCGTAGATTCTGTGGCGGGGGGTGCCTCCGAGACAAC -TTCGGTAGTCTCCTCGGGCGGAGCGAACTTGTCAGATGCTTGCTTCACATCGCCATCTAA -GGAGACATATTGTCAGCAAAGTGCCTTCAAGCATATTTGGCGGGGGCGGGGCATTCACTA -CTGACAGGTAAAATCAAGAGTGGTGCCCTCAACAGCATTAACCTTCTCATCGGCACCCTC -GGCAATCTTCAGTAGCGAGTTGACTGCAGCCTGATTGCCAGCGTAAACCTCGAATAGAAC -GCCCTCGAAAGCACGGCTCTCAGTGGTGACGGCATCACCGTCGCGTCGAAGGATGGCCGC -GGCGCACAAGAATTTAGTCACGGTATACAACTGCTGTCGCAGTGCCTTCTTGGAAGCCTC -AGTCGCATCGGCAATGGCGTTCTGACGGACAGCTTCGAGCTCCGCCTCATGCGCCTTGAC -AAGTGCGGCCTTCTGAGCAGCCAGGCGATCCTCGTATTGAACAGCGAATTGCTTGTAGTG -ACCAATCTGCTCCTCAGCCTGGGCAACGGTGGCTTGGAGGGTAGGCTTCTTGAGAACCTG -GGCCTTCTGGTCGTTGTTGATCTTCTTCTCCTCAATCAGTTCATCAAGCGACTTGTCGGG -GTTTTCGGCGATGATGGCGTCGACTTTGGCGAGGGAGTTCTGGTCAAAAAAAAGCAGTTA -GAAACTTATGGGAAATCCCATCGAAATTTGATTCAATAATGACGCACCAACTTCTTCATG -GCATTGCGGAGGGTCCTAAACGGGGAATACAGTTAGCTCACTTTCACCTGCGCATAGGCA -ATAATCACACAGCCAGGGTCAGAAGATTTTGAACATACTTCTGCAATTCCTTCAAATAGC -TACTCTCTTGACCATCAGAGCTGGGACTCTCCGGGTCCTTGGAGACGCTTGAAGCTCGGG -AGTTGCCCTTCTCCTCGTCCTTGAGCTTCTGAGTAGCAGAAGCGGGAGGCATGATGGGGA -TGTGGGTGACACAAATATTAAAGTATTTCCGAAAAGCTTGTACAGGTCTGAAAGAATTAA -ACGTGATAACGTTGAAGAATTGAAGAAACAGATGGGTGTTATGGAAGAAGAATCAATACG -CGGGTCGGGAACACCTTTCTTGGTCGGTGTGGCCCACTGGCTTTGGGCAGACTCTACGCG -TTTAGGGGCGAGGTAAGCAAATCACAGTATTATTCATGTCGATACACATGTGCAAACTAC -CAACTGAATTTGTTATTTGATCTCTTCCCTTGTATTGCTCCAGGGTATCATATTCACATT -TGCCCACAACGCCATAATATCACAAAAATGCACGCCAATCTCAGCACAGTAGTCATTCGC -CTTTCATATCCATGCTCAATGGCTCTTTACCAAAATGACAAATTGAGATTCTGTGGCGGG -CTTTTCTATCCCTGGAACCTGCGCAGTGTTTTCTAGATGAACCCATACCTGTCAGTACAT -TCTTATAAGTCTTCCAGACAAGCTCTTAGGTCGATCTCAGATGACGCACCTCAGTGACTT -TGCGCACAGTATTAGCTGAGATGTGCTTCTCTCCAGACTTGCGCGACCGAATAGACGCCT -CACGCATTAACCTATGATCTGTTAGCTCACCAGTTCAGTCACGTCGGTGGTAAGAACGCG 
-AATCGAAGGGTTGCGATCGGTGGACCAAGCCACAAAGATATATGAAGACGCACTCTTGCA -TGAATAGCATGTAGTCTAAGAATATCTTTTAATTGATATTAGCGACTAAAGAACAGGAAG -GGTTGATATCGGGCACATAGCGTACCAGGATGTCGGCATTCTTGCTCATGCTCCGATTGG -AGTGGGCTTTGACAATGCGCTTGACTGTTCCTCGTGGATAAAGCTTCTGAGCTGCCATGA -CAAGTAGAATAGAGGAGACAAAGTTGATGATGGTTGTTCAATGCTTACCCCCAAAAAGGA -AGTGGGGAGCTCTCTAATCTGATTACTGTAATGATCTGTTTCTTATGTAATTTGGTATTT -GTATTTCCGGGCTCCACAAACACGTGAGATCGTACGATGTAAACTGTAGCTGGAGATCTT -TATTGTTATATCATTTTTGCAAATGTATATTGCCAATGTACACTTCAACGCAGCCGTCGA -AGAATTCGCCAGGCTGAAACACCAGCAATCGAGCCAATTGCTACACGCCAGGCTAGGAAG -ACCCATATTGCCATGCGCAAGCGATCATAATACCCACGGAGCATGATCCCAACAGCATAA -GAAGTCCCATCTGTTGCTTGGTCTGTCGGGCCATAATATCGTCCCACTCCGAGGCCACCG -ATTCCCACTTCATTGTGATCATGTTCAAAATCCGCTTCCTGAGTGACTGGGAGTGCAGCC -GTTTCCCCATCAGAGGCCGTGTTACTAGGACGAGCACCTGATCTCACATTGAGTGCATTA -CGCTTCCGCAGATTATAAGTCACAATGCTCACGATCAACCGCAGATCAGGCACGGTAAAT -GTTGGATATTTCCACAGCTGCACGGCCTGGTGGGGAGGGAGCATATCCAAGAAGTCAGCC -AGGACACCGAGAGTGAGACCCCAGAGGAGCAGCGGACGGGTAGTGTCACTCGAATCTTCT -TGTCCACTCAGACACCAATTTTTGAAACGCTGAATCAGGGAAGTTTGGGATATATTCTCT -TCAGGAATAAACCCTGGAGTTGTGGTGCATTGAAGGCTCTCCGACGGGAGCAGCCTCACG -GCCGAGAACTGCATCCAGCCCATCAGAGATCGAACTGCAAGACGGGTCAGAAACCCACCT -TGTCGACCGAAGCGCTGCGACATATCCACATATTCCACCGTCCGCAGCAATGGTGATAGA -AGGGCGCGTAGGGGAATCCAATGTGTTGATGCTACTTCCGTGGGCTGCAATCGAAGGATC -GGAGAGTCGCTGCGAGTCGTGAGAAAGACGAAGGGACAAAGAATCATCAATCTGCCATTC -AAGTCAGTAATACAGTTATCAGCGACGGAAGTAAATGGCATTCGTACGGTTCAGATCCCC -AACTGGTAGCGACCACCCGCTCGGGGAGATTGCCCACACTAATGGTGTCATCCGTTGTTA -AATCAAATCCGATTTCCTCTGAGGCTTCTCTGATAGCTGCAGACTTGTCATCTGCGTCTT -CCGGGTCTCGCTTTCCGCCCGGTAAGGCAACATGGCCAGTCCATCGATCCCCCACGCGTG -AAGCACGCTTGATGAACAGAACCTCCGGTTCGCCATACTGGACCCATGGCTGTGAAAAGA -ACTCATCTAAACGTTGTTCGACTGGCTGAGATGCATCTTTTATGGGCTTTGAATTGGGCC -AATGGTCGTAAGTTGGTCGTACACGAACAATCAAAGCAACCGAAGCGCGTTTCTTGCATG -ATGGGGGGTTTGGGACGTAAGGATAAGGTCGACGATGTAGGTCCACTAATACGCCATGCA -AATGATGGGACAGACCCGGGATTTGTCCAGACTTCTCCATCAAGTTGAATATCTAATACG -TACAATTTGTACTATCAGATGTCAACAGATTGGATTGTGGAGGATATCGGTATCAGAGTC 
-TCCCTTGGTCCCGGGTATCGGTCTAGCCCATACTCCCCCGTATTCTCAGGTAAAATAACA -TACCATGTCATCGGAACAAAGATTGTCCGGATAGCTGGTCTATGGGGCTCTGATTGGCCC -AATCTACCCAGCACGTGCGTCTATCGCTGGCTGTCGAACGCTTTCAGTTTGAAGTTGCAG -CTTAGTTTCAAGCTAATGAGCTATTTTTCTTTGGTGTCTCATATTTTTCAATCTATTAAA -TATCAAACTTTGCTTTTTCTATCGATTTGTCTCAATCATCCAGGGCTCTCATCCATTCTT -CCCTTCCGAATACCCCCAATCAGACGTGCACACCCTATTTGAGTAACAACTGCACGACGC -CATTTACAACGGGACTAGCGCAAAATGCGGTTTGGAAAAACCCTCCGTGCGGCAGTGTAC -CCTCCCTGGAAAGGGAAGTACATTGACTACACCAAGCTCAAGACCTTGCTAAGAGAGAAT -GATGTCACCCGGGACGGAGAAGACGCCAGTGATTCCGATGACGATCAATGGACTGAACAA -GATGAGGAGGCCTTTGTCCATGAACTGCTCAATGTGGAACTGGACAAGGTAAATTCTTTC -CAAGCAGAGACATCACAGCAACTCCGAGAACGGACAACAGCATGTGAAGTTAAACTACGA -CCCTTGGCATCTACTCCGGAGCAAGAAATCCCGACCCTCGATGAACAGACGAAAAGGACT -GTAGCCTCAGAAGTTTTACAAGAGCTCGATAATATCACCAAGGAAGTCAGTGCCTTGGAA -AAGTACAGCCGAATCAACTTCACTGGCTTCCTCAAGGCTGCAAAGAAGCATGATCGCAAA -CGAGGGGCCCGATACCGGGTGAAACCTTTACTCCAAGTGCGACTGTCGCAACTGCCATTC -AACTCAGAAGACTATTCGCCGTTGGTCCACCGGCTCTCGGTGATGTACTCATTTGTTCGA -GAGACCCTTGACCACGATATCGTACAACCAAGGGAGACAGAGCATGGATTTGGCCGTGAG -ACCTATTCATCTTACAAATTCTGGGTGCACTCAGACAACGTTTTGGAGGTCAAGACTTAC -ATCCTTCGTCGATTGCCAGTTCTTATCTACAGGCCGGGAACTTCCAAAGACCTCGATACC -CTCACTGAAGATCCCACCATTACTTCTCTCTATTTCGATTCCCCTCAATTTGATCTCTAC -AATCAAAAAGTCGCACGAGCACCCGAAGCCGGATCTTTGCGACTGCGCTGGACGGGGTCA -CTGAAAGACAAACCTCCGATCCACTTGGAAAAGAAGATTGTGACAGACGACGACGAGAGT -CGGCAAGTCAAGGTACAGCTCAAAGACAAGCACATTAAGGAATTTTTGGACGGCGAGTAT -CGTTTCGATAAAAAGCTTCATCGCATGGAGGATTCAAACAACGGAGAGTCGGCGGCTGCT -GAAGCCTTGAAAAGGGATGTGGATGAGCTGCAGTCCTTCATCAAAGACAATGACCTGCAA -CCCATGCTCCGAGCCAACTATACGCGCACCGCCTTCCAAATTCCTGGCAATGATCGGATC -CGTATTTCTCTCGATACCAACCTAGCGCTCATTCGGGAAGATTCCCTCGACAGTGAACGT -CCATGTCGCGATCCAGCTGAGTGGCACCGTACAGACTTGGATGATGCAGACATGACTTAT -CCCTTCAACGGTGTCAGGACTGGAGAGATCACCCGCTTTCCTCACGCTTTGCTCGAGATC -CAGCTTCGAGGCAAGGCCCACAACACAGAGTGGGTCAAGGACCTAATGGTCTCCCACCTT -GTTAAGGACGCTCCCCGGTTCTCCAAGTTCGTCCATGGCGTTGCCTCGTTATTTGAAGAT -TATGTCAACAGCTTCCCCTTCTGGCTTGGTGAGCTTGAAAGCGATATTCGCCGTGATCCC 
-GAAACTGCATTCCATGAGGAGCAAGAGCGACTGGCTCGACGAGCCGAGGAAAATATAGCC -GTCGGCAGCTTCATGGGAGAGGCGCGCAGCCCAGGTGTCCGCCCACAGGTCGGATCACCC -TACCATCAATACTCTGCCACCGGCTCTCCCTCGGCTATACGGAGAACCTCAGCAGTCACA -GAACCTGCAGCGCGGCACCCACGTCCCTCAATCCCAGAACCTCAACGCCGTGATACTGAA -CCCGCTCCCGAACCTGCAGAAGAACCTGAGCCACCCACTAGACTCGAGTCGGTCTGTCAA -TTTTTGGGGCTATCTCCTCAACGCTGGCTTGGAACGGGATCTGTCTCTTTGCCGCCTGGC -GTTCGACACCCCGGCGTCTGGATCAAAGATGCTGGGCCCGTTCGGGTGGAAAGCAAAGTT -TACCTTGCCAATCAGCGAACATTTATCAAATGGCTTCACATCAGTATCCTCCTGTCGAGT -TTGTCATTGGGCCTCTACAATGCTGCTGGCAAGCACAATCAGGTAGCCCAAGCACTGGCT -GTTGTCTATACATTCTTTGCCATCTTCGCTGCAGTCTGGGGATGGTTTATATACGAGAGA -CGAGCCCGCCTCATTCGCCAACGGAGTGGAAAGGATCTAGACAACATGTTCGGTCCTATC -GTAGTTTGTATTGGTTTGGCCATTGCCTTGGTTCTGAACTTTGTCTTCAAGGTTAGACTT -CCAGATCTTCTCGCAGCAACCCTGGCCTGCTCGCTAACTCGTCAACAGTATTCATCGGCA -CTTGACCAAGGCCGCAATGACCCACTTCCTAGTGTGCCTGTTCACTTTAACTCATCGGCT -GTCTCCGATAGCTCAGGAGGTTCGTGGGGCAACTTCAACCAGGCTCAGCAGATTCTGTCA -TAGTCATTCTGGACCTCCTCTTTTCTATTTAAGTATGTAATCAACAAACCCCACGGCCTT -CGGACCCACAAGTCCCGAGGAATTTTGTGCTTAAAACTTACGTGGAGCTTGAATGTATTT -GGATAATAGCGTGTACATCGGAATAACATCGGGGACACTGTGGGATTAATAATTGTCTCA -AGTATAGAGCACCCGTGGACCTTTTGGAAGATTGGAGATCCCAAAAGATGAAAGACAAAA -AAACTGCTCCCGTTCAGGGCATCTGCATAATGTAACGCTTACAAATCGAGACTGTCCCCA -ACATCAAAACGACAAAGCTCCCCATGAAAGCAATATCCTCAAATTCAACTCTCTCACCTA -TATCCTTCTGGTATCTTTGATACGATGACGAACTTCCAGATCCAGATCATCTCTGACACC -GTCTGCCCCGTAAGTATTCCGACCATGTACACAATTATATATATATCCTTCCACGCAAAT -TCCCCCTCAAATCCACAAACTAACCATTCTAAATAATAAAAGTGGTGCTATGTCGGCTAC -CGCCGCCTCTCACGAGCAATAACTATTCACCAAGCCAAGTACCCAACAGACACATTCAGC -CTGCACTGGAAAGCATTCTACCTCAACCCGGCAGCCGCCGAATATCCCGGTGTGAACAAA -GTCGAGATGTACAGCCAGAAATTCGGAGCTGAGCGCATGGAGGCAATATTTGCGCGTCTA -TCTGCTGTGGGCAAGAGCGAAGGAATTCAGTTTAGTTTTGGCGGCAATACAGGTTCCACA -CGGGATTCGCATCGCTTGCTCTGGTTTGCTGGGCAGCAGGAGGCTGAAGAGACGGTTAAG -AAGGAGGATGGTGTTGTTGGTGGGTTGCAGACGAGGGTTGCGGAGCAGCTGTTTCGGAGT -TATTTTGAGGAGGAGAAGAATATCACCAATTTGAAGGTTCTCGTGGAGGCGGGTGTTGGG -GCGGGATTGGATCGGGAGATTGTGAAGAAGATGTTGGATGAGGATGTTGGTGCTCAGGAG 
-GTTGATTTGGAGGCCAAGGCGGCAGCCAGGAGGTTGGTGTCTGGTGTGCCATATATCTCC -GTACAGGGGAGATATCATGTTGAAGGCGCCGACGAACCCGAGATCTTCATGGAGATCTTT -GAGAAAGTGAAGGTCGAACAGAAGGAATGAAGGATCATACAGGTCGAACGTGGAATGATT -TCAATATGATTATGATCATCTGTATAGTTCTATCGACACACCATAGCAAGTAAAACTATT -CGGTTTATCAAGCTAAAGCGTAGTTGATTCAGTCTTGATGCATTTCGTGTCATATCTGCA -CAAGAATGGAATGTTGAGGCAGGGACACTCTTAGTGACCTGTCACTTCCAGCCCATAAAG -CTAGCGAGCATCCCCGGTCATGAAAGTGCACAAAGCCAAGGCCTGTCCTGCAACAAGTGT -CTAGGTCATCTGCaagaaaagaaaaaaaaaagaaagaaaaaaaatcaaatcaaatcaaaa -acaaCGTGGAGAGAAAAAGACGTTGATGATACACAATCAAGATGAAAATAGGAAATAAAA -ATCAAGGATACAGCCCAATGAAGAGCAGAAAGGGCAACGAGGAAAAATATGCAAGTGCAG -TGGCGACTATGAATGGCCAGAGGCTCAACTCCCGGCCAAGTCATCCCGCAAAAGAAGTCA -TGATAGATCGTAGCAAGTCATGGAACTTTAAAACTCCATGTTCAAATTCAACGGGCATTC -AATGGGTCGTCTGTGCTTTGACCTGATGCAGTTCGTAATTGGGAAGATAATCGAACCAAT -CAGTCTCAGGATTATAGCGGTAGGCCTTGAAATCTACCCCATATGGTTTGGGCTCAAGCA -TGTACAGGACGCCCAGCGGGACATCAAGATCAGCAACCTCGTCACGTTTCAGACCACGGA -AGTAGGCCAGGAGCACGCGAGCAACGGATCTATGCCCGACCAGGAGCACATGATCGGTAG -TGCGCTCTATCTCAACAATGACTGCGCGCGATCGATTGATGATGTCGAGATAGCTTTCTC -CACCTGGTCCGGGGTATCGGTAGAAGAGCTTATCTTTCTTGCGGTGGGCATATTCTTCGG -GGTACTGCTCTTGGATTTCCTTGTATGTCATACCTTCCATCCTGCCGGAATGGATCTCGT -CTAGCATCCTCATCTGCTTAACATCGTAATCTTCTTCGTTGAAGTATTCAGCAGTTTGGA -CTGAGCGTTGCATCATTGAGGACCAGACACAGAAGTTGCGTGGCCCTTCCCGAGGAATAT -AAGATGGGTTGGGAGGTGTGCTATCACCGGGTCGAGGCGGGAAGTGCGCGAGAAGGTCTT -TCTGTCGCTGATATGCCTCCCACTGCTTCCGTTGGTGATCCACGAAGCGAGCCAATGCCT -TGGCATACCTTCTCCCGTTCTCGCTAAGGTCAGAGTCACCACCGATGCGGCCTGCGGCAT -CGTCTAAGCTTTCGCCGTGCCGTGTGATCCAAATTTGGCGCGGAGAAAGATTGAAATTGA -GCAAGTAGTAGACAACTTGAGAAGCAAGGAAACCGTGAGTCTGGTGAGCAACGACTTTAC -GACCGACGTCGATCATCTGGATGTAAGCCATACGATGCTTCTCCTCGTATTCGCCGAGGG -GAACGTAAGACTTTTCGTACAGAGCAACACGCTTTTTGAAGTCTTCAAGTGCCTCGACTG -GGTCCTGGCCTTTATAGTCAGGACCGGAGAGTTTTAGGCGCATGTTAGCCTCAAGCAAAT -CTTGATCCACGCAGCTGCTCTCGAGGAACAGGATGTTTAGCTCAGGGCCAGCGCGATTGC -GAATATGGTCCACAATCGCTTTGCGGCGTTCCATGGTGCTGTTAGTCGCATCAAGGATAC -CGATACTGCCACCTTCCTCCAAAATGTACTCAAGCAACTCATCAAGAGTATCCAACGCGA 
-CCTGCTCTCTCATCTTGACGGCGCGCTGATTCTGGGGATCAAAGAAACTGGCGGTCTGAT -CCATTGGTTCAGGGGAAGCTTCATTGATAGATCCTTCATTCTCACGAGATTGCGCAGGGC -CCGCGTCGACCGAGGGTACAACAGTACTTCCGTTTTGAGAAACTTGTTCAGGATCTTCTC -CATTCACGAGAATCTTGGCTGGTACCACTGGAGGGGGAAGTGGTGCCTCATTCTCTGGCG -GGGAGACATCTGCTGGGTTCAAAGTCGTGCCAACGCTGACACTCAGCCTGCGCACTGAGT -CCACGAGATCCTTGTGGAAGGCACTTGGTTTCCGGTCCTGGCGATCAATCGGGGCAGGTG -ATGGGGACTTTCCAGCGGCCACACGACGACGCTGTCCAACATTGAAGATTTCGGTGTCAT -GTTGGAGCCAGTTCAGATAGCGGGCCAGCTTCTTGGTGATATAACTTTTGCCTCGGGCCG -GGAGGCCGACCATGACAATGACAAGCTTTGCACCAACATCCCGCTGGGCGATGCGGCCAT -CTGGAGACACCTTCGATTTAGTCAATCCCGGAATATCGAGAGTAGTTGCTCGCACTCGAC -CGGGAGTACCGGGAGGGTGGTAAAGACCCGATCTACAAGTATCAGATTAGTCACAATTTG -CCAAGCGCATCCAAGAAAGTCACAAGCAAACGTAAAAACGACACGTTAAGAGGTGGACTT -GGTTCTGGGGTAAAGTAGATAAGCTCATGATAACGCTGCAAAATGCCACAGACAGAACTG -ATCCCTCTGTATCTTGTTGCCGATCATACTGGCCTCGTTCCGATAGGTCTATAGCAAAGA -CCAAAGTGGTCTAGACAAAAAAAAAGGAGACCAGACTTACATCTGTGGAGATGGAGGTGC -GGTAGAAACCGGAGTGTCTGTCAAGGCCGTTCCGACTCCGCCCTTAGGCGGCAGTGAAGA -TTTGTGGGAATCGGAATGCAGGTCATTACCTAGCGCTCGGTTGACCACAGTTTTCGGAGG -CATAAGAACTCCTTCTTTTCCCGATCCCGAGCCATTAGACGCGCCGTTGGGCTTAATATT -ATTCCTCTGATCCCCCGTCTCGGAGGCCGACGTCATGGCCGACCGTGCAATGCTAGAATG -TTGCGCCATCCCCCGCGGGGATATTCCTTATTTGTGTGCTTGATTTAAAAAAAAAAATGG -GAAAAGGCTTTGCTCTCTTTCAGAATCTCACAACGTCAAGTTGACGAAACTTGGGTAGGT -AATTCGATGAGAAAAAGAAGATGTCGAGGGGGAGAAAAGAAAGTGTCGGAGGTTTGGATG -AGTCAACCAACGGGAACACCGGGGCACTTGTGACCTGAAATATTCAAAGGAGACTTGAGA -TATTCCAGGGATTAAAAGAGGTTGGGCAGGGTTTTGGATCTGGGCTTCTTTTTGCAAGGA -TATATATTTTGTGAATTTTGATCTCTGGGGAGGATAATACCAAAGTGCCAAGGATGTCGA -GGTAGTTCAAGGAACCATTCAAATGCAACTTTAAGTGAGAAAATGCTTTGAAATGATGAA -ATTACGCAGGTGGACATGGGCCATAGGGTGGATCGATGGCTGAAAGATCGCCCATATATG -ATGTCAAAGGGGGAAAGAGGAATGGATAAGAATCTAAGTATTCAGATATTCAGATCAGCA -GCTCCTATACACATATTGATAGGTCTCATGAAAATCAAGCATCTGTCCTATCTTCAATGA -TGCTTGACTTAAAGCATATGGGAAACTACAGATTCAGAAAACATAACATAACAACAAAAA -TAAGTGTCGAACTACAACATATACTCACTCTCGCACCTTTTGCTGCGATATCACTGATAG -CCGTGAGTTTCTCGTGTCCACCTCTTTTCAGACGTGGCTCAACAAAAGTACACACATACT 
-TCCCCCATATTTGAAATCGAACTCAGCCTCCAATGGAGAAGAAACCAGAAATTATTGGAA -ACACCTTGTGTGACCTGAATCTCACCGCTCGAGCTGATCCATACTCGGCATCCTTGGCAA -CTTTCGACACAGATTGATCTCCGACCAACTGTGGGCGTCCATTGCTGCACACTGATCTCC -GCTGCCGTAAGCCCCGAAAGGTAATACATATTCCGTGGTCAGGTGCGGAATCCTTGGCGC -TAGGCGGTAACGCAGGGGGTATCACAGATGCCTAGATAGCCTCTAGGGATATTGAATACG -GTGTGAATGTATTGCGTGCAACCACGCAGAATGTTTCTTATCAGTTGCTCAGGTTCAAGG -TCAAGCCAATCAAGCATGCGTATGCCGCAATACTCAGGGCCGTCAAGACCAGGACCGCAT -TTCCACTGGTACGGAATTTATGCCGATGGATTCCATCCTCCCCTACTTCGGAGAAGAACT -GTCTATTTCCCTGTGAGCGACGGAAAAGTCCAATGATGAGGACACCTGTGCCATATATCG -CGAACAGAGCTCCAATACTGTAGAACTCGGCGGTGAAAATTTTGAGTACGACGAGGGCGA -ACGAGAACTGAGAAAGAGCGGTTCGGACATAGGCCCCCTCAAAAGTGCGTTGGGCCGCGC -GCACCTCGACCATTTCATCTGAGGTCAGCACTGCCGAGCGGGCTCGGTGCATTCGAAGGC -GATAACGATCCCCTGCGCGACGCTGAGAAAATCAGACAACCTGGTAAAGTAAAATGGGAG -ATATGAAGAATATGGGGTAGACCGGGGATTTCTCGGTAAACACACACCTCAAACATTGCG -ATATCACCAAAATTTAAAGACGTTAGGAGGGTTTCAAGGACGGGACTATATCGAACTTGG -CTCTAATTTCAAGGAGTTGGTTTCAGGAGATGTGAGAGAAGGAAGGAAGGTCCTGGAGGG -GAGAAGAAGTGCGGATGACACCTTTAGCTCTCACGTGGACTCAATCATCTTTACATACGT -TAGATAGATAATATACGTTGTCTATCAAAAAAAGTATCGTTCACGTACTCTTCTCTTCTC -TTCAGAAGTTATTTGCATTTGATAATATTCAAAATATTTTCCACCAATAACGTGGAACTC -GATTCCAATTCCTTCTCGGCAATCCAAATCGCCTAACGAGGAAAAAGCGGCTACTGACGT -TCCCCACGCTCTAGCTGTATCTTCTCCAATTAACTtctcttccctttatttcccatctac -tctctctccctctctttctatctctctGGCCCCCAAGTGCTTCATATCGACTCTCCACAG -GCACTATCCTTCAACCTATCCTTGCGCCGTGAGAATGGCCAAGATATTCGATGCGACAGA -AGGTATGTGTTCTCCGCACCGGTGGCCCCCACGCGGGATACTTCGACTGACTGAAGAAAT -AGTCGCAAAGCACAACACTCCTGAAAGTTGCTGGGTAGTTCTATACGGCAAGGTCTACGA -TGTATGATTTCATGCTATAAGATACTCATTGTGTATTGCATACACTTCCCTGGCCACATC -TGACTCTTGTCTAGGTTACCGATTTCCTGGGCAGCCACCCCGGTGGTGCCAAGATCATTT -TGAAACTTGCCGGCCAGGATGCCACCGAGGAGTATGATCCCATTCACCCACCAGGTACAC -TAGAGGAGGGACTTAAGCCCGAGGCGTGCTTAGGAACAATTGATGCCAGCACGTTACCAA -AGGATGTATCCTCCGAACCTCAGGAAGAGATCGAGGGACCACCCCCTATGGAGAATCTTC -TCAACTTGGACGAGATTGAGGAGGTTGCTACTAAGCAAGTGAGCAAGAAGGCCTGGGCGT -ACTATTACTCCGCATCCGACGATAAAATCAGCAAAACCTTCAATAACGAGGTTTATCGGT 
-CCATTCTCTTGCGACCTAGAGTCTTTGTGGATTGCACAAAGTGTGAGCTAGACACAACCG -TCCTTGGATATAAACTTGAAACGCCTGTTTACGTCTCTCCCGCGGCGATGGCCCGTCTAG -GACACCCATCTGGTGAGGCCGGCATTGCAGAAGCGTGTCGTAACTTTGGAGCCATGCAAA -TCATCTCCAATAACGCTTCCATGACACCAGAGCAGATCGTCAAAGACGCTGCTCCGGGTC -AAATATTTGGCTGGCAGCTTTATGTTCAGATTGACCGCAAAAAGAGTGAGACGATGCTTG -CGCGCATCAATAAGCTCAAGGCGATCAAGTTCATTGTCCTCACCCTCGACGCCCCGGTTC -CAGGCAAACGCGAGGAGGATGAACGAGCTGGTATGACCGGACGGACCGCCGCCGTGCCCA -GCGGCGTGAAAGAGGTCGAGCACTCGTCGGATGATGCCCCCAACTCGACCCAAGGATCTG -GAGGTGTTGGCCAGCAATTGTTTGCAGGCACTGACCCCTCACTGACATGGACGAAGACTT -TGGCTTGGCTTGCCACTCAAACAGATTTGCCTATTGTCTTGAAGGGTCTGCAAACTCATG -AAGATGCATACCTTGCCTCACTCCACGCACCTCAAGTTCAAGGTATCATTCTCTCTAATC -ACGGAGGACGCGCCGCGGATACTGCCCCTCCGGCGGTTCATACCCTCTTGGAGATTCGAA -AATACTGTCCTGGGGTCTTTGACAAGATCGAAGTGTATGTGGACGGGGGTATTCGTCGTG -GCACAGATGTTGTTAAGGCTCTCTGCCTAGGCGCGAAGGCAGTTGGCCTTGGACGTCCGG -CTCTGTGGGGCTTAGCGGCGGGTGGAGTCGATGGCGTTCGTCGAACATTGCAGAGTAAGT -TGACTGGGTATTTGATACGTCTCTATTTCTAACTCTGTGGTAGTTCTGAACGATGAAATC -AAGACGTGCATGCGGCTGCTTGGAGTGGAACGGGTTGATCAATTAGGAGTTCGACATGTA -AGTGAGCTCCTATCAATTCCTCTTTTGGTATACTAATACTTCTCTTAGATCAACACCCGC -GTTGCTGAACAGCAGATTTACGATGGCCCTACTGGTCTCGAAGCCTTACGGCGTGCATTC -CGTGCGAGGTTGTAGGGTGAAAACCGGATTCATACACTCCGAATATAAAATGTGTTTATC -TCGAATATAATCCTAGATTCTTATTATACCCTTACCATTCAACATACTCCAATGGTGTTT -CCGAAGAAGAAAAAAAAAAGTGTCCTAATCCGTTGGTCCACTATATCGCCACCCTAGCCG -ATCTCCACCGACCAAGAAAGACTTCAAAAAAATAGTTTCCCTGGGAATCGAACCCAGAAC -CTTTCCGGAACACTTGCTTGAAGCATTTGTTAACGGAAAATCATAACCGCTAGACCAGGA -AACTGATTGATGTCTGTACAGGCATTTTTGACAATCATAATCCAAAAACAAATTCACGGT -TTTTATTGATTTTGGAAAAGCCATGTTTGATCCAACTACATTATATGAATATCTATAATC -TTATAGTGTACTCCGTGGGTGTTTCGATTTTTAAATTATCTATAGAGCCGCACGGTTATG -TGCACGTCCACAGGTACCTATTTTATCGCTGTATCTTCCCCGCATCTTCCATCTGTAAGA -CTGCTCAAGTCGGCCCCTTTAATACTTCCTCAATATTACAATGGCAAATATTAAGCAATG -GTTTCCCCATACAACAACTCCATTCATTGCGAATGCCCCGATGTTTGGATTTGCCGATGT -CAACCTAGCCACTGCTGTCGGAAAGTCCGGGGGATTCGGTATGATAAACTAACCTGGTTG -CAGACATTAGGCTGATCATCTATAGGATTTATTGGCGGGGGCTTTGATTTCCGATCTGAA 
-TCTACCCAGCTCAGAGGCCTTGATACTCAGCTGGTGGGTGCCCGTTCTACCTTTGGTTTG -ACCGATGATCAACCATTGCCACTTGGTGTCGGCTTCATTACCTTCCAGCCTGACGGACTC -ATTGAAAATGCTATTCTCGTGCTCCAAAAGCATCGAGTAGCGGCAGTCTGGCTGTCTTTT -CCTCGGGCTGACACTGATCACCTTCCGATCATACAAGCCATTCGGAAGGTCCAGGAAAGT -TCTGACTGGGATGTCAGGATATTTGTCCAGGTAGGAACTGTCAAGGCTGCAGAAGAAGCA -CTTCGGCAGGGGGTAGATGTTCTGGTGGTTCAAGGAAGCGATGCGGGAGGACACCAGTGG -GCTCGGGGTGCTAGTTTGATATCCCTCTTGCCTGAGGTGCGAGATCTTCTATTAAAGGCT -CAGAATACAACCACCGCTATCTTGGCAGCTGGTGGCATTGTGGATGGAAGAGGCTGTGTT -GCTGCTCTCAGCCTAGGTGGGTTGTTCTGTACCTCATGCTTACAGCTAATTGACAAAAAT -AGGTGCCGATGGGATTGTCATGGGTACAAGGGTAAGTCTTGCACATTCTAATATACATGA -ACCATTGCCAATCGAGTCCAGTTTGTTGCAACGTCGGAATGTCCTGCGCCGTGTACAATT -AAACAGACCATTGTATCGAGAAGCGATGGTGGAATCTCAACCATCAAGTCCATTCAACAT -GATGTCTTCCAGTCAACTGATCTCTTTCCGAGACAGTATGATGGCAGGGCTATCATCGGT -GTGAGTTACGAGGATTCGCAGGAGGGCGTCAGTGACGAGGAAATCATTCGACGATACAAC -GAGGCTAAAGAGGCCGGCGAACACCAACGGAGAACTGTATGGGCGTGAGTATACCTGGTT -CGGGCATTATATAGTTCCTGGACTTTCTTTATTAATGGTCTTGATAGGGGTGCGAGTATT -GGAAGTATCAGTGCGGTAGTCTCTGTTGAGCATGTCATACGGTCTACCCAGCAGGAGGTG -GAGATGATAGTAGGGCGATTGAGGGCTGGGATTTAACTTGATATATAGATAGTATCTTAG -TCCTTTTGTTTCTTGTGGTTTTATCATATCAATATATTAGGCTTATAACAGTGTATACTG -ATATAATATATATGTGAACATTAGGATGAGCACAAACACCATAAAAACAAGGTCAACCAG -GCTTACCCAACTAAATATCAGATCAAACGTCTTCTTCGAAATTAGGAAATTGTCAGATCA -AGATCGTCAAGCTCTGGCAGGCCGAATTGAGTACCCCCCCCCTTCCACCCCTCTTTATTC -CCTCAATTCTCAATTCACCTTCGACATCTACCTCTCTTCTTTGCCACAACTACACACCTT -TCAGAAAAATCAAAATTCACGGCCGTTTTGGGTCAGCCTTGAAGTTGGGACCTTGGAGTT -CCCTTCTGAGCTGCCCGCGATTTATTGCTCGATTAATTCCTGTCCGCCCAGCGACGAATA -CAACGCATCGTGAATTCCTGACCGAGACCCGCAAGAACCTATTCCCTACTCCTCCCACCC -TCAGAATTCTCGAATTGAACCTCTGCGACAAGAAATCCGCCATGAATAAGACCACACCTT -CGCCGAGCCCCGTCCCTAAAGCTTCGCCAGGCTCGAAACCCTCTCCCTCGCCGACCGCAG -GCACCAAGCGCAAGAGAAACGCCGCGGCCAAATACTACGCTGTCAAGGCAGGTCACAAAC -CGGGTATCTACTATGGGTGGGATGACTGTCTGGCTCAGATAACGGGATTCAAGGGAGCGA -TTTGTGAGTGCACATCTCTCGCAATGGTTCCTCCAATAGGCGCGCCAATTTACCTGCGGA -CTGACACATTAGCCCTTTAGTCCAATCATTTCCCTCACAAGAAGAAGCCAATGCGTTTTT 
-GAACGGCATCAAGCTCCCTGCCGCGTCGCAGAGCTCGGAGAACACACGATTCTATGGCAT -CCAAAGGGGCCGGGTTACTGGAGTATACACAGACTGGACAACGGCGCAGGAGCAGATTCG -CGGGTTTCCGCGACCTCGATATCGCAAGTTCTCCACCCGCGAAGAGGCAGAGGAATTTGT -ACGGGAAGGACAGACTCAGCTCCCAGTCGGGTTCGGCGTGGCAACTGAGCCCTCAGAGCC -TCATGGGATAACGACCGAAAAACTGAAAGATGCAGAAGGGGTTGAGTATGCGCCAGGTGA -CGGTCCATTGCCGACAGGAGCTGAGGATGGGTTTGATCCCAACGTCCTCCTAGACCCCGC -CACTGGCAAGGTGGTGTACAAGACTGCCCCCCAGATGGCAGCGACCAAGACACAATCAAC -AGGTATCCCAGGGATGCTCCGGATATACACCGATGGGAGTTCATTAAGGAATGGCACCCC -ATTAGCATCGGCCGGTGTAGGGGTATTCTTCGGGCCTGGGGATTCAAGGTTCGTACATTC -GCCCGCTGGGTTATTATCCAGCCACGCCTTCTAACTTCCATCACTTATCCTAGCAGAAAC -GTATCGGAGCCACTAAAAGGCAGTCGCCAAACAAACCAGCGCGCGGAATTGACCGCGATC -CTTCGGGCGATTGATATTGCGCCTCGGCACCGAGATGTAACCATTATCACCGATAGCCGA -TATTCGATTGATTGCGTTACCGTATGGTTTATCAACTGGCGTCGCAACAATTGGACAACC -CGGGACAAGAAGCCGGTCGAGAATAAGGATCTGATTGAGTCTATTCTGATCAAAATCGAG -GAACGCAATGATCTCAAGGTGAAGACCCTGTTTGAATGGGTCAAAGGTCATAATAAAGAC -CCTGGCAATGAAGAAGCCGATAAATTAGCCGTCAATGGAGCCCAAAGGGGAGTATCTGCC -AAGGCCGAGGCGCTGCATGTGGCTCAAGAGATACCCGATGAAGTGTTTGATGAAGATTTT -TAAGCATTTGCAAAACAAAGAAAACATCTTGCACTTAGCATTGGAATGTGGAAGATATAG -TTTCTAGTATGTACATCGTGTAGGGTGTTGCCAAGACCACCTCTAAGCCTAAGACGTTTT -CCACAACCTCAGAAATCTCCTCTTCACATCACCTCTATAGGCTCCTACAATTGCTCTTTG -CTTTATTACAGTACCCCCCCAGATGACATTGCCTATAACCACACAGGAAATTCAAACTAT -GATTCAATAACATTTGCTTGTACTTCAACCTTGACAATACAGATATTTATCTCAGACTCT -AGATCTCACTGATCTGACTCTTTTGGGTTCCGACCTGCTACTTTACTTAAGTAGTTGATA -TATTTGAGTCTTCATCGCTCTTAGCTACAACCTAGAACGACTGAAGAGTATCCTGCAGAG -CAAAATGCCAACACTCGCCTTGATCAACTTCAACATCGTCTGCGCGACGCTTGGAGGGTT -CATTTCCCTCTTTGGGTTAGTATCCTACCTTTGCAAGGAGAAGTTCTACCTATCCGAAGC -ATGTATGTGTTAAGTCCTCGGTCCTATCACTCCATTAACTAACATGAACTAGTAATCTCC -TTGCTCGCCGGAGTGGTATTCTCTCCCCATGCAGCCAATTTCATTAGACCAGAAGACTAT -GCACTACACTCCGAAGAAAACCTCGAGGCAATCACCTTGCATTTTACACGCCTAGTCCTG -GGGGTACAGCTAGTCCTCGCTGGAGTCCAACTCCCAAAGCGGTACCTGCAGATCGAATGG -CGGAGCCTAGGGCTACTCCTTGGCCCAGGCATGGCTGCCATGTGGATGTGCAGTAGCCTA -GTGATCTGGGCCATGGTTCCGAACTTCAAATTCCTCCATGCCCTGGTCGTCGGCGCATGC 
-GTGACCCCAACCGATCCCGTTCTATCCAATTCAATTGTCAAGGGCAAGTTTGCCGATAAG -CATGTTCCCCGTGAGCTGCAGAGAATTATTATCGCTGAATCTGGCGCCAACGATGGCCTT -GGGTACCCATTCCTTTTCTTTGCGATTTATCTCGTCCAGTATACTGGAATGGGAAGCCAG -AGCTATAGTGGTGGTGCGGGGAAGGCGATGGCCTTGTGGTTCTATGAGACCTGGGTATAT -ACCATTATTTTGAGTGTGGTCTATGGGACTTTGATCGGTTGGATTGCGAGGAAGTTGTTG -CACTGGGCGGAGGAGAAGCGCTATGTTGACCGGGAGAGCTTTTTGGTCTTTGCCATTGCT -CTTGCAGTGAGTTTACACCCAAGCTGAAAAAAAGGGATGCTTTGCTGATAATTCGAATAG -CTATTCATTGTGGGAACCTGTGGTCTGATTGGGACAGATGACCTTCTTGCCTGTTTTATC -GCTGGAAACGTATTCACACAAGAGTAGGTGACCCCGACGCGCGCGCTATGGTATCACCAA -TGCTAACAGAGACAGTGACTGGTTCCGCCTTGAAACCATGGACGATTCCCTCCAACCGAC -AATCGACATGCTTCTCAACCTGTCTGTCTTCATGTGGTTCGGAGCAGTCTGCCCGTGGTC -ATCCTTCCTGAATAACAGCGTGATACCAATCCATCGGCTCATCTTTCTGGGAATATTGAT -CCTCCTAGTCCGCCGTCTCCCTATTGTCTTCGCCATGCACAAATGGATCCCTCAGATTGA -GCTTGTCTCTCAGGCTGCATTCGTCGGCTTCTTCGGTCCCATCGGAGTGGGCGCCATTTT -CTACCTCTCAGTCAGCCTTGAATTCCTGCGCAAGATCCAAGTCGACGGCGAAGTCCCAGA -AGACGTAAAGCACATTATGGAGACCGTCCAGGTTGTGGTATGGTTCTTGGTAGTCTGCAG -TATTGTGAGTCTTCTCCCCAAATTCTCCTATCCCATTCATCTTCTGACCAAACTCAGGTT -GTGCATGGCCTATCAGTACCCATCGGAAAGGCTGGGTACAACCTCCCAGCAACCATCTCT -AGCGTCATCAGCACCAGTGCATACGAGGAGCGCGAACCGGTCCCCATATCGAATACCAGC -CACACGCACAGCACGGCCACACCTCTTACAACGGGAGAGGATGTGAGTTACCGCAGCCGC -AAGCGTGGGTTCCGAAGTGCACACCTTCGCCCACCATTCTTTGGAATTGGTGGTTCGGTG -ATCCGTCCTCGTTCGCCGTCGCACCAGCCCGGCGTGGGTCAATCCGATGAGCCGGAGAGA -CCTGTTCACCTGGTTCAGCCCGGTGAATCACAGAGACCCGCTCACGTGGTTATTGATCAG -ACTAGAAGCGGGGATAACAAGCCTTAGCCTTAGTTGCCCTGGGGGTTTGAATGACCCAGG -CCAGGAATGCTTATGTCTTGATTGGATTCATCCAATCACCTGATTAGCAGAGCAAGTGTT -GCATATGGGTCATTATGTCGCTTACCTTGCAAATTCTAGACAGCATGCATAATAAGCATA -TTGTTTTTTTGGTCTATTTTGACAGCGTTCTTTGGGGAGGGGGGAGGCGGTTGCCCAGAT -ACTGGGCCATCTCAGCGCTAGACTCGACTTTGTTATACTGATTGATTCGAATGTGTACAT -AGTCAGTAACCCCAATTAAATATCTACCCGAATCTCGAGTCATTTTCTGAATATTCTTCA -TTTCTTTTGCCTACGGGATACAGTCAAGGCTGAAAATCTGATGTCAATCCGAACGTGCGG -GCCTGTGCAGCATCTAGAGTCTTATGTTACTGGAATTGTATTGCCCGAGAAGGATATATG -GGATGCATTTGATGTCGTATTTCCGCCAATGACGGCATCTTATATTCCATATCATGCATG 
-TCTCAGAGCTATTGAGCGCTTAGAGGATTAACCGACAAAGCTGTATTTTGGTGCTGTCAT -CATGAGCGAAGATTCAGAATCATTTAAAGCTGCACTTGTTTTGCGTACTGTCTTTCAGGA -TCGGAATCGTGTATGAGTAAAAGCCGGAGATGAGACCTAATATAAAGTGAGATTGTCAAA -AGGACTGAGCAATGAGAGCACCTACACATAGAGCAGATACTGAATTTAAGCCATCGCAAT -AGTTTCATAATAACGCCATGCCATATGGAACCCCGAGAGTGGAAGAAATAGATACAAGAG -ACGTAAAGCTTTACAAGCCCTAGACACTATGCCCAGATCATATAGATCCCAGAAGAATAG -CAAATACAGTGTGCATTGGTATCAAATGTGCCCGGACATCCGTTTTCCATGCCCCAACTA -AAGGAAACTTCATAGCCTCGTCAAGGTGTTTTGAACCTGATAAGTAAGATAAACCGTTCC -ACCCATCCCGAGGGATAACCGGAGAGCATATTCTCTGATCGAATTCATGTGTTTTTTTAG -AATTTTTTTGAACTCATTCATTCCTTCGATATCGTTACACGTCATACAGGAAAGCAAAAT -GATGATTCTGTTGATTGGGTCCATATTGAGCGAGATTATGAACTTGCAAAAAGGCGAGAG -ATAGAACGTAATTTAGGACAAGCTGGCGTGCAGTTCACGGGCACGGCGAGCGAGGGGTGA -AATTAGTCGCAAAATTGGGTCATGCGGATTGCGCTCAACGACTGACATGTACAGGGTATT -GAGGCGCTGGCTCATGATGTCCAGAATGCGAGGTGCGACTTCGCGGAGGTCGGTGTCCTA -AAAGGAAGAGCGTTAGTCAACATGTTACAAAGAGTTGAGATAGAGTAATGACATACCCTG -GGGTTCACATTTTGGAGAACCACCTCGAGCCAGTGGAGCCGCTGGCCAATGTTGGTCTGC -ATGGTAGTGGTGACAGCAACAGCCACAGAAAGCATCACGATCGCAGAAAGTCGGGACAAG -TAAGTGGGCGATATGTGGACGAAGAAGTTGTCAAACAGGTCAGCTTGCTGGTTCGATTGG -AGCCACTAGAAATAATCGAGATTAGTAGGGACATCGAATATATGAAAGTTATTGAAACCA -ACCTTGATAGTGCCCTGCTCGAAATGACCAGTATTGATGAGCTGCACAACATCTCCCATT -TCGATTTCCTCGGGGGACCGCGACACAGGGGAAGCGACAGGAACCGGAGCTGGAGGCTGG -ACCGGGATAATCTGAGGCTGTAGCACAGGCTGAGACAACGGCTGCGGCATGCGCACGTGA -TTCTCACTTGGCTGTCTACCCGAGGAGCGCTTGTTGACGTTCAAGATCTCGCGGTGGGAG -TTCTCGTGCTGCGGGCTTTGGCTCTGCTGAATTTGGTGGCCCTGGTGCGCGGCCAGCAAA -GAAACGGTCTTGGACAGGCCACGCAAGGCTGCTGACATCTCCTCAATCTTTGCGGTATCA -CTCTGGCGCTGGGCCTCGTATTGATTTATCTGTCCCTGGAAGTAGCGTTCCATGTCAGAC -TCCACCTTTTTCGTGGTACGCGCCAAGCTCTCAATCATTGGGGCAATCGAAGACTGCATG -ACTCGTGTGACTTCCGCCTCAATGCGAGGCGCAAGCTTCTGTGCAACCGAATCGGAGACA -GCCGTCACCACAGGTGGCTGCTGCATAGCATTGATAATAGCCTTGGGAAGAACCTGTCGC -AACTCCTCATTGAATGCGCCCCCAAGTTGTTGAGAGACAGCACCGTTCAGTTTCTTGTTG -ACTGCTGCAGAAGTGGCATCGCCGATAACAGGAATGACATCGGTCTGGATACTGGCTGAA -ACAATACGCGCTAAGTTCTTCTCGACGTTGTCAGAGAGCGTGCTAGAGACCAGGCGTAGC 
-ACTTGGTCTTGCTTGGCCGACGAGGCAGCATCCCAGCTGCGACGTTCATCGTCGAAGCGA -CGATGGAGGTTCTCAAACTCATGGCCAAGACTCTTGTTGAACTCGCTTGAAACACTGCCC -TGCAGGGTCTGGATGTGTTTGTTGAAAGCTTCACTTGAGAAACCGAAACCAGGCAATGCC -TGCTCGGCAGCCTGAGTTGCACCCATCGCCGCAGTGGAAGAGGCAAGAGTCTCGAATTCC -TTGGTAGTGGCAGAAGGCGGCAGAGACTCGACACCCACAGTTATGGATGGCTCCTTTGCC -TTGGCAGCTTCGGCAGCGGCAGCAGCcttcttcttcttcttcttttcGGACTTCTCAGCG -ACCACAGGTGCCGCTACCGGCTCACTAGGCTCTTCCTTCTTCTTGGCCACCTTGGTAGGT -GATGCGTTACGCGAAGGAGCAACCCCAGAAGCCAATTCTCCAATCTCTGGCTTTTTCGCA -GATTCCTTGGGACCAACGCGAGACTGGATGGATGCTGTGTCACCGTTGACAGATGGATCG -TCGGTAGCATACGTAGGGAAAGTTTGAAGTTCGCTGATTTCAATCAAGCCGTGCTCCATT -GCATCAATAGGGCGAATGACCTTGTTATCCGCAGTCCAGCCTAAATCCTCCTTCTTGATG -TTCAAGCAGGTCACACCGCGCGAGTGCATAACATAAAGCTCAAATAGGGGATTATCTTCA -GTGCCGCGCTTGTCAGTAGCGGACTTTGTGATAGGGAGAAGCTCCAAACTTCTCAGCTGA -CCCTTGGAACCGAGAGAGATCTCTCGAATTCCACTCAAGCAAGCGGTAGACTCGGGCTTG -GGTAGCGTGTTTTCCTTGTCGTTGAGCTCTCGGATGTAGGCTGCCTGTGACATCTGAGGC -AAGTTGTACCGAGGAGCTGAAAGATGAGCAAAGTAGATAGAGTTACGCGTGGGATGTCCA -ACAACAATGACCCCTGATGATGGGTGATAAGCCACGCTGCAGATGGCATCAGACTCGTTC -TCGTGCGGGAACTTCAATTCCTGCACGGCCTTGCCCAGCCCAATATCCCAGAGCTGCAAG -GTGTGGTTTTGCTTGAGTCCAACCAAAACATAACGGAGCGCCATGGATCGCGAATAAGCG -CGAAGCTTATCGAGGAAGAGGACCGAGGTGGGCCAAGACTTTTCGGTTGGAGAGCCAGTG -ACAAAGGTAGTCAAGGGCACCCGAATCTCAGCAGGAGCAGGGCCTTCCACGAAGCTCGCA -TCGTTCAGCATATCACTGATATCCCAGAAGCGCAGTCGGCCAGTTTTGTCCAAGGAGGCG -ATGACGGAATCATCATCACTGAACGCGAAGTCTTTACCAGCTTTACCAGTCGAGATCTTC -AAAGCACGCTCCTTGAAGAATTTCTCAGTGTTGACTGTACGCTGAGTGCCAGAAATGCCG -TAGACAGGGTTCATTGCTGCCTTCGGCGAAACAACGTAGATATTCTTGCCGCGACCAATG -GCAAACAAGTTTGTGTGACGAGTACTCCGCTTGGCCCGCGTTTTCAGCTGGCCACCAGAG -GTATTCTCGTCCGAGGCCGGGAAAGGCGGGAAGATTAGGCTTTCGCTTTCCAAGGCATCC -ATATCAAAGAAATCCTTTCCAGAACGCGAGATCAAAGCCCAGTATACAGAGCCGCTGACT -CCGATACCGAGTATGGCCTGGACTTCGGAATCGCCACTCGCAGAAGGAGCTGTACACAGG -GCAACGTTGAACACTCGATCACGGGTGGAACGGAACACCTGTCGGTCGCTACCGTCATCC -TGTCGAATGATTCTGATTCCGCCAGTTTTGGCGAGCGCGTATACGATGTATTCGGATGTA -GCCGCAGTCAACGAGCGGTCAAGCTGATCGAACTCCTTCTTCAGACGAGCAATATCCATG 
-ACATCATCATCACGGAAAGTTGTAAGTTTTCCAGACGTGGCTTTCACAGCAATCGAGATG -AAGGGCCTCATTGGGAAGTTGTGAACAGAGACAACTCGTTCTTCTTCCTTTTCAGCACTA -TCCTCAGCACTCTCCCAGCTGTCAGCCAAAGCCTCGGTGTTCTCAGTCGCCTTCGGGGCA -ACTGGAACCGCTGAAGGTTCGGCCTCCATCATGAGCTCGTCAACCTCGGCCAAGCTGGCC -GCTTCTATAGCTTCAGTAGCAGTCTCGCGCAGGTGACTAGCGACCGACGGAAGAACCGTT -TCAGGGGATTCCTCCTGGACAGTAGAAACAGCAGCATTGGCTTCCGTTGCAGCCTGGGGT -TCATCGCCACTGAGAGAGCGGCTAATTTCATCCACGAGTCGACCAACAACTTCATGCACA -GCGTCTCTCTGACCTTCATTCAAAATTGGAGACTGGGTCCTCTCCCTAGGGGCCACAATC -TCGGGTGTCTGCGAAGACTCCAGCTTAGCAGAAACATTGATCTTTTGTCTCTGGGTGACT -TCAACAGCTGGGCTCTGGAAGCCAGAGGTAGACTGGGGAGACTTGTTCACAGGAGAGATA -GCGGCCAATTGGTCAAAGGGATTGACATAAGTGAACATTGAGTCCTTCGAGTTTGGCGAT -ACGACCGTTTCCGGGTTCGCAGGCGCCTTCGGACTCTCATCCACAACTTCCATGAAGACA -TTATTGGGTGTCTGATCCTTCTGTTCACCAGCAACAGGCTGCATATATGGTACCTGTTGA -GTCGAAGCCGGCATCGGTGATTCGGCGACAGCAGGACTTGGCTGAGAGCGGTTGAGCAGG -CGGAGTAACATATCCTGTGTGTTCGCATTAGGGCCATCGGGGTGGCCAGAGTGGAATGCC -GGGGGAGGGCCAGCGGGTGCAGTCGCAGAGACAGCAGGGGCCTGTTGGCCGAGCAAAGTA -GCAACTAAGTCAGATGCGGAAATGTTTCGTCTGTGAGACGCAGGAGCTCCCTGGGCCGGC -GGAGACTGCAAGTGCTCGGGAGCTGCAGCGGAGGCTTGTGGCTGAGGGGAAGCACTGGGA -CGCTGACTGAATCTGAGAAGATTCAAAAGATCCACAGAGCCTGGGCTGGTGTGCTGCGTT -TGCGGTTGCAGCTGCTGCGGCCATTCGCCCCGAGGAGAAGACACGTTAGGGCTCAAGATA -TCAGAGCCCTGATGAGGAGGAGCATTCATTGGGGCAGGACTTTGAGTGGTCGAAGATACC -GATGGATGTTGGTAGCCATGCGACTGGGGATGAGGGTAGCTCTGGCCATCGTAGGGTTGC -TGCTGCTGAGGGTGCGGTGGGAACAGACCAGGCTGGTAATACGATTGGCGCTGGGGCTGA -GGGTTATCCTGGCCAGGAGTGTTCGAAGGAGAAGGACGAGGCCGGATACTGGCTAGGAGA -GCCTGGAGATCGGTGGGGGTAGACATTTCTGCCGTTACGCCTTAGTTATAGGCGACCTCC -GCCGATGGGATCCAAGCTTTGCAGAGCTCTCAGAAGCTCTTTTTTGAGAGGGACAGGATA -TGAAATTGGGAATCAGAAAGGCTAAAAAAGGCGACAAAAGAGCAAATCAGAGATAAGTAC -GAGAAAAGAGAGATGAAATTGAAGAACGTGGTGAAGTAAAGAGGAAGGGTGAAGAAGAGA -AGAACTCTCGGTAATATTGTTCAAAGCCGTGGGTGTGGCTGAGGCTGCCGAGCTTGTGGC -GTACTGTGGCGAGTGTTATTTCCTGCCCGACTCGCGTCATTAGTAAATCGGATTACTTTT -ACTTTCTTTAGCCCACTTTCACTTGTCGAGTTCAAAGGTGAAGAGCTCATTGGATCAACA -ACATATTTTAGTTATCCTAAACAGCGCCTTTGTCCAGCTATCCGCGCCCGCGCCCTTCCG 
-CCAGAAGGCCAGCGCCCGCCCATCATGTTCTGTCTACGGAGGTGAGAGGAGCAAGCAACC -AGCAACCAGCCACAAAGCAAGTGATTGAGACTAATCTAGATGATCCATTCTGTAGCTGGC -TACCTCTTCTTTTCATCCCGACCAACGCCTCCCCGCTCTTCATCATCTCGTTTGTTGCCT -TGACATACATCATTCACCGGCCATGTGTCTACTGTTCTGCATTACTCTTGATCCTCTTCG -TCTCATCATGCCATTGGTCAGATCGGTGCATCTTTGATCTACGGAGTAACTGGTTTGCAC -CACGCTATACCTCAGAACCTGGCGATTATGCCATCTCAGGCAACACAACGTCCGAAGCAA -CAGTGTCAGACGAGGGTCTGAGGGAATTTGTTTTCGATACTATGAACTCCACTGCCAAAG -CCTTGGCTGGTGCTGCATTTGACAGCCTCCAACAACAACTTTCCCCCAATGGAATGGAAT -TAAGGCCTAACGAGTGGACAGGAATCGGGTTGGAGTGGATGCGCAGCCTATTGGGAAGAC -GGGAGTGGACTCTTCCATGTGTTGATGTCAAAGTTCGACTATGAAGTGCGCAAAAAGGTT -GCCTTGGCATTCTTCGTCTGTTGATGCATCGAGATGAGCTTTTGATTCCATATATATTAT -TCATGACTTGTGTATTTTCTGTTCATGTGTAGATTTCAATATCCTTTCCTAAACAATTAC -GAGCGGCCATTGTACATAACCCATTTCAGAGTAATGTGTCAATCATTGATAGACTTCTAG -CTAGAGATAGAGATTCTCCTGTTGTTCTAACTAAACGCAAGTGGTGGCTATATACTACGC -AATACGATAATGGTTTTGAAGGGAAAGATGTCGTTTATTCTAAGCCTGGTCCACCCCCAT -CTGTCCCTCATCCCAGGTATGCAATAATACTGATGAAAACAAGAAGAACAGAAAGACAGC -CAAAATTCCTACACGTATGAGACTGACTGCTACATGCACCGAGGGCTGACTGGAGGCTTG -AACTATGGCAAACAATGACAATATTCCCAACCATCATGCGGCTGAAGGCCGCATGGTGAT -TACCTCGAAGAAGACTTATCGCCGCCTTCATTGTTTGAGACCGGGCAAATGCCGAGTGCA -ACAAGCATATTGTTAAGCGTTGGATAACAAGCAACACATGAGCAGAGCTAGACTCGGATT -TTAGTTTCGTTTCCAAAGAAAGATCGATGAAAAGAAAAGAACATACCTGACTCATGGTGG -TTGTTAAACACTTTTGCAGAGCAACCGGATCACCGTTGGTAAGAGCATTAGCAATGCACG -TAAGCTGGGCGGTCAAGTTGAGACAAGCAGCGACACTACATGCCTCTGGGAGCTGCCGGA -CGATCTCTAGATTTTGTTGCCGAACTACATATATGCTTCGTGTTAGCATGCTAGCTTAGC -CGAATTGCCATCGCAGAATTGAGATTATGGAAAGGTATACGTACAGGAGGTTTGGTTATG -TATAATTTTGGCCACAGGAGCTATTCCTGCAGCTGAGGTCAGCCCAGAGGCGAGGGCGAG -AGCAGCCATGTTAAAACGCATATTTGAGACCTTTTGGGGAATACTTGGAAGCGGATGACC -GAGAGAGCAGATTCAAATGTGGCATCTATTGTTACAAGGAGGAAAGGTTACGCTATTGTT -GAATTATATGGCTGCTACTTATATACACCGAAGCTTTAGAGTTGTTCGTAACAAGGAGAT -AGACAGCCAGTCCTCGGCATGATTGACATTGGCAGCCAACTGGTAGATGGGCACATGAGG -TGAAGTGTAGACTAGCATGTCCAAGTGGTCACACTGAGTTGCATTCAAAGACTTGTCTTT -CCCCAATTTCTGTATCAGGAGTGGGTAGGTAGTATCAGATACTACCTACCTAATATAAGA 
-GATTTGAAGGGTGTTTACACAAAATCCCCCCTCTCCACCCCAATGACTTTGGTCTGGGGA -TTGAGAATCGGCTTTTTTTGGTTGGGAATTGATGAACTTGGACAAAGTGAGATAGCTGAA -GTCGATTATTTGACATTTGTTACACTAAGTACATGGCGAAGAGGTATCTGTCATAAAACA -AAATAACGGGTAAACGCCGGCGGGGAAGTGGTCGACCGGTTACTGGACTGGCAGATCGAA -TCTCCCGCAAATAATTTCATAAAACGTAGATTGGCTGGAACGAATCATAACGCAAGGCAT -GACAACCACCGTGGAGGTCAGAGATGGCAGGTAAGTCGTGAAGCATGACTTAAGGAAATG -GGGTATCACAACGTCGACGATCCCAGTTCAAGATCCAGATTTGTAAATAAGCGTGCAGAA -AGTTATGTATGGCTGGAGGATAATTAGACTGTAAATCTGCACGTAAGGAACTGACGAAGC -AAAGTCATAGATAATGCAAATGACATAATTAACAAATCAGTTGTAACCACCGCCACCCAC -AGGGAGAGGCCAGTCGTCTTCAGTGATACTGCTCCAGAAGGCCCAGACCCCGGAGACAGA -AAGGAGGGTGAAGAGAGTTTCGAATAAAGCCCCATTGATTTTGCCGTTCGAGGCCTGGCA -CAGGTGGGTGCTGATCACATAATTGAAGACCTGACCAATTGCGAACAACAAGCCAGCACC -GGTAAGATACACTGGTGTACCATTAGCATATGTCAATCTTGAGTCTAGCGTGGTGTACTC -ACGCATTGGTCGGAACTCGCCCAGGACGCGGAGCACTAGTACGGCTTCCAGGAGGTAGAA -CAAGACAAGACAAACGAGTGGGAAGAGTTGGTACAGAACATAGAGGGCGATGTTGCGATT -TGGCGGTTCGTGACTTGACTGGAAACGACCAGTCCAATCGAATCCAGTGTCCAAGGCAAT -ATAGCCAGTCCCGATGAAGAGGGCGGCAGCAGATGCGCATATGAGACCGATGGAGGCTGC -CGTGCCGTCATCAATCAACTGAAATCCGACAAGTGCGTTCATGAGAAGGACCCAGCAAGT -AGCAGTGATCGCGGCAATGTGTACTGCGGAAAACCCCTAGATACGTCAGCCGAGGCCTTG -CGTGCATCTCGTAGGAATCCTACCTTTCGGACTGCGTCGCTCAATGGGAAGCCACCCACT -GTGAAGATCTCACAAATCTCAATGATGATGAAACCGAGTAAGAACAATTGCATTTCCCTG -CACACACCTCCATGTCAGTTAATATCTCCATAACTGACAAAGATATTCCATACCTCCGGC -CGACGGCAGCGTGCTTCTTGTCCGATCTCCATATCATACCCACGGTCAAGACAATGGCAA -TGAAGGCCAGCAGAATTGAACCTATCACAAATCAGCAATCGTGCTCATGTCATTTGTCCC -TAATCGCATCGGCCCCGGCTTCGCGGATTGTACCTAAGTTCCGAAGGTATCTGTCCCCAC -TGAGAGGAATGCCGGTTAGATCGCACGCGTGGAAGGCAGTAGTCGCGTTTTGATTGGTGA -AAAGCTGTATGTGAGGAATGATATAGTCAGTCGATAATTCTAGAGTCATCATCGTGTCAA -AAATATCTCACATTGCAGACTGGTAGGGTTGAGTCGCGACAGAAGTTCTGTGATACCCCA -TACAGGTGTGCGGTCAGTATCTGCCCATTGTAGGATCTCCAGGTCGATCTGCACTTACGT -GAAAGTTCCCGAATTGAGTGGACCCCATTTTGAATTGAGGCGCGATTCAAAAGGTGGGAA -AAGAAATGAAGTTGACAAAGACAGAGTTCAAGACCCGCGGAGGACCGAACCAGATAGTGA -AACCAGGTTGAAGTAGAAGTCAAAGTCGCAGAATCGAAGGATGAAGCTGTGTAGCCAAGG 
-CAAAGACCGAACTGTGAGGGGAAATAAGTCGAGGTGATCCCCACGGGTTACTCGGGGGAC -TCTAGGGCGCTAGTGTGGCTTAGAGTGAAAAAAAAACGTCTTGACTTCCAAGTTTTCAGT -AATCTCGATGATCAAAATTGATCAAATCAAAACAAATCCAAAGAGTTCGAAGGTACAATG -GATGATTCTTTGGGTCCAGTCACTAATAATAGTACTCTCTTCCGCCTAGGCAACCCGTTA -TATATCTGGATACTTGCTTATTTTCCATAGCTGATATTGTTTTCTTGATGATTATCTCCT -TGCTGAAGACAGTAATATATGGTCGCCAGGTCGACCGGGTCTCAAGCCACAATCAATTTT -TTATGACATACGATCCTTTGAGCTTGGCATTTATATCTTGATTAAGATATGCGGTCACGT -TTTGTGTAGTACTGTGCCGTTACTCTGTACCTCTACAGAATACTAGTATATAAGTAGACT -CAACCCCCTCAAGAGCTCAATCTGGTCTAATTCAAAATCGATTGATTCACGTCCGATTTT -AGCGAGCTAGCCTCTTCTCTATCGTACCCCATGTAACTCGGCAACACTCAAACATTACCG -CTTAACACACATCCTCAATCACCAACTTTTGCCCATCGTGCTCTCCCAGATCCAGCTTCA -AGGTGCGCCCAAAGAGCTCAAGCAGCATCTCGAAGCAGACCTTGGTAATAGCAGGGTCAT -ACCGACCCTTGCTGAGCTCATCACGAATAAAGGCATCTGGGTAGATATCGTCAGCATATA -TGTGTTTTGATGGTGCTGAGAAATGTTTTCAACTCACGTTGCGCCCAAGCAATTTCATAG -AAGCCGAACAAGACGCTCTTATCATGCAGGGTCTTGCGGATGAGATCCCGGCCCTCGGGC -GGGACGTGGTTATCATTTTTACCGAAGATCTAGAGATTGTGTTTAGCAATCGACACAAAT -GGGGGGAAAGAAAAAAAGAAAAAAGAGTTGAGACGTACCATTAACATCTCACCCTTGATA -TCCCCGGCTCTCTCCAAACTATCATCATTCTTGCCCAGGGCAAGGGTCTTGCTGTGGATA -TCCGTGGCAAAGTAACACACGCTCGCCTTGACCCGGCTATCCAGCGCGCAACGATACGCC -AAATGGCCACCTAGACACATGCCTGTGGCACCCACCCGGCCGTTGCAAGTGGGTAGCGAG -AGTAGATAGTCCACACTGAGACTAGCATCTTCATCATAAGCGGCAAGTTTCTGATGAACC -GTTCGTTAGTCCCGCAAATTATAACAAGGATAAGAGCCCAATAAAAGACAAGAATGAGGA -TAAAGTATGTAATCATGGGAGCTGTATACTCCACTTACCTTGCCAATCTTCCATGCATTC -CCCTTGTCGGTGTCTTCTACGTCATACTTCAGAGGCTCCGGACCAGTGAATTCATGGTAG -CTTGAAGGAGCTGCGCAGATGTAGCCCTGGCCTGCGATCTGGCGTGCAAAACGTGCCACG -GGGCCCGTTACTATAGGGTTGGGATCTTGAGTTGAGGTGGTCGGGCATGATCGTGGTGGA -AGGGGCACGAACCTTGATAGATCTCACTGAAAACTACAACTCCCGGGAAGCGGGCTTTAG -GGTAGCCCGGGATTGTCGGGTGGAATACATAGATGCCTGTCAAGTTGTTAGTATTGTCCT -TATGAAGAAACATTGGCTTTATAGAGGTGATAAAATGAGCTGTACGGGAAAGATCATAGT -GCACATACGCATCGTCCCCTGTCCATCTGCTTTGGTGGGGACATCATGATAAGATTCTTG -GATCAGCATGGTGTTTTTATTTCCTTTTCAACGATAGGAAAATATATTGGAACATAGTCG -ATAAATCAATTGGAAGACTATTCAGGCGAGAAGACATTATCGATAACACACGCGGGGCAG 
-TCATGTGCTGATACTGGTGCTGATACTGGTGTCTCAACACTTTAATGTACGTTTGCCTTC -CAATGCAGGAATTATGAATTAAGATAAATAGCTTATGCCTACAAATCTAGATCAGCCTTA -GAATTCGCGTGACTCGTCGCAGTCCAAAAAAAAGCCATCAAACTCATAGCGGCGGGTTTC -AAATAAGTTAAAGGCTATAGAATGGCAGGAATATCTTGCTTCATGATTGGTGCAGGTATT -AATAATCTATTTGACATTTTCTTTTATACATATGGTCTACTAAAATTGGCTGGCTGGAAT -ATGGTCTGGCCAGACCTAAATAAGAAAAATCCCCTGGGGTTCCAACTACCAAGCCCCCAA -AATGAGACCCAAGGGAAAGCAGCGGGCTGCGATTAGAAGACGTACTGTAGTAAATTTAGC -CACAAGCGCTAAGAGAAATTTCAAGATCAAATCACTCACAGGAATAAACAGGTAAGGCAC -CTGGCGTTCGTATTCGAGCCAAGTCTCGCCGTACTTGGTGCGACACCGCTGGATGTCTCG -AAGAGCGCGGTGGCTGATCATACAAGCGAAGAAGACAGGGTAGAACCAAGGGAAAGGACT -TGAGAAGCCGGTAATCAAGCCCCAGTTAAGGGCGAAGTAAAGATCGCACGAGTAGTGGAT -CTTGCGGGCCTTGCCATCTACACAGTTGTTAGTAACAGAGCTCGtagatatatatatata -tattgatatttgatatTTGGCATGAACTTACACCATCCATCCACAAGAATCTTTGAGCCA -TCGGCTGCAGTAAGAGTCTTGGGGTTCTCCAATGTCTGCCAAGGAAGTTGAGGGAAGGTG -TTGCGGGATACCATGGTACCACGCTCCTGCTGGCGGAAACGGTTCTTCTGGCTGTTGGTA -GTATCCCACACCCAGTAGACGAACAGATAGGCGATGAATAGGAAAGCAAGGAAGTAGCGG -TTCCAGCGGTATTCAGCAGGATCGTGATTAGCGAGGTAAATAGTGCAGTGACAGTAGCTC -AGGGGCACACCGGCCAAGTTCCAGAAGATCAGCATGAAGCCCCACTTCTCGTAGTACATG -TCCCTGCAAAGTGTTAGAAGATAATCTTGATTCTGGGACCAAAGATATCCTTACCAGGTG -GAAACAATGCACTCCTCACCCTTAGAGCAGGCATTGGCGTAGAGATAATGAGCCATGACA -AGGAACATAACCTCGCCCGACACATAACCGTACATCTCCCATTGGCGGGCAGCGGCACCC -ATAGACAGACCCAGCAGGATGTACCAGGGCAGGCGGACCTCGAAGAACATCTTGAAATCG -AGGATCTTGAACATACGAGGGTTGAGCTCGGCACCCATGAAGAAGTCATAGATCGGATAG -CCAGTCATACGGTGCTCTGCACCACGCGCCAAAGCCGAGAAGTAGGCAATGAAAGAGACG -ATGTATCCAGACAGGATAGCAACAGTGAGCAATGGGCCGAACTCATCAATGATGGTGTAC -AGCTTGAAGACCCCAGTTACGTGCGCAACGAGTGCCAATGTAATGCTGGTGTAGAAAGAC -CACACGCCAGAGCAGTAATATGGCAACTGCTTGCCCCCCATGTGCGGTAGGGGACGACCG -GAGACGCTAACACCGGGAAGAAGCACGTAGCAGGCACCCTCGAAGATGAAGAAAACCCAG -TAGATAGTCCACGCCTTGACCGATGGGAAAGCACCGGTGTAGACCAAATTGACCAAGTGT -GCGAAAAAGTCGCCGTAACTTTCGTTCTCCGCTGGGCGAGGGAACTTGCCGTCGTAGTAG -GTAGCACCAATCCACATGTAGTACATGAGCATGGGGAATCCGATCATCATGGATAGCACA -CCCAGGGAGCCACCGAATTCAAAATGTCCGCTGTAATCCACCTTGGGATCGAGGCCTTCC 
-TCCCAGCCATCCACCACGCGCGATGCGGACTTGGACTTAGGGGCGGTCGTCTTGCCATTG -GTGGTCTTCACCGGCGCATCATCTTCCTCAGTCTTGACTTTGCCCGATGTGCGACGACGG -GAGGCGGACTTGGATCTCTCTAGGGAGGGGGAAATGGAGACCTCATTCTCGCTACCATCG -AAGATAGAACTGTTGCGGGTGGTGCGACGGCCACCGGGGGTCTCAATGAAGCCGGGACGA -TCCATTTTTCTGAAGGACGAAAGGATTCCAAATTAGCTCGACAAAAAGTCTACCATGTAC -AAGCCAAAACTTCCGGGCAATGGCAAGATATAACCCTGGAGAATTCAGGGTGTGGATACG -TTGTAACTAAGATCCGGTGGAAACTACTCACTTGGGGGTTTTCCCCGTTTGAGAGCGGGT -TAAAGTCATTGTCGCTTAATCGAAAGACCAAGGTTTCGATTGCAGAGGGTAAGGAGAGGA -AGGGTAAAAAGGAGCGAGGAAGAAAACAAGAATGTCCTGAATGTTTACTCTGGGCGGTAC -TAAGTGATTACATAATAATCTAAAATCTTGGATGGCAATTTTAAGGTACAAAGTAATGAA -AGTAATGAGAGAGGTATTTACATTTAGTTGTAGATTCCTTTGATATTAGATCGAAACATA -TTGATATAGATATCGAAAAATAAAAAGCAGAGAGAGAAGTTGAATATGAATCCCATAGAT -ACTTAAAGGGGTATACTTTCTTTTTCCTCCCAGGGAATTTACCATTTCTAAGGGGGAAAT -TGGTGGGGAACTCGTTGTCGACTGCCTGTTTTGACTTGGGGACCATGCCCAAAGAGATCT -TACGGAGTAGAGTAATGCAAGCCCTAATCCGGAGATCTATGCAATTTACCCCTTACCGCG -TCTCATTACGGAGTCTTGACTCACATTATCTGCCTGAACTAATTTCATTTCTTACCGACT -CCTCTCAACGACCTTAACTTGTTTTCTCTGTCTAAAATCAAATGGATCACATTACTCAAT -TGCATATGTAATATTTCAGAAATGTCCACTATCACCAGTAGAACAGAGTAGAATACGGGC -CAGAACAGTTACTAAGACAACAACCCAAAGAATCGGGGAAATGGATCATATATGGTAATA -TAGTAGGAGCTTGGTGGGAATTGAGAACAATGAAACAAACAAGACTTGATCAAGACCTAA -GCCCCTCTCTGGCTGCTAGTGGCCATCAATGTGATCTTGGTTTTGTTTTTCTTCTTTTTC -TTCCCCCCACTGGAAGATGGACCAGAATTTAAATTCTGGTCTCCAGAAGCCGTCTGAGCG -AGCAGATCAGATTCTTGCTGGGCGAAAAGCTCATCCTCCCAGCCCTGCCGCCAGCCATCA -CTATGTATAGCTCGTTGAGCCTCAAGGTTTGGTGACAGTGAGCCCACTGCCGTTGTGCCC -CAAACAGTGCGGATGCCTGGAGGACTGGTAGAAGGAGAAGCCAATGCCCCAAATCCAGAC -GATGAACGGGGAGGGGAGGCTGAGGAGGCGGTGGTGTTAAATAGTGTGGGATCCGCATTG -TGAGGAAGTGGTTGGAAGTCATGAGCAGAGAATGGAGCTTCAGTGACGCTGGCACTTCCA -ATGCTTGGCCGCTTGCGCCGAGCGGCAGCCCACCGTTTGTCGTCCTCCTCCTTCTCCGCG -CGAATGCGATCCTTTTCTTCTCGGGCTTCTTTTTCCCGGTTACGCTTTCGTCTTCTATCA -GTCTCTGCGCGGAAACGCCCCAAAACCTCGGGGACAACCACGTCCCTCCAGTCGCACTCG -AGGAAGCTAACCTCACATCCCTGCGGAAGATGCCCGAGATACTTCACTCGCTTGCGAAGC -TCATCATCCACGATGTGGCCTGTTGATATGTGCTCCACTCGTGGTAGAATGGTTGCAGGA 
-AATAGAGAATATTCCCCAAATGCAGCCTTGAGAATGCGAATATCCAAGGATGAGAGGTAG -AACTGAGGCAACGCTTGATAAAAATGATATGATTGATCTGGCCCGGAAGACACAGGTGTT -TGTTTGCTCCGACCTTTCTCTTTAGCTCTGATACTGGCTTCAGGATCAAGATGAACTCTG -TCTAGGCCTTCGGAGAGATCACCCGCTACATCTGATGTAACCGGCGTCACAATGGATTGT -GTAGACGAGCTACTGGCAGGAGTAGCTGTAATAGTGCGTTCTTTGACTGGCCGAACATCA -GGCGGATTGCCAATGCCTTTCAGCTTCTCTTTGGCATCCATGATAGAACCAATTGCTTTT -CTGGTCCAAGTATTCTCATCACCAAAGAGAAGCTCGTCTTCGACTTCCTGTCTGCGGAGG -TCTTCAATTTCAGCATCATACTGAGCGGTCATATATTCTTCGCCACCTTTCATGATTCGA -GCATAGTCGGCAACTTCTGCCACATGATACCACGGAACATCCTCGCCTGGACCAAGTGTC -TCAGATCCATCACGCGGCAGAGCAAGGGTGCTGCCCGACTCTCGCTTTACAAGTCTCAAT -ACAACATCACCGCCTTCGACGGGAAGATCGCCTTCTTGGCCACGGAACCACCTGACAGGC -CGAGTCTCGGAGATATACACAGAGTCCCAGCAGATAGGACATTTCTTCCAACGGTGCCGT -TTTTCAGGGACCGTGTTCTCGTCGTCGGACGAGTGCATATATCGGATAAGACAAGGAAGG -CAGAAGATATGACCACACCGTGCCATCCGTGGTGCAACCGGAGTAGAAAGACAAATGGGA -CAACTGGCAGCCTGTGTCTGGGCAGACACCAGTACTTGAAGGACTGAAGCCCAATCGAGA -TGAACATCGGCATTGGCTGCCTGAGCATGGAAGTTTTGGTTTGGCATCACAATAAACCGG -TAATTCGCATGAACATAACGCGATTTATCTACTGCACCCGCTCCCCAGGATGTATACCGT -CGGATGTTGCGAGGAGGCGCTTGGTATTGTGGGCGAGGTGGAAGTGAAAAGTTCATCAAA -TGAGTGATGGACGTCTGTCCCTTGCGGCTGGTGGTGGATTTCATAGCGGCCTAGATAATG -CCTTCAGGATCAGCAGGTAGCTTTTGAGATAAATAGACCCGTGAACCCACCGATTCGTTG -TACTCGTCATCATCCAATAACCGAGGTTTTCTTTGACGGTTATGCTGACTTTTCCGAGCT -TGATTATTCCTTGGGGTGGCTGAAGTTTTCGATGATGTCCCTGGCCGAGAACTTCCAGAG -CCTCCTATTCGCCTTTGAGAGCTGTTGTCCGCTGGAGCTGTATAGTAGAATGGAGTAGAC -GAGGCTGTATTCGGAATATTTACCGCTTTAGAAGGGGGCGGGTTCTGTCCTGCATTGGAT -GACATATCTCTGTAGCAACAGTTCTTCAATGGAAAACACAATGAACCAAGCAAGGTAGAG -AGACGAGACGAGTCAGTTGGTTGAATGGAGGGAGGGGTTCTATGGGGACTGGATGTAGTC -AACAACTGATAAAACGTCAATCAGCTAGCAGGCAGTATTTTGAGTGATCCGCCAGATTCT -GAACAAGCCGAACAGGAATAAGAGAATAAATGGCCTTGAAGGCCATGTAAGAATAGTAGG -GAAAATCGAGGGACGGCCCTCAGGGTGCCAGTTGATGTCGTCAAGGTCAATTTCGCCCCT -CTCCGGTTACAAATCGTAATGTCACGTGAATCAGCAAGTCCCGACACCCAATTCGGTCTC -GGTCCCCTTCAGCTTCATTTCGTTTGACGGTGAATCGAGGCTTCCTAACCCTCATCTCTT -ATTTCTTTTTCTTTTTTACCTTCTGCTCACTCGCTTTTCACCCTCCACTTGTCGACTTTC 
-TGCGCCTAGAGAGTTACTTGTACAGCGATTTGCTTCTTTCTCTTCTTGTCTCCTCACTCC -GATCTGGTTTGCTAGTCTCCTTCCACAGCAACCCAACAGCTCCTCCGACCCAGTTCCTCC -TCTACTCCCACTCTCAAACATATCCAACAATTCTTACTGAGGCCACGATGGCTGCCTAAA -TGCGCCAGCAGCTCTAGATATCTCCTTCTTCTTGACTGCTATCGCGCATGGCACTCATTT -GAGGCAACGCGTGACCGAACGTCAAGACAACTTCTTTTTCTAGCCAACTCAAAATGAACC -ACCACCTCCCTGCCATGCCGCATGGGCAGCTGCCCATGCCTCCTTCGAGGCGCCAGCAAG -ACTTCTCTTACAACGCCGCGCCTCCCCACATGCGATCACCACAGTACATGGGATATCCTC -CTCACATGAATGGCCACATGCCTCATCAGCCATACTCATCGCAGCAATACCCTTATTGGT -ATCCACCATATGGACATATCCAGGGTCCTCCACGTCCATACCAGGCGCCCTATGGACCGA -TGATTGTATCTTCTTATCCTCACTCGCAGCCTGTCATGGCGCCCACGCATATTCCTCATT -CGATGCCTATGCATCAAAGAACTCCGACTCCACTGCAACCGATAATGTCTCCATCTGTCA -TGCATCCATCGGTTCAGCCCATTCAGCCTGAAATACAGGAATACCCTGCTGTTCTACCAC -ACTCGGTCCAAGGATATCCCATCGCTTCTCCGCCACCACGATGGGAGCCACAGTCAAGCC -TGCCACCGAAACCCACTTTTGCCCCACCTGTAAGTGTTCACTTTTATAGTGTATTTAGCC -TGAATTTTTTCTGACATTTCGGCTAGATTCCTTGGTTGTCGGTTCCAGAGCAGCCCTTCC -CGGCCAGAATCCCGCGCAGACGTCGCAAGGGCCGTGTCATGCAATCATCAGTTGAGCTTC -CCACCAAAGACACGCACATGGTCATCGAAGGAGATGAAAACGAAGGGACTCAGCCATCCT -GTTCTTCTACCCAAGAATTCTCGGAACCTCAAACCCCAATCACCGCTACATTCCAGCAGT -CGGACACAGACTCCACACAGCCTACCACTCCTTCGTCAGTTGCGAAATCACAAGTACACT -CGAAAAACTCAAAGCCTGCTCCGATTGTTCCAGTGGTGCCTAATGTGCCATCTACCCCAC -GTCGCCAAGCAAAAGATGGCAGCGTCGCATCTGGAACTCCTAAATCGACTGCACCTGCTA -CTCCTGCTCCAGTGGTAGAATCGGATAAGGTTTCTTCTGCAGATACCAAGATAAGCTCGC -CTGCGCCAGTTGCACCCAAGTCTTGGGCTGATTTGGTCCGTTCGAAGGAGTCGGCGCGGG -CAGCAAGCGCCGCTGCCGCCGCTGCTTTGGCCGAGCCACATGGTTTGCCCGCTCAGAAAA -ACAAATCCCTCGCAGATGTCTTGAGCATGCTGGGAGAAGATGTTACTCAATACAGCGACA -AGATCGCATTTCTTGAACCTCGAGGACTGGTCAACACTGGCAACATGTGCTACATGAATT -CTGTGTTGCAAATTCTTGTTTCTTGTACCCCGTTCTATCAGTTCCTTGATCATCTCGGCC -GACGTGCAGCCCACACCTTCCACAGTGATCTTCCTTTGATCGATGCGATGATCATGTTCA -TGAAAGAATTCCGTGTTATTGATGCGGCCTCTTCTGAAGACCAACTTCGCCTCCGATTGA -AGCCCAACGAGCTTGAGCAATATGGCGACTCCTTTGTGCCTGAATTTGTTTATGAGATGG -TTCGACAACTGCCACGATTCAGGGACATGCGGCGCGGACATCAGCAGGATGCCCAGGAGT -TCTTAGGCTTCCTCCTAGAGGAGATGCACGAGGAGTGCGCTCGTGCCGAAGCGCACACCG 
-CAGCGAAGGAAGGAGGCGATTCTTCTGTTGATGGCTGGTTAGAAGTTGGCCACAAGCAAA -AATCTGCGACCACCCAGCTGTCTGGATCTCTTGCTGCTGAGTCGCCGGTCACCAGAATAT -TTGGGGGAAAGCTTCGGTCAGAATTCAAGGTGCCTGGCAACAAGACTTCTGTCACGTTGG -AGCCATATCAACCTTTGCAACTTGACATCGGATCGCATGACGTTCACAACATTCTCGATG -CGCTCAGGGGAATGACAAAGCCGGAAAGCATCCAGGGCGATTTCAACTCATCGCGTGGAC -CCAACATCACCGCAACTAAGCAGATCTTCATTGAAACATTACCCCCTGTCCTCATTCTTC -ATCTCAAGCGCTTCCAGTACGACAGTGTTACAGGTGCCACTCAGAAGATTTGGAAGAAGG -TTGGATATCCCCTCGACTTAGAGCTTCCGCGAGAGGTTTTCCCAGCTCATCGACGTAACG -CCATGGCAGCACATGGAGGCCCCCCCAAGTATCGCCTTATGGGTGTCATTTACCACCACG -GTAAGAACGCCAGCGGCGGTCACTACACGGTCGATGTTCGGCGACAGGAAGGCTTGGAAT -GGATCCGCATGGACGATACTGCCATACGTCGCGTTCGCAGCGAGGAAGTGGCGGAAGCCG -GTGGCGAGGAAGACCCCAAGGTTCTCGCTGCAGCTCTCGAGCAGCACAAGCACGGAAAAA -TTCCAAACGGCAATATTTTTGACCATATCGATCAGGACGACATGGATTTGACTGACAGCG -ACAAGGGCTGGAGCCAGGTCAACGGTAATGGTGCCAATGGACACGCAAGCAAAAAATCAG -TGAATGGAGTCACTCCACCTCCTGCCGCTCCCTCTGGGGTTCGAACTCCTCTGGGCCGCT -ATGGCTCTCGCGATAACAAAGTTGCCTACCTATTGTTCTACCAGCGAATGGATTGAGGGC -TCTCACCCAAAAGTTTCGCCATTGGACTGTACGTCCCTTCTTCAAAAGACACATTGAAAA -AGCCTCACACCACTACATCCTGCGACATATCGAGTTTTGTGTCTTTCACCAAGTCATGTA -TTTTGATCCTCACCTCAGAGGATGATCTGCCACCTGTATCGACAGGTCATACATAACATC -TATCAATCTCGGCGAGCTTGTTCCTTTTTTGTGTTTACAGCTCGCCTCTGCACATTACAC -CCCTTTCACAATAGAGTCATCGAGGAGCACAATCTAGGGTACGGCTGTCCCTGGTCATCT -ACCTCTCCCTCCCGCCTCCCAAAAGGCTCATTCATCATGTCTCACTTTCGCTTCACGAAA -GCATTTACCTGGATTCTGTCTTGACCTTTTTTTACCCTTTCTCTTTCAGTTTAACGAACC -CAACACATCTTCTACTTCTACCCACAACCCATTGTCTTTTGAGCCAATTGTTTTTTTTTC -CCGTCTACCTTTGGGCTGTATCTTATGGGATGTCGACCTCGGTTTATACCCCTCTCCGTT -TTCTCATGGATATCGTTCATACCCATCTTTGGCCCTATCGCATACGACTAGGCCAAAAGA -AATGGCTCTCTTTGAGCTATTCCTACCAACTTCTTTTTTTTTTTTCATGTACTTTGCCAT -GTCCCGACATTTGGCGAGGGTACAGGAGATCCTGTGACGAAGTCACGAATTTCCTTGTCT -TGATCTTGATTGCTTTGTCTGCATAGATGGCGGGTGTTGAAGAGGGCTTGGGCTGTTCAG -CCTGGGGCGTCGGGGGTGTTTCTTGCTCGTGTTGTGGGGGAAGAGGAGGGGGGTTAATGG -TGCAGATGGTACAGACCAGTGGGTTTGCAGGTTTGGGGGAGTGTGGAGGCTACAGGTCGA -ATATCTGAATTGCACGAGAACGCAGACTACCAGGTGTGGTATGTCAAAAAGCCTGCTTTT 
-ACAAAAAAAAAATTTCAAAGGAAGTTTATCTCAAGTGTGTATTGCTGTAACATGTAGTCT -GATCTATGTAGCATGTGGAACTTAGACTCATGACTACAGTATCGAATATATCAAGCATGA -AAGACTGGCAATCCAGCCTTCTCGCCCTTCACTTCCACTTCCACACCCCCATTCTCCACA -AACTCAAACCCAAGAGCCCTCCAGTTCAACTCCTTCCTCCAATGTAGCCCATAGTACCTC -CTCTCGTCTTGTTTATCAACTGCAGAAGCATCAGCCTTTGCAGCTTGATCAACCTCATCG -GCCACAGGACCAACAGCCGGCTCAGAAGACAGGCCACCACTATCTAACCTCCCACGTCTG -GGGAAGATGACAGATCGGAACCGGGAAAGAAGCATGCCCATAGAGCAACACAGACACGGC -TCATGGGTTAGGTAGACGTCCAAGTCTGTGCAGAGATAACCACCCTGTGAGCGAGTGCGA -ATGCGCGGGGCTGCAGGCGTTATAGCCGGGGTAGAAGAGCCCGCAAGTACCACGGTAGAT -GGGGGAGGAGCTGGGAGAGGCGGGTGGTGCAGCTGGGATTGGCAAGGGTCTGTAGAGGGG -TCGATAGTCAGGTCGGATTCAGGGTTGGGGTCTTGGTATTTCCTCTTCAATAGAGAAGAC -GGAGACTGAGGTGAGTTGGAGATGTTGGTGGATGTAACAGGACGGTGGCGGTATAAGAAA -TATGATTCCAAGGCGCTCAGCTCTGGTTGTTCTAGTTCTGGCTTCGATGGACCCGCGGTC -GTCGTTGATGCGGACGGGTCGTCAAGACGATCGTGTTCTCGGCGCCAGCGAGCAATCAAA -TCGACAGCACGCATCAGCGCGTGCAATTCAGGCCCGCCCTCAACATCCGCATTGTATATC -TTGCTCGCTGGGTTTGGCGCTACCCCGGCGTGCAGCTGGGCCTGGGATGGTGCACCCGCC -TCGGATCGGGAGTACCGCGCATCGCCGGCAACGGCGAGAACTGCTTGATCCCATTCGGTT -GTAGTGTCATGTCCGGTCTGATAGACTGTCTCGGTGATTTCTTCTTCAATGGCAGGGTCG -ACTATCACTGCGCCTACGCCGCGGCCTAGGCCGGATTGTTCTGCTTCCTCTGCTACTTTG -CGCGCCAGGGACAGGTAGTACCCTGCCCGGGGTTCAATGGATTCTTGTGCGCGTTTTAAG -ATTTGAGGGGGTGGTGCGACAGTTGAGCGGGGTGCCGCTGGGTTGAATACAACTGGCCAG -AGGGTCTTGGTCCATGTCTCGGCTTGAGCTGCGTTGAAGGGAGGGCAGAGCGGGATTGTG -GCAGGGTGGAGTTGAATAGTATGGGCGGAAGGATCGGGATGTGAAGCTTCACCCGGCACT -TCATTTGTAGTTGGAGAATCGTTTGTAGCGGCTGGCGCCGTGTCTGGCGCAAATGGAGCG -AGCAGAGCCTTGAGTGTGGTGAGATCAGGCAACGGTGGTGATATGAGGACGAATATGGTT -TGTGCTGGGGCCGCTGGAGGAGGGCTCTGGCTTTCAATTGCTGCCCTCAGCAGATCTGGG -AGTAGTTCTCTTTTGGCGAAGCGACGTAGGTGGGATAAAGGAAGGGAAGGGTTTTTGGGG -AAAGCCGCGTCGAGAGCCCTAATTGAGGAATTAATCTGGCGTTCAGACTGGCGAAGCACA -TAATGCGATAGCTTACTTGATCACTTTGGATGCAGACTTCACATTGACTTCTGCAACGAA -GGCATTGCCTATTTGTTGGTCAATCCTGATGTCGCAGGAAGAGGGCGATGTTATAGGTCT -TACCGAATTCTTCTGGAGGCCTTGTCTCTTGAACTGTTCGAATGGGAACCACTTGTCCAT -CGAGAGGTTGAAGGTCTCTGATTGTCTTCTCGACGTCCATTCTAGCTATCTCCAAAGGCA 
-CACCCAGTCACGCGCGGGGTGAAATAGAAGAAGCGTATCAAGATAGGTTGACCGTGTACA -ACCAATGGCTTAGCCCAAAGAAGCTCTAAATTACAAAGCCTTTGAGTTGTGCTTAACCCT -CAATTGCCGATTTAAACTGTAATAATTCAGTAAAAGAGGATTTGTTCATATGAGGACTGG -AAGAAAAGAAGAAAGCTATACAAAGTTCGGAGGCAAACAGTGCAAACTTTGTCTCCACGG -GTTTACCGCTAGGCCATTTTTGGTCTTTTGAGTCTGTATAGTTTCATATTCTTCGAAGGG -TTTActctctctctctctctctctctctctctctctctctctctcAATGAACAGGCATTA -TTGATATATCTAAGATAGAAGCTAAAACGTGGAAAGAGGATACCAATGGTCCACGAGTAG -TCGCTCGATCTTACTTCGCACCCATCAGAATTATAGTGTATAGGATGCAAAAGCCGAAAG -ATAGTAGCTACACATGTATATAAAACATGAAAGATCTACAATCGCAAGTGCTCAGGTCAT -GAATGGATATTTGAGTTTTATATGGTTCTAGAACAGTGAAGACGATAGGTGATTTGATCA -TAGATATCACCCTCATCGTGTAGAAAATCAAACGCCGGATGCCCCTGTGAGATAAAAATC -GCTGAAAAGTAGGAACAAGGTACTGCCCATCGACCTTTTGATATATGCTGAGATAGATCC -CTGTTCTGAAGCGCAATGTCTGATGTCGGTGCTCCTCGATATTAGATCAAAAGCAATGGT -GCATCGAGATCTGAATCACTCTGATGCAAGTGGTAGGTATTAAATCACTCCGCCAGTGAC -TCAGCGGGTGACGGGCAAAATAATCCACTCATCCATAGCTGACTCGAGACCAGCCCGAGT -GGTCCAGGGCAGAACGTAATTTGTGCAGATTTCGCATCGGCGTAATTTGGGCGTATTGAT -CCAGCCTCAAGCAGAAACAGGACCAGCTGCTGGAGCAATGCCGGTTACCCCGGCTTTGGC -ACTGCCATTTTGGCAGATGCCTTCAATTTCAAAACCGTCGCATCGATCGACGAGGCTTTG -ATAATAGTCCACTAAAACTTCCGGCTGTGGGACAATGCGCCATTCGACTCGGAACAAGAA -ATCTAGTTCAAGCATGGCTAATTCTGTCATTCCAATTCCACCAACTCGAGCATACGTCTT -GTTGGTCCAAAAACTGTCGCTGAGTCCCTTGCTGGCGACTGTGGCACTGGTGATCAGGAA -ACGATGGATGGTCAGGCTGGAAACGGTGAAGGCGGGGTACAGCGCACAGAGCCGATCTAT -GTAGTAGACCATGCTGAGAAGGATAGGGGGTGAGAGAGTAGCGTGGGTTGTCAATCGCTG -TAGGTAATCTTGCACGGAGATTCGGGGCGGAGAGCGCGAGTGGAAGCGGGTCAGGCGACC -CTGGTGTAGTGGAATTTTATCGTTGAATCGGATCAATTCCATGAGCATGCTGGATATTAG -CACGACCATGTCACGGGGGTCGGCCAGTTCATATTGCGTGGGCAACATCTTGACATCTGG -TTTGGAAGGTCGCAATCGCTTAGACTGCGACCTCTCCAAATCTTCGCCATCGTACTCCGC -CGGCGTCGCATCCGATGAGTGATGGTCGCGTTGGCTGTTTCGCTTGTTGGAACCCGAGGT -AGGGGCGGGCATCGAAGGGGCGGAGGAGGAAGTCGGGGGTGGATTCGTAGATGTGGCGGT -CGCCGGAATGGTTGCAGTCACAGCAGAAGACGTGGTCGATTGGGGTGTCGAAGCTAAGGC -CGAATTGTGGACCAGGGTCTTCAATGTATCAGATGTGGATGCGCGCCTGCGTTGAAGATC -TCCTGAAGTTTCAGGAGCTGAAGTTTCAGGAGAAGAGGGATCGACAGTATGCCGATTCAA 
-GGCTGAGCGAAGTTCGTCAATGGCCAGACGTGAGGATCGAGGACTATTGAATTGTGGAGC -GCCAGTTGGCTCATAGTGGGAATGTGACCCCGATGTTGAAGGAAGCGGGGAGCGCAATGC -AGCCGTTGAAGGCGAGGGTGAAGATGCCAACATGTTGAGGTATTAGGTTATCAAGGAATG -CTAGGAATCTGAGCATTCACAGTGCAAGGTGAGACCACGTGGGCGCACAGTCAGGGCGCG -CTGTGGATGGCGTGGGTGCACGGCCGGCCTCATCCTCAGAGGCTTAAAGGCTCAGAGGAA -AGCACAAGGTATGCAAGGGGTTATAAAGGTATAAAGGAAGAGAAGAGGGGGGCCTGGGAA -TAATACGCCGACCTTTGACAGAACAGAAAGGGGGCAAAGGCAAAGCCGGTTGAAAATCTT -GTTCGTTGGGGTGAGGTAATTACATTCATGCTGTCAGTGGCTGGTTGCTTCCGGTGACTG -TATCGTTACAGTATGGGTATTTTTCTTTAAAGATTAAAATTATCATTATATATATAAAAT -ATACGGAGTGCCTTGATAGATATATATCTTGTATATACTTAAGCTGATTGATACCATTGA -AACAAGACTGAGATTCCTAATAGTCTGCTCAAACAACCCTAGATGTTGGATACCACTCTT -TCTCCCTCAAGGGCCCCAGTGGGAATCGATATTTTACATACAATGTGACTAGTTGGGAAT -GATGATGGATCTACTCCGTACTGAGAGAGAGATACCGGTATCTAGGGAAAAATCCATGAC -TAAATTAGCTCGTATCAAGTAATCCCTGAACACTTTCTTCTTATAGGAGACATAATGCAC -TCCAATGTATACTCTCGGGATTTCAAGCCCACGGGAAGCGGGGGTAACTACCGTGCCCTA -GCTGGATTAGGTTAAAGTGCCAAGAGTTcaagtatacaattattgcatgtacagtataca -atatactccatagctatacgcaatatataaccaaatatgcaatataTGACTAAACATGGC -TGATAATAAGATCTCATAACTATAGCTTCGGCAGGAAGTTTTCCTTGGACTTACATCATC -AGTATATCGAATTTGTCGTAGTGCTGGCGGCATATACATGGCCAAGAACTAATACAAAGA -TCTTGCAAATATAGACTACACATACGGAGCACTAGATCTAGGCTACAGTAGGGAGTACTT -CGTATAGGGTACTCTTCTTATGGAAAAGCAAACAATCTCGGAAACCATGCGACGTTGGAC -TCCAGATGCTGTCCCTATCACACTCGATAATGGCTTTTTTATAACCGCAGATCTTATAGA -ATCATTTAGCGTGTACAAGTTTTATAGTAAAGTACGAAATATTTCTGTGACCATGACCCA -GACCGGCGACAGCATGATAGAGTAAAACATGTACAATATAAAGAGTAACATGGCTTGTCG -CTCTTTCATTATTCCGGGACCACTTTTCGGGGAACGGTGTGTCTCGGCAGAATTGAAGTG -CGGAGTACCTCTAGGCCCGGCTCTACAGTTCCGGATTGCCATCATATCCATGTTGTACAT -CCAGGTCTGTGGAATCTTCTCATTGACGGATATAATTTCCTCGATATCATTCTAATATCT -CTATCAACTTTAATTTAATCAACAAAGTGAAACGTCGAGTACCTCCCCACTGGGAGGGGC -TGATCGCCAGACGATGACGCGAGCTGCCGCACCGCCGATTCTGCTACAGCTACAGAGCGC -CGAATCGGCCTCCTCGCAGGTTACAGCTTTGCGCACTCTGAAGAACGAACTGATTGGCCA -TGACCAGCGCAAGGAAGTCTACATTGTCGAGGGGATCATACCAGCTTTGGCACAGGTGCT -GACATCGAGATGGCCGGGAACGGCCGAGTTTAATGAATCGATACTGCACCAAGATCAGTC 
-CACGCCAAAAGCTCCAGAAGATTATGAAGCATGCCTACAAGCAACCTTAATTGTTGGGAG -CCTGGCACAAGGTATGTGAACGACATATGCAGATCCATGTCTAGATCCGCCAAGACAGAC -ATTGACTGTACTGAGCAGGTGGTCCGACGTTTCTCACCCCACTGTTCGCCAGCAATATCC -CCCAGACCTTACTTTCAATCTTTTCGTCCCGGCACTGCCCAGATTCATTCTATCTTCCAA -TTCTTCGACTTCTGAACACAATCGCAGACCGGATACCGTTGCAAAGCCAGGAACAATGGC -CTCGGGAGACACAGCTTGCGGATCTTGTTTTTGCTTCTGCAAATATCACCTCTCTGAGGC -GTATAGTTTCGCAGGAGTACGGAGACCGGCGCAGTCAGACTATGATCGAGCTAGCAGCCT -CCCTGATCGGCAAGCTATGCACGGAGGAAACCCACAAGACGGCGTTAGCAGAATCCGGGG -TGTTGGATGCTTTGGCGCTCAAGATTGCCTCCTTTGTTGTGGCCAAGGGATTTGTTTTGC -CCGGCGCGGAGGATCGCCTTCAAGAACCTGGTGCATTGAATAATCTGCCACTTCCGGCCC -CTGAATCCGCCCAGCTAGCACCGATTCTGCGCGCAGTTGCTGTCATTATTGAACAGTCAA -AATGGAGGGCGGAGCATTTCTTGTCATCACCTGGCATTGTAACTGTGTTTCCGAAACAAA -TCCCTGGGTTTGCTCCATCAGACATCAAGAAAGGACCGTGGGGATCCACATACTTGTCGG -GCTCCGCAGTACCCCGTCATATTGGCGCCAACCCCATAGAACAGCTTTTGCCATCAGTTC -CTCTAGCACACACAAAATCCTCATCCAGTTCTGCCAATTTCCCACCTCTGGGACATGGAG -GCTCACAGCGCCGACACAGCCATTCTTACCCCACACCTTTTTCACTGTCCGAAACTCCTC -TCCCGGAAGATGACGAGAATGCCATTGTCCCTTGGCTGCTCTGCATTCTCCGCTCAGAGA -ATGGCATGACACGGTTAATGTCTGCCCGTCTTGTCACGGCCCTCTTTCGGTTAGGATTGG -CGAAGAAACATCGCGTCCCAATGTTCAGTTATCTGCTGATCCCCATCTTGATTCGTATGC -TAGATAAGAACGTTCGACTGCCGGATGAAGAAGGCGCTATACATGATGGGCTGCTCTCGT -CAACCCTACGCTTGAAAGAAGAGGCGCCTGCCGTATTAGCTAACTTAGTCATGGACGACC -AGGAGCTGCAGAGACACGCAGTAGAAGGCAATGCACTTAAGCGACTTTCACAACTTCTCA -AAGAGACTTACAACCCAATCACGGAGACATCCCGGCCAATGTGGCACGCAGAGGATGGCC -CAGCTCGAAACCTTGAGTCTCTTTCACAAGAGTGCTGCCTCGGGCCTTCCGGTTACTCCC -CGACTCTTTGCCATGTAATGCGTTACAGAGAGAACATTCTCAAGGCCTTAGCAGCACTCG -TCCCTTTCAAGGACGAGTACCGCAAGGCGATTTGCGAACATGGCGTAGTCCCTTACATCA -TTGACGCTCTCAAACCAAGGCCTAGTGATGCCCTAGCTGATGCTGCAATGCCCAGGAACA -CTGCAGAGGATGGAAATCCGATTCCGACAATTCTGGCAGCTTGTGGTACAGCTCGAATGC -TGACCAGATCCGTGAGCGTGCTAAGAACCAGTCTTATTGATGCTGGTGTGGCTAAGCCAC -TGTTTGCCCTACTTCGGCATTCTGACCTCGAGGTGCAAATTGCAGCAACGGCTGCGATCT -GTAACTTGGCCTTGGACTTTAGCCCCATGAAAGAGGTTTGTTCATGCTGATGTTTTGCTC -ATAAGCTTGGCGGCTAACTTAGTTTCGTTAGGCAATCATTTCCCACAACATCATTCCCAT 
-CATTTGCGAACATGCCCATTCATCCAACACCAAATTACAGATTGAATCACTCTGGGCTCT -CAAGCACATGGTTTATGACACAGCAAACGATATCAGAATAAACATTATCAAGACATTGGG -CTCCGAATGGATTATGGAGGTTATCTCCCAGGATCCGGTCCGCGCAGTGGCCCGGCGCGC -GATAGAAGAAGAGACAGATCAAGGGTCCGGGTTTGCCATGGGTCGATCTAATTCGGCTGG -TGAACAGGTTGATATTCTCAACCCTATGGATGATGCACTGGACTGGGATGAGGACCTCAA -GATTGCCGATATCATGCCCCCCTCCAAAATGAGCTTGGACATGTTCCTTCCCGATGCAAG -ACGTAGACGCAAACTCGTTCTTCACGGAACCCTGGCCCAAACCACACAGTCTCGACAGGA -CGACATTGCCGTTCAGGAGCAGACATTCGACTTACTGCGCAACATGATATGTGGTCGAGG -CGCGCCTGAGATGATTGAATATCTCTTCATGGAGCTGGGTGAGAATGATTTCCTCGATAC -CATCGCAGACACATTGCGACCACGCACCATCCAACTGCCTCACCGCCGGGACTCCAGTAG -TCAGTCCCTGCAAGTCCCCAATGAGATCATCAACGCAGCCGCCGCTCTCATCGTCCACTT -GGCAGCCGGACACCCGCGTCACCGGCGCACCATCGCATTCCACCGTGACTTGCTACGGTC -GCTCGGAAATTATTTCAACCATTCTCACAAAGAAATTCGGTTGAACTGTGTTTGGGTAGT -GATTAACCTCATCTACGAAGAAGACCAGAGCGATCATGAAGGTTGTAGGGAACGGGCAGT -GAGACTTCGTTCTCTTGGGTTTGCGGAACGGCTGGCTAGCCTTGAGGATGACACGGAGCT -AGACACGAGGGAGCGTACGAAGACCGCGCTGCATTTGTTGAACAAAATGTCTCCTGCATA -GTTGATTTCTTTGTCATGGGTAAAAAGATTGCCCGTGGACGTTCTGAATGGATCGAAATA -CTGTAGGAAAATATGGATGATTCTTTTCTTTAACTCGTACTCGGCTCATTTAGTACTGTG -ACTGAAGATAGGATGTATCAAACTTGATGAATATGTGCCTGACAACTGTAAGTAATAATT -CCCCCTATATGTATGCTTCTCCCACGCTACCTTGTCACCTGTGGACTTCTCTCCAAGTAT -TGTTAGGAAGCATTCCATATACCCAGCACTCTTGACTCTTCATAATTCTATACGCCACCC -ACGATGCCTAATGTTCCATTCCCACTTTACACCGGATCAATTAGTCTCTCAAGACCTGGT -TATAAAGGTACCACCAAGGTATGTTGTATAGGTAGTGAAGTTGTTGGTGATAAGAGCTTC -CTTATCGATGTGGAGAACAACGTCATAGGTCTTGCAATCCGGTCTATTCACATCTTCGCA -TTCTACTCATAGCTAAATATCTAAGACAAATTTGTTATTCCTAGCTCCAGAGATGCAATC -ATGTCTCCAGTACCAAACCCCATCATTCAAGCCCTCTCGCTCCCATCTGAATCAAAAGTG -GGCCTCTCTACGTCCGGCCTAGGCTCAGGCTTCACAAGCACAGGCACCATCCACGCCCAG -ATCCCCGGCCAAAATGGCCCAGAAGAGCGACTTTACTTTGTAAAGACCTCTTCGAATGGA -AAAGCCGCAGAAGAAATGTTTGAAGGCGAATCAGAGTCCCTCAATGCAATCGCAGCCTCC -GTTCCGGGGTTCTGTCCGCAAGCCCTAGCTAGGGGGCCACTTGACGAGCTCGGGACAAAG -GCCAAAACACACTTCCTCGCAACAGAGTTTCTCGACCTCGGCGGAAAGGTAAGCCGTAGC -ACGAGCGACGAAACCACCCTCGCGCACCGCCTCGGAAAATTACATACCACACACGCACCT 
-CCAGACTCAGACACCGGCCGGCGCAGATTCGGATTCCCAGTGCCGACCTTCTGTGGCAAT -ACCAAGCAGCCAAACCGGTTCTGTGATAGCTGGGCGGATTTTTATGCGAACGAACGGCTG -CTTACGATCCTGGGGACATCCGAGGAGCGTAATGGACCCGATGCTGGGCTGCGCGAGGCG -GTTGAGAAGACGGCACAGAAGGTTGTGCCTCGGCTGCTCGGGGATAACCATTTAGGGTAT -AACTCCAGTGGCGAGGGAAACGGGATTGTACCTGTTGTCATTCATGGGGATTTATGGAGC -GGGAATGCTGGTTGGGGTCGGATTGTTGGCTCGGGTCGTGATGAGACGCCTGGGGATGTG -GTGTATGACCCAGCGGCTTGTTATGGACATAGTGAGTTTGAGCTTGGGATTATGCATATG -TTCGGTGGGTTTGGGGCTCGGTTCTTTGATCAGTATCATCATGTTGTGCCGAAGACTGAG -CCTGTGGCGGAGTATGCTGATCGCCTAGAACTGTACGAACTGTGGGTTTTCTTCTTCTTT -CCTCCTTTTTATGCTGTCAAAAGAATTCACTAATTATGATCCAAAAGATATCATCATCTG -AACCATCATGCTATCTTTGGTGGAGGATATCGGTCTGGTGCTATGTCGATTATGCAGAAG -TTGATCAGGAAGTATGGAAAAGACTAGGGTCGAATATTTAGTTTTTAAAAGTTTTCTATT -CTCTTTGTTGACCTTTTGAGATGTCAAGCAATAACTAGCTGAAGACACTGTGCTCCACGA -GTATACTTAAGAAATTATATCGCACTAATATCCATGACTTGCTCGGCTCTAGACTCTTCG -TCCTACCTTTCACTTATCCCTTGGAAATACTAAATTACATTCGATAATACCAGCTCACCC -TGATTTGAATCTACCTTCCGAGGCCAGTGGGCTGGCCAAGGCTCTGGTCGATCAGCATCA -CACTGAACAGCCATTGAAGTCGTGTGTGTGGAGTGGTTCTGTCCGTATGTCAACGACTCC -TCTATCCTCAAGCTAAATGTCTGCTCACAAACTATTCAGCTTTTGCTTCCCTCCAGTCAC -TTATTTCTAAGCAGCAATATCAACGAAACAAAGTACCACGAGTCTGGCTCGCCCTCGAAG -AGAAAATGATCGCCTACGAATATACCGAGGTCGGCCCATACAGCAAACCAGACTCACTGC -TCACAATAGAGCCTGGAAGCCTAGTCCCTACACTCTCAACGTCAGATGGTCCATACCCGC -AGCCACTGGACGAATCAACTATGATCCTAGAGCATTTGAAGAAAGCGGATCAAGATCACC -AACCACGCTTCCCACCAGAGGATGCCTATGAGCGTGCCCGAACACACATTTGGATTGATT -ATGTTACATTTTGCAAAATCCCATCTGTCCCCGTTTCTTGCAGTACCAGCCTGCAGATGG -CAGCGCGCAAAACATAGATGCCGGGTTGGATTGGCTCAGCAAGGGTGTTCGAGTCACCCA -AAAGCCTGAATCGGGAGATGCTCACTGATGAATTTTTTTTCTGGTGAGGATGGCATCTTG -CTGCATTTTGCGCTGGCTTTGTGGACCGTACGACTGTGGGTGTTTGAAAAGTTTAAGAAT -CTTGGATTTGCAATTCTGAGAAAGGGGGAAAGGAGATGCAAATGATAATATTTGGGCATG -ATAAAAAACTTGAATGTCTGCTGTTGAGAGCAGACGGAGTATCAAACTGACTACCAGTTG -TCGCATAATTTTCCCATCTACAAGAGATATGTGGATAATACCGCCCAGGTGAGTTGGCAA -AGGCGATACGAGCTGATCGTAGGGCTCCTTGAGTTTGTGAGATAATACTTGGCTTTTCGT -GTATTCTCCAGCATGATCAATAGCTTTATAATTGCACATCCATTTCCAGGAAATGAAATA 
-AGATGGGTAAGACACCGGTGACCGGAGTATGTTGCATACACTTCGGATCAGGCAATAATA -TATTTCCGATAAACCTACACGTGATTGTGAGCTCTTGGACCGACACGTGATCGCTATATC -AATCCAGTCACGGCGCTCTTTCATCCTGCAACTTCCAAAGATATCGCTTAGTTTTGCCAT -ATTTCTAAGAAACCCAAGAAACTGCCATGGCACGCAATGTGAGGGACGAGTCTCCCCGCA -GGAGCTCAACGCTGGAACGGGGGCTGTCAGGCCCGGCCATCCACCCGTCTTTTATACAGG -TGGCCAACCCATACATATTCGAACAGACAGTCGAGAACTGCATTGAGGCGATGGGCGTTA -ATCCCCTTCGTGAAACTTCGTTGCGTCTTCAGGGTGTAGCGTGGATCGACAGTGTGCGGA -GAGCTCTCAACCTGTAAGTTGACTTCAAACCAGGCAAAATGTATCAGAACTAACTGAGAA -TCCAGTCCCATTCGCACTTTTGATACAGCAGTGGGATATTACCACCGATTTCGATTGGTG -CACCCAGACAATGAGTATAATTTCACAGTGTGTTCCCTCGGAAACCCTCCAAGGAGACTA -GCCTAACTCGGGATACAGGATGCTGCTGCAGCAGCTTTATTCACAGCATGTAAGATTGAA -GATACGCTCAAAAAGTCACGAGACATCGTCTGCGCAGCGTACAACTTGAAACTAGCTCCA -TCAGAGCACTTATCGGCTGACGATCCCGTGAGTCATGATGCCAGATGATACAGGACAGAG -GACTCACGTTTGCCGACAGATGTTTGAATCTAATGCGCGTGGCATCATTGGTCTTGAGCG -ACTCATGTTAGAGGCCTCGGGCTTTGACTTTCGAACTCGACATCCCCAAAAGACTCTCAT -GAAGCTAGGTCGGCATCATGGACTATTAAAAAACTCCGAAGTCTCCAATCTTGCCTACCG -CATTTCGTCTGATCTCTACCGTACATTTGCACCCATCAAGCAAAACTCATCAACGATGGC -ATTTAGCTGCCTTGAGCTTGCAGGACGACTCTTGGACCAGCGCGTCGAACAAGTGGAATC -CGGGGTAGACTATGCGCAGTGGAGCACTAGCCGAGCTGAAATCATGGGTAAGTATTCTTG -CTGTATTTGCCCTATCCAGCAAGTGCCACATACTTACAGCCATATAGAAACCCTCTTCGA -CCTTTTAGAGCTCTATACCCATCACCGATCACAAACCGCCGTCGGCTTGGAATTCCCAGC -AGACCGGTTCTTAACAGTCCGCATCCCGCTCAACCAAGAAGCTAGCGAGCACAACATCCC -GCGCTACCACCCGTGGATTGGCCAGCCACGCAAGCAATCAAATGGTAGTGGCGCGAACGA -CCAAGATCCTCCACGACCTGCTCATCCCTTGACACCAATTGCAGCGAACGGGGACCGCCA -GAGGACAGGTGAGCGAGGCCGTGACGCTGCCGTGCGATTCATGCTCGATCCCGCATGTGC -GGATGAGGAGAGGCGACAGGTCGCAGCGTACTTCAAGGTCGAAATGGAAGAATATGAAGT -CGACGGATAAAAGGCGCTTATTACTATTCGAGTCCCTGGCACCTGGTGTGCCCCATGCGG -TGAAGCGCAGCTAAGAGAAGATTTTGCTTGTTTCCATCAACCGATGATATGGTAGCGGGT -TTGTTATTGAGCTAGGGCGCCCCTATATACTCTGTCTCATCTTGGTTTTAGCCGAACAAT -GCTCCGATTAACGGTGACTCGCTATCGACCGTTCGTCCTCATTCTTGAAATACACCGGCA -TATTTCTTAAGCCCCAATAAGGGAGGCCTCGGTCCACCCTTTGCCGACAGATGAACAAAA -CTCGGTTTCTTACCTCAAGAGTTTTGAAACTGAGTTGTCTCAAGACTCACATTCCAAAAG 
-TTTCCCATATTGTTCCCTATTTCCCTTGATTTTCCAATACCCCAGACATCTATGACCAGT -GACGAGATATCGATAGATACAATGAAAAGAACCCATTAACTGTCGATAACCGCTTTCTCA -TTGTAGGACAGCACCGTGATGGATTTTTGCTATACTCCATACCTACGCACACATTGTTAT -GACAGATATAAGGGATAAAGCAAAATCAGAAATCAGCTCCAATATTCGAACTGAGAGCTA -TGTACAAATCTACAGCCTGTTCCCTTCATGTGTCTCAAAACAGCTAAGCTATAGACAAGA -CCACCAGAGCAAAAAACCCAGACATGTCTCCCAACATTCCCCTTCTTCCAGCCACTTCTA -CAGAGGGACGTGTCCGCTCCACGCATCGGGAGTATAACAGAGCTCCAGGTCAGGCGACCG -AGCAACAAGTCATCAAATTAAGGCTCGGTCCAGGGCCCGGCATCAACCAGAAAGGAaaga -gaagagaagagaagagaagagaaAAAAAAAAGGAGTAGTGAACAAAGGTACAGGGAAACA -GCCGTAAATGAAGAAAGAAAAAAAGTCGTTCCCGTGCCTCCGGCTGGAGACTTTCAGTCT -TCACACTGGTTCATTAGAGGCCTTGTCATAGGTCATTTTGTTCCGTGTTTATGCCGTGTT -GAAAGGCGATTAAGGCGAGCGGGGCGCATGTATGTACATAACATGTATGCACATATTAAG -GCTTGCATGCGTATTCAATGAACCCAAATCAAGTCCTGAATGAAGCAAGAAGCAATCGGA -GATTTGCAAGGGGGGAAGCGCGGGGTTGAAAGGTCGTCCTCGAGTAGTGGAAATGAAAGT -GAAAGATGAGTGTCATGAGTTTGGGTGGTTTTTTTTTCAGTCTGAATGCCGGACAATCGA -AGTTCATCGACGCTTGAGTTCGGTAAGAAGACAATCGAGAATCTGAAAGGGATTAGTGAC -ACAATCATTGTGAAGGGTGCAAGGAAACATACCGATTCTGAGACGGCGCCCTGGTTGTAC -AAGTACTTGCCCAAGCCACGGACGACCTCGAGCTGCTCGTCAACTGTAAGGCCATTGATG -GCGGTCTCGATAATATCGCCCACGCTGGCCTTGTAGACAGCAATGAAGCTGTGATAGTAC -ATGAAAGAGAAGAAACCCATGATGAACTGGCAATCCATGCGATCGGTGATACCACCGTGG -CCAGGAATGGATTCGCCGAAGTCCTTGATCTTGAAGGTGCGCTTCAGGCCAGAAGCAAAG -AATCCACCGAACGGAGCGATCAGCGAAGCGAAAGTGGCGAAGGCCAAGATATGGAACTGG -ATAGGTTCCACCCAGAAAGTCTTGTCCACCCCAAAGATTTCAAGAGAGTATGGCTGCGAC -GTGAAGACCGGGTTTGGATCACATTCCAGACCGGTGAAAGCATTCGAGCCCAGGTCATTG -ACAGGGCAGATGAAGTACTTGTAGCGGATCAGCAGGTTGGTCATGAAATAACCAAAGCCA -ACGGTGCAGATCCATGCGCCGAGGAATCCCTCGACAGTCTTCTTCGGGGAAAGCTGAATA -AGCTGTGTCTTTCCAAAACTAATACCGCAGATATAGGCAAAGATATCATTCGTGATGACC -AACGCGGCGGGCAGGAAGAACCAGATCATGCCCTCGAAAATGTTGTTCATGACAAAGTGA -GCTTGGACAACAATCAAATACAAAGCCATATGGGTCCAAGCAAATTGGGTGAACTGGAAT -CTGTAGTGGCCCTTCTGCAGAGATCCGACAAAGAACACGAATCCTATCAACCGATATTAG -CAGAGTAAAGGGGGAGCGGTCGAGGCACTTACCCATTACGTAAAGTGTGAAGCTGATGAA -GCGGTGATGGTTCGCCAACGGAAGCAGAACCTTGTCAACGAGGAGGATGTGCTTAAAATA 
-GTAGATCACGCTCTCCCCGTACAGGAAATACATAGTGGTCGCCAGGAAGTACCAATTCAA -TGATTTGGTGAACCGCAAGTTCTTCTCCTTATTGGGGATACTGGCGATAGCGATAACTTC -CTTGAACGAGATGATTTGAATACCAGTAATTAGGGCAATAATATAAATGTGACCGGAGAA -CAAGGCGACGAAGAAACCGGCAATCATCACAAAAGTCCATATCGTCCGGGTAATGAAAGT -CTGTTTCTTCTTCTCATATTCGGACTGGGGTGGTTTCTAGAAATGCAGATCTTGTCAGAG -TTATATTCCGATGCAATACTCCACTAGAAACTGACCTCTTCACTTATTGTAGATGGTTTG -GACCCATTGCCATTTTTGCTGGGGCTAGTGGGCTCTGAGGGCGCATCACTGACGTCGGAC -ACGCTGAGGCGTCGCTCATCATTTGTCCGGTGTTGAAACCTAACATTACGGCGAGATTTC -GACATGATGGAGAAAAAAGGGGGCCAGAAAGAGAATCTGGGCAGCAAGAGTGGAGAGTGC -AGAGAGAGAGGGTCATAGGGATTAGAGGTTGATGAGCTCATGTAAGTGCCACGCCGAAAG -CGGTCAGCTTTAGCTTCAGCCCCGTGTTTACTTTTTTTTTTTCTTTGCCTGTTTCTTTTA -TACCTGGTGTATTATGTCTTGTTGATTGCATAAATGAATTATATTATTTTTCGTGGCTAG -AAATTGAAATCGGATGTGCAGGGTTGTTGTCCAGGTACCTTAAACTACCGTGTTTtctct -ctctctctctctctctTCAACCTCGGAACCACTCGAACTTGAGGATGATCAAAAGTCATC -CGTGAACTAAAAACCCCTCAACTCCCTCCGGTAGTCCATTCATGGCGGGGGCATGgaaga -cgacgacgacgacgactacggctacgacgaGTTTGACTCCCTGCCACCGGGAACGTTGTA -TGAGCTGGAACAGAATGCGTTCCAAGCAACCCAGGCGTCAGCCTCACAATACCACTCAAA -TCCCATAAACAGCAACCCCGTGCTCCGCGCTCAAGCAGTACGATTCAACTATAATCCTGG -ATCGTTGAGGCCGCCGCCCCGTCTACACACCGGATTGACGAGCGACTACAATACACTGGA -GGTGGGGGAATTGGAGGCCGAGGTGTATGATGATGTTGGTAAACCGCATGCCATTCCGCA -TGGGCAACATGTAATCGCCACAGATTCTGGGCTTGCAGCCAATGTGCTGACGGGCGATGC -CATGGATGTGGACGAGTACTATGGTCAAGGGAACGACGCCACAGAGATCAATGCACACAT -GGTTCAGGTGAGTTAGTTGACGAGCTTGTTTGCATAGGCTCTAATAACCTTGTATAGATG -GAACAAGAAAGAGAACGCATCTTACAGGAGCTAGCTGAAGCAAGGATGCAACTGGAAACG -AAAGCCGGAGAAATCTCAATCATTCGGAGGAAGCAGACAACGATGACACAAGACTATGAC -CGGCAAATAGCGGCTTTAAGGAAATCTATGGCTGACGAAGTGGCGAGACACAAGCAAGAG -ATTGATGCAGCCGTGTCCGAAGGCAAGGTACTGGCGACAGAGAATATATTTCTTCAACAA -GACCTCGTGGATGAAACCTACCAGCTTCGGAGCTACAAATCCAAATCTCGCGAGATGGAG -GCCCCAGTGACTCCTCGAAAGTCCCGGGTGCTTCCCTTCCGCGATGGATTTGAAGATGAC -GAAATAGCCATGGTGTCGCCCAGCAAGTCTGCAGCACGTTCCAAACGAGCCACGCCAACA -GTTCCAGGAAAGCGCAAACGGCAACTAAGCCAAGATGGCGCTCCGCCACTAGATCTGAAT -CCCGCAGGCGTGGAACATATTATGATTGGTGCGACTGAAGTTCCAGCCGAAGTTGACGAG 
-ATAAAGCGCAAGTCTGCAGAGGGTGGTCGTAATCAGCGGTTTATGATGCGAATTTTGAGC -CACCGTACATATCCCAACCAAGACACAGATTTCGAAGCGATGGCCAATCTTGCATTTCCC -TCGGAGCCACATCAACACATGTCCAGCATCGTTATGGAGGCCATGGCTCGTGTCGACCCG -AGCAGCTACGTATTAGAATATACGCAAACAATCGCTTCGTTGTGGCAGCGTGCTATCAAC -GAGAAGTTCTACGAGCCGATCCCCCGATTTGAAGCTATCACAAAGTATAGTTTGATGTCA -GATAATGTCCCACTCTCCGAATTGATCACACCTCTAGTCGGAGTCTTGCAAGATACTGTG -GAAGTGAATGCAGTTCCTCGCTTCAAGTACGCCCCGGCATCACGAGATAAACGAGTTACT -CGACAGACACCACAATCAGATCTGCAGCCCCTCGTTGACTCGACTGGGGCCATGCGTCTA -CTTCACCAGATTGCATGTGGAGTATTGCACATCAAAAGTGCAATGGAGACATTCTGGCAA -AACATCCGTATCTCGTTCATTCTCGTGATGTTGCACCCTTCACACCCACTGGGTGACATT -GTGATTTTGATGAACCTCCTATCAACAAGTATTCGTGCAGATTCCTTTGGTCCGATCAGA -ACTTCCGACCAGGACCAATTAGACGTGCAAAAGTGGATTGTGGACCGCCTCACGCACATG -CTGAGCGAGCCCGCGATACCAGATGAAGGCGTGGAGCCATACACCGCATATGACATTTGC -GCAATGCGCCTCGAGGTACTGCTGCTGCTGGAGTCACTTGCATTCAACCCTATGGCCCCA -TCCCAAAAACATGCTAGCACCATCCTTGCATTGCACCCGAATGCACTCGCCCGTTTATTC -CGGTCGATGCATGATGAGCTAGATGCACTTTACTCTTCCCCGCCAGAGTCAGAGCTGCGT -GTTGCGTTGGTGAATGGCCTTATGCGGCTGGTTTTTGGGGTTATGCGGCAACATGGCCCC -CTGATCAATATGCAGGAGAAGCTGGCGTGTGTTCCTGGCAGCAAACAGAAGCATCTCGTT -GTGTTAACTCGCTTAGCGTTCTGTGATGGCACCGTGCTGGAGGCGGGCATTGACGATGAC -ACTGTTGACATGGCGCATGAACTTCTCGAGGAATGGACTAATCCTCAAGAAGCAGAGTCA -TTGGCGGAGGCATTCCCCAGTTCTAGGAGAGAGTGAAACGGTGAGGATAGGGATGATGTT -CAATCTTGTACGACTAAGATTGTTGTGTACATTTACGTACTGCGAGGTGTTCTCTGGAGT -TATGGCGCGTTTCATTACCGTATACCAAATCAATTCCAATTTCGTTCAAAGCATTCTAGA -GGTGAGAGGTTAAATTACAGAGTACACATATTTGGAATAAGAGAGGTTTTGCGTCCAACA -AACTGAGGCGACGGAATTTCCAAGGATCAGTCCATCTCCTAATAGTCAGACTTTCAGCTC -TCAGCGGTTAAGAGAGGGCTGCGAGAGACCAGGCAAGGCAGTCAAGTGACACAGCAGTTA -GGTCGAGAACAGGAGAGGCATTGTGGGCACCCGACGGCCGGTCACGCAGATTCTGGACAA -ATTCATATTGCCTGAATAGAGCTTGCGCCCTTGTCAAGAGGGGTGAGACGGGACACCACT -TGTGCATCTCTTCAAGTTGAACTAAGATACGCTTCGCTTTACTTCAGCAGTGGTAGAAAA -TTAAGGATTATGTTCTAAATTACGGTGTCCTCTTCTCCAACCTCGTGCCACTGGCTCAAG -AGTTGATGAAAAGAATATGTTCCACGAATGGAGGTGCCAAAGAATACGCCTGTCAATGAC -CCGAAAGCCATTGAACAGACTGTTCCTCCGCGCTGTGCAATGCAGGGACCAGCCTGTGGT 
-GAACCGCCTTATTGATCGCAGTCACCCAAACAAGGTAAGTATGAAGAAGAAAGAAAAGGG -GAGGAAGGGCCGCAGAGAGAGAGCTATATATACCTCTCCCGGAGGGCGGTGGCAGAATAT -GCCAGGGCGGTGGCATTAGGCGAGCATGAGCTGACCAATAAATCTTCATTCTGTCTTACA -TAATATGCAAACATTACAAACATTTCATCCAGCAACCCAACAGAGTTTATTCCATATAAT -TAAGGCACTATGGTCTTAATAGTCACGTGTCATGAAGTTTTGAGACTGGGCCAATTCGGA -CTTGGATAACATATCATTTTGCTATACAACATTGTGTCTACAGCATCACCACAATGAGAA -ACAGAGCTAGCAACACGTTGTCGAAAGGCGTATGTCCCCACCACTGTACTTGATACCCCC -AATTTCAATAGACTAACAAAATACTCTTCAGAAAATCCGTCAATCATGGAGCAAATACAA -CCTGTATAACCTCCAACGATTCCGCAGCCCGCCCACCGCCAATCGCACATTCTTCCAGCA -AAAATGGTCTGCTAAGGCTGCCTCCCGTGCCTATCACGGCGAGCAAGTTCGCGAGGGACA -ATGGAAACGCATGTTCAACCATCGCATCCGGAGTGTGATCCCCATGAATGCCGCCGATTT -AGCCGCCGATGACGGTACCCTAGTCTCATCCGGTCGAGGCTCAGGACTCGATAAGGATGG -GAAGTCCGCTCACCAGACACGTCCTACACCTTACAGCCACATGACCTTTGCGCCCCTAGA -GCGCCGGCTGGATGTGGCCATCTTCCGGGCTTTGTTTGCTAGCAGTGCCAGACAGGCAAG -ACAGTTTGTCATCCATGGAGCGGTTACTGTTAACGGTCAAAAGGTATGACGCGACGTGGG -AAACTACCAATTCCTTGGTCACTGGCTCCTTCGGTTACTGACTCGCTCTTGTGTTGCTAG -ATGAAACACCCCAGCTACCTCCTTAACCCTGGTGACCTCTTCCAGGTCGACCCAGACCGC -GTCTTGTACGCTACTGGTCACACGAAGACTCCATTTGAGCGACGTGAGGGCCGATTGGCT -CGGAGAAAGGCTAAATCACAAAGCTCACAGGATGCGGAAGAAGCCGCTGCCGCACCAGAG -GTTGAGGCAGAGTCAAAGGAGACCGTAAAGCAAGACATCAAGGGAACTCCGAAGGAGGAT -ACTAAAGAGACTCTGAAGGCGCTACTGGCACAGGCTAAGTCTGTCATGTCTGCCGGCAAG -GATACACTCGCGCCCCAGCGCAAGCAGGAGCTTCGCGGCTTCCAACAAGCTGTCCGCCGT -GTATTGTCCCGCTCCGAGTCCAGCACCATCCTCGCAGACAGCCTGGAGGCCCAATTCTCC -CAGCTAACCCTACTTCTCAAGGCCAAGCGGGGCGAGAAGCCAGCTAAGAAGGATGTGAAG -CCCCGCCCAGAAGACGCCTTCGCATCCGACAACGGAGATACGGAAGCGGCTTCCGCCAAG -CGAGATACAGAAACCGCAGCCAGTGACAAGCTCTCCGAGGCCTTCGCACAAGCAACCTTG -GGTAATGATGTCGATGCCTCAGAGCTCACCGACGAAGAATTCGATACGCTTCAGCGCGCT -CTCACTCAAATGCGCGACAACCCTCTCGATAACTCCAAGACCTATTCCACCCCCTGGCAA -CCACGGCAATACATGTCTGCGTTTGCCTTCATCCCACGCTACCTCGAGGTCAACCAGAAC -ATCTGTGCGGCCGTGTACCTACGCCACCCCGTGGCCCGCCCTGGTTCTGCTGAGGTGCCC -ACTCCCTTTGGCGAGTCCATTGCCACTCCGGCCTATGGCTGGTACCTGAGGAGACGGTGG -TAGAGGATGCGATTGGCCTGTCTTTCTCTGTTCAGCCTGTTCCTTCCTTGTTTGTCCTTA 
-CTACTTTGCTATTTTTCCTCCCCTTAACTTCCCGTCTGTGTATTAGTGGGCGCTTAGATG -TCCCTCATCGCTTTCTGCATATTTCTGGATCATGTTGACCAAGGTGTTGGTTCCTGTTTT -GCTTGTCTTTGGGGCTTTTTGTGTCAGCTCCACCCCCTGTACCACCATAGCCGTGATGTG -TTGTAAATGGATATATTATTGATGCTGTTGCTTTGCGCTTTATATTCTTGGATGATCCTT -TTGAGTAGATAAAGCCACTTTGCCCGCGTATATATGATAGATTTCATGCTGAGCAGTCTG -CTCTCAATATACTGCTGGCTTCTAAATCCTGGCTATATGTCACGGCCTCGCATGAAGTTA -TATGGGGACATGATAAATCTCGTGGACAATCACCTAGAGGTATCGAAGCTCCAATTCATA -CCTCACAAATATATAAATGTGCCAATGAAGACCAAAGCCCGGCGGGGACATGAATAGTTC -AAACAGATACGTATAATAAGACACAATGAAAACACTTCAATGTTTAAACTGTTCGAATCA -AAGCTCATGCCCATAATCTACAAGGATTGCATAGCCAAGGCTGGAATATATGTTTAGCGC -ACGGACTTCATGGAAAACCCTCAGGTGAAATCCATCGCAGCTTTATCTTTATCCCCACAG -ATCGACAAGAACATTCTGCATTGGTCTGCTACACCTTTGTCACTCTATTGATGGCTAAAT -AATCAGGTTGACTCAAGGAGCCTATCTCCTGGACTTTATAGACCATCCGATTTGCACTTG -ATCTCCCACAGCGGCTGCTCGGCTACTGCTACTGCTACTGCGCAAGCTGCTGTCCCCATG -ACGCGCATTCAATCACACGAATACAGCCGTCCTTTGTATCACACTCCTTCACAAAATTCC -TTCGGCCGACCATCGGGCCTGCGCTCAGGGTTCAGCCCAGCTTCCCAGTCAGTGTCTGTG -GTGGTGAGGAATCCACCCACGGCGACCGATAACACGTCCGAGAGCAGCGAAGAGACACAT -GGAGGGGTAAACCTAACCGGATGCGGTTCAAATGAAGGCGGCATCATAGAAAGCGATCTA -TACTATGCGCCGATGCCAGTGGGGGTAGTTCAAACCAGCGAAGGAGAAGGGTCAAGGTTG -ATATCGAGATCAACCACCAACAGAGGCACGAAGCGCAGCCACAATGCCGACGATGAGCTC -GATGGCGACACCTCGCGGCAGCATAGGCGTCTCACGACCAAAGAAGAAGTAGCGCTCTTC -GATATTTGCAACCAAAACGCAGACACCTTCGGCAACCGCAGTAACCTGTGCAAATGGTGG -ATATCCATTGCCGACGAGTTCAAGCGTACCCATGAGGGTCGCTCATACTCGTGGCACTCG -GTGCGACGCAAAGTTGAAATCGTAACGAGGCAGCGCATCAAGTTCCTCGAGGACCAGCGA -CAGCGGGGCTCGAACGCACCCGGCTCAACGGCCGAGGAGTTGATGAACCCACAATGGCTC -GCTGCCGTCGATGCCTGGATTCCGACCTGGCAGCGGTGGGAAGAAGCCGAGAACCAGCGT -ATTGCAAAGCGGGACGAGATTAAGAAGCGTAGACAGCCGCAGCCCTGGCGCCAAAATTCC -AAAAACGGAGACCAGGATCCATGGCAGAACCTGCCTGCATCATCTGCCACTAGCCCGACT -GATGTCAATACCTCAATGATGGGTATGATGCATCACCCTGTGGTGAACACTGTCGATGAT -GCCACCTTGCCGGCTACTAGCCCTACGCCCTCCCCCTCAATGACTGGCCCTGCACCACCC -AGGCATTTCCTCGAACCACCATCCACCCCTCTCTCTGCAGCCACCTCATTGAAATTACCA -CCTGGCTTCGAAAATATGTTCTCAACCCCACAACTCACTTCACAAGTCGCACCCATCCCC 
-GCCATATCCACACCGCCAGTTCCACAATCCGATGGCCGTATGGTCTCAGCTGTCCTCGAA -ACCCTGGGGAAACTGAACAAACACCTAGACGCCACATCGGGGGGTAGCATAGATGCGAGT -GCCGCCTCACCCATGATCTCGGCTTTGGTACAAGCTGCCTCAGAATCCCCGGCTCTGGCT -TCTTCCCGGCAATCACAACTCCTGCCAGTacagcctacagtccagcaacagccacaatca -ccgcAAGGTCTCATTCAAATTCAAATTGAAAAAATGAAGGAAGAGCTCCGGCAGGAAATG -CAGGCCCAGTTTCGAGTTGAGCTGGAGCGAGAACGCATTGCGATGGAAGAAAAGCTGGAT -ACTGTGCAACGGACTCAAGACCTGATTCTTGAGATGTTGAGGCAGGAGCCGGCTTGATGC -AGATGAAGTCTTAGTTCGGTACCTCTTCTCTAGTTATGGCTTGTGCTGTTTCACTTCCCC -TATTCTTGCTTGTACTTTTACTGTTGTTTTTTACCGTCTCTTTATTTCTAATATGATACC -CATTATCGACCGTGAAGTCTTGAAATTTTTTGATATTCTTGATGGAGACATTGCAAGACT -AATCCCTAAAATTTGATGAACTTAACTTGACAAGGACCCAGCCGATGGCAATTGCTCTCT -ACTCTAGAACAGAAAACAGGTAAACAAACATAAGAGAAAATAGTCATCGGGCTAAATTAC -AGATAAATGATAGAACGACTCAAGAGTGATTTCCCGAGCCTTCACACAACGATTAAAAGC -TCTTCCATGATGCACAGAAATGCCTATAGGGCGAAACAATCTAGTAGTAGAAAGTCTCAT -CGACAGTACGACCACGCATCCACTTGGGCTCAGAGCCACGGACGTAGATGTCCTCGAAGA -GGTTACGGGCAGTGTTCTCAGGGACGGGCATCTGCTCAGCCTCGGCGACCTCGGCGTCAA -CGTTGGCACGGGCGGTCTTGTCGAGACCCTTGAGCTCCTCCTCGGAGGTGACGCCCCACT -CGAGCATCTTCTGCTTGAGGCCAGCGATGGGGTCGTTGGTGCTGCGCATGCGCTGAATCT -CCTCACGGCTGCGGTAGGTGGTACCGGGGTCGGACATGGAGTGGCCACCGTAACGGTAGG -TGACGTACTCTAGGACAAGGGGTCCGTTGCCGGAGATGGCGTAGTTCTTGCTGTACTGGA -CGGCAGCCTTGGTAGCCAAAACATCCATGCCGTTGACCTTCAGGCCGGGAATATACTGGC -CACGCTTGTAGTACTCGGTCAGAGCGGAGGAGCGCGAGGCGGAGGTACCCATGCCGTACT -TGTTGTCTAGAGCAAATATTGTCAGTACTTCTGTGATTTTTCGAGGTTATAAAGACTTGA -TATGAATCACTTACTCTCACAACCGAAAAGAACGGGAAGGTTCCACAGCTTGGCCATGTT -GAAGGCCTCGAAGACCTGGCCCTGGTTGGAGGCACCGTCACCGTAAAGAACGATGGAAGT -GTTGGGCTGTTCGTTGTACTGCTGGGCGAAGGCTAGACCGGCACCGACGGGCACCTGGGC -ACCGACGATACCATTACCACCGTAGAAGTTCTCGGCGAACATGTGCATGGAACCACCCTT -ACCGTACGCGATACCCTCACGACGGCCGAGAAGCTCACCAATGATAGACTTGACGGTACC -ACCACGCATCATGGCGAAACCGTGGCAACGGTAAGCGGTAATAATCTTGTCCATGCGGGT -GATGGCGTGCTCAATTCCGACAGCGACGGCTTCCTGTCCGGTCGACAGATGGCAGAAACC -CCGGATCTTCTTCTCCTTGTACAGGCGATCGGCGGCCATCTCCATACGTCTGCAAAAGAG -AAAACGGTCAGTGGCTAGTTTAGCTATCCGCTGTTGTTATTATCCTACCTCATCGCGACC 
-ATGTCGTAGTACATCTGCTTGAGCTCCTTCTTGGAGGTCTCGAGAGTGTACGGGGGTGGG -TCGAGCTCGTAAGTCTCGAAGCTCTCATCGGAGAGACGGACAGTGAAGAGCTTTTCATCC -TCCTGTTACAGCGCAAACAAGCATCAGCAATTGAACCCCTGGAAACCTCCTGGTGAGGTT -TTTCAGTTTTCCGGATAGGGCTCAACAAACCTGCGGCACTGGGCTCTCGACGTTGGCGGA -AGCGGCGTCGGTGGTGACAGTGCGACGGGCAATAGGAGTGAAGGCCTGGCGCCTCAAAGG -CGCAGCCTGACGGACCGCAGCACGGAAGAGCATTTTGCAAGGGAATTGGAGATGGTACGT -TGAAGATGATAAGCACCTTTTTCCGTCCCAACAACTCTCTCAATTTTGCCGTCTTAGTCA -GCTTCCAGATCGGGCCCGCCCGGTGAGGGAACTCAGGCCGGTTCCGATACGCTTGAAACT -ACCTCTTGGACACTGTTTCTGTTAAATTTGCTTTGGATATTTCCTAAACCTGGCAAATGA -CTCCATTGGAGATACTCAAATGGACAAATTCAACAGAGAATTGGCATCGCTGTTATTTCC -ATAGAGTATCTGGGACTCTATTGCTGGTTGCGATCTCCGATCCGAGCCCAACGGTTCGCT -GCGCCTCTATATAACATGGCAACTTCAAGCCTTTGGTGATAAGATCATATCTTGTGAGAA -AAAATAAATGCCTCTTAAATGCCTTCAAGCATTCAATTCGCCTTGTGACAAGATCGGTAA -ATAAATAAAGCCTTTGTCGAGCAGAATATACTGTACAAGGCTGGGATAGATCAGAGATTG -AAAGGGAAGCACACCCACAACCTAACTTGATATATGATCTTTATTGCATTGCCGGTCAAT -AGAGCAGAGTCTTGGGTAACCAGACCTGCATGTAGCAACATCCAAAGTGTGTTTGCTTTG -ACCATGAGACAGAAGCAGGTCACGCCAAAACGGATATCAAAACTGACCTCAGGATTATTG -AGAGGAATGACTACCAAGGGCCTCAAATGGCAAAAGGATCTTTTCGGTAATTCTTTTGAG -CAAGTCCTATATCTTCAATACTAGGCGAATAAGACCCCAAGAGGATGGATCATTGATTTT -ATTGATTTTATTGACTTTATTGACATTCCAATGGAAGCAGTTTGCTCTTGGTATCGTGTG -TCGCCCATTGATCCTCATGATTCAAAATGTTGCTTTTTCCCTCCATCGTCATCTTCTTCT -CTCCACCAGCTTCTGAATGGCCTGATCAAGGTATCGTTATTCCACGTCTCTGCCCAAAGA -TACGAATCAAAGGCACGAATCTAATGCACCATCAAGCAGTACTAATCTCGACATGGCGGA -ATTTACTGCCACAACGAGCTTCAATACAGGTCCCCCCGCGGGTCCTCCCCTTGGCCCTGC -TTCGGAACCTTTGATTGTGTTCGTTGCTCGTGGCGCTCCACCTCGCATGTACGATGACCT -CGACCAGCTAAAGTACTATCTTCGTCCCGCTCTACTAGAGCTTCAAGAGAATTTTGAGCG -CAAATATGGAAAGCTAGAGAATCGACCCTACTGCTACTGCCCATTGATTCATAAATCCAC -AAGCCCCCTGGATCCCGATTGCGACTCGATGAAGTCCATGACCGACATACTCTTGTATGG -CCGAACTTATGCCCGCGACATCATCTTTGTATTGGCCCACTGGGACTCCATTACCAGTGA -CACGTTGAGCTTTGTCAATGTCTTCAAAGAATTTACAGACGTCAAGGTGACTATCCGCGT -CTTCGGCACGCTTTCAGCTGACCATAACCGCGCATTCCATGGATTCGACGCCCACAGAGT -TGGCGCTCACTACCAGGGCTTAATCAGGCTGGAAGAGGACTTTGTTATTGACGATGCATT 
-GCGCTTTGTGGTGAGACTCGAAGAAACCCGCATAGTCAGGATTGGAGTGGAGGAGTCGGT -CGGCCTGATGGTGCGATTAACAGGCCGCTCCGAGGCAGAGATGTATGATAGAGTGTTGTG -GATACTCTGACGCAGGGGAGTGAACCGAGGAAGTTGAGCTATTGGGTATTTATGGAGATG -CGAGGAAATCTGACGACGGACGTATCCCCTCCATGGATTGAGACATGCATTTCTATGCCT -AAGTTTTTTAGTCCGAGAGTTGAAGCTTTAGATGTATCCTACCCCAGCTTTGACGTGGCC -TCGAATGCCTGAACAAGCCTTGAGCTCAGCTTTTCTCTCGAATCGATACTTATCGAACAA -GGAAACAATCGATACATTCATCTAGTTACTTCTGAATGATTATTTCAATCATTGTGTAGT -GTAGGAGATGATCTTTTCTGCCCTGCACGCCTCTTGGGTAACCACGCAAAGCTGAGTTTA -CAAACTGTGAAGCAATTTGTCGGGCACGATCCCTGAGTGCTGCTAGAACCAGCACGTTTA -AAACAAGTTGAACATCGAACGAGCTCGTTGCGCCTGGTAAGAACACAACCATTTTCCACG -ACCGTACCGCAGAAAGTCGACATCGTCCAGTCACCACAAGGTAGTCAGTAATATAGCACC -GCATATGTCCTCGGGTATTATGATTCCGAATTAGATGACAGGTTCAGATCCAGCGAACCT -AGAAGTACGTATATCAAGATTATCGCCCTTATGAGATCAGCCAATAGCTAGGAATTTTTT -GACTATACCTAAGCACGTTACAAATGTATCGCTATATAAATTTACAGACCAGCCATTAAT -GAAGACACGGTGCTTTGTCTAAGATCACCTCATAACTGCTGGACTGTGAGACTATCTTAC -ATGACTCTGCCTACGGACCAATCAGACATCTATATACCAAGCGTCAGTACACTATTATGT -CAATCAGTGAACACAATCTATGGTGTACAATATAGTAAGATTTGCTTATAGGTGAGCCAA -TCAAAAGATACAAAACATATGTACGTATGGACTCGACACATCACAGTGGATCAAAACCGT -TGAGCACAGGCAAGAACAGATACGTACATACCGGCTTAAGACCTATCATAAGGAAATACG -TGTCAGATATACCAACATACAAAGCTCCGTAATCCCTAAACAATAGATATATCCAAAATC -ACCCTATCGAAGTACAAAAGTGATGGATCTTAGAAACGCTGGGAGGCCAGCCATAATCAC -TAGGTCAAAGTGGGTAGTTGTATAAGCTATTCAGCCGCTTCTCCATCAGTTCGCATATTG -GGGGGGGGTATCCATAATTAATATCAGTTTATAGAGTACCTACGGAGTACTCTGTAGATT -ACCACTTATTTGAAATGGTCACGATGTTAAGCCTCATAAATAATAGACCTGAAACACCAA -GTTGATTCAGGTGAGATGGTCGTGATAGTGGTCCGGGCAGAAGGAAACTAGCTCAGTTCT -GCTTGGTTTCATGGTAGACGAGAACTGTTTCAGGGGTGTTTCAGGGGTCATCCTGGTGTA -GAACGCACTTTGTATGCAGTCTGATGTGTGTGCGAGGATGTTTGAAGGAATATTTCTTAG -GATTTGAACAATCCACTTTCATAGCCAATCTGTGTTGTGTGCCGTGCACTATAATTCCGA -GGTATTACGAAGCTGGTGTCGGAGCTTACTACATCTACAATACAGAGTATACCGAACTTC -GATCCATATCTCACGGTCTCGAGTTTGTTATGCAGCTAGAGACCATCGGGCCTTGGGATC -GGCCCGGCCAATGAGACCTGCATGTACATGCCCGTAATTGCATTCAACCCAAAGGATTAA -AGGGTTACATTTCCGAGCCTAGAAGGTATGTACGAAGTAGGATAATGCTGCATAGTATAT 
-TGTAGATATCATTGTGAACCTGTCATTTCTAGGTTGCAGTAAGTCACTTCTTCAGAAGTG -ACCAGGTTACTCTGAAACACTGGTTTGACGATGTACTCTGCCATTTAAAAATCGAGATGA -AAGGATCAACACGCTCGAACGAGTTAGACTGTGTTTACAGTAACTCCGCTCTGCAAACTT -TCAACGGTGAGGATCATTGAATGATGCATATATGCTTTAGTCACATACGATATATATTTG -GATGGTATGAAGTACAGAATTGAGCTGAAACCTGAAGCTGAAAGCAACGACGGATAAAGC -CGTACATATTATAGCATTGCGGACGAATACCCAGATATGTTATTTTTACGGTCTATGCCA -AGGCAGCTGGCCACCGAGCTGTCAGGAAGGGTTGCCACATGTGGGTATCTCCCTATGCCG -AGACTTGACAGCTCTAGGGACGAGCAAGGATTTGATACCAGTCAACTCCGATCTTGTCGA -TTGATTGTGGTCTCGATTGACCTGCATGGTTATTCTCCGACGACGATAGACATGCAGGTC -AGGGGTGTACAACATACATAGTTTGCGATAGGTCCAATATCAAAAACCCCACCTCGAAAG -GGGCGTTGTCAATCCTCATCTTCCGGCCAATATTGTATCGACATGGGCTTCCGAGACAAT -GGAGAAGTCAAAGCGAGCACAGATATTTTACAACCCCCCCTCAAAAGAAACCCAGGAGTA -TGTCGATCATAGCGTAAACGGCATTCACATCCACCTCGGCAACGGCACGGTATGGACCAT -AAACCGGGAGCTGGAGTCAATGATGTGCAATGAAATTTCTCCTAGCCTCGCCTCCCCTAT -TTCCAGGGTCGATGGTCAAGCGCTAATGAATAGTGCGAATACGATACGCCGGTGTCGAAT -GCAGCTGCGACGGAGATCAGGAGTCCAAGCGAGGGCTAATTAGACCAATGAAGCCTTCCC -CCCCCCTTAGAACACCCAATTGCGGGCGAGGGGGAGCTGTCTTTCATGTTGGCATAAAGT -CTCCGTTGGAGGAGCGATCGTCAAAAATCGCGGAAGTGCGTCTAGATGCCCCGTAACCCG -TTTACCAATTTTATCCCTATTTGGATGTCGATTAAAGACAGCAGGGGTTAATAGACTTGG -GGGCTTGGGCTTGGAGCTGGGGTAATTTCTGTGAAGAGAAACTGCGTCATTGCAGAAAGA -TTTTCTCCTGGAAAAAAAATAAAGTCCCGAGTGCTTTTTAGAGCCAAAGACTGACAGGGG -GGGAACCCGTTGGAGGATCGACAAACAAATTTGCCACAGTTCCCAAGAGCGTGGTCCTTA -GCCAATATAGAGTGGGTATAGGGTAAAATGAGCCGCTAGATCCCGTGACTTCCAGATTGG -AAATTACGTTCATCCGATCCGAAGTACATCCCAATCAGATCTTTGGAACGTCTGGAGTAA -TTTTCAATTTGCCGAATACCTAGGTACCTAGGTATTTTGTATGAGTTCGGAAATATTTGT -AACTTGAAATCTTGCAAATCATATGGAAGACTTCGAATTGATCAGAGTTGAAGTGAAAAG -GTTATCGAAATATAATTTTAAGTACTCCGTACTCCATCGAGGACTCCATAGTAAATTAAA -TAACCCGATTTCAGGATCAGATCCCTTGAATTGTAAGCCGATCGGATATGCATAGTGACG -GAACCGAGGTATTGAGTATTCACTGTAACTCTACTCCATACCATTGACTGTACTTTGtac -atatatattttaatctagatatgtatatcacatacacatataCGGAGTGCGTGCAGGAGA -AGTGCGAAAAAGAGCAAAATCCCGATTCAGGGGATCTCCACCTCCATATCCATGGTCCAA -AGTGGAAACTATTGTATTTTTCTTTTTTGGGCTTTCTTATTTAGTTCCCATGTGTTCTTC 
-TAGGCGTGTAGAAACCCCCTATGCATCTCCATCTTTCTCAACTCCGTATACATTTCTCTC -TTATTTAATTATATCCTTAATATTCTTTGATTTTCTATTTTTGATTAGTTTGGGATTAGT -TTATTTTGGGTCCTTGTTTTTTTTAATTTTTTTTTTCTTACCCCCACCTCTTTGTTTCCC -TTTGAGTCGATGTGGAAACGTTCACCTAGATCGGAGGACTCTGGGAACGACTCAACTCAC -CATCTATCTGAGCTGGGTTTCCTAGGTCGATCCAAAAAGTCCAAAAGTAAGTGTCGCTGG -TATTTTCCGGATCTGAGACCTATTGTGGCCGGTTCTAGATGGGAGAGTCTAAACTCTCCC -CTGAGTGTCATTTGAAGCGTTGTTTGGGGGCTTTCTTTCCTTCTTTTATTCACACCTTTA -AAACTTGAGCTTCTGCAAAGTAGCTTTAACCTGTTATTAGCCTTGAAGATTGCTAACCAG -GGATCTGGGACGTACAGAATTTCAATCGCAGCCAGAACCATCCTCTCGTTCCCCTCTCCG -TTCATTTCGTTCCTTCACCCGGGGTGTAATCGACCGACCTAGCAGCAGTGTTTCCAACCA -GACCGACATCACCCGGCCAAGAGCACCCTCGACGGTCTCTGGATCGTATCATTCTGCTTA -CCCTGAACTCGCTGGTCAACCTGCGCATTCAAATCGCTCACGAGCATCATTTAATTCAAG -ATCGGGACGAACCATCGGACCCATGGAGGTAGATCGCACCCACACACGGAGGGAGAGGAC -ATTTGTAGGGAGCGAATGCGCGGTTTGTGAGGAACCCTTGGAACATACCTTACGTGGCGA -GCGAGTCCTGCAGCTTGCGTGCTCCCATGTCTCCCACGAGGCCTGTTTCTATGAATTTTT -GCGGGAATGCGATGGACAATACTGCCCGACCTGCAATGCGCCCTTGGCCTTGGACACAAG -TCGCGGTGGAAATGTCTTGGATATCGGTATGTCCCTCGTGCGGGGGTGAGAGGGCCAGAC -GGCTAATGTGATCTTTCCCAGAGAAAATCAGTAATATCGTTCGTTCAGTGACGTCGCATG -ATACCGCTACAGTACGCAGTGGCTTGTCAACATCTACTCCCTGGGAGCAGGCCTCGAGTC -GACGACCACCGAGTGATTCCGGCAGCCGCTACACCTCGGGGACCCGAGAGCAACCACCTT -ACACCCCGTCAACAAGGGAGGCATCTTACAACCCTTCAGCAAAAGAACCATCTTACAACC -CCTCGAGAGATCCATCTTATAATCGACGGGATAGCCGTGATACCAGCAGCCATCGTGAGC -GCGTAGAGCGATTGACGGTGGGCTCCAACCCTCGTCAACCCCACTCCCGGAACGGCAGCG -CGGCTGGTTCATCGGGTGAATACAATGAAGGCCAGCACACGAGCACTGGGCGACGCCATG -ATTACGATGTCCAAGCTATGGAATCAGACCTTAGTCCTCGCCCTGGAGTCGCCAAGAATC -CCATTCCCGCGCCGATGGTCACCGTTCGAAGCGAGTTTCCGACTATGAATCGATCCCGCC -AACAGCAGACCCTCACATGTCTAATTACTGTCGAGGTGCCTGATGCCAATTGGCGTCCCG -ATCTGGATGATCTACGACACACGCCCTCGGGACAGTCGCAGCCGGATGGCCCATACGCGG -GACGATTTGGTGGTGGACAAGATGCTCGATCGATTCAGTATGAGCCAGCGGAGAATCTGG -AAGAAGTGGCCGAGGAATTGCGCAACAGGGTCGATAATTGGCATGGCCTTGAGTTCCAAC -GGTTTGGCAAGCTGCGTCTCCACGGACATATGCGCGTGGGCAAGGATCGAGATTCCTGGC -AGGAGTTGGAATGCTATCTCTTCGGAGAGATGTTGATCTGTGTCAAGGAAAAAAAATCGG 
-CTGACCCGAACCAATTCGATGCGACCGGGCGACGCAAGCCTGCTCGTTGCAGTCTCAAGG -GGTCGATCTTGATTAAAAAACATTTAAAGTCCATCGATGCGTCGCCAGATGAACCGATTC -TTTCCTTGAACCTTTCTGTCACCGAGCTGCCCTGTTTCTATCTTCACTTCCGGAACCGCA -ACCAGCTTGATACGTGGCGTCGGTCTTTGATCGATTTGCACCCAGAGGCCATTTCACGCC -AAAATGACTATGACTACGAAAACTCGGGGGCGGAGGAGGATGACTACCGTGGTAGCCGTG -GAATCCAACGACAAGCTTCTGTCAATTCATCTTACGGCGCAGGGAAATCTATCAACACCG -CCATTACCGACTACACCAACCCCGACGCAGAATTCCCTTCCATCAACACAGTTCATATCC -CTCTCGATCTTGTCGTGGTCATTCCAGTATCCTCGTCTATGCAAGGCTTGAAAATTACTC -TCCTGCGCGATGCCCTCAAATTCCTCGTGCACAATCTGGGCCCTCGGGACCGGATGGGCC -TGGTGACTTTTGGTTCAAGTGGCGGAGGTGTGCCCCTGGTGGGTATGACCACCAAGTCCT -GGACCGGATGGTCCAAGATCCTCGAATCGATCCGGCCAGTTGGCCAAAAGAGTCTCCGCG -CGGACGTGGTTGAAGGTGCCAATGTCGCCATGGATCTTCTGATGCAGCGCAAATTCAACA -ACCCCGTCTCCACAATTCTTCTAATCAGTGACTCCTCGATCTCCGACCCTGAAAGTGTGG -ATTTCGTGGTCTCCCGCGCGGAAGCTGCCAAGGTCAGCATCCACTCGTTTGGACTTGGAT -TGACCCACAAGCCCGATACCATGATCGAGTTATCTACCCGTACTAAAGGGTCTTACTCCT -ATGTGAAGGATTGGATGATGTTGCGAGAATGTGTTGCTGGATGCCTGGGCGCGTTGCAGA -CAACCTCTCACCAAAACGTCAAGTTGAAGCTTCGCCTACCTGAAGGATCGCCCGCCAAGT -TTGTCAAGATCAGCGGCGCTCTTCACACCACAAAGCGTGCAACTGGCAAAGATGCCGAAG -CCGCACTGGGAGATCTTCGATTCGGTGACAAGCGCGATGTGCTGGTTCAACTTGTCATCC -AACCCGACAATGCTACGCAGGACAACATGCCACAAGATCCGTGGGAGAGCCTGGTCTCCG -GATTGGAGGCTCTCGGCGGTGGCTCTGATGGTGACGAATCACGAGTCCTGAGTGTTGAGG -AGGTTCCATTGATCCAGGCTGATCTAACTTACGGCGACCTGCTCCGTGATGGCCATCTCA -CCCACTCTCCCCGCCCTTCACTCCTAGCCATTACCATGCTGCCCACTAACCCTCGAGCCA -AGGGCCAGCGATCCTCGACCCCTCCAATTCCTCCTCACCCCTCGATTGTGCAGCGCCGCA -TGGAGCTACTCACTTCTGACATGCTGACCCGCGCTCTCACCTTGGTTTCTCGAGGTCAGC -ATGATCGCGCGATGCATCTCCTCAACGAAACCCGCAGTATCCTTAAAGGCCTGGGCAAGG -GTGGCCTACCACCTTTGCCTCCGGGTGCCTCCCGGTCCGACAGCGATGCCGACAGCCGCG -GTGACACCCCAATTTCGGCCTCCTCTCCTAAATCGTCTACCTTTGGTGGAACACATTCAT -CCACATCCGACACCAATACCATCACACCGCCCTCTGCGGTTGATGCTCAGACGATGCTTG -CCCTGAACGCAGATCTAGAAGCGGCCCTGGAGTGGATCAACCACCCGGCAGTCTTCGGGC -GGGATTCACGCAAGGCGGTGTTACAAAGCATTGGCGTAATCTCCTCACAGCGCGCCTACA -CTTTCCGCACTCCTTCCGAAGCGCATTGGGCTCAACGTGTCTCCGGTGTGCGTCGGTTGA 
-CGGAGCGATCCCAAGACTGGCGTGAGACTGGCGACGATGCTTTGACCGAGGAGTAGGGTG -TCAGCTCGCAACTCTCTCAACTCGGCATGTGTTCTTATCCCTCTTGTTTTTGATATTTGA -AACGCTCTACATTGTCTCCCATTCCCGACTTGCACATACTGAGTCGACATGTCATTTCCC -GTTTTTCTTTTCCCCTTTGCAGATTTCTTTTGCCATCCCTATGATACCACTTTGTTCCTA -TGACTACGCAACATGATACCAGGGGCTTTGTTCTTGATATGTTTCCGCTCCAGTTGGATT -TATGCCGTATTTGGACATAACATCTCTGTTTCTATGTAATTGCTCTGGCGGTCTCGGCTA -TGGTGCGCGGGTGGGATGGGGTTCTGTTGTGTTTTTTTTTTTTACTTGTTTGAACATATG -ATGATTTACCGGACTTGCCTTTATTTTTCTATATATCCAGCGTTCTATAATTTATCTGGA -CCTATTAGTTACCTTGGAGTAGATACATATACCTCGTCAAAACCATTTTTTCTTTTTTTT -TTGCTTCTAGCTACCTGGGCAAAACATTCAAGGTCTGTCAACCTGTGAGTTTAGTTAAGT -ATGTACTGTGAGAACTTGTGATGCACGGAGGAACAGTCACATATGTACTCCAGAGATCTT -GACTTAACTCAAATAAGAAGAAAGGATACCCTCACAGGCGGTTGAGTGGGGATTTTGGAG -ACATGACGTTGCTCTTAGGCCGAATGTCACAATGGGGCATGTGGCTTATCGCCGGCTCAG -ATCTTCAACTTGCAACTTCCCCGGGGAGGTCTGGTTCCACCTGGAACATTGTAAAATCGT -CAACTCTTTTCTTGGAATTTGTGGACCTCAATCTAGAGAGCTAGGCAGCTCTCAACAGCT -CTCGACCATGCTCCGTCAATCTATTACGCGCGCTTCCCACGCATGCGCATCCGCATACCC -TTTCTTATCGCCGAAACCACCCATCCATCCAAAGGCCGTCTCATTAGCAACTCAGACCTC -GCGCTCATATAATACCTCAGGACCGCAGTCCTTCACTCGTTGGGCTGCATTTCCATCCTA -TCAAACCGCACGACTACGACAGACCCGACCCCAATCACAATTCTCATTTCACACCCCTTC -CCCACATAATCGGCGTACTTTCTTCTCAAGCCACCGGTCCTCCTACCAACAAAGGCAAAA -CCAGTACAATCGATTTCGAGGAAGCGGACGCCGACCTATTGCCTACCGCTTAGTCCAGAA -TGCTAAACCGCATCACTTTGTTGTGATTGGCGTGGGAATATCTGGACTTTACCTATACAA -CACCGAGATAGTGGAGATGACCGGCCGACGACGGTTCAACTGTATCTCTCATCACCAGGA -GCTCAAAATGGGCGAAGAGAGTTATCGTGAGGTCCTCATTGCCGAGCGTGGCAAGGTTCT -GTCCTATAACCATCCTCTTACTCGCATGGTGGATGGTGTACTGCAACGATTAGTCCCGCA -GGCGCCTATCGAGGGCGCTGATTGGAAGGTTCATGTTATCAAGGATGATGAAATGGTCAA -TGCTTTTGTTCTTCCCGGGTAGGTTTTGGTTTGTGGTGTTGTGTGTTTGGGCTCTAGGGT -TGACGACATCGCTGACATTGTTTTGCTTCGTCTAGGGGAAAGGTATTTGTCTACACGGGT -ATCTTACCAATTTGCAAAGACGAGGATGGTCTGGCTGCTGTGCTGGGACATGAGATTGCT -CATGTTGTGGCTCATCACCCTGCAGAGCGTATGAGCACCAGTTTCATTACGCTAGGGGCT -GTATTCGCTATCTCTTTCTTGTTCGATGTCTCCGGGCAATTCTCTTCATTTCTTCTGAAT -CTTATGTATAGCTTGCCGAACTCACGGACGCAAGAGGTATGGTTGGCCTTTTGTTTTTAC 
-TATATGGTCACTTGGCTAATCTTTACATACAGGCCGAAGCAGACAAAATCGGACTAAGTA -TGAACTTTTGAGGTTCAGGATCAATTTGCGATTATGTTGCTAACGATTGTGACCAGTGAT -GATGTCCAAAGCCTGTTTTAATCCCGAGGCTGCCGTCAAGCTGTAAGTCCATCTGAAAAC -GTGCATCAGCGTCTTGAGCTTTGCTGATGACTGCTAGATGGGCCCGCATGTATGAGCAGG -AGAAAGAGGCTCCTCCGCAGTTTATGTCCACCCACCCATCGGTAGGCTTGGAGCAGACTT -ATAGAGGGGCAATGACTGACCTGAAGTAGAGTTACAACCGAATGGAAGCTATTCAAGGAT -GGTATGATCACTCGTCAGCGTTGACTTGATTGTTACTAATTCATTTAAATAGGCTAGACA -AGGCCGAGGCAATCTATGATGAGGCTGGGTGCAGCTCTGTGAAAGGATACAGTAAGAGTT -CTACTCCTATCTTATATATAGTGATTTGCTAACTTCTTTAGTGCCTGGTTTCCAGAATGC -TTATAATTCGCATGTCGCATATTTGAGATGATCAACAGAACATTCAAGTCAATAATGCAT -TGATGGTAACTGTATAGTAGAATTAGCCAATGTATCAATAGACACTAAAGTATTTCCTAT -TAAACCATAACATAAGAACAAACGAGAGCAAATGTAGAAGAAGGAACTGGAGATTGTAAT -GTCGCTGATCTATAGTGAACGCCAAGCAACTGGCGGGCTCCCCCCTCGTGCAAACCGAGC -AAACCCACCACCGTAGCTCCAAAAATTGCCTGGCACAACAGCTGCATCCACTCCCTCACA -TTCAACCCATTTGATATCAGTCGCATATCTCTCAGACGCGACCTAATATCATGTCCCTAT -CAATTCCCGGTCCATCCCAGGCGGGGCTCTTCAAGCCCGGGTACCAGAGGTGAGTTCGAA -AAGTCGGGAAGCCCGAAAACCCACCGCAATAGGGACCTCCTTATCTGGCGCCCCTCCAAG -AAGAAGCGAAGCAATTGACTGATCTGTGTTTCCCGCTGTATAGCCACGACGCCGAAGATG -GAGCCGTTATCCGTAACATTGAAGCCTGCCAGGCTATCTCGGGCACCGTCCAGACCTCCC -TGGGCCCCTATGGCCGCAACAAGATCGTCATCAACCACCTGCAAAAGATGATTCTGACCT -CCGACGCGGCTACAATTCTCCGTGAGTTGGATGTCGTTCACCCCGCTGCTAAGCTGCTTG -TCATGGCGAGTCAGCAGCAGGATGCGGAGATGGGCGATGGTACCAACTTGGTTATTATGT -TGGCCGGTGAATTGTTGAAGAAGGCAGAGGAGTTGATCCGCATGGGTCTCAAGACGAGTG -ATATTGTTTCCGGATACGAGAAGGCTCAGAACTTTGCCTTGTCGGTTCTAGAAGGTATGT -TTTCCCCTCCCATGATCGATTTGTCGATATACAACTTAGGGAATGTTTCTGATTTCCTAC -AGAGCTCGAGGTCGACAGACTCCAGGATATGCGATCGGCAACAGAACTGAGCAAGGCTCT -CCGCACAGTGGTGGCCTCTAAGCAGTCCGGCACGGAGGATACCTTGGCTGCGTTGGTTGC -GGAGGCGGTTTTGGCTGTCTTGCCCAAGAACCCCCTTAACTTCAACGTTGACAACGTGCG -AGTCGTTAAGATTATGGGTGGTAGCTTGGAACAGTCGAAGGTGGTGAAGGGCATGGTCTT -CCCCCGGGAGCCCGATGGAATTATCAAGAAGGCCAGCAGGGCCAAGGTCGGCGTTTTCAG -CTGCCCCATCGATATCAGCCAGACCGAGACTAAGGGCACAGTTCTCCTGAAGAACGCCCA -GGAGATGATGGACTTCACCGCGGGTGAGGAGGATCGCCTGGAGACTGCCATTAAGGAGCT 
-CTACGACTCCGGTCTCCGTGTGGTCGTTGCCGGTTCCACCGTTGGCGACTTGGCAATGCA -CTACCTCAACCGTTTCAATATCCTCGTCATTAAGATTTTGTCCAAGTTCGAGCTCCGCCG -CTTGTGCCGTGTTGTCGGCGCCACACCTCTGGCTCGCCTGGGCGCCCCCATGCCCGATGA -GATGGGCCAGATCGACATTGTCGAGACAACCGAGATTGGCGGTGACCGAGTGACCGTCTT -CCGCCAGGAGGATGCTAACGCTATCACCCGCACAGCAACTATTGTCCTGCGCGGTGCCAC -CCAAAACCACCTGGAGGACGTCGAGCGTGCCATCGACGACGGCGTCAACGTTGTCAAAGC -CATCACCAAGGACCCCCGCCTCGTCCCTGGTGCCGGTGCCACTGAGATCCAGCTCGTTGA -GCGCATCTCCAACTTCGCCGACCGAACCCCCGGCTTGCCCCAGTACGCCATCCGCAAGTT -TGCCGAGGCCTTCGAGGTCGTTCCCCGCACACTCGCCGAGTCTGCCGGTCTCGATGCCAC -CGAGGTTCTCTCGCGTCTGTACACCGCACACCACCGCACTACTGCTGCTGGCGAGTCATC -TTCCGAAGAGGAGGAGGGCAGCTCGTCTGAAGAGGAGGAGCCATACTGGACTACAGGTGT -GGACCTTGAGATTGGCGACTCCGACGGCACCCTGGACACTACCGAGGAGGGTATCCTAGA -TCTTATGGCCACGAAGATGTCTGCGATCCGCCTGGCCAGTGAGTCTGCCCGGACGGTGCT -GAGCGTCGACCAGATCATTGTCTCGAGACAGGCGGGAGGTCCCAAGCCACCGCAGGGTGG -AGGTGATTGGGACCAGGACTAAATTCTTTAAAAAAAAAGGAGCAATGAGGGTCTGGATGC -TATTTGATAATATGAGGCCTGATAGATCTTCGTACGATGATATGCTTTTTGAGAACATTG -TAAATTGGACTACCTTTCTCTTTCAGTACTTTCTTCTTTGGTTCCTTTGAGTATGCATCT -TTTGCATTGAAGAATCTTCTAAGGGATGTTAGAATACACTTAATCATACCTTTGATGCTG -CGGGCCGTGTTTGCCTGATAGATAGGATAAAGGTGCCTGAGGCGCCATACGTCACGTGCC -ACTCCCAGATTTTCCTCAAGTGTTTCCTTTCGGTCTTGATATCAACGAGCCGCGATTCCA -ATCCCCACCACTTTCCTCTAAATTCCCTTCCTCACGAGCTTCCTGCCTCTTTCTCACCTT -CGCCATGTCCTTCTCTACCCAGGTCGCAGCCCGCTGCGCCAGACAGCTCTCCGGCTCTGT -CCGCCCGTCCTCCCTGCGCATCGCGACCTCCGCTACATACCTCACAGCCCGTCGCACACC -CAGCAGCCGGCGATGCGAATCTACTCAGGCAACCCCTGCGGCTGCTACCAACCCCAAGAT -CTCCCAGATTGTCGACCAGATCAGCACATTGACGCTGCTCGAGACTGCCGACCTGGTGTC -TAGCTTGAAGGTGGGTTTGACAATATACACATAATTGATGGGTTGCCGAACAGGACCTGC -GTTTTTGCATAAATTTCTCTATCTGCATCGTCACCCTACTCAATTGAGGTATTTAATACG -AACCATAGCTAATTAAAACGTTCACACAGACCCGCCTGAACATCCCCGACCTTCCCGTTG -GCGGCTTCGCCATGGCCGCCGGTGGTGCCCCCGGTGCGGCCGCTCCCGTCGAGGAGGAAG -AGGCTGCCCCCGCCGCGGCCGAGAAGACGCTCTTCAACCTGAAGCTCGAGTCTTTCGAGC -CTACCTCCAAGCCCAAGGTCATCAAGGAGATCAAGACCATGCTCGGCCTGTCGCTCGTGG -ACAGCAAGAAGTTCGTCGAGTCCGCACCCAAGATTCTGAAGGAGTCTGTGCCCAAGGAGG 
-AGGCAGAGAAGCTTATCGAGACATTCAAGGCGCTCGGTGCTAAGGCGACTATGGAGTAAA -AGCTGTTTATTGTTTAAGCACGGGAAGGGGATAGATTGGGAAGAGAGTCAGGGAAGATTT -TGTTGTTTTGGTCTTTTCCTCTCTTTTTTTTCCTTCGGTTTCTTGCTTTTGAGAGATCTT -GTTGAGCATTTATTGTGTTTGGCCTGGCAGATCAAGGCAAAATGTACATTATACGCATTG -TATTCTGGGGCCGTCTGGTACAGAGGGCTCGTTTGGTTAATTGATGCATTTAGCATATCG -ATGCAATTGTATTTCTAAAAAACTGTAAATATAATGAAATCCTACTGTATAGGCAAGCTA -TTGGTATATCTAAATGTCGGAGCAGCCTCGCGGTGAAATCCCCGATCTCCAGAACTTCAA -CCAGAGAAAACCTACCCACCTCACCAACCTCCACCATTTCCCATCCGCACTATTTCCAAT -GGCGAGACCATTGGCTTCGCCGTTACAATGGCGTCAATTACATCAAGCTCTTTCCTCTAA -AGCATATGCCTCGCGCCACGGCGCGCAAGCTTGCACCGCTCGAGCTGTCACCAATAGCAA -CACAACCATATTGAAACGGCCCTTCCACACCACGCCATCTCGATCGGCTATCAGGCCTTC -TCGTAAGGCACCATCTGTTCGGAAACAACTGAGGAACGATGATGTCTTCGAAGGCCCAGG -TGGATTAAAATTGGACCGTGACGGGTTTTGGAATGCTTATTGTGGTGCACATGTTGAACC -AACAATGGTTGAGTTTAAGCAATTCACACGCCAATTGTATGAGCGCTTTACTCACGTTAT -GCCGCCGGGTGTCAACATGGCGACGTTCGCGTCCGTGGGTGAACAATTGATTAGGCTATC -GCACTCGCAGGGCCCTTCCGCAAGTCTTGTTCGGAGTATCTCAATAGGTAAATTCAAATT -CTGGGTGAAAGGCTACGGATTTAACTATGCGATATTAACCATGCAATTTCTTATAGATGT -TGACGCCGTCTATCGAATCGCAATCATTCTTGGAGAACTTCAAACGGGCCGCTACATCTA -TCAATGGGCATTGACCGGCTGCGCAAAGGCAAATTCGCGCCGAGCCCTAGTCGACCTCGT -GAGCCGGTACATGCAGACGAAAAATGTAGACATCTACCGAGACAACGAGTACATAGCGCG -GGTCAGAGACTTGGCTCTCAAAGATGAATTCCCTCATGCAATTATGTTATATGCTGAGCT -GCTCATCTGGCGTGGTGAAAACGCACAAGCCGCCCGACTTCTGGAGCAGAAGATTTTACC -GTATCTGCAACCCACTAGGATACGCCCTCCCCACTGGGAGGATATAATGTTAGTGGATAA -GTTTGATTCGCCCTGGCGGATGTACGCTGTTGCTGTTGAGAAGGAACGAGGTCTTGAGGG -TATCCAGAGTGCCACACGTCGAGCTGCCCTGGAGTTCCACGACCCGGTTGCTATGACTGA -CTACGCTGTTACCGTGTTGGAAACGGAGGCTCTCGACAAATACGAGACGTACGAGTCCTT -CATGGCCGCCGCTGCGTTGGCGGGACATAGTCCAGCCTGCTTTTACCTTGCGAATTATTA -TTACCGTATCTCTCAAGGCGAATTCACCACCGAAGCAGAGAGGAATGCAAAGCAGAGGGA -AGAGGCCAATGCCTCGCGCAACACCTGGCTGAGACCTTTCGAGCCTATAACGAATTGGGT -TTATACGGTGTTCAACCAACCCATGGATCGCAAGACATATCGCATGCTCGCGATGGACTG -GTACGAGCTTGCTTTTGATAAGGGAAACAATGAGGCTGGGTATATCCTGGCCATGCTCTT -TCGCGAGGATGGCGATATGGAGAAGAGCCGTGAGATTTACGATCTTACTGCCAAAAGGGG 
-CCTTCCAACCTCGTTGTCGAAGAAAAGTCTGGTGGAAATGAAGGATAAGTGGGAGGACCG -GACATTCAATCCTGGCTTGCCTCCTAAGCTCTTGATGATTACTTAAAGTAATTCAAAGCT -GTTGTATGTATATCTGTCCTGCACATGCTTTTTGCCTGTATACCGCTATGTGTTATAACA -ATGTATCGTTTTATGGCCATGATACCCTTCTATATATTCCCCGCTCCCAAGCATTCGCGT -CTCATCTTGCCTCATATTCCATGCCGAACTGACAAATAAAAAAGGAAAGCAAAAGATCAA -AACTGTCGTTATGATTGAAGAAAATCATCAAACGCCATGCCTGTTTCATCCCCAATTCCC -AGTCTTCCTTTCCTCTCGCCAAACCCGGGAGATTCACAGACTCCGGACCAACCAAGATAT -CTGAAGGATGATAATATCAACACAACAGACGAGAGGAGAATTCTGCCTCCAAACTACAAA -CCCTCAATCATCGTCTCTTCTTCCCCTTTCCCTTCCCGTGCCCCCTCGTCTTCAATTCCA -CAAACCCAAGCTCCTTCAACCCCTCCGCAAGAATTGACGAATCCCTCATCCCAGGGAAAT -CCTTCGCAGGATACACACGGAACGGATTAGACCACGCCTCCGCAAGTATCGGCATCGACT -GCCCCGTGTCCTCAACAGAGCCCCAATTCATCAATCGGAACTGGAGCCTGTACAGCCCCG -CCGAGCGGATTGAAAGGTCTGCGAAGATAAAAAAGGTAGCTGGTTGGTGGAGTTGCGAGG -CGGCGTGATTGTAGACGTTCTGCGGAGGGTTGGAGAGGTTGGAATCGGTGGAGGATGGGT -GTGCGGGTGCGGAGTTGGGGTCTGGGTCGGCGTCAACGTAAAAGGGGGACATGAAGGCTT -TACCGGATAGTAACGGGGTGGAGAAGTTGTCGTCTGTGCGGGCGATGCCGTCTGTTTCTC -TTTCTCTATCGCGATGCCGGTCCCGGTCTTTATCGTGGTCCTGCTCGCGCTCGCGCTCGC -GCTCATGCGCATGTGTGTCTGTGGGTTCGGCCCATGGTAAAGAGGGCGAGACGGGGAATA -GTAGACAGCCCACTGTGAAGCGGGGATCTTGGAGGAGATCTCGGTCTTCTTGTGAACTGG -AGTCGAAGTCGGTCAGGAGGATCTGCACAATTGGGGGCGGATCGACGGGGCGACGGTCTC -TGTCGCCTGCGCCGCAGGCGCGTGCGGCGATGGGTTGTTGTCGGATATGTAGATGGTATC -TAGATTGCGTGGAAGGTCGTCTGGGTGGTGGAGGGGGTGATTCTGGCTCTGGTTCATGGT -GGAGGAGTCTGTCGAAGCCGTGCGGCATTCGGTAATTCGCGGCTCGGGATGGTTTCGGGT -CTTGATGAGTGCTGGCCAAGATAGGGTTGAATGCGTGGGCAGCGCTTTTATCTGACCAGA -CGGGACTGGGCATGGCCGTGGAACTGCTGCTATGTCTATGTGGGATTTGGTCAGAATATG -GGTTCTTGAATATCTCCCTGCATTGTGTAACCTAGGACAGATGTGACATGAGAGGGATTG -TGTTCTGAAATCTCAAGCCAAAGACATTGCATCAGGATGGGCCACTGACAGATCGAGGTG -GTTCGACAGAAACGAAAAGAGAGGGACTTTACTGGGACTCCACGTCGTATGTCCAGCTCC -GGTGACTGTCATAGTCCCGGGCATGGCTCGACGTTGGGTATCTCTCTTCGTATGCGTGAC -GGGCATCAGCACCTCCCGGATAAGATCGCAGCGGGAGCGCTTGCGGCACCGGTTGTGTAT -TCATGTAGGCATTGTCGGAGGAGAGCATATATCGGGGATCCGTGCTTCTGTCCGGAAGAA -CCAAGGCGGGAATCCTCGAAGCAAGGGGTGCTCCGAGCATGGGTCGCTCATATCCCGTCT 
-GGGGTACCTGTCGAGGCTCATGTCGGTACGGATATGGCATGGATATCGGGCCGTGATGTT -GAGTGCGGGGGACCGAATCGATACATGGCCATTCCGGGGAATTCCCACGTTTCTGGAGCG -GTGGTCTCGACATCTCGGAGACTTGCAGCGTTATGTTTTAGCCGTCTTCGGGTCAGATGA -GACATTGAATACCCAGAAGATAGTCACAAGTAGAATTTAGATATCAGTTGGGAACGGGAA -AGAGAAAAAACACGATACATATCCGGTGAAGATATCTGATAATCATGTGAAGTCAAGATC -TCATCACACGATATATGAACTAGTTCTACTTCACAGATGTCAATTTTGCTCAGGAACCGG -GAAGAAAAAAAGAAAGAAAAAACCCAAAATTCAACCCGATTTCTCAATCTGAAAATAAGG -ATATGACAGGGAAGTGAGAATGGCATCAATACTCTTTTTTTTTTTTTTCAGAGTATCCAC -TGAGATGAGCTGGGGAACGGGCCGAGCGGGTTTTGCGAGGATACAAGTGGATTCGGTGGG -CATTCGAAAGACGGTCAGGCAATGGAATATGAGTGGAAGTTGCGGTGGAACTGAAGCTTT -TCGAATGGGAAGTCCTCAGGTCGTGGACCTATCGGGAGCCAATCCAACAAATCCAACTTA -CGCCTTTTTGCGAACTGGTCCGACAATTGGGCTTGGTCGTTTGGCAAGTGGCTGAAGTGT -GGCGCATGTTATTACCATTAGCTATATATCTACAATGCCGGATATCATTCCAGTCCAACA -GGCTGTATAATCTATGGACATCTCTTTGGGGAATGGGCCCAAGCGCTAACTCGATTGCGA -TGTACTGCTTACCCGGGATCTGATATCTACGTTTGAGGTGTTTGGAGTGAGTTTCAGCAG -ATTTGCCTTGTCATCTTGGAATATGGTGGAAGACTGAACTCCGAAGGGAGTGGAATTGTG -CCAATGGTGGCCGTTGGCGCTCTCTACACCTCGGTAGAACTAGGAAAACTCGGAATCGCG -GGACCTTCGCGCTAATTCTCAACTTCAATAGACGCAGATCACGATGACCTCATTGCAATC -CTTAAAATTGAATGATGTCAATCCCAGTCGAAGTCCGTATGCACCGTCAGAAGTGTGACG -ACGTAGTTCGTTGTAGATCCTCATTCCATCGACGTGATTGCCCCCTTGAGAGCCTTGCGG -ATCTGCAACTGTATGTTGCAGTGAAATTCAAATCCCCACGTGTTGATCGTACAACCGGCG -TGGATAGGTACCTAGGGAGCGATGGTCTGACAAGAAATCTTGGCCAAGGCCCTAAAGGCT -TCAATATGGGGCCCCCCCCACCGTTACCGATGATCCCGATGGCAAATGGCGTGGGTGGCT -TGATTCCCTGAGCGAAAGCACTCGATGACATACTAGAGAACACCGAGGCACGCCGGGCTG -TGATCAGTATCCTAGTTGGATTCAAGACAAAATGACAGAAGACCGTTAAAAGTCCGCAAA -CTTGTTTGAAAATGGAGTAATACTGGAGATGGGAAACCACCCGAAAACGATAAGACCGGT -GACTACAACGTTGTTGGTACATCTTGATTGTACCGAAGCATATCACCGCACAGCCTCTAT -CAACATCCAATATTCATCCCTCCAAGTGTCAATTTTCATGATTGTCATCCTTCATGACTT -CATGTGTTCTACACTGTAGGAAGATCATCTAATACTCTAGATCTGCACTGGGATCCACAT -CTAGATAGTACGGTCCCCAGCAATTTCGAGAATGAGGCCGTATACCACTAGGATATATTG -TTCGGTCGTCATTGGTTGAAATTGTGAGATGCATCAAGGCACCAAGAGGGCCATCTGAAG -GCGCAGGTCCACAATCAATGACTCCGGTGTTCTGCCCAACATAAAGCCCGAGGAGCTGCC 
-CGAAATGACGGTCCCAGACCACAATATATCCTGATGGTTGCCATGGTTGGTCCGGGCCAT -ACCGCATCTCCATGCAGCCGAGTTGCATCAAATGTAAATTGTGGATCCCCATTTGAGGAA -CTCCATGAATGATCAAACGAGCTCGGATAAGACCTAGGGTAGATGGCGTCATAGGCTCAG -AGAAATCACGCAGCGCTGGAATCTGGGAGTCAGAAAATTCAATCGGTGAGTGTGCTGGCA -CCCATCGGAGCTCGAGCAATCCGAAATTAGACCGAAATTCGTCAAGCCTTGCCGGGTTGT -AATCGAGCCAGGTGACAATATTCTGGGACTGTCCGTCGAAGTCATTCAACAGATCTGTCA -TCCTGCTTAGTGTAGCCGGAGACTTAGAGCAGCCATAAGCGCAAAGCTCTGCGTACAGCT -GTCCATGATCAGATCCAATAGGATCGTTACCGATTTTGGGCAACATTTGGTCCTCGAATG -CTTCCCAGTTTGTATCTAACGGGTCTAAGGCGATATTGAGATGACGCTGACTCTCAGGGC -TCCATTGGAAACCCATCCTTTGATCTATGTCATCAAGACAGACTTGGAGTAGCTCCAGGT -AGTCCTCTCCTCTTACTAGGTCAGGTAGTGCCTCGCCCACAATAGAGTCACCAAGGAGGG -ATTGAAGATTCGGGAACTTGGCAATGAATGGCAAGTTATCACCACAATGTTGAGAGACAT -CTTCCAACCATCTCGCTGCTGTTTGGGATCTAGCATTTTGGACCAGAGCCAGCACTACCT -GATAGGCGGCAATGCAGCTGTGATAGATTTCAGGCTGAATGTTCTTCAAGAAATGAGTCC -AGGACATGTGAAGGACCTCATAATTTCCAGTCCTAACCAGTAGACACATATATTCCTGGG -CCTTGGTTCTGAAATCAAATCGTTTGCCAATTTGATCAGCAGGTTGCGTCCAGAACAAGA -TATCGTGCAAACTGTGATATTGGGGCATGAGCCCTCCTTCTCCTGCGATCTTAGAAAGAA -TGGGATCCATGTCGTAGTATGGATCCTCATAAGCTTTGCAATCAATTGCCTCAGAGCAAT -ATTGGACTATCTGCAGACTATCCGCGAGACCTAGGGCTGGAAAGCCCATGTGGTGGTAGT -CATTCAAGAAGTGCTTCAGGTATGACGTTGAAAGATCAAGAGATGCAAAATATAATGCTG -TAGAATATATACCAGGCTGGATGGGAAGTTGGAGACGCTTCAGTCTTGTACACAAGTCAC -TGAGCACCGATAGGATTTCTGCGGATGTATTTTTCCGTTGACAGTTTTGCAAGGCGTTAT -TTAGCGGCAAACAATCACGATCCTGCAAAATCTCTGCCCCGTGTTTATCATTGACGTATC -TCTCAATTAAGCTCCGCAATGATGAGACGTCAGTCATCTCGCTATTGACACCCTCCAATA -GCTCTTGGTTCCGCGAGGTAAGCCCTTTGAGGGAGGTCCACAAGGGGGGAGGTGGAGTGT -TAAAGCTCCATTGGGGGACAAAAAGTTCACGGAGATACCCAGATAAGGCCTCATTAGCAT -TTGAGTCATGGACTGGCTTAGTTGAATCCCTCAAAGCGAATCGAAGCAGTTCAGATGGTG -ATGGGTCACGGGCTGGTAGTTTCATATTTGCGGGATAGATGTTGCGCACCAATCGCTGTG -TAAACAAATTTCCACTTGTAAGCTGTCGTCTCGGTTGCAGCAAAAGCAACCGAGTGTGCG -GTCGTACCATGACTTAGGACTAAAGTAAAGCTGATCTGGAAGATGTGGAGAAGATAAACA -CGGGCCGATAAGGGGCTGAGTCTGGGGTTTGACTGGCAGAACAAGGGCGGTAGGAGAGGC -GGTATTGGACCTCATTCGCGCTTTTATCTCATCTTCAACCAGCTGCCATTTCTTATCAAC 
-CTGGCTGCCAACAAAACTCCCACCGATTTTTATCCATAGCTGCTTTTGGTTGCTTTATCA -ACCCAAGAAAAGTATGGTCACGACTAGAAATGCGGATAAATCCGCAAAGGACGAGGTGAG -CCAAGACCAGTCGTCCATTGAGCGACCGACCGAGAACAAGAGCGAATCGCCTGTCGCAAC -TTCCGAAAAAAAGGCGGACGCCCAACCAGACGACAAAAAGGATGATGGTGCATCCAAAGC -GCGCGAGAGAATGGATAGATTTAAAGCCCTGCAGGCTCGAGCTGTATGTGTCCCTCAGAA -CATGACTCGTGCGGATCAATAGGCTTGGTCACTCCAAGACCATCACATCACTAACACAAC -ATAAAGAAATCTGCAACGGAGCGCAACCTAAAAGAGACTGCAGCCGAATCACAGCGTCTG -ACCACGGACCCCGCCCTGCTAAATACGTTGTCCCGCAAGCATGCATTCGCCTCCCACAAC -CTCCTCAAAGCGGACACGGAAGCCGACGGAGAAGACTTTGAACGCAAGCGCGCATGGGAC -TGGACTGTTGACGAATCGGAGAAGTGGGATAGGCGCATGGAAAAGAAGCAACGCCACCGA -GATGACGTGGCTTTCCAGGACTACACTCAAGATGCCCGGAAGATCTACAAGAGACAGCTG -AGGGAAATGGCGCCGAACTTGGAATCCTACGAAAAAGAAAAGCTTGCCGCGATCGAGAGA -GCCGCGGCCAATGGCGACCTCGAGATTGTGGAGACTGAGGACGGTGAAATGATTGCGATT -GATAAGAACGGTAGCTTCTATTCCACCGCGGACTCGACTGGATTTACAGACAGCAAACCG -GATCGGGCTGCTGTTGACAGACTAGTCAACGATATCCAGAAGGCAGAGGAGGTGCGCCTA -AAGAAGCGCAAGGAGCGCCGTGGTGGCGATGATGATGCGGATGTGACATACATCAACGAG -AAGAACAGGCAGTTCAACCAGAAACTTGCTCGCTTCTACAACAAGGTCAGCATCCCACTG -ATCAGGTTTTGTGATCGTTTCGCTAACATTTTTCTAGTACACCACCGAAATTCGGGACAG -CTTCGAACGTGGCACCATGATCTAATTTGAATGTGAAATTGATTGCATTATTTTACATAG -GTGGATGTTCCGAGATATACTTGAACATGGCATCAATCAACCGGCAGAGCTTCTATGCGT -TTTCCCCTCTGCCCCCCAAAAAAACATTGTTATACAAACATACAAGTCACTTGATCAGCC -AGCAGACCTAAAGCATCGTCTTTCGCCAGGCTTTTGTTGCAAGTGCCTCAATCTGTTTTG -TTCCGACCGCACCCTTGGTGGACTCATCCTCCAATGCCTCAACCACTGCTTCGCTCACGA -CATCGACTTTGAGGGGCTTTTCGGCCATAGATCCAAGGAACCCAAGCTTATTCCCGAGGA -ATGCGTTGAACTCCGAGGCAACAAAGCCACCCATTGCAATTGGCAGTGTGAACTTCCGAC -TCGAGTCGTACATGAAGCCCGGGCGTATGAAGATGCTGCGTAGGTTCGGGAGGGTGGATG -TGATAGTTGCCTCTGCTTCTCTTTTAGTACTGATGTACCTGGCCGGCAGAAGCGGTGCCC -CAGCCGCCGCAGAGATGTAAACGAAGGCTGGAACATGCTCATTGGTGGACTCCTGGGCTA -GTGCAATTGCTAAATTACACCTGTGTCAGCTTTCGAAGCGGACTTTAGAACTCTTGTACA -CACCCGAATCTCGATTCATCAATTCATAGGTCAGCTGCCCATCTTTCTCCTTTGGCTCCA -AAGCTTCACCCTCTTGTCTTGTTAGAGGGTTCTGGCTTCCAAGCTTAGATGTACTAAAAG -CTCGTTGAAGGCCACTTAAGATTGGTTCTCTCCCTTGCACAACGCCTTTGTAGTCGGCTT 
-CCAACAGAATACCCATAGTGTGAACAACAGCGGAAGCACCATTCAAATGTGACTTGTAGC -TCTCTGGCTTGAGGATATCGGCCTTGGCCCATTCCACGGACCCTGCCCAGCTAGGTCGTT -CCCGTGAATCAGTGACTGCATCCCAGCGGGGCTCGCCAGATCGGCTATATTGAGGTCAGT -ACATTGCTCTTTTGTAGATACATCCCTAAATAGGCGTGAAAGCTACCTTAGTGATGTGAC -TGTCCATCCCCTTGCTGTTGCGGCTTTACAGATCCTGGATCCTGTAAGAGCTAGTCAGTA -AACCGATATTAGTATTTTATCCTAGATGGGCATCACAAACCCAAAAATCCGCTTCCTCCG -GCGACGACTAGTCTTTTAGTTTGCATGCTGTAGTTGTAGCTTTCGAATAGCTCGGGAGAA -TGATATCACTAGAGTATCGGAACAGATGTTCCGGAATATTCAACAATTGGAACTTTAGAG -CGAAAAGAGGTCAATATATAGTTCCGCAATAGGTAGAGCGGTTGAATAGTTCAAGGTAAT -TGTCTCTTGCAGGACTTTGAAGTTAGCGTCATTGATATCATTCGGTATCGTTCGCCTTCT -TGTCGGGTTACTTGAGCCCCGACGGATTATCATTCTTCCGTGCCTATCTTCCGCCCTTGT -TCTGTACGTTCCGTTCCCATTTAATTCAACAATCTACAATCAACTTACAAGATGCCATCT -GCGACAGGAAACAAGCGCGTTAGGGTAAGCTCCAACTCCTGGGGAATGCTGAAATCAAAC -TAACGCGCTCGCAGGGCGTCTCGGTGTTCCGACCATTTGGTATGTCCGCAAAGGGCGCAT -TATCGGCGCAAGTCACAAGCTAACAAACACAGTATTTGGCAGTATAGCGCATCCCTTCGA -TCCTGAGAACAAGCCCGCAGATTGTCCTCCAGACCACACTCACAGGTGGGAGATATTTGT -CAAGGGAATCAACGGCGAGGATATCTCGTACTGGCTAAAGAAAGTCCAATTCAAGCTGCA -TGAGACATACGCCCATAATGTGCGCTCGATCGAGCAGCCGCCCTTCGAAGTCTCCGAAAC -TGGCTGGGGTGAATTCGAAATCCAGATCAAGCTCTACTTCGTGCCCGAATCAAATGAGAA -GCCCCAGACCCTCTGGCATAGTTTGAAGCTCCATCCTTACGGCCCAGATGCGGAAGGGAT -GAAAGAACGTCGCGAAAATGTGGTCAGCCAGAACTACGAAGAGATCATTTTCAATGAGCC -CGTTGAGCCATTCTACGAGATCCTCACCGGAGGTTCTGCTGCTAGTCAGCCTGGCAAGAG -CAAGGGGAAGAACACCAAACAGATCGGACAGGGTAGGACGGCGGATATTCCCATGAATGA -TGCGCCCGGGAACCCATATAGTCGCATGACCGAGAGGAAAGAGCTTGATCGCATGGCCGA -GGCCACGCAGACAGTTGAGCAGATGATTAAGGAAGAGAAGGAGCGTCTGATCGAAAGGGA -GAAGTATCTGGCCGAGCTACGAGAGAGTGAGGGTGTTCCAACCAACACGAAGAAGCGGTG -ATCGCGTCGCACAGTTTTCTGAAGCCAAGGACCCGGGCCATTGCTGCCGTGTGGCTGAGA -GTTGAGCATCTCGTCTGAATGTGCCTGGCCAAAAATCCTCACTCCTCCTGTACAAAGGTT -TCTCGGTGCTGGGACCTTTTTGAGTGGCTCTTCTGCATTGAACAGTCGAAGTTCGTGTTT -TTACACCTCTCATAATATCTTGGCCACAAGATTCAAACTGGTCTGTTCCTACCTAATTCA -TCACGACTATTGCATTATCTGCCAGCTACGACCGACAGGCTCGAATATCTGTGTTATCGC -AGGTTCGTGCCTATGACTCACAGGAGGTCTTTTTCTTGTTACCCGCGAGGTGGAAAATTT 
-TAGACCATGTCATAGGGTGTCTTGGGCATGCACCAGGGATCCCATCCTTACCATGTTCGT -ACCAAGTTTGCAACAAGATTCACAGGGGTATTTGCTACCTACTCGAGAAGGGAAAACCCA -GCAAGGATGCAGTCCTATCTCTGCTGGTGTACTTCCAGATTTCGGGACATATACTATGTT -AGTTCGTGAGATGATTCTTTGTTGAGGAAGATCATTTTGAACCATAGTACTGGTAGAAAC -GAATATCAATTTACAATATGAAAAGACAAATCTATATCACAATTTTCGTTCAGTCGTCGC -ACGTCCATCTGGACACCTAGCACTTCGCATCTTGAACTGTATCTCTTGTTCTTTTTAGAA -AGGGTATCAAGTCCGTGCCACCCGTACCAGCAAGCTTGGTTTTCGTGCCTGTTTCTTTCA -TCTGAGAGCTAGCCGTGGCTAGATTCATTCTGCCTTTAGCCTGCTCAACTGGATTAGGCT -GCCTCGACGCCATGACAATGTATCGAGTAACCATCTGGATATGTGTATCGCGGAAGGACT -TTAGCTCATTCACAGCTGCATTATATGCATCTTTGAGCGGTTGAACCGGCGAGTCCAGTA -TAAACGAACGAACCTTGGATCGAGCCGACAGATCTTCAAGGAAGCGTCTATGCGGACCAG -GCATGTAGTGTCGCATTTGCTGCTTTGATGTTAATACTTATTCGTTGTCTGGTGGCGGAA -GTTCCCAAGATATCCTGTGATGTTCTTACCTGAATAAAACTCGGACCACTAGCCTGCTTC -CCATCCGTTGCAGAGTGTTTGATACCAAGGAAAATATCAAGTGCCTGGATCAGCGAACTT -TGTGCATTGCTCCCGCCACTCAACTGGAGCCAATGCTCTGGTCTTTCCACCCCGTCACTA -TCACCAACATCGTAGTACACACCGTTGGGCAGGCCAGCATGGGCCATATTCTTGCTACCT -GCCAGGAACGGCCGAAGCTGGTGGAAGAAAACAGACGGCTTGTTGTGCTCGTACATCCTT -TTCAGAGTCAATGTGAGGTCGCATAAACCGTCGGCGAACCTGCACAAGAGCGCAGTGAGC -AGGTCTACGTCGTTGGCAGCAACTGCCTTGAAAGCGTCCCGCATCAGCGAGATGAGCTTG -ATGCCTTTGGCTTCAACGGCAACGGAAATAACCATGAACCATTCCTCGTCTTTTGTACCG -GTGAAAGAGGTCTGAACGTGCAGGTTGTCCGGGTCTGTGATGTCTGCTTCTGGGTTCGAA -GGTATGAAGTTCCATAGTGTGAGGCCAGCGTAGGTGGCACAGGGAGGGAGTTCCAGATGT -GCTGATATCTCAAGGAAGGGCTTTGAGATTGAGGGGGGGAGTCTCTGTTGTCTGGATGTC -AGGAGATCCTTAAACGGAAAGGAAAGGTTGGTGGATGCTTGCATCTTTTGGGATCTCTCC -TCCCCAGATATAGGCATGAGCAAAATATCCCATAATAACACAAGCGCGTCGCCACTCTGG -TTCGTTTTGAAGATTCTCAGTCGTGTAAACGGGCATGTCATCGATTTTGCCACGGATCTG -GCTCGTTTGAATGAGGTTCGGGAGTTGGCTGGCTATATCCTCCCAAGTCTTGTAATAGGG -ATTGGAGAGTCTGGCTAATGGAAGAGAATCCTGGAGAAAGCCTTGCTCGGCATACTTTTC -GAAAAGATCATTGAGAGCAGCATCTGTTGATTGCAACATTGTGGATGATGATGGAGCGTT -TGGAAAAGGGGAACCGCCTGTATATAAACAAGAACAAAGGGGAAAGGCTGGCATTTTGGT -TTACAGTGCGAGGAGTAATATTATTACTTCATGGCCTCATTATTTTAATATCTCCCATCG -GTTTATATCGGGGCTATTTAGTCAAACAATCTATACAATCCGGAAGCTATCCGGATCTCT 
-ATCCGTCTTCCGCTTTGTAGAACCGTGTAGATCCATGCATATACATAGGCTTAGTTGTGT -TACTTTGGTTCAACCGAATTCGAAATTGTTGAGAATATCTAGTTCTCCAAGCTCATATGG -GGGAGTGCTTTTCAGGTTGAGATTCTGATTGAGACTTGACAGAACCAATATTGGTCCTGT -TGCTAATCAAAGACTTTGCTTTTCTTTAGTACATAGTAGACCTGTAGATTTATGCATTGT -GCATATGATAGCAAAAAAATAGATGACTTATGAGAAGATCGAGATATTTTATGTGTAGAA -TAATCAAAAGTGCTTCTATGATGTAAGGCTATAGACACGGATCGGGGAAACGGAAGCGGA -ACACGATCCGTGTTACCCATTGTAGCCCTCGAAGGTCCCTTTGTCTCCGATTCGGCTCTT -ATAAGAAGTAGGAGCCTAGTTCCGAGCTCTTTATTTTCTTCTCCTCGTATACAATTGTCA -CCTACTTTACACTATATTCGAGCTATCTCACACCATTTATCAACATGACGGGCACCAAAC -TGTCTAAGCCAGTCTTTCCTGCAGAGGCAGCAACAAAGGATTATGCTGCGTCTCTGGATG -CAGCAGATCCTTTGCGTGAATTCCGCGACCAGTTTGTCATTCCCTCCAAGGCGAATCTCG -CGACGAAGAAGTTGGCAAAGCCAGGTGATACATCCCCGCCGAAAAGCGAAATATAGTTTG -ATTTCTAACAAACCCATACAGGTCTCTCGGCAGAGCCAAGCACATATTTTTGCGGTAACT -CATTGGGTATTCAGCCCAAGGCCGTGTCAAAGTACATGGAGGCCCAGTTGGATACGTGGT -CGTCCATCGGTGTCTGTGGACACTTTACTGATCTTGAAGACTCACCACTGAAACAATGGC -AACTACTGTCTGAACAGGCAGCTGCTTCAATGTCCCGTATTGTCGGAGCTGCACCTGAAG -AGGTCGCAGCCATGGGGACTTTAACGACAAACCTCCACCTGTTGCTTGCAAGCTTCTACA -AGCCGACTGAGACCAAGCGGAAGATTCTTATGGATTGGAAGGCATTCCCTAGTGATCATG -TGAGTGACACCTTATAACAAGAAGGATTCTCCTAGCATCAAGACCGTGATAAACTGCTGT -CTCAATTACCATCACTAATGCGCCTGTAGTATGCAATCGAATCACATATCGCTTGGCATG -ACCTAGATGCAAAGGAGAACATGATTCTCATTGGTCCAGATGAGGGACAATATGAAATAT -CAACGGAGAAAATTCTGTCCTATATTGACAAGCATGCTGAAAGCGCAGCGCTGCTTCTTC -TCCCCGGTATCCAATACTATACCGGTCAGTTATTTGACATTTCTACAATTACCAAATACG -CCCAATCAAAGAACCTCGTCGTTGGATGGGATCTAGCACACGCATATGGAAATGTGGAGC -TGAAGATGCACGACTGGAATGTCGACTTCGCAGCTTGGTGCACGTACAAGTACGGCAACG -CCGGGCCCGGCGCGATGGGTGGTCTTTACGTGCATGAGAGACATGGTCAGGTTGATTATA -GTGCTGGACCAGATGCTCCCAAGTTCCGTCACCGCTTGACAGGATGGTATGGTGGTGACC -GGTCAGTGAGGTTTAAGATGGACAACAGTTAGTTTCTTTCTCTTCGTGTCACGCGATGCA -TGATCTATCTAACCACCAGCAGAGTTCAAGCCCATCCCCGGAGCTGGAGGTTTCCAGGTC -TCCAACCCCTCAGTAATTGATCTGACCTGTCTATGTGCGGCTTTGTCCGTCTTCGATCAA -ACCTGCATGGTCGACTTACGCCAAAAATCTTTGAAGCTGACGGCATATCTGGAGTTCCTG -CTTCTGAAGGATACCGATGAAAAGACTCGTCTGTTTGATATTATCACTCCTTCGAATCCC 
-TACGCTCGAGGCGCCCAACTGAGTGTTCTGCTCAAGCCTGGCCTGCTACACAAGGTATCA -GAACGCCTACAGGACGCGGGTATCATTTGTGACAAGCGGGAGCCAGGCGTTGTTCGTGTG -GCGCCTGTGCCGCTGTACAACACTTTTTCTGAGGTGTTTAACTTTGTCAAGGAGTTCAAG -GGCGCTTTGGCCGATTCGTAGTTGATAATTTATGTAATGAAAACAAATGTGGATAGAATG -TCGAGTATGATAATGCAGTGCACAATGATTAACATGCTGAGAAATTTTCGGAACTATTCA -ATATCATGATCAAAGACAAGATGTATATACAAAAGAATTGAAGGTTGCTATTGCAAGCCC -TTAGAAGTTGCCGACGCTTTCCCAATTATCACCACTGCGGAGGAACTCTTGCCCCTGAGA -AAAGAAATTGGCAGCAAGAATAGAGAACTCATCATTAGATAGGGTCAAATGCTGTGGTGA -AAGCAAAGGGTTTGCTGGGTAGTCCTGACTAGGCAAGGCCGTATTGGTACCGAGTGATTG -AGACTGACAACTTGGTGTATCTTCTTCCTGGACATTCAAGCGAAATGGCGCAGGCAAAGG -TGATGTCTGAGACACTTCGATGCCTGCAGGGGATGGACCTGCATAAAGCGACTCTCCGAA -TAGACGCCGAGCAGAGTCTATGTCACTGTTGGATGAGTATTCGAGGATTTCCCAGAATCG -CGATAGATCAATTAGGAGGCTTTGGTTTTGTGCGCCTGGATCTGATTGATACGTTGCCGA -GACTGCGTCTTCTAGTCGCTGTAGTCTATGTGCCTGTTGATATGACGAATCAAGTGTTAG -AATGTTATTCTAACTCAAAAAAGAAACCCGAGAGGGTGGAGGTTCATACCAGTCGGGCCA -TGTGCGGCCACCTTTCCCCTATGGTTTGTACAAACTTGACACACTTGGCGAATCTTTCAC -GCTTACGTTCTCTGATAGTCAAGTCTTCTGTGAAACTGAGCTGTAGCTCGATTGTAGCGA -CAACGGCGGCACTGTAGCCTAGTAATGGATCTGATACTAGGAATGACTTCTCTTCAAAGC -AGTCGATGAAGTGAATGATCCAAGTTGTATGCGAGGAGATCAAGTCCGAGGTGTGCTGAA -GAAAAATCTCCGGAATAGTACTCCGGAAATTACGAAGACTGAGCGAGAGAAGCAGTGGAT -GATTGAGTAGACATAGGCTGGTATGGTACATGAATTGATTCAGAAACCATGGACCCCAGT -ATTCACGATGGGTCTGTAATTCCTCAAGAGACCGCTGAGCGAGGTTGGCTGGCTTGAACC -GGTGGATATATGGCATGCGTGTTTCGAGATCCATCTGTAGAGCAAGAATTTTTGAGTATT -CGGAATGAGGAGACCATGGTGGGACATTGCTTGGGCGTCCGTGAAGGCGCACGTATCGTG -CTGTCCTTGCAAATACTTCGCTCAACATCACCACATATGCAATTATTCCTTCGTCTTGTA -TATCACTTCGCCGTATTTCGACAGTTGGTGGCCCCGGGGAGAGAAATTGTGCTGGTCGTA -CCGGGCTTCGGGGATACAGAGGGCCTGCGGCCTGGGGCATGTCTAAGATTGGGAAGTCGC -CGCCATGTAGCCGCTTGAGAAGACAAATACTCCAGAAACACCGTCTGCGTTCTTCGCGGA -CTATTGTGCTAGTCGATGAATGCGGCTCTTGTCCTAGATTGGCACACTGTGCAAGGTTCA -TTGCTAAGCTGCTGTGGATGCTGGAACGATGTGTATTGCCGTCTGGTAGATATTATTAGT -CCAACTCTAGATATTGGAAAACTGGAATCATACTCGTGAAGTCGACCAAGCTCAAAATGC -ACAGACATTGAAGTGTCGAAAGCTCCACTGGCCCTTCTGAAATTCTTTTCATAACGAGGC 
-CCCGAGCAACTTCAGCATAGCCATTTATCAGGCTAGTAGATTCGGCCTTATTTCGATGAG -CGTATGAGAAGCGAAAGGCAAGTGCCAATATGGCAAAAAGAAGTTCCGCTTCGCGTGTTT -GTAGACTGGAGATAAAACTATCCCTGAAAAAAAGAGGCAGAGGCTGAGAATCGCAGTATA -GCAGATATAGTTCTGCAGTGGGAATAATCTCTTCCCACGGTGGTAGAGTAATAGATGAGC -TTGGCACCGATAATGCCACAGACGAAACATGAGATTCTGTGGGCATTTGTGGCTCAGTCC -TCGAGCTGCAGCATGTTCCTAAGAGTAAGTAATTGCTTGAATATGTATCATGTATGTTCT -CACCCAAGTACATTCAAAACCTCCCCCACTTTGCTTTCAAGGCTTCGCAATCTATCTTCC -TATGATGGAATTAGTATTTGGATAGAACCTGGTGGGGGCTTGCATTCATAGAAAAGAATA -CTCACCAGAATGTGAGATGTTCGGTATTCTGAATCATTGAGTTTTGGACTCGACTCAGGA -AGTGGCCGACGCCTTTCATCGGCGTATAAACATTGTTGCCGTAACCTGGCACACAGAGAA -CAGACCGGCTGCTCTCCGGGACATTTGGCCTTCTTACGCCTGCGAAATTGGTTGACCGCT -ATCTTTCAAACAATACAATGGGCTTGGTACCTGCAAGGCTCACAGGCCTGACGAACCCGC -TTGGTGGGTCTCTCCAAATAGGTGAAGCTCTCGTCTTCCATCGTTAGATTCGTGATATTT -AATATCAGGACCTATGGCTCTCAATGAGGACAGTCATCCTAGGAGCATGCATCAGGTGTT -CAAGGGCTCCGATTCACATACTATGTTGAGAGAAATATTCAGTGCATGCTCTCACTCATA -GGGACATATGGATCTGAACAAGCAAAGCCGTTGTGTGTATTACCCGGAGACGGAGGTAAG -GCCGTGAGATTGGCTCCAGAGCTGAGTAATCAAGTGTATTGTCGGGAAGCAGCCTGGCCT -CGGGGGTGCTTGTCAGCCATGGACTCGGCCCGTGGAGGCGCCTGGGGCCCGGAGCTCAAA -ATCCTATATGCTCACATATTTGAAACGTCCACGTCTTCTAGGCTCACCTCTTGTGAGCTT -TTATTGCGGTTAAGATTTCCTGAGCGAATGCCTAACTGACCCTATACGGCCTGAACCACG -TGTGAACCACGTCCATCAAAAGATATATGGGTCCTATGTCTCGCACTCTAGGGATCTGTT -TCGCACAATTACCCAGACTCCAAAACCAACAAAATTCAACATACCACAGATCTTGAAATA -AGGGTGCAGATTTGAATCTATAAATGTGCGCTTTCCCAAGACGGTGTTGATTGTGATGAC -AGCTGTACTGAAACTTGAAGAGTCCCAAATTTTGATCACAGAAGTATCAAACCATCGGAC -GTCCACATTATGTACATGGCTACGAAAGAGGTGAAGATACAGCTTTTCATGAAAGGGGAT -TGTTCATCTCTTGTGTATGCCCGCCCAGCCTTGGGAATAGCCCAGGCAAGTCATGCGTCA -TCCACTTCGACCTTCACCAATCAAGATATCTCTCCACTGTCATACGAGGATGTGACCAAT -ACTGTATTTAATGCGTAGACGTATTATTACGGCCGCTTGGATGCTTAAAAGGTCCAATAA -GAGCCGAAATAGCCCCGCTTGGGCTTGTTGCGAATCATACCACCACAATATCTACTCTAT -CAAAAACCAAACATGCGGCTTGGATCCGAACGGAGACTTTTGTGGGCTCAGGGAAACGAT -ATATAGCAGCATCTCTTGGATTGGCGCGTGAATTGATCATATGTACAGCTCCAAATCTGC -GTGGCTGCCAACCCATCTAACCCCTTCCTGGAGTAGAAATACACCATACAAAAAACAACT 
-GATGCCGCAGCTTGAATAACCAACGAAACTGAGAATGGCATCATCTAGCTCTGCAAACCG -GAGTCTCGGGTCCTATGATTTAGATCCAAGTCTTGTGGCCCTATAAATAAGTCAGCATGT -GAAGGGGTTAAATCAGTCACCCGTGGTTTGAGAGGGCGCGGGTTGGGTAGAACTAACGAG -AACCATGCGGCACATGGCGTAGGTGGTAGCTATTATTGATTAGTTTTCTATTCTTGACCA -GATTCATATCGGAAGATAGGTCCACTCACCAGCACCGAGGCCGTAGACCGAGATGAGGTA -GGGGTACATGATGTAACCGGAGCGGGAAGTCTGCGAGACAAATATCAAGTCAGCTTGAAT -GTTCTCTCTGAAGCTCAAATGAGGGCATATGTGCCCTTTCCGGCATGTACTGACCTTCCA -CCATTGGCGCTTGCCATCGTGGTTCTGGAAGAGACGCTGGTAGTAGGGCACACGGTTCTC -GCGGAACACGAAGCTAAGCAGGGGCAGTCAGTCAGTATAAATTGCAGTCATATGGCGGGA -TAGCAGGTTCCCGCAGCTGCATCAACCGCAATTGGACCCCGAAGATCGCAAAAGGAAATT -CGATGGTCGCACTCTAACGATCGGGTTTGTGCGGGGGAGGATTTATGCAGCGATTCGAAG -GGAGCACATACCCAGCCATGCGGGAGCTAGTAGCACGGAACATTTTCGCGGTTAGAAGCG -AGGAAGGAGCGGGGATGTGAGGTACGACGTGGGCAAGATGGGGATGCGAAGTTGGTCCGG -AATGAGCTGGATCTCGTGTCACGTGCATAGCATCACCTGATCGAATCAAAAGTTGTGTCT -GGTCATCATCCCAAAAGGGAAAAGCGCCAGAAAAGTACCCAGACGCGTATCAACTTCTGA -ATATTCGGCAACGAAGCTTTTGAATCGACGCCGGAACTCGAAGAAGGATGTTTGGGGTCG -AGAACTCGCTTTACCAAAATTCATTTCTTAAACACTCATGCGCCCGGCGCTTCTGCGACT -GTTGAAGAGGCCGTCCGCCGTCTCGATCCTTAACACACTCACAGCAACACCGGTCGGAAT -TGAGCAATTGGAGTCCGGGTACAGGTGTCTGCGATGTCATTCACGACGCGCCAAAGAAGA -ATCACTCGTGGAACCCAACAATAGCCCTACCCGACATCAGCGTGAGGATACATGCCGAGG -GAAACGGCCATTCAGCTTTCCCGTCTACGACATTGACACCTCGAATGAACCAAATAACCT -TGAACCCACGCCTTTTGCGGGGGATAAAGAGCATAAAAACACCACAAAATGCCCTTTAAA -ACTTCTATCTCTTCGACCGGAAAAGCTTGAATTTGAATCGGACGTCGGACACCTTAACAA -CATTGGAACTCGACTAGTGGACAATCCGGAGCATCGAAATAACCTTGATCTGTGGGAGGA -ACTTCTCCGGTACCGCCAGCGTCACTATGGTGACAATGGCACTCTGCACATATGGGAAGG -GTTGAGGGTCCGAGTGGATGGTGTATGGCTTCCAGTTTTAGGAGAGCAAGCGGACTTCTT -TTGGCAGAGTTTTGTGGACATGGGTTTGAGGCGAGAACTTTTTCTCGGGGAGATTCTTGA -CTATGCTGTTCTTATACAGGAGCAAAAAGGCAACCGATGGCCACAGCTGTATGAGCGAGT -GGTGGGTGGACTTCTTGAGCAAGGCTTGACAAAACGTGCGGTCGAATGGCACAAAAAGCT -TCAAAGTTCTCATTTAGCCAGTGCAAATGATGTGCTGAAGATCCTTCCCTCGGCGATTCG -TTCTTCGTCTCTTCCGGCTGGCACGGAATTTGCGATGTCGGCTGATCTCGAGCGGTCGTC -CTTGTCGCCGGGATTGCAGGCATTCCAAGCTGCATGCTCCACGACTCCCGACCATAATTT 
-CTACGGCCCTGTGATTGCAATGCTCATACAGCATGGCCATGGAGAGGCGGCGATCTCAAT -GCATCATTTCCTTGCTCGACGCCAGGATCATCCACAGAGCCCCGACGAGATACAACCGTT -GCTGGAATACGTTGAGAAATTTGGGCTACGAAAAGAGTTCAATCGGCTCCGTGGCTATGT -GAAAAAGCGGTTTGATACCGAAGCCTCGATTGATCAACTTGGCCCAATAGATACAGCCCT -GAAATCTGAGAAAAAGATGTCGCAAGACGAAAAACCCTTCAAAGATGACATTGGTGCTAA -GCTGTTTGCCACTCGTGCTCTCAATTTCGATATGATTGTTGGTGGGTTGAAGATGCTAGG -TGTATCAGAGATCGGCCACCGGACGTTGCGCGAACTAGCCACGAGAGCTCACGGCAACCA -GGATCTTCTGGACAAGCTAAAAATACTCAAGCAGTCCGGCATTTCCACAGGAAGCACTGT -TTTCTCGCGCTTAGTTCAAAAACTTGCAGCCCAAAACCGCGACATCCTCCTTTCCGACAT -CCTTCGCAGCGATCAGCATCCTGACATTCTCGAAGACAAGCGTATGCAGGAGTCCATGCT -GGTATCTTACTACATGGCACGGGATTGGCGATTGTACAACATGACCCTGGCGGTGTTAAC -AGAACTATACCCGGGCGCACCGGACCTCTACGACATCCATTTCCGAAAGCATATCGCGGC -CTGGGAATTGGGCGCGGCATCCAAGGTTGCCGACGAGCTAGCTCTCCGTGGCCGTACTTT -GGACGAAGCCAGCGTGGATTTCATGGCCGACCAGATTCTCACCCCTCGGCGAATGAATCA -TCGCCCGCCGCCGGGTCAACGCCTGTCTGCCGTCGACGAAGTGATCTTTATCTTCAAGAT -TCTAAAGCGCGTGGTTCCTGCGGGAGGGTATGTCAGTGCCGCATTCTGGGTTGAGATGCT -CAAGCGTCTTGGTATGGCAGATGCATGGGCAGACTGGGATAAACTCCGAGATTGCTGTCA -GTGGCTTGTCCGCCAATACGCCGAAAGCCCTGGGCAGAAGCCCAACGTGCCACACGATCC -CACCAAGCAGGCCAACGGCCGCGATCGGCGGATGCTCGATCTGGTCTTTAATCCACAAAT -GCAAGCGGCCATCGTCTCCTGGGGCTTCACGTTCCGAATACTGGAGACCACCGCTTCGAA -GTTCGCTATCGCCTCGCCTAACTCCCAAACGGACGCAAAACTCATTCCATGGGTACGGGG -GCTGGTTCTACTTCGAGAGCTAGAGCAATCCGGTCTTCGTCTTGACAAGCGATTGATTTC -TCAGGCAGTCCGCCACAGGCTGGCCATGCTCTACAGCCACCATGTTCTGTCAGCGCGACG -CATGAACCGCATGCTGCGGCGGCGGAACCCATATCCTCTGCAGCAAGTGCTGAACGATGT -GTTCCAGGCCTGGGGCGATCCATATCTCTTTGATGGGATGGAACAGAATTTGGAGCAGCT -GGTAAATCCGCCAAGGGCGACTAGGACGAAGCGACGTGCCCCGGGTATACGTTTATCGCT -TAAAGGCCTGTGAGACTTGCGTCCATGTATAATTAGCAAATATGTAAATAATTGATCATA -ATACCCCATATCATACAAGAAGCGTACACGTAAGCCTTCCCTGTGAGACGATGATCGTCA -AGGATCAATAAAAGTTCATTTAAATCGTATGTATTACTCTATCGCTAATGATTATGCCTA -AATGACTGGGCTATGTCACAAGTAAATGACCCAATGGCAGTGCAGCCCCGGAGTTGTCGA -TCCACCGATCTAAGAATAGTTGAGACGGCAACGCGGCCTCGTAAACATTCTATtatttct -atttgtatttgtatttgtatttcttctttttcttttctcttatttatatttgATCTTACA 
-GAGTATATTGTTACATCTTCCTCATTTGTGTCCAGTTATGCGAGTGTTCAACTTTCTGGC -AGTGTTGCCCATGGTCTTTGCGAACCCTTTGATTCAGAAGCGTACGACAGCATGCAACAA -CTCTCCTGATCTATGCTCCAAGTCCTATGGAGAAATCACACATCTCGGTGCCCACGACAG -TCCTTTCGTGCGTGACGACAGTACCGGCAACTCGATAGCGGCCAACCAGTATGTTCACCT -CCAAATTCCAATGATCACATGGCAGACTAATACCCGTTCTCTAGGTACTACGACACTCCA -ACGCAGCTATCAGCAGGTGTTCGATTGGTAACAGCCCAAGTCCACAAGAGCAATTCACAA -TGGCGTTTATGTCATTCTAATTGCGACCTGCTCGATGCGGGGCTGCTGAGCGATTGGCTG -AAGAAGATCAAGACGTGGCTGGATGAGAATCCAAACGAAGGTGAGTGAGCTCCGCAAGGA -TGCATGAAGAGAAGAAAGAAAAGACTGACAGTCTCCAAAATAGTGATCACGATTCTACTA -GTCAACTCCGACGACGCCTCAGCCTCCGATCTCAACACTGAATTCACAACGGCCAATATC -ACCGACTACGCCTACAAACCAACCAACCCAGGCACTGCCCCAACTACTTGGCCAACCCTA -CAATCCATGATTGACGACGGCAAGCGACTAGTCGTCTTCGTGGCCTCCCTTGACACCAGT -GCCAGCTACCCCTACCTAATGGACGAGTGGAGCTACATCTGGGAGAATCCATACGACGTC -AGCTCCGCGTCAAACTTCTCATGCTTGCCGGACCGACCGTCGGCATACAAAGATAACAGC -GCATCGGCACTCGCAGCGAACCTCCTCCCACTGATGAACCACTTCCTTTACTCGAGCAAC -CTCGCCATCATAGACGTGGAGTACCCGAACGCCAGCTACGTCGGTACGACGAACGGTGCG -TCCGGCGGAACGGGCAACCTCGGTACTTCGGCTACAGAATGCAAGAAGGCGTGGAATGGG -CGACAGCCAAGCTATATCATGGTGGACTTCTTCAACCGCGGACCAGCAATTGATACCGTC -GACAAGTTGAATAATGTTACCAATCCCGTTGGCCGTAAATCTGTTTCGACTTCGGCGGAT -TCGACCAGCGATGCCTCCTCGACGAGTAATGTGTTCAAGGCGCTGGTTGAGCTGGCAGAC -TCAGCCAGGTCGGGCACGTCGGTGACTATGGGGAACTGGATTTGGACTGGTGGGAACTGG -GGAAATCTTCTCGGTGGGGGAATCTCTTTCTGAGCTGATTGGTTTGGCCTCGGTCAGTTT -TGTCATTCTTTTTGCTTTCTTCTGGAGCGTTTTTGGATACTCGAGTCCTGCGGTACGGGG -CTGGCCCTGGCTGGACGGTGCATAGGTATTGTCTGCATTCAAGGGCATGGCGTTTTGGAT -TAGAATTAGGATGGTGGGCTTCAGCATTTCCCCCAAGGAATATATGGTATAGATTCTACG -AATGGCATGATTTCAAAGTCAACTCGTGCTTAAGTACTACTCTAGTCTTTTCCCAATAGC -TGATGCACCTTGGTCACCTTATCCTCGAAGGACTGCACTTTATCAGTCCTGGAGATAATT -AGCATTCAGAACTGGAGTACCCAAACAAAAGACGACCTCACCTCGATCCAACACGAATAT -CGGTATGAATTCGAACAATGCCCTTCTGGTGAAGAATGGTATGCGCCTGTCCAATAACCT -GATGAACCCGGTCCCAAGATCCTTCTACAAAAGTGAAAGAAGTTAGTCAATAAAGAAATA -GTGTTACACAGCCTTGTCAGCTCCCAGCTCTCCTTTTTTCCCTCGATCTTAGTTCCAGCT -GCAGGGACAAAAGAGATGATGGTAGGGATGGGAGCAGATTCCATGTAGGGATCAGAACTA 
-CGGAGGCCGGGGTAATGATATGTAGTATATTACAGCTTCGAGACATGTATTCTATACGTA -CCGACGGTTGTGCCCGCGGAATGCATGGTATACTTCAAGCCGCATGTTTCAATCAGCCGC -TGGACGTCAGCGACTTGGTCTGCGACGGAGGGCGAAGACGTGCCGATCTGAGCTGTTAGC -TAGTGGAAATGAACGAGGGACTTTTAGGCTTACAGGCACAAGACAGAAATCGGCGACGCA -GTGGGCAGGGGTCGGAATGGAAGCCATGGTGAAAGTAAAATAGTGAGATAGATGAAAAAC -AGGGGTTCGGAGATATTTGGGAGATATAGGGAGTAGATGAGAGATGAGGTGAGGATGTGG -TCAGTGTGACACACCCCCATCTCTGCAGGGCTGAAACCTCGACATTTTGGTCTATAGAAG -ATCAAAAGTACACATTTACAACATACATAGTTCCATATTTACACGTTTAGATCTCTTTAC -TTTTCCAACTTTACCGAACCCTCGAGCCTCCTTATCATCTCACATCTCACATCTGCAGAG -TGTCATTTGCTGCATCTCTACAGTTCGAGGCATATTGCTGACCGCCATTGCCCCGATAAA -TGCGGCTTCAACTTCGCTCTGAACATATTTTTTCTCCCTTCAGAATATTCCAAAATCTAA -ACTCCCCTATACTCCGTACTGCATGATCGACTCCAGGGGGTACGCCACTCAGTTATTGAG -AGTCCCCACAACTCAACCCTGCTTTTTTGATCACCCGGGGGGTTTTTTGAATGTCGGGCG -TTTTTGTCCGGAATCTCCATCAAAATCCTTCTTTGGTCGCCTCAAGAGCTTTGTCTGTTT -TCTTTTTACGAATATCACTTGAATTTTCGCGCACATTTTCGTTGAAATTGATTGGATATC -CTAATCGTGATCAAGTGTGATCCCAACTTAACTCGGTTTAGCCCCTCTGGATCGCTTTTA -AGCTCTCACCACTGGATGCATATGCATAATCTGCTGAAGAGGCGTGTTTCACATATATTC -CCAACTTTCCGATCTGGGATATTCATGCAATACATTATCTGTGGCAGAGACACCTAACTC -GTCTACTATCACGTCATGATCACAGATATAATTTGGATTTCCAGCCCAACACCCCTTCCT -TCAACGGATGACCAAACCAAATATCCTTGAACTCCCCTTGGGAGTTCAGCTCGTCTATCT -CGCCGAGGGAGGAGCGAATGTGATTTATCGATTTGTCGGAAATCCAATCCTCGGCAAAAA -AGATCCGAAGAGGCCACTGTCTTCATCTGCACTCGACGCGGATCGTTGCAACCTACCATC -CCAATTCAAGGGGAAATTGCTGCGACTTCGCAAAGAAACCGCAGCCGACATCTCCTATAA -AGAGATTATCCGAAACTTCGACACTATCATCCGACCGTTATTCAACCCCGAAGAACTAGT -CGACCAGGCCCTCATCAGACTCCCAGAGGGCCTTCTTTCCAGCTGCAACGAGCAACTCCG -GACTGCTGAATTAAACGGAGCACGTCCAAAGAAGCGCCATGGTGGCTATCTCTGCCTACA -TGAGCCGTTCGGCCTCCTCATCACTGACATGACGACGGCCGGAGATCCCGGCGCTAGTCT -CGCCGAGTTGAAACCCAAGTGGCTCATCCAGTCGCCATCGGCACCTGCGACAGCGCATAG -GTGTCGGACCTGTGCGCTGAGGGAGATGAAAAGCCGCGAGTCCCAACTGCAAGGCCTAAA -AGTGCAGCGGTCGTTCTGTCCGTTTGATCTGGTGTCGGAGCAGTATGAGAATGTACTGCG -GGCGACGGGGCTGATCAAGGGCTGCAAGGATCGACCGCGACTAGCTCGGATTTTGTATCG -GAACCCGACACTTCAAAGGCTGCAGTCGTTGCAGAAGACTGAAAGGGATGTCGGACTGCA 
-GGGTCCGGCGGCGCAGTCTCGGGAGATGTCGCTTGCGATGACTCTGCGGGACTGTACGAT -GTTTATCAAGGTGAGTCATGCTCCAAAACCGGGATGCGTTGATCCCATCAACATTAAGGT -CCTATCCTTTGGGAATCATCGTCCAGATTCGAGTATTGACCACTCTACAGATCCCCCATG -ACGAAAAGTCACCAGTCGAAATCCGCCTTGGCGACCTCGACCTGAAGACAGGCGCCGGCG -GGAAAGCTGGATACTGGCGAGACCTTGAAACGCAGCTGATCAGCCAGGGCTGGTATCAGG -GAAGCAACATCAGCCAAGATTCGGGCGAGTGCGCTTTGGATAGCCCACGAAGACCCTCCC -AGTCCCATTTATTATGAGGCTATATCAGACTTGTTTGGAATGGCGATACTAGTTCTTAAT -GCATTGGATTTTGTTTTATGTTCTGGTGGTTTGAACAAAGACAATGGATGCCGTGGGTGT -GGTCGGCGTGGCTCCTCTGACTTGTATAATAATTCCTTTTGGTTTTTGTGATTTATTTTG -GGGGGCTTTTTGGGCTTTGTTTTTCACGGTTTGGTTTTGGGCTATGGTGGATCTCTGATA -ATATGTTTTAATGACTGGACGGTCCTTCGAATGAAAAACACAATCACCTGGGAAAGGTGT -CATTAATCCCTACCACAAGGTCATAAGATTATACCCAAGCTAGTACTACCTAGGTAATCT -ATGAATCAAGTACAAATTCCTTGAATCCCTTCAGGGCTACATTGTGGGCTGATTGAACCT -GCCGGCCAGTGGATTTGGTACCTTTGTGCTCGGATCCTCTCAAACAAGGAGGCCGAGGAT -GTCTTTGAATTCCTGAGATGCAGCCTTATTGGCAGCTTCTTTAGCCCGCTGGTCCGCCTC -TGTATTCCATTCTCTTGGAATACGCCAGAAGTGAATCTGCATTCCTTCCTCATCCCGCCG -TTTGACCTCCCCGAGTAAACATCTTCAAAGGTCCTGGTTTTTGACCGGTGCTCCCACTTT -CGTTTTCCAGTTGTTCCGTATCCATCCTTTGACCCATCTCGTTGTTCCCTCTACCACGTA -TTCAGAATCAGTAGCAATGACGAGGCAAGTACAATCTTCGCCAAGCCAGTATCGAAATCG -CATAGCCGCAATGACAGCGTGCAGTTCAGCGCGATTGCTTATTTGCGGTTGCTTAATACC -TATGGGACCCGGTTTTTCGAGCAGAAAGCTTAGATAGCCACAGACTTGAGGGTTCGACGT -AGAATTTCTGTAAACGAAGGAACAACCGGCTCTTGGGGTTTGCACCATCGTTGTCTAAGC -ACACGCCATCTGTGTATATCAGGAATTGATTGTTTTCGTTTCGACAAATGAATCGGTATA -CAGGTGGACTAGCCGTCTCAGAACTGCCGAGTGGGAACAGCGACTCGGGCGTGTCACTTT -GATTTGGGGGGTTGAACTTGATGGGGAAGGTGCGTTTCGTGCTGACTCTGCACTCTGGGC -CAGGAGCAAACACACTGCGAGGGAGCCCATCTTTTTTGGAATTCTCCGTGTCATCATCCG -TACAGTCTCTCTGTTCCGAAGGGTTTCTGCATCATCTGACAACCCATCCATGGAGCTGTA -GTCCGTGGTACAGTGATGACGAATTATAAGTTCGCGTGTGCTGCAGACGAGGCGACCATT -GGGAAGCATCACAGGATGATCTGGACGCCAGGCCATGTTAAAGATAATTTCTGGAGAAGT -TCTGACAGAATCGTAATCCTAGGGCTAGAGGTACGTGATGATGTGGATCAACTTGTGGAA -TCAATCAACTGGATGTTTCCAGTAGGGAGAGTGCGAGCCCAGTGGCCAAGGGCAAATGTT -CCTCCACAGCTTCTATGGTTAAGGGTCCACCGGTATTTATAAAATAGCGCCCTAGTCTTG 
-TGTACATCTAAGGCGAATGTGCTTAATATAGCGGTGCTAGGAATAAGTAGATAGAATTTA -TATAGAGTGTTAGACCCTGTTAGGACTGAGGGTCGTTCCTTCCACGATAGATCAATAAGA -TTATCAGAATGGCTTTTCTAGAAGGCGCGACTTTTTCACGTGTGAGTATAAACCCCCCCC -CCTTCGTCTTCGATTTTCGCATTCATTCCAGGTCCCAGCACAAGTGAAATCTCTCATCCT -CAACAAGCGTGGAACCAAATATAGGTAAACCCCTATATCCTCTCTCATCCAAACACAGCA -TGACGCCAGTCCTTGCTTCCGAATGGCCAATCCGTATACATTAACATCTCCAATCTCCAA -TCCCTTGTGCCATATGAGGGAACTCCTAGAGCTGATGAAGGACGCATCTAAAAGCCCCCA -AACATGCCACCCAGCGGCAAGCTAGGCTTCAATACTGGGCTAGCGCTAGCACTCGGCAAC -GGGATAGAAAAGCTAGGTGCCCGAGAGGGGAATGCGCTCGGGGTGGCTTGCTTCGTCAAA -AAGGTGGGGATAGGCGTGCTAGATGGGCCTGGAGTTTGGCTGGATACATCTAGTCCACCT -AGTGCACCGAGGCCATTGATGCCGCTGAGGGGGAATTGGGCTGCGACATTGCTGCTCAGA -GCTGTCAGGGTGGACAGAGTGAGGAGAATGTTGAAGTGCATTTTGGTTGTGGAGGGGTGT -TGGCGGGCTTGGATTGTAGGAAGATGGTGTTCACTGAGGGACTTTGTTTGATGAGGAGGA -GCATGCTGGGAATATATAAGATATATAAATATGGATTGGCGAAAGAGCAAGGAGGTGGTT -GGATTTGTCTAGATCGCACCGCTGTGTGATGAAAAGCGGAACGACCTGGGGATCAGTATC -AACTCTGACAGCAATTCCATATTATCATTTTCGGGGATTGGCCACGATAGGCTGCAGTCT -TTGTTTTTGAGCAAAAGGATTCTTTTAGTTCGATGCTAGTTGTGCAGGAACAAGCATACA -TTGAAGATCGCTGGCTCTATCCCTAGATTAAACATGTAGCTTGCTTTTGGTCCCCTGAGC -TCCTACAGATTTCAGCGCATGAAATAAGCATGTTGAATATATGCGAAGTAGATCAGGGTT -GTTTCAGGGTGTCATTGTTGGTCACGCTGTAGCACTACATCATCCCCACCATCATATGTC -AAAGGTAAGAGATGAGCGACTTATTTACCCTCTGTAGGTGTTTACATTATACACAATCAT -CAAAGTACAACATGGTTGAATCAAAAATATGCAATCTTTCAATCAGCAATAATATCTACC -ACAATAACAAGAGTACTTGTGGAGCCGAGAGAGATATCTGCCCACTTCCCGATAAGCGCC -GAATCACACTTCATACAACTTCCCCAAAATATCCCTGTACGACATACAACGTAAAATGGA -CCCTAAAGTCGGAATCGGCGTCTTCGTTTTCAACGCTGCAGGCAAGTTCGTCATTGGCAA -ACGAAAGGGGAGTCTTGGCGCCGGTACGTGCATCCACATGCAATAGACAAGCAATGATAT -GTAACTATGAGCAATGCTTATCATTGATTCAAGGCACATGGGGTTTACCAGGTGGACACC -TCGAATTCGGTGAATCCTTCGAGTCCTGCGCGGCCCGTGAGACCCTTGAAGAAACAGGTC -TGAAGATTCTGGATGTGCGGTTCCTCAATGCCACGAATTCAATCATGAAAGCAGAGAATA -AGCACTACATTACCATTTTTATGGGAGGAGCCTGTGAGGAGAATGCCGATCCTCAGGTGA -GTGGATCTTTGCTTTTTGTGTGCTCCGTGATTTTATGTGAACTTGCTTTTTTTTTTTCGT -GATACCTTTGCATATTCCCCTATCGTGGCTGTTTACTCTGCAGGGACGGAACGAGGATAC 
-ACAGCTTACTTGTTCCTGTAGATACTTGAGCCTGAGAAGTGTGAAGGATGGGAATGGATC -TCATGGGATGAGTTGCGGGTGTTTGGGGAGGAGGAGATGAAGGCAGGTGCTGGGTTTGAG -GGGAGAAGGCTGTTTTTGCCTCTTTTAGAATTATTTAGACAGCGACCGGATTTTCGGGTT -TAAGATTTCTAGTAGAATCAAGGAATCCACATATTTTCGCGGGGGTACAGATGAGACGAG -ACAGAGAGAAAATCTAGGGAATTACGGTGAAGCGCCAAAAGCGATGAAGTAGCGTTGCTC -TTGTATTCGACTCTGATCGAATATGTGGTACAGATGTTGATTGTGTTATCGTAGGGTCGA -AGGGACAAATTGTCAAGGGCTAAACTGACCAGTTGATCAAGGTGGAGATCTCGCCAGTTT -GCTCCTTTTTGCCAGTTCGCTCTTCTCAACCGCTCGCACTGAATTCAGAGAGCTAACATG -GGACAACACCCGGTTTTCGAAAACGAATCCTCTCTCCAACTTGCATCTGATACTCAGACA -TCTTAACGAAGAGATATAATAGCTCTTGTATTTCATGATATTCCCCATCGTTTCCGTGAG -CACAATGTCAAGTTGATATCCAAAGCAAATCAAGAAACACGCTGGCCAGAATCTTCATCC -GCAATGCGCTTAAGACTCACTCGTAGGACTATTCTCGAGAAGTACGACCGGATTGCCTCT -TGTAACCACAGAGGGAAGACCCTAAGGGAGCCAACCGCCGAGGCATGCCTAGGCACGATG -CTTTGCCCACTTTTCCGAGTAACGATATGTTTTACTACTGCCTCGGAAACTTCCTTCGGA -TCCAAAAGTGGTTGACCGAAATCAGCTTGGTATTCAGTAAAGCCTTTGAATGGGAGTCCG -AACCCAAAGAGGGTGAATGGAACTAGGTGTTGTCAGTCTCATTTGTTTTGAAGAAAGGCT -TCCCTAAACTAACCTCGTCCGTACATTCGGAGCCTTGTACCAAAACTTCAACTCTTGTCG -AAGGCCTTCCTGGAAAGCAAGAGCGCCAGCCTTTGAACAAGCATAAGTCACCATTTCGCC -TATCGTAACAAAGCTTGCAACGCTCGCTACTGTAACGACATGCCCACGGTTCATACGGAC -CATGTACGGAAGGAATTCTTTCAAGATGAGGAAGTGAGAGATGATGTTGACATCGAAAGT -CTGACGCAATTTTTCCTCTGGTGTCTCGAGGATGGTCCCATGGTAGAAAACAGCAGCATT -GTTCACAATAACAGTCGGCTCTCCGTGTGACTTTCTAATAGCAGCCCCCGTTTCGGAAAG -ATGCCTGGCAGAGGTTATATCTGTTTGGTAAAATGAGACGTTTTCTGGAAGCTCAAAGAT -CGGCCGGTTGATATCGAGGATCACAACCCGTACTTTCTTGCGAGAGAGATCTTCCATTAT -TTGTCTGCCAATGCCCCCACTTCCACCAGTCAAGAGGACTAGCTCATTCTCTGGGATCCA -TGGCTCGGTGGCTTGAAAGTTATTCAGAACTTTCTCTGAAATATTTGTGTGAAGGTAGTA -AAGGACCAAAGAGCCGATCAAAACACTTAGCGTAGATGTGATGATTGACGCATTCAAAAA -TTCAAACATGATGTCAATGAACAAAGTCCGAGGAAAAATAAGCACAAGTCGAGCCCAGGC -AATTTTCAACTTCTTATACCCTCTGATGTGCTGGCCCAGCTATACTGCGGGGCTGTGTAC -AGGATCGACTGTCATTTTTGCCGAGATGTTGAGCTTTGATATACTCCGTGCGTTGGTAAC -ACTATGTGTAACAGAAGATATGCTACATCGGAATGATCTTTCTTTGACTTTAAGGTGATT -TTGATGAAATATTGACAAAGGGAACCAAGAAGACGCTGCTTATGGCTTCTTATCATAAAG 
-ATGGACGGATAGAGGGAAAAAGGAGAGGGGGAGGGAACAACCTTGGTATGTATCTACTTA -ACCTGTCACAACTGCATGGGTGCCTGAGGCATCCAGCTCCTAAGACTATGCTGACTGGTA -TATTGAGCTAGCTATTTTAACTAGGTATGTAAGACCTGAGATTCCAAGAAGGTCACCCCA -GCATTCAACATATTCCTATCTACGAGAAACATGGAGCTCAGGGATCAAATGAAGCAAACG -GACCGAAGATCTTCCAACAAACCATGAATTTCGACATCCCTCCCTTCGAGACAGGTCTAG -ATTCATTGTATGGGAAAGGCCCAATTATTTTCACACCATGCGGCGAATCTCCCGGGGATG -AACTCAAAAACAAGTTGGTGTACGCCGGCCGCATACTTGTAATCTGATAGGCCGGGGCAC -TATATGGAGGAAAAAGTTCCCGGCGAGAGTAAGCAAGGTACGGCGGTTCATACTGTACGC -ATGCTGAGGTAGGCCTACCGGGAGGGTGACTTCGAGGTGCATGGCTGAGATATATCTTGT -TATTGAAGAAATTGATGCTAGAAGTGTGGCCGACAAAGGACGTCGATATGTACCCACTTC -TCCTCGGTGAGATAGGGTTCAAAAATTCAGAGCCATGTTCGCAGAACAGTGCCCAAAAAT -TCCCAGAAAATGGATCAGCATGTGAAATTATCCTTAGAAAATAGACCTATTGTGTGGAGT -CATGTCAACTGACTATGGTGTCGGTTCATGGTGTAGATAACTTGTCATTGCCATAGTGGC -AACTATCAGGATGAACATGGGGATGAGAGGTAGGGTTTGCGGGCTGGGTAGTGTTCTTGT -AAACATATTCTATTCAAGATTGAGGGACTCGGTATTCATGCAGCGTGTCAACAAACAGAA -TAGATGGTGGGTATAGGATAAATGTATAACGGACACTATAATAGCATGAAGATCCACGGG -AAGAGGATACACTTTTGTATAGTATGCGGCACGTATGATCTGGTTTGCACAGCAAATCAA -CACAACATGCTAGAATCTTGGGGTGAAATCAAGTTGATAGAGATCAGCGCCCCAGATATC -TGCCAAGCCTAAAACAAGAATTGGAAACACCTTGGCATTTGCTTGACTTGCTTGACTTAT -GCTCAAAACAACGAAAAACAAAGAACCCAATCAACTTGAACCGAAAACCCAGTCGGATAC -AATAACAGCAATGGTCGCAACCTGTAAAAAATGTCAGTCTGTACACACCCAAGATTCTAC -AGACTCGAATACTCACCAGCGAAATATCAACTCCAGGCATTATAATGACATCCATAGCAG -GCCGATATCCCTGCCTTGAGCCTCTCATCAAAGCCAGTTTCTCATTATGATGAATGCTCT -CCCGAACTTCGGCAACCTTCCTATCTCCATCAACGATATCAAAAAGCGCCAGCGCATTTC -CGTGTCGTTCGATATGCAAAGTAAGCGTTTTCTCATCCATGCGTTTCCCCTCAAATGCAG -CCGCATTCTCAAATGAAAGATCAAAATTCCCAGAGGAAGTATTCCCCAGTGTCCACCGCG -GAGCGCCAGTTGCTATCGTTGCAACATCACATCCGGGCAAGCTAACCGACCAACTATTCG -TCCATGATAATTTTCGATGCAGCTCGAAGAGCGGTAGACCCGAATGATCGCGAAACTCCC -GGCATGAGCGGTCAGTGTATTTGCGACCAGTCACCGTGAACTGTGGCGTGCCATCCTCTT -CAGTGATTTTATATGCAACGGCAGAGTGGGGACTGCCGAATGGTCTGAGCACGAGGGTGG -TCTTTGAGCGTGCTATGTATTGATTGCGGAAGGCGATGTCGCGATTTGGTGATCGGAGGG -TCTTGCGGGGCTTGGAGGTGTATGTTTGCGATACCCGGGCGTTGGAATAGTAGGTGTAGG 
-ACATGGCTATTGGCATGAGCTGTTTATTCGTCGCAAGATGTATGCGCTGGGCCTATGGTG -GGGTGACTCTTTATACCTCCAGGGATAGCTTGCATGAATGTATCTTGCGTTACATATCCT -TGTTGCGGCATTTATATCATCCAATACTTGATGATCTCACTAAGTGCTGAGCCACACCAT -GCCAAGGATTATACATGAAGCTATCCCAGGACTTTCAGCGGGGTAAGTGAGATAATGTGC -TTGTGGCTGCATCCTGTCTCGAATGGTGTGTGATTGATAATATGAGCTCAGCCACCTCTC -ATACCCCTCTCGGAATGCAAATGTCCCTAATAATTAGGGAAATACAACATATGTCACATC -AGGAATCCCAAGTATACAAGAGAAAACAACCTCGGATGCATTGCTTAGTAGTGGAGCCAA -TGTATGATGAGTCATGCACCCTCCACCTAGGCAGGGGATGAGCACCGTTAGGCAACCGAA -TATCATCACTCTTCATACAGGTCATTCAGTTACACAATGGGGAGATTCGCTGTCAAATCT -CTAGACCATCTGGTCCTAACAGTCCGCAATATTCCCAAAACTATCGCATTCTATACAACC -TACCTGGGCATGCGGCACGAGATCTTCACACCAGCTAACCAATCTATTCAGAGGTATGCC -CTCCCCCGCTTCCCCCCCCTCATAATCTTACTTAGTGCTGAAAAGGCTATTGACCATAAT -AGACATGCCCTCATCTTTGGATCTCAGAAGATAAATCTTCACCAATCGGGCAAGGAATTC -GAACCCAAGGCCCAGGACGTTATGCCAGGAAGTGCCGATTTATGCTTTCTGACAGACGAG -AATGTGGAAGAGGTCTTGAAAGTATTTCAAGATGCCAGAATGGAAGTATGCGCATTCAGT -CTTTTCTCGTATATCAGATGTAAGTAGAGACTGATCTTTACAGGTTCTGGAAGGCGCCGA -GGTTGTAGAAAGAACTGGGGCTGTTGGGAAAATTCGCAGCGTATACGTGAGAGATCCTGA -TGGAAATCTTATTGAGTGAGCTTATTTTCCTTGTCTCTTGAAATACCAAGTTGGCTAACC -ACTGGCTGCAGAGTGTCGAACTATGTTTGATGAACCAATCTGGGACCAATATCCTCAGAT -ATCTTCTTTCGCTGTGGAACGTATCCTTTCCTATCGTTCTTCTATGAGGAAGTAAGGGCA -AGTTTCGATTAGACCCACCTCAAGGATACACGCACGGTACTTGTCTTGCTCCCTCGCTGA -CGAAGAAATCAACAATGATATTTTCAATACTCTCACCGCACGCTTGACGAATCCCATTGT -CTCAAGTATCATCCAGTTCTTCTTGCGTCTCGGAGGCGTAATATCTACTCGCTTCGGCTT -TCTGTGGTGTCTTGAATTGTTCCATTCTTCTGGTAAGTTAATCGATTGCACCCGTCGCCA -CTCCTACCACCCATCCTATTCAACATTGCCCATGTAATGCAGCGTTGACTTGTCATCATT -GATGGGGTTGAACATCCCTGGATACTACTGGCTAGAAGAGACGATCCCAAAGTAATATCA -TGTAGTTTATCAGAGCCGTATCTACTTGATCATCTGCTAAGACTTTGGCGTATATTTATG -GGGTTTGGAGTGGGTAAGCCGGAAGTTTTGGAAGTTAAGTCATCCTTGGAGAGACAAGAA -ACAGATATGACAAATGACAGAGTGTCAGACCTTTTTGGATCGATACAGGCAATTGTATCA -TATCTTAGCCCTCATCGTTGATCCAGTTTTCATCAAACGGGCAGCTATCAGCACCAACGC -GCCCAGAAGGAAGGTCAAACCACCCCAAATCTGCGCTCCAATATAGCCGTCGGCCCTAGC -AATGAGCGCCCCGCCAAGGGGTGGACCAGTCAGACAGGCAAAGCTCACCACCGTGAAGCC 
-CATGCCGATGCGGACTCCGGTTTTGCTTAGGTCGGGTGTAAGACTAGAGAGCGTTGATGG -CCAGAGACCCTGCACACCATTGGAAAATATGCCATAAAGCACAGCAAAGACGTACAGGCC -TGCGGTCGAATGAACAGCAGTCCAGCAAAACATCATCAAGCTAGCACCTAGGGTCAGTGG -AATGATGGTGTTGAGGGGTCCAAACTTGGAATCGGCCAAATAGGCAGGGATGAGGCGACC -GAGGATTCCTGCTCCATTGGTAACAATGATTAAATTGGTCGACTCCGAGTAACTTAGACC -GAGAACACTGCGGCCAAATGCACCAATGTAGTAGAAGGCGAAATATAACCCCCAGAAATT -CAGGAACATGCCCAGACAGAAAAGTACGTAGGTCGGCTCCCTGAACGCCGTCCATTCAAT -GAAAGGACCAGCTCTACGTGGTGGTAGTCGAGTCTGTAACAGAAGTATGGCAGGGATACT -CGTAGCGAACATGACAAATCCCAACACCCTAACCGTCCAACCAAATCCGATCAACGGCAT -GAGAGCCTTCACCAAGCCGGGAAGCAGCATACCTCCAGTCCCACTCCCAACCAGGCAGAA -CGCCAAGGCAAACACTCTCCGTCCAGAGAAATAGGTAGCGACCAGCCCCATCACAGGGCA -GAACTGTAGACCACTGCCTAGGCCTGTACATATTCCCTGAGCGAGAAAGAGCTGCCAATA -GCGCGTCGATAGCGAAGTCATGAACACCCCGAGAAGCTGCAGGAGGATCCCTGCATAAAA -GACTGGGCGAAAGAAGCCCGCATCCAGGGCCCGGCCGGAGAACGTTCCGATAAAGAAAAT -GAGGAAAATCTGGACGGATCCAACCCACGAAATCGCGGATGGTGAAACGCCAAGCGTCGA -TTGGTAGTAGGCCTGGAACACGCCAAAGCTGGCAATGTACCCCCAGGTGTTGAACGCCAC -AAGGTGGCCCATCATAGCTTGCGTCCATGCTTTTGCTCCTCCGTCGGGCGGGGGTGTAAT -GAGAAAAGACTGTCGGTCGTGTGAAGACTCGAGGTCTTCATTCTTCTCTGTCATGGTGGT -TTGACGTGCAATGCAACATGAACCAGCCGATGTGGGAACAGGATTGATTTGGGATGATTG -AGACTCTTTGTGTCCCACTGGGGTGTCTGAGGGAAGATACACCCTTGAAATGACTCAAAG -TGTCACATCTATAAGAAGCAATTGCGACAACCCGTTTTACCCGCTATACCCGCTATACCC -ACGCTTCTTTTAAAGAGCTATAAATATACTAATAATTCAGATGTCAAGGTCTATTAATTG -ACACATTTAGGCATTTCAAGATCCCCCTAGATCTCCATTTTACATGCCAAGCAACTGCGT -TATTTTATTTATCTAATCCCACTGTCTAGCACTGCCTTTGGTACTTTCTTTCGACTCGTG -TGCCTGGCCTTGCATGTGAGCATCTTCTCCGCTGGGTGGCGAGTGGTGAGCTGGGCCCGA -CAGAATAGATTGACATTGAAGTATCCAGATCCGTCAGCCTCGAGATTTTTCAGCCGAAAT -TGACAGCATTCTTGATCATATCAATTGAGCAGGGCAATTCAATGGTACTCGAGTACTTCT -CGCAGTGTAATTAGCGTCGCTACCTAGAATCATTTCCCGGAAGTGAGTATCGATCAATTC -ATCAAAGAGGTTCTTGTGAGACTCTTTTGCAAGTCATTAAGTCGGTGATTATTGATCTCA -CCATCGGCAAATTATCGACCACGAAAGAAGGGTTTTAAGCGCATCAGTCCATCTACTTGC -GATGATTTCCTTGAAGTTGTTTAAAACTGCTCGGATGTCGTAGACACCGGCTTATGTTAT -GGGATACGTTGTCTCTCCGTCGACTCAGATTCTGCTACATAGACTTGGCTTCGTGGGTTT 
-ATCTTGAGCATTCACTGGGTAATTCTAGGCAAGCGTCTTCTGCGAGTTTTTGTATTGTAC -AATGTCAACATCTTTTGGATACAGTCTAGCGTGATTGTACTATGTGCTAGAGTCACAAGG -CCAGAGAATTTAAAAGCTCACCCTTCTACCTGGACCATTAGTGAATATCAAACATTCTCC -ACAAAAAGCTGGTGGGTTTCACTCCGCCTATTTCATGCGGAATGTAAATCTCAATATCAA -TGACGACATCGTGGATCAGTTCTGGACAGATAGGGCCACTGCCCTCTCGCGTAAACAAAT -GGTCCTTATAAAAGTATCTGAAGGGGTGAATCACAATCCCTGCGACTGCCAATGAATTGT -GCAAATCTCGCCGAATCCCAAGAAATGCTTCTGTTTTACCCGAGTCGTCTTTAGGGTATC -TCCTTTTATATGGGACGGCTTGAAAGATGAAATTTCGACAATAATGCGAATGATTATAAT -CGGATCTTATTTGCTCACATACACGCAGTTGAGGGATTGATTGTCTTCATATCAATATCA -AACATCTTTCCGTGATGGTTCCTGTTCCTGGGGACCGACTAGCTCAGGTACGCGCACTTC -CATTTTGTCAATTCCTCACTTCTTTTTCGGTCTGCTTTTTTCTTTTTTTTTTaaaaaaaa -aaaaaaaaaattgaaaacaagaaaaaaggacaaaagaaaaaGGCACACGATGATGATACT -TCTATCTGTGACTTACTTTTCATGGTGAAATTTGATACCTTTATGTTTCAAGACGGGTCT -CTGAGTGTGCCAATCTTACCTTGTGTTTTGCAATCTCTACAACTTATGTGTCCATGCTAA -CTCAAGTGAAGTCTCTAGTCATCTACTAAACAAGGCTTGATGCTTTTGAGTGAATTGAAA -TATTTGGGACGATATTGAAGTAGAAATCTAGGACTCCAAAGTCAGCCCTATCTTTGATAC -CTATATGTAGGTAGATAGCTTCAATTACCCCAAAATTTATAATTCTGCTATAATTTCTTC -TGCTATAATTCCTTAATAATCAAATCAAATTGTTATGTTAACTAAAACGGATTGAACTGC -TGATTAATTACTACATAGTTATTTCCGCAATTCCGCGATTACAGCGGCCAAAATTGCCCC -TAGCTACCCGTGGACGATTATCAATCGAGTGATAAGCAAGCCGTGGAAAGGCTTCAGGTC -AGCATATATAATCGACTGATCCCCACAAAAGCTATTTATACCACTCATCTAACCCCCCAA -AATGACAACGCGCCTGCAATCCCCCCTATACGCCATCACCCGGGCACTATCCAGGAGCAA -TTCAAAGCGTCTGGTATCCACCTACCAAACTCATCTATATCGGACCCAAACAAAAACGCA -AAAAGCTCGCTCATTCTCCACAACCTCATCACCAGAAATGTCCGCGCTAATAGATCTAGC -CAAGAATAGGCGCACGATCTATAAGCTCGGCAAGAAAAGTCCAGTCCCAGATTCCAAAAT -CGAGGAACTTGTGAATGCCGCCATCCTGCATGTCCCTAGCGCATTCAATACCCAATCTAC -ACGTCTAGTTGTGCTCCTACATGACCAGCATGATAGACTGTGGGATATCGCCATTGAGGC -GTTCGAAGGTCTTGTTAAAGGGGGTAAGATTTCGCAAGAGATGTGGGAGAAGCAGACGTT -GCCCAAGTTGCTTGGCTTCAAGGGAGCGTATGGAACGGTACGTTATTTGCTTTTACCTTT -TGGCTTGGGGTGTCTCCTGGGACTTGAAATATGCAGTCTTTTTCATGGCTAATCAATACT -CAGGTCCTGTTCTACGAAGACCCAGCACACATCAAGCCCATGCAGGAGAAATTCGCTACA -TACAAGGATAATTTCCTACCGTGGGCAGACCACTCCAATGCTATGCATCAGTACTTCCGT 
-ATGTTCACTTTCCATCTCTTTTATCCTTCTTGTGTGTTGGTTTGGTAGTTCTAACGGTCA -CCAGTCTGGACGGGCCTTGAGGCGCTCGGCTTCGGTGCAAACTTGCAGCACTATAGCCCA -CTCATTGACAGCAGTGTTGCAAAACAGTGGAATCTACCCAGTGAATGGCGGAATGTTGCT -CAATTAGTCTTTGGGAGTCCTGAAGTTGATGCTGGGGAGAAGGTACAAAAGCCTGTTGAG -GAGAGGGTCAAAATTTTTGGGAAGCTGTAGGGTTAGACATATACTATGGCATATGCCTTT -ATAATTGAAGTAGCCTGCCCAGGCGATTCTACTAGCGTTTCCAACTGTCATAAATGAAGT -TTTTGATATTGAATATGAGTAAATATTTATCTATATAAGAGTGCACAACCGCTTACACGA -TCTCATGATATCTTATGAGTCTTTTCAATGGTCTTTCAGATATTCCTCTTAGAAATCTTT -GAGATAGGTTTCGCCCTTTCTATAGGTAGACAGTAGAAAAAGACAAAAAAATCAGACCTT -GCGTAAAGACTTTTGCATAAATAACCTCTGGCCTTTAGACGAGTGATAAGGTCAAAAATA -AAGCTTTTATTCTCCCAACACTTGGAGATATCACCATTTATAGGTATAGAAACCAGAATA -TTTATTTTTGACATTTTTGACACTGAAAAAACCACCAAAGAATAAATTATTTACAGCATA -GTGTAGAGAACACTTTATAGGGCGGTACAAATTAAGACATTCAGACACTAAACCTTCCTC -CTCAAAAGGGGTTTGAAGCCTGGCCACTATACAGCTTCGTATTCTGTACGCAGTCGGCTG -AATGACTGGACTCATACGTTACGTAATTCCGAACCTCTGGGCACCTGGGCTAGGCAGAGC -TCGCTATTATCCACATTGAGCTCTCTTGCTCTCTTCTTCTGCGAAGAGGGATAATCTATT -CTTATTCACATCGCTCGCCAATGCCTCGGCCTAAAGTGGACCCAAGGTTCCGCAAGCGAA -TCTCCAAGGCGTGTGTATACTGTCGCAGGACAAAACAGAAATGCGATGGACTGGCCCCCT -GCGAACAGTGTATAAGACGCAATCGCTCGACAAGTTGCGCTTACTCGCCGCATGAGCAGT -CATTTGGTCGTCGTCGCAACCGTCGGGAGTATGTGCACCGTACTCCTGACGCTGTGCCAC -AAGCTATTAGGGAAACGGAAGTATCAGCTGCGGTATCAGGTTCCCTACCGCCGCCACATG -GGACAGAACGATGTACCCAGTCCTACGGCGAAAGTACACCTACGAATGAACCAGAGGTTA -TTGTACCTGAACTATCACATGTCCTATATGACAATAAAGGGAAAGTTGGTAGGTACATTG -GTACTTCTGTCTCTCAATAGGCTTTCTTCTGTATTTAAGCAAAGCCGATTCAGAACTGAT -AACGGAGGTGATCTTTTAGTATACCTCGGGAATTGTGCAGCCCTGTCTTTTCTTCAAAAC -ATCCAACAGTTGATCGAAACTGAAACCGGATGCGCTTCTATTGCAGCAGATGTCGCGGGT -CTCTCGGTTATTGAAGAGTTGCCACCTCTTCCACCTGATGACTTGATGGCATACAATCCC -GAGGATACGGAAGAATTGGAACGTCTAATTCGGGCGTATTTTATATCTGTACAATTCTTA -TTTGATCACCAATATTGAGCGGATGAATAATAGCTAACCATCAACTAGACATGTGGTATA -GTCGATCTTTTCGACAGGTCTCACATCGAAAGTCTATTACATCGCTGGATCAACGGCCTG -GTTCCCATAAAGAGTGGTTCTGCGGCGGTTCTGTATCTAGTCATAGCCTGTGGGGCGCAA -ATTAGCTCCTCGAATATGCTTGACAACCTTCGAGCACAATCTTTCTATCACCATGGTCGG 
-CAGATAGCTTTACTTGAGCTGACTAACTATCCCAGTGTCGAGACTGTGCAAGCATTTGTT -TTAATCAGTATCTTCATGTTAGGATGCTCTAGGCGAAATGGTGCCTCCTTGAACCTAGGG -ATTGCCATTGGTGCTGCAAAATCTCTTGGATACCACCAGTCCAATGCAAATTCAGCCTAC -GACGATAATGAGCGCCGACAGAGGCAAGTGGATTTCACCCTGAGGACATTGGATTACCGA -GAATTAACATTCTGAGGATGTGCAGAGCGCAGATATGGAAAACCTTGCGCTATCATGACT -TGTTCTTCTCTGCAATGATGGGTCGAAGCTCCTCCACCTCTGATACAAGTTTTAGCCTTG -ATGAAGCGCCATCTTCTACCCCCATTGAGGATGAATATGATCAGAGCCTATCTATGACAG -AGTCAGCGCGTGCATTCCTCGTCATGGAACATATAATTAACGATGTGTACACGAAACGAA -CGGTGTCATTAGGCTTGCTGCAGTCTCTTGCCCAGGAGTTACAGGAAAAGTGCTCTGTGC -TGCCGAAGTCGTTACGAAAATCTACCGGGCCTACGGTTCATGGCAGGGTTTGTCAATCAA -TCCAACAACGCAATCTTCGCAACGCTCACGTCACTTGTTCGTACTACTTCGCCATGATGC -TGCTCACTAGACCGTTCTTGATTACTATCTTGAGGGCGAAGACACCCTCAAAATCTGCGA -GAAACCCACGTGCAAGAGGAGCCAACAACGGGGGTGATATGGATATATCCTCGGAAATCG -CCCATGGTGCGACAACTTGTATTGAGTCGGCTACGTATACTATACAGCTTTTGCACGAAT -TGCTCACAGCCAACATGCTGTTTAACAACATGCCATTTTTTATGTCAGTCACATAGCNCC -TCTCTCTCCGCCCCCCAAATCTCTGTGGCTCCTCTGCCCTAAACCCTGCATCATAAGGAA -ATTAACCATCATTAACATGGCTTTAGTGCGTGGGTTTTCGTCTCTGTGCTCGTCTTGAGC -TCGGCCTATTTTGGCCAACTAGATCCTCAGACATCGATAACGCGAACCCTGCGCCAAGCC -GATGACATCCTGGCTTGCTTTGCAGTGAACAGTCCGCAAGCCAGGCGGTATCGCCTTATT -TTGAAGAAACTTTCAAAAGCCGCAGAGTGCCAGAACCAGATAAAGTCGCAGTCTCGTGTT -CTGTATATGCCACGATTATTCCACTTGGACTCTGACGGCTCTGGAGGTGGTCCACTCGCC -GCGGGTTCCATTTCCCCCAAGCCCATAGAACAAAATACCAGCAGATATGGCTGCGCTTTG -GAAGGGGAAGCAGCAGATGGTGGAAATACAGATCAGCCTAAAAGATGTAACTGTTGTGGG -GTTAATCCAGGCAGACGAGCAGCAGGTCCTGTTGGTAGGTCCGAAACCTACCACAACTCT -GCCCCATTCCCGCAGTCTGTCTTCCATTCCGAAGATATCTTAAATTCTTCTGGCTCGAAC -TTGGGCGGACTCCCAATGACAACTACGAACGACTCGCTCAGACATGGATCTCTATTCCAG -CTTGAAGATGATGCGTCTATTTGGGACTTTACATGGGCCGGCTCACTTTAGTCCAGCATA -GCATCTACCGACATTGTAGAGAATAGAAGAAACAGATTACAACAAATGTCTTCTGATTTT -GTCACCGAACTGTAGGTCCGCTTTATGGCCTTCCCCGACTTGGAACAGCTCATCATCTTG -GATTTTTGTAGAGACTGAGAATAATATACACTCCTCGGTGCTTTAGATACTGAGTGGCTG -TGGCATCCCCGGCTTCGCCTCATAGCTAATCCTTCCCGCGGGTAAAAAAAACAAGGCTAC -TAGTGCACCACCCCTCACTTCTGGATAGTGATGCGTGCCGGGGCCGGGGCTAGTCCAGCC 
-CTCGCCTTGCCAGCCGCTCATTCCCATCAGCTCGGCGTTGGGACTGAGCTGGACAACACA -ATTGATCTCGCCATAGGGGTGAGCGTGGTACTGGCCACGGTATTCATCGACGCTCTCCAT -GTAAACGGTCGTGATGCTGAAGTAGCGATTTTCGGGGCCCGGCCAAGAGACCTTGGAACG -CCGGTACTTGGGGCCGTCTAGCTCATCGGTCGCTACCCAGCCCTCATTGTTGGCAAGCCC -GGCCTTGATGAGCTCGCAGAATCCGTCATAATAACGGTTGCCCCGGCCATAGTCAGCATT -TAAGCGGGCTTCAAGGTCCTTTCCAGGGGTTCTGAGCAAGCGATGTCAGAGGGAAGCACG -AAAAATAAGGATAACGAGAGAGGGGAGGAGGGGGGCGCGCTGCTCACAAATTTTCGACCT -CAGCAAAAAACTCCCCCGCAAGTTCGACGAGACGTAGCTGATTGGGGTGGACTGGCTTGT -TTTGGACATTGATAGACATTTTGGAGATATAGTTCAAAGCATGTATAGGGAATGGGAAAG -GCGATGCGATGCGTAAGAAACACCAGAATAATCAAGCGCTGTGGCGAAGTGACTCGGACG -TAGATGGGGAGATTTATCATTCAACGATATGAAATGAGAGCCCAAAACAATCAAAAGCAC -TTAGCAAGCTTTACACTATCGATATGGGGCTATTATTGGTGCAAAGTTAACTATACAGGG -CAGCATCCGGTGCAGCATCCGGTGCAGAACCCGATGTACCATGTCCGAATAGGTTTACTG -GCATAGTATACTACAACGACACCAGCTAAAGCACGATGAACTAGAACGTGTTTCCTCCTA -ATTGGTCTAACCGACATGTCTCTTACTGTCTTGCTCACTCTCTCGCTCTCTTCCCCTTCA -TATCCGATATACGAACACTTCCGAAAATCACGCACAGGACACTCATCAGTTTCCGCCACG -ATAGTAATTTCGCTCATGCTTGTGTGTCTATTGTATGAAGTACAGATAGGGAGAGGGCAG -TAGTATAAAACGACCGACGGCCGACTTGACCTCCGACGATATTACTTACCCTACCCTCTT -GGAGCATTTTCTTATAAGCCTTCTTCCTCCCATCTCTCTGTCAACTACACTCTTCTTCCT -ATTTTATGGGGGACGGTCTCTCTTCTTCGTTTTGTTGTACATTACCCACCACGGCAGAAT -GTGGTTATCTTTCGCTGCGCCTCCCCTCGCCACCATCCCAATAGCGGGTCTGGCTCTTCT -TCTCGCCTACTATATCAGCACGTACTTGTGCAAGAATAAGGACTTGTCGGGGATTCCTGC -GGCCTCGCCATGGGCTGCCTTGACGAGATTCTGGCTTCTACGCGAAGCAAGATTCGGTCG -ACGCTATCTCACCATCGACAAGGCGCACCAGAAGCATGGTGATATGGTCCGTATCCAGCC -CAATCATGTCTCCATCGCCAATGTCGAGGCCATCAATGCCATATATGGTCATGGGAATGG -ATTTCTCAAGAGGCAAGAGGTCCACTATCGCGCCGACTGTCTGTCCTGGAGCTCCACGGT -TAACATCCACGCAGTGACTTCTACGATGCCTTCGTCGCCGTCTCGCGCAGTCTCTTCAGC -ACTCGCAATCGCGCTGATCACACCCGCAAGCGAAAGATTGTCTCTCACACATTCGCCGCC -AAGAGCGTCATGCAGTTCGAGCAGTACATGCACCAGAACCTGAACGAGCTCCTGCGCCAG -TGGGACCTCATCTGTGAGAGAGCCCCTGCAAACACCAAGTTTGTGCGCTTTGATTGTCTC -CCGTGGTTTGCTTATCTCGCCTTTGACACTATCGGAGACCTTGCCTTCGGCGCCCCTTTT -GGCATGTTGAAGAACGGCCAGGACACGGTCGAATATATGGACTATGCTGGCGGTCCTTCC 
-AAATACATGCGCGCCATCGAGGCACTAAGTAGGCGCGGTGAGGCTTTCGCTGTGCTGGGA -TGTCTTCCATTTTTGAAGACCATCAGCTATCTCCTGCCCGACCCGTTCCTGCGACATGGC -GTCCGCTCCGGAGAACAGGTTGCGGGGATCGCTATTGCACGCGTCAACGAGCGCCTTGTG -TCGACAGCAACAAATGACAATAAGCGAGTGGATATACTAGCACGCTTAATGGAGGGCAAA -GACATGAACGGCCAACCCTTAGGCCGAGAGGAACTCACCGCGGAGGCATTGACTCAGCTC -ATTGCCGGTAGTGATACTACCTCGAATACGCTTACGGGGTACGTTTTCCCATCTTTTCTC -TTGCGTCGAGTCTGAACAGGAAAATTTAATATTCATAGTGTCTTCTATTGGCTTCTAAAA -ACTCCGGGTGTTCTTCAAAAGCTCCAGGCCGAGCTCGACGCTGTTATCCCCTCGCCAAAT -GACGAGATTTCTTTCCAATCTGTCAAGGACCTCCCATAGTACGACCGTCTTTTCCCTCTA -TGTGCGAGCAATAAAACAAACTAACGTCATCGCACAGTCTTAAGGCCTGCTTGAATGAAG -GCATGCGTATCCACTCCACATCAGCCCTGGGTCTCCCGCGAGTTGTCCCTTACAATGGTC -CCGCTGTAGATATTTGCGGTCACAAATTCGAGCCTGGTACCGTTTTGTCGGTACCTAACT -ACACGATCCATCGCCTCGAGAGCATCTGGGGCAATGACAGCAACCAATACCGACCCGAAC -GCTGGGAGAGCTTAACAGCGGACCAGAGGAAGGCATTTGTACCGTTTGGTCACGGACCGC -GATCCTGTGTGGGACGCAATGTTGCGGAGATGGAGATGACGCTCGCGCTGGCGAGCTTAG -TTCGGCGCTACGACTTCGAGCTCTACCAGGATAATTTTGACACATGGGAGGGATTCTTAC -GGAAGGGGTTTCACTGCGAGGTTGGAATTTGTAGGAGAACGACATGAAATTTGGAGGGAG -ATGCAGTAAATCAACGAGGGTAGGGTATGGTTAAATATCGTGCGTAGAGTAGCCGGGTGT -GGTTTGATATTTATGAAAATTGAACATTATAGGAAGAAAAGAGCCCAAACTACGAGCAGA -AGGATTCGTTAAGCTAGAGTCAAAAACGCATATATTTCACATATTTATAGCCCTAGGGGC -CTCCAGCTCTCCGACTGTCCCTTCCATGTGTAGGCCCCATACGTTCTAGGTTATTGACCT -AGACCACCTATCTCCATATAACGACCGGCTAATGTACTCATCATTGTAGTATGGTTGCAT -AAAAGCACACAAATTATCCCGTTACAATCGTACCGGATGTACATACTTAATGATAGCTGT -ATGTTTACATGGTATTAGTATAAGTAGTACGCCTCAGATGAATGAATGTATGGATTGAAT -TTGTTAGACCTGTCACCTTTACATGTGTAGACTAGTAGACTTGTACTTTATAGACTTGCT -TATTACTCGATTCATAACCCAGGGGGTACGTTGGCAATTGGGCAGCGCATCGGGCCGGGT -CGCTTGGCACGTCCACATACCCCCTAAGAGGTTTATGCAACAAAACGACCCCTCTCCCTC -TGCTACCATTATTCCTGCCAAGCAGCCCGAAGCAAAGCCCCAAAGGATATGAATCCAATG -CAGCAAAGTGGCGAGGTAAACCTTGTACCGGAGGGCCCCCTTAGTGGCGGGTCACCTGCT -GGGACTTCCCAAGCCGTGAATGAGTCCAAACTAATCAAGAGAGAGGGCCAATCAAATCCC -ATTCTGGATTCCGTATCCAACACTCCGCTGAACAGACCAGGAGTCCCCGACATGGCAATT -GGCGCTCTAGAACCCATCGACCTGGATGGTATGATTTCTCGGCTCCTAGACACGTCATCG 
-ATCAGAGCTACCAAGACGGTGTGTCTAGAGAATTTCGAGATCACCGCCATTTGCGAATCC -ACTCGTGAGCTCTTTCTTTCTGAGCCGGTGCTTTTAGATTTGCCTGCTCCCGTGAAGATC -GTCGGCGATATTCATGGTCAATACATCGATCTGATCCGAGTCTTTGAAATGTGCGGATTT -CCACCGGAATCAAAGTATCTTTTCCTCGGCAACTATATAGGTCACGGGAAGCAGATGCTA -GAAACTATCCTCTTGCTACTATGCTATAAACTCAAGTACCCGAATAACGTCTTCCTGCTT -CGCGGCAACCACGAGTGCGCCGGTGTCAGTAGAATAGGAAGCCTCTACGATGAGTGCAAG -TGGCGTTGCAATGTTAAGATGTGGAAAGTTTTCGTCGACACTTTCAACTGCCTTCCTATA -GCAGCGATCATATCAGACAAAATATTCTGCGTTCACGGTGGACTGTCTCCTACTTTGTTA -CATATGGACGATATTCGCAGTATCGCTCGACCTACTGATATACCTGATCATGGGCTACTG -ACCGATCTTCTATGGAGTGATCCGATTGGAGTAAACACGGAAGAAGACTGGGTATCTAAT -GACCGTGGTGTGAGCTTCTGCTTTGGTAAAAACGTCATTAAAGACTTTTTGGACCGACAC -GGTTTTGATCTGGTGTGCCGGTCCCATATGGTGGTCAGTAATGGATATGAATTCTCTGGG -GACCGGACCCTAGTGACAATTTTCTCAGCGCCGAATGTGAATTGACTTCCTAACTTGTTA -AGCGATTGCTAATCTTCCCTAGTATTGCGACGAGTTCGACAATTCGGGTGCTATCATGTC -CATCTCGAAGGAGCTCCTCTATAGTTTCGACCTGTTGGAGCCCCTGGATTGGGCTGCGTT -AAAGAAATACATGAAGGAAAGGCGATTCAAGCGATACAATATGCCAAACAGATCTGTAAG -TCTTGTGTGTTCTTGTCTCGTGTTAGGTGATCGTGTGCGACAATGCTGGGAGTCCCAGCA -TCTACCCTTCAGCTACTGACGAATGATATTAGACCGAGGTATTTCCAGCCCAATCATGTT -AAATAAGATATATTGGGATATGTGAATTCCTTTTAGTCAGTTAGCCATGTCATCCAGGAA -TTTCCCTGGTGCGATTGCTAGCACCCTGTATTGATCAGCCGGTTTTGATATGATGGGTAG -ACCAAGCCGATAACATAGAACTACATTGGAGAGATGCCTCTGTCTGCACAGCGGCTTTGT -GAATGATAACGTAGAATTTCAGCAAAACTTAGGGCCTCTACACACAGACAATCCTCGACC -CGAGCACGAAGACCAAAAAAAAAAAAAACAATCACCCAGATCAGCCAGAAAACCAAACAT -AGCTCCCACCAGTACCAGCTATCTACCTCCTATCGCGATACCAATACTCAGGCGAATCAC -CAAGACACTCGCCAACAGTAACAGCTATAAGCGGAATCCGTTAGCCATAAAACCATAGAC -ACAGACAAACACACAAAAAAGAAAAACCTACCATTTAACCCCCTCTTCTCAATATCAGCA -AGCATATTAACCGTCAAGATCTCGACCGTATACTGATGCGAATCATGCGCCAACACAATC -GTGCCACCAGCGTCAACCTCCCTCGAGAACTTCTCAAAGCTAACCCAATTTGTATCAGGA -TTATCATTCTCGTAGTCTTTCGTATCGACACTAGCCCCAATAACATGGTATCCAAGATCG -GCCATGGCACCGAGCACGACGGGGGTATGTCTGAGGAACGGAAGGCGCATGTATGTCGGC -CAGAAGCCTAGGATACGCACGAAGGCTTCTTCGAGGATTTTCATTTGATGCACGATTTCT -TCATATGGAAGAGTATCAAGAGAGGGGTGGTTCCATCTGGATGGCTATTTGGTTAGTGTA 
-TGTTGGTGCCTGGACTGGCTGGGCTGGGCTGGGGAAGAGAATGCGGATGGGTGGTTGGTA -TCAACGAACGTATGAGATCCCAGTTGGTGTCCTTCCGCCAATATGCGCTGCACTATCTCG -GGCATGGCATCGATACTTCCTCTGTTCTGACCATTGAGAAAGAAGGTTGCTCTTACACCA -TGTTGAGCGAGAGTATCTAGCAGGTGGGATGTATAGATGTACGGGCCATCGTCGAAAGTG -AGGGCGAGAGTGCCAGGAGTTGTGCATTGGGTGATTATGGCTCCGAATGGTAAGTCGGAG -GCATTGGACTGGGGGATTTGGGCGGTAGTATTGATCTTGGTACTGTTCCTCGTGGGAGAG -GCGTAGGATAGGGTTGTGGTTAAAAGAAAGAATGGAATTGAGAGAGTGATGAAAGTGATG -AAATGCATTACGACGAATGAATAAAATAGGATAGAGTCCACGGGTATGTAATGATGATTG -GAATGATTGTTGGTGCTGGATTGGAGTGAGGTCTGGTTGAGATGTTCCAAGGTTGCCCAA -GCATGAGGCGACAGGGCTACTTGGGCATCTACTTCGCTGCATCCTGCAACTTGGAGCAGA -CACAGGGTTCAATAATCTGAACATAAAATCTGCATTGAATATAGCTGAATGTTTGAAGAT -TGCATCCCCAAACGCAGGGTAGATATTTTATCTTCCACATATAAATATGAATACATCAAA -TTAAAAAGGTATCAAAAGGGTAGGATAAGACATTCAACGCGCATCTTTCCCACAGAAAAG -AGCGAGAAGACAGAATAAGAATCCTCCAGAACAAAGACACCCGTCTAACCCTTCCGGGTG -CTCTTCTTCGCCAGGCGTCTCTGTCCCTTCTGAGCCTCACGATCAAGGAACTTGCGCTGC -TCGTCGGCACTCAATCCACGCAGAATGCGCTCACGTTCTTCCTTCTTGATCTTCTCGGCG -GCAATGCGGCGCTCCTCGGCCTTCTCTTCCTCCTCGACACGGCGCAGCTTCTTGATCTCT -TCTTCGCGAACGCTGCGGACCTTACGCATGACTTCGCCACGGAAGTGGGCGGAGGCGACC -AGCTTGTCGGATAAGAGTAGGAACTGGTTGAACAAAGGAAGAGTGGAGGCGTAGCCAGAC -GCGGAGGAGGCGAGGTTAGCAGCGAGCTGAATGCGCTTCTTCGGGATGGTCTCGTCGATC -CTGGCGACACACACATTCGTTAACAATAGATCTGGTCAAATTATCGCACTGAGGCTACTT -ACTTGGTGGGTCTGTCAACGGGCTGGTCTGTGACGATCAAATATTCAAAGTCATTACCAG -CCTGCTCAATGGCCTGGATCAACTCCGGGGTGAGAAGGGTATCGCTAATCTCGGCACTTT -CGGTCATCACGGTGACCCATGAGGGAAGCTTGGGGTTATCCTTGGAGAAAGTGAGGGAGG -CATCGTAGCGGTCGTTGCGGAACTTGCGCATGTGGCTCTTGTGAACAACAGCCCAAATGA -AGCCGTCGTAAGTGGAGCTAGGCACTTTAATGGGAGCAGAGTCCTTGGCGAAAACGGGCA -CGAGATCCTTCTCCTTGCCATCAAAAGCATACAGTAGGGCCTCGTATTTCTCAACGGGCG -GCTCCCAACTTTCAAAGAAGAAGCTGAAGACATACTCCATGATGAACGTGATGGGGTTGT -ATCGCTTCGGCATTTTGATGTTGACGTCGAGGAAGGCAACATTCTGGCGGCCAGTGGCAT -ACGCGGCGAACTCGGAGGCAGATTTCTCCTTCAGCAGCGACTCGGGGTTGGCGAGCTCCA -CAGTGATTGCATCACCGTCGGCGGCAGTCGATCGGGAGATTCCATCAAATCCAACGACGG -CAAACTCCTTTTGGAGAGAAGGTGCATGGGCCTGCGCCCAGTCCCTCGCCCTTCGTCGGT 
-TCTTACGCGCTCCCCAGAGGTGGAAGAGGACAATCAAGAGGATGACAGGCATGATCATTG -CCTCCTGCTTAAAGTCGCTTGGGGATGTCCTCTCCCATACCCGGTACCAAGCTGTGTAAG -GAACAGCCTTGGCGGCCAGTGTGTTGGCAGCTGGGACGGAGGTCGTGTCAGCGAGGAGGG -ACGCAGGCGACGGGTTAGGAGCTTCCACGAAATCGGCGAAGTCTACCAGAGTCAGCTCAG -CAATTGAGATACATCAATTAACATACCATCATCGGATTTAGCCTCCTGGGAGCCAAACAC -GTTTTTGAACATGCCCGCCATATTGGAATTGGGATGATGGATGGAAGGGAAAGGAGTGGG -AAGGAATCGAGAGATGGCCGGTGCCCGGTGCGGAGCTAAGTTCCTCCCGGGTCTAGCGCC -TGTCTCCAGCTTTTCTACCCGCATCGGAGATTTAGACGACACGATTGGAACAATTTGGGA -AACTCATATAATTCTCAACCCAATTTCCAGCCCATCTTGTCTAGGTAACCCCATAACCTT -CCCTAAGATAATCAATACAAAAAAAAAAAGGCTAAAGAGAGTGTGGTATCATGTAGAAAT -GATACCCTCGAACAAAATCCAACGCCAACTGTACCATGACAGAACCCAATTTTGTACAAT -CCCGCTCCAAAGAAATCTCAATCTAGCTCAAAAAAGGAACAAAAAAAGACCCAAGAAATG -GCGAGAACAAGAAAGAACAGCTAAAGAAAATCGTAACGGTCATGCTTGAGGTTTGCTCAA -GTGTAAAAAAGATGAGGGCAAGCTAAAAGCTCAGTGCCCATAAGCAGGGGGAGGACTGTC -TTCATGCTGACGGCTCCAGAAGCCACTGCATGCATTTGTCATAAGCTTGTCCTCACGCAT -CCGACGAGCAATTTCACCGGCCAGTCTTTCAACCAGAACGGAATCAACGCTTTCATTGTT -GGGTCTTTCGTCTTTGTTGAACGGGAGAACAGTCGCAGGTAGCTCGGGGCGCTGATTGCT -TGACGGTCCCGGCTCGGCCTTCTCCTGAGGATAACCACGCACAGTAGCGCGAGGGGTGCC -GTCGACATCCTCTGCCTTGGGTCCGCGGATAGTCTCATGGTGGTATGACAAGTCCCGGTC -TTGACGTGTTGGGCGGGAGAGGCTGGACAAATGCCTTTCGGGAATAATCGGCAAGTGGGA -GTCGACTTCGGCTGACAAAGGTGGTGTGACTGGGTCATCTGACGGCTCGTTCACAATGGG -GAGAGGCGCGGAGACGATGGAGGGAGCTCGGGACGACTCGGTCACGACACTTTCATCGCT -TACGCTCATGGCGTCATCGGTGGTCTTGGGGCGGCGCAAATCCACAACACGCTTCTTGCG -TTTCCTAGAGTGAGCTTTGGAATGACGGCCAGCGCCCAGGCTCAGACCGTAGTTACTCGT -ACTGAATGAATGGCTAGACATAGATGGTCGACTTTGCATTGAGACAGTGCTGGCGGTTTC -GTGAAGGGAAGACATGCTTCCCATTTGAGAGTGTGTACGCGAAAGGGTCGGGCTCAGTGG -AGAGATCACACTCGAAGGGAATTCGCTGGTATCGATTGGAGAAGTCCATGCTCGATACAC -GACTTCCTGGATCGAGGGAGTGAATAAAGATAGCGTACGTTGTGAATCTGTCAGAGCGGC -GAGGCGAAGTAAATTCTTTTGCGAGAAGAGGGAGTGGGTCTCGACCGATGAATCGAGGGA -GAGCGATGCGATTTCCGACTCGTTTAATGCATTTCCGAGGGAATCAACGGGATCCTGAGG -CGGGCTAGCTAGAGGATCTTGGCCAGGGCCTTCGCTTCTTGACTTGGCTGTGTTATCGGT -CTCGTCGTTCATTTCCTCCCCTGTGCGATATTCTGGCACCCACAACCGGAGCGATAGTCG 
-ATGGATAATGGCCGGGAGCTCGTCCATGAACAAAATCCGAAGCTGGGCTTCGATTTCTTT -TTGTAGGAAGTCGCGCACGAAGGGGATCGAATCGAAGGTCGAGGAAACCTTGAGCGACTC -AAGCGGATCATTGCGAAACACGACTGTAATTCCTTTTTGCTTTGAGAAAACCAGTATGAC -GAATCCAGAGAGTTTGAAGTCGGAGAGGGTGATTTGTAAAGGTATAGTGAGCGGGGTGCT -AGAATCGCGACAGTGTCAGCAAAGCATATCTAATAAGATGGATTTGGAGAAATGATAAGT -GGGAATGAAGACTTACGCTGCAGCAAGAGGTACTGGCGAGCCAAAGGTTGGTCGAGTGAG -GAGGAAGGTATTGAGTGGATTCGCCTGAACTCGGGTCTTCAAGGTGAGAAAAGCGTCACC -AGTATAGGACATCTTGAATATGCCACGAAAGCGATCTTCAGCCAAGTCACCGATTTCCAA -AATCTCGAGTTCTGGAGGGATGGAGCCCAAGTTCAACTCAGTGACGTGGATATCGTCAAC -GATTATTGGAGGTTTTGGCGATTTATTTAGAGCGGCAGTGAGCAGATCTTGGGCGCGAGT -GTAAAAACTCGCGTCCGCCATCAAAGGCGACCAGTTGAAGTTGAACGCCATACCCAAGCA -TCGAGAAAAGTATTAAAAGAATATTAAGAGGACATTAGATGACATTAGATAGTCAAGgag -agagagagagggagagGCGGGAGGCGGAAAATGAAAAAACCAAGAAAAATGGCAAAACCC -GAGGCGCGTGCTCTTCCGGGGCCACTTGACTTCACTtttcttctccctttgattcctttt -ttctttctttttttctggcttcagttttctgcctttctttTCTAATGAATCATATCATTC -AAACGGCCTTTCCTTTTCTTTGCTTTGTGTATAATCCTTTCAACTATAGAGATAACCGGT -ACGTCACAATGTGACTTGCAGGAACCACTTCCGGTCAGCCCCGGCTTGATGTCCACTGTG -GATGGCCTTGAGCCATTCACCATTCGAGGATTTTCGGCTTGCATGCACCTAGACTCTTGG -ACTATCAACTATTTTATGGCTATTTTTTCCCGAAATCCGGAGAAAGGGAGGAAAACACGG -GATGTGTTTCTAACATGGAGGTGTGCTACTTTGTACTCCATACTCCATACGGGGGTGCAG -ATCTTCAAGGTCGAAGTCCCACGTTTCATGTAAGATGTAAGGGAAAGACTCCGAGAGAAT -GTCCAATATGTCTCAATATCAAAACAATGAAAACGAAAATCAGAATGTATCCCTAGTATA -ACATGCCAAATGTACCGTTGATTGAGTCCAGGCTACACCCATTCTCGGAATTGTGACTTA -ATCGTGGACATCTATATCTTTCTATCAAAGAATAATGCCGATTCTTATGTATTTCCTGGA -AAGGTCATTTTTAACAAATCAAGCCTTCGAAAATATCCTTACAGACATCACAAGGCCTTG -GAAAAGTTCAAAGTAATATAGACAACAGTATGGAATATATTGTACTCCCTATACGGagtc -caacatacaaacaaaagtccaaaggccaaagGCAGAATGTTTTCACCGCAGCAACCCTGC -GCTAGACCCGATTAGGCTAAATATCAACATACTCCTTCACTTCGGGTCCAGATCATCTTT -CTCGATCACACTCTGTCGAACAGAACCGTCCTAATACCTTAATACAGGCGTTTCAAATCC -CAGACTAATATACTGGGTGTATAATTTTCCTCGACAACGTCAACCCGGTCACCCTCGCCC -AGTCTATAGTCACGCGACACCAATGGAGCCCAAACAATCCAAACCCAAGTCGGTGGCTCA -GCGCAGAGTGTACGGGAAACGAAGAGCCAATGCCCCCAGGGCGGTATTTGACCAAGGAAG 
-CCCGATGAGAGAAACCAGATCCAGAACCCAACATGTCGATCCCGTGGAAAACCTCCAAGC -GAAACTAGCCCAAGTGACAATCGATAATGACACGACCCACCAACCAGACAATCATGCCTG -GCCTCCACATATTGAGAGAAAACAGCAGCAAGAGCTTGCAGAAACTCCTCCCGATGAATC -AAGTCCGTCCACGACAGAGAACACTTCTGACACCCCGACGACGCAATCCTCTATCGAATC -CTCCTTTGAGGAAGCGCCAAAACCGAAGCAATGCGAAACGATGGTGGAGGTCAGAATATG -TCCGGAGACTCCAGAAAAGCTCCCTCAAATTACATCAGATTCCCCAGCTCCGAAGTCAGA -GTTCACAAGCGAGGATCGAGAAAAGCCAAGTACAACATCTTCCTACAGAAGGAGAGCGGA -AAGAAAAAAGACTCCTGCGCCCAGAAGGTCATCTGGAGTTGTTCACGACAGCAAAGCCAG -CGACTATGTTGGCCCAATTCTGAGTGAGGCACTATCTCCAATTGCCGCCCAAGGCATTCA -GAGGTTCGATTCATGGGCTTCGCGATCAGCAAATATGTTTGATGTCGCAAAACTCGCGGA -GGGATCTTACGGTGAAGTCTACAAGCTTCATCTGCGGGAGGAAGCTTGTAGACCAGTGGT -TTCAAAATCAAAGTTGGCCAAACTGAAATCATATGGCGATGGGGTCTTCAAGGTCGTACC -TCTTCGAGCCAAAAGCGGTCCCGGATCCAAAAAATTCACCACCATTGACGAGATTGTTTC -TGAGGTCAAGATGCTCAAGTATTTGGACCCTATCCCTGGCTTCGCGCGCTTCAGAGAGAT -TCATGTTGTTCAAGGCCGTTTCCCAGAATCGTTCCAAAATGCATGGGATCACTATAAGAT -GACTAAAGATGACTGCATAAATCCTAACCCGTCCAACAAAAGAGCTTATCCCGATACGCA -GCTATGGGCAATTGTGGAGATGGACGATGCAGGGTGTGAACTGGAGAAGTTCGCTTGGTC -ATCGATTTTCCAGATCTATGACATCTTCTGGGGAGTTGCCATGGCTCTGGCCCGGGCAGA -GGAGTATGCTATGTTCGAACATCGAGATCTCCATTTGGGAAATGTTTGTATACGTTCTAC -GCGGGAAGATGGCTGCATGGATCCTCCCACAGAGCTTGATGTTGCACGTCAGCCATCCTC -TAGTGGATTTGGAATCAGCTCACTTGAGACTACCATCATTGACTACTCGCTCTCTCGAGC -GGATCTGCTCCTAACCAATGATCCCACCGGCCTCACTGAGGTTGCGTCATCAGACCTGGA -CAAGAAGCAATTGTTCGATGCTGTTGGCCAAGATGAAGACGAGATTATGCAAAGAAACAC -CTACCGATAGTATGTTACCCCATGTCTGCTCATCTGATTTCTGGAACAACTTGCTAACAA -TAAAAATAAGTATGCGCGCAACACTTTATACTGGTTGCCCCATAGAAACAGAAAAAACGA -CGGATATTCCTGGCATTTGGGCAGAATATTCCCCACGTACCAACTTGGTCTGGCTACTGT -TCCTGCTTCAGAGTCTGTTCAAGAACCGCAAGCCTGAGGCATCGCCTACACAGCCACAGC -GAAAAGCTCTTGCATCTTGCTCGCCTAACAAGATGACACTGAAGCCTGAGACGGCCAAAG -GGAAAGACCAAAACTTGGCCGGGTCCCTCGTCAAGGAGAGACAGACTAAGAATGCCCACG -CTGGTATTTCCCGGCTCAAGCAGACACTTGAAGACAGGCTGAAAGCCGTTCTCGAGCTTC -TCGACCTGGAGCATGGACATGAAGACATGTGTTGCGCTGCGGATTTGGTAGCCTACGCGA -TGGACTCGCAATGGCTGGACGAACAAGACTTTTTCTAATGATACCCTGGTATACAAAAAG 
-CATTAGCACGTGGAGTTGCATGGATTTTGTAATTTGCATGATGGGGAGTTATCATTTGTT -TACTCTGTACATATTGTGGGCTTGGGGCTAGTGTTGGGATGGGTATGAGGCGCATTACAT -TGGATGCATCTCCAGTACAAAGGACTTGCATTTATATCATCTTTCTGTTCTACCTTTTTC -TATACGGAATACATGACTCTACTTTCGCCTCAAACTACGGGCATAATCAAATGGAAGCAT -TGTCGAGGAACGTGTCACATCCACCATAAACCGTCTATCGGCTTACAGCCTGTCCTATGA -CATGAATTTGTATCTGACACTTTTTTTTTCTATTCTGGTCAAAAGGTCTAGCCCCCTAAT -TTTTCCCGACCTTTTGCATGTTATGTTCAAATATCCCAGGGAGGAAGAAACAAACAGCCC -AGCCGCGACCCCCGAGACAAAACAGTAAACACCATCGCAAAGTCATTCAAAAACAAACGC -TTTTCGGTCCCATGCTTTTTCACGATTGAGACGGACCACCGCCTCAACCGGATATGTTGT -AAAAATTGAAAATGGCGTAACATCGTGGTTTGTGTGTCGTAAATGTAGAAAAATTACAAA -TCAGATCCAGCACCGGCACGGAACAGCTTGACCATCAGCTCAGGTCGGCTCCAGAGTGAG -AAAGACTTCTGGTTAGTCCAGTTGACAGATTCGGGGAGCAGCTCCAGGTCGAAGTTCCAC -AGAAGCTTAGCAAGCAGAAGACGCATCTCAGCGTTGGCAAGGCTGTAGATGAAAGGTCAG -GAAACACTGCCAGGTATGGCTTAGGCGAAAGCATGGATGGAGTGAGAAGGTAGTCATAGA -ATGTGTGCTTACTGTTGTCCGAGGCAGTTGCGTGGACCATAGGAGTAAGGCTGGAAGGCC -TCCTTCTTGTCATTGCTGTACTCGTTGGACTCGCTCAACCAGCGCTCTGGGACGAAGGCT -TCGGGGCTCTTGAAGTGTGAGGCTGCGTGGAAGGCAGAGTACAAAGAGACGGATACGGAG -ACCTGTTTTCATACAACTGCTGTCAGTATCTAAAAAGCCATGCAATGGAAAGAGCAAAGA -TAGAGTTGCCTCTGATAGAACGGGTTCCACGCCCTTCATATATTGCCCACAAACCATCAC -GGGGGATATACCAGCACTAGAGAATTATACTAACCTTTTCAGGGACGTAGCGGTCGTTGA -TCATGGCGCCTCCCTCGGGAACAATTCGCGGAAGCATGGAAGGAATAGGTGGGTAGATGC -GCATCGCCTCGGTCAAAACAGCGTTGAGATATGGGAGATCCACAAGGGTGGACAGCTTAA -TGTCAGCGTCCTTGTCGAAAGCGCCGCGTATTTCCCGGATCAGACGCTCATACTTGTCAC -GGCAGCGCAGAAGGTAGTATGTCACACCAGAGAGCAGGGCAGCGGTAGTCTCGCTGCCGG -CCAAAACGAACACCGCAGAGTTTGCATCCATCTCCTCAGACGACATACCCTTCCCGTCCC -TAGAGTGCTTAAGAATATACGAAGTGAAGTCGGGGCGAGTGGCACCCAGGGCGAGGCGAC -GATGGATCTTCTGGACGCTCAGGTCAAAGCTGGCTTGGCGTTTTTGAAGCAAGTGCTTGG -GGACCAGGACCCGGACCAATGGACCAAGAAAAGGGTAGAACCAAAAGACCTTGAGATAGG -CACTCGCCTTGACTGCATCCAGAATAATTCGAACCCACCAGTGATATGTGCTGTTTGACA -AGCAGCCGAATGGTTCTCCAAAAGCGAGGTCGCCAATGAGGTCGAAGGTAGTGAAGTTGA -ACCAGCAGGTGATGTCAATGTCCTGGGACAAAGCACTTCCCATGAGACCCTTGAGCTTTT -CAATAAATATTCCAGCGTACATCTGGAGTATTTCTTCCTGCTCACCCAGAGCTTTGTTGG 
-AGAAGGCATGGGTTAGCAAACGGCGCATGCGGGAATGGTCATGCTCATTGGCTGTTATGA -TTGCGTTAACACCGTTGGGAGTGGGTGCATACAAGGCTGGATCCTTGATGAAGACCAGTT -GTCCCTTCTTGCGGTGACCGTAGATGTCCTTCCAGGCGGTTGAAGCACGGTAGACGAGGG -CATTTGGAGCAATACGAACAACATCTCCATATTTATCGTGAAGCCGCTTCATGGTCGAGT -GCTGCTTACCCTGGATATCCCAGATGAGATGAACCAAAGGAGTGATGGCAGCCAGCTTGG -GTCCAGGGTAGCCGCTCAAAGGGTGAAAGTAAAGAGTGTGGATGATCCAGCCCACATAAT -AGACAAACAGCTGTGTTTTTGACTGAGATGAGACACACAATCATTTATGTCTTGTATTTG -AGTACTCACCAAGGCAAGGATTCCACTCAGTAGACTCCAGAAGGAGGGGGCAGCTAGGGC -ATGGCACCAGCTGCTCATAGCCAAGGAATTGAAAATCATTATGAAGAGTTAACTAGAGAC -GGTAAGATACAAGACCAAAGACACCTAAGGAGGTTCATCTTACTGAAAGACACCCAGAAG -AAAACGAATATCAAAAGAGAGAGGTATGTAGGGAGGAAAGAAGAGAGGAAGAGAAGTGAA -GGATGAAGGAACTGAGGAGGGGACTGGGAGGTGGACAAACAGCAGGTATCTTACAGTACG -AATGGGATGAGAGACGTAAAAGGGTAGGGAGATTGATTAGAGGGGATTGTGCAGAGTATC -AAGTGTGTGTGTATTTAAAATCTATCGAATTCGCGAAGGTGATAGCCGGCAAACTTCGAC -TTGGAGGCTCAACTTAGGGCTATCCCAGCCAAACAGAATATAGGGCTACATTAAAGAGCT -CAATAGGTCGCCAAACAAAGAAGTACTCATGCTCAGATCAAGTAATGGAAGTATTTTCAT -GACTCGTCATTCGTTATATTTGGGTCTATTAGATGAACACAAATTGTGTGGCACAAAAAT -CAACCGCTGGCAACAGAGAAACTCAACAAGCTCCCAGCAGCCAGTCCACCAACGAGAACC -AGGCTCATGAATCCACCAGCAGCCGGCCTTTCATCGGCAACAACCCAGTGACTAGCACCC -ATCATACAGCTTGTCCCCAGATATCCATTGCTTGCACCAAAGAAGAACTGTACAAGGAAA -TAGAAAATGTCACTCTTGACGGCAGCCCCTTGGCCGTTGACGTTGCAGAGCTGGTACAAG -GGAATGAAGATAACACGGGCAATGGCCACAATGGCAGCTATCTGTGGGTGCTGTCCCAAA -GACACCCCCGGAATAGCCACACACATTCTCCCCGCCAAGTCGCCCAAATTCCAAACGAGA -AACGCCAATGGAACAAAGACACTAGGATCATACATCCGTGAACTGGCAGGGTCATTGACG -GACTTGATTTCCGCCGTGTAGACTGGGAACACCATGGAGACCAAAAAGCAGACGAACACA -GCGAATGCCAGGAAGCGCAACTTGACGAACAAATTCCAGAGGCTGACAGTCTTGGAATGC -TCGTGCTCATCGGTAACGGACTCGTCGTCATCTCTTACCAACTTTTGCCTCACAGAGGGT -TGCTGTCTCAGCAGATAGAGGAAGGCCAAGAGAGCAATGACCGAGACACCCGTAGACGCA -AGGAAGTAGATAAACGCCGATGCCGACGAACTCGTCTGCGGAATGCTGGGGTCTTGGTCT -CGGTCAACGCTACCATTCTTTGACGGCACAGCTAACACGGAGACTATCTGAACAATGCAA -GGCAGTACACCAGCAACACCCTGCCCACCCATAATAGCCTGGGTATATTCCTCTCTCCCG -AACCCGGAAACGTATGCAAAGACACCATTTTGGTTGATACCAGTGGCTAAACTAGCGGCG 
-AACACCATTACCATCAGGAAGCCAAAGTACGCCCTCGGCGAGGATTCGGTCATGATAACT -GCTGATAATGCCAAGATGGTAAATACAACAGAGCTCATCAATAACGAAAACGTTATTCGC -CGTGGATAGGAGGCGTTCTTCTGTAGCTTTGCTAGGGTATAGGCGGATGCGAGGTTGGTG -ACGGTTGATACGGACTGAATCGACGGCTGGTAGTGCAGTTTCGTCCAATTATCCGACTGA -AATCGGCTATAGAAATAGGGTGCGGCGGCGAGAAACATGTTCCTTATCGTGCCTCCACGT -TAGTTCATGGTTGATTGAACACATATAGCTCGTTTCTCCGGGTTAGGAGACTCTTAGGTT -CCACTTACCAGGCCCAGAGCATAGAAACTCCCAGAAGGAAGAATATGCCATACTGTAATC -TTGAGAAGGGAGCTTGCTGGTGTCGCCGCGTATATGTCTCATCGGCCTCATCTGCGGCGG -AGTCCTGCAGTTGCTCATACGCCGCTGGACGGGCAATGAGACTACGTATGCGGTTCATGT -CTGTCTTTTCGAGCCTTTATTTATTAAGCGTAAGTAAAGGATTTTGTAAAAACATGGGGG -TTATCTCATAGCAGAGAGCTGCATTGAGGTTCCAAGGGGGCATATGACTGGTGACTAATC -AAGATATGGATAGATCCATCTGCATCGGACAGCCCCGTCAGGCGATCTATGTTGTACATG -TAAACTGAAAGGTTCTCCGGTCAGCAGCTGCCATACCCTTTTGTAGCAAGATGAGTTCAG -TCACTACGGTTTCCAAAGCTGTGTATCTTTTCGAGGCGCCAGTTCTCACCTTTTGATATC -TTCGAAAATCTTCTTAGATACATGTATATGTATGTCCTTGATATGTTTTTCTCTTTAAGG -TTAAGCAATCAAAGTAGAGCCGGAACAATAAGATTGCTATCTAGATTTGTACACAGTCGT -TACCGACTTAGAATCTATCACAAAATCAAACATGCAGAATGTGTATGGGAGTGTGAGAAG -GGTGGTGGGGCACAGTGAGAGGTGAGTGTAGATGAGGGAACATATATTTGGACAAAAAGA -ATTCAACAAGAATGAACAGACATCGTAGTAGTAAGATAGTAGTATAAAATATGGCCAGAG -GTGAGCGAGGTGTTGATAAAGATTAATCCTTCTTCAGAGGGACTTCCTCTCCGTCCTCTT -CCTCCAGCTCAGGCTCTGACTCCTCCTTCTGGAGCTCCCAGAGCTGATCTGCCTCAATAG -GCTTGCCCGCGGGGGCCGTGGTTTCACCGCCACTCATTTGCCATTCCATGAGCTTTGTAA -AGGCGCCATCCGGGTTAGAGCTGAGCTCATCGTAGGTGCCTTGCTCCGCAACCTTGCCGT -CGTTGCCAAGGACAATAATCGTGTCGGAGCGCTTGATGGTGGAAAGACGATGGGCGATGC -TAATCGTCGTGTTGTTGCCGCGAAGCAGAGCGGCTAAAGCACTGTTGACAAGGGTCTCGG -ACTCGGCGTCGAGGGCAGAAGTAGCCTCATCCAGAATCAGAATATCAGGCTCCTTGATAA -GAGCACGCGCAATGGCAATACGCTGCTTTTGACCACCGGACAGCTGAGCTCCACGAGCTC -CGACGTGAGTATCGAGTCCATCGGGCTGTGGTGTATTATATTAGCTAGGTTCGTGAAAAT -AATAGTTGCTCGGCGGACTTACGAAGTCGCTGATGAATTGACAGTTGGCCTTGCGTGCAG -CAGCAATGATCTCTGAGCGTGTTGCCCGAGGCATGCCATAAGATATGTTATCGGCGATTG -TTCCAGAGAAGAGCACGGGCTCCTGGGAAACCACGCCGATCTTTCTGCGAAGAGACTTCG -CGTTCATTTCCTTGATATCTTTCCCATTGATCAAAACCCGCCCCTCGGTAGGACTATAGA 
-ACCGGAGCAAAATGGAAGCAATTGTGGATTTGCCTCCACCAGATGGCCCAACGATTGCAA -CGTTGGTTCCTTGGGGAATCTCGAAGTTCACTTCCCGGAAAATCTTCACAGCTGGACGGG -TAGGGTAGCTGAACGTGACATTCTCAAATCGAATGGGTCCACGTGCGGTCTCAACCTTGA -GACCCTTAGTTGGGTGAATTGTGGGCTGGCGATCTTGTAGCTCAAAGAGTCGACTCGCCG -CACCCACGCCCTTCATCAGCTCTGAATAGAAGCTAGAGAGGCCGAACATGCTGGAGCCTG -CATAAGCAGTGTACATTAAAAACGAAGTCAATTCTCCAATGCTGATAGCACCCGATTGAA -CCATCCCACCTCCAACATACAGCAAGGTCAAAATGGTCATATTGCCCATCAAGCCAGTCT -AGTAGATAGTCAGTAAAATTCGAAAAAGAAAGGCCTTGAATAATCAATACTGCCGACTTA -CCGAGCTGAAGAAGGTTGCGCTAATCAGAGATTCTTTTTTGCCAAGCTCGAATATCTTAC -GGACTTGTTGATTGTACCGGTGGACTTCGATGACTTCACCAGCAAAGGACTGACTGGTCT -TGACATTTCCCAAACGCTCTTCCGCAATCTTAGTCAAAGATCCCAAGTTCTTTTGGATTT -TGCGACTGAGATTCCTGATGGCTCGGCCATAGAACAGAGCGCCAAGGCCAATTGGAGGAA -GCAACACAAGCAAGATCATAGACAGCTTTGCACTCGTGTAGGCCATCAAACTGAATCCAG -CCACACCACTAACAGCAGCTCGTAGACCGTCAGACAAGTTCTGGGTGATACTCTTGCCAA -CAATGATAGTGTCAGAGCTAAGGCGCGAGATCAGATCACCAACCCGATTGGCGTCAAAGA -ACTCAGCATCCTGCACAAAGGTTTGACGGAACAGCTTGGAGCGTAGCCTGGCTACAATTC -GCTCGCCAACAATTCGAAGAATTATGATGCGCCCATAGTTTGCACCAGCACCGAGAAGTA -GTATTCCACCAAGAGCAGTGTAAAACATGGGCATGCTCAGGCCGAACAATTCGCCGCCTC -CTTCCATTGCACCCTTAGTCGCAGCATCCATGATTTTACCGATGGAGAAGGGAACTGCCA -TTGTGATCGATGAGGAGACTAGGAGAAAGAAGAAGGCGAAGCCGAGGGCCTTGGATTCGG -GGCGCGCGATCTTCAAAAGCCGCCAGATTTCGGCGAATCCGGATTTCTTTCCCGATGCAC -CTTCCTTGGCCAATCTGGCGCTCATATTTACCTGCGCTGCTTGCGTTGCTCGCTCAGACA -TTTCAAAGCCCTTGTCCTCCTCCTCTTCTTCTTTTGGTTCCGAGGCTGACCGGGTAGTTG -GGTCTGTGGAGGTCGCGGGTGGCTTCGCTTGGAGAAGTTGAGGTGTTGAACATAAAAGAC -GTGTTTGGGAGACAATTTGAGACGTAGAAGGGGTATATTTGAAGCTGTTGTTGTGAACCA -AGAGTGGGTTGGCACCATCGCGGAGTGCACCCGAGGCGGTTGAATAGAGACGGTTCAACC -CATTGTGATCGCCGGAGACAAAGGGGGATCTTGTTTTGATGAGTAGATCGCAGGCAAATG -AGTTATATGGTCTCAAAGCCTGCCGTGGGCTGATGCCCAAGGCCAGAGACACAGGCAGCC -TGGCACCGCGCATCTCGGAGTTGTATCGGATGAGGCTCGAAATGAAACGGAGTTACAATG -ATGTCACCGAATAAAACCTGGACAGGACATATCTGAGCGGAATAGGTTCAAATCGAGCAC -ATCACATGTTACACATTCGGCTCCTTGGAACGTGCGATGGATTGAGAGCGATGTATCTAG -AGCTTCGTCTGACGCCGCCATCGATTGGTAAAGCGGTTTTCGGGGCCGATTTTGCCTTGA 
-TCGACCGCTCGTGATTTTGCTAGAACTCACCGCCTGTGGAAGCTGGTCGAGAGCTCTGGA -TACTTCGTCTGATCTGGTGCTAGAAGCTTTGTTGATTCTGAATTTCACTGAGTTGAATCG -CCTCAAAAAGAATACATTGGACAGATTACTGTACCATTAGTTGCCGGTTATCCAAAACTT -TTGTTTCCCACCTATCTTCTCTGCCACTGATACATCACCCCACCTCATACCTATTCTACA -CATTCACTGTCCCACAATATGGTGCTTGACTACAGTAAATGGGACACCTTGGAGCTGTCC -GACGACTCGGACATCGAAGTTCATCCCAATGTCGATAAGCGCTCTTTCATCCGAGCAAAA -CAGGCCCAAATTCACCAGCAACGAGACCAGCGTCGGCATGAAATCAAAACCCTGAAGTAC -GAGAGCATTATCAACGATGGATTGCTGTCGCGCATCGATAAGCTTCTTGATTCTCTCAAG -AAGCATGAGGACTCGTCTGCTTCCCCCGACGAGTTCATTTTCACAACGATTATGGAGTTC -GCAAGCAACCCCGCTGAAGACCAGGCCCCGGCCCCGCCGGAGGGAGTATACACACACGAG -GCGGAACAACCCAAGTTTTCCCAGATGATGAGCTCACTGGTGGACCAGGTGAAGAAGGAA -ATCGGTGATTCTAAGCCCGAGAACCTCTTCAAGGCATACATTCAGGGTGTCAATGGACAC -TTAGAAAAGATCAAAGCACTGCAGAAAGAATTGTTCGCAAAGCTTGCGCAACTTGAGAAG -GAGGAAGGTGCCAAAATCACCAGTGATAAACTACATGAAGGCTTCAATCAATCTCATGTT -GCCAAGATTGCTGAGAAGGAGAAGGCAGCTGCTAAAACAAAGGAGAGCTCTGTTGAGCTC -CTGAACCCTGGTGCAGGAAGCTCCACCACCAAGGCTGACGATGAAGGCGATGATGATGAT -CCTGCCGATATTGAGATTAGTCCACTGGCAAAGAAATTTGCTCAGCTCAAGCCTGGAGAC -TACAGCGCCTATCTAAAATTCATCTCCGCAAACCCAGATATTGTCGCTGAGAAAGAGACC -GACGGTCTACTTGTCGAGGCATTCAACAGTCAGCTCAAGGGCAAAGATGAGTATGCTCGC -CACTGCGTACACCAAGGTCTGCTTCTGCAATACTGCCGCTCGCTTGGCCGTGACGGTATC -CAGCTGTTCTTCACACGGTATGCCAATATTTCGCTTCTTTTTTCAGTCAATGTCTAACAT -AAAAACCTCCAGCATCACTACCAAGGATCACCGCGCATCAACCTTGTTCCTCAATGACGT -CAATGAAACTTACAATCGTATCAAAACTCGCTCCGCTGAACTCAGCAAGGATGGCTCGGC -ATCCAACGATCCTGCAGGCGTAGAGCAAATCCAGCTGCACGCCGTTGATCCCAACACCAA -GATCACCATCAATTTGCCCGCTGCCAACAGCGAGGACCCCACTGAGATTGAGGTGCGCAA -GATCTTCGATGCTTTCCCTGAAGACCTGCAAAAGGCATTGGCGACTGAGTCATTGGACGA -AGTCAACAAGGTCCTTGGCAAGATGTCCGTTGAGGAGGCAGAGGTTGTTGTAGAGCAATT -GGGCAATGGTGGCATGCTCAGCCTGGAGGAAGGAATCGTGGATGCCACGACAGCAGAAGG -CCAGAAGAAACTAAAGGAGTTGGAAGAAGAGGGAAAGCAGGAAGTTGGTGAGCCCGGTGG -TGACGTTACCGAGCTTGACTAAGGCTATGTAAAAAGCGATGATAATATGAATTGAGTATG -CGTTTATTTCTGTTGAACTGAGGGTAGGTTACCAACCACTTTCAAGCTTGTTGTACATTT -GTAGCTGTTCATTTCACTGTTCCCAAGGGTTAACCTCTATTTAAAATACTTGAATCAGGG 
-CCAATCTACCGGAGGAATCCGTGGTATTAACGATGCTATCAATTTGAGTGAGATACTAAT -TTACAAACCCCCCTAAGGACTTCGCTGTGTGGAGTTAAATAAAATAAAAAGGACAAGAGA -CATAGCTGCCAGATCACAGTCAGGCTACCAAATAATGCGGGAAGGGGGCTGTGGAAAAAG -CCATATCATATGAGTAAATGTGGGAAAGTTGTGAAATACCACGTCTTAACCCCGGACTCC -CGAATCTTCTAACCCCCACGCGCCAAAGTAAACCACTTAGATTCCGCTGGATCCGGGAGC -CTCCTGGTCTTTTCGAAGGTCGATCAATAGTTCGCGGAAACGCTCCTTTCCGGAGCCACT -GGCACCGCCCGCGGAGCCCTTGCCCGAAACATCAACCAGCTTAGCAATGCGCTCCCAGCT -GGTGCCACCAGCGGAAGTGTCCTCTCGGTTGGCAAGGAATTGTTCAGCCTCGGCGCGGGC -GTGGGAACGGTTCTTATCGGCCTTGTTGTTGTAGGAAACGTAGAAGTCGTCAATGTCCTC -CTGTGCCTTCTTGACGGTAGCATCCTTCTTTTCGGCGGAGATTTCTGCACGGCGGGCGAT -CTCAGCATCGCGCTTCTCGCGCCATTCACTAAGGAATGAAATACAGAAATTAGCTTGGAC -ATTCTAGACCGAAACAAGATGGATGCAGATCGCAAAGGCAATGAACCCGTCATCCAGGAT -TTGAGATTGAACTCACCGAACAGCATCTCCCTCATCTTCGGGTTCCTGTGCATTCTGATA -TCCGGTGCGGGGGAATGGTGAGCCGGAGCCAGTGATTGTGCCACCCGGAGCAACGCGCTG -CATAGATGTAACCCCTGGTCAGCATACCGTATGTTCCCCACCATGTACCTAATAGCTAAT -CCATACCTCATTTTGGTTCTGTGTCGCTACTGAGGGGAAGGATGATTCGAATTGGCCAAT -CTCCTCAGTGGGCTCGTCGCCTGCGCCGAGTAGGTCGTCCGCGCCTACGTTCTGGCCGCC -GGCAACGTGGTCTTGGGAAGTAGCAAATTGGTCAGCATCTTCACCTAGCGCAGCGCGCTC -ACGGGCAAGGAAGTCGTCATCGGTGGTGCTCTGGACATCGGCGACTTCGGTTTGGCCTAA -ATGCGGGTCATGTTAGCTCCGGTGTAAGGAGAGCAAGTAAGGTGGCTTGAGCATACCCTC -AGAGAAGTCGTCTAGGGAAGGGAAACGGTCAGCCATACTGGGGATGGATTTTCTTTGACG -GTTGTATGATTCAATAAAGAAGTTTGAGTCAAGAAGATGGTCGTGAAGAGAAGTCGAAGG -GCTAGGAAGACGATGGTTATAGGAAGTAGTGTTGACAGGTGTGGCCGGGGCGGGCTCGGA -GCCCCGTTGTTCTGCCGCTTCCGGGGCGGTAAGCTTATCCATGTTTCGAGCTTGGGGTAC -AAACCACTTCAACTTCACTTGTGATGGAGAGGATTGAGAGTCGAGATCTTCGCTTGATAA -TCCTTTGACTTTGCTCCAGTGAATTTCCTTGATTGAAATCTAAGTGCAGTCCTAAAACTC -ACCCGTGATCTTCAATTGCTCGTTGCCTGTTTGTCTTCCACCGTGATTGCGACTTGCGCT -TCTAGGATCTGATAGATTTCACTTTTCAAAAACTCGCCTTTTAATTCAGATTGCTATTTT -ATCATCATGGCTTTTACTGATGATGCTGTCAAAGCAAAGCTATCAGCTTTGAATGACTCA -CAGGAGAGCATCGTTACTGTTGCGCAATGGGTGATGTTCCACAGGTACAGACACTTGCTC -CAACCAAAGAGACCTGCTAATCTTTTCGTCTATAACTGAAGACGCCATGCTGAACGGACC -GGACAAATCTGGCTTCAAAAGATTCGCGATTCGCCGCCTCCCAAACGACTCAACCTGATC 
-TACCTAGCCAATGGTACACATACCCTGCTACACCTCAGCTCCGCTCCGCTTGCTAATTAT -ATCTGCAGAGGTGGCCCAACAGTCTAAGGTACGCGGGAAAAATGATTTTCTCATTGCGCT -ATCTCCGGTTTGTCCCCTACCATCCCGCCATTGGGACTCAAAACTAATAAAGCAATAGAT -AATCGTCGATGCTACATCAGCAGCTTACAAGGGCTCGTCGAGTGAATACCAACAAAGAAT -CCGCCGAGTTGTGGAAGTCTGGAGACAACGCAATGTATTTGATGGAGCTATTTTGGATGC -AGTGGAAGCTCGTATAGACGGTAAGTCAACTCACCTGGCACGAGGACGGACTAGTCAAAC -TGATCATGTTGCAGAACTCGATAAAGCTCGTCCTACAAACAAGAAGCAAACCCTCGGAGG -CTCCTTCTTCAAAGACACTTCCTCGGGATCCACACCGTCCGAACTACAGCCTCTCAACCC -ATTGCAGGTTGCTCTCACCAAGGCCGTCATCAGCTCCAGCAATTCCACCACAACCGCTAC -TTCTGAGTTCGACAAACTTCATGATCCCAAGAACCCGAAGCCTACACTGCCAGTTCATGC -TGCACGTCTGTCTTCACTATTGAAGACGCTCGCCAACGCGGAAAACTCCGTCTCCGAGGT -AATCAAGTCGCGGCGCGCCTTGATCGATGGGCTCGAGAAGATCCTCCAGACCAACCGTGC -GGAGCTATCGAAGGAGGAGGCTCTTAGCGCCGAGCTGTCGCAGAAGAAAATCTCAGTCGA -TACCGAAAAGCGTGAGGTCGAAGATGCGATTATGCGCGGTCTGCCCAGTGAAGAGTCATT -CACGAATCCTGGTGAACACAACGGACACGATGATCCTTCCGCCCGCCCAGAAGTTGAAGC -TTTGACGCCTCCCCCGGTGGAGGCTATTACGCCGGTTGGGTCCCCACAGCCAGAAAAGCA -GCAGCTCCGCCGCGGCCCTTTTACCGAGGAGGCGGACGACACCTCAGAACGGAACCCCAA -GAGCATCCCCAACATCCAGGAGTCAGGCAACGGGACTACGATCACTGAACAATCCACTGC -TATTTCTCAGGGCCATGAAAATGACTTTGATATTACTGCCGATGGCCACGCAAAAAGGCG -CAAGATCTCGCATGGCGAAGAGGACTATGCGGCTTTTGCAAGCGGCGACCTAGATGATGA -TGTGGCTGACCTACTTGCCAGCCATGGTAAGTAAGTGAGATTGACGCATTATGGTAAAGG -CTATACGTTCCCTAGTAATGTATGTGCAGCGACTGCCTTTATTTGGTGTGATTAATTCAT -TTCTTTTTTCATTTGTTTCATTCCATCAAGTTCCTATTAGGCATTCTCGTTTCGGTTTAT -ATTGGATATCATTTGTTTTATGTATGAGTTAACTAATGTATCTGTTGGAACATGTAACAT -ATGGTATTGATCTCAGGCCTGCAGTCCAATTTTCTCCGAATTTTGTCCACACAGTGGTCA -ATACTAAGGTAGGATATATAGTTAGGACTACCCTGTTCATCTCTTCCGATAAGCTACGTG -TTCGCCGTGTTCTGGTCTCTTTGTGACATCAGAGACTAGATGTGTAGATTCAGCTGTATT -CATAAGTAATGACCAGCCCACACCAGGCTTGAACATCCATATACATTGTGTCTAGGTGTC -TGATTACTTGCAACCAAAACTTAGTCACTACGGCAACTATTAGCATGTAAAGGGGACGCC -TGCTCTTCTGGTCATATTAACTATGAACCAATGCTAATTTGCGGGAAAAAAAGTTATCGG -AGTATATCCGGATCCATCCACCGATCTCGTGCGAATCTGTTCGACAATTGATGCCTTCTC -CCTCGGTAAGCCAACACAGAGCGGATATTGTTTGAAGATTCATCCTCTGGGGGTGCAGGT 
-CGTGGCGGCGCTGAACTGCTTCCTTTTGGTTTCACAACCAGTACAATGATCATTCCCACC -ACAATCAGGAGGAGTAATAGCATGATCTTGCGCTTTGTGGTTCGTTTTTGATAATTAGTA -GCCTACACGAGTCAGTTTCACGGTTAAGCGATACCTAATAGAAATTGGCTGGGGGCATGG -CGAAAAACAATTGCCGGTGAAGACAGAACATACCACTTTAAGCTCCTTATCTGCAGCCTT -AACCTCGGTGCCCATTCGCTCGACATTGTAATCTATACGGTCCAACATGGTTCCTTGGTC -AATAATCATGCTTTGCAGTTCACGGAATATATCCGAGAGCTCAATAATCCCCTTGGCGAT -ATCGTTGATTTCGCGCTCCCGCTGCGCAATAGCAGCATCATTCTGGCCGGTCTGTCGTTG -TGAGGTCTGCTGCAACATCGTCTGCGAAAAGGACCTGTCGGCATCAGACTCCATCATCGA -CGGATCCGTGTATGGGTTCTGGGCTTGCGTAGGCGTGCCATCGAAGGGCGTTGCGATACC -CTCAAGATCTCGCAGTTCTGCACACAAAGAAACAACACCGCATTAGCATATCTGAGCCAA -TTGATTCCGTCTCCACCGCGGGAAGAGAGCAAGAAGAACACCAAAGGAGATAAGAGTCCC -ATACTTCGCAAATAAGTACTCTGCTTTTTCCTAAACCTGGCACTAGCTTCCTGCACCCTG -GCAGCAAGCGAGATCTGAATATTCTTCGCCATGGTCTCCTCTCCACTCGTAACACCGCCA -TGCGCCTTGGACTCGGCCACCATAGTCTCGATGCGCATGATAGAGCGCTGACAACGGTGG -AAAGAGCGCGTAACTTCCTGCGTCAAGCGCTCAATAACACCTTCATCCTGCTTGCGCAGG -TCCTCGTCACCGAAGCCGGGGAGTAGGTGTTTCTGGTGTAGTTTGTCGAGTTGCGCGGAA -TTCTGGGCGATCTCGGATAACAGTTCGGATACCTCTTCCTGCACGTCGACCCAGCGCGGC -GGAAGGAGGTCCATCTCGATAACCATGTCACCATCTTCGTCTGTTTCGGATATTAGGCGC -CTGTTTTCCTCCGGGTGCGAGGGTGTGTCGGAGAAGCCATTGCTCGGTGTGAATTGTGGC -TTTTTGGCTGGGTGATGGATTAGGGATTGGCGATAGGAGAGGTAGCTGGGTTTCACGGTG -GAGTGTTAGCTGTTATACTTTGTAGGCTGGTGTTTTATGGTGGTTCTTTGCTTTTTTGTC -TCTTTCCGGCGCTGATGGTTGCGCTTGGGGTTGAGTAGGGTATATTGGGGGATTCATACA -GGTTGGTGCGATCCCGCCACATGATGAGGGCGTTGGGAGGATCGAGTCTGGTTTACTGAG -TTGAGATTATAGCAATCATGATCCGGGTTGATTGAAAGGTAGGAGCTCTCCATATGAAGG -GGTCACGTATGCCCAGGTATTCATCATGTAATCCTTTGGTTTTGTCCACGTGGAGAGGGG -TAATTTTGGGTATCGACAAGACAGCTTCCCACATAGTATTTTCCGTAATCTCCGGTATTA -AATAGATATCTCTGTACTGTTGTAGGTAGATCGTGTGCTCAATTACATTCTCGTACTTAT -AACTTATCTCAATATTCTTATGTGTAAATTTGAAAGCTCCAAAACGACGTATATCATCCA -TTAGGAGAGATATTTCGACTATTATAATGTGCTATAGTCTCTTCGGTTCTAGATCTACTA -TGAAACCCACCCCGCAGTCTACACCTTTACACTTAAAAGATCAAAGCAAACACAAAAGCC -AAGACCTTCCTAATGCAATTAGGCTCCCCTGGCATTTCATCAACATGGCTCTGGAGTGAG -ATCAAGCTTTTTTGAGGCTACATAGGACCTCAGATGCAGGCAGATAATTATGTATGAAAG 
-AAAAAGAAGTTGGGTATTATGTGCAAGGTTGAAATATACCAAAATCCAAGTCTGTCAACC -GTGGCAACATTTCAGCCCTGGGGAGGGTCCTACCGCCCTGAGACGAAACGACGCTGACTG -GGCCCCTCAGGCACGCTATTTTGGTCTCGTAAGGCTGTTCCCCTTAACACTCTTCATTAT -TTGACCCCTCCCTTACTTTGTACTTTCATCCATCTACATTCATCATGGTGTGTCCCCGAT -ACCCAAGCTGATTTCCTTCAGCTATGTGTTGAAAATAAAACCCCCACATTGCGGGGCATC -ACTGTGCAACCTCACTGATCGCATTCTCTAGGCTGATTCCCTCACTGAAGAGCAAGTGTC -TGAGTACAAGGAGGCCTTCTCCCTATTTGTGAGTGACTACATATACAGATCTTTGACACG -GTCGAACCCATGCTGACCGGGAGTTTGTTTTGCGAAATAGGACAAGGATGGCGATGGTAC -GTGTGGTCGCGCCCGACAGCTCAGTTGAGGCCACAGCAGTGTCCTCTGCTATCGAATTTG -AAAAGGAAAAGCATTCTAATATACAATCCTACCATAGGACAAATCACCACCAAGGAGCTT -GGCACCGTCATGCGCTCGCTGGGCCAGAACCCCTCCGAGTCCGAACTGCAGGATATGATC -AACGAGGTTGACGCCGACAACAACGGCACCATTGACTTCCCCGGTACTTACTCACACCCC -ACTGGTATCAAAGAGAGATGGTTATTGACGTGCACTAGAATTCCTCACTATGATGGCTCG -TAAGATGAAGGATACCGATTCCGAGGAGGAGATCCGTGAGGCTTTCAAGGTCTTCGATCG -CGATAACAACGGTTTCATTTCCGCCGCCGAGCTGCGCCACGTCATGACTTCCATCGGTGA -GAAGCTGACTGACGACGAGGTGGACGAGATGATCCGCGAGGCGGACCAGGATGGTGATGG -CCGGATCGACTGTATGTTTTGGAAACAATTTTGGAAACAATCTTGGAGCCTACCCGCGCT -TGCCTGCCCAGATGCTAACCTCTCCATCTCCATACAGACAACGAGTTCGTCCAGCTCATG -ATGCAAAAATAAACGATGCATTTGCACCGTGCGCCTTTTTCCATGTTAGCTGACGTCGCG -GTTTTTTTAACGAGAATGTTTCTCTTCCATTTCAATTTTCCAATACCGATTTCCTGTAGT -CTATACTCGCTAGGCTATACATCTTGGATGATTTACGATCACGCGTGTGCTTTGAAATGC -AGACGGAATGGACTGGGCGGTACTTTGGGTCAAGTGTTCATAATATAGGAATGATGGCTT -TCCTTATAACCTTGACAGTGCTGGGCTTGTTACAACGTTGCCACCTTGTTAACCAGCCAT -GTCCATAAAACCCTGTAGCAAGCGTATAAGAAGTAATCGTCCATCTCAGTCCCCAGCACT -CTGAGCATCCAACTATCGATTAAACCTCAAAACCTCCATAAATTGCCTCGCAACATCCGC -TCCAGACCACGGATCAGGATCTTCCTCAGCACGCTCACCAACCACAGAGCCAACCCCATC -AACATTAGTATCAACCCTAGTATCAACCCCGGACTCAGCCTTCTTCCACCTCCTCAGTCG -GTCCTTAACTTCCATCCCAACATTCGGTATAAGAATCGCCTCCCGGGCCTCAACGCCCAA -TCTCCCCCAAAACTCTCGAATCAGCGACGCAGTCGCCGCATACTCTGCATCGATATCCGA -CTCCTCGATCGCAGATCTAGCGAGGTCGATGCCCGTGGCTTCAACATCCACATCACCGTC -ACCTCCAGCAGCACCGAATGACCCATCCCCCCTGCCCACAGGGATCGCCTCCACAACCGA -CGGACCGCGCCATTCGACCTCAATGACCGTACCACGCACCCGCGCTGTCGGGAACGTCTG 
-CCGCACGTCACCAAGACGCACGCGGAAATCACCAACCTCGTAGACGATCCCGTTCGGGAC -AGCTAGAGTGAGCCGGTGGGCCCAGAAGGGCTGACAGGCGTAGGTAAAGTGCTGGAAGAG -GGTTCCGTATGACGCGGGGGGAACAGTTGTCATGACCATTGATGGGGAAGGGGAGTGGTC -TGGTGCGATGCCATTGATGGGACCAGGCGCAGCTGCTGCATTTTGGTTTTGGTCTGGTGC -TGGGGCTGATTTCTCGGCCTCTGAGGTGTAGATGAAGCCGTGAGTTGGGTAGTGGGTTAA -TGAGAGGAATTGCATGTAGCGTGGATTTGGGGGCTTCTTGTTTGAGTTTGAGGAGGCGGG -GAGTAGGCCTGGGGTGTCACGGAGCAGCTTTTGCTCCAGACCCCAGCGGCCGATTGGAGT -TAGTTCTTCATTTGGGAAGGCGGCGCGGAGGCGCTCTGTTACTAATTGGAGGGCTGTTGA -GGCGTTTGGGTTTGAAGGGATAAAGTAGACACTTTGTGGGGTCAGTTTGGGGTGTTTCTG -TTGACTTGGAATAGGAGGTTTACCCTGTGATAGGCATTTCTTATGTTTGTTTTCTTCTTG -TATTGTGGGCAGGCGCAAAGGAGATTGGAATGATCGTCACAGCAGTGTTGAGCTGTGGGG -TATCAACTGGGATTGATGGGTTGAGAAGACGGCGTGAGTTCCCAGATAGCCTTCAGACCT -GCTGGCAATTGATTGAGGGAATATTTAGCAGGTAGCCGCCATGCAATATAGATAGATATA -GAGATCAATCAATAAAAATGATGATCAAGCCCAGTGTCTGGGAAATGATTGAAAAGGACT -TTTAAACAGGCTAGGACCTTCAAAATCTATTGATCACATTAGTATAAGAATTGACAAGGA -AAAGAATCTGATACCTTTGGAGCATTTAATGTTTCTATATGATGTTGTTCTGGAGAGTCA -TCAGCAATTCTCCAGAAAGCCATCATGGGTGCAAACAGAGCAAACATATACTCAACGGGG -TTGAAACCAGCTGGGCAGATGTTTTGCGTTTGATAATTCCGTCGTTTTCAATGTTGTTTG -TTCGCAAGATTGCAAGACATCGGAAAACTTCTAATGTTGGTGTTTTTTATCCCTCTTTTT -GTCTCATATTGATTTGGCTCGAAGTTTCAGCCTTACGGTCAGCTTCTCAGCCTCTCTATA -CCAATCCCTTGAACCCCGGATGCAGTACTGAAGCAGATAATCGTACTCCGCATGGGTCTC -TCAGTGATATGATGTCAATATAGCAATCCTTGAACCTTGGAACGTGTGACTATCCGATCT -TCCAAGAATCGATATGAAAATCAACAAGATCAATCATCAGAGTCAGGGGCCGCAGTGGGG -TTGGTGGCGCCCCTCAGACTACAACTTAAGCACAGCCCTGAAAGACAGAAGAGCCTCTTG -GCACTTTGATGACAAGTCTAGACTTGACTATACTCTAATAATAATATAGGTTGGACCCTG -CAGTCCCTGATTGCGTGGCCTGGTGTGAAAGGGGTGACATTCTCAAATTGCACCAATCAT -CATTTGACCCTGTGCATCATCATGCCTCTTACTTCCAAAAGCCACCAAGGCCGATCAATG -GAGCTCCAGAGAACTCGGCCGCGTCTGATTCCCCTTCGCAGAGACCTCTCACAAGCCATG -ACGTGCCGCATTGTGTAAAAATGTGTCCATTTCCTTTTTTTGTTTACTCAAGCCGACAGG -GATCTTGTTCTGTCCGAAACATCAATGTCTGCTTTTGTTACTAGTTCCTTGACTTGACTT -GTCATTCCACTCCTTCTTTAAGCTTATGACTGTTCCTTTGTTATTACTTTGGTTATTTTG -CACCCCTACTCCATCCATTCATTATGTACAACAAGCGCAAAAACCGACCTCTATTAACAG 
-CATTTTTCCTGATTGTTGCGACATATTTCTTCTTTTTCTGGCTACCTAGTACAAAAACGT -ATCGAGGTCGCCTAACCAAGCCAATCTCTAGAAAAGACAACACCATTCGGCACGATGCCA -TTCGGTTAGATAAAGTCATCGAGAGATTCCCCGTTACGGAGTACATCCCACTGCCAAAAA -GTTCGGGGAAAATTCCCCGCATCCAGCATAAATTTCCCAAAGAGACCAACAGCGAGCGCA -AGGCCAGGCTCAAGAAGCGCGATGCTGTGAAGGAGGCGTTTCTTCATTCATGGAATGGAT -ATAAGGATTATGCGTGGATGCGAGACGAGGTAAAACCCCGCACTGGAGGGTATCAGGATA -CATTCAATGGCTGGGGCGCCACGCTTGTTGATTCGCTAGATGCGCTTGTGATTATGGGGC -TGGACGATGAGCTGCAGCTGGCTCTGGAGGCTCTGGAGGAAATCGACTTCACGACGACGA -AGAGCAAGCAAGTGCCTGTCTTTGAGATCATCATTCGCTATATGGGTGGCTTTATCGCTG -CCCATGATCTCACCGAGGGCAAACACCCTATTCTATTAAAAAAAGCTGTCGAGCTGGGTG -AGATGATTTTCAATGCGTTTGATACGCATAATCGCATGCCCCAAGTGCGCTGGGAGTGGA -CTAGGTATGCCAAAAAGAACTCATTCAAATATCCAATGAGAACTTGAGACTGATAATACC -CAGGTCCGCCCAAGGCAAAGAAATCACACCATCTTCACGCACAAGCTTAGCGGAAATGGG -CTCCTTGACCATGGAATTCACTCGTCTGACCCAATTGACTGGTGATCCTAAGTACTATGA -TGCTGTGCAACGAATTATGAACGAGCTCGAAATCGGACAAGACAAGACTCGGATGCCTGG -CATGTGGCCGACATGGATTGACACCGACACAATGGACTTTGATGACTCCGAGTTCACCGT -CGGTGGATGTGCCGATTCCGCGTACGAGTATTTGCCCAAAGAACACATTCTTCTGGGCGC -ACAGACGGAAAAATATCACCGGATGTATGAAAAGGCCATTAAAACACTCAACGAGAATCT -GCTGTTCAGAGGAATGACCCAGGACGAGGATCAGCATATCCTTTTCACATCTAACGTCAT -TGCACTGCGAGGTAACTCAAAGACCTTCCAGTACACGCCTGATCACCTCAAGTGTTTCAT -GGGTGGCACCGTGGCTATTGGGGCTAAGGTCTTCAACCGGCCAGAGGACATGTTTGTTGC -CCGGGGACTTACTGACGGCTGTGTCTGGGCATATGATGTTATGCCTACGGGGATTATGCC -GGAGATCTTCAAGGTCAGCCCTTGCAAGGACATTGATGAATGTCCATGGGATGAAGAGCA -GTGGATGTCGGATGTCATCTCACGGTCTATTGAAACTGAGGAGGACCGCGAGAAAGCGGA -GAAGCGCATTGAGGCTGAAAAACTGCCACCAGGTGTGACAAGTGTTCAAGATGCATCGTA -CAAGCTCAGGTATGGGTTTATCTTATTCCTCGTGGTGCATTTCACTAACATACTATCCGT -CTAGACCTGAAGCTCTTGAGTCCTTGTTCATCATGTACCGGATTACTGGAGATAAGACTC -TTCAAGACTCCGCGTGGCGCATGTTCAAGAACATTGATAATGTGACTCGGACTAATTTTG -GCCATTCATCCATCAACGATGTGCGTCATTCGAGACCAAAACACGAAGACAAAATGGAGA -GCTTTTGGCTTGCGGAGACTTTGAAGTACCTCTATCTTATCTTTTCTGAGCCAGATCATA -TCAGTCTGGACGACTATGTGCTGTACGTTTGCCACTTAGCTGTTTTTATCCTGTCTTCAA -GCGCTAATGATTCTTTTTCTCAGGAGCACCGAAGCCCATCCTTTTAAGCGGCCAAAGAAC 
-TAAAGCGTTCTAGCGTTCTAGCATTGGGAGTCTGTGTACTCCGTACAGCTTTTCTCATCA -GCAATATCTCAATTTGATTCCACAGGACCGGCGTTACGGGCATTACATTACGGCTTTCAA -TATACAGCATCTGGTTCCTCCACAAGCGCCATAGCAAAAGATATCCCAATCACTAATTTA -TCTGTGCATAATATCCCACATTCATACATCCCACAAGTAGGCATAATACAAGATTATAAT -TATACATAAGCAGAGACATCAACAGAATTAACCTTCAGATCCTCATCCTCCCGCCTCCTC -AGCTCCGCTTCATCCTCATATTGAATGGAGTACCGAATCTCCTTATGTATTACCATTTTA -TCCTCCCCATCCCCTTGCTCGGTCGCAACACTCGCATGTCTAGCATGCGACCTCGGCACA -AGTTCAATCAAATCTGACGCCATTGCAAACGACCCCACAGACCCAGCCCTCACAGCTTTT -TTCCCCTTTTCCATCCTCTGGGAGCTCTGCTTCTTGCGCGGCGGACTGAGCGGGAACATG -GACCGACGCATTGGTGTAAGTACCGTAGCAGACCTACCACTATTCCCATTGCTTCGATTA -TCTATAAGCGCTCTGTGCGCATGGAGAGTGGTATCAAGACTTGGAGGACCCTTACTCGAT -CCTTGGGTCTTTGACTGTCGCGACCCCGAGCCCGAACCATTAAAAGTTGGAGTAACCAGA -GGCCATGACCCGGTACTTGCTATTTTTCTCTGTATGCGAGAATGCGACATGGACCTTGAA -ACGGTATTCGATCTCGATCTCGTGCCTGACCCCGACCCAGATGCGGTCTTCGAGCCTTCT -TTCGTGCCGTTTGATCTTGACGCGCTAGATTCCAGGGTCGAGTCACCCGCTGTACCGTAG -TTCGTGCTGACTGCTGCCATAAAGGGCCGGAGACAGAATACGCTGCATGCAACTAGGGCG -TAGTGGAGCTCGACTTGCATCCAGACTGTTGGTCTGACAGCGGCGAGGGTTGTGTCTTCG -TTTTTTAGAGCGGTGTGGAGGGTTGAGATGTGGATCATGGAGAAGATTATGAGGCTGGGA -TTGTGCGGTCAGATTGCAGATCGGGGATGGGGATTGAGAATAGATAAATGCCGATGTAGA -TTACTCACGGGAAACGGAATATAAATGCAAAGCCGACGGCTAGTTTGCGGCGAACGGACA -TGAAAAGACCTTTCAAGAGCATCACAGCTAGGAGGAAGAGGAGGAGTTCGGTTGTAACGT -CGACTGCTGCGATGAATTGCCATCGTATGTACTATTTTTGGGAGATGAGAGGCGGCTTCG -GGGACTTGATGCATGATATGTACATCATTGGTGGCTTACATACCAAGTTATGGCATTGGC -CGCTGTCGTTCCGCCAAGGTGTGTTTAGTTCACAGTTGACTAGGACAATGAATATAGCCG -GGATGATCCAGGCCGTGCAGAGAGCTAGTGTTGCCCAGGATGCGCGATTATGTGGTTTCT -GTGGCGTGAGGCGGAGGTAGATAGCCACGACGCAGCATTTGGAGAGGTAGATGATTATTA -GCGCGATGATATCGCTTGTTGCAATTAACTTAGGTAGCCGCTTAGTTTGGGTTCGTTGAT -TCATTGCAACTTGATATCAAATGACAACTCACAGCCTGAATCTGATCGAGTCGACCATCC -TTCAATAACGATATAGAAGTTCCGAATCCGCGCGAGCATGCATAGAAGACGAGGCTTGAT -TGCACCACAGCAGCAGCCTGCGCACAAATTTGAATTACATGTCAGCAGTGTGCTCAGACA -ATCCATCCAACCCCAGCCTGGCTCGACACGACGCGATTCTTCAGGTCCTAACTCACTGTA -GCACCTAACAAAACCCAATCATCGCGCGCAAACGGCGGAATCAGCATGAGCCGCACATAC 
-AGCCGAATCAAGAAACACGCCAGGGACACAACCAGTCCCATAGCGGCAGTGATGATCACC -CACGCGCCATGGTGCAAGTCATCGACGACCTCGAAGGGCGGCGACTGGCCTGGCGGCACA -TACGGGCCCATCGCAGTGGGCAAGAAGAGTGGCCACTGCAAACGGCAGCAGGTTCTAGAC -AATTATTGTTCTCCGGGCTATCATTCATATCATGTGGGTGCATATATTGCGGTGTTGTGT -GTAATGACTTATCAACACAAACACAGAAACAACGGTAAAAGGCTGTCAGATCTGTCAGCC -ATGTGGGCTGGATGGCAGGACGGGAATATGTAAATCGAAGAAGGGGTTTGATTTGAGACG -CTCGGACACTTTTGTGTAGATGAGTAGAAGCAGGTATGCAGGCGTCATGGCGAGGCGATA -ACGGTTGAGCTAGGTTCCGTTTCTCATATGCACTTGCAATGGTAATTGTCTTTGCCTTGG -GGAAATCATATGCCGATACCCGATAGAAACATGGACTATCGAGTCCAGACATGTATGTAA -GTTGGGCTGGCTGGGCTCGCTCGATATACCAAGCTGGGGGTCACATTCGAATGCGCATGG -CAGGTTTATCCCTTTCTTCCGTTGGAGGGTACAAATGGTGTCGATTATTATCATTTTTGG -TGTAAGTTCAAGTCATCTGGTTGGAATGGTAAATTGCCTTCCCTCCCTCACGTCGAACGT -CGAACTTTGAAGTACATTCGTACTGTGGGCACGCTGCTCTAGGGCACAGCGGCCCGCTGT -AGGGGGTCGTCGATCAGCATGGGCCTTTGAGCATTAGGCTTTGGCTGCTCGGGAGGTAAG -CAGCCGATAAGCCAGATGGTTTGTTTTCGAGCAAAGCAAATAAAGCGAGGGGCTTATGCA -TTCACCCGCCATTTCGGGACGTGTATGTGGCGGAATTGTCTGGGGGATGGGCATCAAAGG -GATGTGCAACCGAGATAAAAATGGTTAGTGGATTAGCGCGAGTGATGCATCAGAAATGTG -TCGGGTAGTTGATGCCGGGTCTGTTCAAGGGATTAGGATTGTTTATTTCTTGCATTTTAC -GTTGTATACTCCATATCATTTTGTGGACTGCGCTCTTAACCTCGGAAGAGAGAAAGAAGT -CACAATCAATGTTTTGCACAACTGGATTGACATTTAAAGTAGCATTACTACAACCCCGAT -AGTCAGTCCAGATTTGAATATCTCCCAAGTATGGAGATGTTAAGTTGACAACCCCCCTTG -GCATGCAAGGATTGGGATCCCATTCCGGGCAAGAGTCAAGGGATAACAAATCTCAAAAAT -GAGTTTCAACAGTCCCATTCTCAATTGGATCTGGTGCATCACCCTCGATCAAACAGCGAT -GTATCATTCGGATCGCTGCTTGGGTATTCCGGGTTGTTCTCTCCCACATGGCGCCCGAGA -ATTTTCCCAAGCCATGCCTTGCTCCCACGTCAATACCACGCTTCGATCCCTGTAACTCAG -TGGTCGTCATGATTCCATTCCTTTGAGTTTTCTCCTAATACCTGCTTCTTCTGGTTACGG -CTCTTTTCTCTCTCTTTTGTTGGCATTATTAGAGGTTGCGTTTGTCGGGTGCTTTTATAC -TTCACATCCTAGATTTTTCGGGGCGTCGAGTTGCAAGGCGATACAGGCTTTTGCTTTTAT -TCTCGCAACCCCCTGCTTTCTTGTGCGAAATTGTCTTGTCTCATCTACGATATATTTTAA -TATAGTCATTGAGTCACGCGCGCAATCATGACAGTCTCCAAAAAGTCTAGACCTGACTCG -CAGCAGACGATGACTGTACTTGAGGACGACGAGGAGTATGAGCAGAAAGGCCTTGTTGAG -CCTCGGTATATGGGCACACTGGCAGATCGGCGTGATATGAATGCTCTTGGTCGAGTGCAG 
-GTTCTGCGAGTAAGTTGCTTTTCTTTTGTGATTCCGGTGTGTTGGCCTTGCTAACATTAG -AAAGAGGAACTTTCGGTTTATATCTATTGTTGGATTTGGATGTACTTTGATCTGCACATG -GGAAGTAATCTTGACGTACGCACTAGCCCCTACGTACAGTAATCATGCAGGCTGCTGAAC -CCATTAATAGATTGCTGGCGGCGGGCTTGACCGACGGCGGTACAGCAGGTTTGATATGGG -GATTCATCGGCGTTTCAGTCGGATTTACCCTTGTCTATGCCAGTATTGCTGAGATGGCCT -CGATGTTTGTGACTGACAACTGTCTGCCACTAGCTAGATCGTCGCTAATCAATCTAATAG -GGCTCCAACTGCTGGTGGTCAGTATCATTGGGTGTCGGAGTTTGCACCGAAGAGTGGACA -AAAGTATCTCAGCTATATAACAGGTAACATGACTACCTATTGCCTATTTAAGTCTAATTA -GCTAACATAAAATCTCAGGATGGCTATCTGCAATGGGTTGGCAATGTGCCATTGTATCCA -TTGCCTACCTTGCCGGCACAATCATTCAAGGTTTGATCGTCTTGAATCAGCCAGACTATG -ATTTCCAGAGATGGCATGGTACCATGCTGGTAATTGCCATCTCGACATTCTCAATTTTAT -TCAACACTTTCCTAGCCAAAAATCTTCCCTTCGTGGAAGGATTGATTTTAATCATCCATA -TTGTTGGCCTGTTCGCCATCATCATCCCGTTGTGGGTACTCGCACCACGCAACAATGCTC -GCGCAGTCTTCACGACGTTCAACAACGGGGGCGGCTGGGATAGCCCTGGGACGGCAACAT -TGGTCGGCCTGTCTACTACCATAACCTCCATGCTTGGTTATGATTGCTCGGTTCATATGT -GTAAGAGAGAGCTCTATAAATGACCTCAGGCCTGGTATGTGATGCTGACTTTCAGAAGCT -GAAGAAATCAAAGATGCGTCAGAAACACTTCCAAAGGCGATGATGACGTCTGTTGCCGTG -AACGGCGTGTTGGGATTCATCATGCTGGTCACACTGTGCTTCACACTAGGCGATATCAAG -GACGTCCTCGACACCCCGACAAGATTCCCGTTCATCCAGATATTCTACAATACCACGGGT -AGCGTTGCTGCCACAAACGCCATGACGGCCGTCTTGGTGGTAACGCTGACCGCCAGCACA -ATTACCGAAGTGGCAACAGCATCGCGCCAGCTCTGGTCCTTTGCCCGGGACGAGGGGCTG -CCCTGTTCGTCGTTTTTCGCATATGTAAGCCACGTCTTCTCTCTGTTCTGGCCTCTGTGT -GATAAGAAACACAGCATGTTAACTCGGATATATTAACATGCCATAACCAGGTCACACCGG -GCTGGAACATCCCCCTCAACTCCGTGATGGTGTCTCTAGTCATCACAGTGCTACTATCGA -TGATCAACATCGGGTCGCAGGTTGCTTTAAATGCGGTCATCTCGCTCACCATCACATCCC -TGTTGTCCGCGTATATCATCTCGATCGGGTGCGTTCTTCTGAAACGGGTCCGGGGCGAGA -TGCTCCCACATCACCGGTGGTCACTTGGAAAATATGGCATGGCGGTGAACCTTGGGGCGT -TAGCGTTTCTCTGTCCGATTTTCGTCTTTGCATTCTTTCCCCTGTCAACGGAGGTCACCG -TTGAAACCATGAACTGGAGTGTGGCGATGTATGTTGGCATCGTTGGATCTGCGACGATTT -ATTACGTCGCTCGTGGGAGACATCATTTCATCCCTCCTGTAGCTTTGGTTCAAAGGGATG -GGAATAATTAATGTGGCACTGGTTTGGCTGGGTTAGAGTATTTGCCTTTTTTTTTTCTTG -TCTTTCTTTTTCTGTCTATGTTTTGGCTGCTGGGGGGAATTGTGTGTATTTAGACGACGT 
-GATGATGTGACGTACTCCATACATAGTTACGATCTTGAGATTATGCTGTTATCATATGAG -AATCATTGCTTCATTTGTCTTATTCGAAAACCAACCTACGGAGTCAATGGTACAATAATG -AGGTGGTGCTCACCCCTATTCACGATGATACTTACTCGATTGAAATGTCTCGGAGACTTT -CAAAAGCACGGGGACCTATCAGCCGGTCATCGGCATGAAAATTAAGTTATCAGATGGAAC -CTAGTATTAGCCCAAATCCTTCTTCCCCTAAAAAGAACAAGGGCCACTACCAGTCATATG -TAGAGCACAGCATTCTTACCCGACGTTACAAAGAACATAGAATGAATGCAATAGCTCAGA -TGCAGTGCCATTAGTACGGAGTACATAGAATAGAGTCAGTCACACACCAAAAGTCCCAAA -TATGAGCTATCTGTGAAGAAGTTAGCTCATAGTATCGGCAGGGAGTGGTTTCACACTATA -TGTTATTATTGACTCAAGGATCAGCCAGTGGATCTTCCAGCTGAAAGAGTCAGGGGTAAT -CTGTTTTGTTTTGTTTTGTTTACGGGGTACGGAGTATGTACTTTTTTTCTAGAAGATCCT -CTGACCCACTGATTGATCTCGGCCGGTCTGGCATCCCCACTTTGGACCGAATGGTTGGGA -AATTAATTTACCAACCGTTAGGGACGTGTAAGCTCAAGCTGTGGGAGCTGAAGGGGGCTG -TGATGGGCCAACGGAATACACAAGGTTAGGTCCGGCTGGTCGGAAAGTGGGGGGTGCGTT -CTATAGAGAGAGACCCATTCAATGATGAGATTTTGATAACATAAGAGCCCCCTTTCCAGG -ATCTTCCTATGAGGTAGGGCACCTGCGTACATATGGAGTGAAAGTGAAGACAGAGTTTCC -AATTCTGAGAGGTAAGGTTATCAGATCAGATGACATAAAAACCTCCCGAGGTCGGAGAGC -GGACAATCCGCCTATCTGATTGATCTGATAACCGAGGTACTAGTAATTTGTATTGTAATG -TACCGTTATGTATATTACAGTTTTCAATTCAACATTTTCTGTCGGCCCACCTCGGAACGT -ACATTTTTTTATTTCCAATTGCAGTTACATTATCCCTGTAGAACCCCTGTATAACGTACG -TTGTACGGAGTACTCTAGGGGAATAGTTTAAATTGCAATCATCTGATATCTTATTATAGT -TATTCCGGGTTAACAGCCGATACTTCTCGGGTGTTGCTAACGCCACCAAATACGTGTAAA -GTACAAGTTCATCTTCATATGCACTGTCGATCGGAATGGATACGATAGCTTGAAATGGAG -TCATTGCCATATCCAGTAGTTGGGTCCCTATTACTGCTGACAATCAAGATCATCGTTAGT -CCAGATCTTATATATTCTAGCTCTTTATATGGTAAAATATCAATGCCGAAACCAACCGCG -TAACATACCCGATGACGAAGAATGTGGCAAATCTGATAAATCCCTGTTCTGTTTCTGATC -AACGCTTGTGAGACCCTGTATTTCTCATTTTCATACCGTCATTTTCACTTTCCCGAACCT -GTACGGAACTATCGTCCCGACCATTCTTCGGGTCCACGCTTTCCTCCCTCATTCCACTCT -TCTATTCTGTTTCTATGCTCTTTCCATAGAAAAAAAAGGGGGGGTGTTCTCTACGATACT -TATCATCACGTCTCGACTCATATACCCCGGTCTTCCCTTGTTGCGCCGACTTTTGTGCAA -CTTTTCGGTGACTCCCAACACCTCTTTAAATTTCCTCATGGGTATGCAGAGAACGGTCAA -TCCCAATCCAATTGTTAAGACTCACATTCTGATCTATCAATTAAGTTCACGCCGTTCAAT -CTATCATGCCGTCGGCGACTTATGCTTCAACGGGGTCTCGCCCCCGCAGAGGCACTTATC 
-AAGAAATCCCAGTATCCTCGACCTCCCACCCAGATAACCCAATTCCGGAAGAACCTCGAG -GTGTACGTTCACTTTCAGTGACTGACACCACATTCAATGTCATCTCGTCGGGCTCCTCCC -AGGACACTGCATCAAGCGGTTCAAATTCCCCAAATACCCCAAATACCCCTCTAACACCAC -CCACCGAGTCCCAATCGGATGAAGAAAATGCGGAGGAAAAGGTCGACGATGGTCAGCAAC -GGCGACGAGCATCAACTGTCCTCATTTCACAGAACGCGGACGATATGCGAAGGGTATTGG -AAAATGTGGGCACAGCAGGAACTCAGAAGCTTCAATCCCTCTGCTGTGGAGGGGGGTGTT -GTCGAGGACAGGCACTGCGCCCTGTAGATGGACCGATTTCAGGCTTCAATTCTATTGTTG -GACCTGTCGGCAACAAGGCCTTTGATAGCCTCAAGCTAAACCTCAATCTCCTCACCATGG -ACAGTAAGCTTTCCAACATTGTCCCGCTCCCAGAGAAGACCGTTTCGTTCAAGCCAGTCC -CCGCATCGGTGGTCGAAATGGCCTTTGGCCCCGCAGACCATCCGCCAGAATTCGTGCAGC -CGCACCCACCTTACCAGGTGTACCGTGCGCCTCTGTTCCACACCCGTGAGTTGACGGGGC -CTGGCGCCGAGAAACGCACGTATCACTTTGATATCGATGTCACAGACTACCCTGCCGAAA -GCGGGATGGTCGACTTTGTAGTCGGTGGTGCAATTGGCGTGTGCCCCAAAAACAAGGACT -CCGAAGTCGAAGAAGTGCTCAACCTCCTAGGTGTGCCAAAGTCTATGCGCGACAAGAAAG -TTCTCATGCGCACGACCAAGGGCCGTTGGCCGACAATCTGGGGTGACGACAAACCTCGCG -AACTCATCACTACCCGTCGCGAAGTCCTAAGTTGGTGCTCCGACATTCAGAGTTACCCCC -CAACAAAGCCACTATTCCGCCTCCTCGCTGAGTACACGAGCGAGCCGAACGAGAAGAAAA -TTCTCGAATATCTCTCCTCTGCCCAAGGCCAAGGTGCCTTCTGCGACATTCGCACAAGCT -CTTTCGTCTCCCTCACCCAACTCCTAACCGCCTTCCCAAGCTCCCAACCACCCCTCGACC -ACCTCCTCTCCGTCCTGAACACCCTCATGCCCCGCTTCTATTCCCTCTCCCAAGACCCCC -AGATCTCTTGCCAATACAAAGGCACCGAATGCCGACGCCTAATCGAAATTGCCGTTACAG -TCGCTGAATCCGACGACTGGCGCGGCGGGACCCGCACAGGCGTAGGATCCGGGTACCTGG -AGTCTCTTGCGCGGCGAGCGATTGCAGCCGAAGCCGCTGGCGAGAAACTTGATCTCCACG -TACCCATGTTCCGCGGACTGATGGCGAATCCGCTGGCGAAACGATTTGCATCAGATGGCC -CAATGCTGCTCATCGGCGCGGGCGTTGGTATCGCACCTTTCCGCGGCTTTGTGCAGCGCC -GTCTGCAGTCGGCGAACTGCGCTAACAAGGTCTGGGTGCTGCAGGGCGTGCGCGACTCAC -TGCTTGACGAGCTTTACTCGGGCGAGTGGGGTGTGCATGAGGATAAGGTGCGCACAGTTG -TGCAGAGCCGTAGGGGCGAGAGTCGCTATGTTCAGGAGGAGGTTAGACACCAGGCTGATC -TTGTTTGGTTTGTCATCAACTCGCTGGACGGTCGTGTCTTTGTTTGTGGTTCGGGGAAGG -GTATGGGTGAAGGTGTTGAGGCTGCTCTTATTGAGGTTGCTATGGCCAAGGGCAACCTCA -ATGCTCAAGAGGCCGATATGTTTTGGCAGCGGAAGAAGGAGGCTGGACAGTACATTGCTG -TAAGTTTGATGTGTTTTGATTTCTAGGGCTATGCTGACTCTTCTAGGAAACTTGGTAATC 
-CCTTCAATTCCAGGTTGGCTTGACGCATCGTGCCTTGATGGCTGAATGCTCAGTATAATC -ATGTGCAAAAGACATTGTCGGTCCTTTGTCTGTACATATTTTGTCCCATCCTGCCTGCAT -CTGTAAAAGCTCTGTGgttttgttttgttttttcttttttgcttCGAGCTCATGGCTAGA -CACACTCGACCAGGTCGTCGACTGGCCGTTGTTTTTGCTCTTTGCTTGATTCTGGTCTTT -TCTTAGCGTGGTGGTCATCTGTTCTCTCGTCTTTCTTATTTTGCATTCTTAGTATCCCGG -TTTCAGTTTTCGTTCCTCGGGTTTATGGGCTGTTATCTTGGTTCTATCTTCAATTGAATA -TGCATAGGTCTATCTTTGTTCTTTCTTAATTTTTCGGTCTTTTGTTATTGCCACGGTTTG -TTTGCTGCTTTGACTGAATTCATGTCTACTTTGGGGCAGCCTCAACTCCCAACCCAAATC -ACAAAAAAGACGACTTAGCCCGACCAACCCATACACATGGGGATCAAGCATCAAATTGAT -TTCTCCGACCTCTCATGTTTTTTTTTCCCGCACCGTCACGGCCATCCTCAGCTGACCTTC -AGAACCATTCATTCTTCTCCTGGTTGAAACCAACCTACCCTCTACATCTACGAAGCTTTG -ATCGGTTCTCTTTTTAAGGGACAATCACATCTGATTGTCGCTGCGGCGGTAAAACATAGG -CTTCCAGCTCCGACAGGAACACTTATGTTATGATGGTTGCCTAGAATCTTAGATTTCAAG -CCATTGCCCAAGGTAGGTTGACTGCTTTCCAGAGGGGATTTTTCCCCGTGGTCTCATGGT -TCTCTATCTTTCTTCTTGTTTCTAACCAGTTACCATAGGCCCCAGCAGCTCACGTCTCTC -CCCTCGACCATTTCCCCAAATGGGATATATGAGATCTGTGTTTTATGTTTCCCACTAGCA -TTCAGCAAAATGGATCCGAAATCATTAAAATTCACATGTTCTCAGATATCTCCTACGATA -CATGCTAATCCTTAGAATGTTTCACTTTGGCTTGTTTCAGTGTGATTACACCGCAATATC -ATATGATAGCCTTTTTGGAAGCTGTTCAAAGTACCCACAATTTGGTACTTAAGACTTGAC -ACCCTCCCCTGTACACCAACTTTATCACGCCCGCATTCCATTGTCATATTGATGCTAGTG -GACAGCTTCAGTCACCAGCAGTAAAATCTCGGAGATGATGGACTCTCATAATACTGGCTC -TTTGGAAACACCGAAAATTGGAGGGATGAGCGATAACGCAGATGACACAATGTTTGCCCG -AGGCCTAGTGGACTTCTTTTCACATTTTGCCAGCACCGAGGTTATTAGCCATGGGACTTT -TGACTCTTCGCCGCCCAGCAGGGATCCACTTGGACTGGAGTTCCTAGCCCGCCATCCAGC -ATTCCCCCCACAAATTGCCACCGAGGGACTTATACCCCTCCTCGACAGTTACATAGACCG -GTCCGTTTCAGCGGTCAGATCAGCCAGTGCCGTGCGGTTGGCGCTTCCACAACACGACTT -GTCGCAGGTTTCGAGTATATATGGCATACCTCAAGCCCCAGCGGTGCCCTATGTGTCACA -GTCCAGTTTCGGGAGTCATGATAGGCAGCCGGAACCCCCTGGACCGAGAGAGCTGTACCG -CATAGCTTCAAACTGCATGTTGTCGGCTTTTAACTTAAGCAAGGCTGTCAAGGGCGCCAA -GAGCGCCAAGGGTGCCAAGGATGACAACGTGGTGCGCATCAGCACGGACCGCCCAACTAT -TCCTAAATCGACCATCGCTCTTAGTCCTGAATATGTGTTTGCGACCTTTATGTTTGGAGG -ACTTTGGTATTTTTACGCCAGTCGTTCTTCTATCGAGCCAGTCGTGGACAATTGGTGCTC 
-ATGGCTCAATGACAGGATTGCTGGTGCTAGGGCACCGCCAATCAAGAATACTCCTTGGCT -CGGCTCTACTAAATCGTTGTTTGCTGCCAATACAGAGCATGATATTCTTGCAACTTTGTG -GACTTTTGTGACTCGTGGTGCTGGTAATCTTCAGCCGACTGGGTATCAGGCTAGCTTTTC -TGAGTTCTATTCAATGATGGCACTCCGGGCAGGTGTCTCTTTCGTGGCCTACGACGGCAC -CCTCTTAATTGATACTGCTGTCGTCATGCAGACAGTGGCCCAGGTTCCAAATCTTGGATA -TGATGGTATGACTGGTGATCAAAGAGGAGCTGTGATTTTTTTCGATTGCGAGATGGGCAA -AGCCTGGGCTGCTCGCTATCTTGCAGCGGTCCGTTGGTATACCCACTGCTCTCCTAGCGC -CATGGGATTAAGCACAATAGCAAGTGGCTTATATGCATATGCATCACAACGCCATGGGCC -TGGCTACTGGAAATACATCCGGCCACGGCTCATCCACACACAGGGTCACACGTCCAGCTT -TGACTCGCCCGTCTGTACCCCCAGCTTTTGGTACGATGGTGGACTTGAAGGGCGCCACCC -ACCACCACTCCATCCATGCAAACACTGTCCACAACATATCTATGATTCCCCCATAATTGG -GGCTATCAACGCCGTCTCATCAATCACAGAGTGTGAGACTTATGAGCAATGGCGCTCCCA -AATTGATATATCCGTTGCCAGCGTGGATAGTTGGCTGCGCGGCCTCGCAACTCGTGGTGA -GAGCGATGTCTTGCTTCGTGCCGCAGTAGGAGCAGCCCCAAGCTTCCTTAATCTATTGTC -TAGGACCCTTACTGCTGTACTGGAATTCGATTGATTGCCTTATGCAGGACTTACGGATTT -GATAGAGGTATATATAAGAAGAGAACTGTTGGGATTTTTGAATACTGCTCTTCGATCTGG -GAATGCAATAGTACCATTGTTTGTCGTATTGCCACTGCAAATGGGTCATGTGTACAGCTC -CAGTATTTGTGATATGGTACTTGAACTCCCAGGCATCTTCTCTTGCTGATGTGTTTGGAG -CGAAATTGGGAGAGCATCCAATGGTCCGATGCAAAGATTCCATTGCATAAGATACCAAGG -TTGTGTCATCCATGGATCTCAGAGCTTCTTTCCATGGACGTATCTTTCTTTTGTTCCGGA -TATGGAAGGCGATCTCGGGAAGGCCGAATGTGGGGCCATTCATCAACGTTGTCCACCTAG -ATATTCCAAGATTCTCTCGACAAGCGACCATCCTCATCCAATGAGATCCTTCTTACCTCA -GGCCCTTAACTCACATCGCCCCCCTTGTGATTATCGATATCACAGCCCAAGGTGGATAAC -AAATCTGATCAAGCGGGATCCGAAAGTCCCCTCGCTCAGCTTGGGCAACGGTGTTTCCTA -GATGAGGTTGGAGCCATATTCAAGGAATCGCGGATGACAAATTTTACGATATACAGCATA -AGAAGCGGGACGTGACAGTCCTGATATCTCACGAAGGCCCGCAGTATTTCCTAATTGGGC -GACTTGACGTTAAATTTGGGCTCTAAATTTAGCCGGCGAGGGGAGCCAAGCTCTAATACA -GAAAGCAGACAAAGAAACCGAATACTATCGACATGGTCTTTAGCGCCGAGTCAGGCACTT -ATGCATAACCTACCGATCCTCCCGTTGCTACACCTTTCACCATCCAACAAATCACCTCAC -TCCAGGACAGGATAGGCAATCTATTGCCGAGCCCTGCCTGACTGCCTAACCTGTTCAAGA -AAGAGACGACTGACACCAACTCACGTCATATTCCAAGCAAGAACCGCCCTGTAGAAATAA -TAAGGGTGGATTTGACCAATAGAGAAGAGAGTCTAGAATATTATGTCCCATGGCGGCGAG 
-GGATTGGTGGCCTTGTCGTAAACTATATGCTTGGCACACCCAGATCATGCATGAACTACT -CCTGTGACACAAGTCTATATTGCCTTGGGCAGCCAGATCAGCAAACAAATTGGATTGTCT -AGGTTCCAAGTGCCCCAGGTTCCGCACTTGCTGGAAGCCATGCATCACGCAACGGTCTAG -ACATTAAATTTGAGGTTGAGTTTATGCAGAGAGCCCTTTTGGGATGCCCTAGGTTGCATA -ACTGTGTTACGGTGCTGCAGTACTATACCGCCCATTATTATATTCAATATTCCCAATCGC -AGTGCTTTTGTGATACCCACATGGAGATGAGAAAGAAGGATGGAGCCGACAGTTCAAGAC -CATTACTTTTAGAAGGAACTCTAGGCTGAGTTTATAAAAAAAAAAAAAAAACCTTATAAT -AGAAGCTCCTAAGTCCGAATAATGAAAATAAGGTTAAACACCCAAAATTGGAGAAATATA -TAAGAATCAATAAAATGCTAAAATGCTAAAATTATAAAATTATAAAAGACAAAGATAAAA -TAATAATGGTAAAAACCCCGGTAGACCCAAGTACCGGGAAACGGAAATTACAGATAATGC -ATGGTTCCAATATGTATTCCCACCGATCTGGAGCAATTCCTCCGATGCTACTCCGTAGTG -TATACAAGTCAAAAGTGTATGCAAGAGGGATATCGTGACGGAGGACTCCGTACGGAGTAC -CCCTCTCTATTCTCTGGCTTTGAGAACTCAAACCCTCGCATTTTGGACCAGTCACTTTCC -CAGGGACGATCCTGGTTGCCTGCGAGATCCGATGATCTGCATTCCCTCATGCACGATCTC -GGATCTCTTAGGGTCGAATGGCATGGCAGGTATGGCGATCGGCTGAAATCtcattttcat -tttcatttttctcattttcccattcctttttttttttttttttttcttAATTGGCTTTGG -GATTTCCAAATAAGTTGAAAGTTCGGACTTTTCAAGATATTTATGAACGGGCGAATCAAA -GGAAACGCTCGGTAGATCCGAAGCTTCGAGGCTAGTTCCAATTCTGGAAATAAAATAAAA -ATTGCCAAATCTGAAAAGGATCTTCCCTTTGATGGGTGGATCAAATCTTAGCTTGAATGC -AATACCCAAAGGACATACCTTAGGGCCCGAGGTGACACTGCCGTGTTTAAGTAATTTATT -AAACTTTCAAGTCGATAGACTTGGGTGTCAGATGCAGACGGGTGATTCAAGATACTATAG -GATGATAGGGCTAAAATGGAAGCGAACGACAATTCCAATCCTCTTTTGATTAGTACATTG -TACTCTCGTATAGGATTTCACTAGGAATCCTTTACACATACAAAATTCGGAAATAGGGAT -TTCTTTGTTCCGATTTGATGGGCAGGCTGGCAATGGACTAGATCACCCTCAGCTTGCATT -GGTTTCGGCATCTCCAGACTCTTTCGCTCTTAAATTGTCAGAATGTGACTAGTTTCGAGA -GCGGGTCTATTAGTGCGATATGTGTCGTGTCAAGCTGAGAAACTACTCCAGAATATAGTG -CTCTAGATTACATATGGAGGCGATACGCGAGGCAAAGATCCTTAGCGCAAATCACGTTCC -GATCGGTGACATTTTTGAAATAAGCACCTAAAATGCGGACACACCTGAGAGATATCATAT -GCGAGCATGTTTCTCTACTCCAATCAAGTTCTCCCTAATTGACCAACTCAGTAACGTGCC -CAAGAAGGGGGAAAGCAAACAAGCCAAAGCGGGTTCTCATGATCCTCTGACGTTCGGATA -CAAAAAAAATCCCAGGAAAAAAAAAAGCAGACGCCACAAGCTCTGGAAGGAAGTGGAGAT -GGAAATTGAAGAACCCCTAGATCCCTGTAAACCAATACCAGCCCCTCTTTCTgatcgatc 
-aatcatcgatcgatcgaGCCGAGGCGTCATCTGTGCGCTTTCTCATTCGTTCAAATCGAC -GAACATTTCCGTCTACTTGACCATCTGGGTTAAGCTCCGGAACACGATCTTTTTGGAACC -AGTGACAACTCTCCTTGGAGGCGGCGATCGTTTCCTTCGGCTTGATGATTCCAACTTGGA -CTATTTGTTTTCCCCTTCTAAGAGCAGTGTCCGGGTTATATCTTTTGATCTTGGATTGAA -CACTCTAGCCTTATCCATGGATAATCCTTGATTTTGAATTCCTCAACTCCGCCGTACTCA -TTCAATCTCCCAGATCCCCAGAGAAAAAGCAATTAGAATGTGATTTGATTTAATATATTA -CAATTAGCTTTCGCCTGACTCAGGCACATCATGACAGAATCCTCCCCTCCTTACTCCGTA -CTCCTGTGGATGTTCGGACATACCCAACCTTTTCGATATTGCTTTTTTGTTGGAGAGACA -GAGGTACTCTCAAGTATAGCGTACGGGGTACCTCGTTCTCGGGTTCAACTTGAGAATCTC -TCTTTTTAGGATAATATCGACTTGCTGATAAACACTGGCCTGGGATTTTTCGATTTTGAC -GTCCACACTAGTGAACCACACTTAAtttcttttttctttttttccttttttctattttAA -ATAAAATCCCTTCCTCTCCGAAATCTCCAATTCCTCCAATGAGAACCTTGGAACACCTCA -AATACAATCTATAGTACTCCGTAGTTCATGAGAGATCCCAGAGATTCAACCTTAAGGGCC -CCTGTGCCCTTGACACGGCTGTACCCACCAGCCCACGATCGTCATTTTTCGGCCCCCCCT -GTTCTTGCTTTTCCATGGACATGTAACAGCCGGATATCTCtttttttttttaattttctt -ttattttcttttaattaatttacttagattttGGGGCGACTTTTCATTTTAGTATAATGT -ACGGGGCCCTGCATTATTTGGGACCGCATTTCCATCTTTGCCCTCATTCCCCAACCTTGC -ATATAATCTTGGCCTTTCTCTCTTTTTTTTTTTATTCTACCATGTCAACGTAAAATCAGC -CTGCTTTCCCTGGTGAGTGCCTGTCCCTTTTTTACACTTTTTGTTACAAACTTTCCCGAT -ATTCGATTCTGACCGTGTCGAGTATAGTTCTCTGTAAGACATTGTGAAACTCTTTGCTAC -AGACATTTCTATCCGTGTCCCCAAATACCTTGTCTATGTCACAGTCCCACGAGGTAGACG -AAGATACCGATATGGCTATTTCGTATACCCAATCGTCAACCGGACACCCTCCCCAAACCC -TCCCCTCATTCCGCGAGGTAAGATACCTTACTGACCTGGTGAACTGCCTGAAAGGAAATA -TCTAACGTCTCGCAGCTCCTACCTCCCCACCTCCATGATGAAATTGAATCAACCTCCCCA -TTCTACAACACAACCCGCCGCTCACAAGACCGCCCACCCTCGGGTCACGACATGGCCGAC -CCCCGCTCAATCCCATATGGCAACCAATCCAGCAACATGGCCTACGATTCCCATCGCGAA -TACACAGTCTCTCGAGGCGCAGAACAACAACCCCGCATGCCAGATCCCAGCCACCACCAC -ATGCATCTATCCGAGAACGCATCCCGCGGTCCCAGCCCCATTCTCCCTCCAATCCGCGAT -CTGGACACCATGCCCGGCCGCGCAATGAACTCTGCCAGCAAAGGGTATTCTGAGCGGGCA -CCACGCTCCGACCCCTTCGTCGCGCAGGAATATCGCCAGCCCGGACCGGCAGCAATGTCC -ACTGATCCGCATCCCCGCGGTGAGCACTTCGCAGCGCCTATCATGCACCCCCAGTCACCG -TATGGGCACCCGCCTATCGGGTATGCGGATGAGCAAATGTCGCCCCAGATGATGGGACAT 
-GGTCAGGGGAATTTCGGGATAATGGGCGATCAGATCGATCCCAAGACAAAGCGGAGACGG -GGGAATTTGCCCAAGCCCGTGACGGATATCTTGCGGGCGTGGTTTCATGAGCACTTGGAC -CACCCTTATCCCAGTGAGGAGGATAAGCAAATGTTTATGACGAGGACCGGGCTCTCGATC -AACCAGGTGAGCATCTTGACCGCCGATTGAGTTGCTTGCATGGCTGGATTGCTCACACTT -CTATAGATTAGCAACTGGTTTATCAATGCTCGGAGACGACAACTTCCTGCGCTGCGCAAT -CAAATGCGAAGTGGTGCGGATGCGGAGTCTCAACGACAGTCACCCTTTAGTGATGCTGAT -GGATCGGAGCATCTGCCCTCTCCTCATCATTAGAGTGACGACTCACCTGGACCTAGAGGT -TCGGGTCTAATAATGGCCTTTTTTTTTTCCTAATTTTGTTCATTTTTGAACGTTACATGG -CATTCGCAACGTCAGCGTTTTATTCACTTCACGACTGGGAGGCTATATATTGTTTCAACC -TACCCTTTACGAATCACGGGGTTGAGGATACGGCGAGCTCGATAGACTGGCATCttttct -ttttttttttcccttcttttctttctctttctctcttttttttCACCCTCCAGTTCTCTG -GACTTCACCTTACGTGTGTCGCATCGCCGTCCGGTACGGATACTTTGGTCAGACATTACA -TTTTGATACCCTTTCTCGTTGGTCGCGTTTACCAAGCGGAAACTTATTTTGTTGGGAAAT -CAACAGGCAAAAGGGGTACGCGACAACTTCTGGCATTGGAATCCCACCAAATCATTTCAG -TGATTGATTCTACTTTGTGTTTTTCTTTTTTTTTATCCATGCTGCACCACATTTTCATCT -GCATTGAGTGCTCCTGGACCAACCGCGAAAATTGCATTATTAGGTGTTTAGAGGAATCTT -CGGCTTTCGAATGTGCACACATTTATTTATTTTTTCCAGCTTCGTTCTCTGCTTCCGATA -AGTTTTTATCCTACAAGCATGATTCGCATATCAAATGTCCTAAGATGAAAATCCGGAATC -GGCAGATCTCAGCTGCCCACCGCGCAACCCGTCTCCACTACGTCTCCATCGACACTAACC -CATGTTGTATGTTCGCATCCCGCGGGAACGTGGACCTGGCCTTACCAATGGCTCGCAGCA -CTGTGTACCTTTCCCCCTCTTTTCCGCGATAATGACCTCTTGCGAGTAGTCACGCATCAT -ATCGGAAGGATGTGGACTCATACTGTCGTATATTTGTCTCTGTCTCAACCATCAAGAACC -TGAAAAATACCATTGAAAAAGAGTGGTTTTCGGGCCGGAGGAAGGAGACAACCAAGGTTT -GGACTGTCACTTAATTCCAAATTGATAGACCCGTAAACTAGTGGATAATTCTATACTTCA -TATGTCAAGATCGACCCCAATTTTAACTTTTATCATTTTCATGACGTCGATTTCCCAAAC -ATCGATCTCAATCCTACCACGGTCTTTGTTCGTTTTAACCTTGGAGTATGGTCCGGTTGG -AATCTGCTTTGCTCTTTTATCAGCATTACATTGTGAATCTTATCTTTCCAATGACAGTCG -TGGAATCCGAGCAAGTACTGATCTGCACTATCTTCCGAAGTTTCAGATTCCCGCGAACTG -ACAGTTGCATCGAGTCCGACTTAAACTCCAACTCTTCGGCCTCTCCGTTGGAGTACCGTG -GATTTTCCACAAATCTAAAAAAACCCACAAATCCACACGATCCACCTCAGACCATGGAAT -CCTTCGCCTTTACGATTTACTAGTGTATCACGTGAGTTCCAGATTCAATCTGATCTCCAG -TGTTTTTTTTGGCTCTTTTGCCATTATTCTAACCAGACTGAATTTCGATGTGGAGTTGAC 
-CATATTAATCCAACACTTCCAACGATTGAGCCATATATGGCTTATTATGGAACCTTAGTA -TCTACCGCTTGGGGGAGTCTCTTTGTTGCTTCTCAGCTCAAATCCTATTCTGATTCCGCA -ACAGATCCGAATCGAGTGCGCGTGGAAAGACAGGTGGAGGTTCCTTTGAATCACATCACA -TTACTGAAATATTGTACCTGCGAGTACATCTGTTGGAGAGGTTCTATTCGTACTCTTCTT -GTTTATGCTCTGTGATTTTCCCGAGATATCCCCTGAAGACACAATTATGGGGTTCTGACA -CGGCCCTAAAATCTTTGACTGCAGATCTTAAGCTTCGGTCATCTTGCCGAGTTGAATTGA -ATCATTTGAACCGGGTGGGGACAATATACCATGTAGGTTCTATACCATACCACTTATGGT -ACATGGCTTCAAATTTGAGACCTCAGTGGGGGGACTGCAACCCACTTGGTTGAACTCATA -TTGTAAAAGTGATATCCAACGCACATCCAAGGATTCGAATAAAGAAAAAGAAAAAGAAAA -AACAGGGTTAGGGTTGCAGTTACATCTATCTACTCTGTAATTCTACCGCGAAGAAGTAAA -TATGCATATAGAATGCACGACGTAGCTCGACAGATTAAGAAGCAAAACCCCCCAAAAAAA -AAGTAAGCACTTCATGGCCATACCCTCAATATACAAGGTACAGAATCTGTAACAGGGGGT -AATCCCGTACCCAAGTCCACATAGGGGGGAGCGGATATGCAAGCCACCAGCACAAACCCC -GAACATCCAGCATCTCCGGCGCCCACGACTGGCCGATCTAAAAAGCAAAGATGCACCCAA -CAAAAAGTCGTGCAGCGTACATACAACGTCCCAGATTTTGGCCGAGCTTGACCGAATCAC -AGTGCATGGAAACCTTCACCCCCTTCCTTACCGATGTACCTTCGCAACCCTGCCTCTTGT -CACTGAACTAATCCAAGACAAAGACCCAACCAAACCCTGGTTCTGCGCCTTACGATGGAG -TCCCGATCCGTGTCTTGTCCACCCCCCACCTCCCCATCCGGTGTCCGGTATCCCAGATAC -GTGCATCCGACCCATCATATAGCGGTGAAATCGGCATCTGACACTTCGTTGTTGTGTACC -AAATGCACATCTTGCGGTATGTTGGGTAAGTACTGTGCTTTTGTCCCTCCGTGCGTCGTA -CATATTATGTTGGGAGCTACGTGTATATGCGTGCGTTTTCCCTTCCCGCGTTGGTATGAC -ATGAGCGTTTAGGCCTCGGTCTGGGACGTGCGTGTCGATCACCAAAACGTCTCTGCTTGG -TGAGGTGGGAAATGTGGGCTCCCTGTCTTTATCGGAGGTGTCAACTGCGGGATTTGTGAG -CTGTCTCATGTCAGGATTTGGGTTTTGCTGATCGCGTGTAGATCGATAGCTAATTCATCT -ACATTGTAGTCTCACCGTAAACTCCGGAGGGCAACGGAACACTTGCATATGCTGTGCTCC -GTACAACGTATGTTGTATGTAGTCTGCACATATTTTGAATAAATGGATCGGAGCACGGAG -TACGCGGAGCTGTATAATGTATACGATATGATCTCCACATTTCAGATTTCGGTCGCGTTG -AAGTGGGGATTGAAAATAGAAACACCTCTCGTCGACCTTGGATTTCATATGATATTATTC -GTTACGGCGATTTATTTTTGGATGTTGCTCGCTAGAAGTCGCCTAGCACGATCCCGGTTT -GGTCCCTGTCCTCCCACAAACTATTGAATCAATCTATATTAATTCCAGATGAATGATCGG -CAGACCCACCGTTCGACGCCTATTATGTATGAAAATCAATAGCGTCGCTTAGCCTGACCG -CGGTTGCATTTTTGACATTTGCGCTCTCCCTTTTTCAGGGATATTAGCACGGGGGTTTGT 
-TTATTTGGGGACTAGCGTGATACTTTGTAGACGAGATAGCCGCTTATACTCCTTTTTCAC -TATTGCACACTACATGTGTCAAGGCTGCCTTGTCAATGCTGGGCGTAAGGTGTGTTTACG -GGCTGTCGGTGTGCTTGTGGATTGATTCATGGTGTTTGGGGGATATGGTGTACCTATCGA -AAAACAGGGTTGACGCTGGTTATTCGTTTGTAATCAGGGATGGCATGACTCGCCATGTCC -CAAAATTCATTTCATTATTGACATGTCTAGTCATGTAGATAAACTTGGCGGTTATGTTGC -ATGTGCACGCCACTGCCAACCATCTATATGTTAATTGGCCAATCCTAGGCAGCAATATGC -AGCTATGTACATCAGATTGTTCCCAAAATGATATATATATATGATGTCAAGTCAATATCT -GAGTCACGGTCTTAAATTGGCCACCACAGTTATGCAAGGATGGAAGCAAACACGGACGTG -CATCAGCCAGTCTTTGTGTACAGAATGTCTATGACTCAGGTTCACCGAGTGTTGCAAGTT -ACAAGTAGGATGTCACGAGTCAGAAAAAATGAGAATAAATGAGATGTCAGGCAGATCTGT -CCGTCGATGCATATCGCTTCTCGGGTATCTAGGAGCTGGCACGTCTCAATCGGGCACGCT -AATGCATACTCGATGTGATTAGCGTCAGGATACCCTGGTATGTAAGCATGACATTTGACC -TAACTCCTAGTAGATGGGTCTTTTCTAGTGGTTTGCCTATCTAGGTAATTCAGTCCGTGG -GCGCATTACCTAGTATGAGTAACATCCGACCGTTAAATAGACTATTTGCTCGTGCAAGGG -TTGCTAGTATGGTTAGTGCTGAGACATACCTTAAGCTTCTCATTGCGCTTCGTAGTGTAA -CCGTGATATATACTTTTCTTTGGTTTTGTATTAGCTATGACGGTGTCAATATCTAAGATT -ATTGGGAATCAAATATTTATTTAATTGGGTTATTGGCCTAGGCTACGCGCCGGCTATCGA -GTATGTGGAAATTTATTGCTTAGAAAAACCGGCCATGAGAATTAAACCTGTCAAATGCCC -TCCTCTCGGGCTATTTACCAGACTGCTTCCTTAACCTATGTAATCCGAGGTCACCATACC -TTAATTTTTCTCTCTAAGTATGCCCGTTTATCTAGGAATGCTGCAGAATGCTTGAACTTT -TCCCCCGCTCTAGGGCTAAATTTTTCTTTCACAGCTGCATGTTGAACAAATAACCTCAAG -CACCCCGGTATACGACCATAACGCTATGCGACAAGTAAAAAGGCTGTATATGCCCATCTG -CTGGCTTTATGTGTTTTGCGGCTAGTGTACAGTTGTGTTTCCTGGCCAAGTTTACTGGTT -CGCTGCACTCATGTGTGATTATTCCGATCTTTCGTTCTTTTCCCACAGTGTCTGATTTCT -GCATTGGGACTACGGCCCCCCTGCTCTGCTTATTTGCGAGACCCTTTCCTCGTAGCAATT -ACCTCCAACTATTCGAATATGTAACGAGGCACGTCTCCCATATGTAGCACTTATGCAGCC -TTGTATGCTTGCTCCCGGCGACAACTTTCTCGCCTCGATACTATGACCATACTCATGTCT -ATCTGATTTCTCATTAGTATGCTCGGACCCAGCATTGTTCCAAACATTGTGCAGTCACTC -ACCTTGCTCGCTCTCACCGTCCCGATAAAATATATAACCGGATCAGGAGCCGATCCAGTG -GATTTCAAAAGCTCAGAAAACGCCCATCCAGGGAAGCAGCAACATCACAGAAAGTAAATT -TACCCAATTACACGCAAAATATCAACCCCACAAACCTGAAGTTGCCAGATTATGCCTGCG -AGAGCTCCCAAACCCAGGGTCATGTGCAACACAAGCGGGGTTAACCGAATGTAAGCGTGT 
-GTACGCGGATCCCACAGGGCCTTCGCTTATAGTACATATTTAGGTTGACCCATACAACAA -TAGCACGGCCGGCTGGCCCAATGGCAAGGCGCTTGACTACGAATCAAGAGATTGCAGGTT -CGATCCCTGCGTCGGTCACTATCTTTTTTTTATATATATTTTTTCTTTGCCGTGAATTGT -TAGAAACAACGTGTCCAGCAGAATGGTGCAGAAGTGCAAGATCTGGCGATGTTGTCGATT -CCCCCCCCTCGAGTCTGTCATACATACCACAATGCCAGAGAGAACCCAAGAATGAAACCA -AAAAATGCGAGCAAGAAGACGAAAGAAGAGAAAGAGAGAAAGACATGATATGATGCTCTT -GGTCTAATGGAAACCTACGTGAGCAGCAGGACAGTACAATGCTGGGGCGGCCTCAGAAAT -AGGACAAATTGTAGGCGAAGTATGTCTAGCAGTATGGCACGGAAGCATTAGGCGACCAGT -CTGACCATGTCCAGTGCGCACAAATGCCAAGGGCGGATGACGAGTATTCTCAACATTATT -CCTCATCTGCAATATTCACGTGTAGACTGCACGTAGACATAAACTCAGCAGTCTCCTAAG -CGCTCAAGGGACGCAATTCAAGATGAAAGCATGCAGCTTCAGCACCCACCTATTCTAGTA -CATATTCAGAAAGTTAATTTCCCAATGGCTTATCCTACGGCAAGCTTGGAAACGCCACAG -GACTTTAAGAGCTAGATAAAACGTTCCGGCGACACTGCATTGATAATGAACACCGGGGTT -TCGGGGTCTTGGGAACCCCCGAGGTCCTTTTGTCGACGCTAACGAATAAATCCCGAACCA -GAGATTGTGTCTGAGCTTAAAGCTTATGTACAATACAGACGAGGCATATACGTTGGTCCT -GACCAAACTCAAACATCCGGCTCTATAGCATGATTTTGGATAGGGAGTAGATCAATCCAT -AATGCTATGACATGTGCCCGATCTAGACACGGGCCGAGAGATTTAAGATGTAGTCTCGAT -CCATCGCAAACAAGGCAGGGATTTCTTACTGTCAACATGATCCTTATCTCTGGGAAGTCG -TGTCTGCCTTGCTGGGCACCAAGTCGAAAGTAGATTGTCAGCCCGAAAACCTGTGCGTTT -TTAGATACACGTGCGGCTTCCTGGCGGTTGTTGATTGGAGAAAGCATGGTCAGATGGCCC -TGCAGGGAAGATGCAGAGAAGCCAGAAGGAAGGGGAAAACTTCCGACGAGCCAATCAAGT -TGACAGTGCTTTTGATCTCCCCACGCCGATTGATGTGTAAGGGTGGCGGTTTGTGGCTTC -CTGTCCGTAGCTTCCGTTTCTTGATGGAAATATACTTTTCTCAGTATCATTCTGCGAGAC -ATGGAATGATCGGCGCGTAGTTTCTATGCGAAAGATAATGAAAATGTGACAAATGAAGTT -TATATTGGCGTCGGCACTTTATCTCCCCAATATACATCCAACAGACAAACATCCAACATA -CACATCCACGTTCACATCCTTGTAGAGCGCATCGCCGAGTCTCTGGGCGTAGCTTGATCG -GCCGGTTCCGCGACGCCCAAAATCAAAAATTTACGCAATCATAAACTTAAGCGATACGGC -TCGCATAATGCAGCTCAATCTGGGGTCCCGATACTTTAGGCCGAGGTGAAGGTCGGCTGG -TTCGTCACCTGTAGACCAGCCCAGCCCGTCGATTACCCCAGAATCCAATGGGCCAAGCAA -CCTCGAATAATCAGGATCGTATATTAAATTTTAAACTGACCGCTTTTTTATCCGAGCTTC -CCGCGTTCGAGGTCCAAAGCCAAAAAAAAAGAACACTAAGACCCGGAAGGGCAGGCAGTA -AGCCCTCTGCATAAAACTTATACTGACCAAGTGGATCAAGGGAAAAAGATACCTCGGGGA 
-TCTTTTCGGGTGACTACTCTATATTCTTCCGCACTGCCACTCAGCATGGCGCGATTAGCA -ATTTGGGGAGGGTTGCATCCCCATTTCGATTGACCCGTTCAAGCTTGACATGGCCCCGTC -TGGGATCAGCGCTGTCAACGTTGAAGAATTTTCTAGACCTTTCCTGACCTTTCCTGACCT -TGACTGACCTTGAGTGACCTTGATTTTTGCTTTCAGCTAAGGTTCAATATTGATTAAAAT -GGGGAGACGGAAAAGGAAACGGCTGGAAAGTGCGATATCCCTAAGGATAAGACTAAATAG -AGCGCTAAGGGTATTAACCGGTGAGATACCCGTAGTTAACCGGGTCTAGAAAAGTTACAA -GGTCACATTGGGACTCTCTTCTGATAGGCATTTCTTTTGAGACATTGCACATGTTAGACA -CTATGAGCCAATGATTGCCGGGCTTGTTGTCCCTCTCCACTTGGGCCCTGATCCTCCCAC -TAAAATGATCAATTCAACAAGGATGATGGTCAAATCATATGAGGCCTCTAATCCTTCCGC -TAGAACCCTGGTCTAAATCTAACTGAGGTCGGATACCTCCGAGCAAGACCCCCCCCCAAA -GAAAAAAAAACAAAGGGGTTCAAATTAGATAGGAAAGGATCTCATAATCTCCGAAATCAC -AAATGAAGTCCAAGAATCAATACGCCGTACGGATGAGAAAAAAAAAAAAAAAGAGAATGT -ATCAAGTATAGAACGTGCCATTCAAAGCCCCTTAAACTCCCCTACGGTTCTCGTCGGGGA -AATTGGGACAGGGATGTAGTCAATGCGACATTATCAACCCCTCTTGAAGTCCTGCGACAA -TCGCTACCTTTCTAGAAGGCAGGAAATTTTTCAGATTGGTCGATGCACATTCGATTACCC -CAGGTCCGCTATGGGCTATAACGTACCTGCGCGGAGCTCGATATCGTCAGTAATCAGTCT -CTTGTCGCAATTGGTTCAACACTCCCACTCAGTCTAGTGACCCGCTATTATAAGGCTTGG -ATCTCCCTGATAAACAATGATCTTCAGGATTTTGATTTTGGGCTTCTGTTAAGGGTCCAA -TTGTTGATCATTGACTCTTTCCTTCTTCTCATCTTTTCTCCCTATCAGGTATAGGCGATC -TCCCTAAACAGTATACCTACACAATACCTATACCCCCAGTAACCTGTCCGTTCAATGCAA -TGACGAGAACCGGCCCTACAAATAATCCTATCGCGTGGGAAACCAAGACCATTGTGCCAG -ACGATGGGGGCAGAATTGACTCGGTCGCATGTCAAGATACGAGGCCCAAGGGGCGCATTC -GACGCTCAATGACTGCTTGTAATACTTGTCGCAAGCTCAAGACTCGCTGCGATGTGGATC -CCCGGGGTCATGCTTGTCGTCGCTGTTTGTCACTGAGGTTTGTTGGGCCCTTTTGATTCA -TGCTGTGCCTGACAGATACCCCGGGttttgctttttttttttgcttttttttgctttttc -tttCCCATTGCTTGCTTTTTTGAGAGTCCCCAGGTTGTCAATATGCTGACTGTCGTAAAA -GGCTTGAATGTGAGCTCCCCGAGACAACAGAGCGATTCCAGGACAATGCATCAACCTGGT -CTGATGCCACCGCTATACCGTCGATCGAGGAGCGGCTGGTCTCTCTCGAAAGAGGCATGG -GAGAGATGATCCATCTCATGCGGCAGATGGTGAACCGCTCCCCCAGCATGTCCTGCAGCC -CAACCTCTCAGGCTAGAAGCAACAGCATAGATGGAACATCCTTAAGCGATAGCATGTCTT -CATCTTTCTATCCGCTCAAGCCGGCACAACTAATTCGAGATCTGCAAGCCGAATGTTTCG -GCGAGAGAGATCACTTCTCCGATGCTGACATCCTTGGAGACATCGTCACCCAGGGCATTG 
-TAGATTCTAAACTTTCCATGAAATTGATTGAACTGTGTGTCTAAATGTGTGTCTGTCACC -GGACCTCCGTGCTAATGATCTCGACAGTTTCGTCGAATATTTTGGCCATTGGGTCTCAAT -AAATCATTCGTCCAGCATCCAACGGTCGAATACACTCCTTTTCAACACTGCATGTCTTCT -GGCTTCGCGATATCTGCCCGGCCTACCACAACACACTGTCCGCGACATTTCACTTCATGT -ACAACATGCCGTGGCAAAGGTGCTGTGGAAGCCCCCGCCGATGACGAGTGATATGCTCCA -GGCGTTGACCTTGCTTTGTCTATACTCTACTTCTATTCACAAAGAAGGACTGATGGATGA -CTGGTTGCTGAGCGGGATCTCGATCAACCATGCCCTCATCACATTTAACTTCCTCAATAC -CGTGCCTGGAGACGGGTTAAATCCGGACGAGCTATTTGCTCAGTTACGTTTGTGGAATAC -ACTCTGTGTAACCCAGCTACAGTACGTTCTATCCCACAGAGCTATTGAATATACCATCTA -ACATGAAACTCATAGCTCCGCCCTCGCAAACGGCCGCACCGTCAACATCCAGCAACAATA -CATCGATCAATGTCCCCGCATTTTAGAACACGCAGGCGCTACAGCAGAGGACGGAAGAAT -TGTGGCAGAAATCCAATTATACCGCATCGCCCTCCGACTCCAACATAGCCAGCACCGCCT -CCAATTCGCAGAACCCGAATACGAAGAATTGGAGCGCTGGAAAATGGAATGGGCCCATCT -CCTAAGTACATCAACCCTCTCACTAAATGCCCAACCCCAATCTAACATCCACCCCCCAAT -AGCCACCAGCGAAGAATCAACCCTCAACCTCAACCTCTGGTTCTGCCAACTCCTCCTCCA -CCGAACAGCTGCCCGCCTCCAACCAGATAGCGAGCGTCTGGTCCCAGAAATATGCGGTAC -CGCCCGCCTAATAATAACCCAATTCCTCCAAACTCGCTTTACTTCCGCACCCGCTCTAAT -CGACCACGTCTACTTCATCGTTGGCTACGCCGCACTCACGCTATGCGACTACACACTCTC -TGACCCACTAATCAGCCAAGTCCGCGGCTTCCTACTCCACCTCGCCCCAGGGGGCGACAA -CCTCTCGTACCGGATCGCGTGTATTGTCGGCGAAGTACAGCGGCGGTACTCGGAGGCTAC -TGCTGTTGTAGCTAACGCCTCGCATTCGTCGTCGCCAGTTGCAGAGGTCAAGGGCGTGCA -GATGTTCAGTCCATCGCACCATCGTCCCGGTATGGACCTCTCCCAACTTATGTCTGGAGC -GGAGGGATTGGATTCCCTTGTTGAGGGGTATAATTGCCTTGAGCAAATGATGCCTGGGTA -TACGGCGTCGCAGCCTGGGTTTGAGGCGCCGGATTTGTTTCAACACTCTCCTGGGACGGG -TGTTACGGGTGGAGCTATGCCTGTTGGCCTGGTGCCGCGGGCTTTGCATGATTGGTGATG -GGATTTTGGGGTGAGGTTGGCTCTTTGGTTATAGATCATGGGAGTTGGGGATGCGGGAAC -ATGCTGAGTTGATAATGAGGTTGGATTTGTTGGGGCTCGAGCATTATGTTGTGTTCTTTT -TTTCTGGCGCCGGATCGCAGTAGTAAAAAAGCACAAATGGAAATAGTCGAGACTTGTTCT -GAATTGGGCTTCTTTGTTTCGTCTTTCGATCCCGCATGGCGTAAATAGTTCATCTTTTTG -CATCTCTCCTAGTATATTCTTTCTTCAGCCTTCAGCAGGTTCTTATGTTTGTAATGGGAT -GAGCCTACAAGGTGCAAGGCAGATAGAGGGAAAGAGAGTTGTATTGGTAAAGCAGACCAG -CTTCCGGACTATGTACTAGTTATTTAATTGCCAGTCATCCTGGGTTAGGTATATGATCTG 
-ATCTGCCAGGCGACTACACTCTCAACACAATCTCAAGACCAACTCGATCGAACATGATAT -GGTATTTTTAGATATTGATCAATTGATGATTCCTTCGAAAAGAACTCCAAAAGCACCAGT -GAGCTTATGTATACACCTTTTTCCATTGCCAACAAAGCCAAAGGAAGAATATCTATCTAA -AAAAATCCCCCGAAAGAGTAAAGGGGTAGTCGTGACCTCGATGACCTCAACCGCAAACAC -CCCGAACCCGAATTCTATCACGAACACAGACTTGAGCCCTCAAGCAAAGAGGTAGGGAGC -AAAATTCTTGGCTCTCCGCCCGCCATAGGAACAGGAACCTCCCCACCCTCAGCAACAGTA -ACCCTGGCGCAGGATACAACCTCCGTCAACTTATCACCAGCCCTGAATCCAGTATTAGGA -ATGGAAAGCGAATACTCTTTACCTCCCGCACCGAGATTGGTCAAGACGGTGATTGCCTGT -CCGCCATCATAGCCCTTCCGCATGGCGATGGTGCTCTCGTCTTTGTAGATTGGGTAGTTC -TGTGAAGTATCAATTTTTTCCGTTCTTCTTTTCCTCCTTTCTTTAAGATAGAAAGAGACA -AGCCATACCTGGTAGGTAGTATAATTGGTACCCTTGGCAATAGCCACATTTCTAGCGCCG -TTGGCCTTCGCAATGAGCTTGTACAGTTCGCTATCCGTGTCGAATCCAGACAGCCACATA -GCCTCGCGATTAGCTGGATCCTCGCCACCGCTATAGTGCTGCTCCTGTCCTGCGTAGACA -ATAGGAATGCCGTCCGTCATGATAGTGAACGTGGCTGCATTCTTGGCGAGGGCAATATCG -TCGGTGACGCTAAAACCCCGAGGGATAGTTATTAGTTTCTAGACCTATAGAGTGCAGTGG -TTACTTGTTAGATTGATGCAAGTTCCACTTACGAGGCAAAACGTGGGTTATCGTGGTTTT -CCAAGAAATTACCAAGAAGCGTTGAATCCTTACAGCTCGATTTCACCGTGTTGATGGCGT -CGTATAGGGCAGCCATGTTGCCGGAGGTCGATTGAAACGCGTTGAGGAGTGGATAGTAAC -TGTTTTCACAAATTAGCCTTGTCTCTCCCTGCGTATCGGGTAAAATCTATGGGCAAGCGG -AATCTCACATTGGGTAGTTGAGCACTCCGTCCAATACCTCCTGGTATGGACAGGTATAAG -CAACGTCGCCGTCAAAGACCTCGCCCACACAGTAGACGCCGGCAGCTTTGTTGTAACCGG -GCCAGAAGTCTTTTTGGACATGTTTGACAGTATCGATACGAAGACCGTCAACTAGATATT -GGTCAGTATTTGAATATGGCAACATTGTTTTGCTACAGCAGGAGACCAAGGGACAAAAAC -CTACTCGAGAAGTCAGTAACAAACTTCTCAATCCAGTCATACCAAATCTTCTGCACCTCC -GCGCTATCTGTATCCAGATCCGGTAGCGACACGGAATTATCGCCAAGCCAGCAATTTTGC -GCCATGGTGTCGTTGCTGTAATCGGTGATTGGGCAGTAAGAGTGGAAATACTTCGCATTA -TTGAAGGGGTTAAATTTGGAGTAGTCTACGTCAGTTCCTGCACCGGCATAGCCCTTTTTG -TTCGATGTATGTCAGTCTCAGCCCGAACGGATTGCTCAGCCGGATGATTGGACTCACCAT -GTGATTTGCGACTACATCGACCATGAGATACATGTCGCGTTCATGCAAGGCCGCAGCGAG -AGCTTTAAGATCATCTGCAGTTCCGTAGTTCGGGTTCAGAGCCTCGCTGAGATTCGGTCA -GTAAGGATGTATGGCTGGAGGGTGTTTCTTTTCTGCTTTGCGATACCTACATGTCCTGTT -GCCAATATCCGTGATATGCATATCCATACTTGGTATCCTCGGTCAGTTGACCGGTCACGG 
-GAGTGATCCAAATGGCTGTGAAGCCCATTCCTTGAATGTAGTCCAGCTAAGAGATTGTTA -GTTGTATGAGATAACAAGGGTTGGGTCGTTACTGACTTTGTCTAAGATTCCATGCCATGT -TCCACCACAGTATCTCTATTAACATAGTTAACTCTAGCCTAATACAACAGACTCAATACA -TAGATGGAAACTTACTCTATCACTGGTATTACAAGTAGCAGTAGTCGATCCATCAGTTCG -GGCAAAGCGATCAGTGAGCATGAAGTAGATCGACTGCGAGCGCCATTCAGCTGGAGTTGC -AGCAATAGCAGTGCCAGCTAAGCTGGCAAGCCAAGCAAATTGCGCCAGAATCATCTCGTA -GATGTTTCAGCACCAGTGAGCGAAAGAGAGAAATTTGCCAACGGGATGATCAACAACAAA -AGCGCTGCAAGGGACTTGGTACAAACATCCCAGCAAACGCAATGCACACAACGGGTACTC -GGCAGACAAACAGAGATACTAGAGACCAATAGCTGTTGAAATCGGACCCCGTAATGAAGT -ATGGGCTTAAGCCAATAAACGCCAAAAGCCCAGATGAAGATCTTCACGGATGCAACTGTG -TAAATGAAAAGATAACCCCTCTAAATGCCTGTCTCCAGCCGGAGCAGATTCCACGACGAG -ATAAGAATACACCGAAAGGGGATCAAATGGCCAACTAATATATTAACCGAACAGAAGCTA -ACGCAACCGAAAATATGATCCTTCACATCTAATTTGTCAGATCCCGATTTCCCCAAAGCT -GAGCAAAAGCAAGCAATTACGACTATAGATGCATAACCTTGAAATGATTCGCTCAGCCAG -GACAAATCCTGACATGTGTTCAATGATATGATCCTTGCCAAAGCCCCTGACACAGGGACT -GGTCGAATCAGCATGCGCGGGGAGTTGCAGGAGTCGCACGCTGAACCTGCAGGAATGATG -AGGGGGTTGCGATGGGCTATTGGCTCGTTTTAGATGTCGATCCTTTTGTACAAAGTAAAT -TCCGGCTTCAAGTCGGAGAAGTGCTAAGAGTCTTGGTAGATGCAGCTAGAATAACTTAGG -ATATGTACCCCGTATGTTGTGGGATGGACTACCTATAGGGCTAGATATGATGACGTGTGT -GCCATGAGACCCAATATCTACAACATAGGCAGCCAATATTTGGCTTCATAATTCGGCCCG -ACATGGAGAAAGTCTGTGCATTGTTGAGTCGGCAATTTATGAATGCGACCGGAGGATCGT -GCGGGCTTTTCTCTACTCAGCTGAGCTATACGAGGTCACCGGGACTTTAACGATAGGAGC -TCGGGCTGATATATGACAAGGAAGACAAAAACAAGATAATCAGGAGATGTCGTGAAAGAA -GAGAGAAAGTATATGAAAATTAGGAGTTCATTGTAGGCGTTAATCAATAGAGGCACCTAG -GTGCTATTGGGATAAACCAGAGAGAAAACAGCAGACTCCTATAGCATTCCACAATCTACC -ACTTCAAAGTCCAGTTCTCACTGAAAGCACCCTTCTTAGTCAACTCCTGCAAACCACCAA -CAGACAGCACATGCGAAGTAGCATTATAATGCACCCCAGAAGCCGGCACAGGTTTACCAT -TAAACCTAATAGCATTAGGCTTCTTAGACACACCAAGAACAGTGACAGTAGCAAGAGGAT -TTGACTCCTCCCAGTCACCCTTTGCCCTAGCGCTCAAGCTAGGACCCTTGACTCTGAAAG -AAACATCAAGAGTTTCATCCGGAGCATTGCTCTCTCCGTCATCGAGATGCAACTGACCTA -ATGCAGCCCCGCTGCCACCGAGCGCGACGAGTAAGGACCAGGGTGTACCACGTGCCTCCT -TTGTAGTCAAAGCAGGCTCCTGCATTGGCAAGACGCTGCCTCCACGCACAAAAACGGGGA 
-TATGACCGAGCGGGGCAGGGATGGTGGTGTTAACACCGGGCTTTGCGTCCACGGCTGTCT -GGGTGTACCAGTCGTACCATACCTCGCCGTGCTTGAGGCCTGGGAATATGCCCTTGACAG -AGGTGGCTTGGGGGGCGAGCACGGGTACGACCATGATTGAAGGTCCGAGGAGGAACTGGG -TATCAACTGCGGCGAGGGAGGGGTCTGTGGGGAATTCCCAAGCCAGGGCGCGCATGACGG -TGGAGCCCGTTGTGTGAGCTTCGTGGAAGAGGGTGTAGAAGTAAGGGAGGATTGCGTAGC -GGATCTTCATGGCGGATTTGGTGGCGGCTGCGACGGATTCCCAGACGTAGGGCTCTTGGG -GAATTGCAGAGAGGGTGTTGTGGTTGCGGTAGAAGGGGAAGAACGCTGAGAGTTGCATCC -AACGGTTGCAGAGTTCTTCATCGGTGTTACCGTTGAAGCCGCAGGTGTCAACGCCGAACA -TGGGGATTCCGAAGAGGGAGAATGAGAGGGCTTGGGGGATGGAGAAGAACATGAATGCCC -ACCTGGAGGCGTTGTCACCACCCCAGTGGCCAGCCCATTTACCGGAGCCTGCGAATGTCG -AGCGGCCGATGATGAAGGGACGTTTCTTTTCATCGACCTTGAGCAGACCATGGTAAGTGG -CGTTGAGGAGTTGGTGACCGAACAGGTTGTGCACATCGTATTCTTGGACACCATCGGAGT -GAGTGGCATTCGGGGACACGGCGTGCGAGCTGAGGTCATGGCCACCCTGGTCATGGTTGA -TCACGTATGGTGGGTAGTTCACGTTGCGTACTCCCGCAGTGGGTGTAGTCCGCAGATAAG -AGGTAGACGAAGAGGCAGAGCCACCAGTTGCGGCGGCAGCAGCCTGGCTGGAAGACGCCG -CAGACGCAGATGCAGCCTCGGTCTTATTGCTGAGCTCAAACCCTTCTGGATAGTCATAGA -TAACGTTACCAGGCTCCCCAGGCAGGGCGAACGGAGGGTGGGCTGGGTTCATGCTAAGAT -TTCCAGATCCACAGCTGCCAACGCAGAAGGAAGAAACCTCGTTCATATCAATCCAGATAC -CATCAATGGCTACTTTCTTGTGCCACGTAACGATTTCGTTCGCCCAGAAATCGCCGGTCT -TAGGATTATGCCAGTCAGGAAAGACAGTGTAGCCGGGCCAGACAGCGCCAATGTACTCGC -TTCCATCAGGGTTCTTCAGGAAAACGTCATCCTCGTGTCCGCGTGTGTAGGTATCATAGC -TGAGTGCAAATTAGCATTGTTTCTGCAGGACAGGCCTAGGGAGACTTACGCATCAGAGGC -GTTCTCAGGGTTTGGAATGTAGATAGCCGCATCTATGATGGGAACATAGTGACGACCACT -GCTGTGCAGCTTATCAAGGAACTCCTCACCCTCGCTGTAGGGAAAACGATGTTGATCATT -ATCAAAGTCGCGGTAACCATGCATATAATCAATATCATTCCTAGTCATGTTAGAATTTGA -ATCATTCAAAGGTGCGGTGCGTCAATTTACCAGATAGTCTCGAGCGGGATCTCAAACTTC -TCAAAGTTGGCGACAACTTGTTCAAGCTCAGTCCAATTGTTGTAACCCCAGCGGCACTGG -TGGAAGCCAAAAGTAAAGTACTGCTGCAGTGCTGGCAAACCAATGGTGCTCCGCTGGTAG -TTCTTGGTCACATCGGCTTGGCTAGGACCCGAGTAGAAGGTGAGATCAATGCTACCTCCC -AGAGTGCGCCACTTCAGACCCTCAGGGTTCATGACAACCTCCTGACCATGGGCATTTCTG -GAGAAGACACCGTGCGAGAAGGAAACATAGTTCTTAGACTGATCGGCCTTGTCACTTGCA -ATGAGAGTGTGATGACCCTTGTGATTGACCTCGTAGTAGCGGGTATCCAGATAGAAAGGC 
-TGGGAGCCGTAGAGGTTGCTAGCAAAAGTAGTCAGCACCAGAAGAGCACAAGCGAAGGGA -GAAAATGATGGGGAATGAAACTCACTCATCGATAGGGTCACCAATATCCGACGCATAGAG -GGTCAAAGTGAGATTGTTCAAGAGGCGCAGCTGCTGAATATGCTCTCCAATGCCATACAG -GTTATAATCCTTGGGTAAGGAGGTCACGAATTCAACAAACTGGTTCTCAAATACCAAGAC -CGAACCAGTCGTATCGAAGATCGGATCACGGGTAGCCTTGCGGGTCACCTTGAAGTGGAA -AGAGGGCTGGTTCGACCAAGTGATCTCTAGATCGCTATGTTTCTTTGAAGCATGACGATC -GGAAGTGGGTCGGGGAACGACATGCTCATCCAATATGAACCAAGAATAGTTCGAAGAATC -GAGATAAGTCGGGACGATTTGAACGTTGAGGCGGTCATTAGCGAGGTACTCGATGGACAA -GTCCAACGAGTCTACATCGGTACCGTAGACATTGCATGGCTTTCCAGCTAGGGTGAGAGT -AGCAGTCAATCCACGAGCCGACTCCTTGACGTTCGATGCCTTGTACCCGGGACAAACCGA -CTGGGCGTCGACGGCTTCAGGGTCATCGATATTGGCAATCAGCTGTGCGCCAACATCAAC -GCTAGCGGGGATGGTAAATTGGGAGTATTTTCCCGAAGAAGTCGAAGAAACACGAGAGCC -CGAGCTCGTGGCGGATGTTGAGAAAGCCCCAAAGGCTGTTGGCAAGAGCCACGCACCGGC -AAGAAGGTTCTTCACACAAACCATGACCACTTCCCTACTTTGGTTGCAAATTCGGGGGAC -GTCTCATTTATGTTCCTCGTTTTTATAGGTTGAGTATAACCAGTCCGTAGTATGTGTAGA -AGGAAACCCAATGCCGACCCCGCATGAAATTCCACTGATTCCCCGATGCAATCAGATCGA -GGTAAGAGGTCTTGGGGTGAAGTTGGTGGTCAAGACAAGGGGCTGGGATATGCAGAGTCC -CTTACGTCGTGTATAACACGAAAGAGGGGTACATTTTTATGACTTCATAAAAGACTACAA -ATGTCTGCATGTTGTATATGGTGTCTATTGTAGACATATTCTTGATTGTCGATTGTATAT -CACCTATACTCAGTCTACTACCTCTACGGAGCAGGGATGAATGTACCAACCGTGGAAGAT -TATTACTCCGAATTCTACTTGAGAAGCCGCCGATCATATAGATCACGCCATTATAGCCTG -GTGATTGGTCAGTTGTCCGACAAATGCCCGTCTACATTAGTAGACCACAAACGGACCTGA -ATTTAATATACAGCAACCCACTTTGTAGAAGGTCATTATAATCTGATAATCCGGCTGCGG -TGACTTCTTTCTCTCTAGCGGAAAATCTTACCACGGGATGCCCGGACATTTGGCAAGATT -TTGTCACTCCCGGGACTCTCGAACCTCGGGGCATTATTGAACTGGATCTCTATTAAACTA -TACAAACTGTCATCATTCAAGCTCCATGAAATCAGACAAGCTCTCCCCAACTATACGAGC -CATATCCAACTCTATGTATACGAAGGTAATACCGCAATGATGTACTCCGTACTTCAACCC -ACATGATATGTAGAAGCACTCAGTCCCCCCTTAACGCACATTCGGGAAGCCGAGAACAAT -ACAATAAGTCCTCTCGGTAAGTTGTACTTGCGGGAAATCCAGGGCCAGACCTGTGCATAT -ATTAATGGAGCCGTGAAGATATTTGATTGGATCGGGGATTATGTGAAACGGAAATGAATG -CTCAGCGTAGCCTAGCGGTGTCCACTATATACTAAACTGGTTAAATCCGGCGAGAAAATC -CCGCAATGATTTCAAGGCACCTGTCAATCAGCGCTATGAAGAGATGTCATTAGAGCCGAC 
-TTGGCCCCGAGAAGTACCTCGCCAAGAGGGGGGACTTCGCTTACGGGCCGAGAGTTGGGA -TCCATATTAATCTTAGCAGAAGATCATAGGGATACCTGAGATGGGGTCACTCAATGTGTC -AGATGTCTTGGTGATCGTGGTGATCTCTCTGATGATCTTTATAATCACTGAATGTATCTT -TATAGTTTAAAAGGCTGAGGGGATGTAGAGAAGTCGGGCCGAGAGGAAGCGCCCCCTGAT -CTCGAAGAGTAGTAAGAATAGCATCCAGGACTTCCATGCAAGTAATATCAAAACACAATG -AAGCATAACGCCAAGGGGCCTATGCGGGGGGACTCTTGCCCCCCCGCTTCCCCAGGTTTC -TAGTTTTCGTTATAATGAGCCACATCCTGGAAAGTCCCCGGACGAAACGTCTTTTTTATC -CCCCTAACATGAACTTCAACTCTACTGAGTCTACACAAGACTTAAACAATGTCCAAACAA -TCCAAACCAAAGCAGGCATGCGACAACTGTCGCCGTCGCAAAATAAAATGTTCCCGTGAA -CTACCATGCGACAAATGCCAACGCCTCCTTCTTTCCTGCTCATACAGCGACGTCCTCCGT -CGCAAGGGGCCCAAGTTTCGTACCCTATATCCCCTAGCCCCGATACATCCTCTCTCATCC -CGACTCGACCCACCCGAGAGCCTGTATCCCGAGGACGCCAGCACCGGCGAGATCCCGGAT -TACCAATTCAATTCGCCAATCTCGCCAGGCTTTATCATCTCAGACTCGCAGTTCTCAGGC -CAGGACTTCTCGGATACTTTTTCCCACTTGCCACCCCCGGAACTTGTCTCGTCTCCAGGC -TCCACAGACTCAAATGTGGAATCTGCCATTGGCTATGGCTATGGATATGTGAGACCGGCC -ACCCGCCGTCTCTCACCGCAGATTCTGCTTGTTCATGTCAATATCTACCTCAAGTACTTG -TTCCCAATTATGCCGGTGGTCCGAGGTGACCAGCTCCGGCTGGATTGTCAACAACCCGGA -CAGCTGTCTGCGCAGCGATATGCCTTTTTGGCATCGCTGTGTGCGGCTACGCATATCCAG -TTGAAACTGGATGGGCCGACGCCTGTTTCAGATCCTGCACGTTTGCAGAATATGAGTATT -GGGGATGGACACTCGTTGATGTCTGGAGAAGATCTTCTCGCGGAGGCCGTGCGGGCGCGG -CGTGATTGTGATGTTTTGGAAGAACTTAGCACAGAGTCACTTTTGACTTCATTCTTTTTG -TTTGCCTCGTATGGGAACCTTGATCGTCAGGATCAGGCTTGGTTCTTTTTGTGCCAGGCT -ACCTCCTTTGCATTCACACTTGGCTTGCATCGTGAGCCAACCTATGCAGAGTGTGGGGTC -GAGGAAGCGGAAGAAAGACGCCGTGTATTCTGGCTGTTGTTCATCACAGAGAGGTAAGCG -GCCGGTTATGAAGAATCAATGATAGTTTGGACGGGTGCTGATTGATGAATCAACAGAGGT -TATGCTCTCCAGCAAGCGAAGCCAGTCATGCTGCGCAGTTCAATTCATAAGCCCCAGGTC -CTTGGCTCGGAAGACCCCATCCTGGCTTATGGATTCATCAACCTGATCACTATTTTCGAG -AATCTCACACCGAACCTTTATGATTGGATCTCCGCCGGTGGCGGAGATGGCATCTTGGAG -AGGCCACCAACCTCCGCTATCCAATCCAATCTGTGCAAGGCTATTTCCCTAGAGGGAGTC -CTTGAGATCCAGCAGGTTGATATCTTGATCACGCAACAGTGGCTGCAGACGATGATGTGG -AAGCTGTCTATGAGCCATGCCACACAACCTGGTTCGCGCGATGATGCAGTTTTACCATTT -CACTTACCAGTGATTGTGGGTAAATCTGTGATGAGCGTGATTGGTGCTGCATCGCAAGGT 
-GCTGTTGATGCGCACGGAATTGGAATGGTAAGTCAACCCGAGAAGAAGTTGTTCGTTCGT -ATCCGCATACTGATTCAAACGTTATAGGAACAAAAGCTCTTTGATCTGGGATCCTCTATC -GCTGATGTGACGAGGTCTCTTGGACCAAAAGCCGTCCATAGGCTGGCCGAGTCAATTATT -GATCCAAACGAGTTGCTGTGGGGTATACTTCACACGCTTTCTCAAATACGTGGCTCGCCC -TCGTACCTCTTCCCCACACTACTGGATCGTTGCAGAAACGTGCTTGGGCTCGACTGCAGC -ATCACCACGGGGAATTTCCTTCCTATACTCGGCGCTGCTGTGCCGGACCAGCTGGCTTCA -TGGTCTGGCCAGCATGTCTGGGATTATGCTGTTAGTGCGGAGGATATTGACATCCACGAT -GTCGATGGCCAAGCCGAGTCAGCTTCTCATGCGCATGTTCCACAAGAAATGGGCTTCGAT -AATTGTCTCTTGGGATGATAGGCAACTGTTTGAATTTGGATACGGATTTCGTTTGTTGTA -TGATACCATGTGGAGTTTTATAGATGGATGTTTATGATATGAGAGCAGAGCTGAATTATA -GAACACGATCAATGCCGCGGTGCATCAAATCCATGACCATTGTTGTGTAGTCGATGTACA -TCGTAGTCGAGAAAGCTCATAGCTTGTGAGAAGTCAAATTGTCTTGGCATGTTTGTCTCG -GCATCTCCAATTTGGTGATCGGCTTCATGCGACTTTCTCTTCTCAATTGCTCAGGATATT -CTCTATTTGGTATGCCCTATATTGCCTTTAATACTTTCTGAAACGTGCAATTGACAATTT -TGCAGATAATCCTCGAGCCTCCGCATCAATTAAAGCTCACCAGAGCTCCAAACCTCGGAG -CCAATTCTCCATTGAACCTCAGCCTCGCTCACAACCAACACCATTGCCTACACAATGGCC -TCCCTTCGACCCCGTACTTTCCCTCTCTCAGCTCGAGCCCTCCGCTCAACTCTCCGCTCA -ACAAGGCCTTCACCAAACCACTTCCTATTTGCGCCCGCATCCGCCCCATCAACCCCAACA -CCTCTAACCCGCGCGCACTCAACATCCTCCGTCTCCGCCGATGAGCTCTCCCACTTCTCC -GCCCTCGCCAGCTCCTGGTGGGACCCAATGGGCCCCTCCCGAATCCTGCACCTAATGAAC -CCCCTCCGCCACGAATTCATCGCCTCCTGCCTAGCAGAAGGAACCTCAAACCCAACCGCA -TCCACCCCCTCCACCGCAACCCTCAACTACCTCGACGTAGGCTGCGGCGGCGGCATCTTC -GCCGAATCCCTAGCACGCACAATCCCACTCGACCCGACATCCCCAGCCCCCACCCCAACA -CTCGCCGCCTCAATGACCGCAATCGACCCTTCAACAGACCTGATCCGAATGGCGCGCGAG -CACGCGCGCATGGATCCAACCGTCGACACGCACCTGCGCACAGGTCGTTTCAAGTACCTC -AACACCACGCTGGAGGATGTACTTGCTGGCAATGCGCCGATTTCGACGTCCGCTTCAACC -CTACCAGATTCCAGCTCTACTTCAACTCAAGTCACACCCTCACAATACGACGTCGTAACT -CTCTTCGAGGTCCTTGAACACATCGATCCGAAAACCTCAACACCGTTAAGCTTCTTGACT -AACTGTCTGCGCGCTCTCAAGCCTGGCGGTTGGCTGATTGGATCTACTATCTCGCGGACA -TTGCCGTCATTCATCCTGAACCAGGTTATTGCTGAAGCGCCGTGGCCGATTGGTGTTGTG -CCGCGCGGGACGCATGAGTGGAATAAGTTCGTGAACCCGGATGAGGTCAAGGGATGGTTG -CAGGAAGGGTTGATGAGAGCTGCTGATACGGGTGTTTCGAGGGGTGGGAGTGCGGTTGCG 
-GAGGGGATGAGGTGGAAGTGTGTGGGTGCTATCTATGTTCCTGGAATTGGGTGGAAGATG -GTGCCTGGGAGTGAGGATTGGGGAAATTATTTCTGGGCTGTTAGGAAGGAGCTTTAGACT -GGATTTGAGGTGTGTATCTATACTATACTTGCATTGTTTTGAGCTATTTGTATATTATTT -GGTTTTTGTCAACTTCTACTGGCTTGTACTCATTGCATATCTCGATGATTCATAAATTCA -TACATGTGATCATTGACTCGTTTTCAGTGTACTCAATACACATCGCTAAATACACATAAC -CACCCAGCAGCCATCCCTATTAAGGATGCAAAAGAATACAGCCCTAGAACCAATCCCAAT -CCTGAGCAAAAGTGAAGGCAAAACCTTCCTCTCCAAGACTACTTCTTGCCCTTTCCCTTC -CTCTTTTTCTTGGAAGCAGCCGGCGGCTCAGGCACAGAGACAGTTCCTTGCTCAACTTCC -TCACCCTGCCCCTGCAACCTTTGCGTGCCGCTAATAAAGAAAGTCAAATCCCGATTTTGC -TCCGCTAGATCCGCATTGGCAACCTTCAGCTCATCAACCTCTTTAGCAAGACCCTCCAAC -CGCAACAACATATTCTGATTCATAGCCTTCTCCTCCTGAAATCCCTTCTCGAACGAGCGC -GCTAGAGCCTCAAATCTTTCCGCTCTCTTCTCCGCACGTGCACGGTCGTGTTCAAGCGAC -GGCAAAGTCTCAATCGTTACGGCATCGTACTTAGCTTGTAATTCTTCTAGACATGATGAG -GCGCTGGTAGCGTTAATTTCCGCCACGGCGGCGGCGGCACTAGCCTGCGATGCTTTGTCG -ACGGCGCGCTCGACGACTTCTTCAAAGTACGCGCGCTGGCTTTCAAGCTGGCTGGTGAGA -AGGTGTGTGTATTCGATACTCATGTTTTCTAATTTCTCGCGGGGGACGGCATCGCCCCAA -TCTGGTGGGTCGAGGGCACTGTGGTCTGCAGCGGGTAGCTCGAGGAGCTTTCCATCTGTT -TTGTTTTGGATGATGCGGTGAACGTAGGCGTCGCCTACGTAGTCCCATACGCGCTGGGAG -GTGAGATCCATGGAGAAGGCATGGGCTGTTTCTTTCCAGTGGGCGAAGGCGTGGGCGTTG -TCATAGCGGCCGCAGCCAACCGTGCCGCATATCAAACAGGCCCAGAGGTTCTCTTCTGAG -TGGCAGACGCTGCATTCTTGGGGTTCTTCATCTGGCTTCAGGGATGCGTTGCTTTTACGA -TAGTCGTCTTGTGTGTATCGGCACACGGGACAGCCGCTGCCTTTCCATCTCTGAAGACAA -GTGCAGTGGAAGACGTGTTGGCAGTTAATTGTTAGGAGCCCTGTAGTTTCGTCCATTCGC -TCTAGACAGACTGGACAGGTTGGTAGTTCGATTAGTGCGGGAGTAGGTGGTGCAAGAGGT -TTGCCAGTGAGAGTTGCTTGTCCGGTAGAGGATGCCGGGGGTCGTGAGGGGGTCGAGAGG -TTCGAAGAAATAGCTCCATGCTCTGAAGTTGGTGTTCCGTCATCCATGACCTGAATTTCG -ACCGATTTCACAAATACCACATGGCAGGTCTCAGGCTCGATGCTATTAAACACGTGCCCA -TTCCATTCTTTCTGCCATTCCTTCGCCTTGCGCCCACTCCGGAACTTCATCAAAACCATA -TACCGATTCGCGCGCGCAGTCCTAATCATTCGAAAATGACTAACATCATCCATAGTTTTC -TGTCCCATAAACCCCAAAAAGTCCGGCGGCGATAGATAAGATGGCACAGCGAGAATACAT -AGTGTTGTACAATCCTCATCAGGACTTTGCGCGGAGGAAGAAGTCGTCTCAGGGAAGGGG -TATTCAGCAAGGCCCGCCGTCCCAGGTTGTTTGGCACCCGCTGCCCCGGCTACACTCTGC 
-CTCACAGTAGCCTGCTGCTCACCGGGGTTGCGCGCCGCAGCTGAGGAGCCCTTCAGGTAG -GCGGGGTGCTCATCGTCGTTTCCGAGGTAAGGTGTTTCATGTGAGTCGCGGTACAGGTGC -ACAATTCCCCATACACTATCGGCAAGCTCGTAATCGAGAGGGATGTATCTGCCTTTCGTG -CGAAGGCCGCCTAGGATATCGGTGCCGATGCCTGCAGCGACGAGGTTCTCTGGTTCTGTG -GGTGCGGAGCCGCGCTTCAAGGTGCGTTTGATCGGTTGCCTTGGGGTGACCATGTCAACG -CACTCAATGGAGATAGCGCCGAAGCGCTTGTCGTCTTCGAGTGGGGTGGTGCTGGTGGAA -GGAGTGGTTCGAAGTGGAGTGGGAGTGGTATGGGTGGAGAGAGTGGATATGACTTTCGGT -CGATGCGCTAAAGTGTGATCTGGGAGTGGGGTGTGGGTGTTATACAGGCGGTTGGAGGTT -GAAGGCCCGCTCCGTTGTTGGAATGGCTGGAGAGCTTCCCATGCGGATTGTAGTGCGGGA -GAGGATTCCTGCGAGCTCGGCGCGTCGGATGAGCCGTGGGAGAGGAGTTGCAAGCTAAGG -TGATAGAAGTAAGCGGGCATGCCGAATGATGGCTCGCATCCTCTGGAGCTTTTGCGGCTT -CTTGGAATTGTCGAAAGAGGGAGGCGTGTTATCACGATAAGGATGTATGATTACTTAATA -CATTAATCACGTTGGAATCGCCCACTCATGTGGGCTCGTCATGTCTTGATCGTCTTTTCT -GATGTTACTATGAGATGTCTGTTTTGGAATTTTTTCTCTCCCCTTCGAAACTTATAACTG -TAACACAAAAGGCGCCAAATAAAAAAGTGTGAATCATCACAGCTAACTTCTAGTATAACT -GGAGATTTTATTGTCGCCCGCGTCTGCACCTTGCGCCACAAGTGGTTCCTGGCACGCGAG -AGTGGCAAAGAGGCAAAAATGATTGAAATTGTCGCGCGCGCGTTCGAGAAGGTTGTAGTG -GCGAAAACTTGTGTGCTTAAACCGAATTACCTAAACGAGACAGTTGTATGGGACTTGCAC -ATAGGTATGATGCTGGACAGATAGACCTCCCAACATCCAGACATGTCTATACTGTACGTC -ACTGACATCCATGTACCATATGATTTCTGTCAGTAAATATTACAGAACAATTACAATGGA -ATGGCATGCTAGGCAAAGACTAGCAAAATATTGCTAGAGTGGATTGTAGAGAACTATCTA -TCACATNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
-NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCCCAATAGGCCGAGGCCAGAT -AAGGGGTGAACGTACGAGTCTTGACAAAGATCTGCATGATGGGCTTTATTATTTGAGTTG -AAGATGTTGATTGGAAGGGGAGGATGCTCTCAAGTGAAACAACCTGGATAGATGGTGGGT -TGAAAGAAGGGAGAAAGTGAAAGAGGGAAATCAGCGTCCTTATGTAGGAAACCTTGGAAG -ATGAGGGGGCGAGGCGTCTTAGTCAGGTGCAGGTGACGCCGTGAAAATGCCAGAAGGTGC -CGGATGAGTCAAAATGGGTGCCTGGGAACAGTTTGACGCCACCTAGAAAATTCCGAAGTA -CAAATCTTATTGGATATCTACAAATAGACCTTCTGTAATCTTCCACTATAGATATTCTTA -TATAGGGGGTTCTAATATATGCCTCAAGTGTTTTGTCCAATTCCCCCAAGTGATGACGTT -GCTTATCGGCGTCAAAAAGAGGGGGTGGACCTTTTATATGGAGACACTAGTGAGAAAGTA -CAGGGTACGGGGCTCTACGCGCTCACTGCTGCGGTCTCCCGGGTACCCTGTACCCGCTTC -CAAATTTAGTACTCACCGGTCGTTTCCTATCTTTTTATAGTTCAAAAATGATAGTATAGT -CAAAGAATTCAAGTATTCAAGTATAGATTTAAAAACTATGGGCCCATTGGCAAATGTATA -AACAATCCGCTCTCGCAAAATTTCCTCCGACGCCGTCCCCGGCAGATATGCGCCACCAAG -AATGTGGTTCGGGGATAAGACAGCCTGCACACCCTTTCTCAGGGCCTTGTTTTAACTTGA -ATTGTTGGCCTGTTCCGTGACGGGTGCCAGATGCCACAAAAATGAAAACAAAACAATAGA -AACACCTTCGGCCGTATACGGTCATGACATTCGTGATCCTCGTTCATCGCTGGAGACAAA -GCTATGCGAAGAAAGGTATACACCAAAGACAAGAGCAAAAAAAAAAAAAAACCTTCACAA -AAAGGAAATGGTAAATATGACAAAAAAAGACACCAAAAACAAGGCTCATAAAAAAAAAGC -TAGATCAAAAGGAAAAAAAGGAAAGTCGGTTCGCTCAAAACCAAAGTACAAAAAGGGGTA -GAACCTCGCTAGGAGGGGGACCCATCGGATTCATCGTTAAAGAGCGATAGATGCGCTCCA -CATCGTCACGACAATGGTTCGCATATTCTCAGTGTTATCATTTAGCATTTAGAACGAAAT -CGCGGAACTGTACGCGCTGGAAACAGCAGTCGTGGTAAATATCATCATCATAATGCGTTT -GCGCATCGGGGTCGTTTAATTGGCGCGAATGACCAGTCGCGACACAAATTTATTTGAAAG -GCAGTCGAAGTCGCCACGGTTAAATTTCGTGCCGGTGCGGCTGCGGAGGTAGGTTTCTGC 
-AGGATGGGATTAGTTTCGGAGCAGTGGAAAAATTGGAAAGAGCTTCTCTTACCATGTTGC -TCGACATGTTGGCGGACTTCGGTCATGGTGTTGCAAACATCCGGGTTGGAGTACAGCAGC -TGGTGGTGCTTTTGTGAGTCCCACTCGGTGAACGGGGGCAGCTTCTGTGAGATGGCTTGG -AACAGTCCGACAATGACGCCCACCGGGGTGGTTTCCATATCGGTGCAGAATTTGTCAACG -GCCGACTGATCTGGGGTCCGGTAGTGGCTATCGAACTGCATCTCCTCCCACACGGCCAGC -AGAATGGAGGCCAGGATGAAGATTGTGGGCCAGTTCTTCAGCTTGTCGCCGCTGTACACA -CTGGAGTAAAGCGATGAAAGCTCCTCCAGTACATCCTTCTGCAGGTCACGCCACATGCTC -GCCATGGCATACTTAATTTGGAAGTTGATCATGACTGGGGCCATGGTCTTGCCAGCGAAC -TTGGAGCCCATGGACTTGATCTTGCCAAGGAAGCCTTCCTCCTCGCCGATGCCTTCAACC -ATGGTGACATGCATGGTAAGGTTGTAAGCAACCACCAGCTTGAGTGCCTTGCGGATGACG -GGCAGCTTAGTGCGGAAGTAGAAGCGGAACGCGGTCTTCAGCATTTGCGTCAGGAAAGGA -GTGCCCTCGAAGTAATCGTCGACAAACTTCTCAAAGGTGCCATTGCCGTCGATGTGGCGA -TCAAGATAATCCGACAACATCGCTTGTGAGATGCCCTCCATACCAGCTGAGAGTTTGGCG -GTCTCGACCTCGTATTCTGTTGGATCACGCTGCATCGACTCAATCCAGTCAATGTTGAAG -CACTTCTCGTCACGAACGTAGACCTCGCGAGCGTTGATGGGCATGACCTGTCCATAACCG -TGAGTGATGAAGAGGGTGCGCTCCTGGTCCGAGAATCCGGTGATGTTGCCAACAGAGAAG -CCCATGGTGATGTGGCGCTCGTAATCTGCCTTCCAGTCCTTCATGAAATAACCAATCTCC -TTGATATCAATGCGAGTACATGGCACCTGCCATAACCGAGCGTGGGATGGTTGGCATCCT -GCACAAGGCTCGCCCTTGTCACACGTCTTCTTCAGGAACTTGCAGCGCAGACATGCGCGT -AACTTCCGGATCTCGCTGGCCTGTTTGCGCTGGTCGGGCTTGAGGGGGCCCTTTCTCTTG -CCGACACGCTTCTCGGTTTCGGGCTTGCCGGTCTGGGACTGCTTGCGAACCTTGGTCTCG -GCGGTCTTGGCGGCAATGGGACTCTTGCGGGATGGTTTCCGGCTCGGTGGGGAGGTGGAT -GCTTGAGAAGACCCAGCTGATCGAGCCGGGGATGTTGATTTCTTGATCGGAACCGGGATC -GGCCGCACCATTCCGATAGGGCTGACTGCAGAAGGCGAGCGAGAACCTATCGAAGACTGG -TCGCGAAGGACAATTGCGCTGCTTGTCGGGGAGGTACCGTGGATTGGAACGGAGCTGctg -tgactgtgactgtgactatggctgtggttgtggctgtgagtatggctatgactataacta -tggccgtgggcgtgacCATGATGGTTATAGCAAGGCAAGTCCATATTGTTCTCAGAACCG -GGAGAACCGATGGGATTCGTGATATCCACAAAACTGCCGTATGATGTGGAATATGAAGAC -TCTGAAAGACTACGGTCATGCAGTGTCTGGGTGGGGTTGACGAAAACACCTTGTTCGGGA -TAACCAAAGTCGAGAGAGCGGCGGTGGTCGATAGTACTCCAACCGTTGTCACTACTGGAC -AGGGAACGGACCTCGAGATAAGTATCGGTGGGCGAGCTACTTTCAAGAACAGGGTGCGGG -GCATAGGAAACATCCGGAAGCCCGTTAGGGAAAGTCATCAAATCCGCCTGAAAGTCCTGA 
-TATGTGAATGGAACCATGTCGACCGGTGCCGACATCGGCAGATAAGGACCATCGAGCAAG -CCGGCAGTCATCTGGCCCTGGCCAGACATGAGATCGACGGGCGAAGAATGCATGGGCATG -CCATAACTCGTGGTAAATTGGGGCGCTGACGGGTCCTCGGACGAATATGAGAGGTGGTGA -GAGGGGTGTAATTGTTGAAAGTGCCAATCGACCATGAGATTGGGATCATGGGTCACCGAC -TGTTCCATAGGGTGACCGCCAGTTTGCAATGGGGAGAGACCATGGGCGTCTTCTGACCGT -GGGGGTACTTGTTCTACCGCCACAGGCCATGTTTGGTGCTGAAGATAACCTCTGGAGTCA -TAATCGACGCCAATAAGGCCATCACTGGATATATCGACATTGGACATGACTTTTTCGGAG -TCAAAATGATCACGGGAAAGTGAAATAGAAGGGGAAGAAAAGGACCAGCCTGGATGAGAA -GACCTTCTTCAGAAGGGTGACACCACCATTGAAGAAGAGGGAAGTGAGGCGGTGGGGGGG -ACAAAGCAGCGGTGAGCGAAGAAATGCGCTAGGGGAGTTGATTGTGGAGAGTGGAGAGGC -TGGCTGGGCGTGGAGGGAACGTTGATCCAGTACTGAGCGATGAATTATTTCCACGGGAAT -TAGTTTGCATTAGTGTTCATTAGAGGGAAGAGGAATTAAGGAGAAATATGAGATCGAGCC -TGTTACGGGAGTTTGCTGATCCCATGATCTATTGAATTCTTACCCAGGAAGTTGCCTTGT -TTTTCTTTTTCCTATTCCATATACTTATTTCAAACTCCAAGTATTTCATATCTTGATTCC -ATATACGGAGTACTCCGTCACTATTCCATATGATATTACTATATACTGTTTTGTTTCCCT -AGGTTCTGACTGAGATCTTTATGGTAAATCATCCGTGAGACCGAACCCGGAAGATCAGTG -GGGAGAAAAAAACCCTACCACTGCGTGGCAATAGGAGTAACCGAGATCTGACAGTCGGAT -CGTATACCCAAAATAAAGTCCTTCCTAATCAGATCCAATCAGACGGGGGTTCTGCCCTTT -CGTATCTCTGGTATTTAGGCCAATTCGGGATGCTGACCATGCAGCTAAGGATTCCGACAA -AATTAATATAATAAATATCCATAAGCTAGCGCTAAGGTATGTAGATTGCGAGAACTATGT -TATGTTGTACTCGAAATCTTGAGAGTGCCAAGTGGATATCGACCCTAGAGAGACAACAAA -TTGGACATGTCTGGTCTTTTTTGAGAAAAAAAAAACCCTCATATTCGTACAGATACTACT -CCGTACTGGGGGACACCTGGGCACATGCAGCAAGGGTAAGCCGACAATATACTTGCACAC -TGCATGGCGTCAGATCCATCACATATTTATATATATATTGACTTTTGTGACATGCAGAAC -CTCTTCAGGAGTACTCCATACTCCATCATAGCAATATATGCATCTTCAAGGCCACGAGAG -ACCCCGATTCTAGCCAGATTAAGAAAGTTGAAACTATCCGAGATGTGATGTCAAGAGATG -GGAAGACCCGGTACGGGGTAGAGAATCAAGTGGGGGGTACAAAGAAATTAAGACTTTTTA -CATTCACAAGAACAAGAACAATAGTAAGAAAAGGTCCAATTTACATTTCTTTGAGCTTCA -AATCTACAAAATTATCCCATGCCGCTCATCGGGAAACCTCGCAAAGTTCGATTTCCGAAC -GCACAAAAACCCAACCCAACTTCAACAGCGCCAGCCAAAACGACAGCCATGTCTACTGAC -TCTAACCAACCGCGCCCGGGGATCCCGGCCTTGTTTACGCAGCCGCCGCCCATCCGTGAC -CCGCTAATCACCGAGACCATCGATCTGCAGAACGCGACGCTGGAAAAATGCCTGCCATTC 
-TTGAAAGGCATTCACAGCACCCAGAAGGACATCAACAAACATGGTGTGCCTGCCCTGCAA -CGTGACATCCACACCAGCTTCCTATACGATGCCCTCGAGGATTATCCCGAGGGGTTCGTC -GCTATGGACGCCAGCCGCCCGTGGATTGTGTATTGGGCGTTGGCGGGACTCGCCATGCTG -GGAGAAGATACGACTCGATTTCGTGAACGGTACGTGCATGCTCTACATCCTTGCTTTTTT -TTTAATTATTTTGATATTATTCCCCTTTCCCCGCCTCGTTCTCCAATTTGCTTTTTTCCT -TTTATCTGTCATATGTCCACACAATGGCGAGCCTCTGGAGATATACCCTGATTGGAAACG -ATCATCTCATCTGTACAAGGTGAAAGATGAACCAAAAAAAAACGTCATCCTGAACCTTCC -TCTATTTTAGTGTGATTACTACGCTTCGGCCGATGCAAAACCCCACAGGGGGATTTGGCG -GCGGCCACGGCCAGACATCCCATCTAGCGGGCAGTTATGCAGCCACCCTATCGCTGGCTA -TGGTTGGAGGCGAGGAAGCCTTTGGATTGGTGGATCGAAACGCGATGTGAGTTGTCACCA -GCAGCTTGCTGCTGCATGTGCGTCTTGTGTGATCCTGTGCGTCTGACCCTTTTCTGGCTT -CCCCAGGTGGCAATGGATCGGCAGATTGAAGCAGCCAGATGGAGGATTTCGTGTTTGTGA -GGGCGGAGAGGAGGATGTGCGGTAAGTGAAACTATACAGGGCTGGAGCAGGCGCTTGCTC -GTAATCCCGTTCCTCTTCCCCGATGCAAGATCTGACCTGTCATTAATGCAGTGGCGCATA -CTGCGCTATGACTTTGATCTCGCTACTTGATCTGCCGCTGACGCTCGCTCCGGGCTCTCA -AGCGCGGCAAGCCGGATTGGAGAGTTTGACGAGTGGTCTTCCTGAATATCTCTCACGATG -TAAGAGTTTGATGTCGGGGTTATAGCAGACTTTGAGATGTGCTGACGTTTGCGCTTAGGC -CAAACATATGAAGGTGGTATCTCCGGTAGCCCTGGGTCGGAGGCCCATGGGGCATATGCC -TTTTGCGCTCTGGCATGTCTATCTATCTTGGGACCCCCCGAGGAGATCTTCAACAGGTCC -GCGACCACTCTAGACTAGAATATCCTCCCTGTTTTCATATGCTAATGTTTCCTGACAGAC -ATATGGACATCCCCATGTTAGTGTCATGGTTGTCCGCACGTCAGTCCGCCCCAGAAGGTG -GTCTTTCCGGACGCACAAATAAGCTGGTAGATGGATGTTACAGTCATTGGGTTGGTGGAT -GCTGGCCATTGCTTGAGTCTTCTCTGCAGGGAAAACCAGATAACACCGAGCCACCAGCCA -ACAGTCTGTTCAGCCGCGAAGGTTTAACCCGATACATCCTTGGATGTTGTCAAGGGATCG -ATGGCGGTCTCCGTGACAAACCGGGAAAGTATGTTTCCTATCTGTGACAGGTTCTTTTTG -CACATATGGCTGACCAAGACCAAGGCACGTCGATTCATACCACACCTGTTACGTTATGGC -AGGATTGAGTGCGACTCAAAACCATCACTACCGGACGGACTCGAGTGTCTCAAGCGGCAA -CTTTTCATCCTCGTTTTCCTGGAAGGCTTCCCCGAACCGCGATCCGGGCAATGTGTTCTC -AAGAGGTGATCGACTAGAGTCGATGCACCCTTTGTATGTGATCCCGCACAAAGCCGCGGA -GCAAATGCGGCTCTGGTCCGAGACCCAGCCCCTCATTGTCTAACCGAGAGGGGCGTGAGC -CGATGCGATATCAATAAGAAGACACGGGACCAAGCGCCACGACGGATTGAGGTTGGCTTC -AAAAAGTTGAAGCAGGTGTCATTGGTAACCAGACATTCCGAACGAGTGCATATGCATGAC 
-TAGCCTTGGATGCCTGAGACCGCAAGCCCCACGGTCATAGCAGCGGTCATAACATGGAAG -CCACGAGGGTGATTTTGACCAAGCCAAGGAGGATGCTGTGAACGAGACTCAGACGCAAGG -GGTTGGGTTGGGAATGTTGGGGGGTCCGAGTATGACTTTACAGAATAGGTCATAGCACAT -AGCACCAATTCCTTCTATTTTTTTTTTCTTTTATGCTGGCAGTGATTTTACCGATTCTGT -GCAGTTCTCACCAAAAAAAACTCACCCCTTCAGCGCTAAGTTTGAAGATTTTACTTTTCC -CGCCATTCTCGCCGATCAATACAACATATGAAAATGCTCTTATGATGTACATCATAATGA -GTTTGCCACAAAACACACAAGCTGCACGAGGCCCAAGAGGGCAAAAGTAATCGTCAATAC -CAGAATATGCAGATAAATCCAACGCCTTTCCCCACTGCAATGCCAGCTCATCACTGCGTC -CGATGCCTGCCGAGTAATTGAAGTTCTTCGACGCATGCATATTTTTCCATTCTCCGATAG -TCCAATTCATCCTATTACCCACTTGATGTCTTTCATGATTCCATTGACCCATTGACTCAT -AATCCACAAGCAATTGACCCCTCTTCAGACGCGTTCTGCACAACATGAACGGCCCCTCCT -TGGCGCCAATCACCATTCACAGACCTGCGCGTCTCGATATCTAGCAGGGCGGCAAATTTA -ATTACGCTTGAATGCGTCTGTGGGGCCAAAAATGCAATGAAAAGGCCACTCCGAGAGTCA -ACAAAAAAAAACCCACCCGTGTACGAAGTGTATACAGCATAGCAGTACCATAGCACTGAT -TTCATTCCAACCACTCTGTAATACTGGGATCCATAATCTTCAGATATTATAATTCTGCAG -ATTACAAGGTGTCTGTTAGTCTCCGAGAATGTTCATGTTTTGATCGGCGAATTGCAGGGG -CATAGTCCGACAGCTTACCCACGGTGACTCCCGGTTCTACTGGCAAAGAGAGATGCCAGC -GCATCAGTAAAACCCGCCGAGAAAACGGCACACGAAATACCTCGAACCTCCGTTTGAGTT -ACTGTTACCCTTTTGTTTTGGGCTATTTCCGAGGTCCAATGCGAGTCTGGCGTTTGCGGG -CATCTATCTTCTTCTCTTCTCTTCCTTTGTACTTTGTTTACTTGAGGGAGGTAGTCTATA -CTCGGATTCCAGATCCATGTCCTAGGGTCTCTTTCAAGATTGGATTGTTTACTAGctgct -taatattactgttactgcttactgtatactCGTCTTTTCTTTTTCTTTTTCCTCAACCTT -TCAACGGTGGGTATTAAAATCTTAGTAGATTTGACTTTTTGGTTCAATCCCTTTTTTCTA -GTTCAGGTTATGTGCATTGGTACTAAATATCTCCTTTTTTCTGTTTCTTTGTGCGCTGTC -TCAATTGCAACCGTTTCCCTCTGTTGGTACTGTGTACACCGGCCAGCTCACGGTATGACT -TGGGAAGGGTATCAATGCAAAAAGCCCCACCTATTTTCCAGAAGTAATCAAGTATATGAC -AAGCAGCAGCTTTGTTCCCCTCATTAATTTGTTTTTCATTTTTTCCCCCTGTTGTACGGG -AGAAAGGGGGGGGGGGAGACTCGGTTCTAGTATATCTGTATATGGGCGTGGCATCTTTCC -GATATTCCTTTCATTTTGATTTAGTTCTCAATCCCTCTATATTGTACTGCTCTCACTATC -GTGCTTTGTATGCCCTCTTTGCAATAGACCTTGGCTTTTGCCAGCATTTTAATCGTGCTT -TTGTTTAGAACGCCAGCACATCCCCACCCAGCTGCTAGCAAAGCAAACGCGGTCCGCCAG -CCGTTGGGCATCACAATCCGCATATCTTCTAACTGCACACCAGCTAGCAGCACAGCCTCC 
-GCCCTAAGCCTGTCTCCCTGCAAGCTAGATCTGTCCAATGCGTGTCGAGTAATCATCACC -ATTCGTTGCAATCAGACGTGTGGCAAAGCTTTCGTCCGTCACCCGTACCCAGTTTCCCCA -TCTTTCTAGCTTCCAGCTTGCTTGCGTCCAGTCCGGATCGGAAGATGTTCTATATCCTCT -ACCTAACCAGTATGTCATCACCCTCCCCACCTCCTCCCCGAGCTATGTATGTCATGCATC -GTTGCCAACTCGAGCCACTCCCCCAAATATCCGATCTAGAAAAGCAAACATCACAGCCCT -CATCCCGTCCCTCACCCCAGCCACAAAATCCCACCAATCCAAAAGCTTGACTGACCAAAT -CCAAATCTAGGCCTTTGCCGGCGCCGCAATTGGCCCGAACCAATCTATAACGCCTACACA -AGTAGCAGCGGGTACACCTGCACAGTGCGCGTCAACAATCGCGAGTACCAAACAGACAGT -ATCTGCAGCAACGAGACGCTTGCACGTGAGAGCGCCGCCATGCGTGCCTACCTCATCTGC -CGCAATTTCTCCGTCAACGACGGCATGTACCCGGCAGGGCATGAGCATGGCGGCGCTGTG -CAGGGTATGCGCGCTGCAATTGGCTCGGGACGAAAGATTGCTCGTGAGGATAATATGGTT -TCATTTGGGACTGGAAGTAAGACTGCAAGTGAGAGTGATGGTTCTCAGGGCGAAAGTTGG -AGTGGGGGAAGTAGTCCCATTAGGATTGGAATTGAGGGGGATGGACGAGGTGGATTTGGA -GGGAGGGTGTGAGACTTGATTTGACTGGAGGAGGAAGTTACGATTTTATGATATTATGGA -TCTTTATGACTACGCCTTTTTGGCATGATTGTGGACTTTTTAGAGGGAAAAGCCGAGGGA -TTTTGAATGACTGATTTGCGATTCTTGCTGCTTAAGTTATGTTATGGTTGGACTAATGAT -GCTTTTCTCTAGATGGTGGATTCGCTTTTGATTTGTTTCCGAGTGGAGGTTAGCGATGAA -TTCATGTTATGTGGTTGTTTGGTTTTGATAGTGTTGAATTTCTAGCATATAAGACTCGTC -AACTTGTGGACAATTAGCGCAAACTAATATACTTCTACGAATTAGCTGTTAAGTAAAACT -CCTCAAGTTCGTTCTAAATATCTACTGTGCAGAGCTTCATGCCTTTAAATGTATACCCAT -ACTTGCTCTACTCATGTCACCTCACAATCTCTGGGCAACCTTCAAAAGTGGCAGCTAAGA -TAGGAAAAGATCTAGCAGAAGAATCCTGATACTTAGATTCTCTTGCATGATACATTTAAA -TTTTAGAAAATGAAGGAGAAGGGAGCGCAAACCCAAATGTGGATTCTCTATAGATGTCAA -AACTATATATATATCGGCAAAATTCTTATAATGAAGCAGGTACAACCTCATCTCTATACC -ACTTGCATTATATTCCATGCCCTCGATTGATCTCATTCTCGAACTCGTCACTCTGTTCAA -TGAGCCTAAACAGTAGATAGTGTATTGTCAACTGTGCCTATACCACTTCACTTCCGTCCA -AATGTTGGCTTTTACTGAAAATCTACCACCCATAAAATTGGAGTCTTGTGATCAGATAGG -CCTCCCCTCTTGCCTTATCTCTACCCCTTCCCCCTTTTCGCCCCCCTTGACAGTGAATGC -ATTGCATTCTTTCTCTCTATAGAAGTCCGTCATTGAGTAATGCTCTCTAGAAACTCCCAT -TCCGCTGGTTTCCAACCACGGTCAGGGGAGGCTGTATATCTCCTGAACCGGAGAAGATCA -TATAAACGTAAACAAAGATCCTCGTCGTCTGGAACGTGGGCTCTTACACCCGCTGGGAGC -TCATCAAGCCGAGAGTCGAAAGCTAGTAGGAGATCTGGCTTGTGAAGTCTATAAGACTTG 
-TAGCGAGAATGGCTTGTATTGAGGTAAGTGCCATAGATAGGAGCGTAGACTAGTACCTCA -ACAATCTGTAAGACTGCCTTCTGATCATCATGCGGGCCGAAGATACCAAGTGCTTCCCAT -ATCTGTTTAGTGTGTCTGATACTCATTCCCCGAAAAAAATTGCAGGTAGCTCTCGACATG -CTGGGAGATGATACGAGCGAAAAATATAGATAAAGATGCCCTGTTGCGGTATTAAGTTGC -TGAAGTGTTAGATTGTTTGAACGACGATTGATGCTGAGTATTGGAAGTTGACGAGAGACT -TACCGATTATTTAAGCCCCGATACTCGGCTAGCTCTTTGTGCTCGAACATAATGGCTTTC -AACCTCGGCATGTATTGTTGCATTGTTTGTGTATTATCCATGGTAGAAGATTTCAAAGGA -TCAAAGAAGAAAGAAACTCAAAAAGACACAAGGGAGTCAAACATAGTAGGACTAACCATA -CATGCTATAGGCTTGTTCTCCCGCTCTGTCGATATTCGTCGTGCAAAGAGCTAGATCATT -TTCCAGCTTGAGCGTCCTTAGGCCTTCAGTCCTTGGTTGCTTGATGGACATATTCATTGT -ACGGGTTATACCTAGACTATGCCGGTCACGTCGGTCGCTGGAACCTTGGAAGCCAATGAG -GCTGACTGTTTTTGAGGCTCAGCAGGAGATGCCCCTGGATCCGCTTCCACTCATGATCAC -CTACTACAGTCTCCAAATCACAAAATTTCTCTATTCTAGCCGATAAAAACATTGTAACAC -GAGTAAATCCTGCAGCAGGACCTCCACTTTTACATTGCGCCAGCAAAAATGGAGTTGCAA -GAATCACAGTTCCATGGTCTCACGTGGGTAGAAGAGACAATTTTCAGTTCGGAGCCATGA -TGGACTGTAGAGCCTGATACCGGGACTGTCAAGCAGACAGTACAGTCGCTTTGACCGTAA -AGTACGGTTGATGTGACTTTCCTCGCCCAGGGTGCATTCAACAAAATCTACCAACGTTAG -CATAGATGACGAACCGTTCATTATGCGAGTCACATTGCCTATCGGTCCACGCCACAAACT -CTTAAGTGATGTGGCAACAATGGACTATGTGCGCCGTACTACAAGCCTCCCCATCCCTAG -AATTATCACTTACGAACCTTCCCGAGACAGCTTTGAATGAATGCTTATGACCAAGATGCC -CGGAAAGCCCTTCAGTGAGCTTTGCCAGTCTCTTTCCTTCGACGTGAAAGTACGCCTAGT -TAGGGAGCTTGCGGCAAGCTCGGCTTGCCTTTTTCGAAATCAACTACGTGGAATCAAAAA -CATCTATGGGAAACTGTCACTTGTTGAAAGATCAACATCGAGCGAGAAAAATCTGCCCTA -CAGAGAGCTAGTCGATGCCAAAATATCAGATTCGGCAAAGGAGTCCGAGTCAGATGATGG -TAGCGCGGCCATGGGGAAGAATTTGGGAGCTCTTCTATCCTATGAATTCACCAAAGCCTT -TTTAGAAGGGACCTTGCCGGCTGCAGACCGGATTGTCTCTATGCAATTTTTCTGGGACTC -ACATATCCGTCAAGATGTCCACCGAGGCCGATTCCGCTCAAGTAAGGACTGGATTATAGC -TTGCCTTTCGTTCAGCGAAAACGAATGTCATTCGGCTCTAAATAGATATTCAGCTGGAGA -ACTTGATAGTGATGCCGAAGACGAAGCGGAGGGTGCGAAAATAACGCTACAAACAATCGA -GAACCTCGAGTCCCTGCTTCCTCTAGTATTTCGAGCGGATAGTCATGATCTGGAACCTTC -AATGATATTTCACGATGAAATCTCACGACATAGTTTGCTCCTTGATGATAGTGGGAAGTT -GGCTGGCCTGCTAGATTAGGAGTGTGTCTCAACGGTGCCTTTATGGAAAGCCTGTGACTA 
-TCCTTCGTTTCTAGGAAAGCGACCACGCCACTCAAAGCCTCTTCCTAGAAGCTATAAACT -CAAAGAAAATGGGGAGCCTAATGAACTCTCTTTTGAGCACCTATGGCATTATGAGACGAC -TCTCTTACGCGATAAATTTATTGATGAGATGAGAAGACTAGAAGTGGGATGGGTGGAAAT -TTTTGAAAGAGCCAGGCCAAAAGAGATTTTGACTATGCAGTGCAGCATCGTGATAGTAAA -TTATCTGCCCGCGAGATCGCACCATGGATCGGAGACATAATCGCAGGTATCAGCAGTCTG -CGCAGCTAGAGTGATAGATTCTGGTCAGCGTAAACAGTGCAGTGGAGCAATGTATATGTT -GTATGATGGCTGTCCTCTCTCAAGGGCCTATGCTCCATGTCAATATTAATGATTGACAAT -AGTGGGGATCTTCGGAATTTGTTGAAAAAAAAAAGAACTTGACTCGGACTGCCCTCGTTC -AATACCCTAGGAACCATTCAAATTTTGAGAAACTCACCACGCGGCTTCTTCTCTGGCCGA -TTAGGATAGACGATGTTTCTTAATTGTTCTGGCGACTTGTGCATCTTCAGAATAATCAAC -TGAAAGACAAAACTAATCATACCCTTCTTACTATCTTACAGAGAGGAACGATATCTATCA -AATTAGTTTCCTTCCTCATTGATTCATTCATATGTATTTAATTGCTTTCCTCTACAATTC -CACATTAAAGCCCCGAGCCAGGATAGTTGACTATCTAACGTGATAAGTAAAAAGGGGACG -GCTAAAGAGAAAAGAAAAGCAACCAAGCAAGCATGCATCCGGGGAACAAATGTGAATTGG -AATGCGGAGTGGATGAAGAAACACATTTTAATGTAGCTGATGACTCGGCACTCCGTACCT -TTTGTAGTATTTGGTCAGGTCCGATGTTCGAAATTTACTGTGAGTGTTGATAAGGGCAAG -CATGAACGAGACTGCTACTGGCTTGAGGTCTGTGTATGCTGGGGCGGTTAAGGCTGTTGT -TATTATGACAGGTGAGGGAAAGGCGGCTACATGCCCCTCAGGGAAACGATTCAAGAATTT -TGAGTCCATGTACAGATTATTACCCAAAAAGAGTGGCGAGAGAAGCAATCTCAGCCGTGC -TCGCATATTGCAACCCCGCCCCTTTAGTTTTCATATTATTCTAGGCAGGGAGCATATCTC -AATCTTGGAAGTGGATCAGTTAATTGCCATATTAGTGCGTTTTGCATTTCTCAGAACTTG -ATAGATGTGCATAGGCGCTTCGAGGGTGCTACTCTCCAAATATTACTGGAGGGGTTGTTT -GAAATTTGCAAATTAATGGAAGATCTGGAAGGGCCTCTGCGAGTAGGACACAGTTGAGGC -CTAAACAGGGATTATCTGCTTGTCCATCACATCCAAAGCCCGATTGGTTTTCGAGATGCG -GGGGCTAGCGTGAATATCAAATCAGCATTGGCACAACTCATTAGTCCCGAGATGAAATCT -TGCTATTAAATAAATATTTATATTTACACTATTATGATCAATAGATCTGCCTAGGCTTGT -TTGAGCCTCTTTGTGAGATCCATCTTTGATTCGTGGCTGATAACTCCGAGATATAACCAT -GCAGTCGCAAGGGCCAAATCAATTGAGATCGCGGAGCACCCCGCACAAGGACTGCTGGTG -GGTTATCAACTACTCCAGATGTGGCAGGTACATAAGGGCACGCCTAAATAGTATGCGATT -AATCGACGTCAGATGGTACGATGCACCCCGATTTTATTATGGAATCCTAGACAAGAGAAA -CCATATCAGGGGAACTTGCCTATGCCTCCACCAGATATGTCGATCTTCTCAAATACATGC -ATGAGCAGCTATATTGACATGTGTACTTTCATTTATGGTGGGAGGTATTATCTACCGTGG 
-AATAATGTCATTACATCTCACTCGTAGAAATAGCCAAGGCAATGTTTTTCTATCGATAGG -AGAGATTTTGTGCCTACTGCCGAATCATGGCTCGTGCTAGAACTGTTGCGGCAAAGTATG -TAGAATGTGTAGCCGAGGTTTAAACATGAAGATTAATCAGTTACAACACTCCACACGTTT -AATGAAGTTCATAGTTACGTTTTGCACAAGTCAAGAATACCTGTTTTTAATGGCAAATTA -ACAGAGTTCCTGTTTTAAATATGCATCATAAGAGTGTAATCCTAATCTAGTACCCCACAA -ATCCAAGGAAACGGCCTACGATGTACCATCAACCTAAATTACATTGCCATTGTACAATGT -AGAACGAATGATGAGTGGAATCTTCAATGATCATGTGGATGTAAGAACAGGTAATTATCC -ATTTTCTTACATCACCGAGCTGGCTAAGCCGTTAGTTGGTTCTTGTTTGTTCCATGACGT -CATAGTATAAACCCTGTCACCAAAGACCCTGCCATTTACTATAAGAAGCCCCTTCTGCTA -CAGCTAATTTCCTCCATCACACAATCCACAAACATTCTAATCAACCAACATCTGAAACTT -CCAAACTCATTTCAACAACCTTCAAACACACAACATCATAATGGAGACCGCCAAGGTTCG -CACCACATGTTCTGGATAACATTTTTGTTTCAAGTACTAATACATGACCTTAGAACGCCG -TTAACTACGTTGCTGAGACCGTCCAGGGCGGTGGTGCCGAGGCATCCAAGGAGGCCAACA -AGCATGTCGCCAAGGACTCTGATGCCAGCTTGGGCAGCCGCGCTAGCGCCGCAAAGGATG -CCGTTGTTGACAAGAAGGACGAGGCATCCCACAACACCAAGGCCGATGTCCACAAGGGTA -AGATCGCCAGGAAGCCAACATGACTGGGGAATGGAATTAATGCTAATGATATATATACAG -AGGCCGCAAAGCACTAAACGGCTACTCACTGAAATTGGACACCTGGGATAATGAGATACC -CCTTTTGTTAGCACTCGGGTGATAAGCAGAGGCTCTTTACCTGTACAATATTTGCAGATG -ACTTTACGGAAATGAATTACTTTGTAACATGCATATCTTGAATCCTGCGTACAATGGGCA -TGTAGCCTTGATCTCGCTTTGTCTTACCGTGACGATCTTCGTAGTTAGGTACCAGATTGA -CTTTCAACTATACTCAAACTTTTTAACACTATGAATATTCAATTGTAAGATGGATAGCCA -TCTTGATACGAGACAGATCTTGAAAACCTGGGGATTGAGACTATCTGATCGGACCCACAT -ATGACATCACCGGCATATGATACCATCTACAACAGCACAGCAAAAGGTATGTTGGTCACC -CCCTAATGACCATAAGTAATAATTCTGATAATGTCAATGGAAATAAACTCGATCTTTCAC -TTTTAACACTTGGGGTTAACCTTGGAAGCTGTGTCAAGGTACGTCATCAGCGATCATTAA -ATCGGCAATAGAGGCGAAAACAAGGACACAAAACAAACAATGCCGCCCAATCCTGCTGTG -TGCCAAATATGTTCACAAAAACACTGTCCTATAGCATACCTCGTCATCGAGTCCCAGAAT -TTACTTTAGTACAAAGACCTGCTCCTAGGTTAGAGTTGCCCCTTTGAATCCAAATCATCT -TCTTTTCTTTCGAGATTACCTACAAAACACTCCTGCTACTATCCCACTTCTCACACGAGG -AGCTCCCACAAATCCTCTCAGAAACAATGGCATCAATTATTACTTCTGTCAAAGACCTTA -TTACCTCCATATTTGAGGTGATATTCTCTGTCTTCAAGAGCATTCTGGATACCGTGTATC -AGCTGCTCATGGCCTTTGTCAACTTCTTCGCTAGTATTCCCAAGATGCTACAGCAGATGG 
-TGAAGGGCAGTTTGGAAGCTGCAGGTGGCGTTGGGTCATTCATTGCTAGTGAGTTTTTAT -ACCCGCCGATTCAGCGAAGTATTCAGCTAACAATTGGTGCTAAAAGGCAACATCGTTGTT -ATAGCCTTCATCGCCCTTGGTGGTTATGGTTACTTGGCTTATCAACGTCGTCAGGGTCGC -TCAGTGCAGGCTGGGTCCAAGAAGTTGAATTAAGGAAAAATTGGCAGTTTGATATTTCAG -CGAGAAAGTTGTTAAAGACCTGAGGATATATTACATGTCGACGAGTCTCGAGTGCAATAT -CCAGAGCTTTGCATGTTCTTTAATACGCCCACTGGGGCAATGCTATGCTATGCTTTGGCA -GTTTGAAAAATATTTTTTTTTGCCTCTCTGTGATCTTACACTTCTGTATCTAGTCCCTCA -ATTGTTGAATGCGTGATACCAACAGAAAGAATAGGCAACAACATCAGATTCCGTCGGAAT -CGAAGTAGTATGGAAGTGAGAAATACTCGAGAGAAAAGACGCGTGTAGATACTCGCGCAG -TGTAGTAATTGCATGGACGAAGACTATAAAACTGCATAGGGTGGACCAGTCCCATATTAA -TGACCGAAAACAAACTGGGATCATCAAAGAGTCCTGATCGATCATTCCTCAAACTCCCAA -ACTTGGACGCAAGTCTTCCTTCGATACCCTCCTTGCGGCGTGGTTCAGCACGACACCATG -CCCATCATCTCTACTTACCAGACAACAGAATTCCTCGTCAATACTGCTTTCCTACATGTG -TCGTTTATGCTTCAAAATATCAATTACAGTCCCTCCAGTTGCCATGGTCCGCTTTCTTTC -AAATAGGTATAATCTAGCGTGGAAGGAATGTATATCGCGTTGTTTTGTTAGAGGTAGGGG -TAGAGTAGAGTTGCCTTCTAAGATTGGAATAGAAATATTGTTAATAATTGCTAAACTTAG -GATCATGTCTATACCCCTCTGGGTCAGAAGTTAGCTCTAGTGAATATTGAAAACAGAAAT -ATAACCATAGTCGCACCTCGGGGGCCCCCGGGTCCTAAATATGACGGACCCTTCCATTCT -TTTTGAACACCACGCTGGCTGTAAATCCGGGTCCTCTTTTCGTTTCCACTCATTTTTATC -AAATCTATGTTGTGCTGCGTGTGGTTGAGCTGTCTAATCAGCTCTCTTCTTGCCGCGCAT -GACCTTGCGTCTTTGTGCAATCATGTGCGTATACAGGACATAAGATCCTGGTACGTAGTC -AGTAATGAATACTATGAACACTGCGAGAGAGAGGAACATACCAGGCACGTAGATCACAAG -AATTGCGATCAAGAGATAACGGAAAAGTGGGCTCAGCTCGCCAGCAGGACCAAGGGCCTG -GAAGATAAGCGTGCACTCGCTTGAGATACCAATGGGGTAGAAGATGTAGAAAGTGTTGTA -TCTAGTCAACGCGAGGATCATAAGCCCAGTGAACATGGTAGCCATGCAGGGAAAAAAATA -GACTTACCGCAGCCAAGTCCACCAGCTCGGAACCGAGTTCCCAGACAAGGTAATAGCGAA -GAACCCATAACGGATGACCTCCGTAACACCCCAAGCAAAAACACATCCGAGATAGGCATA -ATCACCCAGCTGGGTATTGCGCCCAGCACCAACAATCTCCCCACCAAACTGAGCCATAAT -CCCCCACACCAGCAACAGCCGGCTAGAAACCTGCATGGCAGTCGTCATGAACGGCGCGCG -CACGAGGCCTACCAGGCTGTGCACAACTTCCAACAGGGCAAGGGACTGCGTGGCCAGAAG -GAGTGGGGAGAAAGTCTCGCCGAAGACCTTCGGAAGGGTTTTGGTCGAGACATGGGTTGG -GAGGAGGGATGCCGTATAGACGACACAGGTGCCCCAGGCGAGGAGGTTGAGGGTGTTGTA 
-GGTGAAGAGGTAGGCTTTTGTTAGGCCCGAAGAGCCGGAGCGCGGGGCGCTTGACGTTTT -GGCTGAGGACATTTTTGACCAATATTATTCTTCCGTCGGTCTCGTGGAGGGGATTGGTGA -TGATGTTGCCCTGGATGGAGGAGCTAAAAATGTTGATTTGGTTGGGAAGCTCGCGATCAA -CAAAAAAAAAGGGACCGAGGTTGCCGGTTCGTTACGTAAGCATTCTGGAAGAGCCCGCGA -AGAAGCTGTGGAGAAAAAGCGTGGGATCTGGGGTATAGTATATCTTTATTTTGTTGTCTA -CTGAGTTTCTTGGGACAATTTATGCATAATCTTATTGGGAGTATATCAGTCTCACTATAC -TTCTGATATTACCAGTCGTGAGATGCATCTTACCTAGCCAATCACCTTTTTCAAAGCATC -AGTCATACATTCTCTTTGCCCCTCACCCCGAATATAGTCCGTAGCCGGGGTGTCGGACCC -GGTTATAGCTTCTCTGCAAGTCTGGACTATGTGGACTATTTCAGAACCTCTCTGCCGAGG -AATAGATTTGATGTGATTCGTTTAAGGCAATTGAAAGTGCATAAAAAGGCAGCCAAGCGG -ATTACTTCCCCTGGAGGCGCTCTTGGCTGCCAGGGATTCAAAGGCTAGCTGATCCCGTGG -TGTAAAACTGCATGATCTAGTCCAGGCTACTGCAAGACTCTACCGGTAATCTCGTATTGC -CAACAGCAAACATGCATAGTCTGGTCCATTTTTGACTTGACTGGTTTGGGCATTTCCGAG -AGGCTAAATGGATACTAGAGGAATTTTTGATGTGATCGCCTGGATTTGAGATATCAGTTC -ATTGCCATATTTTGGTGAGCACGTGGACTGAGCGTAGTCACCCCACGTTTATCCAAACTA -ATTCAATTCATAGTCCATTGATCCTGCCGGTATTTAGTCTGATGTATGTTGTACATTAAC -ACTATTGAAGGTAACTAACTCAATACAACTTTACATGTCTTGCATTTGTACTCCGTACTA -CAGTCACTCTCATGTTGTACATAACCTGGTCCAAGGGTGTGAGATGACATCCCATCTTTA -CATAGGTCTCAATCTTAAGAAGAAAAGTAGAGACTTGACACTACAGAATGTAAATTTGCA -CCTTACAACGGGTACAGTAGAGAGTGAAGCATTGGGTAATTGACGTAATCGGCCTAATTT -GCCGGTACGCCAAAATGACATCAGCGGCTGGATGAGCTGCCCCATCATATGATGTCATGG -CTACCATCGCATCTTCTCATTGGTGGAGAAGGAAGAAGAGTGTGCAATAGCCAACCACCA -CACACAAAAACCAGGGCCAAGGAGATGCACAAATGCCCCCCTAATTTCTTTATATAAACA -AAGATAGCTGCGGTTCGCGAGTGTTGATACAACTTCGACACTTGATATATACACCAGGGC -TTTTTCTAAAGAAACCGAACCGTTAAGCCAGGAAAAAAAAAGCAATGAGAATGAAAGAAA -AGCAACCAAAACACCCATCGTCATCGGTGACAGCATAATACCGCGATACCGTGTGTCTTA -GACCAGCAATATTACCCCATTTTCCCTCTTAGAGAGGAAAGAAAAAACGAGCCGTGGAGC -CGTGGGCATATACAACTCACTGTCACCGAGTACGTCAGATGAGAAATCTATGCAACACAC -AGAACGAACGCGTCCCGATTTATTGGACACGGTCCTTGAGGGTCCAGACCAACTGGATCA -GGTTATCTCTGCGCACTTGGCCCAAATCGACGCTTCCCTGCTGTACGAGCCGGGATTATT -ACGTCTTTCTCTTTTTTTTTCTCATTGCGGCTAAAAGTCTACCAGGACACTACGTGTTTT -TTTTACTGTTTTTTACTGTTTTCCAAGCAGTACTGACCGGGTCATGAAAAAGTCCTGATC 
-CTCTCTTGCACTCAAAGAATGGCCCCTCAGCGACGATAAATAAAGGCCATCGCTATTTAC -AGTTTCCCAAATGGTTCCGCCGTGTGACTGTTCGAGTTCGCTTTGGGAAAGACCAAATGA -CGTTCCCTGTGATCGGCGGCCCGATGATGTCGTCTTGCTGGCACCCCCCTACTCAATGCC -TGCGAGATGGGTATATAGTGTATGAGTTCTTTTGCGGTCAGAAAACGTGTGATTTTTTTT -GTATTGTTTTTTTGCCCTATCAGGCTCCACTAACCCTGTACTATTCAGTACACTTATAGG -TCAAAGGTTTATCCTGTCATTCCATGCTTTTTTTTTCTTCTCGGTGGACCCATCAGTTGG -AAAGGTGATGCCACGGTTAGACTCCACTGGTCTCATAAACGGAGCCCGGAGTAACTCAAT -CCAGAGATAACAGTTCTGCTGAGAGATCATGGATGAAACTTCAAGATGATAATGGTACGA -TTTAACCAAAAAAGGCAAGAGTATAGTCCAAATGACCCTATGGAGCACTTCGTACAATAT -TGATGAATAGTGTATGTTCAGTATTGCTTTTTTTTTTTTCGGAGTAAAGGAAAAGCAATA -ATCTCACGTTTTCAGATAATCGAATCCAATTAGAAACCATTGTATAGATTGTAGCTAATA -ATAATATAATGCACTAGCTCTACAATCATGTTTTCTCCCCGAGTATGCCAAGGCGAAGAG -CAACAGTACCATACTTGAACATTTGTACATCATATATTACAATATCCAAGTGAATATGAA -ATGAACTGTTAGAATGTACATATATATATATGTAAATTTTTTCTCCTCTGATATTTCGTC -TAATTCTAATTCCCCTAATTTTCCCATAAAATCCTACCAAAATCCCAACATCCTCTAGTT -GGTTCCTTTGGAATTGAACCCACCCTGctctctctctctcttttctccgcccctccgatt -tcctcccccctctctttctctcttctctttgatttcccctttctctctgtcttcttttct -ttctTGTCGCCTTGCTTTTTCGGCTTTGGGATTTATTGTGTCTCCTTCGGTTTTTCGGGA -CGTCTCAAAAAAACGAGCAAAAATTTCTCATAGGTGGGATGAACATTCCTATTTTGCCCC -TCTCTCTTTGAATATCCTCCTTTTTTTTTTTCCCTTTAAATTCCCTGTCAAGTGGAGACC -CTGGCTTTGGGGGCTGCAGCTCTTGCGATGCACTAGCCCCTGTTGACAAGGCTGTGACTG -GATCACGTTCGAGTACATTGCGCTGAATGGTTCTGTCTGTCCCATTTACCACATATACCT -CCGCTTTAATTATTTTCCCCCTCCAAGCCCTGCTTTGTATCGCATTAACCCCCATACCCC -CCCTTCAACTTGTGATTTTCTTACCTCTCACTATCCATTACTGGTCACTACCTTGATTAC -TGGATTTGGTGGCTGTATTTCGCCTCAGTTCCAGCCGCTTCCCCTTGACCATAGACACTG -GAGCCGCTCACCATTGTCCCTGTTAACACCTTGCTACTTTGGTGCCTGGATCCGTAACAC -CCTGCTGGATTTGTAAGACGACATATGCGCTGTGTGCAGATTTCGCGATTCTGACGTCCT -TGTTGTATCATTGATTTTTGCATCCGGGACCTCGTTGATTTGCCACATCGTTTTATCTGT -CCAGAATCTGGATCTGTTGGTTGAATCTGAGCTGACTGAGATTTGCAAACAGAGCCCCTC -TCGTATTCTATCTGGATTGGCTGAAAATCAGACCGGAGAAATCAACCCCGTTTTCATTCA -ACAATTGTGACGATCTGCGCAGTGCACTAGCTTGCTTCCACCTACCCCTCTAAGCATCTC -CGCATACACACATACACAGCCATGGCACCAACCGTTACCAAAAAGGCCGAGCCGGTAAGT 
-GCTTCTTGTTCCGCCACCAAAGGTTCATCGGCCTGACATTCGAATCTAGGTCAAGCAGGG -CCCACCCAAGCTCTCGGATTATAAGGAAATTCTCGACGAGAGCACATTCGAACAGATCCT -TGAGATGGACGATGATGAGGAGGATCGCGATTTCAGCAAGAGCATCGTGTACGGTTTCTT -TGACCAGGCCGAGAATACCTTCAAGAAGATACAGAAGGGAATGTAAGTTTTATTTTCAGG -AACTCGGGTGATTGTCTCCTTTTACTCATCATCCGACAGAGATGACAAGAATCTTGATGA -ACTTTCTGCCCTCGGTCACTTCCTCAAGGGCTCGTCGGCCACCCTCGGCCTTATCAAGGT -CAAGGACGGCTGTGAGAAGATCCAACACTTCGGCGCCAACAAGGACGAGACAGGCCTTAT -TGACGAACCCGATGCGGAGGTCTGCCTCAAAGCCATCAAAAAGACTCTGGATGAAGTCAA -GGTCGAATATCGTAAGGTCGAAAAGCTCCTTCGCCGGTATTACGGTGAGGAGGTGAAGGA -TGAAGAAGAGAAGCCGGAAGAAGAGGAAGTCAAGGAGCAGAAGGAAGAAAAGCCCAAGGA -AGAGCCCAAGAAGGAAGACAAGGAATCCAAGGAGTCCACTAAGGAGCCTACTAAGGACGC -CTCGAAATAAGCGGTCATTCGGGCTCGTTGATATCCCTCCCTCCCCTTTTTCTTTTTAGC -GACGGATGGAGCATGGCAAATGGCTttttcttttctctcgtttcctcttcttttcttttA -CCCCTTTGCCCTTTTTTCACTCCCTTTCTCTTATCTCCCAATCCCTCCGATCCTGGCTTT -TCTAGCTTTCTTGACACGTACATAACAACCTCCTCCACTATCACCATATCTCGCCTTTCC -CATCCGGACACTGGTGTGGAAACTGATATTCCTGGTCTGTCACGTAGGTGCAGGCGCGAG -AATCGTGACGATTCCAAGGAGTCTTGAACAATAGCGACATACGGAAATCGGAGTGCCTCT -GTCAGGCCCTGTTTTCTTGCTTTGTAGCCTGCGTTTTCCATTTGGATCATCGCGTAGTAG -GCATTGCGTCAGTATCGAGGTACGTGTAACGCGAGCAGTAGCCGTTGTGATATGATACAG -TCTAGGCGGAAACTTTAACAAAAGTGTCTGAATACTGCAAGAGGGAGAACTATTTCCCAA -ATCAAATCGCCAATCTCCCCAAGCATCCCGAGAAGTGGCGCTATAAAAAGCTTTTTACCT -TCAGCCACTTACGTGGAGACCCCCTGGGGCCCGAATAAGCTTGAATCAACCTGGGAACTT -TATATATACTTTGACCCATGCGGGAGGGATGGACCTAATTTTGGGTCTTCACTTGACAGA -TTATATTGAGCTGCTGTTACATCCAACAGAGAGAGTGTGTTTTCAATCTTACGTTTCCTC -ATGTTTATCCTGTCTATCTGTTCATAGAAACCATCAACTAGTGAAATTCGATCAAGTCAA -ACGTTGTGGCGTGCGTGTATGTCTGTGCACTTCACCAGGCCCGATCCTCGTATGCTGCAC -TTGACAATTAAATTAGTATTTTTCAAAACAACAGAATCTATGTCCAATGCCTACTCCTAC -TGATAGTAAACTGTGCCGCGTCTGCATCTACATTCGCTCCTATGGCAAATAGCGAATGCT -AAACAAGTCCTGGGGACCAACAGTGGTACGGTCACACATTGCTGGAACTGGAGGCTGACT -GGATTGATTATCTGGTAGCGAACGCTTTTCAAATCTCTAGTCATGGTTGAGTATGGCAGA -TGCAGATGCAGAGGCAGAACGCCCCCATATTCAGGTATATCCTCGACCTGATCATTCTGA -TCTGAGCTCGATCTCTCAAAGTCTGTGCTGATATGCAATGCAGCAGCGGGATTATGCAGT 
-GTCATTCAAGTCGCGGTGCATAGATGATCCGACTTGCGTGTCCATCGCCCTGTGCACAAT -GTGACATCCGTTCTCTAATCCCAGCACCAGTAAGAAAAAGCTTCGTCGAGCCGGCGACTG -GACGGTGGGACCTTCTGTCAAGGACCGTTTATGTTTACTCTGGTCACGACCTGGTTGCCA -CACTAGCTTTCAGCACATATAgcagctgcaacggcaagctgcaacggcatcttcaacCGT -ATTGAGCTTATAAAGGCACTCTCTTACTCCGAGTCCGCTCAAAAAGCACGTATCTAACAA -ACACCATCCCGCTGTGCTTTTTGCCATGTTGCTCGTCCCATTCAATATTCTTCCCATTGC -ACATGAATCCATCACAAACGAGAAGATTGAAATAGAGTCAAGTCATCAACCAGATCCCCT -TTTCCCGCAACGGGACGCAAATACGCGCTTACCATTGGGAAGTaaaaaaaaaaaaaaaaa -aaaCCGATGCAGATGCAAGCTACAGACCCGATCTGGGTTCCGGTCTGTCAAATTTTGGCT -CAATTGGATCCTGGCCTGGACTCCCGCCTGGCAGGTCACACGACATAGCCAACGAGCCTT -TTTGTTTTCTGCAAGATGCCCGCAAGCCGATGGGTAGGCACAGGTGCCTTATTTGGCTTT -AGTTCCCGCTAGCAAGGGTGACTGAATGTTTCGCTTACTACTGGGTTGGGGCTCAGTGGG -CCTCGATACCGAGTTCGGGATGTACAGTTTGATTCACCTAGCTCTACCCATACATTGAAT -GTTCTTGGTTGCGAATATGGCTTTCCTGATTTGGTTCTGTTAAGCTAGAGCTGTATCGGC -CTCCCTCATACGGATGCGTACAGATGTGTGGTAACGAGACTAGCACACAGGGATACTAGC -TCTCCCTGTTGCTAGTCGGACGGATAGCGAGACGTGCATCCGGTAGTGTGTACACTTATA -CAATATCCTATCTGACCTCGAAGACACACCCCTTTCTTATGCATGCAAGCATACATATAT -CTTCTACCCGTCTGTCTCGGATAGCAGACCTGAGCTCAACAAGTCACCCTCGACATATCA -GAATGCACTCTTCGCGCTCAGTCCTCGGCCCTCAAGTGCTGAGTCCACGATGACTCTGTT -TGCGATTGTCTCGCTCTCGCTTTATTTGACTCGTTACCCTAGCCCGCAAATAATCCTTCT -CAAGGGGACGAGGGCATGGTAGAGACCAAAGGAGGTGAACTTTGTACTACTCCCCTCCAC -TCCTTTCCTCCCTTTCTCGCCTCTGGTGTCAATCACACACCCGCTCGTTTGCCATATCGG -AACACCAGGCCCCCAGCCTACTTCTCACCGAGAAAAACTGTACCACAGCGCCGTAGCCCT -ATTCCCTAAATTCCAATGACCCCCAATTGTTGAGATACCCTGGACACGCCGAACCACCAA -AAACGAACATACTCCAGCCCGGATCGCAGTTTGAGGTACTCCGGGGTTGGATATAAACGG -GTGTGGGATATAAACGGGTGTGCAGTGATCGCCCACACACACACATTGTACATGTACCAC -CCTGACTCCACGAATCCTGCAAGAAGTACAGTATCGTCTCATCCCGGAAAGAGAATTGTA -AGCGCCAGGACAAAAAACCCAAAATAGTAAGAAAATAACTCACTAACAACACTAGACTCG -TGCCCTCAATCTTTTATCATTTCTTTGGCCATGCACAAAAGAAGCAATCGTAATCTTCAC -CTGAGAGGTAAGAGGTGTCTCAGCCTCGAAGCACTTCCTTTAGATCCCTGTCTGCCCCGG -ATCCTTGTACTTGTGTTATATCGAACTGATTGCGGTTTTGCTCTTTGCCCCATTTCCCCA -TTTCCCCGCTTGGTCTGATTGCGTCTGTTTCTGTCTCTGTCTCGCGCTGTCCGTTCGCTC 
-TGCTTCATTCCGGTATAGTCTCTGATTGTATCGGCTTCGGCGCCGTGTTGCAGCCAACGG -CCGTTAGATGGGTTTCGGCGCCGATGAGATCTGTTTGGATGGGAGTGCTCCACGATATAA -AGTCTGGCTTTGGTGCGAGCTGTAAATACTATTCTTCAATTTTGCATTCACGTTCATATC -CATTGATCGTTCTTCATTCTTCGGTCCTCATCTCACGGCTGATATACAACCTCGGACTCG -GATTTTCGATCAACCATAGCATATATAGCGCCCTACATACCGACACCGTTATCCAATATA -CTCCTCAATTTAATCGAGAGATTCAACAAAAATGCCCGTCGGACCTCGAGTATCCAAAGA -GGAATTCATGCATGCATTGGGTCTAAACCCACAAGACCCGCATCATGAGCAATACTACCG -CGCCATGCGAGTAAGTATATCAGCCGAAATTTCTCCACTTACCTTCTTCCCTCTCTCTCC -TTCTTCCTCTCCCGCCGTCATATCTCCACACATGCCACGTATACCCAACGAGCCTCCCTT -AACAAGCTTAATCACAGGATGAAGCGATCATCGTATACAACCGCATGAACCTAGATACTT -CCAATCTCCTCGACAACATACGCGCCGACCCAGCAACCCGCCCTCCGTTCTTCTGGCACC -ACATTCGCCCTGACTGCCAGCGCTGGGCGATCCTGGAGATCTGCCACAATGCCCCACCGC -TCGTGCGCGGCTTGTTCGAGCGCGGGGCGACGAATGGCGAATACGGTCCGAATTGGGTAG -CCGGGTGGTTGTTGTATAGCGTTTTTCGGTCGCGGGATGTAAGGAATAATCGGAATAGGA -GGAAGGGGGAGGTTGCAGGGTCTGCGGGTTCAGAGCTTGGTATGTATCATCTTACTAGAC -TGGTTGCTGTGAGGCTGTATTGGCGAAGCAGATGCTGACAGATGCTTATAGATAAACGTA -CGAGACAGGAAGAGACGACACAACCGAAAAAATACTATGATCCTGTGCGCAATGGGACGA -TATGATATTTTGGGGCTGAGGGTGTTTCTTGGGAATTTCGTGTTACTTTCGTTTCATATC -CAGTTCTATTAATACTTGACTGATCCTGTATAATTGAAGTATAACACTTGACGACCATAG -ATGTAATGCAAAGTACATACATTTAGGGAGAATGATGAGTAAATCGGGGCTTTAATCATC -ATCCCTGTCATTCTTATCATCATCATCCTTCTTCTTTTCATGCTTTTGCCGACGTTCTGA -CTTCAAGACCTCAGTGACACGTTTGTCTTGCTCATCAGCGCGCTCATTCCACTCAGCATC -ATACTCAGCCGCGATGTTCTTGATTTCCCCACGAAGTGAGTTTCCCTCGTCAATCAAAGA -CTCCCAAAGCTGCAGATCAACGTTATGCAACTGATAAAGCTTCTTTTGCCTCGCACTGAT -CGTAAAATCCAGCGCAGACTTGTAAGAAATCTCCCGATTGTGATACAAAAACCACAAATA -GCCACCCATCAAAGTAGACAAAGAGACCAAGTAGGTCACAGGCTCCATAGTATCCCACCC -GAGATCAGTCTCGAACGTCAGCCTGTAAACTGTGAACCACCAAATGGAAAGAATTCCAAA -CCCGCCAATCGCAACCCGCTGTGCACCCCGATGCGCCAATGAATCACACTTTTCCTTCAC -GTCGGCCATGGTTTGGATGCGTCGCGAGAGCTTGCGCAGCCGCATGCGGAGGTAATACGT -GCGCTCGTTGAACGATGGCACCGTGACGCGGATCTGACTCAGACCAGACGGCGCACCTTC -AATCGTCACGATGAACTCGCCCGCGCGCGAAGCATCCCGGATGAAGTCGCCTATCTCTGT -GGCTTGAGACCAGCGCACGAAACGCTCAGGTTCGCCGTCATGGGGCGGGGTGCCGTCCGT 
-ATTGCGTGGATAGCTGTAGGTTTCGACGTCTTCTTCACGGGACCGACGGTGCTGGGTGGG -TGATCGGCGGGTGTTGAGGGGTTTGCTTCCACCAACGCCGTCGTTGCCGGGGATGAAGTC -TTCTTTGTCTTGGTTTGCGTCTACGTCGATCTCATCTTCGATTCGCTTTTTGGGTTTGAT -TGCATTGTCATCGTGCTGGAGTGCGATAAAGGATATCGCTGGTGGGCGGAGCTTGTCAGT -GTTGCCTTTGATTGGAGGTAGTTCTGACTGGATCAGGCGTTCTAGGTATGACAATGGTTG -CTGAGGGTGGACTAGTATAGCTATGGGTTCGATTTCTGATTGAGCTGTTAGTTAATGGGA -TATTAGAACGATAGCCAAGGAATGCCCGGAGAATATGCTGAAGATTGTCGTACCTTTATC -GTGGCCTTCACTGCCGATCGTTGTTAAAGGGATGATCAATTTAAAGAGCCGGGATGGCGT -CGTCAACATTTTGCCTATCCCCACAAGTCAACATTGATACTCGAAGCGACAATATCACAC -ATTGTTACCTTTTGTCATTCTTTCAGGCCACTTCTGCTCTTCCTGTGACCTTCCCTCATT -GCCATTTTCACTAAGAACCGTCCGCCGAGGCCGAGCTTCTTGATTACCCTCTTGAACTTC -AAACTTCTCTAAAAACTCTTCAGTCTTCTGTTTCTCGCCTGTTGTTTGTGTGTGATCTGC -CTGGCTGAATTTTCTGGTGTGAATCCATGTAAGGTGATCTCGTGGAACTGCAGCCGGCCC -AGCACATGCCGGCTTCGTTATCAGTGATGCGCCACAAGCTCCGAGACCAATACTCCTGCC -GATACATCGCCCTGAGAATGAACTGATATGTCGAGAAGAAACGGCAGAGTTGTTCTTAGC -CCAGCCATTTGGCCCCGCTGAGCCAATATTCCTTGACAAGGCGTACTGGGCCCCACGTTG -TGATATCCTCCCGGTCGCCGCTGACACGATAGGGGCGCAATTTCGTGCCGGGAAATGCAT -TTCGAAAAGGGGTAGATCCTCTTGCCCTTCAACTGGCTATTGTGAATGTTCAGGTTGCAT -GTAAATTAAGAAAGAATATGTAGTCGACCGTGAGTATCGGATAAGTCTATGTTTATGTCG -CGTAGGAGATTCGAGACATGACGAAGCGGCGTTTCGAGAACACGTATGGAGAAAATATGA -TGTAGGAAGGCCAAGATTATTTCAAGCAATAAGCTAGCCAGGATGATCCATGCACAAAAC -ACAGTAAGTTGGCGTTATGCCTTGGAATATTGAGTATCCTGACGTCAGAGAAGCGTACGG -GATCCATCTGGCATTGGGGCGGTGTGGCTCCGACTGTGGCAGTGCTGGATCTATATATCC -ATTGTCCCATCATGGATTTCATTGGAGGCATTTAATATGTCTTCTTTCAGTTCTATAGAG -ATTTGAAATGGCAGCAAAAAGCATCGACCATACAGATGGCTGTGTAAAACCAGGGGAATC -TATGTGTAATAACAAATAATCTCAAAGCTCGCGCATAATATCCTGCACAGCTTTCGCAGC -AAATCGTTTCCGTTCGCTCTCTGGCAGCTCGGAGCTCATATCTATTGAAATGTTAGTACA -CCGGTCAACCATAGCGAGCCCGAGAGATTAAACAAAGCCATACCTTTGATTGCCTGCATG -CGCATCATCAAAGCCTCCATGCTCTCGACATCAATctcatcctcgacatcatcatgatca -gactcatcttcatccccatcaccacctccacgctcaattgccatACGCAAACCGAACATT -TCGCGTTCTAGCTCATGAACCTCATCACCAAACCCACGAGAGCCATCCAGACCCAATAGT -TGGTCTTGAAGATCATCATCGGAATCCATGTCAGGATCATTATCGCCATCCAAGCCAGAA 
-TTATCTGCGACCATCGACCAGTCATGAGTCTCAAGGACCTCTTTGATACGAGGCATACCC -TCACGTTCTAGTAATGAAGATACAATCAGCCAAAATCCAGGATCCCACCACAGTGATGCC -CCTGAGAAACAAAAGCTTGCTTCAGTAACTCACCTCCATACTGATTCCTCGTCTCAGTCC -CCAAGCTCTCCTTAGGATCCCATTCAACAACCTCCCACCCGAGCAGCCCCATATCAAACA -ACTGATCTTCCCACCACCCTACCGAAAACGGCACCATCTCTCCACCACCTAGATCCGCAT -CATCAACACCAAGCCCCAGCTCTGTATCGGAATCCTTGGGTCTCTGCTGATTCACACCCC -CAGCCGCAGGACCCTTCCTACCCCCGATCAACACAAACACACCAGGAACATCGCCAAGCC -CCCCGCGCTCCTCGTCAATGCGCTCCTTGACAGCGCCAATACCACGCATCAGGTCACACA -GTGCACGCACATCAGGCCGTTCAGCGGAGTCTGCGTTCGTATCAGAACCCGGTGGAGGTG -TCGCATCGCTGGGAGCATGCGCGCACACAACGAGAGCCCCAACAGCCTCACGCACAATCT -CCGCTTCTGTGCTTAAGAAGTCGGTTTTCCATTGCTCGACTGTGGGTGTTGATGTGGTTG -TTTGTGCATCAGTATCGTGGATCGCGAGTGGGATCTCGTCGACCCAGACGGGGACTTCGG -TTGTGTAGTATTTTGTTTCGAGACGGAGCGGGCTGTGGGTTGTGTAGCCAGCAAAGGTGG -TTGTTGGTGGTGGGGGTGTTGAGCTTGAGCTTGACGTTGGCTGTGGCGTGGTTGGTGAAG -GGGCTGGCTGTTGTGGCGGGTCGAGGACGGGCACGTCGGTTAGGGAGTGTAGGAGTGGGG -GAATTATTGTGAGGGATTGGGAGGTTGGTGCAAGAATTAGGAGGCGGCGCGGGTTGGGGA -TGTTTTTCACTTTGGGCTTCGCGGAGGGTGTGGGGTTGGATGTGGGCGCTGGATTGGTCA -TGATGGCTGTTTCTTTTTTTTTGGAGCAATGGGGATGTTCCTTAGAAGACTAGGGTCAGA -CAATGCTCGGTTCCATTGGCAGTGCTCGGTGGGAGAAGGTAGTGTACGGAGAAAACTGTC -CCCCGCATCGACAGAAAAAGGCGGAGTGGACAAGAACGGATAATGTCCGAAAACCTAGGC -CTTTCCATTTTAGTGCTCACCTCTCGCCGCTTTTTGATTCTTAGACCAAATTTGAGCACT -TACTGGATACTTACCATCAACGCCTACTGGTATATCTTTTTACAATATCTTTTTTTTTCC -TCTCTCAAATTGTTGTATCTTTGGCCCCGTGGAGTGGGCTTATCAAGTGGTATTACGAGT -AGAGGACTGCAATTTGAACAACGGGGAATACTATCAGTCTACCACAGACAGTCAAAGCTG -CGGTAGAAGGGAGAAGGGAAGAACAAGGAGAATCTGTCATGCATTTTATGGAGGCGAGTT -AATTGATCAAACGTCTGGCCATGCAGAAATCTTCTATGCAAAGAAGTGATTCATCTTCCA -GCTTGGCTAATCCTCGAACGGGCCGGTCTCTTTATGTCCTAATATCAGGAATCATTTGCT -TTGATTTGCTTGAGCTATGGTTGCCAATTTTTGCTGGAAGTATCATTCCGGCGAGATCTC -CATCCACGGAGGATTAGTTCTTCCCCCCGTATATTGGATAAGATGATATATACCTAAGCA -AAGCTCATCATCATCCACTATATGGTCCCTTGCAGTCAGTTGGACATTGGGTGGAGCACG -TATATATCTGCAGTAAAGAATTTGCTGGCAACACCCCCCTCCAAATTGTTGTAGAGGAGC -ACAAACTTGCAAAGCAATCTCTAAAACAAAGGCCTAAGAGCCAGGTCATCGCGAAGACTG 
-GGTGCTTCCCAAGCTTGTCCATAATTCTTGAGAAGTCGTTTTGCAGGAAATGTCCGTGCA -GGGGGTGTTATTACAAATGTCAGCGGCATACTGGGAGATGGCCAGATGGCCAGATGAAAA -GTGCAAACAGCGGACCTATAAGTTGCGACATATGAGACCGATATAAGAGGTGCATCATCA -CTGCTTGTCCTTTATCGCAGGTTTGAGAAGCCATAGTGCCAGCCTATATGATTTAATCAC -CGCTTTATTCTCGCTTGTGTACCTAGACCTATATACTTCTTCGTATACAAACTAGAGAAG -TTCCACGGCAATATAGATTCTAGAACTACATAAGCCGGTACATCCTACTTGATAGATTAG -GAAGATAAAACGGATCAAAAAGTTTCTTGGGCCATGAAAAGCTCTCGTTATTCTTCGCTA -CCTAGTTACCTGACAGTGTATGTCTTCAAAGGTTATATATAGCACCCAAGTAAGACTTAC -ACGTTTAAGCGGCGTTGAGAATCATTAGCCTTGATCGCGAGTGATACTAAGTAGGTTATC -ATTAATCAACATCTATTATATATAGATGTATAGATGCTTACGGTTGAGCCCTAGAAGGCC -CGGGCCTCCGGGGCGGCGGCGGCCCCGGCTGTGCCTATGTTTAGCCAATTTTCAGTCAGT -CAAACTCGGGGCTTAATTGGCACAATCGCTTGAGCCTAAAACCTGCCATAGTCCTTTCCT -TCTCTCACTTTTCCATTCTCCTTAACTCTCTGTTTTGGTTTTGCACTTATTGATGTGTGC -CTCCAATTTCGCACGCCCAGTCAAACAATTCACCTCTCCCACTCACCTTACCCCATCCTG -CTTTCGGTCGTAGGCTGGGGCTGAGCCCTTGTGGCGTACACATTCTACTCTTGGGATTGG -AATCATTTGGCTTCTTCGCGCTGATGGCGTTGGGTAAGGAGGTTTCACTAACACAAACTT -CATACTCTACTCGTTTTATAACAAGATTGTCCTCTGTCTGCTTCATCGATTATCTTCCCT -CATTTTAGATTCAATTATTCGCCACCTCGCCGATTCTCCATCCTTATATCCTGCACATGA -CCCGCTTCTCCGGGGCTGTCTGTTTGCGAATCCTGGTCTGTCATGGCCTGGCTGTGTATT -AGCTACAAGGACTACATCCTATCACAGGTATCTAATGGCAACAAGTATCTAATGGAAAGA -TACATCAATGACGATCTCCAAGGTTCTTATCTAAGCCTAGCCACCGAGCGAGCAGGCAAT -GCTGACCAGCGCCCTTTGCCTCTCGGCCCCGAAATTTACTAAGCACCGCACTCCCGCCTC -ATCGTACTTGATCAATGATCGTATCAAGCTTCTTATCTTGTGGCTTGGCTCAGGTAGAAG -CTGGGGGCCCGGAGGANGGGGGGGACTTGCACGAAGAGTTCACGGAAGAAGTCAATAACT -TATACACGTGTGTACCAACTATAATAACGCAGAATTACCAGCTAATCAATAATGCTATAC -TAAGTGAGGCCATTTGAACATGGTGTTTGAGTAAATAATCGGTAAATCAGCGCGACCTTG -AAGCTAAGTGCTAAATTGACAGGGGTTGACTTTCTGCGCCCAAGCCATTCCCATTCTGCT -TTGCAGGCGTGCTATTCTGTCGCTTGTCCCAAGCTGGCAGGTTCTGTCTTGCATTTTGAA -TAGTGCTTCGTTGGGATAGGAGAGCTACAACGTCCCCAGTGACGACTGTAAAAGTCTTAT -CAACCCCCAAGGCCCAGTTCTCATGGAAGCACTGGCCTGAGTTGGGCTTTTTATGCTTTC -TCAAAGTCTCTTCATACAACCGCATTACTTCCCGAAACTCTGGTTCCTCGCCCCACAGTT -CATCCAGTCCCATGCTCACACGTAAGTCGAGTGTGCCTACCCACACGGCATCAACCTGGT 
-CCCCTACCGAGGTGAGGATGGCGTCGAGGTTTTTGACTCCACGCTCGGACTCAATTTGGA -TGATGACTGCGGCTTGGTCGTTGACGTTTTCCCAGATGGACCGCGACGCATCAATGGCCG -TGGCGCTCAAACCAGGTATCAAGCGGGCGGGAGGGGCCGAACGGCGTCCATTTCGTACTG -CGCCGTATTTGGCGGCCGAGATAATATGCTCAGCTTGCTCGACAGTGTCGACTTGCGGGA -CGACGATGGAAGCCCCTGCATCTAAAGCAAAGGCGATGTTTGAATGTTCATGACCTGGAA -CTCTTTGAAAAACGTCAGCATGGCAATTTTTATTGCAGCCAAATCTGCAGTTTGAGCACT -CTCACCTCACAAATGCCATGCATCTCCCTTCGCTACATTCGTGGATCGCGTGTACAACCT -GGAATGACGTAAGCCATTGACCCGTTTTGAACGTGGAGAGAACAGTCGCTCACATTTGTC -ATAGTTTCAATACCACACGGAGAATGCTCCCAATCCACCCAGACGAAGTCAAGACCCAGC -TGTGCTGCGATTCTGGCAGTGGGAACGGTAGGCAAGGCTAAATAGAAACCAATCAGGGGG -CCGATTCTGCCCTCATACGCATCGAGGAGCGCCTGACGAGCCTTGTGAGGCTGAAGAAGC -GATGGTGCTCTATAAGCTTGCATACCCCTAGTGCTTACTTCTAGATGCTCTGATCCTGTT -GCATTCATGATGTAGAGGACTAGGCAACGTTTCAAAAAGTGTACGATTAGATTGAGAAAA -TGATATGGTATGAATGGTAGGAAGAAAGTACCATCAGTCATGGTCGACCATGGGGTCTTT -ATAGTTTAGCCCCCGATGCTTCTTCCAATAGCCAGAGTTCCTCACGAGCCAGGGGCTTTG -CTTTGGTCGAAAGACTTAGACGCGGTTGCCTTTGGAAAATCGACCGATCCGTTATTTTGC -AGTCGAGAAATGGGCCTTCGAGGTCCAGGTTTTCGTCTCTGCATCGAACGCAGCGCGGCG -TCCGACAGAGCATCTTTCGCACTTTGGTAGCTCTGACTTATGCCCCGACTTCTAGATCAA -CCGCATTGATTGGGGTTATACCAATGTGACGCGGGTTAGTGAGCACAGAGGAAATAACAG -AGATAAGAGCAGCGGTAGTGTATGCTACGGGATTACTCCATACCATGAGCCCATGCTTAT -GTTTCTGCTTGAATCACCTAATTAGAATAAAGTGATGATTTTAGCAGTACGCAGTGATTA -AATAGTGTTTGACTCCTGCCTCACATGATTATTTAGCTAGAATTCTTTTGAGATCTAACA -CAGTCAACTCGTATATAGATAGACCTGCAGGACAGGTGGATTTGTGTCACCCCAACGGTG -TGGGTCGGGTCAATTAAGGCCAACGGCTCGGGGCAACCCGCCTACGGTCCGGTGGAGTTC -TCCCCAAGAATTCTGTCAAGGTGGCTCCGCCAAGTCTAAGTCTGTCCGTTGGGATGGATC -GGGAAATAAAGAAATTCCATGCTGATTAGTATTTCCCAAAACAATCCAATGCTATTATTG -TCCTATCTACCGTCCCATCCCTATCCCTAATGGCCAGCCCGAGTCAATATGGAGAGTTGG -GCTTCACTTTACCCTACATATATGACTATAGAGCAAAATAGATGAGATCATGAAGAAGAA -TGGACTCTTGATCAGGAAGAAAACAATCATATATTCTGATACGCATATTAAAAAGTGGAT -GCCGACTGTAACAAGATTCTCACATCTCGCACGATGGCATATTGTTGCTAATACACTGTT -GCTATTCTATAAATGATTGCAGGTTGCAAGTACCTCAGCCTCATGCCCTCAGCGCTAGGA -TCTATGAGCCGGGTGGATTGACTCCCGGTGGACCTGATCGTAGACATACTCGTCCAGTTC 
-ACAGCTAATGTCCCCCCAGCCGCCGCACACAATTTGCCCGTGTACCATGCCGTTAATCCC -AATGCCGTCAACTGGACGAGTCTGGCGCCAACGATAGCAAAGCACCTTGAATTCATCCGT -CAAGTAAGACTACACCATGGACTGAGTGAAAGAGCTTTGCAAAATTCCAGAAGCAGCGCA -ACGGCAGCGCTGCGCTTGATACAAAACCCGGCACTGAAAGTGTTTCATTTCTTTGATCAG -ATTGCTCAGGCGGCCGAAAAAGGTGGGGAATAGCCTTCTCTCGAAACAAAAGAGACACTG -AAGAAGATTCCAAGAGTGGACGAGCCAAGCTAGTCTGCGCGGAATAGATGAATTTGTTGT -TGACGCAGTGAAAGCTTGAATTCATCCTGAATAGATAGACAGGAGATAAGTTACTTGACC -GTGTGGTCTTCCTGTGCTTATATTTATGTACTGTTAAAACCTATGGGGGTTGGGGTACTC -TAGTAGACTAGGATTACAGCTATCTTTAGAAGTGGACTACACACCCTTATTAAAAAAGAG -GACTCGATATATAATATATTTTCAAGTATTCTTATGCCGTTGAGCGTTCTTTGGGTTCAG -ACACGGATTGAATACATGGCGTTCACCCAGGCAGTCATAAGAGGGTTTGGTACTTCACGC -ACTTGAGAGAATTAAGTACAAGCACTGGATCTGAGAGCTAATGGAGGTGCCGTTAATGAC -TTAAGAAATAATAGTAAATGATGAGTACTGTGTATGGCCAAGGCAAAAGGGACATGGAAT -CTGTATGGGCCGTACTGACAGGAAATGACCTTCTGCTGATATAACCGCGTTGTTGACTGA -GACTTATCTAAATATGGAGCAGACATGGGGCAAGGAGTCAAAAGTTAAATAGATCGGGCC -TGTTCTGCGGTTTCCTGGTATATACTAGGACTTGTAGGTACTCCGTGCTAGCCATGATAG -GTCAAGAGCAATAATTGACAACATCGCTTGAAATCTAATTGCCCATCGCATCAACAACCT -GCTTCCTCTCTAACAACTCGCGTGGGTCCAAATAGACGGTGAAAAGGCTCATCCGGTCTT -CCCCCGCCCGTTTACCAAAAATCGCCAGGCCATCAATCTTGATCACCTTCTGCTCCGGAT -CCCCCTTGAGTACATATTCGATATTGGCCTCCTGGTAAATTCGATCCGCCACCACATCCA -CACGGCCAATTGTATGTGTCATGCGATCAATGATGGAAAATTGTTGTTTGAAGAAACTCT -TGATGGCGTCATGGCCCTGCAATGGAGGCTGTCCTGGAAAGGCCATCACGCAATCTGGAG -TATAGAACTCGGGAACAGAGACATCGGGATTGAGAGCATCACCAGATGCGTGGTATCGCT -GAAGCCATTCAATGGCTGATGCTTTGATTTCGGGGTTATCCACTGCGAGACATGTTAGAC -ATTCACTATTGGAACCAGAAGACCGTAGACTTATGGACATACTCGTAAGGGTTGCGGTTT -TGCTAAATCTTGGATTGGCAATTTGGAAAATGATTCAAGTGCATTGAAGGAACCTCTAGG -TCTTATAGCTGTAGATTGCATTCCCCATTTGTAAGCTGCCCCCACGAATATTGTTTCCTA -TCGGCCAATGCCTAGGTACATAAGTACCTACGTATATTTGTCAGCGCTAGCCCTGGTCTA -CAATTTATTGTTAAACATCAGGTATGGCGCCTCTGGGCCCATATAAGAATTACGGGTTTT -GCTGTGGTCGAGCGGTCAAGTACGTATATTTATCTCCATTGAAATTTTGAAATGAACGAG -TGCCAGTATTGTTCATATACCAGCGTTTTGCATATTGACTGATTTAGAATTAGAATGACA -TACTGGACTAGCCAAATCACTGTCGAAGAAAGATATTTATCTACATAGAAAGGTGGCAGG 
-TCATGTCAAACTAGGGGTCCTATTTTGGATAGTACAGATGGTAGTTTGTATCCATAGCCA -GCCATGCAATAATGGAATGCGTTCACACACTAGGAGATGCGCCTTGGTCATTTCATTCGC -GAGTCTCGAGTACAACTCGCAAACTGTATCCATACGGGTCCAAAAGGTTGTTCACAGAGT -CCCTCCATGCCACTACCCAAGTGTATCAAATAAGTAATGGAGATCCTCTAAACCCTACAT -TGAGGCTATATATCACTTAAAACCTACTAGTCTCCAAGGTAACACCCCTATAGACCACCA -TTTCGAAGGTATCAAGTTGAAAATTATTTCATTTTAAATGTATATCGGACTGCATTGCTG -GCTTTCGTGCACCATCCGCATGTGAAATACGTCTCGCGAAGCCTACTCCGTAGTTGCAGA -AGCACTTCACGCAGGCGAGTTCCGAACTCACCGCTTAGGGGGGACATCTAGTCACGTGAT -AGAGCTTAGCATGCTCAGATTTACTCCGACCTTGATAGCCTCACAAAAGCGGCGGCAGAT -CAGATGTACGGAGCATACCTAAGTACTTCGCTGTCCGAATTTGTGAAGTAAATGGCCAAT -GTTTCTCCCTCTTTTTTTGATCTGCATTAATTCGCTAGATTATGTTTGCGGGTACGACAT -TCTTGGTCCACGAAACATGCTCTTTGGATATGAGCCGTCGTCCAAGCCATTCAATCAATA -TTACTACCCCAATCGGGCTTCTTGAACTGAACATACACAAGAACACATTTAAGCTATGGG -AGCTGGAAAATCATGCCGGGTGTTGATTCATAAATTTAATGGTGATCCGACGAAGCATCT -ACAAGAAGGAATGCCAGCCGGTATTTTAAGACTTCGCAAGATCAAAGCCCGAAAAAGAAA -TCCCCTAAGAACCAGAAAAACAAAGAAAAACGCCAACCACTCTGAATTTGTGAGGCCTTG -AAACTGCGATTTCGTCTTTTCCACATCACGTTTAACCCCGGCCGCGCTGCTGGCCACGCG -CACGCTGCACCGTAGCCTTACCACGCGCCAAACCGACACCGCGCCCGCGCTGGCCTCGTG -TACGGAACATGGGAGCATTACTAGAAGAGAGAGAAAAAGTTAATAAAGATGAGACTGGAG -ATACGCTTATGACTAAATTGCGATCCAAAAGCTGAAGATCAGGGTTTTGACGTACCGGAG -CATGTCAGGCACAATGAAGAATCGCACGTGGCTGCCGCGAATGTAGACCTGGTCGAGATG -GGAAACACGGCCATCGCGAGCGGTGACGGTGATGTCCTTGAGCTGAACGTTCATGTTGTC -CTCGGCTGTAGGGAAAGGCAGTCAGTATGGAACCGCGCAAAAGAAACACTCCCCAGGCTT -GTTTACAGGCCGCCTCTCTAAATAGGGAGCAGATGCTAACAAGCTTGAGCAGAAAGGAGC -CACACACCTTCCAAAAGCTTCCCCCGGTAGACGACTCCAGATGTGATCTCAATCGTCACG -ACATGGCCCTGTTGGAAAACAATATTAGCCCCTCTATCTCGCGCTATCCCCAGTGATCTC -CATTTTCGCAAAACCATGTATTGATCCTTCTTTCACTTCGAATTTTCGGAACAGCAGTAT -CGAGGAATGTAACGCACCTGCGCCTCGTTGAGGAGCTTGATCGGGATGCCGATCGTGGAC -GTGAGCTTGCCCATTCTGACTGTGTGACGGCGAACGGAGATGGAAAGGGGTGAAAATAAG -TCGAAAATGAGTACTGAAAGCTTCCCTCGCGCAACGAAAGCGATAGATCAAGACAATGGG -TGGTCGTGGGGAGATGGTGGGACAGTGATGAGGTCGGAGTAAGTGGAGGTTAAGAGAAAG -GCGAGTTTTGCGGCTAAGCTCATCCTCTCCGCTTTGGGTTCTATCAATTCCGACCAAACA 
-ACAACTCCAACCCCAAAATCACAAAAACTCCCTCATAATGCGCCTGCAATAGCACCTTCT -ACATCCCTTAAAATCCTACATCAAAATCCAAAACCGCTATCATGGCTGGGTCACCTAATT -CCAATCTACGGGGTTTCAACCACGCCGTCCAAACCCTACTCAAACAACCAACCCAATTCC -TTCCACACCTCACAATCCCAACATTCACCCACCTGCCCGAGAACCTTGGACCACACCTGA -TAAGCGCAAAGAAAGGACTCCAGACACAACCCGAAAACCCACCACGCACGCCCACAATCC -GCGCTCTAGTGCTCGACAAAGATAATACGCTCTGCGCGGCGAAAACAACATCCTTCCCAC -CGCAGATTCTCGCCAAACTCTCCTCCCTACGCACCTCACCAACTTCTCCCTACAATCAAA -CGACAAACCCGCACTCGATTCTGATCGTCTCAAACCGCGCCGGTAGCCACGCGAAATTCG -ACGGCGAAGTCTCCGATCTGGAAGCGCAGCTTGCGCATCTGCGCATTCCTGTCTTCCGGC -TGCCGGCGGGTGCGGAAAAGAAGCCGTTTTGCGGGGACGAGGTCGTGCAGTGGTTCCGAG -AGAGGGGGGTGGTTGAGTCTGCGCATGAGATCGCTGTTGTTGGAGATCGACTCGGGACCG -ATGTACTTATGGCGGGAATGATGGGGTCGTGGAGTGTTTGGTGTAAGGAAGGGGTGTTTG -ATGTTGGGATGGAGGGAAAGCCGGAGCGGAATATCTTGGAGAAGATGGAGGTTTGGATTG -AATGGTTCCTTAGGGAGAAGCGAGGATATACGGCTCCATTGCCGATGGGGTGGGAGAAGC -AGAAGCAGTCGTAGACATGCGATGCATACTGGGTCAATATGAGATGTTTGAGATACCCTT -GTATAATATGTACTAGCGCATAGTAAGTAAAGCGGGAGTGAACTACGGTTACAATCTACA -CGGATCTATATGCGATATAAATGGAATTTATTCAGAGCATAGCGTTAATCCCCCACACTC -GTATACCGCGAGCTCAGCGCTGTCCTTTTTGTATGGTCTCGGCGCAGAGGGTATCACGAC -TTTGCGCTGCCCAAAGGATCAGCAATGGCTGGATGTCAGACAAGTGCCAACACAACAAGC -TCATGAGCAGGATTTCGACTCCAGACCCGTTGGAAAGGGGGACATTGTAACAGGAGTGCA -AATCAGACACAAGTGTAGCACCACCGAGGAAAAACGACAGGAAAAAAGGAAATCGAGCGG -AATGGAGTCGCAGTCCAAGTCGGTCTGTTTGGCATAACCACGTaaaagaagaatgaaaat -ttgaaaatggaaaatgagaaatgagaaaGAAAACCTGGTCATAAAAAGAAGATAGAAAAT -GGTAAAATTGACCGGGAGAAAAGAAGAAGGAAAAAAAGGTCCTGGATTCAACCGGGGTAG -CCGGGAAAACAGAAACAAAAACAGAAAAATCGCTTCAGGACCGGCCCCAGAATGGAAAGG -GTATTTTAATACAACAAGACAGGGTATGTCACGCAAGAAACACATCAAAAGGTAAAATCG -CTTCCCTGGCCATCGCCAGCGAGACGAGAATCGTCCAAAGAGAGAGCAGCCGAGGGGTTT -TGCAAAACACCGAAACCCACCACCTACGCCTCTCGATATTGGACAATGAAGAAATTCTTC -CAAGTTGAAACCACAAAGAATCCATCGACAAGGATAAGCGCCGCATGCACAGGAAAGGCC -GTGAAACATCAAGAGACAAAAAGAGAAAGACAAACTCATGACCCAGTTGGCATACCCTGC -GAATTGATCTCGTCCAGGTTAGCCATACCGTGGGAGTAATCACCCATGCCGGGAGAAGGA -TGGGCAGAGAGGCCATGCATAGGCAGTTCAAAGCTGGGGCTTTCGTGCGAACCGCCAAAA 
-TTGACCTGATGCTTTGAGTAGATATCGTTCATGGAGAGACCATCATCGAAGCCGGTCTGT -TGGTCGTGCGAAAAGGCATACATATCGGACGAGCTGTGTGACATGCTGGACAATGGCGGC -GAGTGGTTTGCAATCAGTGGATCGGCAGGGCTCATGCTGGCGTAAGGGGATGGTGCGTGG -GAGCCCATTGTTGAGGCATCGGTTGATCCTGATGAGACAAAGGGTACGTTGAATTGAGGC -GCCATGCTGGCTACTGGCGTGGACTCGCCCTGCCCGGCATTGCTGTACATGGGGCTGGCG -AAGTCAGGAGGCGATCCAGTCGTCGTCGTCGCGGACATGGGGTTGGTATGGAAATCTAGG -CCATATCCTTGGGTGTCGATACGGAGGTCGGTAGGAAGATGGTGCACTGACTGTGGAACA -GGCGCGTAAATGCCTGCCTCGGTGTGGTTGAATGGGGGTGGAGGGGGTGGCACGTGGAAT -GACGACATGGGACTCTGGAGTGACGAGATGTCAATTGCAACGGGAACTGAACGGCTTCGT -TGACGTTTGTGGCCACGAGCAGCAGGCATGTTGAAGCCCAGGTGGTTGCTGTCCTGGTAC -AGGTTCATGTAAGTGGGAGCGGCAAATTGGTTAGGCTGAGACGCAGGGCGCTGGATGAAC -GGCGGAGACATCGCAGCTGGCATGGCGAAGGTGGGGTTATTATAAGCTAGGCGATTTTGG -AAAGACTCCAATGATACAAGCTTAGCAAGCTGAACACTCAAGACCTTGGGATGGCCCCCC -AGGTGATGAACCATGATCTGACTAGCTTGCTGCTCCTCGGTGAAATCGCCGCACTGGTAG -AATCCTCCAGAGTTGGACGAGTCCATGTAAAAGTGCGGCGGGCGGTTCAATTCCACGACA -AGACCACCTTGTCGTGGCGGTTCCCCATTCGGCCCAGGGTTCGGATCACCGGTTTCCAAA -GTAATATTCTTGATGTAAGCGAACGGATACTCGATCTTATAGCCTGCAGCATCGTTGTTG -ATATAGTAAGTCATGCATGCCTTATCAGGGGAATAGAAAACGACCAAGTCCATGGCGTTC -TGTCCGATGCGTCGCCAACTGCCGACGGTCAAGGAACGACAAGTGAAGTGAGAAATCACT -AGAAAGACAAATTAGTTGCACAGACGGCGTAGTAAAGGAGGCTCTCAACATACCAACTTT -GCCAGAAGGACTAGTCTCGTTCGCGTACATGCTAGGTGGGCCATATGCGCCAGCACGGCC -AAAGGGATCTCTTGCGCCAGGCTTGTTCGGGTCGAACTGCATCGCAAGATAGTGCCGCAT -GGATTCTGGGATAGAATCACATCCCTCGCCAGTCTCAATGCTCTTTTTCGCCAGCATCTT -GATTTTCGCGCGACTATCTTCGGGTCAGTAAAGATAAAAGATAGAAACACAATAGTAGAT -AGGACCTACCGGTTTTGGAACCAGATTTGAACAGATCGTTCCGTCATATTAATGTCCGAA -GCAATGCGTTCTCTGGTAGCCGCAGTAGGTGTTGGGTTCTTGTTGAATTCGATTTCCAGA -AGGACAAGCTGATCCTGAGTTGCGCGTTGACGTTTGTTGTTTTTCTGTTGTTGAGTTAGG -GTGCTTTTGCGAGGAGGTCGACGTGCGGCAGAAGAATGAGCCGTGGTAGCACTCGAGGTC -GACGGAGAGGGCGTGGAAGCTGGAGGCGAGGAAGAAGACGCCATGGACACACTTGCGCCG -GTTGGCGGGTAATGCATTTCTGTCAACGAAGACATAGTTTAACGTGGTACGGTAGAAACT -GTATCGCACAAAATGGGAATCGCGTCCAAGGTAGTCAATATGCAGTTTTTGGGTTGCGTA -TATAGTTTTTTTTCGGGAAGAAAAGGTACGGTGTGTGACGATCCGGCCGACAGATAATGG 
-GATGCGTGTGATCGGAAACAGACGGAGTGTTGGAAATCTTGATTCTCTCGAAGCAGCCAC -AGGCGCGAGAGTGTCAAGTTGAAGATGTAAAACGAATGAAAGTCTAGCGTAGGAATCTCG -CGTTCGAGGGTGTCCACGTCAGAGACTGGGTGGTTGGGTTGAAAGGGCGCGTCGCAGAGG -CGTCCACAGAAAAAAAAAGGAGGGGATAAATCAAAAGGCTGGCCTAAGCAGCTGGAAACG -AATGGCAGGGGAATAATTTCTCCCTATATCGACAAGAATCAATGAAGAATCAATGAAGAT -GGCTAATACCCGGGTTGAATCGATCAACGGGGCGGGAACGGGAGGGGGGCAAACCGTTCT -CGAGGAAACGAATGAATTGGGTAGAAAGAGTGATATAACGTCGCCGTgaaagaatgaaac -ggacgcaaaagaaaagaaaagaatggagggaataagaaacaagaaacaccagaagaaagC -GGATCAGGAGATGGGATGCTGACTACACGGGGAACCGGGCATGACATGGACACACCGGGG -GGCTTTCTGAACTATTGGCACCTCGTGGGGGCCCCATTGTTTTTTTTTTTTTTTCCGTCA -GGGCCTCGATTTCTCAGTCTTTTTTTAGTTCTCTTACGTCTTTTTCTTGATTTTCTCCTT -ATATGGGTTTGTACCAGAAAATCTAGTGGTTTCCCATCTGGGTCCAGTATACGAAGTATG -AAATGGAAGTGGAGGGAGACTGGTTTGGGCCCCTGCGTCAACCTGCATAAGCCCAAGCAG -CGGACGGAGTACGGAGGAGAGAGGAATAAGGGCACAGACTGACCATTGTTTTGGAGAGGG -GGTACGAGTGTTCCCCTTACTCCTTACTCGGGGCCTATTGTCTTGCTGTGTTACTCTCAG -GGGCACGTCTAGTACTCCGTACACAATAGGAAATGGTGGCGTCAAATGAACCACAACAAG -TACATCGCTTGGACGCGAACGGCATCCACGTGAGATCGGGGTATCTGAAACATATACCTA -TGTCAATGCGTTTGACAACGAATGAAGTGTATAAAGCGCGGACGTGATAGAAGATTTTCC -CCTGGACAGACAGGAGTTTCCCCTGATTCGAGAATAAGTCAGCCACAGGAACAGGGTGCA -GTGACTATTGGACACCCACAGTTTCCAGGGGGCTAATCCAGTGAACTGGGCCAATAAGCA -TGCAGTTACACGTGCAGTTCCGCGGACCAGAAGGGAACATTCAGGTACCCGCCCCGTCAT -TGATTATGATTGGTTCAGGGGACATGACAGGTCATTCCAGGCCCTGAACCCCGACCAGTC -TTGAATTCAGCCCTCCGTCATATCCGGTTTGATATTCCACGGAGTATAAATCAGATCGAT -ACTGACTCACTATACTCTGTATACATTGTACATTGTGCATTGGAGTCTGTTGGAGTGTAA -CTACATTGAATTTATATTCGTCCCTTCATCAATATCCAAACTAGGGCATTGCATTCCGCT -TACTCTCTCAGTCTAGCGCTGGCTCCTTCGGTACTTTATCTTGCTTCGACTACTGCATGT -AGTAATCTTCGAGTTTAATGACCCGACATGTGTAACTGCCCTATCTTCCATTGTCGACAT -GCGAGTTCCCGACATGTTCATTCTAGGTAGATCCTAAATACTCCGTACCCCTTGGACCGT -TCATTGTTTCATGCGCGGCGAACCGGTTCATTCATTATTACCCGAGGGGAGGGGGAAATT -TTGAAAATAAAAATAAAAAGCCCCTCAAAATATAAAATTAACTTCGTTAGAAGATGAAGA -AACAAGCCCTTCTATCAAGGGCTTGCACATAATTATCCACTATTGACAATTCAATCTCCC -TAGGCCAACCAGATCCACTTCCCAATTCCCGTAGCTTATCCGTCGTTTCCCAGATCTGTT 
-CAATAAGGTCCATTGCCGAGTTCCGCTGTTGCTCCCTATAATAAGCCGTCATCGACTCAT -GCTTCTGCCGAACCCATGCACGGTCCTCCGGCAATATAGCCTCCCTCGCAGCAAGGTAAA -AAGGCTGAGGCAGCGATGAATCGGGTCCACACCCTGTCGACAGATGATCCATCAGCCCAA -AACACACACGCACCAAATGCTGCACCCGCGGCGAGCACGTTTGATCTCGAAAAACCCGAC -GGGCAAGTAATAAATGCGCAGTCCAGTAAAAGCACTCGGTCAGGATATCCAGTGGTGAGC -GCTCGCATGGCATTGGCAGGGCTCCACTATATCCGGACCCCCAGGCACGAAGCTCAGTGT -CGAGTCGGAATGCCTCATTGAGAATGGCGCTTTCATCTGTATTTGTTTCAAGTCCCTCGC -TCACGTCGCAGGCTAGATGCGCTATTCTTACTAGGAGGCACATCAGTGGCTGAGGACATC -CTGTCATCTTGTAAAACGAGGGCTCGGTGGTGTCCCAGCGATCCTGGTTGGGCTGGTTGA -GTATGAATTTCTGCGGTGCATACGGCCTTCGCTGTCGTGAGATTGCTAGCATTACGTCGA -CCATGCCGAAGCGTTGTACTGCCATGCGTGTGATAGAGTTTTGTGGTATCTCATCGAAGT -AGCCTAGTGCTTGCATGAGCCAAGAAGCACATTTGACGTGGGGCTCCAGTACGCCGTCTG -CTGTTTGCGCAACAACGATCCCTTGAAGGACAACTGCTGTGAGGAGCGCTTCTCGCTCGT -GCTTGTTGTTCGCTACGATTGCACCTGTGTCGCAAGCGGCTTCGGCATTGATACTGAGCA -GGTTTTGTCCGAGTGTTCTGAGCATTCGCTGCTGGCGCTTGGCTGCGATGTCAATGCTAG -TTAATCCAAAGTTAAGCATATGTTCTGTCGATATGCAGACGATGGTCTCGAGAAGGATTG -GTGAGGATAGGGCTGGTATTGAGAGTTGCCGGTAAGGGTTTTCCGGGCCATCTACAGCGG -GGACATTGGATGAAACGAGGCTTATGTCTGTCCACGTATTCTCGATTAGATATTCCAATT -CCAATTACGTACCATGAGGTCACATACAATATTCCCAGGCAGATTTGTCTTCTGCAGAAT -CAAAAAATGTCTCTGCGAATAGTGCTGAAGGCCGACCTAGCATGTTTTGTTCAATTGCTC -GACGGGTGCCTTTGCAGCGAGGCTTATGCCTTGTGGGCATTGGCTCTTTTGGTTGAACCC -ATTTCACACCCGATGAATATTTGCAGTCTAAATTCCGCGAAGCACAGAATGAGCAAACTG -GCTGTTGCTCGTCACATTTATTGTGCTTTTTTCGGCATTGGAGACATCCTGTTCCAAATT -TATATCAGCACTTGTACTTCGCTCTGGTACAAGCTTGCATACCAGTCCGTGACCGTGAGA -ACTTGTCGCCTCCAGGCATTACGACCGAACATATAGTGCAATACTGCCATGATAAGAGTA -GGAGCAAGGATTATATATTGACGGGCTCTGGGGTATCTTGTTGCCCCGCAGCCTTATCTC -TATTATTACGGTCACCTGATAGTTGACCAATTACATTGCTTTAAACTCGAGACGATAGGT -ATAAATTGCAATGTTGATGGATTGGACGAAGTTCATGAGATATGATAGGACTTTATCCAT -TCATCATAATTCACCAGAAGCGAGGATGAGGAAGGAAAAAATGGCTATGGTGAGCGCTTT -TGAAATTGCTGGGAAGACAACTTTTCTGCCTAAATATATACTGTGACGTTGACCCATCAT -ACAGCAGCAAAAAGGTTTTACCGAACATGCTCAAGTACTTATGCATTGTGACTTTGGAAT -CGAGTGGCAATTATAGAATAGCGATAAGATACCATAGACTAATCCGCCGAAAAACATCTA 
-TCCCAGAACCTCCTGAATGACCCGAACAGTTCGGTTGACAATCTCATCAATCTCCTCGCG -GGTAGTATTATAGGCCGGCGCCAGAATAATATGATCACCTTCGGCACCATCGATACCACC -ATTTCCAGGCATTAACGAAATTGCATTTTCAGGCTTGAGACCTGTCGCGTGCAACGTGCG -AGAAAGCGCCTTTGAAGGCGGGAATGGCTCTTTCGTCTCTTTATTTTGAACAAATTCCAT -CTATAATCATGTCAGTACAGACCAACCAGAAAGATTGATATAAAGAGAACTAACCGCCCA -GAACAAGCCTCTGCCTCGAACTTCGCCCACGTGAGGGTGGCTATCCAAGCGTTCGTGAAG -ACGCTGACCCAGATATTCTCCCATCGCCCGCACATTTGGCAGCAGATTATCTTCTTGTAT -AATTTGCTGCACCTTATATGCAGCCGCACAGGCAACTGGATGGCCCTGGTATGTCTGACT -GTGCATAAAAGCACCAGTACCTTTGCTCAGAGTATCGACGATGTGCTTGTTAATGAGTAA -CCCGGCGACCGGGGCATAGCCGGCACCGAGTCCCTTGGCAATTGTTTGGAGATCAGGGAC -AACGTCTTCTTGCTCCCAAGCATGGAGAGTTCCGGTACGGCCCATGCCGGACATTACTTC -GTCTAGGATGAAGAGCGCACCGTGTCGCTCGCAGACGGCTTTCAGTGCTTTGAGGTAACC -GGGCAATGGGGGCACGCAGCCAAGTGTCTCAGAGACTGTTAGTATATTGAGTATTGTGGG -ATTGGCTTGTACTCACAGCACCTGTCATAGTCTCGGCTACAAATGCACAGACGGTATTGG -GGCCAAGTCTTTGGAATTCGTCCTCCAGTTCTTGGGCAAGTCTTGCGATGTACTCTTCGT -TACTTTCATTGGCATGCATTCCACGGTATGGATAGCATTGAGGCACGTGGTGTATATTCT -TTGACAGAAGGCTTTCATAGATCCTTCGACGGAGGGCGTGGTGTCCGAGCGCAAGGGACC -CCAGGGTATTTCCGTGGTAAGAAGACTTTCGAGAAATGAAGTTAACCCGCTGGGGCTCAG -GGGTGGGCAACTCTGTAAAATACTGAATGGCCATTTTCATGGCAGCCTCAATAGCTTCGG -TACCTGTTGTGATTTTAGTAACCAATGTTGGTTCAGCATGGGTAATAGGCTTCTTACCCG -AGCTCACAATGTAGGCACGAGACATCTTGCCTCCAGTCGAGTCTACCAAAAGCTTCGCAA -GGCTTTCCGCAGCACTGTTGGTGAAGAACATGGAGTAGCAATAGGGGATTTTCTTCAGCT -GGTCCAAGATGGCTTCGTGGACACGCTCGTCGCTGTGGCCCAGACAAGATACAGCTGCAC -CGCATGTTGCATCTAGAATTTCCAGACCGCCATCGGTTATCAGGTAGCTGCCTCGACCGC -CGACGATGTTCAAAGGCTCATTTCGCAGCTGGTGGTAAAGGACTCCGCTGGATGATTTGA -TGCCAGCGACCTCGACCTGCTTCTCGGAAGGAGTAGGCACTGTAGCTGCTACCACGTCTG -GTGTATAGTCGCCCATTGTATGAACTGATTGCCCTGAAGGGGTTGGTGTAGTAAGAGATA -AGTGCTGAATCAGGCGATGTTCACTGAAATTGTTGGACTTGCACTGGTACTCGTGCTGGG -GGATGGTATTTATATATGTACTCACTCCACAATGGCCAAGAATCAAGCTGCAGAATTATG -ATACCGGTACTTGAAAAATAGCCTATACACAACGCCGTGATTGGCGAGTCTCACCTTGTC -GCCCATTTTGAAGTTCATACTTGCCTGAGATAAGCTAATCCCAAAGAGGAAATCTTTGTC -GGACCCACGTTCATTGACTTCAGCGTGAATTTGATAGGTTGGACAATTATTGAGATAGAG 
-CTTATTGATAAGAGAAAATCTCTGGGGATCGCGCTTTGATTTCAAAATTGATTGACCGGT -TACACATGTGCATAATAGGCGCCATAGAGGGGTATCGGAAAGACTACCGAGGACCGACCA -CATACAGGCCATAAGAGGCCACCGCGCCAACTTATAGAATGAGTCGCTTTATTCTCACCT -ATATACCTAGACTTCTATGCTTCTTTTGTTTCCAGTACAGCTCTCGTTAAATCTAAATCC -ATCCAAATATATAGAGTTCCACGGAAAAATAAGTTCTAGGGCTATATAAGCCGCTACATG -ATCGACAGATGAGGAAAATCCAACGGGTTAAGAAGTATCAAGGCGATACAAATCTATTAT -TATTAATCGCCACCTTTTTACCTAGTGATGTAGGTTAGGTTATACACAGCACGTAAGTAA -GTCTTGCACATTTACGCCGTGTTTGGATTTTTAGGCTGTATAGTGGTACTGAGTAAATTG -TTATCAACGAACTTTTACCATTGTACGGAGAATGGAAAAAATAGATAGATGCTTAGCCCC -CCCCAGGGATAGCTCTAGAGGTCCCAGGCCCCGGGGCGGGGCGGCCCCGGTTGTGATTAG -TTGGCTACTTTTCTGCTAGTCAAAACCGGCGCTTGGTTGGCGCGACGTTTAAAACCGTAC -GGTCACAGCACAAACAGATATTGAGTAAAAGAATGCTCAATATATGTAACAGGTGACCTC -GGAGGCCACACCCGATAACCCTCATGCAGCTTTCCACCAATGGCGCCTTAGTCAGACTAC -CCCTCATTGATATGCCACTCAATTGTTCTCCGGAAACCCTCTTCCATCTGGACAACCGGT -GCCCATCGTAACACTTCTTTCGCCAACGTAATATCCGGCCGACGGACTGTGGGATCATCT -ACCAATGCTGGCAGATACGCAACAGAGACGGGTGGCCGGCCGGTCATCCGTGAAACAAGC -CCGATGACGTTTTCGGCTAGTTCCTGAATAGTGACTTCCCCATCATTACCGATATTCACA -GGGCCTTTGCTATAATCACTTTGCATCAGAGAATACAGGCCATGCGTACAGTCGGTAACA -TATTGAAACGAGCGAGTTGCCGTGCCGTCACCGTTAATAGTTAGATCTTCGCCGGCCAAT -GCAGAAGTGATGAAGCTTGCAACGACGCGGCCATCGCTCGCCCTCATACGCGGGCCGTAC -GTGTTGAATATCCGCGCGATGCGAATATCGACGTTGTGTTGCTCTCGGTAGGCGTAGCAG -AGGGCCTCGGCAACACGCTTCCCTTCGTCGTAGCAGGCTCGTGGGCCGAATGAGTTCACA -TTGCCCCAATAGCTCTCTGGTTGGGGGTGGACTGTGGGGTCACCGTAGACTTCTGTTTTC -ATGGAAGCCCCCTTGGTTAGGTGCATATCAAGGGATAGAGGTGTGCACTGCTAACCTGAG -GTGCTGGTGTGCAGAATGCGTATATTTTTGGATCTCGCAAGATCTAACAGATTTTGAGTA -CCAAGGAAACAAGTACTCAAGGTGGAAACCGGGTCTTTTTGGTAATGGACTGGGCTTGCT -GGACAAGCTAGATTGTAGATTTGGTCGATTCCTTCAAGCTCTGGAAGAGGATCTTGGATA -CCGTGACTGGCTCTTAGTCAGGGACTGGCAAGCTTGCTTCTCATAGGGGAGCAGGATAGA -TTTACTCAAGGAATTCAAACCGGGGATTGTTTTCTAAATGGCTCAAATTTTGCGATGAGC -CACTCTGAAAATTGTCGAGCCCAATCACTGTAGAGCCAGTCTCAAGGAGCAAGTCGACCA -AGTTTGATCCCAAAAAGCCCGCTGCCTACATATAAAATATTCTATATGTTAGAGATCTGG -TGCCAATTCGTCTTGAATATGACTAACTCCTGCCACAAGCACTTTGGGGGGACCTTTCGT 
-GTTATCCATAGAGCTCACTGGTATATCCTGTGCCGTTTGAATGTTGCTGCAAGGTCAGGA -GGCAGGAGAAATACAGATAGTTAGTTTAAATACACCAGAATGATGGAATAGCTTTTGGGG -CTGGAAATAAACGAGGCCGTGCGATCTACTTGCGGCATTCTGTGTAACTTCCAACTATTG -GGAATGTGGCTGGCTGTATGTAACATGTTGCCTTGGATCGTGTGGAACTATAAAATATAT -CGACGTATTTGAGTAAACATGGCTTTAACGCGAATGGTATACATCATCGAAGATCTCAAA -GGATAGTACAACATAGGCGAAGGGTCTGCGCATTGACCTTTTGCAGCCAGCCACCTTCTT -TTGCTGCGTACGCCATCCAGCCACCCTTTATTTGCCTTGCATGTATACCCCGTAGTCTAC -TGAACAATTCAATCTGGCATGCATTTTGATTTGATACAATGACCATGTCAATCAACAGCC -TGGATCTTCATGAATCTAAAATCCTCAAGGTTAAATTCCGAGCAGATCACAATGATGACA -TGTGTTACTATACAGACAGACATAGATAAGAAATATGAATGTCCAGTGCTTAATGGTAGG -CGGCACTACGGCAGATATTATATATCGTGATCGTGCAATGTGCAATGGCGTTGAGCAGAT -CTATGCGACTGCTTCAGGGTATCACCTTCCCGTACAGCAGTGTCACATATATGCAAGGCA -AGGTTGGTAAATTTGATAGAGTTGATATATGGAGAAGAGCATGACATACCTCTCGTCTGA -ATCTAAAGTCCTCGTAAAGTCCCAACCAAAGGTTTTTGACCCGGCAGTCATCTCGCATTG -AAGCTGCGATTCTTCGACCACACCAAGGAGAGCTTTGATCCATGGCTACCGAAAGAAGTT -AGGTTTTCCAGAATTATCAATGACCGATGTCTATTTTCGATGCCTGACATACCTTGGGAA -CAGGCCAATCAGAAAAACGCAATTATTTGGCTTCTTAAAAAACCTGTTCAGCATCCCACG -ATTCGCCTGGATTACCTAGATATGCCTCGTGCTTATTGGAGCGGAATTCCCTGACCAGAA -GAATATACGGTCGATGCGGTATAGTAAGCGTAGTATCTTTGTGCATGTTGTTCATAATCT -CCATGTCATAGATGCTAGAGTCGGCGCTGGAACTTGCATCCATCGTACGATCAAAGGCAG -CTGCGGACGGTTGAATGAGAAGAAGCGCAGGTGTAAAAGTCCTTTTGTTCGGATACATCC -AGTAGGCCAAGGGCATAGCGACGGGACTAGACGGGGCTAGGAATATCTCGTCCATGGTCT -GTATCCAAGTCAGCTTTTATTGCAACAGGCAAGTCCATTACTGATACGCACACCTGTAGA -GTAGTTGCATCTGAATCCAAGTTTAGAACCCGGTCATATTCCGTTTGGTTGAAGCGAGCA -GTTTTGTGTAGCTCTTTGACCACGTCATTGATAGATGCATTGTAAAGTCAGCTTCATGTA -AGATCATAATAACATCCCAACCAAATGATATGCAACACTGGATGAGCTGGGATCCCTCAC -TATCACTCCCAGACTTCATCTGTATCTCGACTGGCTTGAGCTTGACTCCATAATGATCCC -GTGCATAGCAAAACCTATTGCTTTCCCTCGAATCGGCTGTTTCGGATACGGACCAATCAG -AGGGGTACATCAATACGCGATCGGCTTTATTTTCAAGCCGGCGGAGGTTCTCGAATAGCA -TGACCGAATTGCACAAATGAAATTGGTCGGTGGCATATTGAGTGTACGCAAATCGCGACC -AGTTGACAGGCTTCCCGGCTTCGACAGCGAAGTCTTGGTCCGAATCGCCCAAACGGCTCG -TGGCTCAAAGTTCCAACAAAGTCACAGCTCCCAAGAAAGTAGTCACAACCACGAAGATAA 
-CTTGAAGCAGTTTCGGAGATGTTGTAGTCTTGCTTAACATGATCGCGATAGGATAGGGTT -CTACTTCAAGAAAACATCGAGCCCTCTCTCATTCATTGGCGCAGCGAAACCATTTGAAGG -TGTTCTGTAACAAAGCAGTCCATGCCAAAATCCTTTCAGAAGAATGATCAAATGATGGAA -AGCTTTTGCAAAGTGGCGCACGCTAAGAGTCTTCCTCACTTAGCCCCTACGGGGTCCTTT -CAAAGGAGGCGCTATGCGTGATTTGACAGACGCTAGTTCGTTGCAGGCTCGTTATGGATT -GGTAATTGTGGATAGCCACACTGAGGGGACTTCGAGATCCTGTTTGGATAATGCCTATGG -GTCTATAATATATTTGTCTATCACCAACTTCTCGAAAGGTGCTCCGTACATTCGCGCCAT -TATGCACGTGCTATTTTTTCAATCCCACGGCTAGAGCTCAGGCCGCTAAAAAAAAGGGAC -TCTATAGGTACATAGGAGGGGCGCTAAGTTGCTTGGATGAATGGCATTCCTGCTCCTAGG -GTTGCTTGGAGGAGGCTGATATCCAGTGTTATTCCATCTACTAGTTGGAATTGGTTTGGG -GGGTCTTGGGCCTTCGCTATCTGAACTTTCTTTGGGGCCGAAGTTATATTGTTATTTAAT -AGGATCGCGAGTCAACTCAGCTTGGGAAATAAGTACTCGAGAATCCTTAAAAAAAAATTT -CTTGTTTACCAGACCTTGACCCGTGCGGTTAGTTATTTTTGGCCACGATAGGGCCCTGAC -AGAGTGACCACCATTGAAGAAAAATTTGGGGGTAGAGTGTTTCTTTTACGGTTCTATCGA -AGTGCCAGATATTTAAATTTGAAATAGGGGGTGAAATACTTCCCTAGGTCATAGATGACT -AAATCCAGCAAGCTTATTTTGCCATATAACAACAGAGACCGTGTCTGTCAGCCTCACCTG -GGACTAGCGGCCCGCAAAGCCCACAAATGGGCCCGGAGCCCAGGCCCATAGCCCACTGTT -GGGCTGGGCTCAACCCTCGAAAGCCAACCCAACCCACAGCCCATTCGTGATGGCCCACAA -GGCCCCTCTCATAGCTCATTTAGGTGGTTTCCACTAGGAACTTATGATCGCTATATTCTG -ACCCTTCTACTAATTATATGCTCTTGAAATCTAAAATACATACTTTAATATCAGGGATAG -ATTTGAGATAGATAACTTCGCCAATAAGAGTGACTCACAACTTCACCCCTGTAAGACGAA -CCAATCTCACACTGACTCATTTGCGCAACTTGCCGATTGCGCAGAACCATCTAGAAATCA -GCTACCCAATATATAAAGTCACCAAGCGATCTGACTTTCTTTACAACCCCAACATCTCCA -CCCCAAAATGGCATCAATATCAGTCTTCCCCTTCCACGTGGAAGAGGCATCGAACGAACG -TGTCAAGCTCATCCCATTCAATCCAGAGCAACACTGTGAAACATTCTTCCGCCTATCATC -GCCACATCCCGACATCTACGCGCATATGCCCATGTGTCCCCCGGCTTCAGCAACCGAGCT -GAAATCCCTGTTCTACAGCAATTCCCCGAGTCACATTCTGTCCTTCTCCAACCCCGAGTC -ATTTGCCTTTGCCATCATAGACAAGACACGGCCACCATCTCCAGAAGACCCTGAAGGTGA -ACTAGCTGGCACTGTGAGCTTTATCCGTACCTCGCCGATAAACCTGTGCACAGAGATTGG -ATTTATTGTCATCCTGCCTCCTTACCAGCGAACCCATGTGGCCACTAATGCTGTTGGCCT -TGCTCTACAGCTTGCCTTTGATTCTACAGAGAAAGGTGGTCTTGGATTACGCAGGGTAGA -TTGGATGGCTAGCACGATGAACCTTGCTAGTGCTCGACTTGCGCAGAGGATGGGATTCGA 
-AAAGATCGGAATCATACCGTGGCATATGCGATTTGTGAAAGGGAAGACAAATGGAAAAGT -GGGACATGGCAGGGAACCGCCGCCTGGAGGTGATCCTGAAGATTTGTGGCGCGACACAGT -CAGTCTTACTCTGGCGTGGGATCAGTGGGAAAATGGTGCGAGGGAAAAGGCGGCGAAGGT -GATGGAGAGATGAGTAAATTTTGTTGCTGACTTTAACGAGAACAGTTGCTTGTAATTACT -GATGTGTAGAATGCTTTGATTTTGGCGACGGGTTATCAATGATTTCAGGCAATTTGCCCA -CCAATCCCAATGAGCTCCTTAGTTCAATTAGTACTAACGCTTCACTTTATCTATACTGGT -AGTAGGTTCTACATGGGTCATATGTAGTAGGAATGATGAATCATTTGATGTTGTCTATTT -ATATATGGATGGGATATGTGCGACTAACCTGTCTAGTTGAGCAACATAATGAGTTGCCCC -GATTGGGTTATCCAGATTCTTCCCATGAGACAACAGTGGAGCCCAAAGAGGGCCTATAAG -ATATCAGTAATTGACTACTATGTGCCTTGTTCAAATAAACTGCATCTTCAGCTGCTTCGT -CGGCCATAGGGTTCAAATGGCAACGCTCCGGCAACAATTGCCATCACAATGTAAAGGGCC -GCACATATGTAAATCGGGACGGGTGTACTTGTCTGTCAACAATATAAGCAATTTTCTCAG -GGGTGCAAAGCGAAATCCCTACGTTTGCCGCCTGGCCGACAAGAGCGCTGACGATCCCCA -TGATTCGATTGAGCCCAATCGCAACACCATTTCCTGTACCCCGATGGGCAGAGGGGAAGA -CCTACAAAACAATTAGACATGGATTGACGGACCTAGTCAAAAACCAGCAAGTACATACCT -CCGGAGTGTAGGCATAGAGGGTACCATAATAAACATTCAAGCAAAAAGAAATAGCGCACG -TGAAGCCCACGTTCTGATTCGCAGTGTGTACTTGCGTATAGCAGAAAAAGAAAACCATGG -TGACCAGAGCTCCGATAATCATCGTCCCCCGACGACCCCAAAGCCAGGGCGAGCGACACA -TGAATCCCGCCAGAACAGGGCCGAAGATTCCACAGGTATTGTTAATGGCGTAGTTTCGCC -AGGTAATGTACGGGCTACTCTGTCCGAAAGTTACGCCCCGCGAGGCAAGATAAGTGGGAA -GAAACACGTTTTAAAGTGGATAGGCGAGGCCGATGAGAAGCCATGAGACCCAGACCAGGG -ACGTTGAAAGGGCCATTTTGCGAGTTGCAAAGAGTCCACGGAGATGCAAGAATACTTCGC -CAGGGGAGAGTCTCACAAACCAACCACCCCCTGGGGCTCCACGATCAGCGGTTTGTCCAC -ATGCCCGAAGCCGTTCAATGGTGAGGCTGCATGGGCGGTTGTGTCGATCGGCGATTTTGC -GTAGAACGTGTACTGCTTGCTCGTCTCTTCCTTCGGAAATTAGGAACTTGGGGGTCTCTT -TCAAGACGGATTATGGTCACACGGAGGACAGAGAACACAAATACCAAAGCGCCGGACGTG -TACCACACATAACGCCACCCCTGGTTGTTGTCACGAGTACATGTGGCTGCATCGCTACAG -GAGAAGTTGGGAAGAAACGCCCAGGCAAAAAGACCTGCGGTGAGTTGACCCAGACCTAGA -CGGTGTGAGTCAGTTGACTTTCGATAGAAACTCACTGGAGAGTATATTCTCACCCCACCA -TGCTGCCATGAGTGTCAATGGCCACTGTTCCTTGCTGGAAGATATTCCAGGAAGACTGTG -GTATCCAAAACCAGGTTTTCACCAGCACCGAAGGCACCCAAGCAGACAAACAAGCCCAGA -GTGACCCAAGAGGGAGCAGCACCCGCAACGATAATGAATACCGAAGAAACAAACAGGGAG 
-AAGTTGAAGGCAAACCGTCTGCCGATAATGTCTGCCGAAAAGCCCCAGAAAATGGCACCC -ACCAACATACCAACATAGACCGCGATGGTAAGCCCGGTGGAATATGATGGGCTGAACTCC -AACCTGGCCTGGGTCGAAATAATGGACTGGATCAAGAGGATGAGGGAGTCAACTGCGTAT -CCAAATCCGTTTAGGCAAAACAGTTTCCAGTGGTGAGGTGTAAAACCAATCTCATCAATG -GTCTGTAGAGGACAAACAACCTCGAGTTAATTGCATGTCTCATTCATAGTACGAGGATGT -TATTTTCCCACATCATTGATAAGCGACATCTTAGCAGCAAGAACGTCGTCCACCTTCTCT -CCCTGCAAAATATCTTGGTCTCCATTTCGAACGGATCTTTCCTCTGCCAGCCACGGACTC -GTCCGGTTTCTGTCCTTTCCCGCTGTACTCGGGAAAGCTGAGCTGGGAGAGCATTCAGCA -TCCATCGAAAGCGATGGTAATTTTTCTCTGACATTATCTAGTTTTCTCGGAGTTTGCGAC -AGAGAAAAAGATGCTGGCGCAGCTATTCAAGAAGGAAAGCAAACGAGGAATTTGGACATC -GCGAGGGGCAGGGATCCCTACAAGTGGGATTAGCGTCGGCAAACACCAAGCCCACTGATG -AATCTCTTCACTGGCTTCGTCATGAGAGATGATTTACGGGGTAGCTTGTTGAAAGTCGAC -GACCTCATGGCTTGAAAGTACCTGCATGGGACATTTGCTTTTGCCATGTCAGATTCTAGC -CCTTTCGGCCATCTCAAAGGCAACTTGAATGTCAGAAAGCCGAGACTGGGCGCACGAGCC -TTCACTCGCATTTAGCCTCGCAGAGGGCCGAGGGGAAATGGCGCTATTCCCAATTCATAC -GAAACTTACTGTGCACTTGAGATGCCGGGTCGGGAAATATCTGCACACTGGGTTCCTGGC -TTGATAGTTATTATGACGGTGGCCATTGGAAAAACTCAGTGTAATGTCGCTTCTAGGAGA -AGAAGGACACAGAGCCTGATTCCCAAGGCGGAATTTAAGTCTCAGGATTCAGACAGCGGG -AAATACCAAGTTTGTTAGAACAGAACGAACTTGATAGAACGACAATCGCGATGTTGCATT -GTCTTCAGCCGATAGCAGTGTTGTCCTCCCCTAACGCCCTGGAGATCTCTTCATATTCGA -GCTCACCGCACCGCCATCTCTGGGTTCGACGCGAGCGTGCTCAGAAAGGCTGAGGCCGAT -TGGATCATCAACGGTATTCCTGTCATAGCATGAGTTTAACAAAACGCCCTGGGTAGTGCA -ATTAATTTGGCTATCCCATTCTCCTTCCCTGACAGAATGGCTTGTTTGCTGGGCATGGAT -GTATGGATGTACCCTTGGCCTGTTTGCCTTTCGAGGTACTTGATAAGACCTACTGTAGCC -GAGTGCGAGCTTACTCAAGTACTTTGTACTTATCTGTCAAACCGGGGGATTTTGATCATA -GACTAGAGTCAATACGCTAGCTGTCAAGTATCCTCGATCAATCCTCACGTTGAAAATTGA -GCTAGCATATTTCTTTCAAGACCTAGGGCGACTTCCGACAAATAGATGTCCTTTTGATAC -CAAGTGCTGCCAGATCTGGAGCTAGAAAGGCCGAGGATTCTTTAATAGTTGCATGTATTC -TTCGAGTCCATGGTAGCACGTAAGATACTGGAGGATTGCGTCTTAGCACGTAACCTTGAG -AACCCAGCCCTGTCAGCAAAACAATTCAAAGTCTTCTCTTAGCCGTAAAGATAAGAGTAT -TATAGTATGACTGGCGCATCGTTTGAGAGCTTGCAAGATTTAAGCTAAATTGCGTGAGTG -ATTGCGTATATTTACCCCGAAGAATCGCCATGTTTGTGTCCTGGGGACCCGCTAGAGAAA 
-CCTATCGGACGCAATCCAGATTAATGTGGGAGTCCTTGCCTAGATTTCTCCTCCCTTTCT -AAATATCAATGCAGGATGGTTGTAAGATCTGTCAGTGACGCGATTACTAGGTTCGCTGAG -GACATATCTCACTAGTCGTATAAAACCAGTGGGATTGCCGGGATCGCTATCAAAACATCT -GGTACAGTTTTGAACTGCGCACTGGGTTATCCAGCGCCACATAACCCCCCAAAACACAAT -GGGAACCGCAGGTGATTCCCAACAAGTGACGACCGCCTCGGTGTACAGCATAGACGCGGA -GAACCAAACCAACAACACCCAGACTGATAATGGTGCAAAGCCTGTTAAGTCATGGAAAGG -TTATGTATGGGACACATGGGAACTTCCACCAGATCAACGCTGGATGCTTTTTAAAGTAGA -TGCTTTCGTCCTCACATTCGCATCTCTGGGTTATTTTTTGAAGAATATCGACCAGTCGAA -TGTTAACAACGCCTTTCTCAGTGGCATGGAGGAGGATCTAGAAATGTTTGGTAATCAACT -AGTGACTAGTATGTACTTTCGTTCTCGCCTGCAACGGGCCTTCCCGTCCTCTTTGGAAAG -AGTCGAGGATGTTTCCAGCATTTGCTAACGAGGACTGTGCTAGGCACGTCTATCTGGACT -GTGGGATACGTTATTGGCCAAATCCCATCTAACCTGTTGTTGACAAGAGTTTCTCCTCGA -TGGGTCATCCCCTCACTGGAGGTAGGATGGGGCCTCGCGACTATCTGTACATCGAGCGTC -AAATCATATAAAGCTCTGTATGCTTTGCGCTTCCTTGTCGGTTTCTTTGAGTATGCCAAT -ATCCTAATCTTAAGCTTACTGCTAGCGGCCAGCTACTGACGTCGCTATAGATCAGGATTT -TACCCTGGAATCCATTACTTATTGGGATCATGGTACACACCACAAGAAATTGGAAAGCGG -GCAATGATCTTCTGGCTGGCCGGCTCAGTCGGCACTCTATTCAGTGGTTTCCTTCAAGCC -GCTGCTTATACAAACCTGAATGGAACACATGGCTACGCTGGATGTATGTGACTGTAATCC -TTCTGAATCCTCCCTTCTCGGCCTTTCAATGCAATTATCCTACACCTTCATCTCGACTCA -CAGGAGAAAAGGAGACAACGGCTGACTGACGAGAATAGGGCGCTGGCTGTTCATTATTGA -TGGCATCATTACGCTACCGCTTGCGCTCGCAGGGTACATATTCTTTCCCAATCTACCACA -AAGTGGCAAGAAGACATGGTGGACAACAGAAAAGGAGCATCAATTGTCAATCAAGAGAAT -GCAAGATATCGGACGCGCCGGGAAGCAGCCTTGGACTAAGGCAAAGGTGAAAGGAATTCT -GCTGAGCTGGCATACATATCTTCTTCGTGAGTCGTACTCTGCGCCTTCCTTGTCATGGAA -CCATATTGACATCCTATAGCATTGCTATACATCCTTTGGAACAATGGCTTGCCTCAGCCT -GCTATGGGCTATTGGCTGAAAAGTTTCAATGCCCACCCCCCTCCTGTCCCCGGAACGTCT -TACACTGTCGCACAGATCAATAACTGTGAGTTTTATTTGGCTCCGCATCCTAATGAATCT -AGCTAATTGTATGTACAAGTGCCTCTCCCTACAACCGGTGTGCTCATCGCCATGTCGTTG -GTTTGGGCTTGGCTATCCGATGGGCCATGTCGTGGCGCTCGCTGGCCATTTATTTATGTC -GGCGCCATTTCTACTGTAAGTCCAGAGAAATTATATCTGTCGAGTGAATTTTGCGGTACG -CGTATCTAACGGCTATCTCTATTCCACAGCTCATCTTTGCCTCCCTCATGATGAAGATGC -CCTTTTACACAAATATCAATGGCCGCACGGTTATTTACTGGCTGAGTAACATTGGCGTAA 
-GTCTACTATTCCATGGAGCGTTCAATTGTCACATATTGACAGTACCTTTAGGGTGGTGCC -GGCCCATTGATCTTGAGTTGGATCAACGAGATCTGCTCCGATGACACCGAAAAGCGAGCG -CTACTTGTTGCGATGGCCAACGACCTTGCCTATGTTGTACAAGCGGTCGTGAGTATTCCT -TTAGATATTCCCTTGCTTTGAAGTACATACTGACGAAGCTGAACGAAGGCTCCTAATTTC -ATGTGGAAAACCACAGCATTTCCCCAAGCCCCGAAAGGGTATACTTGGTCGATCGTACTT -CAAGTGCTGCTCAGTAAGAATCCCTTTCTCTGTTCCAGCCTAATTTCTTACTGACGCTTA -ATTCTCAGTTTTGGGCACAGCAACTGTTCAGTTCCTTTTGTGGCACGATAAGAAAAAAGC -AGCAAAGGCAGAGGCGCGTCACTCGGCTATTGACCCTCTCGAGTCATCGTCTATAGGAGA -TACATCGGTGGGATCTAGAGAAGACGATGGCAAAGTGGCCAACACTTCTCAGGTTAAACA -ATTTGGCCTGGATTAATTTAAATATGCACCTCAACTCAAATTTGAGATGTCGTTAACAAA -TCAAAACCAAGCACGAGGTTTGATGCTACGTCTACATGTTCGAAGATTCACTCAAGCTTG -ATCATTGCGCTCCTGCTATAGTGCTGCAGTAGTTGAACTACTATCGTATCGCTTCTAAGA -TAATTCAGACAAACTGAAAGACTGTATGTGCCTTGATAGACACTGCGGGGTTCTCTCGAC -CTGCATGTATACACACCGCTTGATGGTTTCAACGGTCAAGTTGAGTTCCAAGGACTGCTC -ACCAACTTGAATCCAGTGCCTCTTTTCTTGAAGGCACCTTTTTCACTTGATCGCACCTCT -TCCCAAGATGGCATTCCCATACAGGCACGTCTTGCTCGTTGGAGCTACATCGGGAATCGG -TAGAGCGCTAGCCGATCGTCTGATACAAGCCGGCATCAAAGTGACTGCCGTAGGGCGGCG -GAAACACCGCCTCGATGAGTTCATTGCAAAGCACGGCGAGGAAAAGGCTTCAGGGATGAA -TTATGATGTCAGCGATATTGACGCGGCTCCCCAGTTTGCCGCAAAGTCAGTTGGCGAACA -CATTGGCTTTGGAAAAACCTGTGAACGAAGAGTATTCTAACACAAAGTTGTACCAGTGCA -ATCGCTCACCATCCCGATATCGACTGTGTCTTCCTCAATCCGGGCTCCCAACGGAAATAC -AAATTGACGGACCCTAAGGAGGGGGACTTGAGCGATTTCAGAGCAGAGATGGACATCAAC -TTCATTAGCTACGTTGCTCTGACTCAAGCATTCATTCCGTTATTGTTGTCCAGGTCCACC -CCAACAAGCTTTATATTGTAAGTCCGCAATCTTCAGAGCATGTTATTTTGATAGACGTTC -TAATATGTTGAGGAAAGTACAACAACGCTTCTGGCTTTGGTGCCTGCGCCGAGAATCCCA -GCCTATTCGGCCGCTAATGCTGCGTTGAACGCTTTTGTGTACTGCCTGCGAGACCAGCTG -AGGGAATCGAATGTTAAAGTGATCGAGTTGGCTCCACCTCTTGTGTCAAGTAAGTGGATA -TCTCGGTCCAATAAGATCCCCGAATGCTCGAACGGAGAATGAAGCGTATCTTTGCTTATG -ATCATGCTGCTTATAGCTGAACTACACGATTATATGGGTATTGAGAAAGGACGAGCTATG -GGGATGCCGGTTGAGCAGTATGCTGAAGACGCCTACCAAGCATTGGAAACGGGAAGTGAT -GAGATTCTCGGTGGGAGTGTTGCACCCTCGGATGCCTTTCAGGATCTGGCAGACAGGCGT -CGCAAGATCTTCGATGAGTTCGCCCCAATGCTTCGTGATGTCGATTGAAAACTGCGTTCT 
-CAAGCGACTGCCATCATACGTTGAATAGCTAAACCAGATCAACAATTTGATTCTCTTCAA -AAGCTACCACGAGATTCAATGCAGACTGAATGAACTCATATAATAAGATAGTCATGTTGC -CGCTCCGATCCCGCATCGCGAGTCTGCAGAAGTCAGCCAAGATCAAGTAAACAGTGCACC -GAGAGAGCAACTCACAAACCAGATCCATGGAATATTGCTGCTGTCATTTTTTCTCTGTCT -ACCCCGGCCGTCCCAGACAGACTTGAGCAGAGAGATAGACATATGGTTACTCACTCCACC -ATGAGCACGGATATGGTCGAATCGCAGAAGAACGTAATGCCGAGCGTACGGAGACAAAGC -GTCGGCCCCAGCCATGAAAAGTGGCATGACCACCAACTCGATCAGACGACAGTCTGCGGG -AATTTCCGCCAAGGAGTCTAAGATCGCTGTCACGGAATTTTGTATTATGGGCGCCTCAGC -CGGTTCGGTGACATCCAACAAGCGAGATGTATGCAGTATGCATGCGTGACGGAATGCCTC -GGCAACTGCAGGCCAGCGAGGATCATCGTTTGGGTATCTGTCAGTCCTCAAACGCCAGGA -GTAGAACTCATTCCGGCAAATATTGAGGTGCCGCTCGTATTCGGTTGTTGCCATCTTGCC -CATGGCATGCGCCTTGGCATATTCTATCACGTTCCCCATGACCAGGAACATCTCGCGAGG -ACAGCCATTGACTTGCTCGAATTTCAAGTCGCTCAAAGTGGCCACAGCTTCACGAAGCTC -CGGGGGGAAGCATAGCCTTTGTGGATCTGCAAGCGAACGAATGATGTCTAGCCTAGTAGG -ACATGCGTCAGAAATGAGGTCTGACGAGAACGATATGAGATTACTTGCCATGCTAGATTC -CCTAAGAAAAAGATCGTTCTTTCATTTTCCGTAGAAAGCGTATCGCACAAGTGCAACTGG -TTGCAAATCGACAGTGCACCTGTGATATGAACTCCGTGGGTGGAGATGCCCGATTCGAAA -ATCTTCGTTCAGATTCAGCTATTAGTTTCTTGCTTCGACATCAGATAGTGATATGCAATG -TTGTCAACTCACATCTTGGAGTAATAGTATTTGGATAGTAATGAAAATACCATCCCGTTC -ATTCTCATCTAGCTCGCCAGATATCCTGTTGCGGAGAGTGTCTCCAAGGCAACGTATAGC -TCTGAGTCGATATTCAACAGCCAGTGTTTCTCGCAAATATACATCTTCATTTTTTATTCC -CAGGTGGCATGCAGAAAGGCCTAGAACTGCATATAACAGACCTACTTGTTCATATGCAAG -AGGTAAAACATATACTTGGTAAGGGTTGTCCGCGCCGGCAGCTGGAAGCGCGATTTGATG -TGGACATACAATCTTGTCATCTAGAACCTTTGGATTAGTCAAGGGTTGTGTAAGACTGGG -AATACAGTGGACACACAATGTGTCAACAACACTTCCGTCCGTTCTAGCATTAATAAATCA -TCCAACTCCACGGCTCGCCCTTCTGGCTGTGGCCAGGAGTGATCAAGAATTGGCAGTGGA -TCAGTATTCTGATCCTTGCACGGGGTCAAAGCACATGGATGCTGCTGATCAGAATCATTT -TGGCTGGCAACTGGATGATCGGTTTCGATGTCTGGCTGGGTCGAGACAGCATGGTCACTG -CTCACAGCAGGGACCGCCCGGTTCTTCCACTTGCCCCGGGTTGCTATCCCTGCAAAGCGG -AACTTTTCCGGATATCCCTTGCACTCCAAGCCTTTGGTGACACATCTTTGACAAACGGGT -TTTGTCCGGTCGCATTTGCGACACTTTTCACGGCATGTTATACAAGCCGGGCCTGGTGAT -CATTAGAGCCGCATCTTCTGAATGGATATTTTACAAACCTCCCGGGGGCATATCCTCTCT 
-CACTTAATGACAACAGCTGTCGCAAAAGATTATATATATTTTTAGATAAAATTGAGATGA -GGGAGAGGGAGGTTGGGCCTAACGGCGAAAAGCCGGGAATAACGCCACGTGACGGTTTCC -ATTGTTACTTGCTGATTGTCAATTTCCTTTTCCTTATATGATTCGAAATGGCATTCTATA -ATTCGACATCATTGTTTATGGACTTCAATTTTTCTTGTGATGGAGTGACGAAGGTGATTG -AGGCCTTTCTTACCAAGCCTCCAACAAATGTATCCCCGAAATTGACCAACCCCGCCACAA -CGAATGCCGTATTTCCTCCTTGCTTCAAAGAGATATTTATATTCCATCGAGTTTTTCTTG -CAATTTCCCAGTCTGATTGTGATTATATTTTTCCTACGCGCTGGGATTGACTAGCGTTGG -CGGCAAGCGAAACACTCTAGCCCACCATTGGGTCTTGGAAAAATGCGCCTCAAATGGAGA -TGCCCTACATTAGCCGCCATACTTCAAAAAGGTACCAGCATATAGGAATGTTAGTGTCGT -TTCATCATATGCAACTAGCGACCAAGGAGGCTATCTTTAGGATGCTCTAACCTCAACAAA -TGAGGCTCGGTCGTTCTCGGCTCCCGTCTCTTGGTCAGAAAGAGAACTATGCATTCGATT -CAAGATCCCAAGAGGCTCCAGTGCCCGAGATTCAGCCACGCAAATCTGGGCTTGTTGTGA -TTTGGTAAATCGATGCATTGTAAAGCTCCTGGCGCCTTGCCTGACAATTTGATTACCAGT -TGGACTGACTCTAAATACCAATGGATCTCTGTCCTAGAATCCGAACGCGGCTCTTTCTTC -TCTTACTATTATAATGCAACTTTGATTTACCTATATCTGAGCTTCTCTTTGGATCATGCG -ACTAATTCGAATATTCGATGAGCATAAAAACCTATTTGTCTCATCGAAAAGCATTCAAGA -CCATGACAACCAATTCCCACCATCCAACTCCTGGATCGAGCATCTTCTATTTATCCTCCT -CATTGTCCCTTTAGCGCTGGGCATTCTTCTGCTATACAGTGATGCCTCCAGTTGGCATCT -GCCAGACAACTTGTACGTTTTTGTGAATACAAATCGAACATCTGTTCAGACAGCAGTCCA -GATTTTCGGTGCGATTCTGGCAGCTATTGAAGTCTTCGCACTCTGTCGCCTGATCAACTT -GACAACTCGAGTCCGATTTACACAAACTCCAGTCTCTCTCAATGTGCTTGGATTCTGGTC -AGCACTGTCGACTCCCTCGACGAACTTCGGTCTACCCTTCTGGATGATTACCATCACAGT -GCTTTTCGGCAATCTTTCTGCTGTCATATCAGCACTATGGACTGGTGCCTTGACACCCGC -GGATGCTATCGGCACCCAACACTCTGCCTTGTTCATCCCGGATTGGTCCGACTTAAGTTT -GATCAAAGAATACCCTGGTCAGATCGACAAAACAGGGTTAACAATTCGTGAGACAAAAGG -TTACTTTACCTACTCGGTGGGTCTAGGTCTACTGGGCTCACTGCTATCCTCCGTCAACTC -AGCCTCCCCAATCAACGGCGACGTTCGTAATCACCCCAAACTCGATAACACTCGATACAA -TTATCACGGCCGGTCTTACGGGGTTGGCGCAAGCGCCGGCCTCTCTGACGACAACCTTGT -CGCCATACCCCACATAAAAAACTACACCTTCAACGAAATAGGCCTGGACGCCTCCGTTGA -CTGCATCTACAATACTAGCTCGATGTTTATCATTCAAATCCTTCCGCAGACCACTCTCCA -CGCAGCTCGCGGTTTCCTTCCCGACAGTAACCTCAGCGCACCCGAATACTCAGTCTATAT -CGGACGAGGCGACGATGCAATAGTCTCGTTGGGGGTGGCGGCGTCTCCAATCGCATACAC 
-TGCAAAGCGGTACATGGCCATCGCTGCAGGGAACTACTATAAGAATCTGGATAAAGTCCA -GTGTGCTGTAACATTTAACCCGGCACTTTTTAATATCTCGGTCGATATTCAAGGACGGAA -CATCAGCGTTAGCAAAGTGAACAGGTCCGAATCATCAGTGGGCCCAGTCCATGATATCGA -CCCCCATCACAATATTACTCACGTGGTCATGCGACAACTCGAACTCATCGCCAACGATCT -GACCAGTTATTATCGCTCAACGCTTGGCGATGCCTTCAATGCTAGCATTAGTGATTATCG -CACCTCCGTCGCTGGTGGAAATCTAAACGAGACGCAGATTGTGATGAAGGGAATGGAGAA -CGCAGTTACATCCTTGGTTGACGATATTCTGGTTGCATATGCGTCGGCTCAATTGGTCGT -CAGCGAATTTAAGAAGTCCACACCCGTATTGGTCCAGGTCGCTGCGCTGCGCGTCGGATC -GCGGGCCTATGTCATTGCCAGTGCGGTGATCACGGGATTAATTGCGCTTCTTGTTATCGG -GGAGGGCTTGCGAATGCGATGGTGGAAGGATCTTCCTTCTTTTGATTACCAGGATAATCG -AGCGTTGATTGTGGGTGCATCTAGGGGTGGGAAGGGTGTGGCGGAGTATGCCGAGCAAGT -GAAATGTAAAGATCTTGGTAGGGTCCCGGTGATTTGGAGGAAAGGACAGGAATCGTGGGA -TCATGGGGAAATAGTTTTCCAGCCCATGCTGGATGACTCGAAAGAGGATATTGAGGATAC -AGCATCTGATAGGACTTCGGTTGCTTGGATATGAGGCCAAAACGGAGTACAACATTTGAT -AACAATAGCATGGGATTTAGCAAGCGAATTAAATATATACAAGTAATTGTTACTGAAGTT -ATATTCCCTGGGACTGTTATCGCATTCCTGGCCGTTGACATCGCGCCCTCCCTGTAAAGG -ACCATCGTTGTACATGCAACAGCAGAAAAAGAGTGCTGTCAAGGAAGGGATTGGGTCTTC -CATTGCCGCCAAGACTTACTGTCACAGTCTCGACCGCAATGAGAAAGTCATAAGGCGCAT -TTATCTTCAGCTTCTGGATATCAGGTGACACGGGGTTGAGTACACCATCCCCGAGTCTCC -GAGTCATCATTGAGCTGACCCCTGTTACTCTCACACCTGGGTGGGACTTCTCTACCCTAC -TCACTTCTTTATCCTACATTGGCTTTGGCCTATGGTCTATTGGACCAATTGCACATCCTA -GGTTTGTCAAGTCTAATACCAATGGATATTAAATCGTACATCGCTCACAGGCCGAATAGC -CCTCTCTCTTACGAACACTACCATCCCTATTACAATCAAGGAACCATTCATCGTACACCA -ATAATGCAACAAATTTTGCTTATTCTGAATTCGCTCGCCTCATTTTCTTCCGCAGCAGCC -GCAATAATTTCCACTGCAAAGCCTGCGTTGCTGTCAAACTCACCTCATGTCACCAATGGC -GAGCGCTTCTATCAGCGCATGTATGCGGTGCGCGCTGTGCCATTGGAAGTACTGACTGCG -ATTCTCCCAATCTATCTTGATGGTCCGGCTGTGGCTTCTATCATCGGCGCCGCTGCGTTT -GTTCAAGCAGCGGATGTGGTGATTGGTATTGGTAGGAAGGATGCAGGGATGATTTTTGGT -GCTTCATTTGCTACAGCGGTTCATGTTCTATGTTTCTTTTCTATCTCATGAGCCAATGTA -AAAAGATATTGTAAAAACACGACTGGAGATATTGGAAATGGAAAAGCGAAAGCTAATAGA -ACCTAATGAGGAAGATATGTACCTACCTGTCTTCTTGAAGGTACGACCTTCGCCAGAGCT -ATCTTGTCGGTCCACCAATACTCCCGAGGTCTACAACAGAACTTACGAAACCAAGAAGAG 
-AAACCATGTCAAATCTCTAGATTAAAAATAACTGGGCGAGGTAAGAAATATGTACTGGAC -CTCAACCGCAAGAAGGGGAATCACACGTGTGCAATGAGAGCCCGATATAACGCGAAAGAC -GAGACTGCCAACAATATGAGTCGGGGTATCTTGGAACTGACGGTCGACTGTGCAACAGAC -CGAACAGCGGGCAAAAAGGAAAGAAACCACGAAAGTGTGGACAGGAAGCCAAAATCAAAC -GCCTTTCGATGCCCGGGGGGATCGTATGATAAGGGAATGGACAGGAATCGCGTATGCAAT -CGCTATGAAATAGAGTCTTTGAAAATGAAAATAACAGCAGAAGAGAAATAAAGGAAGGGG -AAAGACGGCCACGTCACACATGTACAAATCgaatcttgaatcttgaatcttgaatcttga -atcttgaatcATTTGAGCTGGCCCTTGCGCACAGCACCATAGCCCTTGCTGAAGAGCTCC -TGAATCTCAACGGAGCGTCGACGTCCGGCAGCGTCTTCGGGACAGAGGAGGCCAGACTGG -CGACCACGATCCGTTTCGGCATTCACAACATCGCGACTCGGCACGCTGCTGTTGTGACTA -ACCGTCGCGCTGGCGCTGGTATACCCATCGATCTCGACTTCCTTGGTGTCGGGCTTGGAG -TCCATGACACGGGGGCGGGCAAGTAGCTCGCGGACCTGGCGTTCGCGGGCTTCAATCTCA -GCGAGCTCGGCGTTTTCGCGGGCGATCTCGAAGCTTGATCTCTTGGTACCGTGGTCCCAG -CCCTGTTGGAGTTCGGAGGCGGCTGCTTCTTCGAACCGTCTGCGGATGTCGAGGTCTTGT -TCGTATTCTTGGAAGAGGTCGACGTCTGTTGGGAAGAAGAAGGCGCGAAGGGCTGAGACT -GTAAATTCAAAGAGGAGGACTGCTGAGATGGAGAGGAGGAGGACGGTCCAAAAGAGGAGG -TTGTGGCCGGAGTGATGGATGAAGTTGGAGGGGAGGAAGTAAATGCCTTTGCCGGAAGAC -ATTGTGTATTCGCGGTCAAGGATAAGATCCCACACAAACCAGCCACCGACGGAGATTATT -ATCACAGCTAGCGACATGTAGGTTTTGTTGTGGACTTCCAAAGCTTGGAGTTTGACGTTG -ATGACAATGATGCACGCGGAGAAGGTGAGTAGGCCCAAGGAGAAGATGTCGTTGTCGCTG -GGATTGATTTTGGCAAGGCCGAAGAGACTCCACATCGTGAAGAAGATTATCACTGCTTCG -CACGTCGCCATAAATGACCAGCCCAGGTAGAGCTTGATATTAAAGCCGCCGTGCCTTTGG -CCCTTGGTGTAGAGCTCCGGTACGGCTAACAGAGTCGATGCTGACAGGTCTTTGGTGAAG -ATCCCCAGGAAGATGACAGCGAGCGAGGTGAACAGCGTGTTGAACATACTCAGACTCCAT -GGCTCGTACAAACTAGTGCCAGTGTAACCGTTCCAGCGCTGGTACAGCGCTTGAGTCAGG -TAGAACAACATTTCCTTCCAGAACGTGCCGAGTGTATACTTGCAAGCACGCATGTAGTTC -CACCGACCGTGAACCAAAAGCAGCTTGAGAAGGAAACGGAACTGGCCAATCGAGTAGTCC -GAGATACGTGCGGCCTGTAATCCCTCCTTTCCAGTGATGCCGATACCCACATGGGCCTCC -TGAATCATGGCGATATCGTTGGCGCCGTCTCCAATGGCAAGCGTAATTGCGTCGGTGAGC -TGTTTGCGGATTGATCGGACCAAAAAGGCTTTTTGCTTCGGGCTTGCTCGGCAACAGATG -ACAGAGTCCGCCTTAACAGCTAGTTGGAAGAACCGTTCACGCACCACCTCATTCGCTTCA -ATGGTCGAGAGCGTTTGGCCATCCACTACTACAACAGAATGTGCCACGCGGCCACGAGTA 
-ATCTCCTCAATAAGGTCCACGATCGTTTGCTCAACATCGCCGTTCTCTTGATCCAAGATA -GTCAGTGTCGAGTACTCCTTGACAAGACGACACGAATGGCCGACATTGATAGCAGTCTCT -CGCTTGTCGCCAGTCAACATCCACATCTTGATGTTGGCTCGTCGCAGTTTTTCGATAGCT -TCGGGCACGCCCTTTTGCAGCTTATCCTCAATAGCAGTAGCACCAGTAAGATCCAGCTGC -TGCTCGATCTGCTCGCCGGCGTCCTCGATTTTCTGCTGTCGATCAACAAGGCTAGTGCAA -GCCTCATTGTAGGCAATTTTCCAGTTCGTGTAGGTCGCTTCATCCAAAAACCGGTGGCCA -TACAACAAAGTCCGCAGGCCGTCGGTGGCAAAATCATTTAGATGCTGGAAGCAGCGCTCG -AAAATAGCCGACTCGTTGACAACCAGTGCCTCCTCCACAAGATCCTCCTCATCATCTTCG -CCATTCGTAGAACTTCCAGAGTCCGATATCGCAGCTGAGGGTCTACCCATCTGCGCCGAA -GGCCGAGGGCTGTAGTACTCACTATCAGCCTCTCTGTTCCGCATACCACCATCGGTCTCC -CGATCGCGCAGCCAAACATCAATACTGGCTCGAAGAGCCGAACCCTGTTGGCCAGTGATC -GAGGACCGCCGATGAGTGAAGCTAGGCCGGGTCATACTGCTCCGGACCATGCTGTTCTTC -CGGCTGTGGGCCTCACTGTTACGTTGGATGACCTGATTCATATCGGCATTCTTGCGTCTG -CTTGCGCGGCGCTCGATCTCAGCCGCCTTCTCGCGTGCCAGATCGGCTTGCTTCAGGAGA -CGCATCAGCGTGGTATCGGCACCCTTACAGAATAAGCAAATTCGCTGGTCGGGCATGCGA -ACAACAACTGACATGCGCTTCCGCGCGCTTGAGAACTCGATCACATCCATGATCTCGTAG -ACTTCGTCCGTGCTCGCTTCGTCCGGTCCGTTCGGTTGAGTGCGGATGGTCAATTTGTTT -GATTGGCGATCCAGGACAAGATATCCAAGCTCCTGGGCTGCAAGGACAAGCGCCAACTCG -TCGGGGGAAGCAGCCTGGAAAGTAGTGTTTCCTTTCTTATCTTCTTCGGGAATACAAGTA -TGACAAAGGGCCATGGACAGAATGAACAGTTTCGCTTTTCGGGCAAAGATGGTGTATGGC -TTGCGCTGGATATATTCGAGCATCTCTTCGGTGCGGTACGTAGTAGCCGAGCGACCAGAC -TGGCGGACAGCATCAAAAGGACCGGAAACATTCGACGGACGGGGCATCTGAGCCTCGGAC -ACATTGGACTTGCGGCCCATTGCCTTTTTCCCCTTCACTCTGCGTTTCTTGTGCATCAGC -TTCGTGCGATCACCTTCAAGGGCGGCCTCCTCTTGGAGATCCGCGTCGTGCAACCACGCG -GTTCCCGCCACACTCATTTTGCGGAAGCGCATGGAGTTGTTTGTCAGGGTACCGGTCTTG -TCGGAGAAAATGTAGCTGACTTGGCCGAGCTCCTCGTTGATGGTGGATGTGCGCGCCTCG -AGCGGGGTGTCGGTTTCGGGATCATACATGTCGATGTCGTTCAGAAGCAGCATCTGGGCA -ACTTTAACGATTTCCATACTCACATATAGAGAAATGGGAATCATGGTATTGAACATGATC -AAGAACGAAGTGAAGATCGGGCCATAGGAGACACTGGCGTTTTCAAGATACCAGGAATGT -TGCTCCACGTCTTGAGACCAGTACTTGTAGGCCACGGTGCAGACAACAGCTAGGATGACG -ACCAACACTACAATCAACATAACCACGCGGTTGACTTTGTCCTGGAGAGCGGGCCTCTTG -ATTCGCGGGTTTTTGTTGGCGTTCATGCGGATCTTGCACTCCTCGCCAGTATAGATCACC 
-ATACCCAGAGCGCGGTCGGTGTTGCGAAGGATACTGCCACGGTAAACGATCTCGTTGTTG -GTCAAAGGCAATCTCTCCTGGCCATTGACACTGACATGACCGTCAAACTTATAGAGATCC -AAGTTGGGGTCCTCGACGACAAAGTGCAAGGTGTTGCTTTCGATATCCTCCACAGTCGAG -CACACCTTGGATACAGGCTGGCAAGGTTGCTTATTTTTCAAATTTGTCTCACCGTCGAGA -GCCATGGTCTCGATGTAAGCGACGCCGTTGGGTCCATTTGCGTGGAGGAGAGCAATATCA -GCGGGAATCGGCTGGTCGCGCTCAAGTTTAATGACGTCTCCAACACGAATTTCCTCACAC -TTGATCCGCTCCCAATCTTGAGCGTTGCTTATCATAGAGGCGCCATCCTGAATCATCCCG -TGGCCTGGACGAAGCACCCAAGCCTCCCGGTTGTTCTCTTCCTTGTCCAAGCGATACCGG -CGCCAATCATCAAAGCCTTCCTTGCCCATAGAGATACCAACAAAGATCAACAGAGGGACT -ATGGTCGTATAGGTTCCAGTTGTACTCAAGCCAGGAATCATCTGCAAGATAGCGACAATC -AAGAAGTAGAAGTTGGCGACTTTGGTAAATTGGGCGAAGAATTGGCGGGGGAAGAAACTC -CAAAGACTGTACCGACTGGAACGAATGGAGTTCCCAATATAAGGCTTCGCTGTGCGCTCA -TCAATCATGGAACCTACCATAGACGGGTTCAAGTCAATGTGGCGCCCATCTTTAGTAGGT -GGAATATCCTTGATGCGCAAAATGAATTTCTGGATAGACGTCATAACAGCCTGGAAACGC -GCGACTGCCACCGCAGTAGTCGCCTTTTGCTGGCGCCATTTATCCTGGGCCACAGACACG -GAAAATTGTTCCTTGAGAGTTCTTTCTGAAGAGTCCGAGGAGACATACGGGGAAGCATCA -TCCTTATGAGTCGAGTCGAGCGTCGCCGTCGGCGCGACACTAAGAGCATGCTTCTCATCG -GCGGCAGTGAGCGTCTCAGCAGCCGGGGCCGGGGCCTGGGCGGGAGTCACATCAGACGAG -GGTTCCTTGACCTCGCCCAGCTCAGCCAGGGCGACATCATCCTTCTCCGTCGAGTTGATG -GTTTTATTAAAAATGCTGCGACGTAGCGAGTATCCACGGTTTCGCGACCGAGACCGACCG -GCGGACTGCGAGCTTGGGGAAATAGGGGAGAGGGTCGCATTTGGTGTCGCATTGGGAGGT -GAGAGCGGAGAGGTAGGTGAGGGAGCGGGTCGAACAGACGGAGGCGCCAGCGCCGTGTTG -ATGGTCAGGCCGCGGGAAGTCGGGCGTCCGTCCCAGCTTGCTTGTCGTTGTTCCTCGACC -GACTCTCGATCTAGATCGGTTGAGAAGCGCACATGTTGGGAGGAGATCCGCCGCCCGGAG -TCGGAGAGCGTTCGGGACGACACTAGAGATGGGCGTCGGAGGACATCATTCTGGTCGGGT -TGGGTGCCCTCTGTGGATTCTGTAGATTTGTCGGTAGGGGACATGACAGTCGAATTCCTG -GCACGAGTGCGCGGGACAGATCAGATTCCCGACGATCTCGGTATTTTTTTTTTTTTCTCC -CTCCGTGATTTTCGAATCTTTCCTCTGCTAGCGTTTCCCCTTAGGGTCGAAGTATTTGAT -TAATTTGATTAGACGGTCATTCTTCCGCTATTCTTCCAGTGAGGGGAGATAATTTGCCCC -TGTAAAATGAAGGTGTTGCGAGGGTTGAGCCAACGAAAGTACTCCCGGACGCAAGGATAG -AAATGAAGGGCGCAATGATTGTCTCCTATGAGAAAGAAAAGAGGCTAAGAGAAGAGATTA -AGAGACTAAAAAACCCGAGAGACACCAGGAATAGCCAAGACCAAGTTTGGAGGAAATGAT 
-GCCCAAAACGGCCTAGTGTTCCAGCTGGCGATACCCTGGAGGATACAGAAGATCTCGACA -ATATAATGTGTATGAATATCTGTGATAGGTAGAGAGATAGAAGAAAATTGGAGTGCGCAA -GTGGAACATCCCATGTCCAAAACGTCTCAGGCCTTGACAACCGTTCATGGTAGGCAACGC -GTAAGATGCAACCATACAATATCCACGCTTACGCCTCCCATAGCCCCAGGTTAATTTAAG -AATCACCGAATCAAGATATTTTTGGAGGGGCAACATATGGACCCGGATCCAGATCTGTCT -GCTTTGTTATTGGTCAATTTGAGCATTCCCGTGAGACCCTGGACCCTCCAACCCTATGCA -AAGGGCCATGTAGGGACTTTTTTGTTGCTTTGGCCAGCACATGTGAGGATGCTGTATGTA -CTAATGGTAATCCAAGTCTATCTACTCCGTAGTGCTCTGGAGAGTCCGACATCCATTCCC -TTGACCGCCCTGGTGAACCCGTGGGCTGATTATTTTAGTGCCTTGGCAGGCCCTGGACCT -TGGGAAAGCCCAGGCATCCGTTTTTAAGCAACAGAATTCTCAGGGTCGAACCGTTTGATT -GGAAAACTACGTGGGGTTAAAGAACAATCTAAGAACATCCCCATAACCCTGGGATCTTCA -CCGATGGTTGACGAATGGCAAAAACATATCATTTCAACGCAGGAACCCTATGACGTCATA -ATCAATAGCGTGGCGGCTGTCCATACTGGTAGCCAGATCCAGGTGACGGATACCCCTGTG -GCTGTTGGCCATATCCACCCCGCGATCCTTGGTATGGAGGCTGCTGGGGAGGTTGATATT -GACCCTGTGGGTGACCAGACTGCGAACCGTAATTCGGAGGAGCACCAGGGGGAGCACCAT -ACGGAGGCTGAGCTTGACCCTGGGGAGGTTGGCCGTAACCTCCTCCATACTGGCTTGTCG -ATTGAGGAGGTCCACCGCGACGTTGGTTGTCTCGCTCATCCTTGGTGTCGTAAGAAGAAT -CGATAGCGCCGAGCATCAGTTTGGCACACTACATAGTCAGTTCAATGTGATGCGATGATG -TGGGTAAAAACGACTTACCCAAAGCTCGCGAGTGAGATGCACTGGTGGGTTAGCACGAGA -CATCTCATCCTGCTCAACCTCAAAGTCTAGAACACGTCAGTTTGGATCCACCATGAACGT -CAACGTGGAACTTACTGGAGGTACAGTCAATCAGGTGGCCAATATTAGGATCCTCATCCA -GAGAAGACAAGAAGTCGCGAGCCCTTTGATCAGTTCCCACCTGCGAGAATTGGAAGGATA -CAGCGCCGGGGCCATAATGGGTCTGAGCGGTCTCCGCCACTGCGTAGCGAATGGTATTAC -CCACGGTGTCATGAGGCTCGCCAGCAGGCTGTCCGTCGGTGATGGTGATCACCAGCACGG -GCTTCTCGAGACGATTGGCCCGGACTGGTTGAATGACCATCGGGTCGATAACCTTAGATT -TCAGGCTAGTGCCCAGAGGGGTCAGGCCCGAGAAACGGACGCGGGACACTAGACGTTCGA -CGTCCTCTGCATTGCGAATGCCATCGCCCTTTTCATTGGAATTCATGAACCGGACAGAAA -TACCATCCTGGTCGAAGGTCGACGCAGCAGTAGCGACGATACCAAGAATTTGTCTGAGCT -GTTCCTTTCGGAGTCCCTTCTCTTCAAATTCGATAGATCCACTGTCATCAACGTACAGAA -TCACGTCGAACAGCGAGAGCTTCATCACATCAGTCGCAACCTCCATAGGTACGGCCCATT -CGTGAATCAGCTTGTTGAGCTTGCCTGGTGCTTCGGCGGCAAGCGTTTGGACAAGACGAT -CCAGTCGGTCTTTGGGGTAGAAACTTCCGAGACGCTTCTCTTCAACTGTAGAACTCAAGA 
-GGGAGTGATAAGCTGCCACCTGTTGAGGGGTGGCCGGAGGGGCTCCGGACTGAGGGGCGC -CACCGGGTGCGCCATATTGGCCTGGCGGTGGTCCGCTCGGTGCGCCATATTGGCCTGGTG -GAGGTCCGCTTGGAGCACTGTACTGGCCCGGTGGAGGTCCGCTTGGTGCACTATACTGGC -CCGGTGGAGGTCCGCTTGGTGCACCATACTGACCTGGTGGTGGTCCACTGGGTGGTCCAG -GCGAGGGTCCAGGACGGCCCTGGCCACCGGGGTATGGAGCTCCCTGAGAAGGGTAGGGTG -GTGCCTGGTTCTAGTTTCCGTTAGCAATCTTGTTCGACCATGAAGCATTCAACGAGCATC -CAAAATGAGATCAGACACGAAACACAAAGGCGGGGATACAGGGTGAGGGGTCTTTTGTAC -ATATGAGGGCTGCTGTCCAGGGTAAGATTGTTGTCCAGGATATTGCTGTTGTCCGGGATA -TTGCTGTTGTCCGGGGTACGGAGAGGCCTGTCCAGGATAAGGCGATTGTGCATGCTGTCC -TGGGTAGGGCGAGCTAGGAGAAGCATAGGGCTGTCCTTGCTGTCCTTGCTGACCTGGATA -GGGCGAACTAGCAGATGCATAGGGCTGACCTGGTGGGGGTGGAGGGGGGCGGCCATAGCC -CTGAGATTGCGGCTGAGATGCGGGGTATGGCGTGGATGGGGGAGGCCTGGAATAGGAATT -CTGTTGCGATGAGCCGCCGTATGGATATCCTTGGTAAGGTCCCGACATTGTTCCAGAGTG -TCGAAGGGTAAAACGTGAAAGTGGGGGAAAACAAAAAAGGAAAAGGAAAAATATAGCTTA -CCGATCCCTGAGGGGCGGGGGATCCTGAATAAGCCTGGTATTGCTGTTGATTCTTGAGTG -AACGTTAGCACTTTCTCATTTTTTTTTCTCTTTATGACGTCGAACTGCGAGTCATAGCAA -AAACAGGCGCAGTAATGGACATATGACTTACCGATCCACCAGCCTGCAATGCTGCAGGAG -GACCTCCAGTGTACCCCGATGGAGGTGCACCTCCATATGAACCACTATTGGCCATCTGGT -TATTTTGAGCTGCGGCGATTTTCTGAGCAAACTAAAGATGGGTCAGAGGGATTCTCGGGA -GTTGTGGAATCTGGAAGCCTTACACCCATGATTTGAATTAGAGGGGGGAAAGAGGCGAAG -AAGTAAGACACAGAACTTGGAGTATCACGAGAGAATACCAAGGCTTGGATGACGTTGCCT -GATGGGGGCCCCTCTTAAAAAGTAGCCCAAGTGAGGCTGAGCTGTTGACGGTACCTCGGA -ATACGTTGCCCCCACTTTTGTTTACTAGCAAATAAAGTATCTCAAAGTATCTTCAATGAA -GTCACCAGGTGTATATATATGCAGCCCTATGAATAATACATACAACACATTTAGGCTGCC -GTTGGGAACCTCCTGCGCCGCTTTATGGAGGCGCCTATAGTCCTCATGGTTTTCACACCA -GGGCACCCGGGGAGCCCCGATTGATTACTCACTCGAACAGTGGACCCATGGTTTTGGGAA -CGAACCCAATGTGCTTGCTCTTGCTACTAAAACCGAACATGTAAATAAACATGTAAAGTG -GAATTCCACACTGCTCACTCAGGCTTCACGGAGCAAATGCGCGGTAACCATGTCCAGATA -AACCCCAGCACGAGGCGTAATGTCCTCGATTCCCTCCAATTTGGCAGCATCATCCCAAAG -TCGAAGTGAGACCATCTCATCGCGAAGTGGATCTTGCTCGAAACGTTCAAGGTCCGCACC -TTGGAACGGGCCGCCTTGAAATGCCAGCGACTTTTGCGAGGCGGATGATAGAGCTTCGTG -GTATCCGTGGTTGGTCGCGGTAAGGTACCTAAAAGGTGAGTCTAAACTCGTACAAGACAG 
-GCAAGATCACATACCTCTTCGCGGCAACATGACTATTGACCAAGCGACATACTGTCTCAC -TGAACCCAAGGGATTGAAGATATGATGCGCCGATGGCTTCATGGCCGACTCGCCCGACAT -TGTCGCTGCTTTCGCGAAGATTCATACGTACCTCCTTGGTTGATTCTAGCGGAATGATTT -GACCAATGTCATGCAATAAAGCCGCAATGACCAACTCATCACGAGCATCTAGAAGGTTGT -AAGAAACTGTGAGAGATACAGAAAGACAATACTTACTCGCTTCATGGGCTTGATGAGCTG -CCTGTAGACAGTGTTCTAACTGGCTAATTGACTCCCCGATGTAGTCACCTTGGCCATACT -TTTCTAGAGCGCCGATTAAGGCCTCTGCTTTTTCTTGGGCGGACTGCTGATTCATGGTCC -AATTTACCCTTGGGATGAAATAGGAGAAGCGTCGAAGTGAAACAATATCAAGGTAGACAA -AAAAGAGAAGCGAAAGAGGCGGGGGGAATCGAAGTATTTATTTACGCCAGGGGCGTTTCG -TTATATCTCCGTCCAGTTGAGTGTACCCAAACTAAACTCGTCGGGGATCATGCCGGTGGC -CCACACGAATCTGAAGGCTGAACTAGACCAATTTACCTCGATAGCAGATAGGTACCGCCC -AGTTATTTTGCTTCAACTAATCAATATCATTATAAGAATATCTCAAGATCGTATGGAATC -AACCTTCTGCTTGGCACTAAGTAGACTATCCCCGCACCGATCAGACTGGGATGACCTTGG -AGTCCACAGTTAACATAGGGGATCCAAAGGCATATCGCATACGACCTTCCTCGTATGTCT -GGCCGCTCTCCCGCATATGTGTTGCAGGCCAGAGCTTTTGGCGGTCAGAATAGTACTGGT -CGTGAAGATCACCTTCAGCGGCACAGTTATACGTTGCATAAATTGCACGGCGGTCCTTCG -AGCTAACATTGGCGCCACTGCGGTGGGCCAAGTACGAGCCAAAAATCAAGATATCACCTA -CTCCTGTCAATAAAGTCCGATGGAGTATCGGGGATTCTTCAGTTTGCATACCCGAATCAA -GCTCAGCCGGTGTCCAGACGTTGCTTTCGACCCAATCCGACGCAATGCAGCGATCGCTGC -CGAGAGGAATATCCATGCGGTGGCTGCCGCCCACCACTTCCAGACCTCCATTGTCTGCAT -TCATTCCATCAACTGCTGCAAGAATGGTCAAATGCTTGATGTTTTTGACGTGGGTGTAGG -CATTGGCATCGATATGAGGATCGAAACCGCCTATCCGACAGGTGAGCGATCGAATTAGTA -CTATTAAATGGACATCGCTTACCACTTCCTGCCAATTTGTAGTTAATCTTCTCTTTGAAA -AGAAGCATTTCCTCTCCTGCAAGCTGCTGTAGAAGACTTATCGCACGCTGCCCTCGGAGG -AAACTATCGAATCCGGCATGCGAATTGGCAAAGTTTTCAGTGCGACACAGCACCCGTTTG -CCTTGAGCATTCACCTCTTGATAAGGCATCCAGGGCACATCAGGGGTACATGGAAGATCG -TGGACTTCCTGCGCCCATTCTCGCAACAGCTCCGATTCCTTGGGGGTAAAGAAACCACGA -ACTAGCAGATACCCACGCTCGTGGAAACTCTGAAGCTGCTCAGCAGAGAGTGAGTAATGA -AAATTATCCATTTTTTCACCGGTAGATTTCAATAGAAACACAGAGCAGAAGACTTTCGTG -GACCATCGGGAGCCCAGTAGCTCGCATTTTGTAGATCGAGAATTCGCTGCAACGAAAGCC -GAAAGATATCGCCCTTCGCACCGAGATGGGTACTTTTTTGGACTCGATTAGATATTGAAT -ACATAGAAGATGCCCAAGCACATGGGTGAAAGATAAGCCCCAATAGGTTTGGTGATCAGG 
-TATTACGCTATTCGACCTTCAAGACGATGTGGAGTCTGCCAAGGTAGGCGAGGAAACGAG -AAAAGTCATGAAATGATGCACACCGTCCACGGGCAGAGAGCTTCACAGGGCATGGGAGCG -TCCCCGGGGAGCTTGGACCTCTACTTAGTACGACCCACAGACTATACATTATCTCGCACT -TCCCACCGAGTCTACCGTGCATCAATCCCATGCTGTAATTGGTCAACTTTAAAATTAATT -CGTCGTTAGAACCATTGAATATAGCTTCTATAGATATGAATCAATTCCATATTCTGTGCA -GTATTCTTTTCAACATGGCTACCTCTCCCAGTGACTCTACTATGAAGATGGAAGATCCCA -AGGATCAAGTCTCGCAATCCGAATTGAACATCGAGCAAGAGGAACCCGAGAAGAAACGTT -CCGGCGTTCTCAATGTAGTGATTTCCGGGCTGGCCTTGTTCAGTGATGGATACAACGCTC -AAATCAGTACGTGATCTGTATATCTACACGGCTTGTTTTCTCCCTTGTACTGACCGCTTG -GTAGTTGGCTATATGGAACCTTTGTTCTCGGTCCTGTGAGTTGCCCTGCATTTGAGGCAA -CTCTGGCCAAACTTACTGATCGGTCCAGCTACATGAATGGAATGTCTTCAACTATAAAGT -CGCGCCTGTCGAACTCATATCTCATCGGTGAGATATTTGGCATGCTGTTCTTTGGGGTTC -TGATTGACCGCATTGGTCGTCGGACTGGTATTGTCGCTGCAACAGCGTTTCTTATTATCG -GAGTGGTGCTGGCAACCGCCTCTCATGGAAAATCGCAACTAGGGTATGCTATGCAAGAAC -TCGGGGCTTTTTTGACTCAAGCCCATCACTAACACCTCAATAAGGATGTTCTGGATGATG -ATCGTTGCACGAGGTATTGCAGGGTTTGGTGCTGGAGGAGAATATCCTGTCTGTGCTACC -AGTGCTACTGAGGCCGCCGATGAGACAGTCCACCTTCGAAAGCGACGTGGCTTCTTGGTC -GCACTAACTACCGATTTTGCTATAGATCTGGTCAGTCCTATCACAACATGATTGAGGACC -CTGCTAACTTCGGGATAGGGATTTGTGGGAGCTGGCTTGGTTGCTCTTATTGTTCTTGCA -TGTTACAGCCAACAAAACTCTGAAGGGGTCTGGCGCGTCACCTTTGGTCTGGGTATTGTG -GTAAGTCTTCGTCTCAATAATCCAATAGCTAGCTATTCTGACCCTGGTACCAGTTGCCAC -TGTCTATCTGTTTCTTCCGCGTTCGGATGGTCAACTCTACTCAATACCAAAAGCATGCCA -TCAAGTCGCAATATCCCTACGGATTAGTACTACGTCGGTACTGGAAACCTATGCTTGGAA -CATGTGAGTCGATTGGCTTGATCAAGTTGTGTAGTGGCTGTGGCTAATGTTGATGTTTGT -AGCCTTGGCTTGGTTCTGCTACGACTTTGTAACATATCCATTCGGCCTGTTTTCTTCCAC -TATAATTGAGCAGCTGAACCCCGAAAACACCACGGTTCAGAATATCGGATACGGAGTATG -TCTCCAATACATCATGTCCTAGATAGCTAAACTGACAGTGAAATTTCCAGACCGTTATCA -ACTGTTTCTATTTGCCAGGGTGCTTGCTGGGCGGTATCCTGATGGATCGAATTGGCCGCA -AACAAACCATGACCCTAGGGTTCATGATCTGGGCTATATGGGGATTTATTCTGGGGGCTG -CACTCCGACCTATTCAGAGTGTCTTCCCACTGTTTATCGTCATGTACGGCATTTTTCAAG -CACTCGGAGAAATGGGCCCGGGGGTCTCGACGTTCCTCTGTGCTTCGGAGTCCTTCCCAA -CACCTTTGCGTGGTCATTTCCTCGGATTTGCAGCCGCAGTCGGCAAAGCTGGCGCATCAA 
-TCGGGACAGAAGTCTTCACTCCAATCCAGAACTCCTTCGATACGACCGCCAAAGGCCAGC -AAGCAATTTTCCTCATTGGTGCGGCTTTCACGTTGGTAGGTGGTTTGGTTGCCTGGTTCC -TCATCCCAGATATGTCTCGAGAGCTTGAGACAGAGGATGCCAAGTTCAAAGCATACTTGG -AAGAACATGGGTATGATGTGAGTTCGTATGGCGAGGCATTGGTTGTCAACGCCCGCGCTT -CAATCTGACTACCCTAGCAGACAAACAAAATATAGAGGACCCAAAATTTGTTCTAGTTTC -ATTTGTGCCAAATCAGGCTTTCGCCATTTTGGCGTAAGTATAGATAGAGTGGTCTTGCAA -CTATAGGAACCTAAGCATGTTTTCTCTCACTTGCCATGAACTTCATCCAATAAGATTTCA -GAGGTCTTAGCTGTTTCATGTCCACATGTCCACCTTCCTAGTAGCTTAACGTCAGGGTGG -ATTATTCTCATTGTTTGTTGAGTGCAGAAAATAATCTTACAGTAATCATCCAGTCGGTCT -ACAGAAATTACAATAAAATATTGTTTGAAGCAAAGTCTATACGTCATGAGTATGATGCTT -TGTTATGATTTGAGATATCTTTCATTTCTAACCAATATCTGTTGAACACATAGTCTAAAG -CATAGATAGGATGAACAGGTAATTTACTATATTAATGCCAATCTTGAATTGAATCCGGGG -TCAAACCCCAGAACTATGAGGTGTAGCCTTGGACTGGATGATGTCACGGAGTTGGGTAGA -GATGTTCCCTTGAAAGTACTTGGGGGAATTTAGATTCTTTGTCTTGCTCCAAGATATCAA -CGAAGCTCTTACTCTAATATTCAACTCTGTTCTTTTTTACAATCTCAACCTGGATCCCTA -CTATCTAAAATATCATTACATTCTCAGTCATTCAGCTCAAATAATACAAAAAAGCCTGGT -CATCCTCCATCTTCGCTCCATCTGTACCATCTGATCTGCTTCCCGCACTTCCGCCAGCAT -GCCCACCCCTGTGCTCCGGCTAGGTATCCTAGGCGCCACAAATGCAGTCCAAGCCACCTA -TTTGCCGGTCCTCCAATCCCTCAAAACTCACTACACCCTCACAGCCATCTACGACCCAAA -CGCAGAAATCGCAACCCAGTGCCAGTCCCGCTTCGACATAACCCACAGCACAACAGTCGT -CGAAGATGTCCTATGCCATGAAGAAGTGGACGTCATCCTCAACCTCCTCCCAATGGAGTA -CCACGAACAATACACCGTGACAGCCCTCGAAGCCGGCAAAGACGTGATGGTCGAAGTCCC -ACTTACAATGAGCATTTCAAGCATGCGCCGCATTCGTGAAGCAATCAAAAATGGCAAAAC -CTCCCGATCTCTCAACGGCGTCACCCAAACAGACGGCCCAAAAGTATTTGTCGGTTGCGC -GCGTCGCTACGCACCATGCTTCACAGAAGTCTTCAAGAAAGAACTCGCCACCATGGGTCG -CGTATACTACGCGCGCTGTCGTCACATCGCCGGACCAATGAACAACGTCGTCACGCCGAC -TTTGAAAGACATCACACATATGAAGAGCCATAATAACCCAGAACAATTCCGCGTTTTGCT -GGAAGACGTCTTCGGGTCTGAAGAAGACCTCACCCCAGACAGAATTGCTTTTTGTCGGTA -CCTCGGTATGTTGGGTTGTCATGACCTATCGGTGATGCGAGAGTCGTTGGGCTTGCCGGA -TGCTGTTTCCAACGTCGCTATCACGGACCCATTCTACTCTGCCATTTTCCACTATACAAA -TTCCGCCGAATATGACGGGCACCCATTTACGCTTCTCTGGGAGGCCGGTGTCGATGCCTT -GCCTCGCTGTGATGCGCATTTGACTGTGTATGGTGCGCACAAGACTCTCAGCGTGGAGTA 
-TGACTTCCCCCGTCCTGGTGAGAAGATCAGCACTGGTACATATGTACGAGTTGTTGTTGA -GGAAGCGGATGGGACGACAGAGATGGATAATGTCAATGGCAATCAAATTAACGAGACCGA -GAATGCCGTTGCTCTTCCTCGCGTGAAGCGGACGGAGACTGTCAGTACCTGTGATGAGGC -GTATGAGCGTGAATTTATGGCTCTGCATTCTTATCTGGTTGGTGGAGGGTCGGCGGCGAA -GACAAGTACTGAGGACGCTGTCATGGATTTGCGGCTGCTGCTTATGATCTTTGACCACTA -TAATCGCCAGTGTGGTACCATTCGAACTCCATTGGGTTGAATCCTTGTTTCTTAGCAGAC -GGTGTGAAGGCATGTGGAAGACGGTGGGAGGGTATACTCGTCCGGGCATTCCTTAGGGAA -GTTGCTCTGAGCTGGCTCTACGGGGCTTGTTTACATGAGGTTATGGGAACAGTCGGAGTT -TTCGGAGTTGAGAATGGGCGGTAGGAAATTATTCTACTGCCCGTCTTAGTTTTGAGACAT -CCTTGCAAATGAAATATGAGATACTTGTGTTCTAAGTATCGTAGGTAGCATTCAGCCATG -ATCTCTATTTGTCTGAAATTACACTATCGCTTGCCCTTCTTCTTATTCTTCTTAGCATTA -GAGCCGCCACCGCCACCGCCAGACTTTAGTACTTGCTGAGCGGGAGACTCCTCCGCCTTC -TCATTGACAGGACCACCCTGTGTGAGGGCCGCAGCACGCTGCTTGCCCTTCTTGCCCTTG -GGCTTGTAAGAAGAGCGGTCACGCACAGGCAGCCAGCGCTCGGGATCTGGCTTCTTGGCA -GGATCGAAGTCCTTGGGGAGACGGGACTTGCGGACGCGCTTGTTGGCGCGGTCTTCAGCG -CTAGCTGCGGTGCGCTTGCGGGCACcagcgaaggcggcagcggccgcggcaaccgaggag -gcagagggagAGATTCCAGCGGATTCAAGAGCCGAGACGTCGATGTCGGCGATCAGGTCG -CTGACTGTTGGCAGGCGGTTCAGCTCAGACTCAATCTGCGCATAGTCCAGGGTTGCTTGT -GAGGCGACGTAGCCGGCGATAGCGACACGGTCGTTGGGGTCTTGTTGGTGCAGAGCACGG -AATAGATCACCCGCTGTAGCGAGGTCCGCACGTTCTTCGGAGTGCAAGAGGGAGGCGGCT -GCTGCGCGGAGGAGGGACGCGGGGGGCTCGGGCTTCGTGCGCCAGTAGGAGGCGGCTTGT -GCGAGGGTGGTGCGGATCTGGACTATGCGACCTTCAAGCTGGTACAGGGAGACAAGGACG -CTAAGAAGGCCTGGGTTGAATCGGACGGACTCGTTTGTTCCCTCCAGGCTTTGGAGGCTT -CGCTCCATGGCTGTGATAGCAGAGGTAGTGTTGCCGTTGCTGACATAGAGCTGTACCACC -GTCAGAAGAAGGCCAAGGTCTGTGGGCCGTCGCTCCAGCAGTGGGACGATCTTGTGAAGG -GCTTTAGGGCCAGTTTCGCCTTGAGCGTGTGCTGCGGCGTTGTATACAGAGAGAAGGTTG -GGGATGGCGTCGGTGGATGGGCTTGGGCATTTTGACTGTGCCTTGGTTGTGGACCGGATA -ATACCATCGTATTTCTGGACAAGGAGGTCTGCGGCGTGCGAGTTACCGATCATATCACGG -TCCTGGAATTCAAATAGCCGATCGTTATTGGTGGCATCTGGGGTGTCATGGAGGGCCTTG -TATAGGATGTAGGGATTGACGGCAGTTGGGCGAGCGAGGACAATGTTGTTGCGGGCGATC -TTCTTGGTCGATAACTCGGTGATGCTAGGGGGATATGTCAGTAATATGTTTCGCGCTGGT -ATGGTTTAACGCACTTTTCGACGGAGATATCTTTGAGAACCGCCTCGGCCTCTTCCAATT 
-TTCCGAGTTGAATCAAGACATAAAGTTGCTGGACTGCGATTGGCAGCAGCTCTGCGACCT -TATCTTCAGGTGAAAGGTCCTCCGATGTCCGGCAGATATCTGTGCAAAACGTCAGTCTGA -GCCCTAGTCTTGGGCAATATGGCGGCTTGCTCACTTTGAGCTCGGTTGAGCAAGACCTCG -CTCTGCTTAAATGCCCCCTTTGCAATGTTTAAGCAAGCTGCGTTGTAGGTTGTTTCAAAA -GATTCCAAATCCTCCCTCGACAATCGGTCATGGCGCACAAATTCCGTCTCTCCTTTCCAT -TGAAGCTGCGCATCAGTAGCCCACGAATTGATGTTCAGGTCGTTGACCTCGTGGCCGAAA -GATGTCGTATCTTTCGACAGTTGTTCATAAAGCTCGGCTGCCCGTCGGAAGTGCTCAGCA -CGATAGCGCTGTTATGACAAGTTAATCAATATTGGTCCTCAATCAGCAAAGGATGAGATA -TGGCACCATACCACTTGTGCCTCGAGGTGACTAGCACCACGTCCAGAAGCTGTGTGGGTC -AAGCCTTCTGCAGCATCTTCAAGCTTCCCGCACTTGTATGATGCATAGGCGTACTCCAGA -CCTGCACTCTGCTTCAGCGCATTGCCTCCGGCCTGGAATGTCTTGAGAGCTTCTTCGTAA -CGGTCAAGTTTCAATAGAGCAACAACTTTCATGTGTTGCGCCTGCAGATCAGATTTGGAT -TTGGCGAGGGCTGCATTGCATGACTGGAGGACTTCCTCGTGGTCATCGATCGACGCCCGC -TGCAATAGCGAGGCCAGCGATTGCGCAACCATGGCGGGGGTTGGTCGCTCAAGGATTCGG -GAGGGTATTAATGGGTGAAGAAAAAGACAACTTCAATGTCAGAGAGCAAATAAAGTCGTA -GGATTTGTACAAAGAACACAGTGGCCGATTTCCCCAGATCAACTGATATCAAACAATGTG -CGGAACTGGTCTTTTCCCCATTGGGGGGTTTCTGCGCTCAACGCCATGTGCCCCGCCGTA -GCTCACGTGTCCTGCTTTTATCGTCTAGTTATAGTTATTGTACTCCGTAGTAAGGTTAGA -TCGACAGATTCACAAGTAATGGTGGAACTGATAGTATCTGGAACTTTGATCGCTGATCAG -ATCACATACTGTATCGGTCGTTGTGAATTTGGATATCACAACCTGCAGGGAATTTCTATC -GGAGATGGCTAGGCTACCCGATCGACCCTGAACAACTGATGTCAGCCATCGATCATATTT -CTCGGGGACCCGTTCGGAGTACGGACCTCTCCTCTTTCCATCTAAATATGTCACTTTTGT -TGAAAAAAAACACAGATGAAAATTTACTGAGTCATAGCGCCATATTTATCTGTAATTGGG -TGGCGGAGTGAGAATATCTTTACTCGCAGCACTTCCACTAGATAAATATCTTGTCATCAG -AGGTAAAAAGCCAGAGGCAGTCTTCTTCTGACCTGGATCACAGAATAGGGTCCATGGCCT -GTCGTGGGTTGCTAATATCTGTAAAGTGGAGTTCCCTATAGACTGGTAACCTGGCATATC -AACCATCGGGTGGACATAGTGATTGTTTTGGCAGGCCTAAGGTCTACCATGATGATCCGA -GTAGTAGTGATAGGCCCACTTGCCCTAGCAGGCGGGGAGTTGACGCGGCTCTTGAAGCTC -TTGAAGCGGGGCCTCGCGGAGGCAACAGAATACGTATATGTATATTGTAACTGCTCTCTC -TAATTAATCAATATAATAATGCATGTATACATGTACACTCTTCAGAATATTTCAAGCTTC -TAGCCCAAAACATCACGAAAGTTCGTGGCGTATCGTAGCAGACCCTGGTACCTCCTGGAA -TACCTTGTCCGACTGTAGCTCACTTAAATGAAGAGCCTCTAACTCAGTCGCTGGATGTGG 
-CTTGCTCTGTTCAACAGGACTCGATACCATGCGATAATTATCTATGGTTCCCGGGTGCGA -ATTTGCTGTGCGCCTTCGGCGAGAGATGATCACAAAAGCAAGGAACCCAATCAATACCAA -GCCACCAACAGCACATCCAATGCCAATGCCAACCTTTGCTCCAGTCGAAAGACCTGAGGT -GGAGAGGTCACTATCAGCCTTTGCCACAGTTGAGATGGTGGAGTTGGAGTTGGAGTTCTT -TGATGAAAGAGGTTCCCAATATCCGTCCCAGCTGTCTTCCCATGAATTGGCTGAAGGCTT -GATTGAGATGTCGTCAACATCAATGCTCTGCTGAGCACCTGATCCAAGTGCCGGCCCGGG -TGCTTGTGCCAAAAACCAGTAGCCACTTCCAATACCTTCTTGCCAATTTGTGCCAATGAA -GGCTGCCTGAAGAAAAGCTCTGCCGAGGACGAATGAATCAGAGGGATAGCATGGGAAGTA -GGTTGTGTTGTGTTCCACTAAGGGCTCTTGAAGTGTGAGCTTTAGAAGTTGGAAAGGGAC -TTTGATGGTGATATTTTGAGTGTTTGTTGCGTCCTTCTCGAATGTAAAGGACATATATGC -CGGAGAGGATGTTATTTTTTCGTAATTTTTGTCATCTGTGTTCCAAAAATATAGACCCAG -ACCATCGTTAAAGGTAACTGGGAGCTCAGCTGCGATTGTATCGCATGCTTGCTTGGGAAG -GTAGAGGTATGGAAGGGTAGGATCAATCTCCACTCTTTTCGCCCTGCTAGTGATGCTGTT -TCCCCATTTGAACAGCCCCGATTTATTCTCAAAGTCAAATGGCGAGTCCCCAGTGGCAAC -TCCTATCCCGATGTCTTCGAGAGAAATCCTGAGATTGCCGAGGTGATTCGTATCGTCAAC -GTGTTGAGAGCTTACTTGGTTCAGTACTCGGTTTTGATCATACCCGCCAAGGATCAGCGA -TCCCGGGATGTCAGGCTCCACAGATCCAATGTGCATCCCCCAGGAGTATGACGGAATGCG -ATCAGACGAAGTATCATTCCAGTGCTGCCAAGCAGATAGCAAATTGAACCTGTTCCCATC -TACCACGTGGGTAGGCTTTGTGGCCCCCAGTGACAAGGTCCCCACAGAAACAGGATATCT -TTTCCCATTTGGGTATTCTTGGTAAGCTTCGTAGATCGACGCCATGGAGACGTTCGGCAC -CCTGTGTCCATTTTGCAATGTGATCGTATCTGCGAGAACCCCTTCTTTGACGTATCCGGT -ACCACCCCAATTGTTCACACCGGCACTGCTCTGATCAGACAGCCATTTTAACTGACTCGG -GTCGGTCGGTGTTTTCAAGGTGACAGCAGATGTTGACGCATTAATGTCGAAGAGACCAGC -TCGTCTTGCATAACAGGGGGTTGAGTCGTCCGAGCAAACACTGTCGAGAAGAATGACACT -AGCCCATCGGTCGCCAGGGTAGAGATCGATGGGTTGCTTGTTGGATCCCAATTCAACTGA -CACGGCATGCCATGGGCCATCCGGACCATAGCTTTGATCAGACCATGGCATTGTCCAAGG -GATGCCATCTGCATGAACGAGGGTCGCAGTGGCGAACAGCCAACTGAGAAGATATTCTGC -AGCGTAGGGGGCCATCCTGAAGAGAGGTTCTACCGTATATTCGGCAAATCAGATGAAATT -TTCGACAGGTTATAGCGGAGTGACAGGAGGCTCTTTAGAAGACAAAATTACGATCATCAT -TTGGCCCCCAACCCGAGCACCTTGGCATGCAGGGTTCATGGGCGCGTAACTGAAACGGAT -TGCTCAACATTTGATGTAGAGCCCACAGTACACTCTGACGGGCGTTACTTAGAGTGGCAT -AGAAGAGTATTGATTTGTGAGTACAAAGGGGCTATGTGGGTACTGTGTTGAGTGTTGTAA 
-GCTAGCGCCGTGGCCTTCTGTATTGGCAAGGAGCGACATTCGAAGAAGGACGTGCTGAAT -CCAACGCTCGTGTGCAGATTGTGGTGCATGGATCTATTATCCTCCATAACGTGATAATGG -TGAACTTATAAATACATCAACATATATGTTGTAGGCACTCAAGTCCCTCGCCGCATTACC -CGCCTGTGGCGTTGGGACAAGACCACTGACGAGAGGAATTAGTCATGGCAGTCCCTGTTG -ACACTCCTTCTCGATGCATACATTTGTTGACTTAATGGTATCCTTTGAATGCTCATATTG -AACTTGAAATTCACATTACTTACAGATAAATTCCCACCTGAAAGGGGAAAAAAACTTAGA -AATAGCAACATGAATGGGGAATGCCCACCGATTTATAAGCTGCGACCTAATAAATTCAAG -TGAAATGTCTCTACACTAACCATCGGCCCTGCATCCTAAGGGACTAGTGTGGAGGACGAT -GAAATCCGGAGGAAAACAAGGAATATTCACCCGACGATACGCACAGATCTAGGCTGTCTT -ATGGGAGTGGTATTCCAGCCTGGAGTACGGGCGTGGCCCCAGCATTGGCATTGGGGTACT -GGGCCGGCTCGAGCTCAGATGTACGCATTGTCAATGTATCTCGATATGTTCTCGAGTAGA -TTTGTACTCTGTACTCCGTACGAGACTTAACTATCACCCAAAGATCTGCAATCAAGACGG -TGGATCTGAGGGATAGCGTAGTGGGAATGAGTACAACATAAATCAAGCCAGACCCTAAAC -GCAGACATTTAGCCGGTTATTAGGATAATGGCGTCCGGACATTACTCCGTTGCATTTCGT -AACGCTTATCTAAAGCGAGTACGCAGCGCTCCCGACTGTCTTGGCTGCTAAAGTCGGAGT -AAGCAGGACAATTAGAGGACGCGCGGATGTGGGGCTCGGAGCGATCAGCGACGAGTGAAG -AGACGGAGCCATTGCTTAGAAGGTCTTGGCACGGGCAGGGTTGTAATTTAGACGGGAGTA -TTTTGAAGACTGCTTTGGGAGTAATGAGGTCACCATTTGAGATAGAATGGTCATTGGAGT -CATATCTCTCTGTATCTCGTACCTGGATATAACTGCAAGTTATACCTTGGTACCTTAGTA -CCTGGCTAGGTACATGTACATATATGTAGGTACAGACCAGATCCGAAACTTACACTGTAT -ACGAGAGCACGTGACCTCCGCCAAATTCCACCTCAACGTCATTGATCGTCTCTCAGGCCC -CCTAGAGAATGGGCCTTGGCCATTTTCATCCCATGGAACCATGATCGTCCCTTAACCCTG -TCTTCCACCTGTTTTTTTACTttcctcttcttcttcctcttctctatccccttcTCCATT -CCCCTCCAGACCACCTGGGCATCGTTATACTACACTTTCATCCTGCGACCGTGGGCCCTT -GATTGTGTTCTAGACCAAATTCTTGGATCCGCAATGCGGTAGAACCCCGATACAATTTCA -CACAAAATATGATATTCAATGCGCGGGAGGGGCCTGTCCCAGGGGTATCATGGGCCTGGA -AAAAGGTACTGCGGTTTGGAGTCGCCTTCTTGTTTGTAATAGGATTTGTTGTCGTACTAT -GGCCCTCGGTCGACCCAGTGAAACTGCCGCTGGAGACAGCTGTCAACACGACCATCACTG -TCGGCCCCGATGTCCCGTGCGATCTCGATCTCGATAAATTGGCAGACCTGAAGGTCCTCC -ATGTCACCCAATATACCCGCCGAGAAATCGTACTCGAGATGACAGATGAACCAGTGCCCT -ACACACAGTGGCTTGACGAGCCATTCTTGCAACTGTCAAAGGGCACTGAGGGGGCGGCGG -ATGGGTGCTCCATTCCTGCCCCAGTGTCCTTGCGGGTCCCCCAGCCGCCAAAATTGGCGG 
-ACGCATCACACATCGATTTCGGAGTCGCAACCAGTGTAGAGAGATTGAATGAGTCTCTGA -ATGCATTTGCTCACTGGGCTGGATACTCACGCACTCGCATTTTCGCACTTATCGAACCGG -ATAAGTCGGAAACCGGAATCCGGGATGTCAAGGCCAAGGCCGACAGTCTTGGAGTCAACC -TTCACATCACAGAGTCGGACGATAATTACCTCAACCGATACTTCGCGCTTATCCCTCATT -TACAACAAAACGCCCGGGAATCAACCCAATGGGGCTGTATCATCGACGACGATACATTCT -TCATGTCGATGCCCAAACTAGTGAATGCGCTTGCTGAATACGATCACACCACCTCCATGT -ATATTGGCGGATTGTCGGAGTCTATCCCGCAGATCGCGGCATTCGGAATGATTGGATTTG -GAGGCGCGGGCGTCTTCTTATCGAAACCCCTCCTGACGGAATTGACTAACGTATATGACG -AGTGCTCTGCGATGGATTTCACGGGCGATCGCCGCATCGCCATCTGCATCTACCGCTACA -CCAAAACCCGACTCACGGTGGACCACCGTCTGCGACAGTTGGACTTGGTGCACGACGCCT -CCGGATTCTTCGAGTCCGGCCGCGAGCCCTCGCTCACAGTACACCACTGGAGGTCCTGGT -TCCTCACCGACATGCCGAAGCTAGCGGTGGTCGCTGAACTATGCGGTGACAGCTGTCTCC -TCCGCAAATGGCACTTTGGCGACGGCTGGATCTTGACGAATGGGTTCTCAGTCATCAAGT -ATCACGTCGACCCAGACAAGGACGACATGTCCATGGAGCAGACCTGGGACCCGCACAATG -GCGCCAATGATGAAGCTTACCTGCACGAGCTGGGTCCCCTCCGTCACAAGGATGAGGACA -AGAAGAGCTTCCTGTTGGTGGATGCCATCCACGAGCCAAACGACCGTGTCACTCAGTGGT -ACGTTATGCGCGATGAAAAGAACGGCGATCAAATCTTGGAGCTGTCATGGCGCTTGAGGT -AAATATTTTGGGTTTTGCCTGGTACATGTACAGAACGTGTACGGGCTCCTTTACATGTTC -TCCGGGCCCCGGTGGTCTGCTGGACCAAGAACCCCACATTCAGGCGCATGGGTAAAGGAA -TGACACTTTGCATGGAGTTTTGCTTCGTTGTTTATAAATGATAGATACACCTTCTGTCTA -CTCTAATGAGTTTGAATAATGTGATATGGAAAGTTTTGGATTACCTCTTATACTACACAC -TTGTAGCTGTTGAAGATGCAGTTCCGGATCTTCTCTAGTACTCAATAGGCTGTTTAATTG -GCTCTTCTAGCACAGACACAGACAGTGTTGGGCTCGGGTGTTCGGGTAGCGGCGTCATCA -TTCGCACTCCCCTAAATACGCTAGAGCATTTACAACATAGGACTCGGTACAGATGTCTGA -AAAATGCTTTGAACATCGATAGATGATATGGTGGATATGTAGATTGAGGGAAACAGGCGG -GTCGGACCCGCCCTGTCTACCATGGCAAGCAAGGCCAAAACGCACTCCAGGGCTGGCCAT -AGCTCACTGATAAACAAAATGTACCATGGAACCGATGAATGAGCCCCTTGCATGTTCCCA -GGTGGATACATATATGGCTAGGTATGTATGTACGGAGTACGGAGTAGATATTGAATCTTT -TAGTATGCCCCGTGCATTTGTCAAACATGTGGTTTCATAGCAATCAGATATATATTTGCA -AGAACTGGGTCCGTAATTTTGATCAAGCTTGTCAAATTAATTGAACACACAACTACGATG -TGAGACTAAATCAACCATCCAATTTAatatatatatatatatatatatatatGTGCCACT -TGAGATATAAGATTGGGATAGGATAGAAATGAGTAACCTCTCGCGGAGGCACACCTCAAC 
-TCCGCGAACACACGGGGTACTATCTGACCACCATCTATCGGAGACAATAACGGCTGGTAG -TAACATCCTAGTATGGAGTACTCCGTCCGCCGTAGGGGTGTACCGTTGACCCAGAAAACT -TTCTATGGGACCCCTGAAGATCGATCGGCGTTAAACGCAGATCGAGATACAATATAACGG -ATATGAGACAAAGTGGAGTAAGCAGCAAGCAGTTAGGATTAGCACTAATGCAGAATAAGT -TAATATTTAAGGACAGTAGGAGATGGAGACTGAATACGGAGTATCGGTATAAACCGATAG -CCTGGATCAAGTTCCATAAAATAGAGTCAGATGCGCAATTTTGAGTCGTAGGGCAATCAA -GACTGGAATAAGAGATGGGAAATTTTAGGCGTGCTGTTTCAGTTTATATCGTTCATATGA -CGGCGGGAATGTGTATGGTCACATCTATATGGACGGGTCCCCAATGACCTTCCCCTTCAG -ACCGTAAACGTTACAACGTGCAAATGGATTCCTATCCCGAGTCGCCCCCCGGTGGAGCAC -GGAGCATAGTGCAACGCAATACCAGAGGACCTGCGTTGGCCCCTACGCCACGGGTTTGGT -ATGGACTAGATTTGGGCGTTTTTAAGGGATATTACGGAGTAGATTTTCTGCAGTCCCAGG -CAACAGGACGAGACGAGCTAGTATTGATTGATCCGGACTATTTATCTTGGTATGATGACT -TAGGCACATCAGTGGGCCAGTGCTCAATTTCCCCAGGAGAAAGTGTGACATCAGAGTGGA -ATCTGCGAAAGGTAGAGGCATAAAAATGGGCCTTTTGGTCATAGCGCCAGCGGCTGTTCC -TGGATTTTGTGGATATTTGTTTTTCTTTGGGTATAACTGCTTGTACATAGAATAGGACGT -GCCTAATCTCTTCGAGGTATGCCAGTCACCTTGGGGAAAGAGGATTCGCAATTTGTCAGA -AACAAACCTTACCCTAAAACACACCAAACCTGACCTGTACAGGGACATGCTCCATCCCAG -GGGCCTACAGGCTGAAGGATATGTTCTACTAATAGAAATATGGAGACTAGCGTGGACATG -ATACACCAAGACACGTCTCAGCCACAACCATGTGGGAATCGCCCCGTGTCGAGCCATCCA -ATGCGCGTCGGCCTGACCCGCCTTAGTTCGATTTTCTGTTTACTGGCGATTGCGCAAAAT -CAAGGAAAAAAAAAATAGACGGACTAAAATAAATGTCTGTATTATGAAATCCTTGCATCC -TATTCGGCTTGATACGGATCTGCATTTTATTTGCAAAAGGTAGGGAAATAATCTCCGCGA -CTCCCCGCAGGACACGGATTCACCAAACCTGGCAACTGCAGTATGCATGTGTGTCGTCAC -TTTCGTCACAATCGTCAGGGGTGTGTTAGCTCGGCGAGGGGCTGAGAACGGTAGGTGTGT -GTGCCAGAGTGTGTGAGAGGGTAGAAGCGCTCCAATGAAGAGAAACAACCCCGAGAAAGG -TAGGATGTGACTGGACCAGTCATCCTGGTGCCCAACCCAATCCCCCCCTTTTTTTTATGT -TTTATGTTATTTTGCGTCTAGGCATGTCGTAATAGGTATAATCTGGGCGTAGCTGTTTGG -TTTATTTATTTTCGTTTCATTAATCCCTCGATGGGACATGGTATAGTGCATAGGCTCAGG -GAGGATCTCCTGGTAAGGCTCCGAACCGTGCGCAGGATGGGGGGGCCGCGCATGTCTTTT -TTACTTCTACGGCTTCCACGGCTTCCACGGCTTCCACGGATGTTCTAAGATCCTTCTGGT -GGCCTTCTGGAAGGTCAGGAGAAGGCTTGGCAGAGTCATCCGTGCCGTCTCTGTGGCAGA -GTGGGTTCTCAGAGGCTGAAGCAAGCCAGCCGGTGGCCTCTAGTGGTCCCGGAAGGGAAA 
-AGGGGGCGAACGAGATGGGTGTGTAATTACCGATGTACACCGGATGGACTCCACAAGATC -GTCCAAGAAAAACAGAGGGGGGCACCAGAATGAGAATAGCCAGCTTGTCCTACCAATAGT -GTAACCGATATATATGCTACGTGAGATACGAACGGTTTCTTAGGCTTCTAGAAGGTATTT -TAATTTTACGTGTTCGGCTACCACTATCGAGGGTAATCTTATAAAAATAAGTATATCACT -CAATATGTCGCACCATGCGAAACCTGCATGTCATCGTTGATCACCGAGGTCTCCCGTTTC -TTTTTCCTTTTTTTTTTTTAAAAAAAAATCCAGCATGTGTTTTCTTTTTAGCTTCAGGGG -TCATAGTTCCCCCTTTCATACGGTGTATGGGCCTTTTATTAACGCCTTTAAACATTAGTT -CCTCTTAGTCTCTCTTTGGATCTGGGGGAAATCAATACTTGAAATTTCAGGAGACTATAA -GAATTTAAGGAAATTCAGGTTCGGCCTTGATCCATTTTCGGGAGCACGTTGTGGATCTTG -TGGTCAGGTTTCCTTCTAGCGGGCTTCGCTGGGTCTAAACTAGTCTTCTAGAAAGGGAGT -TTAGTTACATTGTTGGGGGAGATATATATATGTATCTATACCTATTACATGGAGTGAAAG -TGTAATCTAGGAAAGTGAGATTTGAAATTACACCTAGATGaatagagataatattcaaaa -agaaaaagtaaataaaagaaaataaaatGATGTACAGAGAGAGAGATGCAGACAGTGGCA -CCTACCACACCTAAATCACCTATAATAATTATAATTCCTAGTTCTCTCCCCCCCCCTTTC -TCTACCTTTTTAACCCTTTTCTTTTCCTTGTGATCTCTTGCTTTATATTTCCCCCACTTC -GAATATTCCTCTTCACTCTCTACTCGGTTCAAAAAGGTGTGTTTTTGCTCTCACCGTCCC -CATGACGCATCTTGCCTTCGCTGCCCAACCCCCTGTAGCTTGGGTTTCGTGAACACGGAC -AATTTCCACATTGGTCTCGCAATTTTCGTATGCGTGTGACTGATCTCACCGAGAATTGCG -CACGAGAAGATCAGCTTACGGCCAAGCGCTTGTGGAAAGAGATCGGAATCGCAACTATCT -TCATACATTTTCATGCTGACCATCGACTTTTTTCCCCCGTTATAGGAACCTTGATACCTA -TCATCCACCTTCAAACGATCGTTCCGCCTTGAACCGACCACAACATTATCGTCTGGGGCG -AGCTTGGGGTTTTGGCCATCGATACTCTTACGCAACCTTGCATTCCACTGCAGAAAAGTG -GAGAATTTACCCCGTTCCATTCTTCGCTGAGGGACTGTCCAAGACAGAACGTGAAATACA -GGTTCAGAGTGAGTTTCTGGCTGCCCAGCGAAGTTTTCGTGTCGAGCTTGATCGACTGCA -CCAGAAGTTGGTTCTGACTCTATCGTGACCCGGCAGAACTTTTCTATTTGTGATCCGAAT -CTCCCCACGACCCGCCGCCCTTCATATCAGCCCAGTCCTTGACGCGTAAGGACTTCGAAT -TCAAGAGCCATCAGACATTGTGGGAACTATTGGTGCAAGGCCTGCTGAAGGACGTCTTCG -AAAGCAGTCCTTGACCACGGTCCCTTGGCTACTTGCTGGGTCGGGTCATGGACTTCACAA -CCATCCTGAACAGGAAAAACTccgcagcagccgccgccgccgccgAAGCGCAGTTACAAC -AACAGTACTTTCAACAGAGCGCACAGTTACACACCGGAGCTTCGCCCACCATGAAGAGCG -AATCAGGTGGCTCGGACAACCCTGTCAACGCATACCCTCCTCACGGCCCCCCGCCCATGC -AAATGGATGCCGGACTTGCCGATTCCTTCTATTATGCACAGCCCACGGGTTCAGCCCCTC 
-GAAACATGGCCTATGCCCCTGCGGGATACGCTGGCGATCCTCAGATGCAGCAGGAACCTG -TCCCACAGGGAAGGGCGGGGATTGAACCTCCACCAAAGACTTTCCACTGCTCAACCTGCA -ACAAGGGATTCGCACGGCGTAGCGACCTTGCAAGACATGGTATGTATCCTCCAACCCGTG -AAGGAGATGTGGCCATTGACAATTATTAGAACGTATTCATACCGGAGTCCGACCTCACGC -TTGCGAGTGGCCAGGGTGTGGAAAGCAGTTCATTCAACGCTCGGCTTTGACGGTACACTC -CCGTGTACACACTGGAGAGAAGCCTCACATGTGTGAGAGATGTGGCAAGGTGGGTATCGT -TCCTCTTCCTGTTCTGGCTCGATCTAACATTTTATCCAGCCCTTTAGTGACTCGTCCTCG -CTGGCTAGACACCGCCGTATCCACTCTGGCAAACGACCCTACAAATGCCCGTACGCCAAC -TGCCAGAAGACCTTCACGCGCCGCACGACGTTGACACGGCACCAAAATCACCACACCGGG -ACCATCGAAGAAGCCGCGGCTGAAACAGAGGCCCAGTTGCGGCAAAACAAGGATCGTGGA -CGCCCTGGCGAGGGAATGTTTTCCGAGCACGCCTCCATCCATTCTACACCGTCACCCGCC -CAACACCCGTCCATGTCACCTGGCGGTGAGCTCCCGCCGCTGAATATGCACCGCTCTGCT -GGCGACTACTACATGGGCACCGGTCCTATCCCGCCTCATGTGCGTGGGGATTTCCCTCAA -GGAAGCCCCCGGGCTTCCCCGACTGCAACCTCCCCTTCGCTGTCCAGCTACGGCAGTGCA -CCCCACGCCCGGCCATCCATGACCTCGCATCCCTACGCTCCGCCCCAACCTCTTGAACCC -CCGGCCAACAGTGATCACCGTCCCAACAGCGTGAACGGCAGTCCTCACATGGCTAGTCTT -GGATGGGCCTCTCCGTCTCACGGTAGCATGCCGTCGCCCGGATCGGCCAATGACTTCACT -TATCCTGAGCCCACTGGCCCCGCGTACCCGACATCGATGCCGCAGCACATGTACTTCCCC -AACTCTACCATCCGTCGACCTACCAGCACCGAGCCGGAGAACTACGAAATGAAGCCCCGA -GGTGACCACTCATGGTCCACTGCTGTATGATGCGAATCACAACACAAGTGTCTCGCAACA -GCTTGAATTTTCCATTGTTTCCTTTGTTTATTAACTTCATTGCATAAGCCCCTTGATTCC -CACGAGAGAATAAATCTATGCCTCTCGATTGATTTCTTATTTCCTTTGTTTTTTCACGCA -TTTTCTTATATCTTTGTACTATATTTGATGGGAAACCAGCGCACTGGAAGGAGTCGGACG -GTCTGATCGTGTTTCCAAATCTGCGATTTATTGTTTTGTTTCAAGATGTCGTCTCTCGCC -CTTGCACCTTTTCTTTTGTTCAATGCTTGCGGGATGACATTATCGAGACACGATCAGATG -AGGAACTGCATCCATGGGAATAGATAGAGGGTTGGACACAGGATTGTCTTTGGTATGGAT -GGGTTCGGCAAGGATCTGGGTTTGTTTTAGGGAGACTCCTGATGCTGGTGTCTGGCTGGA -ATTTAGTTTTAGTTATGCTTCTTATTGTTTTCTTTTCTTTCGATTGTCTCATACATCTAA -TCATTGTATTGCTTTTTTCACCTGCTCTCTTGACTGTCGTTTATGACGTCCTTTGCTTTT -GTTCCTGCTTGCTCTTCGACGAACACCGTCTTCCAAGTGAGATCGTCGTTCACCAGCGGT -TGTATTTTATATAATAGTGATAATAGACAGTTATGAAACTCACATTCCGTGTCAGATCGC -ATATTACATGAGAGTAGTATTTCACGAAATTCCAGCTTAGTTTGTTGGGCGTCGCAACGT 
-TTGGTGAATCATATACCTGTTCATGGTGGAACTCCCTCGCCTTGCAAAGTGTACATCCAG -AATCAAGCACTAGCCAAATACAAGGTTGAGGTTACGATACCCATCTTTTCATTCTGTACA -TTGGGCGTGACAGGTGTGCCAATGATAACTGGTGGTGGCGCCAATCAATTGGAAGAGATG -CCTGCACTAAGCCCTTGCTCCCAGCGCTAAATGAGACTCAGAACTACCTGGATAAGCCTC -TCTCGTTGCGCCCACCCTACTCGATGAATCGTCGTATAGCACGCTCCCGCTCACTCATGT -TGGTTGCATCGACCTGTTGGAAACGTGCACTGGGTGTATAACCATGGTAGCTAACAAAGC -TACGATCGAGGGATCTCGTAAAGCTATTGAGCAGTCTGGGCGTCCCAAGAAGTTGGATCT -ACTTGCAATGACTTTCATTCCTCCTCGCTTCTACTCCAGCCTCTCCAGCCTAATAGGTTT -GAACACTGACCTAAGATATGGTCCTTCTATGGCTCAAACCAGAGCTGAGACTTTTGTCAT -GGTCGGCATCAGACTCTACAATCTAGTAGACGAGATCATGAAGGTACAATATGTGTATTA -TAGATTGCGTGCAACAGTGAACGAGAGGGACTTTGATACCGCAAATGTCGCTCGCAAGAT -GGTCGAGCACCTGACCTGCTCAGGTCACAATGAGGACGTGATGTAAGTGCTGTCATTTGA -CTCATTCGCTGAGCGCGTTTCTGACTTCTTCTGGTACTATGATCAACGGCAAGTGAGAAA -TATGATCGGGTGATCGGAATGCGCTGGAGCTGTTCCATCAAGGAATTTCAGGTGCCACTT -CGAGGCCGTACTTCTCGTAATCAAAGTCTTCCAGGCTAGGTATGCTTTCAGCCCAGCCGT -CATCAATCTCTCCTAGCTGCCAATCATCTCCGACCTTAAACATTCCTGACTTCCCTGCCA -GGATATCCTTGTGCATCGACTGATACTCTGCCACTACATCTGGATGTGGTACCCAAACAA -CTCGCATCCCAGCCCGTCTTCCTGCCTCTACCCCAGCGATGCTATCTTCGAAGACTAAAC -ATTCGCGAGGCAGGATGGTATTTCCTTCTGAGCCAACCCCTACATTTAACGACTGTAATG -CGAGCAAATAAATGTCAGGCGCTGGCTTCCCTTGACCCTGCCGCACTCGAGGATCAGCAC -CTAGGACCCTTCTGTCCGGGTGCAAGAAGCTCAGGAGTCTTTTCGATTCTGACCCTGAAG -TTTTCAACTCATAAGTACGGGTTTTAGTGCTGGAAGAGCTAAAGCAAATTGAATCATATC -CCCAGATAAAGCACTCCGTGCACGGCTTAGACGCGATAAAATATTCTCCGCGCCAGGCAG -CGGCCTGCAATCTGGAAAACGTAGCCGCATGCTCTCACTTGATTCGCGGGCGAATTGCTC -GCGGGAGATCGGCAGCTTAGCCCAGTTGTGGAATACGTCGCCATTTGTCGAGTCCGCGAC -GCCCATGAGCTGGGCTTGAATTGACCGAGTCAAAGGGGGCCTTCCATACTTTCCAAGTAA -CTGGTTTGTAGATAGAGCGATGATGTCTTCGGAGTTGATGAGGAGACCGCTCATGTCGAA -AATACAGGCGCGAATTGCGGGGAAGCTAGTGCTCTTCGTCAGCTGGTTTTTCTTGGTATT -CATCTGTTGATGGTCGAGAATGGCTTCGCTGAATGTTGTACATAGATTCCTATGTTGGAT -GTCACTCACTCTGCCTTTGGGGAAGCCATGATGGCCGAATGAAAGGAAACTTTTATCCTG -ATTTGATGGCACTGGGGGTGATATAAGGAGATGTGAAGTGGAATGGGTAAAGTCCCACTC -GCCTTCGCGGCGGTTTGGGACCCCCTGGAGGGGTTAAAACACCCCCATACTTGATTTAGA 
-TATCTTTTGTATGCCACCATCATACACAAGGTACTTATTTTATAGTCGAACACGGACATG -CACAATGATATTCTAGGACCGGTAGACGCTGATCTGTACAGCATTTTCGGCTTGTCGATG -TCAATTATTCGGATCAAGGCGTGGGCGGTCTTCAACTTCGTTCAGGCATATTCGAGGCTT -TAATCAGGTTGACTCCATGCCCAGTTGGTCAATATACTCCATGAGAATAAAAGAGCTCAA -GCCAAGTGGAACCACCCCTCCAGGACCGTAAAGCAAGATGTAGGAAACAGGGGTAGAAGT -ATGACTATGCATATAATGAATCGCTGCGTCTTCGTTGCGAGCCTGTTCTTCGCGGAACAT -TGTCGATCCAGGCTTGGGGAAACGGAGGATCGCAGAGCCACCAACCTCAGATTTCATGCA -TAATACTATCCAAAAGCACCCGTGCTTAGGGGTACAAATTCCACGGGAGCTTTCTCTTCA -CACATGCTTTTCCATGAAAGCGCTGGTGGTGCGAAGAGTACTTTTTTCGGTGAAGATATC -TTCCACCCATTCTCCTGTGGCATTCTCCTGTGATGTTTTCACTCCATTACCATGCAGTAT -AGTCAAATGCACGCATGGCCTTATTTTCTGCTGATCCTAGGCTTTAGAGGGGTGTAGTTT -TAAAGTGTGAAGGTGAGGACACGGGGGACGCATTTCGCCCTGTAAGGAGTTGGAGGCTGG -CTTTGATTGACTCTTGCTATTTATTTATTCTCTCCTGGGACTATTATTGGGCTTGGTATT -ATTCAATAATAGCTAATAGTTCGTGAGACCATAACTTTTGCCATGAGTGGTGTATTGACT -AGTGAGTAGGGCATGGGTCAGTCAAGCTTGGAGAGATATGAGCTATCCTCGGGCCTTGGC -ATAGATTTCGGCTATATACTCGCGTGTACTGAGATTTATTGTCTATTCATGGTCTGAGAT -GGAGAGAAATTTCAACGATAAATCCCAAGCAATGATTCTACCAGTCGGCAATTTTACCTC -GCAACATCAGGGTTGATCACGTTGGTATACGAACCAACAGAAGCCAATCAATCAAAAACT -TGAATGAAGATTAATGTCTCATGTAAAAGCCAATTGGATCTCGACTTTATAGAACCGCTC -GTGTTGATCCTCGTATTCATGTGGCGTTAGGAGAACATTGATCGTGGTTAGGGAAGGAAA -GTCTGGGGTAGTTGCGAATATGGTTTGAACGTGAAACATCCTCATAAAATGGTGTAAATG -GCTTCGTATCAGGTCATCAGGTCGTGTGCTTGCTCAGAGATTCATGCTTTAAGCAGTTGT -GTGTGTTGGTGTGGGTTGTATATATAGACTCTCATAGAGTCCTTTGCTCCCATCTTTAAT -CAACAACCGCACTGTGCTCTCGCGTAACTGAGACCTAAATATCTAAGCCTGAATAATGCT -TCTTCACAAGGCTCTTATTCTTGTCCTCGCCCATCTCGCCCTGAGTAGTAAGTACTTCCA -CACCCAATTACTCCTCCGTACATCAAACCTGACTTACTAACCACATTTTCCTTCAAAACA -GAAGCAGCGGCTTGCCCAGCAGGAGGATGCGGCAGTGCCCCCGCGTGTGTCCTCACTGAA -ACATGCACAACAGCAACCTTTGTCTCGCCTTCGACAACGACTATCACAACCTGTGTCCCA -ACACCCACCTGCGCGGGAGTCTACTGTAAGAACCCAAAAACCACATCTGAGCACGACGTT -GAATGTATCTCACTCTGACTGAACCAAATGCTAGCATCGTGTGAGTTTGGCACCCCAAAC -AACCAATGCTGCTCGGGCTACTGCGCTGCTAACAAATGTCGATCGACGGATGATGACTGG -CCATCCTGCAAAGAGGATGGTGGGCCTTGCGTTGTCGATGAACACTGTTGTTATGGGAAT 
-GCTTGCAGAAATGGCATATGTAGTCGGACTTGATGATCTGGAATAGTGATATATCAAAAT -GTCTATAGCAAGTGTATTCAGAATCGATTAATTACTCTATTGAATTAAGACCAGACTATT -TACTTTTGGGCTGCATGATCCTGTCGAGTGTTTGAATGCCTAACTTTGAGTCCAATGTAA -TGGATCCACGCATTTTTCATACAAGCAGTTATACGTATGCACTCACATAGCCAACACTTG -TTCAGCCCGTTCAGTAAACGCCTTCATAGCAGTCCTCCACAACTCTGGCCCAACACTCAC -CCTCGCAACCCCCAGTGCCCGAATCTCCTTCACGCTCAAAAACCCATCTCTCAAATTCAT -CTTCACATTAACCATACCTCCCAGGGCATCAACCAACCTCCGCACTTCCACACTCGAAAC -ACCCCTTCCACTAGGCCCGCCCCAAACAAAGACCGTAGTAGCACCGGCCTCCAAAAATGC -CTTCCCGCGCTCAATAGCCTCTTCAATAGTCCCCTTCTCCGTCAGCAACACATCAGTCCT -AGCATTGACCACAAAATCAGGAACCCCGGCAGCCTTTGCCGCCTGAACCACAGCCCCGAC -CCGCGCCACAGCCTCATCTACCGACCGCAGCACACCCTTCGCGTCCATATCTTCAATATT -ACAACCCACTGCACCTAGCGCGATCACCTGGGCGATGGTATCAGTCAGCTCAGAGACATC -ACCGTACCCATCCTGCACATCGACGGTCAGTGGTTTAGTAGGGTTGATAGCGCGGGCGGA -GGCGGCGATGGCGGCTATCGCGGTCAGGTGTTGGGCTTTTGTTAGGGCGTCATCGTCTAC -GCCGATCGTTGCAGCGATCGCATATGATCCCGTTGCGATGGCGGGTGCTGTTGGTAGTGA -GGCGATTATGGAGGCTGTTGCGGCATCATAGACGTTGGTCAGGATTAGGGGGTTGCCGGG -TTGGTGCAGAGAGCGGAGGTGTTTGGCGATGTCGTTTTGGGTTGGCATTTTGGGTTGATT -TGGTGATATGAGCTAGGGATGAGATTTTTGGGATTTGCTATTCTACATACAACAGACTCA -TGTCTAGAATAAATTCACCTTTGACGTTGAAGTCTTGAGTCATTGAATACGAGGTAAAAG -CGATGACGAAGGATTTCGGAGCCGAGAACATCTGCTCCGATCTACGCAGATCGAGGCAGA -CAATGGGGGTATTATTTCCCCTTGTCTCAGACTAGACTTGGAGAATAAGCCGATGGCGCA -AGGGATCCACGGTATTTTCCCCTCTCCGGCTTATGAAGAATTACATTGAGACCATGCAGG -TCATTACACTGCTTACAGGAACTCACAATGTAACTCCAATGCTCTCCGGTTTGCCACAGC -ATCTGCGTGAGAATGTTGGTTGGGGGTCAAGATCGACTGAAATTGACTGGTCCGACACCC -AACATGGGGGCGAAACTGCAAAGGAACAGaaacagaaaaaaagaaaaaaaaaaGGGCGTA -ATACAATAATATACATCCCAAGCCTGATGGCTTGCTCTCCATTGCTCCATTGGGATGCAT -GATGGACCTTCCATGCAGCTGTGAAACCGAAGATCCAATAAGAGGAACTGCCTGAACCGT -GTGGACTGTGTGAACTGCCTACGGAGGAGCAGACTGCCTGAATTGCCCAGGTGGAATTTA -TGCCATATGAAAATCTTTTCACACAACCTCTTTGTTTACCTTTGGTCCCAGAGAAACAAG -AAAAGCCCTGCTCAAAGTAATCAAAAGACCGATAACATGTGGTACGAGGAATTGGCCCCG -TTTCTCGGCCTATCGAAAAGAGTAAGAGTCCGTAGCAAGTCCACTAGTCCGATGTAGGGC -GAAAACACTGGTAACACTGGGAACCTTCCGGGTTGGATGTCCGGTGCATATGAACGAGGG 
-GCAAAGCTGTGGCATTCCTTGGGTATTTGAGGTGAGGGTAAAATAATATGGGAGAGAGCA -TTGGAATGTGTTCCTAGGGTCTCTCGTTTCAGATGTTCTTTTTATAAACCCAGCACGGAG -GACTCCGTACTGTTCTCTTTCATTTTGCTGCAAATACTCCGTAGTGTTTAATATATGAAA -GTCAAATCAAACCCGACCACACCCGTCGACGTTGGCAGCCCCGTCCTGTTCCTTCTCCTC -ACGGTATTTGTGTATTTTATTTGGACTGCGGTTGTCGGCTGCGCTTAGGTGAGGCCTCTT -GAGGTATATTAGAGGTATATTATGGGTATCCTCTGGAAACTTACAAGGAAACACCTGGGA -GGATATTTCCAAAGGCCATGAGACAGATAGGATTTGTAAAGTAGACGGATTGAAGTCAGT -AATACAAATTATAGGCCGATAGAAGGAACTACATTACTCTTCGATTCCTCTTTCTATGCT -CTTTCTATGCTCTTTCTATTATATTTCGATGCTATATCTATTATATCTCTTTTATTGGTC -TATTGGATGTCTATTGTATCTATTCGATGTCTAATATACCATTCTGAGCATCGTACTTTG -AGTCAATACATTGAAATTAATTTAAAGAAAAAAGAAAAAAAAAATACCAATCGGGAATCC -CCTGTAATTTTACGAATCGCCGTCAGCTCTCCCCACTCTACGCTTCGCCTTGGACTGCCA -TTCTGCCTTTCTCCTTCTTCTCTCTTACCTCTTACATTTTCCATTGCCTTCATATCCATA -CTTTTAATTCCGATCTTATGTATGGTTTTACCTCACTCCCTGCAGTTGGCCGCCTGACCC -GCAGCTCTTACGCTGCCATCCGACCCTGCTCACCTCGAATCCCTCTCACTTCCTCTCTCC -GACCTTGACCTCTCCTCAACTCCCATAATGTCGACCGGCTTGAAGATTCCCCAGACCGGG -CTGTCTCTCCACCTCGGAGACGAGTCCGGGGATCCTTCTGTTAAGCCAAATCAGATCATG -CGGTTGAACTTGGTGCAGAGCACCCTCGATGATCTGATCGAAAGCCTACGAAAGGATCAA -CCGGCTCGAGTCCGGCTGGGAAAGCACCCATCACTCCACTACGGGGGCAAGACTCAAATG -TTCCATGCGTACCCCGAAACACACCGATCCGAGATCTATCATAGCTCCTCCGATAAAGAG -ACACTGTATTTCACAGGGGTCCTCAGTCATAGTCTTGAGGTGGAAAAGGCTAAAAATGCT -ACCGCGGCCACCGATCAGGCACTGGCAGATTTAGAGGAAAAATTAAATGCCCATGAGAGG -GGGAAGGAGTCAAAGAAAACTCACATCATCTCACATCCGGATGAGTTGAAGGCTCTACGA -GGCAGCAAGGCAGGCTACAAAGGTCCCACCACCAAAGTCGAACTTGAGAAAGACCGTTTT -CTCAAGACTGCTGCCAATCGTTCTCTCACTGCAAGTCCTATCTTGGGTGCTCCCAAGTCT -CCCTCGCTCGCAATGACCCCGACATCTGCTCCAATGATGCAGAACAAAGACCGCGTGAGG -CTCGAAGCCCTCAAGATTCCTTTCATTCACCTTCTCGCCGTCCGCGCCGTATCAGCCAAG -TTTCTTGCCCGCCAGACCCGCAGCACCGTCGAAGACTGCATTGCCCTCGGCGAGAAATTT -GGGGTCGTGAACCGAATCAACCCCGAGAAATACAATCTCAAGGACAAGGTCTACAAGGAT -CTAGATCTGTTTGGCTTTAATTACACCGAGGAGGACCGAAAAGAAGCCATTGAGAATTCG -ATTTCTGCCTTCGATCGCATGCGCATCTCTCGATCCGACAAACTCTGGCAAACGCTTCTA -CCAAAGGCTGAACGAGGCAAAGGGAAGTGCCTTTCACGTCTGAACCTTCGCACTGGCCCA 
-CCTCAGAAACCTACCGGTCCACTCGCCAAAGCCAGCGGAGAGGACTCGGGCAAGGATAAT -GAGACCGACCGCGCCACAAAGGGATCTGCGCCTGCTATCAAGACTACTTCGGCCTCGCAA -AAGGCTCGTGACAAGGAAGTGACCAAGCGACCCACCAAAACCAAGAATACCAACAGCACC -CTCACCGGGCGGGTTACTAAGAAAACCGGTGGGAAGGCCCCAGCCAAAGTGGACAGCAAG -ATCAAATCTGCGGAGTTCGTCCACAGTTCGGATGACGATGATGATACCGACCTGCCTGAT -ATtccccccgccgcccccactcccgctcccgtgccgacgcctgcacccAAGCAGCAACCG -AAGGAGCACAAACGCACTCCATCCAATTCGAAGCCCCCAGCACCGAAAATTAAGGCACCG -GCCAAGCCTCGCATCACCGAGACTACTTCAGTCCCATCTGCGCCCGCTCCCAAGGCTAAA -CCTGAGACGTCAAAGCCTAAACTTGAACCACCTAAGCTTGAACCACCCAAACCAGTGACC -AAGGTTACAGCTTCCAAGCGTCCTCCCTCTCGACCCTCTACTTCGCCACAGAAGCCGTCT -CCACTCGGTTCCTCTCCTCCTGCCAATGTCTCCGATACTACTAGCAGCAACCGCAGCCGA -TCTGATAGCCAGAACCAATCCTCTGGTTCATCCTCGTCCTCGCCTCTCATCTCCCAGCTA -GCCAAAACCAACAAGGTGGGCCGTGTAGCCCCTGCTGCTGCCAAAACTGTCAAAACTGCC -CCCCAAGCCAACGCTACCACGAAGGCTACCCCCGCTACCAACAACCCATTGAAGCGCAAG -GCTGAGCCCGATCAATCGTCTGTGCCTCGGGCAGGACGTCCTAGTGGAAATCTAGAAGCA -AAGCGGCGTCGAGCAGTCAGCTCCTCCAGCGGTGGAAGCACTGGCAGTGCATCTCCACCA -ATGAGCTATGAAATTCTTCGGCAGCAGCTACGGGAGAAGTCTCAGAAATTCAAGAACTTC -TACACAAAGTACCGCAATCTGCATGACTCCCTAGCAGCTCTCCCCAACCCGCCCCAGGTG -GATCTCGACAAGCTGCAAAAGCAGCACACTCATTTGCAGCGAATGAAAAAGGAAATCTGG -GAGGAAGATCGGCAGCTTCGTGATGGCCTTCATTCGTAATTTAATTTCGTTCTTCTCTCT -TGTCCCATTTGCATGATGGCCATATCGCACTCGACAGAGTGCCAAGTGCACCAGTAGGTT -CGGCGTTTGAATGTTTATTTATCATATGTTCATCTTTACCATAGCAATTCATATTCTTAT -GACCTAGTCTTCTTGTCCTGTTCTAGAGTAGCACTATGCTTTCGGGTAGCCAGGCCACTA -ATCGACATGAACAAGTAACAGCTGAGCACCATGATTCTGACACCAATGAAACCTCACAAA -GTACTGAAATATCCATAACTCAGTCCAATTTCAGTTCGTAAGCAATAAGCATGGAAACAG -TTTCCGGTAGATCATCAATTTTACCCCAGATCGACCAAAACGCCATCTGCCATCTGCTAG -TCCTAATGAGGGATGGTGGACCCTGTCTCATTGGGCGGTGTAGCACGTGGCAGTGTCATG -TGATTGGTGAAGAGAACGCACGTGATTGAAACAGCAGACGTCATTTGGCCGTTCAGCAAG -CTATTACTAGTTAGGCTTAGCTTCTAGGCCATCCGAACTTGCTGGCTCGGAGCATGATAC -TAGCAGTCGTTACACCCACAGCCACTGATTCCTCCTGTGTTATCAAGTGATAGCAATCAA -CCACAGAAGGCATACTCAAGAGCCTGCTTTAAGCTCGTTTGACAATGACCCCCCCGGCTC -GAGGACCGGGGACTAGCCAGCGCAGCCCAACCGTCTTGGTGGGTTTGACTTCCACGCCAA 
-CCGTACGAATCAAGATCAGTTAACACATATCCTCGATACAGGTAACAGATTCCCGAGACC -AGCAACAACCCCAAGCACCCCGTCGACGCCGCTCGCCAGCAACCCGATTCATCACCGTCG -ACAACGTCCTCCAATACGCCTCTGACGTCCCTTCAATGCAGCAGCGCGGCCCGCCACCCA -TCTCACGGTCGCGCCGTCTTGCCTCCGCAGCTGGAGGTTTAATTACTAGCGCAGGCAGTA -GCAGCTCAAGCGATGTGGCCGCTTCCGGCGGCACGATGAGCCGACTTGCCGCACAACCCC -GACTCCCGCCTCGAACGACAAAGGTTAGCGAGAAGCTGGTTCTTCTGCCCGACACCGGCG -AGATAGAGGAGGGCGAAACGGAAGAAGAGGATGATGACTCTGATGGTGCGGACCTCGTGG -ATGAGGAACTTGTTGCGCGTCTGGCCAAGGAGAAAAATGTCGACCCGGAAAGGATTAGAC -ACCAATTGCTTATGTCCAAACGACGCGGAGGTGATTTTGACGTTGATAATGATCTTGCAC -CCCTGCTGGCTCAAGAAGAGGTCCTGAAGAAACGCCGTATCGCACCCGAGCGCGCGAAGA -GTTATGCCGAGCGGTTGCCGAAGGCGCGGCGGACGGAGAAGCTTGCGCGTGTCACGGCGT -ACTGTACGGCTCAGGCATATAAGATGGGATCGCTTGCTGCTTTCGTTAAGGAGCAGCATG -GTGGGCGGACGAAGCTTTACGATGATTGTCTGTACACCGCGTATCATCTCCCTCTTTTGC -CGGGCCATGAGGGGTACCGTCTGCGGAGTAGCCCTGTGTTGAAATATCCTGGTGGCAAGT -CACTTTTGGATGAGGAGATTGAACGGAACGAACTTCGTGATTATCATGATGAGTACATGC -TCGAGACGGAGGAGCATTCGGTTGGTGGACACAATCGTCCTGAGGATGAGCACCATTATG -AAGCGTCGCCGCGACAGTCCGAGGGTCTTGGTCATGATTCTCGGGAGGAAAATCGTGAGG -AGTTCTTGAATCGGATCACCGAAGAAGTTCGGGGCCACACCAATGGCCATGAGCATGCTG -CCAGCTCCGGCGAAAGCTTCGATGGGTTGCAACTTAACCCGCAACAAGACAGCAATGAAG -TAGCTCGGCAATCTTCGCCTGAACCTGCAGTCCGCCGCCGAAGGCACAGCACGGACGACA -CCCACGCTTTGATTCGAGACCGTCCTTCGCTGCCAGCCACTACACCCTCCTCGTCCTCAT -CACCGCCGACTTCAACGCCGCCAGCCCCAGCTCGGACGTTATATAATGTTGCTGAGATGT -TCGTCTTCAGCTACGGCGTCGTCGTCTTCTGGAACTTTACCGAGCGACAGGAAAAAGACC -TGTTGGCAGATTTGGCCTTTGCGACATCATCGGCAACCGGTATCCCTACTCCTCTGGCGA -CGATGCCTCTTGACGAGGAAGACTTTGAGACAGAAGAGTTTCATTTCGAGTATTCGACTG -AGATCTCGCGCCCACGAGTGTACAACGACATGATTACTCTTCGCAGTGGCGACCACATGA -TCAAACTCGCTATCAGCCATGGTATTTCACAGAGCACTAAGCTGTGCTTCTTTGAGGAAG -TCATGGCCCGGCAGATGGCGGACGCTAAAGACGTTCCCAGACGTCTTGCCGTTTCAGGCC -AGCTGGGCATGAAGCGCGAAGAAGTTTTCCGGATCTTAGGCCGATTGTTCAAGAGTCGGG -TTGAGGTCAATCTTTGTATGTCCCCAATCCCCTTGATAAGTTATATTCCACGCTATGTTG -CAGAGAGGAGACTTTGTTCCATACTAATATTCTCCACCAGCATCTAATATGCTCGATGTC -CCTAACTTCTTCTGGGAGAGCGAGCCAACCCTTTACCCGCTTTACATTGCTGTGCGCGAG 
-TATTTGGAAATCAAACCACGTATTCAGGTCCTCAACGAGAGATGTCGAGTATTCCTTGAC -CTTGCGGAGATCCTATCTGACTCGATTGCCGACAACCGCACTTCCCGTATGTCTCGACCT -TCTCTTCCCTCCCTAACCTCTCAACTGACCACGTTCTAGACCAAACCTGGATCATCATTG -TGCTCATCGTTATTTCAATCATCGTTACACTCTCCGAAGCCTTCCTCCGCTTCGGCCTCC -TCCATGCCAGTCAGGGTGCCGCTGGCACACCTGCCAGTATCTTAGCCCGTGTCATGGGCC -GATCCGTTACTCCCTCTCCATCCCCAGAATGGAACCCATACTCTACCGCCAACGCGCCAC -CAGGCTGTGTCTGTCCCTCTGTCGGCCCTGCGGGCGCTGGTGCAGATATGGGCGTTAGCG -GAATGATGAGATACTAATTCATTTTACTTTCTAACAGGTGGACATTATGGCGCAATTGGT -GTTAAGGAAAAGGACACTGGACAAATTTTACTTCGGTTACATATTACCTCAAAACTGACA -GATTCTTAATACTTGTTGACTTTTTAAAATCTTTCCTTTACCTGCGGCTTTAGCAACAAT -GTACTTCACATTCCAGTCTTAATACGCCGCTCCAACTGCGTAAACATGCAGTCAGCCCCC -AATATTGAATATTGCCTCCCACAAATCTAATAGCTGCCCCTTGCATCGGCCCCCTCATGT -CGACCACATGACACTAGGTGCCTTTTCAACGCCTATTCAACGCCTTTTCAGCGTCTGCAT -TGGGGGCTTGGATTGCCTAGACGCATGTAATTCGACTGGAAATTTCACATGGCGACTATC -GCAATCTTCGGACCGTACAAAAACCAGCAATAAAAATAAATTTGACTACTAGTGAACTTG -TATTATGGAAAGGTTTACAGACTCCATCTCATTTCAGGGCCATCGTCATACTTGAAAGCT -ATTTGATTGAATAGTTATCGTCCTACTGCTAACGTTCAACTCGACTACCGCGCAACAAGC -TTGGTTAGTAGTGGCTTCTTGACCCATACAACAAAAGCTTTCTGATCAGCCCATTGGGTA -CACTCCGGGGAGAGGTGTAGGTCAAATCGCCATAGAATATTTACCAGAATCAGCTTCATC -TCAGCTTTCGCTAAACTAGATGTATATATCTGTTGGTTAATACTTCCAAGCTTGTATTTG -GCCTAAGGAGGGGATTAAATCCTCTACATACCTCTGCCCAAGACAGCCTCGAGGACCATC -TGAGAATGGCCGTAGGAGGTCCCTGTTGTCACCGGCAAACTGATCATTTTCTCCCTTCAA -CCATCGCTCTGGCAGAAACCGATCTGCTTGAGTGAATTTGGTTGGCGATCGGTAGGAAGC -ATAGTGTGGAACGGAAACAGCAGTCTATCACAGGTTAGAGAATAGGACACGACGGCGGAG -GTGGATCCTGACTTTTCCAGGTACAAATTTGCCGAGGATAGTAGCGCCGCCTTCGGGGAC -GACTCGGGGCATCCCAAGCGGCGTTGCTGGGTACATGCGCAGGGATTCTTGTAGGACGGC -ATCAAGATAAGGAAGGTCAGCCACCGAGGCAAACGTGATCTCCGAGGAAGATTGAAAGCG -CAAGCGGATCTCTGATACTAGTCTTCGGTAGGTCTCTGGCCTATCTAATAGCAAAAAAGT -ACACCCTGACAAAGTAGTTGCAGTGGTTTCACTGCCAGCAATGATCAGAAAGGTGGCATT -AGAACGTATCTCATCCTCGGACATTCTCGCTTTGAGACTTTGTTTGGAATTCACATGGGC -GATGTAGTCTGGGGTGGACGAGTTCTGATCTCTTGCCATCCGTTGAGCAATCTTAGAGTA -AGTGAAGCTGACGCTCTCTGCGCGAGCGCGGAACAAGTCACGATAAAGAAAAGAGGCGAT 
-AGCCTTGATCGGATATGATAGTAGAGAAATATACTTGGACATGCTGATAATAGTCCACAG -CTTGAGTGATACGAATTGCATAGCGACAAAGGGATGGTAGCGGGCGTCTTCCAGACATGA -AAGTGATTCGCCATACACGTATTCTGCCATGAAATCGAACGTTGTGTAGGTATACCATTC -CACCAGGTCGACAGTAGATTTCATTCTGGCCTTCTCGTGCAGCCTCTTTATGAGGGCATC -AACGTATCCCTGGAGGATATGTTCTTGGTACTTTAGTGCCGCATCAGAGAATGCACTAGC -TAGTATTCGACGCTGACGGTAGTGGATATCGTCTGTAGGAGCAACTGCTAGATTAGAAGC -GCCGCTTTTTGCAACGTTGAAAAACCGCGGGTCCTTTGGGAAATTTGTTGAAAACCCGTA -GATATCCTTCCACGCTTGCTCTGTTCGAAAGCTCAGCTCATCGGGAGCGATCCGGACAAC -ATCACCATACTGCTCGTGTAGGCTTCTAACATAGGTGTGGAGCCGGCCGCTAACGTAGGC -GCCAGCATGGGGGAGCCTTGAGATGGATGCCAGAACGGGTCCTGGATACTGACTGAGCGG -ATGAATGTATCGATTGTAAATCACAGTCCAGATAATATATACTATGATTCCCTGGACAAA -CAAGACATGAACTGTCTGTCAAATTTGAGAGACAAAGGGTACTCACAAGAGTGGCTAGCT -TGAGCATCGGCATATTGCGAAACAACTTCTAATTGTGCAGTCGTAGGTGATACCTGGCAG -CAGTTGTAAATGTCGATAAAGGGGTTGATGGGCTAGGCTGTAGTTGCATCAGCCACACGA -TATTCAGACTTGTTTCAAGGTTGATCCTCAGAACATGATAATGGTTGATGAACACGTTAG -AGTATTTATACAGCACATGAGGTTAGCTCATCGCGCCTAGATAATTGAGTCAGCCCCTTA -TTTGACCGCATTATCTTTGCCTGTACGCCTACTTCTATATGGCTTCATGACGTCTTGCAC -AGTCCTCATTTCTGAATTCTCTTAGGCTTCTTGGTATAGTCAAGATGCAAGCCCGACTTA -CTTTGGTTTATAGGTCATATTATTCAAACCAGGTAACTTCCACGGGTACCTAAAATTCCA -GAACTATATGAACCACTACAACATATGAGACAGACTAGAAACAAGGAATGGGTTAAGAAG -CTAGTAGATACCCCCTATGGTTAATCCCTATGGTTAATCCCTATTACTTCGCCATGCCGT -CTTAAGTTATATACGGCACCTAAGTAAGATTTGAACTTTCACATGTCTTGGGTTGCCTTA -ATTGTGATATTAATATTTTATTAGCATCCACTGTTGTATCATGGATGTTTAGGTGGCTGC -AGCTCCAGAGATCCCGGGGCAAGGGGGCAGCCCCGGCAGTGCATGGCCTACTGGATTTCT -GTTCAAATCCCGCGATGGTTTGCCAAAACATGCAGTATAACTATCTCATACGGTATACTA -AATGTACTCCACAGTAAGAAACGCGTGGAAAGAGCGGTGTACAGACGGAGAAAGATCTCA -AAGAGGCAGGTATATGCATGTATTATTTAGACTAGTCAGATCGAAGCAAGGCGCCGGTTC -TATACAGGCGTGGCTACGCATCTGGCTAAGTTTGGCAACAAACCCCGTATAAGCCTTTGA -TCATGAGGTTGACCAGGCTTGAACGCTATGTAAACATGGGGCAAAATATATATAGATCAC -GACAGACCCAGCGCTCAAACTGTCTCTGATATCTTTTTCCTCGCGGATGTTATTATTTCT -GAAATGACTCGACCCACGCCAGCTACTGTTAATCCCATCAGCCTGGTCTTGGACCTCGGA -GGGGTATTATTCCATCCACAGTTTGGTCAGGTGAAGCTAGCTACCCACGCATCTCCTATA 
-TCGGTGAAAAGGCTAGTTTCTACATCGACATGGATGGCTTATGAGTGTGGAAAGCTCTCA -GAGACAGAGTGTTACCAACGATTGGCGGATCAATATGGTTTTCAAGCAGATGATCTTGCG -ATAGCCATAAAGAAAGGCCGTGCGGTTGCTGATTATGATAGAAATTTAGTGGCAAGACTT -CGCGACTTCAAAAATGCCATGGCTGGGAAGGTGGTGTTCATACTCGCATCGAACATGTCC -AGCCCAGATTACATCGCTCTTCAGCAGCTGTGGGGAAAGGATTTCTGGTCACTTTTTGAC -CACTGCTTTCCCTCTTCTGCAGTGGGCATTCGCAAACCAAGTATGCGATTCTACCGTCAC -ATGCTAGGAGCCTGTGGTGCTGTGCCGGAAGATACCATTTTTGTGGATGATCAGGCAGAG -AATGTACTAGCGGCTTCTGCTATGGGAATCAAAGGGATTGTATTTACTGATGGGGAGAAT -CTCTTGCGGATTCTCATGAACATGACCGGTGATCCGGTAGAGCGAGGCCAGGCATTCCTC -CAGGCCAAAGCTGGCCAGCACTACTCGATCACAGATCGAGGAGAGGAGGTAGAAGAAAAC -TATTCTCAGCTTTTGATGCTGGAAACCACGGGTGATAGGTACGTTCCAATCTTAGGGTAA -ATGCGCGGACGGATTTAATGAGATTACCTTGGCCAGCTCACTGGTGTCATTGAGTCGTCC -CCCAAGGTTTTGGAACTTTTTTACGGGTGGGTTGCACGATCTTTGAGTGAATGAACAACC -CTAAGTTAACTATCGTCCTGTAGGCGCACCAAAGTTCACTACAGACACCTACCCGGACGA -CCTTGATACTACCTCTTTGGCATTGGCATCGATGCCCTACGAAGCCTCCGTAGTGCATTC -CATGCTGGATGAAATGCTGAACAGTGTTGACGAGGATGGCTTGCTAAAGGTTGGAGAGCT -ACCATATGACTCCCCTACTATCTAGCTAATGAAAGTGGTCCTTAGTTTTACCTCGATGAC -TCGCGCCCTCGGGTTGACGCAGTCATTTGCCTCAACATTCTCACGATCTTCTGCATATAC -AAAAGAAGCTACCAGCTACCAGAAACTTTGAGCTGGATCTACGATATTTTGCTTAATCGG -GCCTATATGCATGGCACTCGATACTACACTACACCGGAGTGGTTCTTGTATTACATGAGT -CGACTCCTCTCCCGGTCGAATGATCCTATCTTGAGGGACCGATTTGAGGGCCTTCTCCGA -ACGCGAGTTATGGAGCGGACCGGTGCCGAAGGTGATGCACTGTGTTTGGCCATGCGTCTG -AGTGCATGCAATTTGCTTGGAATCTCAAATCGCCCTGACTTTGATACTTTGACTGCTGCT -CAGTGCAAGGATGGAGGCTGGGAGGCTTCGTTGATGTATATCATGCCGGGGTCTAATACG -AAACTCGGAAACAGAGGCGTGACGACGGCATTCGCAATTAAGGCGCTCCAGGGTGCCGTT -GACAAACTGAACTAAATTAATATTAGCTATATGTTGATTATCACATCAAGTATACAGATG -ATGTACTTCTCATGTCTCAGGTTGGGTGGGATGCAACTCCCTCTCTCTCGCTCCTTTCAC -ACGTAGCTTCGTCTTATTTTCTCCCATTCGATTCCGGGACTTTTCTCGAGAAATCCTCTG -CATTTCTCAGCTCTTAATCCAACAAAAAGCCGGAGACATGAAATACCATCGTCTTCCACC -GTCGCGTGATAATTGAAGATCTGGACATTGAATGGGGGAAACTGACCTGGCCAAAGGACA -TCAATTTCAGACAGACTAAAGGCTTCCACACTATTCCATCTTGTGACGCACATGGCAGGG -CTCTGAGGTATCTTACCCGCCGTGAATAAAGGATTATATGCTGAAGAAACGCCAGGAGAC 
-ACCTATTAAAATCCCCCAACAATAGGAGGCTCCTTACCGTTGCCTTCCCCCTCTGCAATA -AGAGCGAGAAGGGACATGTACCGAAATGATTAATGACACCTCAAACGGAATGGCCCTACG -TGGCTCTATTGATGCCCAGTCTGGGACACTTGCAATCGCACTAAAGCCAACCGTGAGTAG -CCAATGACATATACGCCCTATGATCTAGTTTCCCTCACATACAAGTATCATATCAGGACC -AGTACAATGCGCGCTAGGTCAAGGCTTTCCGATAATTCGGACACGATCGACAAGCCTGTA -AGCCGAAGTGCACCCCACAAACATTCAAGGAGGATGGGAGGGCAGATGTCGAGCAGCGTT -TACAGGCTAGACTAGACTGGTCATGGCAAATCATGAGCTCTTAGTTAAATATACGAAATC -ATTAGACCCCAATGAAGAGGTCCTTTTCACAATGGTTTCCCTTTGGCACAGTACAGCATC -AATGGCACTTGACGGGTGTGTATTTGCAGAAAGCACATTTCTGCAAGAGATTGGAATAAC -TTTCGCCATTGAAAAGCTATATATTACATAAATTCTATCAAATCATCAATTCACATGCCA -ATCAAACACTAATATATAGTCTACTCCGAAAATGGAAAGGCCTTTTGGAATTCCGATGCC -CAGACGCTAACTGGCTAATCATATGCAATCCTGGACTAGGGCTTACCTCTGCCTCTGCTT -CAAACACTTCCCTTTCTTTTCTTCAATCCAGAGAGATATACGGGGTACTCCGTAATAGAT -ATTGGGATACTATTGGCACTCAACAAGATACTCTACTCCACCCGAGTCCTTTTGAATTAA -TTCATTCTCATGATTGACTTGAGCGCATGCTCAATCCCGAAATCAGATCATGAGTTCACT -CAACCAAACTGCATCTTCACCCTCGGATACGATCATCCCATTCCATTTTTGGGACGATGT -TCCTCATAACCGAGCATTATGCATAAATGTCACACTCCGATTCGACAAAGAGCTTGATCC -AGAGAGACTTCGCAGTTCTCTAAGCCGTCTACTGGAGATAGACAACTGGCGAAAACTCGG -AGCTCGTGTACGACTAGACGTAAGTGTTTTAAATAACCAAAAAGCCAAATCAAGGTCAAA -TTCCCCAAGAGCTAAACCTTAAACTAGAGCTCGGGAAAATTGGTCTATCATCTCCCGCCA -AAATTCACCAAAGACCGACCAGGCTTCATATTCACAACCGAGGAACATGATAGCAGTATA -AAAGCCCATCCTCTAGCATCACAGATGCCCTCTTCAACACAATTAGACAAAACAAGCAAA -ATCCACATCCTAGATGGCATGGCCAAATACTCCCCCCTCGTCCGCCACAAAACGGCACCA -ACCAGCATAGCAGATTGGCTAGAAACGGACATCCCGCAGCTGGTCATCCATACAGTCCTC -TTCAATGATGCAACGCTCCTTACTTTCACCTTCCAGCACACTCTCATGGATGCAATGGGT -CTATCTTCATTCCTGCATGCATGGACGGCGGTACTATCAAACCGCGAAGAAGACATTCCC -CCGTTCCTCGGGTTCGATAAAGATCTCATAGAATTACATACTGAAGCCGTGTCCGAAAAG -CCAGCACTCGCGGGAATTATATTCGGACAGATTTCGCTACTGCATTTTGCTCTGGTGGGG -TGGTGGGAGAAGTTCTGGTTTCCGCGCGTCAAGGATAAAGTCCTTTTGATTTCCGGGGAT -TATGTGCACGAGTTGAGAGAGAAAATGCTAGAGGAGCTAGATGAAGATGACAAGGTCAGC -ACAAGCACAGCTATCCAGAAGGATGAGCAGGTCAAAGCCCAAGACCGTCATCCACCATTT -GTCAGCGAAAGCGACGTCCTCCTCGCATTGATATCAACACTCCTCGTCACAGCACAGAAT 
-CCCAGCCGGAACACTCCAGTCCAGATTTCGAATATCTTCGATATTCGCTCTACACTTGGC -CTCCCGTCTCCGGGTGTGTACATCGGGAACGCGATAATGCCATCGTGTGCTGAGTTCCGG -GCACAGGAGTTTGGAGATTCGCATAATGGCACGGGGTTCGAGTGCCGGTTAGGCTCTGTT -GCAATGAAGATCCGCTCTGCGCTAGAGACGCAGCGGACGAAGGAGCAGGTTCTTGCACGT -ACAGCGTTGCGGAAAAAGGCGCTTGGCGAGATGGGGTGTCTGCCGCTTGTTAGCCATCCT -AAACAGCTTGGTGTTTGCTGTACCAATTGGCAGAGGGCTGGGTTCTTTGGGTTTGATTTT -GGGAGCGCAGTTGTTGGAGGTCAGGCTGGGGAGGAATTTCAACCTTGTAGACCTTCTTAT -GTTAATACGGCGGGATCAGGACCGTTCACTGGGAATGGGCCGCGGAATATGGTGGTTGTA -ACTGGGAAGGATAGCATTGGGAACTGGTGGGTGCATATTATTGCGAGAGAGGAGATTTGG -GGGCAGATTGAGGAGTGGACAAGAGAGCAGCATGTGCACTACAAGTGAGGACTACCATGG -GTTTATAGAGTATGTTAATACTAGAACCAGAGAAATGGGCCAAAGAAGAAATAGGTCAAA -GATGAATCCAAAATCCCCGTTGAGACGGACATGCTAAATATGAAACAGAGACAGTAGATA -TCAGTACAAGGGGGCCCGGGATTTCAAAGCACCAGAAACCCAGACCCGGTTTTTCTCACT -CCCCAACATTCGTTTTATCGTATATGCTTCAAAATTTCCTTTCTAGGCGATTCTAGATTT -AACCGGTAACCTTGAAGATGGACTCGGCGAGTCTCTCGACGTTGCCGGAGGTGATACCAG -CGACGGAGATACGGCCGTCCTTGGTAGCGTAGACGGAGTGCTGTAGGAATTGTTAGCAAT -GAAACAAGAGGAAGTATGGAGATATAACTTCTTACCTCCTTGGCAAGGGTGTCCATCTGC -TCGGGCTTCAGGCCAGTGTAGGCGAACATGCCGATCTGAATGTCATGTCAGTCATTTGGG -TCATGGTGAAATGGAATCAGATATCGAGGCTTACCTGGCTGGTGATGTGAGACCAGTCGT -GCTTGCTTCCAAGCTTCTCAAGGTTGGTGCGGAGAAGAGAGCGCATCTCGATGATGCGGC -TGGCCATGCCCTCGACCTCACCGAGCCACTGCTTGTTAAGAGCAGGGTCGTTCATGATGG -TGGAGGCGATGCGAGCACCGTGAACGGGGGGGTTAGAGTAGAAGGGACGAATGAGGATCT -TGATCTGCGAATCGACGCGCTTCTTCTCCTCGGCGTTCTCGCAAACGAGGGAGAAAGCAC -CAACACGCTCGCCGTACAGACCCATGTTCTTGGCGAAACTCTGGCAAAGGGCGATGTTGT -GGCCCTGCTCGACGAAGTGGCGGGGAGCAAAGGCATCCTGGTCGGCGTTACCGCTGGCAA -AGCCCTGGTAGGCCATGTCAAAGAAGGCGAAGTGGCCCTTCTGCTTCATGACATCGCTGA -TCTGGCGCCACTGGGCCTGGGTGGGGTCGACACCGGTGGGGTTGTGAGCGCAAGCGTGGA -GGAGGATGATGCTGCCCTCGGGGGCGGCCTTGATATCGGCAATCAGACCCTCGAAGTCGA -GGCCAATGGTGTCCTTGTTGTAGTAGCTGTACTGAGCGACCTCAAGACCGGAGTCGGTGA -AGACAGCCTTGTGGTTAGCCCAGCTGGGGTTGGGGATGTAAATCTTCTTGGCGCCGGGGT -AGAACTTCTTGAGGAAAGCACCACCAATACGCAGGGCTCCGGTACCGGAGATGGTCTGAG -TGATGACGAGGCGGTTGTCCTTGATGGCGGAGGAGTCGGCGCCGTAAGCGAGCTCGGCAG 
-CGGCGGTGGTGAAAGCTGGAACACCGGTGATGCCAGCGTACTCCTTGTCCAAGCGCGATG -CGACAACCTTGTCCTCGGCGGCGCGGACGGAGGGCAGAACGTAAGGCTTGCCCTTGTCGT -CACCTGAAACAGCAGGATGGCTCGTCAGCAATCCAAGTTCTACTCCATTGTGCAACGCGA -CGATTGTCTGGAGTGAGGCTTACGGTAGGCACCCACGCCCAAGTTGATCTTCTCCTTGAA -GGAATCGGCCTTGAAAGCTTCGGTGATGCCGAGAATGGCGTCCTATTAACCAACCTACGT -TAGCATTGGCATTTCAAACCATCAACCCGATGATTGCAAGCTCAGCAAAGCTTACAGGAG -GACCCTGGGGAACATTGGCCCAGGCGGAGGCGTGTCTAGCTCCGATCACGACGGTGCGCA -TGTTGACCTCGCGGGAGGCAGCCGACCGGCTGGCGACTCTGAGAGTGGAAAGCATAATGG -CAGAGGAGATGGAGAGAAAATTAAAAGGACAACACCTAGATCTCCTAGTTTTGGGGATTG -AAAAGGGAAAGGGAAGAACCGGAGGAAAGAAAGAATATAAAAGGATCGGAGACGTAGTCC -CGATGTTTTTGCCCTGACTCATGTGATAATGGTACCGAGTACCCGCCAGTTGGTATGTCA -ATCCGCCTCGGCTGAGGTACCAGGTACAACCAGCCCCGGCGAGATGTGCGAATGGGATTG -GCTACATCTGCCGGTTAAATTGCAGGGATCATTGTTTTTAGGGGATTGAGTTATGATTCT -AGGTATTTGGTATATAATACATTTGTAGAATGTTAGGAATGTATCGTGAGACCAAGTATT -CCAGATCTATATATCCCTTTGTGTATTCCGTACTATAACATGTATAGCTATGTCTGTTGT -CAGTACATCAGCCCATCTCCAAATGTCTTCTCGGCAAGTCCAATCCCCCGGTGAACTGAT -CAAGGCTGTTCTCCACATCGTTTCCCCTCACCGTTGGAGTAGAAACGGGATTCTCCTATT -TCCCCGTGTCTCATAAGAGACCTGACAGTTTACGTCGGTTAAATATGATATACTCATTTT -CTCCGCACACGAGATATATAAACATTGCCTGTTCAAGGCCTCATTATAGTGATACAGCTT -AGATCTTTAGAACTTTTGAGATAGACTTTGAATATCGTAAAATTGGTATCTAACTAGTGA -AAAGATAAAGAAGCATCAGCCCATGCGAACACCAGCCTGGCGATTCTTTCGCGCGGCAGT -GTCCATAACAGTGTTCATGCCGGTATTTTGGCTACCGTCTAGCGTGCTCATAGATTCGAG -CTTCTTTTCATCTTCAATCTTCTTCAGACGTTTCTCCGTCTTCATCTTTCCACTGCCCTT -GCCGTGGAATTGGTGACTGAGTTGCTTGAAGGCTTCCTTTTGGTTCATGTTGCGACCAAA -TTCATCCACATACCGCAGCTGGACATCTGGCTTGTATTCGCGGTTGAACACCTCGGCCAT -ATGACGTGCTTCCTGCTGGTCACGCTGCTTGTTCTCCCAGCGCGCGTGTTCCTCACGCTC -ACGGGCTGACATGCGCTCAAGCTTGCCGGAGGTACGGTCTCGCTCACGTTGCTGTCGGGC -GCGTCGCTCGGTTTCGGACTCGCGATTCGACTTCTCGGCCAAGAAACGCTGGCGATCGCG -CATGAGTGCGTTTTTGCTGCTGCCATCGTTGTCTTTGACCAGGCCACGCGACTTCAGCAT -TGATAAAGTAGCGGCTAGACCCTGGTCCAAACTTGACTCCTCTTCTAGACCTGTGCCGGT -GAGTTGTGGAGGCTCGGCCGCAGATTCTTGATCACGCTTAATGCGTGCGGCCAGCTCCTC -CACATCTTCAATATCGCCGTAACGGTCCATATCTATTTCCACGCCCTCCCCTTCCCCTTC 
-GGCTTCGGGCTCTGCGCTGGGGCTTTCGGGCTCCTTGGTCACAGATCTGATAGGTTTTTC -ATCTTGCGCAAGGAGTGTAGGCTTGTGTAAGTTCGAAACAAACTCTGATGTTTCGTCAAT -AACAAGGCCAGGCTCCTCGCCTTCATTATCGGAATTTTCAATCTCCATTGGGGTCTGATT -CTCTTCTTCTCGAAGCTGTTTCGCAAGTTCCTCAGGGCTGATCTTCTTGCGTTTTTTGAA -GGCTGCGCGTCGTTGCAACGCAAGAGAAGCCTGAAGATCATCATCGTCCACGAAAGAAAC -ATCTTCATTGATGGGCTTCCGGCGAGCAGGAACTGGAACTCCACCATTTGAATCGATATC -CATGGCCGAGGACCCTTGCGCATCGATCGAATCCGCGACCGGGGCAATCTCGTCGTCGTC -CATGAGGGTCCTCTGTTTTGTAGCTTTGGCCTTCTTCTTCTTGGGCTTCTTGATTTTGAC -CTCGCTGATATCCATGTAATCGGAGGACGGTACTTCAGGCTCAAAGTCTAGACTGATAAT -GTTCTTCCGCAATTGATTCGAAACTTCTTGTCGCTTGGCTTCGCGTTCCTCAGTCGAGCC -TTTGGCGTCGAGTGTAAATCTCTTTCGCTTTTTGCCTTCAATCTCATCGTCGTATTGTGA -AAGGATACCCTGATTCTCCGCTGTAGGATCGTAGACGGGCTTTTTCTTCTTGAGTTCCAG -CTTCTCTTCGGTTCTTTCTTTCTCGACCAAATCGAGGTTTTGGAGCTCATCGTCCTCTTC -GTCGTCATCCACTGCGGCGTCCTTCAGCGTCAAGATGTGATCTTCACCACCCTCAAAGGC -ACCCGCAGAGTGCCCGACCTTGATACCAGCCAGGTCCGCTGCAGTGTACTCGGCCGCTGC -AAGGCGCTCCCGCTCCTCGAGTTCCTCGGCCAGTCTCTGGGCGCGTACCTTTTCGATTTT -CTTCTGGCGTTTCTTGGTTTGTGCCAGCCAGGCCTTGGTATCGAGTTCCTCGTCATTCCC -CTCGCCTAGTGTAGATCCTTGAAGTTGAAGATTCCTTTGGGCGATATCACGCGCTCGCTT -GATGGCGGCATTCTTTTCCTCTCGTTTCCGTTTAGCTTCGGCGGCGTCCTGAACTTTCTG -CCAGTTGTCGTAACTTTCCGCCTGTCGTGTTTCGATAGTGCTGGCTGGTTCTTCTTCGTC -GCCGGACTCGCTGTCGTCTTCTCTGAATTCTGGACCAGCGCCGGGGACTGGGAGTGGCTT -GAGGCCAAGGGCCACTCGAATCTTGTTATTTTGCTCAATGGATAGAGCGTCCGCCATTTT -GACGGTGTGTAGAGAAGGAGGGGGAGGAGATGGTAAAGTAATGGCTCTCGGCGCGACGCC -TTTGGGTTACTGATAACTGCAATCGGGTCACATGATGTGGCCACATTGTACTACTCCTAT -CTTGGCCTAGTGTCATAAAATCTAGCGGCCCCACTTGCAACATAAATAAGATCCTTTCCG -CTTTTTCTTTCCTCTTTTCTTCTTTACCATAACCGGATCTGTGTCCAGATTTCCTGTCCA -TGGGCTCCAGTCTGACTGGTTTCTGTGTGATGGTTGAGAGCTGGGCATTTCGACGAGCTT -GGAGCCATCATTTTTTGCGTGGATGGCCTTTACGGTTTCCGGTCGACAGATCTCAGAATA -ATTGTCTGTGCCTTGTGATGCGGGTCTTGAGCTCCCACTGGTGAGCTCGGCTTGACAGCT -TGGTTTCGGACGGCGTCATATTTCATATCTCTTTGCCGATTGGCCGTTGGCCGCTGAGGT -TAGGGAATAACCATGACTTGCCAACATCTCTTATATAGAACTCGTATGTTGTAAAAAAAA -AGATCCTGATGAATCTCCTCGTTACGAGGACTAGGTTGTCTAGGAAACGTGCTACGAGGG 
-TCTTCTCTGCGAGTATAGCTCTGGTGTAGCACCGCTTGTCGCGTGCTAAACGCGATTGAT -TCAGTAATGTTTGAAAGCTGTTCGTCGACGTCATGATCATCGCCTATCTAAATACAACAA -AAGCTCATGTACAATGTGTAATTTACAGCAGCTTCAAGTGTCCGGTCACCCCGTCAACCA -CGCTCTTGGGGCCCAAGACTCCCAGCACGGTCTTGCTGCCACTTGCAATTTGAGTGCGCC -CAGCATCCTGGATAACGCGCGTGCAAAGCCCAAGGCTGATAGCCTGGGCCTGCATGACTA -GCAGTTCATCTTCGGACTTGACCTGCAATGCGATTTTGGCCTGGCCACCGCGTTCCCATT -TCTTCAGGATAGGCGAGTTGGGCGCGTATTCGAGGAAGTATTTGTAGCATGCGAGGGTAG -CGTGGCCGCATTGAGCAGCGATTTTGCCTATTAAAACGGGATTGTAAGCGCTTGCAACAC -AAGTGTGACGAATACTTGAATCGTTCGTAGAGCACATACCTTTGGTCATGCCCAGATCCA -TGCGGACGACGAGCACCAACTTCACCTCCTCGTTGTTGTCCTTGAAGCTGGACAATTCTT -CACCGTTGCCCTCATCTTCCTCATCAGTCTCTTCTTCTTCAGATTCTGATTCCTCCTCAT -CGGAAGAGTGGCTGTGGACCTTCACATCGTAGCTGTTTGGCCACCCTTCTTTCTTCTCTT -TGAAAAGGTTGAGCGAGGAGCCTTGGCCGAGGAAGTATCCAGTCACACCTGCAATGATAG -CCGTTGCGACCACATATGCAGCTGTAGTGGGAGGTACCCGGTCTAATGGTACTGGGGCGG -CCATtttgttggttgtatggttggtggttggCGATCTCGGCGTGGCGCAAAATAAAAATG -TTCAGGAAGCTCGACCGCCCTGGACATCACGTTTGTTCGACTGAGCTCACAATGGGCTCA -TCTTATTCCCCCCCCCTCAGTTCGAAAAACCTCAAATAAACAGATAAGTAAAATTTCACT -AGCTGTAAGATCTCATATGTTGAGATGTTACCTGTCGTGTGCTCGGTTGAGCTAGTCCTG -GCCGTGTCTTCTCATTCCTACTTACAACATGAAGCTGATCGAAATAAGCTGATATCCGAT -ATGAACCCCGGTAGGAATGATATGGCCGCACGATTTTGCCTCTGTGCTTCAAGAGCCACC -TATTTTAGTTCAAGCTTGCGTTTTATTCAGGTAGAATACCCGCCCCATCATCTATACAGT -TCCTCTGTTCCCCATCTGGTAGGGAAATAGTAATTACACCTGGTATTGATAGGTTACAAA -ATTAGTACGTGTACTTTCTACTTTGTAATCATATAACAATACAATTCAATAACTAACTGA -TTATGTTTTTCCGCCTACCAAATTGAGCGTTGATATAGGAATTAAGTATCAAAAAACGTT -GCCAAACGTCTGTTTATAAGAGTCAAGACCTCTCAAATATGAACCACTCGCTAGATAAGC -GGTTAATAAAAATATCAATAATCGTGCATGTTGACCAACCTCTCTGACGGCCAATACTGG -GTAATTAATTTGCGATTTATCAGTTTCATAGGTTCTTCTATTCGATAATTTCCAATCGCA -TGGAGAGATGAATTTGTGAATACCCGGTATGGAGATAGGGGAGCTGGTAAGAGATACTTT -TTCTTTGATCTGCTTTTAATCGATTTACGCAGGTTCTGGGCCTACACCTGTAGTATCATA -CAGACTTGTCTCAACCCCACCGCCACCAGCCACCGGAACCGAAGAGAGACTGGTCAATGC -TCTTGAGAAATTTATGGATGTAGCCATTGCGCAGACTGGTAATCATGGTCATGCTAGTAA -CTGGGGGCTTGCTGTGGAAATAATCTCAATGCGCTGTGATATTGTGCATTTACATAAGAT 
-AGTTGACGACATAGATGCTAAACATACGTTGCTAATGGATAGGTATGGTCGAGGGTGCGT -AACCGTAGCTATTTGCATCTACCTTTATTTTTACCTGGAAGTCTCTCGTATATTCACTCG -AAGACTGCAATCGTGTCTTGTTCCACAAATCTTATGCCTACCAGGAGAATTAACATACAG -ACATCCATAGCTCAATGTGTCGAAAGATCGAGGCAAGTATCCTCTGCTGCCGAAAAAGCT -ATGTGGTCCCCGGTTTTCCCAACATCGCAGAAGAGTAAGATATGCGTGGAACGGTCAAGG -GTTTACCCAAGGTCATAAACCATGAAAAATCCAGTGACAGATGAATATATTCAAGAAACT -CCCAGCATACGATAATAAGCTACGCTTGCATTACCCTTGAGATAGCACACGAGTCAAAAT -CATATTTTGTTCCTTTGTTCAACCCCAAATACTCATAATATTTCTTCTTTCCTTCCCGCT -CTACATTCATTACGTCGATAGAAACTAAACACAACGGTCTAAACGAACTTGGGATACGTG -AAGATATTCTGCGTGCAGCAATATATGTGTTCCAGCTGGTTTTCATGCTATGAAAATTGC -TAAAAGTGGGTTAAACGAGAATTGAGAGATGAGGTTTAAGTCTGACACTCACTAGATGAT -GGAATATGAGAGATTTGTGAATATAATTTTGCAATGCATTGATTTGAGTAAATGGTTTTT -GATTTTTCTTGTCCTTTCGCTGCCAAGGTACGAGGTTGGTGCTGTCCTCGATATTGCTGT -GTAGATGGTAGATCACGCTTTGCTTGATCTATTGTGAAGGAGACCCCCTCAGCCTCAAAC -ATCCCCGAGTTTTCTTGAGGTGGGAACAGGCTTATCAGGACATGTATATATGCTAATTCG -ATCTTGCATGAGAAATCAGAAGGAGGACGCTTTCGAACCAGTATGACATTGACGGTTTGG -GCGCCACTTAGAAACAATGAACATCACTACATGGGTCAATGGCTGGTCCTTTATATAAGT -AGCCCTTGCTGTGAGCCTTATGGAGTAGGCCAGAAACAATGACATTTCAGGAATTTCAAG -CTACAGAAATGACACGAATACATTGTTGCTATTTCCACCTGGGAGCCTATAGTGTGAAAA -TTTCAATCTTGAATCTGGTTCTAAGAGACAAAACCAATTTGCGAAAGAATAAGTTGGATT -AAGCTAGGTAGGCATGAGATCCAAGTTTTGATAAGCATTGTGGATACTCAGAGACATACT -ACACAGGGCAAGGTGCATGGGTGCTATGAAAGTCAACAAATGATCGGCCCTCATGCTGTA -GCAAAGGCTCGAATGGATTGACCAATTGGTTTGGCTTTTCTGAGAGAACCACCTCCTTCA -GATTTACAACAAGAGGAATTGAAGGCTTTTCGTGCGTGTCTATTCAGCATCAGTATTCGT -GTCGGCTATTTGAATGCACGCCCGAGTTCTCTCATGTCGTTGTTGATGCTCTAATCGAGG -AGAGTGCTGTTCATAGCGCTCCCATCGAATTATCTTCTTTGGGCCCAGCATGACTGAATC -TAAACTTCTGGGATGAAGACATCCTGTAAGCCATAGTTGCTGGATTAATGACCCAGAAAT -GCGTAGTGGGATGGTAAAACTACAACAGAGGGATAACCCACGAGAGAAGACTGCATTATC -CTCTTCTGATGGGTGATTGTTGTTCAATTGGACTTGAACGGACTAGTTGAACGGCAAATT -TTCCACTCACGGAATCAAAGAGTTCCTAGTCGATTATGGGGATAATCCGGGGTAGATTCA -CGGCGGTATCAATAACTTTGGTCCTGAATCGCGCGATGGCTACAACCTCGAATACGATGG -CTCAGAGTGCAACAGCGTGCGCTTGATGAGAACTCGCAGTATAAATCGAGTAGCAAAGGA 
-GCTTCAACGTTGGGAAAGCTATCTTATGGGAAAGGATTGTTCTAAAAAAGGAGGCAAAGC -TTGGTATCAAAGAGGTACTAGTAATGCATTCGGTGAATCTGTTACGAAATCCTACTGGAA -AGCTAGGATACGTTGATTAAAAAGGTAGCTGGAAAAAAAGCCCAATATCGAAGAGGCTCT -ATCCCCGACAGATATCTGCCCAACAAAGAACACAACCCAAGCCAAGCATATTGGACACGT -CACAATACACTACCACTGATTATATACAACCCAAAACACCAACATAGACCAGAAGAGGCT -ATTTAGTTTGCTCTACCAGCGGGGGCAGCACGAGGGGCATTTCCAGTAGGACCCGCGCGA -CGGCTCCTAATTCCGTTGTTAGACTGATTAAAAACGACAAGCACAAAGGAATTTCAACTT -ACATCATGGTGCGGCTGAAGAAACTAATACCAATCATGAAGAGCCAGCTGGGAACAGCAC -CGAGAGCCCAGATAACGTAGCCCATAGGGATGTTGAGCTGGTACTTGGCGACCCAGCCGT -CAACGGGGTGGAGAGCACCCTCATAGGGGGGGAACTTCTCGTAGGTCTTCTTAGCGTGGA -TCTCGTAGATGGTCGCGGGGGTAACGGTGGTGGAGTTCAGGTAGATGTTGTGGAGAGGAG -TTGGGGCACGCTCCTCGCCAACAGCCGGTGCCTTGGGGACCTGGTAGTACTGGACGGAGG -GAGCGCTGGCGGACCAGGCGGCGCACAGAACCTGGTTGGCTTCGCAGTCCAAGTATCCGA -GGTGAGGCGAGGTAGGGTCGGCGGAGAACAAGAGAACGGATTCCTGGGAATTACGAACAC -GATGTTAGGGATTTGGGACTTGGCGAAAAGACGCGTCGGGGGCAAAGCATCAACTCACCT -TGAATGCCTTATCAGCAGTTCCGCAGCGACCGAAGCAGGTCTTGTTGCCACCAGTGGTGT -AGATCAACCAGTCCTCCGCCTGGGGTCCCGGGGTAAGGATGGACTGCCAGTTGTCCAAGG -ACACGTGGGTGACGCTCTTTTCGGTAACCTTCTCGGCCAGCTTCTCGATCGGGGAAACCG -GGGCTGCTGTCGGCACGTAGGACTTGACCTGGTTGAACCACCCTTGCACGCGGTCAGCGA -GAGGGACCTGCTCCTGGGCCATGGCCAGGGCAGGGAGGAGGGTGACGACCGACGTGAAAC -GCATCTTGAAGGCTGAAGCGTAGAAAAAGTTAGTGAGACAGGTCTCGGGAATGATAAATC -AAGTTTGACAGATTCAACGAGATCAAGACAACTTACCAACTGAAAGGGATAAGGATTGAC -ACAGTCAGAGACAAGAGGAAGAATGGTGTTCAGAGGGAGTTGGGACAGCTGGCCTGAGCG -CTAACCAACCACAACGCTTTAATTTTACTCCCGCACCTCCTTTCTCTGCACCAATGGCGC -CATCCCCATGCAATTAGTTCTGAGAATGTTCTAGATTTATTTCAGATCCCTTTTTTTGAC -GATAATCTTTTGGTCTTACAATTAGATCTTACCATGTGGATTATGGCTTGCATGGAGCTT -CCGACCCCGCAAATCTAGATGTCAACAATTGAACCTTGATACTATCCATGTGCATCAAAA -GCTCCTGTTTTGGTAGTCGGATCTGTTGAAGACCTTACTCACCACCAAAATACGTTGGTA -ATTTCTTTTGTTCTACAGAAACATGTAAAATCAAAATTATATCGGAATCTATCAGTGGGA -GAGGGTCAAAGGCATGGTTCAATGCCGTCCTGCGATAATACAGTAAATTGCATTCGATAT -CAAACTGCAGTGGCAGTGCGGATCTTACATCCGTAGAGTGTTAAGACTCCTCGGTAGCCG -ATGGAGTCGCATATGTATTTAGTGTTAGCGCTGAAACAGTTGGGGCCATGCTATCTAGAG 
-CTATAAGATCAACACAAGGGCCGGTTGGTGATACCGAGAGGTTGATAGTAAGCAAACAGC -AAACACCAAAACATTTGATCTTCAATTGACCCATACAAAATAGAAAGATATCTATTCAAA -CTTGATCCTCAATTACATGATTATCGCTCGTGGTGATGTAATTAGCTTATTGTCATGAGA -TATACTGGTGGGTAAAGAATGCAAGTGTATGTTGTATGCACCTTCACCACATTCGAAATT -GAAGACCGAAACTTGGATGTTTAGTGCCGCAAGGAGTTAGTAAATTGTTGACCCTAAATT -TGTGTAGCTTGCTATGTGACTATAGGAGGGGTGCAAATATTGCATGGCCACTCATGCACT -GATAGACTAGTCTAGTCCTGCATGGATACATGGTGAGTCTATTCAGATGGCCTCGACACA -ATGCTTAGTGAGCGTGTCGTTGTAGAAGTGGCAACTTTCATGACAGGAAAAAGCCTATTA -TATACGAACTGCGAACTTTCCAAAGTCAAGTGCGTCTTATGATAAACATACCTGGCGTTC -TATATTAGGCGTTTTATCTGTTCAAACATAGGGTTGGGTTATTCTAATGTGGCGTGAGCC -TGCACTATAATTTGAGATCCCTGCAGATCTCCAGCCAACTAGAGGGCCTGAAATGATGAT -GACATCACTTCCCCGTTGACATGGGACCTGGTGTGGACAGGATCAAATGCTTTGAGATCT -TTAGTAGGAATATACATAAAGATATATACGGAATAGTAACATAGTTCTTACGGAACAGGT -CCGTATAATGTGTCCACGGTAGAAACGATCATTATCAATTGAAGTTGTGGATGCATGAGT -CAGACCCCACGGGTAAGGTCCGATGTCGGTTCCAGGGCATTGAAGTAAACTGCACACACA -TTGAATCAGGGATTTTGGGAAAAGAGTAATTGGAAGTGATAGGCATTTGAGTCATAACAC -AAATAATGCAATTAGTTCAGAATGTGAGAGTCTAAGATCCTATCATACCCGTAGACACCT -TCCCCCACCTCTCCACAATCAAAATTTTCGTTTTTTGCCTAGAGGGGGGCGTCCATTACT -ACACTCACCGCCCACTTACTTCGGCGTGTGAATTTATGACTCTTTCCGTCTCTTTTTTCC -AAGGTTTCTTACTACAAAGATCTTAAGTACCGCTCGATTTCCTTTGAGAATTTCGATCTT -TAATTACTACTCTTAACTTTTGGGCTTCGTATTTTCAATCCATCTCGACCCTACTTAACC -ATCTGCACCATGTGTACCGGTGCCGACTCCGAGCCCAATGGGCAACCCGCTATTGCTTCC -AGTATGTCAATTGACAACTTTTATACTAGAATTGCTCACTGACCTTTACATAGACGGTGA -CCACGCTGGTTTTGTTGGCATTGAAACTCGCCAGAACCCCCACCCCTCCGCATCCCGCAA -CCCTTACGGCCACGACGCTGGCGTCACCGATTTCCTTAGCAACGTCTCCCGCTTCAAGAT -TATCGAGTCTACTCTGCGTGAGGGAGAGCAGTTCGCCAACGCTTTCTTCGATACTGCCAA -GAAGATTGAGATTGCCAAGGCCCTCGATGACTTCGGTGTCGACTACGTGAGTTGGCGGAA -AGAGTTGATGAACATTGAAGACGAGGCTAACGATCATCACGCTATAGATCGAACTTACCA -GTCCCTGCGCTTCTGAGCAGTCCCGTGCGGATTGCGAGGCCATTTGCAAGCTTGGTCTGA -AGGCCAAGGTACGTTAGGGTTTTGCCCGGGTCTCGTGACTCTATCTAATAACGTCTCTAG -ATCCTCACTCACATTCGCTGTCACATGGATGATGCTCGTATTGCCGTTGAGACCGGTGTT -GACGGAGTGTGAGTCGTGCATCCTGTCCTCACCTTGTTGACTCCATACTGACTTTGTGTA 
-GTGATGTTGTTATCGGTACTTCCTCTTACCTCCGTGAGCACTCTCACGGTAAGGACATGA -CCTACATTAAGAACGCCGCCATTGAAGTTATCGAGTTCGTCAAGTCCAAGGGTATTGAGA -TCCGTTTCTCCAGTGAGGACTCTTTCCGTTCCGACCTCGTTGACCTTCTGTCAATCTACT -CCGCCGTGGACAAGGTTGGCGTGAACCGTGTCGGTATTGCCGACACCGTTGGCTGCGCTT -CCCCCCGCCAGGTTTACGAGCTTGTCCGTGTTCTGCGCGGTGTTGTGGGCTGTGACATTG -AGACCCACTTCCACAACGACACTGGTTGTGCCATTGCCAATGCCTTCTGTGCTCTCGAGG -CCGGTGCCACTCACATCGATACCTCCGTTCTTGGTATCGGCGAGCGTAACGGTATCACCC -CTCTCGGTGGTCTTATGGCCCGTATGATGGTCGCCGATCGCGACTACGTTAAGAGCAAGT -ACAAGCTCGAGAAGCTCAAGGAGATTGAGGACCTCGTCGCCGAGGCCGTTGAGGTCAACA -TTCCTTTCAACAACTACATCACCGGTTTCTGCGCCTTCACCCACAAGGCCGGTATCCACG -CCAAGGCTATCCTCAACAACCCCAGCACTTACGAGATTATCAACCCCGCCGACTTCGGCA -TGACCCGCTACGTCCACTTCGCCTCCCGTTTGACCGGCTGGAACGCCATCAAGTCGCGTG -CCCAGCAGCTCAAGCTTGAGATGACTGACGCTCAATACAAGGAGTGCACTGCCAAGATCA -AGGCCATGGCTGACATTCGTCCTATTGCCGTCGATGATGCTGACAGTATCATCCGTGCCT -ACCACCGTAACCTGAAGTCCGGCGAGAACAAGCCCCTTCTTGACCTGACCGCTGAGGAGC -AAGCCGCATTCGCCGCCAAGGAGAAGGAGCTCCTTGAGGCTCAGGCTGCTGGTCTTGTGG -TATAGGCGATTTCCTTTTGCCGATACGTACATTGTATAATGCTATGTCACAGTGGCTTGT -GCAAAACCCAGGCGTGGCGGAATACCTGATGATTTGATGCTTTTTCATTTGTTTTGACAT -TTTTTTCCATCTAAGGGGTTGAAAAGTCtttctatttttttttttttctttttcttttGG -GAGAGCCAAAAACACATTTGTGTTAGGAGAAGAGAGTTCTCCTTTATTTGTACATTTAGC -GCAGAAGAGGCAGTAACTTATTCTTAACTTTAGACTGGCATTCATATTGTAGTTGAAGTA -CTAATTATAGCTTTGCCTAATTACGTCAGTGGAGAAAGACGAGAACATAGTATATGGAGG -GTAAATGCAGGGTGAATTTGTAAAATACGTATGTATTGTGAAACTATTTTAATGCTATCA -ATGCTGAATCTCTATATCCCAGATGTCTGATTGCGGAAAGTTGACCAGGGATCTGCTTCA -TGTGGCAGCCGAGGCCCGCTAAGACTCAACGGGTCAAAACGTACTTCAATGAAACCTTCC -CCATCGCCACTCCCACCGAGCATTACTGCAAAACTATTCATAGGAATACTAATGAGCTCT -TCGATGCTGCCAGTCGCCCTACAGAACAAGCTGGTCGGCTACAGCCGTGTGTCCAGCGCG -CACCTATCTGCCCTGAACCTTCCAGACCTGTAAGTTAATTTTCCGTTCCCCGCAGAAGGC -CAGAACAAAAGACGAGAGCCAACCATTGACCCATTTTGATTTTCAATGCAGTGTTCGCAA -CGTCGTATTTATTCTGTTCATCCTTCGGTACATGCGCAAGACCTTCTACTCCATTCGTGG -CTATGGCATCCTCGGCAGCATTCGCAACATCTACATCTCGCTCCGTCTGTTCTGCTACGG -CATTTTCCTGCGTGTCCCTGGTGTTCGGGGCCAGGTTGATAAACAAGTGAATACTGCTAT 
-CACGAAACTCGAGTCGAAGCTGGTCAATTCCGGACCAGATGTTACACGGTATCTTGCGCT -CCCTAAGGAGGGTTGGTCGCCGGAGCAGGTCCGCGCTGAACTTGATAAGTTGGCTGGGCT -TGAACATACCCGGTGGGAAGATGGTCGCGTTAGCGGCGCCGTTTATCATGGAGGTGCAGA -GCTGTTGAAACTACAGGCTGAGGCCTTTGGACAGTTTGGTGTCGCGAACCCGATTCATCC -GGATGTTTTCCCTGGCGTTCGGAAGATGGAGGCTGAGGTGGTTGCTATGGTATGCATGAT -CATCCCGTTATTGGAGTATGGCCTTTGAAGGATGCTCTCTGACGTCTGCTCTAGGTTCTC -GCTCTATTCAACGCCCCTTCGGATGGTGCTGGTGTGACCACCAGCGGTGGCACAGAGTCT -ATTCTGATGGCTTGCTTGGCTGCACGGCAAAAGGCTTTCTTGGAGCGCGGTGTCACTGAG -CCGGAAATGTAAGTTGATATACATATACTATAGTATTCAGCAGTAGGATTGACCAATCAT -TGCAGGATTATCCCCGATACGGCACATGCGGCATTCATCAAGGCTTGCAATTACTTCAAG -ATCAAGCTGCACCGAGTGCCTTGCCCGGAGCCTGAGTTCAAGGTCGACGCGCACGCTGTT -CGCCGTTTAATTAATCCCAATACCGTCCTTCTTGTCGGATCAGCACCTAACTTCCCCCAC -GGCATTGTTGATGATATCCCCGCTCTATCGCGTCTAGCTACCAAGTACAAGATCCCTCTC -CATATTGACTGCTGCTTGGGATCTTTCGTCGTGGCACACCTCAAGAAGGCTGGCTTCCCC -TCTCCCTACGAGGAAGAGGGTGGTTTTGACTTCCGCCAACCTGGTGTGACCAGCATTAGT -GTCGACACCCACAAGTACGGGTTCGCGCCCAAGGGTAACTCGGTCTTGATCTACCGCAAC -AAATCATACCGAAACAACCAGTACTTCATCTACCCAGATTGGTCCGGTGGTGTTTATGCC -TCTCCCTCCGTGGCGGGCTCCCGCCCTGGTGCCCTAATTGCCGGTTGTTGGGCTAGTTTG -ATGAGCGTTGGCGAGGCTGGATACATTAATAGCTGCACCGAGATTATCAACGCGGCACGG -AAGTTCGAGACCGCTGTTCGGACCGATGCGACTATCTCGCTACATATGGAAGTCATTGGA -AACCCCATCGTCAGTGTGGTCGCCTTCCGCAGTAAAAACGGTGCCATTGACATTTATGAT -ATTGCCGATGATCTGTCCGGTAAAGGCTGGCACCTCAACGCGCTGCAATCTCCCCCGGCT -ATTCACTGTGCTTTCACTATTCCCACCGCCAAGGCTGTCGACCAGCTCATTGTTGATCTG -ACTGAAGTCATCGGCAAAGAGCTTGAGAAGGCCGAGCAGCGCAAGCGTGAGGGTAAATCG -TATATCCTCAATCGTGGTGATACCTCTGCTCTGTACGGTGTGGCTGGAAGCATTCCCGAT -AAAAGTGTCGTCAGTCGTCTGGCAGAAGGGTTTTTGGACACTTTGTACAAGGCATGATCC -TTGGAATGAACATGTCAGTTTTCCTTGCTTGTATCTTCTTTACTAGGCCTGTCCGGACTC -CCAAGGCATAGTCTTGGTGTCTTCTAGCTGTCCTTACCTAAGCTGCTGGTGCAGTGGCGT -TATTATTATCCAGGCCCTTTCAAATTGCACATATTTGTTTCAAGTGTTGGATTCCACTGG -ACAGTAATAATGCATATAATACAATAGTTTTCCGGAAAGACATTTGCCCGAAAGATGTGG -ACGTAGAGTACATGACCAAAAGAGAAAAAGAAGAAAAAAAGAAACCCGCTCCATGCAAGC -AAAGCCAAAACAGAAAAAAGAGGACGCAATCGACCAATCGATCAATCGTCCGACCGAGTG 
-AATGGCATTTCAAAAGACTCAGATCCCATCTCAGTCTTCAAGACTGGCCATAAAAGATCT -TGTCTCAAAAGAGTCCTATTCCACGAGAAGCTCAAGGAGAACAATCACTGACCACGGAGC -TCCCCCCGCTGCTCAGTGCAGCTCTTTTCGGTATAATACCCGGGATCAGGCTTGCGCTCG -AGATGCACCTCCTGCTTCGTGAGGCGGTACTGGCACTCCTCAACCTGGACAAGTTCCTTC -ACTTTCATGATTTGACCCGCGACGGAGGGCGAGACGCTGTGGAAGACTGTTGCCATACGC -TTCTTCAAGCCGAGCGCCTTGAGGACATCTGTTGAACGGCGCGGGAGTCCAATTGCGGAG -CGGACGAGGGTGATTCGGAAGTAGGACATGTTTAATTGTGGAGATCAAGGAGATCTGATA -TTCGGATGTTTGTTCTTTTGATGGTGTGGTTTGGATATTGTCCGGAGTCGTGAAATTGAA -CTCGGAATGAATGGAACGGAATGTGGTCGATTTAGGAGGATACCGGGGTATCAATTGATG -AGAGCTTGCAGCCGGTACCTGTCGTGCAATTGATCCTATAATCTGGCGGACAAACGTCGC -GGATTTGACGATGATCGCGATCCCGCAAATTCAATTAGAAAGCGGTATGTCACGTGTATA -TTCTGCCATGATCCGATGACAGCAGCGACGACAATCTATCGGCCTGTGATAGCTGATATA -GTATTTCACAATTTATAAATTGGGACTATGGGCTGCGTTCTTTCCGGCTTGACAGTACCT -GGATTTATATCATAGTTCTCTGCTACGTACTATCCCTCTGCAAGCAACGTCACATGCCCG -CATTGTTGACACCTTCACCCCTCGCCCTAGCTTCGAGCGATTGAGCCGAAACAACGACCC -TACAGCATACCTGATACCGTACTGCGATCATGGCGCAGGCCGGTAACATTGATACTACGG -TCTCGCTACCGCGACTCGAAGACCTCCTACGGCACCCGGAAGATCTAGATAAAATCAGTG -GTCTCAAAGCGGAATACCTGCGGAAGAAGACGGCAGTTGATTCACGATTGCGTGAAGGTC -TGCGAGATCAGTTGGAGGCTGTACAACGCAGCATCGGTGCTCTTACTGAGGGCCAGCGAC -AAGTCTCGAAAACAAAAGACGAGCTTCAAGGCATCGACAAGCTATGCGCCGAGTCGCAGG -ATAGTGTTGAGGACTTCGCGCAGATTGACCAACTTGCGCGGATCCAACGGCATTTTGATG -CCACACTTATGATGAAACGAGGGTTGGAGAACTTCGGTGCGGATATCCAGGAAGTGGAGG -ATTTGCTTAAAGAGGATGATGACGATATGGAGAATCAGCCTAATATTCTACGGGCTCATA -TGCAGATCTCGCGGTTAAGAGATTTCCGTGATGAGGCTTTGGATCAGATTCGCAAGGCGG -AGGATCAGAGCGCCGAAGAGACTCTGACAGAGTACTTTGAGGGATTGGATTCTGTGATTG -AGTGGTTTGATGATCACCTTGGCACGGCTTGCATGAACCTCATTCCCTTAGTGCAGGCTG -ATAACAAAAGCATGGTCGTGCGGCTTGCTGTTGTTGTCTTGAACGAAGAGAAAAACGATG -ATACAGTCCGAGCGTTGCAAGAGGCGCAGAAAGACCACAAGGACCTCGCAAGCCGTTTCA -AATCTATGAACATTGGTCCTAAGACTGTGCGGGGCTACAAAGACAAATTCATTCAGGCTA -TAGAGCTTTATGCCCAGGGCCAGTTTGAAGAGACCCTTGAAAGCTTCCTTGCAGATCCGG -AAAATCTGGAAAAAAAATTCCGCTGGTACTTCAATGATCTATTCACTGTCAAGCAAGGCA -TGCAGAGCCTTGTCCCAAAGAAGTGGAAGATATTCAAAACATACACCGATATCTATCACC 
-ACATGATGCATGACTTCTTGCTCAGCATGATCGACGATCCCGAGCTTCCAGCCGATAATC -TGCTCAATATCATCCACTGGAGCGACAAGTACTACAAGAAAATGAAGAAGCTGGGATGGA -CATCAACAGATCTCCAACCCAACATTCTCGACGACCGTGAGCCAGAACTAGTCCGAAAGT -GGCAGAACGTCATCATCAACGCCGTCGAAGAATGGATGGACCGCATTTTCAACGCAGACA -GGAAGTCTCTCGTGGAACGTGCCGCAGATGCTCTAGATAACAACGCTGAAGGCCACTTCC -GCACGAAGACACTGGCTGACATGTGGCGCATGCTCCACGAACAAGTCATGGCATCGGGGG -CATCAGACCGCGCAGACTTGGTAGAGGGCGTCATTGATGCCATGTTCCGGGCCTTGAAGA -ACCGACAGAATGCATGGCAGACACTCTTGGACGAAGAATGCGCGAAGCACCAAGCAGAAG -GCGCCGACCAAGAAGGCGTGCAGTTGCTGCAAGACTGGCTGATCGGAATAGCCAACGACC -AGATCTCTTGCATCGACGACAATGAAGAGGGCAACCAGTTTGGCTACCTCACGCGTTTCA -AGGCCGATTTCGAGCCTATGGTCACACCCAAGTACATGGGCACCACGGCGACTATCGAGC -TGGATGCGCTGCGTGATGGCTACGTGGACCTGAGCACGCACTGCCTGGCGCAGTTCGTGA -GTCTTGTCTTCCAAGTGGATCTAGACACCGTCCTGCCAGACTTCTACACGTCCCGCTGGT -ACGGCGAATTTGCCATGAAGCGCATCACCTCCACCTTTGAAGACTACATGGCGGACTACA -ACTCCGTGCTGCACCCATCCCTGGCCGAGATCCTGGTTGAGGAGTTCTCGGACGAGCTGT -TGGTGCGGTACCTGTCTGCGGTGCGCAACAAGGGTGTTAAATTCCGTCGCCATGTCGACC -CCTTCACAGACAAGTTCAAGGACGATGTCCTCACCGTCTTTGCGTTCTTTGAGAGATACC -CCGACTCATTCGAGGGTACTATCAAGCAGAAGTGGCGGCTGGTTGATTGGCTTGTTCGTC -TCCTTGCCACTGAAAAGGGTCCCGCGCTTGTGGAGGTTTACGAAGCATTCAAACTTGAGT -ACTGGGATCTACAGCTTTCGTGGGTGGAGGCGGTGCTGCGCACCCGTGATGATTTCGAGC -GCAGCATGCTTACCGCCATCAAGGCGAAGGCTGCGGAGTTGTCTGTTGAGCGGGGACAGG -AGACGATCATGAGTCGGTTGAGATGATTTCGTGTACTTAACCGTGTGGCATTCTTTCAAC -CGCACTCATGTTCCTGCTTCTTCTCGGCTCCACATACCATTCACTACGCTCGTCCTCTTC -ACCACACTTTTACAGGGCTTTGGTTGATGTACTATCCATCTGGCTTTGAATTTTAGGCCA -AAATCCCCCAGGCGTGCCTCAAGCAGAGATGTTAGACCATCTCTTCGTTGCTCAGCTGAC -CCCGAATCGCCTTCTACCGCAACGCACATCCCGATGAGCAACCCTACCCACATGTATCAT -CCGCACCCAAGCAGGCTTGGGATCTAGTCTACTCATTAATATCACTAGAAATACAGCAAT -GGAAGCGAATCTCTCAAACAGCTTAGCTTTCTCATAAATATGACCAAAAAAGTTGCCCGG -TCTACGTAGATTATCCCCAACTGATCATCTTCTACTGGATGGAACGGCGTTATGCGGAGC -CTTTGTGTTCGAGAAGGGGCGCGATGCCGGGCTTGGATTGAGGCCCCGTTCTGCAATGAC -CGTATATTCCGCACCGAGACGTCCATTCAGACCATGACGGTCGTCGACACCAGGATGTAA -TTTCCTGGCGCCCATTTCACAGATGCAGATAGTATCCAGAGGAATTTCCTTTGCCCAAAT 
-GAAAGGATATTCACGCGATGGTGGGGCAGAAGGGGCCGAAATGCTGCTAGGCATGGACAA -AAGAGTGTTTCTGTCGGGGATTGGTGAAGATTTTGCGCGGCGGGGTTCATTTTTTATTTC -TGCGGAAGACCCAGCAGGTGCTCGGTGTGGAATAGTCCGCTTGTCATCAGAGTAATAATT -ACGATATTGTGAGATCAGATTGCGAGCATCAAATGTAAGGCGTTTGGATGTGTGGTTGTC -CTTGGCATCTTTGCTCTGGCCCTGCCGTCCACGAACGTAGATGGTATTGACAATAGTCGC -GTGCAGAAGGAGTGGTCGCGGTTTGGGTTTCTGTAAAGACCCATCACTAGAAACAGTGTC -CGGCTCAAAGGCCCGAGGAGCGTCTTGCACAAAGCGAATGGTTGGGTTTGGTCTTCCTGC -TGGCCTCCTGTCGGGCTCACTTTGGATGAAGCCAGCCTCGACGAATTTGTCACGTAGCTT -TACGCAAAATGGAAGCAAGCGACCTGTAGGATCTACCGGAGAAGCATGTAGAATGGTCGC -CGACTTGCTTTGCGGCAGGGCATGCAATGACTCGAGAGAAACCGTCAAGGGCGATGATTG -GCGAAGGGCGACTTTCTGTTGTGTGTGAGCGGCGACTTGTTCCGCCTCGTTCATTATGTC -TACCAGGTCGAGGGATCGGAAAAAAGCAAGGGCTTGATCGAGACGCTCTTTATTATTGAG -ACTCATCACTCCAAGAGTAAGGTGGAGTGTGCCCAGTGGCCGAAAGGCTGTGCTGGGAAG -ACCCAGGCGAGAGGTGTCCCCATGGGTGTCCCCATGGCCTTTGGAGGGTGATTGGGAAGC -TGGCCCAGGTTCAGCGAGATGAGCCGCTTTGAAGGCCGTGAGGGATTTTTCCAGCTGTGG -CAGCGATTTGGTATTTATCAGGGGGAGACAGAGAAAATGGGTCAATTGCGGCTGCTTTGG -CCGTTTTTGTGGATGTGGTTTGCGGGGCTGATGCCTTGATAAGCTTGCGGTGTCTGTCAT -GATGCACCTGTATCTCCACGAATGTGACAGCATGGAATTCAACAAGCCCTCTGCATGAGT -CAGTCGCCAAAATTTCCACTTGACAGTTGCTGACAGGAAGAAGTCAAGAAGCTTTCAAGC -CTGGAAGTCAGGGGACGCACGGACCAGGACCCACAACCTTGTATCAACTACATGGGACTT -CATGGATGTTTTTTGGGGCATCGCTGAGTTATAGGCCACGATAATATTGGTTATTCAATG -CTAATGTTATCTTCAAAGTGACACTCCATTTCAGCACGTCGGTTTTCCCAAATACTCGAT -CCAAACTAGCGGCGAAACCAAAAGCACCAAATTTACACATTCCAGGAGCGTACCACTCCA -GCGAGATCCCGAGGCATCCAACAAAGGACCTGCAACTTACAATAGATGACTCCTATGCAT -TAAGTCAGCCTGAGTGATTCCGAAATTTGACTTGAGTCTGATAGGATAGTATTCATGAAA -AACCCAGACTGCGGCAGATATGCCTATAACAGCCTCAAGCGTGATCTGGGAGGCATGGAC -TTTGTGGGCCTCCCCTTTAGAAGAATAAAACCATACATCTGTTCCACATCGTCGTTTTGA -TACCAAGCCGTATGAGTCCATACACGGCCTGATGCTCGAAAAGCCCCATACCTGGTTTCG -ATCTTGGGGGCTTCTTTTACTGATTCATTTCCAGATGTGGCGTGATGAGACAAGACTCGC -AAACATGTCCGCTCAAATAAAGGCCAATAGTTCGAACGAATAATCCGAACGAACCCCTAC -AAAGCATCAGATCTGTCATCACTGTCATGAACTATCTAAATGATGATCGAGTACGTCCTC -ATATGGTTGGAACTTTGAATGAAGTATATTGGGAGTATGGCCTAGTAGACTAGTAGTGGG 
-TCGCCGATGGAATGAGGCAGTATATGTCCAAGATTGGTGGGATCAATGGTTCCGAAACCG -GCTTCAGCGTCTTGGAGGAACAAATACCAAGTGGGTCACGGGCCAGATAGCTGACATTAG -ACGACCATAGGCAGTAAGGACGGGATCTAATGTCACACCTACTATCGAGGCTCTAGATAA -GTTGAACGTGGATGCGTTGGATATGAAGATTGACACGAATGATCTGGACTGAAGCTGAGT -CTGAGTAGGTAACCATGTGAATTGATAGATACATGTATATAGTTCATACAAGATGTCTTT -TGAATTTTGCTTGAAGGGCAATGAGCTTTGATGACAAAACAATTCAGGCTGTTATAAAAG -ACTTAGAGGAACGACGTTAATTTTTTAGACATTGTGTCAGCCGTATCTTGGGTCGTTGAT -TGCCCAGATGGTTCAATGCAATAAACAATTTCGGTCGCAGAACCGCCAATATCCAGCTAT -TGAATCTTGGGTCTAGTTACTGTAGCATTAAAGTCCCACATGACATGACAGAATTACAAA -ACATCAGTACCTTTGGGTCTCATCGAATAATTGTCCTTCATGTAGACTATGTCCTGCATA -TCCAACCACACCTCGAACAACGAGGGCAGGGAACACTTCATCCACCCTGGAAATGACTCA -TATGGATCTCCCTCCAGCGTCTCAACGAGTCTCACATAATAACGTCCCTTCTCTTCGTGT -TTCAAGTCCTCTGGAAGGGGAGCGTTTTGTAATATTTGAAAAATTTACTCGTCAATGACA -ATAAACATGCGATAGCCAGGAAATATATCGAGCACATCAAGCTCAGCTGCCTCGGCCCTG -AAATGCTCGCGGAGCTGGTCTATCGATGCTCCGTCGAACTGGGAAGAATCTTCAATAACC -CGTGGCTGGTAATTGGGCCAGATTTCATCCCAGGGAGTGATTTTGAACTCATTTGCCCTG -GGGTTGTACTGACGGGAAGATTCGTAACAACTGTAGAATCCGTCTTTCATGTAATTCGCG -GTGAGGTCAACCATACGCGGGAAACCGGCATTGGATTGGGGAGTGTAGGTCGTCCGGTAG -ATGGTGTAGCCCCAGGTGGGCTCGGAGTAAGAATAGGACCTGGTAGCCAGTGGCATTGCT -TCATCGGCTTTCGAAGTAGCGACATGATGGGTCGACTTTGGATGTTGCGGTGGTTTGGAA -GGGAGTTGTGGATGGAGAACATTGGAACCTTAGAAGGACCGCCCTATAGTCCCCTTCAGG -GCTGTGAGCAAGTTAATATCAAATATTCTGTCCCCCCTACAATACCCTACTTGTGCTGGT -TCCTGGTCAATCCAGTGTCTCCAGCATAAATTACCGTCAGTTCCTTGGCATCATTCCCAC -ATCGTCTTGGCATGTTCGCATTCGAATTAGGGCATGGACGGAGTACGGAATACGGAGTAG -AGAGTAGAGAGTCATTGTTACTATCTCTCCATGTGGCACTTTCAAGGTTGTATCAATCGA -GAGTCTCGTCAGTATCGGGAAGTCATGAATAATAGGCAATCTCCAAATCTCCAGTTGGCC -TTGTTGCTACATGGTTCATTGCCGAGAGCCGCTCCCCCCTTTGCCGGCTTCTATGACTCA -AACTCGAGTCTCTGGTATGTCAATTTGAAAAGATGGAAGCTCAGGTCAATTTGCCCCATG -ATATACCTTGAAAATAACCATGAGATGTGGCCCTACTGATCACCCAGCCCATCCCAAGGA -AAACCTGACAAGCGACTGAGATACGCTACACCCTCCAGCCAACCTCGAAAGCTCACTTCG -TTGCTCTTGAAGATAATCATTTTGATAAGTGATCCCAGCGAAATGGGACCGGTCTCCAGA -AATTGACGGTGGGACCCATCCTGTGAATCAAGTAGAGACAACATGGAATATTCACCACTA 
-GGCCACTGTAGCTCAGCAGCGAATCCATATGCACAAAGCGTGAAAGGATTGAAGTCATTT -TATGGCTGCACCTAGAGTACCATGGGAGTCTCAAGTGTTGTGGGAAGAGAGTCAAACATG -ATTAGGGGAAAATGGAGCTGGTCAGGGTTAGACTATTACAAAACCTTGAGGTTGCAAAAA -TAGGTTCTCACTTGATCAAATCCGATGAATGCCATGTTGAGCAGAAAGCAGGGTCGAGTG -GAAAGCAAATTTGAGCTAGTGGGCATGTTACGTCTGGTTGACGGTGAACATGGACCTAGA -AAACCAATAACCGTCAGCAATGCTGGTGAGATCCCGTCGCAGAACGCTATATAAATATCT -ATGGAGGCTGGTGCCGGTGCATTCAACCTCCTTTACAGGTGTATCCGCTAGGAGACCTCA -CCTGCATTTGCAAAGAGCAGATTGGAATTTGATCTCTGTTTTTGAGGTTGACATGAACAT -CGATGACGATGGAGGCGGTGGTGGCTCCATGGCCTGTATATCCCGTTTGCGGGATGCGGT -AATATTGTATCGCGCAAGTCTTTGTTTAATGCCACACAACGCAAAAGAGAAAGCAAACCA -TACACGACAAAATATCAAAGTAAAGGGAGCAATAGTGGAAGGAACAATAATGGAAAGGAC -TAACCAGCTAACATGGTCCATTCGGCATGACCAACTGGGGGTCACCAAGAGGAATCTGAT -GACCAAGAGGAATATGATGCAAACGCATCATAGAGACACAGAAAAAGAAAGAGAGGTGGG -AAGACGATGGTCATGAAGCCATCAATGATGAACAAAAGTCCAAATCCCCAAAGGGGGGAT -GAGGAATTGATGGAATGTGAATGAAAGGATGGAAGATAGATGTAGGAGAAAAGAATAACA -AAAGAGGAGAGAAAAGGACAGGGAGTATTTATATGTTGAGACCGGAGACTCAAGCTGGGG -GTTCTTTCCGTATCAGGAGAACATACATGACCTCTCTACCCAACGTCATCTCTATCGAGG -AGTCTGGGTATTCAGATGATCAAAGACCTATATCGTCAAAGGCAAAGGCCGAATAAGCCG -CTGTCTACTAGCAGATGAATATGTGGAGCATATAAATATGAAGTGTCACGTAGTCTTCAT -TACATGGCGAGAGGGATATCCAGCAAAGATCGGAAAAGGCAAGTACTACAACAGTTTGTT -AGCGGTGGAGGTGACTAAATAGCCTACGCGGTATGATGGATTTGAATAGACAAGAAGAGA -ACGCATAGCACAGCAGGGTATTAATTTGCTCCAAATGCCGACTTCCTAGTAGAAAATACA -AATAGGCACAGGAGCCACGACTTGGCATCTGTGGCAATGACACTCATGAAGCCTCCTTCT -TGCGGCCCTCCACAACACGCATAATATCCGAGACGCTATCTTTGTCACCCTTCCTTTTTT -TGGGGCCACGCTTTCTCTTTTTCCCGCCTCGTTCGTCGTCGTCGAAGTCAAGCAGGGGCC -CATCGTCCTCCTCCTCTCCGTCTTCGTTACGACTTCCTCCGAATCCCATGTCCATATCAG -CGGCATCCATGGCCTCTTGGCGGCGTGCCTCTTCAATGAAGCGCTTGTGACGAAGAGCTG -TGTCAGAAGACTGGTCTGCTTCACCTTCTGCTGAAGGAGAGCTTTGCCGGAGGGCTGCAG -CTCGTTTGAGGGCAGCAAGAATGGTTGGATCTTTGGTGAGAGGGTTGGATCGGTCAACAC -GCTCCTCTTCTGCTGTAGTCGAGGTCGCAAAGTAATTCTTGGGTTTCGCAGGTCCTGGTT -CAGTGGCAGGCTTCCTCACCTCTTCGCTGACAGTGGCTTTTTCAGGTTTGTGAGTGGCCA -AATCTGCCACTTCGCCATCTTCATCAGACGATGATTCGTCTTCGAGACCAGCGAGAGGAT 
-TGTAATCATCACCGACACCGGCAAAGATATCTTCGTCCTCATCTTCGGGTGCCATTTGTG -CTGCAGCACGGGCAGCAATTTCAGCTGGAACTTCCATACCGAGGGGTGTGAGACTTTGAT -CAGGCATCATAAGATCTCCTACAGCAGGCACAGGCGCTTCAACCGGCTTATCAAGCCATC -TAGTCTTTCTCTTCGTATTTCCTTGCGCATCGGTGATGAGAAGCACTTCGCGTCGGCGGC -CAGTCTCGTCACGCTCCACAAATCGCTTCTTCTCGGGCTTTCCATCGCCCAGCTTCTTGA -ATTTTGAGCCAAGTATGGAGTCCGGCGGCGGAGGAGGCGCAGCAGCAGCAGCCCGGCTTG -CCTTTAATTCCCGCAAGATTTCATCGCGAGTCTTGGGCCTAGCAGGCGGTGGTGCCATAG -TCCCCTTCTTCTTTTCTTTCTGTTCCTTGGGCGCAGCAGGAAGCTCCTCGAGTCCCTTCC -CTTCCATCAGACGCTCGAATTCCTCATCTACGTCTTCTGTTTCAGGGGGCGGCGCAGGTT -TTTCAGCCGCTTGCGATACGTCCTCGCCAGCTTTAATTCTTCTTAACAAATCCATATCCA -GTCCTTTGACCATGTGTGTGCTTTCAATGTTACCTCCGACTCCGAGTTCTCGGCGCAGTT -TGTCAAAGGTAGCCTGGTCGATCTGTCCCAACTTCACCATATCCTCGAGCGCCTTAATGC -GCGACTCCAAGTCCGTGCTGCTCTCATCCTCTAAATTGCGCAATTTTGCACGATCCTGGT -AGCCGGTCGGTAGCGAGGTGCCCTTCGGCGCGGCGGAGGACTTGAATCGCTTGGAAGGTC -GATTGCCCTCATGACGTTGCTCAAAAAGCTGGCGGGCGAAGTCCACACCAGTGACTGAAC -GTCTGAAACAGCTGTTAGCCTCCTTCGGAATCATGGCAATCTGGAAATCAAGACGACGTA -CGGGGTCATGGGAATACTGGACCGCATTCGCGATCCCAGGGAGCCGCCCGGGGTTGCGCC -GCCGCGCGACACATCGCGGCCAGGCGTTGGGCTCTTGTCTGTAGGTTTTGACGATGACTG -GTCGGCTATTAGCCGGCGAAACTGCTCGTTATTCATGATCCGCGAGCTCAAGTTACTTGA -GACTGAAGTTACGAAAGGCTCATGTCAGAGGTGAAGATCGGGCTTGGCATTGGTAAGATA -AGACTTATCGGCGAAGGGGTGGCCAGCGCCGAATGCCTGAAAGTTTAGGGGTTGGTGCCT -GAACTACTTGGAATCTTGGAGGACTCCCAAGGCATGTAATCTCGCTCAAAATGCCTGTCA -TTAATCTCTCTAGCAATTCCAGCATTTTCATATTATGATAGGCCAAGAAACAAAACGCGA -ATGCCGATTACCCAAAAAAAAAAAAAAAAGGAGGAATATACCGTGTCAAAATAAAGTCCT -GTAATAGCCTCGAGAGAACCAATAATAATAGTACATTCACAAATCCAACCCAAATTTGAG -TCGAACGTAGCCTGCTATCTGTGAATCATAACTATTATGTGAGCCGATGGATAAAAAACA -TGTCCGATGTACAGTGAAAAAGAGATGAAAGTAGTATAAATGCATAGTAACGGAAAGAAA -CGAAAAAAGTGGCTAAACACACAATCGCGATTTGGTACATTTCGCCATCAAAAATTCAAA -CGGTAAGAAGTCTAATTTCATGCACATCAACAATCGCAAGGGGGTCGTTTTGATGTTCAT -TAAATAGTCTCCCAGCCTCCGTTGTTGATCTCTGACTCCAACTTGCGGCTGATAGTCGCA -GGGATCGAATGTGACAGCCAGACATCGCGCAGTAGCCGGTCGTACCGTTTTTGATTGAGG -ATAAGCTGGCGGTTGCGACGGAGACCAGGGTCGACTTCACCGTGGCGGTCGAGGTAGGGC 
-GCATAATGCCAGGATCCATTCGTGTTGTGCAGGTAAAGAACCGTGCACTTGCGAATATTG -ATGAAAAGGCCTATGTTTTTTCCGCACCTATTATGGAAGATTGATTAGCTGGAGAGTACT -TGCGAACGAAGTGCAGGGGTATAATCACTTTTGCAGATGTTGGTTACAGCCGCCCAGCTT -GTTTTTCATGCAGCAGACTGCCTGTGAGCAGAATATTTCTCCGCAGAATAAACAAATGCT -GGGATCAGTGAGCTCTTTCTTGGAATTGGGGCAGCGACGACGGTTTGCCTCTTCGATCAA -GCTATCGAAGTACTTGGGCAGGCCAACAAGCTCGAAGATAGCAGGGTGTGACAAACTTGG -CCAGAGACGATGGTCTTCGAAGCGGATGCCTGAGCGCGAGGCATTCCAGTGGAAGATCCA -CCCTGAGATGACTGCGTCGATGGGACTGTTTGTCTTGCGTGCGGGCTTAACAGACATGAG -AATCTCGTCAATGGAAGGTAGATGAAGTGCTTTAGTCAGGCGCTCGAGCTCAGAGGACTC -CACATCGGCAAATCCTGTGTTGGGGAACTCGACGCCGTGCTGAACATGAAGAAGGATAAC -TGCCTTGCGCAGGAATCCTAGTCCATAGCTCGTAACAAGTCGCCGAATAGCAGCGATGAT -GCTGGGAGTAGCAGTCTCCTCTCCGTCTTTGACATAGCCACTCTCAGCGGGGAAGGAGGA -GCCCTCGGCACGACCGACAGAGTTTGCCTTGAGTTCAGCAACGATCGAGTCGAAGAACTG -CTTCGTCACTCCAAATTGTTCTTCGGATAAATCGGCAGCCATAATATGGTCCTCCGGGTG -GGTGGCAAGTTCCTCCTTCAATCCCAGTGGCCAAAGAATGAAGGTGACAGCGACCTTCAC -AATCTCAGCAACAAAGCACATCCGGACAAGATGCCTGATATCAATATGAAGGACTGGAAG -CAGTGACAGCGATGCTTCTGCGAGGAACACAAAGGTATCCATGGCGAGCAGAGGCTCAAT -GTTGCGTACATCATTGAGCAGAGCTGTGCCACTCAAACAAGGGTGCCCGACAAATAGTTG -GCAGATCTTTTGACGATGCATCTCCTGGAACTCATTTGTTGGACGGCTTTTTGATGGTCC -CGTTGAGTGAAGGGACCCAACGGCAGCATATGTCAGCACCGTCTCTGATAGAACGCGCAG -ATGAGTGAGCGTCAACGAAGGTATTGTGTCGAGCAAAGTCGAGCCAGGCTCTGATTCAAC -ACCGCGCTGCGCGATCTCCACTGCAGCAATGCTGAAACCAAAGCTGCGAATCAAAGAATC -AGTATGTACAAGATCTTCATTGCTATTCGCATCAGAAGTGTTGTTGAACATCGAATTGAC -ATGGTTCATCTCAAGTGTCTTCTTGAGTCGCCTGTAGATCTGCAGCAGCTCCGACATAGG -GCTTTCCGGGGGACTCGGCACCTGCTGGAGTGGACTGGAAGGCGATAAGTCTTCGGTGGG -AGCGAAAACACCTGGCATTGGCATGCGCGCTGGCGTGATGTATTGTGAGGGGGCAGTTGG -TGGGAAAGACGAGACCCTCAGTTGATCAATGGTATTGGAAAGTGGAGGGACTAGTGCTTT -AGACAAGTAGTCAATAAATATGTCGTTGTATGGCTGGGGATAAGCCTTCTCCTTCTCCAT -GAGGACTGGGAAGTTTTGCTGTTGGTTCAAATCTTGCTTGAGGCCTGACTGAATCCACTG -GTCGAATGATTCAGAGGCGTTGAGTGGAACAAGGGGAGACTCTTCTTTGCCCTTCCAAGT -GATGGGGAGGAAGGCGTTTCCGAGAGCTTTGCAGAGAGGACAGACAAATTCCATGAGATC -AGTGTTCTCGGGATGGTTACGCGCAATCTGCTGGGTGTGTCGTCGCTGAGTCGCCGTATA 
-GTACTGCTCAAAACAAGAGTAATGCATTATGTGCCCACAGCCAGTAGTCACAGGGCCACG -AATAGTACTCTTTGCGTTGAATCCTTTGCTCAGTCCAATCTTTTCACTGACGACTTCTCC -CCCAGAAGAGTCAAGACGTTTGATCGTAGTATGATTTTCCCCGGATACACCGAATGGTCG -CAGTTCTTCAGCGGACCTGTCCAGACTTGTGGGTGTTTTGAGCACCTCACGAATTCTATC -AGAGTCCCTGATATCGGTCTGTCGCAGGATTGTGCTGTCTTGAACCAAGGCAAATGTACC -ATAGAGTCTCGAGTCATTGGTCTCTTCCTGACACAAGATACATGTTCCGCTAGGATACTT -CCATAGCTTCTTCTCTGGGGTGGCGTCTGTATCAGACTCGAAATCGCTGAAATCCTCTTC -GCCCCAATCGATGTTACCTTGGCTGTTCAAGAATTGTTGCTGCTGCTGCTGGAACTGAGC -CATAACCTTGGCTTGTCTCTCCAGCGCTTGCTTCTTCTTAACCTCTTTGTCGTTATCTGC -ATCAATTGCAGGAGAGTTCGTGTCAATTCGGTCGAAGGGGAATTTGAGAGACGCAGTTGC -CGATGAATACGTGCGAGGGCGTTTTTGCCAAAGACGTTTGAGTATATGTCTAATTTTAGG -GCCAGATGCGGTGAATTCAGGCGTGACGGAAATTCTCTCCAATAAACCGACGATGGTCAA -ATGGCCAGCCTTGATCTCTCGGGCCTTCGACAAAGCATTCCAGTTGAATGAATTGGAATG -CTCTTCCGCCATATCAGTCTCCTCAGAATCATCTTCCAACACTGCTGCAAGGATAAGATG -CAAAATCACTTGAAGGAAAGTCTCAACACGTGTCGGTGGGATGGTTGGTGTACACTCTTT -CGATGAGATAACATATTCCAAGCATTGGTGAACAATCTGCGCAAACAGCATTGTTCCGGC -GAATCGGGGCAGATCTGAGAACACGCCGAAGCTGATAGGTCGAAGTTTCGGCTCAAAAAC -AATGTCAGAGGCATTTTTGCCAGTCTTTTTCGCCATCCATTCCTTATAAATATTCTCTGC -CTCGTCTCTTTGGTTCTTTGTGTAATGTGCGCTGTACGGATCAATAAGATCAAGATATTC -GGGCTTCAGTTCGAAAGTACCAGAGTCACTCAAGCCCTCGGGTGGGCGGAAACTAGCGAC -TTCTTCCAACACATCTTGGAACATGTCCGAGTCTAGAAGCTTGTCGCTAAGGCGAGTCGA -GAGGTCGGAGAAAGACAGCGGTTTGAAGCACAAGACATGAGCGATGTCACGTTTGATGTT -TTCGTGGGTGAGGTGAGTGCTGTCATCGATGGCAGTCAAAGAGGTTCGGTCCGTCAGGAG -GATCACCAGTAGATGAACAAATTCCTCTGCCACGTCCACAATCTGGGTGTCTTCGTATCC -AGAACGGGGCATGTAGTTCCGAGCCATCCATTCAACCATACCAAAGCGGTGTGCAATGGA -AGCAAGAACTCTACTAGGATCACACGTAACTAGGGCAGTCTGGAGAAGGAAGATATCTCT -GTAGTACGCAAAGTCACGAGAAGAGACTCCACGGTATTGCGACATTTGATGCCGAAGACT -AAGGCCATTGCGCACCCACATGCCAGCCTTCATTTGGGCCAACCAAGCGCATACTCGAAG -TGGATAATCAAACATGGTCAGAAGCAAGTCCTCCGGCCCAAGAGTAAGCAATACATCATC -AACTTTGCTAGGATCGGCAGTCTTGGCGGCGTGAATCTCACTTCTTGCCGCATTGGCAGC -ATCAAAGAGAACATCGCGCATAGTCTCGCTGCTTATGTTTCGGCCGCATTCAATGAGCCA -TGACAATGTGTAGTGAAGGGCGTGGTGGAAACTGATAGATCCTCGCTCCACCACGAATTC 
-AACGACGCGATGGCAAGGCACTTGGCCAGTGGCGCCGACCTCGAAATCGAAAGGTGGCAG -TAGTTTGAATCTCACTTCATCCTTGATTTCTGCCTGGTCGAATCGCTTGCGTTCCAGGCC -GGCTGAATGGATGATCGCAGTAGTGGTGGTGGTCTCGATTGCCCTCAGGAGGTTGTCACC -GTTATCAATTTCAGGGTCACGGAAAGACTCGCAGAATTGGCGGCAAAGTCGGTTGATTTC -TCGCATGAGAATCGATGCACTAATCCAAGCGTCCGTTTCGTATTCAACGTGTTCACCAAC -AGCACGGACATTAGGGCAAATGCCCTGTGAAAGCTTGACGAGGTCAAGGAACTGAGGAAG -GTATTGTTCCTCAGTCCGCACACGATGCTGGACATATTCCGATTGTAGAAGATATCGGAG -GTCAAGGAAGAAATGATAGAGACGTCTGTTTGTAACTGAACCAGCATCAAATGCCAACGT -GGCATTGGGGTTGACGGATTGTGGTTCACCGACTTGTCTTGTGGTCAAGAAGGTATAGAG -AATAGCCATCACCTTCGTGAGGAAGTTGCCTCGCACGACAATCTCTTCAGTGATAGAGGG -GGTCGTGAGAAGCTGTAATGAAAGGTTGACAATCGAGTGGTCCGGCTCTCGATCCGCAAT -GAGGTAGAGTTGAGCCAGAGCGGTATACAACGCCGACAAACGAAGACCCATGATGCGCTT -GAACTGAGGAACATTGACAACGGTGCTGATGTACAGATCACGTAAGTCGGTTCGTGTCTT -CTTCCACAACCTCAAGTCGTATAGCACGAGCCAGTCTAGGCGAGTACGCTGCCACAGATC -TTCATACGGAGGTACTTTTTCACCTGGAGCGGGAAGGGACGGCCGGACTTGCCAGTGACT -GTCTGTGTTTGAAGGCCTTTGCCGGTGCAAGCCTGGGGTTTTGGGGATGGGGATATAGCC -GGTAGGGGCAGATTCAGGGCCTGGACTTTGAGACTCGGATGGGCCTCGTTCGGCAGAGGA -CCGATCTGCCGCTTGAGCCTGTGCGGGTCGAGGAGGTAGCGGCTGGATGCCAGTAAGTCC -AGCGAGAATGCGCTCTGCTAAAGCAAGTTCCTCGTCGGCACCCGCCAAAATCATATCTTC -GTCTTCTTCCTGATCACGAGTGAGTTCGATGTCCATTtcatcgtcatcttcatcatccgc -gtcgtgggcctcgacatagtcttcgtcttcctcttcgtctataacatcgtcgtcaaggat -atcgtcgtcgtcatcgtcatcgtcAGTGGTTAGGAGGACTTGGCCATGGTTAGGTAAATT -AATCATGAGCGTGCGACCTGGAAGGCTCTCGGTCCGTTGATGATCATCGATGCCCTTCAT -GCCGATATCTTCGTTATATCCACCACTGCCCTTTTTCCAGGGCGAAAGCATTTCCTCACA -AATGATTTGTCTAAAAATCTCATTGTCCTCAAGGATTTCACAGCCAGCAATATCGGCCAA -CCACTCAACTATCGTTGCGCACATTTGCTCGCGGAACGTATCGCGTGCAGAGCGCACCGT -GACGGTGACCTTGATTTCCTCAATTATCTTAGAAACGCCCAACAGTCTGTTCAGGTCCTT -AGAATACTTTATCACACTGCGGCCCATATCGTTCGTTTCATGAGCTCGCGCAACACCAAA -GCTATCTCGCTCGCGACAAGCACGTGTGACCTGATGAGAGACATCGCGAATTGTATGCTT -CTCGTCATTCCACAGCAGGACTGCAAATTCAGGTTCTTCCTCCTCTTCATCACCAGCACC -ATACCAGTTTGGGCGCAACCGAGATGCCTCCTCATCTTTTTTAATGCCCTCAATGGTCTT -TGGCAGCCGCAATTGCTCTGGGGAGCAAGAGATGACATCACAGAAATAGTCGAGCACACG 
-GGATATAGTCAAGCGGATCGAGCCCACGAGCTCAGAAGGCAGCACCCGTTGTGCCCGTTC -CTTGCCCTTGGTATTGCCACTATCGGTATGGATCGCGCAGAACAGCGGAAGGCGCCATGC -TTCGTCATCACCACAATCGCAGCAACCACAGTTTCCGGAGGAAAGCGAAATCTGATATTG -ATGTCCGGTATGATCTGAGGAATCGAAGCATCGACTACACAGCACACAGGTATCGTCCGC -GGCACATGTCACGCATCGGTATGTGGCTTCCCCAGCACGGAAGATATGACCGCAGCGCTT -GCCCCTCGCTGCCTCACTGTACTCGGCTCCTTCCCGGGCCCCCTGGGCCTCCGCCAGTTT -CCATGGTCCAACGGGAGTTCCTTCTGGAAATAGCTGGTGTACTAAATCATTGTCGTGTCC -GACAAGCGATCGGAAGAGAAGCTCTAGCAGGTCCTTCTCGGCAGCGGGCCCATAGCGGAA -GTTGTGTTTCTTGGGTAAACGGAGAAGACCATCACCTAGTAGCTTCTCACACTCCCCAAG -CATGAGCATGATTTGTCGTTATGGCCGAGTTATCGTAGCGACCACTCGGGCTATCACAAG -TGAGTGATGGAACGGAACGCCGTTGTCGCGACGATGCGCTCAAGGTAGTAATGGTCCGAC -ACAGGTAGTTTCTTCACGGGTATGCAGGCGGGAAAGAAATTTTCATGTGAATATTCAAGA -GATGAACCGGATCGAGTAGGGTTCTCAGGTGTAGTTCACGTTCACACGCTGATACTTTGT -TGGTGGGGGGGGAAGAAGAGCGACGGGTGAGGGTGGACCAATGCAGGCGAGCAGATCAGC -ACAGGTACGGTGAAGTCATACGCGATTCAAATTCAAAATGTGGAGCTCAAGTGATCCTGA -AGAGTAAAATACAAAAAAAGTAGAAGGAATCAAATGAAGAGTGTTTGTTGAGATCGGAAA -GTAACATTTTTCCTTATGACATCGATGGAACCGGCCTGTCTCAGTAAACGCCACATCCAG -GCTCACGAGCCTGACCACCTACAACACGCCACTAAACATTTGAAGTTTTAAAGAGATTGA -AAGTCAAAATACAGATTTCCATCTATCACGTACAAGCCCACATATGGGATGAGAAATAAC -AGGCACTAGCTTCGGTTGGCTTTCCCGTGCCCCTTTACCCGCGGCGGTAAAATATACCCA -ACATCACTTTCGGCAAGTCCATTTACTCGTCATCCTCCTCCTTAACCATATAACGGTGCT -TGATGTCCTTCCATTGGCGCTGTAAGGTGAGAAAAATAGTCAGCGTGGGTCCTAACCTCG -GGTATTGTAGTCCATCGGGAAAGAGGTTCACATACGCCCTGGTTCCAGCTGTCCCAGACC -TTGTTGGAGGTGATATCGAAGGCACTATTCGGAGATCGGGTAGTCAGTATTCCGTATAAG -GTGTACAGCATGTCTTAACATGTTCCGAGACTGGATGTAACCCACAGCTCAAAAGCGAAG -GCGCCGGCAAAAGCAGTGGTCAGGAGAACAGTGTTCTTGCGCATGAAAGTCCTAAACACA -GCGAAGAGTGCTTCGGTTAGATTTCAATTGGACAGTTTCAATCGCGATCATAGGACCTAG -GAAGGGGAGACATGCGAGTAGATAACGTTTGCGAGCTGTCATGCGAGAGGTCAGTCAGTA -TTGGACATAGCAGTGCAAATGGGAGAACTGAAAAGAGGGATGAAGACATACGCCGGCCAT -TGTGAGAGATTGAGGTGTTTGCACACGACCTAAATTAAAGGATTGGACAGGAGGATGGGG -CAACGAGAGTTGAAGTTGTGAGGTTGAAGTTGCGAAGGCTGAATCTGCGGGAATTGCCGG -TCCGCTGATTGGCCGACCCTCCGGCCTTCCGCACTCAACTTTCAATGTTCGTGTTGTTTT 
-TCTCCTTCAATCTTTAAAGATATTGATCTTTACCTTGGAGCTTGACTCTGTCAAGTTGCT -ATCAGATATTCTGTATTTTACTAGGCCCATCTCTCAAATTACAGCTTACACCACCCACCA -CACCATGTCCTCCAACGTCGCCAGGTCCGGCATCACCACCGATGCCCGAACCGGCGAACG -ATACATTCCCTCTTCAGTCCGTGCCGACGGCTCTAGGCGCAAAGAGATCCGTGTGCGCCC -TGGCTACAAGCCCCCCGAGGATGTCGAACTCTACAAGAACCGGGCAGCGGCAGCATGGAA -GACCCGCGCAAAAGGCGGGGTGCCGGGTGCCGAAACATTGAGCAGCGAAGACGACAAAAC -CAAGACCACAACGATACCCACAACTACCACCGCTACTCCCGCTACTCCCGCCAGTAACAA -AAACGCCAAACGCCGCGAGGCCAAGCGAAATTCCAAGGAAACCGACGAAGCCGGTCCCAC -CACCGAGGGCAAGGGCGCGGAGTCAAACAATTGGCGTGTCCCTGCACCCACACCTAAGAA -GCAAGAAAAGCCTAGCGAGGAGCCTGTCGATCTCGAAGCGGAGAAGGAGAAGAAAGCCCG -CAACCTGAAGAAGAAGCTCCGCCAGGCGCGGGATCTTCGCGATAAGAAGCAGCAGGGAGA -GGCGCTGTTGCCCGAACAGTTGGAGAAAGTCATTAAGATACAGGAACTTGTCCGGCAGCT -GGATGTTTTGGGCTTTGATTCCAATGGGGACAAGAAAAATGGTGACAGTAATGAGAATGC -TTGAATGCCTTTTTGAACCTTCATACGCGGGGTCAGATTAAATTCAATATCCCATATCCA -TGTGTCGAGTATATGCCATCATGCATAGCAATGTCATCAGGACTTCCTGCAAGCACACTC -CGACCTTGTTTGGGAGCCACCAGACTATCCCACATCGTCGTGACAAGTGATGGGGAGTGG -GGTGGGAGCGTTATTACCTATATGGCAGGGCACAATGCAACCAGTCGGTTCCAGCCTTAC -AATCGCCCATCGCTTGCCTATGAGGAAGTTTGGATGCTGTCACATCTCTCGGCCATTCCC -TCAATCGGGAATCTAGGTGCTCTCGCCCTTGACCCTGATGGAATCAAAACTGAAGATGAC -ATCAGACTTCTCAAATGGAAGTTCAACCTTTTGGTGGACATACCACGGGCTGGGTGACAG -CTAAGTTAGACATCAGAAATTGAACATTTGATATCCGGCCAATGCCGTCGATTTCCAACT -TTCAACAAGAAACAGGCTATCATTACAACGATACATCTATTATACATTTTGACGTTCTCT -TGTATACATTCCCAAGTACAAGTAGGCAAACCACACGGACCAGCCTCATAACATCAGACT -TTGGAGTCAACCAGGCGAACAGGAGGCATCACAATGCCACCCCTTAGTATCCAGCGTTCA -TGATCAAAGGAAGTCCACGGACGCCACGGACTAGGGTCACCATTAAGCCGGTTTCTTCCA -ATGCCATATGTGGCAGATAGGATACCCTGTTTGACACAGTCGCCCACACCTACGCCATTA -TACCAGTTTCCTGCGAGAGTAAGCCCGCCGCTATACTCTTTGCGAACTGTGTTTGAGAGC -CTATACATGCGGTCCAGATGGCCGACCGTGTACTGGGGAATGGCATCCTTTTGGAGACGA -CTCCGTGCGACAGTCGGGGTATCAGTAATATTCATGTGTCTCTTGAGCATGTCGCGGGCC -ATTTTCACTGCGGTATCGTGGTCTGGATATTCATCGAAGCCATCCCAATAATGTCCACCT -AACATGACGGTGAGTTTTGTTCCAGGTGCGGAATCTTGCTTGACTTCAGATGACTCAGGG -TCCGTTCCGTTCCCTACACTAGAAGACGAGGCGAAAATGACACCAAGACCACATTCTGGG 
-TTCTGCTCGTAGGGTATGGAGCGCGGTATGAGGTATCCAAAGCCATCCTCCACCGGCAGA -AGGTTGGGGTTTGGGTAATAGAGATTGATGACCATTGCTGTAACAGCGTAGTTCTGTTCG -CGTAGTAGGGATGGGGTCAAGCCGACCGGCTTGACCTTTGTATCACTTGTTTTGCGCATG -ACGTTTGCAAGAGCAACTGGAGGGATAGTGGAAATGACATATTTATAATGAGACGTCTTG -ATTTGACCATGCGTTGGTGACCAATGATCCATCTATTGACGGTCAGCATCACTCCTAGGG -CTCATAGAGCTCGGGGATATAGAATGGCGGTGCAGAGGAATTCACATACCGCAATGAAAG -AAGAAAGGTCGTCGATTGCACTCAAGGCTTTGATCTCCGTGTTCATTCGGAAGCTGACCT -TGCCAGATGCTTCCAATGAAGCGATCAGTGCTTCAATAAGCTGGCAAACTCCCCTCTTAA -AGGTGAACGTACTAGCTCCAGCTGCAAGGACTTCTAGATCCTGTTGACGGCGCACTAATT -CAGGGCCTGCAGTAATGGCGTCTATGGCCATAAAATCATCCATATTTCGAGTCTTTGTTT -TGGAGATTGATCTGGAAATAAGCGACGCCGTAACTCCGCCAGAAACAAACCCACCAATGC -CCCCAATGCCACCCTCGAGGTTGCGGATGCTACCCAACAGCGTCTGGGCACTCAATCGAT -CAATATCACCAGCATAAACTCCATGGTACACAGCTGAGACTAAATTATCTGCAACATTGG -GGCCAAAACGACGTCCGATGAAGTCCGCGACAGACTCATCTTTCGCCCATTCAGTTGGGT -GCCGGGATGGGGCGAAGATATCTTTTACTATCCCCGAAATGAATTTGTTAAATAGCGGTT -CCTTCATAGTATTCACGAATCTGTCGAAGTTCTCCCCAAAAGAAAGTTCAGGCTTCGGCG -CCGGCAAGCGAACAAGACGGTCCGGGTAGTAGATGTAGCGATTTTGGGCCGCGGGGCTTG -TCTTCGGGGTGACAATGAGGTCTTTGTACAAGCCTAAATTTGTAGCCTTTGCAGGAGGTC -AGCATTTCAAGGCAAACCAATCTCTGAGTATGTACACGGGAGCAATACTCACCAGGTAAA -GCAGAGGGAGGGATCCTGGGAGCGAGCTCCTTAACGTCCGCGGCCCGTACTCGAACACGA -TATTGCCACCATCGACAGGGATTGTTTCGGAGTCAATCCAGCCACCGAGCCGGTCCGACT -TTTCGAATACATTAATTCTTTCGCACGTAGGGTCTTGCGCGAGTTGCCAGGCGGCTGTGA -GGCCGGTGATACCACCGCCAAGCACGGCAGCTGTGTAGGTGCGTTTTTGGTCATTGAGAA -GATTCAATTGCCTTTGCCTTGGCCTTAATGCTCGAGAGACATGTGGTAGCCGCATCTCTA -CTGATTAAAGCAAGGTTTGATTCAAGGTCGGTCGGAGTATTCAAGCGAAAGGTGAAGGTC -CTCGGAAGCTTCCGTTCTTACTTAAGCTACGCGGGGTGGGCAGGTGGAGCCCTGGAGCTT -AGATCCTCCCCTTACAACCCTACAAGCTTGCACATGGTTCATGAGACATTCGCGAATTGG -TTCTCGCGTTCGTATTATTCTTTGGAAGATCAACGGGGGAATCTATGAACAGGACAGATA -ATAGTATCTGAAACAAAAGCAAATGCATAAAGAGTATCAACGCCCTTCCCTCCAGCCATT -CCTCGTTCATTGGTCAAGATCGGATTGTACATATGGTATTCTTCGCTTCGAAAACAGACT -GTTGTGGCATGTTTATTTTTCTGGGTTTTGGTTTTTGAACAGTGGTTTTTCTTCATCGGA -TCATGTCAGGCGGAGTCACGCGGTCACCTCAGGGTTCTGGGTATTCTCCTCGGTATTATC 
-CGGTTTGATCAACTCTTGGGGACCATCCACATCCGTAACAAATTTGCGCTCGCTGACATG -CTCTCCAGAGCGTGGGGTCCATACTTTGTCGGTGTAAGCACCGCGTTGACCCTCGGCGTG -CTTGTAGGTGCATTTGGGGTTCAAGCAGGGATTGAACTTGCAAGCAGTCTCGATATGTGT -GAACTTGCATCCTTCGGTCTTGCAGTCTGCTCCATTGCGGCAGACCGGCATACTTGGGTG -CTTGAAGTGACAGGTGGGGTTGGTACAATGCGGGAAGAACCGACACATGGCCTCTGCCTG -GTGGGCGGATCGCACAGCCGGCGAAGGATGACGTCCAGTGCATTTCTTGTTCTTGCAAGC -TGCACCGAATGAGCAAGTATCAGAAGGATCGATTGATGTACCTTCAGGGGCGGCAGGTGA -TTGATGGGCGAACGGGCAATCCTGGCGCGAGCATCGTTGATTGAAGCGACAAACACTGTC -GGGGTTGCTTTCATCCTGTTCACCACCCGCTGTGCCGTCCATATCAACATCTGCTCCTTC -TGCGGGTCTTGACTTGTTGTAACCGCCACCGCGCTTTCCTCCACGCTGGCCACCTCGCTC -GACACGACTGAAGAGCGAACGACCCTGAGGACCATTCTGGGAACCAGATTGGAAAGCGGG -GTTAACAGCGGGAGGCATGAAGCCAGGCATGATTTGCGACATCAACCGGGCCTGTTCTTC -CAACATCGACATAAGTTGCATCTGGCTTTCGGGGGTCAGCTGCACCATGGGGTTGCCTGG -CATTCCCATACCACGGCCACCACGGCCATGTTGCTGTCCGACACCTCTCATTTGGCCGCG -GCCATTGTGGCCAATACGCCCGTTTCCTTGTCCGCGCACACGGTGTAGAGCTGCATCCGA -ACCACGGTCCAGGTTTCGATTGATTTGGTTCAACATTCTTCCCTTGCCGTTTCTACCGTT -ACGCATCGCTTTCGGTCCGGTGGGGCTATCGAGTAGAAATGTCAGCATGTTGTAGGCATT -AGTAGAGAGGCATGGTAGAAGAAACCTACATTGACTCGTTTCCGGCCTCGCTCATCTCGG -CATCCTGGCCGTTGGCCCCACCAAACGGATTGGCCGTCGTTTGAGCTGAGTTGTCAAATT -CCATCGCAGAAGGGTTCAACTGTCCCATGGGAGCATTGGTCGAGGCTGGAGCAGCTCCAT -TTATTTGTTGATTGAGCATCTCCACTTGGTTGAAAAGCCATTGCGAAAACTCGAGAGCCG -CAGTGTCGCCTTCCTGAAGGCCGAGGAAATCATTTGAGAGTTCGGTGGCGATTTGCTCTT -CGGTCTTGCCGTTGACCAGCATGAGAACGATGTATTCAATCAATGGCGACTCTTGGCCCT -CACTCCAGCCCATCTCCACCAATTTCGGTTGGATGACATTTGCGACTGCTTGCGCAAGAG -GAGTCTCCAAAGTAATTTGAGGAAGCATGGCGAAGGCGATTATGAAGTCAGGCAAAAGTG -ATGGGTTGGTGAGAAATTCACAAAGAGAGCAGTGCTGATAAAAGAACTTGCGCCACCCGA -TAAAGGTGTTTCAGATCTTATCGGTGCAACTAACAATGAAGACGCGATGAGAACGAGATC -TGAGGAAGGTTGAAATAGAAAGAGAGGTTGATAGGCCAGTAACAGTTTCTTGGAGGAAAG -AGCAAGACTTTAGCAAGCAGATTGAAGTGAAGCTAATGACCCGATGACCGAAACGGACGT -TCGATCACGTGGCTGTGGCTGTCCGGCGGTGAGGTGGGGCAGTACATACAGTGCAAACAA -CCGCATGTACTGTCGAAGTTGATTTGGGGATATCAATGAAGAGCCCTTGCTCCTATTGAA -TTATCTCTTGATGGCGAAATGGGACGGATAAAAAGGACGAGCTGTTGAATAGAATAAGGG 
-GTAGAGGTTTGACTACTAGAGTTCATGTTTATATATGATTCACAATGCTATTGTTCTTCT -GTTCAGCTATTGAGCTGTGTTATTGTGAATGGAGTAATCGGAGTGTATGGGAGTTCTGCA -CACAAAGAGAAGTATCCAGATGAAGAACTGACTGGTCAAGTGACATCCGACAAGGGAATA -GTGATTATCTCAAGGCTACTGTGGCCTAAGGCACAAGAATAAAATTTCCTGATACATCAG -GCTCTAAGGACACTTGCATGTGCATAATAATATGCAGTGCAGATATGTTTATGTGGCTGC -AGGCCGTTCATACGATCTTTCCTCTCGCGATCATATGCCATCAACCCTGTCTGCCGAGGT -GTCATTTTGCCCATCTTCCCCAGATCGTCGGTTATCCCCACAAATATACATAATTTGTAG -AGCGTGAAACCGAGACCACAACTGCACCTTGGAGATATGTGCCTAGCCCCATATTACATC -GATAGATTATCCAAACTTCACAGCGGTATGGATGAGCAAACCATGAAGTCACATAATGCG -GGGTTCGCAAGCACCGCTGCTTTGGGCCACCTACTCCGTACGCCTGTTAGGGATGGGGAA -CACACATCATTGGATATCTGAAGAACTGGGGTACTTGTGACCCCAGATTGATATCCACTA -TAACCCGGATGGCCCGTGTTGTTGGACAGGTTCTTCATATACTAGCATTCCCTACTATAT -GTTAACATATCAATTTCTTTTTTTCTCATCTGAACGTGCTTCCTCAAGGCCGGAAGACAA -ACCAAATCCAACTTGGCGGCACGATGATTGAAAACCTTTTCGTACGGATGACCCTCAGTC -GGGCCTCACTGTGGGCTCTAGGGCTATTCGTTGCATTCTGCGTCTTCCGCAAATTTCAAG -CTTCAGCACAAATCGCTCGGCTCGGGGTCAGAGCTCCTAAAATCAAGTTTTATCTTCCAT -ATGCTCTCGACTTCATCTTCCAAGGCTACAAAGCCAATCAAGTGAACCGCGACTTAGAGT -TCTGGGGGGAACGAATGGGGCAAGCAGGCGGCCTGACAAATGTGAAGACAGCCGAACTCG -ATGCTGGCATTTCCACCCGCATGATCCTTACCAAGGATCCAGAAAACATTAAGGCAGTCC -TCACAGGCCAATTCGCCGACTACGGCAAAGGTGAGTCCTTCCACCAGGAGTGGAAGGAGT -TCCTAGGCGACAGCATCTTTGCCACAGACGGGGAACTATGGTCACGGTCCCGCCAGCTCA -TCCGTCCCATGTTCGTGCGTGACCGCATAGTTGACACAGAGATCTTTGAGAAGCATGTTC -AAAAGCTCATTCCGCTTCTCGGCGGCAGCGACTCACCCAGCAGTAGCAAGATAGTGGACG -TCGGGTCACTCTTTTTCCGGTACACTCTCGATGCAGCAACAGACTACCTACTCGGCAAGG -GTACCGACAGTCTAGATAACCCGGCAACACGGTTCGCCGAAGCCTTCAGATATGTGCAAC -AGCGCCAAGCGGAATTTTTCCGTTTCGGGTGAGTGATGATACTCGATCCTGTGTGACGCG -AGCACAGCCTGTCATTGGAGCTTCGTGGAAGCTAATCTAAAATCCATCCCTAAAACAGAA -TCTTTAGCCGTGCCATGTCACGCACCGAGTTTCGCAAGAACCTCGGGATAATGGACGAGT -TTATTCAGCCATATATCCAAACCGTCCTCGCCCGCTCAGCCAGTGAACCTAACGAGAAGC -TCTCAAAACGTGAGACTTTCCTCGACGCGCTAGCTAGCTTTACCCGCGATCCCCGCGTGC -TCCGTGATCAACTCGTTGCCGTTCTCCTCGCAGGCCGCGATACTACAGCCGCCACACTGT -CCTTCTGCCTCTTCGAGCTATCGCGCAACCCGGAGGTCGTTGCCAAGCTACGCGACGAGA 
-TCCGTGACAGACTCGGCGTTGGCATCAACGCCCAGAAACCTAGCTACACTGATCTTAAGG -AAATGAAATACCTCAATGCAGTGCTACACGAGACGATGCGTGTATACCCAGTTGTGCCGT -TTAACGTGCGCAACTCTCTACACGATACGACTCTCCCCCGCGGCGGCGGACCGGACGGAC -GCTCACCAGTGGGTGTGCGCGCAAACTCGCGGGTTATCTACTCAACGATGTTAATGCAAC -GAGATCCTGATTTATATGACGGTCCCGAATCGGAGAACTACTTCGATCCTGGAAAATGGA -TTCCTGAGCGGTGGGTTTCTGGCTGGCAGCCTAAGCCATGGCATTTTGTTCCGTTTAATG -GTGGCCCGCGGATTTGTATTGGCCAGCAGTTTGCTACTATTGAAATGGGGTACACAGTAA -TTCGGATCTTACAGACTTATGAGCGGATTATTGCGCTACCGGTTGGAGGCAAAGATAAAG -TTGAAGACCCAGTACTTCGGTTTGAAGTTACTCTCAGTCCGGGCTCTGAGTTGAACTGCG -TTTTCCTCAAGCAGGGCGAAGAGGCTGCGCGCAGTGCCACTCAGTCGAGTTCCGCTTGAG -CTGTTTGGTTGATTCTTCGGACCATAGTATAAGATATTTGCGACACGAGTTTAATGCCCT -GCCTTTCGCGTTTGGGTCAAGGTGGATTCTATCTTGTTCTTCCATTTTTAGTAGTCAATA -TGAAGATCCCAAAGGAAAGCCCATACAGGGCTCCACTTCTAATTGCAAACCCTAACATAA -CTACATAACTTGTGCAGAGGAGGAGTGCCTTCAATAACAGGTTGCTATCAATCCTGCAAA -GACCGTTCACTGTCACATGTTCCCGCGACGGCGACACAATCCTGATCATTTTGCAACACA -ATGAAAGGCATCTAAGCATCTTCAACATGGCCGAAACGACTCTGAAGACAGAGATTGTTC -GAAGCTCTTAACACATATCCGGGGAAATTGACAAGTCTGCCACCCTCGGGCCATTCCATT -TTCCCCAAGCAGCCCTCGCTCCCTTTGCAATCCAGCGGGCACTAAGGGGTATCTTCCAAT -CCGAGTGGCTGGTAACAGCGATATATCGAGCAATCTTTAGGGAGGCACATATTTAGGGAA -ACACTTCCAAGTTGAACATCCTCACAAGGTGTTTGGAGACATTGCATTATCTGATAAGTC -CGCCATATTAGACACAACTCTGAATCCAATCAGAATACAATCAATAGCAAGTTGACTGTT -TCCGTAGCTTCCTAAGCCTTTGCAAGATCTCGACTTTCCGCAACTCCAGGAACAAATCCT -CTTCGTCTGCATCATCCAAATCACCCAATACACTCCCCAGTGCTGTCTGCACGGGTGGAG -GCCGGACTTGGATCTCCTTGACCGAGACCCGAGCTCCCCCCTTCGCAGCTGTGTTGTAGG -CAGAGCTCTTGACTACCTCGGAGATTAAGAAATGATTCGAGGTTCGTGTTGCCTTGGGTG -GAGGGAGATCATCGTTCTTCACTGGTACACATGTCTCGTTGGTCAGGCCAACAGCTTCTT -CTTGCTCGGTGCTCTTCGATCCCACCCAGCCTCTACTCATTTGCACGGCCTTTTCCCATT -GTCCGAATTGGGTATCGCTCTCTTGTCGCGAGACCTGGGGCTCGAAGACGGCCCCACCGG -AACGATTGATGTCGCGTAGTTCGGCGAAGTTGCGCCAAACGCCAACAGCAAGGCCGGCTG -CAATTGCTGCACCGAGAGCTGTAGTCTCTCGCATCTTGGGGCGGTAGACAGGAATGGATA -TCAGATCTGCTTGGGTCTATATTCGAGATTAGAATAATTCCTTTACACTCATCGATGTAG -AGGAAGGCACAATATCTTACCTGCATCGCCAAGTCTGAGTTACTCATACCCCCATCTACT 
-GCCAGTTCGGAGAGTGTATGGCCGCTGTCTTTCTCCATTGCGTCTAGAATTGCTTTGGTC -TGGAAGCAGGTTGCCTCTAGGGTAGCGCGAGCGATATGGCCCTTTTGGGTGTACTGGGTG -ATACCGACTGCGTACACTGTCAGATAAAATATACGAATAGACAGGCAATGAGAGACTCAC -AGATTGTTCCTTTCGCATCATCAATCCAATAAGGCGCAAAAAGTCCACTAAAGGCAGTGA -CAAACACACATCCACCATTGTTGTCCACCGAGTAGGCAAGGTCATTGACCTCCTTAGACT -CCTGAAAGAAATTCAAGTTGTTCTGCAAGAATTTGATCCCAGATCCAGCAACAGCGATGC -TCCCCTCAAGCGCATAGATAGGCTTTCCACTAAAGTGGTAGGCAACCGTAGCCAGCAATC -CATGTGTCGACAACACCGGCTTGTCGCCAACATTGTACAAAAGAAAGCACCCAGTGCCAT -AAGTATTCTTGGCCATTCCAGGCGAGAACCCTTTCTGCCCAACCAAGGCCGCGGACTGGT -CTCCCAGACAGCCCATAATTGGAATCCGTGCCAAGGCACCAGAAGCAACGACTCCATACG -CTGTTGCATCAGACGAGGGCACAATCCTAGGCAGGTGAATCTTGCCTCTAATCCCAAAGA -AGTCCAGAAGCGTCTCATCATACTGCAACGTCTCCAGGTTCACAAACATGGTCCGCGAAG -CGTTTGTTGAATCCGAGACAAAGACGTTGGTAGCCACGCCACCGTTCAAACGGTAAACTA -GCCAAGCATCGACAGTACCAAACGCCAGTGTGCCACGGTCAAATGCATCCTTAACTCTTG -GAACATGCGCTAACATCCAGAGTAGCTTTGTAGCGGACGAGTAGGTTGACAACGGCAAAC -CGCAGAGCGTCTGGAGCTGCGCTGCCCCTGCTCTCTGTTTCAGTTCCGCAACAATCGCCT -GGGACCGTGTGTCCGTCCATACAATGGCATTGTATAAGGGTTCGCCGGTCTCGTGGTCCC -AGACTATGGTTGTTTCGCGCTGGTTTGTTATTCCGATGCTCTTGATGCTGCCTCGAGTAT -GGCCTTGCGCTTCAAACTTCTGGACAGCTTCTTCGATGCAGGTCTCTACTGAGGCTACAA -TCTCGAGGGGGTCGTGTTCGTGCCAGCTAGGGGTGGTTAACATCAGATCCAAGGTGGGCT -TTTGGTCTAGCTATTGGATCGGACTGGGACTTACCCCGGATTTGGGTAGATTTGACTGAA -CTCGACTTGATGGGATGCTACTGGCTCACCCTCGCGGTTAAAAATTAAGAATCTCGTGCT -CGTCGTGCCCTGGTCGATTGACCCCACAAAGGTGTCTGAGGGCTGCATGATTGTAGAGTG -TAGAGGAATTTAGGAAAAGAAACCGTCTATGTAAGCAAGTTGAAGTTTAAGGAATACAAC -AGAGGAAGACCCCCGAGATACAACAGGGGGCGACATAAATAAATTCATCGTACATAAGAT -CTCAGTCACACGGGGGAGAATGAGGGGAAAGAGAAGGGATTGGTGGGATTACCCCTGGTC -AGACGTGAGAGAGGTGACGGGCCAAGCGACATGTATTGGCATGTATTGGCGCTTCGTATA -CTTCGTGGAATTGTTGAACTTGGAATTAGAATGGTCTAAGTCAAGTGTCAATGCACTCGG -GAAGGAAAAGATATCCTCGGATGATCTTTCTCATCCCGACTTTCCCCATAACCCCTAACC -CCTGTCAGCGTGACTTTCCTGCATACTGTGGAGAGAGATCAGCAGATGATTGACGACTCG -GCTCATTTGACGAAGGGCCAGATCCAATTTGGTGGGGAATCGTGCATTGACTCGGTCTTA -TTCTATTGTCATGTAGCAGCAGGACAGCCATTTGCCAAAAGATATGGCCCAGATGGCCCA 
-AATTGGAACTATCTGACTCGGAATTGCAATTCTTTCAGCAATTCTACACGGGCCTTTTTG -TGACTTTGAACCACTGGAGGATAAGGCCTCATTTTTACCACACACACAGAGTCAAATCCG -AACAACCCCGGTCTCGGTGATGCTACCTCTAAGCACATAGTCACACAGCCAGCGGCCAAT -TGTGCCACTACCGACCAAAACCGAGAGCGACAAAATACTACCCTTGGCGTCATGGAAACC -AATCTCTTTCCGCTACTTTTGCCTCCCTTTGAAGACTGCCGCCTCTGCCAAGGTGACACA -GTTTGGACTTTGCAGAAAATTGCAATTGGGCCTGCCTCGGGTCTGGCACCCGAGCTGACG -AGGAGTAACAGAAGACATAGAAGCTTCTTCTTGAGTCTGAGCAGCCATGGCAAATATCGG -TAAGCGGGAAGTATACACAACGTTGCGGTGTCCAAACATTACCCAAAGGAGCGGAAACAA -CCCAACCGGGATCGGACAGTGACTTCCGGCTACCCAGGCCAAACGGTCATTCCTACAAAC -GACCCCTGAAATGAACTATCAGAGATGCGCTGTCACAAAAGTCTGCTCTGTGGGGAAACG -AAACAAAACAAAAGCCGGTAACAAAAAGGACAATGACCGGCCTAGCCAGTTCAATCAAAA -CAATATGTGCACTTGATGATTCCGAGTCTGTCTGGATTTGACTTCAGCTGATAGCTTCCC -TGCAAGCCTGCACGCCCTCTGGAGCGCTGGAGCTTGGGACAAGGAGGACAAACCTCAAAG -ACACACACTATCTTCTAAGACAAGTCCCTGCATCAGCCAATAGAGCCACCGTCTTGTACG -AGAGACAACACAGCTTGAGTGCAAAAGGACCGGTTACACGGCCGTCCCTGGTTCTACGCC -GTTGATATACCGGACTACTACATCATTTTCGTGTGGTTGAGGAGTTTCCCCCCGACTGGT -CGTACTATATCAACTCGAAGCCCCAACCCTGGCTCCCAGAGCTTGGGATCTCCTTTCGAT -GGCTACCTGGAAAGAGCTTGAGCTTGAGCTTAACTCAATGTCAAAGGTCCCCCGTGAGAT -TCTTTCCTCTACAAGCCATGACAGTGGCTGTGGCTCGCTGTGTCTGGCTGATAAGAACGA -CACGGCTATCCATCATATCAACGCGGAAGTCGAAAGCCCTTATACTCGCCCCCTGCTCTG -GTTCAAAGTGAGAGAGTACTGTCATGATGCGTTCTCAGAGTTCTTTGGGACGATGATCCT -GATTTTGTTTGGAGATGGAGTCGTTGCTCAGGTGCTACTCAGTCATGGCGAGAAGGGGGA -TTACCAATCTATCTCTTGGGGATGGGGGTAAGGCTCTTCTTTTTCTGTCATCTTTTCATG -CTCTCTTCTAACCAGTTCTTATTCAACAGACTCGGTGTCATGCTCGGAGTATACGCCAGC -GGGGCATCAGGAGGTCACATCAATCCCGCTGTGACATTTACAAACTGCGTCCTACGCGGG -TTCCCTTGGCGTAAATTCCCCGTGTATGCACTATCCCAGACCCTTGGTGCAATGTGCGGC -TCTGCTATTGTGTACGGCAACTACAAGTCAGCCATCAACATCTATGAGGGTGGCCCAGAT -ATCCGCACAGTTCCCGGTTACTCGACGACCGCCACAGCCGGCATCTTCTGCACTTACCCA -GCAGAATTTATGACAAAGACGGGGCAATTCTTCTCCGAGTTCCTTGCTAGTGCGGTGTTG -ATGTTCATGATATTCGCTCTAAAAGACAACGGCAATCTAGGTGCCGGGCAGCTGACGCCT -TTGGCGCTCTTCTTCGTTATCTTTGGCATTGGCGCGTGCTTTGGATGGGAGACTGGATAT -GCTATCAACCTGGCACGGGACTTTGGACCTCGGTTGACATCTTACATGATTGGATACGGG 
-CATGAAGTATGGGCTGCAGGAGACTATTACTTCTGGGTACGTTCTACCTACATGGCATGG -ATCAACCGTTCTGCGCTTTCTCAGCTTACCGATGCTTCTGCCTTAGGTCCCTATGGTTGC -ACCAATACTAGGTTGCACCTTTGGAGGATGGATCTACGATCTATTCCTGTATACTGGGAT -GGATAGTCCGATTAACACACCCTGGATGGGAATCAAGAGGCTGTTTGGACCCCTTAATCA -AAGGCGAGTCGTACTGCAGAGTCCAGTCTGACAGAATCCCGCTCTTACGCGTTACATCAC -ATGTAGGCGTGGAATGATGAGCCCTGCACTGCTACACTGGACTTGTTCTATTTTGTCTTA -TTCATAATTTGTTAATGGCAGTGTACAAAGGGGTGGAACTTAGCTACGGTCGTTATGATT -TGTATTTTGCACTTTTAGCCAGTTTCTGCAGCATTCCTTTTTAGCGTTGCGCAATCTATT -ACATTTCCAGCATTCTTAGATAAATTGATGCTTTTTCCCTATGTAAGCTATATGCCAGTA -CGAGTACGTGTATGCACTGGCTGCGTCTATCGGTCATGGGGTCCATAAGATACCTCCGGC -TTCATGCACGATAAATGTTATATGTTTATTCTTTTACTCAATAGAACTCCATCCAAGCCA -TGTGTCATGTCTATCTATCTGGGCTGCATGATTCCAGAAGAGAGCACAACAGCTGCAGCT -GCAGCTGCTGTGCTTTCCAACTGGCACATGATTGTTGTAATCATCTTGCGCTTTTAAAGA -AGTATTTTCAAAGATAGTAGGCTGTGATTTGCAATCTATAATATTCATTTCTATACATTA -TAGCTTAATTATAGCTTACCTAGTTATCGGAGACTTATAGGATGCTGAGGCCTTACCCTA -AGCAGTAGGGCCTTGCTTCTTCCAGGAGTACTCTCTTTCTCCTTTCCTTCCTATCTAATT -CGTATCTCCTGTACCACTTCAATTACACTTCCAAGGTTCCTCTAAGCTTGTATCAGGTAG -ATAGACCTAGTAAGCATGTGGCAGTCCACTTGCTGTGAAGGCGGCCAACGCAGAGCCCAT -TTACCTCTTTCCGCATTATTATGCACTTACATCTGACGACTTATAGGCATCTGCATTTTT -GTCACAATATCTGACGCTAGTCCAACGCCTATTCTTGCCCTTCTCGAGTATCACCGCGAA -CTTTTTGTGGACCCACTGCTATGGATATTTCGTCACCTCGACCTAGTGTGTTGTCAATTT -GAAGATGTACCGACTTATGGCTGAATTGGTGGCCCCTAGCGTCGTGCTTTTCTCTGAAGT -CTCAACTCTGTCTGACGAAACGAATGATGCCAAGTAAATTGATCCCGTTAGATTTTGATC -TGTCCTCACCGGCAACATGTAGCCGGGTTGATTCAACACCAGATGTACTAGGGCCAGAAG -AATCAGTAATTGGCAGTACAACTGGGGCGCTTTCTTGAGACACAATATTGGCCCCCGCTG -GGCTTCTTCCCCGAATGCTTTTCTGACACTCAAAGAAGCGATTTCCGCAACCTCTATCCT -AAGCCTTATCTAGTCTACGGTTACTGGAGAAACTAACATCACAAATTCAAAGACATGGCA -ATGCTTGGATCAGACTTTAGATTAGACCATAGGTATTACAAGAAAGCTGATCCACTAGAA -GGTAGGATTCCCATAATGTTGTTAGCTTCCGTTCTTAGTATATCGTCAGCATAGCGGTCC -AATCAGCCCCGTTTTCCAACAGCATATGCTGCACGGCAGGAGAAGCATATTTCCAAGCAT -AGTAAAGTGGTGTTTGTCTCTCCCCATCCATAACATTCACATCCGCGCCCCAACGTATAA -GACATTTCACTATCTCTTCTGGGGTGCCTGAATCATCTCCTTCTTTCCAGCAATATGCTG 
-CAACATGCAGTGGACTAGCCCCCTGTGGAAACACAAGAGTCGGTGTCGACACATTACCAC -CGTCGCAACCCTCAATGACTGGGTGTTGTCGGCGTATAGGGGGACGTATACAACATAAAT -TGTGACGTATAGGTGGACGTATTTCGCAGCAGTGAATGGCGTGATGCAGAACAGTATATC -CCTCAGGGGTCCTTTCATTGAGATCAAATCCACGTCCAAGAAGCAGCTCCAGTATGTCGC -AATTACTAGAATTCACAGCGCGCATTACGGATGCGAGACTGACCCGAGCTCCATGGTCTA -GAAGCAGGCGGACCGTCTCGAGAGTGCATCGACTTGCGGCCAAGCTCAACATGCTCGCAT -GAAGATTTGCTCCATGTCTTAGAAGCAAATCGACTGTCTCAAGGGTTCCTTTTTGAAGTG -CTTCACGTATCAATCTCCCTCTGTCATCATCAGCCAAGGCAATGTCCGCTCCATGATCAA -GAAGGACCTCCACCAAAGATGCATAGTTCGGCTTTCTGCGCACTGCCCAAAACAACGGAG -AGATTCCGCGGACTTGGGCGTTTACATCGACACCGAATTGCATGAGAAGTTTTAGCATCG -GCATTTGGCCGGAAGCCACTGCACAGTTGATCAACCCTCCGTATAACAGGTGAGGTCCTT -CTGTCTGAGCCCCCGATTCAAGAAGTAGCCTGAGGGTACTCTCATGACCCGCAAGAAGGG -CTTTGACCGCGGGATACCTTTGTCCTTCCCAGGATCCTTCTTCGTTGACGTCGTTGTTTA -ACTGAGCTCCGTGGTCTAGGAGCAGCTTTACTATAGCATCATGTCCACCGTTCGCGGCTA -TGCTCAGGGGTGTCTGTCTTGGAGGCATAGGTAGAGCCTTAGTTAGAACCCTAGAGACCC -TACTAGGTAGAAGCACAGGTGGAAAGCGGTTTTCTATTTCTCGTCTTCGAAACTCGGACA -TGGGCTGGACATCCACGTAGGCCCCCCAGTTAAGAAAATGCCGTACGCCTCTTTCATTCC -CATCGGTGGCACATTTGAATAAACCCTCTCCTTCGGAGAATTCCACATTTTGTTGATAGA -GACAGGGAAGAAGAAGGTCGTATAGGCGATGGTTCACCTGTAGTAGGTGTGAGAGGTCTT -CATCGGATTTCAATGTCGTACTTATCAGGAGCAACAACTCATTGGGTAATTCGAGTAAAG -AAATCATAGGATCGAAAAGTACTGTGGAATAATCAAGCCTAGTCCTTTCCTTCAAATCAA -TGGTACGTTGCATTCAAAGGAAACCCCACTAGTGCATGTCTGTCATCCACCGTAGGTGTG -TGTCAACTGGTCAGTCAGATTATGTAAAATAGTTGAGGTGCTGAGTGCGAATTGAAGAGA -AAAAAATGCCATTGGCTATAATGTTTGGCGGAGGGCGATCAGGCAGGATCTTGATCATGA -GTTGAAGGCTAGGGGTTTGGAGTATTTGATTATTGATTCGACTTCTGATATCAGAAAATC -ATGAATTTGAAGAAGCAACCTCGACTTGTGAAGTCCATAGGACATACCAGCTCCCAACCG -GAAGGGTGCTGAACCACCTTCATATGCTTGGGCAGGGTCTACAAATAGTGCTACAAGGTA -GGTAACTCTACGTCCAAAGAACATGCACAATTGAGAAGCAATATCAATGCTGGTATGAAT -ATGATACTGGTACATAGGCATGGTGAAGACACGCAACGAACATGACACGTATCTCACATA -AATAGAAACAAAAGATCCAAGTCAGCTAGCTAGCAAGGCTAGGAGAGAGAGCACCTCGAA -CAGAAAATCTAATGCACACCATTACCGCTTGCCCTTCTTGCGGGGAGCGACAACCTTGAA -CGCAGAGCTGGCAGCCTCTTCCTCGCGCTGagcagcagccgcaacggtagcagcagagcC 
-CTTCTTGGCAACCTCACTCCAGCCGCCGCCACTCTTCTGCTCACCGAAAGCACTGGGTGA -GACTGGGTCGACAATGCCCTTGTCCGCAAGCTTGCGGCGGCGGATGAATTCCTCCGCGAA -GCGGCGTCCGTCCAGAGTCTGGGAGTTGGCGTAGACAGAATCAGAGATAATCTCAGCCTC -GGCGGGGAGGAAGAGCAATTGCTGAACGAAATCGTCGACTAGAAATAACATGTTAACATG -GAGAAAATGTGGATTAGATATAGATGAAACTCACCATTGATGCTGCTGTTCAACCCCTTG -CTCAGAGCCAAAGTAGCCCACTTGGTAAACTCCTCGACGGCCTGGTTGGGGTTGGACTGA -GAGATGCTGCTGCTTGCAGCACTACGGGTAGTGGTCACAGTTGCAGGACGGGGCTTAGCG -ATAAGAGGCGAGATTGGGACTGTTCCGCTAGCTGTGCGGGGTCCGAGAGGTGGCGCAGTT -GGCACACCCTTGACCTTACCACTAGCACCGACAGTGGTCCAGGCACTGGAGCCCTGACCA -GCGGGAGAAGTAGGGGCAGGGCCCTTACTAGCAAGATCGGCGTAACGCTTGCCGACAGAG -GAGGGAACGGGGGGGCTTGGGGTCGCAGCCATGgcagcagcggcagcagcagcagcGCGG -TTCTTGCGGGCCTCCTCCTCTTTCTGGATCTGGGCGAGGGTCTTCTTAGCGGTAGTGGCG -GTTGGTGCGGGTTGGGCCTTGGTGGCCCAAGGGGAGCCAGCCGAGGCAGGGGTGGCCGGA -GATCCAGCGCTAGCCCAATTAGCCGACGAGGGAAGGCCGGGGGCTGCCTGTGCCTGCACT -TGGCTGAGACGCTCTTGCTCCGCAAGGAATTGGGCACGACGTGCCGCAGCGGCGACTTCC -TCGCGTTGGGCGGCAGTACGAGCCTCGGCTTCCTGAATCTCCTTGAGAGAGGGACCCTTG -GGCAAGTCATTTGCCTCCTTGGCCCATGGTGCGACAGAGGTAGGCGCAGCGTCAACAGGA -GTCTGTGTCTGGGACCGGGACCCAGTGGCCAGAGACTCGGCGACGTTTTGGCGGTTACGC -TGGGCGGTAGGTGCAGGGAGTGGCGAGGCGGAGGGTGGGGGAGGGAAAGGTTGGGGCATG -GCGTGGTCGACCTTGCTTCCCCAGACGGACTCGGGAGGGCCATGGCTATGGTCATGTTCC -TGCTGCTGGCGTTGAGCAGAAGCAGCCTTGTGAACTTGCTGAGAAAGAGTAAGGGGCTCA -GGACCGGTAACAGAGTCGGTCAGCTGCTGGGCTGCCTCCTCCATCTCGGCCGCAACCTCT -GCTTCAAGTTGCTGAGATACAGCAGCGGGGTGGGTCGGCAGGCCCTCGGCGGTGCGCATG -CCCATCTCGCCTTCCTGGGCCCGAAGGGCATGGAACTGGCGAAGACGCTCCTCGCGAGCA -TGCTGATCAAAGGGATCATCTTCGTGGGCCTTATCGAAGTCTTCCTGCTCTTGCTGAAGC -TGAGCACGATCCTGAAGCATCTGAGTAACCTGCTGGGACTGCAAGAAGTTGTCGGGCTGG -CGGCCAGAGAATGAGCCAGCACTACCAAAGGCAAATGGATCCTGGCGGCCGACATTGAGG -CGATCAAGTAAAGTGGGGAGTTGATCCTGGCTGCCGAAATCCGTTCCCAGCATTCCAGGT -CCCACGTTCAAACCACCCTGTCTCGGTGCAACGCCATCGAAGAATCCAGCGACACCTTGG -GACTGCTGCTGCATGGGCGCCTGAATCGGGGATGGCTGGTAGCCAACAGGTGAAGTGATG -CTTGAGAAGCTAGGCTGGCTTTGAAGACTATGTGCGCTAGACTGGTGCTGCAGCGGAGGG -TTCATAGAAGATGAAGGAGCACCAGGAGGGGCCATCTGCTTCATAATAGCCTGCTGCTGA 
-GCCAAGTGTTCCTTCTGCCGAGCCATCAAGTATTGTTCTTCCTGCTTGCGGCGTTCAAGC -GCATTCTGCTGTTCGGCTGTCAAGGTAGTGCCGAAGCTTGGGAAACTACCTGGGAAAGGA -GGCTGCGCAGATCCGGCGGTTGTGTTGGTCCACTGGGCAACGGTCGGCTCCGGGCCATGA -GGAAGGCCAATCTGGGGCACAAGGAATGGCTCGCGTGAGTTCCCAATGCGTCTCACTAGC -TGAGCTAATGGCTCGAATTCAGTATCCTCAAGCTTCTTGATTTGCAAGTCTGGGCTGAAG -AACCCGGCCTTGAACCAATCATGCATCTCCAGCCCAGTCCACGGGCCCTGGATGTTCCCT -TGGGGGTCCCGGTAGATCCAACGCATCCTATCCGGCATAACCATGGTACGCTGCTGGGAA -GCAGGGATGCCTGCACCAGGAGTTTGACTAGAAGGAATATCATGACCAGCCAGGTTGGGA -ATAGACCCAACGGCCGAAACGGGTGTCTGCGAAGTAGGCGTTGAGCCAGGGTGGCTTGCC -TGACCCGCCTTGTCGCCTTGCGAGGGGTCGAGTCCATCCAAGCCGACAGAGTCGGACCTT -CCTTGCTCGTCCTGCATGGAAGAGAAGAGGGCACCGAGTTTGCTTGATCGGCCGATGCTT -CCAGTACCAGTCATACCGGCGTGGGGGCTAAATAGACCACCTCCACCAAGGGTTGACAAG -CTAGGAGACTGAAGATCGCCCATAGAGCCAAATATGGGGTCACCGAAACCGCCACCAAAT -GCAGAACGCTCACGAGTGGGCGTGCCGACGGCAGCACCAGATGACCAAGTGGAGGCTCCA -CCGAGGGAAGAGAGGCCACCCAAACCACCGAGACCAGAAAAAGGGCGGTTAGCATTAACA -CTTGAGTTCTGGCTGCGGTCCACGGTAGGAAGATCTATTCCAGAGCCAACCCGTCTCATC -GAGCCGAAGGCGTTGGAGGTATCCCTAAGTCCACTGACTCCTGGATGCTGGGCTTGCTGA -ATATCCGATCCGTCAGTATCGACATCTTCGGGTTCCATACGTTCGCCATGGCGGTCACCA -TGCGGGCTCTGGTAGGGATTGGTGTTGGTTGGACTGGTCGGCTCGTGTCCCTGGAGCAGG -CTGGGAGTTGGATTCCGCGAGTTCTCCTGGCTTTGCATCAACTCCCGGAATCCAGGAATG -CTCGAGGTCATTCCGAATGCACCAAACCCGAGATCAGGTTGAGCGGGACCTTCAACATCC -TGAGACCCACCAAGAGCGGCGCTACCACTGCGAGGCTCGTCGAACGGATTAGTTTCGCTT -CGGTTGACACGAGTTTTGACGGAATCCCCCCAGGGCGATTGTGAACGTCCATCACCTTCG -TTTTCAGCAAGACGTTCAAGGTTGCCCATAGGCTTTTCCTTAGCGGAGCCGGCCATATCT -TCGGAGCTATCCTTGGACAACAACCCCTTGAACCTGCTTTCTCCGCGGAGACTTCCGTAT -CCTGGTTTCTTCTCGGCCGTGGGAGTGCCGGAGGCGGAACCAGAACCAAGGGAAAAGGCA -CCAAAGGCACCCATCGGAGAGAAATTGGCATTATGGGAGGCCGAACCCCAGGGAGAGTTC -GGTCCACTAACGCCCGTATTGATGGGGTTGGTGGAGCTGCGCTTTAGGGTGCCAAAGGGA -GAAGCAGGATCCTCCCTGCTATCCTTGGCCTTCTCATCCACCTTCGGATTAGGTTCTCGG -AAATCAGTTTTGCGACGCAACAGGGCCGGTGGGGGTGTAGAGGTATTGGCCTCATCGCGG -AAAAATCGAGAGTTGGAACCGGTAGGAGACATGGGGTTGCCAATGGATTCTCCCGTTTCC -CGGCGTCGGGGACCTGGGCGACCAGCGGTTGGGGAGGAGGTGGTGTAGTTATTGATATGG 
-CCCTGAGGATAGGAGATCGACGACTTGCGGCCTCCAGTTCCCGGGATGACATTTTCCTTA -GCGGCGTTTGTTGGGGGTGGCTTCAGAGGGGAGTTGACTGAAGCGGCGAACATCTATGAA -CCTTTTAGTAAAAGCACTCACCCAGACCCGAAAGCACAACAAAATACAAACCTCTCTCTC -GTCTTCCGTCATACCAGTGATTCCGAGGGGCTCCACCTGGCCGCCATGATCCCAGCAAAC -TTCAGGCCCCGCGGAGTTGTTCTTGTGGTCATCCCGCTTCCCCCAAGCTCCATTCACTGG -GGCTGAGACGGTGTGGGGATCCCAGTCGGCTACGAAGTATTCCTCCACATTCTTACTCAA -GGTTCCCTCCTCCCGCTGCGATTTATACAACTCTAGAAGTTGATCCTTGGAGTACCGGGC -GTCCCCATTCCGTGCAATGCTAGAAGTATTTGAGCTCAGGTGTGGGGGGATATACGCGCC -GCCGGCCGTGGTTGTCGAGACAGATGCAGGCTGGCTACCATCCCGGTGTTGAGAGGGGGT -AGTAGCAACAGAAGGACGGCGGAAGGTCTGCGTAGCTCCATTCATGCGGTTTCGAGACCT -AGTAGGCATGGAGGGTTAGCATCGGATATCTGGAGCTTAGAGCATTGATGGTCACGGCAG -CAAACAACGATGTTGAACCGACTCTGCTGAACAACAGCTAATGGAGGGGTTTCAAGAAGG -TGAAACAATGTGTGCATACCATTCTCCGCTAGTCGAACCATCCCCCCTCCGGCTGGGGTC -ATGGGTGTTGCCGGCAGCGGCCGAAGCGAAACTCGATGGAAGTGGTGTAGGCATTGTGGT -AAAGAGGCTTGTACGGTCGGATGTTGATCCCCCACGTAAAAACCGATCCTGAGATAATTA -TCGCGAAACCTGTGGCGGGCTTGCAAGAGAATGTGTGATGCGAACGGAATAGGTCGCAGG -ACACGAGATTGGAAAATTGGGTTGAGTTGGAGGATGACAGCTAGGAGCTAGTCAATCGAA -GAAGTGGAGAGCTGTAAGAGAGGTAATGCAGATATGCGAGCGCCAGACTAACAAGGTCGA -GAAAGAGAGAGATCGAGTTTCGATTTCGCTTGGGAAGTTTCCTGGAGCAAAAATGATTAA -GCGGTGGCCGTGGCAATACGCGTGCCTTGCGGTAAATACCCGGTACGAGATTGATAGTGG -TCGCGTGATTTGCCGCCTAAGAGTAGTGATTACAGCGTATTTGTTGGAATCACAATTACA -TACCGCCAAGAGCAGGTATCCGATACACGCACAACACTGTGTCTTCATACTCTGCCCAAT -GTACCTGATACCTTGTTTTACACCGTTCTACCCAATTCTGTGTAGCACAGACAAGAGACA -TTGTATAATAGATGCGGATAGAGTCTTCAAACAATCTACACGAGCTTCAACTCAGAAATC -TACTCTTTCGTCAACTCCAAAGACCAGAGATAGCAAATACGTATGCAATTAGGTGGCTTG -ACGCAAGCAGAGAGATGTGGAAACAATAGGATAGACCTGTATTCCCCACAATAACCACGC -TCCTTATTTAATTGGGTGTTCATCTGCTTTCAAGCAGTTCACCTTGGAAGTTAGGGATTG -GATTGGGCTTCTCAGGACAAAGGAGAGCATGTACTCCATAGATATTTCACAGATGCAAAG -GGGGTTGAAATACACCTACCTTTTGTTGCCCAACCTTTACAATGCTGGGCAAACATGTGT -CCTAAAAAAGATAGATTGAACGGTCTTAATATCTTTAGATTTGGGCTGGAATCCGGTTGG -GTTAAGAATCCCATTATCTGCTGTCCCTGTGAATAGGGCTGGATCATTGTATTGCCCTTT -GACCTCTACTTTCCCCCTGTTTGCCCTTTGACTTATTCCATTATACAGCTACAATGCTTT 
-CTGAGATTTGCTCGCTCTAAGGGTAACACAGAGACAACATCAAACGACACAATGAACGGT -TCAGCCCAAATTGATATGATGGGCTCAGTCTTAGGGCTTCACGGATTTCATCAAGACTCT -ATAAAAATGACATTCCCTTCTCCGGCAATTTCCTCTTTACATTTCTTTCGCTTCAAGACA -ATTCAGTCAAGTTTCTTTGAGCTATAGACTTTATTGCATAACAGAGCTCCTTCTCTCTCT -CGTGGCTTCCAATACCCAAGTGCATATCGTCTTTGACCAAAGAACATGAGGCGGTCCAGA -GTCACTGGTCAATCTAAATCCAACGATACAGAATTTGGCTGGTACTTCGCTGGCGAGCCA -ATGGAGAAGAGACGCTCTCGAGGCGGCCGAATCAGGCTAGTTCGCTACGGTCCTGAGAGG -GAGATTTGTGCCCACAGTCGATGTCCGTCGCCATCTCCCAGTCCAGAAAAGGCACCTTGC -TCCTGCAGCAAAAGATGCGAGGCTCACCCTCACGGGACACGCTCCCCATCCCCCAGCAAC -ACCGACACCTCTCTTCATGAGATCATACAATGTTGCGGAGGATGCCATGCTATCAGACGC -GAGCGCGAGTTCTCTCCGCTGGTGGAGAAATTGATTTGTACCTGCAACCATCGGTGTGAA -TATCACTCGCACGAATCACGTCCCATCACGCCTAGCAGCAGTGAACATTCTAGCTCTGAA -TCAGCCACCAAACGCTATGGTAGCCGTCACTCAATGAGACGCGAGCGTCGCTCCAACGAC -ACACGTCCTGCCACGCCAAACAGCAGTGATGTCTCAGACTACGATCCGCCCAGAAGACGC -CGCAGCGGTCACCGCATCCCCCAGCGCGAACCAGAGCATCATCCCCAGGTGGATAATTGC -CAATGCGAGGTACACGCATCCCCTCTCACCACACCTGTCATGGCAGATCAGAGGTACAAC -TCCCCACGTGCGTCATTCACGACAGCACCACGCCCCAGCCCAGTAGTCCATTACCACCAT -TGTCCCAAGGCGTCTTATTGCGAAAGTCACCCGGAAAGTTGCCGGGCTTCAATACCTGCA -CCATCACCGGTGCCGAGACTGACGCGAACTCCGACGCACTCCCCCCGGTCGAGTGATAAC -TCCCGTTACAGCTCGCGGGAAAAATCAGAGCCGAAATGCACTTGTAGAAGGTCTGCTTCT -CCTCCAAAGCAAGCTCCTCCCCCAAAGCCAGCGCCGTCCGCCAAGATTACATCACCCAGG -CCTGATTTTCGGCATTGTACCTGTTTCTACATGGATGGGTACCATGCCGATGACTCTGGT -TATTATGACGACACTGCTTCTGAGACCTCCATTCAAGCCATGCCTGGCCATGATAGGGAT -GTGGACGAGTCATTTTGTGTTTATCATCACCGTTGTTGTCCGAGATTTGAGTGTGGTCTC -GGCTGGGTGTGCGGGAGGAAATAAACAAGTCTTTCTGTATTTCATTTCAATGGAAGGACG -ACGCATGCTGCCttctttcgttgctttctttctttttttTTGCCTAATATGTGATCTCTT -GGGGTTGAGATGCCAATGCCATATATGTAGATTCTTGTTTGGATGATTGTCTTTTCCGTG -TCAGAGGTCCAGGACAGCCCCTTGTTCTTCGTTTTCGTCCGTCTGACTTGCGCTCTTGGG -GGTAATGATTGGGCCACTCGTTCAGCTCTACCTTTATAGAGAAATTGATTTGCTATGATG -AAACCTTCAAATCCGATGTTCCCTTTGGCACTATTGTAGTCTCTGCTTCAGAGGTCGAGG -CGTGAGCCCACTCAATTACGTAGTCAATAGCTTCATGACGATGGCATGAGTTTCATTGTC -GCCTACGGGAAGATTATTGTAGCCATTTCAATTTCACTCTATTTTGCTTTCACTACTCAT 
-CTCGGTTGAGATCATAAAAAAGCCCTTGTTTTAATTTGGAAGTAAGGTACAACACGAAAA -TTTCGTCTCCTCAAAACGTTCCACAAACAAAGGCACAGAAAACCAGTCGTCGAGGTGCTC -ACTTCTCATCCTCATCATCGCTGGCACCCTTAGTCTCCTCGGAGTCATCAACGCCCTCCA -TGGTGGTATCGCcaggggcagatacagaggtagagacagaggcagaggtggaggtagaga -cagagatggagtcagttgcagaggaggcagtgtcagcagcagcgtcggcggcagTCTTCT -TTTCGTTTGCCTTTTTCTCGAAGGTCTCGAGTCGGGTCTTGAGTCGGGTCCAGCGCATGC -GAGCGGCACCAACATTGAGGTTGAGATCCTCAGCTAGAGCAGCGAGGCTAATCTACAATT -TATGTGAGCTCAATATGCTGTGCAATCAATAAACCTGAGGGTGGGTTGATATTACTTCCT -TGTTGGTCAGCTTGAAAGCCTTCCAGATGAGCATCGTGTCTTCATCTTTGAAGTTACTGG -TGGTAGCCTGGGGCTTGCGAACGGCAGAGCGAGACTTGACGTCGGTCCTTTTTAGCGGCA -TGGTTGGTGAGATAATTGCAAGTTGATAGAGAAAGTTGATAAACAGTTGATAGGCTATTT -TGGTTTGATCAAAGTAGTCAACGGGATGgaagtggtaagagagaagagaaaagaagagag -gaggagagaAAATTAATGGCTGTGTTCTTATAGGGGTCGCTGAGTCGTTGCATCGCTAAT -CTCGTCGCTAATTTCGTCGTGAATTGTTGTCGCTAAGTCGTTGCGTTTGATATGCTACCA -TAGGTACGAGTCGCCAGTCCCATTACCACCTCTAGTAACGCGCATCCTATGAGTCGCTGC -CTATGATTTGCTAAGCCAAAGAATACGAAGTAGTAATCTATAAGCTAGTATCTCATGGTC -CAAGATCCCTTGTCACCCAATAAGGAATGATTATTACCAAATAGGGTCATGCCAGTGTTG -CTCGATACGCCTACTATAAACTTATCCGACTCACAGACTAGGCCTTTATCTCAGCCTAGC -GTATTCATATTATTGCGATTGCCTTTGTTCCTGGTCATAAAAACCACTGGGCACAGATGC -CACAAGGAGGATCTACTAGATATGCGTGATCTTTTCGTTGCCCGCCATATCTTGTACCCC -GTACTTAATTGACAAAAGTGTACTCAATTCAGAAAGATCGAGTTTTCTGCTCTCGCTGTG -ATGATTGATTTTAGATCCTGTTTATCAGCTATGTTGGTTTGCTGTTCAAGGCTACAATAT -ATATTCGCAGAAAAAGCAAAAACCGCAACATACAGCAGATCCCAGCTACAATAAACCGAC -TCTTCTGCGTTCCTGTATACTAACTAAGTACGCAACAGACAAGGAACATACAGAAACTAT -GAAATGTCACTATATATAAATCGGTAGCGACAACCCAAACACCCCGTGTTTTCTAGCGAT -AGACGAACATACTATAATCTCGTTTTTGTCATGCGTTTGGTCCAGCTCATCTTTGGTTTC -TAAACTAAATTAGTTAGTACAGCTGATTCGCAACGTCCATTGATAATCCAACATGTGGTC -TCGGACATTCGGCCCCAGACGCGCGATTCGACCTCGGGGGAACCTCGGCTATCGATTAGA -ATATCGATAAGCATTTAAACATCCCCGCACAAGCGTCCTGACTTTCAAGGTCAATTGATT -TTGATTAGCTTCATCTTAAATATATCTAAAATGTCTAAAGCAATTTACATATCCCCAATT -GACGGCAAGCCAGGCAAGCCAGGCCAAGTCTACTACCCGCTCTCCCTACGCACTGTGCCC -AAGCCTTCCCCGCAAGGGGGTGAGCTACTGGTGAAACTGACTGCTGCGGCGCTCAACCAC 
-CGCGACCTCTTCATCCGCCAACATCTCTATCCCGGCGTGTCCTTTGATGTGCCCTTGCTC -GCAGACGGAGTTGGTCATGTTGTCGGTGCTGGCCCGAACGTCCCCAGCCCAGAAAAATGG -CAAGGCAAGCGGGTGATATTGAGCCCTGGTGCTGGCTGGAAGGATTCACCCGATGGACCG -GAGGACCCTGCTGGACAACGGATTATGGGGGGCACCAAGGTTCTTGATAAGGGCACCTTG -CAGGAATATATTACTATCGAGTACTCGGAGGTTGAGGAGGCCCCTGAGCATCTCTCTGAT -GCGGAGGCTGCAGCTCTGCCCCTGACGGGCTTGACGGGCTGGAGGGCTTTAATTACCAAG -GCTGGTGAGAAGAACTCGGGGGATGGTGCTGCTGTCCTAGTTACTGGCATTGGTGGTGGT -GTGGCTCTGATGGTACTGCGGTTTGCTGTTGCTAGAGGTGCACATGTCTTTGTGACGAGC -TCGAGCCAAGAGAAGATTCAGAGGGCTATTGAACTGGGCGCAACTGGTGGTGTTAGCTAT -AAGGAAGATGGATGGGAAAAGAAGCTGCTGAGTATGCTTCCAGCTGGGAAGAAGAACTTC -GATGCCATCATTGATGGTGCCGGTGGTGATTCTATTGAGAAATCTGTCAAGTTGCTCAAG -GTGAGTGCCACAACGGGACATGCGCTCAATTATGGATGAAATTGCTCACACTTCTAGGCC -GGCGGCGTCCTCTCTATCTATGGAATGACTGTGTCTCCTAAGATGTCATTTACGATGGCT -GCAGTCCTTAAGAACATTGACGTTCGCGGTTCCACAATGGGCTCCCGGAAGGAGTTCAAG -GAGATGGTTGAGTTCATCAATGCTAAGAAGATTCGCCCTATAGTGTCGCGAGTTCTGCAG -ACTGAGCTAGATGACTTGTCCGCAATTGACGGGCTATTTGAAGACATGAAAAAAGGCACA -CAGTTCGGCAAACTGGTGATTGAGTTTGGCAAGTCTTCGGGCAGTAAGCTGTGAACATAT -TCTTGTGACGATTCCCGAGGATTCACGGCGCCGGTATGGCTGTCTACATGTATGCACCTG -ATACTTGCCAATAGAGATACCTTGCAGCAACTGCGAGTGTCCTTCGTACCTTCCCCCTCG -GCCGGCTCACTTCACCTATCACCACATCATAACCTCCATGCATTTGCAACAGATCACTCC -CCATCATCTTTTATTAGAAATCCAACCGAGTAACCGAATCTCGAACCCGCTCATTGTATA -AAAAATTCTTCCATTCATTTCCGGTTTCAACTCCACCATGAACTCCAAGAAGGACCGTAT -TTCCGCCCGACAAATCTCTCGCCCCGTTTCTCGTGGAGGCTGCCATGTGCCTCGTGTGTC -CGACTCCCTCCGTGTGTTAGAGGATTCGTGCGACGGAGGTAGTTTCGAGGTATTACCACG -TCGGGAAAAGGTCGACCCTATGAAACGGCCCTGTTGATACTGACAGTCAATACAGTCCTT -TGATGCCACTCACTATGCACTGCAAACTCTCAACAAGCACATCGAACGAAGGCTCGCAAA -GCTGCAAGCCAATACAATCCGCCTGAAGCGTGAACTGTGGCTGCTCCAGCGACACATCAA -AGAATTTCGCCATCCTCTTTTCGAGAACTGGGAAGCCGACATGCTCACCCGTCTTATCGA -AGTTGCTCACGCCCATCAGCACAAGAAGCTACCAGGTGGCGTTGTGATCGGAGAATTCTC -ATTTGCCGAACGCGAAAATCTCAATCATGCATACAGCATCGCGGCAAAGGCAATCCGGAT -GCCGACTCTTCGTAAACTGGGTCTATCGGAGAAGTATCATCAGGCTCTGCAACGGTACTC -AGAGGTGGGTTATGACCATAGTCTTGGTTTCACGTACCTTGACCTAGTCTCCGTCTAACC 
-TGGATCAGGTTGCTCCCTATCGAAGCCCAAATCCCTTCCAGACCGAGTTTGCATTCGCCA -AATGGCTGGTCGAGGTGCGGGACGACAGACCGGAACTGTATGGATTCTGGTCTAAGCTTT -TCCCCGTCTGCTATGACCGCAGTGTCCAAGAGAGCGCCTCCATCTTCTAGGGGACCCTAA -AGGATTGACCTCATCATTCTgtcccgtctcgtctcgtcacgtcatgtcacgtTTTTTCTT -TCTTTCCCTGTGGCTATGAGCCAAGCATCCAAAGGGGATTGTTTTGTGGATATAATATTG -CTTCGTCGTTTAAAAATCTCCTCTTGCTAGAGGTTTGTGGGTCATCGCTCTTTGTTTTGG -TCTCATTCTGTGGTGTGCGGTTTCACTGTGTCTCATCTCAGCTCTCCATGTAGGTACCAA -ACCTATTATTATCTTTGACGTGTCAGAACGCAGTACTTGTTTTCGGGTCATATTCTCTCG -AATTCGCTTTAAGTGATGAACTAGTAGATAATGTTTCAGCTTCATTTTGACGGCTAACAA -TGCGTGTACATATATACGGAGTACATCCAGCCTATCTAGCAACATATTACCACCGTTGCA -TCAGATCATAAGCCACCTACCCACCACAGTTGGTATTCACGGATCACTCTGATCCAGAAA -CCACACAGTCTGGATGTCAAACGGGTCAAGCCTTCTGATGTGGCTGGCACTTAGTCCCGT -CATTCTGGTCAGACGGCTGAGCAATCCAACGGTCGAGCAGCAAAGACTAAGCAATGTGTA -ATAATGCGATACACCCCACGTCCACGATCTCGAGAACCCTATATCATCCCACATGCTGCA -TGACGCAGTCAATGAGTTAGTCAATATCATAGCAGTTACACAGTAAAGTTTCCGCTCGCT -AATAATAGCTCCTTTTTACGAGTACAATATAATAAGCTTGATACTAATTGCTAAGTGCGA -ACTAGAGAGTGAGACAACCCTAAGCGGTATATCTTTCCATCACTCCTTCAACGTGTACCC -ACTGGATGAGATCCCAAAAAAGAAGCCGCTAAAATAAGAACCCCAAGAATAGTAATTTTT -CGCACCACACATCCATGATTCGCCAAAACCAACATGTCTACCGGAAACCTAACAACCGTG -AAAGGGGACAGTAGCAGTGCGACCGACAGAATGGGTACATGGCTAAGCCCAAGTCGCAGA -ATGCCCTAAAATAAGAAAAACTCAAAAAAAAAACCGACCAAACAGCAAGACCCTTCGGAA -CATAGCCGAGTCAGGTGAGTTGGGGGTGGTAGACCCTGAAAAACGATCCACGAGGGTTGC -GCTGGCGCCTCGCCAAGAACTGTCCCCCGTAGGGCGGACAAGGGCTTATGTATATCAGGA -TATTTGATGTCGTACGTTGTATTTTGTATATAGATTATGTTAGACGGTACCGAGATAGAC -TAAGCAAAGGTCAGTGGTTTCTAAGCGCGGAAATACCGGTAAACTACCAGCCCAAAATGG -TTGCTGCCTACCTTGGCATTACCGGAATAATGTGTTCATAACATCCACCTTTCGGCTGGG -AAAGGAAAAAAGTGGATGAATTCCTCCGTACACTAGACCACGTTGAAGTCATAAATCTGA -TTATAGCCGGGTCCGACATAACCTGACATAGGGTCCGATATTTGACCAGGAACCAGGGAG -GCGAAGTTTGGGCGGAACTGCCAAAATGCGAAAAGGGAAATTTGTCGGAAGGGAATAATG -GGTCCCGAAGACCCGTGAAATTTGATTTTCAATTCCGAGCTCCACTTGAGATATTCCTAG -ATATCAGTATGTAAGTATATCGAGTCAACGTTCCATGTAAACGACTATGAAAGGTTCTTT -TCTTTAGCCTTGGGTTAAAGTTTGGGCTGACACTCTATAAACCCCTAAATACGGAGCCTA 
-GGTACGGAGTTTCGAAAACGTTTTATAGGTCCCACTACAGGTTTGGTAGAGTCAATCGCG -GTTCCATCAACCTGAAACCATACTTTGACTTCGATTGTTTCGATCCGCGACTTGATTGGA -GGGATCGCATTGATCGGGGTTGAATGTTGTGATGACTTGTGAAATAGTCGAAGGGTGTAC -GGAATACTGTTCCTGATGTTGTCGTGATATATACATTGGTTCACACCACCGATATGCAAC -ATATATTTTTACTTCTACGGTGATTCCGGATACCCTCTTTTAATTAAAATTGACATGGAA -ATAAATTCAATCTGCTATAATTCCCCATACAACACTCAACTAGAATATCGCGATACCAAA -AAACTATGATCCCACCTATCTGATCCGAGATTACGCCCAAGCTTCTTATCCGAAATGGAT -CGTCATCAGGATCGGTCTCGCAGCCCCTGACTCGTCTAGACCACTCAAAGAGCTATACAT -TCCAAAACGCGCGACTGTCTTCAGACTACTAAAAAAAATCTAGCATCGATATTGTTTCAC -CTGTAGATATTTGGTACGGGGAGAGATAAGAAAAAAAAAGCAGAACTCTAAAGACACGGA -ATCTCGCATCAAACCCACGAGAGCAGCTAAGATATACCACTAAGGCGCCCCAGATACTCC -GTACAGCTGGCTACCATACGGAGTAGACGCAGGGAAACGTGAAAGCAGTGGAGAAGAACC -AAGTTGGTGTAAGATCAGCAGGGTTAGAGTTGGGGCGGTTTCTTCCCTGGTTGTGTTGAA -GATATTCTACATCAAAGAACCTATGAAAATGAATAAGACAGGCTTCGCAATCCTTTTCAT -TCTCTCCACACTGAAACATACGAGCTCGACATAAGTGGCTTCGATTCAGATGGTGCGGGA -AGAAACATGGCTGATCTCGTATGGAGACATCTATTTTAGCCAACCCTCTGGTCTCTTTCT -TGTCTGAGCGCAAAATTAGAATCGTGGCAAATCCCGATCTCAAAGGTAAGCCTAATTTGC -TGCTGCCAGTTTTGATCAGTTGATGCGATCACCTTGTTTTTTTCTCACCACGGGTGTTTC -ATTGACACAGTATAGAGGCTACTACCGGAGAAGAGAGACAGTGCGGGCAAATACGGAGTA -CAGAGTACCGGTGGCAGCTTGGACCATACCATCCGAGGCGGAATAGCCCGCCAGCACTGT -TGACTATTCGATCCGTGGCTGAAACCGTTAACCTCTCGCTGGAGCAGCTAAATACCGTAG -TGGAAGATGTCGAACCCTATTAGCGTTCATTCCCCGGAAGCGCGGGCGGTATGGAGTACT -CTGCTGCTAACGACTTTTCTCTTGAGCAGGAGCCAACGAAGGTTGAAGCTAGCACCGACG -GTCCCCACCGGTGTAAGATTTTGTATCCGTGAGTGGCCAGCGGGGATAGAGCTAACGACA -AGGAGTTCACATATCCCGCCAAAGGATCATCAACGAATGAGGCTGCTCAGCAGGTGACGA -TCTTTGTTGCAGGGGGACAAAAACAAGGAAAGCGCATCGAAGACATGCAGTTGATTCTAG -GTCGTGATTTGCGGTTTTTTTTTTGATCACCCGTTCATCATGGAACGAGAGTGACACCCG -ATGTCACTCTACCACCCAACTCCAAGGGTGCGATATACGACACCAACAGCAAGTCTAGAT -CCTTTCTCTTCACTCGGTATGATACTCGACACCGATCTTTCTCTCTTCTTGAGGACAGGC -ATAGACGTAAGTGAACAGCCGAGCAGTCCTGAAACCTAAAGCGGGTCCACCTCACATTCC -TCCGACTGTGGTGCAGACAAAACCCTCATGTGGACAAGAGTTAGCCAGTGATCTTCCATC -CACCATCAACACGGGGTCCCAAAAAACCGAGGATCGTTGCCGTTTGTCTCCCCCTTCACG 
-AACAGAACCAGGAGCCCGTGATACAAAATGTAAGGGTGGTGAACCAGGGCAAACagagag -agagagagagagagCCGTCAGTGTCTCTGACGATGTTTCACACCGAGGGGTCCTGTTTCC -CCATGATCCGCGCCCCAATCCCATCCCACTATGCAGCAAGGTCTTTGCTCAATCTTCTCC -TTGGAATTCTCCCTCGAAGGGATTGACGAGGAAAATCCCTACCCAATAATCCCAATAATC -TTCCTCAACCCGTGCTCGATATGCACTATTGTCCCCACATACGCAAGACAGGCACACACA -CATATACAGGCACGAGTGGCGAGATCACGTACATAATATTGACGGATACGGAGTAGATGT -GCAGGCAATAGTTCGGTTGTACCTGCATATTTCCAATGTTTCCTAGAAAGGGTATGGGCC -AAGCTAACCTGCAACCCTTTGAACCCGTATGCAGACAAGGGAGATCGTGGATGCAAAGCA -TATATGTTCGCGAGATAATTCCGAGTCTAAAATCAATAATTAATTATAGGCACGGGGGGA -GGTTTTTTTTTTCCAAGGTTCGCCAATAGGAAAATAAAGGATAGAAGCAGGAACCGGAGA -GCCCGAAGCCCTAGTGGATGCTTACCCGTTTCGATAATATAATCATGGATATGCATGGAA -CCATTACCTGCCAATAGTAATGGCGAGTACGGTATTGACGGAGCAGGCTTCAAACAAGGT -TGCAGGTCCGAAGCCCTGAGGATATAGTGGTCCGCTTTTTTTTGGAAGCTTGGATGTGAT -CCCTTGACGGATTTGAATAATTATTTGCTTGAAACACTAGGAGACGGAGCTGATAAGGGG -ATTGGTCCTTGGAGTCAATATATTTTGATTTTTTTTGGACCTTTTTTCCTTCGCGTTCCT -TCGCGATTTTTCCTTTTGGGGATTTTGTGGGGACTGACGTCCCTCCCGAAGAATAGCCGA -TTAGGAAATACAAACGCCGATTGGGGTTTCCTTGGGGAAATTCCGTTACAAACTAACCAG -AGATCCATTGACTAGAATTTCTGAAGGGGTAGGATTTCATATAGATTGGTCCCTTGGATG -AGGTGCTCCTTGCTCTGACGTTGATATTTGgggtggttgggtgggtggttgggACAGGGA -GATGAAAACACATGTGGATTTTACATGTACGGGGTACAGACAGCAtaggtaggtacctag -gtaggtaggGGAATTAACTAACGAATAGCAAGGACGGTAACAATTAATGCCTGAAGAACC -CGTTTTGAACCCCATTTTAGCCCGTTTAGCCCGTTTTCACACCCGTTTTAGACGCGATCG -CTGATCGGACTGAAGCTTCATGGACAGATTGCTGATCGCTCCACTGTCGTTTTGTATGGT -TCCGATGAGCCCCATTACCGAATTCTGGGAGGCTCTTTTTATTCTCAATATAAAAAGTGT -CCACAACTGTACATACCTCGGGATGCCTTGAACGTGGCAGAATGGCGGAGTTGCATGTCT -TATCGATAACGCGTGATTTTTTCATCCCTTGGAACCCTTTCCATCGTGGAGCTGCACGAA -ATAATTGTGCATCCGAACCGTTATTATCTCCGGTTCGGGGCTTAGCGTATCTCTGTTTGT -TTTGTTGAATTCTGTCTATCTCTCTAGGAGGCAACAACATCCCCATTGAGAGATCACCTT -TCCAATAATGCTTCTGAATATCTAGAACCTTGGTATGATCGGACGGCTGTGATCTTTGAA -ATTTCCATTGGAACAAATCCTTACGGACATTTTGGATTCTATTTTCAATGTGCATCTGCA -GCTGCATTTGCATCTCTGTAACAATAACATGGTTGACGTTACGGAACTGCAGACCTTGAC -GCACTTGGGGTAATTATTCCCGTATGATCTCCTGCGCATTGATCATTTTCGGATCTGGGG 
-AGCCCATGGTCCGGGAGCCGTTGGAATGGGAATCCGGTACATGTCGTACATGAAGGACAG -GCAATTCCAAATGTTGACGGAGAGGTCTAAATATATATAGAATAAGGTGACAGTGCAGAG -TATCCAAATGTCCAGCTGATAATGGAACGGTCCTGTTACGAGGGAACACGGATGCTCGGG -TTCACGAGACCCGATCGGGTCTAGGGGCACAGGACAACCAATCACAGTCTGGGACCCTTG -CATAGTCAGGCCACCGAGTCACCTGAAAAATGCAAGGTGTGCTAAACATAGACCGAGGGT -TTTTTGGGAAACTCTTAGCTTTGGGGGTTTGGGGACTTGATATTCGGGGTTGGTCAGATG -ATGGTAAGCTTCATAGATTCCCAGGGAAGAGTTTCGGGAAATATCTGGGGCAATCGCTAC -GATGAATACACATACATGCAGCCTCGTGTCGGGGAGATGTGAATTATCATATTGACCGGC -ACGACATATCATGCTCGCTCCGTGGAGGACCTGGAGAGAGGAAAGGGTCCTTGTTTTCCT -CTTTGGTTCGGATATCCAAGGGTCGGCGTAAGGAGATGATGAAAGTCAACTTCATCTCAG -GAGATCATTGGGCTAGAAAATGAGAAAAAGGGTTCCGGATTCGAAGAAAGTACCCGAAAG -CTGTAAGTGCTGATTAGAAGAAAAAAAAACATAACAGGGTTGTCGGTGAAGCTGATTGGC -AGCGGATATAGTAAGAGAGCTCGGAGAGAGTACTTGTTACAGCCACAATCCCTGATTGGA -CCTTTAACCTCCCAACCTTGGAAGGTTACAGACAATGAAAGTGGCTTAGTCAAGTGGGAA -AAAAAACATGAGGATATTCTAACTTCTATCAATTCTTTGAATTTCGATCTTTTTTTTTTT -CCATGTTGAACTCCACTTTTATTTTTCTTCGGCGTAGACCAGATTGGTGTCTATCCCGAG -CACTTCACGCTCCTGCTTTCGATACAGTTCCAAGGTTATAGCAGATTGATGTCAAAGATC -ATAGAAAAGAGGTGGATCTCAGTCGGACAGCCCGGAAAGCATAAGTTCTACCATTTGATA -CCGAGTAGAGTGTTGGAGACTCGCGAATACGTGTGTGACCAGAGAAGGTCGACGCCGTAT -ACTCTAGTCCCATACATAGTCCCATACATAGTTCCATACTCTCCCATACATACTAGTACA -TAGGCCCATACAAAGGGTAATATAACACTGCCAAGTTTTTTTCTTGTGCATCCCCCACGG -ATTTCCCACTAGTCTAAGAGAATGTTTAATATATTATTCAGAATTATGAATTCCTGATTA -aaaaaagaaaagagaaaagagaaaatcgaaaatcgaaataaACCCTTATACAGTTTAGGT -GTGTACTTTGACTGCTAATAATAATCCCCCATATAATCATTACTCTTAACCCTTCTGGTC -CCATCTTCCTTGCATCCCACTTTCCATCCCCCTTTGAAATTTATTATGACTGTCTAAGTC -ACACATTCGCTCACCACGTTCGCTACCGCTTACGAGAACCACGTTCGCTACGTTTCGGGG -ATTCATTAATATTTAATTCTTTTTTCCGCCTTAAACCGACTTTTTATATCCCCCCCCCCC -AATTTGGATCTAAGGGTTATTGGCCCGTTTGCGCGATATCTCTGTAGATCCCGTTTGCTC -CGCGGTCTCAGTTGAACTTCCAACGTTCAAGGAAAAAAAGGATTCTCTGGTGTACTTGCG -ACTATACTCCCCGACTGGAGATTGAAATTCATTCGCTTCGATTGTCTCGGCTAAAGACAA -CTTCCCAGAATCTCGCCTGTGGAAGGGTCTCAGAGTCATTAGGAAAATATTGGATACAAG -AAAGGAACTATTCAGGCAGATCGGTTCATCAGCTTAAGGTTAGTTATCAATGTGCCTCCC 
-TTTGCCCACGGTCTCACTTGCGTCCCCTCGTGCCTTGCATACTTCCTATTTGGGATCTTG -ATCTTGGGTGTGATCTCTCCTTGCAGGGAATTTGTACTCTCTGGACTATACAGGAAATGA -CGATAATGTCAAATCCGTCGGATAGCTTCCCCGTCCCGCTATTGTCAAGTGTTTGTTGTT -GACGAGTAGACTCTTGTTCAGGTTTAACGACCCTTGGATCTTGCATATCTTGGAAGGTCA -ACAGCACCGGTTTCACAGCAGAGCTTCCTTTTTTTTTTTTAACTTCACTTTGAATCATCT -CGGTGAGTCCATTCCCCCTATTCACCGGTCCGGTTCTCATAGTCTCAAAATCGTCTCGTA -TACTCATTAATTGAGTAGCCTATCATCTGGATCTTGTTCATTGTGGCCCAATCTGCTAGA -AGGTTTCCCGAAACACCTCGCGGATCACGCAAATAAAGAAAAGAGAAACCTCTCCAGTCC -ACGCTCGTTGCTCTCGTCTTCCAACCAATCTCGGCTCTCTCCGTCACAATTGGCGGTGGA -CATTTTCTGGACATCTCTTGACTGTTTCTTCTGCTTCTCTTTATGAACCTTTGGGCACCA -CGGTCGAGATCTGAGTCGATTTCTGCAGCCGGCCGTTCCTAGTCTCAGCCTTAGTGAATT -ATTCCTGGATGGTGATTCGAGGCCTTTTGTTGCTGCCCTTTTTGTGAGACCAACACCAGC -TTCATCTTGTCGCTCGTTCTGTCTCGGAGAGCACTCCATTCTTGCTCGTGCTTCTTCGAC -CTACAATCGTTTGGAATAAATTCCCAGTCATTCGCTTCAGAAGGTTCTGTCTCGTTGCAC -TCGCTGCAACATCTTCAAGCTGGCTTATTCGAAGACACACGGTGTTTTGGTAAGAATTCC -CAACCTTTGGACCTGTCTTCCTCTTTCCTTGTGGATGCCGGCATGTTTTGCCTGGATGTT -CCAATCCCAGGTCATCCGGTCTGAATCTTCTCAACCCTTGCCATCTTTTCCGTGTGCACA -AGAAACCTTCCCGCTGACCATGTCCCTTCTTCCGTCACAGTTCGACCCGGATTTTCAAAC -GCCTCTAATACTCGGCGAGTCGTTGGATCTGTCGGACTCAGAAGAATCACAGTTTGTCGG -TGCATTTTCAGACGGCCTGTGGGCCAAGTCCGAGCCCATCTTGTCCATGTCTGCGTTCCA -GAAGCCCGTTGCCGGCGCCATAATCGGTACGTTGCATTTGAAGAACTTGGTTTGCCATGA -TTTAGCAACAGCCCGTATGCTGAGACCACACACAGATTACGGTTCCACTCGATCTTTGCA -TGATGCTGCTTCTTACTCGAACTACGGTCAATCGCCTTATGTGACGACCCCGATGGTTCC -CTCGCCTATGGCCGATCAGGCAAGTCAGATCTCGGATTGTGTTCCATATCTGCCCAACGA -GTACGCTAGCTCGTACGAAGAGTCTCAGTCGCCCATGATGGGCGCACGCCACCGTCAGCT -ACCCGAAATTGTGACATACAGCCCCCAACGGGGAAGTGAAGGAACTAGGGTGATGGTCCA -GATCCAGTGCCCCTATGACCTTCACGCTTCCTCGTACGCAGCTTTGTATGTGGTTTTTGG -CTCAAAGAAGTGTGAATCGCTCCCGCATTTCCTTGGATTCCAGGGCTCCGCGTTCCAGTA -CGGCCTTTCCGCTGATGTACCAGCATTCTTGTCTACTGGATCCCCATCTTTTGCTGTTCC -TCTATCCGTCTTAATGGAGACCCAGGATGACTGTCCCGCAACTTCTCTCCAGGTCGGTGT -TTACACCTACGAACAAGTTTCACACCCTTCGCCATCAGCCGATTCCCGCAAGAGAAAGTT -CTCCTCATATTCTGATGTCCCTATGGCATCTGCCAAGCGACACAATGGATCTGTGATTCC 
-CAAGGTCGAACACCACGATGGATATCACAGCCGCTCTGTGTCTGCGTCTTACTCGCCATA -CTTGCAGCCCCTTCCAGCCATGGCCGGGTTTGTCGCACCATACCACGCTGCATCATCTCC -TCAGACCGGCTCTGGCCAATACTCTTCGGTGTCTGCCACCCCCCAGCCCGCCCTCCGTGT -CCCTTCCCCTATCACCCCAGCTTGGAGCCCATCTTTCGTTCCTGTGAGCAATGATGGCCG -CAACTCAGGTCTCGCCTTGAAAAACGGTGTTCCTCAGCACAAGGGTCCCACCCAGCGTAA -CGCTTCAAATCCCACTTTGATCCGTACCTCCACCATCCAAACGCACACCATGCCCCACAA -CCCAGCCTTCAACCCCTATGCCATCTATCCCACCAAGGCCATCCTCAAGCTTAACGGTGA -TCTCGATAGTATGGCTCAAGGCTGGTCAAAAGAAGAACGCGCATGTCAACGCCGCCTAGT -CCAATTCACCCGCTCTCAAACAGGCAGCACTATTCAGGGCGAGTTCAAGGCTGTCACTCC -TGAGGATCGCGCCCCGAGCAGTATCTGCATCAGCTGCATTTCATGGGAGGACAAAAATGA -GTGCTACGTCACCAGCGTGGATACTATCTATCTGCTCGAGTCCCTCGTGGCCGTGCGCTT -CACTGTTGAAGAGAAGAATCGTATTCGTCGCAACTTGGAGGGTTTCCGCCCCCTGACTGT -CTCCAAGGCCAAGGTCGACAGCGAAGAGTTCTTCAAAGTTATCATGGGCTTCCCTGCTCC -CAAGCCTCGCAACATCGAAAAGGATGTCAAGGTCTTCCCTTGGAAGGTTCTAGGCCACGC -TCTGAAGAAGATCATTGGAAAATACGTATGTCTCTTGAAACTTTACACCTCATTGTGACA -AATTACTAATCTCCGTATTCCGTTTAGTCTGCCAGTTATTCCTCAACCGCCAGTGCTCTC -CCTACACCAATGACCACCTACACCAGCCACGCCAGCCATGGTACTGGTTCGGATTCCGGT -ACCGAGCCTCACGCCGCCAACTCCCCCCAGTCCATCTCCGACGCTGGTCCGGGTAACACC -TACACAAGCATGCCAGTACAGAGCTATTCGCAGACAGACCCTTCCGCCTCCCACATGGCT -TCGGCCCCTGATCTCCGATCCATGATCGCAGTAACTCAACCCTATACGTCTGTGGGAGCA -TACTCCTACCCGGCCATCTGCCATCCTCAAAGTGCCCACGCCTTGGCAGCACCGGCTCCG -CGACCATCTTGGGACATGCACACTCTTGGTCACAATCCTCCCCACACCGGTCCACCAGGA -AATGGAGCATGCTACACCTACTTAGATCCTGTGTACTCCATGCACGACACGGCCCATAGC -GGCCACTGATCCTCCTGACTGAGCCTCCATCTGATCTCTTGCTGCGTTCTCGTTCCATTC -TATTCTTCATACTTCCCCTCCGCCTTTTTTGAAGATACGATTTTCTCTGCTCTGCCACTC -CTTACCAAGCCCAGACAGCGGGCTGCCGTCTCATCGCACACAAGGTCTCTCCCAACCTGA -AAAGGTAGCGAGGGCCCTTGTATCTGCCAGACCTTGCATCCCGCACATTGATCATGACTT -TATGTCCTTCCTTATACTACAACATTACGATGCCTGATGCCCCAACCATACACCCAAAAA -TTTGGCTTTTGCATTTCCGTTTTTGATTTTTCTCCTCGACATTTTGGACTTGGTTCCACT -TTGAGACCTGTCCCTCGCATCTAGACGGTGTCTTGTTCATTCCTTCCTTAGGCTGCATAT -CCTACATGCCGGGCATTGCGGCTTCCACTCTTTTTTGCGCAAAAAACATACATTCTGGGC -CTTTGTACTACCATTCAGCATAGCGGGCGCTTGCTTGTCATTCCTTTTGCATTTGCGGGT 
-TCTTTTTGGATTGGAAATGGGATAGACTTGGGGTTTTGTTCTTGGACTATATTGGTTTTG -TCCCTCTTATCCAATTCTTTGGTGCATTTTCGACGGGGTTCTTAGGCGAAGACGTCTGGC -ACTGGAGTCCTCCTCCAGTCTCAGCTCTTCCCCTCTGATCTCTGTTTATCTTTGACTTCG -CTGTTTGAATTTTTGTTTGGAGTTGATTTGATATTTACCACTTCTTGTTATTTCAGCTGA -GGGTTGTTGTCACTTACTACCTACTGCATTGTTCTGGCGTTCGGGACCTGAACGGTTGGA -CCGTTTATCATCCTACTTTTTTGTCTTAATTTTACCACTTCTGAGTCTTAGTTGTTGCAA -GCAAATCATCAGATTATTTTCCTATAACGCATTGTAAATATTCCTTGTTCATTGTACAAA -CGTAGTGATAAACAAATGATCATGGATGGTAAACCAAATCACGTGAGTATATCAACCCTA -CCTACTTACATCTAACCCGAGGTCCCTCCCGAGGTAGGCACGATAAGGTCAGCTGACATA -CTATCTTATCGAAACACGTGACTCGCGGGGCCCAGCGACACCGGTCTCGAAAAAGGGCAG -TTACAGTTGACTCATAACAGACGGTTTGCTTTTTTAAAACATCGTCTTACCCCATCCCTC -ACTTAAGCATCCAGGTGTGACTTATCCCATCTCTGCACATATATATAGGGACTGTTTGCC -TTCCATTGGTCTCTTCCTTTTCTTCTTATCACCACACTCTTTTGCCCCGCCACTCGCACA -TCGCCTTTCACTATGGAAGCTATCAAGCAGACATTCGCGAAGTGCAAGGCGCAGAAGCGC -GCTGCCCTTGTGGCCTACATCACCGCAGGATACCCCACTGTTGAGGAGGCAGTTGATATC -CTCCTTGGTCTTGAGAATGGCGGCGCCGGTAATTCAAACCCTAAGTGAGAGTTGCTCAAA -GGGATGATCTTTACTAACATTCTGGGATCACATCACAGATATCATTGAGCTTGGCATTCC -TTTCACAGACCCCATCGCTGATGGTCCCACTATCCAGACGGCCAACACCAAGGCCCTTGA -GAATGGTGTTACTCTTACTACTGTCCTTGACCTTGTCCGCACTGCCCGCAAGCGCGGTCT -GCAGACCCCTTTGATGCTTATGGGTTACTTCAACCCCGTCCTCAAGTATGGTGAGGAGCG -TATGCTCCGTGACTGCAAAGAGGCTGGTGTCAATGGTTTCATTATGGTCGACTTGCCCCC -GGAGGAGGCTGTCCGCTTCCGTGACCTCTGCTCGAGCGCTGGGTGAGTGTTTTGATTTTT -TCTTGTTTTGAAATATTCATTACCTGTCTATGATAGTCCGAATGCTTACTCTATTCCAGT -CTCTCATACGTCCCTCTCATCGCCCCCGCCACTTCAGATGCTCGCATGCGTCTCCTCTGC -AAGATCGCCGACAGCTTCATCTACGTCGTTTCCCGCATGGGTGTGACCGGTGCCACCGGA -TCACTGAGCGCCAACATTCCCGAACTTTTAAACCGGGTTCACGAATACTCAGGCAACGTC -CCTGCTGCACTGGGATTCGGTGTCAGTACCCGTGAGCACTTCCTGTCCGTCCAGGACCAG -GCTGAGGGTGTCGTCATTGGCAGCCAAATTATCACTTGCGTTGGCAAGGCTCCTGCTGGG -CAGGCAGCCAAGGCTGCTGAGGAGTACCTGTCCAGCATCACTGGACGCAAGCGTGAGCGT -GATGCCACCGGTGCCTTCACTCGCGAGATCAATGTTCTCGAAGCCATTGAGAAGGCCCAG -TCTACTGCTCAGGTCCACGCCAGCAAAGTCATCACCGATGCCGATACCCCCGCCGGACCT -GGTCTCGCTGATCAGATTGAGGCACTCAACGTCACAAAGGATGTCTCCTCCCAACCCTCT 
-CGCTTCGGCGAGTTCGGTGGACAGTATGTCCCCGAGTCTCTGATGGATTGTTTGGCTGAG -CTTGAGCGCGGCTTCGACGTCGCTAACAACGACCCCAAGTTTTGGGAAGAATTCCGCTCT -TACTATCCCTACATGGGCCGCCCTAGCAGCCTCCACATGGCCCAGCGTTTGACTGATCAC -GTTGGTGGTGCCAACATCTGGTTGAAGCGTGAGGATCTCAACCACACTGGAAGCCACAAG -ATCAACAACGCTCTTGGACAGATTTTGATTGCTCGTCGCCTTGGCAAGACCCGCATCATC -GCCGAGACTGGCGCTGGTCAGCACGGTGTCGCTACCGCCACTGTCTGTGCCAAGTTCGGT -ATGAAGTGTACCGTTTTCATGGGTGCCGAGGACGTGCGTCGTCAGGGTCTTAACGTCTTC -CGCATGAAGCTCCTTGGTGCCTCCGTCGTTGCTGTCGAGGCCGGCAGCCGCACCCTTCGT -GATGCCGTTAACGAGGCTCTCCGCGCTTGGGTGGTTGAACTTGATACTACCCACTACATC -ATCGGCTCCGCTATTGGCCCTCACCCCTTCCCTACCATTGTCCGCACCTTCCAGTCTGTT -ATCGGAAACGAGACAAAGGAGCAGATGCAGGCCCAGATTGGCAAACTTCCAGATGCCGTT -ATCGCCTGTGTTGGTGGTGGTAGTAACGCCGTTGGAATGTTCTACCCCTTCTCCAACGAC -CCGAGTGTCAAGCTCATCGGTGTCGAGGCTGCCGGTGATGGCGTTGATACCAACCGCCAC -TCTGCCACCCTCTCTGGCGGTTCTCACGGTGTCCTCCACGGTGTCCGTACCTACGTCCTG -CAGAACGAGCATGGCCAGATCTCCGATACCCATTCCATTTCCGCCGGTCTTGACTACCCC -GGTGTCGGCCCCGAGCTGAGCAACTGGAAAGACAACAACCGTGCCACTTTCATCGCCGCT -GACGACAGCCAGGCCCTCGCTGGTTTCCGTGCCCTCGCCCAACACGAGGGTATTATCCCC -GCTCTGGAGTCCTCCCACGCCGTCTGGGGTGTCATGGAAACAGCCAAGGTCATGGGCAAG -GGCAAGAATATTGTGCTCAACCTGAGCGGTCGCGGTGACAAGGATGTGCAGCAGGTGGCT -GACGAGCTCCCCCGACTGGGCCCCAAGATTGGCTGGGATCTGCGCTTCTAGGCGCTTTCC -AGTCTTTCTGAGTGGTTTTCCCCCGTATTGTTCCATATTTTTTTTTCGGTCCTCTCTTTT -GACAGGGTTTCATGATTCTTGATGCTTTTATGCTCTACATTGCGGGATGGTCATGGAAAA -GCGTTCTTGCAGACTTTTTTTTCTTGCATTCTTTTTGGTTAGAAATTAGAAAAAGAAAAA -AAAATACACATTGATATCCTTCATTAGTTGATATCACTTTCTCGGTTTTACCTTTTCCCT -CATTTTTTCAAACTGTCTTCATAACTGTAACATAGATAGCTTTAGGCTCGAAAATACCAC -CATACACGAAATCAACGGACCATTTCATGTTTTATAGAGAGCAAAAATAGCCCTGAAATC -GATTCGATAATATATCAACGAACCAAAAGAAAACAACCAAGAAAAATACGAACAACCGCA -AAAACAAAAGATAGGAGTATATGCAATTGACATCGACAAAAATAGGACCTATTAAATCCC -CTCGCCAGGTCGCTCTAAGGCTTTCCACTTCCACCTCGCACTAAAATAGAGACCCCGGCG -ATAGTTTACTTCAACATAGCCTCAGGGCCCAGGATTTTCCTCACACACCACCGCCTCATG -CGTCGATCAGGTTCTCATGCGAGTTATCGTGGGTATCCTTGTTAGGAATCTTACGTGCGT -GGCGAGCGTAGAACTCGTCGAGAGTGCGCTTGGGGACGCGGTTGAGGAGTTCCTTGGGGT 
-AGATGCGGAGGAGGTTCCAGGCGAGATCGAGGGATTCTTCGATGGTGCGCGCCTCGTAGG -GAGACTGACTGATGAAGGTGCGCTCGAACTTTTCGAGGAATTCGAGGGAGAGCTTGTCCT -CCGAGGAGAGGGCTTCCTCTCCGACGACGGCTTTCATGGCGGCTGGTTGGGGTTGTTAGA -GGTGTTTTTGGAATGGCAGAATAGGGTATTATTTGGACTGAGTTGCTTACCGGCATCACG -ACCAATGGCGTACTTGGCGTACAGCTGGTTGGACACGTCTCCGTGGTCCTTGCGGGTGCG -GCCCTCGCCGATGGCGGACTTCATCAGACGTGATAGAGAGGGCAGAACGTTGATTGGTGG -GTAGATACCGCGGTTATCGAGCTGACGGTCAATGAAGATCTGGCCCTCGGTAATGTAACC -AGTCAAATCAGGAATGGGGTGGGTGATATCTGCATGAGTATGGGTTAGTGAAGTTCAACT -TGGAGAAGAATAGCTTGAACGACGCACCATCGTTAGGCATGGTCAGAATAGGAATCTGTG -TGATGGAACCATTACGGCCCTCGACTCGTCCTGCTCGCTCGTAAAGGGTGGACAAATCTG -TGTACATGTAACCGGGGTAACCACGACGACCGGGGACTTCTTCTCTGGCAGCGGAGACTT -CACGTAGGGCATCACAGTAGGCGGACATGTCAGTCATAATAACCAGAACGTGCTTCTCGA -GCTGGTAGGCGTAGTATTCGGCGGTGGTCAGGGCAAGACGAGGAGTAATGATACGCTCAA -TAGTAGGGTCGTTGGCCAAGTTCAGGAACAAAGTGACACGCTCCATACTTCCGTTCTCCT -CGAAGTCGCGCTTGAAGAAACGGGCAGTCTCCATGTTCACACCCATAGCGGCGAAGACGA -TAGAGAAGTTCTCCTCGTGGCCGTCGTGAACATCTTTAGTAGGCTTTGCCAGACTTGCTT -GGCGGGCGATCTGAGCAGCAACCTCTGAGTGGGGTAGACCGGCAGCTGAGAAAATCGGGA -TCTTCTGTCCACGGGCAATAGAGTTCATCGTGTCGATGGCGGAGATACCGGTTGAAATAA -TTTCCTCGGGGTACACCTTGTCCATCCATTAGCTTCCTGTATTTTGGTCTTATATAGGTG -GTAGTGCGCATACTCGCGAGTAGGGGTTGATAGGCTGGCCGTTGATATCCAGGTAATCCT -CTGCTAACACCTTAGGGCCCTTGTCGATAGCTCGTCCCGAACCGTCGAATGTTCGACCCA -ACATGTCCTCGGAAACACCAAGCTTCAAGCTGTGGCCGGTGAACTCGACTTTGGTCTGAC -CGATCTGTCAGTATATCCCACAGTCGAATCATTCCCATGGGTAATTGTACCTTCTTCACA -TCAATACCGGAAGTGCCTTCGAACACCTAGTTGACAGCGACGTTAATGTCTTGAATATTA -GGGGTGAGATCTTAGTCCGCACCTGCACAACAGCTCGGTTTCCTATGTGTATATATAGCA -GTCAGCTCACATACTCCTAGACTAAACAGTAGAATGTGGGGAACGGACCTCTGGCTTCCA -GCACTTGACCACTGCGCTCTGTGCCATCGCTAAGTGTCAAGGAGACAATCTCGTTATAGC -GCGGGAATTTGATCTGTTTGTTGGTGTAACTAATTAGCTTATAGTTGCTATATCGTCCAA -CCCTACGTCAACTCACATTGTCGAGGATAACCAGCGGTCCGTTGATGCCTCCGATGGTAT -TGTAGCGGAACCGGGGCTTGATGGAGGACATCCGGGGGTCAATAAAGTCGGCCATGGCGA -ATCTGATCCTCCAAATTCAAAGTAGAAGGGCAAGGATGGAGGAGAGAGCAAATGTGGGAC -CTGGGGGTTGGTCTAGCTGTCTCGATGGGGAGCTCGGGCGGTGACGTTGCACGTGATGAG 
-GTCCAAGATTGCGTGGGCCGGGTCCTGGTTTCTTGGGCGGGCTAAGCCCGACGTGGACTA -AACCTAAGCCTGAGGTATATCTTCGATTGCCATAAAAATAGAACACTTACATTATGGATG -AAGCAATAGCGCAGGAATCTATCCATGTTTTATGGGGTATAAAGTCCAGCCGGGGTCGAC -ATCTTTTATCCAAGGTGTAATGTATGTATGTACTCCGTATATTGGTTGGTCACTACTAAG -CAATTAAGAACAAGATTGCGCCGATAACGTGCAATGTCAATACCGAAGACTCCGAAGCAA -TAACAATCATCAACGAGGATTATGCCAAGAGAGAAGAAATTGTAAGCCATCATAGTGTGA -GATCTTGAGCTCATTAAGAGCAGGGTATCCATACGCATTAAATAGAGACACTGAAAATCC -AAATCTAATGCACTATTCATAAGATAGGCGCGTGGCCAGGGTCTGATCCATGAAAATACA -AGCCAGAGATTAAACACACGGAAATAGAGTGCATTGAATCAGTCCACTTTGGCTTGAGCT -AGACTCTTCGGGGTAATTAGATTGTTGGATAGGCGCACATTCGCAGTGCCCTTGAGCTTT -TCCTTCCATGTATGCCTCTTCAGCTTTCGGCGAGCAACCATTACCGGCACAGAGGAATTA -GATAGAAGATAGTTGGAGAACGAGCCAAGAAGGACACTAAATGCAAGGTTAGCATCAATT -AAACAAGATAATTAAGGGTCACTCACCCTTTAAGTGCACTCTGACCTCTGGCACCCACAA -TAACGAGTGTGGGCTCAAGCTCATCGATCTGAAAGACAACGTAAGTATGGATTGGTAGGC -AGCAATGTTGAGAGGGAAGAAAATACTTACAGCTTCAGTGATCATAGACTTCGGGTTCTT -GCAGTGGATCACTTCCACAGCAATACGGACCTGCAAGACCGTTTTCCGTAGCAACTTCAC -ACAAGTCTGAGACACAGTTTCGACTGCCCTCACCCGTTCTGCTTCGGCTATTGATGATGC -CCGGGAATCGGCAGAATGTGTCCTGCTAGCAGTACCAGAGCCAAGCCGACCTAGGATTGT -ACGGGATGCATAATTCTGGGTTGCTTCTTTCGTCTGGGTGCCAATGACAGCCGTTGCATC -CTTCATTACTTTGGCTCCCTCACCGACCTGCACCGCTGATGTAGAAGTGGAGTCCTCATG -TATAGCGTAGATGGCGAATATCGTGTCTCCGTCTCTAAGAACCGTTCCAATGGTCCACTC -AAGGGCATAGACCGACTCTTCACTCAAGTCGGTTGCGATTAAATACTTCCGGCGACGACG -GCGGCTTCCATCATCCTGTGAGCCTGAGAAGTTCCCGCGGACAATTGTTCGGATGGATCG -GTTGTGCACACCGTTATCAATCGCTGACATGGAAATACCAAGCTTCTGTGCACGATGGAT -GTCACCGATCTCGGCTTCATCCTCTGAGCCGTAAGGTGTGCCAGCCACTGACTGAACACG -TGGGGTGGGAATGTCGAAACTGGTCTGTGGATGGACCACAGCTTTCAAAGCAGATTTTCG -TGGTCTCTTCTCTTGTCGTGCACTTGGATTTTCGGAAGTTTTTTCTATGGAATGCAAAAT -TAGTACGTTGGATTTCCCATTGAACAGAAGTCATCATACGTTCTTCGAACTGCTTGATAT -TGTTGGATTTTTCGAAGTCGCTTGGCGTTATAGATGTGATATCCGAGTGCTTTTTTTTCC -TGCGGCCTCGTTCGCCGTCCAGAGTGCTGTCTTCATCGCTCGAAGTATCTTCATCTTCAC -TGCTTTCGATCATGTTCTTGTCCCGATCAAAAACATCCTTGGCTAGTCGCTCCCCTCCGT -CCTCCACCGGTGTCGGTTTGATATAATCAGGCACGCTTTCATGGGGTTTAGCTAATGTCT 
-CGTCTGACTTGCTGAATGTGCCGTGCATGTTCAACGGCTTTTCAGAATCAACAGGGGCGG -GCGTCGAGTTTTCAACGTGAACAGAATGACTGCTGGAGGAACTAGGGCGACCTCGTTCCG -GTGAGGCAGTAGATTGGGCCTCGAGAGACGATCCACGCTGGGAAGATCCCCGTGATGAGT -CTCTGTCTCTGCCACGGGAAAAGCTTCTCGGGAAGAGACGGCCAGACATGATAGCAGCCA -CACCCTTGCGACTCTGCGACCTATCTCGAGCCTGCAAAAATCGCTTTTTCGAGTGCGGTT -GCTCCAGAGAAGTAGAAGCGACATCAGATTTTGCACCATCATTAATACTCAGGCGACGTG -AGGTTGAGCGGACAGCATCAACAGCCTGGTCGGACGATCTACGAGAAGCCTCAAGATCTG -CAGTGTGCCTCTCTGGCGAGCTTGGAGCGGATGCCATATCTGAAGAACTGATGCGCAAAG -GCGAGGATACGAGTGGCTCGACTGGGCTGCGGGTTCCACCTCTAAGCGAGTAGGTTGTCG -CGCCACTTATGGACGGGGCGATCAGCAAGTTCTGTAGAGTACGGCTTATGCCCGATGGTT -TATCGTTTATCGGGGTTGATATAGTACCATTCAAGCTTTGTTGTTCAGGAGAAGTTCCAG -TGTGGTATGACATGGCGTACCGCCTCCGGGTGGGGAACTCAGCATTCAACTCCGCCAAGC -ACTGAGGAATGTGGAGTACGGAGTAGATCACCCTGTTATGACGGTGTTAGGAATGGGTCT -TGACCTGCAATGGATCAGTAAAAATTCAAGAAAATAGGTCCTGGCATCTCACACAGTGTA -CACAGTTGGAAAAGTCACAAGTCCAGTCGACATTTGATAGTTGTCTTGATCCAATATTCA -GCAATTGGATTCTGCAGTATGTATGCTCTTGGGGTTGAAAGTAGAATAGAAAAGCAAAAG -GGGAGATTGGGAGAAGGAGCCCAAGGTGAGGCTTTTTACTGCAGCCACAGTGGCTGCATC -TTTTATCTTCTGCTATGTCTGTTTGAAATACACACATTCATATTTTATATTGTCTTGCCT -GCACATATTTTTGGGTATTATCTTAGAGGACTTTCTTTGTATATTAGCCAGAGTAATGAA -AGGGTATCGTGATATGCAATTACAGAGCTCCTATTAGACTCATGATAAACGCCTAGACAT -GACACATGACTACAGAAAACAAAGACCGCATTCAAGCTCTAGCCCTCTTCGACTTCGGTT -TTGTTCCCTTCCGAGCCCCAACAGCAGCAAATGCTTCCTCGGGGGGCTCATCATCAACAC -CACGTTTCTTCCCGGGCTGCGCATACTTCTCCTCTAAACGAGCGAGGAAGCCCGCACCAC -GGTCCTGTTGGCGCTTAGTGATAATAGCGAGAAGATCATCCTCATTCGCTGCAGCCTTCG -AGCTCTTAGCCGAAGCTCCAGCTTTCTTCTTCTTATTCTCAATCTCCTTCCCCAACTTCT -CCGCCTCCCGCGCCTCCTTCTGCGCTTTCTTTACGCGCTGCTGTTTCTTCTTCTCGGGCT -CCTTGGAATATCGCTCGAAATATTCCACTTCCCCATCTGCAATGGCCTGATCAATGATAC -CGCGGAAACGTTCATCGTCGTCTAGTACATTGGACAGCATCACGGTATCGTAGATGTCAT -CCATGTTGCCCTTGTGCGTCTCATACGCAGCGAGCAGGTCCTTTCTCTCCTCGTCTGAGT -TCTGGTATTTCTTCTGGAAGTCCGAGATCGCCCGCGAGTCGAGCATCGCCGATAGTTGTT -CGCGGTAGAAGTCCATCCAGTTGAAGTCGCCATCTTCGCCAAAAGCTTCGTCTGTGCTGC -CGGTTCGGTCATACACGCTTCGTCGGCGCGCATCAGAGAGTACTCCGTATGCCAGAGCGA 
-TTCGTTGGAATTTTGCGTTAGCGTCTGCGCGAGCTTCTTCGCTCACCTTGTCTACTCTAA -GGTTAGTATGTTCGAAGCTTTTTTTCGGACCTGCGTGGGTTTTCATACCGGGATGGTTTC -TCAGAGCGCTCTTTTTGTATGCAGTTTTGATGGCTTCTGGGGTTGCATCGGATGCAACGC -CTAGAATCCTGTAGAGATCTTCCTCAACAGACGGTATTCCTTGGTCTGAGGCATGATTTT -CAGATTTCGCGTTTTTCGCCATTGGGGGTTCTCGGTGACGCTATAAGTGTTTGTGGGTCG -CTCGAGTGGTAGGGGCAAGAGCTGAGTATGATCAAGATATGTTAAGCTGGGTCAGAATTT -AATAGATGAGAGACCAATAAGAGCGGTAGCCACACTAGCGTAGGCTGTAGAAACGGAACG -CGATTTAGACGCGACGGGGCCATTTCGTCATTCTGCTGCTACTTCCCGCCTGGCCTATCA -ACCCACTTTCAAGTACTCCAAGCTCGACAGCGCCACTTCACTCTCTTCTAATTCCAAGAG -CTACACAGAGCATTCGGTTCTTAGTCAAAATCCTCATTTTATATCTGAAACAACAGCCTT -GCTCAATTCCCTCCCAACTGAGAAACTCAACCCAATCGCAATTGACATCACTCACTATGA -CGACAACCACCTCCCCTCACATACCCTACATGAAAGAATGCCTCTCCCTAGCGGAAAAAT -CCCCCCCAAGACCTACCAATTTCCGAGTTGGCGCAGTCCTTATATCCCGTAAAGACAACG -ATCCCACCTTCACAGACGACCGGATCCTCTCGACGGGGTACACAATGGAGCTCGCAGGCA -ACACACATGCAGAGCAATGCTGCTTCTCCAACTACGCTGCAGTACACAATGTCCCCGACG -ATAAAGTTAGCGAAGTCCTCCCCGTCGAGCCCGGACGCAAGCTTGTCATGTATGTGACGA -TGGAGCCGTGCGGGAAGCGGCTGTCTGGAAATACGCCATGTGTCCAGCGCATCACGCAGA -CGGTGGAGGGCGGACGAGAAGGTATTCAAAAGGTCTATTTTGGAGTCAAGGAGCCTGGCA -CGTTTGTCGGCGAGTCTGAGGGGTGTCGGATGTTGACTCAGGCTGGCATTGAATGGGAGC -ATGTTAGTGGCTTGGAAAGGGAGATTTTAACTGTTGCTTTTGCGGGGCACGAGAATGCGG -AGAAGGAGATCAGGGCCGCATTGGGAGAGAAGGAGACTAATATTGACGATGTTAGTGCGG -AGGAGAGGAAGAGGCAGGAGGCGATGCCGAGGAATCCATTGAAGAGGATGATGGAGGGGG -ATAAGCATACCTATCTATAAAGATTGTGGGGTATGCTAGAGTATGAAAAGTAGCGCTGAC -AGAAATTAGCAATTAGTATGTGCTCCAGTGCCATTTTCGGCTAGTTGAAGGCCATCTTGA -GATATTGACCTATAACTTCGAAATTATCCAGTAGATTAATGGAGGTATTATCAAGATACA -CCTCGGAACTCTACATGAAGATACACAGAACACTTGCCTCGAGCAAACACAAAGTAACGC -ACACCCGACATTATCTATATAGGATCACGTGATATCAACCTGACTTTCAACGTGAGTTCT -AGTTTCCAGTCCAGCATGAAACTCTGATATCTCCTTCGATATCCTCAAAACAAGGATATC -CCGAGGAAAAAGATGACCCAATGGCAACTTTCAACAATGAGAACCTTGTATTCCATCCCG -CGTTCTGCTTCAAGGCATCACCAACGCACTTTACCTGGGTGAAAATGGGTGCAGCCGATG -TGCATCGCCTCCGAAGATCGAGTAACTTTGTGGGTATGTTCTTTCTCTTTCCGAGCTTGC -TGCAAATTTTTTTCTGCCCCTGAAGGTCGACATGCCTAGCCCGCAGGTGCATATTTTCGG 
-GTCCTCTAGCATGAAGGCTCGGAAGAGAGGCGGTAGCGTGAAGAAGTTCATTTGACATCT -TCCACCAGGCTGTCGACACTGCCTTTTCAATATGATTGAACTACCTATGTCCCAATGCGC -TCTGCCATGGTTTCCCGACATATCCAAACATATTCTGTCTTCCTATTACCCCATATTTCC -CTCCAACCAAAACCAGCCAAGCCTATCTATACCTGTTGTAAAAGCAAAATAACTAACAAA -CTCCCACAGGCCAAAATATATTTTTCTACAACAATCACCCAATTCAATTCGTCAGCCTTG -TAGGCGTCATAGTGGCGCGGACTGATTTCCCATGGCACACAATCCTAACACTAGACGACA -GTAGCGGCGCAACAATCGATATAGCCGTTCTAAAAAAGACCGAACCAAAACCTAGCACCA -CTAGTCAACAAGTCCCCGCCACAGATCCAGAACAACCGGAATGGTCCTCATTTTCTCTCA -CAGCGCCAACAGCCACCAGCCTTACTCAAACAAAACACTTAACCTCCAAAGACCGCGACG -AGATAGACATTTCAGCTCTCCAACCAGGAGCGCTAGTCCGGGTCAAAGGGACACTGTCAA -CCTTCCGCTCACAGATGCAATTAAATCTTGAACGTTTCTGGATCGTGCGTGATACGAACA -CCGAAATGCAGTTTTTGGATACCCGTCTTCGGTTCCTCATTGAGGTCTTGTCTGTGCCGT -GGGTGCTGACTGATGAGGAGATTGAGACGTTGCGTGCTGATGCCGAGAGGTATGACGAGC -GCACGTTGGAGGATAAGAGGCGTGCGGAAAGAGTTGCAAGGAAAAAGGTTGAACGGGAGG -AGAGACATGCTAAGGCTATTGCGCGCCGGTATGAGAGGGAGGAGTATGAGAGGGAGCAAG -AGCTCAAGAAGATTAGAGAGGATGGAAAGAGAGTTATGAGGAAGTTTGGGTTTGGCGCAG -GATGAGATGAATTCCACTTTGGTACCTGCGAGAGAATGATTCAATCTCCTTTTCTACCAC -TACTTCCTTCCTTCCATTTGAGGACATATGTAAATAAGAAGGCGCAACCCAATACAGAAA -ATTTACTATAAGACTCCCCTCTCGAATGTGAACATTCAAAGGTCACCTGGGCAGATCCCA -ATCACTCACCACTAAGCAATGACCTGAGAAGGTTCCTTCTATGTCCTTACCAGGTTAAAT -GACACAGGGCCGGATGTTTCACGGGCGATTCTGTTCTCAGAATCCACTTTTCATTCCTAA -TGATATTGATGAATTATAATATCGCATGTATTCGGATCTGAACAAAACATCAATCACATA -AGATATCGTCAGTCATACATAACCCCAGAAAGAAGGGGTATATTCGTATCCATGCAACGC -CAAAAGAATCAGAATCAGAATCAAGACCAACAGTTAAGACAGAGAGTTAGCAGAGCAATA -AAAGACACTGACATGCAATAATCGCCAAAAGCAAACTCAAGGCACCAGTATATAAAACAA -ACACACCTCAACGGTATGCATACTTGTCATACTTATCATACCAAGGCCCAGACCCTCCTT -TCTGCCCACTTCCGCGATAATACTCATCTTCGCTACCCCGGCTACCGCGGTGATACCTGC -GCCCATCATCCTCAGACAGGTTCGCAGCAGGCGACTCGGTGTACGCCAAATCATGCGCTG -ATAGTCCCGCATATCTAGCCGAATGATCACGACCATGCAAGTCACCCTCAGAATGCCTAT -AGTCACCACTGCGGTGCTTCACACTTTGAGACGAGCTGCGGGAATCATGTGAGAACCGCT -TATCAGCAGAATGGCCAGATCCTGAGCGTGAGTAATGTGGATCGCGGTCAAAGGACTCGG -ACATGCGGTGTGTTGAACTTGGTCCCGATGCCTGTGCCTTGTAGCTAGAGTCTGTGGGCC 
-GGGGCGTGGAGAAGGAGCTCGCATGGCTGCGGCCCGGGCCTGATTCATCGTAAACTTTGC -CCACGCCTGGATTGGCGCTGTAAGGCTTGCGTTCACGCTCGATTGGCACGGAGGGGGTGG -GTTCATCTTCTTCGTCGTCGGTGATGACAGTCGAGACGGGTCTTGGTTGTGGTTGTTGGG -CTGGCCTTGGAGTCGAGCCTGGGGGGCGGGGTCGGCCTCGCTCGTATGAGTGAGTTCGCT -CTGTGGGTGATGCGGTCGTGGTGGATGATCGGATCGGTCGGGGGGCGGATTGATTGCGCT -CTTCCGGGGTGATGTATGGTAGATCGACGCCGCAGTGGACCATCATACGCTCCTCGATGT -CTTCTCTAATTGCGATATCTCCAACTTCGGGGAAGAGGCGGCGCGGAAGCTCCTTTGGGA -AGCGCTCTTTCCTGTCGTCTGGGTTGTTAATGGGCATGTTGAGGACTGCTTTTTGGAGCC -GCTCGTATTCACGCCCTGGGTTCGCGAGGATCATCAAGGTTGCCCACGTCTCGAAGCCTT -TGGGTGTTAGACCTGGGATGTCTGGGCGCTTTTCTATGTCATCTTGGACGAGGTGATGTT -CGACTTTGATTTCGCGAAATAGTCTGGAAATCGAAGATGTACGATCGTCAAAAATGTCTA -GATATTTGGTTAGAATATGGCATGTCAATGAACAGATCAAAGCACCAACCTTGCCATGGA -TAGGGGTCATTTGGATCATGAGTATCACGATAGAACTTTTGCAGTTTGTGAGGGGGAATA -ACAAGAGTATTTCCCGGCGGGAAGTCCTCAATCTACTCAAGTTAGCGATCAAACTCCATA -GTTCATACGAGCAGACAAAAACTTCACATACCAGATGAACTGCAATCCCACGCAAAAATT -GGCCAAGCCTCTTTGTCGCTTTACCATCCTCAAACAATTTCCCCCAAATCCGCTCCAGGG -TAGTCATCTCCATAGCGGTAGCGTCCCGATCTGGAGGTTTGGGGCGCTCTGCCTTATGGC -TATGCTGACTCAGATCACTACTCTGACTCGATACAGAACCGCGTCTTTCTGCAAGGGGAT -CAGGAGTAGTTTTGGGGCTGAGACGACTATCACTAGTGTAATCGGGGGAGACGTATGGTT -GGAAATTTCCAGTATGGATATTGTCGGCGGCCGAACTGGGCGGGGGGATTGAGCTGCTTC -GCTGAGAGGGTATAGCAGAGGACTCTGCGCTCGGCTGAAATGAGGTATCCGTTTGCCATG -GTGGTTGTTGTGATTGCGAACGGGGCGGGGGCGCACCCGATCCATGTAGGTTGTCTAAGC -CAGTTGTCTTGAGCTGCTGGATCACTGTCGCTGTGATTTGTGATAGAATTTCCGGGGATA -GGTAATCTGTGGAGCCGCTATTATGAAATGCGGAGCTCACTGCATCATCGATATGTCCAG -TGCTCATGTTTGGGGAAGAGCGCTGGGATTCTGAGTGAATGTATGGCTCTGGGGTTGGGC -TTGGGGCCCGCGAACCAGTCCACGGCACCTGTGGCTGCGGCATGTTCAAGTGGGTTTGTT -CGTGGTAAGATGGATATGGTGGTGTTTGCGAGTACCCATGTTCGTTGGGATCTGGGTTCA -TTTTGCCCGTTTGATATGTATCTCTGGCGTAATTTCCATTGTAGGCCATGCAGGGGTTCG -GGAGGTGAGATATGGGTGGGTTGCTCTAATAGCGCAAAAACGTGTAATCCGCAGCAAGTA -AAGGAGTCAATGTAGAGAGGCTCTAGGAAGAGAAAATGCAGATCTGGGAGAGGTTACGAA -TTACCAATATTATGGTGGAGGGGCTAGTGGGGGGCGAGTGAAACGTTGTGGCGATCACAT -CGTCACCAAGTGCAAGACAATTTTAAGATTCATTCTCTTTGTACTCCCTCGACGTCACAT 
-TGCTCTTTAATATATTAGCCCTTTGTTATATTAAAGTCCCACACATGTTCTGTCATATCG -CTAGACTGGGTTAGAGCGACACGATCGGCAGACTAATGGAGCATGCATATCGCAGTGCAT -AGCAACCACTGAGAGCTTTGTGATCCTATTGGCTTTTATTGCCACAGATTGGGCACTTTC -GTGGTTTCCTAGGCTACTATAGCTGATAATTAATTGACATATAGCAAGGTACTTCTAGTC -ATATGGATGATGTATTGTATATTGAAGAAAACATCGTAGTGGGATCACACTTGAAGCGTC -TCTAATATCCTTGCACCGACCGGATACTAAAAGTATTAAGTCCTATATCGCTCAGCACCA -AGGTCGCATAAGCCGACTATTTTGCTTCTATATACTCGGCTAGCTTATTATGAGTTTATA -GAGACTATCATTAGAGGAATGATATGCCAACTCAAGTACACTATAGGAAGCACTTTGAAG -GTAGAGCGAGCGGTGAAGTCAATACAATGAGCTCAGTGTTCACTGTGCACTGCGATACAT -GCATCTTCCATGCGGTGTATGCATTACAGTTTCCCCTTGCACCTATCAGAGTTGTGAAGG -ACATGATTTCAAAGATCGAGCTATGTCATTACCAACTCTCGGCTGGGATCCGGCGGTTGG -GTTCCGATCCACAGCTCAGAGTATTATTGTGGTCTTGAGGGTTGTTTAAATATTATGTAG -GACTGCCTTTTCATTCTCACATTATGATTTGATACCATTTTTTGGGAATCAACTGAAAGT -TTATTAGATCCCCCAGATACAAGGTGCTTGATATTGTCCCTCTAGCCATCCTGTCTACCA -GAAACAGCTGGAACATGCTTGAATAACCTTGGAATGATTATGCTGTATAGTAATACACTC -TGCTGAGGGTTTATTTATAGTAGGTGGGTCAACGGAACCACCGATGTCATAACCATGGCG -CTACTACCTTTTGTCCCATAGGAGTTCAAGAAAACAAAACTAAACTTGGATTGGTACCTG -ACTCCTCATAAAATATAGCCAGCTCTGTACTCCATTaaagaaagcaaaaaaaaaaaaaaa -aaGGCACAGTAAACAGGGTTATGTAGTAAACAGGGTTATGTTAGGCGCGGACTTGGATGC -CCCCCCACCCTGAAAATAAGCCCTGTCACTTGGAATATGAAAAAGGTTCCAGGTACAGCA -TGCAGCAGTTTCAGGACTGGAATCACTGGAAGCTCACCTTCCTGAAAAAAGTTACAAAAA -AAAAGCCCAATGTGGATAGGAGGACATCATGGGAAAGATGCGCAGGAGTCGAACCTTGTG -TACGCAAATGCGAACGAGAGATCAGGTGTACTGCAGAGATGAGGTAAAGCGACGATGAAG -AGCAAGTCGCTACGTGCAATATTAGGCATAGCCTGGCAGCCACACTTCCCATAAATAGAA -GCATTGTCAATTATAGCCTGAATGTCGTTTGTAGGAAATTTACTGGCACGGCTTGGGAAG -AAGAGGAGGAATCCGCGGACTGCAATACTAGGTGCAGCCTAGCATGGTGGAAACGACTAG -AGAAACCTTCCTCCTTTCCAAGGAGTGCCATCATTAGCCTTGCGGGTGCCAGGTGCCCAG -CCTGGGGCAGGCTCGGGATCGGGAACAGGCTGTTGACGAGGTCGGTGCGCCTACACGATA -TGAAGTTTTGTCAAAACCTGATTTATATAAGTATTGTTGGAATACAGTCACCCACCTTTA -CTTTTGCAAAGCCCCCCCGTCTTGCATTACTATCACTATCCGAGGAACCATCACGCAGGA -CAAGAGCTTTCTCGATGCCACGCTCCAGACACTCCTGTAGTAGAAATATATACATGAGTA -ACCAATGTTTGGTCAGAGCTGATGTATAACATAGTTCGGATCACTTACCGGCTGGCCCAG 
-AGAGTCGAAGAGATCGACAGACTCAGGGCATCCCGCAGACCCAGACAGAGCCACAGATCC -CGAGTGAGCTGCAGACTCAGAGTTGAGTTCCTCTTCCTCATTATTTCCGTTTTTTCCAAC -CGCAACTTTGTCTGTGATTGCAAGAGGTTGGTCAATAGGATTGAGGGACATCTGATAGTT -CATGCAGGGCTGACAGAGCTATTGAAAAACTAAGTGTTAGAAGCAGTCAAAGTATATGTT -TTCATAGTCCAAGAAAAAAAAAACAGAACATACTGGACTCTCACGATTTCTTTGATTTCT -GGCGAATTGATCGATGGCACGGTAGTGGCCGCAATGGCGGCACAGTTCTTCACAAACTTG -AGCATTTGTGCATGGACCACATTTAGCAACCTGTTGGTCACGAATGCTACTGTAGCCAAG -TTCGAATATTGCCTGGCGGAGGTAGTTGAGCTGATTGCTCGAGTACTGGGATTTTGGGCG -ACGCCGCTTGCAAACACAACATTTGATCCTATGACGATGAGCAGGTCAATACCAAGACAG -ATGAAGAATACTGACTCGTTTGGAATTGGCATTCGGTTCATGCTATTGATCTGTTGTTAG -CGACTGCCTGACTTCTTGAAATGGGATTCGATCAACAGACGCTTCTTTAATTTCCTGTGT -GTATCCCCGTGCAAACGGATTCAAGGGGGCCCATCTCCGTTGCTGAGGCATGGTCAAATG -GTAGTCAAGAAAGTCAGAAGACTCAGGTAAGGAATTAGCATAGATTTGCCGGATGTGCAC -AGAGGACGTGTAGCTATTGAAGACGAGAGATGAAGAGACATGGAGAGGGAAGGACAGGCG -ATTTATACTGACAGGGGGATGCACAATGAGCAAGCTGCGTGGGTTGGTCTTTCCGAAAGT -GAGGGTTGAGGGCTGGTCTCTTCAATCACCTCGAATTTGCTTTTTTATCACAAACATTGC -TTTGCTTACTCTTTTGGCTTTGGAGCCCTGTGAAAAAGAAGGACTGAAGAAATGAAAATC -AAATTGGTGCTGGGAGGCAGAGGCAGTTCAAAGCATTAACTCTGAAGTAGATATATAATA -ACTGAGCACACCGCTAAGCATGTAGGTACCATGCAGACACTACAAATCAATACCCCTAGC -AAACAACATGAAGGGCATTGTGTGCCGGAAAGCAAACACGCAACATAATAAAGGCCATCT -TGGACATCGAGCCTATTGCTTAGGGCTACAACAGACACTGCCGAAAAAATTCTCTTGAAA -AGCGTCTACAAGGGAGTAAAATGTTCAATTGACATCCATTCTCACATTGCTTCGATGCAC -CGCATCTTACAACCCTCTTTGCTATTTTCTATATCTTGCCGCTGATTCTTTCTGTCATGC -TGGCGACAATGCCAAAACAATAGAGGGAAATACCGAAAAAGGACATCTCTAAAGACTATA -TGAGCAGATACCAAAACGATCTTGCAAAGCATTGCCAAAAACACCACCAAGAAGATATGA -ACAGGCCGCATCAGCCGCCGAATATCCCAAGGCTCTTTTAACTTTTTGGTAATGAGTTTC -GAAAAAAGCAAACATATAACACGATGAAGGCCGACTTGGAATCTACCAAGTCTATTCCTT -AGTAGGGCAATGATGGATATAGTAAGTAGACCTCTTCCAGCACATTCCAAAAAAAAAGGA -GTGACATTAAGCCAAACAGCAGTCTCATCTCCCATCAAGGCATCCACATGTGAGACTACT -CTATTTTCTTTCTAGAAAACCATAGAAGAAGCGGAGAAAATCCCTAATTAGAAGCATAAA -TGATATTCTCTAGATACACTATAGGGGCATCCCCCAATTTAAACAGGGGCTAGTTTCAAA -CCCAACCGGACCACGGACATTAGTTCCCAGAGCAATGGATGAAGCTAGTTAGGTATGACA 
-CACATAGAGGACCCAGTTTGGGCAGCACGACAGAGACAGGGGAGGAAGGAGGGGCATCTA -TCACTTCACAATGTAGGTACATTACCGTGGTACGCAAGTCATTGCGGCCGCCTTAGCTGG -CGATCTAAATATACCCGGAGCCGGAATTGGACACGAAGAAGATCTATGACATGCAGTAAG -TGGCACTGCCACAGTATAATACCTACACTAGGTAGGATTGAGGGCTAGTGCTAGGTAATT -CTGCAAGTGAAGACTATCTGTTGGTGTGTGTTCTGCTGTAGACACCTTCCCGATTCGCAA -AAGCTTCATATATTACTTCGTCTGTTGTCGAGCTAAAATGTAAGGGTTACTGCAGTGACA -GTGGGACCACTTTAATGACATTCCAACGAAAGAAATTAAGTACCGAGGACCATTTCAAGC -TAAATTAGGCCCCTCGTGCGCATTTGGGACGGGCAATCTACTGGACTCAGATCTGACAGT -TCCAGCGATCAAGGGGTTGTCCCGCGGCTTCCACGGCTTCTGGAAATTCACACAGACGTT -TCGGCGTTTGGAGTGGGCCGCAATTTAATGTCAAAGGCAGTGCCGAGAGTCGAAGGTCTG -AACCTCGAAGTAATTTGCCAACGGTGGCTAGAAAGTTCATTAGGAAGTTCGGTACGAACA -TCCCTGAAATCGTGTGTTCGTTTGTCAGCTTGTTTATTTCCGGAGAAGGGGGTCTGTTAG -GGGGGCAACGAAGCAAGAGAGGCGGATTTGCCCCAAACAACCTCTCGCTTTACAACATAT -GGAGCTAGCCGTCTAAGCAAATGACCCCATCTCAAGGCATACCCATTCAGAGACCGTTGC -CTCGCAAAAAAATTGAATATTTGATGATCACATTGCCAGGAACCGCCTTCTCGAAGGCCA -AAGGAACACGGCGCCTCCCGGCAGTTTGGGAAGCAAATAGTTTCCTTTCCTAAGTGGCGA -CACTGCAATCCTCCCCCCCTATAGGGGCTCTCCGCGCCCTCCGCGAGTATAGGGACTGGG -CCTGGATGAGCCCACTGTGGAGAATTCCGTTTTCCCCCGAATTGACGGTGCGTGACTAGG -CGTTTATGAGGGATGCTGGAGCGTCATTATAGGCACAATCACTGGATAAAGTGGACTTTC -TTTTCGTGATGGAGCCAATGGCATTGTTAGCGCATAGACTCCATCGTACTACTGATCTTT -GGTAGGTGATAATTGGCTTGTGGCTAGTGATTGTGGAGTGATAAGCCCCACTGGACGAAT -CATAACTCGAAAGTGAGAGAAAGTGGAATGAGTGACTATTGTCTCTGTATTCGGTGTGCG -GAGTACGGAGTACGATTCCCTATCGTTATATTGTACGCGGTGTGTACAATATCTTCCGTA -CTCCACGTCTTGTTTTTTTCCCAAATAGTTAGACTATTGTGCGGTCTAGAGAAAAATATC -GAAAAAAAGTAAGTCGGCCCACTAAATTCGAACCTTGGTTTCGTTTCGGTACGGCCCTTG -GATTTGCGTTATATTGGGATGTCAAATCATATGTAAAGATCGAATCAGGGGCGAACCCCG -AACATTGAAACAGTGGCATGTTATGAGTTGATGAGTTGAAATAGACTGATCAAATTCTAT -TCAAATCAGCCTTGCAGAATGAATCTGGGACCGGCCATAACGGTTGCCCCAGGGTTCGTC -CATCTGACAGGTGCAATCCATCCCGCTTCCTGCAGGTAATTACTGTGTACGGAGTAAGTT -GTAAAAGTTGGGCTCCAGTTCGATCACGTATGATTTTTCCACGTCTGTGTTCTCGGGATC -GCGAACGTCCCTTAGCCTTGAAGTTAGTGGAGTAACCGGAATGATGCGGTTAGAGACTTG -CCGCAAAATGGCTCTAACGGGCTGGGTTTATGATCTTACTGTCATCACTTTGATTAGAGT 
-CTATTCGACCATACAGAGTACATATACTCCGTACCGGACTCTGGAATTTTGTAATATGTT -CACACATCCCGCATCTATGGCAACAGGCACTCTAACATGTCTTTGAGACGGGATCTTAAC -AAAGAAACCCGAATGTGAAGCGAATCCTAGTTCCCAATCGGAGATGGCTGGGGAAAACCT -GTCAATGATCTCCCAAGCATACGGGGGCCAGACCCGTGTACCTTTGAACAAAGAGAAGGG -AACCCATCATATGCAACTCCCAGCTCACCGCGGCCTAAGCTACATGCCTTTCTATGCCGC -GAACCTTTCCGTCCACCCGTGAACCTAGTTTACTCATTCTCTTCCACATGGCCGTGCCAG -CAACTGATCCCGGTGTAGTCCGTATGATTGGCCTCAACCAGTCGGAGGATCAGTCAGAAA -AGAGGCAGTGAGATACCTCTTCTATCCGATGCATCAGGAGATCAGGTGGCGCAAAAAAAA -CAAAAAAAAACGAGGCAACAGGATGTCCGGCGAACAAGTATTCAAACTAATAAGCCAGAA -CCAGCTATTCCATGCGCGAGGCCCTGAAACAGGGATTATCCGTTAGAAGCCGAATAGTAA -GACCGGTGTTCTACTATCTCCGTGCGAGAATGCAACTAGCAGAGGGGAAGACCTCCGAAG -GTAGGACATGACATAACCAACATGCGATGGCGAAGTAGACAGGAATCTCCTTGCTGTTCT -CGCCCAGTCTGCGAGTGTCTGCGGTAGTACATCTTACATGGTACATCTGGTACGTCAACG -GAGAAGCCAGCTACATCTCCGATCGAAGTGACTCGTACAAAGACGTGGGAACCAAGGTGA -TTTGTAGGGACTTTTGTTTCAAGATTTCAGGTGGGATGTCTTTGTAAATGGAGATGTTTG -AATGTGTTAGTCTTGAAGTAACACAAAGTAGTTCGAGGCAGCCAATTCTAATGCTCGAGT -CTAATAAATTACCAAGGCGGCCCAACCCCTTCTGACCCCAAAACAGTGCAGATTAGGGCT -GCGTGGACAACGGTTACAGGTTGCACATATTTATATGCCCAACCTCCTCAATTATTAGGC -GCCGTGCATCTGATGATAAATACACTGGAATCAGGTATACAGATATGATCAACATAATAT -AAAGTATGTATAACATTATACAGAGGACGGAGTACTCCGTATGGAAAATTGAATAAATAA -TTGAGTCACTTTTCTTTTTCAGACTTCATGTTTGAGCTCTGGATTTTGTATGGTAAAATA -AGGTACCCGTTACCATGATGAGCGTCCCATTGGTTGAGACGGGATCAACCCGTTATTTGG -GGTTTTAGCTGGTTTCGGTTGGTTTATTTGGGTTATTTGGGTTATTTTCAGtttattgtt -tttggttttttttgtttggtttattttcgtaaattagtttattttcttttttttttCGCT -TGGTATAATTTCAGTATTGAGTATTGAAGGCCGAATTCTCATACCAGAGAAACACCCCCC -CTCTCTCTATGCGCCTTATGTGTGTGCGATGCCTCTTTTATTCTTTCCTTTCATTTCTTT -ATTCTTATACATATCCTCTTATACTATATTTCGATTCTCTCTTATACATATATCAAAGTG -ACCTCATCTTCGTCCATATTTTGCCACGGTGGACTGATCTGCAACGACCTAAAGTCGTTA -ATTTACCGTCTCCACGGCCCCGTCTAGGAGCTCAAGCTGTCACCGCGTCAGGGGCTCACC -CTCGGTCCTCCGATCCCTCCAGTTTCCACCAGAACGGGTCTCCTCCTATTCTGGATTTTC -CTTTTTATTGTATCGTGCCCGGTCCTCAGTCCTTCCAGATCGCCCACTACATGACCACCG -TCGCTCCCCCTCCAACTGCCTCGTCACTCCTTACTTTTCCGTCAATGGAAACCCAATCGT 
-CTGGCTCTGATTCTCGGACATGAACACCTGCTGCAATTGACATCTATACATACACACTCA -CTCGGAGATAGTACGAAACTTTCTCCTCTACATAATTGTAAGAACGCCTGATCTTCCGAG -AATGTCGGCAAGCTGACCTGAAACCCCCCATCAGAAACCCCCTCCTTCACATTCCTCTTC -TCCCGATCTGCTGGCAGACCTGAAGAGACATACATCATCACGATGACCGTCACATACGAT -ACACCCACCCGGGGGTCACACACCTCCATGGCAACCCTCGAGGACCGCTTCGAAGTCATC -AAGGAGATTGGCGATGGAAGCTTCGGTAGTGTTGCCGTCGCACGAGTACGGACCGCTGGT -GCTCATATCGCTCGCAGAGGCACTATGGTATGGAATGACGAAATTGGTGGTCTATTGTGC -CTCTACTAACTGATTGGTACAGGTTGCAATCAAAACAATGAAGAAAACATTTGATTCATT -GGGACCATGTTTAGACCTGAGAGAGGTCATTTTCCTCCGAACATTACCTATTCACCCCCA -CCTCGTCCCTGCCCTTGACATCTTCCTTGACCCCCTCTCCCACAAGCTTCACATCTGTAT -GGAATATATGGATGGCAACTTATACCAGTTGATGAAGGCCCGTGATCACAAATACTTTGA -AGGCAAGCATGTCAAGAGTATTCTCTACCAGATCTTGTCCGGACTGGACCACATTCACGC -ACACCACTTCTTCCACCGCGATATTAAACCCGAGAATATTCTTGTGTCGACTTCCGCCCC -CAACGATTCTGCTTTCAGCCGCTACTCTAACCTTGTCACCCCTCCTTCCACCCCTCCTGT -CTACACTGTGAAGATCGCCGACTTTGGTCTTGCTCGTGAGACCCACTCCAAACAACCATA -CACCACATATGTCTCCACACGCTGGTACCGCGCACCTGAAGTGCTTCTACGAGCGGGAGA -ATATTCGGCACCGGTCGATATGTGGGCGATGGGCGCCATGGCAGTCGAGATTGCTACATT -GAAGCCCCTGTTCCCTGGGGGCAACGAGGTAGATCAAGTTTGGCGTGTGTGTGAGATCAT -GGGTAGCCCAGGGAACTGGTACAGCAAGTCAGGCGCTAAGATTGGTGGTGGTGAATGGCG -TGAAGGATCGCGCCTGGCCCACAAGCTCGGTTTCACATTCCCCAAGATGGCACCTCACGC -GATGGAATCCGTGCTTCAGCCCCCGATGTGGCCCGCGGCGTTTAGCGAGTTCGTCACATG -GTGCCTCATGTGGGACCCTAAAAACCGCCCAACGTCTACCCAGGCGCTTAATCACGAGTA -CTTCGCCGATGCGGTTGATCCGGTGCGACCCAAGTCCTCCACCTCGTCGAGACTGCTGGG -CCGCAAGCAGTCCGAGAAGAGCTTCAAGTCACCAATTCTTACTCCTGGTGACTCTCCTAC -ATTGTCTTCGAAGCCTTCATGGTTCCGAAGATCGTTGGTCGTTCGGTCAGACAGCCCGGT -ACCTGCTGCAGATGACAGCCCTGCTCGGCCACTAGCTGTTGCTCAAGTCACCGTGCCTGA -GATCCAGCCTGTCAAGCCGCGTCCCGCGAACCCCAAGCGGGCTACCTGGGCAAATGGTGC -CCCAATGCCAATTCTTCCTTCTATCCGCCCTGTCTCCCCCCTCTCCAACTCAGTTACAGC -ACAGGCAAACGCCACTGTGGCGCACAGCGAGGCTTCCAAGTCCTCCGACACATCAGCTAG -TTCAAAGATCGGCCGTCAGTTGTCTGTCAATTCCAATGGGAATCACTACTCGGACGCGAA -TCGCCAAGAGGCTGAAAGGATGCTCAATGGCTCCGGGACCCACACTCCAACCACAAAGGA -GAGTTTCTTCTCGCATCTGCGCAAACGCGCCCGCAGATTGTCTGGACGAAACCAATCAGC 
-ACCTGCTGACGACGTCGAGGCTAACGCAGGTTGCATCCCCTGGTCGAACCGTTCATCAGT -GGCTCTTGATGGGCCTACTAGTGATCCAAAGCAAGGTTCCGACTTGACCGAGCTGGACAA -GGCTCTCCAATGTGTCAAGTACTCATTGGACTCGTCTACAATGAGCAACGTCCCCGTGCA -CTTGCACAATTCCGCTGACGGCAACACCAAGCGCCAGTCAATGCCTCAGGCGACTAGCAA -CAACGCAGCTCCTATCTCCAGCAGGACTCGACGTGCCATGCAGATGTCTACCCACCCGGT -TCACCGGTACGAGACCCCTGAAGAAGAAGATGAGTTGCTTGACGAGGTGTTGCACTCCGC -CAGCAAGGCGGCTAGACGCCTGGCTCAGACTCAGTCAATGGCTGTCGATTCGCACAGCCG -TACCCACCGACAGTCAGACAACTCCCGTGCAGCGCAGAATCCCTACCCTACGCCGTCACC -TACGGCCAAGGACAATGCCACGTTTGGTCAACAGTCGACACCTAGCAAGTTGAACATTCA -CAAGATTGATCCCACTGCGCTCAACCGCCACTGGCCCACCCCTCCTTACGAGGAGGCCGA -GTTTCACAATAAGAAACCCGACTACTTCGCCCGTGGATCCAACTACATCTGAGTCTCTCA -ACTATCATCCTGAACCGCATCACCTCCTTTTCAGTTAAGAAAAGCACAAAAATTTAGCAA -AATTGCACAAAACGGAACAAACACCGAGCAACACAGCTCAAGGCGTATGGATTCGGGTAT -AGTTTTTGTTTTCTTCACAAGGTCTGGCCATTTCAGAAGGTGttttgttttttgtctttt -tttttatttttACACATGGGCATTGGGCTTTCGCCCTTTCTTGCAAGTTTCTCATTTCCC -CCACTTCATACCCCTACAGGCATTTTATAGATGGATACGGGCCTCGTTTTTTCCCCTTAT -TATGGAGTCCTTTCGTTTTACCACTTACCAGGGGCGTGATTTCTTTCCTTGACATTTGAT -ACCACATGCGCTAGTCGCATCATATACCGCATTGCTACCTTATTAGGTCACCCTCTCTTC -ATCTGATATAAGATATATCGGATCTGCACATCTGCACATCATTCTTGATGAATATTGCGC -TCTTTTCTTTTGTTTATATAATTTCCCCCCTTCTCCCCCACTTGTACATTGTCCGGCTAC -CGCCATTCCCGTCTTTATTCTTCTTGTCCATCGTTTTTCTACCACTTATTTATCCCTGCA -TTTACATATTCCTGTCATAGCCCTACCTGCTTTGATTTCCTGTATCATTTCCTCCCCAAC -CTCCACATGCTGGCTTGGATCCAGTCCGTCTATACTCTTGTAATATACCTCCGCCTTTGG -CCGTTGTGTCATGCCCATACATACATCTCACTACATATGCATTGAATCTACTAAAAATTT -CATATTGAGACTTTATAATTCTTTGTAATGTTCACCTGCTTCATTATCTTCTCCCAAGTT -TATATCGAATTAGTTGGAGCCGGTGGCGATGAGAATGATCTACAGCGTGGGAAGCCGAAA -TCATGGGTTTAATAGCCTGCAGGTTCAATTTGTGCAGAGGTTAAGTTTATACCGTAGAAG -ACAGGCCATTTCATCTCAAAATTACATGTTGAATGATCATCTTGCATCTATACTTGCCAT -ACGGGTACATGAATCCATATCAGCGGAACAATGATCATCCGTCACCCATCAAGATTTCCT -GATTGAGACTGCTCTGCTTCACTCTACCAACTTTTTTTTCTTGTCCTCAATTGTCCCCTA -GCACCATCTCACCGGTCGAAACTAGACTGGCCATAGCTTGAAAGCCGAGCTACCAACACC -TTGGCATATCCAGACACGGCCCGGCAAGCACTTTTCAAAAATCGCCGGCCGATCACCACA 
-AAATCAAATTGCCCGCCAACAGCTACGAGCTTAACCTGGGGCAACCGAACTCTAGCTCTT -CCCCCAAAAACACAGCGCTCTATTTTACCCGCTATACGCCAACAATAAACTTACCAAAAT -GTCAAACCAAAGACAAGCCTCCTCCGCCTCCCAAGTCCCCCACCTCTCACCCGCCGAACT -TTCCTACCTCTACACCTCCCTCAGCCTACCCAAGAACCCAATCCGACCAGACGGCCGCTC -ACCCACACAATTCCGCCCCCTCTCTGCCGAGACAACAATCCTCCCCGGTACGAATGGCAG -CGCGCGCATCGGTTTCGCAGATGGCACGCAAGCGATCGTCGGCGTGAAAGCCGAAGTTGA -GAAAACTGTTCTCGCAGCCGACACGCTTGACTCGCGCAGTCTGGCACAGCATGGCGATGC -ACTGAATCGCGAGGGCGAGGAGGGTTCGGCTGCTGTCTCGGGACAGGGAGAGTGGGTGCA -GATGTCGATTGAGATCCCCGGGTTTAGGGACGATGATGCCCTGCCTGTGTTTTTAAGTGA -GATGATGCGGGAGAGTCTGGTTGGTTCCGTTGCCGGTGGGAATAGAGATGGGGCGAAGGA -GATGGCCGGTGGGTTGAAGGGAAGACTTGTGATTAACAAGCGGTGGCATTGGCGCTTGTA -TATTGATGTAGGTCTTTCACACCATACATGGAATCTCTATGGAACCAAACGCATTATTTG -GTCATTGCTAACATGTTATTCTCTAGGTACTGCTTctctcccagcccctcgcataccccc -tccccctccTCTCGCTCACAACCCATCTCGCCCTCCTCAGCACTAAGCTACCGAAGCTCA -AATCCACGGGTGAAGAAGATCCCTTCTTCGACGATGACTGGGCCGCCGCGGAGTATCTTT -ATCCGCGCTCCAATACCTCTAAATCTCCTCTCTCCGCCGCACCATCATACCCTGTTCGCC -CACCAGTAACATTGCTTGTGATTTCCGTCGGTGACAACGTCATCTTCGATCCTAACCGTG -AGGAAATCGCTGTCGCAGACGCTGTTCTGGCCATCTCCATTACGCGCAGCAGTGATTCGG -AAACCTTGAAGTTGCTTTCTATCCGCTCAATCGATCCCCCCTCTCGCCTCACTCAGCCAG -GTATTCCCAATGCTGAAAATGTGAATATGCTCGGTGCTTCTGCGGCTCCTGGCGAGGAGT -CTGGTGGCCAGGAAGAGGTCGAGGGTGTCTGGCGTCCACGAAGGGGTGGTGTTAAGCGCA -GCGTCATTGCGCGCATGATTAAGACTGTCCTTGGTAAGGGCGGTGTTGGTGAGGAAGTTC -TCGAGGGACTTGAAGGGGTTGAAGTGCAATGATTTATTCTTTCAATCCATATCTCATGGC -CATGCATGTGTAATAAAGTACAGTATTACTTTCAGGTTGCAACTGTGGTATCTGATTTCA -TAGGTCGCTACTTCTGGAGTTTGGAGAAAAGCGTTGTCTTTTGATTAAATCGTTTCCGTT -CATCGTTCGACTGAAGTTGAGATAATTCTCCTATCGTAGCTGGACTTGCAGTCCACTAAT -GTACAAAATGGATTGAACACCATTTACATGAACATCACTGCTGTAGGGGTGTTCGACAGT -CCCCCTCTTGCAGGACCCAAATGGTACATACCCACAGCCAAACCACATAGACGGCATATG -TAACATATGTAGCCTATGTTACGCTTATGTAGCTATATGACCCGTATATTTGAATGGTAT -AGTATACCTCTCCCAAGGTAAGTAAGTAGGGAATAAGAATCACCAAATTCCGAGCCACTT -CTTAACGGGCTTCTTATTCCCCTCCCAATCAACAAAATCCTTACCGTCGCCACCCTCAAC -GGCAGCAACAGGAGACTGAGTATCGACCTCATTAGCACCTTCATCCTTCTTCTCAGGCTC 
-TGCCTCCTCAAGGCCATGCGCGATACGGTACTGCTTTCTCTTCTCCGCATCCTCAACGCG -GCGGTGTCGCTGCTCCCGCACGCGCATTGAATTATGCTGCACATGCATGCGCCACACAGA -GAATGCCTGCGAGATCGTGTCAATGGGCGACGTCAGCAGACTCGACCAGGATGGTAGCAG -GTGCGCGAACGGAGAGGTTGCTTTGAAATTTGTTGTGAAGGTGAATCCGGCGCAGGAGAT -CAGGACGCTCTGTATTATTTGTTAGTTAATGATACACTCAATTCTTCTGGGAGATTGGTA -GATCAAGAGCTAGCTCTGAGGATAGACCCAAAACTAAGTCTGCGCAGCCGGCCACTCACC -AATGCAATCCAGATGTGGATCCACTTGCTCGTCAGGAACTTAAACATCACTGTTCCTTCC -GGCGGGAACATGCCCGGGTACTGCGTCTTGTTCTGCTGCAAACGCTCTTTCTCCGTCGTG -CGCGGGCCATACTCGAATGGTTGGCCTGGGGGTGCGTTCTTCGGAGATGGAGTCGTGAGG -CGCTGGGGATGCGAGGGCGGGCGGAATCTATCGGGTTTTGCCAGGACGCGGGGCTTGGAA -TTGGCATTGGTATTGCTTTTACTGGCTGTTCTCAGCGCGGCGCTGGTCGAGGTGAAGCTG -CGTTGCGCGCTTTGGCGTTGGAGGGCGCGCAGAGTCAACGCTGAGGGTGGGCCTCGTGGG -AGGAGGAAGCCGGGAATTGAAGTTGGCATTTTTGAGGTGCTGTTCTTCCCCTTTACTCGT -GTTAAGGAAACGGGCTGATGTTGTTGACTTGATCTAGATGTGGAACCGTAGATGCAATTT -TTGGAGAAGGTTTTATATCACGTGACACCATCGGGACAGCGGATGCCGATTGAGTCGAGA -TGCTCGCACCCGGCTGACCGCATATCCTAACTATTTTGGGGCTGGAAAAAAAAGTGAGGC -CACTTGACCTTGCGAAATTTCACATGGACGGGTATCCATTATTTCCCGTGTTTTTCACCA -ATTGAAAGGGTCTGAGACAGCTCCAAGCTATACAGAGTATTCGTGATTTCATACTACATA -AAGCAACATAATGAAGGATTATTGTCCAAGCTCACTGCCTCGGCAACAGGCTAAATTGCA -TTCACGGCCGAGAATGACGAAAAATCGTTAATAAATTATTTCTGCCTCAGGATCCACACA -TCGGACCACCAGCGGATCAGCTCGGGCGAATGAGAAACCTGCCATTTCGAAGCCAAGGTG -GATCGGGAAGTTGCTCCAGTTCTCCAGGGGGGACGCGCGCAAGATATGACTGGGGGATAT -GGACACGGTTTCTATGTGTAGGAATCCAGTGCTTCAGTTTGTGAAGCTCGGAGCTTGCAT -ATGCACAAGCATGTTAGTCAGGAACCTGCATTCACCTAGGATAAACTGAGCtgatgatga -tgtgatgagatcatgGGTCGGAATATGGAGTGTACATATTACTTCTTTGCCACAAGCCAA -CTGTCATTTCAAGCTGTTCCCATGGATGGAATTCATAAATCCAAGCAAGGAAGTAGAGAC -AAGAAGTGTAGAAGTGTCGGAGTATAACAGACACACCGAAGTTCCCGGTTGTGCCCGTCT -ACCTGAATCATCTGAGTTCTGATTGGCCGAGAGACGGATTACGAGAGGGAGCGTCGGGGC -CAAGTACAAATAACCAGATTTTCTTTTCTCCCTTCCCAGATTTTTTCTGTACAGTACAGT -GTCATAATAACTTCTTATTATCTTCATTTAATACTTCACAATGGCCCCCAAGAAGCAAGA -ATCTTTCGCCCCTGGTAAGCTACCTACTTTCCCCCATTGAGGTCACAGGTTGAACGGGGA -GGCGCGATGGACGCCATGGAGGAAGGAAGTGAGAGTATCCGACTCCTGCGATGGCGACAC 
-TGCTACCTACTGGGACATCGATTGATACAATTACTCGCCTCTCACATTGCGACTCCTCGT -CAACCTAACTATCTCTCAACTGCGCGTCGCCGCAACCTCACTCCAAAACACCGCACATCA -GTCATTCAACATGACCACCTTTGATGTGTTCTGCACCTTGACGACTATCGACATGCGCGA -GGCCGAGCGTTTAGAGGCCCTCATTGAACACGCCAAAGAAGAAGTCGACATGATCGCTAA -TATCAGCTTCCTTCCAGGTGACGCCGCCAAGGGTGCCAAGCTCTTCCAGGTACGTTCTTT -ATTGTTTTACGCGCAGACAAGGAAACGACAGAAAACTAACAGTTCTTGTTTTTCTAGACC -CGTTGCGCCCAGTGCCACACCACCGAGGCCGGCGGTCCCCACAAGGTCGGCCCCAACCTC -CACGGTCTCTTCGGCCGTAAGACTGGCTCCTCCGAGGGTTACGCCTACACCGACGCCAAC -AAGCAGGCCGGTGTTGAGTGGAACGAGGACACTCTGGTAAGTGCCAGCCGCCACCACAGC -CCCCGATCGCCCGCTAACGATTTATAGTTCTCCTACCTCGAGAACCCCAAGAAGTTCATC -CCTGGTACCAAGATGGCCTTCGGTGGTCTCAAGAAGACCAAGGAGCGCAACGACCTCATC -ACGTACGTCACCCGCGAGTCTACATTTAAGCATCCAACTAACAACCCCATAGCTGGCTCA -AGGATAACACCGCATAAACGACCTGATGTTCTTTTTTATTCCCCGGCTTTTTGTTCTGTT -TCTCCCCATAGAAGTCTGGGATGGCGCCATGGGCTGATTAATCTTCCCAAGGACCGGGCC -TGTATTTTTACTACAATTTCCATGACCCACAATAGATTTGATCTTCTCCTCTTCAGTTTG -ATATTTCACTCTTTTTATAGAACAAACGCACTGCAAACGGGGGTCATGCAAGGTTTCAAG -TGTTTGAGATTCTCCGGATTACCATGGAGGCCTGGGTTATGATTGGTTGATGGGCCTGGA -TGTATCGATTGGGCAAACATGGGCGGTCAGGCACAGAGATACTTGACGCGTACTTCGCGA -GTACATCGATACTATCGTAAGAGCCACCAAATTGGAAGTACTTGCGGAGTAATGCCCTTG -TATCTTTCCGGTATCTGATATGTACAACGCGTGTTGGCCCTCGAGTCTCCAGAGTCAGCA -CATGACCGAGCAGGGGTCAGGCTGATTGGCCCAACCTCCTCGACTTTGGATACAAGTGAG -GGGATGCTTTCTCGTCTTCTCCCTGCATTGCTCAAGGCGTTTCGATCCCAGTTTCATATT -AATATCCAGCGCGTTTGCTGATCAAATCTTGTCCCACAACCACAACCACTATCGAATGCT -ACCGCGGAGATATCCCGTATCGCCATTTAGAATTCAGTACCTGCCTTGtcgcatcgcatc -gcatcataccttatcgcatcTTTGTCGTGAGGTTGATTGCGCATATATCTTCTGTAACGG -CTAGCGCATGAGACAACCGTCCCAGCGCCGTACTAACCGCTCTATTTTACTTACTCAAGC -CAAAGCGGTTTAACCGCAAGTGTTCGCCATTCATAGGCATTGATCTGCGCTTCGTCGCTG -AGAATTGCAGTGCCTCTTCTTCCGAGGCGCGACTATCGCGATACGCCGTCCGGGATGCTG -GATGACTTAGACAACGTCTTCGACGACCATCCATCCCTGGACGCGTCGCTCGAAGACTTT -GAGAACAATACCAACCCCCACAGATCCCCAATGTTCGCTCTGCCATCCCAACACTCCGGA -TTCCGGTCTGAAGACTCAGATGGGGAAGATGCAGAGATCGAGGACCCCGCCCAGGAGCGT -TGGTCACCTCCAGGCTTCCACCAACCCGAATACGTCCAGGGCAGTGGATGGTACCGCCAC 
-CAGCCCTATCTGCGCAAAGATAACCTCAATACGGATCGCTTGCAGCTCAAGCCCACAATC -GGGCTCAGTATCTCCCCGTCCCAGTCGCGCGAGCCAAGTCCACAATATGAGGATGCGCTC -GAGAGCCCAACGAAGAGTAGACAGGTCGATCGTGCCACCCCGGGCGACATCTCTGTCGCG -GCTAATGTGCCTCTACCGGCGGGTGCCGATACCCCCCTCCATGGAAGATCCCCAAGTCCG -CCTCCGGCTCCGACAGGACGAACACCTCGCCACAGTCCCGAGGACGACGGGGTGGGCTTT -GGTGCGGAAAATCTGAGCAACTGTAGGCCCTGTCTGATCTAGTTTCAGTTGCAGGTGCGG -GTATACTAACGATTGTTGTGAATGAATAAAGACATTCGCTTTGCAGTGCGTGCAGAAGTC -CAGCACCGAGAACCCGTTGCCGCATTGTTAGGCTTTCTGCGCTCAAAGTTCGAGCGCATG -ACCAGCTCCAAGTCTAACACCACCTTATCTATCATAATCTTCCTCCTCTCAGTCGCGTTC -ATGCGTGCCCTAGTCCTCCCAGGCCTCCCGCAATCGATTCCCGATCTAGTAAAGCTATCC -GGCTTTGCGCGTTCCTTCGAACCACTGATATACTACTCAGAGAACGGCGTCCAGCAAATC -GGCACCCTACAAGAAACCGGTGTCGCAGTCTGGGATCTGAGCGAGTCTGTCCGCGGCACA -AATATGACCAGCGCACCCATAATCGTGCGCCAACTCGATGAACTCTCCGACTCCCTAAAG -TCCCTTTCCCTCGAGCTAACCCGCTTCTTCGCAAATGTCGACTCCGACATCGACTCCATC -CTGATAGTAATGGACTGGGCCAAGCGCGAGCTCGAAACCCTCTCTGCGCAACCGCCAAGC -ACCCTCCCTACAATCGTCTTCGACAACATGCACAACATGCTCTCACGTCTCGGCGCACTC -GAACGCTCAGCCCAGGTTTCCGACAACGGCGTCTCCGCAACCTCAACGACAACAACCCCA -CTCGGTCACCTCGTAATGGCCGTCTTTGGGCCAACTTCCGCACAGCGCACACGTACAACC -CTAACCCGCACCTTCACAGAGTTCCTCTCAGTCCTGGAGGAATCAATAAACAGCGAACTC -ACCCACTCAACCGCACTATTCGCGCTATTTGAGTCCATCGACCGCCAGTTCCTCAATCTG -CAGCGCACGGTTGTCCGCGAATCAGACGCTCAGGAACGCGCAGAGGGCGAGATGCTTTCT -TCTCTGTGGACGCGCGTGCTTGGACCGGATGCTGCGGCAGTTCGCAAGTACGAGAAGAAT -AAGAAACTCCTCGCGAATGTCCGCAGTCGTACGGTCGCTAATAAACACCTTCTCGTTGAC -CACCGCGGCCGACTGCTCACGCTGAAGGTCAACCTTGAAACGCTGCGCAGGAAGCTTGTT -AGCCCGCTTGTGCGTCGGAATGACTCTGTCAGCTTTGTTGGCTCGATCGATGGAGGTGGG -GGGAATGGGAATGGGAATTCTGGCTCTACAGGCCGAATGCTTGGCCCTGTTGAGGCCGTG -ATTGATGGACAGATCCGTGGACTGGAGGGGACGTATGATTATCTGCGTACTGTTCGCGAG -AGGCAGAAGGCGAAGCTCATGGAGCTAGTCTATGGGGCTGGGAGAAAGCCGAGCCGGGCT -ATGATTGATGGGTTGGATGATGATGAGTGAATGGCTCCAGGCCTGATCTTATTTAATGCC -TTTGCTTCCTTTGACCTCATTTGTGGCATAGCATCCTGTTTTCGGTTGGATTCTTGTCTC -CTCTCTGGCGTTTTCTAGGGCATATATGGCAGGGCATCTGCGGCTGCGGGCTTTTGGGAA -TATGGTGGCTCACCTGCTGTGCCTCTTTGGATGACTGCTGTGTTTTTGTTTTCTCTTGAT 
-TTTGTGGCGAATTATTTTGTGGCGAATTGGCAGGATTGCGAGGAAAAGGGATAGAAACAT -TGGCGCCAGCTTGTTTTACTTCACTTCTTGGAGCCGTGCAGTTATGGGGTTCTCTCGTGG -ATGTATGAATGAACTTTCTTGCTGTTCAGAGTCATTGTTTTCATGCAGCTTTCATACAAG -TCCCTGTTTATATAGAAGACTCCTTTATACTGTTTACGCTTTCTCCTACCTCATGATGTC -GTGTAGAGTTCACAGCTTTCATAGCGACATGATGATCAATCTTCTAACGATGAAACCGTA -GGGGAATATTTGTAAGACATTTTAAGGCACATATGAGAGTTATCCTATATTTGGGGTCCC -CCATATCATATCAAGGTAGAAAAATTAGAGCCCCTACGTTGTTACGGACAGAGGCTGTAT -GTGCGGCTGACTACTCTGGATGTAAATGCAAATTGGATCCTCATTCTTTGTTCCTCTGCT -TGTTTGCTCCCCCCTCCCCCCCTCTGATAGACCACCATTGTAAAGTATTCAAGCCAAGCT -TACTGAGTTCAAGATGATCCTCATTCCCAAGAATCGTCAGGTAGTAGCTCTGGGGATGAC -CTTCTAGGAGCAACGTAACTGTTGCTTCCCCACGAGCCTGTCAGCTCCGCAGAGATGTGC -CTGAGCAGAACCCCCCTGACCAATGAAAAAGGGAGGTGCTCAAGGCACCTGTGCGCCGGG -TCAACATGCTGTTCAACTGATATCAGCATAATAGTCAACGGTGAATCGCTCGTTAACGCA -GACGCAGAGATTTCTATTGAGCTAGCTGCAAGACTATGTAGAGACCATGCAGGGTTCTCT -TCCCTTCCAAGTTCCAACCTTAGACCCAAGGTTGAGAGGATTTATCTAGGTGTGAATAAA -ACGATTTCTCTATGGATCTATGATCTTGCCTAGTTTGAATACACCTAGAGACTTCTTAGC -ATAGAACACCGCGCAAGTCATATCTAGGTGCCAAAACCAAGGAGAAAGTGAAAATGGCAT -GCTTTTTTCAAATTAAATTGAAGAACTGTTGGGATTCTATAAGAATTACAGCTTCTGCCA -TGGTCAAATGTGTTCGCCTGATACAAGATCGGTATCAGATACCCGGAAGTGGTCTCTTTT -GCATATCGTCGATTTCCATCCATTATGAATGCCACATGTTGTGGGATGGGTCCTTGAGCC -AACGTCTTCAATGCAAATTGGTTGATATATCTAAAAGCATTCTCCAAGGCTTTATAAATG -AGATGGAAAGTGTGCATTGTATGAAGTGGAAGATTTCCAAGGCAAATCGAAAAGTCTCTC -CCAACCCGTGGAGTTGATGTTTATACGGAGCTCCAGCTTCTTTCCTCGAAAGATCCGCAA -ACATGTATCACAAGCCACTCAATTCTTTGTGCCCATTCCAAATACTCAGTAGTGTCCAAT -CGCAGAGGCTTCTGACACGGGATCATGTCTACAATGAGCTTGTTTGTTCCCGAGGCCATC -ATCATATGTACCAATATCAATGCCAGTGATTTGGCTCATTTGGATGTCTCAGTTGCACCT -TTCGGGTGCTAGGTAAGTCCCGGAAGCAGTTCACATTCTAAATTTGACCGGCATCTACAG -TTGAAATTGTCTGTTGTATAAACTAGATTCATATCAACCTTGGGACCTCTGGCCTTAACA -CTCTACATACGCGCATATATGCCGGCATGGATACACACAAAAGGAAGCAAAAGCAATCCT -TAATCACCGCTTATTATGTCTCTTGATATGTGTCACTGGTGTTACTTTATCTGCTCTCAA -GTATGACGTGCGTCAATTTTGTGTATGCAATGTTATATCGAGAGATAGTGTACCACTCCA -TCCCGAATTCAAGGTTTTGATAGGATTGGTCGGTTTTGATAAGCATTGCCCTGAGGCTTG 
-TATTGACGAACAATGGCGGCATTTCCGTCTGAAGTTTGCTTGCGATAGGTCGGGTTCATA -TCTTTATAATGCTCTAATACCTTCGTGCTCTTCTTGGGTTAGACGGGCTATTTTCTCATG -AGAGAGGACGAATACAAACTTGGTAAGGTTGATTGAATGGGACAAAATCCTCAAGATATC -AATACAACATGGACTTTCCGTATAGATCATTTCCTGATCCTCGTTTCCCTTCCACAACCG -AAGCCACTCTGCAGTGGGAGATGATGCACAGCCATGTACCGTCCACCTTAGATTCCCGCA -ATTAGGTGTGAAATCTATACGAGTCCATGACCTGAAGACCAGAATGTTTGAATTCAGTGG -CTAGTCGAAGTTCCACAAAAGCATCCGTATCCGTCGAAGCTTCCCTCATACCTTTCCATG -TCCAGCACGCACCGGCTAAGTTCAAGAAGACCTCGTCCCGCTATCTAGCAGGACCGAATT -CCTTTTTGGTGGCATAAAGCCCTTCTTGTCGTATATTAGATGAAGGGAATCAAACTGGTA -ATTGGCAGGCCCTATCTAGCCTCTGGATATTCGAAGCCACCATGAAACTCGAACATTGTT -ATAATGGGAGGAGGTACTATCGCGCTGACGTGGGACCTCCACTTTGAGTGATCCTTCACT -TTGGATAGCCTGAAATTGTTTTGCTCTGGCCTGAGGTCGCACGTTGCACCTTTGGAGCGC -ATTGGATATTGCTGGGGAGAACTGAAAGGGAATGTCGCCGAGAAAGAGAGACAGAGAGAG -TAGGTGGGGTTTGGTATGGCCGATGATGAGGAGCCAAATTCCAGATAGTTCGAATATATT -GAACGGCGTGTGTTGAGGAGGTCGAATAGGTAGTCGATTTGATTGGAAAGGCCAGGAATG -TGTTTTTTTGTCCGCGTGTCCCGACCACAAGATATCCCTGGGGAAAGGAAAAGGGATAAC -TAGACCTCTCTAGTGAATGCTAGTTCTCCCGTAACGGTATGGAGTACCGTCAAAACGCCC -ACAAACTGCTGGCTCATGCCTGGAGCAGAGGTCCTGAAACAATGTCCCAAATGCAGCTCC -CTTTCGATTGATTGACTGCGACCTCTTTGATAGAGGCCCCTTAATCTGTCCAATATATGC -AAGCATGGGCGTAATCCCATCCACTAAAAACATACTCGCAGAGTTAGCAGAGGATTTCCA -ATGTCTAGAAAATACTTCTTACCGGGAACCAGGGTCTTTTTTTCATTCGTGGACGGAGCC -TGTGATCCGTGATTTTAAACCTGGCCGAGCACCTCCCTTTTGACGGGGAGCGTACATGAG -GCACGAATGAAAATTACTGGATTCCCAGATGCCGACAAATAAGTCGGGCAAAAGGGTGAG -TGGTAGGAAGATGGGAAAAGTAAAGAGAGATTCGATCGCGGAATTTAAATACAAACCCGT -GGCTTCCTAGTTAGGTAGGTACGCATAGAGAGGAAAGATCGATTCATATATGACGTTAGA -ACTTGAGTATACATGTCACATATCAAGGTGAGATTTTATAGATAGTGCTTCGTATATGGC -CTATAACTATAGTGAAAGATTTTTTTATAGAGAGAAAGAGTTTGTTGGGTTCAGTATGGA -ACGGAGTGGACAGGGAGAAAGGGAAAGAGGGAGAGATCCAACTGGCTACATGATCCTGAC -CTGATGACCTGATTGGGGCCTGAGGCCAGCAATATAAGTTGGAGATTCCATGAATCAAGA -ACATTCATTGCTAAGGGTTTGGGCTTGGTTATGGGTATCTGGCAGGGACCATCTTCATTC -GACTGAGGAGTTGACCGGGTTGCACGTAGATGAATTCTTTTACACAATTAATCTATTTCC -TATCCTCATGCTTTGGAGCCTCAGTCGGTCTTCGTGATAGTACGCTTATGCTCAAAGCTT 
-GTACTTGCAACATCGTCTTTGATCAACCGAGCAAGTCCACCGCGCTGTTTCGGACAATCT -TAGGACCTAGGACTGCAGCCAACTCGGCAATGATTCCCTTGATAGACATTGGCGAAGTCA -TTGACATGGTTTCCCATTTGACAAGATCCTAATTGAGATTAGTAAGGACGTCTCTCTGGG -CCGAGAGAAGAGATAAATGCTTACCTTCATGTGAACAAAGGTGTTGTTAAAGAACAGCAG -CCAATGATCTAAGTCCAGTGCTTCCACTTCGGAAACAGTAAGGACGCCGGGCATACCGAT -ATCAAAGAGGGAAACCTGCTCTAGATCAGAGTCACTTTTCGGGACTTGGGGCGCAGGGGA -GGATGAACGGTCCCGAGACAAAGAGACATAGGAACGAGAATTAAGCTTCTCTGTCATTTC -GCCATCTAGCGGGTCCCAGTCGCCGGATTCAGGGTCTGAGTAACAGAGAAAAGGGTCAGA -TGTTGTGCACAATTTTTACGTAGTACAAAGGGTATGTATCAAGAGACGAGTGGGACATAC -CAAGGATGTACTGGGGGCGGAAATTGGCCTTGTAACGCATCTTCTGGCAGGTGTGAATGT -AGTACCCTACATTCCGATTGTCAAGAGCGAATCAAATAGGGACTAGTTTTACAATCGTAA -CCTACCCATATAGTAGTACATGTACTGATTCTCGATCGAGAAGGCAATTTCGCGCATGGC -GCTCAACTTGCCGATTTCCCATTGCTCGTAGGCGGGATCATAACTATGTGGTATGTCAGC -TTATTGCATATTTTGGTGTTTCAGGGTGTAGTCAGCCATCAGCTTACAAGACGTAGACAG -AGCTGACCCCACTGGGGACTAAGTCCAATACCGCAACAGCAATGAGCTCGCCATCAAGAC -GATAGCACTGATGCCACGAACCAAGTTTCTTTTCAGAAGAATTGGTAGCTGTTAGAGGAT -TTGGAGTTCGCTTGATGCCTGAGCATAAGAAGCGTTGAAAATCCTTGGGTTTCCAGTGAG -AGACATCCTCTTTATGGATGGCCGTTTGGTACTTGAGAAACAGCTCGAACTTTGCCTGAG -ACACAGAGTCACCTTCAATGGTGATTTCAAAGCGGTGCGCTGGTTTAATAGGGAGTTTGG -TGGCAGGGTCGATTGGCCGTTTGACGTGGTCGTACTCGCTCTGGTGGACAGCTGCCAGAA -GGTCGAAGTTGCACTTCCGATGCTTTTTCTCTCTTTGCATTGGTTAGCAGTGATCTGATA -TCAGATTTCCTACATATAGGATTGCATACTCGCGGGTTCGCGGACAAAGCCGGGCTGCCT -TTCGTATATACTCGGGACCTAGGATGAATTTGTTCCACTTGTTGATTGCCTTGCGTTGAT -CGCGGCGGGGCCGATAAGCGGATGCCTCTAGTCTCAAAGTATAGTGAGGACAACAAGACC -GAAGTAGGTTTTGTTTGTAGTAGAGATTTCCCGATCTAACGATGTCAGTATGGTTTGGTG -GGTTACAGGAAAATTAAATCGGGGAATCATACCGCCGCCAGCCGCCGTTGATCAGTTCTT -CATAATGTTCCGGGTGTACGGACGTGGAGTTGGTGTAGTACGATTCACCTTTGACATGTC -AGCATCCTTTTCTAAACCTTTAGCCCTGCGGGAAGAAAAGGGACGAAACAAAACAGGGGT -AGGGACAACAAACTTCCATCTTTGGACTTGCAGTAACCACACGAGTTCTTTTGGTATCCT -GACAGGATCACATTAGTCTTTGCATAAGAGGGGTTGGACGATCCAAGAGCTGCAGCAATT -GGCGTTGATATCCCCGCCGGATTCCTTAAGTGGTAATGCTCTGGGCCAGTGGAAGCGCCA -CGGGGTAACAGAGGTCTATCTCAGTGGTTCCCTGCAGGAGAGTGGGACGTATTAGACGTA 
-CCCAGTGGACGGAAGAGCGACAACTTTCGCGCGCGTTCGGCGTCAACCCGCTGCATGTTT -AGTTTAGAGATGTGTTGGCGTGCACCGAGTGTGAGTTGTTGTCTCGGAAGAGAGGAGGGA -TTGAAAGTTGAAGTCGGGAAAGTCGGGGGTTTGTTAATACGGTCTATACGGTCTATACGG -TAAGGGGGATGTGTCTACGTTTATGTGGACTGATGTGTTTTAGGTCTATAGCTTTTTTAG -CCGTGTATGTATCTCATCGCTCAAGCCACATAGCCGATCTTATAGTTTCTGGTCAAATCG -CCCGAGTTTCCTCCATCTCGGTACCAGCAAATCCCGTGAGTTTTAAGAGGCTCATTACCC -GCACATTTCAAGGGTGATGTGAGCTGAATTATAGGTTACTAATCCAACTGATTGGAGATT -ATATTCCATCATCGTATATTATATTTTCAATTCTGATAGATTATAGCTAGTATACCAACA -TATATAGCCATATAGTGAAACATCCCAAATGGTTCTTTGATCTAGGGAGTCACATGATCA -TATATAGAGTGAAAAGGAGAAGAGTCAAGATCATTGAACTTATTCCAGGAATTCTTTCTA -TAGTGACTTTAGTGACTCTAGTGCATGTaacaaagattgtaagaatgcaaagaaattaat -aaaaaaaggagagacagagagaaaaagaaaaagaaaaaAAGCAACGTACCTTGACTCTTC -TCCCGGACTGCGCCGCCGGTGCCCCTCCATTCCCACTTCTCTTCATTTCTCCATTCCTCA -ATAACCTCCCATTTCTTAGATCTTCTTTTCAAAACCATGCCTATTTCCACTTACCATTCC -AATCCCCCGTCAGGCATGAACGGGAAGACCTGATCTCCATTGACCTCCGCAAATATCCAA -AGGGAAAAAAAAAATTCCCCCCGACCGCCCTCGCGCTGTACTCCCGCTCTACACCACCCC -CAGACACCTGGAGCTTCCACTCTCACCTCAGAGACCTGACAGGCTCCGAGGGAGCTTGCG -AGAGCATCATCTATTGGATTTTCCTTGATCTCTTTGACACCCTTGGTCTTTCCACTCTTT -TTTTTTCTCTATTTCTTCATTTCCCCCGGAGACTTTCCTTTCCACTCCCCCACTCAAATC -TCGGAGTAAATCCAAGGATACTCGATTGTCTATCACAATGGCTCAAATACGTTCCATGGA -AGCCCGCGTTGGGCGGAAAAACCAACGTGAGTTATCGCTAATGTCGCTTTGCGGCTGCGC -ACGTGGGGGCGACCCGTCCCCCTTACGTATACACCGTTCCTGTGTTTCCTGACTCTAACG -GCTTGTCATAGGCTACGGCTCAAAAGGCGAGCGACTGGTAGCCGGCGTTGTCCCTATTTC -CGCCGACAAGACCAAAGTTCTCATGATTCAGTCCGCAGGACCCGGAGGTTGGGTGCTCCC -CAAGGGTGGATGGGAGCTTGATGAAAAAACCGCTGAGCAGGCTGCATGCCGCGAGGCCTG -GGAGGAAGCCGGTGTCATCTGCATCGTTATCAGAGACCTGGGACTCATTCCGGACATGAG -ACCCTCCGGCCTTCTCACGGCGCAAGCGCCCAAAGCCTCGTACCAGTTCTTTGAAGTCTC -CGTCGAGCGAGAGGAAACGGAGTGGCCCGAGATGCACAAGCGGAAACGGCAATGGGTCCC -TTATGCCCAAGCCGCCACTGCTCTCGCAAATCGACCCGAGTTGCTTGAAGCGTTGAATCG -CAGCTCGTTAAAGCGGTAGCCCCTATCGCGTGCACATTCAGTCCTTTTGCATATCAACTA -TTCTCCCCTCCTCACTCTTCTATCTCTATGTGCATCGCTTTTCTGAGAGAGCTACTCCCT -GATAATGACTCGACATGACCAACTCCAAAAACACGGTGGCTTCGTCGCTTCTACTATTGG 
-CTTTGTCTACCTTTTTTTTGCATCTTTTTAACGGGCTGCCGCTACCCGTCCTCTGATTCA -TTTGCGAATGAATCATATGAGCCTGCGTGCTTTGCTTTGTCTCGCTCTGATAGACGCCTT -TCACGAACAGGCTTGAGGGCTTTGAGACGTTCTTGCATTGATACTGGAGGATCCCTTGTT -TTATCGACGGTGGTATCGGACTATTTTATCTGAATCCATTGTTTGCACTGTTTCACTCGT -CAGATTTGGGTCGGGCGCCGCACTTCCCTTAGTTCTAGATACCAGTCTTTTGCATGGCTT -TTTGGATGCTGGATCCAGTGTCTATATCCAGCAACTGAATCAATTCTGAACCACTTCCTC -CGCTCTCTAACTCGAGAAAGAGAAGAAGGGAATGAACAGCAAAAAGTATTTATTATAATT -GTAAATGTAAATTAAAAAAACGAAGATTTGATATTGGCACTTTTCAGAGGGTGTGACCCG -ATTAGGCAAAGCGCAAAAAGCTGTCCGAGGGAGCAAATCCGCGTGCCCTCAAGCCAACAA -CTTCGCGCTCGAATCATCTACAGCAACAGTTCTCATTGCCATCTCACTCACCATTGTATT -CCGAACCATTCACCCTCACTTCTTACTTGTATGACCCGCGGATAAATACCTCCAGCTTCA -GACAAGGGTGGCGGCGAGCAGCGCTGCCGCATACATACCAACATGGATTACGAAATGGAT -CTCGAGCCCACAGGGCCCCAAGTGACAGTTCGCGAAGTAAGCCGAGAGACATCTACAGAG -CGCCAGGATGAAATGCTCACAAAACTCACAGGCGGAACCCTACCGCGTCGACTTCAGACT -AACTGCAGTCGACCTCGCATTCGCCAACTCCCTCCGACGAACCATCCTCGCCGAAGTGCC -CACTTTGGCACTCGACCTAATCGAAATTGAGAGTAACACATCTGTCCTCCCCGATGAGTT -CCTCGCCCACCGCCTCGGCATGATCCCCTTGAATTCGTACAactgcgaccaagacctcga -ctacacacgagactgcgactgcgaAGACCACTGTGTGCGCTGCAGCGTGACGCTATCTCT -ACACGCGCGCTGTGGTAGTGGCATCATGTCAGTCTATGCGCGAGATCTGATTGTCGTCGG -CGAGCGGATCAACGAGAAGATTGGTGATCCTGTTGTCACTGACCCGGAGAATAAGGGTCC -TTTGATCTGCAAGCTGCGTAAAGGGCAGGAGATTAAGATGACGTGTCTGGCGAAGAAGGG -AACTGCCAAGGAGCATGCAAAGTGGGCACCCACTGCTGCTGTCGGATTTGAATATGACCC -CAACAACAACCTGCGCCATGTGGACTACTGGTATGAGCAGGATCCGGCGAAGGAATGGTA -TGTCTTTTAATAGTTGATTTGCTATCCACAGGTTACTTGGATCAAGCTGCTAACAACGAC -TTTTATAGGCCTGTATCTGAAAATGCTGCCTGGGAACCCGCTGCCAACCCTGACCAGCCC -TTCGACTACGATGCCGAGCCTAACTGCTTCTACTTTGACGTCGAGAGCATTGGTAATCTT -GAGCCCGATATGATCATTCAGCAGGGTATTGTGTCCCTCCAGCGCAAGCTCGCAACAACC -GTCTCTGTCCTCCTAGGTGAAGGCGAGGATGGACATGCCGGGGGCGCTGAAGATGCTGAG -ATGATGGGTGCTAACGACCCCGATGCTTATGAGCCACCCGAGGGCATTGATGGAAATATG -ACTGCCTATGGAAACGGTGCGGCTAGTGCTTGGGGTGCTAGTGCACAGACACCGTACGGT -GCTACGCCTTACGGACAGAGCAGCTATGGGTTCTAAACGTGCAAAACCCAGCAAGAAAAA -TAGAGCTCGAGCATGCGCAATGGAGTTTTGATTTCCTGGTGAATATGGGGGCGCACTCTG 
-GAGTTTTCCGTTTTTTGTACCTTTTGCTGTTGGAGACGGTTTTCTGATCCCAGAGAATGA -CTGGTCTCCGTATCCGAAATATCTACTAGTTGGTTCAATGAACCTGAAACCTGAAAATAC -TAAATATCATGGACCCTTTGCTAAAATGACCAAATATCTTCCATGGAAGGGTTACTATAT -CACGCGAGTAGAAACAATCTAAACCTCAGCCTATGCCCATGACGACTAAACGAGCACGAC -AAACCTGAACCCGCATCCTGTTTCTTAGCAGACCGGATTGGAACAATCACAGGACCAGTC -AGAAACCCCTTCTCCTGGCCTCTACACCTAAATAGTTGGGCTAGTCCTGGGCGAGGACCG -ATTTAGAGCAGTGTGACGATCTGTTACTTTTTTAAATACTACCAAAGACGATTCACATGC -CACATAGGTAGATCGTACTATACAACATATTCAGGTAATCATAAGATGCCTGACCTCGTG -ATGAGACGGGAAATTGAACATTCTACTAGGAGAGATCAGAGATGAATATGAGACACTTTT -GATTTGAAATATTATTAACCATAGTCCTGTGTAAAGTAATAGAGAACAAGCAATAAATCC -CGAAACTGCTGAGATGCCCATCCTTTCCCAACGCCTGGCATGCCACAGGATGCAATGACT -AAACACAACACATAACCATGCCTCGCTAATATAAAAGAAACAGAATGCAGAGCACAAATG -AATGGTATCTTCAAGGAAAATCTACTCCATCAACTCAATCGGTGGACGAGAAGTCTTGGG -CGGCATTTGGTTAACCGACATCTCAATCGGGCTCATTTCACTATCCATATCCACATCCTC -AATACCCAGCGTCATATCCTCAACCATAACAGCTGGAAGATTGGGACTATCGCCATGGTC -CTCAGGATTGTAGATGATCGTGCTGCTCCTGCTACGCTGCTGGGAGCCCGCAAGCGGCCA -GCTGGATGTGCGCTTATCGGAAGAGATGGAGACCGGTGTCGCAGGGTCAACAGCAGACGC -CTCAGGCTGCGTCTGGTCCGTATTCAGCTTGGACAGTGCATGAGCCAAATCTGTGGCGTG -GACTTCCTTGTTGGGCGGGTCCTCGTTGCGCACGACCCATCCGTGTGGCGGGCTGGGCGG -CGGCGAGATAAAGAACATCTTGGAAACTTGCGGGGCTTCGAGGAGTTTTGGTCGGCCGCC -GTCTAGGATTGCAGTCGGCTCGCCGAAGTAGATTCGGGGGTAGACGTCACGGTTTAGCAG -GCGCTTGCCGTCCAGGAGTTTGCGCACGGCGGTGGCGTCGGACTCATTCTGGAAAGAGCA -CACGATGCGGCGCATTGATGGAAGTGGGGAGAAGGAGTTTAAAGGTGCCACGGCGGTGAT -TTGGTTGCGGATTTCTTCCAGTGCTGAGGGTTGGAAGACGAGTAGATCATGGAGATTGGT -GATTAGGAGGGTATTGGAGGGCGGTGTTGGCTGAGAGAGTGGGGGCATGTTGGCGATGTC -GAGGGTTAAGGATGGTCGTGAGGAGGAGGGGGGTGAGCTAGTGAACTCAGGCACTGAATC -CAGCGAGCGTGGTGGTGAAATTGAAGTAGCCATTTTGTGTGTATGTGGTTGTATGTTTGG -AGGAGAGGTTGGATAAGGAGAAGATAAAGGAGGTTGCCACCGGATAAGAACTGTTTTAGA -GAAACGTTCTAGAACTTCTGATGGAGTGTTGGATGGCGAGAATGATGCAGAATACTCTGA -AGAGGATGAAAGGTTCTGGGTTTGTCTCTCTTTAATACAGAGAGCAGAGTACCTGGATCA -AATTAAGGTGAGGCGCAGCTATGTATCGTCATTCCCTGCACTGCCTCTACACGCACGCCT -CCACGTTCCTGTAGATCCACTGCTCCCAGCTCAAGCCTACACTTTTTCAGATCAAAGGAA 
-TGTGTGATCTACTGTATTTTACCCTGTTTTGTGACATTTAGTGGAATTTAGTGGAATAAA -CTACAACGTTCCCAGCCATAGTGCTGTGCATGTTGCATGTGCCACACACGCCACATCTAA -ACCTATTGCATCTCCCCTTTGTTGTGCACCAGCCATACCCCGTGACCTCAATAGAAACTC -CGCCCCTTTTCTGCCGGGACATCCGAATCAATCATGGAACAGGAGTACAATGAATACAGC -GTTGTACGTAGACCTCTCATACATGGTTATCCCGAGGTCTCTTAACTTTCGCCTTGCCTA -AGCCCCCCGTTGGCTTGTTATCATTCTGCCTACTCCATACATCTTTCAGGGCCTCCACTT -TTCATGCATCGCCACCCGCATTTTCCCCAAGCCTTGGATAACGAACCCGACGCTTTACAA -TGGTAAGATTAAGACGAGACAGGTCTGTTTTAACCGCAAGGCTACTCAATTGATTATTTA -TGGAACATATCTCTTTTCCCCGAGACATGGTGAAAGCAAAGCAGGCTATGGCATTGGCAC -AGCTCTGTGGCACATCCGGCCCACAAGCCTCGGTCAGAAATTCAGTGTCCCATTAGTTGT -ACAGGACGCCAGACCCTAGCCTCCCTTTTGTCTTGGGCCGAATGTCGCGAAATTCACTCG -TATATATGGAGTCCAACAAATGGATGTCTTCAGGGTGTCTTTTCATAGAGAAAGTGCCAA -GAAAAAGTGACTACATAGCATACACACAATCCCTTCATGAACTTAAATGATGTGTTAAAT -AAATTACGAGGTAATCGATGAGTGTATTCTATAACTTGCTTCCTTGGAGCTGGATCTCTC -GGCCTATCTTTCTTTCCGGCTCAGCGAGGCGTGCCAGTCGCTGCGCCCCTTCCAGTTGTT -CAACAACAGCGTTGAACGCAGCTTCAACATCGTGTTCCGATAGCCACACTGTTTCTGGCC -TGCCAGTCATTCGAGTAACAAGTGCGGCATCCTCCTCTTCGCGTGTCGCAAAAACAGTAC -CTTTTTTATCGATCTGGTTGTGCACAATACGCAGCAACCGAGCTGTGTGCGGCTGCCTGG -TTTGACTGTATAGCCCCAATGCCTCGTTGATATTTCTAGCTGAGAACGTGGCATTTCTAG -AATAGAACACTTGTTTGAATGCAAGACCAAGGGCAAGTGAGTCGTCTAGTGCCAGCGAGC -CTCCGGCTGCGAAGGCACCGCCGTGAGCGTGGGCTGCGTCCCCGACCAGAGTCACTCGTG -ACCCAAAAACCCAGGTTGGGAGAGCATCTCCCGCGAAATTGGGATAAAGATTTGTGGATG -GTGTGAGCTCTGTTAGAGCTTTGACGGTGGGGTTCCAGTCCTAAGAGCTGTGAGTCAAGT -GGCTTCGGACAGGTCATCTAAGAACGCACCTTGTATCTCTGTCTTAGAAAGTCCACATTT -CCAAGTTGATCCCATGCAATGCTTTTCTCCACTTCCTCTGCTGAGCGTGGGTCATCGTAT -GCACCAACGGTGGTGTATTGGTTTTTGCCTGGGTCTTGTCAGACCTATCCAGCTATGAAA -TGCTAAACCATTGCCACTTACCAAGCCGAGAGGCAAAGAAATTGTCTTTAGGACCCCACT -GTCGGTATTTTTAGCAATTATTCTGACATTCTCAGCACGTTTATAACATACCCAGTGCAT -CGAATCAGCAGGTAAGTCGGGGATCTTGCCCTCTACTAAGGAGGCATCAAACGTTGCCCT -CATGAATACTTTCCCACTGAACCGTAGCTTATAATCCGGGATGAACGATTGCCTGACACG -CTGAATCTGGGTTAGCAAAGCGCAGTGCTAGTCAAGCTTACATCACTAACCGATCGAATC -CCATCTGCCCCAATGAGGATTTCGCCGTGAGCACTACTCCCGTCTTCGAAATACAACGAA 
-ACGCCAGTCTGGTCAGCTTCCGCACGCGCAATCTTCTTGCCCAAATGGATTGATTCTCTG -GGAACTTGCTCGAGCAGAGCAGAGTGCACATGTGCCCGGTGGAAGCGGGTAGTATGATGG -CGGGGATCAGTGACGTTGACATGAGTGTCTACAGACACGACTTGGTCCGTTTTCCAGTGG -CTATTTATCTATACTTAGCGCGCTGAGTACAACTTGAGTATAAGATGGTTTCATACCGGA -AGATCTGCGGGGTTCCACTTGGGCCTCTAAACCCGACTTCGTCTGTTAGGGCACTGTGTA -CGCCGAGTTTTTCCAGTGTGCGCATGCCCTAGTTTCAGTTTTCTGATCAGATTGTCTAAA -CCTTCAGAATCATGCGCAAAATACCCACATTTGGACTCAATGCTATGCTAGCTCCTATTT -CCAAGAGCTCCGGGGCCTGTTCATATAGTTGGATGTCCAAATTCGGAAGATTTGGTATGC -GTGTCAAAGATATAGCCGTTGCAAGGCCAGCGATGCCTGCACCGGCAATCAGGACACGAA -CTGGTTTGCTTTTCTCTGTCATGATGTAATGGAATTTCTGTCGACTCAGCACGGGTCAGC -TTGGACGAAAGGTTCTGGCAGAAACGAATTTCTTATAGTTGAGGTCTGTGGCTCTAAGCT -CTTAAGCACGCCAACAATGCAACGCTGACGCAACCCCACAATGGCGACTTTAGGCGTGAT -TTGGGGTCAGACTCGCGTTTATTCTGAAGATCGCGTTAAGATAACCAGGGAGTCTTCTCA -GAGCTTGCCAATGTACTATTATACGACCTTCCAAATTGGGAGACTATTGTAAACCCCCGC -AGTCGTCTTTCTTATCTGCAGAACTTCTTAGTGTGAAGACGACGGAGCTCAAGAGCATTG -CGTAATTTATCTGTAGGCCCCTGGGCATAAGTACCTGTGCAGGACTGCGATTATCCATAG -ACATTCAATTGAGTCGCGTTGTTTACACTCTACATTCTTTATATCTCACACGATGGCAGT -AGAAAACCTCAAGTCCGGCTTCAGGGCGGAAACACAAGCCAATGAAGAGACTGATAATGA -ATTCAATACCACGCCAAAGCTCCCTGGTCAATTCGCCGATGAGAGCTTCAAGCTATTCTC -GAAAATTCAAGTCACAGACCCGACCCCGGAAGAAGCCATACAGATTCGCAACAAATGCTT -ATGGCGGGTGTTGCCTTTTCTCTGCATCGGATATCACTTGATGTATGTTGATAAGCAAAC -GGTATGTCAGTTCAATTCCCCAAATATGAGAGTCTTGTGATAACATGCTAGCAGCTTGGA -AGTTCTGCGATCCTAGGGATCATGCAAGATGCGCATCTGAACTCGAATCAATATAACTGG -CTCTCCTCAATTTTCTATTTCGGGTATCTTTTGGCTGAATGGCCACAGAATTGGGCTTTG -CAGCGGTTTCCTGTTGGGAAGTGGCTGGCTGGTAATCTAGTAGTATGGTCAGTTCTTCAT -TTGTTCTCAGATAATGGTATATCAGTTACTGACTGACTCTAAAGGGGTGGAATTACCCTC -TTGCATATACCTTGCAACAGCTTTGCCTCTCTTTTTGTTGTTCGCTTTTTCCTTGGTGTA -GCAGAAGCATCCATCGTTCCAGCATTTCTGCTTTCCATGTCCATGTTCTTTACTTATGGC -GAGCAGGCCGTCATGATGCCAGTGATGTGGTCAATTGGTAATGCAAGTCCCATCACGTCG -GGTCTGCTGTCTTATGGTGTCCTTTGGATCGAAACAGGCAGCTTTTCTCCGTGGAAATGG -TTCATGGGTATGTTTGTTCCCCAATTATTTATCGCTCGGGTTGCTGACCTCCATTCAAGT -TATCACCGGTGTTCTCACAGTCATCTTTGGCATTTTTGTATATCTCTTCTTTCCTGATAG 
-CCCCGTTCATGCCAATTTTCTCACACCCGAGGAGCGCGCAAAGGCCATTTTACGGATCAG -AGAAAACCACTCCGGAATCGAGCAAAAGCTTTTCAAAAGATATCAGTCAGACCTCATACC -TATAACGTTTTCAGGAGTCACTAACTGACTAGCGCTTAGGTTTATCGAAGCAATTCAGGA -CCCCAAAACATGGCTTTTCTTCTTACACGCCTGGTCCCAGGAAATGGCAAACGGAATAAC -AAATCAATATTCACTGATCATCAGCTCATTTGGTTTCACCGTTCTCCAAACAACACTATT -AGGAACTGTGACGGGAGTCGTATCCTTCTTTTCTCTTATGGCAGCTGCAGTCACATTGTA -TCACACCAAAGTAAGCCAGGATCGGACTTTTAACGTGAAGTTGGACTAAAATATCCAGAA -CTGCCGAGCATGGGTATCACTCATTGCATACATTCCAGGTGCTCTTTCAAGTATCCTTCT -ACTGGGTTAGCAGTTTTCTCTTTCTTTCAAAACTGAGCTCCACTGCTAATTAACCCGGCA -GCCTTGCCCTGGTCCAATCGATGGGGTCTCATCGCTGGAATTTGGATTCGATCTACAACC -GGTATCCCGTATGCAGTTGTGATGATATGGGCTGCCAATGCGTCAGCGGGTCATACAAAG -AAAACCACCGTCATTGCACTTTATCATATTGGATATGGATTGGGAAACATTATCTCTCCA -CAATTGTTCCGACCGCAATGGAAGGTGAGTTCTCCGTTTGACCAGACTTTTGTTGTGCTA -CTAACAATGTTTCAAGCCTCGGTACAGACCAACTTGGATTATTTTGCTTGTGGTACGTTC -TGTTTTGGTGAAGCTACATGCGGAGTATTTGACTTATAGCTTTTTTTCAGGTCGCTGCAA -TTCTTCCTTCGATCATTATTATCGCCCTTCGGACCTACCTAAGCCGAGAGAACAAACGTC -GTAACAAGCTTGCGGAAACCGACCAAGTCGCTAGCAATGGAATTGTCGAGACCGTTGATT -CGGATGGAGGCAAAGTCGCTCGCATCGTGGATAACAGTCAGTTGGATCTCACTGACAGGG -AGAATTTGACCTTGTAAGCTTTCATCAACTGGCCAGTCCTGGGATTCGATTCTAATTTGA -AAACAGCCGATATGTACTTTAAGGGGTTATTGACTCTCTAAATATATTCGACATGGGGTT -TGGGGAGCCGAGGAATTCTGATAGTAAATAAATGTCTGGTCCTTATACTGGAATGTTATT -TGTTTTGTTTATATCTTTCCTTAGATTTTCCCGTGTGCTCCAGATGTATGACCTCGGGAT -AGCACCAGGTACGGGTCATTATCTTCAATAGAGGATTCGGCAATAGTTCAACATTGATTT -CAAAGCCCAAGGCGCAATAAGCTTTTCTATCTAATGTGAGTTGTACGATCACATTCCTAT -GGCGTCAACCACATAGCTGCAAGGATCCGATACACAACAGTCAGCCTGAATGACGTGCCA -GCCAATCTATGATTTCCAAGCACCCAACACCTTATCTTCTCTATGATCCTCCTTGGATTG -AACTCGTTCAAATGTGAGAACTATTTCCACCGTACAACTGCTATAAATAAATAGTTCGAA -CTATACCCCAGTCATTTACAATAGCTTCACGATGAGCATCCAAGAAAGACAGCCTCTTTT -AAGCTCATCGATTCCCTCGAACCTTTTGCCTCACAGCTCTCAACAAGAACCACTCAGTAT -CTCATCTAATGAGTTTCACGCACCATACCCCGACATCCTACCAGGAACGCCCCACAGGAA -GAGCATCAACTGGTCGAGCGCATACATATTAGTCGTTTCCCGCGTGATAGGGAGCGGTAT -CTTTGCAACGCCAGGATCAATCGTCAAATCCGCCGGAAGCATCGGATTGACCCTGCTAGT 
-ATGGCTGGTTGGGACGATATTATCAGCTTGCGGCCTAGCCGTCTCGATGGAATTCGGATG -CATGCTACCTCGCTCAGGGGGAGAGAAGGTAAAAGACATTGCTGCAGGATTGCAGAAACA -CGTCAGCTAAAATACTCTCCTTTTCGTGCAGGTGTACCTCGAGTACGCCTATCCCCGACC -GCGATTTCTCGCGTCAAGCCTGATAACAGCCCAAGTAGTGCTCCTGGGTTTCACGGCGAG -CAACTGCATTATTTTTTCTAAATACACATTCTTCGCGCTGAGTGTTGAGCCCACTGAGTT -TCAGCACAAGATGCTCGCTGTTGGCCTCCTGACGGCGATTACTATGATCCATGGCTGTTT -CTTGAAGACTGGCATCTTTGTCCAGAATGTTCTTGGCTGGGTGAAGATCTTCCTCATTGG -CGCGATATCCTTGACAGGAATCTGGGTTGTTGTCCTCCAGTTCCGTGGAGATACTGATCA -TATATCTCCAGTGTCAGCAGTCGGCACTTCATTTTCTTGGGATTCGATATGGGAAGGATC -GAACTGGAGCTGGAGTCCGCTGTCGTCGGCTTTGTTCAAAGTCTATTACTCCTATGCTGG -GTTGAGTAATGTGAACAATGTTCTCAGTGAGGTCCACGATCCTGTGGGGATAGTGAAGAC -CGTTTGTCCCACTGCACTAGTCACAGCCGGTGCTTTATACTTCCTTGCTAATCTTTCATA -CTTCCTGGTCATTCCACTCGAAGAGATCAAAAATAATGGGGAATTAGTGGGTGCACTGCT -ATTCCAGAGGTTATTTGGAGATCATATCGGAAAAATATTCTTTCCACTTGCCATTGCCAT -CTCAGCTGCCGGAAATGTGATGGTCGTGACCTTTGCCTTGGTACGCTTTCAACATATGTG -GAATTGATAGCCAAGGAATACTAACATTGATAGGCACGTGTGAATCAGGAGATAGCTCGA -CAAGGCTTTCTTCCATGGCAGAATATCCTCTCCTCCTCGCGACCATTTGGTACTCCACTG -GGTGGACTTATTGTACACTATATCCCTTCTGTGTTGGTTCTCGCCCTACCGCCACAAGGG -GACGTTTATAACTTTATTTTGGACTTGGAGGGATACCCTGGGCAGATGTTTTCCTTGGCA -ATCACAATCGGTCTGCTGGTAGTACGATACCGGGAGCCTCATCTTCCACGACCATTCAAA -GCATGGTTGCCAGCTGTGTGGTTACGGGTGGTAGTGTGCTTAGTCCTATTAGTAGCACCC -TTTATCCCGCCGCCAAATTGGAAAGGGGATGTTGATTTCTTCTATGCCACCTATGCACTT -GTCGGTATTGGAATGTGAGTGGCCATTATCTCCATAGAGGTATCTTGCTAACAACCACAG -TGTACTCTTTGGGGTTCTCTACTGGTATGTCTGGACAGTATTACTCCCACGATGGGGAGG -CTACCAACTCGAAGAAGAGAAAGAGATTTTGGCCGATGGGACTAGCATCATCAAGCTAGT -TCATTTATATGATACTTGACATAGTCGACGCATATAATGATCACTTTATGAAATTGGTTA -TTGACTTCATGCCCACATCTGGGCAAGCAGTGTTGGTTCATAGAATGCGAAGTCTGATCA -ATAGTGAGAAGTCCCTAGTCCCCAATGACAATATCTAGCGCACAATGTTTGTGGCCGACT -CACCTGTGATGCAATCATGTAAATTGGATGAGGTCTGAGACTACAGTGAGCAATCACGCA -CGACAGACAGGTCCTTTTCTTCCTTCCTCAGTTCCTCTTTTCTATCTTTCTCTCTCTGTC -ATTCTTTTCCTCCCCCATTCATTTTCTGCCCAGAGCGAATACAGCATATGGATTTCCCTC -TCTGCCTGTATTTGCTGAAATCATAACTGGACTTCTCAACACATTTCACTCGGTCCATCT 
-GGATAAGCCTCAAACAAACCACTACTTCACCATGGCATCAATGCCTCAACAGCCCGGGAA -GCCACCTCCTCCATGTCCCGCCTGGGTTTTTTCCATCAACTCCAACACCCAGTAAGTACT -TCCCCCTTCGCAGGGCTAAATTCATCTACTAAATAAGCCAGTGTTGCCAAAGATCGCTGT -TGGTTCGGAAAAGACTACATCCCCTTCAGATCTCATGTTACGGATATGGGTGGAGGCTCG -GTCGAAGTCATTGGGATGGGAACTGTCAACATAACCACCATGAGCTCACCAACCCAGACC -GATCAGGATTCGCACAGCTCCATCAGCTTGAAAAACGTGTTGCACACACCTGCGATGCTC -TGCAACATCATTGGAGGTCCCATAACGAACAACCACACTATCGTTTGCGGGCCCACCACC -TCAACTAGCTCCGGTCGAATTATGGACGACTCCGATGGTCGTACCGTGGCCTACTTCAAG -TCCATGAGACAAGGCCCCCAGTTGTACCAACTTCAGGTTAGCGCGCCACCATACGGTCAT -CATTTTGGGGCTTCGCCTTTGCTCCCGAACGGGCACTATATGATTCACGCCTTCTGGTCC -CAACGTGAACGGCAGAGATTTGCAGTCTTGAAATCCCGCAATCTGATCCAAGCCTCCGGC -GTAGAGGTGCTAACAGAATCCGAGGGACATTGGTTCGGGAAGAGACATATGTCCGAAGAT -GCGATTCTAGTTGCCTACGGATTAAACAAAGACAGGAAGGAGCATCGAGAGGAAGGCCGA -GCCATCATGCGGATTCTTAAGTCCCATGCTGAAAATGAGCCGATAGTTTGATGCCGGAAG -AGCCTGCAGCATTGTTGAGGAAGCGGTTTCTTTTGGAGATGGATGGTCACTTCCCAGGTG -TATTTGAGCAAGTCGTCAAAGTCGCCAGTCTCCCATAGCTTCATATGGTTTCAAATACCC -CATTTAAGCGACATAGCTACCACAATCGCCATGCTTCCTTCGTAGTCTAAGAAAAAAAAT -GAGAACTAGACTGAATTTCAAATCCGCTCCTAGTCCCCGATATTGACTGTAAATTCCTCG -GGCTCTAGGCATCGTCTATGTAGTTACATATTGCACAGTATTTTTGAAGGGACGAGAACT -TTGGCTAGATACGGAGCACGATCTTCCCAATTTGTTTACCCTGCCAAACATATTCAACAG -CCTCTTCGGCTTGCTCAAATGGAAATATTTTGTCAATGAGGTCATTCAACGGCATTTGAG -CTGCTGATAGGGCAGCACAGAAGTCCTCCATATCATATTTTGACCCTGCATTGATACCTC -TGTATTGTCCCAGTTAGAATCCAACAAAGAATCCAGTTTCGTAGATTGATTCTCACCTCA -AGATAGTCCTTCGATCGATGATGGTCGGGATCAGCTCTCGTAAATCATTAGGGTCCTGCT -GTGACAGATATCCGACCTGGCTGATCACACCCCCACGTCGAGTACATCGTATACTCTTAA -CTAAAGAGCTTGACCCGCCAGTTTCAACAACAATGTCCACGCCAACGCCATTGGTCAACT -TGAGAACCTCCTTGTCCCAGTCGAGGCTTTGGGAATAATTTATCGTCATTATCGGTGGGT -TTGAGTACTTGACCTTCATTTGTTCGAGTTTCTTGTCACTCGACGACGTCAAAATCACCC -GAAGACCCGCAGCTCTTGCAAGCTTGAGTGCAAAGACACTGACCCCTCCAGTGCCTAAAT -CAGTTGTCAATGTTGAAACAAGCCTTGACCCCGGAATCAAATACCTTGAATCAACACAGT -CTGCCCAATCTGCATACCCTTCAACGCTGACCAAGCGGTAACACCAGCACAAGGAATCAT -AGCAGCTTCAACCCAGTCAAGGTATTCCGGGAGCTTACACAGAGCATCCTCGTCGAAAAC 
-AATATGATCCGCCATCACTCCATCCTCATCTGCTGCCAGCCACGACCGCTTCACTTCACG -TCCTGAGATATTCTCAGTATCTGTGATAGGACCGGCCCTATCCCCGACCGCCAATGTCTT -CACCTTGTCTCCAACTGCGAGAACCTCGCCTGCTGCATCGTTACCCAATATCCCATTTGG -AACCACAGGCCACGGGTTCCCTCCATTGGCGATATTGGCGTCGCGGTAGTTGAGAGCGAC -AGCATGGACTTTCACCAAGACTGCGGTCGGTGCGAGTGGTAGCGAGAGCGCTTCTTGTAC -AAGCTTTAACTTTGGTTTTCCTGGACTATAATCGTCGGTTCGTCTAAAGGCACGACGGTG -GGTGACCTCAGGGCTTGACATTTTCTTTTTTTGGGCTGTTGGTAATGGAAATCTTCAAAA -TTGTCGAAATGACTCTGATTTTATACTGAATATTGAATATCCTTGCATACCGACGTCACT -CCTATATGCAGGAATACAACTACCCCTCCTTGATCACTGGATACATGAAATCTTTGTTCT -CATCGGTATTGGGAGGGCTTCCAATACAAAAAAAGAATCCGTGTGACTTGCATATCATAA -TTGTTCGCCTCTCTTTCTTTGATCTTATCTGATTGGGAAAGATGGTCTGGCCACAACAAC -TGGTTCGATAAGAGATTCAGCAAGGGGTCATTAAATTCTGAAGTCAGATTTTAAGTCTAC -ACCTAGACGACGCGTGGGTCGTCTGTCTAGCGGCCATTAACCCAGTGAGAATAGTGATCT -ATATCTGACTCTTTGAAGTATAGTTTTACAAGACATGGCATGCTAAACCATGAGGCCTGT -AGATAATATTTTCTAAACATGAAAGACAGACAAGTCCAGTAAAGGAGAAATTATTTCTAG -AAGAAACCCTTCTGATGCGTTGGAATTACCTATGCTTGCATGCATCAGTACCCTATCTGT -ACACATGTAACTCAATATTCTGTTGGCAATTGGATTACATTTTCTTAGTTTTACATTCAA -ACAATCAACCCTGCACGCTTAAAAGCTCGACCAGGTCGACGCTATGTGTCTCGAAGATCA -TTGCCTTCTCATGTGTAATTTTCATCATTCTACTGACCTCTTCTCAATCTTCATCAATAC -TTCTGTAACATTTTGATATAATTTCTCACAGGCTCTGGGGGTACCTGTGATGTTAGTGGT -CGAGGTGTTAATTGCATGATAGGTCCCTACAAATCCCCGTACATGGTAGCTGATCTGCAC -CAAAGGACGTCCCGCGGCCCGCCCATATTGAATGGATCAGATGGTTTATGCAGTACATAC -TTCACATTCATGTGTATAGTATCTCATACCCAATGAATAAAACCACTTCATGTTTCCAAC -CCTCTGTGAGGCATGATAGTATTGGCGATTCTCCAATTCTTTGCTTCCAAGATTGCTTCA -GCTGACAACAGGGCATCAACGGTATAAATACCAGTCCTTCTTGTGTTCAGGACAACATCA -TCCACGCTGGCGAAGGTGCAATTAATTAACTCAGCCAAAAGCGTGTACTTATTCCGCAAC -ACGGATATGACAATTTGTAACAGACCAGCAGTTGCCCTTCAATTCGATGATATCATTCCC -TGATATCCGGAGTCAATGACGAAAAATATATCATGAATATTGTACTATGTGACCAGACTG -GAAAAGTCATCGGCTTCACACTTTGGACCTGATGGTGGGATTTCATCATGACTCGCAAAA -ATAGGTGTTCAAGACCGGTGGAGCCACCATCATAGAGAGGTCTTGACGTCCCAAAGTGCG -ATAAAGCCATACATCTAAGACTATAATAAGTCAGAATTGTTTACATCCTAGTTCACATAC -GCATTCAACAGCGGCCTGTCAATATCTTAAGGTCAATTCCACATCTCGAACCGACCACAT 
-ACTCGCATTGGTCAACATGAAAATCGCCGTCTTTAGTACCCAGCCTTACGATCAAGTCTC -GCTTGATCAATCCAACAAGCACTTCAATCATGAGATTGTTTATCACGACACAGCTCTCAG -TGAGAAAACTGCGTCTCTCGCTTCTGGTATCCCCGTGGTTTGCGTGTTTGTCAACGACCA -AGTGAATGCCAATGTCATCCACACCATTGCGGCCAACGGTACACGGTTGATCGCCCTGCG -ATGCGCTGGATTCAACAATGTGGACCTGAAGGCTGCCGAAGCTGCAGGCATCACCGTATG -CCGGGTCCCTGCTTACTCGCCCAACGCCATATCAGAATACACACTGGGTCTGGTCATTTG -TTTGGAACGCAAAATCCACAAAGCCTGGCTGCGAGTTCGAGAGGACAACTTTGCACTCAA -TGGTCTTGTTGGCAATGATTTGTATGGTCGCACTATTGGTGTTGTTGGCACCGGCAAAAT -CGGTGCTTTAGTGGCTCGAGCGTTTCGAGCAGGCCTTGGGTGTGAAGTCCTCGCACAAGA -TGTTGTGGAGAATCCCGATCTTGTAAGCATTGGCGTGAAGTATGTGAGCAGGGATGAACT -ACTCCGAAGAGCTGATGTCGTCTGTCTACACTGTCCTCTGACGCCACACACCCAGCATCT -CATCAATGCAGACAGCCTGAAGACCACCAGGCCAGGAGTGGTTATTGTAAATAGTGGACG -TGGTTCGTTGGTAGACAGCGCAGCTCTTCTAGACGCATTGGAATCTGAACACGTTGGTGG -AGCGGCCCTGGACGTCTATGAGAAAGAGAAGTCCCTCTTCTTCCGCGACCTATCTGAAAA -GGTTATCTCAGACGACGTCTTCCAGCGCTTGACTACGCTCCCTAACGTTATCTTGACCGG -GCACCAAGCTTGGTTATCTCACGAAGCGCTGGATTCGATCGCGAACATTACGATGACAAA -CGTCGCACAGTTCGAAAAGAATGGTGGCCAGGTGGAGAAAAGCCATATAGTGAGAGCATC -GTGAGCGCAGGAACGGTAATGAATACCTGGGGTCAAATATGAGTTGAATTTTGAGATTCT -CGGCCTAGTGTTAGAACCTTTGTAAGTGAGGCACCGGTCATCAGGCAGCATCCTGCCTAT -AATTTAAAACTTTTCTAAATTAGAATACTGTAAGAGTCAGCGACTTATGGCTCAAGACCA -TATCGTCAATCTTCAAAGCATTCGTGATCAAATGGAAAAAGAAGGCCAATAAGGGGTAGA -GAAAATCAGTTATAGCCACGCAAAGCAACATGCCAAGGGGACATATGACAAAAATAGCTG -ACAGCCCGTGTTGATGTTTTGTGATGCTTGGTCGTTATGCCAAAGCTACCTATACAAAGA -AAGCCTTGAGCAATGGACCTCAGCTTGAACACATATTGGCGTCGGAATTTCATTCTTTGT -AGGGTTTCAAATGGCCCACTGGGCGATGATTGCAAGTCACAACTCAACAAAGTCCTGGTC -CATGTACAACTATACAACATACAGTGATCAATGATCCAAGCCCTGCGGTAGCTAGTGGTA -GCTAGTGGTAATTGCCCATGAGCTATGATGTCAACGCATTGACTACTAGATATCAGGCCA -AAAGGAAGCAACCCTTGATACTGTATGATATTACATGTATCATTTGTATCATTCACACCT -TAACATAGCGTCCCTGATCTAATAGAAGTCAGAAGAACATCAAAGTCCTCTCGGCCCAGC -GTTTTTCCTTACCAAGGCCAAGGCCAAGGCGGTCAGGTGATTCCCGATGAATTTTATCCG -AGGACATATGGTTCCGCCTCAAGTTCACAGAGCTCCATTCTCAATTCCACATCACACTAT -TCATAATGTTCCAACGAACCATCCTCAGACAGGCCCAGGCCGCCCGCTCCATCCTCTCTG 
-CCTCCTCTCCCTCCACCGCATCGCTGGCGCTGCGACGGACTACACAACAACCACGTATCC -CGGTTATCCGGACATTCGCTCCCCGGTCTACCCGTCTTTACTCGACAGAGACCAAGGAGG -CCACCGAAACAGAGACCGCAGAGAACGCAGAGGCCGAGGACCCCACCAAGAAGGAACTCG -AACAAAAGACCAAGGAAGCCATTGAGTTCAAGGTAAGCTATCTCCCCATACCGACTGCTA -CAGGTAGCTCCACAGCTCCTTCGCAGACCAATATTTACTAAAGCTTCTAATTAGGACAAA -TGGCTCCGCTCCGTCGCCGAGTCTCGCAACATCACTGAGCGCAACAAGCGCGACATGGAT -GCCGCCCGCAAGTTCGCCATCCAGGGTTTCGCCAAGGACCTCCTCGACAGCATCGACAAC -TTTGACCGTGCCTTGCTGGCCGTGCCTGCGGAGAAGCTCACCGTTGCCAAGACAGAAGAG -AACAAGGACCTCCTTGATCTCGTCGCCGGTCTCAAGATGACCCAGGATATTCTTCTGAAC -ACCCTTCAGAAGCATGGTCTCGAGCGCTTCGACCCCGCCGAGAAGGTCGACGGCAAGACC -CAGAAGTTCGACGCCAACATGCACGAGGCTACCTTCATGGCCCCGGCTGCTGGTCTTGAG -GACGGTGATGTTATGCACGTTCAGAGCAAGGGCTTCAAGCTTAATGGCCGCGTTCTGCGG -GTGAGTGCCACCTACTGAGATCCGGGTTTCTAGCGGAACATTGTGACTGACCATTTTTTT -CTCTCTTCTAGGCTGCCAAGGTTGGTGTCGTCAAGAACGCTTAAATCGTCGGTGACCACT -TTGATTCTCAACCCTTCGACGACCTCTAATTTCACTTTTTCACTCTTCTTTACTTCTCTC -TGTACCATATGTCATACATCATGATCACGATAGGGTTTTATGCTCGGACACGGCTGGCTT -TTGGACCCGGGAAAACTACTCTTCCTTGCTATCTTTCTGCTCGTCTCTTGGATTCGGCTG -TATCATACACCCCTTGTTGCATGTGCCTGGAGGCGTACGGATTGTCTCATGTTGGCGGTG -TCTTTTCACTTGTTTCGGGACTTCTGTGCCTCTATGTCTCCTGATAACTCTCTCCTCCTT -GTCTGTATAATACACACTTCTTTGGGGCACGGCTATAGCCTCTGATGACTCCATTCTCCT -TCCACCTTTGGTAACTCCTAAATCACATTCTAATCTAATACAAAAGCTGGCTATGGTCTC -AATCTCTTAGGACTCATTCATATATATCCATATATCCATATTGGTCTAATGAGAGGAATG -CTATATCTTACATATTCTCCATAATAATCACAGTATAGTACAACGTATAATCCAACAATC -TCCGGGCTCACCTCAGCTCAACTCCAAGCTCTCCAACTAGCTTCTCCCGGACCTTCTGAT -GCATCGCATTACTCTCCTCGTTCGTCAAAGTCCGCTCCAAGCTACGGTAGTTGATGCGGT -AGCACATGCTCTTGCGCGAAGTCTTAGGATGCGTAAACTCATCAATGAGAGTAACATCCT -CAACAAGATCCCCACCAATGCCACGAACAATCTCCATAACATCATTCTCGTGGAAAAGCA -CAGCCCCACCAGCAGCACTGCTACCACCCGTCACAGCCGCAGGAGGCAGCCAGAAAGCAA -CGTCCTTATAGCAAGCCGGGTGCTTAGAGAAAGGCTCGAAACGGGCAATATGGCCAGCCC -GGAACTGCGACAAGAACCGCTCATCGCGTGACCAGAAAAGCCGGATATCAGGGATATTGA -AGAGAAGCATAGCAATACGCTCAATGCCCAGTCCAAACGCCCACCCAATCCGGTCCGGCA -CGTCAGAGTTGATCAACAAATCCTGCTTAATAACGCCACACCCGAGAATCTCCAGCCATT 
-CATCTTGCCAGAATACTTCCAGCTCCCAGGATGGACTGGTGAACGGGAAGTAAGCCTCGA -CCCAGCGCACCTTGAGAGGCTCAGGCTCCGCGCCACCGCTGCTCGCCGCAGCGGCCTTGC -TCGCCTCGGTGAATATCTTAATCACCATTCGCTCGAGGGAACGCTTAAGGTGAGCCCCTA -CAGCCTCGACTTCCTCGGCGCTGTGGTGCTCGTCTTGCAGCGGGTTCCGCTGCGCGTGGA -TAGTCGGGTTCGGGTCCTCGACCACGACGTCGTGACGCGGGATCTTCTCCACGTCTGCCA -TAATCTTCGCTGCTGTTTCGGACGATGCGGAAAGTGGGTTCGTCGCTGGCCGCTTCCACA -GCATCGCACCCTCCATCTGGTGGAAAACTGGGTAGTGGCTACGGTCAATGGCGTCGCGAC -GGTAGACGTCCGCGATCACGGTGTATCCGACCTCCTCGGGACGGGTGTGCTCATTGCGAG -CAATTTGCTGGAAATACGCCTGCTGGTGTGCGCTTGTGTGGGTGCGCAGCACTGTTTTGT -CGTTCACGTAGTAGGTATCTGTCCGGCTGCGGCCCGGGTGGTCTGGAGGGAAGCCGAGGA -CATCGAAGTTTTGTCTTGTTGAAACAACCGGGTTCTTCTCTGAGTAGTTACCATAACTTG -GGGCTGCGAATTGGCTTTCGATGAGTTTGCGTGTAATGGCGAGAGGGTGGTTGTCGTCGA -GGTAAAGTCGTCGTCCGATATGGGATAGGATTGTATCGGGAGTGTTGGTCCACTGATCGG -TTGGGTATGTTTCTCCTTCAAGTTTGAGGTTCGACTTTTGCTGGTTTGTTTTAGCGGTAG -TGCTTGCTTTCGGTTCACTCCGGCGAGCTGTGGTTGAGCTCCATTCGCGGTTGCGGGAGA -GAATAGGAGTCAGTGTGAGGGAGCGCGGAATGGCGGTCTGGGGTCGGATTGAGGCAATTG -ACCAACGGCCACTGCTCGCACGCAGGGCTCGACCGGTAGCGAATAGCCTCATGGTTGTGT -TGGTCATTCAGTGTGAGAATTGTCTATCAACACTATCAACACATGTTTCGGGTCGGCGAA -ATTAGTCCCCGAAGTACATTTATCGAGTCAATTTGCACACCCAACAAGGCCATGGAAAGC -TTAGCAATTACCTCTCTCCTAGAGATAATGATAGAGAGTTTGTTTATGATTTTATAATTT -TCAAGTTTCGCTATAGTTGCTACAGCTGAAAGCCTTGGCCATCTCGGCCTTCAGATATCT -AAATATAAGGTTCTCTGTCTACTAGTTATATAGTTCATCTTTATAAAGTATACGTCTGTT -TTAGTATACATGCTTAGTACCGAACAAAATTAAGCACTCCTGTTGGTACTGAGTTATGTC -TCAACCCTTTCTTTTTGATATTTGATACGTGCGACCACGACTGAATACAAATACCATCGC -ATTTGCGCCGAAGAAACCGGACAAACAACATATTGAACCAAACTAGCCTTAATTCTAGTC -ATAGGATATTTTGTGCCTTTGATACCCCCATTAAATCCCTCACAACCCTAGTACTTCCCC -AGGCAGCCTTAATGTGCATGTATTTCCCCTTAGAGCTGCACCCCTCCTTTCTAGGCCGAG -GCCAACCGACCAATATCCGTAAAAACTCCGAAGATCCCCATGATAGCCGATCTCGTCACA -TATCAGCTGCTTCGTTAAAGCAATGATACTGGCTGTATGCGCAAAGATACCCTCGGCATC -CATTTCAACGACCGAAGGACAATATACAGGATCACTTGTCTGGGGGCCAGGGCTGTATGT -CCAATACTAGAGATGTCCGGCGCAAAGGAGACTGCAAAACCGAGGATATGATTGGTGCAA -GTGACAAAACCCCATCAAGATACTTGAGTTGATTGGCTGCCTCTTTACATTTCAGCGGGT 
-GGCCGAGTGACTTCGGGCTTCTGATTTGTCTCCGTCGGTATTAGAAGGCCCAAAAATTAC -GTCAAGTCACGTTCGAAAAGCCGAAAAAAAAAGTCCAAAAAAAAAACATATTTGTTGAAC -TCTCTCCCCGCCTTTTTTTTGTACATTGTATAAGGGTCTTGTTTTCCAGGAGCAAGATGG -CAGACGGCAAGATGTTGGAGGTTGCTTCTCAGCCCCGGGTCGCGCCTGTTCAGACGCCCG -CGCAGTCAAAACCGAAGCAGGTGCATTACCCGTTTTGGTTTGGAGGTTCGGCCTCATGCT -TTGCTGCGTCCGTGACGCACCCGTTAGATTTAGGTATGTTCTTCTCCCCTCGAGTCTATA -CTATATATGCTGACTTTGCGCCTATAGTTAAGGTATCTTTCAACTCTCAGTCGTTCGTCT -GATGGCCTATTAATGGCCGCGATTCCGACCTCCGACGCTGATATTCGTTTGTCGCATAAT -AGGTTCGGTTACAGACTCGTGCACCCGATGCGCCGAAGACTATGGTCGGGACGTTTGTCC -ACGTCGTGAAGAATAATGGCTTCACGGGCTTATACGGTGGTGTATGTACAATGCCAAATC -CCAGTCGACAGCGAATACCCCATGGCTATCAGTAATCGAGCACGGTTGGCTAACGAAGGA -CAAATTGGGTATGGGAATATAGTTGTCAGCCGCGATGCTACGTCAAATCACATACTCCAC -AACGCGCTTCGGTATTTACGAAGAATTGAAGTCACGCGTTGTTTCGCCCACCTCAGATTC -CACCGGCACACCAAGCCTCCTCACTCTCATCGGGATCGCCTCTGCCTCCGGGTTCGTCGG -CGGTATTGCAGGCAACCCAGCTGATGTGATGAACGTCCGCATGCAACACGATGCATCCCT -TCCTCCCGCGCAACGACGCAACTACCGACACGCATTCCATGGACTCATCCAGATGACGCG -GACAGAGGGATTCAGTTCTCTTTTCCGTGGTGTGTGGCCTAACTCAACCCGCGCAATCCT -CATGACTGCCTCGCAGCTTGCTTCTTACGATACCTTCAAGCGCATGTGTATTGAGAAAGC -CGGCATGGCTGATAATCTGGGCACACACTTCACGGCCTCGTTCATGGCAGGCTTCGTTGC -TACCACTGTCTGCAGCCCCGTGGACGTGATCAAGACTCGCATTATGACGGCTTCTCATGC -GGAGGGCGCTAGCCAGAGCATCGTTGGTCTTCTGCGTGATATCTGCCGCAAGGAAGGCCT -GGGATGGACTTTCCGTGGCTGGGTGCCTAGCTTCATCCGTCTTGGCCCCCATACCATTGC -CACTTTCCTCTTCCTCGAGGAGCACAAAAAGCTCTACCGCAAGCTGAAGGGCATCTAAAG -ATCCAAAGAGTGATTAATCCTTTTTGTGACACTTTTTCCCCCGCAGATCTCGCCCAATCT -TCCAACAGCATTTACGCATTCTGCCCGATATATTATCGGTATATACGATTTTTCGTCATT -TTCTCTAGCTTTCTATTTGGGTGGTACACTTTGGGACTCACACCCACCATTTTCATTATG -GCTTGTATATAACTTGCATTCTTTTGGATGTTTCTTCCATCGAAGATTCGAATGACGGCC -GGAGTCGGCCTGGGAGCATATCTAGAACATGGCCCTGTGGCTTTGTACAGTGTGCTGGAT -GATGTATTTTTCTTGTCGGAGCTGGGATTACATAGAATTAGATAGATACATACTGAATCG -CATTCAGAGCATTTCCGCGAGGATGTCAGCTTTCTGCTCGAGATTCTAAGTTGATTTTCA -AATGGTTCGTAGACATCGCAAGCGTCTACCTCTGGTGGAAGATGAGGGGGCTGCTCCGGC -TTCAGCCGAGTCAGAACAAGCACCAGCGAACGATACATTTCTTGCTACATTACGGCGAGC 
-TTATAAACACATCCTTGCTTTACACGGACGGAACGACCTAGGATATAGAGGGGTTGTGTC -CCCGAGTGACCCAGTAGAGTACATTGGAACTTGTCGATACATAAAAATTTGACACGCCTC -ACTTGACCAACCCAAAATTGACCAACCAAACAGCTGTTGGTCATCTATTTGTCAAGCTTG -ACAGGGAAGTTTCCAGCCTCTGTGAACGTAAGTTAGTATGAATAGAAACCTAACCAGATA -AAATTTCTAGATGCGTACCGCGCTGACCATCCCAACGATCGGTCCAGGCCTTGGGGCAAA -GAGAGCGGAAGGAGTGGTAGAACTGCAAGAGAAGCTATTAGGAACTGCTAGGTTTTCAGA -ATAGACTCGGTGTCGAAATTACCTGCTTGCAGGGACGGAAGTCCTCGCCCTTGGCAGTGG -TACACTTGTAGTAGTCGACATAGTTCTGCCAGCAGTGCTTGGTCCTGAGAAGTGAAATAA -TGTTAGAAAACTATGAAAAGAGCAACCAAATAACAGTTTATCACGTACTGGTTCTGCTGA -GGGAAACGGGCATCGTAGCCTGCGATTCGATCAAAAATTGTCAATTACACCATACATCAG -CCGATCGGGGGATATCGGTCCACGCGCATGCGACGAGCGAAAGAAGCGGGGACATATCTA -AGGTTTACGCACCAGCTGCGATTAGATTATTACATTAGCCATTGACTCTTTATAGGGCTC -CTGGCCCCGGTTTTCGCGGGAGGGGGTTAGTGGACTTACCAGTGACGAACTTGAAGGGCT -TTGTCTCGAGGACCTCATCGGGGTCGGCTTCCGGAATAGCACCCATATTTGCGACGTGTA -GCTTCTTGAGAAACAACACAGAGGAAGAATtggagtggagtggaaagagaggaggggagA -AAGTGAGCCGGGAAGAGCGACGTTGGGTGATTGAAGCTGTGAAGTGCCGGGTATGACCGC -GGGCGGTCAAGCGAGAGCCACTGAGCGTCTCGCTTTTACACCCGAGCCAGTCACGTTCTT -GGAAAGCTGGTGTTCCAACCTCGTCCTCTATCCCACTCTTCTTCTGCTACTCCAGTGTTG -AACCACAGATCTCAACATGTATCCCACTCGCATGCTGCGGATGCAGCCCACTCGGGCCTT -TGCTTTCCCTACTCCTGTATGTTCTTTCCCTCGGCAATTCTGTGATCATACGATCGGCTC -GGTTACTAACTCTTCTGTGTAGAAGGAGCAACAAAGTGGTATGCACCTCCAAGATCACCG -GATCCGGTCACCAACGGAGTCATGTAGAATGACTTTACTTTCATGAGGACAGCCATTGAA -CTAACCATGCGCCTGTAGCTCACACTATCTCCCAGCGTCTGCGCCAGCTTAAGCGTGTGC -CCCCCGAGTTGCTTCCCATTGGTATGTGTTATCGTCATGACTTCACGATGGGGAAATTGA -GCTAACTATCTGCTTGATGTCTAGGCTTCGTCCTCGCGTACGTTTCTCCATGTTTGAGAC -TGTTTGCCGCTTGATTCCGGATCACATGTCCGACTCATGCCGGAATTTTATCCCCTATGT -CCCTCACTGGCAATCAAGCTCATGTCTTTCCCTGATGCTAACTTACAATCTCAGTGTTGC -CCTTGGCGCTGCTGTCTACTCTTGCGGCAAGAAGCTGATGACCGACAAGACCCTGCGTCT -CTACCGCAACAGCCCCGAGAGCCGTGAGCACTAAATGTACCACCATGTTTATTTGAGCTC -TCAGAATGCGATCTGCACTTTTTGGATTCTTGTCGCTTGTATTGTTGATTAGGAGAATTT -GAAGCCGATGGGCATGATTTGAGAAAAGCCAGTGGAGAAATCAGGAATGAGGTCGAGATT -GGAGCCTTTGTACAAATTCCAGCTTCCCGATCCTGAATCATGGTTGTGTAATTGGCAAAT 
-TGTACTGTCGTGTGTGACTTGTGCTTTTTTGTTTGTCATCTTCGTCGTAGCTATCGTAAA -GTGTAACGTGCTTCACTCGGGCTTCGGCCCAGCAACATCTAATTGTTCCAATCCTTGCAT -AGCATCTTGGGGACGACTGCTTGCAGCCTGAGCCTCCTCCGCTGCTGCCCGATCCTCCTC -GCTGACAGCATGCTTGGCCAAATACCCTTCCTATAAGATATACTGGTTAGTATAGATTGT -GAGTAGATATAAAATCGGACCCAGCGCACCTGTTCCCAGATCGCAGTCTTGCTGCCTAGT -CCCTCGTTGGCATCCTTCTGTGGCTTTGCAGCGGCAACTTTGATGGTGCGTCCGTAGATC -TCGCTGCCATCCATATTGTCGATTGCCTCTTTAGCGTCTTGCGGCAGATCAAATTCCACA -TATCCAAATCCACGGTGAGCTTCGGCCGAGTTGGGTTGCTCCGGTTTGGGCAGCGAGATA -TCAACAACTTCGCCAAATGGAACGAAGGCTTCTGCGAGTGTATGTGCGGTCACCCCCTGG -TCAAGACCTCCTACGTAGACAGTGCTCTTGAGCCGCGTTGTTTCGCTCATGATGTGTAGT -TGCGCTTCGGGCCAAATTGGTCAATCTTTGATAGGGTGGCGATGTGGCAATAGCTCACGG -GGTAGAAGAACTGGGGAGGAAATGGCGTTGAAACAACAAAAGGAAGCTTTTTGTTCGAGA -TTCTGAATCTAAGCTGTATCTTGCTATCAGCCCAGAACGGAAGATTGATATCACGTGCAG -CACACCTCGGAGTGTTTTGGCGAGTCTCAAAGCGGTCTAGCGTGAGTTCCCCGGTCCCTC -CAAGTCCAGGAGGCCACTTGGTTACTGCTATACTTGGAACGTCTAGAGCATCACCCCATT -GGCAATTATTCAGAGGGGGTGGTTTTTGATCCCTCTACTTCCCCATTACTTGAATTTTAT -TGTTTCTTGGATCATAGATTTCACACACTCTATTGGTCTTGATCCTCACCAACACATACT -TCCCCTGCCATCATCATGGTACGTCTAAGGGAAATCCCAAGGACGGCCACCTTCGCCTGG -TCACCTGGGGCTGCATCACCGTTAATCGCAACTGGAACTCGCGCTGGCGCTGTCGATGCC -GACTTCTCTAATAAGACATGTCTCGAGCTGTGGGATCTGGGCCTCGACCGCGAAGATGCG -AGCGAGGAGTTGCAGCCGCTAGCTAAGCTCGATACCGACTCTGGGTAAGTGGATCTTTTG -GGAATTTGAACGAATATGGGCACACTGACTGATCAATGTGACTAGCTTCAATGATGTGGC -CTGGACAACGTCTGAAGATAACAAACGGGGTGTGATTGCGGGTGGCCTGGAGAATGGCTC -GCTGGAGCTGTGGGATGCCGATAAGCTACTTGCTGGATCCAGGTTGGTTGAATTCTACAC -ATGGACCACGTCACTAGGATATCCCGCTGATCGATCAATTCTAGTGATGCATTGATCTCA -AGAACCACCAAACACAGCGGCGCCATCAAGACCCTCCAGTTCAACCCGAAACACCCGAAC -CTTCTAGCAACCGGTGGCGCAAAGGGAGAGGTAAGCTGCGTTTCGAGGACAATGAGTAGG -ATAGGTGACTAATTACTGGATAGCTCTTCATCTGGGATCTAAACAACATCGAGAACCCAT -TCCGTCTGGGTAACAACGCGGCCCGTACTGACGATATTGACTGTTTGGATTGGAACAGGA -AGGTTCCTCACATTCTAGTCACTGGTAGCAGCTCCGGATTTGTGACGGTCTGGGATGTCA -AGACCAAAAAGGAGAGTCTGACACTGAACAACAAGGGCCGAAAGGCTGTCAGCGCAGTCG -CATGGGACCCCGAGAGGGTATGTTGTCAGCTTCTACATGCTTATTCCAGGATTGCTGACC 
-ATTTCCATGTCTAGCCCACAAAGCTTATCACATCTTCGCCCCTCGAGTCGGACCCAGTGC -TCTATGTCTGGGACCTGCGCAACTCCCACGCCCCAGAGCGGGTAGGTTACTCTGCCAGGC -TCTACGTATGAAAAGATACTGATGTTTATTTTGTAGACACTTACCGGCCACGAGTCCGGC -GTGCTCTCACTTTCTTGGTGCGAGCATGATCCAGATCTCCTCCTGTCTTCCGGCAAAGAT -AACCGTAATATTTGTTGGAACCCGCAAACCGGTCAAGCATACGGAGAGTTCCCCGTTGTC -ACAAACTGGACCTTCCAGACTCGCTGGAACCCCCACAACCCGAACTTCTTCGCAACCGCA -TCGTTTGATGGAAAGATCTGTGTCCAAACCCTGCAGAACACGAAGACCGATAACGCCCAG -GCCATCGCCGATCACAACCAAGCCTTGGACGGCGAAGACTTCTTCAACAAGGCGCAAACG -CAGCCCCAGGTCTCCAAGTTCTCTCTCCCGAAGGCCCCTCGCTGGCTTGAGCGGCCTTGC -GGTGCTACTTTTGGTTTCGGAGGTCGGGTTGTTTCCTTCGGTCTGACCGAGAAGGGATCT -CGTACATCCAGTATCAAAATCACATCGTTCCAAGTTGATGAGAGTGTGGGCAAAGCCACT -GAGAGCTTTGAAACCGCATTGAAAGAGGGCGATATTCGTACCATCTGTGAATCGCGAGCT -TCAGATGCTTCCAACGAAGCAGAAAAGGCAGACTGGAAGGTGATGCAAGCTTTGATTTCT -GAAAACCCACGTAAGGGCTTGGTGGAGTATTTAGGGTTCCAAGATCAGGTTGATGATACC -GCCGATAGTCTCGCGCAGCTTGGGTTGGACAAGAAGGAAGGCGAGGACACAAATGGTGCT -GCCGCCAAACCGGCTGGGGTCAAGAAGCACAAGCGTTTGCAATCTATGTTTGATCCCAAC -CCTGAAGGTGACAGCTTCCTTTCTGAACTGGCGGCTTCCAAGGGTGCTCAGACCAACAAC -CCATTCCAAATTTTCAATGGCTCCGAAAGCCAGGCAGAGCAAAAGATTACTCGGGCTTTG -CTCCTTGGAGAGTTCGAGAAGGCTCTGGATGTTGCTTTGCAGGAGGATAAGATGTCCGAT -GCTTTCATGATTGCCATTTGCGGTGGCCCGAAGTGCATCGAAAAGGCGCAGGAGTATTAC -TTCTCCAAGCAGGCCAACGGTCCCAACTACATGCGCTTGCTTGCGTCTATTGTGGGTAAG -AACCTGTGGGATGTGGTGCATAATGCCGACTTGTCCAACTGGAAGGAAGTTATGGCTGCC -TTGTGTACCTTTGCCGATGAGAAGGAGTTCCCTGACCTCTGCGATGCTCTTGGTGACCGC -CTGGAGGAAGAGATCCAGAACAGCGACGATAAGAGTGCCCGAAAGGATGCTTCTTTCTGC -TTCTTGGCTGGCTCGAAGCTTGAGAAGGTTGTTGCCATTTGGGTTGAGGAGCTCCGCGAG -AATGAGCAGAAAGGACTTGAAAGTGACACCGACAACTCTTCGTTCTCTATCCACGTCCGT -GCTCTCCAGGGCCTAATCGAGAAGGTAACTATCTTCCGCCAAGTCACCAAGTTCCAAGAT -ACGGAGCGGAACAAGGATTCGGATTGGCGCCTGAGCGTCTTGTACGACAAATACATCGAA -TATGCCGATGTTGTCGCAACACATGGACGCCTGGAAACTGCCCAGAAGTACCTCGATCTT -GTCCCTGAGAAGCATCCTGAAGCTGAGGTGGCACGCAACCGTATTAAGCTTGCTACAAGA -CAGGCACCCCAGAAGGCCCAGGCCGCTGTCGGGGCCAAGTCTCCCTTCAAGCCTCTCCCG -CAACAGCCTAATATGTACCAGCCGCAGCAAACTTACAGCCCTGCTGCTACTCCAACACCT 
-GTTGCTCCCCCTAACATTTATGCGCCTCCTGCGCCTGCTGTGCCCCAACAGGCCAACCCA -TATGCCCCCCCCGCGACCTCGCCAGCTCAACCTGCCAATCCCTACGCTGCCATGTCGACC -GGAGGTGCTGGTGCCTACGGACCAGCAGCTGGCTATCAGCCAACTCAGGGCATGAGACCT -ACCGGTTATAATCCACCATCTGCATTCGGTGGACAGCAGCCCACAGGTGCTGGTATCCCT -CCCCCTCCTCGTGCTTCATCTAACCAGTCGCCGGCCAACACAATCACAACATACACCACG -GCCACCGGTCTTCCTGCATGGAATGACCTACCAGAAGGCTTCGCGAAGGCTCCTACGCCC -CGCCGTGGAACTCCTGCCGCAGCCTCTGCACCCATCGCCTCTCCTTTCCCCAACCAGTCA -CCCAATTTGACTCAGGGACCTCCCCCGGTTGGTGCTCCGCGCGCACCATCCGTGCCTCCC -CCACCAAAGATGGGCTCGGTGCCTCCGCCGCGCATGATGTCGCCGCTCTCTGGTGGCCAA -GGTGTATCTAACATCCCTGGACCTTCCCCGCCGCCCCCTGCCAATCCGTACGCTTCCCTC -CCTCAGTCACCACCGCTGAATCAGGGATCAACCATGGCTCCCCCGGCATCGATCCCCCGC -GGAGCGTCGCCATACAACGCGCCTCCCAGCATGCCACCCCCAACCAATCGGTATGCACCA -AACCCGGCTGCCCAGGCTGCAAACCCCCAGATGCAGGCACGCGGCTCGGTGGCCCCTCCT -CCCCAGGCCGTTGCCTCGCCATACGCCCCACAGGCAGTATCACAACCACCTGCAGCAAAC -CCATATGGGCCCACGACGCCTATTGCTACACAGCCTCCGCCCATGGCCCAGGGCATCCCA -CCCCCACCTCAGGGATCTCGACCTCCCACAGCCCAGTCGCAGAGGCCAACTCCTCCGGCT -CCCAAGTATCGTGAGTAATCCTCGTGAACAATATGATTTCAGCCAATTGACTCTCATTAC -AGCCCCCGGTGACCGTTCACACATCCCCGCAAACGCACAGCCTGTCTACGAGATTCTATC -GGCAGACATGCAGCGCGTCAAGTCCCGTGCCCCTGCTGCGTTCAAGGCACAGGTTGAAGA -TGCCGAGCGCCGCTTGAACATCCTCTTTGATCATCTCAACAACGAGGATCTACTCAAGCC -TAACACCATTGAGGACATGGCGAACTTGGCCCGCGCAATTCAGCAACGTGATTTCAATAC -CGCCCTCGCTATTCACGTTGATATCATGACCAACCGGACTGATGAATGTGGCAACTGGAT -GGTGCGTGAATACTGGTCCCATGAATGTTGAACGTTCACTAACAATTCTCACAGGTCGGT -GTGAAGCGACTGATTAGCATGAGCCGCGCTACCCCGTAAATGACACCCTTCTACCTTGTC -AAGTGGAGGGAATGCTAAAGGCTTGGTATAACTCATGTATTCCTGGCCTTGTTTTCTCTA -TACTTCAGCGTTTCTCTAAATTCTCTCATTTTCCTTGTCCATTTTATAATTAATGTCTTG -CGTATCTTCTTCCGTTCCCCTTCTCTCCTGGGGGCATGAGCCCAGTCATGAGCAAAGCGA -AACTTTTCCAACACCTCTTAAGTGTGGTTGGTAGTTTTAGCTCCCTAGTGCGTTTTTATT -TATATTGCTATATAGACATAAATCAAGTGCAGGTAAAGAGAATAGGTATACTAGGATCTG -GGCTATAATTTAGCAGACTCAATCGAACAAATGAAGATATAAACCTGGAATCCGAGAGAA -TCCCCTAGGCATCCCATCATGATTTATTTACATTCATAAAATAACGCCGAAGCCCCATGG -TCCAACCGCATCACCGTCCACCGACAGGAAGACCACCAAGCCGTCCACCTCGACTAGCAG 
-GGAAGTTACGCGCACTCAACAACGCACCCTTCACACCACGAATATCAGCCCTAAACCCGG -CAATAGCCTCGTCCTCCGCCTTTCGGTGACCGGAGCTATCACTCGAGCCTGAGTTATTGG -AATTCCAAACCCCGTACGACGACCCAGAAGAGTAGGTTGGGCCACTATAGATCAACCCGT -CACAATAATGATGCAGCTCCGTGAGGCGTGTACGCATAGAGTCATCCAGCGTACTCGCTT -CCTTCTCCGAATTGACGATCTCGCGCAACTTTGAAGTAATAGACTCAACGCGGCTAATGT -GGTTGTTCACCGCCGTTGATGGAGAGACGGCCGAGTCTGCGGAGTTGACGACGCCGGCGG -TGCTCGGAGCCGGAGTGGGCTCTGGGCTCGTCTGTGTTGCGACGTCTCGGTGGAACAGCT -CGGTTGGGTCTGAGGTGATCGACGAGGCGTCGTCGTCTTCGTCGCTGTAGGGCTTGACCT -TGCGTGCTGTTAACTCTGCCGGTATCACGGATACGGTCTTCTCCAGCTTCTCGTTCAGCT -TTTGCAGGTTTGCTTCGGCTGTGCTGGCGAGTTCGTGTCGTGCGCTTGTTAAGTTCGCTA -GCATTGGTTTCACTAGGTATTCGCTTGCGCCGTAGAGACTGGCTCCTAGACCTGCGGCGC -CGTATAGTGTGTACAGAAGACTGCGCATTGTCACGAGCGGTGGCGGTTTGGAGGACTCGA -AGAGGAATTCTGGGTATGTGATTATCGGGGGCACGTCGCGGggggcgggagatggcgttg -gcgctggtgctggGTGCGATTGAGGCATCGTGTTGGAGGATGGTGAGGATGGAGTGAGAG -GAGAAGTGGTGGCTTCGGATTTAGTAGGTGGGTTAGATGGCGTCTCTTCCTGTGGTTTAT -CCTCGACTGGGGTTGAGCTGCTGCTCGCCTCTGCGTTGCGGGAGACGCCGAGGACCTGCT -CGATATCGTCTGAGCTCAGGCCTTTTGACTCGAGGAATGAGACTTTGCGATCTGTTGGTG -CATCACGGATTGACACGTCTTCTAGAAATTTGAATGCTTGGTCTAAGAGAGCTTGTCGGG -ATGTAGAATCTGTAGTTGCGGGCGCATCGTCGGAGGTCGGCGATGGCGACTCCTCGGCCG -TCTTATTGGGCCTCTGCCATGAGGGAATGGATGTTGGTTTGGATGTGTCGTCGGGCATGA -TGAGGTATCGGAAACATTCGCCAGAGATGTGAGAAAGTGAAGACGTTGGAAGGGAGTAAA -GAGGAGGGCGAACCTTCCCCGGACTTGGGGCCATGTGAGCCCGACCTCGGCTGTTACGCC -GGTTACACCTGAGGCATCAATATTGCCGTACATCTTGAGAGAAAATAGATCACACTGATA -TGTGTTCACATTGTACGAGCTACTAGGATACGATATTATTTAGAACAAGTCCAGATATGT -ATTTCATCTTGAATCGTCTTTGGGCCGCCATATCTATTTATTCGAGATAGAAAACGAAAG -ATACACATGCACTGTTCCAACTATACCGGAAATAAGCCTGTGAGACCAAGCAATAACCCC -TGACTCCAGCTCCTATTGAGAGCTATAGAATCCTATACACTAACTACTTCGATACATATT -ATCTGGAGATATCAATTGAATATTGAAAGATCATATAGTTCCAATGGCGTACTCGAGACC -GCCCGGCAGAATGGACCGCGCAGTACTTCATTCGTTCATCCAGCGTCAGCGACTGGACAC -TTGAAGAATCCGAAAACTTATAGAAAAGTACCTGTGAGTATGGCGTTTTCCCCGTTAATC -TTGTTGCGTTCCCCTATCTGCCCAAGGCTCCCCACGAGCGTTCGTTGGACTCTGAACCCA -GGGAAGAAACCTCCCAAAAGCCTGATCCCCTGTAACCAGTGCGCTTTGTTCGCACTGGAA 
-ACTCCCCTATAGAAAGCAAATCTGTATCTTCCCGGTTTGCAACTCCGCCTCGGAGTGAAT -GAAGTGATTGGATACTTTTCCTTTCTCTCTCTATCCAGAGTTCCCCTCCCCCAAAGGGTT -AATGGCTTAGTTCATACTGCCACACACTTGCCTATCGACCTTCCCTGTCCCGCTTCTTTT -TGCTGTTCTGGCTTTTGCATTTTACCGTTCCCAAGACTTCTGTCACACTTGATAAAGCCG -CCACCATGTCGCTCCGTCGGTCTCTTCGATCGCGAAAGACACGAGATGACACTCCGGAAA -CAGATGCTAACATGACCACCAGGGCTACGAGACAAACTCGTGCCTCTACTGCTGCCTCTA -CCCTGGTCTCAGTCTCTCGATCTGAATTGAATTCAGGAGATCGGAAAAGATCTCTCCGTC -TTACTGTCAAAATGCCCTCCAGCAAATTGCGGGAAGCCACGGGTGCTGGTCGCGCTGCTG -GACGGCGCTCAGTCAATGTGTTCCGTGAAGATCCCATTGTTTCGGGACCCCGCTCCAGTC -GGAACCGAAAGAAGATTGTAGAAGTTGCTACTAGCgacgaagaggaaattgacgatcaag -aagaggatgaagtggacgatgaagacgcccctggggatgacgatgatgatgaagatgccg -atgccgacgctgacgctgatgctgatgctgacggtgacatcgatatggatgaCATGCCGC -ATCAGCCGCCTGCCAGGCGCGGTAAAATCAGTCCTCCATCTCGCGCCAAGCCTTCCAAGG -GTGTGGAGGCCAAggagatggaaatggatgaagatgaagaggatgatgaggatgacgagg -aagagcctatttccgatacggatgaagatgccgaagcggagggtgaagatcaggatgaAA -TTGCAGTCCCAGATCTCAACGTTGATGATCTAGATGAGCTTGACGAAGAGGATGAAATAG -ACGATATGGATAGCGATGCCCTCGCTATGCAAAGTGGAAAGACCACCAAGCGCCAGCGAG -GAAACCTTGGAAATGATTTCTTGCAACTTCCCATGGGTAAGAAATCATAGACGATTAACG -CTTAACCGAAACCAAGGCTAACTGAAAACAGAGCCCCAGGTCAAGAAGCATTTGACCGCC -GAGGAGCGCCAGATGCGTCGCGCAGAGATGGCACGGCGAAGAAAGAACCTAAGTGAGAAG -CGGAATGAAGAGGAAAAGGTATGCGCAAATTCACTTCATACTTGCATTGCAGCACGGCCG -CTAACTCTGAACGATAGATGGACACAATTAATCGGCTGCTAAAGAAGCAGGCTCCCAAGC -GGCGTGGTCGACAGGCGGCCGAAGCCGCCGATGGAACCCCCGGCCAAGAAGGTGTGGAGG -TCGAGAGAGCTGACCCTACCCTAGTGCGGTGGATCAGTGGTCCAAATGGCTGTAAAGTCA -GCGTGCCAGAAGAATGGCTGGGTACCCCTGCCGGGCGTGTCTTCGGCGCACCTGCAGTTC -CCGCGGGCAAAATGGTTGAAGAAGTTTGATGTCACATGCTGAAACTACAACGGATTGGCG -CTCATGGAAGGATTTTCTATCAAGGTATTCGGGAATGGACTACATCATTGCAAGGGACTA -GGGTTGATTTTTTGCGTGTGCTGCGAACGGCGTTCAGAACGCGCCTCTTCCAAATGTCAA -GGTTTAAAGATCATGCAATAATATATCAACTGTTTCAAGTGTTTCCACATACACGCCGTG -AAATCCGGCTTTTGTATCCAACTACTCGGGAGCCTTCACTAGTACGCGGGCTAGTCGATT -CTAGGTCATCATTCAGTGCTTCACCCACAACTACACTCCATATATATAATGGCCTCCAGG -GCCACTCATAAGACCTAACAAGTGTGACATAGCGAGACCTATGGTGCAGTGTGCGCCGGG 
-ACCTCGGAGCGCGGGGGAAAGCACAATTTCCAATACAGTGGAGTGCACAGAGAACAAGCT -ACTGGCACGATCATGATTTTTTGTGCGCAGGCCAATTTGACGTTCAGATAGCAGGTCAAG -GGGAAGGCCGGTCATCGTCATTGGGTTGTCGACTTTGGATCTTGCTGAAAGTCGCCTTTG -ACAGGAGTCTTGGTGTCCTGCTTCAGGGCAGCAGGATCGGACGGAGGTGAGGTCTTTTGG -GCTTGCTCTTTCTGATATTGAAGTTGTTGGAATGTCGGTTGGAAAGTATAGTAACCTGGA -GCAGGTAGAAAAATTCGAGATTAGGGACTGATACCAGACAAGAGCTATCTATGCCGTAGA -GTTGATTTACCTGTGAAGACACCCACTCCAATAGCGCAAAGGGCAGGGAAGAAATTGCGA -CTCATTTCGAAGGTATTGAGGTAGACAGCGAGGTCGAATTGCAGTTGGTGGTTGGAGGTT -GCGGAGGTGATGCGGCCCCGGGGGGGAATCAGTTATCAGTTGGCTGATTAGCAGCCGGCG -CGTCTGCTAAATTGCTCCGTTGGCACGCGTCAGAATCAAATCCCCGGCACTCAGACAGTT -TGAATAGTTGTTCGACGTTTGATTCTCCCATATTTCGCGAATTTTGCTTTGAAACCTATA -TTCCTCACTTTCTTATCGCCCCAACATCACCTAATGACTGTCTAGAGACATGGGGGATCC -CCGCCAAGGATGCAAATGGAATCATGGCGACAACGAGGATATGTCCCCGACTCGGACGAG -GAAGATGGCTTCGATTCTCTAGATACGAAGAAGGGTGATATCGAAAATAGCTCTGCTCTT -GAAAATCTCGAATACATCGACAACTCTTCGTCATCGCCGAAGGAAGGGACGACGAGTATA -ACGAAGGAACAACGCGCTAAGAATGGCTCCGTCACACTTTCAGATACGGAGGCGGCTCGT -TGGGACACGAATAGAATAAGTCCTCATCTGATGAATGCTGCGCATGACGCAGCGACACCG -AAACAAAGAAGAGGACGCAAGACTTATGGACTTCGATCATCGGCTACAAAATCAACGAAT -CACCACAGCAGCGAGTTGCGTGGTATGCATGTCGCTGCAAAGGATACAGACAGCATATAT -GACTTTCCATCGTCATCGCAGGAACATGACAGGCCTCAATCAAAGCCCTCGAGCCCCGAG -TCTACACCCAAGCCCTTGAAGGCCACACGGCCCTCGCAATCACAAGTTATAGTGGAGCCA -CACATCTCAAAGGCCAATACACAGGATGAAACATCAAGCACTAGGAGCTCCTCGCCTGAT -GAGTTAATGTTGATCCCTCAGCCAACTCGAAAGAAAAAACCTATGGCCAATCATGCCCAG -TCAACAGAGCCCCCACCATTACCACCACCACCCCAGGAGGGCTCGGAAGATGACTCTCCA -TTATCTTCTTTGCCGTCATCATTTGGCTCCCCGTCGGCCAAAAATTCTACAGAAATACAC -ACCGATGTTGTGCTGGAGGCAGAACCAGAAAGACCGGTGGACAGGCAGACAGATATACGG -AATGCAGGGCTCGCTAAACTAGACGCCAACCCAGAAGATTCATTACCTCGCCTTGGTATT -TCCGAAGAGGTGTTGCGAGAATTACCGCATGCTGCGCAAAGAACATTTCGAAAGCGCAAT -GCCATCCAAATGCACCCGTACCATCTAGAACAGCTTAAGTTTGCCCAGCAACTGCAAGCG -CGAGGTGTCAAGCCTTTTGGTCGACCTTCACAGCAAAAGCAACATGTGACCGATGAAAGT -CAAGGGCAAGATTCATATGATCCTGATGCACCTCCGTCTAGTCCTCCCCTGGAAGAGTAT -CTTCCGCCAGTAAGACATGAACGGCATCGTGGCCCTCACTCTGCTGCACAGGGACGGGGG 
-CACAACGACACCGAGCATCCTCGGCCATCGCAAGCTCATCTCACAAAGCGACAGAAAATC -TCTCATTCTGGAACTCCTAAAGAACGTCGAGATTTACACAAACCGTCGAAACCTCGTGGC -ATTAGAGACAACAAAACACCTACTGGGAATAGAAATGGTATCTCTGTATATGAGCTATCC -TCTAGTCCCCCCCACCCAGGCCGTTTATCTTCAAGCTCAAGAACACCTCGTGCCTCAGAA -GAAGGGTTCCGGTTTCCTCGTGGATGGTCGCCACCGAGAGAAGTTGCGAAATCTAGGGCA -CAGGAAACAGAAGAAGCCAGCGAGGCTCAATTAACTGGTGCAGGTAATAGTGATGATGGT -GGGGACATACAGACCATCTCGTCCAACAGTCAGAGTGATCAAGAAGAAACCAAATCAGAT -GCCGAAGAACGTGAAATTCGACGATTCCAACGAATGATTCGAGGGGCGCTCCCCGCGTCC -CATGCTCGACTAGATCAGAAGAAAATGGCAGATAAAGAAAGAGCTGCGCAATTAGATCGC -CATGCTTCTTCGCAACGCCTGGATGGAAAGGGTGTTGCGCGGAAGCTGATTCGAAAGGGT -GATCGCTCAAGACAACCAGCAACACAACAATCAAGAGGCTTGTTTGACCTTGGAGATTCG -GAGTCGGACGATGAGGATAGCATGAATGATGTTACAGGAATTGCATCTACAGCCAATGAC -TCCAGCCAGAGGCTAAGTGGCACTCTTGGTCTCGAAGACCCCTTCGCGCTGGAAGGGGGT -GATATCTCTGAGGACAATCGAATCGATTACATGTTTGCGCCTGTCTCGCGCAACTCAACA -GGCCCTCGGCATAAAGCAAACAGTTTAAAAAGGCCAAAATCGAAACAAAGTCTGCTCAAT -GGGGAAAGACAGCCGAAGAGACCTCGACAAACCCGAATGACTGATGCTTCCTACGGAGCA -CGAAGGAGCAAGAAACCATCAAGTGTTCAGCGACCAAGAATAGGGATATTGGATGCACCA -GATGTTGCCACGAAGTCTCGCCAAGAGCAATCGCGACTTCTCCGGGTAGCGGCAAGACGA -CCACGCTCACGCAAAGATGGAGGCAGGCAGAGCCCCACTCAGAAGTTCGTGAAACTGGCC -TCAAGAGAGGACACAGTGGATGCAAATGAGTCTTTACTAGATTGGAAAAGGGGAGTTATT -CGGCAGGCCAAGATTAGCCCACCTCATCCTAGAGCTCGTCAACACCGACATTGGACAACA -AATCTTTCAATTCTCAGTGCTCGAGCAAAATCGAACGCTAAAAATGGCCGAATACCGAAC -CAATTCCCTGATACGGAGGCAGACACTGATGTTTCGAACACCGACACAGTGGAACTGGAA -CAGGAACAGGCACAGGCGCCACCAAAATCTAGCGCAGTCCCGCCAAACCAAGCCCCAGCG -GCCGAGTCTTCTTCCCGTCGAAAACCTGAGCAGCACGGTCACACATGGGTTATTCCTAGA -AATGTTGTCATAACATCGTTGAAGAGAAACACCATTAGACCTGCGGCAACTAGTTCGGCA -GGCCCTGGTAAGAACCAAAAAGTGGTTCCGGCCATGTTCAATCAGTCATTGTCTATCCTT -AACCGACATTATAAAAACCAGCGCACATCTAAAACTTACAAGCCCAGTTTAACCTTGGAT -CGATATGTCTCCGATAATGGATCATTGAGTACCGGAGGAAATGCCTCACCCATGAACGCT -TCTGCTGCTGCTATTGCCGTCTTGGCCGAGAAACGAAGTCAAAGACCGACCCGACAAGCA -CCTGTGTCTCGACGGCGGTTGAAGAAAAATCCACCCAACCGAATTAATCTCGATGCCGAC -AAATTCCGTCATAATCCTGATCCTGCTACCATAATCTCCGATGAATCGGAACCTCCTACC 
-ATAACACACGTGGGACGCACATGTTCTTCGTCTTTCGGCGTCGGTGGGCTTTTCAACTGG -CAGCGCTTCTATCCAGTCGACTTCGGGGTATCATCCCTACGAGACAATACATTCTTCCAT -GAGACTACATTCATAGGAAGCGGGGAATTCTCTCGCTCGCTGCGAATCGTAGAGCGGGAC -TTTGACCGCAATGCCACTTCATTCTCCATCCAGCTCAAAGACGAGACATTCCAATGGAGT -CGCTGGAACGACAAAGTATCATCGGAGATGGGGCAAGTCTTTGATGTGATAATCAACAAT -GTTGAGCGGAGTGCTGTTACCTCCCCGGAGACGGGCATTACGGCTGCTCTGGGTGTGGCG -TCGAGGTTATACAGATTGTTGGTCAGATACATCACGGAAAATCTGGTTTTCATCGACCCG -GTTGATCGTACGGGGTTTGTCACTAGAGCAATGGGGCTGGTCTCCCAGGTGAGAGACCCG -TTAGCTGCGTTTCTGACGAGTGATGAATACAACAAGAACGGGCTTGTCAAGGTTGCTTGC -TTCAACATGATTTTCTCAAATCAGATCTATCAGATTGCTTCTCATCGGCTCGTCCGTCCT -GCCCTTGCGACTGAAGCTTTGGATCTCGTCCAAATATCCGCTAAAGATGTTGCAGGCTTG -ATCACGTCCAAAGTCGGCTCCTCTGAGTTTATAACATTGTTCAAGGAAAACAGCGAGCCC -GAACGCCGCGAGTCGGGGATTCGTGATGAGTATCCATCGGTGGAGGCTTACGTTGTCATT -AAACATATTCTGCGCAGCTCAGACAACTATAAGAACTGCTTTGAAGATATTCAGCTTGAA -ACTTTCAACAATGAACTTATTCGCAATGAGAGGGATGTGACCAGCCTGGAGGCTGGATGG -CGTGGTATCTTCACTGTCCTACCCTTTAACGAAATGAATGCGCTCGGGGTTGCGCACCCT -GGTTATCAGTTGACAGCTGCGAATGACAATTGGAAATTAGCCAAACGACTTCTCGCCCCA -GCCCTGGACGGTTTCGAGTCCAACTCAAAGGCACATCCCATCTCATACAATAGCTACTGT -CGAACTCTGTTTCTCAGGTGTCATCGGCTTATCAATATCTGGGGGTGGAGAGAATGCAAA -CCCATCCTGGACACGCTATTCGACTTCTTCGCCAAGAACACCCTTCACAATTTGAAACTC -GAGGAAGCTCGTGGCTCACCTTCATTTCTTGACGAACTTGATAACAATCCCTCTCTGGAT -GCGCGAGTGGGTGAACCATGTTTCCACACATTTCTCAAAATTGTAGCAAGTGGTCTCCGC -TTCTTGGCGAAGAGATATGACAAAAAAAAGATTCGCAACTTCGCCTGGCGTCTGCTACCA -AACCACGGCCGTGTATACCCGAAAGAGCAGCCGTTGCAGCATGAGGATCTTGATGCACTA -AGAAATCACCACGATCTTCTAAGCACTCTGTACTGGGCTGTTCCCGATGGGTGTCGCCCT -CGACTGGAGACGATACGCAATCTGGTCCATCCTGCCACTTCACATCGTGAAGCGTGCAGC -ATCAATTTGCGCTCATGGTCAAGGCTTGTTCGATTCAAGCTTTCTACCGATGAAGATGTC -GTGGGGCTAGATCCATTTGGAGACTGGTACGGCTATTTTGTTACGGAGCTGCAGCAACAG -CATTCATATGCTCGCAAAGAGATTGAAGCGCAGAACACAGGTGACAATCGTGTATCACAA -AAGCTCGTTGAACGCACAATCTCCCAGAACCAGCGGCAGATAGAGACTTTGTTGAGTAAT -GCGCTTAGCGAGCTGCGAATCGCTGTTCAATTAGCTCCGAAACTGGAGCATGCACACAGG -CTTGTTCTGAAAACTCCCTTCGAGTCTATCACTACACTTTTCAACCCCAAGCTCCCGCGC 
-GTGAATGTCGTTGTCTCGGAAGCACTGCGGGTCCTGGTCGCGTACACCCAGAAAGACGTT -CCAGCTATATCCACATCAAATGCTCCTGCGGCAGTGAACACGGATGAGGACAGCCAAGAG -TTCGGTGATTGGGATGCCATTCAGGCTGTTTGGGACCAGCAAAGTTCTCCCAGCGAAGTC -ATTGAACATGTAGAGAAGGCTTTCCATCCGATTGTCTCTCGTCTCGTTTCGAACTGCTTT -GGTGAGGACCACTGTCCAGAGGATGCGATCCTTCTCAACGTGATAGAATGCTGGACGTCA -ATCGCCCAGGTCCTCATTCGCCATGGTCTCCGAAATTGGGACAATTATCTGAGCGAGTTC -GGCGATGACTCCTGGACACGGCTTCGACAGACAGTTCAGACAAGGAAGTTTGCGCCGCTG -TTCCTTGCGGCATGCATTGAAAAGGACGCTCAGATTGTTTTTGATTGCCGAATCCAAGTT -ATGAGTATGTGGATGTCATCGCTTGTCGAACGTTCCTCGATAATGAAGTTTCAACATCGT -CTCACGGAGGCGCTTTTGAACGGGAGTCCCACAGATCCTTTGCTTGCCAATCTGCCGTTC -TCCAAAGATACGAAGCTTGGCCGATATGCAATCACATTGGAGGAAATAGGTTCTCGGCGA -CTATCTCTCCTCTCGAGCATATCGTCCAATATGCGTGAGCAGCTCCAGGGCATGGAACTT -TCTGGCAATCGGAAGTTCTCTGTGACGAAACAAGAGTACTCCGAGATGCTTCAGCGGGTG -ATGACTTCCATGAAAGACAACTACCAAGAGCTAGGTAATGGAACAATCCAGGCTGCTCAA -GGAGAATACGTCGAGTTCGTGCAATGCGTCATCAGTTTCCTTCAGCAACATACGAGCGAC -ATCAAACCCATCGACCCCTTTTTCACGGACCCTGCTTCGTTTCCTTTACCGTCTACCGAT -CCACGCTACATTGTTGCGAAGCTCAAACGCTACGAGTCGAAGTTATCTTCCAGCAAAGAG -ATCCAGACATTAACCGGGTTCGTCCAGAGCATTTCGGAGCGCGCCGCCATTGACGGTCAA -CAGAGCTACCTGGTTGATCAGTTGCACACATCAATGCAGAATACCTACGAAGCTGGTGAC -CACGACAAACCCACTCTACGCGCTGTCCTCTTGCAATGCGTGTTTCCAGCGTACCTGGAA -GCGACATTTCCCAACCGTGCCGCCTGGGTCCTCAGCCGTCCTATCATCCAAACCATCACA -CTGCAATTCAAGAACATGTTGTGTAATATAGACACACTGGACGAAGCCTGCGTATCTTCT -GTCATCGATATGATTGATGTTGTGTGCCGAGCCTCCTGTCAAGCATTGGGTCTGGTCGCC -TTCCCTCCCCATCGTCTGCAAAGTCCTACAACTTTACTTATGATTGCCGCCTTTCTTGAT -ATGGTCTCCTCGGCCATGCCAGTGGTAGATTACATCGAACGAGCTGCGGGCACAGGGGAG -TGTGTTTTTGCACATATCAAATGGATTCGCGATTTCGCGAAAGCCGTGAGCAAACGTCTA -CAAAGCACAGATCTAGACATAGAACTGCCCTCGAGCAATGCTCACATCACATTCCCTGTG -GTGCCGCCCTCTGATAGCAGGAAGACTGAACTATTCGAAACGGCTCACTGTCTCGCCTCG -AATGACCTCCAGCTCTATCTCAGAAAATGGTCGTATCATCAAGATAAGTACTACTTTACT -CGTCCGGGTCATGAGGCGCAGGAGGTCGCAATTGAGTCTGAGATTGCTGTTCTGGTGTCT -AGTCAGAGTGAAGCGCAGAAGGTCTTTGAGGCTGCGGCAACGGGATTCTTTGATCATGCG -AAGGCTCTGGAGTTGGTGCGTTGATCGCGAGGTCATTCTTTTCGGTCTTTTATATACAAG 
-GCGTTCTTGGAGGAAGTTTTAACGTGTCACGAATATTAGAGATCGAGAAATGTTATCTTG -TGCCATATTCTATTGTATGGAATTTATCATATAATAAGGACTAGTTACTCTGAGTGTCGT -TTCAGTATGCCCTCTCCCCAAATTGAAAAACCTGCCGTGAAGCAGAGAACCGAGTCCTGC -ATCATGCAATGCAATATGGCATCCAGCCCAGAAACAATGAGAGCTGCATGTCTGATAGGG -GCCTGGATATGCAGGCCACCGTAGAATACAATCTCCGTTGGCCGTCATCCACAACACCCT -TTGATTGACCTGTGTTTCATATTCCATCTGTCATATATGATTCCCCCAAGGCGCGTTTGA -CATGTTTGGATAGTCAAGCTCACTAGCACTTGCGGAAATCGATTTTTCCCGTGATGAATT -ATTTTATTCCGAAATTCAGTCATGCGTATAGGGCACGAGTGAAAGCCCCCGGTAATGATC -AATCGGCCAAAGTATATTGCACTGCGATATCGGTGTTGATCCACGAGTTGAGTTGTACCA -ATCATTTTCATATGCCTCACACATTCGTTTGCCTCGATCAGCTGGCATATAAGAGCCTGA -TTTGAGAATAACGTCTGTGATACTGCGCACTACTGTAGCATTTCAACGATTTGAGGCCGT -TTCCGGGGTGTGGTATACCAGTGACGGGCGATGCCATATGCTGCATATGCAAAATAAAAG -GAAACCAAAAGTTCGCATCACAGGGGAAATCTGCGAATACTACACAATCTCACGTGGATG -TGATGGAATGTCGGGTGTTACAATGGTCTTGTCCGGGAAAAAAGAAACACGTTTTATACT -CGCCGTTGAACTTATTGTTCACGGCTGATATCAGTATTTCAAATGTATCGCTTTCGTTTC -AAGTCTAGTGTATTTTGGAAGTATAATATCAGTCAAGGGTGGATTGGCTATATAATCTGT -TGATAGACCTCGGAGATGTATCTAGTGACGCAACTTATAAATCTAGCAATTATGATTTGG -AAGTCTCTCTTCAGTGCTGTGTCTATATTGGCACATATGCTTTCAGCTTTCGCTGCCCCG -ACTCTGGATTCGCAGTTGACTAGTCAATCGCCAGGCAAATATGTCTTTGCTCACTTTATG -GTATGCCAATAATGCACTTACCATCTTCCATCACTGACATGAGAATCCCATCTAGGTCGG -TATAGTAAAAGACTACCAGTTATCAGACTGGAAAGAAGATATGATCGCAGCCCAGTCTAT -CGGCATCGACGCATTCGCACTGAATTGCGCAAGTATAGATAGCTACACACCAACCCAGCT -TGCCCTGGCCTACGAAGCAGCCGAACAAGTCAACTTCAAAGTATTTATCTCCTTCGATTT -CGCTTACTGGACCAACGGGGACACAGCAAAGATCACAGAGTACATGAAACAATATGCAGG -GCACCCAGCACAGATGCAATACAAAGGAGCTGCCGTAGTAAGTACATTTGTCGGAGACAG -TTTCAATTGGGATGCAGTACGACAAGGCACACCACATCCAATTTACGCCTTGCCGAATCT -TCAAGATCCCGCTGAGGCTACAACTGGCCCTGCCAAAAGTGCGGATGGTGCGTTTTCGTG -GTTGGCTTGGCCTACCGACGGGGGAAATAGTATCATCCCAGGACCGATGACTACTGTCTG -GGATGATAAGTTTGTTCATTTCCTTGCGGGAAAAACGTACATGGCCCGTAAGAAACCCTT -GAATCTGTGCAAATATCTGGGTGGATAGTAAACTAACTCAACGATAGCCGTCTCGCCCTG -GTTCTCAACCCACTTCAACGTCAAAAACTGGGTGTTTGTTTGTGAGAACCTGCCAACATT -ACGCTGGGAACAAATGCTTTCTCTGCAGCCAGATCTTATTGAGATAATCTCATGGAATGG 
-TACATTAGCCTTATTTTTCGGGGAGAGTCTGAGAAGGAGTCTTTGCTAACGGTGTATCTA -TAGACTACGGTGAATCCCACTACATTGGCCCGTACTCAGCACACCACAGCGACGACGGCT -CTTCACAGTGGGCCGCTGACATGCCCCATGACGGCTGGCGGAATCTATTCAAGCCCTACA -TATCAGCGTACAAATCAGGCGCAAAGACCCCCACCGTGGAGGCGGATGAAGTCGTCTACT -GGTACCGACCTACGCCCAAGGGCGTTGTGTGCACGGGTGATACCCTTTCAGCACCAATGG -GTGCTCACATGCTCAGTGATAGTATTTTTGTCGCGACTATGTTAACGAGCCCGGCTACCT -TAACTGTCCAGAGTGGAAATAACGCTCCTGTTAGCATTGATGTTCCGGCAGGAATTGTTA -CCTCAAGTGTGGCTATGGGGATTGGTGGTCAGTCTTTCAAGGTGATTCGGAATGGGCAGA -CAATTCTGAGTGATCAGGGTGGCCTTGCGGTTAAGAGCAGCTGTGTTTACTATAACTTCA -ATGTCTATGTTGGGTCAATTGGGGGTGTCAATGGTGTTGGTAACTCGATTGGGTAGGAAA -TTGATATCTCCCGATATTAAACGTATGGGCACACTCTCAAGGCTAAACTACTTTCGAAAA -GATCGGACTTATCTCCGCAGATATGGCATACCCGTGTTATTCCTTTTCTTCTGCTATATA -CCATGTAGATTCATTGCTGAAATTTCTACAAGTTCTTGACCCCTCGTCTAACCAGGTCCA -CTTTTCCCACATTTCGAAACAAAGAGATGGCTCCTTATCGTCGAGTTGCTGTTATTGGAG -CAGGACCTTCTGGTTTAGCGGCCGTGAGGGCACTTGAGCAAGAGGGCTTTGAATATATCC -GGGTTTTTGAACGAAAGAATAGGGTTGGTGGTTTATGGTTAGCATGTCCCGTCACTTTTC -CTCCCTTTTGGCTCTTGGGGGCCGTTTATACCAAGGAACTTCTAGATCAAACCTCCACAG -TTCACCATTGTTATTCGAGAATAATTACTGACCAGATTTATAGGGCCTATGACCCAGAAC -CAGACAGTTTCCAAGTGAACACTCAACGTGAGGTCACTGAAATACCCAGTCAACTTGGAC -CTAGGACTCCATGTCTCATGCCAGCGACCCCAGAGCCTCAGGGTCTCCGAGGATCGGCCT -ATGATACCCTTGACACTAATGCCGGGGCCCGAACAATGGCCTTCACGCACACAGCAATCC -CAATGGCCAATTCGATAGCCTCAATAGAAAAATTCGGCCGAGACAATCCAACTCGACCGC -GCCATATTGTTTTACGTTATCTGGAGGAGTTGTTTGTTCCATTTTTACACTTGCTCGTGC -TTGACACCACTGTTGAAAATGCCACAAAAACAGATGATGGTCAATGGAAGCTTACGCTAC -GACGCCGAAATGTTCAACACGGCACGTCAAATGCACCCAGGGATTACTGGTGGGAGGAGC -AATTTGATGCATTGGTGGTAGCCTCGGGACATTTTACAGTTCCAAGTCTTCCTAATATCG -AAGGTCTAGTCGAGACCTCTGTCGAGTTTCCGGACAAGTTTGAGCATGCTAAATCCTGGA -GATCTTCAGACTCATATGTCAATAAGGTGCGTCATAAGGATCGAGACAAGGGAGAACGAC -GAGGCCTGATTTCAATCACTTGCATGCTGATCCTGTCTTGGACGATTTAGAGAGTCGTCA -TCGTTGGCGGGGGTATATCCGCTGCAGACCTTGTTGAAGACCTTCATCAAATCGTCAAGG -GCCCGCTTTATGTCTCTCGACGTGGCAACGTCGGATTTCTCGAAGATGCATGGCGTCTTC -CCAACGTGGTCGATAAATCGATCATTTCGCGCATATCTCCTGCCGCCGGTGGCACGGTAG 
-AGTTCCAGGATGGAACAAGCATCAACTTTGACAAGATTATCTTCGCAACGGGATACAAAC -TTTCCTATCCATTTCTACCCTTCGAGGCTGTAACTCCACAGAACAGATTGGACGGATTCT -ACCAGCACATATTCCGTATTGGGGATCCTTCCCTTGCTATTATCGGACAGGTGAGTATCC -CGGTTCCACTATAAATTACAGCATTAGAACACATTGGATGCATAGGAGTCCGTGCTAACT -CCCCTGCAGGTCAGAGCAGCCATCAGCTTTCGTATCTACGAGTATCAGGCTGTGGCAGTC -AGTCGCTTCCTAGCTGGGCGTTCTCAAGACTTGCCAAACAGAACAGCACAAGATGAATGG -GTGGAACGGAGATTACAGTACAAGGGTCCTACCGAGCTGTTCCACGAAATAATGCCCGAT -TTTGTGGAATATTATGGCTGGTTGCGAGAATTCGCAGGCTGCGCGGAAGGGAAGCCCACC -GAATATCTGTTGCCGGAATTTGAAGAAAATTGGGTACAGAGTGATATAGAGATATTGCTG -GCAAAGAAGCAGCATTGGGTGGCCCTTCGTGATAAAAATCGTGTGTTGGATTATGAACAG -CTTGCGAAGACTGGTGTCAATAGTAGCATCTCGCGTTAACTGCTTGAGGTCCAAGGAGAC -AAAACACGTTGAAGCGAGGCGGCAGCCTCTTCCTACTCATGAGATTGAACAGATCACACA -TCATACGGCATTTCTTCATTGTTCAGAGCTAAAAAGTAAATCAAACACCGGCATCCATGG -ATGCTCAGATTATGAACACGCGAAACAATAGATCCCCCAGAACAGACGCCAGAGCCAGAG -CAACCCTAGGTGGAGTCATTTCCTCCGCACTAGCGTTGATGGCTATGCAATTAGTCATGA -TCCATCAATAGACATCAAGGCCCATCTAGTTCGTCGGGCCAGTGGCACCTCCATTCCCGG -TATCCTGGCTAGGGCGGCGATGGAATGGATGCGACCCGAAATGGAAAACTTTCTTCACAG -CATTGGTAATGCTGTTCTTCCTCCTGCCCTGTGGCGCTTCCTCGGGAGAACGATCTGTGT -TTTTCTTGGGCACAGAGGCATCATCAGCAAGGACAATAGATGTCACGGCTGGCTTTGGCG -GCCACTGCCCGTCTGATTTCCAGCCTTGGACCATGGCTTTGGTCAAATGGGCGAGGTTGA -CAGGGACTGTGGGGGTCATGCCTTGCACGACAACCTTGGTGTAAATCGAAGGGTAGATTG -CGGGATTGATTGTTGCGCGAACTGGGTTGTCATCTGAGATGAAAGGAGGGGCAACGGGAC -AAAGTGGTTCATCAAGTTCTTCTTGCTCCTTCTCCTCTTCTGTTTGGTCTTCCACAGCCG -GTAAGGTTGGTTGAGTGGGTACAGTCGCGGCAAATTTCTGATCCGGCACTGTGATGTTGG -TCTCTGACGAGGCCTTTCTTTTAGCTCCCTCGTCTGGGGGAGTTGATAATCCTTCCTCTT -GATCCTCTTGTTGTCCCGAGGTCAGATCCGCTCGATGTTGAAGCTCCTCACATTTCTCTC -GATCCGCGACAGAAAGCGAGGTCTCGGTTCGGGCTGCCAGATCAGAGGCGCCAATGCCCT -CGTTGCCCTGACCAGACGAATGCGCATTGTTTTTTCCCAACGCCACACCATTGGATTGAT -CTGCTGTATCTTTCTGAGTACCAGCCAACGCACGTTTTGCCTCCTCGGCCCGAATCTGCC -GACGAGTCTTCGCACCACACCACGCATCGCGTCTCGCCATCCATACGCGTAGCCCTTCGT -TCCATGCCATTTCGTCGTCTATCTGTCGACGACGCTTGCGACGCCGCTCCACGCCTGCAT -CCCCAGGGGCAGATTTTACCGCAGCTGATTCTCCATATTCAGCTTCGCTGTCTGAGCTCT 
-GTGGTTCAAGTTCCGAGGTTGGGGTGTCGAATTCTCGAACTCGCCATTCGCACACTTCAC -GATTGTGGCATAGGGTGCCATCGGCGGACGACGGGGAGGTAACGCCGGCTTCAAACTCCC -AATCATTACGTACGACTTCCTTGACGAAGACTTTGGCCTCACGGACTGAGGAATCGCGGG -ACCCGGTGGCCATCATTTTTGCGATCTCTGAATGTGGGGGTTTGGGATGAGGAAGTGGGG -AGCTGTGGGAGCTGTGGGAGCAGGATTTTCCCCGTGCACCTGTCAACTAATAAAAGACGC -AAAGGGGCGAAGAACAAGTAAACACCCAAAAGTCGAAGTGGCGGGATGATCAGACTCAAA -TGTCATATGGCACGTGCCCGAATGCGGAGATACAAGCCTTCCTCTCCGCCTTGATTTCCC -TCCGAAGTACTCGGATCTCTTCTATGTCCCAATTTACCCCCCTATTGACTTTGATGTTCA -TGCTAAACCTGTCTCTCGAGACTCTGGCCTTGTCTTGGGAAAAGGGGACCTCATGTATTC -GGCCCAACGTATGGAGTGACGCAACGCCAAGCGGTACTATCATTCCTTGTGGCTCACGCT -ATACAAGCTGGTTGGATGAATTACACGGACGCTGCATCATAAAATGTCAAAGCATGTGCC -CTTGCAAAGACGTAGGAATATAATTGAGCACGAGTTGAAAGACTTCAGGATCACTATTTC -ATTTCCAGTGCTTCGCCTGCTATCCCTACTTTGTTACTTCATTCTAAAACAACCTCTCCA -TGCATAGTACTTTGGCAACGCCGATCTTTTCCCCAGATCCATCTATTCTTGGTCCCCACT -GGCCTCCGTCAAGAGGTAGTCCCAATCAAGATCTTGTATGCGGTCTGCCTCAACAACCAA -AACACTAGGATCAACTCCGTGGAATTGCATCCTGCCAAGATAGCGCAGGTTTTGCTAAAA -CCAATACAATCAGCACTTTACGGCCATGCAATGGACACAGCGAGTCAAAAGGCAAATACC -TTGAAAGGCGCGTGGCGATTCCTCCAACAGACGAACAGGGTCGTGTTACTGCACAGCCAG -TAAAGAGAATCAGCAACAATATTCCACTGCACTCATCAAGTCACTAGTTTCTTACCGCTT -CCCCTCCCTCCCAAAATGGTACTCCATGTCATAGATGGCATACCGTGGAGCCGGATTGCC -ATCATTGTCGGTTGTCTCCCATAGCTTATGGACAAAGGTTTCGTAGTCTTCTTCCGTGGA -TATTTTCTCGACTAGAATGTCTTGGTTGTCATCGGAGATCTTCCAGATCATAAACTTGGG -TCTGATGGCTGCTGGCCCCGATCGGAGCTGATTGAACTCAGCTATGCATTCGGCTGCGCT -GTAGACACTGGTATATATTGATTAGTGGTATGTTCTGCCTAGATTCTTTCCTTGGAACTG -TACAAGTGACCCTGTAAATACCGCAGGAATTATATCTTAGTACTTGTATCCATTGCACGT -AGCGGATATACGTACCGACATGGTTGGAGTTTGGTAACTGGTATCCTGCAACTACGGGAT -TTCTGCAGTTAAATGAACAGACTGAGCAGAATGAATAGTGTTATGTTGAATGGAACGTGA -TGAGAAAAGGGGAGCAAGTAGCTACTTTGACTACCTTATATCCCTGACTCTACATAGATT -CCGCGAGTTTCTGTACCTGTCTGACTAGATTGAGATATTTGACATCGAAGGAACATACAA -AGAAAAAATTCCCTGACAATAGATGTAGCAAACCGGTGAATATGCAGCATCAAACGTCTC -TGCCTGTACTGTATTGCTTCGGGATTATTTCACAGGGCAGACTCCAACCAGAAAGACTGG -GGTATATTTAAGGTGATAATTCTAGATGATACTTACTTGACAGGCACTCAAATATAGATG 
-TTCAGTTTACAGATACTTTGAATAGACAGATAGTGATATTAGTCAGGTGCTACAGTACTT -ACATAGTAGTCGGTATAAGACTTCGGCTGAAGGGCTAGGCACCTCTCATTGAGCCTAGTG -GTGAATGAGAACAGCTCAATGCGGGCGTATTTACCATGGATTCTTTCTACGGATATTCAA -ACTTGTCGCAGAGCAAGTTGTCCCTTTAACTTGAACAGTTGCATAATCACCCTGCCATTA -GATATTGATCAGTTCGTCTCCTTGCCCGACCTACATCAGGTTCAAAGACAAAGTCAACGA -GTTCTTTGGTCTGTCTATACAACTATTTGCAGCACTTTGACTTGTCCTGAAAGCTAGTAG -CTCATGACCTATCAATTGAAAGTATTTACGGTGTTTTAAAGAATCTAAAGAAGTTTCCCT -GACATATATCATTCCATATATCATTTCTGCTTGACCCAAAATACAGTGCAGGCACTCGGT -AAATCATACTGACCTAACACGCTCATGAAGGTCATGTGTAGTATCAAATATTGGGCCCAC -AATCAAGTCACCTTATTATTTCAAGTTTCATTGTGTTTCCCCTGTCAAGTAGAAAACGAT -ACGTGGCTAGGGAGTACTAGACGAGCAGAAAGTAGTTGACCGTTAGCCCACTGTAAGCCC -AATTTAAAGCATCTCAATGTGAGGCAAAGATATTGCTTTATTGCCCAAAAATTGATGGCA -ATATATACATTTCGAGTAATGGTACAGGTCTATCATATCGCTAAAAGTACATAGGGAATC -CCCAGTTTCTCCCAATCCACGTAAAATAACCCAGTATACAAGCCGGTGAGAAACCTCCGA -CGAAAAGAAAAATCATGTCCGAACGCGCGCCGATTCAGCCGGAGCATCACCAGTAGCACC -AGGGATAAATCCCCACCCAGGACTCTTCTGGCGACGACCCTTCCGATCACCATACTGCTC -GTCGAACTTCCGAATCGCCGCCAGCGAAGACATATCTTCACTGATCGACGCGCTGATTTC -TTCCATTCCACGCTCGAAATGCTTACGAGTCAGAATGCGGCGCTCCGGATAGCTATATGG -CTCGTCACCGGTGTGCTTGGCCGCAGCGTCATTCTCCTCGCGAACGCAAGCTAATGCAGC -CGCCACTGACAGATTCTTCAAATCGGAGCCAGAATACAGTGGCGTGCGTCGGGCGAGTTC -AGCAAGGTCGACAGAGGGTTCGAGTTGCTCTTCTTTCAGGTGGATTTTCAAGACTGCTTC -GCGATCTTCCTCGGTTGGTAGATCGACTAGTAGACGGCGGGGAAGACGGCGGAGGACAGC -ATCGTCTAGGTCAAAAGGTCGGTTTGTGGCGACCATAATGAATGCTGATAGGTCGTTCAT -GCCGTCCCACTCGCGCAGGAACTGGTTGATCAGTTCACGGTGGGAGGTGCGGCTGCTGGC -ACCGGTGCGCGAGCAGAAGATGGCATCTGCTTCGTCGATGAAGACGACGCAGGGACTGAG -CTTCTTGGCAAGTGTGAAAATCGCTTTCACGTTCTTCTCGCCCTCGCCGACGTACATGTC -GTACACTTCAGACCCGCTGACTTCGAGGACTGTGGCTCCGCTCTCGCGGGCTACAGCTTT -GGCGAGTAGCGTTTTACCCGTACCCGGAGGTCCGTACAAGAGGAGACCGGGTATTTTGTC -AGTGGCGAGGACACCGTATGTGAATGCCTCGGGGCGGATGAGTGAGAGGGAGGTTAGGGT -CTTCAGAGCTTCAATGGTCTCGGGAGGCACATGCACATCATTGAATGTGGTCCGGATGCT -TTTTGCATCAACTACACCGTTAAGTAGCTTTTTCTCGTGCGTGTTGCACGTTTTGCGCAA -AGAGCTGATCAGTTGTTCTCTGCTTTGTCCCGCTTCTGACTCTTTCTGCTTAGGTGACTT 
-CTCTTTCAGCCATTCGTTGGTGACTCTGTCGCTTTTCTCCATGAGCTGGATTCCACGTCT -GATATGTTCAAATCCGAGAGGCTCGTTGGCTTCAGCACAGCCAAGTGCAAGGGTAGAAAT -CCAATGCACTTGATTGTACGACCAGTACCGATCCTCGATTCCTGATTCTTTGATAGTTGA -AGGTTCCAGCGGCCAAGCACGATCGCTGAATATGTTATCTTTTACAGGCAGAGCCTGTTC -ATGTAATCGGAAGCGAAGCATGCTCTGCAAGTGGCGAATATTAATATCCATGGTGCGCTT -CTTTCTGTCTTCTGAGAAGACCTTTTCGGCGTCTTTCGAGCTCATGGCAGGGGTGATGAA -CAAGGTCTTCGAGAACTGATCATCGAGAACATTTTGCAGCAGACCAGCACCATCTTGTGA -GGGAGAGTGATGGATTTCCTGCGAGGTAGTTCCGATCAACAGAATCTTGGAACCTGCCCC -TCTCCGGTCTTGGATGACTTTGTGCAAGAGTGAAATGAATTTGGATCCCTCTCGGGTGTT -TTGAATGTCACGGTAGTCCTGGATATGCACAATGATGTCTCGAGATGGCAAAGCTTCGCT -GGTCTTTGTCTCGACATTTTCTGTCGCTTCACTAGGGACTGGCATTGCTGGTGCGGATTG -GCCTGGTGAGCCAATGAGCTCGTTGACTAATTGTGTCCACCGAGTTTCAGCGCGCTGACC -ACTTCGTGAGGCGCCTGCCCCGGGATCCGGATCCGTTTGGCCATTGATCGATCCCCCCAG -GAGGGTTTTCAGCCCAAGCCAGTTAGGAAGAGGAATATCACCTGCCTCTTTGCTGCTCTC -GATTGTGATAAACCGAGGTGTACCAATATCGCCGCGCATACCACGAGGACCTCCTCCGAT -TTCAGCCACGTCATCCTCGGCCTCGCCTTCAGGGTCTTCAAGTTCCGACCATGAAGGTGT -GACGGATTGCCGGTATACATCATAGCCGAGAGACCGAATGGGCGAGGTGACTCCGGCATC -TGCAAGATCTTGCTCGCTGAACAATTGTGCGATGTCCTGGGCATTGAGGGTCACCAGGTC -AGTGCCCAACTCGTGGGCAAGCTTCTTCACCACCGCGTCTAAGAATGGTATGCCATCAAT -ACCCGGGTAGTGTAGCGATAGATGCGACTTCTTCACCGAAGGCTCTTTGGCATATTTTGT -AGATGGTAACCGCAAGCCTGCTTTAACGGAGGCCCGCAGCTCTTCCCACGATTCCTCGGC -CACCGCATATCGATTCTCGGCTGCTGCCGGTGGTTCATTGGATGCAGATGGGTCGGAACC -TCCCCCAGAAGCCTGATTTTCATTTCCAAGATCCTCAAGCTCCTTTTGTGACACTGACTT -GGTTATCTTAACCTGTTGTAGTGGTTGTGTAGGGCGGTCGTCGGAGGTGTGAGACACCTT -ATTGTGCTCGTAGAACCATGCGGGGACGGTCATCGTTGATGTGGAAATTTGCTGTGCGGA -CTTTCGATTTCGCATCGCTCTGCGTACGGCAGATCCGTAGGTTTTGGTGATCGGTTTGGC -CGACTCGGCAGAGGGAGATGTGGGCGGCGTGCCATTGTCAGAGAGGTTCTCGTGTTTGAG -GGAAGGGTCTTCCACAGTAGTCCCTGGCGCTGTAGACGCGGCATCCTGAGGGTCTTGGGA -CGGTGATGCGAGCCATCTTCGTGACAGATGAAATCGTCGCGCGGACAAGGGTGCGGAATA -GCGGCGGCGAGCAACAAATCGACAAGGGCGTGCAACTGACAGCCGGACTTGTTGCATTGA -CGAATACATCCGGGACAGTATCACCGGAGCTTTATGTGATATCGCCGAGAGTAGTAGATA -TATGTACCAAGTCGAGAGTGGCGCAGGATATCTATCAAGCCTTACGAAGGATGAATGCGA 
-GATCACGTGATCTCCAAAATTCTGGTATCTGCTGCAGCCTGAGGTTCTTAGAATAGCAGG -TGTCTTTTACGCGTGTCATACAAAAAGAAAGGGTTAGCTCAAACTTCACACAGCTTGCGC -GCACATCTCCATTTTGCAGGCGTACAGCTTACTGTCAGTTTTGCTCTCTATAAATTCCCC -GTCGAGGAACCTATCCGCTCCCATCTCCACCGCGTGGTGGAAAAAGTATATTTTGTATGG -GCTTCCCATACAAAAGGCATTTCTAGTCAATCTTCAATTCCTGACCACAACATCTAAGGA -TCCATTTCTAGTATAAGTTATATTTAATTTTAAAGTGATTTCATCACAGTGCTGTCATAT -GAGCCATAAGGTCAGCCTCGAGCTATGCGGGAATCATATGGGCACATCTATGAAATGATC -AAGCTTGATTTCCAATCACCGATTTATATCCTTGTTCAGAATTATTTTTCCGTTATATAC -AGGTTGAATTCCAACTCGACAAATGGGTAACTATATTGCAATATTCGAAGCGTTCCTTTA -AGGGTAAAAGGCTTTCAGCGACTTGCCACTTTCCAGCTACTACAGGCGTGCCGCCAGGGC -ATCATCCAACACATTGTGGCCACGAATGGAGCAAAGTGACTTTGTGATAGACTCACAAGT -GATTTGGTAAGACACGGTTGCCTCTTTACTATATTATTCTACTTATTATCCATAAAAACC -TTGCCATAGCCGGCTGCTCGCGCGGCCTTGTCACGATGAACTTTTCTAGGTTATAAAGCC -GGAAAAGAAGCAGTCATTGTCCAACCAGCTGTTCAATACAGTGCAAAGTGGGGGATGCTT -CTATGTGAAGAGCATCGGCCTCTGGCAGGTGCATATTAACGCCTCATAATAGCCAAAAAG -ACCTATTCCATCTTTCTTTGAAAGAGCCGCAGATGTATGACTAGGACTTCTAAGCAAGAA -TTGCAACAGATATCAATGGAGTGGACAAAATGTTAATGACATTATGCCCGAGAGCACCCA -TAAAGAGAATCTCGAAAAATACAATATTTTAAAATACAATACTCACATTGCTGGGAAACA -AGCCATCCTATGGCGATTGAGACATATCTTCATGGAATTGAAGGCATTTTCTGGACATCT -GCACAGTGATATTCTCCTCACTGTGGTGCCTTTTAGAACTATACTGACAAGCAAAGTCTT -GCAGACCAAGACCGTATATGAGTCTGTCAAATGGAGACTGCTCACTTGCTTTTACATATG -TAGTCTGCACCCAATCAATACTCTATATAATCTTAAGGTTGAATCTTTTAGGACCTGACA -AAAATCTACCCCTCGGAGGCATATTCACAGATCGTTGAGGCTTATGTTGTAGATGGTCTA -ACGATAGTATAGGTTGATGAAGCATACTTTGGTACGAAGTGATTTCAAGCGACTCCGTAC -AACGTACAAGCCATTCTAGTTAAGAAAAAAAAGTTTTCATTCAAGTATTCCACAACTATC -TATCCAGACAACACACAGGTGTAATGGCGAATTGATCATCAGAAGGAAGGCAACGGAAAT -CTCTACATGGTTTTGCGCATTCAGAACAGGTCTGTGAGTTTACATCCGAAAATTTTTCGT -CACTTGATCAATTCGCCATTACAACAGGCGATATCCTTCAAGGCATCATGGATTGAGCTT -TGGGCGTTTCTGTCTAAACTGAGAACAATATTAGTGAATGTTCGAAACAAGACATGGTAA -AGATGCATTGCAATATAAGAGTTCTCAGACAATATGTACTGGCAAGTAGCAGACCAACCG -GGAAGGAAGGTCCGTATTATCTCCGCATTGTCGATTTGGAGGGTAAAGAAGTAAAGGTTT -TGTCATCCTTGAACGGATAGGTGATGCAAATGCAAATGAGAAAGGATGTAAACTCACGTT 
-AAGCACAAGGTGCAAAGCACCTGACCTATCAGGTTAGTGGCGAGATTGAAGCATCGTAAG -ATACCCGTACTAGGAACATTGGGTCATGATATCCTCAGAAATCAGGTGCAATCTGCATCA -AGAGAGCCCAGGACATCTGATGAACATAGAGCCACAAAGCTCAACGTAGCAGGTCTCAAT -GGAAACTGTTCACAGGAGTGACAGGTCCCAAGACAACAATGTGCAGTTGAATAGAAAATG -AGAAGAACTCTTTGTGGGAGTCCTCATAAGTCGCGTATCACTAGAAAAGACATTTgaaga -caaggaagatgaagaagaagaaaaaaagaaagaaaaaaaagttgtgaagaaaaagGCAGA -GGATTACCACGTGACATCCCAATTGGGGATAGCGTGTTATTGTGATCAGAGCAGGAGGGA -GAAGAAGAAAAAAAGCCTCAAAAAGGGAAAAAGATTGAATGGAGCTTATACTTTAAGATC -TATACAATTTCCCTATCTCATTAGTCCTTTCTTTTCCCACTGTTTCTCTCTTGGATCGCG -TTGTCGTCTCGGTTAGATCCTCCACCCTACGGACCCGGATCCTCCGTCCCCAGTTCTCTC -TCTCCCACCGGGCGATCATCCTTAATTCCTGACTTGGGTGTTTGCAATACCTCTAGCCCC -TAATACCAGATATCCCTTAGGAAACTATTTTGTGAGTTGTACCTCTTTGTGATTCTGTCT -GAGAGCCCAGCCCCTCCACCTCCCCTAGCCCCATCACTTTAAGGCTTCCCCCCGACATCT -TCTCCTCTTATCACTCCACACTGACCGCTCAGTTCTCCCAGGAAAAAAAAGATTCATTGC -GCGGGTGAGTGACTTGGCCAGGAGGCCCCCGATCGATAATTCAGCACCTCGCGTCTGCTG -AATACCGGTCCAAGTCGCTCGCTACGTCTTGGCCCATCCCCGAACGCTAAAAGCGGCCTG -TTGTCTTCCTACTCCAGTCGCGCTCCACACCACGTTTCGCACACCACCCTTTCGCCATCG -CGAGCGATCATTCTGAAGTCCCCCATTTGTGGTGACTTCGTCGCTCAGCAATGATGATGA -CGCAGCCGTTTCCTGCCCACCAAGGCATGCCGCAACATGGGTTCCCCCCCGGCCACCCAA -TGGCTGCGCAACATCCAAATGGACACCCGGGCGCTGGAATGGTACAGCAAATGCATCCCG -GTGTATCTGCGCCGGGAGGACCTCAAGCTAGCCAACCGGGCCCGATGATGGGTGGTATGC -CCCCGGGCGCTGGGACAGCTGGTCCTGGTGGCCCCGGTCCAAATGCTCATGCCCTCTCCC -ATCTGAATCCCGCGCAGGCGCATATACTTCAGAATCCTCAATTCCAACAAAATTGTTAGT -ATACAACCTCCTTGTTGTTTGACGCGATGTTGTGCGAGTGATGTTACGGAATACGCGATC -GGCGCCCTTTGCGGTGATTTCTTTCCCCCGCTATACCCGCATCTCTTGCAGCCGTCTTGC -TCGATCGCATACTTACGTCCTGATTCATCTCTAGTTGCGAACAATCCCCATCTATTacac -cagcaccaacaacagcaacagcaacaaTTATTGCGGCAACGAATGATGCTTCAACACcag -cagcagcaacagcagcagcaacaccagcagcagcagcatcaacaacagcagcagcaacag -caacaacagcaacagcaCGGTCTCCCTGTATCCATGCCCAATGGCACACAAGGGCTTAAT -GCACAAGGACTTAATGCTGCTCAAATGGCTGCAATGCAAGCGAACCCTGGAATGAGACCC -GTCAATATGATGCAGCTTCAGCAGCAAATTCCACATGGCCAGCCGCCAAGTCTCCAACAG -CAGCAATTTTTCGCTCTTcagcaggctcagcaggcccagcaggcccagcaggcccaACAT 
-GCTCAACAGCAGGCAAATAATGGACAATCAGGCCAGCACACCCCGCAGCGTGCATCGGCA -CAACCCCCGAATATGCACGAACAGAGTGCTACTCCACAGTCACAGCATGGCGGACCAGGA -GGCGGCACACCACAACCCTCTCAAACCTCTCAGCCGCCATCCACCCAACCCCCACAACCA -CAGGGACCCCCTCAAGGCCAGGCAACACCTAATCCCCCATCGCAGCAATTGCCGCAATCT -CAACAGCCGGGTCAACAGGGGCAGGTTGGTCCGCAGCCCCAACCTCCCCAGAGCGCTCAG -GGCCAGCAGCCGGGTCCTCAAAATCAACAGATGACTGCCCAGGAGGCTCAGTTGAAAGCA -CAACAGCACCAAAATGCTCTTCTGATgcagcagcaacagcagcagcaaaggaagaataac -gcaaTCTTGACCATTCACGCCTATGCTGAGCATCTGGGCAACTTCCAACCCCGCAATGAA -ACGCAAGATCTTCTGTACTGGCAATCGTTCGTCGATCGCTTCTACTCCCCCGTAGGTGTT -TTGCGACAGGGTGTTTGGAACAGCTCAATTGGATCTAAGCAATTCGAAATTGCGACTCCT -GCTCTGGCACGTTATTATTTTACCCAATTCACCAGTGGGATTAGTCAAATCCAGATGGTG -GTGGAAGGTGCTCGCGAGCGAGAATCTCACAATGGGGGGCATTACGTAGAAGCCCCAAAA -TGCTCATTCATTTATTGGTTCAAGAATGAGTGCCAGGTACAATTCGCATGATTCTCTTCT -TTTCCACTTGCTAATTTCTCATTTTGTTTAGCTTTTCACTAATGGCACCCTTCGTGCGCA -TTTCGATATGCACAACAAGCTCGAGATGCTTGATATTAATGTCGTCAGCCACAACGAGTA -TATTCCGCGGTCTTTATTGTTGGCCATGGAGGCGGATTCCCAAAAGCAAAGTCCGAAGGT -CGCTAAGAACTCAAAACGTGCCCAGCCTAAGCAAGCACCATCATTAGTGCCAGACTCCAA -CGTGACTGCCAATGGCGTACCTACTCCAGTCATGGGATTTATGGAGGTGGCTGAAACAAT -CTCGGCGATGCAAATGCTGTTCCAATTTTCGCAGGCAAATCCGCAGCTATCGCCTCCCGA -CGCTTTGCGAAACCTCGTCAACACCCTTCAGGCACAAAATCCCAACCCAGGCTTTGTGCC -AACTCCTATGCCCATgaaccagggcatgaacccgaacatgaatccgaacatgaatccggg -tatgaatccaggcatgaatccaaacatgaatcccggcatgaacccaggcatgaatccagg -catgaatccaggcatgaatcccggcatgaacccaggcatgaaccccaacatgaaccccaa -catgaaccccaacatgaacccTGGCATGAACCCGGGCATGAGTCCGGGTATGAGCCCGGG -TATGAATCCAGGCATGAGCCCGAGCATGAATCCAGCTATGAATCCAGGCATGAACCCAGG -CATGAACCCAGGCATGAATCCCGGCATGAACCCAGCAATGAATCCCGGCATGAATCCAGG -CATGAACCCAGCAATGAATCCGGGCATGAATCCGGCGATGCACCCGGGTATGAGTCCGGG -CATGAATCCAGGCATGCACCCAGGTATGAATCCGGGTATGAATCCGGGCATGAACCCAGG -CATGCAACCCATGCAAAATGTTCGGGGACATAGCATGGGTGCTCCGTCCCAGTTTGCCTC -GCCGGCCATGGCTCATCTTGGTCTTCCCGGGCAGCAAGGCTCACCTCATCTGACTGGTTC -AGCACATGCGAGCCCGGCTCAAAGCAACCTTGTCGGTCCTCCCGGAATGCAACCACCAAT -GCAGCCGTCCCCAGCTGGCTTGAACAACAGTCCAAATGTAGGTGGCAAACGTCGCCGGGC 
-AAGCACCGTCAAAATGGAGTCAGAGGACGGTGGTGGAGTCGAAGCTAACGGCGCTTCGCA -ACAGGGCTCAGCGAAAGTAAAGGCTAGCCCTCGGGTGCCGAAGCGACAAAAGGGCGCCGC -CGCCTAATCATTTTGAGAGTTTTGGTTTGCTTCCCTTTGCTTTCATTTTTCCTCGCCGTT -TCAACGGTGCAGGACCGGCGTACGACTGGATACCTGGGCGAGGCACGTCGCGCCCCTTCT -TGTTTGTTTACTCCTCTCATCTAGGCCTTTCATTCCATTGAAAGATCTACTTTGCTATCT -TGGCACATGACGACTCAATTCACAATTGACGGGATTTAATTAGTGCTTGAGGCCCCCTCC -CACTCTCCATGTCTTGGAAGTGGCATCAGGCCTCATCGATCCTGCTCAACTCTGACCTAA -TGTACTCTATCTTCCTTTCAGCGTTTCCTTGGTTGGCTTGGGATCATTGCCTGCCCTTGC -ATGTTTTCTATCTCTTTCCCGTTCGGTTTTCATTTTGATCCTTTTGTCATCCATGCCTGG -GATTTTGACGACTTTTGGATTGGTCTTTGTTTAGGCTTTTGCCCATTGCTCGCTGTTTCC -TTGGCAGTCTGTTCATTTTTCTTCTGAACAGACACAACTATTGATATCCGTGTTTGGCGA -ATGATATGGCGTCCGGCTTTTACGTTTCTTTGAGAAATATGTCTGCGTGCATAAGGGTGT -TTCATGCTCCATTTGGCTTGACATCTCTCCGTGACTTATCCTCCTTCTGCCGCTTCTTGA -CGAATCCATAAATACCTGCTTTCCAATCATCATGCATTGTAGTTTGTATGCTTATATGTA -CATCGAAGGGTTTATGCCTAGATATGGATAGGTTCCAAATCCTCAAATCACAGTTGATAG -AGTATGAAAGGGAGGCCGAGCGGCGAACTCATCGGCGAATTGCTCTCGGCATTCGTTATC -ATATTAATTCCCCACACCGGTTCTTCAAAGCATAGAACTGCCCCTCCTCTGCATTTTTTC -CCCAAACATCTTTGTATTCACATTTACCGAGAAGTATCTTAGAAGCTTAACAATGGCACA -CTCTCACTCCCACGACGCCGGAATTGACCACTCTCATGATGACCCCTTCAACGGCCACGG -CCATTCACACGATATCCTCGATGGTCCAGGCTCTTACGCCAACCGCGAGATGCCTTTGAT -CGAAGGCCGCGACTGGAAGGATCGTGCCTTTACTATTGGTATCGGTGGGTATGTATAGAA -CTTTCCATTCGCAATCAATCTCCTTCATCCGCTTCATGCCAATGCATATCCCTTCGTCCA -ATGACTGACACGGATCCAACAGACCCGTAGGATCTGGCAAGACAGCTCTGATGCTGCAGC -TAGCTCGCGCCCTTCGGGACGAGTATAATATTGCCGCAGTGACGAACGATATCTTCACAA -GGTAATACCCCACCTACGAACCCCACACCCAAAACAACACACCTCATATCTACCTGCTGT -ACCGTATATCTCCAAGATCAACACTGACCACCCTCCTGATAAACAGAGAAGACGCCGAAT -TCCTAACCCGCCACAAAGCCCTCGCACCGTCACGCATCCGCGCCATCGAAACAGGCGGAT -GCCCGCACGCCGCTGTGCGTGAGGATATCAGCGCTAATCTCCTCGCACTGCAGAAGCTGC -AGAAGCAGTTCTCGACTGACCTGCTGCTTATCGAGTCCGGAGGTGACAATTTGGCTGCGA -ACTACTCGCGTGAGCTGGCTGATTTCATCATCTATGTGATTGACGTTGCAGGGGGCGATA -AGGTGCCTCGTAAGGGTGGGCCTGGTATCACAGGGTCTGATCTGCTTGTTGTGAACAAGA -TTGATCTTGCCGAGGCCGTTGGGGCGGACCTTAGTGTTATGGAGAGGGATGCGGCAAAGA 
-TGAGGGAGGGGGGCCCGACTGTATTTGCTGTTGTTAAGCATGGGAAGGGGATGGACCATA -TTGTTAATTTGATTCTTAGTGCCTGGAAGGGAAGCGGAGCTTATGATGTTAGTTTGGAGA -GATGGAAGAATGGGGCTGTTAGGGGGTCTGGTAGTGTTGATGAGTAGATGTTTTAAAAGG -CTTAGATTGGATCGAGATTTTGGATAGATTTGGAAAGAGAATACGACGGGGGATTGGAAA -GGTTGGGCATTTATGAAGTTCAATATATTCAGTTGAAAAATGGTACACACGTTAACTACT -TTTTCCTTCCGAATACAAACATACTCCCCGAGAACGCCTGGAATGCAGCTGTCGCCAAAT -CTCCAATATGTCACAGAAGCCGCGTGTGGGCTGGGTATCTGGTTTTGGGCATAGGAAGGT -TGTTCCTCTACTGCTCCACGAGATGTCACACATGCAAGAAACAAACTCGGTGATCTCATT -TGGCGAAACGAGACAGAGCCGGCCCAAGTTCTAGCCGTTCCTGCTTGAGCTTGTCCTTCG -TGCGAGCAGCGTACTCCTTGCGCAACATCTCCGCCTTGACACGCTTCTCGTCGATGACGT -TCTTTCCACCCTTCTTGCGGAGGTACTTGCGCAAGGCACTGTTGCGGCCACGACCACGGT -TCTTGATCTTCTCCATGTGGTCCTCGGGCTTGTGATCCGGGTTGCGCTGTTCCTGATACT -TCTTGTCGCTGATGGTATCGAGCTTTCCAATGATGTTGGGGTCGAGCGAAATCATATCCG -GCTGCAGCTTGGTGAGCAGCGAGTGAACCTCTTGCTCCTGGCGCTGTCGGACGTTCTCGT -AAGGGTTGGCTTCGAGCGAATCGAAGTTGGGCTCACCGGCGCCGGGAACGATGACACTGG -CGAATCCTTGGTCATGTCCTACGCCAAGAATATCTTCATATGGGCACCAGCGCACATTCT -CGATGCGCTGTCCATCTCCGCCCCACGCCATGTATGGGCTCTGCACTTTGCCTTGGTCGG -CTTGCGCGGCGTCGAAAAGCCCGCGCCATACGCTCATTTGCGTGCCCCAGCCGACTGCGG -TCAGGCCGCGGTCACTGATCGCCACGGATGCACCGGGCTGGTAGCATGAGTATGAGTGCA -CCTCGCGGTACATGCGGATATCCCACACATTCATTTTTTGGTCCTGGCCTGTAGAGACCA -TGTAATGGCCTGTGCGATCCATCGCCAGTGACCGAACAGGTCCCTGATGCACAAGTGCCT -TTACCAGAGGGGTTTGAGAGTTGGGTGACCAAAGGGTCACGGCTCCATTTTGATGTCCGA -CATGGAGGATGGCATTCCATGGGTTCTGCGCGAGAGCGGTGGGTTTACCCAACCGTGTAG -CTAGCTCGGCGACGGTCTGGCCGGTTGAGGTATCTGTGTATTTGAGGTGACCGCTCATTT -GCTGTATAAGGACTTGATTAGTGGGGGCTATCTAGGGAATCGAAAAATCGAGGATACTCA -CAGCGCTTGCGAGGAGGAAATGATATGGCAGGAATTCCAGGAAGAGAGGTTCCAGGTGCT -TGCTCAGACAGTGAATCTCAGTTCCCTGGTGGTCGTAGATATAAGTGTGCTTCTTCTGTG -CGACAGCGAAGTACTGGTTGTTATGCAGCCAGCGGGCATCCCGCACAGTCTCGTTCAACT -GTAATTCGCAGCCCAATTTGCCCTCACGCCAATCCATCGTCGCAACATGTCCCTTCCGTC -CCGCTAGCAGCAGGTCACGTCCGTTCCGAGAGTAGTCCATACGGTAGGGGCCCATATCAG -GAAGCTTCAATTCGAATCCCTTCTTCGCTGTCTCGATTCCAACGCTGTCCCGAATCTCAT -CTTGGCGAACTTTGTATGTCTTCTCGAGTTCACCTTCAGGTTCGAGGAATCCCTCCGAGT 
-GCTCGAGCAAAATCTCTGCATCCTTGGCTTGGAGAATAGCCGCCTTGTGCTTGAATTCCT -GTCTCTTGAGGTTTGCGCGCAGCTTCTTGTCCCGTGCACTCTTCGCATTGGCGCCTTTGC -CTCGTCCGTAGGTTTGCTCGGCTGCTGCGAGTCTCATCTTCGCTTCGTGGTCGCCCTTGG -CGACAGCCATGTTGGAGGCTGTGTCGGCCATTTTCTGGCAATTTCTCAAGTTCGTTTGCG -AAAGAACTCGGGAAGATAACAAGGCGGACGAAAAGCGTAGCTTCAAGAAAAAAAGTGTTT -TGCCCAAAAATTCGCCAAGATTTGCGCTCACCGCTCGCTTATCCTCCACTGGCGAATTTT -GTTTTCTTATCGTTGCATCTTTTTTTTTTCTACAGAGGCACTTTCGTTTTTTTCTGTATT -TGCCAGTTATAAATCGCTCTGGCTAAAGATTTGTCGACTTACGCCTGACTCGCCTCAATC -TGGACTCCCTCCCTTTCCGATTCAAAGGGTCTAACGCCAATATGGCGCCTGCCCAGACTG -TGCACCACCACCGCTCGACCACTAAGGTTTCGCACAAGCCCTACAAGAGCAAGCATGCCT -CCAAGAGCGCCTTGAAGGATCAGGCCAAAGGTATGAATTTCGAACCACCCAAGCATGATA -AATAATTCAAAATTAACGCTTATATCACTTTAGGCAAAATTGAAGGTGATGAGCGGGGCG -GTCGTAAGACACCCCACCAACAACTCAAGACGAAACTCGATCGCCGTAACACCGCCCGCC -AAAGACAGGCATTGAAGCACCAGGAAAGATCCCAAGCTACCAGCATCTTCGCTGGCCAGG -GTGGTGCTCCTCGTCATGTCGCAATTGTGCCGTTGTCCGCCGATATTGATGCCGCCGCCG -CCATTGCTTCTATCAACGAAAGTGTCGATGTTTTGACTGATGTCTCGCCCGATGGACCTA -CCCGTGTGCGCATTGAGCGATTCCGCCAAAGCGTGCAGTACATGCCCGCCAAGTATGATT -TGATCAGCGCTCTGGACGTGTGCCGTATGGCCGACTTTGTCATTTTGGTCATGTCGTCTG -AAGTTGAGGTGGATGAGGAGGGCGAGATGCTGTTGCGCTCGATCCAAGGACAGGGTATCT -CGAATGTGTTGGCTGTCGTGCAAGGTCTCGACAAGATTGAGCCCCCCAAGAAGCGCCCCC -AGGTGGCTAATTCGTTGAAGTCTTTCATTAACCACTTCTTTCCCAATATTGAGAAAGTTA -TGTCTCTCGACTCGAGACAGGAATCTTCTAACGCTATCCGATCTATTTGCACTGCTACAC -CTAAGGGCATTCACTGGCGTGACGATCGCAGCTGGATGTTTGTCGAGGATGTGAAGTGGC -CAGAGTCCAACCTGGATGTGGTTGACGACGTCGTCATCACCGGAGTTGTTCGTGGAAAGG -GCCTGAAGGCAGACCGCATTGTTCACCTCCCTGGATGGGGCGATTTCCAGATTGATTCTA -TCACAGCTGCGCCTCTAGCAACCACCAAGCCCAAACGGGAGGATGCAATGGCCGTTGACG -GGAATGAGACCACACAAATCTTGGATACTCCCACCGCAGACCAGGATGACATGGCTGCTA -TTGCTCCCGAAGAGATTGATATGGCGGATGACGACATGATTTCTATGGCTGAGACAGAGA -AGAAGGGTGTTCTGCTAGATGACTACCATTACTTCTCGGATGACGACTCGCACATTCCAC -CAGTACCTAAGAAGTTGCCCAAGGGTACCTCAAATTATCAGTCTGCTTGGTACCTTGAAG -ATGTTTCAGACTCCGGATCTGACATGGATGATGAGGATGAGCCCATGCAGATGGACACTG -CTGGAGCACCCGAAGATGGCGTGTTCCCAGACCACCATGAGGCCATGACCGAAGGTGCTC 
-CTTCAGAATACCCGCAATCTGAGATGTTCCTCGACCCCTCCCCAGAGGACGAGGCCCAGG -AACTGGCCGACTACCGTGCCAGCCGTAAGACCGAGGCCGAAGAGGACTTGGAGTTCCCTG -ATGAAATTGAATTGCATCCCAACACGCTTGCTAGAGAACGTCTGGCCCGTTTCCGTGGAT -TGAAGAACCTCAAGCTCAGCCACTGGGAAACATCCGAGGATCGTCCCTATGAGCCCGAGG -ATTGGCGCCGGTTGCTGCAGTTTGCTGATGTCAGAGGCTCCAAGAACCGCATCATTCGGG -AGGCTTTGATTGGCGGTGTTAACCCTGGTGTCCGCGTCGATATTCACCTTCGCGCAGTTC -CTTCTATTTTGCGCAACAGCCCCAAGCCCCGTTCGCTATTCTCTCTGCTCCGCCACGAGC -ACAAGCAGACTGTGGTTAACGTCAGCATGACCCTGAACTCCACGGTGGAGAAGCCTCTCA -AGGCTAAGGAGCAGCTCATAGTCCAGTGCGGTGCCCGCCGCCTGGTCGTCAACCCCATCT -TCTCCTCAGCTGATAACACGCCGAACAACGTTCACAAATACGACCGATTCCTGCACCCGG -GTCGTAGTGCCATTGCATCCTGGATTGGACCCATGACCTGGGGTTCAGTTCCCATCCTCG -TTTTCAAACAGAAGCAAGCTGAGGAGGAGGATGGTGACGAGGCGATGGATACCACCGATG -CCAAGGAGGAGCCTATTGCCCTGGACCAGCTCGAGTTGATCGGTACCGGTACTGTTGTTG -CACCCGATCAGAAGCGTGTTGTTGCCAAACGTGCTATTCTCACCGGTCACCCCTACAAGA -TTCACAAGAGGGTCGTTACCATCCGGTACATGTTCTTCAACGCCGAAGATATCCAGTGGT -TCAAGGCACTCCAATTGTGGACCCGCCGTGGCCGCAGTGGATACTTCAAGGAGAGCCTGG -GTACACATGGATACTTCAAGGCTACCTTCGACGCCAAGATCAACCCACAAGACTCCGTGG -GTGTCAGTTTGTACAAGCGTGTCTTCCCTCGCAAGGCGAAGACTTTGGATGCGATTGCTG -CGTAAGCTGTTGATGTCAATAGCACCGGCAAGGTGGGATGATATGATACCATTTGACTGA -ACTGATTACATGATGAGGGCAGCTCTGCTCATTCTGAAGGGAGCATGATTTACCCAAAAA -GTACATATAGACCTGTGGCATCTTAAAATGATGAATTACGAATCTTCTGCACTTGCGTTC -CGTATTTAACAGTAGTATCTGAATGTCCAAAATCAAGGATTCGAGGACTTCATATTTCCA -GGCGAAGGGTGATGCTTATCACCATCAAGCTCAGACAAAATAAATCTCTACAGCCAATGG -ATTGTAGCATCGATCTGTTTTCATTTCCAGACCAGAAAAAGTTCAAGATAAACAAACAAA -ACGAGAAGCCAAAAAAAAACTTAAAGAGTTCGTATTCTCTAGACATCACGCGCGGGGAAT -CAACGAAGAAATTAAATCGCCATTCAAGAGAAGTCCTCTTCGGCTTCCTCGATGTTATTC -TTGATCCGCAGGAAGAGTGTTGTCTTCCACTTGTCCAATTTGCTGAGCTGGTCATACTGT -CGGAGCTTATCACCGAAGCCCTCCGGATCACCGCCCTCAACAGCCTGAGTTAGGTCCACG -AGAAGCTGGTGCTCCCTTGTAGAAGCAAAAGAAGGATCAAGCTCGCGGTATGATTCCAAA -GCACGGTTAGTCTCAACAAGGTCCTGGAGATATGTAATTAATACTCTGTTTCCCGTTAGA -ATAGTGAGAGAGAAACAACATACTTTTGTGGCAAGGTGGCAGATGCCTGCCTTAAGGAGA -TAATCCTTCACGGACCACTTCATCAGGTTGTTGTTAATAGAGCTGCGGCCGATGCGTTCG 
-TAGTTGGTGATGGCCTTGTAGTAGTCACTTTCCAAGGCGGCCAGGTCTGCCACCTTGAGG -TAGTGTTTGTTTGCAAGGCTGCATGCGAAGTATATATTAGCTGAGTTGTTTTTTCGAATT -CACAGACACCAAAGAGATCATGGGATACCTACGCTTCAGCATTATCACCGTCAAACCAAT -CAGCAGCCTTCTCGTACGCTTCCAGCGCCTTCTTCATATCTCCAAGCTCAACCTCATACA -GTTCAGCTAGGTATTGCTGCTGCGTTGCCGCGCGGCGCAAGTTTCCTCGTAGTACATAAT -GCTGGATGGCACTGGAAAGCACGCGGGCAGCATCCTCGGGGTCTGACTTGCGGTAGACCT -TGAACGCTTCCTGGAGGTTATTGGCGGCATCGTCAGGTTCGTTGAGGTTCGTGGTTTGGA -TGGAGGCAGCTTTCTCAAAAGCCTGGCCGGCCTCCTTGTCTGTTTGGAAGAACAGATCAA -TCATCGGGCTCAATACTGGCTAGGCCGCGGGGTAGCTCACTCATTTTCTGTACTCGGAAT -GCATTTGCAGCCTGAGTATACAGATCTGCGGCGCTCTCATACTTCTCTGATCGTCCTCCA -AACCAAGAGAAGCCGCCACTAGCACCGGACAGCGCTTTGTCGGCCTATTTCATCGAAACA -AGCGAGGTACGGTTAGATATGCGGACTATCGTCATTGCCTATGGAAACAAGGCGGGGAAT -TATGCGTACTTTTTGGAGCAGGACACGGGGGTCTTGAGTCATTTTGAGTGAATGTGTCTG -TTTTCACTGCGATAGGCCGTAGTCTGTAGATATTTGATGTGAAGTTGGAGTGTTGTGGCC -AAATGGAGTTGCTATCGATGAGAGCATCGATCGGCGGGCCCGTGGAGGCGGTGTCGTTTA -TTTTTCGGGTCTCCGCCTAAGCTCCCCAGTGTTATCTTGCGACTTCGGCGACTTTGGCTA -TTTTTACACTGTGCTGTCCAGATTAGACTGTGCTTCTATTTCACTAGACCTCATTTCTCT -CTAATTACCCTTAGAAAAGGTTTTTATTCCTACGGACTCTACAATGACCTCTATTCGCAC -TGCTGCGTGGCTATCCAAGTCGTTGCGCCCAGGTTCTTCCCGACTCAACACGACCTTCTC -TCGCTTCTACTCTTCCAAAAAATCCGACAGTGCACATATCGAGACCATACTTGGCGCGCC -AAACTGGTCTGTCAGGTCGCTGCTCCCGAACCCAGCCTCAAAACCGCCACCATCCATCAC -ACCGAAACAACTTCATCATCTTCTGCGCATCTCGGCTCTTCCCCAGCCTGCCAATGAAGA -AGAAGAGCAGACTATGCTTGATACACTCGAATCTCAAATCCATTTTGTGAAGGAGATCCA -ACTTGTTGATACTACCGGCGTCGCGCCGCTGGCTAGGATTCGTGACGAAAGCCCTGCTGC -TATGGAAGAGGAAACAATTGGGATCGAGAGACTCAGAGAGGCTCTGGCTAAGGAAAAGGT -GTCCGGGCGACGGGGAAAAATCCAACGTATCCCAGGTGAAAAGAACGATCGTCCGGATGG -AACAGCCTGGGATGGGAATGCGCTCGGATCCGCGACAAAGACAAAGGGGAAATATTTTGT -CGTTGAAATCGGCAATTGAGTTGTTTCCTTGATTTTCTACGGCTTTGGAATACGCAGGAG -ACTTAGATATCCTGCATGAAACGAGGCTGAATGATATTAATATAGTCAGATGTTTCTGCT -TCGGGCTATTGGTGGCCCCGATACCCGTGACTATTTGGGGCCATGACAGCATCGCCCCCC -CTCTCCGACAACATCGACTTCGGCCTAATTGCTCCTTCAAAACTTTTACAATCCATTTTT -CACTTTCATCCAGCAATGTTCAAAAATCTCAAAGAAAAACATGGAGGATTTTTTGAATCC 
-AAGCAATCCAATGCCTCGGACAGCGATCGCGGTCAAGATCTATCATCGATCCTAGACCGG -TCGCAGCGTGGTGACTTAACCGTCCTCGTCGCTGAGGTAGCTGAGTCAATGCGGGTTCGG -ATTTTGGAACTTTATGAACATACAAATCCAAAGGATGACTCGCCCAATCGTTCCAGCCCA -CGTTCATCTCCGGAGGCGGGCCGCGATGAAGACACACCACGCTTGCCCCCAAAACCACAA -CGCAAGCCGCTTCCGGGAAACAATAGAAGTCCGCGACAGGGCTCTACTAACAAGCCGTTG -GCCCCTCAAGCCAAAGTTACTGCGCTCTCTGCTTTCGAGGATTGGAGGGACTCTGTTCTT -TTGCGTGTTGGCACAGTCGTCAACAAAGATGATAATAAGCAGGGGGAAAAGGAAGGCCAA -GAAGAGCTGCAGAACGTTTCCAAGTCTAATGATCTCCCATTGGATGAGGATCAAGATAGA -ATGGCCAAATTACAAGAAATATACCACCCCGTGGAAACACCCTTGGCTAAACTTCCTAAA -GCAACAAGGCTGCTTATCCTTCACTCGCTTCTCCTTTTAATGCTCAGTCTAGAGCACTAC -AGTGCTTACTCAAGAGTCTTGATGCTAAATGTCACGTCCAGTCTGAACATAGACATCAAC -CTGCTCAATCAAGACGAAGTCAGTGTCGCCCGCGGGCTGCTGCAAACAGCTTTGGCGCTG -TCCGCAGACCAAGATACCAAAGAGCAACCAAAGAAAGATGATAGCATGCGGAAATGGAAA -GTTGGCCTTGCATCTGTGGCTGGAGCCGCTTTGATTGGCCTCACTGGCGGGCTCGCAGCT -CCATTGGTAGCGGCGGGTCTAGGAACTGTCATGGGTGGATTAGGCCTTGGCGCTACGGCT -GCTGCTGGGCTCTTGGGTACTTTGGCTGGGAGTAGCGTTGTTGTCGGTGGTCTGTTTGGG -GCTTACGGAGGGCGTATGACTGGTCGCATGATGGAAAAGTACGCCCGTGAAGTGGACGAT -TTCGCATTTATTCCGATCCGAGGAGAGCGCCGCCGCAATGTGAAAGAGAAGGAGTCTGCT -CAGCAAGATCATCGTTTACGAGTTACCATTGGTGTCACCGGCTGGGTCAAGGAGGAGTCA -AACTTCGTGATCCCGTGGCGGGTAATTGGCACCGACTCTGAGGTCTTCGGTCTCCGCTGG -GAGATGGAGCCTCTGATGAACTTGGGTAACGCGATCTCAGCTCTTGTGACAAGCGCAGCA -TGGTCGGTTGCTGGGCGTGAGGTCTTGGCTCGTACTGTCTTCTCTACAATTATGAGCGCG -GTGATGCTTCCCCTAGGTCTCATGAAAGTTGCCGGTGTTGTCGACAACCCGTTCAGCGTG -GCCAAAGCTCGTGCTGACAAAGCGGGTGAGGTTCTCGCCGATGCCCTGATCAATAAGGCT -CAGGGAGAACGGCCGGTGACCTTGATGGGGTACTCTCTGGGTTCTCGGCTGATTTTCTCC -TGCCTCCAAAGCCTGGAAAGAAGAAACGCTTACGGGCTGGTCGAGACGGTCATTTTGATG -GGGTCACCCACCCCTTCAGACAAGGAAGACTGGCAAAAGATTCGCAGCGTCGTGAGCGGA -CGTGTCGTGAACGTATTCTCTGAAAATGACTCCGTACTCGCTTTCCTGTACCGAACAAGC -AGTCTTCAACTTGGCGTCGCCGGATTGCAGCCAGTTGAAGGTGTTCCAGGCGTTGAAAAC -GTGAATGTAAGCGAAATGATTAGCGGTCATCTGCGGTACCAGTTTCTGCTCGGCAGAATC -CTTACTCTCATTGGGCTTCAAGACCTTGATCCCAGTGAGATTGAACGCGAAGAGGCTGCT -CTCGCATGCGAGGACGAGCGGCTGGAGAAGGAGCGGTTGGAGAATGAGCGTGAGGCTGGT 
-GTTAAAGACCGGGAATCATCTTCTGCCCGTGAGACCCTGAACAGCGAGCATGGCTTTGGG -GAAGATGAACGGTTGCAGAAGAAGGTCGAGGCACAGACTCAGGAGAACATGACGACTCAC -CGGATTGAAATGCTCGATATGGACGACGATGATTATTCGACGCCTTTGAAGGAAGATCCG -TCAAAGCATCCTGCCCTCTAGATGGGGATTTGGGATTCATAGTACAGTCATTTCGACACA -CCTGTCAAATCTTCCTCGTCAAAAATACACGAATTTTCGTTATTTCTCAACTATCGCAGT -AAGATGGGCTGGATATTTGGCATTCTGTCTCAATTATAGACATAATAACTATAAGCATAA -CTAATTCGTAGTGAAAATTTCACTTGTAAGTATTCTTAGCTAATTTCTTCGCGTAACAAA -CCAACTTCCAATATTCCAGACATCACACGCATGTTGATGCGCTGAATATGCCCTTGAATA -CCCTTACAAGCTCAAACAGTCCGTATTTAAACATTTGGACCTCCCGGCGCCATTTTTGAT -CGTCCCTTGTTGCCCTAACCAATCCGAATAGACATGAAATGTTCGAAACTGGAATTTATC -GGACCAGAACCTCGAAAGCCAGACTTGAACATTGCTCAGCGGCGGAATGCCAGCCGGAAA -TACATCCGACCACATTCCTGAGTCCCAACTCATATAAATAACTTGGTGGTAAACCCCAAG -TCCATCGCATGAACTTGATCTGAACGCATGAATAGCATGGTGAAGCAGAATGAGAAGAAT -GTATCTCCGATTCGATTCGCGGAGCCAGGGATCCGTGTTCGCGACAACAAAGGGACTGGT -ATACAACTGGTCTCGGAGTTGATTCTCATATGGTGATTCCCAGGTATCTCCATCTCGGAA -CCAGCTTTTATATCTAGCGAGACGAAGGAAATCGGCCGTTCTGCCCGGTGTCTCGTCCAG -GCTAGTTTCTAAAAATAAGTTGGTCTCAACGAGTATTTCTTCAATCTGAATGGCGGCTGT -GTCTTGGGTCATCGTGTGAAGCTCCTCTAATCTGAGCTCGATGCCGGTTCTGTTAAATAG -ACCCATGAGGCGTCGGTGAAGGTTGTCGAACTGTTGGTCTGACATCGCGGGCATATTTTT -GGCGGGATCCTCGTTCCATAGTTCCAGACGAAGAAAGGGTAACATCACGATGTCTTGATC -AGGACGGTATCTATATAGGGGTTTGATTGACTCCTGTAGACCTCCCATCTCATAAAACAA -ACCTCTGTCGGCTCGCCAGTGAGCTGGTGGCTCTCTGGAATGCCATGACCCCTTTGGGAC -CAGTTCTATATCTGGTCGAGTAATGGGGATGACAAGATCAACTAGTGCGCTGACTTTCTT -CCAAATTAATACTAGTTGGCCAGGGTCTGTTGGATCTGGGGCGTGGATATGCACTTCCAT -GGTCGTTTTTGAATGCGGGAAGTTGTGGAAATGGCTTTCGGTATCCGTTCTGTTCCTGAG -AACCCACTGGATGTCCACGGTTCTGGACTTTAATCTTATGACCATCCACTTTCTCTTACA -GTATTCTGGATTGAGCAAGACGACCTGGACGGAGTTGTTGAATATATGGGACGAAATCTC -ATTGTAGAAGTATCGACTAGTGCAAAGAATGGATAGGGGATTGGCTTTGGGCAAGACTGA -TTTTCTGTGGATGGACGAGATAAGAAATTCCCATATAATCAGGCGAATCTCTGTAGGAAG -CATGGAAAATCTTCCTAGAATTTGGAGTGTAGATTTCATTATGGGAAAATGAAGGCGTTA -AAGTGTTGATGAATGCAAAGTATCAGCGATGAAGAGGGGGAAAGGAACAGAATATCAGTG -CCGAAAACTATTGCACATTACTCACGAATCCAGGTACATCGTCAAGTTTACGTCGAAACT 
-TGCTACTATAACTTTCTAATATGTGAGGATTTCCCGATTTGGCGCGGAGAGGTAGAGTCA -GCTCAAACCCTAAAAAGACCACCTCAACTAGGGAACATTTGAATTTGGCCTCAACTCATG -GCTTAGCAACATGCACAATAATCTTCCCCCTGGCCCGGCCAGAGCGAAGCTTTTCAAATG -CCTGAACCGTATCCTCAAACTCGTAGACAGTATCCAGCTGAATTTTGACAGCCCCCTTCT -GCACCCATTCCCCGATTTTCACAAGATCCTCACGGGTATTCTTGAAGAAGAAGATAACAT -ATTTCCTCTTCCCACCACCCAGGAAAGCAGGCCAAATCAGCCTATCAACAAAAGTCCCCA -TGGCTGACGCCCCAACCTGCACAAACGCCTTCCCAGCCGCCAAGAAGTGATGGGACTCAC -GATACATCGGCGCAGGATATCCAATATGATCAACAATATGATCAAAGACAACCCCACGAT -CGCGCAGTGTAGCAACCAAATCCGTCTCAACCGCGTAGTCAATAACCTCATCAGCGCCGA -GCCCACGACAAAACTCCACATTCTTCGTCGAACAAGTAGTCGTGACATGACAACCCAACT -CCTTAGCAATTTGGATCGCAAAAATTCCACATCCCCCAGATCCCCCGTTAATAAAAACCT -TATCGCCAGCATTAACGTACCCAGCCAATGACTGGTACGCCGTCTGTCCCGCAATACCGA -TGGCGGCCGCATGATCAACCTCCACGCCCTCCGGCACGGGCGCAAGCAAACTTGCAGATG -CCACTAGGTACTCGCCCAAAGAGCCGAATTGCCCCGGCTTGCTGAGACATCCGTACACGA -GCTGGCCGGGTGAAAACTCGGTCGCGGCATCGCCGGTGACGACCACGCGTCCGCAGAAGT -CCATTCCCGGTGAAGCGGGCGTGCCAATCACAAATTTGCGTGCTACTAAGCCCATTTCGG -GGACCTTGAAGTCGGCTGGATTCAGGGAAGCTGAGAGGACTTGGACTAGGACATTGGTGC -CGTGCACGGCTGGCGGCGCGCGTGCCGACGCGTCGAAATTGAGGTTCTTTTCTAACCCGT -TGGTTGTGCTGGAGTAGAGCCAGGCTTTCATCTCTATGGGCTGGGCAGATGCCATTTTGT -GTGGTATGTTGTTTTGATGTGGTTATGTTGAGGAATGGGTGTTGCCACTTTTTTAGAGCG -CGGTTGATGACCTCACCCGCACTTATCCCTTCGCCTTCCATGATCACTTTTGAAATCTCC -CGAGTAATGTTCGACTAATTGAATGTATTATTTACGCAAGCTAACTAACAATCTTTTCAA -GTTCTCTTCAAGAAAGTCAAAGCCCCAAGTGCGAGAGCATCCACACCCGTACACAATGTG -GGCTGGATGGCTGGTGCATAGAACGGCGAATGGTTACTAGGCAGTTCATCAACAGTCCCC -TTAGCAACAGCATCATCCCAAGTCTCGGGATCAGTTCCACCAAACATCCACATAACATAA -GGCGCACCCACAGCAGTAGCCAGCAAAGTAAAGTCCTCACTAGCAGTCGGAGGCGCCGCA -TCAACCAGGCGATCACCAAAGTACGGCTGGAAAGTCTCACGAAGAGCATCAACAGTCTTC -CCATCATTAATCGTCGACGGTGTCGAAGAAGTAACCTTAATCTCAGCCTTCTTCTCGACA -CCGGCAGCCATACACTCCCCTTCAATAATACGATGGATAGAAGCACACACACGCTTTCGC -ACTTCCGGATCATAAGTACGCACATTCAGCTGCAGATCGAGATATGCGGGGATGATATTC -GACGAGTGTCCAGAATGAATACTACCACAGGTCACGACAGCCAACTTGCCCGGCGTGACC -TCTCGCGAGACGATAGTCTGTAATCGTGTAACGATCGCCGCCCCAAGGACGATCGGATCA 
-ATAGTCGTATGCGGCGCAGAACCATGACCCCCACGCCCAAACACGCGCACGTCGAAGGCA -TCAGCCGCCGTGAGTAGCGGGCCAGCGCGGAGATTGACCGTCCCCGCGCGCATGCGCATT -ACATGCTGCGCGAGCACAAGGTCCGGCTTGGGGATCTTGTCATATAGCCCGTCATCGAGC -ATTGCCTGTGCGCCGTCGAGGTGTTCCTCAGCGGGCTGGAAGATGCAGATTAATGTTCCG -GACCATTTATCGCGCGCGGATTGCAGGAGCTTAGCGGCGGCCATTAGGGTTGCGACGTGT -GAGTCGTGTCCGCAGGCATGCATTACGGGCCTGTGAGCTCCCGTTTCGTCTTTGACGTGG -CGGGTGCTTGCGTATGGCAGGCCTGTCTGTTCTTCTACGGGCAAGGCGTCCATGTCCGCC -CGTAAAAGAATTGTTGGGCCTGATCCATTGCGAAGAACTCCTGCTACGCCATGGCCGCCG -ATTTTGGTGTGGACTTCGAAGCCAATGGCTTTGAGGTGGTCGGACGCTAATTTGGCTGTT -TGTTCTTCTTGGCCTGATAATTCGGGATTGGTGTGTAGGTGGCGATAGACATCTTCAAAC -TGCTCTAGATCTGGTCGGTGGCAATTGATGATCTCAGCCAGGTCATTGTTTTTGTCTATA -TAGGTGAGTATACTGGCTCAATTGATTGAACTTGGGTAGTCATACCTCGCAATTGCATAT -TGGAAGTAGTATCAAAGTACTTTCACAAAGGCAAATGTCAAGTCATCCCTTCCTTGGCCC -GGGGTTTATATACCCGAACAATTTTCCGAGCAAATCGGAGGACAATAACATTGGTCATTG -TCTTGCCTCTTCCGGTCGACAAACCCACCTCGTCGGCAAATCCTCGAATCATTCCCAGAG -ATTCCCCGCAGGTGTGATTCTGCTCGCCAATTGCCGGCCAACTCCGGTTTCTTGAGTTGA -ACCCTCCACTCTCTGGGAATACGCCTTGACTTTGTATTAGTCAATCTGCAAAATTGCCTG -GACTGGAACGTTATGGGTTATTTTATATGATGCTCTTAGGCTGAAAAGAATATAATTAGG -GATAAGTAGCCACATTTTCCCACCATCTTCAACAGCCCGGCCGGGCTAGCTCAATCGGTA -GAGCGTAAGACTCTTAATCTTAAGGCTGCGGGTTCGACCCCCGCGTTCGGCTATTCCTAA -AACCAAACCTGAGATTTCTTTTTTGTTTTTATTTATTGTTTGCCCAGTATGGGATTCATA -TGGAATTAGGCGACACAGGTCGGGTGCATGATGGTCACATCACGGTTGGGCCATGTATAA -AACTGAATCTGCAAATAATTCCTCATAATACATTTAACAATCATTTATTATAATAAAGAC -AATCAAGCAGACAAACAGTAAAACAGTAAAACAACTTTAGGGGGAAAACATCGATATATA -TATTTGCGCTAAGTTGGTGTACTGGACGGAGACACATCCATCAACCTACCACTAAACACT -GGCTATAAACAGCTGAAACTAACCGTAGTATTCGAAGAAGCTGGTAGTAACTACGAATCG -CACCGCGGTATAGCTCTGGAGATGGTGGATATGATCAATAACTCGAATTGTTCCATGTCA -TCGATGTGAGGTGCGTCGTATCGATCGCAGATGCATTTGCAGGGTGACATCATGGAATTT -CCCACACAGTCTTTCCTCTAGCATGAAATGGGGGAACACATTTTCGAAGGGAGTTCAATT -GAGCTCTAAATCGGACATCATGTGAAATACGCTACACCTTTCATCTCTATCTCCCGATCT -CACATTCATTCTAAATATCCCATTAAATATCAACTAGGTTTTACAAGGACCTGGCCTATA -AACTAGTATATTTTTACATTTAATATATCCAGCTGGAGCTAGAATCGGAGGTATGATATT 
-TTAGCCGAGTGCCTGTTAAGTTCTCTCATGTAATCAAATCAGGTGCAGCAGCCTACCCTA -ACAAGCAGGGCCGGACCATACATGATACGACACATGATACATACACCAATTAGTTGTGGT -TCTTAGTGGATGCTAGGGCCAAATATAGGCCGCTGTACAGACAAAAACACACTTTCTATT -TGTCTTTATGATCTTTTGGCCTAGCGATTAGAGACTCATACATAATAAACTAGGAGTATC -TATGTGACGGCCAAAGCCCCGGAGCGGAGTTCAAGATTACAAGGTCTCCCAGACTGCCTG -GCCTGAGAATGGTAGACTCGTCACCGTCGAGTTTAGATAAGAATTGATTGGTAAAGTATT -GAGATGCAAAAAGAGACCGCCCCAACATTCGTTTGAAAGGCGTGGGAAACCCCAGCGGGG -AAAAAACCACACCTGGAGCTGGACAAAGTATCGAGAACAAAGTGGGCGAGTGGGCCTCCT -CGAGATGCCAAGGTTTGCACCGGAAATGAGAGCCAGGTAAACAAATAATGAACAGCACTG -TGGTATCAGGGCTCCGAGATCAGGGTCATAGATGACCCACTGCTGCCTGATAAGCCACCA -TACACGAGAACACAAACATCACAGAACAGCTCCGAGGGGACAAACAGGACCTCTAAGAAG -ATGCGGGAGATTCATAAAACGCCCATAGCTATCAGACCTTTGCAGCTTACGAAGTGAATG -GTACCTAACCCACTCAGCGACGCCCAAATTAAGGGCGAATGGGGAAGATGCAGCGAGCAT -TTGTTGTACCTCCATCAACACCACCATTATCACCACATCTACCATCCTTGCTGAGGCTAT -CTTCACCGATACTGGCGTACCTCACCCCAATGCGTGAGCGGTTCTTAGAGAAAGCGCAGT -TGGTGATCGCGCTCGGGATCAGCCTGGTCGAGAACACGTATGGAAGCCATGAGTTAAACG -CCTGCCAGGTAGCGGAGCAAACAAAATTATGCCGTTGGGGTGATTGGCCCTATAAAAATT -AGTTATGGTGGGTCATCAACTCTGAATTCCCACTGCTCAAAACGTACCGCACCGTAGAGG -TAGACACCATTTGAGCCGCTATAGTGGAAAATGACAGAAAAGAATGCGCCATCAAAGCAG -CCGATTGGATCACAGCCAACAGAGCGTTTGTAAAGTGATCCGTACGTCTCAACATCTGTG -ATATTGTAAGGGTCTTGATGCACCTCTGGAGATCGAGTGGCGAGTTCCTTGATCTGGTGC -AGACTAAACGGTCCATCTTCGTTTGACAGGGAACCAGACACGCTAGAGTCTGCATTGCCT -TGGTTCACATGAGCAAATCCGGTAGCAGGCTCTTCAGTGCTGCCAGCCTCCAATGGCATG -GCATGGGCAATGCCACTGAGCAAAGCACACACAGGTGCAATGATGGAAAGACGCATTTTA -TGAATGGCGCTGAGGACTGAAAGACAATATCTGCATTGAAGCAATTCGCCGGGGGAATAG -GGCCTTTTATAGACAGGACTGGCAACAGTCACTGCTGAATCTCGCCAATGCTGCTCTTTC -CAACCATGGAGCAATAAAAAAAAAGTCAATTCGGAGTCTCTACACCAGACAGACTGCATA -TACGGAGTACGCCGTGCCCAATCAACCTGAAGCCGCAAATGATGCGACATGCCCGTTCAG -GCAAAAAGCGAATGAATTTCGTCAAAGTAAGCAAGTCGAGTGCCCTCATACAATCCTGAT -CCAACCAACAGCACACACAGCTGATCCTTCTCGGCTGTATCATGGATCCGAGACCGAGCC -TGCACCTCGTTCTTTGCTAACAAAGCAGCTCTCTCATGCGCGAATTGCGGACACTGTTAC -ATCGGACTCCGCTGTAGTCTAGACTACTCCCGTCACATCCGGCCCCCATCATCAATCAGC 
-TAATTGCTGATTTATTAGTGAAAGGATGCGTGACAAGATTCGCTCTTGCACCTTCAGTCA -GTATATGTTGGAGGTCAATGCGTGGAAACTCTGATGATCCGGGGACGAAGGCCAATTAAG -ACTTGGTTCAAGGGAAAGACATTCATACGGGACAATATCCGCGCTCAATGGGAGCGAGAG -TGGAGATTGTCGACAAAATCCAGTTACTTCTAAGAATTAACAACAGTTTGCCGGCTGTAC -GGTTCTTCCTCACTATGAGTGCACACAACACATCTGCCATTCTGATCAAGCCATCATTTA -CAACTATTTTTAAAGCAGAATTTCCCATTCATGCGCCTACAGCCCCCAAATATGCATAGG -ATGACATTTCTGAGTACCGACCTTTGATACACTAGCGAGGTCGTATATGCATGATTAAAA -TATGCTAATACCCCCTAGGCTCCGAAAAGCCTAGACAAATCGGGCGCTATTGCGGGCTAG -TAAGAACAAGCCAGTCAGCAAGACGTGGGCATATGACTTTATGGACCGTCTTTCAGGCCT -AAGATTGCCTTATGAGGCAGAGAACGAAGGGATTGAAGTATATTAATACAATAGATGCAG -ATCTATTAAGCCAATGGCTTGGCTATCTTGCGATATCATTGAAATATGTTCCTTGACCAG -TATAGGTGTCGATAATCATGGCTTTCGCCTTGATAGGGGTAGATCAGGGACCGTGATTGG -TTCAAAAGAAGCTTTAGCCCCACTAAAGCTTAAGAGTCACCAATCACAGAGGAGAGTCAG -CGGATCGCGGGCCGATCCGGCGTACCGGAGCACGGGATACGAACAACATCCGAGGCCTAT -CCAGATTGCCCGACTCCCACTAACTTTTAGTAGCCGCACTAATATTTTTTCTATCCAGTG -AGGTAACTTTGCACATCGCTATTACTAGCGCCAAAGGATTTCAAAGTGCAAGGGAGGCGA -TGTTACCCTGTAACATTTGGGCAAATAGGAATCCAAGTAAATTACACGCTAGTAATCTAT -TAGCAACATTCTGGCTTGAGCCCCATATTTGTTTGGAACCCCAAGGCAATCCCGCCTCTG -AGATACGAGATGAAGCAGAATCAAGCCGTGCTATAATTGACCTGTCTTAATCGGTATGAT -GTGGAAATAAGATCCTGTCCGAGTTTATTACTTCCAAAGCCGCAGGGCCCCACATGGGTG -TCCTATGTTGCACACCGTTGGCTGATAATGCTGGACGTGCTCGTTAGGCAGCACAGCGGG -TTTTAAAAGTTGGTAATACATAGTTGATACGTGCATGGGGGCCAATTCCTCTATGTCTCG -GAAGAGGTGGAACACTTTAAAGTTAACTTGTAGAGCCCCACAATTACAAAGAAAAGACAG -ATGAAAATGGCGTACTCCGTACTGCGTACAAATAAAAATGATAGTAGGTAGCTGTAAGTA -ATTGCCATCTACCCAACTAGCGCGACCGTGATATTTTCGGCTGCGCTGGCGAAACGCCGT -GGAAAAAATTGGATTGAACGATTCATACCAGGCATTCTCGTCTATATGCCCTACAAACCT -AGGGATGCTCACTCTTCGGTCTTCATGGCCACTTCGGAGGCGTGAACTCTACATATCAAT -TCTTCTGATTATTTTGTTCGTGTTGGGTCTCGGCCACTTTGTCCACCGACAACGATACGC -ATCTTCCCAGAATGTGTCCTCCTCGACAAAATCGCCATACTATTCTAAAGGCTGGAACCA -TTATTATTCATGGAGGAATACGCCTTTTTTCCCGCCGCTCATCGATGCTGGAAACAGAAA -TCTGACGGCGTCCGAGTTCTGTGCAAGCTTTCCCACGCACCTATTAGATCGCATCCAAGT -CATCCTCAAAACAGGTGCTGGTGAGCCTGAGAAGAATAAAGCGCATCTTGCCTCTGTTAC 
-TTCCTGCATCACCAACCTCATTGTATTTTCCGATCACGATGAAAAAATTGGTCAACATCA -TTTCATTGATGTACTCGCCGACCTCCCGCCAGAATATACCATTGACAATCCGGATTTCGC -GACCTACAATACTCAGAAGCTGGCCTATTCACAAGGAGGATCGATTGGCTATTCGCAGGA -AGGTTGGAGGCTCGATCGCTTTAAATTTTTACCTATGGTTGAGAAGGCATATGATATGCG -CCCGCACGCCGATTGGTATGTCTTCATCGAAGCCGACGTCTATTACTTTTGGGACACGCT -GTTCCGCATGCTCGATCAGCTCGACCCATCCAACATGCACTATATGGGGTCACCGGTGCC -GGGACCCGCGGGTAAATATTTCGCGTATGGTGGCGCAGGCTTTGTGCTGTCAACGGGCCT -AATGAAACGCTTGGTTGGAGACTCTAGCACTGACTCAAGCCTCAGCCTTCAATACATAGA -CTGGGCCAAAAATGACTGCTGCGGTGATGCTGTGCTTGGTTATGCAATCCTGGATAAGAC -TGGAGTGAAGCTAGAAGGTCTTTATCCTACCTTTGCTGGTGACGACCTTGGGTCCCTCAA -AGTGAACAAGGAGAGGTGGTGTACTCCGCTGCTTGCACTTCATAGAATGTCGCCAGAGCA -GATGACGGCACTGTGGAAATGGGAGCGGACGAGGCCATATAATGAGGTATCTGTCGTTAG -TTTTGCTGAGGATATGTCCCCTACTAACGCCGTTAGCAACCCTTCCTTCACTCGTCGCTC -CTCGCATATACTCACTCTCATCTCCGCGACGCGCCCACTCGAAAATCCTGGGACAACTAC -TCGGACATTCCTCGACCAAAGGGTTCCCCTGCGCATTCTTCTGCTGCAGCGTGCAGCACC -GCTTGCGCTGCGGACCCATATTGCCTGCAATTTTCCTATTCTTCCAATAAGTGTCGGTTT -GGAAACTCGATACAGATGGGAAGGCTTGTACCTGACAATCCAGATTCTATATCTGGTTGG -GACATTGCGAAAATGTCAAATTTAGGGTTCAGGACGGACACAGACTACAGCGCCTCATGC -AGGCAGGCTTCTTGGCTTAAGCCGATTATTCACTAATGATCATGCAACGAACTATCTGAA -TGAAGAAAATAAAAGACAATTTCATGGACGGTCTGGATTATGACAAACCGCACAATTAGT -GTATCAGGTCCGCACACCATCGATTCAGATTCGTCCCGCGGAACACAAATATGCTCCTAG -GCTTTTGTAGCAGTACCAAACATCTTCAAAACCCGGACCTCCCTTCAAGAACCTCAAAAC -ATGAACATGAAGATCGGGACCCATGATCCATCAAAGAGGGCTCCGTCGGGATACCCGCGA -TCGTCCGCTGATAGATATTCGAATAGTCACGTGAGCTTACAAGGTACGGGAATGTTCCCT -AGTATGCAAATATTTAGAACTTGGACTCGGAACCTAGGAAATACGGCATGAAAGCCGACA -GAGGATTTGTCCTAGGCATCTTGGAAATTTTCCGGATGAAATAATACTCACAGACTTGGG -GGGTGCCCAGTAGGAAACGCTTGTGATTTAGTGCTGTTTGGTTTAATATCCAACACTGCA -CTTTATGTTTGAACAAGAAGCTATTTTTAACAATCAATTAATTCATGAAATATTCTTTCC -AACCACTGCTGTAGACGTACGTTTGGCCCCCGAGTAACATTTGTATTTTATTTTGCTTGC -GCTCACCATATATTACGTCTAGAGCAGAATTATATTACCATACAATGATCTGAGCCCGTG -CGTACCACAAATTTGTAGACTTGGCTATCAGGGTCCAAACCCCTTTTTCGGGAGGGTTGG -TGCCTGGATGCCTTACGTTATCACTCAGTATAACTAGCGTGTGGTAAGTTTATCAATACA 
-AGATACTTCCAAGGTGAGATTTTTGATCCTATTCAAATTCGAGTACCTCTATATGTGTAA -GCGGTTCCCGGTCTGAAATTGAAGCTTTTGGGACATGTAAACGCCAGCAGAATTAGGGAC -CTCTGATTGAGGGACACAGCATGGATATAATGGATACCGAGGTAGTATTTCAGGTATCGT -AATTCCCTACTCTCGTAATTCCCTACTCTAAAACAAGGCATAAAACCCGTCTTGAAACGG -GGCTTGGACCCTGATCGCTTAGTCTGCCAAGTTTTCGCACCCGTGCTGAGATCTACTGAG -CCTAAGCTTGCGCCTCATAAAAGCGGCTCAAATAGGAAAGCCCCACTCAGGGAAACCCCG -GTTTCCTACAATTCATTCTTCCATCAAGAACAACGCTTGGTGACCTGGATGAACGAACCG -AAAGAATTTCCTCTCTCAGCATGCAGAATGGAGAATATCTTCATCCACACTCTGAACACA -CCCATACCCCCATTATAGTAAATGATCCCTTCGGTGTTCTCATGGCTATTGTCAAAAGGG -AGGAAGAGAAAGAAAAACCATGGTTTTTGAATCTAGGAAAAAGCAGAGTATCTATTCCCT -GGTACCCGGGAAACAACCCACTCAGTCTGCAACCAAACATTGAGAGATGGGATCATATAA -AGGCCATCTGCCGTCCACCCCAGCACAATCTCACCTAATATCCATAGTACGGGTTGACAA -TCGAGTCCTCGGTCTTCTCGGTGCAACATTGGCCATTGATATTGGTGCCAGTCGAATCAT -ACAGACAGTTATCTGCTCCAAAGTAAACGTCGCCATAGGCGCGGGGCCAGGTAGCTTGCA -GCTTGCAGCCAGGAACCACTTTGTAGCCAGTCTTGTTGTCTCCATCAAACTGTGCGTAGA -TGGTCGCGACGCTGTCTCCCGTAGCTTGATCTGCAATATCTGTTGTCTAGTTAGAAAAAG -ACCTTCACACAAGGGAACCTCGAGAATCATGGAAACATACTGGTAGACTTGGTCTTAGAC -TTGCATAGGGTCTCACCGTCCACATTGATGCATCCGTTGTTGACGTGCTTTATAATTTTA -TGTTAGCAAAAGGCAAATCGTCGCGCTATGTGCTATGATTTCGTCAGACACTTACCACAT -TAGTTCCACCAGCGTTGTATCCCTCTGGTGCTTCGCAAGGCCAGTATTTTCCCTTTGCTC -CGCCCCCACTGACAACGTCAGCCGCTGCAGCAATATCATCGCAGATTGGCAGGTTGAAGA -ATCCCGGAGTACGGATTCCGTTCTCGAAAACCAAATCACCCTCTGTGCCTGCTCCATCAA -TAATCTTTGACAGAGCGGGCATCTCATAGCCATTTTTGTTACCATTCAACTGATATGCCT -GGTAAGAGCTCTGAACTATATCCTCCAGGATGACACCGCCCCAGGCATCTCCGTTAAGAG -TTTGATGGTCCCCACCAGGGAGGGCTTCGAAGGGCTGCATGCCAGCAGAGGTATCGATGT -TCACGACGTAGAAAGCATCGTTATTGTAGCAGACATACCCGCCGACAAGAGCGCCCTTAT -CCATGTAACCGAGAACGTCTTGGTCAATTGACGTAGAACAGGCTCCAGCTTTTCGCCTGT -CAACAAAAGACAAGTGGTTAGTAGCTTAATTTCACATTCGTGAAAAGATGGGAAGGGTGC -TATTCTTACAGGATTCGGGGGTAAAGGGTGCCAGGTCCTTGCGCCCAAGCTGCCGGGATT -AACTGTCCATAAAGGATCTTCTGCGCTTGAGCGGTCATCCCAGTCAAGTCCACTTTTGAC -GAAAGTAGAAGCATTGAGCCGCTGGACGTCAAAGTCTTCAGAGGAGAAATCGAAGAATCA -TCATTTGCTCCATCAAAGAGCGAGGACATGTATCCGCTCTCCATACCAGTCCAAGCAGAA 
-AAGTATACACCAAGAGCGGAGGAAATGCCATTCTGGGTGCCCAATGCGTCTTTGGCGCTG -TTTGTAGCTTAGATATTTGTTCAAGAATGAACCTGACAGGGAGGATGTAACTGACGATTT -CATTGAGTCTTTACCCATCGCAGTGGTGAAAGAAATGAGCGCAGCGTTGGTCACGTCTTT -GCCCATGGAGAAATATTTTGACGTGGCCATACTGAGACCCTTGAACGCTAATATTGTGAT -ACCCCATTAGCACAAATCAGGTCATGGCTAGAACAGACACCTACCAATATTCCATGCAAA -GGAGCTGCCGACGCTAGCCACCAAGACAATGGCATCTATAATGAACTTGATGAGCTCAGT -GTCATCTTTGAGTTGAGGCGCAAAGATGCCCGAAAAGGCACCAATATCATTTTGCATTCT -ATTCAGGGCATCTCCGAGAGCGTTGTAAGTTTGTTGATGAAGCTAGTAGAAATTTCCTTT -AGTCAGTCGGCTGCCCATCAAATCCAGGGAAAGGACAATATACCTGGTGAATCGAGGAGA -AAGAATTCATAATCAGATATCTGAATGACCATGTAAGCCCTCAATTCTGTAAAAATTGTC -AAAAGCCTGGGGTTTACTTACCCGGCAGGATACTCTGCCTGGTTACAGGTCAGCACTGTG -GAGCAAGCAGTATTGCCAATATCTGCACAGTTCCAATTGTCAGGACCAGAAAAGTAATTG -CTAATGAACTCAGGAAAGCCCAGGCCAGTAGAAGGACCGGACGTAGCCCAGGCATCCAGG -GCAGATTTCAGGGCATCATCGGCATTGGCTGCATTCCAACGAGCAGTGGGATCCAGTGTC -GCATCAGTGATGTCGGTAGTACACTGAGCATCTTTCCAAGCCCCCGTGGCCTGATCTGCC -AATGCAGCCAGGTTTTTGAGCCTAAAGACAGGAGTTGATGAGGGCGCTGAAGGGGTGGTG -GAAACTCTGGTAGAGTGAGGAACAGCAGCTACGCTGGCCGCGTAAAGGAGTGAAAGGGAC -GTCCCGAGGAAAAAAGAGCGCATTTTGAATTCAACTGTAGGTAGTTACAAGAATCTGGAC -AATAGAATGATCTGAAGGCACTTGATCCTTTTATACATGGTAGCCCCTATATTTCCGTTT -CGACAAAAATAGACCAAACATCCTTGGTGAACGACTCCTAATGCCCGGACGCCCAGGTAG -CCAGGTGACAATTGTCAATTTGAGAAGATCATCCCATAGACCGTTTCTACCAGCGATGTT -CTGTGACTTCCCAAAAAGGAAGTTGGTAGCCAACGGTGAATGATGTCCCCAGGGCTTACT -TATAGTACGAACATATATAGATGGCTCGTCCAATCAGTCAAGCCCAAATTTTCACGTGGC -GTGGGCCTTATTTAGATTTCGCATCCTTCCGGTATTTGGCTCAAAGCGTTGATGTTCCTT -CTTTATATTGTATTTTCTTATTCACATGAGTTTCCAGCATTAGAAAAATATGATATCAGC -CGTACGTGGCTCTTTATGGCCCCCAAAGAGATACCCAAATGTAAATGAAGTACCAAGCAA -ATTTTGCCCCACAATCGAAGCACGATGTTCCTACAGCGATGCCGATTCGGTGATGTTGCC -GAGTGCAGGATTTTGTATGTTTCAACGTTGTGGAAATTGGGGATCAACTACAATGAAATT -GACTTATGCATGGAGTAATGACAGCATAGTCCATGTAATGCGTGCTATATTTGATGTGCA -TTTTCTCAACTGTGTCCCAACCTTGCGTGAATATAATCTTCAATTCCAGAGACCGATGCT -CGGCCTCAGAATGAAAATTTGTCAATTTGGTACTTTTCTACTGCTCCGCCAATATACATG -ATACTCAGTCGTTAATCTCACTCTCTCTCCTTGTCTTGACCAGCACAGGATTTCCCAAAA 
-CATGTATTGCCCCAGATACAACAGCCCGACCGCCGACTCTCTAAAAAGCACTTCGCTACT -ACCGCAATTATCCCTGTTTCTGCCTTGTCGGGCGTAGCCCCAATCGCCAGTAATGCCGCA -TTGAATAGAAATTACCATCCATAATTCTAGGAGCTTCGCTAGAACTTTTTCTAGCTCAAG -CAATCCATGCGAGTTTTGGATGGATCCGTACCAGTAGACAGCGCCTCTGAATAGTTCATA -GGTATCCGGTGAGAGATTAATCCTTCGTAGTGAGCAACAATACCAGTAGAGAAGGTGACA -AGGCTGCCGTCACTGAGGTGCAAGATTACGCAGCTAATGGGGCTATTAGTTTGAATCTTC -CCTAAGGCTGAGCTCACCATCGAGTTCGCTCCATTTCAATCCAGGTAAAATCTACCATGA -GCAACAAAAATCAGGCATCAAGCCTTAGCCACTATGCTCCTCCAGACCCATTGAGCCATG -TAGCCTTGTAAGTCATCAAGTTCTGGTCACCAGGGGCACCAATTGACGTCAATATACTAA -TTCTGTCCAAGTAGTGGGGATTGTAAGGTAGATCAACTGGAAATTGCAGCTGATTGATAC -ATGACGAATTAAGGCAACCAGCACCTTCTGAAAGTGAAACTATCCTCACAATTGAGTGGC -GTGTTCGAGATGACCTTTTGCTGCTCGCTGGTTTTGCAAGTGGGCAATGGGTATGGGACT -GCTTCCCAACGAATATCAGTCATTAATGCAATGGTCGATGTGTGCCGGGATGTGGAGGAC -GAGAAATTTTAAATATGATAGTTTTCGCCCCCCGTGACCAAAGTGTGTTGCCGGTCAATG -ACTACATTGGAAGAGAGAAATCCTTCTTCGCTAACATGAAAAGGCTAGCATTACAGATAA -ACAATGTCAGTGCCTTTTGCGAAAGATTGATGGGGGTATATTGGTTGCATCTATTTGGAA -GGCTTTGTTGAATATCATTCGAGAAAAACGTGATAATTTCTTTGTGAGCTGCTTTTTTCA -TCTTTAGCTCAGCTAGGTCCGTAGTTGAAAGGATGATATCCACAGCGTACCACAGTGTAG -CTGACTAGCAATTTATCTAAAACACGAGCCAAAAAGCTCATGCCTATGTAATCTCTCTCG -AATCTGGGTGGAAGGACACAGGACATTTATGCTGGCCAGTCATACACTGGCCTCAAAGGC -CGAAGCGCCTGACAATATCCCCAGTATGCCGGATACCACCAGCCAGCATATTCCCGGTCA -AGAATAGCTGTAACCCGGCCGTCTTCAACAAGAATATTTCGCGGGGCAAAATCAGAATGC -GTGAAGACGATCTCATGATTATCCATAATTGCAAATTTGGCATAATTTCCTAGTATATCG -GGTGCTACTTTCGTCATATCTCGGAGGATAAATTGGTTGACTCCTGCTCGGTATCAAATG -GTCCTCCTTCGATTGACGAAAATTTTCTCATAATAGCTTTACCACGACCAACAGCCCCGA -TCTAGCCACCTTTCAGTTCGCGAAGTTGGTTTATGTGGGTGTGCAGCCCATCAGCAATAG -AGAGCTTCTGACTGGAACCCGAAGGTTTCTATGCTTCATCAAATCGCTTGCCAGGTATGT -AGTCCATCACGGGAGTAATGACTTCATCATCTTCATAGCGACCATCATGAACCTTGGGTA -CAGGGATTTTTTGGTTTGCTGCAATCAAAACGAAGTGTTTCCCCTTCGTGAGGGCGGAGA -TCACCCGACTTCACGACGAGGTTGTCGCCGGATCAGATGACAGTGCGGCCATAAACCGAA -TGTAGTACCGTTCCATCAGGTATTCTTACTTACTGGTTCTTCATCTCTTCTTGGTCACTC -TTTGTGTTCATGTTGTAGGGACGATTATTGCTTCTTCAATGGCAAATCAGGGAGATTATG 
-TGGGAGGTAAAATGTCCAAGTTAAAGGGGAAACAACTAAAACTCCCTCACCGCACAGCCC -TGAATAAGTACCTATTTGGATAGAATATGCAAGATAGACAGCTCAATCAGAATATTGGGA -AAATATCCTATTTAAAGACTAATTAGCTAGGCTAATAAATAATCACGAAGACCATATGCC -CGATGCTATATCCCACAGTCCCTCGAGAAAATTTCTCGGGAACAGACGATAGCAAAGAAT -AAAAAAGAAACTCACGCCTTTGGAGGTTTGTCGGACTCAGTTGTGAAATCGATTTCGGAA -CCTTGACGCCATTATCCATGCTGCGATCCAACTGAAGAACTTGCGGCACTTTCAAAGATC -ATGTAGAGAAGTGAGATGATCTTGGCTTTTTCGAATTCCCAGTAGTAGCTTCGAACTACG -TACTAGATAACTTGTTGGTGATATTCTAGTATGATTCGTGGGTTTTCTTACCTCGCTATT -CTCTCGAATGTGAATAAACTGCCACTCGAAACTATCGGTCGCAATGCCATACGTTTTGAC -GTGTCAGACATTTTTGAAGGTTTCCTTGCGTGATAAATTATCCCTATCTATCTGAGCACT -TGCAATGACGGAAGTTTAGTATAATTCAGCATTACTCACCCACGTAGGCTAAACATTGGA -ACATGCCGCCCTTTAGATACTCGGGTTTTTTTGCCTCAACCATTACGAGGCTGGTCTCGT -AGTCTTCTGGTTTTCCAACCCATAGGGAATAGTCTACCACGCCGGAAAGCAGATAGTTCT -TGTCTTTGACTTGCCATGGACTTTGGATACTCGGTTCAAACTGAAGATGCAGGGATTTGG -CGGTTTCTGTGGAGATGGCAGGTCCGATGGAAAGACGTGGATTAGCATCTGATTCTTTTT -TCATCTAAGCGAGAGTGCTTAGCATAATAGCGTCGATTCGAGATCGCAGAAGGGCTTCAT -TTGGAGGTGAGCCGCCTGTTGCGAGGCCATAATCTGTTAGAACGTGTTCTGCTCGAGTCA -GCATATGTAGGTTCAAAACTATAATGATAAACTGATGGACCGAGATAACTGGGCAACGAG -ACCATTTCGTCGTCACTTGGGGAAAGTTCCCCATCACTGGGTGAAACGCGTTCCAGGGAA -AAAATAATTGGTATTTCCGACGGCTCTAGATCAGTGACTGCCTTGATCGGCAGCTTTTTG -GCTTCGTTTTTCAGCTTGAGACACTGGATCTCAGTATTTTATATGGAAGAGGAGCGAGCA -GAAGGGAGTACAGGTGCGTCAATTTGTGCTTAAAAATCAACGAGGATAAAGATGAGGGGC -GCTGGGGGGTTGGTGATGGTTCGAAGAATAGTCACTGTTGTGAGTAACTTCCGAGCTTCA -ACTACCCAAGAAAGACTTCGGAGGTTGGGAATTCAGTATTGATGATGGCTCTATAACCTT -AGAGCTCGAATATTCTAGATTTAATATCCCCTAGGAAGAAGGACTATTTAAAATTACGAA -CCATTATCTTAACCTCTATGTCAATAATTGGTAGAAGACCGGTTCCAAATAGAGATCGGA -ATTTCATAGTCCTATTACCTCACATATATCGCAGGCCTCGAGTGTCCAACAAAGCCTTCT -AGTCATTTACCTACTTAAAGGAGCATTGAAAACATTAAATACATTGTCGTTACTCTGCTC -ACTTGTCCGGTCACTATTATCAGTGGAGTCAAAGGATGAAGTAGGGAGCTCTTCGATCCG -ACAGCCATTGATCCGATGGACAGTAAATGTACGCAGGGACAAGCTTGGCGTTACCTGTCT -TCCGTTGAGGCTGACTACCCGACTGATCCTGCCCAGTCAGATACCTTGATTGTATACTAA -ACATCATTTATAATCAGGCTATAGTCTACACAGTCTACTATACCCCTTTCCCAACGGCGA 
-CATTATTAGCAATCCACTACCTGCAGGTTCATTTGTTCCATCAAGGCTGCCATGGTAGAT -GAAAGGGGGAAAGCGTAGGTAAAAAAAAGCCTAGAATAGCCATGTCGATAACCTCGAATA -CTGGCTACTACGACACAAGGCAGACGACCCAGATTTAGGGGTAACAAACCTTAGGACAAG -CCATTAGCAATCAGCCTAGGTGGCAAATTTCACCTCGATGTTTCATTCGCGACCCGGGAT -TGTAGTCCGTTGTATGCTACCATGCTAGGCTGGTATACTCGGAGTAACCCTGTACTTCTC -AACATAGATCTACCGGGGTAGAGGGTGGACAAATTCCGTCTCGGAAAATTTGGATCAGAA -ACTATATGCAAGTAGGTCCAGGATCACGATAGAAAACCTGCATATTCGTCAAGCCACTAC -ATACACCATTCAGCTGAGCGATATATATTAATAAGAAGCCTGGATGTTTTTCTACAGGAT -CCCCAACGTAAAGCCGGTGATGACATATTACACGCATTATGGCATGGTTACTTTCTGGGA -AGGTCAATTTGATGCCTACATACACTATAATCTCCAATGACCCATGTCCAGCTGAAGGAC -AAACCTTGTCGCACCCTTATCTGGACAAGGCATTTTCTGCCTTGCGCTCGGGCCAGGTTA -AGCCACAAATGGCTGCCAGGATAATACTTTGCTAGTCCTACTCGGTGAAGAAAATTTCAT -GCTTTCATGCTTTCATGCTTGTAATATACGTGCAATTGTTTCTGCGGATATCATGCAAAA -CACACGAAGGACATGTATGTAATTTCCCGCCGATGTACCCATCTGGCTCAAGCATCGGCC -CGACCCACTAGTGGATCCTAATGGGCATATCAACCAGTCACATTCAATTATAGACAGGGG -AGGCTCAAGCAGAATTTTCTGCAAAAAGAAAAAAGAAATCTCAGGTTTGGTTTTTAGGAA -TAGCCGAACGCGGGGGTCGAACCCGCAGCCTTAAGATTAAGAGTCTTACGCTCTACCGAT -TGAGCTAGCCCGGCTGGGCTGTTGAAAGACGTTGCAACAATACAACTCACATTGGCACTG -CGGTAGGGTGTGCATAGGGCGTGATTTGGTCGGCGGTTAGTATGGGAACGGGGGATGAAT -GTCGCCAGGGGGATACCTTGACACGAGACATATCAAGATGATCCATGAATCAACAGTCCA -GCGGCGTGTGATGAAACAGAGCACGACAAAAAAAGAACAATCGGGCCTTTTCGAGGTTGT -CATGTGCAATCCTAGGCTCAGGCCCCCTGGAATAAACAGTATGGCTCTCTGGCGCGTGTG -GAGGTAGAAAAAGAAGGGATTGAAGAAACGGTAGCCTGGCGTGTGCGTCAGGCGATGTAC -ATGAAAAGATGCAGAAAAAGACGCAAAGAATCGAAGACAGGGTCATGGCCAAATAGAGAC -CGTTGGCTTGAGCGCCAGCGGCGCAGCGATAACCAAGAAATTGCGAGTAAGCAGTCTCCG -AGAAAAAAGACATCAGTGGCTGGTGTGTCGAATCATGGATCAGAGATAGCGCAATTGAGG -CTGAGGCTGAGCAACTGTCCTATATTGGGGATAGAGGGAAAAAGCACTGGATGAGAGTTC -ATCGTATACCTTGTGGCTTGCGGCTAAGAGGGGCACTTGAGGCTGGTGAGAATCCCTAAA -CGTGGGGTCCAAAATAAACGATCCATGGTCGCACACTATTCTTAGATAGTCTGAAACGCT -CCAATATCTCCAAATATGGAACACGAGAACAATCCTTGAAGTCCCAGCGAGGTGGTTTAG -GGGTTTCAAAGTATCTCGGTAGACGCTCATGGCAATGCGGCGGTTGCGGGAGCCTATGAG -CGCGGGGAAGGGGTGGGAGAGAACCCCTGCGATCAAACACCTCTGTCTGAATGTACTGGC 
-TCTGATCAAATCTTGAGGGTTCTAGTTCCGCTTGTCAAGTCAAAAGGGTTGCAAATGACG -CGAACCTGAGGCCTGTTCTCAGGGGGACGAGGGGGGGCACGGGACCCTTGTCATGGCGCA -GGAACACGAGAAGAATAAGCCACACGAGATGGTAAAGTGCATGAAATGCGTAATAAAACC -ATCAAGTAAGAGAGTTGATGTCATGAAAATAAGGTGGAAATGATAATGCCTTGGAAATGG -GGTCGGGGAACCGAAAAGGAGATAGTGGGAATAGTGGGAATTGAATAAGTGCGGTGACAG -TTGACCGTGAATTAGGAGAGTTGAGCCAATCAGGGGGACGAGAAGTGCTAATCCGGGCGG -TATTGGAAGCCAAAGACACAGCCTCACTAGCCACAGTCAGTAGATACAGCGAGGTACCAG -AATAGCACTTGCATTTCCCGTATCTTTAGCGTGAGACGCCAAGTAGTGTGGAGGTATTGC -AGATCTCTAAGGCTCCAGCAGACACCCTACACTCCAGGCTACAGGGCACGATCTATACAC -CCTGCATATCTATATAATAGCCCCCCCCCCACCTCTTTTCACTGGAATATCTACTTCAAT -CTACAcacatttgacattccacattctattctacatccaTTGTCACACTTGGGTTTCACA -TCCGATCATCTACACACGGTGATCATCTACTATCTTCGCGTGACCCTCCATAAATCAGCC -ACGCTGTCGCCCTACCGCTCGACCTCGTACTCCGTCGACCCTTCGACTTTAACTACCAAG -GAGAGATCTCCTCCCCGTCCTGACCTGACAAGACGGCTACTTGGACTTCGCCGTACGAAC -ACAATGCCTGCCCCCATTGAGGTGATAGATCCACTCGCTGTCGTGCGTGATCTTCACGCC -GACGAGAAGCTGGTGGTCGACGAATTCGAGCGTCACGCCAACCACTGCCACCCCTGCTCC -CACGCTGTCCAGACATACCGGGATGGCCATTCACTCTGCGAGTCAGGCAACACCCGCGCC -CGCACTCTCCAAAACTACCTCTATGGCCAAAGCGGAAAGCACTACTCTACCGTGGACCTC -GAGAGCGGCAAGTCGACACGAGTGAAGCTGCCCCGGGATGCATTCGCTACCCGTGAGCTC -CTCACAGCTCTCGAGCAAGGTATGCGCATCCGTGAGAAGGCCATCGTCATCCAGAAGCCC -GTCCAGCCCGCTCAGCATTCCCACTCGAGGAGCTACGATCAGACCTACCACGTCCCAGCT -CGCCAGGTTAGCGGCAAGCAGGTTCGCCCTCGCTCCATGTCCCCCGAGGCTTACCAGCTT -ATCGAGAGATCCCCGCGCCTCTCTCGCTCTCCCACTTCCATCATGTACCGTTCGCCTGGT -GGCTCTCCCTCTCGCCCATCCTCCAGCCGTGGATCCTTGTACAGCGTGGACCGACTGGAG -CGTGTGGAGCGCAACTCCGAGACTCCTCGCCGGTACGTCGAGCGTTCGCCCAAGCATCGT -TGAATGCGTACTTTGCAACGTACCTGGCGACGGATCTCGACTCTGGTTTCTCTGTTACGA -TCACACCCACAAATACCCATACACAAGCCCCCACCCCACCCCCAATCGCTTTGAACAAGC -TTCAATTCACTGGCGATGGGCATTGATTGCCCACACCCATAGACCGCGAGACGGTCTTTG -CCTGTAATGTGGAATTTAGCACATTATGGGCATACACCCCCCTTACCCCAGTCTTTGATG -ATCTGGATTGATAGGGCCTGCCTGGATGCGCCCTCGGAAATTCACCTTTCTGTTTTCCAG -CTTTGATTCTCTGACTCGAGTGCACTTCGGGGTTCTGGATCTGCGGGATTTCCCAGTCTC -GGACCTCGACTTGATTGTGACTCGATTTTTTTGATTGAACGCAATTATCTGCAGGCTTGG 
-CCTGTCCCCATTACTGGTGTGCTCCCAGAAGGAGCATGCAGGTGTGGGGAAAATTGGCCA -ATAACGGACGCGGAAAGTCGACGATGTTCATGCATGTCTTTTGTTCGGTTTTACTACACG -CTTCATTGGCTCTTGTTACTGTGACTCACTTTTTTATGTGGACCTTGTATTTCCTGACCT -TTTTTCCACTTCGATTTTGCCGAATTTCCTTTGTCTTTGATATGATGTTGCCTTTTCGGG -ACAAGGCTGTCAGCCTCACACTCTTTCGACAGCTCCCACTGGGATTAGTTACCACGTGTT -ACGATTTGAAATATTGAATGTCTCATTGGAAAAACCAGATACCTTCCAAGTTCACGAGTA -GAGAATGGACAAATCCTACGTAATGAGACATAGGTATTTTAATCAACGGCGGACAGTCTA -GTCATGAGGCTACCCCACGTTAATCAAGTTCCGACGATCATACCCACCAACACTTTGTGT -TCCCCACCATTTTCACTACATATTTTACATGACTACATGCTGAGGTTCTTACGGTTCTTT -ACATGACTCACTGCCTTGTGGCTGAGGCGGATGATCTGCCTCAGGCGCGGAAATATTTAT -CTTCTGCTTATATCACATGTACAAATTCTCCATTCCACGTGAGTTTCTTGGAACATGACG -TCGCAAATGATACCAAGATTGGGATGTCGAGACGAAGAAATTAACATAGAAATTAGAGTC -ACCGGCTACCATTTTGAGTTTTTTCTCGACTCCTGATACGATCACTACCTCATAATTCCC -ATATATAGGCATGGGATTAGAATTCTGAGCCAAACACAGCTATTCTTATCACTGATAATA -ATTCGGCACCTTCAGTCCTTTGAACTCAAAGGAAGTATTCTCCCTTCGATCAATCACGGT -CTTGTGGTGAACATCCCGAACCCGAGCTCGAACATACTCGGTCCCCGTGGGAGCAGGGCT -GCTTTGACTTTTGTCTTCTTCAGTTAGAAGACCAAGACGGTCAAGCACAGGCGAGGGCAC -AGTCCTCATCACGGTATGATCACCCGGTCTACAGAAATAGAGCAACCCAAGACGGGGAAT -ATTGATCTGATCTGGGGGAGGGGTAACAACACGATGGATCGTTGATTTGAGGAAGCCTGT -CGTCGTGTGTTAGCTAAATCTATTGTGGGGACTTGAGCACTTGCCGTTGGTAAGGAACGA -TAATGTATCCGCAACATTGCACGTAATCCCACCTTCAACAGGCTTCACGTATTTCCATTC -CCCATCTGGCGCTCGGATCTGTAAGCCAGCAACGTGCTGCGAGAAAAGCAGGGTCAAGCT -GCCGAAGTCCGTGTGACCGCCCTTGGTGTACGCCTGCGCTTTATCCCATTCCTCCTGTGT -GCGCGTGTTGTAGATCATGTATCTCAAGTGGTCATCGGAGCGTTTGTCATAGGCGTGGGC -TTCAACAAGATGGTTTTCTGGCAGCTCCAAAATAATCGCCATCAGCACGAACAATTTGCG -GATAACTTTGTCAAAAAGATCTCGATGGAAGGTTGAAATTTCATCGTAGTTTTGACGGAC -GATCCTGTGTTTAGGGACATCATCCCATGCAGGGATGGCCTTGGGGATATTGAGCTAAAC -AAGTTAGTCCAATTCAATTTTTTATGAAGACTAGTAAGTTCCTACCATTTCTATGTTGTC -TTTCACACCCGTTTCTCCAATCCATCTGGAGTTTTCACGGTAGCCGAAATACTCGCCCTC -GGCAAAATTGCATTCGAAGCTGCGCTTTTCCTCAAGACTCTCTTTAAAAAATGCATTTCC -AATACTGAACTGGCGTAGAATCCGCTCGTCATTGATTCCATGTCCGACCACCACCCAAAA -CCCGACTCGTGTCACGGCATCATATAATTGCTTTGCGAGATCTTCCTTTCCGCCCGGTTC 
-GTCGAAACGAGAAAGATCGATCTCAACTAGGGGCGCCCAGTCAAGCTTCTCACTAGTAGG -TGCAGGTCGGCTCCATGGCTTGACGGGGGCCTTTTCGATTTGGGTTTGTTCTTCTATAGC -GGAAGGGGCCATATTTGGTTTCGGGTACAGCGACAATGATAAACTAGAATACTAACCTAG -ACGAGAAAACTGGACTTATATAACTGACTTGAAACAAAATCTGATTGGGATCTACAATGT -ATTGTCAACACTCGTATGTGCCATATACTTGCTCCAAAGATGCGGGGTCTATGGTATCAT -GCTCGCTAGGATGGCGTTATATCGGCATCGTGCTAATGTCGGTAATGCAAATTTCTGAGT -TCTGATCGAAACAGCCTTCGTACTGCGAATTGCGAAACCCACTTGGATGTATCGAAACAT -GCGTAAAATATTTTAAGCGGCTTCGAAATGACAGGATGTTGTTTTAGAATTGTCAACCCT -ATGCCGACAGAAGATACAGGTGAAGAAGGGAACGGCCTTCCGAGAGAACAATCAGTAAAG -ACGAGTCGAAGACGCAGCAGCTCTGCGCTCGAGGGCTTACAACACTCTCAAACTCTGAGG -AAAAAAAACCTATATGTATTTTTGGCAAGGGAAATGATTACTCAAAAGAGAAAAAACGAT -AGCATTATCCCAATATTATCCCAAACACAAATTGAACCTAGCAGGAAGCACCAAGTAGAG -GATATCAATGTTTTCGGACAGTCTAAAGAAGCAATTGGAGCTCTTTCTCTATCTCCTCCC -ACTCCATTCCTTCATGTCCTTTCTCACCCCCTTCCATTCTGTCTCCTCTCTCCCCATCCT -TTCCTTCATCTCCTTGACTTTCCTCATCACCTTCATCCTCTTGATCCATTCCAAGGTGTA -CTCATTGGTTCCCGAGCGAGAACAGCTGTGACCCTTCCGTTAAGCTTCAGATCCACTTAC -TCCTCCGTAAGCTCAGTCTCTTCTGGTTCTAAGAGATAAGGTCTCTTCCGCCCAAAATCG -CCAATACCAAGGTCTTGACACAAGGTACTTCTGCAGAATCGCCGATTAACCTTTTCCCAA -TATATTGCGTCGAAGGAGAAATTGTTCCGTGCAGCGTAAGGTGGATGAGTACTGTGTGTG -GTTGGAAATACAGGGGAAAGAATTGACAATGCATGTGTATCTAAAAAGATCATGAGATAT -GATTCTTCCGTACGAAGGAAAGTGAAAGCCGCGTCTGGCGGCCTAAACTCGACCTATGTT -GCATCTGAACTTGATAGAACCTTAAAACTCTAAAAATTCCACTGTGCACTTTGGTTATAC -AATATTGGGAACAAAGGGCGGAAAATAAGCCACCAAGTTCATAGCCTAAACCACCTAGAG -CCAATCTTGTCGAGTCCCCATAATTATAGTGCTTAAGACTATGCATGCCGCAATCTGAAG -GAAGAATGAGTCATGAGGGTTCAATTAAAGTCAATGTCTTGGGGATAGGAACATTCGAGG -GTGAGATGTCGTGATCCAAACTATCATGGCACCTTGCTCTGACATCCCGCGTCGCCAAGA -GTCGGGGAGTTCCAGTGACACTTAAGAATCGAGGTCTTGCATGGTGAACAACTTGTTTTT -GCTTGCTCGATGAGGTCGAATTACTCAAAGCAATTCATCCTGCATCGAACGGACTTGAGA -AATCGACAGTATGATGTTGGATAAGCGCGACGAACTGGAAGTAATAACTTCTAGTATACC -TTCAACTCCGCCCAATGAGATCAATGGAAAAGGTTCAGAAAATGAAGATCAAGCCAAACT -CGAGAAGAGAGTGATGCGAAAGATAGATTTTTGGCTCGTTGGGTTTTACTCAATTGTATA -CATCTTCCGGGTCATTGATTCCAACAACTACTCCAATGCCGCCATAATCAATCTAGAAGC 
-CGGCACTGGAATCAAGAAAGAGCTCAACTTTAACTCCTCACAATGGGCCTGGACGCAGTC -TATCTTCTCCTACAGCTACCTTATTTTTGAGCCTACCAACACCATCCTTCTCAAACGAGT -CACGCCCTCGAGATGGATGTTCGTCCTGATCTTGTCTTGGGGTATCTGCGCTTGTGCTGC -CGGCGCTGCACAGGACTTTCCGGGGATGATGTGTGTGCGATTTGCAATTGGAATGGCTGA -AGCTGGATTCTACCCCTCTGTGCTCTATCACATGGCATTCTGGTACAAGCCGTCTGAGCT -GCGCTGGAGGATTGCTCTTTTCTACTCTCTTGGTCAGGTTTCCGGTGCATTGAGTGGGCT -GTTGGCATATGCTATCAGTTTCATGGATGGCGCCGGTGGGATATCAGGCTGGCGGTGGCT -GTTTATTATCGAAGGTCTTCCGGCAATAGTACTGTCTGTGGTAGCGCTATTCGGTCTTCC -AGACTATCCCGAGAACGCTCGCATGTTGACGGAGGAGGAGAAATCCTTCCTCAAGGGTCG -CCTATCGTCCACAGCCCCGTCAGGCAAAGACAAAAGCTGGAACTGGGGGGATGTAAAAGC -GCTCCTTTCAAGTCCAACCCTGTACACGTTCACCGTATACTGGATTGGACATGGTATTGG -TGGATTCGGAGTGAATTACGCTTTGCCTACTGTTATCTACGAGCTCGGTTTTACCTCGAC -GGCTTTATCACAGTTGATGAATATTGTGAGTGGTGCTCAATCTAAGCAATCCAACTAACG -GGCAGGAAAACGCTAACCGAGCCCGTAACCAACAGCCCCCATATGTCGCATGTTTTTTCT -TCTTGAATGCCCTTGGATATTTGTTACACAAAGGATGGATTAGGCCCTGGACCACTGCTG -TTGCGAGTAAGCATAACCCCTTGAAATGCACATGAAGCTCTTACTGATAGCGAAAACAGT -TGAAAGCACGATAATCATATGCTATATCATCCTGATCACAGTTCCAAACTCAGTCGTGAA -ATATCTCGCTTTGGTTGTAGCCACCGCTTGTGCCGGATCTGCATACCCTGTCATTTGGCC -AGAACGTATTCGTGCGCTCGAGGGCACGGTGGCCGTGGGAATTGGCATTGGGTTGACGAA -TGCCATGGCTCAGTTCAGCGGCATTGCTGGGCCTCATATATACAGTACGGTGTTTGGGCC -GACGTATCGAGTGTCATATGTTATCTGTCTTTGCTTCTTGTGTGCGGCTATCTTGGGTAT -TCTGGCTTCGTGGTGGCTTGTCTGGAGGAACGATAGGAAGACTGAATTAGACACTGGGAT -CGGAGAAGATGTATGATGATGCGGGAGACAAGTCTCACTCTAAAGGCGACACTTCGATAT -TCATATGGACAAGATGAGAACACCGTTGATCTACAGCCTTTGCTCAACCACGTATATGGA -GTCAAAGTTTTGGTATATACTTCTATATTGCTCGATTAATTTCTACTTGATATAATTGAT -CACGATTTGATGTGTGCATTCTGCAGGAACTAGGGGGAAAACATGCTCAGCATCGCCTAT -GCTATTAATTAATTGGAACTTATAGGTCTCATCTACGAAAATGCTACATCAGCCTTTTCG -AATACTCAACTTTGTATCTTGATACTCTCAAACTGTAGCCTTAAAATAGTAAAGATTTGT -GATAACAGAGAATGTGCCTTGAGATGCAGGAGTCTTGGATATTATATCTGCAGAAATTAG -CCAAAGAAACAACCCCTGACCAATTGATCTTAACTACCTGCTGTCAGGCAAACAAGCTCT -TTTAACATTTTTTTCCCTTCCAAGTTCTCGGCCCCTTCCATCTCGCACCTGTCGCGCAAA -CAGTTGCTCGCCAGAAAGCAAATTGCAGCTTGAACGCTTGCAAAAGCCTCGCCTGATTAA 
-CCATCCATCCACTCTAATAAAAATGCCTCATGCTGGTCCCATGGACTTCCAGGCCCTCAT -CCTGTGTGGTCCTGGAAGTTCACTCAACACCTTCACCTCCAGACCCGAAGAGTATCCAAA -GTGCCTCATTCAAGTCGCAAACAGGCCGATGGTCTTCTATGCGATTGACTTCTGTAGACG -ATCGGGTGTCGCTAGTAAGCCTGAATATTCTCTCTGTCTCTCGATCTCCTCTATTGAACC -GAATCGACTAATTATATCTCAGACATCACACTGATCACCCCTCCACTGTCTTACCCTCCA -CTTCGAGCCGCCTTGGACCAAAACCCATATCTTACATCGTTCTACTCACTTTCGGTCTCA -GTTGTCGCACCGAAAGCCCTCGCGATGACTATGGGAACTGCAGAGCTACTCCGCCTTCCC -GAAGTCCAGAGGTGTATTAACACTAATTTTCTCCTTCTTCCTTGCGAGATCATCTGTGAA -ATCCCGGGCGAGTCAATCCTCGAGGCCTGGATGTCCACCCAGTGTGTGGTGAGCGATAGC -GACGATTCTAAAGGGGTTCGCCATCCATCCCCTCCAACCAAATCCTACTTCAGCCGCCCG -CTGGAAGCTCGCTCCGGCGGTATCTCTGTGTACTATCAGACCGACAACCGAGAGGAGAGC -ATCCAAGAAGAAGCAACCGATTTCGTAGCCATCGCACCGCTAAAGCAGGATGAAGTACCT -ATTGTCCCATCCCCCAAGGGACCGCTACTCCCACGCTTCAACCTATCCAAGCTTCTCATG -TCAATGCCGATGGATACAATCAAGGAGAAAATGAAACAAGACAACAGCCTGCTCATTCGT -CATTCTCTCATCCAGAATTGTGCACGCGTCAGGATTCTCACCACCTTCCGTGACGCCCAT -ATCTACGTCTTCCCATACTGGGTCAAAGACCTACTCCATTACCAAAGCAGACTGAAGTCC -ATCAGCGAGGATCTGGTTGGAAACTGGGCCAAATCAGAATGGCAGGAAGGTCTCGGCGAC -AAGCTAGGATTGACGATGATCTTTAATCAAGAGACACCCAGCGAACAAGAACCTACTAGC -CCTCCACGTAGTCCTACTGGCGCCCCCGTCAAGAAGGTCATCAATCTCCGGGACATGAGT -ACTACCAGAGCTCGCTCGAGGTCCGAGTCACAGCCAGGGAATCTTTTTCAAACGGCTGCC -ACTGAGCTGCCACATATGCTGGCCTATGTCCACCGAGGTTCAGTCCCGTTCATCCGACGA -GTCGATAACACAGGGATCCTTCTTTCAACCTCGCTTTTCCTAGCAAAGCTGCCATCTATT -GAGGAAGTTGGCTGCAAGGCAGCGTCGCCTCTTGCGCACGCCCACAAAGTTGCCTACCCT -GAAGGTGTGGCTTCGCCTAGCACTGTTACAAAGAAAGATTGTTTGCTTGGAGAGAATGTG -ATCGTTGAATCAGCTGCCGTTATCAAGGAGAGTGTCATCGGCGCGAACTGCCATATTGCT -AGCTCTGCCCGCATCGTTCGTTGCGTTCTGATGGAGGGTGTCGATGTTGAATCTAGGGCT -CAATTGACCGTATGTGTGATTGGTCGACATGCTCATATTGGTCGTGAATCTGTGCTGAAG -GGGTGTGAGGTCCAGGATGCTAATGTGATCTCTAAAGAAACGATTGCTAGGGATGAGAAG -TTCATGGTTTCTGAGACTCTGTTTGTTTAAAGAAGATGGTATGGTGATAACCGGTTACAT -TTATTCCAGGTCAACTGGCGTGTTTCTTGATAAGCAATCTTACTTCAGAAAACAGCAGAA -AAATAATTTCACAATCACAACATGTGCCTAAGTGTGGATTCATCACTTATAAGACCAATC -AACATCCTCTCGCTCTAAACAGTGTAGAGCAAACTACCGATATTACGCTTGATTCTAAGC 
-TTTTAGGAAGTCAACTAGACTAATCAGAAAGGCAAATTACATACTGCTTATTGCTGTAAT -AATTGTTGATTTCCCAAGCTTTCTTTTGGAGTTGTAACAGATAGCCAATAGCCTTTTCAG -CCAACTGGTGTGTGATCTAATTTCTTTAGGAACCTAGTTGGATATGAGGAGATATACTAG -AAGAGTGATAAAGTGATACAAGACTATGAAATGCAGAGATGTTGTATACACACCATATGA -CAATCTGGATGCAAAACAGAGGTAAAATAAGAGTCAATAAGATCATCGCCCGTACACAAC -AGGATCCTGCATCTCGACAGAACCAACACCGCCATTTGATGATTGTTTAACAATATTCTA -CCCAAGCATCAAGATCCATGTTCTTGATGACAGTCGAGAACTTGTCGGCGTTGGCTGCGT -TACAGGCTGATATCTTCAACTTGGAGAGCACACTTCTGGATCGGACCTTGTAGAGGCCGT -GGCCACGTAGGTTTTCTTCCTTGGTGACATCGGCATTAACATCACCACCGGCATCAGGCT -TGAGGTCAAGATAATCAACGGGAGCAGCGAGGCTGTCCAAAGTATTATCAGAGTTGCCAG -TGAGAGCCTCAGAAGCGGTGCTAGTAGCACTCTCCTTTGGGTCAATATATGCAAGGGGGC -CAGCGAGGCTGCCAGACGTGTCACCAGAGCCTTGAGGCATGATACTGTCAGCAGCGGTGG -TTCCGCTGCCTGTGGGAGCTTCGGTAGTAGGACCAGAAATACCCTCCTTGGGGTCAACAC -CGCTGCCAGTGATAGCTCCGTCCTTGGGGTCAACGCCGCTGCCAGTGATAGCTCCGTCCT -TGGGGTCAACGCCGCTGCCAGTGATAGCTCCGTCCTTGGGGTCAACACCGCTGCCAGTGA -TAGCTCCGTCCTTGGGGTCAACGCCGCTGCCAGTGGATCCAAGGAGACCAGCGAGGCTTT -GAGAAGCACTATCAGAGCCCTCAGGCATAACACTATCAGCAGCGGTACCGCCATTACCAG -TGGGCGCCTCGGTAGTGGTGCCAGTTGCAGCCTCCTTAGGGTCAACAAATTCAAGGGGGC -CGGCGAGGTTGCCAGAATTACCACCGGAGGCCACAGATGTAAGGCCAACACTGCCAGCCG -CGGTGGCCCCGCTGCCAGTCAGCGCTGAGGTGGTGGTGGCAGAAGTATCATCGAAGTCCC -GAGCCTCAAGAGATTCCCCAGAATACTCAATATGGAAAACAGGTTTGTTGACTTCTGTGA -ATGCAGCATAGGTCTCACACTCATTGTACTCAGCACACTGCTCATTGACAGACCACTGCA -TCATATCAATGACCGAGCCGATGATATCGCCCCCGTTCTTCAACCCGATAGCCATGCCAC -GGGAATGCGACTCCTTTGCCAGGAAAGTCAAAAAGTCGATTGAGTCGGCTTCCGTTAACT -CAAGACCATTCTCGTTTCCGTACGCATCGATATTGTCCGGATCGACACCGTCGCATCCCT -TGTCCTTAGCAATATCCAGCCGCGAGCTCATAATCGCGCGGACCTTGTCCGACTTGAGAT -CGAGCCAGCGCTCGCCTGGCCATTCATCGAGGTTGCTGCCAAAGTCTTTGGGGTCGAACT -TGCTAATATCCGGGCGCCAGTCTTCATACGTGCCGGCAGAGAAGTAGCAGATAACTTTGT -GGCCTTTGCCCTTTAGAGCGGTGATTGTTTCAGCGGTGTTTTCAAAGAGATCGATGTCCC -AGATGTCGGCATCGACGGTGGTATCTTCGACTGGATCGACAAGTTGAATTTGCCATTTGA -CTCCAGCCTCTGGTTGCCAGATGTCGGTGTCGTCAGTGCTGGTGGTGTCGCGCTTGGACA -TTTTTTGGTTGTGTCTAGATCCGGGCTCTCCTCGGAGGTCCATACCGATGGCGAGGAGGA 
-AGAGTCCGAGAACACAGAATACGAGGAGCTTTCTGCTAGTCGCTAGGGCCTGCCAGCCGG -AAGTCTGAGAAGCGTTCATGATTTTGAAAAAGTTGAAAGGAATGTTTAAAAAAGGAATGT -AAGAAATGAATGTGTTTGTCAAAATGAATGGATAGACAACTTGAATCCAGCAAGTGTCAG -AAAAAGGAAAGACTGAAGAATAATGAGAATAGACAGGAAGACTTCACGAGTTTCTATCGA -GTACTTATAGTCCGGGACATGCTGACAAGTGCTTGGGGTATACCGTACATCGAGCCGTTT -CAGATTTCCACATATGAGATCGAACGCATACAAGAATCATCAGATCGAACCACTAGCGGA -GGACGTTAAACGAAGCCAAGCAAAATCGAGCCCAGTCGAATTGATAGCTTAGACCAACGA -TTAATTTCAGGATCAGTAAAAATATGAGAACCTCATGAAACAATTGCCAAGAAACGCCTC -GCGGTAGACGTCGTCCGTTCATTGGATCTTTAGCTGACTATCTGACTTACATGTCCCAGC -AGCAATTATATGTCAAGAGGCTTCAGTCTGTTAATAAATTGAATGAGATATATATGCCAC -CCCGATCCTGAGTTTTCTTACATGGATTTGAGAATGCATGTCGGAGAATTGTCTCCGTTC -ACAATCTCTAGGAAATCGTACAGATCCGATTTTCATAGCGTCAAGCCTCGATACAATTGG -ACCAATTACATTGGTTTACTCCTGAGCCACATGGTTGGTCCTTTGTCTCCTTCAAAGAGT -TGAATACGTTCAATTTCTCCGGCCTTGCTTAGAACATTGCCAAGGGCGCTTACCTTGGAC -AGAAGGGTTTCATTCTGGATGTCACCGGATCTTTCTTCTGTATAGGGTTTAACGGCTCTT -GACCCTTATTTGTTTATTTTACCCCTAATTAATGATATAGAAGCAATTGAGTTGTGTGTA -AATTAGACACAAGATCTGGTATACAGGTCTCAATTCCCAACCACGCCCACGTATCCGTGC -GTAGTCGCATATTTCAAGTCAGACCAATTTAACGTGCATAATACCCCCGTGAGTAGTTTA -TTGGTCATCATCGATTTCATATCCATGCCTCCCCTCCCCAAATTCCTCCTGCAAACGACT -TTGCCCAGGCCTATGCACTCTCTCCCTTGTTGTACTCCCCTCAACCAGGGGCTCGCATAC -CTCCTTCTCTTCGATTCTGGAACAAGAGCCAAACAACGTCCCCGAAATTTCTAAGCGTTT -GCATATAGCCAATATGAGACCGGTTCAGATTCCGCCACCCGGGTCCGTTCCTCCAACAGG -ACTGCAACAGTACTTGCAAACCAAGAAGAACAGATCCAGCCCCACCAATAGAGAGATGAC -TCCTGAAATTCCTGGGCGTCTAAAATGCGGGCGTCATCATGTACATCTAACAGCATAGTA -GGCCTGTCTAGACCAAACCAACAAAGACCGATACTGCTTCTTTGCTAGAGTATTCTAGTG -CTTCACAGTACTTGAATCGCGCTTCTTCGTCAGCTCGGCCTACAATCATTATCACAATGG -CGAATACTACCACTGAGCTGGGGTCTCGACTCGGTTTTCCAGCTTGTGATGTGCATCTGT -TTCGGTGTTTATAAAAGACTACGTATCAACAAGAAGACAATGAGGTTATAGGCGAAAAGG -TGCCAGCAGAGGGCCGCAGACAGCCTCTGGGGAGAAAGCATACTCTGCTAGATGCAAAAT -GATCAAGCATAAACTTGGCTGGGGTTACAACGATACATAGAACACGCAAGATATCCTCAG -ATAGACACGGGCTATTGCGCTGTTGCCATCCCCAAGAATTTTTCTGATAGTGGAGTATCG -GTATCATGGACAACTGTGTCGATCGAGCATCTAGTATTGACCTGAGGCGTCTAATAACAA 
-AACCCCAAGGAGGATTGAAATTCATTTCCCGTAGTTCATTGGGTCTTCCAGGGTTTCCAT -ATTAGCAGGGCAACTGGCTTCCCCACACCGACAACAACAATTTGTATTTCTACCTCACTT -TGCTAATTGATATTGTTCTCAAATCCCCCAGTAGCTGCCCAAGATCATATACCTTGTGCA -GCATCTAGATCTCCTGATCTCCATTTTGCTTTTGTTTCCAGGTCTTGTGACTCTCTCGGT -GACTCTCACCCCTGTTCATGTCAATGTTCCAAATTACATGTTCAGAGCCTGGTGTCCTTT -CCTGCAACTTGGAAAGTGGCTCTAAACTGTATAAAAATCCGGGTTCGAATTCGCTTACAG -CAACAAGAACATTTGGATCAACCAACGTATCAGTTTCTTGGATAAGCAGTGATTCTTTCT -ACAAAAAAAGGTATGCCCATACAATAATTCTTCAATTTCATGCCCTCGGTATCCATTTCG -ACCAAATTCTTGACAAGAGTAGTAACTGACTAGTCGGATTTACCTGAAAAACCAGTCAAA -TTGAGAAAATTTCCTCAGAGCAAGGAAAGGTTATATACAACCTCCTTCGAACATTATAAC -CAACCACAATCATTTACTTTACATCTAAGCTACAATAAGACGCAGACAAGGTGTGTCACA -TTTACTACTAACATGATACATGAGAGAGATCTTGCTGGATGCCTCTAATTATCTAAAGCG -ATGTTTCATTCCCTGCTCTCCACAATGTTTAACTACCCACTCCTTCATAAGATCGTCATT -TAAGGATCTCAGGATCAGACAGGATTTTGTCATGCAGTCATACAAGCTCCACTTTCCTGT -CCAAGCCGTTGAAATCACAATGTACCAATGTCAACGCCAAGCCGGCTGAGGTGTAGATCG -GCGAAGGATGCTACGGAAATTGAAGGGCAAAGCCAAGGAATCCTTTACTCCACATTGAAG -TCTTGTTCTCAGTCCTACGGAGTTCTCCGTATATAGCCTAACACATTGTTTATAGTTGTC -TATAGCGATAGCGAAAACCTCCAGATCTCTCCCAATCATGCCAAGTTTTTGATTGGGTCA -GATACCACATAGCTACCTGGTTTCAATCATTCGGGGGCACACTTTCCACTCTTGCAGACC -AGTTCATCCGAGCAGTCGTTCTCGTCCGAGCAAGAAGCACCTAGAAAATCATTGAGTAAC -TTCGAAGAAATATATGGGAAGGATTCTTACCAGCACAGTGTCCCTCCCATTCGCAAGTCC -CTCCAGTAGGGCTGGTGCCAGAGGTAGTTGTGGTCTTCAATGTGGTAGTAGGAGTACCAC -CACCACCTCCGCTTCCCAGCTTGAGGCTCTTCGCAAGAGCGGTCACGAGCTTATCACCCA -TAGTGCGCAGTGTGGTGAAGTTGGTGACGTAATTCTTGTTGAGCGCACTGCTGGGGAGTA -CCGCATCGTCTCCAGTGAAGAGGATGTCTTATTCCATTAGCATATGTCAGAATAAAGATT -TGGATTCATCAGACTTACAGGTAACATCAGCATCTCCGTGGCCGCTGTTTCCATTCAAGT -TATCATTGGGGAAACAGGTCCGGGCCATCAGCCACGAGGCCTCACCGATAACCTCGGGGT -TATCGCCATCGGTGTCTCCGAAGATGCCGTAGAACATCTTTCCATCACTAGCTTTTAGTT -AGAACAGGACAGTGCTGGAACTGCAGCCAAAGGGGGAAATCATACCAGATCACTGCACCA -ATGTTGTTTCCCTTAAGAGAAGACTCGTAGGCTCCAGCGAAGGTGTCTGGAATCACAATG -AATGGCACTTCATAGGCAGCCAAAGCACCGAAGTTGGTCTGTGGCTGGCCATCGGGGTTG -CCCTGCACGGGGTTAGTGATAAGATCTCAGACAGACAGTAATATTCAACATACCACACAC 
-TTGTAGTCAAGGCCATCGCAGTCGACATCCATATCAGCAGTCCAAACAAATGCCGCACCC -TATCATTCATTAACATGTGATTCTCGCAACATACTGATGAGAAAATTTACCTCGCTAAAG -TTGGACCAGTCGCTATAGATGGTCACTTTCTTAGCACCTTTGTCGCTGCTGATGGGATAA -GTGGCTTTCTTTGCAGCAGTGCTAGCCTTGCCAGCAGCGGTCTTCAAGGCGGCGACGGGG -ATGGAAGAGCCAGCAGCGAAATAGCTTCCTGGGGGGCCGGCGTCAGGCTTGTTGAACTTG -GAGCCATCCACGGATTGGGCCAAGCCGGTGCCAAACAAGGCCAAGAGGGCAAAGGGAATT -GAGCGGCTGTAGGTCATCATGATTGTGATAGAAGCTTTGAGATGTGGGAGGAAGATGATT -CTTGCTTCTCAGAGCTGGGGCAAGTGGCCCTTTATCAACACTTCTGGGTCCTGAAACAAC -GAGCAATTAGTGACTAGTAAATACTACTCCGGCGAACCGAACGGCAAATCGCCACCAAAT -GACTTTGCTTTTTACATAGTTCATATCACCTTGTCAGCTTCGGTCATGCAGTCGATCCTT -CAGGTCTGCATTAGGGGTAAGACATACTAACCCAGAAAAGCCAAAATACTACATGATTCA -GCCACAATAGTTGTCTCTAGGTAGCTACAAGACGTTAGTCAAGCTGATTTTGGGGTATTA -TAGGAGAGGACAGGAGATTCGTGCCTGGACAAGGATTACTGCGGGGGGGGAGTTTCAGGG -CCCGCATAGCGTCACATCGGCAAAACGGTGATGTAGACGCAGCCCTAACTAGAAGTATGT -TAGTTGCAGAGACTATATTAATTATCAGGGCGCTTACTTCGGACTGTTCCGTATTAAGAA -ACTGCTTGAAAAGATCGGCACTCCAATTCGGTTCGCATATTTGGGGGACATCCACAGCTT -ACCAGGCAACCAATAATCAATATCATACTAATCGCCCACACTCTCGTTAAAGCTTGAGCT -GAATTGGCGAGCTCCATGAGGATGATCAAGATTTAATAGAGGACCGAAAAGAGCAACAGC -TTTGGCGAGTGAGAAGAACGGTTAACGATGGATTCATGGGCGTGATCGGTTCTAGTCCAT -CCATAGGCCCGGCGGATATATAAATAGCGAAGACTTTGTTGACCTTTATTAAAGCTTCAA -TGATTCCTGTCATTCCTGCGCCCTATCAGATAGCAAGCTGGAAGGGTAGTGTATTCTTTG -CATGGCCATTCTATCTAAAGGGTATATACACACATAAGCACAACCACGATTATTTTTCAT -TTTCAACTGAATACAGCCCAAATATATCATAGCAAGTCAAAGCTATAACGTAAGTCTGAT -TCAATCGATCCACTAAAATCAATTCAAATATATATGGAATTGTAAAGAGGACATTGGAAA -ATAATCCCCTACAAATATTCCACGAACTTAGCCATTCTCTGGCTTCAAGCATACATGACA -ACCCTTGCCGTGATAAATACTAAGTAGTCAAAATTCTCTTTATGGTGTATCCCTCTAGTT -AGCTAGGGAGATTATAGAAAGCTATGCAAGTATTTGTTTACGTACCTTTTATCGGTGTAA -ATTCCGTGCAAAAAAAACCATCCTATAATTGCGGCCTGGTGGGCCCATCGACAATAGGTA -AATGGTGTTGAGCATGGCACGATTGATGCATGGACGTACTTATGTCTAGCAGCATGAAAA -ATGATAAAAAGACCCAAAACGTCTCAATCGTCAGCCAACGAAATGTCCACGTTGATCACC -GGCCATCTGGGCAATAAAGAGCCCCCAGCCGCCCCATGGCAGTGACTAAGCAACTGACAC -GTGATCTGCGGGAAACGCTCAACTCCGACCTCGAAATTGTTTCGGAGCGACAACTCGACT 
-AAGCAAACCCGAGACGCCGCCCAAGATGGCCAAGAAAGGTACGTGGGACGAAGAAACCAG -AATCTCGAATTGCCTCATGCGCCCGCCCCTCCGCACCGCAATTGGGACGCGAAATGGGAC -GATGGGCAGCGAAAATCGAGAATCCCCGGATTCATGTGCGGCCGATGGCTAACATCGTCT -TTTCTCCGACACAGCGAAGTCCCGTACCATTGCCGTTCGGCTGATCTCCATGGCCATGAC -GGGTTATTACCGCACGATGATCCGGCCCCGTGTACACCGTCCTCTCAGCATGATGAAGTA -CGACCCTGTCGTGAAGAAGCAGGTGCTTTTCCTTGAGGCGACCAAGGGTGGAAAGAACAA -ATAAGCTATCCGATTTCAAGTCGGAGTTCTGCTGGGTTTTTTTTTACATTTCGATTGTTG -TGCGCCTTGAGATATCCGCCTTTCCACCGTGATTTCGACTGATTTCGATATCTTTTCTTC -ACCGAATCAACTTTCACCTCTTGCATTCGTTGCACTCGCTATCCCAACATGTCGAATGAT -ATCTCGTACTGTTCTGCGGATTTAGGGGAATGCGATAAGGATGTCAGGGGAACCACCCCT -GTATGTACATTATTTTACTGGATGCAACCGGGGAATTGCGATCTCCCCGGTTTCCCCGGT -TTTCCGAATCTTCTGCGGTGAAACCTCCTTTTTGTTGGAGCATTGTGTTTGGCGTTAAGA -GCAATACAGCTTTGGGTACCTCAATAAAACTAAAATGAACATCCCACGTTTTCCCATCTA -TCGCCTTCCGAACCCTGAGCCCCTCGTCCATTTGGTTATGACAAAGACATATTTTCATAG -GCAATGTTTGACTGGTCACACGCACCTTGTGTTTGATAAAGTGAATGAAGCGGGAAATTG -CGTAACATGATCAGAGTAAGAATGTAGGAATGGCGTATCAGGGTTACCTCACGTTGAATT -GCTCAGCCGGGATCTTGGCTGAAGCGCTCATTATAGTGGTCACCTTCTGACTGTCAGCTG -GGACGACTTGAGAAAGTATCAAGCATCTTACCCCGAGTGCGGCGCTTACCGCGGCCTCGC -GAAATGCTGGCTTGGCTGGCTGCGTTTCCCCTGCTGCTACCAGAAGCCAAGCCCTTCGGA -CTCAGCCGTTTTCCCGAGCCATCGGCTGCGTTTCGAGGTCTTTTCGGTGGCCTTGGGTTC -GCGTTTGAAGGGTGCTTAGTCTTTTTCATTGAGCTTTGTTGGGAATCTCCCACCAAAGTT -TTGCTGGCGGGCTGCACTTCTGAAGTTGCTTCGATCGAGGTTTCCTCCTGTGCTATGGAC -TGGCGAAGGCGGCTATAAGTCCGTTTCTGCGTCTTCTTCACTTCTTTGCTTTCAGACTCT -CTACCGGAGGTGCTGTCCGTTGCTTCTGACATTCGTCTAGCTTTGCCAACCTCTGTTTTG -GTCAAAATCCTCGTCCCCTCGAAGCTGACTACTTTACGCTTGATGTTGGGTTGTTCGTTG -TTGGCAGATTTGTCAACCACCGAACTATCGACAATTTCAACAGCAGACTGGGGTCTTTCG -GTTGTGCCGTCACCAGCGCGGGGAGGTTCCATCCGGAAAATTGGGTTAGCTTCATTGGTA -TTTTGATTCATTTTCAAAGGCTCAATCTTTTGCTGTTCTTGATCTTTCTTGGGGGCATCA -CCTGCCATCGAAGGTCCCTGAAGATCTGGAGTTGACATGAAGAGCATTTCGAGCTCAGCT -GGATCGTTGAAAAGTGAGAGACTATCTTCCCGTGAAAGCCTGTCATGGAGGCTTGAAAAA -GGAACGATATGACTGTGACCAGCAGGGAATTTTGGCAGCGAGTCAACACAATCACCATGT -TTCCAAGCCTCCGAGGGCCTAAGATCATTAAATGCTAGAGGTCCATCTGTCTGCATCTGT 
-TCAGCGGGCTTGCGAACCTTGTTGTTTGATTCATCATTCTCCAAGAAATCAGCATGTCTG -CCTTCTCTTGGCTCCAGTGTGAAATTAGTGCCTTCAGCCCCCTCCATACTCGCCGTGCCG -TCAAGCCTGGTGCAAGACGTATCGCTAGTGATAGACAACTGAGCAAGCGGGGTCTGAATG -ATGTCTTCCAGTTTCTGCCATGTTTGCTCGAAAGTCTCATCAGGATGTACCTCGATCACC -TTTGCAAACCCTTCTCTTATAGTCGCTTGCCAACTTTCAGCCTCCTGAGCACGGCGGAGG -AGTGAAATCATCTCAGCGTCAGCCTCCTCCCCTACCTTTTCCAAAGCCGATGACCGTGAT -TCGAGCATGGTAACGGTCTTTTTCATCGCTGCAAGCTTTTCAGTCTTCTGAGCTATCTCC -AGCTCAAGTACCTGAGCCCTCCTTTCTACCTCCTGTGTCGCTTTCTCTTTCAATGCTGCA -GATTCGGTCTTCAACCCCAGGTTCGATTCAAGCTCGATTGTCTTTACCTTCGCTTGCTCA -AGCTGTTCAAGGAGCCTGTCCTTCTCCACCAAGAGGGCGGCTTCGGAGCTTTCCAATACT -TGTGATCGATTCTCCATGGCCTTGGCCTCTGTAATGTATTTTGAGCAAGAAACATCCAGT -GCCTCAATTCGTTTCTGTGATTCCTGGTGCTGCGATTATCAGTAAAACACATAAAGATGA -TCCTCCGCATCGCTTACCATATAGGCGTTTTCCAGTCGCTCATCCTCGGCTGTCCTTTTT -GTTTCAGTCTTCATACTATTAGTGGAACAGTCTCTCATCGGGCACGGTCGGGCTTACCAA -AGATTTTTGAACATTTGCAAGCTGCGCCTGAAGGTTACTGGAGTACTTTTCTAGCCTCGC -CCTTTCGGTCTCAAGGCCTTGGACTTGTTCTTGAAATCTAGCTTTCATCTGACCCTCTAA -TTTGGTGCTTGCGGCGCCCAACTCCTCTCGGGTCCTCTGCTCGAGCAATTCAAGTTCGCC -CTGCAAGGCCGTCTTTTCAACTTCAAGCCGTGAGTTTTCACCCTTGGATGCTTGTTCATT -CGTGACGGCGTCAGTCAAAGCATGCTGAAGATCTTGGTTCTCAGACGATTTGCTTGCAAG -TTGTTTTCGAAGCGACTCGACCTCAAGTGAGATTTCTTGTAGTTGCAGTTGCAATTGCAT -ATTCTCAGCAAATTTCTTGGAAACCTCCATCTCCAGAACAGGGTTTCCTGCTGGGATTTG -AGCCTCGGAAAGCTTCTGACTGAAGGTTTCTAGACCGCACGCAAGGTCTGTCTCTGTCAT -TCCCACAGCCTTAATAGAAGCTTCAAGACTACCGAGCATTGCCTCAAAAATCTGAAGCTG -TTGTTGTAAGTTTCCGTAGCAACTCTCGCTACTAGCAAGCTGTTTGAAAATCGACGAATA -CGGACCAATGTTGCTTTCAATTGCTCGAAGGGCTTCTTGCAGGGCTTGGAACACATTTTT -GCTCATTTCTGTGCTTCGGTCAACATCGCTTGCGACCTGCCCAGCCAAGGAGTCGAAACT -AAAGATATGGTTAGTCAACAACAGTTGTTGCTCTTCTACACTTCTACTTAACTCACCGAG -ATGCTGCCTCCTGAAAGCTGCTGGTGAAGGCTTGGACATTCGTGACCTCGGACGAACACT -GCTCCTTCAACGTCTGCACCAAAGCAATACAGTGTTCGAGCGCAGGACTCAGTTTCGACA -AAATCTGTTCCTGGGATTTGAAGCAAGATGTGGTGAACAGCTCGCAGGCAGAGTCAATTT -TCTCGGCCATTCTGCGCTGGTCGTTTCTGACAAAGGCCAAGCACCGCGACTGGCTCTGAG -ACTCATTTTGGATGTAGGATTCTAGAGTCGCAATCCTCTTGTTCTCATTGGTTAGCTGCA 
-ACTTGATCAAGTCGCCCCTTTTCTCTGAATGATCCAAGGCCTCCCTCAGGTTGGCAACCG -TGTGATCGGAAGTTGAAAGCCTTTCCCGTTGATTCCGCATGGTTTCCTCACTTTTTGAAA -CCTTAAACTTGATCTCGTCCAAATTGTTTTGGATCTCGCTCTTCTCTCTACCAAGAGATA -TTGTTGCTTCTTTCAATTCTATGGCTTGCTCCCGAACTGTGTCATATTCACGGCCAAGTT -CGTTAAGGACACCTTTGAAGGTACCAAGCTTCGCCTTCCACTTGTCTATTTGTGCTCGCT -GAGACCTTGACTCGATTGATGCTCTGGCGAGCTGTCCTTGGCATTTTTTTAGGCGGTCTT -GCATGTGAAGATTATCCTCCCTCAAGGCCATGTTCTCGTTCTTGAGTTGCCGTTGGATAA -TTGCAGCTGCTTGCTCGCTCTCCTCGCGCTGTCTCATTTTCATGATGAGAAGCTCAAACA -GATCATCCTCACTGAGCTGAACGCCATTCTTGCCACCAAAGAGCGGGGCCGGTTGTTTTG -AAGATGCTTCTTTCTCGAAGGCACGACGTCGCGTCGACGTATGAAGCTGATTTTGTACAG -GCATGAGAGGCACTTGGGAGCATGGGTTCTCCGGAATATCTTGCCTCTCTTCGTGAGGCC -CTTGTACATGGGGTGGCTTGGGTAAGACTGGTTCAGCTTCAGCCATTTTGTCAATGTCAA -CAACAAGGTTTGCACTTAGGCCAACAACGCTTCCTGCAGAGCCAACTGTCTCTCTGGGGA -TATCAGCTCCGGTGGGCTTGTTTGATTGGCAAATAATCACTTCATGAGATTTTCTAAAAC -TCGGATTATCGAGATGTCTAGGTGTAGATCGACTGCCGCTGTCTTTGTTGGGTCCGGCTG -CTGCCGAGAGCCGACTTGGTGGCCGGGTAAACATGGGCTGTCCCGCGGCGATTGGTGCCG -TGGAGAGTCCTTCTTTCACAACGCTGCAATGATTGAAAGGTCTGATCCCAAGAGAAGATA -TCAGTCCATTGTGAATAGTGCTGTCTGGGGTCAGCCCGGATCATCTACATACGTATCTGG -CCTTGCTATCTGATAGTCGCCCTCTCCCATGGGCTCCCTCGAAGGTCGGGAATTATCGAT -TGCATGTTGAGTAATGGGTGCTTGCTTGGTCAGGCTTCGCGAGTACGAGGGTTTGTGTGA -GGCAGGCTTGCTGAAAGAGAAATCTAAGACATCAATGCCGAAACTCTCTGGGCGCTGCAA -CAGCGGCCGAGGCTTTTCAGAGCTCTCCATAGTAGACATCTATTGGGAATGATGGAGGAA -GTAGGAAAGAAGAAGAAACGGCGAGGTCAAGAAATAATATCCACGTCAAGGAGGACGTGG -GAAGGAGCTACCCCGTGACCGAGCCATCTCTACACTTCCATTTTCAGAGGTCTGCACTAC -ATTTTCACATCGAAAACTTGAAATAGTCAAATTAGGATGATAGGAAAAGCGAAAGGGTTC -AAACATAGTTCATCCAGACGCAGTATGGTAAGCATCGCCATTTTGGCTAGTCAGTCGCGT -TTAGTTGAAGCTGATATAGGGGTTGGTATGAAGGCTGGAGGGGAATACCGATTTTGTTTG -GATACATAGGAGTCGATTTTGGTATTAAGAGATGGTTCTCCGGGCAATTGTTCATGCTAC -GGGATATATGTATGGATTGCGTGGAGACATATGTGGTTATACCTTTTCATGTTGTAAAAG -ACATATAGCATGAAGAGGGCATCAATCGACTGTTTTGTCAAGTCTAGGCACCGGGCCGTA -TCAGCTGAGATTGAAGGCATATATTATTCCTATCTCTCTTGCGTTCTGTCTGTGCTAGAC -TTAAAAATGAGAGCAATTGTGCCGACACAAAGGATGCCACGTAGGGACTGTAATTGTCAT 
-GCAACGGTGATATTTCGATTCATCACAAAATTCGTGCATATAAGAATGCAAATAATATAA -CACAAATAGTATAAACAGCCTTGTCTTGATTTGAGAGTCAAAATAGCCGAAACCCCCTTT -GTACGTTGTACACGTTCGTTTGACCCAACGCCCAGTTGACAAGGATCACCATTGGATCCC -CGGGGTCATATTATAAGCTTAGTGGACACTCTCCAAGGTTTCTAGTTACTCTCTCATTTG -ACAGGGATTGAGAACCCTGCACGAACAATCTGACTGCGCCCACATGCACATGTCAAGTTT -GGTGGAACCAATGACATTCAAAGTGATAGAGTCAAAGTGTTAGACAAACATTATCTGTAC -GGAGTACATATTCAGCATATTTGTGCCCTAGTGGAGTGAAGAGCGAAAAAAGACCCGCCT -AGAATCCGGGTTTGATCCAACTGCACAAAAGTCCCGCATGTGAGGGATCAAATCGAACAT -CGAACATTCTACAGGCATTCCAGGGCCCTTCGACAGTGGTCTCCGAGACAATTCTGTGTC -TGGCCTTACATTGTAGGGAAGTCAATAACGTGCTGTCCACAGGATTCGGGAGAAATATCT -CACACGGCCGACAGTGATCAGTCATTTGCAACACAGCTTCTAGGATATTTCCTGGTTATA -GGATCAGACTTAATATGGTTCAGGATAATCCGCCTTACAACTCCAGATCGGAATGAGCCA -GAACACCTGGTAAGATGGATTTCAAGTTTCACAGAGTTTCACAAGTGGGATTGTAGAGCG -TAGAGCATGCTAAATGCCCAAATGGTGATCTATATTGTATTATAATCCACCCTCTCTCGT -GGAAATACTAAATTTACCCTCCCTCCCTTCGAACACTCGTTTCAAAATGAATTTTGATTA -CCCGGATGTTCCTCTAGGATCATAATGGACAAGTCACGGAATTGAGAAAAGTCTATGCAT -TTAGCGGATATCGTGGCCAGTTTAAGCGGGCCCCCGATCGAACTGGGCCAGGGCTATATC -CTCTCCACCTAGCTTCATTTCGCAAGAACCCGATATTCGGAGAGCGTGGGGGGAGAGGGG -GGGGGATGATATGTATATCCGTATAGAAGTCATAGTCGTTGTTCAACCTTCCATGGCAAT -ATGTAAATTCAAGAGACGCGGGAATTGGTATCTTCAAACAACTTATCCTGAATAGTAGCT -GCACACTGTCGATCGCACGGCATTCAAGACCAGAACAGTGTATAAGGGCCTTTTTTTTTC -TTTCATCCCCCCTTTCCCCCATTTTTTTGCGCGTCACGCGTGATTGGCCCGCAATTACTG -CCATTCTCTCCAGACCGCTCACTTCATCCCCTTTCTTTCATCTGCTTCACATCCCGAAGA -CAGGATATTCTTGGTGTGTCATTTGACATATCTCTTTTTATTTCTGTCTTTGTCTCCCCC -CTTTTTTTTTTGGTGGTGCCTTATCAAGCCCGAAACTGGTGTGTTTCGCGAGTTCCCGGG -CACGTTTTCATGATATACATATCCTGCAGGCTTGGAACCTCGGATCTTTATTTCTCTTCA -CTTCTGGTCGCCTTCTTGTTTCATCCAGTCTTGTCTAGACCTCAAGAATTCATATCTAGA -CCTGCACCGATAATATTCGCCCGATTCTGAAACATTTGACCCCCACTCAGCCGTTTTGCC -GTCGGAGTTTAACCTCTCATGGAGATTGCGATTTAAGCTCGATATCCGCCGAAAACCCGT -CCCTATATCTACTCGTGGTTTGCCCGAATACCGCTCTTAACGCGATCACTATTCAATCGG -AGGAAGCTTGGGACAAGTGTGATGTTGCTAGAACGATACTAGTCAACTTTAAATACTTCG -ACCATTGGTCTATCTTCACAACATGGGAAACACCCAGGGGAAACCGGTGAATTGCAACGA 
-TGCAGGTACGTTCACTGGACACGGAGTGGGCCGCCATTTCCGAGTCGCCGAGGAGTACTC -AATAACATGCTAATTGCTGGCTTATTTCTGCTAGTCAACCTCAATCATTTTCGGCTCCTG -CGAGTTGTTGGAAAGGGCGCATTCGGAAAAGTACGAATCGTCGAAAGGAAGGACACAGGT -CTTACATTCGCTCTAAAATATATTCGCAAAGAGGAAGGTGTGTTGGAGCTCCATGCTTGT -CAAGCCGAGAGAGCTATAACTGACAGCTCGTTTTAGTGGTTCGTTCGGAAAGCGTACGAA -ATATAATTCGTGAACGGAGAATGCTCGAGCACCTGAACCATCCATTCCTTTGCAACTTGA -GATACAGTTTCCAGGACATGGAGTACATGTGAGTATCCCCACATCGGCTCATCACTCCAA -GCTGACCAAAGTGAAGCTATATTGTGGTTGATTTAATGAATGGCGGCGATCTTCGATTTC -ATATCTCCCGAAAATGCTTCACAGAAGATGCGGTGCGGTTTTGGATGGCGGAACTGGGTT -GTGCACTGAAGTATATCCATTCGCAGGGCATCATTCATCGAGATTTGAAACCAGACAACG -TCCTGTTGGATTCCGATGGACATGTTCACTTGGCTGATTTTGTAAGTTCTACCGGCACGA -ATTGCATTCTCACGTGAGCTTCAACTAACATGAATTGTAGAATGTGGCATCGGATTACCG -ACCAGGGAAGCCTCTTACGAGCAAGTCAGGCACACTAGCCTATCTAGCTCCTGAAGTCTA -CGAAGGTACTGGTTATACCTTTGAGGTCGATTGGTGGTCATTAGGTGTCACTTTTTATGA -GTGCATTTACAATAAGGTACGAGAGGTCGGTTCCTTGTCTAGTCCCCCCGTCTAACTTCC -AGTGCAGCGACCATTTGAGGGCCGAAGCCAAGAGACACTGAGCGAGAACATCAAAAAGGC -GCAGCCAAAATACTACGTCACCAATCCTGCCGTATCGGTAGCTTGTTTGCGGGCATTGGG -ATCATTGATGCAAAAAGATCGGAGTCAACGAATCGGTGCTATCAGCTTTGAGAGCTATAC -CTCACACATGTTCTTCAACGAAATCGACTTCAACGCGCTGGAGCGCAAACAGATCCCGCC -AGTATTCGTCCCATCAAGCGACAAAACCAACTTCGATGCGACTTATGATCTGGAGGAACT -GCTTCTTGAAGAAGCGCCCTTGGAAGCTAGAGCGCGCCGACAAAAGCCGCGCGCTGAACT -AAGGGATGATGCCACCGCAAAAGAAATCCGAGATGATGAGCTTCATCGCCTCATTGAAAC -AATGTTCGAACCGTTTGACTATACCCAGACTAGCTACCAAGGCAATGCCGCGGAAGCTAT -TGCTGCTGTAATGTGGGTTTCTCTCCCCATGCAGGAAATTCTCCTTCTGTGCTAACGGTG -TCTTCCAGAAATCCCGAGGAGTGCTTCCCAATGGCCACCACAACTCAAGGGAACATCACA -ACACCAGCGAACCCAACCGTACATGCGCGTCAATTCTCACAGCCCGACCCGTCTAAAAAT -ACATCTCCTATCCAAGCTGACGGCTCGCACTACCGTGCTCCACCAAGTGAAACCATGACC -ACACTCAGTGAGACACTCGACCCCAATGACCCCACAACTGGTCAGGCTCCTTCTTCTCCA -TCAACCCGTGTGTCCCCTTCACCGCCGCCTGCCCCATCTTTCCACCGACCCCTACCCCCC -AACCACAACAGTCGACACCGCGGTGCCACTCGGCAAATGAGCAAAAGTGGAGGCGTACAA -ATGGTTCTGAACGAACATGGTAGCTGGAGTGAGCTGGCCCATAACGCTCTTCCCGCCGAC -GGAATGGAGAATGGCGATGACAAGGGCAAACAAGCCAATGGCATGCTATCATTCTTCAGT 
-CGGAAGAAGGGCCGCGACCGCAGCCCCAAGCCGACTGAGCCAGGTGTTCTGGGCAAAGAG -GGTGCCAGGCAGATCATTAGCTAAAAGGCTCGCCTTTGATGAAAATCCGATTGGCGCTCA -TGCGGGTGGCGCTGTCGCCGAAAATATACCCGCGCGATATGGGCTATCATGCCAACAACC -TATGTGGTTTCTGCTCACACTTGAGTTTGAAATCTTAGCCCCGAGACCAGTGCTACTCAT -TTTAGCACCCAAGGAAGTCGATGATTCAATGCTCTCACTCCTTGGCTGGGTTGCCACTCT -TTTGACACGTTCCGGTTTCCGCTCCATTGGGGTGGCCGGAGGGAGACGGCCTTGTTGACC -CTGTCTGATTGCATTTCTCTGCGTTGATCTCTGCAGCAAGAGATGCAGTGCTCGAGGCAC -CATCTCCCTTTCATTCTATTAGATTCCCCGGACACAGCGAGAGTTCTTTATTTCATCTTT -TATCATCTCTACATATCATGCTTTCGCACCATGTCTCCATCTTCCCCCCTCGCAACTGTA -CAGATACTCATGGCAGACACCGTGCTTTGATTTTTTTTACCACTCTTTTCGGTAGCTTTT -GTTCACTTTTGTCACTTCTTCCTGGCAATGATGTCTTTCACCTTGTATTGTCTTTTGGAG -TTTGCATACCCTGACTCGTTGATATGAACCTTTATATTTTGGATAGAATGGACCGGAGCT -TGCATGTCTATGTGGACTGGAATAGGTGTATCTAATGTCATTTGCATATCAATATACAAA -ATGAATTGGACCATGACGTACTACAATCGAAACAGTTGTATATCTATTGTTGTGTGTGCG -TACCTCTATCCTTATATATCCGTTGCTCCGCAACCATGCACACCTATCGATCGTATACGT -ATGCATTTGGTGACATGCGAAATCAACGCAGTATGGTCCATATATAGAGTATGATATACA -AAGGGATTTAAGGTAACCCCTATAACCCTACAAGCCCAACAATACACATTGGCATTAACA -AAATCTTGGCAGGATCTTCCGTGTGGGGAAAAAGCAAAACAAAGCCATACAAAATCCCTT -AGACCGACCCGATATAGATTCCAAGAATTCCAAGACAGCCTGGACTTTCTAAATCCCACG -TGTTATCACACCCAACCTCGACTCATCTTCCATTCCTTCTTCCCTTCCCATCTAACAACA -ACCCACCAATCATCCACAGGCGCAATCAACAATCCCACATTCTATCCCACCGATCAGCGC -ATTCCACACCAAAACCCCCCACCCATCTGGCATGCCAAGCCAATCCCGAGATAGAcggga -ccgtgaccgtgaccgggatcgggaccgggatcgcgaccgcacacgcgagcaagagcgctc -gcgccgccgagaccgagaccgggaccgcgaGTATATCCGCGGCCGCTACGAAGATGACGA -AGGCGCCACATCCAGCCCTGGAAATGCGCGCACCGCGAAATATCGCTTTCGCGGGGAACC -AGGGTACGAGTCGAACTCACATAATGAACACGATGATTACGACGAGGAGCGTGAGCGGCG -CAGGGAACGGAGGCGCAGACGGAGGGAAGAAGAGGATGAGGCGGCTGGTCTTGCTTATGG -GGAGCCTAGGGAGGAGAggagacgggatcgggagcgggaacgagagagggGAAATTCTAG -GGCGAAGGATTCACCTGCTGCGTCGCCTACTAAACGGCGGGATCGTGAGAGGCGGTATGT -CAGAGAAGAGAGTCCGAGTGCGAGTCCTGTTAGAGAAAGACGGAGGGATTACGATGCTGA -GGTTGAGGCTGAAGCGCGTCGACGGAGACGgcgcgagcgcgagcgcgaaagagaacggga -gcgtcagcgggaATACGATGCTGCTGCGTCGCCGGCGAGAAAACATCGCAGCACTGAATC 
-TTCGAGTAGCGCTGCGCATCTCTTGAGTGTGGATGCTATGGCGCGTCTTGCTGCGGAGCA -CGAGGAGGCAGATCGCATGGAGCGGTCCCGTGGAGGAGATGATACGCGCCGGGAAAGGGA -AGCTCGACGGCAGCGGAAAAGAGCTGCGTTAGATGAAGCTGCTGGTGCCGCGCTTGGTGC -TGAAATTGCGCAGGGTCGCTCACGTCATAAAGCGGGCGCGCGAGTTGTGTCTGGTGCCCA -TTTGGAAGAAGGGCATAGCTCCGATGCTCGGGTTCGTCGTCGTGGTGGTGGACCGGCTGT -GCATGATCATTGGAAAGATGAAGACAGCTGGGAGGGTAGCGTGGAGAGCCGTGGGCCACC -GGCTTGGAAATTCTGGTCTAATTGGTCGAGAAAGAAGCGTATTTTGGTTGGATGCCTGCT -GGCGGTGATTATCTTGCTTGCCATCATCATACCGGTTGCACTTGTTGTGTCCAAAAAGAA -AGGGTCAGATTCAGATTCGAGTTCCTCGGACTCCGATGATAGCAGTTCTACTACGTCTAA -TTTGGGCAGTATCAGCCGTGACAGCATACCGGTAAATTTTGAGTCAACTATCTTCATGGA -ATTTGCTGACGACAGTACTTCAGAGTTACGCCAAGGGCACGGTCTTGGACCCATTTATAT -GGTACGAGACCGACGGGTTCAACCTCACTTTCACAAATGCCACTGTGGGTGGTCTTTCCG -TCATGGGAATCAACTCGAAGTGGGATGACTCTGCCCGGCCGAACGACAAAGTGCCTCCTT -TAAACAAGCCGTTCCCTTACGGGTCACAACCGATCCGCGGAGTCAATCTAGGCGGATGGC -TTTCCATCGAACCGTTCATTACCCCGTCTTTTTTCAGCAAGTGGCCATCCAGCGCTGGTA -TTATCGATGAGTATACCTTGACCAAAAAGTTAGGAAACTCAGCGGCAGGCACGATAGAGA -AACATTATGCCGAATTTGTTTCAGAATCAGATCTGAAGGAAATCGCAGAAGCCGGCCTTG -ATCATGTCCGTATCCCATACTCCTATTGGGCTGTGACCACCTACGACGGAGACCCGTACG -TCCCCAAGATTGCGTGGCGTTATCTACTGCGTGCAATTGAGTGGTGTCGCAAGTATGGAC -TGCGAATCAAACTCGACCTCCACGGACTCCCCGGTAGTCAGAACGGCTGGAACCACAGCG -GCCGACAGGGGAGCGTCAACTGGTTGGAAGGCTCAGACGGTGATCTCAACCGTAAGCGGT -CACTTGAGATTCACGACCAACTCTCGAAGTTCTTTGCCCAAGATCGGTATAAGAACGTGG -TGACAATCTACGGCCTGGCGAACGAGCCGTTGATGCTGAAGCTGCCCGTTGAAAAGGTCC -TAGACTGGACCAAGGAGGCTGCGGAACTCGCCCGCAAGAATGGTATCACTGCCACTCTTG -TCCTGCATGATGGGTTTCTGAATCTCGCCAAGTGGGACACCATGTTCCAATCTCACCCGG -ATAACATGTACCTTGACACCCACCAATACACCACCTTCAACACGGGTGAGATCGTCTTGA -ACCACACCGCCAAGCTCGATGTCATCTGCAATAGTTGGTATCCCATGATCAAGGAGATCA -ACACGACAACTTCAGGGTAAGTTGGACTCGATTTTTCTTGGTGATCTCATATGCTAACTT -ACACTCAGATGGGGACCAACCATTTGCGGTGAATGGTCACAGGCCGACACCGACTGTGCA -CAATATGTGAACAACGTCGGCCGAGGTGCCCGATGGGAAGGGACCTACGATACCAGTTCC -ACAACGGCATATTGCCCAACAGCGGACAAAGGAACCTGCAGCTGCGCAGATGCCAACCAA -GACCCATCTGATTATTCGGCCACCTACAAGAAGTACCTACAGACCTATGCTGAGGCACAG 
-ATGTCTGCATTCGAAACGGCGATGGGATGGTTCTACTGGACCTGGCGGACGGAGTCGGCT -GCCCAGTGGAGTTATCGCACTGCATGGAAGAATGGGTATATGCCTAGCAAGGCATATAGC -CCTTCGTTCAAGTGCGGAGATACAGTGCCCGACTTTTCCGATGCAGCGGAAAGCTTCTGA -TCTATTACTAAATGTTGCATTTTGCGTTTCATATTTTTGTTTGCATAGATACCATGGATA -AAGACTAGATTCTTACATATCGATCTCAATGCCCCGATCTGGATATTGGATATCTTCCCC -CATCTCAACCTTCTCCGAAGTCAAATCACCCTTCACCTCAAATTCGATAGCATACCATCC -CCCTTATGCCCCAAACCACCCTCCGGCCTCCCACACACCAGGCCGGTAAACAACTTCCTC -CCGCACCCACTGAAGCCTAACACCCCCGAGCAATCGATCCACTCTCTGGAAATCAATCGG -CTTCAAAACCCACCCATCAAAGCCTGAATCCATATACATCTGCCGGTTCTCCTCCAGCAA -GGAAGCAGATACAGCGAAAATGGGAATGCGAGGCGCAGGCGTGGTCCCATTGGCCAGCTC -TTGCTGCTCGTATTCGCGTATCATCTGTGTTGCACCGAGGCCATCGACAATAGGCATCTG -CGAATGGAGGTACGAACGTTAGTACATATATACATTAAATTAGGAAAAGTTTGACTGTTT -CGTGTACGTACTTGTAGATCCATCAACACCGCGTCAAATTGAGTGGGATTTGCTTTGTAG -ACGGATGCGCATTCCTTTCCGTTGCTTGTCATGTGCACTGTGTGGCCAAATTTCTCGAGG -CGTTTTTGGACAATACTGCTGTTGATTGGGTCATCTTCTGCGATGAGGACGTGGAGATTC -CAGTCCATTGTGGGAGTGGTGCTTGGTTGACGAGTTTGGGGCTGACTGGCTTGGGATCTT -GTGGTTGGTGAGCCCGAGGAGGTTTCTTTGATAGGATATTTGGGTGATTCTGACAATTTC -TTGTAATATTGGATTGGGAGGGAAGTGGTTGGTTTATCTGATCGGTTGCTCTGGTCGAGC -TGTTGGCCAGAAAGCGAGAGATTCCTTGATTCGCCGGTTTTCAAGTTTACATCAACGTTC -ACGATGGATTCCCCTGGTCCAAATTCTGAATCTCGACCACATTCGCATGGAATACTACTC -TCTTTCTGATTTGTTATGCAGCGTGAGGATGAAGTGTCTTGCTTTGTGTCTTGTGTTCTT -TCACCTTCCTGCAGATGTTCATTTGTCATACCATGTGAGGACTGCTTTTGACTACTATCT -TCATCTGGAAGAGGAAATGCAAGCCTGATCTTAAAGCAACTGCCCTTGTCTTCCTCCGAC -TTCAAGCTCAGCTGCCCGTTCATATTGCGCACGATTCGGGCTACCAGTGCAAGGCCAAGG -CCAAGCACACTGCCGGTCTCAAACACATTGCCAGTTGCAGTACCCTCATATGACTTGGGA -TTTTGCATATACCCCTTGTTGGACACTTGTTCCAACTCACAGAACAATGTTTCGACGGTC -TCCTGGGACATCCCAGACCCGGTATCGTGTATTGCCACTTCGACACAAATATGATCTGGC -TCTAGGACTTCTGGTACGACGCAAGACTCGATTGTTACTGCCCCAGTGGATGTTTGCTGT -ATAGCATTGCTTATCAAGTTTGTGATTACCTGGCGCACCCGTCGTTGATCTCCGAGGACG -GGAGGTAAGGCTGCATGTTGAACTACCTGCAAACTGACATTTTTCTGTCTTGCCTCTTCC -CAAAATAGGTCTGTTGCTTCCATAAGGGTCTCCGAGAGATCAAATACCTCATCCTTTATA -AGCCGTTGTCCATTTTCAGCATTTGTGAGGTCTAGAAGGTCATTGATGATGTATACCAAC 
-GACTTGGAAGCCGAGTGAGATCTGGAAATGTGATCTCGTGTTTCTTGATCCAGAGATCCA -TCGAGAACAATTTCCAGATAGTTGATGATTGCATTAAGGGGTGTTCTAAACTCATGCGCA -GAATTGGCGAGTAAAAGCCTCATCAACTGGTTGTTTTGCATTGTAGCCTCCTTCTCTTGC -CATATATCGGTAAAAGTGCGATATAACAAAGACAGAACAGACGCCTTTCCAAAATCTGCT -GATGACCATTCATGCTGTCGACCCGATCGGTCTGAATCAGATGCGAGGCTGTGACTTTCC -AAATTGGCCCGGAAGAAGATTATAAAATCACGACCGTCCACTGAAAGAGGAACGTACAGC -AAACCAGAAAGATGATGGAAACCACGCGAGTAGTTGAGGTCCTGAAAATCAGCAGCAATA -TCTCCAGACCAAAACACTGTGTCGTTCTCCTTGGCTTTCATGTATTCAAACAATGCCAAA -ACTTCCTGCGAGTCAACTGGCTTTCCTAAGATCTTAGCCTCCCCCATGATTGACGCTGCG -GCGTAATCCGCCCCAAAAAGGCTGAGAAGATCGCCGGGTGGAGTCTTGATGGCCTGAGGT -GTGCTCGTTTTACTCGAGGAAATGTTCGGTTTTCGGAGCTGAAACGGCAAGCTGTAGGAG -AGGCGCTCAATGTTACTTGAAACCGCCTTGGCTATAAACCAACAAACTTTCTGGATGAGG -GGATGGAGTCTTCGGCTGCTGTCACAGGACTGACATGATATAAGACCCCAAAGCTTGCCA -AAAACATTGATTCGAATTGACAAGCAAGTCTCGGTAGGATTGTTGGTGTTCGGTGTTGAG -AGCGCGGGTGCTGCAGAGAGATACGTGTTTGACATGTCGAGGGCTCTTTTGTTGGTGGAA -GCTCGATACACAAGCTTTGAAGTGCCCCGACTTCCACAACAAGAGGAGCATATGGTACTC -CGTGTGTATATTTTCTTCAATTCTTCGGGAAATGTTGATTCTGGGAAATGCATGCCTTCG -AACGAAGTTAATCCTGACGAAGTATCCACTGCATCAGCCACCACGATTCCGTTGCGATCA -CTGTCAAAATGATACATGGTTGTTCTATCAAATTGAACAAGACCTTTCAAAGTGCTGAGG -GTATGCTGAACGAGTGCCTCCAGGGTTTGGGCATTTGCAAGTCGCTGGAGTATTCGAGGG -ACGATATTGACAAGCTCCGATGGGTCGGGAACAAGTCTGTTACTCGTGTCGACATCCTTT -CCAGGGGGACTGATATCCTCGAAGAATGCCTCAGAAGTGACGTCCAAGCTCCGACTATCC -AATGCGGTCGTTTGGCTCTCGTGATTCTCAAAATTGGGGTGATCCATTGTCCTGCGACCC -TCAGGCTGGAGCTCGCAGATCACATAGTTCTTATATAATGTGCTCGTATGCATGGTACAC -CAAATACCCTGGGTGTAGTCTTCCAATGATAACATGGAAAAAAAGAAAACCTCGGGCCCA -GAGTCCTCGACTGAGTAGGCATCACTCAAGATGAATTGAACATGAGAAAGGAAGTTGCTG -TGTTGCGAGGATGGCAATATGTCAAAAAATGTTTGAAGCTCGAAGAGTTTGCCAGGAGAA -TGCCCAATGATTTCCTCCGAGTTGTCGCTCACTATCTGCACTTCGAAGTTGCCGTTGTAC -TCCGTCAACACGATGAGTGCACCGAATGACTGAATGAGAATGTCCTCTTCATCCTCGTGC -GAGAAGGATATTCCCTTTGTGATTCTGTGGAAGTCAAGGATCTCGCCACCATTCTGCACT -GTATTCTTCAATTGTGCCAAAATGTCGAGATTGTGGCTGGCGGAAGATTGTCCTTTGAAA -GTCTCGGTCCCACCGCCGCTCTTTTGATCCAGTGGAGTCAGATAGTCGGGCCTATTCGAG 
-ACACCATCTCCAAAATCCACGCCCTTTCCGGGGAGACTCTGATTCTTCTGCTGACAGCCC -GCTTCTTCATTGGTGATCCCTGCACTCCATGTGTCTGAAAACGGAGATATCGATGTTTCG -AGCTTATCAAGGGAAGATGTTTGCAGAGCAGATGAAGGCGTTGAATCAAACGAGACGACG -GAGCGAACTGGGAACACTCTTTCCGCCGAGTCAGTTCCCAGTTGAGGTAGTTTCAACTTC -GAGGATAGCAGCTCGGCTGCCGTTGATATCTCAGTCTTGTCGTCAGACATGATGCACATG -GTCTTTGGTATGGATAACGATCGTGCGAGAGATTGCCTCCCTCCGGAAGGCTTTTCTTCC -CTCTTGTCATTTTGGTCATCGTCATTCGCGTCATCATAAATGACCGTCCAATCGCCGTTG -TCTCTAATGTCGGATGGCGAAGGCATATTTCAGTCTTCAATCCTTGGCAGAAAGAGCTAT -CGAGAGTAGTTTTGTTCAAGGAAGGTAACCGAGTTAACTCTCATTGGAAATACCAAGTTT -GACAAGAAGATGCTCACGGAAAGGTAGCGAGGTATCCATGTAGTTAAGAAATGCCTAGCA -ACACATTGTGTATCTAGTGTAATTAGAGCAGCACACAGAGCATTGGAAGGAGATCAAACA -AAGTTGAAGGGCATATGGTGTAAGATTGAAAGATCTCCACGAGATGGCATATTTCAACCA -GGGCCATTAAGGAGGTTTCTTCCCCAGCTCATGTTTTCTCCTGAGCTTTCAACCCATCAA -AAGAGCAGCCACCGGAAACAAGCTTCCTCCCTGGGACCATGCCCTCGGTTCCCTTGTCCG -GTTCGCCGGAGACTGCCATGAGAAAACCACTCGCAATCCTTTGGACCAGACTCGGAGATC -CACGGTATGAGCTTCGGCAAATGGTCTGATCAGTCGGATGGCCATGGCTTGAAACAATCC -CCGATGCCTCTCAGGAGTGAGAACCTTTGCGTTAACCCTCGTCCGCTCTCAGAATTTGAG -AGCTTTAGCGAGCTGGTTAATTCCATGTGCTTCCAGTACAACCCAATCATAGATCTTTTG -GACATGACCGGCAATCAAGTTGAAGCGCACGAAGGCGATACTCGGCAAGTCCGGACGAGA -ACCATAGAACCTAACACGGAGGTGCCTGTTCCGGCCCATCGAGAATCTGCTCAGCTCGGA -TGGATAACCCCCCACGCGTTCCACAAAAGATGAACGGTGTGTTCAGCTAGGTAGTGTGTG -TGTGACAAACGCATTGAATGGCGACGTTGAGAAGTGCTTGGATTTCGCAACTGGATTCTT -GATTAGCTTCTGTCCTTCGATATCAAGCATTGGTATGATGCGGGAGTGGACCGGAGTAGA -CCATGGAGATCGAGATCCCCCATGCATGTCAAGTAGAGATATATATCGCACCATTGCAAT -GGGACTCAAGATGCCATCACCCATAGCGACGGTACTTATCCCCTAGGATCGCAGGTATTC -CAATCGGACCACAATGCCGAGCCCCGTTGTGAGATTGCCATATCTAAAACTGGACGGGAA -TCTGACTCGCGGGCGTTGTTTCCGGATACATGGGAGGCGTGAGGCCAGTCATGAAACAAT -CAAGGTTCCACCAATCCTCGGTTTGGAAGAACACTAGCCGAGTATAGGGAGCGGAGGACA -TGGAGACGCGATTTTTGACCTAGAGATATACGACAAGACCCAATGAAATATGATATAGAA -CAAGGTATAGGATAAATCTAGCAGAAATACTTCAAACAAGTTCCCCACGCCAACCGAGAC -TCCCCAAAAGTCCTCGCCGAGTATAAGTTCATAATTTCGTCCAGGTATCATGCTGATTAC -CAGCGTCGGCCATAGACCGAATGCTCATGCCGCGGCCGGTTAGGCTGGACCTCGCTTCTC 
-TGGTTGCGCGGGGCGGATTGTGATGACTCCCGAGAAGAGGAAGGAGCAGATTCAGTCTCA -TCCCCGGCATTCAGCAGCCGGGTGGGTTGGTTAAAGTCCAGGGCAGTCTGATTATCTGTG -AATTTGGATTCCGCATCACGACCAAAGAGCGCCATCATGTCGCGCATTGTGAGCTTGCCA -GCGCCGGCCTTGCCCTCGATAGTGGCATTAGCCAGTTCGCGCTTGCGGTCTTGCAGGGCA -ACGATGCGTTCCTCGACGGTACCCTTGATGACCATCCTGTATATTTTCACGTCAACAGTT -TGGTTCAGGCGATGAACACGATCGATGGCTTGCTCTTCCACAAACTAGCTTCAGGGTTAG -CGTAATGCGTAATAAACTAAAGATAATTGCAACGTACAGGATTCCAGAAGGGCTCCAAGA -TGACGACGCGACTGGCAGCTGTCAAATTAAGGCCGAGTGCACCGGCACGCAGGCTGCAGA -GCAACACACGTGTGCCACTATTGTTACGAAGCTTTTCGAGACTGGCCTCGCGGTGGTTAT -TGGTCATTCCACCATCGTAGCGAGCGAATCCAATATTCGCACTCTTCAAGAAGGGCTCGA -CCTTATCAAGCATAGATGTAAAGACGGAGAAGACAATGAACTTGAAATCCGGTGCCTCGC -GATTGAGAATCCGCATAAGGTGGCGGATCTTGGTGGATGGAAGCTCTCCATTTTCTCCTT -CATCGTTGTATATGTCCTCCGCTTCGGAATCGCTTTCATCCTCAGAATCACTCAGAATGA -TAGGTTTGCTCTTCCTTCTGGAGGGCGATTCAATTTCGACCTCATCATCGGTTTCATCGT -CCGAGATGAGCCATTCACCACCTCCTTCAGCATTCTCATCGTCCGAACCGCCTGCTTTCC -CGAGGCTGGGAACCTTGCGCTGGTCTTCAGGCACAACCCAATCGCCTTCTTCTTCATCTT -CGTCGTCACTGTCAATTATGACTCGTCGATTTTTGCGGGAACGAGCAAGCTGTGCATCGG -ATTTGCGGGCTGTCGGGGATTCGAGGATATGATGCTCCTTGTGGCTATTCTTCGACTTCT -TTTTCTTTTTTCTATCTGTCCCGATAACATTGGTATTCAAATCGGCTTCACATTCACCAC -ATCGACTGCTACCGGCCTTGATTTCGCCTGGGCTCAGGTCCGTTTGGCAAACATCGCATT -TCTTTGCAACAACGCTCATTGCTCCAAAAAGATCAGCCATACTGTCAAGGTCATCTGGCT -TTGACTGTGTGGACTGGCTGTTTGTACTGCCATTCTGCAGAAGAATATCCTTGTCTTTGG -CCAAATCACCCTTAACCAAGTCTGGGTGGTTACATGCCTGTCGAAGACGCAACAGCAGAA -CCAAAGCCCCAGCGTAATCGACCTTAGCACCGCCCATCATCTTCTCAAGACTATTTTCAG -TTCTCTGTTCCAGACGTTTGTAGAAGTTCAACTCCCCAGGCATGAATTCAGGGTCGACCT -TGATGACCTCACGCTTGGTGATATGGAATCCATTCGAGGACTTCTTTGGCTTGCCGTCTG -GTCCTTCTTCACCAGGCTCCGCAGTGTCGTTCAGTCTGAGGACATCCTTCGTACGCCGTT -TCATAAACGCCTTAAGATATACCTGCAGTCGCTGAATAGCGAGGCCTCCACGGCCATTGT -TCAAAGGCCGACTGATCTGATCTCTCCATGCAGCCAAGTCGTCATAGGGCTTCACCCGAA -GGAATTTGATCAGACTCTGCAATTCATCCAGGTTGTTTTGCAACGGCGTTCCAGTCAAGC -ACCAGCGGTATTGGGCATCTAATGCATACGCTGACTGTGTGGCTTTGGCGTTACGATTCT -TGATTGTATGCGCTTCGTCAAGGATGATACGGTACCAATGAACAGCGAAAATACCCGACT 
-TTTTGGCTTTGTCTGATGCAGAAGAGTTGAACTCGGATGTAAGAGTTCCGTATGTTGTGA -TCACAACATCGTATTTGTCTAATGTCGTCGCAGTCTTCGCGCGTGTTGTACCGTGATAGA -GGCAAACCCGCAGCTTGTGAGACGGCTCAACTTTGTCCTTGATTTCTGCCTCCCACTGCT -TGATGAGAGCAAGGGGCGCAATTACCAAAGTGGAGGAAATGCAGTTTGGGGGCAGCTTAG -TCGTCTTGCCCTCAGATTCGGTGTTTTCGCTCTCTTCGTGGTCGGATTTGGGGTTTTTGA -GCAGTAAAGCAATCGTTTGAACTGTTTTGCCCAAACCCATATCATCTGCAAGGATTCCGC -CTTTGGGAAAGATTCCTTTTGAAGTCTTTCGGCCAGTTTCTTTGTCACACATCCATCTCA -CACCTTCAATTTGATGGGGGAGGAGGGTCACTCTTAAGCCTTCGACGACTCCTTCATCTT -CATCGTCGGAAtcctcctcctcctcttcttcttcctcctccgctgcctcgtcgtcTTTAT -TATGCTTGGCAATTCCATCGTCTTGATCAACCTTGGCAGACGAATCGTGTTCGGACTTGG -CATCATCATGCTTCGGATCGTTGACCGTGACACCTTGCAGTTGGGCTGCAAGATCATCAA -GGTCGTCAATCGCCTCCTCCTTCTTCTTGTCCACGACGACTTCATCTTTCTTCTTGTCTA -ATGACTTCTTGGACTTCTTTTTCCCACCCTTCTTCTTCTTGCCTTTGCGCTTCGGCTCAG -CCTTGTCCTCATCGTCCTCGAATGCACCTTCAAGCAGTGCTTTGATGTTCTCGTTTGCCA -TAGCCGTATCCACATAGCCATTCATATCCATGGCTCCGAAGCTTGGTGTTGCATGGGTCG -ATGCCCTTGGAACAGGGTCAGTGTCATCGCGTGTGAGATCGATGAATTTCTGTATGTTCT -TCGATGCCCCTGCCCTCGGTGGGACAGGTTTGGATTGATAGATAGGCCTAGGAGCCCTAG -GGGTGGTCCAGGTGACATTTTCAGGACGCTTTATACTGACTGCACCATGATGCAATCCTC -CGCGAGGGTTGTTATAGGCCGATGGTGCCACCGGCTTGAATGGATCCCATGGCTCGCTTC -GCTTCATCATAGGCGTAGCGACTGGGGCACCCGAAGTGCCCGATGGACGATAAGCGTGAG -GCGTAGCACTGGACTGACCATGGTGGCGCGATTGTGGTTGGTGTTGCGGCTGCGGTCGAG -GGGCGGGTCGATGATGTTCGGCACGAGATTGATTCGGTTTCGGAATGTTGAAAAGGTGGT -TTGATCTGTGTTGTGGGGGGCCGATCGGGGAGGGTTTTCCAAATTCAGGTCCCTGGATAT -GCGCGGGATGAGACTGACTGGTTGGCTTTGCCCCTGGAGTGAGGGGCTCCAATGGAGCGT -TGCTTTCTCTAGCCATCTTGGTTTAACGAGCGGTGTGGTTTGGGCAATATCTTGTCAGAT -GGGTGAATAGGCTGGATTGTTGAATTCACCAAATGACCAAATTCGAAAATGATCCGGTCT -TTTGGCATTAATTAAGCGCGAAGGACAATCTCGGACCACTGGAATTTGGCGTTAGTTGTG -TTCTTGCGGACTCGACAACTTTGGGGGTTTGGAGGCGGAAGATTTCAAGATCTCGTTCAT -TCATTCAAGCGGCGTTGATAGGAGAGACTAACCTCTGAATCATGCCTTGAATCCGAAGCC -CTGGGGTGTTTTGTCGATGGTGGGGGTTTGCAAACATCAAAGAAAAGGGCCGTTATCAAA -AGGTTATCGTTGTGTTTGCGGTCGGCCTAGCAACGGATGATGCCTCAGGCAGAAATGAAC -AAATACGTATGAAATGGCCCTTCCAGTCCCAGTAATCCATACCTCTCGTCCATGAAAATT 
-GACTTGAATTCTACAGGTGCTTTAACCCACAATCTGCCACAATCCGCACCCCCAGCAGGC -ATATCTTTTCGGACCACTACCTGCCGAGAGGCAGAGCACACTTTATGAAAACTGTTGAGA -GGCAGATACCCGAATCATGCACGATACATCACAGCCCTCATAATCATCTTTATCACATTT -CTAACCGACCTGAATATGCGAGCTACCATGAGCTACCATTGAATCTCTTCAACGTACGGA -GTACTGACCGCTATGAACCAATTCCCGCTTGAACTGTCTGAGTTTTGGATCGAGGTTATC -ATATGGAAGAAGGGCTCGCGGCACTTAAATAGACCTCAAAAGCCCACAATCGTGGTTTTC -AGCAGGGCATATCTTGAGTCTGCACTATTGTGTTCAACGAATGGGCTGAATGTTCGAGCG -ACCTAGAAGCCGCTACAAGCGAACTGACATTCCGCTCATCGTCCAGAGGGTGCAAGACAG -CCCCCGATGTGAAAACCCCTTCAAGACCGCGTTTCGGAAAAATACGGTTTGGATTGGTCG -GCTTGTTACACAATGGGCGTTTGCGTTTCGTGCGGCTCTAGTCGGGGATATCGCCTATAG -TGCTAGGGTATCCTTACCGTCGGAGGATTACTATCGATCAATGAGGCATATAGACCAAAC -CTCACCCTTGACCATATCTAGCCTACGGCTATGATAAAACCGAGGAAGCCTGTAGTCCAA -TGACCTGTTAGATGCTTCTGCCTCTACGAGGCTTGGCTCAGAAGCCACGTATATATAGTT -CTATACGAGTTTTGGGATAATAGTACGGTGCCAGGGTCATTGTGCCTTCGGACATTCCGC -GAACGGAGTCTGAATGGCCTCGTTGATCATGAGATTGAGTACCTATCTAATCATCAATTC -TACCTGTGGACTAAACACCAGGGAGGAGATTGTTAAAGAACAAGAAAAAGTATCTTCTTC -TTTTTGTGTTTTTGCTGGAGGAATGGCAAAGTGAAAGGGAGGGCAGGAGGTGAATGCACC -CTTTTATTCTCAATTATCTTTCCACATAGACGGAACACTATAGATAAATATCAGCCGGCA -GCCAGTTGAAAGCCACTATTCTTACTTACGAAACTTCAATCACAGCACATCTCCCACCCT -GAACAACACCCGTCGCCTCCTTGATAGCAGGATCAAGCTCACTTTGCGTTGTGACTGTCG -CACCCCACGCCTTACCAGCGGCAGCGGCGATTCCTGGATAATCGGGAGAAGGATCAAACG -AGATATTGAGATCTGCACTTAATCAGCCACTGTGCCAAAAAGAATTTAGTTCAGATCTAG -CTGACCTCGTCTGTTTGACTTCGATGATAACCCGTCCTTGTGGACCAGAAGTGCAGACAC -CTTTGGTGCGTTCCATCCGCCATTGTTCAACACAATGAGGAGGAACGGGATGTCGTAACG -CCTAGCGATCCAATAGACGCTCTCCATCTGAGAGAACAGATAAACTCCGTCACCGACGCT -GGGCGTCCATTAGATACCATAGAGATTTTGCACACTTGTTAGCACTTACATCGCACATAT -AAACGAACCGGGCTTGCCTAGCTTCACGCCTAGTGCTGCACCACCTCCCCATCCCAGACC -ACCTGCGCCGCTGGCAATTAGGCTTCCTGGCTATAATTCAATTAGCATCGAGTCTGCAGC -TTGGGGTGAGTACATACCTTGGTCAAATTCAAATGATGAATGAGATTTCCTGCATTGGTT -ACAGCCTCCAAAACATAAATAGTGTCTTCGGGGAGAGAATCACGCAGTCTTGATGCGAGG -TACGGCACACTGACCACATCATCGGAAGATGGTGACTCAAGTGCTACCAAGGTTTCCCTC -CAGGTTTTGTAGTTCCTTGACCGCTCCTCAAACTCCAGGGTATACGTAGCCCCATCGATG 
-ACTTGTTTATCTAAGAAAGCATTGATTTGCCCGAGCGCAATTCCACAGCCGACCTTGAGC -TTACGAGTGGCGTTGATTGAAAATACTGGTACATAATTAATATTAGCAAGAGAACAGAAG -CGGGCAGCCCTGTGGTCGATCTGGGAACTTACGTGGCATCTGTTGCTTCAACGGATCGAC -ATCTAGATGGAATATCTTGGTCCCTACAACGCCTGTTAGAGAAACATTGAAAGATTGCTT -GCTGGGTACTCTTACCACTCCGAGGTTTGCCAGCAGTAGGAATCCAAGGCACATCACAAT -CGAGAATCAGAATGACGTCGGCTTCAAGAACTTCTGGATGGGTAGTAACAGTAACGCCGC -GATATGCTTCATGGTCACTCCGCAAGGAAACATCAGAGCCGACCATCTCCACGACAGTGA -TCGGAAGTTTGTCACAAAGCTCAGCTAAGAGGGCTGGTGCATTGCGGTCTCGACCAAGAT -AACCTGTGATGACAAGTGGTCGTTTTGCGCCCATGAGCGACTTGGCAATCAATTCCACAT -CTATGCATGTCAGTAAGGTGAAGTTATAGGTTAAATAAATACCACAATACCTTGCTCTGG -CAAAGCACTTGGAGTGACTGAGCTGAGAATTTCCCCGTCTAGGCATTCTTTTCCCACACG -CTGCATAGCATATTTGGTCAGCAGTTATTTTAGTTTATCTCAAAGACTGCGAATACTTAC -TTCCTCAAGTACCTCTCTCGCAGCCATTAGATATGCCGGGCCTTTGGGGTCAGATGTAGC -AAATTGCAAAGCTCGCGAGACCATCTGCTTGATATTGCGTCCAGTCTTGATTTCACCTGT -ATATCTGCAGTATTGTCGAACAATTGCTGCTTGGTCGGGTACGTCTTGCAGCCAATGAAT -GAATCTGTTAGATTGTCAGTGAACTCCATGTAAGTATTCTCTTTGGGGCTTGCTGACTCT -GTCCTGGAACCCAACAGTTCACCATTCTGGGTAAAAGGACTTAAGCCAGCAAAACATAGG -ACGGGAACTCTGCTGACGGAGGCGTTATGTATGGACTGACCCATAGCGAGAGTGCCGCAG -TCAACGTGCACAATAACACATTGCTAATGAGGAGTCAGCCAAATCGATTGATACGATAGA -CATAGTCACCCACCGGAACTCCTGTAGCTTGAGCATATCCCAGTGCAGCTGAAAGCGCAA -CTAGCTCAGAAGGGCAGGTGATGATATTTGGGAAATTGGTTTTGTTTTCTGCCACAACTC -AGCCCTGGTGTTTTGGGATAATATCTCCGGTGCTCACCTTGCTTGGCCTTGATCATTGCT -TCCAACATCGCGGGATGGTCGCTGCCCCTATTGAACAGGTCAAAACACATCAAAATGATG -ATGATTATAGGGACTCACAGGTTGACGAAGCAATGGGTGATGCCGGCCTCAGCTGTGAGG -ATGGTCAGCCTTTCGAACTGAGCTTTTCTAATGAATATCGTGGAGTTTGGATCACATACA -CAGCGCTTCGAAGAAATTCTCTGCTGCAGATGTCAGGCTGCCATCTTTGACAATCTCGTC -TTCATCGTCACTCTCTAATTTCTTCCGTTTTTGGGGAACTGTGGTTGTTGACTTCATTTC -TTGCAATTGAGTTGGCATCGGTAACGGGATGATTTCAATGTCAACCCCTTGGAAGGTCAG -GGCTCTATATACCCCGCATAGCCACACAGCTCATTCCGCTAATTATGGGGCTGTGAACCG -ATTTCCGCCCATCACATACATACAAGGAGCCGACCTCGGGAGGGATGATCGGCAGATTCA -CCGGCATTTTCGTGTCTCGGAATCCTCACGATTGGTCTATAAATATCGAAATTGCACCAT -AGATTCTCCTAGCTTCTATAAAAATCCTGGCCTTATCCTGCTCATCGCCAGTCTATGCAA 
-AAGCTATCAATTCAGATCTCACCAATAACACCCTTCAGGATCTCAGGGATAATAGCACCC -GCATCTCCTTGAAAGAACCAGTCACCGGAATGAAGCTCACCCGGTGTATCCTTGGGGTCC -ATATTGCACACTGCAACTCGGGCGCCCTGCGAACGAGCTTCGTGGACATACCCCGCAGCT -GGCCAGACCCTCGCACTGGTGCCAATGACCAAGCACAGATCAATTGGACCCGCCGCGATC -CACTGATCAACAGCTGTGAGTGTTTTCTCCGGTAGAGATTCCCCAAACCATACGACTCCC -GGTCGGAGCAAGCCATCTTTGCATTTGGGACATTGAGGTAAGTCCTCTTTTGGCACTGTG -GCGATCGGATTACTGGGATCGGATATGTCGACCTCGTTGCTTGTGTCTTCCAGCGCTTCT -GCAATCGCCCTCGATGCTTCCTCGCCAGTCTTGTCAGTCGCAGAGGGGCAAAGGCCTGAA -CTCTGTTGGGGGATCGCGAGGGCGGGCACGATAGGGTCCGTGTAGTTTTCTTCGCTGTAG -TCACAGTAGAAGTTGGTGCATTTTACGTCGAACAGAGTGCCGTGAAGAAGATGCAATTGC -TTCGAGGGATGGTTCGCTCGCTGGGAGAGGCCTGGAAGAAACGCGAGGGACTTATTAGTC -AAGAAAACAAAGCAAACATTTATTGGACATTAGTACCATCAACATTTTGGCTCAATGTAA -TGAAATCTTCATTCCTTCGCGATAGCTCCGCCAAGGCGTAGTGTGCTTTGTTGGGGTCGG -CTTGCAAGGCCATGTGTCGTCGATAACTGTAGAACTGCCACACGAGACCGGGATTCTCAG -CAAATGCTTCTGGAGTCGCGAGGGAGGTTGCATCGTGCGCACGCCAGAGTCCACCTGCGC -CACGGAAAGTGGGTAGGCCTGATGATGCTGAGATTCCAGCCCCCAGAAGAGCAACAATGC -GCTTGGAGCCTTTTAAATATTCAGTAAATGATTGAAGGTCCGCAGCAGGCACAACGCTCT -CTTCAGATGAAGCCATGACTTTGGAGTCACACCAAGAAATGAGATTTGAAGGTGGAAGAG -CTGAGGCTTATGGCAGGGGCCGAAGTGGAGATGCGGGGAATTTATAACTACCCCATTAAC -ACCTGATCTATAAGGCGCCAGGACAAGTCTTTGCTCAAACATCTCCCATTGGAGTATCGG -TTGACTTACTATCAGATGTTTCTTCCCTTTTCATTTTAAATGGTAGAAGCCTGACAGTTT -AGCAGGGGCTATCTTCCGTTGATTGCTAGTATTATATGCTTTGAATATATTCTTCTAGGC -AGATATATCATTCCAACACAAGATATACAGTGACAGCCATAAAATTGGCAGTGAGCCTGA -TATTGGAGCAATTTTAGAAGCGATGCTGGCAAAATAAAGCTAAGAAGCATAATCCATTGC -CAATTGACTGTGTAGCAATATTTGCAGCCACACCACCGAGAACTCCTGCTCCTGCAGTTT -TCCGACGACAAAAACGCTATTGGCTGCGACATTGCCTATGGAACTTGAAATTCCAGAAGC -GATAGACCCTATCCTCGGCAGCATGTTAACACAACTCCACAATAGATGAGTATCATGTAA -AATGGGCGTTATTGCTTAGTGGTTGTAAATTCCTGCAGCAACACCACCAACTTCAAATCC -TCGGCCTGGTAGGCCTGGGGCTGTTACTAGTACTGGAGCAGCAGTAACAGCGATTCCCCA -ACGCCAGCTACCGCACAGGCAGAAAAGTGGGATTGACGAGAACCCAGATGCCTGCATATT -GTATTTGAGTGATTATTTGCAAGTTTATGATGTATGAGAATGAGCGACTTACCTGGACTG -TGAAGTTGAAAGCTTGTTCGATGGTAGAAGCCATGATCCAGCGGAGGTGTGCTACGTGAC 
-TAAAGCAGAGCAAGAAGTGATTGATGAATATTACGACGAAGGAGAGCACTATGCCTACCA -TAAGTCACTTATATAGTTTCAAGAGGAGAAAGAAACAAAGGCCTGGAAAACTTCAGAAGG -GATAACAAAGGCTCAGCAGGCTTGCTTGATGAGCTATATTTTAGAGCAATAGAACATAAA -TCTCGAAAGCATACGCTGTTCATTGCAAGCTTCCGAGGTCTAATATCACGATCTTGAAAA -AATCACATGGCAATTGACACACCACACATATTATCATGCTGAGAGTATTGACATAAAGTC -TCGTATGATCCGATTGTTTCTTATATCAATTTTCGAGGATATTGAGCTAGCTGAAGCTGA -GGAAAGACAGATACCATCAACAAGCGCATGGGTAGCAACCATATGGAGGGAATTTCCAAT -ACTGTGGCTCTATGTACCGCTTTACTATATTCATGCTCTACATATTTTGAATTATTACAA -TTTTTGGTCATGTCATTCGATGTAGGCTCTGACGAACTGGTCTATAGATCGCCGTTGAAC -GGCCCTGCACGATACGTTTACCTTTAGTAGACGGACTCTAGTTTGTTCTAGTTTGGAAAT -TTTGGTTTCCATCCTTTCTGCTTTGTCTGAACTGTGTCACTTTTGGCCCCTAAGCTAAAA -GCCGAATCTTTAATTTTGGTGGTGCAAGCTTGACTTTGCCAAACGCGCGCTGGCGTGTAA -CATTCGCGCCAATATCATCCAACACAACTCTATCCATCGACCTTGATTTTCGCTCTGTTT -CAGTTCAGATATGGCGCCTTCAAACCTGGGTGACGAGTCCGATGGCAAGAAACTTGCATT -GGTTAAGAAGAATCGACTTAAAGATGACGACTTGAAAAACATCAATTCACCTCCAAACCC -CCGCCCTAGGCCACTCACTCCACCTTTCCCCGACAACAAAGACCTCAAACTTCAAGCCTC -ACCAACTAGTCTTGGATGGCGGTTATCACTTTCAAGACGGTACAAGCAACAGTCTACCTT -CGACCAAGCCCAATCTCCTCTATTTAAATGTCTACCCGCCGAAGTCCGGCTTCTTATCTG -GGAACACTATCTTTGTAGCCGCATGCTGCACATTGTTCGACCAAACCAGCGCAAATGGAG -AAAATCCCATAAACAAATTGTCGGGATACTATGCAGCGAGCCTCGCAATTTCTGCCCATG -CAGTCATCATTGCTGGGGACCCCTTGCCCGTCGCCCTGTGGGAAACTGCATCACACCCAT -GGACCACGGATCATATTATCACGAGAATTCTGAGTGGAGATTTGACACGAGAAGAGTTGA -TTTTGTGCCACTTTTACAAACTTGTCGAAGGGTGTGAGTAGCTTCCATGTATGTTCAAAT -GTAACCAACTAACAAGAGTGTTAGATACTCCGAGACTATTGATATCATCTTTCAAAAGAG -CACCTTTCTCTTCAACCACACAGATACAATTATCGATTTTGCGCATACACTCCTACCACA -GCGGGTGAGCATGATCCGCACTTTGCAGCTCAGCTTTCCAGATCCCGGTGGCCCAAGTTG -GAATAAATGCTGTCAGGTTCTGGCTTTGAAGTTGCCTGGTCTGAAGAACTTGATCATCCA -TTTGTATCCTCATGTCACGAAGCGGCTGGATGATTGGCTCATACCGCTGCATCAGATCCA -ACATCCCACTATCTTTGACGTCTTGCTCATCAAGCCATGGTATCTGGATCCACAGTGGGA -AAAGTCAACTGGGCTTGTCGATGCGCCTTTTCAATTTTCGATTGCTGATGTCGAGAGACG -CGGTTTTTTTGAAAACTATTGAGTAGACCCTGCTTTAAAGGTCGGTATTCCCTGAGGATA -GATAGACATAGTTCGCAAACATAGATGGGCGAGCCATCGAGCTTGATACACTAGGTTCAT 
-ATTCCATTCGAAGAACGAATGTTTTGTGTTGCATATTTCATTCTTCTTTTAGTAGCCATC -GATATCCCCCAGTCAATTTGGGCTCCAAACATTTCACTGTCCTCATCTCCCCTTAGAAAC -ACTTGCTTACGTACAGTTCAAGGCCCCCAAACTTGAAGATGATTTCAATTTTTTTCAACC -AATCTAGATCCCAAGGTCCACTCATATGTTGGCCATTGGATCTCTGTCTCTCAAACTCCA -TCCAGATTGACTCAATCCCATCACTGAATCAAAGCCGTACCGGCAATAGTCATTTGCACC -GTCAAGATCCATTGGTCCTGGCGGAGGTACTTGTGTAACCTATTTATGATGATTAGAGAT -GGTTTTGTTTCATTCCTTGCACACAAAGGGATATCGTTACTGTTTTGTAGGTACCCTGGC -ACGACAGAGGCACTGTTTGATATCCGACTTGCTGTAATCTTCTGTCTGGAGCCGGGCATC -CACGTCCGTTTCCTACTGATTCTGGAATATTGCTTGATATCTGCTGGAATGTCTCGTTGT -TCGGATTTTGGATCTGAGCCTGCAAGTCAGGTGCACCAGTTTCGCCACGAAATCTGATAG -TCTTCATAGGGCGGGTTGATGGGTCTCCGTTAAAGCCTTTTCGTTTCTTTGTCTGAGCAT -GGGGTGGTTGCATATCGTTTTGTTGAGATTGCATGTTACCTTGTATTCCGCTTCCTCCAG -CCAACCCTACACTGAATAAATCATTATCCTGTTCACTACCTGACGGTTTTTGAGCCCTGA -ATCGCGTCGTGCCAATCACCGGTGAGGTCGCAGGATCGAGTGAAGGTCGCAGAGAAGGAC -CCAGAATACGGGTTTCCAGATCTTGTTTGATCCAGTTCCTGCTCAAAGTTTCTTCAATTC -GTAGTATTCCCTCCGGGACATCTAGTGTCCCGTCCTGGAAGGCATCCATCAACCTCATCT -TCGCTGTAGCTTTGGACTTTGTCGGCGGCAGTCCAAAATGGAGAAGTTGGGCTTCGTACC -AGTGAGCCGGATTGTCTGGTCTGGTTCTTCTGGCTCCTAAGGGTGCACTGAAAATTTCAT -CTAGCTCGGACAATGTCGCGCGTGAGTAGACATGGCCGCAAGAGGAAGTGGCGTAGAATG -TGTCTATGTTGTATGACCACTGGCCATGATAGACAGGTGCCATGACGTGTTGTGTGATAA -GTAAAAGGAAGGATACTGGTTGGTTCAAAGGTGAAATGAAGACAGAATGCTCAATTACTC -CGTCAGCAATCATGCATACATTGGTATCCGAATATATCAGATTGATGTGGTATACACTTG -CCGTTTGAGCGAGTACGTGGTGTGGCATCTACGCAACGGATTTCCTTATTTACTTGACTC -TCCCTTCCGCTTTTTTGCTCAGCCTTTGCTGCCTTCTATATGGAACCGAGTAGGACCATA -ATGTGAGGGAATAGATTCAAATTTCCAAGAGCCTTTTATATCTCACATCGCATGCATATC -GATGTCATTACAATGGACTTTTGATCACAACCCCTTCTCTCCAATCTCACACGCTGGAAT -TTCAAGAGCCCATTCAGACCTCAAGACCTCTGCTATAGTGATTCTAGCCCCCATATCTGG -ATGTAACATCGCGTAAACTACATTTCGGTGATGCTTCAAGATCCGACAGGTTAGATGTTA -TTCTCATAATCAGCCTCGCTAGAAGGAATCTATAGATGTAACTTACTAATCTCAGAGTCT -GTATGGGGCCATAGCCATCTTCCTCCAGACGACAGCGGAGATAATCAGCAAAGGGTTCCT -CGGGATGAATCGTATGAGCGCTGAGCCATAGGAGGCGGCCCGACCGAAGTAACGTGCAGA -TTATGCCGCATGCCCAGACATCCCCAGCTTTGAAATCGTATACATCTCGATTCTCATGGC 
-TTCGACGGCGAGTGCCACGACGGGGAAACCTCTCGGGAGGAAGGTAAGGAATACTGGAAT -ACGAGGCCGTTTGCCGCATGCACAGAGTTGGCACATATTTGGAATCCGGAGCTGGGTCAG -GGTTTGGTACCGAAAAGGACAACTCGGGACTCTCGTCTTTTGTGAGTGTGACTATTTCTC -GAATTGCTTCTCGGTCTTCGCCAAACCCACCCAGCTTCACTGAGCCTCTGGCTGTAAAGA -GTATATGCTCTGGCCTGATATCACCATGGGCAATGTCATTTTCGTGTAGAAAACCCACTG -CGCGCAAGACTTGAATGGCTAAACAGTCCAACTCTTCAGTCGAAGGTTTGTTTTCTGGGC -GCCTTTTTCGTGCAATAAAACAGTGTAGGTCTCCGCCAGCATAGTACGGCATGACAATAC -AGATGTTCTGCTTTTGATTATAGAGAATGTCGATGACTGGGACGATATTGGGATGACATA -GGGGGCCATTTTGACTTTGATCTTGAAGATTGTCCGGAGAGAGGGTGATTTGGGTATGGC -GAAACACTTTGACTGCGTAGAGTTCCAAGATTGCATTGGCAGATTTTCGCCTCAATATCC -CCCCCAGAATATGGCTTTGCGGGGATTTTGCTGTGGGCATGATTGGATCAACAACCAGTA -CTTTCTTTTTATATAATTCAATAGTATTGGTGGTATCATAGTGGATGATTCTGTTGAATT -TCCCATACTTTCCCGCGAACGTAGTTTGCGATGCCCCCTCTGCTTGGGTGTTGGATGTAA -TTCTTTGCGCCTGATCTGACTCGTGCGGACTGATAGCCGTCTCTTGTGGGTGAGACCGAC -CCTCACATTCAGGTGTTGACTGCCCCCACCCAGGTATAATGTTGCTTAGTCCTCCAAGAA -TGTGAGGGATCTCCCAGTCCCTCTCTCTGTCACTCTCTCGTATTGTATGGTGAGTATGAG -ATGGCAAGGGGCGTGGCGGTGATTTGACATGCGGGAAGGGGCTATGTAGATGAGATTTTT -GCATTCTCTGTTTCTCAATGGCTATTAGTATCTCCCATATGTTTCTCATAGACAAAGATT -GACGAAAACTAAGAAAGCCCATGAAAGCTCCCAGGGCTGTTAGTAGATGACATAATCATG -AATCATGCAACCTGAGGATGTAGAGATATACGCCAGTTGATTGGTTTACATGCGTAAATA -ACTTGGCTCTAAGCTATGGAGCATTAGATTTATTTATTTACTATGAAGTATTCATTCCTC -AACAAATATGTACTTATATAAAAGTATGCAGGTATACTCTACTAATCCGTTACTCCGTTA -CTCCGTAGTTATGTAGATGGTGTACATTTCAGCCCCACGTTCAGGCTAAACTTAGAGCGC -GTTAAGAATTGCCCTCTTCCAACTTCACTCCATCCCATCCTTGCAGGTACTCTGCATACA -TCATGGATTCATCTTCCACTCGCTTTCCACAGGGCTCCCCAACTTCAAACCAGAGTACAC -TCAGTCATCGTCCAAGTCCGCGACCCCAAAGCTCAGACACGCCGCCCAACACCTCAGCCA -ACGCTCCGATTCCAGATGACGAGCATGGCGCTGATCTACCGATGAACATGTCCGCATCGG -TCATGTTGACCAACTTACCTCGTGATGCGCACCAGGCACTTGCGGACGTGGAAAGCATTG -ATTCCGGGAAAGGTATGACCCCTCCTTTTCCAAGCTCTTGGATCAGACTGCGATGTCAGC -AAATAGTAGAGTTTAACTAACGATTGTCAAGTCACTGTGCGCTTTCAGCCTCTCCCTTCC -GCGCCGATTTTGAAAAACCGGGTGTTCAAGGTCAGCGCTTCGCAGAAATTCGAAACGGTT -GTCAAGTTTCTGCGCAAGAAGCTGGACTGCAAAGAAACCGATTCTGTTTTTTGTTATGTG 
-AATAGTGTGTTTGCACCCGGGTTAGATGAAGGCATGGGCGGATTATGGAGGGTGAGTTTC -CCATTCTACCTGCACATTCGACAAATTGAAGGCTAAGTTTGTGATAGTGTTTCAAGACGG -ACGATCAATTGATCGTCGCTTATTCAATGACGCCAGCTTTCGGCTGAGCAGGCTATGAAC -AATCAGCGCGAACTGCTGAATGATGATATGTACAAACGATGATAATGCTATATACAGAAC -ATGCAAGAAATGACCGAGAAACAAAGATGACAACCAGACGCATCATGATTCCCGGAAGAC -CTCGTCCACCGTCTTCCGAAATTCATCGAGCTCATCCTTCGCAATGGGATACGTGTATTC -CTCCGAAGGGAACGCTCGACTCTTGACCTCATCTCGATACAGCGAGATGCCACGGGCAGC -CTCATCCCAAACATCCGCATACTGCTTTACAAACTTTGGCACAAACCGACCAGGTTGGAA -ATTCCCAGACATGTCAACCTGCACAAGAACCTGCCCGGAGCAGCCATTTCCAGAGCCGAT -ACCGATAGTCGGGATCCGCAGTTTCTGCGTGATGATCGACGCAACCTCCGACGGCATAGC -TTCAAGCACAAGCATGAACACGCCAGCCTCTTGCATAGCAAGCGCATCCCGGAGCAGCCT -CATCGCGCTGGCGGCTGTTTTGCCCTGTACACGGAAGCCGCCGAGGGCATTCTGACGCTG -CGGTGTAAGACCAACATGGCCTATCACCGGAACGCCAGCTTGTGCGATTCGCTTGATTGT -GGGTGCGAGTTCCTCGCCGCCTTCGAGCTTGACGCCGTGTACGCGGCCTTCCTTAACCAG -GCGCAAGGCTGACTCGATTGCTTGCTCTGCGCACACTTCATATGACCCCATCGGCATATC -TCCGATCTGGAATTGGAGTATGGTTGTTAGTTGTGTGGCGCATGCCAGTCAAGGAATACC -GGTAACTCACAATGAATGCGGCTTTGGTAGCTCGCGCTACGCTCCGACAATGGAGGATCA -TCTCTTCGATAACGACCTCCGTCGTGTTTTCTAGTCCAAGAGCTACCATCCCTAAACTAT -CCCCGACCAAGACCATATCCATCCCCGACGCATCGGCCACATGAGCACTGGGAAAGTCAT -GCGCCGTGAGCATTGTGATTGGCTCCCCCTTTTTGTGCATCTTCTGCAGCGTCTGGATAG -TAACTTTCTTTCGCGCGTTTCCATTTCCACCCGGTGTTGGCGCGGAGTAAGAACTGTGTC -GGGTTTGGGCGAGGGAGCGTGGCGTCGGTACAAGTGAGGATAGGGCAGGACGGCGGTCGG -CGCGAAGGGTGTGTGTGGCTTGAGTTCGAGTAGCGGGAACCACTCTCCTCCTCAGTAGCG -ATGAAAAAGCGAGTGCTGTGCGCCGCAGGGCAGAGCCTGGCGCGGCCATTGTGTGGTGTG -ATTGTGGAAAAAAAAAGTGGCGCGACGCACACGGTGACGCACAGTATGAAAATTCGACAC -TTCTCCGTGGTGTGAGTATATATCCCAATATGTCCCAGAATGCAATGCTATATGCAGTAG -TTGTATTGTTTTCGAGCTGTGAAAGTGGCAATAACAAAGAAATAACGATGACCTCAGCGG -CGAATCGCAGCTTTTCAAATGCGCGAATGTAGAAGACGAAGAGGTCTTGAGGTCTTATAT -CAAGGAGTATGTCCGGAGCTGCGGCTTTGTTCAAATATCCTTCTGTCCGAATGAACGTGC -TGGGTTATGCGGCGTAGATGGGAGCATCAAAAATTCTTGAAGTCGAGGAGCTTCTTTACT -GCCGCTGAATTACGGGGCCGAGTGCCGAATAGGGTTGATTTTAGCAGCCATCGAGCCAAC -CAAGCCCCGATTTTGGCTGGCAGAAATTTGACCAACGAGACACAACCGGGGCCATCTCCG 
-CCCCGGGGGACCCCTCTACAGCTGGAGCCTAGGCATCATTCTCACAAAATTTCTAAACTC -ATTTTATCTTGATCTATGCAGTGTAGTGGAGCAAAGTTATTAGTCCTAGAAAATATATTG -CCGTGAAACTTGCGCGATTTGTCTATCTGAGTGGATCTAGATTAACGAGTATAAGCCTAT -GTATACAGGCGAGAATAAAAGCGGTGACTGAATTGACGCGATGGCTGCTAAAACTTCCCG -TAGTCAGGAATTTTGCGGGGTTACCAAGTGCTTGGACTTGAGCATAATGTAAACCCTCGG -ACCTGGTGCTAATGAATATATGTCAGTTTGACAATTGTCCACTTGTCAACATCCTCAAGG -CCTTGATTGATGTGTTGACATCGACTTTGGAAGCTCGATGTGTTTTACTTCACTTTCACC -GCCTGCAGTTTACGTCCCTTGGAGGTGATTACCGCATTAGGCTAAGTGAGGGGTCTAGGA -CTTCTACTTCTATAAGCTAATCTTTACCAAGAATCCATTCAGCAGAGATTTGCAATTAGC -ATGATACAAAATGCATACAGATCATCGAGTCAAATAAATGCTTATTCAGGTAAGTAATGT -CACGAGAAAGGAAAAGTCTCAATAGACTCGGCCGCCTGCGCTGGGTAAGAAACCACTGGC -ATCGGGAAACAAGACACCACCAATGAAAAGACACAAGGAAAAGCTGATCAAGGTAGAAAA -CATGGGCCCGTAGGACCAAAAAGGAGGATAATGATGTGTTAGAATGAAATGCAATACAAG -GTTAGGAACCCTTGAGTTCATGCTAAATCGTCCGTTATACCTTCAGTATATCGCAACCGG -CCACTATTAAAGGCCGTAAAGCACAAGCTGATTGCATGTGAGGCAAGCAGCGTCGACGAT -GAGTTTTATTGGAGCAAGATGATGAATGCAATTATCCAAAATTGCCGTCGGCGGTTGATG -GAGGTCGGTGAACGAGATCGAAGGTGATGTGATGCTGGTTTAAAAATCGGATATTCAATA -CAATTGAAAGAAGTGAAAATGCTGCAAAGGAGTGGACGACCTCCATCGAGGTCAAGGCAA -GCAGACCAGACTTCAGGAAGGGACCGGGTACTGGTACTGTTGATGCGTAAAAACAGAAAG -AAAAAATGAAAACGCCACCATGCATATGTCGATGCCGAGGGTATGTTTATGCAAATGTTT -TGAGGTGCAAGTCGGGCATGACGGAAATAAAAATTGCAACGTGGCAGAAGAAAGATAACT -TCAAAATAAAAAGCTCGACCTGCACTAGGATGCAGACTGGTTGCATTAAAGAAAAGGCCG -AGGTGCAAAGGAAATCATGGAATGCTGTGAGGAACAGCATATCTATTCATCATATTCATA -GTCGGCCTTGAGGGGACGCTGAATTTCAGCGAGCAATGCATCGAGCTCAGCCCAATTCGT -CGAGTTCGCCGCAAACCGGCGATGTTTTCGAGTTTGAGTGTGCTAAGAGAGGTCAGTATT -CATGTGAATTTGAGAAACGGGACAACTTCAATGATACCTCTTCAAAATCGTCAAACTTGT -CCCTGCAATTTTCGCAATAGCCGGGCTTGGGATCTCTGCGACGTTCGGGTTTCTTTTGTT -CATCTTTGTCCTCCCTTTGCCGCTTTGAGCCAGCCACCTCAGATTGCTTTGCTTCATCCT -CATTGTGAGGAATGTTGGACTTTCCAGGAGGATTGACGTTTGTCTTCATCGGAACTGTCG -CAAACCCATCTCCGCGACGCTGAGGAGCAGCCATAGATCCCACTGCGAATCCACCAGTGC -CCTTCTCAAGAACCTTCCGCTTGAGACCATGAACCTCCTTGCTGAGCCCAGCTTTTGCGC -CAGGTGCTGCTGCAGTAGACGAGATCATCTGTGAGCGAATTGCGGATGTCATATTCGACG 
-GCTGAACACCCGAGGCAGCTGGCTCATGGCCAGCATAAAAAGGCCCGGTGCGAGGGAAGT -GAGGAGGAATGAAGCTTTCAGAACTCTTTCGTTCATGAGTTTCATCAAGTGATGGCTTCT -GAATTTCGTTTTGAACTGGAGGTGAAACACGCTCTTCTTGTTTTGTCAGCGGCTGGGAAT -GTGCGATCTCTGTGGCCACGGATTCGGGAGCTTGGGCTGGCACAGCAGCTTTTGTCTTAG -CATCACGAGTTTCGTCAGCCCTGAGAGAGGCAGCCTTTTTTTCTTTGTGGAGTTGACGCA -ATCGGTGTCGGTCGTACTCCTTCCTGCTCGGGGGTTCATCTATAAAAGGGCACTTTCCAA -GGGGAGCACTTCGGAACTGTGGCCAAGATCCATCCTGGCGTCTCGCGACTCGGGAGTATT -CTCGAACCATCACTGGTTTGGTCTTTTCATTCATGTCATGGATGTAGACGAATGGTCCCT -TGAAAATAATCATTTCCATTGAGGTCAAGGGATCGCGATCAGACGGTCCGTTGAGAAGTT -CCTGGCGAAGAACCCGCGAAAGGTCGTTTTCGCCCCTACCGTTGGCGACATTACCACCAG -CAGCCTTGTTCCGAGCAGCTTGTTCATATTGAGCACCGATGTCACTGTCATTGATTGTAG -TGATCATACGCTGAAGCTTTTCCAATGCCCAGATTTTCATGCCCATCTGCCGAGCCCGTT -GCAAAATGTCCATAGTGACTTCACGACGCGTCTCGGTTTTCAATGAAGTATGCAGGTGAG -TCTCGCCGTTCCTCTCCAGAAGAGAAGGGTTGACAGTTTGGACGTTGCTATCGATGTTGC -TAGCTTCTGCAGTTGACTCCGGCTCTGTTAGAGTGGCTGATTCCGGGGGGATAGCACGCG -AGGTGACAACGTGGGTCACTAGACGCGAGAAAAACTTCTCCTCGCGCTGTATTGCAGTGG -GTTAGAGATAATTTCAATGGAAGGTGATCAATCACTGAATGGAGACATGAAGTGCAACCA -AGACCAGCTGAATGCTCTGTAGGGCGGTATAACTTACAGCTCCAAGAGCAATTACCTGGC -GATTGAACCGCCGTCTGACATCCTCAGGGATGCTATCGAAGTAGAACACAAATTGAGGGA -ACGCCTTCCTATAGTGCCGCTGCCATTGCCGGATGGAATCCATAGTATCTGCCGGGGCCT -TCTCAACTCTGACATGCTTGACTTGCGGTTGGCGTTCTTTCTCCCTAGCTGCAACAAGCT -TCTTCTCAAATGCGCTAGGATTGCCGTTGTTCGATCGTCGCGCAAAGAGTTTCGAGTCTG -TGTTCGTGGCTTGGCAGGTGGCAGAGCGAGGCTCGTACTCCATATTCTCCATCGCTTGTC -TCTTCGGCGGTGGCTGTCCGTAGGGGATGTCCACTCGAGTAGGGCGAGCTCGTTTAAGCA -CTAGCTCTGCCCTGCGTGGGGAATTTGTTGCATTCGGGACGTTTGCGAGTGGACGGCGAG -TAGCCATGGTCAAGGAGGTTCGTGGCGAGGGAGGTACGAAGACAGTGGCCATTTGAAAAC -GGGTTCCCAAGGCTGAGAACGCGTGCAGATGTTTTAGAGTGACAAATAGTCAGATATCAA -AGAGGGGTACTCGGTGAAGAGCACGCGTACTAACACGTCTTTTCAGGGTTAGTGGAGATC -AGAGAATCAACATTGTATCCCGTAAGGCAGTAGCAAGTAGCCCAAGTAGAGAAGAAGAGA -TCAAAGGGCTCGAAGGTGGGATGGGAAGAGAAAGTCGAGGTGGAAGACCTGGAAAGAAAG -AAGAGCAAAGCAAGTCGTGTGGTGTGGAGCTCAGTTTGTTTTGGGCGGAGGCCTGACAAC -CAACGGCAGAGCGCGAAGCATGAACATCAACATTGCATTCTTTGACCGCGACGAGATTGT 
-TTTTTCGCTTTTCTTCAAAACATCATTCTCGTTCTCATTCTTTGCTATTTGATCCGATAT -TGTCCTTCTTAACATCCGACTAAAAAAAACCAATAAATGATCTCCAACCTCCCCAGGACC -AGCCTTCCAAACAGCCCCCAAACAAACAGACCAGCCACCACAAAGTACCTAGTGAGGCAA -GGATGGTTCAGACATGTAGACTATACTTCCATGGaacaaaataaaataaaataagataaa -aAGGCTCAAACATGATAGTTCAAGTCATTATGCTATTGTACATGGGAATTGGGGGATTCG -ACGGCTACCATATACGTCACAGCTAACTACGTAGTGCCTCAATATCCTCAACCTAGTCTA -AGTTAGTAACGATCAAACATGGGCGACACTACAGTCTTGAACATACCTCCTTGACAGCTT -CGGGGGTTAAGACAATCGGGTGTTCCTCTCCGACACAAACACTATCCTCGATACGAATTC -CGATCCCACGGAAGTGGGCGGGCCATCGCTCATCGTCAGGAACATAGATGCCCCTGGCAT -GCATTAGTTTGTTTCAGATCTGGAGGGGGGGAAGAGGCTGGGGACACACGGCTCAATTGT -TATACATTGACCTGCTTTCAGATCATAGCCTCTGGAATATCCAGGGCAGTCATGGACATC -CAGGCCGACATAGTGGCCAAGATGATGGGGAAAGAGAACGTTCAGTGCCTAGTCGGAGTC -AGCATACACCGGCGTGAAAGGACTCTGAATACTTACATTGCCAGAAAGATCAAAGCCAAG -TGACTTGAGCTCATCTTTCAATCCAGTCTCAGCGATGCCATGGAGTTTGTCCAAAGAAAG -ATTGGAGGTCTCGCGGCACAGAGCTAGACAGCGCCGATGCACGTTGAGCACAGCATTGTA -TAGATCCCGCTGGGGATCTGAAAACTTGCCATTGACAGGCCAGGTTCTAGTAATGTCGGA -AATATAGCTTCCCCATTCCTGTAGTTTGGTCAGTATCCTGGAACTGTAAAGACTAAGCTC -AGAGTAAAAAACTTACACCACCGCCATCAACGAGAACCAAATCTCCATCCCTAGAAGTAT -GTGTAAGCAGAGTCTCACGGTAGTTTAGAGAATCCACACGAACTTCAATACATCGTCATT -TCTAGTGTAATGGATAGCCAAGGCGTTCTAGGTTGGGTCAGTATCGAGCTCAGTTTAATA -GAGGTCGGTAAATTTACCTGGCCACCCGCGACAACCGGGACAAATGCGTTGGTATCACAG -CCATTCACCTTAAACTGGTACTCCAGGAAAGAATAGAGATCTTTCTCCCGAGTGAAATCC -TGTCGCATAGCTTCGGTGAAAGCTCTCCCTGACGCTTGTCCCAAGCGGCGCATCTGTACA -ACCTCGTTCTCACTCTTGAATACTCTCATTTCGTTAAGCAAATGCTTCAAGGGCTTGACC -CTGCGGTTATCGATAACTTTCTTCAGCTTTTCCGACGCGACTGTTGGCCCATACAGATAA -CGATTTAAGGATGATCTTGAGCCATCAAATGCGGGGATGTCAGAGTAAACCTCCGTCGCT -CCCTGGACGATGCTTGGCAAAATATCTCCAATGCGTTCAATGTTTCCTGTCTGTTTGAGA -CGTTAACTTGATGATCTGTCGGATCCTTAAGCCAATCTCTCTCGCACCTCATCCGCATTG -AAGACGTCCAAAGCAGCCTGGGTGCCAGAGCGCGCCCCGTCCCAAAGTTCTGCTCTGGGA -TCCTTTTCCCGCACATATAAATGGAAGGTATGATTATCGCCAGAGCCGTCGTTCCCGATG -ATCGCCAGTGCATTCGGTTCGTTGAATCCTATCCAACGCGCAAACGCTTAGCGGGTATCC -TCCGCAGCATCACCGGTGTACAAATCGAACTTACCAGTCAGGTAAAAGAAATTTGAATCC 
-TGGCGGTACTCATTGAAGATGCCTTGGGCACGGTACTTGACCTCCGAGGCAGCCAGCACC -GCAATTGCGCCTTTAGGTAGTTTGTTGGCGAGTTTCGATCGACGTTGCGCATATTCAAGG -GCGCTGATTCCAGGCGTCACTAAACAAAAGTCTATTAGATAAATGAACAAGTCTGAAACA -CTGATAATGACTCACGTTCGCCCGGACTAAGGATATGGGGGTGCGTCTCATGCAACGGCT -GGCCAAATTTGAGTTCCGCGGCGGCCACGGAGGTAGCGTAACTGCGAGTCAGAGGGTTGC -ACCTAGGCAATTGGCGCAATGAATGACTCAAAGGCCTCGCTGAACGCAAATTGATAGTGC -ATTGGCGCACAAGAGACCTGCATGCCGCCATGCTTCAAAGTCTCAAAAAATAAGTGATAA -GAGTAAACCGTTGGAGTTATCGCCAGGTTCCGTGGATGATCCAGTCGGCGGTGTGGACCA -ATCACCCCGACTGCGTTTCCCATCGGAGACCAGTTCTATTTGTCCACACCCCCTGTCATT -TCATGCTTACTTCCATGATTCTGTGCGTTTGGTATCAATATGCCCTTGTCTAGCCTTGTG -GAGAACACTTGTCCGGATGGACCTGCCAAAAAGAGCGTTTGGAGGCATGCCCTAGCTCAT -CGCATCATTTATCACGACTCGCTATCTATCTACTTAAATGCGTAGGTTGCTTTCTTCTTG -CTTTTCTTCACTTCCAACTACACTCTCCAGATCGTCATATACATTATGGGACGAACCTTG -ACCTACCCCAAGAGGCTTTCCAACACCGTCAACCGGTACAACCACCGAGGTACAAAAATA -ACTACAGAGATACATCGAAGCCAACTAACACTATATTCTAGCCACATATGACTTGGGTCA -AATCCATTCAATCATCAATGACTCCCAAGTTCTCCATGTATCCTTCAACCCAGGGCCAGA -TGACCCATTCCCCGCAATCCTACCAATGATTGGGCAAATGGGATCATTCGATTTTCCCTC -CGCAAGCATCGATGAGCCTTTAGAGTGTTACCTCCATGGCTATGTCAGCTCACGAATCAT -GAACTTGGCCCGCAGCTGCAGTGACGGCGATGGCCTCCCCATCTGCGTGGCAACAAGCAA -AATAGACGGCCTCGTCCTGTCGCTCACGCCCAACTCGCACAGCTACAATTACCGCTCTGC -CATCCTACACGGCTACGCAACCCTCGTTACCAACGAGGAAGAGAAACTCTGGGCCATGAA -GCTGATCACAAACTCCGTTCTCGCGGACCGCTGGGATCACTCGCGTGTGCCGCCGGACCG -TGCTGAGATGCAGTCGACTGTCATCCTGAAGGTCAAGATTGTGGATGGCAGCGGCAAGAT -CCGTGATGGTGGTGTCTCGGATGAGCGCAAGGACACCAGTAATGAGCAGGTCACTAGCAG -TGTATGGACTGGTGTAGTCCCGGTCTGGGAAACGTTTGGAACCCCGATTGCAAGCGGTGA -TGGTAGTGTCGCTGAAGTGCCAGAGTATATCAAATCGTATATCGCTAGTAAAAACAGTCA -GAACCAAGCTCTGGCGGAGGGTGCAGTGAAAGTTCAGCTGCCGCCCCAGGAGCAGCACTG -AGGATTAGAAAAAGATCAGATTGGTTCAATTTATGACACGAGTTTATGTTTTCTTGTCTC -CCACAGCAGCCTCCCGAGGCCTTCTTATGTTTGTTTACCCTGCCTCCAGCATGTATATTG -GAAAGTAGGTTTTAAGAATACTCAATTGCGAGTAGGTTACATTGATGATTATAGAGACAT -GACTGAGATATATTTAAGTAAGATTGTTGAGTACAGATACACATTGTACCATAAACTATC -ATCTTCCACACAAGTAAATTACCAATGAGCAGCCACGCTTCCCCACAGTTCAGGGCGATC 
-TTCATGACCCACACGTCCATTATCATCTTCAGAGTCAACATTCCGACCAGTGCTCCGTCC -CGCCGCAGCCAATCTCTTAGCGACCTCATTCGCCTCGAAGAGACTGACAAGTCTCCTCTT -TGCAGGCCGCTGCTGAGCCCGTTCACGGCAGACCCGGAACATAGTTTTCTCTTTGCTGTA -ATTCGACCTACTACCGGGGATGAAGAATCGGGGCTCACGCTGAACGGCATTCTGCCATTC -AGGGTGGCTATGCGACGATTCCGCAGGACTATTTGGCGGCCCAGAGGTACTCTTATTGAC -CCAAGCGTCGACTGCCATTTCAAGAGGAGTTGTACCAGTGGCATTCTCCCTATTGAGCAT -GTCAGGACGACGATCGAGGAGTTTTCCAAGCACTTGGGGGAACCCCTTCTTGACAGCATT -ATGCACTGGTGTGTTTCCAGCTTCGTCGAAGAGCTCGAGATGCTTTTGATCCGTGGAGTT -AGCTAGGTCCAGAACGAGGTTCGTCATGGAGGTTATCATTTCATCATCAGTCTGGCGACT -GTTTGGGGGTCGAGGAATGGCTGGATCATCTGGGGTAAAGTGGGGATAGGCGTCAAGCCA -TCGTGCAAATGGCGTCCTTGAGCCATCACCTGCGCGCTCCATGAGCATTGTTGGGATGAG -TTGTTCATCCAGTAGATGTCGTAGTTTGATGAATCGGCTGGAGTTCTTGCAGACTCGACC -ATTATTTGATACAAGAATCAAGTGGAGGAGATTGCGCGCCTCCTTGTCACGAACGGCCTG -GTTGGCTCCGGCTTCTACCAAAATGCGGGCAAAGCGGACCTTGTGAAGGGACATGGCCCA -GGCCAGAGGCGTTAGTCCTTCCGTTGATCGGACCTCCAAGCACTCAGGGTAATGATTGAC -AAGATACTGAACGAGACGAACAGTCTCGTCACTTGGTTCAGACATGATTGCGCAGTGGAG -AACGAGGTTATCTTTTACATGGTCAGCTTTATTTGTCAAGGTTCCTGAATGGGTTCGACA -CTCACTACTCGCTTGTAGCCAATTTGAGAGTGTTCCTTCGAGTCCGAGCTTGGACTGAGT -CAATCGTTTGACTCTCTCATCATCATTGTTTGCATTGAGATACTCCAAGTAATAGCGAGC -AGGCGCGGTACCGAGGAAAAACTCTGTAGCGGCCAGATTACCTTGACGAGCAGCGATAAG -GAGAGGAGGACGCTGTGTGATGGCTTTCATCGGTTGCTCCCGACCAGCGCTAGCCCAATC -AGCGCGCTTGGTGCCACGAATTGACAACCCAGGGTAGTACTGAGGCTCTTTCTTCTCTTC -GACGCCGCAATCTTCGCTCAATTTTGTGAGTGGAAGGCCTGCTCCGGCACTTTGAATCAT -TTTGGCAAGGCAATCGGTACGTCCAAGCAAAATGGCACTTTGGAACACGTCGGAAGTAAC -CGAGAACTCACACTTGCCTGAGAGGTTGGTGCGAGCAAGATCCAGTCCGGTCTTGAGAAG -GAATTCCAATAGAGTAATGTTGTTTGTGTAAACAGCGTATTTTAAAAAGCTGTCAACATT -GATATGCTTCTCCCTCTTGTTCCAAGACCTTTCGTCTTCGTATTCGTCCAAGAAGAGGAA -GGCATTGAATCGTGCCCCGAAAGCAGCAAGAGGGGACGTATTGCTCTTAACCGCACTGGT -AACTTCGCCAATATTTTCGTATGTGAAGGTATCATCGACAATGTCACTGACAATGTTGAG -ATGCTCGTTCTCCGCATCTGAGTCGTCTGAGTCATCGGAGCAGTCGACATCTATCTCAAA -GCGCGTCCGACCCTGAGGTTCCTTCACCCTGTACTGCACATGGAGAATTTGAAGGATGGC -CCTAGCGACGGCGAAATGGCCACGCATGATGGAAATCGAGAGAGCAGACAGGTTCTGCTT 
-GTCGGTCACAGCAATCTCAAGTGGTGGTTGGTCATTGAGGGATCCCCACTTGCCCAGTGT -CAAGGACTTTATGGTATCAATATCGCCATTCCAAGCTGCTTCGAAACTGTGAGAACATTC -GTTGTCAGTAGGAGAATCTTAGGCTGTGGAGTTAAGGAGGGGACACATACAGTTGTAGGT -ATCCGTCGCGGACATTGTCAGTAGTAGCCGGTACATTGAACTTGAATTTGATTTCCCAAG -GCTTGGTGGCTTGCTGTGTCTTCTCAGTATAGAATTGGGCTGGTGGATTCACAGGAACTT -TCTGTGGAGGATAAAGCTCGTCCCATGTCTTTGCATCCTTTTGGAGGAGAATGAATTCCA -GCTTCTCGTAGTCGCGAATCAACTCCGCAATAAATGCCTTCTTCTCAACCAAGCCAGGAG -GATCAGCAGGGTCGTTTTGGCGATCACTGGCTTCCTTTTCAGCCTCTCGGTTCGCCTTAC -GCACGTTGCGGAGCAGATCCTTTGCAGTGAACCTTTTGTAACTACCAGATTGGAACTCCG -ACAGATATGTCTCATCGTTCGGGTCAAGTGGAGTTACAATGGAATGATGATAGTTTTGGT -CCTTTCGCTCGACCAAAAATTCACGGAGAGTTTTCAAGGTTTGGCGCATAACATCCAACA -CCGTTTGACCCTTGTCTCGATATCTCGCTTTGAGTTGATTAGTAGGACTGTTGGGGTTAA -CCCCACGATTGAGCAGGTCGATGGCGACCAGAGGGAGATCATTTTGAATAGCCAGTATGA -TCGGCTGCTTCACACTGGACTGTGCGTCTTCTTCAATATTGCCGTAGTAGGTCTGGGTTC -CTCTATATTCTGTGATAGCCTTCACAAGGTCGCCGAGGTCAATCTCTGGGCTAGCACCTG -TGTTCAACAGCTTGATTGCTGCGGGAGCGTTCTTCGCAGTGAGAGCAGCCATCAATACTG -AGGAGAAGTCTGGCGTCCAGTGAGAGCCCTGAACTGCTAAGTGATTGATTGCGCGCTTTA -CAGCCGGCTCGTCATGCTGCATGTAAATATCAAGAATATCAAACTTGTTGGAATGGGCAA -GATAATATAGTGGCGTCTTCTGCTTGAGGTCTGCCTGTGCTGGTGAGGCGCCGAGATCGA -GAAGGGTCTGTGACATCTCCTTTGCTTTCTCGAATGGGAGGGCATGGATGAGAACCAAGT -TGAGAATTGCAGCAAAGGGTTTTTTGTGGTGCTCGTTCAGAATTTTCACTGGCATCAAGA -TATCAGCACCGAAAGACGTGACAAGCTCTCTGACGGCTTCAGTGTGACCATGGAGAATTG -CGAGATGCAGAGGAGCAGTTCGACTATCCCATGCAACAACATTGATGTCGTAGACATCCG -GCTCAAGCTCGTTCTCCTCAAGCGTATCGTACGTGGTCAGGTCTTCCTTCTCATCAGCAT -CGACTTTGACGTAAGAAGCAGAGGTGTGAGACATGACATCAGCTTGGTCAATCATATCAA -TATCTTCGTCCTCATCTGTTTTGGGCTTGGCCTCTGAGCTTTTACTCTTTTTCAGAGCAT -CCTGCTTATTAGCCTCTTCTTCCTCATTCTCGTTGCTCTTTGTGAGTAGAATTCGAATGA -TCTCGACGTGCCCCCGGGCGGCGGCAAGATGGAGTGCGGTCTTTCCATCAGCCATACGGG -GGATCATGCGAGCGCCGCCGTCAACAAGGCATTGTACAACTTCAGGGGTCGAGCACATGC -ACGCCAGTTGGAGTGGTGTTCGCCCAGTGTAGTCACGACGATCCGGGTTGCTGCCCTCCT -CTGCAAGGAACGCTTTGACAGAGTCAACATCGTTTGCCACGATTGCCTGGCAGAGTCTCT -CGGTGTGGCCAACGTAAGCCATTTCTATCGTGTTTGTATTAGTGTGTAGTTCACTGATGA 
-AGAAAATCCTGAACAAGTTCACTCACCCGTCCAATCAGTCTCAGTGATTGGATTGAAGCT -TCCGATTTCCTGACGACCGGGATCATCTTTGATGAAATTAACCTGACCAGAGATATAAAT -CTTGGCCTCCTCTTCGGCGACCTTGATTTCTTCCTCGGTAACGGGCTTGGGGCAGAAACC -ACAGCAGTCGCCAATGACATGTTCTGCTTCGCCGAAGAATGCTGGATGACGATGAAGATA -AACTGTACGGTCCTTGGGCTGGTTCCATTGGGCGTTCAGCAAGAGATCTTTAGTGTAAAG -CAGCTTCTCAATATTCCTGGCATTGAATCTCCGGCCGTAAGGAATAGTCTATCCGATGGT -CAGTAAGATCAAACGGCAGCAAGTCATTTGCAGTGAAGGTACGAACGAAAGTATGGTAAT -CAGACACTTGGTCTTGTTCTTGCCATTCAGGCACTTCATCTTCCCAGTCATCCTTGATGT -TTCCATGCAACATTTTTCCATGACGACGCTGCAGGTACACATCTAGAGGAGGGCGACCTC -GTTCCTCGCGCCTCTTTTGAAGATAGTTATCCCGGTCTTGAGACCGAGGAAGCTTTTCCA -AAACCAAAAGTCTTGCAAGACCTTCGGTTCTGGTGAAGCTCCTCTCGAAGATGGTCTTTT -CAAAAGTTAGTTCCACCGAAGCTTAATGCATTCAACCGAGGACTCAATTCTCACCGGGTC -AATCTTCGATCTGTCAAGGGCAGGCCAGAAAACCTCGAATCCACGGCGTGAATACTTGGA -GAGACGGTTCTCATATGATGGTGATCGTCGGGTTAGGTCAATCTGGTTTGTCTGGGTGCT -GTATGCGGCGATTGCACGTGGAGATGTATAGACTTGATGACCATCATAAGCCGCGCACGA -AACATCCACGTCAAAGCCGGTGAGAATCTCAGCTACTGAACGGTAGATGCGAAGCACGAT -TTGCACATGGCGGTTTGGATACTGAGAAGCAATCGTAATTGTGTTCTTGGTTCGAATGGT -TGTGGTCTCATAGAGGATGGTGTTCTTAATCTTATCTTCGATATGCATGATCTTCTCGAG -GGCTTGCTCCTCATCCAGACCATATAGAAAGAGATCCACATCTGACGCGGGAGCAAACTC -TTCATGATAGTACTTGCGAAGGCCACGCTTTGAGTTGCGATACTTCTCTGGCACGGGCAA -AAGGGAGGTCACTACCGCGCTACCAGCAACCACAACGTTGGACCAGTCGATTTCACTTAA -TGAGCCCTCTGTGAAGAGCGCAAAGTTGTTCTGAAACTCATTCAGCGAGGGTACCACGGC -TGCCACACCATTTTCTTTCCGTTCTTTTGCATTCAGAGGCAAAAGATACTTCTCCGCTTG -CTCTGGCGTTTCTGAGGCAAGGTCCCGGGCTCGAATAGAAAGATCTGTGGAACCGTTCTG -GTCGTACAAGGGCACAACATTGACATAGTTATCCCGGACTGATGGGTGATCTGGATCTTG -TGCAAATGTTTTGCGCAAAACAGCATCATACTCGTTATATGGCTGAACCAAGTCTCTCAT -GGGCTTTTCCGGATTAGCGTTGACATACTTCACGAAGTCATTGTGCTTCACTGGGAGGGA -GGGCAAAGAAGTCATATTTTCAACTGTAAAAATTCAATAATGCTGTTGTAATGATGTATC -GATTGAATAGCAAAGTCAGGAGAGATCCCCTGAGGTTTATATAGAGAAGCCCAATGTTTA -GGCTCATATCGTTGATATCCACATAGCGTTAGCGTCCAGGCATCCTTAGGCAACCTTAGG -CAACAGCTCCACTATTTCATTTTTCAACCAATCGCCTCCTTTCCGTACAACCAATCAACG -AGATCCTGCTATCCGCTCGCTTCGTTTGCTCCCTAAATCTTGATTAATGAGACGTCAGTT 
-CACATGTCGACTTATTTCTGAGTTGGAAGCCTGGTGCAGATCATACACAAATATCAGGCT -TCTTGAGACGTTAAAGTGGATCTAATTTGTGTGTATGTATGAACAAATCCATTTATCGGA -GTTGTGCTTTTTGATGTGTTATCCCATAGTACTCAGCTAGTTTTATATATACACAACTTG -TTGTAAAACCACAACAGTCTTTTAGCAAATTTTGAAATAACAATTTATGTTTTATATTGT -AGTACTGCAAATCAGTCTCAGGAATTTAGAGGCTCGCAATCAATCCTAGAAGCTAGAAGG -GGCAAAGTCGTTAGTGGTCGCTATGCACTCCGATATACAGCGTACTACCAAGGCGGCACT -CTTACATGAAATCTATATTTTCCGATTATTGTGTGTCTATCGCATGCCTTCGTGCAAGCT -AATTGTGATCCCACGAGCCTACCTAAGTTGGTGGGATTTAAACTTCTTAGGTATTGCAAA -GAACTACAATTGGCTCAGACTCCCACAGAAGGCGCTTGGGATTTAAATTGCCATTTGAAA -TGCCGGCCTTTCGGGTGTGCTGATATAGGTGAGAAAGAAGGATGAAAAAAAAGGATGGAC -ACGGCATATATCAAGGCATTGTTGTAAACACCTACCGGTTTAAACTATACAGTCAAGTCG -CTAGGGCTTGTCCCATAGGATGAGCCTGTGGACCGATGGGATCTATATCCCTCTTCCTGC -TCAGAAAGGTCTTGGATTTGCTTATTCCCAAAGTCATTGTATGACCAGGATAGATATTGG -GTAGTGTTTCGAGGGGAGTGTTATTTAAATAAGTGGGCAACACTCCTTTCTTCCATATTC -AAAAGCTACATCCTATACACCCTATGCAGCTCACCCACGATCTTGGGGACCATAACTTAC -CAAGTGACGGTATGCCCAATATTGCCGATAGCTCTAGAGGGCAAGAGAAGATAGCCCATC -ATGTATACTAGTCACATCGGGTCCACGTGACCCAGCCAACCCCAACTGGAGGATACTCAC -AAATGCTTGAGACCAAAAGAAGAGAAATAAAGTCTAACCCAGTGATCAAAACAGACAGCT -ATTCTGTCGCTTAGGCCCATCTAAGCCTTCCACACTTCTTGTCGTACATGTACGTCGGAG -ATACAATTAAAGGAGTAGGATAGCAATTAGCAGCTCGCAGATGTATAATTAAAGTTCAAT -GATTCTTTTTGACCGATACTCACCATATAAAGTTCTATTATCATGAAACTATGTCGAGAT -AACCTTGTGATATGAGAGTAGCAAGTTGCATCAGGGCATGGCCCCACATATTTCCCCACC -TATACTGAGCGCACCAGCCATCGTCATGGCGCTGGGCCTCGGCGCTGAGTCATCGACTCA -TCATGCCCGATCTAACCTAGGAATCATTCACCTGAAACCTCATTCTCCTCTCCCTGTTGT -CCTCAGGTCAGAGAAGTGAAGCATTCTCCGATATTTGTTGTCCGCATCCCTTTCCTATCC -CTTACTTATTGACCATTGAACTACTTCTCTTTCAAAGTTGGTTGCCCCGCCCATTAGTTC -CCCGCGCCGATTCCGGGAGTAGCTGCTCTTGTCAATCATAGCGTTCCGACCCTGCTTGGT -CTGCTTTCTCTGGGAGAGACCCTTTTTGACAGCATTTTCCAGATCAGAATTTGCCTTTGC -TATCCAAACCGCCAACATGAGTAGCCCAAAGCGCAGAATCGAGACAGATGTGAGTAATGT -TGCGGATAGTCCGATAAGCCCCCAGCGTCGCCGGAGATACTAATAACGGTTGTGTTTGAT -TATACAGGTCATGAAGTATGCTCCGCAAGCCCAATAATATCTACAATTCACAGCTACTAA -CCTTTATCTAGGATGTACGACCGATAATCCCCTTACCCTTTCCGCCGCTTTTCCCAACCT 
-CTTTGGTTGGCCCTCGGCGTGTATATTTGTTTGCCGGGATCTGATCTGACGGTACCTTCA -GGTTGATGAGTGACTATGAGGTGACTCTGGTCAATGATAACAGTAAGTTGTGACCCTGTT -TTGGCTATTGGTCTATACACTGACTTTGAAAATTCAGTGTATGTACTTCGTACACCCCAC -GATCATCCCTTGGACCATTTGAAGCTTTCCGATACCAATTAATACCCACAGGCAAGAATT -CTATGTTCGATTCAAGGGCCCGGAAGAGAGTATGTCTCGCCTATGACCAACCGGTACTGT -ATCATACTGACTTTCTCAGCTCCTTTCACCGGAGGCCTCTGGAAAATCCACGTCGAGTTG -CCTGACCAGTACCCATACAAAAGCCCGAGCATTGGATTTGTCAACAGAATCTTCCACCCA -AATATTGATGAGCTGTGAGTCATTGTTGGTCCGACATGTTTGGAGAGTTAGAGATTAATA -CTATCCTTCGCTATAGGTCCGGCTCGGTTTGCCTCGATGTGATCAATCAAACCTGGTCGC -CAATGTACGACATGCTCAATATCTTTGAGGTCTTCCTTCCCCAGCTCCTCCGTTACCCCA -ACCCTTCGGATCCTCTCAACGGGGAGGCAGCAGCGCTGATGATGCGGGAGCCCAAGGCAT -ACGAAGCAAAGGTCAAAGGTGTGTCACAATTTTTAAAAAAGAATCATTCGTCATAATGCC -ATACTGACAGCTTATCACCAGAATATGTTGCCAAATACGCTAGCAAGGAGGCTGTGGACG -AAGCAGGCGAAGACACCGAGTCTGAAGACGAGCTCAGCTCCGCAGGAAGCTACGAATCTG -GCGGCGAAGAGCCTGCAGGTACGATGGACGATGTGTGATGGTCTACAGCGCGAAAGTTTT -CGCCGCGCTTGGGATGACCCGGCACGAACATATTGCATCACTCTACTTTTTTCAATAATC -ATCACGGTGCTGGATATCAGCGGGCGTTTTCTTTTCTGACATAGGGGAGTTGTCATGTAT -CCATTTTCTTTTTACCCTCAATGTCTATCTTTATCTTGTTCTACTCTGCTTATTCTTATC -TATTTCGGTCTACCCAAGAGGTATTTGCGATAATTACCGGCGGAGTCATGGGTTGAATCA -TTGATACCAGATCTCTCATAGGTATGGGGTATGATATTCTGTTCTAAAGCTTGTGATTAT -CCTGGAACCGTATATAAGCTTGAAGTACTTTGCATGTAGACCATTTCACCCAAAGCCGAA -CTTCTATACGGGTTGACTCGTCTACGATGCCTATCTTCCCTAGAAGAATTGGCAGCTTCA -GCATCTACGTACATTGTCCTGATTTACAGCAACCCAATTCGTAACATATTTCAAATTGAG -TGTCTATGCGAAAATAAAAGATGAAAACCGGTACTCAAAAAGAAAAATACATATACAAGC -AGTGTACGACTGACAATATGATATGAAGGCAAAGAAACAAACTCAGAGACAAAATACGCC -AACGGGAAAGAAACCAAAGATATGATGGAGAGAGAACAATTCTGATAACAAAAGAGAAAG -TCTGCGCCATTGGGTATTAGCAGACCAAGAATGGAGGGAAAAGAAGGAATGGCTATGTAA -GTTCAAAAAAAAAACAGTCGGCCTGCGATCCATGCTAGTAACAAAAATCCCTCAAACAAC -AGCGCGGCGCAAGAAAAGAATGAAATGACGGGTCTTCCAGTACCCTTATCTGCATCCGTT -GATGCAATCCTCATAGCCCGCACCCTCTTTTGTCGTGTACAGCGTCATAGTCAATGTGCA -GTGATGCATGCCCAAACGCCTGTTCTTAGGTGACGTTCAGAACCCCAAACACATCATGGC -TGGTCTTAGCATCTGTGTTGATATCGGTGCGAGACCGTTTCTTATTTGGGTTTATGACAA 
-CTTCACCCCAATGTTTTGGGACGGTGCCTGAAGCGCCGGAGGTGGGACGGCTTGATGGTG -ATGTGCGCGCAGTAGAACCCACGGGTGTTGTTCTATCAGTTTTTTGTGATGGCACGGGAG -ACGCAGATATCGGTGTGTGGAGAGTTGCTTGGGGTTGGAGGTGTGGTTGTGGTTGGAGGG -ACCAGATATCAATGACGGAGCTGAAAGAGAGATGGTTAGCATAAATTCAGTTCCATCGGA -ACCTAGTTTGCCATGTATCAGAACAGAGTAAAGTAGTAGAGTATACTCACTCAGATGTCC -GCTGGCAAATAGATTCCAGCGCCTCCCTCTCACGTCTCACGTATTCCGAATCCGGCTGGC -TTGCCTGATGAGCATGATTCAGTGCTCCGTAGCCATACCCGGCTTGATAAGGATCTTCTT -CGAGTAGACGCGCAGATTCAGGCTGGAAGAAAGAAAGTTAGACATTGTTTTCTCCGGTAA -ACATCACACGCATTGCGTATCAGACTGAGGTAATGCGCGTTGAACATGTGATGGTGAACC -TAGCTCCAGACTCGGAACTCACCTCAGGAGATTCGTGGCGATTCCCGCCGAGGCAGGAGG -CACAGATACCCATTTATGAGGATCGTCAACAGGAATTCAGAACCGGAGATGTCCTGGGCT -GCACGTTTGCGCTACGCGATGGGCACGAGGCATTAATCACCCCAATAGTTGAGATGCGGG -TATCTATTGCTAGAGAAGGCAAGGATTATCGGAGGGTAATGAGTGTGTGGAATATGCAGA -AATAGAGTATATGAAATGAACAAAATGGCGCAGTAGAGAGTCACGCTGTAGACAAAAGGA -CTTCTTTCGAAGTCGGAGGTTGAGGCTGGGCGGCCCGTGCCATGTCATTAGGCCGTGGCC -GCAGTGTGCCGTGGAAACATCGTTTTTCTCAATTTGGCATTCGGTCCTAATCCTCTCCCC -AGCACACACCACACGATGTTTGACCTACCAGATGCTAAGCGGTATGTGAGCAAGAAGCAT -ACCAAGATACTTTTCAAAATACCCAGCAGCTAATCTGACTTATGCAGCGTCCGCCGCGAT -GAGATGCGTTCTCCAGCGTCATCCCGGTCGCCATCACCAGCACCAGATGACACCGCCGCT -CAGGATGCATACGCCCGTCTAGGAAAGCTGCTCAATCTCGATCAATTGGATAGCACACAG -GAAACAACCAGTGAAGATAATCAGCCCGTGGAAAACGAGGACGAGGAACAAGAATTCGAG -TTCCGCCTATTCAGCGCCCCCGCTAAATCAACGGAAGATACTACCAAGCAGTCAGGAAAG -GCTCCAGATGAGAAAAACACAGAGGAAAAGCCAGCGGCCATTACACAAAAATTGAGGATT -CGGCTGCATTCACCTACACCGGGACCTGGCGCCGGCGCCGAGGGACGATTTGTGAAAGCC -TCCCGCGGATGGGATTATTACTTCTCTACGCCGTCTCTCCAGGGCACTCGGAGCGGAGAA -ACTACCGCTGAAGACGAGAGCCGGATAGCGGAGAAGAAGAAACAATTTGAGGATATGGCC -GTTAGCGGACAGCACGTGTCGACATGGGCCAACTCGCTAGTCTGGGTAAGGAGATACTCA -TTTGGGATTTGGCTAGTTTTATGGCTGCTGACTTAGATTATAGCCTGGTTGTCATCTCCC -CTGGCGAGTCATCCACCTCAAACGCCATCAGACCAAAATGCCACGCCCAGCCAATAGTCT -TCCCGTCTACGTGGTAGAAGGCGCACCTGTTTCCAAGAGTCCACTTACACGCAAGAAGCC -AGGCAAGAAGCGACGAGTGCAGCTGCGCAAGCGGGTTGCGGCTGCACAGGCTGCTAAGGA -AAATGAAGCAGAGAAGCGGACTCGCAAGAACCGTGATAGAAAGCTCAAGCGCAGACAAAA 
-GGCGAGAGAACAGAAGGCTGCGGCGGCGGGTGTCAGCGTTGAAGATGTCGTTATGGCGGA -TGGTGATGATCATTCCTCAGGCGGGGAGGAATGAAACCATCTAGCACCAGGATAGACTTT -CTAATGTGATACCCGTGGGAGTGTTGGTTTTTCCTTTGCCGTTAGCATCAATAGCACGAT -ATATCGAATTAAAGCGGAATTTTACTGTTGTGTAAGCAATCTGGAATGGATGAGTAGAGC -TTTGCATGGTATTGAGCGGTCTGATCAAGGCCGATATGAGAAAGCAGCATACAATGCATC -GCTTCGTAAGAATGAATAATATACAACGCTGAGGGTATGAAACGAAATAGGGAGCGGGAA -GAAAACACTCGAAAGTTAGATCAAACTCGCAAAGAAAGGACCTATCCTCGCCAATCGGAG -GGATAGTCACACTTCTACTTAGCAGGGGTGTCTAGCATCATTGTTAGCAATGTCTTGTGC -CGTGCGAAAGACAATTGACTTACCGGTCTTGGCGGCAGGTTCCGCTGGCGCAGCCGGGGC -GGCCTCGGGTGCGGCAGCGGGTTCCTCGGTAGCAGGGGCCTCAGTCTTAACGACAGGCTC -CTCGGGCTTAGCAGCAGGGGCCTCGGTCTTAGTGACAGGCTCCTCGGGCTTGACGGCAGC -ATCCTCAGTCTTAGTAGGCTCCTCGGGCTTAGCAACGGGAGCCTCGGACTGCGCAGCGGG -TGCAGGGGCAACCGGCGCAGACTCAGTAGCAGCAGCAGGGGCAGGCTCGTAATGGGTCTC -GACGGGACCGGCAGTAGCGGTCATCTCAGGGGCAGCGGACGCCGCAGTGGTCGCGTCCTT -TGCTTCTGGTGCCTCTGGCGCCTTAGTAGCATCTATATCAGATCATAAGTCAGTTGGGGG -CCAAATCGAGAGTAAATAAATATCAAATCGATAGGAGAGCATCGTACCGGGGACGGAGTC -CGGGGTGGCAGCGGGGGCGGTGGGCTTCGCGGGAGCTGTCTCACTGgccgcagctgcaac -gggggcagcagtagcagcagtggcggcagcaggggcagGTTCGGTCTTCTGGGAGGGCTG -CTTCTTAGCTGACTTCTTGCGACTGCTGAAGAAGCGCTTGAAGCCCTTCTTCACCTGCTC -AAGAGCGTCGGGAGCTGGGAAGATGTTAGCGCGGTGTACTTGCGGTCCCGAGCGCCAAAT -GTCGGGAGTATAGATGATTATTTGGATCACTCACGAACACCAGGCATTTTGATCGAGTTT -GAGATCTGGTTATAGGGAGATTTGATAAAGGATATGTGAATCAGAATATAAGTTTCAAAT -CGAAAAGGAGAAGCGCTCAGTCTATTCAGTCCCACGTGACAGTCTACGTCTATTTTTTTA -GTTTTTTTTCTTTTTCTGGAATTACCTTCGCACGTCTGTTGTTTCCGATGCCTGAACAAT -GGACGTTTATTGGCTGGCAATGCAACTGGGTCCTATCCCGCAGCTTATCTTGGCCTCTGA -CAGCTATTGTGGCTGTGTCGGCGCTAATTTGTTTCATTTTATTTTTTACAGACACTCTAT -TGTTGTTTCATACCATTAGCAGAAAGGGGAGATACGTACTCCGCATGGAGTAGCTCGGAA -GAATTTAAATAGTAGAAGGATCGTTCCGTTGATATAAATCTAGATGGGAATCCCAAAGTC -CCAAGCATAAGCGATGCAAGCATGAATGGCAGGGATGGAAAGTCAAAAAAGAAAAAGAAA -GAAAAAAGCAATGCACAAAAAGATAACGCCGGGGTATCAATAATATCAAGTTCGCAAATC -ACTGCGCAGGATGGGTATAACTAAAGAAATTTGAAGATTATCAGGGCAACAGAATGCCCT -AGGCCATAGTATATTAGCACCAATCAATCTTCATCATGAAGGGCAAACATACCTAAAATA 
-AGAAAAAACAAAGCGAGAGCAATCTTAAGGTTTTGGGCGGCGATGACCATGAGGAGGACG -GCACAGAGAGACCACGTGGAGGATCCTCCACCAAAGCTCGGGAGGTGTTTGGACTTGAAT -GCTGCATCCTCGATAACAGATTGCAAATTGGCAGCGGCAGCGCTAGTCTTAGCTAGCAAA -GCTTGTGATACTTGCGCATGGAACTGAATGGCCTCCTGGGCTCGAGATTGTGCAGATAGG -GCTTCTGTTTGTTGGACCTGTGCCTTTTGCAACTCGCTTGCTTTCGATTCGGATTGCTGT -ATAGATGTTTCCATGCTTTCAAGTCGCTATGAAAGTTAGTGAGGGAGTCGATATGTGGAA -AATGGACTCGTACCTCCGCCATCCTGGTTTCCTGCTCAAGAATCATGTTCATTCGGCTGG -TCAGCCATTCCTACAAAAAGGGGCAAAACATTAGTCCCACAGTATACACCTAAAGGAAAT -TCCATATTTCGCAAACCATAGCAGCATCAAACCTCTGCATGCCTCGGTATATTCTGTCCA -TGTCTGAGTCAAGAAGCGACTCGAGCGACATGTGTAAAGAGGAGGCGAGATCCTTCTGGG -CGACAGCATTGATTTCTTGGACCTGCAGCGTCTCTTGGCTTCGAGCCATTGCATCTTGAT -GCGCACTCCGTAGAGCCTGTTGAAGTGTTACCACTTGGCGGGATACATTCCGGATGTCCT -AGTTGACATTTAGTACACGCTTCAATTCTGATAGGCTTGTTCAAGATTGACCTTTTCAAG -GGCCCCTGTCCCGGTCCGCACATTTGATAAAGCCGCAGATATAGAATCTTGCAGAGACGA -TATCCCAATTTCAATTTCTCGAAGAAATTTGCTTAACGTTCGTCTGAAAACTGAGTCAGT -CGCTTCCATATCTGTCACGATCCTCTCTTGCAGAGACTGAACAGCGTGTATGAACGATTG -CTGCTCCGTAGATTGTGCAGCTGCATGTTGCAAAGCCTCTTGAAGCCCGTTGTTGAGTTT -GAGACTGCTCTTGGCAATGGATCGATGCAGGTCAATGAGCTCCTCTTTTTCCGTCTCCAT -TCGCGAGGCATGGCAGATCACAAGAGCATTCTGGCGACTGTTACTGTAAGAAGTCCACCA -CTGGGGTCGCGATTCAAGTGTCTTCAGACACTGCTCTAATAATGCTTTTGGAACTTCGTT -TGATGCAGGATCAGAGCTTCTTTGTCGACTTACAAAGCTGAATCGATTCTTCTGTGGAGG -TGACGACACTGTGATTGATAGGCATGACGGAGGCATTTGAGTACCAGCACCGTCAATTTC -ACATAATGCCAGACGTGCTGCATAGATAGACCGAAGAAAGTCAAGCGTCTCTGAGCTGTC -TGTCTGTGCGTCATGTTTGCCATCGCTGAATGTTTGACACGAGCTCACTAATCTTGTTGC -TGCAAGTCGATTGCAAGAGGGTGAAGATTTCATTGAGTCTAGAATTTTGACGGCTTCATT -GAAGATGCTGTCTTGGTCTTGTGATCGCGAACTGAGAAACGAGATCGCGTCGATGCCTGG -TGCTCTTGGCGCGATAATCGGGATGCCTTTTTGAGTTAGCAGAATAATCTGAGACATATC -TGATAGCCTTACTGTGCTGAGAGGCATCTGCTCGACTGAGATGTGTAAGTCCCAGAAAGA -TATATGAAAGCGCTTGAAAGCTGTCGATTGCCCTCATAGTGAAAGGTCTCGCTGGCTTGG -ACTATGAGTGTTGCCCTGGACTGGTCCTGGTTATTGATCACTCGTGTATATTCTCTGGGA -TGTATGGGTGTATACGAAGATAGGACAGAAGAGTATGGGATGAAAAGAAATGGATGAGGT -ATGGATCTGGACAGTGAGATTTGACATCCAAGTTGTTGGGCTCACGCCTACACCTCGCTT 
-TACTTTTGCCTGGGAAATTTCCCCTGGGCACTCATTTTTTTGCCTATAATATTTTTGGGC -GTTTTTTTTTTGTATTCAAATGCTAATACGATCATGAATACATGACCCAAGGCCGAATTG -ACGAATTGAGGGTGTTCATATAGGTTTTTTTTGGACAGAAGACAGCCCAATATCATTCAT -AAGAGAAACGATTCATATGAGAAACATTGATGGCCATATCCCAGTCCGAAATGCATGTCA -GATGAACGCCCTTGGCAAATTGCCCAGACATACATGGTAAATATTATTCCGATGGTGGAT -GTGAAGTACGCCCCTCAGGGGACTCCTTGTCTAATAGATTTTGAATACGCGTGACCCTTG -TCCTAATGTTGGCTTATGAGCTTGAAGACGGTCAGTACTTCCCCGAGGATACACAGAACT -TTGACGGAAGATTCAATCCATACTCACGATACAAAAGCTTATAATACTTCGGAAAAGGCG -CCATATAAGCAAGATTCTTCGCAGAGCATTTGGATGGTGCCTTGATCGAATTAGTTCAAT -AGGTGTGTAGATACGGCGTTTCAGGTCATGCGTGTATAGGTACTATCCGACCACACCTTT -CCACTCCCGCTCGTCTTGGCCCTCAGGCTATAGATAAATACCATGTGAGAACTGAAAGTG -AATAGATAGTAGTTCTCCCTCCAAATATCTTCTTCCAGTGCGTTTTTCTCTTACTTCAGT -TCAAGTACGCAACAGATTGCTGGATTTCCCGACCTTTTTCTTTATTTACTTGTTCACCTA -TCCTATAGTCACCTAGTATGGCATCGCCTTCTTGCGACTTTCATGTGGATCATACACCAG -CGCTGGAAATCTATGACTCGGCTATTGAGGACTCGAACTCTCCTCGGTTCGCCCCTCTTG -CCTATCAATCATTTGCCTCCACACTAGCTGGGCCATCTATAGGATATATGGATCCTGTTC -AATATAGCCATTTCCTACACTATCCCATTGCACCAAGCTTTGCGGTATCTGCTTCCTCTC -CCCCCGCTTGCTCTAATATGTGTCGCTTCAACTATAATGCTGCATGGCAATCTCCTATCA -AATCTGACTACGCAGCCATAACCCTAGCGTCGAGCCATCTCGACCCTAACCACGAGATGA -CTTGCACTTTTTTTGCCAGCATCTACTTCATCTGTTCAAAGGGAACCTCAGCTCAAAGCT -ACTGGCGATAGCTATCGATAGCTATGCCAAGGGAAAAGAGCTTCTTGTCGAGACCAAGAA -CGACAGCAAAAGCAGAGGTAATATCATGGTTCACCATTAAGCTTTATACTATTTGACACC -ATGCCTTAATTTTAGAAAACCCAAGCGATCTTTTGAATGCCACTGGAAGGACTGCACGTA -GCGCGATCTTTTCTCCGGAAAGCCATCGTTGCTGCGACATATCGACACGCAGCATATAAC -TCCACAATCATTCAATTGCCGCTTGTGTCCCAGATCCTTCAATCGGAAGGACAACATGAC -TGAACACCTAGGAAGAGTTTATGGGGAACGAGTTCGATGTTGATGCTGAGTTGGTGGTCA -GATTCGGGTGATGAAGCCTGAACTGTGTTATCTGAACGAATTTCGGCGTTTGAAGGCATA -AATTTATGTGAACTGGGACATATTGCGTTGTGTCGTCGTTTTTTATTCACTTGTGTGCGT -ATGAGGGAGAATCAGAATGGTCGCTGAGTTTAGATTTACAAGCAATGGTTTAGCAATTTT -TGGCAATTAACGCATTCAATAAGCAATCGCTTAGATTCAGGTCGACTTATTGAAAAATAG -GCTCGGATATCGTATCTCAGGAGTCAAGAGCATATTTAATTCCATTGAGATCTCGGTTGC -GAAAACAAACAAAGGAAAAAGCAGATTATATAGATAGTCACGTGCGCGCTAAACCCCATT 
-AGGGACAAACCATCTGCAACAAAATCCGAAGTCCGCAACACAGCATCCGACTCAACACGC -GCAATTCACCATGTCGTGCTCCATACCCCAACGCTGTGCCCGCCAGCTTCTTCGAGAGCC -CTTCCAGCGCGGCTCCCTACCCATCTTCCTCGCACCCGCCTTCTCTCCCCGCACACAATG -CTTCTCGACCACATCCCCGGCGCAGTCCCGAGTCGGAGGTGCACCTCTCTCCATCCCCCC -GGAGGTGTCATTTAAGCTCATCGATCTCCCCGCGACTCTCAGCCGGACACGAGGAAAGGA -TATTCCTAAGGTTATGGCTGAGATTAAGGGGCCTAAGGGTAATGTGTCGTGGCTGAAATG -TGGTCCGGTCTATGGATGCTAACTGGTCCGATTCACAGGCGAGATGAGCCTGAGCATTCC -CTCATTTCTCACTATCACCCCTGATGAGACAGGCCAGAAGGCTACATTGTCAATCTTGGA -TCAGGAAGTCCCTCACCAGCGTGCCATGTGGGGTATGTTCTCCTGTCTTTTCTCCGCGGC -GGTCTAATGCGGCAAGTCAAAGTCTAACATTTCACTTTTCTATCAAGGAACCATTCGCGC -ACTTCTCCACAACCACATCCTAGGTGTCTCCGAAGGCCACGTCTGTGTCTTGAGTATGGT -TGGTGTCGGTTACAGGGCGACTATCGAGGACAAGGCCTCCACCGTCAAAGCCACCTACCC -CGGCCAAAAGTTCGTTTCCCTCAAGGTCGGCTTCTCTCACCCGATTGAATTGGGTATCCC -CGAGGGCGTTGTGGCGAGCACACCTCAGCCTACCAGAATTCTTTTGGAGGGTGTCGACAA -GCGCATTGTGACCCAATTCGCGGCCGAGATCCGAGAATGGAGACGCCCCGAGCCGTACAA -GGGTAAGGGTATCTTCGTCAACGGAGAGACTATCAAGCTCAAGGCGAAGAAAATTAAATA -GACTGGAGGCGCGTTTCTTTTCAGCTGCGCGGGACATTTTGTGTTTGGTTCCGTGTCTTG -TACTCTTATCGTCATATATGTATTTTTTTATTCCTCTCACAAGCGATGGTATCACAAATC -TACTTCAAACTGAATCTGCTCCCCAGGAGCTCAAGTGCAGTGGAAGAGGCGACGAAATTA -AAAGTCATAAACCAATTACAGCTTCAGAACGGTCTTGACGGCATCAATGACCTGGTCCTG -GTGGGCCTTGGGAGCAGCAACGACACCCTTGTTCACAGCCAAGGTCCGGCCCTTACTGAA -ATCCAACCGGTTGCCGTAGATGTCAGTCACCTGGCCGCCAGCCTCGCGCACAATCAAGTC -ACCAGCAGCGTGATCCCAGATCTTCTCCTGGTAATCCTTCTTGACGGGGAGACGCAGGTA -GACGTCACCGGCGCCACGGGCAATGGAGCAGTACTTGGCCTGCGAGTCCAGTCGTACGCT -CGGGGCGGTGATGCCAAGGAGCTGAGCAACAGCAGCGTTATCGCCCTGGGCGGAGTGAGC -AGCCTCGACACCTTCACAGAAAACAGCGTCGGAGATCTTGGCGACGGGGCGCATGGAGAT -GGGCTTGCTGGCAGCAAGAGTACCACTGGTCAGAGGACGGCTGGTAGCACCCTCACCCAG -GATAGCAGAGAACAGGACACCGTTACCGGCGGTCGCGGACTGCTGGACACCGGTGCTGGC -ATCAATGGGGGCAGCATCATCGACGGGGAGGTTAGGAGTACCAATAGCACCGACCTTGAC -GTCACCGTCAACCATCAGACCGAGACACACGGCATACTGGCCGCCACGGAGGAAGCCCTT -GGTGCCATCGATAGGATCGAGAGCCCAGATGCGACCCTTAGGACCACCGGCACTGTTGCC -ACTATCAATGATGTCTAGCATGCTCTCTTCGCTGGTGAGGGGTCCACCCAGAATATTATC 
-GCTCTCCGCATCATCCAGCTTGATGTCCTTGACCAAGCGCCAGATCTCAGCGCTGAGGCC -TTTGTCCTCACGTAGAGTACTAGCCTCCTCTTCGGCAACAATTTCATCGTTGGGGAAGTT -CTTGCGGATGGCCGAGATGATCAGAGCCTGAGCGCCGAAGTCGCCAATGGTGACAGGGGA -CTTGTCGTCTTTGGAGACTGTGCCCTTAGCCTTCTCGAAGAAGACCTTCTGTGTCAGGAG -ACACGCCCGCTGGACGGCCAATTCGGCGATGTAACGTTCCTGTTGGTAAGACATGATTGT -AGGATGGTTGGAGGAGAAGTGGGGGAGGAGAAGGGAGAGACGAGAGCGAGATTTCAGAGC -CACGCTGACGGCGAGAATGAGGGTGGAGCAGACCACGAAGCGGGACAGGGCGCGTAACAA -ATAAGAACGTGGGGATGGATCTGTCCGGTGTGCAGACGAAAGCTTCATCCGAAATCAATT -GGCAAGAAATGCAGCCAGGATCGACTACACTTTCTCGAGGTCTTATTGGTTAATACATTG -GTTTGCGAAAGTCGTCATTCCGGACCTCTTCCTCATTCTTGGCCTCAGGCAGTAGACCCC -CGCCCTAATCCTCTTTGGGCCGAAAATAGTCACGAGGAGCCGATAAGCTTAAACTGTCTC -AGCGTTGTTTCAACTCTCCTTTTCCCTTGTGACAAGGTCATTTCGCAATATAGATGCCCC -GTTGAACAAATTGGGAACGGCTACATATTCTTTCTGCATTTGTCGCACCTTTCATTGCGC -TTATCGGATTAACGGACGCCAAGACACCCCTGGGGCCTCCGACGTCACCGACACCACGTG -AAGGTATCGCGAACGCTCCCTGAAACCTTATGTTGGCTTTCACGCAGTTAATCCATTTCC -GGCGTTCTTTTTTCTAGCGAATCAAGTCGCATACTGTCGCCGCCGATACTCCATATATAC -CCAATCCTTGCGAAACGTTTGTGTAAACAGGGCCGTGCGGGGTCATCTCTATACCGACAT -CGCAGCGCGGGGTGATTGCGGGCTGTTTGATATCAGGGGCAAAATCGTCAATTTCCCAAC -CGGTTCCTACTCAGAACAAGTATACGCTGTTCTAGAGGTTATCCTCTTGAGTAGGCCAGG -ATGTTCCAATCCAACGTAAGTTATTGCATGGAAATCATAGATCATGCAACCTTTAGCACT -GACTAATATTGCCCTCAATAGAAACCGTATTCGGCAGTGACCGTACAAATTGAGGTCCTT -ACTAGCGAACAATATGAAGTCGAAGACTCAAGCGGTATCGTCGATCTAGTGGAAGTAGTT -CGCATCCAGGCCAGCGGCCCAATGGAAGCAAGTCGGGCTCTTCGCAAGAAACTGTGAGTG -TACGAGTATTGACAGTGCTGAAAATCAGGCTGACCGATCCAGAAAATACGGCAACATTCA -TCGACAACTTCGCGCACTTACCATTCTCGACTTCCTGGTTCAAAATGCAGGCGAACGCTT -CTTACGCGATTTCGCCGATGAGGCGTTGCTCGAGCGTCTGCGAATTGCAGCTACTGATTC -AGTTTCAGATCCACTCGTCAAACAGAAATGCAAACAGATTTTCGGACAATGGGCGGCAAC -TTATAAAGACACTCCTGGTATGGCAAGAGTCACAGCCCTATACAAGCAACTTCCCAAGCG -AAAGCAGCCTGCGACGCAGGCAAAGGCCAAGGTGCTCCGTGAGGGCTCGACTGCGAATCC -AAACGATCCCCCAATGGGCCATGTTGTATCTGTTTCAGGGGGCAGTGGACCATCCACTTT -GCTGACCAGCCCCAAACAAAAGCCCAAGAAAGTCAAGAAAGATAAGAAACTATCAATTAG -TGGCAAGAGGTTTGATTTGGAGAAGGAGCGCCCGGAGATCTTGCAAGCGCTTGCTGCCTC 
-TTCTGTTGCTAGCACCAATTTGTTGAATGCCCTTAAACTCGTCAATCGCGAGACGCACCG -AGTGAGTGAGGATGCAGAATGCATCAACCGATTCGACACATGCAAGCAGCTTCGTCATGG -AATCCTTCGATATATCCAGTATATAGAAACCGAGGAATTTCTGGGCGGTCTAATCCATGC -AAACGATGAGCTGGTGGATGCATTGATGGCATTCGAGGTGCTAGACAAGAGCGTGGACTA -TGACAGTGATAGTGAGGACGAAGGTTTGGAAGGCAATTGGAAGCAAAAACATGGACTTGG -ATTGGATGACACGGACGTGAACGACGGCCTTGCTGGACTTAATATCAATCCCCCAAAGCC -GCCACGGCCGAGTCGCCCTGATAGCCTGCCTCTTGCATCTTCGTCGCAACACACACGCCC -AATCTTTGAGAGTGAGAGTGAGTCGGAAGAAGAGGATGATGAAGACAATCCCTTTGGAGA -CAAGAATGCTATTAAGACCCCGGCCATTGAGAAACCTCAGCATACTTGGTACGTCACCCT -CCCGTTTGATTACAACTCATGTGTTTAATATCCCTACAGGAAAGAGGTCTGATCCATGAG -ATTACAAAGCTGAAGTTCGGATGATTGTACTTTTGCGGTTTGCAGTTTTGATTTATTTCA -TACATTGCCTGGTCTTGTGCTTTATCGGTATACCCAACGCCATTGGTTATCGTGTTGCTG -GAAGGTGAATATTCATTTTGATCGTCGGTGTCTTCAGTGTATATATTTTAAATCAATTGG -ACGTTGATTGCATAGTCATTTCTGTTAAACTTTTTTAAAGTCCCGGAACGAATATATATA -CTAGCTTTGCTTGAAAGTACGCTTTAGTTTATAGCTCTAAAGCCCACCCCGAGACGCTCA -ATGGTGCCAAACCCAGCAGCAGTCTTTCCATGAAACACATTGACCATGATTAGTCAAATA -GCGCCGAGTACGTTGGAATTAACTAGGGAATACCTGGCAGAGTTGGCTAAGCGCCCCGCC -CCTGACTCGCTCTTCGGCCTCTTTCGCGTCTTTCTCGGGGTCCCGACGGGCTACAGAAAG -AAAGCTATAAAATAACGTGCATCTCCAATAATCGAGAAAAAACTAAACATATCTCCTCTC -ATAACACCAACAGGTCATCTATCCAAGAAAAGAACATAAAATGCCGGTGACCGAACTCGC -TCTTCTCCGCCTAAAAGCGCAAGCCTCATCTTCCGCCAGGACTACTGTCTTAGAAGTCCA -GAAGGCAGTATCAGAATATTCGGACCATAAAGTCACATATCTCCGTCAGATTGAAGACCC -AGAATCCTTTTATCTGCTGGGCGGGTGGGAATCTATCGAGAAGCATACAAGCGGAGGCCT -GTGGACTTCATCCGAAGCCAACCAGAACCTACATGCGAAGGTGAAGGATACCATTGATCA -TAGCTGGATGTTTCACCTGGACCTTGATGTAAGTTTTTCAGTGTTGCTATGAACTTACCT -CGTATTTCGTGGGTGTTATCAAGGAATACACAAACTAACAGAGGGGACAGCCATCTACAT -CCAAAATCCCACTAGATGCACCTGTCATCGCTATAACGCGTTGCTTTGTCGAGTCAAGCA -AGAAAGCTGAATTTGACGCTGTGTTCAACGTTGGCGTGGTTGATTTGAATACCTTCACGG -CTCCCTTCACTTCCTGTGGTGCGTGGCGCATCGATAAAGAAGGCGACGATGAGGAATTTG -TTCTTTTCAGTGGATGGAACCAAGTGCAGGATCATTTTGACTTTGCAGGGTCTGAGGTTT -CCAAGGAGTTTAGGAAGATTCAGGCGCTGATGAAGGATGCGGATGTTAAGCATGTACGTA -TTGAGAAGTGGGAGTAACTTATTATATGAGTTTCATTCATCTATATAAGATCTATTGTGA 
-GGTTATTAGGTTATGTCCTGGACACGGATGTGCCGTTCAGGATAAATTGCACCATTGCCT -TTCTACCATCCAGTTTGAGTGGTGCCTCTATATGCATGGCGCCGAGGACATTTTTCCTCT -CAATCTCCAAGCCAAGAGACTTGGCATGTTGTTCTAACTGTTCTGGTACGTATGATGGAA -TCATTTCATCTTTCTCGGCAGAGAGGAGAAGAATTGGTGGGAGCGAGCGAGGTATTGAGG -CCGTAGCTTCAGAATTGGGGATTTGACACACAGGCTGGTCTCGCCATGTAGCCATTTGTT -TCATCTTCGCCTTGATGTCCCAGGTATTCCAGGAGAAAGGCCAAAGATACCTGTATGGAA -GCCATTTCTGAGGATAGAGACTGATGAGCATATCTTTAATGCTGGATGTTGGTGCCTCCA -AAATGAGCCCAGTAACAGGCGCCAGGGACCTGCTACTACTGATATTTGAGGTTTGCCCCT -CGTGTTGGCGGGCAATGTATGTGGATACCGCAGAGCTTGCAACGGCAGCACCCAGACTGT -GTCCCCAAACTACAATCTCAAGATCGGTTTCTGGGGACGCATGAGATTCTGAAACCCAAT -TCAAGAGTGCCTGGGCATCCATCTCAATGCCGGACTGTGTGGCGCGACCTGATGATGTCC -AGTAACCGCGATATGATAGAGCAGCGATAGTATATTGAGAGACTTCGTTCTTGGACATAG -GAGATGAAGTTGACCTCAACGTTTCCGCAATTCCGCGCAAGACGTGAGAGAGAAGAGGTA -ATCGCATAGGGGTGGATCCACCATTGCCCTGAAAATAACAGATCACAACGGACTTTTTTC -GTTTCGGGACATGATTTTTATTATGTAAAGCACCGGCTTTTGAAAAAGACTCGTCGTCCT -TGTGAGGAACACGAATGTGACCCTCACAAACAGCCAATTTAGTTCCATCCAAACTGCGAA -TCTGCTTCTCCTCCCACTGCACCGGTCTACACTCAACCTGATAGTCCGAAATGACTTCAG -ATCTCGTAAATGGAGGCAGCCAAGATAGGTAGAGGAGCTTATTTTGCATTACAATGATCC -AGAAGCATTTCCAGGTCCATAGGCCGAGTAGGAGCCCGATGAACATGAGAGGTGGTATAG -CAAGGCTTGGAATAGCTTCTGTCATTATGCTTCGTGATTGAATATTTGCTTGTACATTTT -GAGCGACTTTCTGTCTCTTTCCGGTGATGCCATGAAGACAATTGATGCCATTTGACGGGA -TATTGTGATAAGCCTGTGATGGCTGTCGCGCGCGGGTGGATATTGCACTTGGTTTTGAAG -CGCGTAGTGCGCGTCGCAGAACGCAGCGTGCCAGCATCGGCAGATCTGGTGGGCAAGTAA -TTGGTTGAGTTGGAGGTTGTTGGACGGCTGCGACGTATTGAGTGGAGCATTGGAGCTAGA -TACTGTCCAGGTGCGGGCTGGGCCTTGACTCAGCAATTTCAATGCTCCGGGGCGTCTCTT -GCATGCAGCCAGTATAAAGCGCTTTTTCACATTTGACTCATGTGCTTAACTCATGTGTTT -GTTACAATAAGAGAATGGTTTCATTTTGTCACTCTACTGTCAAGAGAGATGTGAAAGTAT -AGTCAATTTGAATTACAAGACAAGATAGTTAAATCCGCGTTCAGGCCTCGAACAAGATCG -AAACTCCAACGTGTGATGAAAATGATAGCATTGTGAAAAACCAAATGGGTATTGTGGGAG -GTCATAATATGGAGTCATGATATGAAAAAGAACAAAACGCATTCGCAACAGGAAAGTAAA -GATGAATGCCTTCCCAAGACGCCAAAAGACCAAAACCGCAAGCCAACCATGCAGTAATGC -AAAATGAATTCTCACAGAACCTTCGATCTCATCCACTCACTCGTACCTACTCCAAGGTTA 
-AGCCCATTCCGTTCCCACCAACGTCGGACACGACCTTCAGCTCCAGAGCTTGGATCAGCA -TCCTCAAGCGCTTCCTCAATATCACAAGTCTTGAATTCAACCCGTAAACCGCAGCTTTTC -CACCATTGCAGGATATGGTCGTGGCCATTCTTTGTCGGGCCAACGAGTACCTGGTTATCG -AAAATCATCTTCGGACCCTTGAGTTTATACCAGAATTCGAGTACATGGACATGGCCATGC -GTGCTCGCGATGCGGGCAACGTTCTCGGCGTGGCTATAAGGTATACCTGATGCGTCCCAC -CAAGCGAGGGATGCGGTGCGGCCCGATTGCGCGGCGAGTAGGACGGACTTGCCAACTTTG -AGAGGCAATGGGTTGGTCTTTGGGGCGTTGGCCGAAGCGGCTTTCCACCAGTCGAGTACG -GCGACTCGACCTTCAGCGCTGGCAGCTTCGAGGGCGCGCTCGCTGTATCGTAGCGGGAGC -CCTGAGTGGAGCCACCAATCTAGGACGTCAACAAAACCGGCTCGGGAGGCGCCGTCCATA -GCATCGGCCATGTACTCTTTGTTTGGGAGAGCGGGCGCATCGCGCCACCACGTCAGAACA -TTCGGGTTGCCGTAGATTGCGGATGCTCGGTATGGAATATTGGTGAGTCGAGAGGTTGTC -CAGAGAAGGTCCAGGTGATTTTCGGTGAGATAGGAAAGGATGTCGGTGCGGGAAAATTTG -ACAATTAGGTGGCAAGCAAGATCGGAGAGCGGCTTCCAGCGAGGGAGCGCATCGATGCGC -TTTTTGATTGCAGCGAAAGATCCTCGCAAAATAGTATATTCAAGTGTGGCCGTGGTGGAC -ATTTTAGGTAGATGGCTCTGCCATTCTTGGGGAACAGGGAGGTGAGTTGGAATGCCTACG -GCCTTCGCTGTTTCCCAGTCTCGCAGTTGATCCTCGAAGACATTCAGCCAGATTTCCAGT -GGTAAAAAATCGGCATTGCCCGAATGTTTCGTTTGTGGTGTGGAGCTGGTCGCACTTCCA -GGGCTAGATTTTGATGCAGTCTGAGCCGGGACCGCGGCGACATGGGTGTTTGCTCTAGCC -GATAGGGTGGCTTCCCATGGCGGCAATTTTAATGTTGTCTCCAGCGCCTGATGTAACTCT -TTCAAGCCGTCCCAAGGCAGTATCGCTAGGTCTTCGGGCCCATTTGACTTCCAGAGCACG -GGCTTAACAGTGCGAATCCAAGGCCAAAAACGACTGATTTCACCGAGTTGTCGACTGCTG -AGCGGGTGATCCCATTCCGACACAGTCATTCCTGGAATGAGAATCACAGGTTTCTTCGAG -ACCCAGCCACGAAGAAGCGCCAACGTCAAGGTATTCGTCAAACCGGCCAACATCGCCCCC -AAGGTCCCGGCATCGATGGGCGCAACCAAAAGCATGTCTGCCCATTCGCATAGCTCCGCC -GTTTTCGCCTTAAGCCATCTCCGCGCGCAGGTTTTCTTGTCGAACGTTTGGCTGTCGACC -GTATCCATGTAGTGTGGGATCTTGTTGGTGCCCGTGAGCAAAGCCGAGGGTTCGTCTGTG -ATCAGTCGCAGTTCGATATATGGATTCTGAACGAGTGACGGGAGAACGGTGCTGGCGAAA -GCACTATCTGTCGACAAGAGCAAACGGCGGCGCCGAGGATGCGATGATGTCGCATTTTCC -ACCGCTTTGTTTCCAATCACGGTCTCATCACAGTCTCCATCCACCTCCATCTCCCTTGAC -CACGCCTCTTTTGTAATGTTATTGCCTTGTATAATTGGTGAAGTATTAGGTCGAAATCCA -ACAGTCATGCCAGCGCGACTAAATAGACAAGCTGAACTGTACAAGGATAAGGCGGTCGAA -TGAAATAAATGTGTATGAAAGGTATAAGAAAGATCGAGGTCGATAGGCGATGAAAACAAA 
-ATCAACTGTGGACTCTCGACGTCTCGTACTCCTGAAAGATAAGTTATCAGGCTGTGGTCT -GATCAGAGTCCAATCATGCAACCCAGAATTGCAATCGAGACTGGCGCTTTACTGTGCAAC -GGCGATCAACTGGTTTCGAGCGGTGAGGTCGTTGCGTTGTCTGCGTCGCCACGTCTTTGC -CACACCCGACTTAATAATGAGCTGCTAAATGAACAGAGTTGCCAAATAGCGAAAAAATGC -CGAGATCCTTCCTGGATCCTCTCAAGGGTGAGATCCACCAATGACAGAGAGGCAAATGCT -AGTTCGAGGGTCTATGGGGTTTAAAGACGGACCCGTACAGGAGGCGCTCAACTTGATTGG -CGACTGTTTGGGCATTTCCGCTCTCAGCTGGAAGCGGTCAATGCCTAATATGCATCTGCA -CCCTTCAGATGTCATTCTCATAAATACTTCAAATAATGTTTCTTGCTTGTCTATTTGCCT -TCTTTTGTACATTGATAGATGCTCAATTCATGTCCAATATTAAAACACGAACTCTGACAA -CGGGGTACAAAAAGGGGTGGAACTAGGCCGAGTCCCCTTGCAATGTATACATACTGATCT -ATACAAATAACACTCGAACATTTAAATCAATGCCCGAATGAATGCCCACTCTAGCGTCGG -GCCAGGTTATTGTGCAGTTTGTGCTCTCTCATGGCGAGACTAGTCAATGATCCCAGAATA -GCCACGCCCAACAATGCCACAAATACCGCCCGCAAGGAGTCCATGTATGCATCAAGTACT -CCAGGTGTCAAGTCTGCGGGCATCTTCCAAATCTCGTCCAAGCTATCTCGAAGCCGTCCA -ATCAGCTCTTTGGCGTCATCGCGCCCACCCAGTCGAGACCAAAGCCCGAGCTTCAGCGTG -TTCTGGAAAACGGCCGAAGCGATCGTGATACCAAGCGTACTTCCGGTGCTACGGAACGCA -TACGAGGCAGATGTCACAACCGCATGGTGCTGGTGGTCCACGGAAGAAATGAGTGCCACC -AGGGTGATAGTCAGCATGCTACCATAGGATGCACCGCCTAGGAAGAAATAGAGGAACGGG -AGTCCCACGGGCGTGTAGAGATTCAGAGTGCAGATGAGCCCAGCGGAAAGAACCGAAATA -GCCATGGCAACATAGTTGAGAGGTGCATACCGACCGCTGGCACGCATGATGAACCCCGAT -CCCAAAGAGCCAATCGAGGTGCCGATCGCTTGAGGGATTATTCGCGCACCCGCAGCAGTC -GCGGAAAGTCCCTGCACCTGAAAGTACACCGGCAGGTAGAAGAGCAAGCCGAATACGGCC -ATGGTACTGAACCAATTAGTCAGACAAGCAGCTGCCACAGTGCGATCCATCAAAAGGCGC -ACGGGAATGACAGGCTCGGAAGCAACTTCAGCTTCCACATAGATGAACAGACCCAGGAAG -ATGACGGAGGCAACCAGGGATGTCACAACCAACGGGTGGGTCCAGGGCACCTGGTTTCCG -CCGGTATTGAGACCGAGCAATAATGTGACCAATGTCAAAACCAGAGTGATAGCGCCGAGG -AAGTCAACGCGCTTGATGCGCGCGGTATCCGACTCCTTCACAGGAATGTTGACTTTGATT -GCGACTAGAACACACGAGATGAGCAGGAAGGGAACCTGGAGCAAAAATGCCCATCTCCAC -CCCAGAGTGTCGTTGATCCACCCGCCAAACACACCTCCCAGTCCCATTCCAGCACCGTAG -CAGATGTTTCCGATGCCCTGCCAGATGCCTCGCTTGCGGAGAGGGATCAGGTCCGAAGTG -ACGAATGTCGAGATTGCAGTAAGACCGCCTCCTCCGACACCTGCCACGACGCGACCAAGA -ATGATAACCCATTGCGCGCTTGCCAGGCCGCAGATCAAGTTGCCAATGGCGAATACAATG 
-TTGGAGAACACAAGCCCCCAGCGACGCGAGTAGATATCAGTCAATCGGCCACTGAGAGGC -TGGCAGGCGGCATTGGAGATCAGATAAGAGGTGGCTAGCCATGAGAGCAGCGAGAATGAG -TTGAAGGACGAAGAAATGGGCGCAGAGAGAGTCGCGACAATTGTAGTATCTGTAAAGCTT -AGGTTAATTTGATTATCGCGAAGAGTCAGCGGAGTTCATACCGAGAGCAGCCAGGAAGAC -ACCGAGCCAGATGCTTCCGAGGATCCAAATTAGCTCCTTTGTACTGTGTTCCTGAGGTAG -TGGGGTATTCTCATTGGCATCGGGAGCATCCCGAAGCAAGGGTGTTTGCTCAGATACAGG -TTCTCTGGCAATATCGTCTGCAGACATGGTGTGTCCGGATGTGAATTACAGTGTTTTTTT -TTCCCAGAGAATTGCGAATTGACGTGCTCGGGGAATCGGTGGAATCTCCGAAGGGACTGC -GGAGATTGTGGGGTCTACTACAGCGGCTGTGGCGGTTATCCTATTGGTGGATGCATGTAT -GCATTGCTATCCACTAAGTAAAAGAGACTGGCTTACGAAGTGCAGTCTTTAGAAATGTGA -CGGTGACAAGTTTGGCTGAAACCCCTTGCCGAGACTGGACAGTCTTGTTGGCACAGGACG -TGACCCCCTTGAGTCCCGAGTTGAGTGCGTGTGTGTTGAAACACAGTCTTTAAGCCCCAA -ATAAACATCGAAACCCCATTGCTATACTGAACAAAACTTCTTCGATAATGGCATTCCCGA -ATCTCCCATCCACTACCGACTATCTGATTGTAGGCGGGGGAACAGCCGGGTTGGTTGTTG -CATACCGTCTCTCGGAGGACCCAAGTATCAAGGTTGTCGTCTTGGAGAGCGGCCCCGATG -CTTCAGATGACCACCGAGTTCAAGAACCCGAAGCATGGCAATCTTTGAGCGGATCAGAGC -TTGACTGGAAGCTGAAGATTGTTCCCCAGGTTTGTCTGCTCTGATGCTCGAGCCGAGCAG -AAATTTACCTAACATCATATGCTCAAAGGCTGGCTTCAACAACCGAGAACTCGATCATCC -GGCGGGAAAGGTTCTCGGCGGGTCCTCCGCCATCAATGGGCTGATATATACTCCCCCATC -ACCGGCTGCAATCAATGCATGGGCACAACTTGGAAATCCGAATTGGAACTGGGAAACATT -GCGACCTTATCTTCAAAGATCCTACACTTTGAACCCACCAGAGTCAGCTCCACATGAGAC -TGAGCCTGAGCATCATTCTCCTCACGGGCCGATCCAAATCACTTTCCCTGCCCTAAACGA -CAAAAAGAGCCTTCCTCTCGGAGAAGCATGGAAGGGTGCGTTCAAGGGCCAGGGATATGA -GCATAGTGTCGACATTCTTGCGGAAAATAAGACGATTGGCACCCGTGATAATCTAGCCAC -TGTGAATCCAAACTCGGGTCTTCGCAGCAGTGCGGACAACGAATATGGCAGAGTAGCATC -TCAACGTGCCAATGTGAGTATCATCACAAGAGCAACAGTTCGTCGGGTCCTCTTTTCCTC -TGGTTCGAATAAGACCTTGGTCACTGGGGCGGAGTTCCTATATGATGGCAAAACCATGAG -AATTCAGGCAACCAAGGAAGTCATACTTGCGGCTGGTGCTTTCCACACACCAAAGCTACT -AGAGCTGTCTGGGATTGGCGCCAAAGACAGATTGGAACGCCTCGGAGTCCCATTGGTTAG -TGACCAGCCAGCGGTGGGCGAGAATCTCCAAAACCATGTGATGAGCATGTTTCCAGTTCC -CCTCAACGCTCAACCAGATCTTGACGAAATTTCTCTTGGATTGAAGGGTTTGACCTTTGC -ACGCCTCAACCAGGAAGAGCAGAGCGGGCTATTTGCTGGACGTCCAGAGCCAGTGAGCTC 
-ATCGGAACAAGTCATGCAGTCAATCCTGGAAAGTCCCGATGAAGCGTCTGCAATCCTTTT -CATGTCTGTAATACCTGGAAATATGGTTATCCTGGGTGCTATTAACAGCGTCCCCTGTTC -GCGGGGGAATAGTCATATCAAATCGGCAGATCCCAGTGAGATGCCTATCATTGACACGCG -ATACTTCAGCGATGATCTAGATATAGAAATTTTGGCTCGACATGTGCAAAGCTTACATCG -TCTAACCGCAGCCCCAGCTCTGCAGCCATTTATGCAGCCGAGCACAGGGCCAACAGATTT -GAAAATCATCAAGAAACAGCTGCGCGAGGCGTCGGCATTGACGTGTCACCATGCATGCGG -CACTGCTGCTATGCTTCCCCGTGAAGAGGGAGGTGTAGTAGATCAAGATTTGAGGGTATA -CGGGACGGAGAACCTGCGGGTTGTGGATGCGAGTATTTTCCCGCTCATCACCAATGCAAA -CCCTATTGCAACAGTGTATGCGGTGGCTGAGCGGGCTGCTGATATCATTCGGGGGATCTG -CCCTGAGATTGGATCATCAATTAAGATGTGAGATTTGAGAGACTTTTCTTTTTTTTCTTC -TCTTTCCCAAAGGAGATTACGAGAACAAAGGCGAAGGAGAAAATGACAGGAAGGAGCTGA -GCAGGTAGCTCTTTCCTATGTTCCACGTTCCAACATTGTTTCGTCGCTCATCGCTCACTT -TCCCGCTTCCAACTTTTCGCGATCCCTGATTTTGCTTTAAACTTCAAACTGCTCATTATG -GACGGCCTCATTTTCTTCGTTGTGTTCCCTATAATCGACACTCCTCTTGTTACGTTGGCA -GCAGCATGGATCAAGGCCTTGCTTCTTTGGCTCCACGTATGGATGATCCGCCGATCATTC -AATGCCCTGAAGAGAGATATCGATGCCTATGAGACCAGAATGACGATCTGGGAGAAGGAG -TTTCGTGAGCTCAAGAAGCAGGTGAATGCGGAACTGGCAAAACCACCGATTTGGGAAAAT -CGGCTGCACGCGACTCAACAAGACACCCTTACCCCAGCACAAGCAGAGCGTCTACGTGAC -GAGACAGAGACCTTGAAGGCCAGGACATGCATGATTCGAAGAAAGATTATTCTTCTATGG -AAGGTCAGAAAGTCTCTTGATGCTATGCGTATGGACCAGCTTAATAGGCTGGCAATCCTT -GAGCGATCGATCGACCTATGGCGGTTAGTAGATACCAATTTCTACCTAGTGATCTTGGTC -TTTACTGATAATCATACATAGAACCTAGAGATAGCCGGTCCTACCTCCCGGACAGTTTGC -CGCAATTCGATGCTGTCTATGCCTACGTACCAAATTTCCTTAGTCAATTGTTAGCTTTGC -ATTTCCATTTTTCACCACTATGGATCTTTTACTACTTCTCAACAATATTTCATGGCCGAT -GTATTTACTCTCGATCACCCACTACGAGAAATCCAGTTTGATCCACTCCTAGCATCAGCA -TCAAGAACGTGTATGTAGTATCGCCAAATAGATCAATAATACCAGCAAGACTAGATACTA -CAGTTGAAAGGATTTCTTGATAGAAGTCTTCTTTTCGGGAGAAGCCAGCACTGTAGTCAA -TTACTTTAATTAGAATTTCTAAGAAATTAGTATGAGTGGACTAAGTCATTTCATTTGTAG -ATACGCTTTTCTTGACTGGCTGAAGTTCAATGTTTACCTCCTGAGTCACCACAATCCCCT -GCCAACTCGCATCCCGCTCTTCATCATTCGATTTCGAACTCGAACCGATATTTGAACGAT -GGGGCTTGGAACAGACATCCGCTGGTGCTACACCTCCAAGATCCTTATAGTACTGCTTGT -TTTTAGACCACCGCGTACTGCTTGACGGCGGATGGCTGGCATTTCCACTGGACATGCCGA 
-GGATTCTTGCCAACGCGCCACCAAAAACTGTCTTTAGGAGTACTCGCATCGTTGGCAGAC -AAGTACAGACAATGCCTGTTGAAACTTCAATGATAGACCACATGTATCCCTTGGCGTAGA -GCCATGTGGAGTCGTACTCGCCCAGGACAAGCATTGAGAGGAGAAGGCGGGTCATAGCTG -CAGCGCAATCACTGAGAACGACTGTATCAGCAAGGTATAATTACAAGTCATCTGCAACTT -TGAGATAGAAAAAGGGATAGACATACAAAAGACCCAATGCAAAAATCAAGGACAATGCAA -TTTTCTGTGATTTCGGAAGCTGAATATTCCACACTTTCGGAATTGGCATCAACCAAATCA -CGATATCAGTGATGATATTTACAATTGCATAGCCGATAGCAAATTGCTTAAAGTCAAAGC -AAGCTCCTTCCGTTGTCCAGAAGGCGCTTATCGGATTGCAGTTGAAAATAGCGAGGAAAG -TCATGATGATTGCCCACATAGAGACCAGAACGATCATTCCATTGACAATTCGCAAGAACC -TTGAATGCTCGAAAATGCGTCGGTACAAAAAAAGCAGTGCGAGCTTCACCGTTGGGGGTC -CGAGGACATAGAAAATCTGGAAGAAGTAGAGGAGCTTTGAGAATATAGCGACTTTTTCTG -GTAAGAGGATTTCTCGATGCTTGCCGAGGCCATTGATTCGACCTGAAAGAGCGTTAGGAT -GACTGGGATTTATAAGTAGCGTCAGAGCAATGGTAGTGGAAATACTAACATCCCACAAAT -ATCCCATATAGTATCCAGTCCCAGAAAAGGGCTACGCAAATGATCCAGTCATCTGCACCG -AGAGGCAACTTTCGCAGTCGTCGAGCACCAAAGCGAGCAACAACAGCGAGTGTGGCCAGT -ACAATGAAAATTGTGTTCACTACCCATAGAGCTGGTCCAGAGTCCTCCTGAAGGTCAGTC -ATATATGCCAGTGATTGTAAAGTCCCTGGTTTGTAAGAGTCAAGACCCTGAGTATATGCA -GAGGGCTGCCCACTGTGTACACCTCCATGTCGCTCATGTCTTAGACACCAACCATTCTCA -AATTTGATTTTCAAGGATGTTGGGGGCTGGAACGACGCAATTAAGCCATCCTCAAAGATC -CCTGCCATACTATATTGGCGCTAGTACGAAAGGATCAAGTCACATCAGATAGCGTGGGAC -TTGAGACGCTCGAGGTAACGATCGTAGACCCAGGGCCCGCTATTCGGCCCCCCGACGGCA -TAGCGGGTATCACGGCTATGCTGTACGGAGTCGATATATGTACTGTACGCTGTGATCTAT -ATCCCGTACCTAATTTAAACTCCCGCATGGTACTTTCGCTCTGTTTAGTGTAGATGTAAT -GTTGGGCCTATATTACAAAAAAATACAGCAGAATCCTTTTATAACCAAGAACATCCAGTC -TTTACAAACGGATACCGGTGCCCCCGATCGATCAATTTATCATAGGAACACAATTTCCCG -TCACAAACTTCGTTGATTGGTTCTTTTCTCATCGAAGGACCCCGAAGTCCGGATAATATC -AAAGTTCAATGGAGGTCTTCAGCCCTTCCTCCCGCGTGAGATTGAATTGCCCATAAAAGG -TTGATAAAATATGAGAGGCTAGTAGTTATAGGACGAGAAGCTATGGGTTATTTACTCGGC -GGAGTATGTTGATCATAACGCCACTGCACAGTGGAGATATGGCCCACTGAGGGTCGCACT -ATTGCTCTGTGCAACATTGGGGCCAAGAACTGGAGCTTTTGCCCTTCTGGTCGCACATTG -AGGGGCCCGTGTCGGAGGCGATGAAAGAATGTCCAGATGTTTTATTGAGCACAGGTCCGC -ATTCTACAACTCCACCTAACCTAACCCGAACTTGAAACCATTCAATATCAGCTGGAACAA 
-TGACCGTACCTCAACCAAAGCTATCCTGGCAAGATTCAGCTCGCCAGGTCCAGGTAGTGC -GTGATCTCTCTATAGAGAGCGTTGACCCCGCCATCGCTGCGCTTCCCGATACACATACGG -GCAGGGTGATTGATTTTCCCAGAAAGCACCTTTCACAGATAGAGATTGCCATCACGGAGA -CTTCCGCTGAGTCCTTGGTGGCTTCGCTTGCAACGGGGAAACTGACGGCAACCGTGGTGA -CAAACGGTTTTCTTCGGCGGGCTGCGATTGCTCAGAAGTTGGTGAGTGGAGCTGCAGGTT -GACAATGACCAACGACTCGAGGTTTGGATTCTAACCATTCATCTACTAAGACCAACTGTA -TCTACGAGCTTCTTCCTGAACGTGCCATCGCCCGCGCTCAGGAGCTCGACGACTATTTTA -CAAAGCATGGAAAACCTTCGGGTCCGCTTCACGGATTACCAATTAGCATCAAAGCACATG -TAGGCCTCAAAGGACGAGATTTGGCTGCTGGGTTCGTCAGTTGCCTAGATCGGGTGAGCA -AAGACGATGCAAATATTGTGAAGATTCTTTTAGACGCTGGTGCAGTTGTTTATGCAAGAA -CAACGGAGCCACAAGGACTGGTCAGTAGATATGAATATCCCGCTATGCTGAATTATTGCT -GATGCCTTGGTTTGGGGATAGATGGCTCTTGAAACCTACAGCAATATCACTGGAACCACC -ACCAACCCACATAATACTGCTCTCACCCCAGGCGGTTCTTCAGGTGGAGAGTCGGCCTTA -CAAGCCCTTTACGGCAGCCCTCTAGGAATTGGCTCAGACATCGGTATGTCGCCAATTTCT -CTGGATTGTTTTTCGTTGATTGCTAATATTATATCCAGGTGGTAGTATTCGCTCTCCCGC -CGCTAATTGTGGTCTCTACGGCCTAAAGCCATCAACCGGTCGACTACCGCTAATCGGATG -CGCTTCCTACCTTCTCGGGTGTGAGGCAATTATGGGAACACTAGGACCTATCTCACCCAC -ACTTGGCGGTGTTGAACTTTTTATGAAGACGATCGTTGAAGCGAAGCCTTGGGTCAAAGA -CTCGATGATGCTTCCAATTCCTTGGAGAGACCAGGAAAAGCACATTCACCAAGATAATAA -AAAGCTGACCGTCGGAGTAATGTGGACGGATGATGTCGTTACTCCCGCGCCGGCAGTGAC -AAGAGCCCTGAAGGAAGTGGTGGAGCGTCTTAAATTGGTGGACAACATCGAGGTTATTGA -ATGGAAAGCATATCAACAGAAGGAGGCTCTTGAGATTCTTGTAAGTGGTCGATATCACCT -CAATGATCGTGCTGAACTTACTAATGAGCATATATAGACCAGGCTTTATGCTCCAGATGG -TGGAAAGGCTTTTGTCAGTCATGCCGAGGCCTCTGGTGAGCCATTGACACCGCTCACGGC -TTGGACTTTGCGGGATACCCCTGGCATTGAAGAGCTCAGCCACCAGGGCTTGTGGGATTG -GACTGGCAAACGGGAGATGTTCCGCTACAGCTATTTGCAAGGTTAGTACACCAATCATTC -ATGATGGATCTCTCATTTCTGATCTGACTCGGCTCCATACAGAGTGGAACAATATTGCTC -CTGAAATGGATGTCATACTCTGTCCCGCATTCCCCACGCCCGCCCCCCTTCACGATACTT -CGAGGTACTGGGGTTACACTTCTTTATTCAATCTTCTCGATTATTCAGCCCTTGTGTTCC -CTGTCACCAAGGTTGATCTTGAGAGAGACGCCAAGGACACCGCTTATATTCCGAAGAACG -AATTTGACAGCTGGGCATATGAGAATTATGATCCGGTCAAGCAAAAGGATGCACCTGTCT -CTTTACAGCTAGTTGCTAAGAAGTTCGAGGAAGAAAAGCTACTCCAGGCGTTCAGAGAGA 
-TTCAGGAGAAAATTGGCCTGCCGTTTGTGGATTGTTTGGCATGAGTATGTCTCGGGGATA -TCGAATTGATTTTAAACAAGATGCCGGATTCATGTTCTGTGCAGGTCCATCTGGAAGGAT -AGACACTTTTCTAGGATAAATCCATAGGTCACCCTTATGGAACTATGAAGATTGCGTAAG -GGGGTATATATCTGCGAGAGCGGTATCATATGATCCATGACCTCTTGTAGGCTTCTGAGG -CTTGTACTCTGTGGTATATCTCATGTGAATAGATCATTCCCAGGATGATCCTTATCAGAG -AATTTCCTTCCAACGAAGAGTATATAATACATTATTTTGAACTTTTGTAAGGCCTGAGCA -ATTAAGCTTGAACCAGGTCTCCAATATTATATTACCCTGAATCAATACCCAGCTACATCT -CCAACAGGCAACGAGAAGTGCCGACTCCAATAGCCATAAAGTGGACCAGTGCTCTCCGCA -TTTGTGGAGATCCCCGGATTTCCCTCTGACTCGAGGAAATGAGAATATCGTTTTCGACTT -ATCCCGACTACTTTTCCAAACCTTCAACGCTCAGCCAGTCTGTTTTGCAGCGTTCTCCGC -ATGGTGTTGCTCATTCGCTCTTTTTCCATCCGACCCCACTGCTATCCGGCCAAACTTCGG -ACATTCTAGCCTTTTCGTCACAGATCTGCAAGACTAGACTCAATTCACGGCAAATCGATT -TACCGTTTCATGTTCCGTATTGCTACGTGATCGATACAGTCAGGGGAGCAGCGACCATGC -CTCGGGACAGATATCAAATATTAGAGCCGCTGGTCTGCAGCCATCTCAGCGTGCTCGAAT -TCATACAATTCCAATTCATACAAGTGATTTGATATCTTCGACGGCCCCTTCTCAATATGA -CTCGTCTTCATGCATTGTCTTTGTTGGCAGCCCTCAGCTCGCTTGCTCTGGCCAGCCAGG -ATGCTTTTAAATCAAAATGTCTAAAATTCGGTGATGAAATTGACATCCCCAATGTGAAGG -TCAACTTTGCCGAGTTTGTGCATGGAGATACCAATTTGTCCCTAGCGGACAATCCGCCTA -GCTGTGGTACATTCAATCAGGCCGTCTCGGTAGATCTATGCCGAATTGCTATGGCTGTGT -CGACCTCCAACAGTAGCGAGATCACGCTAGAGGCTTGGTTCCCTCGTGAATACAAGGGCC -GTTTCCTAAGCACCGGAAACGGTGGTGTCTCGGGGTGTAAGTCTTGTATCCGCTATGATT -CTCAATCCTGCTAATTTTGATCCAGGCATTCAATACTACGACCTCGCATACACAGCACAT -CTAGGGTTTGCTACCGTTGGAGCCAATAACGGACATAATGGTACCTCGGGGAAGCCGTTC -TACCGCCGCCCTGAAGTAATTAAAGATTATGCATATCGCTCTGTCCACACTGGAGTTGTA -ATAGGAAAGGAGCTCACCAAGCAATTCTACGATGAGGGCTTCAACAAAAGCTACTACCTT -GGATGTTCCACTGGTGGCCGCCAAGGTTGGAAATCTGTTCAAAATTACCCCAATGACTTT -GACGGGGTGGTTGCCGGCGCCCCTGCTATCAATCTAATTAACCTGTTTTCATGGAGCGCA -AGTTTCTATTCTATCACTGGATCCCCAACTTCCGACACATTTCTCTCCACGGCTGAATGG -AAGATCGTCCACGGAGAGATCATCCGACAATGTGATACCATTGATGGTGCCAAAGATGGA -ATCATCGAGAATCCCGATCTCTGCCGTCCTGTCTTGGAAACATTGACGTGTGACCCAAGT -GCCTCAAACAAAACAAGCTGTCTCACCAGCGCTCAAGTCACCACCGCCCAGCAAGTGCTA -TCGCCATTGTACGGAATCAATGGCACCCTGTTGTACCCACGTATGCAGCCTGGCTCCGAG 
-ATCCTCGCTGCACCTATCATGTACAATGGCAAACCGTTCCAATACAGTGAAGACTGGTAC -CGCTACGTCGTCTACAACAACCCGTCCTGGAGCGGTGCCAACTTCACAGTAAATGACGCC -TCCCTCGCTGTCGCCCAGAACCCATACAATATCCAAACTTGGGATGGCGATATCTCCTCA -TTCAAAAAGGCCGGCGGGAAAATCCTACATTACCACGGCCTCCAAGACCAACTGATTAGT -TCCGAGGACTCTAAGATGTATTATTCTCATGTCTCGAACACCATGAAACTTCCTCCTTCC -AGATTAGATGAATTCTATCGCTTCTTCCCGATCAGCGGCATGGGCCATTGTGGAGGTGGC -GATGGTGCCTATGGTATTGGTCAGGGACTCAGCACTTACGGGGGAACAAATCCCGAAGAC -AATGTGCTTATGGCAATGGTTCAGTGGGTCGAGGAAGGTATTGCACCGGAAACCGTTCGC -GGTGCCAAGTTCTCTACCGGTCCTGGCTCTAAGGTTGAGTATAAGCGCAAGCACTGCCGC -TGGCCTCGACGCAATGTGTTCAAGGGTCCTGGGAACTACACCAATGAGAACGCCTGGCAG -TGTGTTTAAGCCTGGTTGTCACTTTTTGTGTGTAGCCCTTTTGTCAGATAGCTGGTGTTC -TTATTGAATAATTGCCAGGTTTAATTCAAGTTCGATGAACTCCATCCGGCTCATGTATGT -AAACAACCATAAAAAGACGTCATATCGGCAGAAAATAAACGGAATCCATCGGCAAAACAA -CGCGCTCCGCAACAGTGGCAGACCGCCTAGCAGACTGCCCGGCAGGTTTGCAGATCAGTT -GAGCTGTGTGACCACTATAATTCCGGCTCATTAGTTCCCCTTTATACATCACTCTTGTAT -GAATCATTGCAAATTCTGAAAGACTACAATATATTGCATTCTCCACTGGCTCGCGCACGG -ATCACCATTTGAGGCCCCCAATCGAAGCTACCCCTCGGATCGCGTATATAGCGGCGACAA -TCCCGCGCTCCTGCGTGTCTTAAGCTCACACACAAACCCTCCGCGTGATTTCTCCCAACC -ATCAGTACAATCCTCCAGCCCCAATTGCAATGGCAGACCCCGAGATGGACCAGTTTGCAC -AGACTCGCGGCGCCGACGATCTATTCGACGATGAGATTATCCCTATCTCTACGGAACAGC -AACAGGCGCAGACAGAGGCCATTCCCACTCCCGAGCCAGGATCAGAGGTACAGGAAACAC -ATATACCTGAGAAGCCAGTTCCCGAGCAGCCAATTCCACGTGGGGATACTCCGCAGCGGG -GCCGTGGAGAACGAGGCCGAGGACGACGGGGCCGGGGCAAAGGAGCGCGCGGAGGACGGG -ACTCTGAGCAAAAGCGATCAGAAGGCTCTCCGCGTAAAAAGACTCCTGTCAATCCGCCTG -CTGTGGATGCGCTCGAGGCTGGCGCTTCTAAGCCAGAGAAGCCAGTTGAGACCAAAGAGC -AGGCCGGCTCGGTTGAAGAGGGTAATGGTGAAGATGTGCCTGCGAGTGGTGCCGAGGCCC -AGCGTGTGCCAGCTGTTCGTGGCGATCGAAGTGCCACTGGTGGGCTGAGAAAGGTGGGTA -GTCCTTCTCTTAGTAATACAACGAGAGTACCAAGCTGACTGTTTAATCCTAGCCTAAACT -CACGGAAGAGGAACTATCCAAACGCATTGCTGCAGCCAAGGAAAATGCTGTAAAGAAAGC -TGCAGCACATGCTCGCGCCGAAGCCGACCAAGCATCATTTATGGAGCGCGAGCAGGAAGC -AGCGAAGAAACGTCGCGAGGAACTCGCACACCGTCGTGTGATGGACTCCGAGCGTGAGAA -AAACAGACAGCGCAAGCTCAAGGCCCAAACAGGGCGCGAGTGGGACTCGCAGAAACGTGA 
-GGAAGATTACGACCCCCGCGGTGGAGGTAGCCAATTCCGACGCGGTATGCACGGTGGCGT -GTCTGGCGCCGTGCGACGAGATTTCGAAGAAGTCCGCTCAGAAGATGCGGCAGATCACTC -CGGTGGTAACCGGGGTCgtggacgtggcggtcgaggtggccgtggccgtggaTCTCCGCG -TGCTCCCCAGGGAGATCGGCCACGCAAGGGCCTATCTGACAGTATATATGCAACACCGAG -CCCTCCTGCGCCAGGATTGGACGACCAGGATGCATTCCCAGCTCTGCCCGAGGGACCTAA -GAAGACGGTGACTGAACCTGTCTCTGCGCCGGAAACAAAGGTTAagcccgaggctaagcc -agcccctaagccagagccagagcagaATACTAAGGCTGAGGCTAAAAAGGAGCCTGAAAC -CGCTGCTGTGGTATCCAAGTCACAAACAGCAATGGACAAACTGGATTCTACGTTCTCGCC -AATCACTGGAACTTGGGCAGACCAGTTTGACGATGAATGACGGAATGTTTATGGAATATG -AATCTCGATAGTTCTGGTTCATTGGTTTTCACGATTTTCCTTTTGCATTCATATTACGAA -AAGACTCTTGTTTCCCTCACTTGCAATTTGATATCCCGCTTGTCCTAAAGCCTTTATCGG -TGGCTGAGATGGCTCAGATGCATAGGTTCTCCAACATTACATATACGCATATTGATACTC -ATAGCATACACTACTTTTATGAATTGTAACATTAGTTATCTTTTTATTTATTGATCTTAT -ACAGCATTGTCGGCCATGTATTCGGCCTCTGTCCTCAAGGACGCATCTTAATAATCGATC -CATGTGGCTGAGAACGATAGGACGAACGTTGTGTTCAGGCTTATCCTGAACTGCCTTTTA -AATCTTTGTTGTGATTCATTCAGATCTCTCTTCAAGCGATCTGGTGAGCTGTAAGTATTA -TGAGTGTCCACAGATGAGGTTTCAAACCGGTAGAAAGTACTACTCTACTTTCTATCTTGG -TATTCTCACCAAGTCCCTGTAAGTTGATACCGAGAATAACTGCTTTTGGGACTACATACT -TGAACGCATACTGCATCACTTAAGTTTCCCAGAACTAGATGCGGAGGAGCATACCGCAGA -AATTCTTCCCAAGAGGTAAAGATCAACTGAAATGTCTTGATCATCTTACTGAATTGATAC -GTGGTTCTACCTTCTCTCTATGCTCAGTGGTTGATCTGGGATGTGCCTTTCCAAGGTCTC -TGACATACTTTTGCTCTGTGCTGATGGAACGAACAAGATACATGCTGGCTTCGTTGAAGA -CAACTACTGTTGGCAATGTGTGATAAAGTACGAGTCGGCTCGCTCATACCCTTGACTGTG -CTCGATTAGAAAATAAACCTCCTAACATCTTTCATCGCCTAGCAACGACAAAGTCGCGGC -TGCCCAATCCTCTAGGCATGGTAGACTGAGCCTTGAGAATCATAGTCATCAAAGTCACCA -TTTGAATGGTCACACACTCCATGGCTAGTTGAGTCGCCTCTTGCATTTGACCAAGATTCT -TGCAGGTCACCGCAAGCTCGACCATAGCAAGCACAGTATCCACATCTGTGGCACCCAGAG -TCCTATTGCGACATTCAAACACTTTCCGATCAAGCGGTTCTGCATCCAGCCACCGAGATT -GGCGTCGGTATGAGGATGCAAGATTGCTCATGGCCAAAAGCGTTCTGGGATCCTCTTCCC -CGTGAACTCTATTGAAACCCTCATGACAACGCTTAAATAGTTCCTCGGCCTCTCTGTATT -GGCCTTGAACCCAATAGGAGAATGCCAGCTTATTCGTAGTACTAAGAGTCCTTGGATGAT -CTTCTCCAAACACTCTCATGAGGGTTTCGAGTAATTGTTTCAACAGCGCTTGGGAATCTG 
-CCCAATTCTCCTGTTTGCGGTACACAACATCAAGCTGTTCCATGCTATTTATTGTGTCAA -GATGGTGTTCCCCAAGAACATTTCTGCGCCCCTCGGCGGCTTTCATAAACAGTGGCCCGG -CTTCTCCGAAGCGCCCTTGACCTAAGAAGTTCATTCCAAGGTTGTTCATCGACATCAAAA -CATCGGGGTGCTCGTCGCTGAATAATTTCTTCCGAGATTCCAGAACGCGCTTGTGCAGCA -CCTCTGCATCTCCAAATCTTTCTTGTTTATCGTATGACCGAGCTAGATTGCTTGTTGTGG -TGAGGTTGTCTGGATCTTCCTCCCCGAAATCGCGGCGCATTTTGAGAACTTGCATGTGAA -TATTTTCCGCCTTTTTCCATTGACCTTGCAGCCAGTAGTTATTCCCAAGGTTATTCATTC -CGTCTAGGGTATCTTGATGTTGATCCCCCAGAACTTTTTTTCGGGTAGCAAGGACTTCCA -GGTGAAGTGCTTCCGCATCCTGGAGTCGTTCTAGATTGTCGTAAGTTAAGGCAAGATTAG -CCATGGACGTCAGAGTCTCTTGGTGTTCATCGCCGAGGGTCTCCCTGCGGACCTTGAGCA -CATGCTGGTTTAATTCTTCGGCGTCTTCCCATTTGCCTGTTTGCATGTAGACAAGTGCTA -CCCGCGAAGCACAAGCAAGGCTCTGCGAATGTTCCGCTGGCAGCACCTTCTGTGTTTCGC -TCCTTGATTTCTCTGCCATTCGCTTCATTTCTGCCAGATCTCCGCTTCTCCATGCATACC -ACGCACTTTTGTACAGTAAAAAGGCCCAATCCATGATGGATTCTTTTGAAGCTGGACGTT -TTGACATGGACGCAGCGGCTTTGACATGGGGGAAGAGCGCCTGGTACTTCTCCCAACACT -CATAACCACCACCGGGAAAGTCGCGACAAAGATTGTGAACAAACCTTGCTTTTGGTGCCT -CAATTCCACCATGAAAATCAAGCCAGTTCCGTGTAGCCAGTTGAACCAATGCATGCATCG -CAAATATTTTCTCTTCCCCAACTGTGATGAATGAATAGTTTCGCAATAAAACAAGGTCAT -TCTCAAAATTTTCATCATCATCCGAGTCACTGTCCTCGCTGTCATCTTCATCTGATGAAT -GATTTTCCATCGGATTTGAGGCTTCATCCTTCTTGTCGTCGTGCAACACATATTCAGGAA -TCCCTTGGCGATCGAAGAAACTCATCAAAGACAAAAGATCAGCTGCAGATTTCCTAGCTT -GCTGAATATAGTCAAATGATATCTGCCACGTTATGAGGATAGAGTTGGAAGCTTCCCAGT -CACGCTGCGGGTTTCCACTCTGATGCTCAAGTAACTTGATGCGCCTTCGTTCACTCTTTT -GGAATTGATCGAGATACTGTTGGATTGTACGTCGAGGAGCTCGATGTGCCATGTACGCGA -CAGCCTGAACAATAGCCAGAGGCATGTATTCGAGGGTCTCGGCCAGACTCTTGATATCTT -CCTCACTTTCCTGCAGGAGGAGCTTTTTGTGTATTAGTGCTCGGGCATGTGATTCATCAG -GCTCAATATGAACCACGTTGCTCTCATTGACGATTCTCCAGGCAGATTGCTTATTCCGGG -CGGTCAGAATGACCGATCCATTTTCATATCTGGGGATGTAAGTGATCAAAGGATGTTTGA -TGTTTTGATCTGGCGATGGTCGCACGTGAAGAAAATTATCGTCATCAATATTATCGAGAA -TCATGATCCATTTGCCCCTTTTTGAATCCAGGAGCCAGTCGTGAACTAGCTCGAAGACGT -TTACATTTGGGTCCTCGTAGCCTGGGATCTTGGCCCGTCTGCTGATGTCTTCGTAACTTT -GTTTAAATCGGGCGGCATTGCTTGCGTGGATCCACAAAATCCAAACATCGGGGGATTTCT 
-CCCGCACTCGATACGAATATTCAATTGCCAATTGAGATTTTCTTATTGCCTCTCAGTATT -GGAATCGACGAGGGTTGATAGGGACTTACCCGATCCCACCAAGGCCTACCAGCGCCACTC -GCGACGAAGGGGCGGAGCACAATTGATCAATTTGATCAATCTGACCTTGTCGATCGATGA -AGTCCGGGTCACGGATAAATGGGACCGTGGAGAAAGGTAGAGGTGGCGTCTCGGGTGTTT -CTGCCCGGCTGTCAGCAAAGCTGCTCTCAGGACTGTTCACCTGCACTAACTCACTTGGAG -GCAGATGTAACTGAGCGTTGATTACACCGCTGTTAATGCCCACCTGTATCCCATGATTTC -CATCGCCAAAAGATATATTTGAGGACATTTTGTAGTCGGTTTGCTCGTCAATGTCGATGT -GCTCTAAGGTGCATTCTAGGGGATCATCGGCCCCAACCCCACGTGATGGGTCAAGTCAAT -GCCTAAGACCACATTCCAGCATTCCCAAGTGTAAGGGTTCGTCATGAACGATGCAAAAAT -GGTCATTGGTGGACTTTTATGGTTTTCCGGAGTCTTTCAAAGATGGGAGGCACTTCATTT -TACTCGAAAGCGTCTCGCCTCTAATCCCAGACTTTTGATCCATTCATCTCTCCTAGACGT -CTAGAAAACAGCAGTAGCAGAGAGAATACTAGGTGGCTACGATTGCTAAGATCACTGCCT -ATACAGGCTTGTATATAAAAGCTGGAACATACGCAGAAAAAGTTGTAAAACAGCTATTTT -ATATGTGTACAATAATGGGCTTTGGCCTTGTGGGGCCCCATTGATAGGTTCTGGGTTGGG -TTGGGTTGGGCTTTGCGGAGGTCTAGTCTTCGGGCAGCCATTGTTCTGCGAAGATTTATC -TATAGCAGATGAGACCTTTCCAGGAAATCCAGGCATTGCTCAGTGTAATTTTCGAGGTTA -AAAACAAAACGTCCGAGTTAACCATATCCAATCTCGATTGCTCATACTCCGTATAGAATC -CGAAGTGGATATGATTGGAGGAGTCTCGCGTCAACTGGCAGGTGTACAGGTGCATGCAGC -CAAGAAATTCTCCGAGAACAAAACGGCTATACTGGATAAATTGTATGAAATGTGATTCAC -GTAGTGTCTAAATCGAGGGCAATAACATAGCACAAATTTGGTCTCCTGGATGAGGAGAGG -CGATAAACCATCCGTAAACATGATACGCAAGTTAAGTATAGTATAAAGACCATTAGGTAT -TGCGGAAATCCGGGGATCTCCAATTTGGAGTATGTTAGCTGCATAGACAATCCGGGAGAG -CTCGGTTACTGAGTGCCGGTTGAACCGAATGAGTTGACTAATGAAAATGTCCAATTTCCC -CACAAAGACTAAGTGGAGTGTTAAAGGGTTCTCCTATTCTTCTTTGATGGCAAAATATAT -ATCGAAATGGTTTCAACGGAGAAGACAGATGACACCCAACCACCTTCACCCAACTTGGTG -CCCGAACTTGAAAACGAATATGTACTCCCGGAAAAAAGAGTTAACGGGGGCCTTGATGCT -TGGTTAAATGTCCTCGCGGGTTTCTGTGTTTTTGTGAACTCATGGTTGGTTGAGATTGTT -GCCATATTCCCTTTGTATCAAGCTAATCTTTGCTACAGGGGCCTGTTGACCACTTATGGC -GCGTTTCAAGAGTATTATCAGACAGTCCTTCTATCAAATGAGACACCATCCACAATCTCA -TGGGTGGGCAGTATCCAAGGCACCCTAATTCCATTAGTCGGTCTTGCTACGGGGCCTTTA -GTTGATGTCGGATACTTGCGCCCTCTAATTCTGGCCGGAAGTTTCTTGACCGTATTCGGA -ATGATGATGACCAGTCTCGCCACTTCCTACTATCAGGTAAATCTACAATCTAACATCAGC 
-AGTATATCAGATTCTGACATGACCAGGTATTGCTCGCACAAGGATTCTGCGTCGGAATGG -GAGGCGGCATTTCCTACATCCCAGCCCTCGTCGTGATCTCCGCGAGTTTCACCACCAAAC -GCCCAATTGCCATCGGCTGCGCTTCAATCGGGTCTAGCGTGGGGGCCGTCATCTTCCCCA -TTATGTTCCGCCAACTGCAACCTAAGATCGGATTTCCATGGGCTGTCCGTAGCATCGGCT -TCATCAGTCTATTCCTTGCTATTATATCATGTGTAATCCTATGTCGAAAGCCAGGCCAAA -GAACCCATGCAAGAAGCCTTATCGATTGGAGTGCCTTCCGCGAGCCCTCGTTCATGATGT -TCTCGCTCTCACTCACATGTGTCATGCTCGCATATTACGTGCCAATTTTCTATATCGCCT -CCTATGCCCGTACGGTCGTGCATACCAGCACCGATATTAGCTTTTATATGGTTGCCATTG -TCAACGGCACGTCGGTCATTGGTCGTGTCGTTCCATATCTGCTTGTCGCATATATCAAGC -CCATTGGGATATTAATATTTGCCGTGGCAGCGTCGGCGATTGCGATGTTCACATGGATCG -CGGCTACCGGTACGGCTGGTTTTATGGTCTGGGCATGCTACTGGGGAGCTCTTAGTGGTG -TCTTGGTCACGGCGCCGACTTCCATTGTGGCGCATCCGATTTTCTGTCCGGATTCGAGTT -TTTTGGGGACCCGTTTGGGCATGATGTGGGGGATATCATCTTTCGGGTCGCTTGCTGGGA -CTCCGATTGCGGGTGCTTTGGTGAATCTTGAGACCGCGGAGTTTTTGCGTGCTCAGGTTT -TTGCAGGGTGCATGATGGTTGGTGCTGTCTTGCTGCAGGTTTGGCCAACTTTCAGGGTGG -CTCGGTATGACCTAGAGCATCCTCGTAAAAAATAAACGATACTAGAAATTGGAGTGTAGG -AAGAACTTATTGCATCAACCCAAGTTAAGTAAATATGATTGAAGTCTATGTAAGACCACT -ATCTGAGAAGACTGGTAAATGACAGTTTTCGGGAGAGGGCTACTAAATTGTCTAAGTGCA -TCGGGCAAATAGGATATTAGATGGCTGAGTATTGAACACAATTTACAGACAATATATGAA -CTTTGGCACAAACATGACAAGGCCACATAAGCAGCCGCCTAGAAGTCTACGCATTCACCG -ATCAATATCCCAGGAAGAAAGCTTTCATACGCTTCTTTTGTCTTTGCTTGGTCCTGAAAA -GCGCTGTTCAATCCGCCATGCCCCTTAACTCAGATATTTGTATATATCTACATATGTGAT -GCATGTACCACCCACACGTGGAGCGGAAGTAATATTTTATTCTTGAGAATATGAAAGTTT -CAGTGCTACCACTGCCATAAATCTATCTATATAACTCCGTGGGGACGTGACTTTCTGGGT -TGCCAAAGGCCACAAATTGAAATGTTCTTACCAAATAGGTTGGCGCAACGGTGGGAAGAC -GAACCAGCTGGCAGATCAAACAGTTCGCTATGTATACAAACCAAACTTAGATACAATCAC -CCCGTCCATCAAAAACAGGGTATTCATGATCTACGTGATGGAAAAATACGTGATTATGTG -CCTCGATGATACGCCAATCAAAAAAGGGCGATGTATACTCATTATGACATCATCTGGGAT -CATGGCAAGGAATGAGGTCAAGGCCAAAAGAACATATATAAATCGAATGAACGAGTAACT -TGACTTAAATCAAATACCCATTACCTGTACAAATTCAACCACAAGAACTTATCCTCACCC -ACAAAAAGGAAATTTATCAACACTTGAATCGTGAGTATACAAGTTTCTTGTTACAACACC -CCATCCTACCATCCACTCCTCTCATAAGTCTATCATCTCACAGCATATTCGCTATCGATA 
-TGGATGATTTCGCTGACAGCCTCACGTGTAGTCAAATCATATCAAAATGGGTATCGAGAG -TAGCCTCGACCAGCAAACGACTGCCGTCAACGGAGGTAGCAGTGCCCAACCCACACCCGC -TAACTCCGACGAAGGATCGGGATGGGGCGACAGGCTATGGCTAGGTGGCAAAAATGAGGG -ACTGGGTCATGCAGATGTACGTGGTATAGTGTTGCCAGGTGAGCTTAATGGCAGACGTTG -ACTAGTCTCTCTCATATCGCAGGCTCCTAATCCGAAATCAACAATGGGCGGCTTCCTTGC -TTTAGATGAACAGAAGACGCGTGGAGAAAGCAAACTTTCTGAACGTCAGAACTTGGTCGA -CTTTGAAAAAACCAGTGTTAGACCTATAAATACCGAAGGGGAGCCCACTTGCGCCAGTGA -GGATCATTCTTTCATGAAGCATAAGGCTGGAGATCCGGATACATATCCCGGATGGAGTAC -AGTTAAGAACATTTTCGGCAGGTGAGTTCCCTCGTTTATTTGCGATACATAGCCTGATTG -GTTCTGATCAAATCCAATTTGTCATAGCGGATAAAAAAGGTTATACTACTAGTATGAAGG -TGGATTTGCTACACTCACACGTATCTTAATTGTAAATTGAAATTTAACATGTCGAACGCG -TCTTAAAAAATTTAAAGCCTAAATGTAATATTTTATAATATCTTACACGAACAAATTCTG -TTCGTATCTAGACAAGTGCCATCAGCCTCAGAATATTAAGCCTAGAGAGACGATTTGGCC -TGAAGTTTCTCAAGTTCACCCAACGCGCATCCAACGGCATCAACTGCCATTCCATATCCA -GCTTGGAATCCAGTTCCCCCAGCGCCGTAGTTGTGCACAACGAGGCCCTTTCGCTCCGAA -CCGTCAATTTGCACGACTTCACTGGCAACTCGCGCGCCTCCCTCTCGTGATGGGCGCAAG -CCTGCAAATGCTGCGAGGATTTCTGTATTGGGTGACTCAAGCTCGGGTAACATTGCCTTG -GTACGGGATAATATAGACTCGGTCTCTTCTCCAAGTGTATCAGGCAAGCTATGACTGTTA -GTCCTTGAAGCATCCACAGAAAAAGTATGGATCTATTCGACTCACCCGACTCCCTTCTGC -ATATATCCACCGAGAATGACATTACCCTTCGAGTGTGGTCGTGGGATAATATACGTTTCA -TAATCCTTTCCGTGTCTCATCACATTTTGATCGATGTGTGTTGCCTTCGCAAGCAGAATC -TGGCCTCTGGTGGGGTAGCACTTAGGGTCCTCGACTCCCTTCAAGGTGCGGGCAGCGTTA -CCGGTGCAGTTAAAGACCACCTTTGTATCCTTGCTAAGGAATCCTGAGGTGACACCCGAG -ACTTTTTTGCGGATGACCCGCACACCGTACTCCTGTCTGAGTCTTTGAAGGAGGTAGCGA -ATGTGCATTGGGGCATTGAGGGTTACTGTGGTGAATCTGACACCACATGCAACACCCTCT -GGTAGGTCCCTCTGGGAAATTTCTTTGTACTGTAGATTTTGAATATTAACAACTTAATTC -GGAGCACATTGTTGGTAATCGACTCACATCCTTCAGATAACTGGCCATGGAATCAAGCTT -CGTGCGCGAAGGCAGTTCATCCCAGTACTCGGTAGAAGGAGTTTCCCGAACAAATGCCTT -CATACTATCTTGCGCTGCAAGTTTCAGAAGGTGCTTGTACCCGAGTTGATCCCATCGCAA -GGCATTCTTGTCACTTGCAGAAAGGGCAGAGAAATTTGCGCCAGCCCTAGATTAAATTAT -TATCATTGAAAACACATTCGATGCCTACAAATATGAATAAGATCGAGCAGACTTACCATG -GCGAAGTATAGTTTATGGATGTATCACCCGGGAGGTGCTCAGCTATGATCGTTGTGTGGC 
-TGGCATAGCCCTGCTCAGCAAGAACAAGAGCAACATTCAGTCCAATGATGCCAGCACTTG -AGAAAAAAAAAATGTTAGCTTGAATCTGCGAGCTGCAGTGAAGAAATTCTGTTTTTGGAC -ACGCACCCAATGATTACCACAGATTCGCTCTTCATTGTGACTAGCTTGGTGTTCGAAAAC -TCAATAAAATGACTCGAAGTCAATTCGTCTAGGGGACGCCTCTAATTTAAACCGAAAGAA -ACTAGAGCTTGTCAGTTTCTTCGGAGATCTCTTTGACTCATTGAATTTGATATTCTTATC -TTATGTCTAGCTTGCAATACAGATGAGAATCCACGAGTAAACGAGCGCCTATCTCGGAGG -CGCAGGATTAGGGGACGCTCCGATTGGCGGCATTCGGCTGTGGGGTATGCGGCGTAGCTT -GTGATTGGTGCCCGTGATTCTGGCGTCGCCTAAATTGGAAACGTGTGTATATCTCGGCTG -ATAAAGGACTTGGGGGCTGGTTAACAGCACATGGCACTGGGGAAAGTGGACCCCAATTGA -AGGGTTCATATCTCGCAAATTTCAGGATTGCCAAATTTGCGGTGTCCTATTTGGGACAAT -CTTGGTGATGTTGAATAATTTGAAATTGAAAGTGAAGTAAGTTAAAACATATTGCTTTTG -TTGAAAAAGCTAGAGAAGAATTCATGTATAGTAATCTACAGGACTTTGAGGTGAGAAAAT -CTTGCAAGGCTACAAAACAATTTGAGAGTGGTCTACAAGAAGTGCAGGCATAAGATTTAC -CCACTGGGCGTGACAATGCATGGTGATTTGTGGAGAATAGCTGTAAAAATTTTCAACCTA -AGCATCTATCTGGACATGGTATAAAATAAGAACGAGTGTGAGAATAATGTGTGGAAATCT -ATTGATTATATAGGCTATTTGACAGGAAAAAATGACTCCTATGCTTCACTATGTTTCCCA -TAGTCAGCAACTTGTGTAACATGCTGTCCATTTCCCTCAGCTGGAACTCCGTCGAGTCCT -GGGATAAAGGGATCATCTAAGTCACTCTTTCTGAAGCAAAGGAAAAACAGACATCCCGCC -ACGAATCCCACAATTGCGAGAGATCCATACATCCAAACAAGCTTTGGATCTTCTGAAACT -GGACTTAGACCAATGCAGATAGCCGCCGCAATGCCGATCATAAGCCAGAAAAGTGCGGAG -ATAAAGGATCGCAGGCTAAAGTAAGATGTGGTTAGTTTCAGTTGGACAGTATCATGGAGG -GGTTGGTTTAACTCACTTCTTCGGAGCATGAGTGAAGGCAAGTTCCAAGCCAGTAACAAT -GACAAAGGCTTCGGAGAATGCGACAAAAATATACGCAGGGGCCTGGATCCAGACATGGAT -GCTTTTTTCCGGGGAGATGTAGATGTAATGCTGTAGCACGCTGGCGTAGACCATGGAGAT -CGAGACAAGAAGGAAACCGGCGAAGATTCGGCGGACTGGGCTGAAGTTGATGTTGAACTT -GCGAAGGACTGGATAAAGACCCACTAGAGATACATTGTCAGCGAATATTCTCCGGTATCA -TCATGTCAATACTCACAGTCAAGAAGTGGGATGAAGATACACAAAGCAATCGGGTCGAGG -TTTTGCAGAAGATCGTTCGGGGTTCCGTGCAACGCCATTTGTCCAGCTTGAGAGATAAGA -TTATTCCAAATCTGGACCCAGCAGACGAAGTAGAAGGGGAAGAAAATAAACATCTGTAAT -AGTTGTCAGTATAACTCACTGCAGAAGTTTTGAACTTCGACTTACTCTACAGCTTTTAAA -GCCTCGGCGGACGTCCGAGACGTACTGATCTGTGTAACGTGGCGAAGACGCAAATGGATA -CTTGCTCAGACGTCCGGAAGCCTGTAGGGATGATGGCTTTGCGCTTTCGAAATTCTTCTC 
-CTGTCGAGCAATGTTCATGACCTTACATGCATCAATAATAGCAGATCCCTGGGGAGGAGT -CTTAACATATCGCTTCTGCCCGGACAAAAAGACGATACCCGAGAGAACGATTGCGCTGGA -AGTCATTAGTGGTGTAACTGGCATTCTTCTCATTCGAACTTACATCAACGGGATCAAATA -TGCAAGCCAGAAAGAGTGCTTTGCCTCAACATTGACGGTAATCAATGGGGAGAGGGCGCC -AACATTTACGACCCAGTAGAACCACATGAAAAGACGCTGTACGGTCAATTCAGGATCAAC -AACCACACGCTCGCCTGATTTGAGAGTTTTCAGAACTGGCTCTGCGTTTTGGTATTGCTC -GGCGCACATGGGAGTCACATTGGCCTTGATTCCACCTGTTCCCAGGCCAATGACGACCAT -TGCAGTAACCAACCCGCCAAATCCAGCATCACTCTTCAGGCCAGCTGGTGTCGCGGTGGC -GACCAGGATGGTCAACCCGACAATATAAATTCCCAGAGAAATTGTAATGGCTTTGAATTT -GCCAACATATTGATCTAATGGACTGTTAGCAACAATGCTGCAAGATAAGATGGAAACCTT -GTTCTTACCTGCAACGATTGCGCCGATGATTGTCGAGGCATAAGCCCAAAATTTGAAAAA -GTTTCCAAGAGCAGATGCAGTAGATTGTCCTCTACCTAGTGCACCAGGGAGACCGGAACC -AGGATCGTAGGGGTTACTAGACACAGAATAGTTAGAAATTGCTCCCAAGACCTCCACTAC -TTGTGGAAAGGGTTAACGTACTTGATATAATTTTGCATAGGACCGCTAAGGCCAAAATAA -GTGAACCGTTCGCCAAGCTAATATCTCAATGTTAGCGTGCTTCACCACGTGGAAGGTGGA -GCAGCATACCTCGACGACAAGAATTAACAATGCGACACGTGGGAACTTGTCAGGAATACG -CCGGAGTCCAGCATAAAGATCATTAGGAGACTGGTCAACTTTGTCCAGTTCCTCTGAATT -TCTTCGGTTATCGACTGAGACAGAATCCATCTCGGATATACAAGGTAGTGTGTAAAGTAA -TGTGTGAGAAAAATAAATCTATGTCACCGAGGAACGGCGTTTTAATTTAACCAACTCGGT -CAGTGGAAGGTTCAACATTAGGAACGAATCCGGGGTGCAGGATCCACCTTCGACCCGCAG -ATAGGAGGACCCCCACACCAATGCCTATCGCCTTAGATCGGAGCCGATACTGTGGCCGAT -GCCGCCAACCGGAGTCCTGCGTTCTATCCGAGGCGTATCGAGGGCTAAGTGACTATATTT -ATTAATGCATTTTTTTCTTTGATTTGTTTTGACCGTTCGCTTAAGATTGACCATTTACCA -TCTTTTTGCCTCTAACGTTATTTCTCTATCGCAAACCTAATCGCTCCTTTGACACATAAC -AATGGCCAGTACACAAACACCAGTGGAAGAGAAGGGGCGTTTGTCTATCCCAGACAAGCT -CCTGAAGGGGTTGGATCCCGAATGGGTAGAGCTATGGGAAAAGCATGGCAGCTCCATGGT -GAGAGCCGACGAGCTATCTCTTGAAGAATATCGCAAAAGTCCCGCGACATACAGCTTTAC -CTACCCAACATGCCCCGGTAAGTCAGATACAATAGCTTCGAGCATTGAATTGGAGCCATA -AACAAGTGCTGACCCTAATCAAGGTCCCTCCGTTTTCCATGTCGAGGACATAGAGATTCC -GGTCACTCAACCAGCTGGAAAGATTATGATCCGGGTCTACACGCCAGAGGGACCAGGTCC -ATTCCCTGTTCATTTGAACTTCCACGGAGGTACGTATATTATGGCACTGGACCCACACTC -TCCAGCATGAGATTTTTCACTCTAACTCATCCTTTATAGGTGGTTGGGTCCTTGGTAATC 
-TCAACTCTGAAGCTGCTTGGTGCAGGCACATGTGCAACAAAGCCCAAATTAAAGTTATCG -ATGTTGACTACCGCCTGGCTCCGGAGTTTCCCTATCCTACCAGCATCTACGATTCATGGG -ATGCTGTGAAATGGGTAAGTACAAGAAGCAATGCCGTTTGAAATTTACTTCATTAACACA -TGATATTTGACCAGACAATTTCCAATGCCTCAAGTATCAACGTTGACCCAACGAGTGTCT -CTATCGGAGGTCTCTCGGCTGGAGGTCAAATGACTGCAGTAATGGCGCACTTCGCCCGCG -ATGAGGGTATTGATCTGAAACTTCAATTGATCATTGTTCCGGCGACAGACATGCGCTACT -GTCTCAAAACCCGAGAGTTGAACGAAACCACATGTCCCTATGAAAGTGTTCTGTTATACC -ATGATGCCCCTTGGGGACCATTAGGCAGAGAGCAATGGTTCCTCAAGTATTGGCTCGGCG -ATGATGATGGTAAGtttttttttttctttttttgttCTGCTGTTGTAATTCCCTTCATGC -ATATACTAATATTGCTCTAGCTATCCAAGAGAGAATCTTGACAAAGGAGTGGATCTGTAC -CCCGGTACTTGCTCCTTCATTCAAAAATCTTGCTCAGGCACACATCATCACCGCAGAATT -TGATCTCGAGCGCGATGAAGGAGAATATTATGGCCAGCTTATGCAAAAGGCTGGAAATGA -TGTCACAATGAAGAGATACCCAGGCGTGCCACATGCATTTGGGCACTTCAATCATCCTGA -AAGGGGACTCTCGCAGAGCTTTGTATACATCGAGGATACCAGTCGACTCCTTCGGAAGGT -GCACTTCAGCCACAGTGAAGAGTAATTGGGAACACATGTGTAAAAAAAAATTTAGATGAA -AGAAAAAAAGTTACATCGCTCAAACTCTACTTTATCTCTGAGTGGACCGTATTGCCTAAA -GATATCGTTCGGTAAGTAGCTCTAGCCTTTTTATTTTGCACTGCTTTGACTTATATCACC -TATATCCATGCCTCCCCAAGGATTCCAAAACTCCCATTTGTCTTAAGTTCAATCAACCCA -GGATGAATGCTATGACAAATCTTTAGTTATTTACCGAGGTCGGCCCGGGAGATGCGGGGA -CAAGGTTATCGGGACGATAAACCTAAAATTTAAATTCGCGTTGGTCACAGCCACGGGCTG -GACATGAGCTAGGAATTGATCTAGGGTATTATGGAAGTTCTTTTGTACATGATTATGTGC -CTCAATTGAGCAGTAATACTTGGTCTGATCATTCAGTCGTTTACCTCTCGAGTCTCAAGT -CAAGGAAATGCTAGAGAGCCCAGTGACTGGTCATTTAGTTATTACAGCCACGAAGATTGC -AGACAATTCAACCCTGTTCATCTTTTAGGCACATGGTCAAACAGGTTTCCATAATTAACA -TAACCGCTCAATAGTTCATCTATCAACGTATCACCATTCACCCATCATGTTATTGGTTAT -AGGATTCCAATATTTTCTTCAACGAGTTTCTCAAACTACTAGTGGTGGATGTCACTTCTG -GTCGCTACCAGTACCTACCCATTCTACTCGACTTTGGACAGTGCGGTATAGGCCGTTTGT -AGAGGCAATGTAGTGGTTTTGGAAGGACAAATTGGTATGTAAAGGGCTATTAAATCAATG -AATAATTGCCCACTTGGTCTGCAAGAGAAGAAACGGAAGATGGAATCATCTAATTGGGAA -AATAAATCATAGTAAGATGGGACATGTTCATGTGAAGAATGCCCACCAACCTTCGGGTCC -GCCTGCCCACGTGGGCCCGGAACTAACCGTGTTAGATAAGCTGTGAAGAGCGGGCTGTAG -TCACGTGGTATGTGGGGCCGGCATCGGGAATGGACTGTAGTCATCCTCGGTCCTGAACTC 
-CCAGGGTTTAACAAGAACTATATGTACTCATGACAATTTTCTCAATGCAAGGATGTCAGG -TCGAGCTCGTAGGTACATGAGTAAGCGGGACCGCGCTTGTGATTCCTGTCGGGCCCGTAA -GGCAGCCTGTCGCATTGATCGAGCACCTCCCTGTTATCTTTGCACGCTTCATGGGAAAGA -ATGCACATTCCTACAGCCAACCTCAAGGCCACGCCAGAACTTTACTGAGGCTACGAGCTC -ACGTATTTCCCAACATACTGAGATGAGCCCACGCGCAGAGCTGCCTGGGGTCTCCCCAGT -TGGTGACTCCGTCCACTACGATATTTTGATGTCGATCGATGGTAGTGGGGTAGACCATTC -AGTTGCCACTGCACCTGGGGAGTTTCATGATTTTTTTGATGGAGCCGGAGTATCATTACC -TAATGGCTTTGGAGATGGTACTTGGCCAACTGGGTTTAGTCCAGGTAGCTTCATCTATCC -ATCCCCTAGGCTGCCAGGGACCGGCCCTTCAGAGGATGGAGGCTCATCGCAGGCCGCTTG -TTTGCTATTGCCTATGCAGGAGGCTAACTCGTCTATTTGCCTCGACACTCAAGGTGCAAT -GACACCACAATTGATTGGCAGTAGTGGTGATATGGATCCTTTGCTGATGAATCATTATCA -ATACGACACATCTGGGGCTTTTCATTTCAAAAATCTCAATATCCAATCCATATCCTCAGG -GAACGACCCAACACAGTTTTTACTGTCAAAACCGTCAATATTTACTGCCAGCAGGGAGGA -AAATGGATGCAATAACGTGTCTGCCTTTGAATTGAGACAGGAACTCGAATCTTTGGTCCC -GGGGGATATCGGCCTCAGACTAATCAGCCTGTTTGAGAGGATCGTCGCACCAAATTACCC -AATACTTGGCCCACCAGGCCAGTTAGATACCAGCACCAGCCCGCCATACCTCCTTGCGGG -CGTATACCTAATAGTCGAACCCTTCACTAAATTTGACGAAAGACTGTGCATTGATCTCGC -ATATGATAAGCCATCAACTGTGGCACTATATGAGGTTATCAACAAAGCACTTCCATATGA -GATACATGCACCCAAGCTTTGTGTGGTACAGACAATGCTTCTTTTGGCTATTCGACCCTA -TCCTAATCCAATTGTACTGGATAGTGGCTTCAAATGGTCCCAATTCGCGACTCTCATTGC -GTGTGCTCACACCCTTGGACTACATTTGGATCCAAAGTCGTGGCGGATACCTCCGTGGCA -AATTGCCCAGCGACGCTGTCTGTCCTATTTCATTTATTCGACAGACAAGTGGCTTGCGTT -GAGCCTTGGTCGACCACCTTTACTTCATTATGATAACTGGCTCGTGACAGGCCCTAGCCA -AGACGATCTTCCTGCCAGTGGACTTGACCCTGTAGCCTGGACGAACATCATGAAGCGAGC -GAAACTTGACTCTCTGCTTGATCGTGTTCTAACCACATTATAGTAAGTCATCTCGCATCT -CTGTTATATCGCTGTTATCCGCAACTCACCAATATTAGTTCACCGCGTGCCATTAATACC -ATTTGTACTGATTACGAGAAAACCATCGCATTGACTGGGCCACTACTTGAAGAACTCTTC -TCATCGCACAGACCGGTATCTGCATCATCGACTGATTTGAGATCATCAAATCGGGGCAGG -CCAATGGGACTAGAAAAAACACTCGAGATGAACTATCATTATATTCATCTGAGTATTTGC -AGAGCTATTTTGAGACCGTTCCTGCAGCGAACCGTCGAAACCCCGAATGAGACCGAATAT -ATAATGGCGCGGGAGCAAGCCCGAATCCGAGCAGAAACTTGTATATCAGCAGCAGCGGAG -TTCATTCACGAGCTGCGGCCGGAGGACCTTGAGGCTCTTTGGCCGGCATGGAGTGCAACT 
-GCATTCTCGTCCATATGTTTCCAGATCCTGCATATGGCTGCCAGCTCCGTTGATAGGGGC -GAGGCAGATAAATGGGTTGCGTGTCTGCACAAAGTGCGCCGGGATATGCGCCTGAAGGCT -GATATTCTGCCTTGCCTTCATCTTGGCCTTTTACGGATTGACTCCATATTTTGGAAGGGA -ATTAACAATGTGTTTCATTTGGAAGAACATGTTCGACAGGCGTTTGCGTCGGATTTAAGT -GTTGGTCGGCCTCTTGGTAAAGCTGGTTCTGCTAAGTGAGCATGTGTTGCATTGATGTGT -ACTAATAGATTTCGAACACTGGGATTAGGGGCAGATGTGTACGAATAATAACATGGGTGC -ATGATTGTAAAGCAGTTTGTGACTGGTGATCTTTTGCTTCAATGTCATTGGTCCTCGCAT -TATATGTGAAAAAGAGCAATAGAGAAATAAAGTAGGATTCTCACAAGTCTCTTATGGGGC -CATGAACTGTTAAAAATTGTAGTTGTAATGGACATATTCAGATATTAACTAGTCTACCTT -GACACCCTTTCCACGGAGTCGATCACGAAGAGCCACCAATTTCCCCAGTATCCGGGCTTT -GAAAGTCTGGGCTCCAAGAGCGGCAGCCTCATCCACAGAGACAACTCGTCGAACCAGGAT -GTTGAAAATAGTTCCGAAAGCAAGGGCAATCACGGTGCTAGTGAGAATGATCACATTGTA -TGGCATGCTGAAATCCGGCGTTGGAAGTTGAAGCAGTAGACTCGTAGAACGGATGTATAC -CGGCGCATTGTGCGCCGGATCCAAGATCCGAATTACTGCAGGTGCAACGTTGAACCCACG -ATTGGCATCCGGTGGGTACTCAGTGTACCGAAGAATGGCTTTCTCGAAGTCGTAAGTTAA -TGTGACGGTTGATGCGGCGGGGACAGACAGCGCCAGTTCTAATTGTGTTCCACGCTCGCG -ATCAATGGCGGGCCGGTAGTACGTCTCTTTGACGATTTCAGAGGCTGAGACCTCGCGCAA -GACCCCGTCATGGCCTGTGATTGTTGCGCGTAAGGTATGTATATATGGTCGCAAGAACCA -CGGAAGAGATTCGAAATAGATGAAATCAACTGCGCTGGTGTTGGACGGATTGCCAAATAT -AATGCGCATTCCGCCGCGCTCCTGGCCATGTCCAACGATAGTGCGCTCCGCGTGCAGAAT -AGGCTCGTCTAATGGGACTTCAGTGTTCACTTGCTGCTGCGGGATTGAAATATCAAAGGA -AACCGATGGTGCCAATTTGAAGCAGCGCGTGAAGCCATCCTCTTTCTTAATTTCATCAGC -CCCCTCGGATATGAAGACACCCCTTTCGTGGGGCACCCGAAGGCATACACTTTGGTCGCT -AGATCCATCGAGATCTGTAAGAGGGCAAACCCCATGTATGCTCCGCCCAAAGACTTCTGT -AAGACTCCACGTCTTGTCATTTGTCTTTTCCAGCGGGTAGCAAGTATCATCTGAATTGTA -AGGCTTGGAAGTGTCACAAACCAACTGCTCATTAGGCACAGGCCGCGGGATGGGGTTCGC -TAATAATATGTTAGCATTCTTATGCCAGAGTATACCGCGGAAAAATACGCACAACGAGAG -CGTTTAGACCGGTCAATATCCAAGACCATGTCCACAGTTTGCTCGATCTGCACCACACAT -TCCCCGTCTGCAGAACAGGCCGGCTGGATATCCACTGACATACTCTGCCATGCGGCATCG -AACAATTTGTGCCCGTCAAACAGTGTGGAGATTCCGGCCTTGCCTTTGCAAGGAAGCAGT -TTCAAGAAAGGGGTCAAATTCTCTGTACAAACCACCTCTCCGGGAAGTGTTCCATGCAAG -AGGTGCAGCTGATTGGCAGCTGAGTGATCCCCGGCTGGTTCAAAAGAAACCACAGGTCTA 
-GTGGTCCGGGTCGAGTCCACGAAGTTCAGCGAGGCGCAGAACAAACCAGAGAGGGACTGG -GTCAGGGTGATCCATTTGGCAAAAGCCCTAATTAAATCAGCATTGTGTGCTTAGGAAGAC -ATACTTGAGTGTTGAACATACTCCTCGTCACTTGGCGCATCAATCCACGCCCAAAGCTCG -ACGCCAGTGCCTCCTTCTTTACTGCCGTTCCATGGCCGTGGACCCCAGCTTTCAGCATCC -CATCTTCCCGTAGTGAAGCGAAGGTGTAGTTCTTTTGTATGCGCGTGTTGAAGGATTTGA -CCAAGGGCGCGAGGCAAATATCTAAAATGCTGTTGGTCGAATGATTGTTGTGATGTATTG -CTTCGGAAGTTAAACGAAGCTAGTAAAGAGGACTGCGGGAGTGGTTGTAGGAGGAGTTGC -TCATGATAATCCGACGACGCAAAGGCAGAGGAGCTCCATAATGTGAGAATAAGCGCGACA -AAAGGAAGTGAGAATAGTGAAAGGACCGTCATAACTTCATGCTTGCCGGAGCTCGGTCTT -CGTCCATACGGGTATCACAGGTCAATTGGTAGGGGTTATCGGCGAAGTGATGGGTATATG -TTGGCTGTGGCCCCAGAAAATCAGACCTAAGTAATCACCGAATCAATAGAAAAGCTCCGC -ATCGAAAGAATGGTCGAGGCCAAACTCCATGACCCCCGAGATGTCCATCTGGTCTGTTGA -CTGCGGAGCCTCACCGCCTTCCTTGGCGCCAAAAGCATCAAAAGCGAAAGACTTCGTCGT -TTGCCGCAAATCAGCCCGTTGACACAATTTCGTCTTTCCTTTCCTTTTCAGCTTAGTGTA -TTACTATCAACATGGCAGCGGAACAGAGAAAGCTGCTTGAGCAGCTCATGGGAGGTATGT -CATGCTTTTTGCCCTTCTCAAACCCTCCAAGCTCCAGCCCACCCCGACAAGTCTCCAACT -AGAGCTAACATTGCTTCTGTCGATAGCTGATCAGCTCATTGGAACTGGCGCACAGGGTCG -CAATTCCCAGCTCGCAATTACAGATAACAAAGTTTGCCGATCCTACCTTGTTGGCACCTG -CCCGCACGACCTGTTCACCAACACCAAGCAAGACCTCGGACCCTGCCCGAAGGTCCACAG -CGAAGGCTTGAAGGCGGAGTACGAGGCCGCATCTCCCTCTGAGAAGGCAAAATGGGGGTT -CGAATTCGATTACATGCGCGACATGCAGAAGTACATTGACGACTGTGACCGCCGGATTGA -CACTGCGCAGCGCCGATTGGAAAAGACGCCAGATGAGATTCGGCAGACAAATGACTTGGT -AAGTCCAGATTCACTACTATATCCAATCTCCAATGAGAGTAGCGCCCATGGACTTTGCTA -ACTTCCGATATAGCTCAAACAAATTGCCGATCTTTCCAATACAATCAACTCCGGCCTTCA -AGAAGTTTCGATTCTCGGCGAAACAGGCTCTGTAGCACTAGCACTCAATGAAATGCACAA -AGTCCGCACCTCCAAGCACCAGAAGGAAAGTCTCGAGCGCGATCTCAAGAACCTTCAAGA -TACTTCTGGTCCATCCGGCCACCAGAAGCTGCAAGTCTGTGACGTTTGCGGTGCTTACTT -GTCTCGTCTTGACAACGACCGCCGGTTGGCGGATCACTTCTTCGGAAAGATGCACATGGG -ATACTCAGACATGCGCAAGAACTGTAAGAAGCTGAGCATTGAGCTGAAGGGCCGTGCTCC -GCCAGTTCGTCACCATGAAGAAGATGATAATCCTTACACCTCTGGTGGTCGTCCCGGTGC -TGGTCGCGGCCGTTATggcggtggcggcggtggcggcggtggATATCGGCGCCGTGGAGG -TGGTCGATGGTGATGTTCGTTGTCTTTACTTCATTCTTGAAGATTCGGGCTCTGGGTCCA 
-TTGCATAGCGCAGGCGTTTTTACTTATTATCTCGTGTATGATGCTCAGACAACGGTTAGG -GAGGTCATAGGATAAGTGCTCAATATCCATTTCATTTTTCATGCTTCTTGTCGATATATA -TCTCACAACCATCTAAGTACAAGAATCGTGTATAGTGTACATTGTGTCTCTCCATTCCCC -GAAATATCCACTGATACTCTCACATGGAGTGTAGTGTCTTATATTAATGTAAAATAATGA -AAATGTTGATTATGCAAGATTCAATTCATAGTATCATCCCATCCATGGATGACTTGCCCT -AACTTTCCGATTAGTCCGCGGCCAGGTTGCTACCTAGATAGGTATGGCTTTGGGATAGCG -GGTGTGATTCAGGTCATGATTTATCTAACATGTAGATTTAAATTCATTCTGTACTTGTAC -AACTTATCTTGGTAAAACTACGCATCAACTCTTTACTCGAGATGAGGGATAAAGATATCC -TTTCCAATGATCCACAGCAGGAGGAAATCGACTTAGAGAAATCAGGAGACATATGTACTC -TGCAAATACATCGATAAAACCTATGGCTCAAACCAGACACCAAGGACTGGAAGCTTGACT -TCATAAGGGTGCTTTTCATAACTGAATCTCGGCTACGGGAGAATACTCAACGCGAAATGA -TGCTCGGGGAGAAATTGCAAAGGCTTGAGGAAAATCCTACCCACGAAAAAGAAGTCATCC -AGGATTTGGGACAAGATCGAAATATCCTTGGAGCAATACTGGTTGAAGGAGCGATTTTGG -TGGGTGGTGAGGGTATCGTTCATAGATAGCCCGTTCACGATCGGCCTCGACCTTGGGCGC -TCTCAGCCAAAATGGTATATGCATCGTGTGCTTGTTGAAGACTTTGCTCGAATGGGAGGA -TGCTGCGGAAGAAAGCGTGGGTATTGTTCATTTTGCGAGAACTTACTCGGACGAAAGCTT -GCCGCTGCCCATTGCACTGTTGAATGTTCTTGTTGTGAGAAATCTCGAGGATTTGAACTC -AACCCTAGGCTTAGGTCAAGACTGCGACAGTGGCCCGACCTGAAGAAGGATTGCGTACAC -TATTGTCGGATCAGGAAGATCTCTTTGTTAGGGGCGTCGGGTGGAAGTCGTCAAAACCAG -TGGCACTGGATGAGATCCAAAGCTAGCGGATTGTGTGGAAAGAAACCCAGAATGTCTATT -TGCTTGAATGAATGGGATAACTAGTATCGAACAGTACGGTACTAAGATACTTATACAAAT -ACAACCCTGACTTTGGTAAATGGTTTCAATTTGGCTACAAACCCATACATATGCGAGTTC -AGGGCCAAGCCATATGAGAGTACGGCAGAAGAACCCTGGTAGAAGTGGTTTCATTTTGAG -TAGAGAACCAGGAGGCATAAAATTTGAAAACATTTGGTGGGTATTCCACTTTCCTAAAGC -AAACCCTTTTCCCATCCCTGAAGACATCAGTCCCGAACACCAGACTAGCATTGCGCAGCC -TTGTAAAAGGAAATACAAATGGGATAGACTGTACAGGTAAAAAAAAAACTTGGCTATCAT -GGGCATAGCTCTATTACACACCCTACTGGGGCACGTGGCCGTTTGCTTGGGTCTCGAATA -GAGCCTGCATCAAAGTATCGTAATTAGCAGCGGCAGCAGGTTGACGGACCAGAATGTCGC -GAACAACACTCAGCTGCGCCATGTCCACGTTGGGTGCAGTCTTCAGAAGTTGTTGAATGG -CGGGAGCATCAACTTGTGATCCGAGGACTTGCACCGCGGCGGGAAGGCCGGTCAGAGCAG -GCTGGTGTTGAGGGGTGACCTGACCTTGCTGTACAGGGACCTGTTGGCCGTACGGCGCGG -GATTCGGGTTGAATCCGCCGAAGTGCGAGGGGGGAGTCGGCGCAACTCCAGCGCTAAACT 
-GAGGCTGCTGGGGAGTGGCTGCAACGGCGGGGCTCGCAGACATAGGTGGCTCCTGGCTAG -CCTGATGGGATGGCGGCTGGACAGATGAAGGGTTGGAATCGCCAGTCTTGACGACGAAAA -TGGTAAGAAGCATGCGTTGAGTAATGGGCCCCTCCAATGAGATGTTTTCCAGCAGCTCAA -TGAACTCGGGCATCGTCGTTGTGCCAATCTCCATTGGGACAAGGTAGGTGTCTTTGACGG -CGGACAGCGGGTGCTTTCCAATCACGCCATAACGACCACGACTATGGAAGTATTCAAACA -TTTTGTCGAAGCCTGCTCGATCCTCGGGCTGGTCAGGACTGCTGATTGCTATCACAGACA -CATCGGTTGACTTGCTAAATTGAAGGCCGCATAGATAATCGGTTGCCCGTCTGATATCGA -TACGGCCATCGACAACGAGCGTGGAAGGCGCAAGTTGGCTCCATGGGATTCGGCCACTCA -AGTCTGCGCCTCCAACATGCTTTGCGGACGAGGAAAACTCTCCAATTGGAGGCATCGTTA -CCTTGCCACGCCAGATGGTTCCGTCGTCATCAAAGTCTTTGGGCGAATAAGGTGGTGAAT -ACGGTTCATCGTCACGCAAGAGCTGATCAATCTCAGCGTCTGCCTGGACCCTGCCACTAG -GCAGTGGCTCGTACATAGCGGGAGCCACTTCGGGGAAGACGTGATCATGGTGAGCACCCT -CAGGCGATAGACCTCGAACATATCCATCCGTCTCTTTCTTGGATTGCTGCTGTCCCATCG -GGCTGGTTGGGGTTTTGTGGGCTGGACTGCCGTCATCACCCAGTGCACGACGGGGTCCAC -GGGAGAAAACAGACTCGCTTGCGTCATGGTGTTCATCCTCGATCAACTCCTCTCCTTTAT -GGGTGCGTCGAATCCGAGGACCTTGCTCCTGAACAATAGTGTGGTGTCTCTCTGCTTCGC -GCTTGATCTCCGCGTCTTTCTGTTGCAGTTCTTTGCTAGCCATCTCCCCCGTTGTCATCT -GAGCCAGTAACTTAGGGGAGAGACTACCTATGAGGAGACGATCTCGCAGAGAAGCATTCT -TCTTTACATTGAACATGATCGACCGTAATTGCGCCTTATATGATTCATTCGGCTCGCCTG -ACCGCCCGCAGATACTCTCATACATGGCATCCTCGATAGACAGAGCGAGTTCTTGTGCCA -CGTCCTCCGCTGACTGGTTTGCACGTAAGGTAAATGAGCCTGCTTTTTGCGCTGCAGTAA -CCTGGTCTACGAACAACTTGATGAGGTGTGTAGCCACACTCTTGCGAGCAGGCATCAGCT -CCTCCAAGGCTCCAATGGCAGGTGTCTCGGTGGTAGAGTCTGTCTTGATTTGAGGTTCAG -TTGCCTCCGGGGCAGCTGCTGGGGTAGGCTGTGGGGATACCCGTTGTTGTTTCACCTGTG -GTCCCTAAGAGATAAACGAGTTAGCATTTTGAACATCATGATTAGGGGGAACAGAGGTAT -TTACATTTTCTACCTCCACTGTCTCTTCAAGTTTGCGCTTGTTGGTGCTGGATCGCCGAG -AATCAATAGCATGGCCATTTTTCTCTGGTTCGACGGAGACAGATACGGCAGGTGCAGGAG -AACCGCTAGGACCCGGTGTAATGGATGGCGCGGTACGAGCTGGTGTGCTAGCATCTGTCC -GACTCTCACTTGGTCTTCCTTTGCGGCCTCCCTTCTTGCCCTTTTTCCGCTTGGACTTCT -TCTCCTCGGCCTCTTGGCGACGTTTCTCGGCAAGCTCTTCCCAGGGTTTTTCGCCTGCGT -TGATCTTCGCCAAAAGCTTTGTGTGATTCTCCGGTCGGCATTGTTCGCAAAAGTATTGAT -CTGGTTCGTGACCTTTCGTGAATGTGAGGCCCATGCAGTCGTTATGTTGCCATGCTGAAC 
-ATTGATCACAGCAGATCATGTCGCGCTCCACGTCCTCTTCCTCTTCATATTCACCGCAAA -TACATCGTATTATCTCTTCTTCGCCTCCCTCGCCTTCACTAGGACCGGGTGTGGGTTCAG -CAGACAACTTCGCGCTcttttccttcgggcttttgcttttgcctttttttgcGGGATCAT -CTGGTAGCTCGAGGTTCTTGTGTTGGCCTTTGGTGGCTCGGCCGGAGCGTCGGGGTTCGT -CTGATCTCGCTGTTAGCCTGCCTTCTGGTAATTGATCCACAACCCCGGATTTGGGGCTGC -CCTGCCACTCTCAAGTGATTGAATAGCAAGGTGTGCGGTAAATGATGTACGATAGCTTAC -CGGCCATGATAATGGTGATTCGGTGCGTACTACAGCGCGTGAAAGGATAATAGGTCTCTT -CACTTGCTCGAAATTCGGGTGTTACCCCAGGCTGACAAAAGTCTCTACAAGCTCAAGGCG -CTAATGTAATACACAAGAAGACAGCAAAACAGCGTCACTGTCTAGCCCACAATTTTGCTT -GATCGAATAAGAGAACCGATCCAGACTCTGCGCAATATTTCTTGGACTGGCACGCCGAGG -CGATGAACACGCTCAAGCGGGTGTTGTGGTGGACTGTGCAATCCCTTGCTGCGCGTCAAA -TCGACACTTGGCGCTAAGCGGGGTCGCGTGAAATGAAATGAGCCCAGATCACGCACTTTG -CAGTCACGAATGGCACTAGACGATTCGATTTAAGATTAGAACAGGATTTGCTCGGGAATC -AAGTGGTGAAAGCAATAACGGCGAAGATGGATGGTAAAGAAAAGGGATGCGATGCGCGAT -GTGAGGAGAGGAGAGATGAATGAGTGGCTTGGCTCCCGGCTTTGTCATTCACCGGTAGCA -CGTGATATTTGTCACCAGCCACAGGTTCGCGGCGAAATGCCCTTGAGTGAATAATCTTAA -GATTGAGCTTCTCCAGGTTCCCCACTCTTCATCACAGACTGTTTTATTCCCGCCAGGTTT -ATTCGGCACTTATAACAATTATTAAATCTTCTATCCCGTCAACTCATTACAACTCGAAAA -TATGGCCGACGTCCGATCGCTCCTCCGCAGCGAACTAGCTTCCCGCAAGGGCACATCTCA -ACCAAATGCGACAGGAAATCGCGTCACCAAAAAACGCAAGGTTGACAGTGGTGATAGCGT -GATGCGAAAGAAGATTCGCGCTGCTGAACTAGACGCATTTCAGTCTTCCTCTGGCGCAAC -AAGCGCCGAGAAAACTCCAGAAGAAGGTTCGGGGCAACTCGAGGATGATATTGTCGGCCC -AGAACTTCCATTTGACGACAAGCAAGAGGATGTTGAGCCAGTTACAGACGTCGCAGAAGA -ATCCACTACGCAACCGCCCGAAGACCCATCTACGCCTATCGACGAAGAGGCTTGGGCAGC -ATTTGAACGCGAGGTCGCTGCGCAGGATGATGAGCCCCATGCACCAGCCGCTGTTACAGC -CGAGGCTACGATATCCGCAGCACCAGTGACAGCTGCAGAACTCGCCGCGCAGCAAGAGAG -AGCGAAGGCGTCGTCAGTCTGCACTCGAGAGACAGAGTTGGAAGGGGAGCGGGAGGATGC -CGCACGACTTTTGGAGGAAGAGTTTGATGAGATGGATCAGTTAGAAGAGCGGGTGCGGAG -GCTTAAACAAAAACGCGAAGAGCTGCGGAAAGCCCGCGCCGAAGGTCAGACACAGGATGA -GCCGATGGGATCTGGCGCAGTGGAACAGGATGAGAGCGAGAGTgaagaagaagaagaaga -agaagaagaCGATGATGATGATGAGGACTGGGACGATTGGAGATTTAAATGATTGAATCG -ATGGTGGAATGACGAATCGTCTAGGTGAAGGGAAATGGTTTCCCTCATATACAGTGATAC 
-CCTCAGGAACCGAATGGCATTGGTATAAACAAATTGCCTCTTGATGTACAATATGAACTC -GGCTGTCCTTATCTCGACAATAACGATGTGTAATATTTTGTGAACAACCATTATGGACTC -GGCTTCAATCCACCATCCGCCGCCTGGTCAACATCCATGTAGCTCATTAATCCATCCTTG -TTGACGCGCATTGCCTGTTCAATCTCGGCCTCGCCTTCTCGGAGTAGATGCTTGCCACGG -AGAAGATCTCCTCGCTGTCTCCGCTTCTCTTCCTCCACCCGCTTCTCCCGATCTGCAAGG -GCCTTCTCGCGTTTCTCCCGTTCAACTCGTTTTCGGTCGATTTCTTCTAGCTCTTCTGCC -GTCATCTGTACCTCCGGCGGCGGAGGCAAAGTAGATATGTATGCCTCAATCAGAGGGTTT -CGCACCTCGGGCGCGAGTCCGATGTAACGAACATCGGTGATGATCGTGTTGGGCAGAGCT -TCCAAGCTCGACGATCGATGCAATAAGTCTAAAGGAACAGATTTGAGGAGGGCTGACAGG -TCTGACTTGCGGGTGCTTTCAGGTTGCTTTAGACGCGAGATGTGGTCCCGGTAAAACTTC -TCGCGCTCTTTGTCTCCAAGCTGAGAGTCCTTCATTTCAGGTTCTTTCCTGTACTTGCGC -TTGAATTCCGGCCAGTAAAGCTTCGGAGTGGCATTTTCTTGCAAAAATGCCAGATATCTA -ATGCGCGGGTCGGCCTTCTCCTGCTTTGCCTTCTGCTCTTTCACATACTGGATTCGGTCC -CGGCTCCAATTGGAAAAGATCTCTCGTCGTGTCTTCATGTTCGAGGGCGCGGTATACCGA -GTATCTTCAATGATGCGGCCTTCTTCGATGACATTTTCCCATGTTGTGAAGGGGTTGATT -CGGAAGTCATCCAGCAAGTCGCGGAAGAGGGCCTCGGCATCGGCCTCAGTTAGCGGTAGG -CCCCCTGCACCCTCTTCCCAGTCTTCTTCCCCGACCTCACCATATTCACCGGGATCCAGT -CCGTAGTCTTCACCCATGGCTGCTAGTTGATATGCCATGTCTTCTTCGTTGAATTCTAGT -GGCTGATCCTTGGGACCCTCACTTTCAGCTCGGGCACGCTTTGATGGTTGGCCCTCTTCT -CCCTCGCTGTCTGTAACCTCGACCTCCTCATAGTCGTCGTCATAATCTGGATGCTCCGCT -TCTTGCGGTGTCGGCTGGGTCTTTATACCATCATCCTGTGATGGAAGCGACTCTTCTTTG -GCTGGCACTTCTGGCTCACCCCGTTCTTTCCGTTCCTTCTTCTCGCGCTCGAGGCGATCG -AATTCGATCACACCCTTCAAAACATGCTCTGGGAATTTCCAAAAACTTTCGTTCGTTTCT -GGGTTGTGCACAAATCTCCTCCCAAGCTTCGTCTTCACTAATAACCACGGGGCGCAGTCA -GGAATGGGATGCTTCGCTTTTGGCCGATCTTCGGGTTCTCTCCGCCCACGATCATGATAT -CCTCTCCCACCACGGAATCCCCCACGGCCGCGAAAGTTTTGCGCTGGCCCAGAGCCAAAC -CCTGCTGGCGGTCCATATGGCGTTGAGGAAAACGGAGGTAGGGTTTCGGGATTGTATGTT -GCTTCTGGGGTGTCGGAAGCGACCTGAATGGGCTGAGGGGCTGTTTCTTGGGGGCGGGTG -TATGTCGATTGTTTTGTTGAAGAATTATAATAGTATAAATGACCTAGCTGAATAGTTAGA -ACGGCCAAAAGTTTTCACTCGAACCCTGCGCAAGACCAACCTGTGGGGGCTCGATGTTGT -GTCCACCCCGAAGGCAAAGGAGGAGGAGCGGTGTATGTTGATCTGAGCATGGCGATCGAG -ATCAATTATTTCAAAGTAATTGGTAAATCGGCCTTAGAGTCCGTCTTTGCAGGTCAGAGT 
-GGTGAAATCGTCGCAGCGAAGCTTTGGTGTGGTGAAGTGGTCCGAGCTTGGCGGATGACA -GAGGACGCGCCAGGGCGAGTTGCCTGGAGCTTCTACGTCTTCACTACCGACCTTCACCAT -CCCGAACCACCTGTCTCGCACAATCAACCGTTACCTATAGCCCATACCTGAGTTTTTGTT -GCTTTCTTTATTCATACACGATGTCTCGGAGATACGATTCGAGGGTTTGTGCACACCGTC -ATTCCTTTTGTGCAGCTCCAGCTAACCTATGACGCTGTTGGAAAGACCACCATCTTCTCT -CCAGAAGGTCGTCTCTACCAGGTTGAATATGCGCTTGAGGCTATTTCTCATGCCGGTACA -GCCCTTGGTATCTTAGCCAAGGATGGAATAGTCCTCGCTGCTGAGAGGAAGGTGACCAGC -AAGCTGCTGGAGCAGGATACCTCTGCTGAGAAGCTCTACACAATCAACGAGTACGCCGCC -CCCTTTTTGTCTCGAGTCGAAGAGCCAGAGCTACTGTCTTTAGTACTGGTACAAGTAGCT -AATATCTTCCAGTAACATGATTTGCGCCGTTGCGGGTATGAACGCCGATGCCAACATTCT -GATCAACTATGCTCGCCAGAACGCCCAACGCTACCTCCTCACGTACAATGAAGACATCCC -ATGTGAACAGCTTGTCCGTCGACTGTGTGATCTGAAGCAAGGATATACTCAACACGGCGG -TCTACGTCCGTTCGGTGTCTCCTTCATCTACGCCGGATACGACCACCTCCGTCAGTTCCA -GTTGTATCAGAGCAACCCCAGTGGAAATTACGGAGGCTGGAAAGCGACGAGCGTCGGTGC -GAACAACGCAAGCGCGCAGAGTCTCCTCAAGCAAGATTACAAGGAGGACTGCGATCTGAA -GGAGGCTTGTGCTATGGCCGTCAAGGTGCTGAGCAAGACTATGGACTCCACAAAGCTGAG -CAGTGAGAAGAGTGAGTTTCTCAGGTGTCACTCCTACTTTGTCTTTTATCATATTAACAT -TTCCATTTACGCAGTCGAGTTCGCAACAGTGGGCAAGACCAAGGATGGCAAAATCTACCA -CCACCTATGGACTGCGGATGAAATTGATTCCCTTCTCCGGGAACAAGGGCTGGCTAAGGT -CGATGATGAGCCTGAGGCTGGTGACATAAAATAGATTAAATCTTCCCTTGTTCTTTTTCC -GTTTCAGATACTCGAATCCAATACTTTGACCATAATCAGCCGGTTGCAATATCCTAGCAT -CACCTTATATCCAAACAAACAACTGCTCTTTTAACCTGTCGTGTGTGCTTTTGTTCATGG -CTGCATCTAGTGTTGTTCTCTTGTGACGTGGCATGCACATCGGCCCAGTCTCAGCGCTCT -CCTTCAGTTCTGCATTCCAATTTGACGTTGTTTGCCTTGAAGATGTTGAAGCAGATAGGT -ACACGTAGGCTGAAGAGACATGGATGTTGCCTAGGGGCATGCAAGCTCTAGCCATCACTA -GTGAATTCTCCGATTGTAGTGCCATGATATATAGCCCAGCTCGCCGTTCCTGGCCTCCAG -CTCAACTCATCTTTTTTTTTCCCTCTTTTTCTCTTGCCAATGCCACTTAGGCCTACAAAC -TCACCTTTATTTTATCTAAATTACAAAAAGCCATTGCTTTGGAGTTGGATTGAATATTTG -CAATGTTGGAATTGCCGGGTATAGAGCAAGTGAGGTGGAACCCACCTAGCCGACTTAGCC -CTGAATTCTTCTCACAACACTAACATTCTGCAGAACAGCTTCCAGTTTCCTTCATTTCCT -GAGAATTTACTCAACATCCTTCTATGTGACAGGTCTCATCTAATTAGGTTATCTAAGCAG -GGCAAAATCACGTGAGGGGATGTGATGAGGGGCATTTTGTTCCACACTATGTGCTTTCAC 
-TCTGACACCAGACATCACAAACCACTGCTCTCGTTTCAAGTATTACCTTGAATCTAAATG -ACCATTTGATCAGTTCCAAGCCTCAATGGCTTTATTGTCTGGTTGTAGCAGTGTGCCACT -TCCTGTCCTGTGAGACTCAATCTGTCTCAGCGAACCGCTCACCCAGGAACCAGGGCGCTT -ATGACATGGCTAGTCCCTATTGACATTTTTTGGGAGCTTACTAGCCTATGGACAGAAAGC -AATATGGAGTCTGAAACAATTAAATTTCCCGTCAATCTTTTTGAAGGTGACATGGTTCTC -CCATCACCTTGCAGAAACTCGATTCGGACTTCATGTCACGGACAAATGGTGAAAATATGA -AACTCGCTGCAAATACCCCTAAAAAAACAGTGACAGACAACAAAAAAAACAACAGGACGA -TTGGTCACAAAGAATCCGAAACAAGCCGAACAATTGCTGTTCACCTGTCCAACAAAAAGA -AAAAAGAAAAAAAGAAGCACCAAAGGAACACCGGTTTTGTAATTGCAACCCAATCAATCC -TATCTTAGGAGAGTGCTTCTTCGGTCGTCCCGCTCCCGTTGCAGGGAGCGGATTTCTCTC -TCCAGGTCCATAATGAGTTTGTGAGACCGATCCATTGAGGCCACAAGGTTGGCACCCTCT -CGCTTCCACCGGTCCAAACGGTCAAGTTCAGCACTACGCCGGCGATATTTGGTTTCGATC -TTTTCGCAGAGACGACAGTTGGTCTTCTCATTCTCGGTCATGTTAACGAGTCTCATACCG -CATGTCTCGCCGGTACGATATTCATAGTTGCATCGGTGAGCAAAACTGGTCCAGCTCCAG -TCTCCACACGCGAATCTCTTTTGGTTGTAAAAGCACATGTCTATCAGATCTGTTAGTGCT -GTATAATCTGCAAGTACGACATGACCCACTTGCTCACGGCTCTCCGTCTCTGTTTCAGAT -TGAATGCCGGGTTTTTATTGGTTGCACCTGACAGTACACCCGCGCTTGTCGGGTTTGATA -GGCCGGCTTTTTTCTGCGTTTTTAAAGGGCCCGGAAGGTTCAACACAGTTGAAACAAATT -TTGAATGAATCGGTTTTATGAAGAAGATCTATTGGCCTTGCTTGAATATTGCAGACTCGA -ATTATGAAACCGGAGGGATGATTGAGAACTTCACGGCATCATGGGGGCATGAACAGGCTT -ATTTATGTTTCGGACGAAGTTTCTATGCTACAGCTCTATTCTCTCATGTTTGCCTCGTCC -CACTGTTGATCGACGTTTGTGAGTGCCCGATGGCTTATCATCCGTGGTTCCAGTCCGTGG -ATTTCACATCGCACCTGCCTGACGGTTGATCCGGTTGGCTGTGAGCAGTTCTCCAAGCAG -TGGAGACGGTATGATGGCCATGGTCTCTCGAATGGAGCCACTATGAGCAACCGGTTGCTT -CATGGATGCATGCAACCCTCAGCCGTGGCGATTCACTCCTCATGGTAATGTGCGATGGGT -CGAAAATCCCTCAATCCATGCCTGCAATCCTCTACTTTCCAGATTCGACTACTGCAATTA -TGCCGTCGCCATCTGCCTCTCCCCGCTTTCATGTGGATGCTGTTCGCATGGACTCGCACT -CTTGTGATTCTCTTGCGTTTTGCATGCTTGATCCTGCATGACCGAGTCGGTCTCGGCACT -TGCTAGCTGTCGTATTGATGCAATAATCAGGCTTTCATTCCCAGGGATAACCCTGTCTCT -ATGCAAGAGAAAAGCTACTGCCTTCGCGAGTGTAATGGGACTCTGCACGCCACTTGTGAC -TCCACCACAATGGTTTGTTCTATCTTGATGCAATTTTTCGACGGCTGCAATTATTGATTG -GCCCAATGCCTCATCTATCATGAGCACTGACACGGGATTTCGACCGAACGCCATGTTCTA 
-TCCACAGAAGTATCTATCCGATATTGCTTTCTGCGTGATTGCATACGCATGCGCATCCCG -ATTTATTGCCTTTACTCCTCTTCATTGCTATTACGTCTCCTGCATTTGGGCAATAATGGA -AATCAATATTGGGTCTCAGCATGTGGTTGCAACCATTAGCCCCTGTCGTTATGTGTTATT -CCTGCCGAGTCTCACTTGCCAGTCTATCATAATTAAGACTCGACCGCGTTCCATCCCTGC -CGCAGCTTTCCGGTCTGTGGCTTTCAGCCAATAGCCCTCCCTTCAGTGGCAATGGTAGGG -TCTCTGAGGCTCTATAATTGGCAACCCGGGCTCAAGGACACGGACCACGCAATTGAAATT -TCTTGTGCCAGACCCCAAAAATTAGGGATGCAGGCCAGATAGATCCATTTTCGTTCAGCT -GATCTCTCACTCCTGCAAAGAGGGATCCTGCATTCGCCCCCGCTCATGTGTGTGTTCATC -ATCATCCACTTGTTTCTTTTGGCGGAAATGATAGGATCGGATTGATGTACAGCGCTCGCC -GCAGACACGAACCGTGACACTTTCCACATATTGCTGAACCATATGCAGCTGCACCAAAGC -TCAGAGACTCTGACGTGGCACGTTGCTTCTGGCTTTCTGCGTCAATTACGGGTAGCCAGG -GCTGGCTCGAAAGCCGTGGGTCGCCGTTTAGGGATCCCCTGCAAGCTCCACCGGGAACCC -AGATGCCATTCACGTCAATGATAATGATGAAATATGACTGATAGCCAATATGGAACGGAA -GATCCTGCAGTCTTCTGTGCTTGCCGTTTTTCCCGATTGTTCTGGGTATTTCACCAATCC -GAAAAGCGATCGTCGGGCAATGCCGCTCAATCATGCCCAGAATCGGATCCAGGAAGATGA -TCCATGACAAGATGTTGAGTTGGCAACTGGTATCCCTGATGATGGCTCATAGCCTCAAAA -TGGACGTCACATTCGGCGGATCTATCGTTTGCTGCGGCGAGCTGGTTCCATCCAGCCCCA -AAAAATCTCAATCAATTTCTGAGCCCCCTGCTCTTTTGCCTCTCGCTCCGTCTTACCACC -GAATTCCCCCTCGCCTATCCTTAGTGTACCAATCTTTTTCCTGGCAAGGATCAGGCCGAG -GGTTGGAAAAGCATGGTTGTTGTTTCTGCCCAACAAGCGAAGTTGCCGGGTGAAAAACTA -GAGCATTTTTGATCTGGTTAAAGCTACCGCAGGCCGATTCTGAGTGCTATCAACCGGGGA -AGCCCGTGCCTCAACGTTGAAATTGAATTCCAGGACCCGCATACAGGCTCCCACAGCGTC -GGGCTCTTTTGATCCCGTGATTTGAAAGTGAACGGAGGCTCCCGCACCCTACACACGCCT -GCCTTTGACCGGCGACAAATCGTTGATGGTAACGCTACCTTCGGTGTTGGATAGCAAAAA -TCCCACTGCGTACAATTTCTCGTGCTTCACGGGGATTGGTCGACCGGTCAACTTATCCTC -AATTGCAGTCGGCCCTTTCATCCAATGGAAACCGTCAGCCGGGATACCAGTCGTATGCAA -CTAACGAGCCCTTCCCCACCTGTCCTGCACCAAGGAGGTATGTGCATCTCATCAGCACTT -GCCATGGGTGAGGCCTAGGCCTAGTTCTTATTGGCACGTCGTGCTCTCTCTGGGTGGATC -CGCGCATTCGTGTACTCCCAGTTGTCATTGCTCACAGGCATTGTGAACACAGTCCCAAAC -CACATTCCAGGAGATATACTGCTGTTTTCTCTTGATCTGGCATCATTTAACACTATCTCT -GTGGGATGCCATGTCTATAAAGCCCTTTTTCTCGTAACATTGCTAACTGGCTCAAGGGAA -CCTCGGGTTGGATGTTCGGCTTTGTAACTAGGTTACTTGAATATGTGAACTTGTAGGCCG 
-CCTTAATGCATCGAATGCCTTGGTGCGGAAGTAACTTGGGTAATCACGTATAATCACGCT -ACATGTCTTAATCTCGTCCATTTGAAGTTCGGGCTCATAATTTACATTTGCTCTCTTCGC -CTTCTCGTTTCCTGTTGACTATTTAGCCAACATCTCCTAGAACATCAATCTAACCGTTCT -ATAACTCTCACGAGGTCTGCTACCCGGCTCTCCTTCATTCTTATTGCCCTCCTATCTATC -CCCAAAGGAAAGAAACATTCAATCACTCTCAGCCTCATTGGCTCGCATTTCCAGCTAATG -GAAGCTTTGCTATTATGATATGACAATTGAGAATATACCAAAGAGCTTCCAAGCAGACTG -GGCAAGAGCATACAAGACTGATAAAGAAGAATGGATTTTGTCCGAATAGCCAAATTAGTC -TAGGTATGATTCATTCTCGCTCCAGCCCTTCAGCCACGCACAAAATTAAACCCCCACATT -TAGTGGCGCATCTGCTTATTACCGGGACTTTGGACATGTGCAACCTCAGCTTGGCATGCA -TCAATTGGCGACTGTTTTTGACGGCTGGCGGCCCGCTAGCCGTCACAATAAGCCATGGGT -CTGCATAGTGTGACTGACTAGGAGACCAGAAAAACGCTTGTGGTGGAGCTTCTCATGGTG -AGCTCAAAAATAAATATGTACGGTGTTCATCACCACTGTCTCTATCGCAGGTCACATAGT -GGGCCCCATCAAGCCGTTCTTGAGCTGCATACATATGATATGATTATATTGAACGTGACG -GATATTCTGCAGCTACCGACGAAGCCTTGATTCTCATAATATATCAGACCACTGCAAATT -GATACCTAGAACTCCGAAGGCCCTTCAGTTCATTTTCTATAGAGCGACCTCTTAGGGCAA -GCTCATTACCCGATTGGGTTAGCCTGACTGATTGTCACTAAGTGACGTCAGCATGATCCC -AGGGTCCAGGGAAACCTGGAAGAGACTGACGGATTACTTCTAATGGGTTCTCGCTGATAG -TAACAAGAAATTCCTTTCTCTAAAAAAATTGCTATTTTCCAATTTCTTGCATCTCACAAT -CTTGACACGCAACCTTCTCTTCAACGATATATCTCTACGAACCACTTCAATTTACATCCA -ATATAATTTCCAACATTATCAAAATGTTGAAAACCATCATCTCAGCCGGTCAGCAAGCTA -CCCGAGCCACGCGGGGCCAGGTCTCGAAAAGCACTATGCAATCTCTCGCAACCTCAACAT -CAACAAGAACGCCCTTCAGTACAACCCCATTTCCCCAGATTGAAAAGGGCCAGGGCTCGC -TTGACCGTGAAACCTTAAACCCAGAGCGCTCAGAGACTGCCAAATCCGGCACTGACGGTG -AAGTGGCCAAACACCCCTCTGCATTCGATCCAAAGAACACTGCACCAGAGTCCGAGCTTG -CAGCCACTGAAGAGGAGAGCAAGCAAGAAGGCAAGACGGAGAGCCCTCTAAATATGAGCC -CGGCCAATAGGGATGCTAGCGCCTGGAGGCGTCAGGCTGAGGACGGGCCTGATCGGAATC -GGGATCGCGAGGCTTCCAGCTCTCGTGGGGCGACAAAGAAGGGCCGTGGTATCCATGTGA -AGGAGGATGGGACTCATGTTTCGTACCGGGATTGAATCTTTTGTTCATAGTATCGCGGGC -GATAAATGATGTTATGTCGGGAGTTCAGCATTTCTAAAATTCATGTGAGATCAAGTTTCG -ATTAATGCTACTACCAAATTATTCATCCATAGCCGCGTAACGTCATAAATCCCAAGCACA -AGAATTACATACAGATATAGCTCAATTCAACACACCTAATTATTCAAGACGAACTTTCCT -TGCCGTATTTAATGATATCACTTCCAAACAAAGTCCAAACTTTCTCACTCCCCTCAGGGA 
-CATCACCTTCCAGCGCCTTGAGACCAGCATTGACAGCAGCCCTGCCAATCACCTCCGCAT -CCTGAGCCCAGCTATCTTTCAGACCGGAGTGCAGCTTGCCAGCCCAATTCGCAATGAACC -GAACCCCAGCCTCGAGCGGCCGGCTTTCCTCACGAGTACCAGCGATCAGCCCAGGGCGCA -GGATGACCATGTGCTCAAAACCAAGCTTCTTGATATCTTCCTCAATCTCACCCTTCATAC -GGGGGTAGGCAATGCTGGAATCCTTGTTGGCACCGCTGGAGGAGATCAGCACGTAGACCT -TAGTACCAGCTTCGCGCGCGGCCTTGGCCATCTCAACGTTCAGCCCATGCTCGATTTTGT -ATTGATTATCGAACCCGCCTGCGGCTGCCCTTGTAGTTCCGAAGGCGGAAATGAAAATGC -TTGGAGTCGGGGAAAGGGTGGAGAGCTTTCCTGTCCAGGTGGCGGTATCGCCAGAGACGA -AGTTGGTGAGTTTGGCCGGGGGCGTGCCACTGGTGGCTGGCGGGGTGCGACGGGAGATGG -TGTCCACACGGGTTACGGTGGGGTTGGCAATAAGGCTGGTGAGGATGTGCGAGCCCTATC -GGTTATGTGAATGTGACTTAGCTCAAGTTCTGGCATCCGTATCTCTGTTCAAAAAGATTC -TAGACGGCTTGGGCTTGCAACGTGTGCACTTACCACCATACCCGTACCACCGATCAATGC -GACATTCACCATGTTAACAGCTATGAAATTGTTATTAAAGTAGACGTCTGTTGTGTTTGT -ACAAGATGCTAAGAGAGGAGCGTAGCCGGAGGAATCTAGACGCACTGCCTTTCGTCATGC -CGAGGTTGAGCTAAACTACATTGGGTAAGCTCGACTTTGACGATGTACGTGCAATGTGAA -ACCAAGTCTACCAGAAGAAAGAATAGCTGTTAGTATATACCATAGTAAGTACTATATAAT -TTCTGGCAATTCATTTGGCTTCAGCGCAGCCAATGTGTTAAGATAGCTACACATTTAAAG -AGTTCTGTATGGTTGAAATGCGTGGGCGGTGGAGATAGGCAGGAAGGGCGTACGTAGCAA -AGCTCCCGGACTGTGTTTCTGCTTTTCATTCTCGATTGCTCTAGGGCTGGCTCTGAGACC -CTTGACCTTCAAAATGTCCCTCGTTTTGTTGCCGCTAGAATACATTCAAATGATTGCGTC -AGACCTGCCCACATGAAGACATATTTAGCATGGTGTGCACCAGCAAACTGTTTCACGACC -TTCTAATCCGTGAGCTTTAAAAGCGAAATCTTGATACAGATCCACGGGGATATCTCTTGC -TTTGGTGTGCAATGAATGGTGGTATTCAAGGGATGCATCACCTATTATCCTGGAATGCAG -ATGTGAACTAAAGCTTGTTTTTCAAGGAAGAAGAAAAGGGCCAATTACTAATATGGTAGC -AAAGTTCGGCTCTGCACTTGGCTGTCGAAGATGACAACTTGCTCATGGCAAATCTGCTAA -TGAAGCATGGTGCAGATTTAAATGCGAACTTCACAGACCAGAACCTCACTCCCTTGGATG -TGGCCGCAGGACGGGGTAATGGGCTCATGACTCTGTTACTTCTTCAAAATGGCGCGAAAA -TGAGAGATAGGATCACCTTATCTCGTGCTATAGAACATGGAGGTATACTGGCTTGGGCGA -AAGCTGGTCGTCGCGGGGCTTGGTACTACAGCCCAATCAAAATACCTGAAATAGCACCTA -ATTACGACTTTCGGGCTTTGGTGGAGGTGCTGCATTTATACGGCGCCGATTTAAATAAGG -ACTCTCTTCTTTATGGGGCAGTAGGAAATCACGGGTCTCTGGATGATAGTGTCCTAGAGC -TCCTCATATATATTGGTGCCGACGTGAACGCGAAGAATGAGCGCGGGTCGTAGGTCATAT 
-CGCGTGCATTTGAAGGCGAACGCCGCGACGCCCAACGGTGTAAAGCCTTTGTGGGATTGT -TTTATGAAAAAGGTGCTAAAGTTGAGCCCAACGACCTCAAAGATGCGTTCTTTGACGCTT -TGTGTCGTGAGGACCTTGACGATGTTGAATTGGTCCTTGAATACAGAGCAAACTTGAACT -TCTTGTGCTATCCTGAAAGGACTTCCCTGCATCTCGTGATCGCACGTTCATTCCACGATG -ATACGAAAAACGAACAATTCGTGAGATGTCTCCTAAACCGAGCAGCCGATGTCCATTTAA -GAGAGCACAACTCGAGGACTCCATTGGAACCACGCATTGGTGCCCCTATGGGACACCAAC -ATAGCAGAGATCCTCCTCGATGCGGGCGCCAAAGTACAATGCAAAGGCAAAGAGGGAGCT -CAGCTCCTGACTTACCTTATCAAAATGGTCTGGCCAGCTGGTTTGACCCAAGCACAAACC -AGCTTCAGTGCTACTTGGAGGAAAATGATACTCGGAAGAAAACGATTGGTGGAAGTTGGA -GGATCCATATCAAGATCGATTCCGTGAGATGGAATATATGATTGGGTTGCTTCTTGAATA -CGGTGTGTCTCCAGACTATGTGGACGACACTAGCCAATGTCCTCTTGACCAACCTGGCGG -TACATTATTTTCTGCTCTACAGCCGTGGTTTTCTAGAAAGCAAGGAGCATGATGCAGTTC -GTTTCGCCCTAGTGATCGGCTATATTTTTCCTGGAGATTTTGCGAAAATCGAGAAGTCTG -GAGATGTCAATTAATATATCATTCATAGTACATACCAGCTTGGGGGACCAGGCTATACTC -TACTCATGCAGACATATGATTCTTCTATTTGTCTACCAAGGTTGAATGTGAAGCCTAGAA -TTGTATAATCAATTGTCACCTCAATGCCTGCAGAACATGATCAAACATGGCGACTTTTTA -CGCCTCTTGAAACAGTGTAAATTGTGGATTTCCCAAAAATGACTAGACAGTAAATTTGTT -TGCGGATATCAACAAGCGATTAATGAATGGATGGTGAAGTGCATAGAAGTACCGCATTGA -GTCATCATAGGCTTGCCATATCCAGAAGCTTAAGAGTCAAACAAAAACACCTAAATTGAA -AATCATCCCATCCAACTCATCGGCTTTTTTATTGACTTTGTGAATCCTCTTGCCGTGCGT -GGACACAGTTCTAAATCCATCCAAGATGTCTCTTGACTCCTTGCCCGTCGAAGTAATCTG -TCTTATTGCATCAAATCTGCCACGGTACAAAGATACATTGTCTCTTGTGCGGTGCAATCA -TATGATTGATGATGCTGCTATCTATGTCCTTCATAATCAAGACAGCCTGACTGCAGATTA -TGCACTGCAATGGCTGCTAAACAGAGGGTTTGAACTGGGAATCCAGAACATAATTTCACG -CAACAGGCTAGACGTGAACAAGGCAGTGGCACCCCGGTACATGTCGATCAACACACCCTT -ACTTTTAGCCGTTGGGTTCGGTCGTTCGAACATTGTGGAGCTGCTTCTGCGAAATGGAGC -CGTAGTCAATCTCACAACGGATATTTCCGCCTTGGAATATACTGCAACCCTGGGGGACCA -CAATATGATCAGTCTACTCCTCAAGCATGGTGCCCATGTGGATCTCGTGGGCGCAAAGCG -CGGGTTAACTCCTCTAGGTTGTGCTCTGGAACTTGGGTACGCATTACGGGAAGCCAATCT -GTACTTCCTACAGCTTGATACTTGCCATGAGTTGTCGAAGTACAAGGGAGAAGATGATTT -TTTCGCTGTGATCCAATTGCTGCTGGCCCACGGGGCAGATCTGCATTTCCAGTCTGATCA -GAGATTCCAGTCTACTTGCCTACACAGGATTCCTGGGAGCCCATGGAAATCAACAGAGAA 
-ATTGTTCGGTCTGTTTTTTGCTTCTGGTGCTAATTTAAATGCACAAGATCGGAGAGGAGA -CACTCCCCTTCACATCGCTCTCGCCTTCAACGCATTCTTAGGAGATACAAATTTCCAGGA -GTACGTGACGTTGCTCTTGAGGTCTGATGCGGATGTTCACCTACAGAACTGGGATGGAGA -GCCACCGCTAGGTATCAAATTTGAGAACCCGGGCCTCTGGCAACATTTCTTGAAACCAGC -ACCAAATACGCGTTGTCACAGTAAAAAGGGAGACAAACTGATTTGGGCACTCTTGAAGGC -TCCGTACCAAAAGCAAATGAGACACACAAGGCAACATCTCATCAATAATCTCCTGATAGA -GATATTAGTGGAGCACGGGGCTTTGGGAGATCAAATGATTGACGAAGGATGTCCATTGGA -CTTGCGTGCGGCAAGCCTATACCCAGTGCTGAAAGATTTGAAGAGCAAAACGGCTCCAAG -GAAAACTCCCCAAAAGAATGTGTCAAAGGCTTTGAAAGATAACGGATCTATCAGAAAAGC -CACAGAACACAAGAAGGCCACGAGGCCCACGAAAAAGTTGCCCCCAAGAACTCCCAAGGC -CTGAACCGAAGCGAGTCCACGTGGGTATTAGCCTTAAGAGATTCCGCGCGGGTTTGGAAG -CCCAATTTACACGACTTCCTCGAAAATCAATTCCTATCTTGCCGGGGTGCAAAGCAAAGG -GGGCGTTCATTGCTAGTCGTTATCAGTTGATGGCATATCTTCTCCAAATATCTCTGAAGA -CAATCTATGTTTCAAATGGAAAGACAGCTGGCATAGAAGACTTTATGACCATAGTTAATT -TCAAACTCATTCGATATCACACGATGTCTTCTATTTTCACGTACTCCTGCAGGCTAGGTC -GAGTGGCACTGAAGAGTCTACTATAAGCCCTATGACATATTAACCTTTCAAAGCTCATTT -TCGTTTTGTTCGGTTGCGTTGGAGAAAGCCCCGAAAGTAGATAATATTGTACTGTCTATG -GACTTGAATGAAAGTTTAGAAGATTGTACAATATAGCAGAAATTGGCATCCCTGAGTGGA -CAATGTGGGAATCTTGTTTGTTATCTCATTAAATCTTGGCGCTCTGTGAGAGCTTCTTCT -CTTCGACCTGATAGAGCTTGACCAATTGACGCACAGACAGATCAAGCATCACGTCTTCAG -ATCCACCGGGAATTCGGGCGCTATGGACATCGCGAAGAATGGCTAAAGGCTTGGTCAGCT -TGGTTTACGCACATCCTGATAGTCCAGCGAACATACCTTCGACCAATTCACCCTGACCAG -ATTTGGTATATCCACTTCCTCCGAATAACAACACAGCTGTTTGGGCACACTCGTTCAGAA -CCATACCAGACTTTGCTTTAGCTAATGCCGTTAAGCCACCAAGTTGACGATAAGCTTCCT -CTTTGCTGAGATGAATCAATTGATATAGGATCTGCTCAACCCACGCCCACATAGATTCAA -GTTCAGCGCCGGCCTTGGCAAGACGGTGGCGGACTACAGGTTGATCCATGAGCGTTTTTC -CGAAAGCCTCGCGCTTGAGACAATACGAGAAAGCAGCAGAAAGGGCCACACGAGCCTGGC -GAGTGACTCCAACAGCAATACAAATTCGTTCATGATTGAAATTGGTCATGATGATACGCA -TGCCGTCGCCTTCTTTGCCAACAATGTTCGACACAGGAACCTTGACGTTGTCAAGCTCAA -TAAATGAAGCCCCGCCTGTGATCTGACCCGAGACTTTGAGTCGGTGCATCGAGACACCGG -GGTGTCCCTTGAGAGGCACAATCAAAACGGAAAGGCCTGCGGCTCCTGGTCCACCGGTTC -GCACGGCCATGGTGGCATAGTCCGACCAGATACCATTCGTGATCCTGGTTATTTGTGTCA 
-GGTCACAGCATTATCATCACGAAGGAGTTGTACCCTACCATTTCTTTGATCCGTTGACGA -TGTAGTGCTGACCATCAGCACTCTTGACAGCAGTTGTAGCGATATTCGCGACATCACTGC -CAGCCCCGGGCTCCGTTATGGCAATACAGCTTCGCTTCTTACCTGTGAGCATCTCGGGCA -AGAACTGCGCTTGCAACTCAACACTACCAAATACGTAGATTGGTGGGATTCCAAAACCGA -AGCCGGTATTGAGAGAGCCAGCGGGGCCACTGAGACCCGAACGGGCCATCTATGGACAAA -CAATCGGTCAATAACGCTGCGATCTTAACAAACAGTCTGATGATCGCTAGGGCATACCTC -ATCACAGTATATTCCGGTGTAGATGTAGTCCCAGTCTTCAACTTTGACCCCCAGGATGTC -GTTGATTCCTAGGCGCTTCAGCAAATCCACAGGTAACGGGGTAGGCATGTTTGGCAACAG -CATATTATGCTTGCAAAAAGTGTGGAAGACATGCTCAGGGACCGTTCCCTCGCGCTCCCA -CTCCAACGCATGTTGAATCAAGTTCTCTGTGGCAAATTTACGACATGCTTGTTGGAAGCG -GCGGTGGGCAGGCGTTATATAGGGGGAAGGGAGGCCTCGAATATAAGGCGGCTCCGAGAA -CGGGATGGGATTTGGTGGGTTATAGGCCATGATGACAGAGTACCGCGAAATGTTATCAGG -GAGCGGCAAGTACTAGTCTACGTATAGGGTGTTGATATATATCAGGATCAATAGCCTAAG -AGTGAAATGGTTTCCCACATTCTGGGGTAAGGCGGCCCAACCGAGCCTCGGTGATTTGGG -TAGACCAATCCTGTGCAGTGCACAAGCGTATATATCGGTAATCATGTGTACGTGACCATA -GGGCAGTAGGTACCTACGTAGGACTCTACTGAGAGTTCGATAGGCTGCGCTATGTAAGCA -ATGCGCCAAGAGAAAATCGATGCCTTGGATTTTACGTTACTTTAATACCTTGCTGCAACC -TCGTGGGCCTCATCTATACCAGTAGTATTACACTACTTTTTTCTTACGGAGTCAGCACCA -TTCCATCTATATAAGTAAATCTTCTATCAAGTACTCCCTCTATATAAGTATTTCTATAGA -GGTACAGTGACTGGACATAAGCTTTTGGAACTATGTTGTAATGAATGTAATTATATTACA -TCGCTTCAGTACCGGATACAGTATATATGGACATGTATACTTACGAAACGGAATGACATA -GTTTATTAAGCACAAGATGCCGGGATCAAACCGCAGTATTCAGGTGAAAATAAGGGGCGT -AGCATTGAGCACAGTTTGGCGCCATGTTGTACTGAGTTCTTAGTACGGAGTACGGAATAC -TATGTTGTCGGTACCTACGGAGTACCTAGGGCCTGGGTTCTGTGTAGATCACGTGCCCAC -CTGTGATGCCTAAGGCGCCACTATTTCATAACTAGTTAACTAACACGCTCCACCTGGGCA -TCAGGCACGGCTGAGTAAGGCTCCGAAAGCGGTCCTGGATCCGCCACGCCTTGAAAAATC -TTATCGCCCGATAACAAGCACGCAATTGCCACTAACCGCTTTGGGCAAGTTACCTGGGGG -TTTTCTCTATAACCAAGCTATGAAGAGGGGAGATTCTCTCTTTCCCCCACCTTCTTCTTT -GTTTTTGTCTTCAACAGATATCGGTCTTGTATCCGGTATCAAGAAGAAGCCATGGGTTCA -TCAGTTCCAGCTCTGTCGGCCTTGAATGGCCCGACCTATGTTACAGCCCAGACGCTTATC -CAGCAAGTCGCATACTTGCTGAGTGATAAGATCTTCTCCTACTCGCCGGAGACGTTCGAT -CTTGATGCTGCTCTCAAGGAATGGATTTCCAAAGCCGAGACTAATGCCAATGGCGAGTCT 
-CCCTCAGTCAAGGCTTTGGAAACCCGCCAGGGTGCTGGAAGCATGGCCCTTGGCTACCTC -TTCTCTCAGGACTTTGATCTGAAGAAGCGCCACATTCCCCAGGGTATCGTTGCTTCTTCA -GCGACCCTCCCTTATATGCGTGCCGCGTTGGAGCAGCTCTCTCTGCTGTACTCCGTCGCT -AGCCCAGTCGCTGCACACGTAGCCGCCGTAGACTACGCCGGTGAGGATGGCCTAGTTTCT -GACTATGCATCCGCCCTTTCTTTGGCCGAGGACCTCGGTCTGGGTTTGGTCTCCAGCGGT -TCCGCCCACGAGTCCCAGCACATGGCTCTGTTCACAACTTTACTCTCCTCCGTCCTCCCC -TCGATTCACATCTATGATGGTGTTCGTGTTGGCCGCGATACTACCCGCGTCATTGATGTT -CTTGACAAGGATGGCCTGACTCGCGCCTATGAGACTGTCCGCAAGACTCTTGATGAGTCC -CGCAACCGTCATCTCGACGCCGAAGGCAAGGTTTTGGATCTGCTGAAGAGCCTGAATGGC -GAACTCGGCACTGACTATGGTGCTTTCGAGTACCACGGTCACTCCGAGCCCACCTCTGTC -CTGATCGCCTTCGGCACGGTCGAGGCTACTCTCACTGCCCAAATCGCACGCTCTCTCGCG -AAGGATGGTGTCCGTGTCGGTGTTGTGAATGTCCGCGTCTACCGTCCCTTCATCGAGGAG -GAGTTCTTGCGCGTACTCCCCCAGTCCACCCAGACCGTCGCAGTTCTTGGCCAGGTTGTC -TCTGAGCAGGCTGTTCAGGAAGATGGCATCCACTCCGCTCTCTATGAAGATGTGCTTGCT -TCTTTGACTTTTGCCACTGGCCGTGAACACAACCCGGCTTGTGTGGAGATCAAGTACCCC -CGCTCCCAGCGTTGGGATCTCATCTCCATCGCCGCTGCATTCCAGCGCGTGTATGACCAG -CCTATTCTCATCGCCAACGGTGAAACCAATGCTTCGCTGCAGCTGCTGGACCCTGCATCT -GTGCAGGAATACACTTTCTGGGATGTTGACACCTCCGTCACTGAGGGGGCTGCTCAGACT -TTGAGCCAGGCTCTTGGCGCCGACTCGGCCAGCAACGTCACGCTTAGCCAGACCCACGAC -AACCTCGTCCAGGGTGGTGCTATCCGAGTTGACATCCGCAAGAGCGCTAAGATTGTTGAC -GCACCTTACGCGGTCACTGCTGCTGAGGTCTCCTATGTTGGCAACCTTTCGCTGCTGAAC -GATGTTGACGTTCTAGCTTCTGTTAAGGACAACGCTAAGGTCATTGTGAATGCCCCAGGC -ATTAATGATGAAGATCTGGAGAAGAAGATTCCTGCTTCCTTCAAGCAGTCTGTCGCCCAG -CGTGGCATCTCCCTCTATGTTCTTGATTCATCTGCTGTTGATGACTCGTCTCTCAACGCC -CTTGTCCTCCAGGCTTCGTTCGTGCGCGTCGCCCTGCCTGCCCAGGAGACCCTGGCAACC -AAGAAGCTGGCCTCGATTACCGGAAATGCCGAGGCTTTGGACAACGTTACCAAGGATCTC -GAGAAGTTGCTCCGCCAGATCGAGGTTCCCGAGTCCTGGAAGGAGCCCGAGGGTATCACT -GAGGCCGTTGAGCTTCCCAAGGACATCATCGCCAACAGCTTCGTTTCGTTTGACAAGGAT -GAATCCGAGCCTCCCACTCTTCTCAAGGACTGGGAGACTGCTGCTCGTGGTCTTGCATTC -AAGGAGGCTTATGGCACCAAGACTGCTCTTCGCCCTGACCTTGCGCACAAGACTTTCACT -GTCCACGTCAAGGAAAACCGCCGTCTCACCCCCACCACCTATGATCGTAACATCTTCCAC -ATCGAGTTCGATCTCGGTGAGACTGGTTTGAAGTACGACATTGGTGAGGCCCTTGGTATC 
-CACGCCGAGAACGATCCTGAGGACATCAAGAAGTTCATCGAGTTCTACGGCTTGGACCCC -GACGCGATTGTCGAGGTTCCCAGCCGCGAGGACCCTGCCGTTTTGGAGAACCGCACTGTC -TACCAGGCTCTGATCCAGAACGTCGACATCTTCGGTCGCCCACCCAAGCGCTTCTACGAG -GCCCTTGCCGAGTTTGCCTCTGACGAGAAGGAGAAGGCCAACTTGCTCATTCTGGGTGGT -CCCGACGGTGCCACTGAGTTCAAGCGCCGTGCTGAGGTTGACACCGTCACCTTTGCCGAC -ATCCTGCTTGAGTACCCCTCTGCCCACCCCGAATTCCACGAGATCGTTCGCATCATCGGT -CCTCTCAAGCGCCGCGAGTACTCGATCGCCTCGTGCCAGAAGGTGACCCCGACCTCGGTC -GCTTTGATGATTGTTGCCGTAAACTGGGTCGACCCCAACGGCCGCGACCGTTTCGGTCTG -GCTACCCGCTACCTGAGCCGCTTGCAAGCCGGTTCCCCGATTACCGTCAGCGTCAAATCC -AGTGTCATGAAGTTGCCCCCCAAGTCCACCCAGCCTATCATCATGGCCGGTCTTGGTACA -GGCCTTGCACCATTCCGTGCCTTCGTCCAGCACCGCGCCCTCGAGAAGGCTCAGGGCAAG -GAAATTGGCGCTGTCTTGCTGTACATGGGATCTCGTCACCAGCGTGAGGAGTACTGCTAC -GGCGAAGAATGGGAGGCATACCAGGAGGCTGGTGTTATCACTCTCCTCGGCGCTGCTTTC -TCTCGTGATCAGCCTGAGAAGATCTATATTCAGGACCGTATGCGTCAAACCCTGCCGGAG -ATCATTCAGGCCTATATTCGTGAGGAGGGCGCTTTCTACCTGTGTGGTCCCACTTGGCCC -GTGCCCGATGTCACCGCTGTTCTGGAAGAGGCTATTGCCACCGAAGCCAAGAACACTGGC -AAGAAGGTTGAGACTCGCAAGGAGATTGAGAAGCTCAAGGACGAGGAGCGCTATGTTTTG -GAGGTCTACTAGACTCTGTTTCGTTCTTGCATGGCATGATTGTACGATTGGCATGTTTTT -GCTTTTTATTCCCCCTTGTATTTAGATCCTCATTTCGGCTAGCGCACTGGCCTTGGAGTT -CGAAAATGCATTAGAACATTGAATTACATGATCTTTAATCAAATGAGTCCCTTTTAAAAT -TCCCTGCTAAAACACATAAGTTAATGGAATGTATTTGGGTATCTGCTTCGAAGACGATGC -CTTCCTAGCATGACCCTCGAAAATGCTATGTCGTGTCTAGGTCGTCTATACATATAACTT -TTTCTTGAAACTCATGCAACGTAACTGTCAAACGCGCCCAGCGGATCCGTACTTGGTCTT -TCCATATGCACCACCCTTTTCCTTCCTCAACTCCAATGCCCGACGGTACGGCGGTGCAGT -GGGTGGCTGAATAGGATCCAAATTGACATAGTCCGGAGGTAGGGTGTATGATGCATCAGA -ATCCCAAGCAGACAGATCACCACAAGCAATGAACGACGGAATCCAACGGTTCTCAACCGA -GAACTTGTTGGACAGTTTTTGGGGCTGGGGCTTGTCTTGTGATGTTGTAGTTTGAAGATG -GCGGACTTCAGTAGAGTGATTGGGCGCAGGATTGCTAATTGCCTCGGTTTGTGTCACAGT -ATTTCGAGCCTGTGTGTCCTGGTCGATTTGCTCGCGGACTTCCACGCCCAGGTTGCCATC -TTCTGACACGGTTACTGGGGCGGCGGCGCCACCAAAGAGCGGATTTTTCAATGCGGCCAT -ACCAACCACCCCACTGTCACTGACGGGTGGAATGAAGCCTTCGCAGACCACGAATGCCTC -CAAACTACTAGCGCGACTACTGCGCGGCTTTGCGACGCTCACGCGCTCAAAGACGGTTCG 
-CAGCTGCGCGTAGAGAAGGTCCACATCGCGACCGCGGAAAATCTTAGCAACAAATTTGCC -GCCTGGGCGGAGAACGCCCATAGCAAGGTTTAGAGCTGCGTAGAGGAGTTGTGATTGAAT -ATAAATGTCTAGGTCGTGTAGACCAGTCACATCTGGGGCGCCGTCGGAGATGACCAGGTC -GACGGGATGAGGTTGTCTGATAGCCTCTGTGACTTGTGATGTAGAATCGGAGGGAGTGGA -TGGTTGTTCGTATGCTTCTGGGTCCAGAGCACGCAAGAGTAGAGGAATCGTAGAGGGGTG -TGTGATATCAGCTTTGAGGGTTGTGATACCTTCGAGCGGTGCCATGGGTTGCAAATCGAT -GGATACAATCTTGACGTTTTTCCGGGGTTTTAATTCAGAGCTTGAAGAAGGCTGATCACA -GTCCATTTTATCATCATCGGCCGTCGTTATCTCTGCGCCCTCCAAACCCTGTTGCTCTAT -GCACTTTTTCTCGAGCCATGCCCGTCGCCCGAAGCTTTCGCCCTTGATCAAGACACGACT -AAGTACTTGACTCCAACTTCCAGGTGCAGCGCACAAATCCACGACTCTCGTCACATTGTC -CGGGTTCTCATGCTCGAATAGGTCGAATCGTTCGTCGATCTGGATTAGCTTGAAGGCGGA -ACGGGCGCGCCAATTTTGCTCCTTAGCGAGACGGTAGTAAGCATCGCGCTTGTCCTTTGA -TGACTTGCCCATCTTGAAAAAAAGCTGCCCAGGCGCAATTGGAAATGATAGTCTTTCAAA -GTGGGTTAATTTTGCGACCAACGTGGGGAACGAAAAGATAAGATTCCAAAGGCGGACTAA -AATTTCTGCCGCTTTTTTTCTTTTCCCCGACGGCGTGCTTGAATCATTTGTCAACTGGAA -CAAACTGTAATACTTAGGAATTACTCAGTTCATAATGGGTCAAAAGAGAGCACGCGACCC -CAAAGCCCAGGTGGCTGAGGCAAACAAACGCAAGAAGGCAGTTAAGTCTGACGCAGCGTC -AAGCGAGGAACATGGCTCCATCGCAGTTGAGGACCTCAACTGGAAGGAAGTTGCCATGCC -AGATCGCATGGATGATGCAGAGGGCTTCTTTGGACTAGAAGAAATTGAAGGTGTCGAGAT -CATAAAACAGGGCGATGGAGGTGTTCAGTTCAAGGTATGAGTGTACATGTTTTGATCAGC -CACATTGACTAAACGCTAATATATGGTTGCATGGCTAGGCCAAGTCTGGCAAGCCAGCAA -AATCGATCCTCAAGCCTCCGTCTGAAGATGATGGAGAGGAATGGGGTGGTTTCAGTGACG -AAGAATCGCCCAAAAAGCCTTCTGCCGAGAACACTCCCGCTAAAACCGAAGTGAAAGATG -ATAAGCAATTgaagaaggacaagaagaaggagcagggcaaaactaagaaagaagagcaaa -agaCTGAGGAGCAAAAACCTAAGGAGCAAAAACCTAAGGAGCAAAAACCTAAGGAGCAAA -AACCTAAGGAGCAAAAACCTAAGGAGCAAAAATCTAAGGAGCAAAAACCTAAGGAGCAAA -AACCTAAGGAGCAAAAGCCTAAGGAGTCTAAAACTGCTCAAGGCCAGAGCATCAAGGCCG -GGCTCGGATTCGCGGCTCTCGATGATGTCGAAGAGGACGACGGCGTGGACGTGTCTGCTT -GGGACGAGCTTAACCTCTCGCCTGAGATTTACACGGCTCTTTCAAAAATGAAGTTCACCT -CCCCATCGGCCATTCAAAAAGCCAGTATTCCTGCCATTCTCGATGGGCACGATGTTGTTG -GAAAGGCCTCCACAGGTTCCGGTAAAACTTTGGCATTCGGTATTCCGATTATCGAGCATT -ACTTGGACAAGAGAGGGAAACAGGAGGAACAGTCCGACAAGTCTGAAAAGAACAAGGCCC 
-CGATAGCATTGATTCTGTCTCCTACTCGAGAGCTCGCGCATCAGCTTGGAAAGCATATTG -GGGAGCTCATTGCTAATTCTCCCGATACCAACGCACGTGTCGCTCTTATTACCGGTGGTA -TGTCGATTCAGAAGCAGCAAAGACAACTTGCCACGGCAGATATTGTGGTTGGAACCCCCG -GCCGAGTGTGGGAGATCTTAAGCACAGGAACGGGGTTGATCCGCAAAATGCAGAAAATCC -AGTTCCTGGTTGTCGACGAAGCGGACAGACTGCTGAGTGAAGGTCATTTCAAGGAAATGG -AAGATATTCTTAATGCACTAGACAAGAAGCAAGCCGGAGATATTGCAGACATGGATCAGG -AAGAAGAGGAGGAGGAGCCATCTCAAAGGCAGACTCTTGTCTTCTCGGCCACATTCCACA -AGGATCTTCAACAAAAGCTCGCGGGCAAAAGCCGCTGGTCAAATGGCGATATGTTGGACA -ACAAAGCGTCAATGGAGTACCTTCTCAAGAAATTGAACTTCCACGAAGAGAAGCCCAAGT -TCGTGGATGTGAACCCGGAGTCACAGATGGCCGTCGGACTCAAAGAAGGAATTGTTGAAT -GCCCTGCCATGGAGAAGGTAAGGCACTTTTTTTTTTGAATCGCTTCAATACAGATGCTAA -CTCAACCGATTAGGACCTTTATCTTTACTCAGTCCTTCTCTACTATCCCAAACACCGCAC -AATTGTGTTCACAAACTCCATTTCCGCTGTCCGCCGCATCACTCAGCTGTTGCAAGTTCT -TCAACTTCCCGTGTTCGCACTTCACTCCAACATGGCCCAGAAAGCCCGACTCCGTTCCAT -TGAGCGTTTCTCATCGCCTACGGCCGACCCAAGCTCAATTCTTGTTGCCACCGACGTCGC -CGCTCGTGGTTTAGATATCAAGGGCATTAATTCCGTCATCCACTACCACGTTCCCCGCAC -AGCAGATGCCTACGTTCACCGATCTGGTCGTACTGCTCGTGCCGGTGAGTCCGGAAAGAG -TATTCTCATCTGTGCCCCTGATGAGGTTGTCGGCGTTGCCCGTCTCGCCGGCAAAATCCA -CGCCAAGAAGGCGCGGAAGGCCGATGACCCCGAGGGACCTAGCAAGAAGATTCCCCTCGA -GTCTCTCGACATTGATCGCCGAGTGGTTGCTCGCCTCAAACCCCGCATGGGTCTTGCCAA -GAAAATTACCGACTCCACCATTGCCAAAGAGAAGGTCAACACCGAGGACAACTGGCTTCG -CTCTGCGGCCGAAGATCTCGGTGTTGAATACGACAGTGATGAGTTCGATCAGTCTAAGGG -TAGAGGCCGTGGTCGTGGTGGTGGCCGCGAAAGACGTGATAAGGAGGCCAGCGAAACGAC -CAAGGGTGAAATGGCCGGGCTTCGTGCGGAGCTCAAACAACTCCTTTCTCAGCGAGTGAA -CATCGGTGTCAGCGAGAAGTATCTTACTGCCGGTCGCATTGACATCGACGCTCTTCTTCG -TGGTGAGGGCAACGACGCGTTCCTTGGCCACCTTGACCCTCTGACATTCTAATTTTGAAA -CTGAAGCGTATGTAAAAACACACATATTAGTTTATATCAAATGGATACTTGACTCATAAA -TCCTTCGCCGGACTTCTCCGATATGAGATAAGATATCAGACCAGCTGATAAGTGACAAAG -AAGACCCCAATTTCTGAACAGGACTATGCCATATTCAATGGCCAATTTAGCCCCAAATGT -AACTAGTTCGCTCAATATGCAGCGATTTACATAAGTGCATATACTCCCCCGCCTTGCTGG -CTCGGAGTATTAAAACACCCTTTTCTTGCCTTTCCTGTCCTTCAGCCATTCTCTCAACCA -TCATGGATCCCCAAATTGAAGAGCTATTAGGCCTAGAAGCCGTTCGTACAAGGGCCCATG 
-TGGTCCTCAAACTAGCCGAAGAGGGTCGTTTGAACCATTTCAACTACCACCCTGAACGCA -TGGAAGATGCCACCGACTACGTACTTAAACTAATTCAGGTAGGGCTAGCATACCTCCCGA -GTGAACAATTACATTTCCAGGTATCAAACAACTCACGATATCATAGCGCGACTTCGGCCC -TGACAAGTATCATCTCATTCCGCCCCATGGACGCTGGCAGCATTTTGAAGTCGGCGGGGT -TCCCCGCATAGCCACTCTTCTGGCCGAATGGGATGAGGAGAAATGCGACACCACCGAAAA -GACACGCAGATTGATAGATCTATTCTTCGTCTCTGTTCTCCTGGATGCTGGCGCTGGAGA -CTTCTGGAAATTCAAGGAGTCACAAAGCGGCCTAACGCTCAATCGAAGTGAAGGTATTGC -AGTTGCGGCTTTGCACATGTTCCTGAACGGCGATTTTGCTGGCCCAGACTCAGCGGTGAA -ACACACCGCCAACGGTAAGCGTTTGCTTTCAATCAAAGCGATAGCGATATGGGCTGACAC -CCCGTCTTAGGTGATGCATTGCGCAATATCAATGTAGAGATCTTATCCCGTGGACTCCAG -GTCGACGATGGAAACCCCATGATCGGAGTACCGGCGCGTGCAGATATTCTCCGTAAGCTC -GGCGAGTCGTTGGTGAATTTGAAAGATATCTTTGGTCCCTCTGGTCGTCCTGGTAATCTT -GTTGGTAAGTTCATACCGTTCTTGAAGTCACTTAATCCTAGAAGTGTGTATCAAAATAAC -TAAACAATTCCCACATCTAGATTACCTCATTGCCAAATCCAATGATGGAAAGCTCGATTA -CAAGGATCTCTGGACTGTGCTGCAGCGCCTTCTGATCCCCATCTGGCCGTCCGATCGTAC -TCACGTTAACGGACAGCCCATCGGCGATGCATGGCCTCTGCGAGTACTTGGGCAGCAGGC -TGGATCGGAGTCGAAGCCCTACTCCAATATTCAACCGTTCCATAAGCTGACCCAATGGTT -AGCGTATTCATTGATGGTGCCGTTCTCCCGTCTGCTTTCAGTCTCGTGGTCTAATACCGA -GCTGGGCACTGGTCTTCCGGAATACCGCAATGGAGGAATGCTTGTCGATATGGGCGTTCT -AGAATTGAAGCCCGAGGCATTGCAGCGCGGATTGAGTCTGTCAGGCGGTAGCTTACCTTC -TTTCGGTGCCGGCGATGACGAGATTGTGGAATGGCGGGCGATGACTGTTGCTCTCCTCGA -TGTGCTACATACAAAGATTCTCTCCCGCTTGGATGGTGTTCAATTGTCTCTGCCTCAGGT -ATTGGAGGCGGGTTCTTGGAAAGCGGGTCGTGAGCTTGCAGCGGCTAAGCGTCCGGAGAC -GAAGTGCAGTCCTATCCTAAACTTTGGCGATGGCACCTTGTTCTAGACGTTGGGCTGTCA -GTATTCAATATCGAATTTATGAAGAGCCTTTCCAAATAGATCTATCCAATGTATCGGCGC -TCTTCCCGGCCCGGAATACATTTTGAATTGAACAATATATGGACCAAGCTTCTTCGGGTT -ATATCAAAGACCAATGCATCAATCTCTGCTTCAAGTCTGGTCTTGTATAATGATGTTGGG -TTTCTTATGCCCAATCGGCCGGACTCGGGATGAAACCCCTCTATAAATCTTCATAGATGA -GTCTAGGGCGGTTCAGGCTTCATGGTTCtcgtatcgctctcgtattgtattgtcgtattg -tcgtatcgGCCGCTGGCTTTCTCGGATCTGCTTCTCCGGAACACCTTTGATTTGGTCTCT -TGACACGCCCCTTACCATAATCTCAAATGGCGGAGGTGAATGTGAGTGATATTCAGCAAT -GATCATCTCAATGGTTGGTTCAAACAAGAAACGCTCAACACAAAAGGAAATATACTCAAC 
-ACAGCCCACAACTCAGAAGAGCTGCGCCTGGCCGAAATGAAACCACAAACGACAGATGCC -GCCGGTTCACTTTATACATCGTCAACCTCCCCCGGGGGATACTCAAACAAGAAGCTTttc -cccctgcccctttcccccttcccccatcctccccacaccccttTATCGTAGCATTCGTGC -CCCACCACGCTTAAGGTCAGCCGTGGTGGGGCGTTGCAAGGTCGAGGAACGCCGATGAGG -AAGCAGCCGCTCTCCTCACCAGCACCTAGATGACAAGAAGATGAGTCAGTACTGAACTTG -ATAGGACTCCGAAGTGAAACTGAGAGTAAACTCAAAGAGAACCGAAGCTGACTGAGATCT -AGGAGAAAACAGAATGTAAAGTGAAGACATACCTTGCATTTGAGTAAGCATTTGCTTGCC -TACACCTCCTCCAGAGCGGCGTCCTCGTCACCATCTCTGGCCTCATCGGCGATATCCTCC -ACCTCGGTCACGTGGGGGACGTACTCCCGGTCAGAGTCAGACCCTTCGGCCGGCTGGTCG -GTCTGCCACCCGATTATCTCATCCAGCTCGGCCTCCAGGCGCTCATAGTAACCGGGAGAA -AAGGGGCGTGCCGGCCCATCCTCTTCTCGCACCTCCCACTCCCACCCCAGCTCACCAAGG -CCACCAGCGAAATCCTCAAGGGCTTGCCCGAAGAGGATATCCTCATCCTCTTCCTCCCCG -AGGAAGTCAGGGTCGTGGGTGGGGATTTCCGGGGCGGTGAGCTCGTCATCGTCGGGGGGG -AGTTGGAAGGGGGCGAAGTTGTCAGGGAGAGCGACACGCCACCTGGCGCCCACTACGGGA -GCGAGGGGATTGTTGACGACCTGGGGAGTATCGGCCCAGGCAGGTCGGAAAAGGATGATG -CTCTGGGGGACGTTGTCCACCCCAGGAGCACCAGGGTTGTTGATCTGGGAGGGGTTTCCT -CCACCAGATGAGAGAGTGTTGGTGGTGGTTCGGGGGACGTTGCCCGCGCCAGAACCGACA -GGGTtgctggtggtgatggtgatggtggtggtggtggtagtggcagtgTCCCCACCAGAA -GAGAACCCGCCAGAAGAGAGGTGAATCTGAGTCATCGTGAGATCAGATGAATCAATTGAT -TTGAAAGCTAATACAAGAAGCAGATGGTATCAAGATTTGATGATGATGGTACTTTGGTGA -AATGCAGGAGAAGAGACGACAAGAGTAAGGTATCCTTCGTGTTATGAGAACTCAACATGT -ATGCGTTGTTCACCCCCAACGCGGAAGGTCCATTCCCCGACATGCTCCAACAACTATATT -ACCATGCTATAATACGATCTATGATCCTCTTCAATTGCGGTATGTTGAAGTATGATCTTA -TATTGATATAGATCTATGGAGTCTTGGCATTCTCGGCATACCGAACTATAGTCCTTGGCA -CCTCGGTCCGATCATCGGCTGTTCCAAGGCTTTGAGCCCCACTGTTTGGGTCCCAGGAGA -GAAAAATCGGCTCTAAAAAAAAAAAACAGGACCAATCCAGACGGTGGCGGGTCTCTTCTT -TAAATTGCAGGTCAGTCTTCTTTTTCGTCATTGCAAAAGTGCAAGTGTGGTATACATCAG -ACATCACCTAGCCCTAGTTCAAAGTACGGAGATCGACAAACTCAAAAAGCCGACCGAGTA -ATCAGAGCCGATTTTGCAGTACAAAGTGTGTACCCCGGTCGGCAATTAAAGCAAGCCCCT -CTAACATGCATGACCAGGGTCAATATCGGATTCATACATACTTCCAGAATGTCAAACATT -GCACAAACAGCGCGTCGGCTCTTCCGATCGCAAAACCTTTCAGCTTTCAGGACATACTCG -CCCGTAGTCCCTATTGCACGTCTTCACCAGCAAGGCCCAACACGGTCGGCAGTTGTTCAC 
-GACCAAGCACTCCCCAGCCAGACAGAAACGCCCGCATTTATTTCTCAGACCCAGTCACAA -CCACAACACCATGATACAACAACCCTTGACACACTTGTTTCCCAAATCCCTCTCGTTCAA -ACTCTCCGCGAAACCCACAGCACCTACAAGGAAACACGTCCCCATCTCGCAATCCCACCA -CCCATCAGACAACACCACTTCGTGGGCGGAAGCTTGAGCGGTCCTGGCAAGCTCGCATTC -GCACCATACATGTGGCTATCCGCCGGTAAGACCGGATCTCAAGCCGAAAGTAACGACCAA -GCCAGCAGCGTTGTCTCAGTATTCCACATCGGCCAGGATCTATGTGGTCATCCTGGCTTT -GTCCACGGTGGTCTTCTCACTGTTCTCTTTGATGAGGTGTTTGCGCGCTGCGTGTCTGCG -GCTTTCCCCAGCGGGCTGGGTATGACAGCCAATCTTAATGTGGATTTCCGGAAGCCGGCT -CTCCCTGATCGGATGTATGTGCTGCGCACAAAAACGACTAAGGTTGAGGGTAGGAAGGCG -TGGGTTGAGGGTCAGATGACTTATCTTCCTTTGACCTTGCCACTGTCTGTGGACTCGCAT -AGCATCGTTTCTGATGCTAATTTACTGCGGGAGGATAGTGAAGGTGCAGTGATGGTTGCT -GAGGCTAAGGCTTTGTTCATTGAGCCTAAATTTGCAGACGTGAGTCATTCATTTGCTCTA -TCTACATGAAGCGTGGCTAACAGTTTTTGCAGTCTATGGTTTCGATCTATTCGAACTAGA -GTGTGTTGTGAAAAGTTGATATATCCCCCCCCCATTGTACATGGTTGTAGAGTTATGAAG -TTATAGAACTAGAAATTAGAATCATAGATTGCAATAGACTATTAGAACTTTCTCAATTTT -CATAAAATTGTCCAGATATTCAAAGCATTTACTAGAATGTTCCCTCGAGGTTCACGATCT -GTATCCCATTCATCCATACATGTAGTACTTCGCATTCACATCAGGGTCTGCAATTTCTGA -TGAATTGTCTTATATACAGCCATATTTGCCTAGTCGAATCCGCGACTCAGCAAGCGCGCA -TTTTTGTTACTTTCTCATGAGGGGAAATCAAGGGTCCCAGCGAGCCTATAGATAGCCAAG -CTTTTAATACATCTCGCTCGCCAATGCTATTCATACATGATCCTCGAAGGACTGCAATGA -AAATAGCACATCAATATCCTCAATTTTGGAAGATCTGTGCAGGGTATTTTGAAACCAAGT -GATAGTAGTGTGTACTTGAGGTAAAAGAAATGAACATGAAGGGCTGGGAATAAAAGGCCT -TTTAGGGCATGGAGTATGCCTATGGGTAATCACCTTCAAGGCAGTGTAGTAGGCTATTAG -TCTCGATGCTTGACAAAATCAAACACAATCCTCCCACACATATATTCCCCCTTCGTCGTC -ATAACCACCACCCGACGAGAGAAGTATCTCTCACCCCCAAACATCTCAAACCCCCAAATC -TACAGTAATCAGCTTCTGATACATCCAATGAAAATATAAAACCAAACCGAGTTCACTCAC -TTGCTCAGCCGTCCATCCAGACTTATCATTCCGCTCAAAAGTATGTACCCAAAGCCCACT -CTCAGCCATTGAATCCTCAACTATAAAACCCCGAGATATCCCCCCATCCGCCTTGATCTC -ACCTCGCAGGTATTTCTCTATCCCCGGATTGGCATTAGCCGTCTTTAGCTCGAATCCAGG -CCTAGCAATTCCATCTATACCAGTGACACCATGAACCAATTCACTCTGAGCATGGACCTG -GCCGAAGAAGTAATCCTCGTGGTCTGATATTTTCCAATCTAGCACGCGCTCTTCTTTGGT -TTCGGCTAGACCTGCTGTGGCTGTTTGAAGGAAATCGATGTTTGTTGATGGTTGGGTTGT 
-TTCAGAGGGTGTTTGGTATTGTGTGATTTGGATGTATATTGTTGCGAATTTGAATGCTTT -GCGGAATAACCATGGTACTCTTTGCTATAGACAGTATAAATATTAGCTTGGAAATTTGTA -ATAGGAATAGAAGGGATCGTTCGAACTAGTTTTAGAATAGGGTCTGCTTCGCTGGATAGA -TCTTTGTTCTGTAGATAGCATTGTGATTAGGAATCTACTACCTTCGCAGGGAGATGGCAG -AATTACCAGGACCCAGTGACCGTTGAGATTTTCAATGGTAATCTCTTGAGGAGCGGCCAT -AGTATGGGGATGGTTTTATCCTAAAAGGCAAAGTGATGTAAATAAATTTCGAATTTTGGT -GACTTCGGAAGGAAAGAAACGCAATTTATTTGATGGGGCAATTGACCTCTGCATCAACAC -TGTTTGTGATACAGTACGTTTGTACAAATGATACCTCAGTACCCGACGTTGCAGCCATTC -GAACTATGAAGTCTACACCGAATGCGATTTTGAGGAGAAAGTGATCATTGCTATCTAATA -CCACTATCGTAAAGAACCCCTGCCATGACGCCTTTCGACCCAGATGGCATATGAAGAAGG -AAAAGAAGACAAGAACAGCGAAACAGCCCGTCCGAAGACTTGGGATTGATGGAACAAAGC -AGAAACAGGTCGCACGTCGTCACATCGTGAAGAATTTCTCGTCCTGGTTGAGAAAGACGG -AAGCTAGCAAGATAAGAGGGAGGAGAGGAGAAAGTGAACATTTGGGGGTGAACACTGGTC -TACGGGTCTAGGAAAGATGGAAAGCTCCTTGGCCTTCCTTGATGGCGACCGAGAGGCGCT -TCTTGAGAACGTCCAGGTCGCTGTAGTCTGGCAACTTGAGATAGTTGACGCAAGTCATCA -CACTCGGCAGATAGTCGTCTGGTGTGTATGGATGTTCGCTTGGTCGGCACACAACAGTGA -AGATCGGCGTGAGACTCTTGAAACCTATATCGATTATGTTAGTGGGAATTTATGCGTGGA -AAAAGAGGGTTTACATACCTCCAATGGGGAGCTTGGGGCTACCAGTGACAAACTGAAGGA -AATCTCGGCGTTGCTGGTTGTCCAGCTCGCTCATAGTCTGCAGCAGGTTGCGTACACTCC -GGCTGTCCATGTTGAACCCGTGATCGGCCTTGATAGAGTCCATCAAGGTCTCGATAGACC -AATCCTCCTTCGCCTGCCCGAAAAGCATGACGAGCTCACTTGGAGTGAAAGCACGAAGGG -AGGAGTAGGGGAAAACTTGCGAGAACCCAGCCCGGAAGGCATCCACCTGACGACGGACAC -CGCTGCCCAAGGTCATGTCGATCACCCGTTCGATGTAAGTGTCTACATTCTCGATTGTCA -ACTGCACCTCTGAGCCGCCAGGAATCAACTCAATAGCAGGGTATCCAGGTAGGGTGAAGT -CCAACCCTAGGTCGTCCACTTTGGCACCTTGCACCGTGATATTTTGCACGGCCTCGGATT -TGCGGGCTGGCGAAAGCGACTTATCTGCCTCAATAGCGTTTTTGGCATCGGCAAACTCCT -TGAGCATGAGCAGAGACTTTCCGAGGTCGTGATCAACCAGCTTGACAGTGCCCAGGGATG -GTGCCACCGAAGACAGAGTATCCGCGATACGGAAGAAGGCTGGATTGAACGAAATGTCGA -TGATCCGAGAGTCAAGCATCGAGCGGGCCACAAACTTTCCTAGCACCTTGAAGATGTTCA -ATTGCTTCTTGCCAATGTCTTGATCTGCTTGCTCATCACTCATCGGCGCTGGGAACAGGC -CGCGCTTGCCAAACGCGTACTCTGCAGCAGAGCTTCCATCCGTGTCTCTCCAAATCTTAA -GCTTCTTCTTCGAAAATTCTCTAGAAACAGTAGAGTAGAATTCAAGAGTCGGTCCTAATC 
-CAGTACCAACTTCTTCAAAGTATTCAACCTCAAGAATGCTAGGAGAAGATCCATAGAGTT -CCATGACTTTGATTGCAGAGTCAAGGATGCGAGAACGAGAGATTCGAACTTTTTGGCGCT -GTAGCCGCCCGAGGAATGGTCGGTCATCGCGGCGCAGGTCCCGTCGGCTGTCCTCGCCAC -TTTGGGAATTTTGCCATCTCATCATCGCCCGCGAGTAACCGAATGCAGTGGACTGCAGGA -ATAGATGCCTTGTCTCGAATGGAAACAAGAAGGAGAAAAGCCTGGCTAGGTCTTCGCTCC -AACTTGGAAGACAATCGCTTGCCACAATCAATGGCTCCTCGAGTTGGCGGTTAATTTTGG -CGGTGAGCTTCGTGTTGATGAATTGGGTTAAGGGCTCCGATGTGATGGCGGCAGAGTCTT -TTATATCAGTTAGAATATCATCAATCGTTGCGTTCATTTCGTGCAAAACACGAAGCAGAC -GAATGATGGATGAAGTTGTGGGGTCTTTGTCGAGAGAGGCTGGGATTTCGAGAGCATCAC -CCTCGGAAGAAGAAGCATGGGCGGTCGAAGTGAGGGTTGAAGGTTCCGGTGGAGGAGGAC -CAGGAACGCGCTTAAATGTGATGGTGTGAACAGCCGACCAAACGTTCTTGGCACTACTCT -CGTCCAAGTCTTCGCGGTCGTGATGAACACCACGATATATAGTTGTCTCATTAGTAACGG -GTTTGCCATCTACGAAAAACTCAAGGTGCCAGTCCTGGGGCACTGAGGCCATCGCAGCGG -CATAAGATGAGAACGGGCGCCCGGCAGTGGCAAGCGAGCTGCTGCTCATGCGGCTTGGGA -GAGCGGTAGGGGTCGAGGAAGAAGGTTTGGATGCAGGAGTCCCAACTCGGGTACCATCCT -CCTTGCGGGCTGTGACCTTGCCCGAGGATGCCACCTCCATGTTGACTGCCGAGGGGTCAG -AGATATTGTCATTTTCCATATCCTCATCGAGATCATCAACAATGGCGTTAAGTtcttcgt -cctcgtcctcgtcggcaccatcttcttcgtcgGTCAGTTGCCTTTCATCTGCGCACTCAA -GGGGCTCGTCATCGTGGTCTTCATCTTCGTCCTCTTGAATGCGATTTGACTGATGATGCG -AGTGACCAGTTGCAGGATCATAGGCACCGTCCGTGGAATCGGCAATAGTGTCATGCGGCC -CCCCTGCCTGACTGGGGAGGCTCATACCAGAACCTGAAGCCAGCTGTTCACGAAGCCGGG -ACGCATTGGCAATTTGGGAGAGAATCGAGTCCCTGGTTCGTGAGGGCCGAGGCTTTTCTG -CCACGGCAATTCGTGGATGAAGGAAGTCATCCAAGGCCTTGAATGTGGCAATGGCATGAA -TTGAGACCATGATGTTGCGGTAAGGACGAGGAACTTCGGACCCTTCCTCGGCAACCAACT -TGAGTCGTAGCTGTTTGCTCAACATATAGGCTGCGTTGCTGCGGGTATTCTCCAACGAAT -TATGGCTAACGGTGAGGACTTCGAAGTGTTCCGTCCTGCTAAGCAGATCTTGCAACTTGC -GGATAAGTACGCTGAAAGGTGTCGTGGCTGTGCTCTGGCTTTGAGCCTTATCTGAGATTG -AAACACCCATAAACGCCCGCAGGAAGTCCGCTCTAGCATCGCGGATAGAACGAGCTTGGA -AGTTACCCAGAACACACAGAAGAACCTGTATGATGCCGGAGTTGAGCAATTCAGAGCTGG -TGATGCTCTCCAATGCGTCTCCATCAAAATATGATGCGAGGTGTTTAAATAAGAGGAGAC -CTCCCTCACCGCCACGAGAATAGCGTGACTTGATCTCCAAGGCGAGAGATCTCAATGCTG -TCAATATCCTCAGAGCCTCTGCTCGGACGGTTTCGCCATGGCTTGCTTCATATACCTCGA 
-GGAAAGCTTGAGCATCTCGGATAACTACATCGTTGAGCGCATTGTCCATCTTCTGTAGAC -TGCGTGGGCCCGCCATGAAGtcatcctcggaatcagacatgtcatcgtggtcatcatcct -cgccatcctcatcatcatagtcatcagcttcatcgtcttcatcctcatcctcaccgccaG -CTCCAGTCTCTCTGGCAACTTGATTGGGGCTGCGCGATAGATCTGTGGTCGTGTTTACAT -TTGTGGGATCGGATGAATCGGCAATCTCCTGGTCTCCTGACAGAGATCCTTCGGATAGTT -TGATGATCTCGGATATCACTCCTTCACGGTGGAATTGATGCTGATAAACATGCTCAAGCC -GTTGGAACAATAGCTCAGCGCAGCGGAGAGCCAAAGATACGAGAGAAGGGTGGTCCTTCT -GAGACAGAATAGCAGCCAAGAACGAGGCGTAAGGAACTGTGCGTAGGGCGTCTTCAATCA -AAGAAGGCTCGAGAATGTGCAGCATCTTCAGCTGAGCAATCAGAACTTTCTGGCGGACAT -GCAGATTGACCGTGCTCGAGTAGGCATCGGTTAGAGTTGGGAACAAAATCATCGCAAAAC -GCTTAAGTTCAGACTTGCAGTTAACAAGCAAGGACCGTCTCTCCTCTACTGCCTCTTTGG -TTTTGGGCGACCTCAGTCCGATAGACATCTGGCTATCAAAGTATGAGCTTAGCAGATTGT -CAGTCTTCGAGGCATCGCGGCTTGGAACTCCTGGCAGCAGCTCGCAGATGACATTCAGAG -TTTCAAATACTTGTTCACGGGGCCGATGAATCAATGCTTGCATAACTAGCACACTATCCA -TCTTGACAGCCGTGTTATCGACATCCTGGGGTGGTGACACGCCCGTCAAAATCTGGTAAA -GCGTATCTACTACATCCATCTTGAGAAGCTCGTTTGACAACCGTGGGCTTGTTTTTGATA -CGATTGCCAAGACCCTGAGGAACTGCGTGTGTATGTGAGGCCCGATCAGATTAGTCGTTC -CTGGCAGTAGGAGTCGAAGAACAGCTTTAAGCATTGCAGGCTCAATCAGCTCCTCCAGAT -TCTCAGGTCTGTGCTTGAAGCTTTCGACAATCCGTGATACGCATAGACATCCTTGCTCCA -CGACCTTGGGGTCATTGCTGGATAGGACGTTGAGCAAGGTGGGCATCACATCCCGCACCA -CAGGGAACGAGTCGTGGGGAAGGTTACGACAGCAGTTGGCGGCCGTGCTGACTGCTGTAC -GCTGAGTGCTAGTTGGGAAGAAATCCAGATATGTGAGGCAAGCGGTCAGTCCACCTTCTC -GCACAATCGAGGCTGGGAAGTCAACTGAAATCTTGGCCAAAGTACTCAGAGCCTGTTCAG -CCAGATCGATGAACTGTATATCAAGAAGCTTTTGACAAAGGATAGGCACAGCACCACCAT -ATACAACATTCGCCACGGAGCCCCGGAGGGCCTCCATTAGATTCGCGAGAGAGCGACATG -CAAGGAGCATAATCTCGGGGTTCTCTTCGCCGAACTGGTTCGGTTGCATCAGTGACACAA -GCTCCTTCACATACGGGTCCGGCGAAAATTGACCAGACAGGTTATCTTCGTTCGAGACAA -GCAAAAGATCCGACAGCTCTTGCAGAGCGATCAATTGGAGTGATGGATCATCTTTCGCTC -TCAAGTTGCTAAGAATATCTCGAAGACGCGAGGACATGCCGCTCATCATGCCAGTAAGTG -CACGTAGAGTGCTCTGGAGTCCCATGGGGCCTCGGCCACCGAACAGGCTGCTGTGGAACG -GATCCATATCATCATCTCCAAACGCTGAGGGGTTGGTGCCATCGTTGCTCTCTCCTGTCT -GGGATCCTTGATGTTCTTCGTGGTGTTCCTCTTCCTCCTGTTCGCTTTCCTCCTCTGCAT 
-CTTTCATCAGAACATCTGGATTTAGCTTGGCGGAACGCTTTTTCGAGTGCCGCGGTGGTG -GAGACTGGGTTGTCGCGGGTCGATCTACAAACAGAAGGAATCAGTGACTATCGCATTTAG -ATACCAATGTTGTATCACACGGCCGAAACTCGATGCATGAAACTCACCGTTCCGACTGGA -CTTTCGCCTTGATGGTTGTGGAGAGGTCGTCGATTTTGATGATTCCTCCGACGGCCGAGA -TGATGGTCTAGAAAAGAAGATCAAATTAGCACCCAACTCCTATTCATCACTATTCTTCGT -CGCCTACCCCGGTTGTGACATGGCTGGACGGCTGCGGGCGCCTCGACGGGGTGCAGCAGG -AACAGGACCAGTAGATGGAGCTGTTTGCGATGCTGCGGTCCGGGGTCGCTTGGTTTTTCT -TTGGGGCGACGGCGATGTTGGTTGTATCGGGGCGTCTGTTTGTCGGTCGTGGCGGGAGGA -TGCCTTGCGTTTCCGGGAAGAGCTTGAGCCGGTTGTTTGTAGAGGTGTTCCGGACTCAGT -AGGTGGAGGGTCTGTGGCGAGTCTCGCGGAGGAGCGAGTTATTCTGGATGACATGAGAAT -ATGGTCTTTTTTTTATTATGTGTGACGAAATTAGGGCCTGGagaaagacagaaagacaga -aaaagaagaCCGGGATGGTCAAAGCGATAGGCAGATAGTTGGGTAGACAGATGACGAATC -TAGAGATGTAGAACGATAGTGAACTGTGAATGATCCACTTCACAGGCCAGGGGATAAACG -GGGTAAGGCTCAGGTAATCACCAGATTCTCGCCAGTGAGTGTCCACGAAAGAGGAGTCTA -AACTTCCAAGAGACCAGTGTGTAAGGCGGAGAGAATGGGTGGGAAACTACCTCAGGGAAG -AAGAGGCCGTGGAAGTGCCTGCGGGGGAGACGGGATAATCCTCGAGAGGGTCCTGGCGGT -TGGCGGAAGGCCTTTGCCTGGCCATAAGTTATCGGGTATGGGAAGCTCGATTGAGTATTC -GCCGTTTTTAACCCGCCATCCGCAAACCTGTCAAAGGCTAGCCGAAATGGCTGGCGAGGG -CGTTCGGGTAGCTGTTTATCGTTGCCGTCGGGGCTGCAAGATCACTCAAGAAAAGAATGA -GATACCAATGCCAGTCACAGATGAATTTTTTTCCCCGATGGGTGGGTTGAAAAGCAAGAA -AAAGAAGAAAAGGATAAATTGGAAAGAAGGAGGATTTGATGTGGTGGTTGTGATGAGATT -CGGGGAGCGTCAGGCGGCGGAAGTGCAATATTTCGATGATCTAACCCGATACTTATACAC -GAACAACCGTATACAACTGCCTACACTATTATCCATACTATTACCTAGATTTTGGTATTT -GGTATGCTTGCTTACGCTTACCTATGCTTGATTGCACACATCCTACACTTTCTTTATTTC -CCGGCATTCACATCTAGCCTGACATTGGGCACGGAGGGTCCGAGACCTGTCCGACAGGGT -GGCAATAAATATCTCAAGGGATCCCTATATATATATCGTCAAGCGGTTTATGATATTGTA -TTTGATATATATGGTCCGGTTATTTGAATATAGCCGAAGATATTGAACAACTCATCCTCC -GTTCGTTTCATTGATGTATACATAGAGCTCAATGAACATGCCGGGATTAACCCACATAGC -TACTTACGTTGTACTTGATTTCTGCTGCCACTTCTTGGCTTTCCCTTCCGTTTTGGTGTT -CACCTCTTGCCGCTTTCTTTTCTCACCGAGTAAACCTGAGCACAGTCCGGAGCCTTACAT -AATCATCATCGAAACCGTTCCTTAAGCATCACTCGATCATGATCAAGAAGATACTATATA -GGCAGAATGTCATGCTCACTGAACCCATACAGATATGAATTAATGGAGGCATGGCCCGAG 
-ACAAAATAAATGACACTCGTGCTAAAAACATGAGTAGAAGGACTAGACCCAATTAGGTTA -TACTTCTTTGCAGTGTAATGGCTGTAATTCAAGGGTAGATCCACGCTTTTCCAATCAGCC -CTGTATAGGAGAGACCGCGGGTCATCAAGGTGATTCGTGGAGTTGAACTTTTTGATTAAG -GTTATAGTAACAAAACACGCCTATTGGTAGATTGCATGCCAAGAGGAAGTATAGATACCT -ACGGGTTTAACCAAGTGCTCTCGGCCCTTACATTCTTCTTTTTTATCAAAAACATACGCC -CTCGCATGAGCAGAGAAGGTAGGAGAGAGAAAGGTATATGAGTAAAATGTGCAAGAGGAA -AAGGTCCAGGGTCTGAAAGCGTGAGACGCAAAGAGTAGATGGCCGCTTATGCGGCCTCAT -GGTGTTCTTTCGGCGGTCTTTTGAGCGGTCTTTTCAGACGACAGAAAAAGGCGGGAAAAG -TATACCTTGTGCAAGAGCGATGCCTTATAGAGCGTCGGTAGGCAACCAAAGTTCTCAACA -TGAAAAGTAAAACGCAGCAGGAAGAGGTTACTGCACTTACCTGGTTCGGAAATCATGACA -AACATACTAAATAAAACTTTCCTTTCTTTTCATCAACTCTTCTTCTATTAGTCTTTGCTT -TCGTTCATCAGATTCCTCTCTAATTATCATCTGTTTCACTTAAGATACCAGAGAAGATCC -CTGTCCCTGTCGCTAGACAATTTTTCTTGCCTGATGCCAGAAAACCGCCATCAGGCTTCG -TGTGGTTATCAGGAGATATCAGTCAGCAATTTATCAAGACTCTCTGAGGTTTCGCAAGAC -TCGTTGCAGTCAACCGCATCGGACTTCCTTGATGCTAAGATCGCTGCACCCAAGGACGAG -AAGGAGTACATCAGTTGCAACAAAGAGGGTCTCGATGAGGTTTATTCTTCAGGGCTATTC -AACAGTGATTCTTTCCAGGCAGAGATTGGTCCTTTTCTGAGGAGATTCCGAGCAGCGTCG -CAGACCCTCGATGTCATCAAGCGCCAACGGACATTGATCCAGGAAGACTAAGAAGTGGAG -GTCAAGCGCAGAGGGGCCACTGCAATCTAGACGATAGGCTTGCTGAGAGAGCATACGTGG -ATACGATTATTTCACGTGTAATGGGCGCGGCTGCGAAGATTTGAGAAAGCTCCTTCGACC -AGAAAGAATACAAGAAAGCAGTGAATTGATACTACGGCCTGACAACCGCCTGTTGGGGAA -AGACTTCTTTCTGTCACGTTCTCGGATTTTTCCTGCCCGTCCCTTTAGTCAAGGCAGCTC -ATCTGGTGCCGAAGTCCTTGAGCAAAGGAGAAGTGTCCCATATTTTCGAGTCCAGGTCGG -TGTGCTCTTTGACCCACGCAATGGTAATGATACCCTTTTCGTTTGAGAAGAGCATGATCT -GACGAAATTTTGATAGGACTGTTGTTATCCATACCCATTGAATCCCTGCTCGACCAAGGG -GTGATTGCTGTCATTCCAATTCCTGGCTCGATAACCGAACCAACCACCTGGAGATTCATT -GTGTTGGATGAGTCGAAAAACGAGAATTTTGTTTACAAGAGGGAGTCAGGGGCAGTAATC -AAGGTCAAGGTCAGTTCTATCTCTAGGCCTAGAATGGAAGAGTACAGGCTGACAGTTGCG -CAGCACCTCGATGACCGCATTTTGAGCTTCCTTCCTGAGAACCGCCCTCGAGCTAGATAC -CTCTATTTCCGTTTCCTGATCTTCTACCTCCACGCGAAGCGACTGAATCTCGGTGATACA -ACTACAAAGGTGGAGGCTCAAGGAGCCTGGCCGTCCAGTGGTGCATATTGAAACAAGTCC -ACACTCAAAACCGTCACTCATGTGTCTCTGGATGTGAGCTCCCTGATGAGTTTGTTAAGA 
-ATCAGACGTTCGAGAGTTCAGCCGATGAGTCTCGCAATCTACAGGCAGGTATGATTCTTG -GGGCGGATATCCGACATACCGATCCTGACAACCGTGGTGCGTTGACCAAATCTATGAAGC -GGCTCTGAAATGTTTGGAGTGTAAATGATCTTCAAGGGCAAGGATATTATTAGCAAGGGT -TTGGTGAATACAAGTGTATTTCATTCATGTGTACAGTGTCGAGATAAATAAAACAAGCGT -GAGAGCCGGTGACATATGAATACAAAGTGATACTTCGTAGCTAATTGGAGGCTTGATCTG -ATCAAACCTCGAGTAATCTGTGAATCAGTGAGCACTCGGTAGGATCCCACTGGCCTATCA -CGTTACTGAGCTCACTCTGAGCCCAGACTGAAATCTACTCAAGTCATGCTCGATCCCCGT -TGGCACTTGATCAATCGAGAGTCTGCTATATTGCGCAGAATCTCGCCGAGATAGGACTGG -ACAGCATCACGCTGAGTAGCCTCGAAGAAGAAATCTCATCAAGCCCTCTTTGATGCAACT -GTTCACTCATCTTCAACTGACTCTGAGCGCCTCATACAAGCGGCGGCAGCTAAGATAAGA -AAGCTCCACCCAGAGGAAATCAAGCACGTAGAGAAACCTAGGTCTGTATAACAAGAATAG -ACAGCCGTTTATGCAGACTTATGGTGTCTTGCTTACAGGGCTTCTCAGTGGTCTTTTTCA -CGGGGTTTTCGGACTTCAGAAAGAGACGAGAAAAGCATACCTTGTGCAGGAGACTCTTCT -ATGTATAACCTCTAACGTAAGGACTTTGAGAGCCAAGAAAACCCCAATTTAAACATTGTT -TAACTCCCCTATAATCTCTTACCTCGGAATGGAATATAGCGGCATTCAATCCGGACGGAG -ACACACCGAATAGAGAACTCGATTGACTCACCGAGGTAGTGATCTTATTAATCACATTTA -AGGAAACTGGGATCTGCCTAGAGAAATAAAAAGAACACAGGGATAAAAAAAAAAAAAGGA -AACATAGACAAATTTCACCTTTTACCTATCACCTTTCACTCCCTCGCCTTCTTCCGGTGT -ATTCTCTTGCCGCCTTGGGTTTGCCCCCCGCCATTCCATCCGGCATAACTTCAACTCTGC -ACTTCTCAACACCCCGCCTTTCCCCTGAACCCTTTCGCTTTCCCCGGAATTCCAAGATCT -TCTCGGGAACACATACTCTTCCTACTACATAATGACAATCGAAATGGTCGGCGGGCAGAG -CGATGATGTCCACCAGGTACCCGCCGACATCACCCCCGCTCGGATGGAGGATTACACCTT -CCCGGAGTTGCGCCTCAAGCGCACAATGAGTGATCCAGAAAAGACTCCTCTCTTGCTGGT -CGCCTGTGGTTCCTTCTCGCCGATTACCTATCTACACCTGCGAATGTTCGAAATGGCTGC -CGATCACGTCAGATTTAGCACCGACTTCGAACTTATTGGGGGATATCTGTCGCCGGTGTC -GGACGCCTATCGCAAGGCTGGTTTGGCCAGTGCGGAGCATCGGTAAGAGCTCCTCCGTCA -ATTCTATCTGGCTTACTTGTTTCATAATGCTAACGAGCATTGACAGGGTTGCGATGTGCC -AACTTGCAGTCGATCAAACCTCTGACTGGTTGATGGTGGACACATGGGAGCCAACTCAAA -AGGCTTATCAGCCAACTGCCGTGGTACTGGACCACTTCGATCATGAAATCAATGTCGTCC -GTGGAGGAGTGGACACCGGAAACGGGACCCGGAAGCCCGTTCGCATTGCTCTGCTAGCCG -GCGCGGATCTGATTCATACCATGTCCACACCAGGTGTATGGAGTGAGAAGGATTTGGATC -ACATTCTAGGCAAATACGGCGTAAGTTCCAAAGCATTTGCTTCATCCCGCGTTTGAGAAA 
-CAGGCCGCTAACACCTTTCAGTCTTTCATTGTGGAGCGGAGTGGCACAGATATTGATGAA -GCCCTAGCTAGCTTGCAGCCCTGGAAGGACAATATCCATGTCATCCAACAGCTCATTCAG -AATGACGTTAGTAGCACAAAGATTCGACTCTTCTTGCGGCGTGAGATGAGTGTGCGCTAC -CTCATCCCAGTTCCAGTCATTCATTACATTGAGCAACATCATCTATACGAGGATGATGGC -ACAGCAGAGAAGGGTAAAGAGAGGCAGGAAGGCCGATCAGGATGATAAGACCTACTACTG -CCTGTTGAGGCATACTCATAATGACCCCTAGCGACGCGACATCCTGATGCGATACCCCTG -CCCTCTGTCCTTTCCTGTGCGACAGATGATGATTTTTTGTTGGAGTTTGTGGAGTACTTT -TGAGTTCTAAATATGTTTGCATTTGCCCGTTCTCGAAGCTATTGCCGGGAGAATTGCAAT -CATCCCGGGCAACCGCATGTACATACTACTATCTCTGGGCTTGCTTTTTCTTCCCTGTTT -TGCTAAAACCCACTGTACATATGAGGACTCAATATACCTCTTGGGTTAGACATCAATTTC -TCAGAATGTGTTCAAACTAATATTCAAATGATCTCTCATGTTATATGCATCGGCCTCGGA -AATACTCGAATAGGCAGGTAGATCGCCTCCACATTCACCATTATAGCATAGGATGACACC -ATATTATTAGACGGGGGTATGTATAATAGCATGAAACCAATGTTGGAAAAGGAGTAGTGT -TATAAATCAAATTGCCCCTTTAAAAGACCCCGAAGCAGGTAATTATCCGTCTCACATCTG -GGCAAGAAAACACCGATCCGGATCGCCCACTTATCATCTTCCGTTATCGACTGATCTTTG -CTCTCGGCCTCCGCAAATAAACCGGCGCCCAGGCGTGCGCAGAACCCATTTCGCTCATAA -AAACCCCTTCCTTCCAATCAATATGCTATAGACATTGAGAACTATTCGCCATGACAATTC -TCGAACCCCACGGCCTCCCCTCATGGCCTCTCACCCCTCCTAATTCATCCGATCCACCAG -CACCAAAAGACGTTTCGTCGGTGGTAACAGCAATCCATGTGATCTCAACAGAGCGAGCAG -CCCTCGCTCACCTCGAGCACATCTATCAGACCGATGCGCGAGCGCAGCACGATCTCGCCC -GGGCTGTAGATCAGATTGCGCGGAGCGTGCGCGAAGGCGGGAAACTGGTCGTTTGCGGGG -TCGGGAAAAGTGGAAAGGTCGGACGCAAAATAGAAGCTACGATGAATAGCCTGGGTGTAT -ACAGCGCGTTCCTGCATCCGACTGAAGCCTTGCATGGCGATCTGGGATTGGTTCGGCCGG -TAAGCCCACCTGCACCTGCGTCTGCACATGTCTACTTTTGAATCCCGATTATCTGCGCTT -TTGATCAGAGACATTGGAAACTTCTATACTAACCTGGTCTATCCAGAATGATACGATTCT -TCTTATATCCTTCTCGGGTCGCTCGCCTGAACTCTTATCTCTGCTTCCGCATCTGCCCGC -TACTGTCCCGGTGATCGCTCTTACATCCCACACACATCCAGCGACATGTCCGCTTCTTGC -GTTGCATGGTCCTGCGGGGATGGGCATTCTTTTACCAGCCCCGATTCATGAGGATGAAGA -GTGTTCTTTCGGTGTGAGCGCGCCCACCTCGTCTACGACCGTGGCTTTGTCGCTGGGCGA -TGCGCTAGCGATTGCTACGGCCCGGAAGCTGCACACTGCGACAGGCAAGAGCCCTGCAGA -GGTCTTTCGCGGGTTCCATCCTGGTGGTGCTATTGGAGCGGCAGCGGCGGCGGCAGAGAC -GCCGCTGACCACGCCGTCCAGCGGCATGTCAACGAGGACATTTGACTCTCCAACGTCGTC 
-TGTGTCTCTTCCATGGGAGGATATCACGAATGCGCCGGTGATTCCTCCTTTCAATCTACA -GTCGCCACCAGAACAACGCTTCATTTCCCGAGATATGTTGGTCCCGCTCGATCGAATTCC -CACTGTCTCTGCGTCTTCATCGCAGTCACCTAGCGATGTCCGACTTCTCGATATCCTTCT -CACGGCCATCCAGCATCCCAATGCCAAGTCATGGGTTTTGTTGTCTCCGTCTGAAATAAT -ACCCCCGAGGCGCATTCGTGCTCTCGTTTCTCCTGGTGGAGATATGGACTTGCGTGTGTC -TGAGGTTGTAACCAAGAACCCTGGTACACCATTCATCATACCTCAAAGTAAATGGCTCTT -GGTGCCAGAATCAACGCCTCTCGCTGAAATCCGGCGTACGGTCTCTGAATCTCGAAATGG -ATCAGACCCAGTTTCTGTCATTGCTGTCGTGAAGGATATGGCCTCTCCCGGCAGTATCAT -AGGGGTGATGGAGGCAGAGGATCTATCGGATGGATGACATGGCTGAAAAGTGGCCCAGTC -TTCTTTATCTTTCTTGGATATACTCACTTGATTTTACATATCGTTTGCATTATCCTGCAT -CCGGTTGACGATGTTACGCCCACATAGCTTTGTTCCTCTTTGCATACATATTTCATTTAG -ATGCACATTTACAATCAAAGCCCGGGCCTCACAGCCAAGGCTGATCATACACGATGCAAA -CCAACGCCAATAATATTTCTCATAGTAATACAATCAGTATGCAAAGTCCCAGGTCTATAT -AGTACATAACACCATCTCCTCACACACATAGCAGCCCTTCACAATCAAGCCTTGACAATC -GGCTTGATACCCTTATGGATAGCAGCAATACCACCGGTAAGGTTCTCAAAGCCCTGGCCA -GGAATCATGAACCCAGCCTTCTGGATCATACCCCTAAACTCCTCCTGGCTAGGGAAACGC -TCAATACTCTCAACGAGATACTGATAACTATCACGGTCACCAGCGACGACCTGACCAATA -AGCGGGATAGCACTAAAGTTCCAACGCTTATAAATGCCATCCAGAACAGGGTTATCGACC -TTACTGAACTCCATGCAAGCAAAAACACCACCGGGCTTGAGCACACGATACGCCTCATTC -AGCGCAGCCTGCTTATCCGTGAAGTTCCGGATTCCGAAAACAACAGTGTACAGATCAACC -GAGGCGTCGGGGATATGCGGCATGTGCTCAGCGTTCGCCTCGACAAAGTCCAGTCGCGGC -GTGTTGTAGTACGGCGTGTCGAGCGAGCGCTTGCGGCCCTCGGCCAGCATGTCCGCATTG -ATATCGCTGATCGTCACACGGGTCTCCATGTCGTGGTTGACGTTCGTGGCGTGGTCGAGC -ATGCGGAACGCGATGTCGCCGGTCCCGCCGGCGATGTCCAGAATGTTCCAGCCGCGCTGG -TCAGCGGTCATGGCGGGTAGGCAGGCGCCTGGGTTCAGGGATCGCACGAAGTGGTCTTTC -CACAAGCGATGGATGCCGAGGGACATTAGGTCGTTCATGCGGTCGTAAGAGGCTGCTACG -GAGCTGAAGACGGCGCCGACTGCAGGGGGTGTGGTGGTTAGTTCGAGGTTCGGGTTTGGT -TGGATTAAGAGTTTTAGTCAATTGGATAGGATGATGGTAATCCTTACCTCGCGTCTCCTT -CTCTGACTCGGGGACATTTGTGAAGCCGAAGTGTGTCATGCGGTCCGCTGGCTTCTGCTC -CTGGGCGCGGCTGGTACAGGAGAAACAACGGTAGGGAGTGGAGGGTTGGAAAGCGCGACG -CTTGGCGCTGTAGTGCAAAGCTCGCCAGGCGGAGCGAGTCGACATGGTGGACATTGTGGA -ATTGATGTTGTTTGTAAGTCTGGAGAAGCTTGATCCAGGCAACTTTTCTTTTCCACTTCC 
-GGTCCCCACCGGTGCGTGCCCTTAAACAACCCGTGCCGACCAATCAGACCGCCTGGTTAT -GAGAGGTACAACATTTATAGTCTATCTCTTCTATACCTGTTCAGATGCAGAAAGAATATT -TGAAATTTACAATTGAGTAAGAGTTGTTTTTTCATTTCTCTCCAACTGTTTCTGCCGACT -GCGATGAGTATCGATAAATGGTTTGTTCCGTTGGACTTCGTAATTTCGAGGTTGGATCTC -CCGAATAGAACCAATAAAATGATACACGCTGGACATATAGAGAAGAAGGAAGAAGGAAGT -AAGTCAGTCGATCTGCTGTTTCAGGGAGTATCTTATCCTAAAGGCTCTGGATATAGCAAA -TATCAAATGATGGGAGTATATTTCCAGGAAGAATAAGATATAGATACAAATATACAGACA -AAGCTCCAAAGTTAGAGACAATCACCACATTTTTTCCCATACTCGCCGAACCCAATGCAT -ATTAGCCAGTTCCTCCTTACGCCGCTCGGCCCAACCACCTTCTTCCCAGATCTTGCACTC -TTCATCAATCTCCGCCTTCCCAGAGTACAAATCTGCCTCTGCAACTTTCACAAAAGAACT -CCGGTGATACACCTTCCAAAAGACAAACATGATAACCCAAAACGCCAGACCAAAATACGA -GGTGATGAAGCCTTGCACGCTGAAAGGCTTGAAGACAGAGAAGCCATTGAAGACAGCGGT -CAGGCAGCCGATAATGATAGCAAGAATGGCCCCGTATGGCTGGAACGGGGCAGCCCAGGG -AACGAATGACTTGCGGTCAATTCCTTGCGCTTTCATGGCCCGGTACCAGGCAATGAAGAC -GCATACCATGCCAGTGTAGGTGAGCACAAAGGCAATGGTAGTCAGATCAACGAACCAGAA -GAAGACCTCGACGGAAGAGGTACTAGCCACGAGGAAAGTGATGCAAGACAAAACAGATAC -CGTAATGACACAGTTGATAGGGATTCCGGATGCAGTGCATTTCAAAAGGAACTTTGGCGC -TTGGCCATCGCGAGCCAGAGAGTAAAGTGTGCGACTGGAGCTGTAAAGGTACGCGTTGCC -ACACGACCAGCCTGATAGCAGAATCAGGAAGTTGATCAGATCGGGCAGGCCTCTGATGCC -GAGATTTTCGATGCCGATGACCCATGGTGAAGCAGCGGAGCCAGCAGCACCGTCGCTAAT -TGCGGAAATGAGACGGGGGTCGCGTGGACTGCAGATAATGCCTACAGCCAGGACTCCGAC -GACATAGAATCCCACAATACGCCAGAAAGTCATCTTCGCAACTCGGGGGATGGTCACTCG -GGGGTTCTGGATCTCTCCTGCGGCTAGGGCTGGGAGATCAGGTCCAGCCACTGAGAAGGC -GGCATAGACCATGACGGAAAAGAAACCCAAAAATCGGCCCGTGCTGCCCTCGGTATAGTA -CTCGAACATGACACCCTCGGTCCAATTTCGGAAGCCATATACATCGTGCTTGGGGTTTCC -GCCAACCATTGTGATAAAAGTGAGGAGGACGAGTCCGATGAGGAGAAGGATCTTGGTTGA -CGCAAGGACGAACTCGCTTTCGCCGTACCATTTCACGGAGATTAGGTTGAGCGCTGTGCA -AACGATCAGTGCCATGGCTACCCAAACGGCCGGGTTGACGCTTGTGTTCCAGTATTGCAT -GATTGTTGAGACCGCAGAGTATTCCGTACACACTAGCATTACACCGCCATAGAAATATAC -CCAGCCCCTAGAAGAGTTAGTGTCTGCAGGGTGAAGCAATGCGATAGAAGGGTATGGAAA -GACTTACATAGCAAACCCGAATGCTGGGTCAATGAACCTGGCAGCAAGCTCGAAGATTGA -GCCACGGATAGGAAGGTATGCAGCCATCTCTCCGACACACAAATTGACAGGCAAAATAAA 
-GAGACATCCCCAAATAAGATACCCAAGAAACACTGACAGCGGACCCGCATCCCGTAGGTA -GGATCCAATACCGACGAAAAGTCCAGTACCGATTGAACCTCCGATGGCCATTAGTTGCAC -ATGTCGAGATGTCAGACCACGCTTTGTCTGCCGGAAGGGTAATACATCGCCCGAGCTAGT -ATCCTCAGGGATGGACACGTCTATCGAGTCTTTCTTCTCCATGACGAAATATTACTTGAG -CCTGGAATGACTCCAATTCAAGAGGCGGGAGCATGACCCTTTATATGCGCACCAATTTTC -GTCTAAAGTGCGCGCGGAAAGAAACGCTAAGAACATTGATAGATAGACGACATTCATGTT -GATCATGCAACGATACAAGACTACAAAACTGCCAAGAACATCACTCGGCGAAACACGAAA -GCCCATACACCACTGATAAGAGAGCTAATCGGCCAGAGTCAGAGAGTCTATCGGGACCAT -CCCAAAAGGGGGAGGAGAAGGCGATACTAACCTTGGAAGATCTGCCAAGTGATAAGCTTA -TCAGGTATTGACCGGGGGATAGACCATGGTTGACTAGGGGATTGACCATGGGTCATGGCT -CACAAGTGCCAACCTTAGAAGCTAATACTTTTGATAAGAGCCGTTTTAATGCCTCAAGAG -GCGTAGGAAGACCTGCCTCTTATTTGCGGGGTAAAAACTCTTAGTCAAAACAAAGCCACG -ATATCGGAAGTTGGCCGGTGGTCATATCAACCGAGTAAACTTCAGACAAGCTTAAAACTG -TAAAAAGATCCTATCAGCCTCCAATGACTAACCAGAACCACTCTAATTCTCTTGATTTCA -TTCATCTTTGGACCACAGAGGAAATTGTGTGTTTTTTCCACTTCGCTTTGGGAGAGCGGC -CGAAAAGTTGAAGCCAATATGTCTATCCCCGCATTTTGACGGGTAAGCCAGCGGGGCTTG -CCCGCCCGCTAGACGCCAGCTCGGTGCTGTTTCCTCTTTTCTACAACCAAGGTCAAGCCC -CATGACGGGTAGGACCTTCCTTGGTCTTCAAAATGAAAGGGGATCTGTACATACTGGTTC -GATTTCTAGGTTTGTGGTGGGATTGTGATCTTAAGAATGTCAAAAATTCCAAAAGAATGT -CTTTGATTGCGGCACTCCATAAAAGTGGACAATTTGAGTAGAAACAGCCTCCCGGGTCCC -GTCCCCACCACTCCACTCAATATAATTAGATTATGTTGAGCGTCACGCTGGAGCCCATGG -AGGCTTTGCTCCCTGGATAGATTACTTGTTTATAGGGCTTTAGTAGAAATTCTTATGCCG -GGGACTTTTCCTAATACCACCTAGCACTCTTTAATGGTAGGTTTTATATATGATGTGCTA -TTAAAATGACCTTGCTGAGTACCAATGGATATTGGGTGTCTCAAAGGGCCCGGGTCAATT -TTGAGCGAAGAAAAAAAATTGGCCGAGAGTTGAAAATTGGGGCCGCGTGAAATTTGGTGG -TTGTAGAAGGCACCAAAGAATTCAAGAGCCCAGCCCAAGATTGATAATTGAGTGAACTTG -AAACCAAACGTGCGAAAAATATGAAATCCACAAACATAGGTACATATGTAGAACAAAGAA -CATGAAAAGACACCACAATACACAAGTTCACCTCGATAACCGCCAGAGATATATAAACAG -ATGATTAAATACAATTACTTTACTCTATGGAGTAATGAAACACGCGGCTTCCAGCTCCAC -AAATCCACCCATCCACCACCAAGTGAATTATTCAAACGCCATAGGAAGGAAGCCCTCCGC -ATGAGCCCCATCCAATGGCCACGGCCAATCCGACAGCAACTCCGGCGGCAGCTGGAACTG -CATCATCTGCTCGCTTATTGCCGTCGATGGACCAACTGCTGGAACTGGGCCACTGCGAGG 
-GATGTACGTGGTAGGTGGAATTGGCACTTCGCGTTTAGCATCGAGGGAAATTGTGCTATG -ATGAGGGCGCAGGTGGGGTGCTGTGAGATTGAGTAACCGGGATGTGCGCATGTAGCCATC -GATTCTGTCTACCTGCGATCGAAGTCGTTGGCTGGCGCCGGCGGCGCGCTGGAGACCTGT -CTCGAGGCGGCTGAGGAGGTCGGCCATTTCGAGTCTTGAGGAAGGTTATTAGTTTGGGTG -GTTGGCTGGGGATGGGCCAGAAACTTACAGTAACGTGCGGACGCGTTGGTAGGATGCTTC -TGATAAACTTCCTTCCATTACAGAGCCGAGGATGTAGCCAATTCCGCCAAGGTGATGAAG -CTGCGAGGAGTTAGTAAAATAGCGCTTAGAAAAGCAGTCGTGAAGGACATACCAAAGGGG -AGCTGATGGCGTTCAAATACTCCACAGGCACATTGCTGAAGACACTGAGAAGTTCACCAG -CGACGTCACATTTGCGGTCTACGCCAAGTTCTTCCGCCGATAAGAGCACCATCCGAAGGA -GCTGGAGTGTTGCCTGGATGTTGGCAGACTGGAACCCGAACAGATCCTTGGCCATGTCGC -CTGTAGTAGGCGGGGTTTCTTTGAATTGTGACGGCAGCGCGGCATACATAGAAAGTACCC -TCTCCATGACCGCTGGTTCCGACATGGATGCTGGGGTAAAGAGAGACCACACCTGTGAGG -TTCCGTTAGCTGATGAGAAACGACGCCGATTGCCATCAACGACGTGTTCTAGAATTCGAT -ACAGATCGGTGGTAAAGTTCCAACCACGAAGCCATGCTACAGGCTGCCGTGATACTACGG -GCACATCGTCTGGATTGAGGACGCCCGAGTGTGGAGATACAGGCGACAACCCGTAGCCAT -GTAGTGTGATAAACTCATCGTCAATTTCACTCGGATATCGCACTAAAGAATGCGCTTCGC -GATATCTAATCACGCCTCCCCATACAATGGTGGAATAAATATCCAACGTATAGATAGACC -AGAACTTCAGACCTATGAATTAGTAGAAAGCACAACCATAAGATGAAGGGCTGCGTACTA -GTCTTCGCCGCTCTTCAGTTTCGATAGGACTCAAATTCTTTGGCCAAAGTTTTTCATCAT -GAAGGCCCTCCATACTGGTAAGAGTATGATACATGCCGGAGTACTTTTGCATATTCTTGA -TCTGGCTATTCTGAATGCTGGCAATCGCGAGAATAGCACACGCACGCATATAGTTAATTC -CCCTTGCCACTGCAAGGTCTCGGGGAATAGAATCCTTGGCGGCGGCATAAAATGCCTCGG -ATGGTGGTTCGGCAAGTTCATCGCGATGCCACCTATTAGTAAATAGTGCGCCGTCTCGCG -CACGCCCAGAAACCAAGGCGCAAACTGCCATGGTACAGGCAAACAACCCAGGATTTCTGA -GATGCTCTTGGTTATGAACTCTTTCAATAAAGGATTGCTTGTGGAAAAGCGGAAAGCTGC -AAATTTATCAGCCATCGAAAACGAACAGGGCAAAAATGATCGAGTCTTACATTGGATATA -CAATCTCGAAGTATACTTGCACCAGGTTACGTATTTGACGATCACAAGCAACGGCAAATG -CATTCCATGAGTTGCGCAATGCGCCATCATCATCGTACCCTTCTGTTGCCACGCCGCCAG -CATTGAAGGTAGACCATAGATCTCCAGTCAATGATGGACGGTGGGCTGACTCGGGGTAAT -ATGAGCTCCTAGACGCAGAGGAGTTTGACATCCTCCCTGTAGCCTGAGAGGAAGGTGGTA -CGGGGAGGACGTTTTCTGGGACAGGCGTTTTTGCCAATAATGGCGTCTCTCGCATAGATG -TCCTGGTGCCAGCCTTGACGCCCCGGCGCTTTGCGGGGCGATCAAAGGTGCACGGGACGT 
-CAAAATCCGCACAATTCTGACATCTGCCTAGCGGGTCTTCACTTCTGCCGCACTTTATGC -TGCGTCTATTGCAGAAGTCGCCTGTGGATGTAACATGAGCTGGGAACCTGGTGCAATCCG -GGGGACAATTGCCAAGCGCAAGTGGAGCATAAAATTGAACGTGGGGATCTTACAGGCTTT -GGAGACGCGTTGGAGCTTTCGGGGTTTTTGCGACATTCAATTCATTAGTTCATTCCTTGG -ACACTGTGGTCTCAGGCGCAAAGAAGCGGGAGCCAACTCGTGGGCCCCGAGCTTCGATCG -GAAAACGGGGTTCGGGGTCTGTACAGAGACACGGGTCGCCGTTTGGGAGATCCTGGGGTA -TAACTGTGCAGAATAGAAAAGGCACGCAGAAAACTGACGATCAAACCCAAACAATCTTCT -CCGACATCGATGCGCAATTGAATGAACATGAGACCGAGGATAAAAGTCGGCATTAAAGTT -GCAAGCCACAGAAGCTGAGTCAGCGACGTGTCATTTCACGTTGACGCCGGGATCGAATGT -GGTGGAGGTTTCGACGATCATTGGAAAAATAAAGAGTAGATATTACATATAATGCTGTAG -ATTCTAATTCGCATATGATGGATAAACATGTTTATCCTGGTCATGTATAGATCATTGACC -ACCTACTCTGATATACACGGCTAGACCAATTACAGGGCACTATCAGAGCCAGGCGAGAGG -AAAGTGAATAAGAAGACTGATGTGAATCTGATCTTGATTATATACATTAAGTGGCATAGT -AACAGCTTTGACTACATGGTAGTATAAATGTTGCGTCGAGGTGTAACACGATCAACGAGC -TCTATATGGTATACAAATGGCGGTGCTTTAGTCAGTTGCAAGTTAAGCTGGGGACATTGT -CACCTCAGTTCTCAGCTCGGCCTTGCCGCTGCCGAGAGCTTCGGTCCTATTGACTCCGTG -TTCGGCGAGGACATCTCCGTGGAGCTTATAGGCCTGTTCCATCTTCCAATAGAATGGACT -GGAGGTCGAGAAAAGAGCGTCAATCGATTCCAGAGACCGATTGCGAGTTTCGGGATAGAC -TGTAAAGATTGTGATGTTAGATCTCGGAATAGGATGCGAATTAACAAATGTTCAAGAACA -TACACAGGTAGACGATCGGGATCCAAATGAGGTTAAGGCCAGCAAACAAAAAGTATGTAC -GGCTCTAAAAGGTTGTCAATATCTGCATTGAAGTTGAAAAGTCGGGAAACAACTTACGGC -CATGACATCAAACATGATAGGGTTGACTAGAGTGGTCATACCCACTCCAATGGCCCACCC -TGTGATCCCGAATCCATTTCCTTGAGCTCGCAGATCACTAGGGAAGATCTCAGTGGGGAC -CAAGAAAGCAACCGTGCCCCACGTGGCAGCATAACTAGTTGTCGTCACTTTAGTATAAAT -GTATCAAGTGTTGCAATTTCATGTTGACGTACCCCAAGTTAAACAAAAAGAGCATGGTCA -CGGCACCAGGCGCATATTGAGACGCCTTTTCGGGATGATGCAGCGAGCCTTCATAAACAG -CCCCAGCCTATAAAATTGTCAGGAGAAAGCGCTTCGACCCGAAAGTGAAAAACCCACAAT -AAGGTTGACTGCAAACAAAACAGCGGCTCCGGCCATCAAGCAAACCCGTCGGCCCAAGCG -GTCAATAATATGTGCGCTGATAATAGTACCTATAATTCCGATTGTGTTAATTCCCCCAGC -AAGACCATTTTGCTTTATATCACCATAACCTGCCTGAGCAAGCAGTGTGGGGGAATAGGC -GGTTACAGCCTACAATTGCAATTAGCACCGATGATTGTTCGAAGACCTGGTTAAAGCGGT -CTAACTCACCGTGATACCCGTCCAGGATGCCATGATTTGAAGCCAAACACACAACCAAGC 
-TCGTCGGCCGAGATTCAGCCCCGGCTTTCCGCCTTTTCCAAATAATATCTTCACAAACTC -AATGGGGGAGCTGCGCTTGCTTCCCCGAGCAATGGTTATAATTTCAAGATATTCTTGGTC -AATATCATCCTGGGAGGCCCGATCTTTGCGAATGCGGTTCAAAAGATCTCGCGCCTCTTC -ATTCCGCCCCACAGAGGCCAGATAGCGGGGGCTATCTGGCAACAGTTTGATGAAGACAGC -TAGAAGAAGTGCTGGGAAACACTGGAATGCGAGAAGGAAGCGCCATCGTACGTCCGAGTA -ACCGTTATCGACGAAAGCGAGGCCGAAGGAGATCCAATATGCGACGGAGATTCCTAGGTC -TTGAAAATAAACAATGTCAGAAATGATCCCTCGTAGCTAGTGGATCTCACGAATTGACAA -CTCACAATTGGCAATGAAAACATATCCCAAGAAGCCACCTCGACGGTTCGCGGAAGAAGT -TTCCGACACTAACACCGGGGTAATACCTGTCAGGGCTATATCGAGATAACAAATCAGAAT -CAATAGCTCTCATTGCAGACCCCAAAACAAGGCAAGATTAGCGTGGATCCTCACCTCCAG -TCCCAACCCCTGTAACCACCCTGGCACATAGCATGAAATTCGAACTCTGCGCGGCCGCTT -GCAGGGCCCCCCCGACCAATGCAAATAATGATCCAGCGAGTAACCCGTTGATCCGACCAA -CTCGATCTGCAAGCCACCCGCCGGCAAAACAACCGAATATCGCACCCAGGTAATAGATAC -TGACAATGCCACCCTGGTGTGTAGTGTTGGTCACTGTCCCGTCAGGTTTACCAATGCCAA -CCTCTGTGACATATCTCGGTGAGCTGTTTACGCCACCCATCACACCCTGGTCATATCCCT -CGAAGAAGATTGAGATAAGCGAAAATATGTGGATGGCAGCATTCAGCTGACGCGGTGAGA -GTCGCTTCCATAGTGTGGTATAACCCATGATGGATATGCTGATCCTTAGATCGCGGAAGG -CAGCGTTGGAGGAGCTTTATTATGGGACCCTTCCCGGCCTTTGTTGGAAGATTCTGAAGA -GCTCGGTAAGACTGGGGTGGATAGATACATGCGGGGGGGCCCGAGTCCCACACAACAGCT -TAACATTTAGGCAATACGATATAACCGTTTTGCCGGTCATCCCTACTAATTAGATCTATC -AGGGAATGTCGCAGATTTCCCGTCTCCACCAGATACACATATGTATTATGGAGGTGGGTT -GAGCCGAGTTTGTTAAATAGGGTCTTGATCCCGAGGAGTTGGGTACTTTTTTTCTTTCTT -TTCCTTGGAGTCATCTTATAGATACTGCATTTTACAAATTGGAATTTGTAATCAGACATG -GCGTCCGACTCGCTTAATGGCACTAGCCATAGTCTAGACATCACCGTTTTGGGGCTGAAT -AGCGGCACATCTATGGTAGGCGACTGTTCGTTTATTTGAGAAACGCCAGACTAACCAGGT -ATAGGATGGCATTGACTGTGCCCTTTGTCGATTTCGCCAGGAAACCCCTGAATCACCTAT -GCACTTCGAACTACTCAAGGCAAGTACCCGCGAGAATGCCCCACAAAGTCTGAACATACT -GACCAATGATACAGTATGGCGAGATTCCACTCGAGCCGGTGATAAAGAAGCGGGTGATGA -ACATGATTCTTCTTAACAAGACCTCCCCTTCCGAGTTGTCTGAAGTCAACGTGATCTTAG -GCGAGACCTTCGCCGCGGCCGTGCACCAGTTCTGCAAGGATCACAATGTGGATATCAAGT -CAATCGATGTCCTCGGATCCCACGGTCAAACCATCTGGCTACTGTCCATGCCAGAGGCGG -GCGAGACCCGCAGCGCTCTGACCATGGCCGAGGGCACATTCCTCGCCTCGCGCACGGGCA 
-TTACTTCCGTCACCGACTTCCGGGTAAGTGATCAGGCCGCTGGACGTCAGGGAGCACCCC -TCATTGCGTTTTTCGACGCGCTAGTGCTGCACCACCCGACCAAACTACGTGCATGCCAAA -ATATCGGCGGTATCGCGAACGTCTGCTTTGTCCTTCCGGACAGCCACGGCGGCGTTGATG -CCTGCTACGATTTCGACACTGGGCCAGGAAATGTGTTCATCGACGCTGTCGTGCGTCACT -ATACTAATGGCACGCGCGAGTACGACCAGGATGGCGAGATGGGAGCCTGCGGGACCGTCG -ACCAGGCACTTGTCGATGACTTCCTGACACATCCCTACTTCGCATTGGAGCCACCCAAGA -CAACGGGTCGTGAGGTTTTCCGTGACACCCTCGCTCATGAGTTCATCGTCAAGGCCGAGG -CCAAAGGCCTTAAGCCCGATGATGTCGTCGCAAGCATCACCCGTGTGACCGCCCAGGCTA -TCGTGGATCATTACCGTCGCTACGCGCCCAAGGACCTTGAGATCGCAGAGATATTCATGT -GCGGCGGCGGTGCCTACAACCCTAACATCGCCGCCTTTATCCAGGAAAATTACCCCAACA -CCCGCATCATGATGCTCGACGAGGCTGGTATCCCTGGTGGCGCTAAGGAGGCCATTACTT -TTGCTTGGCAGGGCATGGAGGCCATTGTTGGTCGCTCTATCCCCGTCCCCACTCGTGTTG -AGACTCGCCAAGAATATGTACTCGGTAAGGTGGCTCCGGGCAAGAACTACCGCAACGTCA -TGCGACATGGTATGATGTTTGGCGCGGATCGGGATCATCTGCCCCCAGTGAAAGAGCTGG -TGAATTACATTGACGGCAAGGCTTTCGACAATAAGTGGTGATGCCAGATTCCCAGACTTT -TTCCTATCAGGTCCTTTGCATAATGGTTTTGGGATGCATGTGCATACTATTTGGTGTTTC -TTTTTTGGTGCAATGCTACAATGATGAGATTATATATATAGACTGCCGAATGCAATTTTG -TCTCCTGTTACTTTTCGCCAATAGATCGGTGAATATCTCACAAACGAAAATTGATTTCAA -TTTACTTCATATTGCGTTCTCTCCGCATTTAGCCAGGCGATCCACCACACGAGGATCTCT -AGCTCGATAACCCAGATTCCATCTGGCTCGACGTTAAATTGGAAATAATAAATACCCATT -CAACCGTTTTGCCTTTGAACACCAATGGAGCATGTGGAGAAAGTTCCGGAGGCTATGTAC -ATCGGTAGAGGTTCGGGCCGAGCTTCAACCGGGTCCATATGATCATCCGTAGCTGCAGTG -GGCCTTTGAATTCCTTGTTTCGCTCCGGAGGACTGTGGGGTTGAATTCCACTGTGTGATC -TAGACTGCCGATAAGGCGTTCCCCGGGTTTAGGCGCTGGCCTTATGAGGGCGATCTTTTC -GTCCCTCTCGGAGATTTCGGAGGATTTTAGTCTCGATGAGTAGGTATGTAGCTCCGGATT -TTCTGTACATCTCTCTCTCTCTCTGTGTGTGTGTGCATGTGTCGATGGAAGAAAATGATG -ATTACAGGTAGATGAGAGAATGATATTCTTTGACAATAAGTGCATCTACGTACAGGTATA -TATTAACCCCGACGATGCAGCCCTCCTATCGAACAAAGACAAAGACTACCAACGCATTCT -TTCATTGGCATCGCATAAGCTCATAGTACACTGGCCATGTTTCTGGTACCTCCATATAGA -TATCGAGTTGGACTGGTATTTGTAGTTCCAAGCTTGTGCTTTCAAAGGTTCCGCTAATTG -AACGAAGGGTCTATTACAATAAGAAATTGCTAACCATGGAGAGCAAACATCTTGGTGACT -CGACAGTCGATAATATGCCCACCTGGGCCGGGGAATCTTAAAACCGAACTCTCGTGGAAA 
-CATCACAAGGGTCGAAGATCGCCGGCCCAGATCTATCATTGCAATATTATCTATGGAGTT -CACTGAACAGGTGCAATACGATCGTACGGTTATTTCCTTACAGCGAATTATTTCCCTCCT -AGCTATGACACTAGGAAAAGGCTAGATATCATGCATCTATCAATTGCGCTCAATCGATCA -CATCGTCACAGTGTTCCGTTCAGGGCAGCGGCCTCATCTCCTATTGACGCTGCTCAATCT -CTACACAATTACATGAATAATTCGGTGATCTCTCGATGTGTGGCTCATACTAGACGTTCT -CCAGGCGTGTCTATGCATCTATATTAATGTTCTTCATTACCCCGCAGTCTAAGTAGTGTA -ACGTTACAGTCCAGAGAAGCTTTCGGCTTCTGAATGGCTTGAAGATTTCGCCGAGATATC -GAGAATTCTTCCGTGGCTGACGATCTACAATGTCGTACTCAAGTTGATTGCCCACCCAAT -GGGGCCGATATCGGATACTATACAATCTCCACCTGACGATAATCTATTTGATATGGCAGC -CCATGAGAGGTTGGCGAACTGCCAAGACCAGATGCCAAAACAACCCCTGGCGGTTCGAAG -TGATCGTCGGGTCTCTGCTCTGTGATCACAAAGCATTACTACCTGAGAGCACTACCATCG -TGGAGCAGTACTACCTGGACACACGAGATCCCCTGTAGATCTTATCTTTTGACCTACCCG -GAAATACCCTGTTCGGGGGTCTCCAGGTTCTCAATTATCCATCGGATACATCACTACGAC -CTAAGAAATTAATCAAACCAAATCAAACCCCGATAGCACCCCCTTGCTAGGGGTACTGCA -ATCAATTATCGCATGATGTAACCGATGGCACCCGTCACTTCCCATCATGGAAAGTCCCTC -GTGAACCCCACCTTCTCTATATAAACCACGTCCGCCAACATGTAAGGTCTTCACACAATG -TTCAACCTACACCGCAGCACCTGCCTTCGGGCGTTCCTTGTTGCCCTCGCGCTTGGAGCC -AATGCTGCACCCATTGCTAATGAAATTGTTGGCCGTGACGATGCAGGCCCGGCGGTGTTC -CCGATTCCTCAACAGATCGCGTTCACCGGGGGGAAGGTCTCCCTTGCGGGTGATGTTACC -GTCGTGACCGACAACTTTACGGACGCTGCCACGATCGACACGATCAAGAAGGTCGTGGCT -GCCGCCGGTGGTAAAGTGGTTGTCGCATCCAAGCCCAGCGGCAAGGGTACCCAGATTTTC -GTTGGCACCGAAAAGGACTCTAGCCTCGCTGTCGCTGCAGCCAAGGCTCTTGCGGACAAG -TCTGCGGATGGCCTCGATGCCGACGGCTACGCTTTGGCATGTGGCCATTATGAGACACTG -CCAACCATTGTCCTCAATGGTGTAGATACCCGCGGCACTTTCTACGCCTCCCAAACTCTC -CGTCAGCTTCTCGACGGTGCTGACATTCCCGGTGTCAAGATGCACGACTGGCCGCTTATG -TCTATTCGTGGCTCGATTGAGGGGTTCTATGGTGTCCCGTGGTCCCACGAGGCACGCCTC -GAGCAGCTTGTTTACTACGGAAAGCACAAGATGAACACCTATGTGTACACGCCTAAGGAT -GACCCACTTCTCCGTGCTAAATGGCGCACGCTTTACAGTGGTGATGAGCTGACCCAGCTC -AAGGAGCTTGTCACCACCGCCAATGCTAATCATGTCGACTTCACCTATGCTCTCTCACCT -GGCTTGAGCGTATGCTACTCGTCCGACGAGGACTTCGACGCCACTGTCGCTAAGTTTGAC -CAGCTACGTGACTTTGGTGTCAGCAGCTTCTATATTGCTCTCGATGATATCGGCCTCGAG -TTCCATTGCGATGCAGATAAGGCAAAGTGGCCCAAGACGGAGAATGACGAATGGATCGCG 
-GATGCACAGGCTTTCTATCTGAACCGCGTTCAGACTGAGTATATCGAACCTAACAATCTC -AAAGACCTTGACACCGTCCCGACCAACTACGCCGGCAGTGCATCAACCCCGTACAAGACA -GAATTCGGTACTAAACTTAACAAGAAGATTCGCGTTCAGTGGACCGGTGAAGGAGTCTTT -AGCAATGAGATCACAGTTGACAGCGTCGTCAAAGCCGACGAGAGCTACGTCACCGACAAT -CTCTTCCTATGGGATAATTTCCCAGTCAACGATGGCAATCGTGACCGTCTGTTCTTCAAC -CCGCTCACCAAGCGTGCCGCCGAACTATACAAGCACCTGCTTGGATTCACCTCGAACCCG -ATGAACCAAGCTTTCGCCTCTATGGCCTCGCTCGCAAACTACGCCGACTATACATGGAAC -GGGCCCAAGTACGACGCCACCAAGTCTATGGATGCAAGCCTCTGGGAGCTGTCCGGTAGT -GACACCACCGTCCACAACGCAGTCATCGCTTTCACCGATCTCAACCAAAACTGGCCATAC -CAAAACCCCGTTACCAATGCGCCCCAGCTTAATAAGGACATTGATGCTTTCTGGGCTGCT -CGCAAAGCCGGCACTAGCGACGGCACTAAGGCGTTGAAGGATCGCCTCGCGCTGATTATC -ACCATTCCCGACGTTCTCCCCAAAATGGCTACCAAGGGCTTCGCTACTGACGTGGCACCC -TGGTCAACTCTTGCTAAGCAGTGGGCGACTGCCTGCCAGCACTTGATATCTATGCTCGAG -GCGCTCGATGCCAAGGATCAAAGTAAGGCAGATGCCGAGTTCAATTCAGCCAAGGAGTGG -GTTGAGAAGACGAAGGCTAAGACTGTTGATGACCGTAATGGAGACGGCGAGGATCTTCCT -AAGTCGATCGTCCCGACAACAGGTGATGAGGCGTTTGACAAGTTCCTCACGGATGCGACG -GCTATCTACAATGGTAAATAGATTCTTGATAGTACAAGTCGAACAGACTATAATTCACGC -GTTGTACATGAAATGCAAATAATGGTATTAAGAACAAATCACTGTTATAGAACAAGAGTA -GTAGGATAAAACCAAGCTGATAGAATAAACTACATGCAACGTGTTCACCCATCAAGCTTC -GGTATTCTGATCCCACAGACTACGCACCGTAACACCATCCACCCACTTCCCCCCAGTCTC -ATCCTTCTGTACAACGAACCGAAGAGTCCGCTTCTCACCAGGAATTAAAGAGAAGGCGTT -ATCCTCAAAGTATCCCACCACACCAGCGGGATAGTCAAGCCATGTATACAACGAAACAGC -ATTCCGAGCCTCGACGGTAAACGTGCCCGCCCGAACATCATGAGACAACTTAAGTTCCGG -GGATTTCAGTGCCAAATCCTTAGGAAAGACAGGCGTGAACTGGTTCCGGTGTGTGAACGT -AGTCAAAGTCTTCTTTTCGGCGTTTGGTAGACGGCCCTCAGCAGTGAGTTCCATGACAAG -GACCGCATCTTTCGTATCAGGCACAGATAGGCTTGCTAGATTAGTCTCATAGATACTGGT -GGTATTGAGTGCTCCAACAGAGAACTCCTTCACCAAAGGTGTACCCGCATTCCCCGGTAT -CGACTTGCCAGACAGGTCTATCCAAGTAAGCTTCACAGATCCCCGAGCTGTCTCCCAGAG -ATCCGATGTCACATAGATAGCCAAATGTCCAGTGGTATAGTTCCAGAACGGCGATACAAT -GACAGGTTTGAAGATATCGCGTGCTGCGTAGTGTAAAACCTTCCAGCGACCGTCATACTC -GATTCCAGCCCACGTCGGCGCCTGCCAGATATCTTCGAGTTGCCAGTACAGAGCTCCGAG -CTGACGTTCTGGCATTCCGCTTCCGCGACGATAGAACTGGATTTCGGATTTGTATATGTC 
-GGCTTGGAAAAGCTGGGTTGCGTGACACCAGGCGCTGAAGTTGGCGATTGAGTCTTGTTT -ATTGGGAATCGGGTAGTATCTTTCTACTGCTAGGGTCATTTCGGCCATGCCCAGAGATGT -ATTTTTGTAATTTTGGGTGTCCGGACTGCCTGCTGGGTAATGGTGATTTCGGAGCATGAT -GGTGCTACTGTTGAAGTGGAGGTCTTCGTCGTCCAATGCTTGTTGCCATGTTTGTAGACT -CGGCATGCTGTGGTAGCCGAATTCGTTGGCGAAGCGACCGACTGGGTAACCGGTGAAGTC -AAATGCTACACTGCTGTCGTAGTTGTAGTAGTCTGTATCACCGTAGTAATATCCTTTTTC -CTCTGTGCCATTATCGTAACGCTCGGTCATGGGAACTGGAGCGGACAGATCAACATCTAG -GAATCCGTTGTTGGTGCTACTGGGCATGTATGAAATCGATCGCGAATTCTCGTACACCAA -CGGGAGAATCAGAGAGATAAAGAGGTGTTCATAATCGCCAACCAAGCGGGGATATGATTC -AGGATCGTGTTTTTTAGCGGCTGGTAATTCCAGACTTTCAATCTCGTTTCCACCAGCCCA -TAAAGCCAAGGATGGATGGTGGTTGACTCGTCTTACATTATAGACAACCTCGGCGGCAAC -ATTATCAAGGAACGGTTTGTCTGTGGGATACAATGCATCGCTGAATTGGAACTCGCTCCA -CAGCAGTACTCCGCGTTCGTCGGCGAGATCGTACATGAAATCAGGAAGGTATGCGCTCGA -AGCCCAGACTCGAAGCATATTCTGGTTTCCAGCAACAACTGCATCAAAGAGTCGCTGCAT -CTTGGCCTCAGTGACTCGCGGCCAGAAAACATCTGGTGGAATGAGGTTCGAGCCCTTGGC -GTAAAATTCATGACCGTTTACCTCGAAATGCCAATTTGCGCCGGGTGCGATACCCTGAGC -TAACTGCCCATCGGTGATATTGGATCGATTGAGGAAAATAGTGCGGAAGCCAGTGCGTCT -AGTTACGCTGGCGAGATCATCATCTGCGCTCTGGATAGTAACTGTTACGTTGTATAGGCT -TTGCTTGCCCATTCCTGTGGGCCACCAGAGGTTCGGTGCGTCTGCATCTACTATGGTGGT -ACCGGTTATGGTTTGTCCAGATACAACCACATTGTCGAGTGATGCGGACTTCAGTATTTT -GCCAGACTCGAGGTCCTTGATTTCGATAGACATGGATGGATGTCTAGGTAGAGAGCCAAG -ATAGTCGATACTTGCATTCACAACCCAGGGCTGACTCTGATCGGGGGCAAGATGATTGAT -CTGGCCTTTTCGATAAATGTCGAAGCCTGTGTTTAGAACATATACATCATGATCTTCTTC -GGCCTCAGATTGCACCAGATAGATGTTCTGCCATGGGCCAGCAAGCGAGAACGCTGGGCC -CCAGTCCCAGCCAAAGTCGGACTGTTCTTTGCGAATATACCAGCGGTTAGGGAATTCAAA -GGTTCCTTGAACACCGCTAGGCCATTCTGGAGATATCGTTAGATGTAGTGTTTTATTTCT -TTGTTGAACACTTACTTTGAGAATTTGGATCCGCGGCAATTGCATTTGCGATCTTGGTGG -CACTTCCAAACTCGAGAGAGACACTTGAAGAGCCTTTGCAATGTTTCAGCGCATTGGAAA -TGTCAAATGTATACTGTCGGAACTGGTTATCTGTAGTTCCTATGAATTGACCGCAGAACT -CAATTGTTGTGAAAGTGTCAAGCCCATTAAACACAAGCCATGAGGACTTGGAGGTGGTTG -ACCTGTGAAGAGTCAGTGGGAATTCTAGTTAGCATAGCGAGGAACACAAACAAGCCATCG -ATTGGTTTACTGGTATAAGTCCAATTATGCTCCGCGATCCAACGCAAGTTGAAGTCATTG 
-AGACCATGGTAACTTCGACGTGATTTGAATTAGTAGAGAGACTATAGGCGAAACGAGCGT -TGTACTGACGGATCATCTGTTTTTGTTAGATCAGTCAACGCTTTGGACTTGGAAAGACTC -ACCGATCACATTCGCATCAAGAAGATCCAAATGCACCTGCGAGGGAACATTGCCAGGAAC -ACTGATGTTAAGCGCCTTGCTACTGACAGTCCATCCATCACCACTCAGGTCCAAAACGTT -CTGCCCAAGAGTCTTGGGCAGTACCGCTGGAAGCACCGCCAGCACAAGCTGGTTCCAAAG -ATACATAGGGTCAGGAGCGAAAGATGCCGAGATCTGTAAAATCGACGGGGTTGGTGTTAT -ATATAGGTCTAATTTATTCTTCCATTGGGCACACACAAAAGTACCTAAGGATGCTCTTGA -TGCGACACAGCCTGCATTGTAGTCTGAGCTTTTGGGGTGGGGGAATCTGAACACCCCCTC -GCATGAGAAAAGAGAGCAACGGTAGCAGCGCTAATGTATTGCCATCAGGGATCAAGTCCT -GAATTCCGAGTCTGGAATCAGGAAATTATGAAGAATATTGGAGTTATGTGGAAAGTGAGA -CGGGAGGCGTAAGGCTTAGTTTATTAGCGCGACATGGCGGATATGATGACTTCGTTCAGC -CTTAGATTCCCTGTGATTGAAATGATACATATTACTATGCTTAGTATCAATAAGTAATGG -AATGAACCCCCTGGATTCCAAGCAGCAACAAAGACATAACATTCCCTCTTGGATTTGGTC -CATTTGAAACCCGGCTAATGCAAATGATAGATACTATATACAGTGTTGTCTATATCTGTA -CAGATGTTTGCGGTTAATTCGCTAAGTCAGGTTACGCCGCATCGTCGCTAAATCACTCTG -TGAGAACCCAAGTTTATCAATGAGATACCCCTCAATGCCGTTTTCATAGGTATTCTCAAA -AGATATCAGGAAAGCCTCCATGGACGTGGCTCGAACACCACAGAGTTCGAGCATACCCAG -TGCCTCGGGGCTCAATTGGTCGATGTCGATTCCATCAGCTCCTAAGTTGACCGCAAATGC -CTCCATGAGGTTTTCCCGGGCGCTTTCCAGTCCAACGCGAGTGAGGATGTAATCGTTGAT -GATATCTTCATGGGGCTGATTGACCAGGAGGAGGATGAGTGCGGCCAAGACACCCGTGCG -ATCTTTACCAGCTGTGGGAATGTATGGTCAGTATTCATCCACTATGAATGGGGGAAAATT -GGACGCGACCATGCAAAGACCAACCGAGGTAACACATGTGAAACATACCTGAACAATGAG -CGACGAACGGATCGTCTGGTTTATCTCTGATATGCTGAAATATCATGGTGAAACACGGGG -AAGCCGCTTCGAGGATGCCGTTGTACATCTTTACGAACCCGGAGGCGCCATTGTCTTCAC -CTGCAAAATCGCGCAGATTCAAAGTGGCTGGTCGCGCTCCGTATGGACACCATATGGTCT -CGACTCCCGTGATATCAGGTGAAGGAGCTTTTTGTCGCTCGTTCTCGTTCCGGAGATCGA -AGATCGTGGTGATTCCTAATTCGGTTGCGATGATGGATTTCCCGGCTCCGAAGATTTCAG -ACAAGCTTCCCGACCGATACACGAAGCCTTTGCGTAACTTGTTGCCGTCATGGCTTAAGT -CGCGAAAGTTGGATACACCTTGTATAGGGATGAATGGCGGCAGGGATGTGATCTTGGCTA -CGAGGGCCGGGGGGATAGGAGTTCTGATATCGGTATGTAATACCTCACTGATTTGATTGT -TTTCGAACATGATTAGAGAATTGACAAAGAAAACAAAATGCAAATGAACCAAGACATTGG -TTTTTTCCAACTATGTCATCACCAAAAAGAGTGGGGAGGGCAATGTCATCATATCCCGAT 
-GACGATATCCAGTGCCAGGAACACTTTCAACACTTTCACGTTGAGCTATACGTCTTGGAC -GTGAGATGTTGAGACCATTTTTAGCAAACGGAATAATAAATAGCATTTATACTGCCCTAC -ATGTAACTTTTTCTTCCAACTCCATCTTCCATCAAAGGTTGTCCAGTCATGGAACCTCTG -ACACCCGCAGGCGTCAACATAATCACCCACTTGGCCCCGGATAAGCGCTATCGCATTAGC -GTAGCGGTCTTCCGATGCCGAAGGATTGGGGGATACGCCGTCCTACTTCTGCGAAAAGCT -GACCAAGGTCCCACACATGACTGGTGGGATCTTCCATCAGGTCCGGCTCTAGAAACTGAT -GTGGCCATGATCGATGCTGTGGGGCGTATAGTATTCGAAAAGTCTGGTCTTGGACTCAGG -TCATACCATAAACTCCAGGAGGTGGAGTCAACGGTGGCTGGATCTGGATCCGAGACCATC -ACGAACCTCAACCTTTTGGTCATCGATACATCGTCCGATGAAGTCGTGATCGGCGATGAG -TTCTCTCGGTATGACTGGGTGGAAGAAAAACGGTATCACTCTCTCGATATCCCTGATGCA -ATGAAGGAGGTGGTACGCCAGGGATTTGCATTTTACCGGTCAACCCTTTGAGGTGTAATA -TTTGAGATCTCATTGAGATAAAGAACTTCAACATTCAATTCAATGATGGTCAGTAGTGTC -CAGTAGTATCGCTCTCAGTAGACAAGTTCTCTTCCCCGCATTGGATCGGTTGGCCGCGGA -GTAACGTGAGGGTTGAAATGCCGATGCCAACCGCTCTCCGCAATCCCCAGACATTCAAAG -ACTCCCTTTCTTCCATTGAAATCAAAGTACAAATGTTATTAAGAAACACCAAAATGTCTT -TAAAGCTTCTGACTCGGAATGGCTTTCGCCAGTCCGCAGCGCGCCTCCCCGCACGGAGGT -GGAGTAGCTCCATCTCGCAGCAGCCGGGGTCCGATAGGTGAGTGATATAGCTTCAAAGAC -CATGTCTCTTTTTTTCAAGGGCAGACGCTGACATGAGGATGATTTGTTTAGTGTACGATT -CCCCGGGGCCGTGAATAGCAAATTCACCTCAGAGATGGCCTTTCTCAAAGCCTCAGATCT -GCCCGCTATCCCTACCTACCGAGTGATGGACTCAGACGGTCATCAGGTTGATAAGACAAG -ACCTGCACCCGACGTTACAGATGCGGAGGTCCTGACATGGTACAAAAATATGCTGTCTGG -TTAGTTAGCCCACGTTGGATACAATATCAAGTACCTGACTTTGACGTGCAGTGAGCGTTA -TGGACGTGGTCATGTTTGAGGCACAGCGGCAGGGTCGCTTAAGCTTCTACATGGTATGTC -CAATTGATTAAGAGAAGAATAAAATGATTCACCTGACCGGCTACTTATAGGTTTCCGCCG -GCGAAGAAGGCATCACCGTTGGATCAGCCGCAGCATTGACACCAGATGATGTAGTCTTTG -CACAATACCGCGAAGCGGGTGTATTCCAGCAACGTGGATTCACACTGAAGAACTTTATGA -GCCAGCTCTTCGCCAATTCCAACGATACAGGCCGTGGTCGAAATATGCCAGTCCACTACG -GACAAAACTATCCTCGCATGGTCAGTAACACTACCCATGTGTATACAGCAAAGTACTCCC -AACCTAACAAATCCAACAGCACACAATATCCTCCCCATTAGCCACACAAATTCCCCAAGC -TGCCGGCGCAGCATACGCCCTGAAACTCCAAGACCTTCAAAACCCAAACCGAGACCCACG -CATTGTGGCCTGCTACTTCGGTGAGGGAGCGGCCAGCGAAGGCGATTTCCACGCCGCGCT -CAACATTGCCGCAACACGCTCCTGCCCCGTCGTCTTTATATGCCGAAACAATGGCTACGC 
-AATCTCAACCCCAACACTAGAGCAGTACCGGGGCGATGGCATCGCCAGCCGCGGCGTGGG -GTACGGTATCGACACGATTCGCGTCGACGGCAACGATATCTTCGCCGTAAACGAAGCAAT -GAAAGAGGCACGTCGAATTGCCCTAAGTGAGGGCGGACGACCCGTGCTGATCGAAGCTAT -GAGCTACCGTGTCTCGCATCACAGTACCAGTGACGACAGCTTTGCGTACCGCGCGCGAGT -CGAGGTTGAGGATTGGAAACGTCGAGACAACCCCATTATCCGGCTGCGGAAGTGGCTTGA -GAATCAGGGGCTTTGGAGTGaggagcaggagaaggagacgcgggatgagatgcggaaggC -CGTGTTGAAGGAGTTTGGTGAGGCGGAGCAGGAGAGTAAGCCTTCGCTTAGGGATGCCTT -CACGGATGTTTATGAGGAGATTACAGAGGAACAGAGGGAGCAGATGACGGAGCTCAAACG -CATTTTGGAAACTTATCCGGATGAGTATGATCTGCGGCCGTATAAAGATGGGATCAAGGG -TTTGGAATAAGAATTTGTCTCTACAAAATATACAAGCGACTCTCGGGGTATAGAGCATTA -TAACATGGGGGTATGTTCATATATATTCCAGAGATGTCCAGGAGGATGTTTTATCGTAAC -GTTTATTGAAAATCCTCACTGCGGTGATTTGAGGAATCTGTTGCAGATTTTATGTATTCG -GCCAAGACCATAATATACAAACCCTAACTCCAGCACTCGACCACATGTAGTCATACTATA -GCAATGAGAAAACGTCTGAGCATGAATATATGGACTCAGCTGTAGTACCGACCGCGTCCC -CTGCGCCTAGCACTTCGATTTTGTGCAACAGCAACAGCAACCTTAGACACAGTAGCAAGA -TTCATATAATCATCATCCGAAGCCAGGTAAATAGCACCGAGCCCCATATCCTTCGCCAAC -CGAACAACATCCTCAACACTAGCGTCCTCAGGCGTCTGATTCAAAATAACAGCATCTTTC -CCATGATACTCAGACGTCGAGAGATGCTCATTTGGATCCCGAGCCACCCAATTAGCATAA -GTATCCTCATACTCGACAATGAAGTCCGCTGCCTCGAACCACTTGGCAACATCAGCTTTG -ATTTTGGCTCCAGGATTGAAAATGACAGTATTCAAATTGCTGCTTTTAGATTTCCGAGAC -AGGCTCTGGAAATAAGCGAGCTTGTTTGGGTCTTCGCCGTCCGGTGCCTCGTCGAAGAAA -ATGCCGGAGAGACGGATATTCTGTCCTGGATAGGTTGCCCACTTGGAATAGGTAGCGATA -TCCTTTTCAACTTCAGAACTGTCTCGCGTTCCTTGTTTTGTGTACGTGTAGCCCAGGACC -TGGGTGTTGGAAAAGCTGTTCAGCTTGGCGATGCCGGTGACGAAAACTTCTTCTGGGTAT -TCTGTGTTACCGGGACCTGAGGATGGGTTGATGATGAGGTAGAAGGGGATTGATGGATGT -GCTGCGATTGCGTCGTAGATTGTATTCCATGCGTTTGTGCCAGGCCAGGCGTATAGCGGT -ACTAATAGGCCGGTTGACACGACTGTTGATGCTAGGGTAAACACGGTAGCTATGATAGAG -AGTAATTGAAACATGGTTCTATATTTCGGGCTGGACGTGAACGAATGGATACAGCACCCA -AAGGAGCCAATGCTGCTGAAAGCAGTGAAAGGAAAGAAAAAGACCAACAAGACTGACGGA -AGATGGGTGTAGTTGGTGAGTGATGCATAGTTTATACCTACATGCCAGCCGAGACACTCG -GGAAAATAATGTCCTATGGGAAGAAAGGGATAGATCAGTCCCAATCCCTATTGTATCCAG -ATCGAAAGTCGAAAGAGACCTGACATGAGTCAGCCACATGTTTCAAGAGAGGCGAAGTGA 
-TAATTCACCCTGACCAGCCAAATAGACATTGTGGGATTTGAGGGAAACACCATAGAAATG -AACTAATGTCGGATCCCTTCGGAGGAATGAGCCTTCATTAAATGTCTGGTCCTAATTTAA -ATGTTTTCCGTACGGAGGAATGTGGACCTATAGAGATCCAGCCCTGGCGAAATCGGTGAT -TGATATCCCCCTTGACGCACCTCCTTGTTGACTAGTAAACATTGGTGTCTTGGGCGTTGT -TGATTGACGAGATTTGAAATATAGTTCGTTTCTCATGTGCAACTGTTACAATTTATGGAA -CATGCCAGGTCTGTGCTCTCGGTCTTTGGGTTTCGGGTGTGAGGTAGGCAGATTAAGGTG -CAATATATACTACTATACACCCAAGACTCAAAATAATAAGTTTGAGAAGTGTTCTGGGGT -TCATTTGGCATCAAGGGATCTCATGAGGATCAATTTGTTTGCTCATTATGTGCCAAGTAC -ACTATTACTATTGGAGATAGGGCTAGAAGGTGCATGTGTTGACTCCGAACACTGCAAACT -CAGAGAAGTGCATTAGATTAGGAGCTACGTAGACAAATGTAAATGTTGTACAGTGATTCA -TTCAAGCTAAAACATATAATCATACAGAATGGAGGAACGCGGTGAAATACCACCATCCGG -GGTATCATGCAAATCCATATGTACATGCGCAAAAACTCCAAGTCCGCCTAAGAATTTGCC -AGCGGGAGATTGCAAAAGGGACTAAGGGCTTCGGTTTGAGATACAAAGTCAATGATTGAT -TATACCTCCGTCCCAGCAGCAAAGTTCAATGCCTGTTGCTGAAGCTTCTTGTTGCGTCGA -AGCAAAGCTGCGAATTTCTTGTCTAGGGAATCCATTTCTTTCCGGGTGAGTTCAAGGTCT -TTCTGGGAACGAGCAGAGAGATCCTTCAGTTCCTCGTTGGTCAAGCTGGCAACGTCATCG -AGAGTCGGATTAGTACCAACCATGCCCGGATTGCCCAGCATGTGCTGCGCGGTTACTGCG -GCGAAGGAGCGCACGGCAGCAGGGGTGGTGTTTAGAAATTCGGCCGAGTTGGCGACAGCC -CAGTCATGATTGCGGGTGCTGTCTGCCTCCATGGCCGAGAGGACGTTAGGTGGGATTACA -GATTCCGCCGCCTCAAGAGAAGAGGACGGGGACGTGGCACCGAGACGAGTGGGCATGTCG -TCGACCCAGCAAAGGTTGAACTTTTCCGGATTTGCGAAAAGAGCTTCCAGACCTCCACTG -GGATTGCTCGCATCGTCTCCAATCATGAAAGGATCCGAGGTAATGGCTGTGGTCTTCTCC -AGCTCTAGGTACCGCTCCTCCTCTTCTGGGTCGAGGTGGCGGAGGACGCAGCCGCGGGGC -GTGATGAGGCATCGAGTCTTGAGGAAATCATCACCGATTCGAACGGGCTCTCCGCGAAGC -AAGGTTTTCCGATCCTCCTCTGTCAACTCATTGGGACACTGGAGATAATCGTAGTCCTCT -GCAGTGCACTTCATGTCGGTGCGTTGGCTGAGGTTACTCGGGTTGAAGAGGGAACTTTTG -TTGAGGTCAATTTCGCCGGACTTGTGCATCTCAGCGAGCAACTCGTGGAGAGGCTTAGTT -CGTTCGACGAAAAGATCTTTGATTGACCTCCCAAAAGCCTCGGAATCCTTCACCAACTGT -CCGATTGTATTGTTCGAGCGCCATGGCTCTACAGGCGGTTCCTTCTGAACAGGTGGCTCT -TCGATAGCGGGGGGAGGAGTAGGCTCTCTGACAGGCTTGGTCTCCTTGGCTTTAGCTTTC -TTTGCGGTATCCGCCTTAGGTTCGGCCTTGACCGGAGAAGCGTTTGCTGGTTTTGCAGTG -CTCATCTTTTCGGGGGCACTGGCAGCCGATGCATCGACTGTGCTCACAGTGGCTGTGGGG 
-GCCTTCTTGGTCTTGCGCTTCCGCCCCATGATAGGCGCTTGAACAGTTTCCTCACTCACA -GGCGCAGTTTCAACCTTCTTAGCCTCGGCCTCCTTCGCCTTTTGTTTGCGTTCCTTCTTG -ACTTGGCTCTTAGTCATGGAGCGCACGGGTGCTGAACCGATACGAGAAGACGCGGGAGGA -GAATTCGCACGAGAAACAGAGGTGGAAGGATAGTAATCACCTTCCCATCCCATATCGGCA -GAGGTATCCGGGTTACTGCTTGATGACATACTTTGTCTCCTAGAGCGGGACTTGGTTCCT -CCAACAATGGAAGCGGGCACAGAGGGTGCGCTGACGATTGGGGGAGTTTCTGGTTTGGTT -TCTACCACACGAAGAATACGGGGCTGACGAGGCGCAGGCGAGTCGGAAAGGCGAGACGCG -ATCGTTTGGGGCGTGTCAGGCCGCGAGCCGACTGCGGAAACAGAGCTAGGAGCTGGTATC -TGCTCACTCTTGATGGGGACGGGCAATGAGATACCAAGATCCAGTGGGCTCGCTCTTCTA -GTGGCAGAATCCGGCTGCGACTGGTTTCCAGTTTTGACAGAGGAGTCCTTAAAATCGGCC -CCAATGTTGAACTCGTTCTTGCGTTGGGTACGAGCCTTGCTCGCGGACTTTTGCACTGGC -GATCCAAAGGAGATCTGAGAAGCGTCTTTCACGCGGCTTGATGTTAAAGGACCGCCGACT -GTGAGTTCGGGACTCAAAAGACGAGACGGGCTGTCAATGCGGCTCTGCGATGGGCTGTTG -AGTCGTGAAATGTTAGGCGTCAAGCCAGGCGGAACAATTGGCGCAGGGGATGAAGGTTTA -GAAGAACTATGTTCTTGGAAGACAGAAGACGAGGGGTGGGCATGGGGTAAAGGCAGTCCA -GGTGGCAGAGTGGGAGTTCCAGAACGAGCGGCTGACTCGCTAGGTAACTGGGACTTTAGA -AACCCGAAGCTGTCCACAGGTCGCCGATCTTTGGATTGATCTCCAAGAGGAGGAAAGTCT -TCATTCACTAGAGTATTGGGTCAGTGAATGTTAAAGAAAAAAAGACTGAGTGAGGACAGA -GTGATGAGGTAGGGAAAGGGAAAGTGGTATAAATCAGATCGGAATCCCCTCAGTCAAGCA -CGACGACTAAGCAGATGCGCGACTAAGCGATCACAAAAAACACGACTAAGCAATACAAAA -AAGCATGAGATAATTACATGATGGGAACCTCCCGGACCAGGAACAGTGCAGGACAGGGCG -TCTATGTAAACGGCGGCTGTGAGGTAAACTGATTTCGAATAAGACCCTTTCCGGCTTGTG -GTATCAGGGTAGGGAGATCTTAACGTACATTAATCCATGATAAACATCCGCGGTCTTCAT -GGCGCAGATGATGTATGGAAGTCGAATGCAAAAATAGTAGGAAGGGAATACACAGCCGCC -GAATACAAAGGGAGCCCGGAACACATGCCTATGTCCGTGAAATGCAAAGACACCTGGAGC -CAGCGAAAACATGCAAGAAATAGATAGCAGTTCAACAAGTTGAACGTCCGGTCATGACAT -AAATCAGAGCTTGTTTGACCGCGAATAAAACAGAAACAAATGCAAAACCTCCATTTGGAA -GGATAAAGGGAATTGATATCATGTCATCAGGATAAAGCAGGCTCATCAAGGGAACAAGGT -CATAGCGAAAAAGCAGGAAGATGAAGCAAATCAAAACACGGGAACAAGTGAGAGGTCAAG -GGATTCACCACCGACTGATACCGCCACCATAGACCATCGAGGGGTTGTACCCACCTTGAC -CCTGACTCCCGTACAACGCTTGCCCAGCAGTAGCATTCGCACCGACTTGATGCATTCTCG -CCTGCAAAATGCTCGGGTCCGCAAGATCTACTACACCACCTCCACCGGAGGAAGTGTTAG 
-CGTttctgtgcttcttccccttcttcttctgcttctGTGGGCCACCAGAATCAGGCATTA -TACCCACTTGGGACCCATAGAAGGAGCTGAGAAGGCCATTGGCAGGAGTCATGGGGGGAG -GGGTATTGTGCTGTTGAAGAAAGGGGAACATGAACTCACGCTTAGCGGCCTCCTGGCCCT -GTAAACCACCGACATTCGTACCAGTTCGTCCACGCAAAAGCTCACGCATTAGCTCGGGAG -TTGGGTCCTGCTTTCCTCCATTACCACTAAGTCCAGAGGTGAAGCCATGTCCCTGGGCAA -ACATTCCGCCACCACTGATTGGGGGGGTACCGGCTGTCTTCAGTCCCGGCGGAGGACCTT -GCACACCACTGGCATAGAAACCACCTGCTGCAAGACCATGTTGAGGGCTTGGGGCAGAGA -GGGGGTTAGGCGATGAAGATTGCATCTGGCCCAACATGCGTGCATTAGGCATATTCTTAG -AACCGGCTTCATTGGCAAAGGAAAATCGGGAAGACTGTCGGTTCTGTGCTGCCTGCAGCG -CATTGAACTGTGCCATTTGACTGTGCACAAGACCTTGGCGTGCTTGGGCAGCTTGTTCCA -GCGCGGCAGAGCTGAGAGGGTCAGCCAGACCAGCTTGCTGATTACCGGCGGACTTGAGTA -GCATCAATTGCTGTTGCTGCAAGGGAGTTAAGCTTCGTCCAGCGAGGTTCAAGCCTTGGA -ATTGATGGCTAGTTGCAGAAACAGGTGATCCAACAGTCGAATTGTCCGCGGTGTCTTGCT -GCGAGGGAGGTTGGATAGCACCGTGAGACTCTCGTGTCTGGCGATTGCGCGGGGGATTTG -CTTCTTCCGGCTCACCACCAAGCTGCAGGCTACCAGCTTCACGACTCTCCTCCTCAGCCG -CGACAGATTGGAGCAACTCGTGCTCGCGACTCAGACGCTCTTGCTCGACCTTTTCGCGCA -TTGCTCTGCGCTTGACGCCACCATACGGATCGATGAAAGATGGGTAATCCTGAATTAACT -TGACCTCTTCAGCGCTTAGCTCGGCCGTGCTGAAGGAGAACTTAAAATCGGGAGAATTGA -TATCCTTGAGCAAAGTATCAAACGGTGATGGTAGCTTGGGTGATTGAGTCACATGGCGAG -GACGCGTTTGGGGGGCGGGTGATTGGGGATCGACTTGAGAGATAGGTTGTTGGGATTCTT -GGGAAATTGGAGATGGCTTCTCTGCTCGCTTTGGTTCTTCGACTGGGGTTGCAACATTCA -CGCTTGCGGGCCGTGGGCTATTGGAGGCTTGGCTACCAGTGAGACTAGTCCTTCTGGTTC -GGTGAATAGCAGTTGCTACATCTTTGTTAGCCCAGCTGGCAGTTGACGGTAGAGCCGGGC -CATCTGGTATGCCGGTGCGACTGCTGATCGAGTCATCCTTGCTCGGTTGCCGGCGCATAG -GATGGGCGATGGGTTGAGCGGACCTCACGTGAGAGGGTGTAGGTGGGAGGTTCGGACGTT -GTGTGGAGATCGAGTTCATAGATGAGAGGTCTTGGCGGCTGTAGCTGTCGCTGTCTTCTC -CAGTCTCGTGCAAGAAAGTGCAGTTCCGATTGTTGCACTGCTCATTGCGAAGGAAAGAGG -AGCAATATTTGGTTGTTCCATATTGGGCCCTGAGCACTCGATCGCCATTTCCGGAACCGT -CCACAGCGTTGATGCACATCGCAGCATCAATTTTACGCGAGAAAGTCACATAAACACCAA -TTCCCTGGTTAGGGTTGCCGCCGGGCTTAGCCTTGCTGACTACGATTTTCTCGATCTCGC -CATATTGACCGAAATATTCCCGTCCACGAAGCGTTTGAAGGAGTTGGTTTTCGTCTCGGA -TTGTAGGGTTGAGGCCGATAACGTAGACCAAGTTCTTCTGGACGACACGGACACCGGCCA 
-AATTCTTCCGACTAGAAGCTTCGATCTCACGCTTTTCGGCTTCCTTCTTTTTCGCAGCAG -CCGCCTTGCGATGTTTGAGTGCCAGATCTGCCTTGAACCTAAACAATGTGCGTTAATTGG -GATGTGGCGAAGATGTCACAGAAGACGGGTTGACTCACTCCTCGACGTCAGGAATCTTGT -ACTGGATGGTACTTTCATCGTATCCGCGCCGACAGTTAGGGCATCGTCCTTCTTCATTCT -GCGTTTTAATGTTGTTGTAGCAGAATTGGCAAATCTAAAAGAGATCACATGTTAGGTCTA -GCACAGGGAGACAAAGACGAGCCACCTGTCCCCGTTACAACAGACCAGGCACTGGGAGAG -GCGCATCGAGGATTATTATCAGACATACCTGATATCCACAAGGACAGGGTTTGAAATTCT -TATCAGAGAGATCGAACTCTTCAATGCAGAGGGGGCTGTGGGGTGATCAGCACACAGATC -GCGCAGGTTGAATGGGTTCCAAGACCTACCAAAACTCGTCGTCGTCGTCGATGACGGAAT -CGATCAATGACAGGCTGGACATGATCGCAGGAAATACTGTGACAACCGGAGAGGGACACG -AGATCTCAAACGTGGACCGATTTAATCGGATGAGAAACTGTGAAAATAAAAGAGATACGC -TGCCCTGTAAAGTGTAGGAAGATAAACGGCGAGTAGATGTTTGAGGAGGAGCGAGAAGAA -GAGAGGGCAAAAGAAAGGGCGAAGATTATTGATAGAAGAGGGTGAACGGGGAGAGGAAGG -AAGGAAATTGTGAAGCGCGGCTTCATTTTGCTGGCATTGCCAGATCGAGTTTAGCGTGTG -CCCAACACGTGACCAAAAGTCATCGACCACTCTAAACCTTCACAATCCGACTTACGTGCC -TCCGTCCCCTCCTCCCGTTGCATCTGCCATTGCTCCCTCTACACAATGGCCTCCGCACGA -TCTTTGATGCGCCTGGGTTCTGGACGCTCTGTGGCATCCGCCACGAGGTCCATGGCCGTC -CGCACCTTTTCCTCCGCCTCCCTTCAGTGTGCCCCAAAGGCCTCGACAGCACCGGGTCCG -GAACCTGAGAACATGCGCCAGGCGCAGCGCCCGCGTATGTGCAATGGACTAGCCGATAGA -TAAGACACAATTGAGCTAACCGAACTGTCTTTTTCCAGCCCCGGGACCCCTGCGCGCTCC -CGTGGTTAACCCGGTCGACAAGTACCAGCCCATGGCCGACAGCCTTCACGCCTATGGACA -ATACGTTATGTCTTGTCTCCCCAAATATATCCAGCAGTTCACCGTCTGGAAGGATGAGCT -CACTGTGTACACTGCTCCCGCTGGTGTCATTCCCGTGATGAGCTTCCTTAAGAACCACAC -TGCCGCCGAGTTCACCCAGATTTCCGATATCACGGGTGTCGACTTCCCTACCCGCGACCA -GCGCTTTGAGGTCGTCTACAACATGCTCAGCGTTCGTCACAACTCCCGCATCCGTGTCAA -GACCTACGCGGATGAGGCCACTCCCGTGCCCAGTGTGACTGGTCTCTTCGAGGGTGCTTT -GTGGTACGAGCGTGAGGTGTACGACATGTTCGGTGTCTTCTTTTCCGGTCACCCTGATCT -GCGCCGTATCATGACCGATTACGGCTTCGATGGCCACCCACTGCGCAAGGACTTCCCCTT -GACCGGTTATACTGAGCTGCGCTACGACGAAGAGAAGAAGCGCATCGTTATCGAGCCCCT -TGAGCTCACCCAGGCCTTCCGTAACTTTGAGAGCGGCTCTACTGCCTGGGAGCCCGTGGG -TGCTGGTGAAAACCGCACCCCCGAATCCGTAAGTTATATTCTCTTCGTTGTTGAGAGATA -TTGTCCTAACCTGTTCAACTATAGTTCAAGCTTCCGACCCCCAAGCCGGAGGAGAAGACG 
-GAAGAAAAGAAATAGAAAGGGACTGTATGCGTATCATTTTCTTTGCCATGTTTGTATGTA -CATATCCGTCAGTGTTAATCGATGGTCTTTTTTTGTCCAAGCGATATATCAAGCAATGTA -GCACATCTAGTAGTGAATTTTCCCGAGGTCGCTCATAAGCCAACACCGTCTGGCTGATAC -TATTCTATCATTGTGTTTCTACTAATTCCAATTTATAAAGACTTTCGATATACAAAGAAA -TACGTAACATGACTTGGATATCAAAGCGATTGATTTACGATGGCCATTGCAAGCCTTTGT -CTTGGCGCCGAGATCCGCCCAGAAAAAAAAATTAAGCTACACCAACATCCGACGATCAAC -ACGATCCTCAAAATCGCCCTCGATTTTTATTTCGACGAGCCATACATTCTTTGACTCGGC -ACTCTCAATACTCCCAACCTGTCATACACTTAGAGCAACAACAAAAATGCCCAAACGCAA -GCTCGACGAAGTCAGAGGTCCAGCGCTCCCGCGCCCGTCCAGCGACACTCGCAAAATGTC -CATTCACGGCACCCGGTTGACGCAAATGTTCGAGAACGGCGTACTGATGATCATGCGCGG -TCTCAAGACATCGCGCGGCTTTGAGCGACAGAAGCTCAGCCGGCGAGAGAAAACAGCGAA -GGCGCAGAAGGACGATAAGGCGTTGGCGCGACTAAAGGAAGAAATTGAGACTTTGAAGGT -ATTTAGCTACCCCTACCGGATGTAGCTTTGTGAAGTGCCTACGCCCCGAACTCGATCGCT -AATTCTAATTCTAACAGGGTCTCGATTACCATGCCACATCCGAACGATACCTGTTCAAGC -AACTTGTCAGGACGAAGCGGATTGCAGAAACACGGACGTTCGGCGAGTTCCAGGCCGTCA -AAAAAGTCTCCCAGGAAGGACCGAAGAGTACCGCTGAAGCCAACATCCTGGCGCGACTAT -TCAAATCGACACCCGTGCAGAAGGAAATTCCCGGCATTATGGCTGGTATTCGGAAACTAC -TAAGAGTCGACGACTTGCCGTCCAGCAAGACGACGAAAGAGGGTAACAAGAAGGATGCGC -CAGCAAAGAAGGCACGACAGGAAACCACCGGGTCCGAGTCTGAATCAGAGACGACGTCAA -AACCCAGGCGCGGCGAGCAGGTCTCCCGCTCCACGAACGACATGGAAATCTCTACAGACG -AATCCGGGGACGAAGACCTTTCACAATTCgactcgcgacttggaccgggctcagactctg -aagctgattcggaatcTGGCGACGAGGAAGACCGCGCCGCAGGCGATATCTCAGATTCTA -TCTCCCGTTCACCATCTCCCTCCTATTCCGCTGCGGACTCGCCGCCATCCAAAAAAGTAA -AGGCAGCCAAGGGTGCTGCTGCGCCAGTGAAAAGCACAACGTTCCTGCCGACCCTCATGA -GTGGTTATTGGTCTGGCTCAGAAGAAGCTACTGATGAAGAAGATTCCGGTGCGAAGCCTG -TCCGCAAGAACCGGATGGGTCAGCAGGCGCGTCGGGCTTTGTGGGAAAAGAAGTTCGGAG -CCACTGCAAACCACATCAAGCAGGAGGAGATGGCTGCGAAGTATGGTGGCAGAGATAATG -GATGGGACACGAAGCGCGGTGCAACTGACGGTGCgagaggagggagaggagggagaggag -GTAGACGTGGTGCTTTCGGTGGTGGAGCTGGACGGCCTCAGAACAGAGATGGCCCCGCTG -GTGCGCCTCGGTCTGGGCAGCATTCTGGTGGCAAGCCTAAAGGCCCACCTAAGGATGAAG -GCCCCTTGCATCCTTCCTGGGAGGCAAAGCGGAAGGCCAAGGAGCAGGTTGCGGCTACTT -TCTCTGGAAAGAAGGTTACGTTCGATTAAGGTCATGGTACTTGTGTATAGTTATGAATCA 
-ATGTTTGAAGTATCGTCACAAGCAAGCGACCAGACATTAGGATCTCCCTATGTGTATGCA -TTTATAAATCACTCGACTAGGAGTAAGTAAAAGATTGATTGCATTAAATACGAAATGGGA -ATAAGCTGCGTACAATTTCCGAGAGTCCACCACCCAGGCGCAATAAACAGTACTGAGACA -TAAAACAGTCGCGAGATGGTATACCAAGAAAACGAAAATCAAAATCATAAGATCCAACCC -AAACATGCTGTATGAGTGGCTGCCTGAGAAAAAAGCACAGAAAAATTCATCATGAGGGAT -TCATGAAGTGAAGCCAGTGAGGGGGGTCAGACCTGCTACCTTGAGCCTTTATATCATTAC -TAGGCAAACCATTAGACTGAGTCTGAGGATCTGGCTCAAAGCAAGGAATAGCAGTGAACT -CGCCCAACTCTTGTGGAAAATTGGACACAAAATGATCGAAGCCAAAGTGTGGCATCTGTG -ACAGGGACTGCTGGGAGATGGAGAAATTCTCGGAGAGTGGGACTTCTTCGCCTTCTGGGA -TGCTCTGATCAGGAGCAGCAGGTGTAGGAACAGTACCACCCACGCCAATCATGGGCGCAG -AAGGAGGTGTCAGAGCCATGCCATGGAGAAGAGTAGCAGGACCTGCCATCGTAGAGGCTT -GTGATGGAACGCTTTGCGGAACATTGTGGTCACTTGAAGCATAAGGTTCCTGCTGTTGCC -ACGCAAGTGTGGACGGTGAGATAGGCCCGGCTTTCCAAAGCATCTCCCAGTTTTTGAACC -TGTCGTAGGCAGATTCAACGGCACCTCCGCTGCTCATATCCGGCTGTGTCTCGCTCCATG -CTTCGAGACGACGGATGGCACGTTCGAGAGCCTCGATGACACTGACAGCTGAACGGGCTG -AGCCAACACCCATAGACATGATCTTGATCAGGGAAATTCCCTGTTCGAGGGCAGCACCAA -GTCGTTCAGTTCGCTTGGTCAGGCCCTGTGCTAAGATGACTAGTAAAGCTGCACGGCATG -AACTGAATTCAGTAAAGGAGGCTCGCGCCAGACCGGTCTCATCACGGAGCAAGCGGCACA -AATCGATGATTTCCAAAGCTGCTTCCACGCAATCTGTGACAAGAGTCGCGCGATTCTTCG -ACACACCGGACAAAGTCGCAGACCGCGAGCCTTGGGAGGGAGTCTGGTAGATACCTTTGA -TATTACTGAAAAGGAAAGGGCGCCCAATGAAGACTCGAGTCAGACAGTAGTCTAGCTTGA -GATGGACATTCGATCTGAAAAGCGGCCCTCCGGGGTTCAGATCACGACAGGTAATCTCTT -CTGGTAGAGTTGACCACCAATCAACGAGATGTTTCCGAAGATTCAGTATTCGTTCCAGAC -AGTCCTGCTGTTGCTCTTTGCGGATCTTTCGCAATGAGGTGATTTCATTTGAAACCTCCC -CGAGCTTCAAAGTCAATGTAATGAGTGCTACCATGTTTGTATGGTTTGATGGTTGATTGA -CGGGCATCAGCCCCGGGAAATCGACCGGCAGGGGAGCATCAACGTCGGGGTCCGACAATG -AAGACGGTCTGCCGTGGAGAATGCTTATACGTTTCTCGATAGTAAAAGCTGTCCAGAAAA -CTCGATTGCGCACTTCGATCATGCGCGGGGACAGGCCTTCGCCGTGATACCTCCGATGCA -TTCCATTTTGAATAGCCAATTTGAGTGCCAGCCCAAAGTATGTGTAGCATAATCCTGAAG -TATCCAACGGAAGCAAGTATGTCCCAATAAGTAAACAAGCCTGTACACTTCGAACCGAAG -CGGTGGCGATGATGTCTGGTAGAAGCCTCGAGGCGAATTGGTAGAATGTGAGCCCGACCT -CGTCTTCTGAAAAGTGATGGTCTTGATTTGCCGTAGAGCCTGAAGGTAAGCAGTTCACGG 
-GTGTGGATGACTCCATATGTGCGAATTGGGTGCCCACGGCCAGAACCATAAGAATACAGC -AGACCGCTCCTGCATCGTCAAATGACAAGCTTTCAGGGTCAGTGTAGCAGATGCCTAGTT -TCTCAACTAACCAGTCTTCCTCAATGTAGAAATTATTGGTTTGTGCGTATTTGAAGAAGA -TCTGAACAAGAAAATCGGCAACATATCGCGGCGGCAAACAGGTAATTGAGGCAGACACCT -GAGGCGACGTGGACTGGAGTTGTGTACTTCGCCATCGTTCTTCAAAAGGCTCAACTTCGG -AAGTTGCCTGCGGGGGCGGTTTTAACATTCGCTCACCGACGATTCATGGGAAACCTACCT -CGGGTGCTGCAGTCTTCATCCATTCATCAATTTTCTTCCGGATCTTCATTGTGAAATTCA -AGTATGAGAATTCCCCGGAATATTCTGGGAAATAGTCAATCAAAAGCGATCTTCATGTAA -GGTCCCCAAAATTTCGACATACGGGTTGTGTTGTCGGGCATAGCCTTGATTGTAAAATCC -TCCTCATCAATTGCAAGATCCTCCAGCTCATTGGGGTCTTCACGAGTAGAAATCGCATCC -GACTCAGAATCGCGGCATTTAATCTTGATGTCCTCTGCAGCCTGACGCAGAGACTGAACA -TCGAAAGTGATATTGGGTACATAATGCGATAGGAGACGTTCCATCAGTCGTACTCGTTCT -GCCTCGGCGATGTCATTTCGACTCTGGGCGGCCGCTCGGTCCAAAAGTCTGGTATCAGGT -GCGCCATTAGCGGTCTATATTAAGGTAGAGTATAGGCATGCGGTCCACCTACGAGACGGA -CGTTGACCGGGTCTTTTCATTGGGGTCGATGTGAGTAAAATTGCATTGGCGGCGGTATCG -CAAACATCTCCGGCACGGCACACCTCCTTCGCATTTCTCTTTCACCCGGCGGCAATTGTC -GCACCTGCGAAAAAAAACCCGTGTTAGAGGAACGGACGCTTTAATAAAGGGCAACAGCAT -ACGCTCGGACAGCTCGCTTGCGGTTTTCTTCGGCCACGCGAGGACGTTTTTGCTGAGTGG -GTGTCGGGGTCGAGTCGTCCATTACAGTGTCATGGCATTGTATGATATAGGAAAGAAGAG -AAAAGAATGGCAGAAAAGGTCACAGACAGACCCAATGTCCAAGGATGGTAAATGGGGAGA -AAAAAAGGAAGCTAATCGATAAAGGACACCCCCGCATATTCCGGACTCATTTTGCTCCGC -TCATCGGTCTGTTATTTGGGGTCGAGATTGGTATTTCGCGTATCTTTGGTATATTTCGCA -TATTCGCAGGTATATTTGGTATTTGGGGGCGCCTATAGCATTCTAACTTACGTTTAAATT -CTATTTTTACTTTATTTCATATATGACTTATCTACGGAGTATACGGAGTACCGACTACTC -CATTCAGCTGTAGCACCAACCCGGTATAGTTCAGGCCATTATTAACCCTATCTCATGACT -TCCTACAATAGGCCTATATGTATCATTTTCCTGCAGGATTTGGTCCTCTATCTCCATATT -ACTGCCCGTGTTCTAAATCACTTGGCATACATCATTCTTTACTTATCCGTTTCCTTCATT -GTCTCCAGAACTATGCGGTCTGCTTATTTGGCCTTCGACTTTCTTATCTACACGTTCTTG -ATTTCTATCTTTCTCTACAACTGTATCTTCTTCGCAGGAATAGGTAACGAGGGCTTGCAT -TGATCTTGCCGATCTTATGATGCGTGATCAGTTATACAATGGCCCTGAATTACTGGAGTA -TTACGACTCGAGAACAAAAGCCATGCCAGTAGGTTGCTGCCTTGTTTTGGAGTATGCAAG -GAAGCCCACCAGCGAATGCATTGTGAAACTCGTGGTTCCCAATCATGATACTGATCAATG 
-ATAGCTGTCCTCGTGCTTAAAGGTAGAAGTACCAAAAAAAAACATTTAGAACCCATCAAC -ATGAGTTCCAAGCCCCATCTCAACCCCAACCTCAAGCACCTTCTTCCCAACAACAAGATC -CATCAGTCCCATACCAACACACTTATACACAACATTACAACCAGCAGGAACATCAATGGC -CTCCTCCGCAATCCCATTCGCCCCCAAGAGATCCCCCATTTCAATCAACTGCTCCTCCCC -AAGACCGGCAGTAATCAACTCCCCAGCCTCCTCAAGACAAGCCGACTTGGAATCAACATA -AACCTTCCCACCCCCCGAAAGCAGCGTCTCAGTATCAATCTCATGCATCTCGGGCTTGTA -AGAGCCAATCAGCGAAATAAACCGCTGCCTAGGATTCGCCTGCAGAACCTCATATCCGAA -ATTCGGCACTGTAGCAGGCGTGCAACTAAAGATAACATCCGCAGCAGCAAGTTCCTCCCC -AAGCCTCTGCTCGTAGTCGCTCACACCTTCCTTAGCAAGCATCCCAAAGGCAACACCAGG -GTAAAGATGCCGCAATTCAGAAACAACATCCCTCTCCATTTCGGCGAGTCTCTTCGCACC -GCGATTCACAAACGTCACCCGCCGCACCTGTTCCGGGTACAGCAGGAGCGCGAGACGGGC -ATGCCACTCTGCCTGTCGACCGCTGCCGAGAACAAGCACGTTGTCCTTGCGGAGGGTGGT -GCAGCGCACGAAGAGAGTCATTGTTGCCAGTGCTGTGCGGAAGGCTGTTACTTCCGCTGC -GGCAAGGAGACCCTGGAGTCGGCCTTCTGGGTTGAAGATGTTGATTACGCCTTGAATTCC -GGCGGTGCTGGCTGTTACGACTTTGATACCGGTGGATGCGGTGTCCGATACCGGCATGAA -GATGCAGGTGTTGTCATTGCTTGTGGTGAAGACTGTGCGGAGGGGTTGGTGGATTTTTTT -GGCAGATGATGGGGTTGATGGTTTTGATTCTTTGGTGATGCTGATTAGGGCGTTGGCGAG -TGCTGAGATGAAGCTGTGGCATTGGTTTTGTGTTAGACCGCGGAAGATCTTGGAGACATT -TGGCTCGGAAAGGATGTGCATTTTGGTTGAGGTTTTGATTTCTTTTGCAATGTTCGGAGA -AGGGTTTTCTTGGGATTTTCTTATATCGATGAAATTAATGAGATGCTTTTTGAGGCGATG -CGTAGAATATATGGGGGCTAAAGATCGGGTAGACCCTCCGACACATACTGAGGGGCAAGT -TAACCCGGTGGGGACTACATGCCGGGCAGCATTAGATTATCTCTGACCTGATTTCTTAAT -TTGATTCTTATTTTGACTAAAAGAATGATCTAAGAATATTGTAACTTCAATTTGAAAACA -TTCATTTCATTGATCATCATCCGAATGGACTTCAAAAGTTCTTATACAAAGCAATAACAA -CGCCATGAAGGATGCACCCAGTTATTCGCAAATGAAAATGCAGCAAAAGCAAAACAGTGG -CAGAACAGCCCGTGGGCTGCCAAGTAAAAACAAAACGATCTAGATAGCCCCCGAAAGTCG -AAACGGAAAATCAGGAATGGAACGAATATCACTTGCTGCTGCGGAAAGCATTGCCGCAGC -GACGCTTCAACTCGGCGAACATTGCGATCTGGGAGTCCTTGGTGCGGCCAACGGCCTCGG -TGTTCTCTACAGCACGACGGAGCAGGAAGCCCATGCACTCCTGAACCGAGCCCCAGGTCA -ACAGCTTGAACACGTTGGGGGCATCCATGGGCGTAATGCCGACACTGTTCTCGGCACTCT -CGAATCCTTGAAGCAATTCACAGCTGACTTCGTCAGCCATGCCCTGGAGCTGAGCGTAGC -TAACATCGACACCGTAGTCTTCACCGCGGGCAGCCTGCTGCATGCGCAGAGCATGAGCTT 
-TGCGAACGGACTCGCGGCTGTGGGTAGCAATGATGATATTCAGAGGAGGAAGTTCGGTTG -GGGTATCCTTTGAGGGAGCCTTGAGCATGGAGTTGTACTTGCGAGTGAGGAGGGCCTCAA -CGACTTCGTTATAGCACTTATCAGTCTCCTCCTTCTCGGCCCAGATGAGTTGGCGGGGCT -CAGTCTTCATGTAAGCGCCGCGGACCAGCTTGACACCAAGTGTGTATCCCTCAGCACGAG -CAGTTTCGAGGTGACGGGCCAGAGTGGCAGGCGTAGAGCGCAGGTAGGCTTGGTAAGTTC -CATAGAAGATAGCACGGCCGGGGGTCTGCGAGTTGCAGTATTTTTGATACTTCATAATCC -AAGCCTCAATGCCTGGCTGTACGGCCTGCTCTTCAGCGTCGACCAGCAGGCGGACGCCAC -GGGAGATGGCGAGATCGCAGACCTTGAGGATAGATTTGTCCATAAGCTCTGTGGGCATAG -TCTGAGACTGCAACAGGCGAAGGGCGTCATTACCCATTCCAGTGAATCTAAAGGATCTAG -TTAGTTACTGTTGCAAGATGAGAACTCAAGTCATGAAACATACTTCAAAGCGACAAAGTC -CCCGGGTTGAGCCATATCAACTGTCTGCAGCGTGCCATCCATCCACATCTGGATCTCAGC -CAGGGCAGCCTTCTCGTCGTATACAGCATCGGAGTTCTCTCCAACGAGGACCTCTCGGGC -ATATCCCAGGAGTACACCACGGCATCCGAGTGCTTTGATTTGATTGATCGAATTTTGCAC -TTCGAGTTTGTTCTCGCCCGCGTTAAATTGTTTATAGATGGTGTGCTTAACAAGCATGTT -CAACAATGGATTCTTGGAGACATCCCAGAACGCATTGCGTGGATGAGCCAACAACGACAA -GGTGTAGATGCATGGCTTGAGAAGGAGGGTTGATGATGAAACAGACAGTACCAGCAAGGA -GCGTAGAACAGATGACAACGGGAGCTTAGCCAAGGGTGCTGTCAGTGCCTTGCGGGGTAG -TGCAGGCGAGGCCTTTTGTGATTGCCATACGTTGGCGGCTGAAACTGAGGCGACTGAGGA -TCGAGAGTTGCTGGTGCCGCTCACCAGTCTGGTGGTTTGTGCGGCAGCCTTGAGGGGCGC -ACGCACGGAAGGAGCACCTTTCATGGGTGCGGTAAGACGTAATTTTTTGAGATAGGTTAT -AAGATAAGGGAAGGCAGAGGTGTAGATTTGAAGTAGTCAAAGTTGAAAGACTAAAAACAA -ATGGTCGAAGAAACAGAAGAGCCACGGTTGAAGGCGACCTTTTAGATATGGCGATCTGTT -GGTGGAACTTATTGGGTAAGTGCCGGTGGCAACCGCAAGGTACACCGGTACGGTCAGCAG -TCCCATCTTAACGGGCCGGCATTGTCTGACAAATTTTTGTGTATCTATAGATGAAATTGC -TATTCTTCATGATTCCTCCAAACTACACACATTAAGATATAAGATTATAACGCCACTATG -ATTTACGATCGGTTCAAGGCTAGGGAGATCCAAGTGTCCGAGCGAGAATGACGCTTTGAT -ATTCGCCTCTTCCCTTGCAGCAAGTAGGTTCGCAGAGTCAACATGCCGAACGGTTACGGG -CCGGTTTGATATTGGGACATGGCCCACTGATAAGCGATAAGCACAAGTAACCGGGCCGGT -GCTAGAAAGGCTTTTTTTGCACCTGACTTGCTTCCGGTTACTTTCGGCCTAGTGCTTGGA -ATGTTCTAGTCAGCTGCTTCAATTCATGAATTCTAAAAGGCTTTATCTTTATTATATTTA -GTCTCAGGAGTTCGCCTATTTTACCCCGGATTCTTCCCCAGACGAAACCACGTGATTTTA -GTGGCAAACGGGTGGCAAATGTTTAAAGCCAATTTGTTCTGCCCACCAATCGTAGACCTT 
-GTTGCCCATAAAGAAGGTCTGGACTAAATAGAACGCTAGGAGCTATCCAAGTGGGAAATA -TTTATCGACAATGTGTCAATGATAAGATAACATGTGATATTGGGACCTTGGGAAATCCTG -CACGTATGAAAAATAAAAACTCGATAAGGCAGGCCACCAATAACAGATTAGAACTAAAGG -GCAATGTGGATGATCTTCTCCAGAAACTCACTCGCTGCCCCCGCAATGGCGCATTCTTCT -AGATCTCTTTTAGGCATTGGGCACACCATTTCATAGTGCTTGATGGATACATGCGCTTAA -AATCGCCGATTGACGGAGAAAGCTCATTTCCACACCTAGATAAAAAAAATAGCCCTAAGC -TTAAAGAAGTATCGTTCTAAAGGGGAGTTTTCTCCCATTGGAGCACATTATCATAGCGCT -TAATATGTCTCATCGGTAAAGATAACACTTTGGGGGCTAGATTGGTTAGTCGTATACATA -ATAAGAATTGTATATTTCCCCGGATATCACAAGTTACTCTATAAGCTTCTCCACTGCCTA -GAATTTGCGGTCTTGCTGCTAttcttcttcttcttcttcttcttcttcttcttctCCCCA -GAGTCGCGCTGTAGATTGGTAAATTTTCCTGTCTCTCTTCGCGCATCATGGAAGACTCAA -AAAGTCTCGAGGCGGACGTCCGTCCCGTCTCCCCGTACGATGACCCGAAAGCCATACGGG -GAGAGATTACAGGCGGTCACAACTCACAAACTAAACGAGGTTTGAGCTCTCGTCATGTTC -AATTTTTGGCTCTGGGTGGTTGTATCGGCACTGGTCTTTTTGTTGGCAGTGGCGCAGCGC -TGTCAACAGTTGGTCCTGGTAAGAATCTCTGTGCGAAATATGAACCTGATATGGGGCTAA -TATGTGACAGCTCCTTTGCTCATGGGTTATATTGTCATGTCAAGTATTGTATACTTTGTC -ATGAACATGCTTGGTGAAATGACTACCTATCTCCCCGTTCAAGGCGTTTCTGTGCCTTAT -TTGATCACTCGGTTTACGGAGCCCAGTCTTGGATTTGCGGTTGGTACGTCGAACTCACCT -TGAAGTCATGATAAGCCCCATGCTAATTAAAATACAATAGGTTACAATTACTGGTAATTA -CTCATTTTTGTCAATGTCCGACCTTGAAACCCCGTCAAACTAATAATTATGACTATGTAG -GTATTCATTTTCAATGTTGCTAGCATCAGAAGTCACTGCATCCGCTCTTGTCATTGAGTA -TTGGCAGTCCCCAGTCAACGTGGGTGTATGGATCGCAATTATTCTCGTTGGTAAGTACAT -ACTAGTTCCCCAATAACTCGGCATTTGTGATATCCTCTGACAAAATAACTCTAGTCATTC -TCCTTCTAAACATTGTCGCGGTCTCCTGGTACGGCGAAGCAGAATTTTGGTTCGCATCAC -TCAAGATTCTTGCGATTCTCGGTCTCATCATCCTGGGCGTCATACTTTTCTTCGGCGGCG -GTCCGAACCATGACCGACTTGGTTTCCGGTATTGGCAGACGCCAGGCGCATTTGTGGAGC -CTTACTTAGTCCCGAGCATCAGCACAGGCCGCTTTTTGGCATTCTGGACCGCTATGATCA -AGTCCGGATTTTCATTCATTTTCTCCCCGGAACTGATCACTACCGCTGCCGGTGAGGCTG -CCTCGCCACGCCGTAACATTCCCAAGGCGGCGAACCGGTTTATCTACCGTCTGTTTGCAT -TTTATATCTTGGGAAGCTTGGTCATTGGCGTCACTGTGGCATACAACGATAAAAGCCTGC -TGCAAGGAGTTGCCAGTGGTGGATCAGGTGCTGGAGCTTCGCCATTCGTTGTCGGTATCC -AAAATGCTGGTATTTCCGGATTGAACCACGTTATCAATGCTGCTATTCTTATCTCTGCCT 
-TTTCCTCGGGTAACTCTTGGGTTTTTGCTGGGTCACGCACACTTTACTCGCTCGCGGGTG -AGGGACAGGCCCCCAAGATTTTCCTTCGCTGCAACAAAAACGGTGTGCCGTACTATGCTG -TTCTCGTCACTTGGGCGATTGGTCTACTCTCGTTCTTGAACCTGTCTTCCTCGGGTGCAA -CCGTGTTTTATTGGTTTACCAACATTACCACAATTGGAGGTTTCATCTCTTGGGTCGTCG -TGGGAGTTGCATATTTGGTACGTTGGATTGCTTCTGTTCGCTTCATGGATGATCTTGCTA -ACAATATATCTTCAGCGTTTCCGCGGCGCTTTGAAGTTTCACGGGTTACTCGAGTCGCGG -CCCTTCATTACTCCTTTCCAGCCGTATGGCACTTACTACGCTATCGTCTTTGTGTCCCTA -CTGTCTATTACTAACGGCTACACGATATTCTTCCCGGGCTCATTTACTGCCTCGAATTTC -CTTGTTTCTTACATTGTCTTCGTGATCTTCTTTGTGCTGTATTTCGGCCATAAGTTGTAT -TACAAGACGCCGTGGATGATCAAGGTGTCGGAGGTTGACATTTTCAGCGGCAAGGACGAG -ATCGACCGACTTGAAGAAGAGGAGATCGTACCGCAGCCACGCAATTGGCTAGAGCGAGTG -TGGTGGTGGATTGCGTAGACACTTTTTGGTCTGCTGTTACATTTGAAATGCTCCTTTCTA -GATGGCGCTTCTACGTGCATATGATACCAGGTTGTGGGGATATAAATATTAGAAATAGAT -GTGAATATACCAACCAAGAGATTAAATACGTTTTCCCGTGAACTCTATACGTTGTACATT -ATCTGACAATCATTCAAATTCCAGTGCCATTATTACCGGATACCTTTGTTTTTGCTATGG -TCTAGGGGTAGGATGCTGTACTTGCGAAGTATATACATTTAGCGCCGATCCATTGATCTT -CCATTTTTCCGAATCTAACGCAACTTAATCGCCGCTATCAAGCTTATCTAGATTTGCGGG -GAATGTCAATTGTTTATTATCTCTTGACTAATAAGAACTTCTATGAAAGACCAGGGCCCG -GTGAACAAAGTGAATTCTCTTTGACAAAGCCGATATAGACGCCGGCCCGGTGGGATCCAC -CCGGGATACCTGCCGGCAGCCTCTGATTGGCTCCCACTTCTGACCAGTCCCAGGCCCGCC -CGCCTTACCAGGGTAGCTATGAGCTTCTGACCGGTAATAGCCGGCCTTTTTCATCTTCCC -CGCGTCTACTTCTTCTCCAAATCTTCTATCCTCTCTTTGCAAGCTTGGAAGAGTTGAATC -AGTGTCTTCATCTCAAACTATCATCATAATGTCCTCCATTCTGCGCATCCGCGCGCCCAT -TTCGCGCCTCGCATGCCGCATTCCAAGCTCCAGCACACGCTCCATGGGCACTTATGCCAC -TTTCAAGGTGCCCACGATTAATAATGAACCTAACGTGAGCTTAGCATTGAAATCGAGTAT -GTTTGATCGAAAAGGCTAATTCTGATTTGAAGAAACACTATATTCCTGGCTCCCCAGATC -GCAAGGGTCTCGAGGAGGCTTTGGCAAACTACAAGCAGCAAGCCCCTTTAAACGTGCCTC -TGGTAGTCGCGGGCAAGGAGGTAAGAATTAACAAGAACTCTCATACCTACTAAACAATTT -CTAACAATCACACAGCTCAAGGGATATGAAACCTTCACACAATCTAACCCTGCAACCCAT -GCTCCTGTGGCAACTTATTCCAATGCATCTAAGGCCGATGTTCAGTCCGCCATTGATTCC -GCTCTTGCCGCTCGCAAGTCTTGGGCGGATACTCCTTTCGCCGAGCGTGCTAGCATCTTC -CTCAAGGCCGCTGATCTTATTGCGACCAAGTACCGCTACGATATCATGGCCTTGACCATG 
-CACGGACAGGGCAAGAACGCATGGCAAGCCGAGATTGATTCCGCCGCTGAGCTCTGTGAC -TTCTTCCGATTCGGTGTGAAGTACGCCGAGGAGGTTTACGCACAACAGCCTGCCCACAAC -GCTCCTGGTGTGTGGAACCGCCTTGAGTACCGTCCTCTTGAGGGTTTCGTGTACGCTATC -AGCCCCTTCAACTTCACCGCAATTGGCGGTAACTTGGCTGGCGCACCCGCTCTGATGGGT -AACGTCGTGATCTGGAAGCCCTCGCCCTCCGCGATTGCCTCCAACTGGCTCGTGCACCAG -ATCCTGCTTGAGGCCGGTCTCCCTAAGGACGTCATCCAGTTCGTTCCTGGTGATGCCGCG -GAGGTGACCAGCACCGTCCTTGAAAGCCGCGACTTTGCCGCTCTTCACTTCACCGGAAGC -ACTGATGTCTTCCGTATGCTGTACGGTAAGATCTCACAGGGCGTGGCAGACGGTAAATAC -CGTAGCTACCCTCGCATTGTGGGTGAGACCGGTGGCAAGAACTTCCACTTGGTACACAAA -TCCGCAGATGTCCGCAACGCCGCAGTCCAGACCGTACGCGGTGCTTTCGAGTTCCAGGGC -CAGAAGTGCAGCGCCACCTCCCGTGCCTACATTGCCTCTTCGATCGCCGATGACTTCGCA -CAGCAGGTTGTCGCTGAGGTGCAGAAGATTCAGATTGGCGAGCCATCTGAGTTCACCAAC -TTCTGCGGTCCCGTCATCCACGAGGCCTCGTTCAACAAGCTGTCTGGTGTCATCGATGAG -GCCAAGAATGACCCCGAGCTTGAGCTGCTCGTCGGTGGAACCTACGATTCTTCCAAGGGA -TGGTATATCCAGCCAACTGTCTACCGTACCTCCAACCCGGACCACCCTCTCCTCTCGCGC -GAGCTTTTCGGCCCTGTCCTGGTCATTCACTCCTACAACGATGCCACCGATGCCGACTTC -ATCAAGATTTGTGACAAGATCGATAGCACCAGCACCTACGCTCTGACCGGCTCTATCTTT -GCTCAGGACCGTGCCGCTATCCAGGTCGCCGATGAGGCTCTCCGCAACGCCGCTGGCAAC -TTCTACATCAACTGCAAGAGCACCGGTGCCGTTGTCGGCCAGCAGCCCTTCGGTGGTGCG -CGTGCTAGCGGTACCAACGACAAGGCCGGAAGTGCCAACCTGATGTCCCGTTTCGTCAGC -TTGCGCTCCATCAAGGAGGAGTTTATTCCCACATACACCGTTGCATACCCCAGCAACGCC -AACTAATGGATAACAGACAGTCATCCATAGCTTTCAAGCTGACCTCGCATGCTTTATGAT -CACATTGGCGTCAATCCTGCATCTCACCCCGGCGTTTTCTTAAAATTGTGTCTTCGAATT -TCGGAGGAATCCTCAGAATGTGTATTTATTTCAGTCTAATAGACAATATGCCAAATCTAA -CCAGTACGATTTATTCAAATATCATAAACCTTTGGCAGACAATTTCCTCCTCAGTCCGTC -GCACAACTCTTCAGAAACTTCTGCATGCCATCCATAGCCCTCGCTGCAAAAGGGCCCATC -TCATACTTTTTCCCAGTCCCCTGCGGTCCACTGTCAAAAATCTCCATTGAGAACCAACCC -CGGAACCCCGTTCTAAGCACCGCCTTCGCAACATCCTCAATAGGCAAATACCCGCCGTCA -TACGGCATAGGCCTATAGTCATGACTCCAGCGACCCCTCGCCCTAAGCTCACCAATAGTC -ATATTCTCCAACGGCGGCGACACTTTATACGCATCAGAGATCTGAAGCAGATAGATCTTA -TCACTAGGCACAGATTTCGCAAGCTCATCCATGCTCGCCTCAAACCGCTTCCGTAGCTCA -GACTGCGAGACTGACTCAATAAGACCAGACTCCGTAGTCGGATCGCCCCATCCACTGCCT 
-GCAGTCTGGAATGTATCCAAGCACAACCCGCAATTCGGGCGATCTACAGCTTCCACTATC -TCCCAAACTTCCTTCCACCCAGGCGCGTGGGTCGACCAACACCAGTTTTCGTAAGCGAGG -CGAAAGTTGCGTTTTGCCAGAAGGTCGGATAAAAGGCGCAAGTCGGAGATGATGGCTTCG -CGTTTGCAGTTGATTATGTGTTCCGGTGTGTCTGTTGCTCCGACCTTGATGGTTGTTAGC -TTATTGATCTTAGAATTCTTCGAGTGCTCCACTTATATATATCGCTCTCATACCTGTAAC -AAATCTGTGCCCAGCACATTCATAATCTCCATCCACCCTATCGCTCTAGTAAATGCCTCC -TCGCGATCAACAGAGCCCTTGACCCAGCCCTCGAAGTTTGCGAATGGCTGTAGCATCATG -ATTTTCAACTTGTGTTCTTCGCATAGTTTGCGGATCTCCACAGCTACCGGCATAATTGCG -GCATAGTTGTCTGGCGCGATCGCGCTACCAGTGATTTGCGCGCCGTAGTCTAGGATGTCT -GGAAAAGAGAGCTCAATTGCGCTGAAGCCCGCTTCGCTGATTGCTTGGAGTCGGAGCGGG -AGCGTGTCGGTTGGACCGCAGCCGATTGAGCAGGAGGCGTATGAGAGGGGGATTTTTTGG -AGGCTTGAGTATGGAATTGACATTGTTCGTTTCTTCTCTTTTTGGAAACTCAAGAGTTAA -GGAAAAGCGGTTACTGTGTTTTGCGATCTTATTGGGGGTAGTTGATATCGGCGAAAAAGC -ACATTCATTGGACGTAAGGAAGTAGAACTTCTTTGGAAGATTGCCATAGTCGGTCCTAAG -AAGTATATATTTGCCTCAATTATGCCTGTTTTGCCCATTCCTTCTTCATTCTGTGTCCAT -CGTGATGTAAGTTGTCTTGGTCAGGGCTTGTGTCGTCACGATACATCATAATATCAGCCC -ATAATGAACCTGGCCACACCTTGAGTCTTTCGTAGACTCTACAATAATGGGGGTTGCCTC -ATTTGCCTCTTGTGTAATAAATGCTATCTAATAAAGAGGCGTCATGCTCTGTTGAGCGGC -TCGTGGCGTGTCTGATTGCATATTGACAATCCTCCGGATCATAGATCCGATTGCCACAAG -CCTCTCGTTCAAGAGAAGTGAGAGTCGTCCCATAGAATTTGGTGTAAATATCTTCGGCAT -AATTTGCATTTTGAACTTTGAATCACAACCCCTGGATGAACTAGTCTTAGAGAACCAACC -CCAATCCACAATGCCGCCCCCCATCAAGAGCATCGGTGTCGTTGGAGCCGGAAACATGGG -CTCTATGATGACCCTCCGCTTCGCCGAACTTGGTCTTCTGGTCTCGGTCTGGGACATAGC -AAGAAAAAACGTTGATGAAGTGGTGAACTATGCACGAGAAGATAAGAGCATCACAGGTCG -TGTTCAAGGCTTTTATGACATCAACCAATTTGCGAAGAGCCTAGAGGGCAAGTCCGACCG -GAAGCTATTCATGTTCTCTATCACGCATGGTGAGCCTGCAGACGAAGTACTCCGCATGCT -TAAACCCGATCTCAAAGAGGGTGATATCATCCTTGATGGTGGAAACGAGAACTACCGGAA -CACCGAGCGACGCCAGAAAGAATGTGCTGCCATCGGTGTCGACTGGATCGGAATGGGTGT -TTCGGGTGGGTATCAGTCTGCGCGACGGGGACCTAGTCTCTCGCCTGGCGGAGATGTCAG -AGCCATCGAGCGTGTTATGCCATTTTTGGAGTCCTATGCGGCCCAAGATCCCAAATCTGG -AACTCCGTGCGTGAAGCGCATGGGCCCCGGTGGATCGGGTCATTACATCAAGATGGTTCA -TAATGGCATTGAGGGTGGAATGCTTTCTGTTCTTGCTGAAACATGGCAATATATGCATGA 
-GGGATTGGGAATGGAGCATGGCAAAATTGGAGATGTTTTCCAAAAATGGAACGAGTCTGG -CGAGCTTCGAGGCAACTTCCTGATTAACATCGGCGCCAACATTCTGCACACACGGAGAAC -ACCTAAAGGTGACCGTAAAGGCGAGGGAGCCAGCCGAGATGATGGATATGTGCTCGATGA -TGTACTTGACAAGGTGGTTCAAGACGACGACAATACCGAAGGCACGCCACTCTGGTGCCT -CATGGAGTCTGCCGCCCGGCATGTATCTTGTCCGACATTAGCCGCTGCCCATTATATGCG -AATCTCAAGTGGAAATCGTATAGAGCGAGCACGTGCTGCGAAGAAGCTAGAGATGCCCAT -CCCCAAACCCATCGAGGGGACCAGGGACCACAAAGAGATCATCGAGAACCTACGACAAGC -TGTGTACTGTTCCTTCCTGGCATCGTTCTGTCAGGGTCTGGAGCTGATCTCCCGCGCCTC -GATAGATGAAGGGTGGGATATCAACCTCGGAGACTGTCTGCAGATCTGGCGAGCGGGTTG -TATCATCAAGTCCGATCACATTGCGGATTTGCTGCAGCCGCCGCTTGCAGTTAAGAACGA -GCTGACTAACACTAAGTTTGTGGATGCGGTCGCGCATGAGTTACGCCAGAACTTCCACAG -TCTTAAGCAGGTTGTCATGGAAGGGACCAAGTTTGATCAATATATTCCGGCTCTCTCGGC -TACTCTCGAATATCTGAAGTATGAGGGTGGATTGGGACTACCGACCAAGTTCATGGAAGC -GCAGATGGACTACTTCGGTGCACACAACTACAACAAGCCTGGCCTTCCAGGTGAGGACCC -AGGACCAGTTCACAAGGGGCCCCATCATTATGAGTGGTTACCGGCTTGAATTGGCTCGAG -TGTAATACTTCAGATTCTGTATCATGTACCACCTCAGATTGTATTTAGTTTCCAAAATTG -TATAGTTCGACTCACTCCAAAGTAACATCCTCCTGACCCGGAATAAGTCTGCCATGACAG -TAGACAGTTTGTGCTGGGTAATTTAAAGTTTTATAGATCGCCCAAAAAGGCAGGTGGCGG -CCTCAGGAACCCGCAGCACGTGGGTCAGTAGACAACCAATCGGAGAAGCGTAGGCTGGCC -GCAGCCTCAGAGATTTGAAGATGGCGCAAGATGCGCTGGATTTTAGGCTTACGTTCATTC -TCTTCTATATCGAAACCATTTCGTCGAATTGAGTCCGCAAAACAACTCGCTCGCAGAATG -GCTTCCCTCCCTACCACATACGACGGTACGTATACTTCTGCCTTCATATCACTTAGGTAC -CCCTCACCCCCACTTCGATAAAAAGCTGACAATAGTGACTCTGACAGGGCCCGTCCCAAT -CGCTGTCATCGGTGGCACCGGTCTGCGCGAGTTGCCTGGCTTCACCCAGGTGGCTTTGCT -GAATATCACAACTCCTTGGGGCGCCCCTTCTTCGCCCATCACTATTCTGCACCACGAGTG -TAAGCACAACAACAAGGTTGTCGCTGTCGCCTTCTTGAGCCGCCACGGCTTGCACCACCA -GATCGCCCCTCATGAGGTTCCCGCTCGTGCCAACATCGCCGCCCTGCGCTCCATCGGTGT -CCGCAGCGTGATCGCTTTCTCTGCCGTCGGCAGTCTTCAGGAGGATATCAAGCCCCGTGA -CTTTGTCATCCCCGACCAGGTCATTGACCGCACCAAGGGCGTGCGTCCGTGGACTTTCTT -CGAGGGCGGCATTGTCGCTCACGTCCCCTTCGGTGACCCCTTCGATGAGGGCATCGCCAA -GGTGGTGCGTGAGTGCGGACACAGCCTCGAGGGTGAAGGTGTCGTTCTGCACGACCGTGG -TACTCTGATCTGCATGGGTATGTAAAGCTGTGAGACTTTCTGATGGTGAATTATCGCTGA 
-CCGTGTCTTTTTAACTACCCAGAGGGCCCCCAGTTCTCTACTCGTGCCGAGAGCAAGATG -TACCGCTCTTGGGGCGGCAGCGTTATCAACATGTCTGCCCTCCCTGAGGCCAAGCTGGCT -CGCGAGGCCGAGATCGCTTACCAGATGATCTGCATGTCCACAGACTACGACTGCTGGCAT -GAGGCCACCGCCGACGTGACCGTCGAGATGGTTATGGGTAACATGAAGTCCAATGCCGAG -AACGCTCGCCGTTTCGTGACTGCTGTTCTCGATGCTCTGGCTAGCGACAAGCACACCGAG -CTGGTCCAGGCTAAGCACCTGGCTGGTGGCATCAAGTTCGGTGTTAGCACCCCTCAGGCT -AACTGGAAGCCCGAGGCTAAGGAGAAGCTTGATTGGCTTTTCCCTGGCTACTGGTAGATT -CAGTGGCTTTGGGATTGAGGCACATAAAGTTTTGGCTTGTCACTTGACGTATATCTGATG -ATAGCGTATATGCATTATAGTTATGGGATACTTACATGATAGGCATGATTGATACACGAA -TCTGGAATACTATGACTGCATGTCTAGTTGATTGATCAAAGGCCCTGTTTCTGGTCCGCA -CAGTAAGACGATGCCGAACATATGAAATCCCGGGTGATAGTCTTTAACTCAACTTGTGCA -TCCGAGTACCCGAGTTTCTCCTAGCAGAGCACTACTTATTCGAGATTTATATCTCAACAA -TGTGTAAGGACGGAACTATAGGTGTCTCTCAACGGCTAGAGGAAACTTAGAGCCTAGAAG -TACATGATACAGAGGAAATATAGTAACAAACGAGGTATCATGTACTTCTAGGAGAGATAG -GGATGCGAAGGAGAGGAATTGAAGCTTTTTGACTACGAAATTCTATAGGCCAAACATCTT -GTAGTGCCCCAGACACCAACAAATAAAGCCCTTGGATGTCGAGACATTCTCATGATGTAA -GCATATCACAGAGGCCGCAAGGGTTAGCCGTGAGTTTTTATTTCTTGAGAATAGCACACT -ACACTACATACGGAGTAGTTGGGAATGATGGTGCACAGTAATTTTGTTATATTGGCCCCT -CGCTTACAACTCTCTTCTCTTGATATTGATACAACCCCTTTTTTCACTCCTTTTACGCCT -ATCATTTGACTGGTACAATACAACAATCTCTCAATACGGGAAGGGTATACTTTCAATCCT -GGACTTGCTGCTGACGCAATTATTTTATAATTCCTCCAAATCCCAAATCTTGATAGCTTT -GGGTATTGATTCCAAGGCCCCGGTTTCGGTGGTATATCTCTTCAGCAGGAACGATGCTGG -TTTTATCATATTCAGATCAGGTGAGTTCCTAGGCCAAGCAGGCGTACTTTGGCCTTTACG -GAGGGCTTAAAAGGTAGCCATAGAAGGGTAGCGGTAAGAAAAAAAAAGTTATATCGATAG -AAAATTCATAAGAGAGCACTAATATTTGACGTCGACATCCGTACAGATGACAGTGATCGA -CACTGCGGGACACAATACTACATGTCCTCTCAATCCTGCATCTTATGAATGGCTCCCCAC -TAAACATCTCATATAGGAGCGAAGCTGTCGCGCGCCTCTACCACGGGATCTCCGCACCAT -CAATCACGTACCAAAACTTGCCCGAGGTCGTCTCCCGGGTCGAATTATCAATCTGCACGA -GTCAGTTTTCATCCTTCATCCATCAGATCGGAAATAGCATCACATACGAGGTCCAAAATG -GCATCAATACTTTGACGCTGGGAGATAGGGGCCTGCGAAAGGCCCATTCCGCGGGCAGCC -TTGTTGCCGGGCTCCGTTTGTACCAGGCTGTCAGATAATCAGCTGCGGTCCCAAAATCTC -CTCTCCGTATAGTGTCTCCCGTCACTTACCCAGGATGCACGGTAAAGGCGATGAAGTCTT 
-TGTTGGCGCTGTGGATAGCACTGTGCATCCGCCGGAGTTAGCTGATTGCAGCCATGGTAG -ACCAGAGTTGCTAGCATCTTACGTAGTGAGCCAGTTTAAGCCTGCCTTGGCGATTCCGTA -CGCCGGAACGATGTGAGCCCCGTGAACCTCAAGTCGGCCAATTGAACCGGCTGCACTGCT -GACCGAAACCCACTTTGGGGCCTGAGCCTTCTCCAGTAAAGGCTTGACGGCCTGGTACAG -GGCCAGGGGGCCGAGCGTATTAATGGTAAATACATCAGCCACGCTTTCGGGGCTCACCAA -ATCCAATGGAATGGTGCCTTTCTCGCCGGCTCCGCCGGCATTGGCCACCACCACATCGAT -ATGGTCGACGCCCTTGAGGTCCTTGATGGCCGTCGCTGGATCGGCCGAATTGCTACTTTC -AATCTTCACTAGCTGTATACGCGAACCATCAGACTTGGGCAAAGCCTCTAGTTCCTTGAC -GTAGTCTGCGGTTGTGTCGCGGACACTGGCAATAACGGTATGCTTAGGGCGGAGTAAGAA -TGATTCTACCAACGTCCGTCCGATTCCTATCTTCTGTCAGCTCGATCCAAGTAACGCTCT -AGGTATGTCTTACCTCGACTAGTGCCGGTAATGAGAACCACAGTGTTGTCGGAAATCGTC -ATGGTGGGAGGATAGATAATAGAATGATTCGAGTGAGAGTCGTACCTAGGATGGTTGCGA -TTGGGTCAGTAGACTTATCAATTCGATAATAAAAAAATGATGATATAGATAGGTGTTTAC -AGCGGCCTTTAAATAGCCAGATATGGGAGATGTTTGCGAAATAGCTCTTTAAGCGATTTA -TAATTCCCATATACTCTTTCACGAATGCTTTGAAATCGGGACCTCAGGATTACTATCCGT -TATTTACGGAGTACAGTGTCATGAAATGGGCCCCTGCTAGAGACTAAGAGGGACTAGCGG -CTAGGCTTCCAGTTCCTGGTAACTCTCCTTGACGCCATTCGTCTTATCCTCGTGATACCA -TGATAAGTCCTAGACCAGCCTAATGCGAATCCTCTAGGCTGTCTAGCCCTATGGCATTCG -GCAGTACAACATGCCTACTCAGCATGCATGGCCGGAGTGCGGGTGAATCGTACTTTGAGC -ATCATTTAGTATTATCAGACTGTTTACAATGTATAGCCAAGTATCACTGAGTGAGCATCC -TGTGCTGTGAGATCCCAGCGAACAATCGTATGAGTGGTGAGCTATCCAATTGAGCGCTTA -TCATAAACCAGTGACTTGTTAATTGATATCTCATTCTTAATATTTTCTAGCCGTTCGATT -TCAATCTAACCTGGCGCTGGGTATATCATTCGCCAGTGATGTTGCGTCATAGCCTTTGCA -ATGAAGAACCGTACCCAGCGTAGATCACTGATGCAGTCATCGTCCTTGTATGCAGACCAA -ATTGATATAAAGTTATCTTGCGAATTTTCAAGCCCCTCCCGGTAGATCTCTTCATCTGTG -CTTGGGTAATCTGAGCTCGTACTTCCTCGCCTATTCCCATCTCCGCAAGGTAATGGAAAC -GTCTTGGTAGGTTCCATGTGTCCGTCTCGAGCAAGCATTCGACGTTGTGACAATGGATGT -CCTGCAGGAACAGAAAACCGTTCACCAGAAGCTCTATTTGGCAGCATCTCAATTGGCAGA -GTGTCAGGGACGAGGGCGAGAAAGTTATCACAAGTGAAAGAACGAGCCTCCATGGCCTTC -AGAAAGGCTTTCACATATTTTATCTGGGCTTGGGAAATGGCCAGCTGGAGAGCGGTGCCC -CTTCCTAGGCCCTTGATTAAAGGGTTGGCTCCGAGTCTCAGCAGCTCCTCTGCAAAAGCT -AGAGTCATTCTTTCTGTGGCTAAGAATAGAGCAGTTCGGCCATCCGAGTCCGCTTGGTTG 
-ATTCCAGCACCATGTTCGATGAGCAATTGTATTGATGCGCGTGGATTTATGTGACCCAAC -ATCGTATGCCCATTGGGTCGATCAAACTGTCGTACTTGCGCGTGTTCATCGTCCACGTCA -ATGAGCCGGCCATTCTCATCCAATGTTTCCACGACTAGATGTAGCAACGGCGAGATGGTC -TGACCGTGTAGATGTGTCGAATTAACATCGAATCCTCTCTCAATAAATACCTGAAGTGTT -TCTGCATTCTTGTGAAGGGCAGAAGCAGCAAGTGCAGCTTTGCAATAATCGTTGTATGGG -TCAAGCTCAAAATTCCATTGCGCAAGTGCTGCTCGAAATTGATCGACTGGGCAAACGGCC -GCTGCGTGCTCTAGGAACGAAATGGGCTTGAGAGAATGTCGTCTGAGATGATTAATTTGT -TTGTGTATCTTGTCAAAAGGTTGTATGCCTCCCTTTTCGAGTAGAAATTTCATAACTTCC -AAGCGGCCTGCACGCGAAGCTTGCAGATCAGCCGAATTTGTCGCCAACGAAATCAAAGTC -TCCACGTCGTACTTGTTGAGAGCACCAAGAGTGTTTTCCATCCCGGCTATGAATTGGTCT -GAGTCGCAAGCTGTATTCAATGCAACTGCGGCAAATCTCTTGAAATCCTTGTCATCAATT -TTGTGGAGAAGCCTGAGCACAACGGCTGTTTCCCCCTTTTTCGCCGCGGTAGAAACATCA -ATCTGGGAGACATGATAATCCTGGGAGCGAAGGAAAGCCTCTGTGATTTCGGCATGCCCG -TTATTTATTGAACGGCACACAGGTGTGTAACTTAATTGGGACAAGACAGGCCCACACCTG -CTGTCAAGGAGTCGTTGAGTGAGAGACATGTTTCCTGTCTCTGCTGCTACAGCAAGTAAA -TAGGCTTGCTCAAGTGGAGTAATATCTGCAAGTTTTGTTTCCAGATCGACTCGGTCCAAT -ATTGCCTGAGCAATGTGTGGTTTTTCCCATGGAGATCGCCGAAAGAGTTTGGGGCTATCG -CGGTAGCAAGGTCGCAACATTGAATTCCAGCCGCTGGTAGTCATATTACCCGGAAGTTTG -GGAGGGCAATTCTCGAGGAAGAATGATATTATATTGTCATAGCCCCGTGTCATGGCAGCG -TCAAAAGCCGAGGTAGGGTTATATTTGGAGTTTGGATAGGTAAAGTCAGCACCGCATTTT -ACCAGGTATTTAACAATCTCCAAATGACCTGCATTTGTAGCACGATAAAGAAGATCAACC -GTCAAGGCTTGTAATTGATCTTTGCACATAGTCTCCTCCATTATGTATTTAACCACCGGC -AAATGTCCTCCCCTAATCGCGGATTGCATCAGAGTGGTCCCCGCCTTGTCAATATAGTTC -ACTAGGACACCTTGAGCTATTAGGAACTTCACTAAATCCAGATGACCGCGCATGGCCGCA -CGGCATAGAAGATGCCAGGATCGAAAAGCAGACTTTGTGATGATTGAAGGCAGATTGTCA -ATTAACATCTGAAGAATTTCCAGGTGACCCTTTGTTGCTGCGATGATCATAGGACTCGAG -CATGTTTTGTTGTCCACGAAAGTGTCGCCTTCTCTCCACCAAAATTGTGGTGTTTCCATG -AAATCGAAGAGTTTGTAACCAGCAGACAGCAGCATGCGTACAGCATCTGAATTGCCGGTC -CTCACAAGGCGCAGAATATTGGGGTCGCAACTATTCGCAAACCTCGAGTAGAGTTGAGGA -TTGAATATGGTATAAGACAGGTGACAAGTCTGTGACATGGCGTTTATTTCTGATGCATAT -TGTAGATTTGATGATATACAGAGTAAGAGCTCCACGGGCAGTGAAGATAGAGACATTTTC -TATGATGATGTAGTTGAGTTCGTAGCTTTCCGCCCAAGTATTGGTGATGTATTTTTCTTA 
-AGAAATAAAACAGCGAAATACCACTAGACAATAATAAAATTGTCGGTTCTTGCTTAAGGC -CTAGAGGGGATATCCCAGCCTGACTATTTCCAATCTGGGAGGCACGTGATTATGCCTTAC -TGCCTCAGGCATTCAATGACGCTAACCTCCGACTCACCGACCGGCCAGATAATCTGCCAT -CCTTATCTTCCCCAGTCACCTACATCAACAAGTGCTCTGAATAACACATACTGGACATTT -ATTTTTCAGCATGTACCAGCTGGATTATTGCCATTGCTGATACAAATTCTACCACTGTCG -CCTTCCTATCTTTTTCCTTACGCCTCTTCACCTTCGGACATAGTTGGTCACCCGTCGCGA -GCCAAATGCAAATGATGCATGCTGGATATCTGTGCTTTCTGATGGTATGAGCGGTGATAC -TGCAAGCTTGTGGATAATTTCAAGGAGCGTATTGCATTATCGCGTGGTGTAGGGTCGCCA -GAATGAGCCCTGCTGTAGATACCAGTGAAGCTCTGTCACAGCGGAAGACGGCCGAACATG -GTATCGCAAACGGGAGCAAAGGGGCCTTAAAAGCCGAGAAGCAATGCCTTTCTCCTTCTG -TTGAGCGGAAACGCAGTCGAGTAAATTCCCCTGGCGCACAGTCCAACGGCGAGGCCGAAG -GCTCAACCAATGGAGATGACAACACGAATGGGCAACGGAGGCACGATACGACTGTGGTAC -GACAGCCTGGCAGTGGACGATATAAATTGGCCAATATGGAACACGAGCAAGGACTTGGTA -TCTTAGTCGAGCATTCACCAAACAACGGACATAATCCTGGCCAGTTGTCGGTAGTGGATG -TATACGATGGCCCCAAGGCCATAGCTGCTTTGGCTGATATGTCTACTGACCTGCCTCACC -AACTACCAGCAGACAAAGAGAAGGTCATGAAGCTGTCTCCTGAGAAGATCCAGGCACTCA -CGTCCTCCCCAGACTCAATACCTTACCGCCCCGTGGACTCAGAATCGAACAGTGGAAGGC -GAGTAGTCTCGGACAACGTTCACGGGAACACCCTTCTCACGCAGCCGGAGATACCGGAGT -TTGTGCTTCAGCCACCAAGCGAAAAGCCATCACCTGCAGACGTTTTGCCAAAGTTGCGGC -CCACTAAAGAGGCGTTGGTGGGAGATGCTTCTACACCCACGAGTCCACCACGGAATACTC -AAAACTCCTCCCTCCGCCACCGGCCGCAGCCAGCTCGAACTGTTTCCACACCTGGATCCT -TGCGGCGTCAATTGCCCGTTACTCCAGGTAATGACCGTCTAACCCAGACATGGGCATCGC -GCTCGAGACAGGACCGACCCGCGCTGGACCGCGAGTTAAAGAGTCACCTCATTGCATCGC -CTCAGATTATAGAATCGCCCAAGTCCTCACCCTTGCCATCATCTATTCCTTTGCCGCCAG -TCTCTCTGCCAACATATCTTCAGCTTGAGCTTGCTTCTGGCAGACCTTCACCACTGTACA -TTCACCGCGACGCAGCCAACGACTTCCCTTATGAATCATCACGAGTGAAGATTGAGAGGC -TCATGAACTTCATCATGCTTGCACCTATGCTCGAGCAGGTACTTTGCTTTGGGAGTCTTG -CTTGCCTTGATGCCTGGCTTTACTCTTTCACGATAATGCCACTGCGGTTTATCAAGGCTG -TCTATATATTGGGTGAGTCATGGGTGATGAACCTAGGAGCCGAGATCCGGTTCATTTGGA -AATTTGTGATGAATGGAATTGGTCGGGTATGGCGCAGGAGGAATCATAACGTGAAGGAGG -AGCAGCGTGGAAGACGCGAAAGCGAATCTGACGCTACTCCTCACCTGCCTCCCGGCTCTT -CTGCCGGGCCAGACATGGGACCAAGACGGAAACATCGATCCTCCGAATCTCGCAAACATC 
-ACCATCGACGGAAGAAATCGATGCCCTCAGCGCTACTTCCAGACGACAAAGCGGACATCC -TGACAGGCCTGCTGATGATAGCCACATGCTGTGTTTTGATGTACTTCGATGCTAGTCGAA -TGTACCACTGGATTCGAGGACAAGCTGCTATCAAGCTGTACGTTATCTACAATGTGCTGG -AGGTTAGCGACAGGCTTTTAGCTGCCATAGGTCAAGATGTCCTCGAGTGTCTGTTTTCGC -GGGAGGCCCTCGAGCGTCGTCCGGACGGCCGGAGCAAGATTATTCGTCCATTCTGGTTGT -TCTTGGTGGCATTGGTCTACACAGTATCCCATGCTTTGTCTCTGTTCTACCAAGTGATGA -CTCTGAATGTGGCAGTAAATTCATACTCCAACGCATTGATAACGCTACTTCTATCCAACC -AATTTGTGGAGATCAAGTCTACCGTCTTCCGCAAATTCGAGAAAGAAAATCTCTTCCAGC -TCACCTGCGCGGACGTCGTGGAACGGTTCCAGCTATGGCTGATGCTTACTATCATCGCTT -CGCGTAACATCGTGGAGACAGGTGCATTCAACTTCATCGGCAACCTGGGGTTAGGATCTA -GCTTTCCAGGCCAATCCTCCACAATCACGAATAGCACCCCACTATCCACGCCTCCACGCA -CTGCATCATCCATCCTCCCTCAAGCATTCACCCTCTTCCCATCTTCAATCCTATCATCCT -TCAACAGCGTCAACTCCTTCATCCCAACATTGGCCCAAGTCCTAGGCCCATTCCTAGTCG -TACTAGGCTCCGAGATGCTAGTAGACTGGCTCAAACACGCCTACATCAACAAATTCAACA -ACAACCGCCCAGCAATCTACGGCCGCTTCCTCGACGTCCTAGCAAAGGATTACTATACCA -ACGCCTTCGGCGAACAAAATCTAACCCGTCGCATCGGTCTACCTGTCATTCCGCTTTCCT -GTCTCTTCTTCCGAGTCTCCGTCCAAACATACCAAATGTTCCTAGCAGCCCTCATCCCCC -AAAACCCATCCTCAACAGCAAACGGAGCAACCTCCCTAACCTCGATCCATAATAACTACG -CTCCCTCCCCAATCCCCTCAGCACCACCCTTAACACTCGCAACTCTCCTCCCAGCCTCCG -CAGCCCACATCAGCGCGTTCTTCCGCACCGTCCTCGAAAATGCAATTCCGTCTCCAGCAC -AGTCTGTCCATATCTTCACCGGTATCCTGCTTTTGACGGGATTCATCGTCCTGCTCATTC -TGAAACTCTTATTGGGAATGGCCCTGCTTGCATTTGCGCGTTCGCGTTATAGGAAAATAA -AGGCGCGGGAGAGTGAGCGCAGACAATCTGGTGCTGAGAGTGGGGCACCTCGTTCTCGGG -ATTTCCATGTTGAGGGTAGTCAGCGTGTTGGTGGGTGGGGTGTTGTTGAACTTGGGGATG -AGAAGAGGAAATTGATCTATGCTGATGATCCGGATGGGTTGCGGCGGGTGAAGGAGAAGG -AGGAAAAGGATAAGAGTAAGGATGGGGAGTTTAATGTCGATCATGTACAGCGGTATGAGA -TGATTGCCAAGAAGATTTGGTGATTGGTATGTTTTGCGGATGGTGAGACGTTGTTGAGTG -TAACTTATTGGCACTTGTGGTGCCTAGTAGAGCATAGGTGTATAATATTCGTTTAATTTT -TGGTTTACATTATTATGGGTCATTGAATCACGCAGTAAGAAGGAATATCAATATTACTTT -CAAGCTACTTGTGGTGTATACATTCGCAGTAAATGGGGAAATAAAGAGTGATCAATTCGT -ATCATGGGGAATAAAGATTCGAGGAAGGATATCGGAGTAAGGCAAATAGGGGTATGTACA -AGATCTCGAGAGGCTCGCTGTTCGTGATCAGTATAAGTGAGTGGTCCTGGTCATCAAGCA 
-CAGGAGAGCCAGCTCCTGATTTTCATCATAATAAGGTGTCATGGCCTTCAAATGAAGCGT -ATGGATGTGAGCCTTATCAATATTGGTGAGTATTTGAGCGATTGAATGTGTGCCCTGGTG -GAGCGCGATGCATAGCGGAGGGAGAAGCAGTGTTTGCTCGAGTATAGCTGGACGGACCGG -GAGGAGGCTGAGGTGTTTGATGGCCCATTGATGGGTTGAAAGCTGCATAGCCACCATTTT -GAGGAGGGGTGGCGGAACTCATTGGAGTGAAGGTCCTTGGTGGAGGCTGTGCCATTGCGG -CCGCTTGTGGGGGAGGTGTGGGTCGGTAGCTTGCCGGGGTGTTGGAGCGGTAGGGAGCAG -GTGTCCCGCCAGTGCTGGCCGGCGTGTAGGACCTTGTTGAATAAGGGTCTGTTGAATAAG -GGTCTGTTGGATAAGGATCTGTTGAATAAGGATCTGTCGAATATGGATCTTGTGGGTAGT -AATCTTGCTGTGCACGAGGCGGGCCTCTCTGGCTACTGGGAGCCCCGTATCTTGGTCCAT -ATCCGTCCTCTCCGCCGTATGCAGCCCTCGGAGGGCCGTAAGGATCGTATTGAGCAGGCG -CATGGCCGCGGTTGAAATAATTGTGATCATCGGAAGCTTCCATGTCAGCCGAAGCATAAT -CAGGCGCTGGGGTGTGGTCCTGATATGGCCGAGAATCAGAGCTATCGGTCAAGTTGCGAT -ATCCAGAGCCGGGGGTTTGACGATCATCATTGCCATATGGCGCAGGAGCTGTTCGAGATT -GAGGAGTGGCCATCCTTGATGGTACGTTAGCCGGCAAAGGAGGAACCGGGGGAAGCGTAG -GGTTTTGGCGATCAAGGGGAGTGTATCCCATTCCGGCCGCATTGCCTGTCAGTGAAGCGG -ACTCGGACAATACTGAAGATGAAGTTCCAGTTCGAGCCAGTGTTGGTTTGGCATCAAAGT -CCAGATTTGGCAGTGTCGGGTTGGACCCCGGCGCCGTCGTGGAAGTCGATCGCGAATATG -GAGGCAAAGTGGTCTGGGTGGTACTTCGCGATAATACAGCCACTGCGGGGGCTTTATCTC -CAGCGAGAGACGGGAGAGTCGGGTTGGAATCAGATGTCAGCATTGGCTGAGTGGGCACTC -GGTTCTGTAATTTGAGTCCCTTTGCCAGAGCCTTGGTGTTTTTGCGGTGCACGATGCGTT -TCAGTCGTTGACCAACCTTCCGTCGGCAGTACCTCGAAAGAGTGCCGTCTTCCGCTGGGA -TATGGTGAAAGAGGAAAATCAAATAGAGTACAATCGCCGATATCAACTTGAGAACCGATA -GGACCCAAATGACCAAAGTGAAAAGCATACCAAACAAGACCACAGCTTGAAGGTTATTCT -CCTCGGCCAGAATCTTGACGTTGTTGAAGAATTGACTGGCTCCAGAGCTTCCATCATCGT -GAGCTGCATTCTTACCTCCAGGAAGGAGGTCCATCTGCATGACCGAATAGAGAGTAATAC -CATTGAGAACTTGACGTGGACCATCTGCGAAAATCGTGTTCATCCAGACTAACAGCTGTT -AGCTAATTTCAAATATCGGTTGAGCCACAGTAGTATACGTACATTGGAAAGAGAAATATG -CATAAAGAGCAACATATTCTGCTGCTTTTCTGTCTTTGGTTAATTCAGCGAAGACGAGGA -ATCTCCTGTATCCACGACCTCGTTTGCCGAAGCGAATACTCTGAATCCGCACAGCCAGCG -GATCCAGAAAGCTGCGAGTGATACTACCCGAGCGGATGGCGCGGACGGCGTGTATCCATC -GATAGGCCAATAGAACGAACGAAGCAATGATACAGGCGGCGAAAATCCAGCGGGAAATAT -TGAAGGGAATGGCGGGTTCAACTCTACCGGACCACCGAGAGAAAGCAAGCAAGTTGATGG 
-CAGTGAATGTATCGACCCCGTATACTGCAATTGAGATAAACAGGAACACCCATATGAAGA -AATATGAGAAACCTGTCCAGCAAGACTCGGATTTAAAGTCGTCCAAGTTCTGAGGAATCG -ATTATCAGTATATCGAGCCATCTATACACCACTGAAATTGTGTAAACATACCACGTAGTC -CCATTTCTCCTCCATCGCTACATCGCCCTTCTCGCGGTCTCCACAGCACAGCATTGTGTC -GATTGACCTGGTCCTGCCAGGAGGTTATCACGGAGGATTGAAAAGAACGACGGGATCGCG -GATGATCAAAGGTTGACTTCCTCGGGCCAGCCGGGCAAAGTCGTGTTTGGAGGTTTGAAA -TAGGTGCAAAGAATAGAGAGACAAAGCTCAGGCGCAACGAGGGAATGATAATGTCTTAAC -CGCGCGAATGGCGCATACCTGCGTAAGGCAAGGAGAGATGTGGATCACAATGTCACGTAT -CGAAAGCTGCAAGCCACGTGTAGATGATCACAATGCAGCGCTGGTATCTCAGATGCGCAT -TGGCGTATTCAAAGCGATAGACAGTGTTTCAGAGACGATTTGCCAAGAAAGGGGTGGAGA -AAGAATGCGCAAGTACTCCGTACTTGGAAATAGGATTGAATGAGTGTGGCCAGGTCCCTA -TGGCATCTGAAAAACGATTGACATGGAAGTTAAGAAAGGGGAAGACCGAAACATCGAGGG -GAGGGCAAATGTGGAGACTGCGGGAAGTTACATCGAGCGGGTTTTTAGGGCTGAAATCAA -AGGGCCTTCTTTTGTTCTTTGTCTTTGATCTAATTTGGGAACATCCTAGGCTACCTTCAT -CAATACCCTGTACTTATTGGCATATCCCTATTTCTGTCACTATTCTCGGAACAATACAAT -GTGCGACAGTTATCAAGGGAAAATAGGGAAAATGGGTCTGTAATCTCCATTCCCACTCCT -CACTGGCCATCCGCACGCGCCAGGGGAGGGTCCATGTACCTAAGCGCTTGACTCCGCTAC -ACGGTCTGATACAATTTTCAACGTGGCCGGCGCTATCGTATTTATTTATATTTTCTGACC -TCGAGACTCGAAAGAGGACTACTCCGTATGTACGGAGCACCTACGGAGTACCTACGGAGT -AAACAAACATTGAATGGTTTGACACCTGAGGTGTCATTTCCCGTTAGTCACCTGATTTAA -TTGGACCTCGGCTTTTCAACGCCAAGACTGGACCCAGCCCAGATAAGCACTAATGCCCAG -CTCCATTGCACATCGACAACCACAAAGGTGCCATTTTTGATGAGACTGGCCAGGCTTCAA -CACTTGGACAGTTGGAATTATCCAATGATGCCCTTAGCTCTTCCCATTCGCCTTATCCAA -TGGTGTCGTGGCTCGTAACTTCGACCTGACATTGTTTAGTCTCGTTGAAAAACATACAAC -TCCGTGTATAAATCACCAATTCCTTCCATTATATCCTTCAACTTCCTTTAATCCATCGTT -TGTATGCTTTCTGTCGAAAATACTTGACACGAGCATGCTCGGCTGAGTCGAATCCACCCC -ACATATCGAAAATAAGTCCCTGCACGGCGTCTCATACTTCACTCCAGCCTCCATGAGCAT -CGAGGTTGCGTGACGACATAACTCAAAGAAAAGAGATTCCGTCTTCTTAAACTGCATGTG -GGTTCAAATTTCCCGCGTGATACAGTGATGAGTAAATTGCTGCCATTGTCTATTCGATTG -TTGCGCCCACACGCTTTATGCCCTCCAGTTGATTCTCCTGTCCTTTTTCATAACACGCAC -ATACCACTGCTTTAGCGCAACCAATTCCGCTATGCATTCGCTTCCACCGCATTTAACTTG -ATAGAGACTGATGATAATCTTAACACTCGTCTCCTGTACGAGCCTCTTGAAGGAGTCGAG 
-AGACTTGAGAACTATCGACCAGGGGGCTATCACCCCATTCAAATCGGAGATCACTTCCAT -GGTCGATATCGGACTTTCATCAGCTAGGACATGAGCCTTATTCAAATATGTGGCTGACGC -GTGATGAGCAATCTAACCAATACGTTGCAGTGAAGATTTGCACGGCAAACTCGAACCCAA -AGGAAATAGAAATTATGTCCACTCTCACTCGTCCTCACTATTCTCCAAACAAAAATACCA -AAAAGGCGCTGGTCCCTTCCATTCTGGATAGATTCATCATTCACGGCCCAAATGGCAATC -ATGCTTGCCACATTGCAGCACCAGCGAGAGCTAGCCTCTCTGGCTTGAAAGACGGTTCGT -AGATCCGTTTGTTCTAGCTCAATGTAGCTCGGTCGTTGGCGAAACGATTCGTTGTTATTC -TGGATTTGTCCATGCCTAAGGAATTGTCCATGGAGATCTCCATCTGGGCAACATTCTGCT -CAAATTTCTGCTCAATTTTGACCAGCTCTCAATCAAACAATTATATAGAGAATATGGAGC -ACCGGAGCTAGACCCGGTTGTTCATCTAGACGATGACCCGGCTCCTAGCAATCATTTCCC -TCTTGACGTTCCATCCCACGGCATTGCGCCCATATGGCTAGGAGAAGCAAGTGAGAAAAT -CACACTTGCAGAAGCCAGGATCTTGCTCACAGTCTTTGGTGAGGTGTTCTCTCCCTCAAA -AGAACTGAAATACCAATCTTGCACCCCGCTTGCTCTTCGTCCCCTGAGGCTCGGTTTGAG -TCAAATAACCCTCTATATTTTTCATCGGACATCTGGACTCTCGCCTGTACCATATGGTCC -ATCGTCGCTCAACGACCACTGTTTAGGGAATTTCTCGCGACAGAAGACGATATAACCTGT -GAGAATGTTAATGCCCTTGGCATATTGCCACCTGAATAGTGGAGGAAGTGGGAGGCGCGA -CAACACAAATTTACCGGGGATGGTAAGCCAATAAACCGTACCTATTTCCGATCTTTGGAC -GATCGGCTTGAGGACAGCGTGCAAGAGCCTAGGCGAGATAGTGGGATGTCGTTGCTCGAT -GCCAGGGAGAGGGAGGCTTTCTCTGACATGCTACAGCAGATGCTCTCATCCAGACCCGAG -GATCGTCCTACTACCAAGCAAATTCTCGAGTCGGAATAGATGGTAAAATGGGCTCTGCCC -GAATTTGCCAAAATTCGAAATAATGTATGAGCTACTCTCAAATGAGACTATGTATATTCT -TCGGTCGCTGTGGGGGTATTCCGGGATATATTGCACATTGCAAGATGCGTATGCCTGAAA -CCCACTATGATGTTAAAACGTTCTATTTTAGCACAGTATCTTAAGCTTACCTACTCCAAG -ATTCGCGTTTGATATATAGCTGCAAACCTTTTGTCTTAGCTACCTAGTTGACATAGACTA -TATTTGGCAAAGGGGAAGGGGAGGAGGCGGGTCGGTAATGTAAGTCCCTTATTTCTACTG -CTTTTCTCTCATGCCACCAAGACCCAAGGCAATTCCAGAGTTGCCGTCCGGGGATCTCTA -ATACACATTTGACTACCCTTGAAAGACCCCATCGATCAAGGTTGACGTGCTCCCAGATAT -CACCTTCACGCCGAATGAGTGGGTGACTAGTCTTGTCATTAGAATATATCTATGCTCATA -TCTCCACATTTCGATTGGACGTTGATGATAACAGAAAGCCTGGTGTTTCCGTCAAACCCC -CGGGATCTCATTCCTCCGGCTCGAGTGGCTTAAGTCCGTCGTTCGTCCGAGACGGGCTGT -AGATGCACCGAACATGCACAATACAAGAGATATCCCTGCAATATACAAATATATGGAATA -ATAACCTCAAACCATCCCCATACTCAGTGACAAACACTTCAATCAACACCCCACACGTCA 
-GTTGCTACCATAAACATCCAAAATGGTCTCTCTACTAGATGTCCAGTCATCAAATGCTCA -AATCGCCAACACCCTCCCAGCAGGCCTCGTGGCAGTCTTCGTGGGCGCAACAAACGGCAT -CGGCGAAGCAGCTCTAAAAGAGTTTGCCCGCTCATCCCGGTCTCCACGCCTATATTTCAT -CGGCCGCTCGCAAGAAGCCGCAGCCCGTATCACAGCCGAATGTCGACAGTTGAACCCGGA -GGGAGAATTCATCTTCATCAAAGCGGACGTCAGTCTGATTCGGAACGTCGACGCGGTCTG -TCGCGAGATTCAGAGTAAAGAGAAGAGTATCAACATCCTCTTCCTTAGCTGTGGAACTAT -TCGTTCCGGAGAAGGTAAGTGGGTAGACGATTCGTAATATCTAATAAAAAGGAGATATCG -TTGCTCATGGTATGCACACAGATACATCCGAAGGTTTACATGTCCTACCTGCAATAGGAT -ACTACGCACGAACACGATTCATCACCAACCTCCTCCCCAATATCAAGCAGGCAACCAGCC -TACGGCGCGTTGTCAGCGTCCTAGCCGGCGGCCACGAAGGTCCCATCGACGAGACAGACT -TCCAGGCAAAGAATATGTCTGTACTAAGCCTACGTGGACATCTTGTGTCCATGACTGATC -TGGCTCTGGAAACACTGGCCGAACAAGCGCCAGAAGTCACATTTGTCAATGATTACCCTG -GCGCAGTCAAGACAGGGATTGGGCGCGAGGCGAATACTTTCCTCACCTGGTTTATGGGTA -TCGTGCTCATGATTATCGGGCCTTTCATTTACATTCCCATTCGGGAGTCGGGGGAGAGAC -ATCTTTTCTTTGCGACGAGTGCCAAGTATCCTCCCCGTGTCCGTTTAGATGCGGCAGAAG -TTTCTGATGGTGTGCCTTTGTCGGAGGGGGTTGAGATTGCTAGTGGGACTGATGGGAAGG -TTGGGAGCGGTGTTTACAGTATTCACTGGAGTGGGGAGCATGCTGGTCCTAGAGTTGTGA -AGTTGCTGGCTGGACTTCGCGAGCAGGGGATGGCTCAGAAGGTTTGGCAGCATACGATTG -GTGAGTTTGATAGAATTTCAGGTTCGGCGGATGTCTGAGTCAGAGAATATCTTTGGGTGA -CTTTGTCGGCTTTCGATTTGGAGTGGGTACAGCAATACATATTTCCCCTTTTCATCCTAT -AATAATATTGCAGTGCGTAGGCTGAATGGTAAACGAGTCACAGCTCAGGAGAGGTGCCCA -GGAAAGGTGGCTTGCTTCGTAATTGTAACGAATGCATACTGCCATGCTACCAAATCGTGT -GTTCAGGTCTCGGCACATCGAAAAGTTGGGGTATTCAGGTAATGCCATTGTATCCACTCG -TTGGATGTCTCCGGTTCATCCATTGTGAACTATACCAGATGTGTTCCGACAAATAGGTAC -CGCATAATTTGTAGCGTCCCAAAGAAGCACAATGGGGTTCGTGTTTCTCAGTTATATATA -TACGCAGTCTGTTATTTGTTGTTTCTGTTCCCAGGGCCGCCACGGTCTTGTGAACATTGA -GCCCTCCCACGGTTGTTTCCACGAGAGGAAACTGGTGCTTTTTCAAGGCCACCTCTGACG -CAAGTTCACCGATGCCAAATTTGCGGCTCTCCCTTGGTCAATTCCCAGCCTTAGCATCTT -CTATAAGTTTCCGGTGATTGAGATTTACCGTCAACAATACATTCTTGTCGAATATCACGT -TGCGGATCTGGGCCAGTCTTCTTTGGGACCGGCTCCGTACATTCGCACCGAAACACCTTC -CTTGATCTTCGTTGGGGTCAAGGTCCAATCTTCAAAGCCTCCCCATGTTTCTATCCTTTG -GCGTTCTGCCAGTAACGCTGTTTGTGTGAATTGATGTCATATCCTGTGATATGTATCACA 
-AAGTCGATTGTGTCGCAAGATTTCTCTCAATTTTATCGGTCATGCTTCTGATGAGTCCAT -GTTCTCAGGAATGGGAGGAACTTGGAGCATCCATGGATGTCTGAGATCGGCCGAGTCGCT -GACATAATCACGCCGCGAGCCCCTATGCAGGTGGAATTCGATTCATGTGATATGCGGATC -CAGGCTTAATCCGGTAATCCTTGCGAGGTCGAGTAAAAGATCAAAAACAATCTTGAAAGT -GCTTGTTTTTACCATAATTGTAACGGGCTTGGTTACTGCGTTGACCTTGGACGCTTCTGG -CTTTCATAATCTCCCCACCTCACCCCCAGGCAACCATCCTAAAACCGAGTATCATCCATC -TCATTTGTGCAAAGGTCATATTCTCTGCATTTCCATCTCTACAACTGTATGGATTATAAA -ATCCAGATACAATTGGAACCAGCACTTCGTCGTCTTGTCTACTCCTCCATTTGAGTACAC -GCTTCACCAGCACCGGAGAACTAATTTAACCCGAGAAAGCCTTTAATGCCCAAGAACCAT -TTCTCTGGTCTTCTTGCTCCATGTGAAACCCGAGCTGTCACATTACGAATTTTGTTTCCC -TGATCCTTCGGGAAAATCTAGCAATGGGACACATCAGCGATAACTTCACCATTCTCTATC -TCGGCCTCGCCGCCGGGATATACTTCAAGCCGGAGTATGCTATTTTCAATTCCAGGATTG -TGACAGCTGCTGTTCTTTTCGGGATCATCGCTGTGTCGAGGACGATATATCAGCTTGTGT -TGTATCCCGACTACTTCACTCCATTGAAACACATTGATTCACCGGCAGTAAGCAAGCCAT -TCTCCGTATCTATTTGAGCAATGTGCCTAATGATATCTAGGATCGAAGCTGGCTCACCGG -CAATTCTCCGAATTTCTTGCTTGAGACCCCCTATCCACAGCTACGGGAGCGGGCCAAGGA -GAGGCCCAATCAAGACCTACTCCGGTACTATATTGTCGGAAACCTCGAAAGAGTGATTTT -GACAAGTCCAAAAGCATTGGGTGAGCTGCTTGTCACAAAAGTTTACGACTTTCAGAAACC -CGAATTAGTTCGGCAGAGTTTGCGGCGCATTACAGGTGATGGAGTCCTACTTGCCGAAGG -TGAAGAGCACAAGGTAGATTTGGCCCTTCAAATGCCACTTTCCATGGATCAAAACTTACA -ATTCCTCCCCTAGGTTCAACGCAAGAATCTCATGCCAGCCTTTGCCTACCGCCACATCAA -GAACCTCTATCCGGTTTTTTGGGAGAAAAGTGCAGAAATGGTCAAGATGATCGAGGAAGA -CCTACAGAGCAGAAAGGCCAACGGGAATAACGACAACACAGTCCAAATCAGTAACTGGGC -CAGTCGAGCAACCCTGGACATCATCGGCGTCGCCGGTATGGACCACGACTTCGACTCTCT -CCGAGAACCCGACAACACCCTCAATCAATCCTACCGCAAAATCATGTCCTCGCCACCCCT -CATTATGAAGATTCTTTTTGTGATAGGTATGCTATTCGGTAATCCAACATGGGTCCACGC -ATTACCGACAAAACGCAACAAAGATATCAAGGAGAGCGGCGAGGTCATCCGCAATGTCGC -ACGACAAATGATCCGACAGAAGAAGGCAAAGATGGAAGACCCCAAAGCCGAAACTGGCAT -TGACATCATCTCCGTTGCATTAAGCAGCGGCACCTTCGACGAAGAGAACCTCGTCGACCA -ATCCATGACCTTCCTAGGCGCAGGCCACGAGACAACCGCCACGGCACTACAATGGGCCGT -TTACGCACTCTGCAAGAACCCCAACGTCCAGACACGTCTGCGAGATGAAATCCGCACCAA -TCTCCCCTCCCTCGACGACCCGACTGCAATTTCTGCAGCCGCTGTCGATAACCTGCCTTA 
-CCTGAACGCAGTGTGCAATGAGGTCCTCCGCTTCCACCCCTCTGTGCCGATGACTATACG -CAAGGCAGTTCGTGATACAACTCTTGCTGGCGCGCATATTCCCAAGGATACGACATTGAT -CCTCGCTCCGCAAATCTTGAACCGGATGGAAGAGCTGTGGGGACCAGACGCGGACGAGTT -CAACCCGGACCGCTTTATGGGTCCTGGTAAGGCCAATACCGGCGGAGCGGTTAGCAACTA -TGCATTTTTAACTTTCTTGCATGGTCCGCGTAGCTGTATCGGGCAGGGCTTCGCAAAATC -GGAgcttgcttgcttgattgctgctACTGTCGGTCGGTTCCACATGGAGTTGAAGTTTCC -TGATGCTAAGTTGGAGATCCGAGAGGGAGCTACCGTTTCACCTAAAGATGGAGTACTCGC -CTTGCTTACGCCATTGGAGGGGTGGTAGATCTGTCTCAGATGGGTTTGTTGGTTCTGCGT -GGTTCTTGCAGTATATATTGCTAGTCCCTTGGATACTTTCATCTTTACGTGGTTCGAGGC -AAGCTACTAGAGTCTTGTTGTTGTACATACTGAGCTCAGTAATTGTTCCTTGCGGATATG -TATATGCCTCACATTGTGGAGAAAAAGCAAAATCCAGAAAAATCAAAGCTTTCTTCAGCA -GCGTCACTGTGTTTCAGACTCTCCAATCCTTCTACATACTAGATCAGTACATAAATAATA -GAAAAACAAAACGGCAGACTGAAAACCTCAGGCATGAAAGACATGGCACGTGTAGGTCTG -TCAAGATATCCCGTGGAGCCGCGACAGACATCGCCCCACAAGGTCCCGCGGGTTGACCAA -GATTGGAAGATAGGTTGAGGAATTTACCTATGATACATACATCCCCAGCAGCGATGAGCA -ACTAAAGCTTCACTGGCATGTCGCAAGCCGCCATCCGCCCACATAGCCACAGAGTATGGC -TCATCCTTCTCTAGTGGGTTCAAGTGGGCCAGTAAACGAGAATGTAATGAAAGCGTAAAA -ATAGTCGAGAACAGTTGGATAAAACGTAGTTTTGATATTCGCCATGCAATGAAACCAAGT -AAAAAGGCTATATGCTATTTGTATTCCTCTCGAAAGACCATAATTCTCTCAATATCATTG -CCAGCAATATCCTCCTCTACCCAAGCACGCCACATACCGCCGCAATTAAAGTCAAATCCA -ACCTTGCCTCGATGTAGCTTGACATCTGATATCCCGTTCACAAGGGAAAGTTCTGCCTCT -ATACCAATGATACCTCCTTCACCTTCACCAGTCTTATTCCACCTGGCGCCAGCAGAACGT -TGAAGTTCACCCCCAGGCCCAGATACAGCTGTTACCGCATCTGCAAGGCCAGAAGGAAAC -CTTCCGGGCGAAGAAAACCGTAGCATTGCAGCAGTTGTGCGCGCAGCACCAACCCGAAGG -AAAGAGTCTCCATTCCCAGTCCCAGAGACTGCAACGGCCCGCCGACGAGAATGTGGAGCA -GATAATTGATAAGGATAAGCCTTCTCGGATCCCAAGTCTAGCTGCCGGGAGGAAATACCA -CCCTCAAGGGACGAATAGGTAGTGGTTCTCTGACCCCACAAAAAGCGCGGTATACAATCG -CCCAGCCAATCCCTTGTATCACGTACCAGACCTCTCATCGAAGAATCATACAGCATTCTC -TCTCCACCCGACACAATCTCCTCCCAAGCCTCAGCCCAGAACCCAGCACCCAAGGTAGGC -GTGTCGCCAATTCTCCCAGGCAACTTATTAGTCAACCCACCGGTACTAGTAGCAACGGCC -AGATTGCCCCATTGATCAAGACAGACACAACCAACGGTGCCCTGAGTTAATGAAACAGGC -TCGTCCACCCCCTCCAAGCCCCTCATATGCTCATCCCACCTCTTCTGCGTGAAGAACCAC 
-TCGTCAGGACAGAACTCCAACCCCCAATCCCGAGCAAGCGACTCGACATATGGTCCACTT -AACTGGGAATGCATATTTCCTCCGTCATCAACAGGAGATCCATCGACGGACACGCCGGAT -CTGAGAAGCGACTCGCGCGCGAGATGAATAGGGTGCCGTACATTGCGGACGCCCATTACA -CCTGCGCCGCGCTTGACTGAGCCGGGTTGTGTTGTATCGTGATGCACGGATGCCACCATT -ACCGAAGCTTCCATTTCGATCGTGCCGGCTTTTGTGAAAACGCTGCCACGGCCACAGTTA -AAAAGAGGGTCATCTTCGAGAAGAGAGACAGCGTGGACGGCAGCGCCTAGGGCTGTGCTG -CCATTTTTCAGGAGAATGTGGGTTGAGCGGAGGTAGGATTGGAGAGAGGCGTGGTGGGCA -GCGTAAAGATCCGGGGGGAGTTTGGAGCGTTGGATATTGCCGGCGCCACCGTGGATGATT -AGGGCTGGTTTGAATGATGCTGGCTGTGTCATGTTATTTCGTCTGATTGGGTTATCGACT -ATGTTGGGATTATTTGATGTTGGTTGTTCTGCTCTGGTGCGGGGAAAGATAGTAAGGGAT -GAATTGGGGTATCAACATTGATTGCGCTACATACACTATCTACACTACATACGACATACA -AAGGTACCTGATTGAAGATCTATGCTGGATTTCATCACGATGGCAGATTCCACAGAAAAT -CTAATGTTCCACCTTCATACATCGGAAATTGACATCCATGGCTCATATCAAACGGTGCCA -TTGATGGATCGAATGGTAAGGAAAGCCAGTTATCTGCACCGATATCATTCAATGATGGGA -TGAATCCCAGATCCGCAGGTGGGTCCTGCATTACAGGGGCTCTTATGGGGTTTGACTGGT -CACCATTGTCTGGCCAAGGCGGAGGCCCCATGGATGATCTGCGCGTAGCGTCTCGACTAC -TCTTCACAGTTTTCGACGATGCAAGCAGGCTGCGTCGCAAGCGTGAGACATGCATGTCTA -ACAGCGTGGCATAGCGTGTGCTCAGATGCACATCATCCAAGGCGTTGGATTTGAGTGCTT -GAATGCTCTTTTCTAGTATTTCAATGGATTCTTGGAGCTTGGCTTGCCGTGTACCCAAAC -TGAGAGCTTTCATCAGGAAGATCGAGGCACTTGTTATACGAAGGACAATTCGAACTGGGG -AAAACCGAAGCGCCCCTGCTTCGGCTAGATGGGTGACTTTCCGCAAGATCTGGCCACATC -CATCAATCACTTCTTGGATGTACTCGTAGTCTGTTCGGTCGATGGTCATGGCTCGCACTC -CGTCAAAGCTGTTGGGATCGTTTTCTGCGACAGCACGCTCCACTACTGCTTGCATTCCAA -CTGAATGGGTGTAGACGCGAACAAAGTGGTATTCGATGAATAGATCGTTGTAGAATGCTT -GATCAAGTACTTGGGGCTGGAGGTACTTGTCTTTCCATTGGTTGAGGAGAGGTCGGAAAT -GCTCAAGAAGGCCAACGTATCGGCCGCTGTGTAGTTGCTGGCGTGCAAACGCGGCAGAGG -GGAAGAACATATCAGTAACAGACTTGGCTAGCTTGGTAAGCTCAATCCAGGAGTCCATGA -ACGTCAACCACGTGTTACCCGGTGGACCCAGTCCAAGTGGCTTCCGTACACCCATGATAG -CTTGGTTTGATGATTGAGGGATCGGTGACATGCATCCAATCCGCCAAGCCAATTGATTAA -TGTAAACGAATAGTAGACGTTGTACACGCTGCCAACGAATTTCCATCTGATCTCTCCATT -CTGGTTCGTCGGCTGGTGAACTGCATCCTTGTGTTTCAAATATGCCCAGCTCGTGCGCAA -GAGACAGCGCACACCCAAGCAGCATCCAGGACATTTGGTCAGATCGGCGTGCTGGCTCAA 
-TGATATCTTCTACCCCACGACTGTCGGGTGCTGAGCTTGAATCGTTGTTCTCATGGTGCG -TGGGGACTGATGGGATCAAATCATCATCCCAGCCATCCGTCTCGGGTGGGAAATGCAGGG -CGCGTGGATGCCATTCTGACATCAATAGAAGTGCCTCCACAGTACCCAGACTCCGTACCT -TGGATTTTGTTGATTTTTCTTGCCCGAATATCAGCCGCATCGTCAATTTCTGGCAATGTG -ACCATAATCGGTGGTGGATAAAGAAATTCCTCGACTCTCCCCCTGCTCCAGGAAGGACGT -GGTAGCGGGAGGAGATCATGAGAATAGTGGAGCATAGAACTGGCTCCCGAGTAATCAGCC -AGTAGTGGTATCGATGATTGGCGTAGAAATCAGTTAGAATCGGAGATAAACAGGACATAT -TCTTATAGAACCTACTTCTGGTTAGCTTTTATTTGGCATAGACATGGAACATGCGACGGA -ACACACAGATCTATATAAGTAACAGCCTCTCTAGCAGTAAACCAGCCCATTTTGACAAAC -CGACAAGCTTCCCAGACATTAAGGGTATCTTGGGTTGCGTCGGATAGCTTGACAGGGTGG -ATTACCCGTGCCACAGATTCGATTGCGGCATCGTAATTCCCCGGTGTGCGATCTTGAGTA -CTCGGACCTGCCGATGAAGGCCCAGCTAATGATTCGGAGCTAGTATCATGATTGACATCT -TCTTGAGCAGTTGCAGCTTCAAATAGAATGTTGAGTGCATCATTGCCACTAGCAACAACA -GTGCGCATCATAGAGCTCGCAAGTGTAGAATTCGCTCGGTGCCTGTTGTTTTCATACACA -TGAGAAGGGGACTGATCAGTTTGTGAAGATCCCACGGAAGGACGGTCGAGGGAACCACGG -TGGGATCCCGTTTGATTCCGATGATATGGCCATCCCGACGGCAAGTTGGGCTCTTGATGA -AAGTTCATCTCGGGCGATGAGTTTGCTACATTGTTTACATGTGCGTTGGGGGTCTGTCCT -TGTGCAACATTAGAACTAGCAGGCGAGGCCGTCGGAGGCTGTCTGGCGTCTCGGGGTGAC -ACATCGGCTTCATAGCTGTCTGGTGATCGCGCTAGATATTGACCATTTAGACAACAGCCG -ACTTAGGTATGGTAAGCACTCAATCCTAGCGTACCTCGCTTGCGGGACCTTTCCCAGGCG -CGTTTCTCGGGGAACACACATTCCCTTAGTTCCCTTCGGCAGCGCGCACATGGTGGGCCG -ATCGGAAGGCCATCTGGGCCTGTTCCTAGGTCGCATTTGGCCTTCCTTTGCCGACAAGGA -ATGCATGCCTTGTAGGTGCGCTGATACGGCTTGTCCGGACGGGATGCCCTGACATCCATC -GTCAACCGTACCTGGGCGGATTCAATTTCAAAGTGGATTCTGTGGATATTTGGTGATCTG -GATAATTGCAGGGAGACAGAGAAATGTGGAGGGATAATTACCCGGCGATATCCGTCGGGC -CCCACGCGTCTTTATCTCCAGTGTACCTTCGTATCAGGATCAGACCTAGAAATTAATGCA -TTATTCATTATAATATATTCTTGAATCTCATTCATAGAGATTGAAACCAGTAAGGTTATC -CTAATCTCGATTGTTACGCAGTCAAAAGCCAATAGAGGAACAAATTGTAGATCATATGTC -ACGCAGACCAGGGTGGCTGCAAGCCTCCAGGCCCTAATGCATGTACGTTGTTAGATCAAA -TGGGCAATTTTCTAGGCAAAGAAATTCGAACAATATAGACATAAAGAATAACTCCCGTCA -TAGCCGAACTGTACAAGGTACAGCGCTTGGACTCGGGCTCAAGATGGCGGAACAAATTCC -GCAATTGCTGAAGCTCCATGCTGGACGGAGTATATGTACAATAGACAAGCGGTAGAAAGT 
-GATTCAGCTTATAAATTTGAACGATCTATCTTTATTTCTAAGCACAAATCTTCTCTCGAG -CCTCTGTCTAGACCCTTCTTAGTGCCTTCATCGCTCCCCGACTACTCCAATACCCCATAT -ATCGAATATCAACACTCTTCAGCGACAATGACTGACACAGTGACTCTCCACACCTATTTT -TACTCCTCCTGCTCAGGAAGACTCCGCATAGCCCTGCACCTAAAGCAAATCCCTTTCAAA -TCCGTCTATGTGAACCTCCTAAAGGGCGAGCAATCCTCCCCAGCCCATCTGGCCATCAAC -CCCTCAGGAACTGTGCCCGCCCTAGTAATCCAACGCGAATCCAAAGCACCAGTGACAATC -ACTCAATCTCTCGCCGCACTTGAGTACCTTGCAGAAGCATTCCCAGAGCAAGGACCCGCA -TTACTCCCACCGATCTCGGACCCAGAAGGCCGCGCTGTAGTGCGCACGCTGGCGGACATT -ATCTCTTGCGACATTCAGCCTGTTACAAACCTCAGGATCGTGAAGCGAGTTGCGCCGCTG -GGGGTGGAACGTGCTGCCTGGTCGAAAGATTTGATGGAAGATGGATTTAGGGCTTATGAG -ACTGTTGCCAAGGAATCGGCGGGGAAGTTTAGTGTTGGAGATTCTATTACCATTGCGGAC -TTGTGTTTGGTTCCTGCCGCATGGGCAGCCCAGAGAATGGGTGTTGACTTGAGTCAATAC -CCGGTTACGAATGAGATTGTGAAGAACTTGGAGATGGAGGAGGCTGTTAAGAAGGGGCAT -TGGAGGTCGCAGGAGGATACTCCCGAGGAGCTTCGTATTCAGGAATAATGGAAAGGTTGT -ACGTCGGGGACTATCTAGTCTCAAACAATTTACATAGGATAATCTGCCATTGTACTGATA -GTACTGGTATTTAATATGTAGCAAGGGGTTATTTATTCAGATTGTGTTGACTAAAAAGAA -AACTGGCCGAAGCAATTTATTCCAAGCTCTATAGTGTAGAACTAAAAGTAAAGAAGTAAC -ATCACACATTGATCATTTAATTTGATTTTGAGTCATCTACTGCAGCGAAACAGCAGGCAG -GATAGTGCCAACACAATCGCCGAAGCCGACATAATTGCCTTCTGTGCCAGCCATACCGCG -AAGAGTAACAGTGTCACCATCTTCAAGGAACATACGCTCAGAGCCATCCGCTAACTTAAG -AGGAACCTTGCCATTGGTCTGTTCCAAGAAACTACCCTGAGTCTTAGCCTCCTTACCAGA -GATAGTACCCGATCCCAGCAAGTCACCGGTGCGCAAGTTGCATCCTGTGATAGTGTGATG -AGCAACCATCTGAGGGAAAGAGTAGAGCAAGTTGCGGGCGTTGCTGTTAGAGATAACAGT -GGGCTGACCACCGGGGTTGGTGACCTCGACATCGAGGGGGATCTCGTAGGCACTGTCGGC -GCGCTTCTCGCGCAGGTATGGGAGCAGAGACTCGCGATTTGCAGGCTCCAGACCGGGAGC -ACGGAAAGGCTCGAGAGCATCAATCAGGACCACCCAAGGGGTGATGGTAGTACCGAAGTT -CTTAGCATTGAAGGGACCAAGAGGAATGTATTCCCAGGCCTGAATGTCACGGGCGGACCA -GTCGTTCATGAGAACCAGACCGAAAATGTGGTCCTCGGCCTGGTCGATTGGGACAGGCTT -GCCGAGATCATTGGGCTTGCTGATGAAGCAAGCAAGCTCTAGCTCGATATCGAGCTTCTT -GCAGGGAGAGAAAGTAGGAACCTTGGGGTCCGCGGCAGGGTTCGCTAGAACCTGACCCTG -GGGACGGTGGAGGGGAGTGCCTGAAGGCACAACTGATGAGGCACGACCGTGGTAGGCAAC -GGGCAGGTGCTTATAGTTAGGCTGGAGCGCATTGTCCGGGCCACGGAAGAGGACGCCAAT 
-GTTGTAGGCGTGGTTCAGGCCAGCGTAGAAATCGGTATAGTCGCCAATTTGCATGGGTAG -ATGGTTGGTGATCTGAGACAAAGGCAGCAAAGCTGACTGCTGGAGGGAAGCATTGTCCTT -AAGGAGCTGTGGGAACTTTGTGTCGGCGCGGAAGATGTTTTGTAGATACTCGCGCACTTC -CCGGTGCACAGGACGACCGAGGGCAGCAAAGTCATTCAGTGTGGGTTGGCTGAAAACAGC -TAGGTGTTTCTCGATAGCTGGAAGTTGTGAAAAGCCTCCAGATGAGGCAAATGCGCTCAA -GTTCAATGCATGATCACCGATTGCAATGGCAGCTGCCTTTGGTGCAGATTGTGTTGAGAT -GATACCGAACGGGATATTTGCCAGCGAGAAGGGCGAGTTTTTGGGAATTTGAAGCCAGGA -TGCCATGATGAAGTAATGTTTGACGAGTTTAGAAAGAAAGAGAAAGCGACTTCACGACCT -TAACACCTTCAAGTAGTGCGGAATTGGTCCGCATCCCCCGCATCCAGCCGGTGCGGAATG -GAAGGTTAATGGCGGGAATTATCAGATGCGCGGAATGGGCATGCAAATGGTGTGACCTCA -CATTAATTGCTTTTTCCAGGCTGAGATCCTCGGTTGATCGGAGAGTATTTGTAAAGACAG -GTCGGCATTCTCCTAATCATCATTTTCAATATGCCTGTCACTAATTTCAACTACCCTGAT -CCCTACAAGTACCAGACGGGCTTCAACTCGCACCACGAGTATGTTTTCACCTTGTTTCCT -CAAAGGAGCACAACTAACGAGGGAATAGGTCGGAAGCCGTCGAAGGAGCTCTGCCAATTG -GTCAAAATTCCCCACAGAAAGCTCCATATGGGCTCTACACTGAGAAGCTCTCGGGAACAG -CATTCACTGCACCCCGTCATGAAAATAAGCAGACATGGGTCTATCGGATTCTGCCTGCAG -CAGCCCATGAGAACTTCATCGCGGAAGATGCCGACTCGTATCATACAAGCATGACAACCG -AAACCCACAAGCTTCACCATATCCCTAACCAGTTGCGATGGAACCCCTTCGATCTAGATG -AGAAGGTCGACTGGGTACATGGACTGCACCTGATTGCAGGGTCTGGAGACCCGACCTTGA -AGCAAGGACTGGGCATCTTGATGTATGCCGCAGGCAAGGACATGGGCAAGGAAGCTTTCT -ACTCCGCGGACGGAGACTTCTTGATTGTGCCTCAGCACGGTGTGCTGGACATCCAGACAG -AACTCGGTCGCATTATTCTGCGCCCAAATGAAATCTGTGTTATTCCTCGTGGTGTTAGGT -GGGTTTCTTGAGTGTCCTGCATAGGGTCAATAACTAATAATTCTATCAGATACCGAGTGA -CGCTCCCAGACGGTCCTGTGCGGGGGTACATTTGTGAATTATACCAGGGCCACTACGAGC -TCCCAGAGCTTGGTCCCATTGGATCCAACTGTTTGGCCAATCCTCGCGATTTCCAAGCCC -CCGTTGCCGCTTTCGACGACGAAGAGGAGAGCGAGTATAAGCTCTACAGCAAATTCAACA -ATACCCTCTTCGCTGCGCGACAGAACCACACCCCCTTCGATGTTGTCGCCTGGCACGGCA -ATTACTACCCCTTCAAATATGATCTCGGCCGATTCAATACTATCGGTTCAATCTCATTTG -ATCACCCCGACCCTTCTATATTCACTGTGCTGACAGGTCCATCGGATCACGTTGGTACCG -CCATTGCCGACTTTGTTATCTTCCCTCCCCGCTGGCTGGTCGCAGAGGGTACCTTCCGTC -CTCCGTGGTACCACCGTAACACCATGTCGGAATTCATGGGCCTAATTTCTGGCGACTACG -ATGCAAAGGTTGGTGGTGGTTTCAAACCAGCTGGTGCTAGCTTGCACAATGTCATGAGTG 
-CTCATGGTCCCGACTCGGGTGCTTTTGAGGGAGCTAGCAATGCTGAGCTGAAGCCTCAAA -AGGTTGGTGATGGAAGTATGGCTTTCATGTTTGAAAGGTTGGTTTTATTTCAGTATTATT -CCGAGTCTCATTATATGTGCTAACAATGTTCTAGTACTCTCATGGTTGGCGTCTCAGAAT -GGGGTCTTAAGACGTGCGAGAAGGTGCAACCAGAATATAACGAGCACAGCTGGAAGCCTC -TGAAGCGACACTTTGTAAACCCTAACAAAACGGCATAGGCTGTTGCTTAGGCCTGATTAC -TACGATGAGGACAAATACATACTTTCAATTAGTTCTATAGCTTTCAGAAGATACCAAAAT -TCAATCATATCGATCCAGATTGTCTTAGATTATTGCTTCGTTATTCCAAATGCGAGTATT -CCGAAGGACAAACTTGTGCATTATATTATTTCTTCATGATACATAATCTTCACCCCTCAA -GCTCCTTCCTCATCGCCTCCTTAACAATAGCCCCGCACATCTTAGCATAAGGATTCGTCT -GCACAGAAAGTTGCTGTGCAACCTTATTCCCGTAGCGACCCCCAAAGAAGAAATCCCGAC -TAGCCATTTCCTCAAGCTGATCAGCTGAAAGTCCACGCAGACGCTCCGGTCCCGCCTCAT -TACCAAAACCAGTCTTCTTGACGCGCGCTGCCTTCAGGAGTAAGTAACCCCGGTGCGTGT -GCGCGTCAGCCAATAACCGTGCCTGAGTGGCTGAAACCGCATCTGCAGGAGAGGCGGGTG -TGGCAAAATTAATGGCTTGGCCTAGGTCAGAGAAGATGCGGGAGCACAGCTGAGAGGTTT -CTGGGGCGAACAAGGCAGCATCGGCTAGCTCGGTATCAGTGCTTTGGTCATTATGCGCTT -CTTTGCTGTAGATCAGATCGACCAGCAGACGAAGTGTTTGTGCTCTATTTGTGTATGCGG -AGGGATACGTTGGGTGCTGGGTGGTGAGAGTGTCGAAGTCGTTGAGTGCTGACTGGACTG -TCTCTATACTTGGTGATTTGTTGGATTGCAGCTTGCGGATGATGTCTGTTTCGCGTTGTT -TGAGCGATGTCAGATCTGTTTCAGATATTTTCAGAGATGATGGCCATGAGGGAAGCGATA -AATCAGTGGTGATTCCTGATGATGGAGAGGACTCGGCATCGAATAGAGCTTGTAGTACAG -CCGAATCATTAGATGTCAATGATGGGCGAGCAGGTGTGATAGATGTCATCTTGAGATTTG -ATATTCAAGTGAGAGATGAGGGAGGATATTGTGAAAATTGGTTCAAACTGAGCTTACTGT -AGTCACTGAATTGAAGAACTAAAGAACAAAGCAGAGTGTCCCAAAGGAAGCCGAGGAATC -GAAGATCTCCGGATATTAACAACGAGCCTTTGTGGGTCCGGATATCTCCGGATATCTCAG -GGCCCTGGAATAGCCGCGTACTATGACATGGATCTAACAAAGATTGGATTAGATATCCGC -CGGTACAAATTGCGCTTACTAGAGTTATTTAAAAAGGGTAGGGTAGACTAGACTTGACAT -TTAATAAGAAATCTTCTTTCAACTTGTACCATTTCCATTTCATCGACCCAAGATCATCAC -ACAAATATACGGGATTCATCAAAATGGCACCTTCAGCAATTTCCAATAGCCCTCCTCCCC -CAGCTACCAATGAGGCATCTTCAGTGGCCAGCTACCGCGGCTATGATCATGTTCACTGGT -ACGTCGGCAATGCCAAGCAAGCGGCAAGCTACTATATCACCCGCATGGGCTTCAAGCGTA -TTGCCTACAAAGGCCTTGAGACAGGCAGCCGCAGTATCTGCTCCCATGTCATCCGCAATG -GAGACATTACCTTCATCCTCACCTCCCCTCTGCGGTCCCTAGACCAAATCGACCGTTTCA 
-ATCCCGAAGAGCAAGCCGAGCTCAGGGAGATCCACGATCACCTCGAGAAGCACGGTGATG -CCGTCAAGGATGTTGCCTTTGAGGTTGACAACGTCGACGCTGTCTTCTTTGCTGCCGTGA -AGAATGGCGCCAAGGCTGTCTTCAACCCTAAGATTCTTGAAGATAAGAGTGGCCATGTCA -AGACTGCTACCATCCAGACATATGGGCAAACCACACATACCCTCATTGAGCGCAGCGAGT -ACAGCGGTGTCTTTATGCCAGGCTACCGTGCTGAAGCTGGCGAGGAAGATCCAATCTTGA -AGTTCTTGCCCGGTGTCCACCTCAAGCGTATCGATCACTGTGTTGGAAACCAAGACTGGG -ATGAGATGAATAAAATTTGTGAATAGTGAGTTCAGTCCAGATTATTGCTTAATTGGGCCT -CATTTACTGACACCACGGGTTAGCTACGAGAAGGCTCTTGGTTTCCACCGTTTCTGGTCC -GTTGACGATAACCAAATCTGCACGTAAGTGTTGAACATATCTTGGCTGATCGTCAGGCAC -CATGATTAACCTAATAATTACAGTGAGTTCTCAGCCTTGAGCAGTGTCGTCATGGCCTCG -CCCAATGAGATCGTTAAGATGCCCATCAATGAACCAGCAAAAGGCAAGAAGCAATCCCAG -ATTGAGGAATATGTGGACTTCTACAATGGTGCTGGTGTACAACATATTGCACTTCACACC -GACGATATTATCCGTGATATCACCAATCTCAAGGCACGTGGTGTCGAATTCATCAAAGTC -CCTGACACATATTATACCGACATGCAAGCCCGCTTGAAGAAGTCTGGCTTGGTACTTAAG -GAAGGCTTCGACACCATCCGCAGCTTGGACATTTTGATTGATTTCGATGAGGGTGGCTAT -CTGCTCCAACTTTTCACTAAGGTATGTGGAAACTTATGACCCCGGTACTCGTGAATTCAT -ACTAATCTGACAAATAGCACATGATGGATCGGCCTACTGTGTTTATTGAGATTATCCAGC -GACACAACTTTGAAGGCTTTGGAGCTGGTAATTTCAAGTCACTATTCGAGGCTATCGAAC -GAGAGCAGGAACTTCGCGGTAACTTGGTTTAGAGATAAAAAGTTCGTATAGTTGGGCATT -TTCTTGATGAGCCCTTCTTGATGAGCCCTTATGTGTTTGGAGGTTGCATCTTGCATTTAG -CGAATTAGGGGCTTCCCCTTATTGTGAAGTTTTCGAAAATATTTCATTGTCTTGATAACT -GAATCTATTAATCAGGAGGTATAATCAGATTGTCAGTTTTTGTGCGCTCCAGTTTCCTGG -CGATTTTAAATGGCTTGCTCTTGTAGGAGCAGAGAAGACATAGATGTTGGGATAGGTATT -GCTTTATAGCCCATCGTGAAAAAGGGATGAGATTGAGTGTATATTGTTCTCTTAGGGATG -AAGAAAGAGGTTTGTCGGGATCAACAATCATCTCCAGCACCGCCAATGGTTATAGATAAA -AGCAGCCATATAGCAAGGCATGTGGCACTAGCGCATCCAATATCATGGACATTTTGTGAC -TTGGTTTGTAGTCGCTTGGGTCTGGCCGTTGGATACTCAATGAGTAACAGAAAGAAATGG -ATTGTGAATTTCTCTTGGGATGTTCAAGATAGGTGATAATGAGACACTGATGGATGATGT -TATATTCGAGATTCCGCCACTAATGCTGGAAATGCCTTTTGGTAAGGTCGAAATAAAATC -TTATATACACTGGACTTGGAGTCCTGTTGAGATATATCCATGGACATGATAGTTTTTAGT -CTCTCCCTTGTTCCATCCCTCATTTACTTCCGCTTCGATGAACTCGAGGTGGGAGCATTT -GCTCCCATTCGCTTAAATTTCCTCGGATCTGGAGCACTATTAGACGGGCAAACTCACAGA 
-AAGTTCAAAATGTAAGACATACCTTTCCGGCTCATCAAAGCCCGGTTTCTCGTTCCCTTT -CCGCCCTCCCAGAACCCGCTGCCTGCCTTTCCAATCAGAACAGCTTTGCAAATACCGCAG -TTCTCGGGCCTGTAGATTTGCTCGCGAGAGCAGTAGCCGCATATCATACGATTTGCGTGT -TCGTTAGGGTGGTCGGTTTCCGCGTCGTGGCATCTAGAAAATCAATAAGCATGTGGAAAG -TGGCGGGGAGATGTCAAGTGGGGAATCATACTTATCACAAGGGAAGACCTTTGAACAGCA -GCTGAATCTGTAAGGGGTCAGTATCACCTGTTTGTCGGGTGGTATTAACTCATCGAAGGG -GGACCACATACCTGAACCAGCGGTGACTCTTGGAATAGTGCTGGCAACGACCGCGTCGCG -GGAGTTCTTGTCCTGCGACAATGCCGAGGACTTCTTGAGGCTGAGATAATAGTTAGATAT -TGTCATTTTGATGCGCGGCAGACTGTATGTACGAACCTTTCTTCTGAGGGGGAGGGCTCC -ACGAGTGGTAACTAGATAGTACAGCAATGAGCAAAGGCGTTTCTCCCAGGGTCGACATTG -GGACTTACTTGCTGCACTTCCCACTAAGAGGAATTTTACCTCGGGGATCTTGAAGACTGA -TTTTTGGATATGTTAGTAGCTAAATAATAGACGCTGTGTATTAGACCTGAACCTACCCAT -TTTGCGATGGCACTGGCGGCATATACCCATGGCACTTTCGCCGCGAACTGCAGGTATACC -CGGTGGCGGATGAGGAGTTGAGCACTCAGAACAAGTTGGGATGAAGTCGCTATAGAGAGA -TAGGTCAACACATGGCACTTTAATGACACGAATGAAAGAACCAACCTAAGAAGCATGTCC -CCAATCGTACAGCCACCCATGTCAAGATAGCCAGCACGATTGGCATGAGAATGCATCAAC -TGCTTCCGGAACGCTGCACAACGGAGTCAGTTGCCGAACCCAACAAGGAGCATTGGGCTT -TGTATACTCACCGACGCTCATATTATTAGCGCATTTCCTGCAAGACTCCATTTTCGGCAT -CTGGCCCTTCTCGTCGGTAATTTGGGGCACATGCTTGATGTCGACTGACACCTTACATCT -GTCACATTTGATCGTGATGTTAAGGTTCAAGATTTCCAGTAGCTCAATTCCGTAGAGCTC -TAAGAATGGGAAGGAAAGTGCCACGCCTCGTCCGGGAGCGTTATTATCCACTGATGCGGG -GACAGGTGCACCGCCAtcttcgtcttcttcttcttctGAATACTCGTCTTCCTCGTAGCT -AGACTCGCCTGATGTCCCCTCGGCGCCACTGTTGGGATTAGGGACCGACCACTCAGGTGG -TCGTGGAATGACAACAAGATGCGGCCTGTCTTCATTATCTCGAGTTGAGGACGCAGCAGG -TGCTATTTCAGGCTCCTCCACGTCATTTTCCTCATCCTCTTCAACAGTGAGAGGTACATC -CTCGGCAGGTTCCTGCTCTTTTGGTAAGGGTGTTTCGGCGAATTTATGGATATTGCTCGC -CAGGTAGTTGACCTGAGATACCAGACTCAGCTGCGAAGTCTGTTCTACCCACTGAGTGAA -CCCCACTTCTGTGGCTTTCACTTCAGGCCCTTCGACTCCCTGCAATTTGATCGAGCTGTG -CTCGAGCGGATAGAACTGTGGCACAATCAATTTCACTGTCTTGAGTGCTTGAAGTGTTCG -CGGCAGGCGATCCTTTTTGGTTGGCTGGATAGGAATGACATATGTCCTGCCATCTTGCAG -TTTTTGGTACATCGGCAGTCTTCCTAGGCGCGCCTCGAGCTGTTTTGTCTCGGCTGCTCG -TCTCTTCTCGGCTGTAGCTTTTGCCTCAGCCGTGTATCTAGGTGCTACCGGCCGTGCTTG 
-TGGTATTGGCTTAGCAGCAGCACTCGCCGTTTTACCACGATCTTGAGACGAGCTTGGCTG -GCTGATGGTTTTCTCCGGGGCCTTGGTTTCTTCTTTCGTTGTGGATCCATCACCAAGGTT -AGCAAAAAACTTCAGTGTTGGCCCACGCTCCAGGGTAGTTAGCAGCGATTCAAGCTTTTT -GTCCAGACTGTTCAGCCAGCCCAAAAGAGTTCCTCGGCCATTGGTGCGGTACGTAAAATC -GACAATTTCATCGAAGCCTCTCACGACGTTTGCTTGATACGCAGCTTCCATTTCAGAATT -GACCACTGCAAGCGTGGGTCGCCCTTTAGGGTAAGAGCTTGGGACATGCAAGACACACTG -TAGACTTTCAAGCTCAAAGGGGAAGTCTGGATCAGAAGGTACCAATCCAAAGGTCAAAGT -GCAGCCCTGGGCGTCATTGACTTCCTTTGGACGGTAACGCCGCCTCAATTGGTTGATCTG -AAACTCTCTCGGATCTGATTGCTCCACTTTAGAAACTGGCCTTTGGGGCTGCACGGGAGC -TGCGGCAGAGCGTTGTTTGGACTGGTTTTCACCGATGTTCAATCTCTCAACCCCAGCGAC -AATGGCTTGTGCCGAACTCTGTTCTTCATTTCCCGAGGTTTGGGTTGATTTCTGCTGCTC -CAACGACGGCTTCTTAGCCTCAACAGCCGAGGCATCATGCTGATACGGGCATGCTTCTCC -AAGTCGGCATCCTTCGACAATGTCAGCATATGACCTTAACAACACATCGCCTTCCTAGCA -AATGACAACCACTGAGAATACATGCGCAAAAGGTTAGGGTATAGTATAGCCATGCAATTT -TGGTCATAGCTCGGTATCATTGACCCTTTGGGTGCACACGGCCGATGCTGTGTACAAGCA -TTCCCAAATGGGCCTGTTCGTGGCACCCGGACCGTATTCAATGACGCGTCGACCGTTGCA -ATGACAAAATAGGCAAAGAGCCCATACAAACAAGAACTCAGAACGCAGACTTACCCTTTT -TGGATTTGAAAAAGCGACATTTCTGTCGCCGAGTCGATGACTGGCGAGCACCACTTGTCT -CATCGATGGATGCAACGCCATGCGACCGGGCCGCTGACAACTCATCAGTTGTCAAAGAGG -ATGGCAACTCTGTACCTGTTTAACAAAGTATCATGAGCGTGTGATCAAGAAGGGTCGTGA -GCCAACGAGTCCGATCCTGGCATATTGGCGTATTAGAGCACCAGGGCCACCCTGCACACC -GACCTCGGAGGAGATAACATGGAAATTGATAGGCTTTGTGGCTGATGCAACATGTGGTGC -ATGGACATGCAACAGGGGGGCTGAAAATACGAAAGCCTGTAGTGGAGGGTCCCGATAATC -CGCTCAAGTACGGAGTAGATCTTATGGAATTGATTGGGACAAAAGCCTCCGGCTAGATGG -GGGTTTACGCGCCAATAATAGAATCATGCTTGACTCACCTTGGACCGGGGGTGTTGGACC -CGCATTGTCATCAAATGACCTGTTCTCCATACTGAAATTGAGACTTGAGCTACTCTTCGA -AAGATCCAGAATGGATGAGTTCTGAGCGACTTGGGAGGTGACGCACACTGCCAACGCAGA -GATCGGGTGACCGTGAGGAGATCGACAGGACCAGATCTGCGCAGTAAGTAAGCCGAGATG -TGCGACCGTCTGAACTGTACGGACTACTCCGTAGTGATCTTTGAAGTCACGGCGCTAAGC -AAAAACAAAAACACGCGGGAGAAAAATGGCAAAAAAAGATACAGCCACGTGATACCGAAG -TAACAAACGTTAAAGTAGACTGCAAGTAATCGGTGCTATATCCTAAAAGTCTGAGAATAT -GAAGCTCTACAAGAGGTTATAGTTGTTTACGGTTTTGCTGGCAGTTGAATGCCAAGACTT 
-GACGGAACCGATGACCTCATTCGCCTTTGTTGACTCACACACTACTTTGCTCCAGTGATT -TCCAGTCACAATTGTTAGTCACAATCGATTTTGAGGGTTCTAGTCAAAGACATGGAATGT -TCGTAGGGGTTCTAATATGGAGCTGGACTGTAGATCTCCCCTGGCGCTCCGTCCAACTCC -ACACATATTGCCTATATTTTATATTATACGTTGTACAACGTCCAGGGGTACAAGTTCAGA -CTTGCTGTATTAATAATGACAAGCACTGCACAGTCGTCACATTTAATATCATCTCGACTA -TACCCTTCGTCATTTCACGTCCAAATCGCAATGCAATGCTGTGGCGTTCAGGATTCAAGC -ACACCATCTGCTCGAAGCCACAACGGGCGGCAAAACCGAAACCATTCAGTGCCAGCATTA -ATTGGCAATAATTTACCTACACATCGCCAATATGCATACAGATACACGaaccaaaagcca -acagaaaaagcaaaaggaaaacaaGATTCCAAATTACAATTTGCCAGCAGAGCATATACT -CGGAGCGCGGCACGCGCCTTATCTCCCTCATCCATGCCGGGGGATATCCCTATTAACAAG -GAGCCAACCCCGAGCAGCGACGCCATAATAATATAAGAGGAAATGTGCACCTTCCACAAG -AGTAATACGACCCGACGGGGCGCAATAAGCTCAGGGCAATATACAAGATACTCAACAACC -AGTCACGGACAGCGGTCGGTCCGCGCTGGGAGAAGATGAGGGGTTCGCTTTGGAGGGTTT -CTAGAACTCCATCCCTGCGCGATAGGTGCGCCCTTGCCAGGGTACACATGAGCCGGGATG -CAACATCGATCCACTCGACCTAACGGCTATCTCGATTTCCCTTTGGGACTGATGGTTTGG -GTATATTATAGTCATATTTTGAACTCTACCGAACTTTGAATACCCACACAGCGGTCCATA -TCGGATGAGGATTTTCGTTTTTACCCCGACATTTAGGATAAATTACAACATAAGTAAGTG -AGAGAGAATCCAGAAGTAGGTAGTGGTACTAAGTGTGAACATTGAATACGACAAAGGCAC -ATTGTGACTATTACAGATAGAGACCTCGCAATCTAAAAGAAACACTGAAATCCCACACAA -CAGCTGTGGCTGCTAATGGTTAAAAATGATTGAAGGAAACGGCTTTAGCGAAGCTATCTG -GATGTCACATGTACAGTGGGACCTTTGATGTTGTATACCCTGTGCATAACCCCGTCTGTG -GCTGCCGCTAGTAGGACACCGGTATCCAGTCCAAGTCCTAAGAAAGTTACAAGATATAAT -TGCGGTTTAGATTATGCGATTCCACATGGGTGTCTCGATTGCCAGGGGTCTTCCAGCCGA -GCTGTGATTTCAAGATTTCAAAGCTGTTATAAAGAAGTAAGATGTGTGAAGGGGCCGGAG -TCCGCATCAGGGTACCTCCGTACGGACTACTGCAAGTAGATTTGAGCCTCGGTCATTGTA -TTGTACAGGAGATGTATGGGAGCTCCGCTCCGCGGTACTGAGCTTTCCGCTGTATATACT -GTAATGTAAGACCAAAATACAGTACAGTAAATAAAACACCCAAAGCGGAAAGGCGTGAAT -CGGACCTGGAACTCACCGGCCAACCGGATCATCCTTCCAGAAGCCTTCCAGAATCTCGCA -CGGTCTTCCGTGGATGTTGGCGCGTTTGACCATGGGCATAGCTTGCAGTTCGATGGTTTG -AGCACTATATCGGCATTTCTATTTCGTCTAAATTTCAATTTGCCTTGTAATATGTAGTGA -ACAGTACTACTAATCAAACTTTGAATTCTCATCTGACAGCAATGAGGATACTGGCAGCTT -GCGGGGTTCGGAGTACAGAGCTCCTGCGAGAGCTGATACACGTGACTAATATCCACCGGG 
-AGGCCTAGAATAATAAATTGACCAATAAACAGCGCCGTAATAGGCTTGGGTTTCGCCGGT -TTAATGCCATTGGGCGATTTTCTTTTTGGCCCAAGGAAAACGAATGCGGAAGTTCAAGCG -GGGGCCAATCCCGAGCGCCTCATTCGGGATTTTGCCGAACAAAAATAGTCAAGGTGATGC -ACTCACTCGGAACACCCACGAAACGCGGGATGTCGTGCATGGCAAACTTTGTGAAGTCAT -TCGAGTCGGTATGTGAATATCTGTCTTCGTTCAGGACTAATTCTGAAATAAGTGAGATAT -TCCCCTGAATTGAAGGAATATCAAAAAGGCAGTAAAGCGTGGCCTAACTTTAAAATAACT -CTGCATTAAAACAAGATCGGCTCTGTTCTAGAGCTTGAAAGGCCTAGACAGGGGTCGTAT -GGAGGTGATGGTTGGCCGATCACTTCGTAGTGGGGGACGATCAAGAAGAAGTCAACGAAG -ATTTTTGCGCGGCTAGGATTCGCTTCAATCATCGAAGATCCAAGACAGGAAACCAAGCTG -CATGCAAAGCGTCTCAATCTTTACCCAAAAGCACCCTCCATTTCTAGCTCAAGGGCAATA -TATCCTTGTGCTTCGTAGCTTTTTTGAAGATGCTGCACGCTAAATGCTGCATCGGCTTGC -CGATTGCACGTCGGGCGCACATCACCAACCCGTGGAGCCAGTTAGGCGTCAATCTTGTAT -CAAGAAGGCGATATCCACTTTTTCTTAAGCACGCCTCAAACAACCTTGATCGTGGGGAGG -TCCGTGGGGGGGTTGTACGCGATACCTATAACATAACGATGTGTGTGAAGTCCCAAATAG -GAAATGCGGAGCCGAAGCTGTCACACACGTACAGCGTCAGTTAGAAACCTTTCAGTACCG -GTGAGACGCACGAAAAAGGTGCCGCGAATCATGTCTGTTTCTGGGTTGTGAGTGACCCAC -TGCGGTATCCACCTTTGAGCCTGGTGGTAGCGGAGATCAAGAAGACTCATCTGGCCGGAA -AAGAGATTTAGCCCTAACCGGGCAGGTGGGTTCTGGAGATGGAGGCTGGAATGTTCACAC -AGGGGGTATGTATAGAATTCGGTGATGTCTCAAGAAGCTTTTGTATCATCTTAGCTGTTT -CGCTGTTATTGGACCGCAACTGAAGAAAGAGGCTTGTGATATCAAAGTATACGATCTGAG -TTGGAGTGTGCCAAGGGGCTTTTCCTAAGTACTTTCCTCAATCTGACATGAGGTCTGTTA -TACCCATGTTAGCTTTGTTTGGCTCTCCGATCTTTGTTTCTCTAGCATCCTATGATCCTT -GCTAGAGCCCCACGACTCGGGTATGACCGGAAAGACTGGAGAGGTTCGTAGGATAGAAAT -CGTGTCGGGCGTTTTAGGAGGAAAAAGTCTTCATCCCCACAGTTCAAAATTTGGAGAGGA -AGGTTGTGAATACGTCATTCGCTGAGACCTGTCCTGAACCGTACGAGTCCCACATTCTCA -GGGCTCTTAGATAATATTCGACATCATGTATACTGGTATCATTCAAAGATGAGATGAGAC -GTAAGAAGATGATCGAAGATAGAAGACGAGCAATATAGAAATGGGTCTCGGGCATCAAAT -CCTAGAGGCTCCACCCCCGGATCATGTGCGCCGGTCATGTGCAGTTGCCACATGACTACT -GTGGACACCCAAACTGATAGACCCCGGCGCTACGATTTATCTTGTATAAAAACTCCTATG -GCATTGCAACCCTGACACATCGCTTAATGGAAATAAGCACAACTCCAAATCAGCATATGA -AACGACAGTTATGCCGATGACATCAGCGTGATACTTTTTCTTCGGTGCAGTTAGTCAAGA -TCTGCACGCCAAATCCTCATCCTTCAGGCAACTACCACTATAAGGAGATGGCAAAATGTA 
-AGCGAGGGCAAAATCATGATGAAAACATGCATAACCCAAGTTAGATCCATGTAAGAATTA -CTCATAGTGTCATAATGAGGCCCCCTAACGGAACATGATTCCCTTCCTACAACTTCCCTT -AGCAGTAAACAATAGTGTTCACACCGTACAAAGTATTCAGCACCAGTAGCACAATCCCAA -TCTAAATAAGCCTCGTAGATTCAAATCAAGATCAAGGAAGATACAAATCTCAAAGTACCT -ATGGGCCTAGAACTAATTTTGTCAGCCACCCAGCGCAGGCCCAACCCAATCACCCTTCCC -CTCTCATATAATTCCACCCACCCCGAGATCTCCGATGAGCTGGCCCTGCAAATCATGCAA -GGCGAGAAAGCCACCGCGAACTGAGCACTTATCTCGTCAAGTCGATGGCAAGCCCACCAC -CACCCGAGTTCCGCGGGAGCTCTAACATAAACTAATGTGAAGATGTCATGATTTCCTGTC -TCGCATGTGCCAGCGACCAGTGCGAAGCCGAATGGAGAAGCTGGCGCCGAGGAAGGATCT -GGTCGATCTGATCGGTTTTGGGAGTAATTATGTAAACTCTGCTCAAAGATTCCGGTTTAT -ACGGAGGTATAAAGTGAATTCGTAAGATATCACAGCATCACGACCTTTGAAGCTTGGAAT -GATATGACACATGCATATATGGTCCGATTATAATTAGAAATCAATCCGAGTTGCTGGTTT -GTGTTCTAGATCTGCGCGAACTGTTCTGTGGGAGAGGTACTTTTGACATCTCTTCCAACA -TTCCGAACGGGGACCCCGGGATATTGGGTGGGCGGCTTTTAGATTGGCCTCCCCTGTATC -TCTTTTTTTTTTCGCTTCCCACATGACCTATATTATTTTTGGCGCCCTGGGATCTGCGGA -GATTACCTCTGGAGCGCCCATTCGTGAACTCATAAGTGAACTCTTCTCATATACCTGTCT -AGGCAGGTGGTACTGTATCGGGAAATTCGATGCGATGATTCAATTCATATAGTGTAACTG -AATCGTTTCTTTTGTTGATACCCCGGGAGTGGTTGTAGATCAACATAGATTTCCTAGGTT -ATGGGGAGTACAACATACAACAATTCTAGATGGATCTATAAGGCTTTAATGCAAGTACGC -ATGTATGGAGTCAAGAAATGTTTAACCCGTGCAGTTAGGCGATGTAATCAAAATGTATCA -TGACCTCTTGCATAGAAAGTACATGCAGAAAGCTATCAGCCAGAAGACTCCTCACCTCTA -CGTGTCAGCGCCACAAAGTCAACACTGAATTTCCACATGATTGATCTGGCGCCGTTATCG -CAGACCTCTGCTATCCTCCGTGTGCTGCAGTAGAGCTATAGCTACCTAGCGCACTACAAC -GTAAATCCAGACCGCCAATTCAGGCCTACGCATGTCTTTCTATTTCTGATATCTGAGCCT -GCAGTCAAGGAAATACGATATGCATGTGAGGGCAGGTATTCTCTGATTTCATTCATTGGA -ATTTTTGTTTTTTTTTTAGCTGTCATCAGGTAGCGTACAAGTGGTGGCCCGGTCGTGAAG -ATCGGTGCCGGATGAGAATTGCAAGACAAACGTGTGGTATCAGGTATCGCAAAGAAAGAA -AAAGAACATAACTATCTCTCCGATGATAATCGGGAGTAGTAAAACAAATGCTAGGCGTTT -CTGCAGTCAAAAGACATGCAGTAAGAGACGCAGCGCTGAGATAGATGATAAAATAAACCC -ATTTTTTTGGTGAACTTTCCGGGCATGAAAACCCCCCATCCGGAAATATCTTGCAAAGCA -GATGAAGAAAAAGGAATACCCATGTGGCATCAACCGTTGGTGACCCGAAAGAAGCACGAG -AGATATGAGCTGTACATGGATATTAATGTCATAAGAAAATAGCCCTCGAAATGGGTGTTC 
-CATAATCTCCTTGCAGAACAAACGCCCAGTGAATGCTGTGATGAACACCGCGGCCCCCGG -AGCCGATGAGATCATTGGAAAAGAAGAAAGAAAAAAAATTCCAGATGTGCGCCATATATG -CAATGCATGATCGTGATCGTACGTGACAAGGAAAAGCTGTGCTGTGTTGCAAAGCCAGTG -TGAAAATTCAAATACGCGAAGCTGAATTGAATCGTAGGCGAATCAACTACCATCCTTTCA -CATGGTATTCCCATGGACCGTAGTCGGTAGTGTAGTTGGTCCATGAGCCATTCTTGCCCA -GGTCGGAGTTAGCGGCGTTGGGGTCTGCTTCAACACTAGCCTGGTTTGGACCAACTCCCT -TGTGTGGTCCCTTGGGGCCAGGGTACTCCTGGAGGTTCTTCATGTCGTGAGCGGTGTGGC -GACGCTTGAGGAAGGCGTTCGCAGTCGCGCTCTTCTGCTGGCGTTCCCAGAGAAGATGTC -GGCGGAGAGACTCAGTCAATTCAGTAGCGAGCATGTTACGACGGGTAGTTCGTGGAGAGT -GTGCCATAGAATGTGAGCCGTTCGTGTTCATGACAATAGGCTTCGATCGGGGAACGTCAG -GCCCACGCATGACCAAGCTTTCCTCTTCCTTACTAGGAGGGGACTGGGGAATAGATGGAC -CGCTGGGTGAGGTCAACCGCGATCGTTGAAGTGCGGGACTAGACCGGAATGCGTTTCCTT -GCATGCGGGCCGGCTGGTGCATCATCATGGTGAGCATCGAGCGGCGGGAAACGAGATTGG -GCCGGGAATCGACGCGTTGGAACATCTCTCCTCGTTCGTCAACACTGGACTCGCCACCTT -CCGTGATGGAATCCTCCCAATCAGAATCTTCATCATCCTCAATCGCGCTCTCCGAGACTT -CATCATCTGTCTCAATGGCATCCTCATCGGGTAGATTCATGTCCTTCGAGTTCGGAATGA -TGTGGGATGAGACCTGATTGCTGAAGGAAGCAGTTTTCTTAGAGTTCGTGCTGTTGTTTT -TCGGCTTCAACCCTCCACCGGTCGCATTGTCCTGGGCTGCCTGTGGACCCATGCGCTCTT -CGAATGAGCTTTCATCGTCATCTCCAGAGGATCCGCCCAATGTAAACATGCCTCCCTTCT -TCTTCAAGGGCGATGGCCTCAGCTGTGTAGTAGAACGGGAAGGACTAGGGTCGATAGACA -GTCTTGGTTGCGATCTGAATGAAGATGAGATGTGTGATGGGGAGAATCCACGAACGATAC -TGGTAGATTTGACCAATTCGGACCGGCTGGGAAGAATACCAGAAGAAGAGACGCTGGTAT -CGGATGCGTTCACCTGTGCGGTATCGCTCTCGTTGCCCTCGGGGGCAGTTGTTGAGCATG -ACTCAGTCGACTCATGAGGATATGGGGTCGGTCGGACGGAGAAGTGAGGGACATTTCGAG -CGGTAGAAACAGAGGGTggagtgggagtgggggtggatgggcgaggagtgATGTCGACAA -CCGGGGCAGGAGCCTCGACGGGATGAGACATCGGCTCTAGTTGCTTCTTTTCCTTAATGT -TGAGAACCATCCGCTCCAAATCTAACGAGGTGATGTGCTTCTCCTTGCCACGGCCGAGGC -TGGCTAGTGAGTCCTCGCGGACAACCGCTGGTCGATAGTCGGAAAACTTGGGACGCTTAA -TATGAGCTTCGATCCGCTCGGCTTGGTCAGACGCGGCCGACTCGACACTGGCGGAAAGTT -CAGGCAAATCTCCGGCTTCAGATCGTAGCAATGGGAGGGCAGAATTGCGGTTAGAATTTT -CAGGTTCGACACAGAAGGTTTCGCGGGTCCACAACCGCCAACTGAGGTTTTCAAGTCGAC -GGCCCTGGTCCATGTAGTCGGCACATTTCGAAAAGACTGCCCGTTCAAGTCAGCAAAACA 
-ATTAATGGACCAACAGAGTTGTTACTTACCCATCCACATTCCATGAAGACTTTGAGCGTT -GGCGGTATCCACCTTGTGAATATTGTCCGCGTCCACCGTGAGAACCGGTGTCGTTAAAGG -CATTCTTGCGACGCCACAACTAGCAGTGGAGCGATTGTAATTATGAGAAATCGTCGAGTG -ATGTTCCGGGGTCGCCGTCTCCCCCGATAGTGGTCGTACGGCGCTTGAGATGCGGTGGTT -AAGGTTCGTAAAAATCAACCGATTTCTGGAATGGGCCTTGGAATTTGAGTCTGGGCCTTG -CAGTTACGCTGCAAAGAGTTGATCAATCGGTTTGAGCGTCAAATGTGCAATCGAATGCTC -AAGAGAAAGCAGGTTGCGTCAGGATTTGGATCAGGTGAAGTGAAGAATCGGGTCTCAGAT -GTCCGTCAATTTTCCGTCAGAGCGCCACAGTGCGGAGATTCCGGGGCGAAGAGTCGGAGA -GACGGAGCGGGTAGTGAAAGAGCGGATTGAGATGATCCTTCGAGATTGAAAAGGGGGATT -GTAGGCTAAGAGTAAGGTGAATAGAAAAATCTGGAGAAGGAAAAGAGAAAGGGAGTCGGA -ACGATAATAAGAATGACGGGGGGCGGCACTTGGTGGAGTGGAGTGGAAGGAGAGGGGGAC -GGTAATAAATTattcaaaattgaaaaaattggaaaatgaaaattgaaaattgaaTTCTTT -TTTTAAAAAATTATGAATTTTCGAAATTATTGGTAAGAACATGACAATACAAGGGGAAAT -TGAAAAAATAAAAAATGAAAAATTCCGAAATAACTCCAGGCCGAGGATAAGGGTCGGTGA -AGGGGGAAAATTCTATCGCATCACGGGGCCCTCAATTACATCGATACCATGTCCATGTAT -CGAGATCCTCCCTACCATGGTTCCCACGGTATTTGCCCAGTACTTTTGGCCTATTCTTTC -CCTCTCCGACCTCGGGAGTTCCTCCCTAAAATACCAAAATACTACGGTAATGTTACTTTC -CGTTTTGATTTACTGATTACCTAAATATCAGCCCTGGAGCTCTCCTCGAGATGTCCTAGT -TTCTTACATTTCAAAGAGCTTGGACATTCATAAAGAATGAGAGATATCCAAAGCCTGTGA -CCTGGGCAAAAAAAGGATAGAAAGAAAAATGCTAAAAGTGTACGGAGTACGGAGTACTGA -TCGAAAGACAATCTCTAATGCGGTAGTTGCAGTGACTTGCCGGAAGTCCAGACCAGGGCT -CTCTCTACGCCACCTAATGTATCTTGATGGTCGTTTGGACATTTGGCATTTAGGCACTAC -CTCTTGCCACTGACAATTTACTGTGCAAGAGACTGTGTAAGAAAGGTCTACTGACTGATG -AGCTGTCAAACTGCTAGCTATTGGTTCTGACAGAACTGAAGGTCCTGCTGACTACTAGCG -AAACTTAAGTCGCATTTAAGCTATTTTCAGCTATTTTCGCCACGGACACTCTATACTCGG -AATATGGGGTACACCGCGTCTAAAGGGCTAGGAATCGGTCTAAGATCTTAAGCCACGTTG -TGATCATAGATATGAAACTTGGGGCTCGTTTATTCTCAAGTCTATACCTGGATCAAGTGT -GACATTATAGAGGTCGAAACACACAAATAATTCCAAAATCATACACTTCCCCGTAAGCAC -CTCATTCCTTTGGTTTCTTCTGTCCCGGCTGATTCATGTAGTTTTCCCCAGTTCTTTACC -CCCCGCAAGCTATTAGCCGGGAGAAAGGGATGGTTCCAAGGCTGCATGCCGGGGAACAAA -CGTACCTATTCCATAGTAGATCGTGTTCTCTTTTAGATGCATTGCTTTGTATGAAAACAG -GTAAATTCTACACACTTTCATCGGTGAGAATTTAGAGTGATATTTTGGTACATCACTTCA 
-GGCTTCAATCATGTGGATAGGGAAAAAAAAAGAAATGGTAAGCCACACAGCTTCACTCAA -AGCAGCAAGGCAGCCTTACCGCCCTTTTTCTTTCTCTTTTCTTATCTTCCGAACGGACAC -ATCCTGGCCAGATTTTCTATCTTTGATTCCAGGACACGACAGTCGACAGTGTCACAAACC -CAGGCTACATAGTCACGAGTCTGCCAGCTGTCATAGTGCTCGGTTTCTCTTGCTTTTCCT -AAAAAGAGATGGGATTGACAGATGACGGTCAACATGAGGAAGGCATCAATACGTAAGAAG -CCAGTTTAGAGATGGCCTTTTTTTTTTCTGTCACGGGGGTTTGAACGCTTACAGTAACCC -TTGGGCTCAGGCTCGGACTGGCGGAAAGGTACTGTGTCTTTCCCTACTACATCATTCCAA -ACCTCTAACCGGGCGCCTGGCAGGCCGATCCTCCAGGACACGACCCCTGCCAGGGCAAAG -TCACACCAGGCGCACGTTTCATCTATCATGTGATATTGCTCCATGAGATCCCTGGCTATA -GGAGCAAATGGCTACGATTTACCTAAGTAAATTGGAGCTGACCAATATATATTCAAGTCT -GTTATTCCCTGTCAGCATTTGACGTGGAAGTTTGCAAGCTGTCATGATGACTATCCGAGT -CTTCAAAAGTGTAGCCAGAGGCCCCTGTTAGATTTGCGGGGCTTCGGATGCAAAAGGCTT -CGGATGCAAAAGGCTACAGATCAACATGTCGGAAAAGCTGGTGTATGAAGGAAGGCCGTT -TGATATCTCATTTCCCTCGTAGGTCTCAGTTGAAAAAGTATTAACCAATCAACCACTGTG -TATCTTCTTTGACCGCAAAAGATATACCTAAGTCGTCTGATTCGTTCAAAACAGTTCGTT -GGTAACAGACGTATTCCTGTGGTTGAATGACACTACATAATCCTCTACAGGATGCAAAGC -ATTGTCTGGCCGCTTGTGCTGCCTGAGTTTTTCATAGCTGCTGACCCCCCTGTCTGCAAA -GCTTGCGCAACAATATCAATGACACATGGAAGTCTTCTCGGTGAGCAATCTGATGAGAGG -AAAATGATAACTCAACAAAAAACTCATCTCTCTTTACTCATCAATTCATCTACTTGTCTA -CATGTCCAGCTCCTGTATTACTGACTAGCATTCGGGTTGTACCAGGTGGCAATATGAAAT -TGCACATGGGATGCCTCATTTAATTAGGCATCCTGCATAACGATTAAGTTAAGGCTGCGG -CTGCCAATAGATCGGGCCTGGGGAATGGACAGTCACCCCCCTTCAACCAAGGCATGGGGG -ACAAAATCCCATGATACCGGGCCGAGGTCCTACCAAGCGACAGAAAGGATTCCTAGGTAA -ATGGAGACTGAACTTAGGGTGGGTCACTAGTATACTATGCGCTGTACAGAACGATTGCCA -CGAGGATGATCACGGGCAGACCGAGCCGCCAGAGAGGAATTTGACCGAGCCTGGGGCTGT -GGCAGATGACAGCTGATGTAGACGCAAATCATGATGAGGAGCCCCGGAGCGCGCTTCAGG -TTACAATAGCTCTGTGTGATGCCGATAGGCTAAAATAAGGCGCTGGGGTATTGGTACCCA -CAGTGTTATGCAGTACAAGTGGTGTATCCAGGTAGAATGGACTCCATTTTGTCTAATGTG -AGCAATAAGCCGGGCAGCTCATAAAGTGGCAAGGTCACGCGCAAGTCATGTCACATGATG -TGGGGGTTCCAGGAACAGCGCGCCACCTCAGGGGCTCGTGACTTCCCCGATTTGACTGCT -GAAATCTCCCACTGGATGGATTTGGGCTATGTTGGCTCCTTCTCAATCACCTGAGTCCAT -GTGACACGGAAATACATCGGCTCAATCCCCATTTTCTCGTCTCGTTTTGGTCAATTCCGT 
-TTCTGCTTACTTATTGGACCGCCAATAGCTGCTCGGTTGCCTTAGCTTATTGATATGCAC -GGCTTGAGATGACAGCTACATTTCCGACCCAATGATGTGAGCTATTAACACAAAAGCATA -TACCAGCTTCTTGAATCTGATCCACACGGATTTTGAAGCGTCAGGGGTATCCAGGTCCTC -TCTCGGGCGAGTATCACTGTCATACAGCATATTCGTTCATCTCCTATCGTAGCATATACC -CCATGTAATATTTCGCGACTCATGCATGGGATTCAACACCATACGCCGGGTCGATCGACA -AGATGACAAATGCTTCTATCTATTCAGAGTGCACCTATGTAATGTGGCTGGAAGTAAAAA -AGGAGCTCAGGGATCCAACGGCTGATAATCCATAAAAGAATATGGAAAGCCTGTAATTTG -GAATCGACAGACGAAGCTTCAATGTGTCACCGTTCCTTAATCATCGCGCTTCGCGTCAAC -AAACCAACGGCCAGAGCTATCCCTCTTTCAATGTGACGGGGCGCGACGTTAGTGATTGGC -ACTATCTTGACAGCCAAACAGGTGGCATATAACGGTTGTACGAGGGTGATGTGTATCTTG -AATAATTCACCCTGTACTCTGATTCATCCTCACGATCTGAGCACAGCTCAGGGGGATCGC -TCATTTTTTTTTCCACCGTGTCGACCTTGTGACTCTTTGATTTAGATGTTGATATAGATC -CAAAATTGAGTGTTGATGGGAGTTGATAGGGCAGCGACCTCTAGGCGTCCCCAGTTCTCT -TTGTATCACAACAACCAAGCAGCGAATCATATTCCTCTATTTTCCCACTCTATTTGTAGC -CCGCTTCCCCGGCATGAATGTCTGTGCTCAGATCAGTTAGTGTTTACTAGTGTCTGATGT -GGTCTAGTCCATGATTAGTTGATCTAAAAAAGGGGGATTGATCAGGTATCAGGGATTTTT -TGAGAATAGGTCTCGAAGAGGTCTATCAGTTTTGAGGTTTAAGAAATGACTGGTACGAGC -AAGGGATACTTGCGGGCTACGGGAGTCGAAGCCGGGCATAGCTTTCTAGGTTCGTAGAGT -CAGGTGGACAATTGATTCCATGAATTCAATCTAGAAGCATTCATACCAAGCTAGGCGGAT -CAGTGTCCAGTGGAGATATCCTTCTACTATCAGGTACGGAGCACCCTGAGTACGTAATGA -TGCGTTCGTGTCCTTTTGTTCCTTTCCCTACTCTGCAAAGATCGACTTGCTGGATAATGC -ATGTGACCTTCAACAAGGGATATCCGGGGAATCTTCCAGATCGGAACAAAGCTTACTGTA -TCCGCGAGGCTGGTAGGCCCCGTGAAACTGGCTAAGTCGAAGATGGGATGTGGGTTCCTG -GATTCTGCGATCCAGCCGGAAGCAGGACTTGCATTGCCGCGACGATCATTTTCTTCCGAT -CGTCTTTGGCTTCTTTCATACATTTCGCTCTGTACAGGCAGTCGACTTCTAAACATCATG -TTCTACTGGAACAGATGACACCCACTTGATTCTATCCTAGACGAAGATCGCTCTGGACTT -GGGTGTTGTGTCTTTGTTTTTGGTTTATAACTCTCGCCCTTCAATATCGCGGTCTCATCT -AGAACTCATTACATTAGATCGGTTCAGGTGGTCATGTGTGCAATTCCTGTTACTCGTTCT -ACCGTTACAACGCAGCGAAAGCAGAAAAGGACATCTGTGCATACCGTCGAAAGATCTAGC -CCATCATATCTAATGTGCGGCCTTTGGGCCCGGATCGTCGACAGCTCCCTTGTTTGGAGG -TGTGGCCCAATAATACATCCGCCCTGTACTTTCTGCAGATCATATTTCCAGGTATTTGGT -AGCCCTGGCATATCTCCTCCCATTTTTTCAACGTTATGGACCCACTTCCCTTGATATTGA 
-ACCTTATGTACTCCGTAGGGTTCTATATTCTGCTTGATACAGCAAAACAAGTATTGGGGC -CCTATGCTTCATTCGCCACCTCGTTATTGCATATTGCTTCTGGGGAAGCACCCTGCTAGG -GAAGCCGATAAAGAAATACATATTGAATTTTGGTTAAAGTCTCATTCACTTTCTTTACTC -GTTCGGGCTGTCTGGTATTTGAGGTGGTGAATAACAACCCCTGTCACAGCCATGATTATG -TTCAAATTGTGGATTTCTAGACCTTTGATTCGAGTTCTTTAATTAACTCTTTCTGGACAA -AGTATGCTTGTGAAAAACCAATTGTAAATACGAAGCCCAGGTCATTCATTTTCCCAAAGA -GGTCGTAATTGGGTTTTCTTGGTGCGATGGTCGGAGATACGAAGGAATCAAAGACAAGGG -ACTCCAGAGATAGTTCAATTTACAAAACCAACTGACTAAAAGCCTAGACTCATCTACCTT -TGGAACACTTGGAACCCCTATGGTCCTTTGGTACTCTGCCATTGCGCAAACAACTGGATA -ATATACGTGCTGATCCTAGGCTGTTTAACCAATGATACCCCACAATACAAGCTGACTGCA -TGGGGCCATGGGCATCCGGTGGCCATTTAGGGCGGCTATCCCCTGACAGCTCTCTTTGCT -TTTCTCCCTTGACCAGAACGGCTGTAGCGGCTGTTTACAAAGTCTTCTCTTATCACCGCC -GGTGTGGTGTCTCTCGGCGTGTGATTGTGGCGGACGATGGGCTGTGATGGCTGTGTTTGA -TTGCTTGTTTTTGCTGCGATGTAGGGGTTTATCAGCATGTAGTCTGGGCCGTCAAGAGTG -GAATCCCTGATTGATAACAAGCCTCTTCTGTGAAGTCGTCACTGTGGCTTCTTTGATACC -GCGCCACGCGATGGCTTTATCATCGATTCATCTCTTTGATTGATTGCGCTGGCACCCTGT -CACTGCTGATTGTGGACGGTGACCGGGTATTGACTTCAAAGCCCTGCAGTTCATTTTTGA -TGCTCTGTGTTTTCTATGCAGTTGATCACCTGTCAGTGCGTTAGGCTCTGGATATGGAGT -CTCCTGCAAGCAGGCTAATTCATCCGGGTTTGGCACCTGCTTCCCCGCGAGCCGGTGTTG -GTCGCATAGCATTTAGAGTTTGGCTGAAAACAACGCCTGTTACGTACATATAATCAGACA -TGTTTGTGCGGGCTTGAGGGGCGAGCTCGAAATTTTGCGATAGATTTGCAAATATACATG -GTATCAAACCCGGCTTTTGGATTTCTGTCTCAAAGTTTTGTTGAGTAGACATGTAGACTC -CAAATACAATCTTTCACCCTAAAGAAAAAAGGTCACAACTTTTCGGATCAAAATCCGGGG -AAGCAACATGGAGGACCCCGGACCCCGGACCCCGGGCCGCTTTCGGCCAATCTGACGCGA -ATAGATTACAAAGAGATGAAACACGTGCAGATGTTCTCAAATAGTTATTGTATCGGAATG -ATACGTGCCTTGTACCAAATTTTCATACTTCAGTGGGCCGGAAACAGATACAGATTGGTG -AGATCAATGCCCAAGACGAGTTGTCACAAACCTCGACCACTGGGTTTGTGACGGGGCAAA -GCCACGTCAATCCATGCCCATGTGAACAAGCTATGTAATTCAAATTCTAACGATGAGGCT -GACGCGATCATATCCTCCGGTCCGAGTTCCCTTAGAGTCAACAGTTGACACCCAGATTGA -CCAACATATTGATTCTGACCGCATTATGCTCAATGGTTCATCATGTTCCACATGGAGCCG -GGTCTGCCACTACCAGGTCATCTGAGCGGCAGAAATAGTATCTCCGGTACAAGTTTTCTC -CAAGCATAGCGCGGCTATGTCGACTACCGTAAAAACAGACACACACACACAAGCCATCGT 
-CTGGTCTCCTACAGGGCTATACAGCTTAGCTCCGTCTTATTGAGAATGCAGGGCATATAA -TATAGGACTCGCGACCAGCAGCACACCCTTGATTCAGCCCTCCCCACGTAAAATCTGCCC -GATAATCAAAAGCAAAAGAAGAATGGCCAGTGGAACCAGTCTAAATCCGTCAGTTTCACA -GATTACAATCTCCCCCCGGCCACGAAAATGGGCGCCAGATTGGGGCTTTTTCTCGTACCT -GTCAGTGGTGTCACACCGCAGCGATGGATTTGCATTCGGGCCGATGCGGAAGCTCGAACT -AGCCCATGTACACAAGTACCTACTCTGGCTAGACAGGGTACGCTCTGCCCCGGCGTACTC -ACATACGGTTATGCTGGAGCTGATGTCCGTCTCCTGTAGTGGCGGCTGTCGCCCATTTTT -CAATTTTCCAATGATGACGCTTTGCAACGCGACCCGCTGCAGGTCATGGGGGACTTTAAA -CTGATCTGCCGCGGGATGCAATGCATCTTGGAATCGTGTGCTTTGTATAGGTATGCTCGA -TCCAGTTGGGTTAATGTGCCCACATTCTAAAATCTTTATCCTGGCATGGCATTTACTATT -CCCTCGTTTGCTTTGGAATTGTGCTGCAATGCAGTATCTCAAGTTCGACGGCTTTCTGGC -GTAGCTGATACTTCGAAATACGACGGATGGCTGTTGCCTTCTCTATGTCTGGATATACAA -TTGCGGTCTGGCTAACGATATTTCATTGACTCGTCGGGTCTTTTCTCATCTAGCCAATTT -GACCGCCATTTTGACTGCCCAGCAATGTGAACTCGCACGTACAAGGTAGACATGTCGCTG -ACACTTCAGATTATGCTAGGCAGCTTCCTATGTCCAAACGCTCCCAGCATCAACAGCTAC -ACGTGAGAATAAGCAGAGAGAAAGAACTAGTCTAGGCCTGTCTCGCCTACGACAGGTTTG -GGTGTTCATCGTATCACTCTATGTACTATTCACAAGCCCTTATAAGTATGAGACACTTGC -TCTACTTTAAAGCGATTGTCTTTTGGACTCGATATATGTCACTCTAAAACACCTATAATA -TGTAAATCCCTCATACATCCCATAGAGAAAGTCTAGCGCTACTGTAAGAACCACTTTCTT -GTCGTCTCTCTATCGGTCTTTTTTCAACATCCTTTCCAGTAGGATTACAGAATATAATGG -CTAACAACCCCTAATCTTGTTAGTAGGGAACCTTCGATATTATGTATGTCTAGAGGTTTT -GAAGACTACAGatatatagatatatatataGACCCCAAATCCTGCCGTACTCGCCTTCAG -TCAAGCTATAGTAAATGGCAGAATCCTGAATTGTGTGGTCCACGAGTCGCGGGGAACTGG -GGTCGTACTTCCATGCATTTGTGAAACCCGGGAATCAACACCAAAATCCCTGCACAAGGC -TCAGAGCTTTATCACACCCATTCAAAAGGGAGACGGGAAAAAGAAAACATGGTGCTATCT -TTACATCGACACGCTGGAAGGTCCAGGAAATAATAATGACTCCTTTTTGATGGTTACATC -TTTGTAGAGCTAGAAATATATCTAACGGAGATTGAGACTTAGTACGCGGCCTTGACCGTG -AAATTCCGATTCGGACGCCCTAGCGAAGGACGCTCTGATGGTTGTCCAAGCTATGGTCAT -TATCGGGTTGATGTTCAATCCACAATTCCAGATTGTCCTATAGGCCGGCACTTTGTTGCT -CGCGGTAGTTCTCCGGGGTCACGACGATCGATGCGACAGAACCCAGCAGAACATATTTGG -GATAATGAGATGGTTCTGAATCTGTATAAAGAGTACCGAAACGTCACTCCCGGTATCAAT -AGCGTAGATCGTTACTCTTTCCTGGTGATTTCGGCATCTGTTCGTCGTACACATTACCTA 
-GAGTGGGGGGTCTCTAGATTGTGCAGAGACGAACTGATCTTCGCCCTGCGAAGATTTCTT -CGGGAGAAAAGAAAACAAATCGGAGGAAATATCCGAGTAATGTTATTAGGGGGGGGGGGA -TGAAGACACAGAATGACGAGAATGAAGAAGCCAACGCATCATCCATTTGATAGAAGATCA -TGACCTAACCTCGCATGTAAGTCTTCAGGTGCCATCTAAAAAAAGTATATATCGCTCTCT -GAATGCCGATTCTAAATATCAAGATTCCACACTCGTATCCGGGTAACAAGGCAGTGTAGT -CCCCCATGATAGGAAATGTCACCAGCTACCTAGTTCCATTCGTCCATTAATCTAGACCTT -ATTAGAGAAGAAATGACAGAAACGCAGCCACCCCCACAGATGCGCCCCGAGGTAAACGGA -AGCCAGACTCCGAAAAGCCAAATTTGTCAGTTGATGTTGCGGTGGGAGTACGTCTGTTTT -CTTTTGGGTGCGTTGTTCCTCGGCATATCGGGTGACCCTCGTGAACACAGACAATCCCAC -ACAATCCCGAATGGAGGCACGGATCTCCCACGAAAATGAAACTGTCTGATGGTGTGGGAA -TGTTGAGTTGAATGGCTGAGATTGGTTTTACATGCAGTAATGGTATGTATTTCGCGCCTG -CTGTGGCTTTGGCAAGGATCTCTGGTTGACGTCGTGAGGTTTTAAGGCAGAAATGGTGAG -GCCAAAGTGGGAACTTGAGATGTTTGAGATTCAGGGTAAGACGAGTATGTTGTACCAACA -AGTGTCTGGCTGGTTCGCCCGTGTATGTACATAGCGTTCATATAATTGTCGGGAATTAGA -TGCCGGACTGGGCTATCGTGTGTGTATCGACATCTATATAACTACTAGTTGGTCGTACAT -CCGAATCCTCATCTCATGAACGAGGCTAAAGGCAGTGCCCAGATAGAGACATTATCAATC -CTTTCGTTTTTATTGAAACTCAAAGGGTTCTATGTAATTGACCATGTTTTTACATAGCAA -GCAAACCAGAATCATCAGATTTTTTCTTTTACCATCGTGGAATAGGACTAGAACGGGGTG -ACCTAGAACTCGGACTGATCATCACCGGAGATAATGACATTGTTGATCTGATCAAAGGTT -AACATCAAGCGCAATGGAAATAAAGAATGAATGAGAGCAGTACCTTAAGGACCATACGGC -ACAGCTGGGTAGCAAGCAAAAGTTGCTGGCGCTTGCCGATCAGGGGGTCAATAGCAAAGT -GCTCACGCATGTCTACGGAGCTCAAGTTAGTCAATGAACAGGACTGAGTATGGAATAGGA -GTATGGAATAGGTTTCTAGGGGCGTACCGTTGGTGCCAGTCATCATGCAGTCAACACCGA -GACGGGTGTTCTTCTCCTTGACCTGGCGAGACTTGATTGAGGCAAGGGTCTCGATGGGGC -TCAAACCAGAGTTCTCGGAAAGAGCCAAGGGGACAGCATCCAGCGCATCAGCGAAAGCGC -GCATGGCATATTGCTCAATACCGGGGCTCTTGACAGCGGCGTCCTCGACGGCGATGGAGC -AAGCAATCTCAGCAGCACCACCACCGTAGACCACACGGTTGTCGCGCACCAAATTGCGAA -CGACACACAGAGCATCGTGTAGTGACCGCTTGGCCTCATCAATGATCTGTAGCCGAGTTT -AGCTACCGGCACATCGCAGTACATATGGTGTAGGGTTTGACTCACCATTTTGTTACTTCC -ACGAACGAAAATCGTCACTGCCCGGGTGTTGGCGCACTCCTCAATTACCAACATCTTCTC -TCTCGTAGTTCCAAAGCTCATCTCGCGGACAACACCAGCTGAGCCAAGCTTGTCAGATGT -AAGATCCTCGAAGCGGGGCACAATACGACCGTTTGTGGCAATTGCGATCAACTCAATCTC 
-AGGACCGCCAACCCAACGGACGGCAGGCAAGTCATTCTGAAGCAAAAGGTGGTTGGCCTC -ATCATCGAAACCCCACTGGCAGATGACCAGGTTGGCACCCGAATCCTTCAGCTGCTGAAT -CATCTCTGTGAACTTGTTCTTCTCGTAGTCCTGGAGCTTCTTGAATTCCTCCACGGATGT -GATATCGAGCTTGTGCTTGGTCTTGGGCTTGGGGGGCTCAAAGGGACAGGTCAAGATAGC -CAGTTTGGCATCCCGAACTTCGTCGGGCATCTGAGGGTGAGAGAAATCCTTGTCCACAAT -GACACCGTGGACAAGCAAAGAGTCTTCGAGGGATCCACCGACCTTGCCGTCAACCTTGAT -CAACTCGAAGTCGACATCCTTGCGCTCGAAGTCAGCAACCGAGAGCACGGCATCAACGGC -GATCTTGGCGAATTGGTCGTGAGACTTGGAGACACTGCGCAAATTGTCAGCATCAATCAG -AACTAGTATACAGCCAGGAGTCCGTACATCTTGCTGCCCAGACTGGTCTTGGCAACCTTG -AGGAGGTTGGTGGTATCATCGCTGCTAAACGTAATCTCATCACTGATCCTGTCGAGCTCG -GCAACGGCAATTTCGCAGGCCTGGTCGTATCCGTCGGCTATACGGATGGGGTGAATTCCT -TTGTCAATGAGGTCCGAGGCTTGTTCGAGCATGGCCGCTGCCAGCACCACGACACCGGTC -GTGCCATCACCGATCTCCTCGTCCTGGGACTGCGAAAGTTCGACCAGCAGCTTCGCAACG -TTGTTTGTGATCTCCATCTAGTCGTGCCCATCATATCAGTATCTCGAGCAAGAATTGAAT -CTCTTTGGGGAAAGGATTTCGGGGTCGAACCTGTCCGAGAATAGTTGCGCCATCGTTGGT -GACGGTGATATCGCCGTCAGCGGAGATCAGAATCTTATCAAGTCCACGGGGACCCTAGAA -ACGGGAGTCAGATGTGTTGTCATCCATCGCTTGTCGAGTGCCATACCAGAGAAGACTTGA -CAATGCTGGCCACCGTCTTTGCGGCAACAATGTGTGACTTAACGGCCTCAGTGCCATGCT -GTCTCTTCTTCTTTCCCTGACTGTTTTATGAAAATCCCCGTCAGCCCCATATTTCAACCT -TCCAACTCTCAATGGTATTTTGAAACGGAGGACAAGGCGTAGCGGTGAACGTACTCCCGA -ACGACAATAAACGGGCGTCCCTGCTCGTCTTTCAAGACCTGAGCTATCCCTTAATGTCAG -CGACCGGATCCGAATTGTCCCAAGGCATTGAATAAGTAATCACCATTAGACATATCAAGC -TCTGTTTAAGATCAAAATCCACATAGTTAGCGACAGCACATTTCAAGAATGAGGGGTATG -GGGGTGCGGCCTTACGCATTGCCATGTTGACGGTGGGTTGCCACCTGGCAAAATATAAAG -ACTTTTAAAAAGAGGGCACGAGGAGACCGAGAGATAATCAAAAGTAGGTGACTCGACAAA -GCAGTGAACTTGTAAGTTGAGCCTTGAGGGGGAGCTTCCCGAAGAGCACTTTTGACAAAT -GAAATTCTGACTGCCCAAAGCGGTATAGCGTATCTTGTCCGCCTTCAACGCGTTTCTGCT -TCTCGACTGCTCCAATTGCTTTGAATGCGACAGCCTTTCGTCTGACTTTACTACTACAAT -TGAGGTTTTGATCCTCTAGTCGCCAATCTGTTTTCTAAACTCTCCAAGATCATCTCACCC -TTTAAACGAAATATGCCTCGTGAGTGTTTTCCCCCAATGTTGATTTGTCGCGCGCGCAAT -GGTGAGCATCTCGGTTGACCCCGTTCATTCCCTAGCTCCAGTTGAAGATTTGTCCGACGA -GGAATCCGGCGATATTCCTTACAGAGATGCGCCAGAGACCAATGGCAAAGATGCCAAGGA 
-CTCCCCAGAAGACGAAGGAgacgaagaggaggatgagggtgaggaagaggGACTGTGAGC -TGCGCATTCCCGACTGATGGATACAATACTCTAACCGAACGAAGCTACGTCGTCGAGGAT -ATTATTGAACATGACTGGCTTGATGATGTATGTTTCGCAACACTCAACCTCATTGTCCCA -AGAAACTAAACAACTACAGGGAACCCTAAAGCTCTTCGTGAAATGGAAGGGCTATGAAGA -CATTGACGATAACACATGGGAAGAAGAGGAAGGCCTGATGTACGTCTATGCATTCTTTCA -GCTTCGACATATGCGACATATGCTACACGCATTTATTGACAAGCACCATAGGGATGGAGC -CCACGACATTGTCACAGAATACTACAAGAGAATCGGTGGCCGGCCGAAGAAGCCCGAAAA -TAAGCCCGCGGCAGCAAAGCCAGGTCGCAAGCGCAAATCTATGGGTGATTCGAAGGCTGC -TACGACCACAACGGTTTCCGCAGCGAGCGAAGCTAAGAGACAGCGCCGAAAGTCAGCGCC -GAAGGAGACGACTAAGCAACCTTCCGCCGAGTCCGAAGAAAATGGCATTCCGTGGCTTCC -CAAGGGGAAAAACTGGGATAAGGATGTTAAGACGGTGGACACGATTGTCCGGGAGGGTGA -CGCAGGTCTCATGGCCTGGCTTGAGTTTAACAATGGACGCAAGGCGAAACTGTCTGTCCA -GGCTTGCTATGAAAAGTGTCCTCTGAAAGTATGACCTCCACCGTGCTAGAAACAGCGACA -ATAATACTAACCGGCAGAATAGATGCTCAAATTTTATGAATCGCACCTGTGAGTCAAGCC -CCCTCAAGCCATCCTCCTAATGAAAGCTAGAAATGGATTCCTGACATCTCTCATAGTGTT -TTCAAGGATAATTAATTACCTCCGTCCGCCGACAAGTTGATATATCACCAACTAAAAAAC -GGTCCATAAGTGATATTTCTAGGCCCTTTCTTCATTTTCGGAACTACAATGGCGAATGGC -CATCGCTAAAATGCTTTTTAACTACTGCAACCAACGCGTTTTGAGCATGGCGCTGCTACC -TTTCAACTGATGTTTAGTTTTGAGTTCCAATTTCCTCTTTGCGTTCTGGGCAGGTCAATA -CCGGTGTCTTGGGAGAGTATTTCGTTTCTAGCTACACGCTAATGTTCATGTACTTCACAA -TTGCAACTTATCCATTGTAGCTGTCCTTCGTGGGCTTTCTCGAATTCCGCATCCCATGGG -TATCATTCAATGAGCCCTGTCTGTCGAGCCATGCTGAAATATTGCCATCTATCCTTGTAC -GCCTCCGCGCCGTGCTTTGTCAACGCTCGGTGTACACGCGAATATTCCTGCGCCTGTAAA -CAGTCATCATCCTCGACTGTCCAGACTCCGGGCATATCATCGGGAATACCCTTCCCAGCG -TTGAGATATTTGAGAACTTTATCTGCCATCTCTGGGACCATGCTTGTACATCGCAGAGCA -TCGAACACGTGAGATTCCTTTGCCCCTCGAGCTAGGCGTTGATCAATCCAAGTATCGACG -TCGGGCACATCTGAGGCGTCATCGTCATCCACCTCGGATGCTGGGCTTGATGGTAAAAAT -GGAAGTTCTAGGAAAATGGGATCCACTGGATCTTCGAAGATATTGTTTACTGGTGGTCTG -GTTTTGGGTGGTTGTTTCTTGAATCGGCCGGACTGTGCGAAGGCATTGTCCCCCAATGGT -CGGGGTGGAGCCGGCGGCACTGGTGTTGTTGGATTTGTTGGTGGTTGTGAAGGTTCCCGA -GTCACACGCCGAGGGGTTTGTATGGAAGGATTGGTGACTGGAAAGATATCTGCAATCCCG -AGGCCTTGGAGCGGTCTTGTACTTGACGATGAGATACAGGCAGAGCCTCTTTCTGAGCGT 
-GCAGTAAAGGGTGGTGTGGCTGACACATTTGATGACTCTGGTTGGGGCACCTGATTACCA -GGACGGGGACGGCGTGAGGAAGGGCTTGCAGTTGCAGACGCTTCAATTGCTCTTCTTTTC -ATAGGTGAAACATCCCTCCGCCGCTGGTTCGGCGGTTCAGGGACCGGATCTCGTTTTCGC -TTTGGGCTGTTTGTTATACCTGCTTGGACCGGAGCAGGTGGTACCAGCCGAGGGACACTC -GAGTTACGTTTTGCCGTGGAATCTTTTGAAGAAGTTCGTGCAGGTAAGGTCTTCGGGCGT -TCTCCTTGGAGTGGTGCAGACTGTAAAAGCTCGGAATATGATACTGGCGTGCCGCCCCCT -GGACGGCCCTTGCCACGCAGGTGCTTCATGTATCTGCTACGCCAGGACTGCCATGGATGG -GCTGGAAACTGTGTGCTACCAGTCAGTATCAGACTTGTGTGTGTCCCGTAGGCTTAAGAG -TGACAGGAAAGCATCATACCAGTTCACTCAAATCCTGGTAAATTCTATTGCCTTGGATAG -GTGCCCCTGGCTCTTTCTCAAACTGCGCAACCCAGTCAAACAAAGTTTGATCCTCTTTCA -GGGTAAAGAATGATCTTGTGCTTTTTGTAGGGATGTTGAATGCACCCATCGGACGAGGTG -CGGATGGCCCAACCCGGTGGGCTTCTAAATCTTCAAGTCGTCCCTTGTTGATCGAAAGCT -CTACATATCGATAAGAGAAGCTGATTTGATATTAGTACACAAGTTACTTTCAAGGCAGCA -AAGGATTCCGTACGTGTCCACAGGCAGGTCTTTTCTTGTGTGATCCACAAGCTTCACATC -GGCATCCTTCTCCATCAGCACCACTGTGCCACCATATTGCTAGATATCCATTAATCTCTG -CATTTCCCAAAAAAAAAAAAAAACACAACGCGCTCCCTAAGTACCTTGATTAATTCCTTA -AATCTGGACCTTTGTGGAACTGTTCGAGATAGCCAGAATCGTGCGCCCTGGAACAGACTT -GTTGTCGTATCCTCAGCCCGAGACGAGGCCATTTTAGCTCTTGTGAAACCAAGTTTCACA -GCTTTTATTCGAGCTTTTACACCCTCAAGATAGGCCGGCTTTGTCTGCGACGCGGTATCA -CGTGTTGACGTGTCGCAAGGGGCTTTTCCCCCATTAAACCGCGAGCTGTGATGCAAGATA -GGTATCGTCTGGTTCGCTTAACTTCCAGTACTCGGTTTTCTCATATATCGGTAGTCTCGA -TCGACAAAGGTCATAAATGAATTGGTTGGTTCACGCCGAGACTGAAAACGCGGTCACGTG -GATACACAAAGTTCACGACGTGCGATGAGAGCGCGTTGACTGACCACAAGTCAAGAAAGA -TAGGAACTGCGAAAAGTATCGCTAAAGGATGCAGAGCAATAACTATATCTTTTGAACTTC -TCGAGAGAACTTAGGCTTCAGAATGGGGGCCCTCAGAACCGTTGGTCTGGTGATTCTGGC -GATCTCGGGGTTCACCTTTATCGCGCTATTTGGCAGGCTGCCAGCTTTCCGGTGAGGAAG -TTGCCAATTTACAACTCTTTGAGGCTTTGAACTGACTGCTCAAGTAAAACTCCGGTTGCC -TGGCTGCATCGGGCACTGTGGGTGTACTTTCCAAATGGAATTGCCGCTGTCGATAATTGC -TTGTTTGGCGGGAGGTTAGTACGAGGCTGGAATCGGTCTGGTAGCTATGTATTGAAAGAG -AATCACCCGCTGGTCTTAGTAAGTCTCGGAACTTATTGTTACTTATCTAGATACTAACCA -TGCACACTAGATTTTCTTTGTTTCCTTACTGGTCATTGGAGAAGGCGTTTTTGTCCCGGC -CGCGTGGCCTCGGCTTTCGAGCATACATCGGCTCTGGGTAGCTGTGGCTATCACCTTGCC 
-CTATCTCCTGCTTTATAAGTGTATTGTGACCAAGTCATTCATCACCGCAGAGAATCATGA -GGAAGAGATGAGGCGATATCCATATGACCGAGTCCTCTTTCATCCGGGGCATCGATGTAG -CACCTGCAAATTCCTCAAACCAGCGCGCAGTAAACATTGCAGCTTCTGTCAGGCCTGTGT -TTCTCGGCATGATCACCACTGTGTATGGCTGATGAACTGCGTCGGGGCCAACAACTGCGT -CTATTTCATCTCACTCTTGGTCTCTCTTTCTGTCATGTTGATATACGGTTCCTACCTTGG -TCATTCCTTACTCTCTCAGATGCTGAAGCAAGTGGTTCCTCCCGAGGTACAGGAGGCCAT -GCAGGGCTGGACAGCCTGGATCAACACATGGAGCGTTGTGGTCGCTTCCGATCCCAAAAT -TGGGGCAGTTTTTCTACTGATGCTTATGACTGCCCCGCTAGCAATATCCTTCTTGGCGTA -CCATGTGTATCTCATCTGGGCTGGGGTGACAACCAATGAGAGCGCCAAGTGGTCTGATTG -GAAAGAGGATGTGGAAGACGGTTTTGTATTCAAGACAAGGCGAAGCCTCATCTTCAGCAG -CCCCCTTCCTATGGATCCATATGATCAACTATGGCCTGTCCATACTGATCAAATCCTTGT -CACTGATGAAGAACCTCCAACTGAAGGATGCTTATTGGCCTCCAACTCCAATTGCATTGC -TCGTCGCCCGGAATCAGACCTACCCCCTGACCCTCGATGGAAGCGACTTCGCTCTATGAG -AGATGTCGAAAACATCTACGATATGGGCTTTTGTTATAATCTTCGGGACGTGGTGGGACT -ATCTGTGCGTCGAGCTAAGGACATATCCGGTATTTAAGCTTCTTTGTATATATCAGCGAT -ATCGATGAAAATTGGATTTCAATGAATGTCTAAAACATCTACATGGAATACAGTGTCTAC -CACACCTAGGAGACTGTGACGTTCAAACTAAAAAAAAAAAGTGGAAACAATATTTGCTGT -ATGGTCAAACTACAGGAAAATAGCCCTGATTACTTTCATTTACACCGATACATGACGAGT -TTTACTGCTCAGGGCATATCCAGAGGTCAGTGGGACCAATTTACATTACTACAGAGTCGA -AATTTGCAATTTTACAGATAAATAGACAAAGAAGCCCTCTATAGCTTCGATCTTCCAAGA -GTATACCCACGTGCATCCTTGAGGCGGAGAGCTCAGAATGGGCGAATGAGTGATAACGAT -AACGTTAGTGCGGGCGGAAAATTTGTCTCTCCACTCCGAGTCTTTGATTCTCTCATCCCA -CTGTGATCTCCCCGCGCTGTTTATTGTTCGAAGGAATTCGCGTTGAGATTCTCTTTGCAA -GATGGCAGACCCAAAGCCGTTATGCACGCCAGCCGAGCTTATGCTCAAGGTAATCCTTGC -AGGGACCCTGAGCCATTACGAAGTTCGAGGCATGATCGATGACAAGCGGCTGGAGCATAT -GCTTGCCGCTGGGAAGCAGATCCTGTACAAGACACACCAGGAGAGTGATGTTCGGTAAGA -TAATTATCCCATTGACCGTTTGCATGCTTTTTGTACTAATCTGCTGCGCGCTTTTAGTCA -CAATCTCGGCATGTCACCGGATATCTGGCAAGGTCTCACGAACGTCCTCACCAAAGCAAT -TCCTGTCCTCGAGTCCCAATCTTTCGCGTGGAAAAGCCCCGCGGCAAGCTACGAACACTC -ATCTGCCAATCTAATCGCATACAATTATTTTTCGCTGGTCAAAGACATCGAGCGCCTGAA -TGATTTGTGTACAATCGCCCGCAACCTCCTCGCCACAACCAAAAAGGCACAGAACCTCGC -GGCAGAAACAGGATTTGATCAAAGGATTCTTGCTCTCATCGACACATGTGTTCGCGTCAC 
-CGCAAGAGCCTTCGATGGCGAGCAAAATGCGCGGAACGAGGAACGATGGCAGAAAGTCGT -CAACCTGTACAAGCGCCTTCTAATTACGTGCCTCCAGTACCTTCATAATTTCATCATGCA -CAACGAGCATCGCAAGATGGTGCTTTGGTTAGATCTATTTGGTTACCATTCAACTGCTGA -TACGAACATCATAAAGCCCAAGGAGCCTTTGGACGACGCCAGCTCGCGAGAAGGAGTGGC -ACCGATTGTGCAAGTTGGAGAGCGCGTTATCAACCCCCCTATTCGCGCGCTTTATGACCA -GACTGCGGAGGATTTGCTATTGGAGACTATCGCCAAGTTCCCGCGGGAGCCAGCAACTAT -CAAGGAAGAGGCTGCCATGCTGCTTCTGGCCAACATTAAAGATCATATGGAGAGAATCCT -GGGGCGGGACCTCAGCGCTATCCAGGAAATGGGCCGAGACCCAGACCAGGTCAAGGATAT -CAGGGCTGCTTTGACTGCAATCCTTGGGGCCAAGGTTGACGGTTGGTCCGACCTTCAGGA -CCGGGCTCGTGAGCTCCCAGCTGCGCTTGCCGAAGAAGAGCATGAAGAGCATGAAGACCA -CGAAGACCAAGAGCATGGAGACCACAGGGAACATGAAGAGGAAGAAGAGGAACACGAAGT -CACTAAGAAGAAGACCATCCTGACTATTGATCGCAGCCTTACGGCTGGCTTCCCTCGCCT -TTGCTGGGCCGATCTTCCAGAGATTAACGACTTTGGCGCTGTGGATGGACCTGTTACAGA -CGAAGATACCAGTATGCCTCGGTCAGCTCAATCTGCCGCCGAGACGCTCCAAGAGGCCAA -GGACGAACTCATGGCTCGGCTCCAGGAGCCCTCCCATATTGATGGCGATGAAGAGCATGA -CTATGACCACGGCGATACACACACAGTGGGGGATGATGATTCGCACAGCCTAGACCACAT -GGCTGACGGAAGCGTGgatggagagggcgaggatgatgtggatgacgatggcgtcgatgg -agaaggcgatgatgaagaagaggaagacgatgaaTATCGTGGCCGTCCTGGCGATCAACA -GCGTGGTCTTCTGACAGATATCCCTCTTGTCCTTGGACCGGCAGAAATCGAAGCTTTGCC -TATGATTGTTCAGGCTGGTATCGTGGACAGCTTCGGTCTCAAGGGTGGTGAACGGAATGG -ATCCAGAAATATGCAAGCGCTCCGTTGTCATATTCTGCTCACACAGGAGACAGGACGTAA -CCTACTGCGCGAACTACTGATTTTCATTGCGGCATGGGATCTCCCCGACGACGAGCTCTA -CTTCAAGATGATGGTCCAGATCATGGAGGCTGTGCTCAAGAACGGCCTAATGTCTCATGC -CTACTCGGACTTCGGCCAGCCGAAGGACATAATCTCACCGGCACAGGCTGTTGTGATCAA -GATACTTACCCACATCTTCAGAGCCAAGTATTCTCCTGCTTCGGTGACGGGCTCGGCTCA -GCCCAACATCCCCCGCAACCCGAGTTCACTCAACAGAGTTGATATTCTCACTGTCCGATA -CATCTTTACCATCTTCCGTGGCAACATCATTCCCGAGACCTGTGCTCTGATCTACCTCCA -AGGCCAAATTCGCGCCCAGCGGGCTCTTTCGGAGGACTTCCCGCTCAATCTATGGGACAT -GGAACGGGTATACGAAGGTGTCTACCAGTTTTTGGAGTTCTTCGCCGTCTTGACCGAAAA -TAACGACTGGAAGAACCTGCTGGTCAAATGGGAGATCGTCTACGACTTGGTGACATTGAT -CAAGGAGTTGGAGGCAAGCATCCCCAAGGGCCAGTTGAGCCAATTGTCCTTTGGCGCCGT -ACCCCCTCCCCCTCCTCCCCGAACGGACACCCCCAACGATGCTTCCAGCGCCAGTCCCCC 
-TGCCCCTGTCGCCGTCGAACGTCCTTACGATCCCAGCGACCCAGACCCTGTCGACACTGG -TGCCGGCAGCGTTGACTCTCGACCCGAGTCTCCTCCGATCACAGAGGATCCCTCCGAGTT -CGAATGGCGCAACCTCAAGAAGTTGGTCGTGCTTGTGCTCTCCTCGCTTGTCTGGAAGTG -TCCCGATGTCCAAGAGCAGATCCGTCGCTACGGTGGTGTCGAGGCGATTCTTTGCTGCAC -CGCCTTTGACGCCCACAACCCATACATCAAGGAGCATGCTGTGATGTGTCTCAAGTTCTT -GCTGGAAGGTAACCGTGAGAACCAGCGTCTTGTTGAGGAGCTCGAGGCTCGTGAGGTCGT -TAACAATGACGGAGGTGTGCTGGAGCGAAGTGGCTACGAAGCCATGATCAACCAGGCAGG -CAAGTTGGCCATCCGATCCAAGGCTCGGGCCGAGGAAATTTTATGAAACACTTGACCTTA -CGTGTGATGAAAGTTTGGAATATTACATATAGAGTAGATTCAGCCTCATGAATGAAGACT -ATAAGTTACTGTTCTTTGCGCTGTCCATGATTTGTACAGAAATCCACCGGAATTGCCATC -GGTCCCTGAGAGCTATGGATTCCAAAAGAGGAAAGTGGATCCGCGGTGCTCGCCTGAACA -TCCAATTTTCTGCCTGCAGATCTTGGCCTGATTCTGCTCTACAGGGAAAAGACCACTTTA -CTGTCTACAAGGATATTTGGAATTAGTCGACTAAGGTCACTACATTTTAAGATTAGCAAA -TCTATTTCTACCATCGACCAGCAAGGTAGTCGCTCATCATATTGTTGTCTACCAGGTTAT -CTCGGCAGCTATCTCTATCCGACCTTTTCACCAGGACTACAGATAATGAGTTGAGTACTT -ACTGCTCGAAAGTCACCTACGATCCTTCTCTCCACCACCTTTGCAAGACATCTTATAGAC -ATCTCCAAAACTAAGATAAACAATAACAGTGCCTATGGTCGATATGGGTTGGTGTAAACA -ACCAACCCATATCGACCATAGACACATCAACACTCGGCTTCACCAAAAGGTCTTCAGCCG -CCAGTACTGCACCAAAGTGCCAGTCCAGCTCATATGACCCTACCCACTCCAGTTCTATCA -AGGTGAGCCGCCTCGAAAGACATGGGAGTATGGGGCAAAGAACTGGGGTGATCCACTCGG -ACCGAAATCTCATGCACTATCCCTACGCATAGGAGGCATCCAGATATCCGGTCTTGGTGT -CTTGCGTCTGTTATCGGAAGGTATACTGACATTATCAAGCGGTTAAGGCAGTGGGGGTTC -ATCATGGTATTCAGAGAGGTATTCAGCAAGCTGGATCCCGCGACCAATGTCTGTTTCCAT -CTGCTCAACGCGCCATGTCAGTTCCTCTACGTCCTTTACCAAATCGTCTTTGAGTAAGTA -AAGGCCCGACTCCAGATCCAGAGTGCGAGTACTACGCAGTTTCGGCCAAAACTGCATAGC -TTGGTACTCGGTCCACTCCTTGACAAGAATCATGATTATCATGATGATCAGTCCCAGTAC -AAGAAAAAAGAAAATAATTGCGAAGGTCTCCAGGGGTGGGGGAGACATTGTGGTAGTTGG -ATGGGGTGTGGTACTCACGTGTGATACTCACGGGTGAGTAGGTGTCTCGATTGTGAGAAG -TAAGTGGTTGAAATATGAATGTGGAGATAACTAGTTTGCTAAGGAGTAAGCGGTTGAGTT -GCATACTGAGAAATGGGAGAAGGAAATACCCTAATGTTGTGTGATACTCCGGAGAAGTCA -CTGTCTGTGACTCGGTTCAAGTACAGAGAAGGATTGTATGAGTTGTAGAGTTGTAGAGAG -GATGTGATGTGGAATGAGAAAATATATCAGTCAGGACTTGGGGACTTGGTATTTATGTAT 
-GAGCTACGGTAAAATCCTCGACAAAGACTGTTACTTCACAATGAAATTGTATACTCTTTG -TTCAGATCACCAGATATATAAACAAAGATATGAATGGAACTGAGATCAAAGAGGGAACAA -AGATCTTTGAAGGAGGTTCGATAATTACAGTATGAAAGCCAACGAGGGAAGGACTCGCGG -ATATCATCCAGGGCCGCATGCAGTAGAATCTAAAAAGTCTAGATCATACACCTAGAACAC -TGTCAATTGCCTCTATTGCATCTTCATCTGCGTCTTTTCACATGATTTGCCGTTTGAAAG -CTTAATGTTCTTGTTCGAGTTCAAGATATGCTGACCACCAAGCCTACATTCAATATCCAT -TTGATTAGAATGTACATGATCGATAGATGCAAATGCAACAGTCATTCACTCATGCTAAGA -ATATGAAATACTGCCCTGTAATCTCCACCACAGACAGTTTAAAATGCACATATATGCATA -GTCTCTCAATCCGTTCAGCCATACATGCCATCCCATtcctcctcctcatcttcctcaaca -agcttcttcttcgcctcctcctTTTCTCTCAACTGCTTCGCAAAATCCACACGGTCAAGC -GGACACGTCCCACGCAGGCGCAACCAGGGTCGCACACACTCCAAATCGAAGCGGTGTGTC -GTGTGGCAGGGCAACTGAACGACGAGTGGATACGGATCTTCCACGAAGGGATTGTTGCAG -ATAGGACATGTATCAGAGGGTTTGAGGGATGCTTTAGGGACGCGTTCGAGCACTAGAGAG -ACCCAGACGTGGTTAGTTAACGGTCACGAAATACCCCGCAATCAGCGGGGTGAAGTTGAT -AAAAAGGTGAGCCAGGGAAATGCCTCTTACTATCGCAGAATTCTTCATTGACACCCTCCA -CCTCCCGCGGCGGCGTATCTGCCTGCGCAAGCAATGACTGGATCATCGTGGTGAGAAGAT -CACGGCCGGTATCTGAGTTGGCGTTATCGGTTCCCTCCTGCGCGTCGGCTTGCGGTGCTG -AGTCGGCCTCACGGCGCATCATGTCCAGTGCCTCTGCAAGAGAGAAGAATGCTGCGCTCA -CATCTCCTGGGACGGGGACTGCATGGGGTCTGTGGTCGGGAGCTGGGGTGATCTGGGACA -GGGTTGAGAAGAAGGTGGAGAGGTCGGGGCGGCGGCGGCGGGGGGTTGAGGCCGTTGAGG -GATCGGTGGTGTTGTGTTCAACTGGTATGGTGTTAGTTATATAGATTTCTGATATTGTTT -AGGGACTGGAGGTTCGTACTTTCGTAGGTCGACATGGTGGGGGATGGATGGCTCGAAAGC -CTTCGGAGCTGTTGTTTTTGAGGAATTGGGGCGGGGGAAAACCCGTGTATTGTGTGACAC -CTCTGGAATTGTTTGCTTTTTGTTGCACAATATTGAGCTGTTTGAAAGGAATATGGGGAT -ATGGGGATTGGGTGTAGGTGTAGGTTGTTTTTATAAAGAAGTTCCCTCAAGGCAGAGAGT -CGATAACGCGGCGCGGCAGTTCTGATTGGTTCCTCGCCATACAAGCTCCTAGAAGGCTCA -TATTGATATGCTCATATTCAAATGACGCACATTTTGATGTGAGGAAAATGCTTGAAACTC -GGAGCAAGCTCAATAAATCCTTTGGCCTTCCTGATTGTTTTGAGAATTTCAATTGTTACA -GCGTGCTGGACTCTTGCGTGTGGAGAAAGTGATTGAAGCTATGTTCCATCGTTTCCTCTT -CGATATTTTGTCCACATACTAGAGGCATTTGTTTCTTTTGTTCTTCCAGCTATTTATGTA -TCTGTTAGCCCCTTCTCCTCTACCGGCAGACATGACCTAATGTCAACGGGCAAAGCTTAT -CGGCTAAGTATACAACAAAAAGTATAGAATCTCTGAGTTGCGAGTGACGATCAAGTAGTT 
-ATGACTAGCCAAACATATTTCCTACGTTTCGTGTATCGTAAGCATCTTAAGCACTAGGTA -CCTAGTGAAGAGAGGAATTCTCTAGATACTATAATATATAGGTCAATTTAGCTTTCATCA -ACCGCGTTATGCCCACTAAGGTATATCGATAGAACTAGGACTGCTGTCTAAGAGTGAAAT -CATAACGGGATAAGAGGCTATATTATCGATCACTATATATCCATATTTCCCGTGCGTATC -TAATCCAGTCGCGCAGTGAGGGCGAGCCCCCCTTCACTAGATAATAAGTACTATAGGCTC -TAGAATATATCCTGGTAGAGATGAACAACAACCATTGGCCCCAAAAGCCGAGCCCGTTGA -AACCGTACACGTATCTGCAAGCAAATCGCATTTGTGGGCGTACATAACATCTCATCGTTT -CCATCTAGCTACTTCCTTGAACTTGTACCCTCTCTCATACAAATTCTCGACGGAAGCTTT -CGTTCTTCAACCATGAAAGTCGACTTGACACTGAGGCGAGTTCCCTCCAGCGACGATATT -TCTCATATCGAAAACTTACTCCGAGAGATCCATCCCAACGTCACCCAAACCTCACAGGAA -GGCCTCTCCATTAGCTTTGCAGCCCCTACAACCGACATTGACGCATTCGGTGAACTGTTT -CGATCATGGCTTAACGCACCGGAATCAATTATGCATGGCTTCCATATGGTGGAAGACATT -TAATTTATTTAATCCTCCCCTCCAATGCACTCGGAAGGGTCGCCACTCAACATTTCCATT -GACAATACCATCTTCAACTGTCCCTCACTCCAGGCTTTTTGAGCCATCGAGTTGTTATGC -TGCCTCGAAAACATGCAGAGGCGCCGAACAAAGGATGCACATCAATTTGGGGAGGGGAGT -TACCCTCAGTACTTATATAAGAGCGGGGGACGGTTCAAATTTAAGAGGAAGAGTCACAAG -CGTTGTGTCAGGATTCCTAGTTTTGGAAGCTGGTCAAACAGTAACGCTTAAGCAAGATAC -ACTAGTGCTCATCCCAGAGTTCCTTGAATAAACTTGGGATAGTAAGCAAATACGACGGTG -CTCCCTCCTCGACAATTTCACCCCCTTTCATAATCAGAATTTTGTCAAAATCTCGGATGC -ATCGCAATCGATGAGTGACTGCAACAACTGTAGCCTCTTCAAAGTGGTCTCGAATGACTT -GGAACATAAGTCGCTCCGTCTCGACATCAACGCTGCCACTAATCTCATCCAGAACAAGGA -TCTTGGTTTGGGCCTTGCGGAGAAGTGCTCTTGCCAAACTCACTAGTTGCCACTGGCCAC -GTGTGAAGTTAACTTCGCTCACCAACACGTCTAATGGATAGTGGTTTTGCATTTGGAAAC -CATCCCGCAGTCCTACTGTGTTCAAAGACGAAAGGATTGCGTTGTTGGAGGCCTGTCCCT -TGGGATCCAGGTTGAAGCGCAGTGATCCAGGCAGTATGACAGGGTCTTGGGGGATAATAG -TAAGACGGTCTCGAATGATCCCACAAGGAATTTCGCCCAAATTGACATCATCGATGAAAA -CCCCACCTGCACTGACCTCAAGCATCCGGAAGATCAGGAGTAAAAGAGAGCTCTTGCCGC -TTCCGCTTCGGCCGCAAATGGCCACTTTCTCCCCCGCATTGATGACTGTAGTAATATCGT -GAAGGATCGAATTTTCAGTTTGCTCAATACTTTCGTAGCTGGTATAAGCGTGATCAAACC -GGATAGAGCCATGTGCAGGCCAGTCTTCGGCAGGCTGCTGGATAGTCTTGTTCGGATCGT -CCTCTTCGGGGGTAGCGGCTTCATGACCTTTGATGCGTGCAACAGCACCGAGAGAAGTCT -CCAGCTGTGTCCATGAAGTAATCAGAAAGGATAGACTGCTGCTAAAGCCCAGAATGTTCA 
-GCAAGGACACAGCAATTGCCCCGTGGTCGACTGAGTGTAGCGTAACTGCGATGCTGATCA -CTATAACAGCCATTGCCATTGTCGTCAGATCCAATACAAGATCCAACCAACGCTGGATAC -AGAAGAGAGAATAGTATGGTTGGAGCGCGGCGTGAACTAGAATATGACATTGTGCTATCC -ACAGCGATTCCCAGCCAAAGACGCGAATTGTATCTGTCCCCGATGCAGTCTCTAGAAAAT -GTGTATATAGAGGGCTTTGTGCTTCAAGTTGCATCAAGCGCAGTTGGCGTGAGGTTCGCA -GATAGAACTTCTGTAAGCCATATATCATAAGAATGATGAACGGGATGGTGATGGCAATAT -ACTTGTTGCCGGAAGCAATGAGCATAATCTCGGCGATGGCGGCGCCAGCACCGAAGATGG -TCATAAGAAGCGCACCCGATAATTGATTGTCAATCTGAGAAAGGTCTTGACTGAATTTGT -TCAAGAGAGTTCCGCTGTCATGCTTGGTGAAGAAAATATACGGTGCCCTCAAAGTAGACA -TTAACAGCTGTGCATGTAGACGACGAGCGCTTTTTGGTACCAAGGAAACAAAAAGAAAGA -CTATGCTGGCTATTGTTCCAAGCAGCTGGATCGCGGCACAGACTCCATATACGCCCAGGT -AAACACTGAGCGGATAATGGAAGTCATCGTCCGACCAGTACATGAGCCACAGAGTGGGAA -ATTTGTACCCAAACGTCTGGATGACTATGAGACTGAAAAGGACGACAGAGGATTTCCAGC -CGATTGATCTGAAATAGTACTTATAGACCGAGAGATCCCCGTCTCTGCGCGACAAATCGT -CTCGTGCTTTTACTAGTTCATCTTGCGTTCGAGCAGTCACGGGTTGGGTATTATCTGTTG -AAGAGGTTTCAATTCCATGTGATCTGGAGGATATATCTTCTGGAGATAGGTTGCAGGGGT -CATGGGAGATAATTGTGCCATCAGCTAGTGTCATAACACTAGCACCAGCAGTATGATAAA -TCCGAGGATCGCTGCATGCTAGAACAACGGCCATTTTAAGGGATCTGCATAGACCTTGGG -GGCCCAACATGCGGCTGAAGACCTTTTTGGCAGTGACAATGTCTAATGCACTGAGCGTCT -CATCAAGTACGAGGAGCTTCCTACGCGAGTAAACGGATCGCGCAAGAGCCAGGCGTTGTT -TCTGACCACCACTCAAGGCAGTCCCCCCCGACCCAACTTTGGTGGTGTCGCCTCCAGGAA -AGGTAGCAATATCTTCATCCAAAGCACATGCATGAAGCACCATGTCATACCACTTTTCGT -CAAAGGAAGAAGACCCAGTCACCAACGCTCGGATAGAAAAGTTCGGAAGCCAGGGCTCTT -GTGAGCAGTAAGCAATCGTCACAAATGTGTCGCAAGTACCAGCAGCGCAAGGTATTGCTC -CGATGAGACCTTTGAGTAAAGAGGATTTTCCAGAACTGACAGAGCCTGTGACAATTGTCA -GAGTGCTGGGATAAATGTCAATAGTCACACCAGTCAAGACCGGTGTCATAGCCCCGGGGT -ACTTGAGGCTTGCATTGCGCATTAAAAGATATCGCACATCCTTCATTGGCATAGATTGAA -GCGCAATCATGCTGCCATTAGAAGTGCCTTTTCCCGTCTCGGCCACCATATTCTCGGTGC -TATTGGAGCTATTCTGGCCGGTATGAGGCTCTGGCAATAAAAGGTAGTCTTGGATTCGCC -CAAAGCAACCTAGGGCCACGCCCAATTGGGTAATTCCATAGACAAAGACCTGAATCGGCT -GGGTTATGAGACCAATCAGGGCAAGAATTGTAAAGGATTGTGCTGATGTGACTGTACCAC -TTCCCCCGAGTGTAAAGATGGCAAACGTGAGAACTGGCCCAGCAATCTGTGGGATGTTTG 
-CCATCAATTCAAACAAGCAGTGAGCCAATGGGTCGCCGATTTCTCACTTTGAGGGAATGC -ATACCAATGACATTTCTCCACATTATAAGCATACGAGCCCCCAGAGACTGCTTGAGCTCC -ACATCACGCAGATCACATATTCTCCGTTGCACAAATGAAGATATACCGAATAGCTTCACG -CTTTGGATTGATCTAAGGACGGAGGCGGTAGTTGCAACACGCTCCTGGACCACTTGGTTC -CATTCTCTTTGACGTGATGGCGAGCATTTAGCAATTAGATAGCTAAGCGCTACTGCTACT -AGAGCACAACCGACTGATGCTAGACACGCCAATTTGACCTGCCTTTCAAGAAGAAGCAGA -GCGAGCCCTACTTCGATTGGAGAGGCGAACACTGCATCAAGCCGTAGTAGATTATTGCAA -ATGCTTTCCGTATCAGTACTCATCAACGTGATTGCAGCTTAGTCCTTTGATATAGTGGTG -TCAAGAGAGAGTGTTTTATTGTATAGAGCCACCACTAGTATGCTTCTTGTCATAGTGATG -GCACGATTGATCCCATGTTTATACAACATTGTCGTCACCTTATCACCTTGCAATGTCATT -ATCAGCCATCCAACGGTGGTTTTGGTGTGTGACACATACCGCAATACCAATATAAACCAG -CACGGTAGCTCCAATAAGCGATTTTCCAATGGCCTTGCTTTTAACATCAGTGGAAGAAAG -ATAATCTATCACGCGATAAAGCAGCAGCGGCTGGGCAATCTTGAACCCGATTAGGCAAAG -TCGAGGAATTGCTGCCCATAGTAGTGTCCGTCGAACACACGAAGATACAACCATGGCAAG -TATATGGTGATGTTGTCTGGGACGTTTCAATGCTATATGAATGCGACAAGTGAGCTCTCA -GCTCCCAAAATAGATCATTAACAAGATAAATGTAGAAGGGATGCGAGTAAACTACCGGAA -TCCCATTTCGCACTGAAACGTTCTTGCAAGTCGTCAGCCCTGAGACTGGGATCGATTGGA -AAGAGATTGGCAGGATCCAACAGGCCACGCGCCCCTTGAGTCAATAATGGGTTGAGCCAC -CAGAAGGCCACTCGATTCAACATCCCCCCCAAGACCTCAGGTGGGTACGTCGCAAACGGT -GAGATGAGCTATTTCTGCTTCTCTTGGAGCTCTGTTATGAGCAGTAGAAACCGGATGACC -AATATTGCCGTGAAGGTTTCAGCAATAATAGATGAGATATCTCTTAGCCATAATGTTCGA -GCCTGTACTCCATCGAATAAAATGCTGCATAGGAGATAGAAACATATCAGATTCGGAGGA -GACCCTGATTTTCTATGCTCTATCCAAGATAGAACACAAATAAAAAGGGAACATGTCAAT -GATAGGGAAGCCGATACAATGGTGGCTTTGTTGCGATACTCCGGAGCTCTCGCCCATAGA -ATAAGTACGACAAGTTGAGTGATGAGCGAGCCAAGGTTAAGTATAGTTTTAGCAGAGATT -GAACGGTCGGCCTGCTTGTTTGGCTCCAATGTGACTTTAGTGGACTGCCCAAGAAGTTGT -TCCAGTCTCCAGATCGCCGCCGGGATAAAACATGCTGAAGGAATGATACTGAACACAATT -TGTTCGAAAAGAAGTGTGAAGTCTAGGTAATAACATCCTTGAATTTGAGGACCGAATACC -TCGTCAGCTGCTTGGTTGCAAAGGTGTGCCATCTCGGAGTAATATCAAATGCGAATTTAC -TCAGCCGGTAGCCTATCAGCCGGGCAATCCACACAGTTTAAGTATTTCTAGATCTACCGT -GTGCGTGTAGGCGGCTAACCAAAAAAGCTTCTAGTGTTGCTTGACTCGCTAGTAGCTAGA -CTAGATGCACTATGGTTGACTAGGATATAATGTTGGGTCAAACTACCATTTAAATCAAGT 
-ATAGGCTGGGATACTATCTTAGAAGCCTACAACCAACAAGGATTCCCCTCTTTGCTACTC -CATATAGTAAGCCTCGCATATCTATATGACTTTCTATTGGTCCCCTTATCTCAATACTGT -GAGAGCCCATTATTATGGGATCCACCATACTAAGTAAAGAGAGAACCACTCGCACAAGCG -CACGTTTATTCTATATCCTAGATCTATGGAATTCTGTCTATTTTGTCTATTCTGACATGA -CTTTTCCAGCATACCTTCAACTTGATCATTGACACATGCAATGAATGCGGTTGGGCCACA -CTGGTCTCGTTAGCGCCGTAAAAGATAAGGCAGAAATGATACGTTACCCGATCTAGACAG -GGATGAGTGGCATAGTTCATCGTCATATTCCGGTCCTGGTCCGAGTTCTTGCTTGGGCTT -TGCATCCTGCTACGTAGAGTGATAGGAGGGGGTTGAAATTAGAAAGTGCCTCCCCGACCG -GAGTCCCCTGAATTTTCGAGCTTTGCTTCAGGCCTTTCAGTTTTATTTTGACTTGCATTT -GAGCATGGAGGACTATCATGAACGAAGAAGGATTCAGAACCTGAATGCGCAACGAAAATG -GTGTAGGTGTCTTGAATGGTCTGTCAGCATTGTGCAAACGATTAGTTCCAGTGAAAGGAA -GCGAGCTTAGTTCTCTCCTTGAAATATGAAACGCGTGGTATACATAGTATCTCGGTGCAA -GTTAGTAGGATTCGCGACGAGCTCAATATAAACGTAGTACTGTTAAGATATGAAGTTGTT -TTATCTCACCTTAGGTTCCAAATATCGGGTGATGGCCCCCGAGTCATTCAGCTTACTGTC -CAGGGCTCCACAAAGCCCAcaaagcccaaagcccaaagcccaaagcccaaCCCGACCCAG -CCCACTAAAAGGTTGGGTTCTAGGCCCTATCTTCGGGCCCACCGGTGGGCTATGGGCCTA -GTATTAAGCCCACTGGGGCCCAAAAAGCACCAAACAATACAAACATTCCGCAAAACGTGC -CTTTCAAATCACTCTATGGATAGACGGGAACATATTATTATTTCAATCAATGTATCGTAA -ATCAGTACGATGTATGAATCACGTCTCAAAATGCAAGAATACGCGCTACTTGGACAATAT -ATAGATAAGGTTCACTAAGAACGTCCACTGGTAGTGCTCGCGCTTACATCCAAAATATAT -CTTTACCAACAGAGCATCCATCTTGATATTTCCGCGATATCTCGACTGCTACCCGTCGAA -TTCAGATTTTCAAATTAGAGATATAAGTTTACTGAGCTATTGCGATTACATGTCACAAAT -TCAAACTCTAGGTATACGACTAATAGAAGGCTAGATCACAACTACCACAAGACCCTCTTA -ATGGGTTTGATAGGGGTCATATGGGCCCCCAATGATGGGCCCTGGGTTGGGTTGGACTAG -CCCactgagggctgggcctgggactgggatccgggcctgTTTGTGGGCTTCGGTGCGGTG -AGCTCCAGGGCTAGCTTAACGATTATGTAAGATCAAGGGCCGTCCACCCAGGGTCCACGC -TCAGTAGTAACTACTCAATGAGCAAAAAGCGGCGGGGAGTCAGCACTAAAGCGGAAAAGG -AACGCCATGTCGCGCGGCCTAAATTCCGGTGTATGTATATGTCATCCCAAGGCCTGGACA -CAGGGCTATACGGAATGAGATGAATAATCATCCCCTTGCTCATCCAGGTAACGACACTCT -CCTTTTACATGGGGGATTTCATAGCTTCTCAGTATCTACATTTTCCCTACTCAATGACCT -CACTACTAACAAGCACCGTGCCCTGGCACGAGGGCGAGGAGAAATTACACCACTGGCTTG -GGGTCCCTCACGGCGACAACCCGACCACTCCGTATTTGAGCCCGAGAGCTGCATATCTTG 
-TTCAAGAATGTCCCCTGCTTGCGCTGGGTACATTGGATTCTCAAGGCCGGCCATGGTCAA -CAGTATGGGGCGGTACAGCGGGCTTTGCAATGCCAGTTGCCGAATCGCTAATCGACCTTC -GGGCACAGGTGGACGGCAAATACGACCCCGTTTTGCAGGCCTTATTGACCGGCCATCAAA -CTCATCCATACACAGGAAAAATGGTGTCGGGGCTGGCAATCGACTTGGAGAACCGTCGGA -GAGTGAAGCTGTACGGCAGGATGGTGACAGGCTCACTCTCCGATGGAGATGCAGGAGAGG -CACAGTTGGTTGTCCATATTGAAGAGAGCTTGGGCAACTGCCCTAAATACATGAACAAGA -AGCATATAATACCTGCGCAGTCTGATTCAAAGCTGATCTCCGACTCACCACAATTGAGCC -CTACTGCAATTGAGCTGCTCTCCCATGCGGATACTATGTTTATATCCTCGTCTCATGGAA -CATCAACAATGGATACAAACACGCGTGGTGGTCCCCCGGGATTTGTGCGTGTTGAATCGA -ACAGTGCTAGCGGTGCTGTTCTTGTTTATCCTGAGTATTCGGGGAATAGGCTCTATCAAA -CGTTGGGGAATCTAGAAACCACCCCGCTGGCTGGTTTTGTCGTTCCCGATTTTGACAGCG -GGAACGTTCTTTATTTCACCGGATCGACCGAGATACTAGCCGGAAAAGACGCAGCTGCCA -TCCTACCCAGATCGAATCTCGCTGTGCGAGTCACCATCGCGGCAGCAATCTTTGTTGAAA -ATGGGCTGGCATTTCGTGGCGAGGCCGGAGATCCTTCTCCATACAATCCCAGCGTGCGAT -ATCTCGTCAATGAGAAACAAATCCCTGGCACCCAAGAAGACAGCGGCTCAGCCCCAATGG -CCACGCTGATAAAAAAAGAAAATATTACCCCGTCCATCCAACGGTTTCGGTTCCGTATCT -CTGGCCGCAAACCGATTTCATGGAGTGCGGGACAGTATGTCACACTGTCATTTGAAGACG -AGCTTAATATGGGCTATAGCCACATGCGCGATGACGATCCAAGCAGTCTAAATGATGACT -ATGTTCGAACCTTTACTGTGTCTTCGTATCCCGGTCGCGACATACCCGCTGATCAATTTG -AGATCATGGTCCGACGACATGGTCCCGTCACTGGATATCTCTCTCGCATAGATGAGCGAG -CCAGCCTAGAAGTACCACTCAAGGGATTCGGGGGAAGTTTCCACATATCGACTGGCAAGT -CCATCGTCCCCTACGTGGCGGGCGGTATTGGCATCACACCCCTTCTCGCACAGCTTCCAG -AATTGGATATTTCCCAGCTGCGATTATTCTGGTCAATCTCATTCAATGATTTGGGGCTGG -TGCATGATATTTTCCAGCGGTGGCCACAGTTGCCCCTGTCAACGACTTTGTTTATCACCA -ATGTGGAGCTCGATGACGCCGATCGGCAGATGTGGAACGGCATTCAGTCGTCTGGGGTGA -AAATGATACGACGAAGGATGCAAGCCGAGGATTTAGACCTTGCGCTAGCGGATGTTTGGT -TTCTCTGCGCTGGTGTTGCGTTAAAGCGGATGATTCTGGATTGGCTTGTTGGAAAGACCG -TGGTTTATGAGGATTTCGACTATTGAAAGGCTCTTGCAATTATGATTCTGACGTTCTCAT -TAATATTTCCATTAATTAATAACTTATTCGAAACAATTCCAAAATACCCAAGACAATCTG -CTGGATAAAAATAGATGCCGCAGCGTACGCAGCCCCAAACGTGTCAAAGTGGAGCGCCGA -TCATGGAGACCCGTATGTTGTAGGCGTAGGGGCTTCTCTGGCTGAAATCTTTCCTTCATC -TTCACATATTCACATCTCGACATGAAATCGCCTTCAGTGAAGCCCTCGGCATACCCGCCG 
-CTGCCATTTCATCTCATTCGCTTCCTGATCTTCCTCTCCTCAATCGTTGTAGGCGTGATC -TTGGCAGTGTTTACCTACCATCTCCACGCCGATGGCTTCAAACTACCATATTCATTCCTC -GTGGTGAGTAAACATTCAACCTATATCTCCTATAAAATACTAATGAAACCCCCAGCTCTT -CGTCTCAGCAGGCCTCTCCCTCCTCAACATCCTCCTAACTTCAATAATCCACTGCAGCTG -CGGCCTATCAACAAAACTCTCTATATCCCTCAACATCCTCCTAACTATCCTCTGGGCCCT -TTCCCTGGGCCTCCTGAGCTGGAGCGTAGcaagcacaataacaacaacatgcacaacaac -aCACTGGGGAAACTCGACCGGTATTGCCGTGTGCAGGAGCTACAAAGCCCTCTTCACATT -CACCGTCACCGGTCTGGTTAGCTACATCGCCGCAGTCTGGCTAGACGTGGTTGTGCGCCG -TCGTCAGACTCGTTTTGGTGTCTATGACCCTATGGGATCACACCCCGGCCTCGATGACTC -GGGCGCATTCGATGTCAAGATGGATGACCGCCACAGCGAGTCCACACCTGCTTTGCACGA -TTATGATAATGTTCCCCCCGCGATGTCCGGTGCCTATGGCCAGGGACATGCACAGGTACC -GCCGGTCTATGAGCGGAATTTGGAGTATGCTCATGCTGGTGATGCGCAGAATTATTATGA -TTCTGCGCCTGGGATGAGTCACCGTGGTGCACCCAGAGTGCGTTTTAGTGCCTATGAACA -GGATGGGCACCAAAGGCCCGCCGAGAATACTGGGTACGATCCTGCTATGTATCGCTGAGT -GGGATCTTGCTACTTGGTTTTCCTTTTCCTTTTTCTTTGCTTTCATGACAGCAAGCGAAC -TTTATGATATTTTTCTGTGGTGCATCTGAAGGTTCTTATTTGAATCATTTGTGTTGATAT -GGAATCTGCATGACGGCGTTCCGAGGGTCATTTACTGGGGTATGTGATATTCATACAAAA -TAAAACACAGGTATAGCAGAATACCAATGCAAGAAATCCAACGACTCTTTCATTTTGGGC -TCGTTCTAGATAAATTAAAACAAAAAGATCCTTAGATCGGCTGCGCTCACGAAGCAAATA -ACCATCCAACCATAAGATAAGATGAGATAGGACAAACCCGACAATGAAGCCGTTCGAGAA -GATCAGGGTAATTTTCGACCAAAGATGACCAAGAGTCCGTTTACATAATGACCTCGTTGC -ACACATCCTATAGGACATATTAGTCAATAGCGCATTGAAAAAACAGGAAGTAATACATAC -CCGCTTGAGCTCCTTGTAAGGCAAGCACACGCAGTTCTGCACGCGGACCTCATCGCCAAC -ACCACACTCCTTGCCAAGAATGGTGATGCTCTGCACCTTGATACCCTGCTTGACAATGCT -GGTCGAGTGGCTGCTCACGGGGATGGGTGTACCCTCGACACGAGCCCAGGCACCGACACG -GCTGCTCCAGCCGATGATCGAGTGCATGACGCAGGCATCGTGGCGGATCTCAGCATCCTC -AAGAACAATCGAGTCCTTGATACGGGCACCAGCGCCGACCACGACACGCGGGCCGATCGA -GACGTTAGGACCGAGCTTGGCCGTGGGGTCGACGGAAGCAGTGGGGTGGATGTAGACCGG -AGGCACGATGGTGGCAGAGGGGGGAGTGAGTTCAGGGGACTCGGCCTGGAATGCCTTTTG -GAGATACAGGGCGTTGGCAGGCACGGCAGAGCCGGCGGACTTGATCTGGCGCCAGAAGTC -CTTGGTTTCGTGGACGAAGAAGCGGTTGCTATCGGCCAAATCGGAGAGAATATCTTGCTC -CAGACGGAGGACCTCGCTCTTCTCGGCGTCCTCGTCGTCTCCGGTGGTGATGAAGGAGGA 
-TTCCAAGTTGTCGGAGGAGGGGTACGAGAGGAGGCGGGGGCGGGTTGTGCGGCGCTTGAT -GGCGCTGCGGATGGCGGGGAAAATGCACTCAGTAGCGAAGAGGTAGACACCGCAGTTGAT -CAGGTTGGAGATGTGCGACTCGGGCTTCTCGACGTAGTGCAGCACACGCTTCGTGTGCGA -ATCGGACACGATGCATCCGAAGTTCGTTGCGGTGTCGTTTGACACCCGTGTACCCAGAAT -CACAGCCTCGGCATCCTTCTCCTCAAATAAGCGCATCATCTCGCCCAGTGGGAATGAGCA -GCAGACATCGGCGTTCAAAACGAGCAGCCGCTCGGGCTTGCCCTTGAGGATGGCATCACG -GAAGTGGTATAGCCCACCTGCGGTGCCCAACGCTGTGTATTCGCGCAGGTAGAGGATGCG -GAGCTGGGGGAATTCCTTCGAGGCGTCCTTGATAAAATCCCGGAATACACTCTCATCGTA -ATATCCAACCAACATCACTTCGCGTACATCGGGGACCTTGGCCACCGCCTTTAGGCAGTG -GTGGATGATGGGGTGGCCGGCGACTTCGAATAGGGGCTATGGAAGTTGCAGTGTCAATTT -TTGTCCGTCATAGAGTCTCACGGACATTTAAATGAGATGCAGCTGCATACCTTGGGCACA -TCGAGTGACAATGGGCGGAACCGGGTGCCTCTTGAGGGGCCTCCGACCTGTGGACAAGGT -GTCAGCTATTGTTTACCGCGAAGATTCCGTCACTCACCAAGATCACGGCCTTGGTGGAAG -CCTGAGTAGTGTGATTATCGCGCATTGGTATATTAAGCATGACGGCAAGGAGATGTGAAG -AGAATATAATTCCAGGAAAGGCCAGAGAAACACCGAAGCTCCCAGACGCCAAGACGGTGG -TTTCAGTGGGGCACCTGACAACATTTTACTCCTTTCGGCCACTTTTCAAtgttttgtttg -atttaatttatgtgatttatatgaGTGTAAATGAGTTTATCTTGACTATACGTATCCTAC -AGATATATCCTGCTTGACAAGGGCTATCTTGACAGACTCTGATCATCTGGATAGTTGTGG -AGTATGGGGTTTGTTCCTATACAACCTACCCGAGCAAGGCTTGGGTCAAAGGTAAGAGCA -GATAATATTCCATTTACCGGAGCTCACGTTCTCGCTCCTCTTTTATAATTGAGCATAGAC -TATAGAGATCTTAACTGCTCATCAAGCACCTATGGAGAAGTCGAACTTGAAAAGGTTTCG -CTTTGCCGAATTTTGCATTTTTGATATATGAGACCTTCGGTATAGAAACATCCAGACTCA -TTTTATTGCTCCAAATGCCGATTTTCCTCACCAGCAGGTTCGGCCATCGTCTGCATCGCT -TGTAGAGTTGGGCTGATACCGTATGCAACTGCCATGACAAGCCAAAAGAGCCGAAGTGGT -ATGCGTCCGTTGTTAAGTAGGTCAAGCTCCGACCGATGTTCGTGGGATTCCACCATTTTC -TAATTGATGAAGCCGAAGTGTGAGAGGTCGATTGGAAAGTCGCAAAAGCCCATCCGAGTC -GAGGGTCCAAGATCCTATACCGGGGAGTGGTATCTGACTTGGTGAAAGCATCATACATGA -AAGCCCCCAAAAGAGGTTTTTCTTTTGACCTGGTCATGACGGTCTTTATCCCATGTCTCA -AAGCATTTGAACTTCTGAGTTGCCAATGTAGTCCATCACCAAGTATATCGAGTAAGGATA -CACTTTGATGAGATATATAGGGGCAGGGCAAAGTGAGTCCAAACAGCCACATGATAGATC -TTTTGAGATTCCAGAGAAGTCTAGTGCTTAGGGTGATCTTCTCTGGTTTCGTAAACTATA -TGAAAATATGTCAGAAATCATGTTCATAGCCAACTCCACATGAGGTTCCGTACACTTTGA 
-GCCCTAACAAGTCCGAACCCCCACAGTTGCGAAATCGATATCTCCGGACAATGCCTATGT -ATCCAAATAGATGTTGCGGTTTCACAGCGAAGTTCTTCGTCCACATTGCAGGGGTTCTTC -CATTCGTTTACATCGCCAGTCCGGCAAGTATCTGAAGGACCTAGACCAAGATGATGTGAT -ACAATACGCTCAACCTTATCCCGATTGCTATACAGCAGCACGAAGAAGTCAATACGCTGT -TCCGGGTAATCCAAATCGAGGAGAACACTCTCCTCTTGCTTCAGCGCTGACTTCAAGGTG -GTGTTTACCCNNNNNNNNNNNNNCCCCACCCCTTCAAAAAGGCGGTCCGATATACTCTTT -ACGGCTTGAGAGTGAACCTGGGCTATCATGACAGCGATGGAGGCCCCTTAGCTTATGGGT -TTAAGTATGTTCACGAAACATATACAGAAAAGCATGATTGCATTTGTTCATCACTGAAAT -CAACTTTGGCTTGTCATAATTAAGGTCCCCCTCAGAGTGCGCCAATGTGTTGAGTGTCAA -TAGCGGCCACCTTATGATCTTCGCTCTTTGAAGTGTTCTCGGGCTGGCTGAATGCCGCCA -TGACTCTATTGCACGAATGCTGGCTAAGGTTATGCGAAATAAAAGAGATATGATGTTTCG -GCTTGTCACAAACCTGATATAACGGTAGTCGGGACTCGGGACATCAACGAATGTGATCTG -AATACGGCTCTGACTTGTTACACATAGCTGTGGAAGTATTCATATTAAGTCGATTCCCTA -TCAGTTTCGGTGCTTATCATCGTCCAACCCTTGCCTTTTAAATGCGAGCTCCCCCGATTG -TTAGGGTTCATATCTCCCATCGCATAGTTCCCATTACTATGGCGACGTGAATCACCAGCT -GTGATAGTCACCTTGTTCTCTCCACAAATAAGCCCTGCCTCACCAGCATCAACTTGTGAA -AGAGCTGCAGTTCCAGACTGTGTCAAATTTGATTTTGAGAGTGAGCGAGCGGTTGAAACC -ACTCCCTTGGTGAAAATTTTGACAATAGGGGCGAGGGTAGGTAAACAGGCGGAAATGACA -CCTGCAGCGCTTTCAACGACACACCAAGTTTGCGCATCGGCGATGGTCCAAGCCACATCG -GTGATATCGAAGATAAATATGAGGCTGAAGCGATATATACTCGCAATTACGACGCTAAAT -CTTGTAAGCACATGCCCAAATATCCTCTTTCCCGTCAAATTCTGCGGGAAAAGCCCTGGA -AGGCATAGGTGGCACTTACAAGCTTCCCGTCAAAAACACGCATATAATTGATACACGCTG -CCACAGACTTGCCTGCAATCCCCAAATGAGTTTTGCCGGCATAGCCAAAATACAGATGTC -GATCACAAAGTTCGGTACCCCGTTCCCAATCAATGCCCCCTTGAGAACGATGCAGTGACC -AGGAAGCATGGGATTGAATCCTTTGGCAATGGGCCAGCACTGGAATAAGGCGAGAAAATC -CATGGCAATCACCCAAGCGAGAGTAGTTCCACCAAGGATCAGGCCACCGATTTTGATGAA -ACGAACAGGGAAGATGCGATAGTACATGAGCAGCACAGCAAATTTTATAATACCGGCTGT -TGTGATATACAGCGGCTCAAAAGCGTAGATCAACTGTTGAGAAGTATAATTAGATATCGC -CATTGATCACGCTCTGAAGTTGACCAGACGTAAGTCTATTCCTCGTACCTTGAAAACCAT -AGATTGGCCCTCGGGAGGGATGGTATGAGTATGTGTGCCCATGCCATATTCAATACCTGT -CGATATGGTTAGTTCAAATTACTCTTGCAACACGCTGGCAAATTCTCAATAAAGACCTAC -AGGCTAGGTTGATTCCTGCAATAATCAACGTAAAGATCTAGGATGGCATTAGATCGGATT 
-ATCACAGACGGCTGGATGCGAAGAGATCAAGAGCTACGTACCAATCCCAGAATCAAAGTC -CAGTCATCCGCTCCTAGGTGAACATGTTTCATACCCCGAGCAACTAGTCTTAGCGCCAAG -AAGAAAATAGCTAACACGGCAAAAATAATAGTCGTGATGCGCTGCTTGGGCTGAGTACTC -TCATGCGCATCGCTAGACGCCATTGTTATCGCATGGCCTTGCAAAAAATAGCTAACAGTT -GAGCAATGTTCTTGGAGAAGCAGGGGTAGCAGATCTTGATTTATTTAGAATTGTAGTTCC -CCTACGAAGGATTTGCCATGTGAATCCTTTAGCCCTACCTTCCAAGGTGCCAAAGTGGGA -CTTATCCAAGATGCCAAGGCCATTTTACATTGCCAATTGCCTGAATAGGTTTAGCATAAC -ACTATAGAAGTCGATTCAATAAAAATTTCCACGGAACAATTTGTCTAGTCCCGTGGGCAA -TGTCAATGCTTAATGCTCATTGATCGATTTTCCTCATCAAACAACGCTACCGCGAACCGA -GGATTCATACCACGCCACTTTGTCTAGTTACACTTCGTACAGGGAGCATCGAGATAGGTT -AGGCCTGAAACGAGGATAAATTTTTTGCTTGTATTCCAGAAAACAATGTGTGAACCTACG -AGTACGCTAACATGCCGGCCATTCCACCTTTGGGGTTTTCGTAAATAGTCTTTGTTTCTG -GAATCATTCTCATAAGACATACCCGCCTCATAATTCCAGTCTATTCCAATGACGAGCCCA -ACATTCGTATTTTCCCTGGGTGCCTGGGTTGTTCCAGCCGTCTTTGAAGCCACACGCTCC -CGTCTGGAAACCCTCGGGTTTCCCTCCGAATGCCCCGCGCATCCTTCTATTGGCGCGGAG -CCGCCTTCTAAGACTCTAGCTGATGATACTGCCTCCCTGCGCAGCGTCCTGACCAAGCTT -GCAGACGAAGGGCGTGATTTGGTTGTGGTTGGACACTCTTATGGCGGCGTGGTTGCCTCC -TCGTCCGTCGAAGGACTGAATAAAGCTACTCGTGTAGCGGAGGGCAAGACTGGTGGTGTT -GTGAAGGTTGTGTACCTTGCTGCATTTGCTCTAGACAAGGGCCAAAGCTTGTTGGGTATG -CTTGGAGGAAACTATTTACCCTGGATGAAGGTCGAGGTAAGCCTATCCCAGAACTACCTG -CTTTGTACGCCACAGGAGGACAAAGCTGACTAGTACATAGGGTGATTACGTCCAGGCAGA -TGGTGCCGGAGAGATTGGCTGGCAGGATATCTCTCTCGATGAGCAGGAGAAGTGGAACTC -GTTAGCACTACACACTTCGCGCGCGGTGTTCTCCGGCGAGTCGACTTATGAGCCCTGGAG -CGACATCTCCTGCGCCTATATTGTGTGTGAGCAGGATCGTGCCCTGCCACCCCCATTCCA -GGAACTATTTGCTTCTAAGATGGGTGGGCCGGAAAACACTTATCGGCTGCCTAGCTCGCA -CTCGCCGTTCTTGAGTATGCCAGATCGTCTAGCCGAGGTCCTACAGCAGATTGTTAAGTC -ATGAGTGACTTTTCAATTATGGCTTGACTCAATTGATTTTGAAACTACTTCTCAAATCGC -CCCATGCATTATTTAATATATCATTGTGTCTCCATTGAGCGAATATCACAGCCGACAAAC -TTATTTTAATTGTTCGCCTCAAGCCCCCCCCCCCACCTTGGATGTGAGACAAATTACCCC -CAGTCGCCCCCAAATGTACGGAGTCCTCGGAAAGTCTGTAAATAGCCCCTGAAAAGATTT -GATAGAAATACGGCACTCGGTTGAGACACACGGAGTAGCGCTTCGGCAGTTGAAACTAGA -ACAAAGTACGAATCCAAACTCAGGCTAAATGCATAGGGTATGCTGTATGGCCGTTATGTA 
-CTACTCCAAAGTGGAGAAAGGAGTAATATCTCACCAATCATATATATGATCGCTTCCAGG -TTGCTCCCACATAGACGGTTAGCATTTTGGTAATCTCGTCCCAATGATCTGGCCCTCGGT -AGATGTTTCCGAATATACGGAGTACCCTGCCCTCAACTGTAGAAGGATGATCATTTTCGA -AAGAGCAATCTTTCACATTGAAAAGTCCATCGTATCCTATATGTAACACATGATAAGAAT -GAAGTAAAAGAGAAATCAAAAACTGACCCGAAAAAGAAATTCCTCTCCGAAGTGGGTGCT -TGTATATATCCCGGAGATCTGATCCATGTAAATCGCATCATCTATCAACCGATACATCAT -CAATTGATATGAATCTACTCCACATGTACATAAATCCTGATTTACACAGATATTTACTCT -GTTCATTCCTTTATCTTCACATCTTTTCTTTGGGCCTACAAGGCCCAAGAGGTACGAACT -ATCTACGGACTAACTTGCTCTCATTTCCCTCATACTTCGCTCTCGGTTTGTCTCACTTCC -ATCCCACTTTCACCTTACAAAGTTTGACCTTATGATCCATGATTGTATACCCGTGTCTTG -AACCAATTCATCCTTCTATGGAAGGTCACATGGCCCATAGTGTTTGCTGCCGGTCATCTA -TAAAATAACTTTCTCTCTTCAATACATCAAGAATTTCTAGATCCTACCATCAAGATGCTT -TATTGAAGGGATCTCTCTCGATGATACTCTTGTTTCTGTCAAAATTTCTGATCACCTTGC -TGAGGGTATGTGGATGCGTATGGAAGGcgatgatgacgacaatgacgaagactatgaaga -caacacaaacaaagaggacgacATTCGAAGAGATCATTCCAGCATCCACAGAGGCGATTC -TCGGATGCTCGACGCTCACCTACAAGCACATCACGGCTCTACCCATAATTGACCCCAAGA -GGAACTTCAAAGCCAATGTTGTTATTGCTGCCAAAGCTGCGAAAGACAATGATTTGATGG -ATGTCGACTAACTGTGTTGGCTGCTTACGATCTTGTCTCGCATCAATGCGTGGACCTGAT -TCGCTTTTCTTCCTATCCGCAAGCTCTTTTCCACTATGTCTTTTCCTGTTTCGATATCTC -TCTTGCTTTCTGATACTATGTCTTATCTAGTTTGCTTCACTACAATGAGTTCTTCCAATC -GTTTCTACACGACCCCACTCAAATCCGCATTCTCGGCTAGTTCCAAGTCGTTTAAAATGT -ATTCAGGCCAGAATGGCGCCCCCTCGATCACCACATCAATCTCCTCACTCCTCATTGCAA -AGCGCTGAAGGAAACTTAGCGATCCAGTCGCCGGCCAATACCTTTCGTTCCCCGGGTGGA -TCGACAAGTGAGCCTTGAGCGTACACACAAGTGGTTGGCACGTCGAAATCATTTGGTATT -TCCATTGTTTGGACGGCTTGTACAAACCACGGAGTGCGTCTCCAGACTGTCTTTGCTCAG -AGGAGACTTCCCTCGGATTATGATTGTAGTTCTCAGCCCTGAAGTTCTTGAATTTACTCA -TTCATTCTCATTCTCAAGGTCATTGAAATTCATCCTCTAGAGTAGATACCGTCAGAATTG -CACATCTGTTAGGCAGCCCTAGCTCTCGAGCAAATCTACCTTCGAGTGAATATACCCCTA -GAGGGGTCAATCATGACAGGGCCGCCGTAGATTCAATGAACATATTATCTTCGTTTCACC -ATATATATACGTAGCTTCACTTGTTATATCAAAGCGGGTGCCGTGTGAAATACTACCTTG -TCATCTAGATGATCGTTTCTGGAATGGCTCTTACAACCCCAACGTATGTAGATACTGGAA -ATTGAGATTAATCAGTACATGTTCTTTTTTTGTATTTTTTGCTATGCTATTCATTTATTT 
-TTACAGTTTTCCACTTGATCCACATTTCCATCACTCCACCATATGCGAAACATTTAGGGA -AGATGACAGGCACTCCCCAAGAACAGGTACCTCAAGATTTGGCTCGATCAACATTGGCAT -GGGTGGCGTAAGTCATAAAGACCACAAGGCAAAACAAATTTGTAAATCAGCTCCAATCCT -CTTTGATGTCTGCGAAATCAGTGTGATCAACATCTTCCATCTCCATGTCCTCATCTCGAA -CAGAGTCCAGCCATTCTTGAGCTTTGGAAAACTTCTCTCGCTCTTTTTGTAGCTGAGCAC -GGCGATGCTGGCGTTCAGGGTCCTCCGTCATGAGATCGAGACAGCGCGTACGACCTAGAT -TACTTATTAGTACAAGTGCTAGCGTCATTCTCGACGTACAAGATGTATATACTCACCGTT -ATCGCCGAATATCTCCAGGTCATCACGAAGGGTATTGACGAGGCCCGCACGGCAAGCTCG -GAACAACTTCATATGAGTAGACTGGCACAAGACGCCCAAGAAACGAGAAGACGCGATATC -GTAGTAGGCCAGCGAGCTCTATGACAGCAATTAGTCAACGCTAGAAAACGTTCCCTAGAA -GACTTACGGCCATCATCTCAATCTCTTGAGAGTACCTATCGGGACCAAGGTGCTCGAGTT -GAATCTTTTTAATCTTGTCGGAAATCTTTTCGTCACATGTATCATGGCCACGGATTCTAA -GCCAGCATAAGGCGCGAGCGTTGTGACGACGAGATTTCAATATAGTCAAAGTCTGCTGCA -TCAACGCTTGGTGTTGGCCCTGCGCCATCGTGAATGGAATCTCGTTCTCTGACTTGCAAT -AATCCTTTGCGATTTGGAGATGAATGCGGCGAAGCTTGCCCATGAAATCTTTCAATATTC -TACTGAGCTCCTGATAGAGACCAGTCTGGTGGTACTGTGCAAATTCTTCCTCCAGAGCTT -CAAGCAGCATACTATGCACAAGATTATGCGTAGCCTTCAGGAAAATCTCCATGAGCTTTC -CCCAATGTTCAACACTTCTTTTGTTCAAATTTTCAATCGCACGTGGGTCAAGCTGGTTTG -GTACACCGACTGTGTTAGACCGAATCTTGAGTTCAGTAAGCTGTTGGAGAGTGATTCGAG -CTGACTTGGAACCTTCAAAGTAGGAGGTGCTGTACACAGTTCTTCTCGGAGTTGCTGATT -CGGATGTATAACTAGCACTCGCCGGTGACTTCCTCTTCTTGGGTGTTGATATGTTGACGC -CTACAATCTTCATCTCACAATCTGAATCAACAACTTCAGCCAAGTCCTCAATGTCCTTCC -GCGCAGCAACGTCCAAGATAGGACGTGTCTTCGCAAGTGCAGTCTGCAAATCCATAACAA -TCTTGTTCCACTGCTCTTGCAGCGGCTTCCGCTGGGAAAAATTATGCTCGATGCTAGGGG -TTCCATCAAAAAGTTCATGAATCTTCCGTTCAAGATCCGAGGTTTTGCCCCAAAGAATGC -GCTGCACATCCTCAGTGGGAGGGTTAGGCAATATCTGAAGTTCTGCGTCAATGGCTTTTG -ACTTTTCATCAATCTGGGCAATGATAGAGGGCAGCGATCCTAAGATCTGACGCATCAACA -GGTCAGACAACGCCGTTTGGAGCTGGCGGGTACCAAAGCGGTCTTTGAATGCTGCGAGAT -CCCTTGTCCACAGTGAAGAGTTAAAGAAAGTTTCTTCCTCCTCCCTGGCTTGGAAGTGGC -TGACATCGGGATCAGGATTATTCCGAATGACATAATAGCCATGGCCAAGCTTGAACTTGC -TACCATCCAGAATCTCATGCCATTGCTTAAAGGCTCCACCAGATCACGGCAGATCGGGCT -TGGTCAATACTCCAAGCGTACGCTCCTTTGCGCCTTTGATGTCTCGGACAAGGCGAGCAG 
-CACTTGAATTCGTAGCATCGTCTGTCATAGGCAGGGTGAGGAGTACAATGCAATGCTCCT -GAGAAACGTAGTCCTTCACGAGGTTCTCGACGAGATTTACTAGATATTTCTCATTATCCT -GTTCCGTCTGGCTAATCACTCCAGGCAGATCGTAGAAAGATAAGTTCGGAAGACCTGGGC -CGGAGATATCCAAGCGGACAGCGTTCGGAGAAAATTTGACCTGTGGAATATGTGACATGT -CGGCAATAGTTTCAGGCACAAAATCATGTGAATCTGTGCTTGGGTTCAAGATCGCCAACT -GGGCGCACAAGATAGCCTTTTTAACTTCTTGTTTATTGTCCAGGGTGACGAAATGTTCAT -CATCCTGGCCGCCAAGGGCAAGCCATGGCCCCAAGGGCTCTGACTTTTTTGGTATCTTCA -CGCGCTTTGATGTGTCAAAGATGTAGCGGCGTGATAAATGAATCACGCATTTCCAATTTT -CGTCGCTCTCGGAAAGATTTATTTCCAAAGGGCATCGCGTGCAGGTTCCTTCGGATCTAG -GCACCTTGATTTCGGAGATACCTTCGATAAGGGAGCTTTTCCCGGTGCTCTGATCACCGA -CGACACATATCTTGGGAAGCATGATTTTGCTATCCTCGATACCCAAGTGGCTGAGGTCTT -GAATCTTTTTAACCAGATTTTTCATGTCTCTGGTGAGATGTTCAAGAGAGGCAGCGGGAA -TTGGATTGGCGGAATTAATGTCAGGTAGGCCATGAGACACCGGCATTGCAACAGCTAAGC -CAGCTAGAGAATGGAACAACTCGTTAACAAGGTGGCGATCAACATCAAGAGAGACCAGAC -TCACAAATCATTGGGTTGGGTTCTGACTTCGGAACCCGGCCATACACGCCTGACATCGTG -GTAGACTAGGGCTGAGGAGGAGTCTGTTAGTCATCGAAGCAGGAAGATAGGAAGCAAGAG -GCGATGGCTATAATCACCGGCTGAAGGAGCAGGGTAAATGAGACATACTAGGAGACACAA -AATGGTAGAATGAAAATCACAGATAGGTATGATGCTTGAAACAAGTTGACCGAGAGGGTG -GAGGTCACCACAGCTCATGAAACGATCAGAGTTCAAAGAGATAGGCAAATTTAAATGGTG -CAGAAACAAATAGACTGGTGTCAGCAGTTAGTGGGCGCGTTCGAGGATGAGGATATGGGT -GATGAAGAAATATGATGAGGAAAGATAGGCAGGTGAGAAAGGAAGAGCGACAAGACAAGG -AGGGTCTGTAAAGAATACACGCAGAGTGGTTGCTAACGGGAAATAAACAAGAAAAGAAAG -TACGCAAGAGCTCACCAGCTACACGAGGGCAAAAGAAGAGATGGAAAACAGTCCACACCG -CGATGTGGCAAAAAGGTGCGATTAAAATCATCTTTCTTCCACATGCCAGATGCCACATTT -TGTTAAATTCATTTTAACAACTTGAAGTTATACAATTCAATGACCAGCACCTGCAGTCAC -CAGCCAATACATGAGAGAACGGAGCAGGGATTGTAACAAGCGTAGAGCTAAGTCCCCCTC -ACTAGAATATGACGTTAGCGTTAGACAGTCAATGACTTGCCGTCTCTTTATGAGTCAAAA -AGCTGAGCTTGTTGATCCCCTGTTGTCTCAGAATTGATACTGCCAAGATGTAACCCAAGT -CCTTTGGTGTTTTGTCACAATTGAACATCGTATCCATCTAATAGATAAAACTCTCTCTAA -TAGCAGATAAACTATGAATACTTTGTAAATATAAACATAATTACCTACTACAGGGAGGAC -CACCATCAAGCAGGCCTGCGCTGCTGTTGGCAATGCCGTGGTAGACAGGGTTGTGATGAC -TACCCAGGATTGTGGCCAGGTAGTCCCCACTATATTTGAAGTAAATATTCTATATGAATC 
-AAGAAACGACCCGTCTGAGGTCACCTTTAAGCACATATAGAGTGCCATTGGAGGAGAAAT -TGGTATTCTTGGAAAGACGATATAGATATGTCCGTGCCCTAAACCCTACCTCCCAATTGA -GTATCCGTGAACTGTACTCGACAACAGGGGATAGGAAGTCAAGTCCGGACATAAGAGCGC -AATTTTTAGATTAGTATTGTATTTTCATGCGTACCTATCTTTTCGCCATTAATAAATGAG -TGAAGTCCAGAATGAGTCATGAGGGTTGAGGTCCCCGCGTATATGGAGAGTGATATGCAA -CCGTCCAATCAAATGACATGCCGATCCCGCCCAAAAATTTAAGTGCCTTATCCCACCATC -CCACCATCCCTCTTTTTGCTCTCTGTAACCACACATTATGGCGGATTCCAACCCTGTTCA -AGAGGCTACCGCCTCCATTGCCAACCTTCTTCTCGACGATGTCACCGGCGAGAAAGTCTC -CAAGACCGAATTGAAGAGACGCCagaagcagcgtgagaaggacgccaagaagaaggagaa -ggagGCTGCTGCTCCCCCCAAGCCTAAGGCCGAGAAGAAAACATCTGCCGAGGAGGACGA -GGCCAACTTGACTCCCAACGTTAGTCTTTTTGGTTCCTATGTCAATTAATGGTTTCCGTG -AAGCTAACTAATGTATCATTCTAGCAATACTTTGAGATTCGGAGCAAGAAGATCCAGAAG -TTGCGCGAGACCAACCAGCCGGAACCCTACCCCCACAAGTTCCATGTCGATACCGACCTC -CGTGATTTCCTCAAGAACTATGAGGGCCTCCAGAAGGGCGAGCAGAAGCCCGAGGTGACC -GTTCGCGTTGCTGGCCGTATCTACACCAAGCGTACCTCCGGCAACAAGCTGAACTTCTAC -GACATCCGTTCGGAGGGTGTCAAGCTCCAGGTTATGTGTCAAGCTCAGTTCGCAACTGGA -AAGCCCTTTGAAGAACAGCACGAGCTGCTTCGCAGAGGAGATATCGTGGGTATCGTTGGT -TTCCCCGGCCGAACCAACCCCAAGAACCGTGACGATGGTGAACTTTCTATCTTCGCCACG -GAGGTTGTTCTCCTGACACCTTGCTTGCATGCTATTCCTTCTGAGCACTACGGATTCCAG -GACAAAGAGCTGCGCTTCCGTCAGCGCTACCTGGATCTCATTATGAACGACCGTAGTCGT -GAAATCTTCAGAACTCGTGCGAAGATCGTGTCTTATATTCGCAATTACTTTGACAGCCGT -GATTTCATTGAAGTTGAGACTCCTATGATGAATGCTATTGCTGGTGGTGCTACAGCCAAG -CCCTTCGTTACTCACCATAATGATCTCGACATGAAGCTCTTCATGCGTGTCGCCCCCGAG -CTCTACCTCAAAATGCTGGTCGTAGGAGGACTAAACCGAGTAGTAAGTCGCTCCCTCGAG -TTCGTATGCTTCTTCTAACAAAATTCCTAGTATGAAATGGGCAAGAACTTCAGAAATGAG -GGCATTGATCTCACTCACAACCCCGAGTTCACCTCCTGCGAGTTTTACCAGGCTTATGCA -GATGTGTTCGACCTGATGGAGACGACCGAGGATCTTGTTTCGGGTCTGGTCAAGCATGTC -ACTGGTGGCTATGAGACAGTATTCCACACCCAGTCTGGTGAAACCTACAACGTGAACTGG -AAGGCTCCCTGGCGGCGAGTGGAGATGATGCCTGCTCTGGAGGAGGCCTGCGGCGAGAAG -TTCCCTACCGGTGACCAGCTCCACACCGAGGAGACCAACCAATTCCTCAAGCGCATTCTG -AAGAAGATGAACGTGGACTGCTCGGCTCCTCAAACTAATGCTCGCATGCTCGACAAGCTT -GTTGGCGAGTTCATCGAGAATACTTGCATCAACCCAACTTTCATCACCGGTCACCCCCAG 
-ATGATGTCACCACTGGCCAAGAAACACCGCGAAATCTCCGGTCTTTGCGAGCGATTTGAA -GCCTTCGTTTGGTAAGTTCTCACCTACCTTATAACTACCTGTTCTCCCCAAACTTCCCTG -CTCAAGATTGGTCCAAGTTTGCTTTGCCCAGAATCGGCCAATCAGCTTCCAGGGAACTAT -CAGCTAACTCAATGTAATTATAGTACCAAGGAGATCACGAACGCTTACACGGAGTTGAAC -GATCCCTTCGACCAGCGTATGCGCTTCGAAGAGCAGGCCAACCAGAAGGCTCAGGGTGAT -GACGAGGCTCAGATGGTTGATGAGACTTTCTGCCAATCCCTCGAGTACGGTTTGCCTCCG -ACTGCTGGCTGGGGTATGGGTATTGACCGCCTGGTCATGTTCCTTACCGACAACTACAGC -ATCAAGGAGGTTTTGACCTTCCCCATGATGAAGGATGACAAGACCGCCGCCGAGCCCAAG -ACTGCTGCCGAAGTCGCCGGCGTCGAGCCCAGTCCCGAAGAAGGAATCCGTATGTTATTA -ATATCAGCAAATTTCCACGAGCCGGTATCACTGACGATGAATATACAGCCCACAAATGAA -TAAAGTGATATGATTGCTCAACCTAATGTTGATGCTATTGATGTGTGATGCGTCTTGGGG -GCTTGCATGCAATTCTCTGTCTTATGATAAATGCAGCAGATAAAATAGGGTTTCAAGAAC -ATCTTGGCTAAATAGCCAGTTTTCAAAGAATATATTTCCGTGTTTGTACTAAATCCAAAA -CCAGAGTCGGTATATACATGTAGGCAAGTCAGATATCTCCTCGCCAGCAGACAGACCATG -GACGTCTAATCTATATTTAACCCTAAAGAGGTTGGGTTGACCGGAGGCTGAGCAACCGCT -AAAGTGATCAGGTATACCTGGAATGTATTTCGAGAGCTCCGATCTGTTTGTTTGCCCTTA -AGCGGCGCTCTCCTCAAACTACCAGGGGTCATCCGATTCCATGAGACTGTATATAATAAT -GCCTAGTACAACTTGCATCTGAGAACCTTCATCAACCCCAACAAGCCTTTTATCTACACC -TACGACATATGCCTTTTCATTTTCATTTCGTACATCCCCATTCTGTTTCTTTTCCGTTTC -CTGAAAGCAACACTTCTTTCAAATCCTATCAAGTGTCATCAAGCGCAGCTCCATTGGAAC -CATGTCCCGTCATACCATGCCTCTCCCTCGCATCCCAGAGCTGAGGCTCCCGTCTTTTCG -AGAGCTCGACGAGTCTATCGAAGCCAACAAGCGCAGCCAAATGAGCTCGCATCTTTCTAA -CAATACGTATGATGGAACAAACAACGGTTCCTCGTCTTCACCACAAAATATCAATACCAC -TCCCAATCCATATCCACCTTATCCTGATAATGGATGGCTTGTTCCGCCTGACCCCAGCCA -GAATGTTTGTTACCATCCATCTCTCCCCTGAGGCATGAAACTAAAACCTAGTTAGGCTTT -TCGAGAGCCGGGATTTGCCTACCAAAACGTGCGTGCAGAACGATAGATACACAAGACCGA -GTGTCATACAAACTAACACTCAAACAACAGGACTTTGACGGCGTGCTTCAACCAGCCGTG -AACATGGACAGAGGTGAGGAAGAGGGAACACAGACCTCCATCTCAAGCCAGAATTACAGC -TTACAAGGCCATCACACGGAGATAAATCAAACCCATCAACTCTCCCTGCGTCTGAAGGAA -GCCCAGCCATCACCAAGTCTCACGCAGTCAACTCTCAGCTCCGGGCAGACGACACTCCGC -GGACCGCTCTCTCCAAGCACCCCAGAGAATGTCTCCCTCAACCAGACTCGATCAATCTTG -ACAAACTGCCAGCCACCTAACGAGCAAGTCCGTCGCGAAAGGGCCAACCACGATGCAGGC 
-GCCAACCACCGAGTCCAGAGACGACGAGTTGGCGTCAATTTCTCGGAGGTGGCAGCGCAC -ACCGCAAAGTGCGATGTTTGCAACAAGCGCAACAAGAACGGCATGGCTCGTTGCCAGAAT -TGCGGTTGGCAAATATGCCGCAAGTGTCTGACCGATCGAAACGGAGACCGAGCCCACGCT -TCTTTTGGGGCTACTCATGTCCCCGAGAGCGGCGGCGATATGCCGCTATCCTTGCCATCT -GTGAATGGGAATCAAGAGCGTCGATCCTCCGCATCAACAGCAGAACTGAGGGCTGCGAAA -ACTCTGCTTGACCTTGGATCTTTCCGTAATGTAACTGGGACTACTAGTAGCGCTGGTAAT -GTAGCAAACAGCGCAGCTGGACAGAAAGTGTCTGCACGTGGCCGGGGCTTGCAGCGGCAG -GTTGATACTCTGTCCACTGATTCTGATATGACATTGTCGATTGTTGGAGATGAGGGGTGG -CCTCAGGATGAGTCTGATGTCCCTATTGGTGAGGACGGTCTGCCCCTTGGATATGTCATT -ACAAGGCGCAACCCTGCGCGTGCTGCGAGGCCTTCTACGAAGATGGCAGAGTAAGAAAAT -ATCACCAAAGTCTGGAAAATGAATGTAAGAAAACGGTGGAAGGCACGCATGAAAGCATGG -AAGATGAAGCCAAGAATGAAAGGGGTGAAAGAAGAGGAATAATAAAAAAGATTGGTGCAA -TTGGGGAATATCAATTTGGATTTTGGCAGGTCAGCCAATAAAGTTTGTTTAATTTCAAAA -TGTGTTTTTACAATGCTAGATAGTTATACAATAAGACTCAATAATTGCTGGAGTCAACAG -ATGAATGGAGATGGTATCAATGACCAAGGCGATTGATTATCAGAATCAATCCTGGTCGAC -ATGTTCATACACTCGCGAAGCAACGCTTTTAGATTCTCTTCACAGCGGAGGCAGCAGTGC -TTGAGAGCTGCTGGGACCGAGCTTTCACAGCCATCTGCAACAGCTTCGCTGTACGGGATG -GATGCCAGACTCGGCTGCCCAAGATCATGACCCAGGTCAGGAGGAACAAGCTGCCGGCCT -GGTGGGCAGAAGCGAGGGGAGTAGGAACGAGGTAGAGGAGAGTAGTAATACCAAGTCCGA -CTTGGCAACACGCGAATGCAAACACGCCGTGGACGCCCTTTCTAGCAGGTGCCGGTAGGA -AGCGCTTCATAGTGGGTGAGCGGCGCGTGTAAGCCATCAAAGCGCAAATGACGGTGAAGG -TGGTCACTGCGAGGACACGGTGGTCAAGCTGGACAAGAGAAGGATTCTCGAGCATGTTAC -GCCACCAAAGATCGGAGCGGTCCTCGTGGCGGGAGTAGCGGGCATCGAACAGCTCCTCCT -TTGGAGGCGCGAGTCCTTTGCCCATCCATGGGAATTCGTTGTAGATAAGTCCAGCATCAA -GACCAGCGACAAGACCGCCGGAAATGACAGTTGTGAATACCAAGAGAGCGATACCGGCGA -CTGAGCGGCGGAAGAAGGCCAGCTTCGGGTCGCGCAGCGCATTCAGCTGACTGATTCCAG -CAGCGGGATCAGCCATCAACCGGTGGGATCGCAAGATAGCAAGACCGTTCCACAGCATGG -CAACGTAACAAACGAAAGCAGCACCCAAGTGAGCAGTCAGACGGTACTGGCTCACTCGGG -GGTGACTGCCCGGAGCAAAGAGATCATCCTTCAAGCCGGACTTGACCATCCACCAGCCAA -GGAATCCCTGGAAACCAATGAGACCAGCAATTCCGAAGAGACGCAATGCCATAGGCCTGG -ATACCTTCTTTCGGGCGACGAAGTAGACAGCAGGCAGCACAAACGAGATTCCGACAAACC -GGCCCCAGAGCCGGTGAATCCATTCCATATAGTAGATCGACTTGAACTCGGATAGAGTCA 
-TGTTGGGGTTAAGCAGTTGAAATTCGGGAGAGGCACGGTACTTTGCAAACTCATCCTCCC -AGTGTTCTGCGTTCATTGGGGGAAGGGAGCCGGTAACAGGTCGCCATTCGGTGATACTCA -ATCTGTGCCAAAAGTTGTCCCGTTAGCATTGGAAGCTTACTTATTATGAGAACCAAGCAT -GACAGTCCACGTACCCAGATTCTGTCAACCGTGTCAAACCACCGAAGACCACAATTCCAA -AAACACTGGCTGCGCTGCCTAGAAGCCAGTAGCCAACGGTCTTGTCGCTAATTTTGGGGA -AGCTCGATTTGCCTTTCGCGGCGCCCTCTGCGGCCGTCTCTGCAACGGCCGGTGCAGCAT -TTGAACGCTTGAGACTTGATGTAAAGAATTCGCGGTTTTGATTGTTGAGAAGGGTCGATT -TGGTGGGTTGACGGAACGGAAGGGCACCGAATTGGCGGATTGATTTGGCTGCGTAGCCCT -GACTAGGGAGGGACTGCGATGTGAAAAAGTCCTGAGCCAAACGAGGCGCAGCACTCCGCC -ATAGCGACAGAGAGCTCCTCAGAGAAGCCATGGTTGAGGCAGATGAGTGAGGCAGAAAGT -TGGTGAAGTTCGGGGATTGTACGATGTCGCGGAATGCCTCGGCCAATCAGCAGTGGAGCA -TTTGTCGGAGTGGAATAGCACTGTCAGCAGTACACAACTACTGACTACGGAATGTAATTG -ATAATTGAGAGAATACTAACATCTATATTGTTCGAATCAGATTTTTCCGTGAATGTATAT -GTACGGGCCCAATATCGATGTTGATGCCTCAGGCGGCTACGCCTCGAGCGTTGTGATCGC -CGCAGCTCCAACCGTCAGCTTTGTATTGATCCGCATCACTTGACTTTCTTCCCAGCGCAG -CGCCAACATGTCCTACGCTCTGGAATCCAAAAAGCGAAAGTTTCACCGTGTTCTTGAATC -AATATCCAAGCCTCTTACCTCCGACAATGCACCCAAACCAGCCCCTGCGGCGCCAACGAC -GGTGCAAGACCGTATATCCGCCAACTTATCGATCAAGAAAGTACGCTTGGCCAGTGCCGA -CCGCTCGGAGCTCACAGCAGTGCGCAACTCAATCAACAAAATCTCCCGACCAGGGCACCG -AATTGCCTCTGCGAATTCGAATAAGCGCCCAACATTTGTTCCTTGGGATCGCGAACGTTT -TCTCGAGCGGTTAGAGACCTTCCGTCGTGTCGACCGGTGGACCTCAAAGCCATCCCCAAT -CAATGAAGTACAATGGGCGAAACATGGTTGGATATGTACGGATGTTTTGCGCGTCACCTG -CGTGAGTGATTGTGGCGGCGCAGTCGTCGTGAAGGTTCCCGATGAAATCGATGAGCTTGA -TGGTTTCGACATTGAGAAGGTGGAGGAGCGCAAGCAAGTCCGTGAGTAATACATTTATTG -CTGGTCCAGCTGCGCTTGGTTACCTGCTAACCTACATGACTGCAGGTGCGAGGCTGGTGG -ATGAGTATGCGAAGATGCTAAGCAGTGCTCATGGCGAAAATTGTCCATGGCGAAACAAGA -GCTGTGACGGTGAGTCTGAAACTGACACTCACAGACTTTTACCCCGAGACTAACATTAAA -TTAGCAACTATTCAACATCTTCCTTTGACAAATTGTGATTCTGCCTTATCGGGTCTTCAC -AAGCGATATACAAACATATCGGAAATGGGCGATAAATTACCATCCGACGATATTATCCAG -ACTCCGGAAGGCCTTGACCTAGATGTCCTTATCAAAGGACTTCCCGAGGAATGGTTCAAG -GAGGCCGAGAAGCCTGCCGTACACACCGATGGAGAGACTCGACCTGCAAACATCGACAAC -CCAGACCCCACAGATACACCCAAACCTGTTAATCGCGCTGCGCTGGCGCTCGCATTGCTC 
-GGCTGGGACACTGCCTCTGATGGAGCTGCGGGGTTAGTCGGATGTGGTGCGTGTTTCCGT -CGCCTCGGGCTATGGATGTACAAACCCAAAGATAACGGAGATGTGACCGTCTACACTTCC -CTCAATGTGGCCGATGAGCACATGGAGTATTGTCCCTGGATTGATAGAGTTGCCCAAAGT -GGCACGGGGCGTCCCAACGAGAAGTCGGCAGAACTACGCGCTGGGTGGCAGATCGTCGCG -GAGGCCGTAAAGGTCAAACACCGTCGCAGACTCCGCACAATGGCTTCCACTGATACGTTG -CGGACGGACCCAGGCTCATCGACCGAGACTGTAGGCGAGGAAGAAAATGCGGATGCGAAA -AAGAAAGCAGATCGTGAGTGGTGGGCAAAGATTCGACGAGTGAGATCGGTTTTGACAGCC -AAGTCGCCAAAGCGCAAGCCTGTCTTGCCTCAATAATCTTGCATTCCAATCTTCATTTCT -GTTCAATATTACATTAATTGGGAGCGTGGGTTTTATTCTGAGGGTCAAAGTGCCTCTTTC -AGCGGCTTGTGTTTTTCAGTTAGCTTCGATTTATGGCCATTGTGAGACACGACACTTCGC -AATGATGAGAATAGATATATTGTATGTTCATTTCTAGATACTGTAAACCAATTAATTGAA -CGATTGTAGACAACTTGAGATGAATGGGAATTAGGGTTGGTTTGGATCGCTCTGATTGGC -CGAGATCCGGAGCTCGTTGACCTTTCGAGGTCGTTCAAACCGCGTTGAGCTGATATGACT -CTGACTTCCACAACTCCGCAGACAATCTTTCTGTGAAATCAATACAAAATTCTTTATTCC -TTCCAAGATTATCCTAATTTCGACACACACCGGTTTTCCTCGCCAGCTTCCCAGGACCAT -TGTCCAGCCGAATCACTTATTTGAACAGTTTACATCGCCATGGATCATCCCAACGGAGAT -GGAGCCCATGATAGAGCTCCGGGGCGCTCAGTCCCCCGCTCGCCTGCCCGTGCGAATGCT -ACACAACCTTCAACGGATGCCGGCAGACACAGGAGCATAGAAGGAGATGGTGGAGCAGAC -GGATCGAACAATTTATCAACCGCAGCAATCAAACGCAAGAGACTCCAGGAGAGACACCAA -GCCCTCCGAAAACGGGGTCGCACTCCACCGTCAGTATTCTCGCGACGCAATCGTTCCCGC -AGCCCAGGTGGCCCCCAACCACGCGAAGACCGACGATCTCGATCGCCACTACAGCCACGC -CGCTCTCCCTCCCCCGAAGCACCGCGTCAACGCAAGAGACCGGGTGGTGGAGCGCGACAG -GGACTTCTAGACCGTGAAACACTACGACGCAAGCAGGAGGAGCGTGAACGCGCAGAGCAT -GATGAGGCTATGCGTAACTCTCAACAGCGTGGCGTCACGGACGTGGTCCGACAACATTAC -AATGCGGTTCCCGAGAGAGGCCGTGAGTGGCGCAAGACAGAGAGCAAAATCAAGGGTCTA -CGAAGCTTCAACAACTGGGTGAAGAGTACTTTGATTCAGAAGTTTTCTCCCGATGAGAAC -TTTGTTGCGCGGTTCGAGGACAGCAAGGACTGGGCGGATGACAGCCAAGGGCCACCGCCC -TCTGCTGATCAAAAGCTTCTCGTACTGGATCTGGGCTGTGGTAAGGGTGGCGATCTAGGC -AAATGGCAGCTGGCGCCTCAGCCGGTTGACCTCTACGTCGGTCTTGACCCTGCCAATATC -TCGGTCGAGCAAGCACGGGGTAGATATGATCAAATGCGCACGGGCCGTGGTCAGCGAGGT -CGTCGACCACCCCAGCCGATTTTCCATGCAGAGTTCTACCCGAAAGACTGCTTTGGCGAG -TGGTTGGGCAATATCGACATCATCCAGCGGGTTGGTATCGATGCCAATGCTGGCCCCGGT 
-GGATCTATCATGGCCTCTCGCTATGGCGGTGGTGGCTTTGACGTCGTCACATGTATGTTT -GCCATTCACTATGCCTTTGAGACTGAGGAGAAAACACGTCAAATGCTCAGCAATGTCGCT -GGTTGCTTGAAGAAAGGAGGTCGTTTCCTTGGCGTTTGCCCAAACTCAGATGTCATTACC -AGCCGTGTGTCTGCTTTCCATCAGCAGCGAAAGGAGCGCGAGGCTGCCAAGCCTGCGGAG -CCGGAAGGCCCTGAGGATGGTGAAGTCGAGGAGGATGAGCGCGCCCAGTGGGGCAATGAT -ATCTACCGCGTTCAGTTCCCTGGCCCAACCCCCGAGGATGGAGTCTTCCGTCCGCCTTTC -GGGTGGAAGTATAGTTACTTCATGAAGGAGGCAGTGGAAGAGGTTCCTGAATACGTCGTT -CCCTGGGAAGCTTTCCGAGCGTAAGTGTCGGACATCACCACGTTCGACCATCTATGACTA -ACTTTTACGCAGCCTGACCGAAGATTACAACCTGGAGCTACAATACCGCAAGCCATTTTT -GGATATTTGGGAGGACGAGAAAAATCACCCCGAACTCGGCCCGCTCAGTGAGCGAATGGG -CGTAAGAGACCGTGTGACTGGTACATTGAATATGACAGAGGAAGAGAAGGAAGCTGCCAG -TGAGTATTTCCCAATATGCATTGATTTTATCGATCGCTAATAGATGACTTCGCTAGGCTT -CTACCATGCATACTGCTTCTACAAGGTATAGCCTATCAGGAATACTTCGAATTTCGCATC -CCTCTTATGTGTAAAGTTTCCATCGCTCAGCATCTGCATAATAATCATCTTCTTGTATCC -TGAAATAACTAGAGTTGCAACAAAACCTATATGAGAAAATCACCGTTGATCATTACAATC -TCCAAACCCCATCCGCAGGCATATTTGATTCCTCCAATATAAAGCCCGGTTGCCCTTCTT -AATCTACGAGTCCACTACTCCCAACATCCTCTCATCTATAAATCCTGCTGCTGAATCTCC -ACCACATTCCCATCAGGATCTGTAACCATCAAAAACATCTCAAACCCAATCAAACCCTGT -GCCTTCACAAGCGCCCTCTTAGCCGCAAGATGCAACCCAGCGAATTCCCCAATGCCCATC -GAATTCGGCACAGGCCCGGTAAAATCCTCCACAGCCTGACCAACCCTCTTGAGAACTCTC -ACCCCCAGCGCTTCAAGATACGTCTGCGCTTCCACAACATCAGGAACGACAAGCCCGACA -TGTCCGAAGGTATTAGTCCTTTCGGTAGACGCAACAAGCCGGTCATCCGAAACATTAAAC -TGCACCAGTTCCAGTAGCCCGTACATATTATTCTTGGCTGCCGTCATCTCTGCCCCGGTT -TGGAAACCCGTGCCGTTCCGTCCGCCTTGTGCGTACCCCATGTAGGTTACTGTGTACTCG -GGTGTTACTTGTGCGTCGAAGATAGTGCGCATACCGAGGATGGTGCCGTAGAAGTGTTTC -ATGGCGGAGAGATTGATGGTCAACAGGCCGATGTGGTTTAAGGCGAAGCCTCTTGTTGCT -TCGGGGGCTGGGCCGTCGGTGCCTATTGTGAATGGATTTTGAACGTTGGTATCGCGGCGG -ACTGGGTTACATGCTTGCGCAGGCTTGATTGCGAGGAGGTATACGATAGAGCAGAGGTTT -CTGAGATACATTTTTGAGGTAATTGGGTTTGATAGGAGTTGTCTTAGATTGAGGGATGGA -GATATATATGGCCATAGTTTGCGATGCTTCCCTGGGTGTTTCTATTCTTTATATAGGTAG -AGGGAATATCAAAGTCTCTGAATTTCCATCGTGATTGTCTAAACATGCAGCTATCTAAAC -AACACCATAGATTCTGCTTACCTGCTCGGCCGAGTGGGGACTGCGCGCAAGGCTAGCTTT 
-ATATGGAGAGACCTGGCGACATGCCCAGACTGAGGCCCAGGCTCAGGGTCTACTGATGCA -TCAGATATTCTATGTGCTGTCAATTTGCGAGCTCTTAGGGTTGATTGTGGGGTGTGATCG -GGTCATGGGCGGGTGATCAGCCACTGGGTCCCTTGGTGGGGACATATACATACACCCGTG -CACGGGCAGACAGCAGAGCAGTGCATCAGGTTGTCATGCTATAGTATTCTGTCAATGCAG -GTCGGTGCTCAAATCAGATGTGTATACGGACGCCAAGCCACCCGTATCTTTGGTATGGCA -GTCGGATTGGTGCGTGAAGGTCGTTAGAACTGTCCTCCGATTGGGGCGGGTTTCAGAGCA -AATACGAGATTATGCAAGTTTGTAAATCAAGGAACCCTTGTCATTGGTAGGTCAGTTTCC -ATCATCAAGGCGTCCTATGCTGACTCCATCATTCTGGATATAGACTCCCAGCTGAATATA -CAACTGGCATTACAGGGTTTCATCCATCACTGATCACATCGCACAACTCTCACGGTTGAG -TTGATATTGAAGAAACCTGGGTATTTTGTTCTAACTACATTGCTATACCGGGCTTTATCA -TTCAAGTATCACGACATAGCACCGGTATATACTTCAATCTTTCTCGACGAACAACATGTG -TCTTTTGTCACGGGGATAAGTTTGTCACACCGGTGATAAGATCCCAATAAACAAGTTGAC -TTGAATTGCCCTAAGGAGGGATCCAAGAGCATTTGAATTCTCCGCAGTTGAAAGAAATGC -TATTTGTGAAGGTCGTGTGGGACAGTATTGTGGAAGCTATAAGAACAGCGGCAGCTTATA -GCCTGGTCGTTTTCCAGGATCTTCTCCAGCTGGCGAGAACAAGCAATCTGGGCTATAAAC -TACGAGTGTTGTAAGTACATCTTAACACTTTGTGCCTAGAGGCAAAGCTTCCAACAAGAG -ATGCCCAGGGCTCTTCAGTAGTATTTTCAGCAGGCTCTTCAGTGGGCTTCTCGGATATCC -TCCTTCGTTCTTTTTAGGGTTTTTTCCTCACCTCGCCAATTCTATAAGTAGATGGCAAGA -AAAAGCAGGGAAACAGGCATTTGGGGAATAGACTATTTTATGTATAACCTGGGGTGCCAA -GGCCCACCCCGTAGTGATAAGGCTTATCGCTGATATCTGTAAACCCAAACAGGACTAACG -CCATCGCGTCTCGCGTCGGGATTTCGAGATGCTCTTTGTTACATAGACTTTCTTTGTCAA -CGCCCTGAGATCCGGTATTCGAATCAAAGCAAACCCCCGTGTAACTCGGGTTCCCCCAAC -TCACCCTCTACAGAATTTTCCCCCAATAACGATGTCGGCAACACCAAGCCGACGGCAGTC -ATCTCAAGCCCATCCACAATCAGCACAATCACGCAGACGAACCTCAACCCACACCTCGGT -CTCAGAAGCAAGACCTCTCCCAGAATATCAGACTCCCGAGGCTCCCCTCACAGCAGAAAG -CCAGCGCCAAATCGCTGCCCTCCTCGCATCACACCATCTCCGCAATCTCCGCACTCATCT -CCAACATGCCGCCGAGAAACTCACACACTCCGGCGGTGAAGTAAACGAACGGCTCAGCGA -TGCCCGGGTCCGATACGAAAAATCAAAAGAGGCGCGTCGAAGTCAAGGCGACGAAAATGT -TGACGACGATGAGAGCAATGAAGAATACCAGCGACTTGCTGAGTTAGAGACTCGAGCCGA -TGCTATTACTGCGCGGATGGAGGAGAAGACACGACAGGTTGTCGATTCAGAAATAAAGCT -CCAGGGACTGACAGATGCGATGGGCGAGATTGAAAGGGAAGAGGGTGAGAATGTCGTTAC -TGCTCTCGGGGTCAGACAGACCCGTCAACAGCGGGCAAGACGAAGAGCCAATGAGGACGA 
-TGATGCCGACGGGACTGAAGATCCCACCGACGGTGACTACGAAGATGCGCAGGAGAGAGA -GTTGCGGGAGCGCAATGCACAGAACCCGCCCAGTCGCAAGCTGGCCGATAAATTGACTGA -AGGCGTTCAGAAATGGGATGAGCTTTCTTTGACAGAGAGGTCTGGAATTGGCCCCTAATT -ATGGCTGTTATACTCGAAAGCGCCGAAACTGACAACTACGATCTAGATATGCCAGCAATA -ACTCCTATATCGGCTTCTATCGGATGGTACACGACTCCAAGTTCCCCGGTGACGATGTCC -CACCGTTGCCCCACTCCTCTACGTGGTTTGAGCACATGGAAGATACGAATACGCGATCCG -GAGCACCGGCACGTACGCGCAACCAGAAGCGTCGCCTGTCGCCCGCCGACTCCGACGATG -ACATTGCCATTGAGCGCGAGCGCATCTCGCTTAAATGCCCATTGACTCTTACGCCTTACC -AGGATCCCGTGACAAGCACCAAATGTCCCCACAGCTTCGAGCGGGAGGCTATCATGGATA -TGATCAAACGCAGTCCGACGACTATTCCGCCGCCTGCATCTCGTCGGGGCCAGCGCCGCG -TCCATGTGGTCAAATGTCCTGTTTGTTCTATTCCGTTGACCGCGGAGGATTTGCGACCAG -ATCCTGTTCTACTACGGCGTGTTCGCCGTGCACAAGAGCTTCAACGGCGCGAGGAAGAGG -ATGATCATCTTGAAGGGGATGGTCGAAAGAATAAGGATCGTAGTACCGGGATTACCTTGG -GTAGTGACGTGGAAAGCGATGACGATGCTATGGATGTTGATGCTCCTGCCTCCCAGCATA -TCAAAAATGAGCCGCTTAGCCAGGCTGCCGCAGCTCGATCTGATGAGTCCAGCGATGACG -CTGAGAGTCAAGATGCTGAGAGTGCGGAGGGGGGGAGTGAAGAAGGGGATCAGGACGAGA -TGGAAACTGACGAATCTGAGAATGAAGAAGATGAGACCAATCAGGGTGAGCTTGAACAAG -CGCAAAATGAAGAAGAGCACAATGAACAGGCGGAAACTGACGAATCTGAGAATGAACATG -TGGATAATGAGCCAAACAACGAAGAGACGGAGACTGGAGATGGTGAGATTGAACAAGCGC -AGAACGCTCTAGAGCAGAATGAAGAGACGGTGACTGACGACTCTGAGCATGAAGTTGGGA -GTGAGGAGGTTCCAAGCAAGGAAACACCGGACGCGGAGCGCAATGCAGACAATCGGGGTG -GAGCGGTGAGAGGGGACGTGGAGGGCGGGGAAGGGCACCATGAAGCTGTGGCTCCCCAGA -AAGCTGTGCCGGATGAGAATGAGAAGAGCGATCCTGAGAGTGGCAGCGACAGGGACGGTG -AAGGCAGTGAAGGCAGTGAAGGCAGTGGAAGTGAAAGTGGAGACAGCGTTATCGAACCCA -AAGTCGAGAGTGGATCCGAAGAGGAAAGCCATAGTGATTGATCTACATGATACCATTCCA -TTTCTAGCGTTAAATGAAGTGCCCTTCCATCTAGCCGAGATGTAACGTTTCGAACTCGGG -ACTTTTTTCCTGAGATTCAAAGGCGGTATATTGTACTACCTAGAAGGTGAGGTTGAATGT -CTTCTAGATCTTGCGTCGAAGTTCAAGCCATCAAACTTCTACATATTCAACCATGTCAAA -CGCTCGAAATTTCAAAATAAATTAGATGCTACCCCGTACTTCGTCCTCACATGCAAGATT -TTCATAGTAGAGGGTGTTCGAACCGCTGCTCCTGTAGGTTTCAATCGATTTTACATGGTC -ATATGCATACATCGAACTATCGATCTAGTTCACAACCCTAGTAGACACCTATTTCTAGTT -CCATCTAAGATCCAAACTTATTGATTGGTCTCCGATCTCAAATAAACCCTCTCTGTACGG 
-TACTCGCAACCAGAAATTTCATTTAGAAGGCCCGGTATTATCGATAGTAGTTTGAACTCT -TGTGTTTCCGCAGTACTCCACATGCTGTACATATGGGATGAGATATCCACCTTGAAGACC -CTGCTCAACTCCATGTAATTCTACCACGAGGGTCTGAACTTTTCCGGTATGATATTCCTC -AGTTTCTTTCTAATCTCAACCCGACACCTACCTGTCGCCTGAATTCCGGGCCATGGCCAG -AAGGCATTCTACTCTCTCTTCGTTTTAGGTCAGTAGTCTTTTTCATTCCAGGGTATTTGA -TCACCTGTCGCCCCCTGGCGACCCTCCTGCATTTTCTCCACTGTCTCAGGCTTCCCGCTG -CCGCTAACCACCGCGTTCTATATCCAATTTATCCTCACATCTGCCCGCCGGCCTTTGACA -GAGGCCAGAACTTTTCTCCTTGTGGCGTACCTTGAATTTCGCTCGTCTTATTCTACCTTG -CTGGACCAACCTCCATGTACTTGGATATCCTCCCACGCGGAATCCTTTCCCCCTCAGGTC -TGCCCAGTGCCTCTGAAACACGTGATATTATCCTCGAGTCCTCTCGTGATTGTTGGGTGG -TGACTGGGGCATAATTTGAAACCCCCGTTCTGACACCGCTCTATACCGTGTACTGTAATT -TATACACATTCTGTACTGGTTGACGTTATACCTATCTTCTACATCTCTTGATCCACGGAA -TGATAGACGTTGACGAAGGCCCGACCGTAGAGAAGGTCCCTCGGGTGCATCTGGATCATC -ATAACCAGGTCCTTGATAGTTCTCTTTTCGCATACACATGAGGTTTTTTTTCTACGGTTG -AAATGGCTTTCCCACATTGAGATCGACGTACAGGTCTGGGTGGGAATATATAGCTTTGGT -TCTCAAGGTCTCTGTCAGAGTAACTCCAGTGGCAGTGGCGTAGACATCAATCAGGCGCTA -ACCCTAAATCAGGCATTCGACCCCTGGCTATGCAGCTTCGGGCCACGGCCATTTCAGCCC -ATTGTTTGCTATCATCTCAGTGTCTATCGGCCTTCTAGAAGTTGCATTTTGTTTTCCAGG -CAATGAGTATTTATACGAGCATCTGTCCGAATCACGTGAGTTCCTGTGCTGCCGAAGGGC -CCTCGTGTCTCCCCCGCAGTCAAGGCAGAGAGCTGTGGGACCCGATCCAATCAACGAATC -GACCCATTTCTACTTTCTCTTCTGGCCAGTCCCGAGGTCTGATGAATTGATTATTTTCTC -CAGGATGCGACTCCAGATCGGTGAACTCTAATCTCGAGACACAAAAAGCCCCATAGTAAC -CTTGATCACCCTACAGATACTACATCTGTTCTCACGCGACAACCCCGGTCTACCAGGCCC -CACAAAGAAACAGTGCCATCAGGGTGTAACACTGCTATCAAAGCGCAGGCCCCCATTTAC -CTAACCCCCAAAGAGCAACATCCCCCCGGCCAATTGTATCAAACCATCGTAGTCCCGTCT -CTGACCCCTAAGGCCACAGCTCCAGCCCGTAAATTCAATGCTACCCTGCTAATTCGGATA -CATCTATCTGTAATCTGAACCTAGCGCTCTCCTTGGCTTATACCTCTCTGAAGCAGGGGT -ACATTGTACTACGAGCTTGCTATCTTTGAAATCGGGGGAAATTTTTTGTTTAAAGATCTT -CATGTCTCCTTTTTGATCTTCCCTCCCCCACACTTCTTACCATCCCATATCTTACCTTCT -CTTCTGCAGCGGTCTCCTTCATTCAGAATGGATGAACGGAAAACTCCCGTCGAATCGTCC -GCCGAACCAGTCCACACCCCCCGACCCGCAGGATGGATGTACAAAGACACCAAGATCGGC -AAGTTCTCAACGAGCTGGTATGCCTCGCCACGCATCCAACTGGGTATGGTCGCCTTTGTC 
-TGCTTCCTCTGTCCGGGCATGTTTAACGCCCTGAGCGGAATGGGCGGTGGTGGAAAATCC -GACCCCAGCTTGGCTGATAAAATGGTACGATCGAGCCTCGGGACTTTTGTTTCTCTATCT -GTTCCCCACTTCTTTTACAGCTGATACCCCGATACTCCGACCCCCCGATTGGTCGCATTC -ATGGATTCACTATCTAACAACAACAGAATATCGCCTTGAACAGTACGTTCGCCGTGGTCG -GTTTCTTCGCCGGAACAGTCGTCAACCGTATCGGTGTCCGTCTTTCGCTCTCCTTCGGTG -GAGTCGGTTATTGCATTTATTCGATCAGTCTTCTCGTCTCGGAACATGCTTATGTCCCCG -GGTTCAATATCTTCGCCGGCGCCTTCCTCGGTGTGTGCGCCGGTCTGCTCTGGGCCGCAC -AGGGAACCATCATGATGTCCTACCCGATCGAGCAACAGAAGGGTCGCTACTTCTCTTGGT -TCTGGGGTATTTTCAACGTCGGCGCCTGTATCGGTAGCTTGGTAAGACCAGTCCAGTCCT -GTTATCTGAACTTGATAGTCCCGATTGGTCACATCAACCAACTAACACGGCAACCGATAG -ATCCCCCTCGGCCAAAACATCCATGTCAAGGAGAACAAGACCGTCGGCGACGGTACCTAC -ATCGCCTTCATCGTCCTCATGTTCGCAGGTGCCGTTCTCGCCCTGTTCATCTGCGACGCA -GATAAAGTCATCCGTCGCGACGGCTCCCGCGTCATCCTCATGAAGAACCCCTCGTGGAAA -TCTGAGATCGTCGGACTGTGGGATACCGTCCGTGCCGAGCCGTGGATCATCCTCCTCTTC -CCCATGTTCTGGTCCTCCAACTGGTTCTACACATACCAGCAGAACGCCATCAACGGCGCT -TACTTCAACACTCGCACCAAGGCCCTGAACGGTTTCCTGTACTGGTTTGCGCAGATCGTG -GCCGCTGTGATCATGGGTCCTTTGCTCGACACCGAACGCGTCCGTCGCTCTCTTCGTGCC -AAGATCGCCTTGGTCGTCCTTTTCGTCCTCACGGTCGTTATCTGGGGTAGTGGATACGCT -TGGCAGGTGCGGTACACCCGTGCGGATGTGGATCCCAAGACTACAGATTTCAAGGGCTGG -GATTGGACTACCTCCGGCTACGTCGGCCCCATGTTCCTCTACTTCTTCTATGGCATGTAC -GATGCCGCCTGGCAGGGCCTTGTCTACTGGTAAGATTTCTTCTCGTTTTTCCCTCCACTA -TATTTCATGACTAACGTGATCCTAGGATTATGGGCGCTCTCGGTAACTCTGGCCGTAAGC -TCGCCAACTTGGCAGGCTTCTACAAGGGCTTGCAATCCGCCGGCGCGGCTGTGATGTGGT -CTCTGGATGAGAGGAAGATCCCCTTCATGAACGAGTATGCCTCGAACTTCGGTCTGCTGT -GCGGCTCGATCCTCGTCGCCGTCCCCGTCGTCTTTTTCAAGATTAAAGATACCGTCCCCG -TGGAGGACGAGCTTGCCGGTACTGGCGAGACGCTTGAGGATGTGCTTCCCCCCGGCGCGA -TCGAGACCAAGCGTGTTGGGGATGATAAGATCTAATCAGAAGTAATATGAACTCGGAAGA -GAATTGGATGGAATGTGATCTTTTTTTTCTTCTCGTTTTTGCGCAAGGGCGTCGGGTCGT -CTTGTGGTGGATTTGGAAGTGTGCCTCGATCTGGTCCTTGAGTCTGGGTTTGAGCCTTGG -TGATCTTATTTTGACGTTGCCTTGTGATTCCCGACTATCTGGCTCGCGTGTTGCCGGTGG -TGTACTTTGCGTCTCTATCATTATGAACTTGGTTTTAATGGGGCATTCTCTTTTACTTTC -ATGATGGCATGTGGATACTTCTTGTGTTTCGGTTCTATCGGGCTGGCCTTATGCTTTGCC 
-TGTAGAATGGGATCATTCACATAAACAATGGAAATACTTTACTATGTGTACACATATATC -GTATACTTGGATGAATAATTTTCACTCGTCCGAGTCAGTGCGATGATTCATATCTACAAA -TCTACTGGACAAGACAGACATCTGGATATCTAATCGAAACAAAGAGAATGGATAAGCAAG -TAAGCTATCTATCCACCGAGGATCCTACCCAGCAGAGATCGTGACGTAAGACCATCGACA -TGGAGTATGGTGAGGCAAACAACAAGAGCAAGACAAGGGGGTATTCGCTTGTCGaaagca -aaaacaaaacgaaaccaaaaaaaaaaaTTAAGCACTTGACAAAGACTATGTGTTGTATTC -TGTTGTCATGCTGACTTCGGAGAAAAATCAGCCACGTATCCTCCGACATATTTCCATTTT -CTCTTCGGGAACGAAAAAGAAACTGCACATGGGAATGCTCCTTCGCCGATACCATGATCT -CCGAACCAAATCTACCACGGCTGATTTGTCATTGACTCATTGCAGGGAGTGATTGTGTGA -TGCACACAGACCGCGGAGCATAAAATGTCTCAAGAGTCAGATTGGCTGTGTTCCTCCGGA -CGCTGCGCTTCCCATGGATTTTAAAGATAGCAGAATTTGTGTCTAATGATAGACAGTGCG -GGGTGGGAATATTAAATCGCGGAACTGAGCAATAGGTCGATGCGATGTGTAATCGGAGAG -CGATGTAAAAGAATGAAGCCCGATTCAGGCTTTCAAGCAGAGAAAAGTAGTTTACTTGGT -GAGGATGTTGAGATAATCCGCCACGATGAGGGTGACCATGCTGCAAATGGAGTCAGTGGG -GGCAAAATAAGGGTAAAATAGTGGGGGCAACTAACGCATGAGGGGCGATCCGAATGTAGT -AAGTTCCGAAGCCACGATAGAATCGCATCCAACCCTCTTCCTTGGCAACCTTGCGCGCAC -AATCAAGAAGACCCTTGTAAGGAACCAAGCCGGTCTTAGGATCCTTCTGTTGCTTCTGCA -GACGGGTCTTGATGAAATCGAAAGGCAGAGACAAGAAACTAGCAAAGAAACCGGCAATAC -CGGACGCAGCAAATGTCTGATTCTGAGTCGAGAGAGAAGTGTGTTGCTTAAGCTGCTGCT -TCGCCTCAGCGAAGAATGTCAATTGACCCATGTTGAGAGCCATGGCACGCACAACCGTGG -GCGAAGCACCAGCCCACAAGGCAGCGAGTCCCTCGCCGCGAGTGATGCGCGCGAGAGCAT -CAAAGACGGAACGGTAGTTTGCGCGAGCCTCGGGGGCCTTGAGGCCGTCGGACTGCATGC -GCACGAGAGCCAAGTCGGCTGGATTTCCAATCATGGCTGCGATGCCGCCGGCTGTTAGTC -CTGCCGCGGCACGCTCGACGAAGGTTACCTTTCGGCCTGCGGCTTCGGCGTTGGCGTTGA -GCTTCTTGATGAAGGTTTCGAAGAATCCCAGACGGGCGGTGGTGTAGACGGCTTGGCGGA -GGATCCCGGCGGACAAACCGGTGTAGAGATCGAGGACCTTGCCTGAGGCGATGATGTCGC -GAGTGATTCCCAGGGCGGAGGGACGGGGACCGGTGCGTGCTCCTTCGCCGGCGAGCTGGA -GGCGCACTTTGACCATGTCGACGGGTTGGATCACGGCTGTTGCGGTCATGCCGGCGAGTC -CACCGTTGACGAAGGGAAGGGCGGCGCGGGTGTATGGGTGGTGGAGGAAGTCAGTGGGTT -TCTCAATGGCGGACTTGGTTGCGTTTTGAGCAGAGACAGCGGCGTCGCTGGCGCTGTCTT -TGATAGTGGATGCCATGGTGGAATATGATACTGGAAGCGGGGTTAATACGATGTATAATA -GAAGAGAAGCGGCAGATGCACGAACAAGAAAGCCTTTTCGAAGGGTTAGCAGGATCTCTT 
-TGGCGGATTCAGAGGCCACGGGGATTGTTCACGTCTTGGACCATGGTACTTACCGATGAC -GTAGTCTCTTGAAGGATAGCAGTCAACAGTTAGATGGACAGTGTCGAAGTGGAAAATGAA -AAGAGAATATAAGAGAAAGTCACAAGACAGAATAGGTGCGGGGTGTCTTTGGTTCGTACT -CTGGTACAACGGGGCCTGAGCCTTGTCTATATACTCCAGAGTCTTATATTCATTCCGAAC -CGGAGATTACTCCCTTCAACCGATCAACCGAATAACTACCCGCGAGTTCCCCGCCAGAAC -AATATGGAGACAGCCCTGAGTGAGTGTCGCCCGAGACCCCAAGATTTTCGTGGCCCAATT -ATATCTACATCATTACATCAATGCAAGATGCACATCCCTGCACGCTACGGAGCTTGTCAA -AATGCGGGGGGCAGCTTGACGATCCTTCAACTTGCTTTCTACCATACACACATCCTCAAG -TTAGAGGTGGTGAACATCCCACAGCTCTGTCTACTGTTCAGCACTTTCTAATACACAGAC -ATTCTCATGGCCCTCGTCTACTATGTACACACGGCAGATACGTTAGTGCCACGAGCTAAA -TTCGGAATATCCACCCACGCTAAACCCTTTTGGATTCCCCCACGAGAACGTATCGGGATC -CCGTGCTGACATACCCGACGGCGGCCGAGAAAAGACGCTAATATGCTACATCAATATTAA -ATACACCATGCTTTCAAGCCCATACCGATTCAATACGCAAGGATATTCACAGCTGCCATA -TCTCAAATTGACCAACATCAAATATCAGTATTGGTATTACGACCAGAAAAAGGGTCATGC -ATTAGCGTTCTTTCTCGGCCGAGCTGTGGTAAATCGTCCTCGGGCATACATCGGACATCT -AATACTACTCCGTACAAATGCTGATACCATATCCTATCCAAGAGCACAGCCCAGCTCAAT -TTTCCCCCAAAAATACGTTGAACATTCCATACAGAACGGGTCCACGATCAGCCGTATATC -GGTGCTTGAATTGTTGGGGACTTCTCAAAGAGCCTAGAATCAAAACGAAACCATATCGGA -TGGGTCTGATCGGCTATTCCACCTGAGTAAAATTCTCCCGCTCCCTCGGGCCACGATCCG -GGGATGCTAACACGGCAGTCCAATCGGCCTGACATAATTCCGCTGACGCTGACGCTGTGG -CCCTGGGTCCTTGCACCAGCCCTTGAATCGCCCACCAATCCCATCCGTCGAGATTCTAAA -GGACCCTCCTTGCCTTGTGTGGAGCTGCGGCTCACGCTACTATTGTTGAGTTGACAGAGC -TAGTCTACACACATTCCTCCGGTTTGGGGCTTGTTGCTGGCGTTTTCCGCCGATGACCGG -CAATTATTATGTCAAGTAAGCTTCTGTGGATTCTCACCCCCGTCTCTAGTGCTGCATCTA -CTCCGTGTGTTGATTGATTATGCCGATTACGGATATAAGTCGTCAACTTCCAATTCTCAA -GAGGACCGGGCATGTCGGTGTGTAGGATGTGACGGAGGACTGTTCTCCGGGAAATAGCGT -CATATGTTTGTCCGGGGTGTGAACCCCTGAATACCTGTTCTGAATATGGCACGGGGAAGA -GACGGGATCTTGTTTTGATGGGCTTGCTGATTGAATTGGCAGGGAGGGTCAACAAAGCGA -TATGAGACTAAACTAAACTATGAACAGTGAAAGCGCAGGAAACTTCCATCGGATATGACC -GTTTCGTAGTATACGACCCGCTCAATCCATGTTTGTATCGTTATATGCCTCAGTCCGAGG -CCCCGAGTGTAAACTAAGTCCTGAACTTCTAATTCCAATGAAGGATGTATGCCCAACCAA -ACAACCCACGCACATAGCCACGGACCTCTGGTTTCCATGAACACATCCAGCTACCCGCAT 
-CATCCTCCGACAAAAGGAAATCACATTCGTCTCGGAAACCGCTGTCCACACCAAACCTTG -TAAACAACACCAATTTCCACTATTGCAACCCAACCAATCGTGAAAGTCCAAGCACCCATA -GCTCCCGAGGTCTCCAAACTTCCTCCCGCGGAATCCACTCCGTGCCTAGCTCAACCCTGA -ATAGTTCCCAAGGGGAATGTGTCAATCAAACTCAAAGCGCATCACTCTTCTATCCGCGGG -AAGGGCTCTCGGCAAGTCGGCAACTCCACCGGCCTCAAGCCTCGATCCAAACCCACGGGC -CTACCGGCATGGGAGTTCGGGAAGGTTTCTTATATATGGCTTTCTCAGACCGGGGTATTT -TGTTGAAGAGCCGGGGACGGAATGTAGAAGCGGGAATGAGGGATTTTTCGATTTACAAGT -TCCCGGTGAGCATTATTTGACCTATATGGCATTTGATCCCTGAACATCTTTGATCAGTTT -CTTTTTGCTTACATCTTTCGGATGTCTTTGGTTACTTACCTAGCTGGAGAGTGAAGATAC -ATAAAGCAGGTATGAACTAACGTGGTACTAACAAGGACGTATAACTATTATAGTCTTGAA -GCCATGTATGCAAGTATCGGCTTTATAAGATTTTTTTCGTTGATGTTTATTGTCGTGGTC -ATTCTTGCCTGCTAATTTTGGTTCTACGCTAAAGCAGCTCTATATATACTATAAAGTAAA -GGAAACAGATTAACAAAATGACCCAGTCTGCATTTGTTCTGAAAAATTCAGCCTTCTGGC -AGCTCCTTAGCTAAGTACCCAACCCCTTTTTCAAAGTTGGAAACTATCATCATCAACTTC -AGGTCCTTCTTTAGGTCCTCCTTTGGGTCAAATTTCTTTCCAGTGTCAGCTAAAGACAGC -ATTTTCGACAGTGAGTAAGCCAGGTCTCTCATGAATTCCCCTGTGTCTTCTTGGAACTCA -ATAAAATGTTCACTGAACTTTTCATGACAATGAGCAACGAAAGAGCACAGCGCCCTTCGA -GCATAATCCTTGTAAGTATTCGACGGGGGCGTGGTATCATAGATAATGTGAATCGCATCT -GCTAGACGGGCAAATGTCGGTTCGGATGAAGTGTCGTGCTCTTCGAGAAGAGGCTCGAAA -CAATTTACCATTTGGCCAAGAGCATATTGTTGCAGGTCTTTGACAAGGTAACGCAAGGCG -AATGAGTAGACCTTCGCATGTAGCACTAGCGCGTCTCCCACGCAGCCTCGAGTCGGGAGA -AAGCTTTTCTTGGAATCTGATAATACAATCAGCAGGCTGTGAGGTGAGATGTTTCGGGTG -ACTTGCTGTGTGTAGTGGATGATCTCTCTAGGGTTGGGTCCTCGTTGTCTGAAGGCACTT -GCCCTGGGACCCTGTAATGCTCGGCATAGCAGAACATGAGGACACAATCGATGGTCCCTT -CATCGAAATCGGACCAGTCCAATTCTTCTTTTCCGGCAGTCTTCCATGGCCCAGTCACCC -TGGCGGTCAATGCAGACTTGCCTTGAATCAAAGCACCGGGATGGATATGGTAAATGGTCT -TGTTTTCACCCACAAGGGCCTTTATTGTGCTTTCCCTCCAAATGCTTTTTCAGTCGTTAG -CGTTTGAGCTTTGTAGAGGAAGAAGTGGCGTTGTAACCTTTTGCATGGTTCTCCTCCGAC -TGAGATCATGATGAATTTCGTGCAGTGTGTCAAGAAAGATGGAGAAATTGACAATCTCGG -TATAATCAAGGATGAGTTGAAGGTGTTGAGATAGTCAAGAGATCCAAACTTGGTGATCTG -ATCACCCAAGCCTCGATTGTGGTCTCCCTATGCAATGAGAGAACGAGCTGTCGCAATTTG -CTCTTAGAAAGTCGAAGGGCTTGGTAGACGTATTTGGGAGAATACAGCTATGGGGAGAGA 
-TTGTTGGATGAGAAAATATGATAAAAAAGAGACAGTTGAATAAGAAGAGAATTTTGAATG -TGGAAGGAGGCTCACCTACTCGCAGCGAACCCGGAACACGGGGATATTTTCTCGAACATC -AATACATCATGGTCCATCTTAGAACCACATCATCCTTCTCGAGGTCGGCATAAAATTCTG -ACTGATGATTTATCCCATGTCTTAGAAATATAAAAAAGGGAATCCACGCCCAGGACAGAC -AACAAGGTCATCTATTTTCATCATGGGACAGGTAAAGCAAGCCAGATTTTATCGAAGATA -GCCTTATCCATCGGCCTCATTTATAAGTCTTCGAATAATAAGACAAGGAGATACATATAC -AGAATATATGCAACCTTATGAAGCATCTTACCAGCTACTATCATGCAAGCTAGTGCAATT -AATCAGAGCGTCGGCCTACTCATTCCACTACCGTTTCACCTAACTGCATCATCTCTAACT -GCTTGAAGTCTTTGATCAATTCTGTTTTATTTGCATCTTTCCGGGCTTTATCTGGTATCA -TAATTATCTCAAAGCGCATCCTGAAATGGTAGCATAATGATATATATGCGACTTGAAGCA -AGCTCTGTGTTACCGGCGAGAGCCTGCCGAACTGTGAAACAATGAATTGGCAGAAGAATC -ATCAAATCGGACACTGGAGACAAAATATAGAAGGGACCGTTGGCTTTAGGGTTTAACCAA -GCTGTACATAGCACGGTATCTTACTCCATATGCTAGACATCTATATAATATCTAGAAGAA -GACAGTCATACGATCTTCAAGACCCCAAAGGGTAATATGCACAACATATCCATGTGGCGC -CCAATATCTTGGTTGCTTATGATTGGATGGAAGCATAATCCATACAACCTTCTCGAGTCG -CCTATCAAAGAGGGCTATGGCGATTGGTATGTAGGGGTTCACCTAGATCATCCAACCGAA -TGTAAGCGATAATGCGGAAATCTATTGGCAATCTGCGCATGACCTAGTCCGCCTCTTTCT -CACAGAAACATGATATTCGCCCCCCTCCTTTTGCCTATATAGACCGTCCTACCTAGTCAT -ATTTTCTCTCACCTATAACTTGTAAATCAGTAATAGGGTGCATGATGCTTTCGGAAATCA -AGATCTCTTTCCCGTCCAGTACAATCTCATTATTGACTGGCCTATCTATCGGGGCTATTT -TCACTCTCATCATATATCGATATTGGTTTCACCCACTCAGCCAATTTCCCGGCCCCCCAT -TAGCGGGGATCTCTAGCCTTTGGGAGGTATGGCAGGACATAATCTGCCAGGGCAACTTCC -CACGATCGATAAATGAGCTCCATCGAAAATACAGTAGGTGATATGAACAAAAAAGTAGAC -TATATCAACTAACCATCTCTTTAAACTCCAACGTAATTCGTATCACACCAAATCACCTCC -ATATTCATGATCTGGACTTTCACTACAAGTAGGTATCAAAATACCACCAATCATACAAAT -ACCATTTTAATCCAAATTCCACAGGATTTCAAAAGTCAACTCGGATTTCCTCAAAGATCC -CAAATTCTACGGAAATTTGGGCGTTCCCAATGCATTAGGAGCTCTATGCGACCCCCATGA -ACATCGCATTTTACGCCGCCGGGTGAATCCCCTCTTTAGCCAACAGACTGTGAATCAAAT -GGCGAGAGATATTCAGAAAACAATCGAACATGCCTGCGACGGGGTTTCAACCAACTTCGA -TGCTCAATTTTTCTTCCGCGCAATCGCAGTATGAATCTCCTCTATCCTATGGTGTATCGG -TGTATTCTCACAAACTTGCGAAACCAGGGAGATATCCTCTCTACACACTATTTCGGAGAG -AACATGAACCTCATCGACAGACCCGAGTATGCGCGCAAACTCTGGGATGGCATCGACACA 
-ATGGTCAGACAGATGTGGTACTGTATGTCCCCAAGCAAACTCCCAAGATGGAAACGTGTA -CTAACTCCCCGACGAGCGATTCATATTCCGTATCTCGCGAGCGTCCTCGTGAATATGCCG -ATTCTTGTTCTGAAAATCTCGTTTCCGGGATTTGCGGGTCTGGTAGAGGTACGAGGTTCA -TGTCTTATCCTCATCCGTAATCTATACTTGCTGAGGCTTTCGGGGATAGTTGTGTATGCG -ACAAGCTGAAAAAGCTATTGAACGGAATGAAACCAAGTCTACTCCACTCGAAAACCACAA -CAATGCCACTTTCTTCGATTTACTCATGACACCGTCGCCAGGACAAGAATCAATCCATCT -TAATCGAAATGATATGGTCAACCACGGTCTAAACCTCGTTGGTGCAGGGGTAGACACGGT -ATCACTCGCTATGACAGCTGCGCTTTATCATATCCTTTCATCTCCGGAGATCCAAGGTCA -GGTGTATAAGGAGGCACAGGAAGCGACGCCTTTCGTCCGAGAAAAACTTGATTCGCAACG -GATCCGAACACTGCCATATCTCGTAATACCAACACCTTTCAGCGTCGTATAAATCGCTAA -CGATCTTTTAGGCAGCCGTTATCAAGGAATCTTTGCGCATGTATCCCCCGGTCCCAGGTA -GATTGCCACGCATTGTCCCGCCACAGGGCGAATCCTACGATGGGAAATTCATACCGGGTG -GTGTAAGTAATCTCTGATAATAAGACTCTACGCCTCAATTATAGCTTTGGAAAGTCTGAC -CATCGCATAGACGACTGTGTCAATTTCCCCGTACACTATCCACCGAGATCCGTTACTGTT -CCCAGAGCCGAACATCTTCAAGCCGGAGCGGTGGCTAGGTGACAACACACCTGAGCTGGA -AAGGGCTATCATAGCATTCGGCGGTGGAAGTCGCATGTGTCCCGGAATCCAGTGAGCAGG -CTCAGATAATTTTGGGGGTTTTTTGCTAGCGTCACTAACTCGCAAAAGTCTAGCATATCT -TGAGATGCATATGACTCTCGCAATGCTCTTTAGTCGATTCGTCCTTGAACTTGAGAGTCC -GCTTCCGAGCCCAGAGTTGGATTGGAAAGATCATTTCGTCGTTGATCTTGATCATCCAGT -CAAAGTGAGGGTGCTTGCTGATCACTGGGCCAGTGAAAAGAACCGTACAGGCCTGCCTAG -TTGTTGAGTGAAGCTCTAGTATCAATGAGAGGCTGTCGGCGCTGATGTCACTTTCTATAC -CATATCCCTAAACCATGGGTCATTCATTATATCGCTTGCAGGTGCTCTAGTAGAAGGCTC -AAGCCGCAGTAAACTTCTGATGATACCGCCAAGCTGAAGTATATCATCTTCTATCAACTC -TGCCTTGCGATCGCTCGTAAAGTACGACTCCTCGAGCCTATCCTACAATTTACTTTCCGG -ATCTGATGATGAGGGAAATTTCTTCTTCATTTCGAGCCAGGAATCCTGCCATCTCACAGG -TAGCGAAGCGCCGATCATCTCTTGCATCAGGGCAGCGAGAATCAGGGGCGTGCTCATGAG -GCTGTCAAACAGTGGCTGGCCAACGAACAGCTCAGATAGCTGGAGGGAGATAAGCTTTGA -TTCTGAAAACTGTCGGTATTTGGCAATTCCCACGTCACTTACCACACATCCCATAGCCCA -CAAGTCAACACGATAGTCCAAGTGGTTGTTGCACAGTATTTTAGGTGCTCTAACAGCAAG -CGAAGTATTGAGAGTCCGAGGGGCTTTATCAGATGTGAATGATTCCCCCAAGTCGATGAT -CTTGATGGTTTGGAATGACGACGAAGACTCCATGCGATAATGAGTCGGTCCAACAAGATA -TCCCGGGATACCAATATTGTTTGGTTGCCCGTCACTTCGCGTCGCTTTCCCAATATCAGG 
-TTTAGATAGTAATTCCATAAATTTCTCCTCAGGTAATTTGTGCATGTTGAGTATTGTGAA -ACCCAAGTTGCGAGTGTGGAGACCTGCGCGTCATGAGAGCCATTCATAATTATTCCCAAG -ATGGGAGAGTAGCTCACCACCATGCCCAATCTTATACCGATGGAGACTGTCCAGCGCGAC -AAGAGCTTGTTTCGCGACGACCTTGGCAAATTTTCCAGAAAGTCTACTATCCTGGGTATG -CAATCGACCATATCGATAACATTAAACCCCGAAAACTCAGGAACGAAGTATTTATGTATC -CCGTTTGGGCCTTCCAGGTCAAAGCTATCAAGCATTTGTACAACATGTGAAGGACGAGGA -TCAATCGACGCCAACAATCTCATGATTTCGAATTCTTGGGTTTGCTTATCAGTTTCCGTC -TTCGAGGTGATGTTTTGATAGCGATATAGGCTTCTTTCCTATTTGAGAATGTCAGTTCTA -TCCGACTGATGACGCAGTTGCTGTACTGACCTCCGATCTTTAGCAGCCCAGATTGTCGAA -TAACCTCCCCATCCTAGTTTATACAGGATCCTGTATCTCCCATTCTTCAAGGAATCCCCC -AGAATGATGGGGTGGTAACCGCCCTTTTTATGGCGTTGAAGTGATTCCGCGTGAACGCCA -CTTTTGCACAGGTATGATTGCACCTGAAATTGGTGCGATATGGTATAAATGGGCTAGACA -CAGCTGCGAGCCATTGGTGTCTTCGTCGGAAGGAAGCAATGCAAAAAGCGTGAAATCTTT -CAGACTCCCGAGGTCAAGGGAATGCCTGATCTAAACAGTTACGTCAGTGTAATACTATCC -AAATCATCGTTATCAAAGTTTTAACAACAGTGAAACTGTCGACAAGAGCTCAAAAACAAC -CCAATCACGTGGTCTACGAGAAGAAATTATTAAATAACCAAAAGAATTCAGTCTTGAAAT -GTATGATTGCATTGACTATCATGTAGGAAAATATTTCGTTGATATCACTAACTCCATTCC -GCCAGTCGAAATAGATGGCGAAATATAAAAGCAGAACCCAGATCGGGGGGAAATCAAAAC -ATATCCATCGCTCGTCCCGATACAAGCGGCAGACGTCGGCGAGAGGATCTAAAGATCATC -ACCAAAAAAAAAAGAACGCCATGCAAAAGACTTATTCAACCTGTTTGGGAGAACAAGAAA -CAAGGAGAAAAAGCACATTTTAACCATCGCTCCAACTCTCTAAACCTGGGGAGCAGCGGC -AAGGACATCGGGGGTGGCCTCGGAAGAGTACTTCTTGAAGTTCTCGTTGAAGAGGACAGC -AAGCTTGGTGACCTCATCCTTGAAGGAAGTGGAGGCGGTCCAGCTCTTCTCGGGGTTGAG -GAGCTCGCTGGGGACACCGGCGCAGGAAGTGGGGACGGGCAGGTTGAAGGTAGCGTAGGT -CTCGTACTCGGCCTTGGCGAGCTCGCCGTTGTGGATGGAGTCCAGGATGGCACGAGTGTA -CTTGAGAGGGCAACGCTTACCACCAGTGGTGGCACCGGCGCCAACCCAGCCGGTGTTCAG -GAGCCAGGCGTTGACCTTGTGCTCCTTGATCTTCTCGGCCAGCATGTGGGCGTAGCGCAT -GGGGTGAAGAGCCAAGAAGGGCTGAGCGAAGCAAGAAGAGAAGGTAGCCTGGGGCTCAGT -GACACCGTCCTCGGTACCGGCCATCTTGGAGGTGTAACCGGAGATGAAGTGGAACATGGT -CTGCTCGGGGGTGAGCTTGGAGATGGGAGGCAGGACACCGCGGGCATCACAGGTGAGCAG -AATGATGTTGGTGGGGTGGTTGTCGCTGACGCAGGGAATCTTGGCGTTCTCAATGTACTC -AATGGGGTAGGCGCAACGAGTGTTCTCGGTGAGGGTGGCATCATCATAGTCAACAACACG 
-GGAGATGGGGTCGAAGACAGTGTTCTCGAGGACGGAACCGAAGCGGATGGCGTTGAAAAT -ATCGGGCTCCTTCTCGGCGGAGAGACCAATGCACTTGGCGTAGCAACCACCCTCAATGTT -GAAGACACCGGTGTCGGTCCAGCAGTGCTCGTCGTCACCAATCAGCTTGCGCTTGGGGTC -AGCGGACAAGGTGGTCTTGCCAGTACCGGACAGACCGAAGAAGACAGTAACATCACCGTT -CTCGCCCTGGTTGGCAGAGGAGTGAAGAGTGAGGACGTTGTGCTTGACGGGCATCTCGTA -GTAAAGAACGGTGAAGACACCCTTCTTCATCTCACCAGCGTACTCGGTACCGAGGATAAC -CATCTCCTTGTCGGCGAAGTTGATGGCCACGGAGGTGGCAGAGGTCATACCCTCAGTGAA -GCGGTTGGCAGGGAAAGAGCCAGCGTTGTAGATGACATAGTCGGGGTGGAAGTGGTCGAG -CTCCTCACGCTTGGGGCGGATCAGCATGTTGCGCATGAAGAGAGCGTGGTAAGCGCGAGC -GCAAACAACGCGAACGCTGATACGGTAGCGCTCATCCCAGCCGGCGTAACCATCAACGAC -ATAGATGCGGTTACGGGTGTTCAGGTAGTCGACAGCACGCTCACGGTTGATACGCCAGAC -CTGTTGGAGATGTATTAGTCACCAGCCTTATATTGACGTCATGCACGTGATAGCCTCGGC -CGAAGGGCGGGGATGCGGAGCAACAACTGAAGCCGAGGGGCAAATTGGTGGAGGCGAAAA -GGGAGAAGAAAAAAAGATGAAAGGAAGTATTCCCAAAAAGGGAATGAGGGATAACAGCGG -CAGAGAAGGCATCTCAAATGTTGCACGAAGTCCTGCCACAAAGCTTGACCTCCCGATGCA -TATGCAAGGGATACAAGTGGATCTCATCGGGCTTATCTGCAAGGGTGTGCTCCAACATCG -AGAATGAAGTATCAAGTAAAATGGCGCATAGAAAGTAAATTAAAAAATAAAAAGTAACTC -ACATCAAGTGTCATGGGCTTGTTTACGGGTCCCCACCAGATCTCATTTTCTGAGGAAGCC -TCCTGGACGATACGCTTGTCGGAGGGAGAACGGCCAGTCTTAGCACCAGAGTATGCGCTC -AAAGCACCGCTGGAGGTGACAGCAGTACCAGTCTCGTAGACAAGAGCATCTTCGTAAAGG -GCAGGAACAGAGGGGTTGGCAATCTAGTTCATGTTAGTTGATGAAGCAAAGTCAAGAATG -ACAATAGACATGGTCTAATGTCAAGCGCAAGGATGGCGAGGTCTTACAATGGAGACTCGG -TCGTAGTCAATGTGAGCAGTCTCGTGAAGTTCCTCCTCAAGCTCGGTGTGACCCTTGCTA -GGCCTAAGTAAAGACAGTTAGAGACGATGAGGTGAGGACAGTGAGGTAGGAAGGTAACAT -ACTGAACACCACCAGGGTGCAAAGCAGTGCGATTTACAGATGTGGCAACCATTGTTCTGA -GTGAAGTAGAGTGATAGTTATTGTTGCGTTGTTTCGCGATCTGCTGTTGAGTGAAATCTG -ATGCCAGAGGACCAGGAGAGTTGGAGCGGTCGAGGGGTATATAATCCTTTCCGGCAGAGG -CCATAGTCGCGATCGGTCTTGGTATGAAGGGAGGTTCTCGTGCAGTTTCCGTGGTGTTTC -TCCACAGTTCCACGGGTGATGTCGGGGCTGAAACGCCGAGCTGAAGCGGGCACGCGGTTG -GTGATGGGATCAAGTCCGTAGGTTGCAAATGCCTCGAGGCGCAGTCCGAGTTGACCCGAA -TGGTTCAAGTGGACACGAAGTTGATTCGGAACTAACAAGGCTCTATGACCGGGAAGGCGA -ATAGCGGAAAACGATTAGGCGTTCTAGAATCAAAAATAACTCATTTAAATTTTGAGTGGT 
-TTTACAAGGGGAAGTTGATGAATCCAGTGCATGAGTTCTTTTATGCTTGCTCTGACGGTT -GCGGCGAAGTCCCTAACCTAGCTAACCTAGGTTCGATTTACAAGTTTCGTCAATCATTTC -AGTATTAGACTTGAATTAATGAATGTAGTTCAACTTTTAGAAAATTATTTCAGCCACAGA -ATGACGCACAATGACTTCACTAAGATCATCTCAAGACTTCATACCTCGAATCGCGGGGGG -CTGACATAATTGTCGATCTTCGGGGCAGTGATCTTTTTTTTTTTCATATTGGAATCGCCC -ATTTTCGGCTTCAATCAACCCGAATATCGGAATGTGCTGCCCGAAGAAAATGAAATCAAA -AAATTGACCAAAAAATTATCTTCCTGAGAACCCCGGATGACGACAAGTTCGGCTGCAAAT -CGGACAAAATTCAGCAGCAAATAACGGCGTTTAACCAGCCGAGGATGATGGCACCTGCAT -CCATCCATGGCGGCGGTGGGGACCAATCAAAACGCATATATTTCCACCTCCTAGGATGGT -CGGGTGGGCAGAAGATATTGCCGCTGGGGCCTAAGCCCTATTTTCACTGGTTTAGACTGA -ATTCTACAGGTTTTGACTGGTTTTCACTGGCCGAACAATAAACACACATGTGCAATAGGA -CGCCTCCCCGTACCTGGGTCTCCAAATCCAAGCCTTATGGTAAAAAGAACCCGTGCATAT -TACGTCCCAACGCCGAAGGCCATGGCCTGGAACACGGATTTCTTGACTCAGAGCATAAAT -CTCGCACATGAGGCGGTCTGCCGTGATTCTGAGGGTTCTTATCGGCCACCATCGCCGTCG -GGCTGTGTACAGATGTGGCGTATGGCAAAATCAACCCCAGATCCTTTTCCCCGTTAGCGG -CGGATCCCGAACCGGCTACGGCCTCGGGAACGTGAGCTTTTTTTTTCATTTCCTTGCATT -CTTCGTGAAACAAGTAGTTATTTTTGCGCGATTTGTTTCATCTTACGTAGTGGCTTCTCT -TTTTTCAAATCCGCCTGATCATTTTCCTTTTTTTTACCTTGCGTGTGACACGAGTATTTG -ATTGCTAGAACTGTCGGCCATTTTGAGGCTTTTTTCACACCCTGAAGAGGTAAGGACTGG -TGGAGTAAAGGGGTTTTTCCCCTTGTCTTTCCTCCACAAAACCAGACTATGTATAGTCAT -ACTCCGGAGTAGAAGCCATGTGATCTTCAATGACTCTATTCACACATTCCCGGATTTGCC -CCTTTTACCCACATTTTCTTATCCGGGTCAACATTCACCAACATAACTAAATTCTCGGTG -CTTGATATTAATCACAACCAATCATTTCTGAAACTTAGCTTCCTAATTTACCCGTTGTGA -TATGCTTCTTGTCTTTCATTCGAGTCTATCTATCTAAAATATCTGCCCAACCAGTGTCGA -TTTCGAAACGGCCTGGGCCGATTGGAAGGGGAACTAAGATCGACTCATCTCCTATTTTCC -CCGAGACTACCTCTAGCGCTCGGCTGCACGTCATTGATATTATTACTTCATCGGAGTACA -GAAATGAACAGGTCCAACCTTGGAATAATCCAGACAAAAAAGACTAGGATATTTGACTCT -GATCCCATGACTGACATCACGTATCTTTCTCCGATCTTTACACATTTCGGTCGAGGCGCA -CTTCTTGGCATCTTGGCTTTTATCTGGGGAAATTGTCGCGTGGTCTCGACCTGCGGATGC -GGGGTATAGCGCTGAAGTTGACACCAGTCAAATGGCACAGATTACTGCACAGATTACTGA -CTCGGCGATTTTCAATATATGCTACATGCGGTTTGGATTGTAATTTAGTGATGCCACGTC -CTATTGCGCTATGGTCGAGGATGCGTGTTTTCTGAGTTTTACAGTAGCATTGGCAGCTGA 
-ATATGTCGTCCATGGCGAATTAGGAAGGTGAAAAAATGTGACCTCAAATACTATAACAGG -GCATGATCTTCAGAGAGTTCAGGTGAAAGAACAGGCTGCACGTTTATCAGAACGGGATTC -CTACAATATAGCTAAGCTGCTCGCTACGATAGACGCTAATCCTCTGGTCTAAAAGGGCCG -TCCACTATATACTACTTCGGATTTTGGAATTAACATAGTACTTCTCATTGACTGATCTGA -AGTTCCCAGGAATTCCTATCAGCCTTTATAGGGGCAAGAACCCGGAACATATCCAGATAT -ATATTTACGCGATCAAGTCGAATGGTGAGAATTTACGATTGGACAGAGGTCGATTGGTCT -CATCTCTCCAGTGCCGTAAGTCGAGAATTTCTGGGTATTGATCCACACAACAGGATTACT -CTATTGTTAGCTCTATGGGCGATTCAGGCCAGAGGACCTTTCAGGTGTTTCCCATTCTAG -GGCCGCACTGTATAGCTTTCCCCAAATTGAACTCGGCTCTCCCAATAAATCACCAGGAGC -TTTCAAGGACTGATCGCTTTTTGAGATAGGCCGGATACTAGACCAATGATGATGAAGAAT -ATACCTCAGTCTTTGGGCAATAGAAGTAGAAGACCCCAGTACAAACTCTAGCATTCTGAG -ATCTATCGCCGTTATCCGCCAGCCAATGTCAAACTAGAGTATGATGATATCAGAGTTTAT -TCGGATAACCATTGGGGACTTAATCGATGTGCAGAGCTCAAATCAGGAAATGTATCGTGG -CTTGATATAGACAAGTTCTAGTAGCACTCGGCCGTCAGAGTAACTAAATTTTAGGTATAA -ATGTCTTGAACCAGAAAGAGTCAAAAAATTCTAATACAAAATAACCTCATTACTCGCAGC -TCCAGCCTCAGCCCACCCACCGAGGAGTTTTCGTGCAACCCCAGCAACAAGCTGGTAATA -GTTAGGTGGCGGAACAGCACCAGGAGGAGGCTTTCGGCTTGGGTCACCAGCGATCCAAAG -AACAGGATCTTGCGAGATACTAGCAACATTCTCGCGCAATGCCTCGATATCCTCCAAGTC -AAACCCACGACCAAAGATAATTGCACGCACAGGCCGGCTGTAGTCTTTTGTGCCGACTTC -GTTGACGTGGGTGGATTGAGGATCCCGGCCTGCGAGAAGATGAGGAATTTCGGAGTTCGC -TGCTGAGAGGCTTTGGATGAAATGAATTACTAGAGAGTATTAGCTAGCTTGCCGTGAGAC -GATTCGGATTCTGTTATAAAAAAAAGTAACGTATGCTCTACGAACCTTCAAATTCGGGGA -GAAGAAGCTCGGAAGCCGGCTTGCCGATCCCGGGGTCTAGGGCACAGAGCACGATTGGGA -GGGGGGAGGACATTGTGGAGGTGTAAAAAATGGGTAGGACTGTCTATATTCTTCTCCTGA -TATGATAGAAAGCTGAATGATGAAACTCATAAACCGAGCAGACATCTCTATCTTTATTTT -GAAGTTCGGAGTCCGACCTACTCGATGCGGCTCGGGAATGGAGACAAAAGAGCCCCACGG -GGTTAATTTTGGGACTAGCGTCATGCGGGAGTATAGTAGTTCCAGACAGTTGGAATCATG -TGGAATTTGTGTCTTGACTTATTCTGTTAGCGATAGATTCATTAAATCCATGCCCACTAC -TTATGTACTTATCATGGATTGTAACTTGATAGCTGGTTCCACCCCCTGGGCCAGATCGGT -ACTCCACAATGTGTCTAACCGTCATATACCACCATTGGGGTTATCGGCACCTTACTTACA -ATTTGCGTTTGATATCAACACCCACTGCGTACCATTTCCAGTTGAATTTTAAATCGGACA -GAGATCATTCAGGTCCAGCCCTGAGTGACTAACAACTTACATCATTCGTCCATATAAACA 
-AGACGTATCGTCCATTCTTTCCTTTTCACAAAGAAATCGTCCAAATAAAACATGACATCC -ACCTCGCCCCCTCCATCCCGCTCCACACTCAGCAACAAGGCCCACAGATACCGTATACTC -TCAAGCCTTCTCTCTCTACTAGACCGATACCTCTCCTGGCCCCTTCCCCCAAGACCAAGC -ATCGACATCGTCATTCCCTCAAAAATAAGCAAAGCTCCTGGATCAATCCAGCTCTACTTT -TTCACAGCACCCTCAAACCCCCTCTCACCCCGCAAGCAAACAAAGCCAATCCCGCCCCGC -CCTGTCCTCATAAACTTCCATGGCGGAGGCTTCTCAATCGGCCACGCTCTCGACGATGCC -CGCTGGGCCGGAACCGTGCTAAAGGCCTACCCAGACGCAGTGTTCGTCAGCGTCGATTAT -CGCTTAGCCCCAGAACACCCCTTCCCCGTCGCCCTGGAAGACGGCGTCGATGCAATATTA -TGGCTATGGCAACAAGCCCAGAGATACAACCTTGACAGAACGCGCTTTGTGCTCAGCGGA -TCCAGCGCCGGTGGAAACCTCGCTTTAGCATTGCCATTACGGCTTCACGAGGAATTACAG -AAAAAGCGCTGGGCATCGCTACGCGGGGAGATAGCGTTGGCTGGATTGGTAGTATTCTAC -CCCAGTACAGATTGGACACGAACGCGGATAGAGCGCAATGCTACGAATCCTATCGCCGCG -CAGAAATCAATGATCTCCCCTTCTATATTCAAGTTCTTTGACGATTCATACCTTGTCGCT -GCGGGACTGCCGAAACGGCCTGGCACAGATCGGGTGGATATGTCGCATCCTTATCTTTCA -CCTGGTTTGGTCCCTGCATCTCTATTGTCTGCGGCGTATCCGCCTGCTGTGGCGATTTAT -ACATGTGGTTGGGATCAATTGCTTGTTGAGGGAAATAGGTTTCGGAAAAGGCTTGGTGGG -TTTGTTGAGGAAGGGAGTATGGCGAGTGTAGGTGGGTTTGTAGTTGAGGATGTTGTGCAT -GGGTTCGACAAGAAGCCGTCTTTTTGGAAGGGGAATCAAAAGCGTGAGAGAATGTATGGG -GATGCTATTAAACAGTTGAAGATGATGTGGAAAGTGGATTGAGATGTTTCCCCAAGCTGA -TTCTTTACATACATTATTACCCTTGCAGCAATATATACCCTTGGCCGAATTATTCTTAAA -GAAAAAGAAAACAAGGGTAGACGGCCGCTTATGCGGCCTTCTGATGTCTCTACAACCCAC -GGCTCTGAAAAATCTTCTCACCGGTCCTCTTAATGGGCTTCTAGAATATCGTTCTGAGAT -CTCTCAAGAATATTTTCTCCCCCTTTCCAGTTTTTCAGGTAGTTGACAGAAAAGGCATAA -GAAAACCATACTTTGTGATATATACTTTTCCATGTAAAACGTGCGATTTGCATACAATCC -TCGTTATTTAGACCCGCAACGTCCTGCTTAGGACCCCTGGACTTTGAATGCCTACACCTT -GCTGCCTTTTGCCCAGCAGAGAAGTGCCACGCCCTGAACGGTCTGATCTACTCTTTGGGA -ATCACCTGATACTCCGATCGCTGTCTAGTGCTTCTGTGGTTTCTTCAAGTGCCCGTAACC -AGCTTCCTAATAGTCACAGTTCTTTGGTTCTTTCTACTTTACTTGAAGTCTAAGACGCTC -AAGAACTCGTGATCCAGCCGGCTTGTGACACAGGGTCTGGCGATGTGATAGTCCTTGAGA -AGCTCAACAAGTCAAAGAATTCAGTGGTTCTTAAGATTTTCGTTCATGAGACAATCTGCG -CGAGGAAGGTGGTAAGTGCAACTGTACTAAGTAATCATACTTCCTAACCACACCTAGTTC -CATGATAGAGGCCCCTCTGAGCACGATTTCTCATCGGAGTACCTGCAAAAGCTTAGGGAT 
-ATACTCGAGGACATTCATCTCGCGGGGATCCTTCAAGGTGATCCTATGCCGAGAAACATG -AAGATTTCTAGGGTCCAGGGTAGGGTCCCCTGGATAGATTTTGATTCTGCCGACACATTC -TCTGAGGGCTTTGAGCAAGGCAAAGGACTTGGACCGAGGAACAGAATGAAATGATAGATT -ACTTTATCAATGCTTTAGTAAGGTCAGACCAAAGTGTGAGTGCGCCAGGCTCAAGATCAC -TAGGAAGGTGAATTTTATCATGCATATTCTTACTTTTACGGTTAATTGTTTAGACCGCTT -GTTCATAAATGGATATCACTGGCGAACGGAGGATGTAATAGAGACTTCTGTGAAGGTCAT -TATTTCAGTCCCAGCCAGTCTCCCAATGCGAAAAAGTAGCCAAATTATCCAATAATATGT -CGAGTATCACTGCCCTCACTATAATATAGTCGGCCATCTTTGTATACTGTACAGCAGTCT -GGACTGCTCGAGATATTGTCCGTGACGGGAGAATCACCTTAGCCGATGATTGTAATCGAA -CAGCTTCATGGTGGGCGATAGCGAGTGGACGTTCTCAGTGCGTCCAGTTGTTGACTGAGC -AGCCGTTCGTGGCCCCGATATTACGCCTCGTAGTTTCGAAGGCAAATTTGTCCAGCATGT -TGCACAGATGGGTCAGATCAAGCTGCTGTATCCGAACCCTAATAATCAGGACAGTGGCCC -GGTCGTTGAGAAGCAAGTCAGTCAGTTCAATTGTGGATAGCAACGGCCTAGTAGTACGGT -GGAGATGTTGTCGATGGACTTCAAAAGTGGATGGAAGCTACACGTATTTGTCAAGCATGT -ACCCAAGCTATATGTATCCAAAAGGAGCAATGCACAGGCTCACTTAGAAGAGTATTAATT -GTCTAGCTCTATCACTCGACATTTTATATAGGGGATATACTTGCTTCCCATCCGCAACTA -GATCGCATAGATTCTTAGAGCTATGGCACAAACGAGATACAAAGCCAAGCCATTCCAAGG -TTCTTTAAAGGTCTATATTAAGGCCTCCTTTAACGATCCTCTGTCAACAGACTACATGTA -TGTGACACTTCAGGGCTATCGAATCATGTGCCAGTGAACAAATGACCCTTAGCGGGGCAG -CCAGTCAGGCGGGGTAGAAAAGTCAGCAATTTTCCCAATTCACTTGACTTCTCACACATT -TGCGATTCTTCTTCAAATTACTTCAAATGTACGGACTGTGTCTTGTCTGCATCTCCTGAG -GACCTTGGTGGTTACTAACTCTATGTCAGACCTACTTCATCCACAATGAACGGACACGCT -GCCTGCCCCAAGTGCGGTGCCGCCACTGACGGCTCTGGCAAGAGCTGTGGCGCCTGCGGT -GCTGTATGTTGATTCTGCAGTACTTCTGTTTGAGCTTATACTAACCACACTAGACCTGCC -CTGTTTAAACCATACACTTTCGAATAGGCCAGGCCTGGTGCACTTGAATGTTTCGGACGA -ATGGTATCGGAGATGAGACTGCTTGCGGTAGCGATAACATGAGGATGAGGCAGGGCGGAA -AATGGATATCAATATGTTCATAGGAATAATATGACAACGGGAATGAACTCAAATCTATCC -GAAACCAAGTGCGATTGTTATGTACGAAAACAAAAGAAACGACGTTAGGACATTTCCCTG -AAATGAAAAAAGATATAAGACTGGTCGTAAGAGAAAAGAACAACCATCAGGATATAGAAT -GCGCAATAAGGATACCTCCTCGTCTCGTTCCCATTTGGGCCCGGGGTCCCAAAATCACCC -CAGTAGAGAGTCACTAACCTCAAACCTCATCAAGGACTGACTATACACTGCTTAAACAGG -AACAGCGGTCTTGCGCCACTGGGACCAAACACCGATCAAACTGCTGACGGTACCTGCGAT 
-ACCGACGAGGCCATCGTCGAGGTTAACGATACCAACGGCAGACAAGGGCGCGGCCAGATC -GCAAACATCGGAGATGAGCTGTATCCGAGCAGCAGAGCGCTCCCTAACAACAGTGTTAGC -ATACCATCCATTTTTCATTTGGGTAATAAGCAAGAATCCATACTTCTCAATCTTCTTAGC -CTCAACAACACCCTCACCCTCCTTGCGGTCAATAGTCTTCTCCTTCTCCTGGAGACGAAC -AAGAGTGTAAACACCAGCCACAGCGCTGAAAATCAAACCAGCGCCCCAGGAGCGGTAGGC -GGAATCCTGCAGCCGCTTAGCGGAAGCCAGCTTCCGAATGCCGATCGCATCAATGACTGT -GACGGCGTCGAGGGTCAGGTAACCGGCATAACCGAGCTGACGACCGATGGCCAAGTAGCG -CAGAACGGGGTCAACGGGGTTCTTGTTATCAAAGGCAATAGCGGCAGCCTTGAGATGCTC -GAGGAATTTGCCGATACGCATAATCTTGCGAGTCGTGCCGAACTGCTTCTTCACGGCATT -GTAAGGGTCGATCGCGGACTGCGGCTTGTTCGTCCGGTATAGGTACCACGCGTAGAAGCG -GGAGAAGTATTGGATTGTGCGGAGGAGCTTGTCGCGGCCGACTATCGATTGCGTATACGG -TTAGCTTGAATGCTCGGATAAGGGCGGGGGAGAGCTCAGCTGGGTTTCTCGGCCGAGTGC -TCGCCACTTAGGGGAATTTTGCAGCAGTTGCGATGCAAAATGGGAGTGTCTTGTTCGTGT -GCGCGTAAAAAAAACTGGGATGTTGCTTTTTGGAGTTCGGGGATGTCATCACTTACCGGT -TGTCGCGACGAAGCGTAGCCAGTGCGCCAGCGCAGGGTGGTAGATGAGAGTGTTGGCGAC -CATTTTTGTGTGGGAAATGGATGAAAAGCTTCAAACGCCTGAGGATGGAGGGTGTCAAGA -ATTAAATGTGATAGTTCATAAGGAAAGGGGAGTCGGTAATTGACCGGCGTATGGAGCCGG -AGGACCGACCTCGGCTGAACTCGTGCGTGTGGGGGGAGATCCGGAGGTAAGATGTAATGG -TATTTACCATTATTACAACATCCAAGGACCTTTGAGCCCTGAGTCGTATGAAATCCCTTG -AAATACGGTGAAATGCTTGATTTCCTCTGGTTTGATCGTTCAATGCGCATGTGCACTCGG -CTTGTTCCTTTTCACGTGAGCCTACATATATTCGAAGAATTATAAGATCGGTATAATCTC -TCGTTTTATCCCTTTTATGCTCGGCATTGAAGTTTCCTTCGCACCTTATATCCAAATTTA -GTTTAGTTCCAATTTCCGGGACCAAATTGATAGCATCACCATTGAACCAAGCACTCTTGT -TATACTTGTCCTCGGCCAGATCACATCCAAGGCCAGTCAAATGCATTCTCCTCCCCATGC -TCTCCTGGATACTGCATTACAACCCTCCTAGCAAACCCCCTTGATGGTTGCTATCAGCAA -CTCTGTCGTCAAAAGGAAAGTATGGAGGGGATTATATGTTTCTCTTTTAATATATCTCTG -TTCTAAGAGGGAAAGACATCGCAAGATTGGATGTACAATACTTGAAAGTGGACCGTGCGG -TCTTAGAGTAATTCCGGGGTATATCGGCGAAAAAATAGAATGGCGGAATTCCAAGGTTCA -CAAACTACAACACTGATGTAGTTCTATCTGGCATTTGATACCACCGTATGCGCTGGTCCA -TTTGGATCTAAATGGTCGATACAGTGAGTAAAATGAGTCTTCGGAAACTTTACAAGTCCG -TGGCTAATCCTTGCTAGAACCTCTCCAAGCTCGTCCGGGTATTTTGGATGAACACCGGAA -CATGAGCATTGACATCGTTAAATGATCACAAGTAGTCGAAAAAAGCAACGGGAATACCAA 
-AGGAAGCAAAAGGCAGTCGCAAGGTAATTGGAACTTGTTTTACAAGCTCAAAGTCTCGAT -ATCTAGTGTGGGAGCCATTTTATGTATCTGGATGTGGACAACATTGAAGTGCCCACTTAA -GAAATTGTTTATTATGCAAGCCAAGAGCTCATCTTCTGTCCAGCAAAATGCACCACCTAG -CCTAATCACCAGCAGATTAGGCAGGATATGCCCGCAAGTCTCTGATACATTCATGGGCCA -TGTCGCAGGGGCAAAGGGTCGGATATCTCAGGTCGCAAGGGTTGGGTAGTCGCTGTCGCA -TCGATGGAAATTCCTCGTTGATATTACTGATTGTGCATTGTCGCGTATGGGTGAAGTCCG -TGATTAAGCAGGGCTGGTTGGAAGACAATAGCCCAAAAGGGTGAGCAAGTATGTGTCGGG -CGCAGTGGGGTTAGTGATTGGGTGACCAAGTTACTCTTGTTCGCGATTTTCGATTTGGCG -GTTGCGATCGCCCATTGTCGTGGTGTCGTAGCCATTAGCCTGGAGATAGTCGCGGAACCT -AGCATCCTCGAAAGTAATGGTGTCCTGAGAACAGGTCAGCACATCAACAATGGACTCCAT -ATCCAGTAGTACAGACCTGGTTAATCTGCGGCAACATGAAAATGGCCATGACTGCGGCCA -AGATACACAGTGCGCTGGAGACGAAGAATGGGTCCTGGCCTGCGCGGGTTGGGTTGGGTG -CGTTCTTTTGGATAACTGGAATCACATAGGCGCCCACAAAAGCTCCGATTTTCCCGAAAG -CGGCCGCATATGAGTAGTATTGGCCACGGATAGCAGTCGCGCTAGTCTTGGCGGCGCAGA -GACCGATGTTGTCACCAGGTCCCATTTCACCGAGGGCAGAGAAGATTCTGTTCTCAGAGT -ATTAGCATTGACAATGGGTATAGTCGAAGAACACCGTTGGTCAACTAACCCGAACACAAC -AACGAAGGCTGCCACATTTTCCGGGGTCGCAAGCCACTTATAACATCCGGCCATGATGAA -CCCGATGATGCCCTGGAGCCCGACACCAATGGCAAGGGTGTAGCGAGGGCCAATCCAGTC -ACTCATGAAAGCTCCAAGGGCGGAACCTGGAATATAGAAAGAATTGACCAAGGTGGACCA -ACCAAAGCTCTTCCACAAGGGAGCGTTGTCGCCTAAGATGATCGATAGCCAAGCAGAAGA -ATAAATACCGAAGCTGTAGGCAGAAAAGTCATAAAGGAACCAAATAATAGAAACAACAGC -CTATCACACCCAGTCAGTTGATCAGTTGGCGCTTGTAGAGCCAACATGGAGAGAGGTTTC -GTACCAAGCGTTTCCAATAGAATCTAAGCTTGTCAGTCAACGAGTTATTGCACTAGGGAA -TGCCGTACTCACTTGATAATCAGCAGGATGGGAAACTTGTGCATCCGCTCCCGGGTGAAC -TCCTCGGGCTCATTCAACTTCAGTCGGAGATACATTAGACTCAGAGGAGGAATTACTCCC -AGTCCCAGAGCCACACGCCAGCAAGCATGCAGATGCTTCTCAGTGAAGATCAAGACCAGG -ATCATGGACACTAAAGCAGCAACCACGAAACCGAAATCAATCTGGAAGTTGGTAAACATG -ACAAACCAACGATTTCGATGACCTTGCTTGAGCTCTCCAGTATTCTCCGCCGCACCTACT -GATCCAGCAGGGTATTCGCCGCCGATTCCAACTCCTAAGAAGAAGCGGTAGGCCGTCAAA -GCAGCAAGCATGCCACCCAGGCTACCGCCAGCTCCATAAGAGCCAGCACCGAGAGCTGCA -AAAACAAATAGGATAATGGTTGAGATCATCAAAGACCATTTGCGTGACCAGTGATCACTA -GTATAGCCAAACAGCAGCATACCCACAACAGTGCCGGCAAAAACAATCGAAGAGACATTC 
-TGTTGGGCCGTCGAGTTGGTGTATCCATCCCCGTAGATTTTCTGTAGCATGGTACCGACA -GGACCAATGATCTAGATCCAAGTGTGTTAGTAAAATATCAGTAAATGTTCGCCAAGCTGC -ACACCCCTGTGAAATAGTATATCTGCAGAGCTGCACGAAACACAACATACCGCATTCAAG -TAGCCATCAGAAAACAGACCAGCCCCACAGGCAATAGTGGGCCAACTACGCTCCCAGCGA -CCTTTTTTGATCTCGTCAAGGGGTACTAGCTCAACAGGCTGCTCCTCGAGGGGATGCGCG -CCCTTCTCAGACATCTCCTTGGAGTAGGACTGAGGCTCTGCCATTGCGATTGTGTTGAGA -AGTAGCAACAAAAACTTTTGCTATGTTGGATCACGCGGCGAGCTTGAGCGTCAAATGGGT -AGTAAAGTCGGCACGAGACCAATGCACAGTTACTGTGTCACCGGATCACAATAACAGAAG -TCGCGTCGACCGAGTAGGAGCGTTTTACCTCTGAAAGCGAGGAGAAAGAAGAGATTTGAA -GCAGTGAAAAAAAAAAGATAAAGAGCCCATAGGAGGGAAAAATGCTTGACAATTAAAAAA -TATATATATTAAAAAGAAATATTAATAAAAAAAAATATCAAGATTGCTCGGGAATAGCAT -CAATCCAAGGCGCATGGGGATAAGTACCAAGTACCTATCAGAGAGTGCAGAGGCCGGTAT -ATGGCCAAGTATTGGAACCTGGTGGGACCCGGCCCCCGAGCTTGATCATTCTAAGCATTT -AGCTTTGGGGATTTCAGCTTGTTAAGATGTATAGGCCAATGATAGACATAGTAAATATCA -AGGGCGATTCTCCAGCCAATGGTGTGTCGGACTAGAGGAGCCGGTCGGGGTATGTTGCAG -TATGCCATAAAAAGGTCGGCCACTCCGAAGATTCCAGGGAGAAGACTTGATATCAGTATG -ACAGTGAACACGTGGTTTCCCATCTAAATCAGATACGTAAATCCATTCGAAGTGTCAGGG -ATGTGGATGTTGATACGAGAAGAAATGTGCAATCGTATATAGCCCGTGCTTTGGTAGTTA -CCCCATTAGGATTGGGGGCTGGAACCACCTGCCGGATTTTGACTGAACTGGTTTTATGGA -GAATCTACAGGCGTCAAGTGGCCAATAGTCTGGAACATGGAAATGAAAATGTTTTCGAAG -GATTGCATTAGGAACCAACTCCCTAAACTAGTGATATGCTGGGGCCTAGATAATTTGGAA -CCATTTTCATAACGGTTCAGCCCTATGTTCGTGGGATATCAGGCCCTGAATCCTCGGGCT -TGAGTCTTGGGCTTGAGTCTTGACCGTAGCACATGTGATTGAAGTTTGTTTACAAAAGGC -TTTCTTGCACAAGCCAATATACAACATACGGCATTCCCAGGAATTAGAATGTCTTCAGTC -GACCTTGGTCTAATGTGATTATTATAAACCAGAGCACACAAGGTATTGCAAGCTTATATA -GTCCCTCAAGAGCGAGATCTAGAGGTGCAACTAGTTCTTTTGTATAGAAGAATAAATATA -TGTTGAACGCAGATGCGACCGAATCTCATACTAGACAAACTGGTCTTGACGTTCGAAGGG -CGAAGGCTAAGGCTCGGCACGGAGAAACTTTCTGCCAGCTGAGCTCGGATTACGGTGATG -CACCTGGCTTGATCTCATCCCCAACATCACCAGCTTCCATCTTCTCCCTGCTCTATTGCA -TCGTCTTTTTTTCTTCTCGTGCTACTCCTGTCAGCATTCCCAGTTCCCCATCGTCATCAT -GGGATCTGATCCGCAGTACATCAAGTTCCCTAACCTCTCTATCGCTCAACATGTCTTCAA -CCTCTCCAACCCAGCATGCGCGCCAGCCGTCCAGCAAACATCATTGAAGAAACTCCAAGA 
-TGTCATCGTAGAGCAGAAAATGGCTCCCTTCTACCGGCATCTCGCCCATCCCACCGAAGG -CATCTTGAACAACTCCGGCGAAGGCGTAACACAGCTCCCACAAAACGGCAACTCAACGAA -GCCTCTCATCACGTCCAATCTTCTTGCATCTCGCAAAACACCGCTCAAGTTTGATTTCCC -TTGGGATGAGAATCTTTACCAGTCTCTACTGGAGGATAACAAGAAAGAACTGGAGACCTT -CCAGAAGGAGGAAGATGACGCAGAAGAGGCTGCTGGGGAGACTGAGGTACAAGCTGCGCG -CGGGAAGCGAGCTGAATTCTGGGCTCGCGTGGGAGATAAGGTGAGGATACTTATCGTTTA -TAAAGCTTCTTGAGTGTGCTAGCTGACACCAGTATTCCAATAGGACAAAGCACTCGAATC -GCACGAAGCCCTCCTCGAAAAGACCGGATTTCTCGGCACCAAGATTGATCTGGTGATGGC -CATGCTCCGCATTGGACTCTTTTTCGGCGACCTTTTGTTTGTGAACAAGACCATTGAGCG -AGCAGAGACCCTGGTAGAGAGTGGTGGAGACTGGGACCGTAGAAACCGTCTCAAGGCATA -CAAGGGATTACATTTGCTCACTATTCGTTCCTACAGCCTTGCTGCTCCTCTTCTCCTTGA -CAGTCTGTCTACCTTCACAAGCTACGAACTTTGCAGCTATTCTTCGCTAGTGATTTACTC -GGTGCTTGCGGGATCATTGTCCCTCAAGCGTGTGGACTTCAAGGCTAAGGTAGTGGACGC -ACCAGAAATTAAGGCCATCCTTGGCTCCGGGGAAGACCAATTGGCTGCACTGAGTGGGGA -AATCTCCTCGGGTCCAGGTGCCCGGGATGAGGAGATGAAGGATGCAACGGCGTCACTGCC -CACACCCGCCGGTGCCAAGACCGCTGTCAACATCTCCTCTTTCTCAACCGGCTCCGGCGC -ACCGATAGAGGCTGAGGTGCCAGTTGATTTTGCACCGCTCGCGAATCTAGTTACCAGTCT -CTATAACGGCAACTACCGCTCTTTCTTCAAGGCTCTCGCAGCCGTTGAGGATCAATTCCT -GACACAAGACCGGTATCTGCATGAGCACCGGGCCTGGTTTGTTCGTGAAATGCGGCTGCG -CGCTTACCAGCAGCTTCTCCAGAGCTACCGCGTGGTGGGGCTGAGTGGTATGGCTAACGA -CTTCGGTGTGACGGTGGACTACTTGGACCGGTATGTGCTATCGAATCTCTTCAAATATCA -AGTCATTTGCTGACCGTAATCACAGGGATCTTGCCAAGTTCATTTCAAACAACCGCATTG -CATGCACCATCGATCGTGTCAATGGTATCATTGAGACGAATCGACCAGATGACAAGAACA -AGCAGTATGCCGACGTGGTGAAGCACGGTGACTCTCTGATCACCAAGATCCAGAAGTACG -GTCAAGCTGTGCGTCTTCGGGGAAGTGAGCGAAGTTAAAGTGGTACATCTACGGACTTGG -AGGCATTTTAGACCTCAAAACTACAACCTATGATCAACTATGTATGCCTGAATGGAGAAT -CTTACTGTGTGTGTCCTTGAACGCATTGACACCATAAATCCTATGACATGCCACACCATG -ACATGATCCATCCCTGACTAAAAAAATTAATCCTTTGACCCGCACGGGGCAGCGGAAAAT -CGCTCAGCATCGGACAACCAGTCTGACTTTCCCTCGGAAAATGATCGTCGCCGATCATGT -TATCATGCATGAACGTGGATGATCCTCCAATTGGGATGTGTACACTGTATACACGCATAG -TTTTCATTCTCATGGACCAAATCCTCCGTAGCCGCATCTCCCGCCGGTGTCTCCAAATGT -CTGATATCTCAACATCCTTATCGTAATCGGCTTGTGGGGAAAATGCTGAGTCAGTGGTTC 
-ACCGCCCAATGGCCAGGGATATTTTTTTTCCTCCTTCTTCTCCCGATCGCAAAATAAGCA -CGACGGCGGTGTCGAAAGGGACTTTGTTTAATTGATCAATCCAATCCTTGATTGACCATT -CGTCAAAATGTCCTGGCAATATAAGCTCGCATCCAGTATGCGGGCCTCTCCGCTACGGGC -GCTTGGCCCTAATGGCCTCCTACATCGCCCGGCCGCACGTGGCGCTGCGGGGTGTCTCTC -CGCTAGATCAGGTACGTGAGATCCCGATATCACCCCAGCTTACATAGTTGGGACCTCCGC -AATCCTTTGGCGTTGGCTTTTTACCTCATTTTATGGTGTGACTCCTTGTCTTGGCCATAT -CTCCCCATTTCCAGTTGCAATCTCAACCTCTCAGCAACCTTTTATATACTTGATTGTCCT -CCCACCTTCAGGGATACCACATAGTCTCTCCATCCCAGACCCGACTAACCTCTTTCGGGT -TAACACAGAGCCCATCATGACTCAAACCACCGTCTCATTTACCGCTAAGCGCACCCTCTC -GGCCGTCCAGACCCGCAAATGCCTCAACGTCGACAACATCAACCAGCAAGTCAAGGAGGC -CAAATACGCCGTGCGCGGTGAATTGGCCGTCAAGGCTGAGACCTACCGCCAGCGTCTGAT -CGATGGCGACAAGTCCCTGCCTTTCGAAAGCGTCATTTTTGCCAACATCGGTAATCCCCA -ACAGCTCGACCAAAAGCCCATCACCTTCTTCCGCCAGGTCCTCAGTCTCGTCGAGAATCC -CTTGTTGCTGAAGAACCCCGAGGTCCTCAAGGAGTCATTTGGATACAAGCAGGATGTGAT -TGACCGTGCTCAGGCCCTTTTGGCCAATGTTCAGAGCGTTGGTGCCTACAGTCACAGCCA -GGGTGCACCGGGTATCCGGGACAGCGTTGCCAAGTTCATTGAGAAACGTGATGGCTTCCC -TGCCAACCCTCAGGACCTGTTCCTGACCAGTGGTGCCTCATCTGGTGTGAGCACCATTCT -GAATGTCATCTGCAACGACCCCAAGGCCGGTATCCTGGTCCCTATTCCCCAGTACCCTCT -TTACACCGCAAGCTTGACACTGTTGAATGCTCGCTGCGTTCCTTATCTCCTCGAGGAGGA -TAAGGCGTGGGGTACTGACGTCAACGCCATCCTTAAGTCCATGGAGGAGGCCAAGGCCGC -CGGCACCGATGTGCGCGCCATTGTCGTCATCAACCCTGGTAACCCTACTGGTGCCTCCCT -GAGCGTCGACGACATCAAGAAGGTCCTTGACGTCGCTGCCGCGGAGAACTTGGTGGTCAT -CGCCGATGAGGTGTACCAAACTAACGTGTTCAAGGGCGAGTTCGTCTCCTTCAAAAAGCG -TCTTCGCCAGCTCCAGCAGGAACAACCCGGCAAATACGACGACGTCGAGCTGGTCTCACT -CCACAGTATCTCCAAGGGTATGGTCGGTGAGTGCGGTCACCGCGGTGGCTACTTCGAGCT -TGCTGGCTTCGACCCCCTTGTCCAGGAGCAGATCTACAAGCTCGTCAGCATTGGTTTGTG -CCCGCCGGTCGTCGCACAGTGCCTGCTCGAGTGCATGGTCAACCCTCCCCTCGAGGGCGA -ACCTAGCTACCCGCTTTACCAAAAGGAGTACAACTCCATCTCGGAAGGTCTGCGCGAGCG -CGCCCATTCCCTGTTCAACGCCTTCCAGCGCATGGAGGGTGTTGAGCTCCAGGAGCCCCA -GGTATGTATTACAAGCTTGCTCACATTGTTAAGCATTTGCTAATCATTTCGACAGGGTGC -TATGTACCTCTTCCCCACTATTCACCTCCCCCAAAAGGCAATTGAcgccgccgccgccga -gggccgcgccgccgATGAGTACTACTGCTTGGCCCTTTTGGATGCCACCGGTGTCTGCGT 
-CGTCCCCGGCTCTGGCTTTGGCCAGAAGGAGAACACTCTCCACTTCCGCACAACCTTCCT -TGCCCCTGGCATCGACTGGGTTGAGCGCATCGTCAAGTTCCACTCCGAATTCTTGGAAAA -GTACCGCTGAACATTTAAAATGCATAAAGGTATATAAATGTGAACTGGCCGTTCATCGCC -ACCTGTTTCCCATGTATATGAGATGGATTGGCTTCCATTTATATATCGATATGTTCATAC -CCCTCTACGACGCAATGTTCGGAATATTTGAATTTGCATATGGATTTGGAGTTGGCTCGC -CCTTGATACCTATAGACTACTGACTCTATTGGGCTTGCTATCTTCGATTCTCATTCTTGG -ATACTCTGAGACACTTATGCATCGCTTCTTGAATAACGGTATACACTTGCATCGTTCTGT -TTGATTTGGGTCCTGTATATTTCGGTTTTCCTTTTATAGAGAATGAATTCTACTTCATTG -ATTCCTATTCTTCTATCTGGTTCCGTCTGGTCCCGTCCTTATCCTCATCTTTGGATGAAG -GTAAGCGAACGAGTGAAGAGGGAGTGGCTGGGGGGTATTAGGGGTAAAAAAAAGGGGTGT -CATAAAGTGATTGTTCCATCCGATAAGAATAATGCAAAAAAAGAGTTCAGCGAATATCAC -TTCTCACTCCTCGAGTCCAAAGGAACAAAGTAAACCATGATGATCGCTCCAGGGCGTGAC -TGTAGACTGAGTCTCCTGGGACTCTGGTATATTCGTGTCACTTTGAGGGTTCTGTTGTCC -ATCTGACTCCCCCACTCGCTGAGCCTCCAATATTTCAATTCCACGCGCTTTGAGACCAAG -CAGCGCAACTCTGTCCATCCGATTTGGCGGAAATCGTTGCTTCCCATCAGCTCCCCATGT -ATACCCAGGTTCTTCCGCCCGAAGGGCTATCCAGGCATCTGTAAGTCCATTGGACTCTAT -AAGGGCGGCATCTTCCTCAAGAACCGGGTTGAAGTCACCAGCAACCAGCCCGCGGCCTGC -ACTACGCAGGAAAGAGGAGACAATAGAGAGTTGCTTGGGTCTGTGCGAAGGTTTGATAGG -CAATGAATCCAAATGAACATTTGCAAGCCGTACTCGTGTGGTAGAAGAAGCGTCTGTAGC -CGAAGGAACGAAGAGATCACAGAATAGTACATCTCTTCCGAAGTGACTAGGGAAAGCTAC -CCTCCACACAGGCCCTAGAGCGAACCTGCTCATTCCGGAGCCTGGTGATCGAAACCGTGT -TTTGGAAACTAATGTCATCGTCATGAAAGAATGTCTTGTTGGCGAGTTCTCATGTTCACT -CGAGAACCACGATCGACGGATACGCTCATCACTCAAGATCAGTCGCAAAGCACGCTGTGA -GACTTCTTGCAGGAATATCACGTCCACCTTGGAATTTAGCTGAGTGATGAATGTGAGAAT -TTCAGTGACGCGTTCTTGCGTCCGCTCGGATAGTGCGTCGATATTCCATGTCAATAGTAC -TAGATCCGATGATGCACCATCGGCGTGGGGTTGCTCTGCAGTTTCGTTCGCGCTCACTGG -ATCCCACTGTTTGGAGTTGGGGTTGAACTGGTGCCAGCACTGGAATTTTGGATCCGCTGA -TAGGTCGGTTGGAAGAGGTGTGTCGTGTTTCCAGGACGTTATGCTCATCCGAACGCGTGC -GTAGATATCCATAGTGAGGATGAGGTTGATTACTTTTAAGGTTGCCTGGTGAGGTTTATG -GTCCTGGGAAGGTTTGCCCAGGCCTCGGGGTAGCCTTAGGATTAGTTAAGCATTAAGCCT -CAGGCTGGTGTGACAGGGGTGAAGTGATCAGATGTAGAAATTATTACGTTCATAATTTAC -GGGTCTTTTGTAGTTACAAGAGCAGTTAAGCGGCAAAAAGCAAGTATCTCATGCTTGGAT 
-TCAAGGACATCAAAGCCTGATGTAGAACCCGCCGGTGGTCCAATATCCATCCGACGGCAG -TAAGCTGGATATGTCTATCTGCTGCCCTATGGATTACCCCAACTATCCATAGAAAGGATC -AGAGAAAATGGTTGAAATTGACAAGTCCTTAATTGACAACAAATGGCCCCCACAACCCTT -CCTGTCGCACGGTCCTTACTAGGAACTGCAACGGGCCCTCAGGGGCCAATATTGGACACA -GGAATCAAGGTTCTGTCTATCCGTACGTTCTAGTGCCTCGCTTCACTCGGCAGAACCCCC -TGGGTAGAAACTCGTATACAACCGTTCAATGCCCGGCGTTCCATCAAGTCGGGCCTGTGA -GGGGTGTCGACAGCAAAAAAAGAAGTGTGATGAATTAAAACCCAAGTGTTCGCGGTGTAT -TCGCGCAGAGATTCCGTGTGTCAACAACGGAGCCCGACGATTCAAGTTTCAATACTTGCA -AGTCACCTCGAAATCAGAAGAGCAGCCTACCGAGAGCTCGGCCAATATTGTGGCCCCCAC -TACAGCCCACAGGGCCGACGACAGTCAGTCACGAGTCGCAAACACTAGGGCTAAACGCGC -CCAACACATCAGCGGTAGAAATGGGTGCAGTCACTGCAGGTACGCTCCCTCAAGCAGCGC -TTGGCAACCAGCTCTAACCAAGTCACCATTGATAGGACTCGGAAAGTACCTTGCGACGGC -ATGAGGCCGGCATGTACACAGTGTGTATCTACTGGCCGAAAATGTGACAGTTTTGTTACC -ATATCAGCCGTTGAGCTCTCGAAGCCACGAAACCCTCCGACTTGGGCCCTGCAGGCTGTA -GCCAATCCTATCCCGCCTCTTCCTGACAAAAAGCCCAGGGAACTGCGCAGCTTCCGTTTC -TTCGTGGATGTGACAGCACCTACGCTAGGAGGCGTCTTTGATCCTACATTTTGGAAAGCA -GAGATTCCTAGAGCCTGTTACCTTGACGGTGCTATCTGGCATGCCATTATCAGTCTCGCG -TCCGCTCACGAATCTGCTGTTTCCACTGTACCTGTTGGTATGCCTATAATGCCTGACAAT -GTTCATACCCTTCTCCACTATAACCTAGCCGTCCAGAACCTGCTCAAGTCATACTCACCC -GAGGGCTGGTGGCGGGTTCTTACCCTGAGCATTCTTTTCACCTCCATCTGCTGCTTGGAG -AACAAATACCCGGAAGCACAGATGCATTTCAAATATGGATACAAGCTGATCTGCGATATT -TCTACGTCTGACCAACATGACACGCATGGGCCTGTGCCTATTGAACGAGCTCCGAATTGG -ACCCGGCTGCAAGGCCGAGTACCCATACCAGTATCAGTCGACTCACTTCGATGTATGGTC -GAAGCGTTTGAACGGCAAAATCGCAATCTCGATGCTGCTAAATCCTTATAGCTTATGCCA -CTTGCAAATGCTCTGGTGATTGATGTTACTCGTCTTCTACGGCCTTGGACAAAATTAGCA -TTGTAAGGAGAGCAAGACCAGGACCCCGACGCAAGTTGATCTTTATGAGACCACATAGTT -ATACCCCGTTGTTTACATAAGGATGGGAAATAAAAGAAAACGAATGGTGGTTGCATCCCT -CGTACAGTTAAGCGATCTGTCGCAATGAACTTGCCGACCGGATTCTGCTCGGCGATATAG -TTGCTTTGACATCTGCCTGGGCTTGTTCACAAATCATGATGGTTTAGTAATATATTTTCT -AGAGTCATACCACCACCTCGGAGTTATGATAATAGGTGGGCGATTTTAACGACTACCATA -CGCAGATATCAGGAAGATGGCGATAGCGAGCTTAGTTCTCACATGTATTATGGATGGTAA -GTTATATTACCACATAGATAAGCCTTGATTCATGCCCAGACGCAGAGAGGTAACCTTCCA 
-GGGTTTGTGGAGATGTAGTATTTCGTTATCTATGCACGCTACATCGCCAATATCACGTCA -ATTAGCCTGAATCTGCGGGCCGATACCCAAATGCCAAGTGGCTCTTTATCTGTATTCTCG -TATGGAAATATAAGATATCATTCGTTTGCAAATATTTCGTGTTTTCGAAGATATCCGCAA -AATACTGACCTTGATCTTATAGTTCCGACGCCGAATTCAGAATTCATGCAGGAGTTTTTT -ACTAATTTATATAGCATCCTTAGGTATATCCATTTTTTGTCCTCGGAATTCGTAGTGGAC -TACTTGGATAATTTGATGAGTCAAAATAGAATTCGTCGGCCTTTTCACTCTCGTCTCCAT -TTTCTTTATCTCTCTCTCTCCCCCTCCTATTTCAGACTCACCATTCATCGTTTTGGTCCC -ATCACACAATGGACGTTCTATTGCCGAGAGAGCTCTCATCCTGGGGCAATTAATTGTCAT -TATTCTCTTGTGGAACTTCATCGCTATGTTCTTCTTCGCTTGTTACCGCTTAACGCGTAC -GCTGTCTGCCCCAGTGTCAGACCCTCGGATTCCCATAGCGTGGGAACTATCTTTGACTAG -GCGTCCAGATCCAAATATCGAAGCAAAACCCTAGTGTTGCTAGAAGGACTTCTAGATATG -ATGACCTATACCCTTGATCCAGCGCTGATAACTGCAATAGGAGATGCAGACATACAATGG -CTTCGCGGCCAGGCCGTAGCCCAGGCACGCCCACGTTCTCTCTAGGACTTTCTGCTATGG -AGATTGCTTGTTTGACTGTATTCCCTATCGTAGTTGTCAGTGCTGCACCAAGGATACAAG -TGTCATATATCTCAACTTTTTCTCGCTTCGGTCAGAGGTTTTGAATGCCTGCGTCTTCTC -AGTCCAATGTTTGTTGGGGGGGTTTTGCTTGTGTCATTGGAAATATTCCGAGTTGTTGTA -TATTGGATCAGGCCAACGGGTGATTCAGTGTTTTCAAAAGGAGTGCTAAGCTTGGGCAAC -GGTTCAGAAATACCTCTGAATTATGTCTCAACTTGCTTCTTTCTCCCTCGACTGTCACTA -TCCTGTGTGATCTTTCTGTCAAAGTTTGTTCCTCCTGCGTATTCATACCCAGACACATAA -ATCTACGAATGCTAGAATATGTGTAAAAGAAAATCAAAAGATGGTAGTAAAATGCCAATT -CAAGTTCTCACGGGGGTTTTCAAGGTCATATAGAAACTTATACCAGAACTCAAAGTCGCT -TTTCAAATGCTCATTGAATTACCCTGCACTCTCTGCAATCGAGCCATCACCAATCCATCG -TCTTTGGCCGACGCAGAACGAAGATCGATCATCAGCCGCCCACCCCTAGAAGAAGACAAC -CTCAGCCAAAATATCTGGGCTAGGGTGAAATTTGCTGTCGGGGTGGTTTTAGTCGCTTTT -TATACGATGATGGAGAAGATATATATGAGGAAGGGGGATTGATCTGTACCAGACCCACTA -CAACCCTAAAAAGAGACAACCAAAGACGACGATGTAATAATCAAAGACTTGAAGCGTTTG -ATCCTGATATGGAGTTTAGGATAGTCTATCCAGCTAATTAGAGAACATGCAGGGTCCATA -ATTCTTCCACTCGCTAAACGCTTATTTCAACCCGCGAGCCCAGACCGAACTCCATTCCTT -GTCCGAGTATGTTCTAGGAAATACCTAGTACCCATCTGCATTCCATCCCAACAAGAGGCT -TGTCACTAGCACATAACCCAGCACAAATTATACATATCATACCTCAATATGATATGTCCT -GTGACACAAGAGAAGCAAAGAAGCGGTGAGATTCCGTTGTTCCGAGGTATGTAGACAAAA -ATGAAAGTCACGACGACAAAAAACACACACCGATTTATTTATCACTATGACCTCAAGTGG 
-ACCAACAGAACTATATCCTTCTAAGCCTATCTTTCTAGATCCACCCTACTAGATCTATAC -AAATCAACGATAGTCTCCTAACCCAACAACCACGTCGCACATCATCCCTAACTCAAGACC -CATCCACATATGCAAGGCGTATAAACAAACCCACCCATTCACATACACCTTACACTATGG -TTTCGGTAACCGGCCAGCGACATCAGACCCACTCACACTCCATCGAACACGCAACACCTG -ATCGTACAGCAAGGTGCAGCATACAGTAGTCACACCGCACCTCGTGCTCCACGTCCTGAC -TTGCGTTGTCTTGGCTAATGCTTAGCTATGGATTGGCGTGGGTACAGGGGTCTTCCTTGC -ACGGTATTGTGCATGAATGTACGTTGCACTGGGATACCCAACTATTGTAGGGGGTATGTA -TATGTACTGTGGTATGTAGTACGTAGCTGAAATGGAAGCGTGGATATGGCTAGTTCTGGT -TTAGTGTGGAGTATATACTATGTATGTAGTACTCGGCACGGAGTATCGCGTTGTTTCTTT -GGTATTGTAAATTTCTTTGTGCGATGTCGAGACCGTTTCCCTGTTTCAGAATTAGGGAAG -GCTTTTGTAGATGTCTCAGAGTGAAGGATCAGAGTGAAGGATTGAGTTAGAACTCGATCA -TTGGGTATTAGGGCTAGACAATTTAACTTGACCAATCCAGGAGTCGATCTGATGAGGCAG -CTGAGGGCCACTGTGGGTGTAGCTTGGATTTTCTCACCGCTACATCCCGCCTGACCGGGA -TCCTCTTTGCATTCGATCGACTCCGGGCCGGGTGATTGCTACTAGTGTTGATGAATACAG -GGGATAGCATAGTTGGTATTATATTGTTTTACCTAGTTGATAGGTAATTGATTCGTGATA -TCATCGGCTTGGGGTATATCAGCTAGCGTTCTGTGCGGCCCTATATGATAGAATATGGGG -TGCAGGGTGATGGAAATCCAAAGGCAAATCTCGGGGTCTGTATTGATCCGCATTGTGCAC -GTATCTGGGTATCGTTGTCCGGAGGCAAGTGTATGGAGATGGAGCGCCCGAGTCATTGTA -CATGTAAAAACAGAACAAATAGAAAATAGTCTATACATCGGCGAGAATGCAAGGCCAAAG -TAAAAGCAAGCGTGTACGGAAGCAAAAGCAAAAGAATGTACAGGCACGTGCAAGTGTCCA -AATCGCAAACCCTTCTAGATTAAATGGCTATCAAGGAAGGAATCTAGACTGGCTAATTGA -ACAGGACAAATCAGGCATTTATCATCATCTCTCATCAAGGTCGTGAAGACGGTGTAGAGG -TGGTGGTAGATTGCCACAAGAGAGCAAGCCCCAGGACgaaggaagtaagagaggaagaga -agataagcaaggaTAAACGAGCACCGTTTATTTTTTTTTACTGAGCCCAGTTTTGTTGGA -AGAAACTGTCGTCAATGGGCGGAGTGGACCCAGCATCGAAAAGACCAGGCCAGATGGGAA -CAGAGTCCGATGGCGGCGAAGGCGCACTGGTAATCGGGGCCGACGTCATAGGCTTGTACA -ATGGGATCTGCGACATGGTGTAGATATCTTGAGCAGGGACTGGTTTTGGCTCCATGCTCA -TGCTGGCTTGCAGCCGGTGCTCGAGGGCAGCGGTGGACTCATTAGTCGATTTGAGGAAGT -CGTCGATGTTGATCGAGTCCAGATCATTGGACGACAAGAGACGCGCCTGAGGTAGACCGA -CCAATGAGGAGGCAGAGCTGACACGGAAATGCTCCATGTCAGACGCTTCGCTTACGCTGT -GCAGATCGTGCATGTCATTGCTAGCATGACCCAAACCCGGAAGCGGTCCAAATTCGTCAA -GCTCGGAGATGTCCCCGGACGACGAGGGCGGCGGGAGACCCGAGGGATAGGCGTTGTAGT 
-CGAAGCTAGCATATGAAGGAGCTTGCGTGCTATTTGCAGCGGGCACGTCGGTATATAGAG -GGAAACTCGACCAGTCAACCGAGGGAGCGCCAATGCCGGCAGACCCCAGGGGCATTTCGG -AGTCTGGGGAGGTAAAATAACATTCTTGATAAGGTTCTTGGAATGCCATACAGGTGATAG -GAGAATTGGTGCTCGTGGTCGGGAAGGAGGCGAAAGAGGAGATGTCAAGGGGAGGAACGC -TAGTGACAATCTGATCCAGAGGGAATTTGTCCATGATTGCGGGGGTAGATGTTGGGTTTG -ACAGCACATCGTGGGGCCGGTCTATTGACGGAGCGGGCGGTGCTGGCGTAGCGAAATTGT -CGATGTACAAGTGATCACGAGGGGGAGGATTATAATCATCTTCGGCACCCGAGCCAAGCG -CGCTGTTGGGAGAACCATTTTGCGACGTCTGTTGTGATGTGGCGAATTCGGATAGATTAA -GAGGTTCCTTGATGAAGGCCGACTGGCCGAGCGGCAAATGATCCACGGACCGACGAGGTA -AGTCGGCTGGGTGGTGGATGGTATGCGATCGTGGGATCGTATAAGGCAATCCACATTTGT -GGGCCATGTCATTATGCTTGTGAGCGGGTTTATGATGACCGTCGCGGAAAATCGTGAGCG -TCGATTCCGATTTGGTAGACGTAAGAGGGGGTTTCCGTGGGGTCTCAATTGTAAGCGGGG -GTGGAGAGAAAGGAAGACCTGTTTCGGGCACGGTATCTAGGTGTTCCTTTTTGAGTGCAC -AGGTGCAGCGCTGGCCATGGCAGCAGCCACAGCGACCATGGGCAACGCCGGAATGGCAGT -CTTCTTTCTTCTTCTTGTCACCGCAATCACACTGGGTGTGAGTTGTCCGAGACTTGCGTA -GGCCGCGGCAGTGAGCACATTGAGAGACTGGGCGGCCTTTTTTGTTGATATGGGTCAATT -GGCGGTCTAAGGAGAATTAGCATAGTGATTCATTGGACGATGATTAGAATTGGCGTACCG -CTGTGGTGACAGCTGCTGACTCGGTGACCCCGGACGCAAGCTTCACAAGCATACTTTTCA -CCATCGATGAGCATCGCTGCGGTGTTACTGGAAGGTACACATGCGATGATTCGCGAGGGA -TATCAAAGAAAATCGAAGTATTGAAATAAAAGAGATTGGCTGAAAAGACCAATATATCAA -GCCGTCAAGGCGATTGGCAACGACCTCGAAGGGAGGTCGGTCCGAGCAAAACAAAATGAA -AGGGTATAGTCGTATAGTCGAAGGTCCCAATGCGAGGAGTTCACAGAACGCCGGACTAGT -GTGAAATGCCGTTGAGAGCCACCGCACAGTCTTCGATTGGAACAGAAAACAAAAGGTAAA -TATTTAAATCCAAAGGGTATCTCAGGCGCGAGGTAGGAGGGGTGGTCAGGGATCGgagag -taagagagagagagagaggagagagGGGGACCAAGCCTACGCACCGTTTGGCGCCGGCCG -TTAGACAGGACACCAAGTACCGGTAGTGGCGGCCAAGATAAAAAGCGTTCGATAAAAGAA -GACGTTGGACACCCTATGAAagaggaaaggaagaggagaggaaggagGGCGGGTAGCCGT -TAATAACGGGGTATGGGACGAGGATTTTCGCTGCGCCACTGGTCTTTATAGCTGGCCGTA -CCGGTACGAGCGTATTTTACGGGGACTGACTAttttggctacttttggctattttggcta -cttttgctgctttggctacttttAGATTGCCTAGAACTGCTTTTGTTTACAACAGTAGGT -ACTGGGTACTAAATTAACATTTAATTGACACTGTGTGATACTGGACATGTACATAATATC -AAGAAAACCAGCACACCAACAAACCACCAAACCCTTGATCTTCCATGACTCCACATGAAT 
-GGGTTTACAACGCACATTTGAATTTAGTTTTGCTTCTTTTAATCTACTAGTTTTATTTGT -CTCCTCTTTTCCCTGTTCCTGTATTAAACTGTCTTAATGCCCAAACTGGCAGCCCCAACC -GGATAGATAAACGTAAACGCATAGGAGTTCCCTGAACAAAAAAAATCTAGCGTAACTCCA -CTAATGAAAAAAAATGGAGCCCAAATCTGGGGCCGCACATCCCTCCAAAGGACACAAACT -AGAAAATAATATTGTGCGCCTGGGGCTGAATCTTTGAGATTGTGTTTCAGGGTCCTAGCG -GTTGATCTGAAGAACTTTCCATAATCCCTGTACTCAAATATAGGACATAACTTGGATTGG -AAAAGAAGATAAGAAATGAAAAATGAAAAAAACCGAAAAACATTGAAAAAAATAAAGAGA -AAGGATAAAAAAAAGCCAGTGGAAGGAGTCCACGTATACATAGTTACGTATCAATACATA -AAAATACACAAAAATACACATGCGTGCCATACCCAAGTGGGTTGATGGCGTCAGTGTTCC -TGCTTCGAGCGGCCTGCAGAGTGAGTTTGGGGCCCTGATCGGCATCCTCTTTACCCTATC -TATGTTTGCATTTTTTTAGGTAAGATATACTCTTGCATTCGGAATTTATGGATAATATCT -ATGTGATTCAACTTATTTGGTTAACCGGGAAATTTACCCGGTTAGTATATATGATGTAAG -TGGAAGCCATAACTATATCGAGCGACTTGGAAGTGATATATATCATGGGGATCTACAGTT -TCTCCGATCCGATTAGTTTCCAGCAGGGTTCTCAACCAAATGCCAAAAATTCCTATCGAT -GAGAAAAAAAGGTGCCCGTATGGCGCTATCTCTCAGATGTCAACTACCAGATGCATTGTA -GGTTCCTACCAATATAGGCCAGAAAGACAAGCGCCGATATTTAAGAGATACTATTAGCGA -ATCTGAGATACAACGAGGCCCCCATTGCGCAAAGAAATGGTCAGATATACGTAATAAAAG -GTAGGAGTAAACCAATCATAAATTGTATACTCTTTGATACTTTTTGATTCATTTTATCTC -ACATGATAAATAATAAATAGCGACTCCAACTCATTGGCATCAGGCTTGAACAAATAGTTC -AGAAAACCAAATCATTCGACACTTTTCGTAGGTTGTCGGGCAGATGTGCGATACATGTAA -CCAAAGACTACTGGTTTCTCGATTTCTCGAAAATGATTGGTCTAACTACATAGTACCACT -ATTGGGGGGGGGAACATCAGCTACGACGCTAGGTGAAAGGTATCCCCGGAAGGATGAAAG -TAATGTTAACACATATAAAGCATTCGCTAGATAATCATATATCATTCGCTACTTCAAAGG -CATGTTAAATACTCGCTGAGGCACCATTGAGCGCCCTCATAAAGTGGCGGCACTGGTACA -GGTGCAGGTACATGCTCATCTCAACATAGACGGTGTAGATATTTGCATGGTTTTTTGTAG -AGTACTTGTGTAGACTCTCATCAGTAGAAACCACGTCAATATGTTCGAATATAGCAGAAC -GAAGTACAATAGATGTCGAGGTGTTCTAACTTGGCATATTTGAAAGTGTAACCTTGTGTA -GTTCATCCGACGCCGAAGGGGAAATAGTATTAATCCGTTCGTCACGCAGAAACGAAAGGC -CGTCCTCCGTGCGATCATCCAAAAAAGACTCGGGGAAGTAAACAACTGTATATAGCTTCA -AAATAATATCTAATACCAATGAATTCTCTGTACAAATGTTGTTGCTCAATGCGGCGCTTT -CGGCGCTAGCCCCCTTGAAAATTCCTCCCCCGCGAAATTTTAGTTGAAACTTCACTTCTT -GCACCTCGCGTCCTACACTCTCTTCCCTCCATCTCCCGGAGGCCCGCTTTGCCCACAACC 
-CATCATTTTTCGTGAGGTTTTCGGTTATATATCGCGCGATGAACTCTCGCGAGCAGTCTG -ACATGCCTCCGGCGCCTTCTTATCCGTCTCCTAACGGTGCGCAGATGGCCCAGGGCGTGC -CTTCGTATTATGGCAACCGTCAAATGACCACTGATGAGCTGCTATCCGCGGAACTTTCGC -GTGATGCCTCTGGTCCAGGACTTGGCGAAACGAACAATGGTGTTCACCATGGCCAGTCTA -TGGTGCTGGGTTCCTCCAATGCTGCCGACATGGGTCGAACCTCGTCGGAGGATCAACACC -AGCACCAGCATATGCTCCAGTTCCCCCCGAGTCAACAGGTTGGCGTAGACCCCAATCACG -ACTTAAGCTATGGCGAACAGAGCGCGCGGAAACGATCTAAGATCTCAAGGGCTTGCGACG -AGTGCCGCCGAAAGAAAGTACGATGACAAACTCCCCCCCTCCCCCAACGCGATTGTGTAC -CCTGGAAGCTAACACGCGTTTTTTTCCCCATAGGTTCGTTGCGATGCGAGCTCGGAAACC -GGTCTTGAAACATGCTCCAACTGCAGACGTCTCGGCGTTGTGTGTCAATTCAGTCGTGTC -CCGATGAAGCGCGGTCCCAGTAAAGGGTAGGAAAAACAAATGCGCCACCACAACAATCGC -CATGACTAACGGACACAGTTATATCAAAGAACTCGCCGAGCGACTCCATACTCTCGAGAA -CCAGATGCAACCGGGAATTGTCCAGCCTGACGTGCCATACCAGTCCATGAACGAGGTGTC -GCTACCGAGAGGTTACCAGGATTTCGCCTCGCCGGTGGAATCGACCTCGGGCAATCGCAA -GAGAACTTACTCGGTCTTCGAAGGTCTCCCCAGTTCCTCATTTGCACAACCTTCTTTCAA -CGCACGCGTATCGCAAAATGCCTTCGACGCGTCTGAGACAGCGGCAGACCCGTATAACCC -AGCTGTCGCCAACGGTGGCGCACCGAAACCTGGCAATCTGTTCTGGAACCCGACGGGCCA -TGAGAATGATCTCCCAAGTGGCCTTGAGATGACGGATTTGCCAAAACATGAAGGCGATGA -CGACATGACACCGGTGACTCTAGATGAAGGAGCCCTTGATGCGTACGTCCAATGTATAAC -CCTCGTAACTACTAGACACTCAGCTGACATCGGACTGCAGCTATTATCAAAAAATCCACA -CCCTGCTGCCGATTCTTCCGCACACCAAGGAGCGAACGTTGGAGCTTCTGCACCAATGCA -ATCGCGAGGCACAGGAAATTTTCTGCCATGCATTGTACAGCGTCACTCGGACTGACTTGT -CGCGTGTGGCGGGAAATTTCGAGAAGACCACCGGCTTTGATAATGCTCAGGATCTCCTCA -TGTTCCACACCCGCCAGCCATTGATTGTCCAAACGACCCCTGTCAACCTTATCTGGCTTC -AGTCTCTTTTGCTGATGATTATCGACTGTGACTCTCGTGGACCCGATAATTTTGTCCTGA -AAGATGGTGTCCCCAAGGGCACCTTAGTCCAAGCAGCTAATAAGCTTGGATACGAGTTGG -CCAAGAGCCAAGGCCAGCTGAGGAACAAGCGATCTGTGGATCCGGATGTTGACTCGGATG -CAAATCTCACCCGGCGCAACTGGGTGTCTTTGGTCATCTTGGCCCGGTGGTATGCTATCA -GTGTAGCAGATGCCACCGTTCTGGCCGGTCATGAGATTGGCGGCCGTGAAGATGAAAGGG -TTGTCGGCCAGATCACAACCGGAATTGCCTGTATGTTGATTTTCCTTATTTTGAAATTGT -GCTATTTTACTGACTTTTTGCACCAGCTTACTCGACGTTCCTCTCGGAGATGGTCACTTT -GGCTACTGTCGACCACAACGTTTGCCAAACCAACTCTGGCCTGGGTCGGATCATTGGCGC 
-GAACCTCGTCTCGTCCCTGGAGCGTCTAGCGGAGATGGAGGATATATTCCAGATCCACGA -GTTACCCGAGAACTCCAACACTCGCCCACTTTACGAGAGCCTCCAGGCCCAACTATACTG -GACCGTCCGTCTGCTCATCAAGCGTCACGTCTTCGTATATAGCCCTTACGAGATCATTTT -TTGTGCGCAGGAGGTCATCAACGAGATGCACAAGTCAACCATGCAATCCCGCCTCCCATC -ACCCTTCGATCTACACAGCCTAGCCCTCGCCTCAATGACTCTTCTTGAGGCGACCGTTCT -CCCAGAACACGCAAACGAATGCTGGGCCAGCCTCGAAAAAGTTGAGGAGATCCTCGATCG -CCGCTCCAAGCGCGCAGCAGAGGGCTCCGAGTTTGACAACATATTCGGCACTCCGGAGTG -GGATTCCAAGATCCGCATTTTTATTGAGTGGCGCCGTATAAAGTCCCAGGAGAGCCAGCT -ACAGGAAGCTGACCTCGGCGGCCTCGCCAAGTCCCAGCAGCCGGTCATGGGCCCCAACGA -GCAGCGTTCTCTCCAGCACCTTGCAGATCTTGCAGTCGGTGCTGAGGGCTCTGTCAGCCA -GAATGCGCCGTCCACGCCTCCTCCCGGTATATCTAATGAGCAGGGCGAAGCCGAGCAGAA -TCTCGCTCCCCAGTTGACCCAGTCGCAGGGCGGGTCGGGCCGCGTTGTTGTTGACTTCAC -TATGCTCACTAAGGAGGGTTATCTGAATGTCTTCTCTGGCTTGATCTACCGTCGCACACG -TTAAAGTCTGCTTCGCGATCAAGCAACATCTTTTCCTAATCTTTCTGTTGTCTAGCTTCT -GAAAAAAGGGGGCCTATCGGAAAATGTTTTTCATTATGTAATATTTGCGTTGTTGGGCTT -CTCTATGGCTTAAGTTTGTCTTTGCTATCTTATGCCGTCGTTGTGTCTCGGTTATGAAGA -GTTTAGCCCTTGGATGGAATACATGTATGATTCATATATGAGGTATCAACATCTCCATTT -CCCATTTATCTGTAGTCGCATTCCTCCATATGGCAATGACATAatatgatacgatacgat -atgatatagtaagataatatacCAGAGAACAAGCTAATAGAAGCCCTTCATCCTCTCATT -GCTTTTGCCCGAATGCAAACCGCCTCCATGTTCCATCCACGTCCTCGCACAGGGCCATGA -AACTGTCATCGTTGATCAACTCGCACATCCAGTTCTCGAGAAAGACAATCCGCTCAGCTG -TTGGTGAAGACAGATACCGTGCCAGATGGTCTTCTGTCAGGGCTGGGTCTTTCTCATTTG -GCTTGCTAGGCGTGTTGTGGTTGGAGTTCGAGTCGGATAGATTAGTCTGTGTGTGCGATG -TGTTCAGAAAGTTTGGTACTTGAGGTGCCTGTTCAGTGGATTCGCAGATTGGAACTGTGT -TTTGCGAAGCAACGGGCTCGTCATATAGTAATAATGGCTGGCTTGCCTCAGGACAAGGAT -TTGTTTGGGACAAGATTTGGGAATCCGAATCAGGCACACCAGAATGAGTACTAGATTCAA -TCAAAGGATGATGTGGACGGGGTTCTAGATCCTTGGTTGGTTCAATAGATAGAGAACTTG -GCGATGGAGCTGAGCGGGAATGTTCTTGTGTTGTTCGAGACGCGCTTTCAGCGCGCGGTT -TCTTTGCGGTTGGCTTTGAGAAGGGCAGGTCTCTTTTGGGAGGGAGCATCTGATTTAACT -GCTCGACGCTATGGATGGCAGATACGGTGGAAAGCTGCGAGGTTGGTGGATCAGGTTCTT -TGTTAACGATGTTTGTGTGAGCAGAGGTATGGGGTAGTTGGAAGAAATCTAGCTGTTCCA -TGCTTTTGGATGGAGGGTTGAAAGCCGGTATAGGGCCAAATCTGGACACAGGGTGTGAGA 
-CAGTGCTCGCGGGAGATGATGCTCGAATGGGTGCTACCAAGCCGGAAATGTTAGATAGTT -GCTCGAGCGGTTGTTGAGAAAATTGTTACGTACTTGTCCCTCCCGATCCTAATGCCCCCG -TGGGATAAAAGTGGATTCCATTGCTGCCAGTCGAAGTTGCTGCTGTGTTTGTAGTCCTGG -GAACAATAGAAGGGAGATGCCCCGATGTCCATGATGACACTGAAGGAAATCGTTGCATTG -CTGGAACTGGTATCTTGCCTTCTGTCAATGGGCAGTTTATTTCACCCAGGAGAGCCAAGG -TCGTGAAGTAGTCACGCTCTGTGGTGAATTTGATTTGAAAGCGACGAATCTGGGGAGTTG -GAAGATGCTGCTATTAGAAATCATTACCCGAAGTCATGTCTAGTACAGACTATACATACA -TGTGTTCCACCATAAGGATACTTGACTGCAAGACAAGGCGACTTCACAACGACGGCAAAG -TGTGACCTTGGGGGTGGGATCAGCTGGGCCTGCATTCCTGCCATGCGAGTGAAGAAGACG -AGATCAAGCTGTTCCTAAATCAGATGACCCGGATGTTCAGCATTGCTGGCTGTCAAGCTT -TATGCGTGAGTGAGGATGTACCAGGATGCCATCGCCTCGAACCACTTTCTGAATCATCTT -TGATGGAATAGGCCCGGAGTCAAGGGATTTTTCAAAGATGCAAGCCAGATCCCCATTTCC -AGGGATATGATTCCAAGTCAGTGGACCGACATGATCGATAGCAGTGGTTGTATGGGAGAA -CTTGGCTATGGCTAGACGGAGGAATTGGGGAGGTATGGGTGGTTGAAGATGCTGCATCTT -GTAGAAAAAGAGCAATGAATTGACAGTGCCAGATCAGACGGACATAATCTAGATGATGAT -GATCGTGCTAGTGGAATGAGGTTCGAGATGAGGGCGAGCTTGCGAGAACTTTTGGCCAAA -CACTCCCACTGTTAATTAACCTTGTCCACCCATCAGCAAATCTAACTAGAGTGTAGCTAC -ATCACTGTGATTTGATAAATTTCATAATCTATCTGTCTCTACTGTTGCTATTGTCTCTAT -TGTTCCTATCCAAGATACACTATGCCTGAAGCATCAATGTATACCACAATTCCATTTCTT -AGTCATCAATAGGAACAACTAATAATCCGATAACGCGCCCTGGAGAAGCGCGGACTTTTG -GCTGATCAGTGCCGTTGATCGCAAGCCCAGCTCTAGCCCTCCAGGGCCGCAAGGCATTGA -CGATTATTTTACTTTTGAAACCTATCGTTGACTCTCATCGCCTACGCGCAAATTTACTAG -TGCCTTAGAAGTGCCATTACTGCTAAGCACTTTGCTCTTTTCACTGTCGCATCCAGTGCT -CCGACATTCAAACGCGCGAATACTGCGACTCAGCTCGAGTCACAGTCACTGGCCGTCGCT -ATCACCTCTTGCGGCTAGATTCAACGTTGTATCAAATTCTTACTTTGACCCTTCAGAAGC -CGAACGAAGTGGCCAGTTTGACACCCCTGCAATTTCTCCTGCTTGACTCGTCTGCAAACA -AAGACGTATCAAGGGCAGCATCTTTCAAGCCACATTTTCGAAGTCTGTGATCTTGAGATC -GAAGGTCATACCCCCGATTGCCCCTCGAATTCTCGAGCTCAAGTTTGTCCGTCCTCCCCT -ACGAGGACAGAACTACCTCGTGCGTCTGAAAACGTGAACGGCGCAAAGCTGTCGCGCTCT -CAACAGTACAATGTCTGCCTCTACTCCTCGCACCTCGCTGCGACGTAAGACTGCCATCTC -TCAGCCGACAGGTGTGTCATCTGTACTAACACCACCAACAGAGGGCCTACGCCATGCTCC -AAAAATCAATCAACCCTTCATCCCCGATACGACCCCTGCCCGTCGATCACACATCCACCA 
-TGGGCTCACATCACCACAACCATCCACCAGCCCCCATCATGTAAACATTAATCCCGCCGC -CAACCCCCAGAGTGCGCAGTTTACGGTCGATAGCTGGGAAGGAAAAGACAGTCGACAGAT -TCCCATGTCGACGAGGGAAGTCCCAACCCCAGGGAACCGACCGGTCATTTTTGCACATCT -GCGTGACCCCTCGAAAATGCCACGGCAATTGGATTATTATGACCCATATTTCCCACTGAG -ATATCTTGAGGTTCCAAGGAGTCAGTCCCTTCGCATCCCATGAATTGATTAGAAATACTG -ACGGTCTTGTCTTAGCGGATCACATCTACAAGCGTGCCCACTATGGTCTGCAATCAGGCA -TCCCAGACGAAGTCGACTTTGCACTATACCACCTCGTACAAATTTCGAATCAACGATGGG -ATAAATTCAAGTTTGAAGGGTTTCCCTTGCTTGCGGAAACTTTGATGCAGAAAGCACTAG -ACATCACACAACTCTGCACAGGGGTGAAGTGGGAATTCCAGTATGATCCTCGGAAGCCAA -CTGATCGTGTCAACGTGCTCAATTCTCTACATGGCACCCGGGATATTCTGGACAAAATTA -GCAAAATTCCAGTTAATCTCCCAGACGACAGCCTGGAGACATACGAATTCAATCATCGGC -TGAGAAACATCAAAGAGGCTACGTTGGTGTTGAGAAACATGGTGCTCCTGAAGGAGAACG -CCTTTTATGTGTCGCGTTACGCAAATGGCCTACTGCGGGACTTTTTGGTTATCTTGATCA -ATGCTCCCAATCAACCTCGACTCAATGAAATCAAAAATGATGCACTAGATATTGCAGAAG -AGGTCACCAAGTTCTTGCGGACTGACCCCGAAGATCCGCTGTGGATTTCTTTGGTGAATT -GTCTCGACTCCCCAGACCGCGCCCATGTCGTGCGATCTTTGTGGGCCTTGACACACTTCG -GCACTGAGCTGGACGATGCGGATGCGAACCGGGCCATGGAAACCCTGACCAAACCGACTC -TCCAGCAGATGTACTATCACACGCTTCTTGATCTTGACAAGGATATTCTGAGTGGTGCAC -TGGACTTCTGGTATCAATACACGCTCAGTCATGACAATATCGAGACATTGATGGATGTTC -TCAATTTCCCAATTGTCTTCGTTCCTCGCATGGTCGCGCTTCTTACTTACGAAGCGCGGC -CTACCAAAAAAGAAACTATTCTTCAGGAAGAAAAAGTTGCGCCTCCCCCCACTGACATCC -CTCGTGTCTCTCCCGAGCTATTGGAAAAGCTCATGGAGTTATCTGAGCCGGAGCGAAGCT -CCCAATGGCTGCGTTGCTGCTTTATTGAGGATCCAGAATGCGAGATCACACAGATCGCTT -TATGGCAAGCCTACCAAAGCCGATTTGCAGACCCCCGTGTCACTGGAGGGGGCGTTCTTC -CCGCTGCAGAATTCATCAAGAATGTCAGCAACACATTCACAAATGCTCAAGCACAGGTCA -TCAATGGCCCCGGCACTGCCACTAAATTTATTATCAAGGGCATCCGACCTTTAGAAACCG -CTCACACATTTGAGGGCTTCCCTTATTCCTACTGCAGATGGGCAGACAACTCAAAGCCTT -CTAAGATGTGTCAGCGCGCATTCACATCTCCGACTGATCTCCGCAATCACGTTTTCGGTG -ACCACATGAACCTGGAACCCACGGACACCCCCGGCCAGTATAAGCTTGATTCCGCAGATT -CTCCCATCCATACCTGTCAATGGGACCATTGTTCACGGTTCCGAGCATCTGGGCCCAGCG -CGAACACATCTATGGTTGCCGGCCACGTTTCGTCGCATTTGCCCGAGGATCGCCCCGCGG -ATGCACAGCCCACAACCGCCAAGCGTGTGGTTCTCCAAGAGCGAATCGTCCGTAAATGGT 
-ACTATATGGATACTCCGGTCAATGAGAAGGGCGAACCATTCGGCGTGGCCTATAAAGCTG -CATTGGTGCTGCGCAACATTGCCCGAGGCCTGCCCAATCGAACCACGTCGAAATACGGTG -GTCTTCCCTGGAAGAAGGCCTGTTTCACGAGCCAGCGCCCCAAGATCGTTGAAGTCTGGG -ACCGCAATCGTGCGCTGCGCAAGGAGCTTACCGAGTTGATCATGGTCATTGAGAAAGAGG -TAGACTATTAAATTTCCTATTACATGTTTTTCGATCTCGCAACTACCTCTTTTCCTTTCC -TCATGAATCAATATCTCTGTACGATTAATACCCTGATTATTCATGTTCTCACTGTCTTTC -TGGTTCCCCGGTCCTCCATCTTGTCTAAATGTACAATATTTAAGAGCCCGGGCCATATTT -GCGGTGTTTTCTCGTTTCTGCTTTGTTCTTCATGGCTTGTCGCTTTGAACTAAGCCCAAG -TTCGCAAAAAGGCCCCCGAAGAGTGGAGTTGTATTACTACCTAGGTATGGGTATGGGGTG -GCTAATATTTGACTTTCCCGGCGTTTCTTAGGATAGAAATTCGGGATTCATTAGTTAGTA -TATTGAGCCTCGTTTAATTCACGCTTTCTTTCGTTTTGCTTCCTTTTTCTTGCTCAAGTG -TGTCGAATCAGATCCGAGAGAAGTTACAACGATCAACATGATAATTGGTATAGTACTCTA -CCTAGATAACTTGATACGTTCTGGTTCCCAGCTCGCTTCCAATAAAGCAGGAGGAGCCCA -CGATACAAGACCTCACGAGTTCTTATAGCTCAATAGCCGCGTCACCTTGTTGCTTCACTT -AAGAGAATCTCCGGCTTTTGGATATTCCTCCATCCAGGGATAAGGAACCCCGTAATGAGA -TAGATCGAAACAACAGAATATTCCATCAACAACCCCATAGTCAAGAATGGGGCAATTGGA -CCACTCAGGTCGTTCCACCTCAAGTCGAATGGCTGGAATACAGACAGAAAGAGGTATATG -ATACGAACAGCTATGAAGGGCGTAGCCAGAGCCATGGCGTTGAGAATCTTGGAAGATATT -AGCCCTGAAAAGGTAGATAACTATGTTGGGGTCTGCGAACGAACCGTTCTGCTTGTAACT -GAGAGACAAGAACGATGTGTCCAGAAATATACTTGCATGGCCAGTAAACACCCAACAAAA -ACCACCACGATGATGTATCCGGATTTGACAAGTTTAGTGCCGATGAGTACATCACTGTTG -GTGTCGCTTCCTTCCAGAGCCCCGCCTGCAACGGTAAGTCCGATCCCGACGATAAAAAGA -ACTCGGGATACAACTAAGCATTGCCGAATCTGTCGGCTCATGTCTAGTTCAAGAGCAACT -CTGTAGTGTATTAGACTGTTGAACATTCATGACATGTAGTGACATGGTATTTCACGACGC -CAACTTACATGATTCGAATAAAGCCTAAAGTTGCTGCGATCAAGGGAAAGACGCCAGCAT -TGAGGAAGATGACCGAAGCTACCATCACGCCAGTGCTGGAAGACTTCTGTGCAATAATGA -CCAAGATTCCCGCAGTGATACGAACTAATTCCCGAGTATGTTAGAGACACTAGATCCGAA -ACCTTGACTCTTGTAACAAGATACTTACCAAGTGAGAAGAAAGTCAGGATAATCCATGCC -ATGCGAGGTCGCTTGTGTCGACAGCACGCAAGGTAAAGCGCAATGGCCGTTACCGGTATG -TAGAACACGATCTGTGCGATACTTAATTTTGAATGACTGCCCAACATGTTGATTTTGAAG -TTGCTGGATTCGGGATGGTTTGTTCCTCTGAGTAGTTTGTTAAGAATTTATAATCAATGT -ATGATGCTTATAGAGGAATATTGAACCTTTTATAGACATCCTTCCTTGATCTCCTGAAAA 
-ACAACCCGGCTGTGCTGAAATCCTTTGCATAGATGCTCTCGCAAGATCAGCACCTCTTTG -GGTTTAGCGTCGGGAACTATGAGAGGCCGGGTATTTTATTGGCTCAATGGAGCTCCACCG -GCTCCACCTAGCCAGAGCCCCCAAAGCCTCCAAAGCCCACGAACCTAATCTGACTCTGAC -TCAAACCAAGAAAGTCCAAGGAGCATTAAGCGTTACCTGCCAAGTGGTGATCCTCAATTC -ATCCTTTTCATCCTTGAATCAAATCTCCAATGCTGTAATGCGACATCATATAGGGCTCGG -CTTATGAGATGGTTGGAGTAGCTGGTAAATCACGAGGTTGCAACACCTGTCGAAAGAGAA -AGATTGCAGTGAGTGTCACTAGATCCATATGTAAAGACCCCACTGACTTGAGCTTAGTGT -GACCAGCAAAGGCCAATCTGTGCTAAATGCACACGCTCAAACCGAGTATGTGGCGGGTAC -CAGAAAGATACAGTATTCGTGTTGGTTCAACCAGCTGCCGAGAAAACACATATCTTTCTC -AGACCCTCTGTTTCTCCACAGGGCGTTACTGAAAGTGCAGAGCTGAGGCCCACCATGTCC -GGCCCAACCAATGCGGTTCGAGGCAATGAATTTACCCATTCCTTTACCGAGACAAGCTCC -GCCTATGCAATACTACATAGACGCATTGAATGCCAAAACTTGGTTCAAGCCTTCCTTTTA -AACTGCTTCCCATCTCGTTGGCCCCCGACATCCCGCTCCTGGATTCCATTACTTGGGGAG -TTGCGTTCCAAAAATGAAGCACTGGAGATGTCAAGCGCAGCAGTCGCAGCCTCGGCGATT -GGACACATGTTTCACGATTATGCATTAGTCAAGCAAGGTCTAAGCTACTATACACAGGGT -CTTAGACAGCTACAAAAAGCATTATGGGATCCTAGTCTCATGCGAGATGATGGAACCCTG -GCGGCGTGCATGGCTTTAAGTCTTTATGAGGCTCTTGAATGTCCCAATCTGGGATCAGAA -GGCTATTTTAATCATTGTCGTGGTCTTATCGCTTTAATCCAATCCCGCGGCCACGGAGCG -CATTCTTCGGGCGCGGGACATCGACTGTTTCTTGGTGTTCGAGTCCCAGGGGTAGGTCTT -GTGTGAAAGTCCAGGAGTATTGTTCGCTTGCTAAAGAAATTGCTCAGATCTTGTTCGCTT -TGATAAACCATACCTCGACTATTTTGTTCGAGTCCACTTGGATGGAACAGCCTTGGGCAG -AAATACCAAAGACGTCTCACGATCGTGTAGTAGATTGCCTCGCCCAAGCCCCTGTGATTC -TCGAACGCGTACGATCGTTATATCACCTGAGTATAGTACAACAGGTCGATCTCCTCCACC -GTTTAATTGGTGAATGCTGGCAAATTGACAAGCAGTTGGATGTTATCTATGATGACATGC -GCCAATCAACGTCCGACGTGCTATACTGGCAGGTGCCCTCACAAATAGAGCCTTTGTTTG -ATTCTAGGAACTCTGGGAATCTATTCCCGGTCGTTTTTTGTTTCCGAAATGCCCAGGTAG -CTGCTACACTGATGCTCCTATGGGCAACACGAACAATGCTGTGGTCCGGTCTTTCCAATA -TGTACCAACACTTGGGAAGTGTCATATCCCTCCAGAAATCAAGCCTTGGGGTTCTTGGGC -ATGAGTTAGGCCAGCTTGTTGTTTCGGAAGTGATGGGTACAGTTAATCCAATTGACCGTT -GTGGAGAGTACCTATCCGTGGGACATCAAGTCTGTCAATCCGTGGAATACTTTCTTAAGG -ATGAGATGCTGCTAGCTGGGCCACTAACAGTTAGCCCTGCACTTGGAATTGTGGTTGATA -GTCTACGGAATTGCCCTGGCCATGATAGAGAAATTGCATGGATACAAGCAGCTCTGGAGG 
-TAGTTCGGCGAAAGGGACTTCGTGTTCTGAGAGATGTCGATATATAATCAAATCAGTAGA -CAGGCAAATAAAGAAGAGACTCAGACATGCAAGCATCCTTTTATAAGAATAGACATAAGT -CAGCATACTAGCCTTGCAGTTTTTTTTATATGCATATATCCATTCTGAAGTAATTAGAGA -TACATCTTGTCCAGGGATATCTATTGAGAACGAGATATCATGGTGCATAGCGCAATGTAA -TTTCATGCAAGAGTTGAGAGAGCAGAAGCTAGGACTAGACTATGGAATTCGACAGTAACA -ATATCCGAAAAAGACCTCCACACTTAGTAGTCCCCCGTAGGAATTCCACCCACAGTCGGA -ACATGCTGTCCATGGAATCCCAGCCCAACAGCCCCATCCAAATTTGCCCGGCCAATTTCC -GACAAGCTACGAGCATCAAGGCAGAGCAAATAACTCTTGCCACGCATCCCGTCAAGAACC -ACGCTGAGAAGCACACCATCATCCTCGGTAGTGCCCTCGGGGTTGGCAACGAAAATGGGC -TCACCGGGCGATTGGCCGTGGCATGCCCAGAGCTGAACTTGCTGTGTATCACAATCGAGC -TTTACAATACCATCTGTCAGGGTAGCCTCTCCACGGAAGGTCGTAGCATAAACGTATCGA -TGTTTTTGAGTGACTATTTTCGGATTCATAGTCGGAAGCTCAGGGGACAAAGATTTGCAA -ACGGACCACTCGACTGTTGCTTTCTTGAGGTCGGTGCATGGAGTAGAAGGCACTGCAGGG -AGACGGAAGCGGGTAAAAGATGTACGCAAAGAATCGTCTCTCTTCTTTTGGAACTCCTTG -GCTGTATCCGACGAAGAAATCAAATTCTCGTAATAGAGAGATTGCAAAAATTCAGAGCTG -CTGGCTCTCACTATATCTGCCACAATATCCATCTCTTCAGGATTCTCCTTTGAGGGCTCG -AGCCATGCGTTGATAGTGTGAAAGCAAAAGAATGCAGGGCTTTCGTATGATGCGATCAAT -CCCTTCCCGTGCTTGCGGTCAATGACATACCACACCGCCGGCTGAGTTGGGTCCGTTGGC -AAGATTGACTCGATGAACGAGGAGTTCAATCGCTGTGGATTTATGTGTGCGTTCCACACA -CAGAGAACAACATGATCTTCAGTGATAAGAAGGGAGTGAAGATAAGCTGGTTTCGCTGGG -AAGGTTGCTAGGATTGTAGTTTCCCCCGTTGAAGCCGAGACCCTGAAGACCCGGTATGTA -CATTTTGGTCCTAGGGTGAGATTGAAGTTGAACATGTCTCCCGTGATAGGGTCTGATCGC -GCATGAGATGCAGAAATTTGCCCCGAGAGTTCAGGGTGCAAGGTAGCCTGCGAGGCCAGG -CCGATGGGCTCCAGGGTTTCCGGATCGAGCTTCTTGAAGGCATTGTAGTCGGTCTTCGCG -TAGAGAGTTCTAATTCCCTCGGAATTGCTCCAGCGAGAAGTCGACTCATCAGGTTGAGAA -TCAAGGCCCGGAACGTTGATCGAAAGGGTGACGGATATATTTCTACTCGATGGAGTGGGT -TGTGGGACAAATTCAGTCTGGACTTTGCCCAGGACGCTCTTGCAAGGATCGCGTTTCTGT -CCGAAACTGACCGAGTCCAGAGATCCCGCCTTTCTTGCTTCCTCAATCAGGTCATCGGTT -GAGAAGCGAGAGTTGTAGAAGACCCGAGATGAAGTATGAGAATCATCTGGAGCAAATATC -TGGAACCGATGGGTTTGACCGAATCCATCGAACCAGTGGGAGAGCCGCAGTGTTTCTCCA -TTTTCTGCTTCAAGTTGGCTTTTTCCTGGACCTGTGCGGTATAGAATTCCGGCGGCGTAT -GTAGGAATTTGTCCTGTCACAGATAGCTCGACGGGCTCGCGTTGTTCGTATTCAGGATCG 
-AACTAGCAGGGATTAGCAAGTTATTTAGTTCGAGATGCCCACAACTTCACTTACACCCTT -GTCATTGGGCCAGTCATTGTAATGATGTTCATTTGCATAATTCTCCATGACTATTGAGTC -TGGACCAATAGAAGGCAGTTCTTTTCAATATTGTGATAGTTAACTAGGTCGGTGTTGAGA -TAATTGCCTGATGTTTTCAGGCCGATTGTTCTGAGAGCTTGTCTTTTTCCTTCTTTACGA -CCATGCACCCTTTATATATATTTCTAAACACTTCATTATTTTCCTTTAAACGAAGACTCC -CTGTCTACCAAGCCTAATTTTGGGGATCATCATTTTTGGCGGGGGAATCGCGACGGGCTG -GATTACACAACCGACTTCTCCTTTTTCTCACCAATCGCCAGCGTCGAATTACCGAGTCTC -GCTCATCATCGTCATTTCGAGCCGCGCCAAGTCTGCAACAGACTTTGGATCTCGGATTGC -TCATTCAATTTTGGTTTCATCCGGCACATCTCAGCGGGATACACCCACTTGAAAGCTGTC -TACAGCCAATCTGCTGACAGATCAATAATCCACAACCCCGGATGCTGTTCGGGGCCGTCT -ATTAAACCGATGCCAATCCCCCATAAAAAAACAAGTCTGGAATCCCGAAACCGGACAGGT -CTCAAGATGTGGAAATGTAAAACATGTCCCGCGACTTTTCAGCGATTAGATCACATGAAA -CGCCATGCCCTTACTCGTATGTTTCCAATCATACCCAGGGCCGGTACAGTAATGACTTCC -TACAGATCACACGGTAAAGAGCCACCTCTGTTCATTTTGCGGCTCTGCATTCAGCCGAGG -GTGTGTCATATTCTACTACTTCCGAAGAGAAATATGTCACCCTATCATGTGTGCTGACAA -GGGCCACCCAGAGATGTACTACGGCGGCATTGGAAATCGTGCAAGGCCCGTGTAGACAGT -GGCCATGCAATCCCCGAGCCGGAGCGCGGTGGGAAACACAAACGTGCTTGTGATAGTTGC -GCAAGCCTCAGGAAAGCCTGTAGTGGGGAAATGCCTTGTGCTGAATGCGCCCAAAGAGAA -AGACAGTGCACTTATCAACGGCTTCTCGGCGACGAGAGACCCTTACCATCCATATCAGAC -GAGCTTTTCTTGCCATCGACATCACAGGATTTACTAGAGATCCCCGCCGAAAGTTCAGGT -GCGAAGGATTATGTCCCGAAGGCTTCCGAGTCCAGTTGGGACCTTGGCCCTGCAACGCTT -TATCCATCTAATAAGGAAGTTCTTGATCGGCGCCGTGGGTTTGCTCTCAGGTGATAAGAA -AGGCTAATACTTAATAACGCTCAGTCCCATGTGGGATGAGGCATACGTTGACTGCCTGAT -TGCGAGCTAAATGGTATGGCTGTCGGGTGAAGATGTGCCCTTGACGGTATGACACATTGG -GAGTGTGAATTTCAAATGTCATTGGTGAACAGCTCGGCTCAGGTTCCTCCGAAAATGATG -ACGGCGTACTTTCCGCTCAAGATACTGTCTCATTGGAGTTTGAAGGACTGAACGACTCAA -TACTCTCTCGTCCTTTCTGGACTATCCAGTGAAAAAAATAAGAGCGAGATGGTCTAGTAA -AGAATACGAGCCGGGTCTGTATTATGTTTAATAGGTGCGAGCTAGCGCCATCTGTGGCGC -AGATCTTGAATATATTATATTAAACTCATATGAAACGTCTATTTGGTGGTCATGGCAAGA -TTGGAGATTTATCCATGTATATCTGTGAGGTCCAGACTCCCCTTCACTGGCTCCCCGAGA -TGTGCTTTCATCGCGGCAAGAAAATGGAATCAACGCTATCGTAAACATTCTCAATAAAGT -TCTCATAGCTCAGCCAAGCCGTGTCATCGGAATTATTTTGCAAGCTCGGGTCACTGAGAT 
-CCATCGCATCCCACACATTGGGCCAGGGGACCGGCGCAAAGGAAGTCGGCACTTCCGCGA -TTTGCGGCTCTCGCGCTTCACCCATAGCCTCTCCCAAACCAAGCGAAAAGTCCGTGAGAC -CCTGTTGAAGCTCAGGTTGTTTGGAAAGTAACCCGGAAGACATACCATCGAAAGGGTATT -GAGGTACTGCTTCACGAGAAGTATCAATACCGAGAAGCTCAGTTCTCCGAGTTTTCGCCT -GTGACATGAGCCTCTCCAGAGGCCTCCAGAGCATCCCGTGCTCAGAATCTGCAATTATGA -GTCGCGAGCGCTGGTAAACGTCATCAACAACGGTCCAGTACTTTGACATCACCGCCGGAC -TTTTACAGACGCAGAGTTCGGCCAGGGCCACAGCCAATGCATGCCACGGAACCCACATAG -AGCCGTACCATCGCCAAGGCCCGGCGCCTGGGTAGTTGTATGTCTCCTGCGACCAATGCA -GATTATCAGCTGCTATCTTGAGGAGGACGTTCTCTTGAATCTTTGGGGGTATGAAGTCCC -GGCTTCGCTTTAGCGGCCGTAGACAGGCTAGCTGCAGCCAGCTACCCATAACTCCCGCTG -TCTTTTGCGCGTACCATTGGAAATCCGATAGATCGGGCCTGCAGCCGCTCAAAAGGTCTG -ATGCCTTTTGCTGGAAATCGCGGACAACTTGCTGCCGTAAACTCATGATGGTTACAGCTT -GCTCGCTGAGGTCGGAAAAGGCAAGGGTTCGGGACACGGACGCTGAAGCTGCGATAACTA -AGGTGTGCGTCATGTCGGTAAATTTGCCTTCTGTGTGTTCGGGGATTGGCCCTGGCATGT -CGAACCATAGGTCCTCGTCGTTGATGTTTGACGGTAGATTGTAATCGAGCCAGGCGGATT -GCATCATTGGCTCAGATGCCCTGTCTAAAGAAGCTGCTACATCGAGAAGGCCAATTCCTA -GCCATGCTCGTTTGCGCATTTCATATTCAAAAGGGCGCACTGTGAATGGCGGCTGGCGCA -CGTGGAGAGATAATGCCTGGCCCACTCGGAGCGCCATGCTCAACATAGTCCAGACCCTTC -GGCTTTGGTCTTGACATCGCGCAGCAAGCTAGAAGCGTTCATATTAAGTTCCCATCTTAA -GATAAAGGACGCGAAAAAGGGGGAACATGTAGTCGTACTCACCAGAGAGATAATATAAGC -TTGCAGGACAGTAAGATCGTTTGTCGTGACAAAGTCCGCCCTTGCGAGCGCGAGCTCAGT -TTCTTTCTGAAATTCTGCAACTATTATCTTTTTGCTCGTCTTGAACAATGACTGACATTC -CACTTCGTCCAGAGTGCACACAGCGCAAAGATACATAGCCGAGGCAAGGGTGGCTGGCGC -CTGGTGATCTTGCTCATAGTCAAGATAAGGTTTGCCATCTTTAATAAATGCTTGTAGGGA -TGGTCGGTGTAATATCTTGAAGAGAGGATCGACATTGTGAAAGAAAATGTCACATAACCT -ATGTCGGATTTGGGGTTGAAGATTGAATGGTTTGTTGGCTTGAGGTGAGTTTGACGAGCT -CTCAGGGCTTTCTGAGCTCACAAGCGATGTGCCAAAACCAGATGCTAAGTCTTTGACGTT -GTGATCTTCATTTTCTAGACGACCCTCCAGTACACTGCGTAATTCTTGGGTCTGGTAATT -GGGATATGAGTCAAAAGACATCCGCAAATCATGAGTTCAAGAAGCTCACCTGCTGCATGA -TATCCTCCCAAAAGGAGCCCCCTACATATGCAGTTGCAGGGCGAGGAAGATGTGGCTGAC -CTGCACCTGGTAAGTAGTCATTGTGATCCTGCCGGGTATCTAGACCGGCGTTGCTAGCCT -CTGTCACCGGCTCCTGCTTCGCGTATGGATAAGCGGGAGCAGGGGTAGTTTGAGGAAGCT 
-GTAATCTATCCTGTCGCTCATTGGCGACTCTTTTCAGAAGTTTCTCAAGTCGCGCTACCC -GCTCGGATAGTTCTTTGTCTGAGCCTACAATCCGTTCGGGTCTTCGCGTGCGACCGCGCG -GGAGACGCGCACGCTCGACCGATACACACACAAGTCCAAGTCGCTGGCAGCTTGTACAAG -GACTTGACTTGTCGCATTTCACTTTGCGCTGGCGACATGCTTCGCAGGTAAGGCGGACCC -GCGGCGCGGAGGAGCGTGAGTGGATTGTCCCGGCTTGGCCCTCGGGATCGGCCATAATTT -TGCGCGGACCCAATCAGTCTGGATAGGCAAATGGACTTGAGATGAATTGAATACTTCGAT -GTACATATATGCGGGTTGAAGATTCAAGGCCTCTGCATGGGAGGTTCTGCCTCGACTGCT -GCTTTGACCCCCGCGAAACCTATCGATCCACAATCGATGTCACTGACCAATCAGAGGTCG -AACTGGTACTTTCTGTGATGAGCTGAGTCAAGTTCGTATAAAAAGAGATGTTAAAGCTAT -GAAGTAGATGACAGTGTTTGCTTTAATTAGGAAAGGTGTGTGATAGTAGACGCTTTCTAC -CCCTAGGTATTACTCAAACGTGCTGCCTTCCAACCTACAGCCTTGTATCCTCCCGTTCGC -GACGTTCCTTTTGGTTCCAATCAAAAATGTTCCCCTTCTACTTCTATCGGTTCCATCAAA -TCACAAGGCCTCAAATAAAAAAAAGGCGAAAACGATCAATTGTCTAGAGCTATTGAAGAT -TCCGAGACCTTTTTCCCTCTCACAGAACATGACTGCTCTGATTTCACATGGCAAAAGACG -AAATAGCCCAGCTGCCTACGTACTTATCATGGTATAATCGGTTTATCATTACCTGCTTGG -GCGTATATATGATAGCTTGACTAGAGCAGTTCCAATCGAAGAAAGCCAAGTTCCTACAGG -TGGACGTCCTCCAATGCCTAGCATACACGGGTATCTTGACTTAAATCTACAAGCGATAAT -GGAATGCATGAGCTGACATAAATATGGATAATCCGCGAGGCTACTCTCGAAATTGGAAGC -AGAACCGCCAAGATGGATGATGCATCAATCAAACTTGATCCCCAGATATAAGAACTCATG -AAACATACTGAAATGCCACTATTGAATTATTAAAGTCATAACTCGATGTCCGAATGGCAG -ACGAGAAAAATATAGATTATTTCCCTTCTTTACAAAACACTTCGCATTGTGACGATGTAT -GGAATGTTCGACCGAGTCGCATACTTAAACACCCATACACGAGATATTGCGACAGTGAGG -GACTTCAGTAATCCCATCAGGGAATGTGTTTTTGTGTTTCGGCTCAAGGACGATTTGATT -CAATACCCCGGGTGGTAGAATGGATGCCCCGAAGAGAGACATTGATAGCATTCAGGAAAT -TTTACCGAATATAGCAATCGGATCTTTCCTCCATTCTTAGCGTTTCAGCTACTCAGTGGA -AATTTCCGAGAATATTCAAGTAGCAAATTTGCGAATAAACATCTAGAAAGGCATCTCAAA -AGGTTAGAAAGATAATGACTGATTATTGAAGTGCCATCGGAAATGAGAATAGTTCGTATG -ATACAGGGCTTATTTATCGCTCTACAGACGAGTTTTCTAAACCAGGAATGGATTTGCACG -AGTAGATCACAATATCAATAATGGTCAGAAAATCGCATCATCTAACGGTCAATGCTCTTC -TTTTATCGATTCAAATTCTCCGACACCGTGCACGCGGCAATTTTTCGGAGAAGAGCGGCT -GGGGGCATAAATTGATTTGGATCAGCATTCAATTATGACTTTTTTTGGATTTCACCGGCA -GATAAAGCGTTTAAGAAAGCAAACGTTGTGATTAAAGGAGTAGATGTTATTGAGATGCTT 
-TGATGACCAAGAGATATCCCCGAACTCCCCGAAATTGGATTTTTCTTGGGCTCCAGGATC -GTGCAGGTACCAAATTGGGCTGCAGAGGTGTTTGCAGATCGCAGGAACAATACGGTTGCC -CCCACTTGGACTATTGTCCTTGTTTCTTTGGGTTTTAATGGGGATTATTTGACACTATTG -ATTTTGCGGACACATTCGACAATGGCATTTAGCCATACAACAAAGAACAAAAAGACAGAA -ATACAAGAGAAGGAGAGGTGGATAGATGAATAAATGATGTGAAAAACCCATACGGAGTAG -AGTACCGGTAGGTCGGTTCAGCTATGAAGCAAAGTCATCCTGCAGCTGAACCCACCTGCG -TTCTGTGTGAAATATCTAAGTGGATTCTTTTGTTAGACTAGTGTTCTTGGCCTACCTTCA -CAGGCACTCACGCCACTACGATGGAGCCCGGTTTATGGGGGCAATGATCTCGGGTTGATT -TGAACATTTGCGTGAATCCATTGATCTCAGAATATGGGAAAAAAAAGCAATTCATCCCAC -GTTTTAGGGATATGCGTCTAGTGCGATTGCAAATCTCAGAAGGGGAGAGCTTCCTATTAT -GGATGTACAGGCCGTGTGGTCTTTGACCGCGCGATTACCATGGATCAGGCCACGGCATCA -ACGAATCAGAGACCGTTTCACTGGTGGCAGAGTTTATGCCATGTGGAGACTATGCCCAAT -TTGAACATATGCATATACATTTTTGCTTCAAATCCTCATCATTTTCTCGTATAATATGAT -GCAAAACGAGACAACTGGTCATCACAAAGTCTCGAGTTTCCCATAATCACCAGACAAGTA -TGGGGCCAGCGTTTCTGGAATTGATGACCAGACTGATGGGACCAATATCAATATCTCTTC -AAAAATTAGATAGTCTATTCTTCTAGCTCGGTGGCTGAATGGTGATCTTCAAATCTGGTG -GAGAAGATCTCGTGTTTCTCTTGGCTTTCTGCTCTTTACATATTTCCTTTCTTTGTGCGG -AATACTTCACAGTCTTTTGTCCCTATTTCGTACATATGTATTAAGAAGCGGGCCATTCCC -CCACACCATGATCCCGGGAGACTTCGTCTCGAGCATTGACGAAGTGGCGTCCATATCTAC -TATAATTCAATAGAGGAACTTTGCGATCTCTCCTTCAGCGCATTGAATACCCCCTTGTTC -CGAATCAATCTGTGCACGAAATCTGGCTTTCACTCATTTATATACGCCTTCATGACACGA -TGACAACACTATCAACATCCCGTGGCCACCAGGCCCTCGCCGTTAGCATTGTGTTCACAA -CTATAGCGACATTCATTACTGCCGTCCGTATATTTACGCGAGCATTTCTCGTGAAGCAGA -TGGGAGCAGACGACTATGTCATCCTAGTCTCTCTGGGCTTCTCATGGATTTTCTTCGGCC -TGATGGTAGGCGAAGTCTATCATGGCATGGGAGAGCACTACATTGATATCCCCGCCTCAA -TTTTCAAGGCCCAGATGATCGTACGCCCAACCTCAGCAGAACACTTCTCTTGCGCAGTGC -AGTTTTATTGACATGATTACCTTCCAGTACTTTTGGGCCTCAATTCCTATATACCAAACA -AGCCTCATCAGCACAAAGATGTCAATTCTTCTTCAATACAAACGAGTCTTCTCCACACCA -CGCATGCGCCTAGCATGTTGGATCATGATTGGTGTTCTAGGCGTCTACGGAACCTGGACC -ATCATTAGTGCCTGGGCTAACTGTGTTCCTCTCGCCAAGTTCTGGGATCCAAGTGTCCCA -GGTTTCTGTTTCGACAAGAAAGCTTTGTGGTTTTCCAACTCGGCCATCCACATTATTACA -GATATCGTCATTCTCATATACCCAATGCCAGTCCTGAAATCGCTTCAACTGCCTAAGAGG 
-CAAAAGTTCGCTCTCATGGCTGTTTTCGCGCTAGGTGGATTGTAAGTGCCGCATATGATG -AGGTTCCTATACAATGCTAACAATAACTTTCCCAGTGTCCTGATCACGAGCATACTCCGC -CTAAAATCTCTCCTTGTTATCTCAAACTCAGACGACCCAACTTGTGAGTACTAACAACAC -TTGCCAATGACCCACTCTGGCCCACGAGCGAAATTTAATAAAATACTCCAGATGACAACG -TTGGTGCAGCAACATGGTCCGCCGTGGAGTGCAACGTAGCAATTATCTGCGCCTGTCTCC -CGGGCACACGCGCCTTCCTCTCCAAGTTCCTACCGCACATTTTCTCGACCCGAAGCAACG -GCTACCGCAGCAAGACCGCACGACCTTCACGAAACGGCCGCAGCGCCCTCACAGGAGACG -GCAACACGCAGGTCCTAGCTTCTGTCGTTGGCGGACATGATCACAGCTCAGGATACGACC -ATGACCTGGAAGATCTTTCACCATCTGGCTCCTTCAATAGCTATACCAAGGAGCCTGCGA -AGGAGGTCTTTGCGGGCATCAAGGTCACGACCAATGTTACGCAGGAGAGAACCTCGCATT -CAAAGATTGCTGTTAATGATGATATGGGTAGTACGAAGGAGTTGGTGAAGAAGCATAGCT -TTTGATCTATAATGATTTGGGTTGGCCTAATGAGTTCATTCTTTATATATATTTATTGTT -TCCTAGCCTTGATTTGGTGGTACTCTCTTTCATCTCATGTATCGGAGGTCATACATATCT -CTTGCACAAAGTATACCTTGTCCGCTTCTTTCTGCCGTCTAACTATAGGCTATAAGACTG -GTAAAGGGAAAACTATCTTGAAGATGTCTTAAGAATAACCAAGAGGCCCACTGAAAGGAC -CGCTAAAGCTTGGTCAAATCAAGACATCAAACACATATCTCCGGATCTATCCAGTCGCTG -GCCTTTGTTAATCGAAGAGATTATGAAAGAAACATTGCACCACAAATTGCTAGATTCCAA -ACGTATTGCTACTCTCCTATCCAAACAATCATACCAAGTACTGCATTAAGACCCTCATGT -CTATTTGTTCCCTGCTCTGACAAGATCACCAAACCTAGTTCCACTCTCTTGCGCCAACAT -GGCCGGGTTGCCCACCTCCACAATATTGCCAGTATCCATAACCACCACCCTGTCAAAGTC -CATAATCATGTCCAATCGGTGACTGACCGCAATGACCGTGTAGTTTCTGAACTCGGTTCT -AATAATCTCCTGCATGATGCGTTCCGTCTTTTGGTCCACGCTTGAACTTACTTCGTCTAG -AAGCAAGATGCCACCTAGTTTCCCAGCGGCCCCAAACTTGCGCTCTCGAACTGAGCGTCT -AAGCAGGGCTCGGCCAAACGAGAGCAGTTGCCGCTGTCCAGCGCTGAAACTTGAGGCATT -CATTCCTCCGGCTAGGCCGCCTCTTTCCTCAACAATGTCCCATAAGCCGACGGACTCGAG -CACAGCCCTACACTCATCCGGGGTGGCCTCCTCGGCAACATCAAGATTGTTCTGAACTGT -GGATCCGTCGGGTAAGAAGACGGCCTCTTGGGGCACTGCGATGATACGCTGACGCAGCAC -AGATCGATTTAGCCGATGGAGCGGTAGGTCGTCAATCACAATGTTTTCCGCCGTGGCGGA -TATTGGATCGAGGAGCTTAAGAAGTAAAGCGACAAGACTAGATTTGCCGCTACCTGTGCG -ACCACAAATAGCAACCTTTTCTCCGGAGTTGATGATCAGTTGAATATCGCGGAGAGCTAG -ACTTTTGGCTATACCATCACCATTATCAGCGCTAATGACATAGTTAGTATTGTTTCAGTA -TAGGGCTTATGATCCTTACTCATATTTTGCCGAAACACCCCTCAATTCCACCACCCCACG 
-CGGAGGCCACTGTTCGGGTGGGTCGATGTCCTCTCCATCTCTGTCCTCGGGTGTTACTGT -CTCGTTAAATGTCTTCAGTCTAGCAATTGCGCCAAGTGATGTTTCAAGATTGGTCCAAAA -CAGAACAATGCCAGCGAGGTTCTCTCCAAAACTCAAAAGACTGTACAGAGCGGCACCGGC -GAAAGCAGAGTTGGAATGAAAGCGCACGGCGAACGTTGTTAGCGCTACTGCTAACAGCAT -GACAACCACATCCAGAACGACATTGAGCCATTCCTGGATCATAAGAAGGAGATACGAAGG -TCGCTGGCTCAAAGTGAGAAGACGAGCATTCTTCTTGATGTCTTCGTGAATGAAGCCTGT -TTAAAACTTGTGAGCAACAAATCATAAAATTAATGCGCAGCAGGTGTGATGATTGACACT -TACCAAATGCCCGCAAAGTTGTGATTCCTTTGACAATATCAAGAAAGTGTGTGCTAAGCG -TGTGTCAGTAACGATAGTGAATATACTTGACGGTGTGCTCCCATTCACTTACTATAACGG -GCTCTTGGCTTCCAGGTCGAGCAATCGAAGCTGCCTCGAAGTGCGCAGGTAAAACTTTTG -TAGCACGTAAAGCAGGGCGGCGAGAAATGGGAAGGCGATAGCCAAATAGGGAGACGACGT -GAGCATAACAGCCATTTGCCCAACAACTTGCGTTATCTATTCATGCATGTTAGCTGCTAG -ACTCAGGTCTTGATTCGTCACGCGTTGGATTACCGAGAACAGGGTATTGACGGTGGCATC -TGGCAGCTCCGTGTCAATGAGATTCAAATCTTGTGAAAACAGATTGGTTGTGACACCTAT -GTCGGTTTTAGTGAAAAAGCGAAGTGGTGCACAGAACAGCGTCTTCAGAATGTCTTCATG -CAGGTTGGCGCCAGCACGTTTGATGGATACTATCCAAATGGAGGCACCAAGTAAAAACAA -AGAGATCAAAGCACAAATCTGAAAGAGCGCGTAGATTCCGGCCCAGTAACTCGAAGAGTG -GGCTGGGCGTTGCGATTTGATGTCCTCAACCCAGTACGTAAGCCCTGCAGTAGACCGTTA -GCCTCTGAGAGCTTTCTTTGAAATCAACCCCATATCAAAGGAATCTAAATACTTACAGAT -GGTCGAAAAGTTCAAGAAGAAACCCCAGAGCACAGCAAAGAAAACGGCAGATCCTGCTAG -CAGCCATCCCATGCTTCTGATATAATGCTTATACACGGTGCCATCTCCAACTTGCCGCGA -TGCGTCCATGTTAGGGTCAACAGTAAGCTTGGCCGCTTTTTCATTCTTCACCGGCGACGA -AGATTCCTCTGCGCTCTTGTTGATACTTGGTTCCTCCGAAGATGTCTCGCTTTCGGGTCT -CTTCAAACCCAGACGGTGAACATAGCCACCACCAGCTATCAGTGTGTTAAATGTGCCCTG -CTCACTAATGCTACTGTCCCCAAGGGCTATTATATAGTCTGCCGCCTGTAGGTGTCGAAC -GCTGTGCGTGCAGAGTAAGACAGTGGTTTGGCGGCGTCTCAACAATCCTTCTTGTCCAAA -GACTTGACGGAAGACTTGCTCTTCAGTGTCCGCGTCTAAGCCACTGAAGACATCGTCGAG -AATAAGGAAGTGGGCTTGTAGGTATAGGGCCCGTGCTAGGGAAATGCGCTGCTTCTGGCC -TCCAGATAGGGAGATACCGTCGGATCCCACATTTGTCTCATCGCCTTGTGGTAGAGTGTT -AAAGTCAAATTTGAGAGCTGTTGCCTCAACCACCTGTGCATATCTGTCCTCATTCACAGG -AGAGAAGCCTATAATGTTCTCCCTAATGGTTCCATTCATTATAAAGGCAGTCTGGTCGCA -GAATCCGACCACCGCCCTTGCACCTAGTGTGACTCTGCCTTGGTGAAAGGGAATCTCTCC 
-AAGTAGCGCCTTGCAAAGTGTAGACTTGCCCGATCCAATGGGTCCGACAACAAGGGTAAG -TGACGACTTTGGTACCGTAGTGTTGATGTCTTGTAGGATGAACTTGTCTTGTTCCCAACC -GAAGCTCCCGTTCTCAATAACCACTTCGGACTCGTATGAAAGTTCACGGCTGAGCGAGCT -CTTCTCGGAGCCTTTCTCGGAGGAGGCCACGACTTTACGGAAGTCTTCACGAGTCTCGCA -CTCTAGAAATGCCTGAATTCGGCCAATGCAAGCCAGGCCAGAGATGACTTCGGGAATACT -CTGGAAGATCTGCGATAGAGGCTGCGTCATGAGGGTTATGAAAGAGAGACTTGTGAATAT -CTTCGATGAGTCTAGTTGATGTTGGTTCTGGGCAAAGGCAAATGTCACGGCCGGGCTAAT -GAGCAGCGGAATGAATCCAAAGACCGCCGCCCCAATATAGATCTTGCGGTAATGAACTCC -CGCTATAAGCTCCTCGACGCGCAGCTTCTGCACGAACGTGCTCACAGACGAGGAAAGACC -GGATATCTTGAGGTTCTTCATACTGGCAATGATAGCAGCCGTCAGACCAACGCGCTTCTG -GACTAATGACATCCATCTTCTCTGTGCCGTACCGATGAAATTCATCAGAATAACTAGACC -CAAGAAGCAGAGAACCACGACGACGATAGGAACGACAAAGACAACTCCAAGGCGACCATT -CAACATCCACGCACATAGCGCTACCTGGATCAGACTTGCCCAGATATCATGTAGTTGTCG -GAAGCCTATTCTAATTCGCTCAAGATCGGTACTCATGAGAGTTAGCGCAGCGCTGTCATC -GGCGCCGATGCGAGCTCTTGTGGCCGTGATAAAGGTCTCAGTAACGAGGATAGATCTTAG -CATAGTGCGCATGCGATGATGATAGTATCTTTTATATATGTTAGCGGTGTCTAAATAATC -CACTCACTAAGCGCCTACCGGTAGAAGGCCCAGCTAATAGCAATTCCGGTATAGATCAAA -AAGCTGGCGCCAATTAATCCATAGCCAAAATTTGGGTCGAGCTCATCCTTTGAAAGGTAA -TCTAGGAGCCCCTCAATAAACAGCGGCTGAGCGATAGAAAAGCCGAAGAGGGCCAAGCGG -GGGATAACAGGGAGGAGAAGTGGTCCTATAAGTGTACGTATGAGGGCCTTAGTAAGACCG -AATTTATCGCCCTTCATCTTGGCCATGTCTAGATGTTGAGAGAATTTCTCGTGGAGCCTT -GTTGGGTCCATGGCGCCGTCCAGGGGGTACAAATCATTGACGGAGAGGACTTTGTTGTAT -CCGGACAGGAACAAGCGATTGAGCCAAAAGAAGACACCAAGGGATATTATGCCACTGGTT -TCCTCTGGGCTATGCTCATCTTTGTTCCAGCCAGCCTGAATCCATCTGGACTTATGTTGG -GATTCCAGCAGTAGCACGAGAGCTTTGACAGCCAAAGCAGCAGTAAATAGATTGCTGTAG -GCGCGCTCCGTAGCCAGAGTGGAGGACAGGTAGAGAGTTCTCGACTGGGCTCCGTCTAGT -AGAAGCGTCAGGAGCAGGTATCCCATCAACAGCATAGACGGTCGAGGTGATCGAGTGTGC -TCAACATCACTCAGAGGAAGCATGATAAAAGCCGCTAGCAGCTTGAGGGCTGAGGTCGCT -ATGAACATGGCTGTGGCATCGAAGTGACCTGCGGTGGCCAGGATGACGAGCGCAATTTGC -AGCACAGCGTACACTGTAATGGCACTCTGGAATGATATCAGCAAGCTGCTATATTTAGGG -GGTGGGATAATTCAGTAATAATACACACTATTTTCACAAGCCGAAAGGCAGACGCGTTGA -CGATGATGGGCTTTCGAGCATGAGAAACGATTATCCATGCTGCGAGCACTATGAAGAGGG 
-CAGACGGGATGATGGAGAAGAAGACCTGCTCAAAGTCCACGTTGAAGTCAAATTGCTCGC -CGTTCTGTTCGAAAAGCGAGCCATTTTGAGAATTAAAAGGCATTCCGTAGTAAGCAACAA -GCTATCAGGCGGCCGGAATGACTTGGACTATCGGATGCTCTAGCGAAGGACAAAAGCGAG -AATGGACACAGAGGGCATGAATACAAGGAAGTAGGAGAATCTCCCAGCATAAATCGCTGA -GAAATCTCAAGGTACTAAGGTGCGAGAGGCCTAATATAAAGGCTCGATGAGAAATCAATG -GCGCTAGAGGCAGGGATCGGGGTTCGGAGTTGGACCGTAAGCAGACCTTATGTGCAGTCA -TACATATGGGCTTCTTTTTTTTACCTCTGGAAAGGATAAAATTGCTATTTGCAGTCGGTA -TTTTTCATTGGCATAAGTACTACTGAGAAGAAAATAAGAAAATAGAGTAAATTCCGATGT -GTAAATTCACCGAAACATCAATCCGGCCATTTTGAACGTATGAGCATCGGTAAGGGCGTT -CGACAAATAAGACTACATTGAATTACTCTGCTTTCTTATAGAGCAATACCATGGCATAGT -ACATTGGGCCAGCGGAAGGATCGGGAGGCCATTTTGATAGTAAGAGCGGCATATAGTAAC -CGTATATATACAAATCCCTACTTGGACAAAACACAACACAAAAAGGAAACAAAAGAATGA -TGTTGAAATGCTAATCATTTCACCCCATAGATTTACTGATGATACAAGCTTGATGAAATC -TCTCCGCCAGTGGCAGGATCTAGAAGACCAGGACTCCGACGTCAACGTGGCCATCTACCA -CAATCTCGATAGTGGCGGCGCTGACGCTGAGGCTGGCTCCGATCCGGACAACTCCCGTGC -ATTCCTGACGTTTGTCAGGCTATTGATCCCGTCAGGACACCCATCGTTGAAATTGCCAGG -AAGCTCTTCAGTCTCCGCTTAATAAATGATGTGGATGTGGACATCACCAAGAAACCGGCC -GATATGGGGCTCGACTTGCTCATTTTTGTTGAGCTGCGAGCTTGGTTAGAGCTCAGCTTG -GAACTGAAATATCGACACTCGGAATGTTGAATTTGCGAGCCCCTGGGGCTTTCGGAGGGC -GTGCTAGTAGGTGAATTGATGATGGCAATGGTTTCCTATGCTACAGGGTAGAAGATGACA -GGGCCAGTGGCCATTTGTGAACGCCAGTGCGCTATAAAAGATTTTCCCATAGCTTATCAC -TCGATTTTAACTATCAATTTGCTATACGGGTTGAAGAAAAATTTATAGTACGATGAACCA -GAATGTAAAATGGAATCAAGAACAGAGAACACTTGGATTCAAGTGTTTAAGCATCGACAT -ATCAAGCATAGAGGATTCAATAATTCAGGGCTCGACTATCAGGCACTCAACTTGGATTTT -GAACATTTACGGACAAGAATACTTGGTTGCCTCTGCTCTACGGTCCTGTGTTGTATGTCG -TCTTGTTAGTGGTCATGTCAGTAGTCCTTTTTAGTAGACATTTCCTTTCCCTTCCTCTAG -GTAGATAGTAGAGAGAAAAAAAGAAAAGGCATAGGCTGCGCAGACGACTCTCTTTGCATA -ACAAAGGGTAGAGAATCGGCGTGACATTAGCATCTCCAGATGTATAACATGCTGATATTG -CAAAACATACTGCAACATACTGCCGAAATTTATTATAGCGCACTAGTGGTTTGAAAGTAA -TCCAACTAACAGAAAAGACACGCTAATAAAAATCCGGTATCGTCTCTACTGACTGTAGAC -TTAGGAGGTCATCATAGAAGCAAAAACCTATAGTATTTCTATCGCCTCGGACGATAAACA -CTGATAATTCCTTCCTAGACCCCCGAAGTCTTCACACAGATCAATCAAGGCGCATACTCT 
-AAATGCACGTATGCTACAGCATGTTAGGCTACTAAACCAAGGTGGAAAATAACTTCGAAT -AAAATTCATAGGTCTAGTGATTACAACAATTACTAACATCTGCTACCTACAAATCCTTTC -AAACACGAGATCTAGGGTATTATGAATGCATTGTAACAAGTGTACAGATGTCGAAGCGAA -CAAAACGAGCTGATCATTCAGTATGCATGAGACCCGCCCATTGTACATTCGAAAGCCGCC -CACGTTTCAAGACTAAAGGCTCGTGACAAAGATGCAAAAAAAAGAAGAGAAAACAGTTCT -AGGGAATAATCTCGAAAACGATGGACACCCATATGTGCAATGTGCAATGCGAAATTATGC -CTGCAAATAGGCAGATGTCATCATGCAAATCCTAGAAACCTCAAGATGCTCGTGAACTCT -AGATCTTTTGCAAAGAAGAGACAATTTACTCCCAAACATTGGAAAAAAGGTCATCAGATC -CGCCAAATCCACCAGACGACTTGTTGTTATTCGTGCTTGTGGAGGTAGGGTTGGAGACTG -GGGCAGCGCTGGCCCAGCCGCCGAAGTCCTCGTCCGCTGCCACAGTGGGTGCGGGCTTGG -AAGCAGCACTGGGTGCACCCCAGCCGTGGCCCACGTCGTTCGGAGTCAAAGTGTCAGGAA -CCTTCATCATAGATGCAGGCGAGCGTACTGAAGCAGCAGGCGCTGGGGCAGGATCTGGTG -TGCTCCATGCGTTACCGCCCCATGGATCAATGCTTCCAGACATCATGCTGGCTGCTGCTG -TGTTCGCGGCGCTGGCCGATGGGGCTCTAGGGGGTGCGGGGGGCTGCATCGGGGGAGCAG -AAATATTAAAAGCAGACTTTATCTCCTGCTGAGGAGAGGTTGCAGTAGACTTTGGGGGTG -AAACCGAGGAGGCAGATGCAGGAGGAGGCGAAGAAAATCCGAACAGGTCATTTGACAGGG -AGGATGATGGAGGGTTAGGCTTGGCAGTCGGGGAGGTAAACCCACCGAATCCGAAATCTT -GCCCATGGGAAGAGACAGAAGTTGTTCGGGACTGGGGCTTCGAGGCTGACACGGTCGGAG -AAGTCAAGCTATCAAAGAAATCTCCGCCCGCAGATCCCGAGGGCGATGTAACCTTCGGGG -CAGCAGGGGTCGACTTGGCGCCACCGAAAGATGAAAGATTAGAGAATGCCGATGGCTTCT -CGGCCGCTTTGGCGGGTGGGGGAGAAGTAGTAGTCGGGAAGCTCAATCCACTAAAGGCGT -CCGTTAGACCGCCCATGTGAGACGAAGGGGAGGGGGCAGGAGGCGAGGCCATGGCGCCGA -ATGAGTTGCCCCGCTGGTGCTGGACGGGTTGTGCTGGCTGTGGCTTCGAGTACAATGACA -AGATTGACTGTTTGAGGTCTGGCCGAGACATTCCGGCTGGTGGTGCTGGTGTGGATGCTG -TGCTAGACGGATGACTACCAGTGGAAGGCTGGGTGCTACCAAAGAAGTCGAGACCCAGCA -ATGAATCATTGGGCTTGTGGGTCTTGGCTGGTGCAGGCTGCGCCTGTCTGGGTGGCGCAC -GCGGAGTAGGTCCAGTGGTGCTAGGGCGAGGCGGAGGAGAGATATCGTCGCCGAACAAAT -CAATGGAGGCTTGTTGACGGTGTGCCGGGGGTTGGCTGGGAGCAGGGACTCGTTGCGAAC -CAGAACGTTCAATCTTTGCTTTCTCCTGTACGACCGCAAGAGGCTGGTCGGTCAGTGATT -GCTGCTCACTGGCTGGTCTATCATGCGATCACTCACCAAATCGTCATCGCCGTCATCCAG -GGTGGAAGGGTCCGGCATCTCACCGTCCATTACCCATCGCTTGCTCTCGTATTTCGTCCG -GATGAAGTTCTCGATTTTGCTGTTCACGCCGGTTGTCAGAAACGGTGTCATCACCACTCG 
-GGTACTTTTCAAATGCACTTACGCATCGGAGGGAACGTGGCCGGGTGCTAGTTTCGCTTC -CCAGTATCTGTATTTTATTAACCCATGCCCAGTAGACGGTCAATTGGTTGCGACGAACTT -GTTGGCTCTTCCATTGCCCCATCTCACAACACTCTGAAGTTGTTCATCGGTCCACGCATC -GAGATCTACGGACTTGACTCGGCTGATATGCGTGCCCATACCCCTGTGAATGCCCGAGCA -CCGGATGCACACAAAGACTCCTAAGTTCCAGGAAGCCCATCGCGGATCTATTCGGAAGAC -AATTCAGTATATGAGCGCCAAGAAAAAATCCTCAGAGATGAAGCCAGCGTGGGCGCAACT -CAAGGAGGGGATCGGGGATCACTCACGCTTGTTCCGTTTACAGTCGGCACATATCTTGTT -GTGTTCGAGTTTCAGCAGAGCCTTGATAGTCTGCGAGTTTTGAGCCGCTTGGCCTGGAGT -AGGACGTCGAGACATAATAGCACCTCAAGAATTGATGGGTGAAAGGGATAGGGAACAATG -AGAGCAACCTGATTATTCTTAGGTGGCGGTCCGCACAGTTGCGGCCTAAGGCTGCGAATG -TGTATACGGATGGGGGAAAATGCCAGTCACGCAGTTTGATATAAAGTAACATGGGTCATT -GGCCAATAGCAGTAGGTGCATGTTAAAAAGAACATATCAGCTATATTGTAATATTTACTT -GAATATCCACTTAAATAATCTCTGTAGAGCTCAGATCTATTACACCTGGTTGGTCCACTC -ACGCCTCTCCGTATCATGCATGTCGGCCTTTCTGCAGCCTTAGTTCCCGCACGATGATCG -AACCTCTTGAAATCGCGTCAAGCTTTGGACTGAGGGTTCTTCACAGCGCTAGTATCTTCT -GTAAAGCGCAGACTGACACTGTTGACACAATGACGCTCATCCGTGGGAGTCTGATAACCC -TCACCTTCAAATACATGGCCGAGGTGACCCCCGCAGTTAGTGCACACGATCTCTGTCCGC -TTCATACCAAAAGTGCTATCAACATGTCTAGTCACAGCACCAGGGATGGAATCAAAGTAT -GCTGGCCAGCCACAGCCAGATTTGAACTTGTGGTCGGCAGTGTAAAGCGGCGCATCGCAA -CCAGCGCAATTATAGACGCCCTTCGACGGGTAGTGGGAATCATATTCACCGGTCCCAGGG -CGCTCAGTGCCCTTTTCACGGAGGATACGGAATTGTTCTATAGCCATTTGTATGAGCACA -GATCAAACAGGTATCGCCTCCGACTTCAAACCCCTTCATGGTGTACAAACCTGGATTCAA -AACAGCTCTCCACTGGTCTTCACTTCTCTGGTCGGGGGGCGACATAGTGTCGGTTGATTT -CTCGGGGTTGTGGTTGGAGCTGAAGAAGGCACCAAAAAATGGAATGGTTGGCGCAGCTCT -AAGAACCTTACTGCTTGTCTTCGAATCGGGAGGCTTGCGCGTCGAGGACAAGAATCGGGA -GGAGCAAAATGTCGGTGCAAGTCGGATAGGGATAGTCCGGTGTCTCAAGGGCAAAATAAG -ATTGCGAGATAATTGGGGTCGCATAAAGAAGACTAATTAACCGTAGAGGCAACGTTTATA -GCAAAGTGAAAAAAAGAATAAGCAGTGACTGGCAACTGAAGCACCTGCAATGAGAGTGGG -GTGAGGCTAATCCCATATCGGCACCTGAAGTTGGGGCCTTTACGTCATCATGCGGGGAGA -CTGTCGGGGACTCTTCCAGGCCTAGACATTGCTCCCAAGATATATAGCATGAAATAGATT -TATATATATCCCCAGATCTACATTGTTCCAAAGCCGGTTGTTCAATTTTGTTAGAAAATG -CTTTTCATTCAAAATGGCCCCAGGTCCCACCTGGGAAAACTCCAGCAGTGCAGGGGTATC 
-TCAACAAAATCAGACATgtaaagaaaattaaaaaaagaaaaagaaaagtacaagtcaaag -aaaTTTCAACGGCGGTTCACTCCGGTCACTAGCATGGAGCCCCGGTTGAGCTCCACACGC -CAACCCTCCTTCTGCAGCATGTTCCACACGGCAGGTCCAATGTGTGATTTGCGGTCGAGA -GAGTGGTGACCAACACCACACACAATATTGAACCCACCGTGGGAGCTCTGACCTCCCCCA -CGAACGTACTTCTGATCTCCTAGTCCATCCCACCACGCCTGCACTTTATCGCGAGCGATT -CTGACCGCATTGGCGACTGTTACGCCATGCAGATCCAGGTCGGAATGCGACGACTGGCGA -GCCACCAGCCGGTCGGAAGCTGTCGCCAAATGGCGCATGGCGAGCTGGCGTTGCTCGTTT -CCGACATCCCGGTAATAGGCAGAGGCGCCTCCATAGAGGGGGTTGGAGCGGGCGCGGCGG -GCGGACTGGGATGCTTGGGCGAGGGCAGCTGATCGGGCTGCGAAGTGAGAGTTGGCGGCG -GCGGCGGCTTCGTTGAAGTCCATGAAATCTGAAGCAGTATCTATCTGCTGAGCGGGAGCC -ACATTCTCTTCCTCGTCGTCAAGGGTGACCGGAGACGAGACAAAACTGATGATATTTGAA -ACAGACGTAAGACCAGGCCGACGAGACAAGGTGTCGGCCAGGTCATCTGCGGCTGAAATC -TGATTATTTGAAATTCGAATCAAACCAATTAGAGTTGTGGGTTGGACGCCGGGGTACTTG -TGACTGAGTTCTCCGACTTGTGTGACGAGTACAGGATCGTCCTCAATCTCATGGATGTCC -TTGGGCGCATTGACCTGTGCCAAGGCTCTGATTGTTGCACAGAGTGACATTGAGTTTTCA -TGATAAGTCGATGCAATTTTCTCCCTCTGCAGAGCACACGCTCGCGAGGAAAGGAAGTCG -ATGTCCTTCTTTCCTGCCTCCCATTTATTGGTGGCGGTGTTGTTCATGGCATGAGAGGCC -AACTCGACTCGACGTTGTTTCTGCTGCTTCTTCTTCTTTCGGGATTTCTTGCCGCCGCCG -TTCAGATCCTCCTTCGCCTGGAAGCCATCAATTGACTTGGGAACGGATGATTGGCTGTCT -TGGGCGGCCTCTTCGGGGGTTTCCTGAGGAATTTCTTTGGCAATTTGTGTCTCTTCGATA -AAAGCAAGATTGAGCAAGACATCCATGGAACGACTAAGGTTGTCGTGACAATCTTCGAGA -ATGCGTTCAGCTTCTAGGCGGGTGATACTCGGGAACATGCTGATCAAGTTTTGTGCCTTG -TCGGCAGTCGTCATACCCAAGAAGGTGTATGCAAGCTTAGAGTCTTTGAGTCTCTGGGCC -TCATGACTGACGTTGTCGGGGTTATTATCATCTTGTTCAGGATGGTCAAACGATGGCCAG -CTTGTAAAATCTGAGGGAGCATCGTTGGATGATTCATCGACCAAAGATCCTGAGACATCG -GAGTTTCGGAGATTGGTTGTGCCTGTGGGGTCAAATGGCAAGTTCTCTTGCTCGACAGCA -GACGCGTTTAACACGTCTAGGGTTTCACGTAACTGTTCAACTTGAGTGGGGATCGCCAAG -TCAAAGTCTGATACGATCGCAACAAACAACGCGGGATCGATTGGCGGACAGTAGGTGCTT -TCAAGCTCCTAAATAAGGATCCAACATTAGACCAGAGCTTGACAAGCAAGCAATACAGCA -ACAAGCTCGGCTTCAATTGTGTGTGAACCAGTGAGACTTAACATACCTTGAAGAGCTCAC -TGTCGTGGTCCCCCATGGCTGTAATGTTGGATTTATTTACGGCGGTATGTAAAGTACTTG -TCTCATTCCCTGTGAAGAACTGTGAGGTTGGACAATTGCAAAGGACGTTGGCAGGAGTGA 
-CAAAGGTGAAAAGTGGCTTTTTTGGTGATGATCTTGATACCTCCGATAAGATAGAGATAG -AGATCAATGTGGGTCATCATGATGGGGTAGCCAATCACAGAGCCATGTGACAATCACGTG -CTGCTATCACGTGGGTGGGGTAGCCCAGGTAGGAAGTCGCATGCATACGCAGATCACGTG -CCATTGTAATGAGAACGTTTTAAACGATGTGACTGAACCTCAACATAAAGCTAACCGGAT -ATTAAGAGACAAAGGAGTTGTTCCCTGATGCTTTAGAAACTGCCAACAAGGAACTTTCTA -ATTATATGTACTTCAGCTATGCAAAAACCCCTAAATCCATACTTACACCTACATGGATCC -CTCTATCTGCTGCATGTCAGAACGGCAGTGTCTATGAAGCCAAGGCTTTTAGTTATGCAC -GAAGGAGTTAGTTTATATGCAGAGTCCACCCTCTATTGACTTCTAAGAATACTTTAAATG -GTGCTGTGCTTGTTCGTTTCTATAACTATATGTTCCGCTGGTTCTGTTTCCTCTTCCACG -TATATAGTGGGACCCTAGGTATGTAGGTTACCCATGCAAACATCACCCATCGCCATGGTG -GACAATTAGCTCTAGTTGTACTAGAATTATCATTGATTTGAACAATATGGTAGGTTCTCC -AAAATTTTAATCATCAAATAGCAGGTGACATAACGTGAAATCACAATATTCCAAATTAGA -TGACTTGATATTAGCTTGTCGTTCAAAAACCATTAGCAAAATCCTCACATACAACAAATT -GATATTCCTTAATCGAGAAGCGATAAGGAATAAATAAAAACTCCTTCTGAACCAGTCACT -CAAATGTACCAAGACTAATATATGTCCAATGTAGCAATGTAATATTTATTCTTCGAAAAC -CTGCTCATATTTTATTTACGCATAATGGCCTACTATAAAGGAAGAGGGGTAAGGTCAGAT -ATTGATGTGTAAATCTTGCTCATTGTGTCAAGGTCCATGAGCTTCACCATTCTCGCATAG -CGAACATCGGGATCTGGCCCCGAGTCCAATTTACTTCGGTGCGTCCATCCGACTTTTGAG -TGCCTACCGACGTGACGATATCTTTGAACAATTATGTCCATCTAGATATATTGTTCAACC -AATCTTCAGTCCTATATCTTAAGCAACAAAAGAAAATCTCATTATGCTCCCATACCATTG -TCTCCGAACAAAGGGAACTGCATTTTCAGCCTGTTTTTCCCAACCTTGGCGAAACTTTTC -AAGCACCAGGATAAAATTCATGGCAGCACGAAAAGAATTTCTCTGCATCCTTCCTGACAA -TCCAAATGTCCTGGCTATCAGGAAACAAGTTAAAGGGTCTGTATATACCGGAATTTCTAG -ACAGCTAGCTTGTGCCGAACACAGCTAACATTGATTAGAGCGCATTACGATGGAATCAAG -CCATTAATAGCTGCCGGAAAGCTCGTGGATGGAGGTGCCATCTTTGAGGAGCATCCCGAA -GAAGGAAAAGATGCCTTGTTTAAGGGTAGCGTTATTGTGTACTCGGCCAAGAATGCCGAA -GAGGTCCGCGCCATTCTCGAGAAAGACATCTACGCCACAAGTGGTGTTTGGGACTTGAGT -AAAGCACAAATTTTACCTGTGAGTAGAGCTTTTCATCCCCATCAATCATAGCTAATAAAT -GTAGTATGTCGCAGCTGTTCGGCAGCCGCTTCTATGAGGAATACAGATTCCCGAAATATA -TATGAGAAGCAGCACATGTGAGAGTTTGCGCGAGGTTCAGAGGTCACACTGTAGGATACA -TTGAGTAGAGCAATCCCCAGTCAGTCACTGGCTGTATGTTGTACATATAAAAGGGCATTT -TGTAGCAATTGAGGTTGTCCACTCCAATAAATGATCCGCGCTGAGGTCTCGGCAGTGGGG 
-CCAGAAGAGAGTCGATACATAGTGAAGCGCAAAGCCCAGAAATGGGCCCGGACCCCAGGC -CCAGAGCCCAACTAGCAGTGGGCCATAGGCTTTACCTTCAAAGCCCATTCATGGGGGCCT -ACATAGGGGTTCAACTAACATCTTGTAATCGCTGTATTTGAACCTCCTACTTATTAGATG -GTCTCTCGACTTTTAGTATTGATAATAGTAGTAGCTATATTAGTGTACGTTTCAATCCTC -AATTAATACCGACCGATATATGTTCAGATATCGCGGAAATATTGAAATAAATATGGACAT -TTATTTTAGATCTACGACTGAGTACCGCCTTAGTGAACTTCTATAATTTATCAATTCCTC -AAGTAGCACGCCTTCTCGCTTTCAATTCTTCGATAGAAATGAAATATTTGTTAATAAAAA -TGATAGAGTATTACCCTTTATTCCCGTGTACAATACATCACCCCCGTAGCTGCCTAACTA -CAGCTATGCCCAAGCGGAGGTCCAACCATACATGGAACTACTCCGTACGTAGCTTGCTTC -TTGTGAAAGGTGCTAATCGTGAGCTCCAAGTTGGAACCCATGGAAGCGCACTAGTCGCTG -TCGCTGCTACAGAAGGCCTGGAGCAAGTGAATTATCTTGTGGAGAAAGGCGACTGTGGAT -CTCCATCTCAGGTTCGGATCTTTTAGAAGTGCACTCACCGCTGCCGCTGCGAATAACAAC -TTCAAAAAAGTGAAATATCTCGTGGAAAACGGCCAAGCGGATTTGGAAACTTTGCTTGAG -TCTGGCTCTCTCGGAAGTGCCCTTCACGCTGCAATACTCTCGCCTTTCGACGCTCCAATT -GACAATGATATCAAACCTTTCGCGGACCGAATGGTGTATGTGAATATAGATGCCGCAATC -CAAACTTTGGGCGACGTTGTTGCCCGAGTGACTCTGTCTGCGAATTTTGTGCCTTCTTGA -GTTCAGGGAAATGCTTTCAGTATTGTGAAATATCTCGTGGATGCAGGGGCAAATTTAGAT -CTCGAGCTCCCGACTTGGCGCCTACGGGAATGCTCTAGCCGCTGCTGTTACAATTGGAAA -CCTTGACCAAGTGAAATATCTTGTAAGTAAGGGCAATGCGAATGCAAACTTGCAGCTTAA -GTCTGGATCTTTCGGGTGTGCCCTTGCGGTCGCTATTATCGCAGATTCCAAAATCCGGAA -GCTCCGCCCTGAGAATATTCGTGGTATTCAGCAGATGATTTCCAACCAATTTCGTCGCAG -ATATCATATCCAGAGATCGCTCCGGTCCATAACCGATACAGCGGCTCGATATATGGTTAC -TCTATGATGAATTCTCTGGTCGATATTATGGTCTATCTCCTGGAATCAGGAGCCAATGCG -GATCTGCAGCTCAATGTCGGGCCGTTCAGGAGTGCACTGGCCACTGCTGCTGCAACAGGA -AGCTTTTGGCATGTCAAATTCCTCGTGGAGAATGCTCAAGCAGACGTGAATCTCCAACTT -GAGAATGGGCTTTATGCCAATGCCTACGAGGCGGCAATAGCACATCGCGGCATATCTCAT -ACAAACAGGAACAGATTCTGAGCCTACAAATTTGGACAATCGCTCAGAGTAATGATCTTG -GATCCTTTGAGCCATACTTCTTCATGTGATATGGGAATTGACGTCTATTTGATGCTGCCC -CGAGGGCTAAACTAGGGCTAAAGTGTATACTTGAATCTTGAAGTGCATAGAAGTTTATCA -TCCCTGGTTGATATACGATAAACCTATACGTACCTCGGAATTTGTCGATCAATGGTTAAA -AAGGGTTCTCAAACTCCAAAAGATACCTTACGGTGAAGTATTCCTGGGGAGAGTGTATAC -TGCCCGTGTTCTATATGTAATATTGTGAGCTGAATAGATAAATATTGAAGTCTTACGTCA 
-AAAAGAGAGGCTGCACCAGGCACCGACTATTTTTGGGAATATATATCCAAATGTCAAAGT -AATCTTGATGGTATACCTAGGTATGAACTAACATTATGTTTGGAACAACTTTCAGCGTTT -AACAAGATGAGTAATTGACAAACACACAATGAGTCCGTAATTCATTCATGTGGGGATGTT -AAGCGTTCGGAAAAATAATGAAAAACACACAAGGTATAGGATTGTGGTGTACCAAGGAGG -GAGAGAATAGAAAAAGGAAACCCCCAGACGCTATGCGAAGAGATAAATCAAAGCTTGTGC -CCAAACACCACACCCGACCCGCGTGTTGGCCGAACATACCGCACCCTCTGCTGCCAGGTG -ATCCGGCAGAAAAAGAGAGGCAAGTATCAAAAAGGGCACCCCCCCTGTCAGAGCCTCAAT -TCACCTTCCCTGGTGTAGGATGCCAATGAGTGCGTAACAATATGATTCGCCTTGAGAATT -GGTACACGGGGAATTATGGAGAAACATGTGAAAGGATATGTGTAATGCATGAAAATGGGA -GAACGCGGCTTGAGTGTACATGTTCTAGGAGCATCCTGAACTGACAAAAAGCATGGGTAT -TTCCTTGCGCTGCTAAAAACGAGAAGGCAGAAGTGTGGGAAAAGAAGGAGAAAGGAATCG -AGTGAGGATGCCAGCCCCAGATTCACCACATAtcatcgtcatcgtcgtcgtcgagttgga -cgtcgtcgCCAAAGGACGAGTGCCCGTACTGGTAGTGGCCGGGGCCGTAAGCTTGGCCAT -TTTCATAGCTCTGCTTGTCACTCCTTGAATGGCTACCGTAGCGATCATGCGGATCGTCAT -CATAGTCGTACATGGAAGAGTCGTCGTAGGCACTTTCTCCGTAGTGTTGTTGCTGCTGTT -GCTCCTGTGACTGTGCGAGGTTGTTAGTGATCTTCTACTTCCCCAGGACAGGGGCCATAT -TTGCTTACCTCTTGGTATCGCTGATATTGCAGATACTCATCTGGCTGATCTCTTGATTTC -GAAGATCTCTGGGAAGAGCCAGATGAAGCAACTGGCATCTGCGGGTGCATCTGTTGCACT -TTGGCCAGATACGAGTACATGAAGTTGCTCAGTAATACCTGAGAGTACAAAGCACGCCGT -GGATTCGCAAGTTTGATATGCGCCATCCGGTAGATAGCTCGCTCCTCCAAGATCGAAAAG -CGAGACCATGCATAATCGACGTCGGCGCGAAGTACTCGCATTGGGGGATCGGGAGAAAGG -TTTCGCGACAAATGTTTTCGGTGATGGCCTTCATTGGCTTGCTTCTTTCGCCCACCGCCA -AAAATAGAAGAGAAAATCCCGTCCTTTTCTTTCTTTGAATCTCCCGACGCTCTCCTCACG -CCGTCTTTGCGACGTTCTTCATCAAGCTTCGGATCGAGACGATCCAGAACAAGACTTTCG -CGTCCCCGCGGAGTGCTCTCAATAGATGATTGTAAAACATCTAGACGTGTGTTGTCATGG -GCTTTGTCGACAATCTTTGATTTGGTCTTTTTCGAGTCGTTGTCTTTGTCCTTTTTCTTA -TCTTTTTCTTCGCTGCCCAGCAACCAGTGCCAACTCGACTTGCGGCTTCCATCAGAATCC -TTCTTGTCTTTTGCTTTCTTGCTCTCGGATTTTCGTTCTTCGGGTTTGCGTTCCTCTGAG -ACAGTAGGGATAAATGAAAGACTGTCGGTACGGGTGGTATTGCCAGGTATCATCTGAGGG -TTATTGGCAAAGTCATTAAGTGAAGATGCGGGTTCGTGGGGAGGTTGGCGAGGAATTTTG -GGTCGTTTTGGTGGCAGGTCATGCGAAGGTGCGGGCTTTGACGACGGTGTAGATGCTGCG -GCACGATCCGATGGCCCCCTACTTTGTGAGGGGGGACTTGGTATACGGTTGGATTCAGTG 
-GCAGGCGGTGTTTCAATTATTGCAGGGATCTTCTGGTCGCCTGCGGCAACGTTCAAGGTA -GATCGGCCATTGGAGCTAATACGGGAGTGCCATTTCCGTGAATTGGCCCCGTCTCCTGGC -TCCGATCGATCTAGAGCAGCATCCGGTTGTCCAACTGAGGCGGACAAGCCTTCCTGCTCG -GGCTCGGGCTGGTCGACAATCGAATCGAATGTCGAGCTCACGGGTGAAATAGGGGCATAA -GCTGAGCCATGCGATGAGGCAGAGGGGGCCCGAGCCGCACGCGTCGTTTTGGGAGTAGGA -TCAGTGGAGACACGGGTGAGCCCCGAATTTGTGGATTCTTGAGATGGTAGAACAACCGGA -TGTCCTGTATCACTCCCTGCGGATTCAGATGCTCCCCGTCCAACTCGTTTAGAGTAGGGC -AACTTTTCGCCCTTTTTCAAACTGCCCGCTTTCTTGTATTGTGTCCGGGTTGATCGGCGT -AGACTGTTTCCTGGAGGTGCGGGGGGTAGGATCGGCCTGTCCTCGATTCCTGTGATACCT -ATATTCTGTATGCCACGCACGAGTGATTCTTTGTTCATCGGTTGAGTATCATCCACCAAG -CTTTCCAACTGTTGCAAGTTGGGACTCAACATGCCAGCTCGCTTACTCGATTGAGAGCGT -TGATGTTCCAACCGATCCGCACCATCTGTATAACCCGAAGAATTGTCGATTTGTCTTGAT -AACATGGAGCGCTTCCGATTAAGACCACCTCCAGATCCTCCACGTTCCGAACCCAAAGTC -GAATACTCGCCTGATCTGCGCTTGATATGGTCGGCCTTGCTATCGAGAAATGATTTGAAT -TCTTTCGGAGCCAGTTCGGGGTGCAGTCGCGCAGGAACCCAGAACAACCGGGAGGCGTCA -TCTTCGTCGGCCGAGGGCGAGGGGGGTGATGCGAAGCCTGGGAGGTCAGGGTCACCGGCG -GCAGCCACATCCATCGACATGCGCCGAATCGCGGCAAGATTGTTCAGTTCCTGAACGACG -GCTGCAGGATTCGATTCATAGTCGATATCGTTCAAAGGAAAGGAGGACGTTGCAGTGTCT -GAAGGCATACGAGCACTATCGGAATTTCCGGTATCACTTTCTCGATCAAAAGACCGGTTT -CCAGCAAGCGGCGGCCGCTGATTCCCATTTGTCCGTATTGTTTGCTTATCTGAGGAGGAT -GGGTGAAGTGGTGAGGTTGGAGAAAGAGAGGACTCGGATCCTGCGGCGGCGCGCGGGATG -GTGGTTATTGACTCGCCGAGAGGGGTTGTTATATAGCTCAGGCGCCTGAGATCATTACGC -TCGTAGTCATCGTCATACATGTGTCCGATGGCTTCTGTCACATGGTGGCTTGGATCGCTA -ATTGATAGATGGGCGGCATGACCTCTCCGCGTCGCATGCCCGCCAGCCTCATGGGCACTC -GTGGAAGCCTGAACGGATACGAAATAACATTAGAATCAAATCCGCGAGTTGCATGCCGAG -ATCTTATTCTTGGGGTCCTTAACCAGGGGTCCAATTCCACAAGAATATGGTTATGCCTGA -GGGGTTCGAGGTTGAGGGGGCCGGCAAAAAGTCACGCCCACTTGGACCAAAAAAACGGAG -ACCGAGAAAGCGACGCACCTGCATGGGAAGCCAGTTGTACAGGAACCAGAAGACGTCAAA -GACGGAGCCCGAAGGGCAAAAGCTTAGGAGGAGGGGGCAGTGTCAACCACGTTGGATAGC -AAGGAGGAACGGGTCATCGAAGCGGGGCAAGAGGAATGGAGACAAAGGGCCGTAGATACT -CGGGTACTACCGAGTCCGAATGAGCGTCATGGATGgagaagaaaggaaagagaagagaag -aATGGAGGATGGGGGAGTTAAGCGGGCGGAGAGCAGATGAGGGTGAGAGAGTGTGTACTG 
-ACTTCCGCACCTGACATCTTACACTTCAAATTGTAATCCTTCTATATTAAGAGTTTCTAT -CTAGGAGTTTTATTTTTTTTATTCTTGATTTTAGGCCTAGTAAGTCTTTTAGGTCTTATT -TCATTGAGAACTTATTGGCTCCACTTAAATGGGGGATATGGTGGACGATCGGTTGATCTA -CATCCTCCACGTTCTATTTCTTTCTTCTACTTTGAATTCAGTGATACATGCTTTTGGTTC -AGATTCCAACATTCGACTCGAAAGTAAGTACTAGCTATCTTTCAGACTTGACTGACAATA -TATTTCTTCGATCCGTGACACCGCATTTATGTACCTCAAGGGCTGAATCGCCTCCCCTCG -GACTCATTACATTCATCTTTGACGCAAGGTCTCGATTGACATTGGGAAGGTTGATCACAC -TAAAAGTCCAAATTTACCATGAAAAAGCCGATAAATATCCTTTTGTCCATAATGTGCCCC -TATGGACGGAGTAGATGTGGTCGGACCTGAGATTCTATAGAACGTCGTGTGACTTGATTC -AATCTCGGGGACCGAAGACAATAGCTGGATATGCCGCGCACCTTAGCATGCGAGTGTTTA -TACAATTTATCAAGAACGCGAGTTGGTCCAGGCCTGAACACAAATTTCAACCAACCGTGA -GATAGAGAGAGAGCTGAGTGTATGGGATTGTATGTGATCAGACCCAAGGTGTCAGCCTAG -GCCACCCAAATGGGACTGAAAGCTAAATTCAACCTGAAATCCTTACAAGAGGCTTATGAG -GCATAAATGGAACAAATCTGGAAAAACCCCGGGGGAAATCATAATCAGATGGGAAATGCT -CTGCATAAAACCAAAAGGATCCAAAATGACAGTATCCCCAAGCCCCCAGAACAAGGAAAA -AGTATGACGAGAAACTCCACGCTGAAGAATGTAAATGGCACCTCGAACATAAATAAACGA -GAGAATGGAAATGGAAAGAGTTTGACGAATCCGTATGCGCCAGGATATATGTAGGGCAAG -AGTTGGGAAAATGTATGTTCATTAAGTCCCAGAGAAATGTGGATCAAAAGTTCATCACAG -CGTGTGCTATATGGACCAGCATTGGACACGCACAAACTCAAGAGGCCTCAACATAGTTGC -TGCGAATGTATTAGCCAACAATTCTCGAATGTGAGAGGAATGATTTCTTACCTGGGGAAG -ATGCCCACGCGATCACCAATGCGACCAGTCCACCAGTCTTCCGCCTTCTCCGTGCGCTTC -ATTATGGTAATAATTTCACCCTTCTTGAATCCAAGGTCACCCTCCTGATCCGCATCGAAT -GTGAAAAGGGCAACGGCTTGGTCTGACCGCAATTGGGCGGCACCAGTCTTTTGCTGGAAG -ACTGGCTTAGGGGCAGTTGGGCGGGTAGGTCTGCGATCTGAATAGACATATTCCTCCTCG -AACGGTGCAGTGTTACTCCGGTTGCGCCCATAGCTATCAAATGAATCGCCGGCAGGGGCC -CGGGTTGTCGCGGAACGACCAAGTCCCGAGGCAGGCCGATCATAAACATCATCAGCCCAG -GTATTCGCACGGCGTGGCCTGTCGCGGTATTCGTAATCATTTTCAGTGTTGGATCGACCA -CGCTGGGTACCTTGACCGTAGGCATCGCCCTTGCGTCCCTCCCAAACCACATCGTCATGG -CTCTCATCATAGACAGGGATATCATTGTACATGGAGTCACCATAGGCCCGCGCATTTCCT -TGGAATGCACGAGAATTCAGCACGCGCATGAGAGCATCTGCGGCCGGCGGGGGGCGAATG -CTGCCGGTGAGGAGTTGGTTTGCGGAGACTCGACTGCGGTACAGTTTCTCGTTCGCATCC -CTGCGTTCCACCAGCATACTGCCTTCGAGACTGACACCAGCGAACAGACCCTTGGTCTTT 
-GAGTATGAGAAGACGGCCGCAACGCCCTTCATACTGGCTGCGCCGGCTGCTTCGGCATTT -CGGCCAATCGGGCCAGCAGCAAGTGATACATTACCACCCAGTGTCAGTGTTCCTGCTTGG -GAAAATGTGCGAACAGCAGCGGAATCATTGAGAATGAAAACAAAGTCGGTCAACTCAAAC -CCGATTTGTCCTCCCACTCCGGCACCTGCAGTCGCGATGGCTGATGGAGCCGACCAGGTA -CCATCGCTGAGACGAGCAACAACGATACCGGAACCGAATCGACCGGAGCCTAGGAAACCG -GCCTTTAGAACCGTAAGGACTGCGAGACCCTAAAGGGAAAGGTCTGTTAGCCAGTAAGGT -TTGCAACCGGGGTCGGATGGGGTTTTTACCTTCGCATTGGCCAAAATCTCAGGGGGAATG -ACCTTGTCGGGACCAAATGATTGCCTAGGATCAACGAATGATGTCAAGATCTTTGCTGCC -TTTTTGCACTCGCCTGTTAAGAGAGCATCCACTGTCAGCCGGATATGCCGCATACGACTG -CGGGGAAGTGGGATTTGGAGATTGATGAAACTTACTGGCCATAGACGAGGGCAACGGATT -GTTGATCCCGAGAGGCATCTTGGGTGGCGATTATCAACGTAGAAAAGAACGGAAACTCGA -CTTGGGGAGGGAGAAAAGGAGACTGTTGCTGTCGTGAAAGTAACGGTTAGTCAGTTGAGG -TGTGGGCCTGCCCAGGTGATGACGGCTGACGCGGGACACCGGTTCGGAGGCTCAAGACGT -ATCTCACAGAGCTCCGAGGCTGAATTAATATGGAAAATTCTGGGGATAAGTTCAATTTAG -TGTCCAAGTATTGAATAAGAAATCAAACCCCCTTATTAGTAGTCATTGATACTTCATGTA -TCTTGTCAATTCTGGGATCTCTTGTCTGTGGCGGAGCGAAACGAGTGTTGCTTGGCAAAG -TGCGAGTTTATTAACATTTGAATACACATGTTGTATATGTTTTGACAAGAATAACAAAGA -GATTGTAACTAAATTAATAGTCTCCTGTGTGTACAGAGGTTGTATGGCCATTGTATAGCA -AGCATATGTGCACTTGTAGATGCCACAGGCGCCAAGGCCATTATCATATGACGATTGTCT -CTCATCCGGAACCCTAGAACCTACCTAGGTAATCCACCTCCATGCACTTCCAAGGTTGAC -TACAAGGCCATGGTTAGATTATGATCAATAAGTGACTTCGCTAAACGCGAAAAATGGCCT -TAGGTTTGTTAGATATCTCAATCTCTGAAACACGGCTAACAGATTTCAGTTCGCATTTGT -GTCTGCGGCGACGAGGGCACTGGCAAGTCCAGTCTCATCACCTCGCTCGTCAAAGGTGTG -TTTGTTACGAACAAGATCCAACCAGTCCTCCCCCAGATTACCATCCCTCCTACCCTCGGG -ACCCCCGAAAATGTCACCACAACCACCGTTGTCGACACCTCGGCCCTTCCCCAGGAACGC -AACAATCTCGCGCGAGAGATACGGAAATGTAACGTGATTCTCCTCGTCTATTCCGACCAC -TACAGCTATGAACGGGTAGCTTTGTTCTGGTTGCCATATTTTCGCTCGTTGGGTGTCAAT -GTCCCGGTGGTCCTATGTGCCAACAAGTCCGACCTTGCGACTGGTCACAGTGAGACGCGA -GTGGTTGAGGAAGAGATGCTTCCTTTGATGGCAGAGTTTAAGGAAATTGATTCGTGTATA -CGTACGAGTGCTCGGGAGCATCGTAATGTCAACGAAGCCTTCTTTGTTTGCCAAAAGGCA -GTCACTCACCCGATCGCGCCGCTGTTCGATTCCAAAGAGGCGGCTTTGAAGCCTGCTGCG -GTAGCAGCCTTACAAAGGATTTTTTATCTTTGCGACAAAGATCGCGATGGATATCTATCA 
-GACAAGGAGCTCGAAGATTTCCAGGTCCGGTGCTTTTCAAAACCATTGAACGAAGCGGAT -CTCAATCACATCAAGGACACTATACAAAAAGCCTGTCCGGACTCGGTTACGGAGTCGGGA -ATCGACTGCCAGGGGTTTATTCATCTGAACAAATTGTACTCGGAGAAAGGGCGACATGAA -ACTGTTTGGATCATCTTGCGAGCGTTCCAGTACACGGATAACCTCTCCCTGCAGGAGAAA -TTCTTACACCCGAAATTCGAAGTACCGCCGTTCGCCTCTGCCGAACTGTCACCTGAAGGC -TATCGGTTCTTTGTGAACCTCTTTCTTCTATCCGACAAGGATAATGACGGGGGGTTGAAT -GAAGCTGAACTTGCGTCCTTGTTTGCTCCCACCCCAGGCTTGCCCGCATCATGGGCCGAC -GGGTCATTCCCATCCTCCACGGTCCGCAACGAGGCAGGACATGTAACACTGCAAGGCTGG -CTTGCTCAGTGGAGCATGACTACCTTCCTATCCCCGAAAACTACACTTGAATATTTGGCA -TACCTAGGCTTTGAGCCATCTGACCAAAGCGATCAATCTATTACTGCGGCGTTGAAAGTC -ACACGTCCACGAAGAAAGCGTCGCCGCCCGGGCCGTGTGGGACGAAATGTTGTCCAATGT -CATGTCCTGGGAGCCCCTGGATCCGGAAAATCGGCCCTGCTCGATGCACTTCTTTCCCGT -GGATTCAGTACGACCTACCATCCTACCATCCAACCACGTACCGCAGTGAATACAGTGGAG -CTTCCGGGCGGGAAACAGTGCTACCTGATCCTAGACGAGCTAGGTGAGCTGGAGCCTGCT -TTGCTGGAGAACCAGTCGAAAGTGCTGGATCAGTGCGACGTGATCGCATACACGTATGAT -TCATCGGACCCGGACTCATTTTCATATATTCCTGCATTGTTGGCAAAATATCCTCATCTG -GAGGAGCTTCCGAGTGTGTTTGTTGCCCTCAAAGCTGATTTGGACCGGACGACCCAGCGA -GCCGAGCACCAACCCCATGAATACACCGCTATGCTGAACATGCCGAGTCCGCCACTCCAT -GTGAGTGTTACATGGAGTTCTATCCAGGAGGTGTTTGTGCATATTGCTGAGGCTGCCATG -GAGCCCAGCACCGCTTTCCCGCGTAGTGAAGAGGATGTAGAGGGTAAATGGATGTCGTGG -GGAATTGCCCTCGGAGCGGTGGTTTGTGCGGGCGCCGCCGCAGTTATGATCTGGCGACGA -GTTGGGAGCGGAAATTAAGATCATGTTCTATGACATACTAGCCATACTAGCCATACTAGT -TTATTATGCAAATAAATGTTAAATGAGATAGATTGATACTTAGGCAGATCGGCTTAGTAT -CCGGCTTAGCCGGAGTTAAGCCGGTGCCAGGCACGTTTCTTCTCCGCTCGAACGTTACCC -CGGACATCGTTTCCCTTGCTGATCCTCCCCTCGCTTGTATCTCCAAACCCCCAACCCTCT -GAAATCCGAATCCTGATATCATTATAAATATCTTATTATCCAGTACTTATATACGCCTCG -ACTTATTGCGCATTGCAACTCGCTTTCCCTGCAGCTTTACCTGCGACTCGTCCGTCAATC -CAAGCCTCAACGCCAGGCACAACACATAAGAGGCAGATATCAATATATAAACCCTGAGAC -GGGAAATTTTGGAAAGAGTGCCATCGAGGAGCTAGCAAAGTTTACGCGTTTGCCTTATTC -GGTTCCAAGGTTGAACTGTCACACCGGATCTCGTCCAAATTGACCCTGTACGAGTGACGT -CTGGTCATGTTGTAAGCATTCTTCCCTCACTTCTCGGTCTCGATCTGCAACTTGAGGCTC -TTTTCTCTACGGACATAATTTGGGACCAGTCTCAATTACCAGATATCACTTCATTTTCTG 
-ACACCAGCAGTGTCCTTCCCCCTCCTCCTCCGCGCTATACCATCCCTGTCGCATATGCGG -CGGGGGCGGCAAACGGTATGGCGGTCCCTGTTGTGGAAACGAATAATACTATTACACACC -CGGAGCGTGGGTGTCCATTGCAAGTCGGCGAGGGTATGTGTACTCGATCGTTGATGTCGC -CCCCACACCTTAATCCCTTCAGAATCTCAGAGATGGCTCACGTTTTATATTGCGACAGGA -ACCTATATCCTCCAGGATGATCTACTTCTCGCGACACCCCCTCCACACCCATCTGAGGCT -CCTATCATCAACCCGAACCCTCTTGCGACGCTACCCACACCCCCTACCTCTGGCGTGAAG -CTATCTCTTGTTACCCTCGACCCTCGAAAGAAACCTCCGACGTTCTTGAAATCGACTGCT -ACAGCTCCCCAGTTTGGAGATGGAAACCCCGCCCTCGCGGCGCCACCGGTGGCAGCGAAG -GACGCAGCGAAGCGAAGGAAGCCCAAGAATAACATAATCAAGAGCAGTTCTTCCTTCGTA -TCGCGAGTCATCACCCACGAGACATCTGCCAAGCGATTGGGCGATCGTGATTCGAGTGGA -ATTTTTGCCTTTGCGAATATCAACCGAGCATTCCAATGGCTTGATCTCAGTTCGCCCATA -AAAGAGGAGCATCTTACTAAAGTTCTCTTCACCAAGGCACACATGCTCTGTCACGATGTC -AACGACCACACCAAGACATCATCACATTTGGATATTGTGATGGGATCTTCTGCAGGCGAC -ATCATTTGGTATGAGCCCATGTCGCAAAAGTACGCTCGTATCAACAAGAACGGCGTGATC -AACAACTCCCCCGTTACCCACATAAAATGGCTTCCTGGGTCTGAGAACCTGTTCATGGCA -TCACACGCCAATGGTGTGCTGGTCGTGTACGACAAGGAGAAAGAGGATGCCCTTTTCACC -CCCGAGGCGAATGGCCATTCCGAAGAGGTCGAACGATTACCATTGGACGTCCTTAAATCT -GTGAACTCCAAGAACCAGAAAACTAATCCGGTCTCATTTTGGAAGATGGCAAATCAGAAG -ATTTCAAGCTTCTCCTTTTCGCCGGATCAAAGGCATTTGGCCGTGGTTCTCGAAGATGGG -TCGCTGCGGCTTATGGATTACTTGAAAGAGGAGTATGTTTGCCTTAGACTAAGATAATTA -AACTGAGTCCCAACTAACATCTTCTAGGGTACTGGATATTTTCCGCAGCTACTATGGCGG -ACTGATCTGCGTCTGCTGGTCGCCGGATGGCAAATACATCGTAACCGGAGGCCAGGATGA -TCTGCTGACGATTTGGTCTTTCCCCGAGCGCAAGGTTGTTGCCAGATGTCAAGGACACAA -CTCCTGGGTTTCATCGGTAGCATTTGACCCTTGGCGCTGTGACCAGAAGACATATCGGTT -CGGCAGCGTCGGCGACGACTGCCGCCTTCTGTTGTGGGATTTCAGCGTTGGAATGCTCCA -TCGTCCCCGTGCACACCAAGCCTCAACTCGAAGCCGGTCCAGCATCATATTGCCAAATTT -ACAAACTAACAACCGCCACCGTGCAGACAGTGGTGACAACCGTGTGCGCTCGGACTCCAA -TGAAACCGAAAAGTACAGTGAAAATATCGATCAGACGCCGAGTCACCCCGTGGAACCCCG -GTCTCGAACAGCTCTTTTGCCCCCGATCATGGTAAGCATCGATTGGCTCCCAGATTACGA -GCTGCAAAATTTTAACAGACCCGTAGTCCAAAATCGTTGGTGAAGATCCCACTTGCTGGC -TTGGCTTCCAGAAAGATTGCATAATGACTTCTTCTCTTGAAGGTGTGTATCATGAGATTG -CTTGTGGCCAATAATACTAACCCTTCGACACAGGCCATATCCGTACTTGGGATCGACCCA 
-GCGATAGCGTCAAGTCATGATGCTCCAGTTGCGGCTTTAAGTCCACGTTTCAGGTACAGG -ATGGCTTAAGACTTCATAACAATTGAAACATGTGATCGAACCCGATTCAACAATCTTGGA -GGCCCTGGAGGTTACCAATGGGCGGTTCAATTTTTTAGCGTTGCTTTTTTTTCTTCTAAC -ATCTATGATATCTTTCCATGTCTATATTCCCTTTTCCAATCTGAAAATATTGATTACGCA -TCCATGGATCGTTCTATATATAGACTTATTAGGCTAACTTTAGCATCAAACCTAGCCAAA -AGGCCACAATCGAGGTTTTCAACGTCCTCTACTAACCAAGCTAAACCCTTTCCTTCTTTC -CTTCGCTATCCGTTCAGATAAATGAGAGCAAATGGAGTAACAATCAACTTAAAGGTGTGA -TTGACAGTATAGTTCCAAAATTATAAAACGGGCATACCATTATCGCATTTCTCATTAATG -TAAGGGGCATATCACAGGTCCATAGACCAAatcatcagcatcagcatcatcaAGCAAAAT -AGATCATCACTAGTAAACTAACCCCCCGCTTCCAAAATATCTTCAAAGGGTAATAGCCTT -TGTCCAAACCAACCGCAATAACCACTATTATCTCCAACGAATATATCTAAGGTCCCGAGT -ACCCAGCTGCCCAGTTGCCTAGAGCCGGACTGCAAATGACCGATATTGGTATTTAGAGGA -CCAGAGCGAGCAGACCCAAAAACAGACTTCCAGCAGGGGGGATTAAGACGGCAGCGGCGT -TAGTGGTGGTAGTCGCCGACTCCGTGGCAGTGGAGGTTGTAGTTGAAGATGTGCGGCGAG -TAGTTGTCGAAGTTGTGGTGCTCGAACGGGTAGTGGTGGACATGCTCGTCGAAGTGGAGG -TCATGGTTGAAGCAGAGGTCGATCCTATAGATAGAGTCAGTGATTGTTTTCTCAATTCAT -TTGGGTGAAAAGCAAAGATTCTGCCTCGGAAGCTACGTTTTCCGCCTTTACTAACCAGGA -GTAGTGAAAATATGGGTAGAATGGGATGTAGGAGAGATAGTGCTTGATGTGAGAGTCGAG -ATGGAATGTGTGGATCTGGTAGGTGACATAGACGAAGTGATAATAGTAGAAGTAGTCGGG -GTAGCAGAGGTGAATATAATAGAGGAggaagttgtaatgaaagaagaggttgaaggtgta -gggggagaggagactgaggaagaagtggaagtggtaagCGTCAAGGTTGAAAGCAAGGTC -TTGGTATAAGAAGTTGTATATGTCCAAGTAGAAGATTCACTTGGAGTGAAAGAAGTAGTG -ATGGGAACGTATGTTGAGTAGGAATCAGAAGGTGTCTCAGGAGTGGAAGGGACAGAAGAT -AAAATGGAAGTAGACGCACTTGCAGAGCTGTTTATAAGAGAAGATGTTGTAAGGGTAGAG -ATAGAAGAAGCAGAAGAATGATGGGACGAGGTAGGGATAGAAGCACTGGTGGAGGAGTGA -GCGGTCGAAGACAGATAAGAGGAAGCAGTCGAGGAAGGAGTCGAAGTAAGATCAATGGTC -GACGCGGTGGAAGATATCAAGTACGAAGCAGACGAAGTTTCAAGTGCAGTGCTGGTTTCC -TTACCAGAGCTACTAGCAATTGCACCGGGCTCAGTGACTCGGAATTGCTGAGACTGAGCC -AGAATGCCACTGGTTGGAGAGACGAAGTTGATCTGATATCCCTTTCTATATTGAACATCA -GGTCAGTAGCTCAAATTCAATTCAATTAAACTAAACCTACCCGTCGTCTACACCCGTCAA -TGACTTTGCTTTGACGGTGTAGGAGCCTTTGGAAGAGTCTATATCTTCTACCACCACCTC -TGTTGTCGGAGGATAGACATTTTGGTTGACCAGTTCAATGGTGAATGTATCTGGGTCCGT 
-TCTATAAGATTCTTGATTAGTATATTTCCAGTTTTCATGCAAATCTCTGGTAGCTTACGC -AACAGTCTGCCACTTGATAGTAAGGGGCATATCGGGGTCAATCTTTTCACCAATTTGTGG -CGAAGTGACAAGAAGAGCTGATTTCACTATTAGATGTGTTTGTATTTTTCCAAAAAGAAT -TGCCATACCTGAAGCTGTAGCGGCTAGAGCCACGAAAGAGAAAAAGTTAACACGCATGTT -AATTGGTCTCTGTGATCACCAATATCAAATGAAGTATGTCGAGAAAGATATGTCAAGCTT -GGAAGGTCGGGCCACAATATATATTCGCTAGGTTGGAGGAAATCAAAGTCAGGGGCGATA -TAAGACGGAGACATGCTACTTTATAGTCCCTATGTCAGTTGCAGGAGGTTGTATTTCTCA -CAGGGTCTTGTGAAACTTCAATGGCAAGCTTTATTTTTGGGCTGATTTGCCGTGCCTTGA -TCCATCACGCCTCGTGCTCCCGGATGTGTGTCAGGGTACAAGATAGCTAGGCGAGGATCA -CATTTACTAGTCCCCGTGATGTGATACTGTGTTGCATTCAAAGGGATCGAGAGGAGGGAC -CGTTGATCCTGTAGCTTGCAACTTCATTTTCAGCCCAGATGGGAATTTTTCATTAGCTCA -ATCCATGATGTACAACGTCCTCGATTCTGACCGGCCGCGCGTCATAGTTCCTGTTGGTAT -TGCAATGAGAAATTTTATGATACGTTGCCTCTTGATATACTGAGCGAACTGATAGCCATT -GGTTCGGGCACGGGACTCCTAGGGTAATTGCCGCGTCCCGTTTGCATTCGATGGCGATTG -CCAGGGATTCTTGAGGTGCGTAGCTTTATAGAGCTTGCCAATTGATGCTAAAGTAGTGAG -CATTCCTATAAGCATAAAAAAACCCTGGAGCTTATACTTGTGTAAAGTTCCAGTACAAAC -CTCACACGGAACTATATATTTCCACACCTTTGAATCCATCTTGGACACTTGAAGCTACTA -CTCTATGATTGTTGACAATGATTGCACTCAATACTTTGATCAATGTAGGACAATAACTCC -GAAAGGCCTAAGCACACTAGAAATATTATATTCTGAGTACTGTTGGTCGCTATGAGTATA -AGTAGCACAAAGCCAAGGGAACAGATGTTTCACATTCCCAACCAGCTCCCGGGTCACAAA -CGAAAATGGCTTGGAGGTGAACAAAAAACTGCTGGTTTTCCATGTTTACTAGTCGAAAGG -AAATTGCGCTATCAGAGAGGCTCCAATATTTCAAATTGGTTACCTTGGAGACCGCTTTTG -GAGTACGTAGAGTTGTACGAAGCATAAACACAAGATCGGAACAGCAATTATTGGGCAGCT -TTTGGGGGATATTAAAATAACCCACATAGTATCCAGGTCACTTGTCGGGCGTAATACTTG -TCTTGCTGTCGTAACAAAAGGTCCCCGTTCAGTCTCGAGCATGTGTGTTTGTGCTCTCCG -ACAATTCCACTGTTAGGCTAAGTTATTTGTCGTCGTAAGCATCCAATAAAACCTCAAATA -TGACGTGGAATGTCTGAAGAGATCTCAAATAAAAATTGGGTCTAGTCATTGTAGGTAATT -TGCCTTGTGAACGGACTGTCAATCATCCGGATCTCTACCATACGCAGTGACTATACGAGG -AAACAAGCCTGCATCCCATTAAGATCAAGAAGTAATTTCCAAAATTTGATTGGTATTTTG -TGAATTTGAGGTAATACGAAGAAATCCACCTTCAAGATGATTCAACCCATAGCGAGGAGT -GCACGTCGCATTTCCACTCCAACGTAGGGGCCATTTTAACGCTCAAGCACACTGCGACCA -CCGTCGACGGTGATCCTATTCCGAACTCGAATATGGGCGTACGCTGTCCCAAGCTGGACG 
-ACTCGATGCGCAGTTGACTCATCTTGAGAGTCTAAGTAGGGTGTCGGAATCTGAGTGCTC -TCTTCAGAGGACGGGCTATCTGGAGTATCTGGCCCACTGGAAGAGGCGTCACCTGAAAGC -TCCTCATCTTTCTGGTCATCAAGATTTGCAATATCCAGTAGAGCCGTTGGACAAAATACC -AAAGCCTCCCCAGTCCGCAGACTAACAATTTTGTGAAAGATGGTCGACGAGGCCGTGGTA -TCTTGAGTGAGCTCCCCAATCCCAGCTCCCGCGATGTGCGACTTGATTGCTTTGAACCAA -GCTGGTGATGAGAAACGATGGATAATGGTCACATTGCAAAGCTCAAGCAGTGACGGTGCA -AGAGTTGGCTCCTGGGTTGCTATCATCACGCGTGCAGCAAGATGTCGTTGCTGGCGGACG -ACAGATAGAAGCGTCTCTGTCAGATCTGCGGCTTCTGATGAAGTTGAAGTCATAAACTAA -AATTGAATCAAGTGTCAGTATGGAAAAGCATGAAAGATTCAAGAAATCGGGACAGATCCA -CGAGCCTTGTGTGCTTCATCCAGGGCGATGATGCGACCTGCGTCGTGACGGTTCTTCAGA -AACAAGGAAACTGCGATATTAAAAAGTGCGCATGCATCCTCCTGGCCAACAAAGGGGCAG -CTCAAATCGATGATTGTCAAAGTGCCTGGAGGAAAATTCCAGATATCGTCAGCAGCATGC -TTCTTTGGCTTATGGGGAGTTGCACCAAGCACAGATCCAGGCTCGAAGAACGATTCAAGA -ACCTCCAAGCGCATAGTAAGAGGCATAAGCTGGCCTTTCAAGAAATTCTCGCTCTGAAGA -AGAAGTTTGAATGCGCTGTAGTTGAATCCGGATTTACCCTGGTTGGCTATGGCCATATCC -CGAAGAATCTTCATAACGACCTACATTCAAAGCTTATTAGTACATTTCCATTATGGATCA -CAGGTGTTCTGGTATACCTCAATGTAGAGAGGCTGCTCGGTTCTGTTGTTGATGCCCATC -AGGGTTTTCATCATGCCAATATTCAAATCCTTCTGCGGCAGATACAATGGCTCAACCTTG -AGCATTTTCGAACCACCTGCCAGTGTTTCGTATGCCTTTTTCATTGCAAGGTAATTGGTG -GGGGACACTAAGACTCGGACAGGAATGTCTGATGAGTGCAGATAAGCCGCCTCGCAAAGC -TGCGTGCTGGAAAAGGCTGTGAAATTGTCATAGTGAATAACCAGACCAGCAAGAGGTGAC -GATAGCTTTCCGGCAGGGGAGGATGCGATCAACGAGTTTTCCAGCAGGCATGATAGGGTG -TGACTCTTGCCACTTCCTTGGCTGCCGCAAATGAAGGCAGACCACGGCGCCGAGACATTG -GCATACACAAGATCATCCTGTGCCGGTGCGTTGCCAACAGATCGATTGCGCAAACCAAGA -AGTCCATACTGCGGATATGTCCGCTCCACGTTGGTAGTTTGTTCGGCATTCATACGGTCG -TTATCTAAAATCTCACAGGCGGAGAGTGAGAATAGGGGTGCTGTTATGAGCTCCTCCTCT -AGGCGGGTGTTCTCGTATCTCGAAGCATCGGTGCCAGCTTTGACCAACTTCATATGCTCC -CTTATCTCCAATGTGTCCCTGCAAAGATGAGGATATGCCATAGTGTCAAGTGGTCACACA -AGCAAAGCTCTTGCTGGGGCTTGTTGGTGATCGTGAGAGATTGTTGCTGAGGGGTTTGTG -GGAAATGAGCGATGATGAGGGTTAAGCGAGATTGGAAAGACAGCGATCTTTATGGCCAAA -TACGGACAAAGCCTCAAACACAAAGCGCATCTTGGTCAGGCATCTGGGAAATCAGAGATC -CACCAAGCATCCGTATTTCCTTTCTACTGCTGGAGGCTTCCTGGCTTTGTCTCCCTATTG 
-GCACATGTCGATTTCAAGATGTATCTATTTCCAGGATTGTATCATGACTTTAAGCAGGCC -CTACTCAATTCTACCTGCCGTAATACTCAAGATACTTTCCATTCTTTCTTATCGAACCCC -CCATCCCTAAAACCTCTTTCGAGTCACGACAATCCTACCTGGTGAGGAAAAAGAGCAGTT -TCTTCGTTTCATACACAGGATACTAGTTTGGGAATGAGTATATGATGAGGCAGCCTTCAG -AGGAAGATTTATTGTTTCCAGGGAATCAGGTGAGTTGACGCAATGAGATGTGTATGTAGG -GAAGATAACTGCGCATATGTTTACCTCTTCTAAGGTTTAATGAACTTCGCTTATACTAGT -TTTATCTTATAGTACAGGTTTATCAGTTTCCACACATCATTGTCAATAATTATATTACGG -TGTACGTAGTTCATAGCTGTCCCCTTCCCTACAAATGCTTTTTTCTTTCCTTAGTCCACC -CAAAATGATAAAGATAATCCCAGTTTAGCTTTTTTGGGCCCTTGTTTTCATAGACTTTTG -CATTAGCTACTAAAGGAAAATAACAGTATGAGCCACGTTGCAAGCTCTTAGACTCTCGTA -TCAAGATCCTAGCCGCATTGTATTCCCACATTATCACAGCACCAGGATTGCACACCTTGT -TTAATGTTTCAGCACGCCGTTAGTATTCTTGGATTCGATCAGTTGTATGAGACAGAGTCA -GTAGTGCTTGCATCAGAGTGACTATCGTACTGAGCATCTCATCCATACTTCTTGCAGATC -TTACTCTGTCTATATCGCACTCACCCTCACCATTTCTATCAATCACTTGTCGATATTTAC -AAATATGATTTGATCCAACATTTAACCAAGCACATACGACCATAGGGTGTGGAAAACAGG -GCTTCCCGTCCGCTCAGCCGTACTTAAGCCACACGCCGGTGAGTTAGTAGTTGGGTGGGT -GACCACCAGCGAATCCTCACTGTTGTATGTTTTTGCATGTTTTGTTATTTTTAGTCTCTT -TTCGATCCCTTCGAACCAAACGGGGAGTCGCGTCGTGTGAAGCTTATAGTTGTTCGATAT -CAATCTGCGCATGCCTATATAGAAGATTTTACTACATAAACGGACTCATCTGACTGAAAG -CTGGTTTAGCGGCCTACTTGTGAGATCCTGGAAATAGAACGAACAGTGACTGTTCAGACC -TTACATTGTAGAGTTCAAATGATTTTTCTAGGGTTTTTTGGGTTTGAATTGCTTGTCCCG -TGTCCTTGTGTTCCTCCACTCCTGGATAGAAGACGGGGACAACGAGCCCGGAAAAATAGC -GATTGATCAAATTTTGTGTTGAGAGCGGAATATGTGCTTTGATCATATGTTGATATGGCG -CGGAACAAATGGACCAGTCCGGTCAATGCTCGAACCCTAGTTCGTTCAGGATGTTTCTCA -TCATGTGTTCCCAAACCAGTGATTCATTTTCGAGACCCTTGCCAAGGACCTTGGGGGTGA -AGGTGATTCCCATGCTCATCAATTGAGAGATGAGTCTGCGTTCTCTACGAAGAATTGCGT -AGCGTAGAGCCTCGGGCCCCAGCCGAGCAAGGTCACAGCCATAGTCAAAGATCAATCGAA -GCAGGTTTTCATCACCGCCGATGAGCGCAGCGTAGAGTGTATTCGATATCTCGGCTGACC -AACCAGGGGTTTCGACACCTTGAATTCTTTCCCTGGTTTTATGCGCTTGAAGGAGTTTGG -GTCGTGGTCTATCAGGCAGTTCATTCGCACATACATCCGGAGCGAACCGCAAGGTCGGGA -TGAGACTTTCGTCATTCACATCGGCACCATTTTTGAGTGCATATCTGACAACATCTTCTT -TGCAGTGCCAGAGTCCCTCATACAGGATAATCTGTGCAGCATCTTTGACGGCAAGGAATT 
-GACGAGGACGTTGAAGGAAAGAAAACAGAGCCTTGAGTACGTCCATATGGCCTCGAGCTG -CGGCAAGGTGAAGTGGCCGTCGCAGCTTGAAGGCACCCACGAACGCACTTTCATACCTCG -CAAGTAGATACTGAACAATCTGCACATGGCCATTTCCTGCGGCCTGTCTCATCGGGGTTT -CGTAGTGTTCGTCGAGGATGTTAGGATTCACACCCTGCATAAGCAGAAGATGGACGCAAG -ACATCGAACCAATCTCGGCTGCTACAGTCAATGGGTGTGACTTCGGGTGTCGTGGGATGA -CGTTCATAATCTCTGGGGCATTGGGATTTCTCAAATGAGATCGGAATCGATCTCGCAGCG -TAATCTTGGCGCCGGCCTTGATACACTTGCGGATGGTGCCAACAAGGCCTCGCTTAGCTG -CCCACAGGATAACATAATTGTGTTCGTGTGCCGTATTGGCCTTGTAGAGCTTTTCTCCAA -ACATGGCCGTGAAACGTGGGCTTGTGCGAATCAAGGCATTGAGGTCTTTCTTGGAGTCCA -GGTAGCTGTCAATCATGAGAATGATTTCTGTGGGTAGGTGGACAAGTGACATGGTTGCTG -GCACCAGATGCGAGGTTGAAGTGCCGAGTACTGAGGATGTGAGAGAGATAACCGGGAGAG -AATAATCACAAAGATGGAAATGCCACCAGCTTGGATGAAGACCGAGCAAGTGTCAAGAAC -AGAAGAAGATATTTGAATGCATGAGGGATAAGGGTGAGAGATTCAAAATTGCCAAATTga -gaagacagaagatagaagagagaaaagagtgaagaagtgaagagtgaagTACTGATACGA -CCTTCAAATGATAGTAATGTGATTTTGATCAGATGAAGACATAATGTCTAGAGGATTATT -TCAACAAACAGATTGCCTTTTCTTGTCGGGTCTCTCTCCTGTAGTATTCACTTCAGGTAT -TTTTGGGGTCTTTGAAATTGGATTGAATCTGAGAGACATGTGGTAGCCACGGTAGTAATA -TATGGTATATAAATGAGCTTCATAGCCTTGAAAGGCAGTTTTGACCGAACACCGTTTGTT -ACTATTCAACAACTGAATAGATCCTGGTCTCCTTTCTAACATTGTGAATTGGCGATATGA -TTCCTTGCCTGAGATCTCAATCTGCTACTACAATTACTTGTGCACCTCACTTAGGGATTT -AATACTTTTTCGGGCCTGATTATGGGGTATATATTAATTATAACCCTCAGTAAACGCTCT -TCCCGATTGAAGTCAACAAAAATGCTCTAAACTTGTGTGCTTGTCCATTCTGGTTGTCTG -AGAGAAGATCATAGCGTTTTTCAGGTGTGTGTGCACGGAGAGACCGTTCAGTATGTACCG -GGCCGCGTGAGAGGTGGTACTACCAAACAGTTGAATCCAACCCATCCCTAGAGAATGGAG -AATTTCAAGTGTTAGATGTGAATGATACCTTGCCACAGGAATTTTGACTTACCTGGGGCC -ATAACTCTGGCTGAATCATCGTGCACACAAGCCAATTGCTCTTTTTACTATGCAGAGAAA -TATGTGAAGTAAACAAGGAGATCTGTGCTCAAATCTCCAAGCTATCTTTGATTCCCATTC -TTACCTGCTGCGTCTGCATAAAAAAGCAGAACAATGGATTAGAAATCCTTGAACCAGGGG -GGATAGACTCAGGACATCATATTGTGGGAAGGAATGCGTCAAGTTGGGGGACAATTTACA -TTAGAAACAAACGTATCACCTGAAGTTCTTCGTGAACTGAATATAAATCTAGCTGGATAG -ATTCCAAGGTTGTTTCCACGTTCAGATGCTCGGTGTACATGATCTTTTATTGAATGAGGG -AAGCAAAATGAATAAACCTACTAGTTGTTAGACCTTGAGAGATTGCGATAGGTCTATTAT 
-ATATGTTCATGCTGCAGTATATAAATGCCCCACAATGAACAAGCATACAACCAAAGGGCT -CGCACGCGGGGGACTCGACCAATTTTCTCACGTTGTTCGATAGCCGGGTGCACTGCTACT -TCGTCGAGCTTAGAAGGGTGCTCTGGTGTTTATTCTGTGCTCTTGACCGTTCCTGATACT -AATTCCCGCAGAATAGAGGCAATGAATTATACTTTGGTACCTCAAGCCATTATATATTCT -CATGGATTGAAAAGCGCATTGACTTGTGTATTTGACCCAAGACACAAGTTCTCGTTCATC -TGTCCCAGCTCTCGGGGTATATTCAAACCATGATGAGGCACAAAGTCACCAAAGTAGTCA -ATCAAGGCGGCTTCCACCATTGAGCGCAGGTGAGGGTCTGAATTGACGACCCATGCTGCA -GACAGACACATTATTACATATCCTGCCCCAACAGGTCTGTGTATATTGGATTTTTGCGCA -TGTACGAGAGTCTCTATCACCAGAGATCTCGATTCTAATATGCAAGCGTAGTCACTGGGA -TTCAGGGCCTGAAGGATGCAATTGAAAAATGTTGTGATTGCGATGCCCATTCCGTAGGTA -CGGAGATAATGCGCTCGCAAGATCCTGGTCATGAAAGTTTCTGTCCTAGGAATGAGCTCC -GATGTTTCATCTTCGACTGTTCGTTCTTTCAATTCTGCCAGTATCAATCTGCATTTTTCG -TATATTGGTCGGACTTCGTTTTTCAGGGGGGTTAAATCTTCGCCGTTGCGTATAGCTCGT -TTGCCGCGTTTGATAAAGACAGGGGCTTGTGCTAGACATCGCAAAAGCTGGCCTTCGGGT -TGGTTTAGATCGAAGTCATTCGTGACTAAGGTATCCCACTCCTCGGGGCTTAAGTCGATG -GAATCATTGTATAACCCTTCGAATAACTTGGGGGTATAATCAGCTTGGTTTCGGAAAATC -GGGGGAGTCTGGGATTCTTTACCACACTTCCGCGTAAAGAGAGGAATAATTTACGCTCAA -ATTCATCGCGTGGACCAAATCCCTTTCGCGCTTTTAGAATGCTGGCGGCCCCTTGTGCAT -GTCCTGATACAATTTGCCCGTTATTTCCTATGAATGTTTGGCAGATCAGAAGTAGCATCA -CAGCACACAGTGTGCTGGAAGCACTCGCCTGGGGCGGGTTATCTAGATGACTCCTTAGGG -TTTTGAGCGCAAGGGAATACTTGGCAAGTGCCTCAACCGAAGCTGGTTGGCGGGTGCAGA -AGCCTGCGTGGATAGTAGTCACAGCGTCAACCGCATGGTCAAGTGCTTCGTTACTACCAA -GCCTTGGGGGAATCTCTTCCAGATAGATGCCGAAAGACCACCACATATTGTATCTGAGGT -CAGTGGATCGTTTTATCGCGCCGACAAACGAACTGGTCAACGATGTCATTCCATTGCTAG -GACACATCTGAGGGGTTCCAAAAGAATTATTCTTCTTTTCTGCTGATTGTGTCCTCGATA -TGACAGTCATTTGGCTACTTCGATTTGACTTGGCGGAAAATTGTTCCTGTTTGAACTTGA -ATCTTTGCTGACCGGACCCCACGCAGGTCACTTTCAGGCGAAGACATCGTCCACACGCAG -GTTGTTTTTCGTCGCACTTGACATCTCATTAGATAGTACTCTGTGCTTTTGTTTTATCGT -AAGTACCTTTTTCTTCTGTTTTCGACACGCATCACAGGCCCGGCCTGTTGGAACACCCGG -CATGGTGGGGAAGGCCCAAGTGTGGGGAGCGGAGCGTGAATCTCAAAACAGACACTGCAG -GGCTTCAGGTACGGGTCGATCTAGTGTGGAAGCACTCTAGGCGACTATGAAATTGTGATG -TATTGCGCTAACAAGAAGGAGAAAACGCCTCAAACGCCCAATTCGAGGTTATTCGGGCTG 
-AGAGGTCTAACCGCTTTGGGTCAAATGTACCGCTCCGCAAGGCTGGGTTGGACGGCGCTG -ATTGGTCAGGCGATGAACTATACGCTGGGGCTGAATTGAATCTCATCGAGCATAGCTCTG -TCTAACCAGATAGGGACATATGCAGCGCTTCATATGTGTAAATCAACACGGTGAGAGAAT -ATATAAATATGCACTCCCCCAGAATCATCCGCATTCACAATTCATGAGACTTCATCATGA -CCTCAAATAACACAGATAGAGAGACAAACCTCTACTTGTACAATCCGAGTCATATATTAC -CAGCTATTTTCGCCGCTTTGGTCGGATTGTCTTTCCTTCTCCATGTCTACCAAAACTAGT -AAGTATTGAGAATTCAACCTGAAAAAGACCAAGGTTCTTCTAACACCTACAGTCGCTATC -GATTTTGGCGCGTCACCTTCTTCATCTCCTGGGGTGGAGCCCTTTTCACCGCGGGCTGGA -TTCTCCGCTGCATATCGAGTTACCACCCAACAAACTTGAACTTGTATATCGCATCAACAG -TCTTCATCTATGTCGGCCCCCCGGTGTACTCGGCCGCTGCATATAATCTGATTGGACGCC -TCATGAACTACTTACCTATGCACGCGGTCCTCAACCCAAATCGTGTTCTCATCTTCTTTG -TCTATGTGGGCGCTGCTGTGGAAGCAACTACAGTTGCAGGAGCAGCCAAGAATGCATCAG -CCGGTTCTGATTTGGACGAGTACAAAACCGGCGGGATTCTCGTCGCTGCGGGTCTTGTTC -TCCAAGCAGTTGTTGAATGCTGCGTGATCGCTGTCGTTGCAACTGTCCATGTCCGCTGTT -CTCGGGCACGAATGCTCTCGCCAAATGTCCGTACAATCTGCATCACACTCTACGGAACAT -CGACTCTTGTCTTGCTGCGTTGTATCTTCCGAGCGGTAGAGTCATTCAAGAAGTTCAGTG -ATCTCGACTGCCGGGAGAACTGTGGATCTATTCTCAGCAACGAATGGTATCTGTATGCAT -TCGAGCTGGGCCCTATGCTTCTGTATACTTGGTGGCTCAATCTAATGCATCCTGGTCGGC -ATCTACCTCGTCAAAAAGTACACTATCTGTGCCCTGATGGTCGAACAGAGCGTATGGGCC -CCGGCTGGATTGACCGTCGGTCTCAGTGGGAGACCTTTGCTGATCCCTTGGATTTGAAGG -GGATGCTCAAAGGTACACCTTCTCACGAGAAGTATTGGCTACGACCGGATGAATGGCCTG -TGTGCAAGGATGGTAGCTTTGCTATGGGGACTGCTACCAACAAGCGGATTACATTTGAGG -ACGAAAAGGACCTTGCTCCACTTAGTTCTTCTGTTTGAATAATGGGTGTGTGATCTGATT -TTATGTCTAAATTTTATTTCCAAATTGTGCTTTCTTTTTTGCAGGTCTTTTCTTTCGTGT -GAGACGCGAATAATTCCCATTATGTTTATATAGACTTAGACCTGAGCCTCAAATTATAAG -GTGAATTCGCCTTTATTATATTGCGCACAAATGGGTCTATGAATTTCTTAATTTGAATTC -TTCAGAAGGTCTAGTATAAGTATTATGTCGAGTCCGAAGAGACTGGTTGCCTTCCAAGCT -TATCTTACCTGGCCGTACATTTGCTCGGTGAACTTCTGTGGGGGTAGGCTTGGTAATGAC -CGTCCGTCTTGTTTGTTTTATATTTTGGGCGAAGCAAATTAGATCTGGTGAGCATAGCAA -CTCGGTGTCCTAGTATAAAGGCTGTGCCCAGTATCATAGATTATCACTATATTTTGGCAC -CGGGCGATATTCAGCAGTGCAACCAACTGTCACAGTCTCTATTCTTGTGATCTTGTAAAT -CCTCCATGATATGTGTTTTTCATTGAGCGTGTATTTCTAAAAGAGCCCCGTGAAGGTGAT 
-GAACATCGCTTTCAACTATCGTAGCATCATCGCACCCAGTATTTACAATTTGGCTTGTTC -AAGTGGAGTCAGTCTTCTAAGCGGAGGACCTGCTCTGCTTCCATCTCCTGCCATCAGCCC -TTCTGAGTCATCTTGGGGCGGGTTCATCCTCCACTTTTGGCGCAAAACTTCGGGTGCTTT -GTTCTCAAAGCAGTCCGCGATATATTTGCCCAAAATAGGAAGGAATTTGAATCCACTAAA -ATGAGGTTCGTTAGTACAGACCCATAGGATGAAGAATGACAATTTCGATTGACACTTACT -GTCCTGCACCACCAGTAGCCATGAAGAGTCCTGGGATATTTGGGTGGTGGTCGATAATAA -AGTCACCATTGGGGGTGTCCGTGTACCAACAAAGTCGACGGTTCATCCAAGGGCGATCAC -CCAGCTTGGGGAAGAACTGACGCAAACCCTTTCGCAGCGATTCATCTGCGTCATCAGGCA -AATATCCAGTGGCTGCATTGCTGGTTTCGAACTTGGGTGATGATAGCGATTTCTGAACAC -CGTCCACATTGACAGTAATATCAGTTGCATACCCGTAACCGTGGCGCGCAAGCTTGAGAA -CATTTGTGTCTGGTGTGGGTGGGAAGGAGAACACGCCGGAGCTCATGTTGATCATGACCG -GGGTCTTTCTCATCTCAAGAGCTTCTTCTTCGGTCAACTGAATGAAACCAACAGGCTGGC -CAGAAGAAGATGCGGCATTGCCCATGTTCAAAAGACGGTTAGTCCACGCGCCAGTGCTCA -AGATCACCTGGGAAGCAAGCAACACCTGCCCTCCGACAAGGTTGACGCCAACAACGCGCG -AGCCCACCCTGCGTAGAGAAACGACAGTGCCACGAGGACCAGCAACGATCGAAATGCCCG -CCACAGTACACTTGGAAGCTAGCTTCTGAATACTGCCTGCAGCATTGGCCCAGCCACCTT -TCGGGTTGTGGTATCCGTTCATGCCGGAAAAGTCGGCCTGAACACTAGGGTAGATTTTGC -GGATATCCATGGCGTCGGTGAGGTCGTCTAATTTCCCACCTGCCGCCCGAATGATAGCTT -TAGATTTCTCCATATATTCGTTCTTGGGCGTTTCCGAGAACAAGGCGAATCCCGATTCGA -AGAATTCATCTTTGTACTCGGTGGTCCATCCATTGTATGCCTCACGGGCCATCTGAGAGT -AGACCGGGTCTGCGTAGTCGGCGCGAATAATGCGCGAAATGTCAACACTACTTCCATCTG -CTGATGGTGGGACAAATCGATCGACAACGGTGATATCGGTGTATCCGCGCTTGTTGAGTT -CAAGAGCAGTTGAAAGACCGAAGACGCCAGCGCCCACAATCAGGATTGAGTCTGTCTTAG -CTGTGGCCATTTTGAGAGACAGATTAAAAGCCAGAGAAGTGTTGTTAAACTATATCGGTC -GGAGGATGTTCGTTATAAAATGCCGAGGCGGGTTCAGTACCGGGTCTTGGCGTTCGGAAA -AGTCGCCGCCCGGTCCGTGTTCCCCGGGTGAGAAGATGGATGAAGACGCCTTTAAATTCC -AACTTCACCTGCAGACGTTGGATTAACACTTAACCAATCCTCAAATGCCTTCCCGTAATC -TTGAAGCTGGAGGAACAACGTGTCACCACCGCTCAACTCAATATTGGGATCTCCCGAAAG -CCCCGAAAGGTCCATTCTAGGTGAGGTTTGAGTCGGCGCTTGTATGGGATCCTCAGTCTG -CTCCGGTGGATAGGTGACACACTCGGAAGCCACTGTCTTGTGGCCTGAGATTCCAGCGGA -TCCGATCAAAGCAAAGTCACGCTCGGTGATAACCCCAGCGGAAGGGTCCCCTGAGCATCC -GATACTGTCTTTGGCTAGTTCCGGGCCGAAGATGTCGCCGGCGGGGTCTGATATATTGCT 
-TGCATGGGCATATACTAAGATTTTCCATAACAGCTGCAGGTTAACAGACCACTTCTGACG -AGAATGGGTTCCTCCTGGGGCGATGCTGTCTGTCATAAGGCCTCCGGGAGAAACGCTATC -CTTGAGTTGTTGCAGTTGTTGGACCTAGCGGTGATATCAGAAGTACTGCACCAAACCAAG -TGCGGAGTGAAAACTGACCTGACGATCGATGTGTGGCCACCGACGACCCATGTTGCGTAG -GAATCTGAGGCATTTCTCAAATCCCGTTTGCGCCTTTCTGTTAAAAGCTTGATCCTCGAC -AAAACTATGTTGGAGGTAGATCGTAGCGACAATTGCCACGCACTGCCCTATAGTCGGATC -TGACACCTCAAAGCTTTTCGTCTCGATCAACTCAAGAAAATGAACAACCCAACCCGAGTG -GAAGGTCAGCTGTTCGAAGGAATTGCGGAGGAATGATTGAGGCATTGTCCTGCGAAAGTT -CCGAAGTCGCATAGATAGCAGAAACGGATGGTTTACTAAACAAGGCACCGCATGCCAAAC -CAACTGGAAAAATAGCCATGGGCTCCAGTAGTCACGATGAGCCTGAAGTTCGGCAGGGGG -ATAATCTTGGAAACGACTTGCATGAAGTCGAAATCTCAGTGGCATAAGGCTCTCGTGCTC -GCAGTGTCGGAAATTTATCATAGTATAATCAGAGTTCGGAGACCAGGGGGGATGAGCATC -AACGCCGACGTGACTGGCAGCATAATTACATGCCATCGCCCACAACTCGCTGGTGTAAAC -ACTGCTACCAACGATGCCAATGTCTGGCTTGGAGTCACTGCCGTTACCACGGGCAATGTC -TTTGTTGATAATCATGGCTCCTAACGGGGAGACGATTTCACCAGGCTCTGCAAGTAAGTC -CGAGCTATTTCCGGGTGGGTGTAGGGACCCCTGGAGATTTTTAAGCATGACCAGGGCTAT -GTAGCAAAGCTTGCGCTCATCACGCTCGGTTTCCAAGTTGTTTAGAGACTCTAGACCACT -TATTCTGAGGTTTTTCATCAAATATGTGGCGAGTCTAGTGTAAGATCCTGCACGAATGAT -ATGACCCGCTAGACAGATAAAAGAGTTAGCAATGAACAAAGGGACATAGACCAGGACTGT -TTTACCTGTAAATTCCAGCATACTTAGAAGACAAAGGGTTTGGATTGTAGATAGCTCCAC -AGTACCGGATGCTAAGCGCATCATGACCATCTGACAGGCTCTTTCAGTTTTTTTCTTGAT -CTCCATTTGTACATCCAGGTCAATGACCCCTGGTCTTTTGAAGCGGACCCCAAGAGCTTC -AATTGCGAGTAGAAGCTCAGGGTCTCGCTTCCCCAGGGAGGCCAAGGAGGATCTGTGGGG -GAATAACAGAAGGGGCTGAGCATTGCAAAAGGTCAGATAGAGCTGAGCATCGGGGGTTCC -GGATGAACCACTTCTATCTCATTCATCAATCAAGACATCCGACAAGAAAATTTCAAGCTG -AACTCACATAGAAGAGCTCTCAGGAGGGGAAAATTCTGGTAGCGGGTGATTTGCTGGGGC -AACTGGCGAAGAGAGGTTATTTGATGCAGGCCGCCTTCATCCTCAATCAGCCGAATATAA -CATTGTATAGAGAAAATTACAACACACGTAATATATTCCATCAACTGTTCCAGCTTCCCC -TCAATGCCGGAAATGCGTCGTTCCTGTACTACATTAGAACACGTTTTGTTCTACGTATAT -AAGCATCAATGATACTTGCCATCACCTTCACGGAGTGCCCCGGACCTTCTTCTCCAGTTT -CAGGGCCTGCATACACGCACTGTTGACCCAATCGCTCGCAATATGAGCAGCTCGGTTTCT -CTCCCGGGCAACGCGACTTTTTCCGGCTGAAAAAATGTCAGTCCAAATGAGCTCTTATCT 
-TGGGACAGTCCAAGCTCACCGACATGGTTCACAAGCCTGACGTGCTCGTTTCGATGGTCT -TTCATCTTCAGTCGATGCCATTTTGCGTTTGTCACCATAAAATGTGGTGATATAGGTTGT -TCGCGGGTAAAATGTCGATCCAGGGCGGGGAACGCGTGTGGGTCTGAAAAGAACTGCATA -CAAAGCTCCCCCAGCTTCTCTTGTACGTCTATATGACCGAGCTGCAACGTGCCCTGATAC -GTTCTCTACTCTGCCATCTGAGGTCTCTTTGTTAGCTTACCAAGAAGCGTGATTAACTGG -AAATTTGATGGGTACAACTTTATTTAAGCCCGGGATGCATTTAGAGTATACTCCCAGTCA -GGGTTGATTATATCTAGCTCTAAGGAGCTCTGACAGTAGAAACAATGACAGAAATAACAG -CCGAGAGCTCAGTAGTCTTCAATAGGAGAGCTAAAGGGAAATTAAGACATCTTCAGGCTC -AGCCCCATACTATCGCAATGATATCCGAGGCAGGGGTGACCAAATTTACTCGTCGTAGAT -TATGCGCGACCTTGTACACGGTGTTCTATACTTAGAAATGGTGTATTGGCAACTCGAGCA -ATGTGAACCCATTCCACTTGGCTTTTTTTGTCTTTTGCCAAGTTGTAATGAAAGATGAAG -AGTTGAAATGTGCCGTATAGTGAAGATCGACGATCATGTGATCCGTCACTACCATAGACT -GACGTCGGCTCCGATTCCCCAGCCCTCGCAGCTTTTGGCCACTCAACTCCGCCTCACTCA -TTTCTGTGACAGTCTTTCAATCCCTTCAGGCTCTGCGTTACTCCTGATAAACACTCCGCA -AGAGATTGAGGACAACTTTAAAGGTATCCAAAATGAGTGACGCCTACGAACGCGAGCAGT -ATGTCCCCCTTTCTCCACCCTGTCCCGTCGGACACCCCTATGCAACAAGTCATGTTTTGA -TTCGAGTGCTGACACAATGAAACAGGCAGAACAATGCGCTTCTCAATTCCCTATCCTCGA -AAGTCTCAGCACTCCGATCCGTGACGATCGATATCCATGACAATGCGCGGGATCAGGACA -CACTCGACCATACGGTATGCTTATCAGCCTTTCTCTGGTCCGATAATAATTCAGGATATT -TACTTGTCACCTGAAGATACTTGTGACTGGTGATTTGATCTGGTCAAGTTGCGACTGACA -ATCATGCCTTAGAGCGATGTGTTCTCCTCCTTCTCAACAAACCTAAAAGGCAGCGCCACT -CGACTCACACGGATGGCGAAACAAGGCGATTCTGTGGCTGTGATGAAAATTGCGGCCATG -TGTGTTGCCGCTGGTGTCGTTCTATATATCATTCTCGGGTGGATCTTCTGAAGGACTTCT -TGCGTCTTTCCAACCTGCCGCCTTCTCGGCTCCATATCACCCACGCCCTCAATATTCGAT -GCGCTTGCTGGACAGAGACTCGTATTGAACCTTCATGATCTGACCTTGTCATGTTATCTG -ATGGTGCTGGGTTGTCGTCATCTCACGGACCACACCTGTGCTTCGCTCAATCTTAACATG -TCATGGCATGTTAGTTTGCAGTCTCTACTATGCTCCAGGATTTTCCCATCGTCCTCGATA -TACTCATTCTATTTTCGGGCTGTGCCTGCCATAGACATGATGACACGACTCCTACTACCT -CTTTCGACCCATTTTGGTCATCCTTTGACTTCTGGGCTCTCTTGATTGACGTCTTTATGA -TTTGCAAACCGGCCCAAGTGGTTCTGATCTCTAACTTGGAGCAGCTTGCTCTGGATCTTG -CTCTAACTTGGCATTTGTATCTTCCCATCTTTCCATGTCATTAATACTTGTGATTACTCG -TGAACATAATACATCTTCACTCCAGGCTCACTAGATCTGTTCAATCATGCTCCAGTATCT 
-ACTCCTTACGTAATCAAAGTACACAACGGGTCCGGTGGATGTATGTACCTGGTGGCCAAA -TTCATGAATGTCATTAAACGGCATCCGTCGGTTCCTCGGAAGGCTCTACTGACATCACCA -AGTGGTCCATCACCCCAGATATATTTCGGTCTACCCCAAAAAACTCTAATAGCCGATCGT -GAGTCTAAGCTTACCAAGATCGCGGTCAGAAACACTAATCAGAAACGGAACCCTGGTACC -ATAGAGGCAATGAAACATTGAGGTTTCTTTCGGACACTTGTTTACCCCGTTTCTTCTCAT -CACTCGACAGGATGGAGGACCCGACCGGGAAAGTCTATTTGTTATGCTCAACACTTCCAA -GGTTAGATTGTGTCGTGTGCCACATTCCATGCTTCGACGTCACAACGCTGTTTTCGGCAC -AGGTGCCTTGGACAGACCTTTGCTCGGGGCAACCAGTCTCATGGAAAAGTACTGTACTAA -GTTTACAGTTTGATCGCGATTCGTAAGGCATGCATCGTTGAATCTCATTGGCTCTTTGGC -CGCCACATGATAGAAACCTTGTGACCATGCCATCACGACAGCAACAAACGGCACCCTCGG -AAATGGATTTTCATCGCACAATTCTCAAACTTCGTCACGGTCAGCCTAGGGATCATGGAA -GAGCTATTTTTGGCAGAAGCTGTCACTGGTGCCTCGGTAAATCTCGGTTGAAACATTGTG -AATGTCGTTGATCAAACGTCCCCAGTGCAGAGCATCAAGATCCAATACTGGACCAAAAGC -TATGGCATTACTATTTGCATGCAGCGCAGAGTCCGACCCTAGACACCGCCCTAGACAAGG -GTACACTGCACACACGGCCTGATAATTTGACTCAAATTCGAATGGAACGTTGTAGAGAGA -CGTTCGAGTGGCACGATCTGTAGTGATATCGGAGGAGCCGGTCCTATGGCAGACGACACG -GACTTATGCATAAAGCGAGCAGCACTTCCACATGCCCGGCAAATGCAGACATTCATCCTC -GTTACCGAGGTGTCATTGTGATTCTGAATTTGAAGCCGGTTGTGCTGGATGCTATTGAGT -TCCGACAGCCGAGGTTACCTACCTACATACCTATGACGGTATTCAAGGCTTGCTGGGCCA -GATCGGACAACCATATCTCGCAAGTTAAGCAGCTGGGCATAGTCAATGGTCTCCCTGGCT -TTGATCAACCCGCTGTGATATAACCGAGTATATGAAATTTTCGGGCGTTTCCGGGTGTTC -TTTCACCGTACTCCCTATAATTGGCTTGCAATTTCAGATGTTCAGTCTATAGGAATCGAT -GTCTAGGATATAAGCATTGAAGGCGCGCTAGCTCTGTATTGACATGAAGTCCGAGCTGTA -TTCATCCACATCCCACACCCATTCTAGACCACCGGAAAACCCGAACCAAAAATGCCCGAA -ACAAATACCCTGACTGCTCATGTGGACTGATTCCGGTCTACAATTGGTCCAATTGAGGCC -CTTGCAAGCGTGAAAATTCTTTTAGTGATCCACGCGATCTTTTTTAGTTCCCGCTCTAGT -CATTTCAGCCTTGCCCCTGGCGATCACCGGAAGATAACTGTATTACGCTATAGATTTTTG -ACGTGAGAACTGCTGAAGATAAGTGATGCACTGCGCTTGGATGCTCGCAATACTTGTCGT -TGCTAACGAACTGATCTGATTCTTGAATAACCAGGGATTTCTGGTTGATTATTACCGTAG -GGGTTAGATCTTTCACATGCGGTTAACAATTATTATTACCTAGTTTTAGGAAGGTTCCAA -GGTTCCACTTCCCTCAAGTGCCTCTTATCTGGCCAGTTCGGAAAATATTAACCGAGCTGA -TAAGCTACCCGTGGCTCCAGGCCTGGTGTTTACTGTTTGTGATTGGTTATACTTACTTGG 
-TATCTGCTCTTTGACATCTGTTGATATGGTTTAGCTCTAGCTTTCAATAGTTTCCATGCA -GACCGTATGTTGTATGTTGCTCAGTGTAACATGGCTTGtgcatatgtatatgtatagata -atatgatacacacatactctgtaggtgcaaatgtgaatatacagatgtatatatatagat -agatataaaaataGCGAGAATGAGTCAATTAGAATGACGACGATCAAGGTAATTACAGTA -TTATCAGAACGCGTAAACAAAGAGGATCCATTTTATTGACTTTTCTTTCGGGATGATGCA -ATCTGAGCCTTGGGACATGAAAGAACCTTGGAAGTTCTGGAACCCACAGAGAATTTCATA -GTGGATCTGCAAAAGTTGAATGATCTACTGGGTGGGGCCGTTGAAATTTTGTAGATTGTA -GCTTTGACCCTGGGCTACTCACGAAACTTTGAGGGATTGTGAGAGACCATGATTGTCCAT -ATCTTGGAAGAATTCTAGACCTATCAATCAACCCAGGTAAGGTTGAAAGGTTGAAATATG -AACTTTAGTGCGGGTCGCACACCATGGGAATGACTATTGTTGCCCACCCTTAGACGTGGC -ACTGTATAAATAACTAAAAAAAAAAAACAGGTTTTAGCCTAAATATCACACCGGGAAGCC -TATATAAAAAGACACGGTGAGGTGCATTCGGGTAGATTAAATCGGCATCGGGATTCCGAT -CCGGTGAAGCGGAAATACCCGATACGTGAAGGCATCCCGACTAGGCCCGCGGTCGGCTTT -GCACAATCGGGAACAGGCCGATTTCTAGCGTTGCAATGCTGGTATACCTTCTAAATTATT -CCAGAGTGGCCGAAACAACGCCAAAGTGGGCCACACCGTAATTAGCTGTTTGTGTTGCCC -GAGACCTTATCTTTGTTTGATTTTTACCCGCCCAAGGTTGAAATGCCTCATAATCTCTCT -GACCCGCTTTAAACCTATCACAACGCCACGGGCGTTGACAGCTAGGTGGGGCTTTATCAC -ACCAGGTTCCCGCTCTTACCGCGATGCTCATCAGGGACTTTGGTGATGTAAGTGTTGGGT -GATTTTCTGAATGCCCCGAGATTAAGATAGGCCAAGCTTGCCAGATCAAAGTCAAAGCGA -GGATTTCATGCAGGAATTTGCATGGGATTTTCAGGCACGCCACCTGCAGGGAGACACCGC -CTGATTTGAAATCACATTTATCGGGGTGTAGAGTGCTGACTCTTGGCGTGAACGGGCAGT -TAGAGGGCTGAAAACGCCTGACGATCCAAGAATGGCGCCATCTAGAGCCTGGTAGGTGTC -GTGGAGTACATGTGCCGCGACTGATTGAATGCACATATAGGACTAGCTCGGGTATATATT -TTGTAGGATACCTGTTTATTATCAAAGTGGGGGTCTAAGTTGACGATTAAGTTCTGAAGT -ATGAACCCTATGTCATTTCATGTCGTGTCACTTCAAAAGGATATATTCGTTGACATTGTG -CATACCTAGGTAGGAGATCTTGCAAGCATGCGGGTGATCCCGAGGTATGTACAGCATTGC -ATGGCGCGATGCATAAAATGGGTAAATAAAGACCCGGACGACCGGGGTAAGTTGATAAAT -GGTTTCATGTATGTACAACATATGTGATAGTGTTCAAAATTTATTCCAAGTACCGTGTAA -TTAGTATTTGATACCGAATAGGGTAAATGATCCAAGTGATTCGTGGTCAATAGGGGAGAT -ACACATGTGTATGTACCTAGGCAGTACGTACTAGAGGCCTGACATTCATATGATAGCGCT -GACGTAAAAGACCAGGCCTTCATCTTGGTGTGATTAACCGGTGAGAACGCTTCCTAAGGT -TCTCTTCAAAATCTCTCAAAATTCTTTATAATCCCACTGTTTCTCAGTTGCATTCTCTCT 
-ATCTATTCATATGCCAAGGTAATCGGAGCCTAGCTTTTTATTTTCTAAGCTTAGTCAGTT -TCACTTGCCACTGaaaaagagaaaagaaatagaaaaagcgagaaaagaaTAAATCGCCGC -TTCCGGAACACTCCACTTGAGGACCCGCGAGTCGGACCAGCCTTGCGTATCTGGCCATGT -TCCGGTAGATCTGCGATAGAGAAAAAACGGAAAAATACCGACACACATACTTACCAGACT -GGATCcttttacttcttctcttctctctttctcacctttcttacttctgactttcttGAC -AAGCCCATTGCTGCGGGCAAATTCAAGATGGCTCTCAGCTTCTTCAGTGGTGGCGGGAGC -GCCAGCAGCGCCAAGTACTTTGACATTAGGTGAGTGGGGGGACCTACAGCACATGTGAAC -TGCATATCTGACATTATCCAGGTTGAATGATGAGTACATCGTGTTTCGTGGTGGTGAACA -TGAAGCTGCGAGCGCGCATCTGCGGGGCACTCTGATCCTCTGTTTGTCCGAACCGCTCAC -CATTAAACACCTCCGACTCCAGCTTACGGGCATGTCCCGCGTTTGGTATGTGTCGCAATT -CCTTTTGAATGAACACCACTAACATGATATTTCCTCAGTTGGCAACTTCCATCTAGTGCA -GCCGCTGGTGGCCGCAAGTCATGGCGCGAGCGAGTCTTCTATGAAAAAACTTGGAGCTTC -CGAGACCCAGGTAAAGGAAAAACTGAGGTCCTCCCTGCAGGCAACTATGAGTACCCCTTC -GATGTGGTACTTGAAGGGTCTATGCCCGAAAGTGTGGAAGGTCTTACCGAGACATGGGTT -CTGTACCGCTTCAAGGCCGAGATTGGCCGCAAATACGTCAAGGACATCGTTGCGCGCAAA -CCGCTCAGGATCATCCGAACTCTAGATCCCAGCGCCCTCGAGCTATCACATGCAATGGTA -AGTATTCATCTGCCTAGGCGATATACTGTTACTGACGAATCCACTAGTCTGTGGACAACA -TCTGGCCCAACAAGATTGAATATTCTATCAGCACCCCAACAAAAGCCATTGTCTTCGGAA -CCGCCATCCACGTCGATTTCAAACTGATTCCCCTTCTCAAGGGCCTTCGAATCGGTCAAA -TCACGTCCCAGTTGATCGAAAGCCACGACTTGACCCTCAATCCAGAAGACCCCGATTCCA -TTCGCAATACATACAAGGTCACAAGGACGATCTTGACCGACGAGCATGAGCTGGATGAGG -ATAACATTGAAGTCATTGATGAGACGGCGGAGGGATACCAATGCACGCGTATCCTAGATA -TGCCGCAGACATTGACGCGCTGCCTTCAGGATACTGATACTAAGGGGATTAAAGTCCGCC -ACAAGTTGAAGTTCCGTATTCAGTTGCACAATCCTGACGGTCACGTCAGTGAGGTGAGTA -CTTGTATCACGGTTGACCTTTCTACTGCCTGGCTAACATGAATTCAGCTCCGTGCTACTC -TTCCAGTTTCGATCTACATCTCACCACACCTTGCCATCGATGACAACAACAACCTTCTTG -CGCAAACACCCCAATCTACCCGAATCGCCGCCGATGAGCTTGCACACCAGGCACCTCCCC -TCTATGGCGAACACACCTTTGATCAGCTCTATAGCGAGCTGGATCCTAGCGGCTATCGCA -CACCAGGTCCCGGCAGTGGCCCGGGCACGCCATTCACTCCTCTCAGTCGCAACATCTCGT -CCGAGAACCTCGCCTCCATGAACGCCCTTACCAATACCAATATCTCAGCTTCGGCACTGC -ATCATCGTCTCACAAACCTGAACGCAAGTCCCTTCCACCACTCCGTCACTCCTGGTACAG -TCACTCCTGGCGAAGCGCTGCATGGATCTCAATCCCCAATAGACAGTCACCACATTCATC 
-GCCAATTGGGTGTTCCTAACGACTACTTCGGACCGTCCTCCGGGTCAAACTCACACAGCC -ATGGCAGTCCGGAACTCTCCCGTCGACCATCCGATGAAGTGGAGCCCGAAGGCCTTCCCT -CGGGTATGGCGACCCCCTTTCACCCTCAGTACGACGAGGTGGAAACTCTCAGCCGAGTTC -CCAGCTATTCGACTGCAGTGCGCTGCGCCGTGCGTCCCCGTGATTCCGGTCTCCCAGATT -ACAACGCTGTGATCGCTAGCAGCCTTCCCACGCTATCAGTCCCACAGTCTCCCCAGCAGG -CTTATATCCGCAGCGGTCGTGCAAGTGGAACTGCAACTCCCCTAGAGGTGCAGACTCGCC -CCAGCCTTTTCAACTCTCACGGCGCCAATGCTGATGATGAGGACCGCAGACTTAGAATTG -TGCAAGCTCGCGCCAGAGCATAAGGTGTGATCATGATCCCTCGATGCCCACATGATCCGC -CTTCGATTTTCACCAAAAGTCTTCCTGCTGTACCAGTGCTGGACACTGCGACAGCCGCGA -ACCCACCGCGCTTTTACACTTCTTCCGCTTTTTTCTCTCCACTTTTTCGCATCTCTTTTC -CCACGACCGATCACGGCCCAACCCCAGCATAAATTACCCTTGTCGAGCCCTAGGCTCGGA -CATCCTCGAGCGCAACGTTGTTCTGAACCGTTCTACTTTACCTTCATGGTAGCTTTCTAC -CCCTTTTCATCTCTTATCTATCATCTTCTTCCTCAGGCGTTCGGTCATAGACAGGCAGGA -ATATTTTCATAGATGGTCACGCGCGGTGTTTTTCATTTTTCATAACCTTTTTTTCTCTAT -CTCACGAATGGAATCCGATATGAACGAGCTGCACGACATTGAAAAACAAAAAACACCAGA -TTATTAGCACGAGCCATGCATTCGAAGCCTTCAACTCATGTACTTAATTATTCTTTTGTT -GATGAGATCAATCCAATGCTTGGCAGAGTTCACATCTGCACTGATGATATCATTACCCGA -TTAGGAAATTAGCTGTAGATCTTATTATGTAATGGATCTCGGGCCCCTGGCACTTGGAAG -ACTCATTTTCTTCTTTATTCTTCAAGTCTCAATGGCATTGTTTCCTTTTCAGTGTAACAC -TGGGGAATGCATTGGTTTATGCCTTTTGGCAATTTTGATTAGGGTCTATGTATGTTCATT -TTGTGTTCTAGGTTTGGTGAGAGTTGCCTATAGTTACCCAGTTCTAGTAGACATAACTTG -CCCCAAGTATAGTGTAGATGGAATGTATTACTACATTCACATCGGTAAATCTAGATGTAA -ATGACATATATCAAGTCAGCCTTTGAATAACCTGAATATTGGAAGAGCCAGGCCCGATAG -ACTAATAGGGTTCTAGTGGCTTTGGGCTCGATGAGGGGCATGACCTCATGAACGTCGATC -GGTCAAGAATGAATTTCTCTCTTCGCCCTTTTTTTACCTGACAATATTCCGCGGAATTGT -CACCTCTATCAACTTGTAAGTCTGCTCTTCAATTATGGATCTCTTTGTTAACTAGGTGAA -CTAGTCCAGCCATAGACTAGCGCATCCCCGCCTTAGTCTTTCACACCTTTACCCCCCGAT -TATTCATTTGAGCCACATCACTCTTCAGCCACGGGTTTGGGATAAGATAGCTTCGGCATC -TTTAATTGTCGTTACAGAATTGGATTCCCGCATCAACCTTGTCCCGCTGTTTCTCCGCCA -TGTCCACGAGCCCCGCGGATGTGAAGCTGCCGCAGCGGTCTGGTACCGTGAGCAGCGCCT -TCACTCGTCGTACGTCCATGAGTGACGATGAAGCTATTCCTGAGACTGATAGTAGTGAGG -TCTGTATTTGCGATGTCCACAAAAACAAGGAATATCTTGGACTGACAGTGCGCGTGTTTG 
-CAGACTACCACTCTCCTACTTGAGCGTCTTCGAGCCTGGAAGCATATGTGCGGATATCTC -GAGGATTATGTTTCTGCTACCGCCAAAGTTCAGAAGTCGCAGTCCAAGGATTATGAGAAG -ATCTTGAAGGTCGGTGCCTATCCCAATGAGCCGAAGTGAACGGGACTAACAAGGAATGGG -AATAGACCGTGAACGAACCTCTCAAGGAGGGCCACCACTTCAGTGCCAGCGCGGGCGGCG -TAGCCGGTCTGTTCGAAAACATCCGTAACAACACCCAAGGCATGGTGAACATGTACCTGG -ACGGCGAAAAGAACCTCAAGAGCGGCGTGCTCCCCACCCTCGAGCGTCTCCACAAAGAGA -TCAAGGCCAAGTCCAAGGAGCTCAACACCGGTGCCTCCAAGGGAGCCAAGGCCGTCGAAA -AGGCGCGTGGCTTAACCCAGAAGCACATCGAGCTCCTAGGTCACAACGCTGCTACCTTCG -ACGCCGCCGCCTCCAGCAAGATCGAGGCGCACCACGACCCCTACCTCCTCAAGCGCGGCA -TCAACTACCGTCTACACAATCAAGTAAACGAGGAAAACAGCCACCGCCAGGATATCCTCG -CCGTGCAGAACTCCTTCCAGCAGTTCGAAGCACACGTCCTACAGACTGTGCAAGGCACTC -TGGATCAATTCAACCAGCACATGGGCGGCCAACTCGAGCGCCAGCGCGCCATGTACGCCG -ACATTCTCGGCACAGCACAGCGCATCCCACCAGACTTCGAATGGGTCAACTTCTGTGTGC -GCAACGACGCAGCCTTAGTTAACCCAGACTCACCCCCACGCTCATTCGCCAGCATCACTT -TCCCAAATATGGAGCACCGTTCCACACAGCCTCTCATTGAGGGCACTCTCGAGCGTCGCT -CGCGCGCCGTTATCAAGGGCTACAGCACCGGCTACTACGTTGTCACGCCGGCACGCTATC -TGCACGAGTTCAAGGACAACGACGACTTCCGTCGCGACCCCTCCCCCGAGCTCTCGCTGT -ACCTGCCAGACTGCGTCGTTGGCGCTATCGACGGCATCAAGTTCAGCGTCAAGGGCAAGG -ATGTGTCTAGCGGTAAAATCGGAAATGCTTTCCATACAAACACCGAGCTCAGCTTCAAGG -CTCACACCCCTAATGACGCTGAGAAGTGGTGGACCGTTATTAAGGATGCTACCCGTGGCC -CGACTCTGGCTACAGCTGCCCCGGCGATTGCTACTGCTGCCCCGACACTCGCTTCTCCCA -CAGCAACGTCGCCTGCTGCCTCTGCTAGCCCCTCCAACAACGTGTCCGGCCAGGCACTGC -CGCCTACCTACGCCGAGAAAGATACCCAGCCTGCTCCGGCTGCTGTTCCGGCTGCTGCAT -CCACTGCTCCTGTTGCCACTGGTGCTGCTAGCCCGACTGCTGTTCTTAGCCGCACAGGCA -GCACAGCTAGCCACTACCACACCTCGCCCGGCGGCTCTGCCGTGGAGAAATCATGAAATT -GATAGTTATTCCTTGTGCTTGTTCGAGCATTCGAAATGCTCTTTTGGTTTTGAACTTGGA -TGATCTCCACACTTGTTTATATAACGCATGTGCATACGTCCGTTCATTGGGGTGTAGCGT -TCATTACATCCGAGCGAAACACCCCCAGTCAACTTGATATATATATACCTCTTAATGAAT -TCCTATTCAAAATAAATGCAATCAAATATCAACGCTAATCGGATCAATGATCCTTCCACT -ATCAAATACACAAAGCGGATAGAAATGTGTCGTCGAATCATACCATAGCCTATGAGATAC -ACACGGGCATGGTTTAACTAAATTGTCCCAGCATACATCGCTGTAGGCATTATCTTATTT -CCAATTAGAAATCCTCCGGACTTTCAAGGTCATCCAACTTTTCCCCAAGGAAACGTCCTA 
-GCGCGGCGCATTTAGACTCTAGCGCAAGTAATACAGCCGGCGAAGCGGTGAGATTCAAGC -TAGCCAGACGTGCAAGATCGGCACTACGATCGATTTCAAACTGCGGGTCTCGGTGGCCGT -TGTGAGGTGGGTTCGGGGACGCAGGTACAGATGCAAGTGTGCGCGAAATATCACCCTGCG -AGCCGTCAGGCGTCAGAGTTCCATTTCCAGCGTCAATGACACCGTTGTCAGCGAGGGATT -CGGCGTTGGTGCCCCCGTTTGGCTCGGTTGTATCTAGTCTGGAAGGGCCGACGGCGGCGA -CGAGAACTTTATCCGCGATCCGGGTTACCACTGCGACAGAGTCGTCTAGTTCGAAGACGA -TCGAGCCTACGTCGCCTAGCGACATTGACTTGGCGGTCACGTAGGATGAGGGATGGCTGG -CTGGATTGACGCCTGTTAGGTTTCCGGTTGAGGCTGTTTTGGCGTAGCATGTGTAGGCGG -CGCCGTAGGTTGCGCTAAGGTTGCGTAGTTGGCGTGAGGAGAGGGAGGAGCCGTGGGCGA -AGACGGCGCCGTTTGTTGCGAAGACACTAGAGATAGGGTTACTAGGTGGATCTTGTGATG -GAGGCGAGCCGGGCATACATGATTGTTGTGATCTCCGGCGGGGACAGGTTATCTTCCAAG -AGGTCGCTTAGCGCCTCTGCGTCCAGCATCTTGATATCGGGGAGACCGGGGATCATGGTG -TCGGTGTCGGTGTCGGTGTCCTGGCCTACCGATGGCGTGTACTGTTCTTGTATCTCTGGT -TCCTCCTGCTGTGATAGGCAATCTTCGTCCGCAATGTGACCTTGTGATGGTGTCCAGGAG -CAGTCTTCTGGGAAAAAGAAACAgttgtcgtgatagtcctcgtcgtcgtggtcgtattcg -atcggctcgtcAGAGTAGTCCAGTAGGTTCTGAGCACACTTTTTCTCGTAACGGGCCCAG -TAAGGCAGGAAGTCTGGTTCGAAGAGAGGCTGAGAGGGGGATACGGGTGTAGTTGTCGAG -GAGCAATCGGAGATCGAATCAGAGAAGCCGGGAACTGGAGGAGATTCGCATTCAGCCATC -TTCACACCCATGGATACTGCTTCCACAGAGATTGTAACGCGATTGTTTATTCCCCACAAA -AAAGGTAAAATCAATGAACAGAGGGTACGAGGGAAGCAGGTAATGTTCTCATAGCCAGGG -TTTGGCCAGCTAGATGACAGGGAATGAGGGGAACAGGCAGCGCTTTGGAACCGTTGACTC -AATGCGCATACGGGGTCTCTGGTCCAAATAAAAAGTCACGGTAATTCCAATCGTGTCCGG -TGGTACAGAGAAAAAAAAAACAGGGAAGGAGAGCTTTATGAGGCCCGGTACGACCGTGAG -GCAGCTCCACGATGGGAGCAGCTCAGCGATCGGGCGAATTTGAGCGATGTGTGTACATGA -GCAGAGACATGATTGGGAGAGGAAGAGGGGCAGTATACGTGGAAGCTGAGAGCTAAGCGC -GGCAGTATGAGAAAGTCGAAATCTGAGAGACCGATCGGGATGCGACACGAAGCCTGGGGG -AGGATGAGGCCAATGGTAGGAAAGGGATCAACCGGGAGCGGGGGAGCGAGTGTGGAGGAG -CATGCAAGAGGAATTCCACGAACCTGGTCTATGTGAAGCTGGGGTTGGATTGGATGAATC -GGGGGGTTCGAGGGTGTTGAAGTTTCAATGTTTAGGTGTTGGGAAGAGAAGAGGAATTGG -AGATAAGAAGGTCCCGTGGGGAGTTCCCGCATGACGTTTTAACACGTTGAGGTATAGCTG -CATATGGTGTATATATAGAGTACCTGGTATCCAAATTATGACATTTCATATTGAGCCCAA -AATTAGACAATACCTACATCTCTGTATAATTCTTAGCTATGTTAGCTGTGTTATAGGGTG 
-TTGATTATACATAATATACTCCCTTTCAGGGTTCAGGTAATTTCTTTAGCAGGTCAACCA -ATCTGACATAGCTTAGACATATCACAAGTGATAAATTCGACTATATAGAATCTAATCCCA -GCTCTATGTACAATATACATCTATAAGCTATAAAGAGCTACAAGAGCTACAAGAGCTGTA -CCGGATATAGCGTCATGTGCCCCACAATCTCCTCACGGCTGACAAAAGCACGCTTTTCGC -CCCTCCATCCTTTAAAATGTTCGCTCGTCTTTCCGCCATCCAGCAAAGATTTGTACCGAT -CAGTAAGTTCAGCTCATCAGTTGAGAGCTCAATTACTGACCAAGTTACTAGGATCCCTAG -CAACCATGGCTACCGATACTACCACCTATAAACTCAACCGTTCGTGGACCGATCAATATC -TTTCGACACAATCACTGACATGGCAACATGATAGACACGATGATGCGGGTCAAAGACCCG -CAACGATCGGTGGAATTCTACAAGTTCCTTGGTTTGAACTTAGTCAACAAGATTGACATG -CCTGAGTGGAAGTTCTGCAACTACTTCCTGGCATATGACGGCCCCGCCTCCCTCCAGGGC -ACCCGTCACTGGACCGATCGCAACGCCGTTCTAGAATTGACTCACAACTACGGAACTGAG -AACGATCCCAACTACAGTGTGGTGAACGGAAACACAGAGCCCCACCGTGGTTTCGGCCAC -ATTGCTATCTCGGTTGATAATATTGAAGCCGCCTGCAAGCGGATCGAAGATGCCGGCTAC -CCCTTCCAGAAGAAATTGACTGAAGGCCGTATGCGACACATTGCCTTTGCCAAGGATCCC -GATGGATACTGGGTTGAGATCATCCGTCGCGCCGATGAGAACCTCAGCACCACCACCGAC -CCGGGTAGCTACCGATTGAACCACACCATGCTGCGTGTGAAGGATGCCGAAGCTAGCTTG -AAGTTCTACCAGGAATCTATGGGCATGACTTTGGTCCGCACCATCGAGAACCCAGAGAAC -AAGTTCAACCTATATTTCTTGGGTTACCCTGCGTCCAACCCAGAAATTAAGGAGGGCAGC -AAGAATGGAGTGGCGGAGTGGGAAGGTCTACTCGAGTTAACCTGGAACTACGGCACCGAA -AAGCAGGAGGGCCCTGTCTACCACAACGGTAACACTGAGCCCCAAGGTTTCGGTCATATC -TGTAAGTGGATCACACGACTTCTAATTCTTTTGCCAAGGTTTCTAATTCGAACCAGGTAT -CTCTGTTGACGACCTTCCGGCTGCGTGCGAGCGCTTCGAGTCTCTGAATGTGAGCTTTAA -GAAGCGCCTGACTGATGGACGGATGCACAACATTGCATTTATTTTGGATCCCGATGGATA -TTGGATTGAGGTGGTGCAGAACGAGGGAATCAAGCGTACTGGAGACTGGTAGTACAAATA -CTCTAGTCAATACATGTACTATTCCGCTCCATCTATGTTGTCAATGTATATATTGTGCTC -CGTGTAAATTGGATGTCACTTGACCAACAGCCCTGCTTATCCCCCGCCATTTCATAGGCT -GCGGTGGATCCTCTTCCACCCCGCCCAGAATCTTCCGCCCAGAATCTTTTGATCTTTGGG -GGACAACAAGTTTTATTGTGATCTATAAACCTCCTTTTTCGATGGCTTGATACAAAATTG -TGAGAATTTCAACCCTCCGCGCGGGATGAATTAAACAGTGATCAGGTCCATCTCCTATTT -GCGGGGAGTCATCGCCACATACTCCACAACCCCAAGTTTCAACACTGGGAAGAATCGGGG -AGGCCTCGAACATATATAAGCCATGTCGCCTGATCCAGAGAAAACAAAAAAGGCACCCCG -TAAACATGTCACAACCGCCTGTGTGCCATGTCGGGAGAGCAAAATCAGGGTACGTGCTCC 
-GAGCTCGCAAAAAGCCTCAATGGGATCGAAATAATCTGATGTGCCTGTTGTCAGTGTGAT -GGAGCTACTCCGAACTGCCAAAACTGTCAGCGTAAGGGAAAAGAATGCAAATACCAACAT -GGAGATGACAAGCGCAAGTAAGCACGACTTCTTCACAGTTCATATCCAAATGACAAGCCC -TTACAGGCATGCAGCCTTTAATGTTCAAAGACTAATCTAACCCAGAGTGTCTCTTCGAGC -ATCAACGGAGCTGTTCTCAGCCCGAATTGACCAGCTTTGTCATTTCATCCGGGAGCAGGG -GCTAGAGCCACCGCCAATGAACCCAGAAGATGAGGCTGGGATGAACAGGGTACTGGATAC -CCTCCAGATCCCGCGCGGATTCCCTCAAGCTTCAGCAGCGGCCGATGACAAACGACCACC -CGATGAGGCAAGGGCCGTGGTTCCGAGCAAATCCCCAATCCAAAACTCGCCCGCCATAAT -TCCCAAGGGACAAACCCCGGTCACTAATTTGGCACTTTCTGGTGCATCCCCAGCGCAAAA -TACTCCACCTAGTGAAAAGGTCCCATCGCCCGAGGGCTGGAATCCATTCGGAATGGTACA -AGGCGCTTCCGACAATCAAAACTTTGTCCATTGGGGTTTCACTTTGCCAACAGCTGAAAG -CCTGGACACTATTTATGCCAATCTCAACGGTGGATCGGGCACTTCTGCAATGCCTAGAAT -TCCAGTCAACACCCAAATGAGCCCGGATAGCTACCAACTTGGTATGGACATGGCCCAGCA -GCCGGGGGTCTTGCTAGGCCAGCTACCCAATGACGGAGAAAATGATTCGGACAGCGACGA -AGAGGACGAGGCAGAAAATGATGTTATAGAGCAATTGTCGCACCGAATTGGCACTTTGAA -AATTGCGGGTGACGGCCATCTCCGATTCTACGGTGCCACGTCTAACCTCAATCTGGTGGA -CGTGTCTGCGACACAGCAGCGGCAGCGACCAGACGCACGGACTGTGCGCCATGATGGTCA -GGATATTCTGAATCACCTTCGGGTTGGTCAACCTGTTGATCAAGCCCTGGAGGATCATCT -TGTGGAGCTGTATTTCACTTGGCAAAACCCCAGTACATACATTGTCGACAAGGATATGTT -TTTAATGGCACGAACTAAATGGCGTAACGAATTGGATGATACTCCATTCTACTCAGAGGT -TCTGACTAATGCAATGTGAGTTACCTTCAATCACCGGCCCTTGAGGGATATCCTGACTTG -TTGCAGGTGCGCGATTGGGAGTGCCTTTGAGGCTCGCTATCATCCCACATTCATCACTTT -TCCTAAGTCTCTCTCAGAATTCTTCGCAGATCGTGCCAAGGCCCTTCTGGAAATTGAACT -AGACTCTCCTTGTGTTGCTACTGTCCAGGCGCTTGTCATTATGAGCTGCCATGAAGGCGC -ATCAAATCGAGATGCTCGAGGATGGCTGTATAGTGGTAAGTTTATATGTATCTTGAAGAT -GAGGAAGAACCAACGCTAATGACCAATGGCTTCACAGGAATGTCTATGCGGCTTGCTTTC -GATCTTGGTCTGCATCTAGATATGACAGCCTACGTCAACAAGGGCGATATCACCCAGTTC -GAGGCTGATGTACGCCGCGCAACCTTTTGGGGAAGCTATGTCGCAGACCAGTGAGTTTTC -GTATCTCTATACCATATATATCTCTAACAGTAGCTAGTTTTTGGGGCTTCTACCTGGGCC -GCCCGTTCCGTATGAATGCTGGGGACATAGGCGTTCCGAAACCTGGCTCTGCTCTTTATC -CCGAAAAGGAAGAAAACTGGCATCCATACGGCCACCACGGTGCTCATGCGCTATCCGAGA -ACGGTCTGAAAAATCCCATGGAGCTAATATGCAGGCAATTTGTCGTTCTATGGGAGATGA 
-TCTCCCCGGTTGGTCATATTTTGTATGTGATGTTTGGGTGTTATCTAGTACAATGCTAAC -ATGTCTAGGTATGGATGTTCTGACATCTCTCGGCACGACTTGCAAAGAATCTGCTACCAA -GTGACGGAAGATTTGTTCGCCTGGAAGGCAAATCTTCCTTCCAGCCTCAAGATTAATATG -GATGACGATACTTCACCAATATTGCCTCATTTGATGATGCTCCAGTAAGTTTATATCAAT -ACGATAGACGCCTGGGGTCATACTAATAGTCTCTGATAGCATGCAATACCATCAGATCAT -CATCTTCTTCCACCGCCCATGGCTCTCTAAAAGCTATATCCAGCCCCGCAGTCCTCGTCA -AGGCCCCGGGTATCACCATGCGCGGCGCATGTGCGTGGAGTCAGCAACCGCCATTGCACG -AATACTCCAACTCTTCGAGAAACACTACACCTTCCGCCGCATGAACAACCAAGTGGTAGC -GATAATCTTCAGCGCAGCCCTCATGCTTCTCTTCGTAACAGTATCCAGTTCCCCAATGAG -TCCAGCCAAACAGGGTGACAGTCCGACATTTCCCCGCAACGCAGAAATGGTAGCATATCT -CAATCTATGTTTCCGCGCTCTCGATGAACTCGGCCAATCATTCGACAACGCCAAGCGCAC -CCGCGACTACCTTGTCACTCTCCAGCGACGCTGGCAAGCAAACATGCGCCGATCCGGCTC -AGCGACGAAACGCCAAAACAGCAATGCTAATTTGGGGTCGCTTGGCTCGCAGAAGCCGTC -TTTACAGCACAGCCGTGCTAACAATGTTCAAGGAATGGATGGCTCGCGGAAGAAGTCCCG -TTTATCTGTTTCTGAGACCCGCCCTCATATTCAGCCAACAGCACCTGTAATGGCCAACCA -ATACTCCCGTCTCTCACAGCAAACCATGCCCACTCATAACCAATATCACCAGCAGACCCC -CTATCAACAAGCCCCCTTCCCTGTCCCAGTTCCGGATTCTCAGCTAGGCGATCTAGACTG -GATCCCCAACTCGGATATGCGTCTTTTGTCAGAGACTCAAAATGGCAATGCACTAAATGG -CATGGGCCAAATTCCTTCTCCTCCTTTTCCTGATGACCCTAACATGCTCTCAGATATTGC -GGCCATTGATGGGTGGTGGGCGTCGGAAAATTATAGGAGCTCAATGCCGCCATGAAATCG -TTATTGGGTGACTACTTTATCCTTCTGTTCTTTGCTTTGCCTGTCTGGATACTTACGGCA -TTGCCAAAAGTATCTAAGTCAAATTTGTCCCCCCTGTTACCCGGATGAACATGCATAACC -CGCTGATATATATCATTCCACCAGCGGCGCCCACATAGATGTATCATTATTCCCACGATT -CCACACTTCAAACCTAGACCTTTGCGAATTCCTCGCATATACAATTCGATATTTACATCC -GAATTTGTATTAATTGCAACAAATCTTAATCTCTATCGTCCATTTCCCTCTGAAACACAC -CTTTAAACCAATCACCCACTTCCTTTCCCCCATTATTATCAGTACTCACCCTCGATAAAG -TAACCCTCCGCTCCCCAACCCCCTTCTCCCGTCCCCAAAAGCAAGCACGACGCATAACAG -AGACCATCTCATCAGCCTGTCTAGCAAGAACCGTGGCCCCAACAGCCCCTTCTCTACTCT -TCTTCTTCGTCTCAATCGCAGACGAAGATAGAGAACCAGAAGCCAAAGCAGCACCATTTT -CTCTCAATAACACCCGAACCCAAGGCAAAGCCATCTCCAATCGCCCAATCCCATCCCCAG -GCCCAGACCACATGCCTTCACTAAGCCCATAATCCAGCCCGAACGGCGGCCAGCGCGAAA -GTGCTTTCCGGCGCCGCGTAACAGTAGTGCTGATATCGATCTCATCGGCATCCATATCGG 
-GATACACATCTCGATCCCAGATCGACCCGTCCGCGGCACCGTATTGCTCAGCTGTTGTGC -TCCGCATCTCCTTCAGTACAGGGACGGGATATTCCGGTCCCGTGAGGTACTGCGGGAATG -TGTGCGGCACGTCCCGGCCTGCAGAGCGGAGGAAGAGGATTGGCGAGGCGAAGGAGTCGA -AGCAGCGTTCTGGTGTCGAGAAGAATTTTGAGCGGGCTTCGAGGAGGGGGAGTAGGTCTG -GTGGTGCTGGGGTCTTTTTTCTTGGTTGGCGCTTGTGTTTTGATGGTGTTGGAATTGAGG -CAGTATTGTTTATTGAGATAGGCCTGGGCGTTGTGGTAGTGCTCTCGCGTGTGCAGTACT -CATCTAAACCTGGCCAGTCGCAAATGGGTTCTACGGCGGCAACAGCTTGGATGGATTGCG -CTTCTGTTAGAGCCAGCATTAGAGCCAGGGAGCCCCCGACGTGTGTGCCGAAGATGGCTA -GTTGAGATGGTCGGAGGTTGGTCCGGATCCAGTCGAATCCAGCTAAGGTATCGTGGACAG -GGGTTGGGTATTTGTAAGAAGTGACTTGGGGGTTGCTTGGTCCCAGGATTTGGTCTGGTG -CATCAATTGGCTCTTGTGAGATAGAGCTTTTATCTAAAGATGGGGAAATTTGTGTTTGTA -TCTCCCCGAGGCGGTAGTTGACTGTGACTACCAATGCAGATGCAGTAGATGCTAAAACAT -GCTGGGGTGAACCTGGTGCAGAGGCCAAGACGCTGATAGCGCCGATCTTTTCGTGATCGA -TCCTAGATGTTTCATGTTTTTGAGATTTGCTCGTCTCTTTCCCCTGTAACAAAGGTCCAG -GTGGGAGGTAGAGGATTACATTTGGGCCTTCGGGTGACTGCCCCTGCTGAGACCGGCTCA -ACGCGCTCGGTCGCGTGATTCTGGGGACGAAGTTAGCATCTTTGACTATGGCTGGTAGAG -AAAGTTATAAAATACTGAAGAGATATAGCCCCGTTGTTTCCAATAGGAAGAGAAATTGTC -TCATCCGTGCAATCAGAAACGGTCGAATATCGCGAGCTCTGGAAATACGCGACGGAAATG -CGGGTGGCTCCTCTGCTGCTACGCAGAAAAAGGGCTATCCCGCGGGACGCGCAGAGAATA -TCTCGCATAGCGCTATTGAACAAGCATTTGATGTTTGATAAAACTTGTCAAGGATCTTGG -GCTGTTGGCTTTGCTATCCGAACGCAATTGATGCCTGATGATGCCTGAATAATATGGCTA -CATAGAAAGATCTCAACATTAAAACACTTATTTTTGAAGAGTAAATTTGTCTTCATAAAT -TTTACAAAAATCTGGACTTTAATAACATGTATTCTCCGCATCATTCTGCAACCCGAAAAC -AAATGGCTGCATATGTAAGCTAATAATCATGCACGAATTTCAATTTTGTATCGCTGTTCA -ATTTGAATAAAACCGAGAATATGGGTCAGGAGCCGTGAACTAAATAGGTAGCTCATTACT -TTCAAAGTCCCTTCATGAATAAGCATCTCGCCGAGAGGTAGTCGCTGACGCCGCAGGGTC -GTCTCGGTCCAGAGGTGCCTCAGTATCATTCGGTGGCACAGGACGCACAACCTGCAAACC -CAACATCGGATCCTTGGAGTAAACGCGCAGCCCAACAACGCAGACAGCATTCCATGGGTC -GAGAGGAGGCTCATTGTGGTCCATTGATGGGGGATGATTCTTCACATCCGCCTCGTCGGG -AGGCATATCGAGATCGCTATCAAATTCGAATCCTTCTAGTATCTCAAGGTCACTCGGATC -CAAAGCCTCTGTAGCAATGCGAGTGTCTAGACTGGGATGTGGTGAGTCGGCTCCGCGTCG -CCAGCCACTGCTTGCGTGTCTTGCGTGTAGGCCCTGGCCGCCGTTGACATGGATGACCGG 
-CACGGAACCATTAACACGGTGGTCACTGGTTTCACTTGAAATGCTTGCAGCGTCGATGGG -AGAATCCTCTACTGGCTTTTCTGCTAGAATCTGACTCGGAATAGAATCGTGTTGGGGGGT -ATGACCGTAGGAGCTACCGCTGGGGGTGCGGGTAACGCGCCGTGCTTCCATGCGCTCTTG -TCGGGCAATCATCTTCTGGTTGCGGATCCAATCCTTCTGCTTCTTTTTCTTGATCTCATC -GCGAAGCTTTTTCCGCTCAATCTCCTTGAAGCGTTCCTGCTCCCACTTTTCTTGCTCTGT -CTCGCCTACCATTCCCTTGGCATGCGCGAGGTCGTATGAAAGGCCGACTTGCACCAGTTT -CTCTCTTCTAGTCGAGGCAACCTGGCGCACAATTTCCTCGGTCGACGCTACGTCTTCGTG -ACGGAAAGCTGTGATCTTCATCAAGACGTGATAGTGACCCGGGTCAAGGTCAATCTCGGC -ATTGACAGATCGGCCCATGAAGTGGCTGCTATGGCTGCGCACGAGATAATCTTCTTCTCC -CTGCTTTTGTAGGCGGAACTTGAGCACGAAGCTATACTCGCCGGCTAGTCCTTTGAAATA -ACGCGTATCAAGCTGTGTCTGTCAGTCACCGTGTAAGAGCAGGAGTAAGTAGCGCATAAC -AAACCTGAGATAGGACGATGACCACCGGACCACTCTTGGTCACATCCATCATGAATTTGG -TACTGTGATAATCAGCAGACCATGGAACATTCAATGTAGTCCATTGCTGGGTGATGGTCC -ATTCAAGTGCAAAAAGGCGAGTCCGGTCGAAGTGCTGGTACTTCCTAAGGAGGTCTTTAT -AAGAGATCCAGAAGAACTGGGCCAGGTCAGCGTATTCAGCGAGAGGGCAATCAATTGATA -CATACACCATCGTTGCCAAATTTATGGCCAAGAATTTCCATCCACTCGGGGGTCCATTCT -TTGGATCCATCACCCCAGGCACCGTGCCATTCTTTGCGACCCCATGGGTTCCTAGGAGTT -GTGAGCTTTTCGTCCATCTACCTTTGACTAGAAAATCCATGCACGTACCTCAATCGCAGC -AGCCGATGTCCGTTTATCTCCCGAGCTTCCATGATGGAATAGGAGTGGCCCTCCGCAATA -CCCTTGCGATCCCTCGGTGGACCCTGATACTTGGGATCTAACCAGTTTGAGTACATGCCC -GTCCCGCAACCAAACAGAAATTCCTTGTTGACCTTCATAAGTTCCTCGGTCCAAAAGCGG -TCCTTGTTCAGGACGTTGCTGGATAAGACCTCAGACGTCACGCCTCCAGTGAGATCCTCG -ATAGCTTCACCAACAAATCCGCCCTCGATAGCGGAATAGTCTCCGTGGGCCTTTGCAAAG -GCTTTCTCCAGAAGAGGTAGCCAGATCTCATTGTCGTCGACACATTGGGCGAAGTACAGG -GCCCGCGAGCCCGTCTGCCAGACTCGTCGGTACTCTTCCTCCGTATCAGTGCGAGTTATG -TCGTCCCAGAGGGGTCGCTCGTCCACAGACTCATCATAGTCAGCAGCGCGGAGATATAGC -TTGTCATCTACAATGCACTGCTGCCACTCGCCGTCTGATGATCGAAAAATCAGCATGACT -CTAAGTATCGGATCTTCGCGCACATGCTTACCTCGATGGAAAACAAACCCATAAACCCCG -ACATTCTCATTGCGATGAACACAAATCTTCCGGATGAGCTCTTCCTTATTGCCCATAGTA -CAAAGCGCAGCCATGAGCCAGCAATCCCCATCATATCCCTGTCGCACATCTGATGCGGTA -GGCCCATTAACGAAGAACTGCGGGTTCTCAAAAATTTCCTACATGAAAGTCATTAACAAC -CCTCCAATCACCCCATGACAGATCGCTTTCGAACTAACGGTCACTCTCTTCACGCCCCGG 
-GGTCTCATGTACGTATTCGGCTTATCCAATCCATCGAGATAGTGTCTTCGGCCAGTCTTT -AGGTCGAGCTCGATATCGAAGTGGGGGTCCGTGTACTTCTGATTCACACGTTCACATTCC -TTGACGATACGGTCGACGGATTTTCGACATTCTTGGCGCGCCTGCTCGTACGATTTGACG -GCCTCGTGGCCTTGAATCCGGCCGCTCGGTACACGCTTGGCCCGAGTCCGTGCGTACGGG -TTGTCTGGTAGCACCGTGTATACTTTCCCTGGATATTTGGTATTGAATTGTTCCCAGAAC -TGCTTGACAGATTCTTGGGGTGCTTGACAACGTGGACGGGGCGGTCGGCTGTTTGAGCGC -GAGGAGCCTGGTCCTGCGACGGCATAGCCCATCTCGTCGTCGAACTCCATGGTTTTGGGT -GTCTCTGGGTTGGTTTTGCTTGATTGAGGTCTTAGGCTCTGCTGTCTTTGGGGGGGCGGA -ATTCTTTCGGGGAGACAAGCGCTGTGGGAATCCTCGGGCGGTTATTTCTCTCACACTGGC -TATTCCAAAATGGCGATCACGGATGGAAATAGGATTGCTTTTCTAAATAAGAAGATCGAG -GATTGGAACATTCCCCAATTTTCCCCTCACACGGCCAGTCCGAGCCAACAGCATAGCCTC -AGCCTCAATTATCTCGTATTGATTCGCTGGGATCCCTTAGAAAGGTTCGTAAGGCTGCAT -GTGCATTGGACTAGTGTACGAACGAGCGAATCTGACAACACATCGTGCTTTCGGCCCCCA -CCTCGCTTCTCAGCCCCTGCTATGAAGGACCACACGAATAAGCGTGAATTCCAAGTCATA -CCTAGATTGGATTAGGATTGGCAGAAATGTGAGACAGCAATGGGAAAACAAGGAAAAGAA -AGTAAACTGGGAGATGCTTAGTTCCCAACGTTATGTGGGGCCCCGGGGCGAAGTTCAGCT -GCCGACTCTTGACCGATATTGTCCTGAGGCTGAAAATAGAATACAATGGTAACATCATTT -TTGTCATATCTGGGTAGAAATGGTATGTACGCCTGTTATTCACTAAAACACTTCGTATCC -TGTAAGTCCGCCCTCCATATCACTCCCCATTGAACGCCGAAAGAGGAGAAAGAAAACATG -CCCTGTCATTCCCCACTGTGAATAATATTACACAGAAAATGTGAGGAATCGTTACACTAT -TAGTAGCCGTTACTTTGACCACGTCCGGAGTAGTGTGCGGTGCCGTGACGGAAGTACGGG -TGGAGGCAGGCCTGCTTGGCCGACAGTCGGTGCGCAGGGTCAAACTCGAGCAGAGACTCC -AAGAGCTGACATCCGGCCTCTTCCAGCCCGGGAACGATCTCAACATCAGGGCGCTTCCAT -TTGGGGAAAGTGGCTTTATAATCGGGGAACGAGGTCACACCGGGCCAGATGTCTTCGCCG -GGGGTACCAAGGACACTGTAATTCGTTAATACAGTTCGATATAGCATAGAGAGACACCAC -TCTTACCGGAAGATCTTGAAGATCTCGTCAATTTCAGAGTCACCGGGGAAGAGAGGCTTG -CGTGTACACATCTCTGCGAAAATGGCGCCCACCGACCACATATCCACACCTGTGGAGTAC -TGACGTCCACCCAGCAGAATCTCCGGAGAACGGTACCACAATGTGACAACCTTTAATTCG -AGTCAGTAAATAGATAATCCCACTAATGTAGTCGAAAATGCAATGCATACCTCGTGAGTG -TAGGTGCGAAGGGGAACACCGAACGCTCTGGCAAGACCGAAATCGGCCAACTTCAGGTTG -CCTTCACGGTTAATCAACAAGTTTTGGGGCTTCAAGTCACGGTGCAAAATTCGGTGGCTG -TGGCAGTATCGGATGCCCTCAACCAGCTGGGCCATGAATTTCTTGACCATTGCGTCTCCG 
-AGGCCCAGGGTCACACCACCCAATCCATCGGGCAAGGGCTTCCCTCGCCCGCCGTCACTA -ACAGGCAGAGCCTCCATGTATTTTTTAAGATCGCAGTCGAGGAATTCGAAGACAAGGTAG -AGCTTGTGACCATCGGCATGGACAATGTTAAACAGCTGGACAATGTTGGGGTCCTTCATC -TCCTTGAGGAGGGAAATTTCGCGGATTGCAGTGCTGGGGACGCCCTCATCTTCGGCTTCG -AGTCGGATCTTCTTCAATGCAACGATACGATTGGGGTGACTAAGTTCACGGGCCTTGTAG -ACCACACCGTAGGTGCCTATGGGGGGTCGACGCGTGAGTTAGGCTGTGGGTGACAACAGT -CGCGTCAAGGGTACATACCTTCGCCAATTTTCTCAATTTTCTGGTAGTTTTCCATGGTGG -CCGACATCCAATCCTTTACTGCTATTGATGAGATTAACAATTGCAAACTAGGAAGGCCGG -AATGGGAATTTGGCAAAGATTGGAGGGATGTTGCCAAGAAGAATTTGTAGAAGCAAGGAG -CGGGGCTAGTTACTACTTACTTTGGGGATGTTGATCTCTGAGAACAGCTGAAGCAGCACG -CTCTCTTAATTTCACCGGGAGATCGAAAGAAAGTAATTCGTTGAAGAGATAATTGACAAG -ACAGTGTTTCAATGGTGGGGAGACAAAGACACAAAACAAACAACCCCCGCGCCTATCATA -GGGCGGTGAAGCGCGTACCTCTTCCTTCCTGTTCAAGTTTCTCTCTGACTTCATCTCTTC -GCAATGGCTCAAATCAAGGGTCTAAGTGCTTATGATCGCCCTCCTGATCCAGTGAGACTT -TGCTACAAGAAATATTCCAAGATTGCCTTGTCCGAAATTGATAATGATCCGGGAATCCTT -GACCTACAACGGATCGATCCCGACCAACTGCCCGACGGAGTGACGATTGAGCAGTATATG -TCCAGCCAAGACTTGCGATTGGCATTTGATGATTTCTACCGCGGAGGCCATGCAGCAGCA -GCAGAAGATGCTCCCCTGACCGAGGATATTCCAGTATTCGCTCACAAATCGATTTCCGGT -TAGCATTGTATCGCCTCAATTAGGAACTGACCCAGCAGGACCAAGAGCCATGCCTGACCT -CCTGACAGGTCTCCTGATGATTCCCGCCTTATTTCCTCCTACTATCCAGATCGAATTGCT -CTCTCGACTATTCCATCGAGACTTACCGAACCCCGAGCATCAGACAAACCTACACCTGCA -CTACGATGTGACGTACCCCGAAGAATCAGAAGACACCCACATGCCCAAGTCTTTCTTTGC -AGATGATCCCGCTCGTATCTTCCAGCCCAAGGATCCTCAAGTGCACAAGCCTTTAACAGT -TCAAAGCCTCATGGAGAAAAAGCTGCGCTGGGTGACATTGGGTGGACAATACGACTGGAC -CGCCAAAGTATACCCCTCTGGAACCCCGCCAGAGTTCCCGCCAGATATTGCGAAGGTGCT -ACGTGCGGCCTTTCCAGAAACTTCAGCTCAGGCTGCCATTTTGAACCTCTATTCCGCCGG -GGATACACTGAGCGTGCATCGGGATGTGAGTGAGGAGTGCGACGTAGGTTTGATCAGCGT -GAGTTTTGGATGTGATGGATTGTTCTTGGCCAGTCACGACGATGGGAATGGTTGCGAAAT -CATTCGGCTTCGCTCAGGCGATACTGTCTACATGAACGGTAAATCGCGCTTCGCCTGGCA -CGGAGTCCCGAAGATCTTGCCCTCTACCTGCCCAAAATGGCTGGCCAACTGGCCATGCTC -TGAAGAGTCTGCTCCGGGGATGCCCCCTGGTCCATACGAGATGTGGAAGGGTTGGATGTC -CAGCAAACGGGTGAACTTGAACGTTCGACAAATGACGACTTTGGCGTCTGAAAAGAGAGT 
-AGATCACATTTGAAAATGGGATGTCTAGATGTCAGTAACGTCCTCAGATTCTTTGGTCCA -GACATGGACAACATGCTATCAAGCTCTACTCAACAACAGAGACAGTCGTGCTCTTCTAAT -GAGTTTGGTCAAATCTTGCAATCAATATCCAAAAGTGTTGCTTCCATTGACAAGCCGCTC -TGTCTTTTCATCCGATAACAATGCGGATTACATTTCCGCGAAGTCCACCGGTGGAGAGAT -CCATCATCCCTTAGACGAATCAATCTTCTATAGAACAAATAATATGGCGATGTGTCTGGA -GTACTCCGGATGAAGTGCCGAAGACGCGTCAACACGCCGAGTGTCTCACCGATAACCCCG -CACTGGTGGATAAATATTCTCTTTGCGAAAAGTCAGGCAATCTCTGGAATTAATCCAACT -CCAATTCACAATGAGTGATACATCCTGGGAATTGAAAGGCGAAGCCAAACGCCTGGCAAT -TCTAAATGCCATCCCCGAGAAATGGCGGCTGAAATCTCCTGTACCCCCAGCCACAGAACT -GCGTGATGTCACCGGGGATTATATACAACAATATCTCACCGAGCGTGAAATTGAAATCAC -AGAGACAGATGCCGTGGACATTGTGAATGAGACATCCATAGGCCGCTGGTCAGCCGTGGA -AGTGACAGAAGCCTTCTGTCACCGGGCCGCACTGGCTCACCAACTCGTACGTTTATATAA -CAATGCAAAAGAATATAACATGAAATACCTAGATAAAAGCTTATCATGACTCAAACTCTA -GGTCGGCTGTCTCCATGAAGTATTTTTCGAAGCAGCAATTGAAGATGCCAAACAACAGGA -CGAATACTTCGCAAAACACAACACCCCAATTGGACCTCTGCACGGCCTACCAGTGAGTCT -CAAAGATCAGTTTCACGTGAAAGGCGTTGAAACCACAATGGGCTATGTAGGCTGGATAAA -CACGTTCCAAGGCCAACCGAACGACCCGCGCAGCGGCACAGAAGAAAGCGAACTCGTCCG -CGAACTGCGCAATCTCGGCGCAGTCCTTTACTGTAAGACCAGTGTACCAGCCACCTTGAT -GTCCGGAGAAACCATCAATAACATCATCGGGTACACCTGGAACCCCAAGAACCGGCTGCT -ATCCTGCGGTGGCAGTTCTGGCGGCGAGGGCGCCTTGATGGCGCTCCGCGGATCGCCTGC -TGGTTTTGGCACTGATATTGGAGGTAGTGTGCGCATTCCTGCTGGGTTTAATTTCCTCTA -TGGACTTCGCCCTTCGGCGGGCAGGATTCCTTATCAGGGTGCTGCAAACTCGATAGATGG -CCAGGGCTCCATCTTGTCTGTCATCGGGCCGATTGCGCCAACTGCGAGATCTTTGACGCT -CTTGTTCAAGGCTGTTCTGAGTGAGAAGCCTTGGCTGTATGATCCTCTTGCGTTGGAGCT -GCCATGGCGAGATGAGGTTGTTCAGGAGACCAGGGCTTTGATTGAGAAGGCTCGGGGCGG -GGCATCAACGCTTGCATTCGGGATTATGAAATATGATGGTGTGGCTCCTGTTCATCCGCC -TATTGCGCGGGGGCTCAGGATTGTCGAGAAAACGCTCCGGCGGTTGGGTCATCGGGTCAT -CGAGTGGAAACCTCCTGCCCACGCTATTGCCGTTGAGCTAGTTGTATGTCACTTTGTCTT -TGCGCCTACATGGGATTAAGCACAATACTAATTTGTTGCAGGGTAAAGTGTTCGACATGG -ACGGTGGCGCAGACATGAAGTATCATTTCAGTCTATCTGGAGAATCTCCAGCTCCGCAAG -TAATCAGTGCCGACAATGGTACTGAAATGACAGCATCTGAGATTGCCACGCTCAATGTTG -CAAAGCGTCAGTATCAGAAACAATACATGGACTACTGGCATAGTACAGCGGAGGTGACAG 
-GCACAGGGCGTCCTGTTGATGGGCTCTTTTGTCCATTGGCGCCTCATGCCGCAGTCATTC -CCAGCGAATACAATTATGTAGGATATACTGGATTTGTGAATCTGCTCGACTATACTAGTC -TTGCGGTTCCAGTGACATTTGCAAATAAGAAGGTCGATGTGAAATCGGCTGATGGGTCTG -TAATTGACTCCGAGAACATTCAGTGGGACTGTGAGTTTCCCCGTGCCTTCCTCCTACGAT -TCCATACTGATCGCCGTGTAGATGATGCCGATACTTACGATGGAGCCCCGGTGGGAGTGC -AATTCGTAGGCAGGAGGCTACAGGAAGAAAAGATGTTGACTCTAGCTGAATATCTGGGCG -AAGAGATTGCTCTAGATGCGAAGGAGAGAGACTAGTTGCAAGTGCACCAGGCCGTATCTA -GTGTGTTCTTTTTTGCCAACATGAGGTCATTGCGACAGAGCAACTAGGCATGTATTACGA -TTAAGATGAATCATTAGAGTCTCGTCTGATCATGATATTCTCTAATAAGAATTTATCCAG -AAAAAGACGAAGCAGACGGAGCAGATTAAATCACCTACTGTCCGAAGTCAATGCAAAATA -TTAATAGCCCCCAAGCGCCTTGTCCGATGCATTTTTACATATGATGTACTCCAAACCCAA -TCAATTCCTAGACGACGGAGGCGTTTGTCCCGACGGTGCAGCAAGATTGGATAAATGGGG -CGTGGAAGAACCCCAAGGTTTACCAGCTGCAGCAGGCGGAGCATGAGAAGACTCGGACGA -AGGACCAGACGGGTCTTCTGAGCCATTCATGAGGCGCTGTCGTTTAGCTGGAGGCAGCGC -ACCCGCCTGGGCCAGACCGCTGGGAAAGGCGCTGGGCGGGCTGCGAGGATCTGCATAGGA -ATGGGGTGAAGCGTAGTGGTTAGATGAAGCAGGGGTGGAGACAGCTGCAGCTTCTGGACT -CAGAGCCGTAGCAGACTGCAAGGGACGCAGGCGATTTGGATGAGAGGTGGTGTCTTCGCC -TGGGCGTGGAGGCCGGACATCAGGGCTTTGCTGGGACTGACTTTGTTCTGGATCCCCTAG -CTGAGGGGCGTTTCCGTAGCCAACCCCAATCCGGCCCATGCCTTTATCGTGCATAACGAC -ACGTTTCCATTTAGCCACCACGCTGTCGTAGTCCCATCGTGTAGAGTCGGCGCTAATGTG -ACTGTCTTGTGCGTCCAATTCCCGCGCTACCTCCCGTGGTAGGGGGAGAGTATCGGGGGG -AAACAGTGCAAACTCACGGGGCCACTGGCATGATCCGAGACGCTCAACCTGTGCGCGCAG -CTGATTTGCTTCCTGTTGCACTCGAGCGAGTTGCTCGTCGGAAGACTTCCGCTTCTCTAC -TTCCCGCGCAAATGCGCGTGTGATCTCCAATTGCCATGCGTCGCGACGCGTCTCGTGGAG -TAGAGAATTCCAATGATTGAACGCCAGTTCTAAGTGTCGGAAACTGATATCTTCGTGTTG -TTGTGCGCTCCGGTATATCATGTTCGGATCCACCCCGCTATTTCCATGCCTTGTGTGAGC -CGCGCCAGTAGAGTCTGCTGCGAGTTGGTCGGTCTGCCATTGTGCGACTTTGGAGCGAAG -GGCTTGGCGGATTATATCAAGGTGCTCAACGCCGGGAGGCTGAAGAGGAAACGAGGTAGA -GGAGGGAAACGGATGTGTCGCAAACAGGCTTGGTGGAGGAGGAAGCAGCTGCAGACACAT -GGAGGGGAAAGTCTGAGCTTGCATGTCGAAGTCGAAAGGAGACAGCCGAGGTCGGGTACG -AGAGCTAATGCTTCGTGAGCATGGCGGTTAGATATAACGATGAATGAGCCAACATACCTC -GCAGCTTTGATATTATCAAGGACCTCGCGTAGTGCTAGCTCTAGCGCCCGCATTGTATCC 
-GGGTTACTGGAAGTAGCACCTCGACTCGCATAGTCCGGCAGTTGTAATGACCCTGTCCGA -GACTGCGATGGCGCAATGCGAGGCCCATCCGGTACGCGACTGGGATTGGGGATATCATTG -ATCACCCCAGTCGCGTGCCAGGTGGGCGTGTTAAACATCATGCGCACCGGAGCCTCGCGC -GACTTCGATCCCGGGTCTGCAGCTGTGAAAGGCTCTGATGTACCCTTTTTCTGGCCTCGT -TCAGGACTGCCCTCGTTCTTCCCTGTGGATGTTCGGGCTTGGCGCCGGGTGATTCCACTG -CGAATTCGTCTGATCTCTTCGACATCGTGGATTCCGTCGGGTTTCTTCTTAAAAAGGAAT -TGGTCGAGGTGTCGACCTAGTGATGAGGAGGTAAACGCCTGTTGGCAGTACTGGCATTTG -CGATCCTTTGCAGCAAGGCTGGCCTTCGACTCGTCACCGGCAGTCGAGCCTTCGGGGGTA -TCTCTGGGTTCTGTTTCTTCCTTCATGATTGGTGAAGTGGAAGAAGGAGGTGGATCGAAT -AAATGAAGTGGAATTAAAAAGTCATGGCATAGGCCGGAAGCAACAAGTTGGTATAGGCAA -AATTCAACGCGCCAGATGAGAAGGAGGACGGAAAAATGCCAAGCTTTTGGGCGCGGAGCT -GCGCGATGAGCAATTTTGTTGGCCAAGGCACATGGAGATTTATTTCAGCAATCATATGTC -GGACTTATTGATCTCTAATGAATTTTATACGTGTAACTGTTGGAATGCTACCTACGCTAG -TAAAAGGTAGTCTCTGCAATGTAACTCAGGCCCTCAACCAATGGATTAAGCAGAATCGAT -GTGACTGCGTATGTAGACTATGTACTTCGGCAAATCAAAAGCAAGAAAATGGTCTATTCT -TCCGGAGAAAACAGCAATGATTATGTATGTATTATGAGTATAGAGCCGCAACATCCTCTA -CCTTGTCAAGACTCAAAGATTACGTGGAGTCAATCACATACTCGGAGAGGAAGTATACAC -AGAAGGCTGGAGAGACTTGCACACTTGCACTGCATGGCAAACAAAGTGTAACCACGCTCA -AGAACGGTCCTTGCCCGTTGTCGTCTTACGTGCACTGAGCCAGCGTTTCATGTTGCGAAC -GCCGTGCAAGTATAACCACGAGAAGGCGAAAACAAGAGCAAAATTGAAGATTATATATCC -CCAGAGGATTCCCTAACAGATGTTAGATTAACCTGAGCCCCAAGGCTGGCCAAAGGCTGG -CCAAAGGCGCACAAAACACTGAACTCACATAATCTCTCCACTTGTGCGAGTAAAAAACAT -TGATGCTCTTAGCCTAGAAAGTGTAACGAAGTCCTAAAGTCAGCCATACTCCAAGTCTAC -TAATCGAGGTATCCAAGAAAAGCCACTCATGATAGTGCGCGACTTACATATTGACCACCG -TCCGAGTATTGACAGTATGCGCACATTCCATTCGCAGCGTCTCGGACATATCCACCGGCT -TTCGCGACATAAGACCCAGCATATTGTTGGCAGGTCATGCCAGGTGGAGGACTAAAGTGA -GATTCTTCACGCTCAATGCAGCGCACAGGAACATCGTGGGTGATGGTTCCAATAAGCCCC -TCTAGCAGATATTGGAAAGGAGTGAGCCAGTACATCCATGACTGCCAAAAATGCGGCATT -GCGGCATAAGGAACGACAACACCGCAGAATGAAACAATAAAGGTGAAGAAAGTCGGCACA -AGCAAGCTTGCAAATAGCTCATTGGGTGCAAGTGCAGCGATGAATTGACCAAGTCCAACA -TAGTATACTTCAAATAGCATCAGACACATCCAAACGTAGCCGGAGATGAACGAGCTGCGC -GGAAACCAAATTCCCCAGTACCTTTTGTAATTCTGTCAGAGATGTAGAATACATTTCAAT 
-TTAAAAAAAGCTTACCAGCAGTTAAAGTAGATTGACCCGGCAACAATTGAGTAGGGTAGT -TCTGGGAGAATTGTGCTCGTCACAAAGGCAGGCCAAGAGTAAATCTTGGAGTTAGCTTCT -CGAGACTCATAGAGGCCCCGGAAATGAAGATACCGCGGTTGGAGTTGCTGAATCAGTGGT -GGAGATATCGTCAATGTCATGAAGACAGAGAAGAGTCGTGACTGCATGTCGATGTTACCA -TGTCCCAGATGCCAGAATGTGAAAGTGTTGAAAAGACCGGTGAATATATGAAGCATGAAT -TTACCCTGAATCATGGTCAGTCCAGTCATTCAAGTTCAGTATTTGCAGGGGAGCGCCTTA -CAATGATGTAGTCTGGGACCCTCCAGTAGGCCACAAATGCGCGCTTGGTGACTGCCACTA -TTTGTACAGAAAGAGGCATTGCATATTCACGGTCGTCCTTGATATTGCCGTCGGTTTGAG -CATTGCGACGAGAGGCTATGAGATCCTCTAGATCGGCAGAAAGCTGCTTGGATTCTGGCG -AGTTGGCCCACACGTCACCCCAGTCTTGCCCCTTGTAATCCGGGTTTCCGGCTCCAATGA -CCTCTAACATATACTAGAAAAAATGTGAGCAAGGAACAAGGGCTCCGGCTTTGATACACG -CACCTCAGCCGGATTTTCATTTGGTGAGCACTTCTTCCCGCCATTTCGCTCAAAGTAATC -AATCAGCGTCTTGGAGTCGTTCCCAAGTGGCCCATTATAAACAATGCTCCCGCCGCTTTT -CAACAACAAAAGCTCATCGAAATTCTCGAACAACACTGCAGAAGGTTGATGAATTGTACA -TAGTACGGCCTGACCGGCATCTGCAAGTCGGCGAAGGAAGCGTACAATGTTGAATGCTGC -CAGAGAATCGAGACCAGACGTCGGCTCGTCAAGAAAAAGTAACAATTCCGGCTTACTTGC -CAACTCAACCGCAATGGTAAGCAGTTTGCGCTCCTCCTGGTTGAGACCTGAGCCCGCGGA -TCCAACAGTTGCGCCAGCGATGGACCGCATTTCTAACAAGTCAATGATCGTCTCACAATA -ATCATATTTTTCTCGTAATGGGATTTCTTTTGGTTGCCGTAACAGGGCAGAGAATCGAAG -AGACTCGCGTACAGTAGCAGTGGGTTCATGGATGTCCATCTGTTCAGCGAATCCAGTCGC -TCTCTGGAAGCTCTTTGGGAGAGGACTAAAAGTATTAGGCTGCATAATTGATCACATACG -TTAAGGAACTCCTACCTCCCATCAACTAAGAAGTTTCCCGTTACCACACCAAAGTTGATT -CGCTGTGCTAGCGCGTTGAGCAAAGTGGTTTTCCTGTCCAGAAATTAGTATCAAGAACAG -AATCAACATTGTCTTTTGATCTTACCCTGCGCCAGAAGCACCCATCAATGCAGTTAGCCG -ACCAGGCTTAACATATCCTTGAACATTCTGCAATAGTTGTCTCTGTCCACCCTTGTACGG -AATGGTGTAGTTGACGTCTTGCCATGTAAAGATTGCTGTGTTTTTAGCAACATTCTGAGT -TTTGTCAGTGGATTCAGTCGAGTGGCCATCGCTTTTGTTGGAAGCCATGCCGTCTTTCTC -CGCAGATTCCTCATCTTGTGGGGGGACGGATCCTTTGATGGCATCCTGAACTCCTTTGGG -GGCTTCACTTCTTTTGAACACGGTAACCGAGCTGCCTCCCTTATTAGGCCTCTGCAACTC -CATTCCTATCATTGTCAAACAGACAAAAAATATCAGCCATCCGATGATAATACCGATATT -TCGCCATAGGTGAGCTCTGGTGTAAGTGTATGCTGTTTTGATATACTTAGAACCATGAAC -AACCAACTGGTCAGGTTCACTGCCTTGGATTGCACAGGTTTGGTGACCAGGAACCGCATT 
-TGGCCCATCAGGCACAATCCAGGGTGGTTCGCATTTTATCTGGAGATTGTAGAACTCATT -GGCCATCAACGCCTCAAATGCGTATTGGACAGGATTGATCCAAATGAGCCACTTTAGCCA -CGGGTGCATTTTCCATGGAGGAATTAGGTAGCCTGTATTATGTTGGCCTTTCTGTTCGAG -TATTAGGTTTCATTGGACTGAGCTCACCTGTATACACAACCAATGCCTGGATAGCGACAC -CAGTGAGACGTGTGGCTATATAGTGTTATTGACGACTTTCCGATAGCGGGAAGCGTTTCA -CTTACCAACATCCAATGAGGCACATAGAGCCCCAAGGGCTCGAAAGAACGAGTACATGGT -CATTGTGAGAACGAAAATGATAAGTAGATTGATGAAGAACTGTGAAGCTGTTCGTGCCAG -ATTTGCCATGCTACGTGACTAGTCAGTTAAATCCATTCAAAGCATCTGACAGGACTCACA -AGTATACTACGATATCAAATATAACGACTTGTATGAACACAAGAGGCACATCAACCACAA -CCTGGGCAAGTGCATATGCCGAAGGGCGATAGAAAGAGCTAGAGAAAGTTAATCCTGACA -TTTTTCCCTTGTTTATTTGAAGTCCTCACAAGCTTTTATGTTTCAGCAAGATTGGTCGAC -TCTCAAATGCTGCGGTCAGCTCTGCCATAGCAAGTAATGCATTGAAAAGCAGTATGAAGA -ACATCACACCACCCCTCGTAAACACACCATTGCTTGAGAAGGTGAGCTATACGGGGTCTC -GCCATGTTGGGGAGAGTAGATTGCTTACCTTGTATCTGGGAGATTGTAAAACAAGCTACC -TACAATGAGGGCTTGGAAGGTTATTATACTCCACTTTCCAATCAGTGACTTTCTGTCACC -AACCATCACCAAAAGCTGACGGTGCGTGAGAATCATCACCTGTTTGTAGAAAGAGACTGT -GAAGTTCTTCCGCTTGGCGCTACTTCTTGCAGCCTCCCTGTCCTGCCTTTCAGCTTCGAC -ATCGCGCTCCAGGCTTTCAATATCTGCTATGTTGCGTTTGTAGATATCACTTTTCCGATA -TGCAACTTGAAACTCAGCTGCATTCCGAGGGATTCGGTGGTCCCAGCCGTCCTTGACACG -TCTTGCATGAGGGTCACCGACAGAAGTCAAAAAGTCGGGGGTTGTCCAACGTGGGGGGCA -TTCAAAGCCCAGCCGCTCGAAATATGCCTTTGCATCCTGACTAGATCCAAAGAAACAGCA -TTTTCCGTCGTCAATCAGAATCACCTTGTCAAAAAGATCAAAGAGATTCTCTGAGGCTTG -ATAGAGAGCTACCACCGTTGAAACGTTGGCGGTGTTTGTTAAACTTCGTAAACTCTGGAC -ATACTCCAGTGCTGTACTGGCGTCCAAGCCCCTGGTGGAGTTGTCCCAGCACTGTGTGCT -AGCCTTCGTGACCATGGCTTCAGCAATCGAGGTTCGCTTCTTCTCGCCCCCGGAAACTCC -CCGAATGAGTTCGTTCCCAACTTTGGTTTCCAGAGCATGTTCAATCCAAAATAACTTAGC -TATGGCCGTCAAAAAGGTCTGCTGATATTCTTTCCTGGTTTCTCCCGGGATGCGTGAGGC -TTTTTTGGGAGTTCGGGTTTTCAGGGCAAAGAGTAGTGTATCACGCACAGACAAAGTTGC -GTAGTGGAGATCATCCTCGGGGTTATACAAGACTGATAGTTTTAGCTTCAAATTATAACC -GCATGAACAGCTACATACCTTCAGACCTAAAATGCTTTTTCATCGTTTCTGAGTCAGTGC -CATCATACCGTACGTCGCCTTCGATACTTTCGTAGCCCGCTCGTTGATTACCGAGCACTT -TCAAGAAAGTAGAACACCCCGACCCAGGGCGTCCGAGAACAAGAAGCATCTCCCCTGGGC 
-GAACACATCCCTTGATGGAAGGTTAGTCAAGCCGACTTTTTGTGCGACGAGCACCAGACT -TACTGTAAAATCATCTAAAATTGTCCGTACTGGTGGTTTTCCGGTTCCAGCACCTTTCTT -GCCCCGAGTGAAGAGTTGCTTGATGAGCCGTGGCAACCCGAGAAAGAAGTCCCCATTTGT -AGGTTGGATAGCGGCACCAAGGCCAACACCCTTCACGGTGAGGTGCTTCCACACAACTCC -CACATGACGTGTTTTTTCTTCTTCGGAATTTGCTTTACGCTCAGCACCGAACATCCGTGA -GACCAGCTTTTCTATCTGGGCCCAGTCTTCTCCTTTGAGATCATGACCGGAGGCATAGCT -ACGTCGACGCGATATAGTACGCGCGACACTATCCTCGTCTAGCTTTGACCCTGTGGAATC -TGCCTGCGAGTCTGATGGAGTGGTACGAATTGGTGCGAACTCCTCGGGTGGACACGAATC -GGCTGGCGAAGATGAGTTCTCAGAGAAAGAAACGGAATCGCGAGAAATATGTTCTTGATT -TGCAGCTAACGTCGACTCGCTTTCCGCAGAATCACCGTTTGCCATCGTCCCTATTCTTAG -GTTGGGTTGAGAGCTTTCAGAATGCAGACGCTCAACCTAAATATCTCCGGGACTGTGACA -CGCGATTATGAGGCAAGGTGATACCGGATGACCATGGGTGCATATTCAGCTGAGCGCGAT -ACGGTTATCGACAAGGAATACAACCAAAATTACATGTGGATATGAATCAAAAGGGAATGG -AAATGAAAGCAGAACTCAAGAATGTAGCCATTACGTCAGGCATCAGTGCGGAAACAAGAT -CTCAAGGTCCGGAAATAAGCCGAGAAGATAATCTTGGCCTCTTTTTTGCTTGTTTTGTTA -GTTGCAGCGGCAGGCAAGGAGGTATACCCCTGTCTAGACCGTCTCTGTTATGCCGTGCGA -TCCTTATTTTTCTGGTCGTATTCGAAGTAATGACGCTTTGTGTTATACTAAACGTAAACG -ATTGTAGGGATTTTTTTTTCTGCATCTGATGAAATCCCAGTCATTTGATCCACGCGGTGC -ATATAGATATCTCCGCATTTGAACTTCTCAATCCCGGATTAGGAAGTCATCAAATCCACT -GCGTGATTTTCGTATAATTTTCAAAAGACTGTAAATATTATGACGCCTATTTCGGGTTCC -TAATGCAGGGCATGATATTCGATTGGGCAGTGCTGTTGCTATTACCTTCTGGGCTACCTA -TATAGGAAAATAAAAACATCCACGGAACTGACTCCCGTTAACATTTACCAAGTCGTTTAC -TACGGCACGACAAAACTTTAGTTGTTTCAGTTTACCACAAAAAAGGGATGAGAGTAATTT -GGCTATCATGATCTCGTCATCGTGCGAAATGGTAGTTGATACTGTATCAGACAGATATTG -ACCACGATTCTCAGTCTAGGGTTAGATAAACTACTTGTTGGTGAAATACTCACGGAATGT -TTTGGTGGTTATCTTACAGGCTAGGAATAGCAAAATATTTAGTCAGCTCTTACAGGTACC -TGGAGAGCTCATGTAGGTTCACCGAGTTGAACAGTAAGGAAGTATGAACGGATTATCATT -CCCTGATCAGCAATGGAATAGCACGCCATGATTACAACATATTACTTTCAGGCTCAGCAG -TGATAAAGCTTGAGATTCCAGTCTAAGATTGTAGCAGATGTACTTCTCCGCCTACAGAAC -CCTTCGGCTCACTGTGCAGTACACCTGTTGCGACCCTAATAGCAATGAAGCCCCATCGCG -CTCACAGATTCCTCCAGAAGGTATTGGAATTCTTCCATTTATCTTCCCCTTCCCCCAAGC -GTGCAGTCAAATCTTGCAACAAGTAGGTCTCCAACTTTCTGTAGTCTTCATCATGAAAAA 
-TTGACATGTGCTTCAGTGCTAAGCTGAGGTGTGCTGCTACCACCACACCTCCTCTTCACG -GCATTTGCGCTGTCCGAGGATGCAATCAGACTGAATTGAATGATCAAGTTGATGATCCTC -CTCAGTATGCAGCCTTTGGGGGTGCAGCTCTTGAAAAGCCGGTCTCAACGCGTAGCCAAC -TGGAGAGGCGCATCGTGAAACTCTGGAAGAAAGTCTGGTTGGGTCTTTACCAGATTGATG -ACAACGAGGATCGATATCACGTCGAGCGACTCGAGGCCCTTTTTCTTGGAGCCGGCTTGG -CTCGCACATATCGAGTAGGCGAGCTTCAAGTTCTCCGGCGTCATTTTCTTCAAAGCGAGA -CCAACAATGGCGTTGCTTCGCTGAAGCTTTTCCAGGGGCTGTCTCTTTCGACCCTCTTAT -CTGTCATACATATTTTGGAGGCTTATGAAAATGAAATTGACAACATGAGGTTGCTCTATT -CCATCTTGCGAAGTGGTCAGCTTTCACTGCATCAGAATGCAGACCTGGCTCAAACTGCTG -TTGTCAGAACTGAGTATATGGCAATTGCGGAACCGATGGAGAAGCGAATCCGACAGTACG -ATATTCGAATGAATGTTGTGAATATGGAAATCTGTGATGAGGCAATGAATTTCTATCAGC -GACAAAAGGGGGAATATATCTACCGCAAGGCATTCCCAAATTCGTGGTAGAGAACTTTTT -GGGTTCCTCTGTGCCCTCTTTCACTATGACTCGTATTGAGGTTTGAGTACAATCTGTATG -ATATGGATTAAATGACAGGCTATTCTCTTCTCTAAGTCATAGCTACCTTCATCATATCGA -ACAATCGCCAAGTTCTACAAAGCAAAGATCCATGATAATAGCAATACTTACCCGTGCTTG -GGCGAGATATGATGGGAAATGGTTTCTGCGTTTTTATAAGTGCTCAGAAACCTTGGTGCT -AGTATTAGAGGTTGTAGAGCTACTTGATCTGACTTCGGCGACAGGACAGACCTTCTTTCA -GTCCTCACCAGTGTCTTTCGCTGCCAATATTATATAGAAGTTCAACTGCCACTGCCATTC -AAGCCCCAATACCTTTTAAGTATACGAGGCTCATTATAAAAATAGCATAAGCAATATTGT -CTATATCTGTCGCCATTTCTGACCAAATATTCAACTGAGCGGTCCTGGACTAGAAACCAA -TTCTCCTCGAAGGCGAATTTGAATCACCCAGCCATCTTTTTCTCTTTGCCCCTGATCTCT -TTTTGTACATACTCATATCTATAATTTTTTTTTTTTAGGTCTGAACGAGTTTTATGAAAT -TTAGACCGAATCATTAACCAAGTTTGAACTAATGCATTGAGACCTGGAAGATATAGAGGG -TACAGATTACATATCTCAACAAGCCTTGAGCCCATCCGAATCTCGAATTGCTACATCCAC -TATCAGGGTGCGAGTAAGGAAAGGTCTAACTCCAATATCCTTGTTGATTTACTTGACTTC -AAAGATATAATGATCTCACAGCTCAAACCTGACGTCGATCTACCCTAACAGTTCCCTTTA -CTTTGACACTGTCCGTCTCACCCTCGCCCTTTTTGCTCTTGCCAGGATCGTTCACAAACG -GGTTCATAGTATCCCCGTTGTCGAGGCCTTCCTGTTTGAGTGAAATGCTCCCAGGTCCAT -CAGGTGGGCCCATACCTCGAGGCTATGTGAAAGTTAGACTTATATTCTCTTTCCTTCTTG -GTTCAATAACGATAATGCGAGCCAGGCTGCGAAGGGAGGAGACAAGGAAGAACAAACCTC -ATCGCGGTCGAAAGTTTCTGAGTTACCATTTTGTCCAATCATTTCATTACCCTCTGCTCC -TTGAATGTACTCTTGATTTGCTCCGCGTGGCTCTCTAGCCGGGGCCATGGTGCTAGGCTT 
-CTTTACATCGCCATTTTGCCACAGGTAGAATGTGATTGGTAGCCCTAGACCAATTGATAC -AAGTAACCTATCGAAAGCAATTCAGTGTAAGTCAGATGCGACCAGTGCAATATATATCAA -GGAGACTAACCACCGGGGGCTTTTTGCCTGGGGACTCGATCTTTCGGTGGCGAAGGACCT -TTGGCTAGAAGTCGGTCGAGATGACCGAAGTCCAACAGATGGGTAAAAATGCTGTCTCAC -TGCTTTACGTGGAACTGAATGGCGGAAAGAATTCATGCTTATGGGTTAATGAAACCAGGA -GATAGTCAATAAGACTATAGAATCAGATTAGGATTGAGAAGGGAGGACTTTTGGATTTAG -TGGACTTGGGGAGTATATCAAGTCCAAAGCTTCGGGGTGCTTTCATTGTTACATCATTCC -ACTTCTCGGAGTTGTTCCAAACAATCTTGAAAGTGTACCAAACTTACTAATATGCTTAAA -TTAGCATTTGAAACCTAATTATACGCCATAGCTTCATGACATCTCCGGAGAAACAGCGGA -TCTAAAGCGTTATAGCTAGCGATCTATCAGATCTATGGTTTTATCGATAAATGCATCCTT -TGATCTTCCCTGTGGATTGATTAGTGGATATGTTGCTTTGCTGATAGTGAATTCATACCT -GGAGGATCTAAAGGGGGTTTGTATCATAGCAAGTAAGCCATGAAACTATATGTCAGATTG -CATTCTCTCTGCTATTTGGATTCCCTGCCTTCTAGAAAGGGCTCAAAAAAGCACAGTGTA -AGGAATTTGAGTTGCAAGGAATTTGAGTGAAAGAATAGAGATGCATTTACGGTATGTATT -ACTATCTCGTCAGTATGACATCTGTTTATGGAAATCAAGGATTTCTAAAGGATGTCTATT -ATGAAAATTCCAACAACAGTAGTTTACATTTGAGATACGCTGGCCACTGCTCCAAGCATT -TACCCACTTGACCGAAGGTCATAACAGAGTCAAGTCATGGAATTGACATAGTTGTAGTGG -TATATATGTACGTAATCCAGGGAATCACACACCCATTAACTTCTGTGAGGCAAAAAAGGG -CATTGTACGGTTTCCCACACTATTCTTTAAATAACATGAGAGAAACTATTATACACTGAT -CATTCTCACAAACCGATCACCAAGGGCGTTCGCAGCCAAAGGATTACCAGCAGAGACCAG -TTCCCGATGCAAGGTGGTGCCACCAGTCTTTTCACGTACACCTTCAACCATTATTGCACC -TTCGTTTCTCAATGTCGACTCAACCTTTTCAATCTCTCCACCCCACAATGTCTCCATCAA -ATTCTCTTCTGCATCGCTCCACGACGTAATTTTGTACCCATTATAAACAAACGACCCATC -TCCCGCCTTCTTCGTACTCAACAGAGCGTACGGCCCATGGCAGATCACAGCAGTGGGCTT -GTTCTCTTCGTGGAAATAACGGAGAATTCGTCCCAGTTCAGCATCTCCGCCTAGATCTTG -AAGCGGTGCGTGTCCGCCGGGGATGAAGACTGCCGCGAAGGTAGTCAGCTCATCGTTGCT -GATGGTGCTGAATGGCCGTGGGCTATTGAACCCATTCTCGCGCTTCATGCGATCGATAAG -ATTATATTCGCGTTGACGCTCGTAGAAATTTCCGGCATTTGCGACGAGTGATTGGCTGCT -CGGGTCTAGGGCAGGTTCGTGACCTTTTGGCGAGGCGAATGTCACCTCGTGGCCTGCGCT -GAGGATTTTGTTCAATGGCTTTGCCAACTCTTGGAGGAAAACTCCTGAAGGCTGATCGAC -TACCTTACCAGCATCGTGTCCGCTGGTCCTTTTCAGCGGGAAGGAGTGCGCGTCGCTGAG -GATGATAAGAACTTTCTTAGAAGGCATTGTGACAGATTGTAGAGTAGATCAAGATTATAA 
-TTGTCACATTAATCGAAATTAAAATATGCAATCTCCCCGGTGTTTTATGTACTTCAAACA -CAGAATTTGAGCATAATGACATGGAGATGACGTATCAAATTCCTGCTTCTTAGCCACCCA -TCAGGTTTTGTGGTTTTCCGTCAAGTTGACATATTAATTAGTGATTCTCTCTGAATAATA -AATACCAATGGCTGAGAATATCGCGGGAATAGTCCCCCAAAATTGGACAGTATGCCTTTG -GGCGGAAGGAAGGAAGCCTGTTCGGCGGCAATCCAAGCGAAATTAAAGCAGCACCACTCT -AGGGATGTGGATATCGTCCGCGACTCATTGCCTGATCCTCTCAGTACGCATTGAACAAGA -CCGAGACATCAACCTGCCTGAGTGGAGCTTGACGATAGTTATATTGACCAGTTGGATGAC -AGCCCTGGGACTGCGAGCGGCAGTGGAACTTATAATAACAGTGGTAGTCGGGCTCTCAAA -TCAGAAGCCAAAAGCTGCATCGTCAGGATGGATGCTGACATAGTTATCTAGGTATACTAG -GATCAGGATGCTGTCAATATGGATCTAAGATAGCCCTGATGGAGGGCGTAGAAAAGAAAA -ATAGCTGGATTGACGCCCATCTAAAGTTTTTTAACACAAGTCTACGATGTCAATTCAACG -AGCACAATGGACTTGAACTGTCGGCTTTTGTAACGCTTCATAACTCTGAATATTATTTAT -AATGACGAAAAACACATCGTTCTTTCATGTCTTCTCTTCGCTCGGTTCATCCTCTCATCC -CATAAATCATACTCAACCCTTCGCTCGATGTTTCTTGAACTGTCGACTAATTTCGAACAT -GCTTTCCGGCCACGAATCTTCACTAGTCTTCCACAGGGACATATCTAATCGTGTATTTTG -CCGGAAGGAGATAAGTCGTTCAAGTTCCCAAGCTCCAATGCCACCAATCCACCACACCTT -ATCTAGCAACATAACCAGAACAAGCCAATGTGAAAATATGTCCACAGCTAACTGTTGTGC -TAGACTCGCCTCCTTATTCGTGGCATTGAAGGAGGTCATTATCCCGTGGCCGCCTGGGCC -TGGTGCACCGTTCTGCATGAGACCGATTGTGTCTTCAACCAAGTTCAAATACTTTGTAGG -AGCTTCGTGAACGAATGCCATAATAAATCTTCGCGGAAGCCTCGACGGTATCATCGAAGT -TATTCTGGCGGTGTATCTCCGCATCCAGGGCGAAACTTTGGAGACTTGTCCCACTGGATT -GACATCGACTTCGGAAGCGAGATGGAAGTTTCCTGGCATATTTGACTTAGAACTTTCATC -AAGAAATGTCCGGGATACAATCCTTTCCAACAGCTTCAACGCAACAAAACATGCTTGAAC -ATCGGAGTCGGTATATGTGGCCGGTGTCTCCTGATTTCCACACTCATCCGCCTCGGGTCT -GATAATTTCTCTGGCCTTCTTCCCCAGCCTTTCTAGTGCAGATTCCACCGTAGCGGCCAG -AATGGGATACAGAACATGATCCTGAGGAGCATTTTGCTGCCCAACTGGAGTGGACGTCTT -GCATTCCCAATGCATCTGAAAACCATGACTTGCGACAGGATCAATGGGCGAAGCGGCCAG -CGCAAGTTGAACTACATCCTCCGCCTTAAGCCTGTCGTTGAGTAGTCCCGTGTAGGCTAC -GCGAACAGCGCGGAAGAGACTGATCCATTGAAAATTTTTTGGCATAAACCGGGTCTGTTC -AAGGTCGTCGGAAGATGATGTTTCCGCTAGCCAGATTCGGATACAGTGGCTGCCAGATCC -ATACATCCCCATCATAGCCGCATTTGCAAGGATATGGTCGTATTGATCAGTCTCAGATAG -AGCGCCTTGAATTTCCCGCAGTGATTCCATGTGATATTCATCGGCGATAGACAGAAGCAC 
-TAAGACTTGTTCACGGTCTCTGGAAGATATGGACGAGTGGTTGATAATATCACAACATTT -GCACACTGCCGAGAATGCCAGTACGGATTTCATAAGAACCTTGTTCTGGGAAGCTAGACT -CGGAATTCCGATTTGTAAAGTGTATTGGTCCTCATCGTCAGCGCACTTATCTTTGCTAGT -GTGTTCAAGATAGTGGTTGAATAGTGCAGACTCCGTGGGTGTAAGAGCCCGCAGTATCTG -TGAGTATAAGGGCGAGCTGGGTTGTCTCTGGTGTCTTGGGAGGTTGAGTGGTGATTGTGC -TGATGGAACCTCTGGTGATGACCTTGGACTTCCGACAGCCGCGGATGAGCAACGCGAACT -TTCAGAACTATTCAGCCCGTAGTCTATGGGTGAACTTCCATTTTCCAGACAAGGTCCCGT -TTGATCAAATTTGACCTTAGGGGCCAGTATGTTGGATCTACTTGGATATTGGCAGATCAA -ATTCATCTTAGTGCACCTTGAACACTTTGGCTCAAGCTCGTCGCACTGTGTATATTAGAA -AATCATTCCCATGTTTGTCAATGATATATTATACGCACTTTGATTCTGCGTGCCTTGCAT -TTCTGGCAACCATATCGCGACTTTCTGTGTGGCCTGCGCGTCTTAATATTGGTCCAACTA -CCGTCTTGGTCTAATCTATCCATACGTGACAATCAAAAGGTAAGTGGGAAGGATCAGAGT -AATGAACATCGGAGATTATAAGGATAAGTAAATTGAGGCTCTGATCCAGAAGTGTAAATC -CCATCGCCTAGGTGGTTTCCTAATGTCTTGGAAATGATCGATAATTCCAAATACTTTGCC -CAACGCCGACATCCATTTAGTGTCTGACTCAGCTATATTAGACCCAGGGGCTGAGAATCG -GGCGATCGACTATGTTGTATATGGCAGTTTGGTAGTGTTGAAGTTCATGGTCCACTAGAC -TTAAGTAGAGCTGTCTTGCATGAGAGGTACTTAAGCGCATCAAACATCAAGGATATAAAT -GGCTGAAAGTACGGGGGATTGAGGCTGTGAGCCTCAAAATTATTGAAGCCTTCGCTGCGC -CCCTCAGTGTCAAGACTTGGCATTAAGGGGGTGTAATGGCTCTTTTATGGCAAATGACAC -CCCACTATAATATTTCTGTAGAACATTCAAGGCATCTTCTATTTCACTTTCAAGCAATAA -AAACTTGTCAAAGATTCAACTTCCCGCGTTGAATGCCAAGAGAATAAGCGCCAAGAGAAT -AAGCGTGACGGCTTCATGCAAGTGCGTATTCGTTTAGACTTGGCAGCTGGGTGACAAATA -CTCTTCATGGAGCGATGAATGATCTAGAGCCTATCCCTTAGAAAAGACCGGTGCTAAAGG -ATTGAGAAAGCCTAATCTGGATATTGAGACTCCATATTTTGCGCGCAGCGCGTATGGAAT -ACGCTCTGTGGCATTGCCGCAGTACTGAATACGCTCGTTGATCATGGGCTGGTCTACAGT -AACAACAGATATATAAAGGCCTATATATGCCGAACTAGAGAGACCAGCACTGGCTATCAT -AACCTGATTCACAAATCTACTTTATCAATATCAACCCGGAGAGCCATGAAGTTAAGCTTG -ATCAAGACAGTTCTGTTCAGCCTTGCAGCCAGTTCGTATGGCTGGGAGCATAAAAGCCAT -GAGTTCCGTCCACCATTACCAGGCGACAGTGAGTTATTAATAACAAGTTCTCTTGGCTCC -CTCAAACAGCAATTTAACATATGTCCCAGGTCGATCTCCATGCCCTGGTCTCAATGCCTT -GTCCAACCACGGCTGGCTCCCTCGTTCAGGGAAAAACATCAACCTTCCCACCTTCCGATC -CGCTATAGCTGGTGCCTATAACTATGAGCCCACATCTATGGATGGATTTTTCAATCTAGC 
-ATTGAACTTCAACCTCTCAACCACCGGGAACCAATCGACCTTCAACCTCTTTGATCTAGC -AAGACACGACGAGATCGAGTTCGATGGCTCGCTATCGCGCAATGACATCTACTTCGGCGA -CAATGTCCACTTTGACCACAACATTTGGGCGGCGGTTGCTAAGAACCTTAACCTCTATGA -GACTCTGGGCTCCGAAATGAATCAATATGTAACCGTGGAGAGCGCGGGTAAAGCCTGTGC -AGCACGCGCGGCCGATGCCAAGAGAGTCAATCCTAGCTTCAATGCTTCCGCCATGCAGGT -TATGGGCAACCCTGGTACGACCGCTCTTTATTTGGTGACGTTGTGGGATGAGAAAGTAGG -TGCCGCACCAAAGTCATGGGTTCGGGCTCTATTTGGTATGATTCCTAGCTATTTCTGTCA -ATATTGACACTATTTTGATACATTGATGCTTACTATCTGATAGAAGAAGACCGTATCCCA -TATCTTGAGGGGTACAAAGTGCCTAAAATCCCAAGGACTATGGACGATGTCAATGAAATG -GCAAGGAGAGTGTCGGCAGCGGGCGTTTAGATGTAGCAAATTCAACGCCGAAATTTTTAC -GATAACTATTGTTGTCGAGATTGGCCGTCAATGATATGTGAAGAAAGGGTACATAGAATG -GTAGTGCAAACAGCCCTTGATGTTTTCTTAGCAGCATAGCAACAATCAATATTCTTGAAG -TCATGTCTACCAACTTGAAGATAGATTTCTAGTGTATGTATATTTATTTTCcaatacaaa -tccaaaacaaaagaaaacaTCAAAAGAGAGAAGATCAATGTGAAGCTTCTAAACACGTTT -CCCTTCAAAACCCTCAGCGGGCCATCCCTGAAGCGTATCCCCGTTCTCAACATGGTCACC -CGACAGGACCAAGGTATGCTCCAAAAGACAAGCATCCTGGCCCATCGAAGCACCGGACAT -CAAGCGCGATCCAGCGCGTAGGATACTCCGGTCCCCAACGTTCAAGGTGTGTAACTCGAA -CCCACCACGCGAGTTTAAGTGGCAGACCAGACTAGCATCGTCGACAGCGACACGGTCACC -AAGGGTGAGAAGGTCGGGCTCGGTGAAGAAGATACTGGGACTACCGTTGGCGTGAATCGC -ACAGTCCTTTCCAATTTTCGCGCCCAAGAGACGGTAGTACCAGGAAATATAGGCAGAGCC -GGAGATCATTGGCAGGAGACCAACTCCGTCATAGCAATCTTTGATCAGCGTCTCGCACGA -AAGGAGGAATTGCCAGCGCTGGTTGTAGCTGCTCTTGTCCCAGTGATAGGCGCCTTCCTT -GCGGCGGCCCACGACCGTCCACTTAATGCAAAGAATGATGCTGAAGGCAAGGAAAACTTG -AGCAGCGGTGACAACCGATAGGATTGAGGCCAGAAGGCCGTACAGGGCAAATGGGCGCCA -GGCGCCTTGCTTAAATGCAGCAACATCACTGTGATCTAGGACAGCACGGAGAGCAAAAAG -AGAGAATAGGACAGTCAGGAGCCAATAGACAGTAGTGACCATGACCGAAAAGGAAGAATA -GATGACAATTTGCCACATGCGGAGGACGTAGTAATTGGCCTCGCGGCGGTAGAAGGCACG -ACCGAAAGGCTTGCAGGTATCTGTAGCCTTTTCCACGATCTCGGTAACGGCAGTGGTCTT -ATTCTTACTACCAAGGTCGATCTGCTTCTCGTTGAGGGTGTTTGCATCTTCATTGTCAct -gctggggctgctggggctgctgctactgTCTCCGACGATGGTCGGGGCAGTCGAACTGGA -GGGCGAGGTGGTAGTAGAACTTGGGAACTGGATAGCTTCACCACCCTTGCTACCCGTCCA -GACAGTATTGGCAGGATAGTCCCCGTTGCGGCGAAGCAAAGCACCGGAGCCAATCATGGC 
-TTCACGGCCAATAGTAGCACCTGGTAAAGCGACAACACGATCACCGACCATAGTACCGTC -ACCGATCACGATGCGGTCACGACCGTAGCCATCAGATGTTACCAGTGTTGAGCGAGAACC -AAAAACCACATCGTTGCCAACCTCAATGAGATCAAAGTCGACAGTGACGGGACCAACACT -GGGCCAATAGACATGCTTGCCGACCTTGCCACCAAGGGCACGAACAGCCATCGACACGAA -CTCGTAGTGACGACCAGTAAGGCGGGTGAGCTCATGGATATCACCTGCAGGCAGAATCTG -CGCAAGCACAGCACTCCGCACCTTTTGGCGAGTGGTCTGTCTTGAGGCAGGACCAGGTCT -AGGCTTGCCACAAATGAGATCGAGCAAGGACTTTACGACCAAGACCGCAATAAAGAGCAC -GATCGGGCCACCAATAGAACGGCATATTCTCGCGGTGATATGGAATGCAACCCGTTCATC -ACTAGTGTACCAATCTAGGGTAGACCGGAACATGTCGGCCGCAGGGGTGGGGAACTGGAG -TACCATAGGAATAAGACCACCCATCCAGGTAATACGGGCGGCCATCCACGTGAATAGCTT -GATTGGCTCAACAATGAGGAGAATCCAAACCCAGTGAGGCTGAGGGATCTGAGAGGTGAG -TAATTGTCGATTAGACTCATCGGCATCCTGAAGCTCCCAGCTTGACGAATTGGGACCAAT -GCAGGTCCTCTCGGGGATATCGGCACCAGGAGCAACAACGGACTTGATGCCGATAGAAGA -GTCCTTGCCAATGCGAATGCGCTTCAAAAGCATAGAGGTGTTGCGTTCCACAGCGAAGGG -GCGACAAAGGCAGGTATCTAGAACGACATTGTCACCGATCTCAATCAAATCATACTCGCC -AAGCTTAGCAGATGCGGACATGGTGACGTTCTTTCCAATCTTTGCTCCCATTAGACGGTA -GAAGAGAATACGGGACCAGTTGTAGTTGTTGAAGAGGCCCTTTCCGCATACCTGTGATGC -CTTCTGGGTTAACCACCAGCGCGTATGGTACGGGCCCCACATGGGAGACATTCCCTCCTT -ATAGCGACCGATGACAAGCCACTTGAAGACAATGGCGCAGATGGGAGACACAATTTGTGA -GCATAAGCGAGCAGACATCACGGCGAAAACAACCAGAATCAAGCGACCGATGAGATTACC -GCGAATGGGAAATCGATTTGAGCACTCGGCGACGACGTAGGCAAACATCATCCACTGAAA -GGCGCGCTTCATGGGGAAGAATATCACAGTCGGGAATAAATGCAGAATCAGGATAATTGG -GTTCGTGCTGCTGTATTTTTTCTCGCAACCAGGCAATTCAACTCCCCCATCGTCGCCCTT -GGCAGCCTTGACCGCAACGGCCTCAGTGATCTTGTACTCTATTTCGCCAATGGTGCTGTG -GTGGAAGAGGATGTCACCAGCCAGGAAGATACTGAATTCGCGGCGAAGTTGCGAAACCAT -ACGACCAGCACTCAGACTATCACCTCCAGCAGCAAAAAAGTCTGTAGCAGCGGATACTTC -ATCTGGTGCACATGATAAAGCCGCAGCAAATATCTCCCGCACGCGGCGCTGAACTGGCGA -CAGATCACCATCCGAGTTGCGAGCATGGATGGCCTCATTGATAGCCGCCTGATCGGGGTT -TCCGTAGAAATCGACTGGCATGGGACCCTTGAGGGGGATGATACGACTTGGTATGAGGTA -GCCATCGAGGTGGTCGCTAAGCTCACCAGGTGTCACATGGTTATCATCTGGGTTTTGGAC -AAACAAAACAACCTGCGCGAACCCGTCGCGAGGGTTGATTTGTACGTGCACCTCAGGGGT -ATTTGCTTTCTTGGTAAGAACCGAGTGAATGATTTTATCATCGATGACACACGCCCTTTT 
-AGGGATCAATGCGGTGAGCGGAGCACCAGTGGGTGGGCAAACGGCTTCATAGTGCCTGTT -TGCTAGGGGTGTGGCCGTGGTCAATGTCTCCAACGACAATCGCTCAGCGAGTTTGATGCG -TTGGATCTTGTTGTTGGCCTTGGGAACACCATCCATATATACTACCAAAGCCGGCCACTT -AGGCTGGTGGAGAATAGGCTGTAGTGCTTCATGTATCTGACGAAGATCCGGCCTAACGAA -GCCCGGAGGTGTGACGATAACCACACCCACGACTTCCTGCAACACCTCGTCTGGGGCGGA -GAAGGCCAGTGTCTCGGTGACGCGGCCATACAGCGGGGATTCGGGATCCTTAGCCGTAGT -CAGCACAGCGTTTTCCACTTCGACCGGAGAGATAATCTCGCCACCACGATTAATGACCTC -CTTGCTGCGACCAGTTATGTAAAGGTAGTTGTCAGCATCGAGGTGTCCAAGATCGCCGGT -ATCAAACCAACCGGACTTGTTAAATGCGCTGGTATCAATTTTGCCTGCGGGCGTCAGATA -TCCCTCGAAGGCGGGCGATCCACGAATGCAGATGTGCCCGAGCATCCCAGGCTGCGAGAC -AGGCTCGCCACTCTCGGTCAAAATTGCAACCTCGGGACCAACAATTCGTCCGGAGGTTCC -ATGGCGATCAAGCTTGTAGTCCCTTGGTGGCGCAGCAATAGGCATACATTCAGTCATGCC -GTAGCTGGGGAGGACTGTGCAGTGGAAAATATCATGGAGCTGCACAGCAAGAGTTGGGGG -AAGGCCACCACCAGCGTTGCAGATAAACTGAATGGCGCTTTGCTTAACAGCACCGGGGCG -GTGTTCAGCCTCCGCAAGGATCATTTGGTGCATTGTGGGTGTGGCGTAGTACCAAGTTGG -TGACATATGAGGAGTTTCCACGGCATCCCAAAACATGCTAGGGTCAAACGATGGGCAACA -TATCGTTGCACCGCCAGCGAGGATGGGGGAAAAAATACTGCGCATGATGCCACCACTGAT -GCTGTCAGTACCTTTCTATCATATAATTTGGTATAGGGGACATACACATGACTCAGCGGC -ATCATATTAAGACATGTATCTGTTTCGGACAATTCCACCGACTCTATGGTCGCCATCGTA -CCCGCAATGAGGTTGTAAGTTGTGATGGGAACCAGCTTCTTGGTACCACTGGTACCACTG -GTGAACAGGATAATGGCAATATCATCCCCAGAATTGGGAGGAACCTCGCATCCACTAGAA -GCATTTTGTGCAGAGACCACACGGAAAGTCATGTTTTCAAGCTGTTCAATGCCAAATACT -GGCCGTGTACCATTGTTTAGCTGCAACTTGCCAATGTCAGCGTCCAGCGCGACAACGGCA -TCAGCTTTTGAGTTTTCTATATCGACCTGCAGTTGTTCTGGCACTGTGTTTGATGTCATA -GGCACAATTGTATACCTGTTGACGAAAGCAAGAACAGCCAATGCCATGAGAGGGCCGTTA -GGCAGGATAACAGCAATGCGTGGCTTTCCATGACGGGAAGGTCCAACAGGAATCTGGAAA -GTCTCCAAGAACTGCCTAATGGCATAATGAGTCAGGTGCCTCCCCGTAACGGCATCGATA -AGACCGGGACGGGAATCTGTCCCTATTAAATTTGCAACGTTGGGTGTGGCTGCTTTGACT -TGCGCACGTAGGCGTGCAATTGATTCATTGTTATTGGTCTTGATATACTCAGGGACGGAC -TTATAAATGTCCTGGAAAGTAGTGATGAGGTGCCGAATGGGCCACGACCCGGTGTGCGAG -GATAGTAGATTGGAGAGGTGGTGGATAGAGTCTGGGTGAGAGGCCTCACGACGGAGGCGT -AGGAGCTGATCTATAAGTCTGTCTAATAGTCAGAATTGGAAGTGACTTCAATTGAAAATC 
-CAGGgaacagggagaacaagagagagaagaagaacaagaaAATGTGGTAAAACGTACGAA -GATCCTGATAAATCCGCCATTTTGTATATAGTATATCCAGTGCAAATAACCTTTCGACTT -GACTTGATATACAACCCAAAAGGAGAAGTCCTCGAGAACTTATATGCTGCCAGAGCACGC -TATTAATATCAACACGGCTTACAGATCACGAGGAAAACTCCTTGATAGCACATGCCGCCA -CCGCATCCAAGTAATATCCCGATATACTACCCAAAAGGACAACTCCTTCACAGCTCATAC -CCCGGTCATACCCACTCGATCCACCAATGGCTTGCAAATCACATCATATACTCAATTCTT -ATCGTGGATTCTTCATAGAAAATACGGCGGTAACCGGGGTCAGGGTCTAATCCAGAAAGA -TCCGCACGGCATCTAAATCCCAAATCTCGACATCCTTCGGAATTCCGTTCGTGATCGAGA -GATCAGCTCAACACATAGTACGCAACATCGTGCAGGGCAATGAGATGGTATTGGCTAGAG -ATAGAGCTATCGTCATGTATACCATCCACGGGCCGTGATGTGCTAAATTCCTGGGTCTTG -ATAGATATTGGAGTTTCCCGGCGGCCGAACTTTACAGTTCGACCCCTGTTTCAAGTGCCT -TCTGTTATTTAACTTCGTGTTTAATCTGCTCGGGTTTCTTTTCACGAATATAGTAAGATC -ACCGGTATGATTATGAGGAGAGATAAGCCCAGCTGCAGGGTCTTCTATCTTGAACTACCT -AGCGTTATACCTGACATATTCTAAGGGTAAGTATTTATACTATGCCTCGATATATAGTGT -ACATAATACGTTCAAGTCGCCCCAGATGATCATGTTTTAGCCAGCATTGTACGGAGTAGT -CGAAACCAGGTATAAAATGGATCTATGCAGATCGCAGGATCTTTGATCGGCAATTCAGAG -CCAAGCCAACCCTGCATATCACATGGGTTAAACACGGGAAACTCCTGTCGGACTCCGTAC -GCACCCACGAGATTCGGGCCGTGTATAGTTCTAGAGTAACCACTGAGGAGGATCCACTGA -GGAGGATCAGCTAAGACTTTGGGCGCTACGTATACTTATATTCCATATAATGGACTCAAC -GGCCTGACGACGGCGAATTTTCGCAGTTTCCCGTACAAATGACGTTGAAATTTGGCCATT -TACCATATTCGGTCAATGTACTGACATGAAATACCCTTTGTGAATTTCTTATAATAAGAT -CTACTCCTAATGGTGTCATGTATGTGATCTGCAATTTGTATGTAACCGTCGTACCCCACT -ACTCACTAGGGGGAAACAATTCAGCCTTCCCGCATCTGCAATCTGTACAATTAATTATAC -TCGTGGACGTTTCGTGCCAAGTAATCTAACCTTTTGCCAAGGACTTGGCGGGGGCCGTGC -AGATCGATGTAACTCCGACTCTTCCAACAGATATTCCATTCATGGATTCCATGAACGCCA -TTGTCCCCCACCAACGACCGTTTCAGCAACTGCTTGATGGTTATCTTACAGAATGTGGTG -GGTTAGAAGGTTTGACTTCATCGGTGGCACTGAGGTATCGACATTTAGGCCGATGATCTC -AATGGGGCCAACGTGGCGAATACGACGAAGTCGCCCTCATTACGGAGTACCCCGTATGGA -GGTACTTTTCACCAAAGCCAGGAGAAATTTCAAGGCTACTGTAATAGATATAATGAATAG -CAATAGTTATTCTGATCCTTAACATTCATGATTATCGCGTGGATGTGGCCATTGTAGATG -ACAAGAAATTGGCTGCATACTGCCATTCTTAGGGCTAGGAATAGAGTTCCACAGGAACAA -GGTACCAAATTCAATTCGAAGCACATACAAGGATGATTTACTTCCCGTCCTATATGCAAG 
-GTGACATGGTCCTAAAATGCCCAGGATGTCTCAAGTACTCTAATTTGCATCGGCAAGTGA -GGCTAAAGCGGTTGAAACCGGGTTTGGCTACGCCGTAAATTCCGATAGATAATCCATCTT -CCCAATCTGCACACCTCTCGCCCTCAAAATATCATACACCGTCACGACATGAAAGTAGAA -ATTGGGGATCGCCAATCGCACCGAATAATCCTCTGCCGTCCAATCGAATTCATTATTTCC -AAGTGGAACCTTGAATTTTTGTCCTTCTTTCGAGGCGATCTTGGCCAGATCGACCGTTTC -GAGCTCCGCAAGTGTCCGATCGAGGCGCTTGTAAAGATCTTCATAGGTCTTATCTGGCTC -TTGCTGTGGCGGAATCTCAACGAAAGATGCGCGGGCGAGGGTCTTATAGATGGTGTTTGT -TGCGGTCACAATTTGGAAAGAAAGAGGCTTCATATCCTCGACAAGGCGAGCGTTGAGGAC -TTCATCGAGTGGGATGTTGTTTTCCTTTGCATGTTCTTCGGCCTTTTTTAGGAGGGCAGA -AAGGGTCTTGATGCCCCTGGTGAAAGTGGAGATAGTATAACCGCAAAATGGGGAGGCCAT -GTTGATTTTAGAATTTTGGATTCCGTACTTGAGCTTTTTGGATGCGCAATAATGTATTCT -AGAAAGAAATATGAGCCTTATATATTCTGTACCAACAGAAGGAGTTCTCATTCTCACCAC -TTCTTCGCTTGATTTCAAATCCACGGCTTACTAATCGAATTGCCAAGCCCTAGAGCGTGT -CAGTTCCTAGACTGGAAAGACGAAGAAATGAATAGACGATGATCTGCACGGTCCCCAAGA -CGCGGCTCACAGAGCCAAGCAGTTCAGCTCGGAATTCCGGTTCAGTTACGGAGTATGTAG -TCTATTCTCCTTGCTGGGGTTTTTGACGACAACAATTGAGCCTGATATTGTGTGGTCAAT -GAAGAATTGTTTGAGATCGTATTGCCCGCATTTATAATCAATTACTGTATATTAGGTCTG -GCAAGTGCCCTGCTTATTCGTAATGAAGTATGAAAAGTACATGTTGTTCTCATGTCATCT -TGTATCAAAATATACTTCCCATCACTTTCAATAAAATTTTTCTCCGTATTGCCTTACATC -TGCAAAGCTCCGAGTGATACAATCATCTCATAACAGCCAAGCCACGCTAGGTCGAAAGTC -CAACCCCAAGCAATCATCAAAACAATGAAGGGAAATACATCCAACCACGAGCTCATCAGA -AATGCAATTGCAAACATCGCAATTGCATTTGACACAAACACCTATACGGATCTTCGAAGT -TCGTTCACAGAAGATTGTGTGGCAGACTATAAGGGGTCGCTTGGCCTCATGCATGGTATC -GACACTGTCATTGAGCAGCTGCAGAAGACCATCGGGCATGTTACTACCTTTCACGCATTG -AGCACACAAGCTATACGGCTCATTAGCAACGATACGGCCGAGGCTACGACGTATTGCTCT -GCGAGTCATTACGTTGGCGAGAAGTCGTTTTTCGCTGAAGCCAAATACTTTGATCACCTC -GTGAAAGTGACTGAGGGAAGCACCACAAAGTGGTTGATCAAACATCGCTTGACCACGATG -ATGGGTGTTCCACGGGGCGATGTCTCCATTTTCAACATGGATTTGGAAGGATGGGTTGAC -AGCTTGACGGCCTGAAAGCACTACGACTTCCATGATCGAGTATTCTTGAGTTGTAGGGAA -AGATTATCCAAGAAGTGGATGAGTGGTCATGTTGCACTGGGGTATTTCTCGATGATCTGT -TGCTTTCTCAAGCAGCTTGTTTGATTTCTCTTTTGCCTGTTGTGGTCTTTGTTGTGTCTT -TTTGTCTATGAAGCTATACAGTGAAAGATTTATATATATAGAATATATTATAATTATAGT 
-CCTTCTTCCTCACTTCAGCGAAAGGTGAAGCTCTCCAGTAGCTGATATACTGCTCTCATG -CATGCCCCGAATAGAAATTAATAACAATATCAGATTAGACCTCTGTAAAGCCCGATTAGG -GGGTTCAACTAGCGTCTTATAATCGCTATATTCTGGCCCTTTGACCGAAAGTATTGAGTT -TCAATTTAGTTCTTATAATTTTAATAGCTATAAATGTATGTTGTTTAATTTGAAATGAAA -TCCAAGCAATAGATGTCGGGATATATATTTTAAGTCTATGTGGGATTGTTGTACCGCTAC -TTTCTGAGGTAGTGAACCTCTACAATTTATGAATATTTTGAACAGCGCGTTTTCTCGCAT -TTAATATATTGAACCTATCATTTACTATATATCAATTATGATAATAACGCATTCCCCCTA -GATAAAGGCGAGTAACGCGGTCTATACGAATGGTAATTAAAAAAAAGTCACGTTTTTCGA -AATATTTGTATTGTTTGGTGCTTTTTGGGCCCTAATAGACTCCCATTCAAAGGCTAGTGG -GATTTGACCCTGGGCTTAAGAGCCGGGCCCAAAACCCAGTTGGGGCGGGTTGGGCTGTGG -GATTTGCGAAGGTTCATATCAGATGAAATATTGGGTGTTATGGTGGACCCAAGCTGGACT -TCCTGACCAATTTTCCAGTACTGATGTGTAAGCAAGAATAATAGTGAAGTAAAAAGAATT -TGAGATCTGCGGCGCCAGACGATGCCAGAACTTGACCTGAAGATTCCATAGCACTGTTGA -TAACGCCCTTGCTACTGTATTGGAATGTGACCGAGATATTTGTAGAAAAAGCAAGTCAAG -TAAAAAACACGAATTGGGGTTATATGCTGACTAGGATAGAAAAAAGAACCATAGGAAGGG -GAACCAATCTGAGTGAGGAGCAAAGGTCATATTGCTTTATTATGATCCCCCCTTCGTACT -CGGTTCGTACTCGTAAAGAAGCCAGCCCCGCAGGGGTCTACGGTCCACTAAAATTTGAGT -GATTCTAAATTAATTGAAGGGTGGTCCAATCTAAATTCGGACTTCGTTGAAAGTCTTTTG -ATTCCGAAATTATGAAGATGATAAAGTTAGGTCTAGCGCCCCGTTCTAGCTCCCGTTCGG -GATACCCGGCCCCTCTCTGCGGCCTGCTTGGGCTCCCGCTTTAGCTTATTTCGGGATCCT -TGAGCTCACAAAATGTAGTTGAGTGTGTGATTCTTTTGCAAGTGGTGTGTTGTCGAGGGT -TGTGAAGCCGAAAGTAAGTCGTAAATGTTCTAAAAATACGGCGAAATGCGCGACATGGTG -AAGGTAGTGAGCTCGGATTGTTGAAATAATCAGTTATGCACGTTGGGAGCAAATTGCTGA -GGGGCTAGGCAGATGGAAAGTCCGCGCGGTATTTGGGAATCCAAGCAGGGTTGTTTAAAA -GTGCACTGGCGATGTTACTTTGCTTGCGTAGGTAGCAAGGCCCTCCGGTGGGCTAGGGCC -TAGTTAGAACTCCGcaaagcccataaagcccaaagcccagcccaacccaacccgcaaaTG -GGCTGGGCTTTGGGCCCGGGATGAAAGCTCATTGGGCTTTGAATGGGGGCCCATAGGGAC -CTTAACAATACAAATATAAAAGTTGAACCCCCTTTTAGGATAAATTAAATTACCTCTGTT -TTACGTCTGTTTCTTATTCCAATCTCTAAGGTAATATCAAATTAACTCCATGTTTCAGAA -CTAGCTACTAGTATTCTCTTTATATCACCTAAAGGATCATAACAAAACGCGTGAATTAAA -AGATTTCTAAAATTATAGAGGTTCACTAGAAAATGGTAGTACTCCGTGCAAAACCTAAAT -ATGTATCTAGCATCCAAATTTCAATATTCACTTTTTCGGTTTTACCCTAAGCCCTACCCA 
-TTGGATTCTGTTTAAGATTAATATACATATTTATAGTATCCCTCGTATTTGAACCTGAAC -CGATTAATGGGTGGTCGTAATACAACGATTACAGAGTTCGCTCTACCAGCGCCTACCTAA -TGGGCTTTGTGGAGCTCTAGGCCTAGTATAAAGCCCACTCGGGCCCAAAAAACACCAAAT -GATATCTATAGATGTGGGAGTCAAGGACGATGATGATGATATTTCTGGTCATGGTATATA -TACGATTGGCCTGGTTGTCTATTTACAAGGATATCATGATTATAAATACAAGGGCGCCTG -CCCTCTATTTCGAACCCGGCCCCGCAACATCACTTCCTCACTCCGTTCGGTCTGTTTGTA -TCCATGGCACGAGCAGAAATGGGGCGCCTGTAACACACGCCAACATATCGCTTTTGAGGG -CGTTTCACAGCAAAGAAACGGTCTTAGCTGCTCATAAGTGTTAGCTAAATTCTGTGTATT -GCAATATAGTGGCATGATATAGTGAAGAGCCAAGTGAAGTGAGAGCCAGTGAAAGTCAGG -CTTTAGAAGTGAAAAAGGGAAGACTCAGGCTATCATAGATCGAAGGGTTCGATTGAATAA -AAAAAACATACTCTTCTTGGCCTTGTCAGAGCGGGGGCGGCTATATAACTCTCGGAAATT -GGGAACGCTAGTCTATATCATTGAATCTAAAACTAGAATTGCCCTTCTATCTGAGAAATC -AGCGCGTGGGTCATACCCGGGCCAGGGCCACCGTCATGTTCATCATGGCCGCACCAGTGG -CAATACGACATACGGCCACTAAATGGCCGGTCTTTCTGCAAGTATCGCAGAATGCGGGCT -AGTATTTCTATTAGCGAGTTTTCCAATAATACGTGCTAGTGATTGGTAGGATGGCAATCA -TGGTTGAAACCTCGCTGAATTGGGAATTTGGATAGGGAAAAATAGGTAGTGGAGGGATCG -ATACTTACGATTGTTCACTCGACAAGATGTCGAGTGAGAGAGATGAATATAGGAGCTGCA -GTTGTCCTTATTTCAGAAAAGAAAAGGGAGGGCAGAGTAACTGACTAGCGTTCAGACTTG -AAAATGAGGTTTACAAAGTTAAGTAACTTTGGGGAGTCACAAAGTTGTGGAAAAGAGCAA -GCACTATTGCGGCAGGAACCTGGTGTATTTCTTTCATAAGGGACTCGGCTGTCTAAATTG -AGCTGCGGGCAGGGGTTTTGATGATCAGTGTATTTCTCTTCTCGGGCAGAGTTTGCCTAT -GCGTAGAACTAATTCAAACTTTATGCACCTTAAAGTCTTTCTTCCCAGTCGTTACTTTTT -GAGTCCAATGACCGCCAAATTACGCGGACTCTCACTGTAGTCAAACACCGGCTCAACCCA -GCACTCTTTGACTGAGTCCTGTTCACGAAGGAATTGCCACCGATCCACAACAATGATAGC -CTCGACGAGCTGAGCACTAAACGCCATTAAGCTCCACACAACACTCATATGGTTCCTCGC -GTGAAGATACTGAGATTCGTAGCGTTCAATCTCTTCATCGGTCAGGGTAGCGACATGCTC -CTGCACCTGGGGACCATATATCGAGTCCCGGCTCATCTTGATCGTCGCGGCACGAACATA -TTCAGTAAAGGACTTATAAGCAGCTTTAGGCACTGCCCCAATGATAATAGATCTAGCTTC -TGGGCCGGTTGTGTCGGATGGGCTGTCTTCCGGCACAGAGGGCTTAGGAAGAATACCTCT -ATCAACGAGGATACGCTGGAGAAGAGCACGAAAGAAATGACGGGTGAAGGATACTTCACT -ATCATTGACGCCCCAATTGTATGGAGCCTGCACAGCCAGTGCACGAGCCGTGATGTTAAG -CTTCATGCCGGTAGTTGCTCCGGGGCTTCGGTAATTCTCGTAATGTTTGGACATAGGGAA 
-CCCATGTGGATCGTATGATGTTCCAGTCTCCTTAAGCCGAGGGTGGAGGGATTGGAGAAT -AGGGAGATTATGTGTGGTTGGACCGAGGCGTTCTGTCAGCAAATTGTAGCAACAACCAAT -CATAGCAACGGCCACAATCGAAGGATTTAAAATCAGGGATCGTAGACCATGATGAACAAG -GTTTCCACAAGAGTGAAGGGAAACTACCATCACTCTAGCATCGTTATATTTCTTTCGTTC -ATCATCCGAAGCTCCAGTGGGAGATGGCTGGATGATATGCTCGATAATGGGCTCTAAGTG -GCCGTCTTGGATTTCATGCTCAATGTAGCTGAGAGTGCCGCGTGTCTGGATCTCTAACTC -CGGCAGCTGTTCTCGTGATGATACTTTGGTGGGTAATCCGGCTAGCTCGTCAGATTGAAG -GCTGATCATGCCCATCTCGAAGAGGGGGACATCCGAGACCTGCGAGTTCTCCGTCATTGG -TTGGGAGGCCTCCGTCGTTGGTTGGGAGGCCTCCGTCGTTTCGGAAACAGTCACTTCCGC -AATTCCATCGGATCCATCGCATGAGACTTCTTTCTTGCGAGAGAGTTGAGCCTTCGGATT -CTTTGCCAACTTGGCTCGAACATCCATACTCTTGGCACCGTTGATGTACTGATGCTTACG -CTCAATTGCGATGATATGCTTGTAGTACGGCGAGCTGGCCAGTGTCCTTCCAAGATAGTT -CTGTCCAGACCCAAAATCCACAATATGAGTGACTTGCTCGCCTCGGCGCCTATTGACAGT -TTCTGATAAAGATGCGACATATTGAGAAAAGCATTGAACTTCGTGTACTTTCTTTCGATT -CATATGCATAGCAACTTGCTTTGGCAATTCGACTCTCTCCGGGGATGAGGAGCACATTTC -CCGTTGGAGTGTCAATAAACGGATGTTGGAAATATAGTCCAAGAGGGTCTGAGGAGGGAA -GGCTCCTCCATTCCAAGTACCGCGGCGTTGATCTACGGAAGGCTCGGCGGCGCCTCCTAA -GTGTTCAATGTCCTCCCGTAGGAGTAATCGGATGATGTCTTGAATATCATGCTGTTCAAA -GAAGGATATCCAATCCTTTGGCAATATTTGTGTATAGAGGTCGGGCTCGCGGGTCAGAAA -GTCGACCATATGCACGCCACCGCACAAATTCATAAAGAGCACCGACGTGGTCGAGAAGCT -GAGGAGAGCTTCAACGTAAGCATCAGGGTGCTCCCACGCCCTGGAGATGGGTAGTGGGCG -ATTAGATGTCATATGAATTAGTCAAGATTGCCTCACTAGGCCATGAGGATTCTTCGCGGG -AGGTTGTTAATTATGGAGCTCTCTAGGGCTTTCGAGGATTTTTATCGTGCTGAGTCATCT -TCGGATCCTCCCGAGGTCCCGTTTCCATTACCACGGGTCCACCCAGGACCGACTTCGGGA -GTATGTGATCAACTACACCATGCCAATCGGAACCGTTCGCATTTATTTCATTGCATAACA -GTCCTATGGACACTACGCTCATTAGATTCTTCGCAATAACTTACATCTTATAATTGAATC -GCCGTTGATTAGCTTTTTCACACAATATCAGAAATCAAGAGGCTTTCTTCCATAGACGGC -GACTCTGCTCATAGTCAGTGCGAGTACTTGAACCGTTAAAGAGAGTGGATTTGGTCATAC -GTTTGGACATAGGCATGTAGCCTCTTATCCCGCAGCTCTTGCTTGACCACAGCGTTATGA -ATCTTGGCTGCATCCGAACCCCAGCCCAGCACCTGCGTGTACAAGCGCAGCGAATAGGAA -TCGATAGCTTGCTGCAATTGTACTGCCTGGTACTTTCCCAGGGCCTTTAGGTGGGGATCG -CGTGGCCAGGGCGAGAACGGCAGCGTGTAAACCCTCAAGCTGACATCCTTGAAGCCGGCC 
-GCAATCATCCAGTTCTTCAGCTTATGATAGATGTTCAGCTCCCGGCCGATCTTCTCTCCG -GCAAAGTTGAGGGCCTGCTGCCATTCCATTAGCGCTGAATTATTCTTAAGGGAGCCATCG -TCGCTCCGTGCCCACACGGCACTCTCATGCGATTCGAAATATCCACCGGGTTTGACGTGG -TCGTAGATCCGGCTAAAAAATCTGGGCCAATCGGCCACGCATCTGCTCAGTGTGCGTGAG -TGTTCTCTTCCGTAGAATAGGAGATTGAGAGACCCTACCCTGCCAGACATCGTGCATGAA -TAAAGTCAAAATGGTTTCGTTCATAGTCCCATTCGCTTTCAAAGTCCTCGACGATGAACT -CGATATTTGGGGCCACCCAACTGGGCTGGATAGGGCTGATATCGTTTCCAATAACGTGGG -CATGAGGAAATTTTCTTGGATTTGGGGTTAGTAATGCTGGCATACAAAAACGGAGGATGT -GGTATCAACTCACTCGGCAATATCCATAGCCCAAATGCCTGTTCCGGTTCCCAAGTCCAG -AATGTTCTGTGGTGATGAAGTTATCGGTGCATTGTAGAGTTCCCCTCCCAATAGCATTAG -CCATCTGCATCGCTCGGGTCAGTAGCGCTCGTTCACCGGGAGTGTGTTGCTTCACCAACA -TATGGTGTGTCTGAAGGAAGAGTTCAGCATGGAAAAACATGGACAGCCTTAGGGATGGCG -CCTTACTATATCCATTCGATCCTGCTCTTCCTCGTCATTTGGCATCCTAACTTGGTCAGT -CTTCCTAGCCAAGAGCCTCTAGGGAAAAGTGTAAAAATAGTACATATAATTTCCCTAGTC -ATCCATAGGTCAGTTGGTTATTTGGAAATTTAGTGCCAAAACCATAGGCCGGGCAAAACT -ACTTACTGTGCGGTTACTGCAATATCGCCGACCATTCTCATACTCAAATTCAAATACGCT -CTCCGATAGGGAGGTGAGATCGGAGAGATCATCGTCGAATTGACTGTTGGTATCCTACAA -GACGGAGTTCATTTCGGGATCACGATAAGCCAGTTGAGTCTACGTACATCGCTAAAAGTA -TCGATCGCAATTGGACTGCGCGCATCAACAGGGATCTCACTGGCTGTAGGTACAGATAGA -GATTCGGGTGAGGGCGTTTCGTGGGTGGAAATCATGATGGAACCTTGAATGGAACGTAAC -AGGAAAAATTTTCAAAAAGCAAGAATCAACAAACCTGTTTCTCTCTTGTATCTCAAAATT -CAGCTTTGGTATTAGAATTTAGGATGTATCGAGCGTTGTGCATTGATTTCGCTCTCGCGT -TGCCGTGTCTGCCAATTAAAATCACCCAAAACCCAACTTCACTCCACAAATTCCGCTAAC -TTGCCTCACTCCTGATTTTCTTCACCCTCTCTATTTCGGCAAACATCTTTAAAGTGCCGA -AGCACCTCAAGGTTGACGATCATTGAATACCTTGTCCTGCGGCCCATGGTCCACTCATCT -ACTTATTTTTCCCGTTTTCGAGCGAGGCATGTGTTGTAAGGCTTTGCGTTCAGTCGGTCG -GCTAGTATTGCTCAGTCTAGAGCCATTAAAGCAATGTCGCGATACATCAGACAGCCCAGT -GTCCCCACAACAGCGCTTGATGCCATTTTCCCAATTTTCGTTCGGGAATGTACCGAAACG -TGCCCTAGGCTCCACAGTGTAGCGCGCTCCTGCAGATGGCTCCGCTGCTTTCTTTCAAAT -GTACTCCGTAGTCTCTGTCTGATTCTACAAGGCGTGTACTGCAAATCAAGTCAAGCTTTA -CTGGATGATCTACTTTGATTCCCGGAGTCAGAACAGCGTATTACGTGACGTCAGTGAATC -AAGTTTTATCCATTGAACGTCTCGAGCACGGCATAATGCACGTAAGCATTATTTGTAAGC 
-AAAATGGAACACCTGGGCACGATGAAAGCACCTGCAAGAACCTTGGAATGCATCTAAAAA -TGATACGCGGTGTCTTATTTGGATATTTCTCATCCCTCgaaatgaaatgaaatcactgac -atgtaaatTTCACATCGAACGGATCGGCATAGCTCTTCAATTTGTTCGATAAAAGAAAAT -ATCTTGTGATATATAATTCAACTAGTCACTATGTGGCTACAGGCCCCTCGCAGATAGCAC -GTTTTTCAACTTCTGTTGAAACTTTGGTACACGTTCTGGGGAGATCATCGTATAATGCTG -GCCATCCACCTCATGGAACTTGAGATCATTACCGACAAACTCCTCCCAACGGTTCAGTTG -TGTATAGCGATACTCTTCCCTCGATGCTGCCACGCCTCGCAACGGATCACAGAAGAAAAC -ATCCATACAAGAAACTGTACCGGATGGATCGTAGTTACGACCGATATTTGCAAGGGAGAA -GGTTACTTCAGTCCAGGTATCCAGTGATGTAAGTGTTATTCCCAACTCAGCCCTACGGAC -TGGATTCGCTTCGGCCAACACTCGTGCCAGCTGCTCTGACTTCGACAAGGAGCGTATTTC -AGGTGACAATTCATCGGCACGGTCTTCTGAAAAGAGGTCACAGAAAAATGCAACGTGTAA -CAGACACCCTGTCCAGTCTAGTCGGCTCATGACATGCTTGATGTATGGTGGTAGGTCAAA -GCTACCTATGAACTTGACTTCGTCGCCAGTTGCTTCAAGCTTCTTTGCGAGTTCAAAGGC -GACCATCCCACCATATGAGTATCCGGCTAAGGCATATGGCCCAGTAGGCTGCTTTTGCTT -CAGGGCATTGTAGTATGTCGCATGGAGGTCAGCGAGATCCTGGAAGGTTTCCTCGCCTGG -ATTGAAACCGCGGGGTCGCATTGCATAGATGGGTCGGTCCGGGAAGTGCTGAGCTAACGC -GAGGAAGACTAAAATCTCGCCGACTCCCGGATGGAATAACCACAAAGGCGTCTTGGATCC -ATGTGGTTGTAGTGTCACAACTGGCTGATACACACTACTGAGATCTGATGCCTTAAGCTT -CTGGACAGCCTCTGCTAAGGATCGAATGGTTGTGTTGGTCATGATGGTGATCATGGGAAT -ATCTTCAATCCCGAAATCAACTTCAACAGCTCTTTTCAGTCGAATCAAATCCACCGATGT -AATTCCAGTGTCTAGAATCGGCATATCAATATTAACGTCCTTGTCGAGTCCAAGCGTCTC -CTTGAAAACCGCCACAAGTTTATGCTCATTATCATTACTGGGCCCGGAGAAGTGCGACTC -TTGGTATTCTTGCAGTACCCGGTCATTGAAGAGTACCTGGTCCTTGTATTTCCCTTCTGC -CAAAGCATTTTGGATTTTCGGCCGCGAGAGCTTTCCTAGAGTCGTCTTCTCCAGGATACC -AGGTGGCAAGGGCAGAACTCGTGGCCGTGCGGATGTAAACAGAATGACAGTGCGCATCAG -CGTGTGCAGCGTCGACATGCGGGCCTCAATATCATCACTTGCAAAATCGTGTTGATATAC -GACAAAAATTTCCTCGGGGCCAGTGCTGGTTTCAAGGTGGGAGAAACATGCAACAAATGA -GGGAGCCACTCCTGGAATACGTGCTTGTTCAATAGCACCTTCAATTTCATATGGTAGGAA -TTTGACAGAATTAATGTTGATCAACTCTTTGAGGCGGCCTTCTAGTTTTAGTTGGCCCTC -TGCATCGATTGTACCGAGATCTCCTGTTCGGAACCATCCATCCTCCGTGAAAGCGGCCTT -GGTGGCCTCGTCATTGTTGAAGTAGCTTGAGAATACGACAGGTCCACGTACTTCCAGTAC -ACCAGCCTTATTGGCTGCACCACCGCTCACGCCATTAGACTGAGTTTCAGCTTTAGGCTC 
-GTTTGCGTCAATGGGCGATATCCGTATCTCGATCCCTGGAATTGTGGAGCCAAGAGCACC -AGCTTCACGTTGCGCCTTGATATCGATATCAGGGAAATTGCGATTGTAAATAGCTCCCGC -ACAAATTTCGGTCATACCAAAACCAGGGGTGATGAGGTTTGATGGTGAAACTCCTAGACT -TTGCAAGTGTGTTGTAACTCGTGAGCCAGTATCGACATTGTTAGGCTCTCCACCCGAAAC -CAAATAGAGTAGCTTGCTGAGGTCGAATTCTTGCTTAGCCTCTTCAGATGTCGTATCCAA -CATTGCAAGAAGCTTGCGTAGGAAAAAGTCCGGGGCGAAGGTCATGGAGACTTTGTGCTT -AGATAGTAGCCGCAGGAAAAGCAGTGGATTGCTGAGCACCTCGGGTGCCTGGATATGGAT -CTGGTCGACTCCCGCATACATAGCAAGAAGATGCGTTTCCATCAGACTGGCAACATGATC -TAGTCCAATCCAATTAAGCACGGAACTGCCATCGCTGACTGGCATCACCGCCAGTTTTCC -GCGAATCGAGGCAAACATTTGCTGGTGTGTCAGGCACACGGCCTTGCAATTGCCAGAGCT -CCCAGAAGTCAGCATGAGGACAGCCAAATCACTGTCGCCGACATGAGCCTCGCCATTTCC -ATTGACTCGGCTGCAGGTAGGATTGTCCAACGCTGTTTCTTTGAGGCCAGCATCTTCAAA -TTCCTCCACAGCTACAGTACGTAAGACCTCGTTCTCCCCGAACGAGCCTGCAAGTAAATC -CCGACGACTGAGCAGCAAAGGATCTTGCAACATGTTGTGCAGATGCTTGAAGTGGGCTTT -GCGGCCCTCGCTAGTGCTAACCAAAGGAGGAGATAGGGCTGGCACACTGCCAGCCAGGAT -GGTGGCCCAGAACCATGTGATATTATCCAAATGACGCTGGAAATGAACAAGGATGACACC -CCTGGGGCGGAACTCCGCACGGTTCTTCAGCCAGGACGCTCTCTGCAATGCCTGATTGCG -CAGTTCTGCATAGCTGATCGTTACCGGATGAGTCCGGTCGCCTTCGGGGTAGGCAATAAT -TCCAGCGGACAAAGCGGCTACTTGATCTAGAAGAGATCTGATGTTGACCTTCGAGACAGA -ATCCATATTTGATGTTGGGACTCTAGATGAGAAACGTAATGAGAAGTGAAAGTGGCAATT -GATGGAAGGATTATGCGCCCATTCATTTATCTTGGGAAACAGGGGTTTATACAGCGGCTT -CAGCTTCGGCTTCTTTGGCCTTATACTATTATCTACCTATTGTTGAAGTCAATTAGCGCC -GGATTTGACACTAAGTTGGGATCCGTTCAATATCGCAGACTCAAATTTAGTCCATACTCG -ACCCACTGGCCCAATTCCCTAAGGGGGGAATTGGCCCATATCTGTAGGGTCGGATATCTG -TGTATGTCGATGTTCAATTGGTTCGATTTCAATGATTACGCCTTCAGGGGGCCAAGATGC -ACGCTTGAGCAAAGTTGGTTACATCGCTTGCCTACCTGCATGATGACGCCTGCAGGGGGC -CACATAAGACATCAGCCTAGGCGCGCCATTTAGCCGTACCCATGTCGACTAAGAAAATCA -ATCACAAAGGTGCAGGACAAACCCCTCCGTTTAGACTATCTGAGAATACAGAGAATACAA -AGAGTCCACTTATTCCTATCATGATAGGCCATCTCCTTGACCAACTCAACAATGGTTACT -GGTGAGCAAATCGATCACATTGCCATTGTTGCCTCTGCGGTAACATGGCCGTTCGCAGGC -ATCAGTACTGTGTTATTCTTCATGCGCATATTTGCGCAGTTACGGCATACAACTCAAAGG -TCGTACTGGGAAGATCTCATGCTAGCCCTTTCCTGGAGCTTCGCTATCGTGCAGGCTGTC 
-ATTCTTCAGATAGCACTGAACGCCGCGAAAGACCTTGACGTCAATAACCAACCGGGCACC -GTCCCCAAGGCTGCTTTTTGGGCGATTATGATGAACAATTGGTCATTCTTGAGCATAGAG -CTTCCGAAAGTCTGTGTCGCGATTCTACTTGTTCGCCTGTTTCGGCCGGCACTCTGGCTC -CGGATTGCCATCTGGGGCCTTTGTGTGATAATCAATGTAGTTGCAGTCGGTGGCTTCATA -ATAACCTGGGTGATGTGTAACCCAGTGGCCGCACAATGGAATCCCTACAAATATCCCACA -GCCAAATGCTGGCCTCGCTCAGTCCAGATCACCTACGCCTGCGTCTCATGCGGCATCTCC -TCGTTCATCAACATTGCCTTCTCCGTATATCCAGCTATTGTGGTTTGGAAGCTGCAAATG -GCCCGCTGGAAGAAACTCAGTACCATTGGACTTATGGGCCTGGGGCTGGTGTAAGTAAGA -CGATCTCCTCTACTCTATTCAAAACTGATCCAAGCAGAGCTTTCGCTTTTTCTGTAGTCA -AGCTCTACTACATGACATTTTTGCTTGCCGATCCTGCTCCCGTGGACCTCATCTGTAAGT -GTCTCCTCCGAGTGATACTTACCAGTCTTGAAGCTAAAGCTTAGCAGACCTCTGTGCCCA -ACTCGGCATTTGGAATCGTATCGAAAACGATTTTGTTTTGATGGTCGGCCTTTTGCCCTT -TGTGCCTTCCTTCTTCAAGGCATGCACGAATCTCAAGACGCGTTCTTCCTCAAGCGGCTC -ACGTTTCAAGAGCGATCAGTATTACTCAATCAATTCCAACAAGCAGCGCAAGGACCCCGA -TGACCGACTTGATGTCGAAATGACTTCGCTCGGACGACAACTCAAAGAAGAAGCCTCTTC -TTCTCTCAGTTTTGACAGGCATCATGGAGGGAAGCTAGCTAAGCCTGCAAGTTTCTCCTA -AGACTCGACATTCTTTCCTTTGCTTCCCTCATTTCAGTCCATAGTCACACTAGATGCATT -GTAAATATCCCGTACTGTATTCAATACAATTATTTTCGGTACTCTGTCATAGCAAGCATT -AATTCCTGTTTTTCTTATTCAAACATCAGGCCAACGTGGAACTACCAGGTGCGACCATGC -AGTGAAAATCCTTTGGCCCCCAACATCCCAGTATGATCCCTGCTCTGAAAGATCACCCGT -GGCAAACATAGATAATGGCCCTAGCGTTGTTTTGTCATGTTGCAACTGGCGTGCAATAGC -GCGTTTTCAACAGAGGGGGTTCTGGGCTTGGCTGAGACCGGCCCCGAAAGCCAAGAGAGG -GCAGTACTCTCAAGGGTTGCGCTCGAATAAATCATGCCACATATGTATAAGCGAGCAAGA -AGCAATATGATTTTAGGTTTCCGGAGTACATACTACCTGCAGGTGCTATCACTTAAGACG -TGGTGGCTTCCGTTCGAAAGCGCTATTCGCAAACTAATTTCCAACATACATCTCACTATT -GCTCTCAATGGACCGCGCAAATCTGACCAGTCGCGCCACCGAAATTTCGGAGGTCGTTCA -CAGCCTAGTGGGCGATTTGTCTTCCAAGGGTATTCCCGAGCCTTCTTTCGAACATGGATT -ACCCGCTGTCCTGCAGAGCGATGCGCCGGATTCAAATGCCCTAGCCGCACGCGTGAAGCT -CCTGGGCTTACTGGATGAGCTACGTGATCTCTTAACCGATCCCTCACTCTTGGGTAGCCC -TGAATTGGTGAGTAGAAGGAAATCAGAAATCGCACCTCTTCGAAATGACTAACGTCTTTA -GCGTAACCCTTCTTTGAGTATATTGGCATTGGTTCGCCTGAAGATCTTTGAAAATTTCCC -AAGCGAGGGAACCACCATCAAAGATCTAGCTGAGCGCGTTACTTGCAATGAGAATATTGT 
-TCGCCGATTGATGTCCCATGCCGCTACCTATCATGTCTTTTTTCAAGAGAAGCCTGATTT -CTTCATCCACACTGCTGGCTCCCGTGTTCTGGTGGAGAATGAGGGTATGCGTTCTTGGAT -GCTGGTCGGACTCGGCGAGACCATGCCTGGTGCCCTTAAAGTATGCGATAGCTCTACCAG -GCTCCTGTTCAAACGCTGATCTCCCTAGATTGCCGAAGCCGTATCGCAACACAACGACTC -TGAAGAGCCACAACATAGCGTAATCGTTCCAGTGACTTTTCACAATGCAAGTTCGCTAAT -AGCAGCAGGGATGGAATATACAAAATGGCACTGATCTCCCGGTCTTCCAAGCCCTCGCCA -ACATGCCTGAACGTGCGAAGGTCTTTGCTACAGCCATGAGTTGGCTCGCTCAGCTACCGG -GCTATTCCCCGCAGTATCTGGTAGATCACTTCCCGTTTGGATCTGGAGATATCACAGTCG -TTGATGTAGGCGGCGGCATTGGCCATATCGCCCGGGCACTCGCAGATCACTGTCCCACAA -TTCAGTGCATTGTACAAGATCGCCCCGAGGTGATTTCTCAGGCAGAAGAAATTTTGCCGG -CTGACCTTCAGGATCGTATCCGCTTCCAGGCCCATGACTTCTTCCAACCTCAACCGGTAC -ATGGCGCGGACGTTTACTTATTGCGCCATGTACTGCATGATTGGTCCAATAAGTATGCGC -GAAAAATACTGCAGGCTCTAATCCCGGCTTTGAAGCCTGGGGCCAAGGTGGTTCTTAACG -ATCGTATCATTCCGGGCTATGGAGAGGCGCATTACTTGAAAGAGCGTGAAGCACGGTACG -AGCTTTATATTCATTGGGGTTATAGGACTATTTACTGATAAAAAGAACCAGGGACTATGA -TTTGTACATGCTCGGTTTGCAGAATGCTCAAGAGCGTACACCAGATGATTGGAAAACTCT -TTTCAAAGATACAGACTCACGGTTTAGTGTTACGAGAATATCCCAGCCATCTAAATCTTA -CCTCGCTATTATGGAGGTGACCTGGGAGGGCTAAGGTGCGAAGAATTCAAGAAGACAATG -TCATTGGGAAATCTATCGAACAAATCCAAGATATCAACTTAGCAAGAAAATCCCATCAAA -ATCAGATCTATTGTAACCCTATGAGATGGAAATAGGGGATTCATTTCTCTTCGTGATGAT -GCCACATAGTCAACCAACAAATGACATGTGACAGACCCCTGAAAGTCCTCCAGCCGGCCC -CTTACCGAGGAATGTCACAGCAACTCAGAATAACCTCGCGAAACTCGCCCGCTCGTAAAG -CCGCATAGATCTTGCCGATATTTGATCCGATCGATTCTTTGACGATTTTTTCGCCGGAGT -CATAGGTTGGATGATCCACCAGACCGCGATGCATGGCCACTCCAAGGGTCTGACGCACGA -ATCCATACAGCTTCTTTGATGAATTGCAGAGATACGGTGCGGCCGATTTGCTGACGAAGA -AGAGCTTCCGATTCTCTTCGTACCTCGTCTCCAAAACATTGGCAACGTTACTCTTCCATT -GTCGAAGTTCTTTTCCAACGGCTTGGTCACCCAGAAGGTCGAGGACGATACCCAAAGAAT -CAGTGGCAGTAACCTGACTGCGTCTATGCAGATCGCTGGTGCTATTTCTCTCCCAGTGGC -CCATAAGTTCTTTCCAGATGATTCCTTGGCTCTGTGAAATTTGGGCACCCGCGCCGCTCG -ACTGAAACAGCTCCGCGTTTGCATGATCTACCTCCTTGCGGGCTTGAGATACAAACTCAA -GATGCATAGCACGAAGGTCCAGAGCCTGGCACAGAACGTACAAATATGTCGCGGACATCA -ACGCCAGGATTTCGACAGTGTCGCCTGCATAGCGAGCGGCAATCAAAGCTAGAGAGTTCA 
-GGCTTTGATTATGCATTTCTGCGGACTGCACATGGTTGCTGACCGGGTGGTTGAGATATC -CTAGTTCCGACATATATGAAGCCATATTGATATCAACGCCTTTGAACGCAAACGAGAGAC -TCGGATCATCGACGCTGAGATTTGGTGGGAGTCCTTTATTGAGCATCGGGTTGAGGATCT -CCGAGCACTGCGAAAAGATCATTTTACCTAGCATCTGCATCGCGCCCATAGTTTTCTCCA -TTGCCGATGTTATCGATGCTGCTTGGAAGTTTCCTCCGTGGTGAACCCGCTCCTCGGCAA -GATCCAGGAGTGGGTTGTCCGTGGTAGAGTTCAGTTCAGTCTGGACTTGTTCATGTGCAA -GTGCGAGATTCTCTAGTTGTGGGCCGATCCACTGCGTCGATGTGCGCAGCGCATACCGGT -CTTGCGCCAGACCTTCTCCATCGGGATCCGCGCCACGCACCAACTTAGCATTAGCTAGGC -ACTCGAAGATAAACGCTGCTGACTCTAGTTGGCCTGGGTGAGGTCGAGCCTCGGCAATGA -ACGGATGATAGTTGTATCGCGATCCCAGCAAAGCCTCTGTCCCCATTGCGGTGAGCAATT -GCGACAAAATGACCAGCTGATTTGCCTCGAACAAAACTAGAGTAGCTGCACCGCAACTGA -ATGCAGTTCCATTCAAAAGACCCAGGCCTTCCTTTGGCCCAAATTCAATCGGGGTCAGCC -CTGCTTCTTGGAGGGCCTGGCTGGCGGGAATGATTCGATGGTTCTGGCCCTCCCCGCAAT -TCATTGCTATATCAGGATTTCCTTCTATCGCCCCGCAGATGTAGGCCAAAGGTGTGAGAT -CCCCGCAAGCGGAAATGCTTCCTCTCAGTGGTACGACTGGTGTCATATCGTGAGCGAGGA -GGGCAAGGATATTTCGAACAACTTGAATGCGAACGGCCGAATGTGCTCGCAGTAAGGAAT -TGCATCTAATTAACATAGCCGCCTTCACAATGGGAATAGGCATTGCATGACTTTTCAAGC -TTTGCAGGCGCGTTGAGCCTGCTCCGGACTCTCGGTCTCCAGGCAGAACCACGGCAGAAT -GGTGGTGCTGGATCAATGCCTGCTGGAGGGCGGCATAGTTGTTGGTACGAGTGTCGGCAC -TCCCTCCAAATCCAGTAGTCACTCCATATATCACCTCCCCGTTCCCAAGCTTTTCTGCGA -GTAGATCTACACTGTGTTCCATTCGAGATAGCACAGTCTCGCTATCAGTGATGTCGGCCG -CAATCTTGCGCCTTCGTCATATTGTTAGTGTTGTCTCATGGGAGAATACGGTGGAATGAA -TTCACATACAAGCTGACCGCGACGACCGAGCTTAAGTCTAGCGAATGCCCATCAAGTACG -ATGCTCCGCCCACCACCAACAATCTTGTCCAGCTGGGTACATGAATCCAGAACTGAGCTG -ATATACCCCAATCCCTGAGGATTTCCATACCCATTCGGATACTGCGGGAATGCTTCACCC -ATAGTAGCAAGATATTCTAGGAGAAGACAATTGGATTTATTCAGTGAATGAGGTGAATTG -CAATCATAGCAACGCAAGCCGAGGGAGGGGCAGACATATATGAAAATTGGCCACCCCTGT -CCAGCAATTCTAGGCACCGCATGTCTCCGGCTCTCTGTCCAGATGGTGCAGTATCGAGAA -TCTCTCCAAGGACTAGCGTGATCCTGGAAATGATCGCTCCGAAAGTGGATTTTAATTTGA -ATTTCATCACAAAGTCAAAAAACCAATTGAACAGGAAGCATTAGCGCTCTTACAGGAGCA -TCGTACAATCACACGTGGGGGCCAAAGTCGCTAAGCGTTCAAAAGTACCCGACGACAAAG -TACCGTGATTTGCTAGAGACTATGAAGCTTTGCTACCTTCGATACACATTGTGTCACGAT 
-GGTAGTACACGGCTTATTGTGATGTTTTAACTCAGATGGTGATGATGTACTCGATTGATG -AATCAAGGACTTATGTTGTACAGAGTACTCCGTAGATATCTCGGGATGTTTTACCCAAAG -TGGGTGGGTGCTCATGGATAACTTAGCTCCAAATGGTCGCTTAGAAATGTACTGACTGGG -AGCATCCATGTCTCGGCGCTAATGTTGCAAAAGTGTCCCTCGATATACGATTGCACTTTG -TGTGCATCTTTTGGCGGTTACTTGTCTAGGGATAACAACGGACATAGAGGATCATTGAGA -AACGATTGCGATGAATAGCCATCTAGTCAAAGCGTACGTGGGCTCTTCGTAACAATTTCG -CCTCCTTTGCTAACCCAGTTTAATAGCGATCTCAGTGGCGGTATTGTGAAGATCGATGTG -GGAGAAACAAATACCCCCTTTGACGTCCATATGGAGCTATTATGTAATTGCTCGCCCTAT -TTTGATAACCTGTTCCAACGCCGCTTCGACCAACCTCTCACGGAGCAAGTGATTTCCTTT -CCGGATGACGATCCCCAAATTTTTGCTCAAGTGATTCTGTGGATGTATCGTGGCAACGAT -TCCCTTGATTTGGTTGCGGATGAGAAACTGGGCTTTCTCGTACGACTTTGGATTCTTGCA -GGCAATTTCGAAATGATAGACCTGCAGAACCAGGTGATGCTGGTCTGTAAAAAAAGAGTC -GACGAATCGCTAGGAATTCTTGGTGTGGACACAATCAATTACATCTACTCTCACACCCTA -CCTCAATCCCCAATGCGTCTCTTAGCGGTAGATGTATGGGTGCAGAGGTCGACCACAAAG -GGGTTTGGAACTAGACAAGAAGAAGTTCCGCGGCCTTTTCTTGAAGACCTCTGCCATGGA -TTTATCAAAGAAAGGGAGAAATCAGACTTGCCTCCAACGCTCCTCAAGCTATCCGATCAC -CGTTACCTCATTGATTTTTTACCACTTGAGGACAGGGGCGGCAAGGTTCTAAGGCCAACC -GGGGCTGTAGAAATTAAGCAAACTGCTACGGAAGCTGAGATGGAGACTAGGAAAATCAAG -ATTCCCTGCTCTCGCACGAAGGCTAAATCGGTGATGGAAAATACTGGAGTAGGTGTTGAT -GGACAGCGGGATATGTTTGGAAAGTCAGATTAGGTGCATGTTGGTGAGCTAGAAGGCAGC -GTTTCGTTAAATTCTGCCAGAAGCTGCTGGACACTTACAGACCGAACGCTTTTTCTGGTA -TCTGTCTAGCAAATTTGTTTATCTTTCATTGTTTTTCAGCTTCTCGGGCGCCAAAGACTT -ACGCTTCAACTATAACTTATGTCATACAAACAGTGATGGATCTATGGAAGAATTTATGGA -TTCATGGAAATGACCTGGGCACTCTGGCACAACAGCTGGCATTGTTGCTCTTCCTCTTTT -GCGCTCAGGGGGTTTAGGAATTATTAACCTAGTAGTAAAACTGAAAGCCATTTTGAAAAT -AATCAGGGGACAGAAGAAATCTAAAACAATTGGACTTTTTCGATACATTACGAATATCTT -CAACGGTCACATGCTACAGTCCTGGAGAAGGAGGGGATCGTAGGAGAAAAGAGGTCTATC -TGAACATGCTAGAGGGCACAAAACAGCTCGAGCATATCTCAATAACCGGACTCCACTTGC -TGCCTGACAGCGAGAATGGCATCGCGGTAGAAACCCATACCGATACTTCGCGGATGGAAC -GCTTTGATTTGCTGAGTTGGCAAAAACCACGATATATCTTGAGCGGTAAAGTCGCCGCTG -GCAAGCGCTGTATTTGCGAGTGCAACTTGCTTAGCGATATCTCCACCCGGATCGGACACA -ACTGCTGACGCGAGATCACACCAGTAGACGTCGACAGGGTCGGGATCGACACCGAGGGTT 
-GTGTTGCATGTGTTTCCATCGGGGAGTGGGAGACTGCCACTTGTTTGCAGGACGCTCAAC -TCCGCGCTATCGTCAGCCGCTGTCGCAACCGTGGCCCCCTCTTCAATGTCTGGCCACCCA -CTGAGGAAAAAGGCAGTGCTCATGTGACTCGGATCCGGTTCTATCACGCCCGGCTCACAC -CAGCGGTGACCCTCGAAACGGGGATCCACGTCGACAAAGTGAGTCACATTTCTGCCAACC -GGTTTGTTTGCGTCAAGGACCGCGGCTTCGATGACTTTGTTGAGAGATTTCACCAGGTCA -TTCATCTCAGTCCTCAGGGAGGTTGTCAGCCAAACGTCCGTATGATCTGTTTTATCCCAC -CCCCAGTAGCGGAACGAAACATCATTGCACTGTGCTGTATCCGCATTGAAAAATTGTGGA -TATCCAGCGATGTAAAGATTGAAATCCTACAGCAGGTCAGCACAGGCACATAAACAGGAC -ATGGATTTACCGACAGGATTTTGTTGGGCCCAGCCAATAATCCGAAGGTAGATAGAGGTG -AATTTATACTGCAGCCCGTCTGGGCCGGTATCCGCCATGAGCTTCTTGGCGGCGCTTTTG -TAGTGAGAACACCACGCAGCATCCCAACCAAGTGGGGAGTTGTAATATCTCAGAAGGCAA -TGCTTGACAATATTCGAGAAATCCAAGTCGTTCCCGCCGATCGTCATAGTGGCCAAGCTG -TCGACTGCCGGGTTCTGCCACCCGTGAAGCTTGTCGTAAAGCCCGGCGACAGTGTCTCCG -GAACAAGCTAGATTTTGGTAATCAAAGCCGTCAACGAACGAATCGTGTAACAGTTGGCCG -AAATTGTTCGATCCAACTCGGCATTTTTGCGCCGTAGGTGTTTTTCCTGTGCCCATTCCC -GCGGCGAAAGAGTCTCCGAAATGTGCCTGGCCGGTGACATCGGGAATATCTTCGTTAACA -TCATCCCACTGGATAAAGTCCTCGGCTAGAACACACTCAGTTGCACTTGTGGAAGCGGGG -AGTGCTGGCAGAGGAATGGCGCGGCGGGTTCTCTGTGTGAAATAGACACCTGCCGCCAAT -AATTGATAATTCTCTGCGTTCTTCCTGGCACCCGCGGCATCGCATTTACCCTCAATAGCT -TTCCCATCTGGCCCAAGGACAGGGCATTCAAGAGCGGGTGTTTTTTTTTGCTTGCCTGGC -TTGGGTGCCTTAGGTGGCTTCCCTTTCCTGGCTGCTTCCCTGGCTTTTCTTGCTTCCTCT -AACCAACATCCACGATCGAAGGTGCCACGGGCGAGCTTCCAGCAACTGATGAAGCCGTAT -GCGATGTCACGGGTGCTTGCGGCAGATTAGTGCGAACAATGAAAAGATAACTGAAGTGAC -TTACAATGGTTTATCAAGCATTCCGTACTTTGTGTGTGTGGCTTCATGTACAAGAACGGC -AGATCTTGCTCTGGAAGCCTTGCGGAGATCCGCTGACTTGTCTTCTAGCACGTCTTCGGT -GGGCCAAATTTGGGGCAACTTGGAATTAAGTATGGGATCTTTGAAAAAGGGTAGGCAGAA -GGTCAAGCGCTGGTTTTTGTTGTTCATATAAGCAACATAATCATCACCGCACATGTCTGT -TACCGAATTTGAGTCACAGGCCAGAGACAGGGGGAAATTTGGATCATTGCCAGCGAGAAG -GCTTGCAATTCGTTGAAATGTCTTCTTGGTGTCGGATATAAATGTGGTATCACCTTGACC -ATTGAAGAAAGCATCAAGATAGACACCGGTCTCAAGATTGGAGGCAGACACCTGCGCCAT -TTCCACTGCGTACTGGAGTTCGAGTTCAATCACCTTCTTATTCGCGTCTGTACATTTTGA -TTCCTCTGAATCATCGTAGCGATAGTTCATGTTTGCAATTGATACGCCGTCTTTCGATAT 
-GCAGATTCCAGATGGACAATAGCCCCGGCTACAGGTAAAATCACAAAGATTGTCATAAAT -CTTCGAATTGCCGGGACTTGCCTCACCAGCACCCTCTGTCTGCGCGGGCGGCTGGATGAG -AGGTCCTGTCTTTGTGCATGAACATAGGTTGATAGGACAGTACCCAAAGTTGCATGCGTA -CGAACATAGCCCGGCGACGCTACCCTCACCAGTGCCAGCAACACAGGCCGGCGGGAGGAA -GTCCGAAACTGTAGGAATCGGCATAGCATGTTCAGTCTTGTCACACGTCGCAGAAGGACA -GTAGCCGTAGTTGCAGGCAAAGCTGCAGAGCCCCACATAGTTGGCATCTTTCCCTTCTGC -CGGAAAGCCTATCGTGCCCGTCGCGTTGGGCTTGGTAAGGGGAACACCCATCTGCTCGCA -GGTACATGCACCGACAGGGCAATACCCGTAAGAGCAGGTAAAGTTGCAAAGGCCATTGAA -ATCGTAGGCGCCCTTCCCTTGAACGCACTTCTGGTCTTTTATGTCGACTGAAGTAGAGTA -GAGGTCACCATAGGGACTCCCTCCTCCAACGCTTCCCACCCAGGCATTGAAGTTGTTGAA -GCCCCCATCACATGATGTGGTAATGTCGATGGTCTTGGGACTTTGTCCCATTGTGTCACC -AACAATCAAACTGATTTGAACCATTCCGATTGGTGCAGCTACGCTCCCAAACCGGATAGC -AGCTCCAACCGAGCCATCGGGTATGTAGTCCCACTCCACAGATTCACCGTTGACGAGTAC -TGCTGCATTGCCACTGCTGAATAATGACATAACCCAAATTCGAGCTTCCAATGTTTCCTT -TGGGTTGTATTCAATTTGTAGCTGAGAAGCGGTATTGAGTGTTGTTCCCCCAACGTCGCA -GTAGTCTAGCGGATTAGGAGTATACCAGAAGGTAGCTAGATCGTTGCCCATCGAGGCCTT -TCCAGTTTTGTACAAATCAATCAAAAAAGGAAGCATATCACGCCATCCATCATGGGGGTA -ACCTGTCACGAAGTCGAACGGAGCTTCACCGATATCAAGGGCAGCCAGAGCATTCTTGCG -AATCGGGCCAATGTAATGAGATTCACCGTAATCATTCCAAGAGATAATTTCAACGAACTC -TGGCATTGGATCAAGAGCGTAAAGCTCTTGCCAGCGGTCGTGCCAGAGGCTACCACTGTT -CCAAAGCCAGTTCTTGTTGTATCCTGGCAAATTGGTGTAAAACCAAGGAGAAATGGCCAT -CATGTATGGCTTCCCCTCCAGATATTGGATATAGGAAGCATCAGTATAGGTGTCCATGGT -TTGATTTCCCCATGGCCACGCAGACCAGCTGAACAATCCGTCGGCAACTCCATTTGCCAA -TGCTACGGCCGGCTTGGCACCGACAGAAGACCAGTCTGGCATGAAGAAGCAATTTGTTTT -TGCTTTAATTGTCACCCAGTCATCTGCATTGCCAGGCCCTTCAAAGGTGGACACAAAAGG -CTTTCCGTCATACTGGAAGTATGAAGAGCTGGAACTGTATTTTTGGATCATGCTGGTGAC -AACATCTTGGTCCCAAGGTCCATTGCCAGCATAGTCAAATGAGAAGAATAGCCTGAATCC -CTTCGCCTCAGCAGCAGTGAAGGCCATGGCCACGGAAGCGTCATTGGTCTTGTCTTCCCA -TGCCATGTTGAGGGCGAACGCATCGATATGGGCATCCTGGGCCAACTTGAAGTCTGACTG -CCAGTCTGAAGCAGTGTAATTCTCGGAGTTTGTGACCTTGAATAGTTTTAACATCATCCT -TTTGCGTTGAGATAATACCATCCAGAGTGGAGTTACATACCATAAAATGGGCAAACACAG -CTTTTGCATCCGCCTGGTGGATGACCTGAAAGGTCACCACCATGGCAAGCCATGCAAACC 
-AATTCATTGTCGCAATGGTGGGCGAGGGGAAAAGTGATGAGAAACTCCGCTGCTAGACAC -CTACAGTCCTTTTATAATCTTTGAATCAAGCCCCTATCGCCTCACACCACCGGGGCAGGA -CATTTAGCAGATGAAAATTTCCACCTCACGAGTTTACCTTTCCGTAAAGAGCTATAGCCT -CGCTCGCGTTCCGGTACTAGATATATGGAGTGGAAATTATAGTATATGAGGTATTATCTC -TAGTCTTGAAATCGGACTAGGTGGTAACATTCATTCTAGCGTCAAGTAGTTCCGGGGAGC -CCTATTTCTTCGGATATGATCAATACGTAGGTACTAGACGATGAATGCCGAGCAATCTAG -TACACTCGTGGACTGATATCTACATCTAGGTGTTGCTGTTTATTGCAAGATTCGATACAG -CTAATGGCATTGCCTGCCTTCGCGTAGGTATAACTCCACATACTCAATACATCTCTGTCA -AATTATTCTTTTAAACAGTGATAGCTCAACAGCTATGTGAGTACCAACGGGGCAAACCGT -GAGAGAAATTACTATGAGACGCCACAATTTAGGACAACCAACAGTAATCAGCAGACCATG -GGTTTTCTAGGGCAGGGGCCATAAGTTCAGGAGGAACCTGCTCGAAATAGCTTAGCGGCC -CACTTCGGTTGATGCCGGCGCAGCCCTTGCGGCGCCTGGCTCTGATTGCTTGGACGCTAA -GGACGAATGTCCATACATATTGTAGTGGGGAACATGGTCTTCCACTACTTTGCACCTCCG -CCTTTCCGCGCGTATATGCAAGTGAATAATTAGTGTAAACAGTTAGTGTTTCGGAGGTTA -CAAGAAGCAAATAATTTACAGATTGGCAGAAATACACTCCCACACCTTTCTTGCTTAATT -TCTTGATTTGCTATCCCGATTTGCCTTGTGCGCTCTGGAAGGTATGAAGCTTAGTTACGA -CTGTTGAATCGACCCGTATGCTAGAGAGTATACTGCCTTGAGTGTTCCCCTCTCCAATAT -GTACTCCGTAGTCTTGTCTTCGATCGGGACTCTATAGACTTGACCGTTTGAGGGGTGTGG -GCGAAAAAAGAAAGAAAAGAGAAGAAAATATACTTCAAGTTCATCTCTAAAAGATTGGAG -ATGATTAGCCATTATTGTCTCGCTTGGCTGAGGCTCGACATTATTGTCCTTTTGAGCACT -CACGACAGTTTTCCGACTAGCGCTGAATTGAATACTGGAATCAAGGTCCACAGGACTCCT -GCTAATAATACTTCTTGCTTACGCCTCTCTACGGTGAATAATGCTCATCTTATCACAATC -AAAGGTCCAGAATTAATCTTCATCAATTTTTCTATTAGCATCTATAATATCTCGGGGGGC -TTTATAACTCATTATTGTCTCTATATCCCGTACCAAGTATTAAAATCGCTACTTTCTTCA -TTCGATTGAATTATGTATATCGTGCCCGGCGATAGTGGGATAAATGAGTCCCTAGTTCCT -TTGCTGAGCTACTGTACTAGCAGATCTTGGCAAATGGCGACGGTGATGTCTCTAGCCGAA -CTTTCCTTTTTGCCGTATTGTGTATTGTAGGCTCCGCTAAGGCGAATGGGCCTAACTTAG -AATTGATTTAGACTAAATCATAGTGACGGCCAAGCTCATGTCTAATCTCCGAGCCACTTG -CTTAGCTCTATTACCGATGAGGCGTGGGTGATGTTTAGGCGACGTTGGACCACACAATGC -CCTTGAATCTTGCTAGTATGTCTTGGCAACGGATTACACACCATAATTGGTACTCATACG -GAATACGGAGGGAGTACAACACTGCCGTTCGAGATTATGCCGTTGGAAAAAAAATTGAAA -TTCCTTATCTCATTCGAGAATACGGAGTAGTCGGCTCTTTGTCCGAGTTACTTGTTGGTT 
-TACTTTCTTTTGGCACCTCGTTCTGCGCTTCCCTCTTTTCACGTCCCTCCCCAATCTTTT -ATTGTAGTCCATTCATTCCCCTTATTTCTCCCGAGTCCTGATATTAGTCGCACAGCCCGA -TGTCAGCTATTGTCAGCTTGTGAGTATTCCTCTCTGCACATGGGGAACTAGAACCACAGA -TTTTCCCAGAGACCCTAACAAGCAAATAGCATTATCCTGATTTTGATCTTTACCTCCTTC -GCTCCGTGTATACGTCACGCTTGGCTAGACAAGGGCACTGGAGACATTTCATTGTTCTAC -CTACTTTTCAATGTCGTTTGTTCCACGGAGCATTTACTCTTTGTCTTCTATTACACTATC -AATTTCCCTGTTGAGACAGGTTACTGGACGCATCATCCACGTAATGCACTTGACTGGGTC -AATTTTGTTCAAGTGCTAGGCGTGTGGGCATTGTGGAATATCCTGTGAGCTCAAAACATC -GCACTTCCTAGTGACCATATAGCGATCAGAGACTGACCAAAATGTCTAGATTGGGGTTCA -ACCTCTACTACAGACCCACTAGTCGCCTTCGGAAGGCTTTGACAGTAGTGTTTTATTTCT -GCTTTCTCATTGTATCCATAGTCCCTCTTATCGCAGATGCAGTTTCTGATATATTCTGTC -CTCCACACTATCCGAACTGCCCCAAGATTGAGCGGGATCCACTTGCGCTCTTCCAGGGCC -TTCATGCCGTCATACTTCTGCCATATCTTACGACCATACCACTTGGCATAGGAATATTCC -AACAAGCCTGCATGCCGTTACAAACTCAGAGTCTGACTGGACTAAAGTTTCAGGCAGTTG -CTTTCATGCTCTCAGCCATTTCTTGGGTCCCAAGAGTCTCTGTTCCATGGGATCTATATC -TAGATGGTGAACACCCAATCATGATGGTTGTCAATGCATGGTACCACATGGTTGGGTTTG -TCGCTGTCGACGATGCATTCTTTGCTCTGGGGCAAGGCTTCTTGCTATGGTTGTCTTTAC -GGCAGATACGACGTGCGGAGGACGAGGAAAGACAGCCGCTTCTGGGGTGATATCTTATGT -GTATGTACCTCACCATGGTTCTAGCTTTGACTTTCAGATCACATAGACCCTTTCCAGTGA -TATGCTCTATTGGGATGTCATTAACTGGTCTTAATGTACCATTTTCCATCAGTAAATACG -ATCTGCTATTTTACATCCAAGGATATTTATAAACAGTTTATACTTTGACATTTTATCTAT -GCAGGGGCATAGTGGCCAGGATTACTTTCGGCTTTAATGTAATGCATTGAGGCCTCGGTA -CCTCAACTTAGCCTGATACTGGCCTTATCCTGACCGTTCTCAAGTCCTATGATACAGTGT -ATCGTGAATCTTTCAGTACGCAGTAAATACTAGTACCGACTACCGGAGGCTTTAAAGGCC -GGAAGGCCTAAATTGCTATGTTCCTGGGTACGCCACCTTACCCACTACTTCAAAGGCctc -tctctctctctctctctttctttttcatgctatccttttctttcCCTGCTTATATCTACC -TAATCTATTCTCGCCGATACTTTTTCTTACAGCTGCCATGGATCCTGAACTTAAATCCCA -CATGAGATTTTTTGAACATGATAGACCGTGAGATTCAACCACCTTTCCCCCTGGCCATTG -TTCCAACCTAATAAATATCCCAGTCCAAACACGCCCAGACTCTCAATGGAGATTGAGTCG -ATGTCCCCAGTTGTGTGTCTCTCGAACCCGAGACTAGGAATTATCGTCACTATTCGTCGG -GCAGAGGATGACGGTAATAAGCCCTGTATCTTTCGCTGGAGCAGCATATATGATGCATGG -GGTCCCTCTGGCTTCGTAGTCTTTCAGCACACACCAGATGGTCTAAAAAGGGTCGAAGGA 
-ACGCCTGAGCGGCCACCTCCTCGGACTGTCAAGATCACAGGATATGAAGTTGAAACAGAA -GAACTTCTTCCTGGACAAACTATCAGTCGAAATATAAACTCTATGAGCCCCTTTTTGGAT -CATATGGTGGTCGGTGAGAGATATGAGCTAGTTTGGCTTGGGGCTGAATATGCCTTGTGG -GCTTGGGGCACCTTGCGCGACCATTGGGAGCAAGAAATTGGGGATAACTCCAGGCTACCC -CTTGTTGTCATCCCGGGGGGGGCCTCTCGCTCTTTTAACGGCGTAGATGTGGAAGAGCTT -GCAGAGCCTGAACCGTACGAGCCTTCATTGATAGAGAAATCGGATCGAATGTGCGTAATG -ATCTAACCCAATATCCGTCTTTATACTGGCTGATAAACTTTTCACAGTCCTGGAGCCCCT -TGCATAAGTGTGTTTCTGGAAAGTCCATTAGAAATGTCCAGAAGAGAAGGAATCTTTATC -ACAATGAAGATTACCTATGATGGTCTCACGAATGAGGACCACGAGGCGATCAATGATGAC -ACAAAGCCGATCATCATCCACAATTATCCTTTTCGTTATGATCACTGTCGACTGCAACGC -CGTTGCCCCGATTACAATCCGTCAAAAGATTCAAGCGGGGATTCTACACAGTGGAAGACC -TATTATGACAACGAAAGAAATCCGGGCTGGAGGATTGTAGATGAACCTGATGTTGAGATC -AACGTCACGGATTCCGAGTATTTCCGCGGCCTTCATCCAGGAGAATCTTTTGTCAAAACT -CTTCAACTTGAAGATTTCCACTTGCATCCCGATACAGTGGTCGGCGACACATATCGTTAC -CAGTACTGTGGTGGCTGCGTCGACTGGTGGGTTTGGGGTGATTTCAAAGAGCATGCAAAT -ACAGTAGTGAAATTGCCATGTTGGCTGCAAGATTATGTCGTCGATCCCGATGATAATGAT -GGAAGGCCTTTCATAATGATCCCTTCATCTAATTTTGTCGAGTTTACTATTGTTGAATGA -TACGATGGGTTCTCTTTAATGCAACTTTTGAACCCCAATCTTGGGTAGTGGAAGACAAAT -ACATTCAGTTCATTCAGATCATAGGGGGAATTTCTAGTCATGTGATGTGGCCTATActtc -tcttcctcttctcgcctcattctcgtgtctGTCAAGAATATCAAATCCGTCCAAATCTTC -CAGTTAGCGAGATATCCTTCCTGGCATGAATCTCCATCGACATATCAATGCTGCTCTTCA -GATTGCGAGTTATCAACTCGTTCCGCCGCCCTTTTCATCAAATCATCTGCAAAATGTCAA -CTACAGGCCCAATATCAGGGGCAGAAATCCCATTCGAGGGAATCAGAATGACACGGTCGG -ATTTTCAGAGAAAACCAAATCTTCCATGGGGCTTCTGCATTTATCGATGCTCATTCAAAG -ACAATACTGCGTGGCACAAAATGCTCCAGCTCATTCAGCAACGCGTGCAAGAATCCATTG -AACGCCCCTTGCTAGCTGGAGAGGAGAGAACCGAGCTTTTAGAGGCCCATGATCTCGTCA -TCTATGACGAGCCGGAGTTCGACGGCGCAACTTCACATGAGGTGCGCGATCATTTTCATG -GCTGGGTGGCAGAACAACTACCAAAAGTGGTTGACACGCCTGAAACACTACAGCGGATTC -TTGAATCGCATTCAGAGAAGAAAAGAGAGCTCCCTGGGCCTGAATACGGTTTTGGAGCAC -GGTTCAACTTGGCTCTCTTTGTTGATGATATCTGCTTGGAGTCTTTGGACCATATGGACT -ATCCTGTTGTGAAGATCATGTATAAGCAATGGGGGAATTTGAGCCCCGAGGAAAGAAACT -ACGAAATTGCTCCCGAGTGGCATGATGGGACAACGGATGAGGAGCAGGAGGATGTTGGCT 
-GGATGTACATGTCTGTCGCGGACTACGTTTCAATGTATGACCGGTTTGCGTGGACTCACA -TGGCTTTATGGCATGATGAATATCTTCGGCCACCTCAGATGATTGATTATTTCTCTGACG -AGACGATGCAACCGGGCTTCTGGCGAATTTAATCATATTAAGGCTATGTCTTCGATATGG -TTTCGGGGAAATTGTCGAATCATAATCTTACTCATTTTGATCAATATGGGACATTTTACA -TGATATCTTGATTCCTAGAGAATGTGTTAAGATATACTTCAGAAATTTGCCTTAAAAATA -GATTATCTGGACTTGGACTCTCTTATTAGCGTAGGTTAAGCACTTTATATAATATTGCGG -GACATTTAAGTTTCACCTTTCCACGTTTCTCTTGTAGATCGCTAAGAAAAATTGTACAAT -TGACGGAATATAATTTAGATTAGGTTTAGCGCTAGAAATGTATGCATCCGAGGGTCGCTA -CAGGTCATAGAGGGTCACCCGCGAGCGATCCTCATGGAACCAGGTCACGAGCGACCCTGA -CCCTCGGAGGGGCGGGTGGAGGGTCCAAAGTTATGGACCACCCGAGGGTCATTGCCAGGT -CTAGCTTTTACCTCCTTTATTACCCCGAACAAAAGCCTGAAATAGGTGCGGAGTATATAT -AAACCTAGGGGATAAAAAAAATATATGTCTAGATAGAGCTAGAACTTAGCTGATTGAATT -AAACGGCATTTTGTTTGAGATAACTGCACTTCAAATTTGTAAGTTGGCTTTGGTGGCCAG -CTCGTTGGTGTGGCCAGCTCGGTAGTGGATTAGTTACCCATGCCAGTTTGTTTCATTGGT -TGTTGCGCAGATATAACTAAAGTTGAATGTTTTCTTTTCTCCTTCTTTTTGCTGCTTGAT -GTTTTGTAAAAGGGCCTGAGTCATACAAGGTATATGATCTTGGTTTTTCTCCCATCCTTA -TTTTCACTAGTGGGGAAAATACAGATGTCTATATATAGAGGACCATAAAGTTCGAATAGC -CCTCAAAGCCACTACTTCACCCATAACCACGACTTTTCGGTTTTTATGTAAACCACAACC -TTTTATTCTTTGCATGCAAACCGCAACTCGAATATAGAAGAAATGCAGCATCCGAATGCC -CTGATTGTAACCTTAATGCAGGAACTCCCTTCTACCTTTGACTTTCATTTTCCAAAACAC -ACACCATACAGTGATGCAGTAACCTAACGATTATGTTTTGCTTGATCGCACTCTTACTCA -TGATCCAACAAGTACAATCCTACATCATTTAGTAAGTTATTGTGAAATAGATAACAGCAT -CTATCTCCACAACTTAGATTCCAAAACGCACTATGTGGTCGTGGGAGGAGAGGACCCCTG -GCTGCCATGTGAGAGAAAACGATGTTAGCCACTACACCATATCGGACTACTACAGCAGAA -GTTCATATTCAGCTTTGGTCTTATTGACTGGGTCTAACACAACGTGTTCCTATGCACAAC -TGTGCATGTCTATATCAAACTGCGCCCTCTTGACCCATCTCCTTTGGTATCCTTCCAACA -GCCCCCAAACCCGGTTGCCTTCTTTGCCTTCTAAACCTTGCTCTACATACCCTCTCTGCG -CCCTCTCTGCCTACCAATTCCACAGTCTTCAATTTTCTTCGCCTTTTCCCCGCTTTTTGC -AGTAGACGCAACTCCGACACCAGGGCATAATTTTTTTGTCACTGTCAGCTGAATGTCGCA -GCATTTTTGGTGCTATTCAGGGATAGGATTATGCCATATTGACCGCCAGTTGGATTTATT -CTGATTATAGTCCTTATAGGAGGGTGGAGCTAAGGTGGGCAGGACAAGATCGGGGTTAGC -CCACAACAAGAGGCTCTCGCCAGTGACATACCTAACGCACGGTGGGTAGCTGTAAAGTAT 
-GGGTGCTTGCATGCAGATCCCTCGACCAATGATAGGATGCCATGTGCTGTCAGAAGAATC -GAAGCTGACTGTTGGTCTCGGCTGGCGCGGCACGCTGTTGACCAGTAATGTTATTTCTCG -TCAAAGTGGCGATGCCAGCTTCCGAGCATCACTGGAGGCTGGAGTTAGTGGACCATTTTT -TCGGGGTGGAGTAGGCAGCGAGTTATTTAAATGTGACTGGGGCGGCCCTCATCCCCACAA -CAAGATCTCAAACCTTGGGATCATGGAGAAAGCTAGATATAGTTACGACCAGCTGGGCAT -GTCTGCCTGCCGCAAAACATATGTATATATACTAGGCCACTAAGACCAGAGGGGGCCACA -GACATATACTGCGAAGTCGTCTGCCTTCCAAAAATCATATAAGGCTTTTGTTGCCCTCGG -TTCCCAAACTGACCCAACTTAAGATTCTAGTCGCCCAAAATTCAATAGCTGGCATAATTT -ATCCCTGACCCGGCTATGCTGATACCCCGTGTTCCTATATCACTTACGAGAATGAAATTC -TTTCGGTCGCTAAAAAAAAGGCCAACCGACCCAAAATTATTTCCGTCCGGGATCAAGTTG -CTTCACGATAGTCAAGGAGCTGAAATTGAGTACGCCTTTCTGCACAAGCAATATAGCCCC -GGCCCAACTAGGGCGCAGGTGATGATTAGCCGACTAACTTTCAACATAGCATCATCTTCA -TACATGGATTGACAGGAGATCGGGAGGAAACTTGGAGGGCAAAGGATGCCCCTGCGCCGT -GGCCGCAGACTTTATTACCTCACAAGATCCCCAATGCTCGAATTCTTACCTTTGGATACG -ATGCATACTGGGTTGGTCTACGAGTAGTTTCGGGAAACAGAATTGGAGACCATTCAATGA -ATTTGATAAACGCTATTGCACATCTCAGAGAAAAGGACAGCACGGTAAGCACCCCGCAAC -ACGGCCTCATTCTACCACATGATCTGTACTGACATGAAAAGAACACCCGACCTATCGTAT -TTGTATGCCACAGTCTTGGTGGGCTTGTGTGCAAAGATGTAAGTAGCGACTACGCCAACG -AGCGAGGCGAAAATACAGAATCTGATCAGCATAGGCTCTTCAAGAAGCTCAACACCGGCC -TGAGATGAGGTTTCAGCAGATTGCTCAAAGCACTCGCGCCATCGCTTTCCTTGGTACCCC -TCACCATGGGTCTAGCCTTGCTTATTTTTCAAAAGCTCTGGCCCAATTTTTCGGGTCGAT -AAAACAGACCAATCCTGAGATCCTGGGTGTGTTGAAAAGAGATTCCGAGGTATTAGCACG -CGTCCAGCGCAGTTTTCACACCATGATAAGATCGCGAGCTCAGCTGCAACTTCCTCCGAT -TGAAATTACTTGCTTCTTTGAAGAATTGCCATTAGATGTCATTGGAATTACTGTGAGTGA -AGTGTCCATTATCCGATCTACTTGCTGACGACCGCTTGATGCAGGTGGTCCCAAAAGATT -CGGCTATCCTTCCCGAGTACAATTCAATTGGAATACATCAGAATCATATTGGAATGACAA -AATTTCAAACCGAAGATGACCCGGGATTTGAAGCCCTTACAAACGAGCTCGCGCAATGGT -ACAAGGAATGTACTCGGTACTTTTCCGATGAAACACACACCAGCCATAGCATGATTCAGG -GTATTTACCCAGCTTGTCATCCACTGGGGCTTTGTACGTTTTGAACTGATCATTTCGAAT -TGTCAAGGTTCCGTCTCTGAGGCACATCCAAGTTCCGCAGTGAATAGAATAGCCATGCAG -CCCTTTTGTTAGTGGCCTGTTGAATTTACCCCGTTGGCTCACTCTCGCTAATGCTATTGA -GGATTGTAAAGATGCAACTTGTTATTATATCCCTTTCCTGAGGAATCGAAGGTTCGTCGG 
-GCGCTTCGAGGTGCTAGAAGAGATGAAAAGCAAGCTATTCCTTGATAGTGCATGCAGGAC -GATGGCTATCGCTGGTCTCGGTGGTGTTGGAAAAACCCAGCTGGTACTTGAGTTTGCATT -CTTGATCAAAGAGGCTCGACCTGACTACTCCATTTTCTGGGTGCCTGCTTTCAGTCGAGA -AGCATTTGAGCAGGCTTATGCCGAGATTGCTATCAGGCTTGGGATTATCCACACTGGTGA -GAATGAAGAAGTGAAATCATTAGTCAAGAAGCACCTCGAAGAGCAAACGGGAAGGAAATG -GATCATGATTGTCGATAACGCTGATGACGAAGATATCCTTTTTGGTGCTCAATTGGAGGA -TGGTATTGTTGAATATCTCCCAGAAAGTGAAGATGGCTTCATTCTATTCACCTCGCGTCA -TCAAAAAGCAGCCGTGGCACTAGCAAACAGCGATGTGATCGAACTCGGTCAGATGTCTGC -CTGGGAGGCAACCGACTTTCTTGGCAAATCTTTGACTCGAAAAGAATTGCTTTGCGATGA -CTCGGAGACAACGGAGCTTCTTAATGAACTAACGCACCTTCCTTTAGCAATTGCGCAAGC -CGCAGCATATCTCAATGAGAATAAGATTTCTGTCTCAAAATATCTTTATCTGCTGAGGAA -TACAGAGCAGGACATGGCCAGTCTTCTGAGCAGGGAGTTTCGCGATCGCACCCGATACAA -ATATTCCAAAAATGCAATAGCCACCACATGGCTCGTGTCATTCAACCAAATCCGAAAGAA -GGATCCCACCGCTGCCGAGATGCTCGCGTTGATCGCATTCATCGAACCCAAAGCAATTCC -CAGGTCTATATTCCCACCTATCAAGCCGGAAGAAAGACTTGAATATGCTATTGGCACTTT -GTGTGCTTATGACTTCATCACACGACGGGAAAATGAGGACATCTATGATATGCATAGACT -TGTTCATTTGGCAGCTCAAATATGGACACGCAAGCATGACCTTGCCAACGCGACTGAAAC -GAAACTGATCAAACATCTCAATGGGGTTTTTCCACCCAATGAATATGAGAATCGGGACTT -ATGGAGAGAATATTTTCCCCATGCCCTCGCTCTCCTCCAGGCAAACAAAGAAAGTCATCT -GAAAGAAGTCTACTCGTTATGTGTGAAGGTTGGGCTCTGCTTAGTTGTGGATGGCAGATA -CCGTGAGGCTATCTCATGGCTTGAAATGTCATTCTCATGGTGCAGCCACAACCTGCCTGA -AGAAGACTCAGACCGCCAATTATCACAGCATGTCCTTGCATTGGCCTTGTTAGATAATGG -TGAGGTCACGAAGTCAATTGAACTGCTACAGGTTCTTGTTAGAGCGCAAAATTCCCTTGC -TGAACGTGACCCAAAGCGCCTTATATCACAGCATGTTCTTTCAAGAGCCTACCTAGATAA -CGATCAGGTAGATGAAGCAATCGAGCTGCTTCAATATATCGCTACAATTCAAGAGGAAGT -GTTCGAGGATCACCATCCCGACCGTCTTGCGTCGGAGCATGTTCTTGCGCGAGCCTACCT -AAGAAAGGGTCGGACCGAGGAGGCAATCAAGATACTTCAGCATGTTGTCTCCACTGCCGA -GAAGACGCTTGAAAAACAGCACCCGGACCGACTTATATCGCAGCACGTCCTCGGGCGGGC -GTTTCTGGAAAACGGCGAGGTTAAGAAGGCAATTCAGCTTCTACAACATGTTGTTACCAT -GCGAGAAAATCATTTCGACGAGCAACATCCTCATCGCCTCGAGGGACAACATTTTCTTGC -ACGGGCTTTTCTAGCAGATGGTCAGAAAACCGAAGCAATCAAAATTTTGCAGCACGTTGT -TATGGTCCGGAAGGGACTTTTGGGACCCCAGCATCCCAAGTTTCTCGAGTCGTATAACCT 
-CCTCTCCTTGGCAAAGGAAAGAGAAGAAAATGCCAGTCAAGCAATCTGATAGCAGATTGA -TACAACCCTCCCCCCCCCTCTGCTCAGCAACCGTCGGAAGGACACCGTCGGAAGAAGATT -CGACCCGTGTTACCATTGAAATACATGATAATTGAGATGTGTCACGATCTCAGAGATGAC -AATCTGGCCGAACAACCCCGCGAAGCTCCTTCGATAACCTTTAAATAGACTTCGCCACAA -ACCGCAATTGGGGCTAAACCTTCGCCCTAACCCCTAAACCGTAACCGCACTACGGTTTTT -GATAGGTTCTATTAGGGCCCTAAGAATCCTAATTATAACCTATTTTTATATAAAATCAAG -GTCTTAGTATATATAATCCTACTAAAGCGCCTTTATTGATAGAGGAAGGTACTAGTAACG -AAGAAGGGTAGGATATAGAGGATCTAGATAGTATAGTTAATATAGTAGATTATATAGCTA -AACCTACACTACCTTCTAATTCGGAGAGTAGTACTTAACGACGGGCTAATTTACGAGCTT -ATAAGCGTATAAGACGCGAGAAGGATATATGTGATTTTCATTACGGAGAGAAtataatta -tataattttatttttttattaaaattatcttagatatataaatataatGTAGAAGATATT -CAAATTTAAGGTTGTATACTCTATAATTCTACACCACTTTAGAAACTAATTAGGGCCCTA -TAAACCATTACGGTTTATACACCTCGCCCTAAAACCCGCCCTAATTGCGGGTTGGGGTAG -GGCGGTTTATGGCGAAGTCTATGAGGGGCTTTATAGAACCACGCATGGGAACGCACGAGG -ACCAACGACGCTAAACCAGAAGATCGATAAGCAAACTAAGAGCTATTATACTATATGTCC -GATATGGTGTAGTGGCTAACATCGCCGTCTCTCACACGGCAGCCGGGGGTTCGATTCCCC -CTATCGGAGATTCATTTTTGGTCCTACAAGTTCCTTAGTTATCCAAGATTATGAAATTTT -TGGACAAGTCTATATATGTTGGAAAGCCAACCCGGCGAGGTAATATTATGCAGTATATTT -TTTTAAGGTTGGCCTGCGCCACCTTGAAACGTGCCAAGAGTATCGATCACATGACCTTAT -CATATCTAATCGGATTTCTTTTTTAAAAAAAAAATTCCAAGGTACACAAGATAATATAAG -AGCTCAATTGCACTACCATACACCCATCGCGGAGAGCCTCAAAGCAAAACAAATTGACAG -CTTCCATCAGTACTACTCAATTCAACATTTCCCACTCCATCAGTAAGCGCCTTTTCTCCA -CCGAGACCGCGGGGTAACACTCGTCCCAAAAAAAAAAATAATCGCCAACCCACCTCAGAT -TCAACAATCATATAAATCACAGCCTCCAAAATGGCGACAACTTCCACAGCCCAGCCGTCC -GCGGATGCCGCCTATCACCTAGACCTAGCCATTACTCTCACAATCAACAGTTGGCCGGCG -CTCTCACTAGCCGTGCAGTCAAACTGGGGTGGCCCCAACTCGGGCGAGAAGCGGGACTGG -CTGTGTGGCGCAATCTCCGAGATGCTGTCTGATCGACCCGAGACTGATGCCGAAGACCTG -GAGGAAGTACTTGTCCAGGTTATGAACGACGAGTTTGACGTGGCGGTGGATGACGAGAGC -GCTGCGGACGTTGCGGACATGATCATGGAGCTGAAGGCGCAAATCGATCGTGGCGAGTTC -GGCACTGTTCAGCAAATGTTGGAGAATTTCCAGAAGAAGAGCCAAAATCGATCTGCTGTG -GGTCAGTTTCAGAGGGTTGAGGCCGGGGATGAGGATCAGGAGACGGATGATGAGAGTGAG -GAAGGTGATGTTGAGATGGGTGATGCGCCTAACTTGGTGCGGGCTCCGAGGGAGCGGGCT 
-GAGCCCGAGATCGACGATGATGGGTTTACGAAGGTTGTTGGAAAGAAGAGGTAGATGTGG -GCTTTTGTTGAATATATTTATATCTTCATACCTTGGTGATAAGAGGAAACCCCTCCGATA -GATATCCTTTTTCAAGTTACATAGATGCATTTATCACCAATGATTGATCATTCATCAAAT -CTGGCTTGCGTGTGCTACCTACCCTATGTGGCTGGAGATGACCCAAGAATCCGTATACGG -TTCAACCGCTTGGATTCATAGATGGAGCAAAAAGTCTATGGAAGCAATGTTGTGGAAAAA -GAAATGTTCATCATCGTCAGTTGTGGGCTGTTTGTGGTATCATTACGCGACCTCTGCCCA -CCCGTACCTCCCTGTTCAGAGTTCCTCGCAAGCACATCGCTGACATCGCTTTTTTTTGGT -GATTCATTGTGCTTTTGAGAATATGGACATTCAGGTATTCTCTGTGGCAAAAGCACTTTA -CTGGGCCTCGGTCTCAGCAGTGATGAGCTTGGTAATCATACCAATAGCAATGGTTTGACC -CTGGTATCGATATGTTAGCGGGGAATTAATATTAAGAAGAATGTAAAAAGACATACCTGA -TCACGAAGGGTGAAACGACCCAATTGGTTGTACTCGTCGTATGTCTCGACACAGACAGCA -CCAGCGGTGCTAGTAACCTCGAGACGAGCAATAATGGTCTGGCCCTTGCTGGCAAACTGG -GGAGGGCGCTTGCTCTTGCGCCCAGTGCCGGGCTCGAGCTTGTGGAGAAGAGCGGCGAAA -GTAACCTCCTCAACAGCGGAGTGAACGTGCAGCACACAGTTGAAACCAGCTGTCAGAATG -CTCTTCAGATCCAAAATACGGATCTTGGCCTCGAAAGCAGATACGCAGTGAACGGGACGC -TTAGGTGAGCACAGCACGAAACCAGGGAAGAAATCTTCTTCTTCAATGCCACGGAGACGC -AGACGGACCTGGTCACCAACCTTGGCGGTAGGGATCTCTTCTTCGGTTTCACCGTACATA -GCAGAAATTTGAACCTCCTCACGGTTAGGCATCATGAGATAAGTGCCGCTCTTCTTAATG -ATACCGGACTCAATGCGACCTTCTGCCATGGTACCCATGTCACGGTACTTTGCGCTGATT -GGCATCATGAATGGCGCGTTGACCTTACGCTCGGGAAGTTCGAACGCAGTCAAGAACTCA -AGCAGCGACGGCCCGTCGTACCAGTCACATACGTCTTTGGGAACTCGGTCCTTGATACCG -ATAGTGCGCTGGGCAGAGATAGGCATGCAGAAGATGTCCGATTTCTTGTAGCCCAGGGCT -TCCAGGAACTTGATGACCTTGACGGTGCATTCATCGAAGCGGGCCTTGCTCCATTCGACG -GTAGGGTCGTCCATTTTGTTGACAGCAAGAATCAACTTGCTGACTCCAGTGTTGCGCGCA -AGGAGTGCGTGCTCACGGGTCTGGCCACCTTTCTCAAATCCAGTCTCGTACTCACCCTTA -CGAGCAGAGATAACCAAACAACCGAGATCGGCCTGAGAGGCACCACCAATCATGTGGGGA -ACATAGGACTTGTGACCAGGGGCATCCAAGATAGAGAATTGACGCTCAATATCGCCCTCT -GGGTGCGGGATGATGACCTTGAAGAAGGCGCGGCCAACCTCGACTGTCTTTCCCTTTGAA -CGTTCCTCCTGAGTCAGATCAAGGGCCCACGAGAGATACCAAGTCTCTCGGCCGGCTTCC -TTGGCTTCCTTCTTGTATTTATCCAGTGTACGCTCATCGACCATGCCAGTGGCGTGCAGG -AGGGAACCTCCCAGAGTGGACTTGCCAGCATCGACGTGACCGATGAAGACAACATTGACA -TGCTCTCTCCGCTCACCGTAAATTTCCTTTAGGGTTGCCTCATCCACATCAGCTTGCTGT 
-GACTTTGCGACAGCATTGGCTTCACGGGCTTGCTTGGCTGTCTCGCCGCGGGCTGGGCTT -GAACGTCCGGGGCTAGAGCGACCAGAAGCTGGCGTGGGAGATGCCTTACCAGTAGCAGCA -GCCTTGTCTGACTTATCAAGGGATTTGGTGGCCGCCACCTTTGAGCCTGCCTCAGCAGCC -GCAGCGCCCTTAGTATCACCATTGGCTTCCTTGGGGGTGGCGTCGGCAGGCTTTGCTGCA -GGGGTAGGAGTGCCAATCGAAAGAACTTTGGTCTTGGGAGCAGCGCTGGAGGAGGTTGCG -CCGATAGACAGAACCTTGGTCTTGGGCAGAGCGGTATTCGATGCGGGCTTGGGAGCAGAC -TGAGCCGGTGCTGCAGCCTGGGGTTGTTGCGCCTGTACTGGCACATTCTGGCGAGGCTGC -TGTTGATAGCCGCCATATTGCTGCTGTTGCTGTCCATTGTTGTACTGGTTAAAACCACCA -GGCTGCTGGGCATAGGCATTGTACTGGCCGTAGGCTTGCTGTTGCTGGTAACCACCGTAG -GCTTGTTGCTGGCCGTACTGAGCGTATCCCTGCTGGGGGTAACCAGCATATTGCTGGTAC -TGCTGTCCGGGAACGAAGGAAGAGGCTCCAGGCGTGAAAGAGGCGGCGCCGGGTTGGAAA -GAGGGTGCCTGGGCCTGAGGGCGAGATTGTGCGTTCAGGTTGACGCCCTCAGCTTGTCGG -GAAAGCTCATCCTCCCAGGATTCAGGGGTTTGGTTCGACATCGTGAAAGAAGATCCAAGG -TTGAATCAAGAACACGAAAATACTAACCAAAACGGATTTGGACAAAAACAGGTTGGGTCA -ATAAGAGGGGCCCCAGTGCCTTTTGATTTTGCACAGGTGAATGACTAAGACCCCGCGATA -TTTTCTTTTCACGTGATACTGCATTAAGTCGTTTTTTTCTTTGAGAGATGTTGAATGAAT -GCTTATTAGGTATTCGTCCTTGTCTTATGTTTCATACTTTTTCAATGAGCCGATCTCGAT -AGGAAGTCTTTTTTTCAGAGGCACAGCCTTGTACTCGGAGAGTTGGGCTCATCGTCTTTG -ACCTTTCTCTTATCGATAAGCACCCCTCCGTGTCTGCAGGTCCAGTGTTAGGGTGCCTTT -TATATCGCTTGCCAATACAATCTACACTACAATTCTAAAGGCCATTGGCCTTCAATTTAA -ATCAAGATGGATACTTTTCTAACGGGGGTTAGTCGCCTGTATCAGTTGTCATGGGGCGAG -CAGCTAATTCGTTCCATACAGCTTACCCGGCAGGCCATGAATTATGCCATTCGGTATGGA -TGATGATGACTGAAAAATGATAGCCTCAATGCTAAAAATGTATCATACAGGTCGGGAATC -GCGATAACTGCCAGTTATGCAATGCGCCAGTCATCGCGCTTGCTTAAAGTAAGTATCCTT -CTTGCAGTTGCGGATCAAACCAACGCTGATCAGTTCATTTTTTTGACCAGAATGTCAAAA -GTGAGGACAGAGATGAGCTACTTTCCTTACAACAACGGCTGGAAAGCAAGATACAAGTTA -TCGCCCCATCTATTGACATGATTGAATTAATGTGAGTCACCCGACACTGGGTTGGATTGT -TAGAAAGCTCATAATTGGCCTAGTGCTGCTCGAGGAAATACGTCATTGGAATCAGCTGTC -GCCCTCACAAAATCTTTGAGATGGGACATTCAAGCCCTAGGGCAGCGGCTGGCAAGAGCC -GCGGCATCTGAAGAAATGAGACGCAAAGGAGCCAGCTCAGCCAAAGATCAGACGAACAGC -GAGAAAGAAATTAAGCTTATTATCAAAGATATCAAGAAGCTGTTGGCTCGCATTGAGGAC -ACAGTACCCTTGATGAATCTGGCCATAACCACCTCTGGGGCCAAATTATCCACCAACTTG 
-CCCGCAACAGTGTCGCCATCCCGACTCCTCCAAGCAAGCACATTTCTCACAGCAGGCGAT -ACACAATATTCAATGACGCCATCTCAGGCAGTTCAGGTAGGGCCGACTTTTACCTTGTCA -ATGTACATGCTATTTGCCAGCCACTTGCGACCTCACGACGAAGAATCTGTCCGCGAGGCC -ACGTGGAAGGAAGTCATGCACAAGGCACGTTTGAAGCTGCGTCGTGTTCCAATAGATTCA -CTGCAGTCCCCAGACCAACGTCCGAAAACGAAACTTCCAGGACCGGCTGGCTCCGATGAG -TACTTCTATCAGGTCCTGATCATCGAAGACCTAGATGATGGTCGAGTCCACACGTTTGAC -GAGAATGATCCACAGCCCCAGAAATATGAAGGTGTTTCCTCGGCAGGAATAAGGGAGATT -CTTCCTATCCATCAAATTTCAAAAATCTTCTACGCTGACACCGGAAGGATCCTCAACATC -AGCACAGAGGGCGAAACAAACAATCCAGTGCTACTATTGAAACGGGACCTAAATGCTCTC -CCGCCACGACGCATGATGGAACGTGACGAGGTGGAGGACGAGTTCCCTCACGCAGAATCT -GAAGAGGAACCAGAAGACGAGGAGCAAGCCCAACTCGATGCACAGCTGAGTGGAGGCGGC -AACCCGGAGCCTAGCAGCTTTAATTATCTGCATGAAGACTCCATCCCCGAAGAATGGCGT -CTGCCTCCCGGACTAGACCCAGAATGGATTGCATTCGAAGTCTACAACGAAGATGAAGCC -TCCGATACCGAGTCCGAGGTTGAGAATCCCGAACCAGATGAACCCTCAATCGACCCAAAT -ATGATGGCCAAGCTTTCGCTTAATGAGAAGGGGCATTCATCCCACTCCCCGTCTCCGCTA -AATCGGTCATTCTCATCCGCCGCTGCCACAACAACAGTCTCCAACCCTCACTTCGAGAAC -ATCCGCACTTCTCTCTCTCTCCTCGAAACCCTTCTCCGCCTAACATCCCTCCAGCAATTT -CAGCAACAATCACACCTCTCCATCAGCGATGAACTCCTCAACTTCTTCCTGGAAGAATCA -TCCACCACCGGCGCTGGCGGTGACGAACAGCATCGTCAGCGGCTCCGTTCTGAAGCCCGC -CGTCGCGTGGGCTGGGACCCCTACAATGAAAGCCCCGTTAAACACCGCGGCGAGGATTAC -CAGTACGGCTGGGAGCCCGGTAGTAGTCCCCGCCATTATGGTGGCGATCCGTATTCGCCT -GGCGGGCGGGCGCAGGGATTTCATATTCGTTCCAGGGAGAGTACACCGGAAACCCCGGTG -CGCAAAGGCTCGCCGAACGTCCGTCCAAGCGGTCTGCGTGGTATGAGGCCTGCGTATGCT -GAGTCGCCGCTCTCGAAAAAGATGGGTAGGTCTCCGAATGAGGGGGATCTAGAGTCTGGC -ACGACAGATTGAATGGGAAGGAATGGAAATCCATGAACTATCGTGCCTTGTACATGTATT -ATGACCATGATGATGAGATAGCGAGTGCATTACAAGATTCATTATTTCCTATCCTTCGGA -CCAATTTTCTACTATGTCAAATTGTTGTGGCGAATGCCATAATTCAAGCGGCCATCAAAG -GTCGACTAATGGTTCACCTGGGAACCAAGATAGTCAGGGCCCCAGCTTAAGATACTATGT -AGACAAAAAGAATGACGTGCATATTCAGATATCAACCGCATAGACGGGTAACATCGCTCG -CTGAAAGCAAGTTGCAGAAAACAGGACTGGGAATAAAATGAGAGAGAAAAGCCCGAAACG -CCAAAGAAAAAAAGAACCAATATTCCTAAACTCCAACCAAGGCAGGTTTCTTCGGCGGAA -AATTGGCCGTGTCTAGCAAGCGCAGCAGCAAGATCACGCTCTCTGGGGGATATATGGTAT 
-AGACAGATGCCTACGAAAGCAACTTAGATTCACTTGCCGATGCGGCCTTGAAAAGAGATA -AGTGGATTACTCCCAATCTTCGTCGCTCTCGTCCGCCTTCGCAGCGGTGGGACTCTTCTC -CTTGGGACTGCCAGGTGTGCGGCTAGCAGCGCCACTCACAACATCGTAGCTGGATTCGCT -GTCGGCTTGGGAGTGCAGGTCGTGTGACCGGCGAGGCTCGGCTTTCTGGGTTTCTTTGGT -GGGAGCGAGACTCTGGGCTCCAGAGGGGACCTGCGGAGTAGAAGGAGACTGGGACTCAGA -GTCGGAATCGTCGTCCCAGCCGACTTCTTCTTCGTCTTCGGCATTGGCACCTGTGGAGTA -GAAGTAGTTAGAACTGAAATAAAAAATTACCGGGGCGATGGACTCAATGTACCCTTCAAA -AGCTCCTTGCGCTTCTTCTCCTCGGTCTCCACAACGAGACGCAAGAAGTAGTAACGAGTC -CAGAATTCCACGTATTCCACCTGCTCCGGCACGAGTTTCTCCATGGCTGAACGGAGCTCG -GGGTATTTCTCCAGATCAGCCGCGATCTCGTCCGTCTTCTTTTCAACATTGAACTCCTTT -TTGAACGCAGGCCACTCGTCGCTGACCGGGTCTTTGCTAAAGCTTTCCAGGTTCGAATGA -ATAACGTGCAACTGTGCCTCGAACCGTGTAGCATGGATAACCCGCTTTCCCTCGGCATCC -TTGCTCTCGAAAAGAAGCTTGCCGGACTCATCGGCATCGGGAGGTAGAATACTGACGGCC -TCGCGCAGTTTCTGGCCGATGTTCAGCCCAAATCGTAAGATCGCTTCATCGGCTGCTTCC -TCCGCCTTCTCGATTTCCTTCAGCCGCCGAGCGGCTTCCGCCTTCAACCGGGCAATAAAG -CTCTCTCCACTGGCGCCAGCGCCTTCTCCCTTGTCACCGGATTCTCCTTCTACAACAGCC -TTGGGTGTGGCGGTTTCATCACTTGGTCCCTCGCCACTGCCCGTCACAGAAGCTGTGCTG -AGAGACAGCCCCTTTGTCCGGTCGGCGAGTGTTTCCCTGAGATCTGAAAATCCTTTCACG -GCTTCTTCGCTAGCAGCTGCATACTCCTGGCGAGCACCCTCGTAGTATGTTTCTCCCTGT -TTGCGAACATTGTCCCATAGTCCTCCAATTCGGGAGCCCCACGGACTGTTGGAGAAGGCG -CGGAAGGTCTCTTGTAGCTCATCGTTGAGATTGGGGTTAGGTTCCTGGGGCTTGGAGTCG -GAGGCTCCCTCTTTGTTAGAGTATGACTCCTCCAGAATATGATCGTAGGCAATATCCATG -ATTGGGAAGTTGTAATAAAAGAAGGAAGAGGATCAAATGATTGGAAGCACAAGGTTGCAG -TATCTCCGCGTTCATGGCCCGACCGCCCCGCTTGCGACGATCTTCAAAACTACCTACGTA -CAGCATACAAACACATTGGTTAACAAAAGATTATCTATATAGTACTCATATTCTACAATA -TTGTGGGTATTCATAATTCATAAACTAATAGACAGTGACCTCGAAAACGTCATCCTTCAG -GTAAGCTATAAGGCTCTCCCACACTATCCTGACTTCCCATCCAAAAGTTTGGGAGCGGAA -TGCTCTCCATGACGCTCGATGTAAGAAGCTGACTGCGAGATACGACTCGGCCCTGCAACT -GGGGATCTCGGTCCACCGCCTGCGAGGACTCAACTTGAACCACTGAGCCAGGAACACGAG -GCCGTTGGAACGAACCATCATTTATTTCGATCCCACGTTCTCGGACCCGGATGGGTGACG -ACTCCGGAACCATTTCAGTAGACTCTTTTTCGCCTTCCTGGCTCTGCGTCTGTGTCTGTG -AGGACAACCCCGATGCTTGTCTTTGCAGAGAAGGCCTCCATGGCTCCAGATTGGTGTACT 
-GGGTACTGCCTCCATGGGGGAAAAGCTCAGACATCTTCTGCGTATTCAGCGTTGGGATTG -GCTCGTGCGGAAATTGAGTAGCTGCATGCATTCTTTGATAGTAAATTGAAGCATCAGACA -TTGGACATTCGGATAGAGGGGCGTCACCCAATCCAGGGCTCGATTGCACTGGTAGGGGAA -GGTCTTGGGAATCATCCTTGGGCGAATCTGGGCTGTGTTGGATCCGCTGTGGGCCATCCT -GTGGGCCAAATGCTCGGTGTGTTTTCTTTGGAGATGGTGTCACGGAACGCCTATCGAGGC -TATCCTCAAAGTCTCCGTATTCTGAATCCGCATCAGTTTCATAGACAACTGTCCTCTCCG -TCTTAGTGTATTCATTTCTCGAGTTGCCGTTTACTTGGGTGGGCAATTCCTGGTCAGTCG -ACTCCGTCGGAGGTGCAGATGAAGAGAACTGCTCAGTAAGAGTTGGATCGGGGTGAATCA -AAGTGTGATTAATGCTTGGAGAATCCAATGTTGTTGACTGGGGTAAAGATTCGCCTCCAG -GATCTTGTGAATCCTCAATCACTTGCCGAACTTCCAGCGGTTCTTCTTTGATGGTTTGCA -TATGATTAATATTGGTATTTGACGGGTTCTGTCTTGAAGGAGAGCGCGTAGCGCTGCGAA -ATTGAGATGAGGTTATAATAGCAAGACCAGGGCTTTCCGGTGTCTGTGAAGAAGGTATCT -CGCGTGTCCGAAGCTTCTGCGGTGTAATAGGAGCTGCAGATGCCTGTTTGCCCTCAGCTG -CGCTTTCAGTATTTGACTCTTGTGTTTCGTTTTGCTGAGATATAGGTTCCCCGGTTGAGA -GATCCAGCTCTTCTTCAAGGACTCTGTGCTTTCGTTTGGCGGGGGTTTGACGCTTCGTAG -GCTTTGGTTGATTCGATTTGGGCGTATGATCGGCCTGTTCACATTTTATGTCCTTCTTCT -GCGGTGTCGGCTGGATATACCCCATGTTCACCTCGTCGTCATCATCATCGATGGTAATAT -AACGTCGAACAAAGTCCATCTGGGTTAATGTCTTGTCCCGTTTTCCCACTGACTTTTCCA -GGGATTTTCGACCGCCCCTAGGACGAACACTCGCCCTTGGTGTCTCGCTTTTTTGTGGTC -CATGGCCTCGGTGTGCCTCTCGATTGACGATTGCAGAGCTTCTCCGCTTCTGTGGGCGTT -CGTCCGTATTGCGATCTCGTTCGCCCATCTTAGATTTCGAGATACGAGCTGACCGAGCTG -GTGGAGCAGGGAGGTAGTCTGAATTCTTGTCGGACCCATCATCTACAGTTGGCCGCTCAT -TGGTGTCTTGAGTATCGCTACGACGTTGGTGCTCGTCGATGTAGCCAAGCTGATCATCGT -TCGTAGGTGTGGATTGTGTCACAAAGTCAATCTGTGTGAGAGTAGATTGCACTCGTGACA -GCGATGATCGTTTACGAGTACTCGCCGATGTCGATGACTGTGGTGATCGGTTTCGCGCGG -TGATTTTCGTGGTAGTTGCTGGAAGACCTCGTGTATTAGGCGTTCTCCGGGTGCGGAACT -GATTTCGAGGAGTTTCTGGCAGGTTTCTAGATTCATGCGGTTCTTTCATCGCAACAGCAA -TGATCCAAGGTTCGAACATGAGACTGGGGCCTAGAGGTAAAAGATATGACGTGAGGGGAT -AGATGCCTGAGGCGGTTCGCGTCGACGCGTCGCTAATATCGGCTTGGTACGTGCGTTTTG -ATCTCAAGACTTCCAACCCCATGCATAACAATGACCACAAACAAAAATCACAATGCAATT -ATATACAATATATTTCAATCTGGACTATTGAATTTTGACTACGCCGTAGTGTAGTAAAGT -TGAAGAGCAAAAAAGAAATCTCAGGTTTGGTTTTAGGAATAGCCGAACGCGGGGGTCGAA 
-CCCGCAGCCTTAAGATTAAGAGTCTTACGCTCTACCGATTGAGCTAGCCCGGCCGGGCTG -TTGAAGAGCACAGGAATATAACACTACATAACCCAAATCTAAACATTGGGAGTACAACTC -GTATTCGTGATGTTGAAGAAATCACATCACAGTCTTTAGGTAGGAAGTTGAACCTTTGGG -AGGAAAAAAATTCATGTTCAGCGTGTATAGATAGCATATAACAGTTCTTATTATCAAGGA -CTAGCCCTCATCAGACATTAGCCATTCGGGAATCGCATCCTCATCTTCAGGGTCCTCAGA -AAGCAAAGATCCCTTCTCAATCTCGCTGTCAGAACCATAATTGAGCACTGGTGATGGTGG -TCGAGAGATAGTGACCACACTTGCAGATTTGGAGCCCCCATTTGTGGATGATTCTACGCT -GATAGGAACTTTGTCAGATGATCCTGCGAGAGCCTGAGGTGCGGATGTCGGGTTTGAGCT -GTGGTCTGGGACCACCACCAGTGAATAGCGAGGAGCTTTCGATGCTCTGTTCCTGGCAAT -ACTCTCAGGAATCGATGTCGGTTTTGTAGCCAACCGCGCAGGCTTTGGGGGTTCCACCGG -GTTTGGAGCTGGATCATGATCATCGGGAAGAAAGCCCCCTCCAGGAGTATACTCCTCCTC -TTCCAATGACGGTTCGATTTCTCTTTCTGCATAGTCTTCGGGTAAATGCTGATCATGGAC -CAATTTGCCCTTTCCTGGGGCTTTCAAAGGTGAACTATTGGCTGGTTGATCTGGTTCGGG -AAAGAAACCACCACCAGCATCTTCAGCATCCACTTCGTCTTCGATATCTTCGGGCTGTTT -TTCTTCATCATCGCTGGCATACTCCTTGACTCTTTCTGCGATCCTAAGCTTCATCAAAAA -CAGCCGCCAGACCCGCAGAGCTTCCACAGTTCTTGCCTCCAGCGCAGTGTGCCGCCTCTC -ATCTTCGAGACCCCTCAAAACTTCCTCAAGCGCTTCCTGGTACTCGGCAGCAATAACAAT -TCCCCTGAGTACAGCCGTGCCACGGCGGCCTCTGAATTCAAATCCAGTGACTGCATCGGC -ATAGTCAATCCCCAGGATTCGAGCAGCTCGAACTGCTTCCGGGCGGTTGATGTGAACCCC -GCCAGGCGGAACCATACTGGGCACATAGACGTCGAGGTTTCCATATGCGTTCTTCGGAAT -TCTTCCCTCAATCACCGGTGGTGGGACGTATATGTCTGTCTGAAACTCGGCGTATAGTGT -AGTCTCTTGGGGCTCGTTTTCATCTTCTTCGTCACTGAAGCCCCCACCCTTGTTTCGACT -GGCAGCAACACGCTTCAGAGGTTGCTCGCCAACTCTAACATCCCGTCCGAGGCGATACCA -TCTATCAGAGCTTCGCACAATATGCACGTCAGATCGTCGATAGACGGGTTCTGATATCTC -ACTCCGTGCTGTGCTTTTTCCCAGCCCCACGTGCCCAATGACTCGTTTCGGATGGATCAC -TTCATTGCGACGGATGTTTCGCTCAAGCGCGTAGACAGGATGGTCCTTGAAATCCTGGAT -ATTTCGTGGCATGGGCTCAGCTGCTGATTTGGATGTTAATTCACTAATCTCTGCCTCGTC -CCGATCCTCGAAGAACGGTTTTTCATATGTCTTTAGCGCCTTATCCCACCACTCCTCGCC -ATTCCGTGTAACTTCCACGCGGAGTTTCCGCGTCTTTGCATTGAATGCCTTTACATAGCG -GCGACTTACATCTCTTGCGGATGCGTCATCCTCAAAAGCAACCACATAGCTCATCAAATT -CAAGGAATCGCTCGCAGGCGGTTCAAATTTTGAAGGTTTGGCAAGGGATTTCGTGACGAC -GGGGTCAATTGGAACCCATTTCTGTGCAGCTTCATTGAAAGCTTCCACCCAAAATACTGG 
-GAATGATGACTCTCGCGGTGTCGAAATCGGACCTGTGGATGTTAGTATGGTGTAGACTAG -CGAGAATCGCTGGAAGTCACATGCCTGAGTGGAGTTTTGGTTGAGGGGGACGGGAGGGAG -TGAATTTAGGCTGTCCAATCCGTCGAGACCTTTGAGCCGCAGATGACTTTTCGCCCTTGC -TGCGTTCATCTGTTGAGGATCCAGGATCGTCTGAGGATATTATGATATATTCATGCTTCG -TTTTAGACGGCGTCATGTCTTTCACGGTACCAGAAAATGGCAACGGTTGCAATGAGCATA -CAAGTCTCGCCTCTACGGCTACAGACCGCAATAGGGCACAGAATAGCTGGGCTCCCAAGT -CCCGGGATCCTTCCATCATCTTTGCTTGGATTCGAAAATCCTCTTTGGATAAAAAGACTT -CGGCATTCGACATGATTGACTTCTATATATTCGTGTCAGTGGATGATTGCTGATTGAGCA -GCTCGTAGAGAATACCAATTTCTGCTTCAATTGCGATTCATTTTCCACCCAGTGAGCCCG -TTTCATTCCTGGCTTGGTGACCCGAAATCTGCGTGCGAAGGCATCTCCAGCCTGGTTGAG -ACCGTCCATGAACGTGGTTGATCTGCTGTAATTTGGTTTACCCTCATCTGGGTGAAGAAG -AGCAACTACGTTTCTAGACAGTGTCTTCTTCAAGAACTCCTATGCTTCTGTTAGACACCC -AATTGATATGCCGATACAGGCAAATGCTCACCTGTAATTCCCCGTCGTTGCACCACATGT -TGCGCAACTGCACATGGCCAAGAAGGCAGAGAACATGACATTTGTGCACGTCCAGTCTGA -CTTTTTTCTCTGCTCCATTCAATGGCTTCCTTCTTTGAGCTGCTTTTTTCTGCTTCTCGG -GCTCTTGCTCTAGCGTGATCTGGAGCACCTCATCATCGCCTTGAGAGGTAAGATGTCTAT -TTACCAAAGACTGAGATGGTTGTTGTAATTCAACATCTTCCCAGTCAATGTCTGATTCTT -CTGATGTGGACGAGTCGTAAATAGTTTGCACTGGTTGTGTGTTATGGTTGTTTGCTTCGG -CATCCCGATGTGTTTGTTCCGAGGTACCTAATTTCACAGGGATAGCTTTCATATCCCCGG -CCTTCCGCCTCTTGATTGGTCTGTCTGAAGCAAATTGCGCAGGATCTCTGGCTTCAGCCT -CCTCCAGCAGTTCTCGATAGACATCTGGAGCAAAATCATCTGTCTGTCTCTGGCGTTTCA -GTTGACCGGCGGGAGTGGCTCGGCTCCTCGTAGAACGAGCTGGCGTTTGGTGCACCATTG -CAAGCCAGATTGTTGAAATGCAGAATCATGCGGTGGATTTCCAGGCTTTTTGGCCGATGA -GGCTCTACGCGTCGCGCGTCAGCACTCAAATTGGTGGTTTAGTACCCCACTCTCCTCGGT -AATTTAATGGGATACAACATCAAAAAGAATAGAATGTTCACTATTGCTTCAAAATGATAT -TTGCAAAATAGAAATATGTCCTCGCTGAAGCTTTGCTGATGCACTGCACTTCAAGTTCTT -CCAGATCACAAGGTGCGTCATGGCGGATCCTTATTGATCCGTGCCGATATCAATATAACC -CAATTAGGAAGTGAACTGTTTGCGTCTTGTCCTATACATCAATGTATCAGCCAATTTAAA -TGTTGATTCGTTGATATCTTGATTATTCCAGTCACTAAATGAAGAACATGCTTATAGCTG -ATAAAAGTCTCATCCTGTTGGAATTTCTAATTCGATGAAAAGCCTGCAATTGCAGTGGTC -GGAGATCTTTCTAGCATATTCAACAACACACCCCTTGTGTGAAAGAGACGGGGCTCTTTG -CTCGGGGGCTAAAGCTCTAATCGATGATCTCACCTATGGGATTGGTAATGCAATGGTGTA 
-TGTCCTGACCAGGGTCTTTGCCCTAGGGGTTTGGCACGAGCCACGCGGTATTTCGAAATT -GTGACCTTCAACGTGGAATACCAAACGTAATGCTATCATGAGTTTTTCATTCAACTAAAA -GGCCTACAATTGCAGTAGTGAAGGATCTTTTTGGCATTTTCAAGACATTCATCTTATGTC -TAAACCACCAAAACAGGCGAGAACGCTAATATATATAATTCCACTTGAGTGTTCCCCTGT -TTCCTCTTTGACCGCGACTCTGACAAAGCACGACTTTCACAGGCGAAATAAAATCAAAAC -CTGAAATTTGCCAAGCCAAACGGAGTTCCGTGACCAAAGAAGATAATACATTCCTGGGAC -ATGGGTTGAAGAAAAGTAGTTGACAGTAGCTCTTAGTCGGCTAAGCATCATAGAAGACAG -GACGAACCGACAACATGATTGTACGAGTTGTCTGATCATACGTTGTCTTCCAAAAACATC -TGGAAATTAGGGATACCAAGCAAAGCGATATGCATATACAACATACAAATAATGTTCATA -TGTTCGGAGTATTATTACATTCTACCTACTATACTTCCGATGAGTATCTCAGCCGTTCTT -TGTTCTCTCTGGCAGATTCGCGTGGATTTCATAGGACGGACAGCTTCCCACCAATAGAGC -TCACACCAGTGGATATAACCTGCATGCCAATTGATCATTCAGACTATACTGGAAAGTTGA -CAAAGGCCCTTCTAGAACGCAACTTACAATGTAAGGAATTTTTCGAATGAGACCAGGCAG -TGATAAGAAAGTCAGCACTGACAATAGCTTCAATATTTTAACCCCCATTCCTTGGATGGT -AGCGGGGTAGATGGGAAGTCTTTCCTCTCTATACCAAGTGTAGACGTGCATGATCTACAT -TTTCGCTTGCATTTGACAGTTGTGTGGTGCACGCCCCCTTTGCCTAATCCTAAACCGTGC -CGACTGGGCGTCTGGCCTTCACCCAGGGCCATTTAAATTGAGATTGTAGCGAATGATTTG -CAGTTCCAATTCCAAATAATTTTTAGCTTTTCTTAGGCGATGGGTCCCTGTGAAAATTTC -GTTGCTCCAATATACAAAGTCGAGAACGTGGTGGGTTTTTCGCCGAGACTGTGAATTGAA -CTGTGTGGATCTCTCTAGCCGATCATTTCTGTGAATAAGTCACCCCGCACCTACAAGAGG -ATTAATAGTGGATTTATTATTAGGCGTTGGTGACTTTTGAGAGGATATAAGGATGTTTCT -TTTCTATTCTTTTTCTGTATTTAATTTTTCGTATCTGTATTGCCGTGCGTCTTTAGTGCA -ACCAGCCGCTGCAAAAGGGTTGCCCTATGAAGTAATAATTCTTACGGCTAGACAGGCCTT -TCTCACCCGCGGCCAATAAAATCCAGTGATATCGTCTGTGGGGAATGATTATCTGTGAGA -ATACTAGAGCGGTCTACCCTTTGGGCAGCAGGCAACTGTTTTATATGCAAGACACGTATT -ATCAAGTGTACCCCATAGCCCAAGGTGGACGGGCTGAATATGCATGCTGTCAAGAATAAC -AGGCCGTATGCCCTACTAGAATTCAAGGTACTTTCGATAGGCTTTCACTGCCTTAGCAGG -AGATGAGTTGTCAAGAATGGAGCCAGGCACATACGTCCAGGTGCATGTAGTTAGCATCCT -ACCGAGGGCGGAAGCAATGTGCAGCTTAGAGTGATTCACGTTGCATACATTTGCTTAAGC -ACGCTTGTCAGTTGCTCTTCCAAGAGTCCAATACAGACTCACAGGTTGAAGTTTCCGATG -CACAACCATACCTAGTAGGTACTAGATATATGCACGGACTCACCCCGTGAGCATAGGAAA -GCGAATACGTGGTTTCTCCTAGAGGCGGACTAGATAGGCCATCATCTGATTGCCAGAGTT 
-TTGGCAACCTATGTAACATGCATCTTGGCTTGGGCGtacatatatatatatatatatTCG -GAGTTGTTCGGTTGTGAATATCCAACAGACCCTGTAATTTCCAATCTCCTATCTCTCCAC -ATGAACAGAGTCCCCCTCCACATACGGTGGGTACTTTTAGATGTCAGCGACTAAGGCAAG -GATCTCAAAAGAGATAAACCGCCAAGAGCTGTCTCCACTACTGAGTTGCTCCACTGCGGA -GTGACGATTTAGCAGTTCATTTGTGATTCGGTGAATATCGACAATTTTACCGTCGTACTT -ATGTACATAGCATTAGAGACGAGGAGTTCAACAAAAGATAAGTAGGTCTACATAAAGCAA -GTGGGAAAAAATTTCGACTGAAAAAAGCCCTTCGAGTATGTAGTTCGAAAATTGGGGCCG -TTACCGACACGTTGGAACCCGAGCAACTTTCCTAGTCACCGGGTCCGGGTGAATTTCAAG -GTTGTTTTGACTTAACCCAATTAATTTCCGGGGTGTCCCCTTTGGATGGAATCCCGGCTC -CATGGCATCGAGACCATGATTTTTTTCGGGCTGCAGGGATGCTGTGCTTGGGATTTTATC -TGCGAATTGTTGACGTGTTTTATTTTTCCATTTTGCATGATAATCTTTTAGATTACGACT -TTTTAAATCGGAGTACTCCGCGGACAAAAGATTCCATACAATTTGTGTGTTTTGGTGGCA -TAGGTTTATTTCCATCTACAGTGCTACACTATACATAAGACCGCGCCCCTTTGACTGCGA -CAAAGTAATAGAAAGTCATTCGGTCTAGCGATTCGAGATGGCAGCTGCAGTATTCCATAT -GTACGGAGTATAGAATAGCCGATACCTTATACATGGAAGGTCAGACAACCGAAATAAATG -GAAAAAGGACAGAAATCAATATGCAGACCCGGAGAACCGGGGACTATTGTACTCCGTACA -TGTTGACGCACATCCACATCTACCTTGTATATGCCCCTCCTTTTTGGAGCAATTTGGGAT -GAAACCCAGGCCTCCAGCCCCCGACGCTATTTCTGATGGATCACCTCCTCTTTTCCCACC -GCTAGACCTCAGTTCAGTGAGAGTCTAGCAATTAGGGTCGGGACGTGCATACCAAATAAG -AATAACCGAGGAAACTGCGAGCACTGTACACTCTAAATCTTCCGGAATTATCCCGGCATT -TTGATATTGCAACCCGAACCGAGCAACCTGCCCAGGTGGGCATAGGCAGTGACTAACAGG -TCTTGGCGTGAGAACTGTACGATACGGATTGTATGACCTTATGTTGAGAGTCGGAATCCC -AACAGCATACGGAGCCAAGAATCCCGTCTAGAATTCCACCCAAGCCCAAATAATTTGAAA -TTAGAATGGTCGACTTGATATGAAAAGGGGACCTCGGGGATTAACGGCGTTCTGACTACG -AGATATGCATGGAAATTGCTGATCCAAATGGAAATGGGTAAATCCCAGCAGCAAGGTTGA -AGGCGGCCCGTGGCTGAGCGGCATATTCCCAGCCAGGTGGAACCCACCTCTATTGGTCGA -TCGAGCCCAGCAGTCTTCGATCGTGATCGAAATATGTTCAGTTCCAGGATACATAACATG -TACCATCCTAGGGTCCTAGGGGGTTCGGGAAAATAGGTCTTTGAAGAGTATCGGATTCTT -TTTATTATTTAGTTCTAAAGAGGAACAGGCTTACTATGTTGTAACTTGTGCCCTTTCCAG -GTCTTTCagaacacaaaagagaagacaagaaaataaCTAAAACCCAGTGAAATACCTATA -TATTAATTCCATCCAATCATTTCCACTATACCCTTTTTTTTTTCCCACAGAAACTTATAC -ATGGGTTATTATTTCCGCTTTTAATGATTTCACGCCTGAAGCGCCTGGGAATCTTACTTT 
-ACATTGAATTCTACTCCACTTACCGGGGTTTGCTTTCTGATTTCCGAATATTTCAAAATA -TTAATGTCGATCCTGCAAATTATAGTTGTCGGGACTGTCCGCCCTCTGCCAGATTATAAC -TTAACCCAAGCTGTCGGGCTCTTTCCCCCCCTTTAATATCTGCTGCAGTTTGAGTCTTTT -CCTTTCTTATATTGGAACCCCGAATGGAACATCTTGATTCCTGATTCCACCTGGAAGATA -GCTCATCTGCCATCCAAAACTGATACCGGTCACTGTTGGTTCTACTGGACGAGAACGAGA -TGACCATGGGGGCTATACCAACTGTAAGTTTGTTTACTTGGGAGAGACGGTCTTAATTCT -CTATAGCTATTGATATTCTGGTGGCCAGTTGACTATCGATTTTCTTGTTGCTAACGTAAA -CTATGCAGGCACTGAACGATGACTGGTAATCATATCTCCATTCGCCCGTCTTACCTTTAT -TGCCCCACCAAATGACTGACCATATCATGACAGTGATACCCAGTCATTGACTGACAGTGT -AATCGATTATCCTGTCGAAAATGGAAGAACCTATCACAAGTACCACGAGGGAGGTTGGTT -TCCATTCCCGGCTCCGTCAGGTTTCGCACAGCATGGCTAACTCAGTGCTAGCTTATGTAT -ACCCCAATGATGAACAGGAGATGGATCGTCTGGACATGCAACATCACATGTTCAAATTGG -CCATGGACGGAAAGCTATTGCAAGTCCCACTCCAAGACCCGAAGCATATTCTTGATATTG -GTACTGGTTCAGGAATTTGGCCAATAGAAATGTGTACGATCACCCGTTCCGGTCTTAACG -ATTCCTGTCTCACCATCTAAAATTGTCTTATTGCAGCGGCCTTGTTTCCAAACGCCAAAA -TCACGGGTACGGATCTATCACCAGTGCAGCCCACAGAAGTACCACCCAACGTCCATTTCC -TGGTCGATGATGCTACGGAGGAAGACTGGATATGGGAGAACGGCTATTTCGATTTAATTC -ATACTGGCCACATGACCGGAGCCGTGCCATCATTCAAACGTATAATACGCCAGTCATTCA -AACATCTCAAACCGGGGGGATACATGGAATGTCAAGAACTAGATCCCAAACCGCAATGCG -ACGATGACACCATGCCTCCCGAGAACCTAGACGGAGGATACAGTGCATACGCTATGCACG -ATTGGGTAGACCTTAATGTGCGCTCCGGCCGTGAGTCCGATCCACCAAGACAATTTCGAA -TCGCCCACCGGATTGCACAGTATATGAGAGATATAGGGTTCGTTGACGTTGAACAACGGC -TCACCAAAATCCCCACAAACACCTGGCCACAGGACGAGAAGCTAAGGACCCTTGGGGCTT -GGAGTGAGAGCAATTGGCTTGATGCCTTGTCTGGATGGTCATATAAGCCTTTTCTTGCCC -TTGGCTGGTCGAAACCTGAAATTGAGGTCTTCCTCGTTGACGTGAGGAGGAGTATCCAGG -ATAGAAATGTCCATGCCTATATGAACTTTTATGTGGTGACAGGACGGAAGCCCCTGCCCA -ATGAAGGAACAACTGCTCCCTGAATTTCTATTATATCCCCTCTTTCTCGTCATGACTGTC -TTCCTTTTTCATTACACCGTTCCATTGTGTCCTCTGCCAGATGATTAGCATGTGATACCC -TTGGAAGGCCAAAATGTGCATATACTGCATTGGCGTTGTTTTTATAATTCCAACCTCTTT -CTTTATATACCTTCTTTATAAATCCAATTCTATAGATTATAGAATTATGTGAGCTTGGTA -TACATCTGAGGCGCCCAAACAAATTCGGCTTTGAAACGGTTCAATAGCATAACATGACAT -CAAGCCCAGATTAGACGTAGAAACAGCGAAAATGTATAAACCAGAAACTGGTGTCTAAAG 
-AAAACGCTACGGATGACAATATGATATGCACATTTGTACGAAGTTTACTCCGTACAGAGG -TGATAAATGGAAGGCTCAAGATTGGCCCGTTGACACGTCCACGCTAGCCCTGGAGCTGTC -GACGCGGGATTAGTCAGCACGCCAAGAACCGGAAAGCCATGCCAAAGTTGATTCCGAGTG -CTTGCGACAAGCCTAAGACAGCAATTGGCCTGCACATGCCATAGCTAATCCCATGTCTTT -CGGCAGACATGCCTATCGGCATGCTCTCAAGCCACCAGCTACTCCAACTCTTGACCATCT -CTGGATCAGCGATGACCTACTCGCAGCCACCTTTCGTCGCTTCGCCAACGGTCAACGGCG -CCATGGGAGTTGTGTTCCTGGACCGTTGGAAGCACGGCGACGCCTAGCAAAGCGAAGAAA -TACCGCTCTGGCAAACATAGGGGGAAGCCCGGCTGAAGATATCGCGTGTTTGTTTGGCCG -CAATGGAAGGGAACACATGAAATGGACCGACCATCCATGGCAGAGAGCACCATTCGACAC -CCAGAGTATGTGTGAGGAAAGCACAAAATCTTCTGATTATCTGCTAATCTTTATTTGAAG -GTCTCGCCGATCATTCTATGGGTCCTCCTGCAGTACCTTTCTCCTTCTCCGACCAGAACC -CAGAGCTCCAACCCGAACCCAGCGAATTTCAGACTACACATGCTTCTCCGTCGTACCTTA -CCAATCAAGCTACCCGTGAGAAAGTTCTAGAGGAATTCTTGGATGGGAAGAACTGGGGCA -TTGATGATGCAAGGGATTTCACTCGTCGGTTGAGAATTGACCTCCATCGGGAGCCGCGAT -ACAGCCGGCAGATATTCGAGCGGTTACTTGCCCGGCCAGACCCAGACCTTACTGAAGCAA -TTGCGTTCCTGGATGATCCATTCATGAATACCCGGGGATCAGGAAACTACGCTGCAGCTG -TCGAAATCTTCGTTCGAACAAAAACGAAACGCGGTAAACGAACCGCTGTTTTGAATGCGA -TAAATCGTGCGTTGGAGTTGGGGTTGATTTCAACCGATGAGATCTGTCTGATCATTACTG -CTTTACCAAATATAATCGTCGAACGAAACAAGACTCTAGGGTCATGGGACCATAAAGCTT -TGTTGAAGCACTATCGGGCAATGTGGAAAGCTATCGGGTGCTGTAACATTCTCGGGTATC -ATGATCTGGATAGAAACATTGTTGATGCTTGGCTTGAAGAGCTCTTGCGTACTCGCAGCT -TTCGTTTTGCCGAGGAAATGATCATCGAAACCCATGATGCAAACAGTCGCAGCCAATGGC -CTTCTGCTTTGGTTCAAGCTTGGTTGGAAACGATGGAAGTTGATTCGGAGACAAGCTTGC -CGTTTCCCGACAAGATCTTCAGCCAGCTTGATGTGAACAGTGCAGCAGACTGTGTCATTC -GTGTAACAGAGTCATTGGCATCTTCCCCAGCAGACAGAGTTCCCAGAAACCAGTTACTTG -AAAGATGGCGGGATTGTCTCTCTAATGCCGAAGTCATTTCGACCGTTGCCAAGTCACAAG -TCTGGTTCGATTTCCCTCTCCCATGTGTTCAAACACAGATCAAGCATGCGCCCATGAGCC -ACTCAACTCAACATCAGATCATTTTGCGTCTCTGGCTTCTACGAACGCTTGGCCGATCCA -CTGGGCCCATGTACAACCAGAGTGCAAGGGCAACGGATCAACCGATCTGCTCATTACTCA -ATCTTTACCAAACGGTGATACAAGATACCAGCGGCACATTCTTTCCGGACTTCATGCGAG -AAATTCATGACCTCGATCTCCCATACAACAGCCTTCTGCTCCTAGCCCTGAACAAAAAGG -GCAAAACTTCCATTACAAAAACAATCCGCAGAACCCTCGAACAACTCGAAACCTCCCAGC 
-TTTCCCTAGCAGAAGTGTGGAAAACCCCATCCGTATATAAGGGAATACAACGACTTTTCC -ACACCACCTTTGACCAAATGTTCCATCGCATGGACCTCACCAACCCGGCTACAGTTGAAG -AGATCCTGAATGTTGTTCGATCAGGCGACTCTAATAACATCTGGCCCATCCTCAGACTTC -TCAACAACAACACCCCATTGAAGATCAGCCTCCACAAAGCATGGCAGCCCATTCCACACC -CCGATGAGAAAGTCCTTGTGCGATACCATCCGGGCCCGCGGGACAGCCGATGCCCCGATC -CCCATGCTGCGATCGACCTCATAAACCAGCTGGCAGTTTCTCTCTCCTGCTGCAAGCACC -TCACTCCGTGTCAGTCCTTCCATATGGTTCACTGGCTTTATCGGTATCTTCGCAGACACG -GCGGGCCCGTGGATCCGGAGTTCGTGCGGGCCATGTATCATGCCGGTGTTGTGCGATACC -GACGCGACGGCCGCCGTATCTCGGCTACTCAGTATGAGTACATCCTTTGGATTGTTAGAA -AGTTTGAAGGCCCTGAGGTCGTCGAAGAACTAACTGCTCTGCCTCAAATTGGTGGATCAA -GGCTTGACTGGCAGTAGATGCGGTACGTGAATGGATGTCTACACAAGAAAATTTTGTAAA -ATAGAGCGATACAATATACCCCTGCTTCAATATTCCCTTGCCCAGGGGGTGCTGAGCTGA -GCATACAATCTACATTTAAGACACATCAAGTTTTCGAGAACTCCGATTTCCTGTCCTGGC -TATACCTGCCTACTCTAAACCTTGAAGCATAGGCTCCACGAGCATTCGCAGTCTTGGCTG -AGTACATCGTACATAAAACCCAATGAGGTAGAAGAAACCCATGCGTGCATTAAATCAACC -CAAGAAAATCACTGGTCAGTGCATTTCCAAGAATATCTAAGTATCAACTTCCATGCTGTC -CCCAGTAGCAAGCTTATCATCACCGGACAGAGTTTCGATCGTGACATATTTCTTCATATC -TTCCTTACTGATAACCCTCTGGTCCAGATAAGCAGTGGTACCCCGGGGGAAGCGGTAGTT -GCGCTCCTTCTGCGGGCGCTCGTTGGGCTTGATGTAGTTGCGGAGCAGCTGAGTGGGCTG -GTCCTTCTCAATCACACACACGTCGACATTGCTACCAGAGCCCAGATCGTTGAAAATACC -AGCCTTGATTGCCTCAGCGCAGATGTCGATGGCGCCCTGTCGGTCCAGGTCTGGTTGCCA -GGTCGACTCAAACACCGACATCGCAGCCAGCGATCCAGAACCCATCGTCACATACGGCAG -CTTGTCTGTGGATCCGTGTGCGTGCACAGTGTACAGTCCCGTGCCGGTCGGGTCAACGCC -TGCAACAACCAAGTACGCTCCGATGTGACCCTGGTACCGGAAGAGATGCTGCTTGAGCAT -CGTCATACAGGTGATCACACGGGGGTCGCGGCCCGTTGATAGCGAGTGAAGTTCGACGTT -GGAGCTGATGAGCGCCGTGGTGAATTCGGTATCGGCGGCGGTACCGGCTCCAGCACACCA -GATCTTCGGCGCGATGTAGTGTAGCTTCTCGCAGTTCTAAAAACAAAGAATGTCAGATGG -CTTGTCAAAAGAAATTGCACATAATTTGACCCGTTTATTTTACCTTGTCTGCTACTATAG -GGCCGCTGGTAGCTCGGGTATCTGCTGCAATCTGTAATGGCGATTTGAGTTAGCTCCTTG -GTCACAGCTCTTTTGAATCTCCTCGCATCGCCTGGGACCGTACCACAACACCCTTATCGT -AGATACAGCCCACAATTGTTGTACCGGTACTTGTAGCCTTGGGAAGGGGAACACCCCTGG -CATGAAGCGCCGCATTACGGTTGTAGTTTGTGAAGTCGAAGCCCGGCATCGTGACAACGC 
-TGGTGTTCGCGACCAGCTGAACTGATTATCAGGATAGAGAGTATGTATAGGAAAAGACAA -AAGCAAAATCCGAGTATGGGGGCAGAACGAGAAGGATGGATGGACGAATGGGGTAGCAAG -CTGGAGATGGAAGTGAAGAGGAAGTGAGCTGTTTGGAGTGACGAAGCTCCCGCAGCCTAA -ATTGGCGCCTTAATTATCAGGCATAAACAAGGTGTTTTTGCCCGGACATTATTTTCCAAT -TCAATGCTCTTCTCTATGTAGTCTCTCTTGACTTCTCTCTCTAACCCTTTGCGCATCGGC -GATACCCGTTGCGTCCCTTTTTGCTCTCTGCTTGTCTTCTCATTCCTGGGAAGCCGCGCA -AACCCAAGATTTTCGCGCATACTCTCAACCTCTTCTCCCGGAGTCCTCAGTCACTCCATA -GACGCTCGTTCTCTACACCTCCCACCTCACCTTCACCTAATGCTCTCAGACATAGCTCTC -CTTTAAGTGAAAGTACACGTTCTCCTTCTGGGGCATTCGACTCGGAAAAGCTAAGAATGG -CTTTCAACTTCGGTGCGTCTAACCCCGCTGGGGGAAGCGCCTCTGCGGAGCTTGGTCCAG -AGCTTCCTGATGTTTTTACAGATGTATGTCCCTCCACAACTTTACCCCAACATGATCATT -CCCTCTAATATATTTATAGGAGGTCGGATTCAAAGGCGTCTCTGGGGATGCAAATATCCG -CTTCCTCCCAACGGCATGGCCCGACGACGCGCTTCCTGCTCCTACTTCCTCACTTCTGGC -TGTCGCACACACTAAAGGCCTCATCGTTGGCGCTGGTCCAGATACACTGGTAATCACCAC -CGCTGATGCCGTGAGAAAGTCAATCGAGGCGCCGGCTGGAGAAGATATGGAGAAAACGAA -GCCGTTTCAGCCCATCGCCACAATCCCCCTTCCTTCTCGTCCCACACATGTAGCATTCAC -TCCGGGAGACGATGGTTTGATTCTTGCCACAGAAAATGGCCCTGGAATCTCAGTTTTCGA -TACAAACACCCTCACGCAAGGGAATGCGCAGCCAGCGATCTCAATCCGCACGAATGGCGT -GTCGCTGCGCGCACTGGCGCCAAACCCCGACCCTTCCTCCACACTTGTCGCCATGGTCAC -TGTTAACGGCGAGCTTCTTATCGCCGATCTCAAGGCTGGGAGCCTAGTTTCTGGACCCAA -TGGCCCGGTTCTCAAGGATGGCGTCAGCTGTGCTTCATGGAGCAGCAAAGGAAAGCAGCT -TGTTGCTGGATTGGCCGATGGCACTGGATACCAGATGACCCCGGATGGAACGAAGAAGGC -CGAAGTTCCTCGGCCATCAGATTTGGAGGGTGATTGTCACAGTAAGTTTCAAACATTATC -GGAACCAAAGATGTGTGTTTACTTACGATTCCACAGTTTCATCAATCTTATGGCTTGGGG -ACAATGTTTTCTTTATGATATACACACAAAATATCGCAGAGGATGATATGGGAATGAACC -CCCCTTCTTCATACTACATCATAACCCGCCGTAAGCAGGCACCGTTCCTCATCCAGAAGT -TGCCCGAAGTCTGCTCGACCATGGGCTTTATGCTGAAGCGTGGCCCCGCATTTCAATTTA -TCACCCGAATCAGAGACTACGAGCCTCACTTGAAAGATGTCTTGATCGTCGCATCAACTG -CGTCGACTGATCTCGGCCTCATCACGCGATCTGACCAACCCCTGGCGAACGATGAGCGAA -CCAAGCTTCATGTTGGTCAGTACATGACCACAGAAGTAAGCGACGATACCAAACGTGCTA -GTGTCCCGTTAAAGGACTCTGGAGATGAGACGTCGGTCATCGGCCTGGCACCAAACCTGT -CAGCCACAGAAAATGTCATTGCACCACTTGCGGGTTCTGATATTTTGGAGAGTTCGACGC 
-CTCTTCCCGGGGTTCTTCTTTTAAATAATGATGGAATTCTTTCATCGTGGTGGTTCGTTT -ACGCTGATTCCATTCGTCAAAACACCCGCTATTCTGGGTTGATTTCGTCTAGTCGGAcaa -cccaagctcccactcagccacaggctatagctccaactccagccccggccccagtcccag -ccccTTCACAGCAACCTGTATTTGGCCATTCAACGTTTGGAACTCCAGCATTCGGCCAGT -CAGGTTTCGGAAAGCCGGCGGGAGCCTTTGGTACCCCATCACGCATGGGTGCTTCGACCA -TGGGCGCAACTGCTTTTGGGAAACAAAATACTCCAGCGTTTGGCAGTCCATCCCAGCTTG -GTGGAAGCTCTGCCACGCCCTCTGGAAAGCCATCAACCCCGGCTGGTGCTCCCACCTTTG -GGGCCCCGTCCCAACCGGGCGTATCCTTCTGCACTCCTAGTACTCCTATACAGCCAAAAT -TCGGTTCGTCTGGATTTGGGGCCACAGGATCTGGACTTGGACAGCCTAGTGCCCCAGCAA -ACCCATTCGGGGCCAGTGGAGCAGCACTTAGCGGTGGCGGATTCAGTTCTTACTCCGGTG -GAGGCGGCCTCAGTGGCTTTGCAACAGCAAAGCCGACCGAGTCACCATTCACGGCGAAGC -AGTCAGGAGAGTCACCGTTCGCCGCAACACGGCCAAAAGGTTCACCCTTCGCAGCCACCT -CTTCTGGTGAAAGTGCTTTCGCTAAGAGCTCTGCCTCACCCTTTGGAGCCAAACCTCAGA -CCTCGTTCCCACCCCCAGCTGCAAATGAAGTCAAGAATCCTTTCGGTGGCGCTGCGCAAG -GTGGCTTTGAGCTCAAGTCTACCTTCAAGAGCCAAGTCTCGCCTGTCAATGATGAGCCGA -AAGAAGACAAATCATCTTCCGGGGCATTCTCATTTGGTGGCTCCTTTGGGGATATGGTGT -CAAAGTCCCGCAAGGGAAGCTCGTCGCCAAGTGAATCAATGGACGACACTGAAGATGTCG -AGCCAGTGAACAAGGAGACATTCTCAATTTTTGGTGGAGTAAGCAAACCTGCTTCACAGA -CTCCAACGTCAATGTTTACTTCAAAGACAACGACCCCGGCCGGGAACACTATGTCGATGT -TTGGTCCCAATGCCGACAAAAACCGCGTGACCAGTCCATTGTCAGTACTGTCTGATAAGA -CAGAGACACCCAAGAAGAATTGGTCTAACACATCAAGTATCGATGTGGAGGCGCCATTGC -CACCGGATTCTACCAGCCGGGCTAGTTATGCAGCTGGAGATACCTCTGCTTCCTCCAACG -TGTCGAATCGCTCTGTCGAAGATGCCCCGCTTCCTCCCGATTTCACTAAGGCGAAGACGC -CTTCTCCGAAGCCTAAAGATGACGCTCCTCTCCCCCCTGATTTCCTTTCGCAGAAGAAGC -CTGCACACAAGACAGACGACGCTCCTCTTCCTCCTGATTTCATTACCAAGAAAAGTCCAG -CACCGAAGACGGATGACGCCCCTTTCCCTCCCGATTTCATCGCCAAGAAAAAGCCAGCCC -CGAAGACGGATGACGCTCCCCTCCCTCCTGACTTCCTCACCCAGAAGAAATCTGCACCCA -AGACAGACGACGCTCCCCTTCCCCCAGACTTCCTAAACAAGCCCCAGAAGTCCGAGGTGG -CAGTCGTCCCCGAGGACGCACCTCTGCCGCCCGACTTTACCGCAAAACCAAAAGCCAAAC -CCGTCGAAGAGGCAGTCCCAATCCCAGATGAATCTGATGAATCAGGTATCTCCGACGAAG -ACTCCGAGGTCGCACACGAGTCAGACTTCAGTGATAGCGGCGAAGAAATCACACACGATG -AAGCGTTCCTCGCCAAATGCAAGCCATCAGCTGAGAGCTCGTTCGGTGCAGTATCTGAGC 
-AAAGCTCTACCGGAGGATTCTTCAGCAGTCCTGCTCGAAACGCGGATGACAAAGGCCGCC -TTCCCCGGCAGCTATTTGGGGAAATGCCTAAGCAGCCGCTGCTTCCTCCCCCCGGCCCCA -TTGCCCCAGGCAATCGTGAGCCCTACCGGTCACCAAGCCCGGTTCGGGTCGGAGGGAAAA -AGAATGTGCTATTCTCCGAGAAACCTCATGTCCGTAAGGGGTCCACAGGTGCGCTGCACT -CACGTAAGGCATCTCTTACCCAGATTGCCCAGCGTGATGGCCTCATGAGAAAGGCCAGCG -ATCTTGCTCGCGAAGAGCAAGAGAAGCAAGCACGCGCACATGCAGCAGCTCAGCTTCGGG -AAGAGGATGTTCTATCTTTGTCAGACGACGACGATGACGATAGACTTCGCGATGAGCTGG -CTCAGCCCGTTGAGCCCGTCGATACCCTGGATCCATTCTTGCCCCACCAAAACTACATGG -GCGAGACCGCAAAGCCAGGGATTGCTGGTCAGGTCGAACGACTCTATCGTGACATCAACT -CGATGGTAGATACTTTGGGCATAAATGCTAGGTCATTGGCGGGATTCCTCATGCACCAGC -AGCCCAATAAGCCCTCAGATATTGACGAGTGGGTAATGGTCCTTAACAGCGACCAGCCCG -CCGATATTCTCGACACGAAGATGGCTTTGAAGGACATTGAGCGATTTGAAGAGATGATTA -CTTCGATTGGAGAATCATTGGATAAGCAGCGGGTGCAGGGAGTGGATGAGAAACTCGATG -CATGCCGTGACCTGCTCACAAAACAGATTGTCACCCTGCGAGGCCAGTTCGCAAGCATCC -GCAAGACCCTTGATGCGCACACTGACATCGGCGCTATTCTCGAAGCACCACTTTCTGCCG -AGCAAGGTGCGCTCCAACATGATCTGCGCACCACATTCACAGATGTCCAGGTCAAGATGG -CCGCTCTTGAACAGGCAGTCTCCCTCCTGCGTGCCAAAATAGCCGACATTCCCCAGGCGA -ATGGCGCAGGCAGGAACCGACCAACCGTCGAGGCTGTCACCAAGACCATTGCTACCATGA -TGAGCATGGCGGAGGGAAAGAGCACAGATATCGATGTTCTCGAGGCACAGATGCGCAAGC -TCGGCGTGGACATTGCTCCTACTGGGCCCCCAAGTCGCGAGGGGTCTCCCTTCTCTACTC -CTCGCAAGAACATTGCTGGCCGTATTCCCATGACTCCGGGATCGCGAGGCTCGATCGACG -GTAGCGCCTATCACACCCCCGACTCCGTTTCGCGCGGTCTCAACTTCCGGGCCAGCATCA -ATGGCTCGATGAGACAGAGCCGTCTCCGCAGCGTGGAGGGTGCTGGTGAGCCTGCCGTCC -TGCAAGAAGATGTCATCCAATACAAGACCAAAAAGTCACGTCGCCAGCACTTGAATGCCA -ACCTGAAGAAGGCCTTCGAGGACAAGCAGGCCAAGGTGCGCAGTGTCGATGAATGGTAAT -TTGTTTTATTTCATCTCTTTGTCTTATCTGCTTGTATTTGGTTTTTCTCTAAAGATTCTT -ATCCGAAATTTGGGGAATTTGTTTTCAGCGTGGATTGGTATCCGGTGAATGGTCTTAATT -GGACCAGGGAGTCATGGCATGCACATATGTATGTAGCGGGGTGTTTTTTTTCAGTGCCAC -GGTGAGTGGTTAAATTAGCTATCAAATGCAAGCCCTATCTACATACAGTTCTCAATGTAA -GCTTACCTCCTGAGGTTTACTTAGCCAGTTGTTAAGTCACAAGTAGAATCTGTTTTTGTC -TTTCCATGGAAGTCGAAGTTGATCAGCTCCATCTTTCGATACCTCTCTATACTGCTTGAC -AAAGAAAGAAATTCAGTCAATGACATAGGTAGAGCAGAATCAACATATATAAAGAGAATG 
-CACATCCTTGGGGCTCAATATAGAACAAAATTAGCTCTAAGGGTTGTTCATCAAAGACCT -AAGTTATATGTCTATTAGAAGCCATACTATAATGAATGGCAAAGTGTCTTTGTTTAAACC -TAGGACTTCGAGTTCATATAAAAGTCCAATGCCCTGGAGCCCATATCAACCAGCACCACC -AGCAACCCTCTAAATACATTTTTAACCGAGTCTTCCCTGCAACCTATTTTATCTACAAGC -AAAATGGCAGGACGTAAGACAAACGGATGTCCTTTCATGGTGGGCTCGGAGCGTCTGGTG -GATCCCGAGGACGACATCGCTCTTTGGGTTACTGTCGAATTCCCTGAAGCAATGAAGCTT -ACCCACAGCGACGAACAGCTGATGGAGTTTGTGGTTCAGCAGGTTCGAAAACACAAGACC -AGTGTTTCCGCTTCCGCGCAGCATTACCGGCGACACTTGTGTCTCTCGCTACCAGTTGTT -GGATTTCCTAGGGATGAGGAACATTTTGATGCAGTCATGGCTCAGGCTAAGACTCTGGCA -CTCTGGTGGCTTGGAGGAATCCACGCGCACAAGGTTCATCTTGACCGGAATGTGATTTTC -CCTTAGTTCGACGAGAAAACAGTGATAATGGAGATGACCACCCAAAGCACCACCCaaaaa -aaaaacaaaaaaacaaaaaCCCGAAAGACCATCCAAGTGACTATTCTTCTAACGGATTAA -GATAGTCATTCTTGGGGTGTGCACTCGATATGGTTCTGCGAGGGGATCTGATAGACGAAT -CAAGCAGCTAAACTGAAAGGAAACAAATAACAGGACACCCTAAAAAAGACGCAGAGCTTC -ATAAGGAGCATATAGCACCTAAAACATAAATCCTGGCGTATGGGATCCGCGTTTTGATCG -AAATTTATAGTCTGCGATTCTGGCTCGACGAAGTTGAACCACCACTACGGCCATAAACTG -ACAAATGGCCGAAAAACGGCTGAACCACAAGATCAACAAGATAGCTGTAGCATATATTTA -GCCCAAGACGAAAGGTGAAAGGACCTATGAATGTCAAAGAAGCAGAGATTAGAGGCCATG -AAAGGTATACCTCTGCACCTTCTTACCCTAGATCATGTTCCCAGGACAAAACTGCAGTTC -GAATCCAACTTGCTCAACTTGCGGAGTATATCCCACTGTGCGACTAGACGGTTTCCAACA -CGTGGGTTCGATTCCACCGAGGAACCAGATCTACTCGGAAGACACGAATGCTCAGGTGAG -ATGATAGCGATATGGGAGTCGTGAGGCCTGAGAGGTCACCAGCGTCCACAACCGATGATA -ATACACGTGTATGGCATGAGCCATGGTTTCAGTTTGGGATAAGACGAGAAGCCACACGAT -CCAGAGCTCGAAAGCGATGGAATGGAATAGCACGAGTAAAGCCACAATTGCCCAGAAGAA -TATCCATCTCACTAGACAAAGTGCGAGGCTAGGGTAGGTAAGGTAGATTGAACTGAAAAT -AATGAAGTACAGTCGAGTTGCTGTGAGTTTGAAAGGGAGGGACAAACGGGACATCATTGT -GCATGATGTATGATTGCATTTAATgaagagagagacgagaggggagaagaagggaagaat -ggagagaaaagGCAAGCACTTGGCCTTCTATTGAGGAAGATACTTAGGTGACTACACAAG -GAAGGTGCAGAGGTAGTTTCCTAATTGGGGATCTATGAGTGACATCATTCCGTGACATCA -GCCATGTGTATTTTGATATCTCCCTCAAGCACATTAAACTTAGAAAAAGCACCAGCCCAT -CATGTCTCACAACGTTAGTCAGGTCATCTTCTTCCGCGTCAAATCTTCCGTCAAGCCCGA -AGATCCATCTAGTGAAGAAGGCGAGGCTCTACTGAATATCTTCCGCGCGACCCAACAGCA 
-AAGCGGCCATGAGTGCTCATCCTGGGGACGCACCTCCGAAGACGAAGACACTATCGTCTG -GGTGGTAGGTGAGTTCGGCATCCTTTAGAAacagccctcaagctacaatataaccaccaa -cctacaaactacggcctaACCAGACCAACAGAATGGACCGACGCTCGCAGCTCAATAAAC -ACCCGAGTGCTCAACTCCTTCCTAGCCCCAGACAATCCCCAGCCACCGTCGTCACTTCGT -GTCACGTTTAGCCCACCACTCTCCTGCACAGAAACCCTGACCAAAAACCCCGTGACCGAG -CTGTGCACGCTATCGTTCCCGAGTGATCTCGACGTGCTAGCAGTCAGAAAAATCAACGCG -GATTTGATTAATTTCCGCACTACGCTGGTCGAACAGCTGCCACAGTCAGCAGGGCCGCGG -TCGTGGTCGATGGGACATGTCGACCGGCCGAGTAAGCTGCCGCACGAGAAGAGCCCAACT -GGGGAAGTCTTCGCGCACCTCCTTGCTGTTGGCTGGGATAATATAGAGACACATCTCAAG -GTTAAGGAGACTGAGAAGTTTATCACGAGTATTGCGCCTCTGAGGGAGAAGATGTTGCCG -CCTATTCCTGGGTTGGAGATGAAACATGTTAGTTTTCAAAAGATTGAGGGTTAGGAAAAG -ATTGAGGGTTAGGTGTTCCGAGAAGGTAGCGACAAgagcggaagggggcagagagagagg -gagagcgagGTGCCGTGGATTAGTGTCGGGGGCTTGCATGTAGTTTGAGCTGTGTTCTGT -GGATCATAGGATGTCGGTGGTAATTAGAGCAGGTAGTGGAATGGGAGAGGGCTACTTGAT -TCAAAGGGATAATTGCGGGAATTGTTATTAGAAAGGCAAGGAGAAATCAAATCACACGCC -CTGGAATAAGATACAACGAGAAAGTAAAAGGAAAGGAATAACACCCTGAGAGAAATCGAA -GGCAAAGAAAAAGAGACTTCATTAACCACCGTATACTTTTGGGCAGAACCCCGCAATGAG -ACTCTCATAGCAAGCGTATTGCGCTGCTAGACTTAGACTCGGTGTTTCTCACTCTCTGGT -CCATCCGTTAGAATCTGGGCACAAGAATCTTCATCCTGGGAAGGGGAACACGAACCAAGT -CGGGAATAGAGCATGTAGAGAGCCATTTGATTACACTCCGCCGTACCAGTGATAGTAACC -AGACGCTCGTTGCTGCTCTCCTGGGGCTCGTTGATCTTGATCACACTGCCGCTGAGATGG -CGGATCTCGTTAATTTTAGCACCACCTTTGCCGATGATAGCGCCAACCATGTCATTGGGG -ATATAGATCTGCTGGGTCAGTGGCTGACCAGGCATGGCGCCGCCCACGGGAGCAGGGGCG -GTTGGGGGTCCACGGCCAGGGGTCGGCTGTTGAGGACCACCAACGTAGGGAGCGGGCTGG -TGAGGTCCTGCACCGCCGTAAGGGGCATGGGGAGCGGCGGGGAAGTGCATTGGAGTCTGA -GGAACAGGAGCAGGAGCTGGCTGACCGGGCAGGTAGGGAACACCGAATGCACCAGCACCG -GCACGGTTGGAGTGAGGGTGACGCTTGAACGTGTCGGGGTGGCCGTATTGGCCGCCGGCG -GGCTGAGGGACGTACGGCACAACCTGCATACCACCAGGGACAGCACCGGCAGGACCACCG -CTGCGAGTGGCATATGCCGAAGCCGCGGGACCTCCGTAACGTTCAGATAGTTGCTCGACC -AAGGTAACGGCGACGTAGTAAGTGGCAATGTGGACGGCATCCGCAACACCGAGGATAACG -AGTGATCGTTCGGTGGACAGAGGGAGACAAGTATCCGAGGCGTTCAGGCGAGCGCCGGAG -GCCTCCTGGATCTCGCGGATGCGGCTGCCACCCTTGCCGATGATGGAACCGATTAGAATG 
-TGAGGAATTAGCAAACGCAGAGGGTACGTCTTGGACTGGGCAGTGGAAGGGGCCTCAAGA -GGTTCATTGTTAAGGGTACGGATGATTAAACCAAACGCCTGGTATTTGTCATTAGCAATC -CAGTATTTTATAGAACCCATCAAATGGATAACTGACCTTGGCAGCAGCATCCTGGGGACC -GCTGACGGTCAGGATACGCTCCACTGCACCACGAGAATAATCACTGACAGTGCACTTGGC -ACCGGACAGACGACGAATCTGGGACACGTTCTCACCTCCCTTTCCAATCACGGTGGCTGC -CTCCGGGCTGGAGATCACAGCACGAATATGAACCCAGCTCGACTCGTCCTGGGGAGTTTG -GTCAGATCCTGCTCGGTCGGCCTGGGAAGCTGTTGATTGGATGGGTTGGGTGTCGACAGG -GAGACCCTTGCCATTGGGCGCATCGGGGACGACGGTATCTCCCTGGGTATCCTTGGCACC -GTCGACAGCAGGGACCTGGCTACTGGATACATCTGTTTCCGCCTCGGCATCGCTCTTGAC -TACTTTGTCCAGTGCTGGGCGCTTGGCCTCAGGCTGGTCATTGGGACCGGAGGGCGACGA -AGGGTCCTCGAGAGGACGCTTGGTCGATTGAAGGTTGGATGGAGAAGCCGACATCTTGAA -TGGTCTGTGCTGCTGTTTGTTTTCACTTACAAGTGAAATCGGAAAGGTGTCAAGAATGAA -ATCTTCTTTTATGATCGTTTTATGTACACCAGAATTTCCCACGAAGGTGCTGGTGCTGGC -GATCTGAAGGGGAGATTGGTGCTTGGAGAAGATTGGAGAGGCTGTGAAGGAAATGTAGTA -CTGAGGAAGTGCCGATGTAAGACGGGAGAGAGTGTAGATTCAAGTGTGAGAGGGCTTTGC -TGTCACGACCGGGGAAGCTTCACGGGGTTTGCCGAGATGCTCGAAAGGGATGAGGAAGCG -GAAAGGGTGAAGATGGGATCCTTGGGTGAATGCGATGGGAAATAGGAAATGAGGCAAGTA -AGATATCAAGCGCCAGAAACCGTTGCCTGCTGTACCACACTGTAGCCTCCGATAACGTCT -GAATCTGAGGTCAAAGGTGTAGGCTAAACTCGAAATCGAGTTGATAGCTTGATTGACGGA -AACGAGGTGGAAAAAAAGGCAAAGAAAGGCGAGAACAAGGAAATTGATGAATGTGATGAA -ACAAAATCACGGTGATTAAGGAGAAGAGGTGAAGTTGACGATGGGGGAAGGAACTCGAGG -GCGCGAAGGTTGGTCACATGACGCGTTGCATGGGCGTGATGCATCTCGTCTCGTGTTCAT -ATATTTTGATTGCCTCTGAAGGCTTCAAACATTTTACAAGTCTAAATGACAATCTTTAAA -AATATTGTTACTTCTAGAAGGAAATATTTACATCTCGCGTTACTCAATAAGCATGGATAT -ATCAGACCAAACCGTGCAGAAGATTCAGAAATTCGCACAAAAACGCCAAGAAGCAGAGTA -TGAGGCGGCGAAACAGCCTCTCAGCGGAACTGCATTACATGTCTACACGCGACGCCTAGA -TGCAACCTTACAAGGACTTCAAGAGCAAGTCAAACGCCAGCAGGATGAATTGAACAAGGT -ATGTTGATTCTCATTATGGTCGAATGCCCCGACTGACATGTGTGCCCCGACTGACATGTG -TTATAGCTGCGGGAGCTCAATTCCCTTGACCTAACAGAAGCGGGCACCGATACCTGGGCC -CGCGTTTCTCAAGCCCGACGGGCCAAGAAAGCATACGACTCGCTTCTGAAGTCCAACGAT -GAGCTCCCAGCAACAGACTCGGTGTTGCCATCACTTCTAGCAATAGAAGAAACGGCACGA -CTTGTTCAAGAAAACAAGGTCTCTGTCACAATGACCGCCGAACAGCTGTCCGTGGACCGC 
-GGGCGTCTGCGAGTCGAAGAGGCCAATCTACGTGACTCTCAGTCAATCGCAAGCGGGCTC -CGAGAGCGGATCCAGAGAATCCGTAATGCAAACACGAGAAAGGAAGAGCAGACTCCGTCG -CAGGTGGCACGCGAGCAGCTCGCCCTGCAGAAAAAGCAAAATAAAGAACTGGACCGCACG -TCCGCGAGCCTCAAGGCCTCGTTGGATAAGTTCATCGATGAGACTCTTGCTCCAATGCTT -GCAGCGGAGGATCTAGGCGGCCCGACTGTGGGCGATGATTTCGCGATATCAGATGCTACG -CTGAATGCAGGTTATACGGCGCACGGCAAACCCAAGAAGCAAAAAGAGCTGGTAGAAACA -GAAGACGGATCACAGCAGCGGATTGATAAGTTTATGAAACGTAACGCGGACGAGCATTCG -ACAAACAAGAGGGAGGCAGCTGCGAAGGAGATGCATGCTCTTCTCGGCGCCATGTTGGAG -GCTAACTCCTATATCGATTTGAAGCGCGATTCGGCGTCTTCGAGGTTCCTGGTGAGAGCC -AAAGTCGCCCAGTTCCACCCTCGAGATGCACGGCGGCTTCGACTGATTGATTTTGGTCGT -TCACTAGGGAATTGAATGTGGAGGTTTCCACGATTGACTGACAACATGGTATTATTGATT -CGTGGATGTGTACATGCATCTTGTTATATCCTAGTCTTGTAAACATTCCAAATAGCAACC -CGCACCTATGAAGTACCTTGAATTGCCCCCTGTAAACTGTGAGAAAATAAGCCGTGGACA -CCTAGAGATGCTTGTGTGAACGCCAATCAACCATTAACATCCCGATAAAAGGCGGGGAAA -AAACAATAAAAACACATCAAATAGGCATCATTCCAATCGTGGACTTGGGGGTTTTGGCCC -TGGGGTTCTTTCGCGGGGCAGAAGACTGGCTTGTAGAACGATCCGCGGATTTTGCTTTTG -TTCGCGCGCCTTTGCCCTTCCAGCTGCCAGTGGAGCCATGCTTCCGTCGGGCAGGTCGTT -TGCCACTGGCAGGCTGGCGCTTGCGACTCTTGGAGCCAGAAGGCGCAGGGCCGGATTCCC -AGTCGTCACCAGAGTCATACCCCGGAGATGCTGGTGGAGGGAAGTAGCGGCTTGTGATGT -TTTCGGTTGCATTATTGGGTGCGGTCGAGACAGGATTGTCCAGATTGAAGGCGGGAACCT -CATCCATAAACAAGTCGTCGTCACTATACTCATCGCTGCTGCTAAGATTGATGACGTTAT -GGTGGTTGGGATCCGGAACGACGCCGCTAGTCTCTGCTTCCTGCGTCATTTCCAGGTAGC -GCCGCTTTGCATTATCAACCAACCCCAGGATCTGCCGGCCGTAGCGTTGCACCTTATCTG -GATCGATACCCGGGATCGCAGAAAGCTCAGTAAGATCTATTTCGTTGTTAAGTAGGTCTT -CAAATGTATCGAGAAAGAAGTGCTTACTTTTGGGGAAGGATATCCCCATATCGCGTAGCA -CCTGATCAGAAAACGGCTGGCACCGAAGTCCCTTCTTAACAACAAGCTATAGCGATTGTC -AGTTTCTTGACGATGTATCAGTAGGCACTTGCGGTAAAACTCACGTCTTGAAGCATGATT -TTAGCATGCTCTTGGAAGTCCTCCACGACTACACGATGCATATGATCAAGTCGATCCAAC -TTCTCATCGCTGGTGATTGGAGGACCCATCTCACGTGTATTTGCGCGACGAGATTTGCCA -GTCGTGCGAATCGGTTCAAAGCCATCACTATCCTCACCGGCCGAGAAATGTGCGGCTGGG -GCACCGGTGTATTGATAACGACCAAGACGGCGGTCTTGGGCAGCTTGCACTGGAGAAGAG -ACCATGGTGGACTGAGGGTCGCCACCTGAATTGCCTGACTTTTTCTTCTGCACGGCGCCC 
-GTGGGCTTGCTCCTGGCCTTGGTCTTACTATTTGGTGAGGCACGAACCTGTAGCTTCATT -TGCCGCTGGCCACTCTCATACTCTGCTGCCCGACGGCCAAGAATGAGATACTGAACAGCA -AAGTCACTTTTATTGATCACGTTGTCTTCGGCCAATGCATCTTCGCCCAACAGCCGGTAG -AACAGACGTTCAGCTGCTCCACGGTCGAGGTCAGATCCCTTTCCATACCCCGGCATCTTG -CGATGCTCTGGGGCTTTAGGCCGTTTGCAGTCTCCACGAAGTATGTCACAGCAGTAGAGC -ACAGTCACCTTCTCCTGTGTATTCTGAAATTGTCTCACAATCTTGATGGCCCAGGATGCC -TGCTCGGTAAAGTCATGGAGCTCAAACACCAAATCAGACTTGCAGTTGTCGCAAGTGTTG -TTGCAGTCCTCTCGGCGGAAGTATTCCGCAAAATAAGCGAGGACTTGCACTCGTCTGCAA -TCACTGCGGTTCTCACAAAATTGAACCACATTTCGCAGCATCTGTTTTTGACGGCCTTTT -TGCTGACCATTGCCATCTCCTGCATCAATCATACGCTTCAAAGTTGCTGTATCTTTGTAG -CCATAGTAAAGATAGCAGCCAGAACGCTTGCCGTCCCGACCAGCACGACCGGTTTCTTGA -TAATAACCCTCGAGACTCTTTGGGATGCTATGGTGCATGACGAATCGCACATCGGCCTTG -TCAATTCCCATTCCGAATGCAATGGTGGCAACGATGATATGCACTCGCCCCAGCTGCCAC -TTACTCTGAGCTTCCGATTTTGCCTTCGAAGTCATACCCGCATGATAAGAGAGTGCCTTG -AGGCGGTACTTCTCCCGTAGATCTTCTGCAACTTTGTCACACGTCTTTCGCGAGAGGCAG -TAGATAATTCCGCACTGATTCCGGTACGAGCTTGTAATGGTTTCCGCCATGCTGGCCAAA -ACTTCATCATTCTTTCCCTTTGACCGTACCTCGTAGGTAAGATTAGGACGATTGAAACTC -TGCAAAAACACTTCACAGTCGGTTATTTTGAGATTATGCATCACATCAACTTTGACATTC -TCAGTGGCGGTGGCTGTGAGGGCCATAACAGGTACACCAGGAAACCTGGCTCGAACCTCT -CCTAGCTCTTTATAGTCAGGTCGGAAATCGTGTCCCCATTGACTGACGCAGTGGGCCTCG -TCGATGACCAGGCGTGCGAGTCTCTTTGCTCGATGAAGCTTCTCTAGGGCTCTAATCAAG -GCTTGGCTTTTACTGAGCATCTCAGGAGTGATGTACAAGACCTCCATCCCCTCGCCACCT -CCATTTGAGAGCTGGCTCATGATCCACGATTTTTCTTCCTGGTTGGTGTCGCCATTGACC -ATAAAGGCTTTAATGCTCAGCTTCCGGAGGTGTGATACCTGATCCTCCATCAGACTCAGT -AACGGAGATATCACGATCGTCACTCCCGTGGTGCGTCCGCCCGTAACCACAGAAGGTAAT -TGATAACAAAGCGACTTTCCTCCTCCAGTAGGCATGAGAATGAAGGTGTCTTTGCCGGCA -AGTGTTGCATCGATGGCTTCGAGCTGGTTAGGTCGGAATCCGCGGAGGTGGAATCGATCT -TTTAACACTTTCCTAACTTCTTGAGACCATGGATGGTTTGACCAGAAGGCGCTATGACCG -GGCGACTTCTTCGGGGCGGGAGCCTTGAGGATGTTACCAGAGGTCTCTGCGAAAACTTTG -CGCGTTTGACTGGCTTGCTCGAGAATAGGCATCGAATGAGCACCATCGAAGCCCCCTGCC -GCCTCCAATATCTCATCGTCGCTGACATTCCAGTCGAATTCATCAAGATCCATCGAACCC -AACGGCGGGGAGCCCATATTCCGAGAAACACAATTTTCATAAGTATCAAGCCGATGTGTC 
-TCTTTGATCTTTCTAGGATTTGTCGATGATAATGGTTGTTGGCCTGTGTTCAAGTCGGCC -AGCGAAGAAGAGAAAGGAGGACCCTCCCGGGTGACTGCGTGAGCTTCGAAGGTAGGCTGA -GTATCTTTTTGGGAAATAGGGTCGGAGGGACAATCATGTGCCAACTCGAGGATGTCGGCT -TGGGTTAGAAGCTGGCAGATTTCTCTTTCGGTTTGCTCCAACTCTGCCTCGACTGCTCGA -CTCTGCGCAAGCTCTTCTGGCATAGTTGTCGGGTCAAGACCTTGTGAAATCGCACGTATC -AGATTCTGCTTCAAGTCTTGCTTACGAGAAACACTGCCTTTGTGCGAGTTTTGATGCTTC -TGCAGTAATTCGATTACCTCGATCTGAGCGACAAGAGCCTTGTTTTCAGCAATCAATTCT -GGGACCGGTTGACCCTCCATGGCCCGCTGATATACTATCTCAGAATTCTTCTGCAGAGTA -GACCTCAACTTGGAGATTGCATACGCGAAAGCATTGTTCCCAAGGACCAAAAATTTCACA -AGATCCGGATCTCTTTCCGGTGAGTTGGACGGCTTCGACCAAGGTGTTGACGGTATGGGG -TCTTTAGCAGCTGTCCTTTTCTTCGGCTGAGTCGACGGTTGCGCAGTTGGAAACGCTGCT -TTGGGTGTGGATTCCAATTGCGAATGCTTGATCTCGATCTTTTTCTCGTCCGCCGCTGGA -CTCATCGCGGGGAGTATTGGGTACAGGCTCTCCTCGGCATCCAGAATCATCTTATCGGCC -GTATCTTCATTATCGACCCAGCCGTCAAAGAGATCCTCATCATCATCTGAATCCGCAATC -ACTCGTTTCCGATGGCTCTGTGGGGGAAAAGGTGAATTATTTTCAGGTCGTTTTGTCACG -CTTTGGGCTTTTGTTTTAGTTGCTTGAGTTGGTTGACGCGCGATGGTGGGCTTTTCCGAT -ACAATGTCTCTAACGGCGAGAGGTCGAGCCGTTTTCGAAGGAGTTCGCATTTTCGTCGCA -TGTTTTGATGGGGAAAGAAGATCGGATGTGTACTCGTCGCTTTTGCGCTTCTTGCCGCGT -TTCTCGCGAGTTCCGGATGATGTGCTCTCTTCTGGCCATGATACGCGGAGCTCACCGACC -GGGATAGTATCCGAGGATGATAATATGGATCGATCAAGCTCTCCTGTCAGATCGATCGAT -TCAATATCATCACGTCCAGGTGCGTGGAAACTATCTGGTCGCAATGGAGTTGACTTTTGC -CGGGCAGGTATGTATGATGATAATGGTGGGTCTTGGAAAGACTCTGATAATTGTACAAGT -AGCGTCAGTCTAATGATTTCCACAAGATTCGTTCTACTTGGGCATACCTCGCACTATGCT -TGATTGTTTTGGCGAAGCCCGTTTCTTTGTAGTGCACGGAGTGCTGGTGGGGACCGCGTC -GTGTCGGCTCAACATTCGAGGCTTGCTCGCCGATTGGGGTAGCAATCTAGCCATTGATAC -ATCCGTCTCATCCTCCACGTCTTCGAGGATATCATCGAGGGTCGCGAATTCGTCGGCGGG -GGGAATTGCTTGCTGCGATACGTGATTGTTATGTTCAGGCAGGCTGTTTTGAGCCAAAGG -ATGGCAGAGAGGTTTCCCTTGGGTTATCAACCATTTGAGATGAACCGCTAAGTTATTCTT -CGTCATGGGGGCCTATTGCTGTCATGGCCGTCGATCTTCATGTTTCGGAGCAATGGGTAC -AATCGCGAAACTGATATGGACGCGTCGGCACGTGAGCGGTCAAACAAATCGATCTATGGC -AGCAAGTACGTTCAACCGAGAACTACATGGTCAAATTGGACTAACAAAACACACAAACTA -TACACGGTCAATCATTATTATCCGAGATTCTCATATCTGCAAAAGTTATACGTAAACAAT 
-ACTTATTGGTGCAAGGCAACATACCTACAGACGTAGATGTGCTATAGAGGGGGGGGCAAT -TCGGTGTGGGGAAAGGCATAATTTTTGCCCCACGAAAAGCAGATCGCGAAAATTCACCAG -TGTCCAACATTGAGCCAAAATGGGTAAAAAGAATAAAAAGTCCGCCGAGCACAAGGATCG -TGTAGCAGCAAAGCAGACAAAGAAGACCGCGCAGAAAGAAAAGAGATCCAAAGCCAAGGG -CAAAGATGCAGACAGCGATGCTGAAGACGCAGACTTGGATGCCATTCTGGCCCAGTACGC -CGAAGAACAGGCCAAATTCTTGAAGGTCACTGAAGTCCCTTCCGAGCCGCCTGTACCACG -TTCGTCTGCAACGTTGCTAGCATCTCCATCAAACCGAAATGAGCTGTTCCTTTACGGTGG -AGAGTACTTTGACGGAACGCATGCCACTTTCTTCAATAATCTTTTCGTCTATTTGATTGA -TCGCGGCGAATGGAGAGAAGTGACTAGTCCCAATAGTCCTCTGCCTCGAAGTGGCCATGC -CTGGTGTCGGGGAGGAAACTCCGGAGGCATCTATATGTTTGGAGGTAACATTGTCTTGCT -TGGTATGTTGAAATGCTTCGTGCGGAAACTAACCATAAGTAGGAGAATTCTCGTCGCCGA -AGCAAGGCACATTCTATCACTACAATGATTTCTGGCATCTAGACACAGCCACAAGAGAAT -GGACCCGTCTCGAGTCAAAAGGCAAAAGTCCTCCCGCTAGAAGTGGTCACCGCATGACTT -ATTTCAAGGTATATTATTCGATTTCGGCATCAGTCTTTCATATCAGAGCTAATACCAAGA -TAGAACTACATTATTCTCTTTGGTGGATTCCAAGATACCTCCCAGCAGACTAAGTACTTG -CAGGACCTGTGGATCTATGATTGCAATCAGTATACCTGGTCCAACCCCACGTTGACTTTG -GCTTCCCAAAAGCCCGATCCTCGGTCATCATTCTCCCTGCTACCTCACGAGACTGGAGCT -GTTATCTACGGAGGTTACTCCCGAGTGAAGGCAGCCGCGGGCGGTGGTAAGCAAGGAAAG -GGTGGTGGCCCCCAAAAGATGGCTCTCCGGCCCATGGTTCACCAGGATACTTGGTTCCTG -AGAATCACGCCCCCTCTAGCTGAGGCCCCATCGTCGACAGCACCTACAGTTCGATGGGAG -CGACGAAAGAAGCCCGCTAACACACCTAATCCTCCCCGTGTTGGCACGACCATGGCATAC -CACAAAGGTCGTGGAATTATGTTTGGCGGTGTCCACGACGTGGAACTAACTGAGGAGGGC -ATTGACAGCGAGTTCTTCAATACCCTCTTTGCTTGGACCACAGACAGAAATCGTTTCTTC -CCGTTGACTCTGCGACGTCCCCGCGCGCCTGGCAAAAAGCAGCTGGCAAACCAAAATGCA -AAGTCTAAAAACAGGGGGAAGGCGGACGAGGAGGAACTTTTGGCAAACCTTAAAGCCCTT -GAAGCTAAGGTCGGAATTCGCGATGAAGATGACGACGATTTGGAAACAAGCACGCCCCAA -CCAGAGGAGCCCGCAGAGCCCGCAAAGCCCTCAGTTGTTCGCTTCGAAATGCCCCACCAG -CGATTCAATGCCCAGCTAGCAGTGCAAGATGATACTCTTTTCATCTTTGGTGGTACTTTT -GAAAAGGGTGACCGAGAATTCACTTTCGACGACATGTATTCCATTGACCTAGTCAAACTG -GACGGCGTCAAAGAAATATTCTACAGAGAGCCTGAGAATTGGAACCTCTTGAacgaagct -gaagatagtgacgaagacatggatgatgaagatgacgatgatgaaatggacgaagaggat -gaggccgacgccgaagcgaTGTCTATTGATGCCGCTTCACCTGCCCCGACAGATGTGACG 
-GTGCCATCTGTTACCAAGGACATGGAACAACTTGAGGTTGAGGAAGAGCAAATAGTCAAT -GACAGTCGACCTTTGCCTCGGCCGTTCGAGAGCTTGCGAGAGTTCTTCAGCCGCACCTCT -GAAGAATGGCAGAATATCGTGATGGAAATTGTCAAGGAGAAAGAGCAGTCTGCTGAGAAG -ACAATCAAGGAACTGCGCAAGGAAGCTTTCTCCCTGTCCGAAGAAAAGTGGTGGGATAGC -CGAGAAGAGATCATGGCCCTGGAAGATGAGCAGGAAGCCGCTGGTATTGGAGAGGTTGTT -AGCATGTCTGACCGAACTGAAAACATGGGAGGAGCTGGTCGTCGCCGCTGATACCCTCTA -TAAATATGTTAAAAGAATTTATGCATTGATGTCCGTATTGTTGAGATTCTAAATCTGCTT -CTCTTGTTTCCATTTATTAATGCTTCCCTAGTTACGGGCAACTTTTGCTAACACGTCTGT -CACCAGCGCAAAGTCTCGCTCAACATCAACTCGCCAATTCGAGATTGCAATGCGACAAGC -AGGGGACCCATCCCAGCTAGTACCCGAGACGAACATTTGCGAACTTTCGTTGATCTTGCT -TCCTAGCTTGCGGTTCAGGCTCTCATCATTCGCGCGCAACAGAACAGTCATAAATGTCTG -ATCAAGCAATGCAGATTTGGACGACGCTTGCGGTAGAGCTGTGTACGCAGAATGCTCGAA -GACCCATCCGTAGATTTTCCGGGCAAGTCTGATCTGACGCTCTAGCATTTCCTGATACCC -AAATTTGCCATACGATAACAGCGATGCATAGACCGGCAGCGCTCTGAATCTTCGAGAGTT -TTCGATTCCAATGTTCAAAGGCGAGGGGATGGAAGGGCCATCGGAGCTTCCAGCAGTCAA -GTAGGCTGCATTTGCATTTTGGAATACTTGCGATGCCACATTGGGGTGTCGAGTAAAGAA -AAAGCCGCAGTCATAGGGAACGTTGAGAAGCTTATGGCCGTCACCGGTGATCGAGTCGAC -CAGGTCTATTCCCTCACTACCCTTCTTGATTGTTGCAAACTCCGGGCTTTCGCCAAGAAC -ACGACCAAATAGTCCAAATGCTGCATCGGCATGTATCCAGGCTCCGTACTTGTCGCACAA -CTGGCGAATTTCTCTCAAATCCGCACCGCTTGTTGCAAAGCGACCAGTATTGACCTCCCC -GCACGATATCGCGACGATGCTGACCTTGTCTGTTCGCGCAAGTTCTTTCTGAAGCTTATC -AATGTCAAAGCGGAGCGGGTCGTCGGCTCGGCAGATGTTGTGTACGTTGGCACGACCGAT -GCCCAATACACCAGCAGCTTTTACGAGGGACGAGTGTGGCAGTGTTGATAACACTTGCAC -TCCAGAAAGACCCAGTGCATTCAATACCTGGAATAGACCCTCTTCGCCAACACTCTGGAG -TTGAGAGCCTTGTCTCTCCGCCGCCTTTTGCAGAACAAACTCACGCCCACATGCTAGGCC -GAGAATGTTGCTAGCTGTAGCGCCAGTGGTGAACGTCCCATTGTGCCAGATAGAACGCTC -TAGTCGAAAGAGATCCAAGAGGAGACCAATTGCGTTGTACTCCACATCGGTGACAATTGA -GTGATTTGGGAGATGCACCTGGACGTTCTGGTCGTAAACAGATACAACGTTGTCCGCGAA -GAGTGCTGCCGGGGTAGTTCCGCCAGTGATGAAACCATAGTAGTTGGCACTTAAGCTTCC -TCCATTGAATGCCGGGACAATATCATTGATGATGTGTTGCTGGACAGATTCGAGACTGGA -TCCTGTATCTGGAAGTGTCACGGGCAGAGAAGCTCTAGCCCGCGCTAGGCTCTCGGTACT -AGGTAGAACGCCTGACTCCCAGGGTCCCTGAGCAGCCTTCCATAGAGCTTGTTGAAATTC 
-TGTTTGCATGATTGAGATGTGTGTCTGAGATAAATCAATCAGCCTCGCCAATTTATACCA -ATATACCGGGATCTCGGGATTTGAAGATTGAGGCTCCCTATAAGGTTCGGCTGTACCTCC -CAAAAACAGTGACAGATCTTACAATGTACAAAAGCACCTCCAAGCAAACAGGGAGCGGCT -ACCGACATGCTCTTTATATTGCGACACACTCCCCGCTAAACTACTTCGGGGTTGTCTCCC -GCTGGCCATATTCCAATAAACCGTGTAAATTATTACAGGACTCACGATACTACTATAACA -CTCTTTTTTAATGATATTAATCCTAAAATAGAAATAGATGACGCTATCTATCTCCCACTA -CGCAAAGAGTGTTATGGTAAAATCGTGGGTCATGTGATAAGAGTTTCAATTACTATAAGG -GGACGGAGTACTCCGTGCTGAAATAATTTACTTGGTTTATTCGACTATGACCAGCGGGAG -ACAAACCCGAAATAGTTTAGCGTGGAGTGAGTAGTCGTTCTCTGCATCTTTGGAAGACAC -GTCTATAAGGAACGTTGGAAAAATGGGGCAATGTGAAGTCCGGAGTCCGACGCCCGAAGT -TCTTGTTGTAAGTTTCTGGAGAAGATGGCTAATAGCCGGAGATGGAGGCCTATTATAGGC -CTAGGCTTCGTAGAGATACTCATAGCGAAGGGACTTGCCTCAGGTGCCGACCGCATTTGT -CTAGGACCTCACTTAGTGTATGTACGGATTACTGTGCAGGTAATCCTTAAGATTCTCTCC -AGTCCCTCATCCTCATCAGATCCTCCTTCCACATTACCCCTGACACCTCATTGGATGCTA -ACTACCTGTCAAGTCCGGTAACAAGGTACTGAGCAGATTATTCATCCTGACTTCGTCTAT -GAGTTGCTAAGTCTCTACAGGTTTTCGTTACTCTCCGAATTTATGGCAGTGCACAAGTCC -TTACTTGACGACATTGAGCAACCTTATCTTCAGGTACGCCTATTCCAGGGGTACATAGTC -AGCCCTGAGCAAGGAGAAGCTGTCTCGGATAACGAAAATAAAACCAGTGGAACCAATGCC -TATGATTAAGGTCATCGCCTCTTCTAGGCCATGTCGCAGTGCAAAAGAACTAGGAAGGGC -TCCTAAAAGCTGGTCGAAATTCCAAATCCAAGTCACGCATGCCCTAGAATAGCAAATCCA -TGCCAAGCCATATCCAATCCAGTGTATTTAGGTTCAATGATCGGCGCAGATCACTCTTCT -TCTGCAGGTAAATATTCCACATTCTATCATCCCGCGTCTCCTGCGTTCCACATAAAGCCC -CTGGGGTGAAGATAACGCGGGGCTTGGTCCTACATCGACCGTGGTCACGTTCAAAACTTT -CGCTCATGCAACAATGGCTACCACTCCAGTCTTCACCTACTGTTGAAGCCACCATCGATC -GTTGAAAGGCCGACAATCATGCCTAGTCATATCAAGTTTGCAGAGCCAACTTCGATTCAA -GCTAACAAAGACCGCGCTAGGGGGATCAACCAAGAAAAGGGGCCGGCACAGAGAGCACCT -TCTCCAGGAAGTGAAATCGTTTACCCGACAGGATGGAGACTGGTGCTTACCACCACAGGG -TATGTGCTCAGCCAAGTGCCATATGGCTTTGTATCACGAAGACTAATGATATGCAGACTC -TTGATTGGTTTTTTTCTCTCGAACCTGGATGTCACAATTGTAAGCTCGGCTCTCACGAGT -ATCACAGATGATCTAGAAGGCTTCGAGAAGAGAAGCTGGATTATTACTGGATATTTGGCC -ACCTACACAGGTATGATCCGAAGCGCAGTCCTACAATTATTATTTTTTTTTTTTTAATCC -AAACATATGTATCCGGAAACGCCTTTCTACAGTCTCTAGCGCTCTCATTGCTGATGCCAA 
-ATTACTCCCAGGGTCTATGGCAATATGGACAAAAATCAGCGATATAGTTGGCAGGAAACA -AACTACAATCGCTTCTCTTGTTATTCTGCTCGCGTTTTCCATTGGGTGTGGCTGTGCTCA -TACAGTCAACCAACTGTAGGTCTAGCGTAGTTCTGCACTTGCTTTCCGTCTAAAGGCTAA -CTACTGAGGCAGCATAATATTCCGAGCACTCCAAGGAATTGGAGGCGCCGGTGCATACGC -ATTGACAATACTATGCGTCTACGAGATAGCTCCTAAAACAAAGCTTCCGACCTACAGCAG -TCTCATGTCATTTTGCCTTGTCTTTGCATCTTTGATAGGCCCAATAATTGGGGGTGCTCT -TGCACAAGACTCGGCCTGGCGGTGGACTTTCCTCGTGAAGTATGTAAATGCTCGACATCT -GAGCTAACAGAATGCTAATTCAATCACCAGTGCGCCGTTGTGCGCTATTGCGATCGTTGC -TATCATCGTTGCGATGCCCAAAAACTTTGGTCTTGATCAGCACACTCCTTCATTTCGAAC -ACGAGCCTCATATCGCTCGTTTGCTAATTTGGATATCATGGGATCGGCTTTGATGATGGC -TGGCTCATTCTTAATAGTCGCTGTCTTGAACGAAACGAACCTTGCATTCTCATGGTCATC -GAGAGACGCAATTGCGCTTCTTGTCCTAGCTGGACTCTCCTGGGTTGCTTTTTTTGCCTG -GGAATGGTACATCTCGGGCATACCAGGCAAAGACCCCATATTTCCCAAGCGTTGGCTTTT -TGACCGACCATGGATAGGAATTCTTGTGTGGGTTAATCGAAGGCTCACCAATGTGGTCCA -GTTTCTTCTGACACAATTTTAGTTCTTCGTTCATCATCGGCGCACCATACAACGTCGTCC -TGGTATATGTTCCACAGCAAGCACAGCTTCTGTTAGACAAGTCGCCTTTGGATGCCGGTA -TATATTTGATTGGTTATTCTGCCGTTGCGGCGATTGCAGCGGCGATTATTAATATCGCCA -GCTCTAAGGGACGCATCCCGTTTATTTATACTCTGCTCGTGGGATCTGCAGTCCACACTG -TCGGTATAGGGCTTCTCTCGACCATTTCCACCTCCAGGGGCTTTCATGCGACAGACATCG -GTTATCTAGTCATTGCTGGGACAGGAATGGGTCTGACAATGGGCATCTTGGTACTGTCGA -CACCATACATAGTCGAGGACAGAGACCTGGGTATGTGTTCTCCCGAGAAATTCCCTAATC -TTAAGGTGACCGATACATTGACTAACTGGACCTTCACTCCTTGTTGCAGCGATTGCCACT -GGCACAGTGGTTCAGCTCCGTTTCCTTGGCGGTGCTATTGGCCTTGCCATTGCATCTAAT -ATATTAAATGGCCGTTTAGCGCAGCGTTTGCAGGGTGTTATGACATCGCACGAACTACAC -CTCTTCCTTGAGAATGTGAAAACAATCAAAACTCTACCTACCCATCTACAGGCGGACGTG -AAAAACGTTTTCGCTAGTAGTTTCAATACACAGCTAGTTGTCATGATCGGATTTGCAGCT -GCTCAGCTACCAGTGACTTTACTACTACTCAAGAGAGGCCGGCAGCTTGCGGCCAACAAA -CAATCGAGTTCGGTACCAGGATGAGGACGGTCAATTCTTGATTTTCAGTCTTGGAACTGC -TTTGGCTTCAGACAGGATGCAGACGTTATGACAAAACACTCACAAACACACCCATTAGTT -ATCAATTTGCTACAATGTATCCAACATCCTAAAAAATAGTCCAATTCAGCGCATCTCTTG -GGGTCCACTTGATGAAGATATTGAGCTTAGCGTCCAACAAAAATACATTCGACATTATGG -ACAGCTATATACCTAGTCATATATATAATAGGTTTATTCATCTCTTTGTGGATCTATGAA 
-ACCGAAGAAGACGAAATGAAGACATGTATTCCTTTGCTAGGCTGGGGTGATGGCAGGTGA -ACAGCAAAGTCCAGACTAAAAAAAAATGGTGCACTTCTCCGCAAAGCGGCTGCAAAAGCG -GCTTATCGCCAACGCGCCCTCATTCCAGTCCGCGTCAACATCAAAGATGGCTGCAAAACT -GATCGATCGACGGTTCCATAATGTTTCAGGTCTGTATTGCCTTATTAATTGTCACTAGTT -AGTCCACTGGTTTCAAAACTGATAAATCTGTCCCTATCCAGGCAAACTTCGGGTTTCCGA -GTTGTTCTTTGACGTCCCCGTGGACTACAGCAAGCCTACTGGGGACACGCTGAGGCTCTT -TGCACGAAGCATTACTCGATTGAACAAACCTATTGAGCCTGCTAAAGAAGAAAGCAAAGA -AGCCAAAGAAGGTAAGCTTCCTTGGCTGGTCTACCTGCAAGGTGGCCCTGGCTTTGGATG -TGGTGCCCCGCAGTCTTACCCCTGGGTTGAATTCGTGCTCAGCAAGGGTTATCAGGTGTG -TCATGCCTTTCACAAATTCTACCAATAATAGATTGATGCTCAGCTGTTCTATATAGGTGC -TCTTTCTAGACCAGCGAGGTACTGGTCTCAGTTCGACTCTCACAGCGGGAACCCTTGCCC -GTCAGGGAGATGCAATCAAACAGGCTGAATACCTCAAGAACTTCCGTGCAGACAGCATTG -TTCGTGATTGCGAAGCCATTCGTGATGTTTTGACGCAGGACTATCCTGCTGACCAACGCC -GGTGGAGTATCCTTGGGCAAAGCTTCGGTGGCTTCTGTGCCGTGACTTACCTATCAAAGT -TGTATGTTTTGAAAGTCACCGCACTCCAAGTTTGGTCCATGAATATTCCGCTGATGCTTA -TCATCTTCCAGCCCGGAGGGTCTGAGAGAAGCTTTCCTTACTGGTGGCCTTCCCCCTCTT -ATCAATGGCCCGGATGCTGTCTATGCGAAGACCTATGGTGCACTCCCTTGATTCCTTCTG -GTTTATACATATCACTGATGCAAAAATAGATAAGGTCAAGGAGAGAAACGAGGCTTACTA -CGAGAAATACCCTGAGGACGTGGACCGTGTGAAGAATATCATGCAATACTTGAACCAAAA -CAAGGTTACATTGCCATCCGGATTACTCACTCCATCTCGCTTCCAGCAACTGGGCATTTT -GCTCGGTTTCCACGGTAAGATCCCCATTCAGACAATCGATATCTTAAAGCTAACACGACC -TAGGTGGTTTCGATAGTCTGCACGGTAAGCTTTGCTTTTTCTCGTGAATGGCGTAACAAA -CAAAATACTAACTTGGGTATAGATATTATTGTCAGAGTCACCAACGATTTGGAGATGTTC -GGATTCCTTACTCTGCCGACTCTTTCTGTGGTTGACAGCAATGGCGGTATGGATAAAAAC -ATCATTTATGCCATTCTGCATGAATCAATCTATTGCCAAGGGTAATGATAAGCTCCTCAT -GCTGTCCATTTTCTGTGACCCATCCATGCCTAACCATGAATATAGAAAGCCATCGCTCTG -GGCTGCAGACAGGTTGCGCGCTAGCAACCCGCAATTCCAGATCAATGACTCCCTGCCAGA -GATCTACTTCACCGGCGAGATGGTAAATAGCCTGACAAATTCCATAAACATTATTGTTTC -AACAAAAAGATCTGACACCATCTTTCACCTTCTAGGTCTTTAAGGATATGTTTGAGTCCT -ACTCGGAACTCTCCGAGCTCAAAGAAGCAGCTGAAATCCTGGCGACAACAGACGAATGGC -CGGCCTTGTACGATGAGGCTCAACTGGCCAAGAATAATGTGCCGGTCTATGCTGCAACCT -ACATTGATGATATGTACGTTCACTTTGATCTGGCCACAGCCACTGCTGCGAAGATCAAGA 
-ACTCTAAGCAGTTTATCACCAACACTATGTATCACGATGCGCTGCGCAGCAAATCGGGTG -AGGTCATGCGTCAGCTTTTCAACCTGAGAGAGGACTCGATCGATTAGTCCACATATGCAG -TCTTTCATTGCGTACATATCATGGATTTCCAGCAACACGATGAACTCAGAATATCATTTA -GTAAATTTCACGAGATGATTCGAGATGTAGAGGTAGACCAAAATACGTTACAGCGGATGA -AATATATATATTGAGTTGGCACGGACGCAGTACTGGGTATGATAAATTCTGAGATAATAT -ACATATACCGAGAACTTTCTTCCATTGTTTGAATGTGAAAGTATCAGAATATATCAAAAA -ACCTGTAAAATGTGGTCAGTGTCGTCACCCTTGCTGTAATAATCATGGAGGGAGAGATTA -AGTACGTACCGCACCACCGATGATGAATAACCTCTCTTCCTCACGCTGTACCCACTGAGC -AATCCTACTAGCGCTTTGGGCAAGTGCATTTCGATTAGTGTTCGGTTCAGCGATACAATA -TAATTCGAATACAGGGGTTACCCATCCAAACGAAGTGGCTGACCGCGAAACGCAATGATG -AACTTTGACGTGGGTATTCTTGGCGTGGATACTTGCGTGCAAATTGTTGTACATCGACAT -CAATCTACAGACACAGTCAAAATAATTCAGTATTACAAAGCAGTCATGGACAGTGGAGGG -AACAGTACTCACCTTCGTCGTCGTGTGACAGAAGAAAATTCGGGGGTATATGATGACATC -GTGAATTGTACGTTGGCACGCGATTTGTAGAGAAAATGATGGAGCACAGTTCCAGGAACT -ATATCAGTAGTTGCGGGTCGGCCATTGTCAATTGCCGTTTTGATCTCCTTCAGAGTGCCA -TTTCTCTCCATTTGCTATTGTATCATCAGTATCACATGTCATACTCGATTGTGTAGAATT -GTTCAATCTCACCTCCACAAGAGCATCGCGCATTGCTTGCATCTCAAAGAAAGCTTCTTT -GTCGGGACTGATAAGGACAATGGCAACAGACTCGTCTTTGGTGATCTCTGCACTGTTTTC -GATATCTTCTCGAATGTCAAGAAAGCTCACATACATGTAGAGATAACCGCTGTTGTTGAA -GCCAGGCAAACAGACGGGAATCCAACTTTCTCCACCACCAGCTTTAATCCCGTCGGCTTC -AAATACCATGTTGAATAAGAGCTGTAGATCGCCTGGATGTAACGAGTGCTTCTTCGGTCG -GACAACGCTGACGAGTCGCCCGCCGGCCACAAGCAAGCCGTATAGAAGACTATTAGCTTT -AGTTTTCAGAAGTGCATTGTTGATAGCTTGTCGGTGTTGTTTGCGGATTTTCAGACACTC -CAATGCCGATAGCAACGTCGATGGAGATCCTCGCGTGAAACTGTCTGCCAGAGATGAAAG -AAGGGACTCCGAACCCTGTAAGGGCCGCGATAGATCGGTAGAAGGCCGCATCGAGAAAAG -ATGGGTCAAAGAAGGCAGCGTTAATGTAGACAGGATCTGCATATATAATGCTTCCAGCTG -AAGCCGGAGCTGGTTGTCGCTCTCCATCATACGGCTGATTGCTACAAGGTGCAATGATCC -TCGTGTCAATACAACAAATCTAGTGTCACCAGCGGAGAAGCCTCGTAGATGATCACTTGC -TTCTTCATAAAATGAAATTATGGTCTGGATGACCCCGACATAGCCAGAAATTAGGCCGCT -GTCGCCATGCCTTGTCCAAATCGGCTTACCAGCGGCGGAGAGGATCACATAGTGCTTGCG -CTTTGCTTTCCACCGTTGGAGCAGGAGATCTGATTCGGGTTAGTGTTCTGAGACAAAAAA -TTAATTGACCATTGACCCAGACCTTCATTATCGCCTTGCTCATTAAGCTCGCCAACTGCT 
-TCAAATTCGCTCATAATTCCATCGTCATCTATGTCGTCGGCTGCAAATTCAGGAAAGTCA -AGTAGACCTGTAGCTCCTCGACTTTGGCTACCGGGCGCTGAAGCAGCAAAGTCCAAAAAA -AGCGATTCAACATCACCGGAATCGCCGTTTGGGATTGAGCTTCTGATACTAACAGAGTCC -CCAGCTTCGCTAGCCCGGGAGCTCGCCAACTGGCCGAGGCTCGCCCTGGCCTTTGGACCT -TGACCCAGACCGCGAACAGCCAGTATCGAGTACGCACTATCAGCAGCTTCGGGTAATTGT -GTTTCAATATCGGTCCGTGAGATAGCTGTCGTTGCCTCTGCCTGTAGAGCTGCTCGGGAA -CCCGTTTCATCGTTCAGTAGACTCAACGTATTTGGTCGCGGGGGTAAAGGTGGTCGCTCT -CCCTCTGAGCTATCGCTGTGCCCATCGTGATCTGGGATTGATGGGCTGTAAAGATAGGAT -TGCTTTGATGTCTCGACAGGCTGCTGGCCGTCGGTCTTTGGGTCTTGCTGTGTATTCATC -CCCGCTGCGGACATGCGCTTTTTAACGCGGTCGCTCGTCGCGGGTCTTCAGGCCATTAAA -TACTCATTGTTTCTTTCAAATATCCGGAGTCTTTGATCAGTTGTGTTTTGCGCTTTGATG -GTTGCTGGGCGTTGACGTCGGCGACTCGGAGCTACCCCAAGGCGGAACAAAACAATTGAA -CGTGGCCGCCTGCGTGTCTTCTAGTCAACATTCAACCGCCAACTCCCTTTCCCACAGGGT -CATCATCGATGTCCTAAATATGACGGAGGGTAGTTCAAAGCTTTTACAACAATTTTACGT -CCGTTATTGGCACCATGAGCCTTGATCCCAGTGTCTTTCCGAGGTCGGACTCCCCCGCAA -GCTCCGAGAGCTCCTTGACACGTTCCCGTTTACAAGGCAAGGAAGGTATGTGCCCACCTC -AAAGAATCGTACGAGAGTACTGTGTCTGATTATACCTGCTGTCTGTAGGCTCCCTCAAAA -AAGACAAGCACTATCGCCGATATGCATCTAGCGTTGAAAGAGCGCTTTCGCTCTTCGACA -ACGCTCTCCAAGAATGGGCCGACTATATCTCGTTCCTCGGTCGCCTTTTAAAGGTGAGCT -CGCTAGGCTTGAAAGTGATGTCCTGAACCATCGCTAAGCATGAATCATTAGGCACTCCAA -ACCCATCCACCCGATCAGCCCGTCGTCCCTCATAAAGTGTTAGTATCGAAGCGACTCGCT -CAGTGCTTGAACCCGTCCTTGCCATCAGGCGTGCACCAAAAGACACTTGAAGTCTACACG -TACATATTTGGCTTGATCAAGGTATGGACGGCCTCCAGAACGGCTGAATAGTGTGCTTAC -TGCTATATGTTTACAACCAGCTTGAGGGATTGTCACACGACCTGCCTTTGTATCTCCCTG -GCCTTGCGCCAACATTGACCTTCGCCTCTCTTACTGTCCGTCCACTGTTTCTGTCTTTGG -TAGAGGGATATATCGTTGATCTGGAACCATGGGCTATTCGGCCAGCCTTGAAAGCAATCA -TTCTAGCCCTACTTCCAGGCCTTGAAGAGGAAACTAGTGACGATTTTGAGCCAACACTGC -GCACAATTAATAAACTACGAGACGCTGCTGGACAACTTGAAACACAAAGAACGTCTGAGG -CCGGGGCATCTGGGCAGTATTTCTGGCAATGCCTCTTTCTCGCATCTATCACGAACCCTA -GTCGACGGCTTGGCGTCCTTGCATATTTAAACCGCTACCTCCCGAAGTTAGGTATCGCAG -ACCGTCGATCAAGCACGGCTGGTGGGAGCGACCCACAGAACATCCCACCGGAGATATTGG -CTGCAGCAGACTCTGTCATTCTGCCCGAGCCTGGCCTCTTGATTCGATGTGTTGCATCGG 
-GCTTATCTGATGACCAGCTCCTTGTCCAACGTAACTTCCTCGATATTCTTGTCACCCACC -TTCCACTCAGCTCCCCAATCCTGCAAAGCAAAATTGCGGCTAGCGATCTTCGAACTTTAG -TTATTGCAGCAGTTGGGGTGGTCACTCGGCGAGATATGAGCTTGAACCGCCGACTCTGGG -CTTGGTTCTTAGGACCGGATTCCCCAAACGATAGCTCTTCGATGGATGATCGAAAATTCT -CAACGGATACCGCCGGCTCTGCCTCGGTTGATGGAAACGAACTCACACAATCACAATACT -TTAGCCGGGTCGGCTTACAGCCATTAGTCACTGGTCTACTGGATATGATCAAACAAGCAC -CGAGTACTCCGTCCGGTAGAACCAAACCCTTCCGCATAGCTCTGTCGCTAATGGATCGCT -GGGAAATCGGTGGATATATTGTCCCTGCAGTTTTCCTGCCCACTGTACGCAGTGTTCAGG -CGTTTGAATCTACAGCCCCCAAAAACCACTATGAAGAGGTTTTCCGCAGTGCAAGCGCCT -TTTTTGATGGCGTTGAAAGTGGTGTGATATTCTCTGAGCTTCTGGGGTTAGTTGATTACC -GGTCTTCTGATATAGACATTGATAGCGAGCAAGTGTTGCGTGACCTCGATCTTGCACAGT -TTATCATTGAAAATTTCAATGTGCGCGAGGAGGACATGGTTCAGATCCACGTGCCTCTGC -TTACATTGTCAGTGCTTGTGAAGTTGAAAGAGATTTCATCTAAGGAGAGCACTGCCAACA -AACAGGCCGTGACGACTGCATTGAACAATGTTCTCAAATCGTTGACCGGCCTACTCACAG -AGCGGGCGTTCTCGAGAAAGGTGGGATCTGAAAAGACTGCTGGGAACGATACAAAGGGCC -GCGGTACTGATATCTTGCGAACTGTCCATAATTTCTATGAGCAAAGCAAGAATAGCTTGG -AGCTTCCACCTCTTCCCTATGCTCCTAAGAATCTCGGCGAAATGATCATTCGAGAGGCTC -ATGAACTAGCTATCGCGGCGCTGGCAAGTCGGGATGGCAATATATCACTCCATGAACCAC -TCGATGTTCTTGTGACTTTGCTCAAGAAGCTCCCAAAGTCCCGCGTCTTACGAGACCGAA -AACTGTATGAAGCGCTCTCTCGGCGACTAGGTTCAGGAAAAGACAAGCCAACAACCGCCT -CCTTCTCAGTCATATCGACAATTGCTTCCACAGTGACAAGTTTGTTTTTCATTCAGACGC -CCGGCTTCTACGTCAGCTATGAGGATGCTTGCGATCTGATAGCCCCTCTTGTTAACCAGC -TGTGGTGGTACCTATCACCCCTTAGTCCAAAATTCCACGTGGAAGCTGTGCGCTGCTTAT -GGCTCCTGCATTCAATTTCTTGGGTTGACCATCTCGTCGAGGCGTCCCTGACAGCCTTAA -TGGTCAATGTGTCAACCGCAGCATCCCGCCACCTCTCGTCTGAGGAACAAGCCGAGCGAT -TCTACGTCTTGTGGAACCACAGTCATCACAATACACATGAGCAGCCACCAAAGCAAGTAA -TTGATGTTGGAGGGACCCTGTTCTCTTACCAGTGCTCTATGCTTGAGCGTCCGCTTTTCA -TTGTCCTTGATCTTCTCTCTCAAGAGTCAAGCGACAGTTCGCAGAGCGTTCAACTATGGC -TGCAAGACTTGCCATCTATCCACAAGTAAGTCGGCTCCTCGTCCCAACACGCTATTTATA -TCTAATATTTAACCAGGGTTTTCCATGTGGTCATCTCAAATCTTGATGAGCTTTCTGAGC -ACGATGAAACCAGCGGTAAAAAGACAGGCAATTACTCTATTTCTCCGGATGACTATAAGG -AGTGTGACTATCTGCTACAGACAGCCTCAAATATCATCTCTGCGCTTTCGCAAAATGGAT 
-GGGTGGCACTTCTTACTCATGTGTTAGGCAATGAGAAGCGTCTTGAAACGTCCAAGTCTG -AAGGTGGGTAGTCGATCTTCTCACGAGATGATTATTGCTAATATGTATTCTTGGATAGAA -AATACAGACTTCAAAAGCCTCCATTCCACCATATTTGAGGCGTGTCTACGAATTGTCAGC -ACACAACCCCCAGCTTCTATGGAAGTCAGCCTAGATGGTGAACGACTGCATAAGGACGCA -CTCCAACTCATGCGGAAACTTCTTCTTGGGCCCGGAGCAGAAGAATTAGTGGAGTCCGGT -ATCGATGAGTTCTTAGTGGAACGCCTTCTTGTATCTTCCGATGGGGGCAGTATCACGGTG -CAGGGTGCACTGATCGACGCACTCCTTGCTGCATTGAAGGTACGATTTGCGCAAGCATAT -CTACCACCACCGCCTCCTCGGCCGAAAGATTTACGAGCTGCCTCTCGAGATCGGCTGACC -AGTCCTTCGATCTTGTCTTTCACCAGTGACAAGGGGGACAGGAGACAATCGCTACCACAG -CTACCGCAGCCGCCAGACCGGCTTCTAAACTGCCTTCTGAAGGGTATTAGCTCGCCAAAA -TCCCGAGAAATTGTAGACAAGTGGATAGTGTTGCTCTGCGAGGTGCTCCCTCTATACTCA -AGTTCTATATTCCAGATCCTATTGATGCTTGTGGAATGTTTCTGCAGGGAAATCCGAGCA -TCATATGGCAGACTCCAGTTGTCCTTCCAGCAAACAGAGGATTGGCCGCAGGATCGTTCC -GAACATGTCACGATTGCTCTTCTGAGTGGTCTTGAGACATGTATCGCGAATGCCCACGAA -CGTCTCATCGTCGAAGAGTCCAACGTTCCAGCAGTCAAGAGCCCTGATCAAGCACAGGGC -TTTTTCGGAAATATGGTGTCTGGAGTATTCAACTCGGAAGGCGGACAAAATCGCCCAAAT -ACCGCCAACGACCGTCTCACCGTTCTTCTCTGTTTCCAAGACGCCGTTCGCTTATGCTTT -TCCATCTGGGCATGGGGTGCAGGAGATCACAGCGGATCTCCTCCTGATTACGAATCTCTG -GCCTCGTTTCAATACACCTCGTTGCGAATGCGAAATCGCTCACGGCGAATCTTGGAGCAC -TTTTTCACCGCAGAGGCACTCGAGTGTCTTGAAACCTTGGTGGAAATGTGGACTAAGTCT -GACACGGAGACAGCGTCACTCATCTTCAATCTCCTCCACACACTTGATGGGTCAAGCCCA -AAAATCACCATTCCTGCCATTTTCAATGCAATCTACACTCGCACTAATCCCAGTGCGTTG -GAGCCTAGCCGGAAGTCTTCGCTAACGTCCAATCTTTCTGAGTCTGAACTTGCTAGCTTT -TTGGTTACTTATGCGCGGTCGCTGGACGATGATGTCCTTGATGAGATTTGGTCAGACTGC -ACAACTTTCTTGCGCGATGTTCTTAGCAACCCATTCCCGCATCGGCAGATTCTTCCACGT -CTGATCGAATTTGCGGCTATTTTGGGGGTGAAATTGGAGAATACAACCTTTGGCGAAGAC -CGCCGGATGCGAAAGGAACTCGGAGTATGTATATTTACTGCTGGTTCTACCATCGCCACT -GACAATCATATCGCAGGATGTTCTGCTACGACTCCTTACTGCAGTCTTTACCAGCAAGCC -CCTAGGCCTCAACCAAGATACCGGGTTTATGGCAAGATCTTCGGTGGATCATGATCATAC -CTCGGTCTCTCACACTGGACCCGATGACATGTTGAGCATTCTAGCGGTCTCTATGCCGTC -GTTCATCACCACCCTGGGTGATTCAGATCGGATCAACACTGCCATTACTGGTGTCTCGAC -TAATGTAGTGGGTCCTCTCATTCGGTCACGTCTCTTCCCTAACAATCTCAACCGTAATGT 
-CATGGTTCTTCTACAACAAATGGCAAAAGTACCAGCTGCGGCAAAAATGTGGAAGAAGGA -CATATCTGACGCGTTCAACGATGTCCGATTCTTTGGATTGCAGTTGGATTTGGTAAAGAA -CAGCTGGATGGACCTACTGCGCCAATGGGTTCTTGCCGACAAGGAACGGCTATCCGAACT -TCTTGTCCGCCTTCCACCACCAAGCTCAGCGGGTATCATGTTCGGCGTCGGAGCCTCTGC -AGCCCGGTTGGAAGCTGATCGCAAAGCGCAGCTCAATCTCCGGCGAATCGCGCTGCTCGT -GCTTTCCGCTAGCGACGATTACTTTGTTGGTGAATTGCCGGCATTACTCCAGAAATTGGA -AGACCTTCTTGCGGCGACCAGCGCCTCGTCGCCATCTTCTGCCACTCGGGCGGAAGTGTT -CATGGTCCTTCGAGCAGTCGCGCTCAAGACCTCGGCCTCGACCATGGCACCATTTTGGCC -TTTGATCAACACGGAATTACAAGAAGCCATCTCTGCAGTACCTCAGGGCTCTCCACCAGA -GCTCTACAACCCTTACTCGCTGCTCCAAGCGTGCAAGCTTCTCGATGTACTACTTGTCCT -TGCCCCAGATGACTTCCAGCTCCTTGAATGGCTTTTCGTCACCGACACAATTGATGCGGT -CTATCCGCCAGATCGCTGGGAGCCCATTGCTCTGGCAGACGAAGTTTCTCAGACTTTCGG -CCCACGCAGTGCAGGCTCACCCACAGTCCCCACGGAGCCCAACGAGCCCCAGGCTCACAG -TGATCTTAAACGGCCATGGCTCATCTCAGACTGGATCCGAGAAACAGCTAAGGACGAGAT -TGTTGACCGGGTCCTACGCCCGTTCTTCGCTCGATTGAGTATCTATGTGTTTGAGAGTAC -ATACGGCATGGGCAGCGTAGATCTGACAGTGTGCCGGGATGATTTGTTGGCAGATCTCTT -CAACGAGAGCACAATGGCCAATTAAGCGAAAGCAAAAACTCATGCATTTTAGCTTTGTTA -CTAGATACCGCAGAGCGACTCCAAATATAACCTTTTTATGTTATACAATTGCACCTTTGG -CTACCTTCTTTCATTTGTCGCAATCAGGGACGTGAAGCTCTTTTCCTGTGGAAACCATGG -TGGTGCTGGTCACGTACAAGGCCCTTAAGTTCTAGTGTCAAATTGGGATTGAAAGCCCGG -TCATAAGACGGCTCAATACAAAAAGACTGGGATGGGCCAATATCATCATCAAGGGGCCTG -TTTGACCTGCTCATATCTGTCCGAAGGTACAGTCCTCCATATACGGGCAATTTAATGGTT -CCCAACTGAGCCTCAAGCTTGGTCAGGAAAGCAGGTCAGCATTCTATATTGCAGTGTCAT -TATCAACCGTTTTGGGCTGACTCATTTCGGAATCTGCCAATCGTCCAGAAGTGGTATCCC -GCGCAATTTTCCAGAATGAAGCCAATATGACTGAGACGTGAAACAGTGATAGGTCTCTCG -TTTGGCTGGGGAAGGTCTACTAGGGGCAATCTCGAAGCCAAGAAGAGATAAGCTTTGGGC -CAAATCACACGTCCCTCCGGTTCTCACAGGCAGGAACGGCCCGCTTGAATCAGAATCGTA -ACTTGAGACACACTTCAGTGGTTGGAGCCATGATCAAGTGAACTCATGTCTAGCGTTAGA -GGAATTACTACTTGTGAGATCTCACGAAATCCCAGGCATCCTTCCATATCAGCTCTGTCT -TCTCATAATATGCGGGCACTGCACCAGCACAGTATTGGCGCACCATGGAGTCGAGGAACA -ACTGCCCACAATCCAATTGCAAAATGAAGTTGCGGCGCAAGGCGTCGTGAAAGCGAGGTT -CTTTGGTGGATGGGATCTATGGGTTGTCTCCACCGTCATGAGCGAAAGTCAGGATGACAG 
-CAGTGAAGTGGATATCTCGATGATCTATTGTGCGGATACCCTGGATGAAGAGAGGGCACG -AATGTTGTTGCATGCATTAGTAACGAAGGTGCATTGGTTGCGGCTTTTCGCCACTCTGTA -GGCGGCAGAGGGGCAATTCAATCTGGCAGGTGTGGATGGGTTGTCAATTCGAAAACATGT -ATCATCTGCATGGAAATAAATCACCATATCCGCGATATACATAATCAAGAGTCAAAGGTT -GGTGTCGAAAGGGGTGCTCATTATTATAAACCAGATAGTACTTACTTTCGCTAATATCGC -TTAATGATGGCCATACCTTCTAATCTCTTAGCCTACAATTAAGGGGCTATTCCTAACTTG -AAAGTATTCTAGTCTAGCTTAAATTTCCTTATAGAGAATAGCATCTATCTAATACTACTA -CTTATTAGGCTAAACTAGGGGTATAGGTATAGTTACTATGTAGAAGCATATTTAGAGCTG -AAAAATGAAGGGGAAAATGATTGAAAGGCTATAACGTAGGGAATAAATACACTATGGTTA -ACCTAGGGGGTTAAAGAACGACAGGGAGAGAGACAGTAATAGCAGGAGGGCACCGAGAGG -AGGGAGCAGAAGTATCTTATAGATCTACGCTAGGCTAGGCAAAGGTGGTCTACTTACTAT -AAAGGGGAGCTATTTTTGGCGTTCCTGTGGCTGTAAGTGGAAAGGAGAACTTATTAAGGT -CCAGTATAACGATCTGACTGGGGTAATGGCCAATAGGGTCTGGCGGGTATAGAATAGTGG -TGATCAAGATTGATAGGGTTGAGCTTGTTTGGCGGCGATGTCACGAGTCTTATATTCTAG -ATGGACCTCTCCCTTGTTTGTATACAAAAAGGCCTCAGGTCTCCAAGTTCCGAGCCGCCC -GGACCTCCGCAATTTATCAACGTCACTTCCAACAAATTCACCTTGCCACCATCTTCCATC -AAACAATCCATAAATTTTTTCTCTACATTTGAAATAATATTGTCATTATGCCTGTTGGCA -CCAAGATCTTTCTTCCTTTGCGCGGACGCATTGGTATTCGGTCCTTCTCATCATCCCGTG -CCCGCTGGGAGTCCGCCGTGCCGCTACCGTCAAGCAAGCCCGTAGGAGCTTTCCGAGGAG -GGTATGGAAAATTCTCCTTGACGTTCAATGTCAACGACCAAGCATTTCCAAGTGCTATTG -CTAATGCCTGACAGTGTCTTTGGGTTCCTGACAGGATCTGTCGCTGCTGGCGCGTCGGTG -TACTGGTACATCCTAGCTGAGTACCGCCTGGCCAATGAGATGCTGTCTGATGACATTGCG -GTATGTCTATAGCGTCCCAACTGCTGAGTCTCAGTGACTATCTGGATCTCTCAGAGGGTC -CTCTCAGAGGGCCTCTCTATTTCCCTTCTGATTTTGAACTTTGCATAACTGATTGTCTGC -TTTACAGGCGCTTCAAATGGCTACCATCAAGCTCCAGTCTTACATCACAGAGCTGGAGTC -TAAGGTGAATCAACTGAACAAGAAATAAGTACCAGTATGTCTGATATGTGTATCTATTAT -TGCTTGGGGTGTTAGGTGTGTTTTATTTCTGTGCATGAAGCTTGTCAACACTAGACATGC -AGTCTACTATCACTCTTTTAAGGGTCGGGGCATAGCTTTTTGATTTTTCTTTTTTTTTTG -GCATGAGATTAGTCCCAAAATTTCAAATATCAGCATTTCCTTTCCTGAAAATTGACCAAC -TCAATCTTTAGCCTGCTATATTGTACTCTCTCCAACTACCAAACTCCGCTGCAAACAAGA -CAAACAATGCACAATCGCCACTAAATCCCCGGGCCATCATTCACTTCCGCATCACCACCA -GACCATCCCAACACCGCAAAACCAAAATGTTTCGTGTCGAACTTGTATCAGACACATCAA 
-CGGCCACACCAGGCTGGTCCTACGCCCCAACCCGGGGCTTCGACCCCACACAAGCGATGG -CCCCAACTCTAGGTCGCAAACGCGGCATCCGCGACGCCGGCAAGGGCGGCGACATCTCCT -CCAGACAAGCGAACGCGATCGCACGTCACATCGCTGAATTAGATCGCGAGAACCAGCGCG -ACGTTTCCATCCCAGCACCGGTGAAGCAAGCCCGAGAAGCCGGCGCACGCGGCACACGAG -CAAAAACCACCTCGAACGTCCGCCGCATCCTACAATCCCAAAAGACCTTCCGGAACCACC -TAGACGACGAAGAGGCAGCAATATCCTCGGGCAGCGGTGGTGTAGCAGGCATGCAGGGTG -CGGGAGGCGCGAACATCATCGCGGCTAAAGGAGGCAAGGCTGCTGCGCGCAGCTCAACAC -CGGCTAGCACAGCAGGCGTCAAACGTTCCTCGGCGGCTATGGCGGGTGTGAGAACAGGTG -CATCGACGCCCGCGCAGGAGACTACGGATGGTGAAACTGATGCCGACGATGAGCCGTCGC -GGAAACCAAAACTTATCAAGTCTGAATATGATAATGATCCTTTGCTGCGCTCGTATGCAC -CCCCTGTGCCTTCTGACCGGCTGATGCAGCGGCTTCTGGCGGAGGCGCCGCTTTCTTATA -ATGCGTCGCGTGCGAAGCCGCAGACGTCGGGGCGGCCAGGGCGGCACTTTTGTTGTATGT -GTGGATATTGGGGGAAGATTCGGTGTAAGAATTGTCATTTGAGAACTTGTGGCTTGGATT -GCTACAAGGTTCATGAGGATTCGAGGTGTGGTGCTTTCTTCTAGAAGGTTTTTGGTGCGT -GTGTCTGTATACTATTGAGAAGGAGCTTGGCCTGGCCCTGTTCTGCTGATGACTTTATGA -ATTGATACCAAAAACTGTACAATGGACAAATACTAATATGACATCCAAAATAAGACTTGT -GTATTCATCATCTCATTATGTAGATGGGTTCTCGACGCAACAGGTAAGATCGAGTTTATA -ACATATGCCAAAAGAAAAGGGGTGGGTGAAAAGGCTCAACATTTCAGGTTGATACCCGCG -CGGAGCTCGTCTTCACGAACCTCAAACATAACCTGCACGCCGACAAAGAGTACTACGCAG -CAGGAAACCATGTACGCCAGGCCAGAGAATGAGCCCGTCAGGCCAGCAAGCAGATCCTTG -AGTCCATCCATACCCAACGCAAAGCCAACCAAGTTTGCGATCATCATCATGAGGACATTC -CCAACGGAGCCAATGCCACAAATGACACGATATGCAGTGGGATGAGAACGCCAGCGACTG -GCCGGGAAAAGCATGCCTCCCAGAACTTCTGGTAGAACGAAGAGAGTAACGAGCCAGCCC -CACATGAGCAAGCGGAGGTTGATATCGTGCCATAAGGCGACGAAGGTGAAGACCAGCAGG -AAGTTGCGGATTTGGAGGAACTTGCCCATGAACTTAGACTGCGGAGGTGAAGCTGGTTTG -GGTGCTCCGGGTGGCCGGCGTCCACCACCGCCGCCGCCGAGCGGGATGTAGACGTAGCGG -ACAATCCAGCGGTTGTAGGAGCGGTGCCAACCGCGCCAAAAGGCGAGGGCGGAGTAATTG -TTGGACATGCATCGGACCATGTTCTCCGGTGGATCAATCCCATCTATGAGGGCCCACAGA -CGGAAGAATCGCCAGGGAATGAGGAGTTTCAGCCAGATTATGTGCAGGTTGAAGAACGCG -AGCATGCTCAGTTGGCCCGGGGTGAAGACGGACCAGTCTGGGTTAGCTTTGGAGATTGCT -ACCGCATAAATATAATGGAGAATAAGCTCCATGCAGAGCAGAGTGAGGCCAAAGCGGACG -GCGTACATCATGATGCGAGCTCGAGTGAGCGAGGCCGATGTATATCGCTGTTGAGATACG 
-TAGTCGTTGAATGTCAAGATCGGACCAGCCACGTACAAAGGTGCGTAGAGCACATAGGCG -ATGTAGTTGCGGAATGTAAAAGCAGTTGCTTCTGCGGGGATCTTGACCCGTTCACGGTCA -GAGAGGGCCGCAGGGTCTAGTTGCTTCTTCTTGTAAAATTCGGTCAGTTTGATTATCATA -TGGGGGTTAATAGCTCATTGGCCACGGGAATACCTACTTCAATTGGACTTGCTGCGGGGT -AGTCTGCACTCCAGTGATGATCCATGTTGAAGCTAATGAGGCGCAACACCGTGACTTTGA -AGAAGATCTCCCATCGCGGCATCAGTCCCCCGAGATCATCAAGAGTCTGAGCCCATTGGA -CTAACACTGTGGCATCATCTTCTAACTTGCCAGACCCGGGAGCGAACACCTTCGCTATCC -GCTCAAAAGAGTAACCACCGCTTAGTTCGTTGGCAAACAAGACGCAGATGTTGAAGATCC -ATGTGGCGGCAGGCACGTATGAACGAGGAAGATCCTTGCCGATCCGGAAGTTGATATACA -AGATCGACAATACTTTGAGAGCGGACACCCCATGAAGCGCTGTGATGAAGATCAGTCCGA -ACCAAAAGTCGAAGCGCATTCGCTGTTCAAGTTGCGCCTCGGCCGCACCATTTACATGGC -TCAATTGTTTCGTTTCCACTTTGGGGCGGCTCGTAAACGATTGATACGCACGACGAGCTA -AAGGATGGGCGATTAGGAGTAGTAGAAGTATAGGGATATTGTCGCGGAAGCCTTCATATT -GCGAATCGGAGTTGTCCTAAGGGCGTTGGAGCAACAATGTCAGTCCTAAGTGGTCTCGTA -GACGTGGATGGTTGAACGCCAAATGTGGCTCACTCACCACCTGGCGGCCAGGGATCCAAC -CGGTTGATAAAAGGTGAGAGTATTTAGAGTGGCTTGGGTGGCTTTCTGTGACATTGATCC -TCAATCAGCATTTCGAGGCTCGGTGTTTTCTCCAGTGCCTGCACTCACCCTTCGAGACAT -CGATGACAGTCTTGAACATCAATGGGACAAGGATAATAAAGAACAGGTAGTAGACAAAGA -ATTCTGGTGTGCGCCAAAGTGAAGGTGACGCGCCCTGGGCGATTGCATTGGCGCGGGCAT -CTTTGGCGGAGGAGCCTCTTGTGGCTTGGGCAGGGAGTGCGGTGGAAGTGAAACGGGTGT -CAAGGGTGTCTAGCGAGTAGATTCGCCGAAGCCAGGCGAAAATGGAGGGCATTTTATAAC -AGTGGCATGGCCGGCGCAGGGTTTGGAGAAGTTGAAGCCGGAGATGTTTGAGATTCCAGT -GTTTACTCAGGGGCCGAGGAACCGCCACTTGGTAGGTGGCCTCAATGTACTAACTCGACA -CGGATCTCAAAGGATTTCAATGCAATTTAAATTAAGAATAAATCGATACCGGCGCTATTG -ATTTTCATTTCATATCAACATATGTAACCTAAATTGAGGGACTATGTACCCTTTAGACGT -AGATCCCATTTACCCAATCAGCGCTAGCCAGTATCATATCGAGTATTGCCGAGGCCCAAG -TATCCGATTGAGGGAGATCTACAACAAAGCTTCTGGTACGGAATAACACGTACTCCGTAC -ATCATGCACAAAAGATATAGTCGGAGGCCTTTGGGGTTAAGCAATATACCTTTGCGGGAG -CCAAGTTCCTCCAGAGCTGGGCACTATGTGTGCTATAGAAAATTGAACCCAATTTAGGTG -CGGACTTGAAAGATCAAATTGATAATATAGAAAAATAAATTATGTCCAAATTGAGGCTGC -ATCTGTTATACTACTCGGCTCTTTAAGGACATGTCAACTGTCATGCTGGGGCCACAATGC -TCTCGAGGGCTTGGCATCGACATCGTACACTTCAACTCTACCTACTCCTAGACATCATCC 
-AACTCTTCAAGTCTATCTTGTCTGCCAAAATGTCAAACACACGAGTATGTCTCTTGATAT -GTATCTCCCATTCCATCTTATACTATGCACATCGCGGGCCCGCAATACGAGGGCCACAAG -TTACCCCTACTGATATTCTATCTACTTCAGATCGCTTTCAAAGTCCACGGCACCGTTCAA -GGTGGGTAGACCACATCGCTTGTCCATTTTCTTCAAACATTACCATTCACTGTGTCACTG -ACTTGAACCCCTAGGTGTCGGCTTCAGGTAGGAGACATCCCTAGCTCGACCGATGACATC -ACAGTCCATCGATTAACGGATACTTGTCACGCACTACAGAGACTTCACCCAGAAATGTGC -TAAGCTCGATGAGGTCAAGGGCTGGGTTAGAAACACTACCTGTGGAAGGGTCAGTAATGT -CCTGCCGTGCCTCGATGTAGCGCTAACTGGATCTTATATCAGGTCGAAGGCGAGGCCCAA -GGCAGCGACGAAAGAGTGAAGAAATTCCTCCAGCGTATTGATAAAGGTCCATCTATGGCT -CACGTAGTAAAGCTGGAGAAGCGGGACCTGGATGTGCGTGATGACGAGGAAGGGTTTGCC -GTCATGCGGACCGCAGAGTCTATGTTCAAATCGGGGAACTAGGAGTTGCCTATTTTCAGC -ATACGGTTAATCGGAACACAAGTGGCGAACAGCCAGAGAGATTCCTCTAATTCATCATGC -TGAGCAGACCGAATGGTTGCCACCTGGCCGCGTAGAGAGAAATGGCTAACCAGACACCCT -TGTGATTAATAAGTGAACACTAGCAAGAGCGGAGAGTATGCATAGACAAAGTATGAGCAG -ACGACGGATATCAAAAAAGTAACAAATGAGAATGACAACCTTTGACATTTGACAGATTCA -CTATGTATGTATCACTATACTACTCCGTACCTAGGTATAGATCTCTCATGCCGAAAAAAT -CACTACAGGTAAAAACAATTGAGATGCATGTGAATCGGCAGGAAACGGGCAGAGATTTCC -CCCCTTTTTTTCAGGTACCAGGATCAATAGGTCAATACAGTCACCGGTGAGATGGAAGGT -ATGAATTTTTTCAATATCCCGAAGTCAAATGACACCAAAAGTCCGGGCTTTGATATCGAT -CATCTCTCGGGATTGACTTAGTGCTGCAAACGGCGTAGGGCGGTTACGGGCTGAGTCATA -TCTCTGCTCCCGACCCATTCCGATGTCACCTGCCTGCAAGGACTTGACCGTTTCGGGAAA -TACTGTATGGAATGAATGATCGTGCATATAGTGCAGGGATAATGGGAAGGCTGATCTATT -CGCTTCTTGTTCAAGGAAAGCAATGATAATATAAGATTGAGAGATATGTGATATACTGAA -AGGCCGATGGAAAGATGAAGTGCCTGTGCAATGAAGTATACGAAATGTCATCATTTCCAA -CCAGGACAACCTAGGAACCATCCAACGTGAACATTTCCGGGGTTTGTCTTTGAATTTTTC -TTTTGCAAATGTCCCTGATCATATTCGTGTCCTACCGGGGGTTTGCTTTGATGATGTATT -TAAAACTTTAAAGATCGGGCGAGCCCATCTTCATCTCTCTCCTACGACATAGGCTTCACC -TAGAACCGCTTGATatcttaatcatcttctcatcttcaccttcatTGACTTCAGTACTTT -CTTCTTCAATCGGGACAAACCCCAAACTCGATCTCTTTGGCTACATCATGAATACTCTAT -CAGCGAGGGCTCCCTTGCGGGCTGCAGCAAAGCTGCAATACCTGCATCTTGCTGTACGGA -CCTACTCAGGCGTTGCAGCTACAACCTTCAATCCCACATGCGGGGCCAGCAAGCGTACAT -CATCACTCAGCCTCACATCCAAACGCCCTATCTCGTCAACACCCCAGAACCAGATTGAGG 
-ACTATTTCCCAGCACCGAAGACTCCAAATGTCAAGGAGGTAAAAACAGCTTGGGTGCATC -CAGTGTATGTTTACCCACAGCCACTCTATATACACTAGACGCTGACTCTATGCCAGATAC -ACCGAGCAGCAAATGCGAGACATTCGCATTGCCCACCGACAAGCCGCAAATTGGTCCGAT -TGGATTGCTCTGGGAACTGTGCGGTTCTTCCGATGGGGTATGGACACGGTGACAGGATAC -AAACATCCCAAGCCCGGCCAGGAACTACCGGCCAGGTTCATAATGACCGAGCGTAAGTGG -CTCAATCGGTTTATCTTCTTGGAAAGTGTGGCCGGTGTCCCAGGAATGGTCGGCGGCATG -CTGCGACATCTCCGTAGTCTTCGCAAGATGAAGCGAGATAATGGATGGTACGTTCATTAT -GCACATTAGATTTTAAGATCCTATCATATGACCTCAATGCTAACCTACAATCTTCCAGGA -TTGAAACCCTGCTCGAAGAAGCATACAACGAACGCATGCACCTCCTTACCTTCATGAACA -TAGCCGAACCAGGCTGGTTCATGCGCTTGATGGTTCTCGGCGCTCAGGGTGTCTTCTTCA -ACGGCTTCTTTCTGGCCTATCTGGCCTCTCCACGCATCTGCCACAGATTTGTGGGATACC -TTGAGGAGGAGGCAGTCATTACATACACCCGCGCTATTGCGGAAGTCGAGAAGGGCAAGC -TCCCGGAGTGGAATGACCTCTATGCACCCGAAATTGCGATCAAATATTGGCGGATGCCGG -AGGGTAAGCGCAAGATGAGGGACCTGCTGTTGTATGTCCGCGCGGACGAAGCCAAGCACC -GCGAGGTTAATCACACTCTCGCCAACCTCAAGCAGAATCTTGATCCCAACCCCTACCAGA -TCGAATACATCGATCCGACCATAAAGCATCCCACGAAGAGCATTGATAACTTGAGGCCAG -AGGGCTGGGATCGCAAGGAAATCTATTCTATTGAAGCGACACAAGCCAAGTCTTGAGGCA -TTCAAAATGAGttttttctttcttttttttttttGAAGGCGTTTCAAAAATGGTGTTAGG -TTAGAAAGGATTGAATAGAATCTCGTCTCTCCAAAGATATTCCATTGTTGTCTTAAGGTA -TACCTATACCTACTCGGTACCTTTGTGCCTTCCAAGCCCGGGAGGTTCCAGCGGGCTTGA -AGCCCATCAAACCATGGAACGCGGGGCACTAAACCCAACCGGGTAAGAAACAAATGTTCA -AGGCCCTGAGAAGTACTCCGTATACTCCGTATACTCCGTACACATTCGAGGACTTCATGA -CAGTTGGGCAATCGCAAAAAAGGGGTTTTTATTTTTGGTGCAAAACACTCATCTCTTTCA -ACTCCATGTGGGCAGTTGACCCGCTTCCCCTTTTAGCCCCCGTTAGTCGAAACCCCCCTT -TTGGCCCAAAAGCAGTTTCTCGTCCGTCGATCCTGCAGGTCTCATTAAACCTCCGTCCTT -TCATTAATCCATTTATATAAAAGCCGTTGCTCGTCTGAGGATCGCCCAGGCTTAACGTCA -CACTTTCCCCAGAATATCCCCTCGTCACTGTGAACCTTCCGGTGATTGATCTTCACGCGA -CTCAAGATCCACTAAGTTTGATTGGTGACATATAGCTCGGAATAATCAAGCAAGATGGAT -GCGCCCCGATATTCCATGGACTCCAAGGCGGCGCACAATCGGGCCTCTTCGCCATCGCCA -TCTATCCCGGCAACGCCAGCTATATCCTCATATTCCTCTCCAGATCGCACATTTTCTTCC -GAGTCTTCCCGCTCTGCCTCCTCCGCCACCTCTGCAGATGCCAGATCGTCTGTATCAACA -TCCTCTCGTCGCCATGGATATACCCGTGCATTGGGTGCCGAGTTCTCCGAATCCGCCCGC 
-CACCGCGACAGCGTCATGAGTTTAGGCAGTATCGCTCACCTGCAATATTATTTTGCCCGG -ACGGGGTTGCTGGACGGGAAAAGCGGGAGGGGCCGCGAGTGGGAAAAGAACAAGAAGAAC -AGAGATAATGTGCCACGAGTTCTGATCACCCCCAATCAACGACACATGAATGATGACTTG -GTCGCAAGCCCGACAGATATGGCAGATCCGGCGGAGGGTGAATGGGACGATGAGGACGCC -GAGGTGATGCTGCCCCCCACAGTCAGCACATACAGCATCAAAACGCACCACATCCCCCCG -CCCCCAGATCTAATTTTCTTGCGTCGGCAGCTTACGCTCGCGCTGGATAAGGCAGAGACT -AATATTGAGGCAATTGAAAACGGCGCGGAGCCGCCTCCCCGCCCAATCCTCCGGTCGAGT -CTTTCCCCGGGTGATATCCCGGAGGATGATCAATCCGGGCAGGTGCCAACCCCAATGAAT -AAGGAGGAGAGGCAGGGTATGTGTATTTTGGACGATGTTACGAATGCAATCCGCGCAGCC -AAAATTTACTATACCACGCACGAGAAGCCAGAGCGCTTAGCGTCAATAAAGAGTGAGCGG -GAGATCCGCAAGGAACTTTTCGACGTCCTCGAAGTTTTGAAGCGGTGGGCTGCGCGACAT -TTTGCCAGCGGTCTGCGCGAGGAGGAACGTGAGCGGATTCAGGGTTGGATTGCGGCTGTG -CGTACCATGCTCGTCCGCGAAAAAGCGCTAGAAGACCTTGAAGCCCAAGAACGGGAGAAC -TGGGATTGGGCTGCTGGAGATTGGAGAGGTCGTGAGCGCTCACGCGAGGAATCCTTCCTA -CGCAGCCTATTGCCTGGCGGTGACTCCTTGCCGACCTGGACACCGGTTGAGGAGGCTCCC -AGTGACTCGCTTCCTACCGCGTTCCTTGATAGGTTCCGGGACGGCCGTGCATTGGTCCAG -TTACACAACCTTGCGATCAAAAAGTCGAAACGCCAATTCGGTGAGATCACGGCTTACCAC -TTAAACATTGCCAAGCCGTACCGGCAGACCGAGAATCTTCAGTTCTGGGTAAAAGCTGCA -CAACTCCGGTGGGAGCTTCGGCTGGATGTTGATGTGATGGGGGTGGTACAGAATAGTGGG -ATTACCGCTTGGAAGCAGTTCGATACTGCACTGCTGGCCTGGTGTAAAACTGTCCGTGAA -GAGCTAGTGCGCGATTGGCAGGCGGCTAACCCTGGGCGACCCCCAAGTGCTGGGTTGATT -TGAGCGCAGTGTTTGAAGTTTTCTAAGTATTCGGAGTTCATGTTTGCGTCTCGATTTCTT -TTTCTCTTTGATATCTTTATGGGCACTTGATACGATACCCCTGAATTGTCGCCGACTGTT -CCGCCCATTCATATTTTGGCTTTTTTTTCCCCCTTGATTGGTCTCTTTTGGAATTCTATT -CATTGTATATCATTGTATATCTTATTACTGTTTCATTCTATGGACGCGTGCCTCAAGATT -AAAGCTCTGAACATCCCTTGTCTTACTTTTGTGCTCAGTCTACGTTGGGATATATGGTTG -CTGATCTGTATATAGAATTATGGAACTAACTCCTTTGGGTATTAATTTTCTTTTAATTAA -CGCGTCTACGGGGTACGGAGTACTGTGACTTGATCGACCTCGTGCAAAGAGAATGTATAT -GTATAATATAGGGTTGAATGTTAGATTGATCTGGCCTTAGTGTGCTAGAAACATCGAATA -TCGTTTAACTGAAACAATTTAAAGGACAAGAAAAGAGCACAAAAACAAAAAGTGAAATGA -TTTCATCTCGCTGATCCCCTTGAATTACATACCGAGGAGGTACATGCAGCCCCATAGTTA -GTAGTACGGGGCTGGAGGTACCCATTCTGCAGAGTAAACGGCTGTACATGGCCACTTTAT 
-CCCTCTTATGATTTGTTTCAACAGTTGCTGCCCGATATCTCCCTCAGATTCATGAGACTT -CTGCATCTAGAACCTCCACTCATTCCTGTCATGTAGCAGAATTTCTTCCCGACACTCTGA -ACAGCCAAAAAGCCAGCCTTGAACTGTGAGAAGGGGTCTCCCTATTACCGAACGCCGAAC -AAGACGAAACATTCCTTGTATCCCTGTCTGAACCACATCCTTTGGTCCTTTTGCTCTTTC -AATCCCCCCAGGGCCGGTCTCCATGCATGCCACAGCCGAAAGTCCAAATTTTGCGACCGA -CCTCGCTACGCATCCTCCCAACACCTCTACAACATCTACATCCCCGTCGCTCACCCCCGC -GCTCCCCGTCTCTCCAAATGACTCCGGTACCTCCACCCTCCGACGCCTGAAGTCCAAATC -ATCCCTCTGGAGCTTAGGGAGCAGCAATAACGAAGAAGAACCTGTCTCTACTGAGCTAAC -AAACGCCGGCCGAACATCCATCCTCCGCCGACTTTCTCCCGCCCTCGCGGCTCGGGTCAA -GCTGCTGGACAGCAGTAATAAGTCCGCGGCGCAGAACCGGCATGCCAATGCAGTCGGTCG -GATTCCTGAAGAACACCTGAAGGAGCTTGATAGTCTTCACCAAGACCTATCGATCAAAGT -GAAGAGGAAAGGCCAAGCGTGGAATGGTACAAACATATCACCCGGAGAAACCCAGTTCAA -ACGGAGCGCGTCAAACCAGCTGGAGCTACCCCACCAGGATATTCTGAGAGAGATTGCAGA -CGCGCAAAGAGGGAGCCCGGAGTCATCCGAAGCAGTCGTCGACGGTCCTGAACTACCCAC -TCTGCCCATCATTGTTCCGGTGACTGCCCAACCCGTGGAAAACATCAGCCCACCCTCGCC -TATGTCTGTCGCAGAGCCGGTTCTCGCGCAGCAAAGCGCAGCCCCCATGCCAAGCCTACC -GCATTCCAGTGATTCCCAGGATGACCGGACCGACTTCGAGAAATATGTCGATGATACCGC -ACGAAGAGAGGAACGGTCCGCTGACGAGGAGTCCGCTCCGAAGCCCCCTCCCAAAGACTC -CCCTCCCGCCGACGCCCTGCACTCACGCTCTTCATCTAACTCTCAATCATATTTCAACCC -AATGGGTTTGCAGCGCGCAGACTCAATATACTCTTTTTCCCGTGCATCCTTCAGCAACCA -GCTCTCGCAGTTAACCTCCATCCCTCTCCCACAACCAGCCTCACTCGAGGCAAGCATTGA -AAACATTTCTACGGCTTTGTCTGCAGTCCGGGCCTTGAATGGTGCCGCAGAGCAAATTCA -AATTTGGATCAAGAAGGCCTCAGATGTCCTGAGCGGCTTAGATTCTGAAGATGACGTGGA -ATGGGCTGCGGCTGGTGGCCGAGAGGGACTAGATGAAGTCGACAAAGCAATCACGCGGTT -CGAGTCCCTAGTCAATGTGTACGTTAGGGCTATTGAGAATGTCCAACATCGAGACGACAT -TGCGAATGTGGACGCGGATAACCTGAACACTATCGTGAGCCAGATGGAGAGCATTCTGGA -AAACTGGACTCAGATCAAGAAAAAGCTGAAAGGCGTAAAAGAGCAAGTTGAATTGGCAAT -GGAATGGGAAGAGCTTTGGTCGAACGTATTGGGCGATGTGGGTGTGGAAGTCGACAACTT -GAGCGGGCTGATATTTGAGATGGAAGAGAAGCGGCATCAGGCGTTGATGGATACGGGTGA -TACCACTGGCGGTCTTGACATCAATGAGTTGGAGACGATTGTGGAAGAGTCCCCCTCGAA -GGGCCGCGGGCATACGCAGAACCGTCTGAGTATTGGACCGCTCTTGGCCTCGGCGTCTGC -CACTCCCGTCATCAAAACACCGCAGGACGATACTAGTCACTCTAACCTCATGGCGATTTT 
-TGCACGTATGCAGCCATTGCGCGCATCGTTGGAGTTTTTGCCTATGCGACTATCAATGTT -CCAGGGGCGCGCCGAAGCCATTTTTCCGAGTGCATGTGAGGAGATTGAGGACCGACGAAA -CCGTTTACTGAAGAGCTATGAAGTTCTTGAGACCGATGCCGAAGCTCTATTGAAGGAGTT -GGCGGAGGACAAGTGGATTTTGGTGTTCCGCAATGCTGGGTCCCAGGCACAAAAGATGTT -TCAATCGGTTGAACGTAGCATTGGAAAGCTACAAGACGGGCTGGAGAGTGGCATGCAGGT -GCACAACCCGTCAACTTTGGCCAAGCTCATTGAAAGCTTTGAGCAGAAGAAAATGCATTA -TGTTCCTGCTATTGAACGCGTTGTTTCCATCATTCAAAAGGGTGTCAACGACCGCTTGAC -TGTCAATGGTGAGATTCTGCGCCTGCTCTCCGACATGGCATCCCGCATGGACGCCCTCAA -GGCTAGCATCCGGGTGATGGATACATCTCTGGAAGATGTACACATTCCCAAAGGCCACCA -GCTACGAGACTCAATCTCCAGCATCATGACTATGGACAGTCCAGCCACAGGCAGCGCAAT -TGAGACCCCAGGGAGCTCGCCTGCGTCGTCAGTCATCATTAACAGCAACGGGTACAAGCG -TGCCTCAACACCAATGGGCGGCTCAAGTCGTCGTGAGAGTTCAGTCGGAAGTGCCTCAGC -CCGCTCTACTATGCCCCCAAATCGTCGTTATTCCAGCTTGCCGCAGCCCACGGCGAGTTA -CACCACTCGAAAGTCCGCCATCCCTCAACCGACAGGTCTTGTTTCTCCCACTCCATCGTA -TAGATCCTCAAGCTACTTCACACCGACTCCAGGTGCTCGTTCTCGTACACCAGCGCCGCC -ATCTTCAATTCCCAATCGTCCACGCTGGAACCCCAGTACGAACCTGAATAATTCAGCTTC -TAGTTACAACTCGACAATACGGAAGGCATCCACACCACTCTCCCGAACACCTCGGCCATC -ATCAGCAATCCCGGGCTCTTTCCGACGAGACACGTCTGCATCCCCGATTCCCGGAGGTCT -TCGATCAACCAGTCGTGTCTCGAGCCGCTTTGGTTCCCGGAGTCCCAGCCGGAATGCGAC -CTCTCCGACACCAGCGCGGCCCTCGCTCCTCGACCCACCGCCGTACAGCCGGCTCTATCG -CCAGTCAGGGATGCCAAACACACCTCGCAGTCGCCAGAGCTTTGCGGGCCCGCCCACATC -TTTCAGTCGAAGTGTGTCAGGAACTACAGCTGGGATGGAGAGCCCAAGCAAACCGGCACG -ACCAGGCACATCACTAGGCCATTCCAGCAACAGGCGAACCAGTCTACTCCCGGTTCCCAG -GCGAATGGAGAAAGAGCAGGCAGCCCCCAAGGTGGATACGCGTCCACGCTGGCGTTAATA -ACAACTGAGTTGTATATTTCGCACCTTTCCGTAGCTCATCTCCTATGCCACCTATAGCCC -TCATCATCTACTCTCCCTCTTTCTCATTCTATCTTCCTACTATCCTCCTATCTACATCTC -TCCGACCTTGTTTTATCTATGTGTTTTTAAATTGCATTACACCCCTTCCCCTCGATTCAA -TCTCGGTTTCCAAATCGGTGTTAAGTCCGTGCGAAGGGGAGCCATCATCTTGAAGAGTCA -AGTTCTGCGTGATATCCCCATTGTTTGCATGGTATGGTCTGAGCGTTGGGGTCATCTATT -TCCCGGTTGTTTTCTCTTTTCTCATTGATTACCACTGCGCTTCTCTTCCCTGTGGATATT -ATCCCTTGTAGTTTTATCTAGTCTTGCTTTTTTGGTCTTGGAAATGACATGACTCGTTTC -AACCAGACAACTTTATCGTATATGCTGATGTTGGCTCATTGTAGTAAATGTTCATCATTC 
-GTAATAAAAAGGTACATGTATTTATACGTGAAACGGCAATTCAAATATGAACTTCATATC -TGAAACAGGCACGGCCCAATCTAACACTGAAACTTTCCATTTAATTTCCCAACTCACGTC -CGAAAATCAATGCATCCACCAAGTCCTCGCTATTTGCCAACCGCGCAGCTTTGGGGACCC -GTCCAATAACGCTAAATCCTAGCTTCTCCCAGATACGGATAGAGGCCGGGTTATTTGCGA -AGACCAGATTGAAGACTGAGTATGTGTATCCCTAGGAAAGGAGTTAGCCACGCATGTTGT -GTGCTTGGTAGAGCAAAGCCTGCTATCGCAAAGCCGGGGGTCAAGTCGTACCAGTCTCGG -GGCAAATTCGAGGTAAGCTTCGCCCATAGCCTGTCCGACGCCCTTGCCGCGTGCAGCAGT -GGTGGTGAGAAAGCCTGCATTGCAGATATGAGAGCAGCGGCCTGTTAAGGTTAGTGTAAG -TGTTCCTGCAGATATTGGTTTTAATCCGAATATGACAGAAAGATCCACATACCAGGATAG -TTAGGCTTGATATAGAAAGTTCCCATACAAACACTCTCCCAGTTCCTCCCCTCCCTTAAG -CCCTCTCCCTCATCATCTAAGATCGCCAAAGCTGTAAATGTCCCGAACCAATAGTCTGCG -AACTGATCTAGCGCCATCGGCTGCTCCATGGGGTATGTCCCACCGCGGAGGATCTCCGCG -CTGAACTCCTCATGCAGGAATTTGATCAGGTCCTTTGGGACTGTTTGCGGTCCTCCTGTG -ATGGGGTAGAGCGTCATTGGCGCGGAAGAGGACTTGGGGAGGGTGGTCTTCCGCGCTGTT -AGGGTCGGCGCTGCATCTGTGTGTGCTTTTGGTGGGGGATTGCGGGTTGAGGGGTCTTCC -AGGATGGAGGGCATGGTGAGTGGAGGGTTTTTTGTAGTGGGTGAGTTGGGATGAGTTTGG -GGATAGAGTTAAGGTAAGACAGACTCTCAATTGGACAATTGTATAGTGGTCAAAGGGATT -TGACGATTTATATACCCTCTTCTCTATCTTCATATCGGTATCGGAGCCGATTTTGCATGA -GTCAGCTTGTCATTGGTAGGGCGTTCTCCACCGATGTTGGAGTACATAGTTTGATGTGCG -TGGATAGTTCCTTTGCTAGAATATGCTACATGGATCTAGGGCTGAAATTGCTTTTGTGTA -GGGGTCAGTCTCTCGCCAATTGTCACGGTGTAGAACATGCCCAGGTATTGATCGGAGTTG -TTCCGCAATCTACAACCAACATAAGGTTCTCAGACGTACTCATGTGAAGACCAGACTCCA -TGATAAATACGACCTAGGTGCGGACCTGACCGGTCAAACTTGTTAAAATCAGGTAAAAGG -CCTCGTCTCTTCTATTCCTACACATAGTATATACATATGGGCTGGCGGGGGTATGGGAGA -TGCCTGCATATCCCAGCATAACCCTACCAGGCACTTCTGACTGCCATAAAATCAAGCACT -GCATTTGAAAATCGGTTTGGACTGATGGGGCAACCGAGACCTCATAGTTTGAAGTCGATG -TCGCCAGTTCTCGGTGTGGTAGTGTCAACTTCGGTCCAATGCAATCATCAGAAAGCTCAT -TGGTTCTCTCCTTCATTCTCATTCTTCAAAGCCTGCATAAGCTCAAGTAACTGTCCAAGT -TGCTGATCTGCAATGAATAACAAGTTTAGCAATATGATCGACTGAGGCAGAGTTCAAACA -ACTGTCAACAACATACCACTCATCTTATCCAACTTGGCCTTGTCCAGCTTATCACCTTCC -CACTGATCGCCCTCCAGCTTCTGCGCATATAGCTGCCACTCAGTCAAGAACCCGATCTAT -CATCGCCGACATTAGTAACTGCATCAACAAGCCCTTTCACTGCCGCCATGCCTCCCCTTC 
-TCTTCAACTTGGGTATGACCACACCAACTCAGAGATCAAACGTACAATATGCAATGGATT -TTCCACACCGCGGTGCGCACGGAACTCGTTCTTCACATAAGAATCGCCCAAGACGCGCAT -CTCGGGATCGAGTTTGCGATGGACGCGTAGAATGCGCCGGTAAAGCTGCAGCGGCGGGAG -GAGAGCGAGAGCCTCGCTGAGGCTGGACTTCGAGCCAATCGAGGCCGGCGTGGCCATGAG -TAAACGCTGAGCAATCCGCATTTTGAACAATTGGGATGTTTAAGGAAGATGGGGATATGA -CATAATTCCGGGGATTTTTGAGGCAAAGGCGGCGAAGAGAACGCGCGAATCAACGGGTCG -GGATGTCTCGGGATCTTTGACGATTTGGCCCTTTTGATCTCTGCCATTTGCTGATATTTT -TGGATTTTATATTGCTTCACATAACCCCAATTGACTATATTGGTCCTTATTTTTTCTGAA -ATTTGTGATCGCCTCGGGTCTTGTGCATTCTGCATATAATCTTGAGATAATTATCTTCAA -AGACGGAACCGACATTCTTTTTTTTAAAAGGAAAGCGAATTGGCCGTATTAAACATACGT -ACCATGCTTCCCACACCGGACACTTCACATGTGTCCTTTGACACCATCTATGAGCCATCC -GAAGACTCGTACCTCTTTCTAGACACATTGTCATCAGCATCCGAATCAAAATGGCTCTCT -GAGCGATTCCCAAAGAACCAAGAGGCATCACCATTAGTCGTCGAAGTAGGCACAGGCTCT -GGTGTTGTGCTAGCCTTCACAGCCGCACAGTCTCAACATATCTTCGGCCGGCGCGATATT -CTCACGCTAGGCACTGATGTGAACCGCAATGCTTGCATCGCAACGCGGAAAACCGCAATG -ACAGCCATTCAGGCAGAACAACAGCCGTCCCCTCAATCAGTTCACATATCTTCCTTGACA -GCCGATCTTTGTGCACCTCTTCGCCCTGGCAGTGTTGATGTCCTCCTCTTCAACCCACCT -TATGTTCCGACCGAGGATCTACCGCGTCTGCCTTCTGCGGCGGAAAATGATCCGGCTGTT -TCAGAGGCCATGTCTCGCTCTGCCAAGTTCGATAATGACTCGTACTTTTTATCGTTGACC -TATGCCGGCGGAGCTGATGGTATGGAAACTACGGATAGGCTGCTTGAGGCTATTCCTGAT -GTGCTATCTGTGCGTGGTGTGGCTTATGTCTTACTTTGCAAGCAAAATCGTCCTGATGAG -GTCATGGAGCGCATTCGTGGCTGGGGCGGATGGCAGGTTGAGACGGCGGGGTCGAGTGGG -ATGCAGGCTGGGTGGGAGAAATTGGTCATTGTGAGGATATGGAGAGAGGATAGCTCATGA -TGATTAGATAAATTGAACAACGATGATACCATTTGTGAATTGACGAAAAGCAAGAAATCG -AGAGACACATACACATCCACCAACTAACTGCTCGCTGCCTAGATAGTAGTTCACAATAGA -GAAAGAATCATGCCCATTCCCACTCAACGTAGCGCTCGCAGCCACTGGCGCCGGATGCCT -CAAGGATGTCAAACACACGCTGGAGGGCATGTTTTCCGGGTCGCATAGGCCAGTGGGCCA -CGATCGTGTTGTTGCCCCTCATCTTCAGCCCGTATTTGCTGGTGAGTCCATGAAACCCTG -TTGCCTTTTTGTAAAGATCGAACAATTTACTGACATCTCGGAAGAAAGAGCTCGATCTCT -TGAGTCTCATCTCGTCTGCATCATCTCTGTACGACGACTCGGGCTCGGGGAGGTATTTTG -CAGCACGAGCCATGTCCCTGCCAAGTGAGTCAAAATTCCACATGTTTTCAATGTCGCGCA -TGAAGAGGACAAGTAGCATAGCCTTTGGGTTTTGAATCTTCGGCTTCAGCAGCTGGCTAA 
-ATACGGCGAGGTTGAGATCAATCCCAAGCACATCTTTTTCTGCAGTTACGTACGACTATA -TCAAATTAGCATCGTTTCAATAACATAGTGAACTATATCAAACAGTTTGGGAAGTCAAAA -TGAGTAGTTTACCTCAATTCGGTCAAATCCTGGATGCCCGCTTGACTCGTTGAAATATTT -CGGTAGTTCAAGTGAATCGATAGAAAGAAGTCGAATGCTAATATCTCGCTTCGAAATTTG -GCGACAGAATTCCAAGATAGTCTCTCGGAGATAGATAAAAATCATACCGTACAGATCATT -CTTCGCCGCATAACTCGCAGTGTGCAGTCCATCGCATATCTCCAAAAGGTCCCAACTGCT -GCGCGGATCTGCATTGTCTGGCATAGGCCAAGAATGGCTAGCAGAGAAAATCGTCCTAAT -TTTGTAAGTGGACTGACGATCATAAAGAGGAGTCAAAGAGACTAACGGGTTAGGGGTATC -AAATTCTTTGCGTGAGCACCCAAAAGGCAACAATATGCCATCACGGCGGAACTTCAACGT -GGCAACACGCCAATATGGAGGCTGTCCATGTAGAGCCCTATGGAGGAGGTCCTCACCGTT -TGTCGGTAATGTGACCTTCTGTCGATTACGGGCAGCCGTGGCAGCAGAAAGACGACCAGG -AACCTGAAGAGATTCTCGAAGACGTTCCCAATCATCTCTCGAAAGCTCCACATGTAAGGA -TGCTTTGTTCTTCTTCCAAATCCATTTAAAATACTGAAGGGCACGTTTCTGTGCAGCCTC -GGAGCAGACTTTCCGGATCGACGGCTGGAGCTTTGCTTGAAGTGCTGCGAGAACAGACGC -GGGGATCAAAGCCGAATACCACAAGTGGATCATAATAGGCGCAGCTTCCTCCGGTCGAAA -ATGGAAAGCTGTCAACAGAAGAATGATATTCTGAGCGAAAATCCCGGTGTGGATTCCATT -CATCACTATCTCACATTGCCCCGAATACGTATCTGGCAGGCGAGCGATTGTTTCGACCAT -GTTTCGAATATCGTGTGATGCTATTAGGGAGCATGTTAATTAAAAGGAACCGGCTGTTGT -GGCTCAGGCAGTCTCTTACATGCAATAAGAACACGGATCTTGGAGGGCGCATCCTGGCCT -TCATTACTATCCAATTTCAGCAAATCCAACGCTGGCATAGTTCCCCACCACGATATTTTT -CCCTCATTCGTGTTTGGCTCAGATGGGTCTTCTGCGAAGAAACTTGGCTCTCGCTCTTCT -GTATGCCAATCTGGCCTCCATGAATCCTGTGCCAGACTGTGATTACAATCCTCCTTGTGC -GAGGGCCAATGTTCTTTTTGACATTCTTTGCTGCAGTACTGATTTGGTTTTTTGGTTAAA -ATATGATGCAGGATTTGAAAGGTCAAGGGGTCAAAGACTGACCACAACCAAGAGACAGCG -GCTACAGGCATTGTTCACCTTGGAGGTTTTGTTAGGACATGGCATGTTTATGTTATTTGC -ACATAATGCGGGATGGAGGAGCTTGTCACAGCTTGGCATGATGGAAGTATTCCAGCAAGG -CTAAAGAAAGATGAAACCCGCAGCTGAGGATATTCGAAAAGCCGATgaacgaagcgaagg -aacaaagcgaaggaacgaagATAAAAAGCAGGATAATCCTCAGCAATCCCCCTAGAGATG -CTGGATGGCCGCCCTAAATAATGCTGGAAAAACCTCGTCACCGCCAGGCACATCATCACA -GCAAACACTGCCCTGAAAACTAGTAGCGAGCAACTGTTGGCTGTAGTCACCTATCGGATG -AAAGTATATTATACCGCCCAGTGAATGCATATGACAAGATCAATCGCCAGCTTGAACTTG -CTCTTGTGCTTGGTTCATCTGTTTTTAATATGCTCTTTGGCAATTGGCACAATTTCAGCA 
-CAAGTCCAGCATCATACAATGTGAGACGATATTGCTAGTCAGTAGTTGAAAAAGCTTAGT -CACAGACGAAATCTGAAATTTTACATATTTGATTTTACTGTGCTTTCGGGTGGACTTTGC -AGGTATTCAGTTTAGACTCATAAAACCTCCTCAGTAGGTCTATAGGTAGACAGCGTATGT -AGCCCTTGGTGTATGTAGTATGTACTAGTATTAAACCCACGTTCACGGAATATACATGTG -GAGGTACATCCGCATGGATGTTATTATATATGTAAACCGGGGtacatacatatgtacata -cacacgtatatGTATTCAAGATGTACCTAGTTACCAAGGAATGAATCTTGTTAGCATCTA -TGGATCTGATCTATACATTTGTATCCGTATAAATGTATGTAGTAAACAAATGCTAGAGTA -ATATAGACACATCGTACAAAATTTAACAATGAATAATAGTGTTCCTCGCCTCTGAACCTT -TTCTCCAAGCTATTCATTCTCCTTCACCGGTAGTACTGTTTGTAGAGCAATGACGAGTCT -GTACTTTGTTGTTCTCTCATCTAACTAACCACCATGGCAAACCAAAGGCCCGCTAGGGGT -AGATAGGCCGAAAGAGCATGTCAGGTCGCTTTTATTTTCTTAGATCTCAAAGCCTAAGAG -ACCATCACCCACCATACATGCTTCCAACCTATATTTCTATGCTTCCACCTATCATTCACG -CTATCCATTGATTTATCAACCATTGCTGATACCGAGATAATTGTATTCAATGCATTGAGC -CTGCCGCCGAGTGAAGCTTTGTTATCTGTGGTTTGTTTTTGTTCTACGGCTTGTTCGGCC -TCGGGCATGACTTCGTCATAAAATCTGTTGGCCACTTTGAAATGTTACTCGTCGAATGCA -GAAGCTTGTTTATTATTTAATGATCACTACTTCTGTTACTGATGTATCACATTCGTAGGT -TTAGGTAACCATATGCCCTCTTCGATATATTTCAATTCTGCGAGAGGTCTATGGAGACGG -CTACGAAGTCCATTGACCTTACTTACGTAGTGAACGGATATCTGTTGTTGAATGGACGTT -ACACCAAATGACCGGCACATTATCCAGACTAGTTAATGTTATTCTGGTCATGTAGTGCCA -TATGACCCAACCAATAACAAGGCCTTAGTATCCGCATCTCCGCCAAGCCTGAAGAATTAC -CTATCCAGACAGTGTAAATAGCTTCAGGTTTGCGACTTATTGTCGCTTTCAAATCACGCA -CATTGCCTCCGCCATAGACGACAAGAAGTCGACATTCGAGATGACGAAACTGCTACGCGA -GTTGACTATGAGATACGCATTTTGTGAATCCAAGACTTTTCCCCCTGTCAGCAACAAAGT -GATCAAATGAGTGACGTAGAAGTGTTGTCAGCTCCCTTAGATCTGGAATGCTATTAGTGG -CGGTTCAGATCCTAATTTAAATCCCTAACTAGAAAACCTGTAATCTCAACCACTTGGCCA -TACTAGCGAGTCTTCCATTTATCCTTAAGTAAGAATTCTATAGACCAATCCGATGAAGCC -CTGACGTTTAAAGTCTGTTGGTCGAGAGCATAAAGGAAGAGCTGCGAGAGCCTAAGAGGG -GTTCACGGGGTACTTAGCTACATGTATAGTAAAGTCTCGATACTTTTGCTCAATTTATAC -AAATTTTCAGATTATTCCTTGCTCAACTTCCTCAAATCTTGCGTTCTAAGTAATAAATAG -AAATATTATCTGCATAGGTTATCCTTTCTCTCCCTTTTACCGCCGTCCACCTACAAAATT -GAGAAGGGAAAGGAAATATCTGGAAATTCCGAATACGAAAACAATATATAAAAGGATTAT -TAGAAATGTTAGAAAGGCTGTAGTAGCGGCTAAATATCTGTTGTAAGTTTAGGAAGAGAA 
-AAAATTCCAAAGTCGAAATCTTTGCGAGCGACGATGGGATAAGAATAGGCTTGAAAGCTT -GCCGCTCCTTCCATTGACCTACAATTTACATACTGCATAATTTAAATTGAGAATGGGTTC -CTTTCATTTGAGAACATTGGAGTTTTAGTTTTTTTTTCGGGGAGAGGATGGTGGTAAGGA -GAGTAATAAAACAATGAAGGAACATGCGCGAGAGATTCTCATAACCTTGGAAGTTTCACA -AGTCAACCTTGAAGCATTTTCCAATTTGTTTATATGTTGTTTGTTGTACATCCTTGGAGT -AACTGGAGGGGCAGTACCCACCCCATTTTAGCGATACCAGCAATACATTGACTATGTACA -TTGGAAGGTGGTCCAATCAGTAATAAATTTGGTAGGAATTCACAATCCTATAGCATCAAT -GAATTGTTTCAACGCCCTATAACAGCATGAAACGATGTTGTTGTATATTGTTGCCTGAGG -CGACGATGAAGTAATCGAATGTTAGCACTAAACCCAAAAGGGCATTCGTGCAACGTCGAC -CCTGCCATCTAATCCTCGAGTCGATTGTTCTTCCATAAATTTCTGGTTTGGGTGTGGTGC -TTGTTTTCTCTTCTCTTCTTTTGGGGCTGTGCCAATTTGCCTGCTCCGCTCGAGCCACGC -TCGTTTCTCAACAGAGGAGACGATTGACAGCATCGTCCAGCCCACCCTCCACCAACAGTC -CGGGGTGCAATAGCTCCTCGCAGGGCAAGCCGACTTCTACTTGTGCTGATCTGCTTCCAA -TGTCCTGACCCCATATAGGTCTTGAGTACCGCACTACAATCAATCAACGGGACTCTCCTT -CATCTGGGGGATCAGCCTATCGTGGCCGCCGAGCAGCTCATACGTGGAGGCTAAAGCTAT -AGGTCTATGGCGGAGAACAAGCGCCAGGTTTCAGTAAAAGCCTCTGCGCAGAAACAATCC -GGTAACGCAGCAGGCCCAGGGATGGCGACGCCCACAGTGCCCTTGGAGGCAGCATTGAGC -GGGGCGATCGGTGCCCGGTGAGTCGTATCTGTCAACGATGGTGTATACATAGTCGCAGCG -CTGACCTCTTCTGCTCTTGTGTTTCAGCGTGCGTATTACAACCGCGCAGCCACTCCAATC -GGTTCTCGAAGGTACATTATTCACGGCATGTCCGATCACCAACCTCGTGGCCATCAACAC -GGCCCCCGCGCCCAACCCCGGTGACGCCAAGCAGGCACAGAATGGCGACTATCGTGTGAT -CCCGATCTCGCGCATCCAGAACTTCCAGCTCCTTGCACTCGCCCCACCCTCGAACTCTGC -CTCGTCCTCGTTCGCGGATGCACAACCCACGATCCAGGCACTCGATACCCGTGCGCTCAA -GGCCCGCGAGTCGAAGGCCATCGGCGAAGCACTTGACCGTGAGGCACGTCGTGGTAAGGG -CGTTACTACCCAGGCCCAAGACCTGTTTGATGCTTTCAGCCGCACAATGCCCGCGCGATG -GAACGGCCACAATATTATTGTCGCCGATGCCGTGACAATTGCTGCTCCTTACCGCGTTGA -CGATTGTCGTTCAATTGTGGAGGGCGATACGGCTGCTCTTGCTCGAGTGCGGAAGGTGGT -AAGTTGATTCAATCACTTCAGTCATCAGTCGTATAACGAAGGAGCTAACGTCTTGGAACC -TAGCTCGAGATGGAGCGCAAGAAGATTGAGCTTCGCAATGCCAGTGCAACTATTGGCTCG -ACCACCACCTTTTCTCGCCATCCCAGTGTCCCTAACGACCAGCGCAAGGGTGGATAGTGT -GCCTAGTCACGCGAATACCAGAAACGCAGGTCTCCGATCAAATTGTGGATAGGCCTGACG -TCGTTTCAAGTGCCTTAGTAGCGCTTGATCAACAAAGTTGATGGAGATACCCAAGCGAAA 
-AGAAAAAAACAATATTGCAATGGGATAGAGATGATTCGAAAGCGGATCCGACGGGACACT -GCGCGTTTCTGACCGCTGGTGTTTGGGGGATGACTTGCCTTATTAAGAAATTATATCTGC -CTACCATCAAGTTGGACCATTCGCATTGCTGCCAACTGATTTGCCCTGTGTCTTGGCTTT -GGGCGGGTCGGTGCGCTCAAACCGAGAATACTAGACAATCCATAGTAACCTCAATTGCTA -CGACAACACTTTTCTTAGGACATGATATTGCATTCTTCAATCACTGAAACTTCTGTAAAT -GCATCCTATTTAACAAGCCAGTAGCGACTCACTCCATTTATGGACAAATATGGGAATCAA -ATAAAAACCAAAGCCGCTATAAATACTAAAAACAGACATGTAGATGGCTAAACAGCTCAA -AGGACGCTCACAAGTAACAAAAAGAATTATGTCAGCTATTGCCGCGATGCAAGACTGCGA -TCTAGCTTTCCGCGACTGGAAATCTCCCAGGAACAACCTCACAGACAGTCCCCACAAACC -CTTCTAACCCTGACAGGGACACCGTCCCACAGGCAGAGCTAGAGCCAGGCGCAGATGCTA -CTGTGGCCCCAGATTGGGCTGGAACAGACGCCATAGACACGCCGGGCATGATAAGAACCT -GAGACCCAAGTTCCACCCATTCAACAGAGGATACGAGCGCAAAGGTAATCTTGACGTCCT -CTTTCAGGGCTTCTGCGGAAGAAGCAACTCCGACAATGGAGGTAGAGGCAGCGCCAGACA -TCACTTTGCTGATATCCGTTGTCGAGGGATGGCGGCGAACTGCATCACCTGCGCCGTCCG -CGCTATGGCTTGTTGTAGTATTGCTGTTTGATTTCTCGCTAGGAGGATGAGGATCGCCGT -CAAAGCTAAACAATTCAGGCTCCGATGGAGAAGGCGGCTCGGAAGATAGCTCTTCGCCAG -TCCCAGTTAACGCCATGCAGATGACACGGACAGTGGCTCGAATGTTGGCAATGTAGGCAA -TAGCGTTACCGCCAAGCAAAAGTGGAGGTGAATTCGGAGCCGAGAACTCGGCTGACCGGA -AGGTGGCGGTGAACCCCGTGTGGAAGGGTAGTGCCAATAGAGAGGAAGCAGGCGAGGTAT -CTTTTGAGCACGAGAGTGGGGCGGTGAAATTTGAGAGAACCATGCCCTTGCGGATCCGGC -CGAATCGCGGTGAACGACCATCCTCTGAAAGCCCGATGGGTTCGATTCCGATGGTTCCGA -CTTGGTCTCTGATTAGGCGTCGGACAGGAAGCCGGAGATCTCTTACACTGACGACGCGGA -TACGCTGCCATCTTGCTTGTGAATCTTTGCCGGACAGTGAGCTCTGCGGGAAGGATGCCG -GGAGATCGCCCGAGGAGCCGGGCCCACTGCGGGACAAGAATCCTCCGGGTGGTTGGGGCT -CATTGCCTTGGTCGTTTGGAACATCCACTAAGATGGGACCGATGACCATTTCGTCGCCGA -TGGAAATATTCCCGCGACGGACTAGGCCGCAGAGAACCACGCCGCGGTTTTCTTGCCCGC -CCTTCTCCGAGTTTAGAGAATAGACTTTGGAGGGAGGGATTGCAAACACTTCATCTACAT -CAAAGATGTTGGCAGAGCTATGAGAGGGGTTTGACAATCCTTGAGGGACATGAAGGGTTC -TCTGAGAGGGTCGCGTGGGAATGGGAAGAGATCGAAGGAAGGCATGAAGTTTTCCAATAC -CAGAGCCATCGACAGCACTGGTAAGCATGATAGGTACGGTGTGTACCCATTTTCCATCAG -CCGTAGCCAACACTTTCTGTACTTGGGTGCTATCCAACATGCTGACTTGCTGAAGGTCAA -GTGATTTATCTCCTGCAGGCGATGCAGGTAGCATTGCAGGTTGTCTTCCAGCTGTTTTAA 
-GCGCAGATAAAACTCTCCCAAGATTGTCTCGCAGTCCGCTGCGGGAAGCAAGATCCAATT -TGGTAATGACAATCAACACAGGAACCTGTAACTTTAGGCAAAGTTCTAGATATGACAAAC -ACGTATCAATTTCTGATTGTTCAGTTCCTGCCTGTTCACTCCCAGGTGTCTCAGAAGCAC -AATTGGCTGGAATGCAAAGCATCACATAATGTGGAGCCCAGCTGACAAGTCCTCGCAGGG -TAGATTTCAAATATCGAATAGAGCCAGGTAGATCGGACACAAAAGCCAAACGACCTCCCT -TCGAAGCAGCATGAATATCATCCCAGCCGGCAACATTACCGGAAGCGTAGTTGATCACGG -TTGGTGGCACCCCATCCGTGTACCCAATCAGTTCCTGGGCAACCGAACTGGTGATGCCCG -ATGAGATTTCATGCCGATGCTTGAGCAGACTCAACCTGCTTTTGCCTCTGCCGTTGTCAA -GCGTAGAAGAAGTCAATGCACCCAGCAAAGAAGACTTTCCTGCGGCACTTGGACCTGCGA -GAGAAATGCGAATTTGCTCAGTGTGGGAGTAATCCTGGTCCAAGACCGACGTGTTGCCAG -ATTGTGGGTGTGCAGCAATTTCTGTGTCGTTCTTCGGCTTGATCGAAGAAATATTGTAGA -AGTCAAGATCGGGACTGACCAGAGCCTCGGCGACCCAGAGACTTTCGGTGTTGTTTCTAG -GTTGATTCGCAGTTGCAGATGGCAAGTCGTCGGTCCATTCACACTTCCCAACCACAACTC -GCCGAAGGATTTCAACCTTGCATCCAAGACTAGCAGCCATAGCCTGTAGATTGGCCATGC -TCTCATCCAACTCATCCTGCGTAAGGCCTACAAAAGTGCCATCATCTCCGACACCGATCT -CATACAAGGCACCTTGGCTCTCTTCAAGACCGGGAAGCAAGCGAGCGGGTTTAGGGGGCA -CACCCAATTCCAAAGCTGCCTCGGGAAGAACAGGGATCACAAGGTTAGAGGTAGTGGATG -AGTGAAAAGGCGAGGATTGTTGCAAACGCCATAATAGCTGGGTAGTCAGCCCCTGTAGTC -GTTGATGGCGGCTCTGGCTAGACTTGGCTTGCATGGGTCTTGGGGTTGGCTCTAAGCCTG -GAGAGGCAGAGATAGACGGGGTTGGCATGACGCGGTGATTTTGATGGTACGAACCCGCTA -CTACATGCCCAGTCGACATTGAGAGATAAGGGCGTCTGGGTCGAAGCAAAAGATGCACTT -TGTACTCAGTGGGACCCTCTTGGGGCTCGGGCTCCAGTTTGGATATTCCATAATTAGATA -ACAAGTCAGGGTCGACGCGTAGTGGAGTGGCACTAGAGCGCGTGCGGATAGGTAAGCCCC -GACTACCAGGTGCATTGATTTGAGGTGTCGATGATCCCGAAGTTGACCACGGGGATGACA -CCCGAGGTGGGTCAGGGTCATAGGTGAAGATAGATGCCATGGGCACAGTTACAATTCAGA -CTGGTAGCAAGGGTATTGGTAATAGACACAGATATGACAGATAGAGCACTTCTAAAAAGA -CTTGAGAAAAGTAAGGCATTGTTGTAGAATTGAATGATCTGTGACGCCGTTGAACTGACT -CACAGATGGCACGGGCCGAAGCATCACGTGGCCGTTTATCCGGTGCCAAGGAATTGTATG -TACGAGAAGTTCTATTAGACCAGGCATGTTTCTCTCAGAGAATACTTTGGTCCCCCAAAT -ACACATACATAATAAGGAATCCAGTCCTACTAGACCCGAGCGCCTTCGCTTGAACGCCGA -AAATCATAATAGTGCGTATACCCATCTACTCAGCGGATGATTCCCTGCCTGAGCTTTTAC -ACTTGCCCTGCGCAACATGGATATTGCGTGCAGTCGTACGTGTAAATACCGCACCGCATA 
-TTGGGCATTCAGCCTTCGCTACAACTCCTGACTTTTCTCGTTGATGGCGCAGGAGATTGC -TAAATGTTGAGAACTCACGACCATTGCAACCATGGTCCCAGCACTGCGGTCGCGGGCGTA -AGCTGAGCACTCGGATTGGATGGTCAGAGTCTGTAGCAGCAGCTGGTTCTGGTGAAGAAT -ACATTGGCTCAGGGGCGGCCGTCGATGGCGGGTGAGAGCTGGTGGAGGCAGGGTGCGATC -TAGTGGTTTGGTTTCTGGTCAGTATGGTCACATTATTCAAGCTCATCGATTTCCAACTCA -CGGAATGGATGATGTGTCAGAAAAGTAGTAAGCAGGTTGGTTTATCATATAAGGAGGACT -ACCGTTGGATGTCAGTAAGCAGGTCATTGGATTTTTATCTTTCGAGCGGTTCATTCAACA -TACGATGTCATTAGAGGCTGCCCAGGCTGAGGAGTGCTCGAAGCAGATTGAGGGTGGAAG -GGACGGGCGAGGGGCGGACTGAAATCGAAATGTCAATATGTTTCGAGGCCAGGTGTTCAA -TCGGATTACCTTCGAGTCTGGCTGTGACGGGGAGGATTGCCAGGTGCTGCGAGATAAGAC -GCCGGTAACTGAGATTCTTGCGGTTGGTATCCATGCATTGGCATGTTTGGATGACTGGGG -TAACCCATTGGATAGCTCGACGAAGAGTAGAAGGGATGTGAAGATGGAAATGACGGACTC -ATTTGTCCTGATGGCGGTTGCAAAGGCTGATAAGGCCGGTTTGTCCACGCATATTCTGGA -AGCGTCAGTCAACACCACTTAACGCGCGTCTGGATGAACTTCGTACGATGAGTGTTCTGC -TGCATTGGGCCAGCAAAATAAGTGTCTGATGCTACTGACATGGGATATGATGAGATGCCA -AGGTCGCTATAGAGGGGATGCCCTAGTCAGCACAATTATAAGGTTGTTTCTCACGATTTG -GTTTGACGTACCGTGAATCTCCCGATTCAATCTCTTCCTTGATGGCAGTGCCTTCGACAG -AAGACAGGGGTATTGACCCCGTGGGTGGATAGTTTGATCCTGTCAGATTGTGAGGCTGAT -CCTTGCTGGTGTCCAGCTTCGTTGAGTGATGTGAGGAAAGCGCCAGTGCGAAACCTTGTG -TTGATTGTGAACCCCCTTGCACGGCAGAAATAGAACTCTGTATGGGTTGAATGGGCCAGT -GAGAGTCAGGTCCGGGAAGAATGCCTTCTGAAAAGCCCGAAGTTTCCACCAACTGGATCA -GTGGACACAAAGGCCTCACGTTGGATTGCGAGTCCGTGACTTTTGGTGGGTTTGATTGAT -ACCTGCGGGAAGAAAAAGGTTCCTGTGAGCCAGAGCCTCCGGGGAACATTAGGAACTATA -AAAGTTCCAGACCTGTGAGATTGCAAGCATCGAGGTAGAGGGGAGAGGAAGTATAAGGGG -AAAGGCACTCATAAGAGAAGGTCAACATACAGTCTGGTATCCCAATTAAGTATGCCGTCA -ATATGGTCAATCGAAAGCTTTGCGAGGATTTCAATTGGGGGCACAGAACGGGCGCGATGA -AGTATTTAAGAGCCCATATCCAGCAATGGGCGTTTACTAGCGACCCCCGCAATCGCTTTA -TGATCTGAAAGGTTCCCGTTGGGTAGCCTTGCTGGCCCACAAGTTCAGAAAAAAAACCAA -CTTCTAGATTTTCTGTGCGCCACTTGGTATGCATCAATGCCGGGTCTACTCAGCTACTCT -TATTCCCACTACCCACTAGTTCACCATCTTCATTGCATCTGTGATAGGAAATATTTGGTT -GTTTAGTCTGTAAGAAGAAGACCAGTGAGACAAGAAAAAAGAAAAAAAAAAGTAATCCCA -GGATCTTCAATGAGGTGGGCCGATGGTAACTATCACGAGCCTGAGTCGGGATTTTGACCC 
-CCACAAATGACGGGTTAAACAAGAGGATATGCGGGAAAAGGTCTCGGAACTGATTGCGAC -TGGGGCCATAGTGTGTACAGCAATGATCCTCTTAAACCAGGGAGATGACAATCTTGGCCT -GGGCATTGGTTTCCAGCCCCATTATAGTAAATGGGGGCGGATTGGGGGCCATGACGCTCT -CGATAGTCTAGTGCGTGGCTGATACGCCTAGATTTCTACAATTCGAAAGGGAAACATAAT -TGAAATACGAAAAAGTCTGTAAGATGAAGTCAAGAACCCGCCATCCCCGGCCCGATGCTT -GTCCCCCTATCAGTCTTGGCCCCCACAAACGCGCCGTTGGAGAAAAAAAAAAGAAAAAAG -GAGATGACATGGATGTTACGAGCAATATGACAGATATCCTAATGGAGACTATATAGCATA -AATATTTTTTGAAAGAACCTAAAAAGTACCGAAGTCCCCATCTAGGTACATAAATGATAG -CATTAGACACCCTAGGCAAATGCTAAGATTTCCGGCCCCAGATAATCCCATTTTAGTAAG -GATTAGTAGAACCCGGCTCTCGGGCGGTTCGTTTTAGCTTCGAAATACTTTGCACGGAGT -ACGGAGTGGAATTACTCTTGGATGAATGGACCAATATCCGGAATGCTGGGTGACTATTGT -AGAGGCAGTCGTGTATCAAAATGGCAAAGCTGTAAGGAATGGTGGAGCTTGTTACACAAA -AACACAGCCAAGACAAGTTGTAATTTGCCAGGGAATTCATGATATATACCTATGGTGTTC -TCGAATTTAGCGTCCTAAGAATTTGTCACTTCCCTGAGCTCTTGCTTTGGACTAGGGCGC -TTGCTAAAAAATCGCTGATCGGCTCACTCCACAGGGGATCCGCAGTTATATCACTTTGCT -CAAAAGACATGTGCGGTTTCTGCATCGGAGATCATAGATCAAATATGCATGTATCCGTGT -ATATCATCTGATTATATTAATATATCATGCACGGTTGATCGTTCTGTTCCTCGTGGCCGA -GTAAGGAGTTAGTAGTAGGAAATAAGGGTTATGTTCTGGACCTTTTTTTCAAAGCCGATT -TATGTCGATTTCCGGGTTTTTGGGGCTTGGGACTACCTATAATGACAGAATTGGAGAGAA -TCAGCCTTATCCAACCTGTTCAATGCCGCTCTGGTTTCTATTGATTTGGGGGGGGGGGTT -TTTTTTCCCCGGTATCCGACGGGCTTGGGGGTCTCGTTCCTACGTAGATCTGGTCTCGTT -TACTTTTACAAGGCTGTACTGTTCAAGCATGATTTGCTCACATCATGTAGCGGAATCTGG -CATACATATCTATTCACTTTGTGAGTCTCTGGGGTTTCTCGAAAACAAATTGGTTCAAAG -GTGCGGTGTTAAAGTGACGAGACTCGCCGGAATTGCAGATATGCGCCGCTATATCCATCA -TATCACTCTGCCCAACTTGATCACTGTGCCCAACTTCAGAGCAATAACCAAAGCGCTTTA -GGAGAAATATGAACATGGTGTTCCCATTGATCCTATGAGATACGATGCAACGTTGACACA -CCGCTCAGCTGCATATCGAATATGAGTGCATGGAAGCCGTTACAGGCCATGGAAATTTTT -CAAAGGCTAGACACGTCGAATGACACGCTAGTTTTGATCACTCTGCCTACTGGCAAGGAT -CAAAGGTCCAGCTTTGTGTAGATAGCCTAGAAACTCGACATTTCTCAGCAGTCGGACTGA -TGGAAGATTGGTCTAGGTTAAGCATTGCCTCCGACAAGTGTGTCGAAGATAAAACAATGC -TGATGAAATTTGATGTAGCATAGTTACTTCAGTAGACATATCCGGCACAACATTCACTTG -TTGTAGAGAAGGTCGGATCTGAACCTCGTAACTCGAATCAAATGTGAAAGTGAAATCACG 
-TGATTGTCCTTACCTGATCAGATCTTCTCTGGCTCTGTGGTCCAACAGCATCCCAAAATC -AACGCGTTTTTTGTCCCCACAATCTCTAGCTATCTGCTAGTGTTTTGACAGTTCATATCA -TATTGAGAACGTCTTCAGCTTACAGCTTACTGATTCACCATGAAACTCGTCCGGTATGTG -ATCATCTTCCCTGTGCGCGGCAATCACCAGCTCCGCTTGGCGCACCAAGCACCATCAAAA -CCACTCCTGCTGACGTCTGATGATATTTCTAGCTTTTTGATGAAATGCGCCAATGAGACG -GTAACCGTTGAGCTGAAGAATGGTGAGGACCCCCGCGCAATCTCCGCAAACCCCAACCTA -ACCCCCTCAAATATCCCAATCCTCGAAATGGGTGAAAGAACTACGTTCTCTAACTTGGAT -CCATCATCTAGGCACAATCCTCCATGGCACTATTGTGTCTGTCTCTCCTCAAATGAACAC -TTCCCTCCGCGCTGTGAAGATGACCCCTAAGGGCCGTGACCCCGTTTCTCTCGACACTAT -CAACATCCGCGGTTCCACGATTCGCTATTATATCCTTCCTGACAGCCTTCCCCTTGATAC -CCTTCTCGTCGACGACGCACCCAAGCCGAAGAACAAGGCGCGCAAGGAGACAGAccgtgg -tcgcggtggtggccgtggcggtccacgaggccgtggAGGCCGTGGAGGTCCCCCCCGCGG -TCGTGGTCGCGGACGTGGATTCTAGATTTGCTGTTTTCTTCCAAGAGAACAACCGGCGTT -TGTTTCTGTCTTTGAAATTCGGGTTGATTTGCATCATGTGGGATACATTCTGCTGTTCCT -TTTTTCTACTCCCCAGGCAGGGTCTCGATCTGGATGGTGGGTGACATCGAAAAATTAAGC -AATGTTAGGGGGAACTTTCCTCCATTCTGAATTTGCGCACCTCAATCTTATCAAGTGCTC -AGAGTTTAAGGCTTTGACAGAAAAAGTAGCTGAACATTAGCTATCAAAAGTACTGTGGAA -TCACCCCCAGAAGGATACATACGATTTGGCTATGATAATATCCACGATCACTTCGCCTCC -AATGCAGGACTCCCAAAACATGTGAGTGTGAAGGCAACAATGGCAGTATCCCAGTGGAGG -TTTGGGGTCTAGAAAGGAAACTCAACATGATCCAGGCCAGAGCAATATCAAAAGAGATAT -CGAAACATCATATAATTTTGTGAGCATGAACATCGTAGGTTTCAACTCTCACATAGATCC -AAGGCTATGTTGTACATACCTATGGAATACATGTGTATCTTGTAATTCTTGTGTTCTAGC -CTCTCACCTTGTTCTGTTGCACTCTCGGACAGAAACATGGCCGTTACGGGACCTCGCACA -CCGCATCCGAGATGGGTAATTTAGCGTGAGACATGTTATCAAAAAGCACCTGAGACACTA -ATCTTGGGCTCTGAAGTTGTCGCGTTAAAACACCCACATCAGGGCTTGCTCTCGGGAAAG -ACTGCATGATATATTACTATCCACCCCCTCGACTTCTTGGCAAACCCAGTACACATCAAG -ATGCGGAGTATGGAGCATTGTAGATAAAATTGGACACTAAGTCATAAATTGCCCCAATGG -GTAGATAAATGGAGACACGTGAGTGCTATTGTAGACGCTGTCATGACCACTGGATTTCGC -TCCAGCGATCTGCATACAAAGGGCGAATATTCAAGAGATGGAAAAGACATCCTATCTCAG -CTCAGCCAATACTACATGTAATATTTTACGGGGCAGTCTTCCCGTGTCTGGGCTTCTGAA -GTGAAACCCTTGCCAAAATGGCTCTACTCGTGTACTTCCGTATTCCGTACTTCGTACGGA -GGTGTTTTCGGTATTTAGAAGAATACCGGGAGTATAGTCCCGAACTATTTGGATCAATAA 
-GGCCGTCACCGTAGTCCTTTTCTAGCTAGAGGGACTAGAGTCTGTTGATCCAGCGCATTA -GACCAGGATTGCCACGGTAGACTGTTTCTGGCTAAAGGTTTTCAAGTGCCAAGGGTTAAT -ATCGTAGGTTACGGGAGTATGGAGTGGTCTGGGGGCTGATAAGGACCTTCTATTCAGGGG -TTCCCGTGCATCATGCCCGTGTACATGCCTAAATCAAGACGTTTCGGTTTTAATTACTGA -AACCGGGTTGGATTATTTTGAAGGCCAAATCAACTGCGGATCCTCTTTTATCGTCTCTCG -CATTCTCTCTACAACATATACAAGTACAAGTGTAGGTATGGAGTACTTCATATACGAGTC -ACAGTCATCTTTCCAACTAATGATCTATTGAGAAGTAATATAATCTCTCTTTTTGTGATT -GCGTGAGATTGTTGCGAACTAGCGCTTCAGGCATAGATAACCTATGCCTAAGCCTTCATT -TCTCGGTTATAGATTTAATAAATTATGCCAGAATAATTGAATTATTCCCTGATCATCGCT -AGTACAAAGTGCATGGAGCACCATCCTGACCTGCTCAGAGTACAACCGCCACCGCCGATC -TCAATGAAGTAAACATGGTGGGCTGGAGTGGATAGAAGATTACTGCCTTTACGGAGTTGC -TCAATGCCCCCCGGTCCCGTTTTGTTTCACGTGGGGGCCAGAAAACGCGGAATCCGATCG -ATCTGCTCTCGAtctgtcctctggaactgtgaaatgtctactctgaactccgtactcCTT -ACTCCTGGGAAACCGTTGAGAGCAAAGAGAAATGGTAGCAGATCTAATATGACAGCCCAC -GTGGGTATTGCGGGGACTTCATTTTCTCTCGGAGACTTCAATCGAACCAATTACATGGCT -ATACATAGTGAGTAGTGTACAATTTTAATATGTTATCGGTTTCCACATTAGGAAAATGGA -GTGCCATACGGAGTATGTTGCCTAGAACTCCACCTCTCGTGCTCCATTCTTTTTAGGAAA -CTAGTCTAAATGATAATCTTAGGGTCGGAAGGTCAACAATATAGTCAATACAATATGGCA -GATCTGATGTGTTTCTAGCCCACCATTTTACCCCTTCTACAGGAGTCGGCTAAACGGCAA -TGACTTCTGGATCCTCCGGTTTATATTCCGATATACTCTTAACATCAAGGAACACCGTTG -CATACAATGGTACGGTATCATGCAATGCATCAAAACTCACCATCTAGTACTCGTCCCCTC -AAGCTCACCCATTTCTATCGATTGCGAATGTGCCCATAGACAGAATTGGAGTCAAATTCT -TGAATCTTATGCCGGGGAGTGAGCTTCTTACACTCAGACCTATACACCTTGATCAATCAT -GGAGCTAACAACACGACCGAGTCTGCTACAAGTCGCGATCTATTTGGATACTGAGCGTCT -CGAATGGGCATACTGAACAAATGACCAGTATATTTTACCCCATGATGTGAGTCTCTACTT -GTATGCATGGTAAGCCATCTACTGTAAATGACTAGGGATAGTGTCGGAAGGGTGCCGGTA -GAGTAGAGTGAAAAAAAAATAGCTGAATGAGATTCAAGAGTGTTGGCATCGCTTTTGAGA -CATTAAGATACAGAGTGGTCATTCAAAAAAAATATGTGCACCAGAGCTTTGAATTCTCGG -GCGTCACACCAAAGCTGTCTATGATGGTGAGATAGTGTGCAAATAGGGTATTGAAAAAGT -TGCCATAGGTTCAAAAAGTGTGGTCCTGGGATCAACACAGCAGCACCTCACACAGCGCCT -AATTGCACATGAGAGCAATAAAAGTTGCCAATGAGATTGACCCTAGTGTTCGGCAGTGGA -GAGGGGTGTCTGTATCAAGACTCGACCCAGTGAGGAGGATCAAAGGTCCAAGAGCATGAT 
-TCGTAGGCCTTGGGCAAGATAACTTCTTGGCTGGACAAGGTTTTACATATGATGTCAAGA -AAATGGCACAAGAAAGGGCCAATAATGGCAAAACATATTACATGTTGCGGAAAGTGTTGT -TGGAAGCACTGCGGCGTTCCTTGGACCGGAATGATGTGTCGGTCTTGTCGTAACGCACTT -CGGGGGCAGCACGTTTCGGCTTCCCGGCCGGGTTGAAGCTCATTTCTTGCTCGCCGACTA -CACCAACCCGTCGTGCAGGCTTTGGCTTCGATTTCATGTTCTGCGCACGGGAACCAAAGG -AGCGGTCCCGGGTGGAGTGGGATTGGTTCGAGGAAGATACCTGCATACGAGGCGGAGGCC -GCTTCGTCCGCTTGTAGTCTGCAGAAGAGATCTTTTTGGAAGCTGCTGTCTTCGCGGGCT -TTTCCGCCTCGCTGTCGGAGTCGGAGTCGGAACTTGAACCGGGCACATCGTCGATGGTTT -CTTGCTCGACTGCCGTCAAGCCACGCTCCTTTCTCGGAGCTGATGAGGCAGCAGCCTGGG -GAACTGTACTGGGGTTCAGCAACTGGAATTCGCGTGAGGTCTCGTCGACAGCGAAATCCT -GGTCCTCGAACAACTGGGCGAAACGGCCTCCACCCAATACCTTCTTGACTTCGCCTTCGG -CAGCAGCATCGGTCATTTCCTGGTCGCCCGAGCCTTCCAATAGGCGTTTGGCATGCTTGC -GCTCCCTCTTCTCCTCTGCGTTCAGCATCCTCTCGGCCAATCTCCTGTTCACCTTGACAG -CAACCTTCTTCTTGCCGCGGATACGGCTTTCCCGCTCCTTGTTCATAATTTCCGCCACCT -TCTTCGCTCTGGGGTTCTCTGTGTCGGGGAACAACCCAGTGATAGAGTTATAAACAAAGT -TCGACACCTGATAGCCCTTAGAGTAAGGTACCATCTTTCCTGACTGCATGTAAGCTGCCA -AATCAATGGGTTCTCCGCCGGCCTTGATGAGCTTGGCGTCCCATTTCGCGACCTCTTCAC -TGGGCACAAGCGTGAAGCCGGGCCAAACAGAACCGGACTGAGCAGACTTGAATGCGCTGG -GGTCATTTGGATCCTCGGCCATCTCTTCAACCAGGTTGTCCAAGAATGAGCACCATCTCG -GCGCGGGGCCAAGCTGCGGGATAAAGAATGAGTGTTGCTGTCGGCCTTCATTGGCAGTGA -GGAGCATGCCGCTATCCTTGCACCAAGCAACAGAATTGATGTCCACAGCCGGTTCGACAG -AAGTCCAAGGTTTTTGATCTTCAATATCCCAGAGTTTGATGATGCGCTTATCAGCCGAGA -GCAACTTGGGTTCGATTGTCGCAGCTTGTGTTGCAGAGGACTGCTCGAGGAATTGGAGAG -TGTGAATAGGGAACCCATATCCTTGGTCCTTGATCTGGAAGGGAACGGGCGAACGCAAGT -CGTAGAGGTGAATGAGGCCATTAGACGAGCCAGTTCCAAGGGTCATACCGGAGCGGTTAA -ATTCTAGAGCCGTGATCTCCGTACGCTCATCGTGTATTCCAACTGATGTGGGTGGGCTCA -GCACACCGGCACGTCCCTTAGCCCTAGGGTCCCATAGCTCAACTGTTCCCAATGAAGTTC -CAAAGGCCAACAAATTGTGACTCTCCTCTGCAATAGCACCAGTGTTGACCGATCCTGTGT -TGATTCCACCTTGGAGAGTTCCGCCACCCGTGGATGTAAAATCGTCACCTCCAACGTCGA -TCTCAAAACTCCGCATGTATCGGCCCAACTCCAAGTTCAATCGGTAGACCTCTCCCATAC -CATCTGGATTGACACCAACTGCAGGAACAAGCACCTCGGTCGACTGGCGGTCATAGACGA -TGTCACGTCCATATCTGGGAAGTCTGGTAGTGTAGTGACAGCCCGAGGGAGTGTGGAACT 
-CGAGAGATCGATCAGACTGGAGGTGGAGGGACTTGGTGTAGTCGGAGGACAGAAGGATGA -ACGTGGTATTGAGCGAACTGGTATGGCGCGCCCATGAAAGTGACAATTGCGGAAGGTAAT -GGGTGTGGATTTGGGGCTTGTAAGTTCCTGCGAGGAATAGTGTTAGGGTATGCGCTCTTG -AAGAAAAGATAGTGACCAGTAGTACGAACCTGTGCTCATAACCCATTCTCCATCCTCGCT -GACTCGGATGCAGTGACTTGCTTCCTCGAATTCAAAGTCCTGCAGTAATTCAACACGATT -CGCATACTCGGGGTCATTTTTCAAGCTGCGCTTTCGCCGTCTGGCAAGCCATTCGGGAAG -CGGGCGAGCACTGTCTGATCCCGATATGGTATATACCGGGACTTCGGAAGGGTTGGACAG -CTTCATGCTGGTGTGTTCTGATGTGAGGATGCCGAGCCAGCTGTTTTGAGCTGATTTGCA -ACTTATTTGCAACCTATCGGTGTCGGCGAAAGCAAATCTGCAAAAGATAAAAAAAAAACT -ACGATAAAAAGCCGCTGATAGCGATCACGTGATAACCCGGCCTGGTGATTGGCTCAAGTT -GTCCCTGGCGAACGTCCCCCGGAATTACGCATTTCAACTTTCTGTTAACTTCATGCGATG -AGCCTTGTGGTTAAATCATTGCTGTTTCATTTGGGGTGCGAATTTCATACAAGTGAGAGA -GTTGTGCCAGACATCTTAGTCTTTGTTGACCGCTTCATCCCGCGCAACATGGTATCAATC -AGCTAATGTATTCGCTGTTTGTTGATCTCTCTGATATGCGCTTTCGCTCCTTCCTACAGA -CCATGCAATTTGCGATTGGAAATGACTTCCAGAGTCGGCCTCAAAATATCATCTATCTTT -AAAGGCTTCCGGGGCTGTCCGCGACCGTTGAATTATGGACTTCCCTGGCGGCGCAAACAC -CAACATACATCTTATTGTGAGTGTTGTAATATTTAGTATACATTCGCGTGCTAACGCGTC -ATCAGGATGGATTCTCCAATATTTACTGGAGAATATACACAGAAGAAGCTGGTATCACAA -ATAATCCACAGGAGGGCCCCGCCAACGGCTACACTATTCTCAAGCACTTGAGTCGTCTCA -AAGATTTAGAGGCTCGACTGAGAGTCTTAAATTGTCTCGCGTCATGCCCAAGACGCCTAG -GGCTTTGGGTATTCTCCCCTACCCCCGAGTTTGAAAGCCTAAACCCTTTGTATGTGAGGG -GAAGTGATGCAGAATCGAATAAAATCCTCGTCGGCACGACAACTTTGAAAGGTATGTCTA -TGATCAGCTTAATATTCATTTAAGGTCATCTTCTTAATCTTATTACAATAGTCTCCGCAT -TTGGAAGTGTCTCCTCGCTAGACTTGGTGAAAGGTCTGTCGTCCGACAACCAGAGTCAGC -ATGGTACCCAATCCGCTGGGCAGCCACGGCCACACCAGAGCCAACCCTCATCTCGGCGTC -AAGATGGTTATAGCAGTTCAGCGGCCATTTATGCCTCTTTTATATCTGCTGTCACTGGTT -CAATTGGCCTCCAATTGATTCAACGCCATGGTGCTCTGCCTCTCGGGTCACGTACCCTTT -TCACTGCCGCCGAACAGTCGGGATACGAAAGTCCGCATGTCAACAATGATAGCATTCTCT -CTACTTCTTGCTTGACTACACTCAACATCCAGCTAACTATGAGTGGAACAATAACCGTTT -CCGCTCAGACTATATCCCAAACGGGCATAACGCGACTATGCAGTCCACGCGAGGACATTG -CCGAAATAAATGACGTACAGCCTGGAACAGATCTTTGGCTATCCCCAAACGGGAGTATTG -CTAGGCTTGTCACGGCTAATGTTGACTCGCCAACCGTCCCGTCTCTGGGCTTCCCTGCCC 
-CTGGGAATCTGCCTGCAAAAAGAATACAGTGGAAAATGGATGTTTTGCAGTGGCTACGGA -ATTTTGGGTTGCATGTTGACTCAATCGATGAAGAGCCCTGGGTCGAAGTGGAGGTTTGGG -AACCATTTTTCGCAAGGCTTGCTGGAGAAGCATGGCGACAGAGTGATGATAGTCAATCTG -CACTACCGCTAAAACGCATGCTATGGCCAGCGAGGTTTTGCTTCAGAAGGGCTAGCTCAT -CAAATTTCTCTTCCGGTTCTCAAACTTCCCTTCGTGACGAACCTTTAGATTTCGCAGAGC -GATGGTCTACAATGGCGGGCTCTCTCAAGCTAGATCATATTACCCAAACTACCCAAAACA -CTCCTAACACGCAAGACTCACGACCAAAAGACCAAGAAATGCCATCGTCACCCAAGGCAG -AAACCTTGGACAGCATAGAGAGTTTATCCCGAATCGCGCAATATCCCGATTTGCAGAGCA -CGAATCTTGTTTACCCAACTCCCCCGGACGGAGCTGCAGCAGTTGGGTTGAACAATACAA -ACCCTTCTGATACTTTTGCCGAGGACTCTGACTTTGGACTACTCCATGCAACGCAACGAG -ATTCAAGAAGAAATGCATCGGGATCGAATATATCACCCGTCCAGAATAGTGGCATCGGGG -TTGGCACTGGCCGATATGATGCAAGTGACGAGGAAGACCTCTTCGGAGAAATGAACGAAA -GAGATTTTGGGTCGAAAGGTATCACCGATGCGGACTTCAGCTTCTTTGATGACCCTGATT -TCGATGGCATGGACGGTGACTCTCGAGTAGAGGATGCAGATGAAGCTCTTGAAGCGTCTC -ATCCTCAGAGTGAATCTGAGGCCGAGCCCATGGTTGATGAAGAACCCTTCCCCGATCAGC -CACAGGGAATTTCAACTCATGTCGAAACAAATGAGGCACACGCATCCCCGGTACATCACG -AAGCCCTAGAGTCGTCTGTGGGGCCCATGGCCCCTGAGGAACCAGCTCATTCGCCTATGG -ATCGCGCAGGTCAAACGATCAGTCCCCCCTTGAGCCCAGTAGAAGTGAAAAAGATATTGT -TTCCAGGACCCGAAGGAGACGGTCATCAACAGCCTACAGATGACCGGGGCCAAGGGCACT -ACCACCCAGTGGCCTTCCAAAAGAAACTCGATGATTGGGACCAGAAGTACGGAGCAACTG -GCAAATTCTGGTTCTCTGGTGGTAGCTCGGTGGACACGTTGGATCAGACCTCTGCCATAC -CCACGATTGGTATTCCTCACCGTGACCGAGGCAGCGCCAACGCACCTGGCTCTTTGAAAG -AACGAGACAGGACCAGTCTCTCTTTAATTCACTCAGAAGGTGGTCTACGATCAGCATCGG -TGAGCAGTGACAGCAGCGATGACAGTATCGAAATCATGTCAGAGCACGTTCCAACGCCGG -CAGCCATGCCCTTAGTGCCCTCCCTGAAACGGAAACGAGCTCCTTCCGAATCTGACATGA -TGTCGATTGCATCCCAAGAGAAGTCAATGCCCGGCACGGAACCAAGTCCCGCATACGCAG -CCGAGAATTCTACCTTCCTCGGTAATTTCCTAGCCAACTTTTCTGATTGGACCTTAACCG -GTTATTTCTCGGCATTTCCACCTCAACAACTCCCTGCACTTCTTCGCCGTGAAGGTCAGC -TTGAAATTGCGCAACTCCTAGTCGACCAAATCACACAGTCCTCCCTCAAGCACCCACTCG -ATGGGCAAATGGGTCTTTTTGATCTCGAGGGCCAGTCTCTGCCACTACAAATTCTCGACG -ACACAACCCTCTTAGGCGAGGCATGTAAATTGGACTTCAAAAGATACACATCATTGCAGG -ACGAATTTAGCGCAAACCACCCGCAACAGCAACCGCCCCAACATCCGCCAGCGCCCAAGG 
-ATACCCCAAAAAGCTTCATTTGTAAGTTATCAGCGCCCCACATACGTGTGCGTCGAGGCA -AAGCATACTTGGAGGCTCTTCCCCCGGCTGTATCGTTTTGGGAAACGTTTGGTCTTGAGC -CTGCCCATGGACCAAAGAACATTTCTGCTTATTGCATCCACCCACAAGCTGCGTCAGAAG -CAGCCGATGTGTTCTTGAGACGGTTTGGTCTTCTCTACCAAAGTTGCAACCTTGGAACCC -ATATCAGAGGAGACGAGTCCGTGGCGTTTGAAGACGGGCTGAAGCCATGGCAATCTGAAA -CATCTAGTTATGAATCTATGATGCATGTACTGAAGAAAATATGCGAGCAGCTTGGTATGT -GGTTTCTCCTACGGTCATGGATCATACTAACAATTCTCAAGGCTCTGAGCTCTCACAATC -CTCAGCGAGTGCTGATAACCACGTGGTCTACATCATTAACCCATTTACGCATGCTGCGGC -ACTAGCAGATATTTGCACCGCCTTCTGGCATCTTTTCCAGCAACTGATAGCCGGTTCCGA -GCGAAGACAGACTCGTATTTCCAACGAGCTTGTTCTGCAAATAATTCCGCTAGAATTCGT -CATGTCGAGCGAGACGATGGTTATACCTCCGCAGACAGATTATTTGAATTTAGCCCTGGA -AGTCTACAGTCGATGTCGTCCAAGTGATGGAGATATGAGCCCTCTGCTCTGCGCGCCACC -AATGCTTCTGGCCGACGCTCTCCCGAGAGCCATCAGTTTCCGGCTTGCCCCCGAAAGATC -TTCGCCACTTCAAGATGGGCGAAGTCTTCACATTGCCTATTCCAAAAGCCTTGATCAACG -CTGGATATCGGTGGCATGGTCTGATCTGCCAGGTTCCATACAAAGAACCATGTCCTACTG -CTTGCGGTATCGCCAGTCGGGCGGCGCCAGACCAATTTCTGAGGTAAGGAATGAAATCTG -GGCTACGACGAAACACATTATGGACAAATTCCAAGCACGATGGAAAGTTCAACTGGCAAC -CACCGAGCCCATGGAAACCGATGAGGTTGAAGGTACCGCTTCTTATCTTCTTCAATATCA -AGGGCAAATCGCTAACACATCCACACAGCATGGGCTAGTCTAGCAGATCAGCACAATAAG -TCAAACCCCGGATCCTTGGAATTGACCATCCTAGCTGTCAACACTATTCCCGACTTGGTC -CTTGAGCCACCAGTTCCGCCCATATCGATGGCTATGCTGAATATACTGTCTTTGTCTACC -CCTGTCTCGACTCCTAATCCAAGTGCCAGCGTTGCATCCCCCGAACAATCAGGCAATGCA -GCCACTCCAACCAGCGCCGGTCCCGCTGCCTACAGCGCCCCCACGCCAACAGACATGTCA -CTTGAAACGGATTCAGAGACCGTGCTCACTGATATCTGCGATGATTCCTGGCTAGCCATC -CTCTCACACCGACTCAACAGCTCTCCACACCTCACCGAATTCCGACCCGCCTTATGCAGC -GGGTATCTCCTCCGCCGCAAGGGCGCTACTGATGGCGACGGCGTGTTTGCAATCTCCGTC -AATCTCATCTATTCCTCCCGTCCTCCTCCATCTCATGATAACGTCCTAAAGGATACCTTA -AGTATGTATCGGGACATGGGCTGTCTTGCCCGGGCCAAGGGCATATGCAGTGTACAAAAT -AATACACTGCCTTGGCATGTCGCCACAGCTCTTCGCGCACAAGAGCTTTTGAGTTATGTT -TTTTGAGCGAGGCATCTTATTACTGGGGCGTTATGGTTTATTTTTCTTGGGATCTCATGT -ATGTTAAGGCTTTTTCGGTGGTGGGATGTTTTATTACCTAGTTTGGGTAGATATGTATAC -CCTTGGAGGCAGTGCATTTGAATTCCCTTTGTTTACTGTGCATGACTGTCAAAGTATCGA 
-GAAGGTTGAAATCTTCTTCGCAATATCGATGGACATATCTTTACGTCTGACCAATGTGTC -CTTGGTTTTCACTTCTGATACAAGTTTGAAAAGATCTAAGAGATGGATTGAAGTTGTTTT -CGAGACTTTGTTAACTAAGGATATTTTCAAACTGTCAATAGAAATATGCACCAAATGAAG -TCCTCGTCAAAAGTACATTCAAGAAAGAAAAGACCCCCAACTCCCACCTGGAAACACCCA -AAAAGAAAAGAAAAAAAGATACATCGAAAGCAACGTATGTACTGATTGCTTCCAATCGCC -AACAATTGTACAAAAAAGGCGTGTGTCAACGTCAAAGACTCGTAACGCAGCTCGTCATCG -TGGCAGATAATGATGAATGCCATCAGAAAGGGTATCGTGTCGCAGAAAATGTCGTGGTCG -TAAAtgtttgtgtgtgtgatcgtgtatgtgtgcgtgGTCCCAAGAAAAAGAGAGGCAAAC -AATGCCTGTAAGGCTAGTAGACTCAAGGCTCAGGTCAAGGGTGCAGCGTTTGCTGCTACC -CTGACTTATTACCCATCCGCACTCTCTGAATCGGGCTCCTTCTTCTTGGAGAAGAATGAG -ACATGGTTGTTTACCGCCCAAATCGCAAATCCAGCAATTGCAAGAAGgaggaagaggacg -atgatgatgatcgcgatggtgatgttgCGCTGGACCATCTTGGGCGGGAACGATCAACAG -ATCGTGGTAGAAAAGTAGAGTAAATGATTTGCAGGATGTGTAAAGAGCTTTGATCACTGT -TTGTTGAGTTGGTAGGATAGGTAAAAACCAAAAGGCGTGGAAATCAAGATGGTAGGCCGT -GTACAGGGCAGCAGTGCGTTGGGTTTGTATACAGATATACAGCCTCGTTGGGCTCTGTTT -GCAACACACAACTCTCCTTTTTCGAGAAAAGCGAGATAAGAGATCGTAAGAATGTGAGAA -GGGAGAGATAAAGACAATTAGATCTGGAATGTGAATGGTTTTTAAACAGGACTTTTTTTT -CCCAGCCATGTGTTTTTAGAGGGGCTGGGGATGTGTTGTACTAACAGTGGCTAGGCACCA -CATGGAACACTTCATACAAAgtactccgtactccatactccatacctagtactctgtaTG -ACTTAGGAGTCAATTGCTCAAGTAGTCTCAAGTTAATTCGGCTATCAGATGTACACTATA -AAGCAGTTGTGTTGGATACATAGAACAGCCGTGCACATGTCAAGCTGCTTTTACTAGAAT -CTGAATCTTGTCAATCCTTGTTATCCGTCTAGATACAGCACTGAAGGGAACCAAAAAAAA -AGCACACAATGAAACGATATTCAAAGGCGGTGTCTAAACCTGGGAAGATATTACTGTGAT -GCCTCGGAAGTAGAGGTAGAAGTCCACCCAAGATGAGACTTGAAGGCCCAAGGCAATAAG -TATATGGTCACGTGATCCATACTGGTTACAATGGGACCTGTACTCGAGCAAACTGTTTTT -ATATTCTCTAGGATAAGATGGTATAGATATTTAATAATAGAAGAAAAAAAAACACCGTCT -GATGTAGAAATCTGTTGTAGTATCGCGACAGAAAATGCCAGTCGCATTCATAGACTTAAC -CCAGAATCTTCAAACCGCCCTTAGTTTTGGCGGATTTTTTGACACCATCGTGCTTCACCT -TTCCACCTGCTCCGGCGTTGTCTCCTCTGGGACCCTTGCCTTTGCCCTTCTTTCCATGCT -TGGGGTTCGAAGCTGGGCGAACCTCGGTCTCGGCTTTGCGTAGTAAGATTGGGTGTGAAC -CGATGTACTTCCCTTGCATTTCTCGTCCCGCTTTGAAGTAATCTTCACTTCCGCTGAAGC -TAATAAATCCATAACCTTTACTTTTCTGGGTTCGTTTCTCTCGGACGACACGCGCTTTCT 
-GGACCGATGTGTAGCGAGAAAATGCCTTCAGCAAGGACTCGTCCGTCACTTCGCCTGCAA -GGTTACCCACAAAAAGACGGAAGTGTGCTGGATCCCACTCCAAGAGAGTGGGGTCGGTCC -ATGTTTCACCTCCACCCGAACGAACGACTGTCTTTTGTGGCTCGGCCTGCGGGATAGGAG -TGCCGGCGCCGACAGTGGGGGTTCCAGCTCCGGTGTTTACACCTGAGTTGGGCTGGTGCT -TGGCAGAATCATCAATGGGTTTGTTGTAGGCGCTCTGCCACTGCGCAATTTGAGCTTCAG -TCTCTGCATCGAAGCCAGGGGCGTTGCTGTTCGGAGGGAACCCTGTGTTGGTCTGGTTGG -GCGTGGATCCGAAGGGATTCGCAATGTGTGGGACGGTCGGGCCGTATGTGTTGTCAGCGT -ATTGGGATTGTCCATAGTAGGAAGGCGCGCTCTGGTATGCCTGAGGTTGCTGATAGTGAT -TGACGTAGCTGTTGGCGGGTGTTGCGTATCCGGCAGCAGAGACTGGAGGATGCGATACCG -AGTGGCTACTGGCGCGATGTGGTTGACTAGAGGCTACTGCGCGCGGCTGGAACGCAGTGA -AGGCGCTGTAGCCTGTTGGTCGAGATCTGCCATGGCCTGCAGCATTGGCGGAAGGATGTG -CAGTGTTGGAGGATGGAGGTGGGCGGGGCGGAAGCGACGAGGGAGTCTGTCTGAGACCTG -GAGGAGGCGGAAGGGACATGATGAATGGGATATTCGAAAATGAagacgagagaaatgaga -cacgcaagagagagagagagaATATGCTAGATGCAAAGTAGCAACTGCAACGTACCTTCA -GGGCCTGACTGACGCCGCAGTCAGTCGCCAGACGTGACGCTTGAGATAGGCACCCGAAGG -ATGTGCACTTGCAAGAGGCGAAGCGTTGTGCGGCTTGCAAATGATGACTATGAGAAGGCT -GGATGATTAGTAGAATGCGCGTCCTCAAGAACTGCGCTGGATCAGGATGCTCAATGAGAA -ATGACTTTTGTATCTGAATGCGCTGGCCAGGCCATGGCGCTGTGTCACATGACTCGATCA -AGCTTTTCTGACTCAGTAGTGACATAAGCACACTGCTGACTCATGGATCAAATGTAACCC -CCACTCTAGTCCCAAAGCGGCCTAGCCTATACGTAGCGGGCTATGAACTCATCGGCAAGT -GATTTCTCCGTTTCTACCAGAACAGTTCACCGATGCGTGGACAAGTTGATTCGGATGATT -TCGATGGATTACATAGACCTTCGGTCCAATTTCTGTTTCATCATGATCTATATCGGATAC -TTAATACCCTACATGTACATATACACATGTCAATTCTCAACGTTCTCTAGCTACCCCTCC -ATGCCCCGCTATTGCATAGAAAAGCTATTCCAAGCACATATAATCTATAAGTCTATCTAA -GTATGTAGAAGGGAATTGAAATCGGGTCGGATAATGCTTTTGTAAAAGTGGGTTGGACAC -CGGACGACTCTGCTCCTGTATCTAATACTTGGCGCTACACCGGAGAGAGTAATAACAAGT -GAGGTACACGTTACTCCGGTATGTCCCACCCAAAGATGACGGAAGAGTTGCTCCGTCAGC -CGGGCCActgctacttcttcttctttcccacccctccccctcttcatcttccttttcttc -tctagatctttttcCTAAACTCACCCTTATCTCACATCTCATCGTCATGGCTGACCAAGC -TGTGTAAGTGATCTTCCCTCCCTCTTCACCAATTCCAGAATCCCCGGTTACACCTCCCTG -CATCCTGGGGAGAGCGCATGGATCTTCAATGCTGCTCAGACATCGTGCAACCGCTCTATT -TGGAAGTCTTCCGATAGATGCTCACGCAAGATGATACCAATGGAGATAATGCCCTTTTTC 
-GGAAGCCCACGGCTGTCCCTTCGTTCTCATTTCCATTGTGATTTTTGCCCAGCTATTGAC -AAGAGACATTGATCGCTGACTATAGCGCCCGTCTCGCCGGCATCAATGTCGGTGCTCCGG -CGCATTTTGCTCCCAGTGCTGATTTCGGCCTCATCGGTCTGGCCGTTATGGGCCAGAACC -TGATCCTGAACGTTGCTGATCACGGATTCACCGTTTGCGCTTTCAACCGCACAACTGCCA -AGGTTGACCGTTTCTTGGAGAACGAGGCCAAGGGTATGCTTCACTGAGAATGACAGCTCC -GGATTCCTTTCCAAAAATGCTCATAATAATACAGGCAAGTCCATCGTTGGTGCTCACTCC -ATCAAGGAGTTCTGTGCCAAGCTCAAGCGCCCCCGTCGCATCATGCTCCTCGTCATGGCC -GGCAAGCCCGTCGATGACTTCATCGAGTCCCTCCTGCCTTTCCTCGAGAAGGGTGATATC -ATCATTGACGGCGGTAACTCCCACTACCCCGACAGCAACCGCCGCACTCAGTACCTCACC -TCCAAGGGCATCAGCTTCGTCGGCAGCGGTGTCTCTGGTGGTGAGGAGGGTGCCCGTTAC -GGTCCCTCTCTCATGCCCGGTGGTAACGAGGATGCCTGGCCCCACATTAAGGACATCTTC -CAGAGCATCTCCGCCAAGAGCGACGGTGAGGCCTGCTGCGACTGGGTCGGTGACGAGGGT -GCTGGTCACTACGTTAAGATGGTCCACAACGGTATTGAGTACGGTGATATGCAGCTGATC -TGTGAGGTAAGAGAATCTTTCAAGCAAAATACGCTCAAGCAACCAATTGACATTATTTAG -GCCTACGATATCCTCAAGCGTGGTCTCGGTCTCTCCTCCAAGGAGATTGGTGATGTTTTC -GCCAAGTGGAACACCGGTGTTCTTGATTCCTTCCTGATTGAGATCACCCGTGATATTCTC -TACTACAACGACAACGATGGCACTCCCCTTGTTGACAAGATCCTCGATAAGGCTGGCCAG -AAGGGTACCGGCAAGTGGACCGCCATCAACGCTCTCGACCTCGGCATGCCCGTCACCCTG -ATCGGTGAGGCTGTCTTCTCCCGCTGCCTCAGTGCTCTCAAGGACGAGCGTGGCCGTGCC -AGCACCATCCTGAGCGGTCCCACCCCCAAGTTCGAGGGTGACAAGCAGGCTTTCGTCGAC -GACCTTGAGCAGGCTCTCTACGCTTCCAAGATTATCTCCTACGCCCAGGGCTTCATGCTC -ATCCAGAACGTAAGTCATTTCTTGTCAGCCTCACTGATAGTCAATTTCTAATCCTCGTCT -TTGATATAGGCCGCCAAGGAGTACAAGTGGAAGCTGAACAAGCCTTCCATTGCCCTCATG -TGGCGCGGTGGCTGCATCATCCGCTCCGTCTTCCTTAAGGATATCACCAATGCTTACCGC -AACAACCCCGACCTTGAGAACCTGCTGTTTGACGACTTCTTCAACAAGGCCATTAACAAC -GCCCAGAACGGCTGGCGTAACGTGATCAGCAAGACCGCTCTGTGGGGTATCCCCGCTCCG -GCCTTCTCTACCGCTCTCAGCTTCTACGACGGTTACCGCTCCAAGAACCTGCCCGCCAAC -CTGCTCCAGGCCCAGCGTGATTACTTCGGTGCCCACACTTTCCGCATCAAGCCCGAGAAC -GCCAGCGAGACCTTCCCCGAGGGTAAGGATATCCACGTTAACTGGACCGGCCGTGGTGGT -GACGTGTCGGCCTCTACCTACATCGCATAAGCGTGTTAAGTCGAGGTTTGTTAGAAGGGT -AAAAAAATTGGATGTTCAATGTATTTAGATGTCTCCATGGAATGCCCATGATCCGTAACG -AAATCATTTGTTTCTTTCTTCTATCTATTTTTTTTTTCCATCTACGTTTCAAGTGGCCTC 
-TGACCAGCACAGGCTGATAGGAGGTCTAAGGTGCGCATTCAAACTGACAATAATCACACT -GGCTGTAGCTAGAGACGAGGCACTGTCACGGGAATTGTGACATTTTGTCCATAGAGATTT -TGCAACCCGAGTCTTCCCCCTGCAAGAGTAGATCACGAGAATCTATACAATGTAAAGTAT -ATTTGCTCCCTGCTTGGCGATCTTGGTCCACTATACTCCGTACTTGGACGGATTTGTCTG -CAATGACGTGCATATTTGCATGTTGATCAACTGCATAGATCTTGTGAGTTGTTGCAGAAT -TTCGCTGCATATAAAGCACTATTTTCGTCATGATGCTGACTATTAATTTGAAGCCGAAGC -CAGAGCAATAGTCAGGCAGCATTCTTGCCAGGAGAAGAATTTTCTCAATATAGAGCTGTT -TTACCTTCATTGTCTTACAGAAATATGTCCAACCCGTTTGGAGACTCTTAATATATTTTT -CCAACCTAGAGCCATACTAAAAAACAAGATTGTCTTCATGTCACACGCTTGGCCAAATCA -AGAGTCTACATATAAATGACTATTATCTCAACCTCCCTAGCCGATCAAAAAGTTCATCTG -CGGCGTTTAGAGGCCGTGGTCGTTCAGTTGGTATTCTGTTCACTAATCACAACTTAGTAC -AATTATCGCAACGGATACAAACAAATGTTTCAATTTTTTTAATAGATAATTTAGCTCTAC -CAAGTTAAGACCGCCAACATCATCAAATAGATCAAGAATCATATAGTCGTAAGAAAAAGC -GTCGGTCGCCAAAAAAAAACCATCAGTTCGTATCAATCATGCAGGGGAATATGCTAGATT -AGCAAAGCAAATCCCAGAACTTCGAAAGTGAAGTGAAAGCGAAGAGAAAAGAACAAGAAA -GGGATAATCAAGACGCCCAAAACGAAACAATAAGACCCAATCTCTATGTAAATCGCAGTA -ATCAATCTGTGATTGGAGTGTTCAGTCTATGAAATCACTGGCAGCCAACCGCGCAAGTAG -TTGTTGAAAATCAAAACAAGGACCAATTACCTAAGAGAATCGCACTGTTATAAGATAGAT -GTTTGAAAGGTAGGCGAGGGAGGTTGAGATAGAATAGAAAACCAAAAGCATCAATAAAAT -GACCATGTATACTTATACTTCACGACCAAACATGGAAGCCCGGGAGCCCTGCTCCCCGTC -TCCCACTACCACGCCGGGACCCAGCGGCTGATTCAGCCCCACCGTGATTCCCATGTCTAC -CTTCATCCAGGCTGCGCCGCGAGGCGCCGAAATCACTCATGGCGAAAAGCAGCGGCTCGT -CGTCATCTGGGGTTTGTGCGGACAGCGTCCGACCCGTGCCTGGCCGCTGTGCAGAACCTC -GACGGATCTCTAACGTGGAGTTTCCGCTGTTGCTTCCGCTGGCGTTGCCATCGCGGTCCT -GGTCACGTTCCGCCAGGTGCGGGGCGAGATTAGCACCTCGGGCGAGGCTGCTGGATGCTG -AGCTGAGGGAATGCCCGCCCGAGTAGCCGCGGCCACGGGAGCTGGCGAACCGGGGCTGGT -AGACATGGGGGTTGGAGGAGACTGAGGCGCTGGTTGGTCCAGACATCTGCCCGCGGAGGG -ATGCACTGTCCCGATACGGGCCCGTAGCAGAGCCTGATGGGCCACTTGGGTCCTCGGCCG -GTGATTGCTGGCTTTTCTGGGACTCGTTCTGCCGTTGAGTAGCACTCGGAGCGGTGTTTG -AGGGCTCATCGGCCCCCAGACTGAGGCTGCGCATGCCGAATGGGAAGATTTCATCATCTT -CGCTTGTAACTATCGGCCGGCGCAGAGCTGCAGAGCTCGAGCGACGGTAGTTAGGGTCGA -AAATCCGCGGCGATGTTGGGATGTCGATAGCACCTGCGGTAGAGGAAGGTCCTCGCGTAG 
-TTTCCATGCCAGGATGTTCTTCCAGCGGCGAGTCGTGCTGTATACGAGGGATCCTTGAAC -GGTGGTCGGTTTCGATATCATCCGCAACGCTGTTCGAGCTCAGCCGAGAGCGGATAGCAG -GAGTGTGGGGAGTGTGGGGAGATATGGGTTTCCCAGGAGAAGAAGCTGTGGAGATGGAGG -TGCCCGCAACCATTGGTGGGACTCCTGATAGTTGCTTGCTCGAAGAAATGGAAGAGCGAT -GCATATGCATGGACTGTGACATCGAGTCGGACAACGCGGCATTGGAATCTCGCATACCGC -GGAAGCGGGCCAGGGCTGCTGCAGTGGGGTTCGGCTGCCTAGGGCCAGGCTGAATAGATG -CGGAGGTAGATGAATTCATTAAGTCCTTCTTCGAATCAAGCATCTTTAAGAAATCTGATA -TATTCTCGTCATCTGCATGAATGGACTCCGCACTCGTGCCTGTAGCTTCCGTGAGTAGGC -CGGACCCGGGCTGTGCATTGGATGATGTCGCACTAGCTCGGCCACTGGAGTTGTCGTCTT -CTAGTCTGTTTGCCCCCGCAGAAAGGCGGCCTCGTCGGTGACTGAAGGAGCTGCTGTACC -GGGATATCGGAGCAGGTTTGGGGGATGCGGAATTAGAAAATGATATAGCTTGCTCGGATG -CAATGTTTATAGGTCTGCGCGCTGAAGCAGCAGAAGAAGGGGGTGGCATGACACGCGAAT -CGGCAGACGTTCCTGTGGGCATGCGTGAAGACATGTTACGGGGAGACACGCCTAAGGGAG -AGTCCGCTAAAGCTGGGGAAGCAGAAAGAGGGGGGGCTTTGAAGGGCTGGAATGAAATGG -AAGGACGGCGTGGGAAACTTGAGCCTCCACCTTCGCCAGCAATCTGCGCAGCGCGTCCTG -AAGGAACGACTTTGGCAGGGGGGAGGAGTCTCTTAGGTGAGTCGGTCGGGGACGGAGACC -CCGCGCCCGAATCTCTCATTGCACGAAGTGCTGATATAGGACTTGCACCTGTTGTTGGGC -CAACCTGATGGAAGGTCGAGAGGCTGCCGTATGCACGTGTGCAATCTGGATTCTCAACAG -TCCTTCTCTCCACTGGCACGGAACCAACTTCAGGATTTGGGCGGTTGATATCATCTGAAG -GCAACGATGGCCGAAAGATCTCGTCGTCGGCACCCATGAATCGTGAGCTCAGGAGCGCTT -CCGAGTCATCAACGCGGAAATCGCAGTTCGTCCGATACGTGACTTGAACTGAAAAGGGAC -CTGCGGGCGACTCTGTGACCCCAAAGCCGTAGGTGTCAACCACTTTTTCGCTTCCCTCAC -TCAACGGGGCTGTCAAATGATCCAGCGAGAGGTCATCAGGAGCAGCGCTTCCATCTACCA -CACGGTACTTGATCTGGAGAGCTGGACTCTGCCGCAACTTTTTGTTGCGTTTAGAGAATC -TCCATGCAGGCAAGAATTTGGAGTAGGTAAATAAGGATCGGAAGAGCACAATGCTTTTCT -TATACACAGTCGGCAAAATAGAGCCCAAATCCGCCGGAAGCCTACTTGAGGACTCTCCCA -GCTCGATTCTCCACCGCTCCAATATAATCTCGTCATTTTCCGATTGGTATGGCTTTGCAC -GGGCGCCCGCAAGCGCCGCGAGCGATTCGCGCACATCCCAACGCTTCCCATTCTCATCAA -GAATCACCAAGCTTTGATTATTTGTGAGCCCCTTTGTGTCTAGGTATGTTTCAATAACGA -GGGGTGGTGGTCGATTGTCCGTTGCATCGCATGTTCTCCAAGGTCGTAGGGGTTCCCGAA -GGGCATCTGTATCGTCTAACTCGACATTGAACTGGAGCGACCCAAATAGAATTAGCTTCT -GTCACTTTTATATGGTTGGCCGTTCCAAAAAGCATCCCAGTCCCGATCATGAGATGGAAA 
-CATACCCAGCGGTTGACTCTCGGGGACTCAGAGCCCTTGTTATAGGAGGGGGGCAGTACA -ACTCGTGAGTGCAGGATAATCAAAGCAGCTTTTGTATGATAATTCTGTTGAAGCGAAGAT -TAGCCATCAATAAGCCATCCTGATAAGCCATTGGAGGGGCGATCTGAGCGACGATACCGA -TATTATTTGATTTAACCGGGTCAGGGCCTCCTTTCCCAGTGCTGCACTCCTCCCCTGCTC -AGACGACTGAGGCCCCGGCTCTATACCCAGCCCTCTTGTAGAATTAGATCCCATCTCGAC -GGTTGGGGAGCTCGGGGCGGGTTCTTGTTCTTTACGGTCGTCCCGAGCTGTCGGTCTGAG -GGGGGGATTTGGCGCCGGAGAGGGCACCGCCGATGATCGGGGATGTTGATGCATGATGGG -CGCCTAATCGTGGGCGCGTGGTTAGCGACCATGAACACGCATGTGAAGAAGAGGAGTAGT -CTTACCCTGGAGCGGAAGTGCTGTAGATTCAGTGTTATCACCGCCGGAATCTGTTATCAG -GAACGCAATCGGGCAGCGATGGTTCCAGTACTGAGGGCCATGCTAACAGGCGATGAGAAC -GATATCTTCTAAGGACGTTCCTGAATCTGAGACGCTAAATGATTCACGATAAGACAATGG -ATGCCCAGCATGTGGTCTAGGGTTGTCTCCGGCCTGGGACCTTCGGAGCAAGTCTAGAGC -TTCTCCGCACGGGCAAATGCAGTTGAGAGGAAATGGCCGGGTGAGGCGTAGAGAAACGAG -AGATATATGAGAAAGTGAAACGGAAATGATAAACAAGATGGATGTGGAGGGAAagagaga -gagagagagagagagagagagagagagaAGGGGTGTAATAATGAAGATAGTTCATCTGTG -ACGCACATCCGGTGACTCGACAGGTCTCGAGTGTCCTAGTCAGCCTAGAACGCCACGCAG -ATGATGCAAATACGGGGATTTGCTGCCTATATTCTATACATTCTAGGCTATTGTTCTTTT -GTATCTTTCTCTTTAGATTATGTAGGTAGAAAATTGTTCAAAACACGGTCGCAAGGATAC -CTAGACAAGGTACCCTGGAACTGATAAGCACTTGATCACGACTTACAAGGCGTGATATAT -TCACATCCATATGGATGTGTGTCCACATTAGGTATCAAGGGCAGCTGATACTGATAGATC -TATCTATGTACTTTATATTCGGAATTTCCTACGTTTCCCCAGATTTCATTGGAGCATGTT -ATATGTCTGATCCAACGTCATAACTTCACTTGATTATAAACATTATGTATTGAAAGAGTT -ATATAATTCCATACTTCAGAGCTGAATAACTAAAACAAAGAGATTGTCGGAAAGTGACCT -ATAATGTGCGTTATATAAATATTCAGCACTCGTTGTGGCCAATATTTATTGATGTCCGCA -TGCGCTCAAAGGTTGGTATTGAGCACTTTAACGGGGTTGGCCTTCAACTTTGAGTCTTGA -TCGCCCAGGTGGAACAGCCCATCAATGAGCTTCACCAGAATAGGCTCAAGGTTCACACCG -AGAGCAAGCGGATTGAGAACACTGCCAAGCTCAGTATTCTTGGTGCAGCCAGCCTTGGTC -GTCCCAGCAAAGGCATTCTGGACAAACTTGATGGCATCCGGAATGGCGATGATTTCAGTG -GTCGCGTGACCACCGCTGGCGTAAGTAGTAAGCTTGACGTTCGCATCCCAGTTGCACCAG -GAGTCAACCATCTTTTTGGCGTCGGCGTAAGGGATGATTTCATCCTTTTTGGCGTGGTAC -ACAAAGGTTGGTGCCGTCGGAGTCTCGTCCTTATTGACACCCATGGTATTGTGGCTCAAC -ACCGCCCCAATGGTCGGGTCGAAGATGAAAGCATCGCCCAGACTTTGGAAGCTAGTGTCA 
-AGGATCGATTTCTCGAAGAATGCTAAGAGATCCGCAGTGGCGCATTCTGAGGAGGCGGTA -TCCAGTGCCTTTTGACCTTCTGGGGTTATAATCTCTTTGATGAAGGGGATCAGCTCGGCA -CCGTAGGCACTTGGCCTCGAGAGACCAGCAATCGCTGCGGGGAGGAAGCCACTGAAGGCT -GTATTGTCAAGCTGATACATTGTGCCGGTGAGGTTTGAGGGAGTTCCACCCTGCACCCAG -CCCTTGATGTTCAGCTCCGACGCGTACTTGGGCTGCAGGGAGGCTGCCCAGCCGGTAGCA -ATGGCACCACCTGAATAGCCGACACCGACAACCATGGGGTTAGCGGCGAAACCCAAGGTC -TTGAAGTTCTCCACGGCACGGATACCATCCAATACACCCATGCCTGCGAGACGACCAGGC -CCAAAGGCAGCCTCGGGGCCCTCATAGTCGGGAGAAGCGACGATATAACCGAGAGCAAGG -TAGATCTCAATGATCAATAACTCGACCGAAGAGATCAAGCTGTCCTGTGGCGCACCGAGC -TGGTAGCCATAGCTAGGCTGGCATTTCGTCGCGGAACTATCATAGGCCGTGGCGAAGGAG -ACCAAACGATCGAGCTTAGCGTTCTTCGGTTTGAAAACCGTAGTGACTGTTGCGATGGCC -GAGCCATCGATTGCAGTTGTGCGGTAAAGCAGCTGATAGGCTTCCACATCAATCGGGACG -AGGCCGAAGAACGCGGCACTGATCTTGCGTTGACTCAGGATTGTCCCAGGCGCCTTGGAT -GCGAAGTCGGCAGCCGGCTGATAGAACGGATCCGTATCAGGAGGCTGGACCGTGGTGCGA -GGGACGGGAGACGCAGGCAGTGCGGAGGCAATTGACGTGAATGCCATCATGCCAACCAGG -GCAAGGCGCACCATGCGCGCAAGGGACAATGCCATCTTACTGCTCTATTTATGAGAGTCA -GGGTTGTGATGCTGCTGCTGCACGAGAAAAGGGAGGGATGATCTTCACATTATATACTTA -GATCTAAACATGACTGGTCTCATCCTGTGAATGTGATTGATATCTATTACCGAGGCTGGT -GAATATGGCGATGATCATCACCTAGTCCCCGATGAAGGGGAGTTACCTACTCCGGCCGAG -ACCTAGCTGTCTAACGCTCTGCAGATCCCTTTATTGATATGTTACTCCTATATCATTAGC -CCCCAGAATGAACACAAGAATTAGCAGATTATTCTGCGAGGAAGCTTTACAGAGACAGAC -TCATCTATTTACCTCTTTTTTCTTCTTCAACATCGAGATGAAAGCTGAAGTCCAACATTC -TTTCAACCTGGAGGATCGACACTAGCCCCGCACGAACTTGTTTACCGTCTGATCAGGCAC -ATCCAATACAATAACCCGAGAATATCCCGAGGCAGATGAGACATTATGGCGGAGAAAGCC -CCTTCTGCAATGGATACGTCGTGGATACGCGTTGGGTCAGAATAAGCACGTCTGTCTTCG -TATCCCTATACGGTGCACCGCTTCCATTCGGCGTTTTATTAGCGTTACCCCTCATTATGC -GGAGGATATCTTGCAAGAAGCAGGTACTTGCATAACGTGCTGCCGTTAACTTTTTGTCAT -AGGCGCTCCACATAAAAGGGCTTGGGTTGCATTCTTCGTGTTAATATAGGACGTTGAGAG -GCAGATGACCAGTTCCCGTTTTGGTAAGGGCGGTGCCTAGTCAGAATTGTACGTTAGAAT -GGGATATAAGACCGATTGGTAGAAGAAACAGATGCAGAAAGTATACGTTTCTCATGGTCT -ACGGATTTTCACAAATCACAATATACTATAAGGTTATACCCGGATGTCTGAGTTCCCGGT -CCTGTCTCATGGTTTCTCCCCTTAAAACGGAGTAGTTCTCCCTATTGACCCGTTAGATCC 
-CAGAATGACGGCTGGGGATGCGCTAGTTTCACCCCACAGTTGATCATCCCAGCTATTCTA -GAACTGTAGGTTAAGGCACTTGTCTCGGTACTTTGTAAGCTTTTCTATGCGCCCTGCACA -AATTTGTAATCTTGACAGGCCCCAGGACTCTTTCGTCTTATTCTTCTATTTTTCTCAACA -CCGTCCAGTGGTCGAAATCATTTGCTTTTCTCTTAATCTTGGAGTGAAGAACCGTTTAGT -GGAGAAGCTGGAGCTAATTCTTTGTTTAGCATGGAGTGGATCACCTACTCCGTACTCCGT -AATGTGACCTACATTGCAGAACGTCCGGCGGTAATCCGGATTCTCGGCTTGATCGGTGAA -GCCGAGAGAGTTTGCTAGTCATCGATAGTTGTCGAACCCATGTATGAAGCCTGCAGCGGT -AGTCTGAGACATGGCTCCACCTTCGTGTGAAGAGCTTTTGAGTTTTAACGCCGTGATATC -TGACCTAGTCCTATCGTAGACAGCTGCTACCGCTAACGATGCTACCATCAAGATCACTCA -TTATCTATTACTCTGATTGCTTTCCCCGGAGATTCACTTTGAGAATTATTCGTTGCTCAA -TTGGCAAATTTACCTTTGTCATCATACAAAGGTAAATTAGATCATGAATAGGCTAAAAGG -CTGCAATATCATAACATGGGGCTGTGGCGCAACGGTAGCGCGTGCGACTCCAGTTCGCAA -GGTTATCCGTTCAAATCGGGTCAGCCTCATTTTTTGCCACTTTTTCAAGTAGATAGTCAC -TTGCGCAGTTGCAAACCCCATTTTTTCTCAAAGGAGTCTCTAGAACAATTGATGTCTTGC -CTCAAGCAAAGCGCTGACATACAGGTGTAGGTAGCCTTTAGAGCTCATCCTTATCCGAGG -TGAAGTCGGACTTTGATACAACGAAACGGGTCTTTTTGTTAATAGAAAAAGGCCCTCGGA -AATGACGTAGATGCCCTCATACCGCGTAGATATACACTTAGCCTCCGACGTACAATTATG -CCTTGAAGCGCAAATTATTGCGTGCGTAGAGACAAGACGATAGATATGGCTGAGTATGCT -TAAAATACACGGTGGTATATATATTTGACGTGAAAAGATGGAATTTGAATAGCGCCGTAA -AGCTCATATTTTACATATTCATATTTGACACACTGAGCGTGGGAAGAATATTTCCGAAGA -TTTCTCAAGGACGCCATATACGAGTTTCCAAAGAGATCTGAGGAACTTTATCGAAGGGCA -TGCATAGTGATGAAATCCTTCATCGAAGCGTGGTAGTTCAAAAGGCACATGAAGAGGACG -AGATCTAGATCTTGGGAGGTTACTAGAATATAAACCTGATTCAACTCCTACTTCTTTCTA -TTTGTATAATCTTCTTCTTCCCCCAGCTCTCATCCCTCTTTTCTTCTCTCTTCCTCACCC -ATCCAGCAGAACAGTCCAAACCAAATTTTCTAGCTTTCACCTATCTCCACTCGCCAGCAA -AATGGCGATTATCTCGATTCCAAAGTTGTTAGGCCAAGGGCTCGGTGACCCAGTGTTGCT -GAATAAGATTGATTGGCTCCTAGCTTGCAACATCTGTGAATACGTTAAATTGCCTCAGCT -TGTTGTGGTGGGAGATTAGTCTAGTGGGAAAGGCTCTGTTCTTGAGGGCCTTACAAGATT -GCCGTTCCCCAGAGACAGCGGGCTGTGCACACGATTTGCTTCTCAGATCATCTTTCGGCG -CACCAGAGCCGACACGAAAAGAAGCATTAGTGCGTCTATAGTTCCAGCATCCGATAAGCC -CGTGGAGGATGGAGAGCAACCAAATGGAGTCTCTCAATTTCGAATCCTTCGCTCGATTAA -TGCACGAAGTAAGCTCTCCTACTAGAACAAACGATAGCAGAAGCCTTCCTGTACTAACAT 
-AGTCTCTTAGTCTCTATAGGTTCACCAGACTATGGGTTTATCGAGTTCCAAGGACGAGGT -ATTCAAGTCCACCTTTACCCAAGACGTCTTCCGGCTAGAAATTTGTGGCCCCGAAGAAAA -TAACTTGAGCATCATGGATGTGCCAGGCAGTTTGAAGAACGCCACAGATGGTCTCACTAC -CAAGAAGGATATGCAGATGGTGAAAGATATGGTCTTCAGTTATATGCGTAACCGTCGCTC -TATCATGCTGACCGTGATCTCCGCAAATGTGGACATTGCTACACAAGAGGTATTGGGGCG -CGTGAGTGCGATCTCGAGGGAAGTCGCACACAGGGTGTAATTACCAAACCTGATTTGGTG -GATAAGGGCGCCGATGAGAAAGTCATCGACTTGGTGGAAGGCAAAACCCCTTCCTTGAAG -CTGGCTGGATTGTTATGCGCAATGCGGATCAGCAACAGCTACTTGACCAGAGCTCTGACC -AAGATTTAGTCGAGGTCCAGTTCTTCCGCGAAGCTCACCCATGGAATAACCTTCCGAGAG -ACAAAGTTGGCATTGCTGCCCTCAAGATTCGCCTGCAAGATGTGCAAGCCATCCATTTTC -GTCGAGAGTTTCCCAAGGCATGTCACGTTATCTTACTAGTTTATTCACAGATCTGGCTGA -CTACATTGACTTGCAACCATAGTCGGAAACGGCATGGGTATTTTCAAGTCTGACATGGAA -AATCATGGATTCGAGTACATATTTCTTCATTGGGTAACAGGAGCACATCAGCCAAATTCA -GTGCAAGTTAGGTctgagtctgtacctgagcctgaacctgaacctgGCAGCGACGGCAAC -GATAATGGAAATGATGATGGCGAGGACGATGGGGAGGTTGATGAATATCCTGTTCATAAG -AACATGAATGCTTCAGATCACTGTGGCGCCATTGATGGTATTCTTTTCGAACAGGAGCAT -ATGAAGAACCAACAGATGACAGCATCCTAGTATGGATCGAGGACCAATATCGCGCTTCAC -GTGGGTTCGAAATGGGAACATTCCAGACGTCGCTGCTGAGCACCATAATGAACAAACAGT -CTGAAAAATGGACTGAACTCGCGCTCGGATATGTCAGCGATGTGATTGATATCATGCATA -CATTCAATCTCAAAGTCCTTCGGTATATATGTCCCGAGGAGTGAATCCGCAACAAGTTAG -CGAGTGTTCTCGTTGACGAACTCTCCAAGAGATACAGGGAAGCAATGGACCAGGTCCGGA -TGGTTCTCGACGTCGAGCGCATGAACCTGATGATGCTGGACAACAAGTTTCAGGAAACCC -TCGAGCCAACTCAAAAGACAAAGTGTATTCGACTCACGCGTTCAATCTATGGGTAGAATA -CTATACTAAAACCATCATACCAGCCACCGAGAGCAGTATGAGATTATAAAGGGCAAGAGA -AAAGCCCCAATGAGCAACACCCAACGGACCATTAGCTACCTTCATGATATTCTGGCGGCC -TACTATGCAGCGGCATCTAAGCGGTTTGTCGATAATATCTGCATGTAAGCGACCGGCTAC -TGTCTGCTCACCGGACCACGCAAGCCGCTTGGACTTTTCTCGCCCCAATTTTTTGCGGAC -TTGACGGAAGACCAGCTGGCTGATATTGCGGGGGAAAATGCAGCACTGAGCCGGAGACGT -ACGCAACTGAAGAAAGGGATCCAGGACCTGGAGACTGGTCGAAAGATCATGATATAGATC -TGATTCGGTACCGACAGTCCAAGATACCACACAATATCTACATGAGGTAATGGAAATGAA -ATGGAATTTCATACATGTCTAGCTCGTAAATACATAATAGCGCATCCTATCAGGTCCCCA -CAGATCTGCCCTGGGTGACCCTCGCCTCTCGTCCGGGGGCATGGAGGGACATGGGCGTCG 
-TTATTGGTCAAACGGGCTCTATGGAGGACGATAACGTCCATGTAGACATAGTATAGTATG -GACTTGCACACGGAGTAGTCTGGACGGCTTTATTGGCTTCCCTCACGACTACTTCTCCTT -GGACGAATACTTTCTACCTTGGAAGTCAATTTTTAAGTATGATCCATGGGAATAATTGTG -AAGTCTATGAGCTTTAGATGTCCACAATATTGAAATCATACTCTCGACTTGATAAAGCGA -CGCTTTGATACATGTTCTCTATACTTGCTTGTGATTGAATTGCTAGATATTTCATCCCAA -AGACATATTATGACACATTTATAGTAGTTACAGGAGTCTGAAATGACCCAGAGTCATTGA -TCAAGATGTACAGCTGCGTACTGGATATGACCATATATATGCTTTTTACCCACACGCCGT -TGTCGTCAACTGTGCGGGCGGACCCACGGATGACATCGAGGGCCAACCGGCCGGATTTCT -GACAAGTGCGATCCCCAGGAGATCTTGCTTTTCCCAACCATCTAGACACCTCGAGACGTA -CATCTTCCAACTCTCAACCTGCTCACCTTTCGTTCCTCTTCAAAATTTCGAGTTGCCCCG -CCAAAGCACTTTCGCCACACGCGCATACCACGAGCCCACAATCGATTTCGCGTCAAATCC -GCTTCAGAAAAATTGTGATATCGAGATCGAGATCGCGTCGACCTATATAGCGTCGACTAT -TCTTTTCCTTTCCCCTGAACCGGAGCTCTCTTATCACCTAGCCCCCCCTTCGTCGCGGTT -CACCCCACTGTGGTGACGGCGACGCTCAGATTGGGTCTGATAAGAGATTCAGCGCGTCAA -CCACTCCGGGCACATGAGTAACAGACTTTGGGAGTGTTTCATTCAAGATGATGTCGAGCG -GTTCCAGCGGTTTCTGGCGAACGCGACGTTCGCGGGGCCGCGAGCTGCAGGAGGCTCCGG -GGGTCCCTCCGGAGCTGCGACAAACCTATCATCAAAGGCTGGAAGCCCCGGGTCCATGAT -CGCCTCGTCTCCGAATCCATCGAAAAGTAAGAAGAATCTTGAGTCATCACTTGGGGCCTC -TGTGCCCGACCGGGTGGCATTCTCGCGCGGGAGCGCGACTTTGTCGCGCGCCGATGTTAA -TGTCCGCGATCATTACGGCCGCACACTGCTGCATCATGTCGCTTCGTCCACAAAACCTAC -TGCCTTAAAATTCGCACGCGCTTTGTTGGATGTTCCGCTCGTCGATATCTATGCGCAGGA -TTGGGAGAGTGGATGGACTGCTTTGCACCGTGCTCTATATGCCGGCAACGCCACAATCGC -TCTGGCGCTCATGGTGCGTGACGTGCAGGACATGACAAGCTTCAGTAAAGGTGGAAATAC -GAGCCACTCGAGTGGTGGCCTGATCAAGATCAAAGACCGCGAGGGTTGCAGTCCCTTTGA -CGTCTTCGGGGCGACCATTAGAACTCGGGACATCAAGCATCTCTCTGAAAGCTGGCCGAC -CTCAGGTATTGATGACGATGCCCTGGGAAGCGATGCTGCTTCCAATGCTGCTGCGTCGAA -CTTTGGAGATGACGGAGAAGACAGGTTATACGCTCCTAGGACTGCCCTGAAAGGGGCTGT -GAACATTTCCGCGGATGAAATTTTCACATTAGGAAGCAACAAAAATCTGAACCTTGGGCT -AGGAGACCAGGATGACAGACAATTTCCCGAGCGAATCGCGTTGAAGCGGCCCGAGCATCT -TTTGCATCGATTTTACCGTGAATACCAAGAGAAATTGGAACATCTAGGACTTGAGGGTAG -TGTTCCCGAGAGCCAATCAATCGGGCTGCCTACTCTGATCACAAACAAATCAATGAAGTT -CCAAGACATTATCATGTCAAAGCTTCATACTGCTATCATCACAGATGACCCTGAGGCTAA 
-TCTGTTCATGTGTGGATTTGGACCTGGGGGGCGCTTAGGAACAGGAGATGAGTCGACGCG -TTTCACCTTTGCTTGTATTGAAACGGGTGGACTAGAGGGCAAGAAAGTCGTATCTGCTGC -TCTGGGTCAAGATCATAGCCTGGCAATCACAGAGCAAGGTGAGATTTTCAGCTGGGGAAG -CAACAAGTTCGGACAACTTGGCTATGGCTTGCCCAGAACTAGCAACAAAAACGATATTCC -CATACAGACGACTCCTCGTCAGATCTTCAACCCGTTCAAGAAAGAAACAATCATTGGTGC -AGCAGCTTCGTCTATCCACTCGGTGGTCTTCAGTACCTCAGGTCTCTATACGTTTGGCAA -GAACGAAGGTCAGCTTGGCTTGGTTGATTCCGATGCTCGTTCGCTTGAGGTACAGACGAC -ACCGCGGCGCGTTGGAGCATCCCTTTTCAATTGTCCAATCAAGATGGTGGCCGCCATTGA -CCGTGCTACGTCTGTGCTTCTGCAGAACCACGAAGTCTGGGTTTTCTCGTCATATGGTTA -TTCAAAGCTTTCATTCCCACTTGAGGTGAGCTCAAGATTTATCAGGGATAGCTTCATGGT -AACCCGATACGACAAGACTGCAAATCGTGTCGTCAAGATTGTATGTGGAGGCAACACTAT -ATGTGCATTATCCAGTTCAGGTGATGTGTTCACTGTTCAAGCCAACCAGTCCGATAACTC -TTCGGCTTCAACGTCCACAACCAATCCTGTAAAGATTCGGAATTCGTTAGCCACTCCTGT -TCGAGCATGGTCCGTTAACAAATCATACATGGCAGCAAGCGATGTTGATGTTGGGCAAGA -CGGTTCGATCATCATCTGTACAACATCTGGATCTGCGTGGAGAAAAGAACGGCGGACCAA -GAATAAGGAGGGAGCCTCGAAAGATTACAAGTTTGCCCGCATACCTGGTCTCTCACGGGC -AGTGGCAGTGCGAAGCAACGCCTTCGGGGCATACGCTGTCGCTCAACGTGATTGCGAAGT -GACCAGAGAACAAATTCATGTCGAGCAAAGTAGGCTTTGGGATGATATGTTGCCGTTGTC -TCCATTTACAATGCCAGGTCTGGAAGAGTTAGACTCTATTCTCGATGATGACGCTCAGAA -TGACCCTCTTATACTTTCCTCCGGAAAAAGTATACAAAGAGCCATACTTTCATCCTCAGA -TATTGAAAGCCAATTTCTTTCTGTACGAACCGAGGGGACTGTTTGGGTAACCAGCTCCTT -ATCTGACTCTTGTATTCCCGTGCACGAGTTTCTTATAGCTGGCCGCAGTCCAATTTTGGG -AAAGGCATTGACCGAATTCCGGCAATCATACTATTCATCTGTCCCAGATGTATTTGACAT -TGAATATGGGAAGGACGGATATCCCCAGATCTGCCTCCGAGGTGTTGATTTCTTGACCGT -TATGAATCTGGTCTTTTTCCTTTACACAGACGGCATTCTCGATGTTTGGCGTCTAGCAAG -GAGCTCATCCCCCAACTCGGCCCGTTTCCGGCAGGTTCGCACCGAAGTCATGCGGGTTTC -AACGCAACTAGGCCTACCAGCGCTGGAGCGGGCAGCACGTCTAATGGTTGAGCCACAAAG -AACCCTGAAAATGGATATGGCTCATGCCATCAACGACCCATTCTTGTTTAACAGTGCAGA -CGTGATTGTGCAGTTGAAAGGCGATACGATCAAAGTGCACAGCCAAGTTGTGTGTCAGAG -ATGTCCGTTCTTTGACACCCTTTTCCATGGTTATGCGGGGGGAAGGTGGCTAGATTCGCG -TAAAGCAGACCCAAATCAGAGTGTGCATGTCGATTTGAAGCACATTGATCGCTCTACATT -TGAATTCGTCCTGCGTTACCTGTATGCCGATACAGAAGAACGGCTGTTTGATGAAGTCCG 
-AACCAATGATCCTGATGACTTCGTTGACTTGCTGCTGGATGTAATGTTCGTGGCAAATGA -GTTGATGATCGACCGACTATCTCAAGTGTGTCAGAAGATGCTCGGTCGTTTTGTCACGAC -GCGCAACGTGTGCTATCTGCTCAATTCCATCGCCCCCTGCTGTGTCACAGAGTTCAAGGA -TGCCGCCCTGGAATATATTTGTCTCAATCTCGAGGCTATGCTCGCTAATAGATATCTTGA -AGATTTGGATACTACTCTCCTACGGGAGTTGGATCTGATCTGCCGCGAGAATCAGTTAGC -ATGCTGGCCCATATCCCGTGGGCGCAATTCTGAGGAGTACGTCTTCGAGAAATATCCCGA -AATTGTCAGCTCTGTGGAAGCAGATAAGCAACGAAGGATTGATGCCATGGCTTTGCAGTC -CCGCCTTGGTCGTCTCGAAACTTACGATGCACGAGCTCGGCCAGCGCCTAACGAGAAGGT -CACTTCATCTCCTTCTGTGCGCAAAACGAAGACCAGTGTATATGGAGAGTCACCAAATGT -TGGTGGTAGCCCAATGCTGAAGCCGAGGCAGTCGGCTGGGGATCTCATGTTTCAAATGGA -CGATGAAGCGCCCATGTCTTCAAGTGCTTCGAAGGGCAAGGTTGCTATGCGTGGACTTCA -ATTCACTGAAGGCATACCTGAAAATCGATCATACTCTGAATCCCCAGCTATTGGGGCAAG -CGTCCCAGATGTCGAGTCCTTGAACGATCGCATTTTCTTGAAGAATCAGATGTCTTCCCC -GCAAGACCCACTACTAGCACAGTCCCCGTCTGAATCCAGGGCAGTCGCCCAGAATCGGAA -ACAAGCCATGGCATCATCATCTAATCCCTCTTCTAGACCATGGACCTCGCCAAAGATTTC -GAGCTCCAAAAAAGATATCAAAGACATCATAGGAGAAGCATCCCAGTCCCGGGTATCAAA -TCTCACGCTAGGAATGGCAGATCGACGCGAAAGTAGCGGTAGCTTCGTACCCAAAATCTC -CCAGAAGGAACGAAAGAGGCTTCAGCAACAACAGATACAAGAGCAGCTGGCTGCCCAACA -GAAGGCGAAGGATAAACCACAGAACCCCTGGCAGAAGCCAGCACCTTCTTCTCCAGCGTC -GCTTCCTCAGCCCGATCCTCTGCCAGGTCAAAGCGTTCCAACTTCTGAAGCGGGTAAATC -ACCTCAAAGGACCATGACTATGCGGCAGACTGTGGCTGGCACCCCGCCGCCAAAGTCAAA -GCCAGTGGCTACACCTATGCAGACACCACGCCGCAGTGTGTCAAGCAACCCACGACCCTC -ATCCCATTCGAAACCTTCCACGCCAGGACCTTCGACAGTAACCCATAGCAATCAACCGAC -ATCACCACAGCCAGCCATTCAATCAATTCGTCACATCCCACGCCCTGACCCAATAGAGTC -ACATTCTCTCTCTGGCTCGTACTCTCTGTCGACCATTTTGCTCCAACAACAGACCGAAAA -GGAGGCGATTCGTGAAGCAGCCACAGCCAAGCATAAAATGCAAGAGATTCAGGCCGAGCA -GGAGTTCCAGCAGTGGTGGGATCAAGAAAGTAAACGTGTCCAAGGACTCGTAGACCCTGA -GCCAAATGAATCGAGCTCTGGAAAAGGTGGACGAGGCGGCAAGGCACCCGGCGCTGCCGG -TGGCCCGCGGAGGCGTCGAGGCAACAGGAATCCCGGTGATGGATCTGCTCCCCAAGATCA -CAGACGGGTCTCGGCTCCAAGCTCTGGGCATACAACCCCCAAGACGAACCCGAGTGACCA -AACCCCAACCCAGTCTCAAAAACACCTAAATAAAAATCGGGCCGGAGGTGTTGCTGGCAA -TGCCAATGGTCGTCGTGGAGGTGGTAATCAACGAGGAAGGGGCAAGAACGATGCATAAAA 
-GCATGAAAAGAACGAACTCATTCCCTGGTTGGCTTTTCTTATGCTGTTTGATACTCCAGG -TGTTGGCCTCTTGTACACTTTCGGCTTCTATTAAATGTCCATCGAGATATGATTGCTTGA -AATACGATATCAGTGGGTCCTTGCGATCTCCATCATCTGCTCATACCCCTTTTATTCATT -TACCATGTTCGATCTTTTTTTTTTATCAATTTAGAATGTGTTTGGGCGAACTTGGCGTTT -GATCAAAGTGCTGGTCACTTTGATCGACCGATCGATATAAAACGTGATAGACGATCTATG -ATTGATACACTACTTTTTGTTCGTAAACTTATAATCGACTTGTCTCTTTCAAGATACATA -CTTCAGACGCCTTTTCTGAGAATCAAAGCCGCCAAGACTAACAAACAGTATTGCCTCCAG -CCCTATGAACGGGCTTCCCAGCGCACTGTCGCCGGTTGCATAGGACGTGAAAAGAGAGAC -TAGAAAGAAGTCAGTCACATATTCTCACCAACGCAAGCCAAAAGAACTTCTAAAGACTTA -CCGAATAATCCGTGTCCGGCACTCCCTCCCTCCCCTCAGGATTGAAAAACCTGAACTCCC -TCACAAGCGTAGCAACAATAACCTCCAAATTGAGATACGCGAACTTCTCACCGATACAAC -GATGCCTACCAGCCCCGAACGGCAAATACGGACTGCGCGTTCCCTTGGAGACAGCCCCAT -ACCCGTAATCCACCGTATCAGAAGAATCCTCAACTTCAACCCGACTCTCCCAGCGATGCG -GATCCCATCGCAACGGATCACGGAAATGCCGCTCATCCCGTGCCGTAACACCAGGTGAAG -AGAGTAGCGTGTGCGAGGGCGGAACGACAAAGTCAGTCCCAGGCACCGGCATCGGATTCT -TCACTTTCCGCATGAGCGTATGGATAGATGAGTGGATACGCAACGTCTCTTTGATCACAT -TACGATGCAACGGAAGTTTTTCCATATCCTTGTACTGCAAGGGCGGGAGATCAGCACCCA -AATTCTGGATCTGTTCAGCATACAGCTTCTCCGTCATCTCGGGCTGCGACGCGAGTCGGA -GAAGGATCCAGCAGCTAATTGCGGACGACGAGTGTTGGCCCGCCATCAGCAGCGTGATCA -TCATATGCGCGATCTCTATGTCTGGAATTGGAGTCCCATTCCGGTATACGCAGCGCATTA -GATTCGAGATCATGTCTGTGCCTTTGGCTTTATCCCTTCCTTTGTCGTTGTCTTCTGTTC -CAGCTTCCCGTCGCGCTTGAATAATGCTCAGATAAATCTCACGCATACGCCCATGAGCTT -TATCCCGCTTGCGGTTGCGGGGCAGTGGTGCCCAGGGGAGCATGAAGTTTATTGGGGTGA -AACCAAGGTCGAGGTCATGGAAGAGCTCAGCGAAGTCGGAGGTCATTTTCGAGCGTACTT -CTTCGCCTTGGAGAGTGCGGGCTGCCGTGAAGAGGGTTATCTCGGCCATGGCAGCGGCGA -GGTCAACGGTGCCGGATTGGCCCTTGAAGTTGGGGGATGATTTGATGTAGCCGCTTGTTT -CGTCGGCGATTAGGGGGACATAAGATTCTAGGGCTTCTTGGCTCAAGCCATATTTGATAA -ACTATTTTCGGGGAGTTGATTATTAGCTTTGGATGCGAGGTGTGTGGAGAGTGATCAAGT -AGATATACCTTCTTTTGCTCCATGAGTTTGGAGTTGGGACAGTCGTAGACAACATCAGAT -CCGAAGACAGGCGTTGTCAGCTTGCCGTAAACTTCTTCGGCGTTAACATCCTTCAACTTT -CCATTGAGAATGAACTCATTGCCTTCAACTCCGAGGTATACAGTCGTTTTCTTGCCTAGC -AGAACGAAGGTGAAGATGTCGCCGTACTAGGTGTTCTCGTTAACAGGCTGTCTTTCACCT 
-TGGAAGCATCGAGTGCTACAAGATATACACACCTTTGCACGAGAAGCAAAAAAGAACTGG -TACGGATCCATGCCATAGGCGACTGTGCTTCCAATGAAAGGAACCCAGTGAAAGACAACA -GGTGGTTCTTTTCTGTTGTAGAAGACGAGTTGGTTGATCACATTGAGGACGACTAACACC -AGTATAACTCCAACTGTATAGTACGCGATGTGCAGGGTCTGACCTGCTACCAATGGGACA -AGATCCATAGCTGCATTCGATACGAAGTAGAAGTTTGGAAGTTTGCAGTATGAGTTCATG -GATATAGTTAGGGTTTTGTAGGACAAATCAGGAGGGCAGGATTGGCGCTTAATCATCAGG -TCAGGCCATCAACCCCTGCCCCGGATCTCACAGCAGCCGGCGCATATCCTGGGGCCACTG -TCAGGCAGCCAATTTGTTTTGGGTCTATTGGAAGTAGTGGGCATAGTGGGCTGTGATGAC -CATGTAAATGCGTGGCATTGGCTAATCATATCAATATATAAGAGTGCAGTGCAGAGGACA -AAAATAACTTATGTCCAAACTGATCGCTGGAATAAAAATGAAAACAGACTGTAAACGCCG -AACCATAGAGCTAACCGTCATAAAACATAAATGGCGAAAAAGAGAAAACGGGGCAAATTC -ATGAATTAGCCAGGGATGGCGACCTCTCGGCTCTCCCACATCCGGCGGATCAGCATGACG -CTGTCGTTAATCTGGTATGCTTCGTCATTTTCACCGCTGAGGTCACGCGAAGCTTCGGCT -GAAGTGGAGAGGATGCGATGAGATGGATCGTTGGACTTGGCAGCCTCTGCAGTATTGATA -GGTTGATTATCTTTCGCTGGATCTTCCGAAGCGGTGGAGTTTGGATTACGCGATTGGGAT -TTGGACGCAGGACTGGAGCTGAGGCGATTGGGCCGTTTGTTTTGTTGCGCCAGGGGCAGC -GAAGGTGTTGAGGGGACTGATTTGTCGGGAGTTGATCCTTTCTCCGGCTTGTTGGCTCGG -ATAATGTGACGAAGACGAGCGAAGCTCTGGCCGGTGGGAGACTGAAAACGTCGCACGACC -ATCAACTCTAGAGACGTTTTCTCGTACGCCTTGCGAATGTGACGGGCTTCGCTGTCGTGT -CCCGCACCACCAGCTCGACCGTCGCCGTTGAGCCCTCTGCCGGTGTCAAAAGTGCCATGG -CTAGCGGCAATAAACGTTGGGTCAATGGCAGATGGTGGAACTGCTGCGGTCACTCCATGG -TTATCAGGGGGAGAGTTATTAAGGGTGGCGGTACGCTGAAGCCATAGCTTCTGTTGTGTG -CGAGAAGGCAGGTCAGCACCACGCGGCCGCGCAGGAGACGCGGCCAATTTGGCCGCTGCA -GTGTTTGGAGAGGGCGGTGTTTGAGGGTGGTAGTGCGGGAGGAACGACGAGGGAGTGTTT -GGGTCTGAATCGGTGCGAGAAGAGGCCTGCATGCTGTTCTTGTTCAAAATGAACCTCGAG -ACTCCACCTTCCATGGAGGATGAGGTAGCTTGGGGGTGAGAGCCCGGCATACTGGCCATG -CTGGTTGTAGAGAATGTGCGCCGAGCGTTATCCTGGCCAGCAGCCAGGTTGAGAGATGCA -CTACTACGCGATGCGGGATTAGGACTCTTGGCTTCTGGCTTCACCATAGCCGAGATAGAG -GACATGGCGGGGGGTGCTTTGGATGAGTGTGTAGGACTTAGTAGGCGAGTTGCTCTGTCG -CCTTGTTCTGTGTGCTGACTAGACTCCTCGGCCTCTTCTTCCGAGTCCTGCTGCTTAGCT -tcatcatcctcatcctcatcttcatTTCGTTCACCGCGTTCTCGAACGAACTTGGTGGTG -ATCGACTCGGGGAGACTGGTTGCTCGAGGCATTTGAGGATACGGTCGCTTGACAAAGGTA 
-AGATGATCGACAAGAACGGTGCTATTGTCTGCACTGGGATTGGATGGTACGACCGAGTTG -CGCCTCGTACTTTCGGGGGACTGTGTGGTATCTTCCCACTCAGCCTCGTCGGTCTCATCA -TCTCCCAGCTCAAAGCCGACCGTCTTTTTCTTTACCGTGCCTGAGGTCGGTCGCTCAGTC -ACCAAGGCTTTCTTCCCAACAGCTGATGGATTCCGGCGTAACACAGGAGTGGAATAATTT -CTCTTCATGGAAGGATCGGTGGAATGATCACCCACAGTGTTGTCCCAGCGAACGCGTCCA -CCTTCCTTTGGACTCGCCACTGGAGTTGCGGGGGCCGACTTCTTGCGCTGATGATGCCGC -CCAGCTGTCTCAGGACCTAGTTGGAATCGTTGAAGTTTGTTAAAGTTTTTGCCGGTGGAG -GGGTTTCGGTGGTGGTTGCGACCAACAACATGGGCGCGGTGAGGTTTATGGTGTAGAAGT -GTCTTCTGCGTGTGAGACAGCGACAGGTCATTAGGATTGGCGGTGCGCGTGACGACTGCA -TGGGAGGACACTGCACGGCGACCGAGACCGGGGCGCTTGACAGGGGAAGAAGTATTTGAG -GCGTTGGAGGCAGGCATGGCGGCAAGCGAAACGCTCCCAGAGGAGCCACAGGCGGGTAAT -CAGCTGTGCTCACGGAGCGTAGAATGTGGGGATTGGCCGAGGCAAGTTGGTAAAGAGGTC -TCACGGCAGATCGGAGAGTATGCAGGCAAAAAGCAATCAAGGCTCAATAGTTGGAAGCCG -GTTTACACCGGTTCGCACCAATAATTTTATGAAGAATCAGATAGAATGCGAAAAAGAAGA -ATAGGAAAGAGATCCGGGGCGAATTAGAGAAGTGAAGAAGCGAGAAGGCGATCTCGCTGT -CTTCGGCGATTAAGGAGCGGGTAACAGTAGTTGAATGCCTGAGGCAAATATGACGCTTCT -ACGGAGTACTCCAGAAGGGGCTATAAATGGAATCTGGGGGGACAAACTCAGGACCTCGTA -ACAATGATTATCTTTTTTAGAAAGTGCCCTTAGGTAGTGATCTTTCATGTATACTTGAGA -TCTCTTCATTCACCCGCTTCAAGCTATAAACACATTCAATCTGCTAAAGGTCTTCAGGTA -TCATCGGACGGAGAAAAAAAacacacacacacacacaAAGATAGAACCATAAATAACGCC -CAAAAATGCAATTAAGTTATAGCTCAATCAAATTTTCCTCCACTGGCCGTGCTGGTTGCT -GATAAGGCGGCTGCGGAGTCTCCTGTGCATAGGGCCCCGGTTGTCCAGCATTAATTGAGG -GGTGGACGGGTGGCACGGTGTTAGGGGGTTGTTGAGGCTGAGCCTCTGACTGGTAATAGT -AAGACTGAGAAGGACCTGTTGTATGCTGATGAAGTGCTGGCGCAGTGGGCGGCACAGAAT -AGCCTGGCGATTGCACGTAACCAGCAGGTGCATGTTGCTCGGGTACAGGAGTAGGACCGC -TGGGAGGCGCATTGGCCTGATACTGGGAGTCCCGGCGCATAATGGGCGACGGCTGGTATG -CGGGTTCTGCACCGGGCGACTTGACTGAGTCTGGGTTGTGGGAACCTGTATAATATTGAG -CCGGAGCACTTGGTGCTGGggtattggtatagttattgggaatggcgttgctaGGTGAAG -GTGATCCAAGGGTCGGGTAGGTATTTCCACCCCAATTGTGGCCATAAGGCTCATTTGTGG -GAGCTCCATGTTGGGGTTGTGATGGGTAGGCAGAATTCTGGACAGCGCTGGGTGGGCTAT -GGAACCCGTTGCCGTTTACCTGTTGCGGTGCATACATGCTAGCAGGTGGGCGAGAGCTTT -CCATTTGGTTCCCATAATAGAAGTTTTCGGCTCCACTTTGGGCGTCGGGGGCGTGAGTAG 
-AGAACCCTGACTGAAGATTTGGATAGTGTGCCGCCCCGGGAACAGCTCCATGACCAAGGT -TATGTTGCGAATAGGCAATGGTCAGACGCTCTTCTAGCATCCGATCGTAGTAACGTACGA -CCGTCGAGAGCTTGGCATGGAGATCCAATAATGTATCTGTGGCGATTAGTTTCCAGTTCG -ATACATGGGAATGGAGACAGCTGAACTCACCATGCTTACTCATGGTCTCTCCATAGCTTC -GAGCCAGCTTTGGTCGAAGCGCACCGATACTCTCATATAGCTCCTGGATTTGCGGCTCTC -TCAGGATTGTGCCCGGCGGCTGATGCTGGAGCCGATCAACCAGTGTAGAGAAAAGATGAA -TATTCTCAGTTTCGACAGGAGTTAGCTCGTATGGATTTTTAGGCAATGGAGTGGGAGTGC -TAGAGGTATCGCGCAAGATAGAACTTGGAGCAGGCACATTCTTCAATGCGGCCGCATGGT -TTTGCTTGTGCTGTTCCATATCCCGCAGGGATGCTTCAATCGCGGCCTTGAGATCTGCAT -CTTCCTCGTCCACATTGTCCGGCGCGGGAGCGGTTTTAGGGGGTTCTGGAGCATTGCCTG -TCTGCGGGACGAATCCAGAAGAACCTTTACCCTCGGCTTCATCGAGGCTCATTTGCAACG -CTCGACGCAAATCCTCATCGAATCCTGTATCCGCCTTTCCAGCACGAGGTTCCATTGTGT -TAGATTTGCTGATCGAGTTGTTCTTGAACGCAAATCGATCCGAAATTCCTGGTGGCGCAA -ATGACTTGGAGGTCAATTTGGCATAACACCCATCATCTACTCTGACCGGCTGGAGAATTC -CAAGATGCGGCAAAGGAATGGTCTTGCTTGAACACTGAGCGTCGAAAACGTTGCCGCAGT -TACGGCAGTGGTGTTTACGATTCATAAAACTGAAGGCAGTCCGACATCGCATACAGACTT -CAGAGTCGATCCACTCTGGAGGCTGAGACAAATTCACAAGTTAGCTTCTGCTTCGGAGCG -GAGGGTTCGAGGCACACTCACTGCGCTGCTCTCCAGCATACTGCCGCTCATTTCTGTCTT -TGGGGGAAATCGGTATCCTTCGCTCTGCAATTTACGATATGTCTCTCCCAAATACATCAA -ATCCATACGGCCCTGTGCAGCCATAGCCCAGTTCTGAATAAGTTCCAATATCTTCCCCTG -CACCTCAGCATTAAGGGGCGCGCCTTCAGATTTCAGCAATGAGACAAGATTATCCATGAA -TTCCCGGGACGCAATTTCGGCCAGAAAGTGTGTGCCTCCGTTTTTGACACACGTATCCGT -TAACTGGAGGGGCGGGCTTATCAGCGCTTGTTCCAGGCCTATCGTCAACGAGGAGCAATA -TTCACCTTTAATGTCGCCAGCTGAATGTTTGGATTCCTATTCTCGAGTCGTCGTTTCAAA -GACCTCATTCCATCCTTGGGCTGAACGCTCTTTGATCGAACCATATCTGATATTTCTAGG -TTGAGAGCAATGTCTTCCCTGGTAGTAAGTCAGCCATATGACCATACAACTCAGAACAAC -CGGTAGCCCTGGTACATATTGACATGACAGCAAGATGGCACTCAAAACTTACAAAGAGGA -AGCCGTCGCCCGCTCGATTTGCTCATCGAGCGGCGAGGTTGAAGAAAACCAGCCTGCCAT -AGTGAAGGCAAAACACCCGATGCGCGTTTAATACTGAACGGACACAGATCAAAGTAGTGT -TGCAGAGAGCCAAGACACTTCAATGCAGCAAATATGAAGATCAGGCTGGTAAGTTGGAGG -AGGAGCTTCTAGGCACCTCAATAGTGACTAAGCCCCTGGCGTCATAGGCTGGGGGCGGTG -AAGTACTGTCATCTGATTACCCCAGAGACATCCCGCATTACGTTCCTGGATCTGTCCTCT 
-TTTATGTTATACTCTAGTCTGTCTTTCGTTATTTGTGTTTGTTATATGAACTTGATTCAT -TCCTAGGCGGATTGGTTACGCCAGTGGCTCGCTCTATTCTTCCCCCCACCTCTATCTTGG -ACATAACCATGTGCTAAGTCCGCTTGTCGCTGCCTGGATCTACCATGTCCTCCGATGCGG -CGACTTTGCCATCTCCCCCCAATATACCACCGTGGAAGGGCAGGCGGCGGCCTTGCTCGG -TGGATGACGACATGATTTCTAGCGGACATGTAAACAGACCAGATCTGTACCCAAATGGCA -AGGCCCCTCCCATCCCACCGCGGCGACCTCGGTACCCCAATATAAAAGACCTGCAAGACC -AGGCTGCTGCATTAAATGTTGATGACACTACGCCGGTATGTGCAGGATTTGCTGAAGTTG -TATCAATCATTCTTTTAACGGGAATCATAGCTTAGTATACTCTTGCCAACTGCTGCCGAT -GCAATCGAAAGGGCCAGGAAACTCGCAGATGACAACTACAACGATAAAGCCTACGTCCAG -TACCTTCGCGCCTCTGAGATAACTATAAATCTTATTCCTCACCATCCAGACTATCGAACC -GTCAGCCAACGTCCTGATTGGTATAAGGAGTTCGCCGGTCTAGTGAAAGCGGTGCGGTCC -AAGCAAGGCACTATGGATGCGATCAAGCAGGACATTATCGAGGATAATTTGGTCAGCGGC -ATGCAACCCACAGGTGTCTTTACATCCAGATCCCCAGAACAGACCTCTGTCACACCACGA -GGCCGTGAGAATCGAGAGTCTGAAACACACTTAGCTAGAATGCCAAGTCCGACTCAGTTT -CAAAAAATGCCCGACAAAGAGGCGCATTCTCAAAGATTCTCCAGTCCTCCAGAAGATATT -CTTGCGCAGCGTTTTGCCAAACTCAAGGCTTCTCCACCTTCCAACCAGTATGTGCCGGGA -GCAAGTACTGCTGGACCGGGGAGCCCAATTGTAGGATCTGCAGGGCAGACCCCACTTCAT -TCGTCAAACGTACCGCTTGGAACATACAGCTCCCCACCGCCAAGACGTCCATTAGGACCT -CGCGGCATGGGCCCTTCATCTAGTGTGCCGACCATTCCACCGAAGGTTCCGCTTAACACA -TCGTTACCACGTGCCCCGGACCCAGCCTATAGCCCTGTCTTTACTGTGCCATCAAAGCCC -ACATCAAACCCACCGAGAAGCTCAACAGAGAGCACACGTTCAGGCAATCCAAGATACTCG -CAATTCAGCAATTCACCACGTGTCAGTCCAACCCGAGGAGGGTTTGATGACAACCCGTAT -AGATCTCTGACGCCAAATGGGTTGGATTCGGCGCGAGAGACACGAAGCAACTCACCCGAT -TTGCCGTACAGCACAACTATAACTGCACAGAGCCTTCTTGAAAATCTGCGGAAGTTCAAT -GTGCTCCTGATTGATGTCCGGGCCCGAGCCCAGTATGATAACGGTCACGTGTATGCAAAA -TCTATCATTTGCATAGAACCTGTGGTTCTGAAAGAGAACGTGTCGGCAGAAGAACTAGAG -GAGCGACTCGTTGTTTCACCAGAGCATGAGCAGGCCTTGTTTGAGAGACGGAACGAGTAC -GACCTTGTGGTTTACTATGATCAAAGCGCCAATTCAGTCAGTTATCTTGCTGGATCTCCG -GTGGGGACATCGGCACCTCATTTGAGGGCTCTCTATGATACTCTTTACGAGTTCAATGCA -TACAAGCCGTTGAAGGCCGGACGGCCTCCAGCTCTCCTGCTTGGTGGATTGGATGCCTGG -ATCGATCTTTTAGGGCAACAATCCCTTGCGACATCTTCAACTGCTGCTGTCATGAGTTCT -CTGCAGACCAGAAAGCCTATGCCTAGACCTGGCCGACCTCTCGGCAGAGTGCCTACCATG 
-GCTAGTGCAAATTCGAGCCTTGAAATTAGGAAGAGACGGCTGCGCGAGTTCACTCCTTTG -AACTCGAAGGAATTGTCCGAGTGGATGGAAAAATCTAAAGTCGAGGAGATTGACACGAGC -ACCTATGCTGAGGAAGACTCACTGACGGAGGAACCCGAGGAGACTAGACCAGAGCCTCCA -AGTCCATTTATCGACTCATATGAGGCATTCTTGCGTCGCTTCCCTGAACCGCACGATGTC -CGACAGTCCATGACGCAGACAAATTACCGTCCTCCATCTACCGCCTCCCCGAACTATGCT -GCACACATCTCAGTAGCCCCTTCTCGGCCACCCCCTGCTGTGCCCCGTCCCAGCTATAGC -GGCGTCTCCGATGGACGGCACATCCAACCCCATTTACAACGCCAGAATTCGGCCAATCAG -ACAGCCCTGTACACTCCAAGCTCCCGCTTAGATCGCCTCAAACTCCCGCGAACTGGATTG -ACCAATTTTGGTGTGACCTGCTATATGAATTCCACCATCCAATGTCTCAGCGCAACGATT -TCACTGAGCAGGTTCTTCATCGACAACCGATTTCGTCATTATGTACAAAAGAATTGGAAG -GGGTCTCAGGGCGTTATGCCAGGACTTTATGCGAATCTGACCAGGTCGCTTTGGAAGAAC -GATGTCGAGGTCATCATGCCTACCTCATTCCGGAATTTTTGTGGACGTTTGAATCGAGAA -TGGGCCATCGACCGACAGCAAGATGCCAAAGAATTCTTTGATTTTGTGGTAGATTGCCTC -CACGAGGATCTCAATATCAACTGGCAGCGAACGCCGCTTCGTCCTTTGACCTTTTCGGAG -GAGATGCAACGAGAGCGCATGCCAATGACCAAAGTGTCCCGAATTGAATGGGACCGATAC -TGTCACCGAGAGGAGTCTTTTATCTCTTCGTTATTCGCGGGACAACATGCCAGTCGGTTG -CGCTGCACGACCTGCCGACAAACGTCTACTACCTATGAGGCCTTCTACAGTATCAGTGTT -GAAATCCCACCAACAGGTACTGGTGATATTTATCAGTGCCTCCGTAGCTACTGCCAGGAG -GAGATGCTGAGCGGCGATGAGGTCTGGAAGTGCCCACATTGTAAATGCAAGCGAATGGCA -ACTAAGCAGATCATCATCACCCGCGCGCCCCAGATTCTCGTTGTTCACTTCAAGCGCTTT -TCCGCATCTAAAACCCAAAGTGCACGAAAGATCCACACCCCAATCGAATTCCCGCTTCAC -GGTCTACAGATGGACGATTTCGTGATTGCTCACCCTCCCCCTCCCCCTCCGGAGCCCGGT -GTCCCCCCTGCCACCGGTGCCACGGTCCCACCCTTCACTTACGATGCCTTTGCAGTGCTG -CGGCATATCGGCTCTTCGATGGGCAGCGGACACTACATCTCACTGGTCTGCGATGCGGAG -CGGCAGTGTTGGCGGAAATTCGATGACGAGCGGGCCACGGACTTCAATCCGCGAGATCTT -CGAGCGCGGGACCGGTTACAGAATGAGCAGGCGTACATTGTGTTTTATGAGCGAGTTCCA -GCGAAATGAGCGTTTTGATTTGTTGTCATGATTACGTTTAGATAGAGGTATATTATTATA -CACATGAATATTGCATACACAACCTCCACTACGTGCTGACACATTGGAGAGCGGAAAGCG -GAAGGCGGAAGGACACCGACGGCGGTACCCGAGAATTGGATCCCCGAGGTTATAGTTAAT -CAGGCAGTGTAGTCAGGCAGTTGTGTTTATATCTTCTATATACTGTGCTGTATTCTATTC -TTACATATCTATTCTGTACAACATAGTTATATATATTTTGCAGATTTAGAGATTGTATAA -AGGCCAGATTTCATACCTAATAAGCAACCCCTGATTCGTCAGGACTATCTAGTACAACAA 
-CTGTACCCTAGAGGTATTAGATCCCAATTAGATACGTTACAACATGAGATAGAATGTAAT -ATAACATCCAATGGGAAATTCCCCTTTTCTCTCCCCTTATAAGCCCCGGGTGTGATTCAC -GGCATCAAAGTCCCTGGGTACCTGGCCCCACATACACGAGACCGCCCAGCCGCACGTACC -AGCAACATGTTGGGGGTtctttttcttttttgatatctttctctctctctctttctcccg -ctttctcttctatttctcGAGCCTTTTGGATTCATCTCCCTAGATCCCGTAAGTTGTCGG -TCGTACCTCTCCAAACATTTCCCGTTTTCTCCTATCGCCAACTCCACACATCCCCCCTCT -TCAGTTGCCCAGTCTTGACAGAAACCTAACACCGGACCATTTAGATACCTCAATTCACTA -TGTCTGCAACCAAGGAACTCACCCTGCAGGAGATCTCCGAGCACTCCACCAAGAAGGATC -TCTACCTGATCCTCAACGACAAGGTCTACGACTGCACCGATTTCGCCAAGGAGCACCCGT -ATGTTGTCTCCATATCTACCCACTGCCACCAAATTTAGCCCCAAACACCCCGAAGCAAGA -GAACAAGCGAGATTAGACGAGGAAAGCAAAGAAAGAAAAAACAGCCAACCCGACGTATTA -TCTTCACTCGTTTCGCATCGCTTCAACCCACTCCAGATTTCTAGAACCGACCGGCTAACC -AATTTATCAGCGGTGGCGAGGAAGTCCTCCTTGATCTTGCCGGCCAAGACTGCACTGAAG -CCTTCGACGATGTCGGCCACAGTGATGAGGCGCGCGCAATCCTCGACGACCCAAAGATGT -TCATCGGCACAGTGAAGCGCATGGTACGCAACCCAGACAAACCCCCAACATTCTTGAAAC -ACTTTTCCCCACACCATCCCATGTGTCTCCCTTCTACCCTCGGCCTCCCCTTCAACCCCA -CGCAATCACATGCCCCAACCCGCATTCACTATCCGTCTCAAAACTAACTTGAACAAATAG -CCCGGTGACCCCGCGCCCAAGTCCAAGGCTCcctcctcctccgactcttcgtcttcttcc -tccACTGCCGGCTTCGGCGTCGGCCTCTACGCTGTCATCCTCATCGGTGGCGCGATTGCC -TACGGCGCATACAACTACCTCCAGGCCCAGCAGGCCCAGCAGGCTCAGCAGCAGTAAGCT -GGTCCAGAGTGTTTTGAAATTAGACGATTGAGTGTGAGAGGACTAGTAAGCGGTTATGGT -CAGAATGCAGATCTGAGATTTAAAGACCTGTCTCCATATGGGAGTCGCTATGGCTAAAAG -AGCAAAGAGAGCGTGTGTTTTTTTTTTCGAGTTTCGGGCCTTATGAGTATTCAGCTCTGT -TTTTTATTACCCTTTTATCTATTCATATCGACATAACTTGGATTAATCTTTCGGAATGGA -AATGTTTGACAATTAGCGATAAGCCAATCTGGGCCGGATTTCTTTACTCCTTTGGGTTGT -TCCACATCTCCAATATCTCCACTTTCTTGCTAATAGCCCAGGCATGAGAGGATATCAGGT -CCGAGCTTAGTTCCACAGCCTTCGATCCTGTTCTTGCGAATATGTACGGAGTACGGAGTA -CGTCTATACTAGATAGAGATTAGTATATCTAATCCACGAGTCAAATTACCTATGGAATCC -CACCTATCAGAATCAACCCAGTCCATTCCACATAAACAAGTTGAATCCGATCTGCGAAAG -TACAAGATACAGTAAAATGAGAATCGTCAAAATAAATGCGAAAGATCAAGGAAAAAAAAA -ATCTTTTTAAAACCCAATATTCATGAAATAGATACCAAAAAATAAAGATGAAAACAAGAA -TTGAAGTAGAAAGCCAAAAGAAAAGAACTAAAATGATACTGTTCCCGCTCGCTCGAAGTG 
-AAAGCAGAGAAGGGGTGGTGTAAGAGTTGGTGTCTAGATCAGATCAAACCCACAGTCGCA -ATGATGAATAACTACCTACCGCGCAGCGCCGTGAAGGCCATTGTGCACGGCCTCGAGGAT -CATCTCCTTGGCCTCTTCCAGGCCGCTGGGGCTACCCCGGATCTCAATCGCTTCGCCAAG -AGCCTGGTCCTTGGGCACATTGATACGGCAGCCTGTCTTCTTGCGGATGGCGTTGATCTG -GCTGCCACCCTGGCCGACAACGAAACGGTAAGTTTTGGGATCCGGCAGAATCAGATATCC -CGTAGTGGACTGGTGGATCGCGGAAGCGAGAGCCTTCTCGACGACGGAGCGGGCCTTGGC -CACGTTCTCCGGTGTGCCGCTGAGAACCCAGGGGAAGGGGGGAGAAGTTTCCTTGGCATT -GTTGTCGACAACGGTCCAGGAGTGTGCGTCGGCGGTGACGTTGGGATCATCAATGATCAG -AGGCATGTCGGTGCCTTCAGGTGTGGCGCGGGTGTCTTCGGCGGCGGAGGGGGCAGGGAC -AGGCAGGGGCTGGCCGGCGTGGTCCACGGTGACCTGATAGTCGTTGCGCAGGCGGCGGAA -GATGTGGCCATTGTCCGCGAGGAAGTAGTGCAGGTGGCTAGGGATTTCGATAGTCTCCGT -GTGCTGCTCCTTTAACATGGACTTGACATGCTCCTTGGCGCTTTCGACGGCGCTACTAGG -GCCCTTGAGCTTCACATCGGTACGGCCAGAGCCTTGCTTAGGGATATCAAGGGTGACGTT -GAACTTGGACTCGATGCCACGGCGAGTGTCACCTCCACGGCCAATAAGCATGCGGTGCTG -GGCGGTGGGAACATCGATGACCTCAGTGACCTGGTCCTGGCGCTCCTTGACGAACTCTTC -GATCGCGGCGATGATGCTATCCACGATCTTGCCGTTGCCCTCGAGGCGGATGGTTGATTC -GGTGCTTTCAGGACGGGGGAAACGGACCATCCTGGCGGCACTGCCATCGGTAGGGCCACC -GGCCTCGGTGACAATTTTGCGGATGCTGGCACCTATATTAAGTCAGTAATTGAAAACAAG -GATATAACTTTGCATGGTAACATACCACCTCCACCGATCAGAGCCTTGTGATACTTCTTA -TCAACCTCAATGGTCTTGGTCACAGTGGCATCGAACTCGCTGGCCCGCTGCTGCAAGATC -TTCTTGGCCTCGGCAACTTGCTGCTTAGTACCCTTGACCCGAATCTCCACACGACCCGAT -GCATCCGGCGCATCGTTGGCGTTAGGGACATCAATCTGGGCACCGGTATCAGCGCGAAGC -TTGTCCATCTCCCGGCCGCGCTGGCCGATCAGGGATGCGACCTGGCTCTGGGCCACAGAA -ATAGCGGCGGAATGACCGTGCTCAATGACATATTGCAGCAAGCTGAGGATCTCGTCGCGA -GCGGCGTCGGCACCCTTGCTGGGGCCTTTGACAAGGACCTCATCGGGAGCCTGCTGAGCA -CGCTGGGGACGAGCACCACCAGCATCACTGCCAATTTCCGTGGAGTCATCATTGGCAACA -GCTGCGCGGGGAAATTGGACACGGACAGAGTAACGATCCTGAAGACGGTTGACCTGGTTT -CCACGCTGGCCAATCAGCTCACGGTGGTATTGGGCGGGGACCTTGAGAATATGAGTAGTC -TCATCTTCCAACTTCTTGCCCATGTTGATAATGCGCATGCGGGTGGCATCTGCCTTGGCC -TTGGGTCCCTTGACCTCGACCTTGCCGTTCTCCACCTTGATGTCGACATCGAACTCCTCG -CGGAGCTTATTAATGTTCTCACCACGCTTGCCAATTAGGAAGTTGGCGTACTTCTGGGGG -AAGTCAAAGCTAGTGGTGTAACCACGCTCCAGGTCATCCTTCTCCTGCTCCACGACGAAA 
-GCCTCCACCTTGGCAGCAAGTTCAGCCACCAGGGCTGAGGGACCACGGAGAGAAACCTCA -CACTGAGAACCCTTGGCGTCACCGACAATGGCACGCACAGGGATGCTATCCTCACCTTTG -GCCGCCTGCTCGCGCGTGATGAATTTACGCGCCTTCTCCTGGTATTTGGCAGGAACGGAA -ACCGTCTTGGCAACAATGTCAAGCTGGTCCCCGAGAGTGCTGAGGAGGTAATCCTGAGCC -TCTTGCAGACCCTTCTCGAAGGCTGCAACATCCGCGGGGGAGGGGCGCTGTCTAGGGACC -TCGAAACGGGAAGCAGTTGCGGACGGGCCTTCATAAACAAGAACAACATCGGGGGAGTTG -ATGTCATCCGGCACCATCAAGTGCACACCATAATCACCCTGAAGAGTGTTTGCGCTTCGA -TCACGCAGGTACTGGTGGAAGTAAGGGTCCACAGCGACATGGCGGAGTCTAGAAGGTGGT -AGAGCCTGGACTAGGTTCATGATATCGGAACGGGCGCGGATGGTGTTCTTGCCGTCACGG -GAGTACACCTCCCAAGTGATGGGTCCACCAACATTAGGAGGAAGTGCAATGTGCGCATCG -TATGTCGATTCGAGTTCCTTAATGACCTGACGGCGACGTAAGTACTGCGAGAGAGCACTT -GCGTGGGCGTGGGGACCGGCGGGAGCAGCGGGATGCTGGCGGGACAGATCAATGCTAGCC -ATCTGCATGCTTGTCGCAAGATCCATAGCGTGTTCAATACCAGCCTCGATCTGGTCAAGG -GGACCAGTGATGGTGAGGAACTCACTCTCATCGGAGGAAGGGGGCAGGACAATGGCACAT -CCAGTTTGTGCAACGAACTGGTGCAGAGCATCAGCCTCATTGCCCAAAATGAACTGGTGT -TGGCCACGGTTGATGGCCAGCTGGCGCAGAGTCAGCTGCTGGTGCAATTGCGCGGCCAAG -CGCTCAATCTCAGCCCGAGCCTCTTGTGCGGCAGCCCGTTCTCCGGAGATCAAAATGTGC -TTATCAGGCACAGCCACGAATTGAACCTGGCCAGGGTTGGCTTCCTGCGGAGGAGGCTGG -CTCGTCCAAGTATCATAGCGGGGAACATTAATGTGCGCTTTTGTGCGCTCTTCAATGGCC -CGCACGTTGGCATCGTGTGCACCTGCAATGAAGGGGAAGAACTCAGGAGGAACGGTCTTC -AATCGGAAGCTCATGTTTGAGCCGCGCTCCTTGACAATGGCCTCAATCTCGCGGCGGGCC -ATCTCCGCAGCCACGGCATCTCCCTCAATCAAAATGTCAATGGTATCATCGTCGTCATCA -TCGGCACCAGCAGCGTTGCTGTCGGCGCGGGGGACCTGAACACGGGCACCAGTGCGGGTC -TGGATATCCTGCACAACGGTACCTTGACGGCCAATGATGTGAGCACGCGCAGATGTGGGA -ATCGGAACACGGACAGATTGCTATTAAGACGAATTAGTTAGCTTGGATACTCGTTTTGCG -GAGGTTCAATCCACATACCTTAGATCCAACCTGCTGAGCCACCTCGCGGAGAGCCTGGCG -CACTGCATCTACGGAGCCCTTTCCTTCGAAAATGATTGAGCCACCGGGACCACCGCGCAT -GTCGACGTTCGCCTTTGATCTGCGGGAGATATCCCGGAGGACATCACGAAGAGGCTTCTT -CATCTGTCCACGGGGTTGCATCTGGGAGGGGGCTAGGCGGAGCTGCTCCATGTGCTTTCC -GGGGAGAGCCATGATACGGGGAATTTCTGTTTCTCTGTCAGTATTTGTCCAGTCTGGAAA -GCTTCAAAGTAATCCACGTACCCATTGACTTCGCGGGCGAACCATTGGCGACAGCAGCGG -TCGCGGAAGGTTTCTTCGCACCCCATGTAGCAGCTGCGGGGGCAGCAGGCTTCGGGCCGC 
-TGCCCAAAGCTGGGAACAATTCTTCGGATTGAACATCGAATGCCGGGGCTTTTTTGGTGG -CGGGCTTGGGGGTGGGCGCCGGGGAAGCGGTTTGGGCGAGGGTGGTAGGCTCGGGGGTAT -CAGCGTTAACGAGGAGGGGTTGATCTGGCTGTGTCACGGTCGGAGGGGGGTGCTTGAGGT -CCTCCTCGTCCACGGTTTCCTCCACAGTGGGCTTGTGCAACTCGACGCGGGCGTGCTGCT -GTTCCAACATAGCAGCGAGGGATCCGCCGTTGGCAGAGGTAGATGCTTCGGAAGCCATTA -CGAATAATGAAAGTCGATAAGATAAATCGGGGTGGAAGACAAAGTTGGTAAATGGGCTAT -GCACAAGATCCTGCAATCAAGACAGGCCCAGGTCAGTATGCAATAACAACCGATCAAGGT -AGATCAGGACAGATCACCTAGACCTCGGGTGGAATCAATGGCATGGCAAGAATTTAGAAA -GAACTCACAAGTAAAAGTATATCTTTCCTCTTTTAGAATTGAGTGGGATATATAGATTTT -CTTGGGGTTCAGGAGAATGCCGAGGATGCTCAGGAAAATGCAATAGCGATTTGGACTGGC -AAAGCACCGAGAATCAAGAAAAGAAGGATACAGGGGCTCAAAGAGAGGAGCCACCTGACG -AGAAAGTGAGAAGCgaggtgggagggaggaagagaggtgagaggtaagaggaaaaggtcg -agaACCCCGGACAATCCAGAGAGTACAGAAAAGGCCAGCTCTGAGTCTCCGGGGAAGTTT -TCTCCGAGTCCGTCCTCTACAATTTGCACTTTTTGACCCTTTTTGACTCTTTTTCTTCTT -GATTTGTTTCTTTCTGGAACTGATATGATTTGACAATAATTGAACTTGAACTTGTCCAGT -TTGAAATGCGTAATGATTGCAGCTGTAATAGCTGTTTGCCTTTGGGGATGTGTGGTAATA -CAATCCCTAGCAATCAAGCAGAAAACTCGTATAGATTTCAAATGAATTCACTTCTCTTAT -CTCTGCCAGTATCTATTTTATATCTTCAATTGGATTTCGCACGTGGCCACAGTGCGGCAA -GTGGCCAATTGATGATGCACGTCACTGTGGCTCAAGAGTCTACGGATATATTTTAGGGCA -CTTTCCCAGTAACGGTCAACTCCATAGGTAGGTGAATATTGTCCAAACACATACATGATC -TGCTTTGCCGTAAAGCTTGAGTTGGACTTCTGGCTACCTAGATATTCAATATTCATTCAC -CCAATCTGGTTGGGTTTAGGACCTGGATCATGTATTGGGTGAGAGATTTATGGATATTTT -CGGTACTATATAGATCAAAGACACATTTTTTGGTAATATTTCAAATCACGATTCCTGTCT -AGACGACCTTGGTTGCCGGTGTTTGCACGGGCTTAGCTTATCGGTAGATATGAACCATTC -TTTCTTCATCGGAAATATATATTTCTCCACTGTTACCTGTCAAACTAACGAAACACCACC -AATAACCGTCCAGGTTATTGTCTATTCAGACACACCCCCAGCCTAACTATGTTCGACCCC -GTTTACTAAGGTCCGACGAGTGATCTAGTCACCCCTGGTTTTAGCAGATGCCCTCACTCT -AGGCTTTACAGGCCAGAACCGGCTCTTACAACCCACGTGGAGAATTATAATGGAGAACCA -TCTTCTTGGCCAATCTTTAGAAAGGCAACCCTCATTAGCTGAAATGTGAATGTTTGAAAC -AATTCTTCTGGCTGATCATGTATGCGGGCCTGGCAACATTCCGGTGCGGTTTTGCGGGTT -GGACGGTGGGCACAATGGAGACATACTACATTGTTGGAGCGCTTGCCAATATGATTGGGA -GGATTGGACTCGGCAAATATAATAGGGTTGCTTCTATCTTGTACTCGGTGCTCCCAATGT 
-TTTGAAGTATGCGAGTAATCAAATAGAGTGGCAAGATATTTATGGCGGGTCATACCAATG -GGATGTATGGCTTATTGAATCCCACGGAACAGGTTCATACCGGAGCAAAAATCTGCAGAG -TTCAACTGATCGATCGATCCCAAAACAGTATTATTCAACGTATTATGTATATGATTAAGA -CTTGCCAGACACAAAACGAAGCGCATCATGACATACATCAAGATGGCATGGGTATCGAGA -AATGAGTGCAATACCTCCACCTCCAAAAGTACAGGAGCCCAATCATTGACAATAGGAACA -AAAGCAAGTAAAACGTATGTATCAGCCTCGCGGGCCAGCCATGATCATTATGTGCATTAA -GTGGCCTTCACGAAAGGCCATCCGTCCTTCCAACCGAGGACGTTCCACGCGTACCTGTAG -TTCGCGTTGTCGGAGACCGTGGTGCTCTCTTTGATGGCAAGTGGGTAGTAATGGTAGTAC -ATAACTACGCCGAGATCCTTGTCATGGACAACACCAGGACCGCCGGGTCCAAAGACCTGG -CCGTGGCTGGAGAGGAGCATAGTGCCGCCGGTTTCCTTGCAAGACTTTCCATCGGCATCA -ACCTGTAAGAGAAGTGTTAGCATTTGGAGTGAGCGAGGATATTGGAAAGCATACAAAATC -GCCAAGACCAGTCTTGGATCGGCAAACACGGATGTGGTACTCGGCACCCTTCGCTGGGAA -TGTCTTTGTGTATTTGCCCGCAATACCAGCAGAGTAGAGGAGATAGTAGTAATCGCCGTG -CTTGAACTCGTACGAGCCCTCCTCGCGGTGGTTTAGGGTAGCGTTCCAGGAGATTTGGTA -AGGAGTTGCGCCGCCGACTTTCAAGGGAGTCTCCATCTGGACCTGGTAGATATCGCCCCA -GAAGGAGCCAAAGTTCATGTAGTTCTTGTTGCCGATGCTAATCCAGGCGGGATCAATACG -ATTGTAGTTGTCGTTTGCAGGGAGACCAACGCTGCCGTGGTCTGTCCAGGTACCGGGTTC -CATGTTCTTGGATGTTGCGACACCAATCTGTGAGTTCTGGGTTCCAATCTTTGACAGGAC -ATAGTACATATAGTACTGGTCGTTTTGGAAGTGGACATCAGGGGCCTATCAGATGGCTCG -TGTTAGTTACTTTCAAACTTGATATTGTTTTTCTAAGCATCTTCGGGTGTTTCTTACCCA -GATATTCGTGTTGTCCACGCCATCCAGGGTAATTTTGGACCCATCTGGCAAAGCCGAGCC -AATGTCCGTCCATGGACCCTTCACCGATGGCGAGCTATGAGTTCTGACACCGGTGCCCGT -TGAAAAGCGGAAGTACTTCCCATCAGAGACACGCTGGATCATAGCAGGGTCATGGGCCCA -GCAGTCGCCTGTGCAAGGGCCCCGGTTGGGGTATGCACTGGCGAGCGCCGTAAATAGAAG -TGGTATGAGACCAAACATGATGTATAGACAAAAAGGGAGGGGATAGAGGGGATTGGGAAG -TAGTAAATTAAAAAAAAAAAAGATATGAACATTCTGGGGAGGAATCGCGTCCAATTTATG -TTGATTTGCCCTACACCACTCGCAGCTCTTACAAGGTCGGGCATTTAGCATAGAAACAAC -TCATCCAATCATTGGTGATTAGCCCCTGTATATCGTGTCTGCCCAGGTGGGCGATGTAAG -GCGTATGCCAGCAGTGAATCCGGACTAGCTGGAGTCGGCGTCGAAAGGGATGAGCTCATT -CATCCAAGTCCAAGGAGTCCTGCTCCTGTGGAGCCAGAAATCAAAAACAATTTTCTGCAT -AAAGGGTTACCTTAGTGTTAACATGGATTCCATGTGATGTGTCTTGCCAGGCGCAGTCAC -CAGTAACTTTACCTCATTTGATCTACACCTATGAGCTACACCGACCCTCAGATCCCGAAG 
-TCTATAAACTATAGCTCTCGGCACCAGTCTCGGCAACAGGAACATATAACATGCATACAT -GTGTTGTTCACTGCAAGTGTGTGTGGTCTCCACATTCTCACCAGCAAGGGAGCGAGCACA -TACCTGGAACTAGTGAATTCTGTTTCATGACCCCATATCAGGTTTTGCTTTGCAATCCTA -ATGCCCTTCTCTCAATTCATCATGACCAAGAAAACAGGATACATCATATCCTATTAAGTT -GCGTGGCTTGATTCTTGTTTACGCCGTGTGGATGATCTGCGGCCATGTGGACCACGGAGG -ATAGTAGGCCAATCATCCAAGAATCATGTGAGATACAAGTGCAAGTACATTTGAAAGTCA -ACGAGTGTACAAATCCGTCATTTGATCAACTCTCTTAAAAGTGGAGATAACTCGTATGAA -TCAGTTCAGGTGGGAGGTCATGCCCAATTTGACCGGAAAATAATGAAAGTACCCGATATA -CTATGTCATCCTATAAATGGCTAGATACCATTATTGAGCCTCCTTCAACCCCGCTTGGGG -CCAAATGCATTGAGATGCGTTAGCTTTGATCTCTGTATGGTTTCTCTCAAGTAATGTCAG -TGCCAAGAATGTCTAGTTGGGCTGAGTCTCGGTAGCGATCGGACTTACCGCTATATTGGA -GATCGGCGCCAAATCTAGTAGCGATCTGGGGTCGGACTGGGGTCATTAGGGGTGGCTTGT -GAGAGCTTTTGTTAGCCTACTAAATCGACGCACTGCCTACTCCGTACAGGGCTGAGATCT -TCTAATAATATCGATATCAGTCATGACGACAAGTAACAGGTGTGGGAGAAATGTTTCTCG -TTTCCGGCTTTCTGTCAGACCAGTAGCCCATGCACTTGCGACATAATGATACTAGCCAGC -ACGATATCTATGAACATTATCACCCTCTTATGTCTACCGAGGTTAACATACTCTCATGCA -TTGCTGTCAAGGTAATCCGGAAACTGCGTCAGATGTTCACATGTATGTGTCACGACGACA -TGTCATGTAATTTGATGCTTTTAGAACAGACCAAAACTGCACCAGCATGTGATTTAATAT -GTATCTATCAACTACCCACCTTCCGACTTCTACCCGAGGATTATACACATTCAATTCAAG -CCAGACTCTATGAAGTAATTATGCCCTTTGCGCCTATTTTATGTAGACACACTACTATAG -TATAGATACCCGATACGATGTTGTATATAGCTAGCAATAGAGTAAATCGTGTTCATTTGG -AAACTCTCACAGACACACTCGTTCATTCGATATTGCGAGGAACAACTGTAATCCCCCAAG -TGCTCAACATGATAGACAGCCATAGCTGCCTACCCACATATCATCTTTCGTCACCCTTTC -CTTGATCTCTCTCTGTTTCATTTTTTTCTTCTTCTCCCATTCTTCGGTTAAATCCATCCT -ACGAAATCGTACATGAAATGAACGCCACCACTTCCTTAGTTATGGGCAGGTTTAGAGACG -ATGAGCGGAACCAGGTCCCGGGCCTTGGTACTGTGAGTTCCAGCTTTGCTGCAAATCTGA -CTCGAGCTGATATTGAGGACCGACCAATTGATTCTGTGGCATGAAATAGTTGGGTTGCAT -TGCTGGAGGAATAGGGGGATAGCTCTGTGGCAAAGAGCCATGCATCTGATTTTCTGTAGT -CTCAAGGTCTTGCTTGCTTTCGAGAATTTGGCCGACTATTGTCTTTAATTCTTTGACTTC -TGTGAGTATGGCTCGAAGCAATTCAGTTTGCTCGCTGCCAGTGGTTTTGGGGGTAGAGTC -AAATGGGGGGAAACGGGTCGTGTTAAACGCATGGTAACGAGAGTAGGGCTCTGGAGTTAT -AAACTTGCGCCTCGGAGCCGAAGAAGCAGTATTGACAGGAGTCGATGGCCAGATTTTGCC 
-GAGATCACTGGCCTTTTTCTCCTCACTAGCCAGCCACAAACGAAAAGAATCGACAACACC -ATACTTCGCAGTCGGAACCGTCTCCCCGGTGAGAATTTGCCTCCATTTAGGAAGGACTTC -ATCCAAAACATCATCTGAGAGATATCGATTACCGTCATCGTCAGTGGCCAATGCGACATT -CCAGTAAGGAGATCGGACGGTCCAGGCAACGGAGCCGGTCACGTAAAAGACATCTTGATT -CTTCCTCATGGCGGATTTGTCGCCCCGAATCATCACATCCCGCATATCATAGTCGAATTG -GAGGGTCCTCTTGGGATCAGCAACAAGATAATTATCCGTACCATCTCCTTGAAATTTCCG -TGATGTCAGTATTGTATACCGCGGCGCAGAGGGAAAGTGAAGAGGGTCTTTACCATTTTC -TTGCTCACAAAGAGTTTCGAGTGATAGTTCTTGATCCTGCAGAGATTCCAACTTCTGCAA -CAACCTTTCGTCTGCTTTATTATCCATGAGAACACGGAGTTTTACCATCCACCTCTGTCT -ATTTGTCAGAGAGTCAGAGGGTTGGAGGCTCTCGATGTGTTCCAGGATAACAGGTGTGCC -GAATGGGAGCAAAAGGCGACATAATAGCTCTGGGCTACCTGAAGCAATTGGCTTATTCTC -CTTGACGTAGCTAAACCACCGAGGGAGACATGCAGCAAGTTCCGTTGCCCGGCAGATTTT -GCCTCCGGACAAGACTCGGGACAACAAATAGCTGACTGTGGCGTCCTCGCCTGCATTGAT -AGCAGGAGGCGACTTGATGTTGACCATGATGCTATGTTTCCCGAAAGGAAAGCGCCTGGA -GCAACGCGAGGGTCAAAAAAATGGAATCAAGACAATTAGAGTTAAGACGAACAGACCAGT -GAAAAGAGAAAATCTGCGAGAATTTACAAATAAGAGTGCTCGGTATTGAGAGACGTGTGG -CAAGACAGCCAGGAGACCTTGGAAGAGACAAAGTACCTCAAGAGACAAGTCGTCGCCGAA -AAAACTAGGAGAGGCCAGATAAAAAGCGAGTAAAGCTGTAGGAGCATGAATTGAATACGC -GACGACAAGAGCAGGAGACGCGAAGACAAGACCTGGCCTATCACTTATGAAGGGACAGCC -TGAGCAAATGAGTTGTCTAGTCAGGCTGTATCTGTGATGCAGAGGGGCTGGGGCTTGTAA -AAAGGATGGATGGAAGGAGAGCTTGGTGAGGAGTAAGGTGGAGGGGAATAGTATGGGAGA -AGAGTAGTCGAGGTAAACAAAAGAAAAACCGTACACGCCCACAAGTAGTTGCTATGTGAG -CAATCCCTACTACCTGGGCCTTGAGGTTATATCTTAGGATAGCCTTTCTCCATCTTGTTA -GATATTACCATGGGTCCAGGATTATTTCTATCAAATTTACTTGTAGAGACAGGTAAGATC -CAAGAGACAAACCAGCTTCAGAGTAAGTAGCAGCATCTCCCTTAGAACCACCGTCATCGG -TCTCTACTATATAAAGAAAGCCACCATTTTGGCCAGGCGAGAAGGTGATGCTATAAGGTC -GACCAAAGCCAACAATATCCAAAGCCATCAGCAATCTCACGCACATCCTTTGTGTGGGCG -GGTGCCCACTCATACTGACTTGATATTTGGATAGTGGAGTTGTTGGATGTTGTTGGATAT -TAAGAGCTTGCCTGTTGACCCGACGGAGACATTTGCAGCATGTTCAAATGGGTATAGATT -TCGACGAGGTCAGCTTCGATTTGAAGTGGATCAAGTGGTATTTAATAAATTCAAGCTCAG -CAGAACAAGTGATGTCAAAATTTCACTTGGAATCCAACCTCTGCGATATCAAGTGAGAAT -GGTTAAACCCGGTGAGATCTGACACGCACTGAAGAGATTGTTTAAACAAAAGCAATAGCT 
-CATTTGTGTGAAGTCGAATGTGAGAGAGAGATGTCGTCCTGATAAGTCGGGCAATACTCT -CCGAGTTTCGGCCGACCCCATTATCTCAAATAGTACTTTGGTATTCCGCATGATAAGGAT -AGAATCTACCTATGCTGTAGGTAAGTAAAGAGATGTAAATGCTTCCAATATAAGCCACGG -GACCAATCTCTGCCAAATTTGCCATTTGCACACACTCACAATATTAGGACTTGACTGCAA -GGATATTCATAATGCTCTTGGAATGGAAGTTGGCCCGTCAAAAATGGTTAGAGCCCGGAC -CTCTGATTTCACCTCACTCGGTAATATTGGAACCCATAGGATGTCAACGCCAAAGTCATG -TGCTCTATCCTGATCCGAGTCAAGGAAACGGAGAAATAGAAACAGCGTAAACAGATAGTG -TTTGATTGAGTAGCAAACGAGGTATTGGAGACCCAACGGCGCATAGGGAAGTTACAACGT -GATTGCCCCGATCCAGGGCCCAATGGTCCCAATGGCCCAAATGCCCAAGATGTGCCGAGT -GCTGTCAAACCCAATATGTTTAACCTTTTTGGGACGTGTTCGGCTAGCTGCCAGGGGCAA -AAAAAAAAGGGGGGAAGTCTGTTTGCTTTAAGATGTAGAAAAGGGTATGGAGTAGTATAT -CTTAGTATCGTTTATAGTGGATCTCATATAGCCGTTGAGAGATCCGAATATTAAACGGGG -TAGAAGAATCCAAGACTATTCTTATAATCTGAAATGGTATATTTTAGATTGCTAATTCAA -AGTAAACTTGTAAATTAGATGAAGTCTACATGTAAGATAGATAGTACACCTGTTAAACCC -CATGAATCCCCCCAGAAACCCCCAGCCACCCGACTCCTTTTCTCAAGACTCCACGTGCTA -AACTTCCTCTCTTCTCTTTTCCAATCTTCCATTCCATCCATCTTCGAAACACCTGGGACC -CGGTTATTCTCCGCGGCCGTAAATCACCTCAGGTCGTGCACCTGGTTGGCCCGAAAAAAA -AAACTACCAAACTGCATCTGACCCCTCCTCGGCCCAATTTCAATCTGTCTCGTGTAGAGA -CAGAAATCTGCCCACCATGAGTCAGGACAATTTCCCATTGAAATTCCGTCGAGCATCTAG -CAAACTTCACAAGGACCCCCCTAGCTTCAGTTCGCGCATTCTTCGGAGTCATCAGAGCAC -CACATCGCTGAAACGCACCCCCTCCGCTCCGGTCTATCCACGTTCCTCTCCCAGCGGCAG -TCGCGAGCATAATCGATCGCGATCCAACGCCCAATTTCCAGGTTCTTCCTCGTCATCCCT -GGATCAGAATAGTGGTCCATCGCCTTCCAACGACGAATCAAGTGGCTTCTTCTCCAATTT -CACCTCCCGCTCTCGCACCCGCAGTTCCAACCGTTTTTCCTACAACGAGCAGAGCTCGGA -TGAATTGAACGGCCCTTACGAAACGCGGGGCATGCTCAGTGCATTGGACGAGAACAGTGC -CGAGACCGATCCTCATTCCCAGCAAAAGCCTGAACTCCGCTCTCACCACACCAGCCCCGA -TACCCGCGGACGCCACTCACTTCGTCAGTCAGCTAGCTTCACCACTCTAACGCAACGAAT -GGATCCCTTCGCACACCGAGAGACTGAGAGGCCTCCGAATGCCGCCAAACGCTATTCCGA -CGATGCCAATCCTCCAGTCGCGCCCCGAACGAGACAGAGCAAAAAGGCCAGCTTCTCTAG -ATTTGTGGACAGTATGCTTGGAACCCCCGGTCGCAATATGAAAATTTCAGCTCCAGAAAA -TCCGGTCCATGTCACTCACGTCGGCTACGATAACCAGACCGGACAATTTACCGGTCTTCC -TAAAGAATGGCAGAGACTACTACAGGAGAATGGTATCTCGAAGAAGGAGCAGGAAGAGCA 
-TCCGCAGACTATGATGGATATCATGCGATTCTACGAAAAGAATACTCGTGGCGACAGTGA -TGATGAAGTTTGGCACAAGTTTGACAATGCGCAAGCTGCTCAAGCCGCACAAGCTACCAG -CCCACGAGAACAAACGTCGCCACCGGGAAGTCCACGATTCCCGCAAAATCACGAAAGCAG -CTTTGAGAATCCGCGATCGCCCCCACCGATTCCTCGCGGGCCTCCGGCAGCGTCGCCTGT -CATGTCTCCTCCACTGGGCGGCATGGTGCCCCATCGTGCTCCCCCGAAACCGCCAACCGG -AATGACGCCCTCCCGCCCTCCTCCTCAACCCCCGGTCTCAAATTCGCACGGAGCCCCGCA -GCGAGCTCCTCAGGAGACCTATGCTCCTACATTTGGCACTCCGACCATCCCGGAATCTGA -GCCATTGCCAGCATCGCCCCACCGCAGTCCTTCCAGCTCCCGAAATGGCACCCCTGCGCC -CAGCGTTCCCAATATTCCCAATGTCATCGCATCTCCAACCCAATACCAACGGCAGCAGGA -GCAGGTCATGGCCGCGGCACAGCAGACTCTGGAAAGGAACCGCACCCAACGCCAGCAAGC -GCAGCAGCCCCCGGCCATCTCCACATCGACCGCTGCTCCGGCACCGGATCTTTCGTCTGG -AGTGTCACCTACCACGCGCGCCCCACCCGCTGCTAGACCTAGACAGCGTCAGCAGCGACA -GAGCAGCATTATGGATATTCGGGCACGCTTGATGGCAATCTGCCACCCTGGCGATCCCAC -GCAGATCTACTACAATTTCAACAAAATCGGCCAAGGTGCTTCCGGTGGTGTGTACACAGC -GTATGAGCACCCTCACAACAACTGTGTTGCCATCAAGCAGATGAACCTAGACTTGCAACC -CAAGAAGGACTTGATTATCAACGAGATATTGGTCATGAAAGATAGCAAGCACAAGAACAT -CGTAAACTTCTTAGAGAGCTTCCTACATGGACTGGATCTTTGGGTTGTGATGGAATACAT -GGAAGGTGGCAGTCTGACAGACGTGGTTACCTTCAACATTATGAGCGAAGGACAGATTGC -AGCTGTTTGCCGAGAGGTGAGTACAGGAGAATTGGGCCCGTCTCAAGATCAGGCTATGAG -ATGCTAACCATTTTGAGTAGACTCTGAGCGGCCTCCAGCATTTGCATTCCAAGGGTGTCA -TTCATCGAGACATAAAATCCGACAATATTCTGCTGGCAATGGATGGTAACATCAAGCTGA -GTGAGTTTCACCTTGTCCTGTGTTGAACTGCGCTAACCATTTGTTTTTAGCCGATTTCGG -TTTCTGTGCTCAAATCAACGACTCGCAGAACAAGCGGAACACCATGGTTGGTACCCCGTA -CTGGATGGCCCCCGAAGTTGTGACTCGCAAAGAGTATGGCCGCAAAGTCGATATCTGGAG -TTTAGGTATCATGGCCATTGAGATGATCGAGGGCGAGCCGCCATACCTGACAGAGTCTCC -ATTGCGAGCACTCTACCTGATCGCTACGAACGGAACACCCACCATCAAAGATGAACACAA -TCTATCCCCCGTCTTCCGCGAGTTCCTTTACTTTGCCCTCAAGGTTGACCCAGAGAAGCG -GGCGTCGGCGCATGACCTACTGAAGGTAATGACTCGTTTCCATTTCCCATTACTTGCAGC -GGTAACTGACTTCTTTCCCTTTAGCACCCATTTATGTCTCTCTGCGCACCACTATCGCAC -CTTGCCCCCTTGGTTAAGGCTGCTCGTCTCAGCCGAGCCCAAGAAAAAGCCCAAAAAGGA -GGTGCTTGAAAACGATGCTGATGAATGACCACTTTTTCCCCCTATGTTTCGGTTTTATTT -CGTCGTACTTGATATCTCTTGACGCCTTGCTCAGATACCCCGTCACCCCTGATGTTTCTT 
-CTAGATCCTGACGTGCACTACAGTACGTTGTTGGAATCCGTTTATGTACCCAGTCTATCC -GTTTACTTTTTTACATCAATTCTTGATGGAAATGACCTTTTCATGAGCACGTTGCCTGTG -AAATCTGCTTTGCCGCTCGCTGCCTCTGTGTTGATGCTTTCTATGTAATTTTTTTGTAGT -TCTATTGGTCAATCTTGTTATTGTTAGCTTATGATGAATACAAGCAACTAATGAGCTTCC -AACTCGGCTCCTTCTCATAAAGACTTTTCCCAGTCCCTATAGAATATATGCCCAGGTGTA -CTAAAGAAAAATAGCGAAGTGGAGGTTTGTTCAGCCACCGACCAGTAAATGTGGGAAGAG -GAAACGTAGCAGCCTACATGAGGAGAGCGAAGTCCTAGAGTGAACCGagaaaaaataaag -aagaacgaaagaaaataagagaagCTCAGTCCCCAGGCAGACATAATATAGAAAAATTTA -GTCAAAAGATACGGAGTACAATCCAAGCTAGTAGCAGTCTCCCAACCAGGCTGCACCGTC -GTCATATGCTGACTCAGGTGACGAGTTTCGCCCCGTACACAGTACCCCAACACTGTCCCT -GATCATTCCCAAGGACCTGATTAACAGATACCCGTCACTAAAACACCAGTTGCTTGCTAC -ACCATTCCTTTCCAAGGCCCTGTCTCTGTGACGGTCACACCGCCCGCAACAACCTGCCCA -GCCGCATCCTTTGCTGTCGTCGACTCCCCGCATTCGGTGCTGGAGAACCATCGCTACGAG -TGACACGACCGGATTTGAAAAACAGTGTGGTTTTTGATACCCAAGTTGAATTGGAATTCA -TCGTCGGACTGTGGCGACGTGAAAACAAGGCCAGCCGCTTGTCAGAAGCTGAACAGTGCT -GCCCGACACACTATCTAGTGATCACCTGCGACGGCTAGAGGACATACATTCGCGATGTTT -CGCGCGCAGCAGAACCACTTCGATGATGCCGTTGGTAAGTCACATTAGTCCTCTGCCTCT -TCGTATACACCCTGTACCCCTGCAGCTGGCTAGCTTGGAGGGAAGCTCTTAGGAGCACAT -TGAATCTGCTTATACAGCTTACTGACGATATCTATTTCTGTATAGCCAAGGCGACGGACG -AGAATTTGACCTCCGAGAACTGGGAATATATTCTGGTGCGTTGTGCATGGATACGGAATT -GCTAGAATAACCGTGGCCTTGGGACATGAGAACTGATTCGACTTTCGTGGTTTAGGATGT -CTGTGACAAAGTCGGTTCAGAGGAGTCAGGGTATGTGTCTAGATCTTACTTGCAGGCTCG -CCCAAGCTCACACAAAGATTATTACTGATTGCGATTTGCCGCGATGTAGTGCGAAAGAGG -CCGTGGCTGCTATGATCAAAAGGCTGGCTCACCGAAATGCCAACGTCCAGCTCTATACAT -TGGAGGTTCGTCACCAGATCCCGTGCTGTTGAAAGCGCAAAAATCTCCCGGTCTATGACA -ATAGCTGATTCTGATTTCTAGCTTGCAAACTCGTTGTCACAAAACTGCGGGCTCAAGATC -CACCGTGAGTTGGCTTCGAGAAGCTTTACTGATGCGCTACTACGCCTGGCAAATGATCGT -GTAAGTGTATCTGGGACAGTAAGACATTTTCAAAATAGCAAATACTAATCATGGATTTGT -TTAGAATACGCACCAACAAGTCAAGTCCAAGATTCTGGAGCGCATGGAAGAATGGACCGC -GATGTTCGCATCTAACCCTGATTTCGGAATCATGGAACAGGCCTACATGAAATTAAAAAC -TACCAGTGAGTATCTTTTAAACGCCCGAAATATTGAACCTCCGCTGATTGTACTCAGACC -CGAATCTGCAGCCACCGTCTAAGCCTGGAAAGCGTGAGATCACCGATGTTGATCGTCAGA 
-TGGAGGAAGAGGAATTGCAGATGGCTTTAGCCCTCTCGATTAAGGATAAAACGACCCCGG -GggccgcggccgcggcagcgcctcgagcagaggcttcgtctgctgctgctgcATCTGTGT -CGGCCCCCCAAAATCAAGGAGAGCCCGCTGAGCCTCAGCCTGCTCTCTTAGGTACATCGG -TAGCTACCGTGTCGCGTGTGAGAGCGTTGTTCGACTTCCAGCCATCTGAGCCAGGAGAGC -TTCAGTTCCGCAAAGGCGATACCATTGCAGTCTTGGAATCGGTTTACAAGGATTGGTGGA -AGGGCTCCTTGAGAGGCCAGACAGGCATTTTCCCTCTCAATTATGTGGAGAAACTCCCCG -ATCCCACTGTCGATGAGCTGCAGCGTGAGGCTCAAATGGAGGCGGACGTCTTTGGCCAGA -TCAAGAGTGTCGAGAAGCTCCTGACTCTACTGAGCACCCGCAACTCAGACCTCAATGTCC -AAGAAAATGAAGAGATTACTACACTCTATCAAGCTACACTTGCGATTCGTCCCAAACTCA -TCGAACTGATTGGAAAATATTCGCAGAAGAAAGGTACCCAGCTGTTTTTTTTCTTGTTTC -GAGTCTTGAGGCCCCGCTTACACATTCCAGATGAATTTACCCAGCTCAATGAAAAGTTCA -TCAAAGCTCGTCGCGACTATGAGTCCCTTCTTGAGGCATCGCTAGCTCACCCGGCACAGC -CTCAATATGGACGACCTGCCCAGCCACAGCATGGTTATCCTGGCTCCTCGCCTGCCGGGT -ACCCCTCTGCTCCCCAGGCGGATCCTAGATACTTTACTCCCCGGCCTCAAGGTTAGTGGG -GTTTACAAATTACCCACCGACTCCTGGTCCTAATTTGACTTGCCTAGACACCACGCCAAC -CCAGGCGAATGCATACGCACCATACCCCGTTAGTGAGCAGACTATGCCATACCGTCCTGC -CTCTCACTCCCCTGATCCTCGCAATCAGGCTCAGGCCGGCGTACCGCATCAATCACCGCA -CCCCGATCCTTACCAACCTGTCAACCACCGCCCGCAATCTACATACGATCATCCCCAAGA -ATTAGGGACCTCCGTATATGATTCACCGGTCGATCACCCTGCTCCTGCTTCTGGCCAGCG -TGTCCCGTACCCTCCAACTGCCCAAGCCTCCCCTGTTGGCcaccagcaattccaacagca -acagcaacagcagcagcagcaAGACTACTCGCCCTCGGTATACTCTGCCGATGATACATC -CCAGATCCCGCCTGCCTCAACTATGCCTCAAATCCAGCACCAGTTCCAGCAACAACAGCA -ACAACCCCCGTACCCCAACTCTCCCGGTGCCCATCAACCCCCTCCATCGCATCAGCCTCC -GCCGGTACCAGGGGCAGCGCAGCAGCCACAGTATACTCCATACAGCCCGGCACCACCGGC -CGCAAGTACGGGCGAGTACCAGGCGTATCAGCCTCCACAGGGCGGCGCAGCTGGCTCAAA -TCCAACTTCCTATTACCGGTAATTGATAGTTAGTCATGACTGCCCTCGTTCGATTAATAG -GAATAAATCCCTTGACTGTCTTTGATGCTGGCTTTGTATTGTGGCTTTGCAGGCAGTATT -CTTCGTTTCGTTATCCCTCCTTTGCTGGATGTCTCTGTGTCATGTATCTAGCCAACAGGT -TCAATATGTCGTCAATTGTATGATAGTACATATGCGATTTTGATCTTTGTCTCCGTATGC -CAGTGCCATAATCTTATGATAATGTCCTTTCTAGACATGAACCTACGTCTATTGATTGGC -CTTTGGTGCTGATCGAATGCCCAAGCAATGACCGGGGTCCCCATGAGCAGTCCATCGCAC -ACCAATCAAGTTTAGCATTTTCGTGAAAGCAAGGTGGGGATGAATAAGAAGCAACCAAGA 
-AGAGGAGGCAAGATGAGGTAGTGTGAAAAAGACGTTAGCACTACTTTCACCAGGATTATC -CTTTCCATTCTGGTCATCCTTTCTTTCCTCGACCATGCTTCACGGCCTCATGGTCTTCCC -AACAATCGACTTTGAAAACCACTGATTCCTGCCGCATTTTGAGTACTCATAGATTCAATA -TGTTATATGGGCTGATGTTCACACGCGATGCTCATTATCTGTGTACAAACTGGTGAATTC -AAAAACAGGCGAGTTGTTACGAATGAAAAGAGGGTTGGTGAAGAGGGTTGGTGACATTTT -GCGATAGCTAGTTGATCACAGGTGAGCAAATAAAGGGCCAATAGCAATTGAGCAAAGCAT -ACATGACGTTTTGATCTCTAGACTCTCTAGATATCGAGTACCTTGAGTAACTGGACATCT -TCAATCCACCGAGGATTTCTATACCAAGCAACCAAGTCCACAAATGTACAAAGTGTTGAC -GGAACACAATGTATTGTCTCAACTCTCAGCGGCCTTGCTAGCAGAAGCTGAAGTCTTAGA -GTTGCTGTCCCAGCACCCACACCCGAACATCATCCGCTATCACGGCTGTCGGGTTCGCCG -TGGGTACTTTATTGGACTTGTAGTCGACAAACATCCCTATGATCTGTACACTTATCTCAA -AAACCAAGTTGGGGAAATCGAGAAACGGTCTTTCATTACTGCGCTCGAGTCCTCTCTCCG -TCATCTTCATACATATGGATTGGCCCACAACGACCTGATTCCCAACAACATTCTGGTGAG -CAAAGAGGGCATGCCAGTTTTGATTGGTTTTGATGGATGCCGGCCTATTGGAACCTACTT -GAAAGCATATTAGAGGGACTTACGGGTGGATAGATGGAGAAATCAAGGACCACAATACAT -CGAGAAAAGAGCATGATATTTCTGCTTTAGCCAAAATCAGTGCGTCGTTGGACAAGCCAG -TGTTTGACCTATAATTCATTTTTGCATACATAGAGCTGGACATAGATTCTTTAATTGTAC -GGCTGGCAGTATATCGACAATTATTTATTGGGTCTATAACGCGAAGCTACTCGCTTCTTC -ATGaaaaaaaaaaaaaaaaacagaaaagaaaagaaaaaaCCAACCGCCTTCTGCCGTCAA -CTTATTTCGATCCAAAAAACCAGTCGGACACAATGACTGCAATTACCGTCACCTAATATG -AATTAGCAAATATCTCTCATTCAAGGCCAATGCCTCGACTTACCAGCGAAATATCCACAC -CAGGACTCACTATGAGATCCAAAGCAGGGCGATGGCCTCCTCCGCGCTTCGTCCTTCTCA -AAGCTAATCTTTTGTTGTGGGTGATACTCTCGCGCAATTCAGCAATCCTCCTGTCCACAT -TTACAACATCAAAAAAGGCTAACGCTTTCCCGTGTTTTTTTACCATTAGCGTGAGCTCCT -TGTCCGTCTCGCTGTTTCTGTCCTCGGCCGCGCCATTTCGAAAGGAGAAGACCAAGTTGA -TGCTATTCCAAGCCCACTTAGGTGTCGCTTTCGCTATTGTCACCTCGCTGGTTTTGCTCC -CGGGCAAGGTAACCACCCAACCTAGAGCACTTGACAAGGGTCTGGTGTGGATATCAAATA -ACGGTAACCCGGACGAGTCGCGGAGTTCACGACACGAGCGATCATTGTATTTTCGACCAG -TCGCCGTGAATGCTAAATAGCCCTCTTCATTTAAGATCTTATACTCACGATCCGACTCGC -TATTGCCGTGAGGCTTGAATAGTAGCGTTGTTTTTTCCGTCGCGATATGTTCTTGCCGAA -AGGCGACTTGTCGCTCCGGTGCTTTCAAGGCATATCGTGGCGCAGAGCTTTTGGGTGTGA -TCCCACGGTCCATTCGCGCATGGGTGTATGTGGTTACAGACATGATTTTGATCGAGTGAT 
-AGGTTTATATGGTATTTTCTAGTGGGATCTTGTGGGCTTTTGTCTCTGGCTTGTCGTGGC -AGGGCGTTGGCTGTGCACTGGGGCGATGCACTTACGGCTTTGACGTCGAGTTATAATTCG -CGTAAACTAAATTATCGATCTCTTAAATCATATTAATCGAGATTATTACAGCTCGGGATG -TCAATGAGGCTCAGCTCTGAGGCGAACTATAGAAAGGAAAGTCTCCCGAGGTCGATGATC -GACAAAACATTGATAGACGAGTTGATCTTATCCCCTGCAAAGTATTATTCCGTAGGTTGC -CTGAACAGATAAGTAAGGTGGAATCTCCCGGAACGGTAGGTGAACTTGAAATATGTTGTA -CTTCAGACTCGCTTCTTAGTCTTTTGCGAGTTCAATGTTCCAAGGCCGTGCAAGGTGAAT -GAATATCAATAGTAACAAATTCAACCAGATCTGCAAATCTGAATGCATGCCAACCTTTTT -CATTCAATTTTATACAAGAAATTACTTCTACTTCTATTATGTAGAATTTTGGATGGAAAG -ATTAATATGGTAATTGATCTGTCGGCCGCATATCCCGAGGATTTCGGATGAGACCCGACA -AAGAACCAAATGGCTAGTTTGCTCAATCTGTACATTATCTTTCTTCTTTGTCTGTGACTA -CTCTGATCAAGATGTCTGTCCATCCTATCCATATTGCTGTCCTTGACACCGATATTCCTT -GCTACCCTGTCTACGCCAAACGAGGATTGTACAGCTCACAATTCAATGTTCTTCTCACTG -CCGCCGCAGATCGCATCAATGCAAGCAAATATCACCAGCATAGAGCATCTCTAGATGTTA -AAATCACTGCCTTTGACGTTGTCGGGGGATCATTCCCACACTTCGAGTCATTACGAGTCA -CGCCTTGGTCGCCGACAGAAAATCGACCCCCTGGCTTCCCAGGTCCTGTCGATGCGATTC -TCGTCACCGGAGCAGCAGCGGCAGTATATGACGATCTCCACTGGATTACAGGTTTGCGAT -CTTTTATCAAGCGGGTGTATGCAGACTATCCGATGGTCAAGATTTTCGGATCTTGCTTTG -GGCATCAACTTATAGCTCAAGTCCTCCTCGCAAGCGACAAGAGCTACACGTCCCATGGAT -CATCATTCAAGGTATCTGTCGAGCCTTCGTCCAATGGTCATGAAATTGGCATGCATCCTA -TCACCTTGAACCCAGCTTTCGTCTCGAGTTTCCCACCTCTAGCTCGGTTTACTCCAGAAC -AGCCATTCTATATTCAGGTCATCCATGGCGACGCTGTGATATCTTCGCCAGAAGGCACAA -TAGCTCCTGGGGGACATGCAACGATGTTGCCTGAACCGTGGCTCAGTATCGGAAGCAGCC -TTAACTGTCCCATTCAGGGCCTCTACAAGCCTGGCCACGTTCTCACGTTACAAGGTCATT -TTGAGTTCGATGCGTTTGCCACTGCTGAGCTCTGTCATAAATTTGCCAATCAATTTGACT -GGCCTGCAGATCTGCTTGCCTCGCATCTCAAAAACATCAGGCGCTCTGTGGTGCTTGGTA -AAGATCATGATGATTCGAGGGTAGCAGCGGATGCGGTCCTTTTATTTTTCGCCGGGGAAG -ACTTGATCCTGAGCTCAGATATCAATTCAACCTCATAGTGAGGTCCGTTGAGATTGTTCT -GATTATGGATTGCGGGGCCATCGGAGGTCCAGGCCGAAGTCCCCGTAGTCTCGGGTGAAC -ACCCGATCCATACATTTTCACACTTCTGACTTGACTGTGACTACAAAACCAATATCAATA -TTAGCTGTACGAAGCTCTTTACATTTATGGCGGGCTCCAACCCACGATCTACTAACGCGT -AAGTGTGCAATACCTCCCTTCTCCGGCTTGGAGCTATGTATGATAACAACTGAATAGATG 
-TGAAACATGTCGAAGACGCAAAGTAAAATGTTCCGGTGATAAGCCTTGTCGCGCTTGTAT -CAAACACCATTGGGAATGCACTTTTGGACACTCTGGTCGCAGACGGTTTTCTGAAGCGTC -AGTTCATGCTTGTTATAACCTCCGGCCAGTCTTGCTGACTGTCAATCAGCCAAGTGAAAC -ATTTATTGGAAAAGATTCAAGTCTACGAAGAGCAGCTCAGTTCCCAGTCTGCAGGGGCCC -GGGGCTCTTCATTGCCTTCCCCTGTAGCACCGGATGAAACTTCAGGCCGTCAACCCGCGC -AACAAGTTTCTTCGACAACTGGGAATAAAGAGCGGCCCAACCAGAGGCAAGATGAGCTAC -TATATCCTGACAGTGATCCTGCTATAAGCCCTGGTAACGTGATATTGCTTTTTTCTTCCT -GGTTTGCTTGCGAAAATAAACTGACTCTACGTATAGCAACGGACCTGACCTCGGGGCCTG -CATTCGAGTCTCAGGTCAGGTCGCTTCTTGACAGGTCACATTCTAGTGACTACAACTTCC -ATGAATCTACATCAAGTTTTCAATATCAGACTGATCGATTACCTCAGTGGACATCGGTCA -AAGAGCTCGTGGACAATGTTGCCGGAGTATCTTTCCCATCTTTGGAGGAATCACAGCACT -TGCTTGATCAATTCCTGTTTTATCTTGGCGTGAGCCAGCACTTTTTTGATCCACGATCTT -TCTCCGACGATCTTATGTTGCTATTTCAGAGCCCAGTGACAAGGCAACGACAGATAAATT -CTCCTTGGTTTACAGAATATCTTTTGGTCATGGCCATGGCAAAACTCATGGATGTAAAAC -ATCCGACATCCCACACACCTGGAGCGGACTTATTCGCTGAAGCTTTGAAGCGTCTTCCTC -CCATGCATCATATGGGTGGGGAAGGTGTTGTCGCGGTCGAGATCCTGACACTGATTGCAA -CATATCTGCAGTGGTGTGACCGTAAACATGATGCATATCTCTATGTATGTTATGCAAAGT -CTGCAATGTTCTTGCACGGTATCGTTGACATGTGGTAGATTGGACTCGCACTGCGACTAG -CTATTGCTCTTGGGTGCAACCTACGGGAAATCGACCAACGCTGTCTTCCTTCCCAGAGTG -CCCACAGATTAAGACTGTGGTGGACAGTATACATGCTTGACAGGTATGTCTTTTGCAAAT -TATTTGTTGCAAATTTAGGTATTTGAGTCTCAAGTTGCACTTTTAGGCGCCTCTCCTCGG -GCCTTGGACTGGCCGCAGGGGCCGATGAACGACAACTACAAACTGAACTCCCGCGGAACG -CCATGGGGTTCCAGTCACCAATTGCCCTTGCCATAAATGTTCGTATTGCACGTGTCACCG -ATGATATAATGTCAAGTAAGCTACACGCCGTTGAGCTCTTGACATCGTGCTAATGCACCC -ACAACAGGTCTTTATGGGAATAAGTCGATCACGCAACTGAAATTGGTCCAAAAAATCCAA -CAAATTCTCCAAGAGCTGCATGACACAGGTCGATCTTTTCCCAAATCCTTAATGTTGGAT -TTCAATCGACCTTTAAAGTTGGTGACTCGCACAGGTGCGTCATTATACCTAATGCTTTTT -CAGGTGAGGCTGCCTCCAAATAGAACGCATTGTTAGATTTGCGTCTGATTCAGCATAAGG -CTATTATCCTATGTACACGGCCAATACTACTCCAAAGAGCCCGATTTGAAGCCCAGAGCC -AACAACAGCCGCAGTCACCCAATCCAGCCCCGAGTATGCTGCTGCGTCTCTGTGATACAT -GTGAAGAGGCAGCCACCAGAAGTCTCGCAATTCTTGAATCGCTCCGTCAACAGCAAACTA -TCCGTGAGTCTTTCTTCTAGTGTTTCAGTCAAAGAACCCTGGTCCCACGCAATGTTTGCG 
-TTTTTGAAAAACCAACAGCTAATACATGACCCCAAGCCCGGTATGGTTTTTTTGATCTGG -ACGCAACGTTCTCCGCGGCATTTGTTTTGGTCATGGTAGGCTTCCTGAACAAATCACAAA -GCCAGCCACCCCCAGCGCTTGATCAAGCATCAAAAGTACTTCAGTTTCTATCCCATTCCG -GCAATTTGGCTGCCGAGCGACGCATACAGGATATTGCACAATCTTATTCCCACGTTTGGC -CGAATCATATTTTCAATGCAAATACTCCAAGCAGCGATGCAGGCTCCCATGATAAGCCCC -GGACACTTGAAACAAGTCTAGCCCCGAGACAAGATGTGTTGACCTCCTCCGATTCTCCCC -CATATATGGCTGTGGCCTCAAGCACTCGCGGTGACCATCAAGACGAGATCAGATTACTTG -AGCCATGGTCTAACATGGATGTACCAGACGCAATGTTTGATATGCAGGGGGATTGGGATC -TAGATCTTTCCGGAGATGCAGAAGGGATTTATTCAAGTTTTCACAATCCGACATTGCCAT -TGACCGGAGTTGACTATATCGACTGGTTGGAGATCGAGAAAGTATTTAACGGTCCATAAT -CCTTTTAAAGCTAGTCACAAAATCCCAGAAGAACAACATCTAAGGAAATCTAACGAAATG -TATCTCAAAAGACATTGAAAAGACTGCCAAACAGGGTGCCAACAAAGGGTCGAAATCACT -ATAGGAGATCTAAAGTGGCGGCAATAGACTGCCGTACGCTAGAGATTAGATCTCAAAGCC -ACGCCCACTTTTATAAGATTTTTTTCCCCTTGTTTTTCATAATCAATTACGCTCCAAGGA -TAGAAACGCTCTCGGTGATCTTCACTAAAGTTCGAGATTGCATTGGTCCCCATAGACTTG -GCGATCGAACGTTGTCTGCCATCACGTGATCGAGCCAATATCTCACCGTTCCTGTTCCTC -GCCGGCAGCATGCACTGCAGTCGGAGCCCGACCGTTCCTTATTTCGTCGCGTCAGGCATC -TCGAGCCGAGCTTCGAATCTCACGTTTGGTGCCATCTCGACTCCAAATCCCTCACCGTGT -CAAAATCCCACCACCAAATCGTCTGATAGCTTCCCCGCAATCGCGTCCCTCGCGCCGAAC -GATGCCCACACTCGCTGTGTTCCCACGTGGGTGAAGACAGCAGTTTACACAAGGCCCTAT -CAGGCCTGCACATTGAAAAAGAAAAAAAGCGAGAACGCACACGATAATCTCGACATTGAA -GTTGGACCACAGAACCTACGACCAACATGGAGGCCGACTGGGATGAGCTGTCGCGTATCC -CAATGCCGCCGCCGAGTCCGCATGCGATGCCCACTGTGGCAACGGCGACGGCATTTGATG -ATATGATGGAGCTTCTATGGACAGGCAATGAATATGTGAGTACTGGCCTCGGCTGGAGGA -TTCCCAAGGCGGGATGCTGATGAGTGTGTTTAATTCGCCAGGGTAGGGTTTCCTCCTTTT -ATGGGCCGGAGCTACAACGTTATACCTCTGTTCGAGCGCATCCAGTATCCGAGGGCGCAG -TACGGCAAATAATATTCCACGAAAGAGGTGTGATTTCGCTATCATCAAAGAGTGTGCACA -TGATTACCCGACGGGGTTTGACACAGTGGCATCTTACTCACGATGAAATGGTCGATCTCC -GCTGCATGAGCTTCACCGCTCAGACCAATCGAATCCTCGTTGCAGGTTCTCAGCGCGTTA -TGTTCACTGTTGATATTGACAAGGGCACAATTGTCGAGAAGCTTCCCACCGAGCATGGCT -ACACAATGATGAAAAAGAGTCGATATCTCTGTGCTGCAACAGATACCGGATCAGTTAATG -CACTTAGTCTCTTGGACTTCAGTGTTGTCAAGTCATGGAAAGCTCACGGAACAGGTGTCA 
-ATGACATGGACGCGCGGAATGACCTCCTGGTCACCTGTGGCTTTTCAGTCCGCCACCTTG -GATCCCCAATCGTGGATCCTCTAGCCAACGTTTATGACTTAAAAACATTGACACCACTGC -CTCCAATTCCCTTCCATGCTGGAGCAGCATACGTGCGTATGCATCCCAAACTTCACACCA -CCAGCTTCGTGGCTTCACAAACAGGCCAACTTCAAGTGATAGACCTCATGAATCCCAACT -CGGTCAACTTGAGACAAGCCAATGTGTCACTTGTGCTTGGTATTGACCTATCACCATCAG -GAGAAGCATTGGTCATTAACGATGCTGAATGCTCCCTTCACCTATGGGGCTCACCGACCA -AGGTTCATTTCAATGAATTGAGCAAGGAGACAGAATTTGCCGACGTCCCCACGCGATCTT -CCCAGATAGATTGGTCTCCCGAGTCTCCGTTAAACATGGTTGGGATGCCTTATTACCACG -AACGTCTCTTTTCGGCGTGGCCAAGTCATCTGCTTTTCGAGGTTGGCAGTCCACCGGCTC -CATTGGATCAGTCTATGTTACCATATTTACGCCCTGCAGAGATGGGTCACCATGCTCCAA -ACCCGAGAAAGACTCGTCGATACCAAATAGAAAACACTCGCGCACTGACCACGGCCGAGC -CGGCTCTAATTGCACCAAAGTTTTTGAGTGAAAAGGCTCGAGATTATAGCAATTCAGACG -GCTTGGTTGGCGATGCTGCAGAGGCACTGGCAGGGGCGAAGATCAACGGAGAGAGCGATG -ATGACCCACTACTCAAATATAGCAACGTGGAGATTAAATACAGTCGATTCGGTGTAGATG -ACTTCGATTTCCGGTACGTCCTTCTCTTCAAATCCATTTGCTGCCCTTCACTCACGATCT -CTATAGATTCTACAACAAGACCTCATTCTCCGGGCTTGAGACTCACATCGCCAACTCTTT -TACCAATTCCCTTCTCCAACTTTTCAAATTCATCCCTCTTTTCCGAAATCTCGCTCTGAA -TCATGCAGCCGGATCGTGCATATTTGAGCACTGTCTTTTGTGTGAACTGGGCTATTTGTT -CGACATGCTTGAAAAGGCAAGCGGTCAAAATTGTCAGGCCACCAACCTTCTCAAGACATT -TAGCAGCTTCCGAGAAGCCTCAAATCTGGGTCTCTTAGAAGAGAATCTTACCAATAAATC -TCTGTCGACTGCGATCCAAGCAGTCAATCGTTTCTTCCTCACCCAAATAGCCCAAGACTT -CCGGGTGATTCAGCCTAGTTCTGAGGAGCTCGATCAGCGCTTAGCCACCATTGCGTCTGA -GTCTATTCGATGCATGTTCTGCCAGAACGAAATTGTGCGCCCTGGCAACTCACTCGCCAA -TGAGTTGATTTACCCAAATATCGACATCAAACATGCGCGACGCAATCCCCTCTTCCGGTT -CTCCAATATTTTGCGCGCTAGTATTGAACGAGAGACACAAAATCGAGGATGGTGCAATTA -TTGTCGTCGCTACCAGCAAGTGACGATTCGCAAAACCATTCATCGCATGCCTTTGATTCT -CATGCTGAATGCCGCCCTGACCAACCCCCTTTGCCGTCGGCTTTGGGCAATCCCAGGGTG -GTTGCCCGACGCCGTTGGCGTTCTTGTTGATGCTAGTGGACAGGTGATGTGCTTTGAGGG -TGATGATCTTCGCCTCCGAATCCAAAATCAAACACCAGGGCTCGCGGTATATGATCTGGT -TGGATTGGTTGCGGAGATTGACATCCCGGAACATCAAAAGCCTCATCTAGTATCTTTCGT -TAATGTCTCTATTTCAGGTACTCAGTCCGAGGAACAAGCCAAATGGCACCTGTTCAATGA -CTTTTTGGTCACGGAGGTCGACAGAGACGAAGCACTCCGATTCACACAGCCTTGGAAACA 
-ACCTTGCGTGCTAGCTTACCAAGTGCGAGACCCGCGCCATGTCGTTGATGACTCTTGGAA -GAACCTTCTGGATAAAACCCTACTGTTCCGCGAGTGGTCTTTGAAGTAAGTTTTTAAATT -CAACTTTGAGGTTCTGAAAAGAAACACTAACGATCGACTTGTAGTGGCGGTCGTCAGGTT -GAGTCATGTCAAACCCTCACCGAGGAGGAACAACCACAGCCTGGGACACCTGTCGCACTG -GACACCGAGTTTGTTGATCTCGAAAAAGCCGAGATTGATGTCAAGGCAGACGGATCACAA -GAAATGGTACGCCCCAACAAGAGCGGGTTGGCCCGTGTCTCCGTCTTGCGTGGTGTGGGT -GTTCAAGAAGGAGTTCCCTTCATTGATGACTATATCACCATCCGTGAACCCATTGTGGAC -TATGTGACCCAGTATTCCGGCATCAAGCCTGGCGACCTGGATCCGCGCACTAGTGAACAC -AATCTTGTCCCACTCAAGGTTGCTTACAAGAAGCTTTGGCTGCTTCTGAACCTCGGGTGT -GTATTTGTTGGTCACGGTCTAGCCTCCGATTTCCGTAAAATCAACATTCAAGTGCCCAAG -GCACAGACTGTGGACACGCAGTATCTCTTCTTCCACCCTGGGAAGAATCGCCGTCTCAGT -CTCAGGTACCTGGCCTGGGCTGTCTTCAAGGAGCATATCCAAGAGGAGCCCACACCCGAC -GCCGTACAAGGTCATGATTCAATCGAGGATGCTCGCATGGCTTTGCGTCTCTGGAAAAAA -TTCCAGGAATACGAAGATGCAGGAATTGTCTCGCAAATGCTCGAAGAGATCTTCCGCGAA -GGCTCTAAACTAGGATTCCGCCCACCTCCTAAAAATGGAGGCACCATCACTGTTCTCTCC -CGACCTGGCACTGCTGTCACCATGCAGAATGACAGCGGTCGCAACACTCCTAGTACCCCG -GACACACGGACCGCGGTGCCAATCGCCCCAACATCGGCCGCTCCCATCGCTGGAACACCC -AGTGCCCCCACCACCCCCCGTCAAGCCTTCAGGCGATCGATTGCGTTGACCCCGAGTAAT -GGGAGCTTTTCTGGACCTGGCGCCGGTGATTTCTTTGGAGGTAGCCCACTTCGATAGATG -AATATTATTACCCTTTTTACACTTGGCTCGTTTGGCTCTATTGGCATCATGTGTGTCGCA -ATAACGTATTATGCGTACTACCATTTCAACACCGATAAAAGTGGCAAACTCGACTTTTGG -ATAAACATACCAATTGTGTGGTATTTGATCTAGATCGTATATCTTTTTTTCAACTGAATT -ATCAATATTTACCGTATAAAGAGTCCCTGACAAAGGGAAAGGATGAGCCTCGGAGTCTTT -GGCCTAAAAGActcatgtctctctcttcttttctcttccctGCCTGTTGTCGCGAGCAAT -CATCGCCAACTCTCCACTCAATGATTCCCTCGTTACCATCGCAAGGCTCGCTCCTGGCAA -TTCGCTTCTACCTGATGACACACATATTTGAGGGAGCTCCGATGATAACGGTACTGATTA -CTATGGCACGGATCTCACAGAGCCTGATGAAAAAAATGATCAGCTAACGAATGCCGATGT -CGCGTGGCTCTTTGCAGACAACGAACATTTACGAGAATATTATCAGCAACAACTTGAACT -CTCTGAATAAAGGGCTCGTTTAGACCGGATCATAGCAAACGCACTGTATACGGCCACATC -ACTGGGTAATAGCGGTAAATACACCTGCATTTCTCTATCGCGGGCTAGCACTTTTATTAT -TTCTATCCCTCAAACTGCCACTCTGCGTGTCCCAGCTCATTTAGGCGTTCAAGCCCAGCA -GCATGATTTGAGACACATTCCTCGGCCCCGATTCCCAAAAGATCCATCACCTCTGCTAAA 
-CGCAATGCACCAGCTGCAAACAAAGGAATTGCAACGGATGGCTGTGCATCTGCATGAATA -TAGAACGGCATGAGGGAACATAGATGAATTGCTTCATTGAACCGAATGCGCAAATCCACA -TCTTCGTCTGGAGTCGAGTCTAACAAGTTCCGGATAATAGGCTGTAGGCGAGCATCCAGA -TCTGCACATAGCTTCCTTGCCCATGATGATAAGATACGGTAATTGATTACCGCGCAATCC -CCAATTGTGGTGAATGTTACTTCGTGTCGACCTTCATGTAGCAGATCATAGCAGCTCCGG -GTACTTTGAGCCAACTTTCCCAAGTCATAGTATATGTCACAAACTGGATATCCACGAGGA -TCAATGAGCCAGAAATCATCGTGATTGATGTCGCATAACATATTTCCGGGATGAAAGTCC -CCATGGATGAAGGGTGAAACACCTCCTGGGTCCAGGCGCCGGACCACCTCGTCGTCTCGT -TCTAGCCTGTGGAGCATCGCTGGGACATTTGAGATCCGTCGCCCGTTGATTTCTAACCAT -GGGGTGGTAATGAGTGTACTGAAAACCGGTTCCATCTCGGTGGAAATTGTCAAGCGACGC -CAGGCACGATGAAAATGATAATCATGAATGTATCGTGGGGGAGTAGGCTGTTCATGCTTT -AGAAATGCTTGGTTGTATTCAAAGTCCAGAATCGTTGACAATATCCGGGTGGCAGCGTCC -ACGGAGAGATGATTGTTCAACAAGAGGTCTCGGAGGTTGGGAATATCTTTCAATTCCGTT -CCCATAGTAATAGTGCCTTGCTTCCTTGATGCGAAGAGAAGCTGCGGATAATAGTCCTGG -ATATCATCCGGCAAGGTTTCGTACATTCGAATCTCGTCTGATAGCTTCTGGGCTCCACGA -CCCTTGGCCACCTTGGCAAAGTATCGATGAACGCCAGAATCTGAAAAGGATCCACCGACA -GCAGATGCGCTGTTTGAGCAGAATTCTTTGTATGTAAATGAGACCAGACTTTTCTGCTTT -GCTAGATCATGAGCAACAATGGGAAGGTCCTGGCAGACTTCCAATTGCTCTCGAAGATCA -CGACTTGACAACTTGATGGCAAATATATCTTCCTTGAGTGGACCATCACAGACTGCATAT -GCTTTTGGCAAACGCACATTGCAGAACGTGAGTTGTCGAATTGCTGCACATGAGAATTCA -TGATCCCCATTGACCAACACCACTTCTTTGTCAAGTACGTTGTCAAGGTCACTAGTTGAT -TCACACTCAATCATATCTGTGACACCATATTTTTGAAAAAGGTCACGAAGTTTTCCCGAA -CGTTGATGAGAAGAACTCCAATATAGGACAGGAGACATCGTGGCTTGATTGAGAATAAGT -GCAGAATCAAGGCACTGGTAGCTAAAAGAGGATGAAAGAGTATTGGGCTTGACAGTAGCG -AAAAACAACATGTACAATCTCAATATCCAGCAAAAATACGGGATATCACCCAAGATCGAT -TCATCGAGATCTAGACTTGATATTGGCCCTAGACTCAGTAATGTATTCCATCCCTTTTCT -AATGATATCTCAACGTCATAGAGGAGCGAGAATTAGACTTGATATTGGTTTTAGACTTGA -AGGTGCATTGCCACTCCTTTCCATAAAATCTTATCATCATAGAAGAGGGAGACCTAGATC -GTATCCTCACCCCGAACCTCTATAAAAGCTCAACATCTGGCAAAATACATGATGTCACGC -TAGAATCGCTCTGTGATAGAAAAAGAAGACCTAGAAAAGGGATTGGGTCTTGGCCTGGAG -ACGTATTTCCATGCCTGAATGAAAACGGAGTTCTAGACTTATCACTTGTAGAGCTTGGAT -ATGTATTTCCACCTCTGGCTTACCTGGAAGGTTGTAGTTCAACCACTGTCCATTCTTGCT 
-ATTGTTCTTGGATCATGTAACCATACAAACTCAACAGTGTCTCCTTTGAAGGGTGTGTAA -GGAGTACACGGACAGGTAACCGTTGCCTGGCTCTGTCTCATGAAGAACTACTCCCACTTT -CATTGAAACTCCACATATGTTCTTCCTCTGGAGGCACCTCAAAATCATATCGTAGAATAT -CTTTTGTATTTGGAAAAATGTGTGTCCGTGTAATGCTTTCAGGAAGAAACAGGCTCAGTA -TGAAGTAGTGTCCTTAGGGGGTATGGGAAGAAGTAGCGTTACATACGCTTGGTGTTCATC -GCTGTCGCGATATTTCTCAAAGGCAGTCATATTCTCATGCAAACTGAGAAGTACAACTTC -GAAGCCCTTAGTTCTCTCAGGTGGAGTGTTGATCGAAGGACCACCCACGACTAGCCGGTG -GCCCTTTACACAATCTAGTGTTTTTAAAGACTTCATCGCTCGAATGGTATCACTCCTCGT -CCCCTCTGCCACGTCTGGGTGAAATTTAAGCATGACTTCATTTCAAGTTTCAGCTAGACT -CGTCGAATCAGAGGGAGTACCGCTAAATTACTGACCGATATGGATGACAGTCATTTTGAG -GGCATTCCAAGAAAGGAACCGTGTTGATCCGGACTGATCAGACTCAAGAGAGTTGCATAG -GGAGATACAGGGAAGACGTATGCATTGTCAGAAGCTGGCATATCAATTCTTCTTATGTAG -GACTCTTGATGATTTGGGATTTTGCCTCGGATCTGGGTTCAGTGCATGGTGATATCACCT -TTTGCCTGTCAATCAATCTGAAGAACTTGGCTCTGACGAGAGTCTTAAAGCATATCTCTT -AATCTGGTTGACCAATGATTTCTAGCTCATCCAGCCGCAGTCGAACGATTCACTCTATCT -ACATGTTTCTATTAGCATTGCGGCTGAACCGGGTTGGAGTCCCTGGAGCCAGATTTAACT -ATCGTACGTGTGCTTCTTCAAACTTCTGATGAATATCTGGGGGATTTGTACTTGGTTTCA -ATATCAGTTGATGTGGGATTATGTATATATATTATTTTTTGACGAACTTGGGTTGTGGAA -AATGTGGCTTTCTTTGTCCCTCGAGTCGTTTTTGGGCACATTGCATTCTAATATCGTCCT -CTCAGAACAAATTGGAGTCTTTTTCTAGTAATCTAACTACAGGGACTACAAAAACAGTAT -TCCTGTAAATAGAAGGTGAGCTATGGATAGGCTCTTGTCGATTTATTCGGTCTCGTAAAA -TGTCGGGGTCCAATCAGTGTTCAAGTCTCGGTATTACAGTATCCACTTTTCTAACTAGTA -ATTTTTATAGGCGGATTACTAATTCTCAATTACACAGGTCGGATTAACTCATCCTCCGAC -ATGCCGGCCTGGCTACAAACATGAAATGCCGGCACTTAAGACTGACAGAATGTGATTGGC -CACATCCGGATACTCCACTCGGAGTATTTAGATCCAACGCGGGGGGTTAGGGAAATTCAT -CTTCAACTCTTTTTGACTATGAACCCATCTTCACTCGAGGCTGAAGTTCAGAAGAGTGTA -TCGAAAGTGTGTAGTAGCTGTAAGAGGTACGGGGATCCCCTTTCAAAACCTACACCTGGT -ATTAATGCTATGCATGTTGCAGACTAAAAAGAAAATGTGAGATACAAAAGCTGCGGCATC -TGTTATAAGACTTACCTTACCAGTTTCCAGGTGATCGGAAACAGCCAAGTTGCTCAAGAT -GTCAACTGAGTGGACTCGAGTGCCAATATGGGCAAATCAAAAGGCGACCGGGGCCCGTAA -AGGGGTCGGTCCAGTCTCGGACCAGCTTTTTACTTGATAAAGTCCGTAAGTAATTCCAAG -GTTGTTTCTATGTGTTTGATGATTTCCCTAATCCTCAATGTTCAGGTCACTTGGAAAATA 
-CTCTGCAAAGCCTAGTGGAAGGAGGCAATGCCTCAGCTCTGCCATATTCTCCACCGGGTC -AAAGCTACGGCACGTTATTTGACGGAGAGGGATGGGACTCCTTGGGGACTGAACTACCAT -CTAATATCCAACCAGAGGTAGATCGGGACGCCCATGATTATTCTCAACCACTTGACAACC -TTCTTCCACCGACCATAGCACCAGCTCATCAGGAATCACCACGAAGCTCAATTATCTCCA -GCGAAAATCACATTCGCCATATCACGGAGCAAACTGCTGCTAGCATGAATGCAGAGGAAA -AAGATACTTTGTGCGAAACCCACATACACTTCACTATTATGACGAGGCTAATCGAAGAGC -AGGTTACGCCTATATTTTGAGCATATCCAACCCGCCTTCCCGATGTTCCGGAAGACTTTG -TTCTACGACGAGCTATCTGTTAACATGATTCCCCCCGCCCTGTTACTTTCGATGTTTGCG -GTTTCATCTCGATTTGCTGGCATCAGCTGTTATACTGAGTCAAGCTCTTCAGACCAGCCG -CAGGAATACTTCGATGCTGCGTATAAGGAATTCCGCCAGGAAATGGATCGGAATCGACCG -GTGGAATTAAACGACGTGAAAACCGCATGTCTCCTTGCATTATACGACTACACGTCAGCG -CCCAGCCGTCGGGCATGGCTACTCGTTAGAGATGCCATGAGTCTCGCGTTGGCTGCGCGG -CTACATGAGGTTGACAGCGTAGACTCTTTGATCGAATTCTCCGACCCTGAAAAGGAAGAG -CGGCGATTCGTGTGGTGGACAATTTGGAAGTTAGACTCCACAGTCAATGTCTCAACAGTG -ACCCCTTTCGGGATCGACTGTCGGATGATAGGGACTGCCTTAGTGTCGACGACTGTTCAA -AACTTCACAGCCGATACCTTGGGGCCAATAGTACCGGTGATACCAGAAATGGACCCAGTC -AAGTCCTGGACTTCAATCTTGGCCCCCAATCTACAGGATACGGGAGATGGGTTCAATACT -CATTTGTTTGCCGTATCATTGCTACGAGCAGTCTCCGAATGCCAACAGCGCCTCAATGCA -AAACTGTGCCCTGAAGAGATTGCCCGAGCAGATACCCTCAATCGCATACTATCCTCCCTC -CACCTAATTCTACCCGACAGTTTCTTCAGTCCAACTAAGCGCCTCATGGAACAAGAGCAT -GCACACAGGTTACGATTAGAAACTAATATCATGCTAAACACGTTCGTTTATTTCTCTTTC -TTTACTTGGTCGATTTGCTGACCCACCATACCATCAGCGCGCGCCTTATATTACATGAGC -CTACTAGGCAAATTGTGGTGGGAACCGATATTTCATCTGATTCAACTTTGACGTCTCTGA -AAAGCTGGAAAGACAGTATTGGTTTTGCACGAAAGATTGCTTCGCTGTTCGATCATTGGC -AATTCAGCTATTCCGCATATGCTGATCCGGTCATCAGCTGTGCTCTTTGGCATGCTTACT -GTGGACTGAGGCTTTACGGTATGTCCGGGCTGGACGAGTCCGGTTCAAGTTTCAGCTCCC -CGGCAGCTATCAGTCTTGAGAAGTTGAGACTATCTCTGGAAAGTTTTACCCCGTGGTGGC -CAATTGCACGAGTCCTCAGAGGTATGCACAAGTCTTGCGTAAAACGCTGTTCTCATCATG -AAATCAGAATTTTATCGAGTCCGAAGGCTCAGGCTAACTTCATTTACATATAGATTCACT -TCAAGCTCTGCCAGCACGAAGCTGGTCTTTTGTTGATATCAGCAAGCTAGTCTCCCTGAT -TGGGCTCTCTAGAAAGGCATTCAATCCTTACAGAAGCGATTCCGAAAAGGTTGATGTTAC -ATTTCTTTTGAGTTATCAATAATGTTCGAATAAATCATGTTGTACATAACGAGCTTTTGT 
-TCATTTCCGAATACACCCAAATGGAATATTCGCGAAAGACGTGGTTAATATTAGAGTAGT -AACATCTCAAAGTCCAACCCAATCATTTGTTTTCTAGTGATTGGTAAATGAGAGTCGAGG -CTGCAAGTTGCAGTGTTCTCAAAGCACAATAATGGGAGCGCAATATTCTGTTCGCCTCAC -TCGAAAGTAAGTGAAACAGTGGAAGTTCTGTTATGGTAGCAGGAAGGCTGGCAACCCAGC -TTTCCAATTCTTGTATGCAATAAAATTTGGCGAGCACACTGACTCCATCTGGCGAAGGAC -TCACGCCAGAAATGTCACTCAATGAATCAGACGAAAGTCCTTTCGGAAATTCTAGGGCAG -TGATAACCCGACTAATACAGATGCTGAGCTTCACATTCTCGATAAATAGTAGAGCGAGAC -GTTTTTGAATCTCCAGATCTGGACAGACTTCCGATATGCCTAACGCGCAGGTTTCTCTGG -GAGAATAGGTTGAAAATTCAAAATCAGCAAGTGTCAAGATTGGGGTCTCGAAAGCATCAT -CATCGATGATGCCAGCCCGTTTGGCACTCAACGACAACATTCTGTCACGGCAAAACAAGC -TCCACCAGGTTCGTGTATGGTCTCGTTGGCTTGCCACCGAGATTCGGTGATCCGGGAAGC -GATGCAGGCCCGCTGTATAAGCAAGGGACACGGCTATACCCATCCAGTGCCGTGGATCTT -TCTCTGTTCCCTTGACTTTCCAGAAGGACAATAGCACAAGCGACTTGATTACCGTCAATC -GATCTTTCTCGTAATCCAAGTCAAACAAACAGCGGGCGCGGGCAAAGAATTCACCACAAG -CAGTGGCTTTGTTTCTATAGCCTGCTGCAGATAGCTCCGTGGCGCTGATGGAGGCAGATC -CGGCAAATAATATTGCTTGAAGCAGTAAAATGCTAATATGTTGGCTCGGATCTTCTGTGG -ATATAGCGTGTAGTAATTTCTTGATATCGACGAATGGCACACGAGGATGAATATTATTAA -TGTAGGCTCGAACCAGCGTGAGTTGAAAAGGGCGTGGTGGCATTATGAAAGCGCCCTTTT -GTGTCAAATAGGCAAGTTCTTCTGCTCCGAGTTTGTTGTTTCCCCGTATAGTGAGCACTG -TGTTCTTCTTCGCGCTGTGGATGTTTGTTGGATTGAGAAATCTTTCGGGATTGGATGGAG -ATTGCATAGTAGACAACCGCTGGACACCTGAGTTGAATGCGTCAATAATCTCACAAGGGC -ACGGCCTAAGCAGAGGGTCTATCGTGGTGGAATGGCAGTCTCACAATCTATTATTGGCGT -CCCCCAGTCGATCTCTAAATCAACCCTTGGAGATACATAATCTTCACTAGAGGTGGAGGT -AGATTCGGAAGAGTATAGACTCCAATGATTGCCTGGTTCATAATCACTGGTATTTGGGGT -TGTTTCCCATGTCTTTCCCCTAAAGAGCCGATCTTAGCTTCCCAAGAGGTAATACCAGTC -TAAGTCGAGACGTACCTCCCTATAGACTTGAACATTCTCACACATCGAAGTCCTCGAATT -GAACAAGCAGTGCATGCCACATCTTCTTGCACTCGATGGCAACGTATTTTTTTGGCTCTA -CAGAGACAACATGCATTCTCGGCTCTTTTGCGCTTCTTGATTTGGGTCATTTTTTGAGAG -TTCACAGTGCGAGGTACCCGAGAGCGGAAAACCTCCGGCTTTGAAATCGAATGTGGTCTG -TGTTTATAATAGAAAGTAAGATGGAAAGGGTAGAATCGTTGACAAATTGCGATTCTCGTC -GGCTGAACAAGATGCTCCGCATCGGCGAGCGGCACGGTGAAAACAACCCGATCTCATTCC -CCCATATTAAAATATAAATGCGCCAAAGGTGTATCTTGATTCAATTATCATTCTAAAATT 
-CCAAGCCCAAAAATGCCTCCTGGTCGCATCCCTCCCGTCTCGAACAACCCCGAGAAAATC -CAACTTGAACGACTCAGTCATGTGTTGTTTTCGCATCCTGATCTAGAGAAGTTCAACACA -TTCGCAGTCGACTTCGGCTTTGAGATCGTTGAGAAAACCGAGTCGGCCATTTGCTATCGT -GGTTGGGGTCAGGATACCATTTCCTACGTCGCACTCCCTGGCAATGCCAACGATGAAGGA -TTCAAAGGTGCTGCTTTCATCGCCAAAACTGAAGCCGACTTCACAAAAAGTGCCGCTCTC -CCGGGAGCATCTCCTATTCAAGACCACAAAAGGCCTGGTGGTGGCAAGATTGTTACTATT -CCAGCGCCTTCTGGCGCTGCGATGCACGTTATTTGGGGCCAGCAGGATCGACCCAAGCCC -GAAGCTCCAGTCAGCCATACCGAGGTTCATAAGGGAGGGTACAATACAACTTTAAAGAAA -GAGAGAAAAGGTTCGTTTCTCACTTTACTACAAAAAGACGTGTAAACATCAACTAACACG -TCTTTGCTTGACAGGCGAATTCCAGCGATTCAAACTCGGGCCTGCCATGATACATAAGCT -TGGGCACTATGGCTATATGACCAAGATGTTTGACGAGGACATCGACTTTTACACCAGCAA -TTTCAATTTCGTTCCATCTGATATTTTGTATGAAGAGATCGATGGGCAAGAGATCGATAC -TTTAACGTTCATGCATCTTGATCACGGCTCCATTTACACCGATCACCACACACTCTTCCT -CAACCGGGTCCCTCCGACTTACCCAGTCAACTACCGTGTCCATCATTGCTCTTTTGAGGT -GGAGGACTTTGACACCCAGCTCCTTGGACATGAATACCTACTCAGCAAAGGCCACAAGCT -GGTATGGGGTGTTGGACGGCACATTCTCGGATCCCAGATCTTCGACTACTGGCAAGATCC -TAGTGGGTATAGCATCGAGCACTACGCTGATGGTGACGTGGTCAATGAAAATAATCCTAC -GGGGAGATTCCAGAGTGAAGGCGCAGCTTCAATGTACATTTGGGGTCCGATCAGACCCTC -CACGGGTGTTGCACAGTGAGAAGTGAATGAATGGTTTTCTTCAACGGGAAATGAGTTGAT -GGTGCCTATTTGGCTATGAATTATTTTGATATTTTGTTTTGAAGTGACAGAATCTGGATT -TCCCTTTAAATACCAATCAGGGACCATTGAAATGTTGCCTTGCCCATATTGGTTCACCAA -AACTTCTTACTATATATCGTCTTAGGTGTAAAGAAGCCATGCGACCACAACAAACTGGCA -AATTCTATTGACGTATACCAGAAAAATGGATGATGTTTCAGATGTTTGCATTTGAAAAGA -AGGAAATAAAATACCTTGGGAATCCAGGGTGAAATTTGATCATCACAAACAAGATACTGT -AGGTATTCGGTGGCCCGCATATTCAGGCTAGTTGCGTTGGGATGTTGCACGCACAACCTG -AATTTCCAAGCTCCCCCGGTCATGCGAAATAAAGGACTGTGAGAAAGCCCCACAAACAAG -TGGGGTCCCCCATCGACTCACCGAAATATGGGGTAGAGTCTTTGCGCTCGGCGGCATAAC -TAGTTCCCCCGCTGGCCCCCATCGTCGCCCAATATCTCGGTCTGGTTTAACTTGATTGGT -GATGTTCTAGTGGCTCTGTTCTATGTGGCAAGTGGTTTTCGTTTTTTTTCTCCTATTTTT -TTATAGAAAGGATATTGTCCAAAATGATCACGGCATGACATATTTGGGATATCGAGGCGA -GTGAACGGGGGTCTACTTCCAAACGTCTGTTCTCAATACGCCGAAGAAAAAAATGCAATA -GAAATAAAGTCAAGCAGATATATACAAAACATTCCCCCTTGAACAAACGAAAGAACCTAT 
-AAAGGTTCGAAATGCGAAAAGATGCAGATTTTTAGACCCCGAAATAGTCTTTAACATGTT -GCAGGTTGGCCTTGAGTCCGCTCAGATCGGATGCAACTGAGTGAATGAAGAAGTCAGGAC -GCAAGAGGATGTACTTTGCCGAAGATGGTACCCGCTTCTGAAGGGCATCTTTGTTGTAGG -CAAAAACCGGGGTAATATCGGCTTGTTCCAAAACGTCCGTTGAGCAAGGTTTCCATGCTG -TCACTCCATTTTTGCTCGCCAGATCCCCTCCCGTCAAGTTGACGTAGGTAATGTTTTCTG -AAGCCAATAAATTACATGGCATCTTCAACTCTTCAACCGCGGCATCCACCGAGGCAGGGT -CAAAGTCCGATACTTTCTTGGCCAACACCAGAACGGCAATGTGAGATCTGTCTTTCAGTA -GGATCTCATCAGACAATTGGGGCAGATCCTTCCCTTGCTGCATCCACACTTGTGCAATCT -TCTGTCCACCACCCGCAGCCTGCAAAAAGAAACCATCAGGGCACGTGGTAGAGTTGTAGA -TCAACTTATCCTTGAATGCTCGACGGGTGCGGATTTTGGCGCCACCTGGTAGCCACCAAA -GCAGACGCATACAGAGTCTGTAGATGCTTTCGCGCCACCCAAAGCCCATATTCGCGATAG -AGCCATTAAGTTTGGTAGATAGTGTAGCGGCTTCCCAGGCATGGCGACGTTCCTGACTCC -AGCCTTGCAAAACTCGTTCGCGAGACGCGCCCGTTACACCGAGACGGATCATCATAGATA -GTCGCCAGCCGAGGCCTTGAGCATCTCGGATGCCAGTTGCAATACCCTGGCCACCGAAGG -GAGGGAAGACGTGCGCAGCGTCGCCGATCAGCATGGTCTGGCGGCAGTACCATCGGTTTA -CAACTTTTGTCGCGAATCTAGATATCGAGTAATTCAATTAGTAAATTGAGCTCCCCGTTG -TCAACCTAAGAGCTACCTGGGATGATGTGTAGCCTTACCTGAATGGACGGCAGCGGATGA -CTTTTATGCAGTTCCTTGGGTATGCGACTGTGATTCCTTTCAATCTTGACGAGAATTGAG -AACCGGGGAGGTCCATCCAAGGTCCGAACTGGTTCCAAAAGTCTTGCTCGCAATCTTCTG -GCTCTTTCTCGTTTCCGATGGAGTATTCGTGTCTAGTAACAGATCAGATATGTCACAATC -ATCTGCACTCACTTGACAAAAAGATGCTTACCTCCAAAACCGGGCACCTTTCGGCCCAAA -TCTTCCACTAACTGCTGGTCGATTTGTGTCATTACAGAAGCTTTTGTTGATTAGCACATA -CAGGAGCAGACTGGCGGTCAACGAATGAACTCACTGGAATCCCTTGGGCCAGAATGTGCT -GTGCACTTGCTCTGGTGTATATCCTAATTCCCACAGAGGGAAGTCAGGGTGGGTCTTGGG -AGTGGGTAGGTCCACCTCGAGGTTGACTGCGATCCAGGTCCCAACATATGGCCAGCTTTG -TCTCGGGGTTAGAACTAGGTATTAAACCTGCGATAAAATATCATTTGACTTACACGGCAT -CCACTTGTTTGACTCCTTCTGGCTCAAGGAATCGTTTCCTCACAATCCCACGCTTTCCAT -CAGCCCCCACGAGCCAAGATGTTCGAATAGACTGAAGTGTGCCGTCCGCAGTGCGATATT -CAATGATTGTATGATCTTCGTTCTCTACGCGGCTGAGAACCTCACAGCCCACACGAAGAT -CGCAAGCTGGAATAGTCTTGAGGAGTGCTCGAATTTCGCGTTCTGCAAATGTCAGCGAAT -GATGATACTGAAGCATAAGACTCTGCTGTCAATGAAACACATGAAACTGACCGTAGTTTG -GTTGAAACTGGGTAATGTTTTGTGAGAGGGATTGTTCCGCCCAGTCAGGCCGCAAGTCGA 
-AAGCCATGAACGGACGTTCACGGAAGTTTCCAGCGTGGAAATTCAGCGCACCGATATCTG -AGAGAGTGATTGGTGTCAGAGATTGACTTATATTCGCAGGTCATATTCAAACTCACCACT -TCCAATTCTTTTGGTCAATCCTTCCCCAATTCCGACCTGGTATGAAATTCGAACCGCATC -GCCATTCACGACAATTCCGCGAGGGTCCTCGCAGACTTCTTGATCTTTTTCAAGAACAAC -AACCTGAGATTAGTAGTCAGCCTCTGTAACTACACCGGACATCGATAAACTGACCTTGAT -TCCCATACGACCCAACAGAGCTCCGAGGACGAGGCCGCTGGGCCCTGCTCCGACAATGAG -AACGTCGGTTGTCTCCATTATTGCACTCAAGCAACTGAGTAAAGAGGTAAAGTGAGAAAA -ATTGCCGGAAGAGAAAGACGAATACGTGAGACTACAGTGGCATGATTAATCCAAGGTCAT -ATAAGCAAAGAAAGTGAAGCTATGTCGGCATTAAAGCGGAGATAAACAGTATAAACAGAG -ACCTCGGGTGATGAAGACTGCCTCTTGCCGAAGCACTCTTATTACAAAATAGATATCGCC -CATTGCCGCAGAGAACTGTACGAGCTCAGTTGTCGCGCAGCCGCTCTATGTTTTCCGCAT -AATGCGGCGCGATATCTAGTTCGGCCGACGGTCCCACATGGAGGCCACATCCAGGATTGG -GCAATCAATGCAAGATTCCAATGCAAAATAATAACTGGACAGCTCGTGTTTCCAGACCCA -GCTCATGCGCTAGTCACTCAGTCAATTGCGGGGGGAAATGGTAGTTACTATTGATATATG -GCAAACTTAACGAAGGCAGTCCATGGCGAATTTGTGATTATTCGCTTCGTCAATAATGAA -TCTCAAACATATTTCATCTTTTTTCCAATGGTCTCTAGTAGACTTACACTTGAAAAGTCG -AGAATTTTGTGATTCTTTCCCTGGATTGTCGGGCGAGGTAATTCAGGTATAGCGTAACCC -TCCCAGCCTCAACGAAATCAACCTACCTTTCTCTGGATTTTGTCATATTGGACTCCATGG -ATTTAAAGTCAGACCTACACCCGAATCAGGCCGAGTGGTATGAGTGCTCACATATAATTA -CAGAATTTTCCAAATAAAGCAAAGGCTTCCCTGTCAATCTTAATGATTCACAGCGCCGTA -TTTTATCATATGAACAAGTCATACTCAAGTTACGATGTAGAGTCAGAAAGGTTATAAAGT -AACGAAATTTGCTTCTCTAGTAGTATAATCGCATTGAGCACATGTTGAGCTCTCCGACTC -AAACAAAGTCAACCCCGTTCTTCAAGGTACCAATCTGCGACACGCTGATGTTCATCTGGG -TACCGGGTCTAATAAATTGAGGAGGCTTGAGGCCATTTCCAACTCCTAGGTAGTGTCAAT -GTGAGGCCTTCATAACCGTAAATGATGAGTATTGACATACCTCCAGGAGTTCCTGTCATG -ATAATACTTCCTTTCTGCAAGGTGGTTCCCGTCGACAGATAGGAGACAAGAGCGGCACAG -TCGAATACCAAGTCAGAAACGAGAGTATCTTGGCGAAGCTCCCCGTCAATCTTAGTCTGT -AGGCGCAGCTCACTCGGATCCTTGATAAGATCCCTCCGTACAAGGCAGGGGCCCAAAGGT -GCGAATGTATCGAACCCTTTGGAGAATCCCCACTGCGGAACTGGGCCAGCAATAGTGCGG -TCGATCTGTAGTCTACGGGACGAGACATCATTTCCCACGGTATAGGCAGCTACGTAACTG -AGTGCATCCTCCTTGCTGATATCCTTAGCATCTTTACCCATCACAAGGCACTAAAGGTTG -TGTATATGTAATCAGCAATTGTCATTGGTTTTAGAGGGAGCTTTGGATAAAACTTACCAA 
-CTCTCCTTCATAATCTGCCTGCTCATCTTGAGCGATCTTAGGGATCTTCACATTCACGTT -GTGGTCAGTGACCGTCGTATTCGGCTTGAAGAAAATAGACGGTGTGGGTGGAGTCGAACG -TCCAGTCTCGCGGACTATTTGAGTGGTGAGTCTTGGCTTCTGTGTCACAGGACTATCGCA -TACTTACTGTGTGTGAGGTAGTTAAGTCCAACGCAACGCAGGATGGGGATATCTTGCGGA -GTCAAAGGGCCGAGTAGCCTCGACACAGTCACAACTTCATCAGTGACACGGGTTTCGCCG -GTAGTGTTATAGATATCTGAGCCCGTGATCACTCTGGCTTGCAACTTGGTTTCTTCTGTA -GTATCTCCAAGGTCGAAGTCCGCCGATGGCATCACCGGTTCACCAAGCAATGTGCGTCCA -TCTGTGGCGATAAATCGGATCAGACGTTCCCAGGCAACAGACATTATTGATTATAAGAAG -TTGACGGAATTGAAGTGAATTGCAGATGTAGCTGGGAACGAGTACCAGGCTAGATTCCAA -TTTAAGTGAAAGCCACCAGCCCTAAGCGGGATTCTCTTAATTTCGCCGCCATTGATCATC -TCGGCCGCGAGAGCTGGTTGTGTGTAGTTCTCCTAATTCCACGGCGATGCGGTAAATCTA -GTTATCCCGCCAGGCCCACCCCACCTGGATTTCACCAACTCCCCCAAGCTCTCACCCCAC -AAAAGTGAAAGTCTAATTCCGGGGTTTAAAGATGCCAGGCATGGTTCTCTATATTGAGCG -TATATCTTTTTTGAAGTCCACGTTGACTATAATCTTTTCACGAGATGGCCGTCCAGCCAG -CAGTTTCAAATCATTATCCTCAGTCGAAAGATGAAGAAGCTGCCTTCAGTACAATGAACG -ACCATGTTGAACATGCATCTCCTCCTATGGAGCATTCGAATGAAAAGAATGCACCGACCG -TCATAGGTGATGATCCTAACCACCCGGTCAACTGGCCAAAGTACAAGGTAAAGTTGCTCA -TCAGAGCAAACTTGGACTCCTAACTAATTGCTCGTCCAGAGAAATATCAACCTGGGTCTG -ATCTCGTTCCATGCCCTTGTGACCAATTTCGTTGGTGCTGGAATCATTCCCGTCTATGCT -ACCCTCGCCGAGAAGTTTGGTATCGAGATTCAGGACGTTAGCTACTTAACCTCGATTCAT -GTTAGTCACATAATATTTCCATCAACACCGAGTCGCGAGAAGCTAACCAACACAAAGATT -CTGTGTACCGGTCTTACGCCCTTCTTTCTAGTCCCAATTTCCACCAGATATGGTCGCCGG -CCTGTGTGGCTCATCTCCACGCTGTTCACGGCCATCTGTAATGTTGGGTGCGCAGAGAGC -CAAAGTTACGCACCCATGTTGGTTTGCCGTATTTTAGGTTCTCTCATGCTGGGTGCTCCC -ATCGCTCTTGGGCCTCCTGTTGTGATGGAATCCTTTGCAGAGCACGAACGGGGTATCGCG -CTAGGAACCTGGTCGTGAGTCTTCCCTAATTTCTATTCCTTGACCGTCTATTTGGAATAT -TCATGCAGTTTATGAAGTACTGACGCATTATTCCCAGTGTTTTTGTCACGCTTGGCCCTT -TGACCGGTCCGTTTGTCATGGGATTCGTTGCGGAGCGACTTGGATGGCAATGGATCTATT -GGATCTTCACCCTGATAAGTGGCGCGCAGTTCGTCTTATACTTCTTATTCAGCCCCGAGA -CGAGATATATTCGGCAGGAGAAGACAACCAGCAAGTCTGGCTTCATGAAGTTGATCACAT -TCTCGCGAATCGATCCCACGCCTTTCTCCATATCCGAAGTTATTCGGCCCTTCTCAATGG -GGAAGCAAATGACCGTGCTGCTGCCCGTGCTTTCACACAGTATGATCTTCTGTCTCTCAG 
-CTGCAATGCTGACAGTGGAGATCCCTCAATTGTTTGCCACTCGCTTTGAGCTTGGCCCAG -AGCAAACCGGTATCCAGTTCTTGGGTATCATCGTTGGCACCATTCTTGGTGAATTGTTCA -ACGCTGTGGTCCTCAGCTTGATGAACAGAAAAGCAGTAGAAAAGCAAAGTCATGCTGTGA -ATCCCAGGAATTACCTCGTTGCAAGTTACTTGGGTTTTGTCTCCATGATTGTTGGGTTGG -TTGTCTTCTGTGTCCTATTAGGAAACACACTGCCCATGCACTACAAGGTATCTCCAATCA -TTGGTATTGGTGTTGCAGGATTTGGGAACCAATTGGTGTCCAACTTTTTGATTAACTGTA -AGTTTGTATGAGGTTATCTTGGTTGCTGAAACGGGAGCTAACAATTCATCTCCTTCAGAT -ATCATGCACGTTCATTCGAATGAAGCCGCCACTGCTTCAATTTTCTTGGGCTTCATTCGC -CAGACATGGTGTTTCATTGGACCTTTCTGGTTCCCTACCATGTTTGAAACCGTGGGGATT -ACAAACTGTGCTGGGCTTCTGTCCGCGCTGGTGGTCATAAGTGTCCTGCCTGTTATCTGG -CTGCACTGGAGATCACGAAGCTAGAGAATAATTTGTACAAGAAGCTAGAGAGTGGAAGAT -GAATAGACAAATGGATATGCTATTCTGGGCTTCAAGTATTTGGCATCAAAGTTGCAATTT -CTGTCAAATCTATGCTGTTTTCAAGTTGGAGATCAGCTCACGAGCCAATTGAAGGCATGT -TTTGGATGATAGACAACATAATTTACCAGTATACCATAACGAAAACCATGGTTCCTGTTT -AGAGCCTAAGGGGAGCCATATTTGTTGTACTCACATACTGTTGTACCTAGTTGACAGGAT -GACGAATGTGATGTTCGAGATGAGTAGGCGGTGGAAAAGGCGGTCGGCTCAATGAGCCGT -GCTCGGCCTACGTAAATAACCGGTTCGTCCAGGGTTTCTCCGTTACCTACCCCTCGCTCA -TCGCCCTGACGCCGTAATGTCTCGGCACCGTATTAAGGATGTCGATTATGACGATGATGA -TCTCTATGATGAGGATGATGTGCTAGAAGATCCTGAGGAACAGGAGTTTTTGCAACAGTG -TACTACCGCTGTTTTGCAACAGCTCGGTGCGGGGCAACCGTCAGTGACCGCCACGAAACA -AGAAGTTGAAGACGCGCTATGGCACTACTACAATGATATCGAGAAGTCAGTCAACTATTT -GAGGGGTATGTTCAATTTACATAGATACGCCACACCGCATGGCATTACCCATCGTGGAAT -TCCGCGTATGTTGTGAAATCCAGCTAACCGAATTCTACGTGAAGGTAAAAGAGAGAAGGA -AGCCACGAAGCCCCAGAAATCGAAGATTCAACCAGGTAAGCATTGTTTATGGGAGAATTG -TTGTCCTAGAGTGTCGCTGGGCTCGGGGTCTGGGGCACAAGATATTCTGCTCGCCCAGAT -CATGGGTGCTGCGGTTGCTTGCAAATGAAAATGTGTGTACACTTGGTACAAATCCATCCG -ATGTAATTTCTAGAGGCTCAAAACTGATGCTATTGAAGATTGAACAATGTTCTAACAGGT -TTTCCAGTGCCAGCATATCCCGCTCCTCCTTCTACGGCACACTTCTCGGCTGCCGATTTC -TTTCGCGATTGTCCATGGCTCAACGTCCCGTCTCATCGGAAAGCGGATATCCTGATCGAG -CCACTCTATCCACGTTTGGGACTGTTGGGCGGTGCACCGGAGAGTGGTGGAAAACTTTCC -AAATTGGCCGCCCTCGCTGCAGCCCGGAAGAAGAAAGAGGGCGAGAAGGTACCGTCCCTA -CCGGAGGCTTCGACTCCATCAACTCCCAATCTCGAGCAACCAAAGACATCCTCGCTTGAT 
-CATACGGGCACTTCGCGCTCCCTTCGTGACAGACTTGCGGCTAGCGGGAGGTCTACGCCA -AAAGCTTCAGAGGGTACTGGGTCTTTGCGCCGCTTGGCCAACCCAAGCCCCTCCTCGCAA -CCGGTACTCAAGCAGCCCACATCTGAGACCAGAAAGCCGTCAGTGTCTGAGGTTTCTGAA -TCGATGCAAAGGGTGGTGCTTGAAGCGAAAGAAGGACCAGAGCCCGAGCAAACAGTGCCT -ACCATTCGAGCCTCGCCCTCGACATTTGCCAGCACTATCGTTGGCTCGGCAACAGGCCCG -ACAGTGGCTGAGCCCAGCCTTTTACACTCCAACAGCTCAGACCTGCTGAGGATCTATGGA -CAGGATCATGCTGAACCTTTTGACTTTGCAGCACCGAGCCCCGACGATGTCGTTCTCAAC -GCACAAAACACAGCGAAAGGTTTGGCGATCCGCAGGAAGGTTTAAAGGCAATTGCCTAAT -AATATGTAGGTATGAAGTCGAAATCAGCCGCATCGAAATCGGCAACGGTGAAGAAAGGCC -AGTCTGATCTGGCAGGTGGTATGAAGGATTTGTCAGTGGAAGACAAAGTGAGCGTGAAGA -GCAAGAATCTTGATGTTTTGTCGGAGTACAAGAAGTCCACCCGCAAGCGATCAGCAAACT -TTGTGGTTATTGGTAAGTCGATCTCCACCCGCGAGAAGATGCGGTGCGGACCTTGACTGA -CACATAAAATATAGGTCACGTTGATGCTGGAAAGAGCACTCTCATGGGACGGTTGTTAGC -AGATCAAGGTGCCATCGACCAAAGGACTCTGGATAGGTATCGGCGAGAGGCCGAGAAGAT -CGGCAAGGGCTCATTCGCGCTAGCCTGGGTCCTCGACCAAGGATCGGAGGAAAGAGCACG -AGGCGTGACGATAGATATCGCCACCAATAAATTCGAGACCGACAAGACAGCTTTTACGAT -TGTTGATGCACCAGGACACCGTGACTTTGTCCCGAACATGATTGCTGGTGCCAGTCAGGC -AGACTTTGCTGTTCTGGTCATTGATTCCAGTGTAGGTAATTTTGAGTCAGGGTTGAAGGG -CCAGACGAAGGAGCACGCTCTGCTGGTGCGCAGCATGGGTGTTCAAAAAATTGTGGTGGC -TGTGAACAAGATGGATACAGTACAATGGGATCATGAACGATTCGAGGAGATTGAACAGCA -GATATCTGCATTCTTGACGACTGCTGGCTTCCAGGACACCAATATTTCGTTTGTGCCGTG -CTCAGGAGTTCTGGGTGACAACATTTCTCGGCGGACTGACGACCCTCGTGCATCCTGGTA -CACTGGTCGTACATTGATCGAAGAACTCGAAACTTCGGAGCCATATACACATGCCCTTGA -AAAGCCATTGCGTATGACGATTGGCGATGTCTTCCGAGGTGGTGTGCAGAATCCGCTTTC -TATCTCTGGCCGGATCGACGCAGGTAGCTTGCAGATAGGCGACCAAATCCTGACCATGCC -CAGTGGTGAAACAGCAACGGTGCGCAGTCTGGAGGTGGACAATGAACCATGTGATTGGGC -GGTAGCAGGTCAAAACGTCGTTTTGCACATCGCCAATATTGACCCTATTCACCTCCGTTC -CGGTGATGTTGTCTGCCGTGCTTCATCTCCCATTCCTACCATCACTTCTTTCACCGCCAA -GATTCTGGCCTTTGAACACCTGATGCCCATGCAGGTGGATGTGCACCGTGGACGTCTACA -TGTGCCGGGTCGCATCAGCAAGCTGGTTGCCTCTCTGGACAAAGCGTCAGGGGTGGCTAT -CAAGAAACGCCCTAAAGTCGTGGGTCCAGGTGTGGTAGTGCGTGTTGTGGTGGAAATGGA -TCAAGCTGTTCCTCTGGAGGCACCGACCAGGATTGTCCTGCGCGCGGGAGGCTCAACAGT 
-GGCCGCTGGTTTATTAGAATAAGTTTTCTCAATTAAAATTTATCTCATTGGTAATTTGCC -ATAATTCTTCGTTGATAGCAAGTTATGTATCTTACTTTCTCGGCCGCCCCGTTATAAACC -GTATATATATATATCTCTCTCGCTCATACATAGTACTCCATTCAATGAGCTGAGCAGTTG -GTGCTCGCTAAAAATGATTTCACATATTATTTGAGATAGAATTCTAATATTAATTATTGA -ATAAGTTTTCGAATTGTGTGAGAGTACGGAGTACTCCGTAGTGCCATTAGAGTTAAACTC -ATAAGCTGTTTACTAAATGGCACAGGGCACGGTGCCCTGTGCCACGCCCTACCCGGAGAT -CACCCAAATCAACTCCGTACTTTGTAAGTCCTCAGTACACCCATGCTGGATCGAGATGCC -ACTTGTCATGATCCGTCAACTTAGGCTGTTTCAAGATATGAGCGGGATGTAGCAACACGT -GGTCAGTAACTAAGACATGAGTCAACCGGGTAAACTTTGGACGGAGTAGATCGCGAGAGC -CTGAAGGGGGCGAACACTTGGCCCCTTCTAGCAAGTTCGATCATTTGGCAGTCGTTGATT -GGTGGAAGCCCTGAGTAGTCCGTGGATATACCCCTGCCACTAGGCTATTTTGGGGAGCAA -CTTCCTGGTGAAGAAGGCGGTGTGGCTGCAGGGTGATTTCTGAGTCAGCCATTGCCGGAC -AGATGCCAGCCAATCAGCGCACGAGAAGTGACCTGAACTGGGAAAAGGCCAGCATGGCCG -CTGGGCCGCATCCGTGTACAAGAAGTCCTCTTTCTTCGATTCTTCTCCAATCCATTCTCC -CATTTCCTCACTTCGCCCCGGGTCCCTTTCCCGCACATCTTCGACTAACATAAGGGTCCA -TAGAGTCGAACACGCCTTGTGTTTACTCGTCCGCGCAGCTGTTTAGATCGGCACGCGCGA -AGCTTTCATAACGACAGCTTCTTTGTTTTTGTGTTGAAAAAGGACTTCGATCCACTTATA -CATACACCCTTCACAATGGCTCGCCTGTCCGCACGCGATGGCGCGTCCAAGCCCTTCGCC -TGGACTACCATCTTCTACCTCCTTTTCGTCCTCGTCGCGCCTCTGGCCTTCTTTGGAAAC -ACTGCCAATGCCCAGGAGGACAACTCTCCAGAGAGCTACGGCAATGTTATTGGTATTGAT -TTGGGAACCACTTACTCGTATGTTTTCTTAGTTTGAGAACCATTCATGTTGTATGAGATA -GGTTGCTGACATGAAACACAGTTGTGTTGGTGTGATGCAGAACGGCAAGGTCGAGATTCT -TGTCAACGACCAAGGAAACCGTATCACCCCGTCCTACGTTGCCTTCACCGATGAGGAGCG -CCTTGTTGGTGACGCTGCCAAGAACCAATACGCCGCTAACCCCGAGCGCACCGTTTTCGA -TATTAAGTAAGTTTCTACTACACCTATCACCGTGAGAATCCAGCACTAACGCATCGCAGG -CGTATGATCGGCCGCAAGTTTGATGACAAGGACATCCAGAAGGATATCAAGAACTACCCC -TTCAAGGTCGTCAAGAAGGATGGCAAGCCCCAAGTCAAGGTCGACGTTAACCAGTCCGCC -AAGACCTTCACCCCCGAGGAGATTTCCGCCATGATCCTCGGTAAGATGAAGGAGATTGCC -GAGAGCTTCTTGGGCAAGACTGTCACCCACGCCGTTGTTACCGTTCCCGCCTACTTCAAT -GACGCCCAGCGCCAGGCCACCAAGGATGCCGGTACCATCGCCGGTCTTAACGTCCTCCGC -GTTGTCAACGAGCCTACCGCCGCCGCTATCGCCTACGGTCTTGATAAGACTGGTGATGAG -CGCCAGGTCATTGTTTACGATCTCGGTGGTGGTACCTTCGATGTGTCTCTTCTGTCTATC 
-GACAACGGTGTCTTCGAGGTTCTGGCTACCGCTGGTGACACTCACCTTGGTGGTGAGGAT -TTCGACCACCGTGTCATGGAGTACTTCGTCAAGCAGTACAACAAGAAGAACAACGCCGAT -GTGAAGAAGGATCTCAAGGCTATGGGTAAACTTAAGCGCGAGGTTGAGAAGGCCAAGCGT -ACTCTTTCTTCCCAGATGTCCACCCGTATTGAAATCGAGGCTTTCCACAATGGCGAGGAT -TTCTCTGAGACCTTGACCCGCGCTAAGTTCGAGGAGATCAACATGGATCTGTTCAAGAAG -ACCCTCAAGCCTGTTGAGCAGGTTCTGAAGGACGCTAAGGTGAAGAAGTCCGAGGTTGAT -GATATCGTTCTTGTTGGTGGTTCTACCCGTATCCCTAAGGTCCAGGCTCTTCTCGAGGAG -TTCTTCGCTGGAAAGAAGGCTAGCAAGGGTATCAACCCCGATGAGGCCGTTGCCTTCGGT -GCCGCCGTTCAGGGTGGTATTCTTTCCGGTGATGACTCCATGGTCGATATGGTTCTTATG -GATGTCAACCCTCTTACTCTCGGTATCGAGACCACTGGCGGTGTGATGACCAAGCTCATT -CCCCGCAACACTGTTATCCCTACCCGCAAGTCGCAGATTTTCTCGACTGCCGCTGATAAC -CAGCCTACCGTCCTGATCCAGGTGTTCGAGGGTGAGCGCTCGATGACTAAGGATAACAAC -ATGCTCGGCAAGTTCGAGTTGACCAACATTCCCCCCGCGCCTCGTGGCGTTCCCCAGATC -GAGGTCGCCTTCGACCTTGATGCCAATGGAATTCTCAAGGTCAGCGCCTCCGATAAGGGC -ACTGGCAAGGCTGAGTCCATCACCATCACCAACGACAAGGGTCGTCTATCCCAGGAGGAG -ATCGACCGCATGGTCCAGGAGGCCGAGCAGTACGCCGAGGAGGATAAGGCCATTAAGGGC -AAGATTGAGGCCCGTAACGGCCTTGAGAACTACGCCTTCAGCCTGAAGAACCAGGTCAAC -GATGAGAATGGTCTGGGTGGCCAGATCGACGAGGATGACAAGCAGTCTATCCTCGATGCC -GTGAAGGAGGTCAACGACTGGCTCGAGGACAACGCCGCCACCGCTACCATGGAGGACTTC -GAGGAGCAGAAGGAGCAGCTTTCCGGTGTTGCTTACCCCATCACCAGCAAGCTTTACGGT -TCTGCTTCTCCTCCTGAGGACGATGACGAGCCCCTTGACCACGACGAACTGTAAAATTAT -CTTTGAGGCATTATTCATGGAGTTCCACTTTTTTTTCTCTTGCTTGAGTCACTTTTGTGT -TTAAATCTACACAGGGCACTGTTTGTGTCCCATCCAACCTTTAGGCCCGGTTGAACGCCG -GCTAAGAATACAATACAATGATACCTTGTATTTTGTATTCAAGAAGTGTTCTGTAGTACC -GTAGTATCTGGCAAGCCTTAATAGATCTTTCATCAAGCTAAATACTGTTCACAAGTATAC -CTTGGTAACCTTGGTGAAATTCCTATCTCCAAAGTACATTATGCCGCACGTATGTACGGA -AGGTTCCAATGCCCTGCTTTTTCACACTTTCTTATAGTTCGCACTAAATACGACATCTAT -CTTACTTGTTGGGCCACAAATGCTTGGATAGTCTCCGCAACAACCTGATGATCATCCTCC -TGCTTCAACCCACTAACAACCACCACACCAATAATCCCATCAACACCATGCACGCGAATC -GGGTACCCACCGCCATGGATAGCATACTCATCCGCAACAGCACCCCCGGTGGCACTCGGA -ACAGCATACTTTCTGACGAAAGCAGCCTCGATCTGATCCCCTGAGGCACCAGCTGCAAGC -CCACTAATAGTCTTCTGTCTCATAGCCCAGCTCGAGACACCCCAGCGGAGCACGGTATTG 
-CGCTTCCGTCGCACCCAGTTCTCGTTGTCCGGGATCGTGCCGCTCTCCGTCACAGACTGA -AACACTACGTGGAGTGGGACTCCGCCGGCGAGTGCAATGGAGATCAGGGCGGGTTTGCGC -TGCCCGTGCGGGAGGCGAAGGATTCGGTCCCTCAATGCATTTCCTAGCTGCCAAGCAGTG -TCTGCTGTGAACATGTGGAATATTTGGTCTGTGCTTGTTTCTTGCGCGATCAGGACTTGC -GGCTATTTCTTGATTAGCTTCTGGGTTCCAGCTTGTTACAAAGGAGAGTCTCACGTCCGT -GCTGGGTGCTGATAGAGCCATGATATATGAGAGGTAGTTTGCTAAGGTAAATGAAGAAAA -GTTGAGGTTAGAGGGAATCAAATGGTATTTCTACAACATAGTGGGGGATGGTAAGGGTGG -ACCTTCCTCTTCTTACATCGGTGACGTCGCCGAACTCTAGACTGTGAACGTTTCGATCAT -CACTTCTATGCATTCGATCTAGATCCTCACGATGTTCTGAGGTATGTACATCGGCAAACC -TGCCGAAAAATACCCCTCCTTAATAAAAAAACGCCCTCTCCGCATTCCAATGTGCATCCT -CAACATGACACTGTCAAGATTTTAGGTCCCCATGCCCCCCAAACGAGCTTGTGATACCTG -TATTTCCCGCAAAGTGAGATGCAGCGGTTCCTGGCCCTGCGATACTTGTCGCGGCGCAGT -CAAGAGGGTGCGCTGTACGTACCTGAGGCCTGCACAGAGGAGGGGCCCGAAATCCCGACG -AGCAGTCAGCAATCGAGAAGCCGACCTCCCGGACATTCAATCTATAGGGAGCCCACAAAA -TGATCAAGAAAATACATATAATCACAACGCGAGCGATGAGGCCGTGTATCCATCCACCAC -AGCCTTCAGCCCCCAGCACATTTCGAAAACAATTCTTGCGTCGGTGATCCGTCTGTATCA -GCAATGTTCATACGGAGTTTGGCCTGTAATTAATGCAGAAGTCCTTGTGGAAAGGCTCCA -GGATATATATCCAGAAAGGCTTGACTGCAATGCTGGAAATACTTCCTGTCTCATCCTGGC -CCTCTGTGCAGCGACGATGGCTCAGTTGCATCTTGGTCCCTTGGTAGATGGATCTCGGAC -AGTCGATAGCGCTACAATGGCTCAGTGTTGTTTACGAATGCGGACATGTTGCGATATTCA -TGGAGCAAGTCTGGATCTAAGAAGCGTTCTGGTGTCTTTCTTTTTGCATGTGTACCACGC -CAAGGTCAATCAACGAAATTCGGCAATGATGTTTATCCACGAGGCTATGGCCGCAGCTCG -TATTATGCGACTGAATGAAGAGGCCTTATACAAGGAAGATGAGATGATTACAAACACGGC -CTTGGTCTTTCCTTTGCTTTGGGTATCAGAGAGGTAGGCTGGAACCTATGAACTCGCGGA -GTAACTTATTAACTTTCCATAGAGGCTACTCACTGCACTTGGGTCTCTCTCCTTCATATG -TTGACACTCCTGAATTAATTAATTTAGAAACAAACACGAATGTCGATGTTCATGCCCAAG -GGTTAATAGAACTCATCAAGCTCTTTACTGCATTTGATCAAATCTCCATACGGCGTAACT -CGCGTCTTGGGATCACCACGGCTACATATCTAACCGACACCGAGGCGAAACTTGCTTCGC -TATGCTTTAGTATGGTCGATCAAGTTTCAGCCCGAACGGCAGACTATCACATCACGCGGG -AGTGGATGAGGACCATTCTTTGGCAGGAAGCACTGACTATGGGCCTGTTGTCATCGTCAG -CATCTACATCCGTCCTGGGGTTTGGATTTCCAGCGCAAATTGGCCGTGCCTTGCTGCAGG -CCCTGCGGGGATTCAGTGAAATGGATTTGCTCCCATTGGGACGAGACCAGGCTAGTAGCT 
-TTGAGAGGAAGGGAATCCAGAATATGGCTGACGTCCTTTTGTAGTTACTGAAATGTTTCG -AAGTCGTCAACTCACTCGCGGACACGGTACTTTTATCCCCACACAGAATACACTCGCGGT -TTGAACTAGGACCACAAGACTTTCTGCATGCATTGTACCAGAAGATCGTCCCCTTTCTCG -AACAAGATACTATGTTAAAGTCAATCCTTCGCGGTAAAACTGCTGAAGCCCTAGTCACGG -CTCCAGCCCGACTACTGACGATTGAGGAGGAAGATTCGGGCTTGTACTCGGGCGAACATG -GGCCGGCTGACCAGGCTCAGATGACCGGATACACAACAAACGGCGAGCACGATGAACAAA -ATGAATTAATGCTAGGTTTCTTGGATTGGCTACAACAATCGGATCTTCAGATAATCTGAT -ATGAACATTGATGACAGTTTGATCAAATTATTCTGTGATCTGAAGGACAATCCGACCCGA -AATATCACCCTTCTCCATCTCCTCATAAATCTGAGCCAGGTCCTTGAACGGCCGCACCTT -GACCACAGGATTGACGATACCACGTCGTACTAGATCAACAGCCTCCATGCATTCTTTCAG -CGAGCCCACCAGATTACCGGTTATCTTGAGGCCTTTGATCACAATAGTACAGATAGGAGT -CTCCAAGCTCGGTCTTCCCGGGGGAATACCAACGCAACTCAAAGTACCACCAACCCGCAG -CATATCACAGGCGTGTGCGAACGCCTTTGCACTACCAGCTGTAACTACAGCAGCATGTGC -ACCTAAACCAGTAATTTCTTGCACCCTCTTGATGGGGTCTTCGGAGGAAAAATCAATAAA -TTCCTGCGCGCCGAGACCTCTGACAAAGTCTCCCTTACCGGGACCAGTGTCGACGCCAAT -TACTAGAGCCCCCTGAGCTCTTGCATATTGAACTGGACAATGTCAGTCAGCTGTCAGTCC -GGATCCCAGATTGCCCACAAAATTGAGAAAACATACCAGCCAAATGTCCGAGGCCACCTC -CGGCCCCAACAACCACCAACCAATTCCCAGGACGAACATTCGCGTTCAAGACGGCCTATC -CGAGTACTCATTAAGCCTGAACTGCAAAAAAAAAGTACTCAACCCACCTTATAAGCAGTT -AACCCTGCACAAAGCACAGGCCCCATAACCTTCGGATCAACATCATCAGGCAAAATCGTC -AAGTAATCCGCATCAAGCGCAATATACTGTTGGAAAGAGCCGTCCTCATGGTGGAGATGG -TTCGTACTCGCAACGCAGTATTGCTCTGTCCCCGCAAGACAGAAATCGCAGCGTCGACAG -ATGCGGCTCGCGAACCGGATCCCAACCAGCCCACCGACTTTCAGACCCGCAGCATGCGTG -ATATCCGGTCCAAGCGCAACCACTCGGCCGATACCCTCATGGCCGCCCACGTGTGGCAGC -TTGATCTTTGTAATCGGGTTGCCGTCTGGTCCGGCGGCGGTGCCGGCTTTAGTGTGAAGG -TCTATTGGCGTGTTAAACTTGGCCTTGTTCAGATAGCATCCTGGGGTACTTACCGCTCTG -GCAGACGCCGGTGTAGAGCACTTTGGCCAGGACTTCGTTGCGTCCTGGGACTGGCACGGG -GTAGTTTGTTTTGAACTCGACTTGACCGCCCAGTGAATGGACAAGAGCGACTGTTTGGGT -TTGTGGAATGTCGAGGGAGTTCATTGTTATTGGGATATCAGCTTGCTGAGGAGATATTGG -ATGGAGATGGACTCTGAAGTGTGCTTTGGCGAGACTGGACTCTGGCTATTGAACAAGCGA -CAATGCTATATTTATATCCAAAACTCCAGCTTGCACTGTAATACTGAGAATCAATCCTGG -GATGTCGTGTCATCAATAAACCACGCCAATACTCCGAGAGCACATCAAGTGATGCGTGGA 
-ATGTACATCCTCGAGCGGGGAAAGCTGTTCTGCATTGTTACTTTTCTCTCCAATTTAGGA -CAAATACCAGATATAGCTAGTAAATAAGGTGATATTCTCATGAGTGATGTGGAACCAGAC -CGTGTGGCCCACAGGCAGCCTGTGTGTAGATGTGTATATCGAGATATACTCGGATAAATA -GAGAGTTTCCAATCTAAGCACTGCTGATTATTGGTAGTTCGACCAGAAAAAGAACTTCAT -AGATGTGGCGCTGCATGAAGTCCTGCTAATTCTCCGAGTCAATTCTCCGGTGGGGGGTTG -GGTGCATTGCTGATCTCACATAGGGATGTCTACAAAAAGAAAAGACGAGAATCTGTTCCT -TTCACTATCGGGCCTAGGTGAGCATTCCTTTTTTTTTTTTCTTTTACCTCTTTTCTTGAA -ACTTGACGCTTCAAATCGCTATTCACAGGCAAATCGGGGTATGTACAGTCGCAATAATTT -CAGTCCCATGTCCGAGTACGCCATAACCTCACCAGAAACGCCGAGGTCAAGAGACCAGAC -TCAAATCCCCGAACACCATCCCCAACGTATGTACAACGCGAAGCCAAAAGCCCTGGCTCA -AAGCCACCAATCGTTATTCAGATAGTCATGCGCTCTCAGAGGAATATCGGAAATTTCGTT -AAAGATTTAAACATCGTCCAGGTGTACTGCACGGGCGTAAGAACCCTTCGTGGGGGTCAA -AGGTGAAATATGGGAATCACCAGTCTCCAGATCCCTGCGCACCCTGTTGGCATGAGGGCT -GGAGTGGGGGTCAGTAAGCGGAGTGACGAATAACTCGCCGTTCTTCTTGCGCATCCAGTG -TATACGCAAGTGATCGAGGGCGGTCTTGAAATCGTCGAGAATGACTGTGTCGCAGAGGCA -TTTAAAGATGAAGGATAACTGGGATGGTGAGTGATGTGGGGAGATGGACCTCAGGTGGGG -GATGGGACTTACTTTCCAGAACGGCTCGACACCTGTTGGGAGTAAGCTGGAAAATACATT -CAGCACGCTGCAGGTGTCGACAATCATGAAGATAATTGAGAGGCACATTGCCGCTAGCAT -GATTCCGAAGCGTGGGCTGACTACTACCAGCTCCCATAATCCGAAGTTGTATTCGCGCTT -GATCGTCCAGAACAGGCTGGCTGTGGTGAAGATCCACCATGGGTCACTGTTGACGGAGGA -TGTTAGGGCCAAGGTGGTTCAAAGATGAAGGAGAGCTCGGTTTTGGGCGGGAAGGGAATG -ACGAACCGAAATAACGGCTCCATTGGCCGAGTCGTCAGGAAAATCTTATTGATATTGTTG -AAGTATGTGAAGTTGGCATAAATCTCCAGACCCCAATAGGGCCAACAGAGAATGACAGTG -GTTATGTAGATGAGTGAGACCTTTCGCGATAGGAATGGGCGGTTCTTCATCCAGGCGATA -ACATTATGCAGAGCCCAGGAAATGTTCAGACCGATCGCGGTCACGGATAGATACCAGCCG -TAGACAGGCTCATGGGGGAAGATGAAAGTACCATGGGCCATACCCAGAATGAGCTACATT -CAATCCAGAGATTAGCAATTGGTTGGTTGCACTCATCCATGGGCCTTTTTCCCTTCCACC -TACCTCGGCGAAGATCAGCTTATGCAACAGAATATGGGACTTCATATTGGCCAGAGTAAT -AGCAGCCATAATGAGTAGACTTCCGACCATCAGACCCTGACCCCAAGCTTCGAGGATCAA -GGCATCGGCCTCAGTGGGCTTCGAAATATGATCGGTGGCCATCGTGGAGTGTATTCATGT -CAAGCGTGGTTCCTTAGGGGTATTGTGGGGGAGTAAACAAGGATCAAGCGAATGAGATGA -GGGTTTGAAAGCGCAGTGGAAAAGGGCAACTTTCATGGAGCCATGTCATAAAAGGAGCAT 
-TCACACAAACGCATTAGGGTTTCAAGTTCCAGAGAAGGAGATTGATCATTTATAACGCAC -ATTTGTACGGCAATTTCGACCGTAAGATGATCGGGCTAAAATCCCGTAAAACGCAATTTG -CGAGCGCCACTAGCCTAAGACAGGGGTATAGGGGAGGCAATTATGGGGATAACTAGGCCG -TCAAATCAATTCCCTTCCAATGCCCCTATTGGTCCCGCCAAACATCCCCATCAATCCCCA -GACGGGTTTATCCGTATATGACTCGGGATGTCCGTGCCTGCGGAGAAAGGCGCTTGGGGA -GATGGGGAGAGCTACTGCACGGTCACTGCACGGTCACCGGGGAATTATTTTTATGATCAC -AGGGCAAATGGGATGTTCGAGACATTTCAGAGAGTCaacaagaatatagaaatcaaagaa -ccggaacagaaacgtgaagagagagcgaacagaaacagaacaccaaaaacTCTATAACGT -TCAGGCCTTTGTATTCCCGCTATTTCTCCGTCTCTCAACTCCTAACTGGAGGGTCATTCG -AAGCGAGCTTTTAGTCTGGAATTGTTTCACGTTTCTCTGGAAATTGACCCGCCACAGGTT -CATGTTGGTGACCGGATCCCTGCCGTGAAGTCTGGGGTTACCAATGACACTCTGGAGCGT -TTGGAGAGTATATCCTCTCTAAAAACATCCCCGAGTACGCCATGTATGTGTGGGGATATC -ACCGTGCAGAGATTCGACCGTTATCTCAGATATGCGATCGCATGGGTATCAAACTGAACG -GGAAGCCTTAGCAACCATTTTCACCACGCCAAAGATCCCACGGCCGTTTTGTGGCTCTCC -CCTGCATTTCGACGTCAGCGCTATCCTAGCCATCAACACTGTGAATCCAGGGTTTGTATA -ATGCAGCCATTTGCTAATGCGTCTAGACTAATCATACGAGACAGCGTAGGCCAAGATGTA -ACCGTCTAACCGTCACGCCTAGCGCGAAAAATCATTGGAAATAGGCAATCAACCCCGGAT -CTCCTTGACATCTCAGCCGAACACACCGGGGTAAAGAATGTTGATACCCTAATTCCTCTG -CCGGTACCCGTAGAACGCTGGGTCCATACGCATTTGATATACGAGAAAAGCCTGCAATCG -TAAAACACGTTCGGGGCCCTAGGCCGGCTCGGGTGAAAGGAGCTACAGAAGGTCTTTCCC -TCTATCTCCCCTTTTCTCTCTCTCCCGTCCGTGCCACCCTATAAAGTGAAATACAGTCTT -TCTCTCCGCCTATTTTTGTTCCAAATTTCCTAGAATATTCTATAACAACAAGAGAAAAAA -AAAGTCCGGATCGGGGGAGCATTTCACACTAGCATTTTCCCGGTGAAAAGAAAGATTCCT -ACGATATCACTGACAGGTACCAAAAAAAGGATAGGTCTAGAGCACTTCAAAGACCGTATG -ATGATCTATTCAGGCAACACCATCCTGGTTGTCACCCCTATTCAAGAAAATGACGGTAGC -CAATTTCCGGTGCAGTTGGGAGACTTTCTTCCCGACGGCTCTACAGCCCGACCATCGCTC -TTCAATGCGGAGTCTGGCTTCTCGCGATGGTGCGTAGCATGATCAAATGCCACGTTGACT -GGTAAAGTGAGTACTAAAGGCGACGTTGCTCTGCCTACTTGTAGCGAATGTTGATATGGT -ACACCATCGGATCGTTTTGTCGTTAGCCATTGATGTTAGCGGTATTGTGGAGTTCAGACC -TACGAACCAATTATACCCTGACGGATAGAAGACCAAGTCAAGAACAATGCAATATATACA -GGTTGCCTTTGCAGCTCGAGATCACAAAAGGCTTATATTCTCCGATATAGAACGACATGT -CAAAAAGATGGGAACATCAGTGCCGCACAGCCGAGGAAACTGCGACTGATCGTCACGCTG 
-GTTGTTCAGCCCGAGCCTTCACTGTAGGTGACTGTGGTATCTTGATCGTTGACATTTCAC -TACGGTGTATACCGTCTTGTTAGTTCCCGTGCCGGCGTTGATCACTTTGGAACATCTCCC -TTGTTACGAATCGCGCATACCACGATTTATCAATCAGTTTGGCTCTTGATGTGGAGCAAT -GACGACGACCAAGGCTATTGATTGACAGTCCCACAATTCCGAAGCAAATTGAAGCTGGTG -GCCAACCTGGTTGGGCAATGGCTTCATCGCAACCCACTTGGCCCCGCTGGTGGAAGGCCC -GTGGAAGAGCAATGATCTTCAGGGCTGCATTTTACCGAAGACTTATTCTCTGATTCTTTT -AAGGAATCCTACTGTTTACTGGTCTCTATTCATTCCTTATTGACCCCGTCGTCTGTCTCC -AGTTTGGAAAATACCTTCATTGAGCTATAGAGCACACTCCTTTCACGCTGTTCTTTTTCA -ATTCGATATGGCTCCATGGTGGTACACAGCAGCCAGCGCCCTGGCTCTCTCTCTTCAGGC -GACTGCCTGGGATTATTCAACTTACAGCTCGATGGAATTCCAGCCACCGAAACTTGAAGT -TAACAAGACTGGAACTACCGATCCCGGCTACATCTTCCTTGGTCCCCGTGGAGACGTTCA -GGACGACGGCTCTGCTGCGCTCATTTACGATGATACGGGTGACCTAGTGTACGAAGGCAA -TAAAGGTGTAACTGCCAACTTCAGGGTGCAAAAGCTCTTCGGCAAGGATGTTCTCACTTT -CTGGGCCGGAGACATGATGTCCATCGGCTACGGTTATGGAACTGTGCATATCCTTGACGA -TACATACAAGGAGATCTACACTGTCCAATTGACTGGTGACTTCGTCACCCCAGACGGGTC -AAGCAAGGACTCATACTGTGATCTGCACGAGAGCCGAATTACTTCGCGCAACACCCTCAT -TTGCACCGCCTACAACGTCACTCAGCATGACCTAACTTCCATCAACGGGACCTCCTCTCA -GTGGATGCTGGACAGCCAATTCTACGAGATTGACATCCCTACCAACCAAGTTCTACACTC -ATGGAGTGCGCTTGACCACGAAGCCGACATTCCCCTGACCACCTCCCATCAGGGTCTCGG -AAAGGATGTCGGTACGCAGGATGCCCCCTATGATGCCTACCATATAAACTCCATCACCAC -CACCAACCATGGCTATCTGATCTCGCTGCGCCACATGTGGTCAGGCTACTATCTGTCCCA -CAATGGAAGCGTGATGTGGCAGGTGAGTGGCGAGGACGGTGCCGACTTCAAGCAGATCGG -CAATGCTGAATTCTCGTGGCAACATGACATGCGTGTCTACAACGAGACTGATGATGGTTT -CGTCCTAAACCTTTTCAACAACGCGAACACTCCCACCGACGAAGAGTCCGAGTCCACTGG -CATCAGTCTAGCCATCGACCTGAACAAAAAGACGGTCACCGGCCTCCGCTCCCTGACCGA -CCCCAACGACCCCGTGCATTCTGTCAGCCAGGGCAGCTACCAGCTCCTCAGTGAAGTGGA -CAGCCATGTCTTCCTGGCCTACGGATCCATTTCCAAGGTCAAGGAATTTGATGGTGACGG -TAACGTCGTCTTCAGCGCTCAGTTCGGTGACGACAATGCCGTTGCCTCTTATCGCGGCTA -CCGGTACCAGTGGACCGCCACTCCTTTCTGGAAGCCTTCGGTTTCGATTGACAACACCAG -CTCCGGTGCCACCGTCTACATGAGCTGGAATGGCGCCACCGAATATGACAACTGGGTGGT -CTACGCGGCTTCCTCTGCCACTTCTACCAACAATACTCAGATCGCTACGGCTAAACGCAC -CGGCTTCGAGACTTCTGCTGATATCGCCGACCCTCCTACAAAGTTTGTTCAGGTTGTCGC 
-TCGCAGGGGGACGACGATTCTTGGCACTTCTGAGATTGTCTCATTCTAAGCAGGTCTAGA -ATGCCTTTTGAGTTGGCACACTACCTATACAGAGTTGCTTCGGTGACCCTCTTCTTTGTA -GCATCTGTCTTTTATGACTCCCCAATTTAATCTACATGTTGATTTGTTCACACATTTTAT -ACGATTGCTTTCTTGCTGTGCATGATTGATATTGTATTTTGAACATACAAACGTTTCTTG -AGCCATCTTGAGCTCAAGTGAAGATTCGAAGAGTCAAAAGCTTGTCCTTCGCCTAGGCAA -AGTGAAGATATCTGAAATGTCAGATCTAGATATCATTCAGCCACACCAATCCTTGGGCAT -CTAACTGAAATGCCGAGTTAATGACAGTATGGTGTTACGCCAATCAAATATTTCGTATGC -CACACAGGAGCTGTTAAGGTGTCTCACCATCTTCTCCAACGCAGATTGGAGATGTCAATG -ATTCTACAAATGAGCGATCGACTTTGGTAGTGTGCCACATGAACTAAGTCGAGTTCTTCC -TGTTCCTTGAGTTCGTCTGTGATCAAGCCTTTTCGGAGAGAGGCATATAGAGCTCCACGA -TCGGCTACTACCATTCTCTATCTGACATTCCTCGGCGGTTCCAAACCCAAAAGAATTCTC -CGTGTAAGTTCTCGAAAGGAGAGCTCACATTCTTCAATGAGAATGAATCCGTGGGACGGC -CCCAGCCACGGGTGTTTCCCATGCTTGTTTGCTCAATTTCATCCAGCCAAGGCTTCCGGG -GAGTATCAGCCAGCTTACCTAGAGCGTGTCATGCCGTCCACGGGTGTAACTGACTATAAA -GGTTGTTGCAGTATCATTCGTCAACTTGGTCCAATGTCTGACTATTCAACGTGGATTTGA -TTTATTTATAAAACAGTTATACCTAGAGGTGGCTGGCCAGTCACATTCTTCAATAACTTT -GATCCTACACAAGTAAAATATCTAGTGTTTTGACTTTTCTCATCCTATCCTCGACACACG -ATATAGAAAGGCCCTTGCATTCCTCGCTGTAACGTGGGAATCTGAGAAATCGAGATCATT -ATGTACATAGTAGAAAGATCACATTCTTCATATTCTTTTTTTGAACAATCGGATGTGTCT -CCGATTTGGCTACTACTTCGTGGTACTTTCACATCCCTTCATTTGCACACCTACATCGCA -CAACATAATTTGGCGTCCAATCTCCCCTGAGCCAAGATTCATAGCAAATGTCTAACGGCA -TAGTGCCTTGAGGAATTGGCTCCAACGTCCAGGGTCACAGTGCAACACAGCAACAGGAGG -AATGATACCAATTGAACAGGAACATCTATATAAATCTGTCCTAATTCCTGTTCTTAACCC -AAGATTCATCAACACAATAGTATTCCTGACAGTTCTACCAGAAAGCAAGATGTATCTCTC -ATCGATCTTGCCCCTCGTTTCGCTGGCCACCCTCGCCTTCTCCAGCCCAACAATTCATAA -CAACAAGCTGGAATCGAGAGACGTCCCCTACGGCGCCTTTGCTCCCTCCTCAAGCAATGG -CCAATGGATACCAACCAGCACACCACCACCCACACACCAAAGTCTGGGCCATGCGATCGT -GCAAAACAATTGCCATTTCCCCATCTACATCTGGTCCGTGGCCTCTACGGTCCTACCCAT -GCGAACCATCCTCCCGAACGACCAGTATCGGGAGCTCTTCCGTGAGAATCCCGACACTGG -CGGCATCGCAGTCAAGATCAGCACCAACCGCAATGGGTTGTACACCAGTGCTCCGCAGAT -GATCTTTGCGTACAACCTTTCCTCTTCACAGGAACAGGGTCAGAGACAGGATAAAGTGTG -GTATGATCTCTCCGATGTCTTTGGTGATCCGTTTGTGGGGTACACGGTGACTCTTAGACC 
-TGCTGAACCGGCGATTTGCTGGAAAGATGGGGTTCCACCTGCTGGCAGCCAAGTGAGGGT -TGTTGATTCTTCGACGGATTTGGTCTTGTCGGTGTGTGGAGACATAATTTAGGTTGGAAG -GGTATGAGATCGTATTTGGACTTCTATTTGATACATTAGTGAGGTAGAATTAATTACTCT -TTTCTCGTACGATATTCAAGAACCTCGGCAGTGAGTAAATTGTTCTCCAAAATGGAGAAG -GTGGAAGATCCCACTGGATTTTCCCCTTGGGGCAAGTAAATTGACGATGTTCGTCTCGGC -CAGAAGTCCGGGGCATTTCCCCCCCCCCCGCATTTCCCCAATTTATCTTGGCCTCCTTCT -TTTATTGACTCTCCGTCACTTTGCTCTTCTCAACACACAAAAATGGTTTCCCCAGCTGTT -AGAGTCGCTCCCTCAGCGGCTAATCGTGCCCTGAACTTGCTACGCACAGTACAATACACC -CACCCACCCAACTGCCCGTGTCACAAAAACCCCAGCCATCACCATAACCACAAGCAAAGC -TCCTCTCTGCTTAACCATGTGAAGCGTCGCATGTCCACACCCGTAGATCGCTCGCGCGAA -AAGGAATATGCCTTCGAGATGGCGGCGTCTAGCGTTCGCTTCGGACCCGGGGCAACCAAG -GAGGTCGGAATGGACTTTACTAACCTCGGCGCTAAGCGTGTGTGCATCGTCACTGATTCC -AATGTCGCCAAGCTTGATGCTATGAAACAGGCTGTTGAGGGTCTCACCCGTGAGGGCATT -GAGTTTACGATCTTTGACAAAGTTCGCACCGAGCCCAAGGATAGCTCGTAAGTCAACCTC -GAGCTGTAAGCGTGGCTACAGGTACTAATATGTTTTTGAATTTAGTGTGAAAGAAGCGAT -CGCCTTCGCGAAGCCCTATAACCCCGATGCCTTCTTGGCTGTTGGTACGTGTCTCCCTTA -TCTCCGCCCACCAGCAGATACAAATTAACACCAAAATAGGCGGCGGCTCCGTCATTGACA -CGGCCAAACTGATGAACCTATACACCGTCTTCCCCGAAGCAGACTTCCTTGACTTCGTCA -ACGCACCCTTGGGCAAAGGTCTCCCCGTAGACAGGCCCCTTCGCCCCCTCGTCGCCGTCC -CAACAACAGCAGGCACAGGCTCGGAGACAACAGGTACTGCAATCTTCGACCTAGTCTCCA -AGAAAGCCAAAACAGGCATCGCCCACCGCAACATGAAGCCAACCCTTGGAATCTGCGATC -CGCTCAACACCCGCACCATGCCCTCAGCCGTGGCCGCTGCCTCCGGCCTCGACGTCCTCT -GCCACTCTCTCGAATCCTGGACCGCAATCCCATACTACGAGCGCACCCCGCGCCCGACAA -ACCCCATCAACCGGCCCGCTTACCAGGGCGCAAACCCCATCTCCGACATCTTCTCTCTCG -CTTCCCTGCGCGCAACAGTCAAGTACCTACCGCGCGCAGTCCGCGATCCAGATGACCACG -AAGCACAGTCAGAAATTCTGCTTGCGGCTACACTGGCCGGTGTCGGCTTCGGAAATGCAG -GCGTTCATCTCTGCCATGGTATGAGTTACCCCATCTCCAGCCAGAATCCTGGATATAAGC -ATGCGGGCTATCAGGTCGATCACACAATTATCCCCCACGGTGTTTCTGTGGCTGTCACTG -CACCCGCCGTCTTCCGCTTTACCGCGGCGTCAAACCCCGATCGCCATCTTGCAGCGGCTG -AGGCGTTTGGTGTGGATATCTCGAATGTTAAGCGTGAAAGTGCGGGTGAAGTGCTTGGTG -AGGCAATTGCGAAGTTCCTTGTTACGCTTGGTGATCAGCCACGTGGGTTGAAGGATCTAG -GTTTCAAGGCCGCGGACTTGGAAGGATTGGTTGATGGAACTATCCCGCAGCAGCGGGTGC 
-TTATGCTTGCGCCAAATTTGAGTAAGGAGTTGGAGGCTGAGAGGGGTGAGTTGAGGAATT -TGCTCGAACAGTCTTTGGATTATTAGGGTGGTAGATACATCTAAATTCAAATGGATATCT -GTGTTGTTTCTTTGTTCATGCGGGGAGGGAAAATAGTGAATAATGTTGTGTGGCAGGGGG -GAAGTTCATCCTAGGCCGGCTAGGAGCATATATCTTGTCGTATCACATTTCATGTCAACG -CAGGAATCATGGTGGATTATTGTAGCGCCAGTAATTGAGGGGAGCTCTCCGCCTGTAGTG -AAGCACTCGAGCTATCACCCGGGAGGACCTTTGCCCCGAGGGCACCCATCGACGACGCTA -TGCGATCTAACCAGATCACAGCACTGGGATCATTTGGATCAGAGGTCCTAGATGGACCCT -GTCCAAGCTCCGAATGAATGCGAAAGGGGAGGCCGAAATTCTTCGACGTCATTGAAAAGG -GGGATATAGGGTATTCACTATCGATGCTGGAGGGCGTGACGACATATGACTCCGGCGAAT -GATCCTCGGCGGAATGAAAGCGTAATCGGTCTTCAAGGCGGTCGGAGAGCTGCTGTAACG -TCTCTAACATATTTTCTGCTCCATTTAGAAGCAGCGACGGGCCGGTATCCGGGAAGATTT -CTGGTACGGGGCCCACGAAGGATCCGGGTCGCTGGATGATACCTTGCTGGCGCTCACTGT -GCGCGCTGATACAGGATCGGAGCTCGGCGGCGAGTGAGCGTAGCCGACGAGAAGTATCAT -GTATGCGCTTGGTGAGATTTGTGATGGCATTTGGGTCTGGCTCGGCCGAGACGGAAAGGA -GATCGCGGGTATCGCTGATATACCGCGGGCATTTGGCGATTTCCATGAAAGCAAGATCGG -ATATTTCTCCGTATATGGACCACTCGCCGGACTTCTGGCCGTCTTCGGTCCTCACTTTCA -TCGCGTAAGATTGCCAATCCTCTTGGTCTAGGAAGCAGGGTTTGCCTTCGTAGACTGATG -TAGCGATGAAGAAGCCGCGGAATGCAATCCAACAGTATCTTCCGAAGCCGGACTCGTGCG -CTTCTGGGCCTCGGCTTATCATCAGTCGTTTGGCGCCATCGGAATGCACAACCCAGGCAT -CGGGCGTGGTCTGCGCGTACATCTCGAAGACACTGAGGATGATGGTTGTGCATAGCATGT -TGTCGGATTTGGCATGGTCGGCTTTGGGGATGGCCTTGATGACCGCCCCCAACGACTTGG -AGTACATTTGTCGACTTGCTTTGAGAAGAGCTTGATCTTGAGTTAAGTGGGCTAAATAGA -CCGAGCTCAAGGAGCAAACTGCGGAGTCGAGGTAAGCATCTTTGCCAAAATTCATAACGA -TGTATTCGGCGAACCCGGGCGCATTGCCAGACCGGAACCGGTTATGATAATAGAGCGTTG -GAAATGACGTGCTGATGAAATTGAGAAATAATTGTGGCTGTGCGGCACGGAAGGACTGCC -GCACCAGCGACGGCGACAATTTCTCATCCCAACCACCTAAGGAAGAGTGTCGCCTCATGA -GGGCAATCGCGTTGCTGCGCACTACATCTGCAGCGGCAGCAGACTCATCGTGTGCCGGGG -GCGACCGGTCAGTTGGGGTTGAAGACGTAGCCGATGCGGGCGGGTCTGCCAATGCATCTC -TTCCCCTCCGCCGCGCATTGGAGCGATGTCGCCGGGCGAGGTTGGGTCCTTCGTCTTGAA -AGCGAAAGGTTCTCCTATAACCCGGACAGGCACGACCATATTTTTGGCATTGGCTGCACT -CTGGGACCACCTCGTCACACTAGGGACTTTATTAGCCCGAGAGCCTCAATCTTAGGATTG -GAAATTGACCTTAATTCGTCGCTCGCGACACAGGGCACAGCCTGTACTGTGCGGAACTCC 
-CACCATGGTGTCAAGATTGAGAAGATGTCATCACGGGGGAAAAAGGCTCGGAGTTATGTC -CCCATCTTTCGGGGATCTTGAAGGAGGCTGATTTTGAGGGGGAGAATGAGCTGTCAGCCC -TAGAATGTTGGGCCAGGTTGGTACCGTTCACATTTTGCCAAGCGTCAAAATGGCTCTGCG -GGGAAAAGCCGTTTCAGGTTATTTTGGATCTTCAGATGTTCATCAATTTGACCTATTATT -GACTTATTTGCCCAGTATTATTGGACCTGGAATTCAATTTCAGGGTTTCAATTCTTTTCT -CTTCAATTCATTAGAAACTACAGTACTCCGGGCACCTCAAGGTACAGCATAAGCCAAGGA -TACATCCCGAGGGCAGGATACTATTACTATTTGTAACCCCCTATTTATAAATTGCAATAT -CCATTCAGACCAAAAAAAATCTCATGACAAAGTCATATATTTCGGCTATCACTGGGCTTA -CGATGGCCGACTCACCAGAAAAGCCTCCGCCTGCTAATCTTGAAATAATAAGTACATCCA -CTCAATGGGCGGACAAGGCTACTGCGAGCGATGATCAACAAGCCCTCCAGAACTCTGAGC -AGTCATATGGTCGGTATATCTCCCATTCTGAATTGCAATCAATAGTAAATATTGAGAAAA -CTTGGTGTGAGGTTTCATTAAGGAGAATTTCGAAATGGTGGTGCTACTTGCAGGTCTCGC -CATTATCGTATAATCGGGATGCGAGCCCTTCCTGCATGCACACTCTGGTATGATGAACAT -TTGGGTAGAACATCAATGTTTCTGGGATTGACACCGCTTTCTTTGTTCACCTCGAGGAGT -CTTAGGAAGGGGTTCCGGTCTCTGTCTCTTGGACAATTGGTCTTCCAGAGGACCATCCAT -TTCTAGAAACATTGACTTCCCAGTGAAAAGCGATTGAAACCAAGATCATGGATAAATACA -TGATCTGGTCCTTATACGAGCATCATATGGGAAGAGGGCGGAAGTATGGCATGAAGATGG -GCAGGGTGAACCTGAAGTCGAACGAGATGGATATAGCATATCTATACTAACCTTGAGTAT -CTAGCTCTCGAGATACTAAGTATAAGATACCTCCAATTATGGTACCCCGCGTTCTGTACG -ACTGGACATATTTCATATCAGGATGATTGGGCAAAGATGAGACAGCGCGCCTGCCGCGGG -GAGTAACACTGAAATTCAATGACTTCAAGTTGGACTGTTTCTTCTCTTTTCCCTCAATTT -AGTCAATTGAATCATGCCTCTTCTCAAAGCATCTCGTCTCAAGGTCCTTGTGCCCGAAAA -GCTGTCATCGGATGGTCTGGCTCTTCTGCATAGCTCACTGGATGTCCATGAGCGAAAAGG -TCTCAGTGCCGATGAACTACTTCAGATAATCCCCGATTACGATGCCCTTCTTGTTCGCTC -CGAGACAAAAGTCACTGCCACTGTGCTGCGAGCTGCAAAGAATCTAAAAGTTGTAGCGCG -TGCTGGTGTTGGTGTTGACAACGTTGGTATGTTTGAACATTGTTGTTCGGTGTGGAGTTA -TTAATCGTAGGATAGATGTCGAGGAGGCTACCAAGCTCGGAATTGTGGTTGTCAACTCAC -CTTCGGGGAATATCGGTGCTGCAGCTGAGCATACTATTGCATTGATGATGTCCATGGCGA -GGAAAATCCCCGAAAGCTGTGCTAGTCTGAAAGATGGGAAGTGGGAGCGAAGTAAATTCG -TCGGCGTGGAAGTCAAGGGCAAGACGTTGTCTATTATTGGTCTGGGAAAAGGTGAGTATC -CTGCACTTGACTTGTTTGGGGATAGGAGTTGACGGTGTATGATACCAGTTGGATTGACCG -TAGCACGACTGGCCAAGGGGTTGGGTATGATTGTCAATGCCCTCGATCCATATGCGTCCC 
-CGGCAGTTGCAGCTTCGGCTTCGGTCGCCCTCGTATCTTCACTTCCAGAGCTGCTTGCAT -CTGCTGATTTTCTCACGATCCATACCCCATTAATCGCCTCAACTCGAGGAATGATTGCGG -AGGCGGAACTGGCCCAGCTGAAGCCAGGTGCGCGGGTATTGAATGTAGCTCGGGGAGGCA -CGTTCGATGAGGATGCGCTGCTTGCGGCACTGGAATCAGGCCATCTTGCCGGTGCCGCTC -TTGATGTGTTTACTTCAGAGCCCCCTGCTCCTGATTCATCTGCGGCACGATTGATCGCTC -ACCCACGAGCAGTCGTCACCCCACATCTCGGTGCCTCGACAGTAGAGGCCCAAGAGAACG -TTTCAGTCGATGTGTGTGAGCAAGTGCTTGAAATTCTACAAGGGTCGCTGCCACGCAGCG -CTGTCAATGCTCCTCTCATCCTGCCCGAGGAATACAAAAAGCTACAACCATTTGTACGCC -TCGTCGAAAAGCTGGGCAGTTTGTATACTCAGCATTATGCAGCTTCCCCTGGAGGCGCAA -TGGGCCGAAACACCTTTGATCTGATCTATCAAGGCGAGTTGGCTAGCATTAACAACACGA -AGCCACTCTTTGCCGCTTTGATCAAAGGTCTTCTTTCTCCTATCAGCAGCATGGAGGAAC -TCCATATCAATATCGTCAACGCCGAACTAGTGGCTCGCGACCGTGGCATCTTTGTCTCGG -AGCAGCACTCTCGCGACCCATCAGACCACTCGTCCTATTCGTCGTTGGTCACTCTTGTTG -CACGACCACCTTCTCGAGCATCATCCCGAGCTCCGGCTTCTGGTGACACCTCTACTGGGG -CCGTCCCTGACCAACCGCAGCGAATTATTTCTGGCACCTGTTCCGGTGATCAACCACTAA -TCACTCGCCTGGGTCGATTTGAGGCTTCCTTTGAGCCAGAGGGAACCTTGTTGATTTGCG -AGAACTATGATTCCCCGGGAAAAATTGGAGTTGTTGGCAACATCCTTGGTCAAGAGGGTG -TCAATATCAATTTCATGGCTGTGGCGCCTGTGTCAACCAAGCGGACTATTAATGAACCCC -CCAAAAGGTCTCTTTCCACTGCTAAGTCTGAGGTTCCTGCCGCAAAGGGTCAAGCACTGA -ATGAAGCCCTGATGATTCTCGGCATTGATAGAGGAGTTTCTGCTCATGTTACCGCTGCTT -TGGCTAGGGAAACTGGAATCTTGAGTGCGTGCGTTGTCACCCTTTGATTCAAAGTTCGGT -ATTTCCCCGTTGATCATTCGTGCAACGTGTCGGGTATTTGAGAAAAGGATGAATTTGTTT -CGGTTACACTTAACGCCTTTGCAGCTGTCTTTCAACCATGGCAGACCTCTAGTTATCTCC -TTTAATATATTGCATGCTGTATTTGGACCAGCAAATATACTGTTCCACGAGATCCAAGAG -TCAGAATCCAGTGGACCATGGAACAAACGTCTACACACAACCCAATTGGCAACTACCAAT -TTAGGAAGGCGCGTCATTTCCCACGGAGTATAAACCCCCCCCCCCCTTCTCTCGAATTCT -CGAAGTCCCATACTTTTGCATTCTTCCTTCCTCTATATCTCAAGTATTGATCACCCTCTC -TTCTCACCCTCGTCTTCTCTTACGAGGGTCCTCTTTCGATTCTTCCCTATATATATTTCG -AGGTATGTATATCACTTCACTCGCTTCTGCTTCTGCTATCTCTCTCCACTCCATCTCCAT -CAACTCCATTAACATTTAGTCTTTAATCATGATTTCCTCACATTTCCATATGTATGACCA -ATCCTGAGGCCAACTTCGAGTGGAACAAGATGCATCATTCTCTATCTTTTGTCTCTCTTG -GTTTCTTCCTTGCCTTTCCTTGACCTTTCTCTCACTTCACCTTACGTCTCTCCTGGAGTC 
-GAATTCCTACTCCGCTTATACCTGAGCCGCTAATTCAAACTTGCCTAGAGAGATACCAAA -TCTATCTCAAAGAAGAGATAACATTCTTGGAGGACGAACTCAAGATAAATTATGAAGGGC -TACTGGCCAGGAAAAGCGAATGGGTCGCACTTGCCCAGCAGCGTGAGGTGGCTGAGGGAA -TGGCACACGAAGTCAGTTCCTTGAAGCACCAACTCGCCGCCCAGGAAACGCAGATGGCCA -AGCTGCGGGATACCAATGCGGAGATTGCTGGTGACCTACAGGTCACGAAGGAAGCACGCG -AGAAACTTAACACCAAGCTGGAGCAAGAAACAGCACGGGTCGTTAAGTGCGTGCAGAAGA -AGGATTGGTACTCCCGTACCTACAAGGATCTCAAGGCCAAGTATGAGAGCTTACTGGCCG -ATAACGATGAACTTGTGAGGGGTCGCGAGAACCTTGAGGTGGAAATTGAAGAACTGAAGG -ACGCGCTGGACGATGTCGTCGAGCAGCAGGAGGCATGGGACAAAGTGAACAACCTCGAGA -AACAGGTACAGAACTACCAGACAGCGGAGGCGTCTGAGATCACTTCACATAAAGTCCCAC -TCATCGCCGTAGCCGGTGGCCTGCTACCAGCTAGTTTGGAACAGCATCCGTTTATCGTCT -TCGGTGCCACCCAGACAGTCCAAGGCTTTAGGTCCAGCGCTGATTAACTAGCGGATCTAA -TGAATCGCCAAGGTACTAGGTTTGGGATTTGTCAAATAGGAAGTGTGATGTAGGTTTACT -GTAATAAAAACATGATCGCTGTGCAACTACAATGAGATGGACACGGGGACAAAAGATAAC -CACGAAATGATGAGTATGATGCCGGAAAAAATCAAAAAACGCCAGGGTATAGATGATCAG -ACCATAGAACTCCGCTTCTTTTCAAGCTTGGAAACAGTATTTTCTTCTCTGTTACGCCGG -AAACCAAAGAAACTGCTCTTTTTTGGTTGGATTTGAGGGGTTGGTCTCGATGGCGCTGGG -AGCGCACCGCTGCTGGTAGGTTGTTGGTGGAACATGACAGAAGACTGTGGTCGGGGTTGT -GGACGTGATTGCTGTGGTCGTGACTGCATGAGGTGCGTAGAGGATTGGGCTCGGGGATCC -ATGCGACCCGGTGCATGGAGGTATGGGCTGTTGGAAATGCCATGGGAATGATGGGAGGGA -CGGCCTGCAGCTTGGGAGGAAAGCTGGACTGAGGATTGAGAGTATTGGTGGGAATAAGGC -TGACCGGGCCCTGTGTGTCGGGGTGGGAGGTAGGGCCTGGAAGATGGTGCTGGAGTACGT -TGTTGCATTCGTGCTTGCTCTTCTTCTCTCCCGTATAACTGCTGTAGACGTTTGGTTTCT -TTTTCGACTTCTGCCTCTCGCCGACGCCGTGCTTTTTGCTCTGCAGCGAGAACCTCCTGG -GTTCGTCGCGCCTCCTCCTGTCTTGCGAGTTCGTCCTGTCGCCTCCTCTCCTGTGGGGCT -GGGGATGAGGAGTTCGGGATGCTAAGCCGAGGTTTCGATGGCTTGCGGCTCATTTCTGTC -CCAGAGGGTCGAGGCTGTGAGTTCCTCCGATCGCGCCTGGGATCACCAGCTGGTGTGGCT -GTTGCCGCATATGTAGTTGGCCCCACTGCTTGCTTGCGGGCCTGGTTAGCCCCCCCGGGG -GTGATGTGAAAAGCCTCCTTGGCAGGCCCAAAGAAAATATCTCGAATCGCAACGGCAGCC -AAGAGTAGCACGACCTCCAAACCCTTGGTATCCTCCATCTCGACGCGATAGAGATTGGGC -TCGTAGAGAGTAAGCTCCTTGAGCCCTTGAAATAGAGCGACCGTAATATCTGGCTCACGG -CTCTTTGTGCGGGTGTCGGAAATTGTTGAAGTTTTGCCATGCAACAGACATGTCAAATCC 
-TTGGACAGTTTCCCATCCTTGCGCCAGCTGAACCTCAACTTTGGAGTGACGTCCGCAAGA -GAGGGATCGATTTGTGTCTGGTCTAGCGCTGAAGTGGACGGAACCCGGAACGTGTTTTGT -GGCATCTCGAATTCCCACCGTGCTGGTTTGTTCCACGATTTGGTATGATATTTCACTGTG -ATCTCTTGGTCGGGATTGTAGAGTTGGACGGTGAACTCTGTAGGGAGTATGGGTTCGGGG -GGCGGTACAACGCCGCCATTTGCTCGTATTGTGTCTGCCGACAGACTTGGTTGGGTCCAA -TCTGGAGCCACAGCGACCTCGCCGTAGACCACGTCTGTTACGTAGGGGTCATACAGCGCG -ACTCCGTACCGGTTTTGTGATGTTGGGGCGACCGGGTCAAGGGCACGCAACGAGTAAGCA -GGTTCGGGGTCATTTCCATGTTGACGATGGTAGATCGTGCTGATTTGAGGTTGTTTTGAG -TTGTTCTTGAGGAAAAACGCTGTGCTGCGGTCAGTACGATAATCACAAGGCCGGGGGATG -ACGTAGCCTGCGATAAGAGCGGCTCCTCACAACGTACTGGGTAGGTTTTCATCGATCATC -TTCTCATCTGGAGTTCAATGTACTCAAGAGACAACCTTGGTCATGTGGAGGAGTTATATT -GAAAGCTCGGGAGTTGAAGTCTGCTAGTCTGCGCCACGGGACTTCGTGATCGGCACCTAA -AAATAGATGGCCTATACCCGCTAATTCGTACGTCAATTGAGGCACCAGCATTTGAAGCAC -TCTACAGAGTAAGATCTACCTTGAAAGCTGGATAGATATAGGCATCGGCATTCTAGGCAT -GCGTGACAATCGAGCTTATAAATAATTCCTCCTTTCGCTTAGCCTCGATCTACATACGAA -ACAGGTATGCTAATCGAAAGTATAGATCTACATGCTGTCATCAGAGCAAACATAGTATAT -TTGCGGTAATTCTTCTACATTTGGATGCTCTCAGAAGCATTGTTCTCAGATATCGTGTTT -CAAGTCGAAGTCAACAGCTTAATTTTGCTTTTTGTAGGCCACCCAGCACTCATTTGTATT -GCCCGAACAGCGCCTCCACCAAGCGGATCACCACGATGATCCAAGATGACTCGGACATTC -GGGGGGTATTCAGCCTATAGATTTCTGCTCAGCTATATCCGACATTGATAAATTTGTCTA -AAAACTGATTTAGCTTCCAACGTATATTTCGTTGGATACATCAAGCCCTCGTCGACCCTA -TATCAAATTAGCACCACAAATTCAAGAAGATTTCAATGTAATATATATCGGTTCGAGTTG -CAAACTGTGTTTTTGTGTTGATATACTATCATCCCATGTTAATTGCGGCAAGGAAGGGTT -GACTTAGCTGTTCGGAGTGACTTGTGGAGAGAAGAAGGGAGGAAGTTCTACGCTGCAGAT -TCAATTAGTATTCGGTTTGTATTCTGAGGAAGGACTTCTATGTATGATTATCCACATAAT -CCTCAAGTGGTGCTTATATTGACTCATTCGGGGGTGGCTTCTAAAACCCATGTTAGTACA -AAAGTTCGCGGGAACGACTTAGCTGTGCTTTCAAATTCTGTCCAAGGTTGAGGTAAAAAT -AACTGACTAACCACGGAACAATCACAGTTTTCTTATTTGAATTAGGACACAGAAAGGATT -GGTTACACTCAGTTAGTGTGCAGATCGGTTGAGCTGGAGACAACAGACCTTCACGTCGGG -ATCTCAGGAAGTATTCGTGGATGTCTGTATGAAGTAAGTGACAACGGGATGAAAGGAATC -ACTTAGCTGCTTGATATTTCAAAGCATAGAAATGGTAGTTTTCAAGACAAAGATGAAGTC -AGAGCTTCATTCACCCGTTAGTTATAATGATCTAGTGAGATATGGCTGGCTGTCGAAATC 
-CGAGTTAGTTTAAAATGGGGCTCAAGATTAGCCTAGCTGCTTTAGAGAAATTAATTTGCG -CAATCTTTTGGGCAAAGGATCAATTTCGGGTTAAATTCGTTAGTACGGACTCCAGGTCAC -GGAGGAGAGTAACAAGTCTAGCTCACGGGCTAATGGCTGATCGTGGAATATTGTGGATAT -ATTGTCGTGATGCATATCTATAAAGTTGCGATCAGTGACAAGAGGGTGGATAATGACTCA -CCTGCTAGGGGAACTATATGATGATGTCCAAACAAAGGATTGCGTCGGGCCTATACTTCG -TTAGCACCGAATCATGTTTTCGAGAATGGTGATAAGAAACTCAGCCATTGGGTTGGTCAT -CATCTGAATCCTTTTTTGGTGGAATATCTTCGATAGAGCCCTATTTAGTTTGTTAGCATA -TCTCGCACCTGGTGTAGCCTACCGGGGGTGGTATGATAATTAAGACGGTAAACTTGAATG -ACCTGAAATGGTAGATGGTATTGTAGTCTCTAAGCCTCGTTAGTAGGGAAGGCAAAGGGC -GGAGATGGGGGTGATCAAAGAACTTAGCTGTATACAGGTAATAAAGGATGTCCAGACCGG -TACTTGTTGAATACGATGTTACCCTAGTTTGTTATATGTTAGCAATGATCAACTTCCGGG -AGGACGCCCACCTGTGGCATAAATATGTGTGACGGCGTGGTATTCCGTATGTCAATGGTC -CTAGATTGAAGCGTCAGTGTCAACCTCGCAAGGAATGGGAAAGGTACTCCGCAGGTTATT -CGCAGTCCATATATATCGTATGTATAGTAGCGGACGATAGGCGTTGAGTTCTGTTTTGAG -CTGGAATGCTCCTGATGACGGATCAAAAGTTAAGGGCCTGAAGGGCACTATATAGATGAT -TATATCCAACACAAAGAGTGACCCAACTCATAGTTGTCCTACACAAGCCCTTTGTCAGAA -CGGCTACGGTAGACTGGTTCATCGTGTTCCCGAGGCCAAGGGTCATTGTTCACAGATAAA -GATCAAGAGGTTAAAGAACGAGTAAAGGACTCGACAATGAAGTGATTGACATAATGCATG -CTTGTCAGCTTTGTTCTATTGTTTGTTGAAACCTGTTTGAAGGCTAGATTAGCTCAAGGG -GCATAAGAGCCAGGGTGACTGGGGTAAACCAACAGTGGGAATGAAAAGCCTTGGAAACCC -AAGAGATTATTTTTAGTCTATGGAAGATCTATGTATATATTGGCTCTGTCTTTGTAGCCC -TCGCAATATGCGACAGCTGGAAAGTACAATAGCATTGTCCGTCAAAACACCTTTTCGTTC -ACTGCCCATTTCATACCCGAACGCCATTCACTTCCTCACCTCCAGTGGCCTTAGCCGCCT -CCCGTTCGGCGATGCTCATCTGCAATCCCTGGTCACCCCAGCCCTGGATGCCCTTCTTAT -ACAGGATCTTCTGTTCATCCCGACTAGGGATAGGTCCAGTCTTGCCCCGGCGCTGCCGGG -AGTCGCTTGTCTCCTCAAGTGAGGCGTTGTCACCATCCAGCGGGTATCCCTCCAGATCAT -CTGTATGTGTTTCCTCGGTGAAAGAGAAAGTGCGATACTCGGTATTGGACCAGCCGGTAC -CTGCGAGAATGGAGAACGTTAGTATATTTCCTCGTAATAGTAGCGCATTACGTCACATAC -CTTGGAACTGGCTGTTGTCTTCCCAATCTTCAGTGAAGTAGTGCAAGAATCCGCCGTGCG -TGACCATGACGATTTCCTTCTCCGGGCGGGCTTTCAGCCAGCGACGGGCTGCACGAGCGC -GTTGCTTGAGGGCGTGATTGACTGGGGCATAGCGGCCAGAGGCCTTTTAGGTAGACCGAT -GTGAGTATGCCATGTCTTGATTACGGTTGAGGCGACCCTCGATCTGGAGGCAGGTGAGAA 
-GCATACCTTGTTGTTCCACCCATCATGCACGAGACCAAGATCAACTCCGGTTTCGAACTC -CTCCTTCAACACAGTAGGCTCGCTGCCAGTATCACATGCAACATCTGAGATCTCTTGAAT -ATCAGGCAAGGCGATAATCTTCAAGTCCGGCTTTGACTCAAAGACGGGCGCAAAGCTCTC -GAGCGCAGTGTAGAGGGTGCGACGGAGGGGTGAGGCGGTCACTAGATCGATATGGGCATG -GCGGGGGAAGTTCTTGCGGAGCGTTTGACATTGTTCATGTCCAAGGTCAGTGAGAAGAGG -ATCTTGAATGACGTGGTTGGCGGTACAAAGATTGTGCAGGCCCTGTAATTGTCTATTAGA -TCAAGCAGCAAATAACCATCTTCCAAGGTTGCCAATTGTCAAAGCTTTGGGAGTTTCCTA -AGATAATTGTCGCGAGATGGACTTACTTGAGCATGGCGCACGCAGTGGATAATTGGAGGC -ATGTTCAATGTTGAGAGTAGAGTAATCTAGTAGATTTCCGTCAGGAAGCAAATATTGAAG -ACCCTGAAAGTAACCTCGGGAGTTCTTATCGCGGTGGCGGGGACTTAGGCAAGGGATGCA -AATCGGAGTGCCTTACGGAATACTTGACTCACGGTTTCTCAATGCAAAGCGATATAGGCG -GAAAAATAGAAAATGAATGTAAGAGTCATAATTCCTTCCCGGACCTCAGAAGTCCGACTT -GGTCATATAATTTTGCGTAGCGCCACATCATCAAGCGAACCACAGCTCTGCACTGGAGTC -CGATGGAGATCTATAAACTGTGATTTGTCTATCTTAGTGAGGGATCCTCTTTAGATGTCT -AGTATAAGGCCGAGTACGAAAAAAGGAGTACTTCATGAAAATAGGTTATTTGCACCGAGA -ATGCCAACACATCAATGTATTATAGATCAAATGGGTATGTATAGGTAAAAAAATGGAAAA -TTCACAACCGTCTATGCAACACCAAAACCTTGGGCATTTTCGAGGGCCTTGTCAAGCTTC -TTCGCCAAGGTATCCCTTGACGAGTACTCTGGCAGAAGCAAACGTCCGAAGCACGTCAGG -CTGGTAGGGAGTCGCTAGAGGAACAATCAGCATGACACTTCATCTTCTGTTGTAGAAAGT -GGAACTTACCGAATCTCCAACGCCATTTTTTTGGATCACAAACATAATGCTCGAGATCCC -GTTGACGGGGACTCGGTCACTCGCAGTAACGAATTCAAGTAATTGGGCCCTCTTTTCATT -TGGGTATGTTTGGACAATATCCCAAAAGTCATGAATGACACGATTATCAGGGCTAAATCC -TCCCTCATACCGGGTGTGGTTCTCGAGCTCTTTCACGTCAATAGATTGGATACCTTCTAC -CACGGTTTTGAGGGCTTCTGGGGAGAAAATGGACAGAGCTGATCGATCCAGGCATGTGTT -GAAACCCCTTTGGAAGGCTTCAAACTGTGGTCGAATGGATTTGTCCGTAAGCCAGAAGAT -GTAATCCCTGACAAACTGATGGCGATTCTGATTGGTCACGAGCGCAGCCTCTTCAGCTGG -GAGTGCAGGGGGCTGAAGTGATATGGACTCAGACCCCGACTTGGCAATCGTGCCCGAGGC -AGGCTCGCGGGATGTGACATTCTTAAGCTCCTCGGTTGTCATAGAGGTTGTTGAGCTTAA -TGTCGCAGGATCGCAGTAAGATGGTACATCCATCCAAACGCTAGAGCCCGCGATAGGTGT -CGAAGCCGGAGCCGGAGTGGTGATTTTGCTAGCAATAGGCCACGGTGTATCCCTGTTAAT -CTTTTGCATGTCGATTGTCTCTATACTTCCAAAGGCTTCGAAGCTAAACTCGTAAGTTCG -TGTGAAAATATCGCCGACATCGCCATCTTTCCATGCCAATAAAGTGTCGAGCCCGTGGCT 
-TAGTTCCGGCCATCCATCTCGAATGTGGTCTAGATGCTTCACCTTCAAGCCCAGCATTTT -GCGGTAGAAGGCAATAGGAAGGTTCACGGGGAGAGTCAGGCCATTGTAAATTGCAAGAGA -CATCAGCAGACCAATGAGTTCAAACTTGTAGAGTGGTTCCAATGAGCACGGTTGGAACCA -AGACATGCGGTGTCGAGTGTCAACGACGAACATGCCATGAGATGGGTCAAAGGCTTGACC -CATGAGCAATCGGAAGAACTCTTGCTGAACACCACCATGGTCAAGCCCTTCTTCTCCTTC -ATCCATGCCCATTTGTACCTTCAAGGGTCGCATCAGTTCGCGCTTTTCACGCCGCCATAG -TTGGCTCAAGGCACCGGAGAGAATATCATCTCGGCGGACAACGAGAACGAGATAGGACGA -CATGGATGTCTTCATGCGGGTGAGAAGTCGATGACCATCGGAAATTCGAATAGGGTCGAA -GGCAGTCTGATTGACGTGTTTCGTGGTGGTCATAGCAGCCTCATAGGACTTAGTCATGGC -GGAATAGTTCAGAGCTCGGAAGTAGATCACGAGGGAAGATGGAGGGAAAAGGAAAGAGTA -CGATAGCAGATGCATCGTTCGGTTGTTCGGAAGTCCCCCAACCCATGCCACAGGCATATC -CAAAGGGTCCAGCCGCTCTGCGAGAAGAGGTGTATGGAAATCTTCGGGTAAGAGACCTAA -CCGATCTCGTTCCTTGTACATTGAGGCCAGAATATGAACGGCACCACCGGCTGGGCTGCT -ACGAGCCATTTCAGGGTTGCCGTCCCATTCACGCAGGAAAAGAGTCCGAAGCCACTCAAC -AACGAGGGAGGCTGCGCAAATGGACCCGCCGGGCTCATCGTTGCTAGGAGTCTGAGAGAT -GTGTTGCTCACTAAGATAGTCAACCACGCGGTCCAGCACGCTGATTCTTCGTTGATTTGG -TAAATTTAGGCTGTATACTTGGCAGGTCTTTGATATTTCATGGTATGCCAGTCGTGCAGT -AAGAGCCTGCACGAGGCGGTTAATGAGGCGGAGACCTAACTCATGCTCGAATCTGTCTGT -CACCTCGACGGCTTGCTGAGCATTCTTCGGAGGCAGCTTGCGCATATCCGATAGTGAGGC -AACAGATCCAGCGGCACGCATCCGAACTACGGCTTTCCATGTCCCCGAGTCGATATGAGG -CACAAAGCTGACCAAAGTGAACAAGACAATCGTGGCAATGTAGGCGGCATCAGGATCCGA -CAGATAATCTTCATTGGCAGACTCGGACATTCGTCGGGGAACGACCGGAGGAGGTGCGGC -TCCTGTCATGGTCGAACTACGAGACGACCGGCGGGACGGCCGTCCAGAAGAAGTGAACCC -CCGTGGGGGAACGAAGAGCTTGCCGAGAGAGCTCCACAGGCTATGTAGTGCAATATCCCA -AGGGCAAATTGTGAATAAGCGCTGTAGTGATAAACGCAACTTGGAAACATCCAGGCGACT -AGAAGTAGCCCCGATAATATCCTTTGACTCGCTACCAAAGGAATGGAGGATCTGACTGGG -GCTGCTGAACACGAAGAATGCACTTTGAGCAACGAACGGAAATACTTCACGCTGACGTAG -AGAGGCAAAACACCATTCCGGCTGCTCAAAACTGCCAGCGCACTGGATACGGTCAAGTTC -CTCTTTCCAGCAATCAGCATCTTCCGCAGATCCTATCATGATTTGTGACAGGCCATCGAT -AATCTCGGGCGTAAGATGAGTGACTGTCTGGGCTAGTGGGATCGTGTCTAGCAGGGTTTC -AGCAGAGGCTTGCTTTTTCGTGTGCTCACTGGGCGGATGAGCAACAGAAGGAGGAAAAGG -AGGAGAAGACATGAGCTCTGAGCTTGCCGTACTGTTGATAGGAGATACTTCATCCAGCAA 
-CTTCTGGCTGTCCCAGGAGACACGTCGTTGACTTTTGCGCTGTTCCTTTAAAGGTACTGG -ATCGACATCTCCAATTCTTCCGCGGTGCTTTTGCGGACGATGCTTTAACGCTGGCGGTGA -GGGTAAATTGACAAAATCATCAGAAGAAGCCACTGGGAGCTTCTTGTTGAATTCAGGTTT -CCTTTTATCGTCAAGGCTTGAACGTGGACTCTGGCGCCAATGGTCAACCTCGGAGACTGA -TAGTCGTTTAACATGCTGGTTCTGAATCTTGAGCTCAACCGCGGAGGGCTGAGATCCCGA -AGTTCGTGATGCAGGAGTCCGTGGTTGAGAGCTTGGCTGGTGCGCAGGTGTGGAGCTGCC -GTGTATTTTAAGATCTCTGCTTGAAACTTTGTCGACATGGAAGTGGCGCTTCTCAGCAGT -GGATGATTGGCCTTCCGGTGCGGGTGTGGGTGTTTTGAACGTTGGCTCGAATGTATCGAG -GGAACGGCGAAGAGGAAGCCATTCGACCATACGGAGTGAGATTGTATCAAAGAGGTTCTG -TGTAAAAGACTTGGGGTCTTTGAGTTGCTGTTCGGCGGTATGGTCGAGGTCTTGAATATA -GCTGCCCCCGGTGGGAATTTGAGGTGAATAATCCCCTTCTTTCACTTTTGAGTCGGCTGA -TCCATCTTGTGTAGGCCTGGGCATGTTGCGAGCCGACTGGCCTTCATCGTCCCGATCCGG -TTTTGGTGATCTGTAAGCCGCTGCTATCTGTGCTTCAAGTGTACGTGTTCGGCGCAGAGA -AATCCGATGGTAGTCTTCCGCTGTTAGTTGTGGCGGAGGTTTAGGCACGTTCAGACATAA -ACCGGTCTCCGCATCGTCCAAGCTAGCGAGGTAGCATGCCAGGGTCCTCGCGCTCAGTTC -CGTGTATCGACGGTATGGAGCTTCACTGAGTCTTCTACGGTGACTGGCACATGTAGTGGT -TCGACATGAGGGATCATCGCAGCCGTGAAGCAGTTGAGCTTTGTACCTTTTCGACGAAAC -CGTTTAGTGTCATGCGGAGTCAAAAGGTCGGGGCCCAATGGTTGAAGTTCCACTCACCTG -CGCACCAAGCCATTAAATTTCCGTCTTCGATCATCGCGATATCGGGGGAGGACCCTTGAG -GGATCAGATACTTCAATGGTATTGTGTATGAGCGCATCCGGTGCGTTTGGTGGGAGTTTT -GACCGATAATTGAGAGTTCGCCCGCCGGGGGATCGCGTGATGTCTGGGTCATCGATAGAA -CCTCCGCGGAGCCTTGTCATTTTGAACAGAATGGCATATCTGAGGGAGTGAGGATGGCTG -TATGGTAGTAATGGCACTAGTAGATTGTTCGTCTCCACACGTTGAATAAAGACTAAGACG -CTCCAGCTCACCTAACTAACATGCAAGACTATAATGCCTCAGGCACACTTTTATCCCCCT -TTGGCATCGGAGCTTTCGGAAGTCAAATAGTCCGGCTAGGCCTTGATTACTCAGCCCGCC -AGCCTTTTTGGAGGCCACTAAGCCTTTTACGGTTAGCCCAATGACCGAGATAGTACTAGT -ACCATTGCAGAGGGGTTTAGTCGCTCGATAAGAAAAGTAAATTCAGACGATCAACCCTGG -ACGACGGTCTAAGTACTTGAAGACTGACTTGTGAGAGCATCTCCTTTCATTCTTCTCTTT -TACATCTATCCCAGTCCCATCACTTCATCTATTTCGTCAACAATCTATACTCAGATCATC -AAAAAGTTATTGAAATCAGGTGAAGGCAGAGCCTTCATATGTACCCCGCCTACAACAAAT -ATTCATCCCACAACCCACAGGGAAAACCTACCCTTTCAACCACCCACGACTCTGGAAAAT -CCATCGATATGTCGTCGACCAACCAAGATGCAGCCTCGCTGCAATACACTTGCAACACCT 
-GTCTTGTTGCCTTCCAACGCAGCGATGCGCAGAGAGACCACATGCGCAAGGACTGGCAGT -AAGTTTGAAATTTTCTGTTTTTTTTTTTGTGAATTCAGTAGCTCATATGGCCTAGTTTAT -ACAACATGAAGCGTCGCATCGCTTCCTTGCCACCTGTCACTCTCGAAACTTTCAACGAAA -AAGTTCTTGCCGCCAAAGCTACATCTAATGAAGCCGCCGCCAAGGCTTCGTATGAAAAGA -CCTGCCACACATGCAACAAGGCCTTCTACAGCGAAAACTCGTACCAAAATCACATCAACA -GCTCGAAGCATAAGCAACGTGCGGCTAGCCTACGTAAGGATGGCGATGCTGCCTCCGTCC -AGAGTTCCGCCTTCTCTCTCGGCGAACCCGTGACCAAGTCCGACAATGACGTCAGCAAGG -TCACCGAGGGCCTCAAGACTGCCACTATTgatgaagaagaagaagaagacgaggaTATGG -AAGCAGATATCAAGAAAGACAACTTCTCGTCATCTCGCTGTCTCTTCTGCAAAATCGATT -CAACCGATATTCACACCAACGTGGACCACATGCGCAAAGATCATGGCATGTTCATCCCCG -AACAAAAGTATTTGGCGGATCTCGATGGCCTCTTGAACTATCTTTACCGAAAAATCACTG -AAAACTTTGAATGTCTCTACTGCCACGCCATCAAGAACGACGCGCAAGCCATACAAACCC -ACATGCGCGACAAGAGTCATTGCATGATCGCCTTTGAATCCGAAGAAGAACAAGTTGAAA -TTGGCCAATACTACGATTTCCGGAGCACCTACTCCGAAGATGAGGATGAATCCGCTACTG -AGAGCGGTGGCGTCAACGTGAATGGCGATGGAGAGGACCAAGGCTGGGAAACCGACAGTT -CTGCATCCTCTGTCGATGAGGATGATCTTGACTCATACCGAAACGCATCAGGTGCCTACG -AAGCAGATTATGAGCTTCACCTTCCATCTGGAAAGAGTGTCGGCCATCGTTCACTTGCCA -AATACTACAAGCAAAACCTACACAAATACCCCTCGGCCGCCGAACGAGCTGCTCGCCAGC -TTGCTATCGCAAATGGAGAAATTGAGGAAGAACCTCCCCGTGTCCGCAACAACCATCACC -GCGCTCTTGTCACTCGTGCCAACGGTGGCATGGGCATGATTGGAGTCACCGAAGAGCAAA -AGGAAGCTGCTCTCATTGCCGACCGCCGCGAGAGAACTCGTGCCCTTCGCCAAGAGAATC -GTTATGTTGCCCGTGTCCAGCGTGCCAACAATCACCAGAAGCACTTCCGCGTATGTTCAA -TCTATACCGTTCCTATGTTTTGCAAAACTAACTTTTTTACTAGGATCCCCTTTTGCAATG -ATCGCTTCATCACAGCACCAACGGGAAATCACATCAATCAACCAGTTTTCATTTACGACA -TCGTCTTTTGGACATCACCAACGAAACATTCGCCAATACTCGCAATGCATCGACAACCAT -TGCACCTCTTCAAGTTGCACTCCCGTTACCTCCGTGCACGAAGACTGCTCAACTTTTGGC -AGCCTATGGGTTTCTGTCTGCAATCATTTCGAGTCAACGAAACCACTACGTGAGGTACTC -TTTCCGAGCCGGGCCATTTTCTCAAATGAGTTTTTCAACCCGGGCGGTTTTAACGAGGCC -TCACCGAACATTGAACAACTTTATGATGGATGGATGGATAACGATCACATGTTGCATATA -CCTTTTGTTGTTATTAGGTCGCCTTAACCTGTTATCTGTGGGATGAGGGGTTCTTTTGGA -AAGAGCATTTAAGTATCTAGTCTGAGAAAATCGAAGCAAATCTGCACAAAATTTGTGTCT -TGCGGTTAGACTAGGGTGACAGGATGCCTATTCTCTTTCTCCAACTTTTCAATGCATAAA 
-TCATTTCCAGGTCAGGCTACAGACTCAATGACTTTACAACCTAATCCCAAATAATGCATG -CGGTATATCCAATCCTTTCGCGGATTGAGAGACCTTTGGCTTCTCGTTTGGCATATGCTC -TTACAAAGACAAGTGAGTACAATCAAGTCTGCTAGTAGGGAAACCATATTGACTGATTCA -TTTAACTATGTAACATTTCATTGAAGTAACGACTCATTGAAGTAACGACTCATTGAAGTA -ACGACTCATTGAAGTAACGGCTGGTCCTCTGAGGGGCCACCATCAGGTTTTGAAGCTTTG -CTGTGCTTCTTTCGGCGGCGCTTCTTTTTGGACTTTGCGTGCGCACTATCGGGGTTTTCT -GATTTTGTATCATCGGTATTGACAACCGCACCATCGGCATGGCGGTGGTCGCGATGATGG -CCAGGTGAATGAGATTCACCTTCTGAAGAACTTGAACAGTTTGCGTGAAGAACAACTTTG -CGAATACCATTAACATCCACAACCTGTTCTGTAATGCTTCCACTGCGCGCCGGCCCGCGA -TGGTGGTAAATGAAGCCTGGGGTATCGGACGATTCAACACTGCCTTTGTCACTACTAGCC -TTCCGCGTCGACCCAGAACGCGGTTGGTCTTCATGCTCCTCACGGACAGCTTCGGGAATG -GAGACGTGGGGAAGGTCTTTGTATTGCTGAGCACCGGTTCCTTTGCTCACAGTATCGTTG -GAACTCATGGTACCGTAGCCAAGTTTGAGGGCATGCGCACCATCTTTGGCATCTTGTCCA -GCGGAGTTGAGCAAGCCTGCACCTATACCGCCTTGATATCCAAGGGAAGGGATGAGTTTT -TGGGAGTCAGATGTGCTTTGTCCGGACTCAAAGTCTGTTTGTGCGGAACGGGACGGAGAG -GCGCTGTTGCGCTTGATCTTGACTGTATGGTAACGGGTTTGACGAGGCCGACTGGCCAGG -TTGGAGGGACCAAGATGCTTAAGGTGCTCCCGAATTTCATCAGTGGTGCCGACTTGTGTG -ACCCATTCACCTGCCGAGTCGGTGTTGATATCGGTATTGGTTTTGCGGAGTTGAAAGCGG -GGAAGAGGTGCTTCCGCTGAACTGCGGCGTCGAATCGAGTCGGATGGCTTGGGGGAAGAT -GATAGCTTTGTGGAATCCACATCAATCAAGTCGCCATCCGTGATAATAGAGCCATTCAAA -GGAGGCTCCTCGACAATCTTGCCTTTTGCAACACGAGACTTGGGTGCAGGTGTCATTCGT -CGGATGGCTTTGTGTACATCAATAAAGACATCAGATTCGTCAATGATTTCTTCGCCGATC -AATTCTTCAATGACATCTTCCAGGGTGACGACTCCAAGAGCGCCACGATCCTCACTAGGA -TATTCAGAGACAAGAACCATATGGGACTTGCCTTCTTGGAAGAAATTCACGATATCCAGA -CAGCTGGTTTCTGGTCGTGTTTCAGGAAGTGTAGCCAAAGCAAAATCGCGGACATGTTTG -CAATCTTCCGGATCGTATGTAATAAGCATTTTGACAAGGAGCATACCAACAAAATTCATA -GGATTGTCCGGTGCGTGGATGGGAATGCGGGAGTACCCTTGGGAAAGAATGTGATCCATC -GTGTGTTCGTCCAAGACAGTGTCGGCAGACATAGTGAAGACATCTTTCATTGGTGTCATG -ATACTGCCTACTGATTTCTCTTTGAGGTCGAGCACTGCACTGATAATGGTCACTTCATCG -GAGTTGAGTTGTTCTCCGGCTTCGCCCAAGGTTTTGTGGAGGGTAACGAGAGTTTTTAGT -CCGGCCTTCTTGTAAATGGTACCATGGTCCTCTCCAAGGAGCCTATCGAGCAATTTTGCG -ATAGGCCATGCGACTGGAGACATGATGTACATCAAGACCAAAACACAAGGTGCCATCCAG 
-GCACCGATAGGAAGACCGTAACGGACGCAAATGGACTGCGGAACAATCTCGCCGAAGATG -ACAATCAAAGCAGTACTTCCTAGAACAGCCGGCCAACCACCGCCAAGTGTTCGGTCAAGG -ACGATGGGTAGAGTTTCATTCGTGATGACATTGCTGAGGAGGAGGGTGACAAGAACCCAA -TGTTTTCCATGGTTCAAGAGTTTGAGAACGCTAGTAGCGTTTTTCCTTTCATGCCCTTCG -CCAGAGGTCTTGATGACTTGGAGATAAACTTCATCCTGTAAGCAGACAGGACGTCAGTCT -GGTTCATCGGGGAATTTTTAGCCGCTTTGGAAAGAGCTGACCTGTCCCATCAAGGCAATC -GTAAGACCCGCAAACGCTCCACCGCTCAACACAAGAGCAGCTGAAAATCCCAAGTAAAGC -CATAAGCTTGGATCGCTCGGTTCCTTTGGTGATTCTGTATCGCGGGACGATAGAAAAAGA -GGTGAGAATGTAGGCGCACCAGCGGCGAAGGGAATTTGCGAGACGGCAACGAGGAGAAGC -TTGGCCAGACTCAGCAACAATGGCCGAGCGGCAAACGAGCGATATGGCGAAGAATTCATG -AGAGAAGCGTGAGTGAGTGACTAACGGGACATGCTGTGATGTAATGAGCAATGACCCTCG -GTCAGGACGAAAAGGGGAGAGGGAGAGAAGAGATGCGGCAGATGGGGATGGGGCAGATGA -AGAGGGGTGAGGGAATTAACAAAGCCAACGGGAAGGAAGGAAGGAGAGGGGGATACCTTG -AGATGGTTAGAACGAGAATGCACTGGGAGGAGGACGACAACAGAGTGAGCAGCAACCGAG -CAGGGTGGCGGAGATCGGGGGGTCGGGAGGCCGGGACCGTTGCTTAATCAGCATCCATCC -CAAAACCCTGGACCGCCGACGTGATGTCCGACGCCTCCGCTGTCTGGTAGTCTTGAACTT -TTTGTTCGAGATTGCCCACTCTGTCCAGGGTCTTCTGGTGTTCGAGGTCAGCTGCAATTG -AAGTGCAGAACAGCTCCTCGTTGTGGTCCACCAGGATCCGGTTGTCGTCCTCCAGCTTTG -CCACCTTGGCATCCGCCTTCTGGGCCTTGGCATCCGCCTCCTGGGCATGCTCTTCCCATG -CCCCCTGCTGCTCGGCGGCATTGTCCAGCGCGTCCTTCAGTTCTTCAATTTCCACCTCAA -GGTTCTCGCGACCCCTCACAAGTTCATCGTTATCGGCCAATAAGCTCTCATACTTGGCCT -TAAGATCCTTGTAGGTACGTGAGTACCAATCCTTCTTCTGCACGCACTTAACGACCTGTG -CTGTTTCTTGCTCCAGCTTAGTGTTAAGTTTCTCGCGTGCTTCCTTCGTGACCTGTAGGT -CACCAGCAATCTCCGCATTGGTATCCCGCAGCTTGCCCATCTGCGTTTCCTGGGCGGCGA -GTTGGTGCTTCAGGGAACTGACTTCGTGTGCCGTTCCCTCAGCCACCTCACGCTGCTGGG -CAAGTGCGATCTCCTTTGCCTTGATTGCCTCCATTGCCTCGTTCAATTTCCTCTCGTAGT -CGACCTTGGCTGCCATCAGCTCCTTGAGATAAGTCTGCTTTAGCGAGTTGACACGCTCCT -CATGCTGCTGGTCAGCAAACACCATTGTGCGCTGCACCGCTTCATCCAGTCGCTGCTGGG -CCAGCTCGTGGGTAATAGCGGGCTTTGCCTGGATCTGCTGCTTGAGGTATGCGTTCTCCT -CGCTGAGCGAGGTGGCGGCGGCGTGAAGGGACCCAATTTGGCCAGTTGTGCGGGCCACAT -AGTCTCTTGCCTCGCTCTCGAAACTGGCCAGCTTGGACTCACAACTAGCCAGCTTGGACT -CACACACCGCATTGACGATCGCCACCTGCGCAATGGCAGTCAACTTTCTTGCCGGTCGAA 
-CACGTTCCCGCTTCGTAACTCCGTATTTTTTGGCCGGGGCCCAACGAGCACGTCCCTGGA -TCCCACGGATCGGGTTTTCCTGGGCCTGTCGCTGTAGGTCTTCCAGGTCCTCAAACTTGC -GCCGCCAGTGGCGACACTCGCCAAGGACCTGATCCTTCGCGCGCAGGGATTCCTTGATAC -CGCGGGAGACGGACGCCAACTGAGCATCCAATCGTCGGACCTCCTTGTCGTGGCGTCTCT -TTTCGGCCTCCCGGGCTATCTCAGAATCACGTGCTCGTGCCTCCGTGCCACGGAGCTGCT -GCTCCAAATCCACCACCTGGGCTCTCAGCCTCGCGTTTTCCTTGGCCTCGGCCTCCCTGG -CCTCGGCGTTGACATCCTCAATTGTTGACTCCACGACCAACTCTGCTACATTGTGTAGCA -AGGTAGCGAGCCAAGCACGGGGACTACTTAATCAATGTTAGTCTCAATGGTGTACCATAG -ATAGGTATTTGGATGGGTATTTGGGATGGGACATGGGATGGGACATGGGATGGGACTTGG -GATGGGACTTGGGATGGGACATGGTATGGGTATTTGGATGGGTATTTGGATGGGTATTTG -GGATGGGACTTGGGATGGGACATGGTATGGGTATTTGGATGGGTATTTGGATGGGTATTT -GGGATGGGACTTGGGATGGGACTTGGGATGGGACTTGGGATGGATGGGTATTGATGGGTA -TTTGGGATTTGACAAAGGATGTGACACGGGATGCTTTGGTGTTATGAGACATGACCCACA -GTCTGACCATCCGGAGGCGCTACATGAGGGACGAAATATCCCAAACTTACCTGCTAAGAC -AGCAAGcccccgcaaccaccaccaaagcaaccaccaccacAATTCCAACAAGCTCAAAGA -GATCACGGCGCCAGTATGCCGCCATAGCGGCAAGCAGCACAAGGAATACAAACGATGAAG -TGCTAGACATTGTGGCTTACAAAGTAAAAGGTAAAAGATGGGAGAGAGAAGAAAGAAGAG -AAGAAATCAAAACAACGTTTGGGGTTTAAATAGCAGTCCTGCGCGTTCCTTCCTAACCTG -AGCTTTCGTTCTTAAACCGAGTGCTCCTTCCTCAAATGAGCGAATGCCTATTCACACAGC -CCTCAGCCTACTGAACCCCAGGCATATATTGGACAAACAATTAGAAAAATAGCATCAATG -GCCATGTACAATAGGGAAACATTGCAAATGCTACTATTGTGGAGTATATCAATAGATTAA -GCTTTGCGGCCCGTGTGGGGCACACGAGCCTTGGAAGATTCCATCGGGCAGGTACCTATG -TATGCACATACCATCGGTCTGACGTCACAAGATCTGATAACGGAACATGGTTGTCCCAGC -CTTCCACATTCTACTATTGAGGCTTTTATTGATTGGGAATCAATTTTCCGTCTACCTGGT -TCTCCACTCAAGGCTTGCTCGTCAAAAGTCGCTCTCAAGCCGGGCGCTTTGGGTGCATGT -ATACGTCAGCGCGGGCCACCAATCTTTTGAACAAGCAACTCGTTTCGAAAACTGTAAGTA -CCGGATAAAAGCTTGCTCTTCAGTATCTAAGACTATCTTAGGGAATCTTCAACAGCTAGC -GGCCAGACCGAGAATGACCCAATCCTTCGCGCGCCAAGCCCCCCCCTCCACCGATATCCC -CGAAGTTTGATATCCCGGATTCCCGGAAAACATTGGGAATCCCCGTGTCGGACCTCCTTG -TCCTGGCTTTTCCTTCTTCGAACTTACGAGATTCTTCAATATAACAAACCAGTGCCTACG -CTGAGGCGAACCAAATGTGGCGACATGGCACAGCTTGTCCTCTGTCATATGCGTTAAAAA -CGAAGGATACGAAGGACGGAGTTAGCCCATCCCCCAATGGACTATATTTGGCAAACAGAT 
-GTCATATCTCCTTTGGCTATTATATGCTCTCGTTGGGTGGAAGGTCTTTGAGGATGGCAC -TGCTTTTACTGGCTCCTCCACTGGGCCTTTGCTGGTGTTCTTGACACCAATGATTTCGTG -TCAGCCAATTGCATGAGGAGTCGCCCAGAGTTACTGCTTAGCCCTCTCTCGGGCCATGGT -CTGGGCCACAGAGGCTTTAAATCATAGCGGTTATCATCTGGTATCACATCCAAGGCTAGT -TGCCACAACGGGTATGAGCAAAGGTGGTTTAGACAACACCTTAAACTGAGATATCGACAA -ATTTGTATCTGAGGGCAAGTAAGGCTGCTCAGCAGCTTGCAGATTATTTGGCCTTTATCT -TTTCCTCTAAGATTCTCACCAATAATGTCAATTGCGCTTAGTGCGCTGGTGGATGTTTGG -AAAGTGGGAACACAAGTTGCTCCGCGGAAACATTGTACTCGACCTCCAGCTATTATTCAT -CGGCCATCTTACTTTCCATTGGTACTGAACGCTCTGACTGATGGCCGTCGGCCCTGACCC -CTTTATCGGGGTTAGCATTCGGTGGACCGTCTTATTCCGGAGCACATTGTAGGTCGGCAG -TGCCAGGTTGAACCGTAATCATTGCTAATGGGAAGTGGGTCCGGCCTTAGTCAATGTTCT -AGTAGGATGACTTTATTGAGAGCTGTTTCGATGGCAAATTTCGCGGTCGCGTCCTTCTGG -CCCTGTAAACGGATGATCAGTTCCAGAAGCGCAGATGATAGGCGAATTGACCACACTGGT -CCTCAGGGTGTATTTGTGGCCCGGTGAAAATGTAAGACATCCGTCAAGAGGCCGGCTATT -AGTTGGTCAGCTCTGTCGGAAAATCTTCTATAGCGTCTCATCTCCTTCAGGCTGTAAATG -ATGTTTTAAGCTACCAAAATCATTAACGGACAGAACATCAACTGTAGCGCGGCTAGGTGA -GAAGACGTTTACTCTAGGATACTGGAAGACCTAGTAAGGGCTTTGTACATTTTGTCTCTC -AGAACTAGAACTGTTACGACATGCCGAGGAATTCATCTGTCCGGACTGATTTGGATATCA -ATAGTGACAGCACATCGTTATGTACCGCGTTCATTCCCCGATTTCGTTTCCCGGTTTGTG -GCTCCCCTCCCCGGTTATTTAACGTGACCTATCGGTTACTTACAAATATTGGAAAGCCAT -GCCCAGCAGGGAATTACATACATTGATTCAGCAACAGTCGCTCCTCGGTAGTCAAGGCGA -TGGGTGGAAAATTAGATTATGTATCATTTATGCCCCTATGTCTGTCCTATCCTATATCCT -TCGTCTTGTACAACGCCAAAGCCCGAATTTTGAATGTGTCACAACTTTGTTTTGCCACCA -TTGGCATTCTTCAAAGGGCCTGTGTGATGATATTCAACAAGATAAGGCCCTTGATCCTTT -GGGAAACCATCTTAGGCAGATTGAAGGGTCATATCTGGCAGTAGACCACGTCTTGAAAAA -AGGTAGGCTCTTTTATACCGGTCATATCGGCGTGGCCGAAAGGAACGCCCAATTTCCTAA -TAGTCATCGTGTGTGTTACCGTAGCATCAATTTTATTGTTGATAATCGTCGTCCTAAGCT -CAACAATGGTCATATTGTGCTGTGCAGCACAAGCTTTCAGTATGTTTATGACAAGGTTGA -TGGTTTCAACGGTATCGCTTGTCTTATGTGTTGTCATAGACGTCCAACTTGTGAACTGAA -GCCCATCAAATTCCAAACCAACAACGACATTTCTCTATTTCTGGTATTGTTCCAGGATTC -AAGTACAACTCGGTTGGTGATATGTCACGTCAAGATCGCTCTGAATTCCTCCGTCGGCGT -ACAACATCACTTGTCGAAGGAAATCGGCTGACAGAATCGGGAATGGCAGTGATATATAAA 
-GCTCAAGGACGTCGGGGCAGTCGAAAAAATGCTCACGTACCCTTGTGTGTTGTCATTAGT -GAGAATCTCATGTTGTAGACGTGTGTTCACATTTAGCCAACTGCGAATGTCTTTCTCGCG -GTTGTCGCTGATCTTCATTGAATCAGTCCGCCAGAGCTTGAATGAGATTGATGACTGCGT -GATGATGATGGGACTTTGCGAGTGGGAGGTAGCTTGGTGTTCCGACTGATTTGAGGGTAT -GATATCAACATTTGAGGCGACAGTAACATTGCCCTCTGCGCTGCAGACACACGGCCCATG -ATTAAAAGGAATTTCTGATCATTATTTTCCCAATTTCGATGTAATTTAGAGAAATGAATT -CAAAAAGGATAGCAGCAAGCCCAGTCAGTATGATTTCGGACCTTGGTGATATTTCGCTTG -GCTGCGTAAATCTGCACAATCCCTACAGTTTATGTCGCATATCCATGACCCTCAATAGAA -TGTACACTTTTATATTCAAATTCAAATGACAAAAGCGTCATCTGCGTATCTGAGCCTGAG -GCACCAGGTAAAGTGGATGATTAGTCAGAATTGATGACGTCTTGGATGGGTCCACGGTAG -GGGTGAGCTCTCGAGTGAACTCTGGAATTTTGGAATTTATTTTCACTAGCAATCATGGCT -TCCAAAATCGCCCCTCAGATGTTCGGTGCATTCAGAGCTGCTCAACGGGTCCCGTGGGCT -GCACAGCTACCTCGTGCGCCGGCTTTCCGCCGGTTCTTCTCGGCCCAGGTTGAACAGCCG -CGTCTGCGTCTGGGCTCAATTGGTACAGTCGCAACAGAATTGGATTTGTTGCATTTGCCA -GATTATGCTAACACACTATAGCCCCCAATTTCAAGGCACTGACCACCCAGGGCGAGATTG -ACTTCCACGAGTTCATCGGTGATAAGTGGACCATCCTCTTCTCCCACCCTGCCGACTTCA -CACCCGTACTTCTCGCCGATATACTGTATAAAGACACTGAACTAACATGTATAATTAGGT -GTGTACAACCGAACTAGGCGCTTTTGCTCGACTGCAGAATGAGTTCGACGCACGCGGAGT -CAAGATGATTGGATTGGTGAGTCCCCGCTATTACAACGCCTACAGAGCCTTAGCTGATGC -GCCCCTTCTCTGTAGAGTGCCAACGAGCTCGGCTCCCACGACCAGTGGATCAAGGACATT -AACGAGGTTGGCTCGACTCAAGTCCAATTTCCAATCATTGCGGATGCCGACCGCAAGGTC -GCCTTCCTCTATGACATGATCGACGAGGATCACATCGGGCAGGAGGCTATCGCGTTTACT -ATTCGCTCCGTGTTTATCATCGATCCGAGCAAGAAAATACGTTTGACTATGATGTACCCC -GCCTCGACGGGCCGAAACTCCGCGGAGGTCCTGCGGGTCATCGACTCCCTGCAGACTGGT -GATAAGAAAGGTGTAGTCACCCCTATTGACTGGAATGTTGGCGATGATGTCATTGTTCCT -CCCACCGTCAGTACACCGGATGCCCAGAAGAAGTTTGGTGAAGTTCGCGAGGTTAAGCCT -TACCTGCGTTATACCAAGATTTAAAGACGTGAACATGTATAACAATGGATGAAGCTGAAT -ACTTCAATTTTGATTCAATGGTGTCAAGTGCTCCACTCCGTATTATAGTAATATCTTGGA -ACTATGGAGTATGGAGTACAAATGGGAGGTTCTCGGAGCATAAGTCTCGAAAAAAGGGGG -CGTGATGGGGTTGCATATTTTATCATCGAAGCCCCTGACACCATGTCTCGTCACGGCAAG -GATAATCCGGCTAATTAGGAAGGCTAATTAGGAGGTAGTATGTACTTTTGTTCAAGGCAC -AGTCTACGGTTTTTTGTCCGGGAACCCTCGGTTATGCAAGCAAATTCACTAGTAGATATA 
-CCACGAGTCGATCTTTGAACAATTGTGACAAGTCCTCATGCTCACTGCGCTGCCAAGTGA -AATGTCAATAGGAACCCATGACGATCCTCCTCAAATCCAGCAAGGATATGCATTTCTCCG -AGATGTGCACGCGGGCGTTGCTTCTTCCCTGTTCTGCAAAGTACTAGTTACCCGTCCACG -GGGTCTGCAGTTTTCTTATAGTCCTCTGGGGTGCTGGTTTGTAGTGAACTTTCCAATCTG -GAGGAAATGATGCACTCGTTCGCTTGACTGGCCCAAATACGATCTTTTCCTTTATCTACT -CCCTTTTTCTTTGTTCATTGCGAAGTCACTATGGCTCCTTGGGCTCATCTAGCTCTAGTC -TCTCTTTTAACCCCGCTTCTCCTGGCTTCCCCAGTGCCAGACGAATATGATTCTACTACA -CACCTCCACGAGAGAGCAGAAGAAGGCTATGAATCGCCTCCATATTATCCCACCCCTCCG -GGAGGCTGGATTCCAGACTGGAGTGAAGCCTACTCCAAAGCGCATCGTCTTGTCAGTAAT -ATGACTCTGGCCGAGAAGGTCAACCTGACAACAGGCACTGGCTTCTTCATGGGACCCTGT -GTAGGGCAGACAGGAAGCGTGCCTCGCCTGGGTGTGCCCAATCTGTGCCTCCAAGACAGT -CCTCTGGGAGTCCGCAACTCGGACCACAACACTGCTTTCCCACCTGGTATCACCGTTGGC -GCAACCTTCAACAAAGATCTCATGTATGAGCGAGGTGTTGGGCTCGGCGAAGAAGCGCGC -GGCAAGGGAGTCAACGTGCTCCTCGGGCCATCCGTGGGAGCACTTGGCCGCAAGCCACGC -GGTGGTCGGAACTGGGAAGGCTTTGGTGCTGACCCCATTCTCCAGGGTTATGGAGGTGCC -CAAACTGTGAAGGGTATGCAGAGCACAGGCGCTATTGCGGCCATCAAGCATTACATTGGT -AATGAGCAAGAAATGTACCGCATGAGCAGTGTTGTCACCCAGGGGTATTCGTCTAATATT -GACGATCGCACATTGCATGAGCTGTACCTCTGGCCATTTGCCCAAGGTGTTCATGCGGGT -GTAGGGTCTTTGATGACAGCTTATTCCGACGTAGGTCTCTTCAATTCTATTCACATGTCT -ATGTCGTTGAATTATCATAGGTCAATAGCTCTGCGTGCTCTCAAAACAGCAAGTTGCTCA -ACGATATTCTCAAGGATGAGCTTGGCTTTCAAGGATTTGTGATGACAGACTGGCTTGGTC -ATTACTCTGGTGTTGCGTCCGCTATAGCTGGCTTGGATATGTCAATGCCTGGCGACGGAG -CAGTTCCCTTGTTCGGAGACTCATATTGGGGCGCTGAGCTGTCACGTTCGATCTTGAATG -GAAGCGTCCCCGTAAACCGGCTCAACGATATGGTACGTATTTCGTTAGCTGGGACATTAC -CTCGTTTCTAATATACGCGTCTAGGTCCAACGCATCGTTGCTACTTGGTACAAACACGGA -CAGGACAAGGACTATCCATTACCAAACTTCTCGACCAACACCCAGAACAAGGAAGGCCCA -CTTTACCCCGGAGCTCTATTTTCTCCATCTGGTGTCGTTAACCAATTTATCGATGTGAAG -GGCGATCACAATGTCACTGCCAGAGCCGTCGCACGGGAAGCAATCACTTTGCTCAAGAAC -GATATGGACATTCTTCCATTGCACCGGAATGATTCCCTCAAGGTTTTTGGCACAGATGCA -GGTGGAAATCCAGATGGTCTCAACTCATGTGCCGACAAAGGATGCAACAAGGGCGTATTG -ACCATGGGCTGGGGCAGTGGCACTGCCAGACTACCCTATCTCATCACACCCGAAGAGGCA -ATATCTAATATCACTAAGAATGCCGAGTTCCACATCACGGACAAATTCCCGTCCGACGTT 
-ATCGCCAATCCCAACGATATAGCACTTGTTTTCATCAGTGCTGATTCAGGTGAAAACTTC -ATCACAGTCGAAGGGAACCCAGGCGATCGAACTAAAGCGGGACTTAATGCTTGGCACAAT -GGTGACGATCTTGTCAAAGCAGCAGCAGAAAAGTTTTCCAATGTAGTTGTGATTGTCCAC -ACTGTCGGGCCTATTCTGATGGAAGAGTGGATTGAACTCAAATCCGTGAAAGCAGTGGTT -GTGGCCCATCTCCCCGGTCAAGAAGCTGGAAACTCCCTCACCGATGTCCTCTTTGGTGAC -TATAGTCCAAGCGGCCACATGCCATACACGATTCCCCGCAGCGAAGATGATTACCCGGAT -AGCGTGGACTTGATCAACCAACCGTTTGGCCAGATCCAGGATACCTATACCGAGGGGCTA -TACATCGATTACCGGCACTTCATTAAGGCCAATATCACACCTCGGTATCCGTTTGGCCAC -GGCCTCTCCTACACCACCTTCAAGTTCTCCCAGCCTTCGCTGTCAACGGGTAATCCGCTA -GACTCAGCCTACCCAGCTGCACGACCCACCAAGCCGTCGACCCCTTCCTACAATACTAGT -ATTCCAGACGCATCAGAAGTCGCCTGGTCCTCTACCAACTTCACTCGCATCTGGCGCTAC -CTCTATCCCTACCTGAACAAGCCACAGTCAATCACTGCGACGAAGAAATACGCCTATCCG -GACGGCTATAGCACTGAGCCGCATGCAGTTCCCCGGGCCGGAGGCGGCGAGGGTGGAAAT -CCCGCTCTCTTCGAGACTGTTCTTTCGGTTCGGGTGGAGGTCCGGAATACGGGTAAACGA -AGCGGCAAAGCTGTCGCCCAACTGTACGTTGAACTTCCATCTGGTCTGGCTCATGATACG -CCGACCTTGCAACTTCGACAGTTTGAGAAGACAAAGGAGCTCGCTCCTGGTCAGAGTGAG -ACGGTCACGCTGCATCTTACTCGGAAAGATGTCAGTGTTTGGGATGTTGTGGTACAGGAT -TGGAAGGCTCCTGTCAATGGACAGGGCATCAAGATCTGGGTTGGCAATAGTGTTGCTGAT -CTGCCTGTTCTCTGTGTGGTTGGAGGAAAGTGTTCCGCTAAATAAGGGAGTGTTCATTGT -TGAtgcttgtcttgcttgtcttgcttgtcttgcttgtcttgcttgtcttgcttgtcttgc -ttgtcttgcttgtcttgcttgtcttgcttgtcttgcttgtcttgcttgtcttgcttgtct -tgcttgtcttgcttgtctGTGTTAACTTCTTGTTTGTTTGTGTGGTTACGTTTTGAATAT -ACTGTTACGCGAATAATTTTGACCCTTTTGCTTATTTTGGGTCGACATACTTGAGCTTTT -GTGATTTTTTTTTTTTCCAATCTGACGCAGCTCGGAAGATATGATGTGTCAAATCTATCA -ACAGTGCAGGTGGGTCGGATGATACCTTCATGCGTCTTTAACCTTAATTGAGCGTCCCAT -TTACTATCAATTCATCGTTTGTCCTCGTCGGACTTAGATCCGACCCCAGGCGATATAGGA -AATTTATAGAAATAAATTGCGTGAACTTATAACATTACATAAATAGATAATTATATTTCA -AATAATCTATACGCCGTCCTCAACGACTGGTGTCTTCTCGACGTTCAATCCTGTCTTAGT -TTCGACATGCTCCTTGTTCAGCTTAGTCTTGAGAATAAAAACCGAAAAGAAGACACCAAC -AGCAGACATGCACGTGTAAAGGATCCACATATTTCTCAAACTCCAAGCGAAAGCCTCCCG -AACAGCAAGCAGCTGCGCCGGATCGGTAATGAATTTAATCGACTCGATACTGGCAGCAGC -TGAATCCCCAGACATTTGAGCCGCCATGCTATCCGACATACCGGAAGCCAATAAGCTGGA 
-CTGCTTTTGGCCCATACTATTCTGGAAGACAACACCGCCAATCACGATGGAAGCCGCAGT -TGCCATATTGCGAATGAAACCAATTGTTGCCGTGGCCGTGGCCGTGTCTTCCTGAGAGAC -CATGGCCTGGATGGCGATGATGGGTGTTTGGAAGAGGAATCCGGCTCCTAGGCCGCTGAT -GATCTGGTACCCGACTATCTCGCCGACGGATGATCCCACGCCGAGGTTGATATACAGCCC -GTTGCCAATGGTCATTAGTACCAGGCCGATCCAAACGAGTTCAAGGTATCGACCTGTGCG -GTGGATGATTGCGCCGGCAATCATACCAGAGCACGCTTCCATGACAACAAGCGGGAGAAT -CAGGACACCAGAGCCCATTGGCGAGGAGCCGTGCACGGATTGCAGGTAGAGGGGGATGTA -GTATTCCCCAGCGATGAAGACCTATAGTTTTGTTAATGATTGACGGTAATCAATAAGATA -AAGATACTGACAAAGCCATGCGCAAATGCTACTAGCAAGGTAGCAATGTTAGAGGACCGA -GCGAAGATGTCCATAGGCATCAAGGGATACTTGGCTAGCTTCTTCTCGCTGTAGACAAAC -AGCAGGGAACATAGAGACCCAAAGACAATTAGACAGATCACCTGAGGTGAGCTCCAGGCA -AATGTCTCGCCTCCAAAGTCCAATCCCAAAAGAAGCATAAGAGTAAGGCCCAGAACGGAG -ACACTGCCGAACCAGTCGATAGCGCGGACGCCATCCATGATTTTCGTCTGTGGATTGTGC -ACGTCCAAGAAAAAGAAAAGGAGCACAAATGTTACCCCAGAGATAGGAAGATTGATCCAG -TAAGTCCATCTCCAGGTGACATACTGACTGAATGCACCGCCGAGTAGGGGCCCAACACCA -CCAGCAAAAGCCCACATGACTTCCATCAGGCCGAGATATAAACTGCGATGTCTATCACAA -AGTCAGTAAAATCCCATATGCTAAATATAGATGGCCAACTGTAGGAGAAAATCAAGATAT -CTTACCTGACACTGAACAAATCAGAAATAATAATCGTAACCAGCTGTAGCAAACCACCAC -CAGCAACACCCTGCAGCGCCCGGCCCGCAATCAACATTTCCATACTCGTGGCAGCAGCGC -AAAGAATTGAGCTGAGGAAAAACCACGCAACAGCCAAGAGCAAGATAGGTTTCCGGCCCC -AAATATCAGAGAGCTTGGCCCAGATGCATGCGCTGGCGGCAGAGCTGAGAAGATACGCTC -CGCCAATCCACGTATATCCAGCAGAGGAGTGGAAGTAGGCAGCAATTGTTGGAATTGCTG -TAGCCATGATGGTTTGATCCAGCGCGGCAATAAACATCGCTAACTAGTGGAATTTAGTAG -GTGAATATGCGGTTTTTGATTTGATCTCTTACTGCAAGCGCTACCATAATTGCGGTAACT -TGGATTTTGCTTCGCCCTTTGGGCGGAGCCTCCAGGATTTCCAGTGATGCCGCCATGGTA -GATCTGGAGTTTGGAGGTTCTAGTTTTTAGGATGTGACGGCTTGAAAATATTTTTCTCGC -CGGATCGGTTTTGTGGGGCATTTGGGGGCGCTCCACGGTTCATGCCTACATAGATACTAA -TAGGGAAATGGACACTGATGTTTATAATATACACATGTGAGTGAAGATGTAGTATTGGTC -AATGCTTTCTTCAAGGCCGATTTTCACCGACCAATGTTGTTCATAGCGAATTATGATGAA -TACAGTAATTTTTGTGGAAGCGCTACCTGCAACATATATAGTACATGTATGTTTAAGTCG -ATGCCAGCAAAAGTGATGTTTTTTTATAATTTTTTTTCGTTATTTCTAAAAGACCCGTAG -GAAGGCATTCAAACAATCTGAACGGAAATATCAATCCACCCTCCGACCAACCGTAACCCC 
-AAAATTAATCAACGCTGTCGCAGACTCACAATGAGCCTCAAACTCACGAATCAATCCCTC -AACAACCACACGTGCCTGATCGGCATCCCTAGCTTCGCCAGTAACAACCATTGAAGCTGG -TACCAGCGCACGCATCGCAGTAGCGACCCATTTCTGTGCGGTAGGCCGCATATGCTCCTT -CCCAACAGTAGTGTAAGTCTCACGGACGACGTCGGTGAACCCGTTTGCCGAAACCTCCTC -GTACACACGCTTTGGGGCGAACGGCCACAGGCCTAGTTTCAGCATCGCTTCGACCACGGA -GTCCCGCAGCCGAGTTATCGCAGCTGGCTCTGGTACTGCATCTGTGCAGAAAGCCGTGTG -GTCGACTTCTTCCCATTGGATCCAGCCGTTTGGTTCTGCAGTTTCAAGGTTGTTAGTTGG -GGGTGGAATGTTGGTCCGTATTGTTCTAGGTGGAGAAGAGAACATACTTAGTAAAGCCCG -CGCATTGGCAAGCACTCGTGTATAATCACCTTGTTGCAGGCCTGCTGTCAGAAGTCGGAT -GTGCACGAGATCATATCGGCCACAATGCTCTGTTGGAAATGGGTGAAGGGCGTCGTGAAC -GCTTAGTGGGATTGACTTTTGGCCAGGTCGGGGAATCTCGTGGCCTCGTGGGTACTGTGC -CGATGAGATGTCAAATCCGTGGAGGTAGACTGATTTGTTGGGAAGCAAATTGGCGACATC -TTCGAGCCAGACACTGTGACGGGTTAGCTTCTATGGATCAGGGGttttttattttttatt -tttttttATAAAAAAAAAAAAATTGGGAGAGAGACATACCCTGTACCAGTACCCAAGTCG -GCGACAGCTGTAAGATCTTTCGGTATGGATGGGTGAATGAGATTAGATAAATCAACTAGA -AAGCGATGCTGATTATTGAGGCTATCAAATATCAGCAAAAGTCCCAATAACGGGGCTACC -GAACTGTCGGGTCCTTGGGCTGCAGGGAAAATCTGGGGACTATGTGTGGGAATATCTCGA -TATCTTCGATATCTTTGAACACGAAATAATATCTTTCAGAGATCGGAAATTTTGACAGTT -GATTTGAAAAGATCCAAAGAAGAAAACAAAGTCACATACCGTTGAGTCTCGACCTCATCT -CGATCCAGTAGGTAAACTTGATCACCGGTACTGCCCATCTTGAATTGAGATATAAAAAAA -GGCTGGAAAAGTGAAAAGAAGTTTGTGTGGGAGAGGATTCAACTGGGACTGTCCATTACC -TTCTTTTTATATTGGAATATCTCGTGTGCACGGAGTACTCCTAAAAATGAAAGCATCAGA -GGTAACAAATTTTTCTCTCGGACTCTCCATGCCAGCACACAAAGAAAAAAAAAAAGCCCC -TATCCCCACGCCAAGAATGGAGCGCCTGACGGCTTAGCGGGTTCCAGGGCTAGAGGATGC -CCATGGGACACGGGAGACCAGATGGAGTGGCGGAGTAGGAGAAAATGGTGGGATCTGCTT -CGAAAGACATGGTCTTTCCCTTTACGGCGTTACACGTCAGTCGGGCGACTATAGCCCATT -GAGTCTACAGACTATTGGAGCCGAAAATATACACATCAAATACATGCCAGGAGATTGTTA -TTTTTTTACAAAAATGAGACCCAAAAAAAGGGAGAAGGCGCTCAAGACTTATCCACCAAG -TTAGCAAACACATACCGGAAGATTTCCCGGCTCTTATACAAATTCTCTACCCGCATACGC -TCGTTATCCAAGTGGGCGTTGTCACTCGCCTGTCCACAAGGCAAGTTAGCTGCCGGCGCC -GAAAACTCCTTTTCCAGGAACCGGATTGTCGGGATGGATCCACCCTCACGGATGAACATT -GGTTTCACTTTAGTTACACCAGACGCAACATCTGGCCCGCTTCTTGTGGGGGGTTTGACA 
-TTCGGATCAGACTTGGTCCGGATTGGCAATGCAACTGACTCCTCAGCCGACGTTGGTGTG -GCTTGTGTGAGGGTGGAGGACGTTGGCACCGTGGCGGCTGTTTGCGAGGACCGTTTGCGC -GCGGCTTCTTTGCCTGCTGATGAGGTCGTTGTCGAGGTGATTACCCGCTCAACATGAGAG -GCCAGACTTTCGTCAGAGTCTTTGCGGCGGAGTTCGGGGATTGGGCCCGCGCTAGGTTTG -GGCTCGGTCGCTGGGCGAGTGTTTGTGGATTGCGGGATAGGTGGGTATTGGCGTTTCGGG -TCCTCTTGCGCCGGGGTCCATGCTGCGGTGATCGCCTCTGACAGCGTGGCGAAGATTTCG -TTATCGGGGTCTCCGAGCCAGGCATCGGATTTGCCGGTGATTTCGACTGTGAGATCGTTT -TGTGAGTCGAGCAGCGCGAATTGCTCTTGTGCATAAGCGGTGAGGTCGGCTGCAACTTCG -TCCGCATGCTGGTCCGGTACGATACGGATGGATAAGGTTGCTTTGGCTCGGCGCGCGATG -GTCGTTGCGCTCTTGCTTCCAGGGACTTCCACCGAGTGGACAGTCAAGGCGGGTTCGCGC -CAGCGATGCATCAGCGACTTGATCAGCGCCTGGCTGTCAGGGATTTCTGGGTGCTGCAGG -AGCAATACATCGGCAATGGCCTCGAACCGCTTCTGCTCGGCATCAGTGAGGGGCCTAACA -GCATCGTGGAAGCCGGGCAGATTGATTTTGCCTTTACGTCCCACGATTGTGGACAGGAGC -ATGGTAATATCCTTGACTGGTTCATCCAGCAATGTACTGCCGTCGATACCACTGTGAAGA -TCGGGGTGGTCACTTGTAACGATCAAGTTGGCATGCACAACACCGCGTAGCCCGTAAGTT -AAGCATGGGTTGTAGTCATCAAGCCAGTAACTGTTGGCAAGCAGAACCCAGTCCACGGAT -CCGATCTGAGCCTTGTGCTCGCGGACTGCTTCGTGAAAACCTTGAGATCCGGACTCCTCC -TCGCCCTCAATGAGGAAAATGACGTTGCATCGCAAGGTCTTCGTCCGGGCGAGATCAGCT -GCGGCATATAGTGATGCCAGAATAGGTCCCTTGTTATCAGACACACCACGGCCGTACAGA -AATCCGTTGATGGAAGTAAGCTGATAAGGATCTGTCCTCCACTTAGGTCGGTTCGTCTCT -GCGCCCACAACATCGTAGTGACCATAGAAGAGAATAGTCTTGTCCACCTTCTCAGGTGAG -GTAGCGTTAAATCTAGCAAAGACAATGGGATTGGTATCCTGGCCGGTAGTCAAGAGCTTG -GTCTTGGCGCCGAGGTAGTTGCAGTGCCGCCGAAGGAACGCAGCACCCTGGTTGCATTCG -CCAGCGAATTTGGGGCTTGCGGAAATAGTCTTGAAAGATACGAATTTGGCAAGACTATTC -ACCATTTCGTCTAGGTAAAAAAATTAGCGAGTCTCCGACGCGAGCGGCGTGCATGGGACG -TACCATTGCTGATGGGAGGCAGCTCTTCCTGAGTCATAAAGAATTCAGTCAGATCCCAGA -TTCCTACTGAGTTGTCATTTCCACCAGTAGCGTAGATCTGGCGATCCTTGAACCGTCCGG -CAGCTGAGGCAAGCATGGTGCCCTCATGAGCAACCCAGCCCCCGACCTCTTCGAATCGTG -AGTTAAATTTCTACCGGTGGTCAGCGGAGGTCGATTCACAAGCGACGAAAATCTAGATCA -ATACATACCTTGACCGTTCCACTAGAGTCACCACAGATAGCGATACCCTGGATAACGTGC -ACAGCCCAAAGATCTCCAGTATGCCTAGTGATCCGCTTAACGAGCTGCTGAGAGTCTAAA -TTCCAAATGTTAAGCGCGCCGCCAGCCAATCCACAGTAGAGTAGTGATCCCTCGACGGCA 
-ACAGTCAGCACAGGATCTCCATTCTGTAGCTTGGCGATTTGGGATGGAGCCGCGCCAGGA -TCCTGACCCAGACGCCACAACTTCACCACACCATCGCCCGCACCAGTCAGAAGAACCTCC -TCGGAAGGAGCGGTCTCTACAAGGCCTTTGACTAGAAGCATGGAGTAGACGTAGCCATGG -TGCGCGAAGAGCTTATGATGATCCCGCTTAAAGCTCAACACGCGGCCACCATCAGAGAGG -GCACCATCCGATGATCCTGGAGCGCGAGTACCGTCCGGTCCTCGGGAATCAAAGAAGCGA -TGTGTTCGCTTAGATGGATGGGCAGACGATGCTTGATTCAATGTTGACCCTTCCTCGGAG -AGGTCACACCACTGTCCGAGTTAGAATCTCGCTAAATAGGGGTGCAGACATGGATCAAGT -ACCTGTATACTTGTGTTTTGTGCACCACAATAAATGGTCTGATTTTGCGATGAATAGACT -ACCGTGAATATATCGCCTACATCGTGGTGTGAATAAATCGAGTAGAGACGTTCAAATGTT -CGCGTGGACCACACCTGTCCGCGTCATTAATCTCGGCTTCGCGATATTTTTCGTAACTTT -GACTTACGTTGATCACCGAGTCACCTCCACTTGAAAATAGTAAATTTCCATCTTCAGAGA -GATAGAGGCCTAGCACACTCTCTTTGTGTGCATGGACGGAAACCACGAGGTCATAGGTTT -CAAGGGACCACGCCTAATGAAACGGTCAGCCTCGAGCCTAGAAAGAACATTGTGGACTTG -AAATGATTCAAACTAACCACAATATCACCTCCTTGCAGGCCAGCAAAAACACACTGTTCA -TCAACGGCCAGAGCGAGAACGGAGCGACTGGCGAGCACGCGGTGGCCGATGCCGCAGTCA -CTATTTGGAAGGGCACTGGTGTCTTGAGTATGAGAATCAGATTCCCAGTTTTTCTGGTCC -TCCTCAGAGGTGCTCTCTGATTCGACTAAATCCATGATCAGTCCAATCCAATGGTTGATA -TTTTGTAGTGGAAAGTGAAGTGATGTTTGGTGTTCTCGGATGCTCTCCGCTTCAAATCAA -CCAAACAGAACACTGGAAATCTCCCGAATTCACTCCATTCTTTTTTTTTTTGAGGTTGAA -GAACTTCAATGTTTTCTTTTATTTCTTAACAAATAGGAGGAATATGAAGGAATTATAAAT -CATCGAGGCGGTACAGGGGGGAAAAAGCAATAAAAACAACAAAAATTGCACCATCGATAA -AATATCTGACTCACCATCCCTAGCATCCCTTTGATTCCAGCAACAAGTTCTTCTATATAG -AACAACAATTAGTTCTTCAATTAATTTAGTCTGGCTTGTCTATGTCCCGGTCGCATATCC -ATTCATTGAGGTCAAATATTCTTGCAATTCATCACAATGAGACGCACCTTGACTCCCACA -ATAGCCGTGCTAAGGGGGCATTGGGGCGACGGCCGACCATAATATAGGGGTATAGGATAT -ATTAAAGATAGATTGAGGACTCAGGTCGAGTGAGTTTGAAGTATCAATGAAAACGGTCCA -ATATGGAGAAGACAATGAATTGCACACCGACGTTGATCTGCACTTCAATCTTGAATTCTT -AGACGCTCTTTATTATTGATAAGGGAGAAAGATACCTACATGTACTCCGTCGGTATGACT -AGACTCTGCCCGAGACCTACTGGGTACAACCCCTGATGGTTTTCATTCTTCAGTTCTGGA -CTAAAAACATTGGCCTGCATGTGCCCATCTGTGATCAGTGTGCCCAGGGTTATCTTACAA -GACCCTTCAAAACTCGGGTTCGAGTGATACAACAGAAATGGCGCCATTCTGATCTTTAAT -CATCATTTTCGGGGTGATACCTTGAATCAGTAGATACACAATATATATCCCTGGAGGAAA 
-AAAGAAATAGTCTCGGGGAGTATCGAGCCATGCTCACATGTTAAAGCCTCAGCACCGCCA -CAGATAGCTGATATGTTTGACTTTGCACAGTACCAGCCACAGTGGCGGTGACCCCGTGAG -CCAATCAAATGTCAAATGTCACCGCTCTTTGCACATTTGCACATCTTCCATACGGAGTAC -ACCGTTTAACAGCTGATCAAAATGTCAAAGCACACTATGTCAAAAAGTCGGGATTATCTC -TTTTCCTTTTTTTCCTGTGCATCTAAATATAACAATGTGATCTTGAATTCCCTGGAAATT -CTCTTCCGGGTTCCGCCACATGCAGCTATACCGCAAGGATCGCTTTGGCGCCCAACAACT -TTCGAGTGTGATATGTGATATGAGCTCATTGAACCTACCGAGCCTTTGATACGTGTCTGC -CTCCAAAAGTGGCACCCAACCATTCTGCCCTATAGTTCCTGCAGCCTGGCCCGCTGTTCG -TTGCTCTGCCCAACGTGGATGTGCTTGATCCCCAGCCTCCCTGCTCCATACTGTCCGCAT -CACTTCTCCTACGTCTTTAAAACCTCGTTGGGACGCTTCCGATCGGACTTTTGCACTGAG -CAAGGCCTGAGGTGAACAACCCAACTGCCTTATTAACTGTCAGAAGAACTCTCCAAGGCT -GTGCCTTGCTTATTCTTTTATACCGCGCCATACTTTACTTGTGATCTATACCCCGGGAAT -TTCTTTGAAAACATTTTTTTACCCCTACACCGCTTCCTTGTCAGCCGTCGATCTCGACCA -TTTGATGACTGCTTATCGGTAAAGTCGTCGCAACGACTTGCTTTTTGCTATGGCGCCTTT -TGGTCAAACGATCGCTGTGATTGACAAGTCCGGCAAAGTTGTGAGCACGGTCAGTTCGCT -CTCCTGAGGAACACCGACCCCGCATTCTGACAAGTACTAGAGTAAGCAACTATTCGGTGT -TTTCAGCCATGCCAAAAACGCCTATCGCGAGCGCAAGTCCGCCTTCCAATCAGAACGCAA -TGCAAAAATTGCAGAACAGCAAGCACTCCAAGGACTCGCCAATTATCAGATCGATGACTC -ACCATCGGTAGCCCCATCTCGACGAAGCCGTGGGACCAGGTCAAGACATCACAGTGGCCG -CAGTCATCGTGCTTCGTCTCACTACGATGATGGACAAACAGTTGTATCGCGGCGGGACTC -TCACTACGAGCCCCCACAGACAATAGCCCGCCGTCATACTCATCACGATGTAGCGATCCG -AGATGCCCCTCGCCCATCAACGGCTCGATCAAGGTCTGATGCGCATATCGATATGGATCT -TGCATATGGGGACGCCTCTCATGCAGCCTTATCTCGATACAACCCCCCAGAGCCCCAAAA -CGATCAACAACAGCTTGATAGTCTTGTCAATCGGGCACAGTGGCTTCTCGAAGAAGCACA -TTGTGTCCAGCATGGTGCTACCGCAACCATCGCTCATCTCCAGAAGAACCCAGATGCCAT -GGCTGCCGTCGCCTTGACTCTAGCGGAGATCAGCAACTTGGGGCGCAAGATGGCGCCCGC -AGCATTAACCGCGCTCAAGTCAGCCTCGCCAGCCGTTTTCGCCCTTCTTTCCAGCCCTCA -ATTCCTCATCGCTGCCGGTGTCGGTCTAGGCGTGACGGTGGTCATGTTTGGAGGATACAA -GATTATCCAAAGAATCAAAGCCGGCGCTATTGGggaagaaggaaagccagccgaaccgga -aacagaaatggaaatggaaGAAATGATGGAGTTGAATACAGACGATCTCAGTTCTGTTGA -GATGTGGAGACGCGGCGTCGCTGACGAACAAGCCCACAGCGTTGGAACTTCTGTTGATGG -CGAATTCATCACACCCACCGCAGCGGCCATGTCGGGCATCGATGTGACCACTGCTCGTGC 
-GCGACGGGACCCTCGTTTCAAATTCGACGAAGATGCCTCGGTCGCTTCGTCTCGACGATC -CCGCCGTTCACGTAGCACCCGGGCTCCGACCCATGCACCGTCAGAGCGGCACGAACGGAG -ATCAAGAGCTCCCTCAGAGCCACCGAGTGGATTCTTCGGACGAAGTTCATCCAGGTCCAA -GGCGCCATCACAAGCGCCCAGCAAAGCGCCCAGCAGAACACCAAGCAAAGCTCATAGTCG -AGCTCCCAGTTATGCCCCCAGTCGCACCCCATCTAAACATGGCACCTATGTCTCTGAAAC -AGAGAAACGGCCTAAGGAGAAAAAGAAGGGTCCCAGCCGGCTACGACTTATGTTCACCTC -TTCCTCCTGATCCTCAAACTCTCGCCACCGACAGAATCATTATTTGTACATACATATATT -CACTTTCCCCTGATGTCATTAATGGGTGGGTATGCACTTCATGCATTTCATGCAGCTACG -TTGATCCACCCAACGCTTCTTACGACGACCTATGACTTCTATGTCGCTTTCTCACGTTCT -ATATTTTTTTGGGCATACCGCCTTTTCTATGCTTCCTCTTTCTCAACTACTTTCAGCAAT -GCAATTTCTTCACACAAGCATGTTTTGGCGTTCATTGTCGGCATCTGCTGTATCATATAC -TCATCAACCAGGAAAAAATTGGATCAATTGCATCTTCTAAAAAAAGTTTGTTTCATCATG -CAGGTTCTGCAATTTATCCTTTGCTATCAATACCTACCTGCAATTGTAGTGGGATTATTT -CCACAAGAGCCACAACTATCCCCATTTGCCTTATCATCCATCAATTTCGTTTAACTGGTG -CAGATAATTAATGCTGTCGATTTTTGATTGCAAAGCCACGATTATGGATCGCACCGCTAC -GTCTTGAGTAGGAGTTCAGAAGACACACATAATATAGTATAGTACTATGGCCCTAACTAG -ATTAACGACCAGCCTCTAGCTTCGTGAATGTTAGGGAGTCCATTATTTATGAACGTAAAT -CCACGTGTCGTGGTATAGCCAATGAAATGCCACTTAATAGATAAGGATATATGTCCGAAT -ATGTTAGGTTTATTTCTTCAGCTGCGTTTGGACTTCTTGCTTCCTGAACAAGTCATCTTC -ATTTTCGCATTCCACTATCTAGATATTGGGCTATCCAAATCTTGGTCTTCTCGGGGCAAA -AGTCTTTTAGTATAGGAAGTGTGGGAGCTCTTGATTCTCAGGCAACAGCATTTGACTGGA -AAGGCCTACCGTGAATGATGTTGTATCGTTGACCATTGAGAGTCCCTTTCCGGTGAACTG -ACTTTGGGATAGGCAGTACATCTCAGCAATAATTTCGATGTGACCCTCCGATTGAATCAG -GCCGTCACCTGAGTTTTACGACGTGGTAGTTGGCTACTTGATAGATTAGCGAGATACTGA -GCAGTCGATCAGAATACCTAATCGGCAATACACGGTCCAAAAATCTATTGTACAGACTGT -CAAGACGCGACATGCAATATGGATCAGAACAGTAAGGATTGAAGTCACAAAACTCAGTTT -CATGGGCCGCAAGAAATTGATCTGTGTCGGCAGATGGAGGGATCTCAAACAGCCGGTTCT -GCGTGGCCCGTTCCTGGCCCCCATTACCGGCAACCCCGAACAGCAAAGAAGGGACTTCCA -CCACATAGTAGTACATGGCTTTTATTTTGCCATCGGTGGCTGCAAAATGACAAATACCAT -ACATGCAACACGGAGTCCCTGATCTAAATCTGTAACGGTATCTAAGCACTGCTGGGACAG -CCAGGGGAACCCCCCTTTGCGAATCCACATTGTGTCTCCATACGAATCCCCCCGATATAT -GGATAAGAGGGTTGTGGAGGCGCTAAAGTAGCATATTTATCGTCGGCTTTGGGGCAAGTT 
-CATCCTTCTCTTTCCCGGACTATCTCGAGCAGATGGAACGGCACAGTCCCTTGAATAGAC -TCGAGGTAATACTTTCCTAAGTACTGCAAGGCCTGGCTTTGCCAGCCCAGAAATAGTCAT -TTCGGACTCTGTATTTCTATGACATATCACTCCAGTCCTCCGATAACAGTGGGAATTCGA -AAAGAATTCAGGATGGAGGTTCCAATGGCAAATGTACCTCTAGAGGTGATTGTTGTTGGG -TATGTATTCCAATACGCATTGGTCCTGTGAGGTTGTTGTATTGATGTTTTGCAGTGCTGG -TATGTGATTCTGGAGCGAAAAGGCTGCATTGCCCTGAGAAAAATGGTGTTAACAATAGTT -GAACCAGGCATCGGTGGTATGGCGGCAGCCCTCACATTAGGACTACGAGGGCACCGTGTC -ACTGTCCTTGAAGCTGCACCAAAGGTATGTGCCGTCGAAAAGTGAAACTCATGGTATCTC -GAAAGGCTAACCACTCATTATAGATTATGGAAGTGGGAGCTGGGATCCAAGTGTCCCCAA -ACATGATGCGACTATTAGAGCGTGAGTCTGAATTCGGCAAATAGAACAACCGTGCTGACC -GATACTAGGTTGGGGCGTCTCGGACTTAATTCACGCCCAAGACGTAGCTCTAGAGCACAT -CCAAGTCCGAAGATGGGACAACGGCGACCTGCTCACTACAATGCCAGTGAACAAGACATT -CGGTCAGCAAGTCGTTATTCATCGCGCAGACCTGCACAACGCTATCGTCGACAGGGCCAC -AGCCCTTCCCAACGTCAAGCTCTGCGAGAACTCATTTGTCACAGACGTGCAGTTTAGCCC -AGCCAATGTTACTCTCGCAAACGGTGAAATCATCCACGGCGACGTGGTGATCGGTGCAGA -CGGCATCAAGTCCACCATCCGTAGCCACCTGCTCGAAGACTCGACTATTAAAGCAGTTGC -CACTGGCGACGCGGCGTACCGCATCATGCTATCCCGGCACATGATGGAGCAGGATCCTGA -ATTGAAGAAGCTCATTGACGAGCCGCTGGCGACTCGTTGGTTGGGTCCTTCGCGCCATAT -TGTCGCCTACCCTGTGCGAAAGCATGAGCTGTACAATGTGGTTCTACTTCATCCCGATAG -CCGTGGCGTTGATGAGTCTTGGACCACACAGGGTTCCAAGCAGGCTATGGCTGACAACTA -TGAGGGGTGGGATAGCAGGGTGGGGAAATTGATTGATCTGGTCAACGATAATGAAGTTTT -AGAATGGAAGTTGTGTCTGCATCATCCGCTGAAGACATGGATTCGTGGATCTGTCGCTCT -GATCGGTGACGCGTGTCATCCCATGCTGTATGTGCTCATGAACTTGAACTGAAATGGCCT -CGAAGACTGATCTTGCTGGTCTAGTCCATATATTGCGCAAGGTGCTGCTCAGGCTGTTGA -AGATGCAGCTGCGCTCGGCGTTGTATTGTCGACTATCTCCTCACGCGAGGAAATTCCGAT -AGCATTGAGAGCTTATGAAAGGTCCCGGAAGAAGCGTGCTGAGATGGTGCAGCAGTCTGG -ATCGGAGAATCGTATCACGTTGCATTTGCCAGATGGGCCTGAGCAAATTGCCCGAGACGC -GCAGTTCAAGGCTTCAGCCGGCGGAAATAACCCTGATAAGTGGTCGGATCGCAGGACACA -GGATTTTCTTTGGGGCTGGGACGCAGAGAAGGCGGCATTGGATACTTGGAATGGTATGTC -CCCTTTAAGTCTCTGGTGGTCATGGGCTAATTTGTGGGCAAAAGATCAAGGGCCGCTCAA -GGTTAATGCGAATATGTAGGCTATCGGTACGATGCATCACTTGTTCATATCTCGGTTTCA -CCGCCATTGTTCTAGAAAGAAACGACTAAAATTAGACTACATGATCTTAGAAATCTAAAA 
-CTCTTCGAAAATCAAATTCTCTTGGACATAATGAATATGGGATCTCGTTGAATTCATCCT -AGACATTGCAATTTTAATACCAATTCCCAAAGTGCTAATATTTTTTCTAGTTCATTATAC -AGTCATTCAAGCCCAAGACCATCCATCATAAATCGGTGGAATATTAGACAAGGAGACTTG -ACATGTTGGCCCCAGGAAGCCAGGGATGGTCAACATTTGGCATTCCAATCATTCACAATT -GAAGATGACATGTCATAGTTCTGGAGGCATAATTTACAGTCTCTCACCAGCGGCGGCCTT -CATCTGACCGTACTTCTTGAGACGGATGGCAGAAACGTTGATGAAGCCAGTGGTCTCGGC -GGGGGAGAAGTCACCAATCTCATCCATGGAAGACTCGCTCATGTCGTAGAGCTTCTCGGT -CTCGGAGGAGCGACCGAGGATGGAAACGGTGCCCTTGTAGGCGCGGCAGCGGACCTGGCC -GTTGACGGACTTCTGGGAAGCGGTGATGGAGGCCTCGAGGAACTCACGCTCAGGGGAGAA -GTACATGCCGTTGTATAGGAGCTGGATATGTTAGATCAGACAATCGAATATCCGAGAGAT -CGTGCTTACCTTGGAGTAGTTGTAGGTGACGAAAGAATCGCGCAGAGCACGGACCTCACG -GTCCATGACAAGACCTTCAAGATCACTGAACACAAGTTAGTAACGCTTATCAACATTTCG -TAACAGAATACTCACAGGTGGGCAGAGCGCAGGCAGGTAAGACCAGGAGTCTCGTAGCAG -CCGCGGGACTTGATACCGATGAAACGGTTCTCGACGATATCGATACGGCCAACACCGTTG -CGGCGGGCAATGGTGTTAGCGGTGAGGAAGAGCTCGACGGAGTCAGTAGCAACCTTCTGC -TTGCCACCCTCGGTGTACTCGAGCTTGACGGGAACACCCTTCTCGAAGTAGAGAGTGAAG -TCCTCAGGCTGATCGGGAGCGGTCAGGGGATCCTGGGTGAGCTTCCACATATCGGCGGGA -GGAGTGGTATCGGGGTCCTCCAGAATGCCGGCCTCGTAAGAGCAGTGGGCAAGGTTCTCG -TCCATAGACCAGGGCTTAGACTTGGTAGAGGTGACGGGGATACCCTTCTCCGCGGCGTAG -TCGAGCAGGTCCTGACGGCCCTTGAAGCGCTCGTAGAAGACGGGTTCGCGCCACGGGGCA -ATGACCTTGATATCGGGCTGGAGAGCGTAGAATGCAAGCTCGAAGCGGACCTGATCGTTA -CCCTTACCAGTGCAGCCGTGGGAGACAGCGAAGCAGCCCTCGCGCTAGATTTGGGTCAGG -ATGACATCTTGCTGGTGGGGAAGGAAAGCGTACCTGGGCGACCTCAATCTGGGCACGGGC -AATAACGGGGCGAGCCAGGGAGGTACCGAGCAAGTAGACGTTCTCGTAGACGGCATTGCA -AGCAATGGCGGGGAAGCAAAGCTCCTCGATGAACTCGCGGCGGATGTCAATAATCTCGCA -CTTGAGGGCACCAATCTTCAGGGCCTTCTCGCGGGCAGCCTCGAAGTCCTCTGCAAATAG -TTATTAGAACGGTGTCGGATAATATGTCGACAGAATTGAGTTACCTTCCTGGCCAACATC -GGCCACTATGTCCATATTAGTACTTGATCTTGATTGACATGGCCATGGATCACGACTTAC -TGAAACAGACGACCTCATAGCCCTCCTCGATGAGGTACTTCACTGAAGCTCAGTATTAGT -AGCTGTGGTGAATCAATTGACTTATGGTAATCATACAGATGCAGCTGGTATCGAGACCGC -CGGAGTAGGCGAGGCAAACCTGTGCGATTGAACTGTCAGAGAAAGTGTGGAGCAGGTGGC -GGTGTGGGATAAGAATTAGGAATTGGAGTTGGGCCTTGAGAAGGAGGCGAAAGCAATGGT 
-TCATACCTTTCCCTGTGCCATTTTGAGGAATTGAATAGAATTCAAAAAGACTTATAAACG -TGGAGTATAAAGGAAAGGAAAGGTTGAGTCATTGGGAAAAAAAGGCGATTTTGGTGGGCC -GGGGAACAAGTCACGGGTCCGAAAGTGATAGCCCTGATAAGATCATCATTGCCCCTCCAA -GTCCGAATCAAATCCGACACAGTCATAGAGCCTCAGGACATAAGTCCAATGGCTTTTATA -TATGTAAACTAGTGGACCTAATTCAGAATTCTCAATTCCGAGAGTTATTATGTTTTCCTA -TGAAGAGGATGAGTGTGTGTTTCGTGTTTGACTTATTCACATATATCTCGACAAGATCGA -GTCGATTAGAGGCACGCTGACATATACAAGAGAGAGCGTAAAAATCGGCGTATACGTCCG -TCTGCGTTCTATGTTCTATAGCGCATATAATATTGCCCTTATTTACTGTGTTGTCCAGAC -CAGGGCGGCCATCGCACTTCGTCATAGCTCCTGTTTGACTGGTTTGACAGGTTTGCGCAA -TTTGCATTATCTCGGCTATCCGCGGCGATTTCCCCGCGCAATGCCCCGGACAATGAACAA -GGAGTAAATTTATAAGGCAATCATCCCTACGGAGAAGAAAATCTATGTATTTACATTTTG -TCCATATTCCGCAATTCTAGCCAGAGATGGCTTCTTCACGAAATTCTTTGCGGTCTCTCG -CCCGAGCTCCTCGACGGTTTTACTCTGGAGCTCCTACTCCAGCAGCAAAGCTGAACCTAC -CTATTGACTACAAAACCACACCAATCTTACACCACACTGCAGCCTCCTTGTCAAGTACAT -CAGAATACCCTGCAGGTGCCACGAGCAAACGACTCAACCTCTACCAAGCGATCAACTCAG -CAATGCGGACAGCACTATCCAAATCCGATCGAACTATCGTGTTCGGCGAGGACGTTGGAT -TTGGAGGTGTTTTCCGATGCTCGATGGATTTACAAACAGAATTTGGGTCAGATCGAGTGT -TCAATACCCCCTTGACAGAACAAGGCATTGCGGGTTTTGCAATTGGAGCGGCAGCGGAAG -GGATGAAGCCTATAGCTGAGATCCAATTCGCCGACTACGTATTTCCAGCGTTCGATCAGA -TTGTGAATGAGGCGGCCAAGTTCCGGTATCGAGAAGGTGGGACAGGTATCAATGTTGGGG -GCCTCGTCATCCGCATGCCGTGCGGTGCAGTTGGACACGGTGCTCTGTATGTCATGTTGT -ATCCCAGTGCCAAGCAAATTGATATGCTAATTGGCTCGAATAGGTATCACTCTCAGTCCC -CAGAGTCACTTTTCGCACATATTCCCGGCTTGCGGGTAGTTATGCCACGGTCCCCTGCTC -AAGCAAAAGGACTTCTGCTGTCTTCTATCTTCGAGCACAATGATCCAGTGCTTTTTATGG -AACCTAAGATCCTCTACCGTGCAGCTGTGGAATATGTCCCGAATGAATATTACACCATTC -CGCTCAGCAAGGCGGAGGTTATCAAACCTGGCAAGGACTTGACCATCATTTCATATGGAC -AGCCTCTTTACTTGTGCTCTTCTGCTATCTCCGCCATTGAGAAGACCATGCCGGGTGTCA -ATGTCGAACTTATTGATCTGCGAACCATATATCCCTGGGATAGACAAACTGTCATCAACA -GTGTCAAGAAGACTGGCCGGGCCATTGTCGTACATGAGAGCATGATCAACTACGGTGTGG -GTGCAGAGGTTGCATCTACGATACAGGAAAGTGCATTCTTACGTCTCGAAGCCCCCGTCA -AACGAGTTGCTGGGTGGAGCACACATACTGGACTGAGCTATGAGCAATTCATTCTGCCCG -ATGTTGCAAGTGAGTACCTGAGGCCCTTTGTCTTATTCGCATTAGCACTAACCAACCGTT 
-TTACTCTAGGAATCTACGACGCAATTAAACAAACACTCGAATATTGAACTGGATGCCCCG -CATGATATTATAATAATACGCAGGTCATCTCTATCCGATCACATTTTTCACTTGCAGTTT -AGTGATGACCAAGAGCAAGAAATTGCAAGCAAAAGAAAACTGCATGAAATGTTGACACAA -AGTACAACTGTTCTCAATAGCTACACATTTTACATAACGTTCCTTGCTAGAATATATCAG -TCGGTTATCAATTCTTTTTCGAGACTTTTGCATAATGCACTAGAGACCAGACTTGTCTTC -GGGGGCCTTTTTTTTTTTTCCTTCACTCTCCCTCGAGGTATCTTTATCCTTTCTTCAATC -CTCCCAAGCTCTCTACCTAGGTATCTTGTTTGATCTCCACTTGTCTCAACTCATCTCAAT -CGCGTCACCTTAAACCTGTGTCGTGAGTACACAGCTGTGCGAATGTCTATTCACCGTCTG -TCTCAAAGTATTCATACCCTTTCCCTTGGTCATAAAGCTGTGGGAATCTCTCATTGCATT -TGAGTGAGCATTTGCGTTGACCTAGAAAGCAAAGATGAGCTTTAGGTGCGAGGGAGAACC -CATTAGGGTCGCCGAGCGGCCATCCAGATACCGAGATAATCACTTTTGGCCGGGCTGCCA -TTCTTCGAGATATTCATAATTGTGAGGTGAATGAAGCTCCAGCAGCTTACGCACCTCTGG -CGCAAGCGCAATCTCTGGAACAAGGACGCCCGCCAAACTTAGACTTGATTCCCAAGTCAC -ATGTCCAGGTCCAGTCCCGCCAGCAAGCGGATGCCACGACCGTTATCGCTGTGTCTCCTT -CCGGCGTCTTACGGAAGTGCGCCACTTGGAGATTTATCCCGAGTCGAACATGATTGAAAA -AGCTGCTCACATGCATATTGACGGGGATGTGTCATCCGCCTGCTGAAAAACTTCCTGGAC -GATGAAAGCTTGTTTACTCCTCGTTTTTGTCGTCTATCTTGGTCACTGAAATCTGCGCGT -AGGATCATCAATGATGGGCTATGGGCGTATAATTCACGGGGGAAACGACTGAACACGGAG -ACCAATCTCGTAGTTATTGCTCGGATCCAGATTGCTCTTGATGCATCCTACCAGCTTATG -CCCCTGGTGAGATCACAACTAGCGGTTCAAGCAGTTGAGAAACGTGTGTCCTTTCTTTGC -AAGGATATAGTTCATCGGGGAGAATGTGTTAATGATCGTGTAGAGATCCTGGAGCTCGGA -AGAGCAGAAGGGTAGCAAAGATGCACGAATTGCAGCCATCTCGTTGAGCTCTGTAGCGGA -TGCAATCATATAGCATGAGAATTTTCTTTCATCGACATAAGATTTCTTTGACTTTGTGGC -CACTTAAGCTAATACGCACTCATTCTGCAGGCTGTTGAACAAGCAGTTCAGTCCCTAATC -GCAGAGCATGATTGTGATCATGACGTAGCGTGGAAAAGGCTCAATATCCCCGCTCAATGT -GAGGAATGTGGTGACTGAATCCTTGTCTATATTCTCGAATGCAGGAATTGTGGACCCTAA -GCCTGCAATGATTGCCAAATGCAGTTCTAAAGGACGCGAAATGAGTTCCGCATGGGTTGG -ATAATTCAAGGATATTACCTATTCCGGGTGTAGGATTTGAAAAATGATACCAAGCATTTA -TGTCACTTCAAGATAATTCTAAATAAAAAGGTATACCAAGTGCAGAAACCAGAAGTTTCC -AAAACACATGTTCATCGTTGGTTGAAGGCTATGTAGGACTCGGAGTATTCCCATCTTGGA -AATATTCCGACCTCGGAGGTCCTGATTCGTAAGCTCTCACATAAGCTCGGCCTTAATTCC -GCCACATGCCTTTGAAATCAACCCTAGACAAATCCGTACAATGTTGTAATCACCCCAATA 
-TGTACGAGTATTACGAATATGCCATGGATGGCATCATAATTGAGGTTATATAAGGGGCGT -CTGGCTTTGAAGTAGTCATTCCACACATAACATTTTGATTCCTCTAAATATTTTATCCAG -TCTAACAATCTTTTCTCATCGAAGAAAAAGACTGATATTATCTTCACCGATTTACATCTC -AACATCAACATGACGAGCACCAAGGTTGCCCTTGTAGGAGTATGTAGATCCAATATCCTT -CATACCTTTCCATTCGTTAACATGATCCCCAGGCTAGCGGAAATCTCGGTCCTGCTGTGC -TCAAGGAGCTGCTCACCGCAGGCTTTGACGTGACTGTTCTCACCAGAAAAGGCAGTAGCA -AAACATTTGACTCCCGTGCCCATGTTGCAGAAGTGGACTATGAGTCCCTCGACTCACTCA -AGGCCGCGCTGTCTGGCCAGGATGTTGTAGTCAACACTCTAGGTGTCGGCGCTATCCCCA -AGCCCATCCATCTACGCTTGGTTGATGCAGCAGTGGCAACTGGCGTGAAGCGGTTCATCC -CTTCGGAATACGGCTGCGACACTACGAATCCCGCAATTTCTAAATTGCCTGCTTTTGGCG -ATAAAGTCAGTGTTCAGGAACATCTCAAGAACGTCGCTCAGAAATCGGGCCTCAGCTACT -CGCTACTCATTACTGGTCCCTTCCTTGACTGGGGTCTTGAGAAAGGCTTCATTCTCAACC -TTGCTGGCCCGGCTGTGTTATATGACGGTGGGGATCGCCGTTTTAGCTCAACGACTCTCA -GTGGCATCGGCAAGGGAATTGTTGGGATTGTCAACAATCTCGAAGCCACTAAGAACTCTA -CAGTCTATATCAATGAGGCCAGAGTTACACAGAATGAGCTGCTGGAATTGTCTGGCAAGA -GTATTGGGACGAAGATAGTTCATACCACAGACCTGGAAAAGGAGGCTTATGCAGAACTGG -CTAAGCCGGCCCCTAACCCGGAAATTTTTGCTACGAAATTTATTCTTCGGGGTATCTTCG -GTGAAGGACATGGCTCCCTTTTCGATTCTGAGAAGCTTTCTAATGATTTGTTTGGCTTGA -AGACTCTGTCTAAGGAGGAGATTCGCGGTCTTATCCCCCAGTGAATACTCTCTACAATAT -GTAACTAGCCTTATATGTCACACACACATATACTTTTGTTAGGTTATCATCTCCTTCATC -TTTGGGGTTATTCTCAAAAGAAGGCAGGTGTATTTTATAAATTACACTCTTGCTAGACTT -ATCAGAGCGTGGATTGTGATTCTTGATATGTTTAATTGATTGTAATTCTGGCTTTCAGAG -TTGCCATCGAGCTCCCCGGACACAGATCTAACCCATGTGAGCCATGTAGGTCCAGTTTAT -CAACACATATCAAGGGCAAGCAAATTAAGCACATTAGACCATATTGCTCAATTTTCCATA -TAACTAGTCTCGTCAGATATCATTGGCTTAACGCAAGGGCATAGCCTTTGAACTTTCATT -CAACTCTTGGGTGAGGTCTTGATGTAGAGCGATACGGTCAATTAGTGGTCCCTCACTTTT -TCCCACACATAACACATATGCTCATTGCGCTTGGACACCTAAATGACGTGGTCAGTCTTG -AGTTACAAAGCTTACTGGGATAATATTTGGCCTACCGCATGTGCCGGCAACCAGCTACGC -GCTCTATGACTATAGAGCAAGCTGGACATCGTTGCCACCTCTTCCTCTTTGTTAGATTTT -CCAGTGATTCATCATTAATTTTCTCCTCATCTTGAGTCCCGTTTTTATTATTAGCGTCTA -CGTAGGTGCAGTCCCCTCTGTGTGCTTGCCTTTTGCATTGGGTGCAAGTTCGAGTGACAC -AGAACTCGCAGATGCCCAGCCCACGGCGGATTTTTTTGGCAGGATATAGCGAGCACAAGT 
-TGAGTTTGAATAATAAGTTCTACGAGAATCGCTGGCTTCGGTCTTCCGTTCCTTGTATTC -CTTCACTATTTCGATGCCAATCATATCCTCGACGGTGGTAGAAGGATAAATCGGCAGGCG -GCAGCATCGAGGAGGAAACAAGGCATTGATCGCAAAAATTTTTGGAAAAAGGCGCTCAAT -ACATTCGCGGCAATAATTACGGGTGCATTCTGTTTGAATAATATTCGACACTGGATATAT -TTCCGTACAAGATATAAAATCTTGCATCTTGATTTCTTCCTCGCTTTCAGGTAATTTCTC -GCTATTATCCGTGATAATATCTTTGGCCATTTTGTTCAGTGATGTGGCTGGGTCCCACGA -GCTCCTCGGGGTTGCTCTGTGTCCCTCACTCTTAATCCTGAGTTCTCCCAGCTGTATTAG -TGTTCGGTTGGTATCTTCATCTTGACCTGAATCCATATCACACTCCATGTAGTGGCCTGT -TGGTATTATGGACAAAATGTCGGATACTCATGCAAAGCGAGCTAAAATAGTTTTTGGAAA -ATCAAAAACGATGGCCTTGTTTGTCAAATCTTCCCTTTTAAGTCAGGTCATGAGCAGAAT -TCTGGATTCTGTCATGAATAGTAATATATTGCAGAATGCATACTaaaacatatacaaatc -aagaaaatcaaatctaaatcTGTGTCTAATTCTGCAGGGCCGTTACTCAAGCGTTGAAAT -ATCAAGCGCTAAAGTGATCAACTTCTCAACTTTCCGAGGGCAAAATGTCGCATTTGAGTA -GATGGTAAATGATTGAGCAATTTGATATTTCTTGAGAAGTAAATGAGAGTAATTGACCCT -AAGGGCAACTGTGCAAACCATATGCGTTGCCCTTCCATTTCAACTGCACTTCAATTGCAT -CCATACCTATCTCCCGAATGACCAGTCGTGTCATATCCTGTCCGTTCCCAGACAAACCCG -AAGCGGTCCCTTGGCGTTGCTAGTTCAGGGTCGTTCGTAAAGATAATCTTCCTCTGCGTA -GATACATTTGCACGTTTTCCACTGTTTTCCGCAAAAATAACAAAATTCGAACTTGCAAAA -GCACCTAGTAGATAGATGTAAGTCCCAAAACCTGCACTGTGAGAAATGGTAGATATTTGC -TTGATGACAAATGAAATTGTCCTTACATGATGTGGTTACAGCCGGTGGTGAGTTCAATCA -GACGGCTGCAGTTAGAGCACCGTTGCCATTTCTTGAATTCCGCCAATTCCTCCAAGAGAG -CATCGAGCTTATGTACACACTTTCCCCTATGGGCGCGCTTTTTGCACTTCCTACATGTGT -GTATTCCACAGCTCGTGCATTTGCAGACCTCATTGCATGTGACCTTTTGTGGGAGATATT -CAGCACACTTCGAATTAGAACAATATGTCCTATCTGGATCGCTGAGCTCGATCGCCTTCT -CCTTGGGTTTTTGAACCAATGCAGATCCAATCATCTCCTCCGAGGCCGTAATTGACCTGT -GGCAGCATTTAGGGGGAAACAAGGATTCATCGCGCAAAGAATTCGTGAACAGGCAGATCA -AGCAAGCCCTGCAATAGAAGTGGGAGCACTTCGTTTTAATCATATCGTCTTGGGGTTGAT -CGTCCCAGCAGGAGACACACACACGCCAAATCTCGGATTCGGGGGATCCATCCAGGTTGA -TAGCATTTTCGAGTGAGTCACCAAGTTGCTGAATGGCGATAGGAAGGTATTTAGAAGGAG -ATCTTCTTCTCTCGTATCTGGACAAATCATTCTCTGTTTCTAGTCCCATTGACATCGCTG -TGCCCATGTTGAAGTTGCTCATGCTGAAGGTTGTTTCTTGTTCCAGTAAAGAGTGAGTTT -CTGTGTTATTTGTATTCTTTTTTGCACTGTGGTCTCCTAGTTGGTTGTCACCAGGAATAG 
-CTGTAAGACTAGAACTTGAGATTTTGTAGCTTGATCTTTATCATAGTCAATGTGACGAAG -AGAGGACTTTGGCTGCTATGGAGTCAAAGGCATGCCAGAAAGAAATCTGAATATATACTG -AAGCTTCTGTGAAAGAGGTAAAAGAAGCAAATCAGTTAGTGGTGAGGCTGATACTGCGTA -ACCATACCTATGGGAATGCCTAGAATACATGGCCCCCACACAGCAGAAACAATTGGTGAT -GCATAACATACTACTAGTGGTATTTCAAAGACTTGAGTTTGAGGACCATCAGTGTATTCT -AGACCTTCAAGTGTGGATTTCTAGCTCGCATGAGGATGGAAGTGGCGTCGATGATCGGCA -AATTCACATACTGATCTTCTACATGTAGAACCTTGAAAGGTCTCTACACCATTCTGCTAC -ACATGTGTATAGTCATAGGCATATTTCCTATCTCGCTTAGCAATATAACATGGTCAATTG -GCTAAGTACATTGAGCGATATCTGATAGATAATATCCCATATTTCTCTAAGCCAAAAGTA -CTCAAACCACATACCTTACTCTACGGATCAAGCTCATCAGAATAACCCCCCGTTCTATCA -TCCACACGGTGCTTTTTGGATCGCTTAGATTCACACCTTCGGACAACCGCAAGTTTTCCA -CTTCTTCCCACAGTAGCCCCACTGGGCTCTGCAAACACAACTATCAGGTGTCAGTGAAGG -GCTTTGTGAGTAGCTTAGAGGGCTTGGAAAGGGCTAAAAATACACCTTCATGATGTGAGG -ACAACCCTCATTGAGCTCAATCACACGGCCGCATCTAGTGCTCCGTTGCCATTTCTCCCG -CTGCATCAATTTCTCCGATGACTGGTCCAGCTGATATAAACACCTGCCCCTGTGGGCTGC -TCTCTTACACTTCCAAAACGTGCGTACACCACATTTGCAGGTGCAACCCGTTTCACTGGG -AAGATATATTGAGAGCATGCTAGATCTGAACATTAGGTTCGGTCTGGGTCGCTGAGCTCA -ACTTGCCTCTTCTTTTGTTCTTGGGCCAATGCAGGTCCTATCAATTTCTACAAGTCTGAC -CCTTCAATTGGCATTTGACAGCATTGAGGGGGGAGCAAGGATTCATCTTTCAAAGAGTTC -GTGACCATGCGGATTAAGCAATTCCTGCAATAGAAGTGGGAACACTGAGTCTTGAAACCT -TTGCTGTAGGCTTCTCTACAGGAGGTGCAGACCTCAAACTGGGAGGAGTCAGGGTCAGCC -GCAGTCGGAAGTTTCAAAGAACGCATGATATCCCTGAGAGTGTGGACTAGCTTCTGATAC -AAGGTAGAGATAAATCTGACGAATGTCTTCGTTTTCTTAGGCGACGACGTCTGTTCTGCT -TCCTCTCCTAGCGATCAAGCTGTGACTATTTTACAAGTCATATAATATGTCTCTTTGACT -TTCGATTTCGGACTCCCAGTTAAGAAGTGCCACTCCCCTGTCGGGCTCTATCTCTTCTGG -CCTGTTGACTGGTTAATTTGTAGTCTTCGGCTGCAGCTGTGCGTCGAACCTGTCGAGCTT -GTCGCTCACGTCTTTTAGCAGCAGTTCCAATATGAGGGACTTTGTAATTCTGGAGTGTAT -CTTGGTCCATCTTGCGGGGCAACTATCAAACGATCGAAGCGCGCCAGGGGTTTTGCAAGT -AAGGAGAAGTAGAAAAGAGGGTGCGAAGCTGACCCCTGAAGTGATCAAGTAGTAAATGTC -TGACGACCAAGACTCCCACGTAAAAATCACTCAGCTTCATAAAGGGTCTTGGCTCATGAA -AAGTTATTCAAAATACAATATCTCTACTTGAAGTGTATGACGAACCTTTGAAGTATGGCG -ATTCGTGCTGTAAGTATGAGATCGATGTAAAACGATGTATCAAGCGCCATCATTGTCATG 
-ACTCTACTAAGCCCTTTTCTTAGTCTAGTTTGTTAAATAGTGTATACTTTAATTTTCTTC -CTTCCCAGGTAATGCCATTTTTAAGTATAGATGCTCTAGGATCGATCCCCAGACCAATGG -ATGAAAAGGAAGTGATGAGTCAGCTGTCTTAACCCCGTTATCTCCGACTACAGTCCCGAG -GTCCGTCGATAACTTCGTCTACGACACTATCAACTCGGACAAGCTTTCCAACTTACCACA -TACTCACTGATCTCTGACTACACCGAAAGCGTAATAAACACCCAGCTCGCCCAAGATGCA -GGGCTTCAGTATGTCCCAATCCCATTTCAAAGAACATGCGAAACAGCCACTAACATGATT -TCACTAGACATGGGACGGTCAGTAAAAATGCTCAAATCTCCACATAATTCCCAGCTAATA -TCCCCAACAGATATGTCCCACCAGACCAAGAAGGAGTGACAACGGGAAACAAACTTGCGG -GAAAGCACGCACTCGGCGCGCGCGCGCGTCACCTACACACAACCGGCGCATTAGTCGTGC -GCTTCGAGATGCCCTTCGCAGTATGGTGCGACAACTGCAAACCACACCCCATAATAATCG -GTCAAGGCGTGCGCTTCAACGCAGAGAAGAAAAAAGTTGGCAACTACTACTCGACACCAA -TTTACAGTTTCCGCATGAAACACACCGTCTGCGGCGGAACTATCGAAATAAAGACCGACC -CTAAGAACACAGCGTACATTGTTACCGAAGGCGGGCGCAAGCGGGATACTGGCGAAGACA -AAGAGCTGCAGCCCGGCGAAATTGCCATCAAACCGTACGCGCGAGAGATGGATCCCGCGG -AGAAGGATCCATTTTCGAAGATTGAGGGGAAGATCGAGGACAAGAATCGTGCGAAGACGG -AAGCGACTCGCATTCTTGAGCTTCAGGAACGACAAAATCGTGATTGGGAGGATCCGTATG -AGAAGTCTATGCGGTTGCGGAGAACTTTTCGGCAAGAGAGGAAGGGGCTTGAGAAGGCCG -CGGCGAAGACTGAGGCCCTCAAGGATAAAATGAGTCTTGGGATTGAGCTGTTGGATGAGA -CTGAGGAGGATCGGCAGAGAGCGCGCATGGTGGAGTTTGGTGAGAACCCGGCTGATGCTG -GCTCGCACGCTGCCCGCGTGACTCGTATTCGGCCTATGTTCGAGCAACCAGCGGAAAAGC -CTGCACAAAAGATGGGACCGGGAGAGACTAAGTCTAGTTCTTCGAAGCGGCCTAGGAAGA -ATGATCTGGTTGCGACACGGAAAGCTACTCTCCGCCGCGAGCTGGCTGGGAACACTCGTG -CAATCATTGATCCATTCTTGACCGAAAATGCAGGTAATCCGAATGCATGGCAACCTAGCA -TTAAAAAGAGAAAAACTGTCGCTTCAACTGTCCCTTCAGCCGTCCCTTCATCGACTCCGT -CCATCTGTGAAACTCCCAGTGGCACAGAACTTGATGCCTCTGTCGTGACACCAACACCAG -CGCTGGTCAGCTACGCTTCCGACTCGGAGTGATGTTGAATTCAGTTTCGTGGATTATTTC -CGAACATCAAGAGAAACGTTCATCGAACTTTATTGTATTATGTATTAAGATACCCACTTT -TGTTGTTGACTTTCAATTATAGCATTGCGCTTGAATTATCCCTCTCAGCATAGTTGATAC -GAGCATGTAACCACTTGTACATCACTCCTTACTTAAGAACCAGCCAATATGGCCATTGTA -GTGACGCAAATTGGATGGTACGAAGTTCAACTCAGACAAGTTGTATGTGTAAGATAAAGA -TTTTGAAACAAGAATAGTGGGAAAACCCCGACATAGTAAGACAGGGTATTCTGCGTAGTT -ATTACATACAGTAGAATCACAAGAGCAGAGAGTGAAACAGGATTCAACAAAAGAAAGTCA 
-TGCTGCATCACGACTAAGCGCTCAGGAGGATGCCAAATCAACGCGAGATTTGGCGTTTAC -CGAAAAATGAGACTGTTGCTGCGCGAGTGCGAGCAGGTTCTGGCTCGGGGGCTGGTGGGA -CTTCTGAGGTGGGCGTGGTCGAGGGGCTGGGAGACTCGGAGGATGCTTGCCGGGAGAGAT -TGGCAAGTTCGCCAATGAGTCTGGAGAAATCAGCATTGGTTTTGATTAACACGACCATAT -ATTTGACTTACCTGTGGTGCTCGCTGTACCAATCATCATTGGCTGTTCGGATGTGCTTAA -GCTGTTCCTGAATGCTTGCCATCTATGGAATGCATTAGTCTCTGTATACACTAATATAGT -CTTCATAATCATGTAAATGGGTGGAATGCGGGAAGATCTGATGAGTAGGAAGACTTCTTC -AATCGGGGATCTCCCACCGTTCCCTCAATGGATGGGCCTAGGTCCTCACCTTCCGTTGTG -GCTGGACCCCCATCTTCAGCGTTCTCGCTCCCCTCATCAGCTGTCGCCCAGCGGCTAGCC -TCCAAGCCCGAGGAGCTGCTCTTCTCCCCGCTGTCTGCTTTCAAAACTGCGAGCTCGGTT -TCGAGTTCGCGGACCCGTCGCCGGCTGGCAGAGATGTCTGAATCAACATCGCCAAGCTGG -TTGAGGAGACCCATGTTTTTGTCGATCTGGTCGTTCAAGTCACGTTCACCAGCTTCGAGC -TTCTCGGACAAGGTCTTATTTTGGTTTTTGCAATCAACGAGCGAGTCGGAGATGGCGGCG -TTATCGGCCTTAACTTTGAGAAGTTGGTCGCGGGACTCGGAAAGCTGCTTCATAATCGAC -TCAAGATGCGACTTCTGGCTGTCCGAGTCCTTGGCAAACTTCTCCAGTTCACCCACCTGG -GTGACAGCGGCTTGGTATTTATCTCCAAGGGTAGCATGGTCAGCCTGGATCTTGGCGAGC -TCAGCTGAGTAACGACCCTCGATTTCCCTAGAAGCAGTGGTAGCTGATTCCTGTACCGCA -AGATGGGCCTTCGCAGAGTCAGCAAGCTTGGCCTGCAGATCGGCGACTGTGGCCTTCAAG -TCATCAAGAGCGGAAGTGTCTTGGGCAACAGCCTTGGCCTGAGACAACTGCTCGCTAACA -CTAGCTAGTGCTGCATCATGGGCCTTCTGGAGATCGTCAATCTTTGTCCTGTTCGAAGTT -TCGAGCTCCTCCCTGAGGCCGGACAGCTTGGAGTCGTGCATGGTTAGAAGCTGTTCAATT -GCCTTTCCGTGCTCGGCATCAATCTCAGCAGCCTTTGAGGCTTGAACAGCCTCGATTTCA -GCCCGGAGTTTGGCTTTGGTCTCTTCAAATCCTTGCTGCAGTTCGGCAATCACCTTGCTG -TGCTCAGCATCGATCTCAGCAGCCTTGGATGTTTGGGCGGCCTTGATCTCAGCCTGAAAT -TTGGCTTGGGTCTCTTCAAATCCTTGCTGCAATTCAGCAATTGCCTTGCTATGCTCAGCA -TCGATCTCAGCAGCCTTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
-NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCTGCAATTCAGCAATTGCC -TTGCTATGCTCAGCATCGATCTCAGCAGCCTTAGATGTTTGGGCGGCCTCAATCTCAGCC -TGAAGCTTGGCTTTGGTCTCTTCGAAGCCCTTTTGCAGTTCCGCGATTTGTTGCTCGTGA -AGCTCGGCGTCAGCTTTGCTGGTACTCTGCAAAGCAGAAAGCTCGGCTTGCAGCTTTGCC -TGGGTCTCTTCAAATTGATTCTGAATTTCAGCGGCCTCCTTTTCGCGACCCTCGGTGTGG -GAAGCGATCTCACGGGCGTGGCTTTCAAGTGCCGTCTTCAATTCAGCCTCAAGCGACTGG -ACCCGGGTGGCGTGGATAGACTCCAACTCCTTGTTTGCGGCAAGCAACTCATCGTGCTGC -GCCTGTAGATCACTCAGAGCACTGTCATGGGATCCTGATGCCTCCTTAGCCCGCTCCAAT -TCGGATGCAAGGTGGGCAGCATGCTCAGCCTTCAACTCTTCCAAGGCTTGTGCGTGCTTT -TCCGACACATCGGCAGCCTCCAAGGTGTGCTTCTCGAGAATGGCTACAGCCTCTGCAGCC -TTCTGCTCCAGTGCCTTCATCTTGTTGTCCTTCTGAGCACCCTCTTGCTTGGAGTGATCT -TCGACATCCCGTAGAGTGCGGTGGATAGCTTCGGCTTCGGCTTGCTTGCTCTGCATCTCT -TCGAGCAGCTCATTGATTTTCCCAGTGCCAGTTGACAGCTGCGATTCCAGCGACTCACAC -TTATGTCTAAGAGTGTCAATCTCCTGGATAGCAAGGTCTTGTGCAGAGGCATTACCCTCA -TCCAATGCACGGCGGGACTCGCCGAGGGCAGTCTCAGAAGCCTCCAGCTTTTCTTGCAGC -AATTTTAGTTCTGCTTCATGGCTTGTTTTAAGCTCAGTAACAGCATCTTCTTGTGCCTTC -TGGGCAGTAGCCTTAGACGCCTCGAGGGCGTCACGGAGTTCCTGGAGTTGTGCGGTGTGT -TCTGTGCCGCTCGATTCTGCCGCCGCCATAGCAACAGTGAGTTTTTCTGCGTGAGAAGCT -GAAATATCACTAATAGCAACTTCGTGTTCAGCCTTGAGGCTTGCGATAGCAGCCTCATTA -GCAGTCTCCAGCTCCTTGATCTTTGATGACATTTCAGTGGCCGCTGTGTCGCGTTTCTCC -TGAAGCTCACTTTGAATCTTCTCCAGGGCCTGTCTAAGCTCGGTGATTTCTTGATCCTTT -TCGGCAATCAAAGTGCTCTGCACAGAGCTTTCCTGGGTGACAGACTCGGCCGCAGCAAGC -TTCGCCTCAAGGGAGGCTACTTGCTCTTGAAGGCTAGAGACACTCACTTGAGCAGCCTCT -TCGGCAGCGCGAACTTCCTGTTCCTTGAGCGTATTCAGCTTCACAATCTCCATGTTAAGG -TTCTCCATGACCTGGTCACGACCCTTGAGTTCTCGGCTGAAGTGGTCAAGGTCTTGGGCT -TTCTCTTCCTTTAGTGCAAGCTGCTGGAACTCAAGTTCCTTTTTCAAAGCGCCAATCTGC -TCTTCAAAGTGAGCAGCTGCAGACTTGTGCTCTGCCAGCTCTGTTTGGATAGCTTCAAGT -TGAGTTTGGTGTGCTGACTTGAGCTCATCGAGTAGGGCAACTGTCTTGTCGTCTCCTTGG -GCAGCGGCGGATTGTCTAGCTTCCTCCAATTCCTTTTGGGAGTTCTCCTCCAGCTGTTTC -CGTTGGGACTCTGCTTCTTCTAGTTGCGCTTGCAGGGCCTGAAGCTCCTCGGTATGGGCC 
-GATGCCAACTTCTCAATCTCTGCGGTGTGACTCGCACGAATGGCCTCGGTGGCACTAGCA -TCACCCAAGGGTTCTTCGGCCGAGACAGAAAGCTGGCTGAATTTCTCATTGACAGCCTGC -AACTCAGTCTTCAGACTGGCAACGGTCTCCTCGCTCTCTGTGAGCTTCATTTGAAGGGCT -TGTAGCTTCTCGGAATCACCAGCTCGGGCAATTGAGCCAGGTAGCGTACTCAATCGCTTC -TTGGAATCGGTCGCAATGGCAGATTTGACAGTGCTCGTAGACGAGGTTGCCCGAGTAGTA -GCAGGTCTAGTGGTGGAGGTGGAAGCGACAGTTCGGCTTGCAGGTGTCGCTGATGAAGTC -AGCCTACCCACGGGCTTGCTCGTCGACGTCGCGGTGCGCGCAGTTGTGATAGCGGGCTTC -CTCTCGGCGGTAGTGCCGGCAACACTCGATCTACGATCGATGGAAGACGAGGTGGGTTTG -AGAGGGGGGCGGGTGGAGGTGGTACCAGCCATAGACATGCGCTTCGCAGTGCCAGATATT -CCCCGCCTCTCATCTCCCGAGCTCGCCACAGACCGAGGCTTCTCGTCTGCAGAGCTGCTG -ACTGACATGCGAGACCGATGAGACGAGACAGTTGAGGTGCTCAATGGTTTCCGGGTGGTA -GTCGTGGTCGAGGGACGAGTGGGAGGTTTGTTCAGGGTACTGGCGGTGGTCGTGCGGGTG -GTACCCGTCGTGGGTTTGGATGCAGAGGATGAGGTGGGCCGTTTGGTGGTAGCTGTTCCA -GATAAGGGACGCTTCGCGATACCAGGTGTACCAGGGCTAGTCTTGGCCAGAGAGGCGGCG -GGGTTCTCCTTGGATACCTCGGGACTAGGCTTAACCAACTCCTCGCCCTCGGAGGCGACG -CTCGCCTCTATCGGGGTAGCCGGTGGCTCGTTTTCATCGACCTCCACAGGAGCAGGGGCG -ACGACCGGTTCGTCGTTTGAGGACATATCGGTAGATTGTTCGGGATTGAAGTATTAGGGA -CGGCAAGCTCCGTGGTAAGGTTCAGCTGGCGTGTACGTGTGAGCTGTCTGCGAAAAGCGT -TGTCAGCTTTAGTTGAGCAAATTCGAACCCCCTAAACACATGATTGTCCAGCCAGTTTGC -TTTGAACCTCAGCTTCGAGCTCTCAGATTCAGGCCACTCCAATGGCCTTTCAGGAGCTTG -TAGCATGCTGAGTACCCTAAAATCTGCTCCATTTGCGCAAAAAAATGATCCAACTGTACC -CATGGCATAATTGAGTGCCATTGTCCAGCTCAACCGAAGCTCCCGTCTATAAACAAACAT -GCAGAAAAGAGAGGAATGTACTCACGCGCCGCCCAACCGTTCCAATTTCTTCACAATTAC -TCAACTGATCTCCGGACTAAGAGAAGTCCAAACCACAATTGTAAAAAGAAAAAGCAAATA -AACCGACAAAAGCAAATGGTGTCGAGATACTTTGGGGGAGAAAAGGGGAAAGCGGGAAAA -AGAGAAAGCGGTGTCGATTTAGGAGGGGGGTGGGATCGCCGTTGTATACTTTTGCTTTCT -CTATAGACAAACCCCTAAATGGGTCCGGGGTATCCCCTTGCATTTTCTTAGCAATCTTTT -ATTTCTAGATGCGGACCTTGACTGGATCAATTGGTATGCTCATTCTTTGCTCTGCAGCTT -TATAGATACATGTGTCTATGTCTATCACAATAGCCCCATCTCCATACCACAGTACTAATA -TATACCTTAAGGGTATGCAGGTACCAAAGATGTCTCACAGCGCCAGAAGGCCAGGAATAT -AAGTCCTATGATCAGTGTCAATAACACATTGAATTTTACCAAAATATCTGCCCTCATAGG -GATAGAAAGATAGTAAGGCTTTAGCCGAGGTATGTACATTGTGGATATAGTACTTAGGTA 
-TCCATGGATATACAAAAGAAAGCCTATGGTTCGCTTATGGTTTGCCGACCCCGGAAATTT -TCAAGTCTGGTTTGGTATTTTATGAACTTCTCGGGTATTTTGAAATCAAGTCTACTATTT -ACATGTATCTGTGTCTATCTTAACCCATGCATACTTGCTTGGGTCATAGATCATCCCCGA -GTTTGAGGGCTTGGCTCTGGATATGTTGCAAGATCCGATCAGAGAGTCGACCGCCGATTG -TGTTGTCACATTTGATATTAATGTACCGATCTTCTCCTATTGATAACGTCAGAACATCAT -ACAGCACCAGAAATGAAAAGTCTTACCCTCATATAGTTGCAAGCCATCTCGGAGTGCAAT -CTTTTCCGCATAGGTCCATCGTCCACAAGAGATATGGCTGTCAACCTCTTCCAACTCAAA -GTGCCTGTACGGTAGAGGGCGTCGGTGAACGCGATGAGGTATCTCTCGTAGCCTACCGTC -TTTTAGTGCATCCTTGAGAGTCTTTACATGGTGCACCAGTCTTCTAGAATGAGCCATCAC -GCTCCCACTAATAACATAATTCATCTGGGCAAGGGAAGATATCCTATCACAGGATCCCCA -AATCAAATCCAGGATAATGCGAAAGTGTCGTGCTCCCTGATCATGTATAATCTTGTAGGT -CTTGAAACCGAGTATGATCAAGTCAATCAGACGGGGGACAACATGGGCTTCAAGTTGATT -TACCAGGTGTGCACCATACTCTTCATCTTCTGGTGCTTCGGTGTAAATGATCCACTGGTT -CTCTTTAAAAATGCTGTTGGCGATGAGACTGCATTGGTTCCTCGAGAAAGAAGCTGAAGC -AGGATCGTTGGTCATGGTTTCAAACAGTTTTCGTAACCGCTCAATGAGCTTCATGATATC -TCTGAAATGCCCTTTCGTAGAGGGATCAGCGTGGGCCTTAAGGGTTTTTCTTGTTTTGCG -GATTGCTTCCCAGGTATCCTTGAAAGCAGATTCTTTGGTCGTCTCGGTTAGCCACGTGAA -GATATTGGGGCGAGTGCTTTGAGTGCCCTGATCTGGGCGAGATGGCTGGATTGTTGGTGT -CTCTGGTACTTCCTGAAGCTGCGATGAGGCATTCCGAGAATTTCGAGCATTTTCTGACAC -ACGATCCATAGGTGTGCTATCTCTTTGGTTTCTTGGTAAACTCTCGAGGATCTGCACCTC -GAGAATATTCGCATCTGCGACCTGCTGGGGGGCTGTTGGATGATTCTGGATTTCGAGTGA -AGAATCATCATCTGCGTATTCGGATCCATCCTCAGGCGCAACGGCTTGCGTATCTTTGAC -TTCCCCTCCAGCTTGAtcatctgcttctgcttctttttcttcttcttcttcttctccttc -tccatattcGGGTTCGCCTTCAGATGCGTTGTCTTGTGCATCTCTAACTTCCTTTCCCGC -TCGATTATTTACATCCTCTTCTTCGTCGTTCTCAAATCCGTCTCTAGGAGTGTCATCTGG -TGCCTCTCTGGATTCATGTACAACTTGATGATCAGCTTGGTCCTCATCTTCAGCTGGGAA -CAGATCCACTGGTTCGGGGAACGGATCGTGTTCAGGCTCGGGCTGAGCCTCATCGCTGAA -ATCAGCCAATTGTTCCTCTTCACTACTACCGAATCCATCTTGAATAGATTCGGACTCGTG -GTCGCCTGACCCAGATTCGTCATTGCTATGCTCCTGGTCTCCTTCGTTGTCGGAAACGCC -GGGGTGTGGATCCGGTTGAGCGAGTCCAGGCATTGTGCGGTTTCTCAATTTCGACCATCT -CGTAATTTTTAGGTCGGTTTGAGGACTCTTTCCAGATTTTGTCGGGGAGGGGGAGATAGG -AACGTCAAAGACGTTGACATTAGTGCGACGCCGTTTGGGGTTTTGTATATTTGGTACATC 
-GCGCCCGAGTGCATTGATATTATTCAACAGTCCAAGGCTGTAGTCGGTAGATCGACTGGA -TCTTTTTGACGCCATCTTCGGTGACGCGTGGGGAAGTTAAAAAGAACCTTAGAAGACGCG -GCACGGGAGCTCTAGTGGAGCCGCAGATAACATCCCGCTTCCACTCCAATCTTCTCTCAC -TCATCCCACTTCAGCCTTAGATTTTTTTTTTCCAGAAGCTCAGCAAACATGGCTAATGGC -TTAGTTATTCGTGTGTTGCCCACTTTGGAACCTTCTGAGCCACACACTTTTGTCGAACCG -ATCAAGAAGATCCACGAATCGCAAGATGTCTCGACTTTCCTGACCTCCAAAGCCTATACT -AATATCATGACGTGGATACTACAACTCAACCGATCTATGTTTCCAGTGAAGCTAGCCGAC -GATACTATACAAACATGGCCTCTGAACAGCGATGCAATACAATACTCCGCTCCGGTCCGA -CAATTGCAGCGACTTCTCTCGAAACTTGAAGATATCATACAGGAGGTCCCACCAGATACC -GGCCCGCGGAGGTTTGGAAATATAAGCTTTCGAAGGTGGTGTGAGGTGGTCGAAAGTCGT -GCATCCAACCTACTGAGGGAATGCCTACCCGCCGACCTCTTGGATAAGGGATCATCGAAC -GGGGGCCTAACGGCAGAGACGGAGTTGAAATCATACTTTTTGGGCAGCTGGGGAAGCGGC -CAGCGGTTGGACTATGGTACTGGTCATGAACTGAGCTTCTTGGCTTTCCTGGGAGGCATA -TGGAGGCTGAATGGATTTCCCGTGGCCGGTCCAGGCGTCGAGGAACGAGCTATCGTGGTC -GGGGTATTTCATCCGTACGTCCACCAAGAAATTGCCACCCCAGATATGCTTCGTCCCTTA -CTGACTGACACTACTTAGATATCTGGAACTTGTCAGACTGCTCATTAAGACCTACAATCT -TGAACCTGCTGGCTCCCACGGCGTCTGGGGTCTTGATGACCATTCCTTCGCCCCTTATAT -CTTTGGATCTGCACAACTCTCACCTGCAATCACAGACAATGACCTAACTCCCGAAGAAGG -CTCACTGGAAAATGCACCTGACCCGGGCGGGGTCGCTAATGCAAATATTGTTCAAAGGGA -ACGTCGAACTAATCTCTATTTTTCCGCTATCGGTTTCATTTATGATGTTAAGAGAGGCCC -ATTCTGGGAACACAGTCGCATGTTGTTTGACATTTCTGGTATTCCAGCTGGCTGGGGCAA -AATCAATAAGGTTCGTAATATTGATAATTACTGGGAATGTTCGCGCTGACCAGTCATACC -AGGGAATGATCAAGATGTATAACGCGGAAGTTCTCTCGAAATTCCCCGTGGTACAGCACT -TCCCTTTCGGGTCATTATTCAGCTGGGACCGTGATCCAAATGCCCCTCTACCTCCTTCCA -CAGTTCATGCCACATCTGGTCCTCAGGCACGCCCTGTTGCGCCAGATTCTGGCCCTCCAT -CTGTTGCCGGTGCGCGTCCTATGCCAGGGGCAGGCACGCAGTCTGTTCCCCCAATTGGCT -CGACAGCAGCTCCATGGGCTGGGATGAGAAGAGAAGGGGTCCCACCCTCCATGCCTAGTG -GTGCAACTCGGGCTCCCTCTGCTCTTTCAGATACTTCAAGGATGCCTCCAGGCCCCATGG -CACCCACCAGAGCGCCTTGGTCTGCTCAACCTCGGCCTTCACATTCACAGGATGGAAATG -CCGATGTGCACACCAAAGCTCCCTGGGCGAAATGATCACGGAGAAAAATAATACGATTAT -TAATGTACCCTATGGCCTTTTGTTCCTGCATCAATTTGTAGACACACATCAACCCAGCCC -GAAATTGGGCCCTAGGTGCTTTCCCTTTGCCACTCGTAATAGTCTGCAAATTTTGATCAA 
-CCTCGCTCGGGTGTCCTCTTTCTTGGATTCTTTCGTATGAGCTCTAGGCGGTTCCCTCTA -CTGCCTACGCTTGACCCAATTATATGAGGGGAATTTTTCTTCAGCAGGGCCTATCACATC -GACTTTGCTCGGTAACTAGCGGCGGTATAGCATTAAAGTCGGTCCAAGCATCAACTGAGC -ATTATACCCCAGTCGTAGTAGCATACCACCGCCGAGAACCCCTGAATTTATATTCCAATA -ACCTCGGTTTCATATCGTAGACTTGCAACACAAGCATTTCGCCCCCAGAGGTTGTGGTTC -GGCCAATACCGGACCGAATAGGCTGTCTAGGCGAATCACGGCGATAATTGCTGGACCACT -GCCAAGTAAAATCGGGCCGGAATCCACCCAATGCCAAGACCTCGGGATCGCTGCCTCATC -GCCCTGGGCATTTGGGGCGAATTGATCGTAATTTTCACCTCTTGTGTTCAGATAGTTATA -AGATCCATCGTAGCGTCATGAATTGCCCATCCCGGACCGATGACTTGCTGCAAGATGGGT -GGAATCAGAACCCGCGCTTTCTTTCTCCTGACCTAACTACCCGTCAAGATCTCAATGGCA -TCTCAAACACGCGCGAGAACAAAGATGATGAGTCTCCAGGTATTTTACGTGGAAGTAATT -GGAAAGTTACTCATCCGCTAGAAGAAAAGGACGCGGGGAGTCACCTTGGCGTGGGGTGCG -AGGCGTCCCTGACAATGTCAGGTATGAACTGCAAGGCAAATATTCTAATAATCAATTCTA -ACAACTGAACAGGACGCCTCGCTCCCCAAAATACTATTCATGAAATCGATCAAGGATTGT -CGAATGTTTTTCCATCATATCAGAGTATCAAAGGTTGGGCCTTGTGGCTCCTCTCTGTGC -TTCTTGTTTCGGCAATCATTTCCTTCGACAGTCTGAAGAGATATTTCACATCTAAACAAC -CCCTCATACTGGAAGCACCTGTCACCCAGCAGTATGCAGAGCCAGTCAAACGATCAAGTT -GTGCTCAGGGAGGTACCCGCGGTGCATACGACCTACCATTGCACGTTGCGGCCCTCTTCA -TCATCCTAGGCACTTCGAGTATTGCATGCGCATTCCCCATCCTTGCAACTCGATTCCCAC -GGATGCATATCCCACCCGCGTTCCTTTTTTTCGTTACTCATTTTGGAACGGGTGTCCTCA -TCGCCACAGCATTTGTGCATCTTCTTCCTACGGCGTTTACCTCGCTAGGAGACCCATGTC -TATCTAATTTCTGGACAAAGGATTACCCAGCTATGCCGGGTGCCATTGCCTTGGGTGGTA -TCTTCCTCGTTACAGTCATTGAAATGGTATTCAGTCCAGCCCAGAATATCTGCCGTGGGG -GAAACCGGAGTCCAGCCTCAAGACCAGCCTCCTGCCCTGAGGGTGCGATGGTAGCTCCAA -TTCCAACCATCGATGCTCCTGACTGTTCGGACCATTCCAGAGCGCCGAGCTCACAATCCG -TTGGAAATGATGGCCGATCTCATTTGCGAGACATGGGTCCGCTAATTGGAAGGTCCGCGA -GCATAAGCCGAGCTATCAATCGCATGGGCGAAGGCACTGAAGATATAGTTCGCGTTGCAT -CTGCACCTGATGTCCCAACTCATCACGAGAAGGATAATGGGACTATCCAGACAGACGTGG -AGCGTCATGAAGACAACTTCGTCTTGACCCCTGAGCAAAAGCAGAAAAAAGAGACCATGC -AGGTTTACCTTCTTGAGATGGGCATTCTTTTCCACAGTGTGTTTATCGGCATGTCACTCA -GTGTATCGGTCGGTAGTGAATTTGTGATTCTGCTGATTGCCATTGTATTCCATCGTAAGT -TTTCCTTATCCGTCTGTTGATCGGAGCAATGCATTTGACTAAACCGACTACAGAAACATT 
-TGAAGGTCTGGCGCTTGGATCCCGGATTGCCTCGCTTCCCTGGTCAGAAAAGCAAATCCA -GCCTTGGATCATGTCCCTTGCATACGGATGCACGTAAGTTAATTCCCCGCTTACAACGGA -CTCAACATTGCTGACAGAGGTCTAGAACCCCAATCGGGCAAGCTATTGGTCTCGCAACCC -ATACCCTCTACAGCCCCGACTCGGAGGTTGGTCTACTTGTCGTCGGCGTCATGAATGCCA -TGTCAGCTGGACTTCTAATTTTTGCCTCGCTCGTGGAATTGATGTCGGAAGACTTTCTCA -GTGACGAGAGCTGGCGCATTCTCCGTGGAAAGAGAAGAGTTTATGCCTGCATCTTGGTAT -TTTTGGGTGCTTTCTGTATGAGTATTGTCGGCGCCTGGGCCTAAAAACTCTGGGATTCAA -GTGTTCAAGAATATTTGGTGTGTCAAAGGTTGGACACTTGGATTTCTTCGTCAGTAGACC -CACAACTGGTCTAACTTTCGGGCATAAACGTGTATATAGAACGCTGATTGACGGCGTTAT -TATATTTCATTTTCCAGTATCTATATAGCATTAGGATATAGATCGGCAAAGCGGGTCTGT -TGTAACCCGCAGTGTCAAGCGCGTGAGAATACCATGAGTATCATCTCCCCGAGACAGTAT -CAATTTAAGAGAAGCCAGAGCAATACAAGGATATCCTGATGGATATATTCCACCATCTGA -GACAAGCCGAGGAAAGGGATTAAGAGTAACCTTCAAAGCGATATAGAAAAAGAGCAAAGA -AATGTAATCCTCCTAACTACTCCTAGAAGAGGATCGAATCATGACCAGTTTCACCATTGC -TGAGCTTCCCAATGCGTTATCACCCGTGGTCATTGTTCCACCCAGCAAATTAGATGACCT -CGTGTCTCTCACCCACTTGGCCACCCAGGGGGGATATTAGCAGACTATCCTGAGCTAGAG -TCAAGTAGACCACCTAGGTATCATTGAATCAAGCTATCCACTGCCTAGCATTCACCATTT -ACCACCCACCCCATCGGCCTCTGTTCTCCTCTCCCCAAGCTTCGTCATTGAAGCCATCCC -ATTTGCTACGCAGATCACTTGCTGTCCGGACAGAGCTATCGATCCGTTGGCCTCGGAAAT -GGAGATCTCCATAGCAGCTGATCATCCCAACAATATCACCATCGCCCAGGAACTCAATCC -AACCTCCCTGGTCGTCTTCGTAGCTCATTTCACCCTCACTGGTCTCCCGCCCACGCCACT -TAAACGGGACACTGTCAAAGGAAGGCTTCATAGGTCGTTCTGGCATCCACAATACACCCT -CAAACGCTCCGAACTCATATGCCCCCCATATCTCCGTCCCATTGAGGCGGAGAGATAAAG -TGAGTATTTCAAATGGTATAGCGTCCGGCCAGTTCCATTCGATGTCGGAGGACCGGACTT -TGTACGTTGCGTTGAGTCAGGCCCAGGGTTTTCTTCAGGGCTCGGGGCGGTGATGGTGGA -TCTGGGTTCGTGTCGGGCTCCCTGCGGTGGCCAAAATCCTCGCCCGCATGGAAGAGGGCA -TTGTAAGAATGTCCTCCGCCCTGATCAGGCAAGTTAGGTAAGTGTTCTTTTGTGAATTAG -GTAGAATATATTTACCGTTCTGCAGGCGGATTCGTACAAGGGAGGTGCATCGAGGTGCTC -AGGCTGCTCAGACCACCCGCTCGATTCGGAGTGACTGTTTTTGGACACATTCGCGTTGTC -TTGTGTCTTTTGCTATTCACGCGGCGTTTGGCGGTCTGCGGGTTATAAGGTGTCTCGTCG -TCCGAGGCCCTCTTTCTCTTGCCTGGGGCAGCCTTAGGCGTAGTATGCTTAGCAGCCGCG -GTTTTTATAGCTGTGGGTTTGGCAGCAGCCTTGGGCAATGCAGGTTTATCCATAGTTTTA 
-GATGCAGTAGTCTTGCTCTCAGTGGTCTTGGACGCAGTAGTCTTGGACGCAGTAGTCTTA -GAAACTGTAATAGTCTTGGTAGTAACAGTCTTCGTCGTCATAATCTTGGTATTGACCTGT -ACTTGGTTATTCGAATCCCGTTCCTTCCATTCCTTCTTCAGATTCTTTTCGATTTGTAGG -CTTTCCTTGGGAACACTCAGGCCGCCGTCTTGCACAGCTTTCAAGAGTCTCATCTTGGCA -GTCGCCTTGACCTTTGATAGTGGGAGGCCATAGTGAAGGAGCTGTGCTTCATACTAATGA -CCTGGGTGGTCAGGCGTGCTTTTGTCTGTAGTCGGTGTATCTTAAAGGGCTTTTATCTTG -GCCGGCGCTGCACAGCGGTGGATCTGTCCTCCAGACGTGGCGACATAGAATCTGTCGACT -CTAAAGGGGAACTGACCATTGGTGACAGGCGGCATGGTAAGCTTTTCTAAAGTAAGTGTA -GGTATGGGGAAGAAGATAGGGAGAATAGACGTGGGGCATGCCACGACTCCACTCCGATCT -TGCCGCTCCTTTCGATACACTCTAATTTTCCCCATTGTTATGTTCGATATTCTCTTGTTC -CAATTTGCCTAGCTGGGACTCAGCTGGCCCATATACAGGGCTACCTACCAACCGGTCATA -TATATGTAAAGCATATCTATGCAGATTTGGCATAAATTATTCAACTTCGTAGCCTCAACT -AGTTATGTACAGTCAACGATTATGATGTCTCATATTCGGAAATCAATTTACGAAAAGTCC -ACTGAGGGAAGCTCTCTCTGTTCCAGAGCTTTCTGGACACTGTACTGCATGGCCTCTATA -GCCTCTTCCCAGCTTATAGTTCCCTCGGGTCGGGACTCTAGTAGCTCTTTAGGCCTCTGC -TGCTGCGAGTATTCACCAACCAACCACATTAACCAGCGTACTACTGAAACAGCTTTGTCG -ACATCCTTCTCTTGCAATGTCACACTGGACAGATATTTACATAACGAAATGACGTCTTCA -CCATATGGGCCGCCAATTGCAAATACTTCATGCCATGAGCCAACAGCATTTCGCAGATCT -GGCAATCGGGACAGGTGCTGTGATGTAAATACGGGGCGCTTGGGGAGAGGGGGTAGAGGT -AAAAGCTTCTCTACGGCAATTGGTGCAGAAGGAGGAGCCTCCTGTGCCCGAGATGGAGGT -CTTTGCACAGGTTCAATACCTCGCGAGCGCTGTTGCAAACGTTTTTGTTCTTCAAGGAGT -TCTTGTCTGATATCTTCGGGTAAAGCAGCCAGAAATTCGGCTGAAGGTTCGGCTGGCTCT -TCGACGATTGGTGACGCTTGTCTAGACAAGGTGTCATCATCAAGACTATTAGATACGGCT -GCTGGGCGTGGAAGAACGAACCCAAAGCTTGATTGATTCAAAGGTTTGGGCTTAGGCTTG -TTTAAAATCGTTGCCGACTTGGGTGGGCGGCCACGTCTCTTTTTAGGTGGTGAGACCGGT -CTTCTCAGCTTGAGAGAGCTGGAGGTTGATGGACGTGACTCTGGTACTGCCGGCGCGGGG -GGCGGCGCAGCGATAATGGCTGGGTGTTTGTAATATCCCAGGACTTCTTCGCGGACATCT -TCAGGTAATGCAGCCAATGCATCTGGGTCCAGTTGGGACTGGGGGGGAAGGTCTGCTTGA -GCGGACTGTCGGCCTTCGCGCGGGGGAGGACAAGGAGACCCAGAGCGAGAATCCCGGCAC -GGCTTTGCTTGTGCAACAAGCCTTGACCTGATATCCGTGGGCAACTCAGCAAGGACTTTG -GGATCGGCTTGAGTGGGCATAATAAACTGGGACCCGGAGATATTGAGTGGCTTGTGGGCA -CTATCGCCGAGTACTGGGTCCGCTTGAATACGAACGGAGTCATCTTTCTGCGGACTCTCC 
-AGCTCGTCGGGGTCTACATCTGGAATGACCTTCTTGCGAGGTGGAGACCTATGAAAGTTG -AGTTGTCGCTGGCTGCTCTCGTGGCCTGAAGGACCTGAAGGGCCAGACTTGATTGGTTCA -AGTTTGGTCATTTGCACACCTAGACCTCTCAAGTCTCCAGGTGAAAACTTGAAACTTCTC -AACATCGATATTGCTTCCTTCCCAATGACCTCCGGTACATTGGTTGCAATGCCAAGAGCG -ACGCTCTTGTTGAATGTGTCACACTTTCCATGGCCGAGATGCTTGACCGGTTCCAGTGGA -GCGTCCATCGATCGGCGCATGACCTTTAATGTGAGCTGTTTTCCCTTCACCAAATTTTCC -ATCAATCTTCGATTCAGCTCGTCGCACAGAGACCGCACAAAGTCATCTGCTTGGTCCTGA -TTGACAAATCTAATACCCCAGTTGACCTCGGCCGAAACAGATTTCCGCATGACTTGATCG -CCGATTTCTGTTTGATCGATTCCACGGGTGTATTCCCACAATTTGATTCCAGTTTTGGGT -CCCAAAGCAGATATCAGTCTCTCTTTCGGGAGATTTCTGATGTCTTTGACGAACTTGACA -CCTAGCTCTTCGAGTTTCATCGCCATGCTATTGCCCACTCCAGGAAGATCACGAACGGTA -AGATCTCCGATAAAGTCTAGAACAGCATCTGGCTTTAATTGAAACTGCCCTGCAGGCTTG -GCTTTTCTGAGAGACACTTTCGCAAGCAAAATATTGTTGCCAATTCCGACAGAGACTGCG -CATCCGGTCTTTTCTTTAATTGAATCGCGTAAGTCTTGAGCGATTTCGTCCGCTTTGGCC -TGCTCGCGGTAAATTGACCCTTCCGATATAGCCTTTCCATCTGACCCGCCAGCTTCCACA -CATAAATTTGTGACATCAATCAAAGCCTCATCGATACTGACGCTCTGCACAGTACCATCA -ATGGCAAGTATCGACGTATAAAACTTGCGACTCGCATCTTCATACGCAGGGAAGTCATAA -GGCAGAACTTTGAGGTCGGGGCATAATTGCAAAGCACCTTTCATCCACATCCCATTCTTG -ACGCCTCGGGCACGTGCAGGATAATTACAACTAGCAATCTCGGAGCCTGATCCAGATCCA -TGAGCAATAGCAACTGGCTTTTCTTTGAGCTCGGGATGATTCAGAAGTGAGACAGCTGCA -AAGAATGAGTCGAAATCCACGTGTAGAATGTAACGCCTCGATCCAGGCACGGGTTTCTTT -TTCGATATCTGTGACTGTGTATTCTCCTTCATTGCGGCTTGCAGTTGGGCTTTCAAATCA -GCCTTCCAGGTTGATAAATGATGGAGGCGAGACTCCTTATAATACTGCTGCAGAAATTCC -GGATTGACAACAGAAGAATTATGCATCCGTGGGTCAGAAAGCAGCTGTGCGTTGTACTCT -TCGGAGGTCAAATTGGGCTTTTTTGCCCCAGTTTGTTCGAGAAGTGAGGTTGTTTCGAGG -TCAACAAGTGGATCTGGTGAAGTGGGCAATTGATTGGGATTGGGAATTTGATGTGACATT -ATAGCTTGTGCTGCATCACCGACTGGTGCCTCTGGTTGATCCTGAAGGCCATACGCTTCC -TCCTCTTCCGCTAGAGTGAAGCTAGGGAAATCACCATAGTCTGACTGGGATGGCTGGCTC -GGCGTGATTGGTGCGACCATCTGTGAAGCTTCGGACAGTGCAGACGGGATCGCAGGCTTG -GGTGAAGAGCCAAGCCCCTCTTCAGTGGTGTTGATATCTCGGATCTGAGAGTTATACCAG -CTAGTCCTTGATTGATCTTTGTAACCTGGGCGAGGGCTATTCGTCTGGCTGATAAACCGG -CCATCATCAAACTTGAGCACCCTCTGTGCTTGACCTTCATCTACTATTCTAAATTCATCC 
-CATGGCAGCAGACGTCCTGCTTTGATGCTGTCTGTCACCCACGCAGGTTTCACTATGCGG -TAGCGGCGAAACTCTTCACGCTTCTTGGGTGTCAGAGCACTGGCGATGATATGAGTGGCT -GCAGTCTTGCCATCTAAATATTGAAGAAAGCCACCCCCATGGCTGACTATCAGACGATGC -AGATCTTGCAGGGAAGGTTGAGTGTACCCATTGACATGGGCAACTACACCACGGAATATT -GGCGGGCAATCGCCAGCTGAAGCTCGAATTTCCGCATCAAGATTCTGTAACTTCATTTTT -TTGCGGCGCATGTAGTCTCCAAAACCACCAAAGCTAGAGGCCTCATACTCCTCCCCTCCT -TCATCGTGGAAGTCATCTAGAGACTGTCAGATAAGAGCGGAGACTCAAATTTGGACAGAA -CCTACGGTTCTCAATGCGTTTGCGCACTGCACTAGAGTTTATCTCATGACGGGAGCCCAT -TTTGGGCTACGATGACCATCATGACCAACAAGAAAGTACAGAGAGGTAAAAGAGAGAAGC -TTATTGCCCTTTCGCGCGAAGCTCGGCAATCATCACGTGGATCCAAGCTCATAGGAAGCG -GTCACCTATGGCAATAGATTAGCGGTGAACGATTCGTTGATATCATTGATTGATTTTTTT -TTCTCCCAATTTTTTGCCGCATTTGTTGCTTGGTAGCAtcttttcttttcttcccttttc -tttccttCATCCACATCTTTTTCCACGTGGTTCATTCTCTCTTGGATTTGATCTTTCTTC -CATCTCTCTAATTTACTCTAATAATTCACAATGTCTGCTGTTCAGCCTGTCGCAGTCTAT -GCGCTCCGGGTTCCTCCCGGTGCCATGGTTCAAGCAGTCCCCAACGCTGCTGCTTCGGTC -AGTTTCATTGTGATCAATTAGGTGATAGTTTGTGCTGACATTTAATCCAGTTCCGCATCA -GCATGGCCGCTATCGACCCCGATGAGGCTCCAGAGTTCGAGGATGACCAGGACTCGAGCA -AGCCTTCCCGCTCCACCCTGAAGATTGTTCGCGCTCCTCCCGGTCTTGACCTCgatgagg -atgacgaagatgatgaggactacagtgatgaggatgaagaggattctgatgaggaggaGT -CCAACGGTGGCCCTAGCGACAAGGAGAAGGCCCGCAAGCTTAAGGCCGCCGCTGCCCTCA -AGGACCTCGAGGACGCCATGgaggaagatgatgatgaggaggatgatgatgaggatTTCG -ACCTGAAGGCTGCTATCTCCAAGCTCATCAAGGGCAAGGGCCCTGCCCTTGACGATGAGG -ACTCCGAGTCCGAGGAAGGCTTGGAGCTCGATGAGAACGTTATCTGCACCCTCAGCCCCA -GCCAGGTAAGACCAACACTCCCTTGGGTTTCACAGCTAGTTGAATATTGACGTCTAAACC -CCCTGTAGAACTACCAACAGCCTCTGGATATCACTGTCTGCGAGGGTGAGCCCGTGTTCT -TCAAGGTCACCGGTAACCACACTGTCTTCCTCACTGGTAACTACGTCATTGGTCTCGATG -AGGGTcacgaccacgaccacgaccatgaccacgaccaTGATGATGAGGACGAGGATGACT -ACGATTTGTCTCCCGATGAGGATGAGCTTGACATGGAAGACCTCATCGCCATGCAGGATG -ACGATGAGAGTGATGACCTTGATGACATGGAGGACCCCAGAATCACCGAAATCGAGAGCG -AGGAGGAGGCTCCCAAGCTTGTTGAGTCGAAGAAGGGTAAGAACAAGCGCGCTGCCGAGG -ATGAGGTCACCCTCGATGATCTGATGGCCAAGGCCAACAAGGCCAAATCTACCAAGGCTG -AGGAGCCCGCCTTGACCAAGGCCCAGCAGAAGAAGCTCAAGAAGAACAACGGCGATGCCG 
-CCGCTGTTGAGCCTAAGGAGGCCAAGAAGGATGCCAAGGAGGACAAGAAGGAGACTAAGA -CTGACAAGAAGGTCCAGTTCGCCAAGAACCTTGAACAAGGCCCTACTCCCTCTGGAGACA -AGCCTACTGGCACTCTCGGAGTCAAGGAGATTCGTGGAGTCAAGATTGACGACAAGAAGC -TTGGAAAGGGTGTTGCTGCCAAGAGCGGTAACACTGTCGCCATGCGCTACATCGGCAAGC -TCGAGGACGGCAAGGTTTTCGACTCCAACAAGAAGGGCAAGCCTTTCACCTTCAAGCTCG -GAAAGGGTGAGGTCATCAAGGGTTGGGACATTGGTGTCGCCGGCATGGCTGTCGGTGGCG -AGCGTCGCATCTCCATCCCTCCTGCTCTCGCATACGGCAAGAAGGCCCTCCCCGGCATCC -CCGGCAACTCCAAGCTTATCTTTGACGTCAAGCTCCTCGAGATTAAGTAAACTGCTCGAG -ATTGCCATCCGTCTTTTCATCGGGGTGTTTGAGGGTACCGGTTCAGTTGCATCATTCCTG -TGATATCCATTGCTGTCTCTTTTTTCAGTTCACTCCCCCACTGCATCTGGCAGGGATGAG -ATGATCGCGAGGAATTTGTATCCCTGCATGGAAAAAGTGACCATTTATTGCTGTTTATTA -TATACTATGTACTGGTTTTTTTATGAATTGATCATATGATTGAGTCTTGTTTTTCAATTC -TTGTGTAAAAGAAGGCTTCACTGCCTTTTCTCTTCAAAACAACAATAATAACATAGGGCT -GTATACTTCATCTCCTTGCCGCAACAAATTCCCCGATACGGAACCATGCTAATTAAGAGA -AATAAGGCTGCCTGTTGCATGCTCATCTATTTTTCCCTAAACACAACATGGGCGTTCTTT -GTTCCATATAGTTTTGGATATAAGTCAACGGGTCCCCATTTGCCTTAAGGAGGCGATTGC -TAATCAACGTCATTGCCTCCAACCCTCTAAGGCTCCTAAATCGATTAATCACACCTCAAG -TATTCTCTCTTAAAACATTGACTTGCCACATTTCTACATGATTCCCTTTCTGTACTTCTT -TGTCCAATAAAATAATGGCCATCTAACTAATTGTGATCCTCCCCGCAGATAAGGCCATGA -CTCGTGACTGTTCTGGCGCTTATCGATTCTATCAGCTTCAATCCTAACGTCTGCGTTGTA -CAACATTTTATATCTCTCTAGGCGATTATCAGTTTGTATCGCGACATGGAAAACGGCGAC -CGCATGTTTTGCCACGCCTGTGGCGGAGTTTGGCTCAAGGACGGCGGTTTGACGTGCCCG -CATTGTGAATCTGAGTTCACAGAAATAGTACAGTGAATGCCTTCTCTTTCAGTTTATTCC -ATCCTAACAACCTTGTTTCCTCTAGATTGAAATTCCCCCAGAGCAGCCTTCAGAAGCTTC -TCCAAATTCCCCTTCACACCGTGCCGATTCCTCGTCGCCTTCACGCGTCAATCCGTGGAT -AGACCACAACCCATGGGAACGAGACACACAGGAACGACATCGCCCTGGGTTTTTCGCAGG -CGGCGCTCCTCGCCCAGGCGACACTCCTCTATATTCCTCCTTGCATACATATAGATCTCC -CGACGGACGTTTCTCATTCAGCAGCGCGACGCTTGATGGTGGAATACCTGGTGGTCAGCG -CAACAATGACCCAAACCCTTTTCCAACGATGATGCAAAGCTTTGACACAATGCTCCAAAC -TTTAATGGAACCAAACCCTCGTGGATACAGGGGATTTGGGGAGGATCCCTTCCATGCTCA -ATCATCTACCTCTCCCGACTGGCTCGAGGACGACCACCTCACGGGCCATCACCCAGGCCT -ATCTCCCCGCAATACAGATGCACCGCAACCTAATAACCGCACTCCAACAGACTTAGCTCA 
-GTATGCAATTGCTCTGCTCAATGTGACTATTTGTAGACTAATCAGAATCTAGACTTATGG -ACGCCATTCGTGCTGACATTGGGTTACAAACTACCCCACGCGCACGTGGAAATCGGGGCC -CGACAGGCCCACATGCTCTTTCCATACTGTCAGCCATCCTCAACATGAGTAGAAGTGAGG -ATGCAGTATATTCGCAAGAGGAGCTTGACCGAGTCATCACACAACTGGTTGAGAATACCG -GAGGGACCAGCACTGCGGCACCGCCAGCATCTGATGCTGCTGTCCGAGCTTTGCCGAGGA -GAAAGGTCAATGAAGAGATGATGGGGTCGGAAGGGAAAGCAGAGTGCTCGATTTGTATGG -AGAATGTCGAACTCGGCTTGGAAGTCACTGTGCTACCATGCACGCACTGGTTTCATTTCA -ACTGCATTCATGCCTGGCTGACCCAGCATGACACTTGTCCCCATTGCCGGCGCAGCATCA -ATACAAACACAGCGTAAGTCTATCTGAGTTGTGTGGACAAGAACTTTCAGCTGATGGCAT -ATTACACAGGAGCGGAGAAGGCACCTGTGAGAATCCGGTAGTGATCCAGGATAGCCCCGA -GCAGCCTAGGTCCCAACGACGTCACAGTTCTCCTCGCACTGTGCGCAGCGGCCGATCCTC -ATTATCTTCATTGCCGAGCCTAATTTCTCCACGGGTTTCACCCCGTCGGTCCCCTACTCC -AGAGGGAGGGCAACCCAGCAACCGACGGCCTAGCCGAGGTGAGGGTAGCAGTGGAGGGGG -GATCACGAATTGGTTTACGAACCGATTTGGAGGCAGTGCATAAAATAATGGATACATCAG -TTATAATTTTCACGCAGCGGGCCTCTTTCTCCCCTTGATTTTCGGGATGTGTTTATAATG -CTTCATTTGTCAGGTTTAGGGCTTCGACATGTTCACTACTTTTTTTCTTCAGGCAATTTT -TCTATCCCTCCATACCCCCTTCTTTGAGCGACTTTCCCTCCGCGCGACAGATGAATGAAA -CTTGTCTGGGTTTTCCCCAAGAGTCCGTTTTGTAAACTTGTTTCCTCCTCCATGCCATGT -ACATACTCCAGCTCACACATTGTCGTCCTGTCTTTGGATCGACCAGCGCAATCCATAGGG -ATATCCTCTTCCGTGACAAATTGGAAGGATTTATCTCATCTTCCAAGACAGTACAAGGCT -GCAACATTGAATCGGCCTCTCCTTGCATCACACCTGAAATATCATTGTTGTTCCAGTCTG -TTCATGATCCATTGGTGCCATTTCTGTGTAGAGGCTTACTATTTCGTCCTTAGGGTCGAC -GTTGTAATGGCGACATCTATCACCATGTACGACGTATGATTGATGGTCTGAAATTACGGA -ATCGTAACCATTTCATCCATCGTTTGCAGCTCAGCTGCAAATAACTTCCTTCATAGATTA -CTAAATGGATGCCACTTGCATCAAATTGGGCTTGTGGTTTAGTGGTATAATACTCCCTTA -GCATGGGAGTGGTCCAGGGTTCGATTCCCTGCAAGTCCATTTTTTTTGTTCTTTACTTTT -ATGCACAATTAAATCTGCTCTATGAGAACTATTTTTTTATCTCCTTTTTCTCCGACTGTC -ATATTATGGTTAGGTTTTTCTTGCTCCCTTTTTTATTGTGGATTGGGTATCTCGTCCAAG -GATGTCCTTGGCTTTCGTGTTTACTAGCCTCCGGAGTACATAGTATACTCAATAACAGCC -CCCGTCTACTTCGTGCGTTCGATGTTATTTAATTGATTCTATGTGTTGTGCACGGTTTGC -TATATATTTTTCTGATTGTGTACACCTTTTTAGTGCATGGTTTTTTTTTAACCTGATGTT -GTGGCTGGTTATATATTCCGACTATGGAGTAGTGCCGGTCAAAATATAGAATAAAATCTT 
-CGCTAGTTTGCGCTTGTCCTATCTAATTATCTTTAGAGGTTACAATCATTCTACAATGAA -ATGCAATTTTCTCCCATAGAGATCCACCCTGAGCGTCGCCATCCAGTGTATAGCATGTAT -TGTTAAAGTTCGTCCAACGAGGGGCCAGATCAAGAGAAAAGCTGGATCGAAGCGCTTGGA -AATCTCCATCTGGATAATCCCTTGCGAAACCCTGAAGTCAGGTCAAAGAAAGTAGATAAG -CCTTGGATACTGATTTCCCCAAAAGTTCAGACTCTTCAGCACCTAATGAAAAGGTTTATG -TTTCGCAAAGAAAGCGGATTCTTCGGTTAACCCCGAGGTTATTCTTATTAATTAAATATT -CAATAATGAGTCAAAATTGATACTTCTCATCTAGACTTTTTTTTATCAGAAGCCAAATTA -TATGGAAATTTAGACTCCAAACGTTCACATGAATGCCGAGAAGATCACCATGCTATCCAC -CGTTAGCTCGAGCATACATGCTAGTTCGGCACCTAGGGCGAAAACCACAGGTATAATTCA -TGGGCTCGAAATGATACTTCGAATAATTGGAGCAGGATTTATTGGAGACACCCAGCGCAC -GCGTATGTTACAAATCCTGACAGCCCCATTGGTATCATAGCAGGTCAGAATGGCAATTAC -CGCATAGAGCGGGAGATAGCCGTTGTTACCGTTTGATACTACAGGTTGAGCATGCCCGGA -AAAGTATCGTAAAAGACTGATTTTCGCCGTTTCTTCTCCTTTCTTTTTTCATCTAGGCCC -CCACAAGGAATCGTACCCCAGACCGTTGCATAATTTGACAGGTTGATCTGCTATAAGTGA -TGAGATCTGCATCGTGAAACTGACCGTCTGCGGTGGCAACTGCTGTCATATGATCTCGGA -TCCGAGATGTATGGCGAACTGTCCAAGACTCTTTGAAGGTGACTTGTAACATAAAATGGT -GGCGTACATGAATCAAGTTTTATGAGCAGAGATCGGGATCACGACTGTCTTACTAGTATC -CCACAATGCCGTATGTTAGCCGGCCCTCTGATGAACCACATTTCTTTTATATGCCTGAGA -TTCTGCTTTTAGTAAAGATCACCCCGAATAAGCGAATCAATGTATGATTATATGCCTAGG -TGGTTTGTCTGCTTTGCTCTTTAAAGGTTTGTGGCTTCGGCAGACTCGTATTGATAAGTA -AATGTCTCTACTATATCTTACATGACACGATGATAAGCCTCACAAGGTCCAATCGCACAA -ACCAAGACCTAAATCCCGACCCTTCTCAAGCCTGGCGTATATACTCCGTAATATCCAGGT -GTTTGGAAGGTGAGATGAGATCGCCCATGCGGGATCCAGGCATGTGATGCTAAGGTGTTT -TCAAGCAAAAGCAAGAATGGACAGTGTGATTAAGATAAGATTTCCATTCAAAATACAGCC -AGCCAATTTGAAAGCCATCCAATATCCCAAACCTGCTTTTGCTTTGCTTGACACTAGAAC -CCGTTAAGAGCAAAACCGTTATTCAAGGACTTCATACCTCCGCCCCCAGGAGGGCTGGCT -TGTCATAAACCACGATCGTATTGGAAATATGACTTTATGCCGTCGAGATGCTTGGCTTTC -TCGTTTTTCGCGCTGCATGTCCATCCAGCCAGTTGGGGTGATGATGTGAGGTGGCGGCAG -GGGGTGATTGGAGGTGGGTATTATCTCGTAATCCTTCTCGACCTTTGCCCGCCCCATGAC -TGGCAATTCCGAAGAGGATCTATCGACAGTCGTCGAGTGTGATATGGGTTTCTCATCAAG -ATCTGTAGATGGGCATGATATTGAAAATGCGACAGTGCCTGGGCTGGGCCTTGAATGGGC -CCTTATGTGGTCGGTATCGGTATCAGAACTCAAAAACTCAGAAGTGTCATTGCCTTGGAG 
-AGTCTGGTCTGTGTGAGAGGCCTGTGTGTGTTCCTCTGCCGCCTCCATTTCCATTGATAA -GTCGAACGTCCTACTCATTGTTGTCATCCCCGATAGTGTAAAGTTTTGACAGAAGGCAGC -GTATGATTGTTGAAGACGCATCTCTCTCGCTGCTCCCACGTCAAGAGCAGGGCATTCCTC -CTCAATCTCTTCATAAATATGGTCGTATATCTTAATATCATGAGCTCGGTGAGCCGGCGA -GTTTAGGTTGTGTGAGTCGATAATTGTTGGGTTGTTGAGGCTTTTGAGATTCATTTCGCT -CAGCTGATGTTCTTGACCTCCGGTTCTGAGGATTGTTTGAGATATTCTCTTCGACACCCT -TCTTGTGAGCTGAGAAACTGTGCTTGTTCGACGGAGGAAGTTTCGTGGTTGCTCTGAATC -TTGAGTAGTGAAGGTATCACTTGAAGCAAAGGAAGCATTGTCATTAACAATGTGGACATC -GGGCATTTTGATATGAATAGGCTGATTTAAATGGTGATCGTGAGagaagaagaagaagaa -gaagaagaagaagaaggagaagaagaagaagaagaagaagaagaagaagaagaagaTTGA -AGGTCAGGCTGGGAATATGTAATTTCTTCAAACGCAGCCAACGGCAATAGTTTGATTGAG -CAGGGGTCAGATGAAAGGAAGTTGCAATTACAATATCCCATATACCGAGCAAGAGATAGC -GGTGAATCCTCACGAAGTAGATGGGGGGGAAGGATCTGGACTTATACTGCAGGTTTGATT -ACAGGTATAAGAAGATAAGAAGGCCTAGATGCTCAAGCCCACAGGTGCTCTAAACATTCA -ACACAATTCACTACCGGTGGGACAGATCTATTAAAGTATTCAGGGAGATTTGGCTAACTT -AGACGCAAATCTTAATAGCATGTGCCTTGCCTAGGCAAGATACAAGATCACAGTTCGCCT -GTTAGCACAGGCTACCAGGGCGTCCCGCTATAACAAGCCACCATGCAGACCGAATGTGGA -AGGACGTCAGAGGGCATTGCCCATATGAGACTAGGTGTGAACGTGCGATAAGGACCATGG -GAAACTCATACTTTGTCGATTCTAGTTCACTAAGAGGAGGTTATATTGTCTACTCCGTAC -CTACCTGAAGGCTTTTCAAGCGTAAACAGGGATCCATTTTACTAAATTATCCGAAGTACA -AGACAAAGGGGCAGTGAGACATTTCTAGTCACCCCGCCATTGGCCCCCAACGCTACTTAG -TAGCCTAGTCTCGGACGTTGCGTCCAATGCAGGAGATCACTGGATTACAGAGGCTCCCAT -TGGTTATGGTTCAGATGAATAGCATGTCCGTGCGGTCGTATTCGACCCTTATAGTCATCT -GTCTTTGTCCAAACCTGGTCAATCGTTAAGAGTTTAATTGAAAAAGGCCAAATCATATCA -TGAACCTATGTTTTGATTATTTCAAAAGCCCTTCAAGGCAGGTATAGATGTACCAGAAAG -CAAATAAAAGTACACTATTATATTATCCAGACACGCCTTCGCGCCCGAGCCCCCAGCGAC -CGACCTCACGCATGGCCTCATCACCACCTTCAGCCAGCCGTCCGTTGGTATTGGAGGGGT -TGCTGCTCCCTAGGACGCCAGCTACACATTGCCAGCCATTGCGGCCACCACCCCAGCGGA -GGACTTCGTTCTTGTATCGCTCGCCGGCGGCGCGATCAGTTGCCGCCCGACCAGACAGGA -ATACATCCTGCCAGAGTTTGTTCGCGATCGCTCGGTCAAAGATGTAGCTGTAGTACGTGG -CACCGTAGCCATACAAGTGACCGAAGAAGCCTTGCCAAGAGGTACGTGGCTGGACATTAT -TTGGGTCGGGCAGACTGGAATGCTCAGAAAAGACTCGATGATAGATATCGGTGGAATCAA 
-TGCCAGCTGAGCTAATTTCAGCTGGGATCGAGTGATATGCCTGATCAACGAGCGCCATGA -GGATCTGTGCTTCATTCTCAACGGCACCATAGATGGACCCATGCGCAGTGCGGTCTAGCT -CCATGCTTTGCATCATGCTCTCAGATAGAGGCTCATCGGTCTTCCAGTGTCGCGCGTAGA -GTCTCAGTACCTCTGGCGCAGTCGCAAAACTCTCCATCAATACAGAGGGAAGCTCGGCGA -AGTCCGTGGCGCATCGAGTACCGGAAATGGACTGTAGAGGCGTCTGGCCCAGAATAGAGT -GGACAGCGTGGCCCATCTCGTGGAATAATGTGCGAACACTCTGCTCACTCAAGAGAGTTG -GCTGACCGGTGCCATTGGCCTGAGGTTCCTGGAAATCACAAACGAGGGCAATGGTTGGGA -GTTGACGCAAAGATTTAGTCTGCGGATCTACACCAGTGGCCATGCCGTCATTTGGGTGTG -CAGATTCGTTCATAGCGGCACACTCAGCGACTTCCTCTGCAGATATTTCGCGCGAGCAGC -GCAGAGTGAAGTGAGCCGGATTCGGGTGTTTGCTCGGTCGTGTAAATAAGTCACAGTAGA -CGACTGCAATATGATTCTCTGATTCATCCACGACATCCAATCGACGAACGTCGGGATGCC -AAGTCTCGCCGGGGGATGCTTCATGAGGGACTAGACGCACACCATACAAACGGTCAAACA -GTCGAGAGAGACCTTGCATTACTGTTCCCAAAGAGAAGAACTCATTGATGACACCCATGT -GACGAGAGCGTCGAGCTTGGGAATACGCTAGAACACGCTTGTGAACGTAGTACGCGTGGT -CCCAGGGCTGCAGGGAGGAGCTTGCCTTCAACTCCTGCAGCTTGGAAAGCTCTTCGCGGA -CTGGACCCTGGTTGCTTGATATAAGTGAAGTCAAAAAGTTCGATACTGCTTCCGGTGTCT -TGGCCATTTTGTCGCCGAGAGTCATGTGTGCAAAGCTATTGTAGCCTGTCAGTTGTGCCA -ACTCTGCCCTTTTCATGAGAAGTTCCTCCAATCTTTGAATTTGGCGCTTGCTGGATGTTC -GAGTGGCGAGATAGATGTCTTTGCGAACTTGTTCATCATGCACCGAGCGCAGAGCAAGTC -GAGGGACTATTCCCATCGTTGGCACTGGTGCTTTCTTATTCCACCGCTTGATTTTGGAGA -TAACCATAGGATCCAATCCTTGGAGGCTGTTGGCACCAAAGACTACCTGGGACCTTGCGG -GCTCTGCGCCATTCATGAAATCGGAGCCTAACTGGCTCACATCATTCGAGAGATTCACAA -ACCGCTGCCTGACGTTGGGCGGCATATGAATTGCAGAGTTCGAGAAATCCTTGTTGAGGA -TCTGTGCCACGATCCGTTCTGCCTCAGTCCAGTATGATGTCACATCGGGGTTATTCAAGG -CCTTATTGAGCTGTTCATGAAGCCCTGTAGTTGTGTTCAAAACGTTCATATATTCGAACA -TGAGAGCCCACGCCTGCGTGGCTGCTTCTTGTATCTGCGGATCAGGGTGAATGGTTCGAA -TGAAATCGGACAGATCGATGACACGACACAGGAGATCGCTCAGTTGGTCAAGGTCTCGTG -CCAGGGCCCTGTATTCTTCAACCGTTGAGGCGGCAAGCACCTTGGCTACTATAGCCTGGC -ATCTTTGCAGCGAGATATTTGCGAATGTTCTAAAGCCCTCGGGGCTTGTCAGGTATTGAT -TTTGGACCAATCCGGCTCGCTTTGAGACTGTGGTTGGGCGCTGCTGGGAGAATTCATTCC -AGAACGGTTGGGAGTCAAAAACACGGCGTAGGGTATGGTCATCGGTCCTCTTGCTTGGTG -CACTGTTATCTGCTGAAACGTACTCAAACGATGTTGACGGAGAAGATGCGGCGCTCTCGA 
-AGGCTCGATATTGAGGCCGCAGATTTAGGCATTTGCGACATGTCCATGTTCGTCGCAAAG -GGCGAAGCATTGTAGAGATTGTATCAAAAGCAGATTAAAACCATCGGAAATTGTGAATCC -ACAAATCCCCTATTCAAAGAAACATAGAGTCAAGGAGAACGGGCAATGACGGAGTTTTCT -GGAGGTTCCGCCGAGCCGAAGAATTCCCGTCTTGCTCCAACACGACTTTGCTTCATCCTC -ACTACCAGCTGCCCCCATTCTCCTCCCCCCCACACCCTCAGGATGTCATTCCGGCCGATG -TTGCAACAGGTATGTCTTTGCATATTTGCATGCTCTCAATTTGGATGCAATTGATTCCCC -AGTGTCCCGTTTCCATGCTGACTCGTGCACAGCGGGCTGTGGCTCCGATCGCAGCCACTC -TCCTTGCGGGTGGTATTGCTCTCTACCCTAAGAAGACAGCATTCGCAGAAGAACCCCGAG -ACCTTGTATGGCCACATCTATTCCTAGCTTTATTGAATGTCTGAGAGACCTGGTCTGACA -CCTGAATGCTTCCCTAATATAGCGGAAACCTATCTACGATGACTTCCCAGCCGACATCCC -CGAACAATCCAAGCCCTCCCAGGTTGCTGCGCCTGTCACCTCCAAGCTGATCTCTGCGCC -AGCTGCCCAACCAGCTCCCTCTTCGCCCACCCCGACAGACATCCTGACCGCTCAGGTCCG -ACAAGCTCGCCTCTTCCTCTACTCACACTCCCTTGCCGCAGAGCAGGGCTTCAACAACTT -CCTGTCTCGGGCCCTCAACATCGAAAACTGCTTCACCAATACCGTAGCTTCGCTTGCACC -TTCTGCCGAGTCCGGCGAGCGTCTCCTGCCCGGCGGTGTCTACGTCGTCGTGGCCGCCAT -GGCTGGCTCGATTGTCTCGCGTAATCGTGGCATCCTCCTCCGCACTGCATCACCGCTTGC -TTTCGGTACTGCTGCTGCCTACACCCTCCTGCCCGTGACAATGCGAAATGTCGGAGACTT -GGTTTGGGAGTACGAAAAGAAGGTCCCCGCCCTTGCGGAGAAGCACTCGGCTGCCCGTGA -GCGTGCGGAGCACATCTGGTATACCGGCATTGCACACAGTGCAATGGCGCGCCAGATGAT -GGAGGAGAAGATTGGTGACACCCGGAAGAAGCTGGAGCAGCTGGTGAGCAAGGGCCACTA -GGTATAGCCTGGCTGTTTGGGGCTATAGCACGGAATCATTTGTTGCACATGTACGCTTAG -AACTCGTAGATCCGAAGACATAACCGGATTGATTTCTTTTGTTTCAATCGTTTAGTGATA -AAGTAGGTGGCCTGACAAGAGCACCTATGAGGGTACGCTTGGACATGGGAATAGGTAGCT -TGCGATGATCGATATAATTTGATTTCGTCATACTCTACATTAAATTGATTTCTGATTGCT -CTAAGCTTTTCATTCTTCCTGGGCAGGCCCTGGTGATCCTTCAGGCGCAGGGCTCCCTGA -GTGAGAGGACGAGACTCCTGCCTTCGGCGGCGGGGGTAGTGGAGTTCCATGCCGTGACGG -CGCATAGTCTTGCGCCCGACTTTTCTCTTCGCCCTCGGCGAAATAATCATCAAAGGCCCT -GTTAGGCGATGCAGCTGGCCGTGATTTCCCCGCACGACGGCTAGCGGGTGATCTGCTCTC -TAACGGCGAAGCAACTCTTGCTTGAGCGGAGGCTAGGGGCGGGGGCGCTGTGATGTTTGC -TGGAACCAGGAAGTTACTTGGTGGAGCTGCAATTCCCGGGACAGCGTTTGCAGGACCGCC -CACCGGGGCAGCAGTGCCATCATCGCGTGTCTCGAATACAACTTCCCCTGGTCCCTCAAC -CTCTTCTCCAAGTCCAATGGGTAGCCTGTGAAGAGCACCAAATCCTAGCTCTGCACCAAG 
-GGCGCCTTCGCCACCCCAGCCTCTGGTAGGCACAAGCTCGACCTCGCGAACCACATCGAA -TTCACTATTGTAAACCCACAGAACGAGCGTACGATTTAGATGATCTTCAACCAACTCCCC -GAGTGCCGACTCCCCTCTTAATGTCCCGCTGGGGGTTCCAATGATATAGTCTGAGTAAGG -TAGCAGCCCGGCTCGGTATGCGGGGCTGAGAGGTGACGGAATTGCAATAACATGCCAGAT -GTTTTGTGTAGAAGAGAGGGGAGCAAGCTGCAGAGCGAGGCCGAGGGAAGGGTTCGTTGG -CGGTATAGGATGAGTGACTGTGTGTGTTCGTTGACCCTGATAGGATATAAATTTAGCTGG -AATTAGATGTATCATTTTAGCACGGTTACCTTCGCACTCCACACTTCAAGTGTCAAGCTT -CCCCCGGCGCAATTTCGACATTCTGTCGCAAAGAGGTTCGGGTCTGGATCTTCCTGTACG -TCAGTGTCAGTGATGCTATGTTACGGGCGATCAAAATGATCCTCTATACTCACAATTGTA -TGACCGTTGATGCCCACAATAAAGTCAAACCATGGCTCTAGAGGCAGTTCTGGGTCTTTG -TTGCGGAGGACCTGGAAACCAAATGCATTGTCACTGGGGCCGTTGCGAGGCTGCTGGCCA -GGTTCCCCGTCTAGCCGACCGATGAAGCGATTTAATGCGCCAAACATAATGATCTGCTAT -AATTCAGATTGCGAGAGGTGATCGATTGATGAATAAGGAAGCTGTGGTCAGGTGTGTTTA -CCTCAGGGCGAGCTGGCCAGATACAAATGCAAACCATGCAAACATAATCCATCTTTATCA -GTTATCGATGAATCGATCATCTCGCTTTATCGCTGCACTGCATCATGGCAGACGAAGACG -CGCCTCCAGAGGTGCATCACTACAGCAGCTTGCACGAGGTGCCCTGGGACATCCAGAAGT -AAGTGTCATCATCATTGAAAAATACCAATGCTAACTCAATTACCAGCTACTGGGCACAAC -GCTACCGGATATTTTCCAAATACGATGATGGTGTATGGTTGACGGATGATGCTTGGTTTG -GCGTCACACCAGAACCAGTCGCCAAGTTCGTGACTCTTCCTATAACACAGCCTTCCTACT -AACGTTTGCTTAGCAAAATAGCTGCACAGATTGCCAAGTCAGCGCCTGCTGGGCGCAGTA -TTCTCGTGGATGCCTTCGCTGGCGCAGGCGGAAACACCATTGCATTTGCCTTGACCGGAA -AATGGAAACGAATCTACGCGATTGAAAAAAATCCTGCTGTGCTAAAGTGCGCAAAGCATA -ATGCGAAAATTTATGGCGTGGAGGACAAGATCACATGGTTTGAAGGCGATTGTTTCGAGA -TTATCAAGAACCAACTCAAAGAATTGGCGCCGTACAGTGTCGTCTTTGCCAGTCCACCGT -GGGGAGGTAAGTGCCACCTAATCTTTGCCCTCGTGATATGAACTAAATGTAGCAGGTCCG -GGATATCGTTCAGATCAGGTATTCAACCTGAAGACTATGGAGCCATATTCCCTCCAAAGA -TTGTATGATGAATACTCAGTATTCAGCAAGCACATGGTTCTTTACATCCCAAGGACCTCT -GATCTGAAACAGATTGCAAAGCTAGTCCCTGAAGGCGAGAAGGCTACAGTAATGCACTAT -TGCATGGAGGGTGCAAGCAAGGCATTGTGTATTTTCTATGGCGATTTCAATGTGTCATGA -CTTATGCTTGCTATCGATTTGACAATAATCTCATGAAATGTATTTTTACTTCCAACAATG -TATCATTATTTCTTATTGTTCTCGTTGCCTTTATTGAAGAGATCCTATTGAACCCTCAAG -TATTGAACTTCAGGCATTAAGCTGATTACCTTATCAGGCTAGCGTGGCCGTCCCTTGCGC 
-ATTATCTTGGAAACCCGTTTTCTCTCTTCGACCACCTGTCCGACCACGCCCCTCCTCTTA -TCCTCTCCTCTGCGCGACTAAAGCCACGGCGAAAATAACTTCCAAATCGCAAACTCGCAT -GATACCCAGATGTGCATATTCTGAATCGCGTCGTCGCCTTGTCTGGCTCGCGCCTACATT -CTTAGCCTACGATTTCTCTCACCAACTCATTGAATCTAACACTTCCTTCATTTACAACAC -TCTTCTCGCCTTAAGAAAAAGCGCAGTTGCCTATCATGCCTCACGCAACACCGAAGGCCC -CGGAGGGGGAAGATACTGATTTCGACGATGTGATGCGACAACTGAACAGCTATGAAGATA -ACGGCCCCTCTCTAGATTTTTTGTCGCGGGATCTGGAAGTTGGTGAAAAAGCCGATGATG -CAATCGATTATGAGGATTTTGACGACGATGAACTACCGGAGGAAGAGGATGTCGGACGCC -CAGCTGCAGCCCTGCCTGCAGTGGGTGACAATCAAGATCCTTTCGCCAACCTGGGCTCGG -ACGACGCCACTCTTTTCGGAAACGGCGATGATCTATTCGGTGACCAGGTAGAGGGGACTC -AGACTCACGATGACAATCTTGATGATCTTTTTGGCGAAGGTTCTTTTTCGCCGCCTCCTG -CTGGACAGGATGATTCCACCCGTGGCCTATTCGAAGACGAAGAAATGCCATTAACAGATG -TGCCTGTTGACCTACCACCTATCCCGTCCGCTCCGGAACCGCAACCAGAGCCTATGATGG -AGGAGGATGAATTCCCGGATGATGACAGTATATTGTCTGAAGACATGAATCCCGTGGAAC -TCCGCGCTTGGAAACTTCAGCAGCAGTTGTTTGCCATGTCAGGTGTTGAAAACCCACCTG -CGCCGCCTGAGAACCACGAAGAGCTTTTACATTCCTTGTTCCCGTCGTTCGACCGCAACA -CTCTTCCACGTTGGCTCGAGTTGATTCCCCACAAGAAGGCTATTTTCATTGGGAAACAGC -CGACCAAACCACCCAAGCCTGTCTTGCCCACGAAAGTGAACATTGAGCTGGCGGCTGATC -ATGAACGTACATTCAGAACCGGTCAACCCTTGAAGCGTGGCTTAGACCATGAGAGTCTTG -GATTGGTGATGATTACGCAATCGGACCGCGAAGACGAGCGAGGAGAGACAGAGGAAGATG -CAAAGGAAGACATTGATCTGGATGATATGGACGGAGATGAAGTGCTGCCGGGTGGTTTCA -CCATGCAGGATCTTCGAACGATTTGCGTGGATTGGGATGTTAAGGATGATGTTTCCAACG -TTGATCTAGAAGAGATGCCATTAGCAAAAAGAAAAGATGGCAACTTTGATGAGGATGAAG -CCGATTGGCTCATGGAAACTGAGCTTCCGACCAAGAAGCGAAAGACCGGCCCGACTCCAA -TGGATGTCATTGCTCTATCACATATTGATGTCCCATTATTGGATGACCCAGAACAAGCGA -CAATGAAAGTCGCAAAAAGAGTCACAATTGACCTCAATGATCCAAACATTCTCGTTGACG -AATTAAGAGTGGATGCCGTCGCAAATAAGCCGAAACACGCTGCGCCTCGGACCAGGGATG -AAGTGGACCTGAATCTGACGCGCCGTCTCACTCAGAGATACAACATTTCGAATGATCAAG -CATATGACATGCTCAAACAGAATCATCAGAACAAGATCCGCAGTACTCTCGGTAATGTGA -CACTTGAGCACGCCCTGCCAGCCCTACGTTTGCAGTGGCCCTACTACAAGACTGAGCTGG -CCAAGGCTGAAGCTAGGTCTTTCCACCGACCTGCTATGGCCTTCCGACCTGGGCAAACGG -TTTGGTTCAAGAACCCCGTTCAGCTTAGACGCAAGCACTTTCGTGGTAAAGATGCGAAAA 
-CTGTCTATGATTCTACAAAGGCATTGTCTATGGGAGACAACTCCAATGTCTTGCTGGTAG -AGTACTCCGAAGAGCTGCCTATGACGCTAGCCAACTTCGGCATGTGCAACCGTTTCATCA -ATTATTACCGACGAAAGAACATTGATGATCCCACTCGCCCAAGGGCAGATATTGGCGAAA -CAGTCGTTCTCTTACCCCAAGACAAAAGTCCCTACTCAATTTTTGGCCATGTTGACCCTG -GCGAAATCTCCCCCGCAATCTCAAACTCCATGTACCGAGCACCGCTATTCCCGCACCAGC -CGAAGTCAACGGACTTCCTCGTGGTCCGCAACACTACTGGAGTTGGCGGAAGTAACTATT -ACCTGCGCAATATCGAAAATTTCTACGTTGCCGGACAACAATTCCCATCTGTCGACATTC -CCGGACCGCATTCGCGCAAGGTGACAACTGTCGCGAAAAATCGCATGAAGATGCTAGTGT -ATCGTTTATTGAAAAAGAGCCCGGATGAGCGACTCGCCATCAGCGATGTTACAGCTCACA -TACCGAACACTACCGATATGCAGAACCGACAGAAGGTCAAGGATTTCCTTCAACATGACA -AAGATACTAAATACTGGAAGCCACTGGATCCAGTTCTCCCAGACCAAGATACAATCCGCT -CGTGGGTCCAGCCCGAAGATGTTTGTCTTCTTGAATCAATGCAGATTGGGCAACAGCATC -TCCACGACACCGGCTATGGCAACGATGCCGAGACTGGCGGCGAAAACGAAGAAGATGAAG -AATTCGAGAGCTTTGAACAGCAGATGGCTCCCTGGAAGGCAACTCGCAACTTCTTGTTGG -CCTCCCAGGGCAAGGCTATGCTGAAACTTCATGGCGAAGGAGATCCCACCGGCCGTGGCG -AGGGCTATAACTTCATCAAAACAAGCATGAAGGGTGGGTTCAAGGCTATCGGCGAAAGTG -TTGAGGATAAGCTGGACGCACAACGACTTAAAGAGCTTGGAGGTCACAGCTACAATGTTG -CCCGCCAGCAAAGATCATACGAAACATCTATTCGTCGGATCTGGGATGCTCAGAAAAGTA -GCTTGTCCTCCACTATCGAGCATTCCGACCAGGAAAGCGACGTCGATCAAGAAGAGGAAT -ACCATGACCAATTCAACAAACCAACACCCCGATCCGAAGCACCAACACCTGGGCCATATC -GTCGTGATGATGAGACAACAAGTCAGTTCAGCAAAATGAGTTTCAACAGCCAGCGCGGCA -AAGTGTTGCGCATCACGCGCCAGGTCAAGCTGGACAATGGCGAAATCGTGGAGAAAGAAC -AACATGTTTTCGATCCCCGAGTGATCAGGCACTACATCCAACACCGTCACCACAACGAAG -CCATGCACACAAAGCTGGAATCTCTTCAGCCTACGGGCGACCCAGAAGTCGATGCTCGCA -ACAGGAAGTTGATCGAAGCCGAGCTCGGCCGTCTCAACCGCAACAAGGAGCGTCGTTTCG -CCCGCGAGAAGCAGAAGGGCATACCTCGCTCCGGTGATCCGGATGGCAAGCCTGCTGGTA -CTCAGCGCAAGTGTGCTAACTGCGGTCAGGTTGGACACATCAAAACAAACAAGAAGCTTT -GCCCACTGCTCAATGGTACTATGAAGCCCGAGGATCGAGTCACGGATTCGGCTTTCTCCA -TGAGCGCCCCGGTTCTGTGAAATCAGTTCGCTTGTTTTTTGCATTAAGCACGTTTCTCCT -TCATCATTTCCCCCCTTATATCCATCTACCCTGAATTTGATACCCCATAAAAGCCTCTCT -GTTCTGAATTTGCTCTCCGATACCCCGGTTCCCTGCCCGTCAGATATGATTTGAGACCAT -ACATAGGCACTTGCTCCCTGCTGTCTTTATGAATTGCCCCCTGTTGTTGCTTTTTATTTT 
-GGGTGTTTCACTTCATTGATGAAGTATCATAGGAAATAGCATGTATTCAATCTGATAACG -GCACTCTTTGTACACAGTAGGTCAACGTCTTATGTCCATAATAATATATTCATTGCTAAA -GATACATAAGAGGGAGAGTGGAGTGAAGTTTGTTGTTCCAACAACGGCGAAAAATCATCT -ATGAGAGAATTAGGCGCTATAAAAAAACTTAAAGAGGGCTATGCGGATGTGTCTGACCCG -CCGAAAGGATAATGGTTTCTCGAGTGAATGGAGATATAACAATAGCTGACAGGTTTGTCC -CAAATTGAGTAGGGCGTAATTCCCCATTCCGTGGCTTTGCTTTGTGTAATCACGATGTAT -ACAGGCCAGCCAATTGACCTCTGGTATACGGCTGACGCCCTCGATCTGCGCTGAAAGCCA -AAGCCACGTCGTTGGGTCGCCGGTATGTATACCCTAGAAAAGCGCCCTTTTTGCGAATTT -CCAAGACTGTCTTGCTCATCTGTTTATCGCGTAGGATTTTATCGCGGGGTCGCCTCTTCT -CCTTTCTCTTTTCGGCGTCATTCGCCTGTTCCGGTGCTGCACGTGGGGTAGTCTCAGCAA -CTGGCGGCTCAGCGTCCTTCAATGGATCTATTTCTTGATCCCTAGGCAGGATCTCGTCAG -GGTCTTGGTCAACATCCTCTGAGTCAGTACCAGCAGGATTCTCGGCAATATCGCCCGGCA -ACTTCCAGTCATCGAAATATCTAGTGTCCTCCCAGTTCCTGACCCTAGGGACCATTGGTG -GTTGGCACATGTGGAGTTCTTGCCATCGGATACCTCGGAAGAACGGATGAATCTTGATGT -CTGCTGCATCGTTGGGGTATACGTAGTAGCTCGCGATATTTCTGTACCGCGGGTCCATAG -AGTACAACATTTGAGTTGAGACAGGACGTCCATTCAGTATATCATTGGCTTGGTATTTGG -GAGAGCACAGGCGATATTCGCGCTCTTGGAGAATCCTAGTAATCAGATCCATGGCATCCT -GGGAGACTAACTTGTTCGTAGACTTCTCTCGTGGAAATCGCAGGGTTCTTGCGTGTTGCT -TGAGAATTTTTAGGATGAAATGCTTAGCGGAAAGGGAACTAAGGAAATACTCACAAGGAT -CTTGATCTTTGTATCGTGGCGATTTTCGCAGGCAAAGGGCGTGAAACCATATAGACACTG -AAAACGCCATGAGAATTTGATACACACGAAACGTGAATGCCGAGCGACCTACCTCGTAAA -GAATGATACCCAAACTCCACCAGTCGCATCGCCCGTCATACATTTCACCGCGAATGACCT -CTGGTGCCATGTATTGACTGGTTCCAACCACACTCTTTGCGAATCTGCGTCGTCCCTTGC -TGTCTCTCCAATCCAACAAACTTGCCGAGGATGGTTCATGAAGAGTATAGTCATTCAGAT -TGTGCAATTGTATTTCAGGCGCAAGCTCTTTCGTTTCGGCAGCCCCCTTCTGATCTTCAG -AATCACCCTTGACATCAATGCCAAGTTTCTCCAGTAAACAGTAGCGATGACTGTTGTAGT -AAGCTTGATCGTGTGACCAATGCCCATTGAAAGCCAGACCGAAGTCAGAAATTTTCAAAT -GGCCCGACGCAGAGATGAGGAAGTTATCGGGCTTGACATCGCGATGAATCCAACAAAGTC -TGTGTGCCTCCTCGATGCACAATATCATTTCGGCGATGTAAAAACGTGCCCAGTCCTCGC -GGAGTGTGTCTTTGCGAATCAGTAGCCCCAAAAAGTCACCACCAACCATGTAATCCATCA -CAAGATACAGATTGTTGACATCTTGGAAGCTGGTGATCAAAGGAACAACCCAGCGAGAGG -TCTCAGAGGCAACAAGGAAATCTCTTTCTGCTCGAATATGGCCTTCCTGGGCGTTGCGGA 
-TCATCTCTGCCTTTTTGATGACTTTCATGGCATATACCTCCTTTTTTTCGCCGGTCATAT -GTCTTCGACGGCTTTGTTTAGCACCTTCTACAGCAGACACCAAAGCTCCAAGAGGATTTG -ATCTAGAGTGGACATTGTTGTCCTTGAGAGTTAGAGGGTCGCCTTCATCGGAGTTTTCCA -AATCACAGCCTTTATCTCGAACAAGACGCACAACGCCAAAGCTTCCTTTCCCCAATATCT -TGATGGCTTTATAACCTGCGACAGAAATGGCACTTTCCTTTTGAGTGCAGTATCGATTTG -ATCTGAGCACACGGCACTGTCGTAGGTAATCGTTCTCTCGCAAGATCCAGTTGTGTCTTG -TCATCAACCGCTCTTCGGGTGTGAGCTCAAATGCATATATATGCTGCTCCAACTCGAGTT -GGCGTTGCTGACGGGGTTCTCTGTCTTGGAAAATGGAGTTGAAGTAAAGTTCCAAGTAAA -TCTTTGCCACAGATGTCGCTTCCACTGTTTTGATTGACGGGGATATGGTGATTGCATTGG -GTTCTGAATGACTGGCTGTTTCATTGATCTCAGAGAGTGAGGGCCAGTGCATCAATGCAG -TGTCGTGCCCATCGTGCAGCTTGTCTTTATCTGAGGCAGGAGACGGCCCTCCGGACGTGA -GTGGAGTGAGTGGAGTTGACTTTCCACCAGCAGGAGACGCACCACTACTAGGGCTGGTTG -ATGGATCTGAGCTATCGTTGGCACCATCATTCTGTTTGCTCAGGTCGTCCAATACTGGGG -GATTGCCGAGCAGAATACTTGGCCTTGACCGAAGATGCGCACGGCGTATTACAGTGGGCT -GGCCAAAGGCCAGTGAGATCTTATGCGAAAAGCGATCGACGAAGTTTGGATTTGTTGGAC -TCTGTTGCCAAGTTATGGTAGTTGGTAAAGAGTCCTGACCTGTGGCATGTTGCTGCACGT -CGACAACAGCCGATTCGCCCTCACTCTCGGGTGATGGTCGTAGGTCGGGTAGGGGTTCAG -ATAAAGTTTCAACCCCTTCAGTGGGGGCATTGAGTGAAGGAATGACAGGTAGTGGTTCAC -TTTGTGGGGACTCGACTGCAGTGAGATATCCTGGGCTATCATCGCCATTACCACTTGGGG -AGGATGAGTTTGAGGGATCTGGGCTTTCACCGGCCTTCACGGAATCACAGCAATCAGTAT -CGACACTTAACCCTTTGTTGCCTGCGATTTATAGTCAGTAAATGGTTTATAGAGATAATG -ACACCACTCACTCTTGGATGAACCACGTAAGCCCAGCAAGAGTAAAATCTTGGAGCCACT -GCGAGTCAAACGAACTCTAAGCCGATGCTTTTCCGCCCCCGGGATAATTGGCTCTGATGC -CGGGCTTTCTGTTGTAGTAGAGTCAGTTGGAGATGTGTTTGGCTTTCCAGAAACAACGAC -TTTAGTTGGGATCCAGAGATTGAAAAGACTTCGTTGATTTGCATCTTTTTTACTGGAGGT -TGTCATTTTGGCCTCTCAGAGTCAACGAATGACGAATGACACATTTGTGGACCTTAACAA -TATATTAAATATATGCATGGAGCATGAAGCATGGAGCATCAAGCTCAAGTGGTTGTAGGG -GAGTACTCATGTTGTTAGGGCTGGCAACCACGGAAGAGGGTAAGCACTACCCGACCTTCT -GTGTAACCCAATGGACCTGATACGCTTGGAGAACCGGAAGAAATCAACATGCATGGGATA -GGTATCTATGCATGAATATATAACCAACCTAACATGAATGCACACAGCCACACCACATGG -CAATCCAGAGGCTCAAGACGCGGGGGTGGTCGGTATCGGAATCTCCTCGTGTTTCCCTAT -TTAATGTGTGTTTCCCACTCTCTCTGGGCATTCAAAGAGTCCATTTAATTTATCTTGAAA 
-GAAATCTTTGTCGTGTGTGGGAATGTATACTCATTCAAAGTCAGCTCGCTCAGGCGATGA -GTTATATATGACCAACACCATCTCCGAAGACTTGAGCAAAGACGCCTCACAAGAGCAACC -TCTTCCTGCTTCATTCGGAGGGAGAGGCTCTCCCTCTTCAGATGATGTCCCTTACATTCC -AAACCCCAGACCGAAACAGGAAGAACACAATAACCAGGACTACTCATTCCAACTGATGCT -GCAAGAACGGGAGAACGTCGCTCGACGAAACAAGATGGCTGCCTCCCAGGGCAGCCCATA -CGATAGGCCGGTTGAGCCTAATGTCCATGCCGCTTCTTTCGGGCCACCTGCAGTCAACAA -CCAAAACATGCCAGGCCCTTTCCCGCTAGGTCCTGTTCCTTATCAATATGCCCAGTACCC -GGTATATCCTGCCAGTTTCCCATGCGACTCTGAAAGACACCAAGTTCCGAGTCCACAGTC -TGTGCAATACTACAATATGGATCCGCAGGATGCAAATAGCGTTCACTCAGACACATATAC -CCCTCCCAGCCAAGGAAGTGTCTCTTTATCAGAGGCCAATGACCTCAAGGCCCAAATCAC -AGCTCTCCGAGATAAGGTCCGGGAGCTTGAAGGAAAACCTGCCCTGCCCATAGCCTCAAA -GTATCAGATCCTCTACAGGATTGAGAAAGATAGCACCCATCCGAATGATAATCAAAGTAT -GGACGATAGTGACGATTATTTCAGCCCAAGCCTCGAGCAACGCAAGGCCAAACCTAGACC -AAGATCTTACCCCAGGTCTCCATGGATGGGGATCTATACCGACCCACCTGAGCTCATCCA -GCGAAACATGGGTGCGCCATATTTGCACTGCAATGATCGGCTGGTCAACTTCGAGCTGTA -CCTCGCTCTTAACAAAGACATATCTTTTGTAATTTTTCGCAACTACAAAAGAAGAGCTGA -GAGGCAATCATCGAACACAAATTATGGCAAGCCAAAGCCATTCAGCGAGTCGATTTTTCC -AGTGTCTGAAGATCTCAAAGACGTGATCGCAGATTTCCTTAGGGGAAAAGAGTTTCAATC -CATGCAAAGCAGCCACCGACACATGGGAGAGGTCCAGTCGCCTTACCTCTTCGTCTACCA -CAGCAGGGGTGCTGTAGAATCGGAAATCAAACGAGGGCTGAGCTCTGAGGCACAAAGACA -ATTTGACCTCTTTATGGACTATATCCAAGAGGCTTGCGGCAATGAATACACTGCAGCAGA -CCTGCTTTTTGAGAAAGGCAGAATTCGCTGGGAGTATGTTCAATATCTATTCAAGCCTAA -CCATATCTTGGTGTCAACCCAAAACAAGGAGCATATAGGTTATGTAGCCAGGGGATGGCC -ATCTCAAAGCCTAGACATTGATGGCACCGGCAATGCTTATTGGTGGATCAATGCCCAGAC -CTGGGATTTCGACGGTGAATTCTATAAATACCAGACTAGGCTCAATTTTGAGATGCCAAA -GTCACAGCGTGCCGTGGAGAGCCCGTCGTCCCCGGAAGTGGATTATTTGTCAAACGATTG -TACCCTTCCTTTACCCAACGATGACCCAGACAAAGAGTGTGCTATCACGGACCTTGCAGT -ATACCCAATCAAGTATGCCCCTGATTATCTCACTCAGAAGCTTCAGCGCCGTGGTGAGAT -TTTCTGGACATTCCGCACACAGAAGTATGTCTCCTATCAGGCGACCGAGGAAGAGAACTT -CCAGACCATGGTAAGTGTTTATTGCCAGACTTCCATGGTGGCTTGAACTAATCATAATTA -CTAGGCCGACGACCGATATATCATAGACATGAAGACATATAAAAGACTGCATCCAGTCCC -GGTCGGGTATTCGCGTGATGGCCTACGCAAATACACACTCGATGACCGAGTCATGTCCAG 
-AGACCAACCCCCGCAAGAGCCCTTTAGCATGTTGATGCCTCCGAGGGTGACAGGGTTCAA -CCTCCGGCGCAAGAAATGGTTCGATCTCTCAGTCGACCGCATCTCCCATGTTGAGTGGAA -CAAAGACGCCTTTGAGAGTCTGGCCATCGACTCTAAATCCAGAGATCTTATTGAAGCACT -AGTGACTAACTACGTCGAGCCTGAATACTCAGCCGATCTGATCGCCGGCAAAGGCAATGG -GCTTATCTTGCTTCTGCATGGTGGACCCGGAACAGGCAAAACACTGACTGCAGAGAGTGT -GGCCGAGATAGCTGAGAGGCCTCTTTACCGAGTCACATGTGGCGACGTCGGCACTAAGCC -AGAGGAAGTAGAGAAATATCTCGAATCCGTGCTACGCCTGGGCAAGATATGGAATTGCGT -AGTCTTGCTCGACGAAGCAGATGTTTTCCTCGAGCAGCGCGGGTTGGAAGATCTGAACCG -AAACGCGCTCGTATCGGCATTCCTACGCGTGGTGGAGTATTTCGAAGGTATCCTGATATT -GACCACCAACCGGGTAGGCACCTTCGATGAAGCGTTCAAATCGCGCATCCAGTTGGCACT -GCACTATCCCCCTCTCGGCGAGGAGCAGCGACGAATCATTTGGAAGACTTTTATCAAGCG -ACTGGATGAGTTTGATGAAGATGCTATTGACGTTGAGGATCTTATGGGAAGCTTGGATGT -GCTTCAAAGGAAGAATCTGAATGGTCGGCAGATTCGGAATGCCATAACTACTGCTCGTCA -ATACGCGAAGTGGAAAAGGGAGATTCTCACACACAATCATCTCAAAGATGTTATTGAGGT -GTCTGCAAAGTTTGATGACTACTTGGAGAACATACACCGTGGTAGTCTCATGCAGTCTTG -AGCGCCACCGAATCAAAATTTATGACTGGATAGCGTGTTAAGGGAAGAATGGCTAGAATG -TGACGCGAGCTGTAGCAGGATATTTGCAATTGAACGCGGTTTGGACTTTGTGATCCGTGA -GCTACATCCATCCTGTGCTTAAAAGCTATATGAAGATGGGTCATTAGGGAGATTGGACAA -GGCCTAGAGAAGAAGTGTCAACAAAAGCTCTGTATCAGCGATCGACTTCCCGTACTCATA -CTCGTCAGGGAAAACGCAAACATACCCATGAGTCAAGCTGAGCCCACCAGATAGATCCTT -CAAAGCTCTCAAGGAAGCCTGCATCGTCTCCGGCATTTACCATCACATCGGATGGTTGCA -TCATGAAATCAACCGTCTGGGCATCAAAGAGGACAGGTTTCACATCAGTATTTTGTGAAA -CCTCTCCCGAGACATATAAATTATCATTTTCACCAGAAGAATGAGCCCCAGTTTCAACAG -CGAGCCCATTGGATCCAATCTGCCGATGAACCTCCCGTCTCAGCTCCTCCAGAATAATGC -CGTAGCGACGGCTCGGACAGTTCTTGCGCGTGGCTTCAGCAAGGTGTTGCTGACAAGATT -CAGCAAGAGAAAAAAGTTGGCGCAATTTATCTGGGTCACCGGGACTTCCGACCGAAGAGA -CAGAGGCTGAGCTAGATCCGTCCAATGATTTACGATGCTGTTGAATCGTATGGATGTAAA -CGACCAAAACAGCACAGAAGCCCACGTAGTGCGTGAACCAGAAGGACTGAATAAGCACAT -TCTGATGCGCAAGACCATCGACTATTGTCATGATGTCCTCCGCAGCCTGGATACATTTCT -GGACGTGCGGGCTGACCATCGGGTGTGGGATTGTCGGTCTGCGGCTGAGGTCTGTGAAAT -CGTTCAAGAGGAACGAGCGAGTGACGTGAATCATGGCATGCGAGTAGGCGAGTTGAAGCA -CTTGGCTTTGGCGGCAAAGAGGCGGGATCAGACTGGTTGGGTGAACACTGTTGAATAACG 
-GGGGCGCGGTTTCTTTCCATTTTTCGAGTTCGGCAGTTAGGCGGATTGCGGTCTCGAGGG -GGGAGTCTCGAGGGAGGGTGTTTATGCTATATAGTTGGCGGGATATATCGCCTAGAATTC -GACCGAGTCTGGATTTTGTGTTAGGTGCCCAGGGCTCATATGAGTATAAGTGTAGGGTAG -GTATGCGGGACTCACCGGTAATGAAGAACAGAGGCAATCATCATGCTGTCGGTGGACCCT -GTTCTTCGAGTCGGATCTTCTTCTAGTAAATCTTCGTCGTTCGTCTCGTTCGGTAGCTCT -TGGTCGATGTCCTCGTCGTGGAGCAGTCGAGGTCTTCCAAACATAATGCTAAGGTATTTG -TCTAGGGTGTATACACTCCAGAAAATGCGCTTGCGGAGCTCCAATTCTAGATAAGAGCAC -CTGCTATTTGAGACTTTGGCTGGCTGTTTCCTGTGTAGTCCGAGTGCAGTCACAATCTGC -ATTGTCGTGCCGAATGAGTACCAGCATTCGTTAGCCCGTGACGAAGACAGCAAATATAGG -CATTGACCCAGTCGGGCCTGGATGGTTTCCAGTCGTGGCGGCCCTGACTCAAGGGAAGAC -ATGTATTTCGACGCTGCATACCAGCGCTCACTGTAATTTTTTAGTAAGCCAATCTTCTGG -CTGCATCAAGAGATTGCTGACCTTTCTAGCCTCTCACCCTGCCCACCATGCATTCCCCCA -GGCCTCATTTCCACGTACAAAGTACTCACGGCAATTATCATGAGAACTATAGCAGTCCGA -GCCACCATATTCCCCACGGGGAGGTTAGAGAGACCGATGTTATTTTGGTAAACCTGTCTA -GTCCACTCTTCCACATTGCCCCTGTGCACGAACCGATAAGTCACCATCGAGTAATCGAAG -TATATTCCCACCAGCTCCAGTGCCTTTTCCAGAGACGGAAGGGTAAACTCTGCCTCTTGG -GAATTTGAGTAAGGCTTGTCGCCAAACATGAAGACAGCTGTATTCCGCGAGGACTCATTT -TGGAGCTCATTTGGATAGTGCGTCGTTTCATCTTGATGCAGACGGCTCCATACGCGATTG -ATAAACGACACACCGGATGCAGGACCGAGGTAATTCCCCTCAAAATCGGTTGATCCTGGC -TCTGGCGAGTTCCGCGATGATATCACATGCTGGTGCTGGCTTTTGTTTGGAAGTAGATTG -TCGGTCACAATCGAGTCCCGGTCATTTCCCTGTAGGTTGGGGCTGGTCGAGGTTTGCCTA -TCTATATTCTGCAATGTACGACCAGTCGGGGTCAGTGGACTCGGCGATGCAGGAAGAGGG -TCTGGTGGTAACCCCCGAGAGTATGCGGCGTTGTATTTGCATGGCAGTGACAGTCGTGTA -CACCTTGTGCATGGTAAAGTACCCGTACACCTGGTTTTCTTTACTTTGCATGAGTCACAG -GCTCTGGTAATCTTCTGACGGTGTGCCACGCCATCTTTTAATGTGTCGGATGCTTTCCTC -TTGTTAGACGGTAATTCTGTGATTTCATTTAGCTTGATCGCGGGCCGAGATTGCATGGCA -ACGCTAGTGGTTGTCATGTTGAGATCGGCAAGACAAGCCGCAGGAAATTTAATCCCGAGA -AATATCGAAAGGAGATAGAGAAAGGAGAAGTATCGAAGTATCGAGGAAGAGGGGGGAAGT -TGGAGAAAAACACAGGATCTCGGCAAGTCTCCGGTCGCGGGGATGCCTCCACCTCCCCGG -ATTCTCGTGACTTTAGATTGATATATGGACTACACCGGTGATTGAGATCACCTCCCTATG -AGGGTTGCACCAGGCTGTTCATTGAATGCGATGATCGTACAGATATCGTCTTGCTCTCTG -TAGCTAGTTGTACGGATATTTCATGTTTTTATACGAACCGTAGTGAGCAAAATTACCGTA 
-GGCTTGTACACTTGTTAGCTGGGAAAGCACAAACACTGGCACGTCATCAGATGGAAATTA -GGAGACAGTCTGATCAGCCCATGTAAGTATGAATTTGGATAGCAACAGAGAGCTTGACCT -GGAGACAAAGCACAGACATAGACCGAAGGAGCATATTAGTATTTGATCAATCGAAGTTGA -AACGAAGCCTGCTGAAGAAACCTTGAGATATGGATAAAAGCTatgaggatgaggatgagg -atgaggaAAGAACATCAGCCCTAGCTCCAGGTTCTTGACCAGAAGATACCTTCCGGAAGA -TCTCGTTAGACTACCATATATAGCCAAGTCAGTCAAGGCAATGGGGAGACATTGGATATG -CATGTTCTCTACCCAAAACCCGAGAACGACTTTAACGAAGAACATGTCAGGCTCAATTGC -GTAGAAAAGTCAACATATGGAAAAAACCCCCCGTTTTCTCCGAAACCACAAACACCCCTG -GAATGTAACTCCAACCCGAGAAACAAAGTCTGAAGGTGACAAGCTATAACTGGGTATCTC -TGGACATGATCACTTCGGTCAAGGTATATCGGGACTCTCCGCAATTCTTTTTTGCGAGCT -ATCAAAAGCCCCTGCTGGGATGGCAAAGGATATGCAACAAAAAGAAAAAATATCTAGGCT -TCGGGTAAGAGAATAGGAACAGTCTTGGTTTCTTTGCGAGATCTCAATCTGTCTCGAGAT -ACGGCAATAAACATAGAGATCGAGAGCTGGTCACACAGCTAGAACGAGGGATGATGTGAC -GGCTGGCTCCATCTCAGGAGAAACACACGAATAAAAGCTATGAAGGCAACGCTATGAAAC -AACCAGATTCGTTGGGTTCAATTTTGGGATCGAGAGATTTTGGTGAGTTGGGGAAGAACG -GGGATAAGGCTGGGACATATTGGTCCAGGGATGCGTATATTGGCCGAAAGGTCGGATGAA -ATCAACGAGGGGGGTTGAAGTAATTACCCATGTGATTGTTTGGAGTAATGTTTAGGTTCA -GATGTGAGCGTGAGTCGCGCACGCCTAAAAGGTTGGGTTGATGATGATCTTCCAAGATCC -GGGTGAGAGTGTCTCGAATCTCTCCAGAGGTTGCTCCCGATCGTATGGTATTGACGATTA -ATTGCACCGTCGGGCCATCAGAGTCTCGAATGAGCCCGACGAGATCGTCCACAAATGACC -GGAAATGGTCCAATTGGTCCTGGATTCGTTTGGCGGAGGCCTTCCGGCGTCTGTCGGCGG -CTTCATCGAAGACACATTCGCGGGCGTGGGTTGAGCATTCGGTGCAATGAGGTGGGCCGG -TACACTGGCAAAATGTAACAATGAGAAAGGATGTCAGATTAATTTGATTTCCATATGGAT -TAGAAACAAGGAAGAGATGTTCAGGAAAATAGGAAGTGTAATGTGTACCCTAGTTCGGCG -TTTTTGACACTCTGTACAAGCGGTCGATGCTCGTCTCATACGTTTCTCTTCTGGTGCACT -CCCGCCAGCAGGActccctccctctcctccatcccctccgctaccaccGAGTAGTCTGGT -GCGAGGTGCAAGAGGCCTATAGCCTCGGGGTGTCTGTTCACTCATCAAGCGCAAAAGTGA -AGCGCAGAAGTGAATAACAAATCAGAAACCCAGCTTTCTATCTGTCTAACCACCATTTGA -AAGCATGATAATCAAACAAGTCACATGGGAGGTGGACAATAGAACGAATTATAGAATGGA -GAACGAGAAGAGGGGCCCTGGGGCCGTGCTAGACCTCAATGAATTCTGCACTGTGCGCCT -CTCGATTAGGAGGAAAAAGAGGACCAAGTGGGGGGGAAAAGGGAGGTaataagaaaataa -aataaaagaaatcgaaatcgaaatcctaagaaaacaggaaataagaataagaaaataaca 
-gggaataagaattaagaaaaaaaaaaaaaaaaggaaaaagGGATCGAGTGGCGGATGTGG -GCCTAGGTATGTCGCAAAAATAAGCAAAAGATGTCAAAGATGTGAGAGGAAAACCCCAAG -AAAAAAACCCCAGAAAATGCTTGATATGTATATCAGACATTGAATGTCAACTTAATATCA -AACCCAGGGGAACTAATATCAGGAGTTGGGGGTAAATCAGTACGCCGTGGGGAGCACAAG -GGATCGGACTTCATACCCTTGCCACGGAGTTGAGAACGATCAAGATACAGTAGAGAGAGT -CTTTCGAGGACCTTGATCTTGGGTAatatgcttcatagccttctatatccttcccagcct -ctgtcctatatcccatatcctatatcccacatccatcctatatcTACAACCCTAATTCAT -TTACCCCGGAGTTCTTTTCCGGGGTAATTGATAGGCTCAGGCACGTTCCTACCGGAAATC -ATGACTCAGGCTACCTAGGGATCTGAATTCTAGGATCGATATCATTTGAACATCATTGGT -CGATCTGGGTGATTTTCTTTCCTACTCTCTACATAAAAAACATTAAAATGCATTGGGCTC -GGAAAAGATTGACCATTTTCAGTCCTTGTCGATCGAGAAATCTAGAGTCTTATAGATTTT -CCCCCTTTTTCTCTGAAGAAACCCGTAACACTCGCTTAAAGGTTCCGAAAAATAATTCCG -GTTTTGGATGTACGATGTTGTACATATATCAATACTCATGTGCTGTTTCTGTGAACGGAA -ATCCCAGTGGACAAACCCCACTTGTGGCCGTTTTGCCACCGTACTAATTTGAATTTACCC -GCCACTTCTGAACAGTAAGATATCTAGAGCTTTAAGCACACTCCAAAGCTAACAGTCAAT -AGAAGAAACCGAGAGGATCACTCTGTATATGGGGTACCGCATTCTAGGCTCTATGTATCT -GCTCCACGGTTTGCTGAGGCTTTTGACACAGATTCGAATTCCTGTGGAGGGGGATATAGT -GCTCCCTGGTTGTAATGAACATTCAAGTTACGAGTACGTTACCCACAAGGTTAACAAGAA -CACACAAGTGGACCGAATTGTGGGCGTAGCAATGACAGGCCAACTACATTAGCTTCGAAG -TGTTCCATGGTCATGCTCCTTACGAGAGACTTCACCGTGTGCTATACACATCTTGGACCT -GTGACCACTGGTTTCCTTTTCTTTTTCTTTTTTTTTTCAGTAGGAAGAAAAAAAATAAGA -AGAGAAAGGAAAAAAGCAAAAACATGCGAACTTGGTCATAGCTGTTTGACGTCCACTCAT -TCCTAGGAGAATACTTCCGCACCATTCATGATTTGCCTCCCCCAAACTTTGAGAGCTTCG -GAGGTCCCTTGGCTCGGATATCAAGCGAGAAGGGTCTATGGCCGCAGGGAGGTGAAGAAC -TCACTCTGATCCAAATCCCATCCACATATAATGTCTGCTCGAATAAGCGATTCAAAGTGT -GGAGTATTCTATATGCAGAGCTGCAACCTCTGCAATACTTGATCCGTGTCAATACTACAT -ACTCCGTACCACATCCTCTCTTGAACTCCCATGCTTCTATCAGTACAGCTTTTGCGTCTG -TTTCACAGATCCTCGGTGCCACAGCACTATATAACCCCTAGAAACCATTTACAGCATACG -TATGCCTGACTTACCCCTTATGGAGGAGTAGGTACGGCCATGCAAGGAAGAAAGCTCCCC -AAGCTGCCCTAGAGCAATCATCCACAATATGTCTACACCGTTACATTAAATGCCCTAATT -TAGTGTTTCTCGGCACCAAGGCACTAAGAAGAGCGGGGCCAGAGATAAAATAGTAATCTT -GTGaaaaaagggaaaaagagagaaagagaaaaaaaaaaaaaaGGAACCTAGCACCTTATC 
-AGGTATGGAGTTCGGGGCTACAAACACTGCCCGTACCCACTCCATTACCCCCAGCGCTCC -TTGATAGGATCAATGACCCCCAAGAAGCGGTCAAAATCGGAATTTGCCACAGTATTCCCC -TTAATTTAGGGCTATCCTAACTGACCGACGTGCCTAGGTAGATTGGCGCCTGGCCAGGCA -CATAGCCTGTCACACGTCAAGCACACACATAAGCAGCCCCATCTCCTTAAGGGACGTTCC -CTCCCGAGCAATATGAGAGCTGGTTGGTTGGCTAGACAAGCATCTCCTTTTTACCGAAAG -TTCCGACACTAAACGGAGTACGGGGTATGTATCACTTCGTTCCTATCTTCCTGATTGGTC -TATGTTGCAATATATGCTTAGCGACTCTCGAACTGAGTATCGCGCACCTGCAAATGATCC -TTTGAACCACTGTGATGGTAGCCACTCAGAGGTGTAACAGCTATGACATTGGGGTATGTC -CGCTGTACTCCGTACGAAGTACATGGCTCCATGATATTACAAATCTTTTGATATGTACAT -GACTCGCACGGCTTACAGTAGCTTCAAACATCACTTCACTTTACCCCGTTTCATTGGAAG -CTCATACACTAGTCTCTACCAATGCTTCCCATAGTCCTAGAAACAAAAAAGACGAACAAA -GAATATCATTTACATGCCAACACTACCCTCCCCACTCAATCTATACCTAGAAAATTGATG -CCAGCACTGCACGTGCACTAGCCATGGGGTTGAGACTGATAGGGATACTAATTTATCAGC -TCGATGCGTTGGGCATCAAGGAAATTGAGACGGTCAAGAGCGAAAGCGAGAGCGATATGT -GTAGTACTTTTTTTTTTAAATCCGAGATCCGTATCGATATTGAACCTGGAAGATTGGCTA -TAGAGTGACTGGGAAAAGTTGTACAACATCTGGATGAATATCAAATAATCTAGGTTCATT -TCATTATTCTTATCTATATGGAGTGCTACATACATATATGTATGTACTAGTTAGACTATT -CGCACTGGTCACACGGTAGAACGAGCTTTGGAGTAACTAGTGCTTGTTTCCCCACATATG -TGTTAACCGAACGCATATGTCGATGCACTATATAGGGTTAGAGTATCGGTAATGATCTTC -GTGGCTGGGTTGAATCGTCTGGACCGGGGCTTTCAGTGGCGATATGCTCACGAGGTTATT -AGCAAATGATGTGTAAATTTCTGTCGATTATGAAGATTCGCGCGCAGCAGACGCCCTCCT -GTGCCATCGGCGCAGTATTAGATAAACTCGAGTACCTGATAGGGTGTGTTGCATGTCGCA -TATGTGCATACATGCAGTGCTTAGGCTTCGGCTTGGTGCAAGGCGCTAATAAAATCCATA -GGAATAATTACAGCTATCTACCTAGGTAGGTACACTATGTTGTACATCATTTTAGACATC -ACGCGCAAATTAGGAAGCGCTAGTATGAGAAATGCGACTGAAGAAAAAAAGCTTTTGATT -GCAATGGGTGATGGAATTCGTATGGACAGGAAATAATATGTACTGATAAAGTAGGCAAAG -GAAACAGGGAATAAAGTGTGTAAAAAAGAACCAAATAAAAACACCCGGGAAACAAAAGCC -ATTATTTCATACATTGGTAAAGAAAAACAGGTGGGATATCAAAACGAATCCAGTCACAGT -CAAGCTGTATGCATAGTACCAAATTATCCTCTAAACTACAGGGTGAGGACTAGTAGATCG -GGCGGCCTTCATCCCAGCGGCGAGATTCTCTCGGACTAGAAGCTGGCTCAGGTTGACTTT -CACCCGCAGGGGTGTGTGGGCTTGTAACGATGTTGCTGGATGGAGAAGGGATAGAGAATC -GAGCGCTCTCACGGCGGCCAGTATGTTCCGGCTCGCCAACGGCGTTGATCCCACCAATCC 
-ACTCCCAGACCCGAGAAACATCACCACTACGTCCTTCCCGTACAGAAGCGCGGACGTTAG -CGTTACCCTCGACAAAGTTGCGGACCCGTCGCCAGAGTTCCTCGCGACGCTTGCCTTGGA -GTTCAAAGCGGAGGACATCATCGCGGAGCTGACCGATGGCAATGTGAGGTTCTTTAGCAT -CACCGCGGGCGTATAGTGCAGCCTGCGTGGATAATCGGTCTAGGGTTGTAGCCACCAATT -CAGGCACGCGGGCGATGTCAGACCGACGTGCGAGAACACGCGCACGAGCGTACACAAGAA -GGCCAATGGCCAGGATTAGAAGTGAAATAGGGAGTCGATACGCGAGGAGAGCGAGTCGGA -AGTGGCGGCGGACGGCGCATGCGAGTGGGAGCCGTGCAATGGAGGTAGAGGAGAGGACAA -GGATGCCGGTATTTCTGTGATTGCGAACGCGCCTCTGTCAGTTCCGGTACACAAGGTCAG -ACCAGATCTTTTTAAGAAATGCCACTGGATGTGGGAATCCAGAGCGATGAGCGTAGCCAA -GGAGACACAATAATCCCTAGGGTGGAGCAAACTTACTGCTGGGTCTTGCTAATAACTTCA -TCTTTACCAAGGATCTCGCCAATGGCGCCCTTCCACAGCTCGTCAAATTCGGTGTCACTC -AGGCCCTTGCGGCGCTTCTTCGCCACCTCTTCCCTCAACTCCGGCTCGCTGATGTCTGGA -GATCTGAAGCCCTTGCCGTCTTCCTTTAGCTGACCGCATTCCCACTTTGCCCGGCGCTCA -CGAAGCTCTTCAACGGCTTTATCAGCAACCGCCTTCACGCGGCGCTCCTTTTCACTATCC -GGTTCACATGTTGGCGGCAATGGGACCAGGCCCCCAAGAGCAAGAGGATGCGGCTTCAAA -AGGAAATCATTTTCGCAGCGCACTTCAAAATTCGGATAGCAGAAGGCGTGGCTTGGGCAT -GGCTCGCATCGTGGCTCCAGAACATTCGCCCAATCAGGAACCTTGGTGTCTGCCAAAGAC -CAGGTGGGCTTTCCCAGGCCACAGTATCCAATCTCGAGCTTTTCTTTGCGCCACCAGGCA -CCAAAACTGCCAGCCAGGGAAAAGATGACCACCCAAGGGGCCAAGCGGCTGAGAGTTCCT -TGCTTTTGCGGCCGTCGCCTAGTCTGAACTTGTGGCGAGTACTGATCATGTTCCATAGAC -ATAGCGAGCTGTTCCTCAGGCGTGAATTCTTCACCAGCCCCAATTTCGCTTTCTTCATCT -TCCTCGGATTCCTCTGACTCCTCCCCAGGCTCCTCCTGTGGAGGCATCAATCGGGAAGCG -AAATCAAACGAATCCCGGTGAGGTGGCTGAAAACCATCATCTAGCTGAACGCTAGACGGA -GTTGCTGATTTCCGCGACCTGAGGCCGCGGCCCCGAGCCGGGCTATCGGTTGATAAGCGC -GAAGCACTCTTCCGCTTAGCATCATAACTCATCCTTGCGCTTTTAGGGGCCTCGGGGCTT -CCGCTCTGGAAAGGATTGTCATCCGTAAACGTGCTCTCATGGTGTAATCCTGACCTGGCA -CCGGGAGTGTGTTCGTCTACTTCTGTCGATGGGAGGATATCGCTTCGTCCAACCTTTCTA -GCCGTCGATTTGCGAGGCGTAGGACGGATAGCAGCGACTGGGGTGACGAGTGTGTCATCG -CCGGTTTCCGTGTCAGACGTGCGAGCGTGTTTACCCACTGAGCGAGCAACGCTCGTGCGT -TTTGTGTTTCTGACAGGTGTGGCAGGGCCGAAAGCATCATCGGCATCTGCTGTTGAGTGG -CGGGTGCTGCGTCGCGATGTACCTCGCTGGCCTCTGCTTGTGGACCCGACTGTTGAGGGT -GTGGCTGGAGGAGGCCTTGACTCTCTGTCGTTTGCGTCGCTAGCCACGGATGTTGACCGA 
-CTGCTCATGTCAGTAATCCCCATACTAGTTCGTCGAACCGTGTCGCGATCGCGCAGGAGC -TTTCTGGCTTGCGGCAGGACTTCATCTTCGAGGATGCCGATCAGTTGAGCTTTCTTTGCC -GAGGCAGGGTAGGGGATATCGTGGCTAACGAGAATGGAGCGGAGACGAGGGACAGTGAGA -GAGTTCAGATCGAAATCTGGATGAAGATATTCCAGTTCATCCGCCATCTTGTGTGGCGTG -CAATGTGCGACGTTGCGGCTTGATCAATGTGCGAACACTCCGACCGGGATGGCGTGGTCT -AGCGGGACTTCAGTTAAAATGGACAGTGACCTTGCAGCACGGTGCGTGACTAGACGAAAT -TTGAATATGCAAGTGAAAAGACCAGCGATCGCTCCCCTTTTTGAATTTAGACTGGGGTAG -GAGAGGTGTTGTTTTTTTGTCGAGCTGTCAATTCCTTCAGTTCGTAGCCAGGGCAACAGG -TGGGGCGCGAAAAACGGCCGACGATGGTATTAATAATTCTCACGATGCAGATTTAGGCAA -TAAAGCCCGAGAAACAGAGTTATCTCCTTTTTTTGAGCTTGGATTCAGTTTACCAGATGA -AGGGGGGAGCAGACCAAGAAAAAAAGAAAACAAATAGTCGATGACGTACACATAAAGATA -TTAGCACGTGACCCATCGTCATGCCTCAGCAACTGGGATTCTAACCTTGGAAAGTTATGT -ATTCCGTACTCTGGTATTTGACGAAGACGACTATATAGCGAACCTAGATGACAGGTAGTT -GTCTTATATCTCTTCATGGCAAACGTTAGTTTGCATATTATACTCATCCTCACGATCTCG -AAAAAGTCTATGGCAAAGTCTACTCCATACCTCATGGACGTCTGTAGCAGGTGTCCACGA -GTGGGAATGTTCACGTGTCATTAAGAGCTGAGCACAAACACAATGATAGGAAACCTATGA -GAGTATCATATAGACTGTGAGTATCATATAGACTGTAATATATATTCACTGTCCGAGCTA -GTTACAGCCCTAACATTCAAAGCAGACCAAAGCCAATAATAAAAGATACCGAAACAATCA -CCGTGGCTCCATGCCATAATGCTTTATATAAAACAATCTGCAGACCCCTTCCCCGCCAGT -GGAACGCCGCCCACTAAAACAAAGCCAGAGAGATACAAAAAAATAAGAAAAGGTCAAAAA -AGCAAGTAGAAGGCAACCAAAAGTAAAATCAAGAATTAGTGCAGTAGGCCAGATGGCAGA -AATCAAAGAGGGCCGCGGCCGGAATCAAAAGAGCGACTGGCATCGCCAGGGAGGTGGAGG -AAAGATGACGAGGTGGAGGTAATACCGCTGTCAATACTATCTGAATGTACCATTTTTCCA -TACTGAGGTGGTGGACCGCCGATGTTGCCTGCAGCACCAACGGCCATCTGCTCTTTGAGG -GCTTGGAATTCGGCGAGTTGTGTTTGTAGTTGCTGTAGTACCTCGTTGATGGGTACGGCG -TGGTCATTGAGAGTGTCGCCTGGTTTGAAGGGACGCATCATGTGGCGTAGGGAGAGTGTA -TTAATGCAAACCTGGAATTTGTTAGGATGCGAAGGTAGAAAAAAGCAAAGACGGGGAATT -CGAACTAACCTCGATGCGGGTGACTTGTTGATCGATCAAACTGAGAACAGTGGAGTCGTT -GAGTTCACCCCCGTGCCTCTTGACCATGTTGAAGAGAGCAACGTTCGGTTCTCTGAGGCC -CGGCTCACTTGGATGCTCCAAAGCACCGCAAAGTCGCTCAAGACGGAGTGTGACTTCCCA -GAGACGCCAAATGACGTTTGTCTGGATGTCAAGTTGATTATTTGGGTTCATAGTTGCAGC -GGAGCTATCACGATCCTGTGGTTCTTCCGAAGAAATCATGCGACCAGAAAGCGAGGAGGG 
-GTAGACAAGGTAGAGAGGCTCCGGATTCTCCAGACCCTTCAGCTTTCTCTCTCCTTGATC -TTTGATAATGAAACCTTGACTATTGAGCTGTTGTAGTTCGCGACGAATATTGTGGCCGAT -GCTATCGCCGCGGGGATGGCTTTCGATCGAACTGGTAGATGCAGAGCGCTCGGCATCCGC -GAATATCTCGAGGTTGCGGTGAATGTCGGTCATGAAATCCGAGGAAACGAATATCTGCCC -CCCGTCGGCGACGGCAGAGATTCGCGATGCACGATTCACCATTGGACCGAAATAATCCAT -TCGACTGGTGACCGGATCCTTTGCGCAGACAGGCTCGCCCCAATGGCCACCCATACGGAC -GGACAGGCCACGGAATATAACATTATTGTCCATGTCAACCACGACTCGACACTGGGGTTG -ATCAAGAATCTCTGTAGGCCACTCGGCTTCAAGGAGCTGGTTTTGACAGTTGAAACACCA -TAGTAATGCCGCCGTCGTGGTTGAGAAAGCCACCATGAAAGCATCACCTTCTGTTTTGAC -CTCGTAACCACCAAAAATTGCAAGTTGCCGACGCAGAATGTCATTGTGGATTTGAATAGC -AGAGCGCATGGCATCTGGACAAACCTCCCATAGGCCCGTGGATTTCTTGATATCAGTGAA -CATGATAGTCAATTCACCGTCCGGGGCATCCACGGCTTCAAAGCGCGCGAGTCGCGAATC -TCCAGGGCCATCGCGCTTCTTGGGGCGTTTGGCAGTTGCGATGATCTGCTCTTCGGCAAA -CGTGGACGAGCCAGTGTTGAGAGAAGGACGAGGCTTTGGGCGTCTCTTCTGAATTTCCGC -AACGCCAAGAATCATCACCATCAACTTATTGGTGGCACCGAAGGAAATGGCCAAGTCACG -CAACTTTTGAGCTGCAAACATCAAGTCTAGTTGTTCTCTCCGAGTGATGTCAATCACCAC -TTCGGGAGTGACATAATCCCATAGTTCCTTGGATGCTAGAACAACCATCTCATCTTGTTC -CGTCAAAGTGAGATGCATTGTGGAGGGGGCCGCAATGACAGCAGGCATCATAGGGAAATG -CCCGAAGCACCTCGACACGGTGAGCACATCATTCAGCTTGCCATTGCGAGACACAAAACC -TCCTGCTGCACGGATGCGTGCGCGCTCGTTAGGTTCTGCGGGGTCATGGTTTTGCGTCAA -AGAAACCATTGAACCGTCTGACCGGATCAGAATAGCCTGGGCATCACCAACATTGGCGGC -GTACAAGTCCATGTTGTTCAGATACAAAACAGTAGCAACACCGCCAGACTGAATGTCGTC -CTGGTTGAGCAGTTTGGCCGCAGTTGATCCTCGATGGTACTGTCGGACCTCACGGTCATC -AATGGATCTGTAAGCAGAGCCTGCCATATTCTTGTTCAATGCCAAGAAGGACCGTCGGAG -TGCATCTAATGGTGTTTCTCCCTGATCTCGCTGCAATTTCTTAAGCTCAAACGAGAAAGT -AGAAGTAAAATTCTCGTGCAAGTATTTTGCGACGCGAGAACCTCCACTGGATAGTGTCTG -CCCATCAAACATGCCCACAAGTGTTTCCACGTTATCCTGCTTCAACCGCGGAACAATCAT -ATCAATGATAGATAGATGCTCGGTCTTGCCAAGAGTGTCGGCCATACCGTAGGCGAGCGT -GCCAGCCAAGGAGGCAGATGTTCTCACACGGCGATCTTCAGTCTCCTCTGGAATGGTGGG -GATGGTGAGTGTGACATCCATCAGTCCCAAAACATGGAGATGAGTCAGGGAGTTGAAGTC -AGTCAGGTCAGCGCCATTGGCCGCCGGGGGACCCAGCGACGCAATATTTGGCTTGATTTC -GAGACGCTTGTTGCCCGAAAAATTCAGATACTTGAGATTACGGTTCCAGTTCCAGTTCCA 
-ATCATAGGGCCAATTTGACACGTTGTACTTGAGCCCATTGCTCCCCACATCTAGAATAGA -AAGCTTGCTAACCTTGCAAAGCTCTGCAGGCAAGACTTGGAACCGATTGGCGTTGATATG -AAGAACTTTGAGGTTGCTTCCCTCTTCCAGGTCATCGGACGGGAGAGAGGTCAGCTCGTT -ACCAGACAGGTACAGCTCGGAAATCAATGGCCACCTTTTGAGAATTCCTTGCGGTAGTTC -TGTTAAGTCATTGTAAGAAAGATTGACGATACGTAGCTCTGGTAGCAAGGATAGTTCCCG -GAAAACATCATCTTCAAGTCTGTTGTCGGCAAGTGAAAGAGTTCGAAGTGACGCTGCAAA -CGTCATTGCCATCTGCTGAGAGAAGTTTGAGTCCTTCCGGGTCCCTGAGTTCGGGTCGGT -AAATGATCGCGACGTCGCTGAAACCTTCCTACCAGGTTGGCCAGTTGAAGGATTCCGATA -CCCGTTGGGCGAGACAGAATTCATGATCCCATTCGACGTTTGACTAGGTCGACGTAGACC -TGGCTCATCTAGCGGGCCTATATCTTCGTAGCTGGGGTTTCCAGCAATGCCAGGTGTAGC -CACAGGTGTGCTTGTAGGCTCACCGGGCATTTGAGGGTACGGAGCACCATATTTTGGAAA -ACTCGAAAGAATGTTTGAGGAGATGTTAAGTACTTCCAGCTTAGCACAATGCCAGATTTC -AGCCGGCAGAGATGTCAAATTGCACTCCCTCAAATTAAGATGTTTAAGCTCGGCAAGGCA -ACCCATAGTTGCAGGCAAAGACGAGAGTGGGTTCTTGATTATACTGAAGTGTTCCAGTCG -ACGCAACTTGCCAATCTGAGACGAAACTGAGGACAGGTGATTTTTGTCAAGGACGAGCTT -CGTCATGTTCGGGCAATGCTCGAAGATGGTGTCCTTGAATTGGGAGAGCTTGCCGGAGGC -AAGGTTCAGTGAGGTCAATGTTGGCATAGGAGCATCAATATCAAATTGTGTCATAGGGCA -GTGGTCCAGATGTAGACTCCGCAGCCTTGGGAATGAGCCTTTGAAGCGCGATAGAAGATT -ATGACCAAAGAATATCTGCTCTAGGCGTGGAAGACTAGAGAGAGCATCAATGTTGGTGAT -AGCATTGAAGCGTCCATCAAACTCCTTGAGGTTCACTAATTTCCCGAAGGAATCATCGAG -AGGCCCGCTCAGGTTGTTATTTGTCATCCAGAGGCGTTCGAGAGTGACCAAGCGGCCAAT -ATTGGGAAGTTCTTCTATGCCATTGAAGCTGATATCGAGATCAACCAGACTCTTGAGGTT -GCAAAGGAAATCGGGGAAGATCTTGAAACTATTTGACGACATGTTCAGGCTTCGCAAGTT -TTGGAAATTGCCAAAAGAGCTGGGAAGCTTGGTCAACTGATTGTTCGCCATCTTGATGCT -GACAAGTCCAGTCAGCCGGTCGAGACCCGCATGATCCAAGTCCTCAAGGCAATTGTTTGA -CACATCCAGATAGGTCAATCGGGCGGCTAAACCAAAACTTTGTGGGAGAAATGCGGCCTC -ATTCCCGATGAACTTGATCTCTCGCAGATTAATGCAGCTTTGAATGAAGTCTTTGGGAAC -GTCCAAAGATAAATTCCTCGACAAGTTCAGTGAAATGATCTCGGACGATTTTGCATACAG -CGTAATTGGGATAGTGACAAGGCTGCGGCCCTGGAGATCCACATGGCTGAACTTTTGCAC -TTTGTTGAAACCTGGTTCACTCTCGAGACTGCTGTAACCGCTAAGCTTGGTCGGCAAGAA -GATAAAGCGACAGAGATAACTGTGATCCTCACGGCCAATATCTCCGATACGATCCTTTTC -CGTATAGCCAACTTGTTCCAATAGCCGCTTTTGCATTAGAATAGGTCGCTCATTGTGGTC 
-AAGCTGCCGCGAAAGATCATTTTTCCGTAGAACAATCTCATAGTTGTTGAGATGATCTTG -CAAGAAAGACTTCCGACCAAGCATCAGGAGAATCTCGGACACGGTGGCATTCAAACCAGC -CGATAACGTGGCAAAAGTAGAATCAATTCGGAAGATCCGAATAAAGTACGAGGCTTCGCG -TTCACGAGGACCGACGGGAGTGGGTTCCTCCGATGGAGGTTCTTCCAAGGTGAGGTCGAC -GGGCCCCCTGACCTGCCAACTTTCAGGTGCATTCCACGGCTTTTCGACTTTAGAATCGTC -GCCGGGTGTGATCGTTCCGTCCTTCACACGCTTCATTTGCCCAGGCGAAGGTGGACGGAC -AATCCCTTTCAGATTTTCCATATCGACATCAAGGTGGAAGAAGTGTTTGTTCTCTTCTTG -GGTTTTGTCGTCCTTGTTAGGGTTATAATAATCACCACCTTGATGGGTGAACACGCGACG -ATTACCAAGCTTTCTTCCGACCACCAGCCCCCCCTTTCCAGAGTCTTTCTTTCGGTCCGA -ATCTACTGACTCTGGGTATCGAGAAGGAGTACTGCCGTCTCCGAAGCTCCCTTGTCTATC -GCGCCGCGGCGGGGAAGAGTTCCTCGACTTGTCATGCGACTTTCCGAGAATCTTTTTGAT -AAACCCGCCAAGTCTGGGATGCTGTGAGTTTTGGTCATGGTCTCGGTATGTGCTACGCAC -ACTGGGCGTTGGACTTGTCGATCGGCCCCCGTATACAGTAGAATCGCTCATGTCAGTCGT -TGAATGCAAGTAGTTCTCATTAAAAGGCCGCAATCCATTGTCGTCTCTGCCAGTAGCAGG -TCGAGGTTGGGACCATGCCAGATCGCCAGCAACAGTCGGGTTCTCTTCTTTGCTTCGAGA -AGGACGGTGTCTGCCGGGTCCGCGCCGGCTGAACTCCCTCTGCCCCGCGGTACCATTTTC -AGAGGGCGGAAGACGCTGTCCATCGGGGCCGATGGGTACATGGCGCACTGGCGCCTCCCC -ATACTGCGGGATGTCATTGAAGCTCTGATAGTCCCATGGAGTGACGTCACTAGAGGGCAA -AGGGATATTTGCACGTGGCTTCTGGGAACGTTGCGAGGAGTTGGAGCCCTCGGACGTATT -GCGCGAGCCATCTGAGTTCGCTCTCTCACGCGCCTTCAGCTGGTCGACTGGTCCCCCACG -CCTGCTCGGCGGGTTCTGGGAACCATCGTCATCGGTTTGAAGCTCCGCAGGCGAATGATA -GTCATCACCAAGTAGACCTTTCAAATGCTTCTTTCGAAACTTTTGGCTAATACTAGATCT -AGACCCCTGGCTACTGACTGTGGTTGCGCTTGCAGCAGATGGTCGGCGATGTTCCCCATA -TTCCAACTCGGCGGGTTCGCTTGCGAATGTTCGTCCGGTCGTGGGGCGAAATGTGAATGA -AGCAGGCGAGGCTTCGTGCTCCGAGTAATCATTATAAAAGGTCCCACTAGTATCTGGCTG -ACCACTGCTGGGGGCTGTGGACCACGGGGCAATCGACTCAATGTCATCGGGGTCGAGGCG -AGATGGGTGAAGGGGGAGAGGTCGAGGACTCGAGACGGATGACTGGGACGGACTATGCCT -GTTGCTTGGAACAGTCAAAGCCTCGCGGTGATTTCCAAAACCAGCGGCTTTACGTTGTTC -AGTAGGGGAAACACCCGGGGAATTCCACGACCACATGGAATTTTCCCAGTCAATCCGAGG -GGGTTTGGAGTCGGAGCTAGCAAAATGAAATATCAGCGTCTCTGCAACCTTGAGATTTTA -TTCGACGCGCCGTGGTCGCATTGAGAGTCAGTGTCATATTTAAAGAAACGGAAGCTTACG -TAGAGTCCATTTTCAGGGAACCCGGAAACAAGCCAGCATTGTGACCTCCCGTAGAGGCCT 
-CGTATTGGTCTAGTGGCTGATTCTCTGAATTTCGCACGGTATCTTGCGACCTCCAGCTGT -ATCCTGAGCTCTCCGAACCCTGACGATCGTCGGGTTGGTTGCGATCCCGCGCCGGCATGT -TCTCTCAGACTCGGCCGAATGCCACCACGGCAAGCCTGGTCAAGTTGCAGCCAGCTGTAT -CTTGGGGTGTAGCAATAGTTCAGAACAGCTTAGCATTAAAAGTACCGATGATTATCGGAT -GGTGTGGGCGACCGGCGGAGCTGCAACGTATGTCGATTCTGCTCTCGGGTGCTAGCGCCA -CTTTGAGCAGGCCAACCCGGCGACACAATAAAGAGTCTAATATCTGGAAGAAGCTTTGAA -GAGCATTCAATATGGATTTGGGAAGCAAATAAGTTGTCACCAAAAGCGCGGTATGAGTTG -GGGCTAGCGGGGGGGGCGTGATATCGTATGGGAAGTTGAGGCAGAGGGTGTGAAGGTGTC -AGGTCGAAATGGGGCGTATCAAGATGGGGCGTTATTTCATGAGGTTTTAAAAAGGGATCA -CGGTTAAGGGTAACCGGGGCGCATACTGGCAAGGCGGACGGGGTGATTATTTGAGGTCCA -GGGATTAATATTGGGGGGGGGGCAAAGAGGAAGAGTGGGGATGACTCGTGACAAAGGCGG -ATCAATGTAATGACGATCGAGAAGAAAGCCAAGAATCTCTATCATGGAAAAAAAAGAGAA -AGAAACTGAAGTTGAAAGTCAGACTGTACAAAAAAAAAGAGAGAAGAAACCCATGTCGGA -TATCGGGTACTTGGTCGGAGTCATGAGGCTCATTAGCGCTACACTTTTGCCCGACACTTG -GACACACCTGGGTACATTTTCACCCCGGTGAAACTTGTATAGATTACTTGGCTACTGCAT -TGGTAATTGAAACTCCCATTTCTTTCTATTTTTCACTCTTATTTATGTATTTCTTCGCTT -GATATCTTTGATCTTGGCGTATAGTAGTATTATCACTGGAAATGGTCCGTGCCGTCCGTG -CTTAACAGAAGCCCCGTCTGTGTTATACTTCATTATCTACAGTCCAATTTTTGCCTCTCA -TAGTAACATATAAATCATCTACGACTTTCTTACATTGCATAGCCTTCTAGCATAGTGGAT -TGATTGGTCAACCTGGCCCCAGCGATTTCACCGAGGGGTCGAGGCTATGACCAAGAAAGA -ATGCCGGCACCAGGGCCACTCGTAAGATATATATTTTTATGTGCGTATATATTTTTTCGA -AAATTCGTTGGAAAGTGACAGCTCGGATCGCCAAGAAATGTATTGCTACCGTTGCACAGC -CCATCTCTCCCTTGCGATTCATTTAGCTCGTGATGTATCTACTGCAGCCAAATACCGTGG -GCTGAACCATGGCAGTTTTTAATCATCAGTGGGCCGTCATGGATTTTGACTGTAGTAATG -GTGACGGGACTCTGTAAATCTGCCGCCTCGGGAGATCGGCTCCTGGAGCACAGGGATAGT -TAGTCACCCTGCCGCCTTAAGCTACTCACTGAGCAACCCAGAGCAACCCTGATCGCTCAC -CCCAATGGTCAGGCCGACCGGCCCATTCGATCTCATTCTACCTCTTCGTTCTCAGCTGAT -CAACCGGTCTGTCAACTTGTATGAGATGTTAGATATCCCATCATCCACCCTTTCCTTCAC -AATGCAGGCTCCATCCCGCCGTCCATCCAATAGACTCAACCACAAATGCCCTCATTGCCT -TCGAATCTATATGTCCGGGGCTGGCTCGATCCCAGGAACATCAAACACTGCCTCCATTGG -GAAAATATTCTCAGGTTCCCTGTCATGGGCCTTGACTGCATTGATACGCCGTGCCACGAA -AAGCAGAGACATTAACTAACCATCGGTCCGAGTATACTAGATCCGGAAAAGAAGTCCCAT 
-ATGCAGTCTATATTATGTATAATACTAGTGGTCCTGGAAACACTTCCTCCAGTCGATTCA -CTACGAGATAGAACGGGTTTCAGTTGGCTGCAGCAAAGGGTCAAGAAGCTGCGTTGATAA -CAAATGTAGTCTCATTGCCATGCTGCATTAGTTATTGAGCCGAAAACTTTGCCAACCTTC -TTACATTTCGCTTCATAGTGCAACGGGGGTGAAGAGACAATGACGCCTGGAATATAAAAA -GCCCAAGGATCACTTTCGTGCGGCTTTTTCCCCTACATACTGTGAGCTCTTATTTCGCTA -TCTCCCACTTCAACTCTCCTCTAAAGATCATTGCGTTCACCATAGTGGCAGCCAACACCG -AATTCCACGCGAAGGTGCTCTTCGGTTGGTTCGGAAACTCAGACCATCGTTATAAAAGAG -CAACTGAACACCCCAAGAGAAAGCTGCGAAGTAGGCGGCCACAGCCCGAAAAACAATGTG -CCAGAAGAAGAAGGCAGAGTAATGAACTTCAGTAAGTTACATAGTCAACGATTTGCTCCG -AAGCGTTTGGATTGATACGTCATCCCCGGCATCGGAAACTGACCGGCTCTCTCGACTTAT -CTCGCCTGACGCCTATCCCGATGATGCACCAGCCGCTGATAACTACCCCAATTACCCCGA -GGAGGTCGCGCTTTATCCTTCTTCCCGAGTCAGTAGTACGGCATCTGTGACTGGCCTAGA -TAGACCTGGACCAATGAGCCTTCAATGTTTTTGAATATTATCACATGCAATTTACCCCAA -GTCGAAGGAAAAGTGCCTTGGGGGACCCAAGTATCCCTTCCCCGAAACGACGCCGGTCTA -CTCAGGCTCGGGAATGCGATCAGGTATCTGAAACGCTTGTACAGCCTAAGTACTGTCAAG -GCCAAGTTCTCTGATGATCTCGCCGATGCCGATATTCTCAAAGAAAGTCTTGCGGTAATC -TAACTTGCAACTAAAACATTGCAGCACGCAGATTGTATCATTGCAACAAGCACCCTTTCA -ATAGCGAATCCTGTTCGACAAGCTACGACCGTAGAAGCACGGCAGTCTACCACGCGCGAA -AAGTGGACTCCTGAGGGTGATCGTCACTTGCCCGACCTGAGAGACAGTCAAAAACGCCCA -TGGGGCCGTATTCATAATACATTTCCTGGAAGAACTCTGAAAGCAGTCAAACGACGTCAT -TTCGGGTTCTCAAAGCAATCCAAGAGCCCCTCAAGCAAAGAAAATCATTCGAGCTTCCCC -GATCGACGTGAAAGCTGCACGCAGAATGTCTGAAGAAGCCAAGGGCTGGGTCGGTAGTAT -CAATTCAAGAATACAAGGAGCAATGTCGCGCGCCATCGATGGTACACCTCGCGAGCTGTG -AGACATTCCGGTGTGTCAAATGCAGTAATGATTGCATCAATCCCCAGCTACGAAGCATTA -GCAAATTCTAAGATCTGTTCGTTGAGGCATGCGAGTCTAAGCTCTTCGGTTACTTCGGGA -CGTATAAGAGTCGTCCATGTGTTGGAGAATCTGTTTTATATTTCAAAATACTATCCTCGT -CTCAATCACGGAAAGAATAATTCAAGAATCTGCCTGTACACTGCTATGGGAGTATTTATG -GAGTTGACGTTGACTACCATGGCAATGTTCCAAAATGATATCACGCTCAAGTACAAATTT -ATGTACACAAAGCAGGACCTGAGATTTATGATTGAGTTAGGCGAGGTTGGATATTTGAAA -ATTGGTCAAACTGGAGATATTCAGATGGCTGGGTCATTCTCGCTGGGGCAATTTGAGGTT -GCGTTTGATACGGCGGCTGAAATGTTTGGACCGAGTATCCATATACTGATATTCTTTCAG -CTTCTACCAGTTATATTTGATTTAAAATTGACATGAACACTTGTATAGTAATTGCAGATA 
-TATATTCCTCATTTAAAGTTTATATAATTAGTAGAGGGGTCAGAATACAGCGACTACAAG -ATCCTAGTCGAGCCCTCTAAATGGGCTATCGAATGAGCTGTGGGTTGGGTTGAGCTTCGA -TGGTTGTGCCCATAGCCCACTGCTGGTTGGGCCATAGGCCCTTGTGGGCTTTGTGGGGCG -CTAATATAGACTGTCATCGCTCCCTAGGTGTGTACCCCAAGGGGGCAATCAGTTGATGGC -CGAGGAGTTACATGAAACGAAGCAGTAATCACAAACTTTCGTTACTGAGAAATTTTACAT -GGAAGAAAACATGAGTATTGTGATAGCGTAGTGACTCGGCTAATTAAGCCAGGGCTAATG -ATTCTAAGTGCACGCGTACCCTCGTCGGTTGCCGAAATACTGGACCGACCACCTTCGGAT -TTCGGTAAACAAGGCACCGACGTACTTTTCACCTACCAGTGTCTTCGCCTGAAATCTCCT -TAGAGGCGCAGGCTCCGAGACTCTGACACATACATTATTCTCCCTTTCTCTGCCGCAGAC -TACCGTAGACTGCCTTAGGCTTCGGGCTTCTTATATCCTTAACCTAACATGGCTCCCCTT -ATCATCTCCCAATGCAGCATCGCTGATGGCACCGCCCTAGCTGCCAACAGCATTCCCGCC -TTCTGGGCTGACCCGCACTGGGTCCTAGCCTGGCGCCATCGGACCCTCGAGTACCACATT -TCGCAGATAGCCCTACGCTTCCCGCGTAACCTGTTGAACAATAGGGAGGCCTTACGACAT -CAAAAAGCAGTGGATCCAGAGACAGGCCGTATACTCGGCTACGCTCGATGGCGCCTGCCA -CTGTCTTATGAGATTAACCTAGATGATGGCACGCCGACATGGCCCGAGGCCCAGGTCCCC -GCGGTCGAACCCGAGAAGGAGGCCGAGATTCGACGGATTGCCGAGACCGTTGTCTGGGAT -CCTAATGACGATGCTGATGAGCTATTGGATCGCATAAACGCACTCGAGAAGGAAATGACG -CCAAAGACACCTCACATCAGTAAGTTGTGCAATAACGTGCGGCTCTAGCGAAATATCTCC -TGGTCTAACGGGTCGGGTTAACAGGCCTAGAGTACCTTGCTGTGCATCCGGACAATCAGC -GTAAAGGAGTCGGCACTGCATTGGTGAAGAGCGGGATGGATCAGGCGGACAAGATGGGGT -TGGAAATTTTCGTCCACGCTTTAAGAGAGGGAGCTGAACTATATAAACGAATCGGTTTTC -GACTCATAGCAGAGCTCGTCCAGGATGATTCTGCATATGGTGGCAGTGGGGTGTGCGGTG -CATATTTTTTTATATACGAGCCTAGATCGAGGACTGATTCTGCAAGTACTTAGCTCGAGC -AAGTAAAGTCAAGTTGCTATGTATCTATCTAGTTATCATGCTCTCACGCGATAGACAGTA -CCTGATACTAATCAGTTTCTGAGCTGGCTGTTGCTCGAGGTACCGTTGATCATGAGCTAT -TCGAAGGATACAATGCTTAGGCTCTTTCCGTAACGATACAATTCATATGATCTTTAGGAA -AGATAGAAGCTTAGACCACATATGAAAAGTAGGGAGCCGGACCCACCAGCTCATGAAAGA -AGTAGAAACCAGCGGCCAAATAATGGAGCTGAATAAGGTTTCACAGAATCATAATCCGAG -GAAGAGATCGACCAGCTTCGGGAATATAGGCGTTCTGGGACACTGTCAATCTCACATTGC -CCAGCACGCAATGCTTGGGGCTTTGGAACGTACCCGAGACCACTGACTGGATCCCATCCT -TTCGCTGCCAGGAAACCGGTCGAGTTGCAAGCAGTATTATTGCCATCCGTAATGTCAGTA -AAGACCTCTGGGTGAGCATACTAATCAAGTATCAATCAGTTTTTGTTAGTGCAAAAGGTC 
-ACTCTCATATTGCTGATGATTCCCCAAAACGAAGTAGACTTACTAGGACAAAACTCCTTT -TCCATGGAGTACAGGAACCAGAAAGCCCGTTTGGTGCGTTGACGGTCTGCCATGGTCTCG -CATAAGTTATCTACTGCACTGCCTCTGCCCAATCGCATCATTTGCGTGTATTGAATGCTG -CTAGAAAGGAGACCTGGCGTTTCATCGGCACCGGTCAGGACCGCAAAGTAGAACTTTATC -ATTGAGGTAAGACAGAGCAAGAAGGATGGAAATTTTCAATTTGAACAGTACCATAACAGC -GAGAGCCTATGGCAAAATTTAACTTAGACTTGGAGTGAAGCGGTTTGTAGGCAGAGGGTG -AGACTGACTTGCAAGTTTCGGACAGAGAAATTCCTCTCCAGAATTTGCGTTCTCAGCTTT -AGAGCCTCAGAAAACTGAGCTCTGGGATCGTATCCGAGATGCCCAATCTTGTCGTGACGG -CCTTGAGCAAGAAGAATATGAGACCCGAGCGCGAGAATGATATGGGAAAGGGCACGCTCG -GGCGGTGGAAATGGCGTCTGCTCGACCATCTCGGGCACCAAATGCACAAGCGAGGATCGG -TCCAGGAAGGTCGAGACACTGCGAGAATTTGCACGTCAGTAGCGGTTGGACGAGTTCGAG -GTTAGTTAAACTTACCCTTGCGGATCCTTCAAGCAAGCTTTCATTTGTTCTATTGGCCGT -CATGTTCTTCTCACCCGCGCCTGAAAAGAGCCAGATCTTACCATCGATACAATCAAGTGC -AAAACCCTTGTCGAGACGAAACATCGCCTCTCTTCTGAGTGTTGTTAGTGGCGGATAGAT -GTTGATGCTCTCTTCGTTGATCAATTTCCCAAATGCACGAGCAATTTCCTCCCTCTGGGG -GCTGTGCTGTCGCTTGCAGTGTTCTTGGATATCCCGAAGCAGAGCACCCTTGATCAAATC -GTTCTGATGAATTTTCTCTGAGAAGAATGTCAGGCCTTATCAAGCTCATGGAATAGCTTG -GTATTTTGCGTTACCGGCACAGATGGGATCTCCAAAGCGGCTGTCTCGACCACCTAGGGG -GAAGACGTCAACAATGCCCGATGGTCGCCTTTGGAGGCTCGAAGCCTTGCTTGAATCTGA -TTTGTCAGTTACGTGAGGGAAATGGCTGCAGGTACCAAAGTCAAAATGCTCATCGATCAA -GTCAAATTCCGGAAAAAAGGCCCTCTGTGATGAAATGAAAGAATAGTCAAGAGAATCTGT -GGGATAGTCGCTCGTTCTTTCTGGACGCTCTGTTTCTGCGGACGCCGTGGTGATATCATC -TTGAAGATCGTCTCCCTGGCCTGGTGGACGATTTGTGTCGATTTCATCCCCAAAAAGAAA -TGGGCTGATGTTCTGTGGCGTCGTTGGCTCTGTCAAATCGGGCATGAAGTTACGGTCGCC -AAAGTCCTGCAGCTGCTCCAACGAAACGGGCGTCGGAGCGCCGGTTTCTTTCACAGAGGT -TCGTCTCTGGCGGGTTGAGCGAGGGTATCGGCATTCAAAGCCAAGTCGCTGGGATCTCCG -TCAACCTTGCTGATCCATATCAAGTGTATAATGCCAGGGGACCGGTAAGTATTCACCTGG -CATTGACTGCATGCCGTTTTGCCATCACCTAACCAAGTCCTAAATCAGTAAAACCCTTGT -GATATCATCATAAAAAAAAGGCTTAAGTAGCTTTTGAGGGAGGACAGACAGCGCACTTTG -CGGGCCTGGCAGCGTCGACAAGCTCCCCGCCGTCGCGTGATTGGCGTCGCTCGTGCATGG -TCCTTATTGGCAGCGTTTGGGGTTCGCAAGTCAACGATTGAGCTCATCGCTAATCCTTTT -TCTCTCATGCGAAGGACGTGCGAGAGTAGCTAGAAATCCAGTGTCCATCAAATGCTATGA 
-ATAACAGGCAGACGGAAAAAAAGAGTGTTGGCTTAGCCATCAGTCCACCGCTTTGCCTAA -GCGCTATCAAGAAAGGCATGTCATGTTCCCAACCTGGCTGTTACTCCTCATTCACAACCG -CTAATTGGCTGGTAGAAGAAGATGATTCTCTCATACATGCACCGAATGGTGTGCTTATGT -CAAGGTATGAAATTCCGAGCTAGAGCCTAGAGGAGGCGCATACTACCTTACTATCTTAAC -TTGAGTATGGGAAAGGACAGTGGGGCACTTAAGCATTACACACTGCATCAAAGGGAGCTA -ACCCACAAGTGCCAAAACTAACCTAATATAGTAATTTGCAGGGTCTGTTACATTTGTTCT -ATATCGAATGGATTATCAGAGGCTTTGATTGTGAAGACTGACCAGATTGTATCTCACCAG -GTGAAACTATAGATGGGCTTTGATGATCCTCGATTGTGGAAAAAAGAAGAAAAAAGAGAT -GAACTATGCCTGCTGAGGAGCGTGGCTGAAAGTACTACCTAGGTAGAACCCCGCGTCCGA -AGTTGTCTTATCCTTTACCGACGGTGATAGTCAAACACCCATCTTAGGGATTCAGGGGCT -AGTGTATTGCTTGCTAGTATTGGTCGAAGGTGAGGTGTTTACTTCCGTGAGATTTTCCGC -GGGCGTTCTCCTGCCTTCAAGCTGGATCTACCTACGTACGGTGGAGTGAATACCCCGCCG -CCCGTGCTGGCGGACCAGTTTGACATTGCTGTCAATGACGCCCTAATGGCGAAAGCGTTG -TGCTCTGGTTAGAGGATTCGTCTTGTTCTTTGTGACAACTGAATGGTTCTACCGGGGATG -ATAACGCAAACGACAGGCATCGATTTTATTAGGTTAACTTTGTTTTGAATACAAGCTTAG -TGGGAGGGGCAATCCCGGGACGCCACGCCACGTAGGCTCTCAATCGACCGGTTAATGGGC -CAACCCGAGATCGATGATTTATCATTTCCTAGTAGTTTCGGACCCCCCAGGTCTTTGGCC -TGAAATTATTTCAGCCTTCTTGCAAGGGCAACAGTGTGGTTATCACCGTTCCATTCCGGG -CCCTTGGGCGAGACCGGAGTCTGTTGAGTAGATTGCCTTGGGAATTGGAGCCACAATAGT -GGCTGCACAGGCTCCCCTATTGCCTTCTCACATTGTGGGTCAAAGCCCAGATTACAGTTT -GGCCTTATAACTGATCTGTGCTCCTGATGACCACACTAGCTGCATTCTGCTGTAGATCCT -CACACCATACTATCGTGGGCCATGGCTTGCAACAAATTCCAGGATAAGGGAAAAGCTACA -GCGGACCGTCCCCTCCTAAGTTGATTGAATAAAACCTACTTCCAATATTAGTTCAGTGAT -ATTCTGGATACTTCTCGGAGTTCTCACTGACAGCCTGCAGGGTCGACTCGGCACTAGAGC -TTGTATGATTATACCAAAGTGGATATCGCAGGTTTTCGACCAATGTGTCGGGCATGACTC -CAGAGATGTGGTCACGTAGGGAATCCTTGATTCCTGACAATTTCATTTAGATTGAACAAT -AAGTGGGTCGAGCAATCCACTGCCCAGGTGGAAACAGCTAGTTTAGGGGCTCCGCAAATC -TAGAATGGAGTGTATAGATGTACATCTTCAAAGACCACGGGTTGAAGGCATAAAAAAACA -GGTCAAGTAGCCCTACATCTAGCTCATTGGGATCATGCTATAGCCACTCGGTACGTACAC -AAGTCGCGGCGAAATTTGCGTTGACTTGAATGTTGGTACGGACCGTAAGGGCCGCCTTGA -CCATAGCCGCTTGGCACAGTCAGTAGCACAAAATGGGTCATGGGGGACAACAAATTCACC -AAGTGACGCTCACAGCAAGGAGTTTCTGTCGCTCGAAGACGCTGAATACATGACCCTCCA 
-CCCCGACAATCCGGTTCTGACAGGACCTGATCGCAATAAGTTGCAAACGAAAGGTGTATG -TCACGTAGAGTTTTGGTGTATTGGTGCCATTTCGCCACCTGTATCCCTCTCTTTTTGCCA -TCTCAAATTCATTAGTGTTCACCATGTATTGATGTCTCAGTCTGGTCTAACGGATGACCG -ATTCTCCAAGTATGTAAGCAGTGCCACCTACATCGCCCCTCCCACAATATCAGTAGGTAG -GAGGTAAAGTCTAGGCCACTTTCTGGGTAGCCCCGGTCTACCCGCGGCCCACAGGGTCGA -TGCCTGATCGTATAGGGACTTAGGTGCGACATTTTAACAAAAATATTTGTATATCCATTT -TGTACGGAGTACTCCGTGCACAATTCCTCATTTCAATTATTAAGCTTACTACTTCACACG -TATGAAAAAGTCGGAATATTTTTCTCCCGTTGTCATGGCTCAATTCGTCGCCGGGCGGAT -CGCCCTTACATCGATCGCCTACATAGATCGTCGACTTTATATCGACCACGATCTAGACTA -CCTTTTGTGCTAGAGTGTCCATTGAGTGAGCATCACCGGGTTGAAAGGGCTAGCCAAGGT -TGGTTGTCAAAATCATATCATTCATAAGGCCGCTAAAGCTAATGGGGAAATACCTACCCA -ATAGTAAGCCGCCTTGTCCCAAACATTGGAAATGCGACTATTGAATAGTATTATCACATA -TTTGTTTTGGATAGAAGAGAGGCCCGAAGTATGGAGTCATCGTGTGAATTCATACGGAAT -ACCATGCAAAGCAGACGTTCAATATGAGATCACATCTCTAATAATATGTGAATGAAGTAT -GCGGGAGGCTGGGGAGTGGGTCCTAATATACGTGTCTCCTGCCACCTATTAATTTGCTTG -GGCCTTGCCGCCTTTTGGTTTTTTGAGGTACAATACGGAGTGTACGGAGTAGACTATTTT -GAAGTATTACCTACATTATTTCGGACATGGACTTTTTGATCGATGCCTAATCGAATGTGT -TGTTACTGTTGTGTGACGCCGGAACAGCAGTTGTACATGCACACGATACCCAAAAACACA -TTGACATCTGAGCTGCACACCCGTAGAATGTACGGCGAACTACTCCGTACATGGAATCCG -TCATGGAATCCAATATGTCGACTCGAACGTGGCGTAGCTCTCTTTAGGTCCTGTGGACAC -AATCGAGGTCGGGTCTGAATGACCAATGACAGGATAAGCGGGAGACCCACAGATCGACCA -AGCAAAACGGAGGAGGCTGGCACCTTCCCTGATTGTTTAAGACGGTTTGTGTACTCCGTA -CTCCGTACCCCATGAAAATCTCCTTTCCTATTCTTTCCTCTCTTCCAACCCCTTTCGTAT -CTTCATTATTGCATAATCGTTGTGAGATTACTTATAGCTTACTCTACTTGGCCCACCATG -CGATTCTTTCTTGGGGCACTAGGCCTGCTGTCTTCCACGCTAGTCGTGGCCCAAACTTAT -ACCGATTGCAATCCGACAGAAAAATGTAAGGCCCATAAGCACCGTTGACGACTGGTCACT -AACAGCTAGCAGCATGTCCCGCTGATCCTGCATTTGGCCAGTCTGACAAGACTTTCGATT -TCACAAGTGGCGCATCTGATGCTTTCAAGTCAACTGGAGCCGTTACATTTGACCAAACAA -ATGGTGCAACCTTTACTATTGCTAAGCAGGGTGATGGCCCGCTGATTCAGTCTGGCTGGT -ACATCATGTTCGGCCGCGTAGAATGCACTATCAAGGCAGCACCAGGGACGGGAATCGTCA -GCAGTGCAGTTCTGCAGTCAGATGATTTGGATGAGATTGACTGGGAGTGGCTGGGCGGAA -ACAACGCCCAAGTACAGACCAATTACTTCGGCAAAGGAGATACAAGTTCATTTAGCCGTG 
-GCGCCTACCACGAAAATCCTGGAAACCATGACGACTTCCATACTTACTCCATCGACTGGA -CCAGCAGCCAGATTGTGTGGGCGGTTGATGGCAAGACTGTTCGTGTTCTCACTCCCGAGA -CGGCAGAAACCAACCAGTACCCACAAAGTCCGATGATGATCAAGGTCGGCGTGTGGGCTG -GCGGTGACGCGAACAATGCCAAGGGCACTATTGGTGAGTAGGATTGCCACAAAATTCACT -CTTGTCGTGCTTGCTAACAGCCTTTGTTTGTGTAGACTGGGCTGGCGGTCAAACGGACTA -TAGCCAAGGTCCTTTCAAGATGTATATGAAGTCGATGACTGTAACTGATTACTCCACCGG -AACTTCGTACAGTTATGGTGATAGAACTGGCTCATGGCAGTCAATCGTCGCAGAAGGAGG -CAAAGTCAATGGCAACAAAGGTGCCGAGCCCACCTCGACCGAGTCCGCACCTGCAATCAC -TGCAACCATCGATAGCGTCCCGGTTCCCTGGAGTGGAACCCACAAAGAAACCTCAAGCTG -GGTGACGCCAAATGTCTGGCCCTGGGTGGCATCCGGCTCTCCTACGGCCTTGTCGACAGG -ATACCAGTATGACTGGGAGTCTATTTCTTCCCAGAACAAGCCACCAGGTGGAGGCTCAAT -GAGTGAGCAATCCCCCGAATCCTTCAATTAGATTCGTCTTTTAATATCGGATCTTTTCCT -TCACAGTCTACCTTCCATTCTATATCAGCACTGTTGCATTTTTCTTTGGCTGCCTTTTCC -CGTTTTGGCGTTGAGATTTGACGAGGAATCTCGACTCGATTGATTCGCGGAATCATATCT -TCGACTGAACCAACCACTACTATTACTTCTACCAGTACGCTCAATTGAGAGGCTACAGCG -ACGTCTTTTGATCCTCATATTGCTCCATCAATGAGCAGTGCGAAATACCCAATCTCTAGG -ATGGCAAGTGTCTAGTGTGCTCTCTTTGGTCGTGTGATTGCATTGCATTGAACGCATCTG -CGTTTGTATCATTATGACTTTATCGGACCCGGCGTTGACTTATTTTTCTTGGACATGACA -CACGAGCGATGACTTTAGCGGCTCACAGAACAGCCGCAGCATTTGGGCGCTTAGGGGCAG -CTTACATTCTTGTTTACACTTTACTTTTATATTCATAGATGCCAAAACGGGAATATCCAA -TTGATGGAACACCCGTACCCTACATACATAACGAGTTTTCATCCATAGCTCATTTAACTA -TCGATAGCCTTGTCATGAACCCAAATTCCTATTCTCTGCATACGCATCGCATCTCTCTTC -TCGACTGCGTGCATCAAAACAGTATTAACAACTTCACTTTGAGTTTTGAAAACTGGTACG -AAATGTAGGGGTTGAAAATATAATGGGGTGCAGGTTACGTGCGGCTGAGGTTGCTGTACC -AAACTAGCAATGTCCTTAGTTTGGTACACTATCTTGACAAAATTGTGAGGGCGAGGGATT -AAAGTAAAACAGACACCATGCGATAGACAATTGTGAGCACTCCAAACATGAAGGCATAAG -CCAGGGCATCCAGATGATCCAGTATAGAAAAAAAAAAGAGTTCTATCTACAGTGGGGAGA -AAGATGGTACACACATTGCTCAAAAGGGAGTAGGAAGACACTGTGATCCATGGATCTTCT -ACAGAGCCATCTACAGCCATCTACAGGCACTCTGAAGAGTGCTTGCTTTTGAAATTTCAT -CTTTCACGTCCTCCTTTTCAAACAACTGACTTGATATCATCGATCTAATACTACCACCTC -CTAACATGAGTGACGAAGGCGTTATCTAAGACGGAAACGGACTAGGTATAAGACCACTGA -TGATCGAAGCCCTCGACGAGCATTTTCCTGAAAAGCCCGGTCATGACAAAACCGCCGACT 
-TCGTTCCTGGTGCCTGGCAGAGCCACCGTGTGGCATCAACACCCGCACGATGATAATACC -TTGGTTTGCTGACCATAGAATCTGCTTCTGTCATACCGGACACTTAATTGTAGCCAAAAC -ACCGGCTTTCCCTTAGATTATATAGTCAAGACCGCTGGCTACTGATTGCAGCGTAGCAGC -TTACGAATAATATAGAGATTACCTATAACTCCTTGTGGGTCGTTCAACTGGGTGTTCTAG -AAGTGACGATATGCATATACAAGCGCATGCATCTCCTTTGCTTACATTGATCAATTGTAA -TTAAATGTGGTACATAGCAGGTCATCTACGGGTTCCATGTAAGGGCGTAGAGCTTAATAG -TTTCTTCAAGTCAAGGCCCGCTAGTATGAGATGATCCATGAGCTATCATCTTATGTATTC -TCATAAGAAGAAAATTGTAGAGCAAATTTTATCGACACAATTTGCGCATAGTGAATGAAT -CTTTGACAATTCCCGGCGGCTTGGAAACTTGCACAAGGCAACTGACGCCTTTAGTGACCT -CATACTTGCTCACAGACCGTAATATGAGCTTCATACAAAAATTTCAATTGATTTGACGAC -GATCAATTACATTGTTCCAAATAGTTCAGCCAATGACGCAGGCCAGGAATCCAGTACAAG -GCTCCAACCGCATCAGCACATTCTTCAAGAGCATCTAGCTGATTCCGGGTCCGTCGCTCA -AGATCAAAGAGCCCTTGGTGTTTTAAAACGTCGCACTGGGAGAGGAGAATTGTACGTGTT -GTGCGTCCGCTGGCTAAGTTGAATTGCCGAAGATTACTCATCGCTTCTGTCCAGTAGCGA -AAAGCCTCCCCGAATTGTGACTGAACGTGAGATAGCTTAGCTTGGAGAATTGCCAGGCGT -AATTTATCCATCTTCATGAGCTTTAGTCGGCATTGGGACACGCGGCAGAGCGTTCTCTCT -GCCTCTCCAAGTTTCTCCTGTGCAAAAAGACATTCTGCAAGAGCAATGATTAATATACCC -TGCTTTGAAGCTTGACGGTCGATCGCAGCACGGAGACAGCTCTCGGCTTTGGCAGGGTCG -TCCAGCTCGAGATACGTGTCAGCCAAGCTACATGCCAGCTCACTTGTATCTTCAATATAA -TTAAGACCCTTGGGGGTATTCATAATGTTCTGGGATCTCTCCAGGTATGACAACGATCCT -ACAAAGTAGCCTCGGTAGCGTAAAATCTTGCCTATGATCAATTGCTGACGAAAGAGAACA -GTTTGTTCCATCGCAGAATTTGCCCAGGAAGTCGGCTCCCATGCCCTTAACACTTCCATT -GCTTCGGTAAGTTGATCAAGCTGGATGTGATTTAGGGCATGTTGGACCACGGTATGCCCA -TACGCCGCATGTAGCTTTCTGTTGCCTTTATAAGAGGCACCGTCCGCTAGGGCAATGGAA -GCGAGATCTATCTCACCGGAGAGCCGGTGGAGGAGGCATTGTCTCTGCGCGAGGCATAAA -CGCATGTATCCGTCGTCTAGGCCAACCGTGAGATCTTCTGCATGTTTGATCGCGGACTGC -TTCCACTCCATATGAGAGAAGCGAGCAGATTCAATGACGTTGAGAGCTATATCAATCCTA -GCCATGGGAGTTATGTCCTCCACTTTACAATAGCGTTGAGCTCGTTCTAGGGTGTATCGA -ACGTGCGATAGTAATAGGTTTGCGTCAAGTTCTCTGTAGGTGTGTTAGTCGCTAGAAGCT -CAATCGGGACGGTAGAATTACGTTTGCTCCAGATACTTCCAGGGGATTGAACGGGACGCT -AGGATGAGAGCTTGTAGACACCAAAAGGAATGAGTACTGCCAGGAAGCATATTTAGCATC -CTATCTCTTGCTCTTTCGCTCAGCTTGAAACGCGAGTTGGATACCCAGTCAATAGCCGAG 
-GATGACTGAAGTTTGGACAATACTTTCGTCAACGTGTCAGGGGAACATATGCGTATCAAA -TCATCAGAAAGGCCCAATTCACCCGGATCTTCTTCGACTATGTCTCCATCTTTAGTCCAG -CGCTTCCGAGGGGAAGCTCCGCGGTAAAGGAGATCTACCGGGAGTTCGTAGTTTGCGAGA -TAGGACAGAAGTATCATCAAACGTGCAGAGTCAGCAGACCCGGTAGCCTTCAGATGTGGT -ATCACTTCAAGTACCATGGCTATGGATCTAGTATCGTCGCTTAGAACTGCCTCGTTCCTT -CCTTCACAACGGCTGAAACCTAGCACCGTGTGCTTTGGAGGGTTTACGAAGCTAAGATGC -ACTGTAAATGTGTCAGCATCATAGAATACTTGGAGCTATACACCTTACATTTCACTTCCT -TGATGAAGTTGTCAGCCAACTTGCTTAGCTCTTTCGCTTTACGCAATATATTCGACTTTT -TTAGGAGTTTGTCAAGAGCAGTCCGCCATTGACTCTCAGCATAGCGTTCCCAGCTAACTA -GTGGGTTAGCCAAAATTGTGCGTACCCATTTCAATACTTACATATATCCCTTTTCCAGAA -CAAGGAGGATTCCAGTAAGCGGCTCGTCTGATTTCTGTGAAATCCAGGATACAATAGCAT -GCCATCGTCTGCCGATCCTACGAGCGTTGTATATCATATTTTCGGAGGATTCCACATGTC -CTAGCATAGCTCTCGCTTGATGCACTAAAGGATTGATGGACTTGTTTTTCATGTGTAGTT -CAGCTAACACGTACCGAACAAGCCGCTTCCACTTTTCGGTATGAGTACAGCATGTCTGTG -TAATAGTGGAAAAAACCATCCTCGCGCTAATCTGAACACCCAGCTCCAGCTTAGCATAGG -AATGGTCAGGGCTGGCCTCTACCTTGGACGACATTGTGTGGAAGGTTAGCCTAGGCTCTA -GCGGGGTTGGCGGATTGACATGCAATCGATCCATGGTTGTTTTGAAATTTTGAGTTTGCA -CTAAACCCGAAATGGCCCAATTTTGTACGACGTACAAAATCTAACCCAAATTGGGTCTAG -ACTGATTTACAACGTTACACTAAACCAAGGATCCGTCTATACGCTGGAACAGCACAGGTC -AAACCAGGAGAGGTATGCTGAGCTGTAGTAGAGACCAAAGGCAGGGTCTACCCTTCATTT -TGTACAGAAAAAATAATTGTCTCCCTTATCTACACTGCCAACATCCAATGCGCCATGTCG -AACCGCCCTATAGAGGACATTATCTCCTGCCTGGCAGTAAACACAAAGAAAAGCCTCCCA -AATGCCGATATCGATCGTCAGTTTTTGATAGAGCTGGTAATTGTTACCAATAGTTGGGAG -ATACGTGTTCCCACAGCCCAAGGTGATGTTCCTCTTCGACACACGGAAGAAGGACAGAGG -GTTAGGGTGTATATCGCGGATGACAAGTTGAGGCATTTAGGCGAAGAAGAGATGAAACCA -GTATTGCAAGCATCTTGTGTGTTGCAGGCGATCTGGGAGAACCCTCAAATCGGTCGTTCC -TGGATTGTCCAGTAAGTGTGCCCATCAGTCAAACATTGTTTCCATAATAACAGAAAATAG -TTGCGAGGGAGTGCCAAAATATCTGTTAGTGTCTCACTTCTGATCGGTAAGGCACGTCTA -ATATATCGGAAGATTAAATCTGTGCGCAGCTCTCAACAAAGACACTCGAGAACTCCACAC -TTGTTCTTGTCCCTGGGATGAAGTTTCGGCCATCGCTCGTCAGCAACTTGAGACTCTTAA -GGGAGAGCTCAACATCAACTGTAACGATCTCGAACTATATAGAAAACGGGAAATGTCTGA -TGTCCCGGGAGGAAGTGATGTTGCCCACATCCTTGATAATTACCAACACCAACTACAATT 
-GCTGGAAATAGCTGGAAAGAAGAGATTGGAAGCAGCAAAGCACACAGGCTATACAGTAGA -GTGACCGGATGTCTGGAGACTGCTGCTTTGGTACTCTTCGTATTGTATATAGTGGTAATA -ATACTGGTTATGATAGCGGCATTGATCTTTCTTCGGCTAAATACTGGATATGATAGAGAA -CCAGGCCCACGTATTTCCAGCTTACCCTATTCTATGGCTAGGAGGAGGACAAGGAGTGGG -TGAGGTGGATGGCCTAGAGGGAGTGCAAAACATTGCGATGCTCAATCGTTGTACGCCAAA -TTCATTGTGGCCTCCCGGGCCTGGACCACGGACTCTATGCACGCAAGCCCGAACTTGTTC -ACGTCCAAGTAAACGCTGGAGTCTAGTGCCTGCAAAGACAACTACCTCAGGTGTTCCATC -ACCAACAGTAGGAACGTGCATTAGATACCACCGACCTAACCCCTTCGCTGTTTTGTGTCT -CTGTAGTTTTAAGATTCGTAATCTCCAGGCCGTCGTTGTGATTATGAGACCGGAATCCAC -TAGTTAATGTTCCGCTATCTTTATGAGGGGGGCTAAAATTGCTAGGAAGGCTTCCAGAAT -CAAAATACTAAAGTTTCTATTTCTTTAAGAAGGGGAGAATCACCACGATACACCATAGGG -GGAAAAGACAAGGACGGTTTACCTCGTAAAGAGATTTGACTGAACGATATAAGTTGCAAT -TTTTCGGCTCGGCGTCATTTCGGAGGAAAATAGATACTCTCTTTAGGTATTTCAATCCGG -CCAATAACACGTTCCGAAGCAAATGGCGAGTATCCTATTATGTGTGTCTTAGCTGCCATC -GGTTGCCGGATAAAACGCCGTGCCTAGGGAGTGAGGTCAGTGATGAATACTATTCCTGGA -TAGTGGGTTTAGCCTGCAATTTCAATGACATCGTCGGGACTGTGTTGCTTGATTTCAAAG -GTGCTGTTAGGTGGAAAGCTCCAAACCGACGAGGGGCAAGTTTCAGGTTTGAGACCGGAG -TGGATGATATAGTGGTAGTTTGGCAGACATTCTGATGCACACATCCGTATGTACGTGAGG -CACTTGACGACGAAGCAAGCGAAAGCTCCAAGCACCTTAAGTCTCAAAAGGTTTGTAGGG -GTTGCAGGGGTTGTGGGGTAGATCATAGGTCGGTAGTGGCCTATTGAACTCTCAGTAGGC -CGCGTAGCCCCGTGCTCAATTTTTTCTGCCTAGATACCACGGTTTGGTTTTAGATCCTAT -ACTTATTATATTCCGTGACCCGGTATTGGGTATATACCTATAGGTAGTATCCGGTATATA -GTATCTAGGGGTATATATATTTATATTCTCGGCCCTTACGTACTAGTAAATATAGAGTAG -GATACTAATGGGTCTATACCATGATAGAAATATATATCTATCTATCAAACTCCTATCTGT -AATATTTATCTCACAAATCTATCATTTTACCACAATTTAACTACCTCTTTGTCTTCTACC -TAATGATAGACTTTCAATCTACCTAGTGGCGCTACCGGTATAGCAAAATTAATGGTCTTA -TGTAGGATATTAATTCTATTCCTCCCCCGCGTGGTTTTGAAAGCTGAAATTCCAATCGCT -CTATTGGCACCTGATACCAGTAGACTCTTTGTGGATAAACCCTTGAAAGGTATGGGTCGA -TCAAAGAATGCATGGTACTTACTTTTTATGTTTGACCTGAACCTATAAAAGAAATGCGAA -CAACGGGGCTATGCTACATATCGTGCACCTTTGCAAAACTCAAGTCACGTAAATTGAAGA -AGCATTCCAACTGACCTAACACATGATTCGAGTGTAAGGCTATCAGCGAACGAGTGGGTT -TTTATTTTCATTTTCACGCCTGCTATGGCCTTACAAAAATGGTTGGTTATTTTCAAATAT 
-GGCGTCCGCTATGATGTTGTAAGGGATATGGAAGGGAATTAGGAGGCCAAGCGAGTTGCC -GATCGAGAGCTTTGTTGTCGCAAAACTTGTTATAGTGCGTCATTATCATGCACAGTCATC -GATTGCATATATAACCTGATCATGATGAATATATCGAGAAAGGTCGTCAGATTGGGGTTT -AGCGTTTCGTGTTAACAACCTCTTAGACTATCACAGGTAAGGCATTCAGTCTGGGAGATG -CAACTCCAGCCTTAATCAACACAATGACAAATGACTAGAGATGACTGTGCAACAATTTTA -ATATCGATAGAGCGATAAATCCAGGTATAGGAGCGTTCAAGTTTCATAATCCTGCTGGTG -TTTTCACATTCATCTAGGCCAGTCATATTTATTGAAAAATTATGTAGGTTTTTTTCCTCG -GCATTGATTGGACATGATCCTCACATTTACCACAGCCTCTCCAGCTTTTAACGGTTGCCC -CATCCACCTTTCATCTACTCTCATAGTCCTTTGCCCGTCTGCAAACTCAATATACTCTAT -TATCATCTCTGACACAAAATGCAAAGACAGATCGTTGGAACGCTCAACGTCCGCGCTCTT -GATCACATTAGACAAGATATACTCGAGCATCGAAACGACGAAGAATACGACGTCTTGTAC -TTGACCACAGTACCTGATTCCAGGCACAATTTTCGTGTGCGTATCGAGGATTTTCAAAGT -CTCGATTTCGATAACTCCGATCAGTGGTCTCAACGTCTCTTATACGCACTCTACTACTTC -GACCAGTCTCACCCTCGTGGAGTGCCATTTATCGTCAATCAAGTCTGTAAACCAGCATGT -ACTCGAGCGGATCATTCCAAAAAGCCTCCGTTGGCTTCCTCCTAGGTGCCGCGCTAGTGC -AAGGAGCGTATATTCCAAGCATCGACCAGCGTAACAATACCAGTGCCTTGAACGCTTGAG -CTGGCAACGAAGATCGGATTGCAGTCTATCAAAATGCTCAATGTACTGGCGAGGCCATTC -CTTACTCTGGTGATGACGGGTATTGCCACGCTGGTCTCGGTGGCGGCGGTGTCGGATTGC -AGCTCTTGGCTCTAGATGCTGACGGCCAAATTGCTTTCTTTGACGAGCCTGATTGTAACG -GTAATATGGTGGCCGCCTTCAATCCAGATCAGGACCAAACCGGAGCGGATTGCAAGCCTC -TGCACGGAAACCCTGTTTCCTTCCGATTCGAAAACGCTTGAGACACTCACGCCACTCCCT -TGCCTTGATTGGTCTAGAAACAAATCCAGACAGATATGGTATCACGAATTTTGTTTGAGA -TGATTGAACCAGTTTGAATGCAGTCTAAAATGAATAAATAAATCGGAAATGAAGTTTACG -ACGAAGCAATTGTATGCTCCAGCTAATGTTATCACCAAATTGATGTTAAGGTAACCTACC -TACGAAAAAGAGCCCGAGATTAAAAGTCTGTCTTACAAGCTAGAAAGTATATCCGTTAGG -GAACTCTCGCAGGTAAATTTATTACGTCGTATCATATTGAGATCCCGTAAGCCCATTTTC -ACTATAAGACTTAGTGGTAGAAAGAGTTACGTGCGGAGCTATACGGGCACCCGTAGCCTC -TAACGGCCCATAGCACCTATACACCCCTATACCTATAGGTATATAGCTAATAGACTACTA -CTTAATAATAGCTTATACGATCCTTTCTGAATCATATCTAAAAGTAGACCTCGACAAGTC -CCATAGGACTAAGAAAGTTTTGGTCCCATATTCTGGTCCATTGGTCTCAATAGGACAAAG -GAAGATGGTGTTTATTAGATGTGATTGGCTTATTCGTCTCCACTTGTTCTGATGTCCGTG -CATTGCCGAATGCCCCGCCACCCCATCCCATGTCTACCAGTGAACCACATGAGAGAACGA 
-GGATAGTAAGTTCAAGAATGCCTCAGTTTCGGATCGGCCGAATTATTACCCGGTGAGCAC -CTTCCGCCAAACTAGAACCCCTCGCCTTTCTGCTCCCCAGGATATTCTTCGCAACGGCAC -TTGGATCTCCCAATTTTTGCGATCTACCTTATTTCCAACCGAAAATGACGAGGATCATCA -CATGGCTCGTGACAGCTTCTGGAACTTTCGATCAACCACGCATTGCATTGGACTTCCTGT -GCTTCCTTTTAGAAAGACGAACCTAAGACCAATAGTTAACTAGAGAACGCTGGATGCCTA -AGCTTTCTGGCACTCGTAATGACCTAGCCGAGAACCAATCACACATTGGCTCCTCTGACC -TATTGTTACTCCTCCATTTCTGTTTTCAAGCGGTACTTTTGGTGCGTGACTTCCCTGTCA -ATCTGACAATCGCTCCTTGAAACGAGACTTCCTCAGGAGTGGGGTACAACTATCTACAGT -ACTAGTCATAGTGACCATATTATGCGGCTCTCTCGGTCTTTTTGTTATAAAGGTGGGATT -AGGTGGCACTGACCTGTCACGTTAAAGCAGGGTATATAACTAAGCGGTTTGCAAACCCTG -CAAACCACTTGGGATCGCACAATTCCCGTGAGCCCGCGCACGTGAGCCAGACATAGGTCA -TTTCTTGATTATTCAACATCCCGACTCTTCCAAGGTTCTGACGGCCAGATGATATAATCG -TATCGCCTCCACGTTTGTCTTAGTCTTGGAGATCTACATGCTACACATTTTCCATACAAG -CTGAGACATATTCCCATATACTAATTCCGGACAATGGGCGAAGAAATTTCTAAGCCGAGT -GGTCGCAAACGTGGTCGTCCGCGAACTGTGACTGATGATCAAGAAGTGCCAGAGGTGGGT -CAGATTCCAACGCTCCAAGTGCTCGTTTTTATACTCATTAATTTTCAGAGACGTCGCAAA -CAACTTCGCCTCGCACAGCAGGCATATCGCAAACGAAAGGAGACCACGATCGGTAATCTG -CAGAACCGTGTGAACGAATTAGAAACCGGCATCGAGAACATCAGCCACTCTTTCCTGTCC -TTCAGTAATCTTCTTATAGAAGATCAAATCCTTTCGCAATATCCGCATATCGCATCAGCC -CTTCAAAACATAACCCAGCAATGCGTATCACTTGCAAAGGCTGGGAGCGATGAAACGACT -GAGGAGGCCCTCCCCCTCGCTCGGGCCGCAAAAGAGTCCCGAGTCACAAATAATAATACA -GCCCAGACACCAGTTCTGGATAACACATGTTCAGAGGCTTCAACAGATGTCGAGGACATA -TTACAACCAGCTGCCACAAGATGGCCCAGCCCTCCTACCCCACCATATCAAGATCAACCC -ATCCTGCAATTCGACCTCGTCACGTCGTCGCCTATTGTTCAATTTCCTTATATCACCCCC -CTGTTATCGGACTCCTCAACACTGGACCTATTTCCAAGTAATATCACGCCTGACAGGCAA -TGGAATGTCGCTCAGCGTCTAGTCCGGACTTGCTGTCAGAACGGATACCGCCTTCTTGTC -AACAGTCCGGATAATTTCGCAGTGCACCGTATCTTTGGGTCTGTTCTGAGCCTGTCAGAA -CGCAATAGTATGATCTCAGGCTTTTACGCTATTCTTCAGGACAAGACCGGTGACTTGATA -GATCGCAAGACTAATGTGTTTCACTCCCTGCGATCAAGTATGAACATGCTATCGGGTGAA -CAACTCCAGGTCTCGTCTAGAACGTGGCAGATTGCACTCGAATCAGCCTCTGGGGAATGG -TTGGATGCCAGTGGAGTGCAAAAATATCTGCGCGACAAGAGAGTGATCGTTGATAGTTTT -CGTGATTCCTCTGGCCATCTTGATTATTCGGTCTCGTCATCACTTGACATAGCGACTTTC 
-ATAAAATGTGAGTTCTTGAAAATTGAATAGAAGAAACCCACTAACACCACTGTAGTCCTC -TCAAAGGAAGCCATATGTGTTGGGAATGGTCCAGGATTCCGACGGCAGAGCGTTGAAAAA -GCTCTTCGTCTGGCAACGATAAGTTTGCCTTGGGAATTCGAAAATCTTTGCGATTTATAG -TACACATCCCTACCTGTAATTATATAATGCACATGTACACAGTGCATGTGACCTGGCAAT -TGAAAACAATTGACAATTGATCGTACTCCATTTGGTTATACTAGCAGCGACTGCTCCAAG -TTTCCAAAACAATGTTCCCACCTATCCACCCAATTTCCCCTAGTGTGATATCTCATCTTA -TATGTTACAATTGCATTGGGATGAACTCACGTAGTCTCATTATGTCTGTGATCCCCAAGA -TCTACTGTACAAGCATGCCAACGCATGGAATACAACCCAGCATTATAATCTCATGCATAT -TCCACTCATCCCCCCCCCCTTCATCTCAATCTTCTGACGTGCAATCGATATTATACTCTA -CGCCCTTGGGTCGGCATGTGGATCTCCGTGGAGCGACGCAACAGTCTATGAATCGTCATT -GGCTCGGCATGACTCACATATGTCTTCTTGAAGGGATCAGACCTCTCGGTGTTTTGTGAA -GAAATGATCAAGATACCATAGAGCCAACTTGAGTCATTGGGTTTCCGTTTTGTGCTTAAA -TTGACATCCTCCCGCTGGAGTTGAGGTGTTCCGAACTTATCCTTTTCAAGGTTTCTTGCA -TGCCTCTTAACATCATGGCCGGTCAATCGACTCCCTTTGGCAAGCCGATGCTAAAGCATT -GGATGTTCGATCCTGCCTATAAGAACCTTAATCATGGTTCATTTGGCGCCCATCCGATTC -CTGTCAAAGACGCACAACGTGCCTTTATGGATATCGCCGATTTGCGGCCAGATCCATATA -TCCGCCGATACCATGCAGAGTATCTAGACGAAGCTCGTGGAGCTGTCGCAAAGGTCCTTA -ACGCTCAACAGGATGAGTGCGTGTTTGTCAAGAATGCGACTACGGGCGTGGCGACAGTAC -TATATAACTTGAACCTCAAACCGGGCGAAGCACTCATTTATTTTGAACCTGTCTACGGTG -CTGTCGAGAAGGGTGTTGTCTCGCTACAAGAGCATAACCCTTTCCATTCCCGAAAGGTAT -CTTTCCAGTTCCCCATCACCGAAGACGAGCTTGAACGCCGGTTCCGCGAGGTGATTCGTC -AAGCTCGTGACGAGGGACTCGAGGTCCGGGTCAGTGTCTTCGATGCCATCGTCAGTAATC -CGGGTGTGCGGTTTCCCTTTGAACGCATTACTGCAATTTGTCGCGAAGAGGGTATTTTGA -GCGTCATCGATGCCGCGCACGGCATTGGAAATATCCACCTCGATATGGAGAAGCTACAGC -CTGACTTCTTCGTATCCAATTGTCACAAGTATGAACTATCCCCCATTATCCTGACCTTTC -CAGCAAATTAATAAACCATCGAATGTTGACTTGGCTTTATCAAGATGGCTCTACACACCC -CGCAGCTGTGCAGTCCTCTACACACCCCGACGTAACCAACATCTACTCCGCACTACCATG -CCAACATCTTGGGGATTCATCCCAGCGCCGGACTCCCCCGAAACTATGGCCTCTGTCCTT -GATGACCCCAACGCTCCCGTCACAAAGACTGCCTTCGAACGTCTTTTTGAGTTTGTCGCC -ACCAGTGATGACTCTGCCTACATCTGCATCCCAGCAGCGCTGAAATTCCGCACCGAAGTA -TGCGGTGGCGAGGATGCTATTATTGCCTACACCCACCGTGTTGCAAATGAAGGTGCAGAT -GCTGTTGCTGCCGCTCTCGGCACTGATGTCATGCAAGAACCTGGCCTGAAGCCTGGCCAG 
-GAGAGCCGTATGCGCCAGTGTGCCATGACTACGGTCCGGCTGCCTATTGCTGTGGCTCCT -GCCGGTAAAGAGGCTGAAGCTTTTGACAGTGCTCCTTTGGTTGTCTTGTCTGAGGAAGAA -ACCCCGAAGGCATTTTCTTGGATTCAGACGCAGCTGTCGGACAACCATAACACTTTCCTG -CCAGTTTTCCGCCATGGGCCTTGGCTATGGACTCGTTTGAGCGGCCAGACTTACTTGGAG -ACAAGTGATTTCGAAGCTATTGGAATTGTCTTACGCGATCTTTGTGAACGGGTTGCAAGG -AAGGAGTTCAAGGCTTGATCTGATCACTTCAATCTAGCGCTGATTAAGTAGTGATAATTG -CAGCCAATTATTATATTTGTACCAGGTTCATATGAGGGCCAGAAATACTTTGTATGTCAT -CTCCAAGGCTGCCCAGTGAGGGCCCGTAGGGTCCAATTCAGCACTGATATTTTTAGGCCT -TGTGCTTAGTGGCTTTCCTCTCATCTTCACGCGTCGTGAGATCTAAAAATCGACCAAAGA -AAATTCTAGGCATCTTTCGAAGGAGAACAATTAGATATTTGCTTTCCAGGCGCCAAGTGC -GTAGTGTTGCTCAACAAGCACCTTGGTATTGGCCGACTATTGGCAAAATATGTTGCCTTT -CTTCCCACATATCACACCAGAAGCGGGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNG -GGTGCATTTCTTTATATACCATTGTCAAAGCCCAAGGTCCTCAAAATTAATCTACTCAAA -GCATACTAAAATCATCATGTCTGTCGCAAAGGAATTTGAAGCAGCCAATGCCAACTACGT -CGCATCCTTTACTAAGGGCAACCTGCAGCTACCCCCACAGCGGTTCGTTGATAGCCCTCA -TTCTAGCTCCAAATTTTACCACTAATATCCTGCAAATAACACATCTGACTATATTCCTTC -AATTTCAGAAAGGTAGCTGTGGTAGCCTGCATGGACGCACGCCTTGGTACATATTCTCCG -CAGTACAGCCGACACAATGATACTCATACCTACCATGTACTTTATAGATCCTGCCCGCGT -CCTCGGTCTTGAAGAAGGCGATGCTCATGTAATCCGCAATGCCGGCGGTAGAGTCTCCGA -TGCACTACGCAGCATCATCATCTCGCAGCAGCTGCTCGGCACCCGCGAGATCGTTATCGT -GCACCATGTAAAAAACCCTTGCAGACCATTGAGACTAAGCCTCGACCTTGGCCCATCTCA -CCAACTTTTGATTCAATTGCTGATAATATCCAGACAAACTGTGGAATGCTCACTTTCACC -GACGAGGGAATCAGAGACAAGATCCGTGCAGATCTACACCAGAATGTCGACCACATTGCC -TTTCTGCCCTTCGGTGACCTCGAGCAGAGTGTTCGGGATGATATCAAACTAGTAAAGGAT -AGTCCTCTTGTCTTGGATGTTCCAGTCACAGGATATGTGTATGAAGTTGAGACCGGGAAG -ATTATTCGGGTTGAAGAGAGTTAGTAATTGTCACTATCGACACTACCGACATCATCAATA -TTGCAGTATGGTGAGCCTCAATGCCGAGGTATATACGATATAGTAATAATTATAGGATTG -TGACCAAATTTGTTTTGCTACTCGAACCTATATGGATGTATCCACGGATGAGATAAATTT -TGGGTCATGTACGATTTGGCAGGTATTTAACATACCACCACCCTACTGGTGGATTGAaga -gagagagagagagagagagagagagagtagagaACAAAGATGTAACGCCCTCGGATGCTC -CGACGAAAAAAAGAACTCGCAATGGCCAATAAATCAGATCCCACACGAAAAATATACATA -GGTGGTTATACTCGAACGGACCCCTATCAGCAGTCACAGCCTTTGCAGGCCATGCCAATG 
-CTAAACAGGTGCTTACCGTCTTCTGGCTCCCTTGATCACCAAACTACCCACCACACCCGC -GACAAGAGCCAAGCCGCCCACGATTGCGACACCCACCAGGCCCTCCTTGGCAACTTTATC -ATCTATGAGAGACCCAAGGCTGGCAGCCTGAAGGTTTGCGGGTTCCTTCTCCAGGAGCAA -ATCGTTGTATCGGCGGGCTTCGCCATAGTTCCCTAGCTTGAAATTGCCGAGAGCCAGATA -ATAGAGACACTCACGGCGCCGCTCCGGAGCTCCGCGGAAGATTTCTGAAAGCAAGCGAAC -ACCTTCTTGCTGCTCGGAACGGATGTTGGATTTGATCAGACCCTATAGTCAATACCACCC -CCGCATAGGCGGGGAGTACGGATTAGCTCGGGCAGATTCGATTAAGAATGGGAAAGCCAT -ACCCATGCGTAGTTGAACTTTGTTTGAATCCCGACATAGTCGCCTTCTTTCTCATATTGC -GCTCTCAAAACTTGTAGCTCGGCGGGTTTCAGAGGACTGTGTGGATGATAGTCAAGTTAG -TTTTTAGGGTTCCGATGTTTCTTTCTCTGATTTTTTTCGCGAAAAACAAACCTTTCTGCA -TCGGCGGCATCTGGAGGGGTACAGCAAGTCAGATTCCAAGTATCAGGCATCAGTCGGAAC -AAACCACTTACATGGAAGGTTGGACATGGCGAGATAGGGAGTTGGTGGTCTTAAGACGGA -GTAAGATTGATCAGAACAAAAGTTTTAACGATTCGATAGTAGTTCGTCGATAGTAAAAGT -CTTAGTTGCGTTGAGCGAGAAGATAAAACCGAGGACGGAGAAATCACAAGCACAACGTCA -CTTTGCGGAGGAGCAACATATGTGGCGGTGCTGTGGCTGTTGCTGCCGCAACAAACCCAA -AACAACCGACTTTCCACCTCGACATCGTTAACCACCGACACCATGTTGCTCGACGAGGAC -CCTGGAACTGTATGTGCTACTCTCATTGGTAAATATTTTAAGCTCATGGATTTCACTAAC -GATTTCAGCTCATCCACCACACGATCGGCAACTTCAACATCCACCCCGATAAACAAGCCG -TTACACGCATCAATGACTCGCTCGCAACACTCCAGCAATCGCGCGAGCTGCGCATTCGCG -AAGCCGAGTCGGCTATGCGGAAACTCTCACGCCATTTGAATTCCCTATCTACACAACATG -AAGAAGCCGTGGCCGTGCATGACTCAGGCAAACACGCCGCGGAGATTGTGGAGTTGGACA -CTAAAAAGTTTCGCATCGCCAAGGCTGCATCAGAGCTAGAGATTGAAAGTGAGCGCCTGG -AGAGCGAGCTGGAGATGCTCAAGGAAAGACTGGTCGACCTAGAGTCCCAGGGTCTGGAGG -GGGACGAAACAACTCGGCGCGAGCGTGAGGCTGACGATGCGACAATGTATGCAGCATGCC -TGTGTCCCTGTATTGGATTGAGGCTCTTGCTAACATCTATCTAGTTTGCGGCTCAAGATT -TTCCGTTCTCTCGGTGTTGATATCGAGCCCGATGAGGCGGGCAATTTCAATCGAGCGGTC -ATTCGCAATAGCCGCAAGGGCGATGTTCATGTCGTCAATATCGACCCAAAATTTTCCCGC -TTCTTCTACGCAAACTACTTTTGGTCAACGATGCAAGGATGATGATGACGAAAAGTGTTT -TTCTGGTTGTCTGTGCTCCCTGGCGTTTTATTTTTGGAGGTTGGAGGCCGTGTTTTTATC -GCTGTTGGTGTTTCTATACCCTTGGATTCTTCTCTGTCATAACTCGTACAATTCATTGTT -CATAAATTGCAGCCATATGGTTGCATGGGCATATCGTCTTTTTCATGCAGTAGAGTGTTC -ATCCGGATGCTGGCTGCTGTTGTAGCCGCTCTTGTAAAGCTTGAATGACTTTCTTCCGGT 
-CTGCGTCTGATATTGTCCTATCAAGCAGTTGAGCAGAGACATCATGCGGCAGGACTCTTT -CTACAGCATCCAGAAGCTCCCCCTTCTGGATTTCCGACAAGTTTATACCCCAGGAGGCGA -TCCATTGCTCGGTGAACTGGATAAGCTTTTGCTCAACCTGATCCATATCGGGGATATCAC -ATCCATCCCAGGTCAATAACATTTCCTTTGTAACCGAGCCAGACTCGGTGTCTTCTGTGG -TTCCATATCGTGTTTCCACGCCAAGAGGGCCAATTCCAAGCGCTTCTGCACGGGCATTCC -AAAGTGCCTGCTTCAAGGTTCCGTCCCGCTCTATCATCATCCAGAACAATGGATATGTGC -TACGAGCTAGCGTGTCACGAACACCTTTGGTTGCCTCACGTGGGCTCACCAGCACGCCGA -CCGTTTCGCCCGTGCGCCACCCAACAGGTGCCTGGCGGAAGGTTCCTTCTAGTTCACGGA -CAACATTAGGGCCCACTTTGGCCTTCAGTGATTTGCATTGTACCAATACACGGAGTGCGC -GCTCTCGCTCCCGCTCCGGGAGGTGCCAAGTGCCAACAAGATCGATGCCAGCGTCATCGC -GACCGCCGATCCGGTTGAGGCTTAGTGCGTAACGACGTAGGGTTCGGAGAACAGTATACT -CGTAGTGTGTACCGACATGTACCGAGCTCGTCTGTGGAAGAGAGATATGCTCCGCGTAGG -AGAGAAATGTCGTCAGGTCATTGTGGTGTTGTGAGGGCGGAGAAGGGGCCGAGGGGAGTT -TGAAGAGTCGGCGCGTGAACGGTGTCAGCTCATGATGAACCTGGGAGCTGGAAAAGCAGC -GGTAGATATTTGGTTTGAGTCTCAAGTGAGAGTATCCGGCAGCAAGGCCATGGCGCAGTC -GCAATGGGCGCATTGAGTGATGTGAGGAAGAAAAGGCCGTCTCACCGCATTAAAGCTTCT -CTCCGCATTTGCTCCGCTTCTTTCGCTCGACTGGCATTCTACTTTTCTTTCTTAGAACAC -TTAAAGCTCCCTTCCATTTTTTTCGGGTCGCTATTATGGCTGACATTCTCACGCAGCTCC -AGACTTGTCTGGATCAGGTAAATCTGCTCTGCAAATCCTACATCTAAACCCTTTCTCACC -ATACGGACAAAATAGCTCGCAACACAATTCTACGCAACACTTGGCTATCTCACAACATAC -CACGATAATGCTCCCACAACACCGCCTAATGTCCCCAACGCAGTACCGGCCCTAGCCAAG -ATCACTAAGAACTCGACAACACCACCGGTCCCGGCAGCCATCGCAAACAAAATGGGGGGT -GCAGCCGCCGTTGCAGGAAATGCATCACCCCCGCATGCGCCTCCTCAACAACCTGGAGCT -GCGCCAGGAGTAGCGCCAGAAGGTGGAGATCCCAACCTTCCTCCTGCGCCGGATTCACCC -AGAACATTTGCGAGCCGGCAGCGGGAGCTTGCGCGCGACCTCATTATCAAGGAACAGCAG -ATCGAGTACCTGATCTCTGTGCTTCCTGGGATTGGCGCCTCTGAAGCCGAGCAAGAAGCC -AGAATCCGGGAACTGGAGACAGAGCTCAGAGAAGTCGAGAAGGAGCGCGCTGCGAAAGTC -CGGGACTTGAAGAAGTTGAGGACTCGGCTTGAGGATGTCCTGGGCGCTGTGGCTGTGGGC -ATCTATGGAGATGGGTATCTTCAAAAGTGATTCTGCATGTTCTCAGTGGGATACGATCTA -CCTGCGCTTTGGGAGTTATAAGTTATACGAAACAGATAGGACTGGCGCGACTTCTCTTTT -TTTTTTGGCTACATTCTCGGCGTTATGTGATGGAAGGACTTTTACCGATTGCACAGGCAT -ATGCATAAATCTCACAAATACCAAATGATCACACCATCCTACATGATTTCTTTCAATCCT 
-GCAATGGAGTCTTTCTTGCAGATCGCTAATATCATATTATCCCGACACCCAGAAATTAGA -TTTCTTTGTGTGGAAATATCAAGATCAAATGTAGCTCACCACCTATCCAGGTATCCAAGG -CTTTTGTCCATCGCGAGATCCTTCTAAGATTAAAGATGTAGGGCTTTGATAGTCCGAATG -TCCAGTTCCAAGCTTGGCCCGAGACCATTTGAGGTATATATAATTCTGATAACTTCTCTC -CTATTACGTGAGTGTAGCCAAAAGTGAAGATCATGAGCACTCATGAGAGGCGCAATTGTT -ATAAAATGAATTTCCCATATGTACCGTTATTAGTGCTTATACGCCATAATATGCCATCTT -CCTAGAAACATAAGGACCTAGGCCAAAATTCAAGCTAAAACGTAAAAGCTACTGGGCAGG -CATCGTTCACCAGTTAGTGCTAGATCTCTGCGAAAACATCATGTACGCGTAGACTTTCCG -AGGTTAATGTGCGCAATGTTAGAGCCATAAGTACACCGAACAAGCTGAAGTCACGAGCGA -AAAGATTGTTCCGTCGTTAGAAAAAAAGTATGATATAGAGGTCATACCAGATGACTTGGA -AAGGTAGATCTCGGAACGTTGGAAGTTTGAAAAGTCTTCTTCCTATTGAGTGACTGAGAT -CTCACCGTACAAAGACTGACAACTGCTCTCTTCATGAATTATGACTGGCTCCTATACTCT -CTTTCTGAAACTATGTTAGAAGTGTGCAAAGTGTACTGCACACAAGTATCTGTTTATAAT -GAATGGTACATGCATAAAAGAAAAGAACTTCTATGTTAAAGCAAGCAAAGGCGTGTATGA -ACGTGCTTGGAGACTCCTATAAAAGGAAGCATAAGAATAAAGCGCCCGGTATATGATCAT -AAGGAGAGTCACAGATGTAATAATAGATACTAACGCCTTCTCCTTTTCCGGGGTATCATT -GAATAGAAGCCGGCGAGCCACCGGCATTGGGGCAGAAAATAAAAATCAGAAATGAAAATC -GCAGGGGTAGATAGGAAACTGAAAAGGGCATCATGCTCAGTTAGGGTGAGAAAAAAAAGA -TCTATTGGAAAAGACTGCGGTATTCACGCATGCGAGCCTCCTGGGTCTCTCGCTTGTCCC -GCTTCGCGCGGTTCAAGCTCGTGTCTGCCATCTCTTGCTTCTTCTTGGCGAGATCCATTA -TCTTCTCCTCAATGCTGCCCTTCATAACAAATTGAAAGGTTTGCACGGGGCGAGTTTGGC -CGAGACGATGTACACGGTCAATCGCCTGAGCAACCGCAGCTGGGTTATACTGGGGCTCCA -TGATAAAGACACGCGAGGCGGAGGTTAGATTCAAGGCTACACCACCAGCACCAATAGTTG -CCAAAAGGACTTTGATATTGTCGTCTTTGGCAAATTCCTCGAGTGCTTTTGTGCGTGCTG -CAAGTGTCATGGTACCATCAAGTCGGACATAGCCAGGAAGCTCATTGTTTTGCAAGGCGA -TCTCAATGAGATCCAGGTGCGAGGTCCATCCGGAGAACACAACACTCTTGATAGGGGACT -CGCCTTCAAGTTTCTTGCTATCTTCAAGACTTTGCTTGAGATAGTTGACCAGTGCAATGG -TTTTGGTGTGGGGCCCTTCGTATTCACCAAGATTCTTGCCCAGCTTCCGGGTTTGCCGAT -CACTTTCCTGTTGGGCCTCGTATTCTTCAAAGCCAGCAGGCGTGATAGTCGAGTATTTCA -TGGCAATCCAGCCTTCGCACCTGGGGCAACGGGTCTCTGTCTCAAGATGCGAATCGAAGG -CTTGCTTCCAGCCAGAGAAGCAACTGGGGCAGAAAACATCAAGACATGGTAGGACAACAG -CCATTGGAGAATCGCGGGCAACAGCTCCTACTGAATTCAGAGGTTCATCGAGTTCTCGAT 
-TGCAATTGCCACACCGAGGGACTGAGGCTTGCTGCATCAGGCTGAAAATTTCATATGCCT -TCTTGTCTATGGCTGCCGGGCTTTTGTCCGGTTCGCCCTCCTCCAAGTCAATTGCGTCGT -GAACAGAGAACCCCTTTGCTCTCTCCCGGTCTGCGACGTCAAGAAGCTCTTTTCCGTGTG -CACTGACTTGGCGGAGAATCATCATCGCCTTCAAAACATGGTGATACATACGTCCACCTA -TCTTGCGTTTGTCCTCGCCAGCAATGACACTCATCATCACATTTGATTCCGTGCGGAAGA -AGTCATGCAATTGCCGTTCTTGCTCAGTGAACTCTAGTGTGACGATATTGTCTTCCCGGG -GTGGCAGATTGATTTTGTCCTTGACTCTGCGCAAGGTGAATGAATCCACCAGAACTCGCA -ACCTGGCAAGTACATTGGGATCACCATTTTTGACCGGATTAGTTACATGTTGGGAAAATT -GGCTTCGTTGGTCGTAAGGCGCAATCCGTAGAAACCTAGTCACAGACAGGAGATCTTCCA -TGCGGTTTTGGATTGGTGTTCCAGTAACTGACCATCGACGCTCGGAGTGAAGCCCGAGGA -TCGCCTTAGTCTGTGCCGCATTCTGTTCTCTGATAATGTGAGCTTCGTCAAGCACAATCC -GGAACATGTTCATTTTTGTCAACGGGCTGAGCTTTCCACTTTTGGCGCCACGCCCCGAGA -TCTCACTGAGAATAGTGCTATATGTCGTGATAATGATATCGTACTCGCAAAGCTCATCCA -AATCGGTGATACGCGAGGGACCATGAAATATGTAGTAGCTGATAGAGCGTGGCTTGAGAT -GGTCCTTGATCTGTGTAACCCAATTGCTCACTGCACTTAACGGGACAACCAATAGTGTTG -TCCGTGTGTTGCGAATTCCTGGGTTTCGGCGAACAAGCACAGGATCAGGAGCCATCTCCG -TCCAGTTACTAGCCAGCCCAAGCGACGACGTGATGAGTGAAAGAATACTCAGAGTCTTAC -CCAGGCCCATCATGTCGGCTAACAAGCCACCGAGGGCCTCCTCGGGCTTTTGCTCTGAAA -TCATTCCAGTGATGATCTCGCGGTACTGCGTCTTGCCATTCGGTGTACGTTCCATTCGCC -ACAAGGAATTGTTGTCTTCTTCTTTCCGGCCAAATTTGCGGGGCTTTTCTTTCTCTGTCA -TGAACCACAATGCTTGTTTTTGGTGATGTAGCATGGGTGTGGAAATGTGACGAGACGGCT -CCATTGTTGGAATGTTGGCGTTGGCAAGCTGGTCGAACATCTTCATAACTGCATCATTAG -CTTCCTCAGCAGTTCGCGCTTCATATCTAATGCTTGGCCCTGGTCGAGAACGCCCAGAGT -TGTTGGCGGCAGCCTTTGCGGCAGCAAGCTGGAGCCGGCTCTCTGCATGTGGATTGAATA -CAGTAGTTCCTTTGTCCACGGAAATTGGAGTTCCTAGCCACACATTGCGGTGGGATAGGA -AGTTGCCAATAGACTCCGCCTTCTTCCTTTGACCATAAATATTGAGGGTTGCACGCCATA -TACCTGAGGTGGGATGCCAAATCTGTTCATTCGGAGAAGTTCTCCGGGTATCCAATAAGG -CTCTCACTTCGATAGCAGCTTCTTCAGAGTCCATAAGCGGACACAGTCCTTGTGCCGTTT -TAGGGTCGATCGTACCGAAAACTTTACCATGAGGGTCTGCGACGTTGATCTGGAAATTGC -CCTTCATAGGTTGGCGTTGCAGGTCTAATTTAATAGACGGCCAAGCGTGCGAGAAATCAG -TGAAAACAGAGTGCCCTGGTTTGGGGACTAAATAGGCCATGACCATTGCTCCTTCGATTT -TGCCGAAGCAGACACGCTGGGCGCTGAGGTCTGTCGAACCGGTGCATAGGACTTCATCAT 
-CGTCCTTGGGTGTGCTCACAAGATCTAGATCAACATTAGATCAGTTCAAACAAAACCAGG -GAAAACGGGTGAAATAAACTATCCCATACCTGCTGTCAAATCCACCACGTTGGGTGTAGT -GGAAGTGGGGGTTGGTTTCACATCCTCCTCTTTCATCCACTTGCCAAACCCACTATCTCC -AGAGATCGAGACCAGTGCTGTCGGCATGTTCGGCTTCTCTTCCGCATCATGTTCGGTTTT -CATTTTCTTCGTCGGCTGGTTCTTCCGTTTTTCAACGCCATAGATATTTTCGATCAGATT -GGGGCGGTAAGTTATGCCTTCTTCGTCACTCATCTCGCGGTGACTTCTTTTCGTGTCGCT -TTCGTCATCTTCGTCATGGGTGGTGCTGAACTGCACCATCAGACTGGGTTCTGACTTCAC -AGCCGGGGTCGAAATTCCAGATACCACACTTGGAGAAGTGCGAGAAGTGCGAGTAGTGTG -TTGTTCAAAGCGTGTAGCGGCTGCAGTGCCTGCGGGAGCCATGGGAGAGGCAGGATATGA -ACCATTCCGCTGTTTTCCTGGACCTTCCAAAAGTGAGCCTTGGAAATCCTTAGAGTCGAG -AAGCATAGATTCTGTGGAATGAGACATAAAAGGGGAAGATAAATTGTGGGAAGAAAGACT -CAAAGATGGATCGAACAGATCGAGAGCACGAGGGGGAAAAGAGTCACCGGGGCCAAGAAG -ATTTCCACGCATTACCGAAGTATCCATCTGAGGGGCAGGAGGGACACAAGATTCAGCATT -GTACATCATGTTCGAAGGCATAGACTTGGTACCCATCTCGGTGCCAAGATGGACAGGAGC -TTCAGGTGGGTCAAGAGGGTTTCTAGACATGGCCGAAGCACCTATTGTGTCCACACAAGG -GAGGGTAGAAACATGAGGAGAAGCAAGCGCAGGAGCAGAATTCATAGGTCCAGATTTATC -CGAGGGTGATTCGGAGGACATAGCGACATCATCCATGGGAGGCAAATCACTCGCTGGGGA -TGAAGAAAAGGGAGGATTACCACTGCTTTGAGCTTGTCTTTTGGAGCCCTTTGGATTCAG -AAGCATCGCAGCATGAGATGCCATGATGTCTCAATTATGGTCGAAAAATCCGGGATAAGG -TTTGGTCGGATTCAAGGTTGCGCGGTGCTGTGAAAGCAAAGTGATCAAAGTCAATAGATG -GGAGAGGCAACCAGGCTGGCTGCTAATTCCACTTGCCCTGGACAAAAATAGAGGACCGAG -TTCCTTTCGACTGGGAAGCTTTATGCCCAAGCTCAAATTTCAGAAGTATTCGTCGTGTCA -GGTGGCTTGGGGTTAGAGAGGCGATCGCAGCTTGGGATTAAATATCCACCTAACTTTTCG -TACGTATTGATTGGGTGGGCAGATCAATCAAGTTCAAAATCTTTGAGCCTGGATGTCACG -TATTAAATATTAACGTCGCAGTGAAGTGATTACGCAAATGGTCTGGCCAACGAAAGGTGC -GTCTTCCAGACGCGAGAGTCGCGAAACCTCCGGCGTCATTGAAAGAGAGAGATATGGAAG -AGAAGCGCGGGCAACAGGAGTTGCGTATTGTCACAGTCACCAACGTATATTATACAGGAC -AAACCGCGTCGCTTTCGTGGTCGCAATCCAGTCGGCGCCTTGTCGTTGCCTCGTCGAGGT -AATCGTGATTAGGTCTGTGGGTGGCTGTGTTGACCGAGGGGAGAACCCTTGATCTCGAGT -ATTCCGCCGGTCTTCCGTTTCGCGCGACTCTTTTGCTCGAGTCAGTCGTAAGCTCACTCG -TTTCGTTAGAGACTCGAAATGACGGAGTGGTTCTTCTAAAGTCGGTGCCTCGGAGAAACC -CGCCGATAAGATCTTATCAGTCCGCGCGCTTACGGTATCAGCGTGATGTTGGAACCTTAT 
-GAGGTTGGAATGGATGGACGTAAGTACCTAATTACTTAAATCAACTTGTATCAACTCAAA -GCAAAGTTGACTGGTCCAAAAAGAAATTTTCAGAATACAAGAGGTTAAAAGGTGTATGCA -TCTAAACGCGTGACATGGATATATTCATAGGTCCCACTGTGATTAATTGCCCTTGATCAC -TTTTTTATACAATACAATGTAGATGGTAGATCTTAGGGCCTACGTACTTTTTTTTTTGGC -TTCTATCCTCCTATGATACTTTAATTTAAGGATGTTTGCTATCTAAATGGGTGACTGTCA -CGTTTATGGATTGCTAATCTGTTTAGTCTAGGCTAAGCGTGTTTGCGATGGTTAAGATGA -TGTCTCACAAAACAATGGTATGCAAGTACTTGACGGAGGCTTACATGGGAAACTACTGCA -TCAAGAGCTGGACTTGAATCCTAACTATGAGATTGTATTGGCATGGAGCACTAGAAGCTG -AAAACTGGTTATCCCTCCCCCATCCTGAGTTAGGATCTACACATACCCTGTGACATACTA -GAGGGTCACATGCAGTAGGTAGTAGCCAACTTGGTCAACATAGTGGATCCAGTAAAATAG -ATGATGAAGGTATCATAGAATTAAGTCTCGTCAATATGCTACATGAGGCAGATCGCCCAA -TCAATGCGCCATAGCCCAAGGCAACCCCAACAGCCAGAGAATGTCTATATACAACACGGA -AATCTCCCAAATCATGAGCATGTCCAACAAAGGACTCGTGGGAAATGTCACAGAACAAAA -AAAAAGGGCATAAGTTGAAGTCTCGAACAGGACATGGGACCCGTTCCTGTTGAAGACTAA -ACAATTCCCCGAGAAAACATCGCTGCAGAAGATAGCTGTAAATTAAATGAGAAAGAAGTA -GTGCAAGAATATAAGAAAGTAGGGAAGTACGACACAATCGAACCTTAGCACCAGGTGAAA -ACGGGATTTTTGCTTTTGACGGCTTCCAGGCGCTCGACAATCACCTTGCGCTGGTTGCCT -TCCATGCCCTCTGCAACTGCGTAACCAAGGTCACCTTCGAGATCATCATTGCCAAATGTG -TTGACATGAGGAACGCGGAAGGACATAGAACCGCCATTTTTGCGATGCTTCTGCTGGCGG -GGAGGAAGTGAAGACCATCCATCTAGAGAACCCCCGGTGTTTCGGCCGCCGAAGTCATCT -ATAGTCGCTGGGCTACCGAAAGAACCATCAGCACGTTCGCTGCCAGCGTCAGGTCGGCCA -CGGGTGAGTTCGAACCGGTTTGGGTTGGGAAGGTCGCTTGGCGACGAATTCAATCCCAAT -AGGTCGTGCTCGGGGATCGGTTGAGAAGATGCGGATGCACCAATATCCATTTCTTCTTCA -TCAGGGTCATAATTCATAGATCGGTTCCTTTGTCTGTCTGTGGAAGGCGCGCGCATTGGG -ATCGCAACGGGTACATCTACCAAGTCATCCCATACACAGACTCGAGTCCGTCGAGTCAGC -TCAAGCGGGCCGTGATCTGGTTGTTTCAATGTTTCCACGACATTCCAAGCTTGACCTTTC -ATTACAGCGATTTGTCGAGCGAACATGCTCTCTTTGAAATCATCATCTTGCGCAAACACA -CCACGCAAAGCCATCTGTGTGCCGCTCCACCAGTCCGTGGAGGTCAATAGTGGAAGGAAA -TGGTCGCGAGTTTTCTGCGAGAAGGGTTGTCCGATCAAAGAAACTGGCAGGAAAAGCCAA -CCGAAGGGAAAACTGCGCCACTGCAAGTTGTTAGTGTTGGAAGGGTGTAACCGTGGAAAT -GAGAACTTACGGCGTCTGGGTGTTTCCATGGCCAGGAAAGACTGTTATCAATGGCACCAA -GCTGAACAGAAGCTGACTGTTCCGATGAGGCTAGTGGAGTTCCAGTGCGACTGATAGCCA 
-TCATTGTTTCATGCCGCTTGTAGGGGTGAGAGGGATTAGTGGTGCCGTTCGAATTCACGG -AAACAGGACGAGGCGGGGGCATAAGCTCGTCCTCATCATGCTTAGGCTGTGGCTCAGTGT -GTTTTGGAGGCTCTGCCACGATAGAAACCTGCTCTGTTCCCCAGTCAATCTTGACCATCC -AATTGTCCAGACCTCGATCAGTATTTCGCATGATGTAATCCAAAATTACAAGCTTCTCTA -ACTCTTCGCGGAAGGATTGCTTGAGATTCTCAGTCCAGTAAAATCGGCGTTCAGCGCTTC -CGTCAGGCGAACCGGCTGGCGAGGGCACATAGCCCGGGTCTTCATCATCCTCATCGTCTG -ACTGTGCCCCCGATGGACGACAGGCCTCATTCCACGGCCGCTTCTTGCGCTTCGGGGCAT -CCTCGGGGCGGAATCCAGTGTTTACCTGGTCAGGCCAGGGATGCTCCCGGAGGAAAATGT -TGGCGTCCTTGAATCCCTTTAGGAAAACCTGGAAACTACCAGCCTTGGGGGGCAGAACTT -TCCTGCCTTTCCGTGCCTTTCTCCGGTCCCAGAAATCGTAGAAGAATGATTTCGAAGAGA -GCCGAACGATGCCGGTGTAGGGGACGAGGTTCGTGCGCAGGCGGGCATCAAGAACGTATG -CGGCGGCTTCAGAAACGTAGGAGAGATTAGGAATGAGACATGCGCGGCCAAAAAAACAGG -GGAACAAGTTGCGGTGAAGCCATTTTGTCCATTTAGGATTCCGTGAGGCATACGGCTCTT -CGTCCTTTGGCTTGAACACACCCACCACTTTCCCTTCGGGGTTTCGAGCAAAGTAGCTGC -CTGAGCTTCCTTGTGAGATCATACGGGGATGTACGTCCATCTCAATCGCAAGACGCACAC -TTTCGATAATTTCCTCGTACTCGGCGTGGGCCTGCCGTTGCTGAGCCTCAAACTCATCTG -ACTCGAGAACCTCGGCGCTCACTGGGGTAACTCCGGGAGGTGCCTGAAAGACCGAATGGT -AAATCTCTAGCTTCTCTTCCTCATAGCTCTTGCCCTTGACACGGTTGATCTTGAACTTGG -AGGCAATCTCGTCCGCCCATTTTTCGAGACGCGCATTGATGGCCTTGATATCTACCCCAG -AGTTCCGACGACCGCGAGCATGTTGGAAGCCACTATTTCTGCGGCGATGGCCTGGGGGAG -TGTGGAGACCGGACGCCTGCATCTGGCCTCGGGAGGAGATGGGAGCGTATCGGGGAGCAG -AGGTGGACACTGTTGAGACCGCGCCGACATCGTCGTCTTCTGAATCATCTTGAAGGTAGC -CTCGTTCCTCATCGGCCTGAGCGAGCCTGGCGTATCCCGAGGTGGCAGGACGAGTCTTCG -GCATTGTGATGGAGGGTGGTTGGATCTATGAAAGCTGAAAACGAGCAAAATACTTTGTAG -GAAAGAGAGAGGAAGGCCGGCAGAAGAACGAAAGAACACAAAAAAAGGTTAATCCATCAC -AGAGAAGAGAAAACAAGGAAAAGACTGCTGGATAGAATAGAGTGTTGATAGAGCCAAGGC -AGTTGAAATAAGTGTCACCTATGTCCTGACGCCCAAGGGGGGCGTCCTGAAAGTATGGTT -TACAGCTAATTTACACAGGCGGTGGCGATATAAGTGGCAGAATGCAGAAACCCGTAGTAA -ATTGTGTGACGCGGGCAGGTCCACCTAGGTAATGATGGTTATACCATTACGCACCGCCAC -ACTCTGTCTCCACCCCCTAATCGAAAGGGGGTCTGGATGGATGCATCCAGTATGTGTTAA -GAGCCCTGAAACTTGGCCAGAACCGTAATTGATCTCCATATGGGAGATATCTTGTGACGG -ATTGGGATATTCAGGGGATGGTCCATATATATGTTGTATATACGGTGGTAAAGCCGCCAC 
-GCTGGCAACTGTCTCGGTGCAGGAAATTTCACACGCATAAAACCCGTCTAGGAATTGATC -ACCTCATAAGCAAATCACATGCGGAATGGACTTATGCGACTGGCGATATCAAGAGATTGG -GGGGCATAGAATGATGAGGCACAAGCAAGTTAATTCAAATACATTCTTTGCGAAACAAAA -TATTCCAACGTTGACGCCCCTGACATGCCGTATTAAATAATGGTTGATACTGGGGCAATC -GTTATCCCGATTGTTATTATCGCGTCTTATCGTCCATCTCAATTCTGCATCTTCTTTCAA -CCCCTACCCTCACTTATCCTCGGCCATCATGTCGTTAATTTAAAACTTCATTCTTTTCTT -CGGTTCGATCAAGTCCCTTAGACACAATCATTGTCGGGAAACCTAATTCTCCCCTTTCTC -TCGGTATCAAAAATCATTGCTTGGAACTTCACGAATCGGCCACTGAGGAATTGTCTTGAT -ATAGACCCATTGGCTCCAACTCTTACCCAGAGTCCGTTGTTTCCAATCGCGCGATTCGCT -TGACACCAACCATTTCCCTTCAATTCGGTCCAGGGCCGGATTTCAATTGACACCTACTTC -TCCCTCCACTGTGGCTTCCACCCTCTATCGGGCGTCGCCCATCGACCCGGCTTTGAAATC -TCGTTCTTCGCAGGAGCGCGAGTTCCCTTACGACTTGTCCGCCGATAATTCTTCTGCCCG -TCACGCGTCCAGCGGATCCATACCGTTTTCTACGACACTACCGGATATCAAGAGCGTGAC -GAAATTTGGCTCAGGCGAGACCGACGGATACCCAGGCTCTTCTGTCAGCACCTCGCTCCC -AGGACTGGCAGCCCTGGCCTCGGTCGCTTCAGCACCGACCTCTAATCTTCGGTGCGTATG -ATACCCCATACCGGTCCTGAATGGAGTCTACCTCGGACCTTTGCATCCATCTTTGCGTAA -TCTTTAACTGGGCCCTGAAGAGCAGTAATCTGGCCCCTCATAAGAATTCGAATCAATCGG -CTAATGCATATATTTTCCCTCATTTTTTAGATCATCCAGTGATGCAAGTATGAATATGGC -AAATATGACCTATGCTACATCATCACCGGCAGCTACAACGGGAGGACAAGGAAGTGGACC -TGTAAGTGTGAAATATGCCTTATATCAAGAAACTGTTATCGGAGTGCTGGAGGTCTTATC -TCTCTGGGGAGTTATCGAAAGTCTTGGCGCCACTCGATCCGTGCACCCGCTTTTTCCCTT -TCCTTATCGACTCAAATCTCTCACAATCTTATCATCCTCTTCTTCCTTCTAACTTCTATA -CCCTCATAATGGCGACCACATTGACTGACGACAAGCGACCTCAATTACAGCCCGTTTGCC -AAAATTGCGGCACCTCTACCACTCCCCTGTGGCGTAGGGACGAACTCGGTTCAGTCCTTT -GCAACGCATGCGGACTCTTCTTGAAACTGCATGGCCGACCTCGTCCGATAAGCTTGAAAA -CAGATGTGATTAAAAGTCGCAATCGCGTCAAGACCGGTCAAGGCCCTAAGCGCAAGGTGA -GTTCGCGATCAAAAAATTTTTCCTCCTAGTCCCCTGCCTTCAACTTCCACCCATCGTAGA -TAAGAACGTGCGCTAATCAAGATTGGGATTTCGTAGTCTGGCGGCCCTGTTGATACCAAT -GGCATCCCACCCCGATCAGAGGCTGGGACTCCTCCCCTCGGCTCGCATGGATATCGTCGC -GCGTCGCGCAAGATGTCCTCGGGTCATTCGGATCGCTCCAACTCCCCCGTTTCTCGGACC -GAGACACCTGGCTTTGGCTCGATGCACGCGCACAACTCCAATATCGCCCCGCAGCACATG -TTTGACAGCGTTACCCTAGGTGACAGCATGAACTCCTCCAGCGGCCTTCCATCCCGCCAG 
-ATGCGCCAACCTTCTCCCTCGGCAGTCGACCGCCAACTCGATTCTCCTCACACTTTCGAA -AGCCTCCTGGCCCTCAACACATCCCTGAAGACACGCGTCAGTGAACTCGAATTTGTCAAC -GAGCTCTTCCGCGGTCGCGTGACAGAACTTGAGCAGAGTGATGCCAGCGCGCGTCGATCA -GAGATGATTGTACGAGACTCCGAAGTTCGTCTGAGACGATCTCTTGAGGATGCTCAGCGC -CGCGAGGAGGATCTCAAGCGTCGTGTTAGCGATCTTGAGCGCCAGCTTGGTTCCAGCAGC -GCTGGTGATGCCGACTCCGGCGAGCCGCTTGCAAAAAAGATCCGGCTTTCGGATGTTGTG -GAATCTGGCGAGGATGTGCTGGTCAAATCTCCCAAGAGCGTTTAGGTCTTGCGGTACCCT -ATTATTTCCGGCCCTTTTTCACCTGCTTATTCGTTCCATCATCGGTCCACACGTAGTACA -TGCTTATGAGCTGCATTGCGCCTGCTGGTCTCCAACCTCTGATATTCTTGATACCCTTCA -TGGCTCCTTCGTCATCACTGGCTGGAACTTTACCTACAAAAAAAGCTACTATTGAACGGT -CTCAATTTCTCTTTCTTATGTCCGGATGTTTCTCTAAACGTCTTGTTTTAATTTGGTTTT -CTTGCGGTGTGCTGTACGAACTCTTTCACTATGCTTCTAATACAATATTTTATCACTATC -CTCTCTTCCAGAACTCAACGATCCGGAAGGCCGATGGATCGGTGATCTGACTGGGAAGTG -TCTTGTTGTTTGGGTGTTTTTCTTGGGATGTTCCTCTCAATGTGCTAGTTGGTGCTCGCA -ATCACCATCATTCCTGTTTGCTGCTTTTAATAGAGCAAAGCAACTGTTTCTGATGGTTTA -GTCCAGCTTGGCGGGTCTCTGGGCCGGGTTGTCTGTTTTGATTGATTTAAACGAATGTTC -TCGCTTTCTATGTTGCGATATCTACATGTATGTTAGGAGGGAAATGAAAGTGTCATGCAA -AATCTTCGCCTATCAATTTAAAGAGCTCATTTGATTATGGCGTTGATGTGGAGATGAAGA -CGGAAGCACATCCACTACCTAGATCGAGAGTAAAGGTAATACTTGAACACGTTGAATGAT -ATTCGAATCTTCAATCTAACGTAATGAAATCGACCAAGTCATGGCCTATGGAATGGAAGG -ATAATCCAAAGGAAGAAAAACGCAAAGGGTCAAATCTATAGAGAAAACAGTGTATGGGCA -AATCAGATGAGGACATCAGCCAAATAACATCCATGCCATTCATCATGACATGTATTCTAT -AGGAAGTCATAAGAGATCGCAAAAGGATATACACAAATGTGCAAGCAAGCACGCATTCCA -TCAGCCCCTATTCACCATGCAGGTTGTACTGCACCTAGATGATTCAAATTGATGGAGAAA -CATTGAACTCACGCGAGTCGGTGGTCTAACTTCTTGCTCATGACGTTGTTTATGTTCCTC -TCACGGATAATGGTCCACAGACAAACAGCAATACCAAACAAGCCAGAAATGAAAGTCATG -AAATAGGCTGAGCGGTAGCAGCCCAGTCCATCAGGGCACTCGCGGTCGCCATCGTGGCCG -ATAACAGAGTGTCCGTCATAGATGCTACCGTATAGCAAATTGAAAACGTTACCACTGAAA -ACTGGGGCTAATGTCATAACGCCCCAATTCTGGGACAGCCCGCCGATGCCGAAAGTGTGT -GCGGTTAGGGAAGGGAAGACGCCAAACAGGAAGCCATAAGCGACTCCAGTGAACCCGGAG -ACAACGACGAGGCTGTTCGGGTTGGAAATGGTGGTGCCGGCAAGCTGGGTCAGAGTGAAG -ACCACCGCGGACAAGAATAAACACCAAATGCGGGACATGCCGAGCTTCTTGACCAGCAAG 
-TCAGAGCCGATGCCACTGAATAGACGGCCTAGGAAATTACCGAAGGAAAGGATGGAGACG -TGCATGACCTGACGGTGTTGGATGAACAGGTCTGTTGCGCTGTCATCGTAATATAACCAC -AGGGCTTTCACACTGTTACCAATGTTATTGATGGTCATCAAGCCGATGCCGGACAAGAGT -GCCATGGTTAAGAAGAGCTGCCAGAACTCGACTTTGCGGAGCATGGCGAGACCTCGAATG -TCGTTTTGGTGCGCCTCAATTGCTACTTCGTCTAGGAAGTCATCGTCAAGTGTATCAAAA -GAAGGGCGCGGGGGGTCGTGCTTGGATACCAGAGTGGCAGTCTCGTCGACATTGGCAAAT -GCTCTGCCTTGGGAAGTCGAGGCAGCCGAGTGCGACCGCGCATGCGTGGGGTATTGTTCG -TGATCATAAGCGGTAGAGCTATGTAGATCGGAGTCCTCTGGCACAGGTCGAAGGTTGCTG -GGATTATGTCTTCCTAAATCGCGTGCGTTGTGTGGGATAGACGAGTAGGATTCGCTGGCC -AGGATTCGAAGGAATGGAATTGCAGCAAAGGACAAGATCGAGGTTCCGAGGGCTAGAAGC -AAAAGAAAGCGGCCTGTATCATCCTTGAAAATGAGAGTCGACAGATTAGACCAAAATAGC -GCGCTCAGTCCAAAGGCGGCCATCGGGAAAGCGGTTGCCGTGCCGCGGTGTTCAGGGAAA -TTGGATGCCGCTGGAATCATTGGGTTAACACTATCAAGCAGATTCCTTTTCATGATTGCA -TACCAGTTTTGATTGCACCACCAAAGGCAGAACAACCTCCCATGCCAGTAAGAAAAGCAA -ATAAGCATAGGAAAACAACCGGCATTGATCCTTGACCATTGTCATATGCTACAAACATTA -GAGCTGCTCAATTCCAATAAGGTAATGTCAAAACATACCCATATAGATTGGGAAGTATCC -CAGGCCCAGGCAAATGGAGCCAATTAGTGAGACAAGTCGAGGACCGCGGGCATCTGTCAA -TAGTCCCATCGGGATCCCCATAGCATACATCCCCAGGTTCCCCGCGACACCCTAAAGCTT -GTTAGGATCCCTTTCTCTACACCGGAAACATGACAACTCACAATAAAGTTGCTCTCGGTC -GACGAGAGCTTCATTCGCTGTGCGAATTGCGGCGCCCACGCGGAGTATGCATACTGACAG -GGGAGTTAGCCTTGACGGTCCTGAATGGGGATAAGATCGGAGATATCACGCACATTTGTT -CCGCATGAGAGGGCGACCAGCGTCCCCGCAATGACGGAAATAACCCGGGCAGTTCTGTGG -AGAGAGCCGGACATGATGTTTTGTTCTGTGAGTTGTGATTGATATCTTTTGCGATATACG -TGTTTCAGTGGGAATCGTTGATTAGGCCCTAGACAAATGAGAGGCCGATCCTAGGAAAAA -CATCGGACCCGAAATGAAGGACACAAAGGGAGAAAAGAGCGAATAGAGTTATACAGAAGG -AATTGAGGGAAGAAAGCCTGGGAGTTTTCAGGAGCGTACACTTAGGTAAGGACTTGATGG -ATCACCCCCGAGGGCCGCGTGGTCGGAACAGCCGTCAAGCAAGTGCCAAGAAAAGTTTGC -TCCACGTCAACTGATGTACGAATACGATGGAGTTTGCTGAGGAAAGAGGAATAAAAAAGA -GAAATGGAGTGTAGTCTATGATACGGCGTACATATGTGCTACATGTTCGACCTGGAGAGT -CCTACGCTGCATTGATAAGATTATGCAAATGCGTCAAATCTTCCGGAATTGCGCGAATCG -TGACTCGTCATAGTTGGAGTCCACCGCGAAATGTCTTTCTCCACAAGATAAGGCGGACAC -ATCTCCACTGCAACGTTCATGTCTCATAAGGCCTGAGTGCCTGGCTTACTAGTGAAACGG 
-GTTAAAACGGTTGTCAACTATCAAAACTCATCCAGATTCTTTGGCTTATATTATATATAC -CCTTTGCTCTATAGTATCCAATGGTTATCAATTCAAAATTCGCCTAGAAAGGGAAGATCA -AGACATCGAAAAAGACCCCCAAAAAGGCATAACTAGGCTGGATACCGCCGACTAAAGGTT -CGGGTTGTTTACAATAAAAGTATAACGTAGGTTTTTTTTTTGAGTATCTGTCCTAAGACA -AGGATAATGCCAATGTTCTATGGCATCAGTTTACATTGCAGATGATCTCGTCATCCAGGA -ACTCAAATTTTATACTGAATACACTTGAGACATGTGTAGTGACTTTCTGATAAACAATGT -TAGAGGGAATGGAGAGAGAGAGTCTAATCCCAAATGGGCAGGACAAAAGAGGCTTGGCAC -GTGATATATTTTCGACTTGAATGTATTACGTAACACCCATGACGGAGGCCTCAGATGATC -TTCAATCTGATCACTTGCCTCTCTCCACACTCTCATCGATTCTGGAGCTTGCTTACCTCT -TATTGTGTATCAGAACCATTATCAGGACACTGGATCATCATATATCTGCACTCCCAAGAT -GGGTGCCCTCCTTTCCATTCCGCTGTTGGCTGTGCCAAGTGTTGGTTCAGTACGTTGCCG -CCTATCTCCAGTTCCCCGATACACCTGCATAGTTGGCTAATTGATTCTCTCCCGATAGAT -TTTGACAGTCGCAACATCATGCTGTGGAGCAGCAACATGTTCTGCAGCCTGCAGCGCCTG -CGGAAAGTTCCAAAACAGGTATTATTCATCACTTGCCCAGTGCGGAGAACCTCGCACTAA -CAAAGACAACAGTATGGCGACGAGAATCGCCTATGCCTTCATTCTTTTGATCAACTCCAT -AATATCCTGGATTATGCTCACACCATGGGCTCTGAAAAAATTGCAAAAATTGACACTAGA -TTATATGGAGATTCGATGCGACGGGAAGGAATGTCACGGCTGGGTCGCGGTCCATCGGAT -CAACTTTGCCCTCGGTCTCTTCCATTTGATCCTCGCCCTTGTGCTTCTCGGTGTTAAAAG -CTCGAAAGATACTCGTGCTGCTATTCAGAATGGATATTGGGGCCCGAAAGTCATCCTTTG -GGTGGCCTTTGTTGTCATGTCGTTCTTCATCCCCGAACCCTTCTTCTTCGTCTATGGCAA -CTACATCGCCTTTTTCTGTGCCATGTTATTCCTACTTTTGGGTTTGATCCTTCTGGTGGA -TCTCGCGCACTCGTGGGCGGAGCTATGCCTGCAGAAGATCGAAGACAGCGACTCCCGTGT -GTGGCGCGGCCTGCTTATTGGTTCCACCCTCGGCATGTACTTGGCATCTTTCGTCATGAC -AATCCTGATGTATATCTTCTTTGCGAGCAGCGGTTGCTCCATGAACCAGGCTGCCATCAC -CGTATGTATCCCTCACACGACGATTCTTGGTACCTAGCTAACTCGTCGCAGATAAACCTG -ATCGTGTTTTTGATCATCTCTTTCGTTTCTGTTCAACCAGCAGTGCAAGAGTCGAACTCT -CGCGCAGGGCTAGCTCAAGCTGCAATGGTCACTGTCTACTGTACCTACCTCACCATGTCT -GCCGTTTCGATGGAACCAGATGATAACAACTGCAACCCTCTCATCCGTTCCCGCGGAGCC -AGGACTGCCACTATCGTTCTCGGTGCAATTGTTACCATGCTGACCATCGCATACACCACC -ACGCGGGCTGCCACCCAAGGCATTGCCTTGGGATCGAAAGGCGGTCACAGCTACATCCAG -CTGGGCATGGACGACAACGAGCACGGTTTGGTTACGCAGCAACCAAACGCCCGCCGAGAG -ATGCGCGCTGAAGCTCTCCGGGCCGCTGTCGAGAGCGGCAGTCTTCCTGCTAGTGCTCTG 
-GATGATAGTGATGACGAAGATGAGTATGACACCACGAAGGACGACGAGCGTGGTTCAACC -CAATACAATTACTCGCTCTTCCACATCATCTTCTTCCTCGCCACCACATGGGTGGCCACT -TTGTTAGTCCAGGGACTGACGGTGGATACCACGGCGGATTTCGCGCCGGTGGGCCGCACA -TACTGGGCCAGCTGGGTGAAAATTGTCAGCTCCTGGGTCTGCTATGCTATATATCTGTGG -ACTCTGGTTGCGCCTGTTATGCTGCCGGATCGCTTTGGGGTTTATTAGTTTTTATTCTAC -ATTGATTTCGGGCGTTAAGAGCGACCGTGTTGCTCCTGATTCGTCCGTTTTGGTTTATAC -CCACTCCTGCAGTGCTACACAAGTTCAAGTCCACAAATAAATATATTCGTATCCCTGTAG -CGCAAACAAAATTTTTGGGTAAAGTTGAAGGGCGATATCGAATGTATAGATATGTCGTAT -CCAACTGGCCCACCGCGGTCTTGGCAAAACCACGCGTTAAGTGAGACGCGTGCCAACTTA -ACCTCACCAGGCATCATGGGATGAAGATGGCGACAATCCAAGCCATCGAGGCCCGCTCTG -TATGAAATTTGATCCCCCCCCCTTTTCCAACGCCCAATCTGACCAGCGCATTGATTAGGT -ACACCAAATCCAATCGGGTCAAGTCATCGTTGACTTATGCTCCGTCGCCAAAGAGCTTGT -CGAAAACAGCCTGGATGCTGGCGCGACGACGATCGGTGGGTGAATAACATTCCGGAAATG -GCTTGTGATGCACTTTCTGATGTTTTCGGGGGTTCCAGAGGTCCGGTTTAAGAATAATGG -ACTGGACCTGATTGAGGTCCAAGATAACGGCAGCGGGATCTCGCCAGAGAACTATGAGAA -CGTCGGTGAGTTGTCACGTCTCTCTCGGTACATTACACTAAAGCTGATTTTCTTTTTTTT -TTTTCTTAGCATTGAAACACTATACCTCCAAATTGACATCATACGATGACCTCTCGAGCC -TACACACCTTCGGTTTCCGGGGAGAGGCGATATCCTCACTCTGCGCGTTAGCCGACTTCC -ACATCGTCACAGCACAAGAAAAGCAAGTTCCCCGCGCGAATCGACTCGATTTTGAACAAT -CTGGGAGACTCCAGAAGACACAGATCGTGGCTGGGCAGAAAGGAACGACCGCATCGGTCG -AAGGCATCTTCAAGCGGCTTCCCGTGCGTCGGCGGGAATTGGAAAAGAACATTAAACGAG -AATACGGGAAGGTTCTGAATCTACTACATGCATATGCATGCATTAGCACTGGAGTCCGAT -TCAGCGTCAGAAATACTGTTGGCAAGACACGAAACGTGGTGGTCTTCTCTACAAATGGAA -ATAAGACCACCAAGGAAAACATTACCAATGTGTACGGCGCGAAGACATTATCCGCCCTGA -TATCCCTTGATCTAGAGCTGGAGTTTGAGCCTGCCGCAACTACGAAACGGGTTGGCGACG -ACCAGCTGACTCAAATTCAGGTGCGAGGTCATATCTCGCGTCCTGTATTCGGCGAGGGCC -GTCAGACTCCGGATCGTCAGATGTTCTTCGTCAACTCGCGTCCATGTGGCCTGCCGCAGA -TTCAGAAGGCGTTCAACGAGGTTTACAAGTCATTCAATGTGTCGCAGTCGGCTTTCATCT -TTGCGGACTTTCAGATGGATACAAATGCTTACGATGTCAACGTTTCGCCTGATAAGCGCC -AAATCCTGCTTCATGATGCGGGGGCTATGATTGAATCGCTCAAAATTTCTCTTACGCAAC -TCTTCGATAATGCAGACCAAAGTGTGCCGCAGTCATCGGTCAAATCCATGTCTTTGCCAA -AACAGCAACCACTAGCGTCACTTCCAGGGTTTGTCACTGCCCGGGAACTCAGCGAAAGCG 
-CTGAACGAAGTCCAAGTCAAGATCGGACGAAGGAAAGCTCTGTGGAAACTCCGGTTGTGA -GTCAACAGAGCGCGATTGAGCGTTTCACAAGCTCTCAGGGGGGCATGGGAAGGGCTCAAA -CCACGCCATCTCCAGCTCGATCGATGAGAACACCTCATGCTACAACCAGATGCTCTTCAA -TGTCGACGCCAGCCCCAAGAGCGCTAGAAACAGCTGATGCAGAGATCTCTGACGATGAAC -TTTTTGTCCAAGAAGCCGCCCAGGAGCAGCCTGGGCCGACGCCCAGCTTCCACGAACCCT -CATCCCAGCCATCGGCTCCTGACGAGACTCCATCTCGCGCTCGTGGAGACCCAGCAGAAA -ACCCCAATATTATCCAAAATGCCTTCCATAGAATGCGTCCACGACGGGAACCCGCCGAAC -TGGCGACCATCACTATTGGCAACCGAACTGTAACATCCGTGGTCGGTAGTGGTCCTCCAC -GAAAACGATTCTCAGAAGATCCACCTTTAGTGATGGAACCGTCCCGGCGGAAGAGACGAA -TACATACACCATCACGGCCTAATATCTTTGGTAAACATATGAGAGCCTTTGCTGCGCCTG -GATCGCAGCTTGATGATGGAGAAGATGAAGAGGGCTCTGCTAGTGAAGCTGACCAAATTG -AAGAAGATGAAAATGACCTTTCGTCTGAAATTGAGGACACACAAGACGTGGAGCAAGAGC -CCGAGACCGAGGGTTCTCCTCCGCCCGACCATGGCGACTTAGTCCCCGACCAAGGTGCAG -CCCAATCAGAAATCGAAAACACTAAGGCAAAAGACAAAGACACCGCCGTAGATGAAAATC -TCGATGAAGCTGAGAAGAAGGCACAGGAAGAAGCTACAGTACAACGGCTCATTCATGAAG -CCGAGGAGACTGCTGTGCTTCCTCAAGAAAATAGTACAAACCGTGCAAAGAAGATGAACA -AGGGCGCTGCACATCGTGACGCGACTATACAGCTTGTCAGCACCGCAGACGGGACTATTT -CACGTCTCCAATCTCAACTCATCACATTGCAAAAGACCCTGCAAACAGGCACCAAGAAAA -CGACAGAGGAAGCAACTGATACTGGCGAGAAAACCGTCGAGGACAAACTTTCCTTGACTG -TAAGCAAGAATGATTTCGCCCAAATGCGCATTATTGGCCAGTTCAACTTGGGCTTTATTA -TCGCAGTCCGTCCAGGAGACGATCGCGATGAGCTCTTCATCATCGACCAACATGCCTCCG -ATGAGAAATTCAATTTTGAGCGCCTGCAAGCTGAAACTGTGGTGCAAAATCAGCGACTTG -TCCGTCCGCAACGCCTTGATCTCACTGCCGTTGAAGAAGAGGTTGTCCTTGAGAATCGCG -TTGCTCTTGAGAAAAACGGTTTCCTTGTGACCGTCGACGAAAGCGGCGATGAGCCTATCG -GTCGTCGATGTCAACTGGTCTCATTACCGCTGAGCAAGGAAGTGGTGTTCGGTGTTCGGG -ATCTGGAGGAACTGATTGTGTTGCTATCAGAGTCAATTTCAACGTCAGATGGGTTGTCAG -TACCACGGCCGAGCAAAGTCCGCAAAATGTTTGCTATGCGCGCATGTCGCTCAAGCATCA -TGATTGGAAAAACCTTGACGAATCGCCAAATGGAACGGGTCGTTCAGAACATGGGGACTA -TTGATAAACCTTGGAACTGCCCCCATGGTCGACCGACGATGCGGCACTTAATGAGTCTGG -GCCAGTGGAATGAATGGAATGAGTTTGAAGAAACCGAAGGACTCGACCCTTGGAAACAAT -TCTTGgaaggagaagatgaagatgaagaataaTCCATAACGATACCCTGTGCGTGATATA -GAAGTATAACAAGAAGAAATTAAATGACCAAAGACACGAACCATCACCACCATCGCAACC 
-ACCCTTTCGCCGGCTGATCCACTTTTTTTCCACTAGTATCATGATCAccatgtccatttg -catttccattctcaacatgtccatTCAGTTCCGTATCACTGCCATCTTCGCTCCCGTGAT -CCCCACCCCTCTCGACGTTATCCATCATATCCTCCAAGGCCAGCAAATCACGCTCAATCA -TTTCCGCCCAATTCTGCACATCTCCAATCTCCTTTAAACCCTCGCGCGTCTCATCTGCGA -GTCCAACCCACTGCTGATTTTGCTGCGCGAGTCTCTGCGTCGTCTCCACTAGCTCGGCTT -CTTGTCTTTCCAGCGCTGTCGCGTTGGACTGGATATTTGCTGCTCGGTCGCGGAGCGGCG -CTTCGAGGTTTGTTCCCACTGAGTGGAGTGTTGCGGTGAAGGCTGCTAGCGCTTCTTTTT -GTCGTTGGTCTGTGTTTTCGGTTGGATTTGGGTTCGGTTGCGGGGTGGAGCTTTCCATGG -TCTCGGAGTCGGGTTTTGGATTTGTAGATGTCAGAGATGATGCCGATGATATATAAGCCT -CTCTCAGATTCCGAACTGGTTACTTCGTACTGATATAAGAAATGTAAACAAGATCGACCC -CACCCGGCACATGTCATTTCCCCGCCCTAAGTGATGGTTTGTCGAGTGTCTGATCGCCCT -ATGTTCATTCTGAAGACTACAATATCGAGTTCATTGGTTTTATTTCTCTTGCAAAGAAGG -TGTTTGAGTATCTTTTCTAATGCAAACTGATCAATACAAAAGTCGAACATAGTGTCCCTG -ACGGCGGAGGATCTATACCATGACTGAACTATTATTACCTATGATCACTGCGCAGAAGAT -CTCTGGAAAAAAGTCTACTAACGAGAATACTTTACAGTAAGTACAGTCTCAATCTGTGGT -ACTTCTTTTGGGCTGTAAAGCGAGCCACTGGGTTTGGGATGAAAAATATATCGTGATCGA -AGATTAGAAGCTGTCTAAGATAGGAAAACCATAACTTGGTCTAGACAGCCAAGCATTCAC -AAATCCAGAACAATCAAGTACAGAGAAGAGAATCGGTATAGCACAGGCTAACAAATGACT -AGGGCTCTGCTCGGAGAGACGTGGACAAGGGATTGGTCGACATTGGATGCTGTCCAGGTT -GGTATACCACGCAATTCTTTGGTTCCCACCCTAGTTCCACCCTAGTTCCCAAAAAGGAAG -GGATTCTTGGTCTTTGTTACGTTTGAAGTGGTAACTACACTGTTTTTTGTCTGACAGCTC -ACTTGAAAGGCTCATACACCCTGTTACCTTGAGACTAGATCCCAATTAAGCTTAGTAGAA -TATCTACTGTTCCTAAGTAGGCATAGCATGCTTTTGTTTCTTTGTTCGCATCTGCAACTC -AGCTGGGGAAGTCGCGTCCCGGAGGTCGCGCCTTCCTCGAGCTGTCACCTCAGGTCCCCT -ATATAAAAGCCTCCCACCTAGGGTTTTCAAATTCCTCCTGTCAACCACCTCCGTCACAAG -CTTTCTAGATCTAATCTAGCACGGCTTACTTGACAGGAGTGGTACAGGTGAGCACCATAC -TATATGATGTGACCCCGAACTCTTCTGACACTTGACAGCCTTAGTAGATGTGAGGGGGAC -TCTCCGAAGTCTTCTGAACATCGACCAGGTTGGGTATGTTATCGTTATCCCACTCTTTGA -GTTTATAAGCTGTCTGACTCGTGCTAATTTCTGAGTAGGTCCTTCGTTGGAGAATCTAGA -TCGGGGTCCCCGATAAGCCTCGACGACATGGTTGTATGTTCCTGCCAGTGACAATATTTT -CCCCCATCGCTGACTTGTCGTCTCTCAGAACCTTCGTTGGATGGCGGCAAAGTGTACCTG -CGCTGGGCATGGACTCTGTCCTGTCAGCCTTAGTGATGTGCGGGTGAGTTCTTCAGCCAT 
-TGGTCATTCTATCATCCCTAACTGCATCTCCTTGCCATCAGAAACTTTGTCACCAACGCT -CGGACCAAGTCGGCTCTCTCCTCGGAGGTACGTTGTGCTGTAAATCTTGGGGAGGCAGAT -GGGATCCAAGGCCCATCAATCCACGAGGGGGGGATAGCCCGACCCGGAATGGCATGTACG -TAGCAATACGAGTCCCCATTTCGTTTGTCGTGGCATCCAGGACGCAAGTCCGGGTAGGTT -CTTCCCGTTTGTGGTCATTCATCATTCCTAACCTGTGGTCCTCAACAGAACCTTTGTGGG -AAGACCTGAAGTCAGACCGGGTCTGCACAATCCCAGTCTTGGCGGGTCCGGCTAAATTCC -AGGTGAGTTCTCCAACCAGTCTGTTCGTTTTCGTTGCTAACTAGAGGTTATTTCTATCAG -TGCATTCAGCAGAATTTCTGAGATTACGCAGCAACGACTGTATGATATCACTTGCAGTTG -GCCATGCAACCCCGCTTTCCTTTTTCAAAGAAGCGAAGAAACAAGTTGGAGCTCCAAACC -TTGCGGTTTACTTGTTTGGCCTTCGGGGGGGCTTTTACGTGGGCTGTGGGCTTTTTCTGG -TAAATAAGAGAGCTACTGTTCTCTTGCGTGGGTACATGATCTGGAGGAATCATACTTTGA -GATTAGTACACTTAGCAGGACCAAACAACAAAACATCCATTACATCTCTGATCGGAGCGG -GCTGAATAAACAGGGAATTATTTGCCTGCTATGCTCCTGATCAATGATGTACCTGGAAAC -ATTTCATCCCAATCAAACGACATCGACCGTTGTTGTAGCAGTATCTAGCTTTGGATTCAT -GATAAATATGACCTCTGAGTCAAGTACGTCCTTCTCCTAATGCGTCTTCTCCATCTAGAC -TCGATCTATCTGCGGATAAATTGTTGGACCTATAAAAACGTTGGGTATTCGTATTCTGAG -AGCTAGCAGACAAACGCTCGGTCTGCTAGCAACCAATGGGAGATAGGATCACTCAACAAT -ATTTCCAGAAGTCAAAAGAAATATCTCCAGTGGTGACTACGAGGAAAAGGGCGGAGAGGC -ACCTGTGAGGTCTAGATCTAACCTCTAGTAAATTCCAGTAAATTCTAAGAACATATAGAC -ACATGGCGGAGACAGAAAGAACCCCAAGGGCTTAGTTTGTAGAGCTTCAGTCTTATACCT -CGATGTGTGTAGCCTCACCCATTGGCTTTTCATTACTCTCGGTGTTCAAATTTCGATGGT -GCCCAGGGTGGACAGGATAACCTCTGGTTTCCCAAACACCTCCAGTAATTCCACCTCTCG -AGAAAAGGTGATCAACATCCTCCAGTTCCAAGCCTTTGGTCTCGGGGAAGAAGAAAAAGA -CCATTGGAATCTTGGTTCGGTTAGTATCATAAAACGCAAATATTCAGTTCTGGATTGGGG -TCATACCCAGGCAAAACAGAAGCAGGCGAAGATGATGAATGTTTTCCATCCAATATTATC -AATTGCCGTTGGTGTAATTTGGACGACAATGACTGCGACGGTTAGTGAAGTCACGTACTT -CCCCTGGGGACCAATCTACTTACAGACACAAATCCAATTGACGAATCCACCAAAAGCCTG -AGCTCTAGCTCGGATACGAGTAGTCGAGATTTCAGAAGGATAGAACCTGTTGATTGTCAG -CATTGGTACACAAATGCAAGTCAGATAGGAAAAGCTCACCAAGGAACTGGCAGATAACCA -ATGCCAAGGAACAACTGGAAGATAAAAACGAAAACAGTAGCTGCATATGCACATGACTCT -GTGCCAATCGATAATAGAATGGCAGTCATCACGAAACAGAAACAGAGACCAGCGGCGGAG -ACCATGAGTGATACCCGTCGGCCGAATTTGTCCATACTCCACAGGGGGATAACAGAGCCA 
-ACAAGGTATGCGAGAGAGGTGCAGCCACCAAGGACCAAGGAGAGATTGCGAGATAGGTTC -ATAGCATTCTATTTAGGTATTTGGCTTAGTTTGCTTGTTCAACTGGAGATGGGAATAGGA -ACATACCTGGTAGACAATGGGGGCGTAGTAATTGATCATGTTAGAGCCTAATACATGTCA -ATACAGGACTCTCTCACGGCACCTGAACTGGTATTTCTCACCTGTAAACTGCTGTTGAAT -ATTGATAACAGCAGCCAGAATCATTCGTCGAAGATTCCCCATCGGGCCACCCGACCAGAG -CTCCTTGTACTTGAATGGTCCGCCAGCAGACTCAATCTCAATTGACGATTCTATTTGATG -ACGTAGAAGAATAACCTCAGGATTTGACTCGTCAGCGGGTTGGTCCATTTGTAGACGTGC -CAGGACTGATGCCGCCTCGTCACTGCGATCTTGTTCACAAAGCCAGCGTGGGGAATCAGG -CAGAGGAATCATCTGCAGAACCAAGAATACTGCAAAGAAAGCTTGGAAACTAATTGGGAA -CCGCCATTGAACCGGTGTGTCCACAAAGCTTAGTCCATAGTCCAACCAGTATGCGATACA -GAGACCGAGAATCGTAACAGTGCCTTGGGCAGAGAGCAACATGCCACGCTGCTCTGGGCG -AGCTAACTCGGACTGGTATGCTGGCACAGTACTGCTGTTGATTCCATTGCCAAGGCCAGT -GACAATTCGGCCAACGAGGAATTGAGGAATGGTATAGGAACTGCCGAGGATAATCGTTCC -TATGATCATGATAGAACCACCTGCGATAATCATCTTGCGGCGACCGTATCTGTGACCAAT -GACGAGACTCAGGAGGCAACCTACTGCACAGCCGATATCGTAGATCGAAGTAAGGAGACC -CTGCATCGTGCTGTCAGGATTGTTGAATGTTTGTCCGAATTGATTGTTCGCGCCAATCAG -GCCGCTCATCACGCCCTGATCATATCCCAATAGAAGAAAGGCTTGACAGCTTGCAGCCGT -CACAGCCCAGTGAAGGTATTTTCCGCGTAGGACCATAATGTGCTTGGATCAACGGGGATA -CTAGAGAGACATTGGGATTCCGGGGCCCCAGGTCCTTTTCTCCAGTCAACCCTTTCCATA -GCATGTCGGTTCACTCTTGCCCGAAAGCCCCTGTTCGGTCCAATATACCCCGGGGCTAGT -ACATTCACCTGCGGGATGAAACAGGGTTAGCGTATATTTTCCCCCGCGGGGACCCCACAT -TGACGAATCCCTGCGATTGCCACTACCCAGCTAGAAAGCGGCCGGGGATCTCTTCAAGCA -TGAGACCCTACGATTCAGTTGGAGTCTGCCATTCAGTCTCTGGGCTTTAGCTCTGGTGAG -CATTGTCAACACGTTTCTTTTGAGATCATCTTCCATTGGTGCAGCATCATGATACATAAA -TTTAGTGGTGGTCTGTCACCTCGGTGGATTAAGTCTACACGCTTGAAAGGACCAGCATAC -ATTCATTATGCTTAATTTATTCTGAAGAGATAGCCTAGCTGACCAGAAGCATGGCTAGGT -GTGTCTAGGTCTTTTGTACTGATCGTAAGTGCCGTAAGTCTCTGTTGGCCGTTAGATGGG -GCAGTTTCCTGACTGGGATAAGAAAATGGCCACCCTCCCGAAACTGTGGGTCGCACTACT -GGGGATAATCCATGTCAAGCACGAAGAAATGAAGGGCACAGCTCAAGTCCCTTCGTATGG -TGTTGTCATGCTATCACTTCTTGTTCAATGTTCTTGATTTATAAATAAATTCTTTCAATC -AGCTTGGGCACCAAAAAGACTAATTTTAGTTCAGAAATCAAGCAAAACTTCAAGATTTGA -GCATACATATGTGCCGTAAACCCACATTTTAGTCTCGGGACTACTTTTACTGCTATCAAA 
-TACGTTACTACGAAAAATTTCCGGGGATAGAAGTAAGTTTGGGCGAATGAGTCGTTATAT -ATTTAAAGGTCGGAGTATCTGTTTACAGCCTCAATTTGGCGATATATCCAATGCTGATCG -GAATATAGGCGCATGAGGCTCTCCTTGGGCATAATCAACTGGGCTTCAAGCTTGTGAGTC -TTCGTTAGAAATGCCCCATTGTACCCGTAATAGCCTTTTCTATCAAGCACAGTACGGCAT -CGCGGCATCTCCCTACACATAGTGGCACTAGCTCGTTCAAGGACACGTACCTCGTCAGTA -AGATCGGCGAGGTTATCAACTGCCAGGGCCAGACGGAGAGGCGAAATCCCCTTGCGACTC -ATAATAACATCAATCTCGAGCTGCAGCTCAGCCTTGAACCGGTTGCGTGTACGCAGAAAT -TGATGCTCAGGATTTCCTGTCGGGGAGTATATAACTCCAAATCGGTGATCTACAACGAAC -CTGGGTAGGGGGGACTCTGTCAAATTAGCTGCTAGGTATGAGTAAAGACGTAGTTGATCC -TGATGCACGATCGTTGAGAAGACCGTGGGATCGAGTCTTATTAAGACAATATTCGGCAAG -GGTGGCTTATGATTGGAATCAAAGAGGAATAGTGTCATGAAAGAAACAGTGCGAGTCACG -AAGCCTTCTTTCTCTTTTATTTTCTGCCTCTTCCAGGGCCGCGCTCATTCTTTGTGGTTT -GAACAGGGATACTTAGCCGTGATAATGAATGAGTAATGCCGACTGTATACCTGGAGCCGA -ATCAGTGAAAAGGCTTGTTTAATTCTTTGATTTACTATATTGCATTTTATACATCGAAAT -ATTTGTAAATTTCCATGCCGAGGGTTGTTGTATGGGTACAAGACAATGTTGAGGTCTATA -ATGTCTCGATTCTCCAGAGGGAGCTTCCAATCGAGCGAGAGGTGGGTATTTGAGCAGCAT -GTTGCCATTGTCAGAGGCGCAACGGAGTAGAATCTATATCTCTGTCATTTGCAGTGAAAA -TGCCCTATCTTTGACATAAATAGTATCTGAACAAATCCCATGTAGCTTTATAGTTTAACC -TCATAGCATGATCATCATGGAATACTCTCGATCATAAGTATACCACATCAAGAAAATAAA -CTTGAAGCGTATTCTAATAGATCAATCCTTGTCGATGCTGACACAGCCTTGATAGCGGCT -GTGAAGTAAGGTATATCGACACCCTTGAAAGAACAACTTCTTCCATGAACCTCGATAGTC -ACAGTGCCAATTTCATCGTCGCCGGAAATGGCCTCGTGAATCAATCTTCCTTTATCGACT -GAGCCGAGGACCACAGCGCTGCTAGTAGTCTTTTCTAGAAAGAGATATGGCAGATCGACC -CAACCTAATTCTGAGCCACCCATAGTGAGATTAAAAGTCACATGTCCCTAAACGTGTTAG -CTCATACTTCTATCCCAGGGTATGGGATGGATTGATATACCAGTTCAATAACGAAGGGAG -TCGGATTGCGCACGGAGATTGTGGCATTGAAGTTATATCCGTCAACTGGAGGATCCAGGA -GCTCTATACTGGTGATATTGAATGCGCCTTCGGCGTTGATGAAGTCTGCAAATTTTAGAA -AGGCTCACTCCTATCGAGCAACTTGATATGGTATGTCTCACCTGTGGCATTGTAACCTAA -TATTGTCAGCTTGTTTAATATATGTGACCGGAGCCCTATACTTACCACCCACGTTCATTG -TTTTATGGATGTTTAAATGAGCAGTTGGTAGTGCTCCAATTTTCAAATCTGATGAGCCTT -CAATCAAGAGTGAAGAGCTCTTGTTAGATCCAGCTGCAGTTGCCAATCGTGACATACAGT -CGACACACGATAAATGCATATCTTGATCTATCTCGAGGGTGGCGTCTTTGCCAAATGCGA 
-TATCAGGCACTGGGAACGTTGTAAAGATCTCATTGGTATCTTTAAGACGACATGTGGCAT -TGAACGCGTTCATATGACCACTACCACTGAACCCACCACCCGTTCGAAGAGTTTGTTTTT -GTTTAACGTGAAAGGCGGTTGGCCGGGGGTTGGTAATCTCAAGGCCATCGGAGTCATATT -CGTACTTGTTGATATAATCGTTTGCGAAATTGGGAATTCCAACATAAAACCTTAGAAATT -GTAAGCACAGCCATTATCATTTTCCATGATGCAGCTCACAGTGGTAGAATGATTATTAGG -ACAGAAATGCAAAATGCGATCAAATACGCCCACCAGAACCTCTTCATATGTGCCTTGAAT -CGGTTCCATAAGCTTGGTTTTTCCGGAGTTATTAGCACTTCCTCCACCTCAGAGTTACTT -GAAACATCGGATTTTGACATGATGAGGGCAACAATGTCGATGAGTGCCAAAGGATTTGCT -TCGGCCTAAGTCGTGGACAGTAAAGGTCCAGTAGATAGCAGTACGCCAACTGCTTCCCAT -GCTGTGACGTGTGGAGGTAGAAGTAGAGGAGAAATTAGATTGGGGATTCTAAGATTGAGT -GAAGTAGTTTCTAAGCTAGTCGTCGCTTCGAGGTGCGTTACACCCCGTGTCGATCAGACC -CGCCTTTCACATGATAGGGAGATATGCACAGCAATGATTCAAAACGAGAAGAATATCTTT -GCGAGAGTTATTGACGTCATAATAATTCTTCCTGGATCAGCCATGTTGTTTTATTGTGAG -TTCAAAAGCTAGCGTTTGGCATTCAAATGCTCGAAGACGAGGCTCAAGCAAAGCGATATG -TGAGAATGCAAGTCCCACCGACACCTTTTGATTGGGACTAGTTTCCCTGTCGGTGGGGCA -CCTAAATATGTATGTTGTATGCTCCTGTGGTGTCTGTGTAGCAAAAGTAGTCAAAGTAGC -CAATCAAGACCTTCCATATGTAACCAACTTGCCTCAAAATGGGGACTAAAGCCTTATATG -TACTCCTCGTATCGGATACGTGATCAACACAATGACGTTTGATAAATTGCAGATCGTGCG -CGGAGTAGAGTGATCGGCCAAGACAGACCTCGGCTGTGGATTCAACTGCTTGCAATGTAC -CTCAAATATCATAAGAGCATTGCATGAAAATGAACCCGGAACTAAGGAAAGTTGTACGCA -TTAGACCTCCGCAAGGCCCAACGCCCAACCCACCCCAATTCACTAATGGGCTGGGTTTTG -GGCCCAGTCCTCAGGCCCCCGGTGAGCTATAGACCTTGAGTCAAAGCCCACTGGCGTCTA -AATAGGGGCCCCACAGGGCCTGACAACACCAAACAACAGAAAAATTCTAAAAAACGTGAC -TTTTTTAATTTCCTTGTATTGAGGCCCCCGTTTTTACCCTTAGGACAAGGTGATGTTTTA -CTGTTTTAGCTGATATATCATTGGTAAGTAGTGAAACTTATATATCATTACCTTTAAACA -CGCGAAAGGACCCGATATACATTAACATTTCCTCTAGACCTCCACTGGTTGATTTTATTT -GAAATTAGAGTATACACCTATATAGTTATCTCAATTATAAAATCTACATTTGACGTATAC -AATTCGTAGAAGGGTCGAAATACAGCGATTATGAGATCCTAGTTGAGCCCCCTAAATGGG -CTAAAATGGGGGGCTTATGGGCCCCCACGAATGGGCTTTGCGGAGGTCTTCTCCGCATAA -TATCAACAATAAAAAGTGGAAAGTCTCACATGAAATCTATGCTATTTATTAGTTCCCCGG -GGGTTTCGATCCCTTAGGAGGCTCCAATACTCAAGGTTAAGCCGGCGGCATTGGCATTTG -TACCTCCCGGGCTTACTGCGATGATGGTGTGGTCTGAGTAGTAGGGGTTTGGGAGAATAA 
-ACATGTACACATATATCCCAATTGGTATAACTCACTCTATGTCAGAAGGGAAAAAAAGCT -TGGGCTAGTCCCTCTGTCCAACCTTGAAGGGTAAACTGCTGTGTGAGATGCAACGTGACT -TGGGGCTAATGCCGACGTCCCTAACTACCAAGAAAAATATTTCGTTTGCAGTCTTTTGGT -TTCCTCTTCATCCCAAAATCTCCTTTCGTTTGATTTCCTACACCACTCCAACCAAGTAGA -ATATGCCCCAGCGAACTTTCTAGCACCGCCCTGTGTTTCTAATACCCTTCCATCATGATT -ACAGGCTAATAAACTTACAGAGTCGGTGCTGTAAGATTTCGTCTACAGCCAACTGGTGTA -CAGCTAGAATGCAGTGGTAGGTACATATGTACATCATGAATATTACATTTTATCCCATAA -ATACTTGCACCTTGGCAGCTCTCGAGGCCTAACATTCCAGGGCTGGAAGCTTGGAAATCC -ATGGGTATCTGAAGGGTGGGACATAGATCGGCCCGACCCGTAAAAAAGGTTCCAAAAACC -TTTCAGAAAATGTGACAAGAGGCAACCTTCCAAGACTTCGAAGTCATTTTGTCGCATACT -TTGGATAGAGTAAAATTATGCTGAAACAGTCAGACTGTGGCCCACGAAACCTAAACCCCC -GACGCCTCAAAGCGGAATTATATCCAAACTAGGAAGGTGGTACACCTGATTTTTCAGGTA -TCAGAGCACACACCCTTGGGGGCCAATAAAAGAGATTAATCGAAACTAAACCGCTATCAG -CATATGGCCAGGAACCTTGAGATTATGAGAGCCGAAAAAGGTCAAAGGTCTAGACCCCCA -CGGGTAACGCTAACTAGATATGGTATAGGACACGTCCTTTTTTGTCTTGTCCACGGGTCC -TCTTAAACACTCTCTCAGTAAAATGGCTCACAACATGAACTTGGGCAGCTGTGTCCTATC -TTCCCTCAGTAATTAACCCCTATTGTCCAAATGTTACCACGACACTCAATTCCACTTAAA -GAAGGCCGTCTGTCACATCCAAGACCAGCCAAAACGCGACGAAAAAGCATGCTTTTGCGA -ACCACTCGGGTCATCATTGCGGTTGTAGCTGCGTGGTTGGTAATCTCATTCCTGCTCTTT -CGGTCGCCAGCTTCAGACGCAAAGAGCCAGACTTCCACCTTGGTCACGGACAGAGGATGG -CGAACTTCATCCACTGATATCACGACCGCGGAGTCATCGGAACCTGATTCGACATTGCAA -AAGACGCCCGACGAGAAATCTATTGGGGAAAATTCAGCAGAGTTGCAGGAAAATGTAGAA -GATCAAAATTTACCGCAATCATTCTCAAATATCGTCGAAGATAGCGAGAACAGCACGAAT -TTGGATCTAGAGCCCGATCTCTTCGAGGCCGCTCTTGACTACCTTCTATCAACAGCTCCC -AACGAGTTCTACATCAAACAAATCCTACGGCCAGTAGTCAGCACAGGCGAGGCGAAGCTG -CGCGACCTTGGTCTGCGCACACGCATATATAAAGAATTCTTCGAGACTTGGGAGAATGTA -CACCTCGTCAGCAGCGACGAAAATCAGACGTACGTGCGGAATGACATTGTCCCCTACATC -CGGGACAAGTTCAGCGAGTCGACATACCCTGGCATACTCCACAAATACGACTCTTATCGC -AAATTTGTCACCAAACTCTCTGCGCTGTTGTTTCCGTGGACGAGTCCATACTTTGCAGAT -CATATGAGTATGCACACTTCGTTACGCCATGGAGGTCGCGGCATCGTGACAACGGCGGGA -AATGGACAGGCACAGTTTCTACTGGCGGCCATTCCTTCTTTTCGCCTTCTGGGCTGCGAT -CTTCCTGTCGAGGTCATGTATCTCGGCGAAAGCGATCTGAATGAGGACTTCCGGATCAAA 
-CTTGAGGCCATGCCAGGCGTCACAACGCGTGACCTCTCCGCACTGGTTAACGATGAAGGT -TGGCAGCTCAACGGATGGGCTGGTAAGCCCTTTGCTATTCTCTTCTCGTCTTTCCGCGAA -GCCCTGTTCATCGACGCCGATTCACTGTTCTTCGTGAACCCCGAGTCACTATTTGATGAC -CCGGAATACGTGCGCACGGGTGCCCTTTTCTTCAGAGATCGAAATATCTCACCAGAGAGG -AAGCAGGAATGGCTGCAAAAAATTTTGCCAGAGCCAATTTCGCAAGCTGTGCAGCAGAGC -AGAATGTGGACCGGGGAGAGCGGGCATCAGCAAGAGTCGGGAGTCATTGTTGTGGATAAG -TGGCTGCACTTTGTTGCATTGTTACTCGTATGCAGAATGAATGGGCCGGATAGGAATGGT -AATGGTGCGGACATTGTAGGGACTTACGATATGGTCTACGGTTCGTTGTACCCCGGTACA -TGTCTTTTTAAACTCCCTTTTCGTTTACTGACCACCTGAACTATACAGGTGATAAGGAAA -CCTTTTGGCTTGGATGGGAGCTCGTTGGTGATACTTCATATTCCTTTCACCCGGGTCTAT -CTGCAGTCATGGGCGTTGTCCAACCGGAAATCGTGGACGCTGTCCAACCAGAAATTGTGG -ATGCTGTCCAACCTGCGGTCATGGACGTTGTCCGACCCCCCGACAACTTCAACGCCATCA -ATCAACAGCCTGCACTATCTTTCGTCGATGGAATTGCAGTGGCCTCACCAACAGCTAGCC -CACCAGTCGAATTAGCGCCAACCCCTGAGTCCAATCCCACAATCTGTGCCCCCCAACTTC -TTCATTTGGACCGCGAGAACCGTCCTTTGTGGTTCAATGGCTGGGTGTTTGAGAATAAGT -TTGCGGACGGTAAACGGGTTTTGGGAAAGTTCGACATGTTTATGGCGGAACCGAGTAAAC -CACAGGTTCCAGAGGCGTGGCAATTAAAGGAGCACAATCTCTGTTGCTTGTCAAATTCCA -CGCCAAAGAACTTTACAACACAGGAGACCGAATGGCTTGAAGAGTTGCTTGAGTTGGTAA -ATATCGTTGGTAACTAGTGAAGAAAGAATATCTTGATTATTGCATTTACTATGCCCCAGC -ACACACGCAAAGGAAAGCTCGAGGGATCTGGGGTGACAGGTGTTGGGTGAAAGAGGTTTT -GAACAGCCTCGAAAGTTCCTAAAAAGAATACACCAGAGGACTTTCCCTTATTCCCCCTGC -TACGCATCCCCACAGCCATTTTTCCGAAGCAACTTCCACCAACCCTTGCGAACCTCTTTT -CAAGGCGTTTTTGCTTCTTTCTTTTTCCATCATGCTCAAAAACTCCACCGATGCACCGCC -TCTTCCTCCAATGGCCCCTCCAAAAGAGCCAAAAGGAGAAGAAATAGGCAAGGTGCGATG -CAATCAAGAATGAGCAACGACAGCGCAAGAAGGCGCGTCGGGTCCGCATTAGTACAACTT -CTAATGACCTAATAGCCCATTACCATACAGTAAGAACCAATGTGGCATCTTTATCAAGAG -CGTTGAGCTGATATATCTGATAGGCAGTGCAGAACAGATCACTGGTACGATGGAAGTTAT -GCGGATTCTGAAAGAAACCGCTGGTGTCAATGCCGGAGATACGTGACTTGCAAATGTACG -CAGTACAGCATTCTCTACGCGCTCAGGTACTAGTGGAGCAGCGAATTCCCCACGAAGAGT -ATAGAGTACATACAGAAGGTGGAGGGAGGAAGTTGTCGCGGTGCTACAGGAGATTTCTAT -GCTCTCGCTTTCCTATCTCATTTAAATATCAACCTCTTGACGAACATAAATCTCATTTCT -CAGAAATCGTACACGCGAATCTAAGCTAGGGAGATACAGGGTAACCACACTCGGCTGATA 
-ACACAAACATGAACTTATATCCCTGTATCAAAACGAGAGACTAAGGGCTTCATAGGAGTG -CATGAAGCCAATCAGATACCAGTTTCTTTGTTCTCTTTGTTAATGCATATGTATGCACCT -TCCAGCTATAGTGGGACTTAGTGGGGACTTATTTGAAGCTAAGGTATGTTCTAGTTGAAT -GGTCAAATGTGTCAAACAAACCTTAATTACTCGTACAGCACGAGGTCGCAGTCGGATAAT -TTTAAATATTACGATCGGGGATCACCCGTACTGCCCCCCTCCTCCTCCCTCCCTCCTTTT -CTTTATTTGATCCTTTCTTATAATATTAGATATCGTCAGAAAGAAGGAATAGGAAAACGA -GTGTGTATGTATCTAGGCTTCAGGCCATTTATTTACTCAAAACTCTCAGTGATAGATGAG -GAAACTGCCGAGGGCGAGCAACAGCTTCCCGATTTGGACTAATGCCAGATTTATTTGAGC -CGCACGATATCCATGGATATTCGAAGGTTATTTGGAAGTCGAGTTGCCAATACAAGGTTT -CTAAAGACATAAGTCATACTCTATATTCTTATTCAATGGTTAGGAACATCTGAGACGACT -TTGCTTCATTCTTCACTCTTGGAAATACTTTTCTTCTCATCTCTCCCTGTGTCTCCAAAT -GCCCCATATACCAGGTCGGGATATTTGATTTCATTCTTCGGCCCGTTGGGCGGATGTTTG -AGGATTAGAACGGTGGCTTCTGAAGCCTTCTCTGGTTGTATTCTACTGCCATACTGCTAA -CGTCAAATTCCTTTGGAGCCACCATCTCCCACTGGCTTCTGACTTACTCTCCCGAGGTTA -TATGTCTTGTGCAAAGAAACGCAAGAGTCAGAAATTCAGCCAGGTTATTGACGTTTCATA -TTCTAAAGAAATAGCGTTGGGTAAGTCTGACCGATGATGATGCCTGAAATCATATACCAA -TATGTTATTGGCAGAATGTCAAGTTCGTCTCATCGACCCTGGTTCTTCTCAAAGCTAAGC -GAGTCTCGCCTATCAAATTTTCAAAAGTCCCTTGCTACTTGGTCTGTAATTTTAGCGAAT -TGAATGTGAATATTCTCAATATCAAAGTCACAGACTACGATGGATCAAAAGTCATTCACA -TGGATCACAAGCAAAACGCCAGAAAAATCTCTGGGAATTATCAATTTCAATGCAGATAGA -GAATATGTAGTTTTATGAGGCAAAAGGCGGAATGTTCAAAATCAAGGGCCGAAAGGAAAT -CGCTGCGCAAAGCTCTCATCCATGCATAAGTTCGCTTGTAAAAAAGAAAAATCAAGACAC -AGGTAAAAAATATTAATAACACATGTTGGTCGAATACCCTATGTCCACACCTGAGAAGGA -CCAAACACAGGAAACGGAATGGCGTGATAGTTGTTCAAAGAACCCCCGATAAAACGGCTT -CGGTGGCTACATATCGGTTAGCTGGGTCCATTATGCTATTTAATAAATTGACTCACCAGA -GTTGATATATGGGCGCTTGGCGTCCAGGTTAACATCGAACCGCCTTTCGAGTTCCTGCTT -GACAGTCTTCTTCGAGACCTTCATCAAGTCTGCAGTCGCCAAGATTTTACGGATCTCTTC -GAGAATCTCATCATCAGGAGGTAGATGGCTCAGGTCTTCCATACCGAAGTTGCTCATCAT -TTCAGACGGGGCGACTGAGTAACGAGTGCCACCGCCTCCAAGAGGCAAGTCGAGCTGCGA -GACCGGGCGCGAGCCAGTGTGGTACCCATACTCTGACTGCGGGAAGGGAGGCAGTCTGGT -ACCATACGAGTGTGCGGAGAATTCAGATCGGTCGTCGCGCGAAGTTTGGGATTCCCACAA -CTCTGCTTGATAGTCTTCCCAAATCTTCTTTGGAATTGAGTCGGGGTCAAATTTGCCTTC 
-GTCAGAAATGACAACCTTGCGACCTTTCTCTCCGGTGATGACACGCGTATTACCCCAGGA -GAAGTCATCCATATGCCAGAAGGAGTACAGTGGCAGAGCAAGGGAGAAAATGGGAATGGC -CAGGAGATAGATCAACATCCAGCCAATCATTTCCCACTTGCGTCTAACGAGGAAGATCAA -GGCCTGTAGACCGTAGATGACACCAAGAAGAATGAAGGAGGTGATAGGCAGCACATCTGG -TTCACTCACACACCAATAGATCAGATAAGCGATATAGGCGACCGTGACTGGCTGAATGAC -GGTCATGAGCAGATCGATGAGCACAATGAAACGCATACTGAAACAGCAGAAACCACACAG -CTGTTGTAGCGGAATCAACTCCATCAAGTTGTGAACTGTAGAATTGATCCATCGGCGACG -CTGGGAAAGGAAGACTGCAAAGCTTTCCGGTGCAACTGTCCAGGCATGACATCTGAAGAT -GTACTTTGTCTTGAACTTCGGGTGATGCTTGATAAGCAGTGTCGTCAAGTAACGATCTTC -ACCAAGGTGCAGTAGATTTTTCATATGGAGGGTATCGACACGGATTTCGGAGTAGGAGTC -GACCACAGCCTTACTGACAAAGAGAGGAGTTCCATTCTCCGCTGTGCGGATACGATACAT -GGTGAAACAACCGGGCAAGCAAGTCACGGATCCAAACAAACTTTCAAATGCCTTCGTCAG -GTTGTGAGAGATGTAGTATTCGTAAACTTGGATCATTGTCACAATGGAGGTCTTGGCATT -GGACAGGGAAGTCTCTCCGCAGCAGCCGATGAGCCGCGTATCAGAGAGGAATGCCGAAAC -AAAACGTGTGGCGGCATCAGGTGCGACCATGGTATCAGCGTCAACCTGTAGGATATACTC -GTAGAATTGCGGATTGACACCAATAATGTTCCGAATGTGATGATGCATCTCAAGCTCCAT -AGGGCTCATGGGCAAGTTGTAGTGAACACGGTTGAGGAAGCGCATGAGAACCATTTGGGA -ATCACGTTTTCCACGGTTGCCAGGACGGGACACTTCAGAAGGTTTGCCGACTTTAACAAC -GACCAGGAAAGGAACAATGTGGCCCTGGACTTCGTATAGACCAGAATAAAGCTTTGCCAT -GTTATGTTGCTTCATACCCTCGCCCAGACTCTCAAAGCTGAGCGGTTCAGGATCCACAGA -TTCGGGGACACCCAGAATGTCAAGGACGATGCGGGGAGTCGGACGATCGTTGCCCTGACC -GATAATCATACCATCACAAATAACGAGTAGAAGTTTGCGCTTGTCGTCATAGCGCATGCG -AGCCATAGAGTCCATAGCACGGCGCAATGACTCTTCGTCTTCGGTATACACTGGGACTTG -GCAAATTATGAACTTATCCAAATTCTCCGGAAGGTTCTTCTTTCCGAATTGCAACGCAGC -CAAAAACTTGAAGCCAATGATGGAGACAAGCATGATCGAAATTGCGAGAATGAAGTAACG -CGCAAATTGACACCGGACTGAGTCTCTCGTGTCGATTTTGCCAATGAAGAACAAGTTGTC -AAGGCAAAGCTGCATTCGGGACCTGAGTGCCTTGTCGAGATTCAAGTCTTCCCAATACTT -CGTCACATCGTCTCCGGGCTTTCCCTGGAAGATATCGACAAGAAGAGGGCTCATGTAATC -AGTATCAATATTGCCGGTGGCGTTCTTCCCGGCTGGCCAGCGAGGTACTCGTGGGCCAGC -AATGTAATAGGTCATGTCATAGATTTTGCCGTGGATACTTGCAACTTTCCTATTCTTGTC -GGTCGCCAGCTTCTTCATTGCCTTTGAAGTATAACCAACGTAACCTTTCTTGTAATTCGA -GCGCATCATGACCATTTGCTCATAGAACCAATCAGCTCGAGAGTCATTAGTCCAAGAGCG 
-GAAATCGTGGTACTTGGCATTGACATCAAATGTGGAGCTGGAGGCTGTGGCCGCGGAATC -ATTCAGATTTGATCGATAGTCTAATGGTACGCTGGAATGCACTTTGCCCGTTGTACCCTG -ACACAAAGCCGAGACCTGGACAGGGAAAAGGTTGGTAGCGTCAGTGCCTGCATAGGCCTT -GAGGGACTTCTGAGGAACGATATCAGGGAAGTGGGATGGCATGAAACTTCCGAGGTTGAA -AACCACGCCTCGAATGGCAACGTAAGACTCTTCTGAACCTTTACCATTCTTACCGCTGAG -CTCTGCCGTGTTGTAGACATCCTGAGTTGGACAAACCACTCCAGGGAAACCGACAATCAT -GAAAACAGCTACACCACAGGCGAACCAAATGATCATATTGATAGCGAGCTTTTCACGCCA -GGCAATGCGCACGTCCTTGCGCTTCATGCGCCCAATGAAACGGATCACGAAATCAGGGAT -GAAGAATGTGAGCAAATACACCAGGAACATCCAACGACGACGACCGGCAGATGTCGGGGC -CTCGTCGACCTCCTCCATTTTCTTTTCGTTGTTTTTCTCAAGCATTTGCTCTTTGGTGTC -AAGATTCTTGAACATATCACCAGAGTTGAAAGCGGAGCCTCCAGCATCAGAGCGTCCATC -CAGTTCACGGCTCCCAAAGTACCCTTGTTTGGGATCATCTCCATAAATAAAGGCACCTGG -AGACTGGTCGGCCGCGTTGAGAAGTCGGAGTTTCGCGTCCGGATGTCCACCAGCGGACTG -GAAACCGTCGCTTTCATCGACACCTGCATTTCCATAGGACGGTACCACGCGCTCGCCCAC -TTTAGCAATATCTGCCCAGCATCGTTCGCTCAAGAAAACACCGGTGCTACCGACACGAGC -TTCGTTGCCAGGCCATCGCTTTTCGTCGAGAACAACCTCAGACTTTTCGCGGTCGCTTCC -CACCACAACATTCCCGATGTCAGCTAGGCCAAGGAACTCTCCGAAAGGAAGGAAGACACT -GTAATCAGCGTTCCGTAGCCGCGAGCTGATTTCGGCAATACCGAATGTTTGAACTTGCAT -CCGCACACACTTGCTGTCGAACTGGTTGGCAATGCGACGATCATTGGGCTTGAGACAGAT -GACGAAATAAGGATTGAGGTTGGAGGAGCTCAGGCATTTGTTTACAATCTCCAGGCCAGA -AAGGAACTGGCCGGCAGCTCCCTGAGCCGGGCCGGAAGGCATTCCGCTCTTCCGCCCGCT -GGCTCGCTTGGATCCTGCAAGCTGGCTTTCGTGTTCATCACCCTCCTCACTTTCGGGGTG -CGCACTTGGGGCAGTGAAGCGGGATGGGGGTCCGGCCTTGCGTCTAGCCATGCTAGGCAT -TCGCAATGGCTTTGAGCTCACCTGAGCCTGCATAATAGCGGTTTTTTCCTGCGGGTGGGA -TATAGTCTGCAGAGCTTCCTGGTGGAACAATTCACGCACGAAGTCACTCCGGGTGGAGCG -CATTAGGTTCATCAGATCTCCCGAGATGACTTCTCCATTTTCCTCAATAAGGCCGGTGGT -GGAATAGTCCACCTCGCCGGCGAAATGCTTGACAGTGAACGAACTTCGCGTCTGGGACAT -ATAGCCACCCGATCCACTGGTGCCACCAACGGAGATGGCGGGGTTTTTGTTCTCGAAGCG -CTTCCGCACGCTATCCAGGAACTGTGCATCGGATCTTCCACGCCGGGTTTGGTCATCCAG -AATGCTCAAGAGACCGTTGCCATGCTTCAGCAGACCACGCACGGTATCTGTATTATCGAA -ATAGCTTGTTGCCGCCACTGTGATCTCTTCGCGATCCAGGATATCTGCTTTGTGGTCGAA -AAATGACTGAAGGCAGTAGTTGTAGAGTGATTCGGTCGCAGCATTGCTGAGGAGCTGATC 
-GAGTGTAGACCCTGTGGAAGGAGCTTGGGCAAAACCGGGGAAATCAACAATGGAGATGGT -GTTGGCGACACTGTCCTCGGCTGCACAGAGCCTTTGGTTCACGCTTTCAATAACGTAGGT -CACTAGCAGAGAATAGATAGTACGTGCAAATGCATCAGCATTCTCTCGCGCTCCCTTTGG -ATCAAGCATGACCGTCACCCGCTCACGGTGGATCGTCTTTGTCCGGTAACCGAGGCTGTT -TTCCAGAGCATCAACACTGAGACCCAAGAACGCGGCGACATTGTCGAGCACATCCTTGTT -CTTCACCACCGTGACAAGCTCACCACCCTCGTGGGAATAGCCACCACTTTCTTCTGCGCC -GGTGGTGGTGGACTGACCCGTGGCGAATTCGAGTTGCCCAATGTGGAGGATACTGGCCAG -AATCTGGCAGATCTCGGCGATTTCACCCCGGGAAAATTCCAACTTCCGCAACGCAGTTTT -GAAATGCTGGAAGCCCTCGGCATCATTGATTCCCACTTTGAGCTGCGTTGGGTGGCCGAG -ATAGCGCCAGCGTTTGTGGCTGACAGTTCCTCCGGACAGCCTGCTACCAGATTGAATATG -GATGCTATTCTCAAATCCCAGGTGTGACTTCTCTGCCGCACTTGTCCCAGATAGCAAATA -GTACAACACATGGAAGCTTCGCTCGCCCGTGGGCACCGAAGAGATGCGGCTACGCTCCAA -TCGGTAGTCAATGATCTTACCACCAATCAGGGTTGGGTTCACGGAGGATGATCCATCGTA -TTGAAGTTCAAGGAATAGACCAGCTTTTGAAGCCGTGGGTGTAGTGACGGATTTGGTAGT -TGTGAGAGTGTCAAAGACAAAGGCGGCATATGAGAGTTTAGAGGAGAGCGGGGTGGATGA -AAAGGATAAGAATGAAGACAGCAAGTGCGAGCGAAGGGTTGTTTTTCCAGATCCGCTCTC -TCCTCTTCAATCGAACGTTAGATATAAGTTTTATATAAAGGGTTCATTCAGCTTACAAGA -ACACAACGGCTTGATTCTCAGCGCGCGCACCCAGACGAGTGAAGGCGCGGCGCGCGAGAT -CCTCGGCTTCACCCGCAGCACTTCCCTCTTTTCCGCCATCAGGTCCTTTTGTGGCCGATG -AATATGTATTCAGAGAAATGAGAGCCTGAGACGACAGGTGGGCAGTTGGAAGTCCAACAT -GAAAGCTGGGTCAGACGTTAGCTATGACCGAGACTGTATTCATGAAGGGGATCAGGGATC -TCCATACCGACTGGCAAGATGAGCTGTGAGATGTGTATCAGACTGCAGGTGGGCGGGCAA -TGAGGGAAGTGAGGGCTGGGCGTGTGCCGGGCCACTCCCAGAAAAGGTATTGACCATATT -TTGGTCCCCTCAGTCGAGATCTGATGGTATGAACGAGTGTATGAGGGGCAATCGGGACCT -AAAGTGTTGACGGGGCAAGTGGCTTAAGAAGCCTCGCAGTGACGGAATGAACGTGGAGCG -AGGGTAGGAATGTCTTGTAGACTTGGCGGAATAAAGACCCCTAGCAACAGATCGAGATCG -CACACATCGGGGTGAAGGAAGATTCGAAGTGAGGTGCAATGGTAGGCGAACGAGAGACAC -AGAAATCGAAGTAGAAGCAAAAGCAGAAATTAGAAGCCCAGAATGGAAGATCACAAAGGG -AAAAAAAGCAACAAATCGATAAGAGGTCGGGGGTGGATGTGTATTGTGCAATACTCAAAT -TACTAGTTCGGTCGCTAAGAATAGCTGAACCTCGGCCATACTCAACGGGGGATTAGGGGA -TTAGGGGATAAATTGTATTAATTCGGTTCGGATTGGATATGTCTCGGGTTGAATCCACCA -TAGCTGATAGTGCCATAATACAGATTATTTACTACAATATACAAAATTAAAAAGAACAAT 
-TTGAAAGTGAGATGACAGAGGGAAAAAAAAACGTAACAATCAGAAAAGGAACCGGTTTAA -AATTTTAATATACAGTCTCATGTTAAGAGATCTCCACTCTAGCTGTTACCAGAGCTGCAT -CTTTGTATTTCGTACGGAGTACGGAGTCCTACGTAGTTACATACTAGCTGTACATCAAGT -TTAGTTGGAACACGTCAACTCTGGAGATCATCGTCAATTATACTCTCCTTGCAAGGAACC -TGCATATCAGAAATATCTGACAATATCCGGATAGAAAAAGTCTGAAACTAGGGGCACTGG -TCTCTCGGCCTAAGAAAGAGTGGCTCGACATTCTCGCGACTAACAATAATAGGCCAGTAA -ACAGACTAGTAAAGGTGGCAGGGTGGATCAAGTCCCCGATCATATGTGTGGCGAAACTGG -TCATCTGTTAGAGAAAAAAATAGGTGTGAGACATCCCAAATATAGTGCATACCATAGACT -ATTAATATATTAAAATACATATACGGAAATAAAATACTTCATCTGATCATTCTACGGGGC -TCTGGGTCCCTATTAATCTGAGGCATTTTCATTGGTGGGAACTTATCAAGCAGAAACCCA -TCAGGGTCAAAGAACAACAAACATGTGGTTTCAAAATTACAGGGAAAAAAAACCCGTTTT -CTTTGATTGCAAATACGAAGTTCGAATAGGAATTGGGCCGTGTCAACATTGGAAATCAGA -CAGAAGAACTCGACTTGTATTGTCTTGTCCGCCATTCACCAAAAAGCAAAGTGTCAAAAG -CTTAAGTGCCGTACCGCACAGTTCAGACGATCGTCAGGATCGACGGTATTTTTCTATACC -CCGTAATTTTCAACTCCATCGTTTTCTTTCTCCCCACGCCTAACTTCTTCTCAGCTGTTT -CTGTCTTCTTCCTCACCCATTATTCAGGAGTTCCGTCTCCAGTCACTCGCTCAATCCAAG -CCCTTGATCCTTGAGCTTTGATTCAATCAAAAGCTAACTCCCTTACAACCTCTAAACACC -AACTCTCACTGTACATTTAGCGAGACAGGCGACCAATACCAAAAACAAGAGTACCCTCAT -GCAGTTGTGATAACGAACAGGCCGGACATAGCATCTCGCCGTTGACATCGTTGACCCCTC -GGCCGCACATTCGTCATGTCGAACCGTCATTCGGTCTTCTCCACGCAATCCACGGGTCTC -CCCGCGGGACCCAGACCACAACATGGAACTCAGGTCTCAACAACAACGTTATTGAATGCT -CTCCATTCTTTTTATACTGCGGGTCAACCATATCAACTGGACTCTGCCACGAGTTTGGTT -GTGAACACATGGGTGACTGCAGCAACTACTTTACCCGATGGGCGAACTGGCGCCACGGTC -GACCGTGATCTGGCTATCAGAGCTTGGGAACATGCTCGACGCCGGGCCGAGGATGGCTGC -GTTGTTCTAGGGTATGTCAAAAATTAACTTTATCCGACGACCATTAGTCCTAACATAGAA -CAGCGCCCTCCATCAATCCACACCCTCATTGCTAGAACCGTTTATCACGGCGATCCCGGT -GGAAACACCCGAGGTAGCTCTGATCGCGCTCTCCGCTCTACGACCCTTCCTCACCGCTGT -CACCTCATTCAATCCATCCTACTCTCTTCATTCGGCTTTAGCCGCCACTTACACCTTGTC -TTTGCAAGGATCTGTCGTTGGCCTCTCCTTGGCCTTGTCCACCTCCGGCATCAATGTGCG -GAAGGGTCTTCTGGAAATCCAACCTGACACAGGTTACCGTGCATTTGATGCCTTTTACTA -TCTGCTCACTACTTCATCGACAGCCGCGGAGCGTGAATTTCTGGACCTGAAAGACCCCAC -TGCGTATACCTTGCTCAACCGATCAAACACATATGATCCTCCACACTATCTACCCACAGC 
-CGATGATGCCGCTGCAGCAGAAGACTTCCGTGCTTCTCTGAAAGCTATTGGTATCAAAGG -TGCTGCTCAACGAGGTTTGTTATCTGTCCTTGCTGGTCTCCTCAAGCTCGGCAATGCAGT -CGGGTTCGAGGTGGACCAGGAGGAGCTAGAAGAGGTCTGTGAGGATGCTGGTGGTCTTCT -GGGAATTGATCCAGAAATTATTCTCCATAAATGCTCCACAGATGAGCGGGAAGTGCTGAT -CGCTGGAATATACGAGGCGCTGGTCGATTGGGTCATCAGTAAAGCAAACGAGGCCATCAC -AGCTGATATTCAGGTCGCATTGGAAAACGATTCCAGCAATGGGGGTGCAACACAATGGTC -GAACGACGACACAGTCAACCTGACGGTAGTTGATATTCCACGACCGGCTCTCGGTAAAGC -GATCATGATGAGAGGCATCTTCGATGATAGTCTCGGTCTCAACGCGGAAATGAAGGATGA -TGGGATTCCTCTCCCTCCCATTGGAGCTTCTGTCATCAGTGAAATGAACTCCGCGGTGGC -CCAAGTTGAACCAGATCTCAATATCACCACAGGCCTTGCCGGACGAGAACGCGAGCATGA -TCTCGACAAGCGGCAGGGAGTACTGGAAAAAGTTGGGGTTGAACTTGAGATTGACGCTTT -TCTCCGACGCATCTTATTCCCTGTTGATACCGAGGGTATCTCCGTCGGCAAACGCGGCCG -ATTTGATCTTCCCACTACTCTCAACAGTAGCCGTGTCTGGTACCACCTCTCTCTCCATCC -GACTGATGACATGCCGGAGCAGCTCGCGTTATCGGCATCTACAGCGTGGTCGGCCGGTGC -AGTCTCCCGCCAATTGCGAGACTGGCGACTCCCGGAATGGGCCAACCGTCGTCTGAAGCA -ACTCGATTTCACCGCTGATTTCGACGTGGAAGAGTTTGTCGGTAGGTATGCACGACTCGG -CTGCACCGAAGGCCAGGATGGTGTCGAGAACTGGATCATCGAGAGAGGATGGAGCAATGG -CGATGCGGTCGTCGGTGAACAGCGTGTATGGATGCGCGAAGGGGCGTGGTGGGAAGCGGA -GAGTATGCTGGATCTGAAGCCAGAAGAGACCCATGTGAATCCGTTCATGTACGGCCCTGC -GTTGTACGAGACTGGCTTCACCCCAGATGGCACCGCCATCCATGAAAACTCCAATCTTCT -TGGCATGCCACCTGGTGGCGCCATAGCCCCGAGTGTTATGGGCGGTGCGAAATCCATTGC -CCCCAGTGCGCCATTCACCAACACTGCCAATGCCGGTGACTACGGCCTGGGCCGAAAAGG -TGACGACAAGAAGGGAGATATTGCATACTACGACGAATATGGCCGATATATCGGCGAATT -CGACCCAGAGTTTGGTGATCCCAAGCACATTGAGAAGAAAACCATCACCTGGAGCCGTCG -ACTCTGGGCTGGTTTCGTCTGGGCCATGACCTTCTGGATTCCTTCTTTTGTGCTTCGATA -CGTTGGCCGCATGAAACGCCCAGATGTCCGTCTGGCCTGGCGAGAAAAGGTCGTCCTTGT -CTGTTTGATTCTGCTGTTCAACGGAATTGTCTGTTTCTATATCATTGCCTTTGGTGACCT -CCTCTGTCCCAACAAGAACAAGGTCTGGAATCAAAAGGAGATCGGCTGGCACACAGATGA -TAATAACTTTTATGTCGGCATACACGGCCGTGTTTACGATATCAGCAAGTTCTGGCGCTT -GCAGCATAGTGACATGAATGGAATAGACACAACGGCGGATAACATGCGCCCATTCGGAGG -ACAGATTCTGGACGACTACTTTCCCCCTCCTCTTAACCGGTATTGTGCCCCATTTGTCAG -TTCTGATACGCTCACCTTGGAGCACAACAAGACAGACGCTATTGAATATCCTACAGCCGA 
-GCACAAATCGGGGAAACAGTCATTAATTCCCGACACAGCACTCCACAAGGACACCTGGTA -TGAAAATAAATTCTTGCCAAAGATCGGTACATACTACAAAGGCGACGTGGTGTGGACAAG -GGAGACAATCAAGAAGCAGGCCAACGAGGACTCTCGCTACTGGGTTATCAAGGACCAAAA -AGTGTACGACTTGACGGACTACTTCTATACACTAAAAAGGATGAACAACCTCAAACAATA -CAATTGGCTACCAGGACCTGTTACTACCCTCTTCAAGAATAACATGGGTGAGGATGTCAC -CGACCAGTGGCAGGACACAGTGGACTTCAGGGATGCCCAAACATGTCTTGACTTCGTCTT -CTATAAGGGCAGAGTTGATTTCCGCGACAGTGCACGATGCACAGTCAACAATTGGATCCT -GCTCACCTTCACCATCCTCATCTGCGCCGTCATCCTAGTCAAATTCCTGGCAGCTCTGCA -GCTGGGGTCCAAACGTCGACCCACGCCTCAGGACAAATTCGTCATCTGCATGGTTCCTGC -ATACACTGAGGGCGAGAATGATCTCCGGAAAGGTCTTGACTCTCTGACTGCCTTGCAGTA -CGATAACAAGAGAAAGTTGATATTTGTCATCTGTGATGGTATGATTGTTGGTGGCGGAAA -TGACCGCCCCACCCCTAAAATTGTTCTGGACATTCTCGGGGTTGATCCTAAGATGGACCC -GCCGGCACTGCCATGCAAATCCCTCGGTCAGGGAAGTGAGCAGCTGAACTATGGAAAAGT -TTACTCTGGACTCTATGAGTATGAAGGCAATGTTGTGCCTTACATTGTAGTCGTCAAGGT -TGGCAAGGAATCCGAGCAGCGCAAGTCCAAACCTGGCAACCGTGGCAAGCGAGATACGCA -GGTCCTTCTTATGCAGTTCTTGAACCGCGTCCACCACCGCTCACCTATGTCCCCCCTCGA -ACTCGAGATGTTCCACCAAATCAACAACGTCATCGGTGTGGATCCGGAGCTGTATGAGTA -CTGTCTCATGGTCGACGCAGACACCACTGTACGGGAGGATTCTCTTAATCGCTTGGTCGC -CGCGTGTGCGGCAGATGCCAAAATTGCCGGTATTTGCGGCGAGACTAGTCTTCAGAATGA -GGAACGTAGTTGGTGGACGATGATTCAGGTTTACGAATACTACATCTCCCATCACCTTGC -TAAGGCATTCGAGTCACTCTTCGGCAGTGTTACCTGTCTTCCTGGATGGTGAGTTTACAT -GGATCGCTTTTCCACTCTTAAGAAAGCTAATGCTAACGATGCCTAGTTTCACCATGTACC -GCCTTCGAACTGCAGACAAAGGACGTCCTCTGATCATCTCGGACAAAGTCATTCACGATT -ACGCTGACAATGATGTTGACACCCTCCACAAGAAGAACCTCCTCTCTCTTGGTGAGGATC -GTTACCTGACAACTATCATGACCAAACACTTCCCGTCGATGTCATACAAGTTTATTCCTG -ATGCCTACGCAAGCACAGCCGCCCCTGAGACCTGGAGTGTGCTTATGTCGCAGAGACGCC -GCTGGATCAACTCCACTATCCACAATCTGGTTGAACTGGCAGCTCTTGAAGATTTGTGCG -GTTTCTGCTGTTTCAGTATGCGTTTCGTCGTTCTGGTCGATCTGGTGGGAACTCTCATCC -TTCCTGCGACCTGCGTATACCTCACCTACCTGATCTACCTGGTTGCTAGCAAAACGGGTC -CCTTCCCCATGATCTCGATAATCATGCTGGCTGGTGTTTATGGTCTGCAAGCGATCATCT -TCATTGTCAAAAGGCAGTGGCAACACATTGGATGGATGATTATTTACCTTCTTGCGTACC -CAATCTACAATTTCATCCTGCCGCTATATGCCTTCTGGAAACAGGACGATTTCGGATGGG 
-GCACCACCCGTGTCGTCATTGGCGAGAAGGGCGACAAGCGAGTCATAGCTGTGGAAGAGG -AGCCCTTTGAACCACGCAGCATCCCACTCCAGCGCTGGGATGACTACGCCATGGCCAATA -ATCTCCCTGGCCGCCGCGGCGACCTCAGCGCCAGCCAGGAGAAGGTGTATGCCACCGGCC -GGTACGATGACATGGCTATGGAGATGGACGACCTGCACTCGCAGTACTCATCTGTCAAGC -CAGCCTCCACCATTCTAACCGGCTTCCCGGGCCACGGCCGCCACCAACCTTACATGCCTC -CACAGTCCCCCGCACCCTTCGTTGGGAGCAACATTCCCGGCAATCGCAACTCGCACATGT -CAAGCTTCTCCCGCTACACGGATATGCCGCAGATGAGCGCTCACATGGGTGGCCACTCTG -CCCAGGCTTCCCGCCACATGTCCATGGGTGGCCTCAGCGCCTACCAGGACAACCAGATGG -CCTCGAGCAGACACAGCCTCGGCATGATGCAAAGCAGCGATAACCTCCTCGGCAACGCTG -GCTCTCGCAGTCCCTTGCCTCAGTTCCCCTCTCGACCGGCCAGCACTGCCTTTGATTTCC -GTGTTGGCAGTGGGCCCGATGATCTTGCCATCACCGAGGCAATTCGCAGCTGCTTGGCCG -AGGTTGATCTAGATACTGTCACCAAAAAGCAGGGTATGTTCCTAACCCAACCTCAATGAT -GACTATACAGTTCGCTAACCACTTAATAGTCCGCGTTCTCGTCGAACAACGCCTGCAGGC -CTCGCTCACAGGCGACAAGCGCGTCTTCCTGGACCGACAGATCGACCAGGAATTGGCTAA -CATGTGATTTTCTTTTCTTGTAATATGCATTCTGCATCCCGCATCCACACCCATCGAGTA -CATTTATTTCTCTTTTTCTTGATATGGGGGTCTCAGCATCTCTGGAGTTTTTGAGTCTTG -GGGTTTTCTTTTTCGAGCACAGTATCCAGCGTCTTGAACTTTGAGCTATTCCGCTCTCTT -AATGAGATTTAGTCATATAGCGCCCAGTGACAGTAATATATGTTATGACGATGTCGAATT -CGATCGTGTGTTTAAGAAATTTTGTATTGTGGGAGCGTGGAGCTCAAGTAGATCTGGTTA -GGTCCTGCATCTCCGGTTGCGCAAAATTTGAAAATGTCGAGTATTCTGGTATCGACAGAG -CACTACATCTTCATATCTAAACCTCTTAGAGAGAACGCGATAATCTATGATATATACTCT -GTAGGTAGACAGAAATGCCAGCTCAGTGTGCAAAATTATAGATTAGAGTTACGCATCATC -CCTTGTAGGCGACTATGGTCCTGAAGGTATCGTGTCACTGAATATTACTCCAATCCACAA -TGACCACTCTTGGACCTAACGATGCACAACATGTTTATACTGATCCAGTAGTTTCCTAAA -ACCCTTTGAGTGATGAGACGTGAGATGATGGTAGTAATGATCCTTCGCAGCATCCATAAT -CGCTTTCACCTTCTCTGCAGGAACCTTCTCCTTGCGATTGTAAGTGTACAAACCATTCAC -CTCCTGTTCAATGTCACACCTTAATCAGAAAACAAAAAGGTTAGCAGATGTCCCCAATTC -CCAATTCCAAATCCAAGAATATTGCAACATACAACTGAGTCCAGACAAGCCCGCAGATAT -GTCCCGGTTTCACAACCGCCATGACCAGCTTCTCAAATCGCACCAAGAAGTCTGCCGGGT -CGCTCGCAGTCGTATACCCCCAGTCCCGCTCCCCGGCAACAGCAAAGTCCTTGGGCGGTG -CAATATTGACTCCACCGAACTCGGTGCAGATCACCGGTGCACCAGGTGTATGCTGGGCAC -CCTTATCGAGAACCAGGGAGCCGGAGTACCCAGAGTAGATGGGCTTGGTGAAGACCTCGT 
-GCCCACCTTTCGGTTCGAGGATCCCTGACATGCTGGTACATGCTGCGCTTAGCTCGGCTG -AGTCGCTATAGTCATGGAAGGTTGTTAGGTCTGTTTTGACATGCTCCCAACCGCAGTTGT -CGTTGATAGGTCTACTGGGATCGAGGTTTCTGTTTTCTTGTGTTAGCTTGCTCATAGGTT -TCCTTTTGGAAGCGTGGACACATTGAAGATGCAACTGGGTGCGTCATACTTGGTCAAGTA -GTACAGAGCGCGAACATGATTACGCTGGTCGATATTGTCTTTCAAGGCAGATACACCCCA -GGACTCATTAAGTGGCGTCCAGGTGACCACCGATGGATGGTTGATATCCCTCTTCACGGC -TTCAGTCCACTCGCTAGTAAAGCGCTCAATGTAGTCAGAGTCGAACTCGTAGGCATTTGC -CATCTCGCCCCAGACAAGGAAGCCCATCCGGTCTGCCCAGTAATAGAAGCGCGGATCTTC -GACTTTCTGGTGCTTGCGGCATCCATTGAAGCCGAGTCTCTGTGCCAGCTCGATGTCTGT -CTTCAAAGCCTCTGAAGAAGGAGGTGTCATTCCCGTTTCGGGCCAGTATCCCTGATCCAG -GAATAACATCTGGAAATACGGCTTTCCGTTCAGCCGGAATGTTGCATCGCCAGTCTGCCA -GGATAGACTGCGCATGCCGGTTGTAGTCTGGACCTCATCAACAACGACGCCAGCCCCATC -AAATAGCCGAAGGGTGATATCATAAAGAACAGGATGCTCCGGAGCCCACAGTGCAACACC -CCAACCCCAGGCTCCATCAACACCAAACAGCTCCTGGTTCTTTGCCTTTAACTTCGCCAC -CGCATTTTCAGGGACCTTCATACTCAAATCAAGCGCAGCAAAACATTTATCCCGCGGAAG -CTCGCCGTTTACCTTTGTAACACTTATTCCCCCAAGCGATGCCTCAACTTCAACCGTCGC -CACACTTCCAACCCGACGTCCGGCAACATGAACATTCGCATGCAACAGGCCGTTCTCAAT -ATCATCAGAACGAAGCACCGTCCCTCCACTCCCACATAGCAACCGCATAGCCGGGACGCT -CTCCAACCAAACCGAAAGCCAGATACCACCGCTGGGTGTGTAGAAGATACTCTCGGGAAC -AGGACCCCAGAACTGCTTTCCACGCGGCTGGGTCAGGTCATAAGGCGAGTCTCGAACCCG -GAGAGTGAGACGAGCTTCCGTAGCGCCGTCGGCAAGGGCCTCTGTGATATCTATATCGAA -TGGCACATGTCCGCCACGATGCCCACCAACAAGGGCACCGTCCACCCAGACAGAGCACTC -GTAATCGACGGCACCGAAGCGGAGTAGGAGTCTATTCCCCTTCTGCTTCTCATCGGCTGT -GCGGATGTCCGTGACGCATCGCTCGTACCACATTACCTCGTGCGCTTCAATCAGGTTGAT -TCCCGAGGCTGAGGTTTGGAAGGCATATGGGACTATTATCTGGTGGGAGGGTTTTCCTTC -TAGACTGGTTTTGTGCCATTTCGAGGTAAGACCTTCGTCTTTGTCGTCGAAAGTGAAGGT -CCAATGGCCGTCTAGAGTGTTCCAGTTTAGAGGGGTGCGTTGGAAATCCGGACGAGGGTA -GGATGCAGTCATTTTGGGTATTGTGGGAGTTTTGCTGTGTGGTTCAATGAGGTTTTTATG -GTCTCTTAGGAGGTTTGAATCGATCGAGGAAGAGAAGAGTAGCGGTGTCTTTTGACCCCG -CGTCTCTAACGTCACTGATCGCGGGCCGCATAAGACTCGAAGCGATGAAGTTCAATATAA -AAAGGTCCTTGGGGTTCAAGCTCGCATTGTCTCAGGCTCAATTTCGAAACAACTTAGATT -GGAGAATATCTAATGGCAAAATCTCATTGATGATTGGGGATATCAAAAGAGTAAAAACAT 
-CGACATAAAAAACCCAACTTATCGGAAAACTCCTCTGAAGTCGATACCAGTTGGTATCAA -TTCAGACTATCCAATCCCATCCTTTCCTCCGAATGCGCGCGAAATAGATAAAAAAAAGAT -AAAAGAGGCATATGGTCAACGACTCCCTGCTCTATCGCAGATCTTGGATATCTCGTTTTC -TTTTCTGACGCCCTCATTGCTTGCGTTCCGTGGTATCAATCAACAAGACCAACAAAAAAG -ACTACCGAAAATAAAAAGCACGCTGGGAATGAGAATAAAAGCAGGAAAAAACAAAGACAA -ACTCAACCAAAAAAAAGGGTATACTGCTGCAAATGGATATTTTGGTTTTAACGCGGTGGC -CCAATTGGGGCCGGGTATCCAGAAGTAGAATTGGAAGTACTAGGACGACCACTATCAAAA -GAGGTGCGCGGGTCATGCTTTGGCGTCATGTCACCCGGCGCCGTGCCGAAGGGAAACTTG -GCGTCGAAGCGCGTGGGCCAGCCATTGCTGGTACTGCTCTTGTGGCTGTGTGAGCTCCCT -AGGACGGGTAAGTCGTCTTCGGGGAGGTTTACGCCAGAGTCATTCCATGGGTTGCGGTAG -CCACTGTCGCGTGATTCCATGGAGACAAGTGAGGCCTTGGAGGCGTTGCTCGAGGCGTGT -TTGAAGAACGACCCCGGTGAGGCGGAGTGGTTCTTGCGGCCGATGAAAGAGGAGAAGGAC -GAGATGCTTCCGGGCTTGCGGAGGCGTTCAAATATCGAGCCTCTTTCGCTGCTCTCGTCG -GAGAAGTTCGGCCGGGTGGGCGAGTCGGTCGAGAGGGGCGCTGGGGCGTGCTGCAGTAGC -TTGGAGTCAAAGAAGGGCAGGGACTCCTCGAACGAATCGGATTTAGGGCGCGGGATAGCC -CCCTTGACGGTGTTCGCATCGAGCCCATGAGCAACCAAGCGGGAAATCAGGAAGTCAATT -GCGTCCTTCAAGCCACCGGCGTTGTTGCGCGTGTATCCGAGGATCACACGGTCTTCAGGC -CCGGGGGCGGGAGACGATGAGCCTTCCTCGTGAGTAGTAGGCAGATCGGTACCGACGTCC -ACGTTGATATCGACCTCGTACTGCTCCTTCAGCTTGGTCCGAACATCGGCGAGGAAATCG -GAGCACTTGAAGTATTGCTGGAGTTCCGCCGAGCTCTGGAAGTGCAGCTCATGGCTCTCT -GGCACCATGCCAAGCAATGCGTCGACTGCTTGGGGGACTTGGTATTCAGGGCCGGAGATG -GTGACAACGTCACTAGCAAGTTCAGTGCTTGGGAACTCGATCTTGCAGTTCCATTTCTTC -TCAAGCTCCTCGATCTCGGTCATACGGGCCAATAACTCACGATGGTATAGGCGGTTGATG -ACGACTCTCTCGGCCACGTATTCGGCATCCTATCAAATTGTAAGTTGCTATCATGAAATC -AAAGCTGTGGGCACCGTGAGCACTTACGACTTTCTCGACCATGTCCATGATCTCCTGCTT -GACAAGGTCCAGACTCTGAGCGTTGCGAGCAGGGGTACGACAAATCACGTTGTCAACCTT -GATGTCGTCATCCTCCTTGCCCATGCCGCCGCGGTCCATAGCATTGGAGAACTTCACGAA -GACGGAGTACTTCTTCATGATGCGCTGAATGTGTTGTCCACCTATTCCAATGATCCGCTT -GTGATATTGGTCAGGCACATGGAACGAGATCGATGCAGGCATCTCCTGCTCGACCAAATC -CAGGCCATTCTTGGTCGACTCATACTGGTTGCCACACACATCGATATAGAAGTTATACTC -GTTGAAACCATCGAAGATGATCTGGACATTGCCTGCATACAAGTCAGCATCTCGTGCCAC -AGGTGGATATCTATCAAAACTCACTCTGTCCCATGATCTTATTGATCTTTCCGTTCTTCT 
-TGCCACTCACAAACTCCTTATGCTCGTTGGCTAGCTCAATCTTCACCCGCATCTGGTATT -GACTTCGCATCACGAAAGGAATCTGGTTAATCACTGTCATGGCAGCCTTGACTGCATCAT -CGGAACCGTTTATTGTGAACGTCAGATTGTCAAAGCTAACCTCCGCTTCCGAGTTGGTGC -AAATATCCGATAGCATGGAGCGAATATCGGCAGTTGAGGGAGCACGAAGACCACCTTGGG -CGGGGTCGGGCATGATGATCCACCACGAGGCGCTATAGAATTGACCAGCCTGTGCATACA -AATATTAGCAGATTATTCCGTCAAAATGTCAGGAAACGACGCACCAGAGCCATGATTTCC -CGAACTGTGCGCTCAACATGCAACACCTCGGTTCCTTGAATCCGAACAAGGCCGCGTTGG -CTACCCAGCTGCGGAAAGAGAACATAAGACCCGTTCATTTCCATTACCTTCCGGACCTTA -TCCAAACGATCGAGTAAGATGTTGTCGATTTTGCTCGAGCTGACCACCACATCCTTGACA -AAAATCTTTACTCCCATAACCAGTTCTCGCAGCTTCTGTTTGGCGCGAGCAATCTGCTCC -GGTGTCTCACCGGTAATGTAAACCTCATCCTCGCTTCGACGGTTCGCACCAGGTGGTATA -TAGCCGAAAATTTGGGGGAAAGGCGGAGGGAAGTAAATAGCGGTGCCTGTAGCAGCCTCG -ATGAGCTTGATGTTCTTGCGGGTCCGACCACAGACCAAGGTGTGCATAGTCGAGTCCAGT -TTGATAGCGTCGACGTGGCGTTTGAGCTGGTCTCCGGTTAGTTTGCCGGTCTCATGATAC -ACGCACACAAAAACGTACTATTTGATCAATCATGATCAGAACCCGAGTTTTGGCATGTTC -AGAGCTCTCTAAATCGCCGTAAATGGCAACCCGGAAGCGCTGGTCCAAGCCATTATCTGA -GGCGTAGCCATATGACGAAACTACCGCACTATCCGCATCATGAAGCTTGGGGGTCAGTAG -GAAAATGTCAGCCCCGGTGTAGGCGGCTAAAGTGTCCAGGTGCTCGAGGACGCTAGCTCT -AATTCCTTTTGGTGATCCATCTATAATCAGATGCATATCCACGTCAACGGTCGCACATCG -CTGATAAGAGGTCCGATGTTAGCAAGGGATGTCTTTGGACTTGAAAATCCTGCTGGTGCC -GGAACCAAAAAAAAAAATTCGTCCACGTACCAGCATGATCGGGGTCTCGTTCAAGATTCT -CGCCCGCATCTTCCGCGCCATCTCCCCATCACCTGTGACACAGACGTTGGTCACCAAGCC -GTTCGGCCGGCGTTGTAGCGCTGGCACAGCCTTGGCCTCAGAGGTTGTGAGCGTAGCTTC -CACTCGTCCTCCTTTCTCTTCTGTAATCTTCCGACATAATCTCTGCAAACCCTCGACGTT -GTTTGTGTGGACGGGAAGCTGATGCACCGATGTGCCCTCTAGTGTGCCTTCCGGAAAAGT -CCAGCGCTGGAGAGCGCCGGGACTGGCGTGGAGCACATCCTCAACATCTGGGCCGGCTAG -TGTGGACGAAAATGGAACGTTGAAGGACAGATTGACTGTGGGATCGATACTAGTCGGTGG -TGTCATGGGCGGCGCACCGTGTGGTCGGCGAAAAGCCATGTCGGTGTGAGGTCTGGACAT -CCCGAAAGTCGGCGTCATTCGTTCCCAACCATGACGGGCTTGGCCCGTGAAGTTGGATAT -CTTGTGCATGACTAGGTAGAGAAATCTCTAACCCCTTGAGCAAGTCGAGGGGATAGAGAA -AGAGTGAATGGATTGGGTTGGGGGGNAAGGAATCAGGCCGTTCTCGACAGGGATGGGAAT -GACCAGTGTGGGCGTTTCGACCCGGGAATGGTGGAAACGAAGATCGATGGGGCCAACCAA 
-CGCAAATCTCTAATATGAGAAGGAAAACGAAATGAGATCAAAGTCACTAGGGAAAAAAAT -GTATATATATCTCaaaagaaatatgaagagaaattgaaaaggaaaaggaaacaaaacaag -gggaagaaaaaTAAGTCGTCAGTCCTTTGCGGGTCTTCTAGGTCTTCTACTTGATTTTGC -AGTTTTGACTTATATTCCTTTTTAGTTGACACCTTATGTTGTACGGAGCAAAGATATAGC -TATTGAAAGGGATTTCAAAGGTCTTAAGAGTCCAAAGCCCCTGTAATATCTTCTTCTCAA -ATCCACATTGCCATTCAAGAATGCAGGATCTTGAATGTTCATTCGCAAGGGTCTTCTTGG -CAGATAGAATCAGTCCAATGTTGGATTGTTAATACCCAAGACAGACAATCTTATCTTGTA -TGAGCTACCTGGCAACCTTGGAAGGATCCCAGTAAAGCCCGAGTTTGAGTTATGAATCGT -GTCTCATACCCTGCCTCATCACGTGTGTGAAAGTGATGAGATGTCGTGGAGTGACCATTC -TGCGCCTCTCTCAAGGCTGGATTTGGCTTTAGTGCCGGGCTCCAGGCACTTCGGGATCCG -ATTCATTCCGTACCGTACCTAGCATACGCGGGTCGATCATTTCATAGGCTCCGGGATATA -GAACCCTGTATGCCGCCGTATCTGAATCCACTGGCCTAGAAGCGGTGATAGGACGCCCAT -CACTGCGACCGGCTGGGCCGGGGGGGAAACTGAGGGGTCAAGCCCAGGCATGTGGCCAAG -GTTGATTATATCGCCGTTAAAGATTCTGATTTCTGACTATGTTGTAGACTCCACGGCTCC -GGGGGACCATCTGTTGTGCTTCCGATACAAGTGTCAATCATATGCACACAATGCATGCAA -TGGACCGGGGAACTGAGAACACCAATTGCCTTGTCTAGGTGGGATAACTTCCCCCTCAAA -TAGTCGACGTCATCTTCTCGTATAGGTGGTAGTGGGTCACTTCTTCAGCTCAAGGACTCG -ACAATTCCGACTGGACCGTTTTGAAAATCAGACTGCAGGGTCTCCCAGTTCCAAAGAGGG -GACTCATCACATCGGCCCTGTTTTGGCTTGTAGATCAAGCTTCAAAGTGTTTCATGGAAA -TCTGAGTGAAACCGTCAATCGGTGTATGAAAAATACATGGTTTGAATCGTTTTTACTTAT -CTTCCCAGACCAAATTGGAAATCCGAACTTAGTACGGCAGAATAAGTCATCCAATTTCAC -CTGTATAACGACAACGGCGAAATATGACAGGGGTTATGAAGGGATGAAGGGATCGTTCAT -GAAATTTCTGATCAGTACAAGCCTTTCCAACTCCATGGAGTAACTCTATGGGTCGCTTAG -GCGGATCGACCGCTACAGGTCGTCGATCATCGCCATTCTCCCGCAGAACCATGTACTCCA -AACGTTGTACGTTGACCACTTCGGCAAATGACTAACAATGATCGTTATTAACAGTTAACT -ATATTATAACACAATCGCGTCTGAAGTGTTATAGAGTTCAAAAGGATCTTCTTAATGTGG -TAGAAGCACTAGCCTACGTTGCATACCCGCCCATGTACAATATATGTAGTGGTCTAACCC -CGGCTATTAATAGCGACGACTTCCGTCAATTCTAGCTTCAAATTATTAAACTCCCGCATA -CAGGTTACTGCATAAGAGTTAAGCGCTACCAATTGTCCCACAGATCTTTTGACCTGTTAT -AGCATACTATTTTATGTCCCCGTTTGTAATTGCTCAACCTGCGGGTGAACGGAATTATAT -TAGTTGCCTCGGGTACATCTAGATTGTACGAATGGCAAATCTTAGTATTGTGTACTAGTG -ATACGAAACGCCAGCCACTCATGGATTCATTGATTTTTTTTTCGGCTCCCCCAAGCGTCG 
-AATCAACATCAAGACCAATGGAACAGAAGCTTGTTTGAATCATACGGCAAATCATGCCTA -AGTCTATATAAAAAAAATGAGAGTGTGACTGGGCTCTGGTCTCTAGGGTCATAAGCAATC -ATTTGAAGTCGAATGAGTCTCGAAATAAGTTTGATATATATACTTGATCTGTTGGTCCTA -TGAGCACCACCATGTTGGACCCAACATAGCAAGCTATATACGAGCTCGTCAAGGCACAGA -GGTCGATAAGCACGGAAAATCTGATTGGTATAGTAGCACTGTGAGAAGAAATTTTGGCTG -CTAATGGCTACAACTATTGTTTTCTGAGGCAGCAATGTGAATTCCATCCACAATCCTTAA -CAACCCAGTTCACTATTAAAACAGACCGTCAAATGGATCCAACCAGGCTGAAGCTGAGAG -CTACTTGTAGGTATGATTAAGGCTAGGACTCATCAAATACGGTTAAACGTCTTGATATTT -CGTTGATTCAATGCCTACGATATGGTCTGCCCTACACGTTTTGCCTTGCCATTCTCCCGG -TTCCCCCTCTCTCTCACGCCAGCTTATGTGGTAACAAGGGGCTTGAGGAGCTGGAAGCCC -TTGATATCACGGGGCTTATCCGATGCAATGCACCTGGTGCGTAGGTCCCCGGTATGAAAC -TGTTCATTTTCGAGACAGCCATACTTCATTTTTTTACTCGCACATGATGAAAATTTCATG -TCGAAAAGTTCATATGAGCAATTTACCTCATTCTGATGTGTAAGCAACTATGGCTATCTG -TTTCTTTCTTCATTTCGACCTTGAAGCTGAAAATACAATTTACCTGCTTTGATCGAGTAT -GGCAGGCTAATAGGGCAATCACACCGTTACAAGCATGCCCCACAGATTGCTTGTTCCTCC -CAGTCTTGCGAGACAAGTATCATGTCAGCCCTTAAGTGGTGAGTTGGAACCCGTTGTGCT -ACGATTTGATATGTCTTTGTGCACCAACTTTTGGCCCTCCGGAAATGCTTGATTCAGAAT -CCTTAAATAATGATATCAACCTCTTGTCTGACGTGCTACTCATAATGTTGATAATATTTG -GCCATCTGATGTGAGTTGAATATAAGTACCTTAAGGGCAATCTACCTTGGCTTCTCCTGA -GTCCTAGTGCCTCCGGATTCTCTTGAGTTGTACACGATCAGGTGACGGTTTATTTGCAGG -TAAAAGCATGATACAGCAATTCGATACCGAGTGCCCTTGTTAATATTGAGCAAAAAATGA -GTTCAACGCCCACGCAAATTGTGACCCTCTGAGTTCTGGTATGGCCCGCATATGCCAACT -TTCTCGAGGACACTGCTGTTATGCAGAAAACTTGCAAATGATGCCTCTAGCAGTGAAGTC -AAATCATGTCCACAATGCAGATAAGTTTCCATAGCTAACGTGAGTTTCATCATATTGAAA -AGTGGCTAATAGGAACGAAGGTCTGACAAGAGTCCGTGGATATAACTGCCACCAATATCT -CAGACGAGCACGAGTAATTGACAGATTGCAAGATCTTTCAATGAGAAGATACTCAACTGC -CAGGTTGTCCAAGTAATGAATCAAGGAAGATGGAACACCACATCGAAAACCTCGGCTATC -TGATTTACAACTGGGATTGCCAGGCGAATTTCATCAGGCTCGAGCAAGAACTATGATCGA -GGATCATGTTTTGTACGAATCTCATATGCAAAGCTTTGGTACGTAGTAGTAAGATTATAG -TTTTAGAACCTTGCAAAGGTATGAATCGCTGGAAAAGTATATTTAGATACAGTCAAGAAA -TGCTGAGCAGATAAATAGACCCCCCAAGCGCGACACGCTACTAGTAACGCTGTGAGCCCC -GTTGAATGGTATCATTAGCACCGTCCAACGCAATGCGATCCCCTTCTTCACCTTCACCTT 
-CATAGTACCGTCTATCCCGAATGTAGTACGGTCTTTCAACGTATTCCCCAACTTGTCTGT -CCTCCATGAAGATACTGGGCTCGCGGCCCGGGCCACTTGGTCTCACGCTCCCAACTCGCT -GAACAATCTCATGTGAGTCTTCATAGCGAGGTTGTGGAGGACGAACACTGGCTGTCCGAA -TTATTCTCTCCTGGGGCACCCCACGCTCATCAACATAGACGGCATGGCGAGGAAGGTACT -CCGCTACTTGTACACTGTTGCTGCGTGAGTAAGGTTCATGGCCTTCCAAAGCCAAGGGAG -CAGCGTAGCCTTGTCTCTCATGGGCTACTACGGGACGAGCATATTCTGACGTCACTCGCC -TGTAGACTGGCTGGGGCGGGGGCATTTCATGCAGATAATGTCGTTCACCGTAGGGGTCTT -GCACAACTGAAGGCGCACGAACACTACCCGCGTGATGATGCACTCTTTCATCGTAAACAT -CCTTTGGCATTTGAGAGACTGGCCGGGGAGGTGGCGGGGGCGGAGGAGCCATCGGCTGCA -GTCTTGGTGCCTGCAGCAACTCGTAGTACTGAGTGCCATGCTCATCAACCACGATCCGAC -GTTGAGGTGGTGCCATCATTCGTGCTTGCGGTGCCTCTTCGTAATAAGTCTCCCGGTATG -TTGGGGGTGGCATGTCATCCGCGGTAATGTAACGATGCTGTGCGTAAGGAGGGGCTTCTT -CGTAGTACCTTGGCCGCTCTTGGACTGGGCGTTCCACAATGTATGATGCCGCTCGAACCG -GGCGTGGGTGGGCCTCAACATACTCACGAGCATAGTCTGATTGCCGAGCATACTGTGAAC -TTGCCAATCGTCGAAGATCGGCTTCTTCCCGGATTGGTCGCCTGATGCTCAGACGAGAGA -TTGTGCGCTGGGGACCAGCATCTGGTAGTGGGCCGTGGTACGGATCGACCTCGTAAATTG -GCTCTCGCACTGGTGGTCCGCGACTTTCGTATACTGGAGTATAATGAGGAGACGCGATAT -CAATGTAGATGGGGCGCTCTGCAGGGTTTCGGCCGCGAACAGCAGTGTGATCGTCGGCGA -ATGGTGGCGGTGAAACAGGTTCCTCTTTGATGAATGCCTCGATAGGTTCTGCATTTTTTG -TCCTGTGTGAGATTGGACGGGGCCCTTCATCACCTTCACGCAGCCTCCTCCGTTTCCGGG -ACATAACTTGAAAGACCGGTGAATTGGGGCTGCCACGAGCAGAATCAGGTTCCAAGTACA -CCTGATCAGAAGCTTGTTCAGATCTTTCATCCCGAGTTTGCTGGACAGAAGGAACCTTGG -CAGTGGCCAGGGGTGACACTCGTGATGGCCTGGGAGCTGCCGGTGATGTGATATGGTTTC -GAACTACCTTCACGCTTGCAGGAGAGGGGGGCTTCCGAATATCTCGGGTTCGATCGGTGT -ACCTGGCATCTGGCCTAGAATTTCTTGCAGCTCCACTTTCTACCGCACGGGGATATTCAC -GATGATCCACTGGCGGCGCCGCTGGAGCAGGTGGGGAATATTCGGGTTCGTCCACGGTCT -CGAACTGATGCGGATCGCGGAAGCGGCTGACGGGAGCCTCGGCTTTGTGGTAGCGGCCCT -GCTTCTGAGAATGGGGGTGACGCTGGTCAGCAACAAGGTAAGCATCTGGCATTTCCTGAT -CAGATCCAGGACGGTTGAGAGCTTCAAGTCGCTGCAACTCATCTGCGTAATGGGTTACCG -GAGCACGCGTGGCAGGTCCTGTGGGTGGAATTATAGCCACGGGGGATAATTTTTCTTGTC -CGCCAGTCTGGGGAGGCGAGTCAGGAGCTCTACTGGAGTAAAATGAATTGTCGTCGAAAG -AGTCACTGGCTGCACCACCAGGACCACTGGTCTCAGACACATCGCTCAGCGAGAGTGGTT 
-TCACCGCCTCAAGAGCTTGTTTGAAAACCTCCGAGACATCAAAATCGGGTTTCGTGTCTT -GAACCGCAGGTCTCTGTTTGGTCTCTTGTTTTTTCAGCTCCACTTGATCACGCAGTGCTC -TTTCCAACCTCTGCCTTTGTAACTGTAATTCCGCCCTGATTAGGTCATCGGACTTGGTCA -AAAAGATGGGATCGATCTCGGATGTAGGTTTTGAAGCCACACGGGCGGGTTGTGGTACTG -CAGGCAAGGCATTTCTGGCCAAGGCACTCTGCGAGTTATTGTAAGGCTGTTTTGTAGCCG -CAGCCACAGGCGCATTCGCGGATACAGCTTTCGGGGTAGGGTTTTTAGGTAAATGAGTTC -GAAGAGGGACATTTTGAACATTTCTCCCAGACTTGCGCACCAGGTGCTGGGGGACATTGA -GCCGGGGATGAGTGCCCGAGAAGACTTGATCATTGATGGCTAAAATTTTTTCATACTCCC -GTACCTCATCATCCGGTGTAGGGGCTGAGATAGCCATGGGACCTGCTGAAAGGTGAGTTG -CTGGCCCATGAGTTGCGGCCATGGTCCGAGAGAAAGCTAGGGAAAGGGATCGGTGAAGCT -AGAACAAGATGAGAGAGACTGGAAGCAGATAAGTTTGCTAACGCGTAAAACACACGTTCA -GTATCCACTCAGTCCAATCTACGATTGAGAAAGACTCTGAGAGTTGGATGGTAGAGCAAA -TAATGAAGTGGTTTAAAAAGTTCACCATGCTGGCCGTTTGGACGCTAGATCTGATTAGGT -AAGGTCACGGGTGTGTGTTGGACATTGGGAAAGTAGAGGAGAGAGAAGAGAAAGCCGACC -TGACTTTTGCCTTTTCTTTGACCTATGCATCGGTCATTTACATTATCCTCTTGTCGGCTT -ATTAGACCTATTGTAACCTTGCCTGGACCTTCACCTGTTTATCTACCCCTGCGATTCCAT -AATTTAAGATCGATGGCAACCAACGGCACCAACGGAGTCAATGACTCACGCCCAATCTTT -TTCTTTGATATTGACAACTGTGTGAGTCGAGAGATCCTGAAGTTGGTATCTTGGTCACTA -ATTACTATGTGTTCCTACAGCTCTATTCTAAGTGTTAGTGGTCTTGATTTCCGCTCTTTA -TGAGCTACTACTAACGGTGGTACCTCCGCGCAGCTTGCAACATCCACGATGAGATGCAGA -CACTGATCAGTGAGTGAAGCCGACAAACAGTATGCCGGAACATATAACTAATGCAACCTC -ACTCCTGCTAGATAAATTTTTTGTCAAACATCTTGACCTTAACTCGGAAGATGCCCACAT -GCTGCACCAGAAGTACTACAAGGAGTATGGACTAGCTATTGAAGGCCTTACCCGTCACCA -CAAAATCGACCCGCTTGCGTTCAACCGTGAGGTTGATGATGCTCTACCTCTTGACAAAAT -CCTCAAGCCTGATCCCAAGCTGCGCAAACTGTTGGAGAACCTGGATACAACAAAGGTGAA -GCCATGGCTGCTCACGAACGCCTATATTACCCACGCAAAGCGCGTGGTGAAGCTACTTGG -TATCGAGGATCTGTTTGAAGGCGTTACTTATTGTGATTATGGAAATTTACCCTTGGTCTG -CAAACCTAGTCAGGATATGTACGCCAAAGCGGAAAAGGAAGCTGGTGCCCCCAGCACGGA -TCAGTGTTATTTTGTTGGTATGTCATAATTCCCCGTACGATCATGACAGTTTTGGACTCT -CTGATTCTGATGGCGTTTAACTAGATGATTCCCACTTGAATTGTGAACATGCATACGCTC -GCGGATGGACTACCGCTCACCTTGTCGAACCGGGTATTCCTATACCACACAAGCCGGCTT -CGAAGTACACGATCTCTAGCCTCGAAGAGCTTCAGGTGCTTTTCCCACAAGCCTTCAAAA 
-CGCAACTCAAAAGAGATGCTGCTTAAATGAGGACAGGGCCATGATTTTCTTTGTTTATTC -CCCCTAGACTTTTGGTTTACGACTATGAACGTTTATGATTATGGGCCTCTCTATATAGCC -AAATATATATGGTTTTGGATAATCTGAAACGAAGTCAGGGGTTCAAACCTGCTCTAATTA -TATAATGAAGGGATCATTAGTTAACCGTATCTCCCATAAATGCGTGCAGTGATCTACATC -GTAGTTATATTAGAATTTTTGCCTCAGGCAGTGAACAATCACTTGACGCGCTCCGCTGGA -CTCAGGAGTTGCCGTGTTTCCGTTCGTTCAACAACCAAAACTTCTCGGTTATCGCTCAAG -AGCTCGTTGTAGCTCTTGAAATTAGTCTCAACATGTCTGGCCAAGCCTTACTTGTAGGCA -CGGTGATTGAGCTGCCTGATGGACGGCAAGCAACAGTGCGTTTCATTGGCAACACCCACT -TTGCAGACGGACAGTGGATAGGCCTTGAGCTAGATGAGGCCACGGGCAAAAACGATGGCG -CAGTACAAGGAGAACGGTATTTCGATTGCGATCCAGGATACGGCATGTTCGTCCGCCCAA -CAGTGGTTGGTAAGATTGTCCAGCCCGGGCCGGAGAGTAAACAAACGACCAAACCATCCA -CCAGCGCTGCAGGTAGCAAAGCGCAACCTAAACCCGGAATGTCCACCGCTATGCGGAAAC -AACCTGGGCTACCTCCGACAGCAGCGAGGCGACAAAGTACCAATGCAGCGTCCACTCCTA -CTCCGGCGCCGAAGGGTGTGGCCGCTCGGTCGTCTCTCCGAGTATGTATTGTCCTGCAAT -GACATTATCTGATAATATGCTTACCTGGATTCCACCTTTCTAGTCACCAACAAAGTCCCC -AACGAAACAGCTTGCGACGACTGCGGGTCAGGCTACTCGCCCGTCCGTCGGGGCCACTTC -CCGTACCTCTACAGTGGCGTCCAATCGACCCCGACTAGCTCCCAGCACCAGAAGCTCCCT -GGGTCCTTCTCCTACACAACCTGCAGCATCTAGAGGCTCAAGATCCTCGGTGTCCGGACC -AGCAGCCCGAACATCAAGGCCAGGCTCCCAAAGTACCGTTGCTTCAGCAACAGCAGGATT -GACCAAGCGCCCTTCACTACGGCAGGTTGGCAACACAAAGGTATCCGATGGAGGAGACAC -GGGCATAAGCGGGCGGTCCGGAGATCTAACCGATACTGAGTCGCCTGACACTGAAGGGGA -GGGTGCACAAGACGAAGCCACTGCGCCGAAGACCTCTCGACAATCGATGACAGCTACACG -TCCAGCGACCTCTCGCTCTGGAGTCTCACACTCTGCATCTCAACGCCAAGGGCAGAGCGG -TGCTGTCAATCGCGAGCTTGAAGAACTCAATGCCAAACTCAGAGTCATGGAAAAGAAACG -AGCCGACGACAAGGAGAAGTTGAAGACCCTCGAGCAACTCCAGACTGAGCGTGATAAATT -TGAAACGATCATCCAAAAACTGCAAGCCAAGTATCAACCGCAGCAACTGGAAGTCACCGA -GCTGCGCAAAAGGCTAAAAGAGCTTGAGACTCGCTCTGACAGCTTGGACCGTATGCAGGC -AGAACATGAATCGCTCATGGAAATGGCTACTCTTGACCGCGAAATGGCAGAGGAGACAGC -GGAAGCATTCAAGCACGAATGTTCAGAGCTACGATCAAAGTTAGACGAGCTGAATTTGGA -GGTGGAGGTGCTCCGAGAGGAGAATGAAGAATACAGCCAGGAAACGACTCCTGAGGATCG -AACCACTCATGGGTGGCTCCAGATGGAGAAAACCAATGAACGTCTCCGCGAGGCACTCAT -TCGTTTGAGGGATATGACACAGCAGCAGGAGTCAGACCTCAAGGCCCAAATCAAAGAATT 
-GGAGGAAGACCTCGAGGAGTACGCTGCTGTCAAGAATGACTACGAGGCTGCAAAGGAGAG -AATCTTGGTCGCTGAGACCAATGTCGACGATCTTAAGCAGCAGCTAGAGACGGCACTGGG -GGCCGAAGAAATGATTGAAGAACTAGCAGACAAGAACATGCGCTACCAGGACGAGATTAA -TGAGCTGAAAGCTGCCATCGAAGATTTGGAATCTCTCAAGGAGATTAGTGATGAGATGGA -GTACACCCATATTGAAACTGAGAAGCAACTCCAAGAAGAACTTGAGTACCGAGAAAGTGT -ATTCAATGACCAATGCCGAAAAATTACACAACAGGATGAAGTGATCGAAGACCTTGAATA -CACATTAACTCGTTTCAGAGATTTAGTGACAACCTTGCAGTGTGACTTGGACGACATGCG -CACAACCAAGCAAGTGTCAGAGGCGGAGACCACCGAAAATGCAATGCGCCACCGCCAGAT -GCAGGATTTGAATATGAAACTACAGGCCAATCAATCGAAGGCCCTGACTAAATCCATCGA -TGTCGAAATGAACCGTATGGAGTCTGAGGAAAATGCACAGCACCTTTCAATGGTCAAGCT -CTATCTTCCAGAGTATTTTGAAGGCGAAAGGAATTCCATTCAGGCCTTGCTTCGATTCAA -GCGTGTCGGATTTAAAGCCAGGGTTATGAGCAGCACTCTACAAGACAAGGGATCTGAGCA -CTCATTAGTCTCCAACGAAGAACTTTTTCAAGCCCATGTTGTTTTGGAATGTCTTATGTG -GATCTCGAATGTGTGTGATCGGTTCGTGAACTACATCACAACTTGCTCGCCCGAACAATT -TGCTAGCATCAAGGTTGCATTGTTCGAAATGGAGCCGGTTGAGCGCATGTTGAACTTTTG -GATCGAGCCTATGAAGAAAGATGAAGTGAATCTTGCCAAGCTTGCAGTGGAATTGCAGCG -GTCTATTGCTCTCCTCGCACACCTTGCTGAGACCCTCCTTCCTTCCAGCTTAGAGATGTT -TGCCGATGAGCTGTGCATGCGGGCTCATTTATCTCAATTGTACATTGAGCATTCGGCCGG -TGCAATTTCACGCGTTAAGCTCATTATCGGCTCCAAAATGGCTGCTGCGGCAGAAGGCGA -CGAAGAAAATATGCTTTCCCTGGATAAATTGGACGCTTTTCCCCCCCAGGCCCGCGGATA -TAAGGTCGCAATGGGTAAGATCAGCCGGTCCCTTGATGATCTGCGTTCAAGATCACTTGC -ACTTCCGAAGGAGGCCGACGAACCTTTCAAGAAAATTGAAAATGAGACTAGGCAAATTTC -AGAGCTGGCTCGAAAACTTGGCGAGAACCTTGTTGTTCTCACGAATGATGAAGGTCGTAC -GGAACCCTTTAGCCCGGAGGAGATTCTAGACTGCATGTCTCAGGCTGCTATGTCTTTTGC -CTCATCTTCCGAAACCCCGGATGAAAGCAACGATCCTATTTCATTGCTCTTTGCCAGATT -ACGGGAAATAGGCGACCAGTTTGAGGAGCTGGATTCCATCTCGTCAGATTTGTCTCGCAC -AACAGAGTTTGAGAGAGCTGCATTCCCCTGGGTTACCCGTGCCGCGGAATTGAAATCCAA -CAAAATGACATCCCCTGATGCAGACGAGGAGATCCGTCAATTAAGAAATGAGATTCACGA -GGCCTCAGCCGCACTTGGAGTCAAGGACAATACGCTTGAAGAGCAGGGTCTGAAGATTGA -GCACCTTGAATCTAGGATGCGAGAGGCAAGCAAAAAGGCTGCCATGGTCAAAGATCTCGA -AGCCAAGATCGAGGAAATTCAGACAGCAGGAAATGAGCTCGAAAAAATTGTCGAGCAACA -AAAGAAGGAGCTTCAGGTAGCTGAAGCAGAGCGAGATGAGTTCATGGCTCGTCTGGAGCG 
-AATGAAGCGCATGTCTGGAACAGCAGGTCTAACAGCCACGGGTAATGGCGCTGTCATTGC -CAGTGAGGCCTCTTTAGCTGCAATGGACGAGAACGAGTCTCTCCGCGCTGAGGTGGAAAG -CCTCCAAGCAGCCGTGCGTTTCCTACGCGAGGAAAACCGCCGTTCTAACATTCTTGACCC -ATACTCCGTCCAGCGATCCACTGAAATGCACGCCTGGCTTGATGCCCCACTCACGCGTGT -CAAACCAACACCTGAACAAGAGAAGGTCCAACGCACCGCTCTCGAAAGTCGGGACGTCAT -GACTCACCTCCTCAAACTGACCAAAGAGTCCCGTGTCATCGATCTCAAGTCCACGATGGC -TGCTCCTGCTAATGACGATAATGACTACACCGGCCGCACTGCCTGGCGCCCGTCCAAGAC -CCGCCTCCGTTACCAGGTACTCCAGCAACGCGAAAACTTCGAGCACTGGGCCGAATGGCG -CGATGAAATTGTGAATCACGAACGTGAACAGGACCGTCTGGCCGCCGCCAAGCAGGAACG -TGCACTGCGTGATCGCGTCTCCCGACATGCCCACAAGGCTTCCGTCGAATTTCCTCAGGG -CTTAGGTCATGGCATGATGGGTCGTGCCTGGCAGATTCTAGGCATGCAAAAACACCGTAA -AACCGGCTCCACAAGTACACCTGCTCCGGACGGAGTAGAGGTAATGACCGCCGACTGAAG -TTTTGATTTCGGTTTCTGAAAGCCAGGACATAGATATTTTGTGGTTGTGCATGTATTCAT -CCTTTTTTTAACATGTACTCTATTTGTCCTTCCGGAAGCTACATTGTCCTTTTTGAGATA -CCCTGCAGAAATAGTTAGCGTCAGATTTTGTGTACAATCAACTGAGGTATATTCCAGACG -AGTCAAATATAATGGCCATATACCATGTGCATCACTCTGGACCTTGCATTTTGCGTTTCT -GTTGCATCAATGTTCTCCTCACCACTAGGATGTGTCTTAAGAGTTGTATTCTTGTACCTA -TTAAAGATATCATATAGTCATATCGACACTATATTCATTATTACTTCCTAGGTAAGCTAT -GTGCCTTGATCAATCTCCTCAAATAGCAAAGACTATCCAACCGGAATAGAACTAAGGGGT -ATTCAATAAAAGTCCAGAAGCAAACACCCAGCAAACATCCAATCATTCCTAAAAAAAAAC -AAAAGACACAGATTTCTAACCGAAAGTGGGAAGGAAGGAGACAAGTAACCCATTAACACA -ATCCTTTGATGGCCCAAAGTATCCTCTCCAGGGAAGCTTCGTATAATTTAAAACGCCAAT -CCAATACCGGACTCCTGGTACCGCATATGTCCATTGTCGTGATAGGAAGAGAACTTATGG -TGCTGCTGCTGGCCTTCAAAGATGGTCGGGTAGCAGACAGCAGAGTCAACGTCCTTTTCA -ACAGCAGACAAGGGGACCTGGAATGAGGTGCGGGGGGTGATAGCGCTAGAGAAGGCACTA -GAAATGGAGCTATTGTTCCGTACTCTTGCGCCGCCGGTGATGGTCGTGATAGTCGATGCG -ACTGTGGTACCACTCAATGTGCGAGTTCGTTCCCAATTGAACCCGTTGTCGCCGGGAAGA -TACGCTTGTTTCGGGGGCGTCGAGGGCTTGGTGGGTGTAGGGAACATCCATTGATTTACA -CGATTGGAGATAGAAGAGCGGACTGAATCGATTGTGTCTCGGTCATCGGGCAGCGGAATA -GAGGGGCAGCGTAGAGATGAGATTTGTGATCGCATCGTGCGTGAGCTGGCTGACGAAGGA -CTGTCCTTGGATATCGCCGAGTAGTTGGATGGGGATGTATTAGGCGATTCTGTATACAAA -CTCTTGGATGCAGGTGGAGTTCGAGGTGCTGATTCCCAGGGCTGGCTTGTATTTGCGTCC 
-TTTGGCCATGGGGTGCTCAGCGTAAGCTGGTTCCCTTCCGACTTTGACCTACCATGGACT -GGAGATGTTGCCTGAGTTTCTGAAAGAGGGGTCTCGGTGGAAGTGACCGTGGAGAAATGC -GGGACCAGAGTACTAGCGATGCGAACAGGTTCTTTTACAAGCGATGGTTCCAAGTTTCCA -AGTGATCTTCGTTTCTGGGACCCGACTGGCTTGCGAGAGACTACAAATACCTTTTGGCAA -TGTCGATCTCGAATGCCGGTTGAAGTCGTTAGGGCACGAGGAGGCACTGGTAGCGTCTTG -CGCTGGGTACTCTCGTTGAGCTGGAAGTCCTCAAATGAAGGTAGGTCTGATAATCGATTG -CCTGATCGATGAATGCGGAGATCCAATGGTTGATAAGTTGGTGGTGGGGGAGAGTAAAAT -GACTGAGACGGGTGATCGTGGAAGGATTGCACTCTGCGAAAGTCTGAAGGTGCGCCAATC -TTCAATGGGGTGGTTGAAGATCGATTTAAAGTCATGCTTGCCCGGTTAGAAGCTCGTGAG -GCCAAGTTGCGACTGTCGTTGAACCATGATGACAGTGAGTTCCGATGTTGATGGAAACTT -GGACTGGGGAGGTCATCGTATACCACCAGCTTCATGGGTGGGACAAGGTGTGGTTGAGAG -TGGCATGCTTTCATTTCCCGTTCAATGCCTGCGATGTATATTAGTCAATCATCTCTGTAG -TTTTGATGTAGTTGGACACATACCGTTCCCGGCTTGTCTAGGTTGACAGCCATCCTCACC -CGGTAGACAAGTCAAGCACGATCGTATCCAGCCAGTGAATAACGACCCCGCCTTCTCCAA -AGCCAGCATGATGGCGACCAGCGCGAGAAATCAATCCATCACCTTGTAGGGAATGgaaag -aaagaacgaaagaaaggaaagGGACAAACCCGTTGCACAGGCAAATCAGCCCCTCAGACG -GGATGGAGACATCCTATAAGCCAGGTCAAACATGAGCGATTGCCCAATTTGTCTGGGCAC -ACATCCTGATGGGCACTAATGAACAGGGGCTAGGCCCAGTAAAGAAATACATCCTGACGC -CAGCGCATTTAGCAGCATTGATCATCGGGTACGGAGCTGATTAGAGCATTGCTATTTGTT -GGCGCCCTGTGGAGTGGTCACATCTGTGCATATTCTTTCAAGCTATACCCTGTCAAGACT -GTCAAGTACTTTTTTTTCTCGTGATGAGAAAGTTGGGCGCTTACAGAGATTGAAAGGGGA -TTCCTATTTATAATTTATTGTTTCATGTATTTTGTATTCTTCTGTTTCGCGTAGAAGGAT -ATATACAGGAGTCATAGACAAAGAAAGGACAAATGAAGAAATGAGTAAATAAGGTAGATT -ATCATGGTCCAGAATTAATCGAAAGACAAAGTACACTACAGTACGTGCTACTTGCCCCAT -ACATTGTCCTGCTCTAAGCCATCGTAGGCCCATTTTGGCCGATCTTCGTCATGATTAGAC -TCAGGCTGTGTTGATTCGGCTTCAGCCTCCTCTTCGATTGGCTCTCCGGAGGGTCTTCTG -GAAATTGGAGCCAGGATTAATAAATCTGCTCTAATATAACTAGCGCTGGTACCAGCCAAT -TATTGGAGAACTTACGCGAATCGTCGGGGTTCAGGTTCTGCCCAGGCAGACCCGTATTCT -CTCTGTATTTGAGCTCGATCTTCTTCTTCAGCCTCTGTGTTCTCCACAACAAGGTCAACC -AGCTGGCTGCGCTGCGTACCACTGGAGCTAGTGGAGCTGACATTTCGCTGCAAGGGACCG -GGAGTCGCCGAGCTCGGGCCAGGCTCATTGGATTTCGGCTTGGGACTTTGCCGGCTCTCA -CTATCTGACGTGCGACTCTTCCTGTGCCTACGAGCCTTTTTGACATCTGCCTCTCGGTTT 
-GAATCTTGATCTCTTCGCGAGGGTAGCAGCCGTTGGTCCCTTGGCACCTCGTTGGCGGTA -ATCACCGGGTACTATGCATGGTCAGCATCACAATCACAATGAACACATTGCTCCGACATC -AACTTACATTCCAATCTCCAAATTCCAGTGCGCGAGCATTGAGGAATAGTCGCTCATCCT -CGTCGTCCATTGCGGTTTCATCGATTTCACTATAAGATCGGAATCGCTCTCTCGAGGTTT -GACTGGGCCGTTTACTTCTGGGGCCGCGCTCACTGGGTTCGCCACTCAGTAGGGGCGTGC -GGCTGTTGGCTTCACCCGGTCTGGGAATCCAGTATTTTGCCTTACCCCCACGCTCATACC -TCATGCCGTCCATTACTCGCTTGACCCGCGACTCGGACTCCTCATGCGGGATGATATTAT -CACCGGAGTCAAAGAGACGGTACCCGTAATTCTTACCGCGAATAGTGAGCTTGGTGTCTT -CAATCAAATCTCGCACGCCAAACGCGTCGCGCAGCGCGTATTTTACGGGCATGCGTGCAG -CTGAGATGGTCGGGTCTGCATAGTCGTGCCACGAAAATGCATACCAGTGAGCCATAGCGA -AGAATGGCATCTCAAAACAGATAAGACTGTCCTGAATGGCCGCCGCGAGGTTGTCAGGGG -TGTACCCAGCAACATTGCCAAGCGCTCCCAGCCACTGCAGAATTGACAGGAAGAAGCCTT -GCCAGTATGAGGCAAAGATAATGAGTTTGATGCAGAGAAATTTTGGGATTGGGCGGAATG -GCGTGAGATCGTCATGTAGACACACCCAGAACATGGCCAAGGAGTAGAGACTGATCGTGA -CGCTAACATTATATATGATCCCTGTCCAAAGGTATCCAGAGCTCAGCCCAATATAACCCT -CCTGGTATGTGTCCGTTGCCTTCATTATGATGGAGGCGAGAGCTAAAATGGGCTTCAACC -AAGCATACTGCAGGATACCACGTTTGACAGCGAGGAATGTATGGGGATCGGAGACGTCAA -TCTTTGGTAGGAAGTGATTAAGAGGCCAGGCATGGGAGATAGGAGGGCGGCCGTGGGCCA -TGATAATCACGGCCCGCTCGCCGCCGAGGAAATTGATGAGGAGCTGGAAGAAGGTATAGA -TGGTGAATGCCTACCAATGCCATCCGTTAATACGGGCCTATCCTACAATCAGCTAGGGCC -CTGCTTACCTCATATATGTCACGGACAGGATCCAGAAATTGAGCTGCCGTCAGCGACACG -ATGCTCGTCCATGATGATATTGCGTAAATCGGGACCCTAAGTTCAATCAGCACTGCTCTT -TGTTCCCACCCCCTGACCTAGTATCGGAGCCAATACTCACATTAACAATATGCGCACCAC -GTAGCGTTGAAGCAAGGGTTTCCGGTAGTTTTTGCTGTATAAATGGTGAGCTGTATTTCG -GGACTTGGGATGGTATAGGCTCGTACGTTTGAAACCATATGGACCTGCGGTTCGAGACAA -CATGTCAGCTCTGGTGTCTACAACGATCGGACTCATGAATTAAGGATACGCAGGATATTA -GACTCACACGAAGGACAACAGCGAGGAAACCAAAGCCGAGACACCTGTGACAATGATGAT -TGCGCGTGCCAGGCTGGAACCTGTTCCTCCGCCAGCTGCGGTCATGATTGCTGAGATCTA -GGGACAAGTACGGTTTAGGGACCGTTTTTTGCCTGCGGAGATAATCCAGCTCATTCCGCA -GAATTGATAGTGACGTAAAACATGTTGGTCAAGACCGAAATGGGGAGGAAACCAAGCTGG -GGAAGGTTGGTTTGGAATGAGGAGGCGTGTTGTCCGATGACGATCTTCTCGTGACACCGC -TCTCGGACTGCCTAAACAACGTCTTCACCTTGCGACTTCTAAGATATTTGACCCAGCCAC 
-TTTGGTGTCGATGATTGAGAGGTCAAACTTGGCTCGTCATTCGCGTTCTATCGCGCTGGC -GGTGTTAATGTTGGCTGGCGGAGAGAGATGGCTAGTACAAGCTTGACATCGGCTTTCCTG -ACGCCAGATCCAAGCTTCAATTGGTATGGAAACGCTGCTCGGGCTAAGGCTGTTGAACTG -ACCATTCTAGGTTCTTTCTCTTGGAACTGATTGTCTCTTGCATTCTGTAAGTTTGCGCTG -CTCAACACGGCCGGATTACGCTGATCACAGGCATAGCGCTCTGTTCTTTTTGCTCTACTT -TAATCGACTCTTCGCGAACCTCATCTCCTATGCGATCCGAGCATACACCTGGCATTACTA -CCGCGCATACGTTGATATCAACGCACTGCAAATCTCCTTACTTGGAGGGAGAATCTTCTT -CAAAGGCATTCGGTACCATGGCGTGAACGAGACGATCTTTATCCATGGAGGATTCATAAC -ATGGCGCTATTGGACGCGAGCAGTCGAGCGAAATGACTTGACAGGGATCCGACAGAAGGC -AAGCCCTTCTCATAATACCTCAAGTCAAGGACGTGTGAGGTCCCCGCGCGGTGGAACGGA -TGACAATTTGGGGGAGCAAGGTGGCATGGGAAACACAACCAAATTGCCGTGCCGGATCAC -TATCAACACATATGGCCTTGAATGGTTCATCTACAACCGGACTCCCGCTTATGATAGCCT -CCTTGCTGGCTTTGGATACTCCCCCAAGGATGTTGATCTAGACGATGTCAATTCCAAACC -CCCAGGGTCTCGCGCGACGGCAGAAAATGGCGACGTTAACAACGCCTTTAGTTTTGAGGA -TACGTCTAGCCATCAGCGAACCATGAAACCCATGTCAGAGCGCAGCATGAGTGAGAGGGG -CGGGGCTGCGTCACAGAGGACTAAGATTGGTGAATCAGAACTATCGGACCCTGTGTCCAA -TATGTTACAACTCTTGCCTGTGAAGCTGGATTGCCAGAAAGGAGCTATTGTCATGGGGAA -CGAGCACACTCGATCTGTGTTGACAACGACATTCGACACTGGAACAGGGACCGTCGATGC -TTCTAATTCAGGCCCTCTTGATTTATATCGTCAAGTGTTTTCATTTCAACTCAAAAATCC -AGTGGTCCAAATGCGACCAAACCCGGACTTCAAACAGAACCAGTTGGCGACTGCTAAAGA -CTTGGGTGCCGTGCAAGGGAAAGAGCCCACATCGAAGAAAAAACCTCAACATATGTTCAA -CTATCAATTCCAGAGGCGCCGAGTATGGCATAGCATTCGAGACTTAGTCCCGTATTTCCA -AACATCTGTGGAATCATTCCATAACGATGCCAAACACGCCAATAGTATGCCAAGAAGCCA -GGCGGAGTTTCCTGAGGTCCGTTGGACCGGTCTCTCCCGATATCTTGACGAAGGGAGCGA -GGATGACCATGAGAAATGGAACTCAGTTGAATATGGCAGGTTTTCTACCATCGTGGATAG -CCCTAGCTTGAATATTACCTATTTCTGGGACATCCCTGGACGAGTCATGATTCAGCCAAC -GTCAACTGTTCGGCCTCAAACAGCGGATAACAATATCAACTACGCTGCTCCTCCAGAATG -GGGGATTGACATTAAGATTGACGGGGGTACGATAAATTACGGACCATGGGCTGATAGGGA -GCGGCTTGGGCTGCAGAATGTCTTTTTTCCAAACTTCTATCGCAGCGCTGAGCCAACAGA -GCAACTAGTCCTTGGTGTTCTGCGACAAAGCACAGCATTTAGATTGCGCGTGGAAATCAA -CGAGGCATTGACATTGCGCATCCCAACACGAGAGCCTTCCAAGGATTGGCAGTGGAAGGG -CCGTGCTGATGCCGTTGGGGGTGCATCGAGGGCAAAGAAGCCAAGTGACCAAAAGAAAGC 
-TCGAGCCAAAGAAGGTGAAAAGGGCTATATTGGCCCGGAAATTCGCCCGTTTGGGTGGCT -TTCCCTTTGCGTTGCCGCCGACTCTACCATCAATTATACCATGGACATGGTGGGGTCGAG -CGCTGGGTTTCGAAATGAACTCTCTATCGACTTGCGAGAGTCGAGATTATCATCCAGCAT -CAATCATGGATTGCTCTGGGAATGTCCTCGGCAACAAGTCACTTGCGATCTGTCCAACCC -ACTCTCCTGGAATAGTCTTCGCTCCTGGAAATTTACTGTTGAAAGCCAAGACCTGCAGCT -TTTCCTTCTACGCGACCACATATTCCTTGTTACAGACCTTGTAAGTGACTGGGCCTCGGG -ACCGCCCTCCGACTACTACGTATTTGTGCCTTTTATCTACAACCTCGATCTCAATTTTTC -CAACTTCCAACTTTACATCAATGTCAATGATCGAAACATCATCAACAATCCTTCGGATCT -AGATGACAACCGATTTATTGCCATCAAAGGCAAGCGCCTGACGTCAAATATCATGATACC -ATTGAACAAATACCAACCTGAACAAAATGCCATCGAGTTCAGGGTCAACCTTGAAGATGG -CGGCGTGGACTACACAACCCCTCTTTGGGATACCCTGCACGAGTTCTTGCCGCAAAAGTC -CATGGCTACTCTCGAGAATTTGTTCATTGATGGCTCTTATAATTATTTCCAGTCAACGTC -CCCGGAATTGACCGACACGCTGATGCTCAACATCGATGGCCATTCACCTCGTCTATACCT -TTTTGGATTCTTGATCAAGAGTTTCATGACTGTCAGAGAGAACTATTTTGGTGAGGAGCT -GCACTTTAAGACACTCGAAGAATACCAAGAGCTGGTCTATGCCGATGAGCCGCCATCAAA -CCCCACAGGGGTCAACCCAAACCGAAAGTCGAATGACATGGATGTGCTTGTGCATGTCAC -AGTTGACGGCCCCCGTGCTTTACTTCCTGTGAATATTTATGACAACTCAAAGTGTATGGG -AATCAGCGCTGCCTCACTCGAAGCGGATTTACGCTTCACTAATTATTATATGGACTTGCA -GTTTTCCGTCACCCCGGTGAAAATTGATTTGGAGGCCACACAGCTGGATGGCCCTTCCAC -TATCTCTAGCACTCAGTTGTTCATTGATGGAATCTCGGTTTACGGGCATCGTCTGTTTGG -CTTGCCGCCACTTGAACCAACCTACGTTTGTAATTGGGATTTTGACGTCGGTAGAATTGT -CGGAGAATGCTCCACCGAGTTTTTGGCCTGCTTGACTTCATCTTTGAAGAGCTTTGATTT -CTCGTTCGACAATGAGGAAAATGCTTTGCCAACGCTGTTCCCTGAGCTTCTGTTTGACGT -CACTTTCTTGAGAGCCAAGATTGATTCTATCCATGTTTCCGTGCTACTGGACCGAACGGC -TGTGATTTTGAGCACACAGCCCCTGAACGTGAACTTCAATGATTGGGCAAATACCAAGTT -TTCGAAACGCATGAGCCTTCTCGTCCCGGATATTTCCATTGCTGCCGTTGATCGTCAATC -TGTCTATCAGTACAATCCATCGGCGGGAGAAATGATCTCCCCCATAGGACTTTTCCAGTT -CACAATTGGGCTGAAAATGGCTTTGCGTAAATCTGATATTGCCGAAAGTCGACGACTTCA -GCAAGAACATATCAAAATCCATGATCAGCGGACTCACAGGGCACAGTGGTTGCTCTTTGA -CTGGGAGGAGACTGGGCCAGCTTCAACGCTTCCGCCTGATGAAGATGTACTCCCTCCAAC -CATGGCTATACCTTCAATGCCTGAGCCAATTCATGACAGGCATGGCTCAATTCATGCACC -GTCATATCACAGAGCTTCAGGGCCCCGTTCTAGTGGAAGCGCCAAGAGTTTCCTTGTCCA 
-GTCAGAAACATCAAGTATGAACAGTGTCCGGATACACACTACCAAAAATGTCAGAGCAGC -CTCAGATCCTCTTCCCAAATCACGGCCTTTTCTGCCTACCAGAGGATCGACCGCGTCCGT -CGCAAAACCCGACCGGTCGAGAGATGGTCCAAACTCCGGGTCTCGGGCAGGCGCATTCTC -TTCTTCTCCGACAAGAGACGCAAACCCATGGATAATGCCACTTTTCTCATATTACAAGCT -TCATTTGGATACATCTGAGCTTCCGTTGCCATTCACGGATGAAGGTAACACGATGAACGA -AGATTTTGCAATGAATCAGAAGTCCATGTTTCTAACATTTGATGATGATCAAACGAAATA -CACAAATCTTGCGTGCGATATCCCGTTGGGTGTTCGAGGATTTTGTACATCCAAATTCCT -TCTGACTCTAGCTGCCTTACTGGATGGATTGGAACCCAAAAATCCTACACGAATCATCGA -CTCGCTTCAAAAGGATGTGATATCGAGCATTGTTGGCTACGATAATGCCATGAGCCAACC -GAGAATTTCAACAGCTGTTGCTCTTCGTGTTCCATCTATTCAATTCCGACTAGTTGACCT -GTCTGAGGCCCCTAACAACAAACATATAGAGTTCCGCGATGAATATAGCATCGAGATCCG -TCGTCTGAACACTGAGTTCCGAAAGAAGGTCCAACGCCGCAAAGGCGATCTGCTCGAGGG -TCTCAAACAAGGGGTGACGATTCATGCGGCAGCGGACCATGTCTCAATTTTTGTCGAGGG -CAGTGGAGCAGATTCTTTCCACGAGAAAGCTGTGTTCAATTCTCACTTGGAAGATATGAA -CTTTTGGCTTGTGACCTCGCCAAATGTTCGCTCTAATCTTCAAATGCGAACATTCAATAC -AATGACATCAGCAAAGTCGGTGGAGCGTCTCGCATTTCTTGTTCGTCGAGCCACGACCAT -GTTTGATTCTGTCGCATCGTCATTTCAGCAGCTTTCAACATCCAGCGAGAAGCGCGTACA -ATACCTCTTGTACTCTCTCACTCAATCGGCTACTGATATACCTGATCCAATCTTCCTTGC -TCGCATATCTTATGTTCTCAGGGTTGCTTCGACTCACCTGCGGCAGCATGACTCGTGGAA -AATCATATCTCGCATTCGCAAGATTTACAAAGGCCTACCTTCTCATCAGAAGCGTGAGTT -GGAGCAGAAATGCTTGTGTGACAGTTTCCCTCTGCCGGCGAATGCCAAAAAGGCCGTATT -ATCTGGCTTTGACCACTGGCGAGCTTGGGACCTGGCACATGTTGCCAAAAGCTATGCCAT -GCGCCGTGTTTGGCCACACGCCGCTGAACGGCAGCAGACCTCACGGCCTTCAATGTTCAT -TTCCTCCACCATCCACACATTTCGATTTTCCATGGATCCTGGTCCGAGAGAAAGCGATTT -AATCATAGGGAATTTGTCTACTGTTGCATCATTTGCTTCAGACTTGCCTGAAGGTGAAGG -GGTGCTGGATAAGCAACTCATTACCCTACAGTCCTATTGTGGATCAACAGCTCTGAGGCT -GCGCTGGGAAATTTTGGACCTTGTTGAAGGAGTGGTCAAGGTTATGTCGAATATCACACT -TGAATCCTCGCCGGGCCATGTGCCAAGTGACATGGTCCAAGATAAGACCCCGACCGAGCT -ACAGTTCATTTTTGGCACGGATTTCGGTTCCATCACTCTTGACGGTATTAATGTTAAGCT -TGCCATGATAGCGAAAGCTCTCCGAGGATCCATTGTGCATAAGTCGCTTGGTGGCAAAAA -ATTAGAACACAAAGACCTCGCTGTATTGTTTAGCGCTGAAGGGTGCTCATCTGAGCTTCA -AAGTCAATCCATGACTCTGATGCTGTCGCGAATCGCTGACCCATACATCTACCTTTCGCT 
-TACATCAGATGAAGATGCCAACGAATGCAGACATGATTGGAAGCTCGCTGGGTCTTGCAG -GAAGCTTCGTTATGACATGAAAGAGGATCCTGTCAGCTTGGCACACACAGCAGACAGACT -TATTGCGGACGAAGTTAATTATATCCAACAACTTGTGGATAGTGTCAAAGTACCAAAGCC -GAAATCCGACCAGAGCCTCACATCAAATAAACCTACTCGTGACACCTTCCATGCGGCAAT -GTTCCTAGAAGACTATCGGTTTACCTTCAGCCTTCTGCCATCTTTGACCTACCTTGTGTC -GGGTGAGGTAGCTCGCATGTCAGTGATGCCTGCAGAAGCATCCAAAATCGAGGTTGACTT -TGATGTGAAGAAGAACTCGCATATGTTCGTATCGGGTGAAGGCGACAGATTCAATGTTTT -GTCAATGCTGGAGATACCTCCCGTAAATGGGCGAATCCTTGCCAATTTCTTGTCAGAACG -CAAGGAAGTGGAGGTTGATGTCACGATTGAACTCATTCGCCTGGAAACCAGTGCTATGCG -CAGTCTCCTGGCTGTTTTGACAGGTCCTGATGTTTCCCATTTGATTTGTGACCTCAAGCA -GAATTTAGATGTTCTCCAGGCACACTTGAAAGACGTGCTGTCGATACAGAGGGAGCCCCC -AAAGCCCAAAGCAGCATCTGGTACCGAGGAGATTCTCTACAAATCGCGCCTTACAATGGC -TGGACTTGAAGTTCATGCCATCGCGCCTGGCCTTAATAGCAAAGATTACTCTGCCGAGAT -GATATTCAGTCTTGGAATGATGCGTATGCGACTTCAAAACGGACTAGACCGAGGATACCC -CATGGAGCATCCCGAGTTTAACATCGATGCGTCTCAAATCAGATTTGACCTGCTAAGACG -AACAAAGGCTGATACCCAATTATATGGTGGCTTCTCTGCTGGAGTAAAACTTCAGGGAAC -CTCTGCGGTCCGCGAGAATGGGGAGATTACAAGAGCATACCATTTCACCAGCGACAGGTT -CGATGTCGAACTATTCGCTGAGACAGCAGCCCTCGTGGTCGACATTGCCGTATACACACA -AGAGCGCATCAAGACTTTGGACCTTTCTCATGAAGTCAAGCGCCTTAGGAAATTAAGACA -TGGTGGACACATTGATACAAATGACGGGGCCCCCGCTGCCCCCGAAATCCATGTCAACGA -TGAGTCCACCCCGGAGACATTCCTCAATGCTCTTTACTCTTTACAATTTCGGACTATTAA -AGTCGCTTGGAATATGGCCGCGATGCACAACAAGCCTGGTCGACAGCCCGAGGACTTAGT -CTTTTCAATCCAGCAACTCGAGTTGTCCAACAAGAAGAAGAATGCAGCCAAACTTCGAAT -TGAAAACATGCAACTTCAGATGGTACCATTCGGAACGGACAGAGGAAAGCGCTCGCTCAA -TTCAGCCCTTATGCCAGAGTTGGTCTTCAACGTGGCGTACTCCTCTATAGGGAAAGAATT -GTCGCTTGTTTTTCAAGCAGCTGGCAAATCTCTCGACATTCGCGCTACATCAGAGTTTAT -CATCCCGGCTTGTATGATACAAGACTCAATTGCCACCGCAAGCGAGGCGCTTCGTGAAGG -CAAGGCTGTTTGGGCAACCAGAGCAGACTCTCCCGAAAACACCAATATTAACAAAGATCG -CAACCTCTTCGGCAACAGACGATTGCGGTCTCTCCTCGTCGATGTTGACTTTGCAGGTGC -TACTGTCACGCTTCAAGGAAAGCTTGGCCATGATCACCAGACATTGTTAGCGGCCACATG -GAAGGGTAGCCGACTCTCCAACGCAAAGTATGGCCAATATGTACAAGGCGATGCAGCGAC -CACAGCAACCCTCCGAGCACCGGGAGTGGCGTTCAAAGTTCAATTCGAGGACAATGGTAC 
-CGATGATCCTGCACTGAATGCGGAGCTGAAGGTTGACCCCTCGACGAACACATTGTATCC -TACCCTTGTTCCTCTCGTCAAGCAAATGACCGCCACGGTAAAGGAAATTATGGGCAACCA -ACAGGGCCAGTCTCGAAGGCCATCTACAGCAGCGAAGCTACAATCACAGAAATTGATGCA -GGAGAAGCCATTTGGCGCCGCCGATCCGACCAGCATCCTTGGTCGATGCAAGGTAAATCT -GGGACTGCTGTTCTGCAAGCAGGAGTTCAGCTTGAGCTGCCAGCCTATTGCAAGGGTCGC -AGCGACTGCAAGGTTTGAGAGTGTCTACGTGACTGTCAACACGGTCTTATCAGAAGAGCA -TGGTCGGTTCCTTGCCCTCTCTTTAGCATTCAATAGCTTGGAAGCATCTGTAAAGCATGT -GTACTCCAATGAATCGACCGCAAGCTTCGAGGTCAAGTCTATGGTTTTGTCTTTGATGAA -CAGCAAACACCTGGGCCGAATGAACGGAATGTCAGCTATTTTGCATGTCAGTCCGATGAA -GGTCGCAGTCAATGCCAAACAAGTTCAAGACTCTTTGCTCTTCAAGGAGATCTGGCTTCC -ATCCGACAATGAGACTACTTCGAATGAGCCCGACCAGTCAGAACCGTCTGAAGCGCAAAC -CTATATTGTGCAGCGATACCAACAGGTTGCGTCAGCGTCCGCATTCCCCTGGAACACTAC -TATCGCGATTGAGAAGCTCGAAATTCAACTCGATTTGGGCTCGACGCTAGGCAAGGCACA -ATTTGCCATTGTTGATTTGTGGGTATCTACCAGCAAGACATCCGACAATGAGCAGAACAT -GTGTATCAATTTCGGATCGGTTGCGATTGAGAGTAAGGGTCGCATGAGTGGAATTGTCGA -ACTTCGAAAGCTGAAAATCCATACCTCCATCGAATGGCCCGGCTCACCTGGGACGGGACG -AACACCGTTGATTCAAGCCACGATTGCCTTTCAACACCTCCAAGCCAAGGTGTCATTCGA -CTACCAACCTTTTTTGGTTGCACATATCGCCATGTTCAATTTTTTGATGTACAATGTTCG -AAACACGTCCGACGCGCAGAGTCAGCGCCTTTTCAGTATTCTAGAAGGCGACAAAGTCCA -GCTATTCTGCACTTCTCTAACAGCATCACAAACTTTAGCGCTATTCCAGGCGTGGCAACG -CTTGATTCAAGACGTTCAGGCAGCCTACAAAGCATCACTACTCGAGGTAGAACGGTACCT -CCGTCGAAAGTCTTCTATCCTTCCCGAGCGCCTGGAAGTCAGCGCCAAGGATCCAGCAAA -GAAAGACGACGACCAACCTGAAAAAGCACCCATCTCTCTACACACCGGTGTCGTCGTAAA -AATCCACCACGTCAATCTGGGCACATTCCCAAGCTCATTCTTTGACAACCAGATCTTCAA -GGTAGAAGCCCACGACGCCGAGGCCCGATTCGCCGTCTCGCTCGAAGCAAACAAAATCCA -CAGTGCCTTAGGCCTAACACTGGGCCAACTTCGCGTAGCACTATCCAGCATCACGCGACC -CACATCGGCCCAACTTGACGAAATCGCAGTGGACGAGATCGCCGAGCGAGCCGCAGCATC -TCGTGGCGGAACAATTCTCAAAGTGCCCCGACTAGTCGCGAGCATGGAGACCTGGCAGGT -TCCAGGAACCCATCAGATCGACTACCTATTCCGCAGTACGTTCGAGGGCAAAGTCGATGT -CGGCTGGAACTACTCGCGCATCAGCTTTATCCGCGATATGTGGGAGGCGCACTCGCGTGC -GCTCGCGTCGCGACTTGGCAAGCCTTTGCCGCCGTCTGCGGTGCGCATTACCGGTGGACC -GGGAAGTGGCGGTGAAGGGGGTGGTGCTAGCTCCGAGCAGCAAGAGAAAATTACTGCGGT 
-TGTCAATGTACCCCAGTCCAAGTATACCTATACTGCGCTTGAGCCGCCGGTGATTGAAAC -GCCGCAGCTCCGGGATATGGGTGAGGCTACGCCGCCGTTGGAGTGGATCGGGTTGCAGCG -CGATAAGCTGCCTAATGTGACGCACCAGATTATTATTGTCACGCTGTTGGAGATTGCTAA -GGAGGTGGAGGATGCTTATGGCAAGATTCTGGGGTCGTGATCTTTCTGGAGGTGCTGCTT -TTGACTGCTACTGGTTCATTTGGACAATTCTCATCCTTCGTTACTTTTTCTTCTCTTTAT -TGCCTGATACCATTGTTGTTTTTGTACATTACATCACATTCTTAATTATAGACATACTAT -TCTATCGATTCGATATTGAAGTTCAGGTAGATGTTGACTAAATAAATCAATAAATCGACC -AAAAAGAGGTAAAACTGAGAATGAAACATTTTGATTGGAACACAAAAAGCAAATCCTTCT -ACAAAACCGCGATCTCTATCATCTAAAATCGCAAAACTACCAGCGTTAATGCATCCATCC -TCCTTGTAATCAAAGAGATTCGTCAAAATCGTAGGCCAGGAACCACTTTACAGACCAAAC -TGCACGCGCAGAGCGTCAGCGAAGCGTGAAATAGCCTCATCAATCTTCTCTGATGAAGCA -GCTGCGAATGTCGCACGGAAGAACATCTTTTCTTCGACAGTGTCATGGTCAGGCTTGAAC -CAGCTGCCACGAGAGAGAAGGACACCTTGATCCACAGCGGACAGGAAGATTTCTTCTTCA -ATAGCGGCGCGAGTCTTTCCCGCGGCAACACCGGGGTGCTTGTGCCAGTCGATTTCCATC -CAGTGCTGATTAACAAATTAGTATTTTCCAGTTTGAAAGGCGAACAGGGTGAGACTTACG -AACATGCCAGCCGCGGGGGCTTCCCAGTGAACGATTTCGCGGGGCAGGTGCTTCTCACAG -GCGTGGACAAGCGAGTCACGACGAGCAGTGTACGACATGCGCAGGTTAATCAGCCAATCC -AAATATCCAGAGTGGCCCCAGTGCTCATCGAGCAGTTTGAAGAGCGTGATCTGAGCAATG -CCGCTAGGGTTCTGCGAGGAAACCTCGAAGGTCCGGATGAAGCGCTCGATGATCTGTTCA -GAGGCGACGATCCAGCCAACCCGGGAACCGGGCGAGATGACCTTGGAAAAGGACTCGAGG -CGGACGACACGGCCATCAGTGTCCATGCTCAGGAATGAGGGGACAAGTGACTTGATGAAC -TCCTCGTGGGTGGATGGGGGAGGGACTGGGGAAGCGTCCCCGCTGGTGTAAGGTTGCATC -TGCAGGAAGTAGTATGGTTCGTCCTCGATGATGATGAGATCGTGCTTCTGGGCGACCTTG -TACACTGCATGACGGCGTTCCGCGGACTGGGTTGCACCAGTCGGGTTCTGACCTGTCGGG -ATGGTGTACAGCACGTAAGGCTTGCGTGCACCCCGGGCGGCGACGTCCCAGTTGCTGAGG -ATTTCGTCCATTTTCTCTGGGATGAGGCCTTCCTCGTCCATCGGGATGCCCACAGCCTTT -ATTCCCAGGGGGAAGGCGGTCTCGGCAGCGCTAGCGAAGGTATATTCCTCCATGAGGATG -TAGTCTCCGCGCTCACAGAAGACGCGAAGAGCAGTGTCCCAGGCATAGGTGCTGCCGGAG -GTCAGGGTGCATTGCCAGTCGGAGTAGGGAGGGCTGTGGATGATCTGATCTAGTTAGCTC -TCTGCTGAGAAATGAGAGACACGAAGTTTTATACTGACTTCAGTATGCTCTGTAACGAAA -CGAAGCAGTGGCGCAGCTCCGGTAGCTTGTCCGTAGTTAAGGGCAATCTCCAAGTCTAAG -GAATTGGATCAGCTATGGCCATTTCTAATGCATGAAGATAGTGAAGACTCACCGTAGTTG 
-CTAGTTCCTTCTTGGATATCATGCTTTCCGGCACGAAGAACCGTGCCTGACTCCCGAGTA -GCCTCTGGAGAGAATCCCGGGGCAGTGGGAACCTTGATATCCAAGTGCTCAAAGGGGAAG -TACTCAGGTGATGGTAGTCCACCACCGAGGGAAATAAGACCTTTGAGATACAATTAGACT -ATCAATTGAAAAAGAAAGTGTTCTGAAATATTACCTGGGGTTTTCAGGTAACGAGCAGCA -GTCTTCAAAGTGGACGCCTAAGGAGGGCGAATTAGCAAGAGTGGACTTCGCATTGCGCAA -TTGAGTGCGAACCTTGCGAGACTTGGCTTCGAGTGAAAAATGGTCTAGAGTCGAATGGAA -GTAAGTATAAGTGGCATAATTGTGATAAAGCCGGTATAGATTCGAAATGGGTCAAGGTAG -ACATACGCTCAAACCGCTTAGCCTTAGGCTTAGTATAGCAGATCTAATGGTCCCAAACAC -ATATTAGCATTCCGCGTCTTGCAATTGAGTTCTTTCACTCATATTCTCGCCACGTCTATG -CCGAATATGAAATGATCATACCGGGCTCTTAAACATATCACTGTTGGATGCCGCGGCCAC -GCCGGTGGGGATCTTGGCCATCTTCTCTCTCCATGCTGGCACGCCATTTACGGTCAATGG -ATCCGGGACAGGGACAGCCGAGGTGTCGGTGACGCCCACCAGGTTGACGTCTAGGGGTGT -CGGTGGACTCATTGTGGTTTGGCGGTGTAAATGTGGAATGCGGAATCGGAGGGGTATGCG -GGGAGTTGTGCGGAAGGGTATATATAGCGGATTGGTAGGATTGGCCTGGTATGTCTATGA -GCTGAGCAAAGTCAGGCTATATATAAAGAGAAACCGAAGGTGCTACAAAGGTCGGCGAAA -GCGGAGCGGAAGTTGATCTCTCTCCATCCCGAGAGCTTTCTCCGAAGCACATGACTCATT -TTATAAGATAAGAGATATGATCTTTCGAAGGTTTCCATACGAACTTTGGGGTCAATAGGT -ACGATTTGTGTGAATTGTATGCACTGACATGTCAAAGGGCGCGGGATATTGCTTTTTAAT -TGGATTAGTTTTAGGTATCTTTTGGGTCTTGCATACAAAATATACATATGCTGTGCGTTT -TCTTTGTCACAACATCTATGCGCAATATAATACCTTTTTGGCCTTTTCACTTACCAAATG -TTACTTTTTGAATTGTCTTTTTCTCATCTTCGCCCTTACTAGTGTAGCTAATAGTAGTAA -CTAGGTAGACTAGGTATAGAGGGTGAAAAAGTTGAAACATACAGATAGATTGATCCACGT -TCAATATATATTATAGCAATGGGAGCGCGGTTCAACATAAATCatatctatatttatatt -ttttacattatttatatCCAGTGGAATGGAGTGGTGACGTTTCCTGCGCAACACCCATGT -CTGGTCTCCCGGACATGAAACACCTGATATCAACCATCTTTCCTCGATATCCCTCCCATG -CTGAATGGAAGCACAACGATCTATTGACCTACACTGAGTGTTATTGAAGGACAGTCTCTA -AAAATGTCGTTCACTCTCTAGGACAAGGGAATCTGTATCTTTGTATGGACCGTACAAAAG -CTACAGCAGTACTGCACAAAATCTAGGTTTCGGAAGGTATGGAGCTACAGACCCAGGTCA -AGGTCAGCTTCCAGATTGCGTCTCTTCAACCCAAAGAACCGAGTACTTTTCGGCCTTCTC -CTATTTCTAAGTTCTCTACAACCTCTTATACTCTCCGAGTTTCATACTCTCACGAGCTCC -CCACATATTTGCCAGTAGTCCCGTCAAGCTGAGCTTCCAGCACTGCGGCCTTGCCATCCA -GAACACTTTGAATAGCTTCTGGAAGTTTCTGAGCGAGCTCAGCAACAGTCGAGGCACGGC 
-CAGCCCATAACTCCCCACCTGCCGCGGCCTTGGCAATACCAGCATAATCTGGCGTAGGCG -CAAAGGAGATATTAAGGTCTTCATTTGTAGCACGAGAGCCGTCCCCTTCAGGATGAACGA -GCAGCATGCTGCGTCTGGGGGCATTCCAGCCCTTGTTATTCAGCACGATTGTCAAAATGG -GGATTCCATAACGACGGGCGATCCAGTACACAGATCCAGGCACCGAGAAGAGGAATGTTC -CATCGCCGACAACCTGACAGATAAACTTGCCCTTCCCAGGTCCCCCATGCTGCATGTCAC -TGGCAAGCTTGATGCCAAGTGCACCACCGCCGGACCAGCCGAGACCACCACCTCCACAGT -TGATCCACGACTTGGGTTGGGTAGCTGAGACTTGATCCGCAACAAAGTGGCTCATGGTCA -CAGCCTCGATTGCCCAGATTGTATCAACCGGAACGCCCTGGCGGACCTGCGCCATGAGAT -AGTTAATGTTCAAAGAGGCTTCGCTGCCACCCTCCGGGACTACGGCTAGATCGGAAATTT -CTTGTCGACGCTTTGAATACTCCTCGTCTCGGTGTTGGCCCATAGTGAGATTTCCCTCAG -AGGTAATGAATTGCTGCAGAGAATCTTGGGAAGAAACGTAATCGTTGATTTGTTTGAGCG -CTGTGGAAGCGTCGGCGCGGAACGTGGCCATTGAATTGATATAGAAAACCGGAATCTGCT -GCTTCAAAGGGTCGACATCAATGTGAATGATCTTGGTCGACGCAGATGGCTTGAATTGAG -TAGGCACCCAAGGGACATCACAGTCTATCACAAGGATGAGATCGGATGCCCTCACCGCGT -CATGGCCTGGATATCTCAGTCCCAACCATCCAGGGTGATTAGCAGGGAAGCACATGTCGC -TACCACCAGTGTCTAGCACGCGGACTCCCTTGAAGCTATCTGCCAATGCGACCAGCTCAG -CCACAGCTTGAGATTTCCTCCCAGAATATCCAGTAATGATAAGAGGGCTCTTCGCAGCGG -CAAGTTCAGATGCAATCAATTCAACGCCGGAGGTTGGCAGGGCAGCCGGTAACACGGGAC -CCCAGACAGCCTGATTCAGCTGATACGGCTCGATATCCTCCTCCATGACCTCTCGCGCGC -CAGCAAGGTACACAGGCCCCTGTGGGTCACTAGTTGCGAATTGCAAAGCGCGGTTGACAA -TCTGCTTAACATTCTTGCCGGACTTGATCTCACCCGTATACCGACAATACTGGGAGACGA -TTTGCTTCTGATCCGGCACATCTTGAATCCAGTGGATATATTCCGTGCGTGAACCTCGCA -TCTCACCCTCAATTGTGTAAGGCGATAGACCGGCAAAAATTAAAACCGGGGCGCGTCCGC -ATGAGGCATTGTGCACTGCTGCTCCGAGTCCCTGTGTACCGACATCGACATGGACGATTA -CTGCTTGTGGCTTTCCGGTTAATCGGGCATAGCCGTCTGCCATGGAGAGCGCCACCATCT -AACGAACGCATTTGGTCGTAAGCGGGGCTCTCATTCATACCGACTCTAACCGTAGTGCCG -CTACACCTGGTGGGTCGCATGATGCAATTGTTAACGTACCTCATTGGGACAGGTAATGAT -CGTAGGAAATTGGTCTTTCTTTTCTTTCTGGCCTTTTACCATCGCCTCCAGGATGGATGG -GTGGTCTGATCCCAAATTGACAAAGACATGGCTGACGCCGCCCTGGGGTTGTCAAGATTA -GAATTTAATATATGCCAAGTAAAGCTGCTACCACAGAGCACACATGTAGTCCATACCTCC -CAGAGGGCTTCAAAAAAAGCAAAAGATGCTGTATAAACCCTTTCGTTGTGTAAGCCAAAA -GAACATTCCTCAATGTTTGTATTGCTGAATTGCATACATGGCTGAGCTCTATACTATAAG 
-AAGTCGGATAGCAGGAAGAAGATGCTGCGAGTCTGAGTATTCTTTGTCTCAAGAGCAGAT -TAGAGAGAGGGGAAGGTCATTGCTCATAAATATCAAGATCTGTGAAGCCTCCAGTTGGGA -TATCGGCCACAACTCCGAGGGCGGGGGCCCGAAGGAACACCGACTACCTCTAAAGAAATC -AAATATATATTTCTCGGTTATAACTGATTCGAAAGGAACATCTCCGACTTTGATCAGCTC -TCTCCATGTTTGGGCCAATGATTGCGGCCATTCGATGAACCGCGGTTGATCAGGCTTGTG -CCGGGAATCAGTCTCCGAACTACAACATAGAGGATCTCATATTCGGTCTACAGCAGTAAG -CAAAGCTATGGGTATAACGTACCTTCAAGTTTATAAGAAACCATGATTGAAGACTTAGCA -CACTAGTCAAACAAGGAGAGCGAATTTGGTTTAAAATGAGACCTGTAATAAGGAACCGTA -AAAAGAACTGTTAGATTGGACTTCATCCGAGTTTCGACAACAGTAAGCCCTATCTGATCT -ATGCGACTTTATGTACAATATTGTACTCGAGGGATCCATACATTCATGATATATATATAT -TATTCAAATATTCTTAACCAGCTCACTCTCCCCACAATAAATTTCACACCAGTCACTGAG -ACCTTTTTGGACACTTTTCCCAACACAGTTGTGGATACATATGTCGTGTAGTTAATATAT -AAAATCGGAGTTTGACTCAGGGTATGGTGATAACAGATATATTGCTCTCTTCAGGAATAA -TCATATGGCAAAGGTGATGCCATCATTGTTTACAGTAAATTCGTCGGCTGAAATAGAAGA -TAGACTCTATTCCTCTTCAACAAAAACGCCCCCTTTTGAGGCCACAAGATCTACCGAGCA -CATCGGAACCCGCTTTAGATTGCCCATATACGATCTCGGCAAACTGTCCCCATTTCGCTA -GTGGGATAGATCGTGGGAAGCCACGTCAATTATAATTGGTCAAATAGCTTCTCGGGGTCC -CCTGTATTCATTACCTAGGTCCTCCCAAGGAAGCCCAGATATTGATTTGGCGTTATATGC -ATTCATCGCGCACGGCTTTGGTTTCAGATTAGGCTAGGACGTGGGGGCTATACACCCCTG -TGTCGCTATAATCGCTTATGCTTCCTCCCGATACATCTAGGCGTTGAAAAAATGGGCGCT -GGCTGTAGACAATTTTACAAAATCTCACTAACGCTGGCTTCGATGGGTCCTTCGCTCGAG -AGTATCCGGGGATCTTAGTCAATCTCGAAAATATATGAAGTCTTGATTCCCTTATCGTCA -ACAGCTTCTGTATAATCTTTTCTTTGTACTCTTTTCCCCCATATCTTGGTGGTCCTGTAT -CAGCCGAAATTTTTTAATTTTTTTTTTTCGAGTACCCGCTTCAAGTGCGACTGGGATTCG -GATTACGAAGGCAATGAATGAATGACTAGACCTTTCGATTGAATATTAGAGCAGGGTCAA -AATGGGAGCCTCGATGCCCCTTGTGGGCCGAAGTCTCGCTATTTTCATCGTCGCAATTGT -GATGATGTCCATCTCCATTGTGACGGTTTTGTTACGAACTTTTGTACGATTGAATATCAT -CCATGCGTTTGGTTGGGACGATGCTTTGATGGTTGCTGCATTGGTATAGTGACCCTATAT -GATGTGCTATGGTGTGGTAGAATCCTAATTTGCTATAGATTCTTTTCATTCTTTTGAATG -CCTGCTGCATGATTGGAACAATGCATGGGGTAGGGCACACGCGTACAGATTTTACCAGTA -TGGAGGTTTATAAGAAAGCACTTGTGGTATGTGATACTTCTATTGGGAAAAACTTCCTTG -CTAAATGTCTTTTACAGTGGTGGTGGCTTGGTCAAATGCTCTACATATGGGCCTCAGCAG 
-TTGCCAAAATATCAATCGCCGTTGCGCTTCTCCGGTTGGCCATTGCAAAGTCTTACCGAG -TGATACTTTGGAGCATTATTTCTGTTGCCATTGCTATCGGACTTATGTTCTGGCTTGTGC -TACTCTTAGCCTGCAATCCGATCTACTACTTTTGGGAACGAGTGGATGAGACCAGCACCG -GAACATGTATACCCATGTCCACAATTTTGGATATTGCGTATTTTTACAGCTGCGTGACTA -TATGTTGTGATGTAGCGCTTGGCCTGTTACCCGCTTTCCTCGTCTGGAAACTGCAAATGA -ATAGCAGGACGAAGCTTGCAGTAGGAGGAATCCTCGGGTTAGGGGCCATGTACGTGATAT -CGGACCCGGCCGTTCTGAAGATGAATTGATGGCTAACCATAAACAGTGCGGCTGTGGCAG -TGATATGCCGACTTCCCTTTTTACAATTTTATCCCGATGAGAATTTCCTTTGTATGTCAA -CCAAGCCATTTGCCTCTCAAAATCTAACAGTCTCCAGACTCTACCTACCAGATCGCCATC -TGGTCTCTCATTGAGACCGGCCTCGGAATTACAGCAGGCAGTCTGATCACTCTCCGACCA -TTATTCCGCTGGCTCCTGGATACAAATTCGACTCATAACCACAAGTCACCCCTCCAAGGA -AAATATTCAAAGCCATACCGACTGTCCAGCTTGCAAAGCCAGGCCATCAGAGAATCGCAG -AACACGAAATACTGGCGCCCCGATGTCGACCCCGACAACAACAAGAGTACAATTGTTGCG -GTTTCGTCGCAGCGGAAAAATAAACTCAGCCTTAGCAACAGCAGCCAGGAAGCCTTAAAC -CCCGAGCCGATCTCGCCAAATCATGTCACGATACAGAAGACCTTTGTACAAACAGTTACA -GAAGGAAAAAAATAGGGTATCTTACAATGTACAAGGATTATTGATTCATTTAGACCTCTG -GACTTTTAGCGCATCTGTACAGCCCCCCTCCCCTGATCAGGGCCAACCATATAATTACCC -CGGCTTTTAGCACAACCCCCCTCAAAAAGTCCATCTCATGTATTTTTATCTCAGGTGCGG -CCCTCCCGAGCAAGTCTACAGCTAGACCGCCTTGTTGCTGATCTTTGCACCCTGAAGATA -AGGCTGAGCATGTGGGGCTGACACAGCTGGGGCCATCTGCATCAATGCTTGAGCTACTAT -TGGCTTGGCTATGGTAAGCTGATCCTATGAAGTGCTTTCATCCTTGTCGCAGCTGAATAT -AACACGAAGAACGGGGCACCTGGGTGACTCGAAATCGGGTTAGGCTTCCCTGGGTGGCCT -TGAGGGCCGTGAGGCTTATGCAGGTGAACAAAACAGATGATTCGAAGCGATTTCAAGATG -GATCCCATTCGTGCAGCTACACTAATAAAAACCTAAACAAGGGTGTAAATAAAAAAATAT -ATAGACAGACTCGTACCCACCTACTAACTTTCTTCAAATCCACACCCACATTCAACTTCA -TCGTACTTACACAATGTCTCTATGCTCCGATGTCCACGAACCACCATATAAAGTCCGACA -GAGAAGGGACTCGATGGCTCGGCCAATCCGCAAGCGCATCATCATTTGCTGCGATGGAAC -ATGGCAATCTGCCGTCTCAGGGAAGAAAAACGTCCCATCTAATGTCACACGTCTCTGTCG -CGCCCTGAACAGTGTCGGGACCGATGAACACGGGAACCAATGGCAGCAGATAGTCTGGTA -TGATTCCGGAATTGGCACAACCTCAGGCCCTTTTGGAAAAGCGATTGAAGGCGCCGTTGG -CCTAGGACTGGAAGGAAATGTCGTCGAAGCGTATAACTTCTGTGTCCTGAATTACAGACC -TGGCGATCAGATTATGTGTTTTGGTTTCTCGAGGGGTGCATTCACGGCCCGGGCCATTGC 
-CGGTTTGATCTCGGATATTGGTATTTGCAGTAAACAGGATTTGAACCGATTTCCCGATCT -CTGGAAGGTTTATAAGAAGAATAAGCCCGGCGAGCGGTTCTATTGCAGTGACCTCTGGTT -CGAGTGGATGGATGGCAAGGCCGATGAGAATCAGGGCGCTCGGGGCGAGACTTTTGTATT -TGAAAAGCGTCCCGAGGGCGATTGGGCTCAGGAGGGGTCAAGGGATGTCGAAGTTGTTGG -CGTCTTTGATACGGTCGGCGCTATTGGAATGCCTGAGGTACTGGGAGTCAAGCTGCCATC -GTGGCTCTTATGGTGGTCGCCGCACAAGGATGGGTGGGAAAATGTTGGACTTTCACCTAG -TAAGTATCCTGAATCATTTTTTGCCTTCTTGTCTGAGGATTTCGTGGTCGATAAAATACT -GACAAGCGTTCCTCAGACGTCAAACATGCGTTCCAGGCTTTGGCCCTCGATGAACACCGT -AACGCCTTTTCGCCTACCCTATGGTATCTCCACAAGTTTGGGAACGTCACTTCAGAGCAG -ATACAAGCCCAGAGGAAGATTATTGATCAGAAAGCTCAAGAATGGGACGAAGCCTTACAA -GATGCGATCAGCCTCAAAGGTAGCGGCAGGGCTAGTGATGAGGACGTAAACAAAGCAGCT -CGAAAGCTAAACGAGACAGCTAGATCCTTGAACCAGGAGACTCGAAAGCTCATCAAGTTG -GAAGATGACCACAAGCACCAGCGTCATCCCCGAACATTGAAACAAGTGTGGTTCCCCGGT -TATCACATTAACATTGGCGGAGGTTCGAGTGAAACGCTCGAAGATGAGGGTGACATGGAA -GAAATGTCTAATATCACCTTCTACTGGATGCTGGATCAGATCAGACCCTACCTGTCGCTC -AACGACGAGTCTCTCTCTGATGAAAGAGAAGAGAGAGAATATCACATGTCGACACTTGTC -GAAGACATAGTCAATAATGGGTCCCTGGCAGGCTGGGCTCAACGCAAAGCTGCAGCAATA -GCCTCTGTTTTCCAACGTCCCTCCGCCTCACCTGTCAAGTCTGTCGAAAAGCGACGCAGC -TATGGCTGGGGAACAGGCCCACTGGAAGACAGCTTTACGGCATTTTACTACGCCAACGGC -TCGAAGAGACGCACACCTGGCAGCTATGATCCTTTCGACAAAGAAGGAAATCTACTGGGG -GAGACCTTTGAGTTTATCCATCCAGTGGTGGGATTTAGGGAGAAGCAGATCAAGAATTAT -ACGCCCATCGGACATGATGTCAAGTTTGGGCGTCGGAAGGCTGTTGACGAGAAGGGCCAG -CCTTGTTATGTCTATGACCTGGGGAAGGCACGCAACCCCTTGCCTGAGTGGCGCTTAGGA -GGACTTGACTCTTATGAAAGGATTGCTATCACTGGAAAGGCTGCGTACGATTATATCGAC -GAACTAGATCTGTATCTGGAGACTGGGGTCAAGACATCCCGTCGGTCTGTTTGGGGAGCT -CGAGATATTGACCTTGGGATTGAAGTGCCCAGAGTCACTGAGCCTGAAACCATCGGGCTC -AAAAGTGGGGGTGTGCATTCGGAGTTTAAATCAGAGAAATCTCAACAGAGAAGCTTCGAA -TGGACGGGCTTTGGGACCAAAGGAATGCAGTTGAGGAGCTCAGAGATTTCATACACGGAG -GAGACAATAACTGTGATGTGATTGTGGTTTCTTTCAACAATGTACAGAAAAAATTGTATT -GGCAGGTATATGTGGATGGTAACCCAAGCAGGTGTGCTTTTGATTCGGACAGGGCTTGAT -GTATAGTTTCTTTCCTGCATATTTTATTTGATAATGATAATTACAATTTGAGGTCGTCTG -CAATTCTGTTTTTGGTATCTGAATAGTATTCGATATTCCTCAGCATATATCCATGACAGT 
-CAGTGGGTTTAGGGCAAAATCTGTAGACTTCCTATAGATCAAGCCCTATGATCAGGAACA -TTAATGTACTGTTTATCTTGTGGGGTATTCGACGTCATAGGGCTGAATATCAAGTTGGAA -AAGGCTGGGCATTCCAAGATAGACCAAATAATTAGGCACCACAATTCTTTTCAACTGATT -GATTTTCTCCATCTAGACCCATTTGTTTTAGAGACTACGCAGTCGAAGCCTCTTAGAAGC -GAGAAACCTTAGGGGTGCCCTACGTCATCTCAGTCATGAATGGTATATTCCACCCATGGA -ACGTTTCTGGAACAATATTGAACTGCTTTGAACTTGTCAAGCTACTTGCTGCTATCTGCT -CTTTTTCTATCGTTGGAATTATTTTGATCATTCGACAGCCCCCAAGTTTCAACCGAATTC -AATACAAAATGGTATCCCGCAAGGTGTACAATATGTGTCAAGCGCAGACAGTGACTTCAG -AGTTAGCTCGTGACCCGTCGCAGTGTCCGCAGAAGGTCTTTCATGAGTTGTTCGAGAGTC -ATCATGCGTCGCGTACATACGACGGCACGGAAGAATCAAATATCGACAAAGCATCGGAAT -GGGAAACGTTAAATGAGCTGCAGAAAGCTCGCGCTTGCGGAAATTTTGGGTCTGTGGAAA -CCAGTGATCTGTTTTTAAAGGTGATTAATTCTGCTATATCAACGATGAGGGAGGGAAACC -TAACAGATGTTGACCAGGTTTATCATGACGCCTTATGTTGTTTAGAAAAGAACCCAATGT -CAGGGGTTGTGTCGCCCCAGCTGATGGGGAGCACAGGAGTTCTACCACTGACGATTGTCG -CCCCGTTACCTGATCTCTGTCGCCATTTAGCGAATTGTATTGTGCGTGCTGAACATGAAG -TCTTTTTGGGTACCAATTTCTGGATCCACTCGGACGCATCGACATTAGTGACGAATGCCA -TTCGGGAACTTTCCAAACGAGCAGGCGAACGGGGCCAGAAAGTAGTTATGAAGATAATCT -ATGATCGTGGAGATCCTCGTCAAGTGAGTGCTCTTATAATGTATATCGGATATCATCTGC -AGACATGATTCTAATGAAACCCTAGGCATGGGACAATAGATTGAATGTCCGCGAGGACCA -GTATGTCGGCGGAAAAGTGAAGCTACCCGCTGCGAGCGAGATCCCCAATGTTGATCTCCA -AGTCATCAACTTCCACCGGCCTGTCTTTGGAACCTTTCATGCAAAGTTCACTGTTATTGA -CCGGCGGATGGCTATTATACAGAGCAGCAATATCCAAGACAACGATAACCTTGAGATGCT -AGCTCATATCGAGGGTCCGATTGTTGACTCTTTCTATGATACTGCTCTGTTGTCGTGGGG -AAAGCTTCTCGACCCGCCATTTCCACTCCTGAACTCTCCAGCGAGAGATGCTCCGATACC -ATGCCACGCAACAAAAAACAATGATATTTCTACAGAGAATGGAGGTAGAGCTCTCCCTGA -GCATACGACCAACTTTCCATATTATGATAGGGACCTGGAACAAGAGGCCCGGAGAGTCAA -CGGATGTATTCATCCACAAAGTGACGAAACACGCACACAGGCTGTCAGCCGTCATCTTAG -TATGTGAAATTGGTGCAATACTTATGACATGTCTAACAACTAGACAGATACCACTATTCA -GCCCGACACAACAGGCGATGCACCAGAAAGCGACCAGGATAATATGTTCGACCCTTATAT -GGTCCTACCTCGCCACGAGCCCTTCGCAATGGCAATGGTTAACAGGGAACCATATGGGTG -TAAGTCCAGACCACAAAGATAATCTTCCTCAAGCAGCCGAGCATGACTCTAACTCCCCAT -TTCTTCAAGCTCCAAACCACAGCAGCATCTACACCCCGCAAAACTCAGCGTGGCTGTCCG 
-CAATCAATAACGCCCAGCATTCAATCCTCATTCAGACACCAAACATGAACGCTGAGCCTC -TAATAGAGCCTCTCATTAACGCCGCCCGTCGCGGCGTCATGGTATCTTGCTATCTCTGTC -TTGGCTACAACGACGCAGGCGAGCTTCTCCCGTTCCAAAACGGAACCAACGAAATGACTG -CGAATAGGATGTATAACTCGCTTGATACCGACGAGGAAAAATCTTGTCTGCGAGTATGCT -ATTATGTCGGCAAGGACCAGACTCGCCCGATTCATAATAGCTTCAAGAAGCGAAGCTGTC -ATATTAAGTTGATGATCGTTGATGAGCAGATTGCGATCCAAGGTGAGTCCCTAAAAACAC -CAGGAGAGAGACATGTTTCATCCAAGTATCTAACAATTTTTTCAGGAAATGGAAACCTCG -ACACCCAGTCATTCTTCCACAGCCAGGAAGTCAATGTCCTTATTGATTCTAAGCTCGTTT -GTCAAGCTTGGACGGAGTTGATCAATCGTAACCAAAATACTGCTAAATATGGCGCTGCGA -GTAAGAAGGATGGCTGTTTTCATGATCCTGAGACTGATGAGATTCCTGCTGGTTCGATTG -GACCTGTGCCGGGTCGGTTCAGTTGGGCCAAGGGGGTCGTTGGAGCTGTGCAGCGAGTGA -GAGGTGTTGGTGGATTCTAGAAGTTTATTTGCTCATGTGCCTGCTTCTATCTCGGGATAT -TTTGATGAAGGGATTGGAAATTATTCTCTATTGTTGGGATTTGATCTTTTATCGAGGATT -TGTGATAGTGATATGTTCTATCTAGCCCCTCTGGAACTGGAAATCGATGGATACAGTCCC -TTCCCATCTACTCTAGTTGTACGACATTGACCGGGGCGAGAAAAGATAGGAGGGCATAAA -ATTGACAGGAATGATAAAGGCTCTCACCGAGGTAAAGGAGCAACAGTGGACCAATGCTCA -TAGCGAGCGGTCCGCAGAGGGTGACATAGTGTGAGTACGAGACCAATAACGCGATCGGAT -GATTCCAAGAAATCCACCGAGGACTTCTACAGCATGTACAACATACTCTGCACCGAACAT -TGCCCAAATGATACATTTCTATCGTCTCCCCACTTTTACAACAAATCAGTCCCAGAGTAA -ACCAAACCTCGTCCAAAACACCAATCCATACATAAATGTCTCGTCCGTATGACCAACCAA -TAATCGACATAGTGGAATATGTCTACCACTACACCCTCGGACAAGACGACGAAGCGATCT -TGAAATGCGCGCGGACAGTGCTGCTCGACGCAATGGGCTGCGCCATCGAGACAGCAGCGA -CGAGCACCGAATGTCGAAAACTGCTCGGCCCCGTGATCAAGGGGACAATAGTGCCAGATG -GATTCAGGATCCCAGGAACTGATTTGCAGGTCGATCCCGTGAAGGGGGCTTTCGATCTGG -GTGTCCTGATTCGGTACTTGGATCACAATGATGCGCTGGGTGGTGCGGAGTGGGGACATC -CTTCAGGTATAGTTTGTTGAAGACATGGGGATTCGAATTATGCCCATGGGACTAATATAT -GGATTTCATTGCAGATACTCTCGCTGCTATAATCCCAGTCATGGATTGGTTATCGTGTGC -AGGTTTGTCTGGTCGGCGGTCCCATGGTGGCCCGCCATTGACTATGCAGACGCTGCTCAT -CGCTCTGGTCAAGGCGTATGAGATACAGGGTTGTTATCAAATGCGGAACGCGTTTAATGT -TTATGGGATTGACCATGTGGTACTTGTTAAATTGGCCTCTGCTGCTGTTGTTTGTTGGTT -GCTTGGAATGACAGAAGAGCAAGCTATGGCGACTATTTCTCATGTTTGGATGGATGGCCA -TCCCAACCGGGTGTATCGATCCGGCGAGAATACTATTCCGAGAAAAGGGTGGGCAGCAGG 
-CGATGCGGCGAGGAGAGCTGTGCAGTTGGCTCTACTGGTACAGGACGGCCAGCCTGGTTC -GCCGCAGGCGTTGAGTGCAATGCCGTGGGGGTTTTGGGAGCGGACTTTTGGCGAGGCGGG -ATTTGTACTTCCACGACCATTTGGGTCGTGGACTGTACAGAATGTGCTGTTCAAAAGTAT -GCCAGTAGAAGGGCATGCGATCTCGGCCGTGGAGGCAGGCATTTTGCAAGCACGTCAATT -TCGACAGAAGGGCCTGTCCGACCCTCTGAACCAGATCGAGCGAATAGATCTTCGAACGAC -TGCTGCTGCCTTTTTGATCGTGAAGAAACATGGGCCGCTGCACAACGCTGCTGATCGTGA -CCACTGCATTCAGTATGTGGTTGCCTTGGCTTTTTTGAAGGGGAGTCCGCCGGAGGCTGC -GGACTATTTAGACAAAAGCCCATGGGCAAATAGTGAGGAGCTTGAGGCCCTGCGAGAAAG -GATCGTGGTGCAGTCCGACCCTAAGCTCACGGAAGACTACTTGGATCTGGACAAGAAGAG -TATTGGTGCAGGCATGACGGTCCATCTGACAGATGGGTCCTCGTTGCCTGAAACACTGAT -TGAGTATCCCGTAGGACATGCGCGGAATCCGAAAACACCAGCTGCGGTACAAGAGAAATT -TTTCCAAAATATGGGGCTTATGTTCTCTGCGGCAGAGATCGGTCGGATATTGGGTGCTGT -TCAAAATCCAGATACCTTAATCTCGGATTTCATAGACATGTTTATCCAGCCATCGACCAA -GGCAAGATGGTAGAAGGCAAAGCTATCTGCAGGGATCATGGGTTGCGTGTACTAGCCAAT -AAAGATGATCTTGTTCAGGGTATCTAGTGCTGGCGTTCGTTTCTCTTCAATCGATTCTCC -CAACCCCCTGCAGCGTCAAATAAACATCTGTTCCCCATCCCCACAGACTTTGCACCGCAA -TACTGCCGCCGAAGTACTCAGCGTAGGCTCGTCCGAGAGGCAACCCATAGCCAAGACCTG -CAATACTGCTGATGTGGCCACCACCAGCAGAGATGGTGTTCAAGGCATCCATATTGCCAT -TATCTGAGCCCTGGTAGTCCATATCGGAGAAAGTCGTGAAACTGTACGACCAGATATGGG -GTAGAACCTCAGGTGGAATACCACCGCCACGGTCACGGATACGAATTGTGATACTTTGAG -ACGAGGGAGCGGAGTAGCCAATGGTTTCATTCTCATTGGCACCGGCAAGCTCTGAATCAA -ACTCAAAGCCAGCATCGGCATCACCCGGGACCTTGGGCTCATGTCCAGGCACATCCGGAG -CAGCAGCGATCGTCACCTCGATGGGCTCACGCTCATTTCCATTTTCGATAGTAGCCCGGA -ATGCGTTTTTCAACAGTTCCGTAATAATGTACTCCACATGTACAGGGATATGAGCAAATG -TGGCCTCAGGCTCACCACCAATCTTCAAACGCGGTCGCACTCCGTACTTGAGCTCACAGA -TTTCTCCAACGAAATCCTCACATAGCCGGATGATGCGTGCCGGCTGCAACGCCGTATCGA -TGACGCCGATATAATTTGATGGAATTGAGTTATTTGGCGCAGGTGCACCAGTGGCATCGG -CAAGGTTATCACTGATAGGCTGCGAGGCATAATGAAGAGCTAGGTGCTGCTCTGCGATCA -ATCGCGTACCAATGCGCGCACGTAGATGCGTGTCCAAGAAGCTCGTCACGTCGGCCGGGC -TGACGTATTTCCTACACTCCAAAAACCCGCGTGCAAGGACGGGGATTGTATTTGTATGCG -TTTGTACGAGGTCTGCCAGTACATCCCCGAACTGTTTCTCTTCTTCGAGAGTCGTGATGC -GGCGCTGTTGATATGGTATGAGGGTCGATAGCGAATGGAGGTAATTGTGGTAGATTTTGG 
-AGACATGAGGGTTTGAGACGACGATAAAAGGGAGGTTTCGTAAAGCTTGAATCCGAGAAG -CTAGTCGGGCGGGGAGGAGGGAGAGTGTGAAGTTGGCAGAGGCTAGAAGAGCCTCCTTAT -TCAATGGTGGGCGGCCGTGCCTAGGGGATTAACATAAGTTTAGCATGAGTTCATTCTGCT -TTGATGTGAGCTGGATATAAAAATGACTGACTTTAGTAGGTCTGCAAGTGTCAACGGCCG -ACGGGGACTTGCAGCCAGCTGCGCAACATTGTCTTTCGCAACTTCGAATGGGCGACCTGC -CCAGTTTTCACTCTGCGTGGATGATTGCCGACTAATGCGATACGCATTCGGAAGGGCTCG -GCATCCAGGCCGAAAAGGCATAGCTCTAGGTATTGAGGTTGTCATTGTGAATTGCCATGA -GATAGAGATCAGTGGAAGTTCACAGTGAAACCTCGGGACGGAAGATCTGCACCGGTGCGG -ATATCATCGGTCCAACATTGCAGATCAACAGGGGTCAAGACTTCGAAAGTTGTTATATTT -CACTTAAAGATATATTTACGTTGATTACAATTCATGGATATTGCACAGGGCTCATTCAAG -TTTAGATTTATACACGCTATGCACCCAAGTCAAGCTGATATTTGGATTCAGGAAAAGCGC -TCCATCTGTATGAATGCCTGTACCCCTTTTGGAAAAGGTACTCATAGGATGCACATGGCA -TCTTGGATGAAAGCAGGTCGCATAATTGGTATCAAGTCATCTCGCTGaagcaaaacaaag -caaaatcataaacaaaaGAGCCCCCAGGTGTCGTGTTTCCATTTGGTCATAAAGCGGAAC -ATTCACAGCGACTGCAGTCTTCAGTCCTAGCGATGTTGACTAGGGACATCTATTGAGCCT -CCACTGCACCGTCCTGCTCAACATGAACATGTTCAGTAGATTCGAAGGACTGGACGTTGT -CATCAGGGAACTCGACACTATTATTGATCTGTATATTAGTACGCGCCTGGCATAGATCCC -GGATGACCAGCAACACTTACGTGAAACCTTTGATGATGCTATATTCATGTCTGATGACGC -CGCCCTTCTCCCTAGCGGTATCCTTGGCTCTGAGAAGACATATCAGTATCAAGATCCTTT -GAAGATGATGGTAATGCAACACACTTTTCCAGTTCCTCGACAGGAGCATCAGCCTTCAGA -GTGATCTAGAGAGATGTTAGTCCGGATAAATACTTCAAAATGAAGCTGTACTCACGTTGT -AAAGAGGCATGGTGTCAGACAGTGGGATTTTAGAGTGGAGAAAGAAGGGAGATCGAAAGA -AAAGAATTTATAGGAAGAGGGGCTGAGGGGGGTTTGAGGCTACACTTTATTATTTGGGGG -GGGAACCTTGGAAGACGCCTTGAAGCAGGTACCTTCCATACGTTCCAATCGCTCTTTATC -TCTCTCACTTTGTCACACTATCtacaggtatgtgcatgtatgtacatgtatgtacatgta -tgtTGTATATGAATACTTGAAATGATTTGTCATTTGGTGATTGGTATCCTAGTCTTACAT -GGACTTTGAGAAACCTCTTTGAAAGCTGGATTTTATATCATCCCTTGCTGAACCCTTTCC -TGTATAATTCTAATGCCTTTTAAAGGGGAGTCACCATTGATGTAACAACGCAAGACAATG -ACATCATGCTTAAACGCGGGGCAATATGTCTTGCATACTATTCAACGCTTCAATAGACGA -CTACGATTATACATATTATGCTAAACTCCAATGTAACCATCCTCGTCCCGGGAAGGCATC -TTCATCCCGTGCATTCCCTTGTACTTGGGGCTTTCGGGAGCCTTCCACCCCTCCAGGACC -TTGTCATCTGCCAGTTGATATACCTCCACACCCAATGGACGACAAGTGGCTCCCATATCA 
-CCCTCCTTATCATCCCAACCCCAGTTGGACAAATCACCTAAGAGTATCATTAGCATGAAA -ATAAGACATTCGTGTATGATGGTGCCATACCGCCAGGACACGCAGATAGCGCACAGAGAA -CATCTACTTCTGCAAAGAACTCAAAGTACTCGCCCGCCTTTGCAGGGGAGGTCTCCATAA -AGTATTTCCCATCCTTATCCAACCCTGTCACTTGAAACACATTTAAGACATCGTGGACGT -CTAGCTCCGTCAGTCCATGAGGGACGACAGAACGAGTCAAGTTCGAATGACAGTGGAAGT -CAAAGGTCTCTCCGCCCATGAGCAAGTTCACGTAAGGATCACATCGAGTACCCATCAAAT -CATGCACCCGCCCTCCAAACTGCGTAGTCCCAAACACAACCTCAGGGTTGCGCTTCCCAT -CTGGTCCAAGCACGTCATGCAACTGCCCGCCACCCAACGAGTCACCCGTGATTGTTACAA -GGGGCCGCAGGTACGGCAGGTTCGACCACAATCGATCACCCACTGAGACATGGGAGGCGT -GGATCTGTCGGGTCCGCGCAGCCCACATTCGTTCTCGGGGGTTATTTGCGTTCCAGATGT -TCAGGTCGCCCACCTGGGGCCCCTTGGGCGTGGTCAGGCGGCAGATGTGGCCGGCGGGGA -CGATCCACGCTTGGGCGGAGCATGGTCTGATAGTGAATGATTCCTTTGGGATCCGTGCAG -AGGGGGTAGATGCGGTTTCAGAGATTGACCGGTACAACTGCGAAGTTGCATGAACCCCCG -AATCGGGTGGTGCTGTGTAAGCTGGGGGTGGCTGACGTGTAGACATTGTACACGGTTTGC -GGGACGGTATAGAAGTTGGATGCTACACAAAGTTCTCGTAGACGCGGTGCAAGATTGGAT -TCATAGAATACGGGTGGAAGATATGGAGTGCTTTATAGCTCTAAATAATCGGAGGCTTTC -CGATATCAACGGTTTCGGCCCCGCATGATGGCCGCGAAGGGAAATTACAGCCTACAATTT -CTAGATAAGTGCATTGTGCGGGATAGGGAGCGCTATCGCACGGAACCTAGTGCCGTAGAC -TGTAGGAAAGCTCAAGCAGCGGGGTTCTCATGTCTCATGGCTCGAAGTGGAGCCAAATGC -CGGGCCGGTGGGGGATGGATATTTCTGCCTCAGGCCAAAGTCAAGGATAGGTTGGTGCCT -TAGGCCTGAAAGCATCTGATATCCTGACTAAGGCTTATCCACTCCGCTTTGGCAAAAAAA -TAATTACTGCTAATTTTCTAGCCGTTGGGAAAGTCCTCTGTCTTGCGCTCGCTTTCTTTC -TTTTTGATAAAATCAGTGGATCTGTTGAGGTACTATACCTCTTTACAGATATCATGGAGC -AGCAGGCCAATAGGGCGCATCGCCCCGCCAAAGAGAAGAAGAAATTTGAGGGTACGACGA -GACCGCTCCGATAAATCTGCAAACTTTCTGCTTTCGAGTCAGGCTAACATTGATCTCTTC -TTTACAGGTGCTAATCCGAAAGCGTTCGTCACTTCCAGACCTGGAAAGCTCAACAAGCAG -GCTGCGCGATCGCACGATGTATGTATATGGCATTTTTCCACCATTCAATTGATGCTGACC -CGGACTTCTCTTTTAGGTCAGAGAAAAGAGGCTTCATGTCCCCCTTGTGGATCGTATGCC -TGAGGAGGCCCCGCCAGTCGTTGTCGCCATCGTTGGACCCCCGGGAGTTGGAAAGACCAC -TCTGCTCAAATCTTTGATCCGCCGTTACACCAAGCAAACCCTTAGCTCGCCGCAAGGCCC -ATTGACCGTTGTCACGACCAAGCGCAAGCGTCTCACCTTTATCGAATGCCCGTCCGACTC -TCTTGCAGCTGCGATCGATGTCTCTAAGATCGCAGATATTGTTCTTCTCATGATTGACGG 
-AAACTTTGGATTTGAGATGGAGACTATGGAGTTTTTGAACGCCTTACAGACGTCTGGTAT -GCCGGGTAACGTCTTTGGTATTCTTACCCATCTTGATCAGTTCAAGAAACACAGCACCTT -GAAGGACGCAAAGAAGCGCCTGAAGCACCGTTTCTGGAGTGAGCTATACGCTGGCGCAAA -GCTCTTTTACTTGTCCGGTGTTATCAACGGCCGTTACCCCGATCGTGAAGTGCACAATCT -TTCCCGATTCCTGTCCGTCATGAAAAATCCACGCCCGCTCGTGTGGCGCAACTCGCACCC -TTATGCGCTTGCCGATCGCTTTTTGGACATTACACCACCTACTCAGATCGAGGAGAACCC -GAAGTGCGATCGTACGGTTGCGCTCTACGGCTACCTGCGCGGCACCAACTTCCCCTCCCA -CGGGGCCCGAGTCCACGTTCCTGGAGTTGGTGATCTGACAGTTGCCAACATTGAGGGTTT -GCCCGATCCATGCCCAACTCCCTACATGGATCAGCAGATTGCCAAAGCAACTGGAAAGTC -GAACAGGCGTAAGCTCGGTGAACACCAGAAGCTGCTCTTCGCTCCCATGTCTGATGTCGG -TGGTGTCCTGGTCGACAAGGATGCAGTCTACATCGATATCAAGACTAACACGTTTGACCG -GGACTCAGATGAGGATTCGGACGATGAGGAGCGCCGAGGTCTTGGCGAACACCTAGTTGT -CGGGCTTCAAGGCGAGCGCAAGATGCTTGGTGAGGCTGATCAGGGTGTTCGTCTCTTCCG -TGGCGGCGAGGCCTTAAACGAAGCGGATGATGAGGAGACAGGCCGGAAGGGTCAACGCCG -CGCCCGCGTCGCCGTACCCGATCCAGACGACTTAGTTATCCCCGAAGGATCTGATAGCGA -AGAGGAGGTTCTTGAAGATATTGACGAAGCGGATGATATGACAGATGATGAAGGCGAACT -CGATATGTCTGCACCGGCAGACTTTGGAGCCCGTTTCAAAGCCAAACAAAATGAAAATGA -CGATGGAGAACAAGAAGACATGGCTTTTGCCGACAGTGATTCTGACCTGGGCTCCATCTC -TTCAGTCTCAGACCAGGAACTCGAGAGCGGCgatgaagacgaggacgaggatcaggatga -ggacgagggtgatgaagatgacgaggaGGGCAATGTTCGATGGAAGGAGAACATGCTCGC -CAATGCCAAGTCTCTTCATGGAAAGCGCCCTCCTTTCCGAGTCAGCGACCTGTCAAGAAT -GATGTACGATGAGTCGATCACTCCGCTTGATGTTGTGAAGACATGGCGCGGTGAAGATTC -CGACGATGAGGAGGACGAGGAAGATAGAACCGCTGATGACGAAGGCGAGGACTTCTTCAA -AAAGACAAACAACGAGAAAAAAGATCAAGCCGACTTCCGTGCCATTCCAGAGTACGACTA -CGATGAGCTGGAACGGAAGTGGCGGGATGAGGAGTTGCTCGAATTAATCAAACTCCGCTT -CATCACTGGCAAACTGTCCGGTGGTGGCTCAgatgatgaggatgaagatatggacgagga -tgaggatgaagatgagggcgatggtgactatgaagacttagaatccggtgaagttttcaa -cggtatcaaggaagatggcgatgaagacgaggaagaggaaggcgatgatgagcctgaaaa -tgCTGCCGATCTGGAAGCCGAGCGTGAGCGAAACGCCAAAAAGAAGGAAGAACTGAAGCT -ACGTTTCGAAGAAGAGGACCGCGAAGGCTTTGGAAAAGTTCAGGATGGGACGCATGATGG -TATCGATGGCCAGTTTGGCGAAGATGATTGGTACGACCTGCAGAAAGCCAAGCTGCAAAA -GCAGGCAGATATCAATCGCGCCGAATTCGAGTTGCTCGACCCTGCCTCGCGCGCCCGGGC 
-GGAAGGTTACAAAGCCGGCACCTACGCACGTATCGTTCTCGAGAACGTGCCTTACGAGTT -TGTCTCAAAGTTCAATCCTCGCTTCCCTGTCATCGTTGGTGGTCTTGCGCCTACCGAGGA -CCGATTTGGTTACGTGCAGATTCGGATCAAGAGACATCGCTGGCACAAGAAGATTCTCAA -GAGTAACGATCCATTGATCTTCTCGCTTGGTTGGCGCCGTTTCCAGACTATGCCTATCTA -CAGCACGTCGGACAGTCGGACGCGAAACCGCATGCTTAAGTACACACCCGAGCATATGCA -CTGCTTTGCTACCTTCTACGGTCCTCTTGTCGCTCCCAACACCGGTTTCTTATGTGTGAA -CTCGTTCTCGAACAAGAACCCCGGTTTCCGCATTGCCGCCACTGGTGTTGTTCAGAGCGT -TGATGAACACACTGAGATTGTCAAGAAGCTCAAACTCACTGGTCACCCTTACAAGATCTT -CAAGAACACAGCCTTCATCAAGGATATGTTCACCTCTGGGCTGGAAATTGCCAAGTTCGA -AGGTGCATCCATCAAGACTGTCTCCGGTATTCGCGGTCAGATCAAGCGTGCACTATCCAA -ACCCGACGGTTGTTTCCGTGCTACATTCGAGGATAAGATCTTGATGAGTGATATCGTTTT -CTTGCGCGCTTGGTATCCCATTAAGCCTCACCGCTACTACAACCCTGTGACCAACTTGTT -GGATCAAGTTGAAGGTGAAAAGGGTGATAACGGATGGCAGGGCATGCGCTTGACCGGTGA -AGTCCGTCACGCTCAGGGCATCCCCACTCCCCAAATCAAAGACTCGACCTACAAGCCCAT -CGAAAGACAAGAACGTCACTTCAACCCGCTTCGTGTGCCAAGGCAGCTGGCCGCCGAGCT -CCCCTTCAAGTCACAAATCACCAAGATGAGGGGCCACAAGGAGCCGACCTACATGCAGAA -GCGTGCCGTTGTCCTCGGAGGCGAGGAAAAGAAGGCCCGTGACCTCATGCAGAAACTTAC -CACCATGCGTAACGAGAAGCAGGCCAAGCGATCCGCCAAGCAGGAGGAGCGCCGCAAGGT -CTACCGTGCTAAGGTGGCCGATGGTCTGGAGAAGAAGGCCGAGCGCGAGAAGAGAGAGCG -CAATGATTACTGGCGCAAGGAGGGCAAGAAGCGCAAGAACCCGGATGAAGACTCTGGTGG -CCGTGGACGCGGCAAGAAGCGCAAGTGATCTACTCTTTGTTCTTGAACATGTCTTAATCT -ACTTGGGTCTTTTGCATATATGGCGTTTGAACCCAAAACTTGTACCTACATTATGTTGGA -TGTTTATTTATATCAAAACTATTTCATGAATGCGACACCGTTTGCCCAGTCTTTTCTTTC -TACTCCCCTCCCAGCCCCTCTACACCATAATTCTTACCATACTCGGGGCGATGCTGCTTC -TCGCCGTCATATAGGTCTCCGATGTCAGAAGTAGACATCCGATCCCCAAATCAAAGTTGG -AGTACATGAAAAATCCACCGGTTCCCAAAGCAGGAAGGTCCCGAACAGCAAAACATGGGC -CGAGTTGTTCACATTACTAGGGCTGGTGTAGTAGGTGAAGTCATATTACTGTTTCTATCG -TCGGACATCACGAACCCTAACTCGCAGTCCTGCCTGTAAAATGCTCGCGAATTCCAACTA -TCCTCGATTTCCAAATATCCACCTACGTCTATTTTCCATCAAGCTCCATCAAAGCTTGGT -CTTCATCCATCAATAAGACATCTTAACATCTTTTCCAAACATCAAATAACATATAGAAAG -ACATGGCTCCTATTCCAGGCTTTGCCAATGACAACATACTGTCTTCTGGGGCTGAAAATA -AGCATGAGGCTGACATGGAGAAGGAAAACTTCCTAATGGAGTTCCTCTTTGGAATCTTGA 
-TCGCTGTGGTCGTCGTGTACTTGGGATACAGGGCGTAAGTCCGCCATCTACCCAGCGCTA -AAAAATCAAAGATGCTAATAGCTTAGCTACATTCACTTGAAGCCCAAGTACGAGGAAGAG -TGGAAGCCCAAGCTTGAGCCCAAGCTTGAGGACTGCAGAGAGAAGTATGAGGATTGGAAG -AAGAAAATGGAGGCCTGGCATGGCCCCAAAGAACCAAGTAAGTTGATTCCCCACCACAAT -GGACTTGCTCTTGATACTAACGTTCCTTCTAGAGGCGCCAGAGCCTGCTCATGTGGCTCG -CCATGTTTCTCAGGCCCCGGAGGTCAAGCCGATCATTCGTGATTTGACCCCGGAGTATAT -GAGGATGTCGACGGCGGGCATGGTTTAGGTGGTGTGTTTGTGGCTCAGTGCTGGTAGAGG -TATCAAAAGCCTGCCAGGCTGATGCTTGTATTGGATACGGAAGGAGGAAGGCtcttgttt -cttgtttctttttctctttctcttttgtttcttcttctgtttttctttctctcagtttat -tcttACTTTTCACTGAGTTGTATCTTGACTCCGTTGTCATTGGAGTTGACTCATTGCTTC -GGAGTATAGTTCCTGGAAATCGTATCCATATTGTGGACTCCGAGTTTTATTCTTTTCCTT -TCTTGACTAGTAGTATCGTAGTCCCGTAGATCTTGGTATACTTGAAGTATAGCACGTCGG -GCACAAGCCCACCAAGCCCACTCACAACCTTTTCAACCATCTTCAACCATCTCCATCTCT -AAATGACCTTATACTGATGCTTTCAGAATAATCTTCCTCTATAGAAACTCATTGAATCAG -ATGAAATTGGTCTAGTACCATCCACGTGACCGCGCCTCACACGCGTGTTCCACCAGTAAC -CCTTCTCTCTACCAACTACTCAACGAGTTTCTTTTGGTGCTAGCATATTGTGGCGCATCT -ATATTCAAAATAGTTTTTTATTGCATTCTTTTCCTCAAATCCCTGCGGGTCCCTTCATCC -GGGGTCAAAATCACGGAGAACCCCATTTGCCGCCTCGTGCTGTCGCAACCCTTGCGCGCG -TGCTGGTCGATCGCCACTCGATCGGGCAGATGCTCCTGGTTCCCACAGTACTTCCACGCG -TCTTGTTGAGATCTCGACTGTTAGCATCTAAATCACTTGGTATCATACATCCCCGGCTCT -GACTCTCGATGTGGTGACTCGAGACAGGGCATGCGTGTTGAGATCGTGCGTCCGCGTAAA -CTCGCGATTAAAGCTAGCTCGCGCGTATTTGCCGATTGTCGCTATTCCAGCATCTTTTAA -TCTCCCACATCCTCAGCCTTGATCATGAATGACGGTCGCGATGCTATATCTAATCGCGGG -GTGCTCCATGCGGAGGCCAGTGCGAACACCCACTCTATGAACAACGCGACGTCTTCCGTG -CCTCATAAGCCAATGCCTCCGGTCGATACCCAAACCACACAACTGCCGCAACCATTGCCA -CCACAAATAGATGGCTTGAACGGTGTTACCGTTGCTCACGGCGACGGTAACCCAAGCCCA -GCCTATATCGAATCATCAGAGCGCGGCAGCGATACGTCGGGCGATGGCCGACGACTAGAC -CTTTCCGCGAGCACCATCACAAACTCTCTTTCCAACCTTCCTTCATCCTCGACTAATGGT -CATAGCCAGTCGACAGAGTTCGGAAACTCTCATACATTGAATGGACATACATCGAGATCG -TGGGTAGATGGGGAATTATTGAAATCCCAACCCGTCCCGCACATGCAGCCGGTCAATGGA -AGCGCTTCAATAGATCGATGGCAACCATCATTGTCGCCGATCAAAGCCGGCAATGTTGCA -ATATCACCGCCGCCATCGCCACAACATTCGCGCACAGGGTCGAACTTCGATGCTCGCTTA 
-GCCCCAAGCCAGAAACGGACGGCAGCTGGCGACTTCAAATCCACTTTGAATTTACCCACG -CCTCATAGTGCCGACCCGAATGGACCTGCACGACGACGTTCAAAGAGTACGGGCTCATCA -GCTTATGGGAGTAGGATTGCACAGGTAACACGCTCACCCGCTCACTCAAGAGTTCATAGC -TCATTTGTATTACCCAGTTGTCGGTCCATATCCGTACACGTTTGTCATATGCGGCCGCGA -AAATAGAACAGGCTCGACAGTCGCGTGAAGCTACCCCACACCCACAAACCGCAATGCGAA -CACACAACAGCGCGCCACTGAGCCCGTCGGCACCAAGTGACGGCGCATCATTACCGGCTG -GAAACTCCGCGCGCTTTTCACCAGACTCCCAAACATCCTCCAACGGGAACTCCCATCGAT -TCCCGAGCCATAACCGCTCGCGATCAGCACTCTCATCCAATCAATTTCTTTCAATTCCAA -AGCTAGCACCGCCCGTGGATATCATCTCTTCAAATGGTCATAACCAGCGCCGTCGACCGA -ATCCTAATGCAGTCACAAAACCTTCAGACCACACCCCTATTTCCCGTCACCGACGCCACC -ATTCTTACCAAGAGAACGGACTCATCCGACCACTCAATGGCCAAACGCTTCTAGGACCAA -AGAAGCCCTCGCTCTCATCATCTTACGCAAGCGCACCAACATCCACACCGAATGGCTTCT -ACAGACCCCGCACTCACTCCGAAAACACACTCATGGAACAAGATGCCATCGAAACCCTGA -TGTTCATGTCAAGCCCTGAGAACTCGGGGTATCGATTTAGCCCTCGCCCGCTACAAGCAG -CGAGCACACAAAACAGCCTGAACGAGTCGATCAACGCTGCTAGCAATGACACTCATCACA -ATGAAAGCCAGGGCTCCCAAACTTCCGAGTCTCACAATGGTCGTGGGCTCGAGAGAAAAC -CAGAGCTGGAAGCTCATGCCGGTGATGATATTGATCGCCTTTTGGATCAAATGGATAGTG -ACAGCGAGGATGAGGGTCGTTATGCATCATATCGTCTTGGTATTAACGGTGCACATCCAT -TTAGAGAGCATCAACGTCCGAGGTAATTACATTCGTTCTAAGATTTCAGGATTGTTCATG -AGACACATGGTGGGATCATGTCATATACATCGTTTTAGCATAGTGGGTTTTTTTGGAATT -ACCGGCATAAGCATGCTCAGATCAAAGGAAGCCTACGATTGGTATGACTTCAAATCATTT -GCTGTAATGAAAATTTTCAGGCCGGGCATCTATCAGAGCCTGAAATAGTTGATTGTATGC -ATGTTGTTTGCTCTATGTGGTCCTCCACCCTGAAATCATCTGATTAAATTACTCCTGTCG -TATTTATAGTTGATATGCGCGATGCATCATTTAATGTGCCAGTTCCTATCAATTTATTAG -TCTCTCACGGTGTGAATTTACAAAGAGCACACTTACACTTGCGAGCTTGCTCCTGGCTCG -AATCTTGGGTCCGTATAGGGCCAGCACCCAGGGGACCAGAGTCAAAGCAGCACCCTGTTG -TAATCCAGTCAATTGATTTTTCTCTTGGGGTGGAAATTGCCACAACTTACAAGTCCTCCC -AATAGACTTGATGCCTCTGGGTAGCCAAGATTGGTGAACATGGCATTTGTAACCAATGGA -AAGATACCACCGAGCAAGTTCCGGCCTAGCGAGCAAGATATCAGCAAATGTCCTAAACAC -ATACCCAGACCTTCATCAAAATAACGATCCAAGGAGAGAAACACTTACAACAAGACTGCG -CTGCAATTGCCGAACTCGCATACCGATGATAAGTATCGGCAAGGTAATTAAAGGTAGCGA -GATAGATAGAAAAGATGCCCATAGTAGCACAGCCAACGGCGATAGCAGGCACAATCCAAT 
-GAACGGATGGGAAAGAACTCCAGCCAAACCAGAACAGGCCAATAGGTAGAAGCACAGACT -CAATGCAAACGAAATACAGTCTACCCTCGGGCGAAGTAGAGTGTTTACCGAAGCGCATGG -CGATGCGCTCTTGGTAGATGCTGATGATTGTGATGAGGATGACGCCGACGCACATTGAAG -TGAAGACGGCGCCGATCTGCTCGATAGTAAAGTTGTGGTTTGTGCGGAAAATGAGTGGCA -CTGAGCTGAATTGGAGGTAGAGAACTGCCCAGCTGAAGGAGACCCATAGGGAGAAGAAGA -AGACCACTGGCTCGGTGAGGAGCATGTCTAGATTGGGATGTTAGCTTGTTTGTCTTTATT -TCTTCTTTTTTTCTTCGTGGGAGATGGTGGCTTGGGGGTCGAAGTAGACTTACGGAATGG -CCGGTATAGCGAGATCTGGATCATGGCTGCAATCGATGCCCGTTGCTCGTCGCTCTTGAC -TTTCCATCTGATTCGATGAATGCATTTTTCTTCGGACTCTTCGCTCGACAGGATCACACC -GTGGTGGCCTGCTTCTTCGAGTTGCTCGTAGTACTTGTTTAACTTTGCGGCTTTGCGGCT -CAGCAGGACGCTGCCGCGCGTTTCTTTAAAGAAGACGTAGATCACGATGACAAATACGGC -GGAGACAATTGCATGCGAGTAGAACACCCAGCGCCATGTGGTGCGGGTAATGATTACGCC -TGAGATCAGTGGTGCCAGGCCAGTACCGAACAATGCAGATCCCGAGAAGAGGGCCATGGG -CGTGTTACGGTCTTGGGCGTGATAGATGTCGCTAATCACGCCGCCAACCATAGTGGAGAA -AGTTGCTGTGAAAAAGTCAAGTCAATATCTATAGAGTGGTGAATCGGAAAATCGACCATT -GAGGGCAGGGTATACATACATCCACCGACACCTTGGAAGAAACGAGCTACAAGCAATCCA -GCAAAGGACTGCGTGCCTCCACAGCCAATTAAACTAGCTATTGCACGTATCGAGTCAGAA -AATACCGAGTGACAGCAGTGAATTGCGGTATACATACCAACGAACAGAATCCCACTAGCG -ATAAAGATCGGTCTCCGTCCATTTATCTCGGAAAATGGAGCCAGCACCATCGGTGCCAGG -GCAAACCCGCAACAAAAAATCGTGATGCTCAGATTATACACGACGGTACTGATCCCCCAC -AATTTAGTCAGTTCCCCGGATGCAGGACTGACCTCGCCCGCGGCATAACCTGCTAGCGCA -GTAACCCCGCAAGCAATCCAAGTCATCATCGACTTGCGCCATTTCGGCCACAGAAATGGG -GATGTATATTCCTCTAGACTAGGCGGCTCGGGTGGTGGTGATTGAGCGGGACCGGGAGGT -AAAGTAATGCAAGGCGTCGGTAAGGGCGTTTCTAGCTCTAGATATCGGAATTCGATATCT -TGAGACCCGCATAGAGACTCGGGAGTCTCCTCGCGCTTTTCCATGACTGTCGGTTGAAAT -TTAGGGGACTCTGGTGTTGTTGATGCCATGCAGTGAGTCAGAATGGTTGGCAAGAGTTTT -CTAAGTTTAAACGGACATTTGGTTGTTTATAGGCTAAGCATCTAGGGCGTGGAGTCGCCC -GAAATAAAATTGGAGAATTGTTCCAAGGCCAAGACATTAATAATTCCAGTTTGATATATC -ACTCATGTGGATCATTTGATCTAGGTATCCCATTGCAGTAAATGCTGAGTCTCATGCCGA -GTTACATGCCGTCGATATCAAAAGCCCTAGAAGAGTCCGAAGTAGAAAGTCGGTGGAACT -GATAACGGCCATTCCATGGTTACGTGGGACCCGAGGTCTTGGCGTTGGAGTATACCGCCT -ACTGAATCTGTCTTAGTTCTATCTGTGGAAAGGATTTTCGCTTCTTCATGTATTTCATCT 
-CAATTGATTTCAAGCAATAAAGTCTCGATTTTCCCTAATTTCGGCAAAGAAAGAGTAAAA -CACAACACTTGGGGATCAACATTGATAATCGTAATATGGGGTACTCCGTATACTTCTCCG -CATGCAACCAGGCATCCACGATGTTTGGTGGAAATTTCCCCAATTTGGGGACATTTGGAG -TTCGGGGAATATGAATTGATTCGATAGTGATTAACAATCAAGGTAGACTCAGAATAAGCC -AATGGTGTATGAATATGCAGGGACAAAGCAGGATCAATTCCAGTCTGACCTGACGTGTGT -GTGTGTGGCACATGTGAACAGGGAACATTAGGCACTATGGCATAGTCCATCCGGAGTTGC -GCTAGAATCCCCCGAGATATCTCACACATTCACTCCACATCCCCAATGTTGACTAGCAAG -CCTTATCAATGTCATCGATGCCCAAGGGCGTTTGCTCGTCTAGAACATCTTCAAAGACAT -GATCGCTCACGTATGGCTTTGAGACCTCGGATTGAATACCCCATACCCGACTAATACCCC -CGCAGATACTAAAGAGAAACCGTTTGTTTGTGTTCAATGTCCCAAGGCCTTTACACGCAA -GTATGATATATCGTCCAAGTAGATGTATTCAATACTTACAACACTAGGGATCTGCTGGCC -CGTCATGAGCGTCTTTCTCACAACCCCGATGCCAGTCCAGCCAGCAATCACACACCGACA -CCCTCCCCCGCAAATCCTCCCGCCCTGGACGAGTTAGATTCATTGGCATCTGCAGTCACA -AACCACGTCCAATCTGATCGCAGCACCTTACAGGGTTCCGGTGACCATTTCCCAGCATTA -TCACGTGCTGTGCACCGTCCGATGCTGAGCGCCCAGTTGGCTAGCACAGACCCTCCGTCT -ATAGGACCCAGTACCCCATCTCCAGGCCCGGATTTTGCATATGCTCTGGGTGGCTTTGGA -CCTGGATCATATCATGGTCATGATTTCACCTCATTCCTGGATAGCGTCCCTTTGCCAAAT -CATCCATTCTCGCCAGCTTATCAGCCACTTCCGCTATTCCCACCGTTGAATTTCTCCTCA -GCTCTGGACTATGATAGATCTTCTGATAGAGGGACTTTGACTGAAACTACTCCCTCGACT -CCCTCGAGCTCTGTACTTCCCCGACATGGCGCTCAGTTGCCATCGCTCCAACCGGAGGGC -TATCACATCTCCCCCAAGGCCCGCCAGCCCACAGGATTCGTACCAGTCACGGCTCAATGT -CGGGACAAGCTCTTAGATATGCTATCCGACTACGCCAATGTGGTCCCAGATCCATCATTG -CCATCTCGGCATGCGCTATCCCGGTGTTTGACCGGGTATGTAACTGGATTCCATGACCAC -TACCCCATCGTGCACATCCCGACTCTCAATGTAGACTCGATGACATTACCACTCTTCCTA -TCAATGGCGGCATTGGGTGCACGGTATTGCCGCGAGCCAGACACTAGCATGCGTCTCTAT -CAGATTGCCAAACCAGTGACTTTGGAACATGTTCGCCGGGTCTTCCAGTCAGGCAAGCTG -CCCACGGTCAACGCAGCCAACGACATTGATACCTTAGAAACCGTGCAAGCTTTATTGTAT -CTGAACTCGGTATCACTATGGTTCATCAATAACCCCCCATATCACGAGGCGCTATCGCTT -CGCAGTCTAATGGAAATGCTCATTCGACAAGGAGGACTCAACCGGCTACCCGAGCAAGAC -GGGACATGGGGTAGCTGGATACGACGGGAAAGCGTAAAGCGTACCAAGCTCATTGTCCTC -TGTTTCTTCAACATCCACACCATCGTCTTCGACGTCCCACCCATGATCCTCACAGAAGAT -TTCACACTCGAATTACCCTGCACCGAAAAGGAATGGCAAGCCGCGAGCGCAGACCTCTGG 
-CAGGCAGAGCGCATGAACAGTCCCGGCGAGCCAAAATTCCAAGACGCCCTCTCCGCTCTA -TTCGGGCCAACCGACAATGTAGAGCGATTCTCTTCGCTCGGCGGCTACGTTCTAATCCAC -GCCATCCTGCAAGATATCTGGCTAATGACGAAAGCCGGCCGACTACCCGTCTCACGACGC -AACCGCTTCGCCTCATCATCAATGACATCCACACCGGAGCTCGTGCACGTCGAACAAGCA -CTAGAACGCTGGTGCCAGTGCTGGGAGCGCAACCAAGAATCATCCATCGACCCACTCAGT -CCGAATGGCCCTTTATCCTTCACATCAGCAGCCCTGCTTCGACTAGCTTACATTCGACTC -AACGCAGACTGCGGCTCAGCCCGCCAGCTGCAAACCTGGGACCCCGTCCAGATCGCAACG -AGTCTCCGTGAAAATCTCTCCGTGCAACGCGGGGACCGGCTCACGCGCGCAGCCCTCCAC -TGCGCACATGCCCTCAGTACACCTGTCAAACTCGGCATTGGATTTGTCGCACACTCGCAG -GTAGCGCTGTGGTCAAACCAGCATGCTTTGTGTTCTTTGGAGTGCGCGGTGCTGTTGGCT -AAGTGGCTTGAAGCGATTACGGTGCCGGATCCGGAACCGAGGCTGACGGAGCAGGAGACT -CGATTGCGAGATTTTGTGCTCGAGATGGTTATGGAGGTGCAGCATGGTGCTTCGAGGGAG -TGGCTGCTCGCTACTAATACCCGGCTCAGTGCTGCGATTACCCGGTTGTGGGCTCGACTG -TTTACGGCGGATTATATCTGGGAGATGGTTTCTTTGATTGGGAGGTCTTTAAATAGTTAT -GCGGACTTACTAGAACAATAATGGGTGTTTTTGTTGCTGTACTTGTATGGAGCTCTCATG -GCGTTATTCTGCCTCCAACTTTACGGCCATTTAGGGCAATTGACGGTTGTTACTGTCGTT -ATAGTGGCTGTCTAGCTGCTAGACTAGGTGTCCAAGAGCTGAGCTTGAATGGATCAAATT -GCTCATGTCGGTTATACTCGGCACGTCGTCCTGCTGGCCCGTCTGAGACCCCAACTTCCC -ATAGGGCGTATGCACATCGTACGGACTCTGCACAAGGCTCCCCGATGACTCACTGAATAT -CAATCTAGCGGCCGCGGAGATTGAGGACTTTTTGATGTGGCCACGTTCATTCATCTATAT -TTCTCCGAATCTCCAATCCTGATGCCTAACGCCTGTTTTCTCAGGCCCTCCTAGCTTAGT -TAGATCAGACTTCGTTAGCTTCAATGATGCAGTCCTTCTTCTCTCTCTTTCTTCTTCCCT -CCCCAGCAACTTAGGACATACCCTCCTCCCTCAAATAACCAAAACAATCCAAAATGTCCC -CCTCACCCCTCGCAGCATCACCAGAAATTCTAACCCTCCTAAAAGACCTCCACACCAAAT -CCCTAACCCAAGAATCAACTGTTGACTGGAAAAGCCTACCGGAGCAATGCAGCACCGAAT -TTGACTCCATCATGCTGGACAAGTTCATCGCTCTCGACCAAGATAAGTGCGAACTAGTCT -ACCACCTCCTACGCAGCACCAACGCAAAGACAGTCGTCGAGGCCGGTACCAGCTTCGGCG -TTAGCACCATCTATCTAGCGCTTGCTGTTGCAGAGAATGCGAAGCGGGTGGGCGCTACCT -CTAAGCCACGAGTCATCGCCACAGAGAAGGAAGAGTCCAAAGCTAAGTTGGCTCGAGCGC -ATTGGGCCAGTGCGGGCAAGGATGTTGAAGATGTGATTGATCTTCGTGTTGGGGATCTGC -GTGAGACTCTTACTTCGGATTTGGGGACGGTGGATTTCTTGTTGTTGGATAGTAAGTGGT -GATTTCCTTGGGTGCCGTAAGATATGGATATACATGTCTGCTTCCTTCCTTTGTACATGT 
-TCGATGTTTGGTATTCGATCGATGGCCACTTGTGATAGTAACGTCTATACTAACCCCTCT -CAGTCTGGACACCTCTCGCCCTCCCTGCACTCAAGCTCGTGCAGCCGCACCTCCGACCTG -GCGCTGTGATCGTCGCTGATAATACAGTCAAGGCGGGTGATAGGTATCAGGAGCTCTTTA -CATATGTCGATGCCAAAGGTAGTGGATTCCGACGTGTGACGATGCCGTATGAGGGAGGTT -TGGATATGATTGTCTATCAGTAGATGTAGTCCCTTGAAGTAATCCCCAGCAGCACGCTGT -CGAATATCAATTCGTGTACCCCATGAGCTACATCAACATAGTTCCTTTTCTTTGATAGGC -CAAGCGGGATCCGCCCATATCACAACCCCTCATTCTATCAAATCTCAAATTTCCCAAGTT -TAGGGACTTTGATATCTCCAACATTGCTCCGATTCCACCAACCCGATACATTCATCTATA -ACCATGTCCGAACCCAAGATCCAACTCATCCCACCCCAAAAAGAAGGCCACTTCAACGCA -GCACCAAATCTAAACCCACCCCCAACGCCAGCAACAAAAGACTATAAACTTAACCACCTA -GCCCTACGCATCCAAGACCCAGCCCGCTCACTGCACTTTTATATTGACCTGCTGGGGATG -CGAGTAGTATTCACTATGAACGCCGGGCCCTTCACAATCTATTACCTAGGTCACCCGCCG -GCAGATGCAAAGACCTCAGAGGATATAAGTGCGTGGGCGAAGAGCACGAGTGAAATCCCC -GTTATGACGAAGACCAGTGGACTCTTAGAATTGTATCATGTGCATGGTACGGAGAATGCA -GGTGGTGGATGTGTTTCTACGGGGAATACACCACCGCATCTTGGATTTGCGCATCTGGGG -TTTACTGTTCCTAATGTGGCGGCTGCTGTGCAGAGGTTGCGGGATGGTGGGGTTGAGATT -CTCAAGGATGTGGGTGTATGTTCGAGAGAGGCTGTGCCACTTTCAGAGTGGGAGGAGGAA -AGGGGTATTGGATGTGGTGAAATCCATGAGAATTATGCGTGGTTTTTTGAGAAGTTTGCT -ATGGTTTCTGATCCGGTGAGTTGCTAATGATATTGATAACCCTCCTAGAGGTTTGAGCTG -ATGCGGGTTCATAGGATGGATATACGGTTGAGTTGATTCCTCAAAGTGTGTGAAACTGTG -AGGATCTACTTTCGTACAATCTACAAAGAGCTTTGTTTCAAACAGGTCATGAATGGAACT -TCCATCATGTAAGGTTTGCTTAATGCAGCCTATACACCAAACATGTGATGATACACTCAA -ATGTAACGAAATTACGCGCCCCAGCGGTCATCCTCCGCAACATCGTAGTCCATATCCCGC -GCAGGAGTGGTGTTGGCCACCGACGAAGATGACATGACTGTGTCTTCCGGTTCGTCCTGT -CCAAGACCACCCTTGAAATCCGTCGATGACCCAGCGGCGGGAAGTGCAATGGGAGGGTTT -GCCACCCAAGTGAGCTGGACCTCGCCCACGGATGGGATGTTCCAGGGAGAGAACATGAGT -TTCTCCGCCTGGAAGCGCTCTTTGAATGCCACTATGACGGTGTTGGGCTGATCTGGGTGT -GCCTCAATAGATTCGTATTCGCCAACGCCCTAGAACATAATAGATTAGTATATGCTTTGC -TTTGGTCAGAAAGATGATAACATACCATCAGGTGTTGTCGCAATGCCTCGTCCTTGTCTG -AGTTGAGCTCAACACCCGATACAGCGACCCGTCGAGGCCGGTTATCCAGACGCAACACAC -CTCCTCGTCCCCGAGCCGCAAATCCGCCACGCCCGCGATATCCTCCGCGGAAATTGGGGT -CATAACCGCCGCGTCCACGGTAGGTGCCTCGGCCGCGGAAGGCTCCACGACCCCGATATC 
-CACCGCGTCCAGAAGGATCGGCTGTAGGGTCGAGGCCAAGGCTCTTTGCCTCAGCCTCGA -GGGCCGCCAACTGAGCCCGCAGTTTTTTCGTCTCCTCACTGATATTCTCATCGCCAGCAG -GCTCGACAGACATGGCGTCCCCAGAGTCGGCCGCATCGCTCATATCTGCTGTACCTCCAA -GCCGCTGCATGAGACGAGCCCGCTCCTCGTTCTGTTTCTTAAGAAGTTCGTCGCGCTGGC -GCTCTAGAGCCTGCTTGGCCGCTTCGGTCTCCTGTCGCTTCTTCATTTTCTCTTCATGTA -CCCTCTGTGCTTCTTGTTGCTGCTTCTCAAACTCTTCTTGGTTGAACGCTGGGGCCTCAG -GTTGTGCGCTCTCGTGCTTTTCCTCGCGATTCGGTTTGTACCAGTAAACCTTCACGAAAC -GGTTATCAAAGATGACCTTGGGGCTGGACCACGCGGCCTTGGCTTCGGGGAAAGTCTCGT -ATTTGACCAAAGCAATCTTTTTGTAGGGTTGCAGAGTGACCTCGGTAATGTTGCCAAACT -CAGAAAAGAACTCGCGCACAGAATCCTCGTTGAACTTGTCATCAGGAATCTGCTCAACAA -CAATCGTCGTAATAGACTGATCCTCATTAGGTCCTGCCGATGAGAAGTCGGATCGGCCAC -GTCCGCGTCCGCTGAATCCGCCGCGCCCACGGCCGCGGCCACGATCACCGCGCATAGGAG -CACCGCCACGCTGTGCATGCATGCCCGCAGTCTTGGGGTCATATTCTGAGACTGTTAGCA -ACGGCATATCTAGTATGGATGAATTTCTCTTACCATCGTCTTTTGAGGCACCTCCCTCGC -CGTGCTGATAGGGGCAAGTATTTCCTAAGTAACAAATACCTTGGGTATCATAGAACGGAC -ATCGCTGGTTGCTTTGTGGGATCTGATCGACTCCTGGCTGGCCGGGAGGACCAGGCATCC -CGGGCATAGGCATACCCGGCATCTGCGGGAATCCCATGCCTTGCATAGCCATCATTGCTG -CCATAGGATCGTTCGGGTCGAAGGGCGGGAAACCGGGCATCATCGGGAAGCCACCAGGCT -GCTGCAGAGGGAACTGTTGGCCCTGCTGTTGTGGAGGAAATTGCTGGCCTTGCTGCTGCG -TGGGGTATTGTTGACCTGGCTGCATCTGATGTCCACCCATAAAGTCGCTGCGACCGCCGC -CGCGACCGCCACGCCGTGCAGTCTTCATGGCACGATTTTGGAACCCACCATCGTCATTCT -CCGACTCCCCTTGGAATCCCTCGTTAAAAGTTCGTTTGCGATTGGCACTGCTGGGGGTGA -CAGCAGCCCTCGGGCCGTTGGGAGCATTCTCAGGAGGCTGTGACGAGGAAATCTGGGCAG -CGGGTTGGGGCTGTCGCTGAGGCTGGGGTGCAGGTGCGGGCGGAAGAGGCGCGGGTTGTT -TGGGGCCGAAGGTTGTAAAAAGCTCGTTGACAAATGGCTGCGTATCTAGGAGATGTTAGT -AATATGAAACAATCCAATGCGATTGAGACATCAGACGCACGTTCGCGCAGGAAGTCCTCC -AAGTTTTCCACCGAGACCTTTTTGATTTCCTCGGCTGGCGCATCAGACCTAACTAGCGCT -AAAACATAGTCTGCAAGCACATCCGAGTCTGCATCAGAGCTATATGCAGAACCAGCATGA -TTAGTCGCTGCTTTCTTCCTCGAGGCCCGTGTGAAGACCTGATTCCAGTACTTGACCAAC -TCACATGTCCTCCAGTTTTTTAACCACCCATTTCTTTACATCGGCGGCTTCTGCATCGGT -GAACTGCATCGCGAATATGATTGTCAAGCAACTTCAAACTGTGAAGGTTGAGTTGCGTCG -AACAAACAAATGACGTGAAATGGCGATGATTGCAGGGATTTGGTTTCAACAGAGTCGTCT 
-CTCGGAGAGATTCCTAACTATGCAGTGATAATTGTGTGCGGGGTGAATTCACACCTTTGT -GTTTCAAATAACAAAGCTTGAGAATAAGTTAATTTGAGCTGATAAGCATTCACCCCCAAC -CGTGAGGGAATGTTAGAGTCATGACTGGATACGCGAAGTCGAAGGCAAAAACGCGATTAC -CGGGATCACGTGGTTGAATGACATACCCAACTGCTCAAGCCGCTCAAGCTGCTCAAAGGA -GCTCATGACTGAGGATTGTGATCGGATTCCTTGTATTTCTCTAGTCTTTAGATCTACTCG -CACTCTACAAATGAGCCTACAAGAAAAGCAACTGTGCCTAGCATACCAAGTTTATTGTCA -TATCGAAACTGGTTGCTAGACCACAGCAAAAACATACAAGCAAAAAGCCCTCAGTAAAAA -AGCAAGATGTAAACAGCAATGAATGGACAGAGCCAATGATGCCAGAGGCTTCTAGGCTCA -TGTCCAGATTTCTAAAGACGGAACGATCAAAGCAAATACTGACGAATGAGGGAATAAAAT -TTTGTTGAACCGAAGAAATTGTATTGAATTCATCAATCTCATGAAAGATTCCACGAATGA -CTGGCAGAAGGGAAATTCTGGGTGGCCGGGAACATTTTTACAGCTTGTCGTAGTACTATA -GGACACAATTAGTGAGTGGGACCAATGATGTTGAGCAAAAAAGTGACTTACGTTTTCGTA -ACCGGGAACGACCTCCTTGAGACCCTTGCGCTTGCGCATCTCCTGGACGACCTGACCGGG -CTTGGTGGTGGGGTCGAGAGGAGAACCACCGGGGAGGATAGCCCAGTGATCGAAGACGGA -CTGGGGGAAAGCCTGGCCACCGGTAGCGGAGCGCAGATCGGCAGGGAAACCGAAGGACTC -GTTGACGGGCAAGTAAGCCTTGACCATGAACAGAGGGGTACCGGGACGCTGCTCCTCAGT -GTAGACGTGACCACGGCGGCGGGTAAGGACACCGTAGATACCACCCATAGCCTGCTCGGG -AACCTGGATCTCGACGTTGAAGATGGGCTCGAGGAGAGCGGGGTCAGCCAACATGGCAGC -GGCGTACAGAACACGACGAGCAGTGGGGATCAGTTGACCACCACCACGGTGAATGGCATC -AGCGTGCAGAGTAACATCGAGAATGTTGAAGCGGACGGAACGCATGGGCTCCTCAGCAAC -GGGACCCTCGCGGGTAGCCCACTGGAAGCCGGAGACAACGGAATCCTTGATTTCGTTGAG -GTACTGGACGGCCTTGGTCTGGTCGACGAGCAAGTTGGCACCGGTGGTGTCGGGACCGAA -GCACCAAATCTTACGGGCATCGGTGACATCCCAGCCGTACTCGTCAGCAAGGACACGGGC -ACGAGCCTTGAAGTCGTCACGGGGGCTGATCTTGCCAGACTCAATGGCGAGAGAAACCTC -CTCCTCAATGGGCTGAGCAGTCATGTACAGACGGTTGTGCTTGTTGGGCGACTTGGACAG -GGCAGTCATGCTGGACTCGCCAGCGACGGTCTCACGGTAGGCAACGACGGGGTCGGAGAT -ACGGAGAGGAACACCAGCGTGGTCCTCCTCGAGATCCTTAAGACAAATCTCGAGGTGAAG -CTCACCGGCACCGGCAACAATGTGCTGGCCAGACTCGTTGATCATGGTCAGGACACAGGG -GTCGGACTTGGAGAGACGCTTGAGACCCTCGACAAGCTTGGGCAGATCCTGGGCGTTCTT -GACCTCGACGGAGCGCTGCACGACGGGGGAGACGGAGAACTTCATGACCTTCATGTTGTG -GGCGGTCTCATCGGTGGTGAGGGTACCGGACTTGAGCAGGAACTGATCGACACCGACCAG -ACCGACAATGTTACCGGCGGGGACATCCTCAATGGGCTCAACGAAGCGACCCATCATGAG 
-AATGGTACGCTGGATGGCCTTGATGAAGAGATCCTCCTTGCGACCAGGGATGTAGTTGGG -GCCCTGGATGCGGACCTTGATACCGGACTTGACGGTACCGGCGTAGACACGACCGAAGGC -GTAGAAACGACCCTTATCGGAGGTGGGCACCATCTTGGAGACGTAAAGCATCAGAGGGGC -AGTGGGGTCACAGTCGCGGATACCGATGGCGGCACGGTCGTCCATGGGGCCCTCGTAGAG -AGTCTCGGCACGGTACTTCTGGGCAGTGACGGGGGAAGGCAGGTGGATACAGATCATCTC -CAGCAGGGCATCAGCGGCAGGCAAGAACTTGCGCATGATGAGCTTGAGGATGGCCTTACC -CTCGAGCTCCTTCTCATCGTTGGTGAGAGAGACCTCGAGCTTATCGAGGAGGGTGAAGAT -CTCATCGCGCTTGTTGTGGTTGACAGCGGCGAAGATCTTGAAGATGGGGTCCAAGATGAA -CTGGTTGAAGGCACGCTCAAGGGTCTTGCCCTCGTACTCACCCTTGGTGGTCCACTTCTT -GGTCTTGGGGTTGAAGTAGTTGTCGCCCCAGAGACGCTCAAGCATCTTCTTGCGGTCAAC -ACCGAACTTCTTGGCGTAACGGACAGCGAACTGGCGGACAGTGAAGCACCAGCCGTGGAG -ACCGGAACCGAAAGCGATAGTACCTCTATCGGGGTAGACCTGGACGTCACCGAGAGCCTT -GTCGAAGTAGGTGGAGATGATGACGTTGACGGACTCAATGGTACGGGAGAAGGACTGGTA -AAGATCCTCCTTGGAGACCTGGAGCTCGAGAAGAGCACGATCGACCTTGTTGATGATCAG -AACGGGCTTGATGCGCTCGGTCAGGGCCTGACGCAGGACAGTCTCGGTCTGGACACAGAC -ACCGGAGACACAGTCGACAACGACAAGGGCACCGTCAGTGACACGGAGAGCGGCAGTGAC -CTCAGACGAGAAATCGACGTGACCGGGGGAATCGATCAAGTTGATCAGGAACTCGTCACC -ATCGACGGCCTGGGGAATCTCCTTGAGATCCTCTGGGTCGGGGAACTTGGCGTAGAGGGA -GATGGCGGTGGACTTGATGGTAATGCAACGATCCTGCTCATCAGGACGGGTGTCCATGAA -ACGAGCCTCACCGGCCTTGGCGGCGGAAATAATACCGGCACGCTGGACAAGGGCATCGGA -GAGGGTGGACTTTCCGTGATCGACTAAGACACAGAGACAATGTCAGAATCAGCCATTTCA -CTATACATCATGCCATCCTTGGTAGGAACTTACCGTGAGCAATGACGGACATGTTTCGGA -TGTTAGCCCGACGGTCCATCAAGGTCCGGATCTGATGTAGGTTAGTATCGACACTCATGA -TGATTTGATGTTTCTCTGTTCCAAGAATCCAGGTTTTCTAGCATCACCAGCGGAAATGGT -AGATGGAGATCAGGCGAGTGAAGAGCAATCGAAGGGAATCACCCTTCCACTGGTAATGCT -AGAACCTTGGGAAGCTTTGGGACAATCACTGGGACAATCACGAACCTCCTCAATAGTGAA -GCTGTTCATATCATGTTAGCTGTGCATTCTGAGACTTGTGAGAAGATATCCTTACTTAAC -CATTTTGGCGGTTTATCCTTTTTTTCAAACTAGGATAGAAAAAAAAAGAAGAAGAGCACA -CAGATGACTAGGATCGATATGATCGATCACTGAATCAAAAACTTCTATGTGCAACTGCTC -CTGGACAGAGGTGggaaagaagaagagaggggaagtggaaagtgggagagaaaaaaaaga -aaagcgaaagtggGGACTTTTTTTCCCTTCCGGTTTTGTGCGTTTGAGCTGACTTAGCCC -TTTCTCTTTTCTAAAGATTTTAGATTATCTTTTTTTTATTGCTCATCTTTGTCTCATCTA 
-CGGTGTACTTTTCATGGCCTTTCTACTTTGATTTCAAATCCATATTTCATGTCTTGACTC -ACTGGGAACCTTTTTTTTTGGCCACCCGTGGGTTCTCCGGTGACGTTTGACAGCATATCA -CAAGCGAGAGGTGTTCCGAGTCTTTTCTACAGCTATACCGATGATATTTATAGTCTTCTG -TTCATCCTAAATTCAAAAATGAGTCAATCCAGAGCGAATGATTCTGCTTCAGCTTCCCAT -ATTATCATGTAAATGACTTGACTTGCCCCCAACCCCTCTATCAACTTGGTGGGGCAATGT -ACATTAATAATCAACTGATTCTGGCCTCAGGCAACTTCTGAGCTCAAAGTATGGTACTAT -ATACTCCGTAGCTATACACCTATACACTATCTTTGAATTGAATTAAACCTAGCTCCAGAA -AAATATCAGTCCTTACGAGAGAAAAAAAAGAAAGCAAAAGAAAAAGCTTTGCTTGTTCTG -CCTTGAGCAAATTTTCATCACTAGCTAGCTGGCTCGTGCATGCATTAGTGGGGGGTAAGA -TAACTCCTCATTTGAAGGCACCAAGGCATCAAATCATCCCACCTGCATTGTACACAGGAA -AATGACACCACCCTGAAAGCTGCATATAGAGTCAAGGACCAAGCCAAGACTTCAACCTAA -CGCTCGACTCTACTACCACAAGATAGTGGATCTCAGAACAATAAGCCGGTATTCTAGCCC -GAGTATCCCGGTGCTCCCGGCCACCCCATATATGTTGTATGGAATCATGTGATTCATGCT -TGTATTGACCAAAAAACCTATGGCAAGACTAGGGGGCCTCGATCTAACAGTAAACCATAT -GGCCTTTGAATTTCGTTCGCCCCCCCTAGAATTCAAGTACAATTGAGAAAAGAAAACATT -GGCCCAAGCTCACAGGCCGAATACGTAAGATTGCCACCCTGCCAACAAATCTAGGTAGTC -CATGGGTGAACAGCCAGCTTATCTAATTTCATTGATGGCCATATTTGCGATTCAGGTGGT -ACCAAAGCTGTCTGTTCTCTTATATGACCCTGAAGAATACCCCACCATTGCTCCTGTCAT -CTTCAAGGGCTTTTTCTAAGATCTATTTGGCAACTGGTCGTCTTTGTATAAAACCTATCA -ATCCAAAGCCATTTAAATGAACATGAAACTTCCTGTGTATTGAAGTGTATGACCTGCCCT -CCTTTGGCTCTTCGCTATGTTTGTTGCCTTGGAAAAGCCGCCCAATCTGCTATAGTACGC -CGCTGAATGCGACTGGTATGGGATCATGTTCAAATATGCTATAGAATTGACATATTATCT -GTCACCATATCAATGTTGGGTCTGATATAGCAAAGCTGCAAATCAAGAGGATAACGCGGG -ATACTAAAGATGAGTAGTTAATCAACTAACTTTGAAAACTATTCACATTTGATTATACAA -CAATGCTATATATGTATCAATTTTTGCGTCCTTTGGTAAAACCTATAATTTATCTTAGGT -CTGCACTATAGGGGCAACTATGGGGGCAAATTAAGGTTGACGTTTGGTCTTAATGCTCTA -TTTACAACCGAATATATATACCCCGTCAGTCAAATCCAGAACCGCCACAAGCTAATTCCG -GCGGATCTGCCAGAACAAGCAAAAATAAACACTGGAACTATTTGTTCGACACTATGTGAT -TGATCTATGAAGACCATCAAGATGATTTGGCTCAGGCATAAGTGCTCATTTACCTTCGTT -TTCTTTTGGTTTTTTTGGTGTGAACACCGAACTTGCCGAACTTGCCCTGAAAGACACGAA -AGACCGTCTTCTAATTATTTTGTATGTGCTCCCTACTCCGTACAGATTTTTAGAATGTCA -CTTAATATGTAATAGGCGTCCTACAACATAAGCCCACAGTTTTAACCAATCCCCAAAAAA 
-GACCCCAAACGCTCAGAACACCGGGCGCCACCCAATCGCTAAAAATTTCCTTTTCATAAG -CGAATTTTATTGCGACTAACCTGGGTTATGTTGCCGGAGCTAGGCGACGATTTTGCCTAA -GCGAAAGGCTTAGCGCCAATATGTCTCAATATGATTTTGCCCCTATTGAGTTAAAACCCT -GATAGTTCCTCCGTACAAAGACGAGCTATAAAGTATATCCCCGCGCTTAACTGATCGACA -TGCGTAGTGGGTCATGGCTCTGGGCCCTACTGGGCGCCTCTTTTACAGATGCTTTGTCTC -TGGAAAAAAGAGAAAAGCCTGCTGTGTTCAATGTGCCATTGAGACACAAACAAGTCGCAA -ATTCGGCTGCTAAGACCCAGTCAAAGAGGGAAAATACAGTCCTTCTTCCAATGGGCAATG -ATGTATAATGCCTTATCTTCATGCCGATATTTACTTCTACTGACTCCCACCTTTTCAGGA -AAATATTTTTTACACCAACATCTCATTAGGCACCCCACCGCAGAGCATCGATGTGGCATT -TCGGACGGTCGGCAATGAATGCTGGGTGCCCAAAACGACTTCGCGTGCTTGTGGTATGCT -TCGAGGCGATAAATACTGCAGCGGCTCCGGTGGCTATAACAAGAGCCTTTCCACTAGCAT -GAATGATCTGCATATCAATTTCACCCTTAGCGAGTCGATTGAATCATTCTTTAGCATCAC -TGGGGATTTCGTGACCGATACTCTTGTTATTGGAAATACAACCGTGAAGTCTATGAAGAT -CGGGATTCTAAATGTGGATGCCACCCAGAGTAAGCCCACACGTCTAAACAATCATATTTG -TGGTAAACTAATCCCTTGAATAGACATGCTCGGTCTGGGATATGGCGAAACGAACTCATC -TTTTGTCTCCCTTACTCAAGCATTAGTTGATGCTGGAACCATCAAATCCCCGGCCTTTAG -CATGTATATGGAGAATCCTCTACATTCTGCATCCTCGGGAGATTTGCAGGATAAGAAACC -GGGCACTCTGCTTTTTGGTGGTGTGAACAAATCCAAATACAATGGCACACTACACACGCT -TCCCATCGTCAGCAACCCTGCCGATGATAGAAAGACCTTCCGGGTCAACATGACGAGCTT -TTCCATCAACAAGACATCGGTATTTCCCGAGGGGCTTTCGACACAAGCTCTACTTGACTC -CTCGTTGTCTTATACCTATGTTCCAGAGTCCATAGCACAGGAGCTATCCTCTCAATTGGG -CACTGAGATCTCAATGTTCGGGCCAACTCTGATCCCGTGCAATACAACTTCTAGTGATAC -AACGCTCACGTTCGAGTTTGGAGTTGCGAGTTTCAAGCTAGACATAGCTCTGTTCATCGA -AACACATAGCGTGCTCAATGATGAGGACATCTGTTACCTGGGAATTAGCACGAAAATTGA -TACCAAGGATGCGAACAGTGTTGTGCTTGGGGCAAATTTCCTTCAACAGATCTACACAGT -CTATGATTTGGAAAACGATGAAGTCTCTTTGGCACAGCGGGACTGGCATTCCAATGAAGA -TGAAATTTTAGAAATCACGACTGGCAAGAATGCAGTCACCGAGGCTACATTGAAGCAAGA -TGACGCAGATGACACAGACGCCAAAGATGACAAGGAGCAGATGAAGTCCATCGGCTTCCG -CATTGATGAAGGTGCAAGCTTCCGCATCTCCCTTTCCACCTTTGTGATCTGGGCTCTCTT -CTGTTTATAATTTGATGTATACTAGTATGTTCTTTTCACTTGTATAATATAAGGCCTTGC -ACCCTCTGCTGTGCACATATGTTCATAATCATTTCTTGGTTTGGAATTTGCCATCTTGAT -CCTGAAGCTATTTTAACATATTCGAGACTTAGATAATTCAAAGCCGGGATAGTAAATGAG 
-GCGTAGCATTCCCCTCGAATGTCTCCAAACCAGCGACAAATTCATTCCACAAGCTCTTCG -ATGTCATAGGAGCATTAATATAAGCCTGCAGTCCTGAAGCTTGCATGATGTTTTCCAAAT -CTGGGAACAGGGCATCTGCCGCCTGAGATTCCATCTCTCTTTGGATCTCTCTACTTAGTC -CACCTTGTTCGGCATACCAGGCAGAGTCCTCCTTTATGACACCAATCGCGTAGAGATCAA -GCAATTTTGAATCAATATTGGCTTCCCGGGCGGCTTCAAAGGCCATTCAATGCCCGATTG -CTTCGACCAACGGAAGAGCAAGTGGCAGGATACTCCGATTGAACTGTTCGCCCCGATGAG -TTCCCGTGGCCGATTGTAGGAGAAGAGCTTTCCCTCAGAAGATATAGAGGTTTCATGGCG -AGAAATAGAACTACTGGGGTTCTTTGGCGGTGGCATTTGATACCTACCTATTAATAACTC -GGATGCGACTTCTGCGTCATATATGTCAGACTTGCAATTCTCTAGAGAAAATAAATGTGG -ACGTTGGTGGGCACATACTGATGGCAAGAACCCAGATATCGCCTTCTACGGTTGACACGC -CACGCGTTTCTAACTGCAAATCGCCATACAACTTATTTTTCTAGCATCCTAACAAGGTTG -CTAAACACTCACCTCGAGTTGAAAAATTTGATTATTCTCAAAATGCCCATGCCATCCACA -TCGCTCATTCATAGCCTTGATACTCTTACCAAAATGAGCTAGAGCTGCTGCTTTAAAGGC -GATAGCAATGCCATGACGGATTCGTGGATCGATGCTCTTTTTCCGAAAAGCTTCCGCCGA -GGAGATATAGAATGGTTCCAGAACGCTATATTGAGCCACAGAGTAGAGAATAGCAAGATG -TTGTGTTCGGAGCCCAATGATTGCTATTGGTTTTCCATCTTGTCCACTAACCAAGCACTT -GGTGAGAGATATACGGTCTATCGATAGTGAGATCAATTGGATAGATATACTTGTACTAGG -CTAAACATACCTTTTCTGATATTTGGAAGCAAGCTGTCTTGATTCTGTGGCACATGGCAC -AGGAAGTGGGATGTGTAGCGAGCTAATATCCCTATCGGGTATTCATGTAGTCGAGATGCA -GCAAACCGATGATTTTGTGACGCACTCTAAAACTACGAAAGCATATGTATGATTGATGGC -CCTCTCAAACGCAATTACCCGTCGGTCTAACAATCCCTGTTTGTACGGCCTATCATGCAC -TCCTGGGTGAGACTATCCTCGCATCATCCCTTCACCTTTCCAGCCGTACTTTATCACGGA -TGGTGGTACGGAGTACTGGCCCCCTATCGGGCATACTGGTTGTTGTGTTTCCTCAAGGCA -ATGTTTTATGACACACAATCGTGAAATTTGCCTTCCGGTCCCTTACATCTTACTAGGCCT -GCACAGAAACAAACTAGGATGACTCTTCAGCCCCCGAGTACCCCTGATTAGCAGCAACAG -GAGAGCCTATCATCAAGTTTCTTGGCGTACATTTTATTTTTCTCGCAAATAGGTAGATAC -AACATATATTATTGATCAAACTTTCCTAACCGCCGTATTTAGAGTCCTGAGACCTATATT -ATAGAAGGGTGATCCATAAGATCCCCGACATGAACCCTATCAGGGCTTGAATGAAGATCT -ATAAAACGAAGAGCCCTGACAGAGACTGTGGCTCTGTGACATGGCCCCATCAGAAGTCTT -ATAGTGAAGTCTCTAGGCCCAATCTACCAGATCATCTTCCAATATTCTAGAAATTGTTTT -ATCGCCACTTACCTAGCTTTATTGCATACAAATGGATATTAAGTGTACAGGTGAACCTTG -CTATCTGATATTGCATAACTTGGGAACACTGACCAAACTTGCATATAATTTAACACATTA 
-ATTACTTCTTCGCACTAAGCTTTAGTGTATTATCTATGCCTTTAATGAGTACGTACTCCG -TACATCCACGGATATGTGTAAGAGCATTCGCGGAGCAACCTTCACCTAAGAAACAAGCGA -TAAGCGATAAGGCCTAACCCCATCTCAACCTGAAGGCTGCCTCTAGGAATCATCGAATGT -ACTTGGAATACCATGCTCAGGGAAATGCAAAGCTTTAATATGCTTGGTAGGAAATGAGCT -TCCAGACCCAGTGGACAAGTTTGAATGCTTCTGAATGGCGGTCCAAATTCCCGCCTCAAA -AGGAGTAGGTAGACAAAGCGGTCAGCTCCTTGAGATCACGATAAGGGTGTACATATCACG -TGCCACAACATCGCCAACGACATCTGGCCGCCGGTCTTCATCGCCCTGTTGTTCAATATA -TCCGCACGTCTCAAGTCTCATACTATATCAGTAAGAAGCTGTGTGAATCAGACTTTAGGC -TCTGAAACATTGGCCTCTGCCTTGGTTCACTGTCAAGACCCGCTTGTTCCTACGGAGCAT -TCCATATTGGAAGAGCAAATGGAGTGACTCGGGGTGTCTACTCGATTACATGGACTTCCG -GGCTTCTCAGGTGGTGGCTCGTATTTCGATTAAATAAGCGACCAGGTAGGTCCCAAATTG -ATTGATGGCGTGACACGAACGCTTTCATCCTCTGCTTTTCTCCCATCATCAGATTGACTC -GCCTTTGTAATCCTGAAACGGAATGGCTAACGAGTCTCCCAGTCTGCAAAACCTAATTGA -GGAGCTCGGGTTTCCTCCAAGCGAGTACCCATTCCTGCTTCGTCTTTTCGGGTCCTCCGA -CCCATTTGCTGGGCTTCCTGGTACAGATCTTGAGCCCAGCCACAACCGAAATCAATCTGG -CCAACTCAGTTCTACTAGTATTGCTGGCAGACTTGCATCTACTAGGCTCTGGCAAGCTAT -TCCTCCGCTTCATGGGTGAGTTTCATTGACTAACATATCGCTCTTGCAGTGAATGTGCCA -ATCAGTCTCGCCTACCACTCACCCACCGTCGCCCTAGTGGCTTTCGCGAAGGTTTAGAAG -GCAGAGATCCAGTCCACTACGAAGAAGAGTGGACTTCTCTCATCGCGGCCGATTCAGAAT -CTCTGTTCTCCGAGTTACTCCAACAGCTTGTATTGGACGACGAGCATTCCAGGCTTGAGT -CCTCTGAGCTTCCCGAGCACTCTAAGGCATCGGAAAACGTTGCAAACCCTGGACCCGTTG -GACCTCCAAGCCCATCAGTCTATGACAGCGAAACAGGTGAGCTGCCCGACTCTGTTGTTG -ACCCCAACGAGACCCATGGAGCCGAAGGCAATTGCAAAACACCCCAGGACCTGAGGACTT -TGAAGGAAACTGGAAGTTTTGAGAAGCTACTTCAATCATTTCCAATTCCTCCCGGTCGCA -ATCAAACGGTCGCGGGCCAAAGTTGCTCCCAAGTTTTATCTAACTCCCCATCTGACTCCC -AACGAGTGCTACCGAAAGACTGTGCCCAGGCTAATACGTCAACCGTCGCCAGTAGTCAGA -TCGAATCTGTAACCAGTCTACCTCGAATCTCCCGATTCTCTGAAGAATTGGCTGACAGTC -ATCCGATCGGAGAGCAAAATACTCCCTGCATTTCTGATAAAGCCGATACATCTGCACTCG -ACACTTCAAACGTCGTCAACACTCGGAACGTTTCCTTTAGGATTCTTTCATTGCCCCCTA -GAACCTCTTCCAGAGAGTTACGTCCCCCCACTGGCTATCACAAGGGACAAGAAGCGAAGC -TCGACAGCCCCCATCAGGCCCAGGATCGTGCGAATAAATCCATCGCTTTTCCTGAATACC -CCCCGCGAGTCCGCCCAATCAGCTACGTGATCAGAAGGCAGCGCTCCTTGAAGCCTATCC 
-TCGAAGGGCGCCCCACACCAACAGCTACCCCTCGATATTACCAAAAGACGCCTCTCCGTA -TCTCTTCTCGTCCGTCTACAACCTCTTTCGCCTCGGGAAAAACGAGCACGCAAAAGTGCA -AGAGAGTCTTGAGACGTTTTGTTAAGAAGCTCAAACGTCTGCTTTGTTAAACCCGCAAAC -ACCCGGGCACATCATCCACTCAATCATTAACTCCACGCCGGGATATCTTCACCTTCACTT -CAAATCTGACTCACTCGACCCCCATtcttatctttactttcttctcatttttacacctat -cttatcgttaaggctatcttatctCCTACCTGGGTCATTTCAAGGTCATCTTCATTCACA -ATGGATTCTCGAACATCCCGTCTCTTGGACCCATCCTCTGCAATGGCGGCCATCACCAGG -CACAAAGCAGAAGCAATCAAGCTAGCTCGTGAACAAGGTGTGGCCGTTCAAGAGATGTGC -CGCCGTGCGAAAACTGAGATCCCACCATACGAGTTCGAAGAGTTGATAGGCAAAGGTGCC -TATGGTCGTGTTTACAAAGGTCACCAAAAACAATCCGGCCGGTTGGTTGCCATCAAAGTC -CTAGATATTGACTCGCTGGATTACAAATCGCTTCGAGATTTCAGAGATGAGTCGATAAAG -GATTTCATCCATGAAACTAAAGTGATGAAGCAAGTGAAAGATTCTGGTGCCAAGAACATC -AACGAAATAATTGAAGCCATCTCCATCCACTCCCAATTGTGGCTGGTTTGCGAGTACTGT -CCCGGTGGCAGTGTCCGCACCTTGATGCGTGCAACGAACGACAAGCTTGATGAGAAATAC -ATTATTCCAATTGCTCGTGAGCTGGCTGTTGGTCTTCGTGCTATCCACGAAGCTGGAATC -ATTCATCGCGATGTGAAAGCTGCAAATATTCTCGTTCATGAAGAGGGTCGTCTGGAAATC -TGTGATTTCGGTGTTGCTGGTATTCTCCAATCACAGCGCGACAAGCGATCAACCTGGATT -GGAACACCACACTGGATGCCGCCGGAGATGTTTGCAACTCGCGGAGAAGCCCATCTTTAC -GGGAGTGAAATTGATGTATGGGCGTATGGGTGCACATTGTTCGAATTCGCGAACGGAAAC -CCCCCAAACGCGGGCCTACGGGAACGAATGCAAATCGGTCGCCAACTTAATCGCAAGACA -CCCCAGCTGGATAGCGATCAGTACAGCCAAGGCTTGAAAGATCTTATTGCCTACGCTCTT -GACTCCAATCCAGACACACGGCCAACAATGGCAGAGATCCTTTGCCACCCCTACATTGCC -GATACAGATGATGAATATCCCACCACCTCTGTGAGTGAGTTGGTGAGAAACTACTACCAA -TGGTCTCAGCGAGGAGGCCAGCGTATTTCTTTGTTCCACCCAGGTGGCGCCCAGGCGGCA -GAGCTTCCAGGCATCGAGGAGTCAGAAGATGACTGGAATTTTAGTACAACAGATGGGTTC -GAGAGAAGATTCTCTGTTATTGACCTGGATGAGATTGCCGCCTCGTTGGCGGAGATGGAG -CAATCAATCAGTCCTGCCGATACAGGCCCTGAGCAGGACATGTACGAACAGTTCTCGGAG -GACGATCTTAACTACGAAGATAAGGCAAATTTCGATGAACGTGTGCGCCGAGGGGCCGAG -GCAATGGAAGGCCTGTTTGACGAGGAAAAGCCCAGCTATACATACGAGACAAAAAATGAC -TTTGTCCCCATCGAACCCGCCCAGTCTACCTCGGATCTTCCCCTCCGTGCGGAAACAGAC -CGATCTTCCGTCACGTCAACTTTTCTTGATATCGACATCGGGTCATTCGAGGCCTCACAC -TATGCAGCAGGAGCTCCATCTGCGCAGCCTTTCCAATTGGTCGATGCAAACACTATCCGT 
-GCAAACCGTTCCAGTCTTCGGTTGAACAGGACCTCCTCTGAACACAGCTCAAAATCATCT -AATAGCAGTGTGGAGGATGATCAAAACCTTGAAGATACATTTGTGCCGTCTGGTCCTCGA -CCTCCCACCATGGATTGGAAGTTCCCGTCATTTGTACaagatgaagaggaacaaccggaa -gaagaaaaagaagaggaGCCTCCTGCGAAGGAGCCCATAACGGAGGAGTCTTTCCAGGCT -GAGAAGCGAGCCACCATGCAGTGGACATTCCCAGTCATGGGATCCGTGCCCGAAAGTCAA -ACTTATGATGACCGACATGACACCCTCCGGGCTCCACTGCCCGAGGTGCAGCCTCCACCG -CAGTCGCAACCACAGTCAACAGATGAACCGGGTGACTCGCGTCCTTCAACTTCGGCATCG -AATGCCTCCGACTCGGATTATGATCCCTTCCGCTTTGATCGCCCCGATACCCCAGAGCGC -GCAAACTCACTCCACCACAGTCATTTCTTCGACACAGAACTCCCCGCGATGATGGAATTA -GCCGGTTACAATGAATATGAGGCTTCCGGGATCCTCGATGGCCCGGGGCCGGACGAGGAA -TCTCACCTCACATGGAGTGCGCACTCAGGGACTTCGTCGTTTGAAGACAGTACGACAGCT -TTACCCACGGCCGTCCCAATTCCCCAAACTCCCACTGCTTCTTCGGTGACTTCGGGCCCG -GTCTCCGAACCGGGATCTCCAGTGTTCGACACTCCCGTGTTCGAAACTGTACGAAATGTA -CGACACGACGTATCTGAAGCCTCCTTGCCACAAGCAGGTGATAAGCGACCAATCTCCTTC -CCAGCTCTCAATCCTCCAAGTGCAGCGGGTCTGATGGAGGGTATGGATGACGGTGTCGTG -ACAGCGGAACTTGACCGCCTGCTGAGCGATTTCCTAGAGTCGCTCTCGGCGACTGGGGAG -GCCATGTCACGGGTCGGTATGAAACATGAGACGAATGGTGTTCATGTTGCTGAGCAGGTC -TAAATGACCAACATGGGGTGGGATATCATTACTGGCAGATATCCTTAGTAAGAAAGCATT -CAAGGAGGCCCAGATTTGTTAAGATTCATGTTATTATCTACGATTCCAGGAACTAATGTT -GCGTTCACTTATATTCAGATTTCCAAGGCTAATGTAGGCACTACTTTTGGCCGGTAGAGG -CAAATATGTACCTTCTTCGATGCCCGTTAAATAATCCTTGGAGTTGCTGTGCCTATGTAG -CCTTGAGACTGAGAAAGACAGGCATGTGCGCTGGAGTTGTCTAGCAAACTCTGGCTTGTC -CAAGTCTCAATTTGGAGAGGTAGACAGTCCAGtctcatctcacctctcatctcttctctt -gtctcttcctcctcttacatcctatcttatgatttatctcttctcCCTCCACAATTGTCC -TGCCTACTTTCAAAATGGCCTCTCTTTTATCCTTGAATACCTCGGAAAAGCCATCAGAGG -CCTCGTTGCCCTTTGGTGTCTTTTCTCCTCCAAAGAATTCTCATGAATGGAAGAGAGCCC -TCGCCTATGTGAAATGGCTATGTTTCGACCAGCAATACAAACAATGTGCCTTGCGTTGCA -ATCAGCTCATAGATACCGCGTCTAACCCGGTTTGTTTTTCACAGATCTCATCTCAGAGTC -TGGTCTTAACAGGTATAAACATAGCTCCATCCAATCCGTGCCACCTACCTCCACTACTAT -GCAGCCACATCATATGAGTATATGGGACGAGCAGCTCATATCTTTTCGGCTGTCAAAGTC -CGGCTTCTGACTTCTGCTATGGACCGGTTTCAGACTGCCTACGAGTCTTTGCCAGCCACT -ATTTCACTGCCGGTTCTGAATCCCAACCAGACATACTCGCCAGTGACTTTCCTCCATTCG 
-CCTGATTCATCTCCTCAGTCTTCCGTAAGTGTTGTCATGTGGAGCCCACCTTTGCTGGAC -CACGATGTCGCTCCTGGTACTGCGTCAAATTCTGGGGCTCTGTCATACGAGAATTTGTCG -GCGCATAATGCTACCCAGGTGCAAACTCCTCCTCCGCCTGTCCCGTTTTCTTCTTTTTCT -TCGGACTCGACTATTACTACTGCTATTTGTGTCACGCCACCTCCTGCTGCTCGAGAAGTC -TCGTGTCTTGACTATGACGCTCCTCCAGCTTTTCATATCCCACCTCCAACTCGCAAGCCC -CCTCCTCCGCCTGTCCCGTTTTCTTCTTCCTCTTCGGACTCGACTATCACTACTGCGTTT -TGTGCCACGCCACCTCCTGCTGCTCGAGAAGTCTCGTGTCTTGACTATGACGCTCCTCCT -CCAGCTTTTCATATCCCACCTCCAACTCGCAAGCCCCCTCCTCCGCCAAGCGACGTTTCT -CCTCGCCTGGCTACGAGAGACCAATTAATATCCTTCAATAGTGCTAGAGGTGGGCTTCCC -AACGGGGGATCTATCGTCCGAAACATTGCCCGAATGATTGATAACTCTATCTTGGCGAGA -GGCGACGACCCTTTTGTGACCCGCGCGCCGCCAAAACCTCGCACCTTGAATCAACCGCCA -GTGCGTCTGTCTCCGATGAAGTTCCCAGCCGGACTTGACGATCCAGCAAAGCGTAGGGAG -CTTATTCCGCCTCCGCTTGCGATCCGGAAAAGCTCCGGCGAGGTGCACATCTGCATAAGT -TCTGGAACGGTAACCTGTGGAGGTCCAGAGCAGGAGACGTCCAAGTCTGAGGTGAATCGA -TCCATCCGAACTCGCCCTCCCCGGCTACCCTTGAAGATAATCCCATCCAATCGTTTGAAT -GCTAACACAGAAAAGCCGAGCTCTTCAACCCTAGCACTGCAGCTGAAGCGTTTGAGTCCC -GCCCTTCTGGAGTTCATGGCTACGCCTCCATTGACGCCCACCCCTGTTCTGAGAAAGAAG -AAACCCTTCCTCGGGTCGCCGTTTACCAGTCGCGTAAGCTCACCAGAGCAGAAAAGTGTC -AAGTCACTCTCATCGCAGTCGACAAAGCCCTCCCCCGCGCCTGTCAGCGCTGCGCGCGCC -TCCCAGATCAATGAGTTCAACAACGCCGTCAAATGGCTCCGTGAGCATATTCCCGCCGAC -GTGACCGGGCTACGCAAGCAGATCAAGCATGTTTCGGATCTTCAGCAGGCTCGTCGTTCT -CGGAACATGACAATGGCCCGCTCTGCCTCTTTCTGGACTTTTTCGCCTGTTAAGCTGAAC -CCCGGTGAACCACAGGCACCTCCTGTGATTGAGGGACCTAATGTCGATGAGTATGGAAAT -CATATTCGGATTGAGACTAAGGTGCAGCGTATTGAACGGCTCAGGGAGGAGGATTGGAGG -ATTGGTATTCGGTCGAAACATAGTCTCTGGAAGGGGAAGGAGTACTATGATGACCTTTAC -GAAACTGCTTTTGCTGAGCTTGGTGAGACAGGTTATGGATCTCGCGAATGGCTGAAGCGG -TGAGATTTTACTTTTACCGCATATTCTCTTGCGTTTTTCTCTTCTCTTTGTTTTTCGTTT -CCCGGGGTTGGAAGTCGTGCAAGTGCTCATGCGGTTGCATGACATTAAAGCCTTTCGGTT -CAAGATATTCTAATCTGCCTTAGCGGTTGATTATCTGGTTGATGGATATCACCATGGTCT -GTGTTTAGGCACTTGATTGCGCATCGAAGTGGTTTGCTGGAGTTGAGGAGTCGAACAGCA -TGCCATCGGGGCTACGGAAAGCATTGCCTCTTCGTTCACTTGATTTGGTACAAAAGATCC -CGATATGGTCTTTTGTATAATACCGGTTAGGTGAAGGTTGGAGACATTCTATCGTTGGGC 
-TTATTtcttaatcttgatcttaatgttaatttcaatGAAAAGTCAAGCTTTTTATTCTCG -TCCTTTGTGCTAACAGCCCACTCCTCAAAGGCCTTTACTCAAAACAGAACCAAATCCCTC -TCCCAGACCTCCTTTCCCAAACACGCCACCCAGTTGATTTCCGAGTTGATCTTGAATCTT -GCTTAGATTTTGGTCCTCGGTGAGTTTCTTGCCGGTCAGACTTTCTATTAGTAAATTCTG -ATTAGTATACTTCTTATTTCCTTGTTTGGGTATAACATACTCTTCAGTCCATAGGACACC -TTTGTTTCACCCATATAGTCAAGCAAGAGCTCTTGGAGCTGTTTCTGGTATGGGTCTAGA -TAACCAGCGACCCACGCGCCGAGTAAAATTACCTGGAAGGTAAGGAAGAAGAGCAAGGAT -ATCATTGACAGTTCCAGTTGAGAATATTGTGTCAGATCTACAAGAAACAGTCCAAAAACT -TGTGGTTATATATACCTCTAGGCCTATGACGTCTGGTGACGCTGTGGCACACAAGGCACC -GGGGAGTATGTGCCAATCAGGGAAGGGCTTGTATATTAATTTCCCATAAGTTTCAAATTT -AGTTCCAATTCTTTTTCTCAAATTGGAATGGAAGGTTCAATAGATCTCTAGATCCTGATT -CTTTTTTATACGTATATCGAAAGAAGAAGAAAAGATCAAAGCATACTTTTGTACTTCAGA -CAAAAAGCGGCTGTTCATGCATAGTGCATATATGTATGTGACTAGCGACATATCTCTATC -AAATGAACATTAATTTTGGGAATAATGTCTAGGAATCTCCGAGAATATGTATTGTGCATG -TTGTACTCATCTGTATACAAATTAGCGCCCTTCAGCCCTGGAGGCAGTAACATTCCCGTT -GTGGCTGTGCTTGGCGCCAAACCCTACAGTCGCCTTATCGCGTCTAGAACTGATCTTCCC -CCTCCTTTTTTTTTTGACATTTTTCATTTTCATCTTCTCTGCGATTGATCTGTGATCTCT -GATTGTCAAGTCAGATAGTTGTCATCGGTCCTGGGTTGTCAATTTTCAATCATCGGGTCA -GTTCTGTTTGGATTGCAGCCACCCCCGCGCATAGTCACCCCTCGAATTTCGGTTCTATAT -TTTCTATATCACTAACCCTGACTCTCTCTCTATTAGATTGGACTTCATTCGCGACCATAT -AACCCTCAATCCTTTGAACCCTCTTCGTTCTACTCGTTGCTACCTAAGTCGTGCCCATTT -GAGCCCATCACATACATCTACCTTGGGCATCTGCAGCCCTACCCCCACATCATGAGTTGT -GAGTCGATATTTTATATCCCTGGGAATTTTGTTCGCTATTCAGCCGTGATATGATTCGTC -GTTGGGTGTTGCCCAAATATTGTTCAGCTTCGCGTCAGCCTTATCCCCAATCTGACTCCT -CACTGCGTTGGAATTGCCATGTCGATCTACACCCTTCTTGTCGGCTCTCTATGGAATATA -TGGGGACTAGGGGCACTTCTAGCTGTCATTTTGTGCCACGGTCTGCTTCGCAATTCCTAT -GCCCTCTCCCCTCGGTCGGCTCCTCGAGGGGTCCTTTGCTTCATTTGTCTTACGCGTTTC -ATCGGAGAGGCCCCGCAATTTACTAACTTTCAGTCTTCCAGTCGCAAACATGCTCAACAA -GCTGTCAGGACAGCCAGAAAGCTACGAGAAGAAGTAATGCCTCCTTATCCTAGCTCCTTT -ACCCCACATTCGCTGACTTTTTCTTCGCTGCCTGTAGGGCTCTCTATAAATTCGGACGGA -CGTTGGGTGCAGGTACATACGGAATTGTTCGCGAGGCGGATAGCACCGATGGACAGAAGG -TTGCGATCAAGATAATTCTCAAGAAAAATGTCCGTGGCAATGAGAGCATGGTCTACGACG 
-AGCTTGAGATGCTGCAATCACTCCAACATCCCCACATTGTCTCGTTTGTCGACTGGTTCG -AATCTAAGGTGAGCATACAGGGACTTTGGCCTGACCTTTGCGGAGCTCTAATCCTGGACC -TAGGACAAATTCTATATCGTTACACAGCTAGCCACCGGTGGCGAGTTGTTCGACCGGATC -TGCGACTATGGCAAGTTCACTGAGAAGGATGCGTCGCAGACTATCCGCCAGGTGCTTGAT -GCTGTGAACTATCTGCACAAGCGTAACATTGTGCACCGAGGTTGGTCTCCTCAAGTCATA -CTAACAATTTATGCGTAACTAACCTTTACAAAGACTTGAAGCCCGAAAACCTCCTCTACC -TCACCCGCTCTGCCGACTCTGAATTGGTCTTGGCTGATTTCGGTATCGCGAAGATGCTGC -ACGACCCGTCTGAGGTACTGACCAGCATGGCCGGCTCCTTTGGATATGCTGCCCCCGAGG -TCATGCTCAAACAGGGCCACGGCAAGGCTGTCGATATGTGGTCACTCGGTGTCATTACCT -ATACCCTCCTGTGTGGCTACTCCCCCTTCCGATCCGAGAATCTGACCGACCTCATCGAAG -AATGCCGAACAGGCCGCATCATCTTCCACGAGCGGTACTGGCGCGACGTATCCCAGGACG -CCAAAGATTTCATTCTCACCCTCCTCAATGCGGATCCCAGCAAACGAGTTACCTCTGAGG -AAGCCCTCAAGCACGAATGGCTGACTGGAAAGACTGCGAGCGACCGTGACTTGCTGCCTG -AAATCCGCACCTACATTGCACGTGCCCGTCTCCGCCGCGGAATTGAGATCGTCAAGCTTG -CCAACCGTATCGAGTCACTCAAAATGCATGAGGAGGAGGGCGGAGAGGATATCCCCAGCC -CCATGGACATGGGCGCCGCCTCATCCGAGGAATCAAGTACGGCTGAACCATCAGTGTCAG -ACGGTGATGCCAGTCCAGCCCCTGCCCCTGGCCACGCAAAGAAGAAGAGTCTGGCAAGCG -CCGCTCGCGGTGCTATTTTCCGTGAAGTTGTTCTGGCCAAGGTTCGTGAACAGAAGGAGG -CCGAGGAGCGGGAGAAGGTTGAACGTGAAGCTCGCGAGAAGGCTAGCTCTACTTAAATTG -TTTGCCTTATTTTTTGTCGCTTTTACATCTTACGTTGTGTTCATACGCTGTTCCTACGCC -TACCCTGCCCTGTTATTCTCTTTTATGTGCCTGGCGGTCTCTTGGATATAGTTCTGTGAT -TCCCATTTTTTAAAGTCAATCAGAGTGGACATACCACATTTAAGCAATCACCACTGCTCT -TCTTACTGTTTTTGATGATACAGTCACGAAAATCTGTCTAATGGGCCTAGAAAGAAACCG -AAGTATCCCAAGTGGCCATTATAGATAGAAGTTGCCCAGGTGAATACTAAATAATACTAA -ATAATACATTCCATGAGTTTCTAGCGGATTGCTAATATTCACATATTAATATGATATTTT -ACCACACCTTTGATTTCTATATTTTGCGGCTGATGCGTCTTGTCTAGCTATGTGGTATAC -CATGGGGGTTCATCTGAGACTTCCAACACCGCCGCATATGTGACTTCTTATTCGTTAACA -TACACTGCACAACTGCTGCAATATATATGTAATAGCTATGTAATTTGACCGGAGCCGCAT -ACACAAGCTGAGTTATCTCGGGCTGATAATTTTTTGTACTATGCCAGTGATCTCCATGCT -TATATAGAAATTGAAATGGTTCTTAAAGTTATAGAAAGTCATCAAGTAAAAGTAGGAAAG -TAGTAAGTGACAACAATATAGAGTCAGGTGTCGATATCACGTGCACCAATATATGATCTC -CCCACTTCCAAGGCTAGCTTTCAGCTCGTTGATCTTTAAGAATACCGCTGTTTGAACCCT 
-CTTATATCTGTTGGTTTATTCTCATCGTGAAGAAGTTTCTCACTTTTCCGTCTTCACGTC -ATACATCCGCTCTACTCCACCTAATCGAGACACCGCCCCTCCTTCTCCACCGGAACTCAA -ACATCTAAATCTTTTCTTTCAACTATCGCAGCAATGTTCGCTACGAAGCGTCTGAGCAAG -GTGAGTTACACTCTGAATCTGATGGGAGAATGTTCACCTTACTTACTCTATCTTTTTGGA -TTATAGGAGCTCATTAAGGTACCAATTCCCCCGTTTCCTTCTCGGCCTTATCGCATACTT -TACGAGCATACATACCATGGCGCCTTGCATATTGCTCATTCTATGAACATGTGTCTCTGA -CATTTTGGACCTCTTAGATGCAAGAGCATGTTCCTCCGGGAATTACAATCGTGAAATGTG -ACAATCTAGAGGAATGGCAAATGGACATCAAGATCCTCGACCAGAACCCACTTTATCTGG -ACCAGACTTATCGCCTGAAGTTTACCTTCAGTAACAAGTACCCGATTGGTAAGTATCATT -TTCTCAACAGCCTCCGTTTCCACCGTCTCTCGGCAGCTACATTCCACTGCCCTTGTCTTT -GTCTCGTCTTTTTATCAGATTATTCCGGGTTTTGACACAATCGTTCTAGAACCCCCAGAG -GTCCAATTCATCCAATGTCCCGCCTCCACCGGCACTCCCCGCACAATTCCAATGCATCCC -CATATCTACAGCAATGGCATTATCTGCCTTGACCTCCTGGGTTCCGCCGGTTGGTCCCCA -GTGCAGACAGTCGAGAGTGTTTGCATGAGCCTTCAGAGCATGCTTACCGCCAACAATCGC -AACGAGCGACCCCCTGGTGACCAGGAGTTCATTGCCACCAATCGCCGTCGCATCCGTGAC -ATCAACTTTATCTATGAGGATGATAATGTATAAAGTTCTAGCTCCCCCGATTTGGGGGCT -AGTCTATGAGGTGTTATGGAAATAGGGAATGGTTCGCGTTGGCGGGGTGGGGGCATACAT -TGGCGTTTTCTGCATGAGATAAAATTCCGCCCCTTGAGATTTGAGCGGCAGGATTTACAT -CCTGCCTGGTGATATTTGATTTTTTGCGTTTGCATTCATCCTGGCGCTGGCCCATTGTTG -ATTTCAATCATGGAATGGGTGGTCTTCTTGCGGGCGTTCAAGGTTTTTCATTAGTTAGTA -ACACGATCAATAAGAGAACTACTCCTCTTTAAATACTATATGGCGATTAGAGTAAACATA -GAAGAAATCTAGTAATAGAATATACAGGATCAATTGTCCATTTTGTTCGAGGAAACGATA -GCGGATGTTCAAATGTGATTGGCCATACTGCTTATATTTCCAAAGCAAGAATGGTATCAA -TCATTCAAGCCCCAAGTATAATACCAACTCACAAATGTCCGTCCTTTACGGGCGACTGAG -TAAACCACCCAACACATACCTCTTTCATTGGTTATTACCGCGCCGCCTCTTTGGTAATCA -CCTTAAAGCATCCATAGGCAATATAATCCGGTACAATGATTTCAGTTTATCTGACAAGAG -TAGACAGCCGCTTATACGGCCCTAAAATATCTTTCTGATAGGGCTCTTCAGCGGTCTTTT -TAGCAGGCTTCTCAGATATTCTCTCTGGATATCTTTAAAAGTATTTTCTTCCTTGCTGGT -CTTATAGGTAGATAGTAGAAGGCGGACGGACTAGACCTTATATAGAAGACTCTTTTCTAC -ATAACCTCTAGCTTTATCTTAGGTCGGCCTTTCCAATAAGTAGATTACTACGCCTCTTCG -TTTCTGAAGGACATAGGAGACATTATCTTCTGGTCCCAGCAATCAAACGCATCTCGGTCA -TTTTGCCGCATTGACCACGACTCAGGTGTGTCGGGAAGCCTTAAGGACGCAGGATTTGGT 
-GCCGCGTAGCGGTGAGACATTTTCACTTCTAGGAGGGAATGCTTCAGGGGAAGATTTGCA -TCGTAGACTAAGACTTTGTTTCATGTGGGAGGATAATGGGCGGCTAGTTGAGGGGTCGGA -GAAGCTAGGGCTTATAGATTATAGAATTATTCGGACTTGTTCATGGGTATGTTTTTTAGA -TTGTCTAATCTTTACTTCAGACCTCGAATTCCTTAAAAATCCCATGATGCCTGCTCTTGC -AGTCGTATATATCGTTGTAAAAAAAATCCAAAACTCCACCCCACACCGTATAACTTCAAA -CAGGAATTGAGCACGCTTGTGTTACCCGTAGAACCAATATTCTGCAGCATAATCAACTTG -AATCGCTGTTTTTTGCCCAGCGCACGTAGAGGGCGGGTCACCTTGAGATGTCACAACTCG -GTAAACTTCCTACATAGAACAATTTAGTCAGATATCTCTGCTCCAGATTAGTAATTATGG -ATAACTTGAGCCTACCTGAATGCCCTTTCCCTTCTTGTGTCCTAACTTAAGCCACGGAAC -ATCGTCGGAGGATTTTGATCTTGACTTCGGCGCTGGGGAAGAGGCTATCTTTCTAGCAGC -AATCCAGGACCTGGAGCCACTGAGCTCCATGTCAAAGATCGGCTCTCCGTCAGAGTCAAA -GTAATGTTCACCAATGATGAGGTTGGTGCTTCTCGTGCTCTGGGCCACAAGGGTTGCCAT -GAACGCAAGAGATCCAAGAGGAGTGGCGCTGATGATTGCGGGGATTTCGTGGAGGAGAGT -TAAAGATGATGATGCAATGCAGGAGGCGTCGAAGAGCGTAGCGGCGGCTCCTGTGGCTTT -GGGCTTGATGGTGGTTTTGGAATCTGAAGAAGCATTTGATGGACATGTATAGTTCTGTGT -TCCTCGACCCAATGCAACATATTTGAGCGTCAAATCCTTGGAGGGGACTGGAAGCTGTGA -CTTTGTCGCGTTTAATGAGAGGGATACATTTGCGAGAGAGCAATTCCCAATGTTGATCTT -GCCAAGTTGGTGGTAGATGTGGGAAAATGAGGAAGCTAGAGCCAGAGGATGAGGATGAGG -AAGACCTGAGTGTCCTTCCACATTTGCTGCCACAAGAAGCAACGGCAGCGCTAGGAGCAT -CTTCATGGTGGCCGGGCAATTTTGGATATATGAGGTATAGACGGTTTCTAGAAACTGTTC -GAGGGATTCTAGTTCTGGGGTTGTGGAGAGGAACTAGGGTATAAGGGCAAGGGTAAAACC -AACAAACAGGGAAATATATCCCATTTAATGAGAAGGCTAGCAAGGTTGCTCTAATATGTA -ACAAAGGACATGCTCTGAGTTTGAGATATGCCGCACAGGTAACCAGGCTCTCGGCGTAGA -CCGAATTGAACGAAGCGTAGTGGATTCTGGTTCAATGAACTTTTGAGGGGACCAATGATA -CATTCAATCTATATTGATACCGCTGCCCAGGATTTGTCTACATAGTATAGCTTGGAAATT -ACCCGAGTCTGCTGACTGGGATACTGGCCTGCTGTGCTAAAATTGGCTTCTTTGTTGTCA -ACCCCACCTATCTTGAAAGGTTGCTGGAGAGATGAGAAATGATCTTTTAATTCTGGGTTG -GTTAGAATAGGAAGAATGTAGCCACAGGGCTTTGTAAGTTACACTAGCCGTTGTTGTTTA -CTGGCCAGCAGCATATTGGCAGCAAACTGTCAATTTGATGTCGGACAACCTTGGAATAGA -TTGGGATATGAGATTAATACTGTGGTTTTTGTTGATCTAGTACGTCAGTACAAGTCAGCC -TTGGAAGGCATATTGATCAAATACTACATACACCCTTTCGAATCTTGAAATGCTGCAGGC -GCTTATATGAAGCACCTACTATAACCTAGATCTGTCATTGGCTATCGGAGATTTTCGTTT 
-CCTTCTCCCCAGTTGGATCAGGAACCTTATTCCTAAATTAGACAATTAATTTCGAGTGCA -GACACACAAAAGTTCATGCAGACTTTCAATAACAATATCCCTGATACACCGGTGTGAGAC -ATAATTAGACCGTTAGCGCCCTCGCAGCGCCACAGATTTGAGTACAAATCAGGCAGCATT -ACATACGATTATAAGCTATAAATCACGGTGTGAAATTTGATCATATACAAATCATAATAG -AGTGAAGAGAGAATGGCACTACATGGTAGAATATCACTGCTTTCCTGGAAGGTTTCAACA -AACTATTAGTCATGGTAGCTCACATGGAGTTTATATGGAGTTAGTTCCGTGATAATGTAA -AATGTTTCGAGAGGCTGCACTTGACTATGATCCATAATCCTGGCCTCTTCGCACTCTTGT -CCAACTCACTATTCCTCCTCTTGGGGATGTTTGCCATATTTCTTTCATTCGATGTTGTTA -GACCGCCCGAAGGCCTCCGGGGCAAAATCAAGCTTATGTCACAGATCACGTGATCATCTA -GAACCAACCCAACAAGCTCAAGTGTCTAATCACGTGCAAGTCAATCACGACCTTTTCATC -ACGAGGCGGCTAGCAACACAGCATCCTCAATCACCATTATTGTGACCGTTACAGCGGTTT -TCAATCCGGATGTAGCTGCGGTGTTCCCTTCTGCGGTAATACAGGCTCACTCATATCCTT -GCGACTTTTACGTTTTGTCACCATGTCTGACGGTAAGTTTTCGCAAGCTCTCCCGCCTGG -TCCTATACTGATCTCCACTGTAGCCGATATCGAGGCTGTTCGACGGCTCCAGGCTGAGCG -CAATGCCGCTGCTGGCGCTAAAAAGGGCTCCAAGACATTTGATCCATCCGCCCAACGTAC -TGATAACTCGACCAAAGCCTCCCTGACGGAATCCTTTGACACCTTTTTGTACGAACGGGA -TGGAGCTGAGAAGTTTTCGAACTACGATACTTCGATCGCTGTTGATGGTGATGAAGATAT -GGAAGATGCGGATGGAGGACACAGACTGGTAGGCCAATACACTGCTACCAGAAGCCAAAT -CGATGAGATGGCGCAAGGAACTGGCGTCGAGGAGGAGGACATTCTGCTAGGCCGCGAAAA -GGCGGCGCGGATCGCAGATCGTGAAACAGACTATCAAAAACGCCGATTCAACCGTGGCGC -CCTGACCCCGACGCGTGCCGATCCCTTCGCTGCCAATGCCCACGCGAATGTTGAAGGAGA -AAGTCAAACCTACCGTGAGGTGATGGCCATTCGCGAAATAGAGAGGGAAGAAGAACGCGT -CAAGAAACTCATTGCAGAGAAGCGTGAGAAGGGTGAAGACACCACAGAGCACGTGGCTAC -CTTGAAGAGGGTGGAGCACGATAAAGAAAACGCCGAGGCGGGTTCGACCGTTGCAGTGGC -TACCGAAAGAAAGCGGAAGAACCGATGGGGAACTATTGACGAACCTGCTGGTGAACCAGG -TAAAAGATCGCGTTTCGATCAAGCACCTGCTCCCGAAGAAGCACCCAAGACCGGCCGTTC -GAGATGGGATCTAGCGCCATCTCTCACAGCTGCCACCCCCGTCGGCCATCAAGGACTCGC -CACTCCCATGCATCCATCCCAAGCAGGCGCCTCTACGGCTGCCGTCGGATTTGGAGCCGA -TATGCCTGGTCAAGTTGTCGGCTGGTCTGATGAGGAGCTCGATATCATGCTTCCCGGTGA -GGCCGATGGCTATAAGGTCCTTGACCCTCCTCCTGGATATGAACCCACCCGCACCCACGC -CCGCAGGCTCACAGCCACGCCAGCACCCATGTCTAGCGCTGGTGGTGTCGGAGGGTTCAT -GATGCAAGAGCCAGAAAGTGTTCAGTCTGTAGGAAAGGATCTTCCAACCGATATCCCCGG 
-TGTGGGTGAGCTTCAATTCTTTAAGGCGGAGGACATGGCTTACTTCGGTAAGCTGATGGA -AGGTGGCGATGAAACCAGCATGACTGTGGAAGAAATGAAGGAGCGAAAGATTATGCGATT -GCTACTCAAGATCAAGAACGGTACCCCGCCTATGCGCAAGACCGCTCTGCGTCAAATCAC -AGACAATGCACGCGACTTTGGTCCTGGAGCTCTGTTCAATCAGATTCTCCCCTTGCTCAT -GGAAAGAACTCTGGAAGACCAGGAGCGCCATTTGCTGGTAAAGGTTATTGACAGAATCCT -CTATAAACTGGATGATCTGGTCCGTCCCTACGTTCATAAGATCTTGGTCGTCATCGAACC -ACTGCTTATTGATCAAGACTACTACGCTCGTGTCGAAGGCCGAGAGATTATCTCGAATCT -TGCAAAGGCCGCTGGTCTTGCTACAATGATCAGTACTATGCGTCCTGATATTGACCACGC -CGATGAATATGTGCGAAACACCACAGCCCGTGCCTTTGCCGTTGTGGCCTCTGCCCTCGG -TATTCCCGCCCTCCTCCCCTTCCTCCGTGCTGTCTGCCGCAGTAAGAAGTCTTGGCAGGC -TCGACACACTGGTGTCAAGATTGTCCAGCATATCCCTATTCTCATGGGTTGCGCCATTCT -CCCTCATCTCAAGGAGCTAGTTGGCTGCGTCTCAGGCAACCTCAGTGATGAACAAGCTAA -GGTTCGGACCGTCACCGCACTGTCTCTGGCCTCGTTGGCTGAGGCTGCGAACCCATATGG -TATCGAAAGTTTCGATGACATCTTGAGCCCTCTTTGGACTGGTGCCCGCAAGCAGCGTGG -CAAGGGTCTCTCTGCCTTCCTCAAGGCTGTTGGCTATATTATTCCTCTTATGGACGAGGA -GTATGCCAACTACTATACCACTCAAATCATGGAGATCCTTATTCGAGAATTTGCCTCACC -AGACGAGGAAATGAAGAAGGTTGTCTTGAAGGTGGTTTCTCAGTGTGCAAGCACTGCGGG -TGTGACTGCCACTTACTTGAAGGAGCACGTCTTGACCGATTTCTTCAAGGGCTTCTGGAT -GAGACGTATGGCGCTCGATCGACGAAACTACCGCCAGGTAGTTGATACTACAGTGGATCT -GGGCCAGAAAGTGGGTGTTGGCGAAATTCTCGAGCGCGTCGTCAACAACCTGAAGGACGA -AAGCGAGCCATATCGCAAGATGACTGTGGAAACGGTTGAAAAATTAATTGCATCACTTGG -AGCCGCGGACGTTTCTGAGAGGCTGGAAGAACGTCTTATTGATGGTGTCCTCTTCGCCTT -CCAGGAACAAAGCATTGAAGATCCTGTCATTCTTAATGGATTCGGCACGGTGGTAAATGC -ACTTGGCACGCGATGCAAGCCCTATATTCCTCAGATTGTCAGTACTATCCTCTGGCGTCT -GAACAACAAATCCCCCACCGTCCGACAACAGGCAGCCGATCTTATCTCCCGCGTTGCGTT -GGTGATGAAGCAGTGTGACGAGGAAGCGCTCATGGGCAAGCTTGGTATCGTGCTGTACGA -ATACTTGGGTGAAGAATATCCCGAGGTTCTGGGCTCGATTCTAGGCGCTTTGCGTGCCAT -CGTCACCGTGGTCGGTATCACCCAGATGCAACCACCCATTCGTGATCTTCTTCCCCGACT -TACACCCATTCTGCGCAACCGTCACGAAAAGGTGCAAGAGAACACAATTGATCTCGTTGG -TCGTATTGCCGACCGTGGTCCCGAGTCCGTGAACGCCCGTGAATGGATGCGTATCTGCTT -CGAGTTGATGGATATGCTCAAGGCCCACAAGAAGGGAATTCGTCGAGCGGCCAACAACAC -ATTCGGTTTCATTGCTAAGGCTATTGGTCCTCAGGATGTCCTGGCCGCTCTTCTCGGTAA 
-CCTGCGAGTACAAGAGCGTCAATCGCGTGTTTGCACAGCTGTCGCCATCGGTATTGTCGC -CGAAACCTGTGCTCCGTTCACTGTGCTCCCGGCACTGATGAACGAGTACCGGGTGCCCGA -TCTCAACGTGCAAAACGGTGTACTCAAGGCCTTGTCCTTCTTGTTCGAGTACATCGGCGA -GATGGGCAAGGACTATGTCTACGCTGTCACACCGCTGCTCGAGGATGCCCTTGTCGACCG -TGACCAAGTCCACCGACAAACCGCTGCCAGCGCGGTTAAGCATGTCGCGCTCGGTGTTAT -CGGTCTTGGTTGCGAGGACGCCATGGTCCATCTGCTTAACCTGCTCTTCCCCAACATCTT -CGAGACCAGCCCCCACGTCATTGATCGAGTCATTGAAGCCATTGAAGCCGTCCGGATGGC -AGTCGGCACTGGTATCGTTATGAACTACGTTTGGGCTGGTTTGTTCCACCCTGCTCGGAA -GGTCCGCATGCCTTACTGGAGACTGTACAACGATGCCTACGTCCAGGGAGCCGATGCGAT -GATTCCTTACTACCCGGATATGGAGGGCGACGTGGACCGACCGGAACTGTCCATCATTAT -CTAACACTGTAATTGGATGGTCTACTTTCTTATTAGATACAGATTCTATGGGGTGTGTTT -TTTCTAGCCGGTGtttttttttttttttttCCTGGTCAATAATTTCTTCATAGGGGGGAT -GTTTGGCGTCTCAAGTGTACCTGTATCCCAGTGAAAGTTCAATATCAAATCAAAATTCGA -ATCCTGATTGTGATAGATATATTAGATACACTTTGTGGCTGCTTATGTGCTCACATATAT -CTCTTAGTAACAGTCCTTCCACCCGTCCACCCGTGGATGTTATGAAAGTTCGCAGTACGT -AAAACCTATGATTTTCTTTTACTTCCATGTCACAGTGTAGACTGTATATTGTAGAGTCGA -TCGTTTCTATGGAATATAGCCAAGCATGGTGACAGGGGGCAATCAGGCAGGGATCGCTCT -TCAGGCCGACAATCATGTCAATCACTGCGATCTTTCAAGGTCTGAGTCTGGGGTAACATC -AACCTCACATATACCGTGATACTAACATATTTCGAAGAGAATAATCCCTCAACTCTCTCA -AATTTgggggggggagggggggagggggAATCCAAACCCCCTTTCATCCACATTCGCCAC -ACAAAAAATAGAAAACACCGAGTCGTCATCACAGGGTAAATGAATAAAAAGGAGGGGCAG -TTCTCAGAGTCCCCATACTCCTTGCCAAGAACGCCAAATGCCTAAGCTTGCCGTGGGAGA -GTCTTGGCATAAGATGCCGCGTAAGCTCAAATATTACGTTTTTATTATCTAATGTGTGAC -CCAGTATTCTGTATCTATGTTGTATAGCCTGTATAGTAtctatatctattgtatctctat -ctacatctatagtatctatAGCGTTGTATCTTTAAGTCTGTATACTCCACTAATACTATG -TTGTCTAGCCTTGAATATATAAACTAATCGGGGAGTATGTACACTTCACAGCCTCCCCCT -GAATTCAGTTGACAGAGGAATGGAGAATATCTATTAACGTACTCCATGGGGTCTACACAC -AGACCCTGATAAGGGGCGTTGTGTAACGCCCCTTTTTGACCTTTTCCACCCGTGGACGGT -TCGTCGAGTTCCTTGGGAAATCTTTTTTTTTTCAGGGTCCTATCCGCTTTGGGAAGTCTC -ATTTTTTGTCGGGTTTACTTCTCTCAAAATATCTGTGTTGCAGTGACACGGCACAGAGAT -GCAAGGACCATCCAAATTGAGCTTCATAGATTAGATCGTGGAGGTAGCACATGCAGGCCA -CTCCACCGGAAAATGCGTACAGTGTCAAGGGTGTGATTCTACAAAGTCTAAGGCATATAT 
-GATGTGTGCGGAGTACCGAATACAGTGTGCATTTGGTATAATATCTACATCGTAAGGGCA -GTGGAGAAGATCGCGTGTCGTGCATCGAACCCCCAAGGGCCGTGCGTCTGTGTTTGCATG -TACTAAAGTACAAGGTAGCTGTCAAGCATGCGAGATACCGAAATTCCTCGGATTCATGTG -AAGGAGGGTTGGAGGATGGGAGTCCCTACGACATACGTACTGGAAAATAAGAAAGTTAGG -TGGATAAACCACAGAGAGCTCTGATCCAATCAGATCCAAAGATTTGGAACTTGCGCAACA -CTGTAGGGGTCAAGGATTGGGAGAAGACTTGCGCATGAGATGATTGGAAGCTCAATGCTG -GGAACGTTATGCTCTATGTTGGATGTCTCTTCTCCCACAAGGTAGTCCGTAGGTAAGACA -AGATGTCTGTTCTCACACGTTTCTATTTTTCAGGGCTTTATTTTTTATATACATGTAATT -TGATTTTCTGAGTCCATTGCAAGGGATCGGAGAACCCTGCAGGAGGCCCCATTCACGACG -GTTGAGGCACACAGCGAGGGGCGTGTACACACTTTGTGGATCTCTGGCGCGTTACCCTGA -CCGTCTAGGGCTTAGGCTAAGACGGCCCCTCCTTCAAGCCCTCGATTTCTTGACAATGTC -TGGTGACATAGGCCAAGAGAGTACTCCACTTTCTGCAAGTGCGTCTTTTTTATGTTCAGA -GTATAGGAACGGAGACAAGGGGAAGCTGGAGCTTGAAGTAAAAATTTCGGGGCTAAAGCC -TACAGCGCACACCGCAGTTTGAAGATATTCAATAGAGCGATTAGAGCAAACCTTCTTTTG -GTGGCGGGAGGATCCGATCAAACAGTCGAGTTAATCGCCTGTTAGGGCAGAAAGGAAAAC -CTTGGAAAGAGGCGTTTCTTAGTCATGTCCGAAGTACTCCGTACTCCGGACTGTTTCCGT -GGACACGAGACTACGTAATGCATCGCGAAATGAGTACCAGTGAAGACTAGAGTTACTGGT -AGTTTATAGACTATGCATGAATTCGGCGATTAACTAGTGACGCTGTACGGAGTACATGTT -CTAATAATATTGTCTCTCGAACAGGAGGGAAAAAAAAAAAAACACATAAATTACTGTAGA -ATCCCAGATCCACAAGCGTTTCAAGTTGTTAGCGTTGACCCTGAGAGTGGCAGTGTTCCT -AGCGGGAATTGTGGAGTACAACCAGGATCGCACGCAGGGATCATAACCAAGACGGGCTTT -GCGGGGAGCTTGGGGGTCACGTCATTTGACTTTAATCTGGTGTCTGCAGAACCCTTCCAA -GGTTAGAGTATATTAGGATGATTGTATTACAATCGTCCTCAGACCCTTGAAGAGTGCAAA -AAGGACAGATATATTGAGAGGCAAAAAAAGGATAGATATTTCGAAATCTCATCCAAGGTT -AACCTTCCTACTTTGTACCTACAAGCAGATAACCACGTTGACCCCAAAATGCGCGGGATG -AGCACGCGTCAGCTCCTGGTGGGTCTGTCAATACAATTGGTCGATCAACAGCTGCCAAGA -TCGGCTATCGACTATCTGCCGTCTCCAGGTGGACCTGGCTTGTAAATACTCATTGATGAT -TGGTTGAAAGGCTCTGCAACAAACATGTAAGAATGTGGACCCAATGGAATTAAGCCACGC -TAGCCTGAATTAGGGTGGGCTAAACCGTACTCGTCCCAAACAATAGAAAGAAGCCTAAAG -GGGGTCCGGTATCGGAATAAGCGGCATGGTTCAATTACATTGACGTTTTTTTTAGAGGGT -GGATTCCAGGAATTTGGGCTTATCCAATTTACCCGGACTGCTGGACTTTCCATGGTCTTT -CGCAAATTTAAGAATCCACAGGTCCACTGTGTGGATGCAACACTGTGAATGTAAAGAACG 
-GTTGAACGTTTCAGCCGTTTCTATGTATAGCAACTAGGTGCGATTGCTGCAGTGATGCCG -CATGACAGGGGACTAATATTCCCCAAAATGTAGTACATTTACTTAATTCCAGTCTTTGAA -CACTGATGTGTGACTAGAGATTATCATATCTAATATGAGACAAAAAGAAAACGGTGATTT -GCACACAATGTCAACCTTGATTTAGGGGCAATGACGATCTGCCGTGTGGGAACTGCCAAG -ATCGGAGCCCCATGCAGATGACAGGCCTTTGTTGGTCAAGGGTACCTGATTGACACTCGG -CATTGAGGCGGATCGTCAACGTGCTAGATAAGGATCTGATTACAGCGTTTGAGGGAGGGA -TTTTCATTTCAGCCAGATAGTAATCGTTACAACGTATGTTGTACATTGTAAAGAAAGCCT -GGGACTCGATATATTTCTCCGTTGGATTGTCACATCATTCTCTGACCGCCAGTTGATTTT -TTTTTTTTTTCCATTCCTGAATTTTCCAGCGGGTTTCTCGGGAAAACACATTATTCCCAA -CAGAGTAATCCGCAGACAGTGAAATCCCAAACTCATAAAAACATCTCAGGGTCCCAGAGG -ACGCGTCATTATTATGAGCCGTGTCCGGAGTATGCTTCCTGCTTGGGAGCAAGAAAAGCG -GTCAGAGCCGTCAAGGCCCATCTAGAAGACAGGGCTTTAAAGAGGTCCACCCCTCCCGTG -TCTAGCAGGGGGCCCTTCCGGTGGACCTGTAGAGCCTATGGACTCGAACACTTTTTTACA -ATTCAATTTGACTCCAGTCTCAGGATCTGTCCATCCGGCACGCTCCGACTCAATATTATG -GCCAAGGTATACCTTCCTCAAAAAGCACATATTTTGGAACTACTGAACGCCATGCCAATG -TGTTATACGAATTGGGAGGAACCAAACAACCACATATCCACACAACACACAAACAGCTTG -GCGTTCACACTTTCGATCTCACATCCACAACCCCTCTTCACCCTGCACTCAAAAATCAGC -ACCGAAAACGCCACATTTGCCAACGCTGTTTGGCGTTCGTTGAAAGCGACGGAGTGCTGC -CGGTGAGATGCCAAGTCCACCAACACGGCATGGCGAAAAAAAAGGGGGGGGTTACCATCA -GACTAGAGTCCAGTTGTTGCATTGCTTATTTTTCTTTTATTGGAATTTGGAGTTGATTTG -GCGCGAGTTCGCGCCTCCCTAATTTCAAGGGCTCCATATGGGCCTTTTTTTTTTTTTGCC -AAAATGGGGTCAGTTGAAGCTCAAGGTTTCATGAATTTGGACGGTGTACGGAGTACTTGG -GTTTCCTTTTTACCCCTTTCACTATGCAGTGTGGACTTGATTTAGGTACCTGACTGCCCG -AGTGCTTTGTGTTTTGCTTTTTGGGGAATGTCCACCCCGACCGTCTAGACTCGCGGGACC -CGGACTACTGAGACCTTTCGTCTTAGTATTGATAGTTGAGATGTACAACATAGAGGGGAT -AAAGCTAGTGACTGATAGGTCTACATGGAGTAGGTCCTGTTGTATACAATTTATAGTCTT -GACTAACCTTGGCTCTTATACGGAGTAAACCAAACGATGTCGTTCCAGGGTTCGCACAAT -GTCCACCCACATTGTAGGACAAACCCAAAGGCTAAAGTTGAACAAAATGCTTGGATAAAG -TGCTTGGGGTAATTTGATAGAAACCTAATCGCGACTCCAATCTTAATCCGGTAACCCGAG -CCCCATCGACACTTCCTGGAAAGCCATTTCCATGTTCACTCCCATACTGAATTTGGTCAA -ATAATCTTTGTAATCTTACAAGGTGAGCGAATGAAGACCGAACCGCCGATGTTCGTCCAT -GGCCCACAAGTTGCTTAATTATCGGATTGATTCCACGGTTAGGTCTTTTCTGGAAAACCG 
-CGCAACCCCTTGGCGCCATGATTTAGTGGAATGCGGAGAAACCTGAGTCCAAAGGAAACT -TCCAACGTTGGGACTGCGTTTGGTTTTGGTTTTGGGTGGTCTTGAGCTACCCGGTTTGGA -AGATCTCCATGAGATCATGGTCGAACCATTAGAACTTCCCATATCCTGTCGGGAGTTCTG -ATGCTTGAATGCTTTTGTAGTTTACAATGCTGTACGGATGAAGGGACAAGATACTACAAT -GTTGTACACCTTTGTTTTTCTCCCCGGGCTAAACGCCAATCATCGATGGAGACTCTTATA -TTCAGTCGATATGATTGGATGGATTCAAGTTTATGTAATGTTTCTTCAGCCTCTTCCTCG -GCTTTCGCTCGACGATCTACCCCCCTCTCGTACCTTGTGATCAACCCTGTACTCCGTACG -CAAGCACGGAGGATCCGTTACAATAAAAAGCGACCTCCGGGGGTGACATCAATCCCCGAG -TCTCGGGGACCATAGTCGATATCGGAAATTTTTTTATTGCATATTCACACACTATTGAAT -ACCAAAGAAAGGATGAATGTTTGGTTCCACAGCACGAGGCATATGGGAATAATCCCAAAT -CAAAAGGTTAAGTTGTGACATATCCGATTCGGGCCTTCCACTCTACCGCGCTATTGCGGG -AGCCGCGCCCGCCAATATATCCAAAGCGGCCTTTTTTTCCACCCACCCCCATGGGTATTC -CGCCCCTCTGACGTCGATGTCCAGCTGTCCCAATCCCAGGGGGGAAAAAACATGGCTTGC -AGCCCGTGGCCCTTGTTATCACAAACGCAATACGAGGTTATGATCCCGTTTATGGTCTTC -CGGGTGCCTTAGGTGTGGACTTAAGGGTATTTTCTATGTGATATAGCTCGCATGGCCCGG -CTAATGGTAGCGTTGGAATCCCAACTTATTCCTTCTTCGTGCAGGAGGCGTTATTTTTTG -TGCTTCCGCTGGGCGCGACCTAATTGCACGCTCGTATTTTGTGTGCATTGGTAGTTATGC -GACCGAGGGAAAAGGGTCGGAAAACCTCCGCGGAATATACCCTATGCAGATGCAACATAG -AGTGTGTGGTCGTCGCTTTCGTGTGGCATTCTTATCCCGTGTGGAGTGTGCTCCATCGGC -ACTGTACATCCTCCACATGTGTATGGAGTACTCAGTCAACTTCTGTGCTATGTATATTCG -GTCTAATGTCTGAGATTGGAGCCCTCGACTTTAGATATCAAACGTCATCGCATTTTTTAG -TGAATTCCCTAATCAAATGCAATTAGAATGCAGTCAATAATGTTGGCTAGTGATTCGATA -CTCCCCTTCACTCGATGTAAGGTCGGGTTGACGCGCTAAACCCTCTCTGTGCCGTGTCAA -AGGCCCCTCCATGGCTTAAGCCATTTTCTATTTATTATTTTCACAATATATTTTCTGATC -CTGCAGCTTGCCAATCAGGAAGAGATCCTATGTTATCCCAGAGCCATTTTTCCTCGTCAA -TTTTTCGCATTGAAATGTATGAGCAGATCTTGAACTAACTGAAGAACTACTATATTCTCA -TATTAATCCCCACGGTAACGGGTATACCCATTGACACTGGGATCTGCCCCGCTATAATGA -AACAGCGCCTTCAGGGCAAACCTTGGTGAGTTTATTTCTTGGCAGTCCCtttttcttttt -tctttttcttttttctttttttttcttttttctttttatattccatcttttattttaatt -gaattttcctctttcCCTTAGACACGGCACAGTTGGACCATTCAGCGTTCATCACCGCTA -GTCTACAATTGAACATTCCCCTAGTCTACACACCCCCTTTGATTCTGAGGTCACCGACTG -CTTTTTGGGTCTCATGATTTTTCCTCCTTTTTACTTTTCCTTAGGTCAGAAATCCATTTC 
-GACTTTCTCTGACCTTTTTGTATGGTTCCTTCTAACTGCCGTTCCGATGCCGGAGAGTAA -TTTCCACATTCTTATTGAAATACTTATATACCGATTCTTTAGATCTTCCATTCCTCAGCC -TAGTTGATGCTTAAGCAATTAATATATTTTAGACCCTGATTGAAAGGGTAAATTGGATTT -ACTAGTATGCTTATGGTAAATATCAGTGATTTCACATCCGAGGAACTGGTTCCCGTTGAG -GGACCACTTTTGGTGGCCTCCAATTGGAGAAAGAGAAGAAAATAATATAATTAAAAAGGA -AGTGAAAATTAAAATAAAGAAAATATCTTAAAATGTGAAATCTAAAAAATCTCCAAAAAC -AAAAGAAAAATAAATACCGGTATTATAATTTCCAGCGATCTGCAATCCCCGACGGGACGT -CGTCAGTGGCACTTTGGAGTATGGGATCATGCCTCACACCTTATATTCCTTCATAAGCTT -CGCCGATCCCCTTCCCCAGACTGCCTTTCTCTTTTTTTCTCCTGATATAATTGATCATCG -GGTCTATTTCTTTTAAAACTCCGGATCAACACAGACGATCTCTCTCCAACCTTTTGGACT -CGATCTTCACTCTCTACACAGTTTCTACACGCGATCGATTCGCGACTCTACATCCCTCAT -GACGATGGTTATGGAAAACCAGAACCGCCCCTATGGCGGCATGAGCTTCGACAATGTTTA -TCATCACACTCCGCCACAATTCACGGACCCCTGGGCACATACTTCGTCGCACTCGACACC -TCCAGTCTATGCGACTTCCATGGGCAATGGCGCCAGCATGGCCCTCGCTCAGGTGAAGCA -AGAGGAAGTGAACCGCACGGGCATGTCTATGCCATACCCCAACATTCCCGTGTCCGCCCC -GTCAATGGTGGCTGGGACCTCCTACGCGACCGCCAGCTATGGCCCAGAAGTGATGGCCAT -GCAACACGAGGTGCCGCGCACAACTTTCGACCAGGCTCCCTCTTACACTACCGCTCCCCC -AATGAGCAACTTCGCGCCTTCTAGCTACGCTTACGCCCCCATTCACCCTTCTCCACAAGA -CAACCGTCGTATCTCGCACTCGTAAGTCAAAAATTCCGAGCCCGCTTTGTTTACCTCGGA -ATCGCAAAGAAAACAAACTTACATAAATTCACAGAGATGCCGCTCGCGTTAGCGCTTCAG -CTTCCGCCCCCACTTTCGGTGATGCGCTCGACGCCAGCCGCGGAATGGTCGCTCTCAGCC -AGGACCTGACTCCCCGCAACATCTACGGTCCTGGCCATCGCGGCGCGCGAGGCTCAGCCG -ATTCCTACGGTTTCCCCTCCACCCAATCTTCTGCCTCATCTATCTCCTCCGGCAGCAACT -ACCCCTACTACAGCGCCTCGGTGGGCTCCGTCGACTCCTCCGTGACAGACTACAGCTCCA -CAACTTCCGAGTCATACGAATCCCGCACTCTCCCCCGGCCGACCAGCCTTTTGGCTGGCA -GTGCGCCCCCTGGCCCGCAGTCGATGATGAGCCAATTCAGCTCCAAGATGCCCTCCAATA -CACAGAAGAAGCACAAGTGCAAGGTCTGCGATAAGCGGTTCACGCGTCCGTCTTCTCTGC -AGACGCATATGTACAGCCATACCGGTGAAAAGCGTAAGTTCTTACTCTTTTCGGAAGTCA -ACTCATTTAAAATTTGGGTTCTAACATTTTCCAGCATTCGCGTGTGATGTTGAAGGCTGC -GGGCGGCACTTCTCCGTTGTCTCAAACCTGCGTCGACACAAGAAGGTCCACAAGGGCGAG -AAGGACACCGGCTCTGGCGATGATGACGAGTAACCTCTGCCAATCTCCCATACGACTACG -TTCTTCCATTCGGGGAAATCCCGTGGACTCTCTCACCTACGCCCTCCGCGCGCATAGCGC 
-TAAATCCTCCACCCGCCACAAACCTGTTCTCCCTATGCCACGAGCCAATTCATGCACCAA -CTGTGTTGGCATGTTTTTTTTTTTCCATGGCTCGGGTTCTCCTTGTACAACAGCATGGAT -CGCCTTTTCATCACCTTCTATATCAACTTCCGAGAAACATTTCCTAATCGGAGAGATGAA -CGTTCCTACATATCATACTCCTTTCAAAACGGTCTGTGATTTATGGGTATAATGGGACCT -GTCCCGAGGGCCACAGGCACGAGAGAGCCTTTTCTTCCGCCCTTTTTTTCTTTTCACATT -TCCATACTTTGTTTCACCCACTTTACGACCATAGAGGTTTCCGGCTGGATGCGTTGCTTT -CCATTTCATTTCTTATCCTTTCGCTAGACCTGGGCGTCACCTTTCTGCTTTTTCTCGATA -CCATGGGGCTTGTGATGATTCGATGTTTCTTATTTTCTCTGAAGGCAATACCCTGACTTC -TTTTTTTCGCTTCTTAATCTTCCATGTCCTGCATTCTGCATCGCTGAAAGCATTCACATT -GTCATCTTTTCACCCAGTCGCCCTTTCCATCAAGAACGTGCACATTTGGGTTGGTGTTGG -TGATGATACCCATAGGAAAGCATGGTTCCTATTGTTTTGATACTGGCTGGACCTGGCTTG -GTTCAAGATTCACACAGTCGCTACTAGATATATTGTCTAATCAATGAAAGACATTCTGAA -CAACATCTTATAGTTCTTCGTAGTTACATGTACCTTCTCTATAGTCTAATCCCTTGACAT -GAACATCACAACCGTTCCAGTCTTCCGCTCTTCCCAGCTCGCTCTGCTGCTGCTCGACCA -GCACGACGCCCGAAGACCGTCCCAGAAGTCAATCCACTTCCACCAGGGTAATTATCATAG -AAGATCCCTCCAAGCATCTCACCCACACAGTACAATCCTGGAACCTCCTTACTCGTCGTC -TCCGAAATGACTGCGGCAGTCTCCGGATCCACAGCCAATCCACCAAAAGTGAACGTGATG -CCCGCCGTGACCTTGACAGCCAGGAAAGGCGGCTTGTCAATAGGCAAAGCCCAATTCGAC -TTAGCAACAGCCAGCCCCTTAGTACCAAGCCCATCCTTAACAGCAGGATCCCACCTCTTG -TGCCCATCCTCACCCTCAACAGAAGCATTATACTCCCTCACAGTCTCAACAAACCGCCCA -GGATCAACAAGTCCAACCTCCGCACACTTCTCCGCAAGTTCCTCCAACGACCCTCCTTCA -ATCCGCCGCACAACCTCACCCCGGTACTCCTCCTCCCGCAACCACGCCACAGTCCGCGCA -TCCCAAACCTGGAACGCAACCTGCCCAGGCTGCTGAAGCACCCGTCGCCCAATCATCGCA -TACGTATAGTTCCGCATATCAAACCCTTCATCAACAAACCGCTCCCCGTCCCGATTGACC -ATAATCCCCAGTGGATAACCACTCTTCGTAAACTCATTCGAAACCTCCCGATTCCCCGAA -TCAGCAGGCGCATCCGCATCCCACGCCACACTATGACATCCACTCCAATTTCCCGCCGTC -TTGGCCGCCACATCCCGCTGCGCAATCCGCAACATCTCACCCGTATTATATGGCGTTCCA -CGCACGCGCGCACTATCCCACCCAGGCCCAAGCCACTGCGCCCGCAACTGCGGATTCGCC -TCGAACCCACCCGCAGCGAGAATGACTGCGCCAGCAGCGATGGTGGTTTGGCTGCGTTTA -CCAGCATCGCAGCGGACGACTTCAACGCCGATAACAGCGCCGCTGGTTGGATCGGTTACT -AGTGCCGTGGCAGTTGTGTCGAAGAAGACATCGACGCCGGCGTTTCTGACCGCGCGCAGC -TCATCCTCGACGAGTCCTTTGCCTCCGTCTTGGGTTTTCAGCGCAAGGCCGCCCCAGAAT 
-TTAATTCGACCGCTGACTTCGTAGGCTTGGCGGTTGAAGGACAGCTGGAAGCGGACTCCG -TTGGCGGCTAGCCAGCCGATGGCGGCGCGGGAGTCGCGCACTAGAGCTGCTGATAGAGCG -GGGTCGGTGCGGTTTCCTGTCATGCGCTGAAGGTCGGCTGTGAAGTCTTGTTCTGTGTAG -ACTGGCATGTCGATGCGGGATGCTTGTACTGCGTCGGTGTTGTTGACTAATGAGAGGAGG -TCATGGAGACCGTTGTGTGTTGTGCGGAAGGCGCCGGCTGTGAAGTAGCTGTTGCCTCCG -GCCCAGGAGATGGGGCTTTTGTCGATTACGGTTACTCTTAGGGCTGGGTTTGTTTGCGCA -GCTGAAAGGGCGGCGCTGAGGCCGGCGTTGCCGCTGCCTATCACGAGGATGTCGGTTTTC -TGGGGGACAAACATGGTAATATTTGGGAAGTTAGGAACTGGGGAGGTAATGGCTTGTGGA -AGTCAGTGACTTGTTCTTTGGGTGGAGGTGTGGTGGTTTTATGCTATGTTACACTTAGGG -CTCTGCGGAGGTTCATTCTCCGGGTGCTTCCCCCACACTCCCCACAGTCGGGATGTCGAT -CTCCGTGGGATTTTGATGAGACTTGAATAGTTGGATGTCAAGAGGTATGTCGTAGGTTTC -CCAGGGTTATTGGGTAATAGCAAAAAGTTACCCAGTTCGCGTCGCATACCTAAAGGGGTT -TGAGCATCACAATGTGCAGTCTAATTCCCTATGGTTGAGGTCAATGGAGTTCTTATGATG -TGCTCGGAATAAATCGCAATGAAATTATGATAATTGATATTCAACCAATATCCTAGTCGC -CAGTTCTGATATGACCACGGCAGACAATGCAGAACCAGCCCCAACGCCAAATTGTGATCC -AATGTCCAACGGACAGAGCGCACGTAGAAAATTACTGAGCTCGACAGCACCAGGCATACC -GCAGCCCCATTGATCTCGAACAAGATATCTAACGGATCCGGTCCAGCTTGGCCCGGTTCA -TGGCAATCTGGTCGTCGACGTAATCGCTCTGCAGCAAGTTAGTTTCATCTTCATACCCAT -ACCATACTGCAGGGCTAGTAGAATACTCACCTTGCCCATGATACGCTCCAGATGTCTGTT -CTGCTCGTCCAGCTCCTTGCCGGTGGCCTTGGCAAGACCGTTCAACCGACCAGTAGCACC -ACTGAGCAGATCCAAGTTCTGCTCGATTTCGTCTTCCAATGCCTCGTCCTCGCTATCCGC -CTCGAATTGGTACTTGGCACGCTCAGTGACACTCGCCTTCTTGCCCTTCCCTTGTTTCGC -CGCATCGCGATCGATATCACGGAACACCCGGTCCATACGCTGGTTGGTCTGGTGTGCCTC -GGCCCGTGTGCCGTCGCGGGTATCACGATCACCACGGTGTGTACTAAGGATCTTCTCATC -GCGCTCGCGTCGACGCCGCGAGGCTGTGAATGGGTTCGCTACATGCACGGCGAACATGCT -CTTGTTGAGTGTCTTGAGCTCCTTTGCCTTATCCTCCGCGATGCGACCTTCGATAGCCGT -CATGTCCAGACTCTTCTCCGTTCCGTGGATAGACTCGCCCTGCGCACCGAGTCGGGCCAG -AGTAGAGCGACCGGTTTCTTCAGCCTGTGCGGCAGCTCGAAGTGCGTTCCGTGTTGACGC -AACATCACCCTGCTTCACGAAGCGCATCTCGTTCTTCGTCGCCTGAActtcctcttcttc -ctcttcttcAGCGGTCAGCTGTCGATCCTGGAAAGTCGCCATGCTGTATTCGTTGCTGCC -GCCACCATAGTTGCTCTGTCCTTGACCACCTTGACCCTCGGAGTACGGTGGTGGCGAAGC -ACCCGAGTTGTCCGGCGACCTCCGGTTGGAATTGCCGAACAGCTCGTTGCGAGCGGAGTC 
-TGCAGCCGCGGGGTCATTGGGATCGTTGCTGCCCAGACCTCCGTAGCCGCCGTTTCCATA -TCGCGAGCCCGAGCCCGAACCGTTGGCTGAGGAGCCGGCGCCTGCGCCTGATCCGCCGAA -ACGATCACTGCCGTAGCCGCCTTGGTTTCCGTATTTGTTGGCGGCGTACTTGTTGTCCGA -GGGAATGGAGTGGGGGCTCTGCGACGCGGGGCCACCGTTGGCGGCTGGGTGGTCGGCAGG -TAAGGGGGCAACACCCGCGCCAATCTTGGCTTTTGTGTAAGGATCTACCGGAATTGGTTG -GGCGTAAGGGTTGGAGGGGGTTGATGGGCTCTTGTCCTTCTTTCGAGAACCGAACAGTGC -TCGGCGGTTAGAGTCATCGTCGCCATCGGATTTCTTGAAAAGACCCATTATGTGATGGGG -GTTTTGTAGGGTTGTCAAGATTTGCTGTATGAAATTAAAATGATCGAGTGTGAAGTGAAG -TACAAGAAAGTAAGGAGAAATAGAATGACATGAGACAAGAAAGAATTAATGTGAGACGCC -TTCAGGGGAGCTAGATGTTCAAGATCGCCGCCTGATGGTAATATATATACGAAGAGCATA -CCAAAGAAACCCACAATATTAATCGTGCACAGTGCAGCGATAACCATACCAATACCAAAC -TTACGAAGTTACATCAGCAGTGGCAGTGCTAACCCAATGCGGGCGCAGTGGCACTTCAAC -GTCTCTGTTTATTTTTGGCCACATCATTGCCCCTGCTGCCTGGATGAAATGGCTCCTTTA -TCGGGGTCTGATGCGGACTACTTTAGGGATAAGGCGCGGAGAGATCTTCTGACTTTACTA -GAAGGCGTGAGTACAGCTCCTTGATATTCTGATCTGCGCTAACTATTCTCCAGGCCCGGG -GTAAGAAGAACCTCGTCATAAGCCAGGAGCTGGCCGGCCCAGTGGGGCTATTCGTCAAAT -TCTCCGTGCTTCAGGAGTATGGTGTGGACCGGGTCTTCTTGTTGGAAAACGCAAACATCG -ACTCATCACAACGCAATGTTGTATTCCTTGTACGTGGCGAGCAAGCTCGCCACGTTCGAA -TCGTGGCAGGTATGACATTGTTTCTTTTTTGCCCCTTTTCAGCTTGCTAGACTGTATATT -GGATCTTTTCATTTGGTCCATGATGGGCCATCATTAGATTGCACAGAGACCTTCCTACTC -TATGTTATCTGGAGAAACGTTCCCTATATTGATATCTATCTTTACAACACTTTACAAGCT -CGTTTCATTAGGGCCCTTTCCTGCTATCTCTGATTGAGTGCCATCAGGATGAGCTGACAT -CTCTATCTTAGAACAAATCAAACGCCTGCAGAATAATGGGAATGTTGAACATGAGTTCTC -TATCTTCTTTGCCCCACGACGGACCCTTGTAAGCAATGCGGTCTTGGAGGAGGCAGGAAT -CATCGGCGACGTAAACATCGCCGAGCTTCCTCTCTATTTTATGCCTCTAGAGCAAGATAT -TTTGTCACTAGAACTGGATGACTCATTTGAAGACTTGTACTTGGTAGGCTGATAGGTTGC -TGGAGTTTGAGTGTGACATTCACTGACCTTGCTATCCAGCACAAAGATCCCGGGTGTATC -TTCCATGCCGCTAAGGCTCTCATGGGTGTTCAGCAGCGCCATGGCTATTTCCCACGTATT -GTCGGAAAAGGCGACAATGCTAGGCGCCTTGCGGATCTCCTACTGCGTATGAGAAAAGAA -CTTGACACAGAGGAGAGTTCCGGATTAACTGATCCCTCTGCCCGAGGGCTGTTGCCAAGT -GCGAGTGTAGAGAACTTGATCATCATAGACCGTGAGGTGGACTATGGGACTCCCCTTCTC -ACACAGCTCACATATGAAGGCTTAATTGATGAATATTTCGGCATAAAGAACAACCAAGCA 
-GATGTTGATACTAGCATTGTCGGTCTTGCTCCCACGCCACAGCCCCAAGAGTCATCCAAG -ACACCCCAGCAAGCCAAGCAGGGGCTGAAAAGAAAGATACAGCTGGATGCCTCGGATCAG -TTGTTCAGTCAATTGCGAGATGCAAACTTTGCAATCGTGGGCGATATCCTGAATAAGGTG -GCACGCCGCTTAGAAAATGAGTACGAGAGCCGCCACACGGCTAAAACGACCGGCGAGCTA -CGAGAATTTGTCAACCGATTACCAACCTACCAGCTGGAACATCAGAGTCTTCGAACTCAT -ACCAACCTCGCAGAGGAAATAATGCGACTCACCCGCTCCGAAACCTTCCGCAAAACTTTA -GAAGTTCAACAGAATAACGCAGCCGGTGCCGACTCCACATACCAGCACGAGACAATCGAA -GAGCTTATAGCTCGAGATGTCCCACTGAAGACTGTGCTACGGCTCCTGTGCCTGGAGTCA -TGCATGTCTGGGGGTCTTCGCCCCCGCGACCTAGAGAATTTCAAGAGAGAGGTCGTCCAA -GCTTACGGCCACCAGCACCTCCTCACGTTCTGGGCGCTAGAGAAAATGGAACTTCTTCAG -CCTAGGTCATCAGCGACGACTATGCTCCTCCCTACATCCGGCGCGCAAGCAGGATCAAAG -ACAAACTACGGATACCTGCGCAAGAACCTCCGTCTTGTCATCGAGGAGGTCAGCGAAAAG -GATCCGAATGATATTTCCTACGTGTACAGTGGCTTCGCGCCCCTCAGCGTCCGGCTGGTT -CAATGTGTGTTACAAAAGTCCTATATGATCTCGCTGATCAAGGGAGGTACGCCGGCGGCC -TCGTTGAACACCACTAATACAACGTCGCCTGGCTGGCTTGGATTTGAGGACGTTGTCAAA -AGTGCCCGCGGTGCGACTTTCAGCATAGTCCAGAAGGGTGATGACAAGGCTGTGCGGGCT -CGTCAAACGCTGAGTGGTAACTCTGCTACTAAGACTGTGTATGTCTTTTTCTTGGGTGGT -ATCACATTCACGGAGATCGCCGCGTTGCGCTTCATTGCTGCGCAGGAGGCCCCACGGAGA -AAGATCATTATTTGTACCACGAGCATCATTAGCGGTGACCGGATGATGGAGGCAGCTATC -GAGAAGGGTAGCTTTGCCAAGGGGGACTAAATGGTTTAATCTATGCCTAAGTGACCATAA -TAATACCTACTTCTGTCATTTTCGGAAGAATGAGAGGATCGCATAGTCTTCCTTATGTAA -AGACCAAGGCGCACCATGGCTTTTTGAATCACCCGGCGTGGAATATTCACAGTGTCCACA -CATGTAGTTCATCTGCTTCGATTCTAGGTCTTAATCTCCTACCACTAGTCTAGGAGTAGG -ATACTTCGAACTTCCAGGTACgggagaaagaagaaggaagaagaaagaagaacaaggaag -aCAAGCAAACATAAAGAGGCATATCCCGGATATTGTTTTAATAGCTGAATCCAGAATGAA -CACCCATCAAGCGACCTTGATACCGCTCAATGACCTCTCCAAGCCGCACCATAACACCAG -CAGCAAGCGTACGTATCTGCTCCTGCGGGTCTGCAACCGCTGAACTTGGATCAGCCTGTG -CAGCTGAAGGCGGCGTCCGGTCAAAGACCTGGGAAGCCCACTCGCGCAACTCGATCACCT -GCGTGGCGTACTCCGTGACAAGACGCTGCTCGCCATTTGAGCCAGACGAAATATTAAGAT -CCACTATAGCGAGGAAGAGGGACAGGAGCGCGGCAGTGACGGTGGGTTCGCTGGATGTGG -AGGATGTGTGTAGTGATAAGAGGAGGGAGAGAGTTTCGTGGGTGAGACCTGGGAGGATGG -GGGTGTGGGGACCTGCTGTGGAAAGGATTAGGGTGAGAGTTTGGAGGAAGAGTGTTAAGA 
-GATGGGGGATGAAAAATGGGTTGTATGAGGCGGAGGAGGACCTAATTGGTATCAGTGAAT -ATATATGACTAGAATCGAGATGAGTTCATAAGACATACGACGAGAATTGCAGCATCATTA -TCTCGAATCTACCCTGCAGTGGGAAAAAGAAACCCTCGGCAAGCACCTTGTGCAGATCCT -TGATTGTGGAGCTTTGTCGCTTGGCATCACGCTGCTTGCGTTTCTTCTCGACTTCCATAC -GTGATGAGAAGGTTCGCACTTTGAGAGCATTGGGGCCAGCCATCGTATCTGCTGCGTTGG -CTGCGAGCGGTTCCAAAGATGCGCGGGACATTTTATGGGTGAGCGTTGCGATGGGAGACG -AATTTTCCCCCGAGTACATCGCCTCTAGGGCAGGCGAGAGCCGTTTGGATGGGAATGAGG -TATCGCCAGTAGTTGGGAGACGCAGTGTTTTGGCGTCATCTTCTCCATTCCCGGCTAACT -CGCGAGCACTCAGACCTAGGGCTGTAAGAACAGCGGATCTCTGGACTTGAGAGAGGTCAC -CATCGAAGTAAATGGCAGTGAACCAGCGGCCCATCTTGAAGGGCTCAGACACAATCAATG -CAATCATGCTTTGTAGTCGAGATTCATGGAATGATGGATGATTGTTATCATTTTGAAGAC -CTACGACAGTTAGAGCGAGCTCCTCGATCTGCTCGGCAAGTTCAGTTCCGAAGCCTATCT -TCCGTCGAATTAAGGATGGGGCAGTGGAGATGGCGAGATGATACCGCTCGACATTCTCAG -TGTCTCGAAGAGATGGTATCAGATCACGGATATATCTTACGCTGGTTAGCGAAATTGATA -TCAACAGGCAATTTGTACTTACACCGGCGCTACTGGTTTATTCCGTTGGATCAACGTGGG -ATCATCATCAGAATCGTGTGCGTCGTCGTCCGGCTTTTCATACGGAATGAGatcgtcatc -gtcatcctcattttcatcttcgttttcgtcactatcgtcGACTTCTTCAATAGCAACTAT -TTTGCTTGTATGATGGCGAGAAAATTGTGCTTTGGGCTTTTGTGCAGGCTTGGAGTGGGC -CTCTGTTTTTGCTGGCAATGTCTCTTGCAATGTTTTGATGGTGTCAGGCGAACCGACCGT -ATCTTGGACCTTTGTCAAACCCAGATACCGCAGCGCTTTCTCGCTTCGCATCTCCTCTAG -GTCGAATTTTAGGACTTTTTCTGGTTGCTCAATGAGTTCAGATATTCCTGTTCCAACGAT -CATGCCAAGAAATCGTGCTCTGTTAGAGGACGCAGCAAGTCGATTGGAGACTGCATTGAG -GTATGCTCCTGACCGGCCTACTTCTTTCACCTGCCCTTGATCTAGCCGATACAGGTGGCC -AGCGGCAAGAAGAATCACTTGTGCAGTTGCTGTTCGATGTTAGAGGCTGATATTCTTGGC -ACAAAGGACCGTTGACTGTTGACAAACCATTCTGCACCACATTTGGGACATGTTTGATAG -AGAACTTGTCTCCGAATTGCATAAGGGTTCGTATAAGCAATGACTTCAACACATCTGTTG -CAGGTCGTTAGTTTCGTTGTCTTTCTTCAGCAAAGCAAACTTACCACCACGGTTACTGAA -AGTGGCCAACAATGCACGGCGCAGTCCCAGCGTGTTTATCGAGCCACCTTGCGATTTTGA -GAGCCATTCCAGGACTTGGCTTTCCAAGACCGAATGAGAAGGAATGGCAGCAGAAATCAG -AGCTGCCACCCCTTCAATGGACTCTGAAGGAGTGGATGATGGGCTCACTGTCCCCGATAG -ATCATCTGAGAATTGCTTTCGTTGTATTGCACGGAATATTGCTTCTACAACTGCAAGCTG -CTCCGTCCGTCTGAGGTGGTTTAATAATATTTCAAAGCATGCCGGCGAGGACTGTCCAAG 
-AAGCAGACCCGAGTAGATTTCTTTGACAACTTGATCTATGCGATGTTAGCATAGGGCTCA -AATAGAAAGGGATAAATCGCTGTATATTCACCTGTGTATCCAAGGCTTAAGGCTCGCCCT -GTCAGCAAAGCAACTGCGGCCCAGTCGCTTTCGCTATCTGGGGCTAGTCTTGCTGCCAGA -TGCGAAACATTGCGCCCAAGCCACGATGCATATTGAGAGCCATTCCCGACCCAGGACACG -GAAGATACTGATCTTGACTCATCAACAACAGTAAGAGCTTCAGCTACTGTCGATAGAATC -TTCCCGGCAGCAACGAGCGACACAAGTTCTCTCCATGTAACTTGCTGCCGAGTTTTGTTG -TCATATATGACCGAAATGTCAGAAAAAACACGGAGCAAAAAGTCTGCTGGCTCTAGCAAA -GCAGCAAGCACAGCTAGAAGATCCCGGATACCGAGTTGGCTGCTTGAACCTTGGGCTTGC -TGGGCAGAGGCGCGGGCCGCATTGATGAGAGATCGTAGCTGAGCCACCAAGGAACCGAGA -CCCGCGACGCTGCTAAGACATCTCAATAACGCTGCTCGAGCTTTGGCATCCTTTCCTTTG -CTATCTCTTGCATCTAGCGATGCCCAATGATCTGGAATCGTGGTGCTGACTAATAATTGG -AGAATCTGGGCTGTGATAGGACTCGCAATCCGGAGATCGAAGTCTTTTGTAGCTTTGGAT -TTGCTTAAGGGATCAAGGGCTCCCAAAATGGCACACAGCTGTTCACGATCGGGACTGGAC -TTGAGTAGATCTATGATGTGATGTGATGATAGATGTTCAGGATCGAGGTTGGTTCTTGCA -GGCGGATCAGCACGCAGAGAGTCGACAGTAGATGTAACAAGGGCGTCAGAATCCCGCTTG -ACAGTTTTTACTGCCGTCAGAAGGCCCTCCATCTTCGATTTGAATATCACTTCCGGAGGA -AAACAGATGTTATGCCTGAGGTGAAGATTACAGTACCCAGGCGGGCCTGAGATTAGTCAG -CCATACCATAGTAACCTTCCTCAAGGCAGCCCACGCGCAGGTGATGTTGTCAACTCAGCC -TCTTCATTACCAGTGACTTGAATACTTTAGTGATCCGATTAAAAGCTGAAGTGGATCTTT -ATCTGAGACCATTCCCAACAAGTTTATTCTGTCAATCACACATCGCAACGAGCTTTGGCC -CGTGTAACGCGAATTGATACTGCGCCTATCTACTCTCATCGGTTGCCCATAACAGGATTC -CTATCAATATGGATGTGGATGTACAAACTCCAGATGCCCCAGAACCTCCGATTTATGTGG -ATCAACAGGGCCGATGGTTCGTTGAAGGACCCTTGAAACGTCTGATCTGCTCTAATACCC -CACAGGAAGCACCTCTACAACGTCCTAAGTAAACGAGGTCCTTTCACAGACGAGGACTGG -AACCCTGGTCCCGAGCCTATCAATGCGCTCGAATCTGCCAAAATATTGTAAGTTCATTCA -CCTCGGAGAGATTACTCTTGTTAACATGAACCCTAGGGTGATGTTCGTGAACCTCGCAAA -GTTGTGATATTCCGTTGAGATCAAACTGACTCAAATCTAGCGGAGCTGGTGGACTAGGAT -GTGAGATCTTAAAGAATCTCGCGCTGTCAGGATTCAAAGATATTCATGTCATTGACATGG -GTGTGTGTTTCAACTCATAATGCCAAGACCAGAAGCTCACAGTCGACAGACACCATTGAT -ATATCAAATTTGAACCGGCAGTTTTTGTTCCGCCAGGCAGATATTGGAAAGCCCAAAGCT -GAAGTCGCTGCCGCTTTTGTGCAAAAGCGAGTTAAGGGAGTGAAGATCACACCCTACGTC -GGCAAAATCCAGGATAAAGATGAGGAGTACTACATGCAGTTTAATATTGTCGTTTGTGGC 
-CTCGACAGCATTGAAGCTCGACGCTGGATCAATTCCACGTTGATCTCGATGGTCGATGAA -GACGATCCTTCGAGCCTGAAGCCACTTGTTGATGGTGGAACTGAAGGTACCTACCTGCAT -CGAAACCTGATGGAGAATATGTTGCTGATATATCAAACAGGTTTCAAGGGCCAAGCCCGT -GTTATTCTTCCATCTATCTCATCCTGCATTGAGTGCCAGCTGGATATGCACGCACCTCGT -CCAGCAGTTCCTCTCTGCACCATTGCTACCATCCCGCGCCAACCTCAGCACTGCATTGAG -TGGGCGCACCAGATTGCATGGCAAGAGAAGCGCAAAGATGACACATTCGACAGCGATGAC -ATGGAACACATATCATGGATATACAATGCCGCTTACGAACGTGCCCAGCATTTCCATATC -CATGGCGTTACCTTCCAGATGACCCAGGGTGTGGTGAAAAATATCATTCCGGCTATCGCA -TCAACGAACGCAGTCATTGCGGCCTCCACAACTTCAGAAGTACTCAAGATTGCCACTGGC -TGCAATCCATTCCTAAGCAACTACATGATGTACGCTGGTGAGGAAGGAGTGTACACATAC -ACCTTCGAGGCGGAGAAGAAGTCTGACTGCCCCGTGTGCGGAGAACTTGCCCGGAAGTTG -GATGTGGATCCCAATATGACATTGGGCGAGTTCATCGAGAGCCTTGGGGAAAGGGCCGAG -GCTCAATTGAAGAAACCTAGTATGCGGACCGAAGAGAAGACCCTCTACCAGCGCTTCCCG -CCTCAGCTAGAGGAAATGACTCGCCCTCACCTGGTGAAAAAACTGGCAGATTTGATCGAG -GACGGCGAGGAAGTTGCTGTCAGTGACCCAGCCTACACCACTACATTCCGCTTCCGTCTT -CACTTCAAATAAAGGTAACCTGCCCTACGTTTCCATGTGAAAATTACGAGAAATCAGATA -TCTTATGACCGGGATGATTAGCCTTAGTTAACTGTGTAAAATTCAAAAGATAGCTGTTGG -GCTCTTTTCCAAAACAATTAACACATCCAGACTCCAGCCTCGTGAGGCGAAGCCTTGATT -GACTTTTCGTAAATGTACAGCGCAAAGGCAGCCAAAAAAATCCAACGCCAAAGAATGCAT -ATAATAAGGTGTAGAGAGAAAGGACCCATTGATCAACCCATCAGATCGGTCAGACCCCTC -TTCCACCGGTTGCACTTCAGCTTCATAACCTTAACAGCCTCCAAGAAAAGCATGTCACTC -TCAAATTGACCGGTGCTCTCCACGTTGAAGATGAAGTGATCCCGAACACGTCCCAGCTTG -ACCTTATCCTTGAACTCATCGTGGCGCAAGCACTCTCTGCTGACAGTGTCATTGAACGCA -TCCTTCACGACGGCCTTCTGCTCGCCGGTGTGGCCCTCATATCCACTTCCACGCTGCGCG -GCCTCTGCAGAGGTGACAGGCTCAAGACCAATCACACCCTTGGGGAAGCACTTGGCAAAC -TTCTTTGCATCGTCGCCAATAATAGGATGCACGATCTTAATGTCAGGAAGAAGACGGTAT -GAAGCGGTAGCCACAGGTGAGAACTTGGCGTGATCGGCACCGATGCCCTTCTGGCAGTGG -AGTTCCATCTCGATCTTCTGGCCAGGACGGAGTTTGGCGATTAGAATGTCCGGGTTTACA -GGTGCAATAGCACCGTCGCCAACGAAGAACTGTTCCTGACGACCAACGGGCATGTACATG -AGGTCCTTGGCGTAGACATGGGCATTGGTGTACAGCTTGCGGGGGTCCTGCTCATCCTTG -TCTGCGTACTCGTTCTTGTAACATTCCACGTCAAGATGAAGCACAATAGTGTTGAAGTCG -CAAGGGCCATCTTCTTCCTCGGATCCATCTTCAGACGCCTTCTTGTACCAGTGCATCCAG 
-TTGATTCCTTCAACAGAGCCCTTGAGCGGGATCAGGCCCAATCGTTGTGCGAGCACTTCA -TCTTGAATGACAGAAGTGTTGTTGTGAATGAAGCAGTCCTCAATGGCGATGGTGGGAATC -TCAGCCATAAGGATACGGCGGAAAGCGTTGGCGACTGCGGCGTCGAGACCGATCAGCGAG -AAGGAGGCCTCGAACCGGTCATTGCGGTGGAATTCGACTTGGAAATTCTGTTGGATGTAA -ATGGTCAGGATTGACTGAATACAGCAGATGATACCTTCCTGGTAATATTTCAGCACATAC -GTTCTTGAATTGGTCGAGTGCCCATCCATGCGACTCTCCGGGCCAATGGCCAGGAAAATC -TGTAGAGGGGATATTGGTGACGGTTTCCGGATTGATGCCGATGATCTGGAACGATTAGTA -TTCTGAGCTGCATGAAATCATTGCTAGAGACGTACACGGCGGCGCTGCAGCTCCTCCTGC -GAGGCAACAAGAGGCGCCATGGTGAGCAACTGTATAGGGTCTGGTGGAGGTTGAAGGTAA -AATCCGCGATCCACCGATAAGAGAGATTTCTCGAAGCAATTTTCTAGTTTGCCTGAGGCA -GCAATAGGGACTTTATTCTGCGCAGCGGATGTAGCGTTACGTAATCCATAAGTGTACATT -CATGCGATTAATGCATAACACTAAGCTGATAATGGCCCAGTGTGGGTCCAGTACCAGCAC -AGTATAGTGGCCCCTTCAAGCCAACCAGCGCACGGCTGATGACCCGGCAGCTCTTCACGT -GACGAGCCAAAATGCGACTCCCAACTTGAGACCTTCAGCGACTTCCCTATCCGTAGAGAG -CCATATCCACCAATTGAATGCGCCTGGATTGAGGATCATATCCCCTCTTTTCCCTCTTGG -ATCTCCAATTCTTCATCCTCTATGGCCGCTTCTTTCGCCCGCCTGGCGGGCAATGCGCCC -AAAAGGCTCTGCCTCCGCCCCTCCTCGCTCAACACATCCATTCCCCGTTCCCGCTCCATA -GCCACCTCTGTTCCTCGCCGTCATGCAGAGGCTACTAGCTACCAGGCCACACGACTTGTG -CCGACCGACCCGACCTTCACTTCCCTCGCCAACAAGGGTCTTCCCGAGGACCCCGAGGCT -TCTGCCCTTGAATCCGAGGCCGAAGGGATCGACCGCAAGATTCGTCATTACACCGTCAAC -TTTGGTCCTCAGCATCCCGCTGCTCACGGTGTGCTCCGATTGATTCTGGAACTCAATGGC -GAAGAGATCGTTCGCTCCGATCCTCACGTTGGTTTGCTTCACCGTGGTACCGAGAAGTTG -ATTGAGTACAAAACCTACATGCAGGCTCTGCCTTACTTTGATCGATTGGACTATGTCTCC -ATGATGACCAATGAGCAGTGTTTCTCTTTGGCTGTTGAGAAGCTGTTGAACGTCGAGATC -CCTGACCGTGCCAAGTGGATTCGTACCTTGTTCGGCGAGATGACTCGAGTTCTAAACCAC -CTGATGTCAGTCCTGTCCCACGCAATGGACGTGGGTGCCCTGACTCCTTTCCTGTGGGGT -TTCGAGGAGCGTGAGAAGCTTATGGTGAGTGATTTGGAGCTGCGAAAAGCAAACTACGGT -TAGGTCTTATTTACTGACACTTTGGGTGCTTTAGGAATTCTACGAACGTGTCTCCGGTGC -CCGTCTCCACGCTGCCTATGTTCGCCCCGGTGGTGTGTCCCAGGATATTCCCATTGGTCT -TTTGGACGACATCTACCAGTGGGCGACTCAATTCGGTGACCGCATTGACGAGACCGAGGA -GCTGCTCACGGATAACCGTATCTGGAAGGCCAGAACCCAGGGCGTCGGTGTCGTTAACGC -CACCGATGCGCTCAACATGAGTTTCACCGGTGTCATGCTCCGTGGCTCAGGTGTCCCCTA 
-TGATATCCGCAAGTCGCAGCCGTACGATGCCTATGACCAGGTCGAGTTCGATGTCCCCGT -CGGTGTCAACGGTGACTGCTATGATCGTTACCTCTGCCGTATGGAGGAGTTCCGTCAGTC -TCTCCGTATCATCCACCAGTGCCTGAACAAGATGCCCGCCGGTCCGGTCCGCGTGGAGGA -CTACAAGATCTCACCCCCTCCCCGTGCCGCCATGAAGGAGAACATGGAAGCTCTCATTCA -CCACTTCCTTCTCTTCACCAAGGGTTACTCCGTTCCCCCTGGTGAGACCTACTCTGCCAT -CGAGGCCCCCAAGGGTGAAATGGGTGTCTTCCTGGTCAGTGATGGCAGTGAGCGGCCATA -CCGCTGCAAGATCCGTGCTCCTGGTTTCGCCCACTTGGGTGGCTTTGATCAGGTGTCTCG -TGGTCACCTGCTCGCCGATGCTGTCGCAATTATTGGTAAGTCTTTGTTCCCTTGTCTTCT -TTCATTTCAGTTCTAACATCGGATCTTTAGGTACTATGGATCTTGTGTTCGGTGAAGTTG -ACCGGTAAACGATATTGGCAACGCAATAATAAAAAAGAAAAACCAAACCCGAAGAAAACC -TTCCTGTGGCACCCTCTTAGATGGTTAGATTCGCCTGTGCTAGGGGAAAAATCAGAAAAG -AGGAACCATCCATTTCCTTTAAGTGATCTATAAACCCTAGAACTTTCGACGCTGGCTAGG -GTGTACCAAATGCTCCATGTTTTCTTCCCATGGCGTTGTGCTATTATGTCCTGTCATTCT -CCTTTGTGTCTGTTTTTTTTACCTTTACTGCTTCAGACAAGCCCTGTGTATAGTGAACAT -CTGATTCTTCTGTGAGCTGTTTCTTTTTCAATGTAGGGATATTATCCATAAGTCTACCCA -ACATGAGTTCAAGCCTGAATTTTGGATAGTGGTACCTTTGCGTACAAATATTTTATTAGA -ATACCTATACTCAGTAAAGACCAATGGTTTATATATGTAGAAAAATCACGAGATGTCCCT -ACAATGATACCTATTACTTGGTCATCCATGGGTATCCCTTGCATCATCCCAGGTTGTTTG -CTTCTTTGCCAGTGAGCTGGCAAAACACACACCGCCCTGGAGATATATAAAGGTCCTTCG -TCCCCCAGGGATTCCTCCTCTTTTCTTTCTTCTTCAGCTCTCTTTGCCTATTGGCTTAGA -TCAAGTGTAGTATCTGTTCTTTTCAGTTTAATCTCTGAAAGTTTCCGTCCAGGAAACACT -TTGATTATTCCTATTTTTGGACCTCGCGACTTGGCCCCTGTGCTTGCGCATGCCTCGTCG -CACAGTGTCATTGGGCTCACACTACCCCCGATCGACGCGAACTCACTTTTTCATCAATTG -TTCGGCTTGTCCGGGCGCTTGGAGAAGATGTGCTTTGAACCCACTGGAGTACTAGAAACA -TAACTCTTTTTACTATGCTGAGGCGTGGATGTGGTTGTTGCCGTGCGATCAACCTCGTCA -CGCACATAGTATTGTGCTCTGTCGCCTTCGCCGTAGCATCCTCTCTGTTTGGTTTCTACA -GCGTGGTATACAACAAACTGTCTGTTTCTGTATATATTGTAGCCCGCCAAGGTCAAGGCT -CCAGGTCTTGAGAACAAGGAGTCCGGACTCGACGCTTTTATCACCCTAACTGTCTAGGTG -ACATGCCTACGACAGGCTCATTTGGCACACATGTCTAGATGTTGAATTGCTCGGGTTTGA -CCTTACTATCTTAGTATTCATTCAGTCTGACCTTGTTGCTCAAACTGCTCAAACTTTCAG -ATGCCTCTTCATGACCTCTCTCTATTCGACGTTATTTTGACAACTCCAACGCAGCTTTGG -TTCCAACAGCTGAACCCTTGTCTACATATATCGGAGAGGATGAGTCTCCAGAGAACTACA 
-TAGGCATATGCGATTATAACCGCCCATTCCATCTCTAGTTACCCCCGCACCGAGTCCGGC -TTTGTGATGGTCTGTCTCCAAATCGGCTATATGGCACTGTGCAAGGTACAAAAACACCCA -ATGATTCTTCATTATCTTGGAAACAAAACGAAGTCTAAATGAAATGCTCAACATGAGCAA -GCCAATGGACATCTCTCAGTCCATTATATATCCCAAGTGGTTCGGCGGTTCTGCATCATG -CATGGCAGTCCTTGTTTCCCACCCTTTGGACCTGAGTGAGTTCAGTTATTACATTCTGAA -GGACACAGTTTTGATGGGTCAATCACAGTCAAGGTCAGAATGCAAATAAAGCATAGTAGC -ACGAAAGCCAGGACATTCAATACCTGCGTTCGAATTGTTCGGGGAGAGGGCGTACTTGCA -CTCTATAACGGGGTATGCCTAATGAAAAGCAACGTTGATTGTGAGGAGATACTAACATCT -TATCTTGAAGCTTTCCGCAGGCTTTATGAGACAGTGTTAGTCTACCTAACTATTCCATCT -CTTCTACAAATCCCAGTTGACACTAATCGATTCGACAGTAACATATGGCTCAGTGCGTAT -CGGCCTCTACGAGACATTAAAAGAACGAACCAAAGCAAATAATACTCCCACCTCCCCGCC -AGTTCTTGGGTTATTAGCAGGAATCTCTGGATTCATTGGTGCAATAGTTGGCAATTCCTC -AGATTTAGCCAACATCCGCATGCAAAATGACGGTTCACTGCCTCCTCACCTTCGTCGCAA -TTACCGGCACATATTCGATGCATGGAGACAGATCAAGCACCAGGAGGGATGGAAGGCTTT -TGGACATGGGTTGTGGCCGAATGCTTGCCGCTGTAGTATGATGACCAGCTGTCAGCTTGC -ATCATATGATAGTTTTAGAAATCTGATAGCAGCAACCACTGGGATTCATGACGATCATCC -CGGTTTGCATTTATCGGCGTCTATCTTGGCGGCACTGGTTGCTACAACACTGTGCAGTCC -AATTGATGTGATTAAGACTCATTTGATGGGTTCATCAAGCAAGCAAGGGGTATTTTATGT -TATGAAGGATCTCACTACCACACTGGGGATGGGATGGGTCTTTCGAGGGTGGACACCTGG -TTTCGTTCGGCTGGGGCCACAGACTATGGCAACTCTAATTCTTTTGGAGCAGCACAAGAG -GTTATATAGAGATTTTAAAGAGAAGACAGATACTCAGAAAATGTCACCCGAAGTCAGCCT -TTAACATTCGGTTGGACGCCATGAATGACTCCATAAATATTCGTTTTCACGCACCTGTAC -CTCCATAGACCTACATAGGCTTGAACAGACCGAGCTTGCTAGATATTACAGCTGTGTATT -GTCGGATAGGTTCGTTACTCTTGTCCTGGTAAATGTTTTGTTTTGGTTTCATACATCGGA -GCCTTGGGGCCCACCGTGTGGCAAGTGCCTCAGGCCGTCGACTCGATGGACTGTTTGTTT -CTCACGAGAATACTCACGATCCTCAGATAACTCAGTCGATCGTGCCCATCTTTTTTTTAA -CAGATCAGCTTGCGTTCTTTTCCACAACATCCAAGCGACGCTTGGCCGAGCCGTCAACTG -AGGCCCTTACTATCGCTCATATCTTCGATGAGACCTATTCAGTTGCTGCTATCTGTGCCA -TCTTCAATACTATTTGAAGACGGCCGCCAGTCGCTGATCTATCTCAGGATTGATCACGGA -TCTCTTCCGACTGATAGGGTATAATGCACACTATAAAACGCTGAGTGACGATACTAAATA -TGAAAGTTTCTAGCATTTGACCTAGATGCATGAGAAAAGGACTGGATCTCGCCTTGTCCG -TCCAGTATCAATGGAGACGCCACTAGAGCACCAGTCTGAGCGACTGATTCCAAGTTCCGC 
-AACCCCTCCACTGTTCACCTTACCCGCCGAGATACGTCTGGATATCTTTAGCTACTGCAT -TCCCCGAAAATGCGCCATCAAAGTGATAAACGCCATGTACCGGCCCTACGGCGTTGAGCC -TTTGGTTTCTACTCGCGATCCGTCCCCCACAAGCGACAGGTTCGCCAACACTCAACCTCT -CTGGACCAACATTCTACATGTTTCCAAACGAATGAGCGATGAATGCCTGGACATCCTATA -CGGCCAAAACTTGTTCGAAGTCTACCTGGATAAAGGAGGTGAAGCTACGCTTAAAAAAAC -ATTCAGCAAAGAGAATCTCCAGCGAATCCGATTCATTCTCGCCATCTTTCCCGGCCCATG -TTTTACCGGGGAACTGTACCCACCCGACATATCTTTCTGGGCCATGACGATCCCAAACCT -GAAGCTGTTTGAATGGGTGGCACAGCCAGACAATGGCGCTGAGCAGAACATGGGCGAAGT -TATGATCAAGCAAGAGTTGGAGGGCTGGATGGAGTGGACGGACGCCTACTCGGACTGTTT -CGCTGAATTTCTGGTGACTGGGATGGAATTTAAGATGAGATTTGACAAAGGAGATGCTGA -ATATAGGAAGATTGGCCGGCTCTTTTTCCGGCGAGGACCATTCTTCTTGAAGAATGGACA -TAAATGAGATATGACGAGAGCCACGAGAAGGACCCAAGAGGCATTCCATGATAATCCATG -TTTGAAATTGCCTCAAACTTCAGATACAAAGTCTCGAAAAATTCTACAAAACTGTATCCA -ATTGTTCACATTTAGGAGTTCTTTGTAATGCTTCATTCACAGGAATATCTGGCCGCTTAT -GCGGCCTTGTCAAGTCTTCGTCTTAATTCTGTTGAGATGTCTTCTCGGGTGAAACAGCTC -AAAACGCGCAAAACAACCTTTCAATGTCCAGAGGTATAACATTTGATCACGAATTGGCCC -CCATGAAATGCAGCCTGAGAAGCTCACCATCTTCGTAGCCTATCAAAGTTCTGATATTAT -TCACTCTATAAGATTTCCAACAATTATCCAAGCCCTCTTATATATATGCATTCGGCAGGA -GTACTCACCTCATGGGCCCTTCGGGGCTTGATTGCACTTACTGTCGTACTATAACAGCAA -TAAAAACTAGAAATGGAGGAGAATCGCGTCCTTGCGTGTTAGAACCTGCCAATTTCCTAT -CAAAACCCCAGGCTCTAATAATTGCCTCCGAAATTTGTAGGGGACAGAACTTATATCTGC -CAGCCCTTTCTCCCATCTTTGTTTCATTGATCATCCACGGACCTGAAGGTTTTTCTCACC -ACATAGCCTATGGACCTAGGGAAGTCCAGCGAGCTGTGTAACATAAACTTCGATGAGATG -TGGGCATGGTTGAGCATGTTTTGAGCTCGTAAGTCCTATTCGCAGCAACCTGATTATAGA -ATCTTCTTTATAACCTTGGGCGCGGCTTAGTGTTATTGATCACGGGTTCAGATTTCGTCT -ATTTGGGATTGGCGAGCTTCGACCGATACATAAAATGGACCTCTCATGAGATGAAAGCGA -CATGATTTTGTGGAATCACCGAGGCGATAGTCTAGGGTCTAGTCAGAAAGCGCCTCCAAT -TCCAAATCTTGTCTCGAGGAGTGAATATCCGAGACAAAAGATGCTGGCAGCTCGTGAGTC -GAATTCTAGAGATCGAGTTGATCAAAATTAAATTGTATATCTTTGTGCGTGCAACGTATG -AGCCACAGCTTACTATGTACCCACAGCCAGTGTTGGTCGAAGCTATTTCTGCCCTGCGAT -GCTGGATTGACTCGACTTTGTTTTCAGTTTGGCTTCAAGATTCCATCTCAAGCCTCCGGA -ATTATATGCCACAGACAATGCGCATCCAAGTCATTTTCCCGGCCACTTCCACGGCGCAGA 
-TCCTCCCAATGACAGGTACCCTGGGTACCTGATCGCAGAGTATATAGAAAGTATACCAAG -CAGAGCATGAAGGACCTTGGGCCAATGTCGCAAACCTCCGTACATACAAATATAATGTGC -CGACGATCACAGGTTACTTTCAAAACCAGTGACGTGTTTCTCCCGATCGAGCTTATCTTC -AACATCATAGAATACTTGGGACACCTCAAGGACATCTGAAATTTGCTTTCAGTATGTCCG -CATTGGCAACATATCCACCCCAATGCTGACAGGCTGTTACAACCATCATGGGATAGAAAA -ACATGCAGCAAAAGTGACAAAACATTCACACAGACACACACTGGGCTGGGTCATGTTAAC -TCTGTCTGCTTCGTACTCCATACTCTTGACAACAACGGCATAGCATCAGCCTGTAGTTCG -TCTAATGCCTGAGGAACCACTGTGGCGTGGTGTATGCTTCTATTATGGCGCAGTACATGC -AGGCTCCCATATTTCACCCAACAAGTCTAGTCAATCCGTACTTGTACAGCTGTCAGCCTC -TGATTCGGAGATTAGAGCGCCCGCGTCATTGACGAGCTCTTTGTTCTCGCCTGAATTCAA -GATAGAGAAACCAGATTAAAGAAAGAAAAATCCCTATCAGGACGATCGGCAGACCATGGC -TACCACAAAGAGTAAAGGTGAGGGAATGCCCTCGTTAGATAATTTCAGTTAAACTCATAC -CAGAAGGGCATGGGATGTAGGGAGTTGAGGGAGTTGGACATCCCCCGGTAAATGGTCGAG -AAGCTTCAAGAAGAGGTGGACCAGGTGGCTTTGCTGTCTGGCCAATGGGATCATTATATA -TTGCTTATGTAATTATTATAATTGCACCTGGGGACCTCCACTGGGTAAGACCGCGTCGAG -CTCTCCTTCAGCCAACTTCCATTCCTTTGTTCATCACGCAGCATATTCTTTGGTCTGCAG -TGAGTATTAAATATCTGATTATCATCACAATCATTTCCACCTTTAGTGTCTCTCCCTTCA -CAAATTGCACCCTACCCATCCATCTCCGATCGCAAAATGGCCACCGACTACAGCAAGAAG -ACCAACGCCGAGTTGGTTGAGATCCTTAAAACTCGCTCTCTTCCTCACACTGGAAAGAAG -GCCGACATGGTCTCGCGCCTCCAGGAACACGACGAGAGCAATCCATCCGACGCTCCTGCC -GTAGCTCCTGCCGCAGCCAAGACCGACGCCGCCGAGGATGTCATTGACTGGGACGATGAA -CCCGCAGAGACCATTATCGAGCCATCTACAGAAGCCGGCGCAGCTGCGATTGCCGCTGGT -GGCCAGGGCGCAGTTCCAAACCCTGTCGCCGTCCCGAACCAGCAGCTCGATGAGAACCCC -GCCACAACTGAGGACTTGAAGGTCGAGGCGACCGGCGCCGTCGCTGAACCAACCACCCAG -CCCGAAGCCGTGGCTGAGCAGAAGCCCGCCGTCGACTACACACGAGGATTGCCCCAGACC -GATCTGGAGGCGGAGCTTGAGAAGCGCAAGGCTCGCGCACAGAAGTTTGGTATCGTTGAG -GATGAGGACACAGCATTGAAGGAGGCTACGAAGCAGCTGGAGCGCGCTAAGCGATTCGGC -ACTGGCGCCGAGGCTGACTCTACACCCGCTGCTGGTGTAAGTCGTCTTGACCAGGCACTT -CCTGATGAGCGCCCCCGCAAGAGACGTAATGATGCCCGCACCGACTCTCGCAATGACCAG -GGTGGTCGTGGAGGCAAGAGACGTGACACTGGACGCAACCGCAACAATCGTCAACGGGGT -GATGGAAACCGCAACCGCGGCGATGGGAATAAGACCAATGCTGGAGTGAAGAAGCCTAAT -GCCGGTGGGAACACTGCCCAGAAGTCTTGGAGTGAGAAGGATAATGCGGCCATGGAGGCT 
-CGCAAGAAGCGGTTTGCAGCCGCTGCTTAAATGTTTCCAATAGTCTGCTTTGTCTGGTAT -TTGACTTGGTCTTCGCTTCCATCTCTTGTTCTTTCATTATGGGCGTTTGTGTGTTGATAT -GCTGCGGGACAATTTCAGGGTCATTCCATTCGATCTGTTTTAATATATGTCTCAATGAAA -ATGTATAAGTGTATGAATTCCTGTATTCGTAGAGGAAGTAGTGCTGTCTATATGTCCATA -TGTCAAAACGCTAAAGTCCTGAGGTTCCAAGTGAGTAACACCTGGGCTTGCCAGATGTGT -AAGGAAACCCGAGATTTCTCAATTCATCAATTTGCCTGTCTCCCATATTCAACCACCAGT -CACAGAGTTGAAGTTCTCGACCCCAAACAATTCCCTGGCCCTGGGTTGACTGCGTGCAAA -GTCAGCTGAAGGGTGCATTAGTCGCTTGTTCCTGCAGCATGGGCGTCAACCAGTACGCTC -ACTATGTGAGGATAGGCCTCGTGATTGTGGGACCTCAAACTCACCTTTACTTATTCTTGA -GAGTCTGAGAGTACTCCGCATGAAGTACAGAGTACATGCAGTGTTACAATTGCAACCTAT -AACACAAAAAACACTGGTCAATGTTGTTGTTATTCCTATTGCCCGGATTACCGCCCTCTC -CCCACTTAGATCAGTGACCCTCCCCGCGAATCACAGCCCAGAACTTGCTTTTTTGTAGAA -AAATTGGCTGCGAGCTAACATTTGATTATCGTATAGGGCTTGTTCATTGCTCGATCATTC -AGTGGGCCATCACAGCCCAGTGACTAGCGGGCTCCACTGTCTCGAGGCTCCCTCGAACAA -TGAAACCCCCAAATGCTCTCAACGGATAGTCACTGGCTCGTGGGACGGTACAAATCTAGG -CGTTAGACTCAACATTAAAGCCCCTGCGTTTTTAAACTGACCTATCGATGATTGGAATGA -TTTTCCCCGAAGGGTTCGGGACGGGCCTCTGTACCAAACGGCCCTCTGTACCAAACCATG -GACTATGGCTGTTGGGAAATCTTCTAGGTATGAGGCTTGATATGTCTCATGTACGGAAAC -ATATGGGAAATGAATTTGTGATCATCTTGGGACATATGAAGCATTTGACAATGCATATGC -ATGGTCCTTCAAGACCCCGTTGGGAAATTCTCCGAATCCCCGGGAAAGGCTGGAGTTGCA -AAATGCCaaaaaagaaaaaaaaaaaaaaaaaaCCGCGTCGATCATTTCACAATTTGCGTT -CAAGGTGTTCGGCGTCTCCGAATGCGTTGTTAAGGAGGCGAGAGACGCGATAACCAGACT -GGTCGTCAATACGAGTTATCGATGTACAGTTCTAATCAGGTGCTCAGCATGAGTATACAG -CTGACCTGATATACGAATCAAGTTAGGACTTGTAGGTCTCTTTTTTATCAAATGCACGAT -TAGCCAGGGCTCCTACGCCACGGGTTGTACCATAGCCTGTCACGTTGTGTCTGGCCCTAA -AACCTTTACCGCCGTGAGCCGATACCGTCCAAGCACAGCCAACGACAAGTCTCTGCTGGG -AGAATCCTTGAGTGGACCGGTATTTCCCTCCCCATGTGGGCTTCGGTGCGCGCAGTCCTC -TTGATCTGGCCACTGGGGAATCTCCCTAGTTATTTTTGTAGAATTCTGTTTTGACTTTAC -GATGGGGATCGCCTCAGAAGCACTGCGCAACTTTCGCAACGTACTGATCCTTTTGACAGG -CTCTAGGTCTGCGCAGTCTTGATTCCTTCCCGTCTTGTCACAGTAGTCACATTACTTTCT -TGCCAATAAGATCGCTGGCTACGGACTTTCTACATGGTCATTGCGGCTTCAAAATATTTC -CCCCCCCACATCCGAGGGGCAACCTCAACACATAAATCCCTGGCGTCTCGGAAAATCCCC 
-TGAGTGTTTTCGTTTTGACAACGTTACAAATTATTTTTGTCTACTTCAGCCGGATTTTAC -CTCTGACGTGGCCAAGGCTGGAGAACGACCCTGGTATTCAGCGCCACATTGCTCCGGACT -GTGGCGAAGCAGCTGCAGCTTTAAGGCTTCGATCCACAGTGGACTTTACTCTCGTTTATC -TACTGACGTTCGTGAGTGTACTTTTTTCTTTTCTCTTATAGTTCTTAATATCCTTGCAAT -GTCTATTGACACTTTCTAGTTGTGCAACTTTGAGATACCCTAGtttgtttgtttgtttgt -ttttttttgtttttttGCGAATTCTATCTACTTTTACTTAATTCTATACACTGTCAAGCG -CCAGGCGCCAACAATATTGATATTTTCTTTTGGCATGACTCAGATACTGTGAGCTTACAT -CCTGAACGTGAAGGTGGACCGATGAGCTGATTAACTCAGACAGCGTCATTCCCCCATAAC -GACGTGATGGTTACTATAACGAAGTACATGTACCCACCGTTTAATCATTTGTCCCTGGAT -TGGAGCCGGGGTGTTTACTCATCCGTACGCCATGTCGTACAGTTCTAGTCATCACATGCC -GCACGATGCTTCTTCAAACCTATCACATACCAATGCTGGTGCTTATAGCCGACCTCCGAG -ATCTCAAATTGCGAGACCAGTCAACCACACCTCTAGAAACAGAACCCCGTCGGTATCAGA -TGCCGAGGATACCGGTCCGTTTCCAGTACCTTCGGTTATTCAAAGATCCACCAGTCGTGG -AAACCGCCTGTCCATGCTCCCGCGCCCCAGTATTGGTAGCACAACATCTCCAATCACGCC -AGTGTCGGCTTCCTCTACTCAGAATTCTGCTCAGACGATTCCCTCCTTGCTTTCAACTCC -AGAGATCAATAAACCCTCGCTTTATGCTGCTGGACAACAATTACCCCCACCAAAACGCAC -GCGGAATGTTCTTCGAAGGAAGGCACCGACAATAGGGAAACATGTGGAGCAATTGCACTC -TGACAAGCTTAGCCTTGTTATTCCGCAAGAGTCCCAAAATTTTATCGTTATGAACAATGT -GTCGGGACAGGGAAATTCGACATCCTCACTTTCAGCACAATCGGCTTCCTCTTACAAATC -GACCCACAAGAATGGCAGTAATGCGGCGGTTAACTTGGTGAGGCCTAGATTCAATGGTCC -GGAAGAGCTGGCAAGTCTTCGAACAACAGTTGATACGCAAACTTACCCAGCACTTCCCAC -TCCTTTTTTGATGTCAAGCAGTCCCTCGACAAGATACTCGGAGTCTCCGAGTATTTGGAG -TCGTGGATCTACCCCAACATCATTATCATCGTATTCTCCCGGTATCACAAAGGTTGGGCG -TTTGAGACAACCAAGCCCTTCACAAACAAGGTTGCCTGTTTTTTCTCCTCCACCACCACA -TGCAGGCCCTCAGCAAGAGAAGCCGGAGCTGAAAAATCAAAACCGAGTACTTAAGAAACC -TTCCAGCGCAAGCCCAATAGAACCCAGCAAAGGCCAACGATTACCACCGTCTAAAAGCCC -CTCCACAATACCCCGAGGGCCACCTCGCAGCCCGCCACCAAGGAAGTCATCGGTCAATTT -CAGCCCTCCAAAAGCCACCGAAATCAACATTGAGCAGGCTCGCAGGGAAGTCGAGGAAGC -CGAGAGGCGTCTATTTGAGCCCCTAAAGGCGATGGAATGTTCCCCCGCACGGAATGCAAC -GACACCACAAACTCCACCACGACCCAGCCGAGATGGAACACATCGCTTACAGCTTGAAAC -ATCTCCGATTGTTCAAAGCAATCTACGGTATATTAGAACAACGGGACACACAAGACGCGA -GTCGATAGAAAAGGTCCTAGTAACACAGCGGCCACACCCAACTCCCAGCCAGTCTGCCGC 
-TGCATCAATCGACTCGCTGCAATCAAGAAGCATCTCTCAAGTCCCTTCTCGAGATGGTGG -CTCTCCCGTACTGTCCAAAAAGCCACCTCGGACATTGACCAAGCTGACTTCTCACGAAAA -GCCGGATTCAAAACCAACACAAAAACGGTTCGGTCTATTTACGAAGAAGTCCAAGCCGGA -TATATCAGAGAATCTGACGGAGCAAACTCGAGCCACTCGAAAAGGTCCAGCCGCGGGAAC -GGGTCACGAGGGCTACGGAAAATACGGACAGCGTGGCCGCAAAGCAAGCGGTAGTAGTAG -TAGCGGGACAAGAACCAGGTCAACAAGCACAACACGAAGTGCCGCTAGCAAAGGGAGTCA -ATCGAGTCACCCGGAGTTGGATATTGACGATTTTTTATTGAGCCGTCTGGAGCCTGTCAT -TATCAATGGCGGTGGACTTGATGGTGCATCGTTGTCACGAACACAAAGCGAGCAGAGTTT -CAGCAGTGTGAGCATTGCGTCTACGTCGAACCTGCCGCGGCAAACTCTGTCATCATACTC -GACTGGCCAGTCTACTGAATCCCTCGCGACTTCAACGGGGATATTCGGTGAGACTGTCAC -ACCGGGTTATTCTAGTGGTAACGGACCGATGGGAAGCAAAAGCCCCGAATCGCTATCTAA -CACGAGTGAGCTTCCAAATGAATCGAAGTCACGTATGCCCGTCCCGAAAGGGAAGAGACA -TGCCATGTTTGCAAATCCCCAGGCTGCTGTCACGAACACATCTCTTCCTAGCCACGAGCC -AAGCACTGCCACTTCACAGCAACCGAAGAGTGCTCAAAAGTCTTCTGAAGAAATGCCCAA -GCCTGAACAGGCTGAGAAGCAGGCTAAGAAAGAAAAGACATCCATGTGGAATTTCTTCCA -GAAGAATCGTGGCAACGAACAGAAGATTTCAATTCCTCCGAACTCGTCACAAACCGCGAA -GCTTCATGCGGCAATCTCCCCGATTGTGAACAGTCGGCCCGTTGCCCATTACGCCTTGGT -GGATGCCGATTCGGACGAGCTCGATGAAATCATCAATAACATCGAAGATTCACCACCAAC -AGAGGAGGAAACGATTTTCCATCCGGTGGAGATCCCAAGAGGCTTGAGCATCAGAAAAAG -AAAGCCGTCGATTCTGCTGCCATCTCCGCCAAAGATGCATGGCGAGTTTGAAAATGATAA -TCGTCCGTCGCCGAAGACCGCCATGTTCAACCGCAACCTTATGGCACCGGAGACTGAGAT -ATCACCCGAGCGACGTCCAAGGCGTTTGGCATCTATCGGTCGGATTCCACCAGTAGTTTC -ACGGCGCGATAGACACCACCAACCCGCCATGCAATCTTTCTCCCGTCCATTCAGTGTTGT -TGAATCTCCCTCCCTCACAATTTTAACGACACAAAGCTCATACGAATCTCCACCTATCGC -TGAAGTTCCATTGAATCTAGGAATTGCTCCGAACAAGACCTTTGACTGGGGCAATGGGTT -CAACTCGACCTTTAGCGCTTCCGAGCAGACCAGTGCTTTGGAATTTCTTGCTGGACCTTT -TACAACCTATGAATTCCTTCAATTTCCGCCAAAGAAGGGCTCAACCTCATCAGATAGCTC -TGGAGCTATGGCTGCAGTTACAGCCGTAGCCCCTGTTCCAGGTTCCCCACCGACCGAGGA -CGAGGTGTGGAATGAGTATAATGACCTTATTGATCATGTTTTGTCGCCAGACGAACCTAA -GCCCGAGCAGTCGACAAAGACTGAAGAGGAGGACAGATTTGAACTTGCAACAATGGCGAG -CAGGGCTCTGCAGGACGAGTTAAACAATGCTAATCGTCCCCAGCCAGCACCTGGCGGCAG -TTCAGTCCGCTCGAGTGGAAGCTCCGTTCATCTTCGTCGTTCTCAGATAGTCTCTGCTTT 
-GCAGTCGTCTACCGCTCCTTCGTCTCAACCGTCTTACAGTGACCTTGTTGCTGGATACAG -AGGGTCAAATGAGGAGAAAACCAAACAGAAGCAGACCGCCAATCCTGCTGAGCAGCCATC -TTCAAAAGATCAGCGACTTGAACAGCAGTCTACGTTTCTCAACTCCCTTGCTGCTGTCCC -TTCGCCTAGGCCAAAGTTTTACCGAGAACAAGAAACCTCATCTTCGGAGCGTGAGTGGGA -CGCAGTGACACGGACTAACATGCGTTCTGCTTCGCTCATGACTAGTCGCTGGTTGTCTTT -TGGCCGAGTCCTTTTCAGCCCAGCGCATAATCACGTCAAACCCGGAGGACAAGGACGGAT -TCTTGTGATCGATGGATTGGGAAATGATGACTGGTCTTTCTACTGTTCTCTCACGTATCC -TGATGCAGAGGTATACAGCCTAGGTGGACGTCCTGTATCCACGGCACCTCCCCACCCCGC -TGCATGGCAGCCCCCTACCAACCATCACACTGTGTATCACGCAGGGCTGGACAACCCTCT -CCCCTTCCCAAAGGACTACTTCACAGTCGTTGTGCTCCGATTTCCTACTGTCTGCTCCGA -GACCGTCCAAGGCAACATAATCCAAGAATCCAAACGTGTTCTCCGCACTGGCGGGTATCT -CGAAATGAGCCTCCTTGACCGCGACATGGTTAACATGGGACCTCACACGCGAAAAGCCAT -CCGTCAACTCAAAGAAATGACCTGTCTCGCAGACTCGACTCTCAGCTTAAAACCAACCAG -CGACAGCGTTCAACGTGAACTCGGCGCGCAGGGCTTCGACAGTCTGCGCCGCTGCATGGT -TCGAATTCCAGTCGCAGGTATGGTGGTCCGATCCTCCGACTCCAGCTCATCAACACACTC -TATTTCAACAGCTACCCCTTCCACTGCATTCTCGTTGCCTACCATATCCGTTACAACCGC -AAGTGGTAGCCAAAGCACCGAAGCTGCATCGAAATCGCCATCCAATGATGCGAATATCTC -TCTCGGTGATTTGCTCTCCGATCCTTCCCCATCACCGGCAAATGATGAGTCGATCGCTAA -GATTGTGGCCCGCGTCGGACGCTGGTGGTATTCGAAATGCTACGAGGATCCGGTTTTTTC -AAATGGAAATACCCAGGATCCGAGCATTTGGAACAACCGCAAGGTACTTCGCGAATGCCA -AAAGCGTGGCACCGGGTTCCGCATGTTCATTGCATACGCTCAGAAGCCTAGTGAGGTTCC -ACGACGCACAGCAAGTGTGTAATGGAACGACACCATGGTGTATTTGATCGCTACTCTTAA -CTTATTTTATTTTGTTTTTGTTCCTTGATGAGATCCATGATTGATTGACATTACACGTTG -CATGATCCAGAAAAAAAATACCCATATATATATGCCTTACGAAGTCTGATTCTTTTTGAG -CCTGTGGAAGACATGGGCTTTGTGTTTCCTGTTCGGGACTTGCATCTACTTTGCATCTGT -TTGTTTTGTTTGTTCAATATGGCGTGCATGATGCTATGACTGATATGTGCCATAGAAGGA -GGTGTGGGATTTCCATTGTACAAGCAGGACTTAATGTGGACAATCAATTACTTTAATATT -CTCATCTGCGCATCCATGCTCAACCCTAAGAAATCTAGTTACCCTGCAGGTGCAGTTTCA -TAAGCATTGTCATCCCCAGAGGACGCTCGGTATGACCTCAGCTGATAGATCAAAGCATCC -TGAAGGCGCATTATTTCTCTTTGCAGATGCATTCTGTCATGCTGCTCTCCAGTCTGCTTG -ATTATAATGCTAGCATGATTGTGCATTAGACATAGACCTTAAGCCATGACAATAACCAGA -CCACATACGCTTGAGTAACCCTTAGACAGAATATCTGCCTTTTCTTCCCCAATTGAACAA 
-TCTGTTTCAGCTTCTCCTCCGCCTCGACTTTTTGAGCTAGAAAGTAGCCCCGTCGACTAA -GTAGTACCAACAAGGGAAGACGGTCGTTTTGGGCTAGATCTGCTCTGACTTTCTGTGCCA -AGTTACCTAGATGATGGTATTGTTGACTCTGGCTAGCTTCGGATTGTAGCTCTTCGGGGA -TCAGGTGGGAAATGCTTTGGAGCCAATGTGCAACAGACTCGAATGCCGCATATTCATCAA -GAAACCGATGTGAAAATTCTCGCATGCCTTTTTCAGCATCCAGTATGCGATTTGTGTAAA -TCATGACAGTGACTGATATCGCGTAAGCCAATGTCAGGATTCCGATAATGTTGCCCGTTG -TCGAAAGGGGGCTATCGTCGCATGATAGTATTTGTACAGCGCTGGTATTTCCTGCCATAG -TCAATGGACTGGCGCACTTTGTGGCGACAAATAGATATTCAAGTCGAGTCCAGAAAAATA -GCCCTATTGAGGTTTCAAGAGATGAGCCCATTTTTCGGGGGCTAAATACGGGAAATGCAG -CTGACCTCGGCGTTGTTTAAGCGACTACATACCATTTACCGTAGTCGCTTACTGCCACTG -CGCCACTACAGACTTTCTTATCTAGAGTCTCGACCCTTTGTATGGACCTTTTAGGGGAGG -CAGGGGAGGCTTTTGAGTCTTTTGTGATATCCTCTCAGTGATACAACAATTTGATAAACA -TAGAAAAGTAGACTCTAGAGGACTTACGCATCGGGGATTCTTGATTAAGCATCAAGAACT -CGACTCAAGACTTTGACCTAGATTCTCAAATTGTGTAGAGCACTGGGTGTCATCAAATGT -AATGATATACTTTTGACCTCATAAGTAAGCTTACGCCTTAGTTAAATGAACCTTCAATGG -GCCCTTTTCCCACATCAGGTAGTTCCTCTGATTGATCCAATTCCGGGCTTCGTCAGCAAT -CTCGATATCAAAGTTGAATATAATCAAAGCAAGAATGAGACGCATCTCGCTATATGCAAG -ACTGAAGCAAGCTTGTCAGTATTGTCTGTTACTTCCATTGGTAATCTAGGTCAATACTTA -CTTTCTTCCAAGGCAATTTCTCGGCCCGATATGGAATGGTTGTAATACCTCTCGGCAATC -ATCAGCAAAGGATGAATCTCCCAGGAATCTCTCTGGGTGGTATGAGTTAGGATCCTTGAA -ATACTCTTCGCGCCGATAAAGTGCCCATTGATGAACCGAAACATAAGTCTTGACAAACAC -CGTCAGCAGGGATCACAATTTAGTGTTCACATGAGACTTACATTTGCCGGAACAAATTCA -CCCAAGACTTTTGCACCCCCCTCAGGACACATACGGGGTAGGCCGTTCGCAATCGGTGGA -TACATGCGCATTGCCTCGTCGAGACATGCAAGCATGTAGGGCAACTTGTTAACAGAGATC -AGGTTGATCTCTTCCGAGCTCTGAAAGACAGTACGCACTTCCTCTTTCAACTTCTCATAG -GCAGTTGGGTTCGTGAGAAGGAGATATGTGGCTCCACTTAATAATGAGGCTGTGGTCTCG -GAACCACCGATGATGAGAATTTCAGCATTTGCAATGAGTTCATCGATACTTAGGTTCTGA -GATGGTCAACCATTGTTCTTGACGAGAATAGTTGAGGACTTACCAGCTCCCCCCTTTTTC -TCAAAAGTCCCTCAATCAAATCGGGTCGACCCTCTTCCTTTTCCATCCGTCGCATCATCT -TCTCTTCAGTGAGTTTTTTGTGTTTCTCGTGGGCCTCTTTCATTGACGCAGGCACCATTG -ATAACAGGGTTCGCTTCACGAGCGGATAGAACGAGAGTGCTTGCAATATGATGCCAAGTC -GTCCAGCGTCGAATATCCCCTTAATCCATCCATCAAGGTTTGATCCCTGAAGACAACCAA 
-ACGGCTCACCAAATGCCAGGTCTCCAATAATATCGAAAGTGGTATAGTTGTACCAATCGG -AAAGAGTTACCGGTTTACGAGACTCGCAGAACTCATGTAATCGGTCAAGCAACATATCAA -CATAGGTACGGATCATTGGCTCCTGCTCTCGCATTGCCTTTTCAGAGAAGCCATGTGCCA -TTTGTCGGCGGAGGCGTCCATGTTGTTCCCTGCTTTCGTTGACTACATGTGATGGTTGGT -CCTCGACCGGACGGTAGAACCACGTTGCCTTTGAAAACTCAGCTTGTCCTTTCTGGTGGC -CCATAATATCTTGCCAGGCATCGGGGTGCGAGAAGGCCAGTTCATCTGGCGCAATACGGA -CAATGTCGCCATACTTCTTATGCATTTTCAGCATGTCGAAAGGAAGAGTTCCTCGAACAG -ACCTGTAGACATATGCCAATCGTGATACTCTATGTTCCAGTGGCCCCGGAAACCGGCTCA -AGGGGTGGAAGTACACATTGTAGATCACCTTAGAGGCAAGATATAATATCACCTGAACGT -GTTAGAAAAGAAGTCATGAAAGGCATCACATAGTGTACCGTAAAAACTCCCGTCACCAGC -GCATGCTGTACGTTGAAGATAATGCCCATTCTGACTCGGTTTTCGCTCGCAGAAAACCAC -TCCAGCAAGGCAATCCAATGTGACGTGTGGTATCTGTCTTATATGTGTAGGTTCTCATGC -GAACCGAGGTTGCTCATGTTGATCGACATAACGATTGATATTCCTGTAACAGTGTCAACT -TCAAACATCCATGAAGTTTTGTTGATTGGTTCAATATCCTAGCGCCAATGGGGATCGTTT -AGGGGTTGGATATCCGAACCCGAACCCGAACCCGATGTCGATCAACCAGATCACTCCAGT -CTATACAACATCCACCAATGGGCGCTACGAAAGGTACGAATACGAGCCCCTGCTCTGCAC -CAAAACACGCGGTGGGATCCCATTTATGTACATGATCAGTTGAGATCCCTATTGATAAGC -GCACGGATATGAAGAATCCCGTGAAATTTATGCATTTCATGCATATGACCGACTTGATTT -CATTCAAAAGCCCGTCGGCGCACGCCAAAGAGCCGTGGGGGTAGGCTTAAACTCTCAGTG -CTCTCAACATGGACTCAAAATCGCCAGATCCTGAAGAACAGTCTGTCAAGCTGCCAGGCG -AAAAGCCCAGCGGAGAACACGCCACAAATACATCCGAGTATGATGTCTTCTGGGAGGAGC -CAGACAGTGAAGACCCCGAGAATCCAATGAACTGGAGCTCCGGACGCAAGTGGACTATCA -TTGGGATGGTTTCATTCATCACTTTCTTAACGTAAGTACCTTATTTCATGTTTTCTCGCG -TCAACAACATTCTGACAATACTAGTCCGCTCGCCTCTTCGATGTTTGCGCCGGGTATCAG -ACAAGTCCTCGAAGACTTCGGTTCAACATCTAATATCCTCTCCTCTTTTGTTGTCTCAGT -CTACGTCCTAGGATTTGCTTTCGGCCCTCTGATTGTCGCACCCGTCAGCGAGTATAGCGG -CCGCGCATGGGTTTACAATATTTGCAATGTCTTGTTCGTCATATTCAATATCGCCAGTGC -GCTAGCTCCGAATATGGCGTCTTTGGTGGTTTTCCGATTTCTGGATGGGTTTGCAGGTGT -CGCTGCCATCACATGTGGTAGTGGCACGATTGCAGATCTTATGCCGAGAGAAAAACGAGG -ACAGGCGATGGCTATCTGGTCCCTAGGACCCCTGTTCGGTCCGATCATCGGTCCCGTCGT -GGGTGGGTTCCTGGTGGAGGCGACAAATTGGCGATGGGTGTTTTGGGTGCTTGCCATCGC -GGTAAGGAAGACAAGACCAGATCAAAATAGTCTTCTTTCTAACTCCTTTTCTAGGGCGGA 
-GCTGCGTCGATCGTCTTCTTTTTCGCCGTACCGGAGACATACGCGCCAGTTCTCCTGGAG -CGGAAAGCTGCGCAGCTGCGAAAGTCAACAGGAGATACTGCATACAGATCTCGATTGGAT -TCTGGGATCCCTCCAAAGCAACTATTCCTCCGGAGTCTCATTCGACCGTCGAAAATGCTT -GTTCTATCACCAATAGTAGCCCTAATGTCCAGTTACATTGCTGCTCTTTACGGCTTCCTT -TACATCCTGTTTACAACATTCACGATTGTGTTCGAGGGTCAGTATAATTTCTCTGCCAGC -GGCAGCGGCCTTTCATTCCTGGGAAGCGGAGTGGGCATGTTGCTTGGTCTTGCCTACGTG -GGGACATTGAGTGATCGCAAGATCAGAATCAAACTCGAGCGCAAGGAGACACCTACTCCC -GAGGACCGACTTCCCATGTATCTCACTATACCTGGGTCATTAGCCATCCCGGCAGGTCTC -TTTGTCTACGGATGGTCGACAGATAAAGTGGTCCATTGGATGGTTCCCGAGATCGGGAAT -GCAGTTACTGGGTTCGGAATGATCATTATATTGATGTGTGTCCAGACATATCTGGTAGAT -GCTTTCCTTGCACATGCTGCCAGTGCTGTGGCTGCTTGCACTGTTCTGCGGAGCTTGCTG -GGAGCTCTTCTTCCTCTGTGTGGATTGCAGATGTACGATAAACTTGGCCTTGGATGGGGA -AATAGCTTGTTGGCATTGCTTGCTTTGGCAATGGCCCCTATACCGATTCTATTCCAAACA -TGGGGAGAGAGGTTACGGACAAAGTGGACTATCGACTTATAGCCCAAATGGTTAGAAACT -TTAAATATCTAGGAAGCATACACAGTTAATATAATTAAAACAGCAAAATAAGGCTCCGAA -GGTGTGGAAAGCATAAATAAAACCCTTTGGCTTCTCTTACGGCAGCACCCAGCACTATCT -GTCTTGCGTCGCTCACATTCCTCATTTCAGATCATGGTATCTCAAAGTAAATGTAACCTA -TCATGAACAAAAGAGATTCGTACAAAGGTCATAATTTGTACAGGAATAGAATGAGAAAGG -AAAAAGAAAGGAAATGGCCTTCTACTCAAGAAGAGCAACCATCAAGCGCTGGTAATCACT -AGAAGTCTCGCCACGCACCCGGTCAATCAAGTTCCTTCCGTACTTGTGTTCGTAAGCGCG -CTTAACCCTTTCCTTGTGGTTACGATCCCAGTGGACACGGACGACACGGACGACAAGGCG -TTCGTCCTTGGTACCCATGCCGCTCATGCAATCCTCGAGTAGGATAGCATCACGCATTGC -AGGATCTGTAGCCGAGCGGAGCATCAAGAGAAGTGCGTCTTCCATGTGACCCGAGAACTC -ATCCTTGATATGCTTCTCGAGGGGGATACTGTAACGTGACTCGAAAGTCTGGTTGATAGC -GCGGAGCTCAGCATTAGACGCCTGAGCAAAAATAACGCAAACCTCTGTGATGTTCTCTTT -CGCTTGTAGGCAGCGAACATCCGCCTCAATGGTTTGCGGATTGACAGGAGTAGATTCTTC -ATGGCGAGCAGCACGAAGGATAGTGGTGAAAAGAGTGGAAGTCTTGAAGGAGAGGTCACC -TTCAACATCTCCAGCGAGCGACCTTTTATGTCTCATTTCGTAGGCGGTCCGTATAGCATT -GATGTCAGCGTTGGACCGGCCGAGCAGGATATCATTGAGTAACCACTCCTTTGTGCCAGC -GCCATCAATTGCGTCGTGCAGGTTCTCGACATCATGCATGAGCGGACCGTCGAGCACGGC -AAGTAGGCCGTCACGGAAGTACCCGCCGGTCTCGGACTTGACATCTTTGAAGAGATCGCG -GCCAAGATGCTTGGAATATGTGTCGCGGACAGCGGCCATCTGGAGTGGGTCGAGTTGCGC 
-GAGGACTGCAATCAGTGCCTTCTCGTCCGTGCCGAAGCCCTTCATCGCTTTGCGAAGCGC -ATCTGCTTGAGGTCGGAAGTCGCCTGGCGCTAATTCACCAGGGATGTAACCGGGGGACGG -TCTTGACGGAACACCCTGGGGTGGGAATGCGCCAGGTGGTGGTGCGCCGGGGGGTGCGCC -GTATCCAACAGGTGGTTGGCCTTGAGGTGGGTACCCGGGACCACCTGCGGGCGCTTGACC -TTGGGGGGCATAGCCATGCGGCGACTGGCCGTATGGAGGTTGCGCGGGCGGGGCATGGTA -TCCGTGGGGTTGGGGAGACGGCGCGCCATAAGGAGCTCCCTGGGGAGGCGGGCCACCGTA -GTAACCTTGCTGGGGTGGAGGGTAATGTCCTTGTTGAGGCGGGGGGTAGTGGCCTTGCTG -AGGGGGATATTGACCCTGCGGGGGAGGGGCACCGTAAGGCTGCTGATAGCCTTGAGGCGG -GGGTCCCGAAGGAGGGCCTGCCTGTCCGTATTGGGGCGGGTATCCTCCGTACGGAGGTTC -TATTGGCTGAATTAGCTCATGTCTAGTCTCGAAATGTATTCAATATATGTTCCAATTCTA -CTCACGGTTAGGGTGATAAGACATGGTAAATTGTTGTAAGGAATGTCAGGGCTGAGGGGT -GTTGTTTATAATGTCCCGGCTGCCCCGAGTGTGGCGCAGCCAGGTGGGGGCGCTCACAGC -CTTGCGGCTTGACGTCTGCCAATATTTACTTCGGGATATCCAAGGTTCATGGTGCCGTAT -CCATGGGTCTTTCGTTCTAATTAAGTCGGGACTTGGTTGCCTCCAGTTCCAGCAGCCTTT -CAACATGTACATGTATATACACAGAACTTCATTTTCGACACCTCGATTCATTCTAATAAG -ACCTCTTACATGTTCAAAGTCAAAACTCATCACAATCACAGTTTCAGCCATTTTCCTCCC -CCGCCAACAACGGTTCCCTTCTTTACCTCGCCATCGCTGAAGGGCATATCGAAACCGCCC -GTGATCGAAATCCCATTCTTGAGCTTGAGTTCAATCTGGTCCTTTTCTACGGGCAATCCT -TCAAAATCTTCAAATTGTTTGTGTAACTGGTCGGCTGAGTCATAGGTGACGTCGGCATTT -GGAATGGAAAAAGACTGCCCGAAGCCAGTCAGGTTTCCCACGAAAATATACTGAGCGCCT -TCAACTTCAAATTGGCCAAGGATTCGTCCTTTGTCGCTTGTCAGGGTTCCAGTTCCTTGA -GGCATCTAATTGATGAGTCGATCAGTATGCGCTACCAATGGGAACGTTCGAAGAACGCCA -AGGGCTTACTGTTGGGAGATCTTGCAAATAGTTGCCGGTAATGGGATTAATGCTATGGGT -ACAGGTGGTCATTCCATCATTTAAAGAGAGTGGACAAGGAAGAGCGGTCAGTTTCCAGAC -TTCAACCTGGGATACCGTGCTGTGCGTCCGATGTCATACATGCAGGTCAAGTGGAGAGCC -GTGCTTGGCCTCCATGTTATGGATGGTGGTTTGCCTTATCGAGGCTTACAGCACAAAACA -TGGTATGCAAGGTGCAACGAATACCAGAGATGATTAGCCACGTAATGGAATAGGTCTCCA -GCTACCCTATCTGGCTGCAGGTCTTGTATATAAAAGGTAAAAGAGTCAGGATAATTTAAG -TTGCTGATACGGCATGAAATATATATTTTTCTTTGGATCCCCTGCGGCGATGGAAAACAA -CACATTATTTAATTTCATGGCGCATGTGTATATCTCTCTTTTTAGTGGGCTGCTTGAGGC -AAAACTTGTCTATGCCAGAACCTATATAGCTTAGAATATGCTTCTTTATTCCGAAGATCT -TGAGGTCCAAACTAGGAATGCACACTGTTTCCACCAAATTTGATAGCGCTAGCCGTAATC 
-GGCTAGTGCAGCCCCTTGACCGAGATGCCACTGCGCCTCCGCAAATTCCGTGGACTAGGC -GCTTAGATTTCTTTTCAATGGTACCATTCATATTAAATGCCTCTCAGGGTCCACCATATT -TGCATGGTACCTGTTAGAGATTGACATGGACAAAGTCCCTCCAGGCCGTCGTTCGCACCG -GAAATCGCGATCCGGATGCTTGCAATGCAAAAAGCGAAAGGTCAAGGTGGGGAATCCACC -AAACTTTCTGTTGAGTACTAGCTTATAGTTCTGTATCAGTGCGACGAAAGCCAACCACGT -TGTCGCAACTGTGAGAAGCACGGGGTGGTTTGTTCCTTCGCTGGTCCCTCAACTGTGCTT -TCCACGCCACAAGCGGATCTAGCAAACTCACCTGTGCCGCTATCTGGGCCAGATAGCGCT -GGAGCTGGAATACTCCGTGCCCCAGCACTTGGCATCTCACAAGCTCGAGTTTTACCAAGT -CTGTCGTCATTAGGACCCTTCCCTTCCACATTGGCTGTTTTTGACTTGGAACTGTTGCAC -CATTGGACAACCTCAACCTGCTATACACTATCGCGAAGCCCAGCTGTGCAAACCGTCTGG -TGCACTGAAGCACCTCGAATTGGATTTGAAACGCCGCCAGTGCTCCACACCTTGCTAGCA -CTTTCTGCGCTACATCTAGCTCGCTGCGATGAATCTCGACGCGCAACATGTCTTGCTCAC -GCACAGATACATCACAGCACCGCCGTGAGGGAGATAATCCCGCTCGTCTCACCTCTCGCG -CGAGATAATGGTGCAGCGCTTTTCATTTTCTCTTCTCTAACGTGTATGTTCTCCTGTGCT -AAGCCATCGGAAGAGGGCGATTTCTTAGTCCTCTTTGAGCGTGGCAATCTATCAGAGTGG -GCCCGTCTTTTCCGAGGGACAACCACTGTTATCCGCACCGGTGGTGAGGACCTTCGTACC -GGGAGACTGGCACCTATCTTCTCGAATGGATCTTATCTAGCTGCTGCCCATCGCTCGCCA -CATGCGCTTGAGCAAGGCAAGCCACACGTTTGGGAATTGCAACAAATGATATGGAAGGAA -TGTGCAGCTGACCCATCACTTCGAGTGATCTATCAAGAGGCTTTAGATGAACTAGCCCGG -ACATTGGGTCTAGCCATTAGGCCCGGCATTATGCGGAGACTCGAGAGCGCAGATGTCTTC -CGGTGGCTGCTTGACGTCTCAGATGAATATTTAAATCTTTTGTGCCAGGAAGCCCCGATT -GCACTCATTATCTTTGCTCATTGGTGCGCATCTATTCGTCAGATCGAGTGGATGTGGTGG -ATGGAAGGTCTAAGCTCGCGCCTTATGACACAACTATACTCTGTGTTGGATCCGAAATAT -CGAGATTGGCTCTTGTGGCCACAGGAAATTATTAATGGGAGCCATGAACAGCCAAGTTAA -TTATCATTATAATTAAATGGTTTGAATATTCCGCTAGGGAGGAAAAACCACCCTCAAGGT -AGAAGAATCAAGGGATGCAGAAAATGGATTGATGCCAAGCACACACTGAGAACTATTGCT -GAAACTGCTGCTGGATTCTCTTGTGATCAGATGTATGGATACTACCAGTCTCTATAATGA -ATATACAGTGTAGAAGAGAGCTTGACTGGAAACTGGTACCTGAACTAAGACACTTATGCC -TATCAAATCATCCAACGGCTTTTGCCAAACCGGTTGTCTCAAATCACCGTGTTCTGAATC -ACCATTCACCTCTGCCCATGCACTTTTTCATAGATTGAAGAAGCTAACACTGCGTTTGGG -GGAGTACCCGCAAAGCGCAaaagcaaagcaaagcaaagcaaagaaaaCACAATAAGAAAT -GTCCAGACCCTTTGGTGCAAGTTGATGATATGGCAAGAGTCAATGACTCTGGGGAGAATA 
-ATGGGCCAAATGGGACGTGTATGTTGTAGAATAACACCCTTCCTCATACAGTGAGGAACC -TCGCTCTTTTCATAGATCAAAAGTTTGTCTGGCGAAAGAGAGAAGTAATGAATTTATGAC -TGAGGCGGACCTTGATGCCATTGCTTACGCTGAGCTTCAGTCGAAGAACCTGCTGCAATC -CGAAAGGTTTATAGATAATCGAACCGATCTCGGCTTTTCTCATTTTCAATTTCGCGTCCG -GTGTAGTCACACATTTTCAGGGTGCTTCTGTTCAAATGGCTCCGTGAGGCGATCTGCGAT -ATCACTCCTTGTGTGAATCTGTCATGGGTTGATCTTGCCTTGGTAGGTAAATGCCTTAAT -ACCACATCGTTTCCATCCATCGGTCTGACTTCTTGGATCTCTTCTTTTCAATTTTTCAAT -CAAAAGCTGAGGGCCTCAGTGTAACATTACGTGGCTTTTGCAGCTCTGCACGGCATCGGT -TGTATCGCCAGCTAGGGAGCATGTCAACATGGTTGATGAAACCATGTGACTGGTTGGGTT -TGGAAAAATGACAGCCTCGGAGTAAAGAAATGAATGACAAGCGATAAAACACAGGGAAAG -ACAGGGAGCGAGAAAATAGATGAGGTTTTTATAATAGAAAAGCAAAGATTGTGGTAAGCC -CTAGGGAGTACGAGAAAACGACTCCTTCTTGTGTTTCTGACATGCAACACTTCATTAGTG -GTAGATCATTGTTGTAGTATTTTCAGGGCAAAATGAAGACCAGATAAACTGAGCCCTGGT -CGGATACCGAGCCATCGTAATCCGAGGCGTCACTGTCATTCGGATGTCTAGAATTGATGC -GCTCTGGCGCATCGCACCAGAAGCCAACCATGGCATTCTCGGGTTCGTGTCCAGGCTCGT -AGGCCTGTTCTCATCTGTGGGTAATGATCCCATACTTGATTGCATTCTGGCCTGCGATGA -TTTCAAGTCTGGTCAGAGCGGTCCGACATGAGAACAATGCTATGAGTCCATAGTTAAAAT -GGACAAGAATAGCGTATCAAGTATATGTACTCACTTGTGTCCAGTAGTGCTGGCGGATAT -GGCGGCATAGGGAAAGACCATCAATTAAGCCATGGCGGCATATCTGCAGGCCACTTCACC -ATAGTCCCAAATTTAGCCTAATATATCTTGATCTCGGTTTATCTAGCAGTGCGATCATTT -CACAAGGTGAGAGCGTCATTTTCCCTGTTGCAAAGGTCTTATGGTTCCCCAGACTGATTC -GAGCGAGGATATTGACAACATCACCCACCTTCGAAAGATGAAGTTGTATCTAGTCGACTG -CGAACTTTTTATCAATCCAAACGGGAAAGGTAATCTCAGCCCAACTGCAAGGGTCTGGGT -GAAATTTGCTGATGGTCCCCCAAGGAATCACGAGATCGTGTCTGCATGTCACAGAGGTGC -TTCTCGAAAACCTCCCTCTCGGCCTTTTGATTGCTTGGAGCTGAGCCCTTTGAAAACGGG -AGGCTTCTCACAAGGGGGGTGGGCCTAATTACCGTCGGGATTGTAATCTGTTGCTGACTC -TATGGCCAATTCTGCTACATATTGTAGTAGGCTCGTGACACAAACACAGGGACTACTCGA -TATGAGTCTCAATGCTGTATCGTACGGATCTGTTGATAGACATTTAGAAAGGAATTCTTG -ACGACCATTTTTATCATGAGACATGACCTATAAGCTGCCCTTTCAATAAGGACATATGAG -GTATAAAACCTTTAGACTTACCTGCTAATACAGCAAGGCCCCGCAATAAACACGGCAAAT -CAGGACCATAAGCCTTCCCAGAAGTTCGAAAAGACCATGGTGCCAGTATGCTGCCATGAC -TGCAAGGAGCATGACAGAAACGAGCGAATATGTGTTGAACATTGCGGTCGGTTCGCAAGG 
-TAAACAGAATATACATAGGATGAAGATTGAAAGACAAAAAAAAAAGAAGAAAAGGAAAAT -CATAGAAAGATATTCTATCCTCGAATGAATCAAGGGGTCTAAGTCCGCATCATGACCGGC -GGAAATTGGTCCATTGGGGTTCCGGCCTCAGTTATAATGGCAAAATTAACACTTGCGATG -ACTTCCTAGCTGCCCCCTCAGACCTGTTCTTAACCGAGAGAAGATTAGCCGCTGGAGTTT -CTACTGTAATTGTCGTAGGGTATCAATCCACGCTGTTTGGATCTTCTTTTGTGTTGATGA -GATGAAGTGAGAGGCAGTGGGGAAGGAAGCGACTGAGAAAGGGATGAGAGAGCGTGACAA -AATGAGGGAGGAAATGTGAAGTGGGAGACAGGCCTGGAAAAGAATACCAAACGGATACCT -CAGATATAACTCATCTCGCTAGATCTGTACAGCCTTTGTAGTTTCCTTTTTGAAGCTGAC -ATCGATTCGAGTCGTAAGGCTGTGCGAAGTACCACTGACAGAAGATGAAAATAAAAGTCA -GACGTACTTCAACAGATAGAACAAGAAATGACATGGTTTGTCTTAAAATGAAGCACAGCA -CTGTGGTTTGTATACTTTATAACACCAGGAATTCCCGGGCAAGGGGAGAAGAGAAGATGA -AATGCAATTATATCCAAACAAAAATAAATAAGCAAGGAAAAAGAGGTCTGCACTCAGCTG -GGTAGCCTAAACAGACCTCAGGCATCTTCAAAAGGATAAACGTCAAGTGCTCGTGGCCTC -GACGTATCCCTCAATGAGACCGAATTCAGTGTAATCTCCTCGGACGTAGTATTGGGCGAG -GTCAGTATCATGGTATGGCGTGAAACCTGCTAGCGGGATATTGCGGGTAAAAAGTCCAAA -ATTTTCTTGTCGATTCTTGAAGATGCCAATGAAGGAAACAAACAAAATAATCAGTCGGAA -TAGTGTACTCCGTACGGAGTATGTAATAGCAGCTCAAAAAGAGAAAAACCTTCATGCTCA -TGCCGCAGGTCCGCGATAATTCACTTCATATGTTTTAAATGGCGAACCAAAGAAAAAGGG -CGCCTTTTCTCCCGGTCGACGCCTTAATTGTTCTTCCCGTTCTCGAACTCGTCGAGCATG -TTCTCTGCTCCGGCGCTTTTTCGCCTTCTGCTGCTTGTTCTTTTCCGATATTTTTGATAA -TTTGCCAAAGATATGCTTTAACCAGCTCATAATTGCCCAGTTTATGTCCTCTTAGAGCAC -TCTGACGGATTTCCGGTAGATGGCTGGTTGTCGGCCGAGTGAATAACTTCGTGTCCTCAA -GAGGGCTCCGATAGTAATGATATGGAGATGAAATATATGCAGATTTTGGAAATGGCAGCA -TACAATGTAGAGATTGAACTGGCGCTCCACAACAATGTCTCGATATATGCTGACGTGAAG -GAAGTTCGCTCGCGGGGGTCTGTCTCACTGGGCTTGAGCTAACCGATTCAGGTGACCGTT -CAGCCCTGCCCTCAACAGGAAGGTACTCGGGTTATACACTCCTTTTGTATAGATACGTGC -CCCCTCCTCTCGAATATCTGGATATAGGAATCCAGCATGTTCTCTAAAATAATATCAGTT -ATACACATAGCGAACCGGGAGCGATCAACGAGCTGAAAACCTTTAGATATATTAATTAAT -CCGTTTTACAGTACTCTTACGGCCCCCGACCCACAGCTCGTACGGCCCCAAAGATCGTCC -AGAGAAAAAGCTCTATGTCATTAGTTTCTCGAGTCGATAAGTATTGACAACATCTTTGGA -GATTTGTACACCTCGCTTGCCTTGGTGTCTGAGTAATTTGGCCATCAATGTTGCGCATGG -TTGTCAGTCATAGGGAGTCAAGGCTGGTCCCTACGCATATTGATGCCAGTGTAGACCCCC 
-TGTAGAGACCCACAAGCCCGATGTATCAATCTAACATTCTCTTACTCCATAGTGATCGTC -TATGTTTTGTAGCGGTCGGGCAATTCTCTGCCCCATTGTAACAACTAACCCGGTTGCGGT -TGATCCAATTTATAAACTCTAGCTAGCACTTCTGTGATTTATTTTCCTGTTTTTTTGCTC -TTCTCGATTCATCTTGAAGGTTACAAGGACTTCGAGGAAATCCATCAAAACAGCGGTATC -CAACATCCTTTCTTGCGACGCAGATTGAAAGCCATTGTCATTCAAACGACCGAAGTGGAA -ACAAGAAAATTGAGCATAGGTGAGTTTGGACTCTCAAAAATCCATTCAATTTTGAAATGA -CACCGTGGTAACAAGCCCCAAAGCACATTCAGCGACTGGTCGAGCGTGATTGTACCACAC -AAGATGGAGTCTACAAAGCAAGAGACCCACGCACACACGTCTGCTTCGTACAGTGCCGAT -TACGATATTGAGAAATTGGCGGGACAGCCCCAAAAGCGCACTCTGAACACCAACAAAAGC -TTCATGGATGAGTGGAGGCATGGTTCCAAGGCACGTGTGGTGAAGTATTATGCCGTGCAT -TTTTGTATAGGCATACTGGTCGGCGCGATCGTTGGAGTGATTATCGGTCTATGTGTTCGG -TATGTAGGCAAATAGTCCTTAATGCAGGGATACATACATCACGGGATTTTGATTCGTTCA -AATATTCTGTATATTATGGACGAAATAGATTTATCCATTACCCATCGTATCTAACGATGT -ACTGTGTAAGTAATGGAAAATGCAGATGCAGCATTTGAATTCGTAACTCCGGCCATCATA -AGCATCGTAACTATGCAATGGACAGACGCAACAAAAATTGGGAGAAAAAAACAAACGGGT -ATACGATGCATGTCGTGCGTATCATGGTATTCAAAAGCAACTTAAGAGAAAGACAAGGCA -TCTTTACCTAAGGACCTCTCTGACGAATTCAACATGTGAACCAGGAAATAGTCCCGTACG -ACCAGCATAACATCCCCAGAACCAATCGTCGTTGATGTTCTCCGCCTCGGTGATATGCGC -GCCGCGCGGGAAAACAAGTTCGTCCGAGACTTCTTGTTCTGGGTACCAAGACCACAGAGC -ATGGACGATGAGTCCGACACCACCGTTAGGCGGGAAACGGCGGAAGGGCGGAACCCCTTG -AATCGGAGTCGCGGCCGCACTGGAATTTGGTGGATTTGGAAGATCAGAAGCAGAGACAGA -AAACCTGCTGGTATCATTTGTGAAGGCAGGTCCGGCGCGCAAACGGGATGCTTTCTTACG -GCGCTCAGGGCCGTCCTTCCAGCTCCAGTCACCGGTTCCGAACTCCGGGGATGCAACGGG -GCCGCCAGTTGCTGGGGAGGAATTTTGCGATTGTGATTGTGCGACGTGCTCGTCTCTCAA -GGCGCGGCCACCCACAAGATCACGGACAATGACGCGGCGCTGGCCTTCTTCGTGGTCTTC -GAAGCCGACTACTATCATACGGTGGCCGGCGAGGCAGCGGCGCCAGCCGTTGTGTCCGTT -CTCTTTGTTGATTTTGCCCGTTGCCACAAGTTTGAGGTAACAATTTGTACAGACGTCGTA -GTCACCGCCATTACATTCTAGGCAATGGTAACGGGTATTTGAGGCTGGGATTGGATATGT -GCAGATATCACAGTTTGTTGAGAATGAGAGAATTTTGAAACTCTCAATCTCGGATGGTCG -TAGGGGGGTATTGGATTCAGCCGGCGGCGGTGTGGTGGACGGTGCTGATGGAGGTGGACT -ATGGGTAATCCGTCGGATGGGTAAAAGGGTGTGGGTGCAGCACCGTCCTTGGTTGACGCA -GGAGTTACAGAAGCCCCAGTCTCCCTCGTTGCACTGATTGCATTTCCAAAAACAGTCGTT 
-TGCCGATGATTGACATGTGTCACAGAACAGGCCGCTTCGTAGGCGACGTACTGGATTGTC -GCTTGTCATTTGTCTTTCTCCGCTCATATCTTGATGGGCAGTCTCGGGGGGCCGCTGATA -TTTGAACCATAATAGGATATGACCTGAGTCATTGATTTGTATAGGGCGTCGACTAGACGA -CGATATGATTCTTTGAAATGTCGTATCTGCGGATGCGCCAAATCCGATCCATCGAAGACA -GCCACGTCCCAAGCGGTAGCATTGTAGACATAGGTGGAAGTTCCCTTCCTTGCAACTTGT -GCATCTCTTGTGTAGATGATATTGGATGTTTCCCTTCCCGCACCGGTCACAGGAGATAGA -CGGCTCTGGAAACTGCACAGTGGGAGATGCAGGGACATTGGATCGTGATGAAGATGGCCT -TGTTCGTCTTTCGGAGCGGATTGGAACCAGAGGCGAAGATAATGACGTAAGGGTGTGGTC -TGTGTGAGTTTGATGGCGGGATGCAGTTGAGATCACACCACCCGAAGTGGGAGAGTTGGT -GGCTGATTGACTCCTGGTCTGTCTAGAGGAGCTTCTTTCTCGTCCTCCGGCCACTAACGT -GCTAGAAAGATTTTGTTCGGACTCTGTGGCACGTCGTGGCCTGGCTGATGATGATTCTGA -TCGTAAGCGCCCCGCCTGTGAAACGCGGCGATCCGCAGTGATATCGCTCGAGGATCGGAT -AGCAGGCCTTATGGCTTCATCTGACGCAGACCCCTGATTCGTTCGGATGGGCGAGGTCCT -GCGGCGTCCACCCACTTGCTCCGAGTTGCGCCTTTGATGGCCTATGCTCGGCCGTGTGGG -GGCCGAATCAAACAGATGCGGTCTAGAAACTGGCGGTCGTCTAACATTCAGTTCTCGGAT -AGCTGCTGCTGGTTCTGCAGTCCTATGAGCAGACTCGGACCTCGCATGTGTTCGCGTAGA -TCCGTCGGCCTCTTGCCGCGGTTCTCGTCCTTGTTCTTGTGGTTGGCGAGACTGCGATCT -TTGCATATGCCTCCGGCGGTAAGCATCCGCGATGCGTTCACTGAGTTCCTCTTCTTGCCT -TGGCCCCAGATTATCGAGGTTGATGTTGTCTAGTAATCCATCCTCGAGAATTTGTCGCAG -GATCTCTTCCTCCATAGTCTCTGCATCAGATGACAAACTCAATAATGACCGCAGACTAGA -TTGATGTTCAACCCGTCGCGAGCGCGCTGAATCATCGGGCCGTAGTGAGCGTTGTTGCCG -AGTTGTATCATCATCCCTCCGACGCCTTGACCGACCGTCATCCGCAGGCCTCTCGGTATG -GCTGCTCCTCTCACGTGTCCGCGAGGATTGTCCCATCCTTCGGGCCTCACGCCTAGCTTG -CGACCGGCTTTCACGTATACTTAGTTCGCGCACATCCTCTAGAAGGCGTCTATCATCTTC -GCCCTCATCCTCCGCACTTTCCCCGCCCGAAGAAGGTGCTGGAAATACCGACTCGCCATG -TGTGTATCTAGTAGCAATTTCTACCTTTTCATCTGTCGCTCTGGCTCGATCTGGGTGCGC -GGTCAAGACCATGTCCAACAGGGTCGTCACGGTGGCATTTGGGCGGGTGTCACGCACTTC -AGCCCGACAGGATGGGCAGGTGAATCGAGGGGAGGCGCGGGGGCGACGCGACCCTTGCGC -GGAGAACCATTCTTTCACACATGAGCCGCAGAAAGTGTGGAGGCAGTCAAGGAGAGTGAG -GGGTTGGTAAAGGAGCTCGGTACAAATCTGGGGCAAAGAGCGGTGTTAGAAGCAGAAATG -TAAGGGGAATGGAGTACATACGGAGCATACTAGCTCTTTTTCAAGGTCGGCCAATCCGGA -AGTTTGCGCTTGAGACATGAGCAAGTTTTTGTAGTTCCCCGGAATGTAAACAAGAGGTGA 
-AGACCAAGGATCGTAATTGTGGGCAGAAGTTGGCTCCACCAATGGACCGGTGATctcccg -cttacagcttcaccttcacttccacttccactttcaACGCTGAAACTTCAATTTTGTCAG -CTTCGTGCTTGAATAAAAACTGCCCTGAGAGAATTTTGCCTGGCACTGTTACAGAGTGGT -TCTTTGCACCAGGTTTCTACACGATGAAACTTGATGAAAATGCTTGGACATAGCAATGTG -gggagatggaagatggaaaatggaaaatggtagatgTTTCCATCAGGTGCCTTCTTTAGA -AGGTTGGGGGCCTTAGGCGGAAATGGGCTCGGAGAGGTTTCAGTAGAATATATTTAAACA -TGCATTTTCTTGGGAGATTCCCTTCAACTTCAAATTGGATATGTAGCCTAGGTAAAGTCA -TTAGAGTAGAGTAAAGAATATCAGCAAACACATCTTGTTGTGAATATAATGGACTGAGAT -CCGTTGAGAACTGGTCTTGGCCATCAAATTTCCAAAGTATGGTCACTCGATCGCGAGATC -CTCCTGAGAAGAAATCCGGCAGGGTAAGTGACGGACAAAGGCATTGTGTCGAAGAAACCC -AGACAGCCAGAATAAATCTACATGCCATTTTTTCCCATGCAAAAGACCAAGCGGTCAACG -GAGTCAAATTCAAATCACCCGATACCGGTACTTCAAGTTTGCACAGCGTGTTGTAAACCC -GGCTATAGGAATCTGTCGTATTCTTCGCCATCGCCCCAACCAAACTCACCCATTCGCCGA -CACAGTGGCGGCAAAGATGCAGGACACGTCCCTCAACGCCATGGAGCACAACACGCACGG -TCCATTCACAAAGATAGAGGTACCTTCCGGTGACAACGCAATACAAGTCCGAGCACCGGT -CCCTGAGCAAGACCAGGCCGGCCGAGAAGAAAAAGGTTCAGTCGAGTTTTAGTATAGCCC -TGCCGCCACATAGGATGGTGGATGATGACCCCATCGCGAGAGGGAGGAAAGAGCGGGTAA -AAAGAGGCCTGGTTAGACCTCGCCTCTCTTGTGGCGGACGTTGTAGGCAGCATTCCATCG -TTCCTATGGGTTATCATGGGTAGCGGTGAGGGGGTGGCTGCTAGAAAGCGAAGAATTGAC -TTTGAAATACCATTTCCTTGGAGCCTGAGCACATTGCGAGGCCGGAATGTCGGAACTGGG -CTTGCTATTATGAGTGTCATGTGAGTGAAGGCGAATTTTGATTCTATGTGCATACATCGC -GTGGGAGATAGACGCTGTCTTTCCTCACGGATCTTCCGGCGCAAGATGCATGTTGTGATC -ATCTGCGTGAGCATGACAGTCAAAGTGTGGGGAAGGTCTGTGTGAAGTTCGGCTTCTAAG -GGCCCGCTCCCTGGTTCCCGCCGTGGCTTGAGGATATTGGGAGGTGAGCAAAGCTCTGAT -GGCCTTGGGCGGCAGGAACATCACATCTATTCGCCAGCTATGTTATCAAGCTGAGGGGCT -TTGTTTGCCTTGTCTCATGATAGCTGCATCGTCAGCTGTTTACTGGGACATCAGACACCT -CGGAAGGTTATTAGACGGCAAGACGATATCGCTCGGACAGTGTGAGGTGGCAGATGCACC -CCGCAATTGAGAGGCGCAGGGCTACAAGCCATCCCCATAATCTCATATCCGCAGATAGCG -ATGTCTGTTGCATCTGAGAACATTATTTAGTCTGTGCTCTGGAGCAATGTGCCAAAATAC -AAATTTTTTCGGGCCAGAAGCATAGTTGGTTTATGCCTGGATCCTAGAGATGGGATTGTC -GTTATCTGAGCCCCTAATCAGGCCAATCGATAGGATATTAGGATAATGTCTTTTGGACGT -AGGGGCAACGAGAAAAGAGTAAATGATCACCCTAGCAGCGATCATTATTACTCAGACAGT 
-CTAGGACCCTCTTCAGCCCCACAGATGAGCGATTTGATAACCATCCCCGGAGATTCCAGG -CCTTCAAGGAATCCTCTACCCCGCGTACTTATCTCAGTTTGACATCCAAAGGTTGAATGT -CCAGGTGTATTCACAAGATTCAGCGGATACGGAGGGCCGTATTTCCGAATCTGAGGGCTT -GACGGGCGTCAAATCTCGTGTTATCAAAAATGGGCGTTTTTCTATAACGGGAAACGCCTC -TCCCGTCGGTGGAATGATAACGGGCCCAAATCAGCCAATCCTCGGGCTTCGGCGTTTTAT -AACCGACTGCGATTTTTTTTTTCTCTATTTGTGGCTTGATTTTTAGGTAATCTATCAGTC -ATTAGTAGTGATAGTGGAGGTGGCTATAATGAATGACAACTTAGCCGAGATATCAATTCG -ACATTCATTGATCTCTCAAGTCTCAAATGGTCACACGAATTGACCCTGCAACTAGTGATT -GTGGAGATAAATCCCCGCGACATTCGGGGTACACCTGCAAGTTGCAAATCATTTTCCCCA -GAACAAAATCAATAACAAGCGCAAAGTATAAAATGCCAACATATCAACATGAAACCCCTG -AGATAAAACCCACATCCGACAGTCCAGAACAACAGCCCATGCACTGACCCCGTCATGCAT -AGCAATTGTCAGTTCGCCGCACTAATGCAACGCCTTATGCACGGCCGCGCAAACTAATCA -AGCTATCCACGCCCCACTATCACCGCACTGTCGAGTCGTAGCTCCGGGATCGCGCGCACT -GACTGCCGCGCCTCCGTTCTGGCCTGTGTTCGAACTTGGTGGGGGTTGCTTTGGCGCTGG -CCCTGTCTCCCATTGTATGCATAAGCGTGGGGCAGGGCCAAGATATGTATATCTACAATA -GATTACTTTGGGTGAGGCTACTAGGTGTCCGTGGTATAGGTGGCTTTCTAAAGTATATAT -TCAGCAGATTTTGATATAAAATTTCTACCATATGCACTAATATCAAAGGCCCGTGCTCTC -CTGAGACCTACGCATCATGTATACTACCTAGATGTATATCGAATCGACTAAAGCGGCTGT -CCCTGATCGCCTGCCCGTCTTGCCAGTGGTTGCTGGATGCCAAGACGTGTAAGGTGGTGC -CACTTGCTCAACTTTTTTTTTTCTTTGGACATACAACGTCCGGGATACAGATGCCCCTTG -GGAACGTGTGTGGCCCGCAGGGATGTTGGAAGGTCAGGTGGTGCTCGGTGCAGTGCATTG -CAGAAGAGGCGCGTCTAGGATTTGCGTGACGTGCGACATTGCATAGTCAGCCCATATGCG -CTGCTTGCGTGCGATTTTCTTATTCTTGAGCTATCAGCAGTTGTGGGCCAATCATGGTGG -AGTTCTCTATCTTGCATCATAAATTTGAAATGCCCAGGATTGAAACGGCTTGGAACGTTT -ATTCTGTGCAACACATGATCAATTGGGAGGGATGGCATTCGCAACGTTCAATCGTCTTTT -GGAGGTCTAATCGGAATGCTGCATGCACATTGTCATCGGTTGCATTCGAGTGTGTTCCTA -TTAGGATACCCGAGCAAGATTCTATTTCTACAAACATAGTGCGCAGACCTGCGTCTCTCA -TTGGCAGATCCTCAAAAATGAATCATTGCCAACCAAACGCGCGAATCCTGGGGTATAACT -CTATAGCCCGCGGGGTAATAAAAGGGATCAGAACGCATGTCAGCGTCGCTCCGATACCCG -GGCTCCGCATCGACTCCGAGTTCGCAATATGCATCGTGAATGTAAGAGCTGGCGCCAGCC -GTATCCACGCGGGGTAGCTAGAGCTCTCTAGGAAACGCGTTGCCCCTCTCTCATTTCACG -GCCCTGGTCTCGTTAATGCATACTACTCACAATTCATTTCCTTTGTTCTTGGCAAGCAGC 
-GACTGCAGTCTGGACAGGCCTGCTGCCCAAAGCGGATGCGTGTTTTACCACCAGGGTGAA -GCAGGGGAAGCTTATTCTTAGCAGTATCGGGTTGAATTGCGCGAGATGATTCTCGGGAAC -TGGGCCGGCGATTGAAGTCTGAGTTCAGCCACACAGTAACGCCCTCGGGATGAGTTGCAG -TTTGGAATTGTTGTCTGCGAGACAAGGTAAATTTCAAGGAGCACATCACTCCGTAGAAAA -TGGGTCAATTTGAAAAGGTTCAAATTGCAAATTGTAATGTGAGATTGCATCTGGGCGATC -ATTTCATCAGTAGATCAGCAAATGGCTCTCATTCCGTCAGAAAGCAAGATCTTAAGGCTG -AACCATGCGGTCTCAAGGTTCCAACATTAACACAGTATACTTCTGATAGGCTCTAGTCTG -ACCGCTTCGACGCTCCGACCCCATAACACGTTCTACTTCATGTGTGTATTCAAGGGGTTG -CGGGCCATGACCGAATTCGGGTTTCCCGTGGACCGACAACCGATGGAGGGGTTCTCAGTT -TTCGGGATGGAATTAGaaaagaaaaaacaaaaagcaaaaataaatgggaaaaaaaaacaa -aaGCGCACGGAACTTTCCAGGCTCTCGGCAAGAAGGGGCTAAACGGACGTTATCAACCTG -CAGCCCGGAATGTTGCGACCTTGCCGCCTCACCGAGACGTTCATATTGGCAATTGAGGAT -CCAGAAGATGTATCGGGTACGCCCGTAGGCTGGATAACCCATATCCAGTCAGTGGATACC -TGGCAGAGTTTTGCGGGATACATAACATGAAAAGGAGGGGGTAAAAATCTGGAGATCAAC -ATCATAGCCTTGTGCAGATCACAAGAGTTCGAGACTGTCGGACGAACTTCGAGACTTCGG -TGTACCCACCATGCGCCATGGGGGTTTCAGGGCTTCGCCAATCTCGTGGGATGCCCGAGG -AGATCACTGAGGAATGGAAACAAATCGCCGCCATAACGCCATATGAAAGCCACTGACAGC -CACCCAGTTTTGTTGGGATCAGTTCGGATAAAGTGGCAAAAGGGAAAACTCTGCGACGCT -AAGACACAACAAAGGGGTTGGTGGTCCGGGCGTAGGCGTTTAACGAGAATAGCTTCGAGG -GTAAGTCTAGAGCTCGGGGATGGCTTGCAATTTGAAAATTATAATAATCATGGATATGAC -CTGGTACAGCTAATTGGTGAGACATGGAAAAAGCCATCTTGGGGGCTGAAGCGATCGTGA -AACGCGGAAAATCAATTCGGATATAGCACTTGGCAGAGTATGCGACTCTTGACTCTATGA -TCAATGCATACAACATACTCTAATCTACTCCGTACTCCTTTAGCTTTTTCAACGTAAATA -TCAAATATGCATACCAAGAAGTTATGTCTTGGAGTAACGAAAAAAGAAAAATATATATGC -ACAAGCGACCGGGGGTCCAGACCACCCGCCAAGCGGACCAAAACCAAGGCAGTCGGGGAC -TATCAAATGCGCCTTGACATCATGGATTATCGGCGTGGAATTACCACGCCTTTGATTATT -TGTACACAATCTACAGAAGTCGTGATCTCGGATCTGCATTCGGACATTGAAATACCCTCT -AGGTGCGTGATTTCGTACTCCGTACACTACTCTGGTTGGAAATCCGGATTGCATTCAATT -CATCATTCAACTTGATATTCCATATCTACATCTTCTCCATATCTCTATTATCTTAGTCCC -GGCTTTTGGCGGCACCTTCGGTGGAGCCCATGTTTTATGATATAGATCCACCGAACTCCG -GAGTACAGTACACAAGTCTCCAAATGCACGTCACCGTGGAATTCCATACATCAAAACTGG -GAATTGGAATTCTTTGACAGCTACGTATTTAGACCATGTAGCGGGAAAGGAAACTCATCT 
-CGCGTTTCACAGATTCATTTTGCATTGGATCCAAGGTTCTAGGAAGTATCAAGTCTTGAT -AACAGTGTAGTATGATCAATCTATTTCATGTTCTAGACGTCCTTTTTTGCTCTTTTTGTT -TACCCGTCTTATCGTTCCATTCTCTGGCCATTTCAGCCATTTTACATTTTAGTCCGGGCA -GTAGCACAACGAGTAACTTCAAACCCCCCCTTGGCACCAAAATTTCATATATGCAACACG -GTGTACAGTAGAGTCGAAGGAAAGGGATGGGTGGATATTATGCTTCTAGCACTTGCATGG -GGCAATGTATCCCTAGTTACCTGTGCGTGAACAAAAAAAAAAAAAAAGCTGTCTATCTGC -CACAGACTAAAATACTGGGACAAACAGGTTTCTCGTTGGGACGATCCATAAATTTTGGCC -AATCTCTAGACCACACGCTTTATTGATAAGGGAATATTTGTATCACACACCACATCTCAA -CTGAAGATATTTGGTTTTCAATACCCCAGACTCATTCTTGGTTTGGGAATGAATTAGTGA -TACAAAAATGATATGCATGTCAAAAACAGGGAATACAGGTTGAAGCAAAGTTAGTGTACA -TGGACCTCTCTAGCGCCTGTGGGCTGGAGGTCTGTAGTCCGTTAATTGTGACACTAGGGG -GGAATTATGAGTGCATTATAGTGATCTCGCAAGGTGATCATCCAACGTGCTCGTGCGAAT -GGACCACATGTCGGAGAATTCGGAAAACTCAACCGTGGAAGGTCAATAGGATATGGTGCG -ATTGATGATGTCTACTGGGACCTATATTGGATATCAGATATCGGAAGATTCCGAATATCC -CAATATCTCCAATATCACATCACTTTTAGAATGTAAGATGTAGAAATAGGGAAATGATCA -ATCAAAATCTATACATAATATAGTACGAAAAAAATCCGAAATATAAATTAAATCCGAAAA -AAACCAAGGAAGTTATCCCCCACCTATACGGGACGCACTGACCTGAGAGTCTATGCAAAG -CAGACGCACCTAGACAATAATTTTATGGCGCTTGGATTGGTATCTTTATTTATCTTGTTC -CACTTTTATTTTACTGATTTCTCTCAAAAAGCTATAATACCGGATTTTTATCACCTTTTC -TAAGGATATTTTCTTTTCCACCCTTATCTTCCTGTCAACGGTCATAGCGGAAATGGCTTG -ATTTCGCCACTATATTTCTTCCAAGGTTATCGATACTTCCAAGGTACCTTTAAATCACCT -CATTCCCTTACGGCTGTGCCGGCTTGAGCTCTTCTCTTTTTTTGGCCTTTTTTTTTTATA -TATATGTACATCCTTGTATTTTGTTAAACCTTTCCGAATACCTCGCCACTTGAACGTTCA -TACTTCAAAGGTATAAGCTCCCTTCCCCCCTTCCCCCCAATTCCTTTTTGCTCTAGTGTC -ACCTGTCATACCTTTCCCACTGGCACCTTCCTCAAGTGTCACCTCCCCTTTCATGTCCTG -GTGAATTCCCATGGCATGTCGTCGCCTCCTTGCCACTCCAACCTCGCCACAATTTAACGA -AAGCCTTACTGACAGGCTACTTCAAGCACTTCATTGTGATTGAAAATAATTGAACAGGAC -TCTTTGCTATCTACCTTCCCCTTCTTGTGTTGAGACACACAAGTCGGCCGGATGCCATAC -ACTGCGCCATTGAAGACGTTGCTCGCCGCGCAACACATTGAATATTCAAAACCAACCGAA -GAGCCCTGCGAGCGGCCCGGTTCGGTTCGATCCTCGGGACGGGCTCAGACTTCCCGTTCA -TATTCCTCCACCTCATATACCCGTCGTCATCGGAGGAGTCCTTCGAATACCAAACCAACA -GTGCATTCCGCTCCAGAATCCCCGGCGCGGGCCTCCCCCTCCATCGATCCACACGCCAGT 
-CTCCGTCAGTCCCCACCACCAATAAGCAATGCTATCATTCCTCCCGGGGCGGTAATCTCG -CCTCCCGAGTCCGCAATGAACTCAAGTGATGAAGAATCATCATACCGGAGTGGTGAGGGC -GTCAAATTTGACGAGCTGGAGGCTGCCGTTCGATCAATTAAGTTAAGAAGGGAGAGTTCA -CCGGAAAGAATGGGTCAATCAGACCCAGAAACAGCAACTCAGCCCTCGAGTGCGCAATCG -CCCGAGGCAAAGCCCAGTCGACCCAAGCTCCCACATCTACCACTGTCCAAAGAAGCTCGC -AAGATTACCCACTCCCGATCATCCACTGAAACCGCCATCGAACTGGCCCGCGAACAGGCA -CTGACTAGCTCGCCCGAGGAAAGTGACGTGGAGATGTCATGCAAGCCCCCAATGGTCCGT -AAGAAGTCAGGTGAACTTGTTCGACCTGCTCTTCGTCCTTCTTCCGCACGGCGACGACCA -TCCAGCATGCCCGGCACTCCCGTCTATTCCAAGGCGGTGCACTTCGACTCGCACCTGGAG -CATATTCGTCACTTCCTTCAGCTGGATAGGCCATTGGCTGTCAGCACGGAAACGTCACCA -GTCGAAAATCACGATAGCAAATCTGAGTTTCCCTTTGGATCTGGGCCTGACTCGCGCGGT -GCCTCGTGGGAGTGGGAGATCAAGCTCGCCAATTGGCCCAAGGATACATCATCACCTGCC -ACTCGTCCGGTTCGACTGGAGCGATTGTTCTTGTCGGCTGATAAGAGCACCTTGATCGGT -ACTGTGGCAGTGGCCAATATTGCTTTCCACAAGAATGTGACAGCTCGTTTTACACTAGAT -TATTGGAGAACTACCTCGGAGGTTGCGGCGGCATACTGCCATGATGTTCGCCGCCAGCAA -GTTGCCGACGGGTTCGATCGCTTTTCGTTTGACCTCAAGCTGAACGATCAGGCAAATCTG -GAGACTAAGACCATGTTCATGTGCATCCGCTATAATGTGGCGGGACAGGAGTATTGGGAT -AACAACGATACGCTGAACTATCAAGTTGATTTCCACAAAGTTCCCAAGACTCCCACGAGC -AAGCCCTCAACTGGCGGATCTCGCCCTGCTCTTCCGCGCAGCCGATCCTTCACCAGCTCT -CATAGCATGCGACCGCACTCCATGCCGCCTACCTATGATTTCCCCGACCTCGGCGACAAG -AAGCCATTCACACCCCCTTTCAATGGTGCAAATGGTGCGCCCTTGACTCGCACACCCTCG -GATGAGATCGATACGGTCGCTCCGCCTAAACGGCGTGAAAATTCCAAGGCATTCGGGAAC -CGGTACGACTTCGGAGCCTCGCTGTCTGCTGCCATGCGGTCGAAGGCGCCTCTAGATCGG -ACCGCTCTGACCGCCCGTGCGAGATCCGGGGAGACCGTGGTGCCAGAGACCTCCAAGCCA -AGCAAGAAATCTCCCAGTTTTGAGAAGGACCGGCAAGGTCCTGTTTCTGCAGTGAGCGAT -AATTTCCGCACTGGAGCTCGTGTGGAAGATCTGAAACCTTCGTCTTTGGTATCTAGCAAA -CCACAGCTCGAGTCTCCCGTGTACCAGGAGTTGGTTGATAAGTACTGCTTTGTAAGTTTT -TGAATAGCGAATCATACCATGCATGACAGTTGACTAACTTCTTTTAGTACGGGTCTCCTC -AAGCATCAAACCTGAAGGCACACCCATCGATCTCTAGCGGTCGCGAAGGCGAGGCCCCAA -AGCATATCTCCGCCCCAGCCTGTCCTTCGCCGCCGTTGTCTCCTCGCTCTCCTGCTCCTC -TCAGTGCTCCCGTGGCCGTTAATCCACGCGCCTCTCCGCGGCCTTCGGCCTCCCCTATCT -CTTTTCGCTATTCTTTCAACCGCGGTTTCATGAATGACTCACACTCTCCAACAGTCATCA 
-GAGGGTGAGTAATATAGACTGTTGTATGACAGCGACCGACCTGGCTATGATCCTCCCGGT -CGGCCTTCAAAAACACATCATTGACGACTTTCATGAAGGGATATGGAACTGGACAGATCC -GTTGGCGAGATGTCGGTCTCTTCGAACCCACCATGTATCAGCGCAATCTGATGGATGGAT -TTTTCTTCTTTTTTTGACTCCCTCTCAACATCACCTGCTATCTTCATCAATGGTTTCGAC -CAGCAGCACATGTGAGCGATTGTCTCTGCGATGCATAATAAACCGCGGACGTTTCGTTGA -ATCATTAGCTGTGCGGTTTGCTACCACCAGTCGTCGGCCCAAATTATCGACGCGATCGTT -CCAGCTTTCATCTGGTGCGGCCTCTCTCAATTTTTCTTTACTTTTTTTTTGTTTACCTCT -GCGTGTCGCTTCGGCCACGCATGATGACGCTACGATTTCTTTTCCGGGGCATTCTTTTCG -ATTCTTTGCTGTCCTTTACTGGATGGCCCTCCATTTTCAACGCAAAAAGTTTTTTAGTAT -GCCCGCCTCCGCTCTTGATTTCGCTGCATTCCACTTTGATGGGGTTCTTTGATTGCATGG -TGTTCTGGCGTTACCTCTCTCCTGTTTTGCTTTTCGCAGCCTGTTATTTTGATTTCAATT -CTACGGTACGGTCTACATTCTCATTGGATGTGAGAATGGGATACGAAGACGTGAGGGTGC -GGAGATAGTGATACATGACTGCCTATAGTACATGGGCAACACGACGTAGTTTTGAATGAC -ACGAGCCATGATGAACGCGACCGACTGGGTCTTTCTCACTTGTAGCTTCTTAGGATATAT -TTTCAACCGTCAGTTTCCACTACTCAACAAGGCTGCTAGCCACCCGTTGAAAGGTCAAGA -AGTAAATCAGATAATATGTAATCACATTCATTCTAATGGATTCGTGGGGTACCTATACTT -CCTGATCAGGCTAGCGCCAATCCCTATCTTATCACCACGTGACTAGTATCAATGTCGTTC -CACGCTGGAAAATGCGGGCTCGGCATTGCCTGGTAGATGCGCTGGAAACCGGCCCTTCAT -AACGCCCTTTTTCTTTCTCCGCCGCCCGCCAATCACCGCCGCATTGCAGAATGAGTAAAT -CTCGCTCGCTTACCTAAAATACTCCCTTCCCGGATCACGCGTAATCAGCCTCTCTATTTA -AACTGCCATTCCCCCTCCATTTAGTCATTTTCCTATTTTCTCCCTCTTATTCTCCCAACA -CCAACTCATCAAACACGTGCTTTTTGTCATCGTTTATCGCAAACACGGACATCGATCAAT -CTAAATTTCTATTAATTTCAAATTCTCACATCTGAAACTATGTCTGGACGTAAGTACTAC -TATATTTTATGTCTTTTGTCACTATAGACCCTCCTTTCGTCGTCGGCGTGCCGTGACGAA -GTTCCCAAAACACAGAACTCACGATGTTCCTGTCAACATGTGACTTGCGCCTCAGGCATC -AACTGCATCCCACCTTTCCTAATTAGGAAAAGCCATCCACCTTTGCGCATCTTAACACAA -CCTCCCAACTACCCTCACCTCCCTCTACCCCCCGCTCTTCGTCCTATATTATTTTATACA -CCCCTCCTGCATACGGAGCCACGCTACCCTTCCATAATCTCTCAAACAATAACTAACAAA -TCCAACCAGGCGGCAAGGGCGGAAAGGGTCTCGGCAAAGGCGGCGCCAAGCGCCACCGCA -AGATCTTGCGCGACAACATCCAGGGTATCACCAAGCCCGCCATCCGTCGTCTCGCTCGTC -GTGGCGGTGTCAAGCGTATCTCTGCCATGATTTACGAAGAGACCCGCGGTGTGCTCAAGT -CCTTCCTTGAGTCTGTCATCCGTGACGCTGTCACATACACCGAGCACGCGAAGCGTAAGA 
-CTGTCACTTCTCTGGATGTCGTCTACGCCCTCAAGCGCCAGGGCCGTACCCTCTACGGTT -TCGGTGGCTAGATTTGTCTTCCGACATGAATGGCCTTGAGCGATTTCGCTACTCGCTCCC -TGTGTGTGAGGTGACGTGGCGGTCGATGGCTTTACTTAATGCGCTACGGTGTGATGGGTT -TCGATTTTTACGGCTATGGGGTTCAGGGAATTTCTTTGGGACTGCTTTTTCCAACATTGG -GATTGTCTTTTCTTTTGGGTGATGGACTATTTTTGTCCTGATAACTAGTTGCTGATAGCC -TCCCGGGTTGACTCCATGGAGTTCAATACAACGCTTGCTATCAGTCATGTATTTTTCCAT -GTAGTTTGTATCTTGTAGCAAATAATCAGAGTAGTTGTGGTAGTAGATATGTATACTGCC -GGTGTCATTGTAAGGTGGAAAGTCACGGGCGGAGAATAGTCCCCCGCAACTTTAAATTAC -CCTTTCAGCACAGAGCTTACCTATAGATTGGCTTTTCCCTTACAACCCCAACCCATCCTC -TTGTGAAAATATTTGCTGGCATTACGGTTGTTCCAAATATGGCCTCATCTTCGGTAAAAG -CCGGAAGATTTTTTTTTGCCTCGTGAGACAGCCCGTCATGCAGCAACCATCCTCGGATTC -CCATCTCAAGCCTCCATCGCCTCGGCATACTACGAAAGCGCATGCGTTGACATAGCTAAT -TTGACCTCTGTCATTTCAGCCCATGAGCCAGTCCGACTCTATACGCGACCCGAAGATGTC -CACAAAGCAAAGTCAATGGTTAGCCAAGCCATTACCAAATATCCGAGCGATACCTCGAAT -ATCAGTGTCATTCCATTCGCCATAAATCACCTCTGGGTTCGCGATACAGGTCCTGTCTAT -GTACGTGGTGTTGGCAAATCTATCAAGCAACGCTTTGCAATCAAATTCCAATTTAGTGAA -TGGGGGAAAAAATATGATATCGGCCGCCATGACCGGGTTTCAGATCGCTTTGATTGGCCT -GTGATGACGCCGGAACAGATTGAAGAAAACGGAAGTTTTGCCCGCAGAGTTATCCAGTCG -GATGTCACCCCGTCTCCAGTGACTCTGGTGGAATCACAGATTTGCTTGGAAGGTGGTGCA -CTAGTTGTGGATGGCGAAGGTACACTTCTGGCCACCGAAAGCAGCGTCATCAACGAAAAT -CGCAACCCGGGTCTTTCGAAGATTGCGATCGAAGCAGAGTTACATCGTCTCTTGGGCGTT -GAGAAAGTTATCTGGTTCCCTGGCAGAAAGGACTTGGATGTTACTGATGTGCATGCTGAC -GCTGAAGTCAATTTTGTCCGGCCTGGAGTTGTCGTCCTCTCGCGACCCCATTCTAGTGTG -CCAAGGTCATGGGTGGAGGTTTACGAAGAGATCAGGGATACTTTGGGACAGTCAGTCGAT -GCCAAGGGTCGCCCCTTTGAGGTACACATAGTAGACGAGCCGAACCCCCAGGTGTTTGGG -GTCCTGTCCTCCAATGACCCGGCGACTAACTATGTGAATTTTTATTTCGTCAATGGGGGG -CTCATTCTTCCACAATTCGGCGACATTCGGCGAGATCAGGAAGCCCTCGTGCTGTTTCAG -AAGCTGTGCCCTGATCGCATAGTGCGGCCTGTGTTTGTGTCAGCGCTTCCGTTAGCTGGG -GGTGTGATTCACTGCGCAACACAACCAGTTCTTTCGAATGAATGAGCAGGCATATAATAG -ATGAAATAACATGTCCTCAAATTAAAACAACATCCTGGAGCTTGGAGATCTGATTTACTT -AAATAGGAAGTCCATGTGTCGATCCACCCTCGGCTATGGGGAAATCCTGGGGAGAGCGGG -GAAATTGGCCGATCTGACGCGAATTCATCACAACTCAGTGACATCAAATCAACAAGACTA 
-CAAAACTACTCGGTGATTTTCTTATTTCCACTCGCGCATTTGCGCTAGAACGATGTCGCC -AAAGGCCTGCGAGCAGTGTTTGAATAAAGAGACGGAGAAATATGCTGCAGGTGCTAGTCC -GACATGCATCAGATGTAGTCGTCTGAAGATTAGCTCCACCTCCCGTCGAATCAAGCGGAT -GAGACAACACTCTGCTGTCAACACTGTCCCACTTGGGGATTTGCATGTTTCCAGTGTCGT -GTTGCAACGTCGCCCCAAATCCAAAGAAAGCGAGAGTGGTGCCTCATCTCCCTCATCTAG -TCAACGCTCATCTTCGAGCTCGCCTCCCAGAGCCGACGAAGCTTGGACACCAGCAACAGA -CCTCGTGTTATCCCCAGAGAAATTAATCGGCGCCCCGGCCAACTTCCAAACAACATCAGA -TGCGCTGCGCACCGTGATGGACGTGGAGCAATTCTCAGTCATACATTTACCATTCATGTT -AGGAGATACTTTTATTCCGACATCCCAGAAGACTGTCTACGTCATCTTGCAACTATGCGG -CCCGATCCTGATAGAAGGATATCTGGCATTTTTAGGACTGATGTCAAATTGCCAAAAGTC -GCTCGTCATCCGGCGGGGAGAACCCGATATGTATAAGGCAGCTAAGGGATTGCAACGTCT -AAGAAGTGTGAAAATCTCAAATGATTACGACGCAGCTTGTGTCCTCTTCCTGGGCCAGAC -TATGTACGTGTTCAATGTACTCACGGCGCCGTACTCCAGTACTGCCCACTCTATCGTTCG -AAGCTCCCTCATCTCGGCCAAGCCATGGCTTCCCAGCTTGGTACAAGCGCCTGTCATGGA -CACAATAACCATGTCACCGATTTTGATCGACACGGTCGAGTGTCTAGTGCACCGTGAGAT -CCCTATAATACGACTCGATCCTCAGCGTCGCGTCATCGTTGACCGCTACCTTGGTCTTTG -TGCAACCCTTATGCCTCATTTTTACGACATCTGCGAGTGTAGCTATGCCTTGAAGATAGA -AGCTCCTGCTACCGGATCTGAACCATACTCAGCCATTCATGATCGACTGGATGGAATAGA -GGAGGAGATCCGACGTTGGCGACCCCAGACCCCTACTGGACTGTTCGAGAGCTACGGTCG -GAATGCGGTTTTATCAATGGTCACGCAAGCAAATGTGTATCGCTTGGCAGGTTTATTGAT -CATCCACCGGCTCCGATATCCCCTGGGAGTCGAAGACGAGGCAGGAGGGAAACTTGCAAA -TGGAATATTTTCTGAATTGGCATTCTTTGCCAAGTCAGCAATTAATTTAAAGGAATCTAG -CGCGTTGCCTGTGGTGCTTCCTTTGATGACGGCCATGATTGAGATACAAGGGCCCGGGGA -AGTTTTGATAGATCGCCTGGCCACCTCATTCACTGCCCAGAGTGCAAGTGCATCCAGGCT -TCAGGAATTTGTGAAGGTGGCCAGAGCTTCGAAGGAGTCTGGATATCAGGGCATTTGGTT -TGAGTTGGTGGATACACATCTTCGTGTTGCGGTACCGCCATAGCTTTGAGCTGGTCGCAA -CAGCAATTCTTTTATGTATAGTATTATTTCGGGTGACTCTGTTTGATAGTGATGTCAGCT -TTTTCATTCTTCTTTGGGCAAGGGTTCTTGTATACGCTAAATCTAAGGGCATGGGGCAAT -TCAATATGTACATTCTCACTATCCTCCGGTGGTGGAAAGAGAAAAGAGTATACGCTAGAA -TTATTTGAGCATTGGAAAATGATTAAATCATGGAAGTGGTCACCTAACTTTACAAGCGTG -AGACATCTTCTCTGTCTCCTGTCTGTACAAATCGTGGGAATCATGAAGCCAACTATTATT -CAGCCATAAAACCGCCCCTCAAGTCAAACCCCCAGACTATATACCCCCATATCTCTCCGA 
-CCGAAAACCAGTTAATATTGTCGTAGACTGCCAGTCTCGCTCAGCTCGAAGCAACAAAGC -CGTCAGATATATGACACAAGTCGAGCGTTCATCGAATCGACGCTTCAGAGAGAGTATCCG -ACGCTTCAGAGAGAGTATCTGATTGAGCCGGTGCTATCCCTCTCAAGGCCTCCCACTCAT -GCTCCAGCATAGACAAATTGACAATATCATGCCAAGCGCGGCAATGGTATAAAGCCTCAC -GTTCGCGACCTTCATCCACAAACCCGAGCTTCCTGTAAAGCTTCAATGCATTGGGGTTGT -ACGAAAATCCACCTATAGTGATTCTGTGCAGTCCAGCATGCTGGAATCCCCAGTCCAATG -CCCAGTCAATAGCTTCCCCACCGTATCCCTTTCCCCGAAATCCGTCAATCAGTGAAATTC -CTATCATTGCATTGCGGTGGTGGGCATAACCTGAACCAGTCAAGCTGAAGAGACTGATAT -GGCCGATTGGGACTGGCTTGGTATTTTGAGTAGATTGAAGCTTGCCTCCATTCTCTGTCG -AGGAGGCTTCCTCCTCGACAGGGGAATTCGGAGGAAGACATATAATCACGCCTAGCAGGG -ATTCTTGGATATTTTTGATGAACTCCTCTGCAGATTTCTTGTGAGACGGACGTGGGAGAC -GTCCAGTGCTCAATATTTGAATAGTCTCGTCGTTGATAATGCGATCTTGGAAAAATGCAA -TATCTTCTTCGGAGTTCTCTGCGGCTCGATAGACGAGCCGGCTAGACTTGTGAAGTTGTC -CAAGGGGTTGGGTCGATGCCATCTTGGGAAAGAAATTAGAGATTGTTGGAATGTAGAGTC -CACTTGGGAATCATCAACAAATATATAGAAAAGATAAGCTGACTGATGTTGAGTTTTAAC -CAATCAGCATCCAACCTTGACGCCAATGTTGCATTGGAGCAAAGGTGAGGTGAGCGCTAT -ATGGAGCTAGCTCCTATGGGGTAAGTTCGGGGTCCCAATCAGGTATTTTAGGGCTAACTA -CTCCGAAGTACAGCGGGAATAGATTAGTCTATCCTGAGTCATTTATATGTGTAAAAGAGA -AAGTGGGACATATGTGGACGATGTCGCGGGGGGCCCTTGCGAAAGCTTTAAGGGGATATT -CACCTGACTACACTGATAAATACTTTATTGTGTGTGGCTGGATAGTTGGGTGATAGTTAA -TAGCATGATCTACTGTAAAAGGCCAATGAGGGCCGGTACTGCGCAGATACTCAGTCTGTT -GTGCTGGAGAAGGGCTTAGATGTATTTGAACCGGTAAGTCATTCTAGTTGGCTCTCTGTT -TTCGGCGAGCCGAGACACTGGGTGGACGTCTGAGTCTTGAGACCATAAGACGAGACAGCA -ACTCCAAAAAAGAAAACTGTTATTCTATCATGTACTTCAGTGACAGAACAGAAAGCCGAA -CCGCAACACGAATGCATAAACACTACATCAAACACCACGAGCACTATATTTTAGCCATCA -GATCAATCCCGATGTGACCCATGATCCTTCGTCCAAAACCAATGATAAAGGCTCTTCCAC -TCGGCCGCTTCATCTTCAACGTAATCACCGACTAGCTCAGCCGTCTCATTAATTTCATCA -GAAACTGATGAATAAGGGCCCTTCACATTACTCGCACTTCGAGCAGCATCAGCAGCGCCT -GTGGAGTTTGATGTCATATCAACAGCGTTGTTCCCAGAATTCGTCTTCAGCAGCTCTTCT -GTGCCCTCCCTTGTTCTCTCCGCCAGGTCACGCGCGCCTTCACGTGCGATCTCTGTATAC -TCTTCCGCCTGTTCACGGGCACGCTGAATAAGTTCATCAGGCGTGGGATAGCGACGTCTT -AGCACAAGAAGCAGATAGTAGCTCTTGACTATAAAAGCGTACACGGCGTCTTTAGCAAGC 
-ATTGTAAAGCCATGCGAGAAGAAGCCTTTTAGTACTTCACCATGAAGGCCGGCGTAAAGC -CCTGGTATACCGTCCGTGCGGGCGATGTTGGCGACTGTGGAAAGTATTTCGGGGGTGAGA -GAAGCGAAAAGGGAGTCCTGAGCCTTTCCAGACTTGGATTTCGAGTTTGAGCTCATTACT -TGAGCTCTGGTTTTCGCGAGGGAGAAGGGATATGTGATTGACGAAGCGGCGACTTTGCTA -AGGGCCGCCAAAAGGAACGTTAAAGCTGGAGGTGGTTTGTCACGCTTTGATCGTGGGAGG -AGTGTCCGCTTAAGAAATTCATTGAGGAAGAACGTGAGAGAGGGATTCAAGGTGAGTATC -AGGGTGGCCGAGTATCCGGACCAAAAGCCTAGAATGCCCTTCTCGGCACGGATGCGAGCT -GCGATGTCACTTGTTGACAAGTTCTTCGTATTTCCACCCTTGCGTGCTGCAGATGTCTGC -TTGCGTGCGACGATATTCGACAGTGGTGTGGTGAATAGCTTGGCAAACGAGCCAGCCAGG -ATACCGACGGCAAGTTCGTCGATCATTGGAAGAACGATATTCTTGCTCTTGGCAGCTCGT -TCTACGCCTACGTGTGCGACTATTCTCCTCTGGCGAATGGCACTATATGCGAGAAAGAAA -AGGAACGAATCTGCCACAGTCTTCCAAGTATCCTGCGCCAGGCCAGGGTAGAAGCCGGCG -ACGCCTTGTTGTGCATAAATTTTGCGGGCAGCATCCACCACGCTATTATATTCTTCATCA -GCGTTCTCGTCATCATCTGATTTCTCGCTCGAATTGTTGCTTTTCTGTTTCTGGGTCTGT -AGACGGGCGACAATGACATTGAGGGGGTAGACGACCACATTAGAGATGGCTGCCCCGACT -GCACCGGCAGTTGCATTCCCTAAAGCTGGGAGGGCTGGGCCACGGAGAGCAGCGGATTTC -CAGTCGTGCGGGTCATGAGCTTCGTAGCGGTAGGAGGGGTGGTAGACGTCCTTGATTGCG -TGAGCATGTTAGTATGGTCTGATGAAGTGTAGGAATGTGGCTGTGGATGTAGATATGGAC -GTGTGAGGACATAGGACAAAACGAGCTGGATTACAGCACACACCAGTTGGGACTCATATG -ATAACCTGTATTCCGCCATGGCTCTGCTCGAGTGTGTTCGAAATCTGGTTCTTGAGTTGA -AAGTCCACTTCGGGAGAATTGATGCTATTATGCTCGGGATTAAGCCCCTCAGTCTCTGAT -ATCCGTTTCAACAAAAATTACAAGCATGGGCGATATATGTGCAAGATAATTCGCGATTCT -GATAATATAGACAGTGGGAAGATTCTCAGCTACGAACGTGATCTCTAGGATTCAAGCCGA -GGTCCACCTCATAGAGGGATTACATCATGTCGATCATGTGGTCCGTGATCAGGCCACTAC -GGGCGACCATTGGGACCAGCAATACATAAATAACCCTCTCATTGAATTCTGCCTTACCGC -ATGTTTTCTTGCCGCCTTCGGGGAGAATATATGTTGCACACTAATTCCAGCTTCTCTTTT -ATTTCTGTAGGCACAGCATACAAAATTGAAGTATAAATTGAAGTATCCAAATTAAGAATA -TAATATATGTCATAAGAATAAATATAGTTGTGAGATATACTGACTTAGAAAGATAATCAA -TCCTCATTCCTATAGAATTTGTGATTGATGCTTAACGATACTATTGTACAATTTGATATA -CTTCCATTTCGTGAACTTCAATTTTTGCTTCATTCTATATGCTCTGAGTTGACATGGTCC -AATTTGATACGCTTCAATTATAACTATATACTAGTAATTAGTTCTTGTAAAGTGGGAGTG -AATCTGAATACTGCCTGGCAATGTAAACCAATAAGAGAGTTATGTATGCATCAGTGATCC 
-CATTAATCGCTAGTATGCCCACAAGGCAGTCAACTCCTACATCGATGTACTCACTACATA -CAAATGGAAAGGAAACCTTTCCAGACTTCACCATTCATCCCTCATTTCCATATGTAATTG -TCTGTGATCATAATCCTCTTGGGCGCGGTTTGAAACGTTTAATCGAGGTCGTAATTCACC -TCCCCTCGTCGAGTGAAAGGATGTGCAAACTGAATCTCGCCATCAAACCCCCAATCAATC -TTTTTGTCTGCGCATGATACTCTTGAATTCCCGGTAATCCAGGATTTGACGTAATCGGCG -GAGTTTTCAATTGTTGTCAACTTCACTCCAGCCCGATGCGTGATTGCGGATAATGGTCGA -AATAAGTGCAAGCCTTTGTCCTCAAAGGGTGCCTCGATTCTGATCTCCTTGAGGTGTTTG -AATTGGTGATCTACCCGAAGAAAAGACGATGCTCTGCCGTAGAGACTTTCACACAGCTTT -TCACACAGTAGGATAAACTCAAGCCCATCATTGATTTGGAAGACGTGCAGCTCTTCAAGT -GACTCGGGAAGTAGCTTGTACAAAGGCTGTGCGCTGTTGTAGATTCCGTGGTGGTCATAG -AGCATCCAGTTACGACCTTCAAAATATGACATCTTCCCTTGCGTCTGGACAAGATAGCCG -GCTGATATTCGCAGTCTCTTCAAATGGCGAAGCCCCGTTAGATTGAGAGGTTGAAGGTAC -AATTCTGGGTATACCTTTCGGGCACTTCGATTAAATGACAGTTCGAGGCGGGTCAGGGTC -CGAGATGTGTATTGAAGCGCATGGCTGAGTTCCGCAGATTTCAAGACCCATTCTTTCCTC -TTCGTCCACCCGCGTTCGTAGACGAATGTTTCCAATTTATGTGCTAGTTTCAGAAATCTT -CGAAGGTTTGTGGCATCAAAGGTGCAATCACGAAGTTCGATGTGAATGAGGTGATGGAAT -TTGCGATATGCAGGCCGCAACCATTGTGGTTTATTGGTCCATCTGGTGCCGAATACATGA -GTCAATGTGGGAATCCGAAGGAGCACCATAAGCCAGTCAAAATCAGCCGAGGGAAAGTTT -CCGTCGACATCAGCTTTCAGGTGAGTGAGCTTAGGGAGCGTAATTGTCATTTCATTTTCA -CTGCTGTGCATCGAGCGTATACCGGCTATACAGATCAAAACGCGTTCGATCAGGCTCCAG -TCTGTGATGAAAGGGTCGCGGGGTGGGTCGTCACTGTCCATCGGGATCTCGAGACGTTCT -AAGGCAGGCAGATACCCCAGAAGAAGAGCTAGTAACGCGTCCGATCGCGGTACACTGGGC -TCAAGGGCTTTATGCATTGAATCTCTGATTGTTGATTGTAATGGTAACTTTCTGGCAGCA -TCCACAATGATTTGATGATCCTCTGGGCAGAGGGGCTCAAGAGTAAGCTGTTTTCTCTTC -GGGTGAGTACTAATCTGCCTATGGGCGTAGCAAAGGTCGTGTAGGTCAGTCAAATCGAGG -GACTTGACAAGTCGAGCGAGGTGCGGTTTGCGGAGAATCGTGAGGAGAAAGTGTTCTATC -TGTATTCTCGGCATTAGAAATTGCGACTAAGTCATAGAAGAGAGACATCGTACGGAACGA -CGATCGGCAGGCCACGCCAAACCAGGCCACATCCGGTAATGAGGAGGAGCCCCCAAAGAT -ACATGGGCATATAACGCAGGCATTGCCATTTCATATAGCTGCCTGCTACAGGCAGCCACG -CGGCTGAGGGTCAAGCTGTCACGGCCAAGCTGGTCAATGATGAGAAGCTTGATTTCTGCG -GGGAGATTCCGAAACTCCGTCTTTGGGGTAGGAGGGGTTGGATAACATAGGGGGAGAAGA -GCGATGGGTAAACAGAATGCCATGTTTGTGAAATGGGATTCATCAATGTTGTGGAAGAAG 
-AAAAAATGTTGAATCAGATGAAGCAAAAGCGATTGTGTAGGTGTTGGAAGGCGCAGGAGA -AAGGACTTGAAGTGTTTGCGCAGCCTGTTCTATAGGTAAACCATACAGTCGCTTAATCGC -CATTACGATTGCTGACATCGTCAAACTCCTGCCTAGTTGGCTACAGGGTTGACAACAAAT -ATAGCATATCATTAAACATCCCAACAAAGCGCTCATTCGGTCCGACCTTTTGGGCAAGAC -TATTAACCGCCTGAGTAGGAAAAGTAGGTAGGGGCCATCTGGAAAGATATCGCTACTAAT -TACGAGAAAACGCACATCAAGGAAAAATTAAGGAATGGCGATCAGAATAGAGAGCTGAAA -GAGCAATACCTTAGTTTACAGCTCATAGCAGCAGGATTTAGGGTTTGTAATGCCCTAGAT -AGCATGAATTATCGCGGTGGAACATTGGGATATATGCGTACATTTATGAGCCTACGTACG -TATCAATATCAAACCACACAAGTGGGCATGATAAGCCTTGATATCTCTGCAATCAAAGCT -CCACTCACTTTCAATAAAAGAGCTAGGGAAGGAATAGCGCGCTTGGCTGCAATGGAGTCA -TGTGTCACCCTAGATGGTAGTGTAGGCTAGGTTGAGGCGGAAAACCCCAAATGACAACTC -TACACAACAGGGTTGATTTGTAGGTCTCTCAAAGATACAGGCTAATTCGAGGAAGACACA -TCTATACATGGATGACGTAGTATCAGAACAACACAAAAGCCAATCATTGACGAAAAGATT -CCACAGGTACGAGTTTCAAGAACCTCCTAATTTGATGAGAGTGGTGATGATGATAATGGA -GGGCCTGAGCAGCCTAGCAGGCCTTCATATCGCTTATTTCGAGTAATGTATAGATAGACG -GCGTCCGGTTTACCCCCATGAGCACCGTCACCCAGGCCCCACTCCTCTCTCTGTTGGATA -TGCGCGCTCTTGGAGATGTATAGGAAGTGCTTTGGAATAGTGGATAAAGGACTGTAAAGC -TTCCTCGTCGATAACTAGAAACCAGCGAAACCGATTACCAAGAGCTCCGACTTCGTTATC -CACAGAAGCTTTGAAGTCCTCTCGGACTCGGGCGATAGTAGCCTTATCCAATCGGATCTT -GTCTTCAAATACAAGACCACCTTGCTCGTCCTGGCGTCGATTGTTGTGTTGTTGCCAGTC -TAGTGTGGCAAGGATATTTGCTCTGATCGCATCAACGGCAGCCTCCCAGTAAAGGTCGGG -CTCAGGGGTATATACTATGCGATAGATGGTATACCCCCAGGGCCAGCGATGAGGAATCTC -ATTTGCCGGCATACTTGTAGCATCATATCTAACCCTACTAAACCAGGGTTGAATTTCGAG -TTGGCGGAGTCTCTGTAGGTGTTCACGTTCCGTTTCCTCGAGCGGGAAATCTCCGGATTT -GGATGACATGGTGTATCCCAGCTTTGGTGGCATCACAGAAATCGAGAACGTTGCAGTCGG -GAAATTGTGGGAAACGTAAGTTGATGGCAGAGGTACACCAAGGCAATCAAAATCACTACG -CTAATCGCGATGCAGGTGTGGCTTTGGAAGCCTTGACTAAATAAGTCTACTCTGTAGGTC -CTTCCAGACTATCTAATTTGGGTCTTCTAGCATTCCTATAGGCCAAGTGGGCAATTCTCT -GGTTGATCGTCACCATCAAGATGTGGCTGTAGACGCGTCGAGTTGGATTAGGATATTGGA -GTATCGGTGATGACCTTTGAGGCCGAATTTTGCATGAGAGAATGAGAATTATATTGCAAT -CTATTTTATCACAGTTCTTGCCAGACCTGGTGAGATTTCGCCAGCCTTATTTTTTGACCC -GAGGGATATCATCTCACACCCAATCAAGATTCCCCAAAACACAAAAAGAACCCCCAGTTT 
-CAGACATAATCCAAACTAGGCATTCCAAAACACTCTCACTCATACCGAGGTCACTGGGTC -TTCCTCTCGTTCTGAGGCTCCGCTGCCACAAAGGCTCCGCGTAACTCTTTCGCTGCCTCC -CATAACACCCAGAGAGGGATAAAGACCCAGAGGGTATTGAAGAAGAACAAATACAACCAA -AGGTAAACTGGATCATCAGTAGCCAGCTGCGAGCTCCCCGTCAGCCATTCCGGTGCAAAG -GTCATAAATCCACCATACAACTCCGCCGTCGCAAGCATGGTTGACACCAGCCACAGCGTC -GCCTTGGCTCCACCACGTGACTTGGAACTGATCTTTTCATTCGAAGACCGGCACAGCAGA -TAGCAGATGTAGATCGCGGCCGGACCACCGAGGAATACTGTCAATAATTCCAGCGACACG -ACAGTCAAGTCGGGGCCAGCCCAGCGGCGGTCAGCCTTTGCGTATTCCTGCCACAGACGC -GAACTAGGTCTAGACCCGTAAGCAGGACCATAGATACGGGTTGGGTCGTTGAGAAAGAAG -GGCTCAACGTTATGATTGTCAGCGACTGTGATGTAGCTGAAGAAGCATTCAAAGAGCCAG -GAGCCTTCGACGAAAATGTGGGTGAGGGCATCATAGGCGTGCCAGAAGAAAAGGATGCGA -TCCCGCGTCCTGTGCGCTGGCACAAGTTTCTTGCTTAGGAAGTGGGCGATTGGCATCATA -GAAAGAGAAAATACGATACAGAGGACTGTGGGCGCGTCCAGACTGAAGGACGCGGTGGAC -GCCATGATGGATTTGATAATTGGAGATCGCAAGAGTCTATGATAAATGATCTTGTAGATG -CTTTGTGGAATGAACACTATTTAAATGTGTTTCATGCTTGGAATTGAGTGAGAGATGGAT -GGAGTGTCGAGCAACAAAATAAAAATCTAGGGGCTGAGCTGTCGCGACGATCGAAACTTG -ACGCCTCAGGCAGAAGTGGGTCGCAAAGTGCAAACCTTCTGCGAATAGTATGGAGGACTC -TGTAGTTGTATCTATCATTTAAGAGTTCGTTTGTCACTTGCAGTGTAACACCCCTGCAGC -TTGGCAGCTCGCCGTATATTGCAAATTTCCTTATAAACCTAAGCCACCTCGATAGGGGAA -TCTGTGGACAGTCTGCTTCCACTTTACATCTAGGCTAAGATGCAAAACACAAATCTAGGA -AAACATTTTATAATCTCCTCGACTCTCGATCGCTGCATAGTTCCCTGTCCAGCTTGTAGT -CCCCCAGCGCCACTTTTCACAGTTCTTTGTTGCCTGCAGCACACACGTCCCAGTGGCGCG -TTATCAAAGAAACATGCGTTCTGGAAAGGCCGTCTGCATTCTCCACATATACCCATTCAC -CCATTGACACAGCTGTTCAATTAATAGACTGCGTGAATGGCTGCCTGGGCCAAAATACAT -GTGGTTGGGTGTACGTATCAAATAGTGGAATTAGCCCTGGTCTCCGTATTGGCTCGAAAC -GCTTGGCGGGATATCCCTTGAGGATGTCTCTTTCACGGCTATTACTATGTTAGCTAAGTG -CTTTCTGACTTGTCTCTGGGGGGCAACAGTTGTTATGTTTTACTGCCTCTCTCAATTCAG -ACCCGTTGAAAGGCATTCCGTTTACCTAGAGCTGTGGATCATGACGGTGGAGAGATTGCT -CCCCTCACAGCTTCTTGTTAGTGCCCTCTTGACCAATTAAGTCTCATTGGCACTTCTGTT -TGTCATCAAAAATGCCTCCAAGCCTCGGGGTTCTCGATGTCCCTGCTCTCGCCAAAGTGA -CCCGGGATGTATACAACCAATGCCATCTCCTCGCAAAAGGTGCTCCCGATGGGTTTCAAA -AGCTGATAAATGAGCTTGGCTTGGTACAGAGTATCTTGTGCACCTTCAGTGACAAGGTGG 
-GCTCCAACGTTTCCTTCTTCGAGAGCATGGATCTGGATCGCAAACAGACGTTGGAACAAT -CGCTCGGTACTTGTCTCGCAACCCTACAACGATTGAAAGAACTTCTTGGCCCGTTCAAAG -GATTAGAAAATGGAGATGGAAAAGGCCTTTGGCAAAAGATCAAATGGGCGGCACAACGAA -CTCAGATCGAGGATCTCAGATCTAATATCATGGCACACGCCTGCAATCTGAGCCTATGTA -TCAGCTCTATAGGAAAGTGAGTGTGCGTTGATTGTATCGATTTCATGGCTCATAATCTAA -CAGGCTTAGTGACTCGCTTTCGCATAGAGAGAAACCAATAGTTCTGACGATGGAAGAGGA -TAAAAAGAGGGTTCTCAGCACAGATGCATTACCTGTTCGGGATACCGATTCCGTTGTCTC -GCCGATGAGCCCGGGCCTAGGTGTGGTGGAGCTACCCGCTAACAACGAAGAGGATTGTGA -GATAGATATGATCGAGGAAGTTCGCGCCAAGATATCGGAGCTGGCTAATAACAGAAGCTC -TATTGGAAGCAGCAGCAAAACCAAACATGCATCCATATGCTCCGAATCGGCCATAACTGA -ACCAGATGATTCGCTCTTCGACGGTCACTCGCTGACTTCTTGGTTATCCTTGAATACATC -GCCATTCCTACCTCCTCCGGGTCGAATAAATCATACGCGAATGCGCAGTGAGCAGCTTCT -CTTTGGTAGCTCCATTGACGGTCAGGCATTCGATGGTGGAATATGGGCCCACAACCCCGA -CTTCACGGAGAAACAAGCCGTACATCACATGAGATCCTACTCAAATGTTAATTATAGCCA -TCCTCACGCTATCGAGGCCGTGACGTCCGCAATGCAACATCTTGTGATGTGCGGCACCTG -GAGCGTATCTCAAGGCCAATTCGTTTTGAGCCACAGAATCAACTACATAGACCAACTGCA -GAAACGTTGAAGATACTCGAGGCATCAGTGAATGATGGACTGCAGATCAAGGCGCTAAGC -ACAAGAGATTGGCTTCGCGTGGCGACCTGGTGGTTGTTGAAAGTGAGCTCTTCGTGTACC -ACTTAAAGAATTCAAGATTTACTTACAAGTTGACAAGACACGCGCGACTTTGGCAAATTG -CAACAGACATAATCCTGCCAGTGCACGTGGGAGCTTGAGTCCCTCCACGGAATCAAGATC -GACTACTAACCAAGCCTATCTTGACCTCCTTAAGGCTTCATATATTCTGTACGATGTTGT -GCTGGAGGAGAAGGCATCCTCTTCTGTTTTGGTGGATGAGGATCGGAAGTCAATTGTCGA -GCTTTCTGAAGTTGTTTACATCGTATTGTTTGCCCCAAGCTGTGACTAATTCAATATAGG -CCATCAACGAAGAACTCTCCCAATACACTTCCGTCGATATTCCCGAGCCTTCGACTCTAC -ATACCAAAAACCTTGCCATTTGGGAGCCTATGCAGCCGGAGGAAATATGGGAAGGCGGTA -TTGATCTAGATCTTGGCTTGGATAATTTGCGCTGGGTCGCTGTCGATCTAGAGGATGCAG -GCAGCGAACAGGAAAAGGTCCTCTACCGAAGCTTTGTCAACGCCAGCATTGGCAGTAAAA -AGTCTCGGATTCGCACCAAGGGTGCTCCCTACATGCTCTTGCTAGCAACACGAGAGGGCG -AAAGCGAACCTAAAGTCGTTCTTTGCAACCAAAGTGGCACTCTGTGTCTTGAAAGAGACT -GTAAGTGATGATTCTCACAATACCTTCTCACGGAGTTGACCAATGATGCTAGATGTACCG -AATGATCTCCCTCCCCTTGTCAATTTATCAAATGCCGCTTTGGCTGGCTTTCCTGGTGCC -CGAGTCTCAGAACCTGTACCATTCAAATTTGAGAATATGAGCGTTTCAGTCTCTTTTCAA 
-TTTGAAGCGGACCTCGCAAAATTCATCGATATTCCGAAAGCTTACTTTGACGCCGTTTGG -CAGAGAGAGCCAGTGGATGCAACTGAATTTACCGAGAGCGTTATTTTCAAGACCTCGGTT -GATATGTTCGAGCAGCTCAACACGCCGACCATGAAATCAATGAACCCGCCTGTCGTGGTG -AAATCCTGCGAGGTGCGCATTCTGGAACGGTGTTTCGGGGAAGCCTGGCAATCTATCCGA -CGCATGGTGATCACTTCTTCTGCTTCTGAGAAGTCCCCAGGCACTATTGAACTTTTTATG -CCTCTCAGTGTAGTTCAGATCGACCGAGGAAACATGTCGCGCCAGGTGCTCTTGCAGTGG -TCAGACACATGCCAAGCGCGATCCGAAAAGACTGACGGCAATTACAACACCTTGCACTCC -TACGTGTACGATGACACTGCTCCAAATATCGGTGTTGGTCTACAATTCGGCTCATACCAA -GAAGCAGAAGATTTCGAAACAGCCGTACTAGAAATGAACTTCCGTCCGGAATTTTCCTGG -TCACAGCCCAATAGCTCTGGTCTCGTCTATAATGTTGTTGATGCCGGTACTCAGCACAAG -CAATATAAAGCTGCGCTAGTATTCCGAACTCGTGCATCATGGCGATATTCAGACCTATAC -TATATTTATCGTGATACTGACTATTCATACGATCACTCCTCGTTCAGCATCCATTTCCCA -CGCATATACTGCGCAGACTACATCTCAACTCACGTCGACCAGCTCTACCATCCAGAAGGC -CCTGTCACGTTCTCCCATTGCGATAAGAAGTCCAGCCAAACAAACATCGAGTTTGGCAAT -GATCTCGAAGCTCGCTCATTCCTCAGCTCCCTTTCTCCTTTATACGACCTCCTCTACTCC -AGGCGTATCCACTCCCTATCAACGAAAACCACTTCGCTCTTTGTGTTCCATATTTCCGGA -AAGGGTAGCGCCGATATCCAACTGTGGCGTCGGGGAACGGCTTTTCAACTTGCCGCGCGG -TGGGATGACTCCGTCCCGGACAAGTGGCTTAGCATGGCTATTCCCTCGGAGTTCATCGAT -TCCTCCAAAGAGAATAGCCGGGTCATCCTACCGCGTCTTCCATATTTGCGTGGTACGACA -CTTGACATGATGAATGTCATGGCTCGAAGCCCTAAGAACTCTAACAACAAAAGCAAAGAG -GGCGCCATGTCAATTTCTTTTCAAACAAGTAAAGGTTAGTCAGCCTCCTCTTTGATCCAT -TTCGATTGTGCTAATCTATATCGGAGTGGATCATAGACCGAGACGAGTTTCTAGCTGTAT -TACAAGGGCAGCAGTTGCCGAAGTTCCTATAATAGTCTCGTTTAATTAGATAGAACGAAA -GAATTTCAATTCCTGTATGTTCCGAGGGAAATTGCTTTTATTCCTGATAGCAATCCACTA -AAGCCATAAGATCTCATATAGTTGAAAGAACAGCCATGCACGTGGGGCGACAACGTTCTC -CACAGTTTGTTTCATTAACTTGCTAGAAACAGTCGCACGGCCTACTGAACTGGGTATTAT -AGGTTTGCAATGGGCTTTTATGTTCAGCGCACTGTACTCATATGTTAAATAAGAGAAAAC -CAATTCAATAACAAAAAGATGATTCCGGACAGATTTCTCTGACCGTCATCCTTGAGGAAA -ACCCGATGAAGGGAAACCTGATTCACAGGCTGTTTTGAACCGGACGCCTTTAGAGGGGCC -CCCTACCCAGATGAGTGAGAGTATAACTTGAGCGTTCCACCCAGCCATATCTGCTGGGTG -AGATATTTGGCAGTGGCATCATATTTAGTACGTGACTTTGATATTCGGCCTGGTGCCTGA -GGCTTCAATTCAGAATCCATGGAGCTGTGCACAATGTTTGCCATCAAGGCTCATGTCTAA 
-TGAATTGTAAATAGATTGAAGTCCTTATAATGGAGCTAGATACATATGTATCAACGACTT -TCCCAGAACTGTGCATCAATAGCAATCAACCGATTGAAGAGTTGATTTCGTGACCTCATA -CCAGTGAGATGGTAAGACTGGCCCACCTCATCACCGCTGACAACCAGTCCATCTGCAGCT -TCTTTGCTCCCAGCTGTCAGTTCGACTATCAACTTTCCTTTCCATCCTAGTCCCTCTGTC -TTCTTCAGCTCTTTGATTTCAGTAACAGGAATCTGGAACAAAACAGATCCCTTCTTCCTA -CTTTCGATGCGTAGATCATCTAGTCCAGCGGATGCATAAGTAGTGAAATATAAGATAGGA -GGCTCCTTCGATGAGTCAATGACTGCCGCGCCTCTCTTACGTTCGAACTTTGCATCGAAC -TTCAAGGGCCCGAACTGTGTTGCGGCAGCAGCAAAGTGTCCATTTTGCAGCATCCCAATC -AGATTCTTGGTATGTGCTGACCCTGCTATTGCCATGGCCCGGTCAAAAGCAATATGACCC -TTGATTGCCGTGGCAATTGTGCGGCGGAAGAACTTGAAGAGTCTGAACACAAATTTCTTC -TTCGGCACCTTCGTTTCCGGAAGCGATGTGGCCGAGTTGGTCGATTGGAGGTCCAAAGAT -GACCCCTGCGTACTGAGGTCGGTAGAGCTCGCTGCCTGCGATTTTTTGCGGCGTATGGGC -CATACTGGCTCGTCATCATTACGGGAGGGGGCTGGTGGTAGTGGCGAAGAATTGATCTCG -CCAATTCGGAGCAGGGTCAAAGTGAGCTGCGCATTGGTGGGAACGCCGGCCAAAAGTGTC -CTATATCTCGGGTCAGCAATGCATCAAAACGCTAAACAAAAATTGGGCGTGATCATACTT -CTGCAAATCAAGATGATCTTTCCAGTTTGGTACTTTATTGTTCAGTAGATCAAGGGTGTA -GGTAAATATTGGGTCCCCGAAAAAGCCCAGTCCAAGTGCGAAGCCAACGATTTTGATGAT -CCAGTAGCTCGATGTAAAAGGTACAATGACAGACACAAGGGTAAAGATGCTCGCAAGACG -GAGTCGAGATGTAACCAAAAAGAACGGAGGTGTTGGAGAAAGGATGCTGTTGTACCGTCA -ACATGCCGTGTTCATATATCCTTCCAAAATGGACTCACTTGGCAAGTTTCTCATAGGTAT -CAGTAATATCACTGAGAATACGCATGACATTATCAGTCGCGTGTGCTACCTTCTTTTTCA -TAGGCTTCTTAGTCTTGTCTCCAGGGAGATCCCCCGTCGGGGTCTCGGCTGTAGCGTCGG -CAGCTTCTGCGCTTTCAATAAGCGCAGTTCCGTCAGGGGCGTCTTCTGTGACTGTTTGGC -CATACTTGGCCGCTGCGCTTTCCATGGCAACAGTGGCAAAGCTGTCGACCATATTTTTGG -CTTCTTGTTCAGCGGCTTCACCCTTGTGGCTTTCCGGGGCGCCCGTGAGACTATCCTCCG -AGTGAACTTGCCCCCCAGTAGGACTACTTCTATTTGAACCTTCCGTCTTTGCTGCCCCAG -GGAATAGTACCAGCCGAACAGGCGGGGCCATTATCAAGGCGACGAAAAGACCGCAAATAC -TTGGAACCAAGAGGTCAGTGGCCCAAGCAATGAAATAACCCTGTAGGCTCTTAGTAAATT -TTAAAAAACCTGGCGGTTCCTGGTCCACATACCGCGCAGAATAGTGTTGTTCTCGCTGGT -TCCTTCCATGATCGCAGACGAGTGATATGGCGGAAGAACTCGGTCACACCGACCACCACC -GAGATATAAAACCTCTCAAGCGTCATGCGCAGCTTTTCGGGTGGAAACTGTTCACTTTCG -GTGCGATTCAGATCCAAGTTCAGAGCAGCGGTATCATGAACAGCTTTCACGTGGTAAATT 
-TGCTAGTCACATGTCAGCGCTTATTCTCGACAGAGCGGAGACCACATTAACCTTGTCGAA -CCGACGGATCAGCATCCATAGATCTTCATTGGAAAGACCCTGAGCAATAGTCTCCTCTAT -TAAACGCGACGATGCCTTCCATTGAGTGTTGGGCGGCGGTCTCGGACTCAGTGACTGGAG -GGGGCCAATCTCCTCCGGGGTTTGGGTGAGATCATAGCTCTCATCGTCCAACTGTCCCGG -CATTTGTGATCCATGACTCATGTTCAAGGGTTATGGATGGATTTTGCAAAAAGAGAGAAG -CTTGGTATTGCTTACTATCTGGGCAGCGGACCCTTCGACGTCGGGCTAGCGCGAAACCCC -GTGTTGACACAGTGGGGGTTGACGTCATAGCTTGGCTATTTTTAGAGCTTAGACCCTTTT -TCGCATATTCCATACTTCCACGGTGCAAAGAGGAACCAACTTGTTTCAATTTACTTCTCT -ATCAATTCCTATCACCAGCTGTTTCCTGGTCATAAACAACCCTCGTTATGTATGGTATGA -AATAGAAAGGTATCAAATCAGAGTATTATATTGAAATTATATGGATTTTCTGCTATGCCG -TTTGTGTAGCATATGTAATTTCTTTGCATCAAAGAATAATTTGTATATAGTTATGTTTTG -AAGGTCATATATCTTCTTCATTTTATGATATAAACGCCATTCCTATGTCATGTCGCATTG -GAGAGACTGCATGTCAAAATATCCAAAATATCTCTTAAAACCAGTTTTGGATTGATGATC -GCTCATATTCATCCTTGGTAAGCATGTATTTCAAATAAATAACACCAACCGACTATCATT -CCGCGTGCACATTATCTGTCCGCAAGATCTCCATGAATTCCACAAGATGCTTCTCTGCTG -CTATCGACCAGGGCGTCTGCCCGTGCACATTGGTCTGGAATATCAACTCGGGGGCTTTAT -CCACAAACAAAGTCAAAGTCCGTCGAGCTTCGATCGATGCGAAGTTATCAGAGCATTCAA -CATCAGCCAGATAATGCAGTACATTGTATCCCTCGTTATCTACTACTCTCAAATCCGCTC -CCCTTTCATACAGGGCCATTGCGCGCGTGGGGTCACCTTCCTTCCAGGAAGCCTGTTCAA -TGCTTCCACCTCGGTGTGGGTATGCCGTGACTGTGCAAGCGTAAGAGTTGGTTCCAGTCG -TAGAGCTTGCCGCTGCCAGTAGCAGCGTTCTTCCTCTGGGGTCTCGTCGTTCGAGATCAA -GATCAGGCTGGTCTAGCCATGGTTGGATGATTCCTCCATTGGCGAAAATCTCGTGTAGAA -TCACTTGACTCCTGTCACAAGGCAAAAAAGGATCTGCCCCGCGTTCCAAGAGGAATTCAA -TCATTCTGCAGGCATGGTCACGGCAATTGGCTTCGTTGAATGGCAACATTGAGATGTAAA -GAAGGGGGCTCAGCTTGAGCTCGCCGGCTGCGGGGTCCTGTTGGACACGGTCGGTGTGCC -AATGTGGCTGTTCATTGGACAACACCGTTGTATTGCAATCCAGGTCCATTTCTAAGAGAG -CCGAGGCCACATCTTTGTTGAAACTTGACAACGCAGCGAAGAGTTCCATGGGCTGGTCCT -GAGAGGGATATGCTCCATGTTCGATCAGCAATTGGAACAACTGGGTGTCGTGCGCTGACA -TAGCGAGCGGGGTATAACCACATGGTGCAGCTATGTTAGGATCGGCACCTCCCTCCAGCA -ATGCTTTCGTAATTTCGAGCTGGCGGTAACCCCCTAGCTCTCCCTGTGAGACTGCATTGT -ATAAAGGCGTCTTTCCCTGGTCATTTTGGGCGTTGGGATCGGCTCCATGTTCCAGGAGGT -ACCGAATGGCTCCTTCATACCACCAGTGCGAACCGTGAGCCAGTTGATGGAGGACCGTGT 
-CACCCGCTTGGTAATGTCGGTTCAGTGCCATCCTTGTTTGGTAGCTTATCTGGGGCTTAT -CAATCATATCTGCTGTGAATCTGATGTTGACATCTGCGTTGAACTTCTTGACCATGACTC -GAATAACATCCAGGTTAGGAAGGATTCGCTGAGCAGCCGTAAGAAGAGGTGGGGTCAGCT -TTTGGCCCCAAATATTGGTGTTGTCGCTGTTAATCCAGTCGCCTGACATTGTATCACCAA -TCCGCCCAAAGGGCTCGGAGAATCCCCATCTCGCAAGAGAGATCATAATCGCTGGCATGC -TGGATTTGTCCTCGCAAACGGAGGCGGTTTTCTCCAGCGCCTGTGCAAGCTCCTCATGGT -GACCGAACAAGATCATTGGGAGCACATTACGCTCAGAGATGTATTCCTTGAATTGTTCCT -GGAGGAGTTCTACGAGTGGCTGAGCACGCAGCAGATAGTAATCAGTGTTGGTGTGGCGGT -GACCTTTAATCTCAAGGTTCATTGCTCGTGATAATCTCTCCAACTCCACGGCCATTTCCT -CATTTCCTTCATCAACTGCATAGTACATGGGACTTGCACCAGATTTCTTTTCATATGCCA -CAGCTCCATCAGACGCCAACGCTCGTATGATTCGTCCGATTCCAATCGTTGAACTTTCAG -ATGTGATTTCCCCTTTCCACCCCAAACTTTTGGTATTTTGATGCTTTGTTCCAATCTTCG -AATGTGGCTGTGACGGCCGATGGCTATCTTCTGCCAAAACTCCTGCCGCAGAGCGTGTGT -TCATCGTGTTTTCTTGGTCGTCGGCCCCCTGCCAAAGCTGTTCTTCCATGATGGATTCGG -CACATGCATCTAGAGGTGTACAGTTTTTCTTGTCCTCGATGGTGATATCTGCTCCATAAG -CGAGGAGAAGATTCACTGTCTCCAGTCTTCCAGATCGGCAGGCCACATGCAGCGGCGTTC -TTCCGTCTTCCGAGCGTGCATTGAGCATTAGAGCGAGTTGTTTAGAGTTGTAGTGATCCA -AAATCAACCCTACCGAGTTGCTTTGACGCGCGGTTGATGCAATATGCAACAGATTGCGAC -CATCCTTTGTTGAGGATGTGGGATCAGCTCCACGAGCGAGTAATTTACCGACCAGGTGCT -CTGAAACAGTAGCAGCAAGGTGGATAGGTCTAATTCCCTGGTGATCACTCATGTTTAGAG -CGACACCGAGACCCGCATCTAGAAGTAGGTCGATAGCACACTTGCCTCCCTGTGCGGTCG -CCGCGAACAAATTATCTGAAGACTGACTGCATAGCATATGCAGAGGTGTCTGACCTCGGA -AATTGGGAATCGTCGGGGAGTTGCCCATTTTGACAAGTCGGATTATGGCCAAAAGGGCTG -TATCTTGGTTGATAGTGGCAAAATTGGCCGCGAGATGATGCAGGAGTGTGTCTCCTTCGT -AATTTGTCATTTTTGGATCTGCACCAAGAGACAGTAGAAATTCCAAGTACTCGGATCGGA -AATCGTATGGCTCAATCAGATAAGATAGTACCCCGTTGCCTTTATAATCCTGCGCGTTGA -TATTGGCTCCGCGGCTGATGAGGTAGGGAAGAAGCTTGCGACAATTCCACTGTGCATGGC -CGCGCATGGCCTTGGTAAGAAGCGTACATCCGCGATAGTCTCTAGCTTCGAGATCTGCAC -CGGCTGCGCAGAGAATGTCCAACACATCCTGGATATTTTGTTCCGATCCCTCCACCATGT -GGATAGCCTGTTGCCCTTCGTGGTTCCTCTGGTTGGGGTCGATTCCAAGCTTCAACAAGT -TCACTATAGTGTTGCTCCCGCAACTATGTTTTGCGGCAGCGATATGAAGAAGGGTGTTGC -CCTTGCCGTCAGTGATCGTCCAATCCGACACAAAGGGCTGGAACAGCGAGAGATCGCCCA 
-GCTGCCATTTAGACGCATAGGCATGCAGCGGGGTATGACCATCTTCATCTCGAATAATAT -CCAAGCGGCCTCCATGTGCCATGAGGCCCTTGAACAACTCTGGGTGGTCCAAGGGATGGT -GGAGAAAATGCATAGAAGTGCCTCCCCGGTCCTCTGCGATGTTCGGATCCGCCCCATGTT -GCAAGAGAAATGGAACAAGGGCGGTCTCGTTGTGTACTGAAAGTAATAACGCAGTCTGTC -CGTGGGCATCCCGCGCATTGATGTCGCATCCGGCATCCAGCAGTACCTGCATCAGCTCTT -CATAGCGGGACAGGTCGTCCAGCTTTTGGTAGGGACCCACACTGTCCACCACTCCGTGGA -AGGGGGTAAAGGCAGGGAGCTCGTCTCTGGACCGCTTGTTCCTGGATCTAATCGTGACAT -CGGCCCCATGCTGAAGCAGGAGTTTGACTATGGCCAACTGGACCTTGTTCGCGGCCAGGA -ATAAAGGCGTGTCTCCATCCACAGCCGTGTTGACATCACATCGCCCACTCTCAATCAAAG -CCTGGGCGCTGGCGATTTTACCACTTTCGCAAGACCGACGTAGAGCCGCAGTCAGAACGA -CAGGGTCTAAGGTGGCAGCCAATTGGCGCACCTGATCCACATCTTCCGCATCAACTGCCG -CGTGGAGCGGAGATAGAGGCTCATTCTGCTCGGGAAACCCATCACCGTCCTCGAGCTCTC -CAGGCATTGCTCTGCGGAGAGAAGACAGGAAGTCCATGATGCGGCGAATTGTGGGACTAG -GAGTCAAAAGGGGCAGGTATGATCCCTTAAATAAATATTGCACGCTTAGTCTCGTCTAGG -CTCCCGTGGCAAAAGCAAGCACCCGTTAGTCTCCCACTCTCACCTTGCACACTTCTGCCT -GGATAAAATATCAATCGAGATAGATGTGTCTTTCTGACTCTTTCGTTGCGAGAAAATAAA -CATTGTTGTTCCTTGCCCAATCTCATGCAATCTTGGCAGCTTCAGCCTCAATAATGAGTT -AGCGATGACGGTCGGCGCCAATTTATGTCGGTTCTACACGAATAGAAATTGTTTCATTGT -TTGATTCATGTTGCTTTGCCCATAATATATCGCCTCTGGATTTTTACGTCCTACCCTTTC -AATATATATTCCGCAAGACTAATATATCCGTGTGTGCCAAAAGCCACAACAGTGGAGCAG -CTCATTCTCGGGTTTACTAAACAAAAATGATCAAGATGGATGGATAATTGTAACTTCAAT -AACCCAAAAACAAAAAGAATAGCACGAGCTTAGACCAGTCGAGCAAAGGCAAAAAGCAAT -CTCTCCCTCGGGGAATCGAACCCCGGTCTCCCGCGCTTATCGGTTTTAATGACAAGCGGA -AATCATCACCACTAGACCAAGGAAGATGCAGTCGAGTTAACTCGACCATTGGCTGGTTGA -TGGAAATATTAGCTCATTTTGATATTATGAATCAATCATATATTGAGCCGTTTTAATCTT -TACTTTCCACATTACTCCAAGCATCATAACACACAGTTTGCCTACTAGAATTTGATTTGA -CTACATTTCTTTTACTCTAGACAATGACATGCTTTATGGTATATAAATTTCATTCGCTAA -AGTTCAAGATCTGTTCCGTTTACACTGGAATTAACCGTATAACGGAATAGAACACAAACT -CGTCCACACAATGAGATATTAAATCCAACGCAATTCAAGACACAATGAGAATGATCAAAC -ATTCAGAAACAGAGAGAGGACACACAGGTACGTTTGGCACATCTCAAACCTGGACGACCA -TATAGATCGAGGGCAGGGACTCGGGCTGAAGACGAAGTAGTGTTACAGCTGATTCGCTAA -TGCACAGCGGTCGCGAGGGTGGGCGCCCAAGTGCAGTCATACCACTACAGGGTGGACCAC 
-ACTCGGTTCCAGGACCTTGAAGCAAAGCAGCTTTCAAATCATGGCGGATCAGGTATTGAT -TCTCGCCGAAAGGTCGGGGTGGGCATCGTCATAAGATGGACGTAAACCAGAGGGCAAACC -AGACAGAGTAGATCCGACAGGAGTTCCATATGTTGGACGTATCCGAGATAGCAGAGCAAG -ACCGTATATTCAGCTCTCCTTGCGGAGTTTATCGAGGCTATCACGAACAGTCTCGCGAAC -ACTGAATCCACCAAACAGCCAGTCATTGGAGTGACGACCATAGTGGTTGACAGTGCCACT -GTAGCGCTTGTAGTCATCGCTGTTTTTGCGGGTGGGAGTAGAATCACGCTTGGGGCAGTC -AACCTTTGGAGATTGGGGCGAGCCCATGCTGAACATCTGCGTGGGACTGGTTGAGGTGTA -GCCTTGATCCGGGAAGTATGAGGTTTTAGACTTCTGCGGGATGTCTCGAGGCTGTGACAT -TCTGTCTTGTTTCTTGGGAATATTGATTGGTGCAGGAGTCCTGGTTCGGAAGAGCTTGGA -GAGGGGGTTCGGCAGTTTAGAGTGTTTCTCTTGAGACAACTGGCTGGAGCTTGAGGTAGC -TGATGTTTGGGAGAGGGATCTCTTGGGGTAGATGTTGGACTGAGTGTGCATGAAGATAAA -TATGAAGTAGAGGATGAAAAATAGAAAGAAGAAGCCTGCGGGAGGGGTTTATTTATGTTC -AGGGTAGAGTGATTTCGGAGTGATCGTCGGGGATTGCTCTGTAGGGCAACCAACCAGAAT -GCGGCACAATGTACAAAATTATAGCTATGATGTAGTCAAATACGGACAAAGGAGCGCCCT -TCAAAGACACAACACCAATCAGCTTCATAACATATGACTGAGAAACGGACTGATCCACAA -CCTATCCAATAGACATGGAAAGGAGGAAATTGGCGTATCCACTGGTAAGGTTTAGTGTGA -ATTGTTGTTGTCGGTGAGATTGTCAAGTTTAGAGAGGCCCAAGTGCCCTATCGCCCGTTC -ACCAGCCAATCAATGATTGTTTCTGCCTTGGAGCCTCCTTTTCCCACCCTCTGGCCCCCG -CAGATTCACCGCTTGAACTCTTCACATGTTTTTTTTTTGGACTTTCATTCAACACACCTC -TACTGCACTTTCCGGAATGTGCATACGTGTACTTCGTATACGGAGTACGGACTGCAGCTA -TTTCCTAAGATGGAAGTCTTCCACAATTCCTCTAATTTATAGAGGAACCGTGACTTTGTT -ACGCCCAAGTGGGTCACTGCGGGTTATTCTCCGCGTTAGTATAAACAAGAATTCAAGCCC -TTTGAGACATCCCGTGCTCCAATTTTCTTGTGTTCGCTAATAAAATCCACTAAACTTCCT -TCTAGCGTCCTTCTAGAATGCCGTGCTAACCGACCTAGGGAGGGACACGGTAGAGATGCA -TAACTAGTGCATGAGTCACCTGGCCCCCACAACCTTGAACGCCCGTGAGCTCAGCTTAAA -AGGATACAACATAGATAATGGTGTTGGGCACTTGTCACGATTACTATGCAAAGTTTGAGT -GTGGAAAATTTGGTTTCATGCCCACTCAAATTCCTCTGCCGGGGTCGGTTATAGTGTTGT -CAACTGATTGAGAAGAGGCTATCACTAAGAGGGTTTTGATCATAGACTTATGCGACAGTG -CCAGAACTAATAATGACCCCCTGGGCATGGACCTCATGTCGTTTCGCTTACGTGGCCCGT -CGATCCCCCCGGGCGGAACGGCTTGGCTTGATCTCCGGTATTCGGTCATGAGAAGCATCA -GGTAAATGTTGCATCCCTATCAAGCAAAGGGCACATGATATTTTCATTATCACGCCCCAG -TGGATCAAGAAATATGAAAAGTCAATTTGGGAACATACAAGACGCGTTCGAACGTTCACT 
-CGAGATGTCTATATTGAAGTAGGGAAGCTATGGCTATTGTGTAATCTGCTGATGTATAAC -ATATGGGGAACGAACGGTACTTTAGGTTAGGGTTTTATAACACATGACTCATTATGAATA -TAAGATACTACCTCTTTTATGACAACCAAGATTCAAAGAATCCACCTCGATGTTAGAAGC -CACAATGGCTGGTGCTGGATATCTCGTCATTGCACTTGAGAACGGTGATCCAGGGCATGT -CTGACAATCTCCTCTCTTGCGGTCCCACCGACGAAGCTTTAGCTTCTAGCTCATGCATTG -TCATATTGCCTGGCATGCAAAGCAGAATCTAAGGCTGCAATGCGTATAGCATCAGGAAGA -GGTACCGGAGCTGGTGGCCTCTGTGGTGGCCAATTTGAATCAATCTGTGCCAAGTGGGAG -GCTTGCCATAAGGGTTCTTTACATAAGAAGCATATCACGAAAATTTGAGACTAAAACGCT -GAGATGTTAGGGCAAGCATGTTGTCAATATGTAGACTTTGTACCTACGGGGTGGTATCTC -TTCGGTGAATATCGAGGGCAAGACGGAGCAAGAGCCTGGGTGGGACACTGGATTTCTTCT -TCAATTCTCCTGATTTTGCATATGTGCCCGAGCTTTGAAGACACATAGAATAATTCAATC -AACAATTCAACAGAATGGGAATTACCTGGTCGCAGATTTTCCCGCCCCCTCCTACTCTGA -CCGAGGTCAACCTCCCCAGTCAGAATGGCAAAGTGTTTATTGTAACTGGAGGATACTCAG -GAGTTGGCTTTGAGATATGCAAAATTCTTTATCGAGTAGGAGGAACAGTATATCTCGCGG -GTCGGTCTGAAGAAAAGGCATTGGCTGCGATCGCGAAGATCAAACGTTTGTGTACCCCGA -CTTCCCCAGAGGGAAACATCATGTTTCTCCCTTTATCGCTTGATGACCTCACGACTATCA -AGCCTGCAGTGAAGATGTTCACCACAGCTGAATCTCGTCTGGATGTTTTGTTCAACAACG -CCGGTGTTTCTGATCCTCCCCAAGAGACTTCGCCCCAGGGTCACGACCTGCAATTGGCAA -CTAATTGTCTCGGGCCGCATCTCTTCACTCAACTGCTTTTGCCAATACTGCGGCGAACGG -CTGAAAATACATCCGTCTCTAGTGTACGGGTAATCTGGACAGCATCGATTGTTGTTGATG -TATTCGTGCTTGAGACAGGGATGGGGCTTGCAGAATTGTTACAGAAAGGCACAGACAAAT -CACGCTATTACTTCAACACCAAAGTGGGAAATTGGTTTTTGGCAGATTCCCTCGCAAGCC -AGATTGGGGGAGATGGGATCCTGAGCCTGGTGCAGAATCCAGGGAATATTAAGACAAACA -TCATGCGCCATAGCCAGGCTTATGTTCCTGTTCTGCTTGGACCGCTGCTTTACGCCCCTA -GGCTTGGGGCATATACTGCTATCTGGTCGGCTTTTGCCAGTGATTTGAACATTGATGATG -GAGGGAAATACATCTTGCCCTGGGGCCGTATACACCCTTGTCCAAGGGATGATTTGTTGC -GAGCCATGAAAGGAAAGGAAGAAGGAGGCACTGGAGTTGCAGCGGCTTTTGTAAAATACT -GTGATAAGCAGATTGCAGCGTTTCTGTAGTTGCCATTTCACGCCGTCGAAACTTTTGAAA -TTTGCTCACGTCTCCCTTTCTAGAACTCAATCCCATAACCTGAACATGCTCAAATAAAGA -GTACAATGCAGAGCAAGCAAAACAGTTTAACAACCAGATGTTATTACTAAGGTCAATCAT -TCTTCGTATTCCTGAAGTGTCCACAGTGTAGCGCTCGGTTTTCTCCATGCGCGTGGTTGT -TCAGAATGACACCTCCGATGTCGTCTTTCAGCCATGGGGTTTGCTGCGGACGCTGAAGAT 
-CTCCCTCTATTTGTGCCCGAAAGATAGTCCCGGAAAACTGCCTGCTCATTCAAATCTCTG -TGTGATATAAATTCAGACATAAGATACGAACCGTTAGATGTCATATTTGAAGGGACGACA -AACAAAGAGCGAGTCTTGTTCAAAGGCGGCCGACGTGCTAATCGACGTAGAACACTCCAG -TAGGCCTGCTTTGTCTGCCCTGATGATCCCGACCGCGAGCTCTGATTGCTTCGTCCCGTG -GATGATTGAAACCAGAACCGAGGAATTCGTTTCGGTTGACGAGAAACTTCGGGCATCCCC -ATATCACCACAAAGCAACTCCTCTGTGCGGAGTGGAGTCACCGTGGCTTGCTGTAAAGAC -AGTGGAATGTAGGTATAGGTTGCGGGACCGCCAAATGGGTTGAATACATGGACTTCTAGC -CCGTGGGGCCCACGCATGCACAGTTCAGAGCCGCATTGGGGCCATGTGTGGTGTCTTCTA -TGGCCGGTAGGACGCATATTGAAAGTAATCAAAGAGTGCAAGATCAAATTGACTGGACAC -GAAGTGGTGAAAAATTTATCCTGTGTTCATGACGTATATTTGAAGTGAGAAGAAGACAAG -CTGAGACAAGTAAACATCGATCACAGCTTTATAAAGCCCACGGGTAGTCTATTTAGTCAG -CTTGCAAATGTAGAGCAGTATATCATTCAGAGGTTTTCCCGGGTGACCGAAGGCTCATCT -GCGAGTGAAAATGACCCAGGCGCAGATCAAAATGCCAGGAACCTGGAAGTTCAAGATTCA -GGTGCCAACAGGTTAGGAGCACAGAGAGAGAAAGAGATCAATGGATTTGTGTCAAGTGCC -GTGTGAATCGCTTTTCACACCAATTATCCTGATTTAATAAATTTTCCATTTTGGAATTGG -ATCTCTTGTCATCTCCAAGGGCATGACCTTCTGACCTGGTGTGGTCGATGACGGCAGATT -GTCCCATGACAGGAGAGGCGTTGAATTTCATTGCACTCCAAGAGTCCAAATGGGCAAAAG -AGATTTTTGGTGAGATTTTTGGTGAGATGTTTGCTAAATCTCTACGTCTTGAACTATAGT -CTTGATGCGTCAGTGATGGATTGAGGTACAGATACGTACCGAGTACTTGGGTCTATGGGT -AACTCACATAATGAACGCAGGTGCCCGGTACCGGTCAATCCCTACACCCACTATGTAAGC -AGCGGAACAGCCCGCCCACTAACTAGGTGCCTATGGCCAGGTGAAGGGCACTTGGGCATT -CTCTATCAAGGACGCTTCGACCAACCAGCGGCGAATTTTTGGATCTTAACATTGTCTCAC -TTTCCACTGTCGGTACTCACATTATGCATCTGTATTCTTAGATACTAGGACTGTGGATTA -AGAAGGGATGGAAATTCTGTGCTTACTTCGATAAGGTTCATAGCGCTTACTCTGCCAGTC -AAATCGAAGAGGCAGACGGAAATGCTTCTATCCACTTCGTCCTAGTATTTATGTTTGGAA -AATTGAAAAATGGGACATGTATAAGATTGAAGCATCGCATACTTTTCTAGTCTGTTCTAG -AAGTATATAGATTGTATATCTCACTCTTCTACTTTGAAATATCAGACTACATGTCATATT -CAGCCTACGGAGTAAGTACATTAAAACGACTTGGCCCCCGGGGGCCCAGTTTCCAACTTG -GTCTAGTTGGCGTATACTGACGGACCTTTGCTCCAATGACAAAAGCCATGCCAAGAGAAA -AGCTGCATGTTTGTCCGATCAATACTAAATCAGGAAGCCTACTCACAGCCTCTTAACACA -GCCCATTATCCTTTCATTTCGCTGAAATGGTTCTTTTCCTACTTGACTCGTTATACTTGC -CATGCAACACGCCTTGTAATCTAGTTTTGCTTTCCTGATGGATGTTACTGGAGACTCTCC 
-TTCAATATGTCAGAGCATTCCATGGACCCCAAACCAGGCAGCTATTTTCCCTTTGCAAAC -GAAGAGAGCGTGGATGAGAATGGCCACAGCCCTCGTTAACTTTAGACGCCATTTATTCTG -CGGCGCCTGACCATGATAGTCTTCCTGATGGGTTTTCTGTCATGCTTGGGTACACTTATT -GCGCTGTATGTGTACTCTGAGCAGCAAAATCACAGCCTGGGTATCAAGACAGACGGAGAT -AAATATTACTATCTCGGGACGTATGGGCCAAGCGCAGGTATACAAGTCCTCTATGACTCG -GGTTGAGTTTAGGATTTCTAATTCCTTCGCAGTGTTTACAATCTTCACAGCGGCCTGGAC -GCAGGCCGAATACCGTGCTGTCCAGCTGATGCCGTGGGCTTTGATGCTCCGTGGTCCAAC -CCCAGCTTCGCAGAGTATCTTCCTCGACTACCTATTGAAACTGAACATTGTATCACTGTA -TCAATCACTCAAGCAAAGACGATTTCTGGTATCTCTCTGCGTTGCAGGATCTCTCATTCT -GAATGGTGTTACGGTGTTTTCCACCGGTCTCTTCGAGCTGGACTCAGTACTGTTCAACCA -TCCTGCTAAATTAACGGTTCCTAGAAAGTTCAGCGAGGCCGATTATGACCCTGATATGAT -TCACACTAAGTCTATCGTTTCTGGCGTGGAATTTTCAGCCCATAATTTTCGACTCGCCCT -GTTGGTATACACGAGCCATTCGCGTACACCCCTTTTCAACCATTGGACTCAAGCTCAGCG -GGAAATGACACTGTTCCCGCAGAGAGGACTTATCAGGCAGACGTGGAAGTCGTCGACCTT -TCTTTTGATTGCCAGAATGCGACCACCTCTTTTGAGCCATCCTCTTTTGATTCTTTCACT -GGAGACACCCCGGTATACAAAACCTCTGATACCTGTCGTTTTCAACAACCAAAATATGCG -CCGAGCGCAACGTATGAGTGAGATACCAAGTATGGTATTGACATCGACCTCAGATGATGT -CAAGGCGAGAAATTGAACGGCTCTAGACCCAGCTATCTCGGGCCGCATTCGAGCGACTGG -GACGCCGATTGGCGTATTTGGGCAGCTATTACTCCATCAGTAGGTTTTGAAATGCCGAGT -ATCATGAATTATACGGGGTGGGCACAGCGTCCAATTCATGTTACTGTGTGCAAGCCCCGC -TGTACCACATACCGGGGCCCTGTGAGAATATGGGGGGAGGCCGGAGAAAATGCCATTTCG -GTAGATATCCAACGGGAAAGCTTGAATGTCATAGAGGAAATTTCTGGTGTACAGGCATCT -AAGATTATGTACAGTGGTTGGAAGTCAGCGTCAGGGGAGACACTACGAAAAAGACAATGG -GCACTAACTGCTGCCACAACATCCCTCCTTCTCGCGGCCATCAACACCATCGTTGTATCA -GGTCTTTTACAGCCAATTTCACGGGGCCACATCCCCCATAAATAGATGTCACGCAAATCA -CTCGTTGGAACCTAGGCGATCCTATCACTTCCAATGCCCTAATTCAACGCCGGGAGCAGC -AGGACGGCTCTGACATTGATCCTACAGCTGGTCTCATCCTCAACCTCAATCTCTCTAATC -CCCAATGAACATATCGCAATTTGGTTTTCCCCCAATTCGCACTCACCACAACCGTTCAGC -CACCACTGGACACGGGGTTCATCAATGCCCGAATCCCTGCTCTCCGCAGCCAGTTGACTT -GCGCACCAGACCCGACAAATGGCGGTAAATGCAAGATAAAAAAAGGGAAGATGGGGTGCG -AGGATAACTCCCTTTGCTTCTCTCCCATCGGTGGCGAGGTTCTTGAATCTGATATCGAGT -TTTTTTCTTACAGACTCAAATCCGCCATCTAGCAATGTACCTTCAAACTGCTCTACGCAC 
-GCTATGATGTACGGCAAAAAGGATAAGGAGAATGGCTACCGTGGCGCCCCTAGCATTTAC -CATGATATCTACTGTAACGCTACCATCGAAGAGGTAGAAGTTAATACAAAACTCCAGCTT -CCCTCACTTCTCATCGACTTCGACACCCCACCTAGGGTATTTGAGGGCTCTGCTCGGATA -CCATTCAAAACAAACAAGGAATCGTTACCCGCAATCTTACGATTAAGCTCTTACCTCTTT -GTATCCGATCCCAGGTTCAAAGACTCGTTCCTCGATGCGATTACAAACGGCATCAATGGC -GTCCCGTTAGATGAGCTTCTCGACCCGGAGAAATTGATCAATCGTGTCAATGAGGTTTGG -GGCATTATTATGGCCCAATTACTCCATACCGCCGCAAGAGATTCTTTCGACGATCCATTC -GAGACGACATACTTTGTTGAGCCCGCCACCATGAAAGCACCTACTTACAGTGGTATTTGA -CGAAATCATCTCGTGCAAAGCGAGATCTCAACTCGGATTCTTGATGGCGTACTCGGCAGT -GTGGTTGTGTGCGCGTTGATAGCGCTGTATGTGATGCATACCAAGATGGTCCTTCCTAAA -AGCCCGACTAGCATTGCTGCTGTTGCCTGTTTCCTCTACGGCTCACGCATGCTGAGCTCT -GCCATTAAAAATGGCTCAGAGTGGTGCAGTGACGAGGAAATGAAAAAAAGGGGGGTGTTT -GAGGGACGCTCGTTATCGATGGGCTGGTGGGAAGTGGAGAGGCAGAGGAGTCGGAGCCAG -TCTGATGTTTCTAGTGTTTCGAGAATTTCCGAGGAAAGATTTTCTGGGGAAAGTGTTTCT -GAGGAGGAGACACAGCCACGCGCTGCGTCTCGTGGAGATCAAAACTCTCCTGACGAGCAG -AACGGTCATAATCAGACTAGATTGGGAATTGATATTGATATTGCGGATAGACCGTTATTG -CGTAATGCCTCTCCTATCTAAGAGGTATCCTATATAATATGCGGAGGTGCTTGAAAATAG -AATCACTACTTTACTCTCCTTGACTTTTTGCCGACCTGTATACGGAGTTGGCTATGAAAA -AACATGAGATTTCTTATTTGTAAACGAATAGAATATGAGATTCTTCACGAACATTTTAAG -AGACGGGACCTTGTAGTGGGCTTATCTAATGATTGATGTGCGTTGGATGAATGAATAATA -GGCAGTGTCCTCCTATACTTCATTATCCAAGAATCGTCTTTTTACCCACATAGGAAGTGA -TTGTCTCAAGCCGCGATTAGCCTGATAAGGGCTTTTTTTGTTGTAACCCGACCTCTATCG -TGAGTATTCACTGAGATATTTATAGTTATCTAAGGTTTATTTTCTAGCAAAAGTCAAAAT -TTATGGAGATTAACTTCGTGATACGATCTGTACAAAGTCACGAGGCACCTTTCCATTTGG -TCTGTTCAAGCCTTAAATGAAGCTGGGATCCAATCATTGCACAGTGCGTGTTAGCCCTCG -AAAATCTTGTACAAAGTACCTCGTACAAATACCGGCTTTCTTCCCTTGGTGCTGCAACGG -ACTAGACCATACTAGATCTCACATTCATCTACTCAATATAGTAACGGCAAATCTCCGAGT -ACTCCACTGAGGCATCGGTTTGGCGATCGTATCATATTGTGGCACTAGGTGCAGGCGAAA -TATTCACAAAAAGAAAATTATTTGCCACCCATATGTCTGTGGGTGAGTCTTACCTTAATA -GGTTAGAAGGTTGACCCGTCTAGTGGGTATGCATGTAGTATAATCCGGATGTATAGTCCG -ATTAGGCCCTTTGACTAACGGTCTCCTTGTGGCTTCTTTTGGTTACTTGGGAACCTATGA -GAATATGATAAACGTCACCTTGGTAGCGCACTTGATCTCTATCTTTTACACGGCTCTCGG 
-CAATGTGTACCTATATAGGTGAATTTGCTTGCCCCGCTCATTAATACAATCTGAGCAAAC -CCCGTGAATATATTAAGCTAAATAGTTGGGCCACCAGCCTCGGTTTTGTTGATTTCAATA -AATGCACTCGGCGTGTTTTCAGTCGCGCTTTGAGAGAGAATAATCAGCGATATTTGAACT -CTATTATGAATTTAGTACTCTGTAGTACGCCGTTTGCTTATCTCACGCAAGCCATTATCG -ACGGGGGATTCATGATGATTGGACCAGTGATGAGTGAAGTTGAATCCAGAACACTAGACT -GTGTGTTCATGACCATTACGCCAGAGTTATTATGGGGCCACCTGGTCCTTTCGTCCGAGG -CGGAGCCGACATGTTTATCATCGAGCCAGTAAGTAGACGATGATGATATCAATATGCGGT -TCGGGGCGGAAATTGCGCAAGGGGGAATCCAAGCCCGGGGTTTGATGAGACCGATGGTGA -TCCCGAAACCCTAAAAACGCTAGAAAGGTAAATGGAAAAAGTCTCAAGTTCCTTCATGTG -AACCGAGGGAAAACGGTGATTTGTTTTAGCCCCCTGATGAAAGGTATCTGCAGCTATGTT -GAAAAAAAAATCTGCAGAGATCAACATTTGGTTCGGCGTTGGTTAGTGCAGGCGCTAATA -GAAAAAAAAAAGAAAACACTTATGACTGGAATAGTCGATTAGGCAAAAAATAAAATAAGA -GACACAATACAGAATATGTTTGAAGAAATTATCTGAGAAAAATGGGATATAGATTGCTCG -AGACTGTGATGGATTTGCATTTGTTCGAGAAGGACTTGGTACCCTTATCATAGTGGAAAT -CCTCCGCGTGGCGAAAAAAAAAGGAACCCTCAGCTGCAGCTCAAACATAAATAACGCCGG -AGATCCCCCGAAGCAGACTAAAAGGGGGAATGCTTTATATCCTTCGATCATGCACTCTGC -CATGCGGCAATATCCGCAGCGGAAATATCCAAGTTCTCAGCCGTAAGCCTCTGTAGCCGT -GCCACCTGAGTGGGAGCTAGAGTTTTGAGTATCAGCGAGAGTCCAAAAGGGGCAAGTATG -GGGTCGGGGACAAATGTTGGTTCCTAAGGAGGGTAACGCGTCGAAGAGGGGCAGATCAAA -AGAGAACGGGAAAGATGATCAGAAGTGGTAAAGGGTCCCGGAACAAAGCGCAAAACACGA -CAGAAAGCAAAGGAGGTTGACCCACTTGGCCGTTGGGCAGCCGCACCGGCGTGCACATGG -ATGACCGTCGAATCAGCATACTCCTTTGTCACAACACCGTTGTAGGAAGCTGCACTAAGC -ATCACCCGCAGCCGAAACTTGCCTGGCTGCGAGATAGTGAGCGGGCTAAGTTTCGCATAT -CCGCTAATTGAGTTGTCGGCTCGGCTCCGCACGCTTGCTGTGAGATTACCACTCAGGCCA -ACGGCTGCTCCTGTTCCAGCTTCGTCGCGGAGGGAGGCGCTCACGACAAGGTGCTGCACA -TTTTGCGCGGGTGTCCCAATTGGGTTCACGGCAATGACTACTGGGATTGTGAAGGGGGAT -CCGGGTTGGACGGCGGTTGGGGGAGTAGTCTCGAAGGACAGTGTGAATTCACGACCGGCC -ATTCTCGTTTTTGTGGGACGGTATGGTGCTTCAGTAAAGACGTATTGCGTTGAATCACTT -GTGGATATGAAAGGATTCCGACTTGAGGAGTTATAGGAATGAGGAGATCGTGGAGCTGTA -ACCGTGAAATTTAATTGAACAAGATTTGGATGGAGTGGATGCAAGAAGAATAAAGGGAAC -GATGGATAAATTGGATGAGGATATAGGCAAGGCAACTAAACTTAGAGCTCAGAGAATAAT -GAATACGAGAATCAGGCCAGGTGCAAACATGGCCGTACCCATGGCTTCATATACCACATG 
-GCGCTCGCCGAGAACAATACCAGACATAGGTACATTGGACCCATAACCTCAATTATGTGG -TAAGTCATGATGGGGGCCACGGGAATGTCGCCACCATCCAAAAGTACACGATTAGCCTAG -GTGATGGTCTGTGACCAGGTAATCGGGACGGCCAAGGACCATCGTCCCCGCTGAAACAAT -AATTGTGAGCTCAATAAAAGGTCTGCCCGGCATAATACAGGATCCTTGCAGCATCACGTT -CCGGCAACAGGATCAAACAGTGGGGCACGTACAGAGTGCTACACGCTTTGTTCCTGTTTA -GGCTGATCGACGGGTACGCGTCCTAGTGAAAGATTAGGTGTGGAGAAGGTACGAGATCTG -TGAGATCGAAGATGCGCTGCACGAAATCCGAAGCTAGTGTCAGGATCGACCCCGGGATTA -CCTCACGACAGGGGTTCCACCATGGTCTGTAAAACGTACCCATTTTGCACACTGACCCTG -ATGGAGCCGAGGTTTCCTGTGGAAACCTGACAAATCTAACAGGGAAATGGCTTGGCATGC -ATGATTGACGTTCATGTGCAGTAGGAGTTATGAAGGCAGATATAGGACTTCAAGGGGATG -GTGTGCTGACTTGGGGCCTATCCTTTCAATAGTTCCGTGCACGGATCAAGGTTCCTGCCC -AACGTTGCTCGATCTCTATCGCAGCGTCAGTGTTGTCATTCAGCTGTCGGAATTTGGTGA -ATCGCAAGTGTCACCGCTGGATCCTAGCTGCTCGATGTGGCGCACCACTAGTATGCAAGT -ATCTCATCTTGGCCATGGTATCATGGTACGGAGTACATGTAAATGTACTAGCTTTGTTAT -GGAGGTGCCCACTCCATCTAGACCTACTTTAAGATGATCACCCATTCGGACTAAAGGATC -TCTGGCTTTGTCTAGCCCCTTTACCAGCAAATGCAAGCCACATCCCAACGGTGAAGTGTG -GCGTTTGTGGAGTCGAGTGAACATCACAGCCACTCGGAAACCCCGACATATCCAATTTTG -AAGTCTGAAACCCTATTTTTCGACATGAGAATTCTTTTTCCCCCTCAAAATCTATGAGAA -CGGTGTTGCCTAATGTGGATAAAAATGAGGTGGTGAATTGGAACTTGTGGATGTTGTAGA -AGTGGACATTTTGACAAAATGAAGACTCAGCCTTACTAGCCAGGCCGAGACCCGCCTTCA -CGGATACATACTCCATGTGGTATGCCAGGGAACTGGTTCGGGATCTGAGATAGACTCCAC -AAGAAGCTTGAACCATCTCTGTGATCCTATTACAGTCTTCAAGAGCTGAGTGATCGTCCC -GACGATCTCATCAATAATTCAAATCAGTCAACGCCCCGCCGCTGCTGTGGAAACAAGTAC -CGTGCGGAGTACCGGGGAAAACAATATCCTTTGCTTTTAGTGGCCCGACGCCCGCTTAGT -TGGGTAGAAGATTGCGACGTAGGCGCATGCAGAACGAAGTCTGATCCCAAGTGCTGGGAA -ATGATGCAGCTGCAGCCTGCAAGACAGGTTTTGGCGCCGAACTTAACCAGGCGAGAAGGA -CAATCATCAAAATAGCCTAACAGGAAACCGATCAGAACTGGATATCATAATACCCAACGC -ATATGCGGAGTCTGAAGCCCaacaaaaaaagcaaaagcaaaacaagacacaacCTTGTTT -CGAAAGTCAATAATCGCACCGCCATCGTCTCAGCCGCAAGTGGCTCGAGCGTGGCTTATC -TGTGGCTGTGACTGCAAGGAGTGGCGCAATATATGTGGCTGTGGATCGTGCCGTTTCCTC -ATTTGGCAGGTGGGCATAGATCGTGGGTTCCCCGACTCAATCGTCGATCATTGCCAAGGG -TCATGAGTTCATGAGATTTATTCGAAAACACCGCCGATCTGAATTTTAATCTGAATTTTG 
-GCTTGGTTAGCGTCACTGCTATAGTATTTTGCCACTCAGAGGGAGTCTGGAACCTCTAGG -CCTCTGTTGGATCTAGAGGGGTCGGCAAAGAGGTGAACGCTTACTGCCGAGAATTCTGAA -TGTCGACATATGCAGGAATGTGGTAGATAACCACTATTGATCCTTGCCAGCCAAAAGTAG -AAAATAAGTCATGGAGCTAAGCAGCATGGAGTCCTGACTTATCGAACATTGTGATTCGAG -TAGGGGGTATCCGGTAGAGAGCAACCACTCAATTGTATCCCATGTCCACTCTGAAATTAT -CTGCCCGCACGGACGGTTCATTTGAATGCCTGTCTCAGCCCCGAAAACAGGTCGAGTCTC -ATGAGCTAGATTTTGTGATGACAGCACAAGACAACATTGGCGTACTACATGTCCCCTAAG -ATGATAGAGCAATAGTCGATGAAGGCTTGCTATATGTGTATGCGCGTAGGAGAGAGAGAG -TATGACAGTGACCGGCTGCCCAAGAGCACATTGAATGTGATTTATTGGGGAGAATATCTA -TAATATACTGAATAAAATCCAACCAGCGAGGACCGATTTAGCGAGAAATATCACTGAATC -TAATGGCTTGGCCCTGACGGAATTTCTCACGGTCAATAGTAAATATCAACGCTGTCGTCC -TTACGGGTTGTAACTTGAGTGAATCGCAAATAGCTGAGATCGATATCGATAGAGTAATCT -CAAGCATTGGAGGACATTTTGCAGCCACAATGTACCTCATGGTATTTGCCATTATGTCCT -CAGCCCAGGAAAAAACACTGTGAAGAAATATCACTGCAACAAATGAATGTGTCCAGGAAA -TGTCGAAAAGAAGGTAGGAAAGTTCCGGGCATGACTTGCTACTTCCTTATCTCTCCAACC -TCTCTCATTTGCACATTACGAGCAGACAGACTTGACGAAGCCTGATGCTCTATATCCTTA -TCGGTTTATGGAGAGTGAATCCAGGCTTCTGCTAAATGGCGCCACTGGGCACTCGACAGA -GCCACTCTCTAGGTCCTGGCATCCCACGCCCCCCAAAAAGACCTCCAAACACTGCAGTCA -AAGTTCATAAGTCAATTGACCCCACGGTATTCGTTGAGTTATTTGTTGGAACCGTCGGAA -TATTCATAATAGCAGTCGTGTTTTGGAAACTTGGGCGGTTCATCAGGAGATTCAATCGAA -ATAAGGTACTGGGGGCAGGCAAAAATCCCAACACCCGATACGCACGCACATGGTATGGCT -GGGTCGCGTGGCCGACACACGAGAGGAATAAACAGGCACTTTGTGACTTTTTCACTCGAA -TTCGGAAGTCAATGGTGTGGAAATCGACCCCAGAAGATTATAGTTGGATTTGGTGGGATC -CTGGCGATGTGGAAAAGCAAAAGCATCGCCAAGAAGGAAAAGGACTTCGTTGGATACCAG -ATTGCCTCAAGAGCTATGATGATTCTCCAACTGCCGATGAGATCTGGAATCGTTGTTCGC -GACCCAGGTGTTATGGAGCATTGAATGACAGCGTCTCAATGCTCCAGCCTGTTCCAGCGA -CTGGCTCTGCGCCGCAGCCGCACACGCATGAGCATGAGGATATATCGGAATCAATTTTGC -CTCGGCAATCCAGGTTGATAAATAACCAACCAGTCATCACTCGTTCAATCCTAGAGGAAC -TTCTCAGGAACTCACTACAGCCAAAGGACGGCAGTTCTGATCACCAGCCCTGGTTCAATA -TTCGCAAGAAGGCAGCAAGGCCAACTATTCGCACACCGATACCATTCCACCAGGTACAAT -CGTTACCTTCTCGACAGCATAAATATTACCAAATCCGAGAATCAACCCCTTGGTCGCGTG -GTGAGGGGCCCCAAGCTATCTCATGTCAAATCGAATCAATCGTCCAACATGAGGGTCTCC 
-AATATGAGATAGGAGAATTGAGTGGCCCAATGCACACAAAAGAATCCGAACCATACCATC -GATATGTTGACCACCGCAAATATCGTGGATGGTCAGCACGAATGCAGATGGGCCCAAAAG -ATACAGTATTTAACAACATACGGGATTCATCTGGCCCCCCAGGGACACCACGGACTGATT -TTTTGGTCAGCTATGTCACGGATCCGAGCTCTTCTTTTCGTGGCCACCGCGAGAGGCAGC -GAATTAGCAACGAAGGGTGCTTGCATATCTCAGAAGATAGAACATCGCTCACTTCTAGAC -TCCTCATTAAAGATCAAATTTTGTTCTCAAACGAACAGCAGACATATACCAAATCCATTC -AATGGAATTCGGCCCCAGCAAGATGTTTTCTTCAAGGAAAAGGGACCAGTTCGAAAGCGA -GCCCAGCATTGCCTGATCACTGGGAATTCATGTGTGACCTACAGCATCCTAGCCTCTCCC -GCCGTGAAGGGAAACGGCCTGAAATAGACGCCACTCGGTGCTCCGCTGGACAAACCCAAT -TGAAAACAGGTGACGCCGTCCATGAGTTGAGTGATTGGGAGGTCAGATTGATGGAGAGGC -TGGATCGAAAACTATTATGGCTCTTCAATGAACTCACTCCAGGGCAAAAGCCATACCATT -TTGCTTTGCTAGCCAACCACTGGTTGAATAGAGAAACATGGATTGTTTATGACCCCGTGT -CCAGAGTTTCCACCGACGCACGCCGGCTGTGGGGAGACCCACGATTCAATGTCCCTTATT -CGCAGCCGGTTTTCAGTCCCATACCAAAATACCCTTCGTCCAAACGGAAACGCGCGCAGA -CTCCTCGCATTGACTCCTGGCGGGCTGCAGTGAATAAGCAGCGGAAGGTATCAGGGATAC -GAGACGCCATTCGCACTATTACACTGTATGATGGATCGATCGAGGACCCACCAGATGGCC -ATATCGATCCAGGCTGCTGGTCGTTGCCGAAACCGCCTCAAGGCTTTGAGATGTCCACTG -CACAGAAAAATGCCTGGTATGAAGGAGGGGCAGGCTGGCAAGAGACCCTTGATGATTGGC -AGCAAGTCCGCCGGGGCTATCGTGTTCACAAAGCCCTCCACGAAGGTCAAGCCAATCGAG -GCAGGATCAAGGAAGTGGCAGCTCAAGTCGACAATTGTTATCGTAATGCATCCCGAAAAT -TGATCCCAAGCTATGATCTTAGAAAAAGAAAAGATCCAAACCTCCCTGTCTCATGATTCT -TCTCGAAATGTCGCTTTTCAGTCTGTTCTATTCGTTTTGTCCTGCTGTACCTGCAATCCA -ACCAAGGCTCGCACTCTGCACTCATTTGAAACGCCTTCAATTATTCATAACCTCTGATGC -TATACGCTCACTACCACCATGTCTCTCGCTGCATCTATATCATTAGTATACCCCGGGAGT -CCCGGAATTGAATAAATATAGATATTTTTGCCAAAATGCATTTGAATTCAAAGCCTGATC -TAAGCGCTGGGGTAATCATGGGACATTGTAAGCCACCATCCTCTTCAGAGTGGCCGCGCT -GCAAATTCCAAAAATCCAAAACTCCGGTTGTCCTTATGTTGTTTTTCGCTCTGACAAGTC -CTCCGCCAGAGTCGAGTTCGGATGTATAGTACATGTGCGGGGGAGGAAAATCCGTTTAGA -AACCGGGAGGGTTGACACGTTGGCTGACACGAGTCCACTTGGGCAGCTCTGAGTGTAGAG -AAAGTGGTTAGCTCAGCCAATCCACACAGTAATGAACATCAATTGGTATAAACATGTGAA -GTGGAATTGTGACGAAAACATACTGTGAATGCTCTTCCGGTAGCTCTTCTCCTTCTTGTC -GAAGAGAGTGTACTTGTCCTTGGGCAGCATTTCCTCCTCGCGCGGCATCTCCGCGAACCA 
-CCGGGTCACAGCCTTAGATGTCTGGCCATTGCGTTCGAGTGCAGCGCTGACGGTGTCGAC -GACCCGGTCCACGGAGCGCAGGCGCTTGCGCTGTCGTGCCTTCTGGGGAGAGGAGAGTCG -CCACGGGGTCTTCCTATTAGTCGGGAAGGAATGTTAGCTTGGTTTCGAATGATTGGGGAT -TTAGTCGAAGTCTAGATACGTACCAGAGAAGACCGCCAAATAGTGCTTGGGTTGATTTGA -ACATCGTTGGTTGGTGGTGCTGTCAGGGTGTAGCCGGGAGAAAATCGTGCTCGGAGTCGG -GAGTGCAGTTGGTCCGTGCACTCGATGTATAAAAAAATACGCTGACAGAATGGATCATTT -GCTAAAAGATACAAGTCAATATAGGAAGACCGCGATGATATTATAGCTCAAACAGTAAGC -TTTTTTCATACTAAATCATGACTAGTCCATATTATTGAAAAATGGTGATCCTGGCGTCTG -ACAATTTACTGTGAATGGTTACCCCAAGCAAAATCATGTGATTTCGGGAATCTGTCGATT -TCCAGGGACGCGATAATTTTCTACAACTACACTTGAGTAAATCGATCTGCTCTTCGGAGA -ACATACTCCTTTCCAAGCTATTATTAGCTTGCGCAAAGCTTCTCCCTGTTCATGGATTGC -GTATAGCAGTTTTACACCATCCATAATGGACTCGAGCGCAACGGCGGACGCCCCGTCCCT -CCCTTGGAGAGCTTTCTCGAACACAACAATGTGGGGAGTGGCGGGTCTATGTCGCGGTTT -CCTCTCGGTTTTTTGTCATGCAGAATGTCACGGGAAAGAGGCATTCACGGAACTGTTAGA -CTCTCGACATGACGTTTCGCAACGGACACGAGGATTAATTACAGGTATGCCGGGACACGC -AACAAAGTTCATTAAACCAATGCAATTGCTAATTGTTTTCTTCTCAAATAGTATCCAATC -ACATCAGCGTGTACGTGCCTCTAACAGCCTCGGATCGAGACAATCCTTACTCCAAATGTG -AAGATGGCTGACATTGCGCCGGAATACCAGAATGGATGACCCATTAGTATGGGGTATTTT -GCCCGCACGATTTTGGAATAAGCGGTGGTCGTTTGGGAGCTACGACATTTGCTATCAGAC -AAGGTACATACTCACAAGTTTACACGAATGCTTCTATGGATAGTTTTGCTGATTGTAATC -GCAGACCACTGTCACTCTTCTTCACCATGGGACAGGTCCTGCCCACTCACCGTTCCGCGC -ATTCTGCTTTCGGGGGCCTAGCTCAACCTGCTATGACCGAGGCTATTCGCTTATTATCGA -AGGGTCCTTTCCCAGTTGACCACCACCGCGCGATACCTGAACGCCAGCACTGGAGTTGGC -ACAATGTCTGTGTCGATCCTTTCTCGGACCTCCAAGTTGCCTATACAACAGATGGCAAGG -ACTCGCATCTGGCACCTACAGCGTACTCCTGCAACTCCAACTCTTGGGTCCATATCTTCC -CCGAAGGCAAGATTCACCAGTCGCCGCGAAAGACCATGCGCTATTTCAAGTGGGGAATCG -CACGGCTCATCCTGGAGCCCAAAGAGTGTCCTGATGTGGTTCCGATGTGGATTGAAGGCT -TCGACGATGTCATGCACGAGAGTCGAGAATTCCCACGATTCCTTCCTCGACCAGGCAAAA -ATGTTAGCGTTACCTTTGGCCCCAAGGTTGACTCGGATGTTGTATTTGGCGATATGAGAT -CTCGGTGGCAGAAATTGAAAGCCAGGGTTGAGAAGAGCTTCCCGGATTCCCGAGAAATGC -CCCTGGGGGTGCTGAGTGATGAGCTGTTGAATGACAAAGAAGCTGTTGAGCTGCGCAAAG -AGGTCACATTGAAGATTAGGAACCTCGTGTTGGATGTTCGCCGATCGCGCGGATTGCCCG 
-ATGAAGACCCGAAGGAGGGTTTGGTCGATACCTGGCTACAGGAGGGTGCGAAGCGTGAAG -GCCATATGAAGGACGATTCTTGGGTTCGGGATATCTGAGCCCCTGTGCATAGATTGTATA -ACATTTTATAGATTTGATGATGAGCAACAGTAAGACATGAGCATTGGCTTACTTTCTACC -AGTCTTACCTTTTGTTCTCTCCTACTCCGTACCTTTTTCAGCCCTAAATGCCAGCCTCCT -GCGCTGTCAAAGGCAAAGCGGTGGCTCTCTGGCAGCACGGGGGCGGGATCAATGTGGGGG -CGATAAGCGATAAAGGCGGGACAGCTTCCCGGCCCCCGGGGACACTTCACTTAGTCAAAT -CCGCAGCTCGACCGATGCCCAAAAAGCGCCAACGGTTTCACGCAAAGCCAGCAAACTCAG -CACATCACAGCCTCGCGCTATCTGGGCCTCGCCATCATGGCGGATCTGTGGTACAGGGTT -CCGCGTCAGCGACGTCAGTCAACGACTTGATCAGTCACCTCCGCCGTACACAGACGCCTA -GTGCTTCCGATGATGATCCATCAAACTCTTCACGTCCGCAGCGCTCATATGTGGCCCCAC -GCTCAGTGCACCCTTCTCTTCGAGATGTACTCGAGCTCCCTGCAACTCCACCCCCGCGAC -CTCGCCCCGGCGCTCGTCGCACGGTATTTGGGGTTCGTCCACCTCGACCAACTGTCGGAC -CTCCGGCGCCTGCGAGTTGGCTCTCCGGGAATTTGAATTTGGCGGGAGACCAGGGGTTGC -CGGACCGGATGCTGGAGGTATTGGAAGAGGAAATACACCGGCTGACCCGCCTTCAAGGGG -CGCGGTTTCCGGACCAGAGGAGCCTGGTACACCTGATGCTGAAATCCATGGCGTTGAACT -GGAACTGGCATGTTGAATATGATGGGCCTTTTCTATCCCATTTGCCAAATCATATCAAGG -AACTGCTTCTGAGTTATGTTGCTGTCTATGCAAGAGGCTCGCCGCTCAGAGGGTACATGA -AGGGGCTGGGACCGCTCTTTCTGACCCGGCAGGATCAGGACCGCATTGCCGATGAGAACC -CGGATTTTAACGAGTCGAGACTTGTTGACGCGGACTTCAATATCGCGCGATTGGATCTGG -GGAATGCTCTGGGGAGCTGGATAACTTTGAAACAGCTCACAAATGAGATGATCCTCCCGT -TAGATCCAGCTGTGGGTGTCTCGGGTCATGAAATAGGGGAGAGTGTCCCAACTTCATGGG -ATGAATATGTTCACAACGACACAGCAAAAGCCATGCTTGATCCTTTTCCAGCCCCGTCTA -TACCCAAGGCCATTGCCCAGACCCTACGCTTCCCCGAACTCCGTGCGTTGTCCCTGGCAC -ACCCGACACCTAGTGCTGCGAGTTGGACCGCTCTTCTCAACCTCCTAGCACATTTGCCGA -CCCTGACTCATCTCTCGCTTGCCCATTGGCCTATACCGTGTCGGACCCCACGCGCTGCCA -CCTCTCGCATTCGCGGCGCAGTCACCCGCTTCTCAAATCTAGATCCCCTTAATGACAACT -GGGCCGAAGCTGCGAGTATCCTTCGTCAGCTATCCCGTTCGACCTACTGCCTGAAATGGT -TAGACCTTGAGGGTTGCGGCGAATGGTTACCAGCCTTGAAATGGGTGGGAAAAGGCCCGG -ATGGGTTTCCACAGAGCCCTGATACTGTTGGTCCAGAATGGAATGGATCCTGGAGAGATG -TCGAGTGGCTTGGCATCGGCCCTGGCTTTCTGGATTTAAAGTCTTACGACTCGTCACATG -TCCAGTCCGACGGAATCCCCGATCCACAAACACACTCGTCGGATTCTTCTATCCATGCAA -ATCATTTCCAAAACGCGCGTGAGGTACTGAGGCACATTCGGCAGATGAGAAAGGGGAAGA 
-AATGGCTCGAAATCGAGCTCGGCTTAGGGCTACAAGACGTTGAACCTGAGACAATCAGAT -TACCGGATGGGCAAAAATTGGTACTTTATCTATCGAAGTCTTCTGTGTGCGCAATGTAAT -GTATACAATTTACAACACTGAACCAAAAACCTCCTATTGTACGTATGTAATGATCAATGC -AAGTTGAAACTATTTTATTCGTCTGACTATACAATGGTATCCGGACAAAACGCCCGCAAG -TTTTAAACAATGTCACTATTTCTTTGAACGATTAAACGTCTTCAGAGGATCCACCTTCCC -ACCAGACTTTCCACGAGCACGGTTACGATCTCTCGCCTTGCGGATGCGATTCTGCTTCGT -CTTTGTGAACCCAACATACCCGGCACCATCCTTCGAGAGACCTTTGCGTCCCTTGGTAGG -CATCAGATAATCAGGGACATGCTTGAGATGCGCCTGGACACGCGCAGCGCGCAGCTCGCC -GTCGTGGCGCAGCTGCCGCAATTCTTCAGGGTTTTCTTCGAAATGGCGCTTGAGCTTTTC -GCTCTTGATCAGTTCCTGGCGGATCTCACGTCCTCGTGCTTCTTGCACTGCGAGTCTGGT -CACGGAGCGCAGCGCATCTGTCATACGGTAACGGAAAGCGTCAACTTGCGTCATCTCGAA -GTGGTAGGGCTTGACTTCGTGGCCGAGCTTAGTTTGCCGCTTGATAATTTTGGCGAGCAG -GGCTTCGTCGTTTTTTGTGCTGGGGATTGAGGTGGGTCGGTGCTTTCCGTGCTGCTCGGT -GGGGATGACAAAGGAGAGAGCCATTCCTGCTTTGCCAGCACGACCGGTACGTCCGATGCG -GTGGGTGTAGGATTTGGAGCTGGTGGGGAGGTCGAAGTTGAGAACGCAGGCGACATTTTG -GAAATCGATACCGCGAGCAATGCTGTAGTCCTTCTCTTTGCTGGTCATCTTGCGGCGCTT -CTTAGTTTGCGGCTTCTTTCCATTTCCGCTTTCATCAACTTCCTCTTCATCCTCACTGTT -TCCAATCTCTTCCTTTGCTTCCTCTTCAGTTGCTTCCTCTTCAGTTGCCCCCTTGACTTC -GGACGACTTCTTCGATTTGGTCACCCCCAATACTTCTTGCTCGTCAGCTGCGACAATGAT -GTCGTAGACACCCTTATTGAATTCTTCAACAACGTGGATCCGGGAGTTAATGGGCAGTTC -TGAGTTGAGGACACAACTCTTCAGGCCGAACTGTTCGAGGAAAAGCTTCACACGGTAGCA -CCGGTCCACATCTGCTACGAAAATGATGACCTTGCCCTTGATCAGCTGCAACTTGAAGAT -AACGTAAGTGAGGAGGAATTTCTCGTCTTCCGCGCATCTGTTAGGAGTTGAGTTAGCAAC -AGTTGAATTAACGATTGTGACATATTATCTTACCGGACGACAAACTGGCTGACACCAGCA -CCCTTCTCTTCCTTCTCCTCGAGCTTGAGAATTACAGGGTTCCGGCAGTACAAGTCCTTC -AATGTATCCACTTCCGAGGTGAGAGTAGCACTCATCAAGAAGGTCTGCGCTCCTCGGGGG -ATGGCCTTGGAGAGAGCATTGATTTCCTCATCATATCCGTATGACAAGACCAGATCCGCC -TCATCGATAACCAAATGGGTGATTTTATCGAGCGACAAAGCGGAGTTGTTAACGTTGCTG -TAAACTCGGGTAGGCGTCGACACAACGATATCCGGGAAATCGGCCAGTATCGAACGCTGA -ACTTCGTCGGAAACCTTCTGTGTTAAGTTGACTGACCGAATATCCTTTCCGCAGAAACTG -GCAAAGGACGTAATAACTTTCTGGACCTGTTCTGCAAGCTCGCGGGTGGGGACAAGAATA -AGAGCGGTTGTGGCTTTTGATGAGGGATCGGCCTATTTGCAGACAGTTAAATTCAATCCT 
-ATACAAGGTACATATAGAGTGGGGGAAAGAGGGAACTAACAGTCTTCTGCTGGAGGATGG -CTTGCAGGATTGGGAGCACATAGGCGGCGGTTTTGCCAGAGCCGGTTTTCGCACGGGCTA -TTTCTGTTAGCACTTGTCCTTTAATTCATCAATTTAAGAGACTTGTCTCACCAAGGATAT -CCTTCCCCTCGAGTGCGAGCGGAATCGCCTTCGACTGTACAAGAGTAGGTTTTGTAAACT -TTTCCTTGATCAGGGCCTGGCGCAATCGAGGATCGAGATTGAGGGCTTCAAAGTCCAGTT -CTTGGTCCTCAGCCCCAGCGGCATCGGGGGATGGGACATCGTTTGCGTCCAACTTGCGTT -TCATCGTGATTGTGCTAGGCATTTGCTTGATCAAATCTGCGGAAATTGAAACTTTTTGGG -AAATCTTGTCGGGCGGTGAGATTGTTAAGGGCGGTGAATCAGAATATTTTAAGTTGACAT -AATCTTTGCTGCCTCAGGCGGAAACTAGATATCGAACCGCTATAAATGAAACGAGAGTTT -GCGAGTCTTAGGGCTTCGCTAGGCTGATTGCTGCATAGTGCATCCTCCTGTCCAAACATC -TACTCCAATATCACTTATAGTTATCCATTGGTAAGTCGCTTTTGGCGATCTCCCCACCCA -GGCCGAAAACATATATTGGAGTTGACATACCAAATATATAACGGTTGGTGGACTGCTTTG -AAAATAAATAGAATGTTTATAATCATAGAGCGTAAACCGGCCTCGGAGATAAAAATTTTC -GAAGATATCATACCGATACCCTTCAGCTCCAAAAGGTTGGCAAACGACAGTAAACAAAAG -AAGGCACACAACAGGACAAGGGCCAACTTCTCTTCACAGGTCATCCTGTATATCTCGTAT -GATCTTGCCCCATTCCCTTTTTAACGGTTGGCATGTCTACTGACATAGCCTGACATAGCC -ATAGTTCCATGGCATCTCCTGTCGTACTATTCCAACCGAACGACGGCTTTTGTCATCAAG -TTACCCTTCTTTTTCATGATCGGATCCAAATATCATGAAACCCGAAGGATAGGAGCGAGG -AATGGGAGTTTCAAGAGTTTATCATGATAATTTTTGTTCATTGAAGAAAAAGCAGATATG -TTACATACCACCTCCTATATCGTTTTGCTTAACAAAATCAAAAAGAACGAAAACTAAGAA -CACAAACGAAGCAGGTACGAGGGATAAATTGTCCACACACCTCCCCGGACAAGCCATGTA -GACTGATTCTGAAGGACCTCGCGTGTAAACCATTCCATGTTGCCTCGCGCGCTTGATTTC -TACCTGACAAACTCACTCTGAGCGTATTCTGGGAATCTCGCCCGAGACGCATCCGTGATT -TTGCTCACGGAGAGAACTTTAACGTTGTGCTCGACGGTATAGACCTTGCAAAAGTTCACT -CTTGACATGCAGTCCAGCTTCTGATCAGGCCTGGCAGGATCGACCTCTAGAGGCTCTTTG -ACCATCCGGGGCTCGGCCTGTACAGTCCGAGGCCTATCACCTCGCATACCAATGACAGCA -TGTTTGCTTCGGTCAAGGCCGGCCTTGGTCACACCTTGCCCACTATAAGTAGTGATGGGT -CTAGAGTGTCGGGTTAGTATTTTGCAACGCAATTGACGTGGAGATAAACGTGTACACGTA -CACACACCAGCGGCAGCCCCGCTACTCTTTAACCACTACCATGCGCCGGATACTACTGTA -CATGGGCTCTTGGTACTTTCCCTTGATGAAAGGGTTTGCTGATGGGTGAGAGACTTTCTC -GCTGAGTACAGTTCCATGCCAGCCTGCATTCTCATGACAGAGCACGGAAAAGACCTATAT -GATCGCGATAAATTATCAGTATGGCTTTAATATTGACATCTAACAAGGTGAACAAACCCT 
-GCCGATTCGGAAAAATCTCCGAGAGTTGCTCTGCATCTTGTAGCCTGGTGTCAGATATTG -TCAGTTTATTGGCTTTGATATACACGAAGCGTGCCGAGGCTAATCCTTATGAGGGTCCAG -CATCCCACGTTCGCCGCTACTGCCAGGGATCACGTATGCTGGCTGCAATGACTTTTGCTT -CATAGACATTAGCCATTTAGCCAGAGAGTAAGGAGCATGCATATTACTCACCACCTCCTT -GTCATAGTCTCGTTGGGCATCCCCCAGACTGGGGTCGTTTCGGTGACTCTAACTGCTGGC -CCCATAGGGCAGCACCATGTGCGAGGCTGGACGATCCCTGAGACTCAATCTCTGCAAGTC -ATCGACCTGAATTCTGGAAATGGATGCATCACTTTCTGACGAGGCGGTGACTTCGCTACT -GTCGGATATGCTTGCACATCCGGTTCTGAGACCGGTGGTTTCTCGCGTTACCTCGTACTT -CATGCCCGGGCTCCATACAGGCGGACTGTTCGGCTTGATGTTTGCTACTGTTCCTGCACC -TGAATTTGAGTGGACGCCATCACCCACTCGATGATTGGCGAGTTGCTGGTACATGGTGGC -TGGAGATTGACCATTGGCATATGTTTGGTCGCAGTCGGGAGGGTCCTGCTGCTGAACATG -ATCTAAGCGACTTGCATGGTGGCTCATACCTGGGACGAATGGGATATGCCAGTATGCGTC -GTGCTTGGAAAATGTGGACCGGGAGGTGCCTCGTTGACGTGGCGCGACACTTTTATTATA -CGCGTTACTTCCCCGAACTTCTCACGCAGGAAGAATGAACAACGAACCTTGAGCCTTGAC -CTGCCATCGTAGGACTTGATCCCCCACCTGGATAGTCCCACTCGCGAAGAAGTTCACCTT -TTGCTCTGGTCAGTTCTTAAATTATTCTTGTAGCCAGTGTTGCTTACCAGAATAGTTGTA -GATGGACCGAACCCATCGGTTGGTCTCAAGATGAAGAACCCAGTCTGAAGCCATGACCAT -ATCTGACCAGGAAATTAGAGATAGAGGAAGACCGGACAGATAAAATATAAGCCCTAGGGT -TTCTATATCCCTCATCAGCAACTCCTCAAGCACCCATCGCACCCCTGCGGCTCGGTCCGT -TCAAATCAATGATCTTTCAAGAGTTATGATATGCATGGCGCGTATGGCATGAATCAAGTT -GGTTGGTTGTCAAGTCCTCACTCGCGGAAGAGAAGATTTGTTAACTAGCACATGATCGTG -GACCCCAAGTATACTTCCAATAACATCCCGGTTGCATGAGGGGGGCATCTTGGTGGCTGA -TATACCTCATGTATTTTGGGATATTTACTGGAATATTTACAATTGCGCTCTGGGCAACCA -ACTGAAAGGTGAAGCGCGGGCGCTAATGCAATACAGCAGATTTACAGACTCACGGCCATT -GGAGGTCTCATTTGGGCACCAAGGCCCCATAACTGCGCCCTCGACAACCTCACCCCACTA -GCACCGGCTATCGTGTGATCATGACGAAGTTTCCCATCTCATTTTCTTGAAAAAATTCCC -CGGGTGACCAGAATTCAGCCCGCTATTAAACCTTGCTTACCAACACTTGAACGTGTTGGT -CTATAATCTCCTTCGCGGAATGTGACTGAGAGGCACTGGAAACCCCTTTAACAATGACAT -CTTGGCCGGGAGCGCGGAATAATGTATCGCAAGTGAGTTGATCTATATTGATATGCATCA -AGCTCAAGGCTCATCACTATCTTACTTTGTAGCCGCCGTCCACGGCAGACTGCTATACCA -ACACTAAAAGTGAAGTCTGCCTTCCCAACATTTCATTTGCTCCAACTTCACGACTAGAAG -AGTCACAAAGAGAGGCTCGAGCATCGAGTTTCACAGCATCAAAAACCCTGCAACCATTGC 
-CGCCACTTCCTTCAGGTGGTTTTGCTCCAATAACGACATCAACAGCGCCAATCATCGGTC -ATACCGGACCAGATATGGTAGATGACGGGAACGAAACCACCAAAGACGACTGCTGGTCAC -CACACAATCTCCAACAATGGAACGAATATGATGCTTTCACCGAGTCATCCGTAGAATCCT -TCCCGGTCCAGCATCTCCCCGAATTAGATGAACAAGTCAACTGGTATCGCATGCCTGCGC -CGCCCCCTGAATCAGCCCGTGAGACCAATGCCAAGGTGGCAAAACTTATAGGAGAATTAT -TAAGTACACATGAAGATGTGTCAGCCAAGCGCTATCGGATCCGGGAAATGCGGCATATGC -TTCGACAAAAACGTGATGAAGAAGATGATGTGCGGGTTGCAATACGGAATAAGCTGAATC -TCATAACTGCCGAACACATACATGAGGATCTTGCTGCTATCAATCATACCATCAATGACC -TCCAGGTCGTGACAGCGGCTTATTTCATCCTTGAAAATGAGTACCATAGACAGGAAGAGG -AACTCTCGCAGCTGGAGTACGACTACAACAAGCACTTAAAAATATTGCAAACCATCTTCA -AAAATCAAGGCAGTTCGTTGGCACACTTCCAGCCCATCCGCTCGGATTTCGAGTCTTCCT -CGACCGACTACGCCTCGGAATATGATGACCAAATAGTGCCCTCGGTGGCTGATTATCCCT -CGGTAGCTGGCAAGACGCGCATGATCGCGGAACGCCTATTGGAGCTAGAGACAGAATACC -TAGTCTTAGTTGATCAACGCGAGCTCCGTGAGCGAGTCGGCATCCCACTCGACAGCGCAG -CCCTCAACTTCCTCATTCGATATCAAGATGAAAAGAATAAACTCGAGACTGAGCTGGAAC -TAGCCCGCCGGAACATGGAGTCGCACCCGGAACATGCCAACCGCTCTGCGGGGATCGCAG -AAGAGGAAGATAAAGAACAGGAGGTGATACAGCATTTCATGCCTGAAATCCCAGAAAGTC -AGACATACAATGATCCGCTGCACTCCTCGGAATTTGAGGATTCTTCACCATTCTTTGCGT -CCGCGCATCCATACCCTATCAACAAAGGCACTTTTGTGAATCGATGGCTTCTTCATCGGT -TGCGGCACTCAAGATTTGAGATTATGCGCTTCAAGTCCGCACCTGAACTAATAGATCTGG -ATAGCAAAGGCTGGGGGTCAGACAATATTAGCAAGATGGCGATGATGCTTTGGTTTCAGG -ATGGTGCGGCAAGCGTGGAGCATATCAGAAGTCAATCAGCTGGGTAGTGCATGTGTACTT -TGCACATAGGTAGCTTTGGGCGCTTATATGCTGTTATTGATTTACCCTCTGTTCTTGATA -GCAATTGAACCATAGACTGAGACTTAATCTACTCCTCGACCACACCTTGATGCCGATTGC -ACATATAAACATAATCATAGTACCTCAACCCTAATCTTCACTACTCGAATCAACATTTTC -CCCTCCTATAAAGCTGCAGCGTGTTTAAAAAACAGTCACATATGTGTAATTGAACGTTAG -GATCATTAATAAGTTGGTGTGAGCCTTACCATTTTTTCCAAATATCTATCAACTTCTTAA -AATCCCGAATAAGTGACCATCGACAATGAGCTCAAACATAGCCCAACTGTGCAAGTCCTC -ACCCTTACCCAAAAACAAATCCCTCAAGCTCGAGATAAGCCCTTTCCAAATCATCATTAA -CAATAACCCGATCGAAAACACCCGAAGTCTCAGCAAATTCAAGCTCCCGCCTCGCCTGGT -CCAATCTAGCCCGAACATGTTGCTCACTCTCCGTCCCTCGACCCCTGAGGCGCTGCTCAA -GCACACCAAGACCCCGCGGTTTGATGAAAACATAGCGGGCCTCAAATCCAGGGTTGATTC 
-TAACCTGTTGCACGCCGACCATGTCAATATCAAGGATAGGAATGAGTTGTTTCTCGAATA -TTTCTGCGAGTGCTGCTTTACTTGTACCGTAGAAGTTATCAGCGTACATGGCATCTTCGA -TCAAATCTCCTTTGGATTTAAGACTGTTGTATGTATCGATGGAAACAAAGTGGTAGGAAA -TACCGTCTTTTTCATTGGGACGTGGTTTCCGGGTTGTGTGGGAGACCGTCAGTTCGAATG -TGTTTGGGTGGGCGTCAATGAGCTTTTGGATAAGGGTTCCTTTGCCTGCGCCTGAAGGAC -CGGAGATGACAAGAGGTCTTCGATCTTGAGGTGTTACTGTAGAGGTTAGCATTAGTAGAG -TGTAGTTGGAAGGCATATGACGTACATGAAGCCATATCACCAACTACGTGGTCCTTGTCT -TGACGATAAATATCTATAGCTTGGAGTTGGTAGTGATTGATCAATGCAAGGAGTGATGCC -AGGGATATAGTAAGTCCAGGCTGTGTCGACATTGAATAAAGTGGGCATCGGACAGCCTTA -CCGCCCATCGGCTCATGGATGCGGGCAGGCCTACACTAATCCACTTATCAACATGCCGAG -GAAGCTCAATAATGGCGAAGTAATATTGCAACGAGATGTTATCCATTCGCATCGTGATTC -TGTTCATTATGTACATGACCCAAACAATTTCccatatccatatccatatccatatccata -AATCGATCGGCAAATGGCCGTAAAAAGACGCTGACTCCATAACCCAAAAAATGGTTGAAC -TTTCAAAGCAAGATCCCATAACCAAATTTCAGTATCTCGTTCATCAAGCCCTAACACATA -TTGTAGTCATTATTCAAGAGTACTGTGTCTCGCCCTCACTATCACTCAGGCTATCGCGAC -GACGGCTGCGTCCATCACGGCGATATCCCTTCGTCAAGCCCGGAATTGGATGCTTGTAGC -TCTTCGTACCAGAATCCTCGGTGTCAACGTTGGAATATTGGTACTCGGAACGGGAGCCCG -CAGATGAGAAATCCAGAGGTTCAGTGAAGTGACCCTGGGGCACGGAGCGGCGTTCGCGTT -GCTTACGGGGGCTGCTAGTGCGGGATGAAGGTGCGGGAGGTGGGCCAGATGAGCCAACTT -CGCTACGACGTTCACGACGGCGATTGTGACGGCGGGTAGATGGGTCGCGGACACGAGGCT -CTTCGGTTACTTGGCTGATAACATCCTCGCTACCTTGGGTGAATGAGAAGCCAGAGGGAT -CGCGGCGGGCATGGCGTGGTTGCTCGTAAGCAAAATCGTAAGCTTGGCTCATCTCGCTCT -GTTGGTAGGAGGTGGGCGGGGCAGAAGTGTCGTAGTCGCTGCCGTTGTAGGTGCCATCGG -CTTCGCGGTTCATGCCGCCAATGCGAGCGGGTTTCTCTTCTCGGTAGGCGCGTACGTCAG -CATCGTCTGCGCCTTCCCATTGCTCGTGCTTGTTGCGTCGGAGAAGCTTCTCTTTGGCTG -ATTCCTTGAGCCTCTTGCCCCGTGACTTCTCTGTGATAGTCGTCGCTGTGTCGGTGTATG -ATCCTTCTGTGTAGGTGTATGCGTCATCGTTGTAGTCCTTGCCGACGATACTTCCCCCAC -CAAGCTTGGTACTCGCTGTGGCGTTACTGAGAGTCTTTCCGTCGGGACCCTTTCGTCGTA -GCACAGTGGACTTGTAGTCATCCCAGTCGCCCTTGTGCCAGACAAATCCGCCGTTCTTGG -CCCAGAAGAAGAACCAGATACCCCCCACGGCAAAAGCGAGTCCAATAATGGCAAATAATG -CCATCAGGCCTTGCATTTTGATGCTATGTGGGTCGATCGTGCCTTGGCCATATTTTGGCT -TGGAAGAATCATCGGAGTCGCTTCGTGGGAAGAGATTGTGGGCGTGCGTGCCGTCGGAAA 
-TTAAAGTACCAGCGGTGCTCTCGCTGCTATCATCTCGAGCGTATATTCCGGCGACAGCCG -CTGCTGGAAAGTCGGGAGAGTCCCTGTTCGGCCAAAATGGAGGCATGATGGAGTATTAGA -GTGGTCAAGAGCACTCGGCTCAGGACTGGACTCACTATCATATTGTTAGCAACTCAAAAT -TCCTGAAACTAAAAGGTAGGGTATATGGATATACATTAAAGCAATTGATCAATACCAGCT -GTAGATAGAGATAGATCACTGACACGTTGGTATGTGGGTATAGAACGTGGGTATGGAATG -CGGGCAAGAATGAAGCACAGTGTCTAGATGGGCAATAGTAGAAGGGTCAAGATAAGATGG -GGAAATGCAAGCCCAGATATTCAGCCTGACTTCCAGTACAAAATTGAGGAGGTTGGAGGA -GAGGCTGGGGGACCTGTGTTTACGCGATGGTTGTCACGGGCGACAGCTTACCTCATCCCC -ACCTTGCCCACCCACGGGTCCAGTGGACGGGTAGAATTGAGCCCGTGGGTCAATCTATAA -AAAGAGTCGATCATTGATTTCTATCTAGATCTGAAACATGGCGTTTTTTTTTTATAAAAA -AAATGGGTTTAATTATGGATCAATTATATCAAGTCGACAAATGCCACCCACTGGAAGCCT -CTGGGGGAGGGTATACACGGTCGAAAGCTGTGATCGTGCCCAGACAGCTTGTAGGGTATC -AAAACCAACTAAACAGCAAAAGCATGGAAAAAAGCACCAAGACCAATGGACGATAAAACA -GACAAGGAGATTCACGAGACTGCGAAAATGAACTGAATGGATACAAAGCCCAGAACCTAA -ACTACGAGGCTACCAGCGGGGCAAAATACCAACAAAACACATAAAATCATAGATATGGCC -AAAGAAAAAGATGACCTGTTGAGACATGACAAGTAAACAGATATGGGTATAATCTCTCTC -CGCCACTCGGAGTCGCGAACAAAGTAACCGGGAGAAAGACACAGGCCCAACGAGAAAGGC -TATACACGAAACGCTGACAAAAAGAGACATCAGAAAGTAAAGCACATCAGACTTGTCATT -GCAGGTCATCAGAATCGTACAATCATATCGCCATAACAGAAAATCATCATGGGGGTTCGT -GCCCGACAGATTGGCCCAAGCACACAAATATATAGAGAAAATCAAGACAACAGTTGCAAA -TGAAATAACACACAAAAAGTCCTCCCAGTCTAGCGAGACAGTTGGGCAACAGGCAACTCT -GGGCGTCATGACAAAGCCCAGCGTGTCGCCCAAGTCGCCCAACAGAGTCTCGCTTTTGGT -AATTACCATTGTTTTTTTTTGTTCCACAAGCACCCGATCCCGaagaaaaaaagagaaaag -aaaagaaaaTGCAACATATGCGGAGCAATCGAAACTTTTGACAAACAGGCAAGGCCAGCC -ACAAACGGCTATGCATACACAAGATGGAATCACAACAAAAAAGTAACCTAACCATAGAGA -ACGAAAAGAAACACAAAAAAAGGACAACATCTAACAGGGTGGGTAACAAGATTAGTGGTG -GCGTGCCCCATAGGCTGACTGGCTGTCTCGGCGATTGGATCGTTCTAACCGCTTGTCATC -CCGGGGGCGCCGCGCCCCATCCTCAAGTGCCCGACGGCTGGAGCCGCCCGTATGATCGGA -GCTGGCACTATTCACATGCTGCTTGGATGGCCGCGGGCCGCCGATGTTCAATCGGACAGC -CCCGGTCTCCCCAGCACGAATGTTAATAGATTTGCCTGCAACTGATTCAGATGTAAATCC -AATTTGGAATCCGTTCAACATGAGGGTCATGTTCTTATCATCTTCCTTCCGTGAGGCAGT -GGCACTGCCGCGACTGCTACGACTCTTTTGGCTGCCGCTCTCGCTCCCATTGCCTTGAGA 
-CGCTTTGGGGAGCAAGGTCTCTGCGGAAAGAGGTAGTGCAGCTGTTGAGGCTCGGCCAGA -CCGGACTGCCTGGTAATTCTCGGCTTCACGCTCACGGTCTTCGAGGTCGCCACCGACGCT -GGATCGTTCATCGTAGTGGTATTCGGATGACTTACGTTGCCGAGAGCTCGCCACGGCCAC -CTGGGTATTCCGTAGATCATCGTGGTAGGGTGTTGACCGTCGGTTATCTCGCAGGCTATG -AGTTCTCTCTGGAAACGGAACTTCCCTTGAAATTCGACGATGGCCGCGATCCTGAATTGC -CGGCAAGGCAGACGCAAGTTCCGGCATGTCCATGCGGTCTATATCTCGGTCCATGCCTCG -TTCTCTCTCCATACCTCTGGATGGGCGACGAGAAGAAACAATACCGGCAGAGGTATGCAT -CTTCCGTGGTAGATCTGGTCGCTCTGGTCGCTCTGGTCGCTTCGCCTGGTGAATCTGCGG -GGCAGCCCTAGGCTTGGGCCTGGGGGGTGGTAGCATCCGATAGTAGTCTTCATCCTGATC -CTGGTGTCTTGTAGGACGGTCATAGACTTCTCGCGAGGAATAGTGTTCCATAGGATCATT -ATCATACCATTCATACTGCCCGGCATCCATGGGTGCGGGAGGAGGGGCGTAGATAGATGA -GGACCGAGGGCGTGGCTCCCGGGACTCGGACCGCTCTCGTGCATACTCCGATGAAGGGTA -ATCCGAGGCTGGTGGGGGATAGTAGGAAGGTTGTCCATATGGAGATGACGAGTACTGGTG -CATGTATGCTGAGTTGGCTGGTGGAGGGCCATGTTCGTATGTGTGCTGGGAGTACACAGA -AGATGCCATTCCTGAACCCATCCCATGTGGTACACCTTGGAAGCTCATAGGTCGTCCGTT -TGGGTGATAGGTGTGGTACGAAGAGGAACGGCTGGGTCGACCAGGGTTATTGACATGGAT -GTTCTCAATGACGCCGGCCGGATATCGGGCAGTATGCGGGGAGGGTGAGTATTCTGGGAC -GGGTGGACGCTGGGGGTAGTAGACATATTCGCTCTGTCGATGCTCCATCGGAGCTGATTG -ATGGTACTGAGGAATACCGTAGTCGTGATAATTGTCGTGATATTGGCGGGCAGAGGATTC -TCGTCGATGAGACTTGGACGACGACCGGGGCACGTGAGCATGATGGGCAGCGCCCGCATA -GGCACCAACCATCTCCTCTTCGCGCGAGGGTCGTGATCGTTCTTTTTCCTTGCGTTCTCT -AGTTGAACTTTTCCTGGCGAGGTCGGTACGTTTGGGGGTGTCCAGCTTATGAGGGATCGG -GGGACTTTTTCCGCCCGACGGCCCAGATTGGGTGCTATTAACCGTAGCGGCGGTGCGACT -AGAATAACCTGAGTCACTGGCGGCGTCGATCAGCGGCTCCTGGAATTTTTCCAACCTCGA -GGATATTTTGGCAGCTGTATTGGCAGCAATTCTGGCAGAAAGCCTAGTGTCGGGTAGAAT -AGCGTGTTGTTCTTCGTCGTAATCCTCGACGCAAGCAGGCCGGGGGCCCTTACTGGCCAT -TATGTGGGCTGCAAGGCGATCGAGTGTCTGAAAAAAAGAAAATTAAAAAAACAGGGTTGG -TCCCGTTTCCGACAAGGGCGACGTGGGTGGCTCCCACAGGGCTCCAGAACAAGATCAACT -TACAGCAAGCCACTACTCCCCTTTGAGGATGAGTCAGAATGGATATTATCCTCCCCTCCA -ACCCCGTAGATCTGACGGAATGGGCTTCTTTTTTTTTTCTTTTCTCCCCAGTGCCCAACC -CAAGCGGGCCACTCAAACAAGTAAGCGGACCTGCCGATCGCGGCGGGCGATGGCAGGCGG -AGGGCGACGGGTTCTGTCAAGCGGAGAGAGCGAGATGGTACTTTTCAGATCGCAATGTCT 
-CCATCGATGTGGGATGTTAGGCTAAGAGTTGGGCGAAAGCGTTGGGTCAATGAAAGGAAG -AAAATAGAAAATGACCTCTCTGGCAGACGATGAGTGGTGGAGCGTACTGCAGCTGCAGAG -GTTGGGTTGAGAATTTGGGATCCTCTGCCTGACGGTCTTAGAACCCAGTGGGGAGAGTGG -AGTCGATGGGCAGCGATTCAAAAAAAAAAAAGTTCGCAGTGAGGGGTGTGACTTTGATAT -CTATGAAAGAGAAAGAAAGATATCTCGAGATAAAAAGGAGAGAAGTATATAGAAAAGAAA -GAGAAAAGGAGTAAACACCCGAAGGCACAGGGCACAGGGTTTAAGGGAAAAAAAAAGGGA -AACGGAGAAAATTCAAAAAAGTTTCACGAAGAGGCTCTCGGTACCCAACACAAAAAAAAA -AAAAAAGAAGTGGGGGATACCGTCATCAATGTATTATCTATGCTATTTATTTTAGAGGAG -ATATCTTATTTTATTATGTCTACATCTAATATAAGTATGATATCTAAAATATACAAAATA -TACATGTTAGTATGTCGATTATTTACTTTACATTTTAAGCTCAGGACCCGCTGGAAATTC -CTTGGTTTCCATTCTTAGGATCCATGGTTGGTCGTTACTCTTATCTATAtttccttttcc -tttttgttttcccgtttttttcccttttcTACTTGTATTTTACCTGGAATTTTCGTCAAT -GTCGCATTCCTATTTCTCCCTAAAATTTATCCCACCTAACGCCCAACCGGGTTTGCGGCC -CAATGATCGCCTCCGCCGTAAAGGGTCTTCCCTGTCAATTAAAAAGGAACCTGTTCCTCT -GGGCGAATTTCGGCTGTTCGGTTCTCAGGTTCAGTCATCCATTTTTCTAATCGTTTGATC -ATTCCATCAATTTCTCATTTTCGTGTACATAAGTCAATGCAGGGCCTCAAATACAATGAC -AATGTATCCGGTACACGGGAAAAAGTCGGTGGGGATGACACCCTGCAAAGGCTGCAACTC -ACCACTAAGGTGACTCTTCCTACTCCGTACTCCGTACTCACACTCGTAGTATGTATGCTG -TAGGCACACACCTGTATGTATCAAAGGTAGATGCCTATTTGTCTTGACAATCAAGGGCAG -AATCGCCACTGTGGAGTGAGTCAGGTTTACTTTTACCATGGAAACCACAAATGTCAAACG -ACACTAGAAGCAGAAGAGGGACTGAGCACTGCATGGTATTCGTCGTCAGAGCCCCCTCGT -CCATGTGGCAGGATATCTTTGGACGTTGTCTCTTCTCCGCCTGCATACTTCCCATTGTGA -CTCACACGGGTAGAAAACCTATGACAATGCGCCCTCACAATATGTACTTGTACAAGTTTT -GAAGTATATTGTACTCTATACACAGAGACACTAGATTGTACTAGTGGTGAACTAAATTGC -CGGAACCTTCTCTCGGGAGGCAGAAATGGCTAGATCGGTATGACATGCAGCACGGACGGT -CTATCATTGCCATTGTGGATTTCTTCATATCCCTTGCGTCTTGAGATCAAGAGGTTCGCA -GAGTTTTCATTCTGCACGTGTAAGAATGGCACTATTAGAACTACGAAGTAGCAATGACCA -CTTCAAACTATACAAAGCCGATAACCGCCAAAACATCCTAGCCCTAGGTATTCTAATCTA -GTCAAAATGTTATTTTGGGCCGGAGTGGATATCTTCAGGCTAAGAGGCTGAAAAATCAAA -CCAGGAAAAGGGCTCCGCTCCAAATATAACCAAGGATCTCCATGCGGCTCCGGTATTTCT -TCTTCTACTTCTATTCACATTGACACCGTTGATAGTCAAGACTACAACATACATGTCTTC -ATATCAGCCCCGGGAGTGCTGACAATGCCGTGCGCACCGGGAAATAACTAAAATTAAATT 
-GCATGGCAAAATAGCCAACAAGCTTTGCTCGCATATTGATTTCATGGTGCAATAACGTTG -ATGTCTGTCGCCAGTCATTTTTTTATTTACATCGGGAGATAAAACAGGGTCAAGGGAGCC -ACCCCCTTACTTCATTCTGCGAGATGGCATCAGCTCCTCGAGGCGAGATGCTCACCGCTC -TTTTGCAAGTACCGATAATTGGTAATCTGGGGATGAAGATGAGGGGAAATCAATCCAACT -CTATTCCTCTCTCGGCTCGAGAGACTTCTATCGGGTCTTTAAGTGGATGAAGATAGGTGA -CAGAAACATTTTAGAAGTTTCGTGTACGGGGTACATGGTACATCGGTATATATGACTTGC -ATGTTGATCGCATCACCCCAATCTTCTTTGTATGAGACATCATTCGTCGGTTGTAGATAG -CCTTTTCCATAGATAGGCCTTGGCATGTAAATGGGTGATTAGCATTGTCTAATGTCGTAC -GAAAGACAATAATCATGTAGGTCAAGTGTTCCCGTGGCTAATATTCAAGGCCAGCCTGCT -ACTGAGGTGCTACGGCATCTACAGTAGTATGCTTTATTGAGGAGTGTGGCGCCTCCTCAA -GGTATCGAGGAGTAGGTGGCTGCCTTTCCTAATGAAGCTCCCCAAACCTTGCTCATATCG -ATGCAAAGGTAGTCTTCAAAACAAATGCTTTTCTATCAATTATGTCCACCTAGCCTGATT -TATATTCACAAAGATGCGTACAAGCGCCCCTTCTAGTCTGTCCCAATAGCCAAATCAAAC -CCAAATCATCCCCAAGTCGGCTCCACCAATTCGGCTCGACTATTCTCGGCCTCCTAGTTC -CTGCGCAGGTAGAGAGCAATCCATCCAAGGATCCGATATCCCGCAATGATGCCCAGAAGA -ATTCCAACCCACTTCCCCGTCAGACCAGTGGAGTACCCATACGATTCTAGGACGCCAGTG -CCCCGAATGAGGCACTGATCCTCCAGATCAGTAGACCACATGCACCGACAACCGGCGCCA -CACGAGTAAGTCCGCTTGGAGAACTCATTGACCATCATACCCTGGAATACATAAGCCTAT -TCCAAATACCAAGTGAGTTAGTGATCATTCGAGCGAAAAAATTAAAAGAAATCAGATAGC -GGTGTATCAGCATACCTGGTAATCGATGTAGTGGAACACATATTTCCAGAACGGATTGAG -GATCGTCTGCGTGACCATAAACCCTCCGACACTCATCCAAAGTCCATTTGCGAATGCGAC -AAGCGCCAAGGCAATTACAAAGTTTGGGAATACCGATGTCACGAGAACCACTAGGGATTC -GGCCGCCACAAGGTCAAGGAATATCCACATCACCCAGGTGAAGAAGGCTTCGGCTGTTGG -TTGGAAATTGGAAAGCCAGTATGAGACAATTGAGAAGAGGAGGGATATCAAGACTGGAGA -CATTAGAATACGGGCATTTGGCAGGAAGTGGCCAATACATACACAGGTAGGGAAGACCAA -TGATAAAGTTTGAGATCATGAACGGTGTCACACCATATAGTCCGTTGGCTCGCTCTTTAG -AAAAGATTGCCCGGTCTTCGAGAAACGCGGGGACGTATGCTACCGCCATGAAAGACATGA -ACGCGGAGCCAAAGAACTATACCTCATGTCAGTACGGGTATCTCTTTAATTCCAAGTACG -TGTGAACTTACAATGCCGTTGATCAATGGCTGTATAGACTCTTGGGATGGATGAAGGCGT -AGCCACACTGTACCCATCATGATCGCCAATCCTGTGCCACATTAGCACCGGTAAGTTTTC -AAAGGGGGATGATGATTCGTACCAAGATACATCACGATCCGGATTCCGTAAGCAATGACA -TCGCGGTAAGACTTAACAAATGAGCGATGCAGCAGAGCCATAGTAATAGAAGCCGTTCCC 
-GCTCGATCCGTTCCTGTCATGGAGTCTTCGTCGGCAAGTTTCTCTGCCGAGCGTACGCGC -TCAGACACTTTTCTTGTTCCTGATTTTGCCTCATTCGAGTTTGCCCAGGCAGTTTGTATG -GCCTCGAGACGCTCCTCCGCCGAGCCCTTCGAATTGTCAAAATCAGAACTCACAGTCTCC -AAGAGAAATTCAGCAGGGTTGGTATTCGCTGGAATCGGATAGCCAATGCCGCTGAAGTAG -CTCTCCAAAGCTGAGATTGCACCGAAATAGCATGTTTTTCCTGCCGAGAGGAGCAACAGC -TTATCGAAGAGTTGAAACGTGGTGGTTGATGGTTGATGGATACTTGCAATGATTAGAAGC -TATAATAATAAGAAGACGTTAATAAAAGCAGGGTACATATATGATATGCACGTACATTAT -TGGCCTGCGCGAGCTGCTTTGCATATGACATAACCTCAAAGCTGGCGGTCGAGTCCAGAC -CGCTTGTAGGCTCATCCAAGAACAAAATTCTTGGATTAGTGATGAGTTGGCTAGCCACAC -TCACACGTCGTTTCTGACCACCACTGATGCCCTTCCGGATCGGGGTGCCCACCAACGTGT -TCGCCTGGTTCTGGATACCAAATGCTTCCAAAAGTGTCCGGATCCGATCCATGCGCTGGC -GTTTTGATACAGAGCTGATTGATAGAAGTAGTAAGCGTCAGAATATATAAGCAATATTGA -ATGGTTGTTATTTAGACTTTACATACCTTGACAACGAAAGATCCGCCGCGAACTTCAGTG -TCTCGTGCACCGTTAACGACCCGATCAACACATCTTCCTGCTCGACGTACGACGTCACTC -GCTGAAAAATCCGCGAGTCTACTTCAGCATCATTTACATAAGTCTCCCCAAGAACCTTTG -CACCAGCTGACGCTGCACGTCGAGCAAGTACGTTCAGGAGTGTGGTTTTGCCACAGCCTG -ACGGGCCCATTAAGGCAACGAGCTCGCCTGCATAACCAGCGTCAGTAGGATACCTTTTAT -ATAAAGTATGGACAGGGATACGAACCATGTTGCACATCACCACTAATATCATTCAGCAGA -TCGCGGGGTCTCTTTGTTTCACGATCCTTGACAGTGACCGTTAGACCTTGCCAAGAGAAG -TTCCGAACAGTCTCATTCATTAAGAAGTGCCCGCTGTCATTTTGCTCCAGGTCAGGGGTA -CTGGATGCACTCAAGGGTAAGGGATCGCCCATGCTGCAATATAATGTAAATGGGTTTACG -GCAGATACGAGAGAATTATAATCCAACCCAGCTCTAGGTTCGGGGGCTCACGTTTTATAA -TTTTAAACATACCCATGCTACATGCACCTAGACCCACGGACTTATAAATCCCTGTCTAGG -GTCGAGACAATTACTTTGTCCGAGGTGGTCCCCGGCTTAGGCTGAATGTAAGGCTCAACC -CTGGAACCCATACAATATAGTGAAGGCTCGAGCCCGAGCTGACATAAGAAGTCCGGGCTG -ACATAACCTTGGAAGTTCCCCGGAGCTCTATGGTCCCCGATCCCCCGAGACGCCGGGCTT -CAAGAAAATCTATAAGCCTACAACAGTAGAGAAAATTTCAATTTCAATGGCCCTCTGGAC -ATGTGGGGTACGTGGTATGAGTTCGTTGTGTGCATGTGTGCACGTGTGTACGAGTAATCC -CGAGCTCGGGGATCTTCTCCTTTTTTTTCCCTAACAATACGGAGTACAGCATTCAAAAAA -AGCAAAGAGAGAGAAAAGGAAAAGTAGGTGAAAAATCTAGGAATGATTTGATAAAGGAAG -AAGCATACAATGCCATTTTTATTTGTCCCGCTCTTGGCGCCCAATGCAACTAAATTAAAA -TCAAGGTGAACTAGAGACATTAGTCTTTGAGAAATACTGTGTGAAAACACCACACAGCAA 
-GGTGAGGAAAATCGGGCAAATAAGCCGAATTTACATCCTGGAAACGAGGAGTCAATAATT -GCTTTTTCAGTAAGGGTGGGGAAAACTAACCGCTCAACAGGGCTCAGACCAAGAACCCGC -ATGCCGTTGTTGAGCACTTGGCGGGCCGACGCATACAGGGCCATACGTGCCTGCTTGACA -TCGGGCTCGCTACCAATCACCTTGAGCACATCGTAGCTAGAGCTCAACATGTGTGTCATG -CGGAACAGATAGCTGAGAACAGTGATGGGCTCAAGAGTCTTGGCGGTTTGCAGGAGTACA -TCGGGCCATTGCGCCAGCAGGCGGACCAGGTCGACAGCATGCTGCTCGGTCAAAAGATCG -AAGTTGGCGTTGACCAGCTCGTCAACATTCAACTCAGACTTGCGGGCCATGGAGCACAGA -CGAGCGTGAGCGTACTGCAGGTATGGGCCGGTGTCGCCCTCAAAGGAGGTCATGGCATCC -AAGTTGAAGTCGTAGCCGTTGACGCTGAGAGAAAATATACGATTAGTAAATGAATAGACG -AGAGTAGGCAGGAGGGGGGGGGaaaagcaaaaaaagcaaaaaaagcaaaaaagcaacaaa -aaAAAGTAAACAAGAACAAAAACAAGAAAATGGTACATACCGCTTGCCAGTCATGTCTTG -GACCATAACGGATGTGATACCCAGAATATCGGCAGTCTTCTCAGGATCCTCAACCTGTGA -ATACTTGACTTCGTTCTTCTTCATGACCTCGTGCATCTTGTCACCAACATCACGCAGGAT -GTCATCCAAGAACTTGACGGTACCCTTACGAGTACTCATACCACGGACCATACCAAAGTT -GATGTGCTGGCAACGGGAGGCCAAGTCCTTATGGCCCATAAGTTCAGTGACCTTGAAGAG -CTGAGCTAGATGGAGATCCTGCTGAGCGGCAACAACGTAGATCATCTTGTCGAAGTGGTA -CTCATTATCACGCTCAGTGATGGCACCGATATCGCGGGTCAGGTAGAGAGGAGTGCCATC -CTTGCGGATGATGATAGCCTTGCCCAGCTTCTTGGCACCGTGCTTGGTGAAGTCCACAAT -AACAGCACCCTCGGACTTCTCTGAGACACCTGCCTCCTCCATAAGCTTGTAGGCATCGCT -CATGCTTTCGGACTTGATTTGAGATTCACCGGAGTAAACATCAAAGTCAATATTGAGACG -GGCGTAGGTCTGCTTGTACTTCTCAATACTGAGATCACGGAACTTGCGCCAGAGAGCAAG -AGCCTCCGGATCACCGTCCTCCATGCTCTTGAAGTAGCGACGGGCCTTCTCGTCCTCGCT -CACGTCCACAAGCTTGGCAAGCTGTGCCTCGAGCTCAGCAACATCCTCGTTCTTCTCCTT -CTTGGTCTTGATCTGTTCCTTCAGTTCCTTGATAGGTCCATCCTGTTCACTGACCTGACG -GTTGATCTTGACATAGACATCGAACAAGTGGTTGATGGGATCTCTGATAAGAGCTTCCTC -ATTGCCGTAGTCCTTGAAGCCGTTGGCCAGCAGACCATACTGCTTGCCCCAGTCACCGAG -GTAGTTCATCTTGATGACATTCCATCCCATGACAGTGTAGATATTGGCAAGGAAACCACC -GATGATGGTACTTCGGAGATGACCAGCGTGGAAGGGCTTGGCGATGTTGGGCGACGAGAA -CTCGATAATGATCTTCTTCTGGCCCTTGGAAGGGTCGGAGGGGTCCTTTTGACCCTGGTT -GCCGTTTGTTCCATAGGCGGCCTTCTCCTTCAGGACACGGCCAAGGACATTGCTGGTCAA -TGGCTGGCGTTTGCAGAAGAATTGAATATGAGCACCAAGCTGAGTCGGGGGGTGAATAAG -GTCGGAGGCGGGGAACTTGGAGACAAGATCCTTGGCAAGCTCCACGGGATTTTGCTTAAT 
-TTGAAGCGAAGCCACCTAAATGCATGCAACAAGACATTGTTAGTCCAAGGGTTCATGATC -ATGCGGAACATGCGAATTGCGCTTGGAATGTGCCGCCACCTACCGGCAATGAAAGGTCAC -CCTTGTCCAAAGTACTGGTCCAGACAATGCGCGGGTAGATCTTCTGAGCATCGATACCGG -CAGCTTTGCCCAACTCCTCCGCGATATGCTCGCGGTAAATATCCATCGGGTTCAGTGTCG -GGAAGCATCCTGGGAACTGAGATGTCTGCGCCGTTGATTGGAGCGAAAGAGCCTCAACGG -AGGCGGGAAGAGAAGCCGTGGACATGATTGAGAATCCTCGTTGACCGAATGCGCGTATCA -AAGTCGGAAAAGTCGAAGGTTTAGAGGGGCAGCGCAACGGAGAAAAAATGGCTGTGAATC -GCCGAGCCCCGCTTAGACAATGACCCATAAAGAGGGGGAAATCTCTCGGGAATCtttttt -tttttctttttctttttcttttctttctttcttgccttttGACTCAGACTGGACAGGGCG -GTGAATCACGTGGCCATCCGGAAATCTACCACTCAAGGCGGTGAACTGTGCTATCGCTAT -CTTATCGTTATCTTGTTCACTTGCAAGATCACGTGGCTATACATACTTGGAACCCTGGGA -CTTATGAACTTGATACGGAGTGGCTCATAGCTAGATAGCTCATATTACATAGCTCATGTG -ATAATGGCAGTTTCCAGAGTTGATATTAAGCGTAAAACAGCTCAAAGACTGCGGGTCCTG -TAAGTCAACTCCGCATCAATTCTCCTTCGTTTCAGAAATATATCATGTTTCAACTTACTT -TTGACATGATCTAATTTGACGTGTTTGTCATATCCGAGATCTTGGTAGTTCTTTATTTGC -CAACTCTGACCTTCTGTTACACTCAGGCGGCAATTACTTACTTGGTCGTCTTGTAGTAGC -TCAGACCTCCGCTTTGTCCCCATGAAGCATATCAGAAGCTTCATGGTAACAGGTAGATTC -ATCATATAGTTTCACTCGCTCTTCAATAACTAAGCTCTTTTGTCTTTCGGCTTTCCCCTA -TTCGTCGTTATATGCTATTTCTGATACCACTAGGTATTCGAAGGAGGTTTAATGACTCTG -AGGCTCACAGTCTAAGCCTACCCGCTCTGTTTGTTTTCGCGGGCTTTGCTGGCTTCTATT -ATCTCCAAATTTTAGAATCAGTAACAAACATCACAAGGCTTGTCGGAATTACAAACATAC -AATGAACACAGAAAATAATGTCGAAGCTTTGTCTCCTGATTCATATTCATCAAACACTTG -ACAATGGAGTCGAACACAATGGCTTTGTTTCCACCAAAGACCCCAAAGTAGTATGCCATG -GGTTCAATTATACTATATGTGGCCAGTGCCATGTATTTATTGGGCTTCAATTCCCATTAT -CCAATTCATCGGCAAGTCGGCATTCTCCCGCAACTGAAGGAGTAGACTGAAGGAGTGGGT -GGAATTTCCTTTTACATCTGTCGCCATTTTGAGAACAGCCCTAGCAGGCTAACGAACTTT -GGAAGCTAATCAGTATGTCCTATTACATTACTATCAACCACGACCCAGGAGTGGATGAGA -GCCCCGAGAACCTATGGGTGAATCTGTATTTTACTGTCAAAGAATATTTTTACCTTCGGA -CTGATGGGGCGCTGATTTGTTACATCATGGAACGTATCCGGGAGAAGAAATTCAAACTTA -CTTCTGAACCCTGGGATCGACGATCTCGATGGTGTATTTCGATACCAGTCTCCAAGATGC -ATGAGACCAAAGACGGGGCTGCTGTTGATGCTTTACAACTGGCGGAGTGGTACAGGGTCC -AAATTCTGAATGGGAATGCAACTATTAACCGTGATTTGCTATTTGATCGAAAGCCATTTT 
-ATGATGGCAAGGAATGCTGCAGAGATAATATCTGCGATCTGATACTGCATCCGGAGTGGT -GGATCCCAGAGCCTTCTTCGAATGACTCCTCGGACTCTTCTTCGGATCTTTCTATGGATC -CTTATGTAGATTCCTCTTCGGATTCTTCTTCTGATACTTCTGCGAAATCTTCTTCGGACC -CTTAGTGTTTTAACTGGCTGACTAGAATATGTATAAAATCTTTAAGCCTGACTAGTGCCA -ATTCAGAGTTTTGGAATCCAGAGCTTTAGTAGGTAGCCTTGTCAGTATATGTACGTAAAG -AGTCATTTTTGATAGATCCATTGTAGCGGGCAGCATCACCTGGGGGAGGCTGCCCCCCGT -TAAAAAAAAAAGTCAGATAGTAAAAATTTGAGACTATCTTTGTGGGTATAATGCGCAAGA -CATGGCCTACTAAAGTTATTAAAGATGCCGTGTCGCCCATATGTGATATGCAATACAAGG -AAGTTATAACGTCTGAGTTCGTGACATCCTTCAACCCGGTGTTCAGGTATACAATAGTTG -CAGGAAGAGTTGCAGTTGCAACATTGCCTATGATTTATATATCTTGAACTGAAAATATCT -GCTAAGCCCACGCGAGATTGATCATGGCCCTGGCAAAAAAAAAGACGTGGGAGATATTAC -GTCTCCACCACAGATCCACAGCAGACGGGGGTGGAGAAGAATACGTGGTTGAAGAAATGA -CTAGGCCTTCTAATTGCTAACAATTATTATCGCATGGAACAATTTGCTTCGCTTAGGAGC -GGAACTAGAAGTCTATCGGAACTCATTACCGAGTTTGTGTCTTATATCAGATCAGTTTCA -TATCCAGATCGGCTAATAACAAATTTTAAAGACCTGACCGCGAGCTACACTGTAACTATG -GCGAGATTCACCATAGCATGATTATTTATTTGCGACTTACCAAGAAGCAGAAAGTAAAGA -TATCCATCCATCCTGGGCTGCAATTTTGACTAACGACTAGCTGACAGATGATTAGTTATA -TAGGATACTACTTACATGATAGTACCATGCAGAGGCAACGGCCCATAAAGTTGTGATGGA -CTAGAATCTACAATACTGGGAAATACCCACCAGGGGCAGAAATTGACTGTGTTCTCTTTG -TTGACCGGAGATATTGAAATCTGACTGGATCAAGCAGTGTATCAGACAAATTTGGTGGAG -TCGCGAACCAATGGTTTTGACATCCACACCTTTTGATATCTGAAGTGTTTCAATGGTTCC -GCTCCACTCGATCGAACTAGGCCTTCTAGATATCACCAATTAAGCCTGTTTGTTCCTACT -GCACGCCCATCATACTCCACAACCATGCTGTCAAATCGTACGACACCAAGGCCTGCATTT -CCAGCTGCGGGTCTGATGGCGATGAGTGTCTCGTTCACATATAAGCTGTGTGGGTGCTCA -ATGTCCTGGCCGCACGGTCTCAAAAATGGATGATATCAGTTCTTCTCTTCGCTCAATTTA -GTCACACCTACTAGCATGGGTCTTCATGATAGCTTCCCGGCCTACGGTGGTCGTGGCCCC -AGCGATCTTCGCCTTGGTATTGTTTTGGGCGCGATTGCGACTATCTTCACGGCGCTGCGT -GTTTACGTTCGCATCCGCATGAACAAATTTGGAACTACTTCACTGATATGGGCCCTGGCT -GCTTGGGTAGGGACATTGCACTTTGAATTCCTTGGAGGAAATACTAACTCACTCAAACTT -GAAATAGTTGTTAACAACAGTGACACAAATCTTCGCTATTATTTCCATTCTCCATGGCTT -GGGAAATCATATTTCAATTGTACAGAAAATGGGCGAACTACACAATTATCTTCTGTATAC -ATGGATCACAGTCTTCTTCTTTAATATGGCAATTCCAGTGGGGAAAGTAGCAGTTGCTGC 
-ATTCCTGATTGAATTAAACTCGCAGAGCAGTATGCAGTCTTCCAATTTTCCCGTCTACTT -TATTTAGCTGACAGTTAGCTTATTGATAGACCCCAAAATACGGCGCAGTCTCATCGTCGT -TGCCGCTTTGAATATCATTCTCAATATTCCTCAAATCCTCATGGCCTGGTTCCAATGCAG -TCCCCCGAGTGCGCTTTGGGATCCTCTTCATCAGAACCAGTGTAATCACACCAGGAGTGT -ATACTATACCTACTTTGTCGGTGCGGTAGCTGCGGTTTCCGATTTCTATCTCGCCATTAT -CCCCATTACCATGTTGGTCCCTTTGCGCATGGACCGGAAGCTAAAATGGGGATTGAGTTT -CCTCATGGGATGCGGTATCTTTGCTGGCGCCGCTGCAATTGTCAGAACATGGGCAGCCAA -GTTCATTATGAGCGATGATTCTTCCTGTAAAGCACTTCATACCCTACTTCACACTTACAT -CGAATAGTTCTCTAACCACCGTCATAGATGGCATGGGCATCCTCTTCTGCTGGGGTGAAG -TCGAAGAATGGCTCGTCCTCATCACCATGTCCATCCCACCAGTCTGGCCCCTCTTTCGAC -CTCTAACCAACCAATTCATCAAATACACAAGCGACCGAAGCCAACCGCAATACAAGTATG -ACGGCTACGGCCAGTCCACCCCAACGACGCGAGGACACCTGGCCGATGGACCAATCGTCA -CAACCACTATCTCGATTTCATCCCACAAAGGTGCGGCAATGACGGCCGGTACCGTGGATG -AAGAGGGATCCGATTCCCTCTCCAGATACGAGGAAGAACCTGAGACATCTCAGATGCCCC -TTGGCTACAACGGTCACCCTGAAGGGTGGATGGAAATGCCCCAATTCAATGAGCAGAAGC -GCACTTGAACTCGAAACTAAGTTCGCTTTCATTGTGACCGGCGTATTATTCTCTGTCCCT -CCAACTTCACCTTCACCTTTCAGCCCAACATAGATCATAGGAATCCGTGGAAATTTTGTC -ATTCTTTCCCGTATTGTAGGCTGAGGGGATTGAAATCTTTTTGGAAGGCACGACATCCCC -TGAGTATAAGCCCCCCCTTCTTCCTCTTCTTCTACGTACCTCGTCCCTTCCCCTTGGTAA -AGTCCTATCTTGATACTTCAAAAGTTGATAGTACTTACAGACAACTTAGTCTTAACCGAT -GCAGAAGTCAGCAATATGCAACTGCAAAACCTTGAGTGTACACATATGAAGGTTTGAATG -GATGATATGTAAATAAAATTATACAGTCCCAAGGCTATTAACGTCGATTAAAAATTCCAG -GTCTTACCAACAAGAGCTAAGAGGATATTTCAGGCATTTGAGCCCCAAAAGACACCTTTT -GATCCCTCGTCGGCTGAGGTCTATTCACAGATCTAACCACGCCAAGCAGAGATTAGACAT -CAAAACAAAAAATGCTTCGCGGCCAGGCCCAGCTAATATTCTATAGCCGATTCATATAAA -TGGCAGAGGCAAGCTCTAAAACAGCAACTTTTTGTTCGCAGCGAATCGCTTAAAAGCAGA -AGACCTGGTCCTCCAGCTTGCTGACGGGGCGCGCCTTGGTGTATCCAAGAGCCTCGGTGG -ACTTCTTCATGGCACTGATCATGGGAGGGGGGCCACAGAGGAAGATCTTAACATCGGCAG -CGGGGGCAGGCAGGCGCTCCTATAGGAAAGTTTGATTAATACCGATGTAGCACAGCCGAG -CACAGTACACCAACTTACCTTGATCATATCGGGAGTCACGAAGCCGACACCGCCGGTCCA -GCCCTCGGGCGGGTTGTTGAGAACGTAGTAGACGCGGAATCCATCGTCCTCCTTCTCCAG -CGCCTCGAGCTGGTCCTTGAGCAAGATGTCTTCGGGGTTGACGTTGGCGAAGATCAGGTC 
-AACCTTGGTGGTGTCGTTGCCACCGTTGCGAGGACGGTTGCGGATGATAGCCTTGATAAC -CTGCAGCATGGGGGTGATACCAGTACCACCGGAGATCATACCAATGTGACGGCACATGTT -GGGGGTGTACACCATGGCGCCCTTGGGGCCGCGGACCTTCATGTTATCGCCGATCTTCAG -TTCGGTGAGATACTTGGAGATGTTACCCTGGGGGTAAGCCTTGACAAGCAGATCAAAGTA -GCCGGCCTCGTTGTCGGAGGAGATGGGGGTGTAAGAGCGAACAACCTCCTTAGGCTGACC -GGCGATAGTAGCGGCAAGAGAGATGTGTTGACCGATGGGGAGTCCGAGAATATCAGTGGG -ACGGGGAAGAGCAAAACGGTAGATGGCGACATTGTGAGAGATCTCGTTCTTCTCCTTGAG -GACGAAGTCCTGGAATTCGGTGGGGTTCAAGACCTTGCGAGGCTCCGCGTTTACTAGGGT -CAGGTCAGTTCCTCAGCTCCACTGATAGATGGAAGATCAACGAACTAGTGCTGAAAACCA -AATAACCAGCCAGCATTACAGCGAGGGCAATCGCATAGGCGAGTAACTCAGGCTTCACGG -CGTAAACACCCGCGACGAGCACGGCCGAGGGCGCGAGGCCCAAGCCGATAGTGGCGAGAC -TTCAAGGAATCAGCCATTGTTCGGGGATGAATATAGGAATAAAGAGGTAAAAAACATTGA -CTTACCTCATTATGACAGAAACTGGATACTGTAAATCACCAAAAAAAAGCGCgataaaag -gagagaagagaagaagtgagagaagataaaaagaaatgagGGGACCCAGTTGGCTCCGCC -CAACGGATTTTCCCACCTTTTGGGGTCACAATTTCCCCGACTTTCTGTGTGTTTACTTCT -TTGACTTGGGGTGATCGACTGATATTACATTCTTTTCATCTCTTTTCAAAATCAAAATGT -CTAAAGTACCAGAAATCCCTGCTTTTAATCTAACGGAATTGGACCACAAACTCCTGGCCA -TGACCGATGAGGACTTTGTATACCATGACTGGGAAGACCTAAAAAGCATTATCGGTAAGT -GTTCCCCATCTCGAACAAATCTGCCCGGAGAATCCCCCTCATACCATGTACCGTAGCACG -AAATGACCTCGGCGTACTCATGCGGAAGCCTTCCGATCTCCGCCGATATCTGGCCTGGAC -ACATGAGACCAAGACCCAATATGGCACGATTATGAATTATATTTGCCAGCAGCGATTAAA -GTGGCATTTACCGCAAAACACTGCCAGCGGGACCCCTTCCGCCTTCAAGAACCCGATTCC -CTTTGCCGATCCCGAGGACTACAAGATCCTCCGCAATGACTGGCCATATGGCGTCAGCCC -TGGAATTTCACATCTCGTCGTGTGGCTCCGTACTCCCATCCCTGTACAATCTAGTGAGGG -TCATTTGACCGATGAATCTCGTGCGATGATCAACACTTTTGTTCAAAAGACTTTCGTGGA -CCGTTTGACTAAGGATCCTCGAAATTTCTCGGACCCTGAGTCTCAGGTGCTATGGTTTAA -GAATTGGGTTGGTCTCCAAAGTGTAAGGGCATTGGAACATGTTCATATTTTGGTTCGTGA -TGTTCCTGAGGATATCTTACTCGAATGGGCAAATGAGTGAGACTTGGAGGGATTGTTGAT -CGGGAGGCTTTCTTTTGCCTGATTGGTCAGGGTTGCATCATCCGAAGCATCTCACAAAAG -GAAGCGTTTCGCTACGTCGGACATTTCACGGCCTCATTGAGCAAAATAAAATATAAGAGA -ATATATTTGAGAGGCCAAATAGACACAGGCAGTAGTGATGTCCTTCTACCATGAAGGCCG -TGGTTCGTGGTCACCTTGAGGCCTTGCCACATACTCGATCCTTACCTGATCCGTGCGATG 
-AATCTTGCCGACCACATGGGCATCTTTCCGGAGCTGATCCGAGTTCAAAATGCTTTCCTG -GCTACCGCTCCTCGTAGTTTCAATATTCGTGGTCATCAGGATATCGTCGTCCTCGCGAGT -GTTGTTTGATTTCTTCGTGGGTCCATCCGTGTAAGTACGCATCTTGCCGCTGTCATATCG -CCCGTGAGAGCTCCAGGAACGTTTCCCTCGGATCATTTCGTATATCGGACCCAAGGTTGG -GATGCAGGAGGCCATTATGATGATGTTCGACTCGACACTGATAAACGAGTCAGATACAAG -AGTTCGTAAGATTCTGTATAAGGAGTGATTGAAGATATACCTCGTCCAGATGACGAGATC -TGCCGTTGCGTCTGTTTATCTCATGTCAGCGGTAGATCTCTCCCTTTATGCAAAACAACT -CACAAGTCGAATCTGCCGTGTTATATAAACCTGGCAGCTGTAGGCACTTAACGATGGCCA -TCGCACAGGCTCTGTGTGAAGAATCAGCTACTTACTCCGAACAATAATATGAATCCACTT -ACACGGCACCCAAGCCCAAAGCTGCACATAGGGCCAATTTCTTCTTGAGCGTCATTTGAA -GGCGTAGCAATACCACAGTGGGATAAATTGCAAGATATAGATCAACCACCGCGGACAGCA -CTGGGAAAGCTTTAGTTGTTTGTGTGCGAAAGAAATGCAACTCAACAACATACCTCCAGT -AAAAATTGCATAGCCCACAAGGATCGTGGTCGAACGGCATGTTGCTCCTTCCGCAATGAG -ATGAATCTTCCATAAAGCCCGGGGAGGATCACACATGGTGAAAAGAACGACAATGCAGAT -GGTCGACGCTACGAACACCGAGGCTGTCAACATCCATAGCCACATTCGATGAAACCGGCT -GGGATTCATGATGCGATTTAGGAGAGCGGCGATTGCAAGTTTCGGAGTTGTGAAAGACAT -GATTCCTAGCGCAAAGTTTGCATAATTAATGACTAAAATGCGCTCAACCAAGTCCATGCC -ACCTCGTTCCATCAGCACAGAGGTATGGCTCCCGTAGCCGAGAAGGACATTCACCGTAGT -GAGAATTGTATAACTGGTGACCATGACCTATAAAATTGGTATCTGGCGTAGATCAAGATC -AACCGGAAGATACATACCATTGCTGCCGCGATAATATAGTCATCGAGTCCAATGTTCCTT -AACATCCGGGCTCGGATATATAGTCGCAAGGATACCATCACAACACTTACACTGAAGAAG -GCCCAGAAAACGCCGAGAATGCGCGGACCCTTTGTCTGGTTCATATATTCAGCATGCATA -GTTGCCATGATGCCGAGAGCAGTGGCTGCTCTGTTGCTCAAAAAAGAATCACGACAAGAT -CATTTGCTTATGGCGTCAAGAGGACTAGAAACTGCGGAGGTAAAGAATCATGATTAAAGC -CCAGAAATTGAGAATAATATATTTTTCAAGAGATCTATTACTTCGTGCCGCTTCCAGGGG -GGTCATGTTGAAACGATCTGACCAATGGACCCCCCCCCAAAAAGCAGATACATCAAGCCT -CAAGATCGCAAGTGGCTTGAAGAGAAGTATGATTGACTTTTGGCCCCCAGGCACTACAGC -CTTGGCGTGCGTCCATTAGACGCGAAGTTGTACTTTATATGTACATCTTTTCCCATTTTA -TATTACACATGATAGGAGAGAATTCATGAATTTAGCCCTTGCTCTACTAATTATATTATC -TTCCCTTCGATTCCCTGCCAAAAATTCTACGAAGCTTTGTGTAGATCTCTCGTGGAGACG -TGAGTGATGGTCTTGCACTATGTGGAGCTGTCAATCCCGCAGGATTTTCTCGGTTAGGAC -AATGGCTTCTGGATTTAATGAATTGCGGAGTAATAGAGTAAACTGACCAACCAATTTGTA 
-CCCAAGATCGCTTAATGTGGGACAACATATGATACTAATATCCGTAGTATATGTACATAG -TGTTCTTGTCAGGGGTTCGGCTATTCTTTACCCGGGCATGTTAGCAGCTAGAATTTAGTG -TAGCACTAGGCACTGTGACCGTGGGGGGTTTCTTTTAATCCCACCAGTTCACCTCGTAGA -TATATATGCCTTGCAGCAAGCACAGTCTATCATGACGTACTCCAACGGAATCTCAAATGG -CTCAGTCAATGATGCGGCGAATGTAGAGCGTCTTGCTCTGTCTTCAAACCTTCCAGATGC -CGTGCCTGGTCTTCTTCAGACAGTTGCGAGCCATGGCAAAGATCTCCTTGCTCAGGATCC -CGAAGCACGAGCCAAGCTGCTAGAAGCTGCCCGGTCATTGACTTTTGCATTGGAAACCCC -CCGAGAAGCAATCATCCGGCATTGCTGGTCCGAGGTGTGTTGCGACTGCCATCTAATATT -CTTTGAAGCAATATGGGAGTATTTAAACTTACATCTACTACATTTGCTTAGTCTACGAGC -TATGCAGCCCTAGAGACAGCCGTTGATATAAACCTCTTCTCTGCCCTTGGCACAGATGAT -AAACCGAAAACCGTCACAGAGCTTGCTCATGCAACCAGCGTGGATCCAATTTTGCTCGGT -TAGTGGTTTGTTTGCATATCAGCCAACACCAATACCTAACAAATTCGTATAGGGCGATTG -ATGAAGCATCTTGCTGCTATGGGAGCAATAACTGAGACTGGCTATAACGAATACTGCCCC -ACTGGGTTCTCAAAGGTCCTCACAGTTGAGAGATATAGCGACGCATTCCCGCTCATGTAG -GCTGGTAATCAGAATTCCATACTCCAAACCACGACTGACAATCCCAGGACTCGCCGTTTC -ACCAAAGGCATCATGGCGCTACCGGCATTCCTCAAAAAGAACAAATACCAAAACCCCACA -AGTCCTACAGATACAGCGTTCCAGATGGGCTATGAAACAGACATGGGTTTCTTTGGACAT -GTGCAAAAGGAGCCAATCACAGCAAAGCAGTTTAATAACCATATGTCAGTCTACGCTCAA -GGTCGAGTTCGCTGGGTGGATCCCGGTTTCTACCCGGTCCAAGAGCAGCTTGTAGATGGT -GTCACCATCGGCAAGGATGATGTCCTGCTCGTTGATGTTGGAGGCAGCTTTGGCCACGAC -CTATCCGATTTCCGTCGGAAATGGCCTGGCGCCCCTGGTCGTCTGGTGCTTCAGGATCTT -CCTGAGGTCGTGGTGTCCGTTCAGGACCTGCACCCCTCCATTGAAGTTACTGGTCATGAT -TTCTTTACTGAGCAACCAGTCAAAGGTACGGAAACAGACGACTCCCCCGTGTCCTTGCGC -ACTGCAAAGAGGCTAGAGTATACTGATCTCTTCTTCTCTCATAGGAGCCCGCGCTTACTA -TATGCATTCTGTTCTTCATGACTGGCCCGATGAGCTTTGTCGCAAGATTCTCGCTAATAC -TGTAGCCGCCATGCGTCCCGGCTATAGCAAGGTGTTGGTGAATGAAAATGTGATTCCGGA -CACGGGGGCCTACTGGGAAACCACCAGTCTGGACTTGATCATGATGGAGATTGGATCCGG -CGAGCGCACGGAACGCCAATGGCATTCATTGCTCGAGTCGGCCGGGCTCAAGATAGTCAA -GATCTGGACTGCCCAGAAGGGCGTGGAGAGTCTGATTGAGTGTGAGCTAGCTTGATATAT -GAGAACATGCTGTACAAATTATAAGCAAATAAATCTGAATCTGCAGTGTGAGCAAATATA -TATCATGTTACTTTGTAAAAATTCGAGTTTAGGCTGGTGAATAATCATCATGCACAATTC -ATTAGAACACGTAAAATAACTACGTTTGAAGAATTGAAATGGAAATAGAACAATCAAAAA 
-GAAATTCGGTATCAGAGTACGCCAGCCAGCTAGTCCATCGAAGCAAATGTTGATTACAAT -ATACAAAGAGCATACCTAGGCAATACGACGATCAGGTCTTCTGCTCCTCCACGAACTTCA -GAGCTTCTTCACGTGAGACAACATCGATAGTACTGGCCTTTTGCCTAAAGCTCACACCAT -AGCACTTCTCCGCGACATGTAGCATTTCAGGTCGGAAGGTAGTGCAGATAAATTGTCCGT -TGGTCGAATCCGAGATGGACTTGAGCATCTGTGCAACGGCTGTCCGGTACTGGGCATCTA -GGTTGGCATCGATTTCATCGAATAGATAGAATGGCGCGGGGTCGCAAGCCTGGATGGCGA -AAACGAGTGCGAGAGCGCAAAGACCTATGGCAAGTTAGTTAGCAACTACAAGCAAATACA -GATTTGGACTGGAAATGCTTACTCTTTTGTCCGCCACTCAGCTGCTGAATACGTTGTTGA -TCATCGTGCTTGCTATTGAAGCTCACACTGATGCCGACACCAACGTAGTTTTCGACGCTC -TGCCTAGCGTCCTCGTCATCTGAGTCAACATCGTCATCAAGTCGAGTAGCACGGTCAGTT -TTCCTTTGAATGATAAGACGACCACGACCGGCGGGAACAAGTTTCTCGAACACGTTGTGG -AACTCTCGTGAGACCTGCTTGAAGGTTCGTTCAATGGCCTCATCTTTGCGCTGGTCAAGA -ACAGAGATGAGGTCATCAATGGACTTTTGGGATGCATCCAGCTCGGAACGACGTGTGGTA -AGAGTCTCACGCTGCTTGGTAAAACTGTTATACTGTTCAAACGCCTTCTTGTTGACATGC -GAATATTTCTTAAGAGCCTCGTTGGTTTTGTGGAGCTTCTTGACGACCGTGTTGGAGTCG -GTGTTTTGGTACTTGGTGAAAGCTTCATCCGGAAGAACACCGAGGGAACGAATGTTGGCC -GCACACTCAGCACCCTGCTTAGTAAGTGCTGCCTTCTTTTGCATGCTCTTCTCCATACGG -CGCTGATGCTTCTCGATAGATCGAGCGAGCTCTTCAATTTCCCGTCGAGTCTCCGCATTA -CGCTGTTGCAGATCCATCACGCGAGTATTGCCTTCCTCAATCGAGGTATCAACTTCCTCC -AGTCGTGCATTGAGCTTCTCCAATGCCTGGCCAATGCGCTTCACTTCACGCTGTGTCTCT -TTCAGGTTACCCTGAACTTCCTCGTCGGCGATGTCAATGTCCCTGTTGAGTAATTGATCA -AGGCGAGGATTGAGATTCTCTCGAAGCTCAACTTCCAGCACCGACTTGCGGGCCTCGAGC -TCGCTGCGCTGACCCGAAAGCTCTTGGTATTGACGGCGCAGATCATGTACTGTGCTGCTC -AAAGTTTCTAGTCGGGTTTCCTCTTCGCTGGTGAGGGCTTTCTGGAAAGGCGATGACAGT -TCGGTTTGGAACGCATTGACTTGGTCGTTCAGTGCCGCAAGGTTAGACTCAACGTTTCGA -AGAGCGCGTCGCTTGGCATCAAGAGTATCGTTCTTGTTTTGAAGGAGATCACGCTTTGCC -CTCAACTCATGTCGTAATGGTCCGCTACTGTTCTGGACCTGATGTCTCTGTTGCTCAAGC -TTTTGCAGTTCACCAACAGCCTTGGTGATCATCTGGTCGAGCTTTTCCAACTCTTTGCGG -ATTTCCGTACCGCGATTTCTCTTGATCTCATACTCATCCCGCCACTTGGCTAGACTCTTG -ACAGCGTCCAGTCGCGACTGACGGGAGTCGTGGTATCCACCAGTTAGAGCACCACGCTTG -TCTGATCGATCACCCTCTGGAGTGATAGCATTGACACCATGGCTTCGCGCGTACTGTGAT -GCGACCTGCAGGTTGGGGCATATGATAGTCTTGCCAAACACGTGCTGGAAGGCTCTTTCA 
-TATGCCGGGTCGTACTGCAACTTGTCGATCATCGGTATTGTGTCACTGGCACGAGGCATG -TTCATCGGTCTAGAGCGAAGTCTATTTAGAGGCATGAACGTCACACGACCGGCCTTTTCC -TTTTGTAGAATCTCGAGAACGGTTGTAGCTGTTTCGTCTGTGTCAACAACGTAGTGGAAT -AAACTCTGGCCGGCAGTGACTTCGACAGCGGTGCGATATCTATCATTGACATCGAAAAGC -TCGGCAAGAGTACCGTAGACTCCTTCAAGATTATGCTGGCGCTTGATTCGGCGCACTGCG -GCAGTTCCGCGACTGGTGTTGTGATCCATCATCTGCGAGAGATTGCGCTCGGCACGGTCT -ACCTCTTGTGATGCATTCGAAAGCACGGAGTCAAGTTTTGCTTCTTCTCTCCATAGTTCT -CTGCGGAAACATTAAGATCTAATTCAAGTCAATTAGTCATTTAGGAAAGCTTACTTTCTT -TGGTCCATGAGGCGGTCTCTTTCATCCTTGGCATCTTGGACTTGTTGATCCACGGAATGA -ATTGTGTCACCGCGTCCATCGATTTGTTGTCGCAAGCGTTCGGTTTCCGGCTCCAAAGCG -GTAATCTCGCCTTCTAGCTCTTGGATATCTTCTTTTGTCTGAGAAATCACACCCTGGACA -GTGCTGATAGAAGTATGGCTCTCGCGGATTTCCATGTTCAGCCACTTGTCACGCTCAGAT -TTGTTCTTGAACCGCGAGTTTCGACCCTGCTTAGCATAAAGACGCTGGCGCGAAGTCTCG -GCATCCGTCAACTGTGATTTGACCGTGTCTTCCTGATCCTTCAAGGCATTGAATCGAGGG -AGAATTTCCCGATGTTCGATTTCACGATCACGGATAGCGGTTTGAACAGAGCTGAGATCG -GAGTCGTAGCGAGATTTTAGTGCCTGTGCGGCAACTTGGTTATCAGAAAGAGACTTGGCC -TGCAATTCAGTTTGAGCCAGAATCTTGGATGCTTCACGCCGTTCGTCCTCCAGTTGAACC -TTGTCAACTTTGAGGAACTCGATCTGTTGCCGGCACTCAGCGATCTCAGCCTCAATTTGA -GCCATCGCTTTCTCGCCTTCGACGAATTGGTCACGGTTATTGTCGGCATCCTCAACGCCG -TTCTGCCGTCGCTCTTCAATTTCATTAAGAACCTTACCGATCTCCTGTTGTTCGAGGGAG -TAGATGGTGTACTCTAGACAGCGGCGCTCCTTGTCTTTCTCCTGATAGCTTCGAAGCTCG -TCCTTTTCTTCTTCGAGTTCGGCAAGGCGTTCATTGATATAGTCGAGGAGTTCGTCAATC -TTGGTGCGCTTGCTGTTGGTCTCATTCATGATTTTCAAAGATTCCGAACGACGAGCTTCG -TATACCTGGGTACCGGCGACCTCCTTAAGAAGAACCAGGCGCTCGGAGTCTTTCATGTTA -GTCAGCGCTGTGACTCGGCCTTGAGGCACGATGTAGTAAGGGTTGGACCGAGAGAAACCA -GCCGACTCGAGCAGGTTCATAACATCATTCTTGGTAGCATTCTTGCGGTCGAGAGTGTAT -TCGTCCTTCTTGATACCGATAGTACGACGCAGGACAACCTCGGGCTTGCCTGTCGGGAAT -CGATCGTCGGAGTTGTCGAAGATGATCTCGACGTAGGCGGACATCACAGCCGAACCGGAA -CCTTCCTGCTTGGGGTCAATCTATGTATATTTTGAGTGATGATGTAGTACATACGTGCAA -AAGTGCTTGGCGTTCCTCTCTGCCAAGATGAGTGTAGGCATCGCTGAGGACGAAGCGAAT -GGCCGCGAAGAAGTTACTCTTCCCGGAACCATTGCGGCCGACAATGACGTTGTGTTTGGG -GGAGAATGGCTCAATGACCGTTTGATCTTTGTAGCTGAAATTCATGCGACGTCAGCATAA 
-GCAATATAAGGACCACGAAAATCGCATTTGGGTGTGGAAGATACCTCTTGAAGCCTTGGA -TGATGATCTGCTTGACATACATCTTGAGAAGAAACGCGCGACGCGATGATCAATCAGAAA -ATCCTCCGCATAACGCGCCTGGCGCAGAAATGAACTAAGAAAAGAATCTGCCGCAAGAAT -CACAGCCAGCAGACCTCTCTACCAATGAACAAACAAAGGCCGATGGGTATCGTGATTGTG -TTGCAAAACCTTGGAAACTGACTACGCGAGGAACGAAATTGGCGCGTCTCGTTGGGCGCG -GGGTTCTGCCGGGCGGTGACCTTAATTTGGGCCACAGAAAAAAAAAACCTTGGGGACCGG -CGGACATATTCAACTCTCTAAATTCTCAACATTGAAGATCCAGAATGGCTTCGATTGAGA -ATAATGCCCAGGCAACTGGTATTGCCGATGGCATCGCACCATCGCATACTTATGTGCCCA -ACGAAGGCTACGTGAACCCTGATACTGCAGACTCCGCCGAGGCGGGGCAAGATCTTCCCG -ATGAGCACGAGGAAGAATATGAGGAAGATGATTACTACGACGACATTTTCGACGACGAAT -TGGATCGTGAAGATATCCTGTCTTCTGATAACTCCGATCTTACCAAAGCATACAATCGCC -AACGCAGAATCAACGACCTTGCTAGCGACTCCAATGTGCCGAGATGGACATACCCCAAGA -CGAACACACAGAAGCCTACAGTGAATACCTCTGCGTCTGTCGATGACCAGGTCAAGTCTC -TTACTCGCCATGCCGCAAAGATCAAGCTAGATGACCAACAGGCCGGCATTTCGGGCCACA -GCAACAAGCATGGAGACAAAGCGGACCGAGCCACATCGGAACAAGTGTTGGATCCCCGTA -CGAGGATGCTTCTACTACAGATGATCAACAGAGGTCTAGTCTCAGAAATCCACGGATGTT -TGTCGACCGGAAAGGAAGCAAACGTCTACCATGCCATGTCCATCTCGCAGGAAGACGAGG -ATGCTGCCCCACTACACCGTGCGATCAAGGTCTACAAGACAAGTATTTTGGTATTCAAGG -ATCGTGATAAGTACGTGACTGGAGAGTTCCGTTTCCGTCAGGGATACAACAAGAGCAACA -ACCGTGCCATGGTCAAACTTTGGGCCGAGAAGGAGATGCGAAACCTGCGAAGAATCTATG -CTGCGGGTATTCCCTGCCCCGAGCCTATCTTCCTTCGTCTACATGTACTTGTCATGGGAT -TTATTGGTAGCTCAAAGGGTCTAGGCGCACCGCGCCTCAAGGATGTGGACTTCAATATTC -CCGAACCAGAGACTCGTTGGCGCGCACTGTATATCGAATTGCTGGGGTATATGCGCGTTA -TGTATCAGACCTGCCGGCTAGTCCACGCTGATCTGAGCGAGTACAACATTTTGTACCACA -AAGAACGCCTGTATATCATCGATGTCAGTCAAAGTGTTGAGCACGATCACCCACGCAGTC -TGGAATTCCTGCGCATGGATATCAAGAACGTCAGCGACTTCTTCCGCCGAAAGAACATTT -TGACTCTTCCCGAGCGGACTATGTACCAGTTCATTATCTCCCCAGAGGGACCGGTTGATG -GGACTGCTGGCAACGAGGAGATGACGAGTGCCATTGAAAAGCTCCTTGCTGCCCGTGAGG -AGGGTGACGATGAGCAGCAGGAGGCTGAGGATGTCGATACTGCTGTCTTCCGTCAACAAT -ATATTCCCCAGACACTAGAGCAGGTGTACGATGTCGAGCGCGACGCCGAACGTATTCGTG -ATGGCCAAGGTGCTGACCTTGTTTATGGAGACCTCTTGGCCGGCGACAAAAAGAAGGCCA -CTGCCACCGATACTGTCGAGGAAGACGTTGAATCTGACGCAAGCGGCGGTGTCTCCGTCT 
-CAGGATCTGACTCGGATGAGGAAGAAATAGACCCGTTCGCGCCGAAGCCGCCTCGAGGCA -AGCGTTTCGAAGACAAGGACTCCAAGCGCGATCACAAAGCAAAGGTGAAGGAGGAGAAGC -GTGAGCAGCGGGCGAACAAGATGCCGAAGCACTTGAAGAAAAAGCTTGTTTCCGGTTCTA -GCAGAAAGAAGAAATAAGCTCGCGTATTTTTCTGACCTTATCTTTCTTTGCATGTGCACG -AGTTGCGAATTTTTGCACTGTGTCAATCTACCGCTTGGCATGATTTGATGATGAAATGCA -TATATGGATGGAGCATTGGGCATTTGATACCTTGGTCTACACTTGATAGAAATGGGAAAG -CATAAATAAATATATGATGCAAGCTTCTCAACACTGAACCAAAAAAATTCACTTGGGAGT -AGACTACTCATAGTACATGGAGGATATTCAGTCAATTCGAAAAACAGCATCAAGGTAAAA -ACTTATCAGAGCTCTCTTTGGACGTCACTCCAATTCGTCCTCTCGCCATAAATTCCTCAC -TGTGTCAAAGCAGTGCTTACATCCCGCAATCAGGGCAAGAGTATGCATCTGGTAAatgag -catcgagcagggaacatgaggatgagtatgaCGAAAGGTCAGACCTATTGTGTCTTCATC -ATTTAAGTCAGAATCATACCATGGTCATGATGTACTAGTTAGAAGCGGTATTGGTGTGCC -GTATATCCAATTTCCATCTTAGCTACAAATGTAGTGGAAAGTTCAGAGCGAGGCCGAGAA -GGTCAAAGGGAACGGTACAACGCCCGGACAACAATGGGAGGGAAATATGCATTAGATGAA -AGCAACGAGCTGCTGCCGTCAGGTATATTAGGGAAGATCATGGGGATTGCATTTGGAGCA -CGATCTGGCTTCGTGCCTTGACTCGTGCATAAGCTTCCTCAACAAGGCCCCTCGGCAGGT -CAGGGATTCGTTCTCCCTCTGAACTTTCATCTGGTACTCCTGCCTCGATAGTCTCTGTAT -ACTCAGATACACCAGGTTCCTCCGAAGCCTTCTGCGCCACAGGGTCATTATCCAGGTGGC -CGCCTGGGAGGCGCAAGAACCGCACCTGCATTGGAGCTAAGCCAACCCCAGGCCATATGT -CAGAAGGAAGAAAGTCTCCTTGTTCGACACGGAAAGTATCACGAGCAGAAGCATCTTCGT -CGTATAGCTGGTCCAGCTTGTGCCGAGTATCTCGGAGCACCTTTTCTACACGTTGACGAA -CATGACGTCGACGTGCTACTTCCAGCAGCAGGCTATCGTAAGCATTAAGAAATCCGTCGT -ACAGGGAGTGTAGGTCAGTGAGCTCTGCGAGGCCGGAACTGATGCGGTCATTTTCTTCGA -GCCACACGCGCGTGAAATTATGTGCCTGAGAAATGTAACGGGGTAAGCGAGAGGAGGCAA -GAGAGGAAAGGTGGCTGTGAACTTCGATCGTTGCCTTGTAGATGGACAGCAGAGCATCTC -GCTGAGCTTGCACTTGTTCAAATATGGATTCCATCTCATTAATGCGATCTTGAATCTCCA -TAACAACATCATCAGCCTCGGGAGCATCTTTGATCAAGACAGCCACCATCTCCTGATACT -CGGCGTCAGTCATTGGGTCAAGAGGTGCATTCAAATTGGCGTTGATCTCTGCCCCTATAT -TTGGCATTCCATCATTGCTGCCATCAACACCAGTTGACATATCACCCGTGATAGATCGCG -CAAGTGCTCCCCCGCCATCTGTATGCTTCACGGCGGTGACACAGAGATCAAAATGCCGAA -CTAGGGCTTCCAATAGAATGGCCATTTCTTGAGCATGGGCCTCCAGTGATTGGATCTGAC -CCGGCATGAATGACAAAGTGAGAAGGCCGGAATCGGACGCAGATGACGAGGAAGATGTGA 
-TTGATAATCGGGATGACGCCAACTTGGTGGCTGTGTGGTAGTGACGAAGCGACTGTCTGG -TGGCTTGAAGTTCGTCATCAAATTCACGATTGGATGTGTCCAATTCAGCGCGCGCCGTGT -TAGTGCGGTCGATAGAAGCTTTTAAAGAGGCATGTAGTTCTTCAACACCTCGCTCGTCTA -CAAAGTCATGTAGAGACTTCTGCTCTTCATCACCAGGTCTGAATGACGTGTGAACGATAG -TCCCGCGGAGCAGATCTAGCGTATTTCTGAGCCGAGCATCAGCAGCATCCAGATTTTTGA -GTGCCACTGAAAACTCATCACGACCTCGGTATGATATTTGTTCGACCTCTGCACGAACAT -CGTATAAAAGCCGGAGTTGTCCATTGAGGCCTCGGCGTAAGAAGCCTGTCCGGGAGCACA -CGACAACACTCTTCTCCAATGCCGTGTGTGATGCAGTGACTATTTCATTGGCTCGCCAGA -CATGAGTAATGGAAGATAGTGAACGTTTGGCACCCAGTAAATACGAGATCAAAGTGTCAA -CCTGAGGTAGGGACTGCTGTTCCCCATCATTGAATCCAGCCTGCGGACGAATAGACGATG -AAAAGGACTGGGATGACGCCATTGTGTGAGTCAATTAATAACCCTGTTAATGGCGCATTT -GGACTAGGATAAGATGAAAGACCGGGGTTGATTAAGGGAGAAATTTCTCAGGGGCGAGGG -CAAAATTTGACACAGCTACGAGGGAAAGGGGGTGCTTGCACCACTTTCGCTCCACCGAAA -CTCTAGGACATTTCAGTATAGGCCAATATTGGACAAACCGGACTGTTGCGACCTTGGATA -GAGCCGGAAGGGGGAAGTGGCTCGTCCTTCAACCTGGAAGCTAGGTGTTATCAGGCACTC -GGCACAATAATACCAAGCTGGGGCTGGGGTCCACACGGCAGATCAAGAGCATGAGCTCCA -GCCCAGAAATGACTTGCAAATAAAGCGATAAAGAAAAACAGCCTCTTACCAGTAATTATT -CAGTTCCAGATGTCTCGGTCGAGACCGGATTCCGGTACGTAAATTGTAGACACGGCAAGC -CGTTCACAGAAGGTTTAGATAGACCGCAGTAGGGTGTATGATAACCAACAAAGCTGGGGG -CAGTGGGTCTCCCAAAGGGTGTCGGTGTGAGATAGCCGGACTAGTCTCAATCATCAAGAG -AAAGCCCCGTTTCACCCCAGTGAAGCTGAAAGATAGCGAATCAGCCTGTCGGAAGCGTAG -GAAGTGATCGATTGATGTGCGACGGCGTGGAGATGGCGCAATAAAGATGATCACGTGCAT -ACTCACCGCCCAAATCACGTATGTGCCGCTCTGCCGCTGTCACGCAATCCCCGGACTCCA -TCGATGGCGATATCGACGACACGTCCAGATTGTGCAATTTTTCCTGGATTTTAAATCCTT -CTCTCTTAAAAAACTCCGTTATTCCCAATTTTTCCTTCTCTTTATTCTACAAGTTTCTTC -CAGTCGTTCAATCTCGATAACGCTCTTATCTCAATTTTGCTCTCAGGGTACCCATATCGC -CCGAGTCGATAGCCTTGACACCCTCCCTCTTGAGCCTCTTATTCTGGGGAATTCTTCCAA -GAACCCTGATCGCGACATTGATTGGCATCCTCCCCGTGCTCGTTCATATAAACGATTCAA -CTCAAAAGAACACCGAGATTCACATACCGACCGGTTCTTATCTCGGTGCCACATCAGTCG -CGACCGTCTCTCGCCCGAGCGGTCCCTCCCCCTCCCCCCTCCTTTCTCAACACCGCGCAA -AACCGAATCAGTCATCATGTCCGATACACCGGAACCCTCATCCACTCAACAGAGTCTCGT -TGTCGCAAATTCGGCGGTCAGTGCGAACGCCGACTCGAACCGCTCTATGCCGCGCAAAAA 
-CACCGACAACACCACACAAACCCTGGAGGAACTAGCGAGGCAAAATGTGGCCCCGTTCCT -CGCTAAATATCAGCCACGACAGTATGCGCCTTTGCGCTCGCAAATATCAACTCCATCCGA -GCAAAACCTAGTGAATGCCAGCTACTGCTATCGTCATCAACCGGACTCAAAATGCAGCAG -ACGCCAAGCCGATGAACAGTCGATGACTCAGCTGCAATCCGATCTCAACTTACTCCCTCA -AAGTGACCAGCAGGGTATTTCCCATGTGTGGTCATTGTTCTCCGCTGCTCCAGCGAAGCA -GCGAACTCTCATGCTCCAGGGAATTTTGGCACAGTGCTGCTTCCCCCAGCTATCCTACAT -ATCCGCCAGTGTCCGCGAACTCATCCGGATCGATTTCCTCGCTGCTCTCCCTCCTGAACT -TTCCTTCAAAATTCTCCGTTACCTCGACACGGCCTCCTTGTGCCGAGCAGCGCAGGTTTC -TCCCCGCTGGAGAGCCCTCGCCGATGACGATGTGGTGTGGCACCGGATGTGCGAGCAGCA -TATCCGTCGAAAGTGCAATAAGTGTGGCTGGGGTCTTCCTCTTTTAGACCGCAAGCGCCT -GCGCGAAGCCAAGCGTGAGATTGAATTACGTGCCACCAACTGGGGCAACAACGAGCCTGC -CGCTGGGCCTAGTGATGCTGCAATGGCCGAGCCTTGCGCGATAGTTGAACCTTTAGACAG -TGGCAAACGCAAGATGGAGTCTGATGACGAAGTCTCCGGCCCCGCAAAACGCCATTGTGC -ATCACCTCCCCAGTCTCCTGAAGAGGACGAGAGTTACTTCAAAACTCGTTACCGTCCGTG -GAAGGAGGTTTATCGGGATCGATTTGTTGTCGGTATGAATTGGAAGCACAAGCGCTGCTC -AGTCAAGGTCTTCAAGGGACACAGAGACAGTGTCATGTGTCTGCAGTTCGAGGATAATAT -CTTGATGACCGGATCTTATGATGCCACGGTTAAGATCTGGGACACAGATACCGGCGAAGA -ATTGAGAACTCTCAAAGGACATATGGCTGGCGTGCGATGTCTCCAGTTCGATGACACCAA -ACTGATCACGGGAAGTTTGGATCGCAGCATCAGAATCTGGAACTGGCGCACAGGCGAATG -TATCTCCAAGTACAATGGCCACGCCGAAGCGGTCATCGCCCTCCACTTTGATTGCACTCT -CCTTGCATCCGCCTCTGTCGATCGCACTGTCAAAATTTGGAACTTCAAGGACAAGTCTAC -TTTCGTTCTTCCCCATCCACAGGGAGTCAACGCTGTCAAAATCGACTCTATTTCACGCAC -AGTCTTGACTGCGTGCGATGATGGTGCCGCACGCCTTTGGGACTTGGACACCAAGACATG -TATCCGAGTGTTCCATAACCACATTGGAGCCGTTCAACAAGTTATTGCTTTGCCCCGAGA -AATTGAACTTGAGAATCACCTCGCCGACTGCGAGAACGACCACGTCAGCACATCTTCACA -AAATGGTGACAATATCCTGAGCACACTTTCCCCTCTTCTCGAGGCCAAGTCTCTGTCTCT -CCAGAACTCACCATTCGGCTCCTCTTTTGACCAGGATCAAGGCCGCGTGGAACCCCCGCG -CTACATTCTCACCAGCGGTGTGGACACAACTATCCGATTGTGGGAAACCAGCACCGGAAG -ATGCCTTCGTACCTTCTTTGGGCATCTAGAAGGTATCTGGGCGCTTTCTGCGGATACTCT -GCGGATTGCGTCTGGTGGCATGGATCGAATGGTCAAGATCTGGGATCCTCGTGTTCCTAC -CGGCCAGGATACCTATGAAGGTCACAGCGCAGCCGTCAATTGCATTGGCCTGAGTGACTC -CCGTTTCATCACTGGTGGTGATGACTACCAAGTTCGCATGTACGATTTCCGGGCCTAAGG 
-GTCGACGTTTCTTTTCTTTGTCTGTCAAAAATACCCTTGTGGTTAGATCTTTTCTGTCGT -CATGGGATGGGCGTACAGTTGCGACGCTACTTTTTTTTTTTCTTTGGGAAAGTGTGGATT -TTTTTTTCTATAATTATTGCATATCCGGTGGATGATTGGATACCCTGGCGCATAAAGGAC -AGTGGCTATGGCTCAAACACGGCCGTTTGGCCTCATATATTGCGACTGACTGCATCATAG -GCGTTACTGAATTTTGTTCTTCTCCGTTATACGCCTGGTTTCGCATTCATAACATGTGCA -CCTCTTTCCATATGAGTATCAATGAACTCGAATAAATACATTTGTATCTTCAAGCACCTG -TACTAATAGATATAAGTCATGACGTAATATTCTTAACGACTTAAGCAACGTTGAATGTAT -GTACAGCATAAACGCCCAACGGGACAGCCCCACTGAGACGGGGACTAAGACCGTTTGGGA -TTCAAATATTAACCTTAGAAGCTTTTGACTTCGGTGGCTTCTACCTCCACCTTGGAAGGT -TTCTATCGAAATGAAACGTCAGTTCTGGACCTCTAGGCTTTTCAATTTGCTTCTTGTTAG -ATCAACAAtatctatatatgtatgtatgtaCGGGTACAATGTTCTCTTTCTAGTAACCAT -ATGGACGACACTTCATTGCTTCCAAACTTGTTTTCATCCTGCGTTGACTTAACATCGATC -AAACATCAATCCCGAGCTTCTGAATTGCTAGAACATGCATATATCCCGTACCTATTTAAT -GCTGATTTCTAGTCGCCGCGTTCATGATACTGCATGTGGGTTTTAGCGGCAGATAAATCC -AAACTGTGTAGAACACACGCGGGATTTTCAGATTTACTCCAAAAGCCAAGAAGAATTCCC -TTTTTGGTATCAGAGCAAACCTAGGTAAGGTGTCGGTCAATTTTTGATGCTCGGCTGCTG -AAGAAATTAGGACCTCGTCCTCCCACTCCCAGTTAGGATGAGTACCCATCAACTATAAAC -TACTCTAGAGGACTGGTAGAATGTTCACTGGCTTTAGGATTCATGATTCATAACCACTAT -AAAACCCTCTATTGCCCTGTTTGCAACTTTCATCATCCTCTATATCCCAATAAGAAGGGG -GGAAACGACCCTAGTCCGTCCCTCCAACCCAGAACAACTCTGAACGAGAGTTTTCTTACT -AGCATCTAATGATCTTTCGCATGGTGAGAAAATTTGAGCTAAACCAACAAAATTCAAGCT -CAAATTGTGCAGGCCGAGAGCCTGTCGACACAGAAGCCAATTTAGGCCATCTGGAAAAGC -TGAATATCGAAGCTCAAAACACCTCGAGCTGTAGCTCTTTGAACTTTAGCTCCACAGACT -CTAGCGTCGATCTCCCCTCGTGCCCTAACTCTCGGTCGTTTGCTATGACTAGGTAACCCA -GAAATCGCGACAGGTACTGCTGGACATGTGGAGATACTTTGAAGCTACTCCCGACAGTAG -TGAAGTAGGGTTGAGTTAAATTCTTGCTACTCGCCGCTATAGGTGTTCCTGTTGGTTGCC -TCGCTATCCCCCAGGCTAGCAAGTACCACACCAAAAAGGAAAAACATATGAATATCGAGG -TCGCAAAGCTTGACTGCCAGAATACCATACACTCACAGGCCTCGATGATGGCCCTGCCCA -GCGACTCTTCCGAGATCTCGCTGTCTAATGCCACACTGTCCCCTATTGTGACCGCCGCCC -ATGATATCTTCACACATCCCGCACAAACTACCTTTTTCTTCTTACCAAGGCCCTGTTGCT -ATGTTCATGAGTAATGTCGCCTCCCCTGCCCCCTGTATGGGAGTGACGAGGCAAGTGACA -CCATGAATACTATCTCGGACAAGTAAAATAGCGAGGGATTGCCACCAGCTTCAAGTATCA 
-ACCTTGATCTCTACAAACTAGGGCGAGGAGAGACGCTCTACAACGAAAGGGTCGGTCACG -AGAGAAACCTATGTCATGGATGTCGACATGCTACTCAAAAACATCTCCATTCGGTGGATA -TTTTACCTAGACTTTCTCATCTGCGCTTAGACAGCGGATGTGGTAACTTCAAATAGAGGA -GGGACACAAAACAAAGAATGGAATAACTCCGGTCAACTGATATATTGAATACAGGGTCAG -GCTATGTTGGCTTTACAGCATCTCGATATATACGAAGAGAAATTGCTATTCATCGTACGT -AGTATCACAAACTGTATTGTGTGCTCTAAAAGTCATTGTCTCCTATTACTGTGGCTCTCC -ATTGAGAGATGAATATAATACAATTCAAATCAATCCAATTGTTCCTGCCTTGAGACTTGA -GTACTGTTATCCAAAACCCTAATCAAATTGATTCTTTTAGTCCTCCTGTCTATGGGCCGA -GGGAGTATTTGAATGGGTGTCATTTAATTTTCACTTCCAATTTACTTTCACTTGTACAGC -TCCAGCAATTGAAAGTGGTGATAGCAACATTGGAGGTTGTTGCACTGCCACTTGAACACT -TGACCATATATCTTCACTTGATTTGACCTGACAAAGCTGTGGGTTGATTGCTTGCGACTA -TCATGATTCTATCATTTGTTAATAAAACCTTACCCCAGTTTATCCTCAACTCGCAGCAAT -CCGGCCATGCGTGATGGCATACTCTTCTGAAAGGCTCACGCGAGCTCAGCAGTTATTTAA -CCGTAGAACAAGCGACTTCTACGTACTGCCACACCTTGCTGGCCCATATCACCTGCGAGA -GCGTGCCTCCAAGCGTGCTGCTGTTGTTGCGTCGTGGCATGACAACGACGAAGATGAAGA -CTACAAACCTTCGACTAAACGAACACCGAGCAAGCGCAAGCTATCTCTTCTCCAGGAGGA -TGACCAATCCCCTCGGTCCGAGAAAAGGGCCAGGAGTTTGTCTCCGACTTTGACGCTCCA -TTCTAGACTGAGCCCAGATATAGGAGAAGAAGATGATGTTCCCCCTTCGACACTCTCCCA -ACACAGTGAGCCGGACACAAGGGATAGATACTGGGCGGTCAATTCAGATACAGACACGCC -AAACTCACGCTACAGCCTCCGACGTCGCAACAAGGAATCGGTGTCCGCATCCTCACAGAA -AAATGACGAAACCAAGATCAAGGTCTCGGCATCAAAGCTGCCTAGGGATGTCCTGGCTGC -GGATAAAGATCTTCCGGTGAGAGAGTGCGCCGCCTGTCAGGAACTAGGCATGGAGTGCTC -GCTTGCATCTGACCCTGATCCGCTCTCCTATCCTTGCACAACATGTAAAGTTGATGATGT -CTTTTGCGTTGTGAGCCCCCCACCCAAATGGAAGCGATCATGTGAACTTTGCAAAGGTCG -GAAGAAAGAGCTTTGCTCATATCGCTTTGCTGATTACGATCATTCCAAACCTTGTCTCTC -ATGCCGAAATCACGGGTTCGAATGTGTTGCTGGACCAGCTAGACACCCACCCTTTGCATT -CTTTTCCACGAGTGAGCGAAGCGAGCCAAGCAGTTCTCCAAAGATCAATTCTTCTGCAAC -CTCCAATCCACCCCATGGTTCCTCCGAGCTCAATATTTCCAAGGAATCTAAGCCTCATGG -GAGGATCAATTCCCCCAGGATCAATTCCCCAGCAAGTAGCCCCCCACCACACAATAATCC -GGAAATCGACGCCGATGAGACATTTAAGCGGAGTGATCAACTCACTTCTCCAGCTATCAA -TGTACCAAAGACCCACACTCCAGAGAGAAATCCTAGAAAAATCAATTCTTGGGACATCTC -AGGCCCCCCATTCAGTAACGCACAGCTTTCCATCGTTGACGGGCCGAAGCCGCATGAGGT 
-GATTGAAATCACTGATTCGGACGACGACAACATCCCGAAGAACCAGAGCTCACCAATTTA -TATCTCTGACTCTCTTGGATCACCCCGTCGCACCACTGGCTCAGATACCATTGCTTCACA -GTCTGCGAACATCCATCGAATTTGGACTGAACTTGCTCATCCTGTAGTATTTCTCGCAGC -TGACCCTTGCGACTGGTGCAACAACTTTGCGTATGGCATAACTGGTCTCGGCCCTCGCAA -CCCAGAGGTTTGGACGGATGAAAACGGCACAATCCTTGAGATTCAGGATGGCCATATTGC -CGAAGGAAAAGAACAGAGCCGGATGTGTGTCTCCTGTACATGGAATCGATGCAAAATCAT -CCAATGCTCCCACAACACTCTGAACCTACTACCCGCGCCTTTTCCCCCGCAAGGTGAAGT -ACGGAAGGTTGCTGCTTCAACACATCTACGCAAAGCAAGTGAGATGCTGACTGATCCCGA -GAATGGGAAGGACGGTACCTTTTACAGAAGTTCCATTTACGAATGGTGTAACCTATGTCG -AGAACCCGCATTCGGGTCTTGCCAGACGGTTCAGCCTGTGACTATCCACACCGATGTGGT -GGACTGTGATGAAGAAACCCATGGATGTGGACTGATGCTTTGTGAGTACTGTTTCGAGCT -CACCAAGCGGTTCAAGGGTGACCTCAATGCTGTCGTTGCATGGGGTCGCGATGACTCGTC -TCACCCTGTATACTACCGAGCCGATATGGAGTTTATCCTCTCAGGGGCAGAACACAACAC -ACTATACAAGCTTTACATGGAGGAGGGTTGATTTGCAGATTGCTTTTTTCCCGGTTATTG -TACCTATCTCACCACTGCTGGCCTTGTTTATAAGCAGCAGCTAAGCTCAGTTGAGAAAAT -GCAAGATTACCTTACTATTTACCAAAGTATGTCTGTACTTTCGCACCGTGTGCTATCTAA -CAAGCTCAGATTCGTTTCCATACTCTAATATGTTTGGTGAATCGTGATAATGAGGGCATT -GTGCCCTAATTGTTCACAATAGCTCAATAGTACCGCAACAGAATAGCAATTGCAAACTAT -GCGTAGAATCATCCCACAAACAAACAAACGTGCATGCAATGAAGAACACAACATATATTG -ACATTTTATCCCATTCACGAGTCTCTCCCATTGAGCGAACTACTCTGAATAGTCCACCTT -CAAGTATAAGACCTCCACAGTTCCGACTTTCCAATCATTCTTCTCTTCTTACCACACATC -TTCGTTCAATATCTTCTCATCCTCTATCTTCTGCTTGTACTCTTCTTTTTTTTTTTTTCG -TTGTTTAGTGATTGTCTTAACTTCATCTACAGACGCCGGTCATCTCTATGTGGATAGCCC -TGGGTTTCGTTTCCGCTCCGGAAAAAAAAAATTACCTTCGGATTTTCTGCCAGTTGAATT -CGCTTCACAACCAAATCCCCTTATACTTCCACTAAAAGGAAGTAAGGGTGCTGCTTTGAT -CTCGTCTTCTATTTTCCCCTGGTTGCCCCGAGCTGCGACACGAGGTGCGCAGCTCATCGC -CACGACGTCTTCACCAAAGCGACGAAAAGTTTCCCCAGAGCCTCGCACCAGTCCAGGCCT -TCGGGCTCGACGTCACGGCCGCTCCGAAATTCCAGAGTCTCGCCACACATCTTCCTCTTC -TTCCCGCGGCCAGAGTTCGTCGTTGTCTCCGCCTCGTTACGTGCCTGCGCATCTGCAGTT -TCAGTCTGGAAGCTCGCCTGGCCGCTCGCTCACCCCCGCGACCGCGACCGCCGGACTCAC -CCTCTCCAGCGACCATTCCGATATGCCATCCGATACTCGAGAGGGTACACCTCCGACTCA -TGGACGATCTCCTTCTCCCGGTGAGAAGCGCCCTGCATCTGAGATCACTGACTCTGATCC 
-TGAAGGAGGCGTGAGCACTGCTTTCAGTAATGCGCGCACATACATTATTCCCCGCACATG -CGATAGCACAAATGACGAATCCACATATCCTACTACTTCGACCAATGACAACACCGAATT -GGCATCCTCAGCCAGCGAGCTGTCCAGACCAGCATCGCCAAAACCAGGTGATATTCCCAC -AATCGACGAGCAGGTCACTGAGGTGAATGCGTTGATGAAGGCGCCGTTGAAAGATGGACA -AAAGGGCTTTGTGGTTTCAATGGCTTGGCTCAAGAAGGTGCTCGCACGCACTACAGCGCA -TGCGGATCACGCCGACAAAGGATCATTGGAGGGCGATGTTGGTCCCGTCAATAACTTGAA -CATAATGCTCGACACCGACCCAGCCACGCCCAGCTTCAAAGACGAAACCGGTGAAGTATT -TGTGCCGATGCGCCCTGGTCTGCATGACGCTCTAGATTTCGCGATAATTCCGCAACATGG -CTGGGACCTGATCCAGAAGTGGTATGGCCTCGCTGACCAGTCACCTGTGATCATTCGCTA -TGCCCACAACACCAGTCAACCTGGTGATACTGAAAACATCGAATATGAGACTTATCCTCC -GATATTCACCATTTTCAAATTGGCTAACCCCGCTGCGGGTACCACCCCGACCATCTTGCG -ACAAGCCACCAAGCCGTCTGTGAAGCTCTTGGCCAGTCGCCAGACGAATTACCAGAAATG -GTTGAAGGATGCCAAAGAGCAGAGTGGCATTGATATGTCGACCAAGGTCCGAGTCTGGAA -AATTCTCCAGCTTCCGCTAAGCACCACTGCATCGGCATCAGCGACTCCTGCTCCCTCCCG -CAGCCAATCGCCTGCTCCACCATTGGCTCTGATCTCAAACCCAAATGACAAGCTCCTTAT -CGATTTGAATGCTTTCCTCGAACTTCCAGAGGGAAGCCATCGAGTGCTGCTTGATAATGT -GAAGGATCAAACCCAAAATTCAAACTACAACGGCCGTATGACCTTGGATATGGCTGGTCT -CGGCGTGGCGAATTGTGTCATTCTTGAGGAACAGATTGGGACCTCTAAAGGAGGAGAATG -GGTGTCTGAAGCCTCTGCCAAAACTCCCAAGAGTCTGGGTATTCCTGTGGAACAACCGAA -GAACGATATCACTGCTAAGGCATCTGCTGCTAAAGCTGCACTGAAGAGCACACAACCGCC -GCAGCCCAGCGGCAGATCCACTCCTTCCCAGGGACCCGATCCAATTAGGGGGCTCATCTC -TCGAGGTCGGAAGGGTCGACAGCTTATCGTAGGATTGCAAAACCTTGGCAACACTTGTTA -CATGAACTCCGCGTTGCAATGTGTGCGAAGCATTGAGGAGTTGTCTTATTACTTCCTTAG -TGAGTACACCAACCTTAGTGTTGGATCCTACCAACAAGGTGTTGCGGTGCTAACATTAAT -TTACAGGTGGAATGTACAAACCAGAGCTAAACCCCACCAACGTTTTAGGATGTGGCGGTG -TCATTGCTAAGCAATGGGCCAACTTACTTCAGGACCTCTACAAATCGGACCCTCAGCCGA -GATCTGTCAACCCCTATAGGTTTCGTGCTGCCGCTGGTCGGCAACGTGAAGATTTTGCTG -GCTATGAGCAGCATGACAGCCAGGAATTTGTGATGTTCCTCCTTGATGCCCTGTCGGAAG -ATCTAAGCCGCATCGTTGGCAGCAAGCCATCGACCATCATACCTGATTCCACTGATGAGA -TGATCCACAATCGTAAGGCTCTTGAGGATTTCGGAAAGAAGTGTTGGGACCTTTACGAGG -CCCGCAACGCCTCTATTATCACCGATTTATTTGCTGGAATGTACAAGTCAACATTGATTT -GCCACAATTGTGAAAAGACCAGCATCATCATGGACCCTTTCACAATGGTCACTGTTCCTA 
-TTCCTACGGGCCCGACGCTCATCAATCGAACGATTATCTTTTACCCTCTTGATGGCGCAC -CCGTGTCGTTGAAAGTTCGACTCGATGAAAACGATACTCTGAAAGTCTGGAAGGAATTTG -TCGCCCAAAAGATGGGAATCGATGGGGAACGAATTATTGCAGCAGAAACATTGCACCACT -CTTTTTGGCAAATGTTTTCCGACGATGAGGAGACTTTCGGCAGTCTACGCATTTCCCAAG -AAGACACTATTGTGTTCTTTGATCTTGGTCCTCTTCCCGCATCCACCGATTCTTCTGACT -CTCCCTCCGATGAGGATACAGTCATTGTACCCATCTTCCACCGCAAACTTCTTCCGCGTA -AGAACAAAGAGGCCACTCGTGAACTTTTCGGATTTCCATCCTTCATACGTCTTTCTAGCA -AGGAAGCTCGAGATCTTGAGGCGATTTACCGCAAGCTCCTGGCCCAGGCCAATAACATGA -CTACACGCGACATTCTGACTGCTGAGGAAAACAGCAATGACGAAAATACAGCTGATGACT -CTGACACTGTGGTCACAAACGAGGATGATGCACGATCAGCAGATTCTCGAATCAGAACAT -CTTCTGTCGACGGCGAAGACAGCATCGTTGACATTTCCATGCAAACCGAGCAAGTTAACA -AGCCTGCAGATGGCTCTGATGAGTCAGACTCTGAATCTGAATCTGTTAAGTCTCCTCAGC -ATCCTCTTGCCGGAAAGATTGCTGCCAGCTTGTTGTCACTTTTCGATGCAAAAGTCATGA -GTACAAATGCTACTCTGCCTGATGGACGCAATATTTCCCCACTCAAGAAATTCCCCCTCC -TTTCCTCTCGCATTCCATCTCCTCCCACTTTTACTAAGGTTTGTTTCTTCCATCTTGTCC -GTCTAGATTCAGCCCCGCTAATATTCGCCAGGGATCCGATACTTCATCCCAAAGCTCTGC -AGATGATAGTAGTGACGATGATGTTGAAGTGGATGAATCAGATATCACACCAAATTCAGC -ACCATCCCTCCTTCACCAAGGTGATGCCATCCTGCTTGACTGGACCCCCGATGCTGTTGA -TTCTCTTTTCGGCGGCAAGCCACGTGACCCCGAGGAGCTCCGAGGTCGCCCTACCTACCT -GAGCATCAAGAATGTGTTTGACCACTCCATGGTGGTAAGCCAGAAAAAGCCCAAGCAATC -AAATGTCTCTCTCGACCAATGTCTGGACGAGTTCAGCAAAGAGGAAATTCTTTCGCGGGC -AGATAGCTGGTTCTGTCCTCAGTGCAAGACCCACGTTTCGGCCACCAAGAAGTTTGAGCT -GTGGAGAACTCCGGATATTATGGTCGTTCAACTCAAGCGGTTTAGCCAATTCCGCGGCAG -GCACGGACACAAGATCAGCACTCTCATCGACTTCCCCTTCGAAGGACTGGATTTGAGCAG -CCGAGTCGAAGGCCCAACGGATGGAAAGAGTACGGTGTACGATTTGATTGCTGTCGACAA -CCACATGGGTGGAATGGGTGGTGGTCACTACACTGCCTATATCAAGGACTTTGTGTCGGG -CGCCTGGGTCTATTGCAACGGTTGGATGCCCCTGTCAACATGCTAATCGGAAGATATTTT -ACTAATACCAATTCCAGATACCTCTGCTAAGACTGTCACCAACATGCAGTCCATCATCAC -GGCGGGTGCCTACCTCCTGTTCTACCGCCGTCGCTCTGATCGTGCATTGGGCAACCAGGA -GCTGCGGGAGCTGGTTGAAGGCTACCGGAACGAACAGGAGAGCGTTTCTGGCAGTTCATC -CGGCTCACATAGCCCCGGTGGTTCCCAATCACCTTCAGAAGACGGCGGTCGTGTCCTTGG -CGGCGCATCCCGCAAGGGATCGTCTGCCTTGGCCGGGGCGAGGGTGGCACCCCGAGTGAG 
-CCGGGGGCAAGACTCCCTTGGCAATGACCTGTACTCTTCCGCCGAGGAGAGTACTTCGGG -CTCGGAAGATGAGAGCAGCGAAGGTAAGAAATTCGGCCTCGAGCACGAAAACGGGGGATC -GCAGGTCGACTCCTTCAGCAAGCTCACGGAACCCTCTTGGTCGTTCGACGCCGCTCACGA -CATCTCACAGATTTCCAGTGGTAATGCTCCCACTGAAGATGGCATCTTCGAGGGTCGATG -TCCATGGGGTGATGGCATGGACATTGACCCTCCTTTTCAGTTTGGTCTCACTTCTGGGGG -TGAGGGGGATGATTCCTCCGATGATCTCCCCGTCGTAGAGCTACGTGTGGGGTCCGAGGA -CATGATGTCCTCGGACATATgaatgatacgaacgaaaagaaaaaaaaagaaaaaagaaaa -aaaGGAGGCGGATGTGTGGGATAGGTGGTGTTTTCCTAGAGGGAGGAGGGGATGGGCGTG -TGGCCCCTACAAAATAAAAAAATGATTTCTTGTTTTAATTTCAATCACATTCTCAGAGGA -AAGTGAAGTAAAGTGAGAATCAGGCCAAGATCAGGATAATGTAAAAAGAATTGCTAGCGA -GGTTGTCGAGAAAGATCTTCTCCATCTCACTCCATCACTTCAATATCAGATCATCAGCTC -CTTACCAAGGGCTTATATCTTCCAAGCGATAACCTCGCCTCTTTTGGAGGACCTCAGGCC -ACGTCAAGACATATCCGGGGTTTGGCTTTACCTAGGCTTCTAGTCAACAGTGGCTGTTAG -CATTGTCTATCCATATACTGTGACCTTGTTTTATGTAGTTTGATGATCATGTTCTTTATT -CTGCTATTCTTGTTAATTTATAGCCTTCCATTCACCACGGGTGATAGTCTCGCGAGTGCC -AAGTTAGACCTCATACATAGAAGACTCATTTTGCACATAGCAGGCCATCTTTACTGCCAA -TTTTCTATGACTTGCTTCCAAGATTTGAATGCAAATTTGAGCAATTCATATCAAGACTCC -CACCTATAACATATCCAATTCTTTCTGTGATTTCAGCAGGACATGCTTCACTAGATACGT -CTCTGAACATTGATTTGCTCTACAAGAATGATACACGGTATAGTACTGGGCGCTGGCAGT -TCTCATAATTATTCAAGTCTATCCTTATAGTGAAATTCCCTGAGTCATACCAGATGGCAA -AGCCAACGAAAGATTGACAACTACTTATAGCATCCGTAGAATCAGTTATCTGGACTCACA -TATCAAGATAGATGTTATCCTACACAAGTCATCTAGTCTGGACGAGGAATCCTCTTTCTT -GAATCAGTTATTGGAGTATGAACAGATAAGATACTTAGATGAAAAGGTTGATACCTCGTG -GCCTTGGACTCTGGGAAGCAAAGTGCCGCAGTCGTTCCTTCCATATTAGGGCACGGTGCG -TGCGAGTCATCCCCTTCCAACAATTACGTCCCCAAATGCCACGCCGTCGTCAATTCAATC -GCGGAACCTTCACACATCCACGCACCGATCCCGAGATGGAACGCACTGTACCTCCACGTC -GGGGGCTCTTCAATCATGAGCCACAACGTCTCAATCCCAATATATCTGCATTGTGTCCAG -ATGTAATCGAAGTCAGCCCGGCCACAGCCAGATCGATGATTGCAAAGATCAACTCGGAAC -ACGCAGATAGATACTTGGATTTCCAAAGCGCTGGTGATCAACTCGAGCTTCTGAGGCTCG -AGACCTTGCGTCTCAAGATGATGGGGTCAATGGATTCCAAGTCCCATCCACCTAGTGAGA -TGCTATCACAGAATACGACTCCCATCTCAACTCAAAGCTTGGGAGAACCCGGTCCCACCG -TCCCAAACTCGATGCGAACTTCCATGCGACCCCGTGCTGCAGTCCTAAGCGAAAGTGAGA 
-TTAGTGTTCTTGATCTTGGTCCCAGTCTGCACTCTCCCCACCTTTTAGCTCGAGCCTCTC -GTCGCAACTCAATCTCTACAACTTCTGATCATGGGCTACTCGGCCTCGCTCGTACCGCTG -CTCCCAACTTTATGCCGGGGCCTGGCAGCGACATTTCGGACAAACCATCTGACTACCGCC -TCCGGACAGATGACCTTGAATCTGTTGCTCTCACTTCCGCCAGTGGCTCCATCAATAACG -AATTCCCTCTGAATCTACGTCAACGCCGCCAGAACCGAGCTAACGTCACACTCCCCGGCC -CCCATGAGCTGCAAACACAGCGCGTAAACCGTTCATCTACCATCGGCGAGGCTACTCATG -GTGTGAGAGATCAATTTGGCCTTGAGAACAGCCCTACCTTCTTCCCTGATGAGTCTAGTC -GCCGGGTTCACTCGCTCTCTGTCCCACAGTCTCCCGAGTCTCCGCCCCTGCCAGCCCTGC -CTACTCTACGACGTGCTCAGAGCTCATTCAATGGAATGGAGCCGGCATTTTCGGCTATGC -GCATTGCAGAGCGAGTGAGAGGCGCACGCGACGCTGCAGGCTCTCGGGATGCCGATCGGC -AGTGAGACACGGGACGCCGAGACCGAGGTATGAGATATGAGGGTCATTTGACCGCAGGAG -AAAGCGCCGCCGACATTGATTCTTGTTCAGCGGCCGATGGAAGAATATCTGAGACTTTGA -AATATGGGGAATCGGTTATGAATTCTAGGACCCTGCAAATGTTTTCATTCAAGGCTCTTT -ACTTCGAGTCTTTGGCCATACAAGCAACTTTCAGCATCCATCGCCTGTTTATTAGCTGGT -TAACCCCCATGAAGTTGATTTCGAAATGCTTGTTTTGTTGGGCAGTCTCTCGTCTTGAGT -ATATATTGGATCTGACAAGCTCCTCCCTATGGGGAACGTATCTCATTGTAGTAGTCTACT -AGGGTTACACGTATTTATACTGCATACCGGTGACCGGCCGCTCTTGGATCATTGTTCGGT -GGTTGACGTTGCGATCGACGGGAAACCTGGTGAACAAGCTAGAATTATCGCAAAATAATA -ATCTAGGACAAAAGAATATCTATCAACTCAGTTGAGGGTTTTTTCGGCTACCCTTTGAAA -GTATTTGTCCATACTTATTATATCCATAGCCAAGCCTCTTTCATGACGTAATCGCGAACA -CCGCCCTGTACCAGCCACACTGCGACTTCCCCCCCCGCAGATCCCCAGGCACAGCTTCTC -CAACTCAGGTTCCGGTCATAGTTGACCTTATCAAGTTTCCATATCTTCATAATGTGGAAC -GACGAAGATAACAATCCCTACGGGGCATTTAACGAACACCAGTCTGGCCTATCCGACTCG -CTCCACTCAGCCGCACTATCTCCTCGTAAGCTTTCCGCTCAGCTCCCACTCTAACGCTTC -TGCCTTTGGATCAGTATCTCACGCCAGGTAGCCCTCTATGAACATGATTCAACACCTCCA -TCTAGTCGCGACTCCAACAACGAGCCTCCAGACTATATAGCCCATCCCGAAGACCTCAGC -GATCCCGAAGATGAGGCCGAGTATGGTGCCGCCAGTGAACAATATCCCCGGAAGAGTGTC -TATGACAGCCGCATTGAACAACTCCTTTACGAAAACCCCGAGATGCCAATATTGATCACG -GATGCCGGGAAAAATCATGAGGGCGGCGGGGGCTTTATTGTCTACACAATCCGGACTGCG -GTATGAGACTTGTGTCCGGCAGAGAACTGTACAATGCTAAGTCTTGATAGGACCTGGAAG -TGCGCCGTCGATATTCCGAGTTTGCCTCATTGCGACAAACCCTTGTCAATTTGCATCCCA -CCTTGGTCATTCCGCCGATTCCCGAGAAACATAGCATGGCCGACTATGCAGCCAAACCCA 
-CCAGGGCTAAAGATGATGCAGCCATTATAGATCTCAGAAAGCGCATGCTCAGTGTGTTCC -TCAATCGCTGTAGGCGTATGAAGGAGGTCCGTGAAGATGGAGTATGGTGGAGATTCCTTG -ATCCTAATGTTAGCTGGGTAGGTTCGAGCTGGATGTTCTTGATCACGAGATGATTTACTA -ATGTTTGCTCAGAGTGAGGTTCTAAGCTCCCATCCTGCGTCATCGGTCCCAAAGAATAAC -CTCAAAGCTCCACCTCTTGATCCCGCAAACCCCAGCCCAGCCCATGGCTGGCTCCCGATT -CCGTCTGCATCCGCAAAGCTCAAGCCCACCGGTGGGATTACCACAGCTGGTGGAAACACA -GCGTCTCCCACATCGCCGAGTGAATCAGAGGCGCCTTCGTTCCCAGGACCTGAGATCCTC -GGGCGGTTCCCTCCCGAGTCACGCAAGCTCAGCGAGCAAGAACTTGACCCTTACTTTATC -AATTTTGAAGCGTCTACACGGGAGCTAGAATTACTTCTGCAGGGCAACATTGAAAAAGTC -AATCGGAGAACTCTCTCGCATTTGTCTGCGTTGTCAGCCGATCTCATGGAGTTGGGGGCG -CGTTACAATGGATTCTCTCTTTCAGAGCAGTCACCAACGGTTGCTGCCGCCATTGAGCGG -ATCGGCCAGGCTGCGGATACTTCTTATATCGAAACCGAAGAACTATCGTCGGCTTTGAGC -GCAAGTTTTGCGGAGCCTATGCGGGAGAGTGCTCAGTTTGCTAGTGTTGTTCGAGGAGTT -CTACGATATAGGGTGCTCAAGCGGGTACAGCAAGAAATGACTCGTGATGAATTGTCAAAG -AAAAAAACCCTACTGGACTCCCTCGAGCGCAGTGAACTCGAAGCCAAGCGCATTGAGCAG -TACCTCAACCGCACCAGTCCTCAATCTGGAGGGACAAAGCCTCGTTCTTTGTCAACCTCA -TCCGCGAGCGAAGCAGGCAATAATGCCGCTATGTCAGCCGACTCCGGCTACAGACCTGCT -CACGACGATTCCACCAGCCCTCCACCTGGGTCGCAGACTGGAGGACTCAAGAGGTCTGAC -CCGTCAGTGCCTGGCTCCCCTGCTCATCGGAAGTCATCCAGTGGGACCTTCGTGGCCAAC -AAAATATTCGGTCGCATCAGCCATGCTATCCACGGCTTCGCGGATGTGGATCCCGAACGA -ACACGTCGGGACCAGATAGGGAAGACCAAAGAAAGCCTTATTCAGGTATGTAGACTGTGG -GGTATAAATCTCAGGTGTTTCTAAAACTAAAATCATTTTTTAGTTGGAGCAAGCGTTAGA -AGTATCCGAAAAGGATGTCAAGGATGCGAGCGCGGGCGTTCTGCATGATCTCAAGCGCTT -CCAGAAGGACAAAGAAGCCGATCTTCGACGGTATATGGTATGCATGGCCCCCCACATCAT -TTGCGACTACGACTAATCCTATATTTCTAGGTTGCATATGCTCGTTGCCATTTAGACTGG -GCGCGGAAGAATCTGGAGACATGGACTGAAGCCAAAGATGAGGTTGACAAGATCGTGGCT -CGATAGAAGACCCTACATCATCTTTATGCTCCGGAGCTCGGCGCTGGCGTTTGGGGGAAT -ATCTAACATTTGTTTACCTGTCTGTCGCTGAAAATGCAAATCTGGTATCTTCAATTTAAG -TCTAGGCTGTTCCGTATAGAAAGTAGTAGCCCTGAGGGGCTTTGTCTTAGTTGTACCTTG -TTGTCTGCAACCCTACAGGGTTATGTTCAAAAGGTCTCATCAGAAAAGGGCTAGCTTTTC -AATGAGTTTATATATCCGTTTTCATCTTGACAGGGCATGGGCACATTCCAAATGCTGACC -TATCACGTCCCGTGTGATAAGAGAAGATCGACACTGCAACCGATCGACCTAAGTCATCTC 
-CCTTGCTTCTCAACTCCATTTACTTGTACATATGATGTGGATTGCGTTGATTGTGTGACT -TCCTCGGAGGTTAATTGCATACTAGCCTGGTCTGACAGCGATTCGGCTTTTGTGCCACGA -ATCCTTTGGTGATTCTTCCTGCTTCTGTTGCGCCAATCCATCTGTATCTGCTATAGAACA -ACTAAGTGCTTCTCTGGGCATTCCGAAACCTTGCAGGCATGTCCAATTGTGGGCCTTTAT -TGAGGGATCAGTAGACTGTCTCGCAATTCTTCCCCCCCTGTCCTGCCTACATCATCAACT -CCACCCTACCAGGGTGGCAACGCGGTGCGTCGACTGAAAACGATGAGTGCCTGGATCCAT -CAGGATGATCGCCAGGGCCGGTCACTTATCACGGTCGATGTCGCCGTCTGGCTATCACGA -TGAAGTTCAACATCGGTCTTGGTTTTCCTGGAAAAGAATACCCCGACGCTGGGTGAAGAT -GCTTCCTTTCCTCGTCTTGGTATGTTTCTTCTCCCTCGGTCTCGGGGTGAATATAAATCG -AAGAGACAAAAGCTTTAAACAAGGCCAGAAGGGGCAAGATCTCGGAAATGACCAGGCCAT -AAGCCGCTTTCCTCCTCCTCGTCAGACACAGCAACATACACATGATCCCACTATTCTACG -TGTGGGCGATGAATATTATCTGTATCAAGTCGGAGAACACATCCTCATCCACACTGCTCC -AAGTATGGCCGGTCCGTGGAAGCGTGTTGGCAGTGTTCTGGATGCTAAGAGTGTGATCCC -AAAGGGTGATCGTGCCGCTCCCTGGGCCCCAACCGTTATCGCCGTCGATGGCACATACTA -TTGCTATTACAGTGTTAGCAAGGCCGGCTGCCGTGACAGCGCTGTTGGGGTGGCCACCTC -TAGCTCTCCCGGACCGGGGAACTGGTATGATCACGGCGCTGTCATCGAGACGGGCACAGG -CAACGGATCGGATATCTACCCTTACACTGCATCAAATTCAATCGACCCTAGCACGCTGGT -CACTCCCGATGGACATGCTTATTTGACCTTTGGAAGCTACTGGAAAGGAATCTTCCAGAT -TCCACTGAGCAAAGACCTCCTCTCACCATTAAGCACCACCAAACCCGATGAGCGTCAGCT -AGCGTGGGAGCCCGAATCACTCGGCTCGCCGAATCGCATGGCCAACAACATCTGTGGTGA -CTCGAGCGGTCCCCATCCCATCGAGGGTGCTTTCATCTCCTTCAATGATGGATGGTACTA -TCTCTGGTTTAGCCATGGATTCTGCTGCAATCTCAAACAGGATAATCTGCCACCCGCTGG -TCAAGAGTAAGTCCTGATAACATAGTGACTTGTCGCTTCATACTCACGTACTTCAGGTAT -TCCATCCGTGTTGGGCGCTCTCACAGTCCCCGTGGGCCATTCACAGACAAGGCCGGTACG -GATCTTGTCAATGGCGGCGGCGAACTTGTATACGGCTCGAATGGAGAAACATATGCCCCA -GGCGGGCAAGCTGTGATCCATGATGGCGATACCGATGTGCTCTACTACCACTATTGTGCG -TTTAGCCCGAATCCCAATCCCCAATTTTGAACCCCGTACTAACTTCATTGCAGTGAACAC -GTCCGTTGGCATTGCATTCCAGGTAAGCTCACACTGAACTGTTGCCAGATTGGTGAACCA -CAAATGTTGTGGTTCATGTACCCCTGTCGATCCTTCTTTGTCATCCCTTGCAGCATATAC -ACAAACGTCATCAAGACCTGATCTAATACATCCAACCACAGGACGCTTTCCTTGGATTCA -ATCCTCTCAAATATGTTGATGGATGGCCCATAGCTCAGGGATGATCTCCTTTTTGCACAT -CATTCACTTTTTGATCTATCCTCGAATTTCCGCTCCGCCACTTCAGACCCCCAACATAAT 
-TTTCGATCAACTCATCCCTTTCCTTTCGAGAATGGATCTCGAATTCTGCACATCACCGCG -TTTTTGCTTGCTAACATACTTGATTCAGTCGCATCTGACCGTTACTGTTTGTGAATAATG -ATTTACCTTGCCAATACATGCATGATGCTTCTGCGGAAGCGAACAAAAATCCACTGGATT -CAAATGTTATTGAAGTTCAAGAAGACCCGACCCGAAATCAACGAAACTAGGGGAATTCCG -TACCTCACATATCGTTATTTGATACTGTCCAACACCACAAATGGATGGAAATCAAACGAA -CAATAGTGAATTTCCATACGCCGTTCCAATCGAGAAACAAGTACATGGGTAAAAGCACAG -GCTTCATAACTCAACCCATCATCATCAACCGAGACGTATTGTTTCCATCTCCTCGTAAAT -TGCTCTTAGTGGTGCCCCTCTGCCTTAGTTTATTGTTGAACAAGATCCCTTAAGACTTCC -GACGCTTCTTCCAAGGAGTCCAGCAATGATTCAACAGCCCTCCTATACGTGAACGGAGCC -TCTTCTGACTTCTAGGTTTCTGTTCTCCGCCGTCGTCCGAGGATGCCGCTGAATCCTCCG -CCTGCTCTCGCTGTGGCTGATTGGAACTCTCAGGGATATCTGAAATTTCTGGTTGATGTC -CACCCTGGTTGTCAAGTAAGTCCGAAGCTCCATCCACTATCTCCAATTGATCTGGCTCAA -GATTTCTAGGGACCTCTGACGAGGTTGAGTGAGGCCCATCATAATCACTCAGCAGATCCG -ATGTCGCTGTCCTTGTTTGTGTCTCTGACTGGGGGTTGTCAGCGCTTTGACAAACATCTA -ATAACTCCGGCTGCTGTTCTCCCTGGCTGGGGATTAGGTTTGAAGCCCCATCCACTAGCT -CCAGCTGATCGTGTTCAATATTCTTTGAACTTTCAGGTGAGATTAGCTGGCGGTCATGAT -GATCACTCGGTGGGTCCAAGGTCCCTGTTTTCGTCTTCAATTTGGCCTTTTCGGTTCTTT -GACACCCTCCTAATTCCTGCGCCTGTGGCGGTTCATAAACTACAACCGGCTCGGGTGTGT -CGGCTGAAGCTTCTGGTTGTATCTGTCCAGGACTTTGGGGGATGCCCGGGAGGCCTTGCG -GTTGGGGGCCACGCTCTTCCAGTGATTTTATAACCTCAACCTGTACCGGCTGCTGTTGTT -GTTCATGATTTTTCGATAAGCTTAGAGCACCTTCCAGGGCACCAAGTCGTGCCTGTTCAT -GGTCCTCTTGAATAGCGGTTAGCTTAAGATGCGGTGACGCATCGCTGACTGGAAGCTCCT -GTTTTCCCTCTGGTGCGTCGAAACCGCAGGAAACCTTTTATTATCGGCAGCAAGGGAGCC -AACATCGGATTGTTGGCTCTCTTTACAATGACTTCCTGCAGGCGCGGTGCTAAGGTTAAT -TTTGCAAGCTCTCCTTGCTGTGAACTAGTGCCAGTGTGCTGGCCTTCGCTTGAAGTGTCT -TCTTTGGCAAGGGACCCTTCTGCTGCTAACTTCTCTTCAGTTGGTGCTATAGACTCTGGA -AGAGTGGCCTCTGTAGCTTCTAGCTCATATGTTTCTTTATCTGTGTGGCAGACCTTTTCC -AGTACACGGTTCTGTTTAGAAATCAATTGAAGATCCTCCACCTTTGGCTCCCACTCGACT -GGTAAGTCATCCACAGATCCAACAATATTCATTATGTGTTCGAAATCATCGGACGAGCCA -GGGAACGGGGGCTCATACATCAAGAGCTGATATATCTGCCGTGTATCGTTCAGTATATAT -CATATCGACGAGCTCCGGACATTGATGAAAACTTACAAAGCACCCTGTATACCACAGATC -AAGACGATAGTCAAGCTTGTCCCCCATGATGGCTTACGGACATCGCAGATGGTCTGGCTG 
-CTTGAGTTTACCTGGCTCTTCTCCGCGGAGGAAACTCTTTCCAAACCCAATCAGACGAAT -GTCATATTCATCTTCATCGTCATACCCATTCCATTGGGCGCTCTGAACTAGCTCTTTTGG -AAGCCCTTTATTCAAAGGCATCCCATCACAGCGCTCCAGTGGAACAACTTGTGGTGATCC -CATTACCCCAAAGACGTCTTCTTCGGTGGCATCTCGTAGACTGTTCCGAAAAGGAAACGC -AATATTTGCTCTATTTATACCTATTTGGATGTAAATATTATGCCATTTAGCCGAACGCTT -CATTGGAATTAAGAACATTCGTCTTGGAAAACAGGCAACTCACCTCCATGGCACATTTCC -AAGCTGTGTATGAATCTCATAGCCATGAAAAGCCGCTTGATAATTTCAAATATACGAGAA -CATTCGATTTCCGAACATTGAGTAGAGTTGTTGAACTCAGCATTACCGAATCCAATATGA -TGAGCGTAGTTGTTAAAGTCAATATAAGACTGTTGGATTGATTCGAAGGAAGGGCCGACA -AGTTCAAGCACCAAGCACTGATGAATCCCATTCGGACCCACAAGCACAAAGGTGTCAAGC -AACGTAACGATACATCTTGTGAACAGCACACCTTCCGACTTGTCCATCAAGTATTGCAGA -TTTCAAATCTCAGTCTGTGCCTTGATCTGCGCGGCCAATATCTTCAACGCGACCCATTGA -TATCCAGGAACACGTTCATTGCAACTTGAGCATGTTAGCTGCTGTGTGTGACTCTAGACG -CTGAGGGTGTGTATTACCCTATATCCTTTGCAAGCCACACTGTCGCAGACTCACCAAAGC -CCAGCTTATGGTGTGCTGTGTACCTATCATTCTGAAATTGATCCCCGAGACTGACTGGGT -GAAGGCCACCTGGACGATACCTGTGAATCCCTTCCTGTGTCAAAAGAAGAGGATCCTGTT -TGAAATCGCTGATAGCAAAGTTGGGGTCGGTGTTGATCTCCATGGAGAGTATAGGGACAG -CAAGTCTGATTGAAGAAAAAGAAAAGCGGGCGATGTCAAATATATAAAGATCAAGGGATT -CTTTGTTCGAGCTAACATGAGACTCGATAACCCTAAGTGTTGGCTGTGTAGTGATTCTCT -TTGTTGATATGAGACATGAGCTTCTCATTTGATTCTTTCATGGCTGCATTGCTCATGTTC -ACCTTACGTAGAAGAATCAAAACTGGTAGAAGTCTTTAGTTCTCCTATAAGATACAAATT -GAAAAATGTTATAGAATAGAAATATCCACCACACATCCATCATGTCTAGATGAAGACAAA -TTCATAATGCCCGAGAAACTTGCCATCCACATAGAAAAGACCTTGACTTCACCAATCCTG -TTTCTCTTTCCAGTCGGAAACACTACGATTGCCCCATCCTGCATCTGAGTTTCTGCCTGA -GGGGCTTCGACTGCGATGCCTCTTCTCATTCTCCTCCAGTTCAATCTGCCACCGCTCCTT -TTTCTCCGTATCATTGCCACCGCGGTGTTCTTCTGCATGTCCCCAGTGGCTGGACCTGTT -GCAGCTGTAGTTTTCCCGCTCCCCATGGTCATCGTGTCCCCAGTCTCGTTCGGGGCTTGA -TCGCCCTCTTGACTTTCGCTCCGGGGATGCATAGCGGTAGCCTTTGCTTTGCTTGTGGTA -ACTTTTTCTATCATCGTCTCCCCACCCATAGTCGCGGTCTTGATGATCTTCGTTGGAATA -GTTGTTCCGCGAATGCTTGTATTGCTGTGTACCATCACTCTGACCCCAGCCACGGTGGGA -TTCCACTTCGTTTCTTGACGCTCGGTCTGGGGACACATGGCGGCTGTTGTCCCACCCATC -CTGAAGATTTTTGTTTCCGTGGTCATTGTGGTGTCCGTTGTCCCGTGAAGAGATCCTTTC 
-CCGCGGGCTATTGTTATCCTGGTCCCAGCCGTGATTTTTCTGGTTGTCTTGTGAGTTTCG -TCGTGGGGAACGTGATGGTCTATTTTTCTGTTCTCCTTGGTCATTGTTGGAATCGCCTCC -CCAGAGGTTATTGTTATCGTTCTGCTCAGTATCTTGATGATTGTCCCAACCATTTCCATC -ATTGTGGTTGCTCGTTTGATCATGTTGTTCCCTTGTGTTTCCCCATTGATTGTAGTGCTC -GTCTTGGCCCGCTTGGGTTTTGGGAGACTGGGCAAGGGGGGACTTCTGCTGGCTCTGGCC -TTGGTTCGTATTTTTCTGTTTCCGCTGATCTTTTCGATTGTAATTGCCCCTCTGCTCATT -CTGGCTTTGCTGTTGATCTCTCTGCTGTCGCTCTTGCTGTCTTTGTTTGTTCTGTAGGGC -TCGTTGTCTTCTTTGCTGCCGTTTGGATAATGGGTTCGGCTGCTGGTTCAAGTTCCGGGG -ACTATAAGTGTTCGCTCGGTTTGGTCCTTGGAAATGTTGGTCAGAATTCGGGACACCGGT -AAATATTGGCTGCCGCGGGTGAACATACTGTGGCGGGCGCAGGACATGGGAGGATCCTCG -GGTCTTTGGGCGAACCCAACCCCGAGGGGTAAAGTAGGGTCCCATTACGGGGTTGTGGGT -TCCATCCGCGGGCTCATAGTTGGCCTGGGCGCGATCCCGGTTTCTTCTGTCAAAGATCTG -GCCTTCTTGGTGCATTCTGGTAAGATGTGATCGATACCAAAGGTAGAACAGCCATGCAAT -GACTGGGGTGACGAACGCTATCGACAGAAGAACTCCAATCAGAACGTTGTTCTGCATGAT -GCGAAGCAGTGAAAGAGGGTCTCGAAGTGATGTGTTTGGAGTGTGTGAAGCTGAAGTGAG -GAGGGTGATGGTCACGTCAAATAATGATAGAATAGTGTAGAGTGCAAGAGAATGAGAAGA -GTGTCAGGAGAAGCGATTCAAGGCCTCAACTAGGAAATGTCAAAGGAGACAGTGAGGGAA -GGAGCAGACTCGTAAAGGCAAACAAGATATATGGATATTGCAATATTCTATCTTATGTAG -TGATCTGTTCATTTTCGATTGACATTTCATCTCAGATTTCCAATCCAGCTTGGATTTTCG -ATAGCCAATCCTCATGGATTGACGACAATTTGCCTGCACTTAAGAAGGATTCGAGAGATG -CACGGAAAGCATCTGGCTGCACCCGCGAACTCTCTCCTTGGAAGACTTGTGAGCTGAGAG -CAGAAGAAATAAGAATAACACGGAGACCTAGACTAAAGATCTCGGGCTTTGAGACCCAAC -CAAGAAGTCCATCTGGCTCTTTCCCAGAGCCAACTAGCGAACAGAGATATGTTAATCCGT -CATATGCCCAGACAGAAGTTTCATCAAAGGAGAGCTTATTGTACGTCGAGTGGGTCAACC -GCAACAGAGCGCGCGACCAAACCTCAACCTCCCGCACATCATCAATAAACAGATTCTGTT -TTTCCTCAACGAAGAGGACGGTGCTTTCTTGGCGATAGTCTGCGATCTGGTCAGACACGG -CAACCAAGTGACTGCGTTGGTCAGATCCACTGATCCGGGGCTCTTGCCCCGTCAAATAAC -GAAGAACTCGTTGCCCTAGGAGTGCGGAATCTGAATAGTTATCAATGATGAAATCGGCAA -GCAGGGCACTTGCATTGAGAGGCGCTAGAGTCACATCTGCGTCGGGTGAGACGGTTGAGT -GTGACAAAACCCAGGAAGCAGCTGATGCAGCCACATCACGAATTTCGTCATCGTCATCAT -TAAGCATGTCATAGAGGACCAGATAGACATCTAGCAACACTGCATCCACGCATGGCTGCG -CTCCGGATGATCTAAGAGCCTGTGAAAAGCTAGCAACAGAGGAAGCTGCAGCAAAACGGG 
-TTGTAAATTCCTAGGCAATGAAACAAATTAGCTCCATAACCCGATCACAGAAAAAATGCC -ACGACAAGACTTACCGTCTCCTCACTCAAGGCAGAGCGAAGCTTGATGGTCCAGTTGCGG -AGATCGCCTTCAAATGAAGACGCAACTGACTGATCAACAGACCGCCGGATGACCAGTAGA -CAGCCTCGAAGTCGGAGCTCGGCGTCCGTTGCCTCTCGGTTCCAGGCTGTGATATGTGTT -TCTGGGCGGAAGCTATCTGTGATAGCATCCCATGGAAGAGCCATGCTTGACACAGAATCG -AAATGTGAGGATATCAAGTTCTCCATAATAGATGCCAAATTGGACGCGGCCATTGTCTTC -ACATCGGGTACACTATCTCCCAAGATCACAGACGAATACAAGTTAGCCAATGGCCGTCTG -TATCTTTCGTCTTCGCTAATTGACGCGTGGAGCTGCTCCAAAATCCAACGAGCGGCATCT -GGGTCGAATTGCGACACTCCATTGAAGAATGCTGGGAGCTGCTCCCACTGCTGGGAAGTA -AGCATTTTCAATGTTGTACACCAAGCAAGCGCTCTTCTAAGGAGGGCAGATGCTCGGACA -GTGCTTGAAATGTTCCATCCCGCTTGAGAAGAGTCAAAAACAAATGTCAAAATCGCATCA -AGCTCGTGAACCTCGAACATCTCGTTGATGAAAGAAATAACTTGGTCTATAGATTGGTAA -ATTGATTGTCATGGCTTTGTTTCCTTGAATAGATGTCCACTTACTTTCGCTCTGTGCTTG -GATCGCCCGCTCCAGGATGCCATTGAGAATTTCGACAAGCTCCCCTGCCACAGTCGGCGA -TTTTCCAACTCGGAACAGGACTTCTAATACAGTGCGCAACGTCGCAAAAAGTAAATTGAG -GTTTGCTGCCAAACAATTAGCTTGAGTATAATTGTGGGCTGTTCAATGTACTCACATGTC -CAGAAATCATCGGTGGTGAATGAGTATCGACGAAGAGCATATCGTACACACAAGGCAACG -CCATGAACATAATTCTCGGTAACCTTTGAGGGAAGTTTGACAAGAGTGTGTAAATTCTCC -GGGATATGTTGTCGAGTGAGCAACGACGCATATACACGAGCTGCGTGCTCTCTAACACCC -CAAACCGGGCTGCTCAGGTGCCCAATGACCAGTCCGCGTAGCATCTTATCTGTAGGATCA -TCCAAGCTGGGGACTTTGTCGCCAACGAGTTCTAGCGCAGGGAACACTCGCTCAGTAATA -ATTTCAGTGCCCTCGGCCGCTTCACCCTGCGTTGTTGTAAGGAGACCAGAGAGCAGTTCA -AGCAGACCATGGTACTTTGGAAATGTGATCCGGGATCCTGGCTCTGAGCCAGAATCTCCT -CCAAACCCAGGTCCGGCGCCTGGAATTACACGACACATTTTTGTGAGTAGGGCACGGAAG -AGCATAAGTCCTGAATTGCGAAGGGCCCAGATTGGCGACCCTAGACGTTCGGCGGACAGT -GTCAGTGCTTTCATGATGAATGGCTCGGTATGAGGTGCAAGCTTTGCGTTGGTGAAAATG -TCTTTCAGGCAATTCATCGCATGAACCTGGGGCAACTCTAAGTACTGTCTTTCTTTGTCG -TACTCCACGGGAAGGGATGAGATCTCGTGGAGTTCATGAATAGCGTCCCTAAAAAATGGT -GTTCCGGGATCTGAGCCAACAATACCTGTCACCAATGCAGGCAAGCCAGCGGACCGACGG -GTGAGTTGAGATGCGGAGTCAAAGATAGTGGACTTAGCTTCCTGGTACCAGCTCCTAGGA -AGTTCTCGGATTGATGGATCTTTCGAAGAGCTGCATCTTTGACAGCACGTGGCAAATGTT -TGTGACACATTGGAAAATGCACCTCTATGGCGGAGTTCGGCCAGCTGTTTGAAGCTCACA 
-CCTCCAATATTTGCAAAATCCGTCAGACTCAGACCCTGGTCGCCACTTGGTCCATAACTG -GTGTTTATCAGAGTAGCATGGAGCAAAAGACTTGCTTCGCGAAGAGATCTCCAAGAATAA -GAAAGGGCATCTTTAGGTCCCACATCCAGATCCTCGGCGGGCTCGTCCGAATGACCTTCG -GGCGAGTCAATACACAGCAAAGGCTTCGCTTCGAGCCAGATCTTATCGCAAATTGCAATA -ATACGGTCATGGACAGACCTCCAAGCAGCGGATCCCGACCCGTCAGATATCAGGGAGTGG -AAGCTTGGCATGAGGACAATATACCTGGAATTGCATTCAGCGATATTATCTCATGTACGG -GCCAGCACAACATACCTCAGACCGGACGTATAACCATGAAGCGGCGCCTCACGCATTACG -GAGTTGAAAAGGGTCTTTGAATTCAGAAGACGTTCCTCAAGCTTTTTCAAAATAGTGTCA -ACAACTGATGCCTTAGTTGTCCACCAATCAGAAGCCGGTTGACCGGAGTTTGCTGGTAGG -GCTGCGCAAAATATGATGTGGTATAGACGAGCAACAGTATCAGCATGATCGGCTCTGCTC -GTGTTACTTGCAAGGCTTTCCGCTCGGGCAATTGCATCTGTCAGCCGCATGCCTGCAGTT -GACTCCTGGTTTGTCTTGTTGTGCAATCCGTTGAGCAGAATCTCTCGAGGACACAAACTC -AAGATCGAAAGCGATGTTTGACGAACTTCCTCAAAAGGATCTAGTAGGAGATCAATCAAT -AACCGGAGTAGGTGAGCATCAAACACTTCAACCTGAACTTTCCAAAGATTCTCCATCTCC -GATTTTTGAAGCTTAACCTCTATCCGGGGATCAAGACCGGAGTCTAGCAGGAGCTTCAGA -GCTTTCAGAGCAGTGATGTGGCGAGGATATGAGGCAGCCAGACGCAAGTCATTACCCAAT -AACTTAAGGTATGCTTTAAGAAACTCTGTAGTTTCAGTTTCATTTCTGACCTGGCCTTGA -GCTGACTTAAGGAGTGATTTCTCTGTCTTTTCGGCAACATCTTTATCTTTGACAATGCCA -CCCCTTAGGCGGACAAAGAATTTGCGTGTCAGGCTCAGGACCTCTCCTCGGGTATGTGCG -TCAGATTCCGCATGCATGGAGGGCAGTCCTCGAAGGATAGCCTTCGTTGCCCCCACCGTA -AATGGCTTGACCGTGGAGAATGCTGTGATCAATAGCGAGAGAGCCGCAATACGGATATTG -GGCTCGCGGTGCAGGAGAAAGTTGCCAATAACGTCGCTTTTCATGATCAACACCTCATTT -TCGTCAATTTTTGATTTGCCAACATTGAAGATGTCTGCGCATATTAGCAAATATCAATAA -CTATAGAGTCACAGGACAACTCACAATCTTCATGAACCAAGTTTATTTTCTTGCCGATCT -GCAGAGCAGCAAAAAGAACCATATACTCTGCCTCAGGCGCATCAGTCATATCACCTTTGA -GAAGGCTCTTCACTGGGAGGGTGTCAAGGAAGATCTTGAAGCCATTGGGGTCAATGGTGA -ACAGAGGCTGCAGGAGCCTATTAGATACGTCTTCCAGCGTTTCTATGTTTTGTAATGCCG -TGCGTCTCGCTGGAATTACCCACATCGCTGACAGCTTTTCTGGCGAAGTTTCAGCCTTCA -TCTTCTGTATCAATATTGCCAGTAAGTTGCCAGCAGCTACAGCCATGGTTTTGTTGTTGG -TTTGGATCGAGAGCCCCAGGACAAAGATCCTTGCTGCTAATTCATCGGACAGGCTTCCAG -ATGCAGTGTCAAAAGCAGGGCCTGATTCGCATAAGGACAGAGTCTCATGATCATTGGCGA -AGAGGGTCGGCCATCTATCTGTGTTCTTCAAGAGCCACTGGCGAATCAAGGGGATAAACT 
-CGCTGGGTAACAGAGCATCCTTTCTAATGAACGTCTCGAGAAACACGAGGGACCCCTTAA -GTCGTGATTGAGGCTCCCCAAGCACAATGCTAGGAGTTAGGGCATTAATAATTGCAGTCT -GGAGGGCTGCTCTTGTCGCTCCTTGGTGACTTTTGGCTAAGATTGCAATGATTGAGGCGA -GAAGCTTTTTCACGGGCTTTGGTTTGGCGTCTTCATATCGAGTCAAACAAATATTGAATA -TTGACAACCACGCTTCTTGTGAGAAAATCAATTGCTTGGTCGCCTCACATTGTGACTTTG -TGGCAGCATCAATAAAAGCAGAAGCAGCGTTGCATGCCTCTGCAGAATGAGGTGGCGAGA -TAGATGCTGTCGATAGCGTGTGAACCAACGACTGCCATACTCGGTTTACCTTTCTTTTGA -GATATTAGAACAAAATTGGCTCTTATACATTCAAACATATACATCACTTACTCAGTCTCT -TGGCATGTCAGTGTGAATTTCACCAATGGGCCTTTTGCGATATCTCGGAGAGTTTTCTCC -GAGAACAATTGCACTTTTTGCGCAAGGTCTTCCATTTCGGTTGATACAACACGTTGTACC -TGATGTGAATCATCAATTGTCATAGCCCCCAACTATACAAGCCTCGATATCTTGGAAACG -GTCCACGCGAAAGACCTCCACGAACGATTAATTTTTATGCTCCCCGCGGTGCCTCAGGCG -CCAAGTGATAAAAGTGGGATTCCGTATCGGGTAAAGCGCATATCCTTCGGCCAAACTCTT -CTCCGTTGGATCGCCGGAAGTCACGGGACCGAAGTTTTTGCTGTTCCGATGCCAGTGGAG -CTGGGAACACAGGGTCATCAACTTCTTTTCCAGTCATCCGCCCGTGCCTTCTCCTTAGGT -CCTTGTAATTTTATCTCCTATCCTCTATCTTATCATATCTCTCTTCACAATGTCGATCTG -GGACTCCCTCAGCGGCCGCAAGCAAAACCAAGGGCCCGAAGCCTTCGACCCTTCAAGCGC -GCAAGATGCTACATCCTTCCTCTCCGAGGTCGCTATTCCCGATCCCTCTCGCCTCCATCC -CCTGGCTGGTCTGAACCAGGATGCCCTCGACTACCTCTCTCTTGACGACTCCGCTCTTGA -TAACCTCCCCGGCTCCCGATCAGTGCTGCCATCCCGCGGATGGTCAGATGACCTTTGTTA -TGGTACCGGAACTACCTACCTCGTTGCACTCGCTACTGGAGGTGCATGGGGACTTGCAGA -AGGATTGAGAAAGACACCAGCTACGGCAGCGCCGAAAATCCGTCTCAATGCTGTACTCAA -CTCGGTTACCCGTCGCGGTCCCTTCCTCGGAAACTCGGCTGGTGTTGTGGCTATGGTTTA -CAACGGAATCAACTCCGGCCTCGGAGTCGTGCGAGGCAAGCACGATGCATCTAACAGCAT -CGTGGCCGGCGCATTGAGCGGCATGGTCTTCAAGAGCACACGCGGACTCAAGCCCATGAT -GATCTCTGGCGGCATTGTGGCTTCCATTGCTGGTGCCTGGGCTGTATGTATATTGTTGTG -ATATGATGTGTGACTTTGCTAACTGTGACTTTACAGGTGACACGAAAGGCCTTTTTCTAG -ATTGCTAGTATGGCCAATTTTATACTTCTCCGCTTCTTGGTTATCCGATTCTCCATACCC -TCCACGCCACCCTTCAACGACATTGGCGGTTGACGATTAGCCGCCTCGCTTTGTCTTTCC -ATCTGCGATGTATTACAACTATTTTTTTATTCCCCCTGTGGGTCTCTTCACTCCGGCGCT -TCAAGCATTATGTTTCGCATTTATGCGTGGGTTGGGAACGGGATGTGCAACTGGGTTTGT -ACTTTGTATTTCTTCTGTGTACATAGAAAAGTCTCTTGCATACCTGCGATTAATTGGCTA 
-TGGCGCTTACAAGTGTCTGTCAGTAACGTATTTTAATGCTGCTAGTGACTTCCAATAGAC -AATTTCTCATACATCCTTGGCCCAGTATAATGAGCGAGCTTCACATTCCTACTGACATTT -CGGCTGCAACAATCTTCATTCGGTGGGCTTGAGGTTTGATCCCAATGCCACCAGAAAATT -GCGATGTTGTTCGGCCAAATGTTTCGTGGATGGTACACCAAACACCAAACACCTGTATTT -GCCTAACCCCGTTACATCGCTTCTGGTCCGCAATTCTCGAATTACATGCGATCCTCTCGC -TTATGGCTAATCTGAAGGCAATACTTGCCCCTGTGAATCTTCGAGGTGTATAAACCAGCG -GGAATATGCTGTCGGTTGTCGCCCGGAAACCACGGCCTTGTCCAGGGCGAGGGTGAGATA -TTTGCTCCATTTGCTCGGAGTAGGAGCATTACGATGATCTCTGCATTGCATTCACTGCTG -GTACCATTTTTGGAAAAATATTCAAGTTATTGCGGAGCTCTGCGAAAGGTTGGGCTATGA -CAACTGCCGTGCTTCATACAACATACCCTGATCCCCGAGTGATATACCGCAACGCCATGT -GGTAAATAGGAAGAGCTGACAGGACATATTGGTAACCAAGGTAGTAGAACTATTTGGTCA -CCAAGTGCGAGTATGAAGCTTTGCCACTTGACTAACGAGGATCGCTATGCTAGGTACTAT -GATTAGAAACCAGTATTCATTTGTGCAAAAGCTTTATACGCTGTAGTGATATATATGTGG -GAGGACCTGCGGTATCATTTTAATCAATGAAAAAACTTCAGTTCAAGCTTTTCGATTTCA -TCGAAGCCAGTAAGAATACGATATAAATTGTCGGAGAATAGTGAGCGCTCTGATATATAT -ATGGAACATTTGGTCATCATGTGTATCGACCACCTTGGAGGTAAAAATTTCTCTTCCCAT -GGAAATACACTCAAACACTTGTTCATTTACTGGCGCAAGGACATTGAATTGTCTCTGGTA -CATATTCAAAATATTTACCAGCCAAGTTAGGCGCTCATGTGGCGTCGATCAGGGACCGGC -TACTGCGGACGGGGCAAATGGGGAATAATTTTGGCCACCCGCGCCAGAGCTTTCACACTT -CTTCAGATCCTTGGCAATGCCAGATATATCGTCAAATAATACAAAAGTCACCCAACCACG -CACCAAGACTTTCACTGGATGTTGGACTTGTCGAGACCGTCGAGTGAAGTGCGACGAGGA -ACACCCTCATTGTCGGCGGTGCCAGCGGAATGGCTGGGTATGCAAGGGGTATGATTTGCG -CCTAGGATGGTCTCCAATTCCCGGGGGGCGCTCTTACCGACGCAAATTGCGATCACCAGC -ACCAAACGTGGGCCATGGGCTGTCATCTGCTGCTGTGACGGCACTACTGGCTGAGTTAGA -TGGATGTTCGGGGAACAGCTGTGCCGAACAACGAGGACCATTCTCGGTGTTTTCGATTTT -TTCAAGCCCTAAGACTCAAGATGAAGGCAGTGGTCTACTCAGGCCGGAAGATGCATTTGC -AAGGCAGCATGCACAAGATACACAGCATCCATCGTCGGCCTTGATTAACTCGCCAGTTAG -CGACAAATATGGTCACCTGGCTTCGCCCAGACCAATATGGGATAGGTCATCGCCATCGCC -TGCTGCCGCACCGTCCCCGTTTCAAGTCTACCAGGAATCATGCACGGTATTATCAGGGCC -GAATGATCGGGTTGATGAAATCCCCATCAACACAATTGCCCATGTCATTCACCACAAAAC -TCCCGAAAATAGGATGTTAGAACCAAGTGACTACGGCAGACCTGTATCAAATATTCACGG -CGATGGGACTATTCTCTGGGATTCCATGAGTCCATTGACTCTTCCAAGAGCAGAAATAGA 
-GTTAATCCACTATTGGGTTGTTTTCCTCAGCGGAAACTTGCTCTTAATCGACACTCCGGA -CAATCCCTGCCGGACAGTATTCTTACCTCTAGCTCTGAAAGGCCTCGAAGCCTCGTCAAC -AGAGTCAAACATTCATCTCTCAATTTTCCACGCCATTTGTGCCTCCTCCGCCTTCAGTCT -CTCCCACCTTCGCCACGATTCGCGGTACCACTCCATCGCCGTTCACCATGACCAGCTGGC -TCTGCGTCATTTGCGAGGAAGCCTGCAGCGGGCTCGATGCCTAGATGAGCCCACATTAGC -TGCAGTCCTCGCTTGCATCACGGCTGAAGGTATGTCGGGTCGCCGCAGCCGATGGAGAGC -ACACGTGGCTGGATGCCTTGGTCTTCTAGAGAACGAAGTCTATGGCGACTGGGTTCAAAA -CCCGACAGCTGCTAGAATGATCCAAAGTTACTTGACTCTTTCGTCACTATGCAGTCTTAC -AGTGCCAAAACGGCTCATGACGCTCCTCAATGGCCCCTCCAATTCCCACCACTACCTAGA -ACAATCTCATGGAGTTACAACATCACTGGTCCAATTCCTCGCTCAAATCACCGCACTTGT -TGAGTCTAAGGCACAGTTACCTATTGAGGAACTAGACCGACTCGAACTCCAGCTTTACTT -AAACTTTCCATCCCCGTGCAACCCAGACGCCCCGGGTTCAATCGTCGTCCAACATGCTCT -GACCTCATTTTACTATGCGATAATCGTATACTTCCGTCGTACTTTGCGTGGTGCAAGATT -GAGCGACGTACAGGACCTGGTCGAAAAAGCAATCTGCGAGCTTGAGTCTGTCGATGCACT -CACGCGTGAAAGGGGTGGCCATTCGTATAATTGGGCAGGTTTTGTGATTGCGGCTGACTG -CGAGCGGCCTGATCTGCAAGATCGCATGTTAGCATTCTTTGGCGGCAAAAGCCGACGCGG -GATTCAGAATATCAACTTATTATGCGAAGTCGTTCAAGCCTTGTGGAACCGGCGGGCAGC -AGCTGGATCGCATGTCGATATACAGTGGCAGGATATTGCAAGGGAAACTGATTTTGATAT -CATGCTTGTTTGAATATGGAATTACGCATGCAATCATCAATGCACCTATTAATGTTACAA -CCGTGTACATATATTAGGGCCCACAGAACAAGGAGCCAAGCCATGTGTTCAGGTGGAGCC -AAAGTGGGTACTCGAGCCTTTCCCCAGATACGGTCAGGACGATACTATGAACATCCATCT -CAAATATCTACAAATTAAAGTCACCGGAAAGCTCATTTGCATTAAACTTCACGGGCTGGC -GTCCATCAAAATCGGCAATACGTCAACATATCTCCTTGTTCACAGAATGGCCAGTGCGTT -AGGAGGGCTCGCGACAGCACCAGGAAAATTCAACGGGGACGAAGTAAAAAAGAAAGAATA -CTGCCTACTAGCACTACAAGCATGAGCTAGCGCTTTCAAATCCCAAAGCTCACCAAGCGG -AATACCCAGCAAACTAAGACACCACTGATGCAGCACAAGCTTATGGGTAGGTTGCTCAAT -GCCATCGATGACAGGCGGCATAGCCTCAACAGCGATGTTATCACTAGCAACAGCAGCAAA -ATGCTGATCCCAGAGCCAGCGCGCCATTTCTTCACTACCTTCTAGCCCACACACCTTAGC -ATTGCTCATCGCAGCGCCCTGCTCAGCGCCAGTCATTTGACCCAGCGACTCTGTTACGCC -GAATCGGATGAGGAGGATATCCCCTACTTGGAAGGTTACCCCTTGCCAGGCAGCCACTGC -TTCTAGCTCGCTGGTCCCAATGCGGAAACCGGAGAATTGGTCATAGCTTAACCCGTGGAT -CTGCGCGTAACTCTTGAAGTCGATCAGTACGCCTCGGCCTGTCACACAGCCGCGTTGGTG 
-CCAGTCTGGTTTTCTGTGTTAGCCATTATCGGTGTTGGCATTTGAGTTAGAGTTCAATTC -TCTTACGGTTCAAGGTTGGAAGACACTCGGCAGATTTGTGAGTCTGAAATGCTTCAATGG -TCGGGTTCGCTCCGTTGTAGACGAGGCCGGTTGGGAGGTGCATGAAGTGAGCTGGGCTGG -GAGTCATTTTTTTATTTCCGAAATCTAGATTTCTTGGACGCACCGAGACTATCCCATTGA -CTACTAGCTTGGGTATTGAACTCGACCTGGAATCAATTAGTTCTGATGAATCTAGAACAA -AAGGATTGCCGAATACCTCATCGTCAAACCCGAAGTGGCTGCCGGATACTGGGTCCTCCA -GTTTCATGACATTATGACGGAGGCTCTTTCGAAAGAACCCTGGGATTTTGATACTTCCAA -TGGGCCAACTAGTGTTGTTAGGATCATGCCTTGGAGATAGTGAAGGGATTCCACTTTAGA -GAAATAGACACGCCTCGGCGAACCTCTGCGACAGCATCCTTGACGACTTCTGGGGTGAGT -AAATTCAGTGTTCCTAGAACATCTTTCTTGGCGTCTTTGTCAAATATGCCCCATGCACAA -CCTTGCGGCATTCCCTCGACAGGAGGGAGATCATCGAAGTTCGGAAGGGATGCCATAGTA -TATTGAATAAAAAGAAAGTCTGATTCAGCTGACCTAAAGGCCCAAAACCGGCAGATCGTG -TGTTGAAATGGCCTGTCACCCTCCCAAGTACTCAGTCGGGGGATTTTCGTAAAATAGAGA -TCAAACAAGAACTCGATTGTTTTGAATGTGCCCGTCAGATTGTCAGATCCGCGTGAGCTA -GGGCTTTGCTTTTGTTTGCTTAAATGTGTGACTGCCCAAGTGCCCAAATGCAGCGTTCAG -CGTCCCGCAAGAGATATTGATTGCAACACTTGCCTCGATAGTGCTACCTATGTCTTCCAC -CTTCTATGTTGCGAAAATATGAATTCAAGGTTACAAAAGAGGCTATTCTCGTCCTACCAC -AGGTAGCGCAGGGCCAAAAGCCAGCCCTACATGATGTTGCAACAAGGCATACCTGATTAG -GAGCGATGTGAACCGTTTCAACCTGCCAGAGGATGTGCACCGCTAGATTGGCCATGCTTC -TGCGCTAGAGATGGCCATGTTCTGTTCATGTACCACTTTTGCAAGAAAGTCCCACTTTAT -GTGAGGGGAGGGCCAGCCATTGTTCATATCCATGATGGTGTTGTAATTTGTGCTGGGTGG -ACCAAAATGTGTAAGCCCTCGTGGGGGAACTTGCGCTAGTTTTGAAGTGGTTTGTAGCGT -CTGAGCTAGTCTCTCGTCTTGCAAACTATATGGGAGGAATGGCTTCATTTGAATGGACTC -AAATGGATCAACCCATATTTTCGGCTGATATGAAAGATGTATATGTTTACACGTCTATCA -AACTAGTAAGCACCCATCGGACCCAGAGACGCTCTTCGTAGGGAAATGATCAATGAGTTC -GCAAAATTCACCTGGTAAATGAGAAGGACATCACAATGGCCAACGCTTTACTCCGTCATG -CTGGTGATAACACACGTGAATTTCGACCGAAATTCCAGCATTTTTGCTGAGGCTCAAGGT -TCACCATAGGTATAGAAGCCCCACCGTGGTCTGGCCGAGTCGGGGTTAACGGGTCGGGTT -CGTACGTGCCGATCCATTCCCAGCCACTTTATTCAGCCAGCAAATGGACATGGCTCTGGG -GTTGGACAGAGTTCTGATTGACGAGATTGTTCAAGCGCTCCATTCCCCGAGTTGCATCTG -ATGTACTCGGTAGTTGACTGTGGCCTTGATGATACAAAGGGTCGAGGAGATCTCCAATCC -CAACACCCTGGAAAATTTCCCGCCTAAAGTCTCGCTTGAACCGTTGCGGCTGCGGCGGAA 
-GAGGTGTGGGATGGCAAGGGGGTACTATGTACATATGTTTAATTAATTGAAGGGGCTGTA -TGGCACGTTGACCGTGCGGCTGATTCCAAAGAATGAGACACAACGGGATGGACCGGGGCC -ACGTCAACTCTCAGGAATGGAGAATGACCTGCCTTATCGGTTCGGATTTGTCGAGTCAAA -ATTGTGTATAAATTATATCGTACAGAATGCCGTGTTGGATGTCGTCATACGGTCAGTAGT -CATGCCAAGGCGGAGGATCGCATAGTGCATCAATTTTGGGTTCGGGGGCAAGCCACCGCG -AGAGCTAAAATTAGTATCTACATATGTACGGAGTACCGGCAGCCAGGACGCGTTTGGCCC -CTACGTGACTGCAGATAAAGCAGTTCAGGGCCCAGCCGCCGAATTATGGTTGACAAGGGC -CGAGAGCAAGGGTTGAACACCGAGGTATGTAAGAAATAGGAAGCTTACAGCGTATGTACG -CGGTAGGGAGTATAGATATGATCTTCGGTTGGACAAGATACTATACCTGAAGAGGCCCAC -AGAAAAGTGCAGATGATAGACATACCACCTTCCCTTTTGGGAACTAGAAAATACACTCTG -AGCAAACATCTTGGGGCTTACGGTATCGACACTGTACACTGTATGAAAGATATATACTAT -GATGAGATCGCACAGAATGGATTACCGCGAAGTACATACATTTTAGATATGAATATAGAA -GAAAATCAACCATCAACGATCTTTTATGGTAAGATACTATATTTTGCTATATATTAAATT -GCAAGTATAAACTCAACTAAACCTGCGGGGGACACATGGAAAGCTAAACCATATTAGGAT -CGTCAGCTGACTGGCCCCGATCAATTCTGGTTTTTTCTTTTTGATTTTTTGTTTTTCCTC -TCGCTGCATCGCCCCAGCCATGGCGTTTTCCCTGCCTTTTTAGTGCGTTCACTCATTTTC -TTTTGTTCATTTCCCCTAATCTTGCATGTGGTTCCATCTTTTTTATACATAGAGAGGTGA -CCCCTAACGGGTCTTCCCACGCCAAGGCCATTTAGGTCTGGAGCCTTGGGGTTCTTATCT -TTGACCATTTTATATTGATTCTTATATCTTGTGACTTTGACCCCCACATCCCCGGAGGAT -AGAATTCAACGCTCCCGGTCTTCCGTTTGCGGCCGAGTCATCCCCCCTCCCCCCCAACAC -CTTTATCACCTTGATATATCCAAATTCAGTTTATCGACCATGTCAGCAATGTTTCAAGAT -GCAAGCGACAATGAACATACACAGAAAGAGAATGACGGGCCTCTTTCGGTAGACGACCTA -CCTTCCATACTTAATGCCGAATCGCCACTGCCGTCCCTCCAGATTCGCACCGATCTACCT -AGTATTCGTCCATCCAGTTCGCGAGACTCCGACCCTTATGCTTCGACCAGTTTGTCCGGA -AGTTATCCACGTCGGACCCCGAGTCTTCGTGCCCTTTTCGCACCATCTGTACAAAGTGCG -GGCTCCTTGTCCCCGGCATCTTTCATATCCTCGCCTCAATTGAATGCGATGGGTGACATA -ACTCCATTGCCATCGCCAGTTGGAGGAGCTGTGCCGTGGCGTAGAGCTGATGTGCCATCG -TTATCACGTTCTTCGTCGATGGCGTCGCGCAACGGTTCTAGTATTCGCCTGAGTGATTCA -TCGCAGATGCTTGGGCCGCCCGTTCTGTCCCGTTCGCGCCCGAAACCATACACTGGCATT -GACGGACACGGGGAAGATTCATCGGGGAAAAATCGTTCTCGTCACGACTCACCTTCAAAG -CATTCTCATTCACGGAACCGCAGTCTGAGTGACTATGCGCCACCCGGCCGGGTCTCTGTC -CCTCCCCGTCCGATTGCGGTGTCAGGGAATAACGGGCTTGGTATTGCCTCCTCGTCGAGC 
-ATAGATTCGAAGTCCAATGGACTCCATCGCGAACAACATCTCGCTGTCCACCGAGGTATC -ACATTGCCGACTGTTCGTCCCCCCAGCCCACCTCGTAGCAGTGGCAGTGGGTATAGTGAC -AATGAACCCGTGATCCGTCACCCTGCCCCCCTCTCCACACCCCAGGAGATATATTCTGTT -CGGTCGGTCCGCACAGAGCAGGAACGTATTTATCAGAAAGTCCGAGCATTAGGCCAGGGC -ACATTCAGTCAAGTCAGTCTGGCAGCACGCGTGGAACCTATCCCCGATATGCCTTTGTCA -CCGGATGGTGATGGAGCTGGATCTTTCCACGGGTTTGTGAACAACCAAAAGCTGGTCGCC -GTCAAAATCATCGAACATGGACCCGCTGGAGGCGCGGATGAGGGCCGACTGGAGGTATCA -CTTAAGCGCGAAGTAGAAATCTTGAAGTCTGTCAATCATCCGTCCCTCGTCCAACTGAAA -GCATTCGGCAGTGATGAAAAGCGTGCTTTGTTGGTTTTGGACTATTGTCCCGGTGGAGAT -TTGTTCGAGTTTGCCACATCAGGAGCTCCTCGTATGGCCCCAGAATTGATCAGACGAATT -TTTGCCGAATTGGTGGATGCTGTGCGTTATCTGCATGCTAATTACATCGTTCATCGGGAC -ATCAAGTTGGAAAGTGAGTCAGGTTTGCCAAAATATTGCACACATTATACTGACCATCTT -TCTAGATGTCCTGCTTACCATGCCTGCTCATGTTATGGACAACGTGCAAGACTGGCGTAC -CTACGACCGCGCTGTAGTCACCCTCAGCGATCTCGGCCTGTCACGACGTATCCCCGAGCC -ACCTGAGAGCCCACTCCTGCAGACCCGATGTGGAAGCGAAGACTACGCAGCCCCTGAGAT -TCTGATGGGTCAGGCTTACGATGGCCGCGCCACTGACGCTTGGGCACTAGGCGTTCTGCT -TTATGCCATCATGGAGAATCGGTTACCCTTTGATGTTATCCCTGGCACTCGCGGAGATCC -TGCCAAACTCCGTGCTCGGACTCCGCATCGTATTGCACGTGTCGAATGGGCTTGGTATAG -ATATGCTGACGAGGACGAGGAATGGGATCCCGAGAAAGGCAAAGGACTTGAGGGGGCTCG -TGACTGCGTGGAAGGTCTTCTCAAGCGCAACACGAAGCGCAAGTCTCTTGATGAGATTGC -TGCTATGGATTGGGTGCGTGACGCTATCGATGTTCCTGGTGGCCTGAAACGAGGCGACAA -AGAAGTGCCATAGAGCCGGTCTCCCATGTGCCTTGTTGTCCCGTATTTTGTTATTTTTCT -TCCTTTTTCCTTGTGTGACGTCCCTTCAttctcttccttttcttgactcacatacctttt -ctcttcttttcttccgttttcttatctttCACGAGGCACAATGACTGAATGAGTCATTTT -GTGGATCGAATGTTATGTTTTTATGGCTAGACCGGTTTTTTATACCCTTTTTTTATACAT -CCAAATACCTCCAGGAGGTAGGACACGGAAATACAATATTCATTTTACAATTCTCTTTCG -TTCCCTGCCTACATCTTTCTTTTGGCCCATGTCACTCTTCAAATACGAGTTCCCTGCCGC -AGATATCTCGATTGACGATGGTCACGAAGCCCCGAGATGACCGAGTCAGTCAACTGTCAT -CTGATCTTCGCATCCATGTCACACACGGAGAGCTGGAAAGATATTCACTTCTACAATTAC -GTTGACTAGCATGGATATTAAAGCTACATGGCCTTCTATTTGAGAAGCAACGGTTTCTGT -TTGGTCTTTCGCCTCTAGGCTTGCTCAGTACGTATGAGTGCGTATGATAATCAGACGGAC -TCACGTATAAAATTTAGGTGATTCAACAAGTTGGAGCTGTGGTTCGCGACCCGGCCGTTG 
-ACAAAACGACTGCTAAATTATTAACAGGCCAGCAAATCGATTTTTTATATGTCTGAGTTT -CATTAGTCTCTTTTCTTTTATGCATTTTATACTTGATCAATATCTGTCGAGGTTTAATCT -CCGAATTTCATGACGTCAAGTGATCACCAAAATCATGTGCTCAGCTCGCAGGTTTCTTCA -CATGCGACCAATCGTATTTGTTGTGGAGGCGCCCTGGGGCTGTGCTCGTATCTCTGAGCC -CTCGAAGCCTCGATGGCCGTGATTTCTGCAAATCAACGTCTTCGACGCAAAGAACCTGCA -TAACTCCTCTTTTTCTTTTTGCTTAATTCGATGTATGCACAAGGACTGAGGCTCTTGAGA -TTTTCGATGGAATAATTCAATCCAGACGCATTTCACTGGAACAGGACGTATACACCATGG -CCTTTGGGCGTTCGAGTCGCTCAAAGAGACCGTCATCTCCAAAGCCCCCTCCAAAACCAC -CACGGCCGAGTCAGCGAGAGAACGCAATGGGAAAGCCACCGAAATATTTCCATCCGCCAT -ATGCGTCGCCGACGGGGGCATATTCAGTGAACACATTTCATACACCTGCAGTCCCATATC -AATCAAATATGGCAATGTCTATGGTTCATCTACAACCTCAGCCTCCACTACAACCAGTCT -ATCCTTTCCCATCTGCTTTAAGCAGTCCACCATGGAGTTCTCAACCGAAACCTTCAAAAT -GGACGTCGTGTACGAATCTGAACCAATCAATGACAAACTTAGTATCGCATACAGTCGACA -AAACCAACGCCACGATTCTTGATCTCGAAAATTTGGTCCGCCAGAGTTTGGGTGATGGTG -CAAATGTCAATGAGGCTACATCACGCTTGTTAGATCAAGTCATCACCTCGATCGACTTAG -GGTCGTTTTGTGGGAGAGAAAATGAACTGAGTAAGTGGTATAGCTCTTCATTGTATCAAA -CAATACTCATTAGTGACAATAGTTTCTGCGTGTAAAATTCCTTCCCAGTCAGACGAGAAG -AAAGATCGCCGAACACAGCGAAAAGAGAAAAAGTCAACAGGGTCTGTGGATTACTTCTCC -AAGGTGTACCTGTATGCCAATTCACGATTACCCCCTCATTTAACACCACTGAAATTGTAA -GATCATCCAGCCCTAAGGTACTAACTATATAGCTCAACTTGCTGATCTCGAATGAAGCTA -TCCTCCAACATATCCGCTCCTTCAGCTCGCAGCAAAGTACTCGCGCCGTGTCTACGACAA -ACCCTCAGGCCGTGAACGACATTCATACGTGAGTTCAGATTGGCTACAGGGCACCAAAGC -CATGGTGGTCAAGTCGCTACCAATCGATGACATGAACACAATTGTCTTCGCCATACGTGG -TACACAGAGCTTCCTGGACTGGGCTGTGAACGTTCACACGGCACCCACCTCTCCCATTGG -TTTCCTAGATGACCCATCCAACTGCTGTCATTCCGGATTCTTATCCGTGGCCCGAAAAAT -GGTTGCCCCCGTTGCAGCCAGACTACGTAGTCTTCTCGAAGAAGATCCCTCTCGCATGTC -CTATTCGCTAGTCTTCACCGGCCACTCCGCAGGCGGAGCCGTCGCAAGCTTGCTATATTT -ACACCTCCTATCCGAGTCACCAGCTGTGCGGTCCGAGCTTACTCATCTCCGCGGCTGCTT -TAAGCACATTCACTGTGTTACATTTGGTGCTCCACCCATATCTCTTCGTCCACTGCAACT -CTCTCCTACTGCTCGACGATCAAAATCCATATTCTTTGCTTTTATCAACGAAGGAGATCC -GGTCTCTCGTGCTGACAAGGGCTATTTTCTCTCGTTACTAGATCTGTATGTCTCACCTGC -ACCGGGGTCACTACTAGCCTTGTATGATAGGAAGAAGAAATCTGCTCCTATCTATTGGAG 
-AACACCTTCATCCGATTTATCACTCGCTGGTCGACTCGTTCTTCTTCGTCCACGAGATCA -GTGGAGTGGTCGTCCTATTATTCTAGCACCACCAGTCCCCGGTGGTGCTCCGGCCCTGCC -GCCACGGGAAAACGTGGATGCATATGGGATTACAGATCCTGAGCTCCGAGGGGTGGTCTT -TGGTGACCCCGTCATGCATTTTATGGACTTGTATTCTCATCGCATTGATGCCTTGGCCAG -AGGTGCCATGAGCAAACCATCATGATGTATCACAATATGTATCACGATCTAATTTACGGA -GCTTATGTTTCAATCTAAAAAAAAACACTGAGATACCTAGTATGCATTGACTCTTTTGAA -TCGTGGCAGATCTGTTCACTCTTTATTATTGGTAATGTTGATATTGCTTTGGCTGTCCTG -AAATACTCCTGTCGCCTCCATCCCAAGATGTGCCAGTGGTCATTGCTACATTCTGAGACT -ATGTTCAGTGTATGCCTTTGTTGACCTAGTGTAATGTATTATTTTCCATCTAAAAGAAAC -AAAGGAACGGACTAAAGGCCAAAATCTTTGAATTCAAAGATCAATGAAAATTTTGTACTC -TCCAACTTGAAATCTACTTCCATGAATATATAACTGTTCATTTGGTCCTGTTCATGGATT -CTAAAGATGGCAAGCCAACCTCCGAGCCGAACCCACTAAAACCCCAAGTACACTGTATGG -TGTCTGAGCTCGTTCCATTGACTTAAGCTACGGAAAATCCCCCTCAAAATGTGTGTACAG -ACTTATGATATGTGGACAATGTGCAACTGCATTCAATTAGGACGATTTCAGCGCTGTGAA -GAGTTCGGCGTGCCATTTTTCAAACGGGAGCGCTGCACAGGACGTATTTTACGAGCTCTC -AATATTGGCATAATTGGAATCCCCGAGGAGCGGCTTAGACACAAGGCGGGAATGTGCTCT -GAGTGCTCGGAGAAGTTGTATCAAGAAATGTGGGCGCTTCGTTGGGAATGGACGTCTCGT -CAGAGTGAAGAGTGGGCGGCGAAGCAGCGCGACAATGAATAGAGATACAAATACAACGCA -TTGAAAGCCATGAGCTTATTATCAAAGTAAATATGCGGTATTGACAAAAGGCTTTTGAGA -AGAATTTACGAAAACAAGTTGATTATGTCTTGATTCGATATCTACTGCTCAACGTAAATC -TGGATTTAGAAGACCAGACTGTTTGTTACAGAGACTCCTGGGAATGATTTTCCCCCTTCA -TTGCGGGACTTGGGCTCTGTGGATTCCGAAATACATTTCTTGGGTCAACTTGTTCTTTGA -TCTGCTGCAGTCGGGGGAGATTGGTGCCCCAATATGCCTCGGGGGCGTTGCTCATAAGAG -GATCAACATAACCCGGGTAGGCAGCGTAAGGAAGATTGTCATTTGTGACGATCTTATGGA -GTCCATCAAGGAAACCGACCTGCGTCTGGGATATGTGTCCAAAGAAATCGAGTGTGTAGG -ACTGTAGCCAGATGAGCACATCACGATGGGCGTAAGATGTAGCATTAATTGGGATGTCGT -TGGTGTAACCTCCCTGAAAATCGAATAACAGGAACCAACCCAAAGTGCCCTTGTTGACGG -TACCAATGTAGTCAAACATTCTGTCGATAGTCTCCTGGGTCATTAGAGTCTGGGGCGTCC -AAGAGGTAGACTTCGCGTAGAAATTTGTTGGGATTCCAGCGCCCAACTTCAAAACAAGGT -CCTCGGCCCAGTGAGCGACAATACCTAGCCAGTCATTAAATACCAATGCGCTGCCATTTG -CACCGGGAAATTTCCTGCTGAGGTTCATCTTATCATACTCGTCTTTTGTGCCAAAGAAGG -TACCGCTGATTGCCATACTATGCTCGAGCACAGTGAGGGTAGATGCCATCTTTCGAGTCA 
-ACTTCGGGTCTGACACAAAAGCCTGCCAGCTCTTGAAAAGGGCCGCCTGCTCCTTGGGAC -TGCCGATCTCCAAGCTGTACGCATATTGCACCGCAGTACCAGGCTCCAACTCAGTTCGGA -CCTTGAACTCCGTGACAATACCATAGCCCGAGGCAGCACCTTTGATAGCCCAAAAGAGGT -CCTGATTTTCCGTGTCAGATGCTCGGACAATACTCGAGTTGGCGAGAACCACCTCAACCT -CCAGGACATGGTCAAGTGCGGCACCAAATTGCCGCGAAGTCGGGCCCAGACCACCAATTG -TGAAATGTCCCCCCGAACCCACTTGAGGACAAATTCCGTGAGACATAGTCCGTCCTCCGG -CGTGGTACAGCCGCTGTGTCACGTCACTCAATAAATTTCCCGCTCCAATCGTCGCTTGCC -AGGTTGTATTGTCCATAGAGAATTGTTGGAGATGCTTCAAGTCAATCGCAACGGCACCGT -CCGTACCTCCAAGGCCTGTCACATATCTCATATTAGCCGCCAGGTTGAATCACTCTGCAA -TGAGATAATTGCAAAATCTCACCATAATTGCCGTAGCTGTGTCCTCCACTTCTGGCCTGG -ACTGGAAATCCGTGCTCTGCGGCACACTTGACAATGGCAGCCACCTGCTGAGAGGAGCTG -TGGTCTATGAGCAAGGAGCATAATGAATCATCTATCTTCAAGGATACTCACGTGGGGAAA -GTCATTGCTGCTGGAGTAACGGGAATGTTAAGGTTGTAACGATTCACAATGTGGTCGTAA -AACAGATCCCCGGCGAAGGCAAAGCTTCCCCGGTCTGTCAGGGTACTCTCGAAGCATGAT -TTCAATGCTGCATTGGGACCAGCAAAAGGCACCGATGCAGCCCCTACTACCAGGAGAAGA -AATTGAAGAATCATGATTGGGCTTGGCAAGGTATGTCCAGAGTGTCTACagagaagagag -gaaagtgcagagacagagagacagagacagagacacacacagagagagaaagagaTCATG -GACTACGTTGCGGAGTCGAAGCGGAATAAAACGAGTCAACACAATATCAGATAAGACTGA -ATGATTGAATCTCAAGGATGGACGATGAGCCTTTGCTCATATCATAAAATTATAACATGA -TCCATGAATCTCAGGCTTGGCTCATCAACAAACTAAAAGCTGGTCTGGTTCAACAATAGA -ATTGTTCCAAGTCTAATCAATATGAGATCACGTCAAAGGCAACAAGTTCCAGGCATCGAC -AATCGGCACAATGTAACTGAATGGTGAAGACACCCTTCTTACATCGTAATCACCCCCTAC -GCTATACGATGATCCGTCCATAGAAAGCAAACCCGAGGTCCAGATCTATCTTGTACCTTT -GATATTGTGCCTAGCAAGCCTTCTCCGACTTTCGGACTGCATATGCGACCCCTATACATC -TATTATTGGCGAGTATAGCTTGAGCCCATACAATATGTTTGGGTTAAATATCTAAAATAT -ACCAGAAAATGATATTGATTAAGATCACCATGTCCGGCGACTCTGACTCCACGCCCTGGA -GAACTCCTGCTTCTCTTTTACCCCCAAGCCAACCCGCTTGACCCGGCCAAGACGAGCGAG -CGACTCCCCCACGTTGTCCAACAGCGCCTTCTCGCTGGCCGAGGCCTGTTTTTCTTGGCC -GTTTTGTTCTAGTGTGTCTAGGTAATTCGTGTTATAGGTCGAGTTGTCAGCAGAAGATGC -ATCAACGCCGTTTTGGCTGTTTGTAATGGACTCCGCGATGCGCACTCTAATGCAGTGGGT -CGGCTTGCTAAGCAGCGTCCACAGCAAGCTCATGTAACTCCCAGGAAGAGGGGTCACAAT -ATCCATAGGTGTATAGCGCAGACTGACAGGGAATATCTTGGTCTTGGGAGGTACGCTGAG 
-GAGGGAGTTGCTCAGGGCCAAGATGCCGCGACCATTGGTGGTGGTACATTCTGGGAACAA -TACAATCGGTCTGGTGGGGTACTTCTCGACCAAGGTCGAGACGTCCACCATCTGCGCACC -AGGAGCGGGTTGCGTGGCGGGGAAAGCGAAGGCTCGCAGAATGGCCTGGAACAGAGAGAT -GCGCTCGACCTGGCGAGTGTTGGGGTAAGACGCCGTGAAGATCGGGTCGAAGATGGCTGC -CAGGTACACGGCATCAATGGGTGATGTGAAGGACGAGGCGATGACCGAGCCGGGCTGTGG -CAGACGCGCTTGCTGCTGCTTCAAAGACCTGTGTCCAAATTGTTAGTTACCGGCGGAACA -GGCTATGGTTCCAATGGCGTAGAACGAACCCTTTTCGCACTCCATCCACTTGGAGATCAA -TCCACCAGATGCTGGGAACTCCAAGAATACACCACAGAGCAGCCTTTTTGCCCAGCGAGC -CAATAGGAAGCCATTGTAAGATCAAAAAATAGCTGACAGCGACGAAGATGAATAGCGGGA -GGCGGAAGCAGAAGAGGAAGATACGAAGGGGAGCTTGGAGGCCAAGTTTTTCAATCGGGA -CTGGGAGGAATGGCGCAATTCCCGAGCCTAGGAGATGTCAGCATCATTGTACTTGGAGGG -GCTGGTCATCAAAATGGACTCACCTCGATCCCGAAACTGAGAGTATCTCTCCATCTTAAG -TTATGACGGCTTCAAACAGAATAAGCTGTAGTACAGGCTTGATTGTCAACAGGCCATGCG -CTAAGCAAATATCTATGTGGGATTGCGCCGGCAGCACAAGTTGCGACTGACAGTTGAATC -GGGTTGACAAGCTGTTGATCTTCACATGCCACAAGAGCTGTCACGTGGTTCTGTGCCTGG -ATGCCTCAGGCACACCGCCTTGTGCAACATAGATAGTAGAATCGAGTCGAGCGAAGTACT -GACGGTTATCCGGATTCCACGTCTCTGTGCTCTTGCGTGATCATTTGCATACAAAGATAT -CTAAAATATAATAGGTAACAAAGAAAAAAAAAATACGCCTTGAGCGCACAGAAGCTAAGA -GTAGTATACAAGATCTATAGGATTACGCCCTGTTATCTCCGTCATCATCATCATCCACCA -CAAAGTCCGGTGTCGGAGGACTTGGAGGCAGTTTGAAAGTCCAAATAGCTATTCCGACCA -AACAGAAGAGCGTACTTGTCATCATCCACGCGAGACCAATCCAGACGCCACCTAGACGCA -GTCCAATTTGAAAGACGGTTGTGATGGATAAGCTCGCAACCACATTTCCCAAGGACTGAA -TAATCTCAATTGCGGTAAATAAGCGAGCCGTCTCATCACGCTCAACCAAAGTGGTAATGA -TGGAGCGAGCAAGGAAAACGAAGCCCGATCCTGAAGTCTGGATGATCATGCTGGGGATGA -GGGTCGCGATGCTTGGGGAGAGCCCAATTCCGAGAGTTCCAATTGTAAGACAGAATATAC -TACGCCGCGCAAGCCACAGGTCTTTCTCTGTCGAAGAAAGCCGGGGATTTAGAACTCGGC -TTTTGAGGAAAGGTAGGCCGAAGAGGAAGAGCGGGATTGACACAGTCGGGCGCGCAGACA -CTAGGAAATTGGACTGTGCTAGTGTCCATGAATAACGTGTCGATATATATTGGGTCAAAA -ACCAGGATGAGCCTCGGCTCAGGCGATAGACCAGGAATGCACTCAGAAGCAGAAGCACTG -GTTTTCGGTTGAGAATAAACCGGTAGGGCCTGAGGTAGAAACGGTAATTTGCGCGGAACT -TTGCCAGGATAGATTGGTGAGAGTATGATGGGATGCTCCAAGACGGCGGTTCATTCTCAT -GATCACCGTTCATGTCGGGTTCTGTGCGATGTGGTTCAGGTTCGTTGAGATTATGTAAAG 
-AACGCTTTGGTTCAGTGCCATCTGTCAGATCTGACAGCTCAACATTTGCTGGTTCGGCTT -TCCGGGGTGGCAGTGCATGCTTCGTTTCCGGTAGCACCAAGATGAGCATCATGCCGGCTA -TCACAATGCCGAATCCAATCACCAATGGTATCCAGGGATTCAATGTCATCAGATAAGAAC -TAAAAGCACTGGAAGCAAAGTCGGCGCCCATCGATACGATGGCAAATTGAAAGAAGATGC -CCGCCCTATGAAACTCGATTAACTTTGATTAAACACGCCCTAATGAGAGGAGTAGCATAC -CTTTCTGCCTCGCTGGTTACATCGGCCATCATGGTCCAAATAATTGCGAAAGACACCACT -GGACCACCCCCAAACACCCACGTTAAGGCAGTAAACCAAAGCGCACGTAGGGGGAAAACA -TCTGAGAACCACAGTATAACGAGCGTGAGGAGTGCATTCAACCCAAAGCCGGGAATGGAC -AAAGCGAGTGTAGATTTGCGGCCACGACGGTCTGCCAGCAGACCATAGGGAATCGCCAGA -AGAGCACCTGCAGGATAATCAGAGAATTACCCAGTCCTCAAAGTTCAAATCAACTTACTC -AGTGACCCATCAAAGAACATATGGTAACCTTTGACGGCTGCCAATTCAGCCTGCACCTCG -CCGATCTTACACATGGCCTCGGGCACTTGTCCATCCACGCCGATCTTAGTCGGATCCACA -CCTAAGAAATACTGGCGACAAGCGATAGACTCCATGATCCGGACCATCGGGCCCTCCAGG -AAAGCGAAGCCGATTTCAACTGCGAGGACCATGGCGAATAAGGTCGCCTTGAGGCGGAAA -CTGATAGCGCTGGCAGAGGCATTGGCCGTCACTGGGAGACTGCCTATACTGGGCTTGCTA -TGGACATGACGATGGGCTGGACCGGGGTGGCTCAGATCGGGGGAGTCGAGGAATGGGGCA -TCCTCGCGGTCCTCGCGATCGAGAGACTCATGACGTATGGAGGCCATCTTGGTCTGTCAT -ACCGTCAAGGGTATATATATAGGGAATGTAGTCCGATCGGGTATGAATGAGGGTATGATC -AGATCATTGTTAGGAAGTTAGGGAAAGGAATGTGAGAAATCTGTACGAGGCCAAGTTGCC -AATCGTCGCAAGCGAATCCAGCACGGTGTCTTGGCGGGATCTCTTCCAGGGGTTAGGGGC -CAAAAGCCGAACTAGTCCCGGGTACTATTCTTATCCATCATGCAGATCAAATTCTTATGT -ATTTGATTGTTATAGAAAAAGAAACAGTATACCGATACCTAGGTGTGCCCCTTTATATAC -GAAAGAAAGAAAAAAAACTTCCTAATTACAATCGAATCCTGCCTCTTCAAACAAGCTTCC -CCGAACTTGCACTACTGCCGACAGCTCTAGGATAAATTCATAAAGAATCACCACATTAGT -CAGTACATGACCAAGGGTTGCGACTGGGGCATCCTCCTTATCTTCTGAGTACGAAGCTCC -CTCCTTGATAGATGGAGATAGTGGATCCGCGGGTGCCGGCGGGAACGGGGTGGCAAGGCG -CTGGCGAAGGGCCTCGGCGGCCACAGCCCGGGTGTTCAAGGATCCGCGCGATTCGGAAGG -AACAGAAGATCCATCCCGATCTCGACTAGAAAACACAGAGGGCGAATTTGCCGGGTACGG -AGACCGGCCACGGGAACCTAGGGTAGAGGCATCCGATGAGCCAGGATATGACGGGCGTGA -GCGGTTTACTGAGGATACAGGAGGCTCCGAGGGCTCTCTTGATCGGGCCGGCGTCTGCGG -ACGATCTTCCTGGTGCAATAACTCTCGTCGAGCAGCGAGCAGTACATCGAAGTCGGGCAA -TTCCCGTTGCCATTGTCGGACCTGGCTCACCAGTTTTTGGAGCGCCAAAGTCAATGTCGG 
-AGCAGCTGTGGCAGGCCAAGTTGGTTGCTGGTAGGCCCGACAAACATTTTCAAATCCATC -GTCTAATTCTTCAGAGGCTGGGGCCTCGTACTGGTTGGGATCAACCAACACCCACCCGTC -CATGGAGAAGAGATGTGAGGACGCCAGGCTCATAAATACACCCGTTAATGCCAGGTTGAC -AATGACGCTGTTCGTAAAGAAAGATTCAAAACACATGAGAAGGGCCTGCACTATTGGGTC -GTCTTGCTGGAGCTGTAATGGTAGGGGTAGCGTCTCCTCGTCTTGCTCCATTGAAGATAT -GGGCAGGCACAGGCGAGATTCAAGCACACATGTCGCATCTGCAACATAGTTGTCATACGA -CTCATTCAACGTCGGGTCGTCGACGAGGGATGTTCCCATGACCAAAAGTTGCTCCAGCTC -AGCATTCAAAGCACCAACGGGGCGCTGATGTGCAGGCTGAGTAGGAATGGTATGGATGAG -TGCCCGTGCAAATAGATGGTGTCTCTGAAGAACAGTATTTAGAAGGCGTAGTGTAGCTAG -AACCGTCTGACAGTTCAATGATTGGAGTCCCAGTAAGGCCAAATCGCGAAGATTGAATAG -TGATGGTGAGGGCTGAGCCGCTTCAGAGGCCAACGCAGCTAGGACATTAAGCGATTTCCG -TCGACTTGCCGACATGTCAATCTGCTCCTCAGGCTGAGGAGAAGATGCTAACAAGAAATG -TAAGATTCGATGGACCAATTCGCCTTGATCGATCGAGTCGAGGATGCGGCACATGTAGGT -CAATACCGCTGCTGTCGAGCCACCAGCTGCATCTGATGACTCTAGCAAGGAGGGATACCT -ATAAGAGTCAGTTATAATAAAATTCCAGAGCGAACAGTGTACACTGACAAAAGTTGTTCT -AGAAACAGCACCTGGAAATGGTCTAGCAAGGTGTCGTTGACTTCGGCCGATTTGCAGTGG -TCGATTGTATCCTGCCAAAACAACAGATATGACATAAATGCATCCATTGTCTGACCTAGT -GTTGGCTGCAAAGCGGTCTCTTGTTTGGCATGATCAGACAACGCAATAATATGAGGGACA -TCCTCGTCCTCGTCGATGGGAGTGAATGACAAACTTTATTAGATTAGTGCATTGTCCCAT -AACGATCAAAATCAGAAACTTACTGTCCCAGCTGGCTGTATAGTGCACCCAATCCAGTGG -CCATAAGTGTAGCCAGGTCACTCTCGATCAACCACTTCTCGAGATTCTTGGACCGCGATG -CGGTTTCAATTAGGTAAAGCAAACCGGTGCGGGCAAAATCGCCAGCACGTCCTTCGTGAT -GAACATAGTCCACTAGCAGGTAGAACAGGGGAAACTCGTCCTTCCTCGTAGCTCCTGCAA -ACTCCGTCCCACTTGGAGCTTGAGAATCGTTATCATGGGCGGTCGGTGTTACCCGAGGCA -CAAACCATGCGGGCAGTATAACAGGCTGAAGCCGAATGTTGTTGGCAATACCAAATATCA -ATTCGACGAGTCGGCCCTCGATCTCGTCCGATGTCTTCTCGGCGCGTCGGACGAGATCTA -CCAACGCGCGTGCGAACAATCGGTTGTCCACAACCCCATCCACCTCCGCATCGATCAGGG -TATTGAAAAATACAGTGGCTGATCGGATGACTCCTTCATCGTAGAAAGACAGTGCAAGCT -TAGTCACTGTCACAAAGACCTGAGAAGATGCTGCATAGGCGACACAGGGGTGCGGTGCGG -GCCCACGGGCTTCATCGCTCAGAATCGAGTTGAGTCGTTCAACGTACGTCCGCGCGTGCG -CAACCGACTGTTCGCCCGAAGGATTATTTGTCGCACGCCATATTTGCTATCAATTGGGTT -GGTCAGTGATCTTGTGGAAGCGGACATTGTCGGTACCTGGAGGGCATTGCAAGCACGCTT 
-AAAGGCAGTCAGTCGTTCCGTAGGCGAGGTGGCCTTGGAGGGTTTGTTGGACAGCGCGCG -CGAACCGCCTATAAGACGAGACCACTACGTTCTATTAGTAGAGTACCATAAAGAAGTAAG -GGGCACGTACAAAGTCCATTCTGTGAGGACAGCATGATCTCACAGAGTGAAAGTGATGGG -GGTCGAGGATGAGTGGAGACGGTGTTTATCGCGCTGATCGCCGCTTTACAGCTACAGAAC -CATCAACTTCCAATATACTACATGCACATTCGTCATTTATACAGTTACTGTAGTGCATAT -TCGAGTTTCGACAAAGAAAAATGCCAGTCCAGGGCGCGATTGAATAATAAGACCGTGCGC -GTATAAGGGCTCGAGTCATTCGAGCCCAAAAAACCAAAGATATTCACAGTGAAATGCGCT -CCCAAATTTCCTCCCACAATTTAAACGCCTAAAAGAAAAGAAAAACCTCATGAATCTATG -CGCCTGATACCAATCCCATCTAGCACGTCGTCAAATCATTTACGGGTCAGCCTCAATCAC -TCTCATCCTTTGGTCGCCGCCCGCCCCGCTTACGGCGACCCTCATTCTGATCACTCCCGT -CGTCTTCGTGTTTGCGGCTAGCGGGTTGATCAGGAACTTTAGAAATAATGTCCGCCAAGA -AGTCGAGCACTTCATCCTTGGCCACGGCCTGTTTGAGATGCGAGGCCGTGACGCGCTTTG -AGTTGCGATCCTTGGCTTCTTGTGCCGCCTTGGTGACGAGAGAGATCATGAAGAGTTCGA -GGGCCTTGGCTATAAGAAAGTGTCTGGTCAGTAGCTATTGCAAGAGGAACCATGAAGAGA -GGCGCAAAGAAACTACTTACACACAGCAATCGGTGTCACTTGGGCAACCTTGCCGACATC -CTCATCGGCCTGCATGATGCGCTTGATGCGCGCCACAGGAAATTTGGTCTTCACTTCAAT -CCCCGCACTTGGAGGCGCCGGCGGCATCTCTTCTTCCATCTCCGGCGCAACAGCGTGGAC -TGGTGCCATTGCGATCGGCTCTGGGGTTGGTGGGATTGCTTCAGGCATTGGTTGAATTGC -TTCAGACATGGGTTGAATTGCTTCAGGCATGGGCTGGATTGCTTCAGGCATAGGTTCAGG -GATTGGGGCGTTATCGGACTGAGCAAGACGGCTTGAGCGACGAGCCATGTCCGAGTCGTC -TGGCTGATATGGCGGGACGTACTGTTGAGACACGCGACTTAGATTTGGGTGTTGATAAGG -TTGGTATGAGTTGAAGAAGGGCGATGTATAAGAAGTAATTGGACTCGGAAGTACTTGTGG -AGGAGGATGAGAGAAATCAGGAGAGCGAGGCCGATAATTGTCCTCTGTCATGGTCAAGAA -AAAAAAATTATTAAGTGGAATCGCGAAAAAAAAGTAGGTGGCCCAAGCTCCCTGGAGAGG -AGAATCCAACGACCCGACTCGGGCGCCGCTGTCCTTTTTAAATTCCGTCGTGTGTTTTTT -TGGTAATGTTTTTACCACGTTTCACTGTTTTGACTGTTTTGACGGTTTTCTGGGTTTTTT -TTTATTTTTCTTACATTTTTGGTCCCTTCTCCTTTCTCCTTCGTTAGATCTCACTTCTTC -ACCTTTGGATATTTGTATCTTTTCTTATTATCTGATACTTTAATCCTTCTCAACCCCTTC -ACCTTGTACCTGTCTTTGTCTTTCAAATCCTCAAATATATTGAATTATCCGTCGTAGGTG -ACCTCTTCCCTTTAGGCACCCGTAGAATACCCGATGACGATAAATCCAAAAACGTCGAGT -TGAATCAGATGAAACCATAAGCGTATCGCTGTTTCTTCTGTGAACACCCAAAATAACTTG -ATCTAATCTGAGGGGTCTCATCACATCTACTCCGTACTCCATACAATGCGCCTTGTTTGC 
-TCCACGTCATAAGTTTCTCCTCCTTCTGACAACGTAACTTCGGTGAAATAGTTTCTCCGA -CCAATTTCAAGCATCAACCTCTACACTTGTTACCGGACATTGAAGCCATGTCACCGAAGT -TCTCTGTCTAGTCAGGGTCATCTAATATCTCCAGCCATCTCCAGCCATGGCGGTGGCGGA -TGTGCCCGGTCTCATGGATATTGCCAGGTACGCACTTTACATGCCGATCAAAAATTCGGA -ATGATGCTAATGAGTTTGCAGTACCCTGGCCCAAGATGAGATCCCCTTTAAGCTACGATG -TGCCATCTGCAATAAGCTGGCAGTGAATGCATTTCGGCTCCCTTGTTGTGACCAGTCTAT -CTGTGAGACCTGTATGTTGGATCCAATGCTTCGCACCTGTCGCAAATAGCAGCTAATCTG -ATATCTACTGTAGGCCAAGCTTCCCTGTCCGATACATGTCCGGTCTGCACACATACCCCT -GTCTCTCCTGATCTCTGCAAACCCAATAAGGCTCTGCGGACCACTCTCAAGGCTTTCCTG -CGCACGGAAGAGAAGAAGCGGGAAAAGGACCGCCAATCCGCGGCGCCTCCAACACCATCC -ATTATCACGTCTGCAGATACAAAAATTCCTTCCCAGGATGCAGTTCCTGATCAAAATGGG -GCTGAGCTAGTGGCATCTGTGGAAACCGACGCACCTCCCCCTCCGGCCCCTACGGAGCCC -ACAGAGTCAAAGCCTCAGGAGCCTGGACTAGACACACCAGCTCCGGAAATAGTGGGAGAG -AGCATTCCAGAGCCAAGTGCCCCAGATTCGGCAGATCAGGCAGCCCAGGTAATTTTTCTC -TGATATCGTATACTCTATTTGTTCCAAGCTGACTCTGCGTACAGCCCGAGACAACTGGTG -ATCAAGTCAACGGCACCGAGCCGACCCTTGAGTCCACGACTGAAGATGCCGCAGTCAACG -ACGCCCCAATCAATGGAGAGCCTGCTCCCACGCCTGAGGAGGTTCCTGGGGGACGGGGTA -CCAACTCGATGACCCCAAACATGGCCGGAAATTTCCCCGCGATGGGCTGGAACGGGAACG -GCATGAATCCTTTCATGGCCGGCATGTTCAATTACCCGAATACGATGGGTAAGCTCAATA -ATACCAGTGTTTGTTGCTTCGACCTCGCTAATTTCCATCCACAGGTATGTCCATGGGGAT -GGACCCGATGGTAAATCAGGGGATGTATGGAATGAACATGACTGGCATGGGCATGAACAC -TGGGATGAACTACAACGGGGGCATGTATGGATCCTTAGGATGGGACGCATCCCAACAGAA -CAACAATATGTGGCAAGGCGGCCAAAATAAATTCAATCCAAATGCTTTCGCAAATGGCAC -GGGCCCTCCTTATGGAGGAGCATTTGGCGGATCTAATATGTCTGCTTACCCTTCTCACCC -AGATTATCAGTCCGGTTACTACGGCGGCTACGGTCGTGGTGGTTACCGAGGTCGAGGCCG -TGGCCAGTTTCATGGCTCCGGTCGAGGTGGCTTTGGACCCATGCAAGGCCATTACCGTCA -GGGTGCTAACTCAGGCTATCCCAACCAGAATCCAAGCATGTCTAATGGTCTGAACAGTAC -TCAAATGGACGGTCAAGGTAATGTCCAGACAAATGGGGCTCCAGAATCAGGTGAAACTGG -ACCCGATGTACCTGGTAATCCAGACGATATCCCCACAGGCACAGACCAACCAACCCAACC -CCAAGGAATTCCTACCATTGACAGCCTTGATAATCCTGTACCAACTGGCCCTGGATACGG -TACTAATGGGTATGGCCAATACAGATATGGCCGATATGGACAAGAACGCGGGCCAGGCGT -CGAGGGCGCTCCAGCTGCTCCGCGTGCCATGCGGCAGGGGCTCCCCAACACCAGTGTGCT 
-ACGACAACGTGGCTTCCAAATTCAGGGCCGAGCCAGTATCTCTGGGTGAGACCTTCCCTC -TTTCGTCTTTTTTTTTTTTGTGGCAATCTAAATTCTAACCACCCTTAGTGAACCTACTGA -GGATGATCACACAATTGCCACATCTAATGCCCTGTCCCAACGTGCACAGTCTCGATCCCA -ATCACCCTCACAAAACCTAGCTTCTGCCCCCCGTTCCCCGTCCGCTCATGGTACCGAAGA -CGACCGTGAGAGTCGGCGTGGGGCAGAAGCCAGACGCCCAGATCGAGTTGATGAGCTACA -ATCCAGCGCCCGTCACTCCCGCTCTCCATCCCGCACGTCCTCTCGACCGTCCTCGCGACC -GTCCTCGCGACGCCAACATCATGACAATGATCGGGAGCGAGATAGAAGAAGCAACCACCG -TTCACAACGCTCGCGTCGTCATCGCAGCCATAGCCGCAGCTCAAGTCGCCTAGAAAAGAT -CGCAGAGAAAGATAGATCCAATGGCAGGACCAAAGCGTCCTCTGAAGCCCCGGAGTCGCG -CAATCTAGCAAGCCGAATCAGCAGCACCTACCGCTCCTCCAAGGACAGAGGGAGCCGACG -CGAAGATGACAGATCAcgagaacaggaccgagatctccgacgccgagaccgcgaccgaga -ccgcgaccgagaccgcgatcgtcgaggcagagaccgtgattccgatcgcgaacgtcgccg -tgatagcgaccgcgataaagacagggcttcagagcgcgaccgagatcgaccaagagagcg -agagcgTgatcgggatcgggatcgggatcgcgatcgcgatcgTAAGCGTTCTCGCCGCGA -TCGATCTCCCTCTACCACCGGCGGCGACCATCCCCAAGCCCGTCGGGTGAAACGTGGCGA -TGAGGACCGTAGTCGGGACACGAACGGAGTCTCGACCAAAAAGGCCGAGCCCGATAAAGA -CCCTTACACTCTTGAGCGCGAAGCGCGCAATAAGGAGCGCCTTGAGCGTGAGCAGCAGCA -CCGTGAAAAGGCCAAGTCTGGCCGCCGCCGTGATAGCCGGCAGGACCGTATGGTGGCGGG -CCGTCGCATCAACTACAAGTACGAAGATGAACTCTGAGCTCTGAGCTTGGAGTCCACTTC -TTTTCATTCCATACAAAATCATGAGACATGGCGTTAAGGGTTGCTCTCTCACTTCTTGCA -TGTTATCTTCCTTCAGCTGGTATGTCAGCCTGCTCAACAGTAGATATGGGATTTTTGTAG -GCTATTTGAAATGGCTTCTTATGAATAGAGACAATGCAACTCCAAATGATGGAAATGGAT -CTTTTATTCTAATCCCTGTCTTGCTACGATGATTACTCTTTTATGGTAGAAGAAACGAAA -ACAACATCCTCGTCGCCAAGATCATCCCGCGATAGTCTTTATATATATCACAGCATTCTG -GACTGCTCCCCTAACCGAAGAATATTTTGATTGAACCTTGCATGATTGTTGACGAGTAGC -ACTTCAAATCACCCAGCAACGTGTTACTAACCGCCAAACTTACGTATACTGTTAATTCAC -TGCTAGTCTTTTATTCAGAGTGTGGTCTGGTTTTCAGCCTACAAAACCCATCATGGGATG -ATCCCATACACTGCCATAAGCGAGTAGCCAAGAGGACTAGCTACAGATACTATGGTTCTT -CAATGAACACAACATCCAAATTTTGATTTCAAAGGCAAATGTATTGACAATTCCGCTAAA -AGGAAACAAAAAATCTTATCCACTGCACCGCCTCATATACGTCAAACACCGTCTGATATC -CGGAGCATAGTCTCCCAATGAATGTACCTAGATAATACTTGACCAACCTAGCAGCCTCCA -GAGACTTAAATTAGGACCATATTATTTAGTTCAGCTTGCTTTTCAATGCCTCAATTTCGG 
-CAGTCAATGTTTGCATCTGCTGTGCCTGGTCGGCAATAGTCTTCGTTTGTTCGGCAAGTA -GGTTTTTGATCTCTTCGATGTCGTTCTTAGGGATAGAAGTGGTGGGCGAAGGGACTTCGC -TAGCGGGTGATGGGACAGTAGGGGTCTAGAGTAATTAGCAGCCGGTAAAAACCGTTAAAT -GTGCAAAGGGACAAGGGAACTTACAGCGACAGCGACAGCCGCGGCAGGTTGAGGCTTTGT -CTCAACCTCCTTCGGCCTCAACGGGCTGCTCACACGGGGCGAAGCAGCCTCGGCAGGGAC -AGAGCGAGTGGGGCGCTCAACAGGCTTGGGGATTTCCTCGAAGCTTGAGTCGTCATCGTC -GGAGACGGTAGCCTCTTCTTCCCCATCGGCAAACTTGTTCACCATGGCGGCCATTGAAGC -ACCCTGTTCCTTCATCGATGGGGCGGGGCGCGCAATGGGAGTGGGTTCAGAGGCAGGTTC -GGAAGCAGGCTTTGGGGAAGCAGGCTTTGGGGAGGCAGGTTCAGTGGCCTTGGGGGTCTC -CTTGGGGGTATCAGCAGCTTTGGGTGCCTTGGTGGGCTCGGGCACAGGTGCATCTATGGA -GCCAGTTGGCTTTTCCTCCACCCCTGTCACCTCCTTGATGCCTTCACCATCGAATAGGCT -GGCCATGGAGATCTTTGGAGGAAGTGCCTCCTTACCACCCAGCCATTCAGACGAAGACAT -GGCGGGGGTCACTCCAACGGTGGGTGGGTATATGTCGTCCTGGAAGTTCTCAGACCGGCG -GGGGACGATGAATGAGACGGGTTCGATGTATTGATCGTTGACGGTCTTGTAGGCACGGGC -TACTTCGTTCTCATGCATGTTGACGCCACGCTTGGGCATGAAAGCAACTCCTCGCTGAGG -GTCAGCAGACTTATGCTCCGACAGGAACTCGAACTTGTCGTTCTCAAGCTCGAAGTACCG -GATATTGCCGTCACTACTGGTTATCAGATATTGCGAGAGCATAAGGATTCAATCATGGAC -ACTCACCCTCGTCCAGCCAAATATAGCATGCTGTTTCCATCATCCCAGAAAGGCATGCAC -ACGCCAGAAATGGAATCCAAGGTCTTGAAACCGTTGATCGGTTCCCGCACGGCACGGATA -TCCCAGAGCGCCAACTGACGGTCGCTCATCTTGGAGAAACCGGTAGTCGCAATACGATCA -CGCTCTCCAAGCCACACAGCTCGGCTGTTCTTGGCACCGGTGTGGCCATTGGTCTCGTGA -GCCGGACGTTCCTGTCGCACATCCCAGATGCGGAGCTTCTTGTCACGCGAGGTGGTGACC -AAGAGGGAGCCATTGGCACTCCATGACTGCGACTGCACAATATCGCCAATGTTTAGCGTA -AGCTTAGCTGCACCAGCTTCAATATCCCAGATCTTCACGGTATAATCGCCCGAGGCCGTT -GCCAGCACGTTTTCTGCCGCGGGGTTGAAGAGCACGTGGCCAATCTTCCTGCAGGGAGGG -GTAAGTATAGAGCGATAACGAGTTTAGTCAAGGATTCATACTTGGGATGGCCACTCAATT -TGCCAATAGGCGCGAGATCCTGGATGTCGTCGGCATCGGTATCAGGTCGAACGGTGAAGT -TCTCTGGAACCCGCCAGAGGAAAACCTGGAATGGGTTAGTCGTAGAGCCAATAGCTGTTG -TATGTCCCACTATACCTTGCCATCATCGGATCCAGAGGCAATCAGGTCGTCATTGAAAGG -GTTCCTGCAGGGTTTCAGTGAGCTGCGGAGTGGGCGTTCTAGATTCGAATTCCTTACCAG -TCGGTGTCCAAAACGACGGCAGTGTGGCCTCGGCACAATGGGATGCGCTCCGGTAGCTTG -CCCCGTTCCTCAAGAGGAATAATCGCAAAAGCTCCGCCACCACCTGCTTCCCAATTGACC 
-GCAATGTGTTTCGGGTTGACCTAGATGCACCGTAAGACCGGGTTCGCCAGTGAAACGTCA -GGGGTGATGCGGAGGGCAAGACACACACCTTGAGGAGGTTGGTATCCCAGGCATTTCGGG -ATACGCGAAGGTTATCGTAACATTGATCCTACGAAAGTTAGAATAGGAGATCGGGGGGTA -TAGGAGGTAGCCTTACCTTTCTCGTCGATCGCCCGAAGACGTGGCCTGTAAGGGTCAGCG -GGGGAGGCGCATTTTGTATTGTAGACGTAACTCACGATATTTGGAGGAACGAACAAAACG -CCCGGACATGGTTACCAAGTGTTCAAACGAAGACGTAGAGACAAGGAAAAAGTAAGAAAA -AGGAGTAGGTGAAAGAGACGTTGGAGAAGCTCAAGCAACGGGGCTACTGGGGCCGTTTCG -GGAATCCGAGTGGAGGCAAAGAACGCCCCACGAATGCCAACTAATTTCGGAAACAGCTTC -AAAAGGAGTCAACCAAGAACAGTAAATGTTTCTAGGGCATTGTACAAACTGGTCTATCCG -AATGTTCTTTCTTTATTTGGTCATTGGAGAGCTTCATCTTCGGTCTTGGGACGATCACGT -GCTATTTTCAAATACTCAGCTTTGGTAATACAAATACGACAACATTGATGCCATTGCCGG -ACTAGGAATAGTGTGATACATCTGCCTGAGTCAGCAATTCTTTGTTTACTTTGCATCTCG -GTCCCTCCAGAAACCAGTTTGCGCTCATTTGACGCTCATACTTCCAAGGTTACTGCCCAG -ACCATTTATTGGCATGTCCTGTACCTCTTCTTCTCACCCCAACTCGCTAACTTTCAGTCC -CAGCGGCCAACACATACAGTGGCTGGACGTGAGCTCATCCTTCTCGCCCGGATACTCTAT -TCGCCGGTAACCTGGCGGTCAACAAATCCCAATATGGAGGGCGAAAACATGGTGAGCACC -CCACTTTGAGTCTAAGCTCCATCGGAGTACATGGGATCATAATTGATTATATTCAGACCC -GAACCACCAGGGCGTCCACTCGGACGCGCCTACCGCAAGCCGGCCTACGTGAGGCTAGCT -CGGCGACAACCAATTCGCGCTCCGGCATCATGGCGCCAGGTACAATCGCAAACAAGGCTA -TGAGCATGTCTCGTATGGAAGATCCTGAATCATTAGCGCAACTTGAACTGACAGCGCTGG -AAGCAACCCGCCCGAAAACTGCAAACCCGGAAACGAGGAGACCTGGATCCGTCACTCGAC -AGGCTAGTACCACGACCAAGGCGCATAACCGGGGCAATTCATACTCATCTTCAACCATGT -CCCGGCCCACCGCACCCGCGACTCGCGCAACCAACGGCTCCTTCTCCTCAACCGTCGGCC -CCGGCACGCGACCAGCATCGGCCATGTCCCGGTACCAACCGTCCTTCAACGGCCGCAGAC -CTGTCGGCGCCTCGATTCCCCGCGCAGCGAGCGCATTAGATACCCATATGGAAGACGCGT -CTCCTAGTGTGTTAGGAAAACGAAAGGGTATGCTACAATTTCCCTTATCTCCCAGTCGTT -TCCCTTCTTGTCCTGTTGGAACCGTGTCCCCCGGATTGGATGAAGGTTGGAATGGGATTG -GTGAATTGGCATCGGTCTCTGTGTCCGAGAAGCCTGGCCCTTTCCCAAGTCCTTCCAATC -TAGCAACTATCCCCGTGTTGCCCAGTACCCCATTTCGGAATGTTACCACCCCAAAGTCAC -CCGTACAAATGCAGTCCTCGTCCTCTCCCTTTAAAACTGCCCCTACCGCACCCTACAGAA -CCCCATCACATTCCCAGCGGCGCATTGTCAAGAAATCCTCACATACCGCGTTTTTGACCA -AAGGGTCTTCGATCAGAAGTTTTGACAATATCATAGGCCCGGAATGGGATCAAGATTCTC 
-GTGAAAAGAGTATGGAGGGGTTGATGCAGACTTTGCTGGCCCAGGTAAATCAGCAAGGCC -AGGCAAGCTCGGGTCTTAAGGAGTCGATTGATATCTACAAAGCAAGAAGTAAGTTGAGTC -GGATAGGGCCATGTTGAAATAAATCTAATCCTTCCATCCAGTCAGTGACCTTGAAAAGTC -TCGGGATGAGGTGAAAGAACTCAACATCACCCAACGCGTGGAATTGGATTCGCTGCGAAA -CCAACTACATGCCGCAGAGAAGATGTGTAAAGAGGACAGGCGAGATCATGAGATCGCTTT -GGATGATCTTTACCAACGCCATCGCATTGAATTGGACTCAGTCCAACAACAAAGCAGAAA -AGAAATTGACAAGATGAATGGCCGACATCAAGAGGAGATTCATGACCTAAAACGACGATT -TGATCGTGAGATCGAAGGCGAGAAAGCGGCACGAGCAAGCGCGCTCAGTAAGCTCACTTC -CAAATCTGCACTTGATATCCAGAAATCCCAAATTGAATTGGAGAGGAAGGATCGCGAGAT -CACGACATTGCAGGATAGGCTAGAGGCTCTGAAGGCAGACCTTGATCGCGAACGAAGAAC -TGTGCAGGAACTCAAACTCAATCTGGATACCGCAAGCAGCAACAGCGTTACGCTAGAGTC -ATCAATTCGTGCGCTCAAGGCTCGCATTGAGTTCCTTGAAAGTGGACGGGAGGAGCAGTC -TAAATCTTTTGAGCGATGCAATCGGGAGATGATGGATGCATTTGCCGAGACGGAGGCGAT -AAAGGAGAAGCTCCGTAGAGAAGAGACACTCCGACGCAAGCTTCATAACCAGGTGCAAGA -GCTCAAAGGCAACATCCGTGTGTTCTGTCGAGTTCGCCCCTCGCTTAACAGCGAACCTGC -CTCCGATCTCACTCTTATGCAGTATCCCGATGAAAGTAACGATGGTAAGGAGATTAATAT -CCTTGGTCCCGAAGAGAAGAGCAGCCTTGGAACAGTTACTCGGAAGAATAACACTTTCTC -CTTTGATCGTGTATTCAACCCATCCGCCCAGAATGCCGAGGTTTTTGATGAAATTAGCCA -GCTTGTTCAGAGCGCTTTGGATGGCTACAATGTTTGCATCTTCTGCTATGGCCAGACTGG -TAGTGGCAAGACGCATACCATGTCATCGGCTGATGGCATGATCCCTCGTGCTGTTCACCA -GATTTACGAGACCGCGCAAGGTCTGGAAGAGAAGGGTTGGCGGTACTCGATGGCCGGTAA -CTTTGTCGAAGTTTACAACGAAAACCTCAATGATCTCTTGGGCAACCCGGATGAATTAGA -TAAGAAGAAGCATGAAATTCGTCATGACATGCAGCGGGGCAAGACTACTATAACCGACAT -TACGACTGTCAACCTTGACTCCCCCGAGATGGTTGAGTCTCTTCTGAAAAATGCGGATGC -CAACCGCTCAGTAGCAGCGACCAAGGCCAATGAGCGCTCATCGCGGTCCCACTCTGTCTT -CATCCTCAAGCTCACCGGCCAGAACCACATCACTGGTGAACGCAGTGAAGGCACGCTGAA -CCTTGTTGATCTGGCGGGAAGCGAACGCTTGAGCCACAGTGGGGCGACTGGCGAGCGCCT -GAAAGAGACCCAAAACATCAATCGCAGTCTCAGCTCTCTGGGCGATGTGATTGCAGCTCT -CGGTCAAGGCAAAGAGGGAGGTCACATTCCATACAGGAACAGCAAGGTATGTTTTCTTTT -ACTCCCATTTTTCCCCCGCAAGGTATTAACACACTCTCTAGCTCACCTACCTACTCCAAT -TCTCCTTGGGTGGCAACTCCAAGACTCTCATGTTTGTTATGGTCAGCCCGCTCCAGGCAC -ACATGTCGGAGACCTTGACCAGCTTGAAGTTCGCCACCAAAGTACACAACACCCACATTG 
-GCACCGCAAAGCGACAGGCGCGTGTTCGCGACACCTGATTATGATGATACCTCTTTATTT -TCTTGCATATATCTTGTTAATTTTTTTTCTACTGGGTTAGGGGTCTTTGACGATCCTGTC -CTCTTCCCTGGCGTCTTGGTTGGGCTCATTGATAGTTTTCCTCCTTTGTCAAAGCCTTTG -GAGATATTTGGTTCCTGTCTTAGCGTGTGATTTACTGTATGGACAATGATACATTTGGTT -TAGTCAATGCACAGTCGCAGGCTCTTAACATTTTATCTGTTTGGTTTACATTGCGAGATA -TATGCTAGTCTTGCCTTTGAATGAATTTACGGATATCCAGTATCGTCTGTTCCCTTTTCT -CAATCCAAGGAAAATGCCCGCATTGGCCATAGGTAATCATTTGCGCATTTTTGATGTCCT -TAACTGTTCGTTCGGCAATCCCTGTTCCACAGATCATATCTTCACGCCCAAATATGACGA -GAGTCTCTGCCTGTACATCACCAAGTCTCTCCACCATTTGCATTGGATGCAGCAATCCCT -TGTCACATCGAGCCTGGGCTTGGTAACACCAAACCGACAACTTGCGGTCACCGATATCAT -GCAGTAGCTCTGGGACATACTGCCGTGGATCAAAGAAGTACAAAGGCCACATGGCTCTCA -CTGACTCCGTAAATTCCTCGTCGGAATCAGTTTTCTGATCCAGCATACTGTCCCAAGCAT -CCTGGTATCGAGGATCTATGCGCGTCGCTTCAAAGTGCAACAATTTTCTGTCCTGGAGCC -CTACTAGCTGGTGATCGAGTAGGACGAGCCTCCCAACGCGACTGGGATACATCTCTGCAT -ACCCCAGCGCAATCGCTCCGCCGTTGGAGTGGCCCAGTAAGACCGGATATTGTTCCAGAT -GCAGGTGCTGTCGGAGATCCTCCAGGTCGGAAGCAAGATCGGGCATACTGCTCATCTGGG -ATCTCGGTGGTCTGGATGATCCGTTTGTTCCACGAGAATGGAAGAAGATGACAGTAAAGC -TTTCGGTATTTGCAGTACTGATGTGGTATTCCGGTCTCCATAACTCTTCCAGTCCGTTTT -GAAGATACTGGGATCCTAAGCCCCAGCCAGGGGATTGAACAACAACCAGGCTGTGTTGGT -CTCCTGTTTCGGTGGATGGATGATACACGATATATTCAAATATCAACTCTTTTGCTGGAA -CATAGAAGGTATGGGTTCCTGAGCGGAACACTTCCGACATTCTGGAAGAGTTTGAGAAGT -AGCACCGATATTGGAAAGCTACCAGGTCGTATGCAATCGAAACCAAATTTGTTCCGTTCA -TACCAGCATCAGACTGTGACGTCATCCAGAATAGTACGACCTATGTTGGAAAAGTGGAAG -CAATTTGGAATCAAGACGTGCATTATCAGATACTAGAGTTGACCAAGGTATCTGTCCGCC -TTCTGCTTCACCTGCTGGGCCTTCTCTTGAATAGCCGCGGTGTCCAAAGTCCCATCCTCA -CAGATATACGCATCTATGAGCTCAGCCGGAACCCATTCAAAATAGATATTCTTGACTTCG -ACTGTGTAGTTTATATCATTTCCACCAGCGGCCTCAGCTCCATCGGTAAGAATCCTGACT -CCTTTCATGCCGCTGTCCATCCAACAACCTAATAGCTCTTTCGGATCATTCTCTTCGTGG -CTACCATTAGTATCGATACCAGACTCAGCCACTTTTTCTAGCTCACTGAGAACCAAAACT -TTGGCGCTAGGACAAACATATTTGGCACTCAGAACAGTTGGTAAAGATCCAGTTTTATTG -CTCACTGCGCCTAGATGCGAGATCTGGTCGGCCCCAAGAAGGACGAAATCCACTCCCTTG -CTGGCGAGAGCAGCAGATGCATCGGTGTACACGCTTAGTTCCAAATGTCGATTGGAAGAT 
-AAGGGGAATCTTGACTGAAAGGCAGAGAAGATGGAGGACGCCATTTTGACTCCTTCGCAC -AGCGGACGAGATTCTAGGATCCTAAGGTCCAGATTGGGAATTGGAAGGGACGCGAAAGCC -TCCAGAATGCTATCTCGGATGGTAGAACTGGACGAGAGAGTGAGGAAAACAAGAGAGGAC -GGAGGCTGCGATTCTGCTGTAGGCAGAACACTGGATTGTAGATACCCCGCAAAGGAGTGT -TCGATACGCATGGGCATTTCTTCGCGCTTGTCCAGATGTTGATCGAGCAAAGCCAACAGG -CGTTCCCATTTGACTTTGCTATCGAGATTCTCATATACGATCTCCTCCATGTCGGCGAGC -AATCCTAGAAATGCATTCAAAGTGGCTGCCCCCATGCTTTCTCGTCCATTCTTCCACAAA -TGCCATGCAGCCATGCGAGCAGTTCCCCACCATTTGGTATCAATGTCCCCCCTCATCTGC -GCGACAACATCTCGGAATGCCCTCAGCGCAATCGAGGTCAGCTCGTGCGATCCACTCTGG -TGATCTGCTTTCAGCTCTTCCAGGCCTGTTATAAGTGCCTTGCTAGCACGCTCGTTCATT -TCATCTTCGAACCAGACTCGTCGTAGGCTTTCTTTCAAACGCGGCACACCACCGAATTTC -TCATCATCTTTGACTGTATCAGGATCAAACCACTGCCAGCCTTCGTGCTCCCAGTCAATT -TGAATGCCCTTCTCCCCGCGACCACCTTCATGGCAGTCTTTGAGTAGGAACAGGAACGGA -AAAATGGTCCATTCTCGACCAACAGAAGGATCGCGGAAGGTGTATGGTTTTCCCTGTCGC -CAAAGATCAAGATCTCGGGGAGTGAGAGTAGTTTCCTCTTTTATCTCGCGCCATGCTGCT -GCTACTGGTGTTTCATGGCCCTCTATACTGCCTGAGATAGGGGCAAGATGGTGTCTTCGG -TGATCGGTTAGTCGGAACACGAACAGATAATAGTAAAGCAAGACTTTGGGATACGTACTG -ATATGTGCTGACTTTCTCGCTTCGCCGAAATAGAGCAATTTTGGGGTTTCCTGCATTTGA -GCTGATAAATGAGGATACCACAGACCTCTGCATGAACTTAGATGCGCTAGCCATTTTGCT -CTTTTTGGAGATAGGGAGATAGATGGCCCAGGTATGTCGATGTACATAAGCTTAAAGTTT -CGGGATATCCGCAATGCTGTCACCATTGCATCATCACATTCCGTTGACAAACACCTCTGG -ACGAACTACTATAGATCATTTGCTGCGATGATTTTCAGAATAAAAGTTGAAATCATTTCT -TGGTAAGCTCATATCGGCTTCCAAAGACCAATTTACACAATCTTTTATTGAGTAATCCCA -ATACCAGACTGCCCAAAGCTTCAAAATGCCCACTTACACAGGATCCTGCATTTGCCGGAA -TATTGAGTACGAGTTGAAACTCGCTTCGAATGATGATGCGAGAACATCTCTCTGCCATTG -CCGCAACTGCAAGGTAAATACTTGAAAGTGGACGGCGATGTGACTTCGCTGAATCTAAAA -TAATCGAATAGAAAGCCTTTGGAACAAATTATGGTCTGACAGCCAAAGTCCCAAAGGACA -CATTCAGTCTCACCAAGGGGATACCGAAAGAACATGTTGCAGATAACGGGTCTGGGGCTG -TGATTCATCGAGAGTTTTGTCCAAATTGTGGTAGCTTCATCTTAGAATATGGGGTGCGTT -GAGCCTTACACAATAAATACAGGATTTGACAAAGAAATCAGGATGCTGCCAAAGAACATT -TTCGCTACATCTGTGTCGGATCTTTAGATGACCCCGAGGTATTACCTCCCAAGGGGGAGT -TCTTCTGCAAGGCTCGGGCCAGCTGGATGCCGGAGATTCCGAGTGAGTTCGCATGCTGAT 
-ATCCTACCCAAGTTGGTCTTTTGAACTCTGACAAATACCCGTAGATGTATTCCATAAGCA -GACAGTCAATGAGTGAGAAGTGACGAGATAAATGAATGCTTGCTCCTTTTCCACAACACC -ACTCTATACAAGTAATCTTAATGGTGTATCTTTTACACTGTCACGCTTTTCACAACCATA -TCCGTGGCCAGCAGTTTCTCTCTAATGGAGATCGCATTCAAGAAATCATTGCTGTCCACC -CGGCGATATCCATCTTCCAGCGTCTTCTCAATACCTGCCCGTAGGTGTCGTTCGGCCAAT -CGGCTTCTTGCTTCCAGCGGATCGGAGTCGACAAAGACACGATAGTCCACCAGTCCAGCA -AGTTCACACCAACCGGGCTCATCAAGAAGCAGATAGTTACCCTCGATGATAATGATCGAA -GCATCCGGCGTGATGGAAATCCCGTTCTCGACCGGGTCTTTCGCTTCGTGATCAAATGTA -GGTGCATGAATGACATCTGCTGACGACCAGTTTCCAGGAGAGGAAGCTGCTAGCGGTGTT -TCATCTGCCCAGATGCGGAGTCGAGAGACAAAGGCTACAAACCGGTTCACGTCGAAAGTC -CATGGAGCTCCCCGACGGATATGTGCCTCTTTGGGGTTGGGTAGCTGATCCAAAGCTGCC -CGAGAAAGATGGAATCCATCCATAGAGAGCAGGGCAGCGCGGGAAGTCGAGCTCTGATTT -AATTGTTGCACTACTGCCTCGGCTGTGGTGGTCTTCCCTGAACCAGGGATGCCTGCGATG -GCAACTAAGAGGCGTGGTTTGGGATGAGCAACGGCCTGGTTGCGGATAGCATTCGCCAGT -CGCCGGTACTCGGATTCCATCTGTAAGCCGAGGTTAGGTTGACTACTATGAAGGTCTAGA -TAGAATCTAACCTTGAATCAATCGCACATGGATATATGTATCTTATAGTGACGCAAACGA -AGTAGATACCTCGACCACGGCTGGTGACAATCAACGGATCATACTGCACAGGAGTAAAGA -GCAAATTCAGCAGCTATCCAGGGATTCTTCAAATCGCCTGACAAACGTGCAGGGCCGAGT -TATGTATGTGTTGGTGGAGCAGGTGTCGGGTCACAGTGTTCCGAAGCAACACAGTGGCAC -TCGGTATTAGCCCGACTCGGCATTCCTCCTCAATTAGGATCTTTGCAAGTATGGTTTGGG -CTCAAGTTAGAAAAAAAAGGGGGGAATTGGAGGTGTTGATGGTTCATCAAACCATTTTGA -TACCAAAATCATCTCGGAACACAATTCTCCGGCGTTACACCGGCTGGTAATCTATCATCA -ACTTCATATGAACTTCAAAAAGAACCCCCTTTCATTTTGGTGAAATGAGACTAGGAATCA -AAAGCAGTAAAAGCAGTAGAATGTAATTTTCAAAAGAAAACAAACATGCTAAAACGCAAA -AAAGGCCGGGCCAAAAAGACCCATGCGTCTCGGGCTTTGATAGCCTCAAGACGCCAGACC -TGCGACCACGACGAACCCGAGAAAACCGAAAGGTCCAAGTTGGTTGACAAAAAAGGGAAT -GCAACAAGAAACAATCATCTTCACATCACGTCTCTCTAGTCAAATGTGTAGAAACCGTTT -CCACTTCACGGACTGTTGGCTTCCGATGCTTTTTCTGTTCCGGGTTCACATAGAAGCATA -GGAGCTGTTGTGACCAACAATGAGAACACTGAGGTCGTGTATGATCACAAGTAGTGTGTT -GTTCACGACAGGCTTTACAGTCCCCCGGATGAAGTTGATATGGCTGAAAATCCGGGGATG -TGTCTTGGAATGCCGTACCCCGCCATCCAGACCATAGCCACTGGGAGATGCGACCGCAAA -GTCCACCCTTCACCTGTCCGTGCATAGCTCGGGGACTCTGTCTAGAGATCTGAACTCGTT 
-CTGGATGGATCTGTTCTCTGTAGATCCTGTCAGACATAATCACCTTGGAATAATCTCAGC -CTCTCTAACCGCTGGCCCTGACTCTCCATTATAGGTCCACGTGCTCCTTTTAGGGGGACT -GCCAGGAGATATTAATTCATGGCAAAGCGAAAACTAGAAACAAGAAAAAAGGTCGAATAC -GAGATATGTACCCAAGGCATTCCATGCACCCCCTCAGGCCCTTCAACCCCTGCATTGGAA -TGAACAAAGCACCTCGGGCACTGGCACTGTGCATATGTCATATTATACAGAGGTCATGTG -GTCAAAGAAAATCAGTATCTGTAGATTTATTTTTCATTCCTTCAATTTTTTTTTTTTTCT -CAGATCATGTTATCTCAAGAGTAACGTCAGGCGCTATTCGATGTTCAGTGTTGCACAGAA -CGGAATCACGTTCCACGACCGTGAATGTTGTTCCTGTGAAACCGTTGAAACGTCATGATG -GCTTCACTCAACCAGTATCCGAGGGGGGATTTGTGAGGAGGAACTTCAAAGGTTGTTCAC -AATACCTAAGACAAAATGGTTCGAATAAGCGAGGCGATCAAAGATGATCACCGGAAACTG -GAATCTTGTTACAACATCATCGCCAACTCTGAAGATGAGGATGAGCAAACCCGATTTCAA -AATCAGTTTACCTGGGAGCTTGCACGACACGCGGTCGCCGAGGAATTAGTAGTGTTCCCT -GCATTGGAAAAGCTTCGCCCAGAAGGCAAGGCGAAGACAGATGATGATCGGCGCGAACAT -TCAGCGGTACGTGTTCACATTCATATCTTTGATACAGAGAGCGATTCTGACATAGCACAG -CTCAAAGAGATGCTCGACGTTTTTCAGGACCTAAACTGTTCCGATCCACGCTTCATCCCG -ACAATCACAATGCTGATAGAAGGTCTTGCCCAGCATATGCAAGACGAAGAGGCAAACGAC -CTGGTCATATTGGAGGAGTCCCTCACTTCAGATGAGAGCGAGAAGTTGGCTAATTCTCTT -AACCGGACTAAGATCTTTGTGCCCTCTCGCTCTCATTCATATGACCTGGACCAACCGCGC -TATGAGACTGCAGCCAGTCTGCTCTCAGCTCCACTGGATCATCTTCAGGATTTGTTCCGG -AAGTGGCCAGAGGAGGAAAGCATGCTAAGCGCACCACTAGAGTAGATACAGCAGCAGTAG -ATACAGCAGAAAATGTATTTCAGCAATATAGATTTCTTTGCTTTTCCAACTTCGAGCCCT -AAGAATATGTAAATGCGTAAACGTAATTGAATTCCTATCCCGGATACTGATCAATTCTCG -ACGAACCATTTTCGCTTTAATTGGTGTGGTGAGACTTGCGTTTGGTTCTGAACAAGTAAA -TGTTAGCGACTGACTCGATATTGTACGGAAATATAGTAACCTTTCCAAATTACCTAGACC -AGTCGGGGTGCTCTGGTCTGGAGGAGCCTCATCAAGGTAATGTCGAGGTGTATCCCCGTC -AATCTCATCATTGAGAAGATCTAGCGCATTACGCTTGGACTTTCTGGATACTTTAGGATT -GTTCACAGCACTAGGGAGGATTAATAACAGCTATAAAGTGTAAATAGAAACGCCCAAGCT -CACGCTGTATAGCCACGCCTCGCGTTCAGTTCGTCTTCGATAGATAAATCGAGATCTTTA -GTATCCCCCAGGCTTGACATTATGTATTTAGGTATTTCAAAAAATCTCAATTGTCCTAAA -CATAAGGTTGACGGTAACAATCATCAAGACATCCATAAGAGCGAGGCAAGAGCGGCAGAA -AAGCCCGGGGTTATAACGTCACATTACCATCACTATTGGAAAATAGATGCTAAAAGCACC -GGAAACACCCCGATTGTAGAGTTCTTACTGCCAATTATACCTGCAAAATATCTCAAGATT 
-ACCAAGGGATTTGATCCTAGGTTCGAACCATTTGGAATATGGGGTATATTCCCCACTCAA -TTGTCATTGATAAACAGATTCCTATTGTACAAATAACCACGATTTGACAGAGGTGATGGC -AATTCAATGGGGACTGTTGTAATCAGCCTGTCCCGTGTACCCTGTATAACCACCATGATG -CATCAATTCCACATCCGGGTCTTCTTGGGTTTTGTGATATCCCGAGAGAATCTTACTCGG -ATGGAAGAAGTTGAAAAGCAGACAGCAAACAAACATCAACGCCGCATCAAATACATATAT -GAATGCCTCTTTGCTTTGAAGAAATCCATCGCTGCCCTGGACGTACTCTGCCACTCTGTA -GATTGATCGGACCAAGACCAAGACACTAGCCATATAAAGGACTTTCATATAGCGAGTCCA -GGGAATTTGACTGTCCCCCGTAGCATGCAAAGGTGTGGCTAGCATGCGGCGATGAAAAAC -CACGCAAACAACAATGAAAAACCCAAAGAATAGGATTTGAACAAAGAGTCCGACAACAAT -CATGTTGTTTCCAAGTTTTACTTTCGACTTGTCCTTCGCCGTAGCAAGCATACCACCTCC -TTCAAGAAGAAAAGCAAAATTAGTGACATTCTATGTTACAGATAACTTAAGTAACGTACC -TCCACTCTGGATGAAAAATGACAACACATCACCCGTCACAAATATCTTGGTCAGCCATGA -TGGCCTGACTAGGGAACATGAAGCTCCGTTCAGTAGACGAATGATACGACCCAGAACCAT -ATATATAGAAGCAGCAAAGAATGCAGGTGCCAGCAGTAGCATCAAGCTCTGTGCAATATA -CGGCTTGGTGGTGAAATCCGGGGCTTCCTCTGAGCTGGCATACCTTGCCAAGTAGCCGAA -TGCCTCAACTGAAGATAGTGAAACACAATGAGCACATTTTCAAGAGTGAATAATGAAGAA -TCAAGGAATACATACATAAGCCACCAATCACAAACGGTATGAAGTACCAGGTTCGGTTCC -GGCCTAATTGCCAGATATGTATGGCGGTAGTGAGTCCAAAAATAGCTGCAAAGGCGACAG -CAGAACCGGTAGACGGGTTGTAATGGTATAGCTTGAATCCCTCGGCCATGACAAGAGAGC -GTAACGTGCAAGGTTGTTATGGAATAAAATGGCTGGTCTTGGACAGTGGTAGTATTCGAG -AAAGATAGGCGATGATGCTCGAAGATAGGAAGGAGTGGAAAGACCGTATGTTGAAAAGAT -TCAATCAAGAGATCCAGCCTCTCTTATGAGAAGAGTTTTGTTATTTAAATATCTTTTCAT -CTTGTCTGATCAATACACGCTGGGCTGAGATTTCTCTGCCTTGGAGCTGGTAGATGCTTC -AGCCCCACACTTAGAGATCACCTTTTGTTAAGAGGGCTCAAAAAAGAGAAGAACTAGTCC -TTTTGAGTACAAGAATACGAATTATCCGAAAACGAAAGAGGGTCAAGCTTGTGCAGGTTG -GTTTCTCTGATCCACCTAAGAGGATCGTGGCATATTGTCCAATCAGATGTGCTATGGATC -TCCCAAAACAGAAAAGATTACGAGTACGAGATCCTAACCTGTAGCTATCAGTATCCCTTG -GATACTCTACCTAGATACCTACCTAGGTATTATAACCTTAATATATAAGATTTTTCCTCA -CCGCATTTCTTATAATAGATCGATGAACGAAACACCCCAAAAATGGCCCATCTTCAGGGA -TAACAGGGAGGTCTATGGTATCTATATAAGGAGAGACACCTGGTCTATGCTCCTACTAGA -CTTCCTCTCCAACCACAAATGATGTATCATGTGCCTGGCACAACGCCTTGGTGCGTATCG -CCAACTGGCCAATCCTCTGGTAATCACCGGTGTCGGCACCCACCTCAATATACCCCAATG 
-GACTGCACAATGAACAGGGAAGTATACATTCGGCTGCGAGAATGGGATTTCCCAACTTAT -GATTTTTTGATAATGTAATCTGCACGGAAAGATTTCATTTACATATCACACAACTTCCAT -AGAATTGTCGAGACAATATATATTTTGAAGGGTACTAAGACACAACATTCTACCGGCGCT -GGAGCTGAGTAGAGCAATGGAACAATCTGAAGTTTGTCGCTGTTTTATTGCAATGAGCGC -TCAATATGATGCGCACAATTCGATGAATATCCGGCAAAGTGTCTACATGAAGGATATTAT -TCTTTCAGGGACACATTGTGGCAAGCTCATCAATCCCTACCTTGGCGGGTGGACACTCGC -TTACAAATTGTAAACATTAGGCATCCTGTGTATAAACAACCCCATAACATAAACAAAAAA -CCCCCCCGGCTTTTTTCAACTCTTCCTATTTCGAGCGCTAAAGAAATTTGTGGTTTATGT -CTTCATGTACCTTTTATATAGGGGTTCCATTCAAGAGGTACCTAATCTTTGCAATTTGCT -ATCCGGTGCCCTGGATAGGTGTTGGCTTGCCGAGTATATAGGATACTCCTTGAAGTATGT -GATCTGCAAAGAGATGAAAACTTGAATGTATCTACTCCGTGCTCCAGTACTCCGACCATC -AACCGAGAAGAAGATAGAGTCGAACAAAGTACAGTCTAGACCTAAGCGCTAAAAGGCCAA -GACCTCGGGAGGAAGAGATGTAGATCGGAGGCGAAGTGGAGAGAGCGGCAACTCCGATCA -CATGTCTACGGACAACATTTTCATTTGAACTTGGGAATGTGAGAAAGTCGAGAGATCTCG -AAGATGTGACTGTCTGTATACTTTCATTCATATCCTTTTCTCCTTTTtatctctctatct -ctctatctttctttctGTTCAGATGTTGAAAATCTCCTGTTTCACCGTTTGTTTTGGGTT -ACCCCTCGAATGGGCTAGGGGTTTCAGGTTGACACACTACACCCCCCCTTAACCGCCGTC -ACAGCCACTAGTCGATCTTTTTTGTGTTGCTCGGCCTATTGCTCTGAAGAATGGATCATC -ATGAGTGGACTCGTACTCCTGACTAGAACGCCACGGAGACCTCTCGACTCCGCCGTTGCA -GCCTATCACAGACATCTACTCACTACCGCAGTCAAAAGTCTCAGGCACTGAGTTGTTACT -GGATGGAAAAATCCATCCCCAGTTTTCAATGCTGTACCTAGCACAATTTAGGATGAAAAA -TGATATGGGCTTGTGCGGCTGGTGAAAGATATTTGGGTCAAATACCTCCCACTGTTGCTC -CGATACGTTCGACACGCTCACATTCCTTTCTCCTTGTTGACAATCTGGAATTGTCAGtct -ttcttttagttacttttctttcttatcaattgcactttctctctttttgtctttcattgc -tttctGGATTACCTCACAAGCATTTGGTCTTGTTATACGTCTCCAGACAAGATGGGTTCT -TCAACACGTTCATTGGGCGCTGGATCGCTGATGCTCGCTCTATTAGCTTCCCCCGTGGCA -GCTACAGGTTACTCCCTGGTGGAGACTTGGCAGGGAAAGAATTTCCTGGATTATTTCAAT -TTCCACGTTGGCTCTGACCCGACCAACGGCTTTGTGAACTATCTCGACAAAGAGACTGCC -GAAAGTACAGGCCTTGTCAAGGTCACTGATTCTGGCAGTGTGTATCTCGGTGTCGATCAT -GCTACCAAACTGGACCCCAAAGGCAAAAAGGGCCGTGATTCGGTACGGATTGGCAGCAAG -AAGTACTACGACCAGTCTCTCGTCATCGCCGATATCGCCCACATGCCCGGTAGCGTCTGC -GGTACCTGGCCTGCCTTCTGGTCTGTCGGAAAGAACTGGCCAGGGGACGGCGAGATTGAT 
-ATCATCGAAGGTGTCAACATGCAGGACTACAACAAAATTGTCATGCACACAGCCGGCACT -TGCAGTCTGACAGATACGGATATGACTGGTTCGGTCAACGCTACAGGTTGTGGTGAGGAT -CTAGGCACCGTTGGCTGTGTGATCGAGGGTCACCAGGGCAGCTATGGCACACCTTTTAAC -AAGAAAAACGGTGGTGTGTATACGCTGCAGTGGACCGACGAATCCCTCAAAATTTGGTAC -TTCCCCCGAAGCTCGATCCCGGCCTCGATTACCAGCGGCGAGCCCGATGTAACACAATTT -GGTACTCCGATGGCTGTGGTGCAAGAATCGTGCGACGTTGCCAACGCTTTCAAGGCCCAG -TCGTTTGTATTTGATACCACTTTCTGCGGTGACTGGGCAGGAAATGTGTATGGTGATTCT -GGTTGCCCCATGACCGGCGGCGATTCCGTCCAGAGCTGCCACAACTACGTCGCCAACAAC -CCGACCCAATTTGCAGAGTCATACTGGGAGATCAACTCGGTCAAGATCTATCAGACCGGT -GTGAAGGAAGTTGCCTCAGCCGCGTCATCCGAGCCCCAATCGGATACAACTGCCGCTGCC -CCTGTGGTGTCGCACACAACTGCTGAGACACACGCTACTCATACCGCCACTCTCGAGACA -CACGCTACTGATACCGCTACTGCTGTGCACTCAACTGCTTCCACGCAAGACGAAACGGCC -GGAGCAATTCAGGAATTGACTCCTACATCTGTTCCTACTGTTGCAGCTACTGCCGAAAGC -CCTGCCACGGAAGAAGTCAAACACCCTGTCACCAGCAAGAAGACCCGTACTATCACCTCT -ATCGTCACTGCGACTGAGACTGTCTGTCCTGAGGATGAGAAATCTTCTGTTATCGCCGCC -CACTCGGTAGCAACAACTGCGCCCGCTCCAGTTGTACAATCTGTCGAAATCTCAGCCAAC -GGCCAAGATACCGAGGCAACGACCCCTGCGGCAGGTGCAGACCCTCTCTCCAATCACGCA -ACCGTCGCAGCTGGTCAAAGCCATGCTGGCTACACTTCCGCCACCGGACAGAGCTTCGGT -GCTTCATGGGCGAAGCCTTCTGCGCCCCCCTCTGCCTTTGTTTCCTCCGTTCCATATGAG -GCCTCCAAAGCTCCGGCCCCTGCCCCCACAGATGTCCCTCCACAGGAGGCTACATCTGCT -ACATCGGGCTCCTATAACCCAAGCGGCTCTCAATCTGGTTTCGGCCCTGTCTTCACCGGC -GCCGCTGATCGGCTGTCTATAAGCATGTCGGCTCTCCTCTCTGGGCTCATTGTTGCCTTT -TTGGCTTAAATATCTGTGATAGATCTCATTTGGAATTGGTGCAAATTTGGCGGTGGATTT -TGTTTTTTTTTATTGTACAGCAGGCCCTAAAGATGGCCGTAGACTTGGTTTTATTTCGAT -TCAGTCTGCCTTTTGCCCCTCAGGGCGTCCTACCTTTTCTTGCACACTAAACGCAAAAGC -AAGCGTGACCAGTTAATGAAGAATTGTTTCCTTGAAATATTAGTGAGGTTTTTCGTATTT -ACATGTGCAGACACCATTTATTCCCAGACTCCACGCCTTCTAGCCCGAATTCAAATGCTT -CGCGGTCTAGGCTGTCCACCGCGCCAACCAGTCCTGGGCGTAGCATCTCAGGATTCCATC -ATCAATTATTCTCAGTCGGCCATCTCGCAAAGTTACACTACTCCTCTCAGCTTTCTCTGT -AATGAACCTTTTTAGTCCATTTTGTGAAGCTGATGCATTCCCATGCATCGTCTCGTAATG -ATGAATGAGCTTGACCGCCGGATTGACATGTTCCTCACCCCTCGTCTCACTCCGACGAAG -TCTCAGCAACCACTCTTCAAAAGGCACGGAGTCGAATCGGAACCCACAACTCATCAGCGT 
-GTGTAGCAAAGAAGACCAGGTGAATTCGCGAGAATTACACAAATTGAAAATCGAGTCATC -AATGTACTCCACCACAGCTGAGTTCGACGCCTGATTCTCTCCAAACACACTTGGCGCAGC -ACAACTGCTCGCGAGTTCAAGAATAGTGGACGCCAGCTTATCCACCGGAAGCCAAGAACA -TGTCTGGGACAGCTCAGGCAGTGCACCCAGTGTCAGTGCTGACCGGATCATCAACGGCAG -GGCCTCGGAGTCATTCCAAAGGCCCTTCTTCGAGTGACCTGAGACTTGGCCGATTCGAAG -GGAATAAGCCCGTGCTCCACTGCGTCGTGCGTTACTCACGATCCGCTCTCCAACTAACTT -GGACTGCCCGTAGCCCATAAGGGCGCAATCCAAGTCGACTGGTTGTTCGAGGATCTCGGG -CGATCGAGATCCGAGCGCGGTTGAGACGGACGAGCAGAACATCATGACGGCAGGCTCACG -TCTATGCACACACAAGGAGAGCTGGATCAGATTGTATAATCCCTGGATGTGTGGCTCGAA -CTCGGCTAGTGGGAGATTGAAATTGACGGGCCATGCTGTGTGGATGATGAGCGAGACCGT -GTCTAGCATGTGTATAATTGTATTTTGATCCAGCGATAGCCCAAAGTTGGGCTGTTCAAT -ACGGCTGTTGAACGCGACGATCTTTGCTCTTTGTTCGGGGGAGATGGGAAGATCTTTGTC -CACTAGAGTGCGGAGAATTGCGTCTAGTGGCGATTCCCGTCGTGTGAGACAGTACACTGT -GGAGATGGTGTCGTCGTTGAGAAGCTCGTGCAAGACATGGGCCCCAATCGACCCGGTAGC -TCCTGTCAAAAGCTTGGCTATGTTAATTTCCGCATTTGAACTCCGACAGGAAGATGGTCA -CTCACCACACTGTTGGTACCTGAGAAAGCATGCGGCCGGGGCGTGTGCCTCTCAAATACA -GAGTATTTCTTGACCAATTCAGTCATCAAAGAGACGTCACTATCAGCATCTTCAACTGCC -TCACCCTTTTGCAAGGCACAGATATGTCGAGCTAACTGTGATATACAGCCCGCTTCAAAT -ATGACGTTCGGACCAGGAGCCTGGCTCATCTCAAACTTGAAATATTGTAGAATAAGACGT -CTGAAATGGATTGCCTTGAGACTATCCACACCCTCGGAGAAGAAATTGGCATCGACACTA -GAAATAGGTATTCCAAGTTCTTGCTGGGCTAGTTGAAAAAGGATGCGTTCTGTTTCCGCT -TCATCAAGCAGTAGCTTCCCATGGATCTGTTCCTTCTCCGTATAAAGGTCTTCGATAAGT -TCACTGTGTTCTTGGTAGACCTGCGCCCGGATCATGGACCCTTTATCAGTTCGAGCAAAC -TTGGCGTCATATGGCAGTATAGCAACCAGGTTCCGGGATATTTGAGAGAAAGTCTCTGCC -TGAGAGTTGGCTTCTTGAATTATTGGCCAGATTGCATTGAGATAGTTGTCTTCTGAGAGA -GAGTTGGCTTCAGGGTCGTCCGCTCGTAAGATCAATAGTCCTGGAGCAGCTTTGCCTACT -CCGACCACAACTGCATCGTGAATGAAAGGGTGTTGTTTGATGTGACCTTCAATCGGGAGC -GGTAAGACCTTTTCGCCATTCACTAAATTAATTCGATCGTCCAGTCGTGAGATGTATTTC -CACCTTCCCGCAATTGTAGGATGAGGAATGAAGACGTCTTTTGAATGGAAGGATCCTGGT -GGCTCGTTTGAATTCGAAGTTGTTAATGCCGGATGACCAGCAAGATAAACGCATTCAAAA -AGCGAGTCTGATTCAGAGATGGGCTTCATCCAAACATAGGGTTTGATATTGTCGAAGAAT -TTCAGATAGTTCCAGTAAGGGTCACCTTTTGGTCTGGAAATTGATTCTGCAACAAGGCCA 
-GCTTCGGTGCTACAATTTGTTAGACTTTGTGTGACTATGTGGGTATCCGCAGAATCCTAC -TTACAGACCGAATGAACCTCCAAAGTGTACTCCTTCACTTACTAGGCGATCGCCGAGCTC -GTCAGGACAGGGTGCACCTCCGTATGTGACCAACTTGCATTGCCTTAGCGCTGTGATACC -GCGAGGATCATCAATCAAAATTTGGAGCATATATGGAACTGCTGCTATAGACTCTGGTTT -GGCTTCCGCAAGGGCGCTAGCGACGAGTTCTGCAGTTAAAGGCTGCGCAGCGTTCCACAT -ATAGGCCGTCTTTCTCATCCACATCGCCTGCAATGCTGTGGATAGCCCATGGAGGTGGTA -CCATGGTAGAGTATTAAAAGAAGTATAACCAGGACCCCTCAGCGGATGAGTCATGATGGC -TTTGTGTGTAAGGAAGAGTGGTTTGGGGGTACCTGTCGATCCCGAGGAATGAAGGATCAA -GGCGATTTTTTCGGGGTTTCGATTGCGATGCAAAACAAGCGATGGCGGTTGTGTCTTATT -GTATAAAGAGCATGAGAGCATTGGACGGCAGCTCACTAGCCTCCGACGGAGGATGTCACC -CAATGTTGAACGGATATTTGGGGATTGGCCATATATGATTGACTCGCAGTTGACCGTTTC -TAGTAATGAGACGCAGGCTTCGCCCGAGAGCCTTGGTGAGAGCATCATGACAGTATGACC -TAGTCGACTTAGGGCAAAGAAGGTGATCACCATGTTAAGGTCGGACAAAGTAAGAAGTGC -CACGGTGGACGGGCCTTGCGTTCGCTGAACATCGATTTGTTAGACTGTTGGAAGAGACAG -CTCAGTGAACGACCTACCTGCTTGAATCCATCATCTATGAGCATAATGACAGCTTGGTTT -ATCATAGCATCCAAGTGTTTCCCGGTGTAGTACTCATAGGAAGCTGCATCGTTCTCGGAA -CGAGGATAGGCAAGAATGGGGCATTGGACGACATCAGCGGAACGTAGCCGAATTAAGTCG -TCGAGGACTTGAAGAGATCCAAACTCCCGCACCAATGGGTTGATATTGCTGGAGGAACCA -GCGTGTGCTGCCTGAGTTCCGCGTTGGATACCCATATTGAGTAGAGAAGTATTCTAGAAT -GAGAGGAGAAGTGCCACAGCAGCTTGGTGTTTCCAGGACTATATTGAGGGTCAAGCGGTA -TTTAAGAAATGAATTTCCGTTCGAATAGGTCGCTGAACTAACCCATATTACCAGGTTAGC -CTATAGTGACTGCATTCGGGGTGCCCGGCATAGGATAGTCCTTCTGCATCGGGAAAGATA -TACACATCTCACGGATTCAGCCAAATAGCAAACAGGCAGGATCAGCAATGTACCATGTTG -TGATAGTTATGCATATGCCAATTCGAGTTATCAAGGCGCGTATCGAACAGTTGGGGCTTG -AGGGAATAATATTGTTCTCGGCAATAGTCACATGCACGTACTCCATGTACAACAGCACGG -GTTTAATGCACAAGGATTGGCTCGCTGAGCGAGTTGTGTCACCAGCAAACACTCAGCTTG -AAATCATAGATTTCAGGTGCCTAGAATGAACAATGGCGTTGATTTGATGGCAAGATTGGC -TTATTCCAACTAGTGTGGCGTGGGCTATCTACTAGTTCCACTACTAGTCGTCAAACCCCA -ACTCTTGGGAATAAATCCCCTGTCGAGCTCTATCCATGTGCGTCAAGTTCAATTTTATTC -TCTATCATCGGAGGGAAATTTAATCACCACCACTGGCGTTCGTTACACTGCGGAGCTGCC -CGAGACACATCCGCCTCAGCTAGAATGAAAATACCGTTGGAAAAGCAAATAAACTCTAGC -CGTCCCCGCTGTGCAAATGACCTTTTCGATAAAATATTGAGGCGACAAATGGTTGTAACG 
-TCTACTAATTGCATATTAATAGTTGTCAATCCTGCTTTGGGACTCTGAAACTTATTGTGG -CAATCAGGGTGACTGATGACGGCAGTGAACGTCAGCCTTCAGTCTGGAACATTTTGGGTG -TAATAGGTCAAGGCTACACAAAATGCTGTGTCCAGACATGCTCTAATGACGTGTTTGTTC -TGTATTTAATTGGTTTTCAAATAAGCTGAACTACTTGCCGAAGTTACCGCGTAAAACCAG -TCTTACTCTTCCAGCGGTGCTAAACCGGACTAAGACCATTCCAGACAGCCAAGGAGCAAA -GGGCCAGTATCAGGGATGTCTCACTAAAAGTGTATTGTGGGAAGACCCCCTATATCCCTA -GATGCCAGAGAGGAGCCAAGGAGCGACGGAACGGGAAATCCTTGGAAGATGGTTTAACTG -AGCGATAATACAAATCATGTTCCATATCAAATGGCTGGTGTTTGTTCCAGTAATGGAAGT -CTGGTATGAAGAGTCCAGATCTATGGTCTTCCAATCATATAAATGGATCGGGAGCGGGCC -GGAAGAAGCATCCGGAAAGCTGGGAATGCTCTGATCCAAGGTTCGGATCTATAAAATTAG -GGCCGGAGACCATCCGGGATCCATTTGAAGAATGCTGATCCGTTCGATCGGATTTATGCC -GGTCATTCTGCCCTTTTTGTATTTTTTTGCATTGTAGGTCTCAGAATACAGGCCTAGACA -TAGCATTCCAGGTATCGGGAATGAGCCTGCAGCGTTCCGTTAAAAGGCGAAGTACCCGAA -GGTTTGGTTCGGCATAGCATTCCTGAGTATTTCTGGAACAATTAGTCATCACTTCACTCA -CATAGAACATTCTTGAGATCTGAGACATATGGATCGTACCAAGGAACGTAGGAAGATATG -CGTGTAGGCATCAAGCATGACTGCAATTTATCGAGATTAAAAACCAGTCTACAACTCTAG -ACAAGGATGTACAGATAGATTGCAAGATACAAGCTGAGTTCATCAATACTATGATCCCTC -TGTATTGACTGAAGGATCATATTTCCATTTGTGAAGCTAAAAATGCAGAATAACACTGAC -TGAAACAAAATGCGATGATCTTTTCAAGCCCAAAGAGCTAGATGAATTCCTTCTCCGACT -TCATTTCCAACATTTCCAGGACCCGCAGCCAGGAGCGACTAGCCCGGGCAGCATGCTCGA -TCGGAACGGCCGGGTTATCTTCCCCGGTGGATCTCGAGAATACTTCCATACTGACCCACC -CAGTATAACCCAGACCGTTAATGCAGGCATCCATCACTTCTTGAATCGGAAGGTAACCGC -CCTTGTCTTCTTCACACGGAAACAGACGAGCATTCCGCGACCAGGCTAGCAGAGTAGGTA -GGCCTGGTGCGTGAAGAAAGGGAGTGTGTGGGTCGACCCGCTCGCCGTCGGCAAGTTGGA -CCATCATCACTCGGTCCAGATCGATCTCCCGTACAAGCCGGTCCATCGATTTCCGCAGAT -CTTGATCTGCGCTTGGGGGCTGTCGCCCTGTTGCGGAGGTGGGATCGGCCCAGACTCGCG -CTGCAATGTTGAATGTATCAAGACAGATCCCGAGATTGGGTCGATTGGCACGTTTAATCA -AATCCCATGAGTCTTCCCAGGTGTTGATGTGCGTCCCCCATGCGAGGGCCTCATATGCGA -TGCGCACTGGAAATGCGAGATCTGCGGCCTCGGCTAGGTCTGCTGCGAGACGATCTCGGT -CGGAGGTGATCTCCGAGGCATCGAGAAAAGATGAGGGGATTAGGATGAACAGTGAGTCAC -CGAGGATCTTGCATAATTGGACCCAATGCTTGAGCTCGTCAATCCGCTCCGCATGCCTTT -GTCGATCAATGAGGCCTTCGTAGTGTCTGAAAGGCTGGAGAGCTACGATCTGCAGATCAC 
-GCTCAGCGCATAGATCTCGGATAGCGCGCGCTGCATCAATCTCGTAGTCTCGGTCAGTGC -GGGGTGCACTGTCAGGTGCTTGAGCTCGAATGTTCTTGGCGAGCTCTGCAAGGTCATCAT -GAAAGAGCTCGAGTCCTTGTTGGCCGTTGACAGCGGCAGCATCAAGCTTTGCGACGATGG -GGTGTAAATGCGCAGCTCCCAGACTAACTGTTGCAATAGCTGGCCTCCATCGCGAGTCAA -CTTCAGACTTGGGGGTACTTGAAAGCGAATGCATCATTGAAATATTTTGGGGATTGATCT -GGAACAGTCATTAGTCCTCTCCATCACACTCTTTATAGCATAAAAAGCAATGTCGCAGCA -ACAGGAGACTTCTGTTTCCGTGCTGATAATATCTCAGCTATTTTCTTTTCTTTTTCTTTG -CTAAAAAAAAAAAGCAAATTCCAATGTGGGTGGCGAGCACAGAAAGCACATTCGCAAATG -GTCCGGATTCAAATCACACACTGTAGGGCAAGCATGAACATCCTAGATTATCTCAATCAA -CTTCTCTCAAATGCGGCACATTAAGACGACGGGCCGAGAATGACTGAATAGTGGAGCATC -TAGAAACGATACGCCCCGGGTTTCAGCTGTTTTCGGGGTAAGCTTACCTATAATGCATCC -GTCTGTTGTAGGAATGGATCGAACGCCGATGGAAACAAGGTTTCTTGATTTATTTGCCTT -GGCCGATGTGACTCTGGGGGGTGCCAGGGACTATAAGATGCAATTGAGATCCGGCCAAAT -ACTTTCCTCATCCTAGCTTGTGCAACTCGACGTAGACAACCACACTCTATATCGATCTAT -CGGTGTTTACTTTTCTTGAACAGTCAAAATGACCATCACCACTCCCTATAACGGTTTGGT -CAACCGACTCAGCCAAATGATCCTGAAAGAAGCATCGGCGCCATCTCCGCAAGCCCATTT -TGAGCTACTGCGAATGATCGACCAGCTGAGGTCTGCAGTGGAAACGCCAACAGAGACTGT -GCTACGTCTGATCTACCAGGTATTGTGTTCCACAGACTACGATTAAGTTCATGTCTAACT -AGACTCTAGCCTCCTCAAAATGCCGCGTTGCGAGTTGTCATTGATTTGGGCATTTTCGCT -TTTCTTGCGGAGAAGCGCCACAGAGGAACGGGACTTTCAGCTGCTCAGCTAGCCAATTAC -ACAGGCGCCGACCAAGATCTTATTGGTACCTTTTCCTGCAGAGCCAATAGTCCGTCACAC -ATTGTGGATCCCACTAACTTCGTGAATTAGTCCGACTGATGCGTGTTATGGCTTCTCTTG -GGCTGTGCATCACACCGATCCCCGATCTTTATGTTGCCAATGACAAGACCGTGGCACTGA -CTCAGCCGATTGGGAGAAATGGAATCCCATGCATGTATGTTGGTGATGTGGCTTGGACCC -TTGCATCTCTTCCCGCCGTCACTCACTGACCATTCGTTTAGTTATGATCTTACCTTGCCA -ACTTTGAGTAAGCTCCCAGAGTACCTGCGGACGCATGGATATGCCAACCCCCAGGATTAT -ACTCAGTCACCTATGCAATGGGCCGTTGGACAGAGTCAGTTTGAGTGGCTTTCGAGTCAC -CCGGAGCAGCAGGAGCTTTTCAACTCGTACATGTCTAGCCGTCGTGAGGGTAAACCAATG -TGGTACGATATATACCCTGTGGGTCGACTGTTTGGTCATGCCGTGCCATACAAAGATACC -GTTTTCATGGTAGACATTGGTGGGAACCAGGGGCATGATCTGCGCAAATTCCGAGGGGAA -TATGGTCATCTGGCAGGAAGAATGGTTCTGCAAGATTTACCAAGGGTGGTTGAGCGGGTC -GATGGTGAGAGCGCAGGAATCGAGGTGATGGGACACAATTTCATGGATGCTCAACCCGTC 
-AAAGGTGCCCGTGCGTACTACTTTCGCTCCATCTTCCATGACTGGGATGACGAAATCAGT -CGGAGGATCCTCCAAAATACGGTATCAGCGATGGCACCGGATTACTCCCGAATTCTCATT -GTCGATTTTGTCTTGCCAGACACCGACACGCCCCTGATGCAGGCTTCGCTGGATATTCAG -ATGATGAGTATTGGAGCTGGTGTCGAAAGGTCGGAACGACAGTGGAGAGATCTCTTGCGT -AGTGCTGGGCTGGAGATCACAGGGATCTGGAATCAGAGCCCGGCAATGGAGTCGGTGATT -GAGGCAGTTCCAATGTTGAAATAGAGCAGATAGGGAAATAATTTGAGCAGATTTGGTATT -TGAATTCCAGTGAAATATGACGGATGAAATAGGTGGTATGGGCAAAGAGAGGTGTGAAGA -GTAAAAGAAATCTAATTATGAATTGTTTTCTATAAAAAAAAGTAGCCCATCAGGGCTCAG -CGACTCAAGTAGGCGGCGGCTGAGCGTGCTGCATCTTCAACAACCACCGTTCCCTTGAAA -TATAATCTGTGCGAGTCCAGTTGCTTCTCAAAGGTGGCCCTGTATCTTTCATTCTTCATC -AATCCTCCTGTCAACACCAACACAGAGGCATCAGATGGCTGATAACCAGTGGCTTGAGGC -TTCACCAAGCGACCGATAGTATCCCCAACAAGCAAAGCAATCTGAGAGCTCACGATAGCT -GCCGCAGTCTCATTTTTGTCCATCAGGTCGAGAACAACCGGAGCAATACTAGCAATGCGT -GACTTCACACCCTGAGAATGGTTCTGGGCCAAGATGTTGTTCAACAGGTCGATGCTGGCA -GTCTCAGAGACCGGACACCCCAGTTTCTCAGCGACAGCCTTAGAGAGTTCATCGCCTCCA -GTCTTGTCAAGCCCCAACGTGATATCCTCGAAGACTCCCAGTGTATGTTGGATGGCTTTC -ATACCGATTGTGTAGCCACCGCCCTGATCGCCGAGCATGTGACCCCAGCCTCCAGACCGG -GCAGCGCGAGCGTATCTTTGTTGATCCTCGGCCCATTTGTATCTCATTGCAACACTCCCT -GTGCCTGCAATCACTACCAGGACCGAAGGTGTACCGAGCGATTGTGGTACACCAGCTGTC -AATAGATCCACGTCGTTGGTCAACTGGAGGTCCTTTTCTCGAGTGATACCGAAAGCCTGT -CTCAGCTTGGGTGCTAGCACTCCCTCCAGCCCAGCCCGGTCAATACCGGCCAGTCCAATC -CAGACAGAGCTGAAGGATGTTTGAAGAAGTTCCTTCCACGAAGACTCTGTTCGTTCCACG -TCGCTTGGCAGGGCATCTTTTAGAGCGGCCTTTGCTGCGTCAAGTAATGTGCCAATGACT -CCGTCTATGTCTCCAATGCTATCGGTTCTGGAAAGGGTTAGCTTCACGATAGGTAGGATA -AAATCAGAAAAGTTATCATACAGATTGCATGGGCCAGCAGATCCCTGACCCACCACTTTG -GCTCCGTCAGCTACAGCAGCAGCGCACTTGGTACCGCCACCATCAATGCACAAGGCAAAC -TTTTGAGTGGGGTTACTTGTGGGCTTCTGTTCGATGGGCTTAGAGATGGCAGCCAGCGCG -TTGGCCAACACTCCACCCGCAGCGTCGAAGTATCCTTGACAAGTCTCAACTGACTCCCCA -GTCTCTGAAACTAAGATAGCCAGCTTGACACTGCCATTACATTGTGCAAGGACAGCGTCC -AATTCAGCGTCAGGTGCAGACTGGCATTTTGCACTCAGTCTGCGTAGAATATTGCGGGAT -CTTTGCTCGAGTTTCAAATTTGACGCCACGAGATCAACCATCTTGTACATCAACATTAGT -CGGTGTACCACAGTCGGGGGAAATTACAACATACCATGTTTCCGTAAGTCTTCCCTGTAC 
-GAATCATGGTACCGGTACTGAGCATATTCAACACCAGTTTGGTGGCAGTTCCAGCCTTGA -GACGGGTGCTACCGGTGACGACTTCGGGCCCAGACACGGGTGAAATAAGGAAGTCTACAT -CCCCCGAAAGACCAATCGCTGAAGGAACTGTACAGACAACTGCGATGGTCACACATCCAA -GACGCTTAGCAAAAGCAAGACATCCCATCACATAGGGGGTTCGCCCAGAGCTTGCAATTC -CAATGATGCTATCAAGCTCTGGGTTCAGATTGAGAGATTTCATCTCATCTTCTCCGGCTT -TCTCATCATCCTCGGCTCCCTCCTGGGCCGATCGGATGGCGGCATCTCCACCAGCAATCA -AACCGACAAACTGCCCACGAGGAGCGTTGAATGTGGGTGGAATTTCTGACGCATCCAAAA -TTCCAAGTCTATAGAATTTTGATATTTTAATTTTGTTCCAAATAGGGGACGCTGATTCAT -GTGACTTACCTTCCACTGGTGCCAGCGCCGACATAAATGACTCGTCCACCCTGTCGAACC -CGCGGGGTCAATGCGTCAATTGCTCCTGCGATAATTGGTAGACAAGGAACAACACTTTCG -GGGACTTTGTGATCGTCGGAATTTATCACAGTGCACATGTCAAGTGTTGACATGCGGTCA -ATATGCACGGTTCGGGGATTGACCCCCTCCGTTTGGAGTTGAGATAGACTTCCAAGATCC -ATCATGCAGGTTGTTCTTGGGTGATAACTTAGAAGACAACAAGCCTGACGCCGTTTCTTG -TCAAAGTTTTATTTATTTTTCGCACGAATCACGGATCGGACGTTGACAGAAAAGAGCGAT -AAGAAGGGGTTTTTAGACGATTGGGGAGGCGACGCAAGACCCATTTCTATTTGGCGCTAT -ACAAGCCGAATACCTATTATCGCCCCATTTCCCGAGGATGAAGCCGATGCGGGATACAGA -CAGGATTTTTGTACGTAAAGTCAAGGCAGTGCTTGGCTTTCTAGAAAGACCCCAAATGGG -GGCCGTTGCAAAGTAGAGTAGCAACGAAGGATCTTACCTGCATGGGATTGGTCCGGCTCT -CAAGCGACCGGGGTGGATATCTCACTCCTTCTCCAGACTTTTCCCAACAAGGAGGTTGAT -GTGGGAGGATAGTGGAGTTATTCTACGGTATTGGTCGGCCGAGAAAATATTAGGGTCGTG -TGATAAGGAGTTGCACTCTACGCTGCAATCTAAAACACATGGGTTTACTCCACCTTCACT -ACTACGCCAGGTGAATTGCCCAACAGATACCCCGATGATAACACAATCCCTGTCTTGCCC -GTACTCCCATGCAACCTAACCCCCCTATCGCGTGGGGCCCCAGACTCGATAAGCTGGCCC -ACACGGGTGCAGTAGTCCGATATAGGGGGAAGGGAAAGGATTGCTGAAGTCTCGAACATA -AGACCCCGGTTATTCCCTCCGGACTTTTGTAGTTTCAGTCTCATATGCACAAGATGTCTC -AATCAGTCGAAGAAGGGCAAGTTGCCGGCTCTGAGCAAAATGAAAACCGCTCGACCTTCT -ACAAGATCTGGACCAATTCTTGGTTTCAGATCATTTTGATCAGTTTCATCTGCTTCTGCT -GTCCTGGAGTGGGTTTTGCATGGAACATTATTCAATTACAGGTGAACTAACAACGAAATT -AGATGTACAATGCCCTCAGCGGTCTTGGAGGTTCCGGTCAAGTTGATCAGACCATTGCTG -CAAATGCCAGTGTAGCTCTTCTGTCTTGTACTGCAGGAACTGCTCTTTTCATCGTCGGCC -CCATCTTCGATCGTGTAGGACCCAGAGTTTGTCTCTTGCTTGGAGGATGGACCTATGCCT -TGTATTCTGGCAGTCTTCTGTGTTTCAACGGTATGCGGAAGTATCGAACTCTAGGGTGTT 
-ATATACACAGCTAACAGAACTCGAACAGCAACCAGTAACGGTGGCTTCGTCATTGCCTCC -GGTGCTATTCTCGGTGTGGGTGCCTCCTTCTTGTGGGTGGCGCAGGGTGCTATCATGACG -ACGTACGTCCCTGAATCGCAGAAGGGTCGTGCAATCGCCGCCTTCTGGATCATCTTCAAT -CTGGGTGGTGGTGTTGGATCTCTCGCCAGTTTCGGCCTGAACTTCAAGTCGAAGAGCGGC -ACTGTCAGCGACTCAACATACATTGCCTTGCTTGTCATCATGGCTTTCGGATGGTTGTTG -GGAGCACTCATTTGCCCGCCCTCGTATGTAAGAGTTGCCCAGTTGAAGGTCACACCCGAG -AGCGAGAAGAACTTCCGCCAGATTATGCGCCACTCTGTGAAGACTATCTGCAACTGGCGG -GTGCTGTGCATGTTGCCTCTCTTCTTCTCGGCCAACGTGTTCTACTCCTACCAGCAGAAT -GCCGTGAACGGTATGAACTTCAACATCCGGACGCGTTCCCTCAACGGTTCGCTTTACTGG -ATTGCCCAGATGCTGGGTGGTCTCGTCATGGGTCTCATTCTCGACATTCCCGGTGTCAGC -CGACCCAACCGCGCTCGCATCGCTTGGGCTTTCCTGTTTGTCACTGGAATGGTCATCTGG -GGCGGTGGTTATAAGTTCCAGGTGTGGTCTGATGAGCGCCTGGTTCACGGCCTCAAGCAG -GATATCGATTACACAGATTCCGACATCTCAGTGGGCCCGATGTTCCTTTATATCTTCTAT -GGTGCCTATGATGCGCTGTGGCAATCATTCTGCTACTGGCTCGTGGGTGCTCAGTCCAAC -TCCCCGGCCACAGCGGCCGTTCTGGTCGGAGCATACAAGACCTTCCAGAGCACGGGTGGT -GCGATGGCGTGGCGCATCAATGCCATGGAAAAGCCCGCGATGACCCAATTCGCCATGAAC -TGGGGACTCTGCATGGGTTCGTTGGTCATCGCTATCCCAACCGTCCTGGCTATCACTATG -ACCAGTGATACAGAAACACCAGCAGTCGATGAGAAGCAAGACCTGGAGGTGGAGGAGGCC -CGGGAGGTGGTGAAGTCCTAAAACTTTTCGTATATCAACCACTGGTTAGGGGAAAGAAAT -ATATACCATGGATAGATATACATATTGTAAAGTAGATAAAAGAAGAAAGCTTGGAAGTTT -GACACTGCATATTAGCTTGTTCCGATTGAGCCAAAATTCGGTTGCTCGTAGACCACGATT -AGGGATATATGATTAGGAATATGTACACGGCATGCCCCAACTAAAAATTTAGGATATTTT -GTATGCCGACAAATTCCGTCTCCAGGCGGTTGAGAGGCCCTCCATTCTACCTTACCGGAC -GCACTTCTTCAAGACGGCGGTTTCCAGCAGCAACTTGGTATACTGACGGTGCGCCCATGC -CACCTTTTCATCCATAGGAGCACCTTCCGCCTTTCCGAGACCTTGGTAGAAACAGCTCTT -CAGGCTGAATTCGGCCACGCCTTGATCCTGCTTGACCACAGCGGCGATTTCGAAGAGGCC -GTCGGAGGGACGGACATCACGTATACGTGGGGAGTCACCGCAGCGAATGAGAATACGGTC -TTCGGTCTTCTCAACGACTTCAAAGTGGTCGGTGACGATGGTTCCTACTTCGTATTTGTT -TGAGAGGAGGTCCGACCGGTCCCAGAGGTGGTCAGCGGTCTCGGGGCCACGGTACTTTCG -TTCAAGAATAGTTCGTTGGGGAATGTATCCTGGAGAAGGGATGATCATGTCAGAGTGTGT -TGCCTTGCGACAGGGCTGGTTTCCCCAGTATGGGACTTACCCATGCCACCCCAGACACCG -GCGCAGAAGGCTTCTACCAGCTTGCCCTTCTCCTCAATCAGCGAGGGCTGAATCTTGTCT 
-AGGGGAATTCTGCGAACGCAGACATCATGAAGAGTTGGGTTGCGACTCGGGTTGAACTTT -TTGAAAAAGGCCGATTGGAAGATGGGATCGGAGCTGTCCATGGTCTCGAAAATGTCATTG -CGCGTGGCGCCCCAAAAGACGCCGACCGAGGCAATACTGCCTGCGATGGTCGTCTTGGCC -AATTTACTGAAGATACCCATAACTgaggagaggaggggataggtgagcaaagaggtagga -ggggagagCTCTCGGGTACCTCGGGGGAAGCTCCGACTGCCAAGACCAAGGCCTTGGGCC -ATTCCACTGGATTTCACCAACTATATAAACTTCATTGTATACTTCGACAGACCAGAGATC -GTTCTCTATTCTCTATTCTATATTCGAAGAAATTACCAAAGATAACACACGTTTTATTTT -AACATCTTCGCCTTAATTTCAAACCTCGACAAATCCAAGTTGGTGTAATAGCTATCGACC -CAATCAGACATCAAACTCAAGGAAAATCTATCTCACTCTCGACTCCTGATTACATGGATG -CTGAAGGAAAAGATCGAAAAGTGAGATCAGAGAACACACGGGAGAGTCATGTGATATGAT -ACTCCTAACTTTCTCCCTCTCTCTTCGTTCTCTCACTCAGTTCTCTATGAGCTTGCACTG -TGACCGGTCGATTGACTACTCTTTTCCAGAACAGCCTACCTACTAGGGTACTATAGTACT -CTGAAATCAACTTAGGTTGTCGACACCTCCCCATAATGGAACTTCCCCGCCAGATAAGAA -CAAATTTGTTGAGACCTACAACTTCCCCTGCTATCCAGTCGATGAATATTGATACCGTTC -AATCTTCGTGGACATCCATCAAAATTTCTACAATCTCCGAATAACAAATCCTTATTTGAA -ATATTCCAAGGAGAACATTATGAGTTTGACGGCCGGGTGTATGTATATCGTTTCGATAAA -CTGCCGCCTAAGCCACGGCCTCGGCAATTTGCGTGCGTGCCTTGTTTCTAACAAATAATC -CTAAGCATCAAACAACCCCTATCATCCTTATCTGTATCAACAGCAATGGAGAGTGATGTG -CTACTGAATGTTTGGTACATTTTCGTTACTGATTGTAAAGAACGGCCAATTAAAATCGTT -TACGACAAACTTGGGAATCCATACACTCGAATACTGAAGTCAGGCATGGGGATAAGTCTC -CAGATGCTCGTGATATGCCGATGCTGAGGACCTTGGAGTGCTTCCTCATAACTGAGACGG -AAAACTCAACCGACAGAGTGGAGATGAACTTGACCATACAAATTCTCAACCAGAACTTTT -CTGCCCATATACAAAGATGGTGGCGAAGTCATATTTTCTTTGAACCGCTGGGCTTCGACT -ATGAAAACGCAATGTTATCTCCACAATCCAATTCCCATTCACGGGGGTCATTCCCAATGC -GTCGGTCCCACCTTGTCTCCTTGGAAGTCCCATGTGCACACCCAAAGTTTGATGTCCGGA -GTATCCTAATTCGCACATACCATGGAGATGCAAGATATAACCTTGGAATGAGCTGTTGAT -CGGCAATCTGGAACATCTTACATATTTTGGGACTCTTTTCATCGTTGGCCATGCGTAATG -TGTACCACTGATATACTTCCAGGGAAGGGCCCGACCCAAGCCTTAATCCAGGCAGTCGAT -CGACATGAGCCATCAATGAAGTTTATCACCCAGAGGCCTTGGATATCGTGAGCCCTTCGT -GAATGACCCAATCTATGACAAGACTTGGGGATCGTCCATTATCAACCAGTCAAGGGCTGT -CCTCAATTTAAGGAACAAAAGTATCGAGTAACAGACCCACCATGTGCAAACACAAGCAGT -CTTCATCAGTCGTGGAGAATCGATGGTGTGCTACGGCCTGCGAAAGATATAAAGTCCCTG 
-CACTTCGTCGGTTCAATTCTTTTCATCGGCGATATCTGAAATATTTCTGTATACCTATCG -TTATCTTCCTACTTCATACATCCTTCCTCGCTGTGCGAGCTTCCGTTGACCGGCAATCTT -TTATTTCTTCCATCTCTCCACTTCTCTCTTGCGAACTTGCTTCAAGATGAAGATTTCATT -TGTCGCCCTTACTGTCGTTCTCTCTGGCTCTGCCATGGGTGTCCCGTTCTCTACCGGCAC -TCGCAACGCATCCAACTCTTCTGCTGTGTCGACTCCTGTTCATCGGTCAACTGACGCACC -TCCTCCCGCCTGCCTCGCTGCTCTCAGCACTGTATCTCCATCGTCATCTGTAACTCCTTC -AGGCTCCGCACCATTCACGTCGGCCCCTATCACCAGCGGCAGTGCTGTGAAGCGTGGAGA -GGCCATCTCCATCCCGACAGACCTTGCGTCCCTGGCTTCTGATGTTCAGTCGGCGGCTGC -AGCCCTCGCTTCTGTCGGTGGTAGCGCTCAAAAGAGTGGCGAGTCGCTTTCTATCCCTAC -CGGCCTCCCCTCGCTACTTTCTGATGCTCAGGCGGCCGCGGCCGCGCTGGCCTCCGCCGG -AGCCCAGAAGCGAGGCGAGTCTCCGTCACCTCAAGTGAATATTTCATCTCTTCTGGCTGA -TGTCCAATCCATTGGCAGTGCTATCGCCTCTGCGGGTGTCCAGAAACGCGCCGCGGAGAC -CCTTCCTCTCTTGACCTCGACTGGCACTCCCACACCTACTGCTCCCCTGAGAGTCGGTCT -GCCTAAGTCTTATGGCTCTCTTTCCACTGCCTCGCCTGTGTCTGCTTCCTGTACTGCCGT -TGCTTCTCCCACCGGCTCTGCTTCTATGTCCGGTGTTGCTTCGTCGTCCGGCCGTCCTCT -GATCTGAGTGTCGCTTATTCTTCTCCCCTCACAATGGATACCTATTGTGTGAATAGTAAG -CCTTGAAATTTGTCCATTTATGGATATATAGGTCATTGTCAGATTGTTCCCAATTGTCGC -TCGTTTGAATTGGAGATCAATTCTTGTCCTGGTATTAGATGTTACTGTTAGATCGTAACT -AGAATTTATTCAGGAGATCACTTCAAATCCCGTTTTACTGTGTGGTCATACAGAATTTCG -GATTAATACGATTGTCAAGTTGATAGATCATGTGTGTATACGGAGTAGTCGGGGAACTGT -CGATGATTGGCCCCACGTGGGCACAGGCACGGCACAGCCTCACAGCCACAACAAAATCGT -TTCTCCCCTTCTTTTGTTTCGTCATGCTGGAGACCCATCTCATTTTCTTGGCTTTGGGCT -GATCATTAATTTTATTCCGGTTTTTTGAGCGGTATTTCGAGCTTTGATCTCTTTTTCCTT -TGTCTTGACCTTAGCATCTCTCTCTTTGTTGAATCATGGCCCGTTGTGTTTGCATCAGAG -TGTTGTTGAATGGTTGAAACTTGATTTGGCAATGCTCTTCGATTGTATCAAGTACATTGT -CCGCCTTTGGGACCTTATATAGCATGTCTGGTAGTGATGGGGGAGGGCGTTGGGCACGAC -AGATCGGTGAGTGGTGGTTTTCGTTTGACATCCTACCTTCTTTTGCATTCCCCGTTGAGC -CTCATGGAGTATACGAGTGTGCTAATCGAGATGTGAACTAGGTCGTCTTGCACCGTTTCG -TTTGTTTCGCACCCACCCTCAACCTCGAGGTTGGCGCCCATTGGCTCTGCGCCCGCCCTA -CCTGATCGCCCTGATATGTCTGATGCTAACAATGCTGCTGACCCTCGAAGGGTTGCGGCA -GTATAGCGAGCGTAGAGGGGGATTAGTCTTCTTTGGCTACACAGAAGATGTCAACTCTGC -ACAATCGTTCGCCTACAATTACGTTCCTATCATCGTGGCTCTCATGCTTGTCGTGGTGTG 
-GACTGTCACTGATTTCGATGTCCTCCGTCTGGAACCATACTTCCAGTTGTCCCGACCCGA -GGGAGCGCCGGCGACCGTTCTCTTCATTAACTACAACTTCGGCCAAACTATCCTTACTCC -AATCAATGCTGCACGGCGCCGCCACTGGGTGGTGCTCTGGGTTTCCTTTGTCACACTCTC -AATCCGAATGATTCTCCCCGCTCTACAGAGTACGGTACTTGAGCTGCGCGAGGTGACGGT -AGTCGACTACGATAACGTAAAAAGCTGGCCTAACTTGGTGGATTTGAACACCCAAGCGAA -CTGGATCTCAACGCAGGCCAACAATACTCTTGACGACGTGCTTTCCTCGAATCAGCAATT -GCGCCGGTCCCGATCCTCCAAGTACGCTGTGGCGCCCGTGGAAATTCCCGACAGCCACCA -AGATGAGAGCATAATATGGACTCTTGATCAGACGCTGTATTGGGCGGAACTTATGTGTGA -GAATGTTCCTATTGGGAACAAGCTCACTGTTGCTATTCATGATCCCGAGGCGGCGTATCC -AACAATTTCATGGAATGCCAGTGGTATTGATCTGAATGATGTCTACGGAGGCGCCACAAA -ATGCGCCCTTGACTTTAATTATGAGAGTATCTTCTTCCCTGCCACCGATTACCTGCAAAT -ACGGTATTGGGAGCCAATGATCAGTGCTGCTGCCAAAGAAGCATTTGCGAACCGAACCCA -GGCCTTCACAGAATTCGGCTGCGATCCATATGATCTATATGGTATGCTCATTGGAGTAAA -TATGACCGGTCCACACTCGAATCAGACCATGAGTGAGTACGCGGCATCTGGTACGGCCTT -TGCTTGTGATATAATCTATCACAAAGCGGAGGGTCGCGTGGCTATGCACTCCAACAGCTC -AATCATATCTATTGATGTCGATCGAGGCACCAAACGTAAACTCACAAAAGCCGAATTCAA -CATCGAACACTTCCAAGCTCTACTGTCTCAACGCGCTCCGTTTACCAGCGACATGCTCTT -CATCCAGGAAAATGCTACTTCTGGGGCTCGCACCGTGACTGAACTACCAATCATCAGTCA -GGAGCTAGGAGATATACAGCCTGTGCTGGTACTCGACACATCGACTGTGATGGCAGAGAC -CGAATTTGAAAACAAGATTGAGCGGGACGTCAAGCAGACTTTTGTGCTCACGCTCGGTCG -CCTGTTTGACCCCAATGGCGCACCCAGCATCCTTGATGCTTCACGCATGTCAAACCAGGT -TGCTATCGCCGTCGTCAATTTTGCGGCTCTGTGGTCTGAGCTCATTCTTACCTTAGCCAC -TTTGACTGCGGTGTACCTCCTTTACATGTATAGTTCCCGTGAATTGTTTCTCCAAAGTGA -CCCAGGGTCCATTGGTGCCATGTGCAGTATTGCCGCTGATATTTTCCACCCATCCAACAT -CCTTGCGGAGCCAGTGGCGGAATTCCATCAATTCTCTACTCGGCAACTTCGTCGTATCTT -CAGAAATGCTCGATGTTACTGGCGACCAGGCCCTTCTGGCAACCGGCTGGAGATTCTAGC -TGAAGATGGTTTGTACCACCCTATTCCCCATCATAACCGGAGTATCATAACTGACTGCAT -CATCAGGCTCACCTGTCCAGCTTGGAGAGAATCTCCGAACCCGCGTTGATCCGATGCCTC -ACTTTCTCGTAATTCCTTTCTTTCTTATTGAATTCTTGGCTCTAGCTGGAGTCATCATCC -TCATTGGATTAGTGGTCTCATCTTTACTTCGAAACGGCCGCTTCCGCCATATGACCCAAT -CCGACTCGAGTGCGTTCCAAGTCATCCTTTCCTTCCTACCCTCTATCGTTGCATCTTCCG -TGGGAGCTTTATGTACCTCCATTCACCGAAACCTCAGTGTTTTGGAGCCTTGGGTCCACC 
-TGCAACGCGGCAACGCCTCAGCAAAGACGTCATTGTCGCTGAACTATTCATCCCAAAGCC -CCTTCGCCATTTTCTTCAAGGCAGTTCGCGACCGGCATCTCTTGTTGGGTCTCGTTTCAA -TTGCGTGCGTTGTTAACATGGCGCTGACTGTCGTCGCGGGTGGGCTTTTCACACAACAAT -TGACAACTTCAACGTTGGGAACTAGCGATTTGACGATGAACTACAGCCAGTCTATATTCT -GGCAGACCGACTTCGCCGCAGAATTCACCGAGTATGACTTGATTCAAACTAGCATCAGCA -GCGGAGTCCCTATGCTGTCTTGGACTAGCCCAAATCAGTCTTTCGTCCCAATACATGTGG -CCAATCGTAACCCAGATGTGACTTATGGTGCCTCAACGCTCGGTATTGGGACTGAGCTGA -AGTGTCAGCCCCTTTCGTCCGATGCTCTTGTCCATGGGACAGCCAATGGTCACCAGCATT -GGCGATATAAACTATTTGAAAATTCGTCTATGGAATGCATGGCACGCATGCCGCCGCTGA -AGAGCAAAGAAGAAGGAATCTCGCTATCTATTCACTTTTTGTCCCCAGATGATGTCGACG -GATCAGACATCTGCCAGACGTCCACCGTTGTAGTCGTCGGTCGCTGGGATTATTTGGCGG -ACACACCAATAACGGATGAAAACACAATTGCATTGCACTGTGAACCCCAGGCAAGGTTGC -AAAACTATTCAATCTCTTTCGATCAGAAGGGACAGATCTGCTGGCATGAACCTGTTCCCA -AAACATCTATCACCCATGGAACCATGTACAACAATGCAACCGTGAGTCTCGGGCAGTTCA -ACAAAGTCTTTGCTGCTATTCCTAGCAGCTTTGTAGGCAATACTACCATGCAGAATGGAA -CATACAACATTTCTTCCTATGATTGGGCAGGTTTCCTTGTTGCCCGTCTCTACGAACGCG -AAGACCCCAACTTTGACGCCCTGAACCCCGATATTCTGACCAACATGACTCAAACTGTAT -ATCAATGGGTCTATAGCACTTATTTCTCAATTTGGCGCGATATTTACCTTGAACCTCTTA -CTCACCCTATCCCGGCCACGAATGCAACTATCACCCGCAGTACTTGGCGCATGGTTCCGT -CGGGCCCATCACTTGCCATCGCTCTAGCCATCATTATCTTTGACACGCTGGTCGTGCTTG -TGGTGTTCGGCACACGACGCGGCCGGTTTCGGGGACCACGGATGCCACGCTCCATTGGAG -CGATTATTCCCTGGATTTCACACAGCCAAATGCTCCAGGATTTTGCCGATACGCACAATT -GGAGCAGTGCCGAAAGACATGCTCATCTCACCTCGCTGAATAAGCGCTATGGTTTCCGCA -TGTTCATGGGTGCTGATAACCGCTGGAGGTTCGCTGTAGACCAGGAATCGGAGAGTAACA -ACCCTCACCATTCCCCAGACGGCGCTACAGAAGTGGAAGTCGACAAAACTACCTCGATTC -AGCTACAGGAGATCCGACGTCCACCCTCTCCTCTTCTTCTtcctcctcctcctcctcctc -ctcctcctcctcctcctcAGACTTGATACCTTATTTATGCGCCACGATCTTTTATTTGAT -TTCAAAACGTTACATCATGTTTCGAGGGTATATATTGATTTGGACATGGGATTTTTTTAT -TTTTGTGAGCCCCCTTGCGATAGATGGCTGGAGAATTAAGCCGGTTTGCTTTGCTCACTT -GCATTTTGGACTTGAGAGGTGACGTCGTGTTGCCATGTTGTAAAAATTATCCGGGACCCT -GGTCTAGGTTCTTTGCAGCGCATTTGTTGCATACTCCCCATCGTGGGACTTTGTTTGGAT -GGGACTTTGTTTGGGATTTGAGGATTTCGAGAGACGGCGTCTCGTAAAATGCAAGGGAGA 
-GGTATTTGACGGAGGAAAGGATTACATTGGCATATTACGACATGTCATCTATCTCGCATC -ATACCTACTTCCTCTATAATCGAAGCCCAGCGAGACCGAGCAAGACGATAAAGAAGGGTA -CAGTATATGGCGGTGGTGGTGTGACATCTACCTCGTGAGAAATGGACAGCCGGTCTTAAT -CTCGAGTACGTTGAATTATCACGGACATCCGATTTTGTTGGGATCTGTGAAATATATATA -TCCCTCAATATATGTAGCGTCTATAGCGAGCATTCATCATCCAGCGGAAGTCGAATCAGG -CATCAAGTTTCTTGTCATGCCGCGCGGCTCTTAGTCGACCATCAACCGCGCGAAGAAGAA -CGTAGAGAGTTTCTAAAAAGGGGGCTTGAACACCAAGTTCACGAGATTTCCGCACTGGGA -AGCCCAAAATGACATCGATCTCCATCGGCCGTCCACTTTTACAATCCGTCTGCATGCTGC -TTCCAATCCCTGGCATGGCATTGATCTTGTTCATAAGCTGATCAATCAGGTCATCTTTCA -GCGGTACACCACAGGCACGGGCGATGTCAATAACCTCCTGCATCAACCGGCGTGTAAAAG -GCGTCGCATCTTCTGATGAGTTCAACCATGTCTTCGTATCCACCAGTGTGAGAGTAGTCA -ACGAGTTCCATGCGACGTTCCAAACAACCTTCTCCCATCGCTGGATTTGCATATTCTCGA -GAACCTGGAACCGAGTCTTGCCATTTCGAAGGAGGTCGGCGAATGTTCCAACTCGTTCTT -GCTCGACTTGCCTTTCCAGCTTGGGGTTGGGGAATAGGCCGATCTGCATATCCTCGGATT -TGCCATGTGCGACAATTCCGGGACTTGTCTGTGTGGCTCCAACCCATGTCTGTGAATGTT -AGATGAGACTGGGAAATTGACGATAGGAAGAGAACTTGCCACACAGGTTATGATCGTGTT -GCCTGGGAACTGCTTTCGGAAAGGTTCCTCGTTACCAACGCCATTCTGGATAACGACAAT -GGTAGTCCTGCTGTTGATGGCTGGCTGTAATTGAGCTACCACTTCATCCTGATCGATTGC -TTTGTGGGCGCACACAATATAGTCCACTGGTGTAATTTCGGCGACGGATTTGACCACTTA -TTGGACTATCATTAAAACATCTTCGACTTGGATGGTTCATATTTGTCATACCTCGATGTG -GATGGACAATATGTTCGCCGTGATTCTCGCTATGAATGGTAATTCCCTTGGAGCCGCGCT -ATTAATACTGGAACAGCCCAATTTCTAGTGGAGTACTCACATTTACTTTAACAGCCTCAT -AGTTCGAACGTGCCACGACTGACAATCGTACACGGTCGGAGCGACTCAAGACAAAGGCAT -AGAAGGAGCCAATCCTACAGAGTTCAGTATCACGTCGTTGTTGGAATACGTTGAACTCAC -GCTCCCAATCCATAGACGAGGACATCAATTTGATCGGACATTTTGTGAGGATCAAAAGAG -AAATGAAAGGTTTTTGATACTTGACATCCCCGCATCGGAGAATGCCCGCGTGGGGGTTCT -TCAGGATGTAACGGTCTTGGCATTTGATACTCAGAGACTCATAGTCCTATAACTTGGCTA -TCAAATGTTTCTCTAAGCGACTGGTATTCACCATTTCTACTTCCAACTCTTCAAGACTGG -TGACCCCCAGAAAAACCAAATTCATTTCAATCTCCTGTCGTAGAATCTCGACCATTCGTC -GTACCCCCTTCTCGCCATATCCGGACAAACTGAACAAAAACGGTCTTCCCAATCCGACAG -CAGTCGCCCCGAGCGCAATGGCCTTGAGGACGTCTGTTCCTCGGCGAATGCCACCGTCAA -TGAAGATTTGCATCTTGCTCTTGATCAGATAAGGCGCAAACTTTCGAATTTCCAGCAGTG 
-TCAGAAGTGGTGCTTGTGCCCTGTTTATATAAGTATATTATAACCATTGGTCACAGGTTT -TCCTAGAATATAGCGTACGTATCTTGTGAGCGTCCTCCGTGGTTTGAGAGTACAATGCCC -TGGACGCCATTTTCATAGGCAAGCACCGCATCCTCCACACATTGGATTCCCTTGAGGACA -ATAGGAAGACCTGTCAGTTGGCGAAGCCAGATGAGAATTTCCCAGTCTATGACCGAAGAT -ATTGAGCTGGCCATAATTTTCGCTACCCCCTTTCCTTCCGCGGTATTATCGGTAGCCTGG -AGCACATTAGAAGCCACAGTACAGACGCGAGGATTCGACTTGGGCAGACTACATACCGTC -ACCCTCAAATTGAGCCGTTCGTCCATCTCGCGCTTCCCCACTACGGGCGAGTCGACAGTG -ATCCAAATAGCAGTAGCACCCGCTTTTTCCGCGCGTCGTACAGTCTCAACAGACTTTTGA -ATATCTTTGTTCACGTATAGTTGTTGAAAAACAGGCTGGTTCGGTGAGGTGCGACTCTTC -ATCACCTGTTCAATTGGCATGCTGGATCCGTTTGCGAGAACTTGGACCAGTCCTTCCTTA -CCAGTTGCAGCTGCTAGCGCACACTCGCCCTCTGGGTGAGCAAGTTTTGCAAGGCCCACT -GGAGAGATGTAAACGGGTAGTGTGGTGGAGAACCCAAGTATTGCAGTGGCTGTATTCACA -GCGGGAATCTTTCGCAGGATACGTGGACGAAGGGAAATCTTTTGATATACAGATGAATTA -TTTCGTTTGCTAATCTCATCATCAGCACCGGACGAGTAATACGCCCATGTTGTTGGACGC -AGCCTCTGCCTAGCAACTTGTTCAAAATCGTGGAGATTGATCAGACTTTGCAATGGTGGA -AGAGTGTCATGGTCTGGTTTTGATTGCTTGGCATCCATAGCGTCAGGTTTGTCCTTTGGC -TTCTCCCCTCCAGCGGGATCAATGTAGCCTTGAAGGGCGGCTTGAGGGAGTGCTTCGCTC -AACAACTCGACCGTATGAACCGAGTCAAATTCCTCGGTCGCGTCTTTACCCGCACAGCGC -AAAATCACCGCTGCACCCCCCGGGTGAGAGTCCAAAAAGTCTATAGAAGTCAGCTTCCGG -TCGCAACCCCCGTACAGCTTGTTCCCTACCTGTGACATCGTAGACAGCGCCATGAATCAC -GACCCAACACGATGCCTGATTGTTATGTCGGCTGATTTCCTGGCTTGTGAACTTCATCGT -GAACTATTACTCTTGCAAACTGAAACTCAGGCTCTCATCATAAATGATTCCAGCTCTGTC -GTGCCCAACACCGATGACGATCCCGGAATCCATCCGAGCGTGGGGGCTTACAACTTGGTG -CGAGCTCTCGAGACGTGGATAATCTGAACATCAAGTTGGGTTCAGATGATATCCGAGTGA -CAATGAAGATTTTGATACTCTTTTTTTCAAATCCGATATGTATTTTGGTACGTGTCTATA -ATATAACAGTGCCTTTCTAAAAATGATATAAGTTGATATGAAGCCCGCCTGGCTATACTT -CTGCTCATCCTATTTCCAAGGTGATTTCCCCACATGAACTGGTCACTGGTTTTCCCACCA -ATCAAAGCACACAATGTTTATACTGGATCCCCTCAACGTGGGGGAATTGATACACGGCAA -TCAGCTCGGACTTCTCCCGACGGCCCCGCGTGGGATGGTGTATCGTGTAGGGTTGGCGCG -CAAATATATCTTCGTCACATCCCGGATGCGGTTGCAGCTCCATCATACCTGAGAATTTTG -ATTCCATAACTCACCATGGCGAAGATCAAGTCCATTGAGCACTTCCGTGTAAAGCCTCGC -TGGCTTTTTGTCAAAGTCACCGACGATGAGGGGCGCTTCGGCTGGGGAGAGGGTACATTG 
-GAAGGGCATTCTCTTGCAGTCGAAGGGGCCCTCGATGAGATCATCACTCGCATTGTGGGA -TACGAAGCGGAGTATGCTTGAACCTGTCTGTCGTCAGAAAGAAAGGTCTAACCACCTTTG -ATAGTGATATTCAACACATTTGGCAGATAGTATGGCGGCTGGGATTCTATCGTGGTGGAC -CGGTGTTCATGTCCGCTCTATCGGGAATTGACATCGCTTTGTGGGATCTAAAGGGCCGAA -AGCTGGGTGTCCCCGTGCACCAGCTCTTGGGCGGCAGGGTGCGTCAGAAGGTGCAGGTTT -ATGCCTGGATTGGAGGTGATCGACCAAGTGATGTTGAGGCAGCAGCGTGAGTTATATAAG -GAAGGCTGGCGTTCAAAAAAAAAACCCCTCAGCCTCTAACATGTTTTGCCAATAGGAAAG -AGCGAATCGCCCAAGGCCTCAAGTGCGTCAAAATGAACGCCACCGAAGATGTCAACTGGC -TCGACTCGCCCGCTGTTCTACAGTCCTGCGTGGAACGATTAAAGCAGGTGAAGGCCCTGG -GTCTCGATGCCGGTCTGGACTTCCACGGTCGCTTGCATAGACCTATGGCTAAGCAGCTGG -CCAAGGCGCTGGAACCTTATCACCCTTTGTTCATTGAAGAGCCTCTACTATGCGAACATC -CCGAAGCAATCAAGCAACTTTCCGAGCAGACTACAATTCCTATCGCATTCGGCGAGCGCT -TATACTCCAGGTGGGATATTAAGAGATTTTTGGAGGATTCGTCGGTAGATGTCCTACAGC -CTGATATTGCGCACGCAGGAGGCATCTCTGAAACTATGCGTATCGCAAACATGGCTGAAG -CCTACGATGTTGCTATCGCACCGCATTGTCCTTTGGGTCCGATTGCCTTGGCAGCCTCCC -TCCAAGTGGCCGTCTCGATACCCAATTTTGTCATCCAAGAGATGTCGCTGGGCATGCACT -ACAATGTTGAAGCTGGAGACATCGACCTGAACAGCTACTTGGTGGATAAAACGGTCTTCG -ATATCAAGGAGGGATATGTGGCTGCGCCCTCTAAACCGGGATTGGGAATTGATATTGACG -AAGACCTGGTACGGAAGATCAGCAAGGAGACAGAGCCTTGGCAGCCTAAGGAGTTCTATG -GACCGGATGGCTCTATTCGTGAGTGGTAATAATGACGATTTCATATCGATGTTCGCAGAT -TTTATCAATTAGTGTTCAAATGTTGAGTATCCGGCACATACGCTTCAAATCACCAAGATA -TATTCTGATTTTACTCTGATATTCTTGATCGTCTTCTTCTAGGCTTTGCTGCCAAGTTTG -AATGCTAAATGGGAAGATGATGTTGGCCGGTGTTTGATCCGTATGACATGTAACTTTCAT -CATCTCGATCTACTATAACACACTCGCAAGAAGTTCCCGTAATAACACGACAGTCTTAGA -AAATTATGATGATAGTGTCCTCATCTAAAATATAATTGAGTCTTTTGATGTACCACTTCT -GAATTCCAAAATTAATTCAGCGGCCACCAGATATCTCATGATCTTTTGTTTTCATTCACC -ATATAAAGAGAGACACCAGCACATAATGACTTTCAAAGCTCTTCTGGCATTTGCAACCCT -GGGACTGGCTGCAGAGTATCCTGTCGTCTCTTTGTTTATTCCAAACGCTGACCCTCAGTC -CCTGTTGGGTGAAGTTTTGGCAGAGGTTCGTATGAATCGGGATGATCTACAAATATCCAG -CTAATTGGGGACAGAAATCAGCAACGACTACATACAGTATCAATTGCCCAAATGGTCGCA -CCAATAGCACAGAGTGCGGTATGGGCCCCGGCATGTTCTTGACCGCAGCCCCAACAAGTG -TTGAATATCTGATCAGTGCTGAATCTGACAATGTGTATGTTTACCCAGCCCTCAACCTCT 
-GCTGCAACTTGGGTAAAGTTCTGATTGATACAACAGATATAACCATGTTGTCTGCGATAT -GACCGATCTCCCGACCGGGGTATATACTTGTACAGACACTGTCACCGGGAAAGGGGCCAA -CACACCCGGCACCAGCACCTCGACCGTCCCTTGGGACCAGATAACGCTGCTGCCAGTCAC -AATCACTTCCACTGCTACCGCGGTGGCTGCAACAGTAAACAATGGCAGTACCACTACTCC -AAGTACCACCGAAGTTGTGTCCGGTAGCGAGGCATCACCCGTTGCTGCCACTGTCAGTCT -TACCCCCGTGGCGGGCGTGGCTGCACTATCAAGACCTACTGGTCTCGTGCTAGCTTGCGG -TGCAGTTCAAGCTCTGGCAGGCTTGCTGTAGTTATTGAGCTTGATTTTGAATTCACCTGG -CATTCCTTGCCTTTCAATCCTTTCGAAGAGGAGATTGCTACAAAATTCTAGTTTAGTGAT -GCTTGATGCTCCAATTTGAACTTTACGACCATTTCAAAAGACCCACAACCCGCATTTACC -ACTCTACCTCAGCGCATAGCAATATTCTTGACAAAGATCGGAATCGAGACCGGCCTATGA -TAGCTACTATCATGTTTTTATGTCCACATATTGATGATGCATTGAGAGAACTGCATTTCC -GCCCTGGAACGAGTCGTGCTTAAACTATTCTGCAGGCCGTAGGGGGGCAATAGCCATGAT -CGCACGCGAAACTGCAAAGGCCCAGATACGAATCATCCTCGCCGATAAGAGGCGCCCCAT -GAATTCCAGTAGATGGTGGAGGTGGGACTGCATTTCCATACTGCGTACAAGTACAGGGTC -CCGGTGGACAATAACCATAGTTGCAGCAAAAGGCGCACAATCCAATATAGTTATTGGGTC -CTGTTCCTGCAATGCAGGTtccacctccacctcctcctccaccgccaccaccGCTGCCGC -CATCTGGAGGACAAGCTGGCGGAGTTATAGTCTTAGTAACAGTAACCGTGCTCACGACAG -TCGAGCATTGCCCAGGTAATCCAGGCGGTGGTGTTGTAGTCGGGGGAGGACTTGGCAAGC -TGCTGGTTGTACTTGTAGATGGAGGAGCTGGGGGAATTGTACCTAGCGAGGGGGTTGGTT -GACATGTGTGAACACGCAAGCCCTGTGTAAAGGCAGAGAGCCCATCTTGTTGAAGGGGGT -CACTGAACCCTGCTGGGAGAGTTCCAACTAGATGCCAGAGTTAGAGACGTAACCTCATAA -GTTCTGATATAGACACTTACCATAAGCGTTGAAGTTATACAATCCACACACGCACCCGTT -GATGATCTCCTTCAAACTAGTTCCTGACAAAATAGTCTGACCATTGCGAATGACTGCAAA -GCTTTGTGCGCCAACCCCCATCGGAACCTCCTTTGCACTTGCCCCAGCTGGTGCCTCGTA -ATTATAGACATTCCCGCCGCTGTTAACTTGCACTGTGGCTGGGGACGTAAGCAATGATAC -AACAAATACAGAATCTGCCATGGACTGCCAGCCATCCGGCCGACCTATGAAATAATTTCC -ACTGCCGTTGTTTGCTGGTACCATGCAAATATCTGTAGAGTCGCAGTCCACCCCACGTGG -TGCAGGTCTATACCAGTAGACTAGCTCATCAGATGAGATATGACTGTTTGGGGACGACTC -GCCTGCTTTGAACGCAGCAATATATGGCTTTGAAATGTCCAGCCATCCGTCGTGGGGCCT -GTAGTGGTCCATTAGTTGAGTTGGCTAAAATGGGGATTGTGAATGCGAAAGGGTGGTATT -AATATTCTCACATGTCGTTGACCCATTTTGAGGCCCCATCATCAGTATGTGGTGATTTCA -AAGGACCAATGTAATGACATTCTCCATAGTCATTCCATGTGACAATTTCGATAAATCGTG 
-GTTTCAGCGCAAGTATCTCATTCCATCTATTAAACCATAGTAGATCAGATGGGAAGTTCC -AGTTCTTGCTATAAGAGACTTCGGGTCCGAAATGAGTGAAAAACCACGGTGATACAGCTG -GACTAGAGTCAGAAATCTGATTGAAAATCCAATAGAGAGGAGACACCCACGAGCAATATA -AGCTTTTCCGCCAAGGGCATTGATATATTCTCGATCTCCATCAGCCACAGAAACACTGTG -GCCTTGGGTTGGGGCTTTGTTGTTTCCATTGTTCGGCCACGCCATCCAATTCAAGAGGCC -GTCAACTGTCGAGATGTCTGTTCCAGTAGCTGGATGGAAATTGGGTGCGAAAAAGATAGA -TCTCCCAGCTGCGGCTCGCAATGCTGCAACATTTATTCCATCACCGGCAAATGATGAGAC -GAATACCTTGTTGTCAACCATAAGTTGTGCAGGGTTGCTAGCGTACCGGGCAATTTTCTG -GCCAACTGCAGTAGCCTGGTCAATATGCCACCAGTTGAAATCGAATGACAAGAAGACTTT -CATGTCATTATTGGCAGCAGATTCGTATGCCAATTGGAGTTGTTGATCAGTATATGGGTC -GACCCCAATATTTAGGGCAAATGCATCAATTCCCAGAGACTTAGCACGCTTCATGTCATC -ATCAAAGTCCGCAGCACTCCTTCTGTTGCTGACAATCCCAATCTTTCAAAAGATCAGTAT -ATCTCTGTCACGAAGCAGGTATTCGAGAGCATATTACCATAAAGTGCGCAAAGACAAGTC -TTTCTCCCGACGACTGTCTTTTGGCAGCACGTGTGTCCCGGTCTACCGCAGACACCGCTT -GTGCCGTCATTGACAGCAGAGTAAGGATCCCTAAAATGAACCGTATCTTCGTCATGTTCT -GTAAGTCTCTTTTCGCTTGTACCAATATTTTTATTTTGGCTTATGGTATCGGTATTCATT -TTTTGTAAGTACAAGGTCTTTAAATATATCTTTGGCTCGCCAGATGACTCTCGGGAACTT -CTTGCAGCTGACCTACGAAGTATAATCATCTGGATGATGAGTGTGATGTTTGCTAGCTAT -CTCTCCGAGCCCAATCAGCACATTAAATTCAAGGAACCATGCGGTTATTAGTAGGCTCTT -GGCCAAAAATTGTAGATTGGATGCTTTGGTGTCAACACGGCCAAAATCTTGATGCTTGCA -TGCTTTCCAATTAGACATGGATTAGCCACTGGATATCTCCTAGTATGGTAATTGTAACGA -TTGTTCCATTGTACCTATACTTACCGATGATTATATAGAAACAAGATTGCAAAATCTTCT -TGAAGCGGAACAAGACCCGTTGGTACTCTAGAGAATAGATTGAATGTGCATCCTCTCATA -AAATCTATCTCTACCCATTTCCATTTTACACTATACTGGTTGAAATCATTAATTCTATCT -TGAATAAGGATGGAAAGTCGTAAATTAATGAACCCATTGTGGTAAAATATCGGAAGACCC -AAGGTTCCCGAGCCCTAACTCACCGGCGTGATATCAAACCTCCCGAATGTAACCCACAGG -ACAAATAATCTGTTTCTATGGATATTTGCATATATCCTAGAAATACCAGGAATAAGCAAG -CCGTTTCTTTGATCTTATCTGTGGAGTGTGCCTATTTTCCTTGCTACTCTGAAGGGCATG -TACGACGTCCGCATGAAAATAATAGCGAGAACCTGCGAATTAGAGACACAGGGTTTGCAG -AGATAGTAACTCGGATTCCAAATAGCCCAACGATGAGATTCGAATAGGAAAAAAAACATA -CAAAAAAGTATCCTTTTCGGGGATAACTGTCAGTAATAGAGCACTCATACAGCAGCTGAC -CAAATTGTACGACCAAGACGTTTGTTCATATTAGAGTTTGATGGCTCGGGACGCAAGGGA 
-AACTCGATGGGTTTCAAAGTAACACTCTTTATACAGAGACTGTCACCAGACTATAAGCTC -TGCGGAGCGTCAATAACAACCAAGAAGTTGGAACCCCAAACCGACCTCGTTCCTGGAACT -GTGTAAAACGTCCAAAAACATCATTGCTGCCGGGAGATACATAAATACTTCCGTGATGTA -GTATAAGCTCGTGGTTCTTGCTTCATCTCCTACTTCCCAAACTGGAAGTAAGTCTATCCG -AAGTTGGGTGGCGCTCGGGAGGTTATTTGCCCCAAGGCGTTCAATTACGTTGTGGTCTTT -TTTGAAATTTCTGCAAGGCATCCCTAGAAAGCGTGTCGGTAGGTATGTCCCGAGGCCCCC -ACAATTGCCGACATGCTTGGCATTACTGTATTTGGAAGGCCACATCCAGGGGCTACTTAC -ATTTACATATGATATCCCGTTATGTGTGCGTGATACATCACAATATCTACATTGCAATAT -AATATTGTCAACTGGCCTGAATCTGTAGGTTACCACCCGAAACACTGCAGACCGCCATGA -ATGGATTTAACCTGTTTTTGCCTTTGGACACTTTCGATTTGGTAGCATCTGGTGGGTAAT -TGGGTTTCTACGAGGTGATCGACTTCATGGCCACCACTATATATTTATGAATGGAGGAGT -GTACGAAGTACGGGACTTTGATTTAGCAAAGCCTCCTAATAACCAGCTAACATTGATTAC -TGGAATTTTCGGCAAAAAGACTTACTATTGTCTTGATATGAGCTAGGAATTGAAACGTCC -GCTGTTCCTTCTGCGATTCATGAGGCAAAGCCGACCTGTCCCGAGTTTGAACAGTCGCCT -GATATCTCAGGCAACGATCATGAGTTCCCAATATTTTTGAACACAACTTGATATCGGAAT -ACGCAGTAATATTTGGCCTTTCAATAATCCTATCCCACACATCAAACCAGAAAACCATCA -TCATAAGCACATAGATTTATTTAACAACTCTCTTTAGATACAATTCCAAGCTCTTGGGTG -GCCCCATTCTTAGTGTCTCTTGGCGACGCTCGGCCTAAAAATAGTCGGCATGCAAGGCCC -AAAAAAGCCAAAATGAAGGGGCGCAGTTATGAATTTCGAAAGGGAGAGTTATGTGATATG -GAGGATTTTCAGGTCTCTCCAATACAGAGACTCATTATAAGATGTATATTGAAAAGCATT -TGGATATAGAATGTTACTGTCCGAGATATGAATCAAATTAGGTGCCGCGCGGCAGATGCG -TGTGCAGTTCAACTAAAGCTCCTCTAGTTCCCTAGTGGCTGTTTTCACTACAAAAATAGG -TCAGACATAGCATAATTACTACGCCGTAGATCTGTTTACCCAGCCTAGCGCCCCGCATGG -CGTTTTTTCTTTTTCTTCTCTCTCTCGAGTCCATCAATTATCAACATTCTGACCCTCTTT -CTAGAACGTTTTTTTTCCTATTTTTTTGTCCAAACCCTTCCTATCCCAGTTCGTTAATTG -CGACACTATGCGGATTTCGATATTCGCACTCTGCACAGTGATGTGCTCCCGGGTACTCGG -TCAGCCCTATGCCTCTCAAATCACATGGATCCCAGCCACAACATCCGTTGAAAAATTATG -GCCAGTCTCAGAGCCGACTGAGGTAGCCCAGCTGTACGGAAAACGCGCATATGGCGAGAC -TCTCCAAGCCAGAGACACAACCGCTACAGAAGACTCGACTAAATCCACAGCCGATGCCAC -AACTGATACAGCCGCATCGGAGACAACAGAAAGAGcggaaacaacggaaacaacggagac -aacggagacaacgTCATCAGAGACGACCGCATCAGAGACAACTGCTTCAACAACTGACGC -TACAACCACAACCTCCGAATCGGAAACTACTACCGAGACCTCAACATCTACAACCCCCAC 
-ATCTACCACCGAGACAACTAGCTCGACTGCTAGCAGCTCCTCGGTGACATCCACCAGCAC -CTCAGAGACAACAAGTACCCCAACTTCAAGTAGCAGCGCCTCAACCACAACCAGTACTAG -TCCGACTGCATCATCGAGCGGCTTTATGAGCAAAGCGGAGCTAGCCGGTGCGTCATCAAA -TCGCTATGTCCCAAAATACCTATCGTACTGACAATACCCAGAATGGAATCACAGGGGTAA -CATCGCAGCAATTGTATTTGGCTGTTGTTTTATTTCGGTATTCCTCGGTGTCGCCATTGT -TTACTATGCGCTTGGTCGAGCGAAGGCGCGACGAATTGCGGCTAGCAAACTGTTAGATTC -TAGTCAGTCGTACTCGAAGATACCGCTTGTGGCCGTAAAAGAACCTCCTTCCACTGATCT -AGAGGTTAATCGCTCCTCAACGATGCACACCAACAACTCTCAGTCCCAATATTTCCCTGC -GGTCAGTGCGCCATCAGTGTCGAACTATAGCGATACTCCCTCCGCGGTCTCTCCGATCAC -GGCAGATCATTCATTTCCCAGGGACCATGGAACGCGAGCATATCAGTGAGCAGAATACAC -ATTTAACTCCAGGCGTTGACAGGCATTGAGTTTGTTGGACATTCAAGAATTCACACTACT -CCTACTGTTATGGTGTTCTTGCCCGGACTCTGATAAGCATCATCATTATTGCTAGCGTTA -TGAAATGACATACACATTGCATTGCTTCCCAAACGCATGCAACCACTAACGAAATCACCG -GAGTAATATTAATAGTGAGTGTAAGAAGAATCATAACAGCTCCAACCGTTGCGTTAAAGG -GGTCTTCGGGATGTATCTCATGGGGAAGCGCGTGTATGGTTATAGGATGAAAGCTCCGGG -CTTGATATAGGTTGACCACCGGATTCAACACAAGCCATGTTCTTGGTCTTCCACAAAGGA -GCGAACATTTCAGGCTCGGTTTCCAAAAGCCTTTACTTCTCTAAATAGGTGGTCCCTTCT -ATCATGTAGCTGCGGGTGGGAAGACCAAATGATAAGCAAATTTTGTAAAAGGTTATATGC -CATTGAAAGAACCAGCAAGGTTCAATCTCACTTTGTAGTGCTGGTTTGAATAATCTGTGT -ACTAGTCACTATTTGAGTTCGGCTTCGATGATGCCTTGTCCATCACCGGGTGGTGCCCAG -AACTTGGTGACCTGGAAGCCAGCACTTTCTAGCAAGCGAGTCCAATGGGACTGCGAGCGC -TCCATGCTGTTACAGAAAATCATCATTTCCCAATCCCACATCGCCGGTAACATAGCACCA -TCGCGATCTGCCAGTACAAATTCTTCAATGATCAGTTTCGAATAACCCTGCTTCATTGCA -CCAGCCAACTGGGTAAGAATGGTTTGATTCTCCTCGTCGGACCAGTCATGAAGAATGAAC -TTCAGATAATAAATACGAGCGCCTGGAGCCCATGTCAACCATTATCCAAATCAAGTTGAA -TCGTGAAACTTACCATGAATTGGCTGTGGTTTAAACAAGTCAAATGGTAATTTTTCAATA -CGCTCCGTGTCTATAGTGCTCTCTTCCAAAACATATGGCAGATCTTCGAGCACCAAGCGT -CCTTTCGCCTCCGGATACCGTTCCAAAAAGGCTACAAGGTCATGGCCTCGACCCCCAGCA -ACATCGACCATGAGTGTTTCCGCCTCAGGATTAGCACCAGATAGAAGACGCTCTTCCACC -GGAAACCATTCCAACCAACTGGGTCGCGCACCGCGATCACCCTCCATGAAAGTATTGCAT -AGATCAAGTTTCTCGGGATTTCTTTGGAACCAGCTCCAAATCGACTCGTCGATCTTTTCG -GCAAATTGGAATGGACCTGTAGTCGCACCCTCGGGGTTCTGGTACTCGTGGGAACGGAGG 
-AACTCGGGGATCTTGGCAAGAGTAGGCATGCCAATATCAAATATAAATTTGATCACCGAA -ATGCTGAGTCGGTCAGTCAGCTGGCGGGTCATCGTAGTCGCGATATAGACGCGCGTGTCG -CATTCGCCAACATAGTCGATTCCAACCAAGATACGCAAAACTCGTTCTACCCCGACAATT -CATTGTCAGTTACCCATCACCCACGCAACAGAGAAGGCTCAGTAGGAAATAACATACCAG -CCAGGATCGGGCTGGCGTTTCTCACAGCTGCCAGCTCATCAAGCGACACTGGCCCATTAC -GCTCCGTTAATGTTGTAAATATATCCATGTCGATGGCAATGCGCACACACATGGCGTGAG -TAGGCTAGTCCGGGTTAGCACTAAGATCATCACCATGAATTGCCTAATATTTACAGAATA -AGCCAAGTGATAGACCGAATCTTGCGGCTTCTGTAGTGCATTCACGAGTTTTTGTGCTGC -CTTCAGGGCGGCGACGCGATCTGCCTCTTCATGCTGCCTTCGGAAAACCTCCAGTGACTC -CGTCACTTGGTCTTGTAGTTGGTCAATTGCTTCCTTGGAAACAGATGCTGCCATTTTGTG -TGTTCTCAATAGAGATATAAGTAGGAATGTATAGGGAAGGCTGTTGCGACTTCAAAGGTG -TAGAATAACAGCCAAGAGCCAAATTCGGTGTAGGGGAAATTGACGATTCCTGGGCCGTTC -AGGTTCTTTTGTCCAACCCCCCTTTTCGGATGTCTTCGCTAGTGATCGCTTTAAATTAAG -GAAGTGGATAAGCCGCGCGGGGATGTTTTGGTCCATTTCAAGATTCAAGCCCACAGCCGG -GAGACGACGGTAGTAGAAACCGTGTGTGACGGTTTCAAAGATCAACGTTACACCCAGCCA -ACAATTCACTTTCGTGTATGTGGGTTGTGAGAACACCAGAAGCATCTCGCCTTAAAAGAA -GGCGCATTGTTCATCAGATTGGCGGACTCCGGGCCGAGCGACAATGGGGCGAAGGATGAA -GTGGAGACACCTGTACAGTTGACGTGCCGTGTCAAACCCTGCCAAGATCCGCCGTTATTC -ATTCAGGCCAATAAGACTATCTTGAATTAATTGTGGAGTGAAACCTTACAGGGATGCGTT -TTCTGGGACGTTCCAATGTCCTGTCAAGAATATTTTGCGAATACGATGGATCAGGGGGGC -TGTGATTGCCGTGTGTTCTTGTCGCCTTTCAAGTTCGAGTGGAAATGACCCTGGTATCAA -GTCCAACACACAATGAGCGGCGTCAGCATCACGGCAGTGTCGTGGTGTTTTGGTGGTGTT -GCCCTGGGGGTCGTGGCAGTGCGGTTATACACACGTGTGGTGGCTCTGCGACGTGCTGGC -TGGGATGATACATTTATCACTTTGTCACTGGCAAGTGCCCTAGTATGTTCAGCTCTGGTC -CAAGTCGCCGTTTCTTACGGACTGGGACGCCATTTGAACGACATTTCAGATCCGAATCAG -CAAGTTCAGGCGATCAAGTACACAGTCATAGCACCCAACTTCTCTGTTATCAGCACCACA -ACTGGTAAGATAAGTGTGACTATCTTTCTACTCCGGCTGATGGGGCAATCTGCATCCGCG -CCGCGCCGATGGTTCCTATATATCCTCATGGCAGTCTCTATAGCGTGGAATGTGCTGGCT -ATTGTGGCCATCATTGGATTCTGCCGACCCTCGGAACGTATCTGGAACCCAAAGGTGGAA -GGTTCCTGCTTCAGTCTCAACTTTCAACTGGTTGCGGGGATATCTCAGGCTGCCTTTAAC -GCGTTCAACGATCTGGCCCTCGCCCTTTTCCCAGCGTATATCTTTTGGCGCGTCCAGCTC -GCCTTTAAGATGAAGTTCGCAATAATCTCCGTCATGGGAGCCGGTATCTTGTATCTCACT 
-CTTTTCATCACACATCAAGAGCGTCGCGCTGATTTCATAGTGCCGCGGCGGCCACGGTGG -TCAAGTGCATTCTGCTCAAGAACCTTCCCGCCCATGCCGACATCACCTGTGCGTATTCCT -CGAGTTCCATTTCAGTGAGCACACCAACTAAGCGCATACAGGGTCTTGGGCCGACATCAC -TATCTGGTATACGTAAGTGAGCTATCCTTTTTGAGCTTCTTTCTAGGATTGTTAATAAAA -ACTTGATAAAATCTAGGATCGAGGTAACTATTTCTGCATTATCCACGCAACCGTAGCTTA -TTGACACAATTAGATGTACGTGATTATCATCTGCGCATCACTGCCTACCCTCCGACAATC -CTATTCAGTGGTGCTGCAACGCTCCCGCAAGAGCTCGGCGTACATGCGCTCAGAATCGAC -CCCTCGACAGAAGCCAATCCCCCTTGTCCGCCGTGCCCCAGATGCTTCGTTATTTGCCAC -TCAAACCGAGACAAGAGTCGACCGAGGAGACAGCCGTCATTCCAGTCAGGATAATATATT -GGGGAATACAGGAATACAGAAGACGACAGAGGTTCGCGTGTCTCAAGAGCCTCGGCGTGA -GGATGAGGGGAACCCGCATTTCCTTCAATTGAATCCCTTCTGGGGGCCTTGAAATAGGCC -TTCATGTGGAAGATGAGTTAGTCATTCTGCTACTCACGTCCAAAGTTAAGATTTGGGCGT -TGCCGGCACAAAGCCTGTGCCCTCCATACAACTAATATATTGGCACTTCACCTCAACCGC -TTTCTGGCTTACTATCCCCTCCGATAGGGAGATCTTCCCGTCGTTCATTTCTCCTCCCAG -TCCATAACCATCAAGAAAGCGCGTTCTCAGGTCGACATGTCCTTGTGATGATGGCTACTC -CTCTCTATATAACCCAAATCCTTGTATCAGAACAACTGAACTCATTTGTTGTAGTTGATA -TAAGTCAGATCTTCGGTGGAGTTATGTGCATTACAAATGACCTATGGCATACACTAGAAG -TCAGTGAGTGTTTAGTGAGAATCTTACTTCATTTCCCGTATTTACTTGCTACTATTCAGT -CAATGCTGTCCTAATGCCAGCCGTCCACCCTCAAATGAGCACCTCACTTCAATCAAGTCC -AAAACGGCCCTTTGAGGCCACCATGGGGATCATAATGCCGCTTGAGCTCCCGTAGTTTTT -CAAAGTTATTGGCATAAATCTGAATCTCCTTGTTAGCTTTCCCACGAATCGTAGAAGCAA -ACAATAATTATTACCTTTCTTGGATCATTGAAGTCCTCGAGCGAATGAGTGAAGTACTCC -AATTCACACCGTTCATCACGTCCGCATATAATGAAAGGTGCCTCTCTGATCTCCCTATAG -GCTAACTCTCTGCTCTCTTCAGAATTGCAAGGAGCACCAGTCCCAAGTTGGAGAACAGAT -TGACCGCGCGTACCCGGCCAGGCTCGCAGCGCCGGATCTACTGAATGAAAAGCCGGCTAA -AACCATCAAATAATACTGGATCAACAAAAGGAGGTCTCGTTTGAGAAGGTACTTTGTGAT -TACATACCTTCTGCATGAACTCAATCAAGACATAAGTCCCACCGAGAGAAAGAGATGGGT -CCCTCAGGGTCAAATTCTCAATCCAATCCCATGCCCTCGAAATGAGTTGTTCATCGATCT -GGGGAATGGTGAGGCAAGACTGGAAGTTCGTGGTTCTTCCTTTGGCAGCTTCGAGGCCAT -CTTGATTTTAGTGTCAGCTCATTCATATCCCACTGCATTCTCATATTTGTATATTTACCC -CAAATTTGGTTGCACTGTCGAATTGTCATGGAGCTGGTTGAATCGATCGCGCCTTCAATA -TCTAGAGCCCAACGAAATCCCTCTGTACTTCTTCCATGGACTTCTCCATGCGCGTCAAAC 
-ACTTGAAGGCCAATACCCGGTTTTGGCGCGTGTCCCGTATAATGACCTTGACTAAGGTCT -AGAGGGAAAATATGCATTGCCATCTTTGGATCTTTGCATTCTGTGGTGAATCTTGAAATG -GCTTTTGCGACGACGGAGAATGCCGACCTCGGATAGGAGATTTGGCCTGAATAGATTTTG -TCCGGGTAGGGGTAAACTCGCAGTTTAGCCTCTACCAAAACTGTATATAGAAAATAAGTA -GACAGTCCATCTATACCATACGTGGACCTTGACTGACCTCCAAAACTGCCACCACCGCCT -CTCAGTGCCCAAAGCAATTCTGGCTCAGCGGATGCCCACAAAATCCTGCCGTCTGTCAAG -ACAACTTTCGCGTCCAGCATATTGTGCGGATCAGCACCAAGGCCTCTTTCGGATGACAGC -CAAGATATTCCGCCTGCAAGTATGGACCCGCCAACACCGACGTATGTGCAACGAGCGCCA -ACCACTATAGTTTTATCTAAACTTTTGCACTGTGTATAGCATGAAGTTAGGACGTACCAG -CATGGCCAGGAACATTCTCCTCTATTTTTCGATCAACTTCTCCCCATGTTTGCCCTGCAC -CAATGACCGCGGTCTGTTCTTCTGCATTATATTCGAATTTGTCGAAAGCAGATAGCGAGA -TGAGCACATCGCTGGCAGATGAAGACCCTACCCCAGTACTTCGAACTGCCCAATCTAGGT -CAGAGTTCGATAAGCAAGAGAGTAAAGATGCGAGAGACGAAGTTGTGGAGGGTCGAGCAA -TGATCTTGGGCTTCAAATCTTTCTGGGCAGCCCAAGTGCAGCTCTCTCGTCCGTAAACCG -GAGAATCCGGGTATATTATTTCGGAGGACGAAAGGGGAAGCTCGGCGATGGAACTACGAG -GGAACATCTTTTTTTCTCTGAGGAAATCCAGGATTATGAAGCTAATAGGATGAAGCCGAT -TGAAGACCTAGGAGACATTCATCTTCAATGGGAACTGGAACTACAAAAGGGATATATTAC -GTCATACAGGCCGACCTCCAGCAGGGAATGTGGACGTTTACGTTGAATGGAAGGACCCAG -AGGGCATTCGCAGACTTTCTAGGAACATATCTGCTTTATTCTCAAAGATGATAAGCTTGT -CTGAAACCATTCTATTCCTATAAAAATTAATGTGTTATAACCCCGTACTATTGGACTGCT -CTGTCATCCAGGCCGAGCCAGAGTGGTTAGATCTAGCCGCTAGCGTAGCCAAAAGCTCAT -CGCAGAGGCTGCTCAGAATGGTGCACAATCAATTGCCTTTCCAAGAGTGCTGGGTTCCTG -GATACCCATGTTGGATCTGGTAGCTATATTCTTACCGTGTATATTAGTAACGGATAATTG -TCCTGGGAGCCATCTTCTCGATGTCGAATTACATATGGCGTATATCGAAGACTTGATTGT -TCTCGGTCTCCCAGAGATGACACAAATTGAAGCTTGTGCTAAAAGCCACTCAATTGCGGT -ATCATTTGGATTTTCGGAAAATGAAAACTCAGTTTATATCGCCCAAGTAAGATTGACAGC -GACGGAGAGAACAAGTCACACCGGCGAAAGATGAAACCTACTCATATGGAGCGCACCATT -TTTGGAGATGTCTCCGGAGAATGTTTCTCTAGTGTTGTGGAGCTGCCGTTCGGACGTGTT -GGAGCTATTTCTTGCTGGGAATACATTCAGCCGCTACTGAGGGATTTCATGATATCTCAA -CGAGAGAATATCCACGTCTCTGCATGGCCCGGCCTGACTCCTCATGCCGGAAAGACTGAG -TTCTGGTCTATGTCTGCCTAAGGAAAATCGACACATCCTATGCCTACAATTCAGTATATA -GTCTGACGTACTTTCAAGGCTGCCAATATTTGCCACAGACATACGCAGTCGAGTCCCAGA 
-GCTTTGTTGTCCATTGCACGGTTGTAATCACAGAAAAGGAGTGGAGACAATGATCACAAG -TGACGAAATCTCCATGTCTTCTCCAGGAGGAGGCAGCTCCGCAATATTTGGTCCCAATGG -GAGGAGGTTGACTAAAACTATGGATGGCATTACCAAGGAAACAATCTCTGCAGACCTTGA -TATGAACCAAATTGTGACGGCTAATATTTGTGCAGATGCTTCAGGTCACTACAGCCGCCC -GGATCTTATGTCGCTTAACGTGTGCAAGAAAGTGAAAAGGATGGCGGATAAGACAGACAG -TTGCGACAACTGTGGAGTAGTGACATTACGATTATAATTCCATCAATCACAGTGGAGATT -TGGCATGACTTCAAGGCTTTTCATGTCCGAGGCTGAATCTTAATGCATTATTTTTGGGGT -GTAGATATGGTTAATCATAGTCAGGGCTCATTTGTTCATTCTCTCTCTAGTCCTGTTCAT -TTTTTAGTCTACTTGATTAGTTGGACGATATGATTGGTTCAGGGTCATTCATATCATGTT -GAAATAACCGTCTTAGAACAATGGGCAGCAACTCAAAAACCACCCTCTATCTTCCCTGGG -TTCCACTGCATCTATTTTCTCGTCACTATAGTCCATGGACGACTCCTTTGCGAGATTAGG -CGTATCAACATAATTCTCACGGAACAATCCGATCGATTGAATTTTCAGGCTGAAATCACC -ATGTTGGTCCCCAAAGAAACTATACAATCAGCACCCATTTATATATTCCACTGATATAAA -GCTCACCTTCGCATCATGATCCGGAACCTTCTTATATGCGACAAGTCCAAAGGCCTGGCA -TCTTCCACCGGCTTCCCCCGATATGTCGGTCTCAGGTCCTTCCATGACATACGCACTGTG -CGCCCTATCTCACTCGGGCGAAAGTCATATTCCCAGACAAGAGCACTTTGTTCACGATTA -TCTGGCCTCCGTTGTGGGATTTCGTCTGTCAGGGAGAAAGTGTACCGTTTCCCGTCCAAT -GGAACGACCTCTAGCTCAAGGCCGTCGTATCCGGTGAGGTCCCATATTTGTTCGTCGCCG -ACCGTGCGTTGTGAGGCAAACCCTGCGCCGCCAAGTGTTTCAATGTCGAGGTTGCCGTGA -AAAATTACCCCTTCCTGGGTAGGCTCGAGTTTCGACACGGATGATCCACCACGGACGCGG -TCGTCGGAGGCAGTCCAGCGGTTGATGTCCCAATCCGGACCGAAAAGGATTTGTTTATTC -GTGGTATATTCCATCGTGGACGCAAAAGGTGTTGTGAGGAACGTGGTCGAGATCAGTGAC -AGATAAATTGTGGGATTATTATGTATCGCGAGCTGGGAACTGCAATCCGATCGATCCACA -GATCGGCCGGTAGATTCGCAAGTGACGTCATTTGTTAAGCGATAAAGAGAATCTCGCCTG -ACGAACTTAGCTATGGATTCATACCAGTACAAATATCTATCTATTCAAGTGCTCGATCTG -AACTTCAAAATAACATTGGGGTATATTGACTCTATAACGTGAATTCATATCTATTAGACA -TCATAAGGTTTCTTTTCAACATCATAAATGGGCCTGCATGCCAATTTACTTCTTGTTCGC -AGCCATGGACCGTAGACCTTGAACAGAACAGGAATAGGGATCATCGCAATTGCGATACAT -CCAAGCATTGTCCCCGCCCATTGGACCCCTAAATTCTCCATCATTTGCTTGGAGAAAAGT -GGGAACGAGCAGCCAATAGCTGAACGTAAGATTGTATTCGCGGCAATAGTGGATGCAGCC -CTATCAATCGAGTTAGTCAATTGTTTTTCACTGGATAGGCAAGAAAACATACAACGTGGG -GTAGCAGTCAACAATATAATTGAAACACTGCAAGAAGATCAAAAAGATCCCCGCTCCGGT 
-GAAGAGCCCTGAGGCTACAGAAGCCATCCAGTGAATGGATGAAGTGTAACCGGTCCAGCC -GAACCTACTATGTATTAGCTCCGTACATCACACAGTGTAGACGATTCCAAGAAGATAACC -TACCAAAACATTCCTCCGGCAAACAACACAGCGCCTATGATCGCAGGTGGAAGGCGCCAT -TCAGGGATGGGCTTATCATCGTTTGCCGCCAGCTTGATCCTATACGACCCCTGTATCAGG -AGGATATAAACACCGCCAAGGAGTTCGCCGATGATAATTCCGATAAATGCAAGTCCACCG -ACACCCATACTCATCCCGTAAACACCTTGGAAGACAACAGGGTATGCGCCTAACAGGGCA -TAGATGAGGCCATAGATGAAGGAGATGTACAATGAGATGAGGAGCAGTATGGGCTCCGTA -AACAACATCACAATGGGTCGGGCAAAGTTGTTCCGCAGAAGCTCCTTCACATCCACTTCC -ACTTCATCTTGCTTCGCATGAATGCCCCAATTGCGAGTCTGTCGGCGCAAGGTAGCGGCC -TTGCGGACAAGAATGACTGGAGCATAAGTCTCATCTAGGAAGGCAAAGACTAGGACAAAG -CCTAGGAAAACCATTATAGCAGAGATATACATCGTCCATCGCCAACCCAGAGAGCTCGTT -GCAATGAAGCCCCCCACAAACGGTGCCGAAAAAGGCCCGATAAACACGGCCATGCAGAAA -ATCGACATGACGACACCCCGTTGAGTATTGTTGAACAAGTCTGCAAAAACAGCGGGAACG -ACTGAAATGGGGCTAGCCGAGCACATGCCAGCAAAGAACCGACAGATCATAATGGTCTGT -ATATCTTTGGCAACTGCACATGCTATGGTAAAGATGGCACATCCAAACAGTCCAATGGAG -AGTGGCCAACGTCTACCAATCAGTTCTGATCCTGGTGCCCAAATTGTTGGCCCGGCTGCG -AAGCCAAGGACATAGAGCGTCACTCCAAGTGCGCCAACTTCGGTGCTTGTGTTGAACTCT -TTGCTGAACCCAGATATAGCAGCCGAAAATACAGCACTAGTAAACGATCCGGCGAATGTA -CCATAGCATAAGATGCAAACTAGGATTACTCTATTGAGATATTAGCAAGTGTATCCTGAT -GCCAACGAGGAGAAAGGATAGTGAGCATACCGTCTAGATGTGGACCAATTGTATGGATGT -AAGGGATCATTGGCTCCATCAAACTCCACGACATAGCCTTCAGAATCTGGAAGCAGAGGT -GGATACTCTTTATTGGCACCCATAGGAAGCCATTTGTCACGGGGGGTAGGGCCTCTTGTA -GAGCCTACGGTTGTTTTCTGTTGGAGTCGATAAGTGTTGATACGGGTAAGCTCAAGCGTG -TCGATTTCCTCTATTGTGTCGTCGGATTCATGGCTGCGAGAGTCCATGTTGTAATCGGTT -TCCGGCTCATATTTGTCTGGGTGTTGATTGATATGCTCCATTGTATGTTGTCATGTCAAG -ATTTTGGGATTAGATTGAGAGGTTGTGTGGGAATATACAAGTTTCTAGTCGAGGAAGGCA -ACACAGTATATGTCCCTCTTTCGGTCAGCCCATTGATTACTTTTTTTTTCTTCCTAGTCT -TCTGGGTCTTCGGCCAAATCCGCAATCCATTGTGAAATGGGTCCTGATAGTCGCAAAGCG -ACCTCTACTAGGGCTAAAAATTAGTTTTAGTAGATCCATAATTTTCCGCTGTGGCTTTAA -ATGCTTTAAATGTCCACTATCCAGTCATGTTGCCCCAGTAGTGTAAGTCCACGACCAGTT -AAAAATCAGGTCAATCTCGGCTATGATCGGAGATGTGGCTGACATCCGCTGGAAAGTGGT -CCGCCGAGGGCAACCGACCCTAGCCGTTATCATTTTGTTTCAGGCGAACGAAAACTCTTC 
-AATTTGGCTTTGCTTTCAGGCCGAAATGAATCAACAATGACACCATTTTATATTGTAGAC -CCCCTCCAGACCGATTCCGACTTACCATATAAAACCGGTTTGGCGTGTTCCCCGTGCCGA -AGCCGTCGGGCGGGACCCAGTTAGTTCCACTATATTCGGGATGCAACTCAAGAAAGCCGC -TTACTACCCAAGTATATGTTAAGTAATTTTTGTAATGGATGGATTCCTTGATGCCTTTCT -TTACCTTCACTGATCCAAGATGGCAATATTGTCGGGCGTGGATATGGCACTTCCTTTATT -CCATTCTAGTCAAAAGAACACTGCCAGTGATTTGCAACAAGTCCGGATCCTTGCATGGCC -ATACCAATTACATACTATGATCACAATCTTCCTCTTGTAATTGCTTACAGAGGTCTTTCA -GTGTCCAGCCAGCCCAGTCGAAGTCGCTGATCTCAAGATAGACCTGTCCACTCAATCGAA -CCCACCAGGAACCCTGAAAAGGGAAGACTGGCATGTAGGTGTTGCATGACTTGATCAAAG -TCTGCTGCATCCATTCAGTGACCGTGGTATCGGGCTTGTCTCTCTGTTTGACCAGACTAC -CATCTCCCGAAGTGGGCTTCCCGATAGGAAGAAGGATGTTAACCATACAGCAGTCTGTTG -AAGTATGGGTAGCATTGTCCAGGATCACGGTCCCTAGAATGTCTGCAACCCTCTGTCCAC -CTTGACGAGAAAGCTGAATGCAGTAATCCCGAATTTTGGTCTCGCCGCCACATACCGTCT -GTCTCCACCGGATGGCTTCGGGCACTGCTAAGAACGAAAGGTTGTCGGCTGTTCCAACAT -ACTCGAAATTGAAAACGAAGTCTGTCTTTTTTTGCACTGGACTGACACAAGATCCTTTCC -CAGCGGATCGAGGAACAAACCCGTGGCTTGTTGGTATAGTGCTTCGCATCATGGCTTGAT -TGCGCTCCGGGACGTAAAATACAGCACATGAGCGAGGAGTAAACAGCCATTTATGGCAGT -TGCTTACCAAAAAGTCTGGGTCCAGTGAGGATAGATTGAGATCGATTTGCCCGATTCCAT -GGGCGGCATCAATCAGACTCAGTACACCTTCTGAGCGGCAGAGAGCTGTGAGTTCCTCGA -ACGGAAGCCGTAGAGCTGGGTTAGACGAAATTGTGTCAAATATAGCAATGCGCGGGCGGT -AACCTTCTGTGCGTGAAGCCTCGATTGCCTTTTGAAATGCAGCAACCATCTCAGAGTCGC -TAACTGGATGTATGAAACCAATCTCTCTAGTGCGAACCCTGTCTCCACTGGCTTCACAAA -CATAATTCGTTGTCTTGCCACATGCTCCATAGATGACATCCAACTGAAGAATCTCATCTT -TTCCGTCACTACTCCATACCATGTTTCGCATAACTGTATTGACGCCAGTGGTTGCATTCT -GAACAAAGACAATGGTTGACTCCGGTACACCAAGGAAGTCACTCATGGCAGCCCGCGACT -CGTCCAACAGTTCGGGGAAACGATATTTGATGAAGGGACACGGAGTTGCCTCGCATTCAT -CGCGTAACTCATTTGCTTTGTTGCGAATGGCGACAGGTGATGCTCCAAATGATCCTAATA -TATGCAGTGAGCTGGTGAATTAGCATGTGGAGAGCCCCGTTTGTCTTACCATGATTCAGG -TTTCGATAGCCGGGGTAAAAGGAAAAATGTCGGCGCATCTCATGTCCAAATGGTGTTGGC -TCATATTCACACGGAGGCGACATGTTGTTACGTCCACAATCTGCTGGGTTGGATATTTGA -AAGGCGACAGTATCGAAAGCTCAATAGAACGTGGCTTTGTTTTCCCCGCGATTCCCGTGA -TCCCCACTTTCTCTACTCGGACTTCGATGCTACAGAAAAATACAACATTGACAAGGCTCA 
-GATCTCAAACATCCCTGCGATACGAAGAGCCCGCCGATCAACGGTAAATGGGATAATATT -GGCCCAATGCCTGCCTACACAATCATTTCCCTATTCACTCTGTCGTGATTCCGAAGTTTT -ATCTTCCCTACTCCTCCACATGTTACCTAGGTAATTCCCACGCTTACTGTCTCTCTACGT -GGGTCCGTGCGTTCAATAAACAATATCTATAACTATAGAATACACTGTTACACTCAAAAA -AGCTAGTAACCAATATATATATATATAAAATAGTGTTTTCAAGAGATAATTTCATACTAG -ATTAGATAAAGTTCTTTTAATAATAAGGATCTTTCTTAAAATTATCTTTACCTTATTGGT -AAAGAGTATAGTATCTTGATACAGAAATAAGAATAATAGTATATTATTTAAATATTTCCA -CCATAATTTGTATAGAAACTAGGTCGAACTAGGCCTACACCAGGTTTTCTGGGGCCCTAG -TCACTAGGTCTAGGTACTGGTTCGAGACCTTCGACCCAAAATAGGGTTTGCGGCAGACTA -GCTAAGAGGTTAGGTTGGGTAAAGTTGGGTTTTGCTAGGGTTTTATTAGGGCTCCAGCCT -GGGTCCTAGGTTAGGTCCGGAGGATGTATAGAGAGAGAAGTGGGAGACGGAAGGTGGGGG -GTGCACCCCATAACAATCATTATATGTTTATCTCTTATGTATGGGCTCGGATGGGTTTGG -ATTTATGGGGCCACGGGGTAGGGATGAGCTGATTCCGCACTCTTAACAATTAGCATCTAT -CTGTATACTCTAGGAGAAATCCCACTCGGCGTTTCGCGTCGAATTCTTGCTTCAGCTGAA -TAAAGCGCCTGCATAGTCATACGCAACTAGTTAGACTGCAACAGGCTAATTGATTGATAG -TTTGGGTCCGGGTTTGGGTTAGATATTCCAACCTAACCCCAGGTTGTGCGACAGACTGCT -ACCTAGGGATACATAATTAGCTTTCGATACTAATAACTTACCTATAACTAGTATCTTTAT -AAGTAATCTATAGGTATAGATAATACTGAACCCAGTCCTACCAGCGAACCTTACATTATT -AGGAAATCTAAACCGTACTTTCGAGTAAACTATTCATTGCAGGGCTATATGAGCGGTGTC -ATACAATTCTAGTCAATTTCTCGTAGATTCAATAACAAAGGCTTTTCACCTTCTTTTGAC -TCGCCTCTCTGTCCCGAAAATCGGGCCCAAAGCATAATTTTTAGTGAGCTGGGTCGGATT -GGGCTTTACGGAGCGCTGCGACCAAGTAGAATACATGTGGGAAGAGATGAAAACTCGCAC -CTTTCTTGGTACGTGCACATCTTTTTTTTTCTTCTTTCCCCTTGCTAGCTTGAGATAATG -ACGAACTTTTCACAGAACGAATATGGATGGCTCTGGGAATGTGAGCTTACAATCCCGTTC -GGTGCAATGGTCAATCTCTTCCATTGACGTCCCGAGACTTGCCCTAATCGACGAGCATGT -AAGAACTAATATTCATTGGAAGCCATAGTGAATATTAAAATAACAGCCTGGCCTCATCAA -CTGCACATGATATTGCCAAACGGCCACTATCATAATTTTATCTCTTCGCTTGTAATTTGT -CAAATCCATCATGATACAATATATTCAGTGGCTCGATACCAGGTAACGGAAACCCCAAAT -ACAGAAATGACGAATAATCAAGTGAACGATGGCCACAAACACGCTAGTGGGATTAACCAC -CAACCATCATGAAGCTCATTAGTCCAATCCAGCCAGCCACGAAAACTGCGGTCACGATTC -CAGCTCCCGCTCTATCAGCAGTTGTGACTGTTATCGTCTTTTGACTGGACGCAGTAGTGT -CATCTTCACCTGCTGTTGGATTGCTGGTGCTGTTCCCGCCAGTGGTCGAGGTTACCGGGG 
-CAGTCTTTTCAAAATTGATCAAATTGGATGTGAAGACTTTTGTCGCGCTGATCTGTTCTT -CCATTCCCATCCAGCCGTCCCATGTAGACTTATACCATTGGACACCACAAGTTCCATTGC -TATGGCCAGTACAAGATGCAGCGGCTGCCTGACCTGATGTTTGGAGCTTTGCGAGGATGC -TGTCAAATGTTGATGGAACAAGTAGAGCAGTCAATGCCAACCAAGAGGAAGTCAGTCCTT -TGAAGAGGATCTCATTATTATTGCAGTTTTCTGCAGGCTCGCAGTAATAGTCTTCAATGA -TTCCACCAGTGATGAAAAATTTGTCAAATGTCGCGTTTAACAAGCCATTTACGGCCGTCA -GCCATTGAGTCTGGGCAGTCCCATTTGTCTGTAGGCATAATAAGTTAGTCAAAATACTTT -GGCAGAACAAAAAGAACGTGGAATTTGGAATTCGACATGGGTATCTCTCACATAGTTGTA -CATATACGCTGCCCCCATCAGGTACGTGCCGTAGTTGTACGTCCATTGAGCATTTCCTTG -AGTCGCACAATCATTATCAACATCGGTGGAGTCGGCAACATTCCACGTTGAGTTGCTAAG -CAGCGGCGACGAGAGAGACCAATCCCAGATCTTGTTAGCCCACTGAAGGTAGATTGCATC -CCCCGTGTATCGGGCTAGACGTGCAGATAACTGGAATAAGCCCCCATTAGAAATGGCATT -TTTCATTGCATAGCCAGACTGGTAGGGCCAGATTTGCCACCTCAGGCCACCACCACACGA -GGATGTATCCCATCTTTTGATCTGAGTGTTGAAGACACCCTGGGCTAAAGAGAGCCAAGA -GAATCCGGTCGGGCGGTCCGGAAATGAAAACTCAGCAGCCGTCATGGCTGCTAGACCCCA -GAACATTTGGTCGTCGTTCCCCTAGTTGATAACAGGTATTAGATCTTGTTGTCTTTTGAG -AGTACGTATGATGGCGCTCACCAGATAAGAGCTGTAATTGCTTGGCATATAGTCCCCATC -CGTGCCTGATTGCCATTGCAGTCCCACACTTAGTTCCTCATTGTATGTCGTATCGCCAGT -AAAGTGCCAGTACTCTAAAAGTGCAAGGAATAATGGAGCTCCTTCCCACCATTTGTCCGG -GAAAGCTCCAGGGATCTGACCGGTCTCGTTGCCGTAATAGTATTTCATCATAGCGTAAGT -CGAGGTACTCGCAGCATCTTTGATCGACTCTGTGATAGAATTACATGTTAGCGTCTTACT -ATTGTTGGTAATTGGCACGGAATGAAAATCCAAAAATCACTACCCACGTTCATTGGTAAT -ATCCAATTCGATTGCGTGTACTACGCTGGAAATTAGTACTGCAGCGATAGCTCCTCTCAA -GGGGCAACCCCCGAGCAAGTGCATCTTATCAAGGCGATGTACGGGAAATCCCACAAACTT -CACCAAAGAGAGATCTCGGACAGAGACGTAGACAGATGTACTCAACAAGCATAAAGGAAT -GGAAATAATATTTTCTTCGAAATCAAAAATCATGAACTGTTTCCACTATAATGCCATATT -TAAATATAGAAGGGCATGCGTAATCCTCGGCGCTAGCGTCGTCAACAGTTTCATTGTGGA -GTATAAAGCCATTGGAGACAAATGCCAAGCTTTTACAAGCTTTGCTGGAATCGAGAGATA -TCAGATCTCTTCATTTCGTGTTTCCACCAAGTCCTTCAGTCAATCTACCCGACACTGCCT -GAGGTTAAAAAAGAACTCACTATTCATGCACAGGGAGACCTGCTAGTATCGTCCTTGTTG -TTCAAAATTTAGGAATTAGGACAATACTCTAATCGCCATACTGATCTCATAACATATACT -ATATACCTGGAACTTTATGTTTTAGATGAGATCAACTGTTCTTGGCTTCTAGTCAATTTG 
-TTCACGATAAGATCTATTTGGGTCCAGTTCGCTCCGGTCGTACCCTGTCGAAATTCACAA -ATTGCGAGACAGATTTGCGGATTCTTTCTCGGTCTTGTGGGAATCTTCCCTTCTCCAGCG -GTACCCCCGGTAGAAAACGACCCATAGAGCAGGACTACCCAGTACCCAAAGGCCAGCGAT -GAATGTAGCTGCCCATCTAATGCCAATAGATTTGATGAGAGGTGATGCAATAGCCGTGGC -TCCCGCGCCCATAAGACAGCGGAATAGATTGTTCGCTGCCGCAGCAGTGGCGGCGCTGTG -GCAATTGATATCCCCAACCAGAGTGCTCAAGGTGTTGAATGCGCTTTTAGTTATAAGACC -AGTGAAAACAACATCACAAGAGGGCCAGCCAGTGCGGTACGATATTGCATCACCCAGCCG -TTAGCTCAAATGCAAAGACTAGCCGAATATATCAATTAGATGGTGATTGCCAGGCGCGCT -CGCTCGATATTTGATTTTCCTTTTTTTTGTTGCCGATTATTATAAATGGTCATTTCCTGC -AACTGTGCCTCGCGGCGGAAGTTCCAATCGAAGAGCCGGCCATTTGCCCAGCGCGAGCTT -ATGCTTCCAATTCCTATGGGAATAGAACACAGTCCAATCTAAATAGAGTTGACTCCAAAT -TAACTGCACAACTCATTCAACAAAATGCTGATAACTGCCATGTACCCTGCGTACAAAGAA -GGATCCGTACAAGAGGACCCACCCCATCTCTTTTTGTGTGGCTATCAAGAGAGCCAAACA -TGGATATGGGCGCTTTTTCAGTTCTTGGACGGTAGAGTGATCGGCGGTCACTTGTCCGTT -GTGGAATCGTGATCTACAGGCTGCCGTGATAGAAAAAGGTACATACATCATCATGTCTCC -ATTATTTGGGCTCCCGCCGCGTGACACCCTGACGTTTCCCACAAATAGGCGCAATTGAAT -TGTGAGGTAATACCATTTGTTCGGTTTAGATATGTGGCGATGCATCTATGGGGTTGCCTC -GGATTCATTTTTGTGCGGGGCAACCAATCTCTCTCAGCACCTCGTCATCCTTGCTTGGTC -TTACTCCCATAATCGGAATATCTGAATCCGTAGCTAATCAGTACTTGTAACACCTGAGCA -GGATTCTTTGAAGACTCGCGTAGATTGCAATTAGATATTCTCTGTACTCGTTCCGAAATA -TTGGAACTGCTTCAAATCAAGCTCATTCTGAAGTATCGTCGAGACAGTAAAGATTCATAG -CATGTAACTATCGGACTTCCATCGTGCAACCTCCCAAGACAGGTGACCCTGGTGCCAGCT -GAGAGGGTCTAAGTGAGGGGTAACTATACAAGCCAGCAGAATATTTCTGGACGCTTCGGA -CAATCCGGGGTAGCTTTTCTTTCATCCTACAAAATGAAGGGATTGTACTTCCCTTCCCTC -CCCGTGGAGTTGATCGAGTTGATTTCAAGCTTTCTCGATAATGACTTAGTAGCACTACGT -TTGGTCTGTCGTGAACTCCAGAGGAAAACATTCCATCATTTTGCGCAGCGGTTCTTTTCA -TCTATCAAGACCGACCTATCTGGGGACAGTCTACGTCGCATTGATGCTCTCTCACAGAAT -ATTGCACTACGCCCGTATGTGAATGGGCTAGCCTTCATGCTTCAAAATGGTGTCGGTCGC -GGCTTGGTTTGGGACCGTCATCCATGGGGTCCCATCTCCGCGCCGCTAGAGGTTGAGGCG -ATTCGGTCTCTACGTGATAATTTGATCCAGAACTTGACTAATTGCCGATCATTCTTTATT -TTTTGTCGATACCCGGAGGGTCATCCAGATATGAATCATGTCACTATCACCGACGCTGTC -GCTGTGTTCTTTGCCCTGGTCGTCGATGCTCGTCTACCGGTTACGTCGTTCCATCTTATA 
-TATGCCAATAAATACTCCCGCACCTTGATCATGGACATGCGTCGTCTTCCCAAGCTCCTC -TATCGTCAACCGGAGTTCAAGTTGGTGTGGGGCAATCTACAGAAACTCTCATTGGAACAA -TACCTCACGCTGGATAACTTCGGCTTCCTGCTTGAGCTGGTTCTTAGCGCTCCAAACCTC -CAAACCCTCCTGCTGAACCTCGGCTCACACGACTTGGCAGCTGAATTCATGCATGAGCTG -GCTGAGAGCGCCAGCTTTTCCCAACTGCGTGAACTTGCCTTGTTCCGGACATCGATGCGG -GGTCCCGATCTCCACAAATTACTCGGGAATATCCGCCCAAACTTGACTTCACTCACATTA -TACCATGTTTCTCTAGCACCGGGAAGCGATTGGGCTCCATTTTTAGAGGAGCTAAGTCAG -GGATTTCTGGCTCTACAAAGCATCTCGCTTTACTATCTGTGGGCCAGTACGCCTGTTAAA -GATCTCCTGACCTTTCCAGACATTCCAAAGACACCTTCACTGTGCACTTCGAAGGGCCAA -CATCTCAATATATTCTATTCTGAAGATATCAAAACCCCCACTGTCCTCGGGGTCGAGTAT -TCAGGGTCGAAGATGTCGCACGTGTTGAGTTTATTACAGAAGACGGAAAGACCTTGCTCA -CAGATGGAGCAGTTTACTCCTATGTGATTTATGTACAATTTACATTCCAGGCATGTCTGA -ACCttttttcctcttttttttttttttGAGCATATTTACTTGGTGGTGTTGTGCAATATC -TACTCTATTCTGGTTTGTCGTATGAAAATCCCTCTCTCTATTTAAAGATGCAATACAAAG -AAGTCATGAAAGTTGATGCCGCTTTTTTAAGCAATTGTTCTTGATCTTGTTGATGCCGCG -ACAGATCCTTTGGGTATGTATGAAGAACGCAACGGTGCAAAACAGGCGGTAATAAGAAAT -CGGCAGATTGTTCACCTCAAACTTCAATTACCACCATATTCAATGGACATTGTTTTCGGG -GTTTCAAAATTCTTTCTTTATCCAGCAAGTAAGCTTATCCCCTTGATTACAAGCAAAGAT -ATCACTCGATATACATTCACATCATCCCCTGCACTACATGAAACCCTCCATCTTCTCAAA -CATCCTGGGCCTTGAGGGTCACTTTCTCCACATTCCTTCCTGTGACTTCGATCAGAGTAT -TCTTGATATCCCCTTTGAATGAAACAGTCAACCGAAGCTGTTCTTTCGGCCAAAGAGTCA -CATAATTATCAGACCAATAAACCGGTGCAATCTCGGTGTGATTCAGTGTATTAATAGCAT -TCAGACGAATGAAGACTGCCGGTCCCGCAGATTGATTTTCGAGCAACACCTCAGCATGAG -TCAATCCATCATCTGCTGGAGATTTCACCTCCAGGGTCTGTACTGGCATGTCTTGTAGTG -AAGCCTTTAAAGTGGCCGGTGTCAGTGTCTGCAGCTTGGTGTATTGGGCAAATTTGGTAA -CTGGCGTGGTGAACCAATCAGACTTTGACCAGTCCAAGACATCTGTGGTAGGAGAGAGCC -AGTAAACATTGCGGCTAAGGACAGCTTTGGAAGTAGTGTCAGTGAGTGTAAGACGCAGGA -AGCCAATATCTTTGAGCTTTTTGACTCCCGCAAGCGAAGTCACCACCTTTGAGGAATGAG -GCGCTGTGTTGCTCTCCACTGTAGTTGCGGATATTTTTTTGCCATTGGAATCAATGAAAT -CAACTTGGATCCTCCGGTTTCCGGTCTTTTTGAGTGAGTGGTTAATCAACCAAATTTTCT -CGTTCTGGTAATCATAAGCCACGTGTTCCTCACGCGCTCCAACCTTGGTGCCGAAGTAGG -CTCCGATAGGATTAAGATAGTAATCAAACAACTGCCAGTGCAGATTTGGCCAAGCACTGT 
-TCAGCATCCAATAAATAAGGCCCGTGGCAGGTCGAGTGGCATTTTGCTGCGTTGCGTATC -CTTCAAACTGGGATCTGGTGGCCTCATAATCTGCCATTTGACTTTTAATGACGTAGTCTT -CCAGATTAGATGGCTTTCCGTATCGGGCAAAGAGCGCCTTGTTGTAGAGCGAGCGGTCGT -AGAATTGCGAATCATACCGAGACATGTGATATTGATTTGCATTTGGTTCTTTCCAGAGTT -TTTCCAGATCGGCATCAGACATGAACTTCTTCAAGCTGCCCATCTCAGGGGTTCCAACTC -CGGCACCCAATTCAGATCCAAAGCCAAATGCCGCTCCTTCTTTGTCGTTGTACCAGTAAT -TCGGGGGTACCCAATCGTACGGGCCATCCATCTTCATTCCGGACGGTCCAAGAGCGTCGG -GGTAGCCACGCTTGCTGGCTGATGCAATGATTGGGTTGGGCCAATCCATAGCTTTGAGTG -CATCGAGATAAATTTTGGTGGCCTCATCGTTTGGCCAGAAGTCCGAGCCAACCAAAAATC -CAAGCATCGAAGCATGACCTTGCATCATCTCTGCCTCGTGTAGCATTGATGCCTTTGCGA -CCGGATAGTCTGCCTGTTCCCATTTGACACCGTCGGCCTCGTCATTGTACTGGATTTTCT -TTGTCAATGGGTCTCTCCTCTCAGGTAGTGTCTTGAAAAGCCTTACCTCCCACCCTTCCC -ATTTATCACAACATTCCCATCCCGCCAGGACCATCATCCCCATTTTATCTGCGAGTTCAT -ATAGCTCTGGGTGCTCCTGCTTTCCCTCCAAGCGCACAGTATTGAGTCCCATATCCAACA -TGTAGGCGAAAATCTTCTGCACCCTGTCAATATCGAATCGCAAAAAGATATCCGGCCCAT -AGCCAGCTCCAATAACCTGGAAGGGCTCGCCATTGACAGAGAATTCGGTATCCTTGAACT -TGTTCAAATTGGACGATACGTGACGAATTCCAAATTGTTGGCTAATTAAACTGTCTGACA -CCACCTCCTTGGGTTTCTGGATGATGGTGTCGGCCTGTACTGTGTATAAAGGCTGCTCGC -CCCAAAGGGCAGGCCACCAAATAGCAGGGTTTGTAATTGATGCAGTGATTGACACCGTTT -TTTTCTCGTTTGACTTCAGGTCAAAGGTCCCGCTAATTGGCACGGCTGCGGAGCCATCTG -GTCCTTTAACAGTGCCCTTGACCATTACCTGCTGATCAGTTGATTCGTGGTTGGTCAAGT -CAGTTCGGAAGGTGACCTTCACCGGCTTGGAGTTGGGCTGAGTAAAATCGGTGATGACGC -GGAACGGCGACATTGAGACAGCTCCAGTCTGAGAAACCTCCACATTTCGCCAAACGCCGG -TGCCATTGTCGGCTGGGTATGGATTCCAGTCCACAAATCCCATGGCAAGATCGCGAAGAT -AGTTGGTTGGGAACGCACGAATAAGAAGACAATTGGCTCCTTCCTTGACGTGTTCGGTCA -AATCGTACTGATAGCCGCCATATGAGCCTTGCTGCTGGTCGCTCGATGCAATGAGCACTC -CGTTCAGATAGATGTCTGCCTTGGATGTAATACCGTGTGTTTTGAGGGTGAAATGTTGGC -CGGTAGATGGATTTAGTGTGAACTCTTCTCGGTAAAGCCAGGGTACGTCGAAGGTTGAGG -GATCAGCGATAGTTTCCATATTGTCTGAATAGAACAGGTCCTTCTCCTTGTATACCCCGT -TCTCAATAAGCCCAGCCATGACAGTGCCACGAGGACCAACCCGATACCATGATGAGACGT -CTTTTCCCGGTGTGGAGAGTACATCCAGCTTGTCTGATACTTTGGTCGATGATTGGAGGT -TCCATCCTGGGATAGCAGCAATATCACCAGCAGAGGCGACAAGGGGCTTGGATTCAGTGA 
-TCGCCGCACAAACACTGGCACTGAAAGCCAGTGCCATGCAAAGCTTACTCATGTTCCTAA -ACATGATCATTTCGTTTGCTCCGTTTCTTCAGACACCAACAGTGATGCTGAGCTTGTATC -GCAAGAGGTATGCGACAGCTTGTTCATTTAAAACATAAGGATCACAATGTGGCGTTCAGG -GCGAAGTAACACTGTGTCATGCGGTAGTTTCATACGACCGTGGGATAAAGGTTGTTTTGT -TAGGATTATCTCCTCCGACGACTTTCTGGGCTGGGACTAGTTCACACGGGGATTCCGGCA -AACCTGCAATATTTCCCGGTTCGGTGCAGGATCACCAGGGGAATTACCCGACGGAGTTTG -AGCAGCCTAGCTGTGATGATGATCGGGTTTCATTTCTGCTCCGATGAAAGATTATGGACT -GGGCATATTCCTATTCTGGCCTTATCAGGATCTTTTATTAGTCCTCTTGGGATTTGTGGA -GGTTGCATCCTGCCGTGCAGATCTTTTGCTAATATCGAATCCAAGGCCAATAAAATAGAA -TGATCCAAGGGTATTACTGAAGTATCCGTACGGAGTCATCCGTTGCAGTGTGGAGAAGGC -AGGCTGTCCACGATTAGTTAGTTAAATATGGATTTTGTGATGATGTGTGCCTGATATCTC -AGTCATCAATTTGTGGCCTTGGGAATGTTCGGGCACTAAGGCATCAAGGCATCAGGCTCC -AGTCGGCTTTCATCTTTCAGCACATATAAACACAGTCCATTGGACTTATCGAGAGACTCG -TATGGCTCTAGTTTTTGAAAAGAATGAGTACCTAGGGGCTGGCCCAAAATAATCATTCTC -GCAATATACATAGCCTCTCCTAGGGGTCACCCTGCGGTATGTCGAGAAATTGTAGGCTTT -AACCCCTACGCTGTACGACAACTCCTTTGACACGAGTGGCGCGTATTTTACGGCAACACA -TTCGTCCCCCAGATCAGAGGGAAATTGACAATGCGATTGTTCTCTAATTCACGCGTGCAA -GCACAAGCGACAAGGCGTGGCCCCCTACCGGACAGGGATACAGGTTACAACACCGGAATG -CGTCAATTGTTGCAACGCGTTCGTTTGCGGCCCTAGACTGCCCTGTCTACGCCCCTTATC -CACTGTCTCCTCTAATCAACCATGCCTCTCGGTGATTCGCCAGTTCCCTCGGCTGTCCAA -TTGATCTGTTTGACAGCTGCTCTGGAATGCCAGGACGATCCCTTATTGTGGAGCGCCAAC -CAGCAAACCATGGAAATGAGAGACTCCGCGTTGCACTCTTGTCTTGTTATCATCCACTTT -TCAATTGAAGCAACAATGGGATTGTGGGTTTGTTTCTTAATGCAGTCAAGTGTTGAAGGG -TCGCCTATACAATTTCCCCTACAGGAGGTGCAGATGGTCTTGGGGACAACGCCTTGTTAC -CATGCCAGTGCTCCTAGACGGTTGGAATGTGGCCTTTCAATCCACACACACGTACCCCCT -CTTACTTCTCGGTCCTGAGGAGTCTGACGTTAGCTTCTTCTCCAGCTTTCTGTATCTTCA -TCACGCCATCAATTGGAACCATCTGTACTGTCCATCTTTACGATATCTTGAGATAAATGG -CTGTGTGGATAACCGATTGACATCTGTTCCTTCCACAACAACACTGATTCTCGGAAACGG -ATCGCTGTCGACCTTCGGTAGCGTTTCTGGAAAATTCAACTCGAAGAAGGTTGCTGTTTT -ACCTCGCTGTCAGGTCAATCGACATATCAGTTGCTATGCGCGTGTTCAATTATGCATCAC -AGCCTTCATGGATGGCTCTGCTAGCTGCAGCTCTGCTAGGCACTTCAACTGTCCAAGCGA -TAGAGCTGGATCTTGATGATGAAGGTAGGTGTTCTACCTTTCTTTTCTGCGTTACATGCG 
-CAACGACTAATTCAAAGTCAAAGATTCCATCCGAAAGGCAGCCAAAAATGTCGCCACCAA -CATGATGAGCTACTACACTGGTATGAATCCCGGCGATACCCCAGGTAATCTGCCGGATCC -TTATTACTGGTGGGAGGCCGGTGCAATGTTTAATGCTCTTATCGATTACTGGTTCTACAC -GGGCGATGATACTTGGAACGAAATCACGACACAAGCTTTGCTCTGGCAGGCTGGAGAGAA -TAAAGCGTTTATGCCAGCCAACCAGAGCAAAACCGAAGGAAACGACGACCAGGTTTTCTG -GGCCTTTGCTGCTATGACTGCGGCGGAACGAAACTTCCCAAACCCACCAGCAGATCAACC -CCAATGGCTTGAAATGGCACAGGCTGTCTTCAACACTCAAGCTCCCCGGTGGGACACTTC -TTCGTGTGGCGGTGGTTTACGCTGGCAGATTTTCACGTGGAACAACGGCTACAATTATAA -GAATACCATCTCCACTGGCGGATTCTTCAATCTTGCTGCTCGCCTAGCAAAATACACCAA -CAATCAAACGTACCAGGATTGGGCTGAAAAGGCGTGGGACTGGACTGCTGAAGTTGGATT -CATGTCCCCCGACTACCGCTTCTATGATGGTGCTAGTGACCTCACCGAGTGCAAGCCAAT -AAATCATATTGAATGGACTTACAACGCTGGGATATACTTGCTTGGCGCGGCTAACATGTA -TAATTTGGTAAGTGAACTTTGCATTTTCTGCCTTCTACTGCCTTCTACTGCTCCAAAATG -ATGTCTGACACTTTGAACAGACCGAAGATCCGAAATGGAAGGAGCGTACTGAGAAGATCA -TTGACGCTTCTGGCGTTTTCTTCTCTCACAACCCACCGGACGTGATGTACGAGCGAGCTT -GTGAAACTGTCAATACTTGCATGGTCGACCAACGCTCTTTCAAGGGGTATTTTTCCCGCT -GGATGGCTCAAACTACCCAGATGGCCCCTTTCGCATACGACAAAGTCATTAAACGGCTCA -GGGCATCTGCAAAGGCAGCTGCTATGACTTGTACGGGTGGCATCAATGAGAATGTATGTG -GTCTCAAGTGGACAGAGCAAAAATGGGACAAAACAATAGACTTTGGTCAGCAGATGGCTG -CCCTTGAAGTCATTCAAGCGAATCTGATCACTCGTGTGGCAGCCCCGGTCACGAATGATG -ATGGTGGAACGAGTAAAGGGAACCCGAATGCAGGCTCTAAGCCCAAAAAAGCGATTCCAC -CCGAACTCGACTACGACATAACTGCAGGTGACAAGGCTGGTGCAGGCATTTTAACCGTCC -TTTTTTTGATTGGCATTGGCGGTTCCACTGGATGGCTAATCTGGGATTGAATCACATATA -tttcttttgacgtttttcttttatattccttctgacttctgtttttcttCATGCTTCGCC -GCTTACTTTGATTCGACATGTATTTTGGCAATCTGCGCTTTCTCTTACATTAATCTCGGG -ACGTGGTCAGTGGTGTTTTGTGGATATTTTTTGGAATCTAACATATCATTCTCACTTGAA -ACCGCTCTGTATCCTCGGACATATACGGCTGGAGGTGCATGCCCCCATACTACATATGCT -TTTTGGTGAAACCAACCCATTCAATATTCACAATAAGCCCAACCGAAAGAGGGACGGCAT -ACGTTGCACAGTATATTTCCGCAAGTGGGTTTTGCTTATTTCTATAAATTGTTTGATTTA -TTCACCAAATGACTACGACTGAGTATCCAGACAAAAATTTACTTTGCATCCCTGAATTTT -CGCACAGCTCCTCAAAGCATGGGTTCAGTCATAGACTATTCTCAGGTCGAGATTCCCCTA -TTGTGTAAAGCAGGGGTGGTGGTCAATGAAGGCCCTGATTTCCATGTCGAAGTACAGATG 
-GTGCCTGTGCCACAGCCAGGTAGGATCCCAATCGTTATTGGAGTGACAAACATTCCACTA -TACACACGTTCACGGTCATTTATAATCCTGTGTAGGCCCTGACGATGTTCTGATCAAGCT -AAACTACACTGGAATTTGCTCGTCCGATATCCATACGATGCAAGGAGATGTTGGTCTCCC -ACTTATGTCAGATTTCAGGGTGCGATCTCCGGGGCATGAAGGTGCGGGCGTAGTTGTTAA -GGTTGGAGCAAACGTGAGAACTTTCAAGTTAGGCGATAGAGTTGGAATCAAACCGATCAT -GGATACATGCGGATCCTGCTCACTTTGCTGGGAGGGCAAGGAGCCATATTGCAAGGGCGC -GATTTTCACTGGTGTGATGATTGCCGGTAAGATTGACTATGATCATTTCTAATGGTCTGG -ATTGCAGCTCATTATATCAGGTACTTACCAGCAATACTTGGTATCACCGGCAAGATTTAC -ATCGCCAATTCCGGACGGAGTCCCTGACGAAGTTGCCGGCCCAATTATGTGCAGTGCCAG -CACTATTTATCGATCTCTGGTTGAGTCCAACCTTCGACCTGGGGCTTGGGTTGTTTTCCC -TGGCGGTGGTGGTGGTGTAGGGATCCAAGGTGTACAGCTGGCTAAGGCAATGGGAATGCG -CCCGATCGTGGTGGACTCGGGACAATCCAAACAAAAACTGTCGCTCGAGATGGGAGCAGA -GGCCTTTCTGGATTACAAAGAAATGGCTGATCCCACCAGAGCTGTGATTGAAATTGCCGA -CGGGGTCGGGGCTCATGGAGTGATTGTGACGGCCGCTGCTGCTTATGGAAATGCACTTTC -ATTTATCGGAGACCGTATCGGAGGGATCGTTATGTGCATCGGTCTTCCTGCCCCAGGAAC -GACGACGATAGGAGCCGATCCCGGTCAATATGTTTTCAAGAATCTTACCATCAAGGGCAC -CCTTGTGGGTAGCCGTAGTGATACTGCCATGGCGTTGGATTTCGCACGGCGAGGAGCGCT -ACGGCAAATTAGTGAAGTGTATCCAATCAATCGCATGCCCGATGCAGTGGAGAGACTGCG -GAAAGGGGGAGTGGCCGGACGGATCGTGGTCAATTTCAACTGGGAGGAGTGAGCGGGTGA -TCACCACTTTCTCAATTAAATGCTTCATTCAATTCGAAATCAATTGGTCTGACAAAAAGA -AATAAAACGAAAATGCTCACACTCGGAATCGAACCGAGGATCTCATCATTACTAGTGATG -CGCTTTACCGCTAAGCCATGCGAGCTGATTGGTGGAGGACAATGTGACTGAGTGCTCTAT -ACAcaaatgtcaaaaatcgaggtacaaaaaacaaaaatctaaaaataaaTGGACTTGCAG -GGAATCGAACCCTGGACCACTCCCATGCTAAGGGAGTATTATACCACTAAACCACAAGCC -CAATTGATGGAAGAGATATCAAATTAATGCTATATAATCCTAAACTTGATGTACTTTACT -TCACACCTCAGTGTTCTCTATCTGAAATTCGTTGATATTTTAACTTTCGAAAAGACTATA -TCATTATAATATAAAAATTCACAAACTCGACTGTGTCTGCGCCATCTATATGAGTAAGAT -GACTGTGGTCTTAGGTCATCCTCCAAAGGGAACACAGGTAGGGTACTCCGATATATATAA -ACGCGTGGTTATGACGTGACACACAGGATTATATTAAAATGGCGTCGGTTTTTGAAGACC -CCCTGACTCCCCCGAGGAAATCTTCGGCTCAACCTCGGGACCTCCCTTCTTCGAATTTTC -CAGGTGATCTGCAGAGGTACACAATGCGGAGAAGTTGGGGAACTTCTCTCAGCAGATGCA -ACCAGACATTGCAACTCCTTACCAGCTATGTCTCTCGAATTGCCGATCCACACCATCAAA 
-AGCTAAAATCACACGAGGAAGTGGCGCTAGACGCATTGCAATTGAAGGCATTACCAATTT -GAGTTTGACGCCATAGACGTAGTGCTGTTCCAATGGACTGACAATTAGCCGGCATACCCA -TGGACGTCTGAGAGTAAAACCCCCATTTAAAGGGACCAGTGCGTACATAGAAGACGAAAA -GACGACCTAATAGTCTACTCGAAAATGCGAGTACCACTTATCTCCATTGCAGCCTTTGCT -GTGGCTACACAGGCACAGGAATTATACTTGACAACCAGTCATTCCGCTCGACCCCAGTGT -ACACAATCAAATGTGGCCCCAAATTACCAATTTAAGCCCTTTTCTTATACGTTGAGTGAG -ACAGTTCGGTACGAGATAAATAGCCCTAGATTACACGATCGCTTGCTAACAAAAGTATGA -GATATGCAATCTCAGTTCCTTCCCCGACGACAACGCGGAAGTATGCCCCGCCCTACACTG -ATGCAGTCAAGCATTTAGCCACTAGCTGGTCAACCTCAACTTGGGGCAGTTGGGTACCAG -AGCAAACCATCATCAGTGCGACTGACACGAAGGACCCATATGGACAAGCTGCATGGTCCT -CAATGTGGTTACAGGCGGATCTTCACAACTATGTATGTCATCTGCGCTGGAATTTTATTT -TCGAAACAGGATTGAAGCTTACTCCGTCAGACCACCACTGGGTTATTCTCGACAACTGTC -AGTCCCACACCTGTGCCCAGCAGCGAGCTGGTCTTACCACCGCGCGACTACTTTGGTCCC -ACAGATTGCTATGATTTCCCAAAAGGCTTTGTGTTTGGAGTTGCAGGGAGTGCAGCCCAG -ATTGAAGGTGCAATCGGTTTGGATGGTCGATCACCTTCGTTGTTGGAGAAATTAGTACCC -GACAGCAAGCCAAAGGACTATGTCACCAACGAGAACTATTTTTTGTATAAGCAAGATATA -CAGCGGCTAGCTGCTATGGGTGTGGAGTATTATAGCTTTACAATTCCCTGGACGCGGATT -TTGCCTTTTGCGCTGCCAGGAACACCTATCAACCAACAAGCCATTGATCACTATGATGAC -TTGATTGATACTGTCCTCGATGCAGGGATGACGGTAAAGAACATCAGATCCCTCGGCGCC -TGTTGGACTCCAGTATCTAACCATCATCGCAGCCAATTGTTACAATGCTCCATTTCGACA -GCCCATTAATGTTTGTCGCCAGCGACAATATCACAAAACGCCCAGATATTGGCTACAACA -ATGCCGGTTATCAAAATGAAACCTTTGTCGACGCATTTGTCAATTATGGAAAGGTATGTT -CTCTAGCCCACGATTTATCAATCCTCACGCAAGTTTGACGTGGCTCACTTCCCAGGTTCT -GCTTGCTCATTATGCAGACCGAGTCCCAATATGGGTGACCTTCAATGAGCCCCTTCTATA -CGCATTCAATTTCAAAGGAGTAGACCATGTGGTCAAGGCCCACGCCCAGGTCTATCACTT -CTATCATGATACGTTGAAGGCTACAGGGAAAATTGGTATCAAGTTCAACGACAATTTTGG -AGTGCCTAAAGATCCGAAAAATTCTAGCCATATCTTAGCAGCTAATCGCTTCCAAGAAAT -GCAACTGGGAATATTCGCAAATCCAATATTTTTGGGGAAGCAATACCCAAAGTCCGTGTT -AGAAACACTCCCTGGAGCTAAGCCCTTGAGCAAATTTGAGCTGAAGCATATCCACAACAC -ATCTGACTTTTTTGGGATTGATCCGTATACCGCCACGGTAGTATCCCCAGCCAATGAGGG -AATCAAGGCTTGCGCTACCAACCTGTCAAACTCGAATGATCTTTTTCCGTATTGTGTCAA -ACAAGAAACAAAGAATATTTACGGATGGAACATTGGGTACCGCTCTGAATCCTACGTTTA 
-CACCACCCCGACACATTTCAGAGAGTATCTCTCCTACCTATGGAACACTTTCCGACATCC -AATTCTGGTGGGAGAGTTTGGATTTCCGATATACGCAGAAGCGGAACGGGAGCTCTCCGA -CCAGCTTTTTGACTCTCCACGCAGCCTCTATTATCTTTCCTTCATGTCTGAAATTCTCAA -GTCAATATATCAAGACGGTGTGCATTTTATGGGGGCCCTTGCTTGGAGTTTTGTTGACAA -TTGGGAATTTGGAGACTACGCACAGCAATTTGGCATTCAAGCGGTCAATCGCACGACCCA -GCAGCGCCACTACAAGAAGAGCTTCTTTGACCTGGTTGACTTCGTGAAAACTCGACAGTC -GAATAAAGACCGAGGTGATCTACAGTGAATGTCAGGTAGCTAGACTGACAAATCTAGAAT -GAACTTGTTGAACTAGAAGATTGAAATTCTTTTGTTGCAATAACCATTTCCATTAGTACA -AAATAAATTGGCACAGAGCATCTCTACTGTGAGGCATAAAGTAATCCATTCTTTACCTCC -AATTCTTCGTTAGGGTCCTAATTCCTGGCATCAAACTGCAAGGAGGTGAAAGATGACCCC -ATACCCACCAAGGGATCTCTTTGCCGTCATAATGACTTACACCATTTCTTGTCTCCGATT -TAGGCACACATATGTTCAAAAATTTTCCATTCTGATCTGGTGTTGCCTTTTGTATGATAT -CCAGGACCTTCTCTGCACCTTTCTCCACTGAAATATCTGCTTGATCGGTACCAAGGTCGG -TGCAAAGCCACTTTTGATGTTAGTAAAATTTGAGAAATTTGCGGTCACCAAATAACGGAA -AGGCTTAACTCACACCAGGACTGATCGCAAGGACGGTGAAGCCATCACGCGCATATTGCT -GGGCGTATTGTGCAGTGAGCATGTTCAATGCGGCCTTTGAGGTCTTGTATGCCGGTGTTG -CCAGGTGGTCATGGAAATCAGACATAGCTATGGAGCCTACAGTGCTCGATCTTGAATTTA -GGTCAGCTTCTTATGGCAGCATAATGAAGTTCATTCAGTTTCCTCCCGCTGTTCTCACAT -GTTGGTTATTGCTTTCTTGTCGCCTTTGCACAGAAGTGGCAGGAAGGCGCGATACAAGAT -GTGTCCCAGTAGCATTGATATTGAACATCTCGTTTAGATTGTCCCTATGTGATCAGTATT -ATGGCAAGTATTCCGACGGGCGATTAAACTTACATTCCTTCCAGTCCAGTTGGGGACCAG -TCGCTCACTCCAGCGTTGTTGACCAAGTAATCAAGGCCCTTCACTTGCAGCTGGCGCTCT -ACTACTGCCACAGCCTCTTTCACACTGGACTCATCAGTGACATGTAGCTTGACGATCTGC -AATCTGCCTGATGAAGTACTTGCAATCTCAGTCAGTTGAGGAGAATGATCCTGTCGTGCG -GTGGCGAGGATTGTTCCCACCTCGGTTTTAGGTAATGTTGCTAGACGGGCAACCAAGGCC -AATTCGAGGCCCCGAGAACAGCCAGTCGCTAAATACGACGCCATGGTCGATGGAGGTAAT -ATCCACAGAAAGTTGACTGGGAATTTTTCGACTGGTAGTATAGGATGGAGTACGGAGTCA -TCTAAGTGTGATGGTACTGTATATCATAGAATGTGGGAAAGTCCCAACTAAGAAGGGCTC -TTAGCTACACACATTCTGGTCTGAAATTCCTTCAGGTCTACCATATCGGTATATTTATAT -GTTTCTGAATATTTGCAGATATGTACAAAGTCGTCTTATTTCCATTAATCTGCGGTCGGA -TATGCACGAACACAGGTTAGTTGTTGAGATATGTATATATGTAGAACTATGGTAGGTACC -TCTATGCACCGGATATTCCTTGGCGGGTGGGGCTTTCCTATCTGACCTGCCGCCGCTTTT 
-ATGAGGGTGCTCAGGCTCACTGAGTAGATGTCAGCACGGGCTCAGATAAGTCACGTGCAG -GATGACTGATGCTTAGTGAGTGCTTATTGGATGCTCTCTGATTACTTGATGATAGAGATA -TTGATCATATTCTCTATCTAGAAATAGACTGGGGAGAAAGAGAAAGAAAGAAGGAATAGG -ACTTTGGCAACCTATTTATACTGCCAAACATGGTCCCCTATCCTATCGTTTGATATGCTT -GATCTATATCAGGCATATGGCATATAGATGCCTATAGTTAGTATCCGACGTTACTTACTT -AGTGATGCTTAGTGTCCAATTAGTGAGCAACACTAATGCACTTAGCCCACCTTAGACTAT -ACCTAGAAGGTATTAGCCCCTACAGGGTCGTTTATACACTGATACTAGTTAGTTTGTTAA -TAGCATTACTAACTATGATCGTCTGTGGGGCTGGCCTGTCATTTTGGTTATTGGATATGG -CTTGCGTAAAAACTGCTCCCACTATTGCCATCGCCTATTCTGTCGACTGCTATAGGCCTG -TCTCGGGGGAAATCATGGTTATTGCCACTGTGATCAAGAATACTTGTGGCTTTGTGATGA -GTTATTGGGTTATACCGATTATAGCACGAAGAGGCTGTTTGTTGCTCACGATAATCAGGT -TGCATTGACGATTGGGCCGATGGTAATATCTGTCTGTGGTTAAATTGCACGGGTCTTTGC -TAGGCAAAATGAGACTTTATCCACAGGCATAGAGAAACTTTATGGCGGCCGAGAGGCCCA -TCATCACTATGGCGCCCCATGCTAATACGTATTGGATGCGCATATTATTCAACTCCAGTC -AACATGGAATCACGGTTAAGCGGAGTGCGATCGCGGAAAATCGGACATTAAATCTCCGTC -GCTCCGTCGACCTGCCTTTTCGTACCTGTTGGGTGTCTTGTGGAGGCCGCATGTTTCGGC -CATGCTTCCTGGGAACTCACAAAAATGTCACCAAccttccctttccattttctttcctct -tctcttGTTTTACCGAGTAACAAATTTCAATACTGAATGATTTCAGGCGTTCTGAATCTT -CAGGGCGTAGTCTCCCCTAGCCTCTTCCCGTCCGTGGACGTTCTTTCGTCCGCGGACCAA -CATAATCCCCGCTGTGGATCTGAAGATCGTGTGGAGGTCCTTCTCGTCCGTGGAATTCAT -GCCATAACATAAAAGAGATACATTCTACGGCCAAAAGATATCTATAACGGTCTACAGAAG -ACAAGTTTCCACGATGACTACCACCCAGAATATCAAGTCTCGGTTTCCGGACCTGAACTT -TGTCAAGCCTATTTCCGTCCATGAACACCCGAGATATAAACCATTCAATATCAAATTAGA -CCCAAAGTGCATTGTCCTACCAAAAGGCCATGCCAAAGAAGAAGGGCGGAAGCAAATGCT -GGTCGACTCCGTACTTGACCGTGACACGGCGATCCCTTTGCGAGACGGAGTTCATGTGTA -CGCCGACATCTACCGCCCAGCTTCTTCGGGTGAAGCCACAGTCCCCGCTGTGATTTGCTG -GAGCCCCTATGGCAAGTCTAGCATTAGCATCGATATTGCTTGGCACCGATCCGGGGTTCC -CAAAGACTGGACATCCGGATATGAGACATTCGAAGGCTTGGACCCAGCAGACTGGTGCAA -ACGGGGTTATGCTGTCGTTAATGTTGACGCCCGCGGTGCTCAATTCTCGGAAGGATCATT -CTTTTTCTGGGGTGACCAAGAAGCGGAGGATATATACGACACTATCGACTACTTGTCAAA -GCAACCATGGTGCAATGGCTCTGTCTGCATGGGTGGCAATTCATACCTGGCAAAAGCCCA -AATCAGCTACGCCTCCAGACAATCCCACCCTGCTTTGAAGGCACTGGCTCCCTGGGAAGG 
-TTTTACGGATATTTACCGTCAGCTTCTTCGCCGCGGCGGGTTTGCGATGCATAACACCTT -TGTGTCGAAATACCAATGGGGTATTGCTGGCAACCATGAAGTGGAAGACATGGCCCAAAT -GGTGAAAACCCATCCACTTTTTGACGAATACTGGGCCGGCAAGTATGATGAGGTTGAGAA -GATCAATGTCCCGATGTACTTGTTGGGGTCTTTTTCCAACCCGTTCCACATCTACGGGTC -CTTCGACACCTATCGCCGCGCTGGTTCAACGAAGAAGTGGATGCGTGTTCATGCTACTTT -TGAATGGTATGAAATGTATGAACGCAACTCGAACGATGATTTGCAGCGCTTCTATGATCG -TTATTGCAAAGGAATCATGAATGGCTGGGAAACGGATACGCCTTCTTTGCGCTTGTCTCT -TCATGGATTCGGGTCTGTTCCGAACATCGTGGAAAGACCAGAAACCGAGTTTCCTCTTCG -CCGACAACGGTTGCAAAGTTATTATCTCGATGCGGCTACTAGGTCACTCAGTCTGACTCC -CCGGGACTCCGAGGCTTCCATTTGCCACGACGCACACGGTCTTACGGCTCCCACTTCGGT -AAGTTACCACCCAGGTATTTAGATATTTGGTTTCTTTCTGGCTTGGTATGGTAAACTGAC -AACCACGAACAGGATTTCGTTCTGAAATTCGATGGCTACACTGAAATCGCTGGATATGCC -AAGGTCCGCCTCTGGATGTCCTGCAAACAAAGGGATGATATGGATGTTGTTGTCCAACTC -CGTAAGATCGATGAATTTGGTAACATGCTAGAGGGAGTCAACTTCCCGTGCCCGATACCA -GCAAGCGCGATCCCGGATGCTGAGACTTCTAAGCTGTACGGTCCTCAGGGATTCCTCAGA -GCGTCTTCCTCTGTCTCCCGTGATCACGCACGATCAAGCTCGGACGGGCAAGAGGTCTTT -TATCAACACGACCGCGAGGAAAAAATCACCCCGGGAGCTATTGTGCCATTGGATATTACC -CTATGGCCCACAGGTATGGTTTTCGCAGCTGGTGAAGGACTTCTACTTCGTGTTGGAGGT -CACTTTTTGAGTGCGCCTTCGGTTGCAGCGATGAAGCCTGTGGAATCTGAAGATGAAAAT -GTGGGCCAGCACTACATCCACACAGGAGGTGAGCATGATTCGTGCCTTGTACTTCCGGTG -ATTGCCAGAAGCAGACCGTAAGCCGAAATTCTGCTATGAAGTCAGTCAAAGCCCTATTAT -AATATGAAATTCTGCAGCATTCCTACCGAGAGAACAATATAAATTCTACCATATATTATT -CTTTATTCCCTCCACTTCTTCGTGTACGAAGGGGAGGTAGGGGAGGTATCGACTTTTTGA -ACTTCATGGCCACGGATGTCATTTAACCCTAAACTTTGACGAAATTTGTGAAAAAAATAG -AGTCATACCAATCTATATACCTCCACAATTATTTTATCTCTTATAGCCTTTTAATTTCGG -TTGTTTTGTAGTCTTAAAACTGTTATATAGTTAAGAGGTTGGAATCAAAATGCGGGACGG -AATCAACTATATCGATAAACTTGACTTCCTCGAAGCATACCAAGAAGCACGAACTCAAGC -TTTGAAATCGGAAAAGATTAAAGATATCTCGGGCAATATCATTGAATATATGATCTTCTT -CTAATCGCACTAAATTTTCTCGTAAAGTGTTATCCTCATTTGACCGCCCGTTTGTCAAAT -GTGCGCGGACTGGGGAACTACTTTTTCGTCCGCGGACAGAATCATGACCGTGCCGAAGCG -TGCCGACCACGACCTCGAAACTCGTCCGCAAGAAGAAACTATTTAAATGAAGATGAGATC -CCCTCCTCAGTTCTTTCTAAGATATCAATTTCCCATCAAACGCAGAGTCGAGCTTCAAAA 
-TCAACTTCTGTCAATTTTTTTTTGACTGGACAACATGGCTTCTACTCCCTTCGCCATCGA -TGTCCTGAAAGGCCCTTCCAGCAAACGTGAAACTAAGGAACTTGAGGCAGGATTCGCCCG -TCGATCAATGATGCTGGTTCGTCGCCTGCTAGGCGAGGACCGCCTGATCGAGCTTCTTCG -TGACGAAACGGCGGCCAGTACTGAATGGTGGAAAAAAGCGTGTTCAAACTCGAACGGAAA -GTGGACACCAGCTCGCATCGCCCTTTCTGTACGAGGCGTTCTTTCACAGGAATTTATTCA -ATGGTTTATTCCCGCACAAGGTGGCATGCTTCCCGAGCCAGAGAAGCTTGCCGCCCACCC -CGAGCATTGGGTTGTTCGCCCTTCCAAGGAGTCCAAATCAATGACCGTGTTGGAGACACT -GGGCGAGAGGCCAACTTTGTTTACGCTGGCTTTCGATGTGCAAGCTGCCGAATTTGTTCA -GGATGAGCCAACTTTCACTACCAAGATGACTGGTCGAGGCTATCTTGAAGATGGTTCTCA -AATTATAGAGCTATATCACCAGTTCAGGGACCATGCGGATGGCCAAGGATTCGATGTTGA -TTTGGCGATATATTTTCCGGTGGGAGCGGGGGAAGACGTTGTTGAGTGCCATCGTCAGCA -TTTGCTTGTTGAGTTCAGCAACTGGTCCAAGCAAGCCTTTGAGGCGAAGGCAGCGCGATG -AATGCCTCCTTGTTTGTGCATTGACGCTTGATCTCAGAACCCTGTTCCAAGGTTTTGGAT -GACAGTGATCACTCCAACTTTTTGTACTCAATGGATTATAAATCTCGAAGACGGCTTATC -AAGGTTGTGATATTCAGTGCATGATGGTCTACTTGGTGAAGTGTATGCTAATCGTTTCAA -TGAAGAGGTATATACCTAAGGTCCCAGTTCTCTCCAACCTTAAAATCAAATATTCTGCAG -ATCAATTGAAGGCTCTAGAGACATATAGGAATCGGGGTTTAGTTGAAATATCTGTTCGCC -TGCGTTACAAAGTCCGAGAGCTGACATAAAATCCGAAGACCAATCATCCTCGGATTGAGA -AGTCTGCTTATTGCGTTTGACTTGCATTTTACCGAGAAGCGGAGCATGAAATGCTATTTG -GCATACATCAAAGCCGTTTTCGCCAAGTAGATTATCGAGGGAGCGAAGCGTGGTAAAACC -ATTATAAGCAATGCAGGACCCAGAGGGACAGTCAGACTTGTCTTGGAGAAGCTGGATGAC -TTGTCCCACCTCATTTTTGATCTTGATTTTGTCGATCCCGACTTCCAAAGAGCACTGGGA -AAACATATGAGACATGAGAAGAAGAGTGACTGCAGCAGAAAACGCCTGTATGTCCAAAAT -TTCAGAAAGGAAGAGACCAGGTGGAAGCGTTCGTCTCAGATTTTGGTATCGCTGCACCAT -CGATTCGCAGGCGGCAACACAAGCCAAACAATTAAACAAATTTTCGCCGTCTGGTCCCTG -TCGCAAGCTGAGAGGCAGATGTATTCTCATAGCGACATAATAATGCAAGAACTGCACTAT -GTGGTCAGGTTGAACATTGTTCGCACCATGCACACCTCCTGACCACCAAGCTTCTGGGGT -TTGGGAGGCCAGTGCTTTCAGCTCCCTAGAAAGCTCCAAAGCAAATGCGTAGCTGTCCGT -TCCGCGTATATTCGATTTGCGGATTAGATCCAGATCTTGGATCTTGTTGGCTATGTCTGT -CAAGCGACAGAGATACAATCGGGTTTGCACGATGCCATTGATAGATACGGACTGATTTGT -GAGTTGTGCATGACGACCGGTAATTGGAGGAATGTTGAGAATCATCCCCAAAAGTCTGTC -TGCGGCACAAATTAGATCCCATAGCCGCGATTTGCCGATGTAACCTTCATCAGCAGCCTC 
-GGTAACGGAACCCAGTTGGATTACTTGAACCAGGTTGTGAAGTCCCATCAGCTCTGCGAT -CGAAATAGCCCGTCGCAATTTTAGCCATGCTTTATGGATGTTTCCTCGGCCCAATTCCCT -AACATTGGTCAGCTAACACGATGGGCAAAGTCGGGGAAAGTCGCATTCTCACAGCCTGAT -AAACTGCAACAACAGCGAAAGCCCCTCTATCGATCCCGTCAAAGTATCGTAGGCTAAAAC -TTTATTTTCCACTGTTGCGGATATGGCTTTCGAAAGTGAACCATGTCGCTGTCGGTTTTT -GAGCCGAATCTCCACTCTCACATCTTCTTGTGGCAGTTGCTCGGTTATCATAGCAACGGT -AATCATCCAAGAAGCTAGCTTGATGAGATCAACATTGGGACTATGCATCTCTTCATAACC -TGCAACGACTTCCTCTCCACAAGTGGTGCCGCATGGTAGCGGAAAAAATGACTGTAGCAT -GCTAAGCCATTCGGAAGCCGAATCGCTGTTTTTGAGTATTTCGTCTTTCGACGGAATCAA -CACCTGTAGCGATTTCCTTGCGACAGCGAGTGGAAGGGCAGTGCTAGTGATACCATGACC -TTGTCTGACCCGGTTTTCCGAGGGGCCTTCGACACTGAGCCAGTCATTCTGAAACAGCAA -ACGCAGATGAAACGGTTGCTCAGCAATCGACTGCTCGGAAACGGAGATCCCAGGGCCATT -ATCTGCGAATGGTTGCTCGAGACTCATATCACCGTTCTGCGTTGCTTGGACATTGAGTTT -TTTTTGCATCTGACGAACAGTCTCGGTCAAACTTAAGACTTTCGATTCAAGTTGAGCGAC -TCGTTGTTTGGAAGATAATCTTTTGGTTCTTGGTGAGCCTGAAGTATGTGTTTGTGCAAT -ACATTCCAACCCCTGTTCTTCGCACCGACGACATGTTTGTGCATTCTCAAAACTTGGAAT -ACACCGGATCTTTCGACGTCTGCCTACTTCTGTTAGATTGACGACAAAGGAGATAACAAG -AGAAAGAGAATTGTAGCCTCTCACATTCTGTGCAGCTTTTGGTCCCTTTTCTGATTGTTC -GAGCTTCCCCGGGCATAGCTCAGATCCACTACAATGATTCTTCCGGTGCCTCTCAGGGGA -AAAGGGTTCGGGGCCGTACACAAAGCGGGGTTATACGACAGTGGAGAAATGACCAAAGGA -TTTTGGCAAAGCTTTTACAGATAATCAATGTATAACAAAAACCATCAATGAACTAATCAA -AAGGAGGTTTCAAGGAAAATTACCCTGAACCTCCCTCGGCGTTCGGGCTTCCGCGTCTCC -GAAGTCCTCGCTTTATATCCGCGTTTCAGCCCCTCGGCCCTCACTTGTGTCGATCGTGTC -CGCTATTTTGGCTTTGGTGCGCTCATTGGAGCTAGAAATATCATTTGTGAAGCCTAATAC -TGGGTTACCTAGTCCGACCTAGACCTATTACTGGGTCGAAGGGCTCGACCCAAAACCCAA -CCTAGACTGAGTTGGGTCTGGGTTTTACTGGGTCGTGCGGAAGACTACGCACACGAGAGA -AGGAAGAACAAAACAGAGGAGAGAGATTAAATGTCAGAGGGTCAGGTGACCTCTAACGTT -GATGATCGTTGTTGGTCGTTGACGTGGTGGGTGTTGATGTGTGTTGATGTGTGTTGTGGT -ATATGACCGGGCGGTGAAGGTGAGTGGTGGCCATCGTCTCACGGACGCAGCGCGGGCCCT -TGCTTGCCATCTTATCCACATGACAGATGTATCAACTTCTCATGGTCTCAATTCGCCTCA -TTTCTTGTCCCCAGCTCATCACTTAATTAAGTTGGCATACGGCTTCCAATTCGATTGAGC -TGGTCAAATGCAATCACCGGATTTACCTCCATTGGTTCACTCTGATCAGGCAATACGCTC 
-AAGATTAGACCTACCAGGTGTAGAAGCATTTGATGAGGAGCCGCTCGAAATATAATTTGT -TGTCCCAGCTGAAGTATAATGAGACTTTCTGAACTGCTCGTCAAAACAACACGAACAGGA -CCAAGCTGGCGAGTAGCAACGCCGTGGCCAGTAGTGCATGCACAGTAGTTGTAGTCGAAA -TCTGCATGACTGCCTCGTGATCAATGGGCGACCAAAAAGGGGGCTTCATATGACCTTATC -ACCGGATATCCAGCTACGGTGCTTACTATATTAGCTCTGTGTTCTAGGTTCTAAAATGAA -GCCACCTACCTCGCGACGCCTGAATGGAATTGCTCAAGTCCAATAAGCCTGCTACGTGCT -CTTGATTGATCGCTCGTCACCCCAAACGACATTTTCTCTCCCAGGAGCTGCCGAATCTCC -CCAGGAGCTTAATCAAAGCTTATAGGAACCGAATGGATGCTCCCAGGAACGGCACGGGCA -TTTCCAGGAGTCCCTAAGCCCCAAGGATCAGGATCTTATGATTGGACCCCAGCAATCAAA -GTCGAAAGTAGTAGACTCCAGAAGTATAGGCCACGGTAACGACGAAACGGGGCGAAGATA -AGCACTAAGAGTTCGAGGGAGTTATAAAGAGCGATACCCAGAAACGCGGTGGACGTGGCC -TTGATAGTGAAATCACTGGCCGAATACTTACCGATAATCCCGTTTGTGAAGGCGTCAGAC -ATTGTGACCCCGGTAGTAGGAGTTTGTGAACTGCAAAATTCGATATGTCTGCAAAAGTAC -CTGTGCCAAATCACTTTCACTAGTGGCGATGATATTCAATCGCCTCTACCTGTAATGGGG -GCAAGCCAAGACCAGGCGTGGGTTCCAGGAATGGTAGAAATACTCGTATGATAATAATGG -GAAAAGTGCCGGGCACTCTGGGACCCTCAAGCTCGGCAGTAGCAAATGAGTGACAAGATC -AAATGGCAGATAGCGAGCGATATACAAGTATGGAAACGCAGTAGACTCCTTGCCGGATCT -GGCAGCAGCTAGCAAGGATACAAAAGGAAGCAAATAGGGGATCATGCTTTTTATTGGAAC -AAGACCATATCCTCGTCTCCAAGCGCCACTAGGATACCATATCACTTATAGGGCCGTAGA -AAGTTGCTTACATGCTGCGAGGTTGTGTTGAATTGACTACTTGAATCGTAGCCAATCTCT -CTAGAAAGCAGATATATACGGGGCGTCCTGGAGCTTATGTTCATATGGGAATGCCAAGAC -AATTGTTCAAATGAAATTCCATCTCGCCTCCACCTTTGATCGGCCCGACACAACCAATCG -AAAATCGAGCTCATCTCCGCACTTCTTTTTTGGCTCCCGGAAGTTTAATTGTTCGGGATC -AATTACCACAGGTACCGCTCATCCCTGCAACGGTTTTGCCTCGCCCTGACCGGAACACGT -TGGTGGCGCCAGGTCGTCAACCTTGAACCCGTAGTTGGCCAATCATGAATGGATTTCCAT -ACGCTAGGCCAAGGGCCGTGCGGATCGTTTCAATTGGTGCTAGAACTTGCTTGTTGCTGT -TTCTCGCATCTCATTGATAAAGAATCCCACCCCCATCGAATTCGGAGTTAAGCTCTAGTT -TTATGTCCGGTCAACGCAACTAATCGGGCCCCGAATCCCGGTTGCCCGGTTTGTAACGCC -TGACATGTAAAACCACGGACTTAATCCTCACCCCACCCCACGCCTTGGCGGCAAGCCTTT -TATGAAGGGGGGTTTGATTCATTTTCTATGGTGTTACCCCTATACAGTCTCTAGGTAGGT -GAAGAATCTTCGGGGTTGGGTTTCCATGCAGGGGGAAGTGGATTACTGAATTCACGTCAG -TGTCGCTGACCGCGGAGGTTATGATGATGGATAGGTGGTCACTTTTCCAGTTTCTCACTG 
-GCAGTGTTATAGATTATTCAAAAGATTACCCTCCGTATTCATTTTCATACTTTCAGTGAG -ACTTCTAGCTCCCCGACCCCCATGTCTCCTGCCTTCCAATCTAGTTCAGTTTACTTTAGA -TCATTGCCGTAAGCCACAGTACGGGGAAGCATACAGTCTGCGGGAATAACCTGGCTCTAT -TCCCCTTCCGGGTTGATGTAAGTTGAAGGTAATGCGTTCTGCAATTAATTTGAATTACAC -ACCTGGTTGCGCATACAGATTATTTCCCACTGTAAATTAGTTGATGATACCCTCATAGTA -TATTCTGTCCCTTGTTTACCAGCTCATTGGTGTAGATACCTTGTGATATGGGAACGGAGC -CTGCGGCTTTAATGCAGAAGTCACATACACTAGATCATATCCCAAAATCCCAAAAATCAT -CACTCTTGAGTCAATCATGATCTATTGTCGGCTCGCTGTCTCTACACAGAATTTCTATAA -CCACCCCCCCCCCGACTGGTGTGTGTGCCTGTAATTTATTGTCTACTCCGAATCGGGCGA -AATAAACTTGATCAAAACACCTGTCTTGGCTCTAGTGTACTAAGATCTTATCAACTCCCA -CCGATGACTTTGGCCCTATACGCTGGCCCGGCTTTCTCCTCCCATGATAGATACATTCAG -GCACAACTGGGCCTTCTTCTTTCCCCCCCCACCTAAAAGCAGATAAGGGCTGCTCTAAAA -GCAACCATAATGGGCTGCTTTCATTCAACATTGGCGGACCAATTGGAAACAGTCATTGTA -GCCTCCCCGCCAGTGAATGGCTCAGTACCGAATTGCATAGCTGAGGCGAGTAGGTTCCAT -TAGTATGTGCCACGTTTTTTTAGTATTTCCCCTGGGAATAACGTACTAATAAGGTATTGG -CTACTGGCTGGGAACTCCTCATTCTGAGCCAAGTAGTTAAAAAAGTCCTGGATATCCCCG -GTCCAAGATGTGATAGGACTGGAGGCAACGAAGCTGTATGTTTGCTGCGCGCCATTGACA -CCATACCACAGTTCCCAAGTCTCATCCCCGACAGTGGTCGTCGAGATCTGAGTTCCAATT -GGATCAATAGTTCCGTACTTGGCCAGCCTAAGCGTTCAGCCCGATTAGCAATCAATAATA -ACGAGCCGGTATAGCCAAACATACCAAATCATGAGTTCATAGTCGCCACTAGAGGTATCG -TGGTCAATGTCCGCCGCCGTGAAGAGGTCATACGAGACATCGGCGTTGATTTCGGTGTTG -TCATAGCTCCATTCAACGGAAGTGGGAATGCTATTGATGTCACTAACGTACATCTGCATC -TGGCCAATATAACCTGAGTTGGCGTAGCTCTTCACCTGACCCTCGCCGCCAGACCAGTTC -CAGGTGGTGGACCAGGAGACTCCCGAGTCCGAGATTGAGTTGACGTAGGTGCACTGAGAG -CCAGTGCCATCTTCCTGGCCCCAAAGGTTATTGTTGACAGCGTAGCTGCCGCTGGAGTAG -TAACCATACTGGTCGCATAGGGTCTGCTCTTGAGCAAATGCAGCAATAGCAAGGGCGGCG -AACAAACAAATGGTCTTCATCTTGAGCACAAAAAGAATGAATTGTAATGATAAGGCAAGC -TTCGAGTTCTGTGAAAACTGCAATGGATGGTTCCAACTTCAGCGAGAAGATCTTGCCAGC -TTTATAGACGCGAGAGCAGGATGACAAAGCCCCATCTAATCTCTTTTCTGTATACGGATG -AGAAGGGTGGATCTCATGTTGTTGTTTGGCACTTGGGCCTTTGAATCTAGTGGTGAGAGA -GACACGGATGACGTGGATGTCGCAGGTTGTGTTTTATTTGCTTGATTCGGCAAATAATGC -ACGGAAGCTGGGGACTGGACGTTACCTGGAGTGGGTTAGTTATTGTAGATCACTGTCCTT 
-TTCATGTAAGTGGTATTCTCAATGCTCTACCCGGTACATGGAAGGACAAGGTTTGAGAAA -GAAAACCAATTTTCTCAATAGGTGAATGTACTTTGCAAGCGCTAGAAGTTCATGACAGAT -ACATACCCACCCACCTGGCGAATCAACGGAATATTCTGGATAGAAGATGGATAGAAGAAG -AGAAACGGAATGCATCGGAGGCTACCCCGTATACTAACTAGACCTAGCGTACAATGAATA -GTTATATATGTAACTATACGTACCGGACTTTCCCTAGCGGGTGGGACTTTCCTATTTGAT -CTGCAGCCGCTTTTATGAGAGTGCTCAAGCTCAAGCTCACTGAGTAGATCTCAGCATGGA -CTCAGTTCAGTCACGTAAAGCATGATTGATGCTCAGTGAGTGCTCAGTGTCCAACCAGTG -AGCAACACTATTACATACAGACCAACGCAGATTATACCTAGAACTAAGAAGGAATTAGCC -TTCCCAGGGTCGTCTAACTATCCTATGATACCTAGTTAGTTCGTTGATACATCACTACCC -TCAATTTTTGGAGCACTACATCTTGTCAAGAAAATACCAAGGGGCGTAGGGAGCGGAGAA -CTCCATCCGTTTCATAATATTGTGTAAATAAAAAGCGCCCAAGAGGCGGAATCAAGACCT -TGGTATATATTCAACTTCTTTGTCTCGAAATTCTTTGATGATTGCCAGGGATATGCGGGT -CGAGCAGACACGACTACAAGCTAGGCCAATTCTGCCCGTACTCTATATTCCAATGATGCC -CCATCCACTGGTACATGTAGATCCTGGACAAATCCTTCCTAGATCAGTATGTTCCCGGAA -AAGCTGGTATTGTGGCGCGGCTGGCACTTTGGAGTAATTTCTTGTTCTTTCTCTCGTTTG -ATTTGGGGCATTGGTCTTCCTCTGAATTTGAACAAAACCACGTTCCATGTTCTATCGGAA -GGCCCCGGTGCTAGTTAACTTCACTAAGACAAACCGATAATATTCTAGGCGAGATTTGAT -TGTCATATCTGTGCTGAAGTTCGACGGTTCAAAGGGGTCGTTCTTGTATGGTAATTGTCG -CGACGAACATCCATGCATTATAGTATCCTGTTTTTAGTAATGATAGCACCCGAGAGTATA -TTTGGAAGCACAAGCGATAGACTTAATGAGATTTTGTCAATGCCGGAATTCACAGCCGAT -GTCCCTCGCACAGCTCAGAACCAGAGTGCATGTGTGTACAATATCGAGATCATGCCACCG -CCAATGCATACGGTAAAGGCTAACCGCTGAGAATGCTACGATGCTTGAAGATATGCGGCG -GTACGGTTGCGTTGCTCTAAAGCATGATTTGGACGACGATGAAAGTGATAATGTGCCCAT -TCTTCTAGGCGTAGGCCGTGCCACCCCATTGCAGCGCGAGAAGAAAGCATATGGCAGAAG -GCAAGAATAGTGCGGTCCCTAATGGGTCCAGGTAAAGTAAATGCTGTGTCCAGGATTCCG -GATTAGACGTATGTGGGATATTTAAGAAAAAGAAGAGGATAACGAGAGGATTTTGGTGCT -GCGGAAACCCACGAAACCAGATTACTCAATCCCTGGGGCCTATCGCCCGATCTCACTTCT -TAACACACTAGGCAAACTCCTGAGGGGGTGATGGCGCGAGAACTGTCTTATCTCACGGAG -AAACACGGTCTGCTACCGGACACCTAGTTTGGAGGGCGACCAGGCAGAACTGCCGAACAG -GCCCCTTTGGTTCTCTCCAACGCGATTGACCGGGCGTGGTACAAACACAAAGTGGTGACG -CTAGTAGCATTTGTCCTGAAAGGTATCTTTAATAAAGCTAATAAAACGAGCCTAGAGTCT -AGTGGGCTCCTAGGAATTCGCTGTGGGCTCCCAAGTGCTCATCGGTGTATTCCCGTTGAT 
-GGTTTGTCTTCGGCTCACAACCTACTTAGCAATGGCGTGAAAGGCATGACGTTATACGCT -TAGGAAAGCCATCGGCTTGTGCGCCCATTGGCGAGAGAAGTGCTCAAGCGTCGTCACCTA -CTGAACACTGATTCTTCTAGTGGGCCTTGATAAAGGACAGGGTTCTAGATCCTCAAGGAA -ATCCATTCAATCGCCCCTGCCCGCCCTAGTAATAGTCCAAAGCCCGACTAGGGGCTCGCA -GGCTGGTAAGCGACGTATTCTAAGGGGTATTTCCTCGAGCATAGGCTAGGATGGGAAATT -AATAGGTTAATTTAATAGCCCAACTATGAACTTTTCACTTGGGTAGGATCGAAACCGCGA -GGATGAACGGTCATGTTGACAACGCCAGTTATTTTTTGATAGGACGGGGAAGGGTTTTAC -GTGATGGGGCTAGCATGTTGCATTTTATTGAGGTAGCTTACAGAGTACTCAGGCCGCTCA -CACTCCTGGAGGTCGTGCTCAGTGAGCAAATCAGGGGCTCAGATGCTGATTATGTAATGG -TCAGAGATCAAGTCCGGGTACTCAGGGGCGTGCTCAGTTCTACTCAGTGATCAAAAAGCG -GCGAGAGGTGAGCACTGAAGCGAAAATCAGGTGCATGCATATGTAGGCAGGCATATAAGG -ATTTCTCATACTCGTCATCAGGACCTCACATGAAGAAAGCTATCGGATATAATCTATGGT -ATCCGCATTTGATTCACGAAGAATAAAGTCGTTTCGTGATGTCTACAATGATATCGCATT -TGTATCCACAGTGTTTATCAACTGCAAAAAGGACTAGTATCATAGACGGCTATGTATATG -GATCCTCTACGGAGGTAGGGGACTGGTATAGGGACATCCTCTGCCTTAAATACCAGGTCC -TACTGAGAGTAGAATAACTAGGATACGTAGGACCGTGCTTAGTGTTACACTCAATATTGT -TAACTAACGCGGTTCAATTTTATACATAATAAGTAGGTATATATAATGCTTACATTCCAA -CTTCACCCTTGCTAACTAGTAAATCAAGAGTAGATTAGTTACTCCGTTTGGAGAGTACTG -ATAGATCTAAATATCAATAGAAGTACCTTTATATATCGTTGTACAATAGAGGACATAAGT -AATTACACCATATTATTTAGTATTCTGCCAGCCAAAATAATTCCAGTTTTTTCGATCTTG -TTTACTTGATATAGCACACGATTAAAGGACTAGGCTGAGAGGTCTAGGAATGAAGAACAC -AACCTTTCTCTCGTAAGAAAGGTAGCTTCACGTGCTGTCTTAGGGTGTTTGATTCATGGA -TACTCTACCTTTCAGGAATAAGCATCAAGCCCCTACACACCTTAGTTATTCCCCATTCCC -CAATTTTACTGTTGGTTCCCTCCACTACGGGCGACTAAGTATGCCACGCGGGAAATAACC -CTCTCTCTTGGATANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGCATTTATGAAGTATTTACTC -ACTCGCCATTTTCCCACCAAAACTGACCAAGTTTGAACTAACTTTTCTTTGTAAATTTAT -ATTGTAACATATCTTTAATGGATTTTCAAATCATCAAAATTACACAGCAAGAGTAAAATA -CGTATAAACTATCGAGTTGTACTCCGTATGTTTTCAAACATTGATGAAGGATATAGGTCG -CGAGTTTAGTATTAACGTCCGTATATATAATAGACATTCCAACGAGCATTAAAATTTCAA -TCAGTGACTATTTAACAGCTCCACATATTTGATCATTATGGGCACCGTAGAAGGCACTAG 
-GTATGGTACTGTATATTGGTACGTAGATTACACAACGTTTACGAGAATTAATTGGAATCC -GTGGAAATCTATAGTGGGCCTTTGCAAATCATGTGGTAGGGTCGCCTAGCAACTGGTTCC -TGAACTTTCTTTGAGGTGGCTTCGAATTTCCAATCGATCTTCCTGTCATGTCACCGACTC -TAATATAATTGGTGTTTGCAGGGTTCTTGAAAAAGAGACTAAAGCAAAATACGCGCGCGC -TATCACATCCGATAAATAAAATGTCGACACGTGTAGTTTCTGTTCGCGCTCAAGTATTTC -GTAGGGGAAGGTGCGTACTGGACCTTTTACATGCTGAGGCTCGAATCTCACTCTTCTTTT -CTCCCCATTAACTACTCGTGCTGTGCGTTGTATCACATTCTTTGTGAGATCTCTACTCAA -CTACATACAACCTATCACTTCAGGATGTCAGGCCTAGAGGTGATCGGCGGTATCTCTGCG -ATCATTACCCTCCTCGACGCGTCGATCAAGGTCTACGACAATGCTCGAAATGACATGAAG -TTGCCTGAGACTTTCGAATCCGTTCGACGCCGACTGCCCGTTATCCTCCACATCCTTCAG -ACATGCGAGAATGACCTAGAACCGGGTAAAGACTCTATGCCCTCAGATGTCTGTAAGGCT -CTAGAGAAGATCCTTGATGCTTGTGACGAAAAGGCTCGAAAATTGAGAGAAATATTCGAG -ATGACCATACCTGGTGAGAAGGATACGTGGGAGAAACGATACGCAAAAGTCATTCGAAGA -CTTGGAAAAGGAAACAAAGTCGAAGAGCTTATGGCTACGCTCACTGAAGATGTCCAACTC -CTCGTCAACAACCACGCGGTGAAATCTGCTTCACCGGAGGACAATGCCGAGCTCAAAAAA -ATCCTCAAAGAGATGGAAACTATCAATAACTCGACCCCCGAGGAGGAACATTCAGCCTTG -GCGTTCCATAGTGGTGGAGGGGCCCAGACCAATAATGTCAATAGCGGCAGTGGCCAGCAA -ATCAACAACAATGCTCATGTCGGAACTCAGTATTTCCATTCTGGTAAGGACCAGCCGTCA -TGATAGCACATTTTGTCGATCGTACTATGCCCGAAGCCGAGTTTGCAGACCCATTTTAGA -CGCCGTTAACGACGGCTCTGGTTCGATGATCTCTAATATCTCACCGAATCATTCCTTATT -GCTAGTTCTTATTTCCAAAATAACCATGCATATGTCCTTTAAAAGTCTCGGTTTCCATTT -TTAAGCCCAAAATGCCACTCAAACCTACTACAAAATCCCGGCTAACATTTATTGAGATAG -TCACCCTCAGGCAGAAGAAAGATTTCAGCTTTCGCGGACCCGTTGGCATCTTACGCGGTC -AAGCGCCTTACATCGCCTCCGAACTCTTCGTTGGTCGTGGCTATGAGCTTGATGAGATCA -CAAAAGTTCTACACCTTGATCACAAGACTCAAAAGCAACGGCGCCTAGTCCTCGGTGGTA -TGGGTGGTATTGGTAAGACCCAGCTTGCGATCGCCTACGCAGAGTCCGGCCGTGGATCTT -ATAGCTCGGTGTTCTGGATTAATGCAGTATCTGAGGCCGGACTGAAAGACAGTTTCCGAT -CGATAGCCAGCCTTATCTTCGATGTTGAGGAGCCTGGAGTGTTAGAGGACAAAGAGATCG -TGAGACGTGTACACCAGTGGCTATGCACTCCTCAAAATACTGGATGGTTATTGATCTTTG -ATAACTACGACGACCCTAGCCAGTTTCGGATTGACCACTATTACCCCCCTCCTTCTCATG -GGGCTATTGTGGTCACCAGTCGACGGCCAGATCTCGTTGCTGGGACCCCTCTTCACATAA -AGCCTTTTCAGAATATTGAAGATAGCCTCGCGATCCTACAAACCCGATCAAAACGAAAGA 
-ATATTCAATCAGGTATGCTAAAACTAAATACATTTAGGATCCACTAACCACTACCGTTAG -ATCCTCATGCAAAGCGCCTCGCAGAGCGACTTGCGGGTCTTCCTCTTGCTTTGGCCACTG -CCGGGACATATCTTCACCTTAGTACCTTCACTTTCGAACGCTACTTTCAGGAGTACGAGA -AACGTTGGAACATTAACCCTCGCCGACCCCAACAGCTCCAGGAGTATCAGGAGCGCACGC -TCTACACAACCTGGGATTTGTCATACTCTCGTCTAAAAAAGGAAGCTCCGGACGCGGCGA -ACTTACTGAAATTACTGGCGTACTTCGATAACCAGAGTCTTTGGTACGAGCTTTTCTATG -CTGGACTTACCAGTAGTTCTCCAGAGTGGCTTCGTGAGGTGATTACAGATGATGTGAAAT -TCGATGGAGTAATGGGAATCCTGGCTGAATACTACTTCCTGGACGTTCACCAGACCTTAA -AGTCATGGAGCATGCATAACTGCGTGCACGATTGGACACTGGCAGCACTCAATAAGGACA -TCGACGCAAATCATTATTGGTATGTTTTTGACTGCATTACTGCGTCGATAAATGATGACA -ACGCGGATGATTTTGCAAACCTCTCTTACTCTCCCTTGGCTACTCATGCCACGCGGCTAG -TACAACAGCGCTTCTACCAGAACGAGGTCATATATAATATTGCGCCTCATCAACTCCACC -AAGCTTCACTGATTGCAAATCTACTTCGAGAGCAAGTACTTCTTCTTGCTGCGGAGCAGA -TGTACCAACGGGCGCTAGCTGGGTACGAGAAAGCGCTCGGAGCGGACCACACGTCGACCC -TCGAGACTGTCCGTAACCTCGGGATTCTATATCGAGGTCAAGGTAAGCTGGACCAGGCGG -AGCAGATGTACCAGCGGGCGCTAGCTGGGTGCGAGAAAACGCTTGGAGCGGACCACACGT -CGACCCTCGAGACTATCCGCAACCTCGGGATTCTATATCGAGGTCAAGGTAAGCTAGACC -GGGCGGAGCAGATGTACCAACGGGCGATAGCTGGGTACGAGAAAGCGCTCGGAGTAGACC -ACACGTCGACCCTTAACACTGTCGACAATCTCGGGGTTCTGTATTGGAACCAAGGTAAGC -TGGACCAGGCAGAGCAGATGTACCAGCGGGCGCTAGCTGGGAGGGAGAAAGCGCGCGGAG -CGGATCACACGTCGACTCTCAGCACTATCGACAATCTCGGCCTTCTATATCAGGACCAAG -GTAAGCTAGACCAGGCGGAGCAAATGCACCAGCGGGCGCTAGCTGGGAGGGAGAAAGCGC -TAGGAGCGGACCACACGTTGACTATCAACACTGTCCACAATCTCGGGATTCTATATTGGA -ACCAAGGCAAGCTGGACCAGGCGGAGCAGATGCACCATCGGGCGCTAGCTGGGAGGGAGA -AAGCGCTCGGAGCGGACCACACGTCGACCCTTAAGACTGTCAACAATCTCGGGATTCTAT -ATCGGAACCAAGGCAAGCTGGACCAGGCAGAGCAGATGTACCAGCGGGCGCTAGCTGGGT -ACGAGAAAGCGCTCGGAGCGGACCACACGTCGACCCTTTACACTGTCCACAATCTTGGGA -CTCTATATCGGAACCAAGGCAAGCTGGACCAGGCAGAGCAGATGTACCAGCGGGCGCTAG -CTGGGTGCGAGAAAGCGCTCGGAGCAGACCACACGTCGACCCTCGAGACTGTCCACAATC -TCGGGATTCTATATCAGGACCAAGGCAAGCTGGACCAGGCAGAGCAGATGTACCAGCGGG -CACTAGCTGGGTGCGAGAAAGCGCTCGGAGCAGACCACACGTCGACCCTCGAGACTGTCC -ACAATCTCGGGATTCTATATCAGGACCAAGGCAAGCTGGACCAGGCGGAACAGATGCACC 
-AGCGGGCGCTAGCTGGGAGGGAGAAGAGGAATACTGCTACTTAATTCTTTCAACGGTGAT -CTCAATTCAATATCTCTCAATCTATCCTTCCTGGTATTGTTTAACTCAATTCCTGATGTC -AGAGACAAACCGGCGGGCAATCCTAAGTTCCATATCCTCGATGGCAAAGAACGTAGACCT -GATTCAGATCCAGAATATTTGCCAAAACAACATCACGCGCCTTCTAGATATCGTCCTCGA -TACTCTCGCTGCTCACCGGTGACTGCTGTTGGCCAGAATTGACAAAATAAATGAGAGCAG -TAACGACGGTGCTACAGTACAGAGCATGGCTTTAGTACCACCAGACACATCCAAAATTCT -TAGCTGCGATGCATACATCACGAGGATACTGATATTGATTGGAGACAAATTGGCCAGGTG -CAGAATTGAATAGGGCTTTTCCCGCTCATTGTGTTCATAATAATGTGAGAAGGCTCCAGC -ATGTATGCGAAAGATCAACACTTGGGTGCAGCTTGGCAAGGAAAGTTGAAGCCACCTAAC -AGATCGTCCAGGAACTCGGCGCCAGGCGACCTCTCCACATGATTGCAGATTCACTATAGG -TTTGAACGCTTCCATGGAATTCCACGGACTTTTGTACGCAATGCGTAATCTTACATACCC -TTGTAGCTAGGTACTGCAACATACAAAGCGCATTTATCTGCGCCCCTCAATTTAACGCGG -CTCGCTCAATCATTGGTCTGCGCTAACTCGTGTAGCTCTTTAATCGACGAACAGTCGAAC -CCTTCATGATTTCTGCATGCATGAGGATGAGCTAAGCCGACATCGAGGTGCCAAACCGCG -CACTCAATTTGTACTCTCTTGCGCAAATTAGCCTGTTCAATTTATGTTAGTCTTGGCGAT -GGGTCCAGAAAATAGTTATCAAGTGGTCAAGCCGATTTCGAGAATGACTGTACGGAGTAT -ACATATACATCTATACTACTCCGTATATAGGTAAATAATAGAAATTCTGTGTTGATTCCT -TTTTTTAGTCCATCCGAATGTATTTAGGGTAATATACCTAATAAAGAGGGAGTGCTTTGT -AGCGACACGCGATTCAAAGTAACATGGATGAAATAGTATCATGTACTACTTTTTTTTTAT -GAAAAAGTGGTGAAATACTGCTATGGGGATGGGTGGAAGGTTGCAGACAAGTGTAACGTA -ATGTAAATTTCAAGCAACAATTTTCTTTTGGCACAGTGATCATTGTAACGCGCCGTATTG -AACTCTCAATACGGAATAGAGGGTGTTCCATTAACCAAGGGAGGTCACCTGTTCCTACAC -TAATCCCCTATATATATTGTATTTTATGGCATCTAGATCTCCAAACCTACTACTTTATAG -CCACATACAGAGCCGCCTAATGCACTACATGCCACTGATAGAGTTGGGATATAAAAAGAA -ATTTTAACGCTAATAATAATAATGGGGGAACTCCGAGTGAAATCCTATCTCAATATGACG -ATCACGTGCGACTTACGTCACCGAATCGAGGTTCAGAGGTCGTCCACtcgttctttcttc -tctttcttatctcttgtttcACTAAGCTCCAAGCTCGCGGACGCAGAGGGGTGGATCTTT -GGGTGAATTGACGCCATGGCCGACTCCTTCACCCTGCCTTTGCGACCCATCCTGGCGCAG -CGTGATCGCCCAGATACCCTGCCTGTGGAGATCGCGCAAATCAACAATCAGTGGGGCTCT -TTTCGCGAAGTGAATGAAGATGTTCTTCGAAACAGGATTGCCGAGGAGGAAGAAAATGAT -AGCTTGGAAGAGATAGAGGAGAGTGATAAAGATGCGCCCGATGTTGACTCGACAGAGAGC -TTGGAGCAACTGTACAAGCGGCGTGCCGAGATCACTCAGTTCGCTATGTGAGTCTGATAA 
-TGGATTTGAAGTCATACTCTCGCGCGCAACATCTGACAATATCTCAGGCAAGCCCATATG -GAGACCATGTTCGCCCTCGACTTCGTTTCCCTTTTGCTCTCCAAGCAGGCCCCGCGCCAG -GCCGAAACCTCCATGTCGGCCTTCCTCAAGCAAGTCGCCCCCCTCGGTTCGCTCAACGCA -GAGGTAGTGAACCCTCCCCCGAAGTCAGAATCTACAAGCAAAGATATTTCGACAGTATCA -AGAGGATGGCGGACCCAAAACTTCAACGCCGCGGCAAATAAGCTTCTGCAGGCCGCCTCC -AGACTTGAAACTGAAGTCGCGTCGGAAACTCGATACTGGAATGAGGTGCTGGCTGTTAAG -GACAAAGGATGGAAGATTTCTCGTCTCCCACGCGAAAGGCAAGCGCTGGGCGTGCAATAT -GGCTTCCTTGAAGGTGAGCACATTCCGACGGGAGATTGATACCGCAGGAAGCATCTGCTG -ACCAATACTGAATACAGCAACGCCGGTTTTCCGCGACCGCGGCCTTGCTTCCTTACGCCG -AGCTGAGGATGGAGCTTTGCTGTTGGATGAAGGTTTAATCCCATCCAAAGCCCGTTATGT -CCGGGTGCGCGTGATACAGGATGGTCGGCTGTCAGGAAGCTCTAAACCGACGCGTTCCAC -TCTCGACGGCGAAAAAACGATTGAGGACCGCATTCTGCAAGCGCGTGACACTGTTTATGA -AGAGGAACTCTTCCACGAACTAGTCCGAGAAGCCCGGGCGATTGCGAGCTTTGGCGTGAC -GACACGCCAGAATCTCGTTCAAATTCCTGCTTCTGATAACCTTGAAATTTTACTGGACTT -GGTTGATACCACTGAAAATACGTTGCAACCGCAAGATAATATCTCACAACAAGGAACTTC -TCTTGCAGAGGGCTTGGCGCATACGATCCGGATCTTATTAGCATATGCCCACCGTCAAAA -CTTGCGACGTCGGACTCAACTGCCGCCGCCCCTGACCCCGAAAACGCGTGCTATTCCTGA -GCATCAGCTCATTCGGCCAGCTCTAGCTTACATTAAGCACATGTCCCATGTATGCTGGCT -TCAATCATTACTGAAAGACCTTTTCAGTGTGTTGCAGTCGGCAAATTTGAAACCTCCAGC -ATACACAGCAAGAGTATTTTCGGCAGGGAAACCGAGACAAACCTCTCCTGCTCCAACACT -AGAAGCCCTCCTTGGACAGTTCCTCACCCCATTGCTGTCTACATTTAAGGGGGAAATTTT -GACACCTCGGGGTTCGTTCTCAATCTCGATTCACACCAACCTTTCCTCACCACCCTTCGG -TACCACGTTTGATTTGTCGTTCAATATGCCCAAGTACCCGGATCTTGGGTCTCCTGGAAA -AATTTTTCATAGGGAAGAAGTGGAAGCCGCCGTCACCCATCTCTTGTTGCTTGATGTTGT -TTTCACCATCTCATCCAACGGTTTATCAAAGTCCGGGAGTGATAAGCTCCAAGCGAAAAG -TGCCTGGGAAGCAATTTATCCTCAACATGGTGAGCTTTTGCTCTCCCCTTCCAAATCAGA -CAGACGAAAAAAGATGAAGATTGCTCTGTCCCGTCATGAACTGTCTCTCGAGATATACAC -TGTATGCTGTATCGACGGAACCGGGCGCGGAACATGTGAACATCCCTCTTACCATTCAGC -GCCGCACACTTGGAAACCATCCCTTTTTGCTGCGTTACCCAGCCAGCCCTCTTTGATGGA -CTTTGTTTCCACTGAAGCGTCACGTGCGTGACCCACGGAGACCGCAGATTGGTATCGGGC -CGAACCACTCCACCCTTGGCCGCGTTGAGCCGAAATCTCCTCGTCCCTTTCATCAAATTG -GGCTTGTGGTTTAGTGGTATAATACTCCCTTAGCATGGGAGTGGTCCAGGGTTCGATTCC 
-CTGCAAGTCCATGTCTTTTTGTTTCTGAGTATTTTTTGCTTGAAGCGAAAAGTCATTAGT -TCGCGAGATGAAGTGGGCCTAAGCAAAATTTTTCATGCGCGTCTACTCTCGCAAAAATCA -ATAATATGTAGATGAAGACTTGAATACCCTGTACAAATCATCCCACCAAAGCTGAATTGA -AACACACTTCCGTTTGAATTAACAACATACCGAGAACCGGTGCGGTCAACCTCTCATAGC -TTCGACCGTAGTGCATTGACGGCTTCTGGGACTGTGAGCGTCTCTTGGAGTCTCTTTTCT -CCCATTCCGTATGCAGCTGGGAGCCAATCACTGACATCCTCACCCAAAATCATCGAAGCC -AACGCCTCGCCACTCATCAGGCTGTTCGCCATACCATATCCGTTGAATGCAGCAGCAATC -CATTCGCCGTCGCCTCTCCGACCCGTCAGGGTCGTTGGTAGTTGTCCAACCATTGGCAGA -CCGTCTGACGAATAACCCATAATCCCACTCCATGAACTGACTAGGCGCCAATCGGATCTG -TCGCTTTTGCCAAAGAATCGTGGGAGGACGACTTCCAAGTGTTTTACGGAGTCTTCGGCC -ACAAAGGTGTCATCTGAGGACACTGACTCCTCAAGACGTGCATTCTCGCCGCCGAAGTAG -AAATAACCGGTCTTTGCACTCTGTCCTAGATAGTATAGTCCATACCCAAGACGCTTGGTT -TTAGGATCATAGGATGGAGGGTAATGGAAGCCCCATGAAATGGCCGCACCCTGATTTGAC -ACAGTATTCTGCGGATCTTGAACTGTCATTGTACCTTTGAAAGGGTGGACAAGCCCCCTG -AGGCGAGGAAGGAGATGTCCGCTGTACCCATTGGTGCAATAAACCACATTTTGTGCTCGT -AGCGGACCGCGAGAAGTGTCAAGTGTGTACGGATACGCACTCATCATAGGTGAGGCAAGA -GTTGAGATGTGTTCAACGGATTCAACCGGTGTATGCGTCTCGATTGTCAGTCGACCTGGG -TATGTTTCCAAGAGTGCCGCGAATACTTTTGTGACAAGACGATATGGCCAGACCGTTCCA -GCAGGAAGCAATGCTCCTCCAGATGATCCGTGAATTCCATACCCCTAAAAAGAAAAAGAT -GAGAACAACGCCTATAACCACATGGAAGACAAGACTCGCCTTGAGCACGGCATCTGCATC -TAAAATTGTATACAGCCCCTTTTTAGAAGGATGATCCATTTCCAGGCGCGCGATACTTCT -CTTGAAGTCATCAAACTTGTCCTGGGTCAAAAAGACTCGCAACTTTTGCAGGCGCTGCAT -CTCACTTAACTCCACGGCATCATACTCTTCAATCAATTTCTGCATCTTTTCCAAATTGCG -AAATGTGAAATCCACAATCTTGCCCGCCGTTTCAACCCCATGAGTCTCTGCAAGATGCAT -ATACTGTTCTCCAGCATTTGCAGCCATCTGCCCGCCATTGCGCCCAGTTGCGCCCGAACA -GAGCGCACGTGCTTCCAAAACAGTTACTGTGGCAGTAGGGTGGCGTTCCAAGAGAGATCT -GGCTACCGATAGACCTGTGATACCGGACCCAATGATCGCAAAATCTCGATCATTTGGCAA -TTTTGAGGATTGGACGTTAGCAAGAGCATGTGGAATGTGCTGCCAGTAGGACTGGGTGGG -ATTGGCTCGTGGCAGGCCTGGGTCAGCCGTAACGAGATTCAAGACTGGATCGTGTTAACT -TCCGAATTATACAGCGGTAGAGTCAAGCGTGATAGCACATCAGTCAACTTCGGGCTTGGG -GGCCAATTGAGGTTACCTGGATCATTCGGCGCCATGGCAAAAGATGTAAAAAGGTTCGGA -ACTCAATCGTTCTGGCTAATAACAAATCCAGAACTTGCAATTGATATTTGAATTTGAAGT 
-CATCAATGCCAGAAAATAGGAGCTGGGACCTTCATGGAATGGACTTTGTTGCCCTCGAGG -CCTCGTGCGGGGTTTATCTTATCTATGGCCAAATTGCCCAAGTGGGCAAGCGGGAATAGA -GCAAATCTTCTTTGACATCTATCGCATCCTGCCTAGCACATGAATTCGGAGAAGAAGCTT -GATCAGCATGGAGACCCCTGCTAGCTCTAGAATGAGTATACGTAATCTCTATAAACGTAT -CAATAATATAGCACATATCACCAACCTGAAAGCCCGGAGTAATTCTGTATGAGAAACCTA -AGAATGTTGACGGGCCTAATATGAAACTCCTCCACGTTTGAGGCATGTAGATGGCGTAAC -GGCATTCCATATAACATTCCATACCGACTACGTAGGACCCCGGAACTAAAAATGTGAGCG -TCTTGTTTTACGAGAGCATGTCCTCAATTTCGAGAGATGACCTCGCTATAATAATTCCAT -CATTTTCACTTTGTTGTGTCCCATCCTTTTGCTATATCTTGGAAATTTTATTTGCCCTAC -AATGAAGCTCTCTACACCTATACTTACAGTGTTTGCACTGGCTACAGCGGGCCTGGCAAC -GAAGACGTGTACCCCTAGCTTTGATTATTGCTCTGACGTCCTGATTAAAGACAAAGGTAA -TTTGTTTCAATGTCAAGCATTGCCCCGACAAGCTAATTGAAGAAACGTTAGGGTTCACCG -AGGCAAATCTGAAGGATGTTTTAAAGGGATCAGATCTAGAAAAGGAGGATATTAAGAATG -TTCTATTCCATTGCAAGAACCCCGGCGACGTTGGACACCCCAAACTTTGCAAGAGTGGAT -GTCAAGACCCAGCAACAGAGGGTAGCCACTCGTGCTCAGCATGAGTCAAAATTCTGCTGT -CAACTTCAAATTGGCAGCTGATAACGCAGCAGCTGAATCCATCATGCTGGCTGTGAGAGG -AAATTAGATGCACAATTATGTCTGGTAGCAAAACTGTGGATAGTAATGGACCCGAGGCCA -GTCGCAATCAGAAAATGATTCAATTTAGCAGATTATACGAATTCACTCGTAGAAATCGCC -ACTCCAACACAATAGTGAAGTGATGAGATGGCAGGCCAACAGTGAAGCAAGTCATTGTGG -TAAGTTGTGTAAAAGGGGGTATGAATATGAGTGAGTGATCACTAGACTAAATTCATTGTA -TTTCCTTGCTAACGGAAAGATAGTACGCGAAGGCCGGTTTAGCTTGGTAAATAACGCCAA -ACATCGAACCACTTCGGCCGAAGCCTTCTATCAGGGTCATTTGACTCCAGCTACTACAGT -CGACCAATTTTTCCAATATGTACCCAAGCTATTTCACGCGTTCGATATGGCTAGTGGTCA -TTACCGGAAGCGCCGAAGGCCCGTCACTGAGATTTTGGAGCAGCAAAAAGACACTATGTT -CGCGAAGGCCTGAGCCCATTCATCCGCACGGGCGTGAGGTGGTCACACGATATTTACAAT -CATATCAGCTTAAATCCTAGGATGATCTTTCCGCCCTCAGTTCTGCTGCTGCCTATGAAT -CCGACAATAGTAAGAGCCTTAGTACTGTGATTGCGTTCCCTTGAAGGTTTCGCCGATTTG -TACCTCCTACTCCTACACGGGGCTGGGGAGGGTTCGTAAGCAATTTGCACAAGGAATTAG -CAAAGAAACTGCAGGCACAATGCGTCGGGCTTGGGGTCGAGAACGTGGTCCGAGACCCCA -TGTTGAATGGACACATGGAGGTGGCACTGACAAAAGTCCTACGACTGATGCAAAACTTGC -CTACCCCCCGGAAGCTTCCGAGCGACGCAGGCATGTTTCACAGGTACTACCTAGGCGAGC -AACGATTTAACAATAGTTCGTTACAATTATAGTGTACCGAGTTCGTGTTACCAGTCAGAC 
-CATGCTTGCCAAGGCTGGTTGAAATCCAATGATAATTTCCAGGATTAAACCATAATGCAG -CAGGAAGATCATGGATATGATGAGGTCGATGACCTGTAATTGGGGAGAGTAACAAGAATC -AGCGGAGGATAGAGAAGATGTCATCGGGGCCAGACTCCGGGGGTATGCGAGAAACAAACA -AAACAAACAGGATGGAGTGACGTCAAGCAAGATCACTCACCGCCCACACTAAACCCAAAT -TGACCCAACGGTGACATGTGTCTTGCAGCCAAATTGTAAAGGAACGCAGGTACTAGAAAG -GTATTAGCTTATTGAAAAAGATCAGGCCACTCTTATACTCGGTGGAAATTATATAGAGAT -ATCTTAGAAAGAGTCCAGGCCTGTTCATATTTTCTCAAGAATTCTCTATATCTCTTAACA -ACCTATCGTACTGGAGCTTCAAGTTGTTTTGTCTGTGACGCAAAGCACCCACTTCTGCCC -GATTTCTCTCACCGGATTGGGTTCAACCCCTCCACATCCCACCCATCCACAATTGGAGCT -TTACAACTATCGAGCTTTCTTTTCTTCTTTTGCTTGTTTTCTCCTCACTTTTCATTTCAT -TCTTTTGATCGACCTGTCTTTCAGGTCATCTTCAATTGATTCGATATGGAACCCATTAAG -ATCGCCACTCAGCCGGCTAAGAGACGCCGTATCGGCGTTCTCACCTCTGGTGGAGATGCT -CCCGGTATGAACGGTGCCGTGCGGGCTGTTGTCCGTATGGCAATTCACTGCGACTGCGAA -GCCTACGCCATTTTCGAGGGATACGAAGGACTGGTCAATGGCGGCACTATGATCCGCCAG -CTACACTGGGAGGATGTTCGCGGCTGGTTGTCCCGCGGTGGTACCTTGATCGGCTCTGCC -CGCAGCAAGGGATTCCGTGAGCGTGCTGGCCGTATGCAGGCAGCTAAGAACATGGTTTTG -AGAGGTATCGATGCACTGGTCGTCTGCGGTGGTGACGGCAGTTTGACTGGTGCCGATGTC -TTCCGTGCCGAGTGGCCTAGCTTGTTGGCAGACCTCGTTTCCGCAGGGGAGTTATCCTCT -GAGCAGGTGGCACCTTACACAGTGTTGAATATTGTAGGCCTGGTAGGTTCGATTGACAAT -GACATGTCTGGGACAGATGCCACCATCGGATGCTATTCGTCTTTGACCCGTATTTGTGAT -GCCGTCGACGACGTTTTTGATACTGCATTCTCCCATCAGCGTGGTTTCGTCATTGAGGTG -ATGGGCCGACACTGTGGATGGCTTGCATTGATGTCCGCCATCAGTACCGGCGCAGACTGG -ATGTTCCTCCCTGAGATGCCTCCGAAGGATGGTTGGGAAGATGACATGTGCTCCGCAATT -ACCAAGGTGGGAATAAGTCCAGCGTTATTCGATCTTCCTCCTTTTTTCCTTTTTTTGCTG -ACTGAGGTGGTTTAACAGAATCGAAGCGAACGCGGCAAGCGTCGTACGATCGTCATTGTC -GCCGAAGGAGCCCAGGATCGTCAGTTGAACAAGATCTCGAGCTCGAAAATCAAGGATATT -CTTACAGATCGGCTGGGATTGGACACTCGAGTCACCATTCTAGGCCACACCCAGCGAGGT -GGTGCGGCGTGCGCTTATGATCGCTGGTTGTCGACACTCCAGGGTGTTGAGGCCGTTCGT -GCCGTGCTGGAAATGTCACCCGAGTCTCCTTCACCAGTTATCACTATCCGTGAGAACAAG -ATCCTGCGCACACCTTTGATGGAGGCAGTGCAAGCGACCAAAGATGTGACAGCGAAGATC -GAAGACATGGACTTCGAGGGAGCCATGAAGCTCCGTGATCCAGAATTCAAGGAGTACTAC -AATGCATATATGAACACTGCGAAGCAAGACGACCCGAAGATGAAACTTCCAGAGGGCAAG 
-GTATGCACATTCCCCAGCCCACCTGTGGTGCTCAGGCTCCCCAAACTTCCCAGATACTTA -CTCTTCTACAGAGAATGCGGATCGCTATCATTCACGTCGGTGCTCCGGCCGGTGGTATGA -ATCAAGCAACCCGTGGCGCTGTTGCATACTGCCTGACCCGTGGCCACACCCCACTGGCCA -TTCATAACGGTTTCCCCGGCCTGTGCCGTCACCACGACGATAAGCCAGTTAGCTCAGTCC -GCGAGGTTACTTGGCTGGAAGCTGATAACTGGGTCAACGAGGGTGGCTCAGACATTGGAA -CGAACCGTAACCTGCCGTCTACTGACATGGAGACCACTGCCAAATGCTTTGAGCAGTACA -AATTCGATGCCTTGTTTGTTGTTGGTGGCTTCGAGGCTTTCACAGCCGTCAGCGAGCTAC -GCCAGGCTCGTGAGAAATATGATGCGTTCAAGATTCCGCTGATCGTGCTCCCAGCCACCA -TTTCTAACAATGTTCCGGGCACTGAATACTCCTTGGGTAGTGATACCTGCCTCAACACCC -TGATCGACTTCTGTGATGCAATTCGTCAGTCCGCCTCCTCTTCTCGCCGACGAGTGTTTG -TGATTGAGACCCAAGGTGGCAAATCTGGATATATTGCGACCACAGCTGGCCTTGCTGTGG -GTGCCGTGGCAGTGTACATTCCCGAGGAGGGTGTTGATATCAAGATGCTCTCACGCGACA -TCGACTTCCTTCGCGACAATTTTATCCGTGACAAAGGAGCTAACCGTGCCGGCAAGATCA -TTCTACGCAACGAATGTGCTTCCGGCACATACACTACCCAGGTGATCGCCGACATGATCA -AGGAGGAGGCCAAGGGCCGCTTTGAGTCTCGTGCCGCTGTTCCCGGTCACTTCCAACAAG -GTGGAAAGCCTTCGCCAATGGATCGTGTCCGTGCTATGCGTATGTCCATCAGGTGCATGC -AGCACATCGAAGGATACGCCAACAAGAGCCCTGATGAAATCGCCGCGGACCCTGTCTCAA -CTGCTGTGATGGGTGTCAAAGGATCTCAGGTGCTATTCTCCCCCATGGGTGGCGAAAATG -GACTGGAGGCTACCGAAACTGATTGGGTGCGCCGTCGCCCCAAGTCCGAGTTCTGGCTTG -AACTACAGGATGTCGTCAATGTTCTGTCCGGACGTACCCACGCTAGCAAGAACGAAACAT -GGTCCTGCTATGAGAGTACGTAAACCGACCAGGATCCCGGTTATTTGCTTGTACACCCCT -GTTCGATTCATTTTCGCGCTGCTCATTTTTCTCCATTGGCCCCTGAGGATCGGCCCCCCC -CCCGTACAGTGTTTATTACTGGTGCCCCTTCAACACGGATGTTAACAGTTATCCCCGTTT -TACTCTGCAGATTCTTCGAAAGGCGAACCCATGACCTCGGTTCCCAACTCCCCTTGAAAG -GATTTCCACCCACAGGACTTCAACTTCTTTCCAACCAACCATTGACTCCACGCCAATGAC -GCCCCCGGGATCTCCTTGAATGTTTCTTCCAAATACTGGCCGGCGAGCCATCCGCTAACT -TGTGATGTTGAGGGCGATCGTACTAGACGTCCGTTACGCTTTGTACGATGCTATGCAACA -AGTCCTCCAGAACCAAGGATGCCTTACGTGACATTTATGCCTGGGAGTTTGGGTGATTGT -GCTTTTCGCTTGATGATTTTTTACCCTTCTTGAACATTCTTGGTCTACGGCATAAAGTGC -GTTGTAGATAGAACTATCAATTTCAATCTTGCAATTATTTCATATGAAACAATAACGCTA -TAGGCGAGTGCTATCGTGAGGATCAAAGAGGTAGTCAAAAGGCAGCACTATAGAATCTCC -GAGGGCGTATCAATTGCAGCACTGACACTGAAACCAGGAAGAATGAACCCGGACAGACGC 
-AATATTCCAGTATCTAATGTGAAATTCAATGATTACGGGTGCACCGCCTTTTCCAGCTCA -TCAAGTAGTTCCACAGTCTCAGTGGGGCTTGCGATCCAGGCAGTTGTGGATGCAAGGCGT -GCCTAATAAGAAAGAGGATGTTAGTCTGATCAGTAGCCAAAGAGACTTTTATAATTTTTC -ACCTTAAAGTCATTAGCCCCAGCCGACAAGAACTGGTCGCCGACATGCAGTGTTTTAGCT -GGGTCAATACCGCCAAAGTAGCGCTGACAGGCTCGCACACCCCACGACTTGTCACCAATA -TCGACAAAGACATCATTTCCACCGTTGAAAGCACAGAATGGCAATCGTTCTGCGACAGCC -GAATGCTGAACCGTGTGCTGGACCACCAAAACAGTCTCTTCCAACTGCTCACGGCTGATT -CGGACCCCGTTTACCGGGTAGACGCCGACCGCGCGATCCTTCCGCAGAATAGCAGCAGGC -AGATTCATATTTTCGGCACACGCACGTAGAGCAGACTCGGCAATATCAAGAAGCTCAGTA -ATATCACTATCACTCCAGGTTTGCATCTCATCCAACAACCATTCGCTCCGCCGCACGTAG -ACGAGTCGGGCGTCCGAGGTCGGGTCGAAGCGGAATAGATAATTGCTCTCGCCGCCCATG -ACAATCAGTCCTTGTTTCTGATCTGCGGTGAGAGTAGTGGATTCGTGAATCGCATCAAGT -AGACCGTGAAGTCGTTCGTAATACTTCGGCGCCTCGGTGTAGCCAGCCGCTGTCACAATG -CCGACCTTGCGATCTTGTTCGAGAAGGCGCACAATGCGGGGGATGACGGGATTGTCGGAT -GTGAGACAGGCTCCATCATCGTAGAGTGTTACATCTCCATCGAATGTGATTAGTTTCACA -TCGCTCGAACGAACAAGACCCAACAGCTGGGCGGAGTTCAGAATAAGTCGGATATCGTTG -AATGACGGGGCCACGAAGCGTCGTCGGCTGATGAATCGCTGCTGATCTTGATAATTGAAT -GCATCCTCTAGGTATAACTTGGTGAAAAAAGTGCCGACGGTCGGTACGAGGAGATTCAAT -TTGGATTTCCCAGGGGCATTGCGTTCGTGGTGTTGGACTTCAAAGATTAGGAGACATCCT -ATATTTCTTGTTCTTTTTACGACTCCCACTTACTGTGGTCGTGCATCAGATTCTCTACAT -CACGCATAATCTCTGCATAGCGATGGTGGGTGTCGGCCGCGACTGTCTTCAGCACCTCAC -CATTTTCCTTGTAGATTGCGGTGGGCTGAGAATGCAGGACGAAGGGGACAGCCAATAGAC -CCTAAAGGAATTAGAATTGAACCATATGACATGATTGAGCGACATACTTTGATCCATTCA -ATCTACAGACTGTTAGTTAGACCCCTGAGAGAGGCAGTGACGACTTACCAGTTGATCCCG -GCGATGGGTCTACGTCTGTGGGGTCAGTGAGGGGCACATTTCTGGCACATTTCTGGCGGA -TGATCGTACCTTGAGGGCATCTATGGGGAGTTAGTAAGGAATGCAATGCAATGATGACAC -ACATACATTCTACTCTGTAACGTGTTGTCATGATTGCAGAAAGATGGTGCCTGGGGAGAT -GTCGGTTTCCAAGTTGAGCTGGTGCACGTCCCTTTGGTGACTCACGCGGAGCTCGGAGTA -GATAAACACGGAGTGCCAGCAATCTCGGACCTGAACTGTTTTACTCGATTGGTCAATTTG -GTTTTCAAAATTGCTGACAACACGGTATAGCGAGCCATTATGACCCCAATTCGAGCTTAA -TATCGCACTAAATGGCTAGACCTGGACCCCTGCTCGTAACGCCTGCAATCACTATTGGTC -TCCCAAAAGTGCCTCGAACAGAAAATTTTGCGTGATCAAGGCATCAATAATTTGAAGGCT 
-CTATGGGATGAGTCCGAGGGCCGAAAAAACCCCTCAACGCTGCCACAATGACTGACCAAC -TCATATCGGAACAGCAGGTGCTTGGTTGGAAATATAATGCCTAGGATGAGCAGACATATA -AATATCTGCCAAAGCGACCTCGTTTTTGAGGTTCAGCACAAGCTTTGAACATCGAAGAGC -CTTCCAGTGAAAAAAAAAAAGCACCGGCGACTTTCATACCTCACCAAAAGCAATGCTAGC -TGCATCTCCACCAACCATGAACACACCTAGCCAGTATGAGAATCCACCGATTTATTCCCC -ATCCGGGGAATTCAACTACTTCATCGATGAAAAGGCCAACACTCCCCTGATCCTCCGACC -CGAACAAAACGAGCCCGGGTTTGCTGTTGCATCTGCTCTCAGCCGAGGCCTGCAAGTGCC -ATCCAGATCCAAAGCCTGCACATCGGGATTCGAGTATCCAGATGAACTGTCTCATTATGA -CATCTCCAAAGAGGATTGGGCACAATTCACACAAGTGGTTTGCGACGAAGCCAAGCTTTC -CCGTCAGCAATGGACCACAGTGGTTGGGAAAGGCCTGGGTGTCATGGCAGTGGGCGGACT -GATGGTCGGCCTTCTGGGCGCTATCCCCGCGATATTCGTTGCCCGTCTCACCAGGAATAG -AAGAGAACAACAGAATCTGATATCGGCACTGGCTGGAGCCCGTGGTGAGCGCCTTGCTCG -ACATATCTCGCAATGGAACGAGACTGTCTTTCAGCCTAGAGGTGTCTTGATCCGGGTCGA -TTTGCCGGATGAATACTTGAATGACATGAAAGATATGGATATTCGGACAAATCGTGGATC -TGAAAGGTCTCTTAAAGAGATTCAGGATAAGGCCGCGAGTAAGGCGAGAATTGTGATCAT -TCCCCTGGAGGAGTCGACGCCAACGAAGCATATCCCGGGTTGGGATAATCGGGGTTGAAG -GAACGGAGAAAAAAATTAGTGAACAAAAGGGCACGTCTATTCATGTATTTTAGCGGGTTG -GTAAAAGGCAAAGAACGGGTTAGGATAATTTGAATCGATTCATTCATCATGATAATGTCA -TTATTCAAAAAAAAAAAGAGATTAGCCTTCTATCATGACCGGCAAATGGACATTAGAGAG -ATCTAAAAGAAGTCAAAAAAAAAAGGTAAGAGTCCAAGTAGCATCTAAACTCGGCCCTCT -TTATCCTGCCTCTCAATATGTTGAGCCATTCCGTGCGATTGCACCGCGATTTCCATCTTC -TCATCCACAACCTGATCACGCTCATCGGCATCCCGATTACCATACCGACCAATCTTGTAC -CACGGCAAAGCAAAGAGACGATCCATACTCTCCAGGGAACGACCTGCCGTTTCCGGAATA -CTGAACCAGACCCAGGCGCCACCGATAATGGTAATGGCAGCAAAGCACCAGAATGTACCC -TTAGGGTCAATTCCACCCTTTGCCACGGGGAGAAGCATGTTGGGGACGGCTCGCGCATTG -CCGTATTGGTTGGCAAAGTGGAGGGTCATGGCCATAGAAGTGCAAAGAGCACGGATACGT -AGAGGGAACAGCTCTGCAGTCAGAAGATACTGCATCGAATTCCAGCCCAGGGCCCATCCA -AATCCTGAGACGTAAATCATTGCAATAGCTCCCTCACTAGCGCCTTTCTTATCCTCCGGT -ACCTTGTAGCCATCGACGACTCCCATATCGGGAACGGAAGTCAAGAAGCCAGCGATATAG -ATCATGGAGATTGCTTGCAGGGTGATACCAAGAAGAAGGGCTCGCTTGCGTCCAATCACG -TCGACCAAGAAAAGAGCACAGAGAATTGCGGCAACCAATTTGACGATTCCAAACACAGCG -GTGACCAGCAGTGACTCGTTCTTGCCTGTAACTCCTAGCAACTTGAACAAATCCGGGGCG 
-TAGACAGTGATACTACCGGCGCCGGACCACTGTGAAAGAAGTTGAGCCATGAAGGTAAGG -TAGACTCGGTAAAGGTTCGATGGGACCAGGAAGGTTTCCTTGATCACGCCGATCGGGCCA -GCCCCAAGTGTGGCTTCCATCTCGGCTTCATGTGTTGTTTGGATGCCGGATATTTCACGC -ATGACATATTCGTCATCGGTTGATAGGCCGCGGATTTTTGAGAGGTTAGAGATGGCCTTC -TCGAGGTTACCACGTTTGATTAGATATCGGGGTGATTCGTATTGGAGGAAGGAGAGGAGG -AAGATAATGCCGGCAAACATGATATGCAGACTTGTGGGGACGAGCTATGTTCAAGTCAGT -ATAGGTGATGTAAATGGTTCAAGGAGGATCGCACATACCCATCGGTTGTGAGTTGCGTCG -CCGAGGTTGAGCTCGCAGCCATAGTTGGCGAAATATGCAAGTACAATACCCAGATAGACA -AAGCCTGTGAAGACACAGGTACAGAGGCCACGGATAGAGGCGGGAGCCTTTTCCCTTTGT -TAGTTGATATAAGGTCATGACTTGAATAATGAGCAACTCACAATCTCAGCAAGATATACG -GGAGCAACGACAACTGTTTGGCCCACACCGAGTCCAGCAATAAAGCGACCAGCATAGACG -GCACTCAGGTTGCCACCGTTACCCATGAAAATTGCGATTCCCACGATCCAAATAACACAA -AGTTGGCGAGTTGCCCAGATACGACCGATGCGATCACAAACTAAAAAGGCACTGTACAAG -AGCAGACTAGATTAGCGATATGAGATCAGGCACAAAAGCGAACACGGAAAACGCACAACA -GAGCACCTCCAACAGATCCAATCTGAACCATTGCGGTAACATTTCCTTTTATATTGGTCT -GCTCCACTTCGCTGTACGAGTCGTAGTTGATGTAATGTTGAAAGTCCTTCGAGTTGAATG -CGCCTGAGATCAAACCCTCATCGACACCTCGGGCAGCACCCATCAGGCCAAATGAGATAA -CCGCGAACCATAATCGCCAGTTAAAGACCTCCTTAGGCTCGCCGAGGTCTTTAAGGCGAA -AGATATTGACTGGTTTCTTTGCTCCGCCGGCCATGGTGGGCAACACTTGAATATGTAGTT -GAATCGGATGATATCAAAAGCAAAGCCGGGGTAGTAAGAGAAAAGTGAACAAAAAGGGAA -TGTGAGGCAGAGCGGACTTTATAGCTATGAGATCCTGGTCTATGCGGGGAAATCCTGGGG -GGGGGGAGCGGAAATTCCAATCCGAGCAAATCTGAATAATCCTGAATCCTGTCAAGAGTC -ACTTGCAGTCGAAAATGAGCCGCTATACACATGATACACCGGGATAGGCTAAGCCCTGGG -GAAAGGTTGCCAAGAGAGCTTGGATCTTCTCATTTGACGGAGTCGCCGTCTTTCACTTCT -CGGGGCTTTTTTTTTAACGGGACATGACCGAGTAACCGAACCAGCACATGGTCGCACACG -ATTTGGAATATCTGCAGTTTCACTGCAAGGTCTCTGCAGCTCCCTTCCTTTCGTGTCTCA -CTGTATCTTCAGTCTCAGATGTTCCCCCCTTTCTATTCGGTACTCCGTACTTCGTAAGGA -GTACTAGAGCACCTCAATCGCCAAGGTTCTTGCCTCCATAAAACTTTGACAGTGAGTTTC -GGACAACGTCCGACGAGAAATGTGTGTGTGTGTGCCGCATGTGGCTTGCAAGGATCATGA -GGCCCACCAAGGGAGATGCCAGCCTTTTTATCGCGGGGGCGAAAACTGGAGAGCTCCATC -CTTGACTGATAGGAAGTTACTGATTGGTTCACAAACCAACACTATCCCGTTGTTGGCCTG -AGATCAACCGTTGCTTGGCGTGGGGTGCGCTTTGACAAGCCGATCACTCTCTGTTTCTCG 
-CGTGCTTTGACGAATCGTAGCCCGAAGAGCACCGCGCCGAAATGTTCGGGAAATTTAAGT -CTCGGTGAGACATATATCTAGGTTCTTTAATTTCCTGGCGACATTGTTTATTATTAATAT -CTTCCAACGACCGATATTAGGAAATGGCTCGTACAGCTCTAGACCATTTGGGCCGTTCTT -CGGCGTTCATTGTCCCACTATCATATTGGTCCTTGCAGGGGACATGACTTGATATCACGA -GCATATTTCGTAGCAACTCCTTTCATGGGGTATAGATAGTATCACAATTGAGCTTGCTGC -TGGTGGATATAGAGGGTGATATCAAAAGAACAAAAGCATGGGTTCTCTAAAACATTTCTA -CGTCTGCTGAAGCCAAGAAAGCTCTAGGATGCCCCGTGTCTTTGCCCGAGAATCGGAGTT -CCGGATAGGTAATCTGAATCAATTGAATCTTCGAGGCCAAGGATAGTATGAAATGAGGCG -GAGTCAGGCCATGACATCAAGAAATAAGATACAGAACGGAAGTGATTGGCCAAGCTAGAG -AGGCTATGGCCGCCAGCGGAATCCAGGATTGCGCCAGTCCAGATCCTGAATCTGTTGCTG -CGATAGTCCTGCCAAGGCGCTATGGACATCGACACGAGCACGGCGCCGATTGTCCCACAA -GATATAAACCCATAAGCCGGCCCCAAGAAGGAGGGTGGTAGTTGAAGTTGCCAAGTTGAG -TCCATTCCCAATAGGATAATCGGGTCCATCAAAAGGTAAGAATGACCAAGTGCTGATCAG -CCCGCCGATATTTCCAAACATCACCGTGGTTCCAATTGCCGAAGATCTCGCGGTGTCGGA -GATCACATTTGCCGATGCGTGTGCAGTGCATAGAGCGCCAAAGGCAAAGGCTCCACAGGC -AATAATGAAGGTAGCCCCATATCGTACCATACTGTCTTCGCTAGCCAGGAACATTATATA -GCCTATAACCCCACAATGATTAGCATATGCACTCTGCAATCCTTATATGGCTGCTCGAGG -AGTTGAGACTAACCGATCATCATCAGCGGTGGACTGATAATGAAGAAGATAAGGCGATGA -TCAAATCGCCAGCTGAGGAACGGGAATAGCAAAGTAAAAAATGCACCGACGACATAAGGA -GGCACAGTATGCAACTGTTGAGACACAACGGTGGCGTCGGGGTAGATGGTTTTGACAATG -GTAGGTGCAAAGAATCCAAGACCTTGAACAGTAATGTTGTTCAACATGAAGATTAACGCA -GTTACGAAGGTCACTGGGCTGAAGATACCACGCAGTGTCTTGGTTAGATCGAGCTTGTCG -AGGACTTCTGTCGTGCCAACACGTTCAGACTTAACACGGGCAATGGCCAAGTCCTTCTCT -TCCTGGGAAAGCCATCTGGCTGTCTCAGGTCGGTCAGTCAACGTGAAAAATGCGACGACA -GCAATTCCAATTGTCACGATGCCTTCAATCGCGAAGATCATACGCCAGCTGTGAAGAGAA -CCAAAATTATCGAGCAGGAGAATACCCGAGGCCAAAAGGCCACCAAAAGCTCCAGCAAGG -GGAGCCATGACGATATACAACGACAGGCGGAAAGCGAGTTCACTTCGTCGATACCAACGT -GAGAGGTAGTACGCAATTCCAGGCATGAGTCCTGCCTCGAACACGCTAGAGGATAATTGT -TAGCATTGTCGAGTTTGCTGAATCTAGGAGACATACCCCAAAAGAAAACGGACACCGGCT -GCGCTGTTCATATCAGTGACAAAAGCCGTCGCGACCGAACAGATTCCGAATCCCAATGTG -ATCGCTGGAAGCCACCATCCAGGCCCGACCCATTTGCACATGACATTACTAGGGATCTCA -AAGATAATGTATGCGATGAAGAATATCGAGAGTACAGCGTTATAATCATAGCCCTCGAGT 
-TTGAGATCCTTGTCAAAGCCGGCCAATTTAGCGTTGCCTATTCGAGGAGAAAAAACGGAT -TAGTGCTTGAATTGGAAATTGAAATCTAGGAACAATAAGAGGGGGCTGGTCACACACCAA -CGTTGGCTCGATCGATGAAGCAAAAAAGATACAAGACTGCGACAGTTGGTACAATATATA -AATCAATCTTGAGTCGCAATCTTGACTCTGCGGCTTTGTCGAATTCCACCAGGGGCTGGC -CATACTCATCCGTCGCGAGGTGGCCCCTTTCTGCTGCGTCATGGGCGACTTCCCCTATCA -TATGCTCTTTATGATCTGACATGACCTTTTCATCACCGATGACCGTAGTGCCTCTCTCGG -CATCGGTCATGTTGGAGTAATGGACAGTGGATTGAGCTCGGCGTAGGCAAGGGGGCGGTC -AGAAGTTTTGAGACAGGGATCCAGGCTTAGGAGCATAGACCGAGGGCCGACCGGGTACTT -TATAGCTGTAATAGCTCAATCAGAAGCCCAATAGAAACAGCCAAATGGTAAACCTGGGGT -GTGGGGGATCACGGACTCAATCCCGAACTATGATTCGGCTTTTTCATCAGCTGAAATCTC -AATCTACCGCTGCCAAGGGAAGATACTTAGTCTGATACTGAGAATAGTCAGGAATTCCTT -GGCAGTCGGAAGTAACAGTGGGAGTACCAGGTATCGTTGAAGGTGGCTGCGTAGGAAGTC -CAATCATCATGATTTCCAACGGCGCGAAGGAGTCAGACTCTAAAGTCTATATCGAAGCAC -ACGTTTGGAACTTCTCCACCAATTGTCCGCTTTAGGGTATTTAGATGTTTCCCCAGACTT -GTGGGTCCCACCCCAGACTGGCTCTTTGATTTTGTGGGGATTGGGCATGTACAGCTTACA -GGAAATGCCGACCTCTTCATCCGACTATCTCTTGGGTTTCATGGCGACCAACCATAACTC -CTGAATGTGCCAGGCGTTGTGATTCTCCAGAACGCCAAGTCTTCCAAGAAACACAGCTTA -GAGCACGAGTGAGGTCAAATGAAGCGATCGGAAGCTATCCCGAAGCTGCTCCGAATATTC -GCCCATCGGAAGGTTTCAGGCTATTAGCAAGAGTGTTCTTTGCGATAATTCTTTTTCAAA -CGAGCTAGACTGAGACGCTGATGCCTTGTTTATAAGGGTCAAATCATGTGATATTATGGT -AGTGGAAATTCAAAATGATCGATTTTGTGATGTATAGGGATTGGTATCTCAATGCTGAGG -AAGAAGAAAACAAATTCAGTCCCGAATCCCGATGGTTGCAGCTGTGGAATAACATATCGA -ACAGGGCGTGTGCGGGCTCTGAGCAGTGAACCCGTGGACAGGATGACACATCACCACGTG -ACCCGGATATATACGGATAATCTCTCGGATTAAACGGATGAACTGCAGAGGAGTTCTGTG -AGATTTCATTTAATACGTGTGTATTTACCGATGTCTGAGGCATCGGATAAAACCTCAACG -GGTTGGAGAATCCCTCGCGCATGCCAGGAATGCCGCAAGCGCAAAATCAAATGCAATGGC -CTGACCCCGTGAGATCCTTCAGTCCATGTCCGCATAGTGAATATGTTGGACTAATATAAC -ATCAGGTGCAAAACTTGCCAATTGCGCAATACACCCTGCATATACAGAGACTATATTCGC -CATCGGCGAAAGAAACATGAGTATGAGGAAGTCAGCGAGAGGAATTCAATTCATCATGGC -TCCCCAAGTGGAGTGCAGCCATCGGGCCGGCGCACAACTTCCGTCATGAATGACTTCCCG -AATAGCGTGTCTGCAACACATATGGCATCGCCATCATGTCAGATGCAGCTATACTATGGT -CCCACTTCTCATTTCTCTCTCATGCAGCATATCTACCGAGATCTCGTCTCCAATCCTACC 
-TCTCAACCTGAACCCTCTGGCGGGGTTGAGGAAGCTGGAGCTGGCTTGGATTTGTTTAGC -TTCCGTCGCATTTTCTTCGGAACCCCAGATTTTCATGAGACTAACAAGTCACTTGGGACA -GGGGACTTGTCCATGATGTTCTTGCCGTATGACTTGGCTAAGCTTTTTCTATCGCGGTAT -CTCTCTAGCCTTTATCATCTGATGCCACTTCGCCCCAAGGTATATTATGAGCAATGCCTG -GACCGAATGTACAATTCCTCGCCCACAGATCATCTAGATGCTTTCACCCATGCGATTATG -CTCCTTGTCATGGCAAGCGCGGCACTTGGCACGGATCATTTTGCTTGGGGTGATGTATTG -TTCGAGCGTGTCAAGGCTTCGTTGACCGCATTTGATGACGCGGTTAACCTTCAAACTATT -CAAATCTCAGTGCTTATGATAAGTCATCGTCCTCTTGTTTTTCTGTCTGTAGACTGACTA -ATGTGGGCTTTACGCCAATTACCAGAACGAACAAGGTCGACCCAACTCAACGTTCTTGCA -TCTGGGCTCTGCTTGTCGCAAAGCCTTATCAGCAGGTCTGCATAAAGACGTCCCTCATGA -TATTGCTCAAACCCCGGAAAACATTGAAGAACGACGGGTAACCTTCTGGTCCATCTACAT -CTACGAAACGTACATCTACCATTTTCAGTCTGATTTTTGATGAAGTCGCTAATCTCATAT -ATAGCTGGTTCTGCTACCATGCGGGTCGTCCGAGCTCCCTCTCCCTCAAGGACGTCGCAA -TCGAGTACCCGCAGGATCCATTTATTCGATTGCTGGTGCAACTTTGCAAAAGCATCACTC -GATCTGTTAATGAGATATACGGGCAACGGCATGAGTCTCTTCTGCACATGTGGAGAGTTG -CTAGATCCATTGCGGATGATATTCGTGGTCTTGAAACGCCTCTAAAACAAGTTTTGGGCC -ATGGGCTTGATGATAGCATCCAGGCGGGATCCCTTGGTGTTCGACAAATAATCTTCAGTA -CCTGTGAGTCGACCATTGAGATACCCTCTGACCCACACTGATCAGTGCTAGTATATAATC -ATACCCTCCTTCTCACATTTCGCCCGTTCTTGATATTTCGCGGCCATTGGCGGCGCGAGA -GGAACCTCACACTTGACCAGTCTAGCACCGATGTCACAAATCGCCCACGAGAAACGCCTT -CCTGGCTGAACGAAGCCTGCAATCTTGCCATAACTGCCGCTCAAAAGACCCTCCATCATT -TATGTGAAGCTTCAAGGTTGAATGATCTTGTTCTAGTAAATAGACCGTATGTTATCATAG -AAACTTCTGCTGACTACATCTTAGGAATTGAGATACCACGGGTACTTTTTCGGCAGTTCT -GTATTTACTCTCATCTACGAATTTCTCCACAATCCGAGTGTCGCGCCTGTCTATCTACCA -TGGGTTTATGCTTCGTTACAAACCTTGTCAACCATGCGAGCTGGGGACCCGATCGCAAGC -ACAATTTCAGCAGTGCAGACTGTGCTTCGCAATATCAATCCGTCATACGAATGGTTGCCT -CTACTCGCAACAAGCTCAGATGATATTGCGCGCCCGGGAGGGACAGTCATGCACCAAGGC -CTCAACACCGACCGCAGGCATCGATCGCTGCTCAACACCCCTGCAATCCCTCAGAATCCG -TCACTGGGAATGCGGCCTGACATATCGACGTCTCCTTGGAACTTTCCGCAGGTGGAAAAT -CCGGAAAACCCTGAAACCGGGGGTTCTGCGGGCTCTGGGGAAGACTTGCTTGATTTCACT -CAGTCTGATATGGGCTGGGATTTCGATTTCTCTACCATGGACTTGGAGGCCTTCTTTTCG -GTTTATCAATCGAATGATGCTCCGGTCCTTTGATGAGCGCCTTGCACCAAACAATTTTAC 
-ACCTTACCCACAGCTCCTTAATCCTGTATCCATTTTGTCTCGACCCAGGGGTAAACCGCG -GACTTAATTCATTCAAGTGGATCTATATCCGAAGGGTGGATCTCATCGAAGCTGAGGAAG -CTGCAAAAATAGAGTCATAGCGAATCAAGAATCGATTTGGACCCAGTGGGTCGCTAATAT -ATCCGTGAGTATATTTCCGAGTAGCTCGCCCCAGTGATATGCTCGCTAACACAACATTTT -TTGCAGTTGTGTTACCTGCTCAGAGTCATATTCCCTATCTGTAAATTCCGCCCCAACAAT -ACATACGCGATGTGATCTATGAACTTGACCCACTCCCGGCAATATGGATTTAGAATCGTA -GATTCGATGATAAATCCCCGATAGTATAAGAATTCTTTTGCGCCTCGAGATCATGTAATT -TTTCTCCAATCTCATTCTAACTGATACCATCGCTAGTAGTCATTCTTTTTAATTTCCAGG -CATATACGCCTCCGCTATGGATACTCCATCCTCCGGCACAGCGACACCCGTCGATCTGGA -ACACGGCGATAGCCATATTCGCAAACGACTCACAGTCACCTTCCGCGATCTGAACGTTCG -AGTCACTGCACCAGATGCCGCTTTAGGAAGCACGTTATGGTCAGAGGTGGACCCGCGACA -GGTTGGGGCACTGCTTAAGCGCGGTGACCGCCCAAAGAGAGTGGGCATATCAAACCATAA -CATATAGTTATCATTAACTGACAGTTTTTAGACAATTCTGAAGGATGTTGCAGGTCAAGT -CAAGCCTGGGGAAATGGTATGCTTTCATAGCTCGAGAAAATGGATGTTCTGACTAACAAT -ATAGCTTCTTGTGCTGGGACGCCCAGGCTCCGGATGCACTTCCCTCCTTCGAGTGCTCTC -CAATGATCGGGATAGCTTCGACGAAGTCACCGGCGAGACCAATTATGGAAGCATGGATCA -TATAGCTGCCAAGAGCTTCCGCCAGCAGATTATGTTCAACAACGAAGGTATGCAACCCTG -ACTTGTGGTCTGTGGAGCGCGATGCTAATTGCACACACAACAGACGACCTTCACTTCCCA -ACTCTGACTGTCAATCGCACAATGAAATTCGCCCTTCGAAACAAGATTCCCAACAAGCGC -CCAGAGCACCTGAGAAACCGCAAAGACTTTGTGCAGAACCAGCGTGACGATATTCTAGAG -TCACTTGGAATTGGACACACGAAGAAGAACTTGGTAGGAAACGAGTACATCCGAGGTGTA -TCAGGAGGAGAACGCAAACGCGTCTCTCTTGCTGAGGTCCTGGCTGGGCAAAGTCCAGTC -CAAATGTGGGACAATCCTACCCGGGGTTTGGACTCAAAATCGGCGGTGGAATTTGCTCGT -ATGCTCCGACGCGAAGCGGATCGAAATGACAAGACGATTATTACCACTACATACCAAGCC -GGTAATGGTATTTACGACCAGTTCGATAAGGTCCTTGTTCTGGCTGAAGGTCATGTCACT -TACTATGGCCCTCGTCGTATGGCAAGGAAATACTTTGAAGACCTTGGATTCATCTGTCCT -AAGGGTGCCAATGTCGCGGACTTTCTAACCTCAGTCACTGTTGTTACCGAACGCACAATC -CGACCCGGCTGGGAAGAGAAGGTGCCCAATACACCCGAGGACTTCGAAAACTGCTATAAA -AACAGTTCTATCTGCAAAGATCAAATGGCCTCCATTGTGGACCCTGAGAAGTTGAGCTAC -GAAGCCGAAGACCTTAAGCTTGCAGTATCCAGTGAAAAGCGGAAGCAGCATCTTCCTCGA -AACAGCAGTGTTTATACGGCGAATCTTTGGGATCAGATTGCCGCTTGTGCTCTCCGTCAG -TTTCAGATTATCTGGGGCGATAAGCTCTCGCTATTTGTGAAGGTTGGGTCTGCTATAATC 
-CAGGCCCTGGTCTGTGGAAGCTTGTTCTATGACCTGCCAGAGGTAATTAAAAAATACCCC -CGTCTGAAACGATTCAAGAAGACCAAGCTGACTGTTGGGATTACAAGGATTCGAGCTCAA -TCTTCTTGCGCCCTGGTGTTTGCTTTTTCCCTGTGTTATATTTCCTCCTTGAAAGTCTGT -CTGAGACAACTGCATCTTTCATGGGCCGGCCAATTCTTTCTCGTCAGAAGCGATTCGGAT -TCTACCGCCCTACGGCCTTCGCCATTGCCAACGCTATAACTGATATACCGGTCGTGATGT -TTCAGGTGACGTGCTTCTCGCTCATTCTTTACTTCATGTCTGCTCTTCAAATGGACGCAG -GAAAGTTCTTCACCTTTTGGATCGTCGTTATTGTGCAGACATTGTGTTTCGTACAAATAT -TCCGGGCTATCGGTGCCGTGTGCAAGCAGTTTGGAAATGCCTCCAAAATATCAGGCTTGC -TTTCCACCGTCTTCTTTGTCTATGGAGGTGAGGATTTGAATATAGTCATTGACATGAAAC -TTCCGAAACTGACAAGTTGCAGGATATATTATACCATTCGACAAGATGCATGTGTGGTTC -CGTTGGATCTTTTACCTCAATCCTGGAGCATATGCCTTCGAAGCACTTATGGCCAATGAA -TTTGTTGGACGCAAATTCACATGCATAGAGCCAGACTATCTTCCGTACGGCAGTGGATAC -TCCGACTCGGCGTCTGCCCACCGTGGATGTTCTGTCCTTGGAAGCGACGATGAAGGCATC -ATTGACGGAGAAAAATACATCCGAGAGCAATTTAATTATTCAGTCCACCACATCTGGAGG -AGCTTTGGTATTCTCGTTGGAATTTGGATTTTCTTCATTTTTGTGACTTCTTTCGGGCTT -GAATTCAGGAACAGCCAAAGTGGTTCATCTGTTCTGCTTTACAAGCGTGGTAGCGAGGAA -TCCCATGGAAAGGAAGAAGGCAAGCAGTCAGAATCTCCAAAACAGGCAGATTCTGGTGCA -CTTTTGAGCTCGGTCAAGCAATCAACTTTTACATGGAAAGATCTCGACTATCATGTCCCA -TTCCACGGCGAGAAGAAACAGCTTCTGAACAAGGTGTTTGGATTTGTTCAACCTGGAAAC -TTGATTGCTCTTATGGGAGCTTCTGGAGCCGGAAAAACGACATTATTGGATGTGCTTGCC -CAGCGCAAGGATAGCGGAGAGATCTATGGATCAGTTTTGATTGACGGTCACCCAGTTGGA -ATTAGCTTCCAGCGCACTACGGGGTACTGTGAACAGATGGATGTGCATCTCGAGACAGCC -ACCGTCAAGGAGGCATTAGAATTTTCGGCCGTTCTTCGCCAACCCTCAACGGTTTCACAT -GAAGAAAAGATCGCTTATGTTGAGCATATAATCGACCTTTTGGAGTTGGGTGATATCAGT -GAAGCACTCATTGGAGGTAGGTTTGGTTGAAGCCTTCATGCCACTGGACAGCGATGCTAA -TAACTCACAGTGCCTGGAGCTGGACTCAGCATCGAACAGCGCAAACGCGTGACCCTCGGC -GTTGAGCTTGTTGCTAAGCCCACATTACTATTTTTGGATGAACCTACCTCTGGTCTCGAT -GGACAAAGTGCGTTCAACATTGTGCGCTTCCTTCGAAAACTCGTTGATGGCGGCCAAGCC -GTGTTGGTAAGTTCTCGCAATATACTATAATATGTTTCCATCGTACTAATCAGTTAGTGT -ACAATCCATCAACCATCTGCGGTGCTATTTGATGCATTTGATGGTCTTCTATTGCTAGCG -AAGGGCGGCAAAATGACATATTTCGGAGAAAGTGAGTTTCTTCTTGAGCCAATTTGTCAA -AATCAAGCAACTAATCATGACTGGTTTGTGTCAACAGCTGGCAAAGACTCGACCAAAATT 
-TTGGACTACTTCAATCGGAACGGAGCGCCATGCCCCCCGGATGCAAATCCAGCGGAGCAC -ATTATTGATGTTGTTCAAGGAGGCGGTACTACAGACAAAAAGGATTGGGTCGAAATCTGG -AACCAGTCAGAAGAGCGCAAGCAAGCACTCTCCCAGCTTGATGCAATCAACGAGTCTAGC -ATAAATGATTCAGAGTATGTTGAAGATACCGCTGACTATGCGACCTCGCACTGGTTCCAA -TTCAAGACAGTCTCGAAGCGTCTCAGCATTCATATCTGGCGCTCACCAGTACGTCGCTGC -AATGATAAACCTAACCCGATTACAAATGCTTACATTCAGATAGGACTATATGTGGAACAA -GATCATTCTCCATATCTTCGCGGCTCTTTTCAGTGGTTTCACTTTCTGGAAGATTGGAAA -CGGCACTTTCGACCTTCAGTTGAGACTATTTGCGATTTTCAACTTCATCTTCGTCGCTCC -GGGCTGTATCAACCAGATGCAGCCATTCTTCCTGCACAGCCGTGATATTTTTGAGACTCG -AGAGAAAAAGGTGAGCGCGCTAGTGGAGACAGACTGGCGTGAAAGTATCACTAACATTGA -TGCAGTCCAAAACCTATCATTGGAGCGCCTTCATTGGAGCGCAGACTTTGACTGAAATCC -CATATCTTATCATTTGCGCCACGCTTTACTTCGCATGCTGGTACTTCACAGCAGGTCTTC -CGTTGGAGGCCAGTGTTTCTGGGCACGTCTATCTTCAGATGATTTGTAAGTCATACCGAA -TCTTCAACGATTAATTCGAAAACCTAACATCGAGACAGTCTATGAGCTACTCTACACTTC -CATTGGACAGGCGATTGCGGCGTACGCACCGAATGAGTATTTTGCAGCTGTGATGAACCC -TGTTTTGATCGGTGCCGGTCTGATCAGTTTCTGTGGTGTGGTGGTTCCATATTCCCTCAT -GCAACCATTTTGGCGATACTGGATCTATTACCTGGACCCTTTCAACTACTTGGTTGGTGG -GCTGTTTGGTGAAGTGGTCTGGGATATCAAGGTCAAATGTGAACCATCGGAATTTGTGCA -GTTCTCTGCGCCGTCTGGCCAGACCTGCGGCCAATACATGACCGATTTCCTCGCCACGCA -GACTGGTTACCTGGCCGATGGAAACTCGACTGGGACATGTCTATTCTGTCCGTATTCTGA -AGGTTCTGATTACGCGAAAACGTTCAATCTTCAAGAGAAATACTATTCGTGGAGAGACGT -GAGTATACCCTAGCTGATTTGGCATAGTCTGCGATGCTAACTAGTTGTTTTTAGACGGGT -ATTACTGCCCTTTTCTGCTTCTCGTCATACGCCTTGGTCTTCTTTATGATGAAACTCCGG -AGCAAGAAGACGAAAAGTGCTCGTTCTGAATGAGTGATCGCCACCAAGCTAACCTTGAGC -TGTGCTTGCGTTCGCATTATCATCATGCAAATACATTCCGTTCAACTTCGGGTATAATTA -AGATTTGGTCCACTTTTTTCTTTCGTTTTCTAGATCACATTTATTTCTCTTTGTGTTGGC -GACCATATGATGCTATGTTCCTCCAGGACCTCATTACATAGTTGTCTCTTTTATTGTTAG -TTTTTCTCTTTGATAACCTGATGTTTTATTTATTTTTATCTAGATACTACTTACGCTATA -CCAAGTCGTTACAGCGGGTCGGACATCTACAAGACTAGCACTTTAATCTATAAAGTTGTT -GATAATGTACTGATCCAGAGCTCATTGCTAGCTGCCCTAAGAGCCACAAAATCTACCCTA -CCACATGATTCAAGAGATACCATACTTTTTGTGTTTCATTATCGTGTAGCTGATTCAAAT -TGCATTTGCTATATGTTGTGAAGATGATCATTTGAGCCGAGGTGACTGCAGTAGCACAAA 
-ATATATTTTGTATACACTCTCAATCTACCGGACCTATTAAACTCATTAGCATATTGGACA -CATGAAAGTTCAAAGTAACACAGAAGACGATCTTTTGATTAAATTTCTGTTCGTGCGTTA -TCGTATTCGAGCCGAGCCGGTGACATCCTGGCAGCTAAGCTTGATCACGGGAAGATCGAC -ATGCATGCGTGTGCAGAAAGACTCAACATCCGAGGATACAAAACTATCCGAATAACCCCT -GATTCTACACTGTATTGACGCTCCTGGTCGCCTTGAATATGCACTTTCACATTCTTGCAA -TATTCCCATTGGTTGCGAGTGCAGTAAACATCATCTCATCCAATGATGATGGCTGGGCGG -AAGTGAATATCCGCGCTCTATTCGAGTCCCTAGGAGCGGCTGGACATTCGGTTGTTGTAT -CTGCACCCGCTGAGAACCAGAGTGGCACGGGTGTGTTGTGCCATCCCGCCTACATGCCTC -TCTTCTATTGACTCGTCGCTACAGGATCAAAGCAAAAAACCCCGACTGTCTTGACTGGGC -CGTGCGAGTTCAATAGTTGCCCTTCTGGCAGCCCGGCCGTGGGATTTAATTCCTCAGAGG -TGAGTATTGTGTCCGCAGTTACCAAGTGATATACTGATCGCACAAAAAGCCCCGACTGGG -ATATGTTAATAGCTACCCAGTGACATCTATGAAATATGGTATCGATTCGCGCGCCCCGCA -ATTCTTCAACGGCCCTCCGGACTTGGCGGTCGCAGGCCCAAATGTCGGTAAAAACATTGG -GCTCACGGTTTATATTTCTGGCACGGTGGGCGCCACCACCTATGCTGCACACGCTGGAAT -CCCCGCTATCGCCTTTTCCGGCGCTACTGGCTCCCAGACTTCATGGAATATTACGCGGCC -TCACTATAGCAATATCTATGCCGATTTGGCAACTAACTTGACAAACCAGGTGGTTGCTGC -TGGAAAACCGTACTTACCGAACGATGTCTGGCTGAACGTCAACTTCGGAAAGGTTTCTGA -CTCCAGATGCACAAATCCGGCCGATTTCTCTTTTGTACTTTCTCGCATCCATATTGCACT -TCCATTTATTACTCCAGATGATGTGACCACGTGCGGCCGTTCGAGACTACCAAGCGAACT -GGAAGTATCGTTAGCGTCTGGGTGCTATGCAAGTGTCTCAGTCGGGATGGCAGCCTCGAA -AAGCGACGCCAACGCTACAATACAAGGGGCTGTTTTGAAGAACCTGGGCAATTTGTTGAC -TTGTCTACCATGATTGGTGTAAAAACGCCAATGCCAAGAATGTGGAGATCAAGTTCCTCA -TGTATTTAGCCATGCCGAGTTACAGAGAATACTACTAGGTGGCCTTAATACATACAAAGA -ACACTACTGTATACTGAATCCACAAACCCCCCAATTAAAGACTAGATCTCAACCGAATAA -GCGGTCAAATTGTGGACTGAGACATTTTGCATCGGAGAAACTAAGGAACATACGGCGAAT -CACCGGAGCTAGCTATCCCACGCGTGAATAGCGACCTATTGGATTCTTATCGTCCACTAT -ACTTCATTGGGATTCAACCCCGTTTTTACCGGAGTGAAACTCTCGAAGAAAGCGAGATCG -TCGCCAAGATAAGAGACATGTAGATCTAACGGGATGAGCACGGCAGTATAAATATTTCGA -GGAAAGCTCTTTGTTCATCACACGAGCTGACTATTTTCACATTCTATCTCGACATGATTC -CCCGACTGATTGCCGGGTTGACTGCTCTCACTGTGCTGCCTGTGACGGCTAGCCCGGCTG -GGTTGTCGCGTCGAGGAACAGTCGCATCCGACCAACTTGTCGGATTTCCCCAGACAGTTC -CAGCTGGTGCTACTGGAGACCTATATTTGGCCTATCAGCCAGATCTATATGTTGTGAATG 
-GTTGCGTGCCTTTCCCGGCCGTAGATGCAAACGGGAACACCAAGTGAGTGAACATTTTCT -ACTTTACAGTGGTTTCAGACTGTGGCTCTAATATTTATCATGTAGTGCCGGCCTCAAACC -AACAGGAGATCCAAGTGGCCAGTGTAGTAGTAGCACGGGGCAGATATATGGTGAGCTTTC -CCAAGTCATGCGTTGAATAGCGTTGAATATATTGACTAAAGGGCTTTTGCAAACCACAGT -GAGAGGTGGTTCTTCAGGCGACCATTATGCCCTGATGTACTCCTGGTATTTCCCCAAGGA -TTCACCTTCGACAGATCTCGGCCATCGCCATGATTGGGAGGGAGTTATCATCTGGCTTTC -CGACGCCACAAGTACCACTGCCTCGAACATCCTGGCCGTTTGCCCGTCCGCACATGGCGG -CTGGGACTGTAGTACAGATGGATATACGCTTGATGGCACTGCATCTTTGATCAAGTACCA -GTCCGTTTGGCCTGTAAATCATGCGTGCGGCCTAACGACTACCGTGGGTGGCACACAGCC -GTTGGTTGCCTGGGAGTCAATGTCAAGTAAGTTATGCACCATGACACTCCTGCAAGGCCC -ATAAGACAGAAATCTAATGTGGAATACCCAGCTATCGTGCAAAACGCCTTGGATACAACG -GACTTTGGGTCAGGGAATGTTCCTTTCAACGAGCCAAATTTTGTCAACAACCTCGCAAAG -GCAAAATTCTAGAACGGTTAAAGTACATGACTTCTGAAAACTGATTTCAAATTCCACTGT -AATGCAAGGTTGTTGCAACCATACCGCGTCTCCCGCCCGGCGGGCGTTGTTAACTCGCTT -CGCTTGGCTCAACAGTTTGCACCTGGTGTCTCGCTGGGGCGCGTCAAGAATTTTTGCGCG -CGAATCTTTCCGTCTTTAAGACCCTTTAGGCCTTCAGACAATCCCTCGAGACCGTTTCCC -CCTATAGTGTAAGGATGAGGTGTGAGCCAACCCTCGCGCAGACCTCGGGCAAACGCTGCG -CCCCATGTCACACCAATGAGCTTGTCATTCTCATCCTCGTAGTGAACCTGACCGACCATG -GAGCTCACGATATCAACACTCGGGTCAGAGTCCTTGACCTCGGTTTTCATCAAGATGTTA -GTCACCTTTGGCCTCCGACCAGCAGCGTCCGGCGGGCCAGCAATGACCTTAGTGACAAGC -CTGACTGTGTCATCTTCGGAAACGGTATCAAAGGCCTTCCAGCAGCGGCCGTCGGGAGCA -CCTCCTTTCTTGACAGCCTCCTGGATCGCTTTGATTAGCTCCGCCTCCGTAGGATAAGCG -GTGTAGTCGACGAGGACATCTCCCTTGCTCTCATCGAGGAAGGGCTTGATAAATTCGCTC -CGCTTGCTGCCAACTGTAATGATAGGATGGATATTGGCGGCCGCTGCAAGTTTGATAGCG -AAAGCTCCTGTGGCAGTACTGGCTCCATAAATTATCAGAGGACGCTTAGTTCCCTCCTTG -CCCGCGGCCTTGGAGAGTGGCGACCAGGGCTCCGGTAATTCGAGCTCCTGAAAGAGTGCG -CAGACGGACGTGTAGGCGGCCAGAGGAATCGTGGCAGCCTCCTCGAAGGTTGTTGAGTCG -GGAATGTGGAAGGTAGTGTAGTAGGGTGCAATGGCGTACTCGGCGAAGGCCCCATGCGAG -GTCATCATCTTGTGAAACGCGGCCACGCGGTCCCCTTTGTTGAATCCAACGACACCCTCT -CCCACGGCTTCCACGGTACCGGCGATATCGTCACCAGTGTTCCCATCAGTACCGAGCAAG -GTGGGGAGCTTCCAATCCTTCGGGTTGGAACCTTTCAAAATTCGCGTTAGCTGCCGATCA -AAAAATTGGGGCCATGACGCATATACGCCGTCCATGCAGAGACAAATGGTCTAGTGGCAC 
-ATATTGACCACTGCAGGTCCTCTAACATCTAGTACCTACCGGCGACGATAACCTTGATCA -ACACATGGCCAGGCTCCGGCTTTGGGATGGGTGAATCGATGATATCGACTGATGTGTCCT -TACGAACAATCGCTTCCTTCATGACGAGATAGCCGAGGTTCCTTCACGCTTTGAGGTGGG -AGACTCGACTATAAGACAAAACGCAAATAATTTTGGGGATAGGGTCTAATGTAGAAGACC -AGCATGGCTACGGTGATATTGATATAATCTGCAGAATACTGTGATCACTGAATAACAGCA -TCATAGGCTGATAAACCGGTTTAGGGTACCTAGCTAATTATTAATCGTTGAATCATCCGA -AAATTCGATTTCCGCTTCGGACATGATAGGAACCCTCTTTTGCCCCACATAAGAATAACG -AAGTACAGTACCTTCCTGCCGCCTACATCATCCTTGGTAATCAGCCGGTACCTGAAATCG -TACCTCTTACGAATGTCATATTTAGCTAAGTACAATTGTGCAAGCTTAAGCTTTTAGTCC -CTTGATTTCCTTGATATTAGTCACGCTTGACATTTGACATCTTTTGTCCCCAGCTGATAT -GGCTCTTGAGAAAAGAATGGGCGCTGCAAATAACGTAACGAATACTAAAATGAATCTCCA -AAGAGCAAGTTTGGAACGGCTACTGACATATCTTCATCAGCACAGTTTCATATTATTTGA -GATTGGTGGGGTGCCGGTGGCGTGCCGGTGGGATGCCAGCGGGCGATCAGTGGGCGGCCG -GCAGTGTAGGCAGCCGTCAGGATCCGACTGTAAATCGTGACCCACGAACTGCTTTATTTT -TTCACTACTATGAATCGGCTGCATATCGCCCAAGAGGAGGGATCAGCGTAAAGAAatcca -tccatccatccacccatccatACTGTAGTGATCAGAACCCAAATGAGCACTTCTTGTGTG -AATCCTTTCACTCCAGATCTTCAGTATCAATACCTGCGTTTTTAAAATGACCCATTGCGC -CTATTTCTTTGTTCGAACCTCATCAGCACACCAGTCGAGGAAGTAATCGAACCCTTGATC -CGGATTCTCGGCGGAAGACTGCCAATGACGCAGAATGAAGCCTTTTAGCCCCGCAAAGAA -CTCTTCAGTTGGGTTCAGTTCGGGGGGGTTACGGGGGCAAATAGTCTAGTTTGACTCCTT -TATCAAAGCACAGTTGCTTGATTCTCTCAGAACGATGGAAGGATGCCTTGTCCTGGCGAG -AACTGACCTAGGCCTATAGACGGGCCCTACCCTTAGGGCCCATTTCCCGCTTACATAATA -TCCGTCCAGCCCGTCCATGGGCGATGTCGCAGTCTACTTCTACCAAAACAAGCGCGATCC -ATTCTACAACAGTATAAAGAGGTAGAGTATCGACACAGATCTTAGAGTGTTAGGTATCTA -AAAACAAGAAAACGTTCTAGTTCATCAATTTAACTCTGGAATGGCCCAAGCACGACATCA -AACAGTAACAATGGACGCCCCGGATGTTGAAAAGTCGTCTAAAAAGGAATATCCCTCAAC -GATGACCTATTAGACTGGCACAACAATCTTAGTACCACTGACACCCCTTTTGAGCTTGTC -CAATGCTGCGTGGATTCCCCCAAGGCCGCCGCCCTCAATCTGGATGTTGGGGCTGGGAAC -TACTTTGCCGGACTCCAGACCGGGCTTTAGCCATCCATTGAAAACTTCGTTCATGTGCTT -GCTCCTCGCGACCTCGTCCATAGATGGGAAGTTGAAAGTAATCTGGGTGTTGTCGAGTGT -AGGGTGACCCTCTGGAAGAAGGGGGGAGTGTGCGACCTTGGCAGGTGCATCTCCCTTGGT -CTCTTTTAGAACATCCAATGTTGGCTGCAAAGCTCCATCGACAACACAGTGCGCAGTGTT 
-CAGATGGAAACTATCCTTTTTTATGGCGCTGACGATCTGAGAGACGACATCGCTTGCCCT -GTAATCAAAAACCGCATCGGCTCCAATCTTTTTGACGAGATCATGGTGCTTAGGACTGGC -TGTGGCGTACACGGTGAAGCCGAGTGTCTTAGCTGATTGAACGGCAAATGTACCCACACT -GCTAGATGCACCCCAGATCAGGACTGCCTGTTTGTCTGCGGGGGTGTACTTGGTATTGAG -AGGAATCCCGATCGTGGTCCATGCAGTTAGGGCAGTTAGGACGGCCAAAGGGAATACTGC -GCCTTCCTCGAAGGAGAGGCTATCGGGAAGTGGAATAACAGCTTCGGACTGAGCCAAGGT -ATATTGCTGGAAGGAACCATGATCGGGGGAGCCGTTCTGGTAAAATGATGAGGCAAAGGC -AATGACTCGACTTCCTGGTCCAGGGGCTGTCGTGACATTCGAGCCCAGCTTAGTGACAAT -GCCAGCAGTGTCGGACCCGATGATAGCAGGATAAAAAGGCACAGGGGGCATGCCATAGTC -ACGCTGGAAATGATCGCAGGGGTTCAAGGCAACGGCCTTGACTTCGATGAGAACATCGTT -CGGGCCGGGCTCAGGGGTAGTACGCTTGCCAAGTGACAAGGGGCCGCCTTGTTGGGGGAG -AATGGTACCTTCGTGCTGTCCGGTCATGGTGGATGTGCGACGAGTTGAGGTGGAAATTGC -TGTCGTGGGAATTTTTCTACTGAAAATTCGGGTTGCCCTCAGCACGCGAAGCATGTAGGA -GGTTTGAATGATAGAAGATAGAATGTTGATGGACATGGATATGAGAAGCGATTTTATATC -TTAGAAATTGACCGTGACTTACCTGCATATGAAGTCAAGCCTTTACATTAAATCCGATAT -ATCCCTAAAGCCCGAGTCTAAAAGGAATTCCTTTGCTTATCCCTACTCGCATATTGTATG -GCCAGGTTTGTCGAATCGGAGTGGCCAACAGCGCATCCGCCCCGATCTTCCGGCGTTGCA -AGCGCCATATTCAATCAGGACTCGGCACAAGCCGTTTAACTGTAGCTAAGTCAGTTGGGA -CCAGTGTTCTCATGAGCGCGCACAGGCCGTGCCCTCTCAAAACAAGCACTGACCCACATC -GGGTAAAATTCCGTGGACCGGGGTCCCGGGCTCGGAGTTTCACAAATTCCCTCTGTACGG -GTTGTACACTGAAAAGGCTGCATACATGTTACGCAGGAAATTCGGACATAATATTGCTTT -GTGGAGTAAGCACTGCTGGTATCCTTACACCCTGTCTTGTGTGTTTAGCGATGCAAATGT -GAAAAGGCACGATTTAAGTGCTATGCAGGCGGTCATAGTATTCAACATAGAGATATACTG -TACTATAACATGTAATACGAGCCCCCTTCCATCCCGGGGGAAGAGATGTTTGGAATACAG -GGTTTCCTGTGCTGAAATTCAGGAAGGTGATTGATCGCATATGACCATTTTTAATCGCCT -CTGGATTACCGACGCTTTCAAAAATCCGTGCTGAAAGTTCCTCTCGGAGGATGTCATAGA -GTCGCTGGAGCGCCCAGTGTATTCTGAGGCTAAACACACCGCCGCGGTAGAAAGCCGAAT -CGTCGCGCTCTTGGATACTGTGCGTAAATTGGACTGGCTGGCTTTGTGCCTCAAGTTTGG -CGTTCAGCTTTCGTGAACCCTGCGCGAGCAAAAGTCCTGTGAGGCCTGCACCAATGATAA -TGACATGTGATTCAGTGTCTGTTTCCGCTTCTCTTGTCCTCGATTTCCATTCACCACTCC -TTTCAAAATGTCCAAAACAGACATTCCGGTAAACTAATTCGGGGACACACTTGTTTTGTA -TGAGGAATTCTTAAAATTGAAGATTTGATTGAGTGTATGTTACCGGATGAGAGGTCGGAG 
-CTTCTTATGAAACTATGGATACCCGTAGATTCAAAATTTCCCATCTTAAGAACCAATACT -CCGGCCAGCGCTTGTATACTATCGTCCAAACATCACGGCGGCATGATGTGCGGTAGATAT -TGGTCTCTGATGTACGAGATATGGTATCTGTAGGCGCAGAATGTCGGTGTATCTGAAGAA -CGATCACCATTGGCCTCGGAAAGTGCGACAGAAGATAACCACTGCCTATCTTATGGTCGG -TGATCGAAATTTGGAAGAAGGAGCCGGGAGCAGTACAATTGCGTCTCCTCGACGTTGGGA -CACCGGAAACTAAGGTAGCATCTGAGAAGCGGGAAGAGCTAGAAATTGTGTGGACTGTAT -GTCAAGCTGAACAACCAACTAGGTAGAAGCCTGACACGGCATCAAAATATGTAATATGTT -CAACAGTCGGCGAACAACTGGCCAAGAATTGGCGAATATGTTTCTGCACTTTGGGTCGAA -TCGAACACGGATTCATACATCCTTGTCTAAGTCTACCCACAACTCAGTGAAACCAAAAAA -TATATAAGGTGCTCATACTGGCTCAAATTAATTCAAATCAGTATCTTTCATCAATGGCAC -TCGTTTCCTGCTCATACATTGAACCTGTCTTTCAACAACAGGATTTAAGTGGGTCTTATG -CCAGATAGATTTAAGCAGCAGTTGGAAGACACTGGGAATACCACTGGTTTGACGAAGAAC -ATGCTGCCGGTGGCGCACAGACAGTTGGTCCTGTCCAACCCGCTCCCCCGCACTGTCCGT -AGGCAGTCTGGGTTGCTCCGCTCCCACCAGCAGAAgtggtggtagtggtcttactagtgg -tggtagtccgtaaggttgtggtggtggccttggaggttgtCGCTGTTGCAGTGCCACCGG -TAGTACCACCCGTTCCTGTCACTGGTGTGCCAGTTGCGGTAATTGCCGAAGTAGTTTGTG -TGATTGAGACTGCACCACTGTAAAGGGTTGGCCCCGGGACAATCTGAAAAAGTGAAATTA -GCAATGGACAGGAATCGGGCGAGGTATTGGTCAGGGTAACGTCTTAAATCACTTACATAG -GTTGACAGAGAGGTATAAATGTTCACGAGGATGCCAGGATCCGTTGGGCTGTACAGATCA -GTACCCAAGACACCTGCTGGCTTGTCTGAACCCGAGCCGGTGATTTGTAGGTTGAAGCAT -TGAGGATAATTCTGGGTCCCACCCTCAGTGCCTGCACCATGGAGTGCGATGCTTTACACC -AAGATTAGCAATGATTCTCGAGCAATCACGGAGCTCTACCCACATTTCATGTCGCAAGAC -GTAATGTCCAGGAGCGATGTCAGAAGGAATCTCGACCATCCACCCCGACTCGTGGGCTAT -CAGTTGATCATCGGCCCAAATCCCTGGAACATTTGTTCCATCAACTAGTCCGACACCACT -AATCTTGAAGAATTCGAGCTTAGTCTTGTCCACAGTTTCACAGCCGCTCGCCCCACAACT -AGCGAGGTAATCGATGATCGGTCCATGATGCGATTCTGGCCAGTTGGGAGTCCATTGGAG -AAAGACTCGATCACCGGCAGCTACTATCGCATGACCTTTAGCATTAGTAGCATCAAGATG -GCAGATAATATCCGATGTATCGTACGCATCTAGTTTACAACGTCAGAAAGGGAACTGCAT -TCGATCAAAGAGGACAGGAACGACATAGCATACCTGGACTAATGAAGCCATTGGCAGTGT -TTGGGGTACCCCAAGCAACCACGACTGGAGGCGAGTCTGTGTAAGGGTAACTTCCTATGT -CCCATCCATCGAAAGAGACACCGTTGATAACGATGTTGGTGACGTGACCGTGGGCTGCCA -CGAGAGATGCAAACAGAGCGGGACCGAGAATGCTATACATTGCGGTTTTATCCGAGGAAG 
-CAGCACAAATATGTTGAAGTCCCAACTAAACCGGTTTCTTGATTGGGAGATTGATAGCGA -GATTGAATCTTGCCCCTTGATTATCCTAAGTGATGAACCTGCGAAACGGCCATATTTATA -GTGAAGGAGAAGAGTACAACTCGGCATCATGGGCTGTAATTCCGGGTTAATATCCCTGAA -TTACCCCGGATTGATAATTCTAATTATATGAACTGACTAATGGAACCAATCATATACAAA -AAAGTGACTTCGGCAGAACTAAAGTGACCATTCTGCACGGATAAAGGCAAGCCTCGGGAC -ATTTCTGCAGGATTTCACAACGCCACCAGCGGATGTCGTGCTGACTCCGTGTCAAGATAG -CTCAGGATAGGAGTCCATCAAGAAGAATGGAATCATAAAATTAAGATGAGGGGTCAGGCG -CATCAAGTTGGGATTCAAAACTGACACACTGGCCCATCTGGAGTGTCAGTAGGCGCCCTT -GGCACTATTTGGGCAATGCACCGTCGGTAAAACATGTGGTATTTTTGTTTAAGAGTCCTG -TTATTTTGCTACGCATCGAACCTTTTATTGTTGATCTTATATCCCTTGGTCGAGATTAGC -GTTCGAGTCCGGCGGAAAAAAAGTGAAGAGAAAAAGAAGAAAAGATTATGGAAGCGAGAT -ACCTTCACGAGCAAGATAGTGCAAAAGGTGATATATTAACAGCCATGGATCAAACGCCAA -GACGGAGACTATGGCCGAAGGTAAGGTAATGACTCCGACAAAGTCCTGCGAATTGCCGCG -TTGATCAGGCTGAAAGTAATGAATAGTGCCATGAGCTTCACATTTTGGACGTGTGATTTC -AATAGCTAAACTCGTGGTAAAAACTCCCACATGGGCGAGTATGATAGATATTCAGTATTA -ATAGCAGCTTTTGCATGATGTTATATGATTGTTAAAAAAATGTGGAGTTCGGGTTGTGGG -TGCAGGTCAAGGTAACTGGCGTCGAATTTCGTATATTAACTCGTGGTTACCAGCCTATTG -AATCGCACAAGCCAAACGCTGGGGAGCAAATGCCAGGGGGCTCTGAAATGTCATTTACGC -TATATTGGGAAGTCCTTGTATTCAGAGTACAATTTTGGAGAACAAAAAGCTTCGAGCACA -TACATGTAATTGACAACAGCTCGGCTCGATAAGTAAGCCGACAAGCACGTCGCATACCAA -CACAGCTATTTTGGGTATCTAGTAGACGTAGCGGAAGTTCAGGTTCTCCCGGTCCGTCTT -ATCTTCGAATGCATGAACGAGGTTTGGATCGGCTGCAGTGAGGCCTGCCTCAGCCACCAG -CTTATCCTTCTTGGCATTCTCACGTTTGAGATGGAAGCGAAGGAAGATGACGGCGGAGAC -CATACAGGCATAGCAAGCGAGGTGAACACTCCAGGCAATATGATACCGTGGCTGATCTTT -GTCAAGGAACACTTGAGGACCTTCTATCTGTTAGCTGGGAACTGAGATTTTCCACAGGAA -GCCTGAGAGACTCACCGATTGCGTTACCAGCAGCCCATGAGACGAATGTTGCAGCTACCA -CGGTGGACTTCTTGGTTTGGCCAGCAATGTTTCGCGACACCATAGATAGCGTGAGAGTTT -GGGCGGACCAGAATGACATCGTGATATAGTAACTGATAAGAAGGCCCACCTTCGTGGGCA -AAGTTGTGTTTTCAACCGTCATGAGGACAATTGTTCCAATGAAAGACCTTCAAGTTGTCA -GTATAGTCTCGCTGCACATTGAGCAATGGGAAATGCGTACGGAATGATGAAGCACAACAT -CACGAGAGTGTTCTGTTTATACTTGGTCACAAGCCATGCAGACGACAACAGAACAATAAT -AATGTAAGCGCCAAGCACCATGGCGAGCAACTGAGTTTCCAATGTAGTGAAGTGGAAGCC 
-CTTGATAATGATACCAAAGAACGCGCCTAGACCACTGGTAGGGAGAGTGGTGAACATCTG -AACAGCACAGTAGCACCACGTCTGAGGATCGCGGAACGCCTCCCACATTTGGTATGAGTG -GAACTGTCTGTTCTGAATACCAGTCTGGTTAGAACGAAGACGTTCAACCATCAGACGTTT -GTCTTCCTCGCTGAAGCACTTCGCTCGCATAGGGGAGTCAGGCATCCACCATATGACGAA -GAGTCCCCATACAACGGATATACATCCATATGTCAAAAAGAGTGCTTGCCAAGACTTTAG -CACTGTGTCTCCTACAATAAGACTGAAACAGTACGCCAACAAGCCACCGACAATCTGCTG -TGCTCCGTTCATCATGTACCTATTGTACCATCACATCAATATACAATATCTGTAGTAAAC -GAGAATCCACATACCAATATGTTACCGTGGCTGCTTGTTCCTCTCGCTTATACCACATCG -AGGAAAGAGTCACGAAAATGGGCTGACAACAAGCCTCAAATATACCCAGGAGGGTACGCA -CGATCACCAGCCCAGTGAAGTTCTTGCAGACCGAATGGAGTGCCAGAATCATACCCCATA -GGCAGATGTTAATGCCCAAGTACTTGCCGATTGGGACTCTTTGAATGATCCAATTGGTGG -GGTATTCGACGATGAGCACAGCGATATAGATACACGTAGTCAACCAGGAGTACTATCGGG -CGGAACAAACTTAGCGAGTGTCAAGTGACAAAATCTGTGGAAGAATCACATACCTTCTGC -CCATCCTCCAGATTTGTGTCCGTTCGAATCCCCATAATAGCCGAGAACGACATTGTTCCC -TTGTCCAAAGCCTGTAGGAAATATGTGCAAATCATGATAACGAGAACACGACGATCAATT -AGACGCTTGAGGCGCTTGTTAGTCTCATCGTCGATTTCCACCTGACTGTGGGCGGCATAT -GCTTGAATTTCCTTGTCCACACGGGTGATGTCAATGTTATCCTTTGCGAGGTCATCGCCC -GAGAGAATCCTTTCGACATGCGAGATGTCTTGCTTGTTCTGATACTCTCCCATGTCAACT -TTTGGGTGCTGCTTTTGTACTACCTCAATTCCCTCGCCAGGCACTGTAGCGTCCACGTAA -GGGGCTGTCTCAATCCCCTGCTGGGGTTCGCTTTTGCCCTCGGCCATAACAGAATGGAAA -GGAGCAAGTGCAAAAATGCGTTTGGCAAGGAGATACAGGGAATAGGAAAGCGAGATATTC -ACTCGGACTTATAATTGCCGATGGCCTTCGCTGTTAGGGCCGTGCATGATCTCACAGAGT -TGCCCTAACCGCATGTAAGATGATTAACCGACTGAAACAGTCATGTGGCTTGTCAACTGC -TAGGATTATCATATTAACCGGGAGTCAAATCGAGTTAAACCAAGTCAAGCAGTCACCTCC -GTCTTCTTCGTCTCCCCAGTTTTCTTGGCAAAGGAGGAGACATGACGCTAGTCTTCTCTC -CACACTGGGGAGTCCGGGGATCGACTACCCCGGTGGCCGAAGGCTACTCGCGTATTCCGG -TGGGTATAGGACAAATTCAGTCTTAAGGTATGTTTTTTGGTTTGCGATTCGGCCTTCTCT -CACCCGAATCACGCTTTGTCCCCGCGGGGATGCTTTGTGGCGTTTGACTCTAAAGCTCAT -TAGTGGAACAACTTTCACCAATACTTATGAGTATCACACCTGCCGAAGCAACATATTATT -TTTGCTTCCCGGGAAAAATGTCAAGACTGTATTTCCATGAGAACACAGCAAAAAATCTCC -CATAATTACACAATGTCGTCCTCTAAGCCCACGGGCACTGAAATTCACAACACCATCAAC -CTGTTGATTGACAACTTGATCAACATCAAAGACGAAACAGGAAGATTTTTGCTGCCACTT 
-GCCGATGGGCGAATTATTGATACGAAGTCATGGCAGGGCTGGGAGTGGACACACGGCATT -GGACTGTACGGCGTGTGGAAGTACTACGAGCTGACGGGCGACGAGAGATTATTGCGCATT -ATCGAGGACTGGTTCGCTGCTCGTTTTGCTGAGGGGGGTACTACCAAGAACATCAATACA -ATGGCCGCCTTTTTGACTCTCGCCTATGTCTATGAGAAGACGGGCAATGTCACCTACCTG -CCGTGGCTCGATGCATGGGCCGAGTGGGCAATGTACGAACTCCCCCGAACCCGCTATGGT -GGCATGCAGCATATCACCTATATCACCGAGAACTACCAGCAGCTCTGGGATGATACTCTG -ATGATGACTGTCATGCCACTTGCAAAGATCGGAAAGCTTCTAAACCGACCCCATTATATT -GAAGAGGCAAAAAAGCAATTTCTCACTCACATCAAATATCTTTTCGACACAAAGACTGGT -CTCTGGTTCCACGGCTGGACGTTCGAGGATGGCGGACATAACTTTGCAGATGCGCGCTGG -GCTCGTGGCAACAGCTGGGTCACAATCGTCATCCCTGAGATCATCGAGTTGCTCGACCTC -CAGCCAAATGACCCAATCCGCCTTCACTTGATAGATACTCTCGAGGCACAATGCGAGGCT -CTGCAGCGTCTTCAGTCGTCATCTGGCGCATGGCACACTCTCCTTGACCACCCTGACTCG -TACTTGGAGTCCTCTGCGACCGCTGGATTCGCCTATGGTATTCTCAAGGCTGTTCGAAAA -CGCTATATTGGTGCCCAATACCGCCTAGTTGCTGAAAAGGCCATTGCTGCTGTGATGGAA -TACGTGGATATGAACGGAGAGTTGCAGAATACCAGCTTCGGCACTGGGATGGGTGACTGT -CTTGACTTCTACAAGAAGATTCCATTGACCTCGATGCCTTACGGTCAAGCCATGGCTATT -ATGGCTTTAGGTGAAGCTTTGCGGGATCTGTTGTGAGAATGAACGTCTAAATGTCTGAAA -TGATGATGGAATATGGGCATTATCTGAATTATAGAGCTATAAATAGGCATTTCCCTCTGA -ATTGTTAGATATTGAGAGTGATATGAACATCATGAAGAGTGAATTATTATTAAATCACTC -AACTGGCCAGCGGCCCTGCACCTTCTTGCCCAGCCCAGCTCTTTCCACTACCTCTTTACT -TCTTCTCTGTGCTTCCTGCACAATTTCATCCTCGTCCATCGTCATGAGTACTCCGCCCTT -GACAACAACCTTCCCGTTTACCACAACAACGTCCACATCGCGCCCAGTTGCAGTATACAC -AACTGCACTGACCGGGTTAAACCAAGGCTGGGTATGGATTCCGCGAACATCGATAGCCAC -AAAGTCGGCCTTCTTCCCAATTTCCAGGCTGCCGATCTGCTTGTCCAAGCCCAAGGCGTT -GGCGCCGTTGATTGTCGCCATCTCCAGCACATTTTCGGCTGGCACTGCTGTTGGATCCTG -CGAGATTCCCTTGTGAATGATAGCAGCGAGCTTCATCTCCTGCAGGAGATCACATGTATT -GTTACAGGGGGCACCGTCGGTACCCAAGCCAATATTAACTCCGGCTTTTTGAAGATCTGG -CACTCTGCAGGTGCCAGATGCAAGCTAGTTGCCATCAGATGGTGAATTCCGTATTTTTTA -TAAATCGCGCTGATGATGTTACCTTTGCATTAGATGTTGGGCAGTGTGCGACATGCGTTC -CCGAAGAAGAAAGAAGACTTCATAATATCATCAGTTTCCGTTGCATCAGCTCGCAGATAC -TCGATATCTCACTCAATGTCTGAGTCGTCCAAATGAACCATATGAACGAGAACCGTGGAG -TCACTCAGCAAGCCCACTGATTTGCAGTATGACATCGGGGTATGCGACACCGAAGCGAAA 
-AAATCACGATCGGCCTTGACCTCGGCGCAATGCATAGTAATGGGTATACCCTTTTCCTTG -GAAATGGTAGTCATCTCCTTATAGAGTGCGTCCGATACTCTGATTTATCTTTAGCAAGTG -ATACCCAATGGCAGTCACGTCTCATACCCTCCGGGAGTCCTGGCACCAAACCAGACTCTG -ATTCTACCATTGGCCGCACCGTTCCACTTCTCCCACATCTTAACGGTCCCAAGCAACGAG -GTCTCACGATTTTCAACAAGGCCAGGATGCATGGCCCAAGCGTCGTCTTTCGCATATTTT -GCAATGTCCATCACAATCTTACCTAGGCAGCCGCGGATTCCACTCTCTTCAACAGCGCGA -CATAGCCCGTCAAAGCCATATCGATCCGCAAACTGCATTGATGTCAGTCTCTGGATACGC -GAGGACGCCCAAGCACTACAGTACCATGCTCTCCAAGAAACATGTAGTTCCCGATTTAAG -CATCTCTCCGATACTCAGTCGCGCTGCAGCGTACCCGTCTTCTGCTGTGAAGTTACCCTG -GAGGACCCATATTCTCTCGCATAACCATGAAACCAGCTCAAGGTCGTCAGCTGTACCTCG -TAGTAATGTCTGTGCTGTGTGCATGTGGGTCGAGATCAATCCTGGAATGATGATCCGACC -GGAGAGATCGCACTCTTCATCGGTTGCATACGTTGCCTGGAGGGTACTTGTCTTCCCGAT -ATCCGCAATCACATCTCCAACTACGCGGATCGCTCCGTCGGTAATTATACGGCGTGAGGC -ATCCACGGTCACAACAGTGGCATGAGTATACAACATGATGGAGCGTCTGATGGGGTTTAA -AAAATAATTGTTCCCGGCTGTCGAAGATGTACGTAGGCAAACTCCTAGCAGATCCAGCTT -TGAGGGCATTTATATCCCCAAGTCTTATACCCCCGCGTTTGACGAAAAACAACGGAGACG -GAGTCTGCCGATGTACAAAAGATCCGCGTCCAGGGCAAGATAAGATAACACATGAATATT -GGAATTGACACATTGCCCAAGATATGAAGAAAGGTTCGATGTTGCGGAATATAATGAAGA -GATGGGTTAAATAGATTGAAGCTACTATTCCATGTTTGCTGTTGTCAACGTCCAAATATC -TTATCTCAAACTAGACACTAGATCAAAAATTAGAAACTTTTCTTCAGCCACTGAGTTTGC -GAATTGCAGTACGCAAGCAGACCATCCACTCCCCATTCTACACCCATGCCGCTCTGCTTG -TGGCCACCGAAGGGAACGTTCGGCGCTACATCAAAGTGGCTGTTTACCCACACACAACCA -GACTGAAGTTGATCTGCCATGCGAGTTGCCCGTTCGAAATCCTTACTCCAGACCGAAGCT -CCTAAGCCGGTCTCCAAAGCATTCGCACGTTCAAGAACGTCGTCTTCATCAGACCATTTG -AGGACTGGCAGGATGGGAGCAAACGGCTCTTCCTGTACCACGCGCGATGTTTCCGGGGGG -TTGTCGATAATGGCCGGGTGAATGAAATACCCAGAAGTATTCTCAATAGAGCCTCCAAGC -GCAGCTTTCAGGTTTTCAGTGCCGATGCTGCTGAACAGGTCTTTGGCTTTCTCAAATTGC -ATTTGATTTTGAACAGGTCCAAAGAATGTATCCGCCTCAGTGCCTTGACCAACTTTCAGG -GTCTTAACAAACGTAACTAACTTCTCCAAAAACTCATCGTAGATATTCTCGTGGACATAA -AGTCTTTTGATCATCATACAAATCTGCGAGGAACAGAGGTAAGAGAGCATTCCAATCTTT -GGGATAATGGTGTCGATATCAACATCCTCGCAGATAATTGCAGGGTCATTGCCACCCAGT -TCGAGTGTAACTCGCTTGAGAGTGGTAGCACATGCAGCCATAACCCGCTTTCCGGTAGCA 
-ATTGATCCAGTAAAGCTAATCTTCTCGATGCGAGGGTGAGCAGTGAACAGTGGGCCGAGG -GATTCATCGCCGCTTAGACATTGAATCACGCCAGGCGGGAAGAAGTTAATTGCAAGCTCG -ACGAGCTTCAAGTTGCAATAAGGTGTGAAAGGGGATGGCTTCAGAATAACAGTATTTCCG -GTGTATACAGCAGAAACTATTTTGCCAACGGCGAGTAGGATAGGGAAATTCCAGGGGACA -ATGATGCCCACCACGCCCATGGGTGTGTAGCGTTGGATGATCTTCCTATCTTCGGTTTCC -TCGATGGTGTTGCTGGTGATCTCAATAGCGGTGAGGCCGCGAACCCACGTAATTGCCATA -CCCACTTCCGCAGATGATTGAGTCAATGGCTTCCCTTGTTCCATTGTGAGGGTCTGTGCA -AATCCATCGGCATTGGCCTCAATGGCGTCACCCCAGGCATGGAGGGCTTTACGTCGCTCG -TCAAAAGAGGTTTTGGCCCAGCCTTTGAATGCATCTTGAGCCGCATTTACTGCCTTGTCC -AATTCATCCTGGGTGGAGACAGGCACGGGTGGATTAGGCTGAGTGTTGGCAGGGTTGATT -CCATGCCGGGTCTCCGATGTCGTGGTCAATTGATTATTGATGACATTTCGGAATGTCTGT -TTTGGCAAATATAAGTCATCGTATCGGTGTTGTGAAGCTGTTGTAGACTTACCGAAAAAT -CTAAAGTTTTGGCGTCGGTCATTTTGAGCGTCTTCACCTTTCAATATTGGGGACTAGGAT -TAGGCGGGTGGAAATGGTGATTTAAATATCCATGGATGTTACAAAGCTTACCTTTCACGA -TATCCGGGCTGAGCCGATTGATGCCACGTATTTATAAGATCTTTTGAAAGCATCTTGAAG -TCGCAATTGGTTACCCAAAGTACTGGTAAAACACCTAGTCCAAAGCAATGGATTGCATAT -GCCGCATGCAGACATCACTGCATCTCTTCTGCCCTAATGACGCCTCGAAATGGAAATTTT -CGGACGATCTCTGGAGCATAATTCCCGTTTCTTTGTACTTTACTCCGTACTCCGTACTCA -TCTAGCAGGCTCAAAATTTGGCGTCTTGGCGGTGGTGGGGGGCACCGGCATCCCCCTCAT -TTTTCTTCTCCTGAGCACCAATGAATAATCGCAAGGAGCTGGTCGGATGCGGTTCCGACA -TCCCGGAAGGGGAACGGAATTGGAGCGCGGTACTCCGTACTTAGCCTTGTCCCGACTCAT -TTGTTCACGAGACAAGTTCATCAAATTAGCTGACTAGACGCATATCGCCATCTGGACCAC -AGTTGGTAAATAGTCAAGACTTTTTCGACATTAAACAATGCTTAACAGTCATGGCTTGCC -ACCTAGGTAGCGATAGATCACTGTCTGGGGGGTTAAGCAACTCAGCATATCAGGACCTGC -TGTATGTAGGAGCGCACATCGCACATTTGAGACTCGGATCACAGTTTTAGTGGTAATTAT -TGCAATAACTCCCGTTGGTTGTGAACATGACCAGGTGGAGTATGAATTCTAGTCGCAATT -CGAAGTTCGGTCACATTCCCCACCTGCAGGGCCGGTGGGGCTTTCGGCTTTCCTAATTGA -GCCGCTTCCCAATATAAGAGCGCTTAATGAGGCTCATTGATTACTGAATTATTGAACACA -TCTCTTGCAAAGCTCTGGGGTGTCTCTCAGATAGATCTCAATGCAAATGATCTACTGCTC -CGTTGATGCTGAGTACACATCCTTTGGGCATTTATTAGGTCGCTGATAATAAGTCACTTG -AGGACTACATTTTGTCTGCTGTAAGCAAAATCAAGCCTGGGCACAATCATCCGTTTGGTT -TGTGTAGGATTGACCTAACAGTTATAAAGACTTTGTATCTCACACATGGGCACTGAACAT 
-ATTTACAGCCCTGAAGGTGGGTACAGCTTCGTCATATGGCTCCATCAATGACCAAATTCT -CCATAATAAATTTCACCATGTAAATTTCTGAATAGGGCCGATGCCCTAGTCCTGCCCAGC -GATGAGTATTTAAGACCACTCAGGAGAACTCTGATTTATCCCTGTAACTTTCCTTGAAGG -GGTGGGAATTTACCCCGTTCAAATGTTACCATTTTTGGCATCAGGAGTATGGAGGAAGGC -AATTCATTCCCTCTCGCCACTGCGTTTTGACTAACAAACAGCGATCAATGCTGTTCAGAG -AACAAATCACCATCTTAGAACCTAACTCGCTGTAGCTTCAGGAACAATTCAGGAACCAGT -CACTCATTGGAATGCCTGGTAGTTCCCGCCGCCACCAATCGCCTCCCCACTTGGGGGAGT -GCTCCGACTGGTCAAGGTAACTATCAACCCCTGAAGACATGCGCACATCCCTGTTACCCC -CCCAAAGCCACAGTCTGATATAGTCTCTATGATGATTGACCTCTTCCCTTTCTGCAAAGA -ATTTGTTGAATCTGTGTCTATCTACATTGTTTCATTCTGGTCGTCTGAGTTACTGCTACT -GTGATTGTTAATTTGTCATCTCACCATTGCATTATTCATGGCTAATTGATGTGTTTTGAT -TAGACTGTATGGGTGCGGCTCATGAGATCATCTCGATGGATACATACACATGTTGGCCCC -AGAGCTGTCTCAATGGATATCGAAGAGAAGCGAACGACCGCATCGCTGATATAGATCTGT -CATCTATAGATGCGACAAGCATGATGTGtatatatatatatatatacatatgtatacata -tGAGGATGTTTATTGGTACATATAGTTTATGTCTTCGTGATCAATCAAGTTCGTCATACA -GGTTTTCCAATCGAGGCTCCAGACTATTTACTCGCTATTCATTGATACCTTGGGTATGGA -AAACTACTCTGGTGATAATTGTATCCGGTGCTTGATCTGGTATTCTTGCTAGATGCCCGA -TGAAGAACATAACTCTGGTAGACAACTGTTTAATATCTCCACCAGTCACATGAGAAGACT -TGCAGGCTCTTTCCCTTACATATTTTTTACAGCTTCCAACGATACCATGCTTCATTTCCA -TGACCTTGTATACCCCTTCTATACCTACTCACAAACCCGAACAGGAACTTGGTTAAGAGA -CCGCTGAGTCACGTATAACCTTATTCTCTACACTCCACATGTCTTGTACAATGGAAATTA -TGGACTTTCCTAACATTGGAGGTGCCATGGGATGAATAGCGTGGCAGGTGGCACTGACCA -TGCCATGACATCATCCCGCTCAGGGTCTGTTTGGTATGTTGTACAAGCACCCTTCGTCTG -TCGAAATCTTGTTTGTCCTACCATGAGAATTTCGAGACGTAATCTCAATTTATATCCCTG -TCTTAATTTCAAACTGTAAGGATTATTCACCAATAACATGTGTTGTTATTTTAAGTAGAA -TTTAGTCGCCCGGCCTCTCCACGCTAACTGGAGACACCTAGGGAAGATCGTCCTTCGGAT -GACGCGCCGGCTGAGCGCCTTTTCGTCAATCTCTATTATCGTCGAAGGCAATATGATGCT -GATGACGATGCTTGGCGTACGTTTTGTACGACATCAAATGAAACACCAAAGTCACCATAT -CTCTACAGCTTATATAAAGACCCTGATCTCCACCTAACTATACACATACAAATCAACTCA -TTACACTCCAATCTTGTGTTCCTATCCAATCTATATCAAATTCGATAAACTACTACATCG -GCCCGTCACAATGTTTCACCGACGTCGTCCAGCCACCACAACCACTCGCACCACCAAGCC -TACCCTGATGACCAGACTCAAGGGCCGAAACGCGAAATCACGCACTGTCAAAACTACCAC 
-AACAACACACCAGAATCCCGTCCACGGCCATCATACTGGACATACCAGCCACAGTGCTCG -TACTGGTCCTACTGGGGGTCGTCTTGGTCATAAGAATGCCTATGGACGGCCTACTCGTAC -CCACCACCAGCGCAAGCCCTCCATTGGTGACAAGGTCTCTGGCGCTATGCTGAAGCTGAA -GGGGAGTTTGACTCATCGCCCTGCGGTCAAGGTAATTATCGAATGAGTCCGGTTCAATTT -GGACTCGAGCTAACATTTTGAAACTAGGCTGCTGGAACCCGCCGCATGCATGGCACTGAT -GGACGTGGCTCACACCGTGTCTACTGATTGATACCCCTTGTATACATACTTTGTATTATC -ACGAGTCATGAATAGTGCTTTTAGCTTTTGAATTTGGATTTTGTCGTTTCTTTTTGAGAT -TCTCTGATTTACCTGTACTATAGGAGCCAGACCATTGAATTTATTGTCAAATTTACCCAC -TCGATCCTACCTTTGTAATCCCAAATCTATAAATATGCCTTCTTTAGATTAAAGGTCTAG -GATGCTGGGTTTTAACATTCGTTCGAGTGTATACATTGGACGCAATCAATTTACCCCACC -TTTGTTGTAGGAGTATAAGGACACATACAATCGATATAGTCACAAGACTTGTATGATCAT -ATCTGCCTCATGTCGCACCGACTCTGCAATCCGAAAATATCTGAGATCAAAGAACAGCAA -TAAAACAAAATCGGAACGACACCATTTCTCCAATACCGGGCCGGTGCGATAACGGCTACA -TGTCCGATACCCCACCAAGATTTTACATAAGTGCGGGACTCCCCGATTACTCACTTGAAC -ACACCTGTGGACCGTTTCAGTGAGATCTATACAATGCCGTCCCTTCAAGAGAGCAAATTG -GCCTTCATTGGCGGTGGCAATATGGCCTCCGCCATCATTGGTGGTCTGCTGAGCAAAGGC -GTGAACAAGCAGAACATCTGTGTCTCCGAGCCGTGGGATGTCAACCGCGAGAAGATGGCC -GCCCTCGGAGTGCGCACTACCACTTCCAATGTCGAGGCTGGAGGTGATGCCGATCTGGTC -ATCATTGCCGTCAAGCCCCAAGTTACCAAGGGTGTGTGTGAGGAGCTCGGCGGTGCTTGG -TCACCCCGCGCGACTTTGCCCGTTGTCGTCAGCATTGCTGCGGGCATCACCCTTGATAGC -TTGAAGGAATGGTTGACGACGAGCGATAGCCGTACGGCTCATATTGTTCGCGTGATGCCC -AACACCCCTGCCCTTGTTGGTGAGGGTGCGTCGGGTGCTTTCGCCAGCGCGAGTGTCACA -TCGGATGAGAAAGAGCTTGTCAATGCTATGCTCGGAAGTGTAAGCAAGGCCACCGAGTGG -GTGGACCGCGAGGAGCTTCTTGATGTCGTCACTGGTCTGTCTGGTAAGTTTCCTGCGCCA -AAAGCCATACGGCATTTCGTCGACGCGCATCAGTCTAACCCAATTAGGATCTGGACCTGC -CTATTTCTTTGCCATGGTTGAGCACCTGGTTGCAAGTGCAACTGCTCTGGGCCTCCCCGA -AGAGCAAGCCACTCGATTGGCTACACAGACATGCCTCGGTGCTGGCAAGATGTTGACTGA -GAGCCCCGACAGCCCTGCTCAGCTGCGTAAGAATGTGACTAGTCCCAACGGCACCACGCA -CGCTGCCCTCGTGACATTCGAATCCCTCAAGTTTAAAGAGATCGTCGACAAGTCTGTTCA -GGCTGCCACTGCCCGGTCTGCTGAGCTCGGAAACCCCATGGCCAAGCCATAGAAAGGGCG -ACTCAAACAAAAATAAATGGGTTGGATCTATGTAATGTTATCCAAGATAGTCCTACGCAA -TAAAACAAATCAATAAATACGATTAGCCCCTATTTAGGTGGACTTGGTAGGCTTCCCAAT 
-ATTGGAGCCAAAGCTGAGGTGGCCCGTGCGTATCGCCTTCCAGAAACTTGGAACAGCTCG -ACAATCGGAGCGTCTTCTCGGTTCATAGTCATACTATGTATGTACAATAATGGTTTCTTT -TTGGCCCTGGAAGGTGAATTAAGTTGTCGGCTATTTTGCTTCACTGCCGCTAACACTACT -TAGGGCGATGACAACTCTGCGGCCTCGTTCGAAAAGACCCTATCCACGCTCTCGACCAAG -ATCGCCCAGGCCACCGCGCGACTCGACCAGCAACGTCAGTCTTCCCGGCGCATCAAGGCT -CTCTGGACCCTCTACTCGACATTTGCCTATCTATTTTATTCGATCATTCTCGCTCTCATA -CTCGGATGGGAAAGCTGGGGAGTCAAGGAATATGCAGCCATAGCAGGCGGCCCGGTCCTG -TACGTGTGTCCTTACTCCGCACTTTCCAACACCGCTAACATTTAGTTCCCATTCCTCCAG -GATCTACGGTGTGCGCAACCTGAGCTCCAGAATTTTCGATTACCGAATTTCTCGCATTCA -ACGCCGCCTCGATGATCTCCAGAAACAGCGCGAGGAGACCATCGAAAAACTCAAGGTCGC -AACCAAGTACAACTCCACTCAGGAACTGCTAGAGAAGTACGGCGGCGAATCCCCAAAACC -ATCACCGGGTCCAAAGAAACAGGACGACAAACATCCGGCTGCTCAGCAGCAACAGCATGT -GGCCCGCACTGGTCTCCAGccgccgcccactgccaacatccgccgtcctccgtccgcACC -ACAAATCCCGAGCGACCCGCCATTCCCCAGCCCTCTTTCTTCTCCTCTCGAGCTACCGCA -GGGCCCGCAAACCCCTCAGCAGCCTGTTTCCCCATCTCCTTTCCCTCCCCAAACACCCAC -CGATCAACCTGGCTTTGCACCGAACGCATTCACTCAAAACGGTGAATACATCGAACAACC -ACACTGGTACGACCGTCTCTTGGACGTTCTTCTGGGAGAAGATGAAACACAACCCCGCAA -CCGGATGGTTATGATCTGCACCGCGTGCCGACTCGTGAACGGCCAGGCACCCCCTGGGAT -CAAGACACCAGAAGAGCTGGGCCGCTGGCGATGCGGCAGCTGTGGAGCTTGGAACGGCGT -GGAAAGCGAGACAACAAAGATCCTCAGCAGTCTGCGTCAAGATGCTGTGCCAGCTGAGGG -TACTTGGGAGGCTGTGTCGAAGGCAGAGACGGATACCCAGTCCTCCCAGGGCAATGAAGA -GGGCGTCACCGTGGCCTCGAGTGAGGAGGAACAAGCGGACTCCCTTGGCTCCGATGCGGA -GGAGCAGACACAAGAGGAACCTCAGCCTGTCCCTGTGCGGCGGTCCAAGCGCAGCGCAAA -GGGAGACAAGGCACAGCAGTAGACAGTCTTTTACTACTTATTTTATTTCGGAATGGCAAT -TGACTATATTGCTTACGATGTTACCAAAGGTAGTCCAAATTCAACACCTCTCACATGGTA -TCTATATATCTTAGCAATATCCAAAGTACAAAATCGTACAGATCATTCAGAGCACCCAAA -TTTCCAAACCATTAATTTCGCGCTCATGCAGAAACAATCGGGTGCAACGTCAAATGTTCA -ATAGCAACATTGGCAACCTTAACCTCAGCCTCTTCGGCGGAAGCACAGCCTTCCACCAGC -GCGATCTCCGTCTGGTTCACTACCGCCGAGCACCGATACGTAGCCACGCCCCCAGACTCG -ACCCTCTTCCTCTTAAAGACCAGCGTACCTGCTCCCTTCACCATGTTTTGCGCAGTGTTC -CGGGGATGAACAACATTAACCCCGTCGGCGATCACACGCTGGATATACCAGACCAGTCCA -ATACGTTCGACGAATGCGTGACACGCATCCAGATCGCCCCCGCAGTCAATGAAGATTGCC 
-CCGAGAACACTCTCGATGATATCAGCCAAGAACTTATCAGCGCGCAGACGTGCGAACAAT -TCCCACGGGTACTGGGTCCCGTGTTCTAAGGCATCACAAATCTCACCACGGAGAACTTCG -TGTCGCTCGAGACAAGCTTCCCGGGCGGACTTGATGTTAGGGCCATGGGAGCGGAGGAAG -CGCCATAGATGGATCTGGTCATAGTGTGGCGCAAGGTTGACATCGCCGTTGACGAATTGG -GTGGTGTGGGAAGGTATGTCTGACGCGCATAGCTCCATGCAGAAGAAGGCGAGGAGGTTT -GCGTTCACGACAGAATGCTTGAGTAATGTCATCGGGCCCTGGTCCATTTCCACGGGGTGG -GCTGCTATCACTGCCATGACGACCATGTCTAGGACAGCATCACCGAGGAACTCGAGGCGT -TGGTAGGATTGTGTTGATGTGTCGTACTCGCAGGATGCGTGGGTAAGCGCCTCTGTTAGA -AGGGCGGGATCTTGGAAGGTGTACCCAATCAAACCGGCCAATCGGTGCTGGTCGATTAAG -CTGCTCACGCCTTTCCCTTCTGGAAAGATATGCGGCGATATATCATTTGTGAAGAGGTCG -ATCTCCGGCAAGAAGCGATGGAGACAGGCCTGTGCTTTGCGGATTCCGCCGTCCACATAT -GCTGCGCCAATGAGAGATTCCACAACATCTGCCAGGACCTTGGTCGAGATGTTCCTCTTG -GCCGTTGATGGGTCCAGCTTGTTCCTGATCAACGGTGCAGCCCATTTCCGAGGGGTGAAC -CGATTAGTTAGAATAAAGGAGTCAATGCCGGTGTCAAGAGCTGCGCGAGCAAGGCGAGTA -TTCTGGATGAGCTTGTCACGGCTCTCCGAGAGATAACCCTCGTGCCACTTGGTGTTCCTG -AAAAAGAGCTGGCAACTAACGGTGAATTTGAGAACAGAGTCACCAAAGAACTCGTATCTT -TGATAGTTCGTGCTAGCCTGTGCAATGGGCGCGCTAATAGCTGTGATGACATGGTCAATG -TTTTGGATTCCGACCCCTTTCAGGATGGTATCGTTCAACCGATGTGCAACCAAAGATGCC -TCCAGCCGGTCTAGAATGGCAGAGATAAGAAGCCCGAAGAGACCCTGTTTTGCTGGAAGC -TTGTCGATGACGCAGGCTGAAGCGGGTAGAACGTAAGATTTCTTTGGTGCATCTGCCTCC -CCATCTTCGGTAACACTGATGACTCTCATCTGGAGGAGATTTCGTCGACGCGGGAGTGAG -CAGCATTCGATTTCAATTGGCGACGACTTTGATACCTGGGCTGGAACGATCCATCTACTG -AATGTCCGCGGCTCGCTGAATTTGGACTGGTCACGCACGATACCCGTAGGCGGCGAAGTT -GGATTCCTCAAGTAGGCTTCAATTGCAGGCTCTTTTCCTTCATACACTGCAAGCCATTCC -CCCAGGCGTTCATGTGAAATATCAGGGACAAAGAGAGCCACGAAGTCACGGTCGTCTCCC -CGCTGACGCGAGCTGGGAGCATGTAGGTAAAGGGCTGTGATCCTTCTCATTTGATCCAAT -CTCTCCGCTGTTGTCTCAAAATTTGGGATTTGCAGATCAAAGTGGAGTTTCAAGGAAGTT -TCAGGTTCCCAGTAGAGAGAGATAGGGTGCGGCGTCGGGGTCGACCTCGGCAAAATTAAG -GAGACGGCCAGATCTTCATCGACAGAACCATTGCTGTATATCTTCAAGCGTGTCTGGCAT -AGCTGACCCGAAGCCCAGCCTTGTGCAAGCTCTACGTAAGGGTCATACTGTTCCGCGGCT -GGGATAATGGATGGGAGAGTCGTCTGTTCCGAAAACCTCAACTCTGGTTTTCGTGTCAAT -GGCAAAAGATTGTCATTTACCAACCCATACGAATACAAGGCTTTGTAGGCTTGAAATGCA 
-GTCTCTTTTCGGGCTGCTCGCTCTGTCCGCCACCAGGACTTCCCTTTTGCTCGTCGAACC -GTCGGATGCACAGAACTTGGTAGTGTGACAGTACCCAAAAGTAATCCCACATTGTTCTCT -TCAAATGAAAACATTGGTCGGTTATCGGAGAACTCATCAACAGGCAAAACAGCGCAAAAG -TGATGCAAATGCTGCACAGCATCGTCCGCAGTCAACAAAGCACTACCAACAGATGTCAGT -AAATATCAAGAGAAGACATGGTGTCATTCCACTTACCTGGTCTTTTCCACCGAAAGTATT -TCCTTGACGTCCTCTTCGGTGGTTTCGAGCTCATAAAGTTCCCGACGCTGTCGTTCGTCA -TCCTGATACGCTTTAATCATAGCTTGCTCCAGTTCCTGCCATTTATGAAGGGCTAACAAC -TCATCCTCATTTGAGATCATGACGGCATATGTAGATTCTCTGTGACGAGCACGCCCTCGC -CGCTGAACAAAAGACTTGAGGTTTGCAGGCTTGTCAAAGCATATAACCACACTGCAGGCA -CTTATGTCAATTCCTTCTTCGAGAACATCAGTAGCGACAATGAGATTTTTGCGGCCAGCT -CTGAACTCATCGAGAGTATCTCGCTGCATATCTCGATGAAGTAAGTCACCGAGACAACCC -TTACGGCTTCTATCACTTGCCCAACCAACATACGCGGCACACTGGAAGCGATCTCTTGTC -TTGGGATGAAGGGATAAAATTCGAGCCAACATACTGACAGTTGCTCTCTGCTTGGCAAAG -AGTAGTCCGGAAAATCCAGGCTTATCCGCTTTCTGGAGGAAGTCGACAAGCATTTCAAGT -TTGATTGATATGTGATAACTCTCCTCGGCTGAACCTGCTGCAGGCATTGCTAGCAGCAAT -TCCAAGAGATAAACTCTCTCCATTTGATCCAAGCGTGACATCTCACTAGCATCCTGGATT -GAACGCTGAAGTTCATCAATTGAGGCACTGATGTAGAAGTCGGCAGCCCAACCTCCCAGC -TCCTCATAGATGTGACGGCTGCGCGCTTGGAACATCCTCAGTTGCTCACTACAGAATGTT -TTACCTGCAGTGAGAGCTTTGGTCAGCTCTGCAGTTTGACCTTTTGACCGTAATGATTCG -ATCCAAGGGTCGTCTTCTATGTTATAGGCCCTGCAACATTCTATGAGAGATGAAAGCAGT -TGACTTCCAAAGCCATACTGTGCTTCGTCAAAAGGTATGTAATTGACACGCTCTAGATGA -GGGAGATGGGTATTTTCCAAAAGCTCGGTGCGGTGGACTCGCGGCGTCCTGCATACTGCG -TTCAAGTTGGATTCAATTGTTCTGGGTGAAAGAAGGTCAGTTGCTGGGGATCCTTATCAC -GTAGCAAATAAACTGACTCAAGCTCGTTATTGCTCGAACGGACCACTGGGCTCGCAGTGA -GTCCCAAGATCCGAGGAACGGCATTTGGGCCTGACCTCAGCAGGGTAGGGTGGTAGTGAT -TTTGCATAATTTTGTTCGCCGGGTGGCGTCTTGTGCAATGATGAGCTGGGTGACATGTTC -TATCAGTCTGTTTTCTGGGATGAGTTGAGATATGTCACGTACCCTCATCAAAGATAAGGA -GAGAAAGTCGGGACATGCGAACAAAACCATGACCTAAAGCATCCGCAAGGACAGCATATG -TTGAGACAACCACGCGAACATCCTTGAGCACTTTGTCCCAGACACCCTGTTCAGTCCATC -GATCGACATTGTCTAGACCGGTTAGAACTTTAGTTTTCACAGAGAGGATCTGGCTAGTGA -TAACTTCATGTTGTTGAAGGCTCAATGCCACAGTCGGGGCAAGAAACCAAACAAGCTGTG -GTAATTGTGAGCCGAGAACAAATGTAAGATACAGCTAGTTTGTTCACTTTATCTGGAGAA 
-TTGCATTTCTCAAGCTCATCTATAATCCGAAGAATAGCACTAAAACAAAGGTTAGGAAAA -TGTGCCGATACAGAAAGCATCGCCTCTAAACATGCATATGAGTCTTTCCACTCCCAGTGT -CCATCTAATAATAGTCAGTCCATGCTTTCTCCGCTCTCAAACTTTAAGAACACATACCGC -GACGATGATATTCTCCTTGCGGCTAGCCTCCAGCATTTCAAGCTGGTACCCCCGAGACCG -GTAGGCAGCACCATCGCCACTCAAACCTGTGGAATCCATGGGTAGCAAGTCTAAGATAGG -ATTATAACAAATAAGACGCTATATTGCAGAAGGAAAGGGAGATGAGAGGAATCTCAAAAG -AGAGGAAGCTGTTTGCTGGTGGGGGGAGCAGCTACGGCATCCCGCAGCGGCCATCGCGCA -GCTTTAACCCAATGACCACCGAGGCCTGTCCGATTCCCTTTTGGATGTTGTATACTTCGT -TAAGAATGTCAAGTGATTGAATTGTCTTCGGACTTCACACGTCACGTTGAATTTTTCTTT -TCTAATTCTCCTCCCCTCATCCTCAAAAGTCATATCAATCCTTATCAACCTCTCTTTTGA -TCATTCACTTCGAACTATGTTTTCTATCTCCCATCACCTTCTCCCATTTTCCATCAAAAC -AAATCTTCTTCTTGCCTTTCCAAGTCACCAACCCATTCCGGCCACTGAGCTGCAGTATTC -ACTCAAACACAACTAAGTTCAAGTCCAAACAACCTGTTCACCAACATCCGTGCCCGTGCC -ATCTCTCTCCCCCCGAGCTTGCTACGATGTACACGGTGCTCGAAGAGATGAAGTATCAAC -CGTTGCAGGCAAGTCCAAGTCCCCCACGCTCCCTGAAAGCATCTCCAGTTGTCAGCGGAT -ACCAAGACCGGGCTAGCATTGAGGAACGGTTTTTCTTTTTCTTTTATTCGTTTTTTCAAA -AAGTCCAGCATAAGGAAGATTCTAGCAAATATTTTGCAATCCTTTGATCCCTTGACATAT -CGATCAATTGTTATATTAGCGGTTTGTAGTAGGATCTCATCGAATCCCCTTTCATTAGTC -CAAAATGTGCAAAACAAGTTTTACATTTATAGCATTTAAACAGGAAGGTGATCCACTTAG -GTTCCTTTATTGGAAGATATGTACATTCGTGGTAATCTTGGCGTGCATTGAATTTCCGTT -TTGGAATGGCTTTTGAGGGTTCTGCTTTAGACCACGAGGGAAGGTAATCTAAAGTCTGCA -AAACTGGATCATGTATTCAATATTACCTTCCAGATGAAGTTGTCACCCCGATCTACGATC -GAATATATATAGTCGATCATACTAGGGATTTATTCCTTTCAATCCATCACGCTCAGAGTC -CACAGGTCCAGCAAATCCGAAGAAGCCCAAACAAATCTCTTGAAACAACATCCAAAATCA -TGCCACAAGCTCTTGATCTGTCTCTATTGAAGGGGTCCCCTGAGCAAAGGGATGCGGTTT -CGTCAGCTCTGCTAGACACACTCAAAACTCGTGGGGTGGCCAAGCTAAAGAACCACGGAC -TCCCAGAAGATCTGATTACCAAGATGTTTGACTTTGTGAGTTAGGTCAGACGAAAGATTG -TATCTCTACTGATTAGATACCTTTACCAGACCCGCCAATTCTTCTCTCTCTCTCTGGAGG -ATAAGATGACCGCCAAACATCCCCCTGCGGCAAACCCCAACCGTGGATACAGTTTTGTTG -GCCAGGAATCAATCTCTTCCATCAGTGGTTATGACAAAGGCCTCCCTCAAGGGAAGTCAG -TCCGGGATATCAAGGTGACCCCTCCCTACATGATCAATTTGGCGAAAACCAGCCTAACGA -GGACAGGAAACCCTCGACATGGGTTCCCCACACGACAGCCTCGTGGACAACATTTGGATT 
-GCCGACGAGAAACTTCCCGGCTTCCGTAAATTCATGGAGGATTTCTATGCAATGTGCTTC -GAAGTTGAGTTGGAGATTCTTGACGCGTTAGCCAAGGCCCTTGATATCTCAGCATCAGAC -CTCAAACTGTTACACAACAAGGCCGAAAATGAATTCCGCCTGCTTCATTATCCCGCTATA -CCGGCATCTGAATTAGAAGATGGAACCGCCACACGAATTGCAGAGCATACAGACTTTGGC -ACGATCACCATGCTGTTTCAAGACTCCACAGGTGGCTTACAGGTCGAAGACCAGCAAAAT -TTGGGTAGCTTTCATGATGTTGAATCAGGGGGCAAGTCGGAAATTATTCTGAACATTGGT -GACTCGCTTCAGCGCCTGACAAATGATACATTCATGGCTGCCTGTCATCGGGTAACCTAT -CCGCGCACTGTCAAAGTGGGTTCTGACGAGATGATTCCCGAGCGCTATTCTGTTGCCTAC -TTCGCGAAACCAAACCGTGTCGCTTCCTTGTTTCCCTTGCAGAAATTTATCACGCCGGCC -ACTCCGTGCAAATACGAGGACATTACGGCCTGGGACTATAACAATCTGCGAATCGCCAAG -TTGTTCACCTAAACAGTCATCCAGCCAAGACACGGCGCCTCGTCATAGCTATATGTTCAT -CTTACCACTCAGACCAAATATCAAGTCTTCATGAACTTCATTTAGGGTGTATAGTCAATA -ACGAGGAACAATTTATGCTCTTTCGCATAAATGTGTTAATGCACCTTACATGTGTCTAAA -ATTCGAAAAGACGCTCGTTATTGCTTAACATCACTATCCAATTCGTGTTCCAAGTTCAAC -AAAAATAGAAAGTGACTACCCACATTCTGCGTTGATCCCAGAGAAGAGGGGCAATCAAAC -CGGGCAGGAAGTTCCGAGTCTTCCAACTTCCGTCTTTTAGGTCCGACTCGAAAAAAGTAC -CTAAATTTATGTGTAAGCAAGGGCTTCTTTGTTGCTGAAATAAATTACTAGCTATGATAT -ATAAGAATCAATACCTTGTCATTCAGTTGAGCCGGGTAGTTGATCGGATCCCGAGGCATG -TCGTGTTGATATCGGAATCACATTCCCATCTGAGCTTTATGGCTAGATCAGACTAAGAGC -CCATTGTCAGAACCACAGGCTCTCTGGGCTACAAAACTCCCCATCATGCAAAAAGACATA -CAATTAACATTTATACAGGACAAGGAGGCTACCCGATGGCTGAGTGCGGGAAATGCAGCC -AGGCACCGGCAATCGATCTTCACAGGTTCGCCTTGGATTGCCACTCCCATCTACCATATT -CTGACAATGTTCCATAGATGTCGAACGCCGAGACATGACCCTTCAGATTGTCTCCTCTGT -ACCGGGTCAGAGCCCCGACCAGGTAATAATTGATTTGGAAAAGTTGGAATCGGATATCAG -CTGCTCAAACCTATCCTCTCTATTTCGGGGACCTTTTGGTGCTTTCAGATGGCTACGACC -CCCACCACCTGAGCAACTGTCTTCCGTTACAGGCGAACCGCCGGATTCCTCCCCCAACTG -GCTCCAGCTGCTTGGGGAGTTGCCAGGAGAGGAAGCTTTCCCGTTCAATAAAGATGCCAT -GGATGCTGAAGAAAACGGTGACGACTTGATACTGGAAGAGCAGAATATGCAGCTACTTCA -AAGTATTGATGCAAACTTTCATGAGCCGAGTCTGCGATCAGAGCTCAGTCCCTACCCGGT -CGTGGGAAATATCGAGGGCTGGAGTCTCTTATCTCATTACAAGGATCGAATCGTCCCATT -AATATCTCCTCTGCGCGCGGGGCAGGAGAACCCTTGGGTGAGCTTGGTAATCCCATGCGC -AATGGCAACTTTGGCAGACATAATGCTGAATGGGTCTTCCAACCATGCCCGGCTTGCTCT 
-ATTAAATGCCGTTTGGAGTGCAAGTGCCTTCCACTTGGCAAATGATTCAGTGGCTTGCCT -TGCGCAATGGACAGTCTCCGGGGGTCTCTACTTGACACGGGCCCAATATCACTTCCAACA -ATGCATGGAGGAAAGCTGCATCTCAGCTACCAAGATGAGCAAATACAAGGAGATGCTCAT -GGCAATACTGAGTCTCTCAAATGCCTTTGTATGACATCCACATAAATCTACTGTATACGG -CTAACGTTTCCTCTGATACAGATGATCAAAGGAGACCCAGACATGCGACGCGCCTATCTC -GTCCAAACGGAGAAATTCATCTGTGTAAAGGGACTAAGCCAACCGGCGCTGTCAGCCAAA -AAAAGAGTACTGCACCACTGTTATGCTTACATGCGCATTATGGCCGAGACAACATGCATT -GCTGATAGGCCGAGAGTAAATCTAACCGGGTCAGGGTGCACTTTGACCGCCAGCACAATA -TATGGAGGAGACTTCCGTCTCTACCCTGGTCTAGTCTTCTCGACAACCACAATGAGCATG -GAAAAGGATCCAGAGATGGCTCAGCGTGACCTCCACCTTGCGATACCCGGGCGATGGAGC -TCGACTTTGTTTCCCACACTGTACGGAATCGACGAAATATTTCTCATGTTACTGTCGCAG -GTAATTCGGCTGGCCAATGAGCGAGATTTGTCAATGATGTCGGATGCGGCAGAAAGCCAG -CTCAGTCTCAGGGAATTCTGGACTCGAGCAAAGGGCCTAGAAAGAGCTATCGATCATTTG -CTATCCACCGCAAATCCAAATTGTGTTCAATCTTCTGATGGAGCCCTTCCAGTGATCACG -ACTGCAACAGCTCAAGCGATGTATATGGCATTGTCAATTTTCTTCCATCGGCGAATATAC -GAAATTGATCCTGCAATGCTTCAGGCTAATGTTAGTGCGATTCAGGGCCTTTTAATCGAA -ATCCAACAGGAGGAAAGCGCCCAGAAGGAGAGTGGCAATGCCGCACTCATATGGCCGGTC -TTTATCACTGCATGCGAGGCTGTGAACCTGGAGTTGCAAGTTTTCTTCTCTTCGTGGTTT -GATAATTGCGCCAAGTCGACAGCCTTGGTTCATGCATCGGTGGCTAAACAGATTATTGAG -ACAATATGGACTAAAAGGCATGATGTGAGCTTATGTGGAGGGACATATAGCTGGCCAGAT -ATTTTGCGAGATGGGGGTATCAGGTTTATGTGCATATAAAAGGCTATTATGTTCATTTCC -TATTTGAATAACATTGCAACATTTAATATTACTTTGAACAATTTCCGATCGTAAGCGCCT -TTAGTATGGCGACCGCAGACAAAATTTACTTATCTCAAATGGTTTACATCAAACTACTAG -TTTCAATATTAGCCCTGTGCATACATGTGCACCTGGACTCCCATTGGCAGTTAGTTCTCT -GTTGGGTCATGCATTTGCTCGAAAGTTAGGATATATGTGGCTGTTTGTCCCTGCAGGCCC -GACAAACAGCCCGGCAAACACGCCCAAAATGCTTGCATGTGAGGTAGGCTCACTTCTAAG -CCCTTGCTTTACCCGATCGAGATAACCGAGCTATTTCCCATTGCGACCTGGTAGAAAGCA -CCATACAAGAGATCTTTGGCGAACCAGCAGTTCGGAACCTTGATGCTCAAGCCTGGGTAA -TTCACTGACACAAAAGCACAATACCAGAACACGTGGCCAAGTTCGAGGCAAACGGACCTA -CGATTGCCTGTTTCTTGCGAGACAAGCATTCTTCTCGGGGCCTTCAAGCCGACCCCCGGG -AAGCTTGATCGCTGATTGCTGGATTGCTAGGCCTGGGCCTCCCCCCGACCACAGACACCA -AGGCGTTCGGAATGCCGTTTGCCGAAGCTATGCTGGGCTATTGAGTCATGGTACCAAATG 
-CCTCATAATCAAATGGATTGAGCATAGGCTCCATGCCTTTACTACTATTTGGGTACCTGG -ATTTCTACAGTGCTTCACTATTCATGTCACAGCTCACTACCGAGTACGGTACTACGCAGC -TCACCCAATGATTTTCAAGTTGTTGCCTTATTCTGCTAGTAAGGTCAGAACAAGCGAGGT -CAATTTATCGACTCTACAAAATAAAAAATACAAGATTATGGCCTTAAATAAAAGCATATC -AGAGGTCTTCTACCCCATTCCCTTTATAGGAGAGGAATGTGAGTACAAGAACCCTGTATG -ACGTTGGACACGGAGTGCAGTCGATTAGTGGCAAGTGTAAGTGGTCTTATCACACCCGAG -CAAGGGGGCTCTCCCCTATAAGAAGGATGTCTCGCACTTGTCCCTGAACACTGTCATTAT -TTTATCTCCCTCCTATAAACCTGTCTAGACCCATTTCGCTCTCTCTATGGCTACAGGAAT -ACTCCGCTGGCCGACCCTGGTCCTTGCGATCCTGTTTCTGACAATTATTGGTCAATCTCA -AGCCACGCTCTGCGCTACCGTGTGCGAGAAGAGCCCACAGATTTGTCCATATGGCGAGGT -AAGATCTCTTTTTCTCCCTTAGCCTACTCTAGTCTCGTCGACATGCTGACTGTGTCATTT -TACTCTTTCTTAGAGGGCCTCAGGGGTTGAGGTTCGTCTTTAAACTAACTGATCTTTTGT -AATCTGTACAATTTTAACTCAATTTGTCTACATAGGGCTGCTGGGGCTGCTGTCAGCCCA -TCTAGTGCCGCTCACGATTTATACTCGAAGCTGGTAATTTTACGAGCAGGATGGCCCTCT -CGTATAACTCTTTTATCCACAGGTGGCTTCGAATTGTTCACGACTTGACGGGATATTTTC -ACAATCGCGAAGACTGACCGGTGATTCTTTCGCGAAAACCCTACTTCCTTGTCAATTGAT -TTTCCTTGGCCTCTTTTGCACAATCTTTAATTGGAGCTATATCGTATGTAGTTCTGATCT -CGGCAGCGTTGTACGTACCCTTTGGACATGAGGTTTTCACTAGCAATGGAAGATTTTGTC -AACATAACACTAGGTATATGAGTACATAGTGTTATAGATAATCAAGTCGGTGATTATATA -TGGACCAAGGTTGATAATCTTTCTATATACAACGAAGACTGAGCTGGAGACAACAAAAGT -TGAAAGACAAATACATTGTCACCTTTTCGGCCTAGGTAGTCTAGGGGCCATGAAGCCGAG -TCCTGCAAATTTACAAAGGTCCCTTATGGATTCGAGAGTGGTAATTAACACCAGGGACTG -ACTCGTGGTTCAAAAGCATTACATTTCTGCAAACATAATTAATTATTAGTCCTAATAACA -TCTTTCGAAACGAGGAAGAGTGTCTTCCAGCATCTCATGTATACATATATCATTTCTCAG -CTTTGATGGTGAACAAACCATGATTTAAATGGCTCATCGTCTCCTTTAAAGCTCTCGTAG -CGACGGCCTCCTTGCAATATCTTGCCTTCGCCGAAGCCTAGTCTCCCAATCCGAACAGCC -TCACCGGGTGATGATGTTGACACATCTCGGACAACGCCAGGCATCTCATCTAGCTCGGGA -CATTCCTCCGTATTGCGTCCAACTAGCAAAGGAAAAACCTTATCGCCAGCAAAAGCGCCA -AATCGTAGCATCACAAAGGAATCCAATCTTTCTGTCCATCGTGGGTAGTAGCCGGCGTAT -AGTGACAAAGCCAACAATGGCACTAAGTAAAGTCCCATCAAGATGGAGACTGTGATAATT -CCCGCGGTAGATATTGATGGAAGATCCATATCTGTGCCCAAATCTTGTATGACAAACCAC -CTACCTCTCTGACTGAAAAGATATTCCTGATTGGCAAGAAAAGCGGCGGCTGTGAAGGCA 
-TTCGAAATCGAATCCGTTTCTTGTTGGAATTGATGGATCCATTGCGCAATCTTTGACCCA -GCAGGGTACCAGGTGCCATTTCCAGAATCTCCAGCATATATGCAACCTAAAGAGGAGTCA -CCCAACAGATTGGTGAGTGGGACGTTTGCGATGCAGGTATATCTCATGTTTGAGGGCATA -TCATCCGTGACCTTGCCGTCTTTTTCAAGAGCTGCGGAAGTTGCCACAAATGTGTCGACA -AAAGACCCGGGCCCGAAGGCTGCCATGGCGGCCGCTAACAATGGTCCTTTCTTCTGTGTT -GAGATGGGACTCAGCGATGCAGTTTTGAGGGGGCTGCTTGCTGTGTCTTCATCACGGCGG -CGATAACCCCCCTCAGAAGGGTAACTTTGCACTGAATCTTGATCTACACAATGCTGATCA -CAAAGTTTCACTGGATCTTGATCAATTAAGGGGCCAGGGACGCCATTGTTCATATAATTG -GGCAGTTCAAAGAAGCCGGCCGTAGTGTTCACAGTGATACGGTAAAGAGCCTTTTCCCCC -ACATCGTGACTAGAATAATCGCCCGAGGCGTACTTGGCTTGTGTCATGTTGAGATAGAGG -ACCTCGGAGAAATCCTGCCGGGTACGTCCCACCCGCCAAGGTGAATCCGTCATATTTTCT -GGCATGCACGCAATGACACCCCAAGAATAAGTACTGTACCACGAGTACGAAGAAGAATAC -TCGGCGTAAAATTTGCCTGGGAGTGAATAACCCGAACTGCAGGCGGCTGGGAAATCAGAC -TCTCGTATGAGCTCGGATGTAGCCGTGGAATTCACACGTGAAGCAAATTGACGAATCAGT -CCTGTATTGAAACCACTGGGAACTTGGGAATAAAATGTTTGAGGAGACATATCGTTGAGA -CTCAAGAACTGCACTCCACTGTTGTTACTCCACAAATTAGGTTGATAGGTATTGCCGTTC -GCTTTGTTGAGGGCATCGCGAACTTGCAACACGTCCTTGCCGTCTGGTTCTGACCGTAGA -TACAGATCATCATTTGAGGTGAAGTCATGGATAGCGGCTCTTTGCTGGGGATATGTCCGA -ACTTTGACAGACTGTGGATTGAGAAAGAGCGACTGGATGGGATATGTTATGAGACCAAAG -ACCCAAATGAACATTGCGGTGGCAAGTAGTGTGCTGCCGTATTGCCTCCATCTCCCACAA -ATCAGTGCTCGATACAAAGTAAAGTCCATCCACTTGCGGTCAGCCAATGTTGTGACTTGA -CGTAGCCGAAGGCCCATCGAATTTTTTTGATTCTGCGCAAATACCACAGCAGCACTAGCA -CACACTGTAGAAATCCAGGGAAGAGTAACAACTTGAATGAATGTCTGGATCGTCCGCACA -GAACGATAGATCTGTCTCTCTTGATGGAAATCTGCTTGACCCCAATCATCATAACCGTAA -TATGAGCCTTCAATTGAGTAGACCCAGGACTTGGCTTTGAGGGGGCGGTATGACAGTACA -ACTGTCATGGCCCAGGCATAGATGGCGAGCCCCACATACAAAATCGAGAATAGAGATACC -CAGGCCGATCGATGAAGTAATGTACTGTACCCTCGTGCCAGAACTTGTCCCGTCGACCGC -GCCTGGCTATATGATGGAAGCTCCTCGTCATGTAGTGGGGAAGACTCGGATGGGGAATCA -TGTCCAGTATGTTCAGGCGCCTCGTCCCGACTGTAGCTCGGGAGTTGCCGCAGAGGGACC -GAGCCGCGTGTCTCCATCTTTGGATCGCGATCTGAAGAAAAACAAAGACACTTAGGACGT -GAAAATTCTAACCCCATAGCAGATACTCTCCTTTTTATTCCCCAACCGACTGCAGCTAGA -GGCTGGGCAGAAGCTACTAACCGAGCGCCCTGCCCTGAGAGCCCCGAGCGCCTTACGATT 
-CCATTATCAGTTTTGAATTTCTGTCAATGTGGAACATGTTTGAGAAATGCAGTGCTATTG -GCCCAGTTTACCCCAGCTCATCACTTTGTTTCGGGCGTTCAAGCCCTGCGGCTTGTCACT -TCGCCCCCTTTTACCCTGAAGTTTCACTGAAGAAATCTCATTAAACGTACTATATTTTCT -GGATGCAGCTCTACAACTAACATAAGATGACAAAAAATAGGCTAATGTATAAATGCACAT -CTTTATGGGCTTTCGGGCACAGTCCAAGGATATTCCGGGTAAATTACCCTGAGCGTGGCT -TTGAGGCTCCGCGTTAGATTGCGGACGGCCAACAACCATTACATAAGACATAATGAGCAC -ATCCAAGAGAGAACTTGTCGGGCAACGGTCTTGTCAGTCATGCCCGGCGAGTTCGATGCC -GCTGATGTGGAGATCCGGCCGACAAACATTTTGGTTGAGAAGGCCGGCAAGACCACTTTC -GACGTGGCGTCTGGTGACACATCAGGCTCGAGCATCTTGCAGCACAAAATACCGATTGCC -CCGTACGTTTCAACTTTTCTCCATAAGCTAATGGCGATTTACTGACTTGCTTTATAGCAC -CGAAAAATCCCAGGGGACTAAATTGCTCGATAGATCAATCAAATTGTTCAAACTTCTCAT -GAGCTTGATAAGCGACGACTAAATTCTTGTGGTATCCAACACTTGAGGGGGATATGGGCT -TCGGGGATGGTTCAAAAGGTTTGTTTGCCCCTGGGCCTGGTATTTTATCCCTTGAAATTC -TATATTCTTGATCTGACATATTGCTTCGGCTTTGTATACTGCCCCGGCTTTATATAATCC -CCCTGTAGTCCGTCTAAACATAAAATCACAGGCATCAAATATCTGGATCATGGTACCTCG -GCTGGCTCCTGTGAAGTTGTATCGGCCATTTTACTCTAACCCCCAGTTCATAGGCAATAT -TCAACACCAGACTCGGATTACGCGCGAATTCACGGGCGATGATAACGACATCCGCTTTCC -CAGAGGCCACAACATCCTCAGCTGTCTTGCCATCCCGGATCATACCCACGCTTCCAGTAA -GGAGCCCCGGAACTCCGTCCTTGATTTTCTGCGCCAGATGTGCCTGATAAGCTGGACGGA -CTTGGGGTAACTGCTGATCAGGCACTAAACCGCCCGAGCTCACGTCTATCAGATCGACCC -CAAGGGGCTGTAGCAACTGGCATAGTCGGACGGATTGTTCAGGGGTCCAGGAATCTCGAT -CGGGGATCCAGTCTGTGGCACTAATACGGTAAAAGAGCAAAGTGCTTTCTGGTATAGCGG -CACGAATGCCCTGTACGAGTTCCAATGCGAATCGCACACGGTTGTCGAAGCTGCCACCGT -ACTCATCGGTACGCTTGTTGGTGAGAGGTGAACAGAACTGGTGAATGAGATATCCGTGAG -CGCCATGTATCTCGATCACATCAAAGCCGGCCTCTACGGCAAGCTTTGCAGAATTGACGA -AATCGCCAATCGACTTCTGAATCATTTCCTTGGTCATAGCGGTAGGCGTCGGCCAAATCC -CATCATCCTTGTAGGGAAGATCGGACGGAGAAATCAAGTCATCGGGAAATCCATTTTCTT -CTTTCTTTGATGGCTGATGGCCAAGCCACGGAGCAGTCACACTAGCCTTTCGACCGCAGT -GCTGCAGTTGAATACCGGCCTTTTGCCCCTGGGAGTGGATGAAGTTTGCCACGCGCCGGT -ATGATGGTACCTGGCTGGCTGTCCACAGTCCTGCATCTTCCATTGTGTTTCGACCTTCAG -GTGATACACCTGTGGCCTCGACAATCGTGAGTGATGCTCCTCGGGATGCGTAGGAGCCGA -GCTGGACCAAATGCCAGTCAGTCAACTCTCCCGTCTTGGGGCAGGAGTACTGGCACATGG 
-GAGAGACCCAGATCCGGTTCTGAAAAGTGGTCGAGCGGATGGTGATCGGTTCGAAGATTC -GGGGAATCGGTTGACTCTTATCGTACATGGATAATAGAGAGCCTGCCGGAGGCGATTGGA -TGGGTGTAAAAAAGCCATGGTCTCCCGGCATGGGCTCATTTATCTTCAGCTCTTTACTTT -CATTTGAGCTCGCCATTTTTGCAACAAATAAAAATACAATCTAAACAATAGAATGTCGAC -CTGAAGTAATCTTTTGTGTTTCAGAGGTCAGCATCGGCAGTCGACAGGAGGAATGGTATT -AGGATCAAGTGAGCTTAGGAGGGAACAGCCATTATCTATTTGACCTCGAGTTAGTTAAAG -ATCACGCCAGATTATTAAAGACGCTGATTTTTAATCAATGAAGGGGGCCGGCGAATGCCA -AGTATTCGCAAAGAAGATGATCTAGCTCCTTTAGTCAGTATCGGAGTGACTCCGGTCTGA -CCATAATAGGATTAGGTGGAATCTTCGACCCGGTTCTCGCCAAGCAGCCTAACCCCTATT -CGAACACTTGAATGGTCGGGTTGCTTGGAGAAAACCCGGCATCGCGTACTCCGTACTCCG -TACTTTGTAGTTCAGTTAGCTGATGCGCCGGATTTCAATGCTGGTATATCTACTCCGTAG -TCTACAACAGATATGGGCCTAGTAGCTCAATGCCCTTTTAGATGTCCCTTCACCTCTTGA -GGTCTAGAGAACTTTCATGACTATGTGAAAGATGTGTATATCTATGTCGTAGTGACCGTT -CAACATCATGACAGGGGCACTTTCGATTGTATTTGACTCAATGCAGGATTATGAAAGACT -TCATCAGAAGTGATACAGGAGAAAAAAGACGGGATCATGATATGGCTGAGAAGAAAATAT -ATGGAGTCTTGTTAAGATGGTAGGTTGTGCACAACTAAATATGTAGGGATCTCCAGGTGC -CTAATTTCCTAAACGGTCTCCGAAGCTCCATTCCCCTTTCAGAATAACGCTAACGCTATA -CGGCACCCACTATGCTTCCTCTTTCGGATGTGCTCCTATTGTGAATTGGTTAGGTATAAA -ATATCTTCCCATTGTACTCCCTACATTGCTTGTAAAATGATTGATAATCCCAAGTTGAGT -TCGCCATTCAGAGATAACGGTAAAGGGCATTAAAAGGGGCACTCTGCTTGGTATAAGGGT -CTTCATCCAGGATTGGGCTCAGAAATAATATTTTGATAACGCCACTTTTCCAAAGTCATG -GCGGCGACTGAGGAAAAATCCACGGCGATGGCTACTCGCACATTGATCGCCGGTCTCGTC -GGAGACTTCGTCGCGGCTTGCCAGTCGTCGGTGAGAAACAAAGACAGTTCCATCGTTTGG -GATCACCTAGAGATGGAAGGTGCTGGTCAAGGGGTAAGAAAAACATCACAGGAGTGTTGA -AACAGCAATTAGCCAAATGCTAATTGAATCAATGAACAGCATGCGATTGCTCCCACGGTC -GGATCCATGGCCGAACGATACGCCCGCAGGGCCACGTCCGTTTTTGGTCACGGAAGTGAG -CCGTCGCGCAAGATTTTGCATGACTTTCGGGGCCATGTTAAGGCTGGGGAGATGCTAATG -GTGATTGGCCGACCCGGTAGCGGTTGTACAACGTTTTTGAAGGGCCTTTGCCATATGCAC -GCTGAATATAAAAGTACCAGCGGTACTTTATTGTACGGCGGGATCCAAGCGAATTTTGAA -GAGCCTGCGGCACCAGCTGAGACCACATTTTGCGGTAAGAACTGTCTTTTGTTACCTCAA -TAACAATCAGTGGCTGACGACAGACGGGTCTATAGCGGAGGAGGATATTCATTTCCCAAG -CTTGACTGTGGAGGAAACATTGAGGTATGTTAGAATGAGACACTCCATGGCATGTTCTCT 
-CTTTATCATTGGAGGACTAATTCCAACTCTCCTATTAGATTTGCAGTCAACTCTCGGTAT -AGCAACGTCATATCCTCCACGGAAGCACGCAACACCGTGGTCAACCTTGCCCGTTTGTTC -GGAATTGATCATGTTTTGGCCACCAGAGTAGGAAATGAACAAGTTCGAGGTGTTAGTGGT -GGAGAAAGAAGACGAGTGTCTCTAGCTGAAGCTCTAGTAACTTGCCCCGAGTAAGTTGAC -GATCGTGATGGGGTCAATGCTATGATATCCTGTGCTGCGCTTATACATTGGATCACCACA -GCCTGATTTGCTACGATAACCCTACCGCTGGTCTTGACTCATCGACCGCTCTTGAATTCG -TGCAAATGCTGCGAGAATATGCCAACCAGTCCCACTGCACCGTCGCCATGAGTCTCTACC -AGGGTAGTGACGATATGGTTCCTCTCTTTGATAAAGTGGCTGTCATAAATTCGGGGCACT -GTATCTACTACGGTGATGTGGTTTCCGCAAAGTCCTACTTTGAGAATCTAGGCTTCTATT -GCCCTCCAACAATGTCAATTACAGACTTCTTGAATTCGATGTCGGCGGAGCCTGAAGCAC -GGGAGTTACGCCAAACAGCAGATACTTGGTCGATTCCGCAGACCCCTCAAGAATTTGTCA -CTGCATTCCGGAAAAGCGAGAAAGGGGTTCAACTTGGAGCACAGATTGAAGAAGCGAAGA -ACAGTGCTAGCGTTGGGGAAAAAGCACTAGGTCAGCGGAAGAAATCACGACAGACATACT -CGATTCCGATTCTTACACAGATTCTTCTCTGTGCTCATCGTCAATATCGCATCTTCATCA -CGGATTACAATTCCTGGATCGTCGAAGCTGCCTGCATGGTAGTCCAAAGTATCATCCTTG -GTACAGTCTTTCGCAACTTGCCACATGAAACTTCGTCGCTGTACCAGCTTGGATCAGTTG -TTTTCTACGCGATTCTCGTTCCTGGACTTCAGTCCATGAGTGAGTTTGGCAATACCTTCG -CACAGCGTCCATTGCTCTTGAAGCATAAGGGCTATCGTTTCTATCACCCAATGTCTTATG -GATATGGCCAGATTCTGTCAGACGTTGTTTGGAAAGTGGTGGTTATTGCATACAACATCC -CTATGTATTTCTTGGCCGGACTCCACCGCACTGCAGGGCATTTCTTCATTTTCTTCTTGG -TCGCTTATATTTCCCATCTATCGCTTTCAATGTTCTTCCGGTTCATCGCTGTCTTGTCAC -CGACAGTGGAACGGGCGGGTCTTCCTGTCGGCATTTTCTTGACTACGTTGGTCATCTACA -CGGGCTGGTACATTCCACCACCACAGATGCAGGTCTGGCTAAAGTGGTTCCGGTTCTTGA -ATGTGAGTATCAAGCAACCCTGAAGTTATGGCCATGGAACTAACGCAGAAAATCCCAAAT -GCATAGCCTATGTACTACGCTTTTGAATCGCTCATGATCAATGAAGTTGGAACTACTGCC -TACAAATGTACTTCATCCGACTTAGTCCCTCGTGGCCAAACATATTCGGACGTTGCATAC -CAAGCCTGTGCCATTGCCGGCTCAGAGCCCGGTAAGACTGTTGTGGAAGGAGCTTCATAT -CTGAGAGTTTACTACGACTTTGATAACTCTCACCTCTGGCGTAACGTCGGCATAAACGCG -GGCTTCTTCGTCTTTTTCGCCGTTTTGGTCGGGTAAGTGAATAGAAAATGATGAAGAGAC -CAACCGCTAACACAAACTCAGTCTTGCAATGGAGCTGTTCAAGCCCCCTGCTGGCCTCCT -TGCTACAGTCTTTTATCGCCGCCACGTCGCAAATACCCTGCCTTCGCCCACTAGCAGCAC -TGGAAGTGATATTGAGGCACAAGAGTTTGCTAAGTCGGCACGGAAGGTATCGTCGAACAC 
-TGAAGAGGTTCGGTCACAGGGTGGACACTCCTTTGCCTGGAAGGATCTCCACTTGACACT -TCGTAAGGATGGAGAGGAACAGACTTTGCTGAAACATATTGACGGTTCGTCCCTTGAATA -TCTACAAGTACTGCTTTTGTCACGGCTCTATTCTGACATGCGTTCAATTTAGGTTGTATT -GATTCTGGAACGCTCACCGCGCTGATGGGTGTCTCTGGAGCTGGAAAGGTCTGTTTGTTT -CTATCATACTCTATGTGGTATAAGCCTGCTAATTTTATTTCCAGACTACACTGCTCAATG -TCTTGGCCGGACGCATGGACGTTGGTAAATTGACAGGTACACTATACCTAGATGGGTCGC -CGCTTCCAAAATCATTCCGCTGGAGGATGGGTTATGTTCAACAGCAAGATGTTCACCTGC -CTACCCAAACAGTGCGGGAGGCTCTCCAAATGACGGCCCATCTGCGTCGTCCACTTTCGC -TCTCAATAGAAGAAAAGAACGCCTACGTTGAAGAAGTCCTTGATATGCTCGGTATGCAAG -ATATCTCAGATGCGTTGATTGGAGTGCCCGGCGCTGGCCTCAATCTCGAGCAGCGAAAGC -GGGTCTCAATGGGTATCGAGCTAGCAGCGAAGCCAGATATCTTGTTATTGGATGAACCGA -CCTCTGGATTGGATGGGCAATCTGCTATCGCCTTGGTTCAGCTACTGAAGAAACTTTCAA -GATCGGGCCAGACGATTCTATGTACAATCCACCAGCCTGCCGCAGCCGTGATTGAGGTTT -TCGACAATCTGATTCTGCTAGCGAAGGGCGGCCGAGTGACCTACCAGGGACCTTTGGGCG -ACCAATCTTCGACGGCATTGGAATACTTCAGTCAACATGTGGAAGCGTGCGATCCCAAAC -GAAACCCGGCAGAGTACCTGTTGGACGTCGTTGGGGCAGGTTCCCGGTCCAATGTTACTG -CCGACTGGGCTCAAATTTGGTCGGAGAGCCCTGAATGTCGTGTTCAAGATACCAAGTTGC -ATGATCTTAAGAACGTGAGCCCTGCTGAGCAATCTCGGCCTCAGCTCTATGCTACCCCTC -TGACTCACCAATTTGCCGTCGTGCTTCGTCGCACCTGGTTGTGGTACTGGCGTGAGCCAG -CGTATTTCTCTGCTAAGCTATGGATGAATGTGGCCAACGGCCTGTTGAATGGCTTGACGT -TCTTGAACATCCCGAATAGTCAACAGGGTGCATTCGACCGCGTGTACACAATCTTCCTAT -CCTTCTTGATGGGTCCACCTCTTGGACTATCCATGGAACCACGATTCACTGCGTTCCGCG -ATATTTTTGTGTACCGGGAAAAAGCCAGTCGATCGTACCACTGGATTGTGTTTGTCATGA -GCTCTATCATCATCGAACTACCCTTCACTCTGATAACGGCGTTCATCTATTGGCTTCTGT -GGTATTTCCCGGCTGGGTTGCAGACTGATGCCACTCATGCTGGCTACGCATTGCTGTGCT -ATTGGCTATTTTCCGTCTTCACTGTCAGCCTGGGATACCTCATTGCAGCGTGGATGCCAA -ACTTGAACTCCACTCTCATGGCCAATGGTTTCTTCTTCATGTTTGTGAATACATTTGCCG -GCACTTTGACGGCGCGCGAAAAGACCCCGGCAGGCTGGAGTTGGTATTTCAACGTATCCC -CTCTGTATTATCTGGCGGAAGGACTCACTACCAATTCTCTGTATGGGCATGAAATTCGTT -GCGTATCCAGTGAGGTGACTGTTTTCCATGCTCCCGCCAACGACACTTGCCTCAGCTACG -CTGGTTCTTTCTTGAAGTCTGCTACAGGTTATCTGGTTAATCCGGATGCCACTGGGGCTT -GCGATTATTGCCGCTATAGTCTTGGCCAAGAATACGTAAGTAAAAACCTCAATTCTGTTG 
-AAGCAACCTCTACTCACCACTATATCATCGATAGTATCAACAATTTGGATATGAAAATGA -TCGAAAGTACCGCGATATTGGTATATTCATCGGGTTCATTGCATTCAACTTCACGGCTGT -CATTGTGGGTACCTATGTGACAAAGATCTACAAGTGGAAGCGGAAGAGTGAATAAGCATG -GAGTGTGGAAATGTAAAGTGGAGCTACTGGGATGCTCTTGTATTGGTTTCTCTTGAGCGA -AGCATATTCAGATTTCTATTTGGAAAGATCATTCATTGGATTATTATTTTCTTGTGTATT -TTCTTCATTGGATGTGGCGAAGTATCAGGATTTAATCAATATATATATTCTTCTATATAT -GTCTTTCTACTAAATGAAATTCCCAAGAAAGGATCATACTACCTACATATGCATAGAGAC -TTCTGGCCTAAAACTCCCGCTCTAACCACTTGTGTTTCAAATAATCTAGGAGAAAATAGG -GTAGTTGTAGGTACCTACATTATCACGAGCTATTACTCCGGGTACATCCAGGGTAAGATA -AAGATCCAGAGTGAGATAACGATACTTCTGGATTGAAATCAGCAGGTGATGGAGAACAGG -AAAGGTCCCCAAAGAGGATTGTCCCATTCAGAACACGCTTACCCATTTTAAGCGTTTTGA -TTAATTGCCTAAAGGAGCCCTCGGGTGTTCTCCGATATCTCCGACGTAAGACGTTGTTAA -TCGAACTTCAATGTTGCAGCCGAATTCCAAGATAAATACTACATATTCCAACAGACTGGC -CTCCTCATCCGCTGCTTCATTTCGCAAATATCCTGAAATCCTTTTCACAGAAACTATCAT -GCCTCGCGTGTTCTTTGTTACCGGAAGCTCAGCCGGGCTCGGGGAAGCTCTGGTAAAGGA -AATCCTGGATAACGGCGACTATGTCATTGCAACTGCCCGCAAGCCGGAAGTTTTGATCTT -TGACAAAGCCACTTCCACCAACTTCCTCGCCGTTCAACTCGATCTGTTAGATTCCAACAG -CGTCTTGACCGCATACCTCACTGGTGTAGCCAAGTTTGGTCGTATTGACGTGGTCGTCAA -CAACGGGGGCTATGGGTTGAGCGGTCCTTTTGAAGATCTAACCGATGAGGATATCCAACG -ACAAATGGACGTCAATTTCACCGGTGCAGTTGTTTCCACTCGCGCTGCCATTAAGGTCAT -GAGAGAACAGAGTCCTCCAGGAGGCATCATTCAGCAAATCACCAGTATCGGTGGTCGTAT -CGGGATGCCCTACATGTGCATGTACAGCGCCTCGAAGTTTGCCCTCGAGGGATTCACCGA -AGCTGTGGCGCAGGAAATGGACCCGAAGTGGGGAATCAAGTTTACTTGTGTTGAACCAGG -TGGTTTCAGGTAGGTTATGCCGAAAACTTCCTGTACGGCGTATTCGGATTGTTAATGTTC -CTAGGACCGACTTCTTTGCCCGATCGATGGTATACACTAATTCTACATCTTCGGCGTATG -ATCTGGCCGCTGTTAGTGCTCAGATCAAGGAAATCGAGAAATCTCAACGCGGTGATCCTC -CAAAGGGGGCAAAGGCACTCTATCATCTGGCAACCTTGGAGAATCCTCCCAAAAAGACGG -TCCTGGGAAGTGATGCCTATGAATGGTTGAATCAACACATGACGGCTGAGCATGAAGATC -TGCTTCGCCACGAGTCCCTGGCCCGTAGCACCGATATTGATGATTAAGCATTGCTATGAT -GAGATAAAGGGAGGTACTTTTTTTTGTCAACCTCTGTCATACCTGGCTTATTGATATCTT -CATATATATATGAGTGGAAATAGAATTCATTTCAAGGTCTCAATTATCTCAAGTGCCGAG -TCCAGAAGGGGATAATGTAGGATTACCTGATTGGGGTCGGGTGTGCATTTGCCTCTATGT 
-CGATTTTGTAGATGATCGGCGAGATAAGGGCAATGAAATTGCCTACACAATTATCGGGGT -ATATATTAGAGAGGAGCAGCAAGAGCACAAAAGAGAAAAAAAAGATGGGCCGACATTCCG -GGGTCTTCTGTATCTCTTGTATGGGTGGGTCATACAAAAGATACCTAATCTGTTCCGTCT -CATGTTTTCCCCGGATTGTTAGTCAGATCCGAGGGCCAGACCAACGGTTGGGATCCATAG -CTCATTGGGAAAGCCAAATATGGCGACTAATATTCGCCAACGTCTAGAAAGGGCCATTTG -GTCAGCATATTAATTGTGTTACTAAACTGCAGTGCTCGGCGTATTCCGCCAATTCTTTTG -CTCGACTCATGAGGTCACTTTCCCCGCACGCCCCAGATCGCTGGCTGAATGGCCAATCAG -AAGATGCAAAAATTCCAATCCGCGTCACGTTGCAAGTAACGTTGTGAGAATCTGCCCCCG -AAAAGAAATGAAGCTGCTTAATACGCGACCCGTCATTCGGCAAGGGACCATTTAAATTGC -GGGGGTGTCTGTCCCAGCATTTTCAATCTTCCCAAGTATCTTTCTCATCTTATTAACATC -ATGAGTGAATGTCCTGTTGCACACAAGAAGAGCAATGTTGCCGGTGGTGGCACTCGGTAT -GATTCCTGACTCAATTCATATTACCCTCATACCGCCATGATTCGGAACTAATAATTTGCT -CCAGCAACACTGACTGGTGGCCCAATGCCCTGAAACTGGGTATTCTTCGCCAACATACCG -ATGCCACCAATCCTCTAAGCAAAGACTTTGATTATGCCACTGCTTTCAAGACCCTCGATT -ACTGGGGTCTCAAGAAGGACCTCCAGGCCCTCATGACTGATTCGCAGGACTTCTGGCCCG -CTGACTTTGGCCACTATGGTGGTCTTTTCATTCGCATGGCCTGGCATAGCGCTGGAACAT -ACAGAGTCTTTGATGGGCGCGGTGGTGGCGGTCAGGGCCAGCAGCGTTTCGCACCATTGA -ACTCATGGCCGGATAATGTATCCCTTGGTAGGTCTTGAATAATTGGGGTAATCTTCTCAC -GCTGACATGTATTGTCTACTTCCAGACAAAGCACGTCGTCTACTTTGGCCCATCAAGCAA -AAGGTGAGTACTATATAAACTAGAAACAACTTCGTATCTCTATATGTTTTCACTAAACTA -ATTACGGTACTTAGTACGGCAATAAAATCTCATGGGCAGATCTTCTGCTTCTGACTGGCA -ACGTTGCCCTGGAATCTATGGGCTTCAAGACATTCGGATTCGCTGGAGGTCGTCCCGACG -TCTGGGAAGCGGATGAGTCGGTGTACTGGGGAAGTGAGAACGTCTGGTTTACTAACGAAG -CTCGTTATGAGCCCGAGACTGAAGAGGAGAAGGCTAAGCAGAGCGATATTAAGACTCGCG -ACCTCGAGGACCCGCTGGCCGCGGTCGTCATGGGCTTGATCTATGTCAATCCTGAAGGCC -CTAATAGCAACCCCGATCCAGTCGCTGCTGCCAGAGATATCCGTATTACTTTCGCCCGCA -TGGCCATGAACGACGAAGAGACGGTCGCCTTGATTGCTGGTGGTCACACTTTCGGAAAAA -CACACGGTGCTGCCCCTGCTACTAATGTCGGAAAGGAGCCCGAAGCTGCTGGCCTTGAGT -CACAGGGTCTGGGTTGGAGTAACAAATACGGCTCCGGAAAAGGCCCTGACGCTATCACCA -GTGGACTGGAGGTGACATGGACCAGTACTCCCACCAAGTGGAGTAACAGCTACTTGGAAT -ATCTCTTCAAGTTCGACTGGGAGCTGACCAAGAGTCCCGCTGGTGCTAACCAATGGGTCG -CCAAGAACGCGGACGATATCGTTCCCTATGCTTTTGATTCAAGCAAGAAGCACAAGCCTC 
-AGATGTTGACCACTGATCTTTCTCTGCGTTACGACCCCGCTTACGAGAAGATCGCCCGTC -GCTTCCTCGAGCACCCTGATGAATTCGCCGATGCATTCCGTCGCGCGTGGTTCAAGCTGC -TGCACCGTGACATGGGCCCTCGCACTCGTTGGCTAGGCCCTGAGATTCCTAAGGAGGTGT -CTATCTGGGAGGATCCAATTCCCGCTCCTACACACGCTCTGATCGACAATGGTGACATCA -TTGCCCTGAAGAACGAGATTCTCTCAAGTGGTATCGAACCCACCAAGTTGATTGCCACTG -CCTGGGCATCGGCTTCTACCTTCCGCGGGGGTGACAAGCGTGGAGGTGCCAATGGCGCTC -ACATCCGTCTCGCTCCTCAGAAGGACTGGGATGTCAACAACCCAGCGCAGTTGCAAGAAG -TCCTCAGTCTGCTGGAGAATATCCAGAGCCGCTTCAACAGTGCTCAAAATGGCGAAAAGC -GTGTCTCCGTCGCTGATTTGATCGTACTTGCTGGATCTGCTGCCCTCGAAAGATCCGCGG -GTATTCCTGTTCCCTTCACTCCTGGCCGCAGCGATGCCGTCCAAGAGCAAGTCGATGTCG -ACTCGTTCGGCTGGCTTCGACCTTTCGCTGATGGATTCCGTAACTACGGGCACTCGACTC -GCCGGGTGCGCACTGAGCAGCTCCTCATTGACAAGGCGCAGCAGCTGACCCTCTCTGCTC -CTGAACTGGCCGTTCTCGTCGGTGGTTTGCGCTCTCTGAACGCAAATTGGGATGGTTCAA -ACCACGGTGTGTTCACTTCCCGCCCTGGCCAGCTGAGCAACGACTTCTTCGTCAACTTGC -TCGACATGAACACTGTCTGGAAGCCAACTAGCGAAGAAAGTGAGCACTTTGAAGGTTTCG -ACCGCAAGACTGGCGCCAAGAAGTGGTCTGCTACTCGTGTTGACTTGATTTTCGGTCACC -ACGCTGAGCTCCGTGCTCTTTCTGAACTATACGGCAGCGCCGATGCGCAGGACAAGTTTA -AGAAGGACTTTGTTGCTGCCTGGGACAAGGTTATGAACTTGGATCGTTATGATATCCCTT -GTTTCCCTGCGACTGGCCGTCCTCGCTTGTGAATAATTCGTCATCATGATATCGTTTTCT -CCAGTACAGTCAGCTGAGCCACAAATGGACGTTTTAGTTTCGGTATTATCTCCATCATTC -AGATAGTAAATGTATTGGCAAAAGTCATCCCATTTCTGAAATGTGCCCTGTTCTGCTTCC -GTATAATGCCCCCGGTTCCTTGACTGCCAGCTGCCGGCCTTGAGTGTGAGATGACATGTG -AATGCTCCTTGTGAGTCCGAATGATCTCCAACTTCGACAGGCCGAACTAGCGCTCCGCAA -AGCCCAACCAGCAGTGAGCTATGGACTCAACCTCCGAAACCCAACCCAACCCACAGCCCA -TTCGTGGGGGCCCATAAGGCTCCCATTACAATTCATTTAGGGGGTTCAACTAGGAACCTG -TAATCGCTGTGTTTTGTCCCTTCTACTGATTGTATAGACTCATATCTAGAATATATATTT -TGAATCACTATAAGCGTAACTTCTAAGTATGAACCAAATCGAAGTAATAAAAGTTAAAGA -GATAAAAAACCTTTCTACACTTTGATATCAGATATATATTTAGATCTACGCGTGAGCCTT -ATCATTTTCTTAATGAACCTCGACACTTTATAACCTTTCTGAATAGCGCGTTTTCTCGCG -TTTTTCAACGTGATTAATATACTTAACTGTCAACTTACGATATCTAAATTAAACAACAAC -ATAATACCCTAAGATGAAGGCGAGAAACGAGATCTATATATATAGTAATTAAAAAGTCAC -GTTTTTGGGAATCTTTGTATCGTTTGGTACTTTTGGGGCCTCAATGGGCCCCCATCAAAG 
-CCCAGTGGGCCCGAAGATAGGGCCCAAAGCCCAGCCCATTTGCGGGTTGGGTTCTGGGCT -TTGTGGGCTTTGCGGAGCGCTTGGCCGAACGCACGGCCCAAATATAGGAATTAGGTGCGT -TACATGAGAAATCACATTCAAAGGTGGTGATCTGCATTGACAAACCGGGATATTCCACTC -AAAGTCGTAATTTCCGCAAAATCCGAAATGTCCGAATGAGACCGGGTCGGCTTATTCCTT -TAGAGAAGGCCGAAAGGTAAGATATCTCAGTATTAAAGAGGGAGAAGCGCTGTGTAGCAC -TCGATAAAAGCAGAATAAACTGGAAGAGAGAGAAGAAGTAGAAGAGATACCTATCAATCT -CAGGTGCTGTCTCCCCGGGGAACTGGCATGTGCCTCAGGTCACCCAACAGTCACACCTAT -CATTTAAATCAAGGCTACTGATGCTTGGACAGATAATTTGTTTATCGTCCTACCCATCTC -CCGTTTCATTTGCAAATATGACTCTGAAGGTACTCATCTGTGGCGGCGGTGTTGCAGGTC -CGGCCTTGGCCTTCTGGCTCGCCAGACTTGGTCATCGAGTTGTTATCGTCGAACGTTTTC -CAGTGCTGAGAGCTACAGGTGCCCAGATCGACCTCCGCGGTCAAGGGATCGAGGCTATCA -AACGCATGGCCCTGGTCGATGCCGTTCGTGCAAAGCTCGTTGATGAAGCAGGCATCACCA -TCGTTGACAGCAAGGGAAAGGTGAAGGCGACAATTCTCGCTAACAAATCCGGCAAAGGCG -CGCAGTCTCTGACATCTGAGTATGAGATCATGCGTGGAGATCTCGTACGCATATTTTATG -AAGCAACAAAAGACAAGGTGGAATACATCTTCGGAAAAACCGTGGATAGCTTCACAGAAA -ATGACCAAGGGGTAGTCGCCTACTTTTCCGACGGCTCTTCCGACACATTTGATCTACTCG -TTGGAGCAGACGGGCAAGGGTCGCATATTCGAAGGAACATACAGCCCCCTGGTGCTCCAG -ATGAATATCACCAGCTCGGAGTACACATGGCGTACTGGTTTGTCCCACAGATTGAAACCG -ACAGCAACATGTGCAAAGTATACTTCAGCCCGGGTGGCCGGATGATAATGTCTAGAAGCC -ACAATAAGACGGAAACCCAGGTCTACTTCCTACTTATGTCTGATTCCAAGGAACTGCGGA -AGATCCCCAAGAGTTCAGTTGAACAGCAAAAGCATTTCTGGGCCGAAAAATTTCGTGATG -CAGGGTGGCAGGCTGATCGGTTTATCGAAGGGATGAACACTACAGAGAATTGGTTTTGTC -AGAATGCAGTCCAAATCAAGACAGATACCTGGCACTCTGGCCGAGTCGTTCTCCTCGGTG -ATGCAGCACACTGCCCTTCTCCATTGACTGGTTTGGGGACAACTAGCAGTCTAATTGGGG -CGTATGTTCTGGCGGGAGAGCTTTCTCGCAGTCAAACCCTGCCCCAAGCGTTCCAGAATT -ACTCTATGATAATGCGACCGTTTATCGATGAGGTTCAACAGGTCAACATTGGGTGGATCC -GACGGGTGGTCCCCGAGTCTCCGTGGACGATATCTCTCTTTCAATTCATTGTAGGGCTAG -CTTGTTTTCTTCGCATACCGCAGATTATATCACGGTTTTCAGACGATGGGGGTGGCGACT -GGAAACTCCCTGATTACCCGGAGCTGCATGTTGATCAGGATGAGTATCAATGAGAATCAA -GAGAAATAAAATGTTACACGAAACTTCAAGATCATACGTGCGCAATCAGGGGTACATGGC -TTGGCAGAATCAATCTAAAAATCTATGCTCATCTATATTATACCGAGTACTTCATACACG -AGTGTATTTGGCTCTTGAACTTTTAGATAGGCTCGATTAACGTATCATGACCCTTCACCG 
-TTTTGGATATTGGAGCCGTGAATGTCGATTTGCTTTTGACCGCATATTGTACTTGTATAA -GAGACATAACTTCATTGTGCATACTCCGTAGTATCTTGCCCCGTGCCCCTGAAGTTTTTA -TACATTCTGGGTACGTGGGCGCCAAGCCTTCATATCAACATTAATCTACGGAGTATTTCC -ATTCTCTTTCTCTCTTCGGACTCCGAAATCTATATGTCTCCAGATCATTACGAAGCTCTC -CGTAATCCATACCAATCTTTGGATTCATGCTAGCGAGGCCTGGGCTCCTATCGCGGAGCG -CCACCAGAGAGAAATGTGATCAGTGCACTAATTCCCGGCTGGGTTTCTTTGTTTGAGATC -CCGATAATCCCTGTTTGGAGCACCGATAGCCTCTTAACATTCGACCATTGCCCTAACGTG -TCCAATAAACTGTCAAAACAAGAGAATTAAGGGGCACGGATAGTTGGATACTTCTGGGAT -TTTCATATCGCCAATGACCGTTTCACCTAGATGATTTGCAAGCAATCTACGGTTAATATT -TGTCCAACAAACCGGAAGATCTGCCCCAGATCCCCGTGTACTAGCATCATTGACCGCATT -ACCACATTTCCTCTCATTTATGCGACGACGGATGACTCGCCGGTGGCGTGGTCCACTTCG -AGCGCAACAGTCAACCTTATCCGGCTCTACCCCGTGCCCTAAGATGAGGGGCTGAAGCAA -AGTCTCCGTTCTGATAGAATCTGTCAGCCAGGCGAGACATGTGTCCGAGCTGTGATCCAT -ATACTCGTCTTCTTTCCCACCGGTTGATTTATCCGGATGGCTCAACCCATTCACACCTCA -TGGATACTGCAATGATCAGCCTCCCCGCAGAGGTGGGCGCTGTTTTAAATGCCAACTTCA -CCATCGTCCAAACCATTGCTATGTTCTCAATCGGAGCGTGGACCTCACTGGAAGTGGTGA -TTGGCATATTTGAGAGATTCAGAAAGTATCGTGGGCTATACTTCTGGAGCATGCAAATTT -CGGCCTGGGGAATATTGTTACACGGCATACCATCTCAACTGGTCTTTATGCATACGGCAT -CTCCTACTTCCATGTCAGCCATATTTATCATCGGCTGGATTTGCATGGTGACCGGACAAG -CGGTAGTGCTGTATTCAAGACTGCACCTTGTTGTGGCCGACATCCGCCATGTACGATGGG -TACTACGGATGATTATAGCCAATTTCATCGTCCTCCACATTCCCATGGTAATTCTCTTTT -GTCTTACTATTCGCAATGTATCATCTGCCCGCCCAGCGGCAATCTATGACCGCCTACAAA -CAACAGGATTCGCCATCCAAGATACAATAATATGTGCCATCTACGTCCGCGAAGCACTCC -GAGCTCTCAGGCCCGTCTTCGAAGCCAAAGGCTCCCAAGGTCGAAGAACTATCTATCGAA -TCGTCATTGCTAATCTCTTTGGCATACTGCTCAACATATTCATCCTTATCGCCGAGTACA -AAGTGCACTACATCGTCATCAGCTTCAAAACAGTCGCCTACAGCATCAAGCTGAAACTCG -AATTCCATGCCCTCATGCAGCTCCACGAGCTAACACGCACTTACCCATGCGCTTTATGCC -AAGGAACCCATAGCAGCCCTCGTGGTTCATCCGAAATCAATATCTTCGATATGTTTGCAC -GTAGTCCCCGGGTACAGGATACCGAGATGCAAGTTGTCTCCGGTCTTTCGGGGACCGCAA -GCCCAGCGTGTTCAGCGCGCAGCGGCACATACGATTTCCACGAGGCACTGCGCCAGACGA -CGTCGACTGTAAATTCTGCTGAGTCAAGGGCAGACGCTCGGCCTCAGATCCACTCGTCGG -ATAGGCAGTCTACGGTTGAAATTACATTGCTGGAGCGCCCCAAGTAAGACCAAATTTTGG 
-TAAATTCGAAGGACAATTTCTTGTACGAAGTATTTCATATAATACACGATGAACTCCGAC -TCAAGATAGTCGGATCATCAATTATCACTGGGCGGCCAACTTTTTCACATTATACCCTTG -TCGTTTTTGTCGTTTTTGTCGTTATTGCTGCATTTTTTTTTTGTTTTTTTGGCTACTTGT -ATCTGTCTTAATTCTAAATTCTTTCTGCTCTGAAAATTTGACGCACTCGCATAATATTTC -CATTGAATTTCTACATTACGTCCATCTCTTGTAAGCATCTTGGAATGCATCATGCATAGC -AGAACCATGTGGTTCTCATGGATCGAGGAATAGCATAGAAGCGAATATCAAATTTCCAAC -ACGGTATGTGTATGTAGTATACTTTCTTCGATAAATCTGCGAGAAAAGCAATGAAAGGAG -ATATACAGGCCTACTCATGCAGGTGGAAAATCTTTCATACTGATTAACACCGAGACTGTC -AATATCTATCAAAGGCAGGACAACGCTCGTGGAGGTCCGAAAATATTAGCGTAAATTGAA -AGAGGGAGGATGAGCTGAATGCTCGCCGAGGTTGAGAATGTAAGCCAGTAAAATGAACAA -TGAAAAATCAAAAATGAATGAAGTGTGATATAAAGTGACAGCATGGCTAGATTTCCCAAA -AGAACTCCGGTGACCGGTCACCGTATCTTTGAGCTATCGAAGCACCTAGTGACAGCCGGA -CCAACCGGCGTTGTTCCATCCGCCAGGCTTGCAGATCGAGCTTCCCCGGATAGTCACCGA -GGTGCCCGAGACAAGGGATGCTAGTACTCGCAAGTCTCGAACGTCGTCTCCGCTTCGTAT -ACTCCGTATAATTTGGTTACTTCGTAGTGTCCTGAGCGTCTGATGTAGTTGCGTCGCACT -ACTGAGTTGATCGGCGAGGTTGAAAGCTATTGAATATATTCATCAAAACTCGCTAGCCGA -ATAACATTATACAGGCATGGCCAAACTCTCTCTAGAGAAGGTACCGGAAACTATCAATCT -CACGATCTGTCTTGTTCGATAATTCATCCTGTAGCTCGGCCCCCACACTCATCTGCTCAG -GCGAACCATACAACCTGTCTCGTCGACGGTTCTCCCACGTGTAGTATGAGTATAAGCACA -GAAGGAAGAAAACGCTCAAGATCAAGCCTGACATAGAGGCTTTGATACCGGTCTTTAACA -AACAGCCATGGCGTTAGTTTCGTGTTTTGCAATGGAAACCCTAGGATAAAAACTTACTGG -GTACGACGGCTCCTCTGAGGCAAGGAAAAATTGAGGGCCCACGATATTTCCCACGCAATA -TGCAACAAAAAGCAATCCACTGACAACGCTTTTTTTCGTAAATCCGGCCACATTAGAGGT -AATGATACTCAACGACATGGGAAAATTGATCGCGTAGACGACGGCTAACGTCAACCCAAC -CATCCGCCCAGCTTGATCGTCGGGGTCAAGCCTCCAGATAAGAATTAATCCGATAATAGA -AACTATTGTGATGAGACACATTCCTAATATGCGAGAAGAAGGAATATAGGTCACGGTGAT -GGAAGTAATCAACAGAAATACGATCTGGGCGCCGCCCTGAGGCATTTGCATAAGAAGGGC -CTTGAGATCAGTGAATCCAAATCCGGCGGTGATAATGGATGTGAACTATAAAGTATGAGC -ATCGGGTTCTAAGGAATATAGGGTGAATACTCACGCTTGTGAGACCACCGTTGGCAAGGT -TCGAAGAGAATGCATTCAGAACCAGGAGCCACGTTTGGGGGTCTTTCAGGGCCTGAACAG -CCTGCGACCATTTGAACACGCCGGTATCCATGACCCCCGTCTTGTTCGTCAGTGTTCGCT -GCACAGCAATAACACGCTCATCGGGCTTCAAGAAAATGGCTTGAGCAGGCGAGTCCGGCA 
-ACATAACAAACAAGACAAGACCATAAGTTGAGGTGATGGCACCGAGAATAAGGAACAAGA -GTTTCCAGTGTGCAATTGCCGACGTATTGATAGTCCCAATACCGTAGGCAATCAATCCAC -CAATGAGTACAGCAAGGAAGTTTCCGAAAAACCAGGCAGATTGTCTGGAGGTTGTTAGTG -AGTCCTCCCTCCATCTCCATTTGGTATTCTTACCGTAGGGGCTGTTCCTCGCGTTTATAA -AACATGCCCGTGAGCAATGTAAAACCCGGCGCAATAGCCGCCTCACCTACACCTAAGAAG -AAACGGGCTGCCATAAGTCCACTCCAATTAGAGCAAGCAGCATGACACATTAGGGCACCG -CCCCAAAGGAAGCTGAGGTTGTCAGTCTCATCGTCGACGCTATGGATTCTTTAGTAGTGG -AAAACATACACCGAGACTGAGATATATTTCCCAATGGGAAGTCGGACGACGATATATGAG -CTAGGCCTATTGTCGGATTAGTATTTGTGATGAAAGGTAGAGGAAGTTTGAATACATACC -AACTCCAAGCAAAATACCCAAAATAGAAGATCGCAGATGCCCATGAATATTCTGCCCCAT -GCATGTTCTATAGCTTTTTGTCAACATGTATTCCTTGACCAACAGCCCTATGGGAATGTA -TACCAAATCTTCGCGTAAACCAAACAGGGTCGCCTGACTCAGAGCCAGCTTGTCGAGGAA -TTGGAGGAAATATGCCACTCCCATCAACGGGACCAATCTTCCCTGAATCAGTTACCTACC -CATGTAGGATCATTCTGCCGGAATACTCACATAAGGTCAAGCTTCCACAACACTTTCCGT -TCCTCCGCAGGTGTTGCTTGAACAGCGAGCACTTGGCCCAGATCGAGATCCCGACCAAGT -GTCTCCCCTTCCCCCTTCAAACCCTTCTCATCTAGGTTGGTGTCATAAGATTGCATCGCG -AAATTGTTACAAATGCAAGTCCACTAGATGGCGCACATAGTCCCTTCTTATTCAAAGACC -GACGAAGCCGGTATCGCTCTCGACTACTTCGCCACAAAGCTCGTTTGAGCCTACTAGTGC -TAAGGCCAGGTGCTTTTCGTAGTGGGCCCTTTCCTATCTTTCACCGGGAAGGACGTCATA -TATTCACCAATTAAATCGTACGGTTTCATTTTAGGTTATCATCAGAATTTCCCCGCGATG -TTGATCCCGGAGAACAGGCGAGCCTGCCAATCGGGTAATCCATCTCGGCACCCCACAGCA -CAGTCAGTCAATTGAAACCCCGGGCTTGTAATCAGCAAGGAGCCGATCCGGGGGGGACAT -CCACTTCCCCCTCAGCGGATGTTAAATACGGCAATTGAGCTCGCTGTCGGTGATCGGCAT -CTGGTATAGGGGAGGGGTCCAAGCGATGTGGTTATCAATGAACACGAGGCATTTGAGATG -CGCTACGAGGGCCGCTCTATTCCGCGACAATTACCACTAGTCTTCGCACCTACAAGAAAA -ATGTATCGTCATACACCTGCACTCCAATTTCAGAAGGTCTCACATGGAATACGAATGTAC -ACTAAATCCCGCCTAGTATTCAGATCTCTAAGCCTTTCTCGAAATCCGACTTGATGGCAG -TGGCGAAACTTTCATCGACTAAAGCTCTGCATGCCACCACGGCCATTCCTGCCGCACAGT -GCAAACTACGCTTGAAACCCTCTTCGGATCCAGCCCCGGCAGTAAATTCGGGCGTGTGAT -TAACACCATCTGCGTAAATATAGAACCCACCGTGGAAACCAGGTACTTCATATGAGACAT -TGCCTATAGTGGAGACAACAGTTAGCTTCTGGCCATACCCGTGGACTGTAAGGAACTTCA -ACGCTCACCCATATCAGTTGATCCCCCGCTAAGGCCTCCTACCTGCCCTGCGGCATCGAA 
-GATAGTACTGTGGCCCATGGCATTCATAGCGGAGACGTAACTGTCACAGATAGGCTTGTT -GATCTTCATATCCTTGTACGCGGCCTCCCTAGCAAAAAGGGTTAGCGAAGCGTATCAAAA -CATAAAATCGCCTTATCATACCATTCATATTCCACCGTGCAGCCAGTAGCAGTCGCAGCA -GCTTCGAAGCATTTGAGTACTTTCTCAGTCAACGGTTTGAGCGTCTTTACCGTCTCCGAT -CGAATATAGTATTCAACTGTCGTCGAGGCAGGAATGACATTCGGTCTCTCTCCACCTTGT -GAAATAATTCCATGTATCTTTTGCGTGGGCAAGATTTGTTGTCGCAGCATTGAAATGTTC -ACATAGGCAGATACCACAGCGTCTAGAGCATTGACACCCTCCCACGGAGCAGCCGACGCA -TGTGCGGGTTTGCCAGTGAACGTCACCTCAACCTTGTCGTTTGCGAGATACTGCCCGGTG -CAGGAACTCGAGCTCATAAGATCAGGGGCACCGGAAAGCACCGGGAATGGATGGACCATG -AAGCATGCATCCACGTTCTTGTATGCTCCCGCGTTGATGAGCAGAAGCTTTCCACCGCCA -CCCTCCTCCGCAGGAGTCCCTAGCAACCGAACGGAATAGCCAATTTCATCGGAGTACAGG -GTGTCCATCGTCTCACAAGTAGCTAGGAAGGCCGCGATCGAACTTGTAGCGATCAAGTTA -TGTCCACACGCATGTCCCATGCCTGGAAGGGCATCGTATTCTGCATTGAAAGCCATGGTA -CGCCCACCCTCGCCATGGCTGTAGATCACCTCGAACGCCGTCCCAACGCCATAAGCCTTT -CTATGAACCTGGTATCCAGCATCAAGTGACTCGAAAAGCTCGCAGATACTGTCGTGGGCG -CTGTGCTCTTTCAATGCAAGCTCGGGATTGGACCAGATCTATTGAATTGTTAATAAAGTC -ATGATGTGCTTGCCAAAAATAATATACACCTTTTGATTGATCTCACTGAGCTTGGATTCA -TACTGATCAATGGCCCCACAGACCTTGTGGATGATCTCTGTTTGTCCTTTTACTGGCAAC -ATCTTGTTAAATCACAAAAAGATCCCAACTTAATCTGGAATTCTGGCACTGGGTAATACA -ACCTTAAAAGGGAAAGCAACCAAGCCCGCAGAAATGACCTCGGGAAACGCATGCCGAAGC -CCAACTCCAAAGACCAGACACCAGAGACACATCTCAAGCGGGGGAAGCTTAGACAACTCC -GCTGTGGAGAAGCAAGACCCGCCCATCAATATCATTTTATCATGCAATTTACACCCAGCG -CCCACTAAGTTCCGTAAGTTGACCCGGCAAACAAACTTGCCGAGTGGGCCTGGATAGGTC -TTTGATCCAAGAAGATCCGCATTTCAACAATCTGCAGGTCGGACTCGTCACGACCTTCCA -AATCGAGGACGTTAAACTCGCGCACTCGGACCGCGAACTGATCAGGGTCGGCAACGAATG -TTGTTTCGGATGTCGCTTCAAACATGACCGTGCGCTTCATTTGTATGTTCCCTGCTAGTG -GGGCATAGAGCTGTCCTTTGTGGGCGGTTTCTTCATCTGGGGCGGTCCTCTTGTGCGATG -TTTCGACAGGGCCTATGTCCCTCCCGAGATCTATATCCCAGGCGAGATGAACCTGATGGT -GGAATTTGCAGATATGACGGCTACGTACTTCCATGAGAGGAACGGCCTGGCTTGCGGCTG -TTGGTGGATCGCTGCCGATGAGGATGGGAGCGCTGGGTGTGAAGAAAGGAGTTACGCGAT -TTTCAGCGGGAGTGAGCGAGTCTAGTGCTGGTAGGAATCGTTGGAGAAATTTGAGACCTG -GTGAAGCTGTGGGAGGGATGAACTCGTAGGGGCCATGGTGGATCGTGACGGTGCTTTCCA 
-TGCTGTGTCGACTACGAACGAAACTGTCTTGTTGGATATGAATGTTGCCAGTTGATTTTG -ATCTCTGATAGTATGTCATGGCGACGTAATATTGTCATTGAATTAGTTACATCATGAGGC -TGAAATTCACAAATCGTGGGCGTCGATAGACTGCATTAACTAATTATTGTTACAATGTAC -GTCTGACACACATGGACATCGCAGTCCTATGGTGGATTTGGTGGGATTTTGCGTAGTGGT -CTGGTTGAACGACATGATATATTGTACAGACAGCAGCAATCGACAGCAAGGCTGGTCGAA -TTTCGCGCGGGGAGGCAGAAATCGCCAGGCATTGGTTTTTGATTGCGCCGGAGGCGCTGC -GCAAATGTTTCGTTGCGCGACCGAGATGGAACCCAGGAGATTTGCATTCCATTGAACGGG -GCGGGGTTTTTCCATACTTGCTGAAGCGAGTTTCTTGTGAAGAGCACACGAAGAAGAACG -TATAGCGGCATCATTTACTATTGGAAAACTTCGCCATATGTGATCAGCAGAAATCCGGTG -AACAACCAAAGCCTGTGTGGTTGCGCCTCTGGAAAATTATTTGAACATGGAGTCCAATTG -ATAAGAGGTTCATGTTGAGTAGTCCGACCTCAGGATGGATGCGTACTGTTGTCGGTCGAC -CGGGTGGATGGACGAATTCTGCGCTCAGAACTAGGCAGACTCTCCACGGACATTGTATTC -CAGCCTCTGCGTCCCCCCTGTTGCGACTCCCGTGGGGCAGCGTGAGTGACATCCGAGGTA -AATTGTGTGGGATCCACGAAGCCAGGATATTCCTCATAATCGGACGAGGTCGGTTGCGAT -CGAGGATTCTGCACAATGTTGACCAGCTTTCCGAGCACACCGAATTCGAGCTTCAACTTA -ATACTGTAGACCATGGCCTTCAACGTGGTCTGCAGATTGAAATACCCAATGTATTCCACA -ACGAGTAAACAAACGTCGAGCAATATGATGACTGCATTGATAGCCAGCAATTCATACATG -ATTCGAGATCGCCGTTGGTCCTTGTCTGGCATCAACCTGAGCAGCTTCACGGTCTCGGTA -ATGTAAATTCCCGAGATGAGTATCTCTTGGGCGCAAAACCAGGCCAGTTGTAACCGCTCC -ATGATGTTGTATCCGTGAATGGCTGGCCGACTGGCAACGTATGCGGTAAAGAAGGTAAGA -ACGGTCGTTGGTATCAGGAGAAGCATCGCGTCGATGATGATCAGCCAAAGAACAAACCGA -AGCACCTTGTTGCTCTGAACCACCAAATGAAGTCGAGAGTACAGGACGATCGATTGGCCC -GGAACCATGACGTAGAACCCAATGGTGCTAATGGTCAAAGAGAACCATAATGGCGCTAGA -GAAAAGAATCCCAGTAGATAGCCAATGGCGTGAGGAATTACCCCGAGGACAACAGAGAGC -AGAAGCGTCCAAAAGTAGAGTCCACGATAACGAGTAAAAGTCAAAAACAACAATATGAAC -AATTCGAGGGCGTTGTATAATGCGATAGATGTGAAGACAATCATCAATGTCCGCACTGCC -ACATTATTGCCATGGTATCCATGGAGGCCTTCAGGAGAATCCGATATGTGTGGAGAGCTT -GATATATCCGTCATGTTCGCCATGCTTGCCAACTCAACCAGCCTGGCGAGTTTGTATATT -CAATGTCGGTGGAAAGGCCGTCGGAAGGGTATTTCACGGCCCGCTTTGTGAGAACCCCAG -TACACTTCGCTTGAATTGTATCAGTTCCACAGCCATCATAGTTGTGCTAGTCCAAGCAAA -TCATTTGCGCCAATTATTTCTATGATGGTACCAAGTAGCGAGGTTGTCGGCTATTCGGAC -AAAGATGGGAGGGATTTCGGAAGGGATTGAACTGGAGGGTATGGTCGCAAGTGCGTTTTC 
-TGAAGGATGTGGAGGTAGATTTGGAGTTTCCGAGGACGATGAGGTCAGACACTCAAGGGC -TAGAGGGATTGAAGCCAAATGTTCAATGCAGTATATTATAAGTTGAGTCGAGAGAGAGAG -AAGAAATGCCCAGCGTATTTGCCCCCGGGATGCCAAGCTAAGCCGAGAAAGGGAACCCTT -GGGAACCGCCACACTGAAACCTCGGCCTTTTTTTTTTTTTCTTTTCCAATTTTAGTTCTA -TAGGCTTCGGGGAAATTGATTTAAGCGGACGATCTTTTCTCATTTCAAAATAGAAGGTCT -ATATTCTCGTATAACAGGGGTTGACTGAAATTTCAAGgtggctgaaatgactgaagtgac -tgaattggctgaagtggctaaagtggctATACGCCGTAGTGCCCAGAGAGAATTGTCTAC -TTGTATTCCACATACTCCTACACATTTGACAAAGATGTCTCGATTTCTGATCTTCTCAAG -TTTGCGCTAACCTCCAGTCCCGGCATAGGGTGGGGAATAGATTTACTTACAATTGGCCGA -TCGCCTTGCGGAGACCCGGAACAACCGTGCGTTCGCGTTCGTCTATGACGGTCTTAACGG -ACGGTCTAAAAAACTTCCAAATCGTGTTGATACCTCTAATACTCCGGGGTTCATGGGCAT -GAAAATATGGAAATTATTGGAGAATATATTCCTTCTGTGTCTTCACTCCGTGCCCAACAT -CTCCGCGGCGTCGCAGATCTGGCTGTATAGGCCATGATGATTCAGACTTCTGCTTAGCCT -CCGCCTAAAGAAGCGACTGGTAACTCTTGCCATAACTTTGGCATATACTTCCGAGTTGTC -ATTTTTCCACTCTTCACTCATCTTGAGAGCGGAAACCTTCTAATTTCAAAAGATCGATAT -CCTAAATCGGTCCTCTCTGATCGCAATATGGTCCCCATCGGCTTAAAGCATATTGAACTT -CGACTCTTTCCATTTTCGGAATACCCAATCATCTTATTCCTTTCGATACTCGCTATAGAC -TCTGTGTTGCGGGAATATGCTCTCTATCATCAATACATGGAATAGGAGTGTTGTGACTCA -ACGAAAAGTTGGGATTCCAGACCATCAACACCATCCAATTAGTACCGGGGATTACCCTTG -GAGCAAAGGTGCGACAAAGAGCGGTGTTTGAAATCCCATGCCGCGGCCTTGCTCTGACAT -GCAACTAATACTGAATTAGCCTATCCTTCGGCATAGGATCTGCATCCACAATCCCCACAC -ACCTATGAGCCTAATTATGAAATATCCAGGCTGAGATTCCCATCTACCTCCGTCATGGAT -ATAAGAATATACAATACCATGTGAATGTTGTAGTCATATCATTGATATGTCACTTCCAGG -CATTGATGATTGATCGATTACCTGGTAAGTTGTACATATACTGGTGACCCACATATTATT -GGGTCCTTGCAGCTGTGCAGGCTTTTTGATGATCCACTGCGTGAAGAGCGGTATCCAGCT -GACGAGGAACGTATCTCGCCAAGAAATAACTTATATACTTGACGTGTCGCCCTGGCTTAA -AACCAAGTTTCTTTCACACACTCTAGCTGTTAAAAGGCTATATCCACCATGGAGTCTACA -AAAAGCAAATACGACTTTGTCATTGTAGGAGGTAATAATGTACCAAACACACATGCAATC -TCTCATGCTTATCACACTTGAGCAGGTGGACCAGCTGGTTGCGCGCTGGCTGCAGGGTTG -GCCAAATCAGCCCAAAGACCTCTAGTTCTACTTCTTGAGGCAGGTGGGCACAATGATGAT -AGGGCTTTGCGGGTAGATGGGAAACGCTGGACGACATTCATGGAAGAAAGCATGAATTGG -GGGTATAAGACCACACCTCAAGAACATTGCAATGGGCGCGAGCTCGACTATTCTCGTGGC 
-AAGGGTCTGGGGGGTAGCTCAGCAATCAACTTCGGGGTTTACGCTGTAGGCGCGCGGGAC -GACTACGATGAGTGGGCCTCTGGGGTGGACGATAAGACCTTCGGATGGAAGGAGATGCAA -ACCCGCTTCAAGAATCTAGAGACCTTCAATGGTGCCATCACGCACTCGAAGTATGAGAAA -TATGCTAATTCAGTTGCTTCCGACCACGGCAGTTCGGGGGGCCTGCGCGTCGGGTATGCG -GATGATTGGGAAAGAGACTTGCCACTCTCGCTAGATGCATTCGAAGCCGCTGGACACAAG -CTCAACCCAGATCACAACTCGGGTAACCCCCTCGGCATGGCTGCCACTATCAACTCCGCT -TGCAAGGGGGAACGTACCACGGCTGTTGACTTGCTTTTTGGTGCTCCTGATAATCTTGTG -ATTGTCACAGACAGCTCAGTACAGCGTGTTTTGTTGCAGGGGAGAAAGGCTATTGGAGTA -GAGACTCAGGGCAACCAATGTATGTTTCATTCTGAACTTTCACAATTGACTTCATCTGAC -GTGGTTCACAGACTTCGCCTCCCGGGATGTGATTCTTTCGGCAGGTTCACTCGACACCCC -GAAGATCTTGATGCATTCTGGAATCGGACCAGCAGCAGATCTTGAGAGATTCAACATTCC -GGTAGTCAACGATCTCCCCGCCATTGGCCAGGGCCTCCGAGACCACTACTTTGTTCCTCT -GGTTCTTGCCCGTAACCCCGGGACAAATGACCGCAATTCGTTCTTCCAAGACCCCGCCGC -CATGGAGACTGCATTGAAACAGTGGGAAGATAACAAGACTGGCCTTTGGACCCGATACGG -CTGTCAGATCGGCTGTGGCTGGTTCAAATCAGACCGTATCACCTCTTCACCCGAATTCAA -AGCACTTCCGGCATCCGTACAAGAATTCATGAACCGCGAAACTATCCCCCACTACGAGAT -GATCACCCACTTTCCCATCCATTTCGTGTTGCCCGATATGTTCAAAGATTATAGCTATGT -GGCTCTTGCGGCACTCATGATGAACGAACAGTCCGTCGGCGAAGTGCGCCTGCAATCCCC -CGACCCAGATGCTCCGTTGCTCTTCAACCCGCGGTTCCTCGAAGATCCCTTCGATCGTCG -CGCCTGTATAGAGATTTACAAACACCTTCTGGAAGTTAGTAGACAAGATGCCTTTGCGAA -GGACACTGTGGCTACCCTGATTGGGCCCGCATCTGATTCCGATGAGGACATCCTCCAGTT -CTGGAAGGAGTTTCTGTCTTCTACCTGGCACATGACCGGCACAGTGAAGATGGGCAAGGT -CAATGCCAGTGACGCTGCTGTTGACACGCATTTCCGCGTGCGAGGCATAGAGAATCTCCG -CGTTGCTGATATGAGTGTCGTTCCTGTTCTGACTAACAACCACACACAGACTACTGCTTA -CGTTACGGGTGTAACTTGTGCTGAAGTTCTTATCAAGGAATATGGCCTTGATGAAAAGTA -AAGGACAGTTAGTACCTGCATCATAGTATCTGCAGATCAATATCAACAATCTTGATCAGC -CAATGCTTTGCATTTTCGCGCTGTCACATCAAATTTTCCTATCTCAGTCTATGTTGTTGC -ATATTGTCCACGAAAACACCAGGGCTGCTTATCTGCTGCATTGAATATATCGCCAGGGCG -ATGTAGAATGGAACAACTTCACTCAAAGCCTCAGTTTGCCTTTGCCTGTGAGAACGGACA -TGTAGTCACACCTCTTGCTTTTATTACCTCGATATCGGATATCATCTCCTCTGCTCCGTG -CTCTTTCTGCGGCTCCGGCCCGATCGTCATCAAATCGTAACCTCCCGGTTGGTACCGGTC -TCCTCGCGACCCAGGCACCTTTCCTCCTAGTGCTCTAACAAGCATCGCGCCGAGCCCCCA 
-TTTTGACGAGAAAGTTGGTCTAACATACCATGGCTGCAGTGTTTTCCTTTGAAAGTTATA -TAGGTGGGTCTCTTGGTTTGGCATGTCGTTCACTAGTTTAGCGGCACTGAACGAAGAGCG -TGGAAGCGCAAGATATCGAAGGAATATTTTTCGTCCCTCTCGGATGAGGGCCAGTAGGAA -CCAGAGAACTGGGCCTGGGGACTCAAGACTATAGAGGTTAGTGAACAAAGTGACTTGCAT -AGCTGAATAAGAGACAAGATTTACCAAAGACTCCTTCTCATAACATCGTCTAAGTCATTA -CCCAGCATTTTTCGCACAGTCATTCTGATGAACCCAGGAAGCGATGACAATGCTGAATCA -ACGTAGACTCGGACGTATTGATCATTCGTCGCCGTGGGCTTTGCCACTTCTTCTTCGTAC -CGGATTGTCCACTCTATCAATTCGATCGTGAAATGCAGGCCATTTTTCCATCCCTCGGCT -TTTGAAGGGAGGACATCAAATGGGATACCCATGTCTTCTCCCAAGTTCTTGTGGAAGATA -CCAAGAGCACACTTCTCGGTATCAGTAAGCTCGCGCCATTCATCTCTCTCAACAACATTC -AGGATCTCGGCAAGACCGTCACCCAGAGTATGGAGGAGATCATTATCCGTGATTTTGTTA -GCACGACGGTAACGAGCATGGCTAACAATGCCTCTGATCAGCGAACAGAACAAAAGACTT -TATATTTGCCTCCACATAAAGGTAACTTGGGTTCAGAAGTGTATGGAACATACAGAAAAT -TCATTCTGGCCACAGCTGACAAATAGCGATCAGAATCCCTTGGTTTAGACTGCGCCTCTC -GCAAGAGGATTTCCGTATCAACAGCGCGCTTCCCCGAATTGCGCTTATTGTTTTGGCCTG -TAACCGCAAAGAGTTTGGACATAGTTGGAATACCTCCAGCCTAAGGATTATTTGAAGTTA -GCGGCATTCTCCGCGGCCATGATTGAATCGGTGACATCTCCCTGTCGTACCTTTAGTAGA -GCGATTTTACGAGCTTTAGCAAAAGCATGTGGAAACTCGAGCTCTTGTAGCTCGGTCAGG -ATAGCATGTGCCTCTTTTACTGTCATCTCCGACAAGTCTTTCTTGCCAGGTGCAAATGGG -GCCTTGATTCTTGTCATGCGGCGGTACCGAAGGAGGCTCACTAATCCTATGTAGCTGATG -AAAGCAACGGCAATCGTTACCTGATGCTTGCTCCATATTGTACAGAGCAGGCGAGTGAGG -TCCACCATTCTGTCAACGGATACTGGAAGCGTTCGTTCTGTGTCACTGGTAAGCATTTGA -CGCACCAGTAGCGTGTCCTATTTGTCAACTTACCGTCCCAGGCTTGCAACTGTTAGTAGG -GTGAGGCTTAATAATTGTTTTGCGTTTGCGATGCCGGATGTCGAAATACTTCAAGATGGA -GGGAGATGTTCAATATATATTCTATGTGTGGCATGGAAGAATACTCTTGAGTACCGAGGC -CCCTGGTTACGGTGGATCACCAACCCAACAAAAAATCCCATGGTGAGATTTGTGAGTGTG -AGTATCAATGAAATTTGTATTGTTGGGCTTGCAATTCTCGAGCCAACCATGCTGAGACCG -TTGGAAACTTCTGATGGGACCTCGGAGGCATCACGATGATAATGTAATCAGCAGGGCTCC -ACTAACTAGGCACCTCCAAAGCTACGCAGCTGGGAAGGATTAGAGCCTACACATGTACTT -AGTCAAAATGAACTAGATGTATTCATTCGCTATGCACTTATTACGAAATGATCATTCAAT -AGATGTGGAGTCGGTCTCTTGTTAATATCTGGTAGAGTAGGCTAATCAACCATACTTGCT -TGATTTACTTAAAATCAGTTCGTATGTCATTATCATTTAGGACAAATTCCGCATTAAGGC 
-CGAGCGAAAGGTCGTTTCAACATCAGATCGCCGAAAACCGGGCGCTCGTCCTAATATAGC -CACCCTGCGAAGAAGTCCTAGCACTGTTTTTAGAAATAGTGTTTAACATGATATCAGTGC -GAAACTCACCTATCAAGAAAGATTCAAGGTCGAGTACGCAACGAGATCGCTGTTTAGGTG -ATGCCGAGATTATAACATCTGTGAACCGTGAGTGGAAGTCTTTCTGGCAAGTGAGACCAA -CGTACCAGTTTGGGTTTTGGAGTACGATCTGCGTTGAGTATCCAATAAACCAGATTGAAT -TTCAGATTGACTGGGGCTGGATGCCAAATTATTCACAGTAGGAAACGAACCAGGCCCATC -CAATTCCACACCTTGCTCTCTCAGATATCCTTCTACATCGTGGCAGTCAAACCACTGGCC -GCCGAACCCACAAAGTTCGAGATTACTCTGAGTCTGGCGGTCGGAATTCGAGTCATCGGG -GATCTCAGGCATGGGTAGAGTACCGAGGATCCTTCGAGGGAGCCTCATCTTCGGGGGGTA -GATTGGATTCCCTTCTTCATCTTTTGTCGCATAATGAGTTCCAGCTCCCCCAATACAATA -GAATGGTAGTGTAGAAATCTCCAATGAGCTGCCTCGTCCAGAAGTGACCAGGTCCCTAAA -ATAAGGGTGCATCTTTGCCCTATCCCTGAAACACGGGACAAGTCTGAATACTCGGAAGAC -CTCAAGCGGATCAGACCGTGTATCATTGAATATTCGGAAGGCATACTCCAGACTGGATCG -TTTGAGTCTTCGAGTGAAGGAACTTTCCAGGAATGAATAAGAATCAGGAGTATTGGCTCC -ACCAAATCTATCAATAATCTGCACAATGGGTTGGCTATGATCTAATTCCCTCTGTTCATG -TATGGGAGGTGGGGATCGACAAAGGACGCTGTCTCCGGTTCCTTGATATAAACTGACCTC -GATTTCCGGGGACGTTTCAATGCATGTGGAGCTTGACTCAGTAATCGGCTGATCATCATG -AATAGTATCGTTGGAGACACGTTCATCAGGCTGGATCTTCTTGCGTGGTTGCGGAGTAGA -CTTCTTGGAAGTTTCTCTCAATGAATTGTCATCGACACTAGTTCCCGGAACAAAGTCCTC -GGACGACGCCAGAAGACCGAGAATACTGTCAAAATTCCTGAGAAGTTCGTATTGTGTGTC -TTGCAAGCTAGACTGAAGGGTAACTCGGTAATCTGCGAGCGACTCTTCGATTCTTTTCAT -CTTATTTTCGAGGAAGGCCACACGATTCTGTGAGTGTTGAAGACTTGCTTCTTTCTTCAA -TCTGTAGGTCCTTTGAGCACGGCGAATTTGGCTTCGCCGGTCCTGCGGGGGGCATCAGAA -TACATCTTCCCCACTGTTTTTCTATTCCATGGGGACTAAATACCTCGGATAGTATAGCTT -CGGGTGCATCGAGCCTGCGCCTTCCCGCACTCCTTGGCGGACGTTTTGTTGTTGAGTTTG -GCATATTGATTAACCCGATATAGCAGCGCCGTAGAACAAGCCACGGACTGTTACTATTAG -CCCCAAAAAGCTCTGTGTGGGCAGGCGTTGAGCCATGAAGTGAGTACATTCTCGTCCTCT -ATGCGCTGAAAAGGCGATGTGCTGATAATGTAATCCGCCTCGGTCCCCACTAGTGGGGTA -GTTGAGACGGATAAACCAATTGGGTCAATTGGTTGGGGTTCCATTAAATGGCCACTGTCT -CTCGCCGATTGTTGAATTTCAAGAAAAGTAAAAAAAATAAAGGAAAAAGAGAAAAAGAGT -CCTCCGTAGTGTCGCGGCCACGACGGACAATGAAGATCTGCTGCCCCCCCCGTCCGGAAG -TATTTCCGATGAGACCTGAAATTTCGGATATCTCCAGTCTATAGTTATTTACCCCAGTAT 
-AGCTATACTGGGTGAAGCTCGAGGGTATATCTAAACTGCATGGTCGGAGTTATCTGATAC -TTGCGTTTAGGTGAAGCGTGGAAGCCCTGCGAACTATATCATTTGGACACAGCGGATGTA -TGTGGACTGATAGGCTGATAGGTACATGTAGACGTCTAAGCGCTTGGCCCCAAAGGACTG -TCTCAGATGGCTTCATTTCAATGGGCCTGTCTAGACAAGCCCTGAAAATACCAAAGTATT -GAGTAGACTACGGAGTACAGCTCACCCAAGCCCCATAGCACAAAACAGCTCAAGCGTCAT -GTCGGACTGAAAGTTCACACAATAGGCAGATAGGAACCCTCACCAGGCGAAAGCCTGCTA -TGCAGGTACGTATATCCACGGCTTGTGACCAATGCGTTGGGCAAGTAAGCAGATAGATCG -AATTGAGGTATGAAACAAATGCGGGATTGATAATGAATTTCCCCCATCAACCTGCCATAA -TGGAGGCAAGAAATCTCAAAACACCACCGGGTCGCGAAGCCGAAATCAAGAGGATGGAAT -TACTCACTGCGCCACACACCATTACGCAAAGGGAAAACATGACATAATAGTAAAGAAAAA -AGACGCAGGAGATAAAGCCAAACAGGCGAGCTAGAAATATTGCATGTGTCGTCAAGCATA -ACGTAACACCGAAGCTGAATAGCGCATAAGCAAAGAAAAGACACAGACGCGGGTATCGAA -AAGTGATCGCAAAACACAGGCAATCAAGACGGCGATGAAAATGAGATGGCGATTAGTTGA -CCATAGGAAGACCGTGTGTAATCTCCTGCTTGTTCACGCCGTCATTCCCGTCCTGATTAT -CGTCTGTGCGCACCCGCTTATTCTGCTGGAAGTTGTTGTTGTTCAGGAAGGTCGTGTCGT -CGGGATGCTTTCGCATGCCGAGTGTGGCGGCAGCAATGCGGTTGAGGGAATTCATGTCAT -CAGAGACAGAACCGGCGTGCTGGCCCTGGGGCCCCGGAGTAGGCACCGACCCCGATGGGC -TGGAGTTCTGAACTCCGGCAGAGGCAAGCGCCATTTCAGTGGTGCGGGGCTGGCTCAAAT -CGATGTTGGCTGGCAGTTCGGGGACTTCACCCTGGGCTTCGAGGAGACGCGATTGCAAGT -TGATGATGTACTCGCGTAGCTGGTAGTTTTCGCCCACCAGCGACTTGACCTGTTCCTTGA -GGGGGTCGATGTTCTTGACCTCTTCTTCCAGCTTGCGGATGTAGGTTTCCTTGCGCTGGC -GGAATGCGCGCTGTTGTTATGATTAGCGGAGCACGTTGTGCACGATATCAAAAAGACATA -CCTGGGCAGCACGGTTCTGGGCAGCACGCTTGGAGGTGGACAGCTCACGCTTGCCATAGC -TCTTGGACCCGCCGTCAGATGGTGGAGAGCCCTGGGGCTGGGTGGGGGTTTGGGGAGCGC -CGAGCATGGCACCGGAGATAGCAGGGTCAATGTTCGCCTGGTCACGGGGACTAGCATTGT -TGGGCTGCAAATGCTGTTGTTGGCCTGGAGCACCCTGGGGGCGACCACCCAGGTGGTGCT -GAGCGGCTAAAAGCTGGTCATGGAGGACTATAGAATCATGTATTAGCAATCGATCACAAT -CACGTGCGACGTGCACCCTAATTCGTCCCGGTGAGATAGGGGACATGATTGGGGTTAGGT -AGTTTTCAACCTTGAGCCCACGCATCCACAAATCACTCAAGGTAATGGCAATAACACATC -AAGAGCGCTGTAAGCTATGGTCACGTACCCTGATCCGCATGGTCCTAAAGTCAAACGTTA -GTATGATACCGTAGAAAATAACAATTGTGACGCGGGGAGACTAGATGGAGACTCGACGCG -TCAAAACGTCGAGCCCCGTTCCCACACGTGGATAACGGGACACGACTGCCTTGTATTCAT 
-TGAATCAAGTAACCAACGCGGGACCTCATATAGATCTGATGACAAACGCGTCCAAGGTGG -TCTGCTTACCTGAGCAGAAGTCTGCATGCTTGGATGCGGCGCTGGCGCCAGTGCAGGCTG -CATTGATTAAGAAGAAAGGGAATTGGTAATGAAATTGGGAAGTGACTTTGCACAGCACAC -TTAGTTCCCACAAGAATTGAGAGCAAAGAGAGCTGCGTTGCTGATAACAAAATACAGCCA -AAATGAGGTAAGCAGTTACAGATCCGATGCTGTAAGCAAAATGATGGCGGCACCAAGGTG -GAGGAGGAGGATTACTTAAGGATTCTGAGTCAGAGGATTTGGGGCGATCGTCGATTTATG -TAAGGAGTTGGTAAGCGAAATGTCCGCATTGAGCAGGCGGTGGACTAGGCGGTAGCTTTT -ACCCCGGAGTTTATCTCGCACTTTTTGCCGCCCGACTTATTTTGATACCGCATATACAGG -CTGTGATTGGGTTACACCACAATCATATGGTTACTCGTCATCCCCCACCAATCGTATTCT -ATCATTGTAACCATGACTTCGGGAACATGTAGGGAGTACAGTACATAAGTGCCTAAGTTT -TGCTGACTACATTCCGCCTGCATCCGGCCCCGGAACGGGCTTACTTATCCCCGTCCGATT -TAGCATGACATCATATCTATTGCCTATCCCGACATTTCTGCCTGAGGCGACACGTCTGTC -TCACAAACAGGCACCAGGGTAGGACAAGCATTTCATTCAATCGGGGACAAGTTGAGGCAT -AAACCGAACTAATATGAGTGATGATTATGTATTTTCACATCGTGTACTAAATGAAATATC -TCTAATCCCGTGGTCTCGTATCCACCCCCAAAACACAGTCAGCCACTTATGCCCCTGCAC -CTACATCATAACTCTGTACAACTTTGAAGTCCTACTGTTGCTTCTTCATGGTACTGTTGC -TAGGCTGTGCCTGTATAATCCCAGCTCCAATTCTCGCTGACTCCATCCAGCCCCGCTGAT -GCATCCAGAAACGGGCTGCTGGGTCGGCGCCATGCAAGGATGTACGCCGAGCATCATCCT -CAAGATCCGTGCGTGAGAGACCAGTTGACTCGGACATAAAGGTGGCATCATCTTCATCCT -TCTCCTCATCGGCCTCTGTATTGGTCGTCGACTCAATATAGCGCTTGCGCACGAGATAAC -GCTCAGCGCGCTCCTTTCGAGAATTAGGAGGTTCGATTCTGGCCATACCAGCGCGCACAG -CATACCGCACAATAAGATAAAGATGTTCCGAGAAGAATATTGTCAAAAGCAGAGCCCAGC -CTTTGATATCTGTCGGCTCGCCATTGGGCCCATGATGTGCGTTGCCACTGAACATGTATA -GTAGCGCAGCGCTGGTGATACTTCCGACCCAAGAGAGAAATCCTAAGCTGTCGAGCCAGG -GTCCAATTGTGTCGGTCCGCAACGGCACAGGTCTGCGGAACTCCATACAGATCTTGAAGA -AGTCTGATCGCAGCTCGATCCAGTTATTCACAAAGAATGAAAGGGGAACAAGCGGCCATG -TTGGAGAGAAAAGAGCCAAATATCCAAATTGAATGCACATTTCACGCAGATCATCGGTCA -CGTCGTACTCGGAAAGCTCGACTTCGTTACGCACACGCGTGAGGAACTGCACTTCCTCGG -CCGGATCATCGAAGGACACGACGGGTCTTTCGGGCGATTCTTTGTCGTCATTCCGGTCCA -TCTTGCCATTGCGCTCCCTGTTGTACTTCTTATATTTGCGGAGTGCATGCTGCTTGATAT -AGGGCACGATAGTCTCCATGGCGAAGCCAACGATCTGGGCTGTGACAGTGAAGTAAATAA -CCTGGTTGCGCAGACGAGCTGGGTCGATTTGGAAGTGCGATTGTTTTGCAACGGCATGTT 
-CCTTCGATACGAAAGGCCGAACTGTCAAGCGGAAGACATCGAGGTAGGGAACAATCCGGC -TAGCAAATGGCACGTAGACAAAGGCCGTCAGGAAAATTGGCATATAAGACGTGATGAAGT -TGATGACAAAGACCTTCTGAGTCAATGCAACATCATACGCATCTTTTGTCTCATGGTTTT -CGTAGTCATTCAGCTTCGTTGCGATAGATGTCAAAATCGCGCTCATGGTTGGCACAAGCG -CTGACACAAGAATAGTTGGTATAAAGATCTGGATGGTAAGCTTTGGTCACAGGGAATTTG -AAGGATCACCATACCAAGTATGTTTTCAATGGGCCATTGTAGAGCTCCGAGATGAAGATC -TCAATGGCGAAGCAAGTCGCAATGATAGCGCCCAAGCCAATTGCTGCAAGCAAGGCGAAG -GGGATTTGGAGTAGCTGCCGCTGCATGCGCGTAGTTGCCGGGAACACACCGCGCATCTCA -CCAGTACCCTCATCGCAGACCTCCTTTTCTGCCTTGAACTCTCGTCGTTTCGTCCGAACG -GCTGAGACGTTCTTGGTCTGCCACCTGCAGCTCAGATCAAGCTCCTGGCGCTTCCAGTAT -TCCACAAAAATGATACACCAAAGGCAATTGACGATAGTATAAATAATCGAGTACCCGCCG -AGGAGGAACCAACAAGAGAATCCAAAGACCGCCGGGAAAATCAAGAATCGGAAGTATGAC -TGCAGGAACGCGAAGTAAAATCCAACCTACCATATTTGTTAGCCAGACCGAAGGATAGCA -ACCGTTTGCTCTACATACACTTTCTCCAAACTTGTTCCGAATCTGATCCAAGTCGTCATC -TGAGAGGAAGGTCTTCGAGCTCCAATCTCGTATCCACTTTTTGTTTGTATCGACATCGTG -AAGGGGGAATATAGCGGTGACATTCTTCCATTCGCCATGGTTGGGAGTGATAGCAGCGCC -GCCCTCTTCCTTGGGAAGTGTGATCATCTGGTTGATCACCCGTAAGCGCTCAGCCTCGGT -CTGGGGCTCGGTCGAGTTGGATGGGGTGGGCTCAGCATTTCGAATACCATAAAGCCAGTC -TCGTACTCTGTGGAAATACAAGTCGAAATTAGCACATTGGCTCTTATAGATAACATCATT -TATCACTCACCGCGACCGATAGACCGCCCTTTTCAGCCGCTTTTCCGAAGCTTTGACGAA -AACAAGCAAAGAGGACTCGTCGCCCTGTCGTACTTCTACCTGTAGACCAACTTGGGCCAG -TGCCTGAAGCAGTTTTTCCAACTGCACAATTGCCTGCGATCGGTCTGCGATTGTCAGAAA -GTGCCCCGCCACATATTAGCCCTGGAGAGCTCTCTCTTCTACTCACCCACGTCTCCGAAG -CTGTAACGGATGACATAGTCCACATGGAGATTATACTCCTGGACATTATCTGGAATTGTC -GCCATCGCTGGTGTGTGGAAAGGAGAAAGAGAGTGGAACAGACAAATCGAGGTGATCTGC -GGGTCGATGCCGTCACCTGGCGCTAAAGGGCTTAGGGGCGAAGCCATCAAGGATCAGACT -GCAATTTGCGACGCAATTTGGAAGTTCTTCTCTATATTTAATTGTACTTATGACGTAGAA -TTGATATTCTTGTATTTGGATCGATTTTATTTCCATTTTCAGGCACAAACATTCAGCCAT -ATATGTGGCTGTAATCATAGATATATGCTAAATGTACATGTATAGCGCTTTTTTGCTCTC -AATTTGCAAAGAGCTTTACTATGTATGTAAATCCCAAAGCGATCATTCCGATATCTTTTG -TATACTCAATCATCCCTCATCCCTCTACGCATCCTCCGCCAAACGAGTCAACGCATCGCC -TTCAACCATCATCTTCATCCACTCATCCATACCGCGCGCACCAACCTGTTCATAGAACTT 
-AATACTAGGCTCGTTCCAGGTGAGCACACTCCACTCCAATCGCCGACCTGACACCTCAAG -CACCTGCTTAGCAAGGTACTTAAGCAACTTGAAGCCGTATCCGTTACCCCGCGCGCTAGG -CTGCACATACAGATCCTCGAGATAGATACCAGGGGCAGATCTCCAAGTTGAGTAGTTGTA -GAAGTACATAGCCATGCCGACGGGGGTTGGGTTTGGAGTATCGGCAGTAGGCGGCGGGGT -AATGAGAGCTGTGTAGACAGCGCCGCGTCTAGGCGGAGTATCGGGGAAGGAGAGGGTTGC -TAGGAGGGATTCTTCCGTGGCTTCGACTTCATGAAGGGCTTTTTCGTAGTCGGCTAATTC -GCAAATGAATTGGAGGATCAGGGGGACGTCTACCTGGTGTTAATTGGGGTCCTGCGTCAG -AGGGTCAATGCACTTACCCTCGGGGGTCGCGAGACGAATGGTGGGTTGGTCAACCATAGT -GTTGGTGAGAGGACGAACGTAATCCATGGCGAAGTCCAGAACTTCCAAATCTTTTTTTGT -TCGGACTTTTGTACTTTGCTAGTATATGCGCGCCTATATTTTTTGCTAGGCTTCGCGATA -ACCCGGGGATTTGCTATACCGGATCAGGCAATATTGCCGATTCTTCTTTTGGGGACTTCA -ACTTTTTTTTACTGGGATTTTCAACTTGGCTCGAGGGATAATTTGCTTTATCTCGATACT -TTGTTGACAGCCAGAGGGTTATCATTATTGGACAGTGTCATTGTTTTACGTTGATAGAGG -AACATAATAGTATCTCTACAGAATGATATTGTACAGAGCAAGGGACTCAGATATGCAATC -CACGCCCATGGGTATAAACGCCAATTATTTGGACATGATGGCCGACATCCACTTTTGCTC -ATCGGTTCTGTGTTTGTTAGTGACCGTTGTTGGATAGAAGGTTCAATTCCGAGACTCACC -GATCGAAATGGAGCAGCATTCCTAACACAGGGATCAGCTGTTTCTGATAGTTCTTGTCCC -TCTGTTCCAAAAATTGGAGAAGAACATTCTTCAAGTACATGTAGTCAATGGAGGCATTGG -TGGGCGGACCGGTAAGAGACTCGCTCCTGCCAGGAGATACGACTGCCCGGCGAGAGCCTG -AATCTAAGGATGACCGCGAGGATTGCCGAGACGGGGAGCCACGCAGGCGAACATCCTCTC -CGAGTGTCTTGTGGGATCTTCGAAGCTTCTCCAATCGGCTGTTTGTCTCTTCCACCGAGC -GACGAAGCTCTGCCTTGTCCTTTTCGAGGTCGCGAACTTGCTTCTCGCTTTCGTCGAGGG -CATCGCGTAGACCAGCCATGGCTTGTCGAATCTCTGTCACTTCCTGAGATGACTTTTCAG -ATACAGACTCGAGCTCATCGCGACGGCGGCGCCACTCTTTTTGCGACTGTTCAAGCTCCT -CTTTATCCTGTTCTGCCGTGCGGAGGGCGCGCTCAGCTTCGCGGACCTTACCCTTGAGTT -CTTCCACCTCGCGAGCCCGGCGTCGACCTTGGGTGCTGGCCTCATCTTCGACACGGTCTC -GCTCTTCGATAGCAGCCTCCATGCGTTCCTTGAAGTCTCGCACCTTGGACTCCGAACGAC -CCTCAATATCATTGAGCAGACGGCGCATGGTCTCGCCTTCGCGGGTTCGTTCGCTCAGCA -GACGGTGAGCATCAGCCAATTCTTCATCCAAGCTCTCGCATCGCTCACGGGCCTCCTTCA -TTTGCATTGCAACTTCCGTGGTCTGGTCCCGCATACTGTTCATCAAGCTCTGGGCGCTGC -TGTGCTGAGCTGTCTTTAGCTGGATCTCATCTCGGAGGTTTCCGAGTTCCTTGTTTAGCT -GAGAAATCTGTTCATCCATCTCCCGCATCTTTGCACGAGTACTCTTCAACGCGTCTTGGG 
-CCTTGGAGAGATCGCCAGTAGTTTTGTCTCTGGCATTTATGGCATCTTGCTTTTTGCTCT -CAGCAGAGCGGAGGTTGGATTGAGCAACAGTAAGGTTCTCCTCAGCCTTCAATCGGCTGT -CGGTCTCCTGTCGGATCTTCTTGTTGAGAGTGCCTACTTCTGTGTCACGTTCTGAGATCT -TCGACTTGGAAGCCTTCAGCTCAGCACGCAAATCCTCATGCTTACCCTCAAGGGCCTTCA -AGTCGGAATTTTTGCTGGTGAGGGTTTCCTTTAGGGTTTTGAGCTCCGAAGACTCGGCAC -GTAGGCTACGCAGCTCGGGTTGAACCTTCTGAAGAGTTTCACGAAGGTCTGTCAGGTCTT -TAAACCGAGTGGCAGCCAGCTGCTGGGCGGCGGAAAGATCAGTCTCCAGAGTAGCGGAGC -GGACCTTGAGGTTATCGAATTCCTCTCTGAGCGAGTTGTGCACCTTCTCTGAATCAGCTG -AGGTCGAAGCGTTGCTGGTGCGGAGTTCCACCAATTCCTTCTCGAGCTTGTTGATAGTTT -CTTCAAGAGACGACTTTTCTGCTGAAAGCTCCTTGATCTTGTCCTTGGCTGCCACATGCT -CCTGGCCGATATTGACAAGGTCATCGCGCAGGGACTCGATCTCCTCCTTCAAATTGTCCT -CGCCTTTCAGCTTGGAACTGAGACGGTCGATAGCGGCCTCCTTCTCGGACAATTGCTGGG -TGAGGGTTTCAATCTTTTGCTTGAGTTCATCCAGCTCTTTGGAAGCTGTCGCAGGCACCT -CTGGAAGCGATTGCTCCGCAGGAGCTGCATCAGCCGGCTTAGAAGGTTCCTCCACCTTGC -CACCcttcttcttcttcttgttcttcttcttcCCTCCACTACTACCACCAATAGCTTGAG -CAGGCTGTGTTTCTTCTTCGGTAGGAGCTGGGGCTGGAGCCGGGACACTAGGCGTGGGTT -CCTCCTTGACCGGTGCCAGGGTCTTGACGACCTCATTGAAATCGGCCACCTCTCCAGCGA -AGACTTTCTCCTTAGCAGAGACCCACATAGCGTTTTCTTTCAAGTTCCCATCCACAAAGT -CGAAAATATCTTGGAGTTTGGCAGAACCTTTGCATTTCTCGTCGATTTCACTCTTCAATC -TCGAAACGGTAGATTCGGTGTCCTGAAGCTGAGACCGGAGACCGTCGACTAAACCCTGGA -GCACCACGAGCTTTTTTTCGTTGGATTTCTTTTCCTCCTTAGTGGTCTCCAACTGAGCAA -CGAGGTCCGAATCAGGTTCGATTCTTTCGGAGGCTTGTTTCTGGAGCTTGTCAATTTCCT -CGCTCTTCTCTTTGAGCTGCGACTTGAGCTTCTCAACCTCGGATGTGGTTGATCTGAGAG -AAGACTCCGCAGTTGCTAGCTTTGCCTCTAAGTCGTCAACATCAGCCTTCTTTGAGGTCT -GAAGGGTCTTGATCTCGGCACCCAGGTTATCCTTGGTTTCGCGCAGCTCGACGAGCTCGC -GGGTGGCTGTCTCCAAGTTCTGCACCATGCCTTCAGTCGACTCGCGGGTGACAGACATAT -CGCGCTTCAGCTTTTCGACCTCGGATTTAAGGTTCTCGACCTCCTCCTGCTTTTCCTTCA -GTTCGCCTTCCAATCGGGGAACTTCATTGTCGAACGAGAAGAACTCTTCACTGGCCTGCT -CCTCGGTTCCATCTGTCTCCTTGGGTGTATCGCTCTGGGGTCTCTCCTTTTTAAGACCAG -TAACCTCATCTAAAGCCTCCTTGGTAGCCTGTTGTGCTTCTTCGAATTTCTTTCTGAAAT -CTTCTCGTTCTTCGGTGACACGCTTCAGCTCTTCCATAACCATGTCGCTTTTCAAGGTTG -ACTGGTTGAGATATTCCGTAAAAGCTTTAGGTTCGCCAATCGATGTAAGCGGGGTGTTCT 
-CGCGCAGGGCAGCTTCGAAGGGTTCGATGGATAGGACGCGCGAGTGAGCGGTACGGTAAG -CCTTGAGCAATTCTAATATGTTCAATTAGTCTTGTGCCCTACAGTCTGGTTAGGCAGATG -CATACCATGGTATCGGGACTCCAGCTTATCAAGCCGTCGGAGTTTGGCGCGAACATCAGG -TGGAAGCACAGCCGGAGCCTCCTTTCCCTCTGTCTCCTGTCCTTTCTCTTTCTCGGCCGT -TTCCGCCGATTTGCTCTGGGCCATCTCGTCCTCTCCCTCTTCGGAGGCCTTTACTTCGGA -GGCAGCCTCTGGTGTATCTGTTTGAGGAGTCCCGGATCTGCTGGATTCATCGTCGTCGCC -GATGGTAAATTCAGCCTCAAACTCCTTGGGGTCAGGTCCTCGAACGGGGGTGCCGGTGCT -ACGTCTTGGGAGGGAGCGCCGACTCGGGGATTGGTTGCGTCCAGGTGGACGCCGAGCGGA -GTTGGAGCGCTCGATGCTTTCTTGGGCGGATTTTTGACGGGCTTGTTCCTCTGCGATGCG -AGAGTCGATGGCGTCGCGGAGGCGCTGTGACGGTGTTAGCATCCAAGGCGCGCATCATCA -AAAGGGACAGGTACCTGAAACATTGCCGCGGTTCTGAGAACCCAGACGACAATAGACTCT -AAAAACGCAAGACCACTAGCGAAGGTGTTTATCAAAAACAAAAAGAATAGCAGTTGGCCG -CAAACGGAAGTGAGGGGGAGCATTGGTTTGGGAGAGCCAAGGCGGAACTGCCATGTGGAA -GGTCACCTTCGGCTGTTTAGCTGTTTACTTTCTTTGAGAAGACTCGGAtatttcgttcta -tttcattctattttattttaattgatttgcactctcgggtagaattgagtctagtgtatt -ttgattagattgtgattgatttgtttatttTCTAATTGAGTCTGACCATTGAAACTGCAA -TATGGCATGCATATATTTGTTGTATTGTATAACATTCAGGACAAGTATGGCATACGGGAA -TTCTAGAACCGTTTGATTTATGGATCTGGAGCAGACCTTTAGGGAGATTTGTTGGATATG -AATTATTTATATATATAGTTCTCAAATTGGAGTTGGGTGGCTATTTACACGTACTATACC -TAGACCGTACATTGTCGCGCTCATGGATCACTTGAATAAACAGATGCCAATAGAGACATT -CAGGTAACGCTCAGTTGTTATAAATCTCCAGGGTAGGTGGACATCCGCAGAGGATAATCA -ATTACATCACTGTGGATTTGGGTGATAACAAGGATATTTCAATCGAACCTTGGAATATAA -ACCATCTTTATATATCATGTCAACATAAATAGTGTATGTAGTAGAAGAACTCAAGTACCT -TGTCCAACTAAGAGGATAACATAATACAAAATAAAGAAATGTCACATTCTTAAACGTAAA -CTTGCATGTGATGCGGGACCTCCCTGTTCTTCCTGCGATGACCCTCAATCTTCAAGCCTA -ACGGGGCCCACGGTTGGGTTTCTCTATTTTGAGGTCTAAGTCAAATCTGAGTTAGTCTCA -ACCAGACAAAGCGGAGCTCCTTTTAACATGGCTGGCCTTGCCCCATCGCTCGGCCTCAGC -CTAACAGCTCCAACTTCATTTAATCCTACCTTTTCCACCCCACCGATCTCTTCTCCCTCA -ACCATGGCGGGATGGACCGGATGGGTGTTTACCTTTGTCTTTCAAACCGTCCCAAGTGTT -CTCTACTCGATAATCGCATTCTTAACAATTACACTGCCAACATGGCTCTTCACGCTCTTC -TCCATGAGCTTGACATTCACCATGAACTTCACCACCATGTAGGTTTTGATCTTGACTTGA -TCCTTGAATGACTGTTGCTGATCAAAAGTCCTTAGAATGATAATCTTCCTTGTTCTGGTA 
-TCGACCGTCAGCTGGTTCATCCGGTACCGGTATCTGAACATGTACAGCCGCCTGCCCCCA -GAGCCCCAACGCAAAGAAGCCCAGATCGATTTGTTCCCGGATATCCAGGATGGGGATTCA -AAACCAGGGCTCGCCAACTATCTCGATGAATTCCTCAGTGCCATCAAAGTCTTTGGCTAT -CTTGAACGGCCCGTCTTCCACGAGCTGACCCGGACGATGCAGACCAGGAAGCTGATTGCA -GGAGAGACGCTGATGCTGGAGGAGGAGAAGGGCTTCTGCATGGTTGTGGATGGGCTGGTC -CAGATATTCGTGAAATCAGCACGCGACGGGAAGTCAGGCTCGCAAGAGAGTCTCCACATA -GACGATGACTACTCAGACGAGGAGGACCGGAACATTCGCAATCGACAGGGATATCAGCTC -CTCACCGAGGTTAAGAACGGTGCTTCGATGTCGTCTCTGTTCTCTATCCTCCAACTTTTT -ACTGAAGATATTGAGCTCCGCAACTCGCGAAGTCACAGCTCTAGTATATCCACTCCTAGT -GGCCAGACTCATGTGCCTGATACCTTCCCTGTCAGTCCCGGATCCACCATGGGAGAGGGC -GGGAAATTGAACACTACAGCCAGTGGATCTGGCGATATATTGTCCCCAGTGCCACCGCTG -AACATCGGAGAGAGCCATGCCATGCCCCGGCAGGACTCTGGCGAGATGCCGCAGCCGGAC -ACCAAAAGGAATGATCACAAGAAACGCCGCAAGTCGGTGCACCCAGATATTGTGGCGCGT -GCTACAGTGGACACGACAATTGCCATCATTCCAGCGAGCGCATTCCGCCGCCTGACCAGA -GTCTACCCACGGGCAACGTCGCATATTGTCCAGGTTATTTTGACACGTCTCCAGCGGGTC -ACCTTTGCTACAGCGCATTCTTATCTGGGCCTGAATAGTGAGGTTTTGGGTATAGAACGC -CAAATGTCCAAATATACCTCTTGGGACCTTCCGAATAATCTTCGAGGAGGTGCTTTGGAT -CGTTTGAAGGACAAGTTCACGCGGGAGCGGGATAGATTGGGATTGGAGGAAATTGGTAAA -GGCATCGCTCTTCACAATCCTTCTGTCAGCCGCAGAGGACGGTCCCGCAGCACTCTCAGA -AAAGAAGCCTTTTTACAAGCGAAGGTGGCCAGCGAGCGTTCTCTCACACCCTTGAGCCCA -CAAAGGTCACCATCGGCATTCATTGACAGGGATGCGGCAGGCGTGAGCCCCGGTGACCTA -TTGTCAACAATACAGTTGTCCCGGTTTGGACCGCGATATGAGCATTTGGCACCTCAGATC -CAGACCCCTCTTACTGAGCGAGAGCAACCGCAATTTGTGTCCACCACTGCTGCAATGAAT -GGCCTGCCCTCAACTTTCCAGCGAAACGAGTCAGTTGACGAGGATGCTATCTTCCGCGAA -TCGATCTTGGATTGTATCATGAACGCCATTGGGCTCACTGAAGGCACACAAGACAATCTG -CGCAAGAGCAGCCACTCTGGCGAGGCGTCGCCCAGACTTCTATCCTACGATAACCGCCGT -CAGAAGGCAGTCTTCTCTAACGCTTTTGGCTTCATGGATCCCTACGAAGGGTCTGGTGAT -GGCGAAACCGAATCCGTGATGTCTTCCATGTCAGTGACTAGCGCAGGGGGGACTTCGCCT -GTCATAAACCTGCGAGAAGACCTCCGCCATGATATGGAAGTAGTATATTTTCCACAAGGT -TCGGTTCTGGTGGAACAGGGTGAGCGACATCCAGGTCTCTACTACGTCATTGATGGATTC -TTGGACGTCGGTATTCCAGCGAATGAACGCGATGATACCCTCATTGGGTCCTCCCATGGA -TCTGCCTCGCAAGCGCAAGAGGGGCTGTTTCCGAAGCTGAGACGAACGACGACAGCATCG 
-TCGCGCACCGCCTCTGGTGCAACAGGCAGCAATGACCCTCGACGCAAGACTCAGTCCCGC -AAATCTCTATACCTAATCAAACCTGGTGGCATTCAAGGGTATGTAGGTGCACTTGCATCC -TACCGCTCTTACACCGATGTTGTGGCCAAGACGGATGTTTACGTTGGGTTCCTTCCTCGT -GCTTCTCTCGAAAGAATCGCAGATAGACACCCAGCTGCCCTGCTAACGCTGGCGAAGCGA -TTGACACGACTGTTGCCTCGTTTGCTGCTTCATATTGATTTCGCGCTAGAGTGGGTGCAG -GTCAACGCAGGACAGGTAATTTATCACCAAGGGGATGAAAGTGATGCTATCTATCTTGTT -TTGAATGGTCGTTTGAGATCTGTTGTTGAGGGATCGAACGGCAAGATTACTGTCATGGGT -GAACATGGGCAGGGCGAGAGTGTTGGCGAACTTGAGGTTATGACTGAATCCACAAGACCA -TCTACCTTGCACGCTATTCGGGAAACGGAACTAGCCAAGTTCCCCCGATCTCTTTTTAAC -AGTCTTGCCCAGGAACACCCTGGAATTACCATTCAGGTCTCTAAACTCATTGCCCAGCGA -ATGCGTGATTTAGTCGACCATCCGCTGTCTGAAAAGGGAATTGAACAAGGACAGGCTAGC -GGCGTTCAGACCGCAACCTCCACAGTCAATCTTCGCACAGTGGCAATTCTTCCCGTCATG -GCTGGTGTCCCGGTTGTGGAATTTGGACATAGACTCCTGCAAGCGTTGCACCAAGTTGGT -GTGACTAGAGGTGTCACATCACTCAACCAATCGACTATTCTGAATCACTTGGGTCGCCAT -GCCTTTAGTAAGATGGGAAAGCTTAAGCTTTCCCAATACCTCGCTGACCTTGAGGAGAAA -TTCGGGATGGTGTTGTACATCGCTGACACCAATGTGAACGCACCATGGACTCAGACTTGC -ATCGCGCAAGCCGATTCTATTCTGTTGGTAGGTCTCGCAGAGTCATCGCCCAGAATTGGT -GAATATGAGCGGTTCCTGTTGGGTATGAAGACCACTGCTCGCAAAGAACTGGTTCTTTTG -CATGCGGAACGATACTGTGCACCTGGACTCACTCGCGAATGGCTCAAGAATAGGGTGTGG -ATCAACGGCGGCCATCACCATATTCAAATGGCCTTCCGCTCGACCACCGACCCTACGCAC -CAGCCAACTAAAAAATTCGGTACTGTATTGAAACAGCGTGTGCAAATCCTGCAAGCCGAG -ATCCAAAAGTATACGTCTCGGCGTGCACAACAAACACCACTCTATTCCCCGCAAACTCCT -TTCAAGGGTGATTTCCATCGGCTGGCTCGGCGTTTGTGCGGCAAGTCTGTGGGTCTTGTA -CTGGGAGGAGGTGGAGCGCGTGGCATTTCGCATGTTGGCGTAATCAAAGCGCTCGAGGAG -GCCGGAATTCCAATTGATATCATCGGTGGCACATCGATCGGTTCTTTCATTGGAGCTCTG -TACGCGCGTGATGCTGATGTTGTCCCCATGTACGGGCGTGCCAAGAAGTTTTCCGGCCGC -ATGGGAAGTATGTGGCGGTTTGCATTGGACCTGACCTATCCCACTGTTTCCTACACCACG -GGTCACGAGTTCAACCGCGGGATCTTCAAGACCTTTGGAGACAGCCAAATCGAAGACTTC -TGGTTGGAGTTCTACTGCAACACAACTAATATCAGCCGATCCCGAATTGAGTATCACTCG -TCTGGTTATGTCTGGCGGTATGTTCGGGCCTCGATGACCTTGGCCGGTTTGATGCCTCCT -ATCTGTGATGAAGGAAGTATGCTCCTGGACGGTGGCTATATCGACAATCTCACCGTGGCA -CACATGAAAAGTCTTGGAGCGGATGTCATCTTCGCGATTGACGTTGGGTCTATTGATGAC 
-ACCACTCCCCAGGGATTCGGAGACTCACTGTCAGGGTTCTGGTCAGTGTTCAATCGCTGG -AACCCCTTCTCGTCACTCCCGAATCCACCTACACTTTCCGAAATCCAAGCGCGACTGGCC -TATGTCTCATCGTTCGACAACCTCGAGCGCGCAAAGAATATGACAGGCTGTCTATACATG -CGTCCTCCTATCGACCCATATGGAACCCTAGAGTTTGGCAAATTCGACGAGATTTACCAA -GTCGGCTACGCATATGGAAAAGAGTTCCTCGAGCGGTTGAAGAACGAGGGCTCACTACCC -ATTCCGGAGGAAACCGAGGAAAAGCGCAAGCTGCAACGGACGATGGCCCCTCGCCGAGCT -AGTATCTAACCGATTTGAACTTACTCTTCAATCCAGCCCCGCGTGAGATCCTGACTTCAA -CGACTTGACTTTTGGTATGCGGCGCTTAAGGCTGCATACAATTTGCCTGTTAAGTGCTGA -CACCGTTTCTTGCAAGACACCGATCAACTGAATCTGTCTTATGAAACTTGTACATTCAAT -GTATACTGTACGTAGCCAGATAGAAAAATAGAAATACAGCTTTGCTGTCGGTCTTCATGG -TTTGATTCATACCGAGATTTTTTTTTTTTTTGAAAATCATGTCGTTCTTAATTTGTTATT -TTGTCCTTAAAAGGAGGCACCCGAGATGGATCATAAAACATAGGCATCAAAAGGAAAACG -AAGTCATTTTGATTTAATCATTTCGTCCTTCTATCATATCACGGAGATGAGATATATAAC -ACAGGTATCAGAGGATGGCCACGTTAACAGGAACACTGTAGTACCTTGTATATGTAAATG -AAGAAAAAGAAAGGATACCATTAGAACGGCCATTCCAACCGTTCCAAGGCAATTAATCCA -TTAAGATATTAAGAAAGCGTACATTGAGAAAATGTCTCTCGCAAGTTTGCGATGGCTGTA -CACAGTACTGTCCACCGGGTATCAACTTCATCATGCTCCCACCCCAGCTCATCGAACGTA -TGTTCAATCCAGCCATTGCCAAAATCCAAGTCCATCAAGCTAAAACCGAATCGAATACCT -CGTAGGGAAGATAGAATTCCAAAAGCGAGGTACGGTAAGCACAAAGCGGTTTTCTGAACA -TCCTCGTAAATAATATATCAACATCTACTGACCACGTCAAGAGATAGTAGATGTATATTT -GTCCATCGAGGCAACCGAAAAACTGATTACACAGTTCCGGCAGGGGCTGCAGGCGCATCC -ATAGGCTGACGGGCCGCATGATTGACCTTATCCTCAGTGGCGGAGCGGTTGGAAGTGCTG -TTCTCATTCCGATAGTACCAGAGGGCGACGACTTCAAGGATCATGCCAAAGAAGGTGAAG -ATGCTGTAATGATGTCAGTCTCGCTAATTGATAGCAGCTGGGAAGATACAGGCCACATAC -AAAGCTAGGAAGATGAAAGCTTCATTGGCGTTCTTGCGCTTGCAGTCAAGGCCTGCGGGG -ATAGCTTCGCGGCAGTTGCCTCTATCATAGTCTTGTGCGGCGAAGATGAAGGCAGTCAAC -CAGCTAGAACGGGTTAGAGACCTTGACATGGGTGATAGGATTGAATGTTGACATACAGAT -AGGAGAATACGACATCGATAGCGAGCACGAGTCTGCTTAGCGAAGAGGGGAAGAACGGCG -ACACGAAAGCTGGAAGGAAGAAGACCACCGAGATGGTAGACTAAGCTTGCGGTTAGATTA -TATGGCTCTTGCGATTGAGGATTAGTGCGGGAAAAATGCGAATTTAACGTACAATAACTT -CCTGGTAAATGGTATGCTGGCCACGGGGACCGCGATGGATGAAGTATGAAGTCAGGCCCA -TGACGATGACAGCACTGGACCACTGCAAGAAGCGAGTGATCAGCTGCAATGGACGAGCGA 
-TGGTTGGTGACAAGCCCATTGTGCGTTGAGAATTATGAGAAGTATACAATGGGTTAGAGG -GCGCAATATGGCGAGCTGTAGTTTATCACGCCTGTAAGATGATCTAGGGTTAGATAGTAG -ATAATAGATACTTAATACTAACTTAAGAAGCCAAGTTAAGAGGAGCAGATGGAGGGCTTT -ATATCTCGGGATCATCGCAACATCACAAAGCAAAAAATGGAAACACAACAAGCTCATAGT -AGGAAAAGCAAGAATTTCCAGGTGCGTCACAATGGTCTTTGCATGAAGTCAGAACCGACC -ATTTCTGGTCCAGATCCCAATTCGGCCCTTTTTTTAGGTTTCGAAGAGGCAAGATATTGT -ATTTATTCCATTTGTGTAGCTTTGTCAAATAGACAATGTCATTATCTAGCTGCGTGGCAA -TTCATTTCAAATCCCAAGGATCAAAGAAAGTGCATAACGTCAACTGAAGAAGCCCGGCAG -CTTGCGGAAAGATAATCCAATGTGTTGTGATGTGATAGTTATCAAGGGCCAATCAGAAAT -AGCACTGTATAACCAACGAACAATCTACCAATGGGAGTGAAATTGTCTTCAGCCTTAAAG -CTTTGCTTTTCTTAGGGATAACTAGTGTGGCGTTCCTTTGATTAAATACTTGAATCGTTC -GTATATTTAATCAAACTGCTACTGTTCGAGTAGAAATTGCCAGTGATGCCAATCGTGGCG -ACATGAGAGTACTGTGTAACTATTACTCCGTACGATGGCTCGTATTCTTTCGTACAACAT -CGTCGATTGACCATGTGGATGGATTGGTCTGCACGAATGTTCCGACATGGCCGAGAATAA -AATTGTAGAGCTGGTTCTTTTGATTGGTCAGTGGAAACTATTCCCTTTCGTTTGGGGCTA -TTTTGCCATTGACCTCCCCGGAGCCATGTACATAGATGTTTCAGACCTCCAACTTTTGCT -ATTGATCCAGGGATATGTAGCTAGACAGATACTCAACAGGCCACTAATCGCTGTCTCCGC -CTTACCCTTAGTCGGAAAAAAAAAGTCTTGATCCTTCTTTCAGGGATATTCCTGAATGGA -GCCAATATGATAAGATGGTCCACGGTAGCTTGCCTAATTTTGGCGTGTGTTCGGCTGAAT -CACCGCAATCGCTTTCACAGCTGACTGgagagagagagagagagagagagagagagagag -agagagagagagagCTTTGAGTGAATTGAGGTTACATACGACTCGAGAAGGATATACGAT -CATGCGGGATTTGATGAGAATGGATGTAAGGGAAAGACGGTGGGCCGCCTAGGGCAAGAC -AGGCTCTATTAAGACAATAGTAGGGATTCATACGGAGGGCTGAACATGTCATATTTCGCT -TACTACCGCCAGACTCTGTAATAACATCTTTAAAGTATCAAATAAACATGCCCTATACCA -CGCCGGATAAGCAGACCATTCGACTCCAGTGCGCCCAAGCAGGTGTTTGCCGAACTTGCT -TTTTTTTTACCCTCACCATGTTGCATTGCCAAGCTCATGCTCCTTGATGTATTCGCTGCC -CTTCTCCATATACTCTTGTCGCGTGACCGAAGATCGGCCCAGGTCACCACTAGAAGCCCA -CTGTGCCGCACCCTTCCAAGCATCAAAGACAGGATCCGATGCCTTGCGTACATTCAGTGT -AGCGTCGATGGGGAGGAACCCGCGGACTTCCTTACGGAAGCGTTCATCGAAACCGGTGAA -CAACGTATTTCCTCCAGTCAAGAACACATCCTTCAAAAGGAGGGACTGGTCTGCTGGGTT -GGAATAACGTTGGTTGATAATATCGGCCGCGATCTCGATCAGACCAGCCTGGTCGAGGCC -AGCAATGGACGGCTTGAACACAACCTCGGGCACACGTATACGCTCCACGTTTAGATGAAG 
-CTGATGCGCTTCGCGCTGGCTTTCTGGATCAAAAGGCCAGGGCCCGCGAAGGAACGCGTG -CACAAGACTCTTTGTCCAATCGGACTGTGCGGCGAAAGTGTTGTTCTCGTTGAATTCGGG -ATCGTACTCGAGGAGCTCCCTCTCTACAGAGTCAAGCATACCGTTGAGATCCTCCTCCTC -TTCATCGTCACTGCCCTCGCCAACAGCTACGGTGCGGTAAACACCCCAGTCCTCATCATT -GGCACCGAAGTCGTCGTCATCTCCACCACGACGGCGTTTCTTAGGACCGTCAGATGCCAG -GTTGGCTAGCGTCTTCATACGCATCTGACTGGCCAGAGACTTGCGATTACCAAGATCAGC -CGTGAAACGATCACGGTCTTTGATTCTTTGTAGAATGGCCTGGGAAATATTAGCACGGGA -ATCTAGAATGTCACGGCTGACCAACACACCTGGCGATTGCTGCGGCGCTTCGCAATCCAG -TTCTCAAAATCGTTCTCACGTGTCTCACGGTCCAGGCGCTCTTCTTCCTCAATACGAGCC -CTTTCGCGCTCTTTCTCTTCCTTGGCACGCAGTCTCGCATCGACGTTGGACTTCATAAGA -CGTTGGTGCCGCTTTTCCTTCAGTCCGGCCTCATCCAGTTGATCGTCTGGAATATCAAGC -ATAGGGAACGTCATTTCCTCAGGTTGTTCCTCGGTTTCTTCAATGCCTAGATCCTTGTTG -CGCGAGCGTTTGATAGATTTCTCGAGCTCGCGAATCAATTTCTCAAGCGCGGCCTCATCT -TTCAAATCCTCGGCTTCCAAAATCCGACGAATCTCCTTCTTAGTCTCGTTGGCTAGATTA -TGTTGCAGGTCTTTCCAGTATTCAAGCTCTTGCTCTTTCTTCACTAGTTTCTCAAGTCGC -ATCTTTGCGGCCTGTTCTTGAAGGCGACGACCACTCTCCTTCTTACGCTCAGCAATCCGC -GCGAGCTCTTCTTCAGTCTTCTCCTGAACAATGTGCTCTGTGAACGGGTACTGAATGACG -CGGTCACGATCCTCGAGTCCTGTCCAGTCGAGGAACCCATTCAACTCGCGATCGTAGTCC -TGTGAGACGTAACAGTGCTTGTGAACCATTTCCTCCATTTGTGCGTCTGTCAAACGAGTT -GGGAAGGTAGGGTACTTCAACTTCATGAGCTTCATCAGGTATTCGGCGCTCTGCAGGCCA -CCCCAGTTGAGACGAGTGCTGTTGGAGAGCATGGGTTTCGAGTTCATGACCGGAATGACA -TGAGTAGACGTATGCGATGAGTCCACAATCAAACCATCCCTTCCACCATTATAACGATAG -GAGAAAAGGGAATCGATACCGTAGGCTACTGACGGCGCAGAGTAGCACTCGAATAGGATT -TCGTTCATCACTGAAGGCATGTCAGCACTACCCTACTCGATCTAGGACCGTGGGTTACTC -ACTCTTCCTTGAATACGGCATATTGGCGATTGGCTCTGTCATTAAGATCGGGCGACCAAC -ACCCCCATTTGCACCGTCCACACCCAGCTTGATGAACAGATAGTCCAACACACCTTCCAT -GACATCCCAGTTACCTACAACACTCGAACCGGGATCGAAGCCATTGCGCAGCTGTCCCCT -TGTAGTCGCATCGACGTAGGCATCATATCCGATAAATTGGCATTGGCGATTGAGCTTGCG -ATCACGGTATCGGCTCATGACCGGAGGTATGGCGAATCGCGGATTTTTGTCAAAAGACCA -TCCGGCCTTGACAAGATGGGCACCTGCGCATGAAGCACATTTGTTAGGGGAGTTGAATCG -CAAACTATCTGGAATTCCTTACCATTGTCAATAACGATGGCACTGGTGTCGGGATGGGAC -TGGCTCTGCTGATATCCCTCAGGTTGCGGAGGGTGGTAGCCCTTGAATGGAAAGTCTTGT 
-ACACCATGTAGTTGTGGAGGAGGCTTCTCAGTTCGATTCTCAGTGGCATTGGCTGCAATG -CGAGCTGGTAGGAGGCGCTGCACGGAGGTAATCGTCATGGCGGGTTGCGCTCCGGAAACC -AGGAGCTGTTGGGATCGGATATTGTAGGTGAGTAGGGCTCACTGCATGGGAAATGATGTT -TGGGTAGGAAGAATCCAATAAACCTGTGGAAAGGGCAGCGGATTGACAACGGAGCAAACG -ATACAAACGCGGTTGATGAGGTTTGGCGGAGTTGGACCGCCAAGGACGGAGGCTATCACT -ATCGCTTCTATGGAGTACAGGGATGGCAACCGACCACAACCTTTACATGTATCTATTGAA -TTGAGAAGTTGATATCATTGACTAATTGCAGGCTCATCAATACATTTCTTCGTTACAGAA -AATTGATCATCCCTAGGATTGCTTAGGATGGCTCTTGGAGGTTCTCTGTGGCTGTAAGAG -GTGTAAGTAGAGATTTTCAAGGCCGATGAATAATCATAGAGTAAGATATACCTTCAAGAT -GGAAGAGAACATTAGACTGGTAGTTCTTGAACATCTGCGCCACAAGTTCAACATTCATGT -TCTGGATATTGACTTCCTGCGATCGAGAGTCAATATACTAGGACTATATCAATGAAAGAG -GCTCTAACCTACCTGGAGGCCTTTGTTCATGACCATAATGGCTTCATTAAGTTTCTCCTG -TAGAAAATGAGTAAAGGTTATCCTACAGATGTTGTTCAATTATGACATACAACATTGGTG -ATGATGCGAGAGAGCAGCACTGTCTGCTGATGCTCATGTGGGCTTTCCTAAGGAGATATT -AGCATATAAAATCCTCGGAGACAAGCTAGAAATAATGCATGGAAGAGAAACCACTTACCA -TGGCTGAATAATGAGTAGCGTTGTGTTGGTGTTGGACGTGAGAGTGTTTATTTGGACTTA -ATTGGGCCGCGATCACGTGTGGCTATACAGACTTCACGAGCCGGGGATAGTTGCATTGCT -TCAATGTTTCTACGTTCGACCCTGATCTTGTCCGTTGCCCCCTTTATAAGCTTAACATTG -TATAACCAAGTTCTGCCACTGAGGGATATAACCTTGTTTAATCATTCCCATCCTCATCCT -GGTCTTCACACTACCATCTATCCACACGCAGTCAAGCTCCGCAAGAGTTGCTTCCGCCTC -GACCTCGCAATTGACCACACCGGTGCCCTGAACCATACTCAAAACCATGCCTCCACCAAA -TGCCGCCAATGATGAAGCTCACCAGCAGCTATTGAACACCCTAGACATTTCTTATGTCCC -GAAGCCCTTCCGCAACCCACACTGGAGACCGTCCCAGCGGCGCAACAAGAACCTCAAGCA -GATTATCTCAGAGGCCTCTCGGAAGGAGGCATCAGTGTTGGCAACCCAGGCCAATTCAGG -TACAACTACGCCGGGTGCCACGGGCGCTATCACCGATGGCACACAAACCCCCGCGGAAGG -GGCTACACCAAACCTGTCCCAGGCTACCCAAAACCTCTCGACGCTCGTCTTGGAGAAAAA -TGCCCGGTCGACATTCCCAACTGGGCCCGCCGTCACCTACACGAACATTGAGTCGGCACC -ATCATTTAACCCTGCCTACCACCATCACTATTGCGATATCACAGGCCTGAATGGACCGTA -CACCGACCCGAAGACCCGGCTGCGGTATCACGACAAGGAAATCTTCAAGGTGATCCGGAC -CCTGGCACAGGGTGTTCCTGAAAACTACCTGGAAGCGCGCGGGGCGCACACTATTCTGAA -GTAAAGAGCGACAAGAAACCCGCTTTATTCGATTCCCACACGAACGACTATGCGAGAATG -GGTATGTCATACAAGGACGAGTTTGGATAGTAAGAACTCTATTAATCAATTCAACTAGAA 
-ACTTAAGAAAGACCGCCAAGCAGCGTGATGTATGACCAAACACAGTCAAGCCCTTTCCAC -CCCACCATGCCCCTTTCGAGACAGGCTCAAGAAAACGCTCCGGCTTATTCGTGATCAAAT -GAATCATTCTCCGCCTTGTGCATAAGCACTCGACTATTACCGACACCCCATGGGACAGGG -CACCAGTTACAGAATCCGAAGAGACTGTCGTCGCGGCGTGGCATGTACAGAATACATGTA -AAGGAAGATTCTGCAAAAGGGGATAGCCCTATCTATGTATGAGCTATGCTCAATATTGTG -CTTCAGTCGGTATATGTAAATACCAGTAGGTTTAGACTGAAGGTTGAGGTCGTGAAAATA -TATGCAATTGGCATGATCACATCATTAATGACCTTGCCAATTGCCGCGTAAAATTGACAA -AAACTGTAGAGGAAGAGGCCAGGTGGAATGATCGGCGCGCACTATAAGAACTTTGTGTTT -ACCGCCTACTATTTTGGACTTGGCGTCGATTACTTGGCAGAATGGTGATTTTTGAGTTGT -TTTCTGTTTACGTGCCTTTGGGACTTTGCGGCCTTTTGATCTGTTGTCGAGGACTTTCCA -ACTTACGGTAATACTCTCTCGATACTAGAGAATCGCTGTCGCCCGCTTGGAACAACGCCA -CCAATATCTCCGCTTTGTCTTTGCCGTACCTGGCCTCTTGCCCTACCGATTCCGCAATCA -CCGAAGGCCACTTGTTTCTTAGATCGCAAAGTGTCGCATATTCTAAAAGACTACGTCAAG -AGCATTTGCGCAACCCTCGATCCTGTGCATTATCATCAACAGTCGAATCCGGAACAAACA -AAGATGCCTAGCATGTGGTTAACAGAGAACCAGAGTCAGTCGCATTCGTCCTACAAGCTG -TGAAATGCACTGCCATGACAGTGAATGTGTAGCGATTGTCGAACATAAGCTAATCTATAT -AACAGAGATCGGTGTGATCTTCTGCTCAGCCGGTACGAGATTGGTGTTTCCAAAGCCACC -AACTAATTGGTCCGAGAACTAACCAATGCTGTTACTGAATAGGAGGCTTGTTCCTATTTG -GAGGAGTGGTGATGTTTTTCGACCGGTCATTGTATGTCTATCCCGCCGGTCAGCGATTCC -TTTTTGGAGGAGAATGAATACTAATGCTCATTTTATAGACTTGCTATGGGAAATGTATGC -TGTATCGTTTAAACAAATTTGATACACTTGACTAACAACACCAGGTTCTCTTTCTCATTG -GACTGACCCTTATCATTGGTGTCCAGAAGACCATGGTCTTCTTCACGCGTCCGGCAAAAC -TCAAGGGCACTGCTGCTTTCGCCGCTGGTATTATCCTCATTCTCCTCCGCTGGCCCTTGA -CTGGGTTCTTGATAGAACTCTATGGCTTGTTCATCTTGTTTGGCGACTTCCTTGTCACGA -TCGGTCAATTTGCCGGCAACATCCCTGTGGTAGGGCCCTACATCCAGCGTGGGCTAGAGG -TGCTTGCTGGTGGGAGGAGCAATGCCGAGCTTCCTGTATGAGTATACACAAACAGGGGAC -TTGAACGATGAGGTTCCCTGGGGGGATTTTGATCAAATATGTGGACCTTATCGTATACGG -CTACGGGATATCAGATGCACATTTATGTTCGGAGATGTCCAGGCCTGCTATGCTATAGAT -GTCACACGGGATGATACGATCCATCTACGCCCTTCAGATTTATGTAATTAATAATGACCC -TTATTGATCTTCCAGCGGTAATTCAACGGCTGGTGCTCAGTTAATATGCATGTTAGAATA -GACAAGTTACATTGTGCATCATACATTCTGGTTGATATCCTCGTACTGTGCATTGGGAAC -CATGTCTAGATCGACGTGGCCCATCGAGGCAAAGAGATCCAGTCACAAAAGCCAGAAAGA 
-AGCATAAGCGAAACATCACGCTGGGCTCGAAAAAGAAATAGTCGAACGCTCCTTTATTTC -TTATTCTTCACAAACCCCTCAACTCATCACATCATTTAGTTATCATTGCATATTTTCGGA -TTTACCGGGGGTTGTTGAGGTAGTCGACGAGGTTCTCGACAACGGTGGCCGCGTTGGTGG -TCTGGACGGACTCGTTGTGGTGGGCGATGATGGTGCAAGAGGAGGCACGGGAAACAATGA -CACCCTCCTTACCCTATATCACAGACTCAACAGATTAGCACATATTCCGACAGTATGTAT -TCAAATAATTGCCGAGGTTGTTATGAGTCGAAGGGGAGTGGATATCTTTACCTTCTTGCC -GTATACACTGCGCTCGTCAGCCTTGATGGTGACAAACTTCTCACCACAGACAGAGAAGCC -GTTGGCAAAAGCCTTATCGCTGGAAGTGTAAGCGGTAATCAGGGAGTTGATCTCCTCCTG -AGAAAGCTATATTTCACCAATAACATTAGCAACAGTGTCCAGATGTGTGATCCCACCCCG -CAGTTAAAAAAAAAAGAGGCAAGCTTACCGCGAAGCCAGGCGAGGCAGCCTCAACACCAG -AGAAGTCAGCAGCGAGAATACCGGCCTTGTCGAATTGACCCGAGCCCATCAGGCTGCGAG -AAAGAAGCTTGTCAGCACATGTATCGGATAGACACGGAGGGATGATTTGGTAGGATCGAG -GGGTATATCAGGGGTCGAGGACGGATTATGTACCTGGAGTCGACATAGCCTATAGTTTTT -AGAAGAAATCTGTTAGCTGATTTGAAGCAAGGGATGAACTGCAGTCTGCAATTATAAGGG -TCATACTTTGCCAGATTGCTGAGTGCTCGCCCATGATGTAAAATAGAGAGAATAACGCTG -AATAGGTTAGAGAGGGTCAAGAAGAGAGGAAATTCTGGCGCACACCTGAATATACAGAAA -AAAAAGGAGAGCTGAGAGAAGAAAGAATGTTGGTGGGGGAAGTTGTGGCGGTCGGGAGAG -GACCCCGCCATATGACGATCCCACCGTTTGCAACTTAGGATTCAAGCTCATAGCTGGGCG -ATTTTAGAGGAATTTACTTTCTTTCTATTTACTGTGTGGGAGAGAAAAACTGTTACGATA -TCTGTCTAGAGTGTAGAGGGTATAGTGTCTAGGGAGTATATGAGGTCCGATATTCTCTTG -CACTAAGTAGCGGTGGGAGAGATCTCTAAGTAGCTCAACTTCAACTAATTTGGATATATC -TATTCAACCTCATGAAAATATATATGTATGACATACATACAACATCCCGCTTTCAGTTCA -GTCAGCTCATCTGTACCGTCGATCACGGTGCCTGGGTGCCTGAATCTAAGGGGTTGATCC -TGATGCTCCTGATGCTGATCTGACCGCTATTTAGGACAATAATCTTTCATCTTTATCTCG -TGTACGGAGTACGGAGTACATGCAAAATGCAGACATACCATAGGGCCTAGAGACAGGCTT -GGCATCACAGTGACTTTGGTCATGCGCAGTCTCGGAGAATTCATGACATGCCCAATGTGG -ACATTCAACTTCAGATGCGAAATCCCCCCTGATTCAAGGGTAAAACGCAGAAAACGCCTC -AGTCTCAGTTTTGACCGGATGAAACACGCCTCCTGCATGGCTTGGCCCCTTTCAACGTGG -TAGCGAGTTAATCAATCCGCAACCGATGCGCTCATTCGTCCCACGGGAAATAAGCATCGC -ACACGGGCTTGAGGTTAGCGCTGAGACGCCCCTCGAATTCCAAATCAGAATTTGCTTGCG -AGGTTTCGGTCTGATTTCTCATGTTTCGTTCAGCCCCAGACTAGGGTTGGGGAACCTATA -GCAGATTTAGGATGGAGGTGCAATTAATATGGCCCGTCTCATTTACCGTGGTTGACTCCA 
-CTCAATCTACGTGCTTTCATTAATATACTAGACGTTACCCTTTTGTTATCTCCTTCTATT -TTGTTTTTTTCTCTCTTTGGGTTGCCGGTCTTCTGGAAAGACTGTATTTCTTCTTTCTAT -AATTCGGGTTGTGTTGATACTCTCTCACATATCTTCAGGttctttttttttttttttttA -ACTTTTCTACTGCATGTCGACCGGATTCAAATATTCTACTCTGAGATGGCAAGGGGTATG -ACAGCAGTCCAGTTTCGCACGCAGTCTAAGGAGACCATCTCGTTTCCTGTTTTGACCTAC -ACCGATGCGTTGCCACGGAATGACCTTCAATCTCTTGAAGCCTCACATCATCCGGTTCAG -CCAATGACCGAAAAGTGGGATTTCAGTCAGCCCTCTACGGGAGATGGCTGGTTCAGAAAG -CCACCCCCGGGCCAAGGCGCAACGTTCGACTTTCGTCTCACAGCGCCACCAGATGAAGCC -ATTCGATCTACCCGATCAGGTCGTACCCAAACCGAGCAACACATGATTGGCATTGCGCTG -GGCAGTCCCGGCATGTTAAACAAAGATGAGGCTCTGCCGCCGCCCAGATTTGATACATCT -ATGTTTGTAGAAACAGATTCAGCCGATAAGCCCAGCAAGTGGAAGAAGATTGGTGGATTC -TTCAAGGCCAAAAATGCGTTTGCATCGCCTTCGGAGGCCCCACAAGAAAGCACCTTTGAA -CCGCAGATCAACAAAGATAAACTACCAGAAAAACCTCACAAGGCAAGACTGAGAAAGAAC -TCGACAGAGGAGTGGCCGAAACTTGAGATCGATCCCAGGAAGATGCCTATGCCTGGTCGA -AGTGATCAAACTCCCCTACGGAGTCGGAACTTTTCACTCAGCAACAAGGCCCCCAAGGAA -GAGCCAAGTAATCGGGGCCTCCTACTGAGTGTTGACATCCCGGATATTCAGATGGAGCGG -TACAGTGTCATGTTCAGCAATGTCGTCAACAAGAACCAAAAACCCTCACTGCTGGCCAGG -AGGGCCAAGACATTGGACAATCTTCGCGTACCAGATGCAAATGTATGAACTTTCATTGAT -AAGATGCCAGTATGAAACTAACTGGTGACTTATATAGGGTTTCCTAAAATCCCCGGCACC -ACCCCCAATGCCCCAACGGCGGGCAACGTCACCAGCGCGATCAAGCTTCACTTTGTTCCC -AACTTCACAGTCCTCGAAAGCGGCCCAGGTACTCGGCACACAGAATTTCTCTCGTGGACC -GAGCCCACTGATGAGATCAAACACACTACCTATGGAAAGCCCATCGAAGATGCCTCCACC -ACAGCTCTATCATGGGTCAAATATGAGCAGTATCTCATCGTTCGAGTCACCCGTGATCCC -AAAGCTTTTCTCTGAGCGTTCGAATACCCCACGGTCTTCTAGTAGCTATGACAAGCCTCT -TCCTGCTATCAAGCCGGAGCCACAGACTACTCAACTACAGAAAAGTGCTCAGCCAGAAAG -AAAAAGCCCATCCCCACAGAAGATTCGGCCACAGCAAAGTATCCAAGCGCACAATACTGC -TGTAACAAAAGCGGTTGTTCAGCCCTGGGCTCATCCACGACAGAGGAAGGATTCGCTGCC -GCGAGCCATAGAAAAACAGCCTCAAGTTACTTTACCTCAAAACAAGACCACACAGCATCA -GCCAAATACGATATCGAACCACCACAAGACTAAGGACTCAGCGAGACCGCGTCTCAAAGT -TGAAACTGAAGGCCGACCACAGCCGCCTGCAAAGGACACCCTCTCTAGCAGTACATCTCG -GAATTCTCCTTCAGCATTAAATCCGACCCAGAGCAAGATTGACCGGATCATGTCCCCATC -ATCAGCGTCAACTGGGCCTAGATCTGGCGCATCGCCATCAGAGTCCATACGCATGCCATT 
-CGCTATTCCCGTCGAGACCGTTGCTACTGTTAAACTGGAGCCTGCACCCGAGCCTCGACG -TCCTGTTCCAGAGATTGAGGTCTCAATAGCACGCTCTGTCTCTGTTTCGCGGGCGAAGAG -GCAGGTTCTGGTCCCCATCGGCATGAGGATAGATCACTTAGACCCAGAGGAGCGGGTTGT -GCACAGAAGACCTCTGACACCGCAAATCACGGATGCTCACCGGGGTCATCGGCCTGGTGT -CTCGCAGGAGTTGCGGATAGAATGTCTGTGACATTCTACTGCGCCATTCTTTTATATTTC -TTTTAGGCTATTTTTGTTTCATACCTGATTCCCCTTTGCCACTTCTCATGCCTTGTCCAT -ACGTATCTTTCGTTCTCTGCATATGGTCTTCATATTTTTACAATTTCTTAATCCACGCCT -TGATTGGCCCATATTATTCATGGTCATTTTGTTTTTAGGGCTGCTACATTTCGCATGTAC -TTACTTGTTAGAGTAAGTACCTAGGTATAAATTAACAGAGTACTTAAGCCAATCATTGTA -GAATATTACTCAAGTAGTTTTCCAAATGACGAATTTTCAGATGTCCGTCAATTTGTTGTT -TTATCTTAGTCTGAGGTATGAGTCATGAGCCGATCATGCACCCTTTGTATCGGGGTTGCG -ATATCATAATATTTCAATATAAATAGGGCATACAAGCCATTGCACGGAATGCCAGAAAAT -CCCTAAAATGGACACCCTAAAGCTGGCATCCGCCTATACACCGGTCCAATTAAGAAAGTA -TCTGAGCTACATCACTCTACCACACCAATATGCACCATATATCAACTCACCAGAATCATT -TCCCAAGACCGAAGAAGCCCTGAAAGACCTCTTCCGGTGCCAGATCACCCGTTTCCCATA -CGACAATCTGACATGTCACTATTCCGCTACCCAACTAGCGGAGATCCAACCCAACAAGAT -CTATACTAAAGTCATGGGTAGCGACGACACAGCCCCTTCTTGCCGAGGAGGCTACTGCCT -GGAAGTGAGCATTTTCTTCCACCACGTGCTCCGTGGTCTGGGATTCTCAGTGTACATGAC -GGGCGTTCGGAACCGGCTACGGATAGATGGCATACCGCAAGGGGAATTCAAAGGATGGTA -AGACGATCCTCCAAGAGCACAAACTCAGTGATCACAAAGAGGACACAATCTACCACGGAA -ATAAACACGACTAAACAATAAATCAAGGACACACATCGTCAATATTGTCCGTCTACCTAG -CGGGGCAGAGTATCATCTGGACGCGGCATTCGGCGGAGACGGGCCAACAAGTCCTCTGCC -GCTGATCTCGGGACAGATAACGAAGAATCTGGGCTCCCAGGAAGTGCGATTGATTTACGA -CAATATGCCGAAACAGACACGGAAAGAGCAGAAGGTTTGGATTTACCAATATCGTAATGG -TGCGGACAAGCCATGGAATTCGTTCTATGCGTTTGCGGAGCTGGAGTTTTTCCAGGATGA -TTTCGAAGTGATCAATCGATTTACGAGTTGGGAAGCTGTGGAGAAACGCAGTTTCGTAGT -AGTTAAGTTCATTCGAAATGGGGAGACGGCGGCATTGCCTTTGTTGGAGGGAGAGGTGTC -GGAGAAGACGGATGATGTTTTTATTGCCGGGAAGCTTATGATGGCCAACAATGTCATCAA -GTTGAATATGGGTGGTAGGACAAGGATTATTGATTCATTTGAAACCGAGGAGGGAAGGTT -TGAAGCGTTGAAGAAGTGGTTTTCTATTCATTTGTGAATTCCTTCGAGCTGGGACAGTAT -CAACAGACAACATATAGGAGGATACGCATATAAACAAGGGTTAAGCGAACCCACAATCGT -TTGAACCCTGGATAATGTGACAATATCATGTTGAGCTGTACCTAAGCACGGCGCCTATTG 
-AATTTGTTGATTTGATGTTAGGGAAACTCCGGACCTCAGGGAAGACTTAGTTTAACTACT -GAGCCACATTCGCGTATTTAGATGATTGTTCTCTCCTATAATATGCCATAAGCCATCACA -AGATTATCGCTGTATTCTAGAACAAGTCTAAGATTATTGCCATGTTACAGCCTGAAGTGG -AAAATGGATAGACTGCATTTTGCAAGGTGTTTATATACATTTCTCAGCCAATAGCTTGAG -AATACGCTTCTAATTCTTGGTAGGAACGCTGGGCTCTGCCACAGTTTGCGCCTCAATGCC -AGTATCCAAATCAACCTCGACATTGGAGACAGTCCACGACATCCCAACGTATGAATCACT -GATGACTCTGATATTCACCCGAGCTGTGCTGGTATTCGTGGATTGCGCGAGGATACCCTC -GGGGAACTTAATATTGGAGCGGACAGTCAAGAGGCCGCCCTTTTTATCGTTGGCAGGTCC -TCCGCGACTGGAACTAGCACCGGAattgcccttgcctttgtcattgcaattgccatttcc -atagcGCTTGGATACCGGTGGCCAAGACACACGCTTCAGGCCAAGTAGTTCGCCATCAGA -GCCATTGGGCAAAGCACTGCAAACGATCAAGAAGAAGCCCTCGGTTTGTGGCTTGGGGAA -ACGAGGGGCAAAGACCTTGCATTCGGGGGTCGTGGGCTGGTTACGCCGCGTCAGAGACAC -AGCAACGCCCTTGGCAGAAACAGTCGCAACCGAGACCAATAGCTGCGGGAGCTGGGACGT -AATGCGAGTGAAATTGAATGGAAGGTTTAATTTCTTCACCAGAGCAGCAACTGCACCACC -GGGTTGTGACGAGAGCGACACAAGAGTTCCGGGGAGACCACTGGGTGGCTTCTCGGTCGG -AACACCTGGCAGAATCGACAACGGGTGGTCTTCAGGCCAGCGCGCTGACTTGATGCATTG -GAGGAGAGTCATCAACATCTGACACGCAGGAAGATATCCAAGTTCAGCCATAACATCGAT -TGAGGCCTGGATGACACGAATGCCTTGATCAAGCACCGATGTTTGATCACCAACATAATC -AGTGATGGGAAGATCGATCCGCGACATGTAGGCCTGAAGGAGGAGGAACGCCTTGGTATG -TGGATCCCAGAGGGGAGCATCGCCCATGCAGTCAATAGAAAGCGGCAAGTTCTGAGCTAA -CTCAGCATTTATCAGGTCTTCATTGTGTCTAACAGGCAGCTCGTCGAATTCGGTTGCGGA -ACACATCCAGCTCAAGACATCCTGGAAAGTCGGGTCACGCTTAGCATGGGACATGAGGAA -CCGGATCGTCTTGTGCGATAGGTAGTAGTAACTCATGATCTTGCCGAAGGGAGTGGAATC -GACCTCTCCAGTGGCCGAGTCCACGACAACACAGGACGATTCTGCGAGATCATTAAGTGA -CTTCCCAACCAATTCGACCATGAAATCTTGAGCTGTGGCTTGAGCCGCCATGGTATTTTG -TTCCTCGGCAGAGATCTCGAGGCCGTAGTATGAGGGGTTCTTGTGCAATCGACGGAAGAA -GAACGTCCAAGTCAAATAGTCTAATGCATCTTGTTGCGCCCCAATAGTTCCAGCAGATAC -CTCAGCGCCAAGATGGTTGTCAAGAACCTTGTGTAGGGTTGACTCGACGGGGAAACCAGT -GTGGAGGAAGTGCTTGTAGAAAGCCTTCTTGGAGTCCTGAGTGAAGATGCGCGCAATGCC -GGAGGTATCGAACTGTGGGCGACCAGCACGACCAAGCATTTGCAGAACGTCAGTCAAGTC -CATGTCCCGATAACCCTCGATCTTAGCGTCAAAGAACTGTGTGCCTTTGACCACCACAAG -ATGGGCCGGAAGGTTGACACCCCACGCAAGAGTACTGGTTGCAACGAGAACTTGAATCTT 
-GTTGTTGGCAAAGAGTTCTTCTGCTAGCTGGCGATCAGATTCAACCAAACCAGCATGATG -CAAACCAATACCAAAGTTGAGGGCTTCCTTTAGCGCATCATCTTTCACTCGCGCAAGGTT -CAACTCGAGATCATCTTCAGACATTCGCACAAATCGACGAGGATTGTCCTCCATACCGCA -GTAATTGATTAAATCTTTGGCGGTCAGACGAGTTTGTCGTCGAGAAGCAACGAAGACAAT -GACTGGCTTGTCCGGGGAGTGATTCTTGATCGCAAGGAAAGTTGGGCGATTCATCGACTG -CATCAAGGGACAGAAGCCACGCTGTTCAGGGAACCCATCGATGAAAATCTCGAGGGGAAC -CGGGCGAACTGAATGGCGGAAGTTATACAGACCTTCTTTGACCCCAAGCCAGTTGGCCAG -ATCAGAAGCATTGGCACAGGCGGTGGACATGCCCATCAGTCGAACGGAACCTTTGGATTG -AGAAGCGATATAATTCATTCGGGAAACGATGATTTCCAAAATGGGACCTCGGTCACCACC -CAGCAAGTGAATCTCGTCAATAATGACGAGGCTCACTTTCCTGACGTAGTCACGGGTTTG -CCAACTACGTGAGATACCGTCCCATTTCTCAGGCGTGGTAATAATGATGTCGGCATCCCG -GATGGTACGTGTGTCGGGTGTATTATCACCAGTCAACTCGACAAGCTTCAACCCCATCTG -GCGAGTGAGGCGCTTACGCCAATCTTGAACTCGCTCACGGACCAATGCTTTCATTGGCGC -AATATAAACAACCTTGGAGCCGGGCTTCTCACGGAAAGCCCACCACATTGCCAGCTCAGC -TGCAACAGTCTTTCCACTACCAGTTGGAGAACCAAGGAGAACGTTGGCCGGTGTGTGGTA -GAGAAGATGGAAAATCTGTGTTTGCATAGGGTTGAAGAATTGGAAGCGTTGGCCATACAC -CTCCTCAAGAATGGGATTCTTGAGAGCCGATATAGGAAGTGGCTGGAGATTGAGTAGATC -GGTATACACGCTTTCAGTGTCCGGGCGAATCAGGTGTTGGAAAGAAACCGGGCTAACGGT -TTCAGCACCCAGCCAGCGATCAGAGATCAGACGGATGTAGATTTGGCTGGGAAGGGGATC -GGATAGCGGAATGGTAAAATTGAGCTCATGATCAGCGTAAAGCTTTTTCCGGCTAAGGAT -GAAGTACTCGTGATGATAGATTTCTGATGTTTCCGAGTTCTCGACCCACACCCAAAACGA -TTCGGATGCTCCGTGGTGACGGTCATTCCATGTGAACTCCGGGAAAATAGAAAGCCGAAT -ACGAAGCACGTCGCGGTTAAGCGGCGCGATCTCGGTCTCAACACTAAGAGTTGGGAAATT -GTCCAGGAGCTTAGACAATGTGTTGCCCATGCGGTGGTTATGAACAAGCTGACCGATTTC -CGCGGGCTCCATTTCTTTCATCGACTCAATGGAAGTAGTGGGAAGCTTTTCGTCAAGGTT -ACGTAGGATCGGCTGGGGCAAGTCAAATTGACGGAATGGATGATCAAAAGGCCAGATCTG -TTTCTCTATGGACTTGCAAAGAGACAAGAGAACCTGGCATTGATAGCCCCAGCGACGATT -CAACGCGATCATGAACAGAGCACGGCAGATACGTGCCGCGTTTTGGGCAACATATCCAGT -ATCGGAGGCTAGAGCAAAGTCCTCAATCTTGGCGCGGGAGATATAGGCTTGCAACAAGAG -GTTTGTCTTCGCGTGTGGTGTGTCGTTTCCTCCGGCAACTTCAGTCTGCGCGACTTCGTC -GCGTAGCCGCTGAAGCTCTTTCGACTCGCTTTCTCTAGATTGGATGTTATCGAATTCTCC -ACTCATACTGATCATCTTGAGCACATCTGCCTCACCAGATCGGGGACGCATCATGTCGTT 
-GAAAATCTCGACACTGGTTTGCAATACATAATATTGACTGGCAATGCGGCCAACGTCCTT -GGATTTGAGATCTTCAGTCTTATCGTTAAAGATGATCATCTGACTCTTTTGTAGCACCCT -TGCTGCTTGTATGATCAACTGCCTCCGCCTTTGGACCAACATTGGGTCGTCACGCAACTC -AGCAAACTCAATTCCGTAGTTCCTCGGCTCACGTTTCATGCGCACATAAAGGTAGGAGTA -GCCAAGCCATTGAACAGCCTCGGAAACAGATGTGACTGTTCCAAGCGAGATTTCAGCATT -CAAATTATCCACAAGCCGGCTAGAGAAGCGCGACTCAATGGGCTGCTGCGCAGTAACAGC -AGATAAATAGTGGGTCAGCTTGTCATGGGTAGTACAGATAAAACCGATACCGGTGTCCTG -GAATTGTGGACGACCGGCACGACCAAAAATTTGCATAACGTCGAGAATGCTCAGGTCAAT -GAATTTACCCTCTTGTGGGTTATACAGCTGAGTACCTTTGATAACAACTGCGGCTGCGGG -GAGGTTGACACCCCACGCCAAGGTGGCAGTACAGCAAAGCACCTTGATTAGACCTTCTGA -AAACATTCGCTCCATCAAGTTGCGGTCGCTTCGAGACATTCCAGCATGATGCGTTCCAAA -CCCGCTAGCGAATAGGTCACGTAGTTCACGCGCACGCGCATGTTTCATGTCACGAAGGGC -ATTCGAATAGTTTTCGTGATCATGGCAGCTGAATAAATCCTCTCGTCCCTCCTCGGCGGC -TAGTTGCATAAGCATCCTGGCAGTCATCACGGTATCTTTGCGAGAGTGAACGAAAACCAT -GACTTGATGGCCTCTCTCCATCATATCGCGAACCTTCTCATAGGCGACAGAGTCAATGTT -GTCACGAGATTGCTTGGAGCCAGGCTTTCCTTTGACACCGATAAAATGTTGCTCAAGAGG -TACAGGTCGGAATGATTGGTCAAAGAAGAACATGCCGGCCATTTTGTTTACTTTCAAGAA -ATCCGCCACATCAGTATAGTTCGGAAGGGTGGCTGATAGACCGATGATACGAATCAGTGA -CTGTGTGCTCTCAACCTGACGTTGAGTTCGAGCAACTAAGGATTCGATAACTGCTCCTCG -CTCATCGTGCAACATGTGGACTTCATCAATGATGAGAAGGCGGACCTTCTGAACAAGCTC -TGTGTCTCCGGTGCTTTTCCGCGTTACAACATCCCATTTCTCAGGGGTAGTGACGATGAT -CTGGGTCTCAACGATCTCGCGCTTGGTCAACTGCATATCACCAGTCAATTCGCGCACTTT -GATACCCAGCCAGGCTAGACGTTTTCCGAGTTTCTCCGTCACCTCAGCAGCCAAAGCCTT -CATGGGAGCAACATAGACAATCTTAAAATCGTCAACCTGGACAGTGAATTCAGTCGCTTC -TGGGTCTTCCAGCGGATTGGGAGATGTGTTCTTGCCCACAGCATTGAGGACAGTCAACAT -GGCGGCATCCGTTTTACCTGCACCAGTGGGCGCGCAAATAAGCATATTTTCGTTCGTTTT -GTATGCAACGTCGTAGAGCAAACTCTGCATACGATTGAGGGATTTGTAACCCTTGAAGGT -ACCGCGACAAAGGCCATCCATTTCAGTAATCTCCACCAGTTTTTGGCCCTTGCCAAGCGT -TCCCACCTTCGCTGCCGGGATCGCGAATTCGGTGTACTTCGGTTCCTCGATCTGCTCGCT -ACCCACCGGCAGCCCATATTTCCTTCCATTGGCAGAAAGTGTATTTCCAGAGGTGTGTTG -CTTGAAAACATGCGGGTAGGAGGGTTCCGATCGGGTTTGGGCCGGCATCAACGCTGCACT -TTTATGTTCGTAGTCGCGCTGCTGCAGAGTATATTCTCTCTCTGCTCGTGTTTGTAGTCT 
-ACCTGCCGCAAGCCCATCTGTCTGGGATTCCAAGCGAGGTTCCTCGTCCGACAGGATTTC -GCTTCGGTGTGCAATTAGCTCAATAACGAGATCTAGGTCATCGAACCCAACAACCTCAGC -CAAAGACATTTGTAGCTCCTCGTCGTTGCTGTCAGTAGCCAGGGTGGCGATAAGTTGTTG -AGCAAGCTCTGTCGCGTCTATATTCGAGTTTCGCATGGCCAGATCATGGCATTTGTCTTC -TAGCCAAAACCGATTGTAGGAGGATCCTTGTGCAAGATGAAGTCCATCGAGTCCATGCAA -GTCGTCTAAGTCGTCGGAAGTTTCATCGTCCGAGCTGATGATATCCCAGACATCATCATT -TGTCCCGGGAGATGAGTAGTCGTCATCAATATCCAGATCAAGATCGCTTCCATAGCTGAT -CGATTCATGAGGGGGGTCTTGGGGGAGCTTCAGATCGGCAATGGCCTGACGCATGGCTGC -TAGCTGTGACAGCCACTGCGATTCGGTAGCTGCTATGCTGTCCATGTTGCTAGATAGATA -GAACTATTCAGTTCGCTCAACGATCAAGAAGTGATAAGCCAAGCAACAAAGAATGAGGGG -ATGAGCTGAACCAAAAGTAGTTATAGGGCATGCCCGCAGGGCTCTGCCGGTCGCATGACA -AGAGCGGTGTGATTTCCATGCCTCAGGCCACGATGTAGAGGGTCCAGTGCATCATTGTTA -TCGGCAACTCAGGAGCCTGTTTTAGGCTTAACACTTGTCACTTTGCTTTGCCACTCTAGC -TGATCATACAACTGACCTCCTACGACAAAGAAGCTGCCTGGGTCTTTGATACCAGCGCAT -TTCCTTGATAAGCACCTCCTACCACTCAACGCCCCACTACCCCCGAGACCTACTTGGCTC -CTTGTCCTTGTGCGGACAACCACGCTGTCACCATGAAGATTCTTTATATCGGTGTATGTT -TCGTTGCAGCTGTGATTTGATCGAGTATAAGCTTGGATATGGTACTTATCTGTGAAACTC -GCAGGTTTTTAGGAACGACACTCAACCAGCTCTCCAGCTATGTGGGGAAAAGGACCTCAG -CAGGTAATTTAACTTGCGCCCTTTGCAACCGACTGAATCATTTCGCTAATTATTACTAGC -TTTTCGCGCTTCACTCGTCAGAAGTATGCCCACCCATCCATCCTCTCTCGGTTCGACACG -GCTAGGTCACTAACCGCTAAGCGCTTAACAGCTACGATGAATTCTTGATGCTCTTCAACC -GAACCGTCGCCGAGAAGACAAAGCCAGGTCAACGGCAGGATATTGAAGAAAAGGCCTACA -CATTCCACGCTTATGGACGGACAGAGGGTGTCGCCGGTGTTATCGTGACAGACGGAGACT -ATCCTGCCCTAGTGGCTCACCAGCTTTTGGGCAAGATCGTGGACGAGTTTCTTGCGAAAT -ATCCACGGACTGCCTTTTCCGATCCCTCATTGCGCGAAAACGCATGCCCCTTGCCGCAGC -TTAAGGATTATATCGTCAAGTATCAGGACCCAAGCCAGGCCGACAGCATCATGAAGATCC -AGCAGGAGCTTGACGAAACCAAGATTGTCTTGCACAAGACGGTATGCTCTCATCTCTGTA -TGAGCTCATTTTACGTACTAACGGTCTCCAGATCGAAAGTGTTCTGGAACGTGGTGAGAA -GATCGACTCCCTGGTTCAGAAGAGTGATGGGCTGTCAGCTCAGAGCAAGATGTTCTATAC -CCAGGTATGTAAATCGAATTTTGCCTTCACCTGAATGTACTAACGTAACGAAATCCAGGC -GAAGAAGCAAAACTCCTGCTGTGTTGTGATGTAAAATGGGATCAGCGGTACATTGTGTAT -TCGATTATGACTGCTTGTTCTTGGAATTACCTTACTCGTGTAATCAGACGTCGAAATGAT 
-GCATGTCTTTGCTAATTTATCTGCTCTTATTTTTCGACGTCTATCTTTTGTGTCAAGTTC -TCTCGATGGGGAGGATGTCGTTCTCTTGTTCTGCTGTAAATTTGAATTGGGGAAGTCTTC -CAGCAAGTTCCAGTAAAGGAGTCACCGGAAGCTTATACATAGCTAAATGTCAGAGTTCGG -AAAATAAGAATCAGGAGTAGATCGCGAGCAAATATAGCTGAGCCTAGACGTGGTAGCTGT -AGTGGGTCCTGGGGGGTCTATTATGTTATCGACAGTGGCCTCCAGGGCTAGAGCCTTCCA -ATCTCAGAAGGCTGCCTGGCTTGATAAAACATCATTTGTTGCGACACCTTTTCTTTATAG -ATTTATTACAGCAAATTCCTGCGCTGCGTCTGTAATTTGTTTAGTGGGAGGGTATGCATG -GTCAATGTTCCAGGCAGAAGGGCTGCGCAGAATGTCGGCGACCCAAACCAAAGTAATCAA -GAACGACGAAGACGGGCGCCTAGTTCTTGGTACCTAGTGTCATAGGAACTCTGCTACAGG -TATCGGTCTATATTTTATCAAAGAGCGGTTACAGTGATATCGTGCTCGGACGGGAGCTTT -GTTCGTCTACATTTTGCAGCTCTATCGAGGAGCGCTACATCAAGGTGTATATATTGATGG -GCTAGGAACCGTATAATATTACTAAGTATACCTTTGTGGGTGCGCTAAATGTAGAGGATT -TTCGATATTTTGCGGCCACCAAAATAGTCATGCCAGCACAGACTGGCACAAAATACACCT -TATTTTACGGGCGATTCGATCTGTGCCACGGCATTGCTTGTGCATTTTGAACTAAGCTAA -CATATCAACAGGAGTCTGGGTTGACCATATCCATACCGCAGCAACGATGCTTGAAGCAGG -TGGCTCAGAGAGCTGACATGCCCAACTGCTAGAGGCAAGATGAAGCTCAATAGAGCCGAG -GAGAATGGCGATCTATTAAGAAGAATAGCAGCATGATTTTTGGAAAACTTCACAACTACA -CACCGGTCATCACAATACATCACCATCTCGGGCCCCGTTGAAGCAAGTACAGATCAATCG -ATGGGGCTAAGCGGAGAATGCGCAGGGGAGATGATGACTAATGACTAATTATCGTCGTGA -CAGGAAGTGGAGGCCAATCAGAATTAAGTCCAGAGTGGAATGGATCATCTGCCGTGACTA -CGATTGGTGAGTCTATAACAGCCTAGCTAGACTAGGTTAGCATCATGGTGACCCACATTG -AACTAACCTTAAAATGTACCATTTATTGGTGATGCATACAGGGAGGCAACACCAAAGGCC -CAGGAGGACATCAAGGCAGCGGCTTTGCAGTATGTGAATCTTTGTTAGCGATACGTCAAC -GCAGCTTTTGGAATTATACAAATCATCTAAAACAGCAATCGAGAATTGGGCACGTTCGAA -GAGTGTTCTATATATGTTCTATATATGTCATGATGAAAATACGTGAGCGTTATTCATGAG -TTCTAGTTGTTTCGCCAAGTAACAGCTACAACTGGATTCTTCCTTGGCTTGATAAATAAC -AGGGCTCACACGATGACAGAGTATAACATTGCAGATCACGTTGCTGAGTAATCTTTAGTG -GTAATTGAAGACCTATGGACATCTTCCTGGCTTGCACTGGGACAGGGAGAGCAAGATGGC -ATTGATGCAGATTTTTTAGAGGCAAGAGGCTCGTCTAGTTCATGAATCAACTCAGAATGA -GCTTTCAGACTCCATATATTGGCAACTACTCAGTACATTGATCATACGAAATGCTATCCT -TTGACAGAAATGTATCCTCAACATGCTAGTCACTATAGCTATCTATGCTGGGAACACCGA -GACAAGAACAGAGTACCATTTAAAGGCGGTCGGCAATTATTTCCTCATTAGAGAGTGAGG 
-CACCGCCGATCACCAGCCAGCGCCCGGGCAATCTAACGCGTGTCCAGAATGAGAGGTGTG -ACACTGTTCCTTTCTCTGAATGCATATTAGCACATGATAAGAGCAACGTTTCTGACGCAG -CGGGGTATAATAGGTCGGCAGCCAAAGGCTTCCTTTTCGGACAGGCCCTCATACCATGGC -CTGAGAATGGTGCACTATATGCCCATTTGGCCTGGTTCAAGCCAATCCCTTATCCTGGGT -CAAGCCTCTATTTGATCCGGATTTCCATAGGGACCATGCTATCTGCCCAGTAAACACCAG -ACACCCCAAATAGAAATTGGTTTCGATACTCTAACGCCCGACCCTGGCGTACTTCCAACC -CCGCTACCTCCACTTTTGGACGTTTGGAAGTTAAGGGATGGGGTGCTTATCTGAGGTCTG -AGCATTGGCGCTTAACTATTTCTGCCCCAAGCTAATAATATATGGGCTATTGTCAAGCAA -GGAACATTGATATAGTTAATTGGACCCGGAGAATAGGGGAAATGATCACCGAGGGTGCCC -TGGCCAAGAGTGGCGTCTCAGCTAAGCTCAAAAAATATATCTGGGCTCCTGTCCTGATTG -GCCACGAATGTCCCGATATCAAAATCTTACTTCCAAGGCTAGAAGTACAGAAAGTCTTAT -CTTTACCTTGCCGTACGATTGCATCTGGAGAAACTAGGCGACGAGCTACTGAGCAACCTG -ACTTGATGGGTTGCTTATCACAGACGAAATCCAGGTCAGTCGTTTATCTGGGCCGGGCTG -ATATATAACTAACCAGTCGCTGTCAATATATTTTCTTTCCTGAAAATCCAAAAGCATGAC -CGCTATGGACTCAGATAACGAGAAGAAGCCGGCCACTACCGTGGAGCCAACCGAAAGCCT -CCCTCCATCCAATGATGATGTAGAGGTTGGTAAAATTAATCCGCTGAAGCGGAATCTGAA -GAATCGCCATATGCAGATGATCGCTGTTGGTAAGCATCCTATGGCGGCTTTTCCTTTCAT -TCTGATATTGATTTCTGCTAGGTGGTGCTATTGGTGCTGGTCTTTTTGTTAGTACGGGGT -CCGCACTACGCACGGGAGGACCTGGTGCTTTGTTAATATGCTACTTGGTTGTTGGAGGGA -TGCTTCTTCTTACAATTCAAGCCTTGGGCGAACTAGCCGTGTTATATCCCGTGAACGGAG -CATTTTTCACATACTGCGTCCGGTTTATCAGTCCAGCCTGGTAAGTCTTCGGTGTTCATC -TACTAAGTCATGATCAATTAAGACTGACTACTCATTTTATTACAGGGGTTTCGCGGTCGG -CTGGGACTATGCTATCCAATGGCTCATTATCCTTCCCTTCGAACTTACGGCCGCAAGTTT -AACAATTCGTTACTGGAGTGAAACTTTGAATAGTGGAATTTGGATCGCAGTCTTTCTAGT -CGCCCTGACTACAATCCAATTCTTTGGTGTTCGAGGCTATGGAGAAGGTGAGACACATCA -TCTATATGTCTTTCCGGGATACCATCGTACTAACCTTAATACGTAGTCGAGTTTGTGCTG -GGAGCAATTAAGATCACTGCTGTCATTGGCTTCATTATTCTTGGAATCGTGATCGATTGC -GGTGGAGCCCCAAAAGGAGGCTACATCGGGGCTCGATACTGGAGTGATCCTGGTGCATTC -ACAGACTTTAAGGGGTTTTGCTCTGTCTTCGTGACAGCGGCCTTCGCTTTTGGTGGCACA -GAAATGGCTGGTCTTGCCGCTGCAGAGGCAGCCAATCCTGCCAAGTCCATCCCCAAAGCA -TCCAAGCAAGTCTTCTGGCGCATCATGATATTCTATGTCCTGGGAACATTTATTGTTGGC -TTGATTGTGCCCTCCAACGCCGAATGGCTGCTAGGAGCATCTGGTGCCAATACCAAGGCT 
-TCTCCTTTCGTCGTATCAATCAAGAACGCCGGTATTTCCGGATTACCATCTGTCATGAAC -GCAATTATTACTATTTCGGTCATCAGTGTGGCTAACTCTGCAACATATGCGTCCAGTCGC -ACTATCCAGGCTCTGGCTTCACGGGGAATGGCACCGCGAGTTATGTCTTATATCGACAAG -GGAGGTCGACCTCTTTACTGCATTATCCTGCAGATCGCTTTTGGTTTGCTCGCCTTCGTC -AACGAGGCACCGTCTGGAAGCACCATCTTCAACTGGCTTCTTGCCTTGTCTGGTATCTCT -GACTTTTTTGTCTGGGGTAGCATCTGTCTCGCCCATATCCGCTTCCGTTCCGCCTGGGCA -CATAATGGGCACACGGTCAAGGAGCTTACTTATGCTGCTCCCTTTGGTGTGATTGGGAGT -TATATTGGTTTGGGTCTTAATGTGCTATGCCTCATTGCAGAATTTTATGTCTCTGTGGCT -TCAAAAGATGCGGAGACATTTTTCATGAACTACCTTGCTGGGCCGCTCGTGATGCTACTT -TTCGCCGGCTGGCTTATCTATACCAAGTTGAATAAAGATCCGAGTCTCGACCGTGGAGGG -TGGTTTATTCCAATTGAGAAGATGGATATCGACAGCTTCATAAGGGACTCTACTCTTGAC -GTGGACCTTCCACCAAGAGTTGAGTATCCCACTTGGGGTGCATGGTTCAAAGCTGCTCCT -ATGCGTATAGCTCGATCAATTATTTAAGAGAAGGAAAGCAGCTTACATAGTTTGGGGTTA -GAATTGAAATCACAATATCTAGGTGAAGTCAAAACTTCGTATATCCACCGTGCAGACTGT -CGTTATGAATGCAATGTATAGATAGTCCAGCTGTACTGACCACTAAACATACAATTGCAG -TCTACCATCGCGCGAGAAAACAATAGAACACAGGGCCCATCGTATAGGATGTCCTCTGCG -AGGTCCGATCTGCGGGGATTACAACATGGGGTATTCCTCACCAATTCTCGTCCGTTGCCA -AATCATATCATAAACTTGCGATGACAGCACATCCAACCAACACCTTCATTCCTAAAAAAA -AACCCCCCCATAGTTCGAAAAACATCGATAAATGCCATATGATATACGATACCGGAGAAA -TTCGACGGGGAATACATCAATCATTCAGACCCTGAAAATGTAGAGATGAGGAAACCGACA -GGGCAGAGGAACTCGGACAGACTCTCATTTTCGGTGCTCAAATAATAGTGCATTATCACA -AAATATACAAGTCAAACAGGAACATCTAGCCATGGAGCGTATACCATGATTTTCAAGTAT -ACCTACGGTAACCTACCGAACCCATCGGTATCCACATTTTAGGCATTTCAATGGTGTTGA -CTATTATTTACACCGTAGGAGCTATGCATCATTGAATATATGTTTTTATTGGTAGAGCAT -GAGTAATTTGGTAGGGAGAGTACGCATATAGCTGGAATTTGATTTTGGTAATAAATGCCG -CGGCCGTTCGACCTCGCAGCAGCAGAAATTGTACCCAGACTGTAGGTGGTTCGGATCAGC -CCTCCAATCCCTCGGATATCTTGACCAATTCCTTCTCTTATGTCATCTCATATTGTAGTT -ACTGTCGCCTTCCATTGGTCAAAGTAAGCAGATGATCATCCCTCATTTTACAACCGAACT -CTTTTAGGTCCAATTTGCGGGCGCCTATCCGTATAGACTCACGTTGGCTGACCCCGGCAC -GAAGGAATCACAAAGGGACTTTGGAAAATCAAATAGAGCTTAGTGAAGCCAAGGCAGTAG -ACTTGAGTCATATTGGCCAATATTGATTTACGGCGTTCCCTCGATCCAACGCAAGGCAGG -CTACTGCAAGCTAACGCGAAAATCCCTCAGACGACGCCAATGCCCGTCCTGCGGTCTTAG 
-TGAAATTGCCGTAAATCTTAGCCGATCCACCTGGCAAAAAGCCACGAACATAGTATTTAG -AATATTAGAGGTTGTCTTGGCCGGAGGGAATGCAAGTCAGGCCGCTCGCCCAACCTATTT -GACTATAAAACACCCTTAAGACTGAAAATCAATCTACAGTGTGGGCTACAGCCATAGAAT -GATAAATCTGTACTCCGTATAGACCTCAGATCTATATATTGTACAATGGAATTGACTTTT -TCCCTCGCAGATCCATGGCCTTGGCGAGCTGTGCGGCTCGACACAGTGAGGATCGGGTCT -AGACTGAGAACCCATTGAGTACGGAATACCGGCATTGATTCTAACCAATCAATAATCCAA -GTAATATATTGAAATTAATATAGACACGACTTTTTTCCATACCAATAGGTGGGTGAGTCA -TTAAAAATTAGTCATACTCGAGTGTTTGGCCCCCAAAACTCATCGGAAAACACATCCTTA -CACCTTGCCGTAGAAAGAGACATTGGAGGTCTTTTTCCGCTCAACTTCCCTTGATTATCG -CAACAAGTGATGTACCACACAGTCATTCCTAAGCAGCCTCGTAAAATCCATAGAGGAATC -CTCCGATTAAGACGCAAATAGTTTCTCTATTTCGAATTGCTGGCCTGGGGAAGAACAGGC -TTGGCGGCCGGTGGGCTGATCCAGTAGACCGGATAACGACGCTGAATGTTGTTCACGATC -AAAGCCACAGCCAGGACCATAGTCGACGAAAGTAGGATAACTGGCAGATAGTACCAAGAC -AGTGCCCACACAGCTTCGTCCACTGCGGGAAGGAGGGCTGTGGCGCCGACTGGAGGATGA -GTTGTCTCGGTAATTTGCATAACCACAATGGCAACAGCGGACGAAAGCGAAGCCGCTAGC -CAGCGCAAAGAATTGAACTTTTCTTCATCGGGCATTAGACTGAAGAGTTTCGTGATGCAG -ATGCCGACGAGAGCGCTCAAAAAATGGCCGCCGACCAGAGCGCGTGGCTGTGCTAAAGGG -GCCTCAATAGAACCATAGACGAGAACTGCCGATGCTCCCTACTTTGAAATTAGCATTTGA -AAAGGACGCAGAGAAAAGATGAGAAGCACATAAGAAGCAATGATACCTGGCACGCCACGT -TCAATGAAGTAGGAAGAGTAATTGAATATAGCCTGTACCACACATAGTCCACAGAAAGCA -GCTACGAATGACCAGAAGTGAACGAGATATTTCGAGGGAGGCTTTGGTGCACTTTCACGG -TAGCCTAACCAGTGGCTGATCGACCGAGGGAGGCGAGCTAAGAATTCTTTCTCGGAGATC -ATTTTTGCCCTTCGTGGGTTGAATATGACAAAGAATCGAATTGTAAGGTTAAGGTAAAAT -GTGAAACCGAAAAGGAGTCAGGTCGAAATGAACAAAGCTAGATGTGAGGCACATGTTTTT -GGTCGAAGGGGGATTACAAAGATTATAAGGGATACCGGCAGACTCCTAGAGCTAGGGATT -TGACTAGACATTCGGGATTTGAAACTTCATAACATTTTCTCAAAGCCCATGGAAATTCAA -GTGAGATGACGGAGTTTCCATTTCTCTAGAAATCCGACAGCGGCGCTCGACTAGAGCTCC -GCAAAGCCCACAAGGCCCAAAACCCAACCAGACCCAGGCCACTAAAAGGTTGGGTTTTCA -GTCCTATCTTCGGGGTAGTACAATTTAATGGGGGCCTAGGGCCCAGAAAGCACCAAAAAA -AAATTTCGAAAAACGTGACATTTTTAATTACTACATATATATATAGATCCCATTCTTCAC -CTTTGCCTGACGGTAATATACTATTATTTTAATCAATACATAGTAGACTAATAGTTTAAT -TTATCAATCACGTTCAAGAACGCGAGAAAAGCGCGCTATCCAGAGTCTTTATGAGTTGCA 
-GATGTTTACCAAATCAGCGGTAGTGCTTTCGAATAGATTCGAAAAATATGTCAATATTTT -CGCGATATCTCGACCTCTAACTACTGATCCGATTTTGTTTAAAATTGAATTCAAATTGAA -ACATACACTTATATAGCTATTGTTATTCTAAATTCCAATAGATGAAGGTATATATAATAA -GTGGAAGGTTGGTATATAGCGATTACAAGATACTAGTTGAACCCCCTAAATGGGCTGTGA -TGGGGGCCTGATGGGGTAATCCTCTGGCCCGCACTGTTTTTTGCCAATCAAAGCTTGCTT -TGCTTAAAATATCCAGGCGATATTCTAGCGCCTTGGCATACATTAATCTATCAACTTACT -ATAACTTCACCTCAAGGAAGTACGTGAGAAAAGAAGGTAGGTCTTACGCAAATCACTCAT -CGAATGCAATGTGGCGTGCTACACGACTGGATGGCAACGTGCCCTCATCTTAATGTCTTA -TCTGTCTCACTGATGTCTAGGTATAGTACCCCGTGCGTGATTTGCCTTGTTTACTTGGAT -AAACAAACTTCAAGCTTTTTCTTCGACAACATCTGTAAAGCCTATCTACTTGGTCTCGAT -GCCATGCCTCTCCTACGGGCATCTGATTTCCATGAGAAGATACACACAAATAGTAAGGAC -TAAGACCCTGCCCTTGTATGGGACTCCATTGACAGATTGAGATAATTTTTGACTTTGATC -AATTGTTGGCTCCAGAGTATCTCCAGCGATAAATTATACTGGTACGTCCATGCCGTTGCA -CTTAAAAGAGGGAGTTGACATCGCTAACAAACGGAAGTCGCTTTCTTCCTAGAAACCGTT -CAAGTCGAAAGCAAATCAATGGATGCCATCTGTTCACAGTGCCATGCGAAACTCATTTTT -CTTGGGGCGGTTAATGCGCGCATCACGGAATCACAATTACATCGAAAGCTGGAATCTGGT -CTAAGCGTTTCCGCTGGGTAAAAAACCGAGCCACCCGGCAGTGTTGGTGTCGCCCTCGAT -GCTATCAAGGCTGCCTCCAAACCTCACATGTTTTTTTAGCGTCACAAGCAAAAAGAATAT -CGGCCGTGTACGGTAGGTATTGTAACTATAAGACGAGCGGAAGAAGGCCATGGCCGATTT -TGTCGTATGATAATTCGGATCGAAACCATCGAAACTAGGTAGTTGCCGCTACGAATTGGT -TGGGTAGGTTGCAAGTGGAGACGATGTAGTTCTTGGTGTGAAGATCGAGAGCAATCTCAA -TGCGGGCAAGCAAAAGTGCCTTCGCAAGGGGCGAGCGGTTCCAACTTAGAGTCAGAATCA -TCGATGCGTGTATTGACTGGGCGATAATTGTGGCATTTCTGGCAATGCCAGCAAATCCTG -TTCAGTATAGACGTTTTGGGGTGAAGACCCCAGCCAGTGCGCTCAAAGGGATCGGGTTGT -TTCGATAAGAAAACCACAGCGACTGGTACTTTGGAGTTTTCGTCGCTTCCATACAATACA -CAAATTAAATTCTTCGTGTTCCTTGACTGTCTAATGGCAATACCTAGGTACCTAGCGCCC -TGGTGAGCTGATCGAGAGTATTCACGTTTTTCGGCGCCCAATTCATTATGAGTTCAGTTT -AAATACGAGCTGGCCGTTGAAAGCCCGAACATCTCAAGATATGCATAAAATAGGGATTCG -GAAACCGTTACTGGTGAGATACGCCAATTGTCCGGTCGGGGATGACGTCACTTGGTCAAT -GTCCACCACAATTGGCAACTGGAAAGCAAGGGCGTACTTGCAATAATCAGTCAGAAGGAT -GCTTTTACATTTCAAATAATTATCAATATGTGAGATATCGGATCTAAGCCCACGTATGTA -ATGTGTTTCTGTTCGTGTGTCCCTGAGCAATAGGCTATCCGTTGATAGCCTATTGCCTAT 
-CCCGGAACACTGTATCCTTCACAGATGACGTTGGAAATCCCGTAGGCTATACGGAGCTGC -GTAAGGCGAGGTAACACATCCGTGCACTACCGGACATCTGTTCAGTCTCCTATATAAACC -TAAAGACACTCTGCGCATTCTCGAAACCAGACAAATCCTCTTGCTCAACCAACATTCCAG -TCCAACCAGCCCAAATTCCAATATGCAGGAACCAACATACGACCCCAGGTGAGTAATCTC -AAACCATGTTGCTGGTTGGATACTAATGAGACAATCAGCCGTCCTATGCCCTCGATCGAG -ATTGCCTTCTCCTACAAGCTAGTCGGCATCCCGAACAAGACGATTGTCAGCTTGCGCGTC -GAATTCCCACCCAACGGATCAACTCCCCCGCACCGTCATGGAGGCGCGAACGTTGGCGCT -TATGTGCTGAAAGGTACACTTCTCAACAAAATGAATAGCGATCCTATGAAAACCATCGAA -GAGGGCGGAAGCTGGTTTGAAGCACCGGGCTGTCATCATCGCATTAGCGACAATGCTAGC -AAAACCGAGCCTGCGACTCTGATTGCAACCATGGTCACAGATACCGAGGCCTTTGAGAGA -GATGGAATGGGGGCACTTATCCAGATTGACGAGGAGTACCGGAAATGAAATAACTGAGGA -TTGAAGTGATCAATAGCACACTCCGTGTGAAAAAATGAATAGAACTACATCATCAGCCAT -aaaaaaaaagaaaagaaaagaaaagagaaAATCTGTAGAGATGTTATTATTAAAGGGTCA -TCCACGGTGGGATCTATTAGAGCATAAATGAGCCCGCTTCCCTTAGACCAGGAGCGTCCG -CATCAAAAAAGGCACAAAAGTCAAAATCGGAGTTCGGCAACATGTCCAATGGCGGGAATG -CAGGTGATTTTGGTTCTGGAATGGGCTGATCCACAGCACCCAAACTCGCCGGGGGATCTT -TAGCAAGAGATAACAATTCTCGGGCAATGATGCCAATCTCACGGTAAGTCCGTTTGCCAA -GCGACCAGTGCTCACTGAGGCGCTTCAATGTACCCATGGTCAAACGAATTTTCTCTCGGC -TTTTGACTCGCTGGTCCCCTGAAAAGACGAACCTGCAAGCTGATAGATGTGCAATAGTCA -CATTAGCAATCATGCAAATGATGAAGGGCGAGTGGTTCTTCATATTAGTTGGCAAAGTCA -ATAGCTCATCAATGCTATTGATAGCCTGAGTGCATTTGAAAGTATGCAGATCACGTTCCT -GCACGCTGTTGCATTTCAGTTGTTCTGACGGAGCCAGCGGAGCACAGCGCGACACTGATT -CAATTGCGCTGTGAGACAAAGCCGACAGCGGCCGATGGATCTCAACGGTGTATCTAGGCT -CAGGTTAGATATATGGACAGCGACGAAGGGACTTGCAAATCTCACGTGTGCATAATGAAG -AATGCTTTGAACATGACCTCGTCAAATGATCCATCGGGACGAATTAATTGCCTCCTTTTC -GGAGACAGAAGAGAGTGCCATGCTCGAACACTAGTATCAGCGCTAGCAGCCATCGTAGTG -TATATTCGATCATCTGCAGTGTTGCCTGGAGACAACGCCCGATCAATTCCCAATGTCAAG -CCAATAAGCTCCGCATAGGAGGAAAAACTAGCTTCGTCTTCGCCAAGGAATTCCCTGTTC -TCGTAATCTTCTATGGAGCGTGGGGGTGGAATAGCCTAATGAGAAAGGTCAGCTAAGCCC -TTCGAAAAAGATAAGACGGTTACGATACCCCTGTTTCATACTGTTCTTCTTCACAAGGAA -AGTCGATCGTGATTTCAATACCGCTTGTTCGGAACGGGTACGTATGGGTAGATCCTGCCG -TCATTAGATATGTGGCCTGAGTAATAATAATTGAGAGCATACCGGCAATGTGGGCATCGG 
-TGATATAAATCACCCACCAAGTGCGGCGCCAGCTTTCTTCTAGCACAGTGTCAACTTCTG -CATTTGTTGAGGCAAATTCCTTTCTGTTCATCCCAAGGGCTACTGCCATGCGAATTGCTT -CGTCAAGGAGTTCAACACCACTTTCCGTTTTATTGCACCAGTATACAGCGATGGAATAAA -GCAGGAGAGCTTGAACGTCAAATGGGGTAAAGGGCCGGATCAGTGACCTTATTTCGGCAA -TTGAGTCCCGAGCACGTTGAGCCCATGATTCTAATAATTGAGGTTGACAGGAAATATCAA -AAATAGATCCGATGTAACACATCACTCGAAGAAGCGGCTGAATTTTCGGTTCGTTGAGAC -GAGTTTTGAAAAATTGGAAAGGCAATGCGCAAGGATGGGCTGCATGAAAGAACTCATAGT -ACAAACTCAGAAATGAGGATGCCGAGTTCGAAGATGATTCAATCTCCCCGTTGGTAGCAG -CAGAAGAGGATGAAGAGATAGTGGGAATTTGCAGTGATGAGTCGATGGGATTCTCATAGA -CAGCTCTGACAGGCGACAGCGAGGGGTTCCCCGGTTTTGGATTTCCACGCCGGCGTGATA -CAGGATAAACGCATGGTACTGTATTTGTGGCGCAGCGTGAGCAGACAGGCTTCCGGCCGT -CGCACCGAAGATGCTTATAGCGACATTGTGCGCACGAAAGAGAGTTCCGTGTTCCACGCT -TGGCAGGTTTGTTCCTATCTGGCGGGGTATTAGACATGGCTTTTGAGAATCTGACAAGGA -TAATGTAGTGAGAAAGGAAACAAGAGTCACATTCCGTGGGCTACTCCCGGGTGTCAATCT -CAGATTAGTCATGCAATATCGGGTTCGGGACCGTGGATCTCCAGGGGTGGCAAGGGCCCG -GAGTGTGTGGATCTGGGCACCGAGCATGTAGGCTTTGTGAGGACAAGCATTTATAAGTAA -TTCAAATAGTATTGAGATTGTATTGATTGCTTCATTTTGCCTTTTTCATGTACTTGGGTC -GCCGAGGTGATCAAATGCCCGATAATCAAGAACCTCACCTTGAACACATGGTGTAGAGTC -TCAGGCTGCAAATGATATTTGAATAACCTTCGCTGACAGGGATCAGCATATCCCCAGAAG -AGTTCAGATTTAGCAGTTGATCATACTATTGGGCCAATATTCAAGCCATGATCGCCTGAA -GGGACGTTATTATCAGGTGGAGAGAGCGGATCTTACATTGATTTCCAAGATCATTGCTAA -GACTTTGTCTTCAGCTCTTAGCCTATCCAATATAGACAGAATACTGAGAACTTAAAAATT -AAGTGGTACTTTTGATGTCTTCTTGCTCATTGTATGGTCTCGTCTTATACCCTCAAACAA -CTTGTTGGAAGAGCAATCATTCTTTCATGGTCGGATGAAGGTCTTCTGTTTCGCTCGTGA -CAGCCACGTCGCTATGCCAGCCTTTGAAGTCCCGAGTCCGCAATCTCAACGAGAACATGC -GATCAATCTCCATTGCCGTTCGACCTTTCATTTCTGGGATACCCAGCCACACTACAATGA -CTGCAACTGCAGCGAATCCTGCCATCACGAACCCTGTCTTCGCCCTCAGATTTCCCTGGT -CTGCATTGTAGATATACGGCAAAATGATGGCAAATACTGCAGTCCCAAAACCGCTGGTGA -ACCACCCAATACCTTGGGTCTTGGCTCGGAGTTGTAGTGAAGACGCTTCGGCTGCAACCA -CGTGAGACGCAGGCCACGACCCAAGACCAGAGATCATCACAATAATCATCATGCTAACCG -CGGTGTACCTAGTTTAAGGTTAGCTTGTCATTCTTGGGGTCTGATATAGTGTCTTACCAA -ATGGTCACGATTCCCGTGAAGATCCCTGCAATCCCTACCGCGAACCACATTACCATAACA 
-ATATTCAAAGTGAGTAGAATGAGAAATCTTCTTCCAAAGGCATTCAGTGCCCATAGGCTG -ATAACGTTCGCAATGAGGCCCAATCCAATGCCCAAGATCAAGAAAATCAAGCTATTAGCT -GAACCCATACCCACGATCTGCATAAAGTAAGAGGAATTTGCCAGCAGCTGCAGTCCGAAC -ATTTGAGGTATGATGTTGGCAAATGCGACAATCAAAGTCCGTCGCATGTTGACTCCTTTG -AAACAATCGGCATATGTATGAGTTGCACTCTCCTCGTCCTCTGCTGCAATCGATGCCTGT -AACTCGTCAATCACAGCTTGCGAGTTGATGCGAGATGTATCGAGTCTTTTCTGTTCCTTG -AGAGCGTGTTCCATTCGTCCCTTTCTTAAAAGCCAGGTTGGACTCTCTGGCAAAAATGCC -GCTAGAACGAACGGGATAGCCGAGAAAGGCCATTGCGACGCAAAGCAAATGCGGTATGAT -TCTGCGCCTGTATGTTTAAGCGAAGTATAAACCACGATCGATCCAACGAGTTGTCCAAGA -AGTGTGAAGATCGGATAGAACGCAATGACCGGACCTCGGAGCGCAACAGGAAGGACTTCG -GACATGTATGTCTGCGTGACACACAAAAGAATGCCAATACAAATTCCCTGAAAGAGTTTA -CCAGCGAAGAACACTCCTCGACGAGTCTCCATTTCGTCTGGCAGATCAGAAATGTAGCAT -ATCGCGACCGCAATGGTGGATAGGAAACTTCCTATAGCGAGTGTAATCCGACGTCCACGG -CGATCTTGATAGTAACCACCGACGACAGAGCCAAACATAATGCCGATGCAACTGGAGACG -TTCCAAAGGGAGAGCCACATTGTTGGGATGATGTACTTTCCGTGGTATTCCTGGCCGAAA -ACAACTCTGTCGCGCGTCAGCAGCGACACGCTCTATTCTCCTGGGGGTACATTATTTACT -GGAACTGAGGCATTGCAGCCACCGTCGCCACGATGACTAGGTCATATCCGTAGAGAAGGA -TTGCCGAACTTAGCGCGAGGCAATAGCCCACGATTCTGGGGGATTTCTTGATCGATTGCC -AAAGTGGGACGTGTGAGGTCTTTTGGGTTGTATCAAGGACATCACCACCCTTCTCGTTGA -ATTCTTTGGTTGATTGTGTCTCTGTGAGATTCATATTGTAATTACGGACTTAGTGAGCGC -TTTGAGTGACAAGGGGGGGGAATTTCTTTGTCTTAAATTTGGAGATCAACCACCCCTACA -GAATATTAGAACAAAGAAACCCGTATAGCAACTTTAAAAAGGGGGGAAATGGGGGAGCGC -CGAGCTATGTTCAATTTCAAATCCATCCCTAGGCATGCAATGAAACGGACGGAACAGCGT -CCTCGAATCGAATATCTTGTTTTTGAAAATTCCTCAGCACCGACCGCAGTGTGTCACAAC -CAACAAGGATATTTGGATAGTTTCTTGCTCCTATTTGAGTACGAGGCGTTACCCCCGCAC -TGCCAAGTGATCGTCTATGAAAGAAGACCATCGCTTCGACGTGACGGGCTAATCCCTGGT -CTTCCCGCTATAGTGTGATCATGTGCCTAACTCCCGTGTTCGGTATGTATACAGCCATCA -TATAGGTCTTCACAGGGGTTTGTGCTCGCCTGCCTCGGTTTATAGTTAGGAAGGCTGATG -CCGTCAAGGCTGATGCCGTCAAGGCTGATGCCGTCAAGAGAATATAGACGCTGCATGGGA -TGGGGGTCAATCTTGATGCAGCAACTTTTATATAGGGGATGCATTGGAAGGGTTAGAGTA -GGCGAGTATGACGTCAAATCGCGACGTTAAATGTTTACTTCGAGGACCAACTACTCCCAT -TTTCTTATCATCTCGCAGGGCCCGGCCTCTTGGAAACCTTTGATAACATACACACTGGAG 
-CTTCAGGATCGAGACTGAACGGTGTGGTTGAAGGGGTGTGTTAGAAATAGGCCGAGACTT -GTATTATGCCTTATGTCAATGCATCCAGTTCAAACAGTTTGGGAGAAATTAGGGCTGTGT -GCCCCAGCCTGGCCTAGATATGCGGGTCACCACATAATACCAGAGGCAATCTCCGTTGGC -CGCTATGATCCAGAAACTTCTAATGTATTTGTGCTTCGCATTGATATTGTCAGTCGGAAA -CAAGTATAAGGTATAGTTGGCTACACGTTTGGAGCAAATGAGCTTACCGAATCAAAATTG -GTAAGAGCAGCTGGCAAATAATTATCTGTATTTTTTGTCATAAGCGAGCGCCTCTAGTCA -GTGAACTTCTTTTCAGATAGGCCGAATTAAGGCATATGCGAGGATCTAATATGTAAATCA -CTTCCTTAAATCGTCTAGTTTTCCTTGGTCTGACCTGTGACAGTTCTGAGTCCAATCGAG -TCAGACCCCGATCGTGCCTCTTTCATTTTCTCCAAGTCCCCCAGATGTTGTTTCAATGCG -GAAATCTCACGATCTAGCTCAGCAGATGACGCGAGTCGATATTTGAGCTTTGAAATCGCA -GCTGAGATTACAAGCAGTGCTAGAGATGTGGGGTACGCCCACTGATCGACCTGACTTGGA -CTCTCCCCTGGAGGTCCGTCATTATAGATTTGGATCTCAATCCAAACGACAAAGGCAATA -ATGGTGAGAGGACTCAATATCATGCTGAAGAATATGCCAAAGAGGATCGCAGCGTCAACA -GTCACGTGGAGAAATGTCTTGAGGGACTGACGGCTGAGTATGTGTCGCACTCGACGCCGG -AAAGCGGTATTCTCGTTTGTCGGCCTTCCGTTTTCATCGTCGTAATTTAGGTTGGCTGGT -AGCTTCATGACACCACTAACCTCTTCTTTGAGGCCTTTCCACATATTCTTTCGTTCTTTC -CAGATTACCACAATCTGCGCTGATGGGATGCGTGATCGATAAGGTAGGAAGCGGTAAGTC -AACAGGAGACAGTTCAGGATCAGACAGAGAACTATGAGACTGTATATGTATCGATGTGAA -TACAAAGCCTTTTGCCAGAAGCTATGGGAGGCGGCGACTTTGAAGTGTCCATCTGCAGTG -GCGGAATGTAATGAGCCTGGCTGTCGTAGAATTTGTGTTGAATTAAAGCAGGGATTGAAA -CAGATATCCCCTAGTTGATTCCATCGACTTATGTTCGAAAATATATCCCACACACTGTCA -TTCCATGTGGATGTTCTCCATGAGGGTTGCCAGCCGTCATTTGATGTGTCGTCGAAAGGC -GCGAGATCGGGAAAGCAAAACATGTACTGAGGTAAGGTCTTGGGTACTTTTGCAGCATAC -ACGAGGGCACATATAGAGCCAGCCAGCATGAGGGCATGCCAGAGATAAAATAGAAGGTTC -CGTGCTCGGTCATGTGTTAGGATGCGGGACCAGCATTGTAGAGGCAAAAAGACGAGATAG -CCAGTGACAACAATGGCAAGAACAGCATCTGAATCAAGCTCAAGTGATGCGGGCGAGACC -ATGACATGGTCGGTCTCGGTGACCAAATAAGGAAATAATTTCCGTAGGCTGGAGGAGGCG -GGCAGATATGGAATATCGACAGGTCCAGGAACTTGAGGCATTTGGTAGCTAGACACCATG -ATAAAGGCGTGGATTGCCGCAGTTCCTGCGTATGTTGCGGCACTTCCCAAGAATACGTTC -ATGACCCAACCTGTCCATCGAGTGACACACGTCGCCAGCATAAGTGCATAGAAGAGATAT -CGTGGACATTGATTGTATATTGTACTGATCGGATAGAAGCAGAGAGTCTCCTGGGCAATG -AACAAGGGGGAAACCGTGTTGTGATCCATTGGGGAAATGGAGGTGGGTGGAGAGAATGAG 
-AAAATCTATTTTCTGGATTCTCGTGCTTTCCCTGTTTACCTTCAGGCTGCCTCTTTTCTT -TTGGGGTTGACATCATACCTATGTATATGTAGGCATACTTTCCAATCCTCAATTTACCAT -CCCCTACCTGCCAATATCCTATCACAAAAATAAAAATGGAAGAATATAATACGTGTGGCA -ATGCCACATCAGCCACCATGTACGTCGATGCAAAAAGATAAAAGAAAGAATTAGAGAGAA -ACTTTGGACTTTGGTTCTTCTACACAGAGTATGAATAGGTCTTCGAACTTGCAATTTAAG -TAAGCCATGGCATATAGGTGTCAGATAAAATGGTCACTCAATACATTCTAAGATCATTGC -GCCCCCTCCAACCACGAATATATTCATTCAAACAGATCACCAATTTCAAACCCGTGCACC -TGTTCTTCAATAAAAAATCATGGCTACGGTCAAAAAACCACTCATTGCTTCCTGTCCTTG -ATCTCATCCACAATAAACATCACGAGTCTTGTGATGGGACCACCCCGGCGCTTTGGTCGG -GCTCTCTTCTCGCGCGTTTGTCCCATCAGACTGCCGCTCTCCAGCTCCACCAAATCTTCT -TGTCGGCTCGAGACCCTAGGCTGCTGAGTGTACTCTGCAGGTGGTCGTACCTGAAGTGTA -AGGCCAATACCCAAGCATACAAGAACGACCACCATCTCCTCTAGGACAGACATCACAAGC -TGTATTGTGACATTCCCTGCTAGCATATTGAATGCGGTTTTCTGACCGAATGTATAAATT -AATGAATAGATCAATCGCACGAGGATCAAGGGCAGAGAGACTCCAACGGCCAGGAGAAGG -CGATGTTCTCCGTCCTCGATGCTTTGAGTTCGTTTCGAAATCAGAAGGAAGAGTCCGCAA -ACACCGACCCAGGCAACAAGGTACAAGATAAGCCCGACTTTTGTCTTTGTGTTGACAAAG -CCATGGGTAATATCCGAGTTCGCGGTTGCGCCGACGATGCTTAGAATCACGGCAACTAGT -GCAACTGTTCCAACAAGCCGGAAGATTATTGGAGGGAGAGGGTTGGCGGTCTTTCGCATG -ATAGAATCATTTCTAAGGGATGTCATTAGCTACAGGTCAAATCAATCGCAAATAGGGGTA -TTGCTCACGCTCGTGATAAAAGGCCGATGCAAGCCAAGATCAATGGCGAAAGTCCAAGCG -AAGTACAAACAGCCCAGGCAATGTATAGATCTGCAGTCGAAGGATAGTTGATCGTTGCTA -AATAGCATGCTGAGCCCACAATGCGGGCAAGACAAAAGACGATGAAAAAGAACCAGCCAG -AGCTCTTTTTGAAGCCGTGATGAAATGCCATCCAGAGGGCAAGAAAGAGGGTTGGAGAGT -AAACAACCATCTCGAGGATCGAGATACCATGCCGATAGTCAATGGCCATAGTTGCAAATA -AATATTAACAGTATTGGACCCACGAAAGCAGAAATGGAAGCTTGTATTACCCTCAACTCA -TGGGCCATTCATTTGCTATTTAACTCTTTCTATGTCATTTCAGCTCGCGTTGATTTCCTA -TTCAATTCCATAAGGCAGTGCAGTAAAATTTTGAATCTCATAGCCACGCCCTATATTGTA -GAAGTATAAGCAACGCGGATACTCCACTTGAGGATATGACTGGCCAATCGCATCTTCTCA -GGTACATATTGCATACTGGATTAGGCTTAGTGGATTGGGTTGCTACCCAAACGGGTATGG -AGGTAGCTCAGGCTAAGCCTCTCTGAACGCCACAAGGCCATTGCTTGGTGAACAATCTGA -CAATCTGACTGTGTTTAAGCTCATATACGCACAAAATGATCTGCAGAGTTTGATGAGTTC -GACTTTCTTCGCCCTTATAGGGAGGCTGCGTCTTCTCAGGCGTCCTAGCGCTATCACTTT 
-CCATCTCTTCGTTTTACTGCGGCCGCAGTCTCCACTGTCGTGATCGTAGGGACATTCTGG -CCAGAGTGGGAACTGTCCGATGGCCTGAGGAATGAGGTACCCACGGTCATAGCACATATC -AGTAGTCAGATCCTGCTCTCACTCACCTCCctctctctctctctctctctctctcCATTT -CACTTCCCCTTCCTGTCCGCCTGTTTCAGCAGCATATACACACACATCTCTTTTCCCATA -GATATCCCTCTATGCGATGATTCCCAGAGAGATATCTAGCGTAAGAATATTTTAGCATTG -CAGACCAGCTCCTTACGATTTCTTCCAATTCCCCACTTGCTCCCCACTTGCTCTCGCTTT -TCGGATTAAGAATCAAGTTGGAAGATTTGGCTTCCGACCGTGGTAAGTCTGTAGTAAAGA -GGCTGCCTAGGTGTGAGTCTGCGAAATAGTGAGATGGTATCACTATGCCCGACAAGTGAG -GAGTAGGAGAAGGAAAAATGCCTCCGGACAAGCCGAACTAGCTAGCTAGTATAATTTATC -GGGCTGGAAGGTAACCAATCAAGGGGCAAGCAGTCTCATGTAGCTATTTTGTGTTGGATA -TCTAACTACTCCTGATGAGAGTCTCTAAGTCAAATAAACAGAACACAGCCATCAACAGAC -TTCGAGTTACCGAGAAAGGTCTGATCAGTCTATTTGATTTGCCGTCTTCAACCTCGTCAC -CTGCAAGATGAAGGGGAGAAATGTAGACGCGAGAGACACAGGTGACTTCAATTAAATCTT -CGCTTGCTCTGGACAGGCTCTTGTTTTTTATCTTATATCCTACATGACTTAGCTTGCTGC -CCCTAATTCTAGGAAGAAATGTGCAAAGTAACCTTCAGGATCAGTCAGAATCACGAGCTT -GATGATAGCCTTGTTGTATAAACTTGGTCTCATGTGCATAGGCTGATCGTAATAAGGGAT -TCAATATTGCTTAGTACAATTTGCACATTCTCTTCACCAAATGGCAACCCTTGTGTTCAG -CACTGAGCCTATTCCCCGTGAAGGATTTCCCCGCTTCCCGACGAAACGGTAGAACGACCG -TTGATCGACATCCTAGAACGATGAAGATCACCGCTTTCTTGGTCTTTTTTAGGTTCATTG -CCGAAATCTATCGTATTTCTACCGTTTATCGTATCTCTACCGTTTATCGTATTCGCGTCG -TATAAGTCTGCCGAGGCCACTCTCTCTGTAGATCAACACGCATCAATGAAGGTATAAATA -CATATACGCTTGATCTCTTGAATCTCGTAGTTCCTGATCATCACAGTCGTCGACTCAATT -GCTCTTTGTCTTCTAACATTTGTCACGATATAACATATCAATTATGCTATCGTTCGCTGT -GGCCCTAGTTGGCATTCTGGGACCTGTTGGTTCTTTGGCAACTGATCCCGGTCCAGCAGC -TTGTTCCAATATCAGAGCGCAACTGGGATCATCCAAAGTCATTACCAGTAAGCTCAGCTT -GAAGTATATTACTAGCACTGAGTACTGGAGCACGCCCCAGGATGCCTATAAACCATCTTG -TGTCGTATTCCCTTCATCCTCAGAGGATGTCTCTATCGCTCTCCAAGCCATCCAAGCAGC -CGGCAGCCGGTTTGCAGTCAAAGGAGGGGGACATAATCCTAACAAATTCTTTTCGTCCAC -CGACAAAGGTGTGCTCATTAACTTGAGCAACCTCTCGGAAAAGTCCTACGATCCAGACAC -CACGCTGGCAACATACGGGCCAGGTGGTGTTTTTGGCGACATATACGACTATTTTGCGAA -CTATAACCGCACGGTGGTTGGAGCTCGACTCTCCGGTGTGGGAACCGGTCTGGCTCTGGG -TGGTGGCATGAGCTATCTCTCTCCCCAATATGGCATGGCCTGTGACTCATTCAGGGAGCT 
-GGAAATTGTTCTTCCTAATGGAAGAATTGTGACAGCATCTAACACTTCCAATCCCGACCT -GTTCTTTGCGTCGCGTGGCGGTGGAGGGAATGCATATGGAGTTGTAACGAAATATACGGT -CCAGAGTCGCCCTATTGGCCAATTCTTCGCGGGCAATATCATCTATGCCTTTCCCCAAAA -GGATACCGTTGTTGAGGCAATTGGCAATTTTATCCAATACAACACTGACCCGAAAGCCAG -CATTATCGGCACTTATGAGATGCTGCCCACCCCCGACGTGAATCTGAACCTGGACGAGGT -CATTATCATGTTCCTAGTCTACGATGGTCCTGACGCAGGCAATGCGTTCGCGAACTTCAC -GAGCATCCCACATCTTCTCGATACTACTGGCCTCAAGTCCTACCCCGATGTAGCCGACAT -GCCACTTCCAATCGCCACCGAGATTTCACGCGGGAACAATATCTTCCGTGTTGGGGTCCA -CCGTGCCAACGATGACGAGTGTAAACATGCCCTCAATGTGTGGCGCAACTGGGGCGAGTT -GAATAAAGGCAAATACCTCCTCCTTTCGCTAGATTTTCAGCCTGTCCCGAAAAGCTTGAC -AGATGCCAGCAAGGCGCAGGGTGGAAATGCAATGGATATGCCCGACGGTCCATGGTTCTG -GTTGAATTATTTGATTACAACGCTGCCGGGAATGAGTCAGCATGACTATGATGCCATACA -GTCTAGCTTCCATGATATGGTTGCGTCGACAACCAATGCTGAAGGTCTTCCATTGTTCAT -CAATGATGCGAACTATGATCAAAATCCTCTGTCGACCTTCTCTACATATGGCAAGCTGCA -AGGGATCAAGAAGAAGTATGACCCAGATAACTTCTTTGCTGATAAGACAGGGGGTTGGTC -ATTTGACTAGATGTAGTGGGCTCAGCAAACTTCCATTTATTGGTGATTTCTATTGGTGTT -TGAAGGATTATACTACTTCAACGTTCAAAATACTTGTATTTAATCACCGTATTATATTAC -AAGAGGATAATGCGCAAGAGATGCCCAGGTGTCATTTTGTGATCATTCTACTAGACAAAT -TCAATGAGCTGCCAATCTCGTGATATCAATACCATGGCCTCAATCCCTGGCCTCGGCAGA -AATCAATAGAAAGTACGGACGATGTCCCTCATTCCTCCATTCTGGTTGGCCTTCGACATC -CCTCTTTGAAGGACTCCAATCCTTCAAGCCGGTCAACACAAATCCGTTCTCAAGCAAGGC -TGTGACATAAGTTTCAACAGTGCGATGATATTTCCTCACTCCCTCTGTCCCCAGCCAGTT -ACTCAAGCGCAGACCCTCACTGCTATAAGAATTTAATGGCCATACTATCATGTCTTCACC -TGCTTCCTGTATCACCTTGAACCCCGGCCCTGGATTGACTAGGGCAGTTCCAACGGGGTG -TTCAACAGAAAAGATCAACCTTCCCCTTCGCTCATGGCCACCACCCTTTTTCAGACAGCC -GTGGATTTCCCGAAGAAGTCTTACAAAGTCCTCGATGTAGTGAAAGGCCAGAGAACTATA -AACCAGATCATATAACCCTTGCTCAGGCTGATGCCTCAAATTGAGAGTCTCCAAATCGTG -GATTTCGAAGGCGACTTCTGTGGCCGACATCGTTGCCGTCACGCTGTTTTTTGTTTGGCG -GTCAATCTCAGCCTCAAACTCCTTTGCTCGTTCAATCATGTTTCGGGAAATATCGATTCC -TTTGACGTAACCAGCGCCTTTCTCACGGGCCCAGCGAATGAACCAGCCGTAACCACACCC -TAAGTCGAGGATTCGAGACTCTTTCAGAGGTTTTTCTATTGGCCCTGTGAGGGAGGACTT -CGGGTTCAGAATCATCTCCTCCAGAATTGGCCATTCTGGTGCGGCCGGCAGACCATGTTG 
-AGATCGAGGGAGGTTTCCGTACGCAACGAAGAACTCTGGATTGTCATAAATATTTTGGGA -TTTGACAGGCATGGTCGCACGGGTAAACTCGGTACGTGGCTGGCGGTAGCTAACTAGGTA -CAACTCTGTAGTTTGATTGCTGAGGAAGAGCGAAGAGGGAATAATATAACAAGAAGAGCA -AATCTTCAATGAGCCAGGTAACAAATTACCCTCAAGTCAGTCCGATGGTGTCATGAAGAC -TTGGAATTACGAAGACTACAGATGTATAGAATTGGCGGGATTACCCGCTCCGTATATTTC -TCGATCCAGTGCCCGCTCTATTTTCAGTGCATATTGAATTTCGGAGTGATCTGTTTATTT -GATTAGCTCGTCTAATTTTTCTTGCCTTTGCCGAAATACTCTTTAGAAGACTTCCAAATA -CCCCTGGTGCCGATCGCAATATCTTGCCGTCCGTCATAAAGTGGGAAAATCACAACCGCC -CCAAAGGCAGCGAACTGCCAGATGATGGCGACAACTACCCATCCCGCGAAGAATGATTTT -GTGAAGATATAATCGCGATATAGTGGAAGAGGCCAAGCAACAAACGTGATCAGCACAATG -GCCCCGAACATATACCAAGCAATTTTAAGCCATCTGTACAGCTCTTTCAATGTTTCGTTG -TCGAATGGGTGCTGAATGTCATCGAGGCTGATTTCCACAGAAGAAGGCGACAAATCTTTC -TCCACGGCTTTCTTTTCGTTCATGTCAATTTGCTCTGGATCGTTGTTGATGCTTTCTGGT -TGACTGTCGGCAACCGGTTTGACAGATCCAGCGATATGCTTCGAATCACCTGAATGGTCT -TCCCCACTGCTCTCGTTCAGTGTGCTTGTATCCGAGGGCGATTGGATTTCTGCTTGATCT -GCAAGTTCGATTCGGAGGAAAACCCTCCAGTCAAACTTATACGGCTTGTAGAGTGAGAGA -ACAACGGAATAGAGAGCCGGCGTAAAGAAAGAAGCAATAGCTGCATATAGTGCCGGATAT -GGGTTTGTGGTAGTCGTGATGTTGATGGTTCCATACATATATTTGGTTGTTGCCAACCAG -ACAGCAAGACCTGAAAAGAAGCCAAGCACAGGAGAGATAATTGCAGCTATCCGCGTCTGT -CGAGACCAGAACAAAGTGAGAATCATAGGAATGATCCCTGGGCATGAGATAATAGGTCTC -AGGTACCCAATCCATGTCATGTTTGCCCCTCCGTAGTTGAGAGCGATGGAAATCCCGGTG -ATAAAGACACCATGAATGACCACAGTCAAGTGGCTCACTCCCACAATTTGCTTGTCACTA -GCTTTCGGGTTGAGGTATGTCTTGTAGATGTCGTAAGACAAGATGCTACTGACAGCAATC -ATTGAGGAGGAAACGGTACTTGTCAAAGCCATGAACAAAAGGAAGAAGAATCCGATGATT -CCTTTGTCTCCGATCAGTGCTTTGATGGTATATGGCATCACCATGCCTGAACTAACTTCG -GTGGCAGTAAGCACACCAGGGTAGGTAGGAAAGATTGGTGTATCATGAATTGCCCTTGTT -GCCAAACCAATGACTGTGCCTAGACCCCAGGGGATACCAAAAACAGCCAATGCAGCAATA -TTGTATCCAGGTACAGTTGCCTTGACCTCTGTCGCAAAGGACTTTTGCCAAAATGCAGTA -TCCTAACGATAAGATATTAGCTTTCTGAAGAAAAAAAAAAGGGTCTGTCTGTCCAGAGAT -CTTTCAAAGATCGCTTTGGGGAATTGACTTACCATAACGACAAGTGCTAGATTTCCAAAT -TTGAGAATCAACCCCCAAATAATCGCATCCTTGGACTTGAACGTAAGAAGTGAACCTTCA -TAGTTGCCCGGGATATAGTTATCAGCAGCCGTTGCAATCACCTTATCATACAAACCACCC 
-AACCCACCGACCGCCGAGTTTGTCAAAACAGCGATCGTGAAATAGATGATGAGAATTAGG -GCAATCGCTGTGTGCAAAAAGTCGGTCAGGAATGTGGCCTTCAAGCCGCCCACGGCTGTA -TAGAGGACAACTATAAAGGAATCAGTTTCTGTTTCCCGACACAAGTTTATAGAATCAATT -GAACGAACCTCCCAATGGAACGAGGATAGTTGCTGCTACATAGTGCATTCCAGAAACACC -ATGGATGAGTTGAGAGCCCGTCATAATCATCGAGGCGCAGCCAAAAACATTGTTGACGAG -GTTGAGCACGATAAACACAAGATGTCCAATCTTGCCATAGCGCATCCGAACAACTTCCAG -CGACGTGTGAGCATAAGGGACTCTAATCTTGGCCATCACGCCAAGTGCTGCCATCAGTGC -AATTTGGAAGCATAGGCCCGATCCCCACCACTATGATCTGTCAGCCGTGCAACGTCATTT -GAGGGCACAAGGGAACATACCAGCGGCACTGCCAGACCATACCTATATGTCATAGCCGCA -CACAATACTGTCTCGTTGATCCACATCCAGGAGCTAAAAACTGCAGATGCACACAATCCT -CTCCCTACAGAGCGATTGGCGACCATGAACATTTCCGACTTGCCAGAATCTTCAGCAAGA -TATGCTTTCTGCACCTTGACGGCAATCAAAATGACACCGCAAAACACGACGCTGAGACCA -ATCAGAAGGCCATAGGCCGCCCCCTGAGGGATCAAGGCCAACTTGGACGTATCTACCGTG -TTAGACATGTCTGGGCAAAGTAAAGCCAAGTTCAAAGGCGCAGGGCCGAGAAATATATAT -TATCTGCAGGGTCCATCATATCTTCAATGCCAAGCCTGCTCGTTGTAGATTAGCAGCCAA -TTGGCGTTCTCTGAAGGATTGCGTAACAATCAAAGTTGCCATGTGGGCGATATATCTCAC -GTCGTCCATATCCGCAATTGCGTTATGTGGAATGAGTAGGAGATATGCGACTCAACAGTC -TAGGCACAAGGCCATTGGAAGATTGCAGCGCGAATAAGATCAACATGTGGTTGCTGAATA -CAGGTGAGTTGTGAGGGGGATGGTTAGGATTGCGTGCCTCAACTCCCAGATGACGATTAA -ATGACTTGAAATCGCGAACAGGGGTCATTGCGTTCTCCACACAGTCGAATTTGTCGGCCG -ATATGAGATTCAGAATATTCATACCAAATGTCGAATTCCAATTTACCAATAGGGCGAATA -CATCCACCGATCCAGCGATGGTGACAGCGCTTGGCGCATACGTCTCCGCTATGTATACAA -AGCGGCTCCCATTGCTAGAGACTCGGTGCTAAGGATTCAGTACAATCTATTAAATTCCTT -GCATATCGAATTCCAGCTCAGAATGGGTGAAATCATACCAACCTCCGGGAAAAAGCAGAT -CATCCTCAATGCTTTCGTCATGAATACCCCGGGCCATTTGGCACCCGGGCTATGGAAGCA -CCCAAGAAACAAGACCGACCAGTACAAATCCCTCTCATTCTGGACTGATCTAGCTCAGTT -GCTCGACAAAGCTGGTTTCCATGCGATGTTCCTCGCGGACACCCTGGGGGCGTACGATGT -GTACAAGGGCCCTAATAACGTTGTGCCAGCATTGGCCTCGGGCGCGCAATTTCCAGTCAA -TGACCCATTGTACCTTGTTCCTGCCATGTCTGCCGTTACAAAGAACCTCATTTTTGGAGT -GACTGCCAGCGTGACATATGAGAAACCGTACGCGCTTGCACGCCGACTTTCTACCGTGGA -TCACCTCAGCCAGGGCCGTCTAGCATGGAATATTGTCACGTCTTATCTGGATAGTGCAGC -CCGAAACCACGGGCTGAATGAACAGATCCCTCATGATGAGCGATACGCAATAGCAGATGA 
-ATATCTTGAGGTGCTCTACAAGCTCTGGGAAGGCAGTTTCCGAGACGACGCCGTACTCGC -CGACCGACAGCTCGGAACATACATTGCTAGCGATGGTGTGCGAGAGATAAACCACAAAGG -AAAATACTTTGATGTTCCGGGGCCTCATTTCTGCGAACCATCGCCACAGCGTACGCCATT -TCTGTTCCAAGCCGGTGTGTCTGAGGCCGGTGGTCGATTTGGCGGAAAGCACGGAGAGGC -CATATTCATCGGCGGTCAAACCCCCGAGGGTACTCGCTTGACGGTGGATAATATACGCAA -CATTGCCAAACAACAAGGCCGCGATCCCAATCATATCAAAGTGATCGTGGGTATCAACGT -GATTGTCGCAGCAACAAATGAAGAAGCGGAGGCAAAACGGGAAGACTATCTACAATTTGC -CGATGACGAAGGTGCACTCGCTTTGTTTGGCGGATGGACCGGGATTGACCTGTCAAATCA -TGCTGATGATGAGGATTTCCGGTTCAGCGACTCTCCGCGGGTTCAGTCCATTGTTCGGAG -ATGGTCCGCTACCGTTCCAGGCACAGATAGCTTGCCTTGGACCAAACGGAGGATCGTGGA -GTACATCAGTGTGGGTGGGCTCCAGGCCAAGGTTGTTGGCAGCCCCAAAACAGTTGCAGA -TGAATTGGAAAGATGGGTGGAGATATCCGACGTGGATGGTTTTAATCTTGCCCATATTGT -CAATCCTGGAACCTTTGAAGATATCATTGAGTTTTTATTGCCTGAACTACGACGGAGAGG -GGTGTTCCGAGAGGGAATTGAGAAAGAAGGTGCCACAGCTCGTGAAGTATTTATCGGATC -ACAGCGTTTGCCAGCAGATCATCCTGGAAGCAAGTACAAGTGGCGGGCAGGGGAAACCAT -TCCTCAATATCAGACTAGGGAGAACTAGGTATAACTATTCCGTAGCCATTCCTGATTTAT -CTACATACATGTACAGATTATACGCTCAATATGTCAGTGTAGCAGGGCGCATTGTTGAGG -TTTACGGGTGAAGAAGGAAGGGTGTGAGTGAAAAAATAGATATTACTTGTGTATGTATCT -ATGTGCAGAGTTTTCACTATATATATAGAAGAGTACTCGGCCGTGTATGCGGCCTCTGCC -ATTGAAGAGGTTTCTACATTTACACTGTATGATTTGATCCTCGAGCCAAAATTGGTTGAC -TGCTTGACCATCTATATCCTCAGATAGTAAGATAGTAACATATTACTATAGTGAGATAGC -GAAATAGGGCTCTAGAGCCTCGCTTAGTTCAGATATAGTGTAGGTAAGATCAGAAAAACT -AAAATAAAATATCTAGAATTTATTCTAAAAACTTCGAAATACCCTAGGGTATCATGATAG -TAGACGCCTACCTAGTTAGCGGCATAATACAGCACTAATAATAAAGTGTGTAGCGAATCA -AATTTTAAACGACTACTATGCCTCGAAAACTAGATCAATTACTCTAAGGTTCGGTCTCTA -TATTCTTCGCAATAGAACTTAGAAGCATATCAATTCTTAGGTCGATCCTCGTACAAGAAG -TAATTCCAGTCTACTTAGTTCCCCCACCTTTTTCTCCTATAGTTAAGCCGAAGTCCTAAT -CCGATTGCGGCAGCCCCCATAGGATAGGGCTAGAAAATTAGGGTTTCAAAGGTACCTAAC -AGTCCCGATTTGCCTTATAGAAACTTGAGTAAGTCGAATGCCTCCCAATTTCTTGTATCC -ATTGCTTCCGTTACCACCGGCCGAGGCGTCGAAGGATGCCTATTGACCACTGAACCTAGG -AAAGAGCTGATTGTTGAGGATGACTGGCATCATGGCTGATAACTGCAAAATGCGAATGGT -ATGGCGTCCGTGGAACGCTCATCGGTTGCTACCTGGATTCTTGCTGCCGCTTATGTCCGC 
-ACGATGCAGATTGTTGAGGATTTTGATGTGGCTCATTCGAGTCTCTGATAGACATCAGAT -GATCAGTCTCGACTTGACCATGGTATCTTGGGAGTACCGACTTACTGAAGAAGTTTCCTT -GAGACGAAGTAGCCAACAGTGCCCAACAGACCGATCTTGAGAAGCACCATCCCTCGTCTG -GGTCCGTATTTTCGCAACGGAAAGTTGGCTCGAGTAAAGACTTGTCTGGCATAAAAGCGG -CTGAATCGGCGAGTAGTAAAGTCCATGATTTGTTGTAGAATGATTCCTTAAAATCTCGCA -GGTGGACGACGGATTTTTATAGGTAAAACATTCAAGCCGAGTCTGGAATTCCGAGGCCGA -TCGGGGTCGGGTTTTTTTTTGAATAGCTCGGCCGCGGACTCGGCCTCGGGTGTGCAGCGG -CGCCTTTGCGTACATCACCAACAATCAGCATCGGACGTCCGATCTTTTACTCGATATAGG -GAATTGCTAGCGACAGCAGGTATGTCCGATCTCCTTTCAGACGCAGAACTCGGCCTCGTG -CCAAAGATATATAACACTGGCCATATTCCTGAAAATCGGACTCGTTATTCCAATCGATGT -CACAAGCACAACTATCTTTTGCGCGCCCGATCAAGCACCCCAGAGCAACTATGCACGGTG -ACTGCACCGCTGAGCTCGAGAAGGGCAGTCCAACCTCTTCAAGACCATTGAGTCCAGCGG -AGCCCAAGCCTCAGGCATCTGGCCCACCAGCAGCACCTGAAGGAGGCACAAAAGCCTACC -TCTCGCTACTCGGAGGCTCCTTGGGTCTTTTTATCTCGTTTGGATGGGTTAATTGCATTG -CTCTGTTCCAAGCTGAATATGAGACCAATCAGTTGAAGACCTATAGCAGCTCAGAGGTGT -CCTGGATCACTTCATCCGAATGTGAGAAGCCTTCTCCCCCAACCGGCCGTGAAACAACAA -TCTCATTTCAATCTAGTTTTCTTCATGCTGTTCATGTCACCGTTGTCTGGACACCTATTC -GACAACTATGGACCACGATTGCCGATCTGTATCGGTGGATTGATGCATGTGTTTGGCCTC -ATGATGACCAGCTTGTCGAGCAAATACTACCAATTCTACTTGGCACAGTCGATTTGCTCA -GGAACCGGCACATCGCTCATTTTTACTCCGGCAATGACTTCGGTTTGTGTGATCACTCTT -GCAATTTATCGCGCATTTTCTGACAAGCTTCACAGCCTATGACTTACTTCAGAAAGCGCC -GCGCTTTGGCAGGAGGATTGACCGTTGCAGGCTCGAGTCTGGGTGGTGTAATTTTTCCCC -TCATGGTTAACAACCTCCTTTCGGAGGTCGGCTTTGCATGGACAGTGCGAATCTGCGCTT -TCTTGATCCTTGGGCTCTGGGCTATTGCCATGCTCACCATCTCCTCCAACTTGCCTCACA -CTCGCAAGAAGTTCAACCTTGCGAACTATCTTCGGCCTTTCACGGAATGCAACTTTTTGA -TCTTGTTGTCGTTCTCCTTTTTCCTATACTGTGAGCCGCCTATCATTATGTTTCATCGTA -ACACAGCTAACCAGGGTTTAGGGGCATTGTTTGTCCCTTTCGACTACCTTGTCCTGTCTG -CTATGCATAACAGAATGTCAATGACTATGGCCTACAATTTGATTCCCATCATGAACGGAG -CGAGCTTTTTGGGACGTACGATTCCTAATGGCCTTGCCGACAAGTACGGCCGTTTCAATG -TGCTTATCATCATGCTCCTTTTCACTGGTGTCATCACTCTGACCTTGTGGTTGCCTGGTC -GCAGTAACGCGGCCAACATTGTCTTCGCCGCTTTGTTTGGTATCGGATCTGGCACCACCA -TTGGCTTGACACCGCCCCTTGTGATGACTCTCTACCCACCTCAGGAAATCGGGTTTCGAA 
-TCGGGCTAGCTCTTGCTATCGCAGGTATTGGTGCCCTTACGAGCCCTCCGATTGCTGGTG -CAATTATAGGGACAGATGGAGGCTCTTTCCGTTTTGCGGCCTTGTTTAGTGGCGTGAATT -TCATGGTCGCGACAATTTTTTTGGTGTGGCTGAGGGGTAGAGTTTCTGGTTGGAATGCCC -TCACAAAAGCATGATGCCTAGGCATGGGCACTGAGCGCAGCAGCCCGCTGCAACTAAGCA -TCTTGTGAAGAAGAGACTGGCGTGGAGGCGGAGAAGTATAAATCAGCACATGCGTGATAT -CAGCACGATCTTACGGCGGTATAGTATAATTCCTTCTATATAATTACGAAAATTTTAGTC -TTATATTTACATCAGTCTTTGTAGAAAGATAGGCAATATATATAGATTGGGAGAGTCCAG -AAGTTCATTTTATGGCTATTAAGACTTACCTTTTCGTTATACGTACACAACGGAGACGGA -TAACCTGAAGGAACCTTAAAGTTATGACAGCGGTCAACTGGATTGATTTTGGCATCTGTC -CTCTCCGGTGCAGTACGAAAGCTTTCCGCCCATTTGCAAACAAAGGTGTCAACGCCGCTG -ACAGGGGTGACTGGGTGTTCAGCCGTTTCCTCATCAAGCCGTTCTTCAATCGTGATGTTT -ATACGAACACTGATCGGATCAGGCCGTATGTGGACTCATTATTGTCTCTACTGCCGCCTG -ATAGAGAGATATTCAACATCCAGCCCCTGCTCATGCGATGGGTAGGTTGCGCTTGCAAGA -AAGCAACTTGAACGTTTCATTACAATTCTCATACGAACATAGTTCCTGGATCTGACCCTT -GGCGAGCCTAAGGGTGCCCTCAATCATCCAGCCAAAGCCAAAATTACATGGGCCATGCTG -GATGTTCTTCGTGGTAGTCGCCAGCGTATGCAGGTCGTTCGCATGATCAAGTGGTTTAAC -TGGGACTGGTGGCTCAAGGCAGTATACGAGGCTCATGCCTTTGTCTACAAGCATATCACA -GCGCCTGAAAGAGATCGAAACCCGCAAGGCACGCATCACTGTTGGAGGGAGATTAGGCCC -TGAGCAGGAAGACCTTCTATGGTACACGGCATCACATGACTCCGACTACGAAGACCTGCG -GTCTCAATTGGCCCTGCTGCTGGTCCTGAATAACGTCAAAACTTAGTTTTTCATCCGTAA -TGCCCTCTGACACTTGGCTCGTCATCCGGATGCGTGGGCAAGATGTCGTGAGGAGGTACT -GACATCAGTTGAACGCAGCGAGGACGAAGCGGTTCGTCAACTCAGCTTCTCCACTCTCAG -AGACATGAAGTTTCTCAATGCTTGTCTTGATGAGACTCTGAGACAAAACCCAAACAAAGT -CACCCAAGTCCGTATCTGCAAGAACGAAACGACCCTCCCATATGGCGGAGGCAATGACGG -CCAGTACCCCATCTTCGTCCGCAAAGGCAACCTGGTCCAAGACAGTACAGCAGCGCAGTC -GGGAATACCGAGGCGAGGACGTAGACGATTTTTGTCCTGAGCGCTTCCTTAAGCTGAAGC -CCTTCTGGGAGTTCGTGCCTTTCAGCGGTGGTCCTAGAAGGTGCCCAGCACAGATGATGG -TCATGACGGAGGCTGCATACATGTTGGCTCGTTTGGTAATGCGCTACGAGCGAGTCGATT -CCAGAGATGCGAATCCATATCATGTTATTATGCGGATTGGACTAAGTAACAAGACTGGTA -TTTGGGTTGTCCTCCACAAAGCAAGCGAAAGAAATTAGTATCGATGAACGACAGTCGACA -TGAGAAGTACGTTTGCTCAAACTTGAGAACATTTGCTTGACTGTGAGATGCCAAAATGTC -AGATCCATGCCTTCCAACTCTTTATTGTCCATATACCTGCGTTTCACTCGTTTCTAAATT 
-CCTGCAACGGACCAGCACAACTTGGAAATCCAACGAAGGAGACGATGCATTGACGCCTAG -GAGCGAAGGCTATGTCCGACGATCGAAAGCGACGACAATGTGTCATCAATGATCGGCAAG -GAATTCGCAGGTTAGCTGTTGACACCAATAAGGGCTGTATGAAAATACCTGTACTGAGGG -GGTTGAATTGGACGGTGATTGACGAACGTTCATAGGAGTGGGCTTTATGTccgagccccg -agcccggacccgggcccgagtccgagAGCAAGATGGAACAGCACCTCACTCCAGCAGCCT -GCCGACCCCGAGCTTTTCCTCCGACGTTTCCAGTATCATTGATTTCCGTTGACCGCGCAT -TTAATACCTGTAATTTAGACTGTCGTCGAGATGGCTGAGGACCAGAGCTTTGGCAGTACC -CAGTCAGCACCTTCACTCGCTCGTCGGAGCCGCCGACCGGTGCAATCCTGCGACCCGTGC -CGCCGCCGAAAGGTACGTTGCGATCTCAAATTGCCATGTTCAGCCTGCCAAGTCTCCAGA -AACTCGGTGACGTGCACATATCAAGGAGACCAGCAGCAACCCAGTACCGCCCTGGAAGAC -CGTGGAAGCGAGGACAATGATGTTCAACAAGTTCCTCGCTCGCTCGGCATTGACCCAAAC -AACAACATCGATCACCCCGCAAAGAGGCAACGAAAAACGAATGAATCAGAACGAATTCGG -ATCGTAGAGCGACGTGTACAGCAGCTTGAGCACTCGGTTGTATCAGAAGTCGATcaccag -cagcacagcctcagcagcagcaacaAAAATCATCTCACGCTGCTGGCATTGTCTGATCGC -CTCCAGCACGTCGAGCAGCGGCTTGCTGAGCTCTCCCAAGCTCGTCCTGCGCCTTCAGCT -GTCACGGGCGATAAGTCTCTAACCATTCCGTCTGTCATCCCTCGACTTCGTCACAACCCG -AAGAAGGTAAAACTGTTTCCTTCAAGCCACTGGCTGCACACTGCCGAGAAGGTATTGTTG -AAGCTTCTTTCCAATGAACACATTGACTGATTGAACATTGGCTGACGTGTCAATAGTTCA -GAGTCAGCCATATCGGAGACAGTGACGTTGAATTGACCTTCGTAGATACCAAGGCCGAGC -TGGCCGATTCGGTGAATGAGATACGCAACTTGCGACTCACGATGAAAAATGAGCAGGCAC -CGCAGATGGATGACCCCGTTGCTCAACTTCTGGGTACAATGCCGGCTCGTTCCGTCTGTG -ACCGACTAGTGAATCACTACATGCGGACTTTCGAGTTGATCTATCGTTGTTTCCACATTC -CCACATTCTGGGAAGAATACGAACAATACTTTAGCCAACCGCAGCCTGTAATTACAAACT -TTGCAATCAAGCTGGGTTTGGTGTTGGCCATTGGAAGCGTCTTCAGCAACGACGACGACA -AAGTCCTACAGCGCTCGCGGCGTACTTGGATACAAGCTGCTCAATACTGGATGGTTGGAC -CGTCAGAGAAAGCTACTAACAATCTTGATGGAATTCAAATTTTCTGTCTCTTATTGGTTT -CGCGGCAGATAACATTGGCCGGTCGTTCACCATGGCTGTCGGAAAGTGGTCTACTTCGGG -TGGCTATGGCTATGGGCCTGCACCGAGATCCACAACTCTTTTCTCTGTCTCCCTTTCAAG -CCGGGATACGAAGGAGGCTTTGGACAACAGTCCTAGAGCTCGTAATCCATGGTTCCATGG -ACTGTTCCATCCCTTTGCTTCTTTCTTCAAACGACTTTGACGATCATATGCCTCTTAATG -TCAGCGATGCTGATTTAGATTCAGCCAGCAAACAGTCTCCATCTCCAAAACCATTGACAA -GCTATACGGAATCGTCGATACAAATTCTCATGCGACAGTCCCTGCCAATACGACTTGAAG 
-TTGTCAGGGCGGTCCAAAACGAGCGGTTAGAGCACTCATACGAAATGGCCATCGAGCTAG -CTACAAAGCTTCAGACAGTTTGCCGTACTCTTGCAAGCTATTGTCGGTCCCATTGGTCGC -AATTCCTAAGTATGCATCACGCTTTCCTTGATATGCAGATTCGCCGGTATATTCTGGTCC -TGCATCGGCCTTTCATGCTACAAGCGCAAAAGGATCCCCGCTTCTACTTTTCCCGGAGGG -TTTGTCAGGAGTCTGCAATGATTATCGCCACTTATGCCGAGCGACTTAGGCTACCTGCAG -ACCCTCTTGACGATCTGTCCAAATTGATGGTGATGAGCACAGGGTCATTTCGCGGACCTT -TGAGCCTGGACGTAATTTCAGTCTTGGGCTTGGAGGTTGTGTCTCAGCTTGAGGAAACGG -GTTCCGGCCCGTCATCTACTTCGCTTGGTCTCAACCACACATTGGTCCTTGATCCTCTCG -CTGAACTGGCGCGCGGCCAGCGAGCGCCCATCATTCGACTTTTGAAACACATCAGCGAAC -AATTACTGCAGATCGTAAAGTTGGGTCATCCTACGTTGAAACGCTACATCTTTCTTGCTG -GCATTCTTTCCCAGATATACGCCATGGAACGCGGCCAACCCACTTTGCCTGCTGTGCTGA -ACACTGCAAAAGACAGCCTGAAGGAGTGTTACGATGTACTACAGTCCTACAGGGCCGCCA -ACACTCCCCGAGAAGTGTCGCAAGACTTTGATAATGGGTTTCTTGACGGCCTGGATTTTG -GCGCAATGGTAAGTACAAATCACCAAATATGCGCCTCTTGTATAACACTACTGACTGAAG -TCAATATTCCAAGTACCTGGACTTTTCGGACTTTTTGGTTCCAACTATCTGATAGAGCAA -CTTCAGGACGATTTACGTCTGAAATGAAGATCGTGATTAATTGAGGTTAATTCTTCCCAT -TTTGTTCCATGTTCGATCGTCTCTGTCTCGGAAGGGTTTGCTTACAAAGGTGAGGTATGA -CCTGTCGCTTAAAGCAACACTTGTAATGCGTTGTGCACCTTGTCCAGACTATCTGAGGTG -AGCCCATACCGCCAAGGACCTGGGCGACTGTTTTCTCCGCTGTCACATCGTCTGCATTAG -AGCCAGAGTTTGACAGCATCTAACATGCCAATTATGAATGAAAGTTTGGAAGCTATGGGG -TTAGAATTATATCATTTGACAGTATGACAGGCTCTTGGCTGGGTTGGAGATATATGAAAA -CAAAAAAAGGGAAGGGTCGAGACTAAATTTGTAGATAAATCACAATGGGGTAGGAATATG -GCTATCTTCATGACTGATTTCTTATGGAATTGTATATCCCTCCTCGGTCCTAGTACTCGA -TTTCCGCAGAATGGAGCTCAGAAATCCTCTCCGGATAGAAGGCAGAATAGCACCGTAATA -TCTACCATGTGGGTGGAATATAATAATCCTCCCAAAGGTCAGACTCAGCATCTTTGAAAG -TATCAGTAGTACCTTCTACCCCATCCGCCCCAACAGACCAGACGCATTTTTCAACCAGCT -TACGACAACAACCCAACAACCGCTCCAGGCGCTGGCAGCGGTAGTAGTCACCACCAAAAG -GCTTATAGCCATGCTGAAAGAGCTCTGAAAATCGTCCCCTCTCAACAAGAGACCTATCGC -TTTTTTGTGCGCCTATACCCTTATGCCAATTATCATTAAAGGTAAACGGAAGTAACAGCC -GAAATCCCTCCTCCGCCACGTCAAAGCCGTTTCTCTCCACTACCTCAGTATATACGCCTG -CAGAAACGCGAGATAGTATAAACGTTTGGATAGACCTTGAATAAAATTCCGACTGTAAAA -ATGGAGATGTTTTTCTTGTTAGGTGGATAGGACGCTGGTCTCCTATATCAGCCGGGAAAA 
-GACATACTGGTGGTACGGTGTACTTAGAGCGGGGCAATATATTAAATGGCTGCGCTGCCA -TAAAACCAGCCAAGTTAGCCGGGGGTAGGAGAAGGCCCGGAGCGATATAGCGGAACTGAG -GCCGCCTCGCGCGCGTTAGAAACGAACGTGTAAAGGAAGGATTGGGCACCGAAGCGGCGT -CCAGGACACTTGATATAACTAACGGCTCTGAATTATCCGCATCAGTCCCAATCAGGGGTA -GGTTGAACGGCTGCGAGATACGCGCCTCAATGGCCTGACACAGCCGATCCCACTCGGCGA -CACACGTAGTAACTTGGGTGTTGCTATAAGGCCGCCACTCCCAGGGCCCGATCTTCTCAA -AGTCGAATAGGGCTGGCTCTTTGTGAGGCGAAGCCACAACTTTTCCAATATGAATCAGGT -CGATCCAGTTGGTGAGTAGTGTCTCTAGCGGGTTCCAAACGTGTTTACTACCCTCTACGG -GAAACCCATAGCCATAATCATCAATATGCATGAAGCTCGCAACTCGGTGAAGGCCTTGGT -GGTAGATAACACCGCCATCACTGTCCACAGCCGATAAGCCTCCTTCGACAGCGTATAAGC -GCACTAGGCTGTCAATAGGCTGATCTTGAAGATCAGCTATAGATTCCTCAAAAAGCCTTG -GTGGCCCATCGATACCCCATGCAAAAACCGAATATTGGAAGATATTCGTGAGGATTATCA -AAAGGAAAAGGGGATATGATAGCGGTGTCAAGGAATTCCGCTAGTGATGGATGAAGCCTT -GGACGACAAGCCTCGGCTTCATTTTCATCGCCGAAGCCCGTGAAAAAGTTGTTGTTGGTA -TCCAGAGTTTCCAGTGGGCGGCTTTCAGCGAGCCAGGTATATTGAATCAGGTAGTTGTGT -AGAGCTGCATATCATGTGTAGTCCATTCCTTCGAGATCATCTCGTGGAGGGTTATCTGGC -CTGATAAGTAGGCTGGTGATGTCCGGAGGTTGAGAAAATTCATTAGTTGTCAAAGGGGTA -CAGCTGGAGAGCAAGTCCCGCAAAGGCACAGCTTGAAAGCGGTAAGTGGGCGGTCAAGAC -AGGTAGTGAGTTAGACGTTGCTAAATTAGACCAATTGTTCCTCTTCTCTTGGGCTTACCT -GTAGGAGAGATCTACATGTACCTTGAGAGTGTTCCGCAGTCGTACCGTTGCCGAAGCCCT -CCAGGTACTCTCCAGGGCTTATACGGGCTCGGATCTACAGGCGTACTTGCAATTATCATC -CCAGTCACATTGTGGTCAATGTCAGTAATTTAGTATCAGTAGTCCGCGGCCTATATAAAT -TGGACGCCTCGGTTGACTCTTTCAACCCGGTCCACTGATAAGCTCGATTAATAATTATGC -CTGGGACCCCTGTAGGTGGACTTCCTAAAGGAGCCCTCGCTCCGTGGCATTACAAATCAA -TTTAATATAAACCTTGGAGTATCGACTTGATCTGCTTGAATCCCTTCAAAGACCATATTT -CAAAATGAAAGAAGCTGTTGTCGACAAGACCACTTCGGTCATCATCCGCGATGTTGACAT -CCCCACGCCTGGACATGGTCAAGTCCTCATCCGGGTAGTAGTGTCAGGTACCAACCCGAA -GGACTGGAAACTCCCCATATGGCAACCAGACAATATCATGAATCAGGGAGACGATATCGC -GGGGTATGTGGAAGCAGTGGGAGAAGGAGTCCTGAACTTCCGTAAGGGAGACAAAGTGGC -CGCGCTGCACAATCTGGGTTGCCCACATGGAAGTTATGCCGAATACGCCATTGCATGGGA -ATACACAACATTTCATCTCACCGAGGATACCGGATTTGAAGGTATGTCCCACACGTCTTG -ATTGTCACTGGGGTAAAACTAAACACATTGAGCAGAGGCTGCTACTATTCCCCTTGCTGC 
-GATGACTGCTGCATTGGGCCTATATCAACAGTTAAAATTGCCTTTGCCTTGGAGCCCCGT -CGATAAACCTACTCCATTGGTTGTGTACGGCGCTGCCTCAGCGGTCGGCGCATTTGCGAT -CAAGTTCGCTCAACTATCGAACATACACCCCATTATTGCTGTCGCAGGCAAAGGTGCTCC -CTTCGTTGAGACGTTGATCAGCAGAGAAAAGGGAGATACCATCGTTGATTACCGCGAAGG -ATCCGACACGATCTACGCGGGGATTAGAGCTGCTGGCAATGGCGTACCCATTCAGCATGC -CTACGATGCCGTGTCCGAGAAGGGAAGCTATGAGAATATCGGTGCTGCTCTCGATAAGCC -TGGGAAAATTACTGTTGTTTTACCCGTGAAAGATGAGAAGGCACAGGAGCAGATTAGCTT -CCATCCTACTATGGTTGGGTCTGTTCATAAGCCCCCTGCTGAAGGCCAGGCCCTTGGTGA -TAAGGAGTTCGCGGCTGCCCTTTTCCAATTCATTGGTCGAGGTCTGGCACAGGATTGGTT -CTCGGGCCACCCATATGAAGTGAGAACAGGTGGACTTGAAGGTCTCGAGGCGGCATTGAA -AGATCTGAAGGCTGGCAATGCATCTGCTGTGAAATACGTGGTTGAGATTGCCGAGACTAC -TGGAGTTCAGCAGTGTTAGTAGTTATATAGTTCGAGAAAGCGATCTCGAGTTACTCCCTG -AACGCAGTGCTGCAAAATTTATGCCAAGGCAGATCGGGATTTATGGATCGGGAAAGTATA -GATCGGGATTGAAATGACATCACCGTAATTCTCCAAGTCGCGTGGCGCGTGAACATGATA -TCTCAGTCACAATTGCCAACGCTTAACATTTCGAGGTCTTACTCCACCATGGCAAAACGT -ACCCTCGACGCATTCTTCAAATCATCAACCACCCCTGCAAAACGACTCAAACCCGAGTCT -GCCGAGTCCAACACCGTCTCACCCACAGAAACAGACAATACCCCAATTGAAGACACCAAA -CCCCCTAGTCATCACCCATCCTACCCAATTCCAATCGCCCAACTCCCCTCACATATCGAA -GTAGGCCTGGAACACGCAACACCAGCAACCTCGGCTCGTGAAATAAACAACCAACCACAC -CTCGACCTCCTCTACTTCCAACCATACATCCCTCGCCCAACTGCAAACGAGCTATTCAAA -TTCCTTCGTCGCGAGCTCCCATTCTATCGCGTCCAATACACAGCACGACGAGGCGACATT -CAAACGCAAATAAACACACCAAGATGGACAACCGTCTTCGGAGTCGATGAGACATCCACC -TTCGTCGAACAAGACGACGACCCCAACAACCTTTTTTTACTGGAAACAAAAACCAAAACA -CCGATCCCAAAAACAAAATACCAGTGCACGCCCCGTCCAATCCCAGCATGTCTCGATCTG -CTACGAAGACAGGTCGAAGCAGCGAATGCAGCCACAGCCGCCGCCGCAGGTACTGAAAAC -CCCAGCTACAATTTCTGTTTGGTAAATTACTACGCCGGCGGCGACGACAGCATCGCATTT -CACTCAGATGACGAGCGCTTCCTCGGCCCCAATCCGAACATTGCCTCCCTATCTCTTGGC -GGCGAGCGGGATTTCCTTATGAAGCATAAACCGTTTGTACCTGGACAGAATGTGAACCAG -ACTGCGGGTAGGTGCGTGCCTGGTTCTGCACATGTAATACCCAGTCCAGGTGGTACTGGT -GTGACGCCGGCGCGTGGTAAATTCACGGGTGGAACATTGAATACCGTCTCTCCTGAATCT -AGGCCTGCTACGGTGCCACTGCAGCAAATTAAGATAAGCCTTGGGTCAGGCGATATGGTT -GTTATGCGTGGGGCGACGCAGTCGAATTGGTTGCATAGTATTCCTAAGCGAAAGGGGAAG 
-GCTGGTGAGACAAGGGGCCGCATCAATATTACCTTTCGACGGGCAATGATTCCTGGTGGG -ACAAATAATTACTATCATTATAATGTTGGTAGTGGGGGAATGTATCGGTGGGATGAGGTA -GCGAGAGAAATGGTTCTAGGGGAAGGGAAGGTGGAGAGAGAGAGCTAGAAAAAATAACAA -TTATTTTTGCCCAGATCTAACTTATGTTACATCTCGCCAATTAGTTAAGGGCCTCGAGGC -TACAGTAGGTTTTAACCGCCAACGCACCACAACGATTTTTGCCAGCAGTAAAATTAGTTA -TATGCGTATTTTAAGCAATTCAATACTGTACTATGTAGTACTCTATAATCAATTCACCCC -TCTGACTTACTTGGTTTCACTATAAAGATATAATCCCAGAGGTGGATTACATATATGTAT -ATACCACTACAACTAAGTACATTCTTGGTTAAGACCTTTTGGTCTAATGCGTAAATGCTT -GTAGGACCGTCCAGCGTATCTCAATAGTAGAGTAATCAATAGGCTGCCAAGGGCTGAGTA -ATATCTCTTTATATAACAATATTTGATATAGAGGAGTAGGAAGTAATACTTCTTAAGAGA -TTCTTCTTATAGAATAACCTTAAGGTAGTAAAAGAAGAGAAAGAAGAGAAGCTTATAAAG -GTCGAGGTTAATCTAATTAGTGACATAGAGGAGTAGGGGGGTAAACTAAAGGATATAATA -GAGGAGGGCATTTTTTCCAAAAGTAAATATCGTTCTTCTTACTCAGTAGTATTAATCATT -AGGTTGAAGGGTTAGACCCAAAAACCAACCCAGACTAGGTTAGGTCTGAGTTATGTCTGG -GTTTTACTGGGTCGTGCGGAAGACTAATCGTACTCGATATTTGACACTATATAATCCCTC -CTATCGTTCCACGACTATAGAGTGTGGAAAACAACGCTTCCCGTCTCCTCAGCCGGACTT -GGACTGATTATTCTGCATAGTCAATAATCAATGCGCCCTCAGTATCCAAGGGCTACTCTA -GGATGTGTGCGGGTGATAGCTATGAGTACCAATCCTGGCTGCATCATATGAGGAAAGACT -TGCCCAACTGATGGTTGGACGTGGTACATCTGCTTCAGCACGCAGTATCAGTGTCAATTT -TCAATTCAAAACAGGCATGATGCATTAAGTTTTAGTTGTTATGCGTACTAGGATTGAGAA -TGCGTTCAGCTGTTTGCCTATGACTATTACTCAGTACAAAACGCAGCTATTGCAGTATCT -TATGATGAGGTTGTCTATCAGGCTTCTGGAGCATGGGGAGTTCGATATGACAAGTCCAAA -CTGCTGTTCTCGTATGGTACTGATATTCAATACCTCGCGTCCTGAAAGCTACGTTGTGGA -AATCGAGCTAACAAGGCTCAGTGAGCGGAATATCTAGACATACCACGAAGCCTCGGTAGG -GACTGAAAAGAGCTTTTCTGTCCCTCATGCTGATACACATTCATATCTCTTCTGTCTCGT -ACAATGAGAGAATCTCTGCCTGTGTCTCAAAATCGTTAAATAGCTTGGAAATGTCCGTGA -GAAGCCAGGACTCCTGAGCAATCCGCAATTCAAGAGAACACTTGCTTGGAATTATCATTA -TTTGCTGACTAAAAGATCAGAGTGTACTACTCTGAAAGTTTGTTTGGAGCAGATGGCAAA -TGTAAAGCCCCTGAGAACATGATTGTGAGGACGGGGTTGAGAATCGCGGAGCAGGAGTGC -TTTTGAGCGTTGGCGATACACAGCATCTTACAAGTTAATGTTAAGTGCTATGGTAGTCTA -CCTATCCACCGACTGGTGAAACCGACATCTCATACAAAAGTTGCAACTGACCACAGGAGT -ACAGACGTCTCAATATTTCTGATAGCACCCAACAAGCCTTAGACTACCATTAGGAATCCT 
-CAGAAGTATAACGTGCATAAAACTAACTATGTATCAGGTTGGCACAATATAATGGCACAT -GAAATTGGCTCGGGTGGGAAAGAAGGGGTATAAACTCTGCCAAGGAGCTGGATGCGACTA -CTTTGGACCATGACAGACCGAGACCTGACCATACCCTCCGTAGCCGCGATCTCCTCTGTG -GATTCGATCCCTCGGGAGTTTTGACACTGCACGTACGGATGATTTCTCAATGTAAAACTT -GATGAAAGGAAAGCAATCTACGCCGTGGGATGATGGTGTATGGAGAACAGATGTGCTTAA -TTTCTAGGAATTACATGAAGATAGGACACGGGAATGAACCCGCACCGGACGGAGGCAGTG -AAGAAGGTACACCCAGGCCCCATCCCCTTCCATTCAGACTCTCATATTTCCCTGACCACC -TAACCTCTACTCTGCAGATCACACTCGCCTCCACTCATATATGGGTTTTGGACCATATCA -GTGAGGGTGTCATTCTGTTCACAGCACACTTGCTTGCCATTTGCATCGTTGAGTGGATTC -CAATGGCCGTCGTACATGCAATCGTCCTTTCCATAATTAATATCGATCCAGGATTTTGGC -CAAGAGAAAGTAATTTTGCACGATGCCTTGACCATGGAGTCGGCTTTGTTGTTTCCATCG -AATCTTCCATACAAGGTTGCATTGAGTGTATTGCCATATCCTGGATCATCAACCTCTACA -GATCACGCCGTTAGCCTACTTAATCAAGGGCAAAGACAGGACAACCACTTACTAAACGTG -CTGCAGACAGGGGTGTTCCCGTTAGCAGTAATCCACCCATTAGCCACATGCTATAACGAT -GATTAGCAGCTATTACCACGTTTGAAAAGTCACAGACGGATTAGACGTACAATAGTAGTC -CCTTGAATGTTATAGTCCTCAGGGTCTTTACAAGGCCAGAATGGAGAGCCTGTAGATCCC -TTCGAGATAACCAGAGCGGTCTCCGCAAAGCCCTCGCATACAGGTATATTGAAGAAACCG -GGATAACGGACTCGTTTCCCATAGTCGCTAGCATTCCTATCCTGCTCGATCTCAGCAGCA -CTCGGTGATTTGTAGCCATTTTTGTAGCCAGCACCTTCATATCCGGTCATTGCACTTGCA -ATGATATCATCGAATGTGATACCTCCCCATTTCTCCCCAGAGAAGGTGAGTCTATTCGTA -CTGGGTAGTTCTGAAAATGGCACCGTTTGATCGGGTAGTGGTGCATATGGAATTAAAACT -GGCTTTTGCGGCGGCCTCATATTCAGAAGGAAAACCAGCTTGTACCTCCAGCACGCGCGT -GTGGCGACAGAAAGATCGTCACTCATAAGTTTCGTATTAATTTTGCTGAACTCAGGGATA -TGAGCGGACTTGGTATCACAGGAGACATCCGTCTTTCTGCAATAGGTTTTTTGATTAGTA -CCATTTTCTCAAATAATGATTTGAGTCCTCTTACAAAATTCTTGGGTAAGCGCCGCGGTC -ATCAGAGGCGAGCTTCCATGCAGTAGGTAATAGTTGGCCATAGAGTACAGCCTCCAGCTG -CGCTGCCATCTCATTTTGATTTATTGTGTTTGGCAAATAGTTCATTTTACCGTCTCCGAT -AAGGGATAGAAGGTCTTCCAGTGATGAATCGTCGCCAGAAAATATGGTCTCCAGATAGGA -GGCCTCGGTATCCTTCCAACCCTTAAATATGTCTCCAAGAGCGGAACTGACTGAGTTTTG -GGCTTGCAGTTTATCATGAACCCTACTTCAGTTAGCAAGGGCAGCCTTTCAGGGGGGTTG -GAGGTTATATATCGGCTTACTTAGGAATGTGATTCCTACTCCAGGCCGCCGTAATGGTAT -AAAGTTGGTATGTCATGTCGTTAGCATACGCCCTGCTATTGAATTCAACCGTTGGTCCTT 
-TCAAGTAGTTATTCAGTTCCGTTTGTGAGAATGGTGCAGTGTGGTAAGTATCGAAAGAGG -CTATGCTTCCAGATCCGGTGTGGAAACTGTCGGCTGAGGAGGCAATACTACCACGACTAC -TAGCGACACTCGAAGCTACCTGTACGACCTCCTTTTCAACTGTAATTTCGTTAATCCGGA -GGTTTTTAAATGAAAAGGGTGGTCTTTATACTTACCAATGCTAAAGAAAGCTGAAACAGT -AAAGCCCATAAATGTGGTCAACATATCAAAGATGAACAGGATCATCTTAGTATTATCACT -CTGGGGCGAGAACGTGGTAGTAAATTCGCCAATCTTTTGCTGGACGAGAATTGCTGCCGT -ATCGAGTGCATTGTACATTTCAGTGTGTAGCTATCAACAGGATTTGCTACTTAGTATTCC -CTGCTAAAAAAAAAGGTTGAACTCGGGATACCAGGAAAGGCCTACCTGATGGATACCAGA -GAAAGCATTTAAAATCAGGTATCTATTATCTGGTTAGCACAGCTGTAATAGTATGAGGCG -ACGAGACCTGATGACCGTAACCATACCCTGCTGGATGATTGACGTCCTTGCACTGGACAA -CGGAAGAGCATGGGACGCTACCGACATCCTTACAGTTCCAATTCTCTGGGCCGTGGAAGA -AGTAGCTAACAAACTGGGGGAACGCGAGGCCGTGATACATTTTGTTGTTAGCTTCGCCCG -TAGCTCCTCTCTTCCACTCGCTAAGCATCGCATCCCAGGCTCCTGGGACGTCTGCGGCAT -ACCATCGATCAACAAAACCGACAGTAGGATCGTCAATAGAATTCTGATCACATTGTGCTG -TTTTCCAGATGGACGAGCTATTCTGGTAAGGCACATCCGATGGAGTTGGCAAAGCTGCTG -CCAGCTTGACAATGCGGGTTGCTGAGGCTGCTGCAGAAGGGTGTGCTCGGGGGACATGGG -GGACTCCTTCATAAGACTGGAAAGCCAACCCACTGGCCGAAGCCAGGAGGCAGAGCAGTG -CAACAAATTGGCAGAAATACATCGGGGCGTATCCTTGGAGCCAGGAAGATGTATGACCGA -AGATGCAGTTTCGTTGGAATACGGCTCCGACAGAAGCAAAGAGCCATATTTATAGTCACT -TCATTAGATTGGGGCCCCTGAGCATCTGCATCTAGTGCATGGGATGAGAAGACGGTCATA -GTGTTTTGGTATTGCTACCAACTGCCTCTTCGAGGCATTGCCCACGGGACCGCTCTGCCT -CAGGCATATTGTGGATACCCAAGGCATCTTTGCGAAAGATGGCCGTTTTATCGCGATTAT -ACAGATTGGTACAGTCTCGTACCTAGTCCAAATTACTGACTGCTTCCTTCCCTTGAAAAA -TGTCCTAGTGTACGGGTAAATTACTGAGTTCTTGGAAATGGCAAGAACTGGGCTGAAGAC -TGGGTGGGTTTTGCGGAAGAACACCTGAATCATAGGCACTTTTCGCAAAAGAGCCTCACA -GGCACTGTGTCCCACACAGACCTCCGCAAAGCCCATAAAGCCCAGAGCCCAGCCCAGCCC -AGCCCATAAATGGGCTAGGCTGTAGGTCCAGTCCCTAGGGCCAGGGGTGGGCTTTAGGGC -CGGGGTAAAAGCCAACAGGCTTTGAATAGAGGCCAATTAGGGCCTAAACAACACAAATAT -GAAAGTTAAACCCCCCTTTCAGGATCAATCAAATTACCTCTATTTCACGTCCGTTTCTTA -CCCTAATCCTTCAGGTAATCTAATATTATATTTAATATTTTAGAATTAGTTACCAGTATT -CTCTCTATATCACCACATGGATATAAGAAAACGCGTCAATTAAAAGATTTTTAAAATGAC -AAAGGTTCATTAGAAAGTGGTAGTGCTTTAATAGCCTGTAGCCAGCTTTTCTGGCCTGCG 
-ATAGATAAAAATTTAAATGGCTTACAGCCGGCTCAGCGGCTACACCGCCGGACTGACCCC -GCCCACCATCCCGGAGGGAGTTAAGGGGGTTGGCTAGGATTTTAGGTGGAAttactaggt -gcgtattgctatgctagttgccgggtactggttaccgggtgctaatgctgggtgctagtt -ACGCAGGTGACGGACATGCGAACCCTTATGATAAGGGTTGCTCCGCTTCGCTCCGCAATT -AAAAGATCAATATTCACCTTTTCGGTTGTTTCTTAACCTTTACGGATTGaattttattta -gaattaaaatatatacttataatATCCCCTTTTTTTAATTTGAATCTGTACTTTATACGA -TCAGTGGAAGGTCGTCATATAACGATTACAGAGTTCGCTTTGTTAGCGCCTGTATAGGTT -TTTATAGAGGCCCACGGCCTAACCCACAGCCCACTGTGGGTTAGGTTGGGCTTGCAGGTT -AAGCCCATAGCCCACCGTGAGTTGGGCCGTGGGCCTAGGGTCCGGGTCCATTCATGGACT -TTGCGGAGGTCTAGTCCCACGAGATAGAGGCCCTGTTCGCCTATTTGTTATTGGATCAAT -GTGAATACAGGCTTCCTGCAACTACGGCACGAGTGCCGACAATGTGCCCAGCCCACCGCA -ACATCACTCCTTCCAATACGTCACAAGTCTCAACAATCACTACACCCACTTTTACACCTG -GGTCCAGCACCCTACGGCTTGTAATATTTTCTATCCCTTCACCAACAGTCACATCATCAC -TTGTCACAGACTCTGCCACGAATGCTGTTGTCACTGACATTTTCCGCAATGTGACTCTTG -GAGAAGCCTTCTCTCCCTTCCACTGCGACAGTTTCAATGTCGCTACCCAATCCCGTCCGG -ATAATTCCATTGCATCCTGCGTGGCGCGATAGAATACTTCTCTGTGCTCCCATGGGCAGC -TGGGACTGGAATGCGATACCAGAGGAGATGAAGGCGTGAGATGAGTATCCCGCTAAGCAG -CTGCAGCGGATGATGACCAAGGCGACACCGGGTGATGGTGATGGTCTATCGGCATGACGA -TAATCGTCTGTACGTAGACTGGAAGGAAAGCTTCCACAGTAATAAATACGACAAAAAGTA -AATACAATAAAATATATGTGAAAACTAGATAATCTATATAGCTTAGACTCTTCCTTTACA -CTGTTTAGTAACCCATACAGGTAGCCCACAAACCTATATAGGTTACCCAACATAACCTGA -ACCCTAACCTATTTCTGGGTTGGGCTTTTTGGCGCAGTCTAGGAGCAGGGTCTGGGACGC -TATCCCCGCATAGCAGCATGTATATAATAGCGTACTACATAGGGTAAATGAATATatcat -catcatcatcatcaccatcatcaCGGAGAAAGAGAGCTACAGGGTACCTAATAACGCAGC -GGAAGAGCTCATCCTTTATTACGGCCTGGTCCCTTACTGGAATAACTTCGGTAGCTATAA -TCATCACGCGTATCTTATGTCAGTCCCATCTTGGCTAGCTTTGACGTAGGAATGATTCAT -CGCCTACAATCCATCACACCGACTTGTATAAAGGACTATTGCGAACTATGTTCAACACTG -ACCGTTTGTCGTCGACCAGCAGCGATAATAAGTCAACTCCTCAGCAATCGGCAGAATATC -GACTGCAATAGTATCTCCAAACCGTCTGTCCCCACATTTTCAGTGTATCCAACAAAGCTC -ACCCTACACTGACATATATGGTAATGCGCCTCCGCTTCGCGCCCGATAAAATACCCAACA -CGTACGAGATATGCTTCCCCATTATCACAAAAACGCTGGGAAAATCCCGGCGGGTAGGGT -GCACGGCAATGGAATGACACGGCGCCCTTTACGCTTTTGAACGAGCAAACGCCCGCAAGT 
-CCCGTTATCCTTCACGTCAGGAATGGACCTAGATCCGTAGTAAGTTCGGAAAGGGAACAG -TACACATTTCGGTAGGCTTATTTGTATGGAGACAGCCTCCCCACCCTAGCCAATACCACT -GACCCTTCGAATTATGCCTATCATCTTCGTGCGCCCTGATGAGATGTTTAGGTAACCAGT -CGCCTTATCTGGATACTCGAATCCTCCTGTTCCTGATTCCTATCCTACTGTCCGGTGGCC -GAAATTGACCGATCCGACTAAGCTTCAGACAGTAGCTGTGCTTACTGCCATCGCGCCACT -TACATGTGTTAGAGCTACTACCTTTTTTCCGGCTTGGATTGTCTTCCGAACTTGAGTTTG -GAGATGTCCAGGGCTACGAGAAGCAATCACTTCCTGGAGTCGTAGCAGGCCGAACAGCAC -TCTACCACCATGAAGATGAACCGCTTTCTAGCACCATAAGAAGCCTTACCCACCCTCGTT -TTACTGACCTATCCTAAGCACTGACTCCTGATGCTACACCATAAGATGATTCAAATTACC -TTCAGCGGTCGGCTCTTACCCTAGGATGTAGGCTCTAGAGTGGTTTCAAGCTACCAGGTT -CCCTAACCGAATTTTTAAACGCTATAACCACTTATGGTATCAGAATCTGCAATCTCGTAG -GCGAAGAAATGCTAACAGTTGCTAATCATGGGCTTCCCTCGTTAAAAGAAGTATAACATC -TACATATCAACGGAGGAGATCTTATCGGCGAGGTCGTCAATACCCAATCTGAGCTTAATA -TCGCTCTCGTTAAAGTAACACCCGCTGCCTCATCTAGCTTCACTAACTCCTACTACTTTC -AAGCTGAACTACCTACTCACCTTCTGGAAGTGTCCCAAATATCCTAGATTTCCGAAGGCT -CATAGAGTGAAGTTGACGGCATAAGTTCGGGGCTCTTTAGCATGATGAATAACGGCTTTC -AAACAATACAACCGAAATGTCTAGCTGGCCATCCCGAACTCGAAATTGAAGAATGGGATA -CCATCAATTATCACCATTATAATGTCGGTGAGGGACTTTATCGGTAGGATGAGGTGAGGA -GATGCCTCTGGGGGATGAGAAGATAGAGAGAGCTGCAGGGTAGCTACATGTGTACCATGT -CTATGATTGAACAGGAATAAGATTGGCAAATAAGATATACATGGGTGCCGTAGCACGAAA -GGAAGGTCTCATGATTCTGAGTTGGATGTGCAACTCGAGGCGAGACAGATGATATCACTG -TGATAGCTGGGCACATTTGAGTCTTTGTACATTTGCTTCTGAAAGAGATAGAAGGGGGAA -AAAAGTTGACATCGAAGAGGGTCATCATGTGGCATTGGCTCTGCGTCTTCCCAGTAGCCA -TGGTATTCCTCCCTTGGGGAGTTCCGGCACTTGCCGGGGAGTAAAGAAGAGTATTCCGGA -AATACAAGAGAGCTACCAGCTCTTGTAGGAAAAGAGGAATAACTTAAAAGGATCAGTAGT -CATAAATGAGTTCCTCGCCGTAGACGTCTCCATAGCATTGGGACAACTCAGGGCAGCTGC -AAGCAAAGATGTTAGCGGAGAAATAAGATTATTGCAGGACAACTTACAAGACAGGATCCT -TGCCGAAAATGATGTGCCATGAGTACTCCATAATGTTCGCGGTAGCCTCGTCCGATAGCT -CATTGTTCATCACCCAGTCGTACATGCGCTGGTAATCGCTATGGGGGCGCTGGAGAATCC -GTTCTCGGGAAACGGCAAACTGGGAGCAGCACGGCGCCGCGATTTCGCGAGGAACGGGCG -TATCGGGGAACAGCTCTTCCCACGCAGCAGCGTAATGTGCCTCACCTGGGCTTTCGAGAT -TCGCGGGTTGGCGGAACGGTTGGATGGCGGAGGGGCATCCTGGGCTCAATATGCAACGCA 
-GGTTGACAAAGCCTGACTGTAACACGAAATCTCTCTGCAACGCGCGCACCGAGTCCACAT -TGCTGTTATCCATGGTGTCGATGTGCCAGCCGGAGAATACGCCGTCGCGGTGACTGTGGA -GGAAAACCACGACCTCTGGAAGGTTATCATAGTGGTCGATCAGGAATTGGAGGTAGGGGA -GAGCCTCGCGACCCTTGTTCTTCGGGGTGTGCGTCGGTGCATTGATATCGTCGACGGTGT -AAATGATGCTACGCCACCTGGAGAGAGATTGAATTAGCGCTTGTCTTCTTCTGGGGACAA -AACGGAGTTACGCTTACTCCGCGAGCTCTGCCCCGACCCAGGCAGTATCCTCCTCCTTGC -GCTTGCCCATCACAATAGCACCCTGGTCGGGGACCGTGATATCTGTAGTCTGGTAAAGCA -AAGCAGGTTTATTGTCCGATGCAATGCGCCATTTTCGTTGAGATCCCGTATCTTGTTTGA -GCTTTGGTCTGAGAGATGTGCGGAAGGAGCCGGTATCGAAGCGCCATATATCCTGGATTG -ATTCCCCCTTGGTCTTCAGACTGCGATTGACCAGTACGAGAGTCAGGATGATGAGTGCCG -CAGCGGCACCGACACGAAGTGACAACCTCATCGCATAACGCGATTGTCGGGATTGAGGGA -AAGAGTATTAAGGGGGGGGGTAACAGAGAGAGAATGAAAAGAGGCCAGTGTCTCAGGACA -CGCAATCTTAAGAAAGTGATAAGCAGCGCCCCATGGGCCGGATGAAACTCACTGTGAACG -CGAGATGACGAAAGTGCCGAGTCAAGGGAGAGAAGAAAGGCCTAAGAGAGCCCTCAGAGG -TCGGATTTCCTTAATAAGAGGGAAAAAAAAGAAAAGGAATTGACCGGCACTTTCCTCATT -TGGGGTGAACTTGTCTATTTTCTATTTCTCCAATTTCGACCACTTCAAGACTTTTTTCGG -GGCATATGGGTGATTTGGGGCATTTTGATTGGTCCAACTTAAAGTTCTCGGCCTATCGGC -CAATATTAGTTATATTCAGGGGGGAAAATGGAATTTGATTTTTGTGTCTGTTGTAGACCC -TGATGATCCACTCCTGAAACAATGATATATATGATACACATCCTATGCAAGGAGATATCT -GTTTGACTGCAGATCAATTGTTTCCTATATTGTCCAAAAAAAAAAGGGTATGACCAACGC -CATCCAGTCCGAACCCTAAGACAACCCCAAAAATATCATGAACCCACAACCCCAAGACAT -AGGCCAAGATCCAAGACATCAGCCGCGTACCGCCAAGATATTCACATCAAAATCCTCTGA -ATCAGGCGCTATTCGACCAACCACCCGCCGTATACTAGGCAAAACCGCTAACGTTTCCCT -CTCAACCGCCCAATGCTGTTCCGCCAGATGTGCCAGAAAGGTATCTAGATTCCCATAAAC -CTGCATCGATCCATGTGCCATCGCACAGCAAAAGACACAACAAAACGCGCCGCGGGGAGT -ATTAGGAGCAAAATGCGACGCTGCGTCCCGCTTACAGTGCACATGGGACTTGGCCAAGAA -CCCGAATCGATACCGGACACCGCTAGCTTTGTGCGTACGGACACTTCCCTCAAACTGCGG -GTTGCGGCTGGAACTCTTAGGGGCCATGGGTGCTTCGAAAGCGCAATGCGCGCACCGAAG -CCATGACTGTTGCGTGTAAAAGCCTGTTCCGGGTTCGGTATAGATTCGGAATCCCCGGAA -TCCGGTATGCACTTTCCAAGCACCTTTGCAGAAGCCCAGGTAGTTGTTCGATTTGCTAGG -TGGCCACAGGGGTCGGTTTGCATCGGTGCTGCTGAGGGTGGAAGGGTTTGACGGGGTGGA -GTCCGTGGAGGCGATTGAGGCTGCGGAGAGAGAACTGTGGCGGGTTTGCGTCGATGGGGA 
-TATGGGCGAGAGGTGGGCATTTTCTTCTTTAATCCGAGCAGGGCTGAAGGTGTCATCGCC -GGAGTCGGAGTCAGAGTCCTTCCACGGGTTCCCTGAAACGACGTCGTGGGGTATCGGGCT -CAATTGCAACTGAGGTCTGTGTGGCGTGGCTGGTCTGACATCGAGTTCCGGAATCCGGTC -TGTTGTCGACGGCGAGACTGCGTCGAATCCTCTTGGTGGGATGTACCCGGGCTCGGGCGC -TGATGATGCGAATGAATGTGTACTCGAGCGACCTGGACGGGAATGAAATAATTCGCCCAT -ACTAGTCAAAGCTGACCCAAAGACCGATGATCGTCTGTTCGTTCTTGGACTGGTGGCCGG -ATACTCCCTCGGTAAACCGACTGTGTCCTGTGTTAGAATCCGGCCTGTTGTGTAACTGCC -TGGTGATACGGGATATCCGCCAGAAAAGTATTGCATCTCCGACGAGTATGATGGCAAGTA -TCCGTCCGGACTATTGGAGGAAGATCCACCGCCCTGCGGGTTTCTCCTGTGATAGTTTGG -ATCTGGATTAATGAACATCGAAAAAGATGGCGGGATATGCGATGGCCTCGAAAGTCGCAG -ATACAGATCTCCTAAAATGGCGAGCGTGCGGACGCGCCCCTGGTCGGACTCTATCTGCAA -CGCGTTCAGATCCAGAGTCGTTCCCTGCTCCTGTGCCTCACGGAGGTGTCTCAGCAACGC -CCCCTGCAGTGTGATGATGATATCTTTCATCTGTTCACGAGCGATCGCGTCACCGTCCCA -GTATCGCCGTCCCAGCTCTTGGTGATGCTGATCAAAGCGAGACTGAATCTCGGTACGGCC -GAGAGTGAGCGACCGTTCTAGTGCCCGGGTGGACTGGCGCTCGTGGTATTTCTTCTTAAT -GGCCGTATACATACGCGATCCGTCATTGTAGGCGGAGACGACTTTTTACCAATTTGGAAC -GTTAGTCTCATCTATAGTAGGATAGGAGATGGGCTTACTGGCTGCCACACAGGCTACGAC -TGCTATCACCTCCACCCCCGACATGGTGAGTCAGATGGCACTCGGGATATATGGGAGGGG -TTGAATACGAGCGGTGTATCTTGGGATCCAAGGGTATAGTAACAAAACCGAGAATTCTGA -AAAAGCTTGGAGAAGATAAGAGAGGGAAGAAGAAATGCCCAGCAAAGGGACACTGACAAA -AAAAAGAAAAGAAAAGTGTTGGCACAAGGCCGTGCGGGTATACATTGCAATCGACAGGTA -CTTCTGATAGCAACAGCACAGTCCACCTGGGCACTTCTCTTTGATTGGCCCTCGGCCAAT -GGGACGAAATGGAATTCTCCGTTATAAGCGAAATGAGGGGGTCTGAAGGAGGCTGAAGCC -CAATACCTGAGAATTTCATATCTCCGAAATAGCGAGGATGGTCCAATCATTATCTTGTTG -CCTATGGCATGGACTGATCTACACGTTTATAATATCATCCATGTGTAAATCAGTGGGCAT -CTACGATCGGATCCACCGGCTCCACATCCGATTCTTTACTATGTCGCCAGAAAGACTACG -AGTTTATTGATCCTGTTGGAAATCCTACCAAGGCTTATTGAACGAAAAGTACCTCATGCA -TATATATTGAAGCCAAAAACACAAGAAGCCAGGACCGCCTAATTCAAGCCCTAATTACAC -TCTTCTAACTCGTACATCCATAATCAAACAGTACAAAACGCTAGCTGAGATGCAAGAAAA -AGAATCCAATCCAATCCCCCTAGCCAATCAACAGCAGCGCCCCTTATTGCGCGAGGCACG -TGTGACCCTCTCCTGCTCAGCACCTTCTACATCAACAGCTGCTGCTAACCCCAGCCAGCT -CCTCCTCTTATCACCAGGACCGAGCCGAAAACTCCCGGTACCCTCCACGATCCCAGGGCT 
-AAAAGGCCCAACCACCCCATTCGCTATGGATGCACTACCAATCGACAAAGCCAGCTCAGT -ATCCCGTCGATTGCGCTGCTCGACCAGCTTACGCGTGATAACGCGGAAAACCTCATCGAT -TCCTTCGCCATCCCGCGCGCTGATCTCATGACAGCAATCCCAACCAATCTCCTGTCCCCA -GAATGCCGAACTACGCTTCGAGTCCGGACTCTGTAGGGCATTTGACGAGAGTGCTGCGGC -GCTGGCGCTGGCGTTACTGCCACCACTAGTACCTCCAAACACAGACGTCCCAAAGCCTAG -TCCGCCCATGCCTGCTGTTGCGGTCGGTGGTGGGGTGGAGGCTTGGGTTGGGTAGAGTTG -TTCGGCGACGTAGGCGATTGTGCGCTCGAAAGGAACGCGGCGGCGGGAAGGGTCATCGGC -GACAATGTCGGACTTTGTACCTACGACGTGGATTATCAAGGAGTCTGTACCGTCGTCGCT -GGGTGTGATGTTTTTGCGGAGTTCGCGCAGCCAGCCCGCCATTTCTTGGAAGCTGTTTTG -ATCGGTAATCTGGGACAGTTAGCATCAGTATGATTGTGGGTTTTATATACGTTGCGCATG -TAATAGGCAAAATGTTAAAGCAGACTGTAAGGGCAAAGAGGGGAAAGTGTGGATACTGAC -ATCATAGCACAGGAGTACTGCTTGAGCTCCCCTATAATAAAGTCTCGACATACTGCGAAA -CCTCTCCTGTCCGGCTGTGTCCCATATCTGTAATCTGACTATGGTATCGGAGGTAGAATC -GAGGACGCGCTTGGTCACGAACGACGCGCCGATAGTTGAAGCCGCTGCGGGGTTGAAAGT -GTTTGTACAGTATCTCTCCACGAGTGCAGTCTTTCCCACCCCTGATTAAAATCAGTAAAG -TCTCTTGAGTTCCAAATTATATTAGGGCTCACATACCCTGTGCTCCAAGGACAACAATCT -TAGCTTCGAGGCTTGAACTCATGTTTCCCCGTTGAGGAAGCGGGGATCAAGTTTTTGTAT -TCGCATTATATGTCGACAGTCAGAATCATTTGACAATCAACGCGATAGAAACAAAGAAAT -CGGATCAAAGGGATATTTGAAGGAGAAGTGAAAGAAAAGATGACCCGGGCAAGTTGAGGG -CGGTATCAGCCGATGGCTTCTTGTTCACGGGGTCAACAGCACGTGTCTATGATGGTAAAA -GAAGTCAGAAAAAGAAAAAGGGGATTTCTATAAAACAAGACTAGTTATATCTTGCAGCGG -ATATGAGATTCAAGGATGTATTCCAGACAAGGGGATTTGGATAACTGCCAGCTAATTAAT -AATTTAGTTGATTGGCTTATACGGGTTGCTATGCTATTATATATAGTTTTAAATGCGAGT -ATGAATGGTCCCTTGCTCGGGGAATCTATAAGCCTCATGCAAGGCAGCACTCGCATGAGT -CACTATTCAAACCCGGGAGAGGCAGGTTTAGCGCACTTGCAGGGACATAAGATTTACATG -TCTCGAAATTTACATTTGAAATGGGAAAAGAATAGATAGAATATAGGTTCGTTCTCTTGA -CTGGTATCATACAAAACGCATATACATATAGCTGATAATTCCCATGTCCGACCGAACTAT -TTCCAGAGGTCATCAAAAAAACATACTGCACGTTAAATTCCAGAGTAGGGCGCAAAGAGG -ATCGTTCACCGGCACCAAGGCCAGGGTGTCATGCTCCAGGCCAGAAAAGGCCAGAAAAAA -TGGGTATATAATGTCaaaacagaaaattgaaaaatgatgagaaaattgaaGAAAAAAATA -AAAAGACAATCTGGCGTTCACTCGTCCATTAGACAGAGAGAACAGCACAGCCATAACATG -TTGGATAGCAGCGCACAGCGCTGTCACGCCTGTGACTTGAAACCCCGCTTAGCGAAGCTC 
-ATTGGGCCAATGGCGCTCAAGAGAGAGACTTGGAGTTCTCGGTCTCCTCGTGGAAATTCT -TGTCCTCGCTGGAGTTGCTGCGAGGGGAGCCTTTCTCGCCGGCGTTCTCCATATCGCCCT -GACGGCGGCTAGACTCGAAGCCATACTCAGCAGCCTTGGCCTCAATCTCCTCGTCGGTGA -GCTTGGGCAGCTCGCGGGCAGCCTTGACATAGCTCATGTTCTCGCAGTAACCCTTGGCGA -AGATGAGATCGATCTCCTCCAGACTGCGACCAGCGGTCTCGGGGTAGAACAGGATCATGG -GGGGGATGAAGATAAAGTTCATCACGGCGAAGAACAGGTAGGTACCCCATGAGATACTGT -CAATCATAACCGGGGTGATCATAACGACGGTGAAGTTGAACAGCCAGTTGGAGCACGTCG -AGACGGCATTGGCCTTGGCACGAGTACGGAGAGGCGAGATCTCGGCGGGGTACAACCAGG -GGAGGGGCAGAATGGAGGCACCGAAGAAAGACATGTAGAGGAATAGACCAAAGACGGCAC -CCTTGGAAGGTCCAGTCTGGTCAGGGATCAAACAACCGAAGACGATGATCATAGAAACCA -TCTGACCAGTCATACCACCGATGAAGAGCTTGCGGCGACCAATCTTCTCAATGAAGAACC -AGGAGAAGGTGGCGAAGATGGCGTAGACAATCATGTTGATACCACCGATGAGCATGGACA -TAAAGTTATCTTGACCGAGAGACTTCTTCAGGAGGACAGGGAGGTAGTAAATGACAGCGT -TGCAACCAGACAGTTGCTGGGCGATTTGAGAGGAAGAACCGATAAGCATGCGGCGCAGGT -GCTGGGTCTTGCCACCAGTGAGGAGATCAGAGTAACCAGCACCCTCAGCAACACCGGCAG -CCTCGATGGACTCGATAACAAGCTGCTTCTGAAGCTGGGTCTCGTGTGATTCGATTTCGT -AGCCACCGAGAGCAGCGAGCACATACTCACCCTCGTGCACCTTACCCTTGGAGATGAGGT -AGCGAGGCGAATCGGGCAAGTAGTACATGCCAATGACAATGATAAGACCAAAGACGATCT -GGAAACCAATGGGGAAACGCCAGGTGAGGTCGTCAGGGCCGTAAGAGGCACCGAAATCAA -TCCAGTAGGCGATCATTGTACCAATGGCGATAATACCACCTTCGATACAGATGAGAAGGC -CACGGTTGGAGGAACGCGAGCACTCGGCTTGGTATGTAGGAATAGTCGAAGTGTTCATTC -CGTTACCGATACCAGTGAGGATACGACCGACAAAGAACTGTGCAGTAGCTCCGTGGCCCT -TGATAGCAGAGATTTGAATAACAACACCGATCATCATAACCCAAGCACCGTGAATGATCA -TGAGACGACGACCAAGCTTGTCACCAGCGAACAAGGCGTACATAGCACCGACCAGACAGC -CCAGCTCGTAGACGGCAGTCACCAAGGCCTGCATGGTGGAGTCGCCATGGGTGATGGGCA -TAAACCTTCCGAAGGCGTCGGAATCAATAATACCGGACATGACACCCTCTGCAGATGGTT -AGCGTCAGGTTTGCGTAGTATGTAGATCGATGTTATACTCACGATCATATCCAAAAAGTA -GGAAACCCATAGTAGCGATGGTTGATACCGTCAACCCTAAGGGTTTACCGGACATGCCAG -CAAATGTAGGCGCCATGATGAAGAGTGGAAGACAATCTCGAATCTAGTCAATCTATACAA -GTCCAAGGATATATAAAAGGACTTGGAACACGTGAGTAGAGAATGACTGGGGTTATAGAT -ACACGCGGAGAAAAAAGCACCCCAGGGGGAGGGAATAGGGAGATATTTGAATGTCTTGCT -TCTCCGGAGATTGACGAAAGTAGTAATGAGATTCATATAATATTTTTTTATTCCCCTCGG 
-GCTATGGAGAAGACACGGTGGGGGAAATAAAATCAAAATGTAAATAACAAAAAGTGCTAA -TTAATGGTTGAACGAATCATGTCATTATCAAGGGGAGCCCCATCAGCCGAATGCATCAAT -AATGGCATGTTAAGCATTTTCGCGAGATTTCTAGGTGGTTTTCGTCCCTGATGATGTCAC -TGAAATTTCTTTATCATTCTTTTTTTTGTTAAGACTCATGTTTTTTTCATTGGTCTTCAA -ATTATGGTGGAGATGCTCTATATGGATCCATGTTATAACTGACCAAACATAACACTTCCC -AATGAATTCTGGATCAAGTTGAAGTGTACCTGGTGCGTGTCTTCTCGGGTGAGAAAATAA -AGGCAAGGAGAAAAGAAGACCAATAACAGAAGGCTTGCCGATTATTAAGTCAGTGCATCC -CGATCAGCGGGATTGCGCACGCACGCACACGGGAATATGTCAAATACGACCTGTTAGGAT -CCCTGAGTACTCCATATAACATAGGACGTTGTACCCGATGTCCAACGGATGCCGAGCCCA -AGTGCCAAGCCACTCCCGTGCTTCTCGGTGCTGGTTGCAAAACGGGGCTTCGGGAAGCCG -TCTCCATCCCGAAGTCTTTCGGCGACAACAATTCCGTGAAAGGACTAGCGCCATACTAAA -GGGATCGGAGAAGGATCGGCATTGTGGCTTACTCGATATAGAATGTGCGTTTCTCCTTGC -TTAAGATCTTTTCATTCTTCATTTCATGGGAATTCCTATCTTAGCCTATATGTAATACAA -TATAAGGCTGCATATGTACAATATTGTCGGATCTGAAAAGAGTATGTGGGATAATCTATG -TTTTGATCTACTGCATGTAGAATTAAGCAAAGCCAAGAGCCAAGCCGCTTAGGGCCCGTG -AGGCTTTTTTTTTTTTTGCTGGACCGTTCTTTTCAGCCTGAAGTAAAAAGATGACACATG -AATTATATTAAAACTTTTAAACCCGATTTAATTCTTTTTTCCATTCAAATGCTGACCCAC -GTCATGTCTCGAAGAGATGGACATTCGGAAAAGTGTCGCTTAATATCCGCAATGATGTCG -TTGAACCACACCGGCGTAGTCAAGGTTTTTCTCGAACTGCATCGAAGTGGACACGGCCGA -GAGAGGCACAAATTTACACACGGTTAAAATTTCCTTTCCATATGCCTAGGCCCAAAGGCA -CTGGATTTGAGGAGCCCATCGCTATTGAAGTGTAGTAGAAACTACTACGTACCAATTCTT -TACTTCTCTTCGTGGCAGGGTCCGCATGTCCATGTCCACAATTCAGGAGACTATGGGTCT -CCTTTGGCTAATAATGTATCTCGCTGAACCGCCTTCCTGATCCCGTAAGATTATCTTTAT -ATCCGCGCCAACAGATCCCGATGAGGTGTGAATTGATGGAAAAATTAAATATCAGACGAT -GTCACATTGCATATTCAGTTGAGAACATTGTAGACTGCTGCCTGGGGGACTTTTGGATTT -CAACTCAAAGCCTTTGTTGTCCGCGCAGCCGACCTGTGGATGAAATCCTGTAGTTAGGAT -TGGTTCTTATTATTGTCATCTTTTTTTTGATAGGTAACCCGGGCAATACCCATGCACCTA -CAACAATTACATAGTCCTAAAATACCATTTTCTGAGCAGTGGTGGCGATCACTTTGCGCC -CCGGTTCGATCTCCACTTCCATGGACAGATTCAAGGATTATGCGCCACATACACATGGTA -CAACTCGTGGACAGTGCATTTGGTGAACCCCTTCCCCGTACAACGTCAGCGGCAACCACG -AGGCAACAAGGGGCCACCACAGTGGCGGAAGATGGAGAAATGGCGGAGGTGTGGGAGGTG -GAGAATATTCGGAGGCGCCGGCAACGCCAGGGAGACGCATACTTTGTAGGGCGAATAAGT 
-GAACGTGCCCGATGATAGCATATGCCTCGATGCCGATCACGAAGTGGCTTGTGCGCCTCA -TACGCACTGCCAACATAAGCTTTTTTTTGGCGGTCTCTTTGGACTTTGAGCTAAGAACCA -GGTTTCGAAGAAGCTTGTAGACGATCGACGGTGGCGCTGTCAGCCAAGCCCAAGACAGGA -TCCTTCTTGGGGATCTACAGTATGTACTCTATAATATGTATTGACCTTTAATGATGCCGG -TGATAATTTTCAACTGGCAGTTCGCACTATCGAATAGCGTGGGAGCTTTCTATATTGCCC -AAAGCCTTGAAGGCTGTACCCTAGTGTATACATAGAGAATTTACTTTCACATTTAAGCCT -TTTGCCGCTTTCTTTCTGCTGTCTACATACCCAGAAAAAAATGGGGGAGAAAATATCCCC -TAAGAGACAACACAGGGCTGCATAGGCGAGCGTCTACTCTTGTTTCAACACAATGGTATA -TGTTGTGGTGCACCTTGTACTGCCAAGACTTGTATGACAGTGATCATGTGACCTCATCAC -ATGCACATGCCGATGCCGATGCCTCGTGCAATTCGCTATTCCTTCCTTAAAAGATTATCC -CCGACATTCCAACTGGGGAATGTAGGGCTGTCAACAGCGATGTGTCCTATTGGCTGTGTT -TACTTGTAGGCCTTCTAGACGTTTTCTGAATGCAGTACATTGTATGTCGCACAAAAATAG -CACAGAGAAACGGTCCACGGCTGTGTAGGAAATATCCGGGCATGTATACCTAATGCGTCC -GTTACTACTGGCTGGTAATATGGACGTAGCCTAGACCGAAATTCTTATGACGATCCCAAT -CCAATCTCTTAGATCGATAACTTGGACTTTATACATATTAGTTATACACCAAATTAATAT -CCGGACTCAAGGAGCAATCGACGTCTCATCCGACGAGCATTCGAGGTCTCATCCGACGAG -TCGGCCGAGAACCTCCCCGCAAGTTTCTCCCCCGGACGGACTCTTTTCCCCCGAGGTCCA -CTAGATCATTTTTCCTTTAACTACTTTCTCCCTTCCCCTTTTGCATACTACAATATACAC -TCTCATATCCTTTTCAAAATGAGCGAAGGCAAACTAACCGGCGCGAAGGTCGCCGAGCAC -AATTCCAAAGACTCTTGCTGGGTCATTGTGCACGGCAAAGCCTACGACGTCACCGAGTTC -CTTCCGGGTATGTTCCTCTTTACTACTGCCGTTCTCCTCGAATGCGCACTAACATTTCCC -AGAACACCCTGGTGGTCAAAAGATTATCCTCAAGTATGCCGGTAAAGATGCGACCGAAGA -GTTCGAACCAATCCACCCACCCGACACCCTAGACAAGTTCCTGGACCAGTCGAAACACTT -GGGACCAGTAGATATGTCGACCGTCGAGCAGGAAGAAAAGGCATTCGACCCGGAGGAGGC -CAGCCGCCAAGAGCGCATTGCCCTCATGCCCTCCCTTGAAGCATGCTACAACCTGATGGA -CTTTGAGGCGGTGGCCCGCCAAGTCATGAAGAAGACTGCGTGGGCTTACTACTCTAGCGG -AGCGGATGATGAAATTGTAAGTTATCATGATATTGACATCGATACCCTATGCATCAAATC -ACCCAATACTAACCCATGGCCAGACAATGCGTGAGAACCACGCAGCCTTCCACAAGATCT -GGTTCCGGCCCCGAATTCTAGTTGATGTCGAGCACATCGATATGAGCACAACAATGCTGG -GCACAAAATGTTCAATTCCATTCTACGTGACCGCTACAGCTCTCGGCAAGCTAGGCCATC -CAGAAGGCGAAGTGGTGCTCACAAGAGCGGCCCACCAACACAACGTGGTCCAGATGATCC -CCACATTAGCTTCCTGCTCTTTCGACGAGATCGTCGACGCCAAGCAAGGCGACCAAGTGC 
-AATGGCTTCAGCTGTATGTCAACAAAGACCGCGAAATCACACGCAAGATCGTCCAGCACG -CCGAAAAGCGCGGCTGTAAGGGCCTCTTCATCACTGTCGACGCCCCCCAACTCGGTCGCC -GCGAAAAGGACATGCGGTCCAAGTTCTCTGATCCTGGCTCAAATGTCCAATCTGGCGGTG -ATAACATTGACCGCTCGCAGGGCGCCGCCCGCGCTATCTCCTCCTTCATCGACCCTGCCC -TCTCATGGAAGGACATCCCCTGGTTCAAGTCCATCACGAACATGCCAATTGTACTGAAGG -GCGTGCAGTGCGTTGAGGATGTTTTACGCGCTGTCGAGGCAGGCTGTGACGGCGTCGTGC -TTTCCAACCATGGTGGACGACAGCTCGAGACCGCCCGGTCAGGTATCGAGGTGTTGGCTG -AGGTGATGCCTGCTCTCCGTGAGCGCGGCTGGGAAAAGCGCATTGAGGTCTTTGTTGATG -GCGGTGTGCGTCGCGCAACTGACATCATCAAGGCGCTTTGCCTGGGCGCGACGGGCATCG -GTATTGGCCGGCCCTTCTTGTATGCGATGTCTGCTTATGGACTCAATGGTGTGGACCGGG -CCATGCAGCTGCTCAAGGATGAGATGGAGATGAATATGCGGTTGATTGGAGCTACCACTG -TTGCGGAACTCAATCCTAGTCTCATTGATACCCGTGGGTTGTTGGGTGGGCACTCTGGAG -TTGTGCCGTCTGATACCCTTGGGCTGGGTGCCTATGATCCATTGGAGGCCCCAAGATTCA -ATGAGAAGCCGAAGCTGTAGTTTTGTTTGCTGCATTTGTTTGTCATTTTTGTTCATGTTG -ATGCTTGCTGGGCTTGTCCGCGTCGGTCTGCAGGATGGTATCTTCTGGACTTGTAACTAT -TGAGTGCATATATATCTCTATACCCATGCAATGTGTACGAATCATTTATTTCGAAGAAAA -TTGATTTTCTTTCGCTTTGTAGGATCCTGGTGCTCTCCGGAAGCTACAGCAGTTGAAGCC -AAGCACAGACAGATATATATTTTGGCAATCAGGTAGACATCGACTTGACATTGAACAAGA -TTCGATTGTTAATAAACCCGGAAATAATTAAGCAATCAGAAGTCTAACGAGGTTTCCTTG -CCTCAAATCTCAGCTGATCTCATTTTCAATCTCGGCGTTATGTTAGTATTGCTACCAAAA -TTCAGCCGTCAACATATGCATTATACAGATTATCACCCTTATCCTTTTAATCGAAGATAC -AAGAATTGACCTTGAGAAGCAAGTTTGAATTTCCTCAGGCCGAAACCAAACGGAAGATGT -AAATACCTAAACAAAGACGCCATTTTTGACATAGTTAAATCGCTCAGCTTCTATTTACAG -TTGCAGTTCACCAAGCGCAAACCACCAAAACATGTATGGTATTATGAACATGAAGCAAAA -AGCGCACGCTATGCGTTTTTGTGGGAGATCAGAGGTTAAAAACCCTCGGGGCATTACCCA -GTCGGCCACTATCGCGGTACATGCAAGCAGCGTTGACCATCCAACCACCATTTAGGTTCT -CCCCACCCCAGATATGAGCCCAAACCACACCTAGAATCCCGTATCCCACGACCCGCGCGG -CATCCACCCAGGCAGACAGATCGAACAGATCACGCAGGAAGCCGGCTAGCGAAGCAAGGC -GGCTCAAGCGCGCACGCGAGCGGCGCGAGGCACACAATCTTTCCCAGGCGCGTTGGGCCT -CATCCCAGAAGATGGGCTCCTGTCCTGCGGATTCATGGACGTGGCCGAAGACGTGGAGGG -CAGGACGGACGCGCCATGATTCTGCTAGCAGATTGGGACATCCTGTAGAATATATGGGGG -ACAGATCGAGATGGGCTTGGGGAGGGGTGTGGGTGACGAGGATGTCTGTTTCTGGGGGTA 
-CGGTGCCAGACCAGGCGTCGTGCTGTGGGGGGTATGTGAAGGCGTGCTCGGGACCGAAGG -GGACGATGGCTGGGACCTGGGGGGCGCCGTAGATTGTGAGGGAGCGTGAACGTGAGCTGG -TGAGGGGTGTTGTGGACGAGGGTGTTGAGGTGTCGGCGAACGAAAGCGTCACGGCTGAGT -GTTGGAGGTAGTGGATGTCGCCCCAGTCGATGCGAGAGGCGCCGTTCAACTCGTGGAGTG -AGTGGATGGATGCCGTTGAGGAGGAGACTGCAGCGAAGGAATCCCCCGTAGAAGCTGGGT -CTATGCGGTCCTCGTCCAGGCGTGACCGCACGTCGAAGTAGCTGTCATGATTCCCTCCGA -TGGCGATTTTGTGTGGGTGGGGAAGACTTCGGAGCCAGTTTACAGCGGCTTGGATATCTC -GCGCAGAGCCATCATTGCAGAGGTCGCCGGCGTGAATGAGAAGGTCTCCATCAGGTATAT -CTGGCCATTCTAGGGTGTGGGTGTCGGAGATGCAGACTACTCGGATTGGTTGAGTGCCTG -GTGGAGGGAGTCGAGGAGGTCCGCGCAGACGGAGTAGGATCTGGTGCACTGGATAGAGTA -GGGCAACCAGGGGAGAGGCGATAAAGTAGTCTGGAATCGGGCGGCGGTTGAAGGGGTTCG -CCATGGTGATAGAGACGCCAAGGGCTAGAGTAGAGGAGTTCAAGAGGTGAATCCACCGAG -GTAAAGTCAGTTACCCCAAGCGGGGATTGTGACATCAGGACTATAGTCGATCCTTTTCTT -TTGCTCTTTTTTCTATTTTCAATTGATATTGGGTTAGAATAATACTATGATAAGACAAAA -GAATATGTAGCATCAAATATTCCTCGCCGAAACAGTCACAGCTAAAACAGCTCCAAAACA -TCACAATATATCTTGCACGATATATCTGGACTTGGCTCATCGGCCTGTAAGGAGTACTTC -CCTACTTTATATGTTTACGCCAGTTAAAATATCGACCTCTTTGAAACCTCTTTCTATATT -CTTATAGGCTATTTGTTGTCTGTCTCAATGCGTCCATATTTGCCACCACGACGTTGTGGC -TGCTTGCGGAAGACCATCCCATCTCACGTCTCTTTTCTCCGAGTTGAACGATCTCTTCTT -TTATAACTGAAGTTTGTGATCGCATCGTGGTTCATCCTATATTCGCTTTGGCCTTTGCCT -TACCTATTTTGTTTTCTATTTTTTTTCCCATCAACATCATTTTTCCCTCCCCAAATTATT -CTCATTTATTTGATTCCCTATAATATTCCCTGATTACTGTGTCTTTTACTGTGTGCACGT -AGCCCCCTCCAACATCCACCGCCTCTAAGTTCCGCTTGAGCCAACCAAAAGCCAAGTGTG -CCCCGCCTCTCAGCTTGACCTCCCAAATCTCCACACCATGGAGGGAGTTGATCTCACCAA -GGCGATACTCAACAAGGGCAAACAGATGGCAAACGTCGCTGCTTCAGCTGCCAATGGCAA -CGGCGGCAAGAAGAGGAGAAAGGGTACCGACTTGAAGCCCATCGTCACCAATGAAGCGAA -CCCTGCGCCCGGCACAGAATCGTATGTTTTGAAAGGGTTCCTAATGAACAAACTTACTAA -TTTAGTTCCCATAGAACCGGCGCTGCTCAGAACCCCAAAGCACCTGGATCGCGCTCATCA -TCATCATCGTCGGGCGAAGAACTCGAGGCGACTGCGGAAGAGGAGGACTCCGAAGATTAT -TGCAAAGGTGGATACCACCCCGTCGCTGTCGGTGAAGCATACAACAATGGTCGCTATATT -GTCGTGCGCAAGCTTGGATGGGGCCATTTCTCCACAGTCTGGCTCTCGCGAGATACAACT -ACCAACAAGCACGTCGCGCTGAAGGTGGTTCGGTCCGCCGCACACTACACTGAAACCGCG 
-ATCGACGAAATCAAGCTCCTCAACAAAATAGTTCAAGCGAAACCCTCTCACCCTGGTCGG -AAGCATGTGGTCAGTCTGCTGGACTCGTTTGAGCATAAGGGACCGAACGGTGTCCATGTG -TGTATGGTCTTTGAGGTACTGGGTGAGAACCTGTTGGGTCTGATCAAGCGGTGGAACCAC -CGCGGTATTCCCATGCCTCTGGTCAAGCAGATCGCGAAACAAGTGCTTCTGGGCTTAGAT -TATCTCCACCGTGAGTGCGGGATCATTCACACGGATCTGAAGCCGGAGAATGTCCTGATC -GAGATCGGAGATGTGGAGCAGATCGTCAAGGCACACGTCAAAGAGGAAGCCAGTAAAGAG -GCCAAGGAGAAAGAGGATAACCGGAATGGGCGGAGACGGCGACGCACACTGATCACAGGG -AGCCAGCCGCTCCCTAGTCCACTCAACACCAGCTTCAACAGCTTCGACTTCAAACATAGC -TCTTCCAACTCACACAGTAGCCTCAGCCAGATGGTCAACGAACCTGGAGGTCAGTCTCCT -TTGATGCGTCTATGCCATAACCGTGCGTGGACTGAAATTGATCTAACATCTACTACAGAA -ATACCCTCTATGAAAGAACTACTTGGGGTCAAGGAAGAAGATGTGAAGCAGAACCAGCGA -GAGAAAACCGCGTACGTGACTCCGTCTCATATTCATTAGTCTGATTGGCAAGAACTTGAT -CTAACAATATACCATACAGTGATCTCTTGGAGCGAGAGGTGTCTGGTATCTCTCTTGATA -AAGGCTCTTCGAGCAAATCAGAAGAAGAACTTGATGTCAACATTATTTCTGTCAAGATTG -CAGACTTAGGCAATGCTTGTTGGGTCGGCCATCACTTCACCAACGACATTCAAACTCGTC -AATATCGTTCACCCGAAGTCATCCTAGGTTCAAAGTGGGGTGCGAGTACTGATGTGTGGA -GTATGGCATGCATGGTGAGTGACATGATCATCACAGCCGGAGCTGATCAACATACTAATT -GTCTAGGTTTTCGAACTCATCACTGGAGACTACCTGTTTGATCCCCAATCAGGGACCAAA -TATGGCAAGGACGATGATCATATCGCCCAAATCATTGAACTGCTCGGTCCCTTCCCCAAG -TCACTCTGCATGTCCGGAAAATGGTCCCAAGAAATCTTCAATCGCAAGGGTGAACTCCGC -AATATCCACCGACTCCGTCATTGGGCCTTGCCCGACGTCCTGCGTGAAAAATACCACTAC -TCCATGGAGGAGAGCATGCGCATCAGTGAACTATTACTGCCCATGCTTGATTTGTCACCA -GAGAAGCGGGCAAACGCTGGTGGTATGGCAGCTCATGAGTGGATAAAGGACACACCTGGC -ATGGATGGGATCGACCTGGGCATTGCCCCTGGCACCCGTGGTGAGGGCATTGATGGCTGG -GCTACCGAGGTTAAAAAGCGATGATGGTGATCTTCGTGGCCTCTGCCTTTTTATTCTCTT -TCCTTAGCCTAAATGGGGCTATCCCCCAGGTCTTTATTGCGGACATGCCCTTATGGCGAT -GATGTGAGTTAACGATCTTTTTTCTCTTTCCATTTACGGATCGATGCTGGTTGGTTTATG -TCTCTATTTCTTCCTTTCTTCAAATCTTTCTTTCGTCCTATGCTGGTAGTTGGTTGGATG -GGGTGTTTATCGACATGCTCGTATGCTTGTGCCCTCGCCTTTTCCCCCTGCACGTCTGTC -CACCTTTTGTGTCTGGTGCTATTGCATCTCATATCTCTCTGATAGACTCGTCTTTCCATT -TTCATTGATCATATTCGTGATGGGACCCCTCTTTAGTTTCCTGTGTCATCCCTTCCCACT -CATCTGCCAAGCCCGGATAGTATTCTAGATTCCTCCACCAGACAGACAAGATATGACCGT 
-TTGCTATGTATGGATTCAATCCCCGTACAATTTTTTTTCTTTGAATCATCATTAACGCTC -GAATGTAATGCAAGTACATTAGAACCTGATGTATGAAGTTTGTCAACGTCTCTACGACGG -TATGTGTGTATGGTCAACTCTCCTAATCTCATTGCTGCTCATCCGTTCTTTCGCTGGGAA -GCTCGCTCGGAAACCATCCATACCGAGAAAAATCAACATAAATCTCGCCCATGCTATCTG -TAGTAACTTCCACACCTTCCTGTGATAGAGCTTCAGCCTGACGTTGCGCACTCCCGGGTC -CACTATTATGGTGTCAGCCGTGAACAAGTGTAAAAGCTCATTTTCAGTCTGCGCCATGTA -GTCCAATTTGATATAGTATAATATCCCAAAATAAGAGTAGCTTGGAGCAAACGCACCGAT -GCGAAATCATCCCTTTGGAATTAATCACTCGCTGCCACGGCACGGTGTTCGAATTGAAGT -GCGAACCAGACTCCGGCGATGCAAGTACCTTGAGGCAGACCCCAACCTGGCGGGGACGCT -GAGCTATCACCAAAGGAAGATGGTCAGTGCAGCTAAAACAATAATTTGATTGCATCCATA -CGTTCACCGAGGAGGCGTGCGATATGACCGTAGGAAGTGACTTTGCCCCGGGGGATCTCT -TGTATGGCTCCGTAGACTGCGTTGGCCCACCACTCTGCTTCTTCGGAGCGTGGCATGGCG -CAAGACTTCAAGTAATTTCAGCAACAAAATTCGAGGTCTTCCTCTATTTTGCCAAGTCCA -TATCCAAGTATCGTTCACTTATTGACAGCGCTAGGCGGCGTACAGTTCGGGAAAACATTC -ATCCGAAATGATCACTGAGCTCAACCTGCACCTCTACATTTTTCACATTCACACACCATA -CTGAAATCATGCCTGGCTCACTCGCCGATTACCTGGCAAAGAACTATCTGACCGCAGACC -CCGTCACCGAGCGGCCAAAGAAAAAACGCAAGAAGACCAAAGCCATCGATACTGCAGGCT -CCGGTCTCATCATTGCCGACGATGATCCGCCAGACATTCGCTCATTAGGAAACACGGGAG -AAGACGACGAGGACAGACCTTATTTTGAGACATCCACCAAAACTGCGGAGTTTCGCCGCG -CAAAGAAGTCAAGCTGGAAGACGATTGGAGGACCCACGCCCGGCCAAGGCGGAAGTGAAC -AAGAAGCCGCCGACGCAATTCTAGCCGATGCAGCAGCCGAACGCGCTGCCCAGCAAGACC -CAgaagatgaagatgcgcttatgatagaaaatgaagatgaTGGCGCGATGCGCATGGAGT -CCGGAGCACGAGGAGGCCTCCAAACAGCCGCCCAAACCGCAGCGATGGTGAAAGCCCAAG -AAAAGCGGCGCAAAGCTGAAGAAGAAAAGTACCGTGATCCCTCAGCAGTAAACGAAAAGA -GCCAAGAAACAATCTACCGTGACGCATCCGGTCGAATCATCAACGTGGCTATGAAGCGCG -CCGAAGCCCGGCGCTTAGAAGAAGAAAAGCGTGAGAAAGAAGAGCAGGCGCGCGAGGCGC -TGATGGGAGATGTACAGCGACAGCAGCGCGAGGAGCGCAGACGGGACCTACAAGATATTA -AGGCCATGCCGCTTGCACGGACGATCGAGGATGAAGAAATGAATGAAGATCTGAGGGCCC -GCGATCGGTGGAATGACCCTGCGGCGGAGTTCCTTACTGCCCGCCGTGATGCTGGAGCTA -GTGCTACGGGGCGGCCGCTGTATCGTGGTGCCTTCCAGCCTAATCGATATGGGATTCGCC -CAGGGCATCGCTGGGATGGAGTGGATCGGGGTAATGGGTTTGAGAAGGACTGGTTTGCAT -CGCGGAACAAGAAGACCAGATTTGAGGCTTTGGAATATCAGTGGCAGATGGATGAGTAGG 
-TGGCTTCTCATGCTGCCAGTCTTGCCTGCTTTCCATCTTGTTTAATACCCATAGTGGGTG -CTGGCCACCCAATGCATCAAGTGATATTACGTCTCGAAATGTTTGACTTGGGTTGTCTAT -ATACTTGTAAAGGCTATGTGGATATCTCGATCTGTCGTCTACTCCATAATCCGATGTCAA -CATTTCGATTAGGCTTACTTTAGTACGGCCTTTGCATGAAATATTTCTTCCTCGAGCTGT -AAAATAAATCATTGAATGATCACAACCAGGTCTATATAGATGTAAAAAAGATGAAGTTCG -CAGTCATCGGAGATAAAGAGTGCCGATGCCAAGGTTGCTACGGGAGTCTGGATGTTTACC -CCAAACAAAAAACTTCCCGCCTTTAACAATCACCATCTACCCGCTACAGATCGACACTCT -GCTCGACGTAAAGATGGAGAAGCGAACGGGAGAGAGTCCAATGGACTTTGAATGGCAAAA -CCGAGCTCCCGGCGATGTGACCTCGCCATTCTACCAGCTAGGAGCGCAGCATGACAAGAA -ACGTTCGTATCTCAGTCTGAACGAGGACCACTGCAATCTCGAGCCTTCTGACACTCTCAC -AGGTACATACAGCGCATTTGAATCCCCTCAGAAACAAGCGCTACCATCCCTACGCGAACC -AAACTCTCAACCTTTCCTCTTCTCACAGCCCCGACAGCAACCCGCACCACTATCGCCAAA -GGCTAAATTTGGCCAACCTGCTTTCCAGACGCCACGCAAATTTGACGTAGATTTCTCATC -AGGCGCCGAGAATATGTCGTCTCCAGACTACGCAGACAACGAGGATACGCCTGAGCAGCA -ATACAAGCCGGGGAAGGGAACTGCATCACTATTCAACTTTCATGGCCGGAATCCGCAGAG -CCCAGGTCGAGGCGAAATCCCTCGGCTGACCCACCACTCCAACGCAGCTTTGCACCGGAT -ACAAAAGAAACGACGCAGAGACAAGGAACTCGGTCGACAAATACGAGTCGATAGCGACGA -CGAGAGCGAACAAGACCGGCCTAGCAGGGAAGAGAAATCAATAAAAGCCACAAAACAAGG -GAAAGGACAGAAGTCCGGACAGGGAAGTTCGCGGGTGTCATCCTGGTCTGAGTTCTTCAG -CATGCTGGAGGCACACCCGAATGTCCCCGCCATCCTGTCATGGTGGGCCCAACTGGTGGT -AAACCTGTCGCTCTTCTCTCTGGCAGTCTATGTCGTATTTGCATTTGTGTCTGCCATCCG -TGGTGAGTTCGAGCAGGCTGCACAAGAGATGTCCGATACTATCCTGGCCGACATGGCGGT -CTGCACGAAAAACTACATCGACAACGACTGCGGCCGGTCGAGTCGTGCACCGGCCCTGGA -GACTATTTGCGAGAACTGGGAACGGTGCATGAATCGTGACCCGGCGAAGGTTGGCCGCGC -CAAGGTCTCCGCACATACAATGGCCATCATCATCAACAGTTTCATTGACCCCATCAGCTG -GAAGGCGATTGTGAGTTTCCCAGCGCACTCGTTTGTGAATCTTTACTGACCTGTACACTA -GCTGTTCTTCCTCGCAACCATATCCACCGTCACCATTGTTAGCAACTGGTCATTCCGCTC -ATTccgacaccgtctccagcaacagcactacgcgcaacacccaccccaaccccaacccat -gcacccacagctgcagcaaaaccCCTCATTTGGGTACTATGAGCAGCAAGATCCTCGACA -AACCCAAGGCGTCGCCTACAATGAGAAACAAGATCAGCCCCTGATGCTCGAGAACACACG -GGCAATGGACTTTGTCACTGAGCGATCTCGTGAGCGCGAGCAGCACCTCCGCACACCGAG -CCCAACTAAGCGTAGGTTTGCCTAAACCACTATTTTCTATTTCTGGGCCTGGGTCTGGTC 
-GGCGTACTTTGCTTTAAAATGGATGGGTTGGCAGCATACCTATCGGGAGTGGGATTTTGA -ACTGGTCTAGTAATGGTGTTTTATTTCATATCATTGTCATGGTATTTTCTTTTTCTTGTA -TTTGTTTCGAGCTGATATAAGTCCGGGGGGCTATATCGGCTTCTCTGCTGTCTCTACCAT -TGGTACCAGGTACATACTGGTCTTTTATCCAGGCTGCCATAGATCTCCTTTCCTCTTAAT -TTTCTTTTCTCCCACTTTTTGTGAAGGTGTACGATCGGAGCAATGCAGTATCATGTATGT -CGAAACACGTGCATTTGGTCTGGATTTGCCGATGCCGAGTCGGCCAACGTCCGCCGGTGG -CCGAGGTGCTCGGCCATCGGCCGTTCTTTGCCAATTGCCACAGAGCAGCTTAAAGTAATA -AGGTGGAGTTCTTGAAGAGCTGCATATGATTCCAATTGCAATCTTCATTCGTTGATCCCG -TTATTCATTCCGGCCCAATCCCGAACCCGTTTCCGATTTTCATGCTCTCGGCGAATTTAT -CTATGATTCACCTGCACATCAAACAGCATGCATACATCTAATGCTTTGATATTGGCCGAG -CTGACGAAAATTTGCAAGTGAAAATTATCTACTCTACCCACTATGAAGCAGTTAAAAGGA -GCTCCATATCCGTACAGTAGTAGCTTTCATATAGATAGTTCCAACAACAAACTTCCTTTT -CCGTCTTGTACCTATTACCTATTGTGTAGCATAAGATAATGCATCACGAATCACCCGATC -CAGTCGACTGTCCAGAGGGAGGCCTCATTGGCAACCTCGTTGTGGCAGGTGGCTTTAGCG -CTATCATGGGCGGACTAGGCTTGATGAACAGCATCGGTATCTATCAAGCTTGGATCTCAA -CACACCAACTTTCAGACATAAGCGAAGGACAGATTGGCTGGATCTTCGGAATATACAACT -TCCTTGTATTTTTCTGCGGAATCCAAATCGGGCCGATTTTCGATATCAAAGGGCCGAGGT -TGCTCATGTGGATCGGCTCAAGTCTGCTCGTATTGACTTTCATCTTAATGGGCTTTTGCA -CGGAGTACTGGCACTTTCTGATTGTCATTGGCATCCTAGGAGGCGTGGGTACATCATTCA -TATTCATTGTACCTGTCGCGAGCATTGGACATTTCTTCGTCAAACGCCGAGGGGCAGCAA -CGGGCCTTGCATTGGCTGGAGGTAGTATCGGCGGCGTGATCTTCCCTTTGGTATTGCTAT -ACCTCGCACCGCAAATTGGCTTCGCCTGGGCTAGTCGGGTTGTTGGTTTGATCACACTGA -TCCTTTTGATCCCCGGCTGCCTTCTAGTTCGAGCGAACTTCCCTCCCGGGGTCCGGTCTA -CTCCATCATTCAAGGCTTTCCTTCCCGATCTCACTATTCTTAGAGACCCAGTCCTTGCAC -TGACGACACTGGGGGTCTTCTTCATTGAATGGGGCTTCTTCATTCCACTCGAGTACATTG -CCTCGTATGCAATTGCGAGCGGGATCTCCACCAATCTATCGTATCTGATGGTCGTATTCT -TGAACGCGGGTTCTTTTCCTGGTCGTTGGTTGCCAGGCATCATTGCTGATCGCATTGGTC -GGTTCAACACATTGATCCTGACAAACATTCTTTGTTTGATTTCCGTCCTGGGTATTTGGA -TGCCCGCAGATGGAAATCTTGTTGCCACGGTGATATTCTCTATTGTATTTGGATTCGCCA -GCGGGAGCAATATCAGTCTTGTGCCAGTCTGCGTCGGAGAGCTGTGTCCTGTCGAGAATT -ATGGGAGATATTATACCACTGTGTATACCATTGTTAGTTTCGGGTAAGTGTTTCTTTTTT -TTATACTTGAGTGTCATTTTATGAGACCATATAGCTAACATATATTCTTTTCAGGGCATT 
-GACCGGCGTCCCGATTGCTGGAGAGATTATAAGTGTCTGCGACGGACAGTATTGGGGATT -GATTGCGTTTGCTGGATGTTCGTATGCAGCCGGACTGAGCTGTTTTATTGTTGTAAAACT -GCTACGAGACAAAAGAGGCAAATCGCTGGAAAAGGTTGAGGCGGGTGCTGAGTTCTGACA -TGCCCATGACGAAATCTAATAGCAAAGAGCAATGGAGCATATAGGAGAAAATTAAGATAG -AGATAGAGCAATAAAATCAATCAAATGTCAAATACTGAGAATTGTTATGACATTTGCCCT -TTCTGTCGAAAATAGGGCTAAATGACTGGTTTCTATTTTATACTCCAACCCTATAGGACC -CGGAAAACCGATCAACCAACCATGTTGTAGATTATCCATCTGATTTCAAATAAATGATAT -ATTGGAGGTGGAGCTTAATGAAGCCAATCCCGTCCGGATTTAAGCATATGGATAATAAGT -AGGAAAGGGATGCGGTTCCTTTCATACTGACGTAAATTTGCAATTTGCCATTCGCCATTT -CTGCGGCCATTTATATCTGCGGCCATGTATAACTGCCGATTTGCTCTGTCGCTTCTGCTT -CTTTGAACATAATATCTGATTATTCTTTTGTCCAAATCCAGTGCATGACTCATCGCCCGT -TCTTTGTTGTTGGTCAAGATACATAAGGTTCCAGCTGTTGTAACAAGATGGGCTCAATCA -CCGACGATAAACGAAACCTCATTATAGTTTCAAATCGCCTTCCATTGTCCGTTAAACGAG -TCAAGGGGACATTTGAATCATCCCTCTCGAGCGGTGGCCTTGTCACTTCTCTCTCTGGGT -TGACCAAGTCCACCCAGTTCCAATGGTTTGGCTGGCCCGGAATCGAAGTCAAAGACCCAA -GGGAGCAAGAGGATGTACAAAAGAGCTTGGACGCACATAACGCCATTCCTATATTCCTAG -ATAGCTCCCTAGCCCATGAGCACTATAACGGGTTTTCGAGTAAGCTTTGTGCCCTGATAT -CGTGATCCACGCTGACCGCCAGACCGTATTCTCTGGCCCATTCTCCACTATCAGTCCGGT -GTAGTTTTCGATGAGACTCCATGGCAAGCATACAGACGGGTAAACGAGTTGTTTGCAGAT -GCAATTGCTCAAACGGCGGAGAGTGGAACGTTGATCTGGGTCCACGATTATCATCTCATG -CTTTTGCCCCAGCTTCTCCGTGACCGACTGAAACAGCAAGGCAAATCTTGTGCCATCGGC -TTCTCGCTCCACACACCTTTTCCAGCCGGAGACTTTTGGAGAAATCTTCCAGTGCGGAAG -CATCTTATTGAGGGGATGTTGTCCAGTGACTTGATTGGGTTCCATACGGATGAGTATAAG -CAGAATTTCATCGACACATGTGCCAGTCTCCTGTGAGCATTAGCCCTGATCCACCCTCCC -CAGATGATGAAGAAGGATCAACTAACACCACAACAGAAACGCACGCACCGAAATTCCAAA -TCAAATCCAATATCAAAATCGGCTCGTCTGTATAGACAAATTCATTGTCGGCATCGACCC -CCAAAAGTTCACCGACACATTGCAGATGCCTGATGTCCAAGACCGCATCCGAAGTTTGAA -AGATCGCTACAAAGGCGTCAAGGTAATTGTTGGCGTCGACCGACTGGATCACATCAAAGG -CCTCACGCAAAAACTCAAGGGCTTCGACGCTTTTCTCGATGACCACCCAGAGCTGCAAAA -CAAGGTTGTTCTCATTCAAGTCGCCGTCCCAAGCCGCGAGGATGTAAAGGAATATCAGGA -TCTCGAAACGGAACTGTGTACTATTGCAGGGAAGATCAACGGAAAACACGGTAAGCTCTC -GTTACAAAATATTCGAAGTATGCTAACTAGCCAGCCACCCCCGAGGGTACCCCGCTGTTG 
-TACATGCATCGTTCTGTTCCATTCAACGAATTGACCGCATTGTACTCCGTAGCTGATGTT -TGCCTTCTCACCTCCACCCGAGACGGAATGAATCTCGTGGCCTTCGAGTATGTGGCCTGC -CAGCAAGAGCGCCATGGTGTTCTAGTTCTCTCCGAATTTGCCGGTGCAGCTTCCTTCATG -ACTAATGGTAGTATCCCGTTTCATCCTGCAAACAAGACGGAAATGTCCGAGGCAATCTTC -AATGCGCTCAATTTGGACCCAGCCGAGCGAAAGGCCAAATACGAGTACCTTCGTGGCTTT -GTCAATACCAACACAAGGTTTGAAGTGTTTCCTATCTCAGTATTGTCAGTTGCTGACCTT -GACAGTGCAAAATGGGGCGAGACCTTCATCGAAAGATTATCCCGTCGCTGTAGAAGGCCT -GGTACATCATGCTGATCGTTGCAGATTTCTGAGCATAATGTATGGCGACTCTTCAGTCCC -TGCAGATTCTGCTTGTCTGTCAATCGCAGGGCGGTTGTTGTTCCTATTGGAAATGACTTC -TCGTCTCGTTCTGTCGGCGGGTACTGAGTGCTAGGAACTTGGTTGAAATTTGCCTCTTAG -CTTGGACTGTCCGTATATTCTTCGCTTAGTAGTCAAGAACTATTTTGCTGAGTCGCCGGT -TGCCAATTCGTCTCCGTCGCTTTTTTGGCTCGTCTTGTTGGCTATGTCTAGGTTGCTTTT -ATTGAGCATCTGTTTACGTTGAAGTAATTGCCGCAATGGCGAGGCAGGGACCCCCCAAGG -CGGGTTGTATGAATCCAAGGATTCAATGTACCTTGAACTGCAAATCTCGATCTCCAAGCG -AGATGTAGCCAACTTTATCTCGCGCACCACGTCGAATTTGATCGTGATATTTCGTACAGA -CAAACCCTACAATCTAACCAAAGCATTCACAAAATTTGATATTAATACCTATATCGAAGC -GATCAGGACGTCGCATGTCTCACAACAGCATTCATAATGTAGAAGTAAAATGTACATCTC -CAGGGCTACCTACCTCCAGGACTGTACCTATGCACCCCAGATTCCATCGGACCAATCAGC -GTTCAATCTCCCTGCGCCAACCGCCAATAAGCGTTGTGTACGAGTATATGGATTATCCTC -GATGTCGATTGGTCCTTTTTTGTTTTTCTTTCCTCCTACACATCCTCTCTGGTTGCATTT -AGCTCCAGTAACGTCATGAATCTGCTATTCTCAATTGTATTGGACTTGACCTCGTTCACT -CTGCACGTTGATAATTTCCCTTTTACTCCATTGCAGGTTTGATTTGTTCGTATACCTTGA -ATCCCAGGTCAATTATACTAATGTCACGTCTTATGATGAGTCAATGGTCTCGCTGCCTAG -GCGACTGAGCGCAGTTCGCTTAGTTGGCGTGTACGCTTAGTCGTTGTCAAACTCATGCTT -ACTCGTCAAACAGTTGTATTTTTATTTCTATTGACTTTCAGCCTCGATTCATTCATCTCC -TCTTGGTGAGTTTGTCTCCTTTTTCCTTCACTGTCCCTTTCGTTGTGCAGTGCTCTCACT -TTCCTACTTGTATTGGCTTATGTCCTCTCCCTTTTGGGACAATGATAAGAAGTATGATGA -AACATTCATCATTCCCCTTTCCTGAGTACCTACAAGCCTTTCTTCTCTATTGACGTCTCG -CTTGCTTAGTGCCTGCAGGTATACTCAATTTGACTCAGCCCAATCCACAATGTACGAGGT -AAAGTGTATGTCCCCTCCCCCAGCCCCATTCCCTTGCCATGGCCTTTTCTTGAACTGATG -GACTTCATGCTCTTCTTGCGGTGCAATGATGATGTACAAATATCTTTTCTGTGACCTGAG -CTACCATATGAACATGAGCGTATCTGCTTGATATACTCTTTGCTCGTGTGTGAGAAGTTT 
-CACCTGCTAACAATATGGGTTCTCTAGTCTCGCTGTGAGATTCTGGACTGTCAGCATGGC -AGTAGTGACAGCAAATGGGTTGGCACAATATACCAATTTTTAGGTGAAGTCTACGCACCT -TTCCCCCAATTCCATTCCTTTGCCATGATCTTCATGTTTTCCCAGTGGTCCAATGATGAT -GGATTGATACACAAACCTCTGCTATGTGTTCTGAGGCCTAAGGAAGCACAATGTGTATGA -GCCACCGTTCTGAAACCATTGCCTTCTTTACGAAATTATGATTGGCATACAACCTTTACA -TATACCTCACTCGTTTTTAAAGTTCTATCTGCTACATTACGTTTCTTTAGTCTGTCCGTA -CCATTCTGGACTGCCAATCCGCCAATCCTATATATCCTTTCGAGGTGAAGTATACACACC -CCTCAATTCTGATCCATGATTGCCATGAGTGTATCTTGGCCTTATGGGTTTTGTCTTCTC -TAGGTGGTGCAATGATGAATCGCAAACATGGCATTGGTGACCTGAGGCCTGATGAAGCAA -ATTGTGTCTGAGCCACCATTTGAAAGCCATTACCTTATCTACAATATGATGATTTTCGGG -GTTCCAACTGCTGATTAAATATCTTTTCCTAGCAATCCTTTACCTTTCTAGGCTTTCATT -GCTGTGATAACGGCCAATGCCTGTTCTCATGCCTTGACTCAATGCTATCTTCTCATTTCA -GAGGTAAAACACATATTCCGCCCCACCTTATCTTTGCTCTGAGATCTTCTTGATCTGATT -GGTGTAGGGTGATGTTGGTGATCTACAAACCTCTGGGCATTGAGCCCTGGTGAAGCACAT -TGTGTTTAAGCCACTGTTTGGACATTGACATGACTGCTTGATACACTCTTCGGGTATTTT -TGAAATTTCATCTGCTAATAAAATGCCGCTTCATAGTCCTCCTCTGCCCCTATGATTCGT -GCTGTCAATATATCAACGATAACGGCAAATAGCTATTCTCTTCAGCTTTTTGATTAATCG -CCGTGGTCATTTTCCTGCAATATATACCTGCTCGTTCCTGAGACTGACACCCAATGATAT -TGTGAGTCCATTCACCCCTGCATCTTATGGATCTCAAGTCGGCGATGATGAAAATGGTCA -TTCCTGAACCTTATGTAGTATCCGCCCATTGTGGCTCCAAAAAAGAGTCCTGAAGAAATC -AAACCCTGTCTGAGCCTACATTTGACATTTCTCTTTGAATATCCATATCATGGTACTGGA -GTGCATTTGCTAACATTACTGCATTTTCCAAAGAGCCACCATCGTCCATTTCTTACTCAA -TGATCCTGCTCATTTTCAAAATGGCACATGCTGGTGCAAAGGTAAGTCTATCCCTGCATA -TTACAGCTATCGTTTTGGCCATGATGAAAAGGGCTATACCTGTTGGCTTCGGCTTGTTTG -ATGTATGTTCTTAAATGATGAAGATAGCCCCTAGTCTGAGCCTCCATTCGATGAAAACCT -TTCTATTCGACCCCGATGTCATATTGCTGGATTAGACTTTGCTGATATTATTGTATCTTT -CTAGGCCAGTTCCTCACACACCCCATCCCGCTCCTCCCCTGGGGTTTCAATGTGGCAATC -ATACAACCGATTATATCTGAAAGTCAGAATGTGGGCATTGGCTCTATATGGTTGCAAGAC -AGACAGTCCAAGAAAAGGCTGATTCATGCCTGAGCCTACATACAAAGCCCTTGGTCTATT -CGCGCTCGAGGTTCCAATCTTCGGATCTAAAACTGCCAGTCCCACTGGCTCTTTGGTACT -GCATATAGTTCTCGACCGAGTGAGTTGTCGTGACCTTGGTGCCTATAGTTTCCATCCTGC -CAATCTTTCTCCCATCCTGTTACCATTGAGCATATTCGATGCACCATCGCACAATTCTCC 
-CCTTGTAGATTATCTTCCTCATGGGTATTGCTGAATACTGACTTTCGCTAATTCTCGAGT -TTAGCCTCTGCACGTTGTCGACTGACCTTTTCCCTATAGTGAATTGCTTGGTGCCAATGA -TGGAAAAAGAGAGCTAACTGAACATCTTGATGACCAAGCTACACTTGTCTGAGGCACCTT -CTGAGCACTTTGCCTTTGTCTCTTGCGTTTACCAATATTCATTGGTGTTTGAGGTGGAAT -TTTCGTTATGCTGTCAGGTCTGTCCACATACAGTTCACTCCAATGGCATTTGCTTGGTGG -TGATGATGAATTCTAGCACTGTCTATCTAAACTTCATTGACGAAACATTAACGATCTGAG -CCACCCTCAAGTTTCTTGCCATTGGTTCCTCCCCCCTCTTCCCCATACTCCTGGATTATT -ATACATCCCCTTGGATCTCATTCATTAGCTCCAATTTTATCTCTGCCATCAAGAGATCTG -CCTCGGGCGATGATACAACTATCACAGATCGAAGATGCAGCAACGACACATTGCTGGTAT -GATCCCTCATCTGAAGCGTTGTACCATTGCCCTTATCTGTGCTCTTCTTATATTCTGAGG -ATATTTCTTCTCGTTGCGCCTGTCGCGCATTTTTAGGGTGAGATTTACTTCTATGCTTGC -TTTAGGGGCAGCTTGCTTGGTGGCGATGATGAGAAACTTGTTACGCGGAACTTCATTGAA -GCCAGATCTAGTGATCTGAGCCAGTATTTGAGTTCCCTTGTTATGCCTCTTGTGTTCTTG -TCCTTGAATTCCCCGAATCGATATTGATTTTCAATTTGAAAGTCCGGTCTCCGACGATTG -CTAAATTCCCTTTTTTTTTTTTTCGAGTTCTCAGAAGGGCCTATGAGATGGTCCAAATGC -TGCCAGAGATCAACTTCGGTGTTCTAAATTCAGAGATACTATTCAAATAGTATATGAATG -GTGTGTACTCTACGAAACTGTAACTAAATCACGAGATTATCTAAGCTCCCTGCAGAAGGC -CCCAGGGCTTTATTTTAGCCCCACAAGATCACCCCATGGATCCATGACGCCTCTGCTAGG -GCTTATCACGACATTTGCAGCAAATGAGAGCCCAATTCCTTATTGTTGGCTCCAAGCGAC -ATTGTCCACGTTCTTGTAGAGGAAAAAAAAAAATACCTCCTCTCACAGATCTGAGTGCTC -TTGCTTTTTTTGCAGTTTTTCTCAGTATGACGGACAAAAAGGATTTCAACGTGACATACA -GGGGACCCGTTCAGGATGGCGCTACAAAAAGCATCCGATCGGAGAACCGATTGGATGAAG -AAACTGGAAGAGATCCTTTGTCTACACCATTGAAACGCCAGCTCAAGTCTAGACATTTAC -AGATGATTGCTATTGGAGGTGAATTTGCGGTCGCATTGTTCTTGTATGGTTCTTGCTGAT -CAAAGTACTCTGTATTTCCAGGCATTATCGGTCCTGGCCTGCTAGTGAGTTCAGGCAAAG -CCCTGCACGACGGTGGACCCGCGGGGTCTTTGATCAGTTTCTCACTGGTCGGCATCATCG -TCTTCTTTGTCATGTAAGGAGTACATTCTTTAGTTTCAAGAATGATTCTGATGATTATAG -GCAATCGCTGGGTGAGATGGCTACGCTGCTCCCTGTGACTGGATCATTCACCGAGTATGC -TGAGCGCTTTATTGATGACTCTCTGGCGTTTGCATTGGGGTGGGCGTATTGGTACTTATG -GGTGACTGTACGACATACTCCGTACACTGAGTATTTTTGGTATCATAGATAGTTCCCTTG -CTAACCCCACACTCTAGGTTCTCGCAAATGAATACAATGCGATATCCCTGGTTATTGGGT -ACTGGACAGATGTTGTTCCTCAATGGGGCTGGATCTTGATCTTCTGGGTGATGTTCCTGA 
-CGCTCTCGAATCTGGGGATTTTGGCCTATGGCGAGATGGAATTTTGGCTGTCTCTGTAAG -TTTTCATTTGCTTGGGCTTGAAGCATCAACTTGGCTAATAGACTACAGCATCAAGGTACT -GGCGTTGATTGTATTTTTCATTCTCGCAATTTGCATCAGCGCAGGTGGGATTGGACCGCG -TGCTATTGGATTTCAATATTGGCATGCCCCCGGGGCGTTTGCAGATTCAATCAACGGTGT -TGCAAAAACCTTTGTTGTTGCGGGAACATTGTATGCCGGGACTGAGATGTATGTTCTCCA -ACGTTGAAAGTGGCGGGAATTGGCGAAGCTCACTGTTATATTAGGGTTGGTATCACCGCC -GGAGAATCAGCGAATCCCGAAAAAGCAGTACCCAAGGCTATCAAGCAGGTCTTTTGGCGT -ATCTTGATTTTCTACGTTGGTAAGTTCACATTGAAGCAGAAGACAGAATCCCTACTAAAC -TGAATTCAGGTACTATATTTTTTATTGGAATGCTCATCCCATGGAATGATAAGAGATTAT -TGGGCACGTCATCCAAGACAGCCAGCTCTCCCCTGACAATCTCGTTAGAGGATGCTGGCA -TCCTTCCGGCCGCTCACCTCATTAATGCTCTCATCGTGATCAGTGTCATTTCCGCAGGAA -ACAGCTCGCTTTATGTGGCATCTAGGACACTGCTTTTCCTGTCTCGCAATGGAAAGGCCC -CCAAATTCATTGGCCGGACCAATCGCCTCGGAGTCCCATGGGTTGGCCTGGTAGTAACCA -ATGTCTTCGCATGCATTGTATTTTTGGAGCAGTCCTCTAGTGCTGGGCGCGTTTACTCCG -CATTAATCACTCTTTCAGGAGGTATGTTTCTCTTCCCGGTCTTTTCTCAGAAACACCATT -GTTCCGTGCTAACACAGTACAGTTGCCACCTTTATTGTTTGGTCGGTAATCGGACTTGCA -CATATCCGATTTCGGCAAGCCCTGGTCGCGCAAGGCGAAGACCCATCAAAGCTCCCATTC -CAAGCTCTCTTCTACCCATATGGCACCTACCTCTCGCTGGCAGCAAATGTGTTCCTCATC -TTCTTCCAGGGATATACTTGCTTCCTCAACCCCTTCAGCTCGACCGACTTTGTGATCAAC -TACATCCTGATCCCCGTTTTTGTGTTATTCGTGATTGGGTATAAATTCTGGAATAGAACA -CGTGTCGTCCGGCTGGAAGACATGGACCTTTGGACCGGGCGGAGAGAGCAGGTCGGTTTG -GAAGAACCTGAGTCACCTAAAAAGGTCGCCACGTGGTGGTCACGCATCTGTTCCGTTGTT -ATAGGGTAGACAAATCACCCTCCAACTTATACCATACCCATTTATATACAACCCGAACCT -ACCTCTCGGTTACATGCAACTGGTTGAAATACCAGTAGACTAGCATGTGACACAATTGAA -CTTAATTTACTCTGTACTTCGTATCGTGTAAAGATAAAGTGCCGAATATGGCCTTGTGCG -AGTGACAAGAGATAGAACCAAGAAACAAACCGTGCGATCTAATCAGCACGCAGCGGGCCA -GATATCATACTTCAAGCGATCAGCACGGAGACCGTCGGCGCGCGGGGACACATGAGGGGA -AACAATATATGTGAAGCACAATTGAGGAAAATCTCTTATCTTGAAGAGTAAATACGTGGA -ATATTGAGAACAACTAGATGAACGGAAAACTCAAGAATAGCTGTTTACTCACTCAGTTTT -CGAATGCCGACAGCGAGATGTTTAACCCCACCCGGGGACATCTCCCGCTGAGTGATAGAC -CATTAGATCAACATCGTGCCCGACGATTGACAGTGTGATTGGTCCGATAGACAGCGACCC -TTGTCCATTCAGGTGGAGTCTAAAAGCGCCCCTTCCAGAATGACCCCCGTCGATCGCCGT 
-CTCGGATAGGGCGCATCTTCAGTAATCCGGAGTCGTAGCACCACTCGATTCTTCACGTCC -CTAATCAACCAATCCGGACTGTCTTGTACCTCGCTTTGACTTCTATACTTCCAGAGTTGC -CGCTCGGCGAATGTCTCTCATTCGGGTATGGTGGTCGTTATTTGATCATTTGGACCTGCA -GCATGCCATTGCCAATCAGATACCAACACGGCTAGATGTACGGAATGTAACATCGCGTGG -ACAGCGAGTAGATTCTCTTCGCAGTCCGGGGTGACCTCCGGCGACCTATATTTAAGCAAT -CATATTTCCCAGGAGATTTGGAAGGTCTACAAGTCAGGAATCTTCGGAGCCAACTAAGCA -GTCAAATCTCCCAGTCAAAATACTCGCCACAATGCAGTACACCAAGGTAATACTTTCCTT -TCATATCAACATTAACAACTATTAACAGAGATCCTAGATCTTGGCTCTGGCCACCCTCTT -CATCTCCAGCGCTCTCGCCGCCCCGGTAGAGGTGGAAGCCCGCGCATGTGCCGCGACCTG -CGGTAAAGTCTGCTACACCAGCAGTGCTATCAGCGCTGCCCAGAATGCGGGCTATGAGCT -GGAATCCACGAATGACGATGTCAACAACTACCCCCACGAGTACCACAACTACGAGGGTTT -TGACTTCCCCGTTTCAGGAACCTACTACGAATTCCCCATTCTCAGCAGTGGCAAGATCTA -CAGTGGTAGCTCCCCTGGTGCCGACCGTGTTATCTTCAACGGTAGTAACAAGCTAGCCGG -TGTTATCACTCACACTGGTGCCAGCGGTAACAACTTTGTGGCATGTACCTAGACGATGAA -TTTGAATTCCATATGAAATTTACACTTGAAAACATACGGTGATCGGTATGGTCGAGAGCT -ATGTAAATAATTTGAATGTAGGAGACAACCTTATCACGTAGACAGGCGAGTGAAACTGCC -AACATGCTGTATCGGTCTCCGTTGTTTTAGGTATCTAGAGGCAAACAACAATTGACACCC -GCACATTTGCGAGAATGTGGGAACTTGTCATCGTCGGGGTTTGATACCGAAGTATGGTAG -ATGAATATTGAGATAAGCAAGAGCAGGTATACGTTGCGTAGATATGAAAATATAGGATGG -ATCAGAAAGCGTTTGTGTAAAGTCAGCGGGTGCGCATTAGCTCACAAGACTCGACTGGAC -CTGTAGAGCTGGGTGAAGGCTGAGGTTGAAACGGAGGATATGACATTATGATTTTGGTGT -GAAGGCTGCAAATAGATGGAGTAGGAGTGTGGATAGGGAGAAAAGTACAAAAAGTGCGAC -GGTGCATAAAACGATCCAAGTCACACGAATGCGGGGTATGGTCTGGGCGGTGTGTACCCC -CTTCGATACCCCAAAATGGAAGGGACATCTATGGACCATGGATGTCCTGGGAGTGTATGG -GACATTGCGAATAGGGAAACCGCTTTGGACACTCCATAGTAGTTTCCGCTTCTTCACAAC -ATGCATCACTGCAACAACCATTATTCATGTTTACTCATTGAAGGAGGGTTAAGAGGCAAG -GACCGCACAGTAAGTGTGATCTCTGCCCCCGACGTGATTACATGTACTTGAATGTGGGTT -GTTGATGGGTCGCATGATACGTATTCTACATGATTATAAACAAGGTCGCGTCACACAGCC -CATCCAGACTGTTACTACTGAGTGATCTGTCAAATAAACCCCTCGGAAACTACCTAGGTA -ACAATACTATGAACGATCGACTCTTTGGAAGGTTCTTCCAAATTAAGAACCATGGTTGGA -TCTCGGGATCACCCGGGAGGTCCACTGTCCATAAATTTCGGCAGTGGAATAACATCAACT -CCGTATTGCAGGTAAAAATGCACCCCCGAACATTAATAACTGTGAGGCCTCTTATCCTTG 
-ATGATGATTGGCCTCATGGACATTCGGACAAAAAACGGACGAACAAGGACGGAACGTTAC -AAATCTGACGGTTGTCGGTACTAATACAGACAGTGATTGCTAATCGAAAATGTAGGGTCC -GAACGTCAAAACCATGATCTTGGAACCCACTAACCATATAGGGACTAGATCAGTCAACGC -TAACTAGATAGGGTTGGCAAGGTGTCTTAAGTCCGTCATTTGCAGACAGGGTATCAGACT -TATAAAGCCGATACCCATCGCCGCAAACATGGTatcacacttgccattaaaattccaatc -acaattaaatCCCAGAAATGGCATCCACAATCAAACCCATTCAGCTCTACGGTGGCATCA -TGGGCCCCAATCCCCTCAAAGTTGGCCTCATCCTCACGCTCCTCGACGTACCATATGAAA -CTGTCCCAGTCCCCTTCGCGAAAGTCAAGGATCCCGAGTATATAGCGATCAATCCAAACG -GGCGCCTACCTTCGATCCATGACCCAAATACCGACCTGACTATTTGGGAGAGCGGTGCCA -TCATCGAGTACCTCATAGAGCGCTACGACACCGCTGAACCCCGCAAGCTAAGCTTTACAC -CGCGCAGCGCAGAGGCTGAGCTTGCACGCTCGTTCCTCTATTTCCAGATTTCTGGCCAAG -GTCCATACTACGGGCAAGCCTTTTGGTTCAAAAGATTCCATGCGGAGAAAATCCCTAGTG -CAGTCGAACGTTATATTAACGAAACGAAGCGCGTGACGGGCGTGCTCGATTCGTGGCTGG -CCAAGCAGAAGGAGGCCAACCAGGGTGTCGGAGATGGTCCCTGGCTGGTCGGCAACAAGA -TGTCGTACGCTGATATAGCGTTCATTGCTTGGCAATTGACGGCACACGCACATTTTGTCG -ATGATGGTTTCAATGCCGACGAGTTCCCGAACGAGAAGGAGTGGTATGAGCGCATGACGT -CTATGAAGGTGATCAAGGATGTTCTGGATTCTGCAGCTGAGCAGATGGCCAAGGGGCATT -AAAAGAGATGAGCTATCACTTCTGAGTACATTTAGACCGAGTATGAGATGTACCACGACA -GAGCAATGAAACCAGCACAACGCAACTTTTTGCTTCATATGACTGTGTCCTGATGTCTAG -CTAGTATCTTTAATTTGTGTTTGCTCTTGCTCCAATTGGCCGCGTAAACCTAGGCGTCTA -GCATCCATGGATATGCGCACAAACAGAAAAGAACATCATCCCAAACTCATCAACAGATTG -CAACCACCGCTCGATTTTCTCCCGCCGGGCATCTGTCCCAGTTTCAGCAGACTTCCATAG -CTCCGATATCTCAAGCAGCCCATCCTCCCAGCGAGACAAGTGCCGATTGTACTCTCTTTC -CCAAACGGTACGGTCTGAGGCTTCCCAGAGTATTTTGCTTTCTGGTGCCATAGTCCCTCT -TAGTTCTTCGGCAACAAAGTTCGGTAGCCCGAGTTGGGTATTGTAGACGTTGGTGAAGAG -ATACATGGTAAAAATGGTACGGCGCTTGGCAGACGCGACTATCCACGACTCCCAGCTCGG -CCGGGTCTGGGATTCTTCTGCTTTGCAGAGGAGACCTCCTTTGGCGGATCGGAAAGCTAT -TTCTTGCATTTCTATCTGTGTCTTGTTATCGAAAGCGTGGGGTTCTTCGACAGACTCTGT -ACCAGGAAAGAAATGTATCGTCATGGAGTAGATGAGATACGCCTGGAACGAGCATAGGTT -GTCAAAGTCGCTTTGAAGGGCGCCCTATGTCCAATTAGGCAGGCATATAAAGGTCATAAG -AATGACAACATACCTGCGTTTCTATTTTCTTCATCTCTGCTCGCATGGTCCCCAGGATCA -AAGATTCACATCCAGGAATCCGAGTCAACCACGTCCGAACCAAGGTAAAGCAGGAAGCCA 
-AGACAGGTGGGCAAGCCCCCATCTGTTGCATTGAGTGAAGAAAAGGTGGTGTAATTGAAT -TGAGTAGATTCCTGAGATATGATTTCAAAACACAGGTGAGATACTGTATGGTGTGGGAGT -TCAACTGCTTGGGCTCTTGGCCGGTCGAGCTTGAGATGTAGGGACGTAGCCAGCGATCGC -GAATTTCCTCTGCATCGATCATAGGCGCTAGATCGAGTTGTTGGAAGTCTAGTACAACAG -TGGATTCTGAAAGGGTTGGTCCACTACCCGTGATGATAGAGTTATTTAGGGTACGAGATA -CATCCGCAGTGTGTTGAACCAGACCCCCTGGCCATGTGGAATCCAGAAAGAGAGCATCCG -AGATATTGCTCAAAGGACTAGCGCTCACGATGGAGTATTGATTCTCTTCATTACCATCAC -TTGTTGATTCAACGAGCGTTGGATATTCACAAGTCGTGCCGCGATCTCTGCATCGCTTAC -ATTTCGGCTTTCTGAGATCACACGCAGTTTTGGCTGCTACACATTTTTGGCATGCCTTTC -TTCGCGTTCGTGAAGTACGGGGGATTCGAGATGAAGTGGGGGTTGATGGCCCAGGTTTAC -CCGCGAGTCCGTTCATCTTGAACAGAAGAGGAGATTTCGGAATTCCGTGAAAGGTTTCGG -AGTTCCGCAAATGCCAAGCCATGAAGTCATCACCTGAAGTAGATGAAGGTGTTCAAAATT -TGAATATACAATGTTTTTTACCTAGAGAGAAACATATCTATGGGTTAGTTTCAATCAGCT -CAATCGTGTGTATCGAACACTAGATACATGTTTATATATGCCGTTGAGGCCAGTTTGAAG -TCTCTAACGTCGTGTCCTAGGTATCTTTGATTTTTGAGCTTAATTAGAAACTCCTAAGAG -TCCTAAGAGGTCAATGTATAAATTGAACAAATTTTTCCTGACAGCTAAGGAAGATAATTA -CATGTCTTAAATTGGTGGTGTTGAATTAACCTAAGGTTCTGACAACCAAAGTCTAGGAAC -GGCTTACATCAATGATCATTACGGAATTCCGAAGCGGGCTCAGATCCCCCCCAACTATAA -AATCCCCCATCCAATTCAAACACACACTTTCAACCAAAACAATTAGAACTTCAATTGAAA -ATTGAAATCCATTAAATATGTCTCAAGTCTACACGACAGACCACTCAGCCTCAGTGCTCC -GCACACACAGCTGGCGCACAGTTTCCAACTCCGCGCCATACCTCCTTCCACACCTTAAAC -CAGACATGAAAATCCTGGATGTAGGCTGTGGACCTGGTTCAATCACAATTTCTTTGGCAA -AGCATGTCCCGTCTGGACATGTCACAGGCGTGGAATACGTTCTCGATCCACTGGAAGGCG -CACGTGCGCTTGCCCAAGCTGAAGGCGTGTCAAACATTACCTTTCAAGAAGGAAACATCC -ACGCGCTGCCATTCGAGGACAATACGTTTGACGTGGTGCATGCACATCAGGTTCTGCAAC -ATATCACCGATCCAGTTCATGCGCTGAAAGAAATGCGGCGAGTCGCTAGGCACGGCGGTA -TTGTTGCGTGCCGCGAGTCGGCAGAGTTGAGCTGGTATCCCGAATCGGTGGGTATTGCGA -AGTGGCGCGAGATTACGGAGCGCATGCAACGTGCTAAAGGGGGCAATCCGCATCCTGGGA -GGATGATTCATGTTTGGGCACGTGAGGCTGGATTCGACAGTAGCAGTGTGAAGCGAAGTG -CGGGGGCTTGGTGCTTTGGGAGTGATGAGGAGAGGGCGTATTGGGGTGGGTCTATGGAAG -AGAGGGCTAGATCCTCTGGGTTTGCGAAGAACGTTGTTGGGGAGGGCTTTGCAGAGTCGG -AGGATTTGGAAGTTATTGCAAAGGGGTGGAGGCAGTTTGTAGAAGATGAAGATGCCTGGT 
-TTGGTTTGCTGCATGGGCAGATATTGTGCTGGAAGTAGAATGAATAGGTAGTGTCTTTAA -GCTGGTTCAGGATTTAAAATGATATAGAGAAGAAATATATCGATGCAATGTCAAGGAACA -GCAGATGTAATGATGAAAAACCTTCGCAAAACGCAACCATAAATCCTAGCAAAGAATGCG -AATCTTGAAGAACGATCCAGTCACGAGGGGTCTTCAAGCAACAGCCATGGGTGAAGACAA -TTACCAGCCACCATCACTGTGCTCACTGTGATCTGACTCATGAGACTCTTCTTCATCACG -ACGGCCCGCCTCGTAAGCTTCTTCAAGATCTGACTTTTCATCATCCGAGTCTGGAGGTAT -AGTTAGCATGTGaaaaaaaaaaaaaaaaacaaaGCAGTTTCATCGAAGCAAAAGATATCA -CATACTGATCTCATTGGCAATCAAAGCACCGGCGATACCACCAACAGCCAAACCGGCAGC -GCCAGCTGCGAGGTATCCGCCAGCATTGCTGCTCTTCTTCTCAGGCTTCTGCTCGTCCTG -ATAGTAGCCAGGGCTACGATCATCGTAACCCACGGGTGGGCCAGGAGGAGGCGCATAGTA -TCCCTGGGACTCGGGAGGAGGAACACTGCGAGAGCCATCATTGTACCCAGAGTAGCCGGG -GTCCTGGTAAGGTGTCTCCCACTGGGATCGACCAGTGGCGTGTTCTACGTAGAATGCACG -ACGGCAACTGGGTTCCCATTCCTGGGACCAGCCGTGGGGCAGAGGAGGCGGGGGGCCTGG -AGGGGGACGAGATTGAGGCGGGGGTGCACTGTATGGGCCTCGGTCATATGCTGGACGGTC -GTATGCTGGACGGTCATATCCATCTTGGGGAGGGGGACGCGCATAAGGATCAGAAGGTGG -GCGTCCGTAGGGGTCGTATGGTGGACGGTCTGAGGGAGGGCGAGCGTAAGGGTCACCGTA -AGGAGGAGCGTAGGACATGATGGGTGTGTTTGGAGGAAAGAGTAGATAAATGAACTTGGA -GAAAGCGAGGGGAGGAGGGCTTTTATGGGAAAGAAGCGTAAAGAACACCGTTGTAAACCA -TCACCTTCCGACCAGAACCGAAATAAACAACTCCACACAATATTTGTCTAACACTATAAT -CCTGCCCCCACTTCTGAGGAGTTACATGTTAGCGCCCAAGTTTGTGCTCCGATTAGGCAG -ATGCAATATCAAATGCTCCACGAGCCCCTGGAGGGGTGGTCAGATTGACAATCCTATCGC -CTTGCAGGGCTAGTTAGCCCAATGACCAATACGATTTCGGCATGCCAAGTGCCCCTGTTG -AATGTTAGTGTATCACAAATGAGCCACATATACTAAacggtgtacggagtacagagtacg -gcgtacggagtaGAGGTTGTATTATAGCCAGATCAGGAGCCTTTTTTTGCTCCACTCAGG -AGCTTGACTGCTTGTTTCCAAGGTTCGGTCGGACAATTGGCATAAAAGGGGCTCTATTGG -TAATTGTAGATTCCGTACAACATCAAAAAAATAGGGGGGGGGATGATGGATTTTCCCCTC -TATAGGGGGAGAATGGGCTGCGGGATAAAATTGACCTATCAGGCTCTCGCCGAGCCACGA -TGTGGCTCAGGTAACCAAATGGTGGGAAACACAGCCACTGACTGGCACACACGTGGCTGG -TGGATATTGGGATTGAGGCTTGGCACATTTACTTTACATGTGTTCCGTCAAGGTTGCAAT -TGGTTTCAAAGATATTGAGTCAAATCAGCAGCTAATGCATGTAGGGGGTCAAGATTGCTA -TCCACGACATGCATACCATGTACGAAGTACTCCGTATAGTATGAAGCGCGTGTGACCCTC -ATACTCCGGACAGAGTACAGTAATGTTGATCTCTTTATGCCAAGACTTTGGGTCCCTGAG 
-GCGACCTGACAAGAACGATCTGTCAAGTGGTACCTGCAATGGCCTCTCTTTTACGGAGTA -CTTTTTTGAGTATTTTCTACCTTCTATTTGGAATGATGGATCTTGCCCTATTCACTCATC -GATGTTCCTATCTATAGAAATGCCAATTGAGGCCTCTTAACTAGTGATGTTTCCACATCT -CTTTCGACTTACGTGTCCGGCGTATACATGCGGCGTGTGCGCCGGCATCCGACCTCGGAC -TTTCGCTGAAATGTCGAGTAGAACAAGAACGGCCTGGACCCAGCCTGCGCGGGGCCATCC -GGGGGGGCGGACTATACACGATGATCGGCCATATGGGGCACTCCCCGTGTTTCCTTCAAT -GAATTTTGCTTGGGTAAAATGCAAATAAAATTCCCAGAAATACCAACAATCTGCCGTAAG -AGAAGGATATTTAAAAAAGAAGACGTCAATCTCATAATCTTGATCGTACTACGTGGACTG -TTACAGACTTGCCTACTAGGATAGGGGTGGTAAATCAATTCGAGGCGAAGGATCGTGATA -TTATGATCGTGTGGCTATGAATGGCGATCCAGCTAGCAATCAAAGTAGAACAATCAGCGC -ACCGACCTTTTGACTGTGGAAAAAATCCACGCTCAACATCTCATCACCACAAGTTAAGGA -CTATCCCCCTGAGCCAGCGATAACACTTGATTGGTCATTGCGTGGGTAAAAGTAGGCAGA -TAGACTGGGAGTGGTTAGTACATTGAAGGGACACCAGGGAGAGGGATAAACTCACGCAAT -GATGATATAGGCCGCAATTAAGAGGACACCCTCTAGGTAATTGCTTCGTCCGTCCAAGAC -GAGGAAGTTAACAACGAAGGTGGTCACGAAGAGACAAATGGTCTCGAATAGCGTGAAGTA -GAGAGACATTTCCTTGTCCATACACCAGCCCAAGATCACGACCAGTGGTGTGACGAAAAT -AGCTGTTGCAATGGTCAGAATGTGCACATTTAGACACGTCCTCGGGGATAAATGGGATCT -TACCAATTTGAATACTACTGCCAACAGACACACCAATCGAGAGATCCATCTTGTTCTTAG -TGGCGACACTAACGGCTGTAACATGCTCAGCTGCATTGCCAACGATGGGTAGGATGATGA -GACCAATGAAAGCTTCGCTGACGTTTGAGCTTGCAATCATTTCGGGAATAGCATCAACCA -GGAATTCGGCGCAAACAGCGACCAAAGCGGTGGACAGTAAGAGCATGACAACAGCAGCGG -TACGTGACATTGTGGGTTCCGGGACTTGAGGAGCTGTGTCTGGGACCACGGGATCGTTCG -GGTCGGTTAGACGGGCAGCACCACGCGCGAACTGCACCGCGTTTCCGGCCGGCGGAGGAC -GACTAATATGTGCAGGGAGCGAGTTGCTGCGAGAAATTCCGGGGCGTGCGGCATTGGATC -CGATGTAGGGAGAGCCCATTGGTGACTGGGAGCTGAAAACAGTGTTGGAAAGCAGAGACG -GAATAGGTCCGAAAGGATAACGGCGTTTCCGGGAATCCTCTCCTGCACCTTCAACTCCAC -CGACGTTAGGGGCAGATGGGAATGCCATAAGTGCCGGGCGTTGATCAGTCGAGGGTCCAC -TATCCTCTGGTGCAACGATCTTAGCCTTCTTCTCTGCTTTCCGGGCCTTGCGCATTTCGC -GATCCCGTTTCTTCGACTTTTTGCTCGCTTTGCTGTCCATACGACTCAGAGCCAAACCAA -AGTCGCGAGACCGGACTGCAGAAGAGGACTGAACGTCTTCATCGGCGTCGTAACGAAGTT -CGTCACTAAGCTCAATGGCGTGCTCATTGCCGTCAATGGAAGCCTGGTTTTCATCGGAAG -TCGATGCCATAGCCTTTTTTTGGAAGGAGTTACGAGATGTTGTCCCTTTCGAGCTTGTAC 
-TTGACTTTCGATGTCTGCGGTATTTCATTGCTCTTTTGATCCGTTTGGCGGTAGTCCAGG -AAGTTGTAGTGTCATCCGACTCATCACTAGAGCTAGATGAGGAGTCGGAAGAGCTGTTCA -TAAAGTCTGCCAAGACACCAGGTTTGGACTCCTCATCGATGATAGCCTGGGGAATACTAG -CGTAGAGATACGAGTGTGTTCTGAGCTGGAAGATGATGTAGAGAATATACACCAGGAGAA -GAACGACACTGGTACCACGGCTGACCTTGAGAGTCTCGCGATCAGCATCGCCGCTATCCG -ACCACGAAGCGTGGAAAGCAGTGGGCAAGAGGAGACTCATCACACTCAAACTGAGTAAAC -AAGCACTCATCTGGGTGACGGTACTATTGTAAATCTGTTCTCGGAAGCGAAGACCACCAG -ACAGGAATGCCATGCCCATGATCAACAATAGATTGGCGAGAATTGAGCCAAGAAGAGAAG -CTTGAACGATTCGAATCTCGTTCTTGACCAGAGCGATGATGAAGATGATCAATTCGACTG -CATTTCCGAAAGTGACGTTGATCAAGGCTCCAATGGTGTCACCAAGACGGCTGGCAACAC -TCTCCGTGGCATGACTGAGCAATCCAGCAAGGGGAATAATAGCCACCGAGTTCATTGCGA -AGATGATGGCTGGGTTAAGACCCGCAGCCTCGCAGGCGATGCCGACCGGCACAAAGATCA -ACAGGACATTAATCCAGCTGTGGCAGATCGCATCCTTGGTGTGGTGATAGAAACGGCCGG -AGCCATTTCGCATACGGATCAGAAAACCCGGCTTTGAGTTGTTTTCCGTGAGGGACCCTG -GAGTGGGTTCCTCCACAGGTATATTCACTTCTCCATGTTGAAGAGGCTTTTCGGTATGAA -CAGGGTCGGCGGCCTGTTGAGAGGAAGTCTGATCTGGCAGGCCGCTAGAGGACGCGTGAG -CTTTCGCCCACGAGTGAATCCGGTCTGCAGAAGGGGGCATTGTTAGCCAGGTACACAAGA -GCTGAAGTTAAAGTTCTCAGGatagctatacatagctatagctatagcttagATCCCTCC -CTATAGGCACGACTTACGCATGTATTTCAGGGAAAAATAGACAGATGAAAGAAGGCAACA -AGCCTTTGATGATAAGACAGTAGATGATAGAGAAAAAAAAGTGAAGTGAACAGTTGACAC -GCGAGTTTTGCAGTCTCTTAAGGGGGAACTTGGAACAGGGCCAGACTTATTTAAGATCTT -GGCGCCTTGGCATGATGAATTGCTTAACGCCACACTTTTTCCACTTGTTTTTCGTCTCAG -CCCTGGAGAGGTGGGTCACTCTGAGAACTTTCCAAGGAATGTTTGGCACCTGGGCAACCT -ACGATAAACAATACATTCTCAATCTCTGCTCTTTCGTCATATATGCCTAGCATTATGCCA -ATTTTAAAACTCCAGGGGCCGTCGATATATTGGGCTTCAATATGCAACTGTGTCAGAACT -CAATCTAACGAATGACGCGTTCTCCACGAATCTCCCTATAAGGATGCCAAGAGTGAGAGG -CGGGAAGTCATGGTCTATGTATCATTGAGCCGATGGACATTATTGGTTAACCATTGTCAC -AATCAACCATTAGGCTGCCATGTTTGGGTGGGATAATCATTGGCACGAGGCCTTGTTTTC -GCTTGTTCTCATCGGATGCTAGGCATTCTACAAGAGGACGGAGTAAGGAGGAATCCGTAC -ATCGATGTACCACATGTGATCTACGATTTACGTCTGCGTGTGCTGTGGTGCTTCACTAAC -TGGAAATGAGGTCATTTATATTCTCAGAGAGGCTTCACACATTTCCATTCTACGAAATAC -AACTATGTAGTAATCTCTGGATACCGAGTACATGGATGGATATAGGTCTGCTGCGCCTCT 
-GGATGTTTTGGCTGTGGAGTTTTAGTCCCACTTTATATCCGCACTTGTGGCTGTTGCCAC -AACCACAGCCACAACCTACCATCTTTAGATGTTACTCCCAGATGTCTCCTCCAGTCTGAC -TCATTGACATCAATCGCCTGCTTTGTGGAACTATCATCCACATGCCCATAGATTCAAAAT -AGTGTAACTCTTCGTATCTTCAACGCGATACTCTGGGAACACAAAGCATACAATGCGATC -TTGGGCGTCCATCTACGTCAATGTAGATAACCTGAAGATAGGTGTGGCAGGTGTATCGCT -TCTGGGGATAATTGAGGGTTAGGCGCCAATGCCGGCTCATATGCCCAATTTCCGACTCAA -CCGGAACATCATGTAGCAAACAGACCAAAGAGGAATAACTGAGTGCACAGGATATGCAAG -ACGCCGATAAGACCAATTCGCAAACTCCACAAAAGCACTAACGGAGTGCGACCATGCATT -TACAAAGCATTTCAACTTTCATGCTGAGCTTTCAAGAACAAGGTTACATAGGGTAAACCA -AATTCTACACCTGAGCAGCAGCATCCATACAGCCATCGGAACTCTGCTTCAGCGACAAAG -TTGGGGCGACAAATGACCCCTAACACCCCCACAAAATCTAAGCCTTATGCCAAGAGATGC -TTGTCATGACAGGTCTCCACATGCACAGTCACTCTAGCACCTTCAAGCGGCCTATCGGCG -AGGAAATCTCTAAGTGAACCTACAGCAACACTATAAGCAAAACCCAGCACATCTTCACAA -GCATATCACTGATAGCATACAAGGCTCCAAACTAACTGTAGCACCAAAAGCGATGATCAA -TGGATCGCGCAACTAGGAGAGATGTCAAGCCGATGCTATGCTTGATTTATGTTTTAACAC -ACATCCAATGTGTTTTCATCAGCACCATCATTGCCATTGTGGGATAGCGCTGCTCTGAGC -AGAATCGATATGTTACCATTTAAGCCTTCCAACTATTATAAGAGTGAATGTTCTACAAAA -TCTCATTCCTTTATCATTTTTGACAAACGTGGTATGACACTTTTGATGCAATGGTATGCA -ATAATTCCATATATTTGTCTGTGTCATAAATCCATCTTGAGCCAAACATATCCATCAGCC -TTGATATCTATAGGAGCTTATACCTCACAGCCACGGCAATACAAATAACCATAAATGCCA -CAAAAGCACCCAGGATGCCAAGAAACCACGCCAGCCCATTTGCATCTTGCCCCGGGACAG -GGACGTTCATTCCCCACAGACCCGTAACTAGGTGGCACGGCACGAAGATAGTAGCCAGGA -GAGTAACTTTACTCAACCCAGAGTTGATAGTTAAACTCAATCGGAGATTTGTAGCGCTCA -TCTGGGCCAGATAATTGGCCTGGGACCGACCAATGATCTCATCGATGTGCCCGAGAGTCG -ACAACGTTGTGACTAGATGATCTTGAACATCGCCGAGATATAGAAGCAGATCACCGTCCG -GGAAAACAGGGTGCTTATCGGGCGATTGACAACGCTTCACAAATCCGTTCAACACGTCGA -CCTTCCCATTGAGGCAGCGGATGATGTGAGTGATTTTCTTGCGGAGAACGTCGACTTGCG -GAATGAGAGCTTGGGCGTCGTCGATGCGACTAATGAACACTTGATCCTCGATTGCCTCGG -ACTCGCGCTCCGCGTCCCGAACAAATGGCTCGAAGCTATCGATGATTTCGTCTCTATGAA -GTAATTAGCAAAAGGTCCATTGGAGAGAGAGTTCACAGATGGAAATTTGATTGAACGTAC -ATCATGGCATAGCAGACCCAGTCGCTAGTCAATATAGTGGGATCATGCATTTTGCGAATC -CGTTCTCGCGCTCGAACGACGTGTCCGCAACCACTAGGCGAGAAGGTGACCACGCCATAT 
-TGAAAGACAAGAATGTATAATGCAGCGGAGATGGGAATCCCCGGTCGCTCCCCTTTGGCC -TTGTTTTCCACGAGAGTCTGGAACGGGATCAGGTAGTAATTTTTGTAGACGTCGACTTTC -TCTCGAGGTTCACGAATAACAATATCCTCCGCCGTCAGGGGATGGATGGACAAGGCTTGG -GAGACAGCGTCGACATCTTGTTCGGTGGCGTCGCGGATGTCAATCCACCAGATCGGTTCG -TTGGGATCAGGACGACTTTTATGCTTCTCCTGGAAGATATGGTCGATATTTCCGCTGGGG -AACAGACCATCCCAGGTGGACGAGGCGATCGCTTGTCGCGTTTTTGACGAAAAGAAGCTA -AACCGATCAACCAAGGGCTGATCAGTTTGCAGCTTCTTTTCTTGTTTTGTCTCCATCTTA -TCGACTGCGCAGAATAGCCGTGGGGGAGATTCTTGGTACCCTGATAGGTTGATCTGATGC -CATCAGGCGTTGATCTGGGTGTCCTTTTGGGGAGTGCTGCTCCTTATTTTGGATGATCGA -ACTTCCATGGATGACGACGAATCTATTCAACCCATTGCATTCCCTAGAGTTGGACTCTTG -GGGATATGAGATTTTAGATTTCTATCCAGGGTGCTTACAAATGGAGAGACAGGGGAAAAA -GAACCTTTTTGGAAAGAGCGCTTCACGTGAAGAGCGTACCAGCACTACCCGTTAGGTTGA -TGCAACGGTGTGTCAAGCGGTGGGACTGCCCAGATGCCAATTTAATATGATGTACTTCGT -ACGGAGGAATTCAATGAACGCATCCAATCATTTGGAGGTAATAGGGGACATTAGTGACCT -TCGGAGGTTCGATACCCGGAACAACCCCGAACTTTTGGCCATTTACGTTATGAATTTCAA -GATTTATTTTTATTTTACGGAGCATAGGAATAGTATGTTTCTAGAAGATCATGGTTCTAG -AAAGTTGACGAGAGGGATCGGTTGTATTTATACATTTATTGTAGATCCTTGATCAGGAAG -ATCTTATATATAGTTGGTTGAAAATCCGTATTCTCGTCGACCAAAATCACCATCAAATGT -TCGGAATTCTTTTACCCCATAAATATACAATCCAAAACGCCCTTGAAATGCCGTGCCTGG -TTGCGACACAGTGTCATAGATTCGACACAAAAAAAAGAATCGTCAGTGGAATTATGTGGA -CTCGGTGATGGGGTTTGATATTACACCCAGATGGCATCTCAATACACGTCAGTGATAAGC -GTAATCTAGCGCATTGAAGATGGCCATACCATGCTCCGATCGTCATTTAACTTTTGACCT -CTGACTGCCGGGGCTCCGTCTCTTGTTCATGTTCAAGACTGTACGACTTCCGCAATCGTG -GATACACTAGCCCAAAGAATACGATGACCGCTAGGAGAACAAAAAAGGTCGGCAGAAACA -TGAAAATAAACATAAAGACGCTCAATCCGAGGGTGACATTTTGCCCACGGAGAACTCGAC -GGGCCATGATCTCGACGATCGAGGACATACGGTAGCCAAGCCACTAGGAACGTGATTGAC -GGGTTCAGTCAATTTGGCTGGAGGATAATAATGGAGTTGGGAAATGATCTCATTGTGCTC -CGGTCGCACATGTTACCTCATTATACTTATAGCTATGTACCTGGAAGGGCCGGATATATG -GGCACTGCCATTCCCAACATTTATGCTCCTGAGACCGTTTTTAGCCTGTGAATGTTGATG -CCTGTGCCATTTTTCAAGTTTTTCAACCCTCCACACTTGGCGCCAATGTTCAGAGAAACC -TTTTCACCGCACATAGCAGATCTCCAATAAAATGTGGGTTTCCTTAAAATCAAGGCTTGG -CGCGAAGTGGTAGGCAGCAGCATTTTCCTCAACGGTTTTGCCCTGCCAAGCACAGTCTCT 
-TATTCTGTGCCTCCGTTGTGGGAGATGTCCCAAATTCATGGTGCACTATTCTGTGAATTG -TGTCTGTACGGAATACGTAGCCCTTGCACCTGCCCTCAAAGTTCGAAGCTCTGCATTCAG -CCCTGGCCGATTTCCACCAATCTGACAACTGTACGCCCCCGGTGTGGATCTAGCGCTGTG -CAATCATTGATAGTCCTTCGGGTCAGCTGCCACAGTGCAGGTTCTCGAATATGTCCTGCA -AACTTTGCAAGGTATCGGAATACTCTTGACAAGCATGCAAGGGAATGCGATCGGAACGTG -CTTTGTGATTGGGTGGCCAATTGGATCCTGTAATAAAACCTCCTTATAGGGCTGAAGCTT -GGCTTGCGGGGGCTGATTCGCGTCTCTGGAAGCATAACCGGTAGGGATCGGATGATCGGG -AATAGGGATACAATTCTGTAACGTAATCTTAGCTGTATATTTGAGAGAAGTTGCCTGTGG -ATGTGATCAACAGGTAAAGATGATGCATACTTTGTACTGTATCCTGGTATTTTCCCTGTG -GAAACAAGAAAGAAGGAAATAGAACAATAGGATTCCGTACTCCGTACACAGGAACAAGGT -AATCTTCCCCCACATTCTCCCCTCCCGACTTCTGATGTAACGTGTTGAACCTTTATCCTT -CATCCTCACCTCTTACGCTGCTCTGCTCATACCTTTCCAACATGTCTTGGCTTGCGGGGT -ATGGCAAAAGCGATGAAGACGATTACGATAACCTGGGCGTGGATTGGGTATTACAATATG -AGTTTGGAGACATTGGTAAACAGCATGCCTGCATGGGTGATCATTGGCATTCTGCACCTA -CTGACGATCTATAGACCAAGCTCAAGCTATTACAGAATTTCGAACGCTTATCAACGACCT -GAACGAGGCTGGTCTTCGGGTTCAGGTCCGTCATGGCCATGGTCAAGCGCTTTTAGTTTG -CATTCGGGTTCCGCGGGATCATTTAGGGAATTTAGTGCATCAATCCAGGTAAGTGAGGGT -TGAGTGGTCTTGGATGTGACATTTTCCCCCTTGGACGGCTAATTGACACGTCGCAGAATC -AAAGATTGGCTGTTTGGAATTACCCATACCCTACCGGCCGGCGATGAAACAGCCATTGCC -GACGCCGATACACCATCCGAAGAGATCCGTTCCGTATACCATGCCGTCACCTGGCAGAAA -AAAATAGGAGGCGCAGGCATCACACCAGGCTTTGGGAAATGGGAGAACGTCACTGCCTCA -TTTCCGTTACACGACCAGGAGGCCTGTGCGGAAATGCTACGTCAATGGAATCGAAAAACC -GTGTTGACAACTAGCGACCTGGATGCGATTCGAGCCTTGTTTGGCGAAAAGGTATTTTGA -CGCTCCGTCGGAGTCGATTATATGGTCTTATTGTGACGGATAGTTAACCCTGAGCAGGTC -GCCTTTTACTACGCTTTTATTCATTGCTATTCGCTATTCCTTGTAGTGCCCGCAGGGCTG -GGAATTCTTGGATGGCTATATCTAGGCCCATATTCGATTGTATATGGAACCGTACTTTGC -GCCTGGTGCATTGTGTTTGTCGAGTACTGGAAAGTTCGAGAGGCCGATTTAAGCCAGAGA -TGGGATGTCAAAGGTGTCGGAAGATTGAAGGTGAATCGCAAACAGTATGTGTGGGAGAAA -GAGGTCAAGGACCCAATCAGTGGTCAGGTGAAGCATGTCTTCCCAGGTTGGAAGCAATTC -ACGCGCCAGTTGCTTTTGGTCCCGTTTGCTTCTGTTGCAAGCGTGGCCCTCGGAGCCTTG -ATCGTTGCTTCTTTCGCATCGGAGGTGTTTATATCCGAGGTATACGATGGTCCATTCAAG -GGGTATCTGGAATTCGTACCAACCGTCCTGTTTTCGCTATCCTTGCCCGCCATAACCTCG 
-TTTCTCACGAGCATCGCGACCCGCTTGACTGAGTACGAGAACTACCGAACGCAAGATCAA -TACGACCTTGCCCAGACTCAAAAAAGTTTCGTCATGAATTTTATCACCTCTTTCCTCCCC -ACGATATTGACCGCTTATGTCTATGTTCCTTTCGGCAAACAAATCATCCCCCGGCTCGAC -ATCCTTCGAAGAACAGGCTTCATGGCCGACTTGGTCAGCGGACAGAAGGAATTCGAGGTT -GATACAAGCCGATTCCAGCAGGAAGTGATATATCTCTCCATGACAGCTCAGGTACTAAGC -TTTGGCGAGGAGGTCGTTCTACCATATATAAAGCATGTCTTGAGACAAAAGTGGCAAAAT -TATCAGCATCGCAAGGCGGAATACAGCCGGAAGCGGAAGTATTCCACAGCAACCGACTTA -TTCTTGGTGGACCCACCCGACGAAGCTGCATTTATGACAAGGCTGCGCAGCGAGGCCGGC -GCAGAGGAATACCATGTGGAAGAGGACATCATGGAAATGTGCGTACAGTTTGGCTATCTC -GCATTATTTGGTGTTGCCTGGCCACTGGTGCCGCTTGGGTTCTTGCTGAACAACTGGTTG -GAGCTGCGAGGTGACTTCTTCAAGCTCACACTGGAGTGCCAACGACCACCACCAATCCGA -TCCGACTCAATCGGCCCATGTCTACTGGGGCTGGATTTCTTGGCTTGGCTGGGGACGCTA -TCTACAGCAGCCATTGTGCATGTCTACCGGGGTCCCATTTCCGAGGTACGGCTCTCCTCG -CTGTTGTTGACGTTGTTTTTTGCAGAGCAGATCTACCTGGGGATGCGATTCACTGCTTCT -ACAGCCCTTGAGAAGATCTTCTCGGACACAATCCGAAGAGAAGAGGCTAGTCGGTATGCA -GTACGTAAGAATTATCTTGCGGCCAGTCTGGCGAGAAACTCTTCCCCCAGTTCCGGCTCC -CAGGGCTCGCCCAACGGCCGGACCCGTCATCGGGTGCGTTTCAGCGAACGGGTCAATGTA -TACAGCTCGAATGGAAAAGGCCCATCGAGTGATGATACCCCATCTCCGACTAAAGAGGAG -CCGGGGAATGATGTGCTTCGTGGGTCCGACCGTGAGGCCCAATTCTGGAGCGCGCGTCCA -GGGGACATGGCAGACGCAGGCGTCAAGCTAATTCGCGCATTAAGCACACCCAAAGCAAAT -GTGCTTAAACCTTCTAAAGAGGCATAGATATTAGCCCTATGTATTTTTAGAGCGTATTTT -CAATTTTTATTACACTGCTGCCTGAGTTTTTCTATGTTTAGATCGCTAGGTGTACCGCCT -CGTAATACAGGCATTGCCTGGAAGGATCATACCTGTGGGGGCTCATTTCCACATTGAATG -TATTTCAGAAGTACATTGGTACCTCGTACACAATCTATGTTAGACACGTATTGTCGCCTG -CCAAAAGCATATCTCAGCCCTATAGGGCTGTGCCAAGACGGGTATATCCACACTACCCTT -GAGGGTGATTCCCTGCTCTTACCCACTTGGTCTTTTCTCATTCTTCTCTTTGTCTCTTTG -ACTCGCTGCCTCCTGGGTGTTACCCTCTCCTCACTGTTTAATACCATCCAGCCCTCTGAA -TCTGGTTGTACAGAACATCGGCATGGACAGCGTCGTCTGAACTTCCCCTTCCTACGGTCA -CCCGGTTACCCGCATCCGACTTTTACAATGGTGACCCCTACCGACTCGTCTGATACTCTA -ATCACACTTCCACCCCCCACCCGCGTCTCACAGTCGACCCCCAATCCCACGACCATACCC -ATGGAGCTGGACTTACTATCACCCACCAAGTCCCCACCACCCGTGGTCCCGAAACTGCTC -CGTGAAGCTAGTGACATCACATGGCTGGAATCCCTGGAATCAAAAAAAGGCCAGGTGGAA 
-TTAGTCCGACGACGCGGTCGATACCGTCTACTGCACCAGCAGCAATTCAAAAACAGCTTG -CTCGCCGGTGTCGGGTACCTCGAGCTAGCTAACGCGGGCGACTTCGCTGCCAATGTCTGG -AATGAGATCCCTGTTCCGAAGTTCGCCGCCGTGCTCATGGGAATCGGGGGTACGTTGGCC -TTGGGAATGGTGTTGGTTGTTATTCATGATTTCCGACTGAGCTGGATAAATGTCAAGCTG -TTGCGAGCGGAGCGGGATCACCTGAAGCGTTTGCGCCAATACCACGACAAGAACGCAGAT -ATGACTCGACTCATCGACAGTCGACTTGGGGTCGGAGTGCGAGAAATTGGCACCGAAGTG -ATTGATCGGATCGTGATGGATGTTTTGATGGGACTCGGCTCGCTGTTGGTCGGCGTTGGT -ACTTTGATGGCCATTGGCGGCGCCAATCCCCACGTCTATAAAGCCAGTAATCTGCTCTCT -GGATATATTGGTAATAGCCTAGCTGCAATATTTGGCCTTTTCAACGCCATCTGGTCTGGG -TATCTCCTTTATCGATTCCATGTGCACGACGCTGCTGTTTTTGCACGCGAGCCCAGTGAT -GACATCCGTCGCCGTTTGCATACTCGAGTCCGTCGCTTCCAATGGCACGCTGGCGTGAAT -GGGCTAAATGGTCTTGTTGCTGGTGCTGCTTCGATGGTCACTGCCGAGCGATGGTGGGGA -TATGTTGTCCTGATTCCGTGTATTGTTTCTCTGGTCATGTGCAACTATTTCTGGCGACGA -AAGCTGGGCTATGATCGTCCACTCCTTGGGCATAAATCACCAACAGAAATACAGGTGACT -CCGCTCATTGAAGATCTGCATTACGTGATTTTAATGCAACGAGCTCTCGCTGATGCTCGA -CATTCGCTTCCGCAGGCCATTGTTCAACCTGACTCCTTAGACTCAATTCTCCATTTCATC -GTCCGTGAAAACATGCTCGACACGTACTGCGAGTCACTTGCCCGCGATAAAAGTACGCGC -CATTTCCTCACTGCATTCCCATCTCTAGACACATCTCCTGATCAAATCACAATCTCCCTC -GATGCCCTACTCCGCCTTCCTCCCAGCCACCTAGATCTCCTTCTGGAGCATGCAAAGAAG -TTCCTGCGAACGACCGGTGTCCGTGTCTTCACACATCGCGAACGCTACCTCCTCGAGCTT -TTGGGTTACGCTGTCTGTCAAGATCACCAACGGGGCACAACAGCCATCCAAGATGACACG -ATAGAGCACACTTAATATCTTCGACACTTGAATCTATGGGCGGCTCATGACCTCCCCGGG -GACTCATGGGGTGGAAGCGAAAGCGCCGGCGGCTGGTCGCCGGTTGCTCTCTATGTTGTT -GCATATTTGCATTGATTGCATTGCGGGATCCTTCCCCCTGTTACCCTAGAAATGCTGGAC -TTGAGTCATTAGTATACATTAATTTTGTTCCTAATATATTGATCATCCACTGTGATAATC -TTCATGTTCGTATTATGCTTGCTTTGAAGATATTCCCTTGCGTACAATTACTCTGGATGG -CCATATGCCGAGAAAAAGGCAATTTGAAAGACGCCTTGGCCTGTTAGAGTCATTCCCAAG -ATAGATATCTGCCAAGAAAACACTTCCCAAAGCTATTAGTCCGGTGTTTATATTGCCACA -ACCCCTAGTTACCTAGGATTGACAACTCAATCACTTGATATATTCTGTTCCCTCCAACAG -CATTCCAAGTATCTTCTGCACAACGAACTGACTTTTAGTAAATTCATAGTTGGAAAAGCC -TCAACGATATCTTTCTCTGAGGTTAAAGAAAGGTCTTTCTCGATCCCTCCATCGAGCCTT -ATGTATTGAGGTGCATAAAATGAAGGCTTTTTTTGAATGAACATAATACATGGGAAAGTC 
-AGAGCTTCTAAGAAGATGCCTGGTACTTTCTGTGCATAAGCGCTGCTCGCATTAGAACCT -TGCCACTTGTGGGAAGGGACCTTCGAATATATATATATATGTTTATTGGTTCTACTTGTT -GTACATGTTATCTGAATATCAACAGGCTAGAGATCAAATGATTTTCCAATGGCATAGCAA -GGAATAACTTAAAAACTGAACAGTCCGCTGCGGGCCTTGCCATATGGATAAACAACCAGG -GCGGAAATGAACAAACCAATCGTAATGCCAATAGCTACCTGGATCATACCAAACCCTAGG -CTGAATACAGAATTCCCAGAAGATAAGCCAGCCCCGGGCCCTGAGCCAGCCCCAGGACTA -TGAGACCCGACGTTGTTACGAATTTCATCCGCAGATTGTACACCAGAGATGAGAGAACCT -GTCGCAGCAAGCCCTGATGGAACCAGAACAAAGATGGCAGGTAGAATAGCGGTCGCAGCG -TGGCCATGCCAGAACCGGCTGTATATGTTTCCCATGATGCCGACGACAAACGCACCGACT -GTATTCGCGACCTGGGAGTTTGATCCCAGTTTCTTCGTGCTGAAGTAGTTGCTAATATAT -CCCGCCAGTGCGATGACGCTCATGGGTGGGAGCTGTTTCCATTTTCCTTGGTTTATGATC -AACAGCCAGATTGTCTGGACTGCAACGAACGGGAACCGTTGAACATAAGGGTTCTTGAAG -GCACCCTTATCTGGGCATTGAGTGTCGGATACTGCATTGTTGTCTATTAGCCCATAGATG -GTTGTGCCAACAGTGATTCCATATCCAATGAACAACGAGAAGAGGAGTGCGTAGACCATC -CGAATCGAGCCAGCGATGATCTGATGGGACTGAAGTTCCAAGCTGCTACAAAGAACAAGG -AACCCGGGCAATATCAACGCAATCGATGATTGCGCGATGGCAGAAAAGCAAAACAGGTAC -TGTGGCTTGCCATCCACAACTCCAAGGGCAATGCTACCGAAGGCTCGGGCGAGGAAGGAC -GTCAGCACTGTCGATGTCACTTCAAAGACATTCGAATACAATACTGAGCGAGGCGCAGCG -ATATGTTGCATTAAACCGACAAGAAGACCATTAAGGAAAATGATCGGCATGTCAATCGGT -CTGGCGCTAAAAGCGAAAGGTCCAACAGTTGCAGTAGCCAGCCCATACACCAAGACAATG -ATGTACTTGTGATACTGGGGTTTTTTCTTGATGACCGCATCGAGCTCCTGAATCGCCTCT -TCAATGCCGATCACATCATGTATCACATTCTTGTAGATGAGATGTGTGTCCGATAGGCGA -GCCAAATCAATTCCCTGGGCTACGCGGACCAGCTTCACCTCCGTCGTGCGAGTAGAAGGG -TCATCAAACGACATGATCATGCAGCCCGGCAGGTATAGGAATTGGCTGTCGACCTCCAAT -ACCTTTGATGTCATCTGCATATATTCCTCAAGCCTATGCGTGGGAGCACCAAATAACATC -AACGCTCTGCAGAGCTGCATGATATACCGCTGCCGACAGATAATCTCGGCAATGTGCACT -GTAACTCGAATCTCATCCTCAAGCCGCATATTGCGacctccaccaccgccaccaacagca -gcagcaccaccaccTTCCTTGTCTCGACGCTTCGAAGCTTGTGACAGGATCTCGCTCGAA -ACAGGAGTACTTGCGCCACTAAAACTTTCTGCACTCCCACCAATCAAGGTTGAAGTGGAC -GTATTCTGGGGCTTCTTGTACCATTTCAGTGGCTTTCCCTTTTTGGGGACTGGAGCCCCT -GGGGGCCCGGTTGGGGTTGGGGCCCCGGTGGGAGCTGATTCTGTGCTGCGGGACGAGTCA -GTTGAGGATCGAGACTGGTCTCCAGCACCTTGTTGCTGACCACCTTGCAATTTGATCAAC 
-TGTGACAGGATCCCACCACCCCCAGAGCGATATCTCAAGCTCGCATCTGCTCCTGTTTCA -TTCTCTTCGTCTTTGAACTCGTTCCCGGCGTGAGGGTAAGGCTTTCCACGACCCTTGCGG -CGTTTTTGGGATTGATCTTGGGTCAAATCTCGCACCAGACGATGAGCCTCGGAGCTAGTC -CCTTTCTGGTGCCGCTGTTTCGTCTCCACACTTTTGTGCTTGTCATCTGTCATATCAGAT -AGAGCAGCTGCACCGCCAGTCGCCCAGTCCGCGTGATACTTTCCGGCTGATAAGTCGTCT -CCCTCGGCCTGTGGGCGGCCCAAACGATCACCAAGCTCCGACGCTTTCTGTCTGAAGTTC -CTGGTCATGGCTCGCCATTTCTCCAGTTTTGATCGCTGGTCGGGTGTCTCAGGCTCCTTG -TTCTCATCTTCCTTCTTCTCATTTTCTGGTGGCTTGTTCTGGTCTGTTCTTTCGACGCCT -TCTGTCTCACCCGGCTGCGGTTCGTGATCTTGCAGCGGCGTGTCGGCGATATCCTCAGAT -TCATGGGAGTAGGGCATTGTCGGCCTATGAGGCAGCCCTTGACTTTGAGCATGTTCCCAT -TCTTCTCGAGGTCTTTGTGCCGGCTGATCATGGCGAGCTCCCTGCATAAAACCTGCATTT -GATCGAAATCGAACCTGAGGTGCTGGCTCTTCATCTTTTGGGCTGGTTTGAGGTGCATGC -GGCCCAACTTCGGACGTGCTTGTATTGTCTGGGTCCGAGTCCTCGTTGGGGCTGCTGATG -GATGCTTCCGACATGAAGGCGACGCGTGGGTTTCACTCATTCGCCAACATCTAGCCTCGG -ATTGTTTGGAATTAGATAAAATTGCGGATTTGAAAATTTCTTCAGAAGAATTTTGAACCA -CACAAAGTGTCATATCTGATGTCTTCGGGATTTCCCAGTCAAATCGAGCTACCATAATCG -CTATATGAGCTCACGGAGACGTGATTGGGGGTATACTGGTGAACCACGGTCGGCTATCAG -TAAGGGATTTCTTCTCGCACAGGTCAACAGTGTAAGGAATCATCAATTGAACGGGATTAG -GAGCCCTTTCCGCCAGTCATGAGGTATTGTTTGCAGGTGTCGCCTCATCGGAAATCTATC -CCACGCAAATCTCGGGTTTCCAAGTTACAGGTTTCTTACCACACAACCGTGTTTAATATC -ATCATCAATTGCCAAAGAGAGGCATGAATCGCCTGTAAGAGGCAGAAATTAACAATTCAG -ACCATGGGCAATTCTACGTCTTCAATGGTTGTACTATGGGATGTAAACAAGCAACTCTTG -ACTCCCTATATTTACAAGTCGCATGAGGCGCATGGTTAAATATGACACTTCTCAACATGG -AGAATTCTTGAAGTATCATAAGTCGTGGTTTGCCGACTTCATCTAATTGCCTATCAATTT -TCCATACTTTAGTTGACAAATATATTAAGAAGTACGGTGTGCCCCCAATCACGAAAAAAA -ATAGCCTCGGCTATTATAGCATCAATCGAGTAGTTAAGGCACTTGAGCATACCTCTGCAT -ACCTACCGTTGGCATGGAAGTTCATATAAACGCCTATAGAGCTTTAACCTAATGCAAGGG -ATGATGTGGAAAACCTTGATTCTATTAACATTCAACAGATTACCATGTGGCTCGAATCTA -GAGCATTCTAAACGTGCCAAATCACACATATTCTAACGATGAATGCTTTGACTTGGGGCC -TAAGAGGGATATGGATCAAGAATTAGCCTAGGGCTCTGGTGGTAATTTTAACACATCCGT -ATAGGCAACAAATAACAGGCAAAGAGACGTAAGCCGAGTCTTGAACGCGTCTCCAGTCCG -TTGTGTCAAACTGACAACAAACATCCCAGTTTGAGTTGAACGCCGAATTAACACCGTGGT 
-ACAAGGATGTGAATTCTGAGCTTGATGAGATTGATCACTTCAGTTTTGCAAGCATACAGG -ATTGAGCACATTGTAGCGACGACCTAGGCCTGCGATTATTTGCCCAGAACGTAACGAATC -TCGATTTTCTTGCAAGGGGCGTCAATCATGACAGACGATCCACCCGGGCCATTTGACCTG -TATCGACAAGTGTATACACATTCTTCCATCAAGGTAGTTGTGTATGCATTCCAAGATCGT -CTTCATGGTATCTGTCGGAAAACCTGATATTCTACCAGGGGATTCGTTTCTATTATCTAG -TATTCACCTACCTTTCCCCTTATTCTTTTCTATCTACTCCAAGATCGAGGTAGTGATCTG -TGGTTGATTGAATATGAGAGATTTCTCATATCTGGCAGTGGCAGCTATTTTGTCTACATC -ATCAGGTCTTATTTGGCCGCTACACTTGGTGCGTGCATCTTCATCGCATGCACTTTCATC -CAAACCCGTTCATGCAAGCCCGTCGGATAATGGCTACCCCTTTGATTGGGCGCTTGCGTC -TTCTTGGGCAGGTCTTAGCAGTGAACCATCCACATATATTGTTCATCAACACTCGAGCGC -TACGACTTTGTCCACTGCAGCAAATACTATGTGCTCAGTCATGCAATCCCTACCGAAGGC -TGCCAATGTTCCATCGGCCACTTTCCCACCAAAAGTGGCTAAATCCTCAAGGTCGACACA -GGCTAGCCTGACAGCAGCCAGCACTGATTCCACAGCGATAAACACACCCCCAACGAGTCT -AAACACTCCATGGCCAAAATCTAACACTCAAATGACAAGAGGCAGTGTTCTGACTCTACT -GTGTGTGTTGAAAACACTTTGATAGCATGATTTTATGCTAACCAACATGACCAGTTCAAA -ATCTCAAGAAAGCCATCGTCCTGCAACAAAAACCTGTGCTGCCCCTACCAGCAAGGCACC -AGAGACCTACTGGCTTGACGGGCAGGACCACAACCAGAAAGGAGCAGGATTCGCTCCATA -CGCTAAAACATCGCCTTACCCGGTTTACCGCAATGTGATGGATTACTCGGTCGTCAATGA -TGGTTCTGGTGATCAGACGCCGAAGCTACAGAAGGCCATAGATGATGACGGCGTGGGCGG -CAGTCGCAAGGGCCAAGGGGTCACGCGTTACCCTGCTCAGGTCTACCTGCCAGGCGGCGT -GTATCAGCTTGGAAGCACTCTCAACCTGACTGTGGGCACGATAATTGTTGGCAACCCGTT -AAACCCACCCATCATCAAGGCTGCGCCAGGATTTCGTGGTGACTATCTCATCATGGGATA -CGATTCACATAACGGCAACCCGGAAACTAGCTTTGCAACGCTTGTGAAGAACGTGGTCGT -GGATACGACCGCCCTCATGCCGGACCGCCACTTCGCTGCTCTTCAATGGGGAGTTGCCCA -AGGGTCGGGCTTGACAAATGTCAAGATTCGCATGCCAAAGTCATCCACTGGTCACACTGG -AATAGATATTAAGGCCGGGTCAACAATTGCTGTTACTAATGTGGTCAGTAATATAGAGAA -GCCCAAGACAAAGGACAACCACGCTAACAAATTGTAGGATATCATCGGCGGAGCTATCGG -AATCAGGAACTCCAACCAGCAGGTCAACTTCAAGAGCATCAATTTCCGGTCTTGCCGTAC -GGCATTCTCCGCGGTAGGTGGGTGGACCGTGCTGCTCCAGCAGGCGACCTTTGATAGTTG -CGGCACCGGTATCGATATGACTGGCGATAAATTGGGCAGTCTCGTTCTTCTGGATTCCAC -TTCCACCAACACAGGACCAGTAATCAGATTCCATGACTCGTCCCGTGATTCTGGAACCCA -AAACAGTCAATTGCTCATTCAGAACCTGAATCACGATACTCGGAGCGCTATCGTAGTTGA 
-TGCTCAGGAAAAGGTGGTATTGGCCGCTACCTCCCATGTTGATACATGGGTCTGGGGCAC -TGTCGTCCCAGAAACCTATCAAACTGGGGCAAGTTGGCAGACAAAGCGCCCGGCTTCTTT -GTTGGTGGGAGGCAAATATTTTACAAAGGCCCAACCCACATATCAAGATTACCACATGGC -CGATATTGTCAATGTGAAGGCCGTATCTGGACACACAGTGAAAGGTGATGGTAAGACTGA -TGACACTGAAGCTCTGAACGCTATCTTAGCCCAGAATGCCGATTCGTGCAAGGTCACGTA -CATCCCTTTTGGTGTATATCGAGTCTCGGACACAATCATTGTCCCAGTTGGAACACGCAT -TGTTGGAGAAGCATGGTCTGTCATCTCTGGATATGGGAACAAGTTCAAGGATTCCAACAA -CCCGAGGCCTGTCGTCCAACTTGGAAACCCCGGTGACGTCGGTGTGATTGAAGTTCAGGA -CATGCGATTCTCCGTGGGCGAAATTCTCCCGGGGGCCAAAATCATTGAAATCAATGCCGC -AGGTGATCAGCCCGGCGATGTTGGTCTTTGGAACACAATGGCGATGGTTGGCGGTACCGT -TGACACTAGTATCTCCGATGCTTGCACTTCGCAAGATAGCAAGGATTGCATGGGTGCGTT -TATGGTAATGCACTTGACCAAGAACTCCTCAGCATACATTGAGAACTTCTGGGGATGGAC -CGCCGATCACAACCTTGATAGCGAGTCACCTCTCACTATTATCTCAACTGGCCGCGGGAT -CTTAATAGAATCCACCAAGGGTACCTGGCTTACAGGAACCGGTTCCGAGCATCACTGGCT -GTATAACTATAATTTTTACAACGCTCAGAACGTCTTTGCGGGTCTTCTACAAACAGAAAC -ACCGTATATGCAAGGTCAGGGTGAATATCAGGCCGCGCCAGCGCCATGGACAGCCATAGC -CAAGTACGGTGACCCTGATTTCTCCTGGTGTGCTGCAAATGACCAAAAATGTCGCACTGC -AATAGCTACGAATGTCGACGGTGGATCCAATATCGCACTGTACAACTCAGCTGCCTGGGC -CTTCTTCGACGGGTATTGGAATGGTCTATATGATGAGCCTTGTAACGGGAGCTGCCAAGC -CAACATGATGCGGGTGACGAATGACCCTCAGAATCTAGTTTGGTATTCGATCAGCACGCG -CATGACAGATGTTATGGTGTTCGACGGGAAGAGCAATCCTCGTGAATCAAACCATAAAGG -CGGGTGGGAAGGCATCATCCAGGTCTATGGTCAGTTCATAGCATGATATACTTGTTATGT -TGCATCATTGGACATGTGCATTTAGTAATTTAAATTATATATAATAATTCAAGATTTCAT -GTGAACAGTGTGCAAAGTACGAAGTATGTACCATTAACCTGTTCCTCGATGACGATATCT -TGGCGTTCGGCTCTTTTTTTGGCACCGTTACACGTGATTAGTATGCCTCAGGCTGCAAGT -GGATATCGCCTGCAGCAACTACGTCCACAGCCAGTCATAATGCATTTGGCTACTTCGCTG -CCCCTTCGCAATAACGGCGCTCAAACCTGCTATATGACGCGTCATTGAAGTCCTTGGCTA -CCCCAAGCCAGCTGAAATTGCAACAGGGGCCGATTTACTTGAAAATCGGCCCCTGTTGGC -CCTGTTGGGTGGTCCACTGTTTAAATTATAATGACGGGCGCCGAGACTGGTTTATCCAAC -ACAAGAGCTGACCCACTGTATTGCGAGACTTGCTTTCATGCCATTGTCGAGTATTCGCGA -AATTTCGGGTTAGTAAGGATCTTCCTCAAAGCCACCCCTAGGAGGCTAATTATGTGTCAG -ATTGGTACCGGGGTCTCCGGTGCGAATCCAGTTGTTGGGGCCGCCAAGGCAGCAGCATAC 
-CCAAAATATTGCAATGTGATTCTTTGGCTGTGGGCTGTGTCGCATTGCCCGCAAACAAGG -CGAATGAACTTATGCGAGGACATGCAAGAACATGCACGTCTGATCTTGAGGGTTTCCGGG -GGATGAGGGACTAGGGCGTTAGATGGGCCCCATAACTTGCGCCTTCATAACAAAGTGGAC -CTGAGATGACGATATCATTGCCCAAAGCCGTATAGCGGGATATCCAATCCAGGTTCTCGA -CAACAAAACAGGTACTCCATACTAGCCCCTATCCCTGAACGGGCCCTACGATCGACACGA -GAATCGCTACAAGGCTGAGGTCATGTCGCCACTGCCGCCTCGGCTGGCCAGCCATGAGCA -GTCAAACTCCGAATGTTTCTATCTATAGAAAACTGGTGCCAATTGACCAAATATAAGGAG -TAGTTTTCCCCCGGATTTCCAGTTTATCATTCCAATCCCCAAGTCATTTGTTATTTATCC -ACACCACCCAATAAATCAAACAACACCTTCCCAAGAAATCCTTCCTTCTATCTCCACCCA -TCAACATGAAGTTCACTTTGATCTCCTCCGCCCTCTTCCTGGCCACTGCTGCCATGGCCT -CCCCAGACGGCTCTGATATCAACTCCGTCGTCTCCGACGTCGAAGGTATTTACTCCACTG -CCGCTGGCGGTGGCCAGTCCGTCGGATCCGAGATCGCCTCAAAGGCCACCTCTCTCGGGG -ACGCCGCGAGCACCGCCATCTCTTCTGTCAAAAGCGAGGCTTCCTCCCTTGGCTCTGCCG -CCGCCTCCGAAGCTAGTTCCATCGCCTCTGAGGTTACCAGCCGGGTTATCACTTCCGTTT -CGGCTCAAACTACCAAGACTGATTCCGACGGCAAGCCCACCGCTACCGAGTCCACTACCT -TGATCAGCACCTCCACCGGGTCTCCTTCCGGCTCTGCCTCCGTCACTGACAATGGCGCCT -TCGCTCGTCCCACTGCTGTCGGCGCTGTTGCTGCTGGTATGGCCGGTATGCTCGGTGTCA -TGGTCGCCCTCTAAAGTGTTGATACCTCTGTTGGTTCTCGATTGATACCAGGCATGGTAT -CCACTACGCCTTTTGTCCACGACTTTGCTCGATCTTTTGTGATCTTTTGTTGATCCCTTT -TTTCCTCGCATCTGGGAAAGAGGTTTAATACCGGCGTTCTTTTGAAGCTGTAACTATACC -TTGGCTGTACACTTTGCATTTTGGTCGACCATTCGACTACACCAAATCATTCAATATGCT -GATGGCTCATCCTATGACCAATGACCATGTTCCGTAGCATTTTTGTAACCAATTGTACAA -ACCTTGTATCAAATCTCCCGAGGCTCGTAAATTTTCTTTGATGCACAAATTAAACATAGG -CATAACTCATTGTCCCACTACAACAATGACGAACAGCATCAAGAAGTCAATCATAAATGC -AGAGCCGGCCAGACCAGGAAGGAAACTAGaaaacaaaaacaagaaaacaaaaaaagaaaG -GCCTAGCCGTGATTTGAACACGGGAGTCTTCGCATTACACTTCGAGAATTGTTAGAATGA -GAAGACGGAGATTGTCAATTAGGTTAAACTCACAGTGCGACGCGTTACCACTGCGCCACT -AGGCCAGGGACGTGTATTTTTGAAAATTCTGAACCAATAAATTTATAATACAATGTAATT -AAATACCCTGTTTCGATACCGGAAAATCTCGGTGACCTAAAATCCGGTAAATAACAGATA -ATGCTTGGCAGCTTCGCACTCGAAACCATTCCCTTTTCTATCGAACCTGTCTTGAAGGCT -CCGGTATCAAAATCTTGATGGGAATCTTCGTACTACTACTTTGAATCCTGCTAGTCTTGG -CAGTTCGGCAAGTGCATCTAGCAGTTCTGGTACCAGAACTGATATGTTTGTATTTACTCG 
-GGAACACAACTCCATCGCGTCTGTGGCGATCCGTGTGACTACCTCCAGGCCGATAGGTTG -CAAGTCCGTCATAAAAACCTCGACTCCGGCTGTATTTGGTCGGACATGGCGGGATAGAGG -ATTTAAATGCCAAATAAGCTCAATAAACACGAGTCAAAAGGATATGGATCTTGAAACCCG -TCAAGAAATTCAAGACTGACCAACATTTGGTGGGCAGTCAAAAAATCACTGTTCGGCCAC -TTCGGCCACATTTCCCGAGTATTGAGAATCTTGACGCTGAGGTAAATCAGCACGATTCCT -AGACAATCACAACTCAACTGCACCCACACGGGCTTGTGCATAGTGCAGACCAGCCACTTG -GTTTTGATTGGACTTCTCGACTATCCCAACTCCAATGTTTGATGGGTTCTAATCCTTGCT -TCACACGCTTCCAGTTCAATTCCCACACTTTATCGATCAAGCTTGCTTCGATCATGTTCA -TCTAGGCGAAACCAATCGTCAGATCTCCCCCAAACAGCTCCTCGAATGTTGGGGCAACAT -TAAACATGCTCATCGAATGGAACTTCCCTTCTGGTGAATGGGCTGTGAGGGAAATCGGCA -TTGGTGGTACTCTGCGAACCAATCAAGGACTCCTTTCTTGCTACCTCTTCCTCGTTGTCA -TCCTGACCAGGATTGGCTTTCTCCCAACGTGCGAGACAAAGAATGTGATAGAATGCCGAG -GCAAGAAGGGACCCGAGCAGAGGACCAACCCAATAGATCCAGTGATATCCTGGAAATGAG -CGATTGATCACATCCGGTCCGAGAGAGCGAGCAGGGTTGAGAGATCCTCCAGTATAATAA -ATACCTGACATGGAGGGTCAGCACCGTACATTGCGAAGTACTAAGCCTGGCTGTTACTTA -CCAATCATTTCTGCAACAAAGAAAGCCAGACCAATTCCCACTGGAGCTAAATAGGTAGAC -TTGTGCTTCACCACCGCCAACATGATGACGACGAACACCAGCTGGGTCGTCAAGAACATC -TCAATGAAGAGTCCTTGCGAAATTGATGCACCACCGCCAAGTCGGGTCTCGCAGTTTAGC -GGGCCCGGAAACAGGGCACTCGTCACGCCCGCTGCAGCGATTCCGGCAACCAGTTGCGCA -GGGAATACACACAACCCCCGGATAGGAGCCATGCCACCAGTCAGGCAGAGAGCAAGAGTG -ACCTGGTGAATGTCAGCTACGAATATCCCCATTCCTCTGCAGTGATGCTCATACCGCCGG -GTTGAAGAGACCACCTGTAACTCGGTAGAATGCCCAGACATTCACCGTCAAGGCGAACCC -AAAAGAAAGCGACGCATACAATAGATTAGAAGTATTCGGCGGGGAGCCAGGTGGCGGTTT -CGGGGTATTGGAAATCTGTGTACCCGCGAAGGAAAAGAGGAGGAACAGAAACGTCCCTAC -GAACTCGCCCACCACGCAGAGCAAATTGTACCGCGGCTTGGGGGGGATCCTCAGCATGGG -CAACTGGCTAAGACCCCCGGAACTCAAGACCTCCCCGAGCGTCGAAGTATCAGACGAGTT -GCGTTTCTTGAATCGCATTGTGAAGGGTGAATTGAGAGACAAACTGAAATCGGGGTTGAC -TGGTGTCTCTTCAGAGTACCCAAGTGAGGGCACCTCAGCCTCTTCTACTATTCATTAAGC -TCACACTTTACAGCCATGGCATATCGCAGACGTCACTACTCAATAAGGAGAAGTCGCGCA -ACTGGTTTATGTATCATCGATTAGAGCTTGACTCAGCTGAAGCTTATTCTATGGCTGTAG -TCCCTTTTACCAGACTCGATAGGATAGGCTCGTACTTACTTTCCCGCATAGAAGTTGAAC -CATTCAGCGGGAGCATCCGGGCTTCCGCAAACGGTTGCTGGAGTCGCCGAGACCTACCCA 
-CACGTCATTCTACATGATTTGTCCAATGGATTATGACTATCGTCTAGATCTGGGGCCCAA -TCCCCAGAGATGATACTTGGGTTCGAACTGGAATGGCAGAACGGTTAAAGATCCATCGTT -TTCTGGCTTACATCAGCAAAGAGCTGAGTAATTATCGGGTTTCCCCGGAAGGATGGAAGG -TCTGGGGTTATAGCGTTGACATTCTCGCGCATGGCACCACGTGTGACTGAATAAAAAAAG -AATTTGCGTCTTTAATTTAGGTTGCATTAAGGTCTTATATTAGAACTAACCATCTACCAT -GGGGCGATCCTGCAGGGTCAAAGGTACACATATGCATGTACACAACCGTCTTGTTTGACA -GGTGTTTTGTATTTAGATATCATATTAGAGATAGAAAATATGTGTTTCAATTCTATACCA -TTGCGTGTTGCTGAACAGTAAAATTTCGGTCGAAAATTCCGAGTTAATCCCTTTATGCGT -TGTATATGCCTCTCCGCGGACTCCGCGGAGATGTTCTTCACTTCGGCATTGAAAATATTC -ACAGATCATGCCAGACGTCTTCTAGATTTTTCCAAATCCCGTCTTTTCCGTGTCAACAAT -TCGGTTACCGATAGAGATAAATAGCTGGATAGCCAAATGCAACCTTGGAACCGCTTTCAT -ATTACTCACATCTGACTCGTCACTTCTATTTAAACCTCTCCAGCAGTGCAGGGGTCAATC -GTTTATTTTTTTTTTCCTTCTCAAAATATTCAATCAATCATGGCTTCTAACCCAGGAATG -ACTCCGGACCAAATCGCGCTCATCAAGGCGACGATTCCTGTTCTCGTGGAACATGGAAAC -ACCATCACGACTGTCTTCTATCGCAACATGCTTGAAGCCCACCCGGAGCTGAACACCGTG -TTCAACACTTCCAACCAGGTTAACGGACATCAGCCTCGAGCGCTAGCCGGTGCACTCTAC -GCATATGCCTCGCATATCGATGACCTTGGCGCACTAAGCTCAGCGGTTGAGTTGATCTGC -AACAAGCATGCGTCTTTGTACATCAAGCCCGACGATTACAAAATCGTTGGCAAGTACCTC -CTCGAAGCCATGGGTGAGGTGCTCGGCGCGGCATTGACACCGGAAATCCACGACGCATGG -GCCACAGCCTATTGGCAGCTCGCAGATATCATGATCGGCCGCGAAAAGCAGCTTTACGAA -AGTGCCGAGGGATGGACTGACTGGAGAGACTTCAAGATTGTGAACAAAGTCAAGGAATCT -GAGGAAATTACCTCCTTCTATCTCGCCCCAGTTGATGAAAAGCCACTACCGGCCTTCCAG -CCCGGCCAGTACATTTCTGTCCAAACCCACGTCCCAGCTCTCAAGTACCCGCAGGCTAGA -CAGTACTCTCTCAGCGACCAACCGAAGCCCGACTACTATCGCATTAGTGTGAAGAGAGAA -ATTGGCCTCAACCCAGCTGCTCCAGGCGCTGCCACCCATCCAGGTTACATCTCGAATGTA -CTCCACGATACGTGCAACATTGGCGACCAGGTCAAAGTCTCTCACCCTTACGGCGACTTT -TTCCTTTCTCCCGCCGATTTGGAAAGCGGCAACCCGATCGTGTTGATCGCTGCTGGCGTG -GGCTTGACGCCACTGACGTCGATCTTGAACACATTGGTTTCCAAATCGCCCGAAACACGC -AAAGTGCACTTCATTCATGGCGCACGTTCGCAAAGCGTGCGTGCTTTCAAAAAGCACCTC -GCCGAGCTTCCAAAGCAATTCCCCTCCATCCATACAACATTCTTTACGAGTCACCCCTCT -GAGGGAGAGAAGGAAGGTGTGGACTACGACCATGCCGGGCGGGTGGATTTGTCTAAGTTG -AAGGATCAAGACTTGTTCCTTAATGACCCTAAGGCGGATTATTATATTTGTGGACCCGGA 
-AAGTTTATGACAGATATGGAGTCGGCTCTCAAGGCCAAGGGCGTGAGTGCCGATCGCATC -AAGATGGAATTGTTCGGAACCGGGGGTGTTCCTCATTGAGTCCGGAGTATGCTTACCAAT -TCTCTTTCTTTGTTCATCATTTGGCGTTCAGAAGTGCATTATCTCCTCTCTTAATATTTC -AAATTATGATTGACGAATGAAAGACGTCACAACATCCTCTGTATCCATCTGCTGAGCAAG -CAACGACTTTTTTCAAGGGGTCAACTATACCCATTCCAGTAAATTATTGGATTTTCCGAC -AGTGGTGGAATTTTGGCGCAAGGGGGCCATCTTGATTAAAAGGGAAACAGATCGACATCC -ATGGATTTATAGAGAAACCGGCAGAGTCATTGCATCCGGTCATTGTTGCCGTAGACCGAA -TACCGAGCTCCATTTATAAGCAAACGACCATGAGATATATGTAGCATATAAAGGCTAGCG -AAATATCCCAGTACTATCAAAGAGTTACAGCATACCAACTGTTTATTATCCTCACCATAT -ATCAACATGGGTGTCAAAATCGGCGTTTTCCCCGCCTCTGGGGCTCTTGGATCAAGCATT -GTGAACCACCTGGCGAAGCTAGTCCCAGAACCCGATCTGGTTCTGATTGCCCGACACCCC -GAAAAACTGTACGATTTCAAGCAAGCTGGTGCTACGTTACGCAGGGCCGACTATGATGAG -CCATCAACATTAGATACGGCCTTTGACGGCGTGGATGTCTTGATGCTGGTTTCATATGCA -TCATTCGAGACAGACTACAGGATCAAGGTAAGGGAAATCAAGTAAAATTGCACAAACACC -TAGCTAGTTGATCTAACAGCTATAACCAGGCACACCGTGTTGCTATAAACGCAGCGATCA -AGAGCGGCGTCAAGCACATCTTCTACTCCTCTCTTGGATTCGGCGGCGATCTAACCGACC -AGAGTATCGCTCATGTGATGGGCGCCCACATCGAGACAGAGAAGTACCTATCTTCCCTGA -AAGATAAGGTATCCTACACCTCCGTCCGTGAGGGTCTCTACTCTGAATCCTTCCCGATCT -ATACCGCGTTCTACGATCTACAGAATCCAGCGGACGAGGTTACGATCCCCCACTCGGGCA -AAGGTCCAGGTGTAACGTGGGTCAAGCGCGATGAGCTGGGCGAGGCGACCGCTAAGTTAA -TTGCCTCGTATGTCCATGACCCGACAAGCTTCAAGTATCTAAACAAGGCTGTTCTGCTCT -CTGGGCCACGAGAGATCTCTCTTGCAGAGACGGTTGAGATTCTCGGTCGGGCAGTTGGGA -AAGCGCTCAAGATCCGGGAGATCACTGTCGATGAGTATGTGAATCTTCCTGGGATTGCTG -ATAAGCAAACATACAAGGGGGTTAATTTGTCGAGAGAGTGGGCTACAGCTTGGGAGGCTA -TCAGAAGGGGCGAGACTGCAGTCGTATCCCCGGCTCTACATGAGATCTTGGGGCGTGAGC -CTGAAAGTTTTGAAGATACCATCAAGGCAATGGTTGAATGAGCCTTTGTAGGCTGCTCCG -ATGTGAACCTGTGTATATTATGGAATAAATAGATTGTCCTCCGAGATAGAAACAAATTCT -GATGATTTTCAGCTGCTTTGATGTCGCTTCAATATATACCGTTGGATAAAACAATGACAA -AGTAAAAGGTCGTAAAAGGGATATGCCAACATTCAAAACCTCGCCAAACGCACAAGCAAC -TTTGATCACGCCATTCTGTCGTATCGACTTGGCGTCAAGACTGAGTAAATTTTTAGACAG -AGTAATTCTTGAGCTCCTTAACATAGAGCTTCTCGCTACGGTACACAGAATTATGAGCAC -CGTACCAATAGATTTCCTTGACCTTGCTATCGCCGTAGCCGTCAACGGAGAACCAATCCT 
-TGCCGCCATGAGTGAACAGACCCTGATCGTTGACACCAAAGCCAGTGAGGTTGCCTCCCT -CGGGGACAGCGCCAGAGTGAGGGATTGTCAGACCGACGGGCGAGACAGAGTTTTCGACGA -TGTATAGGTTCTGGAAAGCCGTAGGAACACCGTTCTTGGCAGTGTATGAGACGAGGCCGT -TAGGACTGCCGCCACGGACTAGAAAAATAAAACCATTAAGGGGCTTGATCGGAATCATGA -GGCAATTAATGCTTACGGATTAGGATTTCATGGTCAGTGGTGTTGGCACCAATATAGGCG -TTCTGGCCATCGGTCAGGACAGTCTTGCCGCCCTCAGCAACGAGAGTGAAGGCATCAGGA -AGGTTGGCGGCGCTGATGGCGCCGACGAAGGCGGTGATGGTGGCGAGAGAAAACTTCATT -TTCAATTTTAATGAGGATTTCTCGAGTAGATTTGGTGATAGACTTGATGAGGAACTGGGG -AATCTTATATTTCAGAGCCAGATCCGTTCGAGTTCGAACCAAGATCCGAGCGACAAGTGA -GGTCATAAAAGCGGGCGCGATTCCATTTTTAAACAAAGTGGCTACTCTAAATCACTTTTA -TAAATTCTACTGGACGATCATGTTGCAAGATTGAGACATGTCTTGCAACCCCTCGTGGCA -GAATCCATGCTTTACTTGACCATGTCGCCAGTGTCACCAGAGCATGGCTTAGTCCATATT -CCATGGCATCCCGCACATCATAACATTGATTATGTCCACTCAACTACTTCTTAGACGCTG -AATATCACTCAAATCTCAAAGAGGCTATACATCTCCGGCTAAAATCCGGGAGGTCTCTTC -CCGAAAAGTGGAACTGAATCCATCAAGCCACTGTCGATCTTCAGGTTATTGGTGGAGAGA -GCGGCATTCCCTTTTTCGATTCCCATCCCATCGAGTACCAAGTCCCCAATTTTCAGGCCG -AGCATATGTTTCATTTCGGCTGTTTGCCGGTACGTCAGATGTGGTCCCAGCTTGAAGTTG -TAAACGGTCATCCCGAACGTCGCTGTTTGCTCGGAGAAACTTGAGGGTTAAAGTTTGGCG -ATGTAGAAATAGGTTGACAAAAGTACCATTCGTTTGACATAGCCACCATTCTTCAAGTGC -TTTTCCATTCTTTCCTGCAGATGCTTCTTGGAAATTGATGATACCCATAAAGTCTTTGTT -TCGGTCAAGCTCTAATCACTACCACGAGGCCTGATATATATTTCACAATTTAAGACAGTC -ACCCTTGACAAATATGTGTTGGAGGTATATTCAAAGACCCTATCGCAATCAATATACAGT -GCAAAGTACTGTACAAAGAAGTCTTCACCGGCAAGTCGAGCTAGGAAGACGGTAGCTAAT -TTACGCACCGACACTCACTTGCGCATATGGAAGTAAGATGAGGCATGGGGAAAGAAATCG -GATGCAGTGTCCCAACAATCTGTTCGTCCTGAAATGAAAGTGATCGATATTCGGAAATCA -TTGTAAACTACAGCAGTACAAAGCATTCCCTACTGCTCAAACTAGCGATGCTGTGGAGAG -GCCTATCTTCATCTTCACTTACATGCCAATCCTCGAACATATAGCTTCCAAAGTCTGTCT -GCATCAAGTATAAATTTACTCAGGCGTCAAGTCCGCATTCAAGACACACTCCCCATTTCC -CGTATATATACGCCATAATGGGATCGACATCCCTAGCTGATCGGAATGTTTTACCTACTG -AAATTCTGCTAATGGGCCATGTCTCGATTGGGTCAATTATATCCACATCCGCTTCATTCT -GTGTACTGATAGAATGATCGAGCCTTGATAGTTTACGGCATCTGGAAGGCACCCTAGACG -TCAAGAGCAGTGAATTTCTGCAGACCAAAAGCCTCTCATATTGTTCTAGGGACCGGTAGT 
-CGGAATTGCATTAGACTTTTGGTCAGCCCCGGATATTTGAGCGAATCGCAAAATCTGCGC -CCACAAGACAATTCACATGTGTGATCGTAAACCCGCCCCCCTGTATATCAGATCCGACGA -CACCACAATTCCAAATGTGCCCATATGGCATATTCAAACTCTAAACCTCGGCCCATTCTA -GTTGTACATCACTTAGGGATAAGTTACGAAAGATAGAGTGGAGACGAGGACACATTGTAT -AATACGCACTCTTCGTATTGTGAACAAACATTGAAATTTTGTAAAAGTGGATCAGGTATT -TAGATGGAGTATCCGCAATGGAGAACTTCCATGTGCTTAACGTATGATGTATATCTGATT -TGGGAGCGAATTCCGAACTGTCATTTAAAAGCTGCTCTCATTGGAACGTTGGCTTGCTTT -GTGCACCACAAGGAAGGGCTGTGTTCCTTCCAATTCGAGCCAACCATAAAGATGGATATT -ATAGAAAATCATATTGATTGAAAGGAATATCGAGCCAGGGCTTGATGAGACTATTGTGCG -GCCTAGGGCTATGAAAAATATCCATCAGAACCGCATGAGCCAACTGGATGTAGATGGCAA -TTGGGAAACAAGCGAGAATCAAACCCTGTTACTATAGACCATTTTCAGCCTCCAGGAGCC -ATAGTTCCAGATCCATTCCTATATTTATCCCGAGATCTAGCCCTAAAGATTCAAGCTAGG -TTAGTCTCAAAGCAAAAACTCGATACAGACTTCCAAGCATATGAAAGTTGCCTCCGGTTG -CCTACGAGTGTGAGGTATCGAACGTCAAAAGTTTAATTAGATACATGGAGTTAGCATCAT -GTAGCATCTGGGCAATGATCAAGAGTTTCCTAGGTACGGAGTGCATAGGAGATCCGGCCG -TCGGGAACCAAGATCCAAGAACGATAGTCGGGACCGGAATCGAGTCAAGATCCGTGTATT -TTGTGCAGATGACAGGTCTAGCGCCCGTTATCGCTCCACTTGGTCAAGGCGTGCCGGGTG -GACTGACCCTCTCAGGACACAGGGGAAATCACCTGTAGGCTAGCCTCGGTCTAGATGGCT -ACATAGTGTAACTTTGAAGGTCGTGATCGATCTGAGATGTCAAGCGAGATAGGGTCATTA -GCCACAAAAGTCTAACAATATGGTATATCTGTTTTTTTTTTCTGCAGTCAAGGACTGTGC -CAGCAGCTTTTATTCCGATCTCTCGAGAAAAAGTACCATCGGATCGCCCCCAGCATCGGG -AATGATCAGAAGACGAACTCTCATGCCACAAATGCAGAACTATTGACCGGAGGCCAAACT -GGAGAAGCTTCCGTCTACAATTCGTGTGAAATCTCGGTCATAGGGACGAGCCTCGTGTAT -GGACTCGGTGGATATCGGGTTCGAAGGTAGGGCGAAGGGAAAAGGGGAGCATCCGAGTCA -CCCCACTCTAACGAGTGGCGCCATTTTATGAAGAAAATTAGATAAGCGGGATTTACCTGC -GTCGTTACTGGACGGACCCCCGAGAACCAATATCTAATTCTCACACTTTCTTAATTGCAT -AGTGTTCTCCGTAGTTACTCCGTCGTAAATCTGGATCGGACATGTTGGTTTTAGATGTCT -GATAAATCTCAAAAAGTGGCGCCGATTTTCAGACGGACAAAAGTACTAGACTGACACACC -TCGTTAACTTTGCCACAAGCGGCCGAGCAAATACAGTATTGCCAAATATACGCGACCCTG -CCAAAGACGATTAGGGAAGTGTTGCGCCGGTTTGAATTCATATCTATCACAGTGGCGGAT -TCAACTGAGTGATTTTAACCCTGGCTTGACGTGTCAATCTTCATCATCATTACTATCATA -TCAATTTACTTCGGAAGATCGGGAATGATATCTCAATGATCCTCTGTTGGAACGCAGCCG 
-ATGACGTAGAATTTCGTCGCAAATGCAGAGAGATCCATTCACAAGAAAACGAAATAGATT -GCCTGACAGGGTTGGCGACAGGTGATCGTCAAAGTATATCCGGCATATTAAACAGGCTAT -GACTTTTACCGGGGGATAAAATATGTAAAGACGCGAGGCCGAGTATGCAGACGTCCGTAT -GGAGTATATCATTCTGGATAACGTGTACGGTATGTCGATATACCGGGTTGGGACCCTTTC -TATATTTGATATATATTATCGACGTCCCCGCTTTTACAAAGGCAGTTTGGGTTCACTGTA -AGGCTACGTAGACAGTATCGCAGCTACATTTGTGTTGTTTGTGACCCACTTCGTATAAAG -GACCAGCAATTGGTGAAGATGTGAATTACAAGATGAACAAAATCAAAATAGTTGCGATGA -TTGTAACCGAGGACTGATTCCCGTAAAACTATCGGCCAATTGACGCTAGTTATAGCCACT -GGGCTAAGACGAAGGCTAAGACGAAATGATATACTCCGTGAAATCCGTATATGATACGTG -GCGCTTTGTTTTGACATGCAACACAGATACTCTGCAGGGTAACCGAGGAAATTGAATAGA -GAAGTGATGAAAACATTGAGCTTGAATTGTATATATAACATAAATCATTGACAATACCAC -GTGTGAAAGGGTCAACCCGGGTCTCAATTTTTACGTGAAAAGTATCAAGAGGGAAACGAT -ACAGTGAAAGATACCAATCCTGTGAAAGATACAATAAAAGGTAGGGGAAATGTACAAAAA -GGACATAAATTACATAAATTACATAAAGCGTCGGGTCCGAGACTCGGCCGGATTGGGCAA -AGGCCTTCCCGTGACACTCTAACTTCAACGGTTTCACCTTAGGGGGAATATCGGAAAATG -AACTCGTCGGAGCCGGCCGTTATAGATCTGGGTGGTCTCGGCTTTATTTCACCGAACAGG -AAATCTAGTGGCTAACCGAAGAGGCTAGGCCACTTTGGATACGCAGAGACTGTAAAGACC -TGACATCACAGGTCAATTTCCACTCCGTACACAGGCGTAATCGTCTGTTCCGGATCACCT -TCGTACCCCTGTTTTTTTTCGGGGCACGCTGTGGGTTTATATATAAATCCCCGGCCGAAA -CTCATGGTAGATATCTTCAGTTTATTTATAAATCCTCGATTCCTCGCCACCCTTCACTAT -AGTAACTCTTCCATCTTACTATATATCTCCCAATCCTGCTACTGCCCTTTGAGGCCAACG -TATGATCCAAGATGTGCATCATGGCTTGCAACCGATCCAATGAGATCCCCAATCTCTCCA -TAGACCAGGGGACCTACCTCAGCCAACACCTGAGTCCCCAGTCTCGACCCGCCGACTTCT -TTGACAGACCCGACCCTCTCGCTGCCAACTGGAGCTATGACAACGCCATCGATCTCTTCT -CCCTCAACCCCACTGACATGGAACCCGTGTCATTCGACTTCGCCGACAGTCTTACAAACC -TCGAGTCCAAGGATCTCTTCAATGACCCATTTGCCAGCTCTGGAATTAGTGGATTCACCA -TGCCCATGGCTGAGGATGCAGCTTCCCTATCATCGGTATGACTCCCCACCGTCCATATTT -TGACagaagaagaagaagaagCTAATCTCCCTGCAGGAATTCGAAAGTGACGATCAAACA -TGGCCATCTGTTGCTGCACGGCAATCCATCGACTCGAACGCGTTCGAGACCCAGTCGACG -ATCCAACCTTCTACCTACGCCTCTTCTCAGCCTCAAACACAGAGCAAGACCCGCTCATCT -TCAACAAGATGGTCCTCAAGTCCAGAGATGAAGGAAGAAGAGATCGCTACCCCTCAACCC -TCCAAAACCAATACCTCCACATCCCGCAAGACCCGCAGTTTTTCCCGAGACTCAAACCAC 
-TCCTCAACGGGTGGCCAAGACCCTCAAATGCGGAATGCTGCCAAGCGCGCGGCTCATAAC -ATTATCGAGAAGCGCTACCGCACCAACATGAACGCCAAATTCGTTTCCCTCGAGCAGGCC -ATTTCACCCTCAGGTGTTCAAAAACATACCAAGGTTGGCGCGGGGTCTCTCAAAAAGTCC -GAGATCCTCACCAATGCCCTCAGCTACATCGACGGCATTCAGCAGGAAAACCAAGCCCTG -CATAAGGAGCTCGCACTGCTCAAGCAGAACTTGCTACCTGGCGGAATTTGGCGCCATACC -AAAAACCCTCGACTATGATCAGGTACACACCGATCATGCCACAGTGAAGCTGCCCTTCAA -GTTTCTTTTTTGTCCTCATGACCGCGACTAGCGTTGTATTCTTAGAGTAGCCGGACAACG -CGGTCCCGGTCGGCGTTTTCTTTTTGACTCCTTTGTTATTCATTTTCTTTTATCTCGATG -GACTGGCGCAGGCGTTCGGATTAGGTGTTGTTTGGGCTCGAATGTAGCCCTCTCTCATCT -CTCAGGACATGATTTGATTTTCGGGTCGTGGCGACCACAAAAGTGTTTTGTGACTTGTGA -TTCCCCAACATGAATGGGAATACTTCCTGGATAGTATCCTAAAGCATTAATATAATTGCC -AAGACTCGAATGATTCATCTTGCTCAAACATCCCCTTTTCTTGGTTGTGTGCGTATGGCC -CGATGCTCCCCAGTATGGTAATAACTGCGCTTTGGTGGTCCCACCCAGGGATATATTTCT -GCAACGTGAGTGAATTTCCTTCGTATCTACTCTATACTATCTGTTGAACTAACTGGCCAC -TACCCGTTAGCAAGGTAGATTCACCTGAACCCAATGCCAACTGCGTTCCGCTCCATGACT -GTGGCCGGGCCTCCATATCGACTTCACCTCGCCCTGATCCGGCTAAGACCGTGCTGAATA -ATAATCAACTTCTCAATCTCAGGATTTATATTCAATGGACAGGATCAGTCTAATAATGCA -ATTGAGCAGAGCCAATACCATCTCGAGAGACGTGCTCGGCCCCTAGCTTTTGATCGGGCA -AGCGGATATTCTGATCGGGGATGCGGATAGGAGAAGCGAGATCTGGCTTGGTGAATTGAT -CATGACTTCGAGCAGTGGCTACGGAAGCAGTACACAGTATTCAGCGCGTGGCTTCTTAAA -ACATTATGTTTGCCCCTTGCTACTATAATCAGCGCAGCTAGCCTATTTCACTAACGGGAA -ATATCTTGCTGCCCTTCGTCCAATGTTCCCCCTTTGGGATTAGGCTACCGCACCGTGCCT -GAATATCGCTGCAATCCCTCGGCTCACGGGTAGTGGTAGATGAATACTCCGTACAATCTA -CGCAGTATACACCTGCCATGCCCTGCTTTCAAGCATACGCAATCGATACTGAGCCCACCG -GCCTGTCCACGAGTACATCTGGGCGCTACTCATATGAATTGAACTGATTTGGGGTCCACG -GGGTTGTCCCACAACCGTGCGTGCTATGGCCTATCTGTTATTGGCCTATATCGCCGTAAC -TCGGTGCTTGCGTCTCCTTCTCCATCCTTTGTGGGGTCCATCTGGTGAGATTTGCCTTTG -CAGATGAATGGAGAGGGCCCTGACGATGCCCAACTGAGGAGGCTCGCCAGCCTGAGCTAA -AGCTTTCGGCTCTCATTACGAAACAATAACTGAGGAAGTGGACCTTGGTGGCACCGTTTC -CCCAGACAAAGCTCAAAGCTGCTGCTTTCAGTGGTGCAAAGTGCACTCCAACTCCGTCTT -GTCGGGCTTTCCCTTGGGACTACTTTCACGAATCATACTTGGTCCTTCCCCGGGACTTCA -GCCGTTGATTGTCTCCAAATGTTTCAAGCTATTCCAAGTCCACTTTGATATCACATCCGG 
-TTTCCGCTGGATATCTGTCGCTATCGCGCTCATGTGGAATTGACTCCACATAGATACTGT -TTAATATCTCATATGAAATGCAGATATACAGTTCCACTGTTGTTACAATGCTTGTTTACA -CATTGAGGCATGTCAAATTTCGCGATTACATACTCCAAAATTGTGCTAGAGCCGCGCTCT -GAACGCTAATTGATTCTTTATCGGCCACGCATGGACTTGCCGTTTTGGGTCAGCTGTCGC -CACTGATGATTGCACTTGATATGCCATTACTTTTCATATCGAACGAAGCTACTGATCTTC -TTGTTGGAATTGGCATACTAATCCAGGGGATATCCTTGAAGAAAAGACGCTGTTCTACAA -CGGTCGGGGCGAGCATACAAATCATGCAATTTATGGTTGCGATCTAGATTCCAGAGATCT -ATAGACAACATACGTATCTACATACCTACATGTCTACAGATCTATATTATTATCATAAAT -ATAATCTCTAGTTATACCCCATTTCTTCTGGTGGGGAGACTAAGGCCCACTGGAGGGAGG -TCCTCCCGAAGACGATTCCCCGCAAGATAAAAGGGCCGATCATTTCTATTATCATCTTGA -GTGACCAATCCTTCTGATTCACTGAAATCTACGGAGTAAAAGTATTAGGTCTGCGTCATT -GGATACTCTCAATTGGTCCCCTTTATCTAACTCGTATTTGCCGTGAGGGGTTTATCAACG -CGGACCGAAAGCGCCAGATCATCTAAAGTATATAGCAATCACTTCTATCCATGTTTTCAT -ATTATTTTCAGTCAGAAAATGGGCGCATCTCCTCCCTGCCCACTGAGTTGATCAACATCG -CCTCCGGCTGAATCACCACGGCATGGATTTGGAATCCAATTCGCAACCCCTGAAGCCACC -ACTGACGGATCAGAATGTGCAAGATGCGCAGGACCTCGCTACGCTGGGCCATTCCCAGGC -CCTGACGCGGAAATTTGATCTGTGGAGTATGCTGGCGCTCGCCTTTTGTGTACTTGGTAG -GTTTGCATGTTTTGCCCGATGAAACCCGAAAATGGGCTGACGTGTTCAAGGTACCTACTC -GACTTTCGCACAGGATCTGAGCAGTGGCCTCACCAATGGTGGTGCTATCACCATCCTCTG -GGGCCTGGTCCTGGTCACCGTATGCAACCTCTGTGTTGCTCTCTCGCTGGGCGAGTTGAC -TAGCAGCATGCCTACCGCCCTCGGCCAGGCCTACTGGGTATATCGACTATGGAACACACC -TCTGGGTCGGTTTGTCTCATATATGTGTGCATGGATCAATACATTTGGATGGTGGACCTT -GACTGCGTCACAAGTTGCCTTCATGACTGAGTTTCTGCTGGGCATGAAGACCATGTTTGA -TCCCGAGTGGGATGGAGTCAACAAGGGCTGGCTCAACTTTGTTGTATACATCGGGGTGGT -CTTTGTGCTGACCCTGATCAACGTGGTCAGCTGTCGTAAGGAGAAAATCCTACCCTGGCT -CAACAATTTTGTGGGAGTGTGGTTCTTCGGTCTGTTCATTGTCCTTTCTCTTGTCCTACT -CATCTCTGTCGGAACCAAGAGCGAACTTTCATTCCAGCCCGCTTCATTCGCGTTTGGTGT -TTGGAAGAATGAGACCGGCTGGGGCGACGGCGTGGTTTGGTTCACCGGGTTGGTACAGGC -AGCGTACGGGTTGACCGCATTCGATTCAGTGATTCATATGGTGGAGGAAATTCCTGCGCC -TCGGAAAAATGCTCCTCGAGTTATTTGGATGGCTGTTTTGTTCGGTGCCGTGACTGGATT -TATCTTTATGGTGGTCTGTCTGTTTTGCATCCAGAATGTGGACCGCGTCGTCAACGCCGA -CCTCCCCTTCATGGAGCTTATGCTCGAGACTGTTGGACTGAAAGGGGCCGCTGTCCTGAT 
-TGCACTGTTCATTTTCAATGGCGTAGGACAGGGTATCAGTATCCTCACCACTGCATCCCG -TTTAACCTGGGGCTTTGCCCGTGACGGCGGCGTACCTTTTAGCAATTACTTCAGCCACGT -CGACCCTGTGTGGCAAGTTCCTGCCCGAGCTCTCTGGGGTCAGGGCATTGTCATCGGCAT -CGTCGGCATCCTATACCTATTCGCCAACACCGTGCTTGAGGCGATTCTCAGCGTTAGCAC -GATCGCCTTGACGGTCTCCTACGCCATGCCAATTCTTGCCTTGCTCGTCACCGGCCGTGA -GAAATTGCCCCCAGGTCCGTTTAAGCTCGGTCGCATTGGTCCGTGGTTGAACTGGGTCAG -CATCTTCTACTGTATCATCACTACCATTTTCTTCCTGTTCCCTGGCAGCCCTAACCCGGC -ACCCAGCGACATGAACTTCGCCATTGCTGTCTTTGGTGTTATGCTGGTCATTGCTGTTGC -ATTTTGGTTCATTCAAGGCAGCCGAACATATCTTCAGACCGAAGATGCTATTGCCTCGAT -GGTCTACGCCCACCACCTGGAAATGAATGAGCCAGGTGTCGATGACACCGTGCCAGCTGT -TCAAGCCCCGGTCATTGTTGATTGTAAATAATTCTGTGAGCTTTTGTCTCTCGAAGCCAG -CTATTTTTGGACTGTAGGGCAAACATGTACATAGTAATGCATAACCCGGTTGGAATTTGT -ACATCTGGAACCAGTCTGACGTGCCTCTTCATACACAGAAACCTTCAGACCATTCAAGTC -ACGACCTGCTATGGTTATGAGCTTTGAATACATTCATTCAAGAAAAATGAAGAGAATTAG -ATGAACATTGTTGCGTCGCGTGTGGACACAACTCGACTAAGAGTCACGGGACAGCACGGA -ATACCCGCGGACCGCTGATCTTCTCACCTCCGAGGAAAGAGATTAACATTTCAATAGCGC -CTGGACACCACATTGGATCAACAAAACAACCCCCATGTTGACCCCTGATATGTCTCACAA -AAGCAAACATTTGGAGTGGTACTCCGTATGGAGTCCCTACGGCTCAGATGACACATCAAA -TAATGAAGCCCTGAAGATCGAGATCACCAGCCACACCAGTCTCCAGTGCCAAGATGATTC -TGGCACAGAAGAAGTTCTGCGTGAATTATTGGCGCCAGCCTCCGGCACACAATGAAAATA -AGCTTGATATTTGGGTGATAATGCTTAAGCAGATTTCAGTGCTACAGAAGTCTAGTGTCT -CCAAGATTGTCTTAATTTTCGATACGATAGCTCGATTGGCTGTCAGCTACTGCAGCAGTG -CATGCATGATCACCATCCCAACCTCCCTGCTAGCTAATGTAGCAGTGCATGATCATCATT -TCAACCTTCCCCCTCGAGAGGCTTGGCAGGGGATGGATTATTTTGTTCATTGAACAATTC -TTCCAGCTCCCATCAGATCTGATAACTGATGAACTGTGTGTGGGCTGTAAATCAGAGCCA -GAACCAGAGGTCTCTTCTTAGGGTCCAGCCATCCAGCCACTTGACAATACAAGCAAGCCC -CATCCGACGCCCCTCACATTGTTTTCCTCATAGTGGGTCATCCCCTGCCGATCATTAACG -GACCCCGACGGACGTACTCAATATCACAACGGGGTAGACGACTTATCGCTATACGGAGAA -TGCACGGAGTACACCGTATCGATCATATCATCAATTCACCCTCGGGTTGGGATGGCTGTT -GGTCACCATGGACGGAGTAGCGAGATCCCGATGCAATATCACAACCGCCAGTCATTGATT -CTAAACAGTTTCACCAAACGGGAATCCACCGAGGGTCCACTGGACAGGAACAGTGGAATC -TGTTATGCCAAGGAGATGACGCGAAACTTCGATTCAAGCACTCCGTCCTTTTCAACCTTG 
-TATTATCCTGGACTTCGTCAACATTCTTTATCGCCCCATTCATTCCCCCCCCACAAATGC -AACCATAATAAATACCACTTTACGGTTTTAACCCCTCCTTTGGCTTTGATAATTTCCTGT -CGGACTATTTATACTTCCTGTATTCGCTGTGTGAAAGGCACCGGTATCTGATAGGGTGTC -ATATCACTTGTGTTGATTATGATTGTATCCCTTTTTAATCGCGGAGCTTTGTCGCTCGCG -GTCTTGTCGCTTCTCGCGTCCTCCGCTGCAGCCAATGTATTTGAGAGTTTGTCCTCTGTT -CCTCAGGGTGAGTAACTTGTGTTCTAAACTAAACCGACGGAATTGATACGCTGACCCACC -ACAGGATGGAGATACTCTCGCACACCGCGGGCTGATCAGCCTATGAAGCTACAAATTGCC -CTGGCACAAGGAGATCCTGCCGGATTCGAAGCGGCTGTGATGGACATGTCAACCCCTGAT -CACCCTAGCTATGGGCACCACTTCACCACCCACGAGGAGATGAAGCGGATGCTGCAGCCC -AGCGCGGAGTCCGTAGACTCGATCCGTGACTGGCTCGAAGGTGCGGGGATCACCAAGATC -GAACAGGATGCCGACTGGATGACCTTCTACACCACTGTGAAGACGGCAAATAAGCTGCTG -GCAGCCAATTTCCAGTTCTACGTCAACGGCGTAAAGCACATTGAGCGTCTTCGCACACTC -AAGTACTCCGTCCCGGAGGCTTTGAAGTCACACATCAACATGATCCAGCCAACCACCCGT -TTCGGCCAGCTGCGCGCCAATCGGGCCATCTCGCACACCCAGGTCAAGGAAACGGACGCA -GCTTTCCGCTCGAAGGCCATGTCCGCTTCCCCGGACTGCAACAGCGTCATCACCCCCCAG -TGCCTCAAGGAGATGTACAATGTTGGTGACTACCAGGCCAGTGACAACAATGGAAACAAG -GTCGCATTTGCCAGCTACTTGGAGGAGTATGCACGCTACGCTGATCTGGAATTATTTGAG -AAAAACGTCGCACCCTTTGCCGAGGGCCAGAACTTCTCTGTTGTCCAGTTTAATGGTGGT -GGTAATGATCAACAGTCGAAATCTGACAGTAGCGAGGCAAACCTCGACTTGCAGTACATT -GTTGGAGTCAGCTCTCCTGTTCCCGTTACCGAGTTTAGCGTTGGTGGTCGTGGTAAACTT -GTTCCCGATCTCGACCAGCCTGATCCTAATGATAACAACAACGAGCCGTACCTTGAGTTC -CTCCAGAATGTGCTCAAGTTGGACAGCAAGGACCTTCCTCAGGTGATTTCGACCTCTTAT -GGCGAGGACGAGCAGGTATGAGATCCCCTCTTTATTATTTATGGTTTCGTCGGCCTCTAA -TCCCCACTTCCAGAGCATCCCCGAGAAGTACGCCCGCAGTGTCTGCAATCTTTTCTCACA -GCTCGGCAGCCGTGGTGTGTCTGTAATCTTCGCATCTGGTGACTCCGGCGTTGGCGCCGC -TTGCCAGACCAACGACGGCAGGAACGCGACCCACTTCCCACCCCAGTTCCCTGCTGCCTG -CCCGTGGGTGACATCAGTCGGCGCGACAACCCACACCGCTCCAGAGAAGGCCGTGTACTT -CTCGTCCGGCGGTTTCTCCGATCTCTGGGACCGCCCGAAATGGCAAGAGAATGCTGTGTC -TGAGTACCTCGACACATTGGGCAACCGTTGGTCCGGCCTTTTCAACGCTAAGGGCCGAGC -ATTCCCCGACGTCTCAGCACAAGGCCAGAACTACGCCATCTACAATAAGGGCTCGCTGAC -CAGCGTTGACGGCACATCCTGCTCGGCACCTGCCTTCGCCGGAATCATCGCTCTCCTCAA -CGATGCCCGCCTCAAGGCTAAGAAGGCACCCATGGGCTTCCTCAACCCCTGGCTGTACTC 
-GGCCGGCCGTGATGGCCTGAAAGACATCGTCGACGGCGGCAGCAAGGGGTGTGATGGCAA -TGGCCGCTTCGGTGGCGCCCCTAACGGGGGCCCATCCATTCCAGGTGCGAGCTGGAACGC -TACTAAGGGCTGGGACCCTGTCTCTGGCCTTGGGTCCCCTAACTTTGCTGCCATGCGCAA -GCTTGCGAACGCTGAGTAGATTGGGAGTCTTGCATGTTGATGTAGCCCAATGTTTGAGTT -TCAGAGAATGATTGATTCCGACCGGTCGGAAACTTCCGGTATACATGCATGCATGCTAGT -TATATACATATAGACTATTTCCTATATTTGCAAATATAATAAGCTTGTGTCTCATTCACA -TGTACTCTTTCTTTCTACCGTGGTTTCGTGTTGTTCGACATTCACCCAGGTTGAGGCGCT -GCACCCAAGGAAATCTCTGCTTATGCCTATGATAATCAGTCGCACTATTTATTATAGGGT -GTTCTTTGAAGGGTGTATAGGAAATTCTTCTATACAACTACAAAGCAATCTTTTCCCGGA -ATTTTCTACTATCTACCTTCAACTATCTTGAAGATATCTAGGAAGACCGCTGAAAAGTCC -ACTTGGGAAACCTATCAGACACCATCAGGCCACACATCTAGTAAAAAGGAAAGAAAAATT -GAAGTAAGTCTTTAGATCGAAATGAGTGTATATGTCTCAAGACTTCAAGAAGGCGGTGAT -GATTGTGGTAGGGCAATCGTCTAAAGAAAGAGTTATGTATCGGTGGTCGCCCACCTCTTG -AAAATGTTTATACAATGCTCCTTATTCCTTAAACACCGGCTCGGACTTCTCAATATCCTC -CTTGCTCAAACACCACTTATCCTGCAACACAGGATCAAGACTCGATGCGTAGACATCCTT -GGGCCACCATTCACCCCAGCTGGTCAAGAAAATCTGTTCTGGATCAGCGGGGAGATTATC -AGCTGTGACTGCGCGCAGGATACTTTGGGCACCTATTGATGGTTAGATACCGTCTTCAAA -CCAAAATTTAAATCCAAGTTCCTTACCCTCTGGCACGGTCTTGGCATCGGGCATGTCCAT -GGTCAATCTTAGGCCCGAACGTTGAGCAAGCTTAGTGTTTGGGATGAGACCTGGCGAGAC -GGCTACGACTGTGCAATCTGGGAGGTTACGGCGCCAGTAGTGTGCACCGAGCAGTTGAGT -GAATTTCGATGCGCTGTACACTACTTTTGTGCTCGCGCCAGAGTTTGCTTTTAGGTCGAC -ATCAAGAGTGCCTGATATGAAGTCAACATTGTGAACACCTGCACCTTTTGGTGACACGAC -TGAAAAGATGGAACAAAACTTACTTGGGTCGCCTTCTCTCACATTACGAATTGCACCCGA -AGATACAACGACGATACGTGATTGCGAGGCAGATAGTACCTCAGCAAATTGGTGGGTCAG -ATAATGCTGAGCTGTGGGTTGCGATTCACGAATGATTAGCTCCAGCTTCCTATACTGTTC -TCTTCAGTAGCCGATGCATGGGACTTGAACATACCTAGATGGTTTACTACGTAGGATTCG -CACCATTTTGAGCCATGAGGTCCCGGTCCATCGGCGCTGGCGGCGAGGCCCGCAATGAGG -AAAAGATTGTTCAATGGTCTTTGACCCAATTCAGAGAGTGCTTGCTTAGCAAACGACTGC -ACGCTCTTCAGGTTCAAAAGATCTAGGGGCAAGATAGAGACGTTATGCTTTGAAGTGTCA -AATTCGAGGTCATCGTACGCGACACGCGTCTTCTCTGTATCACGAACGCCCAGAATGAAG -CTGTATGGTTGTGCCTGCTGGAGTAACTGCTTGATTGCTTCAAATCCCTATGATCATGTT -AGGTGGAGTCTGGATATTCTCAAGCGGTGCAGTCTCACCAATCCCGACGAGGCGCCTGTC 
-GCGATGACGGTCTTTGCAATTGAGGACATCTCGGCAGCTTTATCTAGTGATATTTGATAA -TCAATGGCCTAAAAGACTTGGAAGTGTTGGAGGGGCAGGAGGAAGCTGATTGTGTCCGAT -TCCATGACGTCAGGTGATCCTCTAGACCGAAAAGGGAAATTAAAATAAATTATGTATGTC -AATCTTAGTTTACATTGAAAGAATCCACGTTCAGAGGATGCGAGGTAAAGAAAACAAAGT -CGCTCATCCGCTAGAAAAAAACAATCATCGAAACAGTCGTTGAATCCTTCCCCTGAAATA -ATCACCAACCCATCCGTGGACCTCCCGGATTTAAGCAATAACAGCCATCAAAGCAGAGCC -AAAAAGGAGTCCAATCAGTCCCAGACCGGACTTGGAGATTGGCTGGTTTGCGATCGCAGC -ATTCGTGGAAGTCGAGGTAGTAGAAGAGTGTGTGCTAGTGCCCGAGGAAGTGGACTTGGA -GCGTgtggcagtggcagtggcggtcgcattgagagcggcgttgcagAAGACTTCGTGGTA -GTTCCCGCTCTTTCTGGAGATGCAGGACTGAAGCTTGTCGACATCCTGGCCCTGGGCGAG -ACAGTAGATGCCACAGTTGTTCTCATACTTGGAGACCTTGGCGTCTTTACAACACTCATC -CATCACATCGGTCATGTTCCCAGTGACAATGGCAGCACAGGCTGCATCAGTTGTGGGAAT -TTCCCAGACTCTGGCGCCGCAGGCAGTGCCTGTTGTGCTGGCGGATGAGGTGGAAGTAGT -AGACATTTTGGATTGTGGAGGTTTGTGTTGGGAGACTGAGAGATGGGTTGGAATAATTTG -AGAAGAAGGAAAGTGAGAGGCGAAGAAGTCAAAAGGACCTGTCCAAGACACAAGACCTAA -AATGTGACAGGTGTTAGGCCTCCAGCCTTGGCCCTCGATTTACTATACGTACCGTACGGA -GTACGGAGTACTCCGTAATAAGATGAAAATCATTATTTTACCTTTTTTTGCCACGAGCTT -GTTTTTCGCTTCTCTTACGATACCTTTGATGAGGTGCATGTGATATGTGTTATGCATCTT -GGAATTCGCATCCAGACATGGAGTACAGAGTAAGGAGGGAAGAAAACGCCAAGGAATTGT -ACACCGTATAGGGCTAATATGTATGTCGACCACTTGCTGATACATACTGACAGAACTTGT -CGGCCTGTTGATCAAAAGTTCATTGATATTCAAGGTAACTTGCCTCGATTTAATTCTTCA -TGGTAGACACGACATTCTCGTGCCTCGACACCTCGACCTTATTCTCATCCATCACAGCCT -CGTGGTGTTTCTCACCAGCAGGCTCCGCGCCGACAAGGGCTAGAAGCCCAATTTCTCGTT -CAATTCGTTCCTTGCGTCCAGAATCCGCAGCTGCCAGTCCGACCGATCCGAACAATTCGT -CCATCTCCTCCAGAGTGCGGCCTTTGGTTTCGGGTACAAAGAAAATGACATAGAGCACAC -CGATGGTCGTGATGCACCCGAAAAAGATGAATGCACCAAAGTTGCTTTTTTGGAGGAAAG -GCGATGTAGCAGTACCAACTGCAAAGTTGTTCAACTGCGGAGTACCCAGTTAGAGATTTC -CATTGAACTCTGTGCTATCTCGTGACTTACCCAGTTACTGCTGCCACCAATACTGACGCC -CTTGGCACGCATACTGAGTGGGAACACCTCGGAAGTAACAATCCATGCAACAGGTCCTGT -AGTAGAATGACAGTTAGCAAAGTCTTACCCTCCGCTGGGAGTTTGTACATACCCCATGAG -TAGGCGAAGTTGATGATGAAAATCCAAACAAAGACAATAGCAACCCAACCAGCAGCCTGG -TGCTCTTCGAAGCTACCTTGGTATGCGCCGATGATACCCGCGACAATGAAGTGGCAGGTT 
-GCCATTCCAATTGCACCAGCAATCAGGATCTTCTTTCGTCCAATCTTGTCAACCCACAGA -ACGGCAGGGATGGTGAAAACGAATTCAAAAATTCCAACAACACCAGTGGCAAGTAGTGAG -ATCGTATTGCCACCTAGATGCATCTCCTTGAAGATGAAAGGTCTGCGGAAGTGTCAGCTG -GTATCTTCTACTACGAAACTGCATTTGTGAAAACTCACGCATAGTAGTTAATTGCATTAA -TGCCTAGATAAGAATGTTAGTGATGGTTTCAATATTCGGCTTTTTGTATATCCTACCATT -CCATTGTTGGAACACCATAACCATACACTATGATATCGTCAGCAATATTCATGGGAGGGC -CACATGTGACACCGAACGTACCGCTGTTGCCGTGCGCTTAAACAAAGACTTGTCGGTGAT -CAATGACATGTAATCATGTAGTCCGATCTTGAAACGGCTGGAAAATGAGTCATCTTGCCA -GTTGGGATACTTCTCGGCAGCCGTCTCTCGTTCGAAGAGGTACAAGCTCTTGATTTCAAG -GAACTCAATGCGAACAAGAAGATCATCCGGGGGTGCGCTTCGCAAGCGAGCCAGTGTTTC -CAGGCATTCTTCCTCACGACCGGTGTTCATAAGATGACGGGGGGACTGTGGCATGAACAT -CATCATACCCACAGCCAAGGCGACAGCGGGCAGAATTTGAATACACACTGGGACCAGCCA -TGCAGCATCCGACTGTGTTGCCCCGGTTCCGCCAATGTAATTTGTTCCATAGCCAATCTG -CAAACGTCAGAGCTGAAAGATTTCACTAGAACGGACATGACCTACCCAGAAGCTGACCAT -GATTCCAAAGGTGATTGCGAGTTGCTGTACAGCCACGAGTGAGCCACGAATCTCTGGTGG -TGCCTTCCGTCTTTGTTAGCCATTTTCCCCTCTACAACTCCCTATGTGATTGCATTCGCT -CACCAATTCTGCGTTGTACAGCGGCACGACCATACTCAAACTACCAACACCGAGACCAGT -CACGAATCGACCTCCGAAGACATAGTCTGGGCTCTCTGTACAAGCTTGGACAATGACACC -AACACAGAACACTACCACGGCAACCAAAACGGTCAGACGACGGCCGAGTGCATCGGCGAG -ATATCCGTTTGCCAATGTTCCCACCCAAGCACCAAGTTCGAGGATAGACGTCAGCATTCC -TTGCTTAATACCTGTTTCTTTCGCATAGTCTTGGGTCTATGGGTAGGAAGCCATTAGCAG -TGCAAAATGGTGGGTTTAACGAGTGATACACATACCGCTGCCTGGAATGCCGGCATTGTC -AGAACTTGAGCGAACATTCCCTGGTTATCTGGATGATATCAACTTCTATTCACGAGTATA -TCGACATGTTTTCGTCAACTTACATCCATACACCAACCCTCCGAGTGACGCGAAAAGTCC -GATACATGTTGTCTTCTTGTTGCTGATCAGGCCCTTGAACCCATTTTTGCCTGACAGCTC -AGCACGTTTGGCCGACACGGCCGTCTCTCCCGCAAGAGGGGCCACCATGGTGTATCAAAA -CACGGATCGATAAGGATAGATGATACAAAGGTACCCAACTTCAGATGGAACTGAAAGTAA -GAAGAGAATGGCAGATATCGGAAGAAGTTCTAGCGCGTTATTTACGTCTTGACGGGGGAT -CTGCGAGTATTTATTTGCTCGGCTGCGGATAATGAGCTCGAACTCAACATTCCCCCGGGA -ACTTAGCCCCATGGGGATACAGGGGAAATGATCGGGTTGAAACCTGAAAACATGAGGTAA -TCGGCCCAATCAGCCCATTGTGACCAAGCTTCGGTCTCCTCTCCTCATCAAGCCATGTGG -GCAGGTCGGATCGGCTGTGAGACGGGAAAGGCTCGACTAGCGTGGTATTGAACCGCAAGA 
-CCGTCAGTGGCTGCAATAACCTTGGGCCGTGCGTGATATTCAAGCAGAATTTAACCGGTT -CTTGGCAAAAGATCCTGAAACGGCTGGGTAAATTCGGGAAGACCGTACGGAGTATACAGG -ATCGATTTGCTAGAAGGGAGTTTCTGGGAGTTGCTGAGTCGGCATGTGCCGTTCAAAGGA -CTCGGAGCTCCACCGGATGGAGAAATTCCGGGGACCCCGCGGATTTCAAAATCTGGAGAT -TCGTCAATCTCAACACCCAACTTGCAGTTCATTGAATAGCCGTATACGACGTACTAGTCC -AGATTAGATTGTCTGAAACCGCCATCTAAGCTACAATCCAGAGGTAATGACTGTATCATT -TGAGGTGAAGTCGACAGTATTCCCCACATTGCACCGGGTGAATTGCTTTGATTTAGCCGA -TTTAGCCTCTGTTTTTATGGCTGATCGCCACCACATTTTGACTCATGCACGAATCGGCTT -TGGGTGGTCGTGCTGCCGGTCATCCACTATGTACTTTGAACGTTGAACCTCAGGATTAAC -CGGCAATGGTACGGAGTATGTACATCAGACATCGGCATGTGGGTAAAAGGGAAGGACCTG -TAATTTTCTCCAAATATGCGCTTCCCCAGACGCCAGAATTGATCCAAAAAAAACACCCAT -AGGAAATTTCTCGGCTTCTTGCCGGAGATTGATCGGTTGTGAACCATGAGACCTAGCCTT -GGCGCAATAAACACCCGAAAGTGCTGTATCAAAGGTTGTCTGGATGCTTAAACTGATGGC -TTTTCCACAGACTCCACTTCGGTCTCGTGGCTAACACGCGCTTGGCCTCGGCCCGTCCAA -TTCAGAAATCCCGATTCCCTTTGCTTCCCCGATGTGTGCTGTGTGTTCGTGCTATGAAAA -TGAAATGGTATTTATTTTTCAGTGTTGCACGGAAGTATTCCATGGAACAATCCTCCATTG -CTTTCACCTTAGTTGGACTCTTCACGAATAACCCATTAGCGCATATTTGAGTCTTATATA -TATGTCGATTCTAACAAGTGCTATGGCATTCGGCAGCCCCATATAGACCTCAAAGACCGT -AACCCGAGTCCAAGCGCCATGGTTATAATAGAGAACACAACCGGCGACCAGGCGACCAGA -AATCGAAGCAACCCTCATTGGAATATTGCACTGGTAGCTCGCAATGGCGTAAATTGTTCC -AGTAGAAAGAGATCACGCTACGATATTCGCAAGCACATAACGTCAAAAAGTGTAGAATTC -AAAGCCTATCCTAGGCATTGAAGTGTATAGCCAGGAACTCCAGCCATAGAAGAACCCGGG -AGTTTCGATGCGGCGGAAGGATATGCCAAGGTATAGACGGCATGAACCAGTAAAAAGGCG -CCTCGAGCAGCGAACATAACGCCCAATACTCCAGGTGTGCGGAGTCCGATGTTTTCCAAG -CCATTAGATTTTGCTTTCGAGAGAATTTTTCTTTAGATCAAATGATCTCTGAGTGTTGCA -AGTGGGGGTCTTTTACGTTTTCGTTTCCGGTTTCCCCCGGCTGAACTTCGAAGTGTTGAC -ACATATCTCTAGGCTAGACTAGATAGTTTGTAGGCTTCTTTTTTTGTTCTTTCGGACTGG -GAATGTTAATTCAAATTAGCCTATTGTTTGAATGTCTCAGTCACATGATCTTTCTCAATA -GGTTCTTACATTCCTCTTCACGTACATGTTTCATACTTTACAACATGCACCTCATGCTAC -ATACATCTACTACATACCCAGTAGCATATTCCACACCTACGTGGGAAGGAAATGCACCGA -AACAATGCAATATAGTCCTCGAGGGGGATCCAGTTATCTATGTGTCCCTAAGATTGCTCT -TTGAAGGCACATGCTGCAACTAGATAGATAGCCTTCGCAGATTTCTAACTTATTCGTTAT 
-GCGAGAGAGTAGTGTCAGAGTAGTATAAAAGCAGAACATACTATGCACACAACAACTGAG -CCCCCCAGTACTTACTGTGAAATTCCGAACTTTATAGCCTTGACCACGATCCATCTCAAT -GCGGTTTGCTTCCCTGTGAAACCCTTTGATGTCACCGCAAACATGTCCATCCCCCTGGCA -GTTCTCACAAGCCTTTCAAACTCTATTCACTGGATACTCCGACCTATCAGAACAATTCAC -CATGGTGCGTATGGTAAATCCCCATGTCCAGGAGACAGTGGGCCGCATTGTCGGGGCACT -TGTTATGAGCATCACATCCAGTGGCATCCAATCCCATACAGAAACCAACGGGATGACTCC -CCCAACGAATAAGGACTATGACGCCTTCATTCGTGCCTGGACTGTTCAAAAAAGCTTCCG -TGGTTACGGTGTTGGCGCTGCCCTCCTCAACGAAGCCGTCTTGATTTGCTATCATCATCA -GTGGAAAGGACCCCGATTCGCGAACTCTCATACTAACTCGCTTCGCGATTTCCCCTCCCT -TTTCAATAGTGAGATGGATCAGGAGTCTGCCATGTGGAGCTCGTATCTGCGCAAACGAAT -CGAGGCCAATCGCCAATCTCGTTCTGCTTTCGAGGCTCGAATGCTCCATGCGCCTCTGAA -TCATCTGAATGCGAATTATGGCCAGAGCCCAAGGGTTGATATACCGATGAGATTGATTCG -CATCCAGGTAAAGCTAGAGGAGTTGATCCGAAGATATTTCGACATGCGCCTTGACAAGGC -ATTTGAAGCTCAGGCTCGGTGGAAGGAAAGCTCGTAGTCGGTCACATTTCAATAATAGAT -TGCTGTTTCTGAATGAATGTATTTTCAATGAAATTCAGACATATTGATGTTGATGTTCGT -CTTCTGCTTTTGCTGTTTATCTACGTAGGTATCTTTTGAATCGGATATTCTCACGAGCGC -ATTCTCCTCTCTTCGAATACACCGCCACTGGCAATTCCAGAAGGGAACTGGATGGATATT -TGCAAATTTGCACTCAAATATGAGCCCCGTGGTAATTCATTGTTCCTGTACAGGTCAATG -AAGAGAGGGGTATATAAGCATTTCTATGTTCTGTGCTCTAGCCATGGATCTTGAAGCAAT -TGCTTGGCTGTCTTTCTGTCTTCTGGTCGCCATTGAAGCATTCCTCTCATGAAATCCAGG -AATATGTCTTTGTTTCTTCCATTCAGGCGCTCCTCGGAGTTTTCTAAGGACATCCCATCG -GGAAGATCGACTTGGTTTCTCCAATGTCCTGTCACACATGCCTCCGCGTCAGAACAATAA -GATCCAAGTGAGGTTCGAGTGTGGCATACCATATTCAGTAAAGAATTCGCAACTTCGCTC -TCCGCGCTTGAGGAAATCCAAAGGAGGAGGACCCAGTACTCCGATTACTTCTGCAAGGTG -CATGCGGGTGGAATAACCCTTGACATCTGGGTCCATTCCAATAAACATGTGTTTACCTTC -GAACAAGTCCCATACCTAGTGTTTTCATCCGCTTAGTGGCGAAGAATAGACAGGGTATCA -GAAGGGGGAACAAACCATAGTTCCAACATTCCAAATATCAATGGGGTAGCTCCACTCAGT -TTCTAGCATGACTTCTGGTGACCTGTAGATGGTGGGTTGAGCATCATGGCTCCTCTTCTC -ATCGCCCCGTACAGCTGAGCCAAAATCGCTAAGGACTGCCCTACTGAATCTTGGAGGTAA -TTCAAATCCTCGGGACGCATATACTGGCATGTTATCAACTAATTTCCGCGCCGAGGGATG -CTCCATCTCGCTGTCGGTAAAGTTGTCAAGTATTGACATTTCTTCAAGCTCCTGGAGGAT -ATTATCGCCCTTGATGTCTAGAAATCTTTGTTAGTGATCTGGCTTGGTTGATGTCAGTCT 
-GGCAATAATGAGAAAGTATCCCCACCGGTGTGAACAAGCTGGCATTCGTTGTGCAGATAA -TCAAGCGCAAAGAAGATATGCCTAAGTCCGCTCTTCAGAAGATCTTCGCTAAGCCGATGT -TCCGGGGAGCGATACATTAAATGTCGAAAGCTCCCCCACATGGGTTTTTGGACAACACAG -TGATGCTCGTTATCTTGATACCCGCTCCCTTGATGCAGGATCGTGAATCCATCTAATGCA -GTCCTCACATGGGCGTAGCCTGGATGCCATCTATCTCCCTGGTTCAACAATTTATAAGTC -TGGACCTCCCTGTGGCTAGTCCCATCCACTGTGTACACCTTCAATGTGACATACCGATGA -CCCCTGCTTGAATCAGTATATTTTGACTTGAAGACTCAATGGTTTTCGGCATACTCGAGA -TCACGAGCTAGCCATACTGTTGAAGTAGTTCCAAAGCCTAGCTTGCCAACAACCTGATAT -TTAGATACTAAAACGTCACCAATCTCCACAGGATAGTAGCGTCCTTTCTTGAACTCTTCA -AAGCGCTCTTCTTCGAAGATGTTTGACGGTTTGATTACTTCGAACCCACTTGTTGGAAAC -CGAAGCACAGGCGATTGACCACTTTTTATAGCTTTCTTGGCCCACTTGAGAAGGGAAGTC -ATGTTCATAGATAATCTGGAGAGATAGAGAGAAGTCGAAGGTGAGGTGGGCCGTTGCTCC -GAGGCTCTCAATTGACACGGTTTTCTCCGATTTAGGTTATTGAGAAAGTGTAGAACAGAT -GGAAGAGAGACCGGAACCGGAAATGTCCAGTTGTCATCTTGGAAAACCCGATATATCAAC -AGCGAACAAGTGGTCAGGTGACGATTTTGCGGAATTGACGTTGAAGTTTTTAAAAAGAAG -TAGGTCGTTGGGAACATCTTCTTTCCACGTTCCTTTTACTATGTTACTGAAGCGACTTGC -TTTATATTTCAATCTTGTTTCGATTTCTTCTAAAGCAATATACAACAGGACTCACCCAGA -CTATGAATCAAAAACCCGCCTTACAATTGATATGTCAAGCTTCAGACGGTGGATAAGGTG -ATATCACTACTACATAATGTATACACCATGGACGCGATACAAGAGATAGTTTGATCGTGG -TCTTAGAACAAAACAATCTCGGCAACTTTGTATCACCGGTAATGTTGTCTCCATAAAGCT -ATATAAATGGCTCCATAACAGTAAATAATCTCCTTCTTTGACACTCCATCAACTACTACT -AGAATAGTATATTCTACCTTTGTTTTGATTCGGCTTAGCGAGCCTCTCTCTTAGAACCTA -TTAATGAATCACCCCCTCATAGATCTCTCAACACGAAACACGAGGGCCGCAACATTCGTT -GGCCACGGAGGCATTGATAGCAATATATATCCAAGTTTTTGTCAGAGTAGGACTCTTTTA -CCCGGGCACATTTGATAAAGCCATTCATGCATTTTATTTATGTACATAATCAGCCCATCC -TTTGTACATGTCGCCTGGGCAGCCATCAAGGCAGCAAAATATCTAATAACCAACTGACTC -TCCCTGCCTATTCACCGCCTATTTTATCTCTGCTTCACATCCCATGCTTCATCTTCCACC -GCATGACGAAGGCAGCAAATGGATAAAAGACTAATGATACTGCAATCCAAATGAACAGAA -CAGCAAAGTCCAATCCAATGCGCGAATGGGTATCGAAAAGGATGGTTCGCATAGCTTCGA -CAACTGGAAAAAGAAGTCAGATTAACAACAACAGAGCATCCTGACCACAGTGAGGAGAAA -CTTACTTCTGTGTAATGGCCATGCATACCCCCATCTATAGAACCCCGGGGCGAGATCAAT -GGCATAGAACCCTGTAGCAACATTCGTGATGACCCAAAAGATCAGGAAAAAGGCACTCCA 
-TGGGAAACCTAGGATCATGGCCATGTTCTCACAGGGGAATCCCAAAGCAGTCATCCCAAC -CCAGTTCAGCATCCAAAACACGACAAATGAGCCACGACCATAAGCATTGGGATTACTTGC -GGAGAGGACATCCGATTCACTGCCATTGTCAAATGTTATCTGAAAAGCCAGAGACACGAG -AGAGTAGCATAGGGAAAGGAAGAAGTAAGCCGCGATGCTTGAGAGAACTCGCCAAATGAG -CCAATGTGGCACCTTTAGCGGCGGATGGCCATCTGGTTTCATGAACAGGGCATGGACAGG -CATAAGGAATGGGAAGTTGAAAAACGCAATGATGATCAGATAGATCAGACCGATTGTCAC -GGCAGGCGCAGCAACAGCCGGGGCGAAAGGGCGTAGATCAACGGTCGTGAATCCAATGGC -TGGGTTGATCGCCTGTGGGGGCACGGACGACAGGTTGCTGACGGATGAGGTCAGAGACTG -GACCCAACGCGGACCAAACTCTGCTAGCACCATCTGCTGCAGTATGGCTAAGTTGGGTAG -GATATAATTGTAGTAGGATGCTTGATCGCGCGCGGAGATGATCACGGTCTCGATGGCACC -GGTCGGATCGTAGGTTGTATTGTCATTGGAGACTGCATTGCGGAGCAGCGTTGTTGCGTT -TGGATTGATTATCACAGCTGCATAAGCATGCTCATCGTAAACGCCCTGGCGCACGGCCCA -AGGGTCATAGTTGAAATCGGCGGGGGATTTGATAGAGTAGCCGAGGTGGAGACTGTCAGA -CTGCAGGATTCTGTTGGTGACATCGATGACTGCTGGACCGACTAAGACGTCATTGGAGCG -GTATGGATCAACTTGGCCGTCAAAATCGACAACCCATACCTTCAATGCTGGGTAGTTGGC -CTCAGTGCGGTATTGGGAGCCCCAGAAGAGAGATAGCACAGCGAATATAAAAACGCACAA -AATCAACACTGATCTCTGTTAGTTTCCATTGCGTTCTCTCACGGCCGATGTTTTTGGATG -TTTTCTTACAAGTGCGAGCCCACTGGTAGAATACCCGCAGTCGAATCTCCTTGAGGGATG -GATCAAAGAATGATACAGCAGCCGGTGGGCTGGTGTCACCCTCCAATGACTCGGCTTTTA -CTGTCATGACTGAACAGCTTGCAGTAGATATCTATGTAAAGAAACTGCGGATGGGCCATA -TTAATACTTTGATGACAACTACACGCGAAAAGGTGCTCATCCGCTTGCGGAACTGTAGTT -CTCCTCGGAGTTTATCCATTCTTGGAAAGTCTGTCTTAACTCCGAGCCGACTGACAAGAT -CTGATAATTTCTAACGAATGAGAGTGACGCATACACATATTTGCATAGTTTGGTGTTCCC -GGAGGATAAAGCTATAACCGCGCGTTAAATGTGAGAGGACCGGTGCAGAAAATTTATATA -TCCGTAAACGCGGTCTAACATTCTACAACATACACGTCAACCATGGACTTCAACATGTCA -AAGGCGTTGGTCGTAATATGTTGGATATATTAACCCCGGCGCGTCGCGCGGAAAATGATC -CTTGATGTATCATTTACCCATCATTGCGCGTCCCCGTTTCATCCAGCTGATCCTGGTCAA -CTTGCACGGCCACGCATTCGCTGCTGAGAAGGTAGCGAGATACTGCGCGGATCCCTTGAA -AGCTGAAATTCTTCTATTTTCATAATATCTATGAAGCATAGATCATCGACAAAGCCCGCT -TGATACCAAAACGCATAAACCAGCGATGGCCGTAGATCGAAACCCTGTCGAAGGGTGAGT -ACATAGCAAAAAAGAAGGGTGCTGGGTTGCAAAAATTGACGATCGTAAAACTCATTTCAT -TTAATCTAAGGGTATCATTAAATTCTATATATATCTTTGTCCACCAAGGAATCCCAATCA 
-ATCTCTCGACTTGCAACAAATTGCCCATAGGATCTAGGAGCCCGCACGGGGACCCTTTAC -TGCATGAGACAGTGACTTTCGGAAGGAGGTTCGAAATTGTTCAGACACAGGTGACTTGGT -ATGAACCGTAGCGGCGCCTTGTTCCAGCCATGCGGCAAAGTTGGGTGGCATTAACTTGGC -TGGCGATGTCGGAGAATTCGTTCTGATCGCACATGGTGTTATCGAACAACTCGATGCGGC -AGCCCTCGGCCACAGAATTCAGGGTGTAGGCAGAGGCAACGCTGTGGAGATTGGCGCGAC -ATCCTGTGCTGCCAGATCCGACGTGGGGGTCACCGATAGTTCCATTGCATGAGGTTGATG -GGAAAAGTGTGAAAGACCATTGAATGTCGCGGCCGACTAAGGGGAATGCGGAGGAGAAGT -TGAAAAATAGAGATACGGCCAGGAGACAAAGGTGAAGAGTAGAGACCATAGTGCCTTGGG -AACGTAGAGGGTGTCTTCAGTGGAGAAGGGCTGGAGGATGGAAGCGCACACAGGAGAGGA -GAGGGCTGGTTCTTAAAGGTCTAGAAAGTAGAACTGACAGGGGAGGAGAGTCCACAATTT -ATTATGGATATGAGACCCGCCTCCAGGGGACCCTGGAAATTAATATAATTCCATGCCAAG -CATTACGCGTTATGCCCACGTTCGCACACTTCGGGGTTCGAGCAAGCATAGTACAGCATA -TGAATTAAAGTCTCTACTCAGAATTCCTTCCGTCTCCATACAAAGACAATGTAGAAGCCC -CTAACAACGGGTTTTCATTCCAGATGATGTCATCCCGGGTAGATGATCATTTAGTCCCAT -TCGGGTAAGTGGGAGGGGCTGGGATATCCATCTTACGCAGATGCATACGTACTCTTCCCG -AAGAGACATATGCTGTAATTCGTACGGAGTAATTCCTTTGTGACATTTCTATTTTTATCA -GACTCCAACTCAATATGAAAGGTGGCTGAATCTCACAAAAATTAAGAGGCACAGGTGGCA -AAAGTGGCAATAACGATATCTAGCCCTGTGGTAAACAACCTTTTTTTTTTCTCCGATCGG -CAGAGATATCCATGGTTCTACGCTTTAATCTTAACATTCAACCATGGATCATCAAAAATT -CTTCCAACTTGGGTTAGGGTGAGTCCAGGGCGTAATCTACATAATAATTTGAGTGCTTCC -AAGTTTCTATTGCCTACGTCCCTACGATACTACTATGAACAATTCCGATTTGAACGTTCT -TAGTTTCCAAATTAAACAGAATACTGCTATTTACCTAATGGAGAGGTTCGAAAGTTGCTG -GCTCGTTTCCACATCACTTGGCCAAGATTCCTTCCACAATCAGCGCATGCTGCATCAGCA -CCTCATCCTTTTGTCTACGCCCGATAAGCTGCACATGGCAGGGCGCTTTTTCGACTTCTT -TCGGGTGATCTGAGTTCAGTCAGCATATATCATTAGACATATCCTAGAGATGTCTTACAA -GGAGGATAATAATCCACATCGCGAACAAACTCGGCATCAGCGGCCATCTGAGACGAGCCG -TAAGGAATGACGCATGCGGGATACTAGTCTTTCAATTAGTGTCAATATTGGCAAGTGACG -CCAAAGGAAACATACGTCGACCAGGTTGGCAATGACTGTATACACTGGAACCCCGTAGGT -GTCATGAGGTACTGCCGTGCTCTGGTATCCCGGTCCGATAATCACGTCCAGATGATTGTC -GAGGAACACCCGGCGCATTTTAGCCATGATCTCTGCACGGGAGGTGTTCAAGGCGAATAA -TTCCCGCAAATTCGGCTCCTCTCCCTTTCCATCGAGGTCGTATGTAAATCGAAGAGACGG -GATCGCTGGCTCGCCAGACTTACGAATATGCTGCAACGCTGTCTGGTCCGGGTCGATGCG 
-AAAGAATCGGAATGCTAGATCTGCCGCAGCTCCCAAGAAGTCAATCTGGCTAGATAAATC -CACGATGCGATGGCCTGCATTTCCTAGCTTCTCAATTGCACTTTTTAGAGTACGCTGCAT -GTTGGGATGCAATGGTGACTGAAGATCTTCCGGCAAGATGCCGATTGTGAGCTGCGTGGG -TGGTTTCACCGGCTGAATCCAAGAAAATCCCAGAGCCGTATCGTCCATATCATCGGATGG -TGCATTGAAAACCACTTGGAGAAGCATCTCTGCGTCGCGAACTGAGTGACACAGTGGTCC -TGCGCTTGGGGCTATCCCAGTCATCCCTGATCTTCCAGCACTCGTTTGTCCGGCATATGG -AACACGACCGACACTAGGCTTGAACCCTACGGTACCACAACATAGGGCTGGGATGCGAAT -GGACCCAGCAATGTCAGTGCCAACTCCTAGGATAGATCCACGCATGGCCACCAAAGCTCC -CTCGCCTCCACTGCTGCCACCGGCTGTGAGATTATCTCGGTGTGGGTTGAGCACCCGACC -AAAGACATTATTGTGCGAATCTGCTGTCATCATTGTCTGGGGCACATTGGTCTTCACGTA -TAGCACCGCCCCTGCCTCCAGCAGAATGTCCACCAAGACTGAGTTCTGGGTTACGGGCTC -GCGGTCCAAGAATGAGACAAACCCGAGCGAAGTCGGTACTCCCTTGATGTTGAAGGTCTC -CTTCAGACTAATTGGGATACCGTGAAGCGGGCCAACAGGTTCCCCAGTAGCCTTGATATG -CGCATCCAGCTCCTCAGCTCTAATGAGAGCTTCTGAGAACATAGTTTCAGTTAAGCAGCA -AGTCAACTGCTGGGCAATGGCGGCACGCTTGCAGAAGGCAGCGGTCACCTCGACGGCACT -AAGCTCTTGCGCAGCGAGCTTGTCTCGTAGCGATGTTGCGTCGGTGATCTCCGTAATTGC -GAGTTCTTTATTGCTAAGGATGCCGCACTTCGAGGGCACGTTGATAATTTCCAGCTCATT -GTTGTTAACTTGCTGTAGGATCTCTGCTGGCAGTCGCCATCTTGTGGGAATTTTGGCGTC -TACCTCGGCCCGTTTGATGGCACACTTGTCTTCCCAGGATAGATTGGTCATCTTGCTAGA -TCTCAAGAACGATTTGTTCAGAGTAAATGGGTAGAAAAGCGAATAGAGATAAGTTATTAG -AGAAGAAGGGATGCTGACAATGGTTGAGAAGTGGAAGTGAGATGAGAGATAGAAGACCTC -GGGAAAGATAAATGCGGGGAGAGATCAGATGCGGAGAGAGATGAGGTGTGAGAGAATCTA -TTCCGAGTTCGAGAGGGGCAAACACGCAAATACGCAAGCTGGGAAGTTGGAATTCAAATA -CTCAACTTGATACTGGCCGACATCCTCATCTTTATACATCGTGCTCTGGTGGGGATGGCA -GAGGGGTAGGCCGATCAAGTACATTTGGGCCGACAAAGTACAAATGTAGGATTCACCCGG -CACACACACACGGCAGTTGCATCTGCCCTTTTGCCATGTTGTATTTTTATTGAAATCGCA -TAAAAGGGCACTAGACACCAGGTACATATCATCCAGGTCCAGCCAGAAACAATCATACCT -ACATGATGTCTAAACGACTATCGATTCATACATCAATGATTTGGCAGAGCCTTTATACGT -TCAAATCTCAGATTTTTACAAAGTGGCTATCAATCACAATATTTCGAAATCCAGTGTTCA -TCGGGAAAGGCCATTGCTGGCAAAGCCATGGTGTTAACGGGGTGGGATATCCCCTTGAAA -AGGGAGTTTTACGGCAATGCATGCAATGATTTTGACGAAGCGAAAGATAAGCCCAGCATT -CTCTCATAGACTATATGTCCACATACACTATTTGTCTTTGCCTGTATATTCCTCATGGCC 
-AACCATCTGCCTTCATAAAACCGAACACATCCATTCAGCGGTAATTGCGCCCTATTGAAT -GTGGAGCTTGACTCGGATATGCGATCCATCTTCATATCCACCACCTTAGATGTCGATAAT -GGTGAGAAATCTAGGAGAGAAATATATCAAGACAACGTTATTTCTACCTCTCTAGTTATT -CTCGTCCTACGTGCACGTGTACCACGATGACTTACAGAGGACCTAGAACAAGAGACTTCT -CTAGGTAATTTTACCAGCAGTTTCTCCCCTAAAGAGTCTCGCCGGGTCCCTAGGCAGGTA -AGTTCTATCATTCTCCCAGTATTTGGACTCTGCTACTTCCTCCCTTTTTTGCACAAACAA -ACACTGAGCAAGTATCTGTTTATAGTCTAGGGCTGCTACAGACTTGTTCTGGCAGTCTAT -CTTTTTTTCTTCTAAGGCGCAGCATATGTCGTATATATGAGCAACGGGATCATAACCAAA -ATTTGCCAGTCATTTATAGAACAAGAGATACAATTTCGAACTATGGATCGTCGTAGCACA -GTTCACAATGCTGATCCAATGAGAAATGGGGATTCGGCCCCACCTTCACGTGTCCCAGAC -ATAATATTATCAGATGCTCTCCGTCTCTTCCCCTTCCAATGACAACCCCCATTAGAAATG -AATAATCGAGTCGCAGAGAATGTTCTCGGTACTCTGGGTGCAGTGAGTTCGTATTCCTGA -TTAGTAGACCAAACTAATCGTTCTTTCTAGGTTTGCTGGTCTATCCAAGTATGCGGTTGC -TGTATGCCAAATCGCTTTGAGTCCACGCTAATACCTTTGTTCAGCTCGTTCCTCAAATAA -TCATCAATTACAGAAGAAATGACACTGAAGGGCTGCAAGGGTCAATGATGCTCTTATGGG -CCGTCGCAGGGGTTCCTCTCGGGGTGTACAACATTGTTGAAGAATTCAATATTGCCCTCA -GGATTCAGCCACAGATTCTGACCACACTCAGCCTCCTTACCTGGGCACAGTGTCTCTACT -ATGGAAAGGTAATCACCTCTCTTTTTATCCAAGGCGATCCTCGTCATGCTCACTCGGATT -CAGAAATACCAAATCATGAAATGCTGTATCGCGGTCACTTCACTTCTGCTGCTGCTGGGT -GGTATTGAAGCTGGACTCATATTTGCCTTGCAAACTGCAAAAAATCGTGGGCTGGAGTGG -CCACTTATCCTGATGGCCGTTCTTAGTGCATGTCTACTTGCAGCGGGAGTACTGAGACAT -TACTGGGATATTTATGTCCATCGGACTGTTCGAGGAATCAGCTTCATATTTGTTGGAATT -GATGCTGCTGGGGACTTGTTTTCACTCGTCTCTGTTCGTAAGTCAGCTTCATATTCAGAT -AGAGATTGCCAATGGTTAATATGCAAGCAGTGTTTGGATCGACTATTAATGTCCTTGGTA -TAATAATTTATGGAACTGAATTGACCCTTTGGGTGGGCATATTTATCTGCGGCGGTGTGT -TCAACCTCCGACCCTGGATAAAAAAGCGCTTTGAGCAGCGAAATAGCAATCATCAGGACC -CGGTGTCTCTGCATCCAATGCCCTCGTCGACTTCTGTATTCAGAACCGCGTCTGGTTCTC -AAATGACGGAAAGGAGGCCATGGCAGAGTTCATAAAGAATCAATGGCACATTATTTAGAT -AATTAGACATCAAAAGGCCTTTATTTTTCTCTGCTGTAATAGCCACCTTCGTCTGATGTG -GTCCCGAGTAATTTCACATTTATCAAATGCCACAGATGGTATAATCTTGCAGAACTCGAA -GTATGAGATAGGGCTGAGTCACACAAGTGGAGTTCGATAGAGGTGAATGGAGCTGGAAAA -AATTATGGAGACTGTGAGAAGCGGCTGTCAGATTCATGTTTAACTTTATCTAAGGAATGC 
-GGCTTAAAAGAGCTCCGCCGCATCCCGGACCCGCTTGCTTTTTCGGGGTACATCTATGGA -TGTTCAATCTTTGATGAGATTCCACAAGAGCTTGCTTCCGAAATATGGACTAAAAAATTT -TGCGACGTTGAATTCTCTTCGTTTCTTATGTAGCTGTTCTATGAATACTCGAAGTCAAAC -TTCAGTCTTCCTACTGCTCTTTGTCCCGTAAATATATGCCTGCACCTTCCGGATGGCTGG -ACAGGACAGCTGGATAGCAGTTTGTTTAGCTACAGCTTCGCCTTGCTTGCCTTTGGACGG -CTGCCGGTATCAACGTCAGACCACCAAGGAAAGCTGGGCGGTGCATCATCCTCAAGAGTT -GCATTGAACTTCGGCTTGCGTTTCTCGAAGAATGCTTTGATGCCTTCGTTTGAGTCTCTA -TAATCAGGTCAGCATCTCTTCTTGACATATCGATTTATACGAGAAGGTTTGGAGAAAAAA -GGTGGAGACAGCGTACTTTCCACTAAACATATGGTACAGCACTGCCGAATCAACTAGGTG -AGTCTCCTCGGCACTCCCTGGATTCCGCCACATGAGTTCTCGCGAAAGATATGATGCCAT -AGGGCTAACGCTCTCGGCAACTTCCGTAGCAAGCTCCAATGCGCGCGCAACTACCTGGGT -TGAATCAGGGTATGTCTCCTGGAATAACGTCCCAAAGTGTGGGGATGTAGGTGGGAAGGT -GCCTCCAGTTGTGAGCAAGTAAAGGGCATTCGAGTAACCAATCAAACGCGGAAGAAAGAA -TGAAGAGCTTGATTCCATCGTCAGTCCCCGTCGCGGGAAGACAAATCCATACTTGGATTT -CTGGCAACCAATGCGGATGGCGGCTGGTAGACACATTGTCATACCTAGCCCGACAGCGGA -GCCCTGGATAGCTGCTATAGTAGGCTTACGGCAGCGGTGAATGGCCAAGGCGAGACGGCC -GCCGCTAGTCGTGGTATCAGCGGAAAGGGTATTTGGGCTACGTACAAGAGCTTCTTTATC -TTACCTATCACGGTGGTCCTTGTTTCTCTCCTTCTCCATTCCACTGAATCCTCGCTCAAG -ATCTGCACCAGCACAGAAAATCTTACCAGCACCGGTAAGTACCACGACTTTTACTCGTTC -GTCAACATCAAACATAGGGTATATCTTCTCAAACACATGCATCATATCTAGAGTAAATGC -ATTATGTTTTTCGGGTCGATTCAGCGTCACCACAACAACGGGGGTAGCTTCAGGAGCCCC -AGCCGGGTGGTGGGAGACTTTGACATCGGTCAATGGAAGAGTCTCGTAAGACTTGGGGAT -CTCGATACTGTCTGGGTCGAAAATCATGGTGGTCACTAGGAATCAAATAAACTGCAGTTG -AGCTCACCATTTTCTGAGATATGTTGGTAACTTATATTTAAATCCACGCCTCGGCTGTCA -AGTAGACTTGATCACTCATCATGAATTCTGAAAGGCTGGGCCGCGGTTGACATATTTGTT -GCGTGGGGGTGGGATTACATTATCTCGGGACATTTTGAAAGCTAGGAGTAACACACGTCC -TCGGGGGTGGAAATGTTAATCACTGGCTTCATTCTCGATATACATATTGGCTAAAGTTAT -TCCACCACAATCCGCGTCCGCACCTTCTTTCCAGATTATGCCTGTTTTTAACCCGAGAGT -GTAGAAATCCGGACGATACGATGCCTCACAGAATGAACTTCAGCTAAATAGATATATCGT -ATTCAATTCATCGTGCCATATTTCTAGCAAATGCCACAGCCATATACTCCGGTGCATGAT -CCGGTGGGAAAATAGCTGATAAATAGGCAGTCCAGCCAGGGATATCGTTGCAAAAGGTAT -ACGCTCTAACAGTTTCACACCATTCAGGATGCCAGCCTGCAAGCTCCCAATCCAATACTG 
-CAGTGACGTGACCATGCTCATCAACCAGGATATTTCTCGGTGCCAGATCACCGTGCGTGA -AGACCATGTGCTCCTCCGGCCGTTTAGTGGCCGTATAATCCCCCAAGAGATCCGGGACTG -AGGTCGTCAATCTCGAAAAAAGGTACTCGTCCAGATAGATTGGGCATTGTGTGTATATAT -CTTTATGACTGAAACACTCCAGCTGTGCATGCTTCAGGCCTCGTAGTTGCTGTATATAAC -CGTGTAGTTCCTGCGCAACAGACATCTTTTGTGTTGGAACCATGCTGGACCAGGCATCTT -GTAGCGTCTGGCCTTCGACATAGTCCATTAAGATAGACTTAACGCCACCATCATGGGTTT -GGGTTCCGTGAACGTGGGGAATCGGGATGGTCGTGTTTTCGGCGATATAGCGCATATTCT -CGGTTTCAAGGTCTATATTTTGGGAGGATCCATATTTAAGCAACCTGTCTTTGTGGACTC -GTATGATTTTGCGGAGATTGGGTTCTTGTGGCTCTACATGCCTCCACTAGTTAACGTGAT -GGAATTATCCGGTACCACTTTTAGGCATACTGACCCTGGCCGGAGAAATGCATCGCGTCC -ACAAAGACCGCCGCTTTGAGACCCCTCGATGGGAACACGAGCCTAGAAACAGATGATCAG -AAAGTGAGGCATCCGTTCTTCGATCCGTGTCCAATCACGTGGCTGGGAGTGGTCAAATAT -GCAGTTGGCTGAATACAGTAGTAATCTTGGATGTTGGACCTGGTCTCTAGTCTCACAGCC -ATTAGTTCCGTGCGCCGGGGAATGCGGATCTGAAGGACTATATGGAGCAGGACAATAGTC -ACGAATGATGAACACTACAACACATTTGAATAAGAGCACCATGTATTTTTCTACTCTATG -TGGCGTGAATAGTCATGATCAGGAAAGCCCAGGTACATAGGCTACCCCTCATCTACCTAG -TAGACAAGAATATTTAAAGGATATTTAAACGAAGACCTTGGCTTCGCCGGCCTTCAGCTT -GCCAGCGTCAACAAGGCGCTTAGCCTCGCCAGACCAAGCAGTCTTCACAGCAGCGCCATC -GGTGCCGGCGATGGTGAGTTTCTCAGCCAGAGCGGCGAGCTCATCAACGTCGATCTTGCC -CATGATGAGATCGACGTCGCTGAGGACACCGTCAACCTCAGCGGCAGTAGGCTCCCCGGC -GGCGAAGTAGTTGACATCCTTGACGGAGCCCTTGAGAATACGCTTGTTCTTCTTCGAGGC -GTTCTTGGCGGCCTCCTTGGCCTTCTTGTTCTTCTCACGGTCAGCCTTGGCAGCCTCCTC -GGCGGCCTTCTTGCGCTGCTCCTCAGCAGCCTTGGCGTTCTCCTTCTCCTCCTTGAGACG -GCGCATCTCCTCCTCCTTGGCGAGGCGCTTAGCGTCCTTGCCAGCACGAGCCTGCTGGCG -GAACTTCTTAATACGCTCATCGGAAGCGAGACACTCGTCAACAAGCTCACGCAGACGGAC -GGTGTCCTCGGTCTTGCGCTTGCGGCGGGCGTTGGCGTTCTTCTTCTCGACATGACGCTT -CTGGTCACGGCTCTCTCCATCATCGGGCACATCCTCGTCGAGGTACTCGAAAGTCCTCCA -GCTGTCAACGTTGTACCAGAAGTTGTAGAAAGTCTCGACGTGCTCCTGGGTGCTGTTCTC -GTCTCCGAGCGTGGGCACAGGCTGCTTGTTGGAGAAGCGACCCTCGGCAACAAAGACGGG -GCCCCAAGCCTTGTAGAACTTGCTACCAGTGAGCTTCTTGGTGGGAGGCTCAACGTCGGC -GTTCTCATCGACGGAATCGAACTGGCGACGACGAGTGGGGTCCAGAAGCAGATCGGTGGC -CTTCTGGATACACTTGAAGAAGCTGTCATTCTCGTCGCTCTTACCCTGAGCGGCCTTCTT 
-GTCTGGGTGGTGACGCAGGACCTTCTTACGGTGAGCCCGCTTGATCTGCTCGGGGGTTGC -GCGCCAGCGGTACTTCTTCAAGCCCAAAACGGCGTAGTGATCCTGACCCTGCTATGAGTT -AGAACCTGGCCAACCAGGATAGGCGTTCCATTACTGACCTTCCAGTCCTTGGCCTCACGG -GCAAGCATCATAGGGTCCTCATCCTCAGAGATATCATCATCCTCCTCGGTCTCAGTCTTC -TTGACGTTGTTCTGGGCTTGAAGGCGCTCATCCTCGGAGAAAGTACGGTTGTGACGAGAC -TAAAAGTTTATTAATTTCAAAGAAGCCAAACAAAATTGAGCTCGTTTGTAGAACTTACTC -GGCGAGCGTGAGCCAGGAAGTGCGGGCCAACGGGCTCCACGTTGCGCTTGGTGGCACCGG -ACAGAGCGCCGACAATCTTGAAATCCTTCTCGCCAGCCCAGCCCTGAGGCAAGGTAGGGA -GGGAGATGTTAACAACTTGAGCAGCAGCCATTGTATAATGGTATTGGTGATGATACCAAG -TTGTGTGTGGAGAGTGTTGGGTCCCGCGATGGAATTTTTTGCATACCGTTTTTTCTGCCC -CAATTATCCGCCCCTCCGGCCAATCATGGGTCATCCGATTTGGAGTTTCAAGGTTGGCGA -TGGCACCAGGCCTTGCTCCTTCTGTTGACTTGTATGTTCAAATTTTCTGGTTTGGTCGAT -CTGCAATCTCTGAGGTAACCTCTACTTCGGATTAACAGTCATCAATAACTTCATCCCATC -TCCTTATCTGGGCCGATCATCCGCTGCGACAATTTGCAATCTGTGCTTCTCGATTTATTC -CTAAATACAATCACGTCTATACTTCTGTGAAATTTCCGCTTTCCATCATGGCCACTTCCA -CTGCCGCTGCCGCTAAGAGCGCCTATCGCCAACTTCTCCGGTCCACCCGGGTTGTTTTTC -ACAGTTCGTTTGACAAACATAACCGAAACCGAGGCCTTGTTTCCCTCACACTGACACTCG -TCACAGACGATCTTCCGGTCCTCACCGCTGCGCGCCAGGAAGCGCGCCAGAACTTTGAGA -AAAACCGTCGCCCAGCCGTCGACACTGGAATGCAAATCAACCATGCGATCGAGGTTGCGA -ACATCCTGCGCCACAACATCGTCCAAGGCTCGAGGGAGCAAGGAGATGAAACTGCCAAAT -GGGGTATGTGCAATTCTATTTCGTAGCTTGGTACTTATAAAGCCCGTACAGATTATTCAC -TAACGAGCGATTTGTTTTACAGAACTCAATATCCATGATCAGATCGAACGCGGTGATAAT -GACTCTATCAAAGTCGGCGAAAAGGACGTGAAGATTCACAAGGCTTGCTCGTCATGATGG -TTTCTTTCCTCTGCCATGATGCTCAGTTGAATCTGAGCGCACTCTCCGAGTCCATTCAAC -GTCCACAAACGATAGTGAAAAGTCGAAATCAGTCTGCGAGCCTATGTCAAACCAACGGTA -CATGGCACGGTAAATAAGCTACGACAGTGACCGGACATGTATTATAAGATACCCGACTCT -TTTATTCATTGGCGTCTTCGGGGTATTGTCGATTGAAGTACATAGATCTGCATAGAAAAA -CCCAATCCTTGATAACGCCCGCTCCGAATCCTAAGTTACATGCAACCTCGACTTGACTTA -ATGGTATCTCTGCACAAGTGCTGTTCAATCGCCATTCATGCTGGCGTACTGGACCTCACC -CGATCCGGGCGGCGGTGCGACAACATGAGGCTGAGCTTTCTCCTGGCCAGCACCCTCGCC -TGCAGAGGTAATGGCTTGTCGGTCTGCTTGCTTGCCTTTGCCGACTGATCCCACGGTCTG -GCGACCCTCACGGGCATTTTGCATTCTCTCTTCACGGTCCAGATTATCCAGTGGCTTCGG 
-CTTTCCCCAGCGAACACGCAGAGGGCAGCCCTGAATGATGGCCTTGCCCTGGCAGTGGTT -TGCAGCGGTTTCAGCGTCTGCACGGTTGATGTAGTTGATGAAAGCACAGTGGGAGCGGTG -GGAGCAAATAAGCGACCGGAGCTGGCCAAATTCGCTGAAAAATGTGCGGAGTGTATGCTC -GGGAAGATCGTCTTCCACGCCTGTAACAAACAAGGAAGTGACGTTGGGATCCGCTGGGGG -AAGGATATCTGCAGCTGAGGGCGGCAGCTGGGTGGTACTGGGGAACGGACGGCCACCACG -GCCACCACCTCGTCCACCGCGGCCTGGAAGGCGATTTCCGACGCGGCTTTCACTGGTACG -GACCGGACCAGGCCCATTTCCATAGCGGCTGTTTGTTCGTGGTTGGTCCGACGCCGTACT -TTCCTCTCCTTTGTCGTCCTCGGCAGGTGCTTCGATCCTTCTGGGCTTGCGGTAATATGG -TTCACTGTTTGCAAGGCGTCGCAAAAGCTCGCGAGCCTTGTCGTCTGTCTTTTCGTATTC -TTCGATGGCACCGCGTCCCTCTTCGATTTCCTTCTCATTGTTCTGCGCGTAGTACTCGCG -ATTGATAGAGCTTTCCGGTCCTGGGGCGACCATTTTGAGGGCAGCATCACGGACGGCGAG -AGGGAGACCGAACGACAAATCTAGCATGCAACATTGACAGCAGTTCTTTAGGCGCGCGCA -TGTGAGGCAGATGATGGATCGCTTTTGGCGGGAGGTACGATCGGCCTTCCAACGGAAGAT -AGTGAATGGGCGCGTGCACTAAGTAGTTAGTATGTGGTTATGGACAAGCTTGGCGGGAGC -AAGAAACACTTACAATCTTGCATTCAGCACCATGATCTTCCTTGATCATTTGGACATAAG -GATTTTCGGGAAGACAATTCTCACAGACGGAGGGAAAATCGGTCGACTCCCAGCCGGATC -GGTTCAAATCATGTTTAATTTGGGGAGGCATGTCGGAGAGTAGGTCAATCCGAGAGATGA -GAGAGCTTCCCAAATGTTGGATTATCAAGATCGATAGAAGTGGCGTGTGGCGTAATGTGG -CTGGTAGATCACGTGGCCATATGCCTCAGGTCACCCGTTTGTCCCACCGGGTGGTGGAGT -GTCCGACTCAGCCACAGTCCGGAACTTTGGAGTCCCGTGAGGACAGAAGAGTGTGGCCAC -TCCCCAAAATAATAATATTGGCCCCCTCCAGGGACACTAAAGGTCTAGTCGTTCGGAGAA -CCCCCCCTGATTGGCTCTCCACTTCAGATTGTCAAAAAGTTGCTTGGGATTCGACCCTCT -TATTTATTTCCCCTTTCTCCTACTTCTCCAGCAATTCATTCCCTTTGCCCTCTTGATTCG -CTTCTCTGGTATTTACAGTGGTTTACCTTGTTTTTTCTTAGAAAACCTTGGAAATTGACA -CTTTTTTGCCTCCTTTTACGATCTACCCCTTTTAATCTTTGCGACTTTCTTCACCATGGC -TCCCTCTGCATTGGAACGTGAGGACAAGGCGCGTGATGCCGCCTTCAACAAGGCTTTGCA -CGGCCAGTCTGCTCAGGCGCAAGGTGGTCTCTCTGCCATGCGCGGAAAGGACGCCGCCGC -TCAGAAGGCCGCCATCGACGAGTACTTCAAGCACTGGGACAACAAAAATGCCGCTAATGA -GACACCGGAAATCCGTGAGGTATGGCAATGTGTATTGAAGAGATTTTGATTCGTGTCGCT -AATAATTTGCGCGTAATAGGCCCGCAAGGCTGAATATGCTACCTTGACCCGACAGTACGT -GCGGAAATAAACCCATTGATCGACGCTTTTCATACTGATGATTCTTTTACAGCTACTACA -ACCTGGCCACCGACTTCTACGAGTACGGATGGGGCTCTTCGTTCCATTTCTGCCGCTTCG 
-CCTACGGTGAGCCTTTCCACCAGGCCATTGCCCGCCACGAGCACTACCTTGCCCTCCAGG -CTGGTATCACTGAGGACATGAAGGTTCTGGATGTTGGTTGTGGTGTCGGTGGTCCTGCTC -GCGAAATGGTCAAGTTCACCGGTGCTCATGTCACCGGTCTGAACAACAATGACTACCAGA -TCGACCGCGCCACTCACTACGCGCACAAGCAGGGTCTCTCTGACAAGATGGCCTTTGTCA -AGGGAGACTTCATGCAGATGAACTTCCCCGACAACAGCTTCGATGCCGTTTATGCCATTG -AAGCTACCGTCCACGCCCCTGAGCTTGTCGGTGTCTACAGCGAGATCTTCCGTGTCCTGA -AGCCTGGCGGTACCTTCGCTGTGTACGAGTGGCTCATGACTGACGATTACAACAACGACG -ATGCTGAGCACCGTCGTATTCGCCTCGGTATCGAACAGGGTGACGGTATCTCCAACATGG -TCCGTATCTCTGAAGGTCTCGATGCCATGAAGAACGCTGGCTTTGAACTGAAGCATCACG -AGGATCTCGCTGCCCGTCCCGACCCCATTCCCTGGTACTACCCGCTGGCGGGCTCCTTCA -AGCACATGGGCTCGGCCTGGGATTTCTTCACCATCGCTCGTATGACCTGGTGGGGCCGTG -GTATTGCCCACCGCTTTGTCGGTGCCGGCGAGTCCATCGGTCTTTTCCCCAAGGGTTCTC -AGAAGACCGCCGACAGCTTGGCCTTGGCTGCTGACTGCCTGGTTGAAGGTGCCCAGAAGA -ACCTCTTCACCCCCATGTATCTCATGGTTGGCAAGAAGCCTGAGTAACTCCAGAGCAGCA -ACGGTGTTCGTGGTTTTTCTGTGTAAAAAATATTACCCCCGGCGTGCACGTTCTTCCCTG -GTGTCCGTCTTGTTTCATTTGATCTTCTTTCTTCACGTTACCCTCCGCATGTCACCACGG -GATTTTTCTTGACTCAAACCTGACCCAATGTCGGTCGAGAAAGCGAGCCGTGTGCTGCTA -TCTCTCAATCGATGGTGTCATTAATTTTTTTTCTTCTCTTGTTCCTTTTACCTGTCAAGA -TAGCAATGTAATATTCATTGAGTCAAGAATAACGAGCAAACAAATGGTATTGTTTTATGT -TAATTTTGTCTTTGTTCTGCCAGAACTAAATATTGTACATGTCTTTAGATGCCCAGGTGG -TCCTTTTGGATTACTTTTCCATCGGCCCCCGGACGCTGTAAAAGATATCTGGACTATTGT -GATCCGCAAAAACAAACTATCTGAACAATCTATCCTTGTCATGTTACAAATATCCCGAAA -CTGGAGGAACAATCTTCGGCATGCGTCCATCGGTGTAAACATCCGTGTCATATAACTCCC -TGGCACCATACGCACATAGCAACTCTACCATGGCTGCACGACTTGGAGAAGGACACCAAG -AAGCCCACCACAACATTGAATTCGATGAATAGCGTGAAGTTCCCCCAATCTTGATATTCG -GGTCCGCTCCACGACGAAGCAGTAACTCCACCACTTCCATGTTACTTTGCCAGACAGCCA -GGTGAAGAGGTGTCTCACCCATCACAGACTCGTTCACATCTAGGCCTGTGGCAATCATCT -TCCCAATGAACGAACAATTCATCGATCTTGTCGCATGGACCAGTGAAGTCTCCTTCCTCC -AATGTGAGACAAGAGGATCTGCACCATGGTCAAGCAAGAGCTCGGCCACCCGATAATTTT -TCAGCCTCATGGCCTTCGCCAGAGGCGATAGCGGTTTCCCCGAGCGGTGGGTTTTCGAGC -CGTCAAAATTGCACCCATAGTCGTTAGGATTGGCACCGCAGTCAAGAAGCATTTTAATCG -TATCGTAGTGGCAGTGCGATATAGCCCAGAACAAGATTGACTGATTTCGCGATGCTAGTT 
-TCGGATCTGCCCCGTTTTCAAGAAGCAACCGCACAACATCATGCCTACTGACTGGCGTAT -TCCGTGGCCGAACAGCAAGCCAGAGAGGAGTTATCCCAAACTCCGGATCGACGTAATTGG -GGTTGGCACCATTCTTCAGCAATAATGACACAATCTCGTTACTAGGAGACGGCGTGGCCC -GTCTTGAATAGTTTGCATTTGACACTGGAATCCCAAAACCGGCGATAGCAAGACATAGCG -GTGTTTGAACAACTCTGGATCGAGTCCCACACGCTTCAGTATCGGCACCTTCTTCCAGGA -GCACCTTCATAATTCCCTTGAGCCGAAGTAGACTTGCAATACCAAGTGGCGTCTGATTCT -GCGAGTTCCCCACATTGATGTCAGCGCCTAGCTCTAACAACTTTTTTATTTCCTCGGGTG -CTCGTGCTTCTATTACGGCAAGACAGAGCGGCGTGTTGCCCCCGTAAAATCTCGTGTCAA -TATGGACACCCAGTCGAACCATCTCTTCGAATAAGGGTGGACAACCTGTAGCAGTTCCTA -GGTAAAGAAGCATAGGACCGTACTTCTTCATGATGTATGGATCTTGCAGTGCCATAGTCA -GCTTAGGAAGATCGGAAACTAGAGACCTAGCGGGCTTGACTCCACAGTTACAGAAACCAG -TTCTTTCAACGATACCACTGCGGCTGAGCAAAAACAGGGCACATTCTATATGACCATTTG -CCACTGTCGCAGGCACAATCAGCGCTACTCTGTGTATGTATTGTTTGCAAGTGAGGTTAT -CATACTCTTCCATAGCAAGAGTGTAATCAACACACGGTATGTGACCACCCTCGGCAGCCA -GCCGGAGAGTGGCTTTGCGCCCTGGCTCGCCGTCATTCAGCATGTTTGCGAAATTGGTTT -CCCACAAGATTTGTGGACGGAGACCTCTTCCTACAAGGTCTTTCATCATATCCAAATGGC -CATTCGTTGCGGCCTGGCCGAGCAGGTGCACATTACAGCAATCGTGGATGCATCCCCGAT -GGAGATCAGCGCCATATTGAAGAAGTGTATTGGTCATAGAGACATGGCCCCCCTGAACTG -CCCAGCAAAGTGGAGAATGAAAGATTTGACAGTTGTGTGTTGGGAACCTCAGATCATTCG -TTATCTTCGACGAGTCTACTGCCAGAGACTTCTGAACGACAGTATCATAGCCGCGGAAGG -CGCCGAATGCCAATGATGTTTGACCGCAATGTTGTGCATTGAACTTGTACAGCCGCGTTT -CAAGCTTGTCATAGAACCATTTGTTCGTCTGTGAAAGCGCATTCAGGTCTTTTTGTGTTG -CGATAGAAGCGTCAGTGTTCTCGGGCTCCGTTCTGGTGATTTGGGAGAAGAGGCAGTCGA -TAATCAGCAACACGATCTCGACAGGTAGATCTAGAAGTGAGATCATATTGCAAATTGAGT -AGGGGTATATTGGCTCGTAGGAGAATTGTGCGTTGAGCACACAAGAGCTCCAAGTTGTTA -ACTGACTGATCACTTTCGAGCTGAACTAGCTGGGGCTGGAGCTTAGGCACCTACAGCCCT -ACATAGGTAGTGAGTATGCCGATCCTTACCTAATGTGATATATCACTTTCGTGGAAGGCA -TAGGTTATCAAGTATCTGAGTATAAACTTGGGGCTAGTTCAACACTTATGAAAATGGAAT -GTTACGATAGCAGATGCGAAAGAGGGGTAATGTATGTGGCACGTTGAGAGAGTGGCTGCA -TGATTACTGGGGTCGGTGCGTGAATTCATACAATAACAAGAGTAGGCGGCCGCTTATGCG -GCCTTATGATGTCTTACAGGTCTTTTCAGCGGGATTCTTAGATATCTTCAAGATCGCCTT -TCCAGTCGTTTAGGTAGACTGCAGAAAAAGGCGGAAAAAGCATACTTTGTGCAAGCTACT 
-TTTCTGTTTTGAAATATCAGCTGCATGTGAGGCAGTTTCATCTACTTCGTAAGGAGTACA -GAATTAGATATTCGCCGGAATATATGACCTAATAGAGCTATGAGAGAGGAGAAGAAGGAC -TGGAAGCTACCTCACTAAATAACCATGTAGTTGGTTCACTGACTAGTTAACTACTTCACT -AACTAGTTAATTATCCTTGCAGCCTATCAAAGCAGTCACAGCCCAGGCGACTGCTAACTA -CAACAGACCAGACCTCTGCAAAGCCCACAAATAGGCCGAGATACAGGGCCCAAAGCCCAA -TCAGCAGTGGGCTCTGTGCCCAACCGCCCAACCCAGAGCCCACGAATGGGGGCCGTAAGG -CCCCTATCAAAGCCCACTACTCGGCTCTACGCACGGCCCAACTCCCGAGGAGTTGGGCTA -CACGGGTTAAATGCAAGAACTAGTTCAGTGCAACTGTACCTTTTCTACGCGATACGCATA -CAAAATGTACTTTATATTATTTTTATATATCTCGACCATCTTATTTAATTATAGGAAGAT -AAATTGATAATAAGAACCTACGTACCTACGTGATATACCATCTCACCTAAGACAACTACC -GTCCTTCAAACCTCAGACCGACTTCTGCCGTACGGAGCATCGCAGACGGGACTGCCCTTG -ATTGGGTTGCTTCCCTTTGTTCCTTCCAATATAACGAGGGAACCCCTTTACAAGGAGAAA -AATAAGAGGAGCTGTTTCTGAAGAACTGGTGTTGTCTTAGCTATTGTATGAATGTACATA -TATGTAGCCATTCTACTGGTTTACTATAGGCTTGGTTCTTCTAGATAAGGACACAGGGAT -GTTGGAACAAAGAGGCTAACTCAGATTACGAAAGGGGAGAAGAAGGAAGAAGTGGGGGGA -TTATCCCTGGAAGAAAACATAAAGCAACCCCGAACAGTCCAAGCCCATCTTAGGGGTTGC -AAACAAAGCAAAGCGATTAGACGTAATATGTCCTCTAACCGGAGATATAGGGCTTCTCAA -AATGAGCCAAGAAAAGGTAAGCATAAGCTACGAAAAGCTTTTATGAGATATGAGATAACA -TATGGTGAATATGGGTAAAAACAACCTTGGAAGTGTATGGGGTCAGCTGCTTGGTCCAGT -GACCCGTTGGGGCACGGACAATAATAACAGGTACCCGGTCCACTATCCAACATCCAATGA -AGCCGCCTCTATTTCTACTCTAATTCAAGGCTCATTCCAAGCCTTGACCCTTTAGGACAA -GCTTGGTAATGCAGCCAGTCAGTTAGCCCTGCATCAATCACAACGTGGTATCGACTAATG -CCGCGTAACATGGGATCCTTCCAGCCGATGAGGGGAATGTCATGCCTCCAAGGCTCGGAG -TATAAGATCGCCTGTCTTCCGGCCGATCTTCTCTGCTTTTCTTATCGTCACCTACTGTGA -TCCCAGACATGTTCACCAAACAACTTCTTCTTGCCTTTGTCGGGGCCCTGGCCCTGGCCG -CGGCCAAGACTACTACTGAAAAGAACCCAACTCAGGCTGAGATCGATGCAGCACGTGCTA -CGGTCTTGCCTTACTCCCCCGTATCAAATGTAAAAGGGTTGGCTTTTGATCGCTTCGTGA -ATATCTGGTTCGAGAACACAGTGGGTTTTACCATGGGATAATGAAAGATAGCCGCGTGCT -CACCCTTGGCAGGACTTCGATACCACTGCTTCAGACGAGAACATGGCCAAGCTGGCTAAG -GAGGGTATACTCCTCACCAATTATTTTGCAGTCACTCACCCCTCGGAGCCCAACTACTGT -GCTTCTGCCGGGGGTGACACCTTCGGCATGGACAATGATGACTTCATACAGATCCCAGCA -AATGTCTCGACCATTGCTGATTTGTTTGACACCAAGCACATCGCTTGGGGTGAGTACCAA 
-GAGGACATGCCCCATGCCGGCTACCAGGGCATGCGGTACCCCCTCAGCGGTCCGAACCAG -TACGTGCGCAAGCACAACCCGCTGGTTTTGTATGACTCGGTTACCAACGATGCCGTGCGC -CCGCGTCAAATCAAGAATTTTACCACTTTTTACGAGGACCTGGCACACCACAGCCTTCCC -CAACACATGTTCATCACCCCGAACATGACCAACGACGCCCACGATACCAACATCACTGTG -GCTGGCGATTGGGTCGCTCGTTTCCTGCCTCCCCTGCTGAAGAACGAGCATTTCACCAAG -GACAGCCTGGTGCTGGTCACCTTTGACGAGGCGGGCAACTATTCCTACCCAAACCGAGTC -TTCAGCTTCCTGCTTGGTGGTGCTATCCCGAAGCACCTGAAGGGCACCACTGACGACACT -TTCTACACGCATTATTCAATCATTGCCTCCTTGTCTGCTAACTGGGGTCTACCATCGCTT -GGCCGCTGGGATTGTGGCGCCAACTTGCTGAAGTTGGTCGCTGAGAAGACTGGCTATGTC -AACTGGGAAGTTGACACCAGCAATGCCTACATCAACCAGACTTACCCTGGCCCTCTGGCT -GTCGAGAACTATTCCTCTAGGTGGCCCGTTCCTGCCACTAAGGGCAAGTGCTCTGCTGGT -CATGGTATTGCTGAGGTGGTGAAGAAGACCTACCACGGTCTTCACCCTACCTACGACTAC -AACAGCCCTGTCCCGTATGATGCGACCAGTGGAAATAACGTCGGCATCAAGTATCACCGT -ACTCTGGTATGGACCCATTGCCCTTCTCTCCTTTGGTGATTGAATGCTGATAATTCATAG -AAGCATGGTAAGGAGGAGTCAGGAATCACTGGGTAGACCAATGATACGTATATGTGGTGC -ACGGCAATTGCCTTGTCAAACGTGAACGAACATATTTGTCCTTGTCTACAAAAGTTTGAC -TGAGTGCTATGTTCTACGACGTCACAAGAGTTTTTCTGATTTGCTGCAAATCGTTGCGGA -TGTGCCGGATGATTCTTCCAGATTTGATCCACACAGACTGCATATGGTGATGAGTATCCC -CAGTTTTCTGCTACCCCGCGAAACATATATATCTCCACAATCAAACTACATACTGCTCCA -CCGACGCTGCCCAGGTTTTCGGAAGATGGAATATCCCGATGGATCAGAAGTTATGCATAA -AATAGTCTCGTACACAAAGTATGCATTTTCCCGCCTTTGTCTGTCGCCTACCAAGGGGAT -CTTCAAGATACCCGAGAAGAATAACTGAGAAACCCATTGAGAAGACTACTGATTAACCTA -CTTAAGAGCCTTGCAAGAAAGACATCACAAGACCGCATAAGCGGCCGTCTACTCTTGTTC -ATCGTCAATGAAATAAGTGCGCTCGTTTCGCGAAGCAATTTATCTGCTCTATGAGCTCCG -AGTTATCCTTTTCCCTCCCTGATATCTTTCTGTCTGGGGTTGAGGTATGCCGGTCGAGCG -CAGCAGACATTATGCAGACAACTTTACGCTCTGAAATATGCTCGCTTCATCATCGTCTTT -AGCTTTCAGACAATTCGCAGACTAGGAGATACATAGAGAGGGCGGGACGTTTTGTTTAAA -TGCCAAAACGCTTCCTTCTCTGGGTATAACCAATCCAAGATTTGGCACGTTGAATCATCT -CCCTATTGTCCAGGGTTGATTCCAACACTTTCTCTACAACCCATAGTTACATATTTTGTT -GCTATACCCCCCAACTACTGCATACTTTGATTAACAGGTCCAAGTTCACATCCGCTAGAC -CTTAACGGAGATCGATATATAACAAGCCAATCAGCGGTAGTAGGACTTAATCTCAAGTGA -TCATCCTAGATAACAAGGAAGTCTTGCGGAGAGTTCAGCCTTGCATGGAATCTTGCTTCG 
-GGGCTTTTCACGGTGATCAGGAACTCGAGGGGATAGGGACGCCAAGCCCATGGGAGCGCC -ACGCGGTTTGAGGCTTTTCTCATGACGTGAAAGGGTCGCTTTGACTCTGAACGGCGTTGC -CGTGATCTGCAAGCCTTGTCCCGTATAGTATCGACAGCCCAATCTAGGGCTTAAGCTACC -GCATAAGCGACATTTAATATCGGATGTGGCATCATCGCGGACAGCGCGCATGTTCTTATT -CGGGAAGGTACCGAGGATCGCGATGCTCCTACCGCCGGAGAATTATATTATTGGTCTATT -ACGGCTAATATGTACAGAGATGTATATTCGGAGTACGTCTGATTTAATTTCCATTTTCAT -TTCCGAAAGCAGAAATTTCACTAAACCGGAGGTGAATTGTCCGACAACGCCCGCTCTCCA -AATGGCTTTTTTGAGTCCCAGGATCTTATGTGGGCCCGTGTTGAATTTTTTCTCCCTTTC -TTTCCCCCAGTTTATACTTACATATGGCCAAGATATGTCAAATGCTTCGCCTCCATAAAC -TTTCTTGGAATCGTCTTGATTGACAAAAAAAAATTGGAACCTTGGAATGCTTGGTTTGAG -CCAAGTCATGGTAAGCAAGTACATAACTTGGAAGAGGACTGAAAATCTTTGGAATCGAAC -GCTGAGAGACCCATCATTTCCTAATGCGGAATCTGCGCAGGACCCACCTATACTTCCCGA -AGTTTTTCGACTTTGAAAAAAACAAAAAACACTGTTGAGTATCCCGAATCATTCCCGGGG -AGTTCTTCCCGATCTTCTTCCCGAAGAAGCTTCGGGCAGAAAGATCAAAAGATCCGGAGA -TACGATCAAAACTTCCTAGAAAATATCGAGTATACAAGTTGTCTTCCAGGTGTTACCCAA -TGTGTGAGGCCGATGATCATTGCTCAAGACGGCCACATAGTACCCCGGGGAAATTAATGT -AGTCACCATATACACCCGTAACCCGGGACGGGCATGGCGAGATTCGGAAGGAAAAAGGCA -ACTGAAACGAGATCTTTTTGAATACGTAATATTCCACAATCGGCACGAGAGACGAAAAAA -AAAGAAAAGAAAAGTAGAAAAACGCAGGAAAAAAACAGAAAAAAACAGAAAAAAGCAAGT -GACCAAAGTAGCATTCGACATCCTTGACCCCTGACCCACCTCAATTTCTCTTCCCCGTGG -GTTGCCGGGTCGGATAAGATGCCTTTTCCGGCCTCTCTTGTTCACCCGCAAACTCACCCC -CAGACGCCCCAGGAATTAGATCCCCCACAAAGATGAGAATGAAGCAACACCGCCTACTAC -GTAGACTAGGCTACACAACAACCGGGAGATTCTCACCTAGGATTTCAATATCAACAGATT -TATGGAGTATGTACTCCATACGGAGTAAACGGTCCAAAGTTATCAACGTATCCCCGCAAC -CCCCCACTAATGGCCCTTGGCCCCTACTTGTCTCTACTTGGCTTCACTTGGCCCCCACGG -CGGGTCGCAAGACTTGCCTATCCAATGTGGTAGCGCCGCCGATGTCACTCACACACTTTT -TTTTTCTCGTTCTTGGCATGGGGACCGAATGCATATAAGACTAAAGATCCCCTTGTCTGA -GCTTTTTCTTTCTCCATTGATCTTATATCCAAGAGAGGCCGTTCATCATGTGGACCACCA -CCTCCGGCTTGCGGGGCAAGAAGCTCCATCTTGCCATCACATTTACATCCGTCGTTGGAT -TCTCACTTTTTGGTTATGATCAGGGTTTGATGTCCGGTATCATTTCTGGTACCGAATTCA -CTCGGGAGTTCCCCGCCCTCTATGGTGACTCGGAGCATGTCGCCGTCCTTCGGGGTGCGG -TCACTGCCTGTTACGAACTAGGGTGTTTCTTCGGTGCCATCTTCACTATGGCCTACGGTC 
-AACGTATTGGTCGTACTCCTCTCTTGGTTGCCGGTGGTCTTCTCATGATTCTTGGTACTG -TTATCTCTACTGCGGCCTTCGGCCCCCATTGGGGACTGGGCCAGTTCGTTATTGGACGTG -TCATTTCCGGTCTTGGAAACGGCATGGACACCGCTACTATCCCCGTCTGGCAGTCGGAGT -GTTCGCGCGCTCACAACCGTGGTTTCCTCGTCTGCTTTGAGGGTGCTATCATCGCTGTCG -GTACCTTCATCGCCTACTGGCTCGATTTCGGTCTCTCTTATGTCAACAGCTCCGTCCAAT -GGCGGTTCCCTGTCGCCTTCCAGATTCTGTTCGCCATTCTCGTCACTGCGGGTGCTCTCA -TGCTCCCCGAGTCGCCTCGTTGGTTCGTCATGCAGGGTCATGACCAAGAGGCCATTGAGG -TTCTCGCCCAACTCAACGACAGTGCCCCCGATGCGGAGGATGTGCTTGCCGATTTCAACC -TCATGAAAGCCGATCTCGCCGCCATGCAAAATGTTGAGTCTAGCAGCTGGGGTATTCTGT -TCACTGGAGGCAAGACTCAGCACTTCCAGCGTATGATGATCGGTTGCTCCGGACAGTTCT -TCCAGCAATTCACTGGTTGCAACGCCGCTATCTACTACTCGACCCTCCTGTTCCAACAGA -ACTTGCACATGGATGGCAAGCTCCCCCTGGTTCTCGGAGGTGTTTTCGCCACTGTTTACG -CTCTCGCTACCATCCCATCCTTCTTCATGATCGAGAAGGTCGGTCGCCGTAACCTGTTCT -TGATCGGTTTCCTCGGTCAGGGTCTCAGTTTCATTATTACCATGGGATGCCTCATCGACT -CGAATACGCAGAACGCCAAGGGTGCCATCGTCGGTATCTTCTTGTTCATCTGCTTCTTCG -CCTTTACTACGCTGCCTCTGCCCTGGATCTACCCTCCCGAGATCAACCCCCTCCGCACCC -GTACCAAGGCCGCTTCCGCTTCCACCTGCATGAACTGGATCACCAACTTCGCCGTCGTCA -TGTTCACCCCTGTGTTCTCCAACCAGTCTGACTGGGGTATCTACCTCTTCTTCGCCCTTG -TCAACTTCATCGCCATTCCCTTCGCCTGGTTCTTCTACTGTGAGACCGCCGGTCGTGATC -TCGAGGAGGTTGATATCATCTTCGCCAAGGCCCACGTGGAGGGCAAGTGGCCGTACAGGG -TTGCACAGGAGATGCCCAAGCTCACCATTGCCCAAATCACCCAGATGCAGGCCGAGCTGG -GTCTCGATACCTCCGACCTTCGTATCAACACCGAAGCCGAGAAGGCTGAGACTGCCATGA -GCAGCAGCAGCGAAACCAAGCACGACGAGATCAAGCACGAGTAAATACTTCCTCAGTTGC -CCTTGATGTCGACTGAATGACATTCGCGATGCGAATCGGATTAATTTTGTCTCGGATCTC -ATATTATCATGTGACCATAGATTCCCTATACCCCCCCCCGTCTTACGGGAAAGCGGGCAG -ACTAGTATGTAGATAGTCTGCTTGGCGATCTCTTTTTATTTCTCTATATTTCCTTTGATC -TAAATACATACTTTTTTACTATCACAATTTCTTCTCGAAACATCCTTGTAATGGGCCTGA -ATGCATGATCTTCACTGACCGCTACTTCGATTATGCAGCCTTCCGATCCAGTATCTTCTC -TCAGTGGGGATGGAAGTAATCAAGGCTTGGCAAGATCATATCATTTGTATGCTGCAAAAT -CGGACAAATTAGCAAAGGCATCCGATCTAGAGTCAATCTGTGCACTCCGTAGATTGTACA -CCGTACGATGGACCTCTTTGTGCAGGTTTTACCCCCCCAATTAACGAGGAACCTGGGAAG -CCCCTGTAGATCATCGATACGAGACAAAGGAAAATAAGCATGGATGTACTTTCGTACTCC 
-GTTCTCCGTACGATACTTCGGCATAATTGACATGTCATAATTCATAAACCAAAGCACATG -TCCGAATCAGGGGTTTAGTTGAGAACAGTACTCCGTACGAAGTCTCGTAACATAGGATGA -ACCGGGGAAGCCGAGGTATTCCTGATGATGGACACATGCCAAAAAGAAAACTGCTCGCCA -TAGTTTCTTAGTTTCATGAACCAAACATTGAATAATACCTTAGCATATCAATACAGCCCT -GTAGTAATTGGTGAGGTAGCCAGCCACAAATGAACTTTTTTTTTAAACTTCTCCCAGAAC -AGCCACAAGAATACTGGCGCCAATACCCCAGGGACAATGACCAGGAATGTTAGAACGCTA -TTCTCCCACCCATATTTCACTATATACATCCGGTGCAAACAATGGAAACCACAACCTAGC -TGTCGAGTGTAGGAATGCACCAGTAGTAGGCGCGCTAGCCGCAAACCGCGTATAGACATC -AACTATATAAGGCTGGATACACTGGAAGACAAGGATAGTACCGGCATCATACAGTACAGT -GCCGATGCTCGGCGCGATCCAATGGTGGTACTCGGCTGTCTAACCGGAGATGAACAAGCC -CGCGGGGACAAAGAAGGCGCCGAGGAAGAGCGCGGAACTCCGGTTTTCCGAAGTCGCCGT -TGCGGGCCTTGAGATAGCAATATATGCCGCCGTTGAGTGGGGCGCAGATCTGCCAGCCGA -GGTAGAAGCCTAGTCCGAGTGATTGTGATTTACCTAGCGCAGGCCCCAGACACCGGTTAA -ACCATTATATTACTTTGAACTAGTCTAGAGGATGGGGTATGTTGAGAGGACGATGTAAAT -CACTGATTAGATGTACGCTATGCACAGGGTGGGGACTTGGACGATTGGCTCGGTGACTAG -AGCACGTGCAGCTTGAACTTGCTCGGGGATTTGCTGGCGAGATTGCTGATATGTGTGGAG -TACGGAGCGAACATTCTCTTCGGATGTCTAATCTCTGTTGGTGGTTCGATCTGTGTTGCT -GTGTTTGCATATTAGTCCAGCGCACTGGTTTATAGTTTATAGTGCCTGGCCGATGCCCAA -TTCTTTCTTGTTCTGGCACTATTTTTTTTTGAAACATTCAAGTTTTTCCCCAACCTTGAC -GGTGTTGATATGGGGCTATTTCTTCCTATTGCTAGGGTGAGTATATCGTCGAAGCTTGTG -CATTACAAGAATCGCCAAAACGGAGGAAATACTAGTGCCATAGCGCTAGTGATACACTAG -CCAAGAGAAGGCCGCTTAAGCTCCAGTGTTAATAATTGGCGACGTTAGGTCATCAAAGAA -CCGTGATCTATTGCATCCAAGTTATAGCGATGTGTGGCATACGGAAGTTCAGGTCCGGAA -CTTTCTATAGGAATGCAATACCCTAAGAATGTTGCCGCCGCTGAACTAACAGTGAGACAT -AAAATGAGATTGGGGGAAATATCAAAGATAAAGACCCAAAGATCATCAAGCCAATTTTCT -ATACTCCGTACGGCACACTCTATAACATTAGCTATGACCGGTGACTGGAGTGCCAAGATT -ACCCCACACTTTCTTATCAGCTTTGCTCAGAGATACGATCGGCGCTGAGATCCCTGGCTT -CTTCTCCTATCAAATATGCCGCCAGATCGACGCGTGCAGCGAATCGTCCGAGTAAGAACT -GGCTGTTTGACCTGCAGGCGTCGTAAGAAGAAATGTGACGAGGCACGTCCAATCTGTGCT -GGCTGTCAACGCAATGACCTGGGCTGCCAGTGGCCCCAAATCGTACCTGGCAGGAGAGAT -TCCAGCCATGACGACGAAGCTCAAGTATCTGCGGCAGAACACCGCCCGAATATTTTCGAT -GCTATTGAGCAACTCGATGAAATTTCGATGCCTAGGAGTCCCTTGTCGCAACGAGCACCA 
-TCTGTTTCATCGAGTGCAGGTCACTCTGTGACTATTGGAGACCGATCTGAAGACACAACA -CTGCATGATGAGACCCATAATCAACGCCGACTGTCAACAGCTACCAGGTCTACCACTACT -AGTGATGCACTCGTCACATCTGCCTCAACGGGAAATGTGCCGCCACACAGTCTCTCTCTG -ATGCCTGGTTACGATGCCGAATCTTATCAACTTCTCAGTCATTACCTGGCCACAACAGCT -GATTGCATGGCAAATGGCTCGACACCTATCAACCCGTTTCTGGTACAGATTGTACCCCTG -GCGTTTTCTAGTAACTTGCTGTTGCAGCTTGTAATCACTCAAAGCGCTGCTCACCGCGCA -TTTCGATCACGGGACGAATCAGACACAATTGCCCATAGTCACTACACAAAAGCCCTCCAG -CATTTTCGCCGTGGAGTAACGGACTTTATCGATGGGAAAGAATCGAATCCACTCATGCTT -CTAGTGGGGGCACTTCTGATGTGTTTCACCGAGGTAGGAAAACCACGATTCAAAGATAAT -GTGTTCCATGATCTAACATGATCAGACCGCAAAAGGTGATATGAACGGGACAATATTCGA -CCACTTGTCCGCTGCAAACTCACTGTTAGTCAAATTGCTAGCCCAGAGCGACTCGGCAGT -TCCACGGGACTTGAAAGATTTCGTGCTCGAATACTACACCTATACTGCGACCGTCAGCAT -GATATCAATCGATGCTCGTTTTAGCGGGCAGCTTTTCTTAAACTTGGATCTGGAACAACG -GTCACGCGAATTACTTCAGACCCAGTACGTGGGCAACTTGTGCGGTTGCTGGCTAGAACT -CTTGCTGCTGGTTCCGTGCATCTTCGATCTCGGCCGACAGTGGATCATGGACGATACACA -AGCGGTCGTTCCAACAGCTGATGATATTGCCATGTTTGCATCTATCCAAGCCCAGATTCT -GCACTGGTCGCCTTATCCCAATGTCGGGACGGAGGTTCATCTGGCTGGAAAGATCTTTCA -AGAATCTATACTCATCTATCTATACACCTCCCTGGGTGGCTTCCAATATACCAGGGATGG -AATGTATAAAGCTATGGTGGAAAATGCAGTGACAAAAGCCATGTCATACCTGAACGAGCT -GTCACCCAGTGCGCGCATCAACTCTGGCCTCTGTTGGCCGATTGCTGTGGTGGGCTCATG -CCTTCTCAGCCCCGGCCAACAAGACTGCCTTCGAGGACGTTTGAATGCCATGACTGAAAC -GTTTGGTCTTGGGAATATGCATCGAACACTTCTACTCCTCGAAGCAATGTGGCAATCACC -TGCGTCTGACGCTGGGCCATGGAATATCTGTCGAGCAATGCAGAAAAATCAAATATGGAT -CTCATTTGCGTAATGTTGATCATCATTCAGTCCTATTATGTACAAACTAGCAAACTCGTC -ATGTATTTAACCAGCCAAGTTTTTCGGTGCTTCCGTCGCCGGACCTTCAGGCGTAACATA -TGCTTTTGTCAAGCCACCATCGACGACGAAGTCAGTAGCATTCACGAAACTGCTCTCGTC -GCTGGCAAGGAAGATCACGGCATGGGCCTGTTCAATCGCCTCGCCAAACCGACCGGTAGG -GAAATGAACCTCACGACGGAAGCGCTTTTCCTTATCATCACCAAGCCAATCCTGTAACAG -AGGCGTGCTATCCGAATGTCAGCTCTATCGAAATTTAAACAGGGTCAAGGGCAATATTGA -CATACTTGAGAGGCGCAGGACAGAGCGAGTTGAATCGAAATCCCTCACGCGCATGAACAA -TACCCAGCTCCCGGGTCATTGCCAACACAGCACCCTTGCTAGCGGTATACGCCAACTGCG -GCGTCGCAGCCCCAACCAGCGCAACCATACTAGCAGTGTTGATCACACTAGCGCGCTTCT 
-TTCCGTGCCTCCGGAATGCACGAACAGCATGCTTTGAGCCGTACCACACGCCCTTCACGT -TGATATTCATCGTGCGATCCCAAATATCCTCGGGACACTCTTCCGAATCGCCGTCCTTGG -GATGCATGATTCCGGCATTATTGAAGATGATATCTAGTCCACCCCAGGCATCAAGATGAG -CGACTGCTGCTTCGACTTGAGATTCGTTCGACACGTCGACAACTTTGGTTTCGACCTTTC -CGGCGCGGGTTGAGGCGGGCACGACACTGTTTACCGTGGACAGGGCTTTGTTCAAGCCTG -CGTCGCTGATGTCGGCCATTAAGACTGAGGCGCCTTCACGAAGCATGAGGATTGTAGTCT -CAAGACCGATACCTCTGCTTTTTGTTAGTTGTGTATTTCATTGTTTGGTTTATCCTAAGC -GGCGAAGAGAGGTACACACCCAGCTGCACCGGTGACGATTGCATTCTTGCCTTCAAGACG -CCCAACCATTTTGATATTGTATAGATGTGATATGATGAGGGGGGCTAGGGAGAATGGGAC -AATGAACTGGAAGAGTTTGAGCAGTATAATGGATAAAATGGACTGCAGCGGGTAGGCGCT -TGGTAGAGGCCTGAATTTATACCATTTTCTAACCCCCAGAGTCCACGTTGGACAATCTCC -ATATAGGAACACCCACCGATGACCAGCCCACATGATAAATTAGACCCGACCACATGAGCC -AGCGCGCTACATTCCCCATTTCCCTTTTCAATCCATCTCGATCCACAAAGCTATCGGAGT -GCAAAGCTCTATCGCGCTCCTACTACCCCACCACACGCCGAGCTTGACGCGTGAGGGATG -AGGGGTAGCTGTTAGGTAGATCTTCAATCATTCAGGCATATGAAGCATTTGCCGATATCG -TCGACTATTGACTATCTTTGGTGGGGCACTCGGGGCTAAGGCAGCGCGGTCCCGGACCGC -GCTGTGGGGCTATTGGGATGAGCTTCAATATATTGGAGAGTAATGCAAAGATCCTCGCAG -GTTCATCTGTGTTGCTTTCTGTTATGAACATTTATAAGCTGGCTATAGGTCTTGTCATGG -CTCTGAATGAAAATTGTGGGAGGCATGCTAAGATAGTTGTATACTTGAATAACAGTAAAG -ATAGAAAATCCTTGatttattctattttattctagactatcatactagtcaatCATTTTC -AAAGCTACTCATGTCCTCATGCTCCGCCATGCTTGATGTGGTAGCTCTTCAGCTTTACGA -ATCCATCGTATCCTCGTGGGCCAAGAGTGCATCCCAAACCTGATGTCTTCCAGCCTGTCC -AGGCAAGGTCCTAATGTCCAATATTAGTATTAATGAACCGACAACATGTCGATTTAGAAG -TTGTCAGAGTGTCATACCGGGCTTGGATAGTCACAACGATTAATGAACACCGTACCGGCC -TCAATTCGATCGATCAAATCCTCTCCGCGAGCGATATCCTTCGTCCAAACGCTTGCCGTC -AGGCCATACTCACTGTCATTCATCAATGCCACAGCTTCATCATCACTGGAGACCTTCATG -ATTGGCATTACAGGACCGAAAGTCTCCTCTTTCATCGTAACCATGGTGTGGTTAACATTG -GTCAAAACAACCGGAGCGACAAAGTTCCCCTTTTCGATCTGAGGGTTGCTTGTCGCCTCG -AAACTTGGGTTTCGAGGGGTGGAGTTGACAGCTCCTTTAGCAATGGCATCCTTGATATGA -GCAGCGATCTTTTCCACGGCCTGCGTTGAGATGACGGGGCCTGTTGTCGTGCTTTGATCC -TGCGGATCACCAAGTTTGTACCTGATTTGAAAGTCAGAGATATCCTCTTTTCAATGGAAT -CAACCTTACGATTTCAGCTCCTCCTGCACAGCGCAAACAAACTCCTCGTAAACTTTCTCG 
-TGTACGTATACCCGCTCTATGGAACAGCAACTCTGTCCGGAATTGAAAACAGCTCCGTCC -ACTAGTTGCTCCGCGACATGCTTTACGTCGGCGTCTGGCCGAACATATGCGGGATCATTT -CCACCAAGCTCAAGGTTCACTGGCTTGATATGTCGGGCGGTTGCCTCCCTCAGTCTAATT -CCACCTGCAGTTGATCCGGTAAATGAGACAGACTGGATCTCGGGTATCTGTGCTACCTGA -TCCAAAATATCCAGGTTTCCAATATGAATGACCTGAAGGACCTGCGGTGGAAGCCCTGCC -TCAGTAAAGACTTCAAGCAGTCTATTGCCAAATAACGGGGTCTGGGGCGAAGGACGCAGG -ATAACGCTGTTACCAGCAAGGAGTGCCGGGACCAGAGCATTGATAGTAGTCAGGTAGGGA -TACTATCATATTGATGAGCAAGTCTATGGATACTTCAATAATAGTGTTCACTTACATTCC -AAGCACCCGCGATAAGAACCGGTCCCACGGGAACACGCTTGACCGTTCGCCGGAAGCCGG -CTTCTGGCCGACCAGGGAGATCGGATAGGCTCTCTTCGGCAATCTCCATCAAGTACTCAG -CGCGCAAACGCATAGTGTTGATCTCGGATGCACAGTACCGGATGGGTCGCCCCATCTGGG -CAGTTAGTTCTTTGGTCAGGTCATCGCGGCGCTGCTCAATAATGTCCAGTGCTCGTGTCA -CAATGCTTTTTCGGTTCTCTAGAGGGCTTGTTCTGTAGGACTCAAAGGCTTGCTTTGAGG -CAACAGCAACTTTGATTGCTTCCTGGAGTGAAGTACCGGGCTGGTCAAAAATGACCTCGT -GCGTCGAGGGAGAGATGGTACGAATTTGGTTTGACATAGCGTAGGTTGTATATTCGATCT -CGGAGAAAGAGTGAAATCCCGAAAGTGGGAGAGGTGGACTTTATAGCGCGCGAGTCTGTG -GCGCGGTTGCAGATCTAACAGCTCGTGCTTTTCCAAATTCCACTTTTATTGCTTATCTCA -GTCTTATGACAATATCAATTTATTTCGTATCTGCTTGAACTACTTGCTCTTCCCATCCAG -TACGAATCTTGCAATTACCCTTGCTATCAACCACCCATCATGGGAATCACCAACTCGCGA -TAACCCATTCTTGTACAATTCATCATCAAATATTCCATTGTCGTGCCTGGCTTCGAGCAA -ACATGACATGATACCTGCGTTATATTCCGGGTGCCCTTGTACCGACAGCACCTGCTTGGG -CAAGTAGAGACCCTGAACTTCACACTTTGGACTTGTAGCAAGATTTGTGCAACCGACTGG -AGCATCAAAGACAATATCCCGATGCATTTGTTGAATACTCTATATCCAGTCAGAACCTCC -CTCGATTGGGCTTGTTCGAGTGTTTCTTCACTTGCCAGTGTATCCTTTCCAAACAGTTTC -TTCCCAACCTCAGTAAGTGCGATCTTCTCCACAGAGACCTCCCATCCAGCTTTGTTTCTT -CCAACTTGGGCTCCCAGTGCCCGGGCGAGAATTTGATGTCCAAAGCAGATCCCAACCACG -GGCTTCTTGTGTGTTAGAACAGTCTCACGGATGTTGCTTGTGAGATCACAAATCCACTGC -TCATCAGCCCAGGCATCGTGTTCTACCATGTGATTTTAGTTTCCCAATTCAGATCTTAAA -ATGGATGGTAACGTACTGCTTCCAGTCAAAAGAATGGCATCGATGTTCTCTAGATCAAGA -AATCGGTTACTGCCCACCATGTGATGTTCGCTAAATTCCACTGTAACATCTTTTAACTCC -TCGGATCTCTCCAGGTCTTTGAAGCCTTGCCTTAAAAGGTCTTGAAACATTGCGAAGTAG -TCGCCATACTTTCTGACAATGGGCTGGATAGGCGTGTCACACACGAGGATGGCGATATGA 
-ATGCGAATAGGTGTCATTTTGAGGATGGGTACTTCTACTGTAGATTCACAATAAACAATC -AGCCCGTCAGGCTAATATTTGATCTCGATTTTTATCATTTCGCAAAGAATAAGCGCGCTG -TCTATACTGTGGAGTGAGGAACCCCACAGCGCGGAGCGCGGAGCATCTCCGTGAGAGCAT -CCGACAGCTTCACCGGTAACATCCATTGTCTTCTATCGAAAATAATTGCGCGTTATGGTC -TAAACCGCGCGGGATTATTTTTGACCTGAGCTGCGACGCGGCATTAATGTTGACCACATC -TTGGACCCACAGCCTCACAGCCCATTTCGCAATTGTATAAAGCTACAGGGACCGACGAAA -TGTCTCTGCCAATCCAAACCCCACTGTTGAACGCGACCGGTCCTGCTGCTATTGCTGTAT -CATTTCCAGAATGATTATGCCCACCATGATCACTGTCGAATCCCTCCCTAAACTCCTCGC -CGACGACATCAAGGTCAAAGTCGCCGGTATCGACAGCGATGGCGTACTTCGTGGCAAGGT -GATGGCCAAGGAGAAGTTCCTTGGAATTGCAGCAAAGGGATTTGGCTTCAGCTCAGCTCT -TTTCGGTTGGGATATGCACGATATGCTATGGACAACCGATGCACGTGTCGCACCCCCGGA -ATCAGGGTACGCAGACTTCCTGGCTGTTCCAGATTTGAATTCGTTCCGCCGTCTGCCTTG -GGAAGACAACATTCCTTTCTTCCTAGTACGCTTTCTTGACGATACACGCCCGGTTTCGGC -CGATGGGAGGAGCATGTTAAGAAGTCTGTGCGATAAGCTTGCGGGCGAGGGGCTTCGTGC -TTTGGCCGGAGGTAAGTCAAGAAGAGATCTGACGAATGGATTCCAAAGGCTGATGACAGA -CAGTCGAACTTGAATTTATGAACTTCCAGACTCCTTCCGAAGACGGATACGGCAGTGCAG -GCTCCCAGCATCCCAACCTCGCTACTTTCCTGGAGAAAAATGCCCCCAGTGCCCTGCGAC -CAATTACAGCTGGCATGTTCTGCTATAGCTCAACGCGGCCGGTTGCCAACAAAAAGTACT -TCTACGATATTTTCAATACCAGCGCACAAATCAACTGTGGCATTGAAGGATGGCACACTG -AGGGCGGACCTGGTGTGTATGAAGCAGTATGGCATCCATCCAGGTCATTTGAACTACTCT -AATATTTTGCACAGGCCTTGATGGTTTCTGAAATTAGTGAAATGGCAGACAAAGTGGCAT -TATTCAAGTGAGTGCCACAAGTACCGCAAACATTTTGTCACTTCCAGAGCTAACAAGTTG -ACAGACTTTTAACCAAATCCCTGGGGGTTGACCACGGCATTACACCATGCTTTATGGCTA -AACCTATGCAGGGAATGCCAGGCAGTTCTGGACACATTCACATCTCACTTGCTGACAAGG -ATGGGAAAAACCTGTTTGCAAGAGAGACGCCCGAGGTAGACCCTCAATGGTCGGATATCG -CTCATCTCTCCGACACAGGACGTCATTTCCTGGCTGGACTTTTGGAAGCTCTGCCAGATA -TCATGCCACTGTTTGCGCCAACTGTGAACTCATACAAGCGCCTCATTGAGAACTACTGGG -CGCCAGTGCACATTAGCTGGGGACTCGAGGACCGGATTGCCTCAATCCGTCTGATTACGC -CACCAGTATGCAAGCCAAGTGCCACACGAATGGAAATTCGCATTCCTGGAGCTGATCTAC -ATCCCCATTATGCGCTAAGTGTCATCCTTGCAGCGGGCTGGAGGGGTGTACAGAAGAAGC -TTGAAATCAAAGTCCCACCGATGTCTGCACGCACGCCAGGGGACCGTCCAGAACTTTTGC -CCAATACCTTAGACAGAGCCCTAGACCGCTTCTCGGCTCCGGGAAGCATTGCAAGAGAGA 
-TTCTGGGCTCGGAATTCGTGGATTTCTTCACGGCCACAAGACAGCACGAATTGGGTCTTT -GGAGAGAAGCTGTGACTGATTGGTAAGTCAGAAATATTGTGCCTATGGATGCAAAGCTAA -CTCTGCTATAGGGAGTTTAAGCGATATATTGAAATAGTGTAAATTTCTTGATGATTGCCG -TTGCGAAGCAAATTGACATTGGGTATTCATCTCCGTATGGAGAGATATTACGTAGTAAAG -AGAATCCTCTTACGATCTAAGGAATATTACGTTATGGGCTTGTGTCAAATCAGATTTACC -CTAGAATATACTAATAATACTACTCATCTTTTTGTCCTACTTAAAAACCCGAAGCTACCA -TCATACGTATATCTCTTGCTCCGTAAAGCAGTGGTAGATACGGGCAGGAATGCGCTAAGG -CTTTTAGGTTTCAGGGGACTAAAGCTTTAGGCCGCCCACGTGATGATCGGCAGAAACACC -GGGGCTCGCCCGGGGGGTTGATAGCCCCCAGCCCCAACCCACTCATGTGTACATAACAAG -CATCAATCCATAAAGGCATTCTGTTCGCAAAGCCAGCGCGATGGCTTCGAACCTTCCTCT -TCCTCTCCCGCCGCGCACCCCCACTCCTCCAGCGGATGACGACTATGTCTCCCAGATCAA -CACAGGGATCCCCATCGATCGCAATGCCTTATCGCCGCTTAAGGCTTCCTTTCCAAAGGG -TATCATGGATGCCGAAAGTAGAGATACTCTAAGCCCAACGAAGTCGTCTTTCAATCTGGC -AACCTCCCCTGAAGATGCAGAAACACCGATCGAGAATAGCCGTGGGGACACGTCCCCTGC -GGGGCCTTTCAATTTCAGTACTACTGTTATGGCGAAGAGCCCAGTAATCAAATCGGTGGG -CTGAATCGCGCCGTTGACCTGTGATGGGGGTGAGCTAACTTATTTCCTCGTATCTTAGAA -CATGGGGCAACGTCGTGGCCACAAGTACAAGCACAGTAGCATCTCGCACCAGATCTTTCT -CGAGCCTCCTCCTCGCGCACCCCTGGCCCTTCCGAACTCTCTGCCCATGCCAACACTGAA -AGAATGCCGATCTAGTATGTCGAAGGATCAAAAGACTCGCTTCTGGTGGAGCCTGTGTCA -TATGTTCATTGCGGCATACACACTCTGGACCGCACATGGGTCCCTTGCCATGACAGCCTT -GTCGCACCTGATTCTGTTTGATTCAATTGGGGCCCTCTTGTGTGTTGCGGTTGATGTGCT -GGGCAATTTTGAAGTCTGGAAGCGATCGACTATCCGCCACCCGTTCGGCCTTGAACGTGC -AGAAGTTCTAGCCGGATTTGCGATGGCTGTGCTTCTTTTGTTTATGGGGATGGATCTTAT -CTCTCATAACTTGCAACATTTCCTCGAGAAGTCTGGGCATGAGCCTCATCGCCCGCACGA -GCATGATCGGGTCTCTCTGGGTAGCGTGGATGTGACTGCAGTTCTAGCTATCTCGTCAAC -ACTTGTGTCGGCAATTGGTCTCAAAAACCACGCTCGAATTGGGAAGGCTATGCGATTTGG -GTACATTCAATCTCTTCCGTCGGTCTTGAGCAACCCATCTCACTTCCTTACCCTCTCTTG -TTCAACTCTGCTCCTGCTTTTGCCTCTGGTCTCAATCAGGATCTATGATTGGCTTGATAA -GCTCCTGTCTGGCACCATTGCCTTCTCAATGTGCTTCCTAGGAACTCGCTTAGTGAAGAC -TCTAGGTTCCATGCTTCTCATGTCCTACTCCGGCCAGGGAGTTTCTGATGTCATCAAAGA -TATTGAAGCTGACCCTTCTGTGTTCGGGATTGATGACGCCCGATTCTGGCAGGTCCACTA -TGGGTTATGCATGGCAAAGCTCAAGCTCCGAGTCACTGGTTCCGAGGATAACCTTGCCCG 
-GCTACGAGAGAAGATATCCAGCTTGATTCGCAACAGGCTAGGCGGTGGTTATGGCACTGG -AGGTCAAAAATGGGAGGTATCTTTACAATTCACGATTGAGCGTGCATGATACTTTATCTG -TCAGTTGTGCATATGTCTCAATCAGTCTACGGAATAGCTCTGTATATCATGACCTTTTAC -CCCATATCGAGCATACATGCCCCCAGGTTCAACAAATAAGTGTTCGGGAACATCTACTTT -GAAGGAATTTAAAGGAATGTGCAGTAGCATGTAAGAACCAATTCGCAAATCTCGTGATAT -GATTCATGGTCTAGTCTGCACATCCCCATATCAATTTCAGAAAACGAAGAATGCACAAAG -AATAGATAGGAACTGGCTGGTGTGGATATTCCATCACCGAGACAAAGATCTCAGGTTCGT -TGACTTAGGGCCAAACTATACTATGATTTTTTTTTTGCTTATTGGACTATGTACTGCCCT -GCTTCCTCttctattttctattgtctatatgtatttctatatatctaAGATTAAGTTTAA -AGAGACGATCTTGTCTCTTTCATCGGGGATCATACTCCACCTCTCCCATCAATAAGAAAC -TCTCCACGGAAATGCCTCTCCACTCCTTCTTTCCATCCACCATTTTGAAGAATTCAGGCT -CGAAATCAATGACCTTCCTCCACTCCCGCATTAACCGACGCCAGTAACACACCTCCGCCG -CAGGACTGAGATACCTCTCGCGGAAAATTCGAATGCTATTGTCCGCAATTTGTTTGGCCT -CATCCTCATGAGACAAAAGATGCTGCATTTGGCGCTCCAGATCTGACCAGTCACGTCTTA -CCTGGACAAAGTTTTGCTCGGGTCCTGAGGACTTCAGGAGATGATAGTAGTGCTGGATCC -ACTCCAGCTCGTGCGAGACAACCACACTCCTGCAACTTTGAAGATACTTCAGACGACCAG -AGTACGTATTCCCTTCTGTCTGCGCAACATATTGGTACTCGCAGTGTTGTGGCATAGTCT -TGTAGTCGTTTTTTAGACTATCCTTGTCGTTCCACAAAAGGGCCTTCACATCCGCCCAAG -GCTTGTCTTTCGTCACTTGAATAAGCTTATTCCGGATTCTTGGGCCCATGCTGGGCACTC -CGCGCCAGAGCAGCTTGGCCGTTTTATTGCTCCAGGCCAGCCCTTCCTCTTCCGCCCATT -CGGCTTTAGCTTGCACCTCGCGTGTAGATCCAGCTTTGGTCTCTGGCCAAGACCAGTATC -CAAAATCCGGGATCAACCATACAGACTTGTCTTTGGCCCGACGAGCATATCCCCAAAGAG -GCAATTCATCAACCGAGTCGTCTGTGTTGAAAGCGAATTCAATATTAGGGAGCGGCTCCG -GCGATGAAAGTATCGCTCGATTTATCGCAGACAGCGTAGCCTTTTCGCGAGACCAAATAG -AGCCTTCCTTGTCGATCACATACAACTAGATCACACTTAGCACTACTCTCGTGATCGACA -AAACGGCCATGCTGCTAACCTGCTGGTCATATATCATAGCTCGAATATATCCATTTCGGG -CCGGGATCGCATCCAGCTCCTCGAGAGATATCGGACGGGAGTTGCGAGTGTCCCTAGCGC -GATCAACTTCGGCGTATAGATCAGGAAATGCCTGGTCACATTGATCTTGCGTGAGCATCA -GGTTATCTCTATCGCGCTGGTAATTCCAGGAGCCATCAAATAAATGCTCTCGATTTGGCT -TCTTAGCAGGAAATTTGTCAAGTACTCCATTGTGGGTGAGTGGTCGTGGCTCAGGTTGAT -AGTAAACATAGAGCCCTAGTACTAGGAGGATACAGGGCAGAAGAGCTGCCACCCGTTTGA -TTGAGGTCGGCATACCTCCAGGACCTCCGGGGAGAAAAATCAATTGAAGATGGCGGCAGA 
-GGGTCTCGCCTTGAAAGATAAAAAAAAATCATGATAAGACTAGGGCACAATATTTAGACG -TGGGAGATAACCTTAAATGGGGCTGAGCATTGAATTGTGTCATATTTAAAGGGGAATAGA -TGAGAATTTCGTTCTCGAGACATCAACTGGACCTGCATCTTCATTACAAGAACTACAATA -TAGACGATGAATGTGCTTTCAAGGCAAAAGGGGGAAAGTGCGCCATTTGGCGTGTCGAAT -TACTCATCGAAAAATGTCGGGGTCCAATCAATGGTGTTCGAGTCTCTGTATTTTAGTATC -CCCTTTCCTAGCAATTTATATGGGCGGGTTACTAATTCTAAATTACAGAACATGCTGGAT -TAATGAATCCTCCGGTATGCCGGTCTTCATGCTCCGGCATGACGTTGTGGCCTTGTAATC -TAGGTGTCTACTAATTTAGGTGGACCACGTGGTAGCGATGTTTTTGTGTTGGTCGGAACA -TACGGCCAGGAATCTATACAACACTCCTTTGTTTAATTCCTGGCTTGAAGCGGGGGATAT -CGACCTATATATGGATATATATATGTAGGTTGGTAATTTCAAATTGCCTTGGAAGAGTGA -TTCTTGTTACGCGAGGGCTAGAAAATAGCCATGCAGTTAGGCGGACTAGAAACTCCTCGG -GAGCAGTTCAACGGACTACTAGAGATAAAGAAACTCCAACGAGCTAAACGCTTGCATGTT -GCAGCCAACTCATCCAAATCAAACATTTCCCCAATAGGGATACCCCACCCAGCCAGAAGA -TGATGATGCAAATCAAAGTCAGGGTCCAGAGCTGGGTAAACCTCGAAGCTAATAGCATCG -GACGCAACAGCGGCAAAATGATTATCCCATATAAAGCGCAGCACCCGCTCACTCTGTTCG -ATTCCAGCATGTTGCGGCATCCCCTGCTGACTATAAGCCACCCGCTCCTCGGTTGACATC -CGCTCCCACGTCTCAGGCAAGCCAACCCGCAAGAAAAGGATATCACCCTTCTGGAATATG -ATGTTGCATTCGCGCGCGATCTCCTGTACCTCATCAAGCGAGATCATCTGCCGACTCAGC -GCATTGACGCTGATGCCTTTCTTCTCCGCGTAGGAGACGTAATCGATTAGCACGCCACGC -CCGGCAATGCCCTTCTTCGCCCAAAAGCCAATGCCAATCCGTGACGAGGATTCGCTTAAA -ATCTCCTCTGCAGTCGTGCCACCATAAAAGAGACGTCGGTCTGGATCTTCTGGTGTTGGG -GCTGGCGCATTGTGGTGTCGGAGGCCATCCCACTGGGATGATTGCTGTGGGTTGAAGTGA -TATTCATCGTCGAAAGCAACACCCTCGGCGACCCATTTAACTCGGTGCTCGAATGGCTTG -CGGCCGAACCCTGATTCCTCGAGTCATTAGTCTTACTGTGTGTTTCAAGTTGATTCGACG -GTCAGGTTCCCTTGTTTTACCTGGTGGGCTCAAATTCTCCATATCCCAGTTTAAGCATAC -CCTCTCGCCGGTCTGGATTTGTGTTCGCGCGGCTTGGGCGACTAGCTCAGGTGTTAAAAG -GCGGAGCATTCCCAGACCTTCCTCTTCTGACCCTGCGGTTCCAGGCCAGAAGCGACGCTT -GTCGGGGAGATCATCGAATGTCTCTGGGAGATTGGGAATGCCGGACATGGTTTGGGAGTG -GCGATGCAAACTGAAGACCTTTTCACTGGTCAGGAAAACAGGTCTGTGATTCATATAGTG -GGAGCGACCCTCAAACGGTTGTTATACCCCTCATCTCGGAGGAGATCCGGTCTATTTGAC -GTATGACCGCTATGTAAGCCGAAAGGTCCCCAATCTAGTTCAAATCAAAATAGAACAACT -TCTCAATAAGATTGGTGTTTTGGGTGGACTTGGGACAATGTTCGATGTTGGATTGGATGA 
-GTCAAAGTCATCTGAAATTATATCCCAACAGGATGAATTGGTACCAAAAGGATTGCAGAA -TTTCACTTCTGATGCAAGTTGGAAGCAAATACAGGCGCACCAGATGTACTAAGCCCTACC -ACCTATCACCGCTGTTATTCTGAGCAGAATGATACTTTGGGCGTTCCTTCCCATCGTCTC -CTAATGATTCTCTTTCCTCTTTCTTTCGGGAAGGTACGTTGTCTCCACCGGTGAGGATCA -ACCTCGCCCTCGTCCTCGTCCTCGTCAAATCCATACCTAGAACCTTCCCCTAAGCCCGTT -CTCAACTTCCCATCTCTCATCAAGAGCTCCACACGTATGGCATATTCTATCTCCAAATTG -ACCAAACGTATTCAGATGGAAGTCATATGTCAAGTTGTTGCCAATGAGATTGTAGTCAAA -AGTCAATGTTGTACCAGTATGCATATAGAAGCTACAGAAAGTAAAGTACAGGGTACAAGA -GCCAAAAGAGCGCAACACACAAATTTACATATCATCCAGGTCGTCATTAACGTTCGACCC -ACCCTTCTGCATATTCGACATTTCATACTCTTCCTTTCGGTCTGAACCAGCTCGCTCCCC -AGCCGACATAGCAATCGCCTGGCGTCCAGTGTCGGAGGATTCGAGGAACTCGCGCAGGCG -GCGTTTGCCTTCTACCAGAGCTTGCTGGGAGGCGGCCTCAGTCATCTTCAGCATGGGGCC -CCACAGATTGAGGTGATAGGTGATGTATGCGCCAATGGCACACGCAAATAGCAGGATAAA -GTATGCGGGGTTGCGCAAGACTGCAGAATATGTGTCAGTAGGCGCGAAAATGGCGGATGT -TATCAATGAAAACTTACCAGCCCAGATTTCGTTCCAGCCTAGAGCCAGCAACAAACCGTA -GAAATACAAGGGGACTTGTGTCATACCGCCAATGGCACTCCGCTTGGCCTCGACGTAGAC -ACCGTCGGCGGTCTTCTTAAATCGTACAGTGAGCTCTTGGCGCTTGGTATCGCTAAGAAT -TGTCATCTCTTCTTCCAAACTCTTGCCCTCGTCTTCGTCCACACCACCGATGGGGGCCAG -ATCCTCCTCATCCGCAGGTGTTGCAGTGCTTGGGGTGTGTCCAATCCATCGGTCAAGCGG -GGGTGGACCACCAGTCCTTTCCAGGCGGAAACGAGATAGAAGAGGAATCAGCGTCAAGGT -AGATTCGCGAGCACGGGTGTAAATGCCCTCGATGTCATCGGTTGGGCGCCAGATGCGTGG -TACACCAGCTTCGTCATAGCGAAACTTATCCTCGAAGTTCTCCCGAAGCTTAAGGAGCAA -ATTTCCCTCAATCATTTCCTCTTCGATCTTGGCACGGAGAACACCCCAAGACTTACGGCG -CAGTCGCCAAAGACCAACATCAACTTCGTCAATGCTAGCATCGAAGCTGCTCGCACGGTC -CGTGAATCTCCGCTCAGCGTCAAGAACAGTATCTTCAAACAAGTTCCAGATGCGATCCCA -GAAGTCCTTTTCCGTAGGCTTCTCCCCGTTCTCGGGGGCACCACCGCCAGCTCTTCCAGA -ACCGAGGGCGTTGAACTCTAGCCCCACAGATTCACCCAGACGAGATTGAACCCACCGCTC -GACGCGGGTAGCCAGCCGTCTCATTTCATCACGGCGTAGTCGGGCACTGACCTCAGCCAA -GTCTTTCTTATACAAGGATAGCTCTTGCTTGTAGTTGCTCCAGGAAGCTCCATCCACCAA -TGTGGAGCGGGCGACCTCCTCGAACTTCTCCAATGAAGACTGGGCCTCCTTGGTGACGAT -TTCGGCGAAGTCATAACTACCGCCCTTCTTCTGGCCATCCTTCACAGCAGCAGTGACCGA -ATCACTGAAATCCTGGATTCCAGACTTGTGGGCTGCAGATAGCTGGCCGTGGAAGAGTGC 
-CTTAAGGCGTGTATCGACCTTGCCTTGAAGCTCGGCGCTTTTCCGTTGGTAAACACCCTT -GTGGTATCGGCTGGCTTCTGTCTCAAAGCTCTTTAGGGTCTTGGCGCGCGCAGCACGCAT -GGCAGCACCAAGTCCCCCTAGGACCTCAGGTGCACCTAGACGCACTGATTCGGCCTGTTT -GTCTTCAAATCCGACGATCGCCTCATCGAAGCCTGTCATTACCTCGCGTAATATTTCGTC -ACAACGGAATTGCGCGAGCAACTCCTGCTGTGTGGGCAAATCTAAGTCTTTGTTGTTGAC -AATTTGATCCCAAATTCCCTCCGCATAATGTGCGAATCCGTCTGCTGGGATACGGCGATG -GTACTCAGGTAAGAAAACCCCTCCTTCAGAGAACTCGCCCTTGAGTGCATCCCTCTTTGG -GTCTCGGTGACCTTCCCGGAAACGCAGGCCAAGCTTCTTTGCTTCTGTCACAAACTGGTC -AGGCTGGTAACCCTTATGCGGCAGTCCATAGAATTGGAAATCAAAGTAGTCGTGGATCGT -CGACTTCTCCAAGCCCGCAGGTTTTGATATAGTATCCCAGAGACGGGAAAGGTCTTCCAA -TAAGGTCTTCTGCAAGTTCTTGAGTGGTGTGTTACCGATGAAATCACGAATGACGAAGAA -TAGGAGGGAACGGTGGGTAGTACTGTAGAGATATTAGCACCCAAAGCGCATAGAAAAGGG -GGCGATCAGAATGATACATACTGTTTATCCTTTAGGAACAGCTGCAAGTTAACTTCGAAC -ACGGTCTTCAACAACCCCATGTTGGCACCTTGATACAACCCCACCTGGTGTTCCCAGATG -TTGACAATGAGGACTTCACTTGTCGCAAGCGCAAAGAGCGCACTCTTGCGCTCAAAATCT -TGATCCTCACCTCTCTCGCGACCATCAGTCCCCTCCACATCCATGACCAAGATGTTATCA -GCCATGCGCTCCGCCGCGCCAGGTTCGCCTTGCTTCTTGTTGTTCGACATCCAGATACCC -TTTGTTGTTTGTCTTCTTTCCAGCTCCGACATCACAGAGAACTCGGTCCCGAACAACTGA -TTGAGCAGCGTGGACTTTCCGGTGGATTGCGAACCAAACACGGAGATGAGATGGTAGTTG -AAACCAGCCGACGTAACATTCTCGAGGGAAAGATACTTGGATAGGTTAGGACTATAGCGA -TGCGATCAGCTTTGAAGCCTCATAATTGGGCGAACCATAGTAAAGGGAGGTTGCCGTACT -TGAACTCCTTATTCTCATCAATCACCTGCACGCCGTGTTCATACAAGGCAGCATCGCCTT -CGGCACCGATCGAAGCAAAGTGGCCATTGGAGAGGATCGGCGGCGCAGTTGGACTCCTTT -CCAACCCCGATGGGCGTCGTTCAGTCATGTTTATCTTCCTCTGCTCTGTGTATGTTGGGG -GAGGGGGGAGGTGTTGTGGCAACAGCGAGTTGAAAGAGCAAAGATGTACTAGGATCTAGG -AGACCATGCAACTAGAAAGAGCAACAGACTAATCAAAGGAGAAAAAGAGAAAAATTCAGG -GATCACACGGTTGTTTCACGGGTTCATTGAGATGATCAAGTCCCGAGGTCGATGCTGGGG -AAAACAGTCTTACATAAACCCCTAAAACTCCGGTAACGTCATTTTTCAGACCACTCTGGA -CCTGGGttacttgtttttattttaatatattctgtattttatCAACGAAATATTGAGATG -TCATTTAATAACATCGACATTGCAGGAAATCAATACTATCTGACCAGTTGGACTTGACTG -GTGTTTGACACCAGATCTAGAGCCTCGATCATGCAATCAAATGATAGAAATCGCATTCCC -CTCGACGGGATATGTATATCGTCAACTCTTCTAATTTACAACATGTACAGACTGTAGACA 
-TTAATATTAATTTACAGGGGGATCTGTATTTACAGTGATGGCACACCATCTGACCAGTTT -AGATTCCAGCCCTGATCAAGTCCGAGGTCAACGCTGCGGTTTGGCCACATATACTGCCCA -AAATCATCGAACAGTCCACTGAACGCGTTGAACTCCAAGCCAGGATGTGGGTTTGTCACA -TTTTCACTCCTCGAAAGCCATGGCTCGTCTGGATATCCAGGCGTCATCGTGGGTGTGATA -GAAGTACCATTCAAGGCGTCAGTGTTGGTACTTGGCGCACCTTCAGGGTCAGATATCGGC -GTAGTATACAGCTGACTAGGACACTGTACCGAAGAAGAGACCTCCGTTTGCACTGTGTCT -GATGGTATTGACGTCTGCTCCTTGCTTTGGGAGCGACCCTTGAAGAACTTGCTTCCTGCA -CTGACCGTAATAGTACCAAAATAGGGCACTGTAATCTTGCAGCTCGATATAGCGCACCAA -TCAGGCTTGATATCTCTGGTATTGTAGATGGCGCCGAGGATCTTCGCAGACTCGGCAGCC -ACTGAGCCTCCTAGCTCTGTTGCCGCCAGGTGAAGGGTCTCGACGACCTTCTTTACCATA -TCCCAGTCTTGTTCATCTTGCTCTTTACTGTGATCTGGACTCTCGTCGGAATAACCTTGC -AGGTGTATTATCAAAAGGAGACATAGCGTAAAGGCCATGAAGTCAATCATTTTGCACAGA -GAGGGCTTAGTGGTGGATCGGAGGACCTTATATAGAATGAGCCCTTCTCTTGCGGATTGA -ATCGCAATCGTGTGGCAGGGAAGGAATTGAGGCTCATGGGGGGATTGCAGCATCAATGGG -AGATGGAGAAAGGCACGCAGTAGCTTGAGCATGAATTGCAACGGGACCCTCTCATTGTAG -TGTTCTCGTTTCTCGTTTTGGCAGGGCTCTGCGGCTTGAAAGTGACTTGGCATAGATTTC -CAAGAATCCTGCAGCTCTTGGTCCAGTCGTAAAGTTGCTGCGAGGGTCATCTTGGAAGGG -TCTTGGTTTCGGTCAACCATATGTCCCGTGATAATGCCAATGCGCAACATGTATTCCTCA -GGAGCTGATAAGTTGGAATTAAGACGTTGCTCTACCTGTGGCAAAAAATATTGATCTGTA -ATACCATAAGGCAGGCCAAGGATCAAACTTAGGAACCGATCCGATGTTGCCAGGAAGCAC -CAAATTTTCAAGCGCCTTTCATATAAAGAATCGCCTGGCCTCGGAACTTGAGTGGACAGG -TGCATTCCAGCTAGATGGGCAAACTCGATTGCCCGACGAATAACCAGCCAAGCTTTCCGG -AGCCGGCCATCAGTCATGTGAACCCTTTGGAGCAACATCTGACATTCAATACCGGGCAAA -GTAGCTGCGAAGTCATCGTCACGGACAATCAAACGGTCCACGATCTCAGTGCATTTCTCA -TAGAACTTTTGAGGCTCAAATGGCACGATTAAAGCATTAAAGTCAAATTCAGATGGGGCG -TGAATCGCACTGATGCACAGGCACATGAGTGCCCTAGCGACTTCGGCGGGAGCCACTAGA -GTATCGCGACAATGGCCATTAAAAACCAGTCGGTCGGCATGTCGATCGAACATATCCGGG -AAATGATCATGCCACACAAACAAATGGAAGGACGCGTTTATTATATTATTCATATCCTCT -GTGGGAGGAAGGAGAGACATGAGTTCTGCGCGCACCGCTTCGGCCTTTGGCGACATGTCC -TGAGCACCAGCGAATTTATCATTATTCGACGAGTCCTTTTCGCGGCTCACGATATAGTTA -TCAAACAGTTGGAGTACCGGCGCATTGCAGATTTGATCACTAGACGGGCCTAGTTCATCT -GTCTCGGTAGCTAGTGGCCTTGAATGGGATTGTGCACCGTCCGCTACATAATGTCAGTCA 
-CGCGACATCAATCCGCACGCGTCTGGGATATACCAACGTTGATTTGTCCCATGGTGCTCA -CAAGATGGCATGCGTGAATCCTGCTGGTCTAGTCGTTTGGCCAACCCCTCGACAAGGTTC -TCCAGGTGTGCTACTCGCTCGCGGAGACTATATGGCTGTTCGCCTAGTTTGGAGTTCCGC -TGGTAGCCGGATAATGGAGGGGTAATAACTGTATGTTCTTGATCAACACATTTCATGCCT -CTGGACCGACATTCATTACAAACTGTCGGGCGGTTGGGGTGAAATGTGCATCGAATCTTG -CGTCGCCGACCTATAGTTAAACGCTATTAGAGTAGACATATCAAATCACTCTCTTGACAC -ATACATTCTGTGCAACTTCTGGTCCCCTTTCTCATTCTCTTTCGTGCCGGCTCTAGGTCT -GTGCAGAGTTCCATCTTGATCTGTACAGTGAAAGGGCAAGGATAAGAAGATTATATGTTC -ACTGATGTGTTGTGAGAGGAGTTGATCAGTAGGTTGTGCAGCATCCTCTTATGGTGAGGA -ATACTCTTTGGAGTAAGACAAAATGACAGATCAAGGGATGTAAAAATTCCGAAGAGTCAA -GCTAAGCCTGAATCTCAGATCAATCCTACGTAGTCTCGAACAAATGAACAAAAAAGGAGC -CCACAGTCGTTGGATAGTTGGCTCTCATAGATATGTTCGTCCGTCAAGTTCGGCCGAAGT -GGTGCGACATTTAGTCCGGGCCAATCAGACGCAATATCTTTCGTCCGTTAAGTCCGTCGT -ACGGGGTTAATTTCATCTATAAAACCGTGTTGGGGATTTCTCTTGCTATTTATTCTATCT -GCTTTTTAAATATCTCCATGAATTCACGCCTATTCATCCCAATCTGCCTCAATCAATCTA -CCAAGCACCGAGAGGCTGCAAGTATTGTTTCATATCTGCTGCCTGTAGTTTATGATAGTT -CTGCCCAGGGTGGTAATGATTCCACCAAATACAAGAGGTGCCATTTTCATTGAAACAAGT -GGCATTTGGAAACCCATATGTCTGGGGATCGTCCAGGATCTTTGGCATAAAGGACCAGGT -ATCGTAAAGAACGATAGTAGTCTACGATCAAGTTAGTTTTGTAAAAACGATGTAAGAAAT -CAAAACGTACGTCGGTGTGGCTTGATTTGAAGTCATTGATCATTGTTTGAAGGCCATTAT -TGTAAGCCTTCACCCACGTAGCATATGTTTTGACGGCTTCCGGTCCCTCTTTGATGATCA -TAGGACTCCGATCTGTAGGTGGGACATTGAGGAAAAGGAACTTCCGTCCGCCAGATGCAT -ATACCTTTTCAACGAGGAACCTGTACTGTGCCATCAATCTCGGCACAAGTACACTAGGCT -GAGTGTTTTGATGACCCCAGCCGATACTAAAAAAATGAAAACATTAGAACATAGCCGGCA -AGGGGGTATTAAGACGCAATACTCATTGATTCCAATCCAGAACCCAAAAATCGCATTCTC -TGACGTCCACGGCGCAGATGCAGGCTTCTTGCTATATGCTAATTCGAAATCTTGCACTTG -CGAAGTCACATCCTTGACCTTGGTATCCACAATTGAGTTGTCGATCGTCGCCCCTCCAAC -GGCGAAGTTGTAACTGAGTACCGGCGAAGCATTGTAAAGAGTGGTCAGGTATCCGACCCA -ATTGGTACCACCGCTCGTTGTACCAGTCCCTATTTGACTCAATGAGTATCTCGATTCAAT -CCTACTTGCCTATGTGAGTAGCTCTCACCTAGCGCGGGGTTTCCCATCGGGTTTGTGGCA -GATGGCTGTGTGCCATCGACAGTGAACTCAGTCTGGGTGTAGGAATCTCCGCTGTCGAGG -TCAATAACCTTCCTCCGATCGGTTACAGCAAATTTGGAATCTTACAATGTAAAAAGATAA 
-GTCATTGCATCTTGAGCTTGCGAGGGCGTTGAATCAACTGTGCTACTGCCAGCTACAGAA -CCAAGATCATGATGATTCATGTTGATATAGACCATGACCACAATCTTCGAAGGATATAAC -AAAAATTCCAGAAAATCCCGCGGCACATGTGCCCATAGCTTTTCTCTATATATGTGTTAG -GATTTTGTTTCCTTTTATCCGGTATTGCCTGTGGAGCTCCTAATCTATTTGATAAAATCA -TCTTTTGGTCGCATTTCTCCATTTCTCCGGATTGGTTTACGGGGTGATGTTACTAGCAAT -AACAACACAAATGGAACCGTGTAAGCGAAGGTCTGATAGCGTATCAGTGGATTGATATGA -ACTATGACCTAGAAGGCACAGGTGGAACTAGCTGACCCAATGAGAGATGAATCCAAACAC -CCCCTTATAACCACTATAACACGCGCCCTGGAAAATTACTATGTTGTACGGAGTAGAAAG -TTCCACTCCATGGATGCAAAACTTAGTAAAGCCGCAGTTTGGCCATATGGATATAGCAAG -GTTGTAAAGATAAACTCTGACTTATGTAGAGACAATGCTTGATGTGCGGAGTATACTTCA -TTTCTACCCCGTGTCGGAGGAGAGAAGTCCGTTGGTACTAAACTAATATTAGTGTGGCGT -TATTTTTCAAGGCATGCGGATGTAAATATAGAGAAAAACAAAAAACAGTATTCTTTGATG -ATAATTTTGCGTTTTATCCATCCATTTAAGGCGAATTTTGGCAGATTCATGCTAGCACAT -CGCCATTCAAATGCTGCTAGTACAGTAGTTTTGAACCAATAGTGCGCCAGGACGGATTTT -GGCTGTGGTGTTTTTTTGCAATATGAGCATTTTATACATCGTACATCTGGATACACTACG -GAGTAATAGCACAGTGTTTTCAAGACGGGGCAAACATCGCGATATCCCTGTTCCGATATA -TATATAGCCTTGCATATTGAACCAGCACGTCTCGAGGCTGTCAGGCGACATTGGGAAATG -GTTGTTTAATACGAAGGACAGAATGGTTCAAAGGAACATAGTCACCCACTTTTCGCTGTG -TATATATATATAACAGGTCTCAGCACATTTCAACCACAATATGGTAAACAATATACTACA -CTTTCACAAGGATGTCAAGGGCTTGTTAATCCATTCAGTGGAGTTAAAAAGGGCAAACTG -CCCAGGCCCGTTGAAGAAGTGGTTGATTAAGGTCTATGCATGATTGGATAGGGCATATTT -GAGAATCCGTAGCTACATAGATATATTCTGAACCGACGAGATCAGCCATTGGGCGAAACA -CTAATCAGTCAAAATGACCGTCTTCGCCATCAACAGGGCGGATGAGGTGCGAATACCACG -GAAGCAGAGAACGAGGTGTGAGCCAGGCGTTCATGTAACTCAATTGAGTATCTACAGGCT -AGCTTCTAGATTCGTTCTTATGGAATAGATAAGATCAGGGTCGACCAGTAGAAAGATGCA -CGTTGACTTGAGCACAGACTAAAATTATATATGATCCTAGGAAGAATTTGGTCTGAATAC -ATTTTCACGGGAGTGGAGAGATGAAAGAAAATCCGACTACATCTTCAAAACAGTAGAGGT -ACATGAACGATATCAAGCGAAAACTCCGGTTTACCCCCGGATCCCTCGTGCTGATCGATG -ATTCCCGCGCTGTGGAATATAAGGTGCCAAAACAAACAAGCTGTTAAAGAAAAGGGGTTC -ATAATACAAGGGTCAGACACATCGCCTGCCTGACCATGGGAGCGAGCGGACCATTTACTC -GACGACACGGGTAATCAGACCAGTGGCAACAGTACGGCCACCCTCACGGATGTTGAAGCG -CTGACCAGGCTCGGCGGCAACAGGGTGGTGGGTCTTGAGGACCATCTCAACGTTGTCACC 
-GGGTTGGACACGTCTGGCCATGTCGCCATCGGGGAAGCTGAGATCACCAGCCTCATCTAG -AGCATAATTAGTTATCAATTGTATCAATTTAATGATATGGGATTTGGGCGAAATTGAACT -TACCAGCAGTGCGGATGTAGACCTGGGGGCGGTAGTTGGCGCCGAAACCAGTACGGCGAC -CACCCTCAGCCTCGGTCAAAACGTACATGGACACAAGGAACTTGTTGTTGGCCTTGGTGC -TGTTAGGGGCAGCAACGACCATGCCACGGCGGATATCCTCACGGCGGATACCGCGGAGAA -GGAGACCGGAGTTGTCACCAGCACGGGACTCGTCACAGGACTTCTTGAAGGTCTCAATGT -CGGTAACCTTGGTCTTAATAGGGTTGCCAGAGTCGGTGGCACCGACAATTTCAACCTCAG -TATCCTTCTTCAGGAGACCACGCTCGACACGACCAGAGACGACAGTACCACGGCCAGGAA -TAGAGAAGACTTCCTCAACGGACATAAGGAAGGGCTTATCCATATCACGCTCGGGGGTAG -GGATCCAAGTGTCGACGGCCTTCATGAGCTTATCAATCTGCTCGGTACCAATATCGGGGC -GGCGGTCCTCGAGAGCGCAGAGAGCAGAGCCGAAGACGATAGGAGTCTCCTCACCCTCGA -ATCCGTAAGAGGAGAGCAGTTCACGCATCTCGAGCTCGACGAGCTCAAGCATCTCAGGGT -CCTCGACAGCGTCGACCTTGTTAACGAACACAACAATCTTCTGAACACCGACCTGACGGG -CAAGCAGCAAGTGCTCACGGGTCTGGGGCCTGTTGGGTAATTTGGGTCAGCACATTCTTC -CGACTAGCTTTATAGGCTGGGGGCCGTACATCTGACCGTCGGAGGCAGCAACGACGACAA -TGGCACCGTCCATGTTGGCGGCACCAGTAATCATGTTCTTAATGTAATCGGCGTGACCGG -GGCAGTCGACGTGGGCGTAGTGACGATCCTCAGTTGCGAACTCAATGTGGGCAGTGGAAA -TAGTGATACCACGCTTGCGCTCCTCAGGGGCCTTGTCAATGGAGGCATAGTCGAGGAAGT -TGGCAAGGCCCTTAGAGGCCTGGTGCTTTGTGATGGCGGCAGTCAGGGTAGTCTATGGGC -GTACGAGTTAAATTCGATCTTCGGGTGGCGGAGGCATCGGAACAAGTCGCGAATGGCTTA -CCTTGCCGTGATCAACGTGACCGATGGTGCCTGAGCAATAACCTTCAGCATGGAGACGTA -GAATGCGACAGCGATAGGATCTTACCAATGTTAACGTGGGGCTTGTTACGCTCGAAAGCA -GTGGCCAATCCACGGCCGGCCACCTTGTTGCTGAAGACATGGTGGACGGGGTTGACGGTA -GTCGGACGCAGCAGGGAGGACCGGCTGGTGCGCGCCAGGAGGTTCAATGAACGGGAGATG -GTCGACATCTTGGTTCAATCGAAACAAAAACGATGGAACAAGAACAAAAAGGAAAGATTA -AACGGGTGGAGTAAGGATGGAAGAGAAGAAACCCAGATCAACACCAAATTTTTTAGTAAG -ATGGTTTCCGGGATGAATGGATCCTCAGGGCAGGCTTAGTCATCGACCAATCACGGACCT -CGAATTGCGGACAGGATAGATGTTGAACACGGGAGATAAAACATGAATGTACGTATCTAA -GGATGTTTCTTGATGCTCAGTGGCATTCTATCAACATCGAAGGTCTCAATGGCCTAAGGT -CTCTAGGGCTACTTCCAAAGTATATAGGGGAGATATCAGGTATCATGGTGTAGGATTCAA -CAAGTTCATATCCTTTTATATCCCTACAAGTGATAACAAACAGCGCTATACAACTGATGC -CATGTTGTAAGGTTCAATGCCATATAGTATACATTTGACCAGTCCTCTGTGCTAGTTCAT 
-TTTCTGTGCAGTGATCTTAAGTTGCTTTCCTGGCTCTCCCTCCATTGCTTTGGTCTGGGT -TCCACCAGCTTCTCGGATTGTGTCCAACACACTCTGCATGACCTGCTTGATCTCATCAAC -TGTGGGGCTGCGCTTGCGTTTCTTGGTGGTCAATAGGATTTCCACCCTGCGACCTTTCTC -CAAAAAGGTCGAGAGTTGCTTTAGGCGGTGCGACAGGTCATGGGCATCGATTGCCCAGTT -CAGCTCAATTTGCTTGAATGTAACCTTGGTCTCACGAGCTGACTTGGCTTTGGCCTTTTC -ACGTGCTCGCACTTCTTTCTTACTGTAGACCTTGCAAACTGGCGGGTTGTTATAATCTCC -TTCTTGCACCTGGAGAAGAAAGAACTCGGCATGGTCAAAGCTTTCAAGGACGTCCTCTAG -TCTGGTCGGAGGGTCAAGGTCGCCTTCATCGTTCACAACTTGAACAAATGGTGCACGGAT -AGCATCGTTTATCGGGGGCTTTTCAACTGGTGGCCGATATTCTCGAAGCTCTAGAAAACG -GGATTGTTGAAAAGGTCGAGTCTGAGGGGTGTTTGGAAGAAGCCTGGAAGGAAGAATGCC -AACTCTGGAGCTGCGAACGGATGTAGTGAATATCTGACGTAGGGCCTGGGCGGATGATAG -GAGACCGCGTGTGTGGTTCATGTCGTCGGCTGGTAGAGAGAAATGCAGTAGGAGTGACAG -AAGGGCGGTAAGTGCACCAGGGAGGGAAATTATAATAATGGTTTGCAACGATAAAGCACA -GAGAGGATTATTGACTTGAACAGTCGACAGGCCGCTAATACGGGGAAGTCAAATACGACT -GAATGCAAACATGACAAACATTACTAAACCACCATTTGCCGGTACCGGGGGCTATCTACA -TACCAACTGGGCAAGTTCAGATGCTTCCGGCCAATTCATGATACCTATGTAGCTATCAAG -TGATACGACCTTATTATAGAGCAAGTATGTATGGCGCGTATTCTAAGTCATATATTTCGA -TATATAGGATAATGGACGTATCTCCCCCAGTTTGAGAGTCGGGATATAGGTACTGCCCAG -GTGGCAATAAATCCATAAATCATGTAATTTGCGCTAAAATTGCAGGTAGTGCCCTCTTTG -ATATGCTGGAGGATCAGTCTTTATATCAGTGAGATATGATTTTTCTTATAATATACATTC -CTAGTGGTATGTAGATCTTGAAAGTTCTCCCAAAGTTTGTATTTTTATGTACATCCATGC -ATGCATTTAACGCCACATCTGCGCCACAACCATGAAACGCTGATCTCCTTTCTCTCCTGA -CAATTTTCCCTTTTACAACCATCCACTTCTTACAAGACAGGCTTGGTGTTTTTGGAGGAC -ATTGGGATCAATGGTTGCTTCTTTCTTGCTTTCTTCCTCTACCCATACAATCGTTCAACC -ATTCTTGCGCTAGCGACAAACGCCACCTGTCACTGCACCTTTATAATTCGCCAACTGCTC -GAATGTCGACTGGAGTCGACAATCAAACCCATAACAGCAAATTTGACTACGTTGATTCCG -CCAGCCGTCATCGCTCCATCTCCCGCAGCGACGGATCTGCTATATCACAGTCCATTGATT -TCAATCAAACCTCCGGAAAACAACCCGTCAACATGGGACAGACTCAGAGATCGCGTTACT -TGAAGACGGGCGCAATCATTGCCTTCATTTTTATGGTATTAGTGTGGCTCTCCCCGTCCC -AGCCGAGTACTTCAAGTCTCACCAACGGTCGGTGCCCCCCTTGCACGCTGCCGTTTGCAG -AACTGACCATTTGGCAGAGCAACCACCTTCAACCGGTGGCGCATCCAATTCGAGAGTATG -CACCAAACCGTTTGACTCATCCAAGCCTCTTATTCAGTACGCTCTTATGATCGATGCCGG 
-CAGCACTGGGTCCCGGATTCATGTTTACCGATTCAACAACTGCGGCCCTACCCCCGAACT -TGAGAACGAGGTGTTCGAACAAACGAAGAAGAAGGAAGGAGGCTCCGGTCTCAGCTCCTT -TAGAGATGATGCGGAGGGTGCAGCCCTCAGTCTGGACCCTCTTATGGATGTCGCTGTGAA -GTCGGTTCCGGAGGAGTACAGGTCGTGCTCTCCGGTTGCCGTCAAGGCTACGGCCGGACT -TCGTCTTCTGGGTCCTGAAATGAGCGATAAGATTTTGGAAGCTGTTAGACACCGCCTGGA -GACCGTATACCCCTTCCCCGTTGTCTCCAAGGAGCAGGGTGGTGTTCAGATCATGGATGG -TTCGGACGAAGGTGTCTACGCTTGGATTACGACAAACTACCTGTTAGGCAAGATCGGTGG -CCCTGACGAGACTCCCACGGCCGCTATCTTTGATCTGGGTGGTGGTTCTACTCAGATTGT -CTTCCAGCCTACCTTCGAGCAGAGCAAGGCTGGTGGTATGCCCGAGCACCTCGCCGCTGG -TGACCACAAGTATGACCTCAAATTCGGTGGCCGCCAATTCGAGCTGTACCAGCACTCTCA -TCTAGGATACGGTCTCATGGCCGCCCGCGAGGCTATGAACACCGTTGTCGTAGAGGCGAT -GCTAGCTCAGAGCCCCAAGGACCTGGCCTGGGTGGAGCAGCCCATTTCTAACCCCTGCAT -TGGCCCTGGAATGAAGAAGACAGTCACACTCAAGTTTCCCGCTGACCACCCTCTGGGGCC -TAAGGTCGAAGTGACCATGGTGGGCCCGAAAGACAAGTCTATGGGCGCCCAGTGCCGTGG -TATCGCCGAGAAAATTTTGAAGAAGAGCGGCGAGTGCAAGCTTGCACCCTGCTCTTTCAA -TGGCGTGCACCAGCCCTCTCTCGCGAAGACTTTTGCCCGAGAAGACGTGTACATCTTCTC -TTATTTCTTCGACCGCACGGCTCCTCTTGGCATGCCGGAATCTTTTACTCTGGATGAACT -CAACCAGCTGACCTCCACCGTCTGTGCTGGTGAGAGTGAATGGAAGGGCTTCGAGGCCAT -TAAGTTTGAGGGTGAGGATAATGCGCTTGTGCAACTTCGGGATCGTCCAGATTGGTGTAT -GGACCTTAGCTTCATGATGGGTCTATTGCACACCGGTTACGAAATGCCCTTGTCCCGCGA -GGTCAAGATTGCCAAGAAGATCAAGAACAACGAGCTGGGCTGGTGTCTTGGTGCGAGGTT -AGTGCACTCCTTCCCGTCTCTTTCTAATCAGTATACTGACCTACTACAGCTTGCCTCTTC -TTAGCCAGGAATCTGGTTGGACTTGCCGCATCAAGGAAGTGGTCTAAACGAGACAGCGAA -CGCAACGCAAAATTATCACTTTTTAATAATGTCATTCGGTCATAGTGCATGATGGGTTCA -TGCATATATGGAAATTCGCGATATGTTCGTGTTTCTGTTTTACTACTTTCTACTTTTTCC -ATGTCTGTTACCTAGACTTGATTTCTGACCAGTGGCATACATATATAGGAAATGAATGTA -TGGGTGTGTTTATGCATGCAATTGGTTTGGAGCCATTTTTACAAAGGGGGAGAGAAAAAA -AGCAAAGCATTAGACGCTAAATTATAATACAGAGCCTAGGTACGACATGCAAGTAGATGT -TAAACCAGTAAATCGCCATGATTGCATCAAGACCACACATTACAACCATATGTCAGATAT -AATGCAACCCCACCAAACCAAACAAGTGTCTACTGAGTCATCTCCTCGATGGACGGCTTA -TCCGTGTTCGACTTCTTATTTTTACCGCCCATAAGCTTCCCAATGATGTCCGAGTCATTC -ATCTCCTGGGAACTCTTTCCATCTCCAGTGGCACCCTTAAGCCCTTCATCATCACTCTCC 
-TCATCACTGGAAGAATCACCATCGCGCTTCTCTTCCAAAACCCCAAGACCAAGATCCTGA -TATCAGTCAGCAAAGCCTGTCACTACCATCAAGGACAATCCAGAAACCCTACCATCTCAA -TATACTGCTCTCCATTCTCATCTGCATTATCGAGAATCAAATCCCCGGCTTCCCCAGCTT -CAATTTGCTTCTGCAAATCTTCATTTGCATCCTTCATCTTTGGCAAAAAAGCGTTAAGCC -GTGACAAAATATCCGAATCATGTTTCATGCGATGTATGCGCGGTTTCGGGCGGCCGGCGA -TGTGAGGTAGTGAGGGGGAAGGATCAGAGACATGTGCGTGGGTGCTCGACTCTCTTATGC -GATTCATTTCTCGGTCAGAGTGTTGTTCATCTTCTCTTTCGCTTTCGCTGTCGTCTTCCG -AAGAAGAGCCCGAGGAAGAGGTTGGGTCGGAGTCGGAGGACTGAACAGAGATGGCGCGCT -GTCGACGGGGCGCAGTGCGGTCTGTTAAGTTGATGGGCACTGGAGAGGGATTGCTTGCAG -CAGTCTCGGATCTTGTGGTCGCTTTAGTCATATTTTGGGTATTTGGAGGTATGCGATTTG -ATGGGGCTCGCAGAGATTTCCGGGAGATTGTGATGCGCTGATGTGGGGGTGGAGTGTTGG -AATGGGTTTTGGCAAGATATTTCGATATTTTGTATATATTTCAATGCAATGTCCAATAGC -CCGCCGATTTTTTCTGTGTCTTGGAAGGGTGGATGTTGCCGAGTCTGGTTGGAAGTCGGA -CATTTTCAACTTTTTTGGGCGGAGAACTGATAACGGATAAGCTTATCGGCTATCGATGCT -GGATCCATCCATGTGATCTAGGTAGTATGTACAACAACATCGAAGTGAACACGAAGTGGA -ATATTTGAGGGTATAAATCGTAGTTTGCAAGGAAATTTGGACGCTTATAATGCTTAAAGC -CATACATTTTATTATTACAGGGTTTATCGAGAAGAATAAAAGCAAGCCAGTAAAAAAGCC -TCACTGGTCTCTGTTGTACATGGCAAAGCATCAGTGTGAAAATCAACGGCTCCGGTGCCT -Tctgcgactgcgacttcggcgtcggcttgggcttcggcgtcgacttggacttcttcgtcg -acttgggctccttcggcgacgacttggacttcttcgtcgacttggactCCTTCGGCGACT -AGGTGTTTGTCGACGAGGTCGGCGATCATCTGCAtccttctctttgtctttttccttttc -cctctccttttccttttcTATGCCAACCTCAAGAGCTTCCTCTTTAACATGAGTCGGGTC -CTTATCCCGTTCAGATATAGGTTTGTAACGGTCAATGTCGTTTAGCCGGGGTCGCGATTC -TCGCTCTCGCTCTGGTGATCGGACCCGGCGGCGGGGTGATTTGCTCCGGTTGCTGGTGGG -GGGAACATAGCGGTCAATATCTAAAGTGGgagaaggcgagcgacgccgagatcgagatct -ccgtcgagtagatcgagagcgtgaccgagggcgatcgcgagggcgagggcgcgagcgtga -gcggctgcgagaacgtcggcgtacaggtgagcgagagGGCCGACTGCGTCGAGGGCGATA -GTTGTCATGATCTCGGTCTCGGTTGCGGCTGCGGTCTTTGGACCGGGATCGATCCCGTAT -TCTATCTCTATCTCGGCCAACACTGCGGTCTCGCTCACGCCGGCCGGCGGATCGACTGCG -ACTGCGATGGCTTGGTCTGTAGCTTCCACCACCTCGGTCATCCCGAAAGTCGCGGTAGCC -ACGGCGTGCATCCATATCACGATCTCGAGACCTCACAGAAGTATCCCGGGACCGTTCACG -TCGACGGTCTGTATCATAACGGGATGTGCGCCGTCGGCGCGAGCGACTACGGCTTCGATC 
-CCGCGTCAAATGTCGGGCCCGATCACCCCGGTCTGCAAGTGAACCACGCCTTGGGTCAGT -ATCTCGGGATATGCTGACTGCTCTGGTGGATGTATTGGAAAATCTGGAATCATTCGCACT -CTTGGCAGGCCCTAATGTTGACCCTGGCGGTTTCAGTCCATTCTCAATAGCCTCGGCCTC -TTCAAAGTCAAACTCAGGCTTCTGCCGAGCCTTGGCTGCAAGTTCTTCGCCTTCTTTCAG -AAGCATTTGCAAAGCAGCTTCCTCGAGTGACTTCTCATCCACAAGCGGGGCTAACACAGG -AGTTGGAGCATTCGATGTAGATGATTTATCGTGCTTCGGATCTCGTGGCCAAGGGGATAA -GCCACGGTCAGAGCGATAGGCCGGAGAGCGATCACGGAAACGGTCTCTGTCTCGAGTTCG -ACTGCGGTCGCGGTCCCAGCCTCGATACCGctccctatcttctcgtcttcgtcgctcata -tctttcatctcgctctctttctcgctgctcGTCGAGAATTCGTTGCTCTTCACGTTGGCG -CTCACGATCAGCTCGCCTCTGTTCATCAATCTCCTTTCTTCGGGCCCGGTCTTCCTCATC -TTTCTGACGTTCAAGTTCGCGCTTCTTCTTGCGTTCTTCCTCCCTGGTCCGTCTTTCCTC -CTCCTCAATCTCCCTCTGTTTGCGCTCCTCTTCACGCCACACTTTTTCCCGCTTCTCTCG -CTGTGCGCTGACATATGATTCGTAATCTGCATCCGTTTTGTTCCCTAGTTCGAGTTCACG -GGCGGCTACTTCCTCTCCAACGTCCTGGCATCGGATCTCACGAACAGATTCGAGGATAAA -ATCCAGGTGTTTCGATGCTAATTGGTCGATAGAGTCCTCCACGCTCTTGTAAACATCACC -ACGATCCACCGCGCCCTCAATGAGTGTCGCTGCTTTTCCTCTCTCGCGGGACAGCAGTGC -TGGTTCGCGATCGATCTCCGACTCTGCAAGTGCAACTAGCTTGTTGGTAAAATCGGTTCG -AATTTCCTGTTCCGATAGGATTAGCAGCCGCAGAAAAATATAGACAATCCCTCGCAAGCA -CTGTCTCGGCAAAGTCTCATACCCCATCATTAAAATCAGCCCATATTTGTTTGCGAACAC -TATCGAAGCCACCTTTCTTCTTGAATGAATGTAGGAGTTTGTCAATGACCGCATGTTGCG -CAGCAGACAAGGGAAGGTCATCGGGTTTGAACTTCTTGCGCTTCACGCTCTCAATATCAA -TAAAGTGTTTAGCCGTAGAATGGACATCCTCTTTTTCCTGAGAATTGGCCATTGCCTCAT -CAGAACCCATAGTAACACGACTGGTCTGGACAAGAGAGAGATTCAAGTCGCGCGAGCGAT -AAGCGATAACAACTGTGATCGGAAGAATGGTTATTCCGTGATTGGGAACTGCGGCCCGCT -GATTTGCTTCAACCTCGTCTCTCCTGTACTATTGCGACGGCAACATCCAATTCCGCCTCC -ATTTACCCCGGCTGCAGTGGTGTTGAATTAACACATCCACCCATTTGCAATTCTGATTCT -TTCCCTCGGATCCCCTTCCGCACTCGAATCACAAACACATCCCCGCGCCCGACCGCGATC -ATGGTTCACAAAGTACTCTTCTGGAGTGGCTTCGGTACGTACATATCGTCCCTAGGTTCC -AGATTTTCATAAGACCAAGCTTCTTGGTAGAAAAATCATTCAATATATTTTATATGACCA -TTGGAACTCACGCGTTGATCCCACCAGGCATTGCCGTCCGCCTCTGGCAACTCGGTATCG -AGATGCGTCCAATCCTCGCCAAGGAATCCCTCTGGGTCTACCCTCTTTTCGCCGGTGTTG -GTGGAAGCTTTGGCTACTGGCTTCAGGGTGTTGAGAGCAGACAATTGAAGATGCTTGCAC 
-AGCGCCGCGAGGCTATTCTTGAGAAGCGTCGGCGACGGGATGAGAAGCCCGAAGGTGGTG -TCCTGGCTGCTGCATCATAAAGCAAAGAGCTATTATGATTGAGATGGTCATTGTGTGTAT -GCGTTTTTCTATTTGTACCAAAATTCAACCGGTTTGATATCAGCATTCGGGTCTTCGCGC -ATTTAGTGGACAATTCTCTCCACAGGTGCATTGACATCTCTTATATTTTTGATTTGTTCC -TCAAAATTCTGGATAAGAATAGTCTCCAATTTGCCTATCTCTTTTTTTAATTCCACGGGG -TATAAATCCCGAAGTAAAACATTTCCAAGGTTGATATCGGCCCATCCACCTTGTCTGAGG -CGTCATGAGTCAGCAAGGAGTGGGTGCCCGGCGGGCAGGCTCCCCGGGGTCGAGTACATC -ATCAACTCCGCTCCGTAGACACCCACTTATACTGCGCTCGTCATTCATTATTTCTTTCCT -GTTTGTTCCTTTTGTAACTTAATCGAGCTCCCCCGGCTTGTCTCGGGTCTTTTGTTTCTC -TTTATCGCGATGCTTATTGTCTGTCTTTGAGGAAGAGTCCGATGACGAGACTCCGTGGCA -TAGATGGATAACCGCGGCTCCTTTTTCTTCTTCCTGCTGGTGTTCTACATTATCCTCAGC -TCTCAGTCTCGGCCGCCATTGATCGATGAGGATCGGGCCCGGCAACGTGAGCTTGATCAT -GAGCGACAGGTCCTTCGTCTATTAAATGGTTCGAAATATGGCGACTTCGATCCGCCCGCC -AATCGATGGCTTCCTTTTTCGGGTTTGAGGGGAAACGACAGCTATGCTTGGGACCTACTA -CCGCAGGCTAAAGGCCTAGCTCGCCACCAGTTACAATCCGCTGTCTCAAATGCTGGATTA -ACCCCCCCGGATGGCCTTGAAGACCCCACCGTGTCGCCTACGTTGAACTTGACCAAGCTC -ATGCTTCCAGTATATCGTAATTCAACCGGAAAGCTCCGTGGAGACTGGGTTCGGAGAACT -CAAGGCGTTGATCGCCGTGAACCTCTGAACACGACTGCCATCGCCGAAGAACATGAATAT -TTCACTCGTGAATTCAGCCATAATATCACTGGCAACGGGGGCACATTTTACTTCGATCTA -GAAGAGGGAGGTGGCGAAGAGCTTCAGATGGCTGATGGAGTTTTGAGAGAGATGCGCGCC -AGCCTGACGGTCGAAAGTGAGGACTTCTGGGGAAGTACCTGGTACATCTCTTTATATGGT -GTACACTTTCCAGACACCGGAAGCATCGTTTTGACTACCTCAAGCGAGAAATTTGGCGGT -ATCTTGTCTTTGCCGCATTTCATGCTATCTTCTGACACTTACCAACTATCCCACGAGCTT -CTTCTCAAGTCGCTGTCGGACACCATCTCAGAGAAGCAAAATCGACCCCCGACACTGTTT -CCCTGGTCCTCTCTGGCAGGAACAGAACAGATGGAGTTCCCTGCGCCTAAATGTGAACAT -ATCATCTACTTGCAGCAGCATCCAGTGATGCTCGGTGATGCCGCTGCTGACCGATTGTTG -TTGGAGCGTATGGAGCAAGAGCTCAGGCATCCTATGGGAGCACCAATCCCAGATCCTCCT -CTTATGGTCATGTCTGCCGTTGTCTTCTCCCCAGACTGCGGGTACATACTTCAAACGAAA -GGGACCCCCGAATTCCCTCCTTCTGATTCGCTATATGTGACTGGGCCCAAGCATGAAGAG -TACAGTAAATACGCAGCTCGTCTCATTTTCGTGGTTTCTGGTGTTTTTGTTGCCCAAATC -ACTCTACTTCTCCGACAGATCAAAGAGGCTTCGACTCCTTCTACCCGAAGTCGGATCAGC -TTCTATACCATCGCATTGATGGCGCTCGGCGATGCATTTGTTTTAACATTCATTGTTTTG 
-GAACTATTCGAGGCAGTGTCTTTCTTGGTTCTGGCTACTGCATCATTTTTGGCGTTCCTG -TCGGTGAGTTATATTGGAATGAAGTTCATGATGGAAATATGGGCAGTGCAAGAGCCAGAA -AGAAGAGAGCAGGATCGTCCGCCCAATCCTTCAACTTCGACTACTCGCCCTGAGAGTTTG -CCCTTGCCTGCTACCGCTCCGGCAGTGCAAGACTCAGGGGCTACCCCAGTCATCTTGTCG -CCCGACCAAGATGCCCCTGATGATGAGATGGATTCCCCGCCGCCACCCACCCGAACGGCT -CCCCCAACTCCAAGGCAAATTCGCAGCGACGTAGGAACAATGTACGCACGGTTCTATCTT -GCGCTGTTTGGCATGCTCATTCTATCAATCTGGGCTTTTCTCTTGCCGAGACGCCTCGGG -TCGATCTACACCAGATTTTTGGCAGCAGTTTATCTCTCCTTCTGGGTTCCACAGATCTAC -CGAAACGTGATGCGAAATTGCCGCAAGGCCCTTCGATGGGACTTTGTTGCCGGCCAGAGT -GTTCTTCGGCTCTTTCCTTTCCTGTACTTTCTCACAGCGCGGGGAAACGTGTTGTTCATC -CGCCCCGACTACACAAGCGCGTTCATCATGACTGGATGGGTATGGATCCAAGCCTGGGTC -CTAGTCAGCCAGGACGTTCTCGGACCACGCTTCTTTGTACCTCGTGGCTGGGCGCCGCAT -GCATATGATTACCATCCCATGCTGCGCGATTTATCCGGAGCGGACGAGGATCTTGAAGCT -GGTGGTGGCGTTCTGTCGATTGCATCCCTGCGAGGCGATGAACTAGATCTAGTCAGTGAC -TCCAGGGATGACGATAAGCAGCAGAAAGATCGCAAGAAGGCGGTGTTCGACTGTGCTATT -TGCATGCAGGATATCGAGGTGCCTGTTCTCCCCGCACTTTCTGCTTCAGGTGGAAGCAGC -GTCGCCGATGGAGCCTCAAGCATATTGAGCCGCCGGCTGTACATGGTCACTCCTTGTCGT -CATATCTTTCACACTGCCTGTTTGGAAAGCTGGATGAGACTTCGCCTACAGTGTCCGATC -TGCCGTGATACCATACCTCCTGTATAACTTCTATTTTAATGCTCTTCATACATGACTTCA -TCCTCCTGTACACAGACATGCTGCATTACACTGTTGAGACTTGCATGATCCAGCTCTGTA -TACATCGAACTAGATATATGGGGCATGGTTTATCTACAAGGAAATAAAAAGACAAGCCAT -TCACATATACAGAAATCTTGTTGAACAGTATATCGATCCGTTTACAGATCGACAAATCTA -AACGCTCTCATTGAAGGAAATCGGTCACCATTGGGCATACATGATAATGACATCATACTT -GAAGAAAAAGGAATTCACCACATCCAGCATAGGCTGGAATTTATAAACCACGAATTACGA -TCAATCTTTACAATCCACAAAGCTAAGCATCAATGGTCTCTTTGTAAGCAGCCTCATCGA -GCAAGGCATCGAGATCAGCAACATCAGCAACCTCAATCTCAGCGATCCAGCCCTCACCCT -CGGGGCTCTCGTTGATTGTCTTAGCCTTGTCCTCGAGAATGCTGTTACCCTGGATGACCT -TTCCGGCGACAGGGGAAAGGACATCGGAGGCAGACTTGACGGACTCGACAGCGCCGACGG -GCTCGCCGGCGACAATCTCGAGGTCTACCTCAGGAAGTTCGACGTAGACGACATCACCCA -GCGAGTGAGCAGCGTATTCGGTGATACCAATCTTGGCTGTAAAGAGTGTTTGTTAGTTAT -CTCGCTATTCCAGATTCTGTGAACTATGGAATGCCATATACACCCGGCATGTTGGTTCAT -TTGGTCAAAGGCAAGTGTATGAGACCAATTGATCAGGATGGTAGTGTACTAACCAGTCTT 
-GCCGCCGTCGGCCAGCTCAATCCATTCGTGTGATTCTGTGTACTTCTTCAACTGGGCTAT -AATTGCGAATCGATGCGTCGTTAGTAAAATGTGACATCATAGAGCTTTAGGAACCGTGTT -TGACGGATTGTGATGCTTAGCACCGGTCCAAGGGTGTTTCAAATCGTCCTGGGCAGCATT -TGACAGTACTGGAGGGTTTATGGTACTCACAGAGAGGGCTCTGGGTGAAGCTCCTTGTTC -CCCGCGGGAAACGGAAAGCAGGCGAAACCTGGCAGGCGGAGGCAAGAGGCTTCTGGGTAA -GGACCCTGGAAGCAAAGGGGCGGATGGCACGAGCGAAGGAGGACATTTTGAGGTATAAAG -GCAAGGAATtggaagcgtggaaggtgaaatgggaagaggggagagtttaagtagaaggGA -AAGAAGGTGATGCCCTTCTCCGGCGGAGTTTCCCCGATAAGGAACTGCCAAGACCTCTGA -TAAGCAACGCGCTAACCTCGTCGGATTTTGCTGGCTTTAGATGGTCATCGCTTCATTACA -GTATCTAGTGCGCGCCACCCCTCGATGCCATGTTGTACGAACTGATTGCCATTGTATGTG -CTTGGTTTGGTTGAAACTGTCATATTGATTCTGAAGGTTTTGTCTGGATTTGCTAACCGC -TCTGCCAACAGGTCCGCCCGGGCAGCCTTAACGAAGTGCGAGAGTACGTCCAAATCAATA -CTTTCGTCCGCGGTCCCCATCATCCGAGTCTATCGAATATTAATATACTGTTTTTTTGCT -GCAGAATCGCCCGTAACGCCGGTACCCAAGTCATCCGCTCCGGCGGCGTCGTTCGCGGCT -ACACCAACTGGGGCGCCTTCCGCCTGCCCAGGGTCACTACGAAGCATCAGGCCCGATACA -CTGAAGGTCACCACTTCATCATGCGCTTCGACTCCTCCGGCTCTGTTCAGTCATCTATCC -GCCGCACTCTCGGTCTAGACCCGCGCATGATCAACTTCTCCGTGGTCAAGCTCGGCGACA -AGCTTGAGGAAATCAAGGATATCAATGGCAAGGTCGAGTGGAACAACGTTTCCACCATCA -CTGACCAGATCTAATCGATGCAAATTTTTTGACGACAGTCTGATTGATTGTTGAAATCAG -ATTTAGAAGCGTGGATTTGGATTTCAGCACTTTTCTTTTTGTTCTTTGTATTATTACGGC -ACAGGCATTGGAAACTGCCACTCGAGCCTATCAGCACAATCGGCGCATATTGTACAAAAT -ACCAGTTTGATCTACAAGTCTATTTTGTGGCTGTTACCATAATTCGCTTTCCCTCCATTC -TCCGGTCCTCCATAAAAGATGGCAAGGGTAAGCATGGCAAGCAAGCCATACACCTGATAG -AAACACTCGGTGTACCTTTCCTTATTTCAATTCCTAGAGGTTGACATTGGGCACTCATTG -AGCGCCGTCATTGTTCAATTTGTGTAGACCCGAAGAGCGAAATGCTCCAGCAGAAATGCA -TTTCACATCCGATCAAGTCTCAAAAGCAAAATTAGGATTAAGCATCCTATTCATACATAT -ATAGGCAACCGGCTCATACCAAACATACCAAACTTATACAAATACTTTGTTCCAAATTTC -GACATTTAGAACATTTCCCTACACCGTACCGTGACTCAGATTGACAAGCCGAGGCATCGG -CTAGCCCCATCTCTTAGTCCGGGGTACCAATCCCATACATCCCATTATCAAATTCATCAG -TCCTACTCACCATGATACCACCCTGCAGGTTTCTTTTCGTATAAAAGCAATCCGTACAAA -ACCTGGCTTATTAGCTCCACCTACTACAATCGCGGGGAGGATACTCATCGGATCCAGGGG -CCGATATTGTTGATCGGACAACCATCGGCCTGCAAAGCAGTTCACCTTTCGAGGCAGGCA 
-ACTACTTTGCACAGCTTGTAACCTTTGGGAGAAGAACAAGCCAAGATCAGAACGTCTGTC -ACCTCTTTTGCGCAATGTCTCCGCAACGCGGGGCCCCGGACAATGACGGCGAACTTATGT -CCCCGCATTCTTTTATTGAGTTGATGGCGCTACAACGACTCGAAGATACTTCGATCTCGC -ATCCCGACACCACCGAGCCAGAAAAGATCGAGCGGTTCCGTTCGCTAGCAATACCTTACA -ATCCGGGACAGGGAACGCGTTCGTTCGGCGGACATGTCTACGCCCAATCAGCCTATGCCG -CGTCCAAGACCGTCGGTCAGGGTTTTGTGATTCACGTAAGATTTTATACATTAATTGTAC -CCAAAGAGGCTCAAACTAAAATACCGGCAGGATATGACGGGCACATTTATTCTAGGGGGA -CGGCTGGACACGCCATACGTGTACACAGTGCGACATCTCCGCGATGGTTTCATGTACAGC -ACACGAGCCGTCGACGCACGTCAAGCAGGCAGAATCTGCTTCTCGTGCATATGCTCCTTC -AAGCGTGATGAGAAAGAGCGTGTCTTCGAGCACCAGCCAACATCAGCACAAATGCGCTTC -GACTCAATCCTCTCGGCAAAACTGCCAGAAGACCAGTCTCCTAGCCCCAGCGTGGATGCA -GACTGGTGGATTGATGCTGTGCGGCAGGGAAACATCTCCGAGCACGAGTTTCCTGGTCTA -GACGTGCGCAAGACCGACATGAAGGATTATAACCTCACTGAGGAGATCCAGCAACATCCT -GAGCGGTATCGCCAGCTAACGCAGTACCGGCTTAAAGGGTCACCAGACGAGGACCCTGCT -GCGACTTTGTCGCAGATCCGGGAGAGGGAAGAGAACGGGGAATATGATAATTTGTATGCG -TGTGCGCATATGTATTCCAGTGATAAGAACTCCCTTCTTCTGATCCCGCGCGCGCTGGGG -ATCAAGCATTGGGCTGAAATGGCGAGTCTTACCCTTACGGTTATTGTGCATCAGCATGGG -GAGGCTCTGCGAATGGTGGATTGGGGGAGTATTGGCGATGGCGATGTTGGCGAAATGAGT -GAGTTGCCTATGAAGTGGTTCGTTCAGGAGGGGTGGACACCTCGGGCTGCGGAGAATCGG -GCTACGCATGAGAGTCATCTTTGGAGTCCTGATGGGACGTTGGTGGCGACTTCATTGCAG -GATAGTATGTTGCGCTTGAGGAAGTTGAAGGTTGCAAACTTGTGATTTATAAAGGCTCCT -CATATGTACATACATGTAGCTTATAGATTAAGACGGTCGTGCAGATGGGGCTATGATGGC -AATATTATTTGATAGGAATCACTCATGATCAGAAACTGACATCGCAGTGGCGGCGATATA -GTTCTAAGATTTGTGGAGCCTATTGACCATTGAATTGTATCAGGACATATTTAGCCTACC -GCAGAAGACAGGTGTCGGTCAAAAATATGCCATGTAAATCTCACAAGAAGGGTATTATAA -TATAGTATACGCTAGAACAAAGAAAAAAAAAATTAAATATCCTCCCCAGATATGTTAGCA -TGCTTGAAGTACTTCATACCGACCCATGCGACAACAACCCAAACCCAAGACATGACAGCC -GTGAGAATGATAAGTGCAGGATAATAGCCGACAATCGGCAAAGTAGGATCATCTTCCAGC -TCAGGCGGCGCATAGGGTGTCTCCCCGACATCCCAAGCCCAGCTCCAAACCTCAAAACAG -CTTCCAATGCCCACAACGAAGACCAACCACGTCACAATCAATATAATCCAGCCCCACCAC -TCAGTCTCGGAGTAGTCCACGTCCACATCGGGACTGCTTTTCATCTCAACCGGGTGTGTG -ACCAGGGGCGAGGACTTGTGTCGATGGACGGGTAGGTATGCCGGTTCGGGCTGAGGTTGA 
-AGCGAGTAGAATGAGCCTGAGCTGGAGGATGGTGGGGAAATGTGTAGCCCGGTTGCAGAG -CTGATCTTGGGGCTGGTGTCCGTGCCTGCCGCCGTGGGATTTGAATTCGAGTGGAGGCGA -GGGAAGGAGATGAGGTCCTCCTCATCAGTAGCTGGCTCTTGGATATTGTGCTGTACAAGG -CCCTGGCGTCGGCGACTTATTGAGGGTCGGATGAGTTTCTGGGTATTTGCAGCGTGGAAC -CAGGTCGGCTGTTGGGGATCGTCGCGGTCGTTGTCGAAGGCCTCGCTCTCGCGCGGTGAG -TATCGTGTAGGTCCATTATTGTAGAGCTTTTGATCTGTTTGTTCTACAGTCGCTTGTATG -TCGGTGGCGGGGAGTCGGCTGGGCTCTGCGCGGAGGGCTTCAAGGGCTTCTGCGGCTTTG -GCGGTAGCTCTTTTGTCGCGTGATTCGAGCCTGGTGCCTAGCAATGCGGTTTTGCGGAGG -GAGCCGCGGCGGCGACGTGTTCGATGTTGTGCGAGGGCGGCAGACATTGCCCGGCTGCTG -GTTGTTCCTCCTTTGTTTGTGCTCTGCCGCATGACGGAATGGAGGTCTTGGTCGCCATCG -TGGTCCATGTCTGGGTGGTGTCGGTTTAGGGAGGTGTGTGCTGCCTGGTTCACGCGCAAG -AAAGAAAGTCTCGAGAGGAGGGAACCCGAAATGCTTCGTTTGTGGCTGCTACTTGCGCTG -CCCGATCGCGTGCGAGGAGATGGTTCGTGGACCGCCAAGTCCACGGTTCTCTTGTCATCG -TCCATTGCGAGTGGAACCGGGAGCTGGGACGTCAGCTCGGTCAATTGTCTTTCATTGGTG -AAACTTCAAGAAATGTTTTTTTGTGTATTTAGTATTTCTGCTCACATTGCAAAGAGATGT -GTGACCTGCATAGAAAATTAGTTAGTTAACTAGTTAACTTGGAAGGAGTGGCGTTATGGC -CTGCAATATCAGGCAGCGATGAGACTACCCATGTTAACTAGTTAGCCATGTATCAATATC -ATTTGATCTGGGCAGCAATGGCATAACGCCCTCAACTAGCCTTGACTTATATCTTCAATG -GCCTACAAGGGCAACCATAACATCTCAACCTCCTTGACGCCAAAATTCCATTTGGGATGT -ACTTGCCCGCGGTCCAGGAGTGAAAGTACGACCTTAGTTCGTACCTTCAGCTTTTTTCTC -TTATTATATTTGCGGCCGTGAGCCAATATTGCAAAATTGGAAATCAGAACAATTATATAT -GTCCCTCAGGCGATAAATATGGGCTGAATCAATTTTATCAAAAAAGCCCCCGGGAAATGG -CAAGACTGGCCATAAAAAGGTCTTTTGATATGAAACAGTCGCTTGGCCTCCGGGTCTTTG -ACCTCCAAGCTTGGTAAGACCCTGTCGCACACCTGATACCATAGCTTATTGCAAATGTCG -TACCCTTTGCCCTACAAAGTAATGAAACGTCTAGTATGCTTAGAATCGGGTCCAGCGACC -CCACACATGAGAATAAGAAACATACGTAAGTGCATGGACACCTATTACTTGGTAGTGTTT -GAGGCTAGACAGGATAAACATGGCGGTAGCAGTATTTATGTACTGCCTAAAGTTCTCATA -CTTCGCGATGATACAAACCAATCGGAACGTGGCTAATCTGGTTGACGGCCCTTTCTCTTT -GCAGTAAGTTTCAGATCTTAGCCTTTACCGAATTGTTAGAGTAAAGGTAGAAGAACGAAG -AGAAAGGTTAGAGGGCTAAGGTTCTTATACAAAGCATACAACTCGTGGCTCCTCTCATCA -ATACCTTGACCAGGACCCCAGCATTCTTTGTCCATCAAGGCAGGTTTGATCAGAACCACA -AATCACTAAGGCATAGAAAACCAACTATATGATTCAGTTGGCTTGAGACTATACTAGCGA 
-ATTAGTCTTCAATGGTGCCTTTTATCCACTCCCTGTTGAAAGCAGGAGGGGGTAAACAGC -AAGTACCACAGAAGGGGCTTCTATGAGTATGTACCAGTCTATGTATGGCTGTGGAATCCG -TAGAACACTCCAACACTGGCTCCTAGCCTAGGTATAGAGAAACATAGAAATAGAACAGGT -GTAAGATGTGCATCGTCTACCCCATTAAAAACATGCTGTGCATCTCATCACTGTCTTACT -GCATATATAGGCAGCAGGGAAAGGACTGGAGTTTACCAATCAACTGACGCTACAATGAAT -AATAACTACCAAGAAATTGGGTTGACTTCAAATGCCCCACAGAGCATAACACAGCATGTT -TAGTTTCACCTGCGAGCCTATGGGCAGAGAAGATCATGCAGCCCGCCGTCTGCTCTGAGC -ATCatacatgatatatatatatatatatataaataAACAAAGAACACGCCATCTGCAATG -AATTTAAAGAAAAAGGCAGAATGCGGTAAAAAAAAAAAAGGCAAATTGGTATATGGGCTA -GAACGCCAGCAAGGCAGAGAGATACTTAAAAAAAATCAATGATGTTCGTGTCCTCCAAAG -AAATTCATCACAGCATATACGGATACTCGTGGGAAAGTGCCCAGACCAATGTGTCAACCG -AGAAATCCATGGCCCATAACAAGCATCTTTTGAGTAAAAATGAGTATCAAACAAAGCCTA -AGAAAACAGAAAAGCAAATTTGATACAACACATACTCGAATGATAAAACAGCTAGAGAAA -TGAAATGAAACAAGATGGCAACAGTCCAAATACGCGGGTGAAGAGTAGGGAGGTCGCTTT -TATTTGGGGGCGTGACAGAGCCAAAAGTCCATAGGACCTTTGATCCAAGGCACAGTAAAA -GAAGAGAGGGAAGAGAAAAGTAACAAGCCAAAAAAAAAAAAACCGAATCAACGTGGACTT -GGGGTATGGGTGCGCAACACACAAATTCCGCTTCAGACAAAAGCAACTTGCTCGCTTGAA -AATCCACTCAATTTAAGTAAAAGAAAGAAAATATCATGGGTGAGGGGGAATTGAAGAAGG -GTGCTATGGAAAGAAACACAGAAGGAATGTTAAATAAACTTCGTTCTGTGAGCAAATGCG -CGAAGTGTCGTATGTGAGCAGACCGATTGGAAGCTGTCACAAACTGAGCTACGCCTGGTA -GCCTCCGTTGAAGTTGTTGGACTGGTCCCAGTTGCCGGACGGGGACATGGGCTGACCACG -GCCGTAGCCACCGGCATTACCTGCCTGGGCATAGCCCTGGCCAGCCATAGTAGGCCCATT -GAAGTTGTTGGATTGTTGATCCCAGCCTCGACCAGCAGGGGCAGGACCTATTCCACTAGT -AAGCAAGTGCCGAGAGATAGTATGAATACTCGACTCACCTTGATTCATGGGGTTTGCGGG -ACCACCGTATTGGGGGAAGAAACCGACAGGGCTGTTGCTGAAGGGGGCCGCGTTACCCTG -CTGGGGAGAGAAGTTGTCGAATTGGCCAGTAGGAGGGCGATCCTTACCCCACTGCGGTGG -TCGGTCAGTAAATATAAGCCGGGAATCACGGAACAAGCAATAACTTACACTGCATTTCAG -AGGACGGCCGTTCACGTTATACCCGTTCAGTTGACAGATCGCAGAGGCCGCATTTTCATG -CGAATCCATCTTCACGAATGCAAAGCCACGATCGGCTTGGAGGCGAGTTTCAAGGACATA -GCCGAAGTTCTGGAACAGGGGGACCAAGTCGTTCTGGGTGGTATACGGGGTTAGGTTGCC -CACGTAGCAAGTAGTCTGCCACTGGGGAGTTTGTTGGGCGACCATGTCGTAGCTCTGAAT -GCCGTGGGTTGGGAAGTGGTGGTGTCCAAAAGGAGTGGTAGGAGTCATACCCATCGCAAC 
-CAGGGCCTGCTGCTGGGAGATTGAAGGCTGACCCTTCTGATTGGCCCAGTTGCAGCGAAT -AGCACGAGAACCGAGCCATTCACCATCCATTGAGTTGAGGGCCTTGTCTGCCTCTGTACG -ATCGCGGAAAGCAACGAAGCCGTATCCACGGGAGCGACCAGTCTTCATGTCCCACATCAC -ACGCGCCTCGGATACTGAACCGAAAGCAGAGAATGCCTGAGTCAGCACTTCGTCGTTGAC -TTCATTGCTCAAATCGCCGACAAAGATATGGAAGTGGTTAGAAGTATCCTCCTTGCTGGT -GCTGTTGGACTGGTACGCCCAGTTGACACGGATTTCCTGCGGCAATTAGACACGGCCACG -AAGTACCACGAACAAGAGGAAACCTACCGATTGGTGAATTCGGCGACCATTCAGGGTTTG -CATAGCCCTCTCGGCCGCACCTGGATCGTCAAACTCGACGAAACCATAGTTGTGGCCCTT -GGTGGTGAATTGCCCCTGGAGTTGATAAAATAATCAGCTACCGATACTTCGTCAAGGCCA -AGAGAGAAGGCAGTTAAATCACAGATCACAGCTCCAGAGACCATTACTGTGACTCAGGGG -AACCGATATGCAGGGGGTCGACTGTCAGGAGAACTCACATTCTTGTCGGGAATGATCTTC -ACGCTGACCACGTGGCCGGTGGTCTCAAATATCTGCTTGAGGATGTCCTCAGTGACCCTC -GGATCTAGCCCACCCACATATAGCGCTCTCTTGTTTGGTTCAGGGGCCGCTCGGCGAACA -TATCCGCCAGCACTGGTTGGGGACATCATAGCCCCTCCGGACATAGGAGAGGTCATGGCG -GTGGGAATCGGGTTGGTATTCTGGGGAATGATCACAGGAGGGCGCGGGGGCGGGGGCATG -TGGGACTGGTTGCCCTGGCCGTTGCCCTGAGGAGCATCGTACTGTTGATTTTGACCGACA -GCGGGCTGAGGCGCAGCCGGGAGAGAAGATGGAGCCGAGGCTTGAGCGCTTTCAGCCATT -ATGATTGCTTTTTGAAAGAATACGAATATATTCCAAAGGCGATCAGAGGAAGTGAGTGAT -GCGGGAGTCGGAGTTAAATGTCAGACTGGGAGGATCCAGTATTAGGTGATCTGTCAAATT -CGGAGTTTGAGACAATGCAGAGCGTCGCTGAAAGCCACAATCGCGAAGAAGACTGGGAAA -ATATATGTAGTATAGGAGGTCGGGACCAGGGTGGAAGAAATCCAATAAGAAAGACCCTCA -GAGGTGGAAACAAAGAGCACCGGGTATAAACAGTGCTAGCAAAAGAGACACCGAAGTGGA -ACCTAGGTGTCTTGTGGTTTTTTTTGTTTTTCGATCAAGCTCCGGTAGAAGAACTAGGAG -GTATTTCTGTGGCCTGATAAAGCTGCGGTCGGAGATTGTTTGAAACGATACAAAGCGGCG -TTGGGGGAGGTAGGAAGCCCAACAAAAAGAGGAGAAAGCTCAAAGAAAACCCAAGCATGC -AGGTCCCTTGaggaaaagggaaattcaaacaggaaagaaaagagaacaggaaaagggaaa -agggaaaaaaaaaagatgaggatagaaagaaaaaagaagtgaacagcaaaagtaaaggga -GGGAAGTTAACCGCTAGGGGTGGGGGGGAGACACTTTACCCCAAAGGGTTTAGCCACACT -TTTTGGTACATCCCAAATtctttaccttctcattcttctactttctttctattcgtatat -cttcatctcttaatcttcgtctgtatctggttcttcatcttcatcttcCAAATGGCCTAT -TCAAATTATTCAAATCCAGATTCACTCACAGAGCTTTCGAGCATAATAGAGCGGACTGTA -AGCGCTGGCTCTGGAACATTGCTCTTGGCACCTCTAACAGCTTCTAGCTTATTGATACTG 
-GTCGTTTGTTCCGCAAATCTGGCTCTATGCCATCGAGAACCCACCTGCAACGATCCATCC -CCTTGTATCATGATAGTTTCCAAAATGCACTGGATAACCTATCAGAGCAAATTGTGAGTA -CAAATCTCAAATACATGAATAtttgctttttttttcaattctgttttttccctccctttt -cttcttttctGACACTCTGCAGTTCATTGCAAAAGCTTTCTTGGAAAAGGATTATGAGGC -TCTCAAGGCCACTAGCGCCGCTCCCCAACACATTGAAGATGTGGCGATGAGCGATGTGAA -ACATGAACCTGACATTCAGCCTCCCCTGACTGAGAAAGTCGAAATGGACACAGACATTGA -GCCTACTATGCCTAAATTAGAAGCGGCTATTTCTAGCCAATCAGCTCCAGCAGAGATCAA -ACCCGACACCCAATCGGGTGATGATGTAGTTGTCAAAAAAGAGGGCGACAACAATACTGC -CACCGATCAATCATTCCCTGGACCTAATGAAGATATCACCTTCGATTCAGCCCTCAACGA -CAACGGGGGCACAAACGATTTTGGTCTTAGCTTAGACTTCAATGATGATGACATGGGCAA -TCAAGCTTTTCTATCCGGCTCAAATTTTACAGCCTCTGGAGCTACTGGCACGGACAAGCT -AAGCACAACCCAACCTGGGAATACTGCAGATGTCCCTGCTAGTGGTGGTGCATTTGATAT -GGAGTTACAGAAGACAGAGGGCGACGGCAATTTCTTACAGGGCAACTCTGGGGACGATTT -CATGGGGCCCGCTGAATCGAACTTCGACGATTTGTTCATGGACACTGATAATTTCGGGGA -GAATGGTGGGGATTTCAATCAGCTGGAGGGGGACAGCCTGATGAATGTCAATGAGCTGGA -TGATAATTGGTTCAACTGAAAATATTTGGAAAGGGGAGGGGGGCACTAGTGGAATCTCTA -CAATCTCTAGATAAGTCTAGATAAGAGCAATAGCCAGTGGGGGAGGATGGGAGGGGATTG -GGGAAATTGAAGGAACAGGTATCCTGTGGTCTGCATGCGCTCGCATTTAGCATTAGTGAT -AATACTCCACACACATAAATCTAATGGCATAATCCGTGATTCTGTGGCTGGAAGCATAGA -TTATTATGAACTGTATGAATCTTTCGTTACTTCGTTACTCGGGTTCACTGCATGTAGGTA -GATCCTGTGGAATCTCGATGCTAAATACACATTTACTGGCCACTAGGTTTAAATCTGCCA -ATCTGTTTCAAAGAGAGAGCCAATATCTTTGACATTTGAGAGACAGTATAAGTTCTGACA -TCCCAAGTACGGAATGCTCCCCCCAAAAAAGATGAGACCATAATCCCTCCTAGACAATTA -TTGAAAGCACGAACCCAGTGGTGACACAGAGCCCATTGTACAGGGCCATAGCGTCAAAGA -ATATAAATCGAACCACGTTTTCCTGTACCTGGAAAGATCTTCTTTCTAGTATATAGGGAG -CTAGTACAGTTTTTTCTAAGTGGTGACGTAGTTTAAGATGTGGAGAAGAAAATCACATTC -AACAGGATGATCACCCCAGAGAGGATCACCCCATAGAACACGGGAACAGAATTTCCCGAA -TAGATCTACTCTATATATGCACATATGTCTGTTGTATACTCCGAGTACGTACTAAGAGGG -TCTAGGCAAAATGTGGCTGATGATGTCATATATGAAGCGGGTAATGTCGTCACGTTAGCA -TTTACTCCGTATAATGCCTTTGCTCGATGGTGACGAACTAGAACTGATCTTTATCGGGGT -CTATCTTCCGACCACTCACAACCATAGCCGTGTCCATGAACACCCCCATAATGGGCCATA -GCCCGGGTATAGAAGAGATATAAAAGCTTTAGTCACATTTCGGGTTCAAATTCCATCAAT 
-ATCAAGGTCATGTGAAAAATTCCCCCACATGTAAGCCAAAACTTCCAGATTGGGAAGGCC -AAGATACCCGCTAACACGTTCATACCGTGAAGTGAAGTATAGGCCGTGTGATCCGAATAC -TCACTCTAGATAACTCTAGACCCTAACATGTTTCTCCACAAGAAAAATGCCGAACCAAAC -CCGTCTCTTTTTTAATTCAAGGCGTATGGCTCGGATCGCCGGATGTGAGCCCCAGCCTTG -GCTCTCGTTTTTCCCCCCCGCGGCTTTCTCAATGGTGTTGTCATCGAGTTCGGATTTATC -GTTGCCGACTTTTTTTTCTTGGCCTTTTATGTTGTTTATTTCCGTGGATCGGGATTGCTT -TTAGTTTCAAGGCCTTTTCTATACTTATATATCTTCTGCGCCACATTACGCCTTGCCAGA -GATCACCAAACAGAGATCAAGGAAAAAGAACGGAAAAGGAACCACTCTTACTTTGCTTGG -CAGCCAATCCTACACGTCGAAATAGGGATCTGCATAATGATCGGAGTGGACTTATGAACG -TCAGCACCCCCGAGAATTCCACCCTCCTACAGTCTCCTTGCGTAAAGGGATTCAAGGGAG -AAAAAACCATCGCACAAGATTAAAGATCGAAGGTATGTTGAGTCTGTCCTACACATTCCC -TGCACTTTGTATCACCTTCCTCGGGCCCATTTTGAGCCCTTGTCTTTCCAAGATTGCCCA -ATTTGGCGCACTATCCAAGGGATATATACAATACAATTTTTGGGGGGAGAACCTTCGGGA -TGCTCAACAAACTTCACAATGATGGGTGCTAATATCATCCCTCTGCCAGCGCAGTTCTCT -TCGCGGTCGGCTTTCAGCAACCCCACCCCGCACTCGGAGTTCACAGGACCTCCGTCATTA -GGAGTATCGATCTCCAATCTCATCACACGGTCGGCTCGTAACGTGTGTATCGAAACCCAA -AGGCAACTCCCTAGGAGTCATCGGGACTCAATCTTCAAAAAGCTGGCGGGTTCACGTGAA -AACGCAAAGCGATTCCTTCGCTTGCGCTCATCTGGAATTCAGGCACCTTCGCAACGTTCT -AGTGTACCAATCTCGAAAATTATATTGTCGCCGACATCGGTCGATTTGGAGGACGAGATG -CGATCAGATTCATCATCTACCGAGGTACTACCTCGCCCAGTAGCTGAACACACTTTGACT -ACCGATACTACCACACCATTCACTCCCTTGCGAAATGAGAAAATTGTGGCCACTGGAAGT -GGCATTTCTGTCGGTATTGCCCTTACCGAGCCCGTTCTTTATTTGCAGGGCTATGATCAG -CAAGATCCAAGCAGTAAAAAGTCCGCAATCCTGCGGGGGCAAATGCATTTAAAGATTACC -AAATGTGTTAAGATCAAGAAGATCTCAGTCTGCTTCCGCGGCCATGCTCAGACCGATTGG -CCGGATGGTATGAATCTACATAAAAGTTAAGACTTCAACACGAGCTGACACTGTTCAGGT -ATCCCCCCTAAGAAAATCCATTTCCATGACAAAAAGGATCTCTTCACGCATGGCGTGGTC -TATTTCAACCATGGTGACACTGCGCTGATGCAGAATGACTATGGCGCGCATTACTATCAG -CATGCCAAACCGATATCATCTGCACCAGGAAAGGAGGGAGTGACAATGACGACTCGAGAA -TTATTTTCCAATCGCAATTCTGCCGCCACACTCGCCCTTCCAACCTCTCGAGATGCCAAA -CGTCTTTCCCTGCAAGCCAACCGTGGCCATTCGCGCAGCTTCAGCAAGAACGAAACTCCG -ACTTCCCAACCCCAACCCCAGCGAAATTACCGCATGTTCCCTGTCGGAGATTATCTATAT -AGCTTCGAGTTCCCAATCGATGGATCATTACCAGAGACGATTAAAACGGACCTGGGGTCT 
-GTGAAGTACGACCTGGAAGCAATGGTGGAACGATCAGGCGCTTTCCGACCCAACCTCCTC -GGCGCCATGGAGGTCCCCGTGATTCGGACCCCTGCCGAGGGTTCACTGGAGCAAGTCGAA -CCGATTGCGATTTCGCGCAACTGGGAAGACCAGCTTCACTATGACATCGTCATCTCGGGG -AAGTCCTTCCCGCTGGGCTCGCAGATTCCAATTGCGTTCAAGTTAACCCCACTCGCAAAA -GTAGAGTGCCACCGAATCAAGGTGTTTGTGACGGAGAATATCCAGCATTGGACAGCAGAT -AAGAGCGTGCACCGGTTGCAACCGGCCAAGAAGGTGTTACTCTTCGAAAAACGAGCAGAT -TCGTCGAGTGTCAGCACGTATCCCGGCAGCTCCATGCGTGTCACCGCAGGCGGCGGCATC -GACTGGGATCAGCGTGCTGCTGCCGCAAGGGGGCAGGAGATTGTGGATCGCAATCGGACG -AATTTACTAGGTAATCTAGCTAATGATTCTGGTGTCGGACCAACTGAGATGGAATTCAAC -GTGCAACTGCCAAGTTGTCACGAAATGAAGGGGCGGGATGAGGGTCAGCGGCTGCATTTC -GATACCACTTACGAGAACATTCAGATCAATCACTGGATCAAAGTATGTCATAGGTCTTCC -CTCTTTTACTTATTGTGGTGATGATACTAACCGATCCACGCAGATTGTCCTCCGTCTATC -CAAGGTCGACGAAAGAGACCCCACAAAACGAAGACACTTTGAAATCTCCATAGACTCGCC -ATTCCACATCCTCTCCTGCAAGGCCACCCAGGCCAACATTTACCTACCAGCCTACACAAC -CCCAGCCGAAGAGCCCGCTCCACCAGCCCAAGAGTTTGAATGCGGATGTCCCGGTGCATC -TCTCGCACCCCGGGAGCAGATCATCGCTCCGGCGATATCTGATCGTGAAGACAGCAACCC -TGCCCTAACAACCATCGCCCGCCGCGGCTCGACAGGCCACAATTTCACCCGCAGCTTCAC -GAATGACTCCGGTGGCCTCGCCCGACCACCACAAGCACACCTCGCATCACCACCAAACGA -CCGCAACATGCCCATTCCACCTCGCCCGATGCATCTGCTCCGCGCACCATCCTATGCACC -CCCCGCTTTCGACGAGGTCCCTCCGGCACCGCCGCTTGTCACCCCACCTCCAGAATATAA -CACCATCGTCGGGGACGACCGAGAAGCCGTTCTTGAGGATTATTTCTCTCGTTTATCGTT -CTACGAAGAAAACGAGGACGATGACCGTGGTCGTGGCCGGGTTGATGTTCCGCTCACACC -GGGTGGACGCGTCAACCGCAGCATGGACGTTCCCCGTGAATGGGTGCGACTTGAGGATTA -TTTCCAGTAATACCGCTCCCTTTTGAGGCATTTCTTTTTTCAAAAAAAAAAAAAACGAGC -TGTCACGAGGTCTGAATCATGAATTTACGCTTTTGTTCATATTGGGATTGGTGTCCGGCA -CATGTTTTCCAGCGATGCTATCTATCCTTTAGATCTGGGTCTGAACCTGAGTCCGGCtca -tcatcatcatcatcatgttcttcttcttcttcttcttctttttctttttGGGCGGGGCAG -GTGGAGGGGGAGGTTATACTTGGGTTACTTTCTTTCCGTTTAACAGCACTTCATGATTTA -TGCAGGACCGGCTTTACTTTTTCGGTTGTTAATACCGCTGGATTTTTTTTTTTCTGATTA -GGCAAAAAGGCGATATGTGTCATTTAGTCAGATATCTTACAAGGGATACATGTAGTAGGA -ACTACAATCAAGCATCAGAACTGCGCTCAAATACTGAACTATCATAAAAATCCCTTGGCT -GAAACCCTCAGGCAGTTCCAAGGTCGATAAGAACTGATGTGTCAGCGATAAGGCTGCGGG 
-CAATCTTGGAACGCATGGAACTGCCATCCCCGGTTTAGCTCTCCCCACATTTGTCTGCTT -CTCCAACTGTCACAGCTTTTTCTTCTTGCCTTCTTTCTTCTGTTCACACTACTTGTGCCT -TGCTAAATTGCTGAGAAAACTACAGAGCTAGGTCCTACAGGGTTTCGTAGCGCACTTCGG -AACTTTTGAGACGGATTGCGCTGATTGGCTCACGCTTAGTGGCATAGGCAAGTTAGCATA -TGCGATATAGATGAGCTGCAAGAAGCAAAGAACAATTCTGACTAATTTCAGTGAGCAAAA -AGCAGCTTTGGTTTATCCTCGACAACCGCATTCTTCAATTCTCTGGACGTTTGATCCTTG -CATCCGGAGAGTTATCCGCCGTACAGACTCCTGGCTGTCATCATGGCTGTTGAAGTCGTA -CCTCTCACTGAGGCAGATATTCCCGGTGCTATCGAAGTAATTCAGCAGGCATTTGCAGAT -GATCCTTATTTCAAATGGGTCTTCGATTCATCCAAGGTACGTTGCATGTTGAGAGACAGC -TACCAAAAAAATAAAAAATAAACAAGGCACCCCTCCTCTCCTAATCACCAGACTAACATA -CATTTCTCTCGTGGCGCGGTCAACTTGACCTGCAGTTGCGTCTCCTGAGAGATGATTGTA -CTTATAGTTTAACAAACAAAGAAACTACGACTCCCTCGCAGCACGCTGCCATTGGGGAAT -CAAGAATGCACTATTCCACGTTGCCAAAGAAACCCAATACGACGGCTCCAAATCCCACAA -CACCCAAACTCCAACCCCAATTCTCGGCGTCTCCTGCTGGCTAGCACCTCACCCGCCCAC -GCAACCAGAGAGCTGGTACTCCTGGTTTCAATCCTGGACCCTATCCTTCAGCCAGATGCT -CAACAACATCCGGTACTTTGGCCGCGGCGGGCTGCGCACGAACAGGTACTGGATCTGGAA -ACAGCGGCAGGCTGAGGCGCAGGATGCTATTTGGGATGATCCGCGCGGATACTACTTCTG -TAACATCGTTGCTGTTAGCCCCGAGGCGCAGGGAATGGGTGTTGGGAAATTGTTGTTTGA -GGCTGTTACGAAAATAGCTGATGCCGAGGGGGTCAAGTGTTATCTCGAGAGCTCGAAGAG -TGTGCCCAATGTGGCGATCTATGAGCGTATGGGCTTTCACATGAGTAAGGAAATGGAGTG -TCGGGATGGGGTGGATGCTTGTATGGTATGTACCTTATGCTTATTTTCTGGTGTTTGGTC -CCGGGGGTGGGATTCGTTTGGGTTGCTGCTGACTTTGATTTTTCATAGTTGTACTGCATG -GTCCGGGATCCGAAAAAGTAGTGATTTTGCTGCTATTGCTTTTGCTTTTACGATTTAGGT -CGGATGTGTGGGCCAAATTGTGCGTATGTCATGGATTTGTCCACAGCGAGATCTGGAAAA -AAATGGGCACAGATGTCCTTTCATCAAGAGGTTTCACAAATCTAGAAACGAACTACGAGT -ACATCTTTCCTAGTAATCTTAGATATGGCCCCGTTGTAAATCAGCCCCAATGATCCGATC -CACTCATCTACTAATCTGCAAACAGGGAAAGATACCTCAAATCTGATCAAATCAATTCAA -TCTAACATTGCGAGAATAGAGTATCATACCGAAGAATGGTCTATGAGATTGCCATTTACA -CCAGTCCATCCGTTTGCAACGGTCCATCCAACCCGGGAAATGATCTATCGACCCTTCGAT -CGGATCTCTCCCCACGCTAGGATACTTGCATGTTGATCTGTTTATCTTACTGCACCCAGA -CAAGAGGGGCAATAGTCTCTGCATCCACATTTTAAGCTTACCCAGTCATGTTCAAGCTAA -TCTGCCGGGGTTCTAACGGCCTGGTTTTCCGGGGAATATCCGATCATCCTAGCTTCTAGA 
-TGATGTTTCAAGTCCAGTCTAGTTTGAACATTGCACCGACTGGTCGGGTGCTTGCTCGTT -GGAAATCCCTCATCGTGTCGAGGCTCGTGCGTTGATCGAGCACATATATATATATCTGAA -ATATGGAAGAAACCTGTAGATACCATGAACATCAGATCTCCAGCCAAAGATTTTGAAAAC -GACGCTTTTGGATAACTCGAATAAAAGCTTTCATTGTGACTGCTTTTTTGAGCTTGTGCG -TACTCCTCGTTGGGCATACTTGGACAATGTAAATGCTGACTTTCCTTTAAGTGTGCATAA -TTTTGATCACCATGTTTCTCCTTCTGCTACGAAAGCTATGGATCCGACGAAGAAGACCAG -ACACACATCAATCGTCCACCTCCTTAACTACGCCCAAAGAGGTTGAATCCCAAGATAAGG -AATCATTTATACCTGAAACAGTGACCCTCACACCACAACACGACGAAAGAGGCCAGCTCG -CCAATCAATTAGCTGCCCTTGCAGCTACCGTGAGCAATTCATCATACAACAGCACCTTAT -CATCGATTGCTGCACCCATAGTGGAATCTCAACCAGCAGCGCTGGTATCTAAGCCAGAAC -AGTTGGAACAACCAGAGCAACCAGAAGAAGAAGAAGAATCTTTCTACCATCTTTGTAGTT -TGATAACAGTCTCCTGCTTGCCGCAGCATATGTCAGGCCACGCATCAAAACCCTCAGCCT -CAGCACTAGATTCCAAACCAGGCAAACGCAGAAGTAAAATCTTTGACCGGAAGAGAGGGT -CGTCGATTCTGAGTTTACATATTGGTGATCTACCGAGCCTACCGTTTCCCTCGAAAGGGA -AAGTCCCATGCAGGCCTCGCCCGAGATATAGTCCTCTTCCTCCACCATTCGTGCAGAAGA -GGCTTCTTATCCGGCCCCAGGTACGTTTTTAGGAGCTCGGGTTTGGGTGGAGGGTGGACT -GTTTGGGCTTCTTTATAGCAATTTGTATTTGTTCTATCTTTGCTCTTGTCCTATGATATC -CACAGTGAAAATATTGTCTACTGTCTATCAGTTAATATTCATGTACTGCTGCTGGCCTGT -TTTTCTTAATGTTGTTATCCATCTCGGGTTCTAGGTGGAAGACTCACTCGTACATTTTGT -GCTCCTACCATCTTCGGACCTACATTGTATATCGTAGAGTGGTCAACCCGGCATATCTAA -TACGAATTATTCTTCCCATTCCTAGTTGCACTTTTTTACCTCATCATTGTTCTTCTAAGT -AACACCTCTCAAAGAGACACTCAAAGACATCCGAGTGATCTCTTGGATACAATATCCTCA -ATGTCGTTTAGGCTAAGTATATCAGGTAACTAGACTATAACTACGTAAGCCAAGACATAG -CATACACAATTTTTTTTCCTTACCATACACCTCGAGATCGATAATGACTTATAGTGCAGG -ACTACGTATGTAGTTGTTTACGCAACTGCGACGTAGTCCTATAGCCTGATAATTCGTACC -TCACCTACGCGTACCTCCACAGCACAGCTCTACTGTCTATCCCTCTGCCGCTTTACCAAA -GAAGAGGTACTATAACGTCCAAGCGCGTGATCTGCAGGGCGGCAGCCGCTAGCCTTGGAA -ACTTGTTTGCCCACGCTAACACGAAACGCATGTCCACACGCACCTCAGAGTCCCTGCATG -AAACAAAAGCCGGAATCCACTCCCGCGGAGTTGACTAGCAAAAATGGAGCAGATCATCGG -GTCTCCAAGGCTAATGACTCGGTGTTCATTTCCAAGCAATAACGTCCAGGCATTTGTCTT -AGTTCATGGGATCTGTTATATTGTACTTAACTCGATGGATAAATGGAGAAACCAATCGCG -TGACTGGTACTAATTGGATTTATACGGAGTACCACATAGATAGTTGTATCGATGAAATCG 
-TGGTTGGTTGTCTATGTTGTAGAAGGCTTTTCAGATGAGAACCAAGCTGTAGCATGTTTT -GTATCTTCTTGGATATACTACATATCTACTGGACTCAAGCAAGATGTCCATTTTTTTCTC -AGACGGCATAGACGCGAACGCCGATTGGTAGGGGGGAAGGGGCGAGAGTGACCTCGAGAT -TCAGACCGTCCATATATAACAAATACGTAATGTCGTTCGATTTTCCCAAGATATGGTGAT -CTGCAAGCTTCGCATCAGCTGTTTGGTTAGTCTATATGATGTACACGGTAGTATTTAACG -TTGTGGGCGGGCTAATCTCCGTGCTGGAGAAGACAATTGATGGTCAGTGATATTCGGACC -CGGCAGCGGCAAAACCTGAAACCAAAGGTTTCAGCTAGCTCAGATTGGTAGAAGTGCCGG -GACTCGCCCCCACTGCCGATCAATCCCTTTGTTCATTAATATTCTCGATCTTGATGTGGT -ACGTATCATGAGCACGTCCCTCCTAGAAGGGGTGGGTATGTAACCGGTTGTGGCTTCGGG -ACTCCACCCCCGAATATCGCTTGGTACGCACAAACTGTTGTCAAAGCCGCTGATGGTATT -GGCCTTTACTTTTTCTTGACTATATTGTATATGTTGTCGGACTCGATCATATTTCTTACG -GAGTACGGAGTACCAGAGCTGATCTTCCGCTTAATAAGATCCATAGCAGCAGATATCTTG -ACCATCACACAAAACGTGGAGTCGCAGATGAATGAAAACACGTGGTGTGTGGATTATGAC -CAGGTGATTAGGCTGCAGATGTTGGTCTATCAGAAACCAATCACCAATCGTTACCCCCTT -GGCCCTTCGAGGACAACCTCTCGTTGATATGGATTTATTTGCCAAGACACTATCTTTACG -GAGTACATGCATGAGAATCACCTAGTCTAATTGGAGAGCACAATCCAATCACAGGAGGGA -ATACTACAAATAAGGATCAAGAAAAGATGAATAGTCCCCACCGTAGAGATATATTAATGT -TTCGCTACAAGGAAACGATCCAATTAAAACTATTCAGTGATAACGTATGACTTGTAATAT -CAACGAATCGCATAGTTTCCGGCCCAACTGAGCTTGTCGCCCTAAAATCCGGCAAACCAA -GAACAAATGAAGTACAAGTGGGCGCAACCCGTGATATTCTACTTAAATGTTGGGATCCAG -GCATGTCGAGTCGACAGGTTTTCCCAGGGTCACTAAAACCAAGGCCATACCGGTCTAGAT -GCGCAGTTTCTTTCCTCGAAGGATGAGACCCCTGTCGGTGGCTCAATTGAGCACCAGCCT -CGTCCGTGGTTATCCGCACCGCGGCTGCTGGCGTTCTAAGCTATGGCGTTTATGAGCATG -TGAGAGAAAGAAGACACGTTCAGCAGAAACGTTTTTGGCATGACAGTCTAGGTATGAGCG -TATACAAGGACCATGTGTAGCATACCTTCACTTTATAGTCAGGCAGCCAAAACATAAATT -GCAGGCTCGGCACTCGGCAACTAAGACACCCGACAATAAGATAAATATATATGATACAAA -ACAACCTTGACCTTGGCTGCCATAGATCATGATCCGTGCCACTTACCCGATTCGGCTGTC -GGCATGGTTTCCATAATCTTCAGGGGTGGCATAACAAGACTTGGCTTTCGGTAAGTGTAC -AACGTACGTGGTACTTGTCCCGATCTGTACGAGATTAATCTCAGCCTGTTATTGGGTTCT -TCGATACAGCTTCCAATAAGCGCCCGACACGGCACACAAGCTCATTTATAATTTGTGATA -ATTGCCGGAAGCCAAGCAAACAATTAATGCAGAACAACGCAGCCCAACATTTATATTAAA -CACGGTCTCCTCCGTATGGAGGAACGCACAGACTAGGTGCGGAAGGGGAAAAAAAGGAAA 
-CTCTAATACATAGTCGGTTCATTTCCGGAAAAGCCTCGAACTAGCGTGAATTGGGAGAAA -TTGGGAAACATGCATGTGTGAAACGGTTTAGGCACATTTCCAGATTCCCGGGATTTGAAT -CCAATTGTGTGTCCGTACGGAGTACTCCGTATATGGTAAGTAATTACCTAGTGTTCTATG -CTCTAGATAGGCGATGCGACGCCCTGGTATCAATCCGCTGATTCAGCTACGGACCCGTGG -ACCGCCGGTCCTTGGAAGGCTAATTCGGAAGGCCATCTATCAGCCGTGGATCCACCACCT -CGGGTTCCTCGACAAAGCCCTAAGTTATGTATTTCTCAGGACCTTGGGTTCTGTATGGCA -TATCTTTTAGTTATACTGAACCACCCTAGGTCCCTTGCAAAAAGGTATGGCATGGCATCG -GGCATGAATCGAAATGTAATTTCAAGAAATGGAATGGAGTTACAGGATGGAAGGTGAAAT -TGGCGGCGTAGATGAAAAAGGAAGCTAACTTGAGAAAAGGATACAAGGAGCCAGTATAGA -ATCCCTGCAGGGGGTACACTGTATTCAATCTTGACTTCTTCAACCGGCGACCCCGGAGTG -GAGATTGCGTGTTCTTGGCCTTGTGTCGGGATTTCATGATTCCATGGATTATGCCCGGTA -ATATCGCTCAACAGATGACGAAAAATGAATTCCAATCTTTCATAGCTTCCGTGCGTCCTA -TCAGGAAAAAGCACGGACTCGATCGGGGAGAACATCCAACCGTTGTAAGCGGTCCAGCCG -AAGCTGCACGAGGGAATGTTCCGTCGATAGTCTGCTGCGGGAACTAGGTACATGAGTTCT -GCATTCAATGCATGGGACATAGGTACCTCGTCACTTGTTTGTCGTCATAGGTTGCTTTGA -TTTTGATTTTGGGGCGGTTTCCTTTCCGCACGGTATTGGGCAAGCCCCCCCTTTTCCCCC -TGGCCTAGACAAGGCGGAATGTGATGGAAGACAGAACAACCACGTCGCGCATGCAGCCCC -TGGAGACCTAAATTGAATGAATTGGATTTCCTATCTGTAAGGTTTTGGGTCCGAACGTTG -TTGAAGACCAGTCAGAGAAGTTGAAAAAAAAAAAATAATAATATCTTAACAAGCATGTGA -TTTAGGAACGACATTGAAAATTGGGTATGCTTGTACAATGTATTTCTCTCTCGTCTTCCT -CTACCTCGGAAGGAAACAACTCTCAGGTTGCTAAGCTGCAGCCAAATGAACTAAAACAAA -TAGATCCACGACTTTGAAGTGAAGTATAGGACAAAATTTCCAAATCGGGGAGTCAGGACC -CCAAAACCAGAACGGTTCACTCGTCTAAATCTCATATATGAACCCACCAGACACCCAAAC -AAGTTACCAGACGCTAGAAAAGAACAGCATGGTAAAAGAAATACAATGAGATCGGAAAAC -ATCAACGCTTGCAAAGCCAGTAAACATCAAGATATAGCCAATCATTCGAAGTATCGGAAA -AAAAAGAAAACGTAGTCGACCGCGAAGCAAAGTTTAAACCAGCACTCTGCCGCACAAATT -CAGGTAAATCAGAATTTTAGAAAATTGTCGATCATGGTCACGTAAATGGGATTTGAAATG -TTTCTCAGAGAGCCGCGTTTGTCGGTGGAGGGTCGGCTGTACCCGAATAGTTCCTGATGA -CGGAGTCTTCCATGCCAAGAACGGACAGGAGAAAGCGGGATGGGAAACTTTAGAAAATGT -TCTTGACCGAGTTTCAGATGTGGAAATTTTATTTCGAATCAAGTCGTCAAAAAGAGCCGT -CACCGAGAGCAAAGTACACATTTTCCGACTACCACAATGAAGCGGTCGTGTGGACTGTCG -TTGCAAGTGCTATTGTCTTTCGTTCCCTCCATTTCGGTTCGGAGGAAGTGAGCGGCGGGA 
-TTGGAACAGGTGGAGTATATATAAAAGTTTTTGAAGAAGTTGCACATTTCGATCCCACGG -GAGAGGTGTGAAAGGGAATGTGAGATTTCAATATAGAGATATACAGAGATAGATAGATAT -TTATAGAGAAAAAAAAAGATATTCGATGAGATATGTaaataaaaaatatggaaaaacata -aaacataaaaaaaagaaaTTCCTTTCCATTAAGGATAAGAATGAGAGTATACTGCACGGC -ATCCCCCACCATCCCCACCCCTAAAAACCCTCAAAGCGGGTCCTGCATAAGGGCTTTAAT -TTTGACCGTTGAGACCTTTTtttcgttttgttacgtttcgttacgcttttttttttcgtt -tTCTTGTGTATTTTCTAATTTCAGTATGTTTCATCTTATTCGTAAACAGAAAGACATCGG -GTAGATGGGAGATCGACACGGTTCCTCGCAGCCGTTGATGCTTCTTCAATTCACACGTGG -CGCTAGTCGGGATCTTCATGCATGAGACAAATACCTTCGGCAGTCTCTTCAACTTTGAAA -ACTGAAGCCAAATACCAGAACCGTGGGGTTAGttctttttttcttcttcttcttATGACC -CTTCAATTGATTCAAGAGTCAGACGAACACGGAGAATTCCTTTCGTTGAAGAGTCTGGTG -ACGATAGTAACATTGTGCAAGCTTACCAGTAAAGTTGTTGGAAAAAAAAAAACCGGGATA -AGGACGCGTTGGGCAGTCGGAGCCGGCGATAGGCTCTAGCATCAAGCCGAGGGGGGTTTC -TTATTTTGGGGGGAAAGGGGGGGGGTAAGGAAATAAGTAAGAAGCTTCGTCTCTTAGCAT -CGATACCCATCTTGATACCAAGATCTGTCCAGATTTGGAGGTTGGAAGGCGCATACTGTA -TAGGATATGTAACTTCATAAATTACCACGAAAGATTATGTATTCGAGTATCAGACCATGA -GTCGAGTACACAACCATCATAGAAGTCTTGGCAACGAAATCCGACAAACTGTTCTAGGAC -ATGGCCAAATTAGGGTCGTTCAACGTAGTAGCAATGATGGCAGCAGACCAAAAGATCGGA -GAGCTTGTACCCGATCTGGAGTATACGGGAAATCGTTTTCCGTCGCTCTTGTAACATCTA -TATTGTCTACGAATAAGAAAAAGGTCAAAAGAAAAGGGGGAATGTATATCGGGTCAGAGC -TTTTTGTTTTACTTCAGGATCGAATACTCCAGCAGCCATGCCCCATAGATGAAACCTCAT -GTGATGAACACCTTTCATTTTATTCCGTTCCGAGACTCTGGAAGCTGATCATCAGGCCAT -CTACATTCTACTCATTTTTTGCCCACAAAGGCCATTTATTGGGGGCAACGATTCCGTTAA -ATTCATCCCTTCTGCAGGATTGCGTCGTACCCGTAGAGACAATTTGCCTAACTGGGCAGC -TGGTCTGGCGAACAAATGATTGAAAGAACTTGCTCTTCTTGAAGTGGATTGTATGTCAGG -AGGCCTTGAATTAGCAAGCCCCAGTCGCAAAACAGGCGATGGTGCGACAGTGCCCCAGTT -GTAGGGTGTGGAGACAAGAGGTCAGTCCACATACCTGAGCAAGTCTAATGTATACAACTG -TGATTGTACTCGGCAGTAGTCGGGATTACAGAAGTAAAAAGAAATCAAGGAATAGAAAAA -ATAAAAAAAAAACAAGGCCAGTAAAATACAGGCTAAGATAACCCCCAACTTTCCCAAGGT -TAGAATGGCCAATAAGAATGGCCAGCATGGCACTATGCCCATCAACTTTGGAGACTGAAA -ACGTAGATATAGTACTTTGCCGTAGTATCCGGGTATAGCCTAAACATTGCCTAGTCTGAT -ATTTAAATTGGGTAGAACTGGTATGATGGAATTGGTATGTTAGTGTGCCTTGACCCTTAA 
-TGTATCATATAGTGCGTTTTAATAGTTCGGCTTGGGCCGAGCTCGGACCCGAGTCACATG -AGATCTCTTTGAGACCATTCCCCTTTGGATCTAGCGTTTCCGATCGATGTCCGAATGACT -AAGGCCTCTAAGAGCTCCCGAGGTCCCCATCTCCAGGGCTGGTCCTAGCAACAACGCTAA -ACCTACAAAGGAAATTCGAATCCCGTGATCTTGAAAGCGAAAACTGAGACACTTCCCCCT -CGCATCAACGATTCACATAGGGGTGATCTGACCTTCGACATTCGCTCTTCAGGGCTATCA -GCCGATCGGAATAGTGCAGCCGGGGTGCAGAAAGAGCATTCATTGGGGACCCGAGTTACA -TACCGTCATTCCCTCCACCGTCCCGTCGTTGTGGAGACCAGCCGTCCACGCTCGACATTT -CACCGCAACTACGGAGTAGGCCTCTGCCTGTTTGCTGTCGCACTGCTCAACATCTGAGTA -GACATCATGGGGTTCGGTCAGTTCGACACGATATGTGAGAAGGCACCTGTGCCTTTGTGC -TCCTTGGTGGGCCCGGCATCGACAATCTCCGGGTCAACAGGCATCATATCCAACTGCTAT -GCGCGTAATATTGAGCTTGCGAACACCATAATCTTCCAGGGTGCCGCGTCATTCATGCAT -ATAATAGCGCTTAGCATGACAGTGATCATGATCTTGCACGTTCGGTCCAAGTTTACTGCC -GTCGGTTCGTCTCGACTCCCTCTCCGCTCATACTCGGCGCAGACTGACTAATCTTTTCAC -TATCGATAGGCCGCAAAGAGATCATCACCTTCTTCTATATCTACTTAGCTCTGACACTGT -TCTCTCTTCTCGTTGATGCAGGTGTCGTTCCGCCAGGGAGCGACCCGTTCCCGTACTTCG -TGGCTGTTCAAAATGGCTTAACATCTGCGCTATGCACCTGTCTTCTCGTCAACGGATTCG -TCGGCTTCCAGCTGTACGAAGATGGTACAGCCCTCTCCGTGTGGTTGATTCGAGTCCCCT -CTGCAGCAATGTTCGCTCTCTCGTTTGTGATATCGCTGGCGACTTTCAAGTCGTGGGGCT -CGATGGGTCCCCAAAACACCACGGGCTTGTTCGTGGTGCTATACCTGCTGAACGCCATCT -CTATCGCTGTGTACCTGGCCATGCAATTGCTCCTAGTTGCAAACACTCTGGATGACCGCT -GGCCGCTAGGCCATATCTCCTTTGGTGTGCTGGTCTTCATCATTGGCCAGGTTCTTTTGT -ATCAGTTCAGCGACGTTGTCTGTAACAACGTGCAGCATTACCTGGACGGGCTCTTCTTCG -CTACGTTCTGCAATCTCCTCGCTGTCATGATGATATACAAGGTACGTTCGTGTTCCGTGC -TTGTAGGCTTGATTTTGCTGACCTTCTTTTAGTTCTGGGACTCAATTACCAAGGAAGATC -TGGAGTTCTCCGTCGGTGTCAAGCCAAATACATGGGAGGTTAAGGAACTTCTGTCTGAGG -AAGATCGCCGAGCTACGGTCTACGCCGACAATAACTCCGAATATGCAGGCAGCATGTACC -ACCATCGCTCTTCGACGTACGGTCACCATAATTATTGAGCCTTGTTCATTTTCTTCTAGC -TGCTCCATTCATATATCTTGTTTATTTCCGTTTCATTCTAGCGCTGTATATGTGACTATT -GACTTCATGATATCTGATTTATCTCTTTTTTGTGGATATTCAAATGCTTCCACAACATTG -GCGCCCTAAAGGCATCTTCCCGCACGGATATATACCTCGGCATCACATAATAATTCCACA -ATTTGTCAATTATTGGTCGGACAATCATTGGAAGATATTTTTGCTTTTTACTTTTCTAAT -TGAGTTTTGGCACCAAGTGGATTCATGTAGGGAGGACTACTAAGTTGTGCCATTTACCAC 
-TAGCGATCGGATACGACATACGAGAACCGGGTGCCTTGGAAGGTTCCGTTTTGAACTCAA -TTTACGTTGCTGGTGTCTCAATCACACGTGCATACCGCGCTTTGAAGGGAAGGTCCATAT -ATAATATTATTTCTGTTGTATATCTCGGATATCATGCTCCGTTTCGACCCAGCGCTAAGT -TCGTTTGGGAAGAAGTTGGCGTCCATTTTATTCCTCTGCAGGCCATTTTCCCAGGCGCTA -GCTCAGAGCTAGACTATGTAGCAAGGAGTGGCAGGCATAATGTAAGAAAGGCAAGAAAAC -TTGAAAGTGTTAGCATTCATCCGTCACATTTTGCGGCGATGCAAAAACGGCCCGGGCGGA -CCCCGAATATCTCGTGGGCCGCGTCACATTAGTCCAAGGGTGTGATCACTAGGCAGGACG -ATCATCGGGAGAGTGAGACAAGTGGTACATAGGATCAGCACTGCTGTGCTCCCCAACGAA -AAGAAAAACCCACTGAGCCACGGGCATGACTGCTTAGATTGGCAAAGCGGAAGTAATACG -GCGGCGACGCCTTGTTCTGGGCGTTGCGGGACGATCAAGGCCCATTGGCGTTTTTGTTGC -CTGACGAGAATGAACCTCGATACCACGGTAATCGTGGTTTTGCGTATACTATTCGCTGTG -TGCTAAGGTGTAGTGCGCGCGGACCGAAGTCTAAATAAGTGGTCTGCCGATCAGGGAGGG -GCCCCATGGGTAGAGTTCTTGTAGAACGTTCTAAAGCGATAATGAAAACAAGGGGCAGTT -GTGGAGCGCCATGTTTGAAGGATCGGGGGTTGCGATCTCAGTCAACGACGACAACGTCAT -CGCCATCCATGAAGTCCCTGTCTGTTTTGGGGCTTGTGTCCCGTGTCGTATGTGGGTGCT -TGATATCGTCTTTCTTGGCTGGTTCGGTGGACGAATCATACCAGCCTTTGTCTATCGACT -TAACCAAGACCGCGCACGCTTCCCTAAGCATCTCGAAAAACGCAGGATTCTGTTTGGATA -GAAACATGGTAATATGTGCGTTGGTTTCGTCAGTAGCCTGGTCATTAACTGCCCTGGTCC -ACAGAAGTCCTGGACTTCGAGATTCCTCGCCACGTTCCACTTCTTCTGGCAATGAACAGA -ACGTCCTATTCCTTGACCTGATAATTTTTTGACTGAAGGATGTCTTGTTTGCTGATGTGT -TTTCCACCAGACTTGTCGCACCTTTTCCGAGACATGTGTAGAAGTTCACACACCCGATTC -CACGTTCCTGCTTGACATTCGATAATTGCACAATACGCTGGCGCAGTTCTGACGTGCGCG -CGAGACAACCGACGAACTCAAGGTGCGATGATACCCATCCAAACCCGTCAGTGAGCTTCT -GGCGCTGCGTATACATAGCTGCACCGCCAGCGGCAACAGCACCAGCCGCGCCGGCGAACA -TGGCATATTTACCCCATCGTTGCCAAGAGGGCGTCGCAGCGGCGTCGGGTGTGGAAGACA -ACGCTTTGGAGCGCTCTTGTGGAGGTGCATTCGTGCTCCCCTGATTGGATGCATTGCCCT -TCCCACCGTAACCGAATAATCCTGCAACCTCGGAGAAGGTGTTGTAGGCAGTTGTAGCAT -TGCGGTAGTGTCCTTCTGCACCGAATGACACGACTCCTGGCGCTATTCCCAAAAATGGGG -TATCAAATGCAAGCACTCCTTGTATATGAGGGAACATGAAAGTACCCGGTTCGACGATAG -ATTCGGTGCCATCTGCAACCTCGGACTTTGAGTTTGAAGTCGAATTTCGAAGAATTGGTT -GCTCCGCTGCCAACAAAAGAAGAGCTTCTGCTCCTACTATTCCACCCATTGAGTGACCGA -TCAGAACGACATGGACTGAGGGATCGACGGTGGGCGATGCGGTATGGTTGGCGACTTCGA 
-GATCAATCACAGTATCTTGTAACCTGCATAAGAGAAATATGATCCGTCAGGAAAGCTTGT -ATTTAAATCAAGGAATGGACCAGATCCTACCAATCTCGCAATCTTGCCACACATTCTTTC -AAGTCACCCTTAGTCTCATACTTGGGGTAGACGGCTGTGGCGACCTCGACCGCAGGAAGT -GCTCTGCTTGCGAGAACACGAAGATGCTCAGGGAAGGTGCCAAAAGTATCATCGCCACCC -TAAAATCGAAGTTAGACAATGTAGGTTTCAAGCCGAAAACGATCTTCTTGAGCCGAAATA -TGTAATTCGTTGCGGAACCAGGTCTCCAGCAATTTGACATACCTTGAAGCCGTGAATAAA -GACGAGTAGTAGGGTCTTCCTCATTTTGAAATTCTCAGACAGCCATTTCATAAAATCAAC -ATGGGTAGTGATTCGACCAAGCGGAACAATAAGAGCTAAGTCTCAGGGCAGTTGATTCAA -ATTACGTCATGCTTAGCCGGGTGCATCCACGTGACTGGAAGATCTATACAAATATTCGAA -ATTTCGAAATCTTTCGCGAGTTTTCGAGGTCCGAATTTATATAGAATTGTGGATTTTAAT -GTGGTAGGAGGTAATTTCCTTCATCGAGTTGGAATTCTTGTTCATTTGACTGTTCCATGC -TACAATATAGCTATCCCGAAAGACCACACGGACCCAAACGCTACATCCCATCTCATAAAC -TCCAATTTAACCCGGATATCCTGCATATTTGTCTCATTCCCAACGCCGTATTTAGACAAC -GCTTTCTAATTGGTGGCTGGGCCATAGAGTTGATCTCAATAATGACCAGCTCGATGTCGA -ATTCAGTGTGACACGGCACATGCGGTGACATGACCAGACCGCAAGCTCTCCATGCCACTG -TTGGTGTTCCCGATAGCATTGCCCGTTGCTGAAGGGACGAAAGGCTCCGTTGTATTTGGT -GGTGGTATGGTTGTCCCGGGAGCGGTGCTAACAAGTCCCTTGGACACAGCAAAGCAGGAA -GAGCGGTCATGGTTGAGATTCTCGCCATAGAATTCATCAAGAATTGGGTGAAGCATAACA -GTCTGCTTGTGAATGGTGACAACTGAGTTAGTTGGGACTGATACCCAATTGTCTATGTAT -AAAATTAGCACATGTTGATAGGTATGGAGAATGGGGAAAAGGCTTCTCACGTCGTTCAAA -GGTGATGGGTTCACTGGCCACAAGAACGATATCGGCACCTTTATCGTGTCGCTCCATCTT -AAAGTGTCCTTTTGTTTCACCTTCCTTCCATTTGGTACCGGAAGAGAAATACAAGCTGGC -CGGTTCGTCTGTTTTGCTGCTGATGTACCGAGTGCAGACAACGGTATGACCGTCTGTCAC -AGCGAAGTTCAACAAACTCCGAGTCTCAACTCCAGTAACATTATATTTTTCTGGAATTTC -CTGGACCATTTCATTGATTCTGGCAATGGTTTTCATCAAAACACGACGTAGAAGAGCTTG -TCCGAATCCCTCAGGACCGGGGTCAGAGCTTGGATCTACCCCCTCTTTTTCTAGAAGGTC -AAGGAACAGGGCAAAAGCCCATTCACTGTCGGTGCCACCTTTGACACCCAAGTACCACTT -GTCGGCGAGTGAGCTTCCCAGCGTACGCTTGATGTAGCTCCACCCGCCGATGCCGCCATT -GTGCATCCACATCAGGGATTGGTGCTGGAATGGGTGGCAATTGTTTTCGGCAAGAGCACC -CTCAGTTGTTGCGCGCACATGTGCAAAAATCAGATCTGAGCATGTCTTCGCAGCGAGTCG -CTCTAGATTCTCACAGTTCCAGGCTGGCAGGGTTGAGGTAAAGATGCAAGGCTCCGGTCC -AAGCTTGGGATCTGTGTAGAAGCCCACACCAAACCCATCACCGTTGACAGGACGGCGATT 
-ATCCTTGATATTGGATGTCAGTGGGTTGCGACATGCTAACCACGTGGATCTGGTCTAGCG -AGATGGGACAACCTACCAGTCTCAGGCGTGAATCGTATGATTGAGTAAGGATTGAATGGC -TTGGCTCAGTTATCAGCTTACTGAGACGGATCTCATTACGCCCTTTGTATATCTGATCAA -GGTATTAGCTCCAAGCCCAAAGCTTGGAAAGATGGTCCTTACCAGGAATCGACACATATT -TTGCCTAATCAAATGCAATTGAACTGACACTGCATAAAATAACTCTTGTAACTGTATCCG -AAACTTGTAAGGTGTATACCAAGTTTGTCATTCGAAAAAATCGTACTCGTGATCAAAGAA -TGTATCAATAGGCTCGATTTCGTGGGGTTCGGGATGAAGAGTCACCGACTGTTGGGAACC -GGTGAGATGCCCAAACGTCAACCACGATTTTACGGCCCAATGATAATAATACCAGGATGT -AATTGTGCCACGAATTACCAAGAGAGAGGAATTGTTTTATCAGAATCAACACAAAAGAAT -AGAGTCACTTCCCTTGCACAGCACGTCATGCTTCACGTGACTGTGATATTACGGGAGACT -ATGCGGCTACTTGTGCAAGTCACGGGAACTGTTACATAAGTTATGCCTGATGTCAGTGAT -CCGGTAATCTACAGGGCCATCCCATTTATTTTCATCGTCTATTGAATGATACCACAACAA -TAATGAGTGGGAGGATTCTCTCCCATCGACTAGGACCGCTATTGCGGAACGGTCACCTTA -CTCGCCATATCCATAATGCAGGCTCAAGGACTGGCGGCCTGCTTCGGGCAGATGGTGGCG -CTACTCTCAGAAGACGCGCATGGCCAGTGGGCGCAAATGCCATCCACAATGTCCCTGCAG -TTCGATCAATCTCATTTGCTCGAATGCTTCCGAAGCTAGCATTGAAGCTTGTCAAGGTTC -CGGCTATGTTCGGTGGTGCGACAATCGCGGGTCTAGCTTATTTCCAATACCAAGCAACTC -GTACGTCTCTCGTGCATAAAGCTCGCTACAAACGGAATTTCAAGCTAATTTCATCTCACA -GAGGCCGGTAACTATGCGATGGATGTTTTCAAGCAAGCTTCGGAGACTGCAGGGAACACA -GCTTCGACCTTGTTCCAAGGATTGCAAGACATGGCGGATCAGACACAACGCGGGTGGAAC -AAGACGACCGAAGATATCGATATGCCTGACTGGCTGCAGAAAATTCTACGTCTCGACGAA -GAGTCTAAGTCCGGAAAGGGCGATTCATCTGGTGGTGGAGGCAAAGAGCCGAAACAAAGT -CGAgctggtactgctgctgctgccgctactACGACGGCGTTTGGATACAATCAATCGGGA -GAGGAAGACTTACGACCGGGGAGAGATGCTGCGGAGGATGATCAAATGATGCTGCTTACA -CGCAAAATGATCGAAATCCGAAATATTCTCCAGACCATCGGCCAGTCTGGCACGCTCACT -CTTCCGTCGATCGTCGTGATTGGGTCCCAATCCTCGGGTAAAAGCTCTGTTCTTGAGGCT -ATAGTCGGCCACGAGTTCTTGCCCAAGGGCTCGAACATGGTGACTCGACGGCCAATTGAG -CTCACGCTGGTTAACACTCCCAACGCTCAAGCAGAATACGGTGAATTCCCGGCCCTGGGT -CTTGGAAAGATCACGGATTTCTCTCAGGTTCAGCGCACACTTACAGATCTGAACCTTGCT -GTTCCTGAAAAGGATTGTGTCACAGACGATCCCATTCAGCTTACGATATACTCTCCCAAC -GTTCCGGATCTGTCTTTAATCGACCTGCCAGGGTATATCCAGGTTGCGGGCCGAGATCAG -CCCCCAGAGCTCAAACAAAAGATTGCTGATCTTTGCGATAAATACATTCAGCCACCAAAT 
-GTCATTTTGGCGATCTCAGCCGCCGACGTTGACTTAGCAAACTCGACGGCACTTCGAGCC -AGTCGTCGAGTAGACCCTCGGGGCGAACGGACTATCGGTGTAATTACAAAGATGGACCTT -GTCAGCCCCGAACGTGGTCACGCCGTCCTTTCCGATAAAAAATACCCTCTTCGGCTAGGT -TACGTGGGTGTTGTTACTCGGATTCCTCAGACAACAGCTCTCTTTTCTAGAGGCTCTGGA -AACATCACCAGCGCCATTCTCAAGAATGAACATGGATACTTCTCTGCTCATCAGAATGAA -TTCGGTCCCCACTCTGACGTCTCAGTTGGTGTTTCGACACTGCGAAAAACGCTGATGCAG -GTTCTTGAACAGACAATGTCATCTAGCCTGACTGGAACCAGAGACGCCATCAGCCAAGAG -CTTGAAGAGGCAACCTATGAGTTCAAGGTTCAATATAATGACCGGCCTTTGAGCGCGGAG -TCTCACTTGGCTGAGAGCCTCGATGCGTTCAAGCACTCTTTCAAGGCGTTCGCGGAAAGC -TTCGGGCGACCGCAGGTACGGGAAATGCTCAAGGCAGAATTAGACCAGCGGGTCATGGAC -ATTCTGGCGCAGCGCTACTGGAACAAGCCAGTCGATGACTTGGATCCTCCACCGCTGGAA -TTAGATCCCCTCAAGGATCTCCCCAAAGCCGATGCCGAGTCGCTATATTGGCACCGGAAG -CTGGATGCTTCTACCTCGTCATTGACAAAGCTTGGAATTGGTCGATTGGCTACGACTGTT -GTCGCCAATGCTTTGAACAACCATGTAGACTCGCTTCTAGCCTCCTCTACATTCGCCTCT -CACCCTGGCGCTCACAAACAGATCACCGATGCATGCACCAGTATCCTGAATGACAAATTT -TTCAGCACCAGCGATCAAGTTGAAAATTGCATCAAGCCATATAAGTTTGAGATCGAAATC -GAAGATCCTGAATGGGCTAAAGGTAGAGACAATGTGGGCAAAGTTCTGAAGGAAGAGCTC -AAGTCGTGCGAGTCGGCCTTGAAACATGTGGAGGACACTGTCGGAAAGAGAAAACTCAAA -GATGTCATCTCCTTTATCGACAGGGTGCGGAAGGGCGAAGTAGTACTAGAGGGCAACGGC -GAGGGTGGTGCGGGTGGCTTCAGTGCCTCCTTGTTGGAGAGAGGTTGGTCTCTTCCTCTT -TCTTCTATTTTCAAATACTGACATCTATTATCTAGGACGAGAAGGTCTCTTCCTTAGGGA -CCGAGCGGATCTCATCAGGATGAGAATTCTCGCTGTCCGTTCAAAACAATGCGCAACGCA -AAAGAACAAGTACCACTGCCCAGAGGTCTTCCTGGATGTTGTGGCAGACAAACTCACCTC -CACGGCCGTGCTATTCCTCAATGTCGAGCTCTTATCTGAGTTTTACTATAATTTCCCCCG -TGAGCTTGATTCAAGACTTGGACGGCACCTTTCAGACGAGGAGGTCGAACGCTTTGCCCG -CGAGGATCCTCGAATCCGCCGTCACTTGGACATCATCCGTAAGAAGGAGCTGCTGGAGCT -GGCTCTGCAGAAGATCGAAGGCATCCGACAACTTGACGGCCGCACTCGGAAATCAGAGCG -TGGGCCTCCTGGAAAAGAAACGCGGAGCCGTTGGAACCTTTTTTAGAAGAAATTATGTTT -TTTCCAGAGTCCTTCCCCTGTGCGAAATGATCTGAATTTTCGATACCTCTGCAAGTGTTC -TCATCACGCTTTTCATCCTGTTTAGCCATCTCCCCATTCACTCTTCTTCTTGGGATACTA -GTACGCATCTCTACATGTTTTGAAGTCAATGTACATAGCTCTAGCCCATTCAACACACAA -TTGATAATCTATTCCTTATATCTATGGCCTTCAAATGGCTCAGTATTCTTGATTTGTAGA 
-AATCCTAGCTACTAGGGAATTTGGTCAGTAGGTACACGGCCCACATATAAACATGGCAGG -CAACCAACCCGTCATCGCAAATGAGGCTTCCTATTACAGAGTCAAATTACACGTACCCAA -TCCACCCCAAGGACAGTGCTCTCATCCCCAGTCGGTGATGATCTCCTTGATCTCCTTCAG -ACCACGCAGGATGCGTACCAGGCTGTCACCATCGTGATCCCAGCTAGTACCTGCCGTTTG -ATTCGAACATAGTGTCACACGAATATTGAGACACGAACACCGGCACGCAATTGCTGAGGC -CATACTCAATCATGCACCTTTGGGCACCGTGTTTCCCCTTCCATTCATCATAGTCTAGTG -CCACTGTATCACTCGAAAATCTAAGAGGGTCAGTAGATGCCCACTGTTATTGTCGGCTTT -GGCTTTGGTAGGTGGAAAGCGATATACCCGTGTTGGTTTCGTGCCGTTCTGAGGCCTTGG -TCGCCAACGTGGACTCCCTGGTGGAGGCTCTGGTTCATCGAAAAAGATAGTCGGTGGTAC -TAGACCAGCAAGCCTTCTAGATCGGCGAGGCTTCCAACTAGGTTCGTCTTGTTCATAACA -GATTCAGGTCAAAATCACCAGAAAATTGATTCTTTGATTTTGGAAGCTTCTTTTTTAGAG -GCCATTTTCCTAACATAGCTGGAGACAGAGGGAAATCTAGGCAAGCTTTTATGGCAGGGC -TAACCGTCGATAAAGGAATCCTTTCTACGTGAGTGATATGTCTGAAACAAAGGAATTATG -TTTATAAAAAGATTTGGGATGGTTTGATGCTTTGCTCATCCTTCTTTCATAGTGGCCCTT -TCTGTGAACACACGGTTGTTGATATTGACAGCAGTCGGATAGATCTATAATTATATTGGA -TCTTCATGTTTCAAGATATTCAATGGTCTTGGCCTTTAAGAATCGACGCTATTTTAAAAT -CTATGGGGCCTTTCATGAAAACTAATATGTCCTAGTAAAGGAACAACTCCCCAATAGGTG -AGTATTTAGACAGCGACGTGTCCACCAAGCTACATGAACGATCACAATCGCAATGTCAAC -AACCTTGATAACTCCTTCGGCCGAGGCATTTTTGCCGGCTTTCAATGGCCAACTGAACTT -GAAAGATGCATGCACTTGGAGCCTAACACAGGTGAGTTGCTGCAGCTGTAGACTTCTTCA -GTTTTGCCAGAGGATAGCTAAGCTAATTATGTGCAGATGAAAGCTTTTATGCTCGGCCTT -ATAAATAGCCCTTCTACCCACGGGGTAGATGAGAACGTCATAGGGGAAATGAGAAGACTG -CGTACCGATATTTTCGATCTTTGGTGAGTTGGATGTCCACTAGACCCTATGTTCTGTCAA -CTGATTGGATTCTTGCAGCGAAAATGGCAGCAAATATCTGTCTGAGGCCTCCCACCCCCA -ATCAATTTCGAGGATAGTTGACCCAAACCATAGCAAGGATACATCATACTTCTTGTTGGA -TAGCATGCCATATAGAGGTATTCCATATTGGTGCTGCTATGACCTTCTAGGTAGGTTCCT -TTCCGTTTTTAGCCCAGCACCCGAAGGTGCCACACAGGACAACTTCTACCTCTGACAACC -ATGTACGCCAAATGGTGTATCAAGATGGCTCCGTATTCCCCATGGACGTATGCGTGCGTT -TGGGCGGAGGAAAAGGGTAGACAGAGTCATTTTCACTTGGGAGGATCGTTCGCGGGATAT -CGCAATCCGGAAGGGCAGAGCAACTACTGGGTAGAAATCCTGCGAAAAGCCCGTTTTGAT -GTTCTTCGTGATGCAAGAATTGACACAGCGAATATCACGATTTTTGATTCAACTTACCTA -ACTCGAGGAACCAATATTGTTCCTTTCGGAAACTGCGCAGAAACATATCCTCTAGTGCAT 
-ATTTTAAGGTGAGTCTATGCCTTGAGATCTTATACGTTCCATACTAACGAACCAGGAATC -ATGGCCCTAGAGAAGAGGTCTATTGCCTTGCATTAAAGCGTAAGGATCTATCTCAAGACG -CATACAATCATCAGGAAGCATTGCAGGCATTGGTTGGACCTTGTGTTAATTGCGAAAAGG -TGATAATCGCCTGGGGTGGAAAGTTAGAGAACTTCACAGATAATATTACAGAGAAGAGTA -CCGCTCCATCTTCGAGGGTCCAAGATCCTGCAGCAGAAGGTCTTAAGCCAGCATCTGAAT -CGAACATCGCTTTATCGGTCCATGCTAAGCGGCCTACTGCAGCGGATGGCGCCGATCAAC -CTGAGGCAAGGAGCACGAGGGGGTTAACACCTGTAATTCCTTTGATTCGAACAGATAGCA -CTGATCAACTCACGGTAAAGAGGACAAGAGGGCTGACGCCTGTGATTCCTTTGATTCCAA -CTGGCGACACTGATCAACCTGAGGTAAAGAGGACGAGGGGACCAACGCCTGTGATCCCTT -TGAAGCCACCGGCTCATCTAGAGAGAAAAAAACAGAGAGTAGTGACGATATAGGGGCTTG -TAGAGTTTAAAAAGCTCTTGTTTTAAAATGTATAGACGTAAAATTTTGCGAGGCTCGAAT -ATAAGAAACAGATGCGACTACCCGGAAAGAATACCTAGTATTGTCAATATTATTAGTAGA -CTCCAATGATCGAGCTCTTATACCATACTAACTCTTTTGCCTTCAATATCCACCAAAGGG -ACAGAGCCCCCAGAGATATACAAACAGTTCTATGTAGTTAGACTGTGTCTACATAAGTTA -ATTACCCCGCTGAACTAGAATTCCctcattgcttattgctgattgcttattgtgtttcct -CCAAGCGCTTAGTGTCTGAAAAATGGGTCTAGACCATTTCCCCGAAGAACTCATTGAGGT -TATTCTCATATTGGTAGATGATGAGCAGACACTCCTACTCGCCCAACGAGTCTGTCGACG -CTGGGCTCGGTGCATCCAAGCCTCCCGAGCACTCCAGCAAGCCCTATTCTACCGGCCTTT -ACAACCCGCCCTCCCAAACACGACCATCCCACAACGAAATTCCTTACTAGCAGCAAAGTT -CCCATACTGGTTCCCCAATGACGACCAGGCATCCCGCAATAGCTTATTTGGGGCTGGAGA -TATGCGAGAGTTTCTAGATAAATCCGCCGCATGGCTCCATCCAAGCGCAAGTTGGCGCCG -AATGCTGGTCCAACAACCCCCGGCAATGTCGCTGGGCTGGATAAGCCGGCAAGAGACCCC -TATCGGCCATATATCTCTGCAACAGTGGGAACTTTCATTGGAAGAGTATGGTGGGCTTCG -TATGAACATGCTTTACGATCTAGTGGTCAAGTCATCCGATGACGCGGCAGCGTTCTTTTA -TTGGAGGATGCATTGGAGCTATCCTGGCCTGGAAAGCTATTCTATCAGCTGTGATCCGGA -TATCCAGATGGACAGGGTAGACTACTACCTCGAAGCAATGATGTCGGCCAATGCGCAGAC -GGATATCGTGCTTGATACGTGGAACACGGCGCACCTTGGATCTCACATTCGGCCGGTGTT -TGGTAAGGATacttggacgtgggacttggtactaggacttcgaTCGCCGATGGGAGATGT -GAATCTTGATGTGGTCTATGGGTTGAGACAAATCAAGGAGTTGCGGAGCTGGGAAGTGGG -CCTGGCGAGCTGGTATATGCACGAAATGCACGGGGATGGATGTGGATACTCTCCTATGGA -TGATCCAGATCTATAATGTCGAGCCGCGGAATCGAAGATGATGTCCAGCGAGCAAGAGGT -GAAATCCACACATGCAGTGTGTTTTCAGCCGCACTATCCACTATCCAATGAGTAAAAATT 
-ATTCACAGATGGAAATAACAGTTCAATACAACAATCCCAGACCCAGTCTATATATTCCGA -TAGCGTACAGTACAGTCTAAATAGAAACCAAAAGCGAAACAAAGAGGACGCGAATTAATG -CTTTTGTCTCTTGATGGAACCAATCGCAGCACCAACCGTGAGTCATCAGGAAAAGACGGG -AAATGGATGTCTATCAAGGTCACAGGTCCAAAAAAAAATAAAAAATCAAAGACAATGATA -GTAGGGGGGTGGCATAAGGGGGGCGTAGCGGAACACATTATACGGTCAATGTGTCAAGGG -AGGCATTGGATAACCAAAAAGGAAATGCAGTATGGATGGCAATATTACAGGAGATCATTG -AACCCGGGTGAAGATATTTACTCCCGTGAGACATCACGGCCGAAGAGACGAGCCTTGACC -CAGTCGACGCAGACAAGGACACGGTTGCGAGCTAGATTATATGTTAATAATGAATCTTGA -TCAATGGTAGGGATAGAACTTGGAACTTACTGCTGAAGCACATGCTCAGGTAGGCACTGC -GCCAGAACAGGTAGGTCATGGTACCACCGCTGGCGATGTTGCCGCTCAGCCAGCTGATAT -CGGCCACAGCACGTTCCTTGCCAATGTATGCCAAGGAGCCCTGGTGCGAGTACTGGAAGG -GACCGATCTGCTTGTTGCGGCGGAGCTGCTTCTGGCGCTCACGGATTTCGTCGAAGATCT -GGTTGCGCTCCTCGTCGCCCTTGGCCTGGGACTGACGCTCTGAGAGGGTCTCTAGCTCCT -TCTCGAGAGCCTCAGTCTTGGCCATGGTGTTGAAGAGGCGGCCCAAGAAGGCACCCTCCT -GGCTAGCAACCTGGGCGGTCGGGGCGTAGTTGGCAATGGCGCAGTCGCCGACGGCCCAGA -CATTATCGGTACCGTTCACGACGAGGTATTCGTTGACAGCGAGACCGCGGCGGGACTCGG -CCTGGGCGGGGATCTGGGACATGAGGTCGCGGACGACGCTGCGGATGGCATTACCGGTGG -CCCAGACGAGAAGACCGTAGGGGATAGTTTCAAGCTCCTTGGATCCGTCGGGCTTGGTGA -CCTCGGCCTGGATGTACTTGTCGGTAACCTTCTTGACCATAGTCTTGGTGCGGATGGAGA -TCTCCTCCTCCTTGAAGGTGGATTCGGTGTATTCAATCAGTTGCTTCGAGAACATGGGCA -GAACATTGGGCAGAGCCTCGACGAGAGTGACGTGGAAGTTGTCCTTGATTTCGGGAATCC -ACTTCTTCAAGTCGTCGTTGAAGAAGTCCTGCAGTTCACCGGCGAATTCAACACCAGTGG -GGCCACCGCCCACCACAACCATGTGCAGCAGACGCTTGACCTCGTCCTCGGTCTGATCCT -TGAAACAGGCAGTCTCGATACAGTCCATAATACGTTTACGGATGTTCTGGGCGTCACCGA -CTTCCTTCAGGAAGCAGGAGTTTTCACGAACACCGGGAATACCTAAACGTCATAAAATTA -GCTCAGAATACCTCGTTTTAAATGCGAGGGGGGATCCCACCAAAAGTCGCGTTCTCGGCA -CCAACACCGATCACAAGCATGTCGAAGGGAACCTCGGTGTGCGAGATATCACCCTTAATT -TCCGAATCATCACTGATGTACACGATACGCTTCTCATAGTCGATCTTGGTAGCCTCAGCT -TCGTAGAATTGAACGCTGGCCTTCTTGTGGCGCAGGATATTGCGGATAGGCTCCATGATC -GAGCGGTGCTCGATCAAACCGGTGGTACATGAGGGCAGCAAGGGGGTGAAAAGGAAGTAG -TTACGGGGGGAGACGACGATAACGTTGTAGTTCTCAGTGTCAATCTTCTTCAGGAGGGAA -ACGGAGCCCCAGCCAGTTCCTGAACAAACATAGTCGCATTAGCGCCACTTCAGCCATAGG 
-GGATCGACGATCGCGACCGACACATACCGAGAATGACCAGAGTCTTCTTTGATGGGTCAG -GGGGAGACTGGTCCGGGGGTTGACGGAGCTCGAAGATGCTGTATGCGACAGTACCGGTCA -GACCGATGGCAGAGAGCCAGGTCAACCGCCAGGCCCAACGCAAAGGTCTGAAACGCTTCT -TAGACTTCGGGGCAGATGAGGGGGCAGCATCGGCGTAGCTACGACGAGCGATACGCGACA -CAGACTGGGAAACTTTAGTGGATCGGACTGGCTTTGCGCGAAGAGCCAGCTGTGATGTCG -ACATTGACCTGCGCGTCATCGACGACAGCTGCATGATGGACGGAGCCATCTTGCGAATTG -CTAGCGTGCTGCCAGCCATTGTGAGAGGACACACCAAAGGTAGCAAATGGAATGCAGGAA -TGAAATATCAAGGGCGGAGTAATTTAAGAAGGGGAAAAAAAAAAGATTTGATAAGCTTCA -TTGCGTTGCTGTTTTGGGCGAATCGGTGTAGCCGGACCCAAAGGGGGTAACTGTGTATCT -AATACTTGACCGATTGCGTGCTTTCCTATAGTAAACCAATGTACTGTCTACATAGTTTTT -TTCTTGTTTTGATTTTTTTCGATTACTAGAATATCTAAGCTGTACTCCGTCCCTCCTGAG -AGATATATATACATAGAATATAAGTGATTCGGCAAATGCAATAGTCCTAAGAGCTTGAAT -CACAAGCCAAAAGAAAAAAAAATGCGAAAAATAAAATTGGAGTGAGGGGACATTTCCCGG -TGAGGAGAGAATTAACCGGCAAATTGTTTTTTTGGGGTAAAGCCTTTCTTGATCTCCATA -CAGCTTTTCCCCAATTCCTTGACTCTTAACCGGTATTACCCGTTCCAGTCCTTCCAGCCC -TAATTTGGAGCTCGAGATATGCTAGATTCTCTGGTAAAATCTGGGGAAGTTCCGCTCTGA -CATCACTGGCCCGGTTTCTTCCATCTGACTTTGAAAATCCCGACATAGGAAAGCTCGGCC -AAAATCGGCCTGTCATTACCTTTTAATGCTCAGGTGCTCAATCGGCACTCGTGATCTTGC -TCGAGCATGATAGAGAATTGACCGGTTCATTACCGCTATTACTGGTTTCTCGTGAAGGGT -TGATATCAGCCCCGGGAGTAACAGGGTCAGCGTCTAGTGTCAAATTAGTATGACGCATTA -TCAAATACATACGCGCTAGAGAGTAGCTCAAATGCTTGGCGCTTGAGATGTAAGGATGAA -TAATAACAAGAAAAGTATTCGGATATGATATCCATATCATATGTTCCATGGTGCAAGCCA -GTATATCGGATGAAACATTGTGCCAGGAGCTTCCTTCTCATTTGCCAATTTAGTGTTTGC -GGCAGATCTCGGGATGACACATGCTAATTTATGCTTATTTTCCCGAAAGACAGACGAAGG -CTGTTGGTTTTGAGACATGTTGAAGGCTGCCGTATGACTTCACTTCCCAGAGTGTCACCA -GAACATGTATTACATATAAGCATAAGCCAGAAGATCAACCGAGACATCATGGTCCGATAT -GACTTGCGACAACAAGACAATCTCATATCCAACCACACGACGGGATATCGCCCGTGAAGT -CCGACGCTAGCACCAATATAAAATCAAACTCCAACCACTAAAATGCTGACTGGGTATGAG -AGCAAGAACGGGAGCACAATATATCATCCAACAGGGGTTCCCAAGCCCTCTTATACATCA -TGAAGTTAAAGCAATCCAACCCTAAAACGGAAAAAGAACCTATTATTCAAAGCACCCAAT -GATACACCAGAGCCTAGATAAGACATTTCTTTACATATACATACAAGTGACAACCCATAT -GGATTTCAAACACGACGGCGCTGATAAATGCCCACCTATCCGCCCATAGAGTATGCAGGT 
-AAGCATGCCTAGGAGCCTGTAACTGGTGAGCCTGTCTATTGAGTAGGCTGGAACCCTGCT -TGAAAAGTGTACAGGTGTGATATTATGCGGGTACGAGCACAATATCGCCTCTCTCTGAAT -TATTTGCCCCGCCTTTGCTTGTCTACGCGTCAAAACTCTGGTTTGTATCTGGGTTAATTG -GGTATATCGGGGTATCCGTGTTGGACGAGGGAATATACCCTCATTGGAGTGTTGGAAGTA -TGGGTATGCAGAATGCGGCTTGAATCGTTATCGAATAGAATATCTTGAGTTGTACTTCCT -TTTTGTCCCTATCGACCCTGTCTGGTTGGATTTGACATCGCTTATGCTTCGGGTTTTCAT -GGGGTTCGTTTGTCTGCGTTACAGGGGCCATATACATTGGCCACACTGCGAGATGAGAAT -ATGGCCGGTCGGTTTGTGAGATACGTCGGGCCCAGGAAATGGGAGGCTCCATTATGGGTA -TGGCGAGGCCGAATTATGATGTTTCAAATGCTGACGTGGCTTCGTGTCTGTATATATGGA -CAGAAGCTGGATAACAAGTCCAATGGTTTTCTATTGAGATTTCGCCTTTTCGGATTGCGC -CTTGCTTGCTTCCATCAACATTGACTGGTCATTTTGGTGGGTGCATTGGATGCATTGAAT -GCGCATGTTTGATGGGGCAATATTGTTGTCTCGAGTAGATGAGCAGAGAAGCTTGGTTAG -TTTAAAGAAACAAGACATCAAATTGGGATATGCATTGAGATCAGATCGAAACCAACAGGC -ATGGCCAGTTATTCCCGCTCACAGATAGGTGATCTCCAAATTCTGGCTTCAACGAGTTCA -ATGAGATGATAATATATATCAAATAGAAAGCGATGTGACCAATGCTCGAATCATCGAGAG -CGACAAGGCGGACTCAGACGCCATCAGGCCTTCGGACTTGGTAGAGGCCAAATCGTCCGC -GTAACAAAACCAGCCGAAGATGCCCGTAGCCATGAAGCCCTGATCTCTTGCATAATTCTT -CTCGAGTCGATCCGAAGGCCCATTTTGTGTTGTGGCGTTGTGATTCGAACAATCATGCGG -AAGAGAATATACAGGTCAAATTTGGATATCATAGTAATTGTCTGGAAGGAATCAGCCTCC -ACGTGTTGTTAACGGAGAGACATGCATATCCACAGACGAATACATCGTTTGGGCCAAGAA -TAAGACACCTCGCATGTTCGAAGTAAGGCTAGGTCCAAATGCAACTTGCATGAGAATCGT -CTGCTTGTACTATTTCGTCCTTGGCGTATGTGGTTAGAAACTTGGAATCGGAAACGAGGA -ATCAATCAAAGTATGCCATGGTGTGCAATCCATCCAGGCCAGTCGATTTTGATCCAGGCA -AGCGAAAAGAATGAGCAAAGCTGTGAGCAGCCTCGTCACCAGCAACCAGTAACCCCGCTT -AGGTCCAAGGAATAATCAAGACCAGTAAGCCCCCGGTCGGGTTCGGAAATAGGACGATTG -AATAAACCCGTCGCCCCTAACATTTCGACCAACAAAGAAACCTTGAAAGCTGATCAGATT -TGGCTTCAACCCGCGGTTGCCGAAGAGAAGAAGAGATGAAAGGAACACGAAGGGTGAGAG -GGCGGTTACGCCAAAAGCACAAACCCGCCACTGGCCTGTGTTCCCTGGTCATGGCCACGA -CCATGTCTCAATGGCGTCGGAGACCTTGAAAGGAAGACGTCGTCAGCCACCAACCTCGGG -AGGTCAAAGATTTAGTTTCAAGTAGATATGGAGACACAATGATGATAATGAGAACTCAAA -ATGCGATTGGGTCCGAATTCTTTTGAGTCTTTCACGGTTCTCGTCCATTCACCTCGAAAG -GTCAATGTGCAGATATTCGAACATGATTCAGCGATCTGTGAGATGTCGATGACTGTGACA 
-TAGGTGTGCCGTTCATCCATGGTGCTGCGCAGGCGAGGGTAGTCCGCTGATCCAAGTTTT -CCCAACTTCTGAAGGTTTCTTTTGCCGCGTGTCCGTACTTTTGTTCTTCTATCTTGAATT -CATTCTGGCCCTAGTATTATATTGCATATACAAATCCACATCTACAGTCTACAGTGGGAC -GTATTGAGTCGAGTACAGACTGCAGATTGGAAACAAGTCCAGCTCGCCTCGGGACTGTAC -CTGGCATTGCAACACGTCTTGGGCCGGGGGAAGATGAGCCATTTCCCATAGGACGGCTTA -CTGAGTTGAAACACTTTTGCTCATTATTTTGATTTGTCTCATTTCACTCGTGaaataacc -aaaaaaaaaaaaaaaaagtgaaaacaagaaGTGGCACCCGGATGTAGACGGGAATGTGAT -GTACGAAGGACACTGTGTACTATTCCACTCCGTAGTAAGAAAGGGAGTCAGATTATTTGG -ACTCTGTACGCGTTGACTCCACCATTAAAAGCAGAATAATTATTTTGGATGCGGCAGGTG -CGTTTCGGCTAGGATCGTCGAGTTGTGACCGCTCAGCATATATAATATTCCATTACCAAG -GCTCTAGACTCTCGAGAACAGTGCCCTTCGTGTGCCTGGTATTACTAAGCCCCGAAAACA -CTAATCTTAGACGTTCGAAGCGACTGAGACAAGGCTAATATATTCCCCCACACTTTGTAT -ATTGGCATGTATACTACACTGTATGAAGTCTGGAGTCTGGACTCGGACTGAGTGTCTAGC -AGCCAATGTGATCTCTGATACACGGAATAGCCAGATTTCATTTTCTACACCGTCAAAACC -GGGATCACTCATGAGCCACCGAGAGCCCGGGAGGGGCGAAAAGAGTATCGATTAGCCCCA -AGGCCCCATTGAGTCAAGCGGTAGGTTATAGATTCCGCTAAAGTCAAATCACGAGCGGAA -GCAAACAAGTCTTTGATGTACTCCGTACTTCGTACTCCGTACCAAAAGTCAAGATTCCAA -GACCCGGTCAGTCTCAGCGCCAGCGCCAGAAACCCGCGCTTTGCTTTGTGATGTGCTCGG -GGCGCTTTGCAGGATCCAGACAAATTCCGAGATAAGTCCTGGTCAAGCAAAAGTCAAAAG -GCTAAAAAAAAAACATAATGGCCGATCGTTGCCCCTCAACCGGCTCTGTCTTTTGAAAAA -CCTTTCGAGCATGTGGGTTGTATAGGCTTGTTCACCCCCTCTACAGAGTATGGCCTTTGG -ATTTTGGCCCCTGTGCGCACTTGGGGGGGGGGGCTGAAGCACAAAGCCTGGAATCGGAAA -TCAGCCTCAAAAGACATCAACTGATAGCGCGAGATGACCCTGTTCTGTCACACGGTCCTG -TGTGCGGGAGACCATTTGCATTTGCCCTTGACCAAGTGTGCACTTCTGGAAAGGTCAAGG -TCAGGTTGCTGCGGTCTGTCGGTGACTGCTCGGAAGAGATTAAAGAAAGGCAACCATCTA -CGGAGGACGGAGTAGGTGGCGTGCAGTTGGGTAAATTGGGTCTGATCATCGTGAGATTGC -AGATGGATATCCATGGATCTAGATGACAATCTGTCTCGCATCCACAAAACAGCTTTTCAT -GGTATGTACATCGTAATGGAGAATAGAACGTTGTACTCCGTGCATGGACAACCTTACAAT -ATACAGTATGACTCCGACGTCATAGATAACCCACCAGGTAATATTAACATTCACGTTCAA -CCGACATCAGAAGGACCGGATCTGCTATTGGGAGTTATTCAATTCCGCCATCTTTTACTC -CGGTTTCGGATTAACAAGACTTGAGAAATCCTTCTTCGACTACTTGAAGGGGCAATACCC -TTCAGGGGCAATACCCTTCACAAATGTTTGCAATCTCCTTGATATGGGGAGTGGATCGAA 
-TAATGACGCCAGGAAGAAAGGATGAATTGCAGCCTCATGCTATTGAAATTGAACACAGAC -TGTATAGCCTCGGGCGTAGACCGCAAATAATCCACAATAGATCATTGTAACCTATGTTTT -ACAAGGTTGCAAGAGAGGAAATTCCCGATAGCATCCATCAACCAGAATACTACATGTAAT -GTTATGTGATATTGAGGTAATTGTTTCTATACTATTTAATAATTGACCTCAGTACTCCGT -ACTTTGTAGGAACCCGCAAGGATGGGAAACATGCAAGGAGCCGTGCTGGGTAAGAGGCAC -AGCAAGGTGGTTGAACACAGTGTTGAGAATGTTTTTGCAAGGTGACTCTTTTTTTGGCTT -GCAACTGACGCATAAGATAGATTACCTGAGGGAAAACGCAATCAAATGTCACAATATACC -AAACAATTTAAATTTTCAAGAGGAAGTTGATGCCGGGCGCAGAGCGCATCGGAATCCAAG -CCCCGCCGTGTAATGATGAAGGAAGCGTCAGGGTTAACGTGTAATTAATAGAGCCACAAA -AAGCATGAACGAGACGGAGTAAGCCGGTTGATGTGTATGAGTTATACCATTATACAATAG -AAGCTATTCCGGAGTCGAAAGTGTACGAAATGATCAACTTCGGTTTTGCTAAGTTAAGAC -TCCCATTTATTAACCAGGGTTAGATCCCTAATCCCGTGGATTTCTCGGAGTACACTGCCC -GCAACTAAGTGGTGTATCGGAAACTTTGGATGTTCATCGCCAATCAAGAGGCAATATCGC -ATTGTGGTTCTGCGCAAAGTGCTGCGGAGATGTTCCCAAATGCCCCGTGACTGTAGGATT -GGTACGTGATGGATATACAGTCGAAGATGTGAGCTCGGATGATTTCCTTAATGAAATGGG -GTTCCGCGTGCATCAAGTCGACACACAGATACTGAACGATGCACCTTTGAATTTATGACT -CTGCGGATCCCGATCATAGCGGTCGCAGTCAACTGCAAGCGCCAATGGTTGAGGGTACTT -GTCTGAGAGCGATCATCGGTGGAATCCTAGCTTTCACACAGGGGGGGGGGTGATACTTTT -TTTTTTCCAGACTTGATCCTTCCCAAGGCCGACATTTTCATAAAAAAAAAAGCAATTAAA -TATTTCCCCTTCACACTATACCAGAAAAAAAAAAAGGCCGGGATTAGACTGACGCCCAGG -TGAGCTCTGCAGGTAGGACTGGAGTAATCTCACCCGCGAAAGGCGATGGGTAAAAGTTGT -GGAGCTGTCGGGAATTGTGCACAAGAAATGAACCGCAACCGGGATGGCAAGGTAAAAGAC -TCCATTTCAGGCTCCTGTTTTTTTTTTTTCTATTGTCGACCCACCAAAGAGAAGAGAAGC -GAATGTAAATCAAGGGGAAAGAAAGAGACATACTGAACCTTTGAAGAGCTTCTTTGTGTA -CTCCGTAAGTACTGTGACGAGTGAACATGTCCCAAAACAGTAAAAGACGGGGGTCACTAT -AATAGCTCACCAATCAGCAGTCCCTCTCTAGTCCACGGTAAAAAAAAACCTTTTAAAACC -GCGGCATAGATCGTCTAACAAGATGGAGCAGTATGACACAGTGGACTTTTTCCTCCCTTT -CAAGTCCACCTTCTCTGTCTGGGGGTTCTAGAAACGAAACATCTTCCGTGGCTCGGAAAC -CTAGTGGAGAGCGAAATTCCAATTTCTTTTTTAATTTCGCCTGACTCTTTTTTTTTTTTC -TGATATGAAGGTCCATTTGGTAACATGACACCTACTCAGCTTGACCGTGGTTTTGTCGCC -AAGATAGCACACTTGGATTGCTAGTCGAGTTTGGAGGTTACCCAAGTTGCCTGGTTTCGG -ACGTGGCGTGATAGACCCAAGTGTGTCTCGCTGTGAGAGTCCACCCGATGATCTTTCGGG 
-AAAGGTGAAGATTCGAGTTCAATACGGCTTTTGCATGCGTCTAGAGAGTATACGAAAATG -TATACGTTCCGTGTGTGCTTACTGTTAAAGAAACTATGGAACTATGGAGCACACTGATGT -ACGGAGTACGGAGTACTTCTTATGTTACCGGGATGTTCGCATAATAATTTATGTATAAGA -TAGGTTGCGAGGAGGGTGCACATCTTTTGAGGCCTGTTCGAAGGGATGACCGTTGGGGGG -GTGTGAATGGCCGATAGCTTCCAATTCCACCAGCCGATCTGGACCCGTTGCACCTTCGGC -TTTCTACACTGAATCACAAGTGCAACTGCCACCTTACGTGAGGCTGTTCCCACCTAGATG -GGTCGAACAGTGTGAGTGAAACAGCCAATTCTTGCTAACAGTCCTCTACTCGGCTTAACA -CAGGGGCGCTTGGACGGGACACAAATACCGACAGAGGTACGGAATACTAAAGTACAGTAT -GCGGTGACCTAGAGAATACCTTGGGGAAGTCTAGTGCATAAAAGCCGGACCAATCAAATG -CTAAACATCCCAGCCCTCCCCCGAGGCTCCTGAACTATCAAATTTCAAGGTAATTATGAT -GAGATTCCCCTGAATCACAAACAGGAAATTCCTCCGGAGCAGTGGACGCGGGATGAGACC -AAGACTTTGACCAAATTACTTATCTCCGAGTAATTGGTGATAGTCGGGGAGTACGGAAAT -ACATAGAAGAAAGCCTACCGGTACACGGGATAAATTAAATCCAAATTTGGCCTGAGACTT -CGAGTGGACCACTGGCTTTGATTGGTTCATTTCTTAGTAATTTTGCCTTCTTACATAAGA -ATGCACCATTCTTACCGAGACTCCCGATCTTCCCTCAACCAAGTAATTGATCTTCCATTC -TCTGATTGGTCTCTTATTGAtttcttattcccttttttttccctttattctctgcctttt -ttctttttctttttttctttttggttcattcctttgcctttCCAACAACCGACAACGGAG -TACGGAGTACCAGCCACTGTGCTCAAACCTCCCTTAGACGCTTCCCGGCAACCCCCTTCC -CtttctttttttttcttttttttttttCCTTTGGGGGCGTACCTACATGCTTGCGCACCC -TCACAGCCCTTCACCTCGCATAACACGGTTTGGTAATTTATTGACGCTAAACCCGACTTA -TTATTGGATCTGATATGGGTCTCAGGATTGAACTCCCTATTTCTCTTTTGATTTCAACTT -GCTTTGGGTTTTACTTTTCCTTTTTGATAGGGTTTTTTTTTAATTTAATTTCTTCCCTTT -TTTTTGCTTTCACTAGTTCTACGCTCTCTTAACTGCAAATGTGAGGCTGGGATCAAGTAA -ACATTCTGTGACCTACCCTTTTTAGTCGCATCCACTTGACCCCGTGGCTCTGACATCAAC -CGGTATTTCCAATATTGTGTTATATACACATATAAAATATTTTTTCCAAATCTAGATTGA -CCAAACCCGGAACCCAAACGAGCCCAGAAAGGAGGATCAACCTGGACCTGCGTCCGGGTC -AAACTCTGTGTATGGCCATCGCTTTCTCCGGATGGGGTCTGTGGACTTGGTTAAGAGATG -ATGCCTTGACAAtttattttatttgattttcctttttatttttttttCCCAGTGGAGTAG -CCGGATCTCTCTGGTTTATCGCTTTCCCTATATGGGCCTATCGTGGTTACTCTATATACA -TCATTCAAACTCCCTGTGAAAGGTCTATATTCACTTTTCCCTCACCTTTTTGAACGTCTA -CCCTTTCCTCCTTGATTGCATTCCAATTGGGCAGGACCCCCAACACTTACTGTCTATTGC -TTGGCTCTGTCTTCACCTTTTAATAGAGAACATGGGTCACTGAAACTCCTCGGTTCTCCC 
-CGCCGACCACCTTTATTTGCTTTGATAAAAAAAAAGAAATAAAACTCCATGTGTCGACCT -CTCTGTTCGTCAATCATACTTTCCCTATGTCTTCCAGTGTGGACTTCCCTTTCTCTTTTT -CGACCCTGGGAGTTCCAACCATCGACTTGACTGAGGATCACCTGTTGAAGGCCCTGGAGA -GCCAACCCTCGGGAACCTCTCGGACTGTTCCCATTCGGAACGGTCTACCTACCCCTCCTC -ACGATATGACCGGCATCACCTACAATGCCATGCCACCTATCGCATATGGGGGGAAACCGA -ACGGGGTGCCCTCTCATATCTACGGCCATGCCCGTCCCCAGTTCGATTCCGTTTCCTCGT -CCATGGCCATGAAATCGCAAAGCCACACCGCAGCCAAAGAACCCGCCTCGGAACCAACTA -CTCAGAAGAAGTCAACGGGTACAACTGGATCGCAACTCCGCATTCCGTCGTCCATCAACA -ACAGCAAGGGCAGTCTGGCCGAGTTTGCAGCTCAGGTGAGAATCCCCAGGTCTACAGATG -TGATCATGTGGCATGAGCTAACTTGTTTGTTGCATTTGTAGATGACCTGTCTATTCTGGT -TTGAAAAGACCTCGAAATTGCAAGCCATCGAAGACCGCCAACCTCAAGTTCCTTCCCTGG -TCCCCGAGGCCATACCCACTGTAGGATTTCAGAAATGGGTTGCCACGATTCTCTCCACGA -CCCAGGTCAGCCAGAATGTCATCCTACTGGCTCTCCTATTTATCTATCGCCTGAAGAAGT -TCAACTCTGGGGTCAAGGGCAAGAAAGGCAGCGAGTTCCGGTTAATGACGGTTGCGCTGA -TGCTCGGCAATAAATGTGAGTGTTCTTATCGGGCTCACCGCCCATTATTTGATTGGACAA -AGTGGCTGACTCATCTCGAGATCTCGACGATAACACCTACACCAATAAGACGTGGGCTGA -GGTATCAGGCATTGCGGTGCACGAAATTCACATAATGGAGGTCGAATTTTTGAGCAATAT -TCGCTACGATCTGTTCTCCTCTGAGGCTGAGTGGGACCAGTGGCACACCAAGTTGGGGCT -CTTTGGAGACTACTTCAATAAGGCCTCGATGCTATCTGTCGAGCCCGCCGCCATGCTCCG -TGTTTCGCCTCCCCGCGTGCAACCACAGTCGCCATCGTCCAAGTTGCCATCGCCTGGCAC -CGATATCTTCCGATCCCAGCCCAATTGGTATATGCCACACAACGCATTGCCATACTCGCT -GCCGCCACAACAGCCGCAACACACAGCTGGCGAGGCTGCGATGGGAGGCTCTCGGAAACG -AAGTCGCGATGAGAACGATGAGCTTCACCCGACCAAGCGGGCTGCACTTTCTTCTGCCAC -ATCTGCTCCTTCGATGACAGCCCCGGCCACTGCCATGAATGCGATGCCCACCCTGCCACC -CGTCTTGACCCCCACTTCTGCCCCGGTGGCACCCATGAACGTTTCTCGTCTCCCACCGCC -TCACCATCCGccttcccttccaacttcccttccaacttcctatcccGCGCCTCAGACCCT -CCCTCCTACAGCACCTTCCCACCTTCCAGCTATCATGCCTCCGATCTACAATCGGCCGGC -GAACTGGCAGCAGATGCCTCCTCTCTCTGTTCCCCCGATGCCATCTAGCATGTATAACAC -TCCTTCACTGCCAGAACTAGGCCGACACCACCAGAGCCCATTCGGCGTATCTTCTGCCAC -TGTCTCACCGGCTGGTTCAACCTATTCTGTCCACACCCCGCAGACACACCTCTCTCCTTC -GTTCTTCCTCGCCAACCGCAACTCGCCTTACCGGCCAGTGCGGTCCGTCAATACGTTGCT -GATTCCACCTCCGCCAAGCTCTCTACAGCAGCAGCGCAATGTTCCGTTTGACCATATGCA 
-CTACCAGCCACTTGGGAAGGGTAATACCTCTCGCAAGACCGGCCTGCTGCCTTACATCCA -GCCTGACGCGTGGAACCAAGCTCCTTACCTACACCCTATCTATCCACCCACCGGTTACTC -TGGTTGAAGCATTCTTTACGCCTGTTTCTACCACTCTCATCTCCTGAGTTTATGACCTCA -CCGCGCCACGACCCGTCTCACGAGAAAAAAAAAAACTATCTCATTCGTTGTACACCTTTT -TATTCCTTTCTCTTGCTTGTCTTCCTTTTCCGACTGCACAGACTTCCTCCATTTCACGAT -TGTATCTTGCATATTTTCGGCGCTTCATTTAGAGGGAAACCGGTTTTACGCCCCCCCCCC -CGCAGACGATATTCACCTCGGGGATGGCATCTGCGGCGTTGTTCTTGACTTTGATTTCGC -CCATGTCCGTCGAAAGTGCCCGAATGCACATCGGCAGTGTACTGCTCTCATCGCATTTTG -CATACATACGGCGTTTTACTTTAGCAGACTTAGCTTTATCTCTGCAGCATATCAATCTAG -ATTTACCAACTGGTAATTTTTTTCTCGCTTGAAACCTCGAAATGTTCTCGTATATTATTA -CCTAGTGTAGTACTGAATGGAGCTATGTATCAAATAACCAAGGGGTTGGCATCTTGAGTT -GAAGATCTACAGTCATATACCCAACTGTCTGAATCCACACCAATCAAGATACCCAAAAAT -GTATTAACGAAGAGCACATACCTGATGTATTAAAAAAAAAGCTACTATCCTTTGGAAAGA -AACTTGAAGAACGAGCCCTCGGTGCTCAGGTCGACCTCTGGGACTGTAGTTAGGGCCATC -TCGGAGTCGGATTAAACATAACCCGCCCCGACAGAGATCTCGAAACTCACCATGAAACCC -GCAAATCCAACATCAGCGAGTCCGTAACCACAATCACACACTATGTCTCCCCGGCTGATG -GGGCTGCCCACGCAGCTGGCACAATTTTCCGCCCGGTCATTCCAACAGGCCTCCCCACTC -GCCTTCCTCCTCCCACAATCCCAGCAAACCCGCAATGCTTCCATCCTAGCCTCCCTCTCC -GACAACCCGACTGCCTACAACAAGCGCATCCGTCGAGGCCGCGGTCCCGCTTCAGGAAAG -GGTAAAACCTCCGGTCGTGGTCACAAGGGTCAAAACCAGCACGGCAAAGTCCCAGCAGGT -TTCAATGGTGGCCAGACCAAGGATATCATCGTGCACGGCGAGCGCGGCGGTGTCAACATG -TAAATCAATCCCACTTGTCAAAAACAACCCAATGCCCAAAGCACATCTCTAATTAATCCT -CCTTCTCGGATCTAGCTTCTCCGTCGACATGGCAACCGTCAACCTCGACCGTCTCCAATA -CTGGATCGACCAAGGCCGCATCAACCCCAACCTCCCGATCACAATGAAAGAGCTCTACAA -GAGCGGGTGCATCAGCCGACCCATCGATGGCCTCAAGATTCTCGGTGATGGCGCCGAAAC -CCTGAAACAACCAATTCACGTCGTTGCCTCGCGCGTTTCCTCCTCCGCCATCACCGCTAT -CGAAGGCGCCGGCGGCTCAGTGACTACCCGCTACTACACCCGGCCGGCCATCCGCCGCAT -CCTGGACGGTGAGACGCACAGCTTCCTGTCTGCTGCGTGGGCGCGCGCTAGTGGCTCCGA -GCAGCTCTACAAACTCGCCAAGGTTCGCCCTACGGCGGGATGGATGAAGTGGAGTACTGT -GGTCAACACCCATCAAAAGAAGTTCCGGCTGCCGGATCCGACGAAGCGTAAAGACATTGA -GTATTATCGTGATCCTGCGCATCGGGGATACCTTTCGCACCTGCTTAAGCCGCTTGAAGG -ACCTAGTTTGTTCTTCCGCTCTCCCGAGGAGAGAAAGTCTACTGCTGGTGTTAAGAAGGA 
-GAAGGTCCTGCCCGAGAATAGGCTTTGGTAGATTTGAGAGGGCTTGGGGAGAACCTCATA -CTTGATTTGTTTGTTCGGCATTGTTTTTTTATGGCTGGTTTAGACTGGTCTGCTGTATAT -GTATAAATATGTTCTGTTGGATATGTTAGGCTTTGCAATTAGATGCCTCTTGGCCTTTTT -TTTGTCTTTTCTCTTCTCCCAAGAAATCTTTAATTCTGTAGAATGCTCCAGTAACATTTT -TGAGGTTTATGCGTTTGTTCAATACCTCCCTGGTTATACATTGAAAAGCCACTTGTTTGT -TTATGCTTTGTACTTGAAGGTTTCTACTGTCTGAAAAGCCCCGCAAGTCTTACGGCGTCA -AGGACGAAGGCCGCATAAGCAGCCAACTATTATATAGCAAATTGCAGCCCCGAGAATTTA -ATAGCTTAATCAATCTATTTTGTACCGATCGTACTAAGCACAGGCCAGTCACCTGGACAG -ACACGTCACCAAATTGAGTAGAGAAGACAAATCAAGCTCAGTAAAAACGGATCAAAGATC -TGCACATATAAAAGTACACCTGTAAAAGTAAATGACCAAGCTAGGTTGACCAAAAAGACC -GAAGTGACGAATGCTTATGCAACATCCGACCGATCACATGGCAATAAAAAAAATCAACGC -TTCGCCTGCTATAACAGTAAATCATAATCGTCCGTCTCATGATGCCCAAGCCTGTGccag -aaccaaacccaacctagaaacaaaaccaaaaagaaaaaaaaaaatcaagtatcaagaatG -CGCTGCGAGGGGGATATTGAGAATATTTTGGGAGAACACACACaaaagagaaaaaaaaaa -aaaaaTAGGCACACGCTGCCATTCTTGGGCCAAGGCAAATAAAAAGAAAGAAATAGATCA -GAATAAAGCAAAACGGAGGGGTATCATAGTGATTTCAAAGGGCCCTAAGAGCATCGGCGC -TTGTAAAACCTTCAGCGCCGAAAAAGAGAGCCATCACATCGTAACAAATTTGACGTTAAA -GCGTTGGAATAATCTCATTTGCCTTGAGACCCCAAGTGTAAGGGTAGGTAGGATATGTGT -TTGTTGATCACCGAATGGAAGAAAGGATGAGCAAAGCTCATAGTCTCCACACAGTATCGG -GTGAGGGCTCAGCTGACTCATGAACCTCTCGTAGCCAGGACGAATCTAGTACATCGCGAA -TAGCCCAGCGCTTGTTGGGGTCCATTTCAAGACACCCACGGATCAAGTCAAGGGCGTTCT -GGCGATCTTGGCGAAGGTGGCCATCCGTAGAGTCACCACATACGGCATTATGATCCCAAG -TGCCGCTGAGGATATTTGCTTGGATGCGAGGAGTGAAAGAGTCTTGGAAAGGCCGCGAGC -CGACTATGACGGTATAGACGATAACCCCAAAAGCCCACATGTCGACCGAGGGATGCACGA -CACCACCTACCGATTGAAGCAATTCGGGAGATGCGTATTCCAGACTACCCGCGACACTGG -TACTGGATCCTGCAGGGCCCATTTGCTGTGGCGGTGGACGATCAGCGGCATCCTCGTAGG -GGTCGGGAGAGTCGCCACCGTTGTCAATGGAAATCCATTCTGCCATTCCAAAATCACACA -GTACCAATGTCGAAGATGTCTTGCCATCAGCAGACTCAACAGGATCGAGCAAGCAGTTCT -CTAGCTTGATATCTCGGTGAACCACGCGGGCATCTTCGTGCAGGTAACGGATTGCACAGG -CTAATTGGTACGCATATTTCTTTGCTAATTGTGCATCTAGACCGGTCCGATTTTGGCGGA -CTAGGTCGAAGAGAGTGCCCCCAATTGCAAGTTTGGTGAAGCAAAACGTGGCGTAATCTG -TTTCATACACAGCATCGAGTGTCAATACATGCGGGTGGCTGAGGTACCGCCAGACTCGCA 
-CTTCGTGGTCGAACTCGGCTTGGACTTCGTCGTTTTCGTGCTCACTTCGCCCTGTAATCT -GCTTTTTCACAATCTTCACAGCAAGTCTTTTGGTTTCGCCGTGCTTTTCAGCTTTATAGG -CTTCTTTCACCGCACTGAACCCCCCAAACCCGATTTGCTTTCCGAGAACATAGTCGTCTC -CAACCATTTGACCTTCGTCATCAGGCTGGGGAGCAGGCTCTTCAACCGTGTTCCACATGG -AAAGAAACATGCTGGCGGGCGACTTGCCGCTCGAGTTAGTCGACCGCCGAGAACCATGCC -GAGTGGAGTTCCGAAGACTGCTGCTGCGATCGAGAGTAGAACTGCGCTTGCTCCCGAAAG -GGGTTGCGATCCCTCCGGACCAAGAGCTGCAGCGGTCTTCGGGAGAGGCAAACGATTGGG -TTGGCGATGGTACGCAGAATGGCGAGAGCAACGTGTGGCTAGAGTCATCGGGAGGGGTTC -GTACCTCGTCCGTAACCGGAGATACCGTCGACTGCGATGTCAGGGACGTCGGACGATCGA -GCTCTGAGCCCGCGGTCAAGCCACTCTCATATCTCGCAAAAGGCCGGGCTCGGGTTTCCC -CGGCCGGGAAACTCCGATCCAGAGATCGTTTCGGACTCCAGTTCGGAGACTCGTGGTTCG -GTTCGACATGATCATCATCTTGTGAAAGAGCATTCCGCAGTCCCGAGCTACGAGACTCAA -ATTTCTGCGGTGGGCGGTCCTTCACATCTCCCATCGACAGTGGCTCCTCAAGTGGAATTT -TGTTACCAGAGTCCACAGAGACCTTCGGGTTGAAAGAGATCGAGGGTCGGCGAGTGTGCA -GATAGCTGTCGAGAGGGGATCCCTGCACATCATCGGGCTTGTGTGGGAAGTTGTCGGTAT -CGTCTGCATGATGCTGCTCACCGTGGCGCGGGACGGATATAGGAGGGGTCGCTGAGTGTA -GATCACTTAACAAACTGCGGTCTTCATCAGTATCGTGGCGACGCGCATAATCATAAGCTT -GATTGGTGTGATCGCTCTCGATACCATCGCCCACTGTTGTTGCTTTCCTCCCAGCGTTTT -CATTCGCCCATTTTGGCTTGTCCCGCTCATGAAACTGCGTTTCAATCCTTAAGCCTGCCA -ATCCACTTGCCAATCCGGCCCTCTCGCTATGTGCCATGCCCCAACGAAGAGCACGAGGCC -GAAATTGTCGTATAGTCAGCCAGTGATAGGCTTGCAGTAGGGTTGTATAACCCTCTGCTT -GGCCTTTTGGTATGCTTCTCGATGGTTGGAGATGATGATGATTGTGGGGTGATAGTGTCA -AAAGTGGGGTACAGAACGTTTCGATATATTGGTGATTTCAATGAAGTCGTAATTGAGGAG -GAGGTGTTAAGAGTTGGACCAGAAATGAGAAAAAAATCCAGATGAGACACAGTGGGTACA -AACAAAGGTAGTAAGGGAGTGATTGGAAGTGATGTATTCGTCAAGTAGTTCAGTTCTGAG -AGGGGAGAAAAGACCGGGGAAACCCGGAGGATAACGATAATTGCAGGTCACCTGACTTTA -GGACAACCCTGGCCCTAATTTCCCTTCATGGCCGGGTATACAGCAGGAAATAGGGGCATC -GTGGGTATTTATTACTTTGATTGTTCCCCACAGTCTGATATACAGTCTTCATAAAGTGCC -AACGCAGACATTATCTATACTCAACTGTCTTGGGTAAATCTCCTAATATATCGATAATGG -CATAGTAGATATACAAAGACCAATATCTTGTCGAAGCTCCGACCAGTCGGCCATTATTAC -GTGACCACACATGGGCTCAGGGGTCAAGCCATAGTTTTAACCGCGCGCAAGGGGCGCTTG -TTTTGGAATACCTCCTTTTGGCTTTTGGAGATGTGAAAGGAGTTATCATTCAGACTTGAA 
-GTCGCCTCGGCAAACGCCATTTTGTTCCTTCTTTTCTCTTCTTTTTCTCCTTGTCGCTCT -GGCAGTAATTTCTTTCTCCTTGTGCTGACCTCCGTGCTGGACCGAAGTGCAGACCAGCCC -CCACCATTCTCATTCAAGGCACTGTCTCTCATTCAGACCAATTCATCAAGTGCTCGCGAC -CCCCAGTCGAATTTTTTTTTCTTTGCTGTCTATCTTTCGTATACTACTTGTCCACATGAA -ATTTGGAGCTTGCGCATTGCTTTTGTCCCGCGCTTGTCTCCTAACTTCCACATTTCTGGG -TCGCTAGTCACTCCTTGCGCTCGCGACTTGTCTCAATCCCATTGCTCTTGATACCACACT -CATTTACACTGCAACATGACTAGGCCTCAGATTATCCGAGCTGGTAGGTCAAATACTTCT -GCGCGGGGTCAATTGATTCGAAGTTGAAGGTGTTGACCGCCATCTCTAGATACCTTGGAC -CTTCAAGATCACGATGCCCCTTCCGCCAAAGATCACACCAAACAACCCGAACGTCCAGAG -CCTCTTGGGAGCGGCCGGCCTGCTCCCCATCAAGAACTTTCAATTCGTCATGCCGAACAA -GATTCGCAAGCGGAGATAAAGGGCGGTCCGGACAAAAATCACGAGTTCCGCGGTGACAAG -GAGGTATATCGAGAACCAGATGACGAGGAAGATGAGGATGAAAGTCACTACGGCCACGAA -GATGGCGATCTGGCAGACGGCGAAAGCGATGATATGATGGACGATGACATGGACAAAATC -TCCAGTTCGCCATCTATCGATGATGGTACGGATTCTCGGGGACTATAGGATCTAATTGGG -CATTAGCTGATTTTATACAGAGGACATCGACTTTGAGTTTGTATATGCGCTTCACAACTT -TGTCGCAACGGTTGATGGGCAAGCAAATGCCGCAAAGGGTGATACTATGGTTCTTCTCGA -CGACAGCAACAGCTACTGGTGGCTTGTGCGCGTCGTAAAGGATGGCAGTATTGGTTACCT -CCCTGCGGAACACATCGAAACACCAACAGAAAGGCTTGCCAGATTGAATAAACACAGAAA -CGTGGATCTGTCTGCGACCATGTTGGGCGATAATAACGAAAAATCGAAGAATCCTTTGAA -AAAAGCCATGCGCCGCCGGAATGCGAAGACTGTTAACTTCGGGGCTCCGACGTATATCGA -CGCCTCCGACGTCGATTACTCAACAGACGATGATTCCGAGCACGGCGACTTCTTCAACGA -CGACGAGACTCTGGATGGGGAAGAAGAAGATGTGCAAGAACAATCCCAAGATATTGTTGT -GGAACCTCTTAGGCCCAAAGCCCAGAAAGAACACGCCGCAGAGCATGCTGATGCCAACGG -TGGGGAAAGACAAGATCCACAGCGTTCAAGTTCTGATCGACGTCCGTCTAGTGAAGAGTT -CTTTGAGACCGAAGGTAAATGCACTCAACTCAACTGCTCTCGGATCGAATGTGGAACTCA -CTCTCACAAAGTAGAACCCACTGTCAGTAGATCCAGAAATGGCACTGTACGCAACACCGA -TTCCTTCTTCAAAGACGATACTGCCGAGCCCAAGAAGATTTCTCTCACGCCAAATCTTTT -GCGCGATATTCGCGATGAAAACAGCACAAGTACCTTGGCTGAGTGGAAAGAGGTATGTTC -CCCATAGCGGGCTTATCCAAAGAACAGTTGGCTGACGTTTCGGTGAAGCCTACTCGTGGA -AGCTTTGAATCCATCGAAAAGGGCTCTAACTCACAGGAAAAAACCAAAGAGAAAGAGGAG -AAGAAGCGAAAAGACAAGAAACCCGGGATGCTCAGCGGGTTATTCAAGCGTAAAAAGAGC -AAAGATGACGTTGACGACCCAGAGAGGTCTCCTGCAGAGTCGGCATTCCGCACATCACCC 
-CAACCCAAGACCTCGATGGAGTCTATCTCATCAAAGTCTCTGTCCCCCGAACAACAGCGG -CCCTCTAAGCCCCAGAAAATATTGAACAATAAGCTACACAAGCAGCCCCCAGGGATCATG -AAAGTTGATCCAGTTCTTGACCCTGCGATGGCCGAATTCACGAGGGGCAACGAGGCCATT -AATCAGGGTCAGGGTAACAGGAGCATTCGACAGGTGCCACTACAAGAAGAAAGAGAAAGC -AGCTACGAAGACTGTGAAATTGTGTTACAATCGCCTATTAGTCAATCGAGTGCTTCCAAG -GACCCCTTTGCTACTCCGCTCGAGTCCATGGACCCCTTGGAGTCGCCTGTTGATCGACCA -TCTAGTGAAGCCCAATGGAGAGGACCCAATGACCCAGGCCGAACAACGGCTGGACCGGTG -TCCGTTACATCTCCACCCCAGAAATTCGTTCCTACGCCGGGGGCTAGAGATGAGTCAGAG -TCTCCCGTCAATATTTCTCCCGTGGAGGCTTATTCTCCTATGAGCGCACGCGGCTCAACA -AATAATGCTTCTTCTCGAGAAGTGCGCTCTATCTCGCCACCGTCTCCGCCATCATCACCC -GGGCAGGACACTAATTACCCCAAGGGTAGCACGGCTGCTACTGCCTCACTAGACACCCTT -TCTGTGGGTACCCCGACGTGGAGCGATGCCAGTCTACGATCATACATGGACGAAGAAAAT -GACATTCGTGACTTGTTTATTATTGTTCATGACAAATCAAATGTACCTCCCGCTGGACCT -GATCACCCCATCACCGGCCGTCTGTTCAAGGAAGAAAGCAAGAGACTCAAGGAGATGAAT -AATCAGCTTGACGACATGCTTGTTAACTGGATGGCACAGCGTACTAACAGTTCGACCTCG -ACTCGTGGCATCCAGAGTCCACCTGTATAAAACAGGTGTGTATGAATCTCTTGGAACCAA -AACCATGTGTCGATAGATGGTCCGCCTTGTACGGCGATGAAGGCCGAGATGTTTTAAAAT -GTGCTGGGCGCATCCTTTCTGGCCGACCCGTATTCTTTCTTTTCACCTTAACTTTCTTTT -CTTTCCATGTTGTGTCTTGATTTCACAGTTTGGCGCGAATACTTGTTCTTCATTTTTCCA -TCGCCTCACATAGAGAGAGCGATGTGAGAGCGATGTGACCCTGAACGTTTAACGGCCAGC -TCTATTCACCATGCTTTCCATCTTTTTCTTCGATTTTCACCGGAATACCCCGACTCATCC -GTGTTCAGTTTTGTTTCTTTCACATCTTGTCATTCAAGGCCAAACTGGCGTATATCTCAA -TACAACATCAGTTGCCAAGTATATTCTCCTGAGAAAATATAGAAATATTCCAAAGTTAGA -AGGCGTACCCCTCCACTGTATGCAGGTAAGCAGATGATCAATATCTAATTGGAGAGAAGG -AGCATTCCGAAGATCCTGCGGAAGATCGAAAGTCGTGCGGGCAGGTCAGCCGCACCGTGT -ATACACAGAGCCCCCACAAGTCGTCTGTCAGACATACATAAACCCTTTTCCTGTTTCCCG -TTCTTGGTGCAACAACATTTTCCTCGGCGCATTCGTCGCATCACCATGGCAGATCCATAC -GTTTACAGCTCGCACTCAACGCCTACGCCGAGCGGGGTTGCCTACTATCCCCCAGAAGAT -CAGTCCCAGTACCTGGCCTATGGCCACCAACAGCCATATGGAAACCAAGAATATCATAAC -TCTGGACCGGACCCATATAACCATGTTCCAGACTCTTACCAGCCCATTCAAAGCCCAAAC -CAAGCCTACGCTCCTCATCAAAGCTCATACCACCTCGTGCCCGAGCCATATGGATCTACG -GAGAGAAGCCATACGCCTACAGGGCAGCCAGACTACCTTGGGCCAGTGACTCCAACAGGA 
-AATGAACATGTGCAAGATAGAATCCCCGGGAACGCAGGATACTAGTGAGTGGACAATACG -TACCCAGCTATGAAAACCGACACTGACAGTGAGCTCTAGCGATAATCATCCAGCCGGTCA -ATCAGGGTACACGCCCTCCGAAAGCCCCCATCGCCCCGCTGTGTATGTATCAGAGCCAGA -CAATTCAGAACGTAGTGGCATGGGTGAACAGCGTTCCGATACAGACACGGACACTGACCG -CGGTATGGACCGCGGGCTTGGAGGTTCGCTTGCTGGTGGCGTGGCGGGCTATTACCTCGG -CCATAAGAAAGAACATGGCTTGCTGGGCGCAATTGGTGGTGCTCTGCTTGGAAACTTACT -GGAGAACAAGGTGAAGGATTCCAAGAAGGATGACGATAGTGACAATGAGCATGGGCACGG -TCACAGTGatcatcatcaccgtcgccgtcgccgccatcaccaccaccatggccatcGGAG -CCATTCAGGGCACTCAGGGCACAGTAGTTATAGTGGTCATAGCGGGCACTCTCATGGTCG -GCACTCTCATAGTTCTCATCACAGCCACTCGAGGCACCGCGATGAGGAGGACTACTGAGT -GGAACTAGATTTCGGGTAGTGTTTTTTTGCCTAAGGTTTTCTTTGTCTTGTCTTTTTCTT -TTTTTGAACGTGCTTCAATTCTAGCCTTTTAGCATGGAATATGGGTTAATGTACTTTCTT -ATGCTCCAGACTTGGCATCGGCTGAGAGACAGTATTCTTGTCTAGATGTATTTCTTTTTG -TTCATTTCTTTTTCTGTTCCCAGAAGTCTCTGGTTGATGAGCACGCTGTATTCTCTCCCA -TGCAGACTCTGTTTCAACATGTGCGACATAGATCCAGGCCTTCCTGGTAGTTGATTATCT -GTGACATATACATGATGGTCATACGTAATGAGGAAATCCATCCTTAACGCTGATTTGCAA -ACGAACAGGTGATCAATCCATGTCCCAGTCCTCCAGGGCTCCTGGTCACGACCTCGTATG -CCATCTTGTGTCATCGGTCAGTTGGAGAAAGTAAGAGCTTCAATTCACGCCCACTTATGT -TAGAAAACTTATGTCCAGGAGATACTTCAAATTGACGCGATGACCTTGAGTACGACATAG -TTCTCCCATCTAGTCTCGTTCACCTCAGCCTCTTTTTCCATCCCTCTCAATGCTGGTTTT -GCTCTCTTCTCTTTCGTTTGGTGTTTTTTCTTTTAGTGAAAACAGCCATGGGTATGCCAG -AGAGCCTACGCCACTAATATTCATCCAATCCACCTGAGGCGCGAAGATAAGACTGGTTCC -CGGGCCTTTTTACGCGGATAGACCTTTTTTCTATTTGTTCCCTATTTGGCGCAGAGTTAC -TGAGTGTCCTCAATTGACGTAGTAAATCGGGTCGTTATGAAGGGCGGCGGGCAATCAATC -CTCAAGGCCAGTTGCGGCCAACCTGAGCTTTGGTTACATCGCAAGCAAGCATTTAGTTTG -CTATGCCTTCCATCATGCTGGGGAAGCAGAAGTATGGGCAGCCATAAGGAAAATATCCAC -TGATTCGATTCGCTCTCTTGGGGAGAGGTTTAAATGAGGAATAGATATGTACATAATGTT -TACTATGCTGGCTAAGCACTAGACATTCCTCCTACGTCATATCCACTGATTTTTTATAGA -CTCTAGTGTATATGGATTATTTTCTTATTTTGAACCTCCTCTTTCTTCAACTGCTGATCC -TCGTGTTCATTCTGGCCTGTGAATGTTCATATGATATGGGCTTGGGATGTGTCATTTTTA -CGAAGTCGTGAAGATTGAGATATTAACTCCGTGCATGAAACATTTGACTTGTTTATCTTG -ATAGCCGGGGCATGGCCTACATCATAAACATAATGTCGGGTGTTACCTAAATGCAGCACC 
-CTGCATTTCCAAGCTGGGAAGCTCGAAGCCCTGATACGCCCATTTATGCTTATCGCATGA -CTCATCCGATCTGCCTGCGATGAACTCGATAAGGCGATTCAGCTCTCGTGACACTTCTGA -ATGACTTGAGGGctgttctctcttcttgattctgttcacttactctgtactTATTTCCTT -TTCCAAGATTACTCTATTAGCTTAATAGACTTAGAGTACATATCCTCCCTCACCTTGTTT -ACATTGGATTCGCTTATCGCCCTTACCCCATACTTTGATGCCCCTACACCGGGAGACACC -ACCTGAGGCTCTACTCAGCCATATTTCCTGTCGAAACTGTTGAAGACCCCTCTATCAATA -TACCATGTCTCCCCCGTTATTACAATACTTGCTTTCTGGGTCCCTTCCTGATCACTGTGA -ACCGACCTCGCCCTTCCTGACCGCCGTCTCCTCCCATCTCCACATCTGCATCCCCACTCC -TCTCGCGGCGATCTCTTCCGTTCTCGGGACCCTCAGCATAGTCTCATGGCTCTTTGCACA -ATTACCCCAGATTTACAAGAACTTCCAAGTGCAATCGACGTCAGGCTTATCCATATTCTT -CTTGATAATCTGGTGTCTTGGTGATGCGAGCAACCTCGTTGGAGCATTATACACCCGGCA -AGCAGGGTGGCAGGTGGTGATTGCCAGTTATTACGTCTTCGTTGATATCACTCTGGTCTT -CCAATTCTTCTGGTACACCCATTACAAAGCTCGCCGAAATGATTCGTATAGCAACCTGTC -ACACTCGCATGGCTCGGCTCCTCGAGACATCATCCAAGGGGTGCCTCCTCCAGGCCACGA -ATCAAACCATCAGACCCCAGCTCCCGTAAACATGGACAAGAAGCGCTCCGAGGCCAAAGA -TGTGGGCGTACAGGCCGGGTCAGTACTCAACTCAGCCCATGGACAAACAGCTTCATACTC -AAACGAGAAATTGAGCTCCTCCCGTCGATCCGTGAGGATGAGCAGCAGCGTGCAGAGTCC -TCCATTCGCACTACCCCGGACGATGCTCGTGGCGTCACTCCTCTGTGCCGTACTCGCCAA -CGCTGCTCCCACCGACAAGCCTCATCCGCCCATTTCTGAGGTACCGCGGGAGGCCATCGT -TGAGATCATCGGCCGCGTCTTCTCCTGGATGTCAACAATTCTGTATCTCGGCTCGCGCCC -TCCACAGCTATACAAGAATTACAAGCGCAAAAGCACCGAAGGTCTCTCCCCATTGCTCTT -CATGGCTGCATTTTGTGGGAATCTGTTCTATTCCAGCTCGTTGCTCACCAACCCGAACGC -TTGGTCCGATTTTTCTCCATACGGCGGCGGCGGGTGGGCCGATAACCACGGCAACGACCG -CTTAGAGTGGATTGGACGTGCTACCCCGTTCTTCCTGGGTGCCTTTGGTGTCTTGGGATT -GGATGGGTTCATGGGTGTTCAGTTCCTCATGTATGGCGACGGTCCAGAGCACGAAGATGA -CGAGAGTTTCATCAGCGGTGATGGGGATGATCCAAAGCGCGGCCGTGGTCGCTGGAGACG -TGTTCGCGGCTGGATGCGTGGCTGGATCCCCTCTTCTTCGCCTCGGAGGGTATTAGGGGA -GGCTTCGGCCCCGCAGGAGGGACAGGCTCTTTTGGGCGCAGAGCGCGGCCGATACGGCAC -AGTCTGATCGTGGCTGTGCATATACAACTCCCTTCTTCGTTTGTTTCCTGCCCACGCCTC -TTGCGCAGGGTAGGTATGATATATATGATATGATGACTTCAAAGCTTTATTTTCCTCTCT -TAGCATCTATTTTCCTGCATTTTGTCATTTGTATGAATTGTGTGTGATTAGGTACATCAA -GTGTTCGCCTTCATTCGCTCTTTAACCGACTTGTGTTTTCTGGGCGTCTTTGGGACTGGA 
-TAGCATACATAGGGACTCAGTCTACACTCCGAGATCTCGCATTCCTTCTGGGTCTTTTCC -TTGTGCAAATCATTAGCTTGTATAACATCTGGCTTGCGGCTTGCGGCTTGCCCACGTTCT -TTCGGCTGCGGTACTCACTATAGCCGCGCCTGCTCAGACGACCTCGTCTTTGGCCCCGCT -CTTGGGTTGTGGCTCTGAGAACTTGACAAAGGATGTGACTTTTGAGACCATCAATGTCGA -TGACAGTTTGTGGACTGTTCTGCCGCCTAAGTTTAGTCCCGTTGTCGATGGCTACCTGTC -AGTGTTCATCTAACCACATATGCAGATCAAAATTCCTTGGCAAAGTAGAACTGACTCGAG -ATTTGTGATGGCCATTTGAGTGCTGGGGCTCTCCAATCGCAGACCTTTTTGATTCAGACA -GTCGTCGGCGTCGTTGCGGAAGAGAGTTATGCTGTCTCTTTTGATTACCGCCAGACAGCT -GGCTCGAAAGCTGGCTCGAAAGCTGATGTACAGCTTGTACCGTAGATCTGCTAATCGACG -GGGCAGATGTTGGCACTACAACTCTCACAGCTGGTGTCTGAAGGACCATACCCAGGCAAT -GGATGGCCGTTAGCTCAGGATTCGAGATGCTTATATTAATTGATTGCCCTCATGCAACGA -TCGGGGGCACAACAGTGGATGTGGGCAACGTCTCGTTCCAGAAGAGCTGTAGCCCACCTG -CCTCAAGCAGCGTCTCGGTCTCTATCAAGGCATCATCGGGTTCGTCTATTGCTACTAGCG -GTCTACTCGCTGCTTCTACCTCCTCATTTGGTCATCCCAGCTTCGAGGAGTTGATTTCTT -CATCGCACATCTCCTCAACCTCGCCAAGTAGCACCGCATGTGCTGTCACCTTGACAACCA -CTATTATCTAAGCCGCATACCCTCCCTCTAGCAATGCCACTACGTCAGATATGACAACCT -CTGCTACCTTTACTACCCACATTGCGACTACCGCAACCTATTTCTCGACTATCGATAACT -ACCCAGCCACCAACAAGACAGCTCATACTACAACTGAGGCCATCGTTGTCTTGAGTATCA -TCTTCTGCCCTGTTAGAGACACAACGACCGCTACTTCGGTCCTCAACAACATCGCCGTGG -CCACTACGATACCTGGCTCTGGCTTAGAGGTCTCCCCTGGTAACAATGCCGGCCACTCCT -CCAGCCTACCTGGCATCAAAGGCACACGCATCTGCTCAGCCACGGTCAAGTCTTCGCCCG -TCGCAGCCTCTTCTTCTACTGTCAGCAGTAGCACATCAAAAATATATCACCCGCGTTCAA -CGGTGTTTTGTCCTTCACTGTTTCCGGTGTCGTGTCTGCCCTTGGTGCCATTGCAGCTCT -CGGACTCTTCTTTTGAGTATATGTCTATTTCCTTGGATATCTTGGATACGGATTATAATG -GATTTGAGCTTTCGTTCTCCAAGATAGAGTACAACGTCATACTGAACATACATCCAGTAG -CGCTTCATATTTGCCCTCCCGTAGAAGCTCAATTCAGCTTTCGAAATCCCAAAAAAGACA -TGGAAAATCCATCGCTGTACTTGACATAGCCCATTCAATAGAAATCTTGCTTTTCTTATT -CCATACTCTCACTCTCTCACTTTGTCCTTTTTGAAACATGTTTGGAATTTTTGCAGACTA -ATGTATATGCGCCTGTCTAATTGGTTGTTTCTCAACATCTTTTCATTGGCGGGCTTGCCC -CAGCCCATGTAGAGGCAGTGTACGAAGCACTGAGTAGTGAAGCCTCCAACCTCTGAAAAT -TGTTTTCTCTGTCTCTCTGATCCAGGCATGACCTAGCCGCCTGACGTAACAGCGTAAAGG -CTCACCTGGGCACAATAGCATACCAATCATACTCGCAGACCGGTCACTTATATCTCAGCT 
-ACATATGATATCGGCATGAGGACAACCCAAAACAGGGATAGCGAGAAACAAGAAATCATA -GACCACGATCATGACTCATGCTATAGTTTTCCGGTGGTTGTTCAGTCTAATCTTGTATAT -CCGGAGTACTTGTGTCTTATTCGCCTCCCCCTCAAATAAAAAGGAGGGGTGCTCTGCCCG -AATTTTCCCTTACTTTACGGGGATTTAGGGAGGGGAACTCCAGTTTTGGAGTTCTCGTGG -TCTTCGAATCATATGGTGCAGCTAGGAAATACGGAGATGTTTCAGAGAAAAAACCTGCCG -AAAGTAGCCGGTCGGGCATCGTTGGTCCTATCCGGTTTTTTTTTTCCTTTCTTTTTTGTC -CTTTCTGTCTCATTGGATCTCTGTTATATAGAACCATCACATTGGTAGTGCAAGGCATGT -AGGCAGGTACTAGATGTATAGGTCGTGGTACCAAAGAATCGGAACATCGAAAATGGAGCA -TGGCAAGGCACGAGGTCCGAGGGACTTTGTTTTTTTTTCTAGAATTTATTATGCACGGGC -TTTGGGGGCCTTTTAAGTATTTTAAGTATTTCAAGATTAATTTCTGAACTCTTAGACAAA -TGAGTCCAATATATGCGGGACTTTCTTTTTTGGCTCGATGTATGCACCTTTTAAGCAGGT -AGGCACATATATGTTATATATAGAGGGGGCTGTCAGTACAGATAAATACCTATCAAGACA -GTGTCTTTGCCTCTAGTACTCCGTACTCCGTACGTGTATGGAGTACTGGCTCTGCAGCAT -GACGCTACTGCAGGGATCAACATCCAGGTCCATCGAGGGCGGAGTTGTCCACGGTGAGTC -AGAGCGGTCTCTACGTGGCCAGTTGAATGTCTGACATGTCAGAGCCAGAGAGAAAGGAGA -GAGTACAGAATACTACATACATTTTAAGATGTGCTTTCAGATGCGTACTGCCCTTGGTTG -TACGGGGTAATGGGCGCGTGTGATGATGTGAAGTAAATGACCCGATAGCTGTAAGGTTGA -CTATAATGCCTAAACTTTGCAGGCCATTACATATTTGGAGTACGGTACAGAAAGAGACCG -AGCACTGCAGCCTACGTAGGGGGGTAGACTGAGCAACGGAAGGAATGTCAGGTATTTTAG -AGGTATAAGTACGGACGGAGTATAGGGTCATGGAAATTTCAGTGTCTATGAATAATATCG -AAAAAAGCCAATACACGAACCAGTGAGAACCAGTGAGAAAACCAGTGACACGGGCGTTGC -ATCACCGGATTTGCCTTAAAGGTCTAGGGCCTTTGGTATGGATGTTACAATACCATTCTG -TGCACGTTCCAAACTGGGCTTGGATTTGGaaagaaaaaaaaaaaaaaagagaacaaGTAG -TGAAGAAATTAACCTGAGAATTGAATATAATAGTAATTACACATATCAGTATTCAATCAA -CAACATATGTATCTACCTAGATACTCCGTACAGAGTGAAATCATCTCAAACGTCTACACG -TAAGAACTTTTTAAATTTAGAATTTCCATATGTGGGCCTCCCTGTATACCAAATACATGT -AAAATTACCGGGGCAACCCAACGGCCTTTTCCCACACAACGCCCCCTATACACTTTTCTT -CTCTTCATCCTTCTTCTCAACCTCTTTTTTTTCTCATCAATCTCCTGTGTCAACGAGCTT -TTCACTAAGTGTCGTTGATCACCTCATTCTGTGAGGTTTGCGTCTAAGTGTGGGAAAAAA -ATTTTTTTCTTCCACTCTTCGTTCTCCGAGCGCGTTCAGTTCATCCTGCTACGTAGGCTC -TTCCACTTCGCTTCGACCTGGTTATTCGCCTCTCAAGCACTTTGAATTGACTGGACTGTG -ATCATATCACCTTCCCTCCTAGGCGTTATTTCGACACTCTAAGCGAGGACTGTGCCTTCA 
-CCGTTCATCACGTTCTTTCCAACTCTCAGTCCCCTGTCATTTGTCCCTTGGATCATTGAA -CGAACCTCGATGTCATTCAACTAGAGACTGCCCCTAGGAATACGACAGAGCGATCCGCTC -GCCGAGCACCGCTCTTCTCTCCCTCCCCCCCCCCAGAATCAGGATCTGGCGGAGTAGACG -GTTCGTATCCTTGTTTCTTCAAGTCTTGTCTCGGTCCACCTGGCTTTTGCCTACCCCGGA -GGCCTTGTATACTAATTACCCCCTTTGCCTGGCAACAGAATACGTCTAGCTCCCCGTCCT -GAACTGGCAACCTCCCCTCCCCTCAAGATGCCGTCCTTCTTCGTCCCAGGCCACCATGGC -CTGCCTACGCCTCCCCATGTCAACGGTGGTCGTATGGAAGACCCATCATTCTACCCGGTT -GGTCATGCAGGATTTCCTCCTCGCTACCACCAAAGCGGCAATGAGTTCATCGAACAGTAC -TCGCAGTCTCTGTGCTACGCCAAACCTACTTCGATGAATCACCACCAGTCCGCCCACCCG -ATGAGCACTGCTCGCGATCATCACATGATGAACCAGCAGCCCATGTTCAACCCCATGGTT -GGCACTGGATTGCCCTCAATCCGAAGCAACGTCCAACTTCCGCCCATGGACGCTACGATT -CCGCCTCAATATCGCCGACAGGAAGCCCCAATGCAACAGCAACCCGAACAACCCCGCAAG -GAGGAGAAAGCTACTGGAGGTGTCGCAGCTCACCTGGATTATGAGATGGATCGCATGTCC -GACTTTGTGGCAGAGATGGCTCAGGGAATGTATGATTTGTTCGAGACCAACATCACCATT -GCAGATATTGACCTCGTGCGAAGCATCTACCCAGGATCTTCGGTTCCCCCCCAGTTTCGG -AAATACGTCTTTCAGATTTTGTCCTCGACACGCCTCCCGAGTTCGACCATTCTGCTCGGT -CTCTTCTATCTCGCCAGCCGCGTGCGCCTATTGTCAGCCCAACGCACCTTCGCCAAGACA -GACAGTAGCCAGGTCTATCGCATGTTGACTGTGGCTCTTTTGTTGGGCAGCAAGTTCCTT -GATGACAACACCTTCCAAAACAAATCCTGGGCCGAGGTTAGCAACATCCCGGTTGCTGAG -TTGAACCACATGGAATTGGAATGGCTCTTTGCCTTCGACTGGAAGATTCACGACCGCATT -TACGACAAGAAGGATGGTTTCGCCTCGTGGCGCGCGCACTGGGATACATGGCGGGTTAAG -GCAACAGCTCGGGCCCAGGAGACTCGCCAGACCCTGGCGCCTCTCGAGACCAACGTGGTC -CGTGGCCAGGGTGTCACCAAGCCCCTTATGTCCCCCGAAGGCCCCATCCCGCCGCAGTAT -CAGCGCAGCTCACAAATTGAGAACTCCTGGCTCAACCCCACCGCGTCCGAATATTCGCCT -CCCTCTGCCCTTTCTAGCGGCCCCACCACCCCCGATTATTACTCGGTTGGCCCCTGGGGA -TACGCTAACCCGCCTCCGCCTCCGCCCTACTCGCGAGGTTGGAACGCACCCTCACAGTAT -ATGGCTCACCCCGCACCTCGATCCCAGCCCCCATCCTATCACCACACTCCGGCGTACGGC -TTGCCATTCGCTCAGAGTCTCTGGACGGGTCATGGTTCATCTTGTGGCTGCACCTACTGT -GCCAAGCACCTCGAGCACTACATGTGCAACAATGCATTCCCCTCCATGCAGCAGCCGATG -ATCGCAGCCTAGTTGACGATACCTGGCATGTTCTGTCCGCCACCCTGGATGGACCAGACG -GTGTCATCGGTACAAGACTATTGAGTCTTGCTGCTTGTTTTTAAATTTTTTCTTACTGCA -ACTCTAGACTTCTTGCAACGTTTGTTTGGGCGACTCGCATGGGATTATTTCGATACCGCT 
-TCCTCTTCTGCTTGAATTGCTTCCAACCTTGGCACCTTTCGACCTTTTGTTCCATCTAGC -TTTCACCTGCTTCTGAGCAATTGGCCTACCGCGTCTCCAAAAAAAAAGTACTTTTGTTCT -GCGTCCGCTGGTTCTCGTTTGGCAAAGCGaaaaaacaaaaaaaaggaaaaaaaagaggaa -aaaCACTCCCACAACGGACATTGGCATGCTCCCGGCAAGCCCACGGTGCAAGATCTCGAC -TCAGCCGAATATACTGGCATCACTTTTGATTTCATCAAGACTTACGGTCTCACCGTCGGG -CCTTACGTCTTTCCACATATGCCGGGGGCGCAGTCACGACTATCACGAATCCCCAAAATC -TGACTTCTTTGTTTATTACCCACGACTTCGGTTTATGTCCTTGGTCCTTTTGTTTTATGC -TCGGCATTTGTGTCTCGGTGATTACCCATAGAGTGCTTTCTTTCTTCTTTTTACTTTCAT -TTGATCTCGGTACATTATTCCTATTCGCCACGCTACATGCAGTGCCTGTTGTTAATCTTT -TTTTTTTTCACTTCTCATCTCATTTTTCAATTGTCCATTCACTTTTTTTTTTTTGTCCCC -TCTCGATTCCTCTTCGACTTGTTCTTATCTTTCTCTGACGAGCCTTCTCCAATCAATAAG -ATCAACAATTCAAGGATTCTTGACCAACCCATCTAGGTTGGATGTATTCAATCACTCCCA -CATTTCTCACTTGTGGCAGATGGCAAACAATTCTTGCGTCCCATTCTCCCCGTGAAATCT -TCGCATGGGTATCGGTGCCTTCAATGCATCTTCTATCTGATCTCATTCTGAGAGTCCTGT -CAGTCCTCGCGATTGACATCTATGTAGTCTATCTGATTTGTCTTCTCTTTTGGCTTGCTT -CCATATATGTGCCCATTGCCATCCGGCGCTGCCGCCAAGCCGGGGCCGTCGCATTTTCTG -TCTCCATTGGTCGTGCGCTGGCCTGGCAGTGAGTCCGTGGCTGTGGCTGTTGGCAAGCAA -ATTTATTGAAGATATGTTATCTAATTAGCGACAAACTATTGTCTTTGTCCGGGCTACAAA -CCTTTGGTGTCTCTCTCGTAAGGCTTGTGTCAATATCCCCGTCGAAAGTTCTGTTCAAGT -AGTGAGCCTTCGCTTTCCGTCCGCTTGTAGACGCTATATTGACCTCGGGGGCTGTGTGAA -AGTCCGGCTTTGAGTGATGTCTAATAGTGAACCGAGGGCGAGATCTCATAATCTTTGTGA -TGGAATTGAGTCTAGCCTAACACTAGAAAGAGCTACCAGGCTGAAAGGAAATATAGGTAA -AAATGCAGTTGTACTAGTGGGTAATCAAAATTCATCCAGACAGTCGTTGCGGGTCGTAAG -GGTAGATATAGAGATGGAAACAAAAACTTGTAAAGGAGGCAGACGTATAAAGTAGTCTAG -GCATCAGCAAAAAACAACCGGAACAAAATCAGTAGATGCTCCCTTGAATGCTCTGATCAA -GCTTTTGACGCTCCTCTAAGGATAATTCCTCGATGTGGAGATGAGATGGGGAATCCGTTG -ATGAACTTTCTGATCTTTGCACCTGTGATTCTGAGTGGTTGGACAACTCGTCTCCATCGC -CTAGGATGTTGGCCATGGCCACTGGGTGCTCAGATTGGTCGGTGGAAGAAGAGACgtggt -gatgatgatagtggtgatggtggtggtgTAGGCGAACTTTCTTCGATTTCTCAGTGGGAT -TCCCGCTCATCTCATCATCCCCATCATCTTCGAGCTCCTGAGAGGGGGGTGGCCGTTGGG -TTTTCTCGCTGGACGGGAATTCAAAGTGCTGCTGGTTCGCAAGAGACAAAGTGCCGGTGG -CGAGGTTGGAGGAGGCTGATGCAGCGTGACGGTGGTGATCGTGATAATCGCCTTCGTCGT 
-CGGATGGGGCAAGGCTATTTGCTCCACTCATGGAGCCTCCGCCGTCACGGCCGCCAAGGT -GGAGGAGAGCGTATTTTAGGTGCGCCGATGCTGAAGATGTGGCTGTTGGTTGGTGGGGGA -GATGGTCTTCGCTATTTCGGTTCAATCCGTGGAAGTTCGGCCGTTTTGGATGCAATGGAT -TCACATTTGGCCGGGCTTCTGAGCTAGAATCGTGTTGCTTCGTGGTGTTTCCGTTGTCAT -CGTCGGAAAGTGTGTGCTGTGAGTTGGAGACATGTCGAGCGTCGAATTTGCGGCTGGTAT -CATTCTGTACCATACTATCTTCTGAGGATGATATCACCTCGTCGATGGATATAGATTTTT -GAAGTTTGGAGCCAAAACTACCTGCATGGTCATCCGCATGCTTTGCTTGGGCATAAGCGT -GTTCGTGACGCATGGAATTCAGTAGATTTGCTCGCAACGATTGTTTTCCATCTACAGTCA -TGCCGTGAGTGCAGCCGTCGGGACAGCCACCCCACTTGCGCCGAATTGGGATATGGTCTG -TGAGGCCGACATGACCAGCAGAAGGAATGGGCGCAGGGACCTGCTCTTTGGATGCATTTT -CACGATCTCGATCTTTTCGAAGATCTGACCAGGTACGAAGCTTGTTCTGTAGTGTGTATT -TTCCATTCTCTGGGTTGTGGGTCAAGATCAGGAACTCGCAATGATTGATGTTGCGAAGGT -CCTCAAAGTATTCCACGCTCCAGTGATACCATTTCATGAGGAATACTCTTGCCATGAGGC -CGTGCGTAACAAGAACACAGACATTTGCAAAGTCGTCCTCGCCGAACTGACGCCAGAGGG -ACTCATTAAACCCACTCACTCGGTCATAGGCATCTGCTGCAGATTCGCCATTGGGAATCC -GGTAGAAGAAATGACCATAGTCTGCCCGCTCCATCCACATGCGCTCCATCTCTGTAGAAC -ACGGCTGGAAGTTACCAAAATCTTGTTCCCGCAGCCTGGGTTCTTCATAGACTTTGATTG -TGTGTCTTGGGAAAGGTGAAGGAGAGGGTGTATCGGAGGTTAGAGATTCAATGATTCCCT -CGGTAGTCTCTCTTGTCCGCCGATACGGGGAGGTAAAGAAGTGAAGTTTATCATCAGGTT -GGAGGAGGTCACGTAGTCGACGGCCTGCCTCTTGGGCTTGTCGATATCCTTCAGCAGTGA -GCTTCACTCGGTGGTCTGGAATAGTTTGATGGATATCGCGGTTTTTGTTGCCTTCTGACT -GTGCATGGCGCACTAGAATGATCATCTATAAGTAGGAAAAGGGTCAGTCTAAGTCAACAC -ATCCCAATATCTCAGAAATCGACTGACCCGTGGCTTCCCCATGGTGGTAGGGTCATGGGG -AATGTAATGGGTTGCGAATGCGGGGGAGGGGCTAGTATGATGATTCTATTTGCAGCAAGA -AGGATGAGGTGAAGACCAAAAAAGAAGAATAAATGAAAGTAGCCGTGATGATCGAAATAA -TCAATCTATCTAAAGCGGAACGGTTTGGAAGAGAGAGGTCAAAGTAAAGAAAAAAGTTCA -TTCCAGTCGGAGATACAACTGACCCGCACGGGCCAACGGGGTGATCCATTCAGCGCTCCT -TTACGCAACTTCAGAATAGCATCAACTCGCCATGGCCGATCTGCTGGAGCTGCTCGCTCC -ACATCTAGATCCAACTCAATCGCATGATGTAACTACCGACAAATATCTCACTCGCCTCTC -AACCTTGTCGCTCGAAGCTCTCCAGACCACAGAACCCCCATCGCTTGCCCAATCCTCCCA -CTCAACCCTTCTCTCCCTCCAAGCCTTATCGAACCGTTCGCATCGGGCATTTGTCACCTC -TGCCGATAATTTGTCCACACTATGTACATCTGTTCCGCAGTTGACCCGCGAGGCCCAACA 
-GCTACGCGATGCTATCCCTAAACTCGACGAGGAGGCCGTTGGGTTCTCCACCAAATACAG -TAAAATCACAGACAACGCCGCACTAGAACGACGGAAAAAAGCGATGCAGCTCTCTCGCAA -TGTCGACCGATTATCAGATATCTTGGAACTACCAAGTCTGCTGTCCAAAGCCGTCTCCGC -GGCCTCGGTCAATTCTGGTACCGGTGTCTCCTCCACGACGTACTCCTCTGCACTCGACCT -GCATGCACATATCAAGCGATTACAAACATTATACCCCGAATCACCATTGGTCCAAGATGT -TGCGTCCCAGGCGGAGGATGCTATGAAGGACATGACAAGTAATTTGATCACGGGGCTCCG -AGCCCAGAACCTGCGGCTGGCAGCTGGTATCCGAACTGTGGGTTGGCTGCGGCGAGTGGC -CCCTGATTTAGAGATGCTCCAGAGTGATGGGGCAGTTGGGACAGGAGAAGGCGCTCTAGG -GGCCATCTTCCTGGTCTGCAGACTAGCGCATTTGGTATCGACCCTTGAAGCGCTGGATCC -CCTCCGCGAGCTCGCAGACCAAGAAACCCAACGGAGACTCGGCGGCAAGAACAAGTCAGA -AACGACCGCGTGGTCCGGCGGTCAGCAGACTGACCGCTACTTAAAAAGATACATTGAGAT -CTTCCGTGAGCAGAGCTTTGCAATTGTCTCTCTATACAAGAATATCTTTAATTCCGAGCA -ATCCGAATCCGAATTTGCCATCTCAGGATTACGAGGTGCAGACGCCCGCTCCAAAGCCAA -GTCAAAGCCAGCACACTCAGACGATCCCTTGCAACGCCTACCTCCGGCTTTAGCTACTTT -CCCAATGCATCTTGTGCAGTTGCTCACTGATACAATGCGCTCCTACTTACCCAACGTGCG -AGATCGAAGTTCAAGGGAAAGCCTGCTCACCCAGCTTCTGTACTGTGCCGCAAGTCTTGG -TCGTCTTGGCGGTGACTTTGGCATGATTTTGACCGAGCTGAGCGGGGAGGAAGATACCGA -AGATATGACCTACGAATGGGAGGAGGTCATGCGCAAACATCGAGCCTTGGCAGGGCGTCT -GGAGCAATTGACCAGTCGGGTGCCAGTGAATTAACATCCATATACCCAAGTAAAATGTCA -TTTCGAGATGACGGTGGCATCTAGTGGAGTCTGTGTATAGTAGAGCTCTCGGCGGACTAT -TTAATTGATACTACCCACACACAAATACATCCAGGCGTTGTATAATTCATTTATATCAAG -GTTCCAGGTATGCTAGCATTACTTTGTCGTCAATATGTGGAGATGAAACCCATCAAAACA -CTTGACAATGAACGACAATGAACTGCAGCAAAAGCATTGTATGCCAGAGGCCCAGAGCAC -ACAGCAGTCGGTGGGTTTCCGTGTTTTGATAGCTGAAATTATCGCGTTATACCAGACTGT -ATGCCTATGGATGCTTCAAGGCGGTGATAAAGGAGGTGATTGAGTGGTAAGCAACATTGG -AATGAAAGATTTATATGTCCCGAGGTTGGGTGGCATAGTTGCCCATGGGCCATGGGCCAT -ACTGAGCCACTTTGTGTGGGTTCCAGTGTGAGTTCGGGCAATACTTGCACGAAATTGGAG -CGATACTGGAGCAACAAGGGGTCAGTAATCAGGTAAGAGGTTGCACCGTGGAAGATCACA -AAATGCGTACCTATCTAGAGGGTGACATAGAAATTAGCCTTGCCCACAATAGTTAGCCCC -TGGAGCTTCATTTGCCGAGTGACTCACAAAAATCTCACTCTCGAGTAGTGTCACTGAAAT -GCAAGCCTATATTATGTGAACACCTGGAATAGGTGTATATATTTCAATCGAGACTCTCAT -AACTGATTTGAGGTTTGTTCGTCATGGTCCACTATAACCCGCTCACCAAAGAGCCTTATC 
-TCCAACTCCCAGCACCATGTGCAAACATCATCATCACACCACACCGTGAACACCAGATTG -AGGAGACATCTACTGTGATGACCGAGCTTCTCAATGATTCACGCATATATTCCTGGCTAC -AGGGGCCACCCTACCCGTTTCTGCATGAGCACGGCGTGGACTGGGTCAAGATGAAATTAG -CAGAGTACAAAGATATCTTGTCTACTCTGCAAAAGGAGTTCGAAGACTCAGAAAGCCAAT -TGCAGGGTCATAGTCCAAACGGCCGAAAGCAAGCAGAGTTCTTCGATAAATGCCCTTTCG -TTTGCATACGTGAAGTAACAAAAAGAGATCCCACGACAGGCACGCCGCTTCAAGATGTCA -TGATCGGAGACTTCACACTAGTGCGATATTCTTTCTATGAGTTCCGACCCGATAGCTGGG -AGCTTGCGATGGTGCAGAAGCATAATAACGATTTGCCTGCGGGGCACGAGGATATCATCT -GGGGGTTAGGATGTATGGACACCCCGGTGGGACTCGAGGGTAATTGCTGACAATATTGAT -AGACTACCTTTCTCCTGCTCAACACAGCCGGGGACTTATGAGTGCTGCTGTTCAAACTAT -CATTCAGGACTGGGCTATCCCTAGGATGAATCTCCATCGTCTTAAAGGTAGCTTTTATGT -CGGGAACATAGGGAGCATGAGAGTCTTTGAGAAAAACAATTTTGTGGAGGCTGGTGTATT -CAAGGACTGGGCTCCAGCAAGCCCAAACAAGGGACAAGGGAAGAAGTCCATTGCTGTGAT -GGAGTGGAAGGGACTTTTATAGGTACTACCAAGTAACAGTTGCTCAAAAAAGGACGTTCA -AGACTCACCCGAGCAGTCATCCCATGCTGAAACTGCTAGATCATACGGAGTATTCCCCAC -CACTTAGTATGTGCCAATGGCAACCTCACAGGGTAGAACAGAAAAACCGAGCATTTGCAG -TCTCAATCAATAGTGACATGATCTTTGCCGATCTTAATATCACGCAGGCATAAACTTAAA -AGGAGAAAATAAGTGGTGCTCCAGCGGGTCTTGAGAATACCTCAAGGACGAACACAGGAC -TTTAGTACAACATAATCCCTGTGACAAAATGAGCATCGCAGTCCTGCATATGTCGTAGGC -AAACTGGCATATCGAGTTGACGCACTGAGTGACCCCCTTTCTTATATTGCTCAGCGTTGA -GCTGCTCAAGACAAATGACTAAACTCGGCAAAACTGAATCTATTCCCTTACTTCGTACTC -CAGATATTGTACGAAGTATTCCTTGGAGAATTGCAGATTTGTCACACGACTAGATCCACT -CTGTATTTATTGGCCAGGTTCTATATACTTAATCTCAGTCTTCAGCTGAAGGAAATTGTT -TACCAAGTGGGTCTCGAATCCCATGAAAAGATGTGTTGGATGCATCTACAAATTACGGAG -TAGATCGCAAACTACTCCGTATTATTGATTGCTTTATACTACCCCGTCCGCCAACCCCCC -TATTTGCTGAAAATATACAAGCAAAATAAAGCGAACCAATGGTTTCTGTGCCTGATACGA -GCTCAGCTAACACAACTGGCTTCTGCGCCCGATGCTTTATGTAGTGAGCAGAATTTTATC -GAAATTCGCTCTCATATAAGCAGGGGTTGGATTGAAATTCGCTTACTCCTCACGGAGTGC -TCCGTGAAGTATCCAGAGCTCAGATTGTGGATCCGACGTTTTATCCCATACCATTTACTT -CCATCTTGAGCACTGGACATATTGTAGGCCATTCTATTCAACCTTCTATACTCCGTACAC -AGTACTGCACTGGGCGGATGATACGCCGGACATTTCAGAAATATCAGCCTACAAATTTAG -TAATGTTAGGGTAGAGTGATCAGGCGAGGGATACTGGAAGCCGATAGGAATGAAGCCCAA 
-AGAGGCGCGTGGCTAGTCTCATGGAGGGAACTCACAGGGCAGGCCCACCTAGGTACATAC -TTAGTATATCTTACGTGCATGATTTGAATAGTGCTGTAGATCGCGGGATTGAAAGGTCAA -TACTCCTTGGCACTTTCAAATATATTAATATATAAATGAGTCTGTTTCTCTCTTCAGTCT -ATACTCCGGCATAGAGCTTACCGGTGTACACTTCCTATTCACCATGCATCTATCAACTAT -AAGTCTGCTTCTTTCGGTTGCTATAGCAGATGCCACATCCTTCGTCACTCCTAATAGACA -TCGCACAAACAAGGTCAGTAGTCCTAATACGGCTTTATTTTATATCCCAAGTGATGTAAG -TGTCAATGCTCATACTTCCTAGGAGCCGGACTACCCTAAAAGATGCTACCCTAATCCCTG -TAAAGGCGTGACCTACGTGAACTCTACTGCCATTTGCGGTGACCCACGACTTGGTCCGAA -GGAGCTTCCTGGATTCTTCCCTCTCTCCAACGAACTCGAGACATACGCTCGCTTTGGCGA -GCTCTGCCCGATCGAGTTCCTGGAGAAATGGACTTTGAATGCTTCCGACCCAAAGGGTTA -CTGGATCTATCCAGACAGCGACGGGTTCGCCGTCACATCCGAGAACGAGTCTATTCTAGG -AAATTTCACACTGCGCGTGGGTCAGAAGCTTGATAGATTCGGGTCTGAATATGGGAAGTT -CCTGGCACCGCTTGGAGCTCCATATATCGAACGCTCACTGCCACCATCCAATCTTTTCGC -GCCGCCAAAAAGCAGCTTCCCCTACAATTATCATGTCTACGAAGTAACCAAGGAATTTGA -TATTCTCCTGGGACCCATTGCACCTTGGTTTGAACAGCCTGGCTTTGGATCGCAGCTTTT -GGCTCAGTCCAGTGTGCTGGATCTTTTGAATGGGGGATATTTGAAAAGACTGGAATTGCA -AGACTATGACGAAGCTGATGAATACTCCGCGGGCTATCTGCCTGCTCCACCCAAGGAGTC -TTCCAAATAAGTATGAGTTGTGGATATATATCACAGAGCAAGACGGGGCATTGTTTTCTG -GGGGCATTTAGTTCTCTTTCTTGCAGTATATGCTTTTTATATCTTTAATCGCCAAATGCA -ATGCCTGATATCCTTTGAGCATTCATCCTCTGCGAGTCTATGAAAAAAATAGATTTGAGC -AGAAGGTCTCCAGTTCTAAAAATGAGATGGAAGGAAGTTACTTATCAAAAGCGGGACAAC -AAACCGAATTAGGGTGAATTGATCAGATACAATGAAAAAGGGTACAAATATATTCCATGC -GCCTATCTCCCAATCTCAGAATGGTATGGCATTACTGATCTTCTGATTTAGCTTGACAGG -GAATCCCAGCATGCTACTGTTTGTTAGTCTGGGTATCAAGAAGCAGGACAGCAGGACATA -CTCCTTTCCAGTCTGAGTGCAACCGCAACCACAGAAGATTGATAGGGGGTAACAGAAGAT -CTAAAAAGTGGTTGTCATGAGCCTAATATCAAGCAGGAAAGTCTTCATGCTGTCACTCGA -GAAGATATTATCTTTACTTACTGCAGCCACAACGGTAGTGAAGAACCCTGTTCATTTTGT -CAGGCATGGTCTCATATTTCGGCAATCACACTGCAGAGGATCTGAACTTACCGAGTGTCG -GGCCAACCACCTTACAGTGACATGGATTCTGATCGGTGCCACATTGGAAGCAGATAGGGA -AGATAGGAATGAAAGGCATATCACACATCGTCTTTGGCTAAGGGTCTTGAACTAGGATAT -GTAGAGAAATGTTCCGGCCTTCACTCAGATATGTTATATGTAGGTTCCTTGAAAGAAAGA -GTTGTCCTGAATAGAGAATTTACCGCTCAGTTACGTTGTAGTCGGAAGTTTGGGACTATA 
-TAATGATGACGCAGATGAGATCGTGATCCACCGATCGTCCTGTTTTAGGGAGTAGTGGGG -AGATTTAGTCCGAGAGTATCAAAGGTGATCTTCATCACGAAGATAACGTATATCAAACCC -ATCCAAAGTACTTAGTAGAATAAATCCTAAATGTCCGTACTTCAGATGGGTCCCAATATT -AACCTCAAAACGCCCCCAGGGACCGGGCCGGGGCCTCCCGATTGAAATTACTCAGCCGGT -GGCTAAGTGAAGAAGCCACGACACAGCCACGATGTACACCAGTTTCCCCTCGAATCAAAC -GTCAGTCTCAACTGTAAATGACCCGTTGAAACACCTCAAGCCCCTTATTCTCTCACTCCT -CATATTCAGTCATCCCCTCGGAATTTCAGCATTCCTGTGCGTCCAGATCCACTCCCAGGC -AACAGTGCCTAGAAAAGACCAGGCTCCAACTATTTTGCGAGACACCTATATTCTTGGCAT -CTAGTTCATTGTTTTTTTCCCGCGCCGTCTTCTTCTCTTGATACCACCCGCCCTAGTCTT -TGGATCTTCGCCTTGTTTTTCCCAAATCCGAACATCCCGATTCTATAGACTTATCCCCCT -CCGGTACCGCGATAAACAAACCCCACCATGGGCGCCCGCAATCTCGTTCCCCTCATCCTA -CTCCTCGTCTTTGTTGGCATCGTTGCCGCGGTCGGATTCGTCGTCTACAGCATCGTGCAG -GATGTAGGAAAAAACACGCGCGAAAAGATGGAGCGCAAGAATATTGCTTTCACCAGGGAC -GGCATGAAAGTGCAAGTAAGGGAGATCAAAGATGAGGCCTACAAGGATCAGACCCAGAGG -TGCGTTTTCTGCCTTTAACTCCCCTTGCTTATTACCGCCAACTGACCCGATGTTCTCCCC -GTAGCGTCCTATACAACATGTGGAACCACACCTCTTTCCCCGCTTACAAGAGTCGGCTGT -GGGATATGGCTGGCTCATCTTCGTCAGAACCAAAGAAGGGTGCCGAGAAGCGCAAGTAGT -ATGTCAGGCATGCATCTTTACCTATTCTTCCGGTTCATCGGACCGACTCGTTCTGCGCCA -GGTTTTGCAACAATTCTCCGGTCGCGGCTTTACACTGGACATATTTTCTACTTCTATCCC -CATTTCATCTACAAGTTTCATTGCATGCTCTAGTCCCTGGCAAAGGGCTGGTCCCCAAAA -ATTGCGTGCACTTCGGTTTCGAGGGATGGACTTCTTTCAAATTTCCACATTCTTGTTCAT -ACCATTTTGATTTGTGGTCTCCGTCAAAGGCCTCATGAATGATGGCAGGCCCTGATATAT -GACTTTACCTTCAATTTCAAACCTGCGATCACTATGCATCATACATTGAAATTCTTAGTT -TCGTAAATCAATATGTGGTGTCAACCTCGACTTTGCTTTGAGATCACGTCTGGGCTAAGT -TGTTGAGTGTACTGCGTATTGAAAAATTGAGCGTTTTGAGTTGATATCAATGGACTATAC -TGAACTCTCATGGCCCTGAAAATAAATGCACTAGGCGTCGTTCCAATCCATTGAACCGAT -ACTGATATCATAGAACGGCAGATTTGAATTTCAAGCCGGCGATCTCAAGGATCAAACCTC -GGTATTCCTCCATGGTTGCATTATCAATATAGCAACTCTCTATCCTGTTCCATTATGACT -TAGCCTGAGCCTTGCGAAGCGCATAGTACTCTCATCTTGATTCAAAAGGAAGAACGGTTC -AGGTTCCGGCTGTCCACTACCGCCATACCAGGTCGGCTTAACCAACGTCTTGGGTGTATC -GTCTCTTCGCTTGTTGGTGAAAGCCATTTCGAAGGTCTTGGTTCACTGTACCCTGTTATT -CAAGCCCTCAAGCCCTCAAGGCCCCACAGCCTCTGCAGGAAATGCAAAATTGAACTATGA 
-GTATATGCCGAATTTTCGGCCTACATAGTGCCTTGATCGTGGATGAGGAGGTTGGCGAGT -ACCCAGGGCGAGATTATGAAGGCAGGGATGCTGTAATGAAAGGTTAAGCCAAATATCCAT -GGCGCCAGAACTCAAATGAGATTAAGATGCTTATCGGACCCTCAGACGTGTGAAATCATA -CGTAAAATTGTGATTTGATGACATGCTTTTGAAGATGATACCATCTTCGGGTTGAGGCAC -GTTGACTGGGGTGGGAACATAGTCAGCAAACCCACCATGCTCATCAAAGTTGATCAGAAC -ACTGTTATGCGAGTGTTAGTATTGCCGCACTTAGCATGGGGAGAACGGAACTGACGCATT -GTCCCAGTACTTGGAGCGACGTAGGGCGTCGTACAAGTGCTTGACCATCTGCTCCCCTGC -TGCCATGTTGCTCGTAGGATGCATCGAGTCAATTGTACAGCACTCCGGATTCAGATACGA -GAATGAAGGCAGTGTGCCAGCTTCCAGATCATGGTAGAGCTCGGTTGCATGGATGAGACG -GTCCATTGCGTTGTCTTGAACCCACTTGATATCATCATTAGCCACTGAGCTGACACTTGG -ATGGGTAAAGGGAGCGACTGACTTTCGATTATGTCTGTCTCGTGATACTATATGGAAATG -CTCAGTCGGTGTTAGTAAAAAGGAACGAGACTTATCTCACATTCTTCTAGCTGATGTTTT -TCTTGCTGAGCAATTCAAAAATTGAGACCGCACAGGTAGTTCCAGTCACATTGGTCCCGA -ACCCAGCGGACTGATGAGTGTTGTCAACGAAGCCACATGTTGAGCCCGATGTTGCGAATT -GACAGTTCGGGTTAGTGGGACCCGGGTGCTCAGCGAACTACAAGGTAGAGTTGACAGTCA -GACATTTTGCAATATAGTCAAGAGCAGACCAAAGAATTACCCACGGATGAATCAAAGAAG -GCAAAATTGCTTGCCAATTCAGCCAGAGTTAAAGTCTTCTTCTGGCTCAAGGCCTTGATC -ACGAATGCCGAATCTCCCGGGGTAGCATTGTACTTCTCCGATTGGCGCTCAATAAAGCCT -GCCATATTGGGAATATCCTCTTTCGTCGGGTTCCACTGGCGGAAGATTTCGTAGATGACC -CCGCCGAAGTTGTGGTGCGGGTCTTTGAGTGTCACCTCTGTTGCATACGGCTCGGCGCAG -ATAGCGAGCGGCTCACCATAAACAGTCCAATTAGGGTTGGCGTAGTCATTGCAATAGGTG -AGATTGCGCAAATTGTCAATGTCTGGATGGAAGTCCCAGTAGCCGGCGATGTTGTCAAAC -GAGTGATTCTCAAGAGTCAGATAATTGACATACTTGATCTGGGATTGCTCTTATGAGACG -TCTGGCCTTTGAGAAGTATAGATGCTGTGCGGTATATTCACGTTCTCTCGCAGCTGACCC -CACACCTGAGTAGTTGGATGAGTAATTGCAATATTGGCCAGAGCCAGCAGAACGAAACCC -CAGAAGAGAATCATAGTGACTTTTGTTCAACGACGGAGTAAGGAACGAGAAAGAAGTGGA -GTAGTGGGAGCAATTATGGTTTCAACTATCTTCTTGCCATCTCTAATCTTCTACATCTAT -CTATACCGGAGGCAGATTGCCGCCGAGGAACCTTGGAGGGTGGAGGCCATGCTCAGCAGG -GTATTAGTGTCATGGATTCAGCAGATCGCAATCCAAATTCTATGGTGGTAATTCCAACCC -TTGAGATTGAGAAGAGCTAAGAGCTGTGATATATACTATATGTAAGTAGCCCACGTGGTG -AAAGAGAGATGAGAATCCTCCAGGGCTGGTTGGAAGCTTGCATAAGCGAAGTGACTAAGC -TGTATATAGTAATCTCGAGATTAAACCCCCTCTTGCCAACGGGGATTTATACACGTGAGC 
-GGGACAAAGGAGCGATTGCAAATGACAGTTTCTGATTGGTTGGAGAGCTCAAGTCAAGCA -CCGGGAGTCCCTCAACGAGGATCCACACGGGTCCACATTAAACTTGGCCCTCGAATCAGC -GTTGCACGTGCCCTGCGCATGGGCTGTAATGCCAAGCACGTTGTCTTCCTGATTCATTCG -ATAGGCTGTACGAAAAGAAAATGGGAGAGATCATCTTCTTTGTCAAAAGGCCTGGGGGGG -AAAAAAAGGCTCATGACTACACCGCGTGATCTGACTGGCTAGTTCCGCATCCCTCGCTTT -CACTTAAACATTCCCCCAAGTTCCTTTCAAGGGATGCAAGGTTTCAATTTCTCCCGCCAT -GTCAATAAGTTCCCTGACCGGCCTCATGTCAAGTTATATGCCACTCTCCGATCGCAGCTC -ATCTTCGGATGAGAAGGATCATTCTCAACTTCGACCGGATGTCGAGACAGGAAAGAACAC -AGACAGTCATGACTCGGAGCGAAGACCTTCTCGTTTCATCGACGGTCGCACCGTCTCCGA -TGCCATCATCGGTCTTTCCGACGGCATGACGGTGCCTTTTGCCCTGACTGCTGGATTGTC -TGCACTGGGCGATACCAAAATTGTTGTTTTTGGTGGACTCGCAGAACTCATCGCAGGAGC -TATATCGATGGGACTCGGTGGATACCTGGGTGCTAAAAGCGAGGAGTAAGCCTACCTAGA -TCCCGAAACCTTTCTCTTATCAGAATACGGCGTGCGCTAGGAGATATTTTGAGGATTTGA -TAGCTGACATGGTTATAGGGAGTCATACAAAGCTACATTAAAAGAAACTCAGACGCAAAC -ATTGACGGATCCCGCCTCTGTGTCTGACACCATCTCCGACATTTTCGAGCCCTATGAGTT -GCCTTCTGAGCTGATTGCACAACTCAAAAATCATCTGTCCGACTCTCCTAAGCTTCCATC -CTTCCTCATGAACTTCCACCACACTCTACCCGAACCCTCAGGCTCGCGAGCGATCATTTG -TGCTTTAACAATTGCCCTGGGATACTTTATCGGTGGATTTATTCCTCTCATACCATACTT -TTTTGTCGGACCTCATGAAGCCTTCGTCGCCTTGCGGTGGTCTATTGCAACCATGGTCAT -TGCACTCTTTATCTTCGGATATGTAAAGACTTGCTTTGTCAGTGGATGGCGTGGTCGTCG -AAATGTCCGCAAGGGTCTCATTGGAGGTATACAGATGGTACTGGTTGGTGGTATCGCCGC -TGGTTCTGCAATGGGGTTGGTGAAGGGCTTCCAGATGCTGGCTGATGGGCCTGAAGACAA -CAAGCATAACTGAATGACCATCGGAGTCTTTATATTGTTCCACGGGTTTGGCGCTGAGCG -GCTTTTTGATTTTCTTGATTTGCGGCGATACATTGGCTGGCGTTTGGGTTTATGTCCTCT -TGATTTACAACCCATCGAAATAAAGCAGGATGAGGACTTTGACTCGGTCACTCAATTGCT -ACTTACCTTCTAGTCTATGATTCTAGTAAAAGTAAAGCGGTCAGTGACACTGCCATTTTT -CTTCGCCATGATAACGTCAGTCCTAACATCGAGAGTAAAAACCGGTCCCTGCATGGCACA -AAAGTTTCCCAGCGACCAGAGGAAAGCGAAGGAGGACGCATCCTATGCCAGTCCTTCCTG -CTTCCCATGAAGCAAATTCTTCTGCTTTTTGTAACTCGCGTCTTTGCTGTTCCCTATCCC -TTCTTTCTCTAGTGCCTCCAGAAGATCAAGCACGTAGTCCTGGCAATTCCAACCCCGAAT -CTCATTGTGAACAGTAACTTGACTCGCGACACTCTTGATGGAGTCGATTTTGGCGACATC -CATATCATACAAGGGAAGCAACTCCAACAGTTTTTTTGATTGTCGAACATCTCTTATTTC 
-CAGCTCATAGCGAAATCGGCCATCTGATCCCGTCACTTGGAGCAAAACTTTGTCTTCTTC -TTGGGGTGCATCAATGAAAATGCCCCAGTGTTTGTACACGCCATCGCTGGTGCTGATGGC -AGCATAGATTGGCACCGTCGATGACATATTGATTAAAATATTGAGAAAGTAATACTGCGA -GCTTTTAGATATTCATGTGAAAGCAACGATTCTTAGCTGTACGTACTCTGATGGAGGACG -GCTAGTGGATATGAGGCTTTGAATTGAAGCAAAAACGAAGGTGGTGAGTGCAGTCAGAAC -ATATATATCAAATCTTTTTATAGACAGCTGACTCTTGACATAAGCGCCGGGCATAATGTA -CAAGCCTTTTGCATGATCTTTTTATTCTTGTGGCGGCGGCATAGCCAATAAGTGGTGAGA -GAAGCGCTTAACTGGAAGAGGTAATTAGGTCGTCAATACTAGCGCATATAGCGAGTCAAT -GCCTGGAACAGCACACGACCACGAGAGTGTAAACTAACTTCAAGCCTTGTGTCAGCCTAC -AAACACCCCCTCCCCCGGACTCTGAACAGGAACGTTGGTTTTTGTTATAGTCTAGTGGTA -AAACACACTGGATTTTGTCCAACACCTTGTCCTAAGACTCCAGGCTTCTCGCTGTTTTGA -ACTGTTTGAGTGGCCGCTTTGTATAAGAAGTCCCCAATATCCGAAACATAGTTGTAGTAA -GAGAGCTTCCCTATTGCACATATTACACATTATACACTTCCCAGGAACAATACCGCCGGG -TAGGACGTTGAAAATCAAACATTACGTATATGTATTAGCTATGGATCAAAGACTCTGGCC -TGTTTGAATATTTGAAGCACGAGTCCGTCGAACAATAAGGTCACGATGGGCTTGACTTGA -AGTATATCATCTGAGAAGAACCAGGTACTATTCTTGTCAAAACAGCGTCATCGAAGCGCT -TAGGTTGCATTCATAAGGAATGTGGAATGCACATGGGCAAAACGTATGTAGTCATACCCC -ACTGGCCGCATCGGGTTTGCGGGGATGTAAGGCGATAGCCTGGTAGGATCGACGACGCAG -AAACGTCGATCATGGTGTTATCTATCAAATGAGCACAGACATGTCAACAGAAACTGATTA -CTGTGGAAGGTCATCACATGTGTGGTAATCTCAGCCAATGTGATGCCAAGGGCGCAAAGT -GAACTTCGTATATCATAAGCAAAAAGTTAGGCAGTGACCTCAACCAGTATTCATAGAACA -AGTACTAAGTAGCATATAGATATTTTACATTCTTGACCTACATTCAAAACCTGATGAGGT -TAAGGCAAATTAAACCCGCTATTACAGCTTGACTTCATATCTGTGGCGCAGCTCTACACA -TCAGCCTTGCATCATTTCTAAGCTTGAACCCACCTCTAGGCAGCCGCCCCCAGACACGAA -TACGAACGTATTCGAAGGATACTCCGTACATTATCTGTAATTTCGGGATATCTAGGCAAC -GAAGTTATAATAGAACTAAACATGTGTTACCCCGTACAGATTGTGGAGTAACGATTGCCT -ATATTGTAAAATGCCCAGGGAGCTTTGACGCTGGATTGGAGATGAAGTGAAGAAGGTGAT -TGGCTGGCGTCGCGCGACATTCGAATTCTGAAACGCATGTGTACCACTCACTAGGTGTGG -AAAGTATATACAATCACAGCTGTGCCGTCTTTTTTACGACAGAGAGATCCTCAAGACTGG -TTGTTGCTTAAATCAGGAGGCTGGCTACCCCCTGAGGGCATTATTAGATGATAGCCTGCA -CATCCTCATCTACGCGCTGGCCCCATGCAAAACGGCCCTAGTGTGACGTTTTCTCCTAAC -CGGTTTAGACTTGTCAAACCGGGACTGTTGATCGTATATGACTCCTGAGTCTTGATACTG 
-TTATGAAGTACGGAGTATATAATACAGTATCATCTGGACGTTGGCCGTTGGCCGATAGCC -TCAATATTGTTTTGAATCACCAGGACGACGAAGTAAGATGTTACAAGCTGTGCATAAGCG -AAATTAGGGAGTTTTGATATTACCGGGACCGAACGATGCCAGTGGGGGCGAAATAAAAGC -AACCAATCAGATCTCCAGCCTGGATCTTATATGGAAAATTTGCGAAAATTTACGTTCCAG -CTGTAATTTTGAAGGGAATCACATTACGGAGTATGTAAGCAATCTTGAATTCCAACCTTG -GTTGAGATGGCTCGTCGTTGGGTCATGGAGATAGTTTTTACTATATCCATATGTATAGAT -AGAAGTTGAGAGTATTAATGGAATTGTAGAGATATGATCGGGAGTCTCGGACCCTTCTGC -ATAAAAGAGATTAACAGGTCGGAGCATTCGCATAACAAAGTCCGAGATATGCCCACCGGG -CAGAAATGTGATCTCCGTTCTCCGTTCCGTGGCATCCGAGTCAAATTATCTGTATTGTTG -AATACTGCCGAAGGCTCCGGTCAGAATTGCAATAGTTGAGGATAATTAGAGCTACTTCAA -ATGTTATTTACTATGCATACCTGAAATAGGAAAAGAAGATGAAAAGTAAAATGTCAACTA -CTCTGTACTATGTTATAGTGACGCCATACATACCTACATAGGACTAGTAGACAGATAGAA -TAAATGTAATTTAAAAGCATAAGTAATACCACCAAAATTACACCCAAAATTCAGATCTTG -GTAAATACGAATACGCCCAATACTTCAGCCCATGGGTGAAATCCAGGTCTTTTCTTCCCC -TTCTTCCCATCTCTTCTTTAATTGATATCTCCATTTCAGGGCTTCAAGTGGTCTCCTTGG -TTTTCTCGAGTTCTGTTCATCCTGGGTTTTCTCGGTTGCGCCCTTTTCTCCTTGCATCCC -GACCCTTGTCAACGTGACCTCCTGAGTTCCCTGGGACTCACGCCCCTCGGCCCTATCCTT -CGCCTTCCTATCGGCCTATCTCAACTTTTCATGAGCCTTTCTTCCGGTCAACGATAAACC -TCCTTCCGTGACGCGAATTCCTTCCGTCTCGCCCCACGGTAGTCCTACAACTTTCGACCA -CTACTTTTTCAACGCGATCCAGAGCCTCCTGGAGGAGAGTCGCAAACATGAGCGCCTCCT -ACCCGTCATGGAAAGACCGCACGCAAACCCAGTTCGGAAAGCTTCAGATCCAAGTGCCAT -GGCGCTCATTCCAGCTGTTGGTACCACATCGCATGCGTCGCAAGCTCCGGTCCAAACTCC -GTAGCCGTCTCTCACCCACCTCGTCCATATCGAGGCTGCAGACCTCCTTCTCGCCTGTCG -ACACCCTCAGATCCTTACAGTCACACCGATGGACAATGTATGACTTCCAATACTTGTTGC -TCCTCATCATCGGCATTTTCTCCTTGACAATCATTCAGTCTCCTGGTCCACTAGGAAAGA -CGGCAATTGCCACGGGGCTTTTGTGCTCTCTCCTCATGCCAATCACCCGGCAGTTCTTCT -TACCTTTCCTCCCCGTTGCTGGGTGGCTTATCTTCTTCTATGGATGCCAGTAAGTTTTGC -CTTCCATTGGGAGTTTCTTGCAAGTGATGCGGCTGCCATCTCCTTGTGATGCGAGCGGGA -TGTGAACGTGACACATGACGGGAACCTCACTAACATATATTTCTCCAGGTTCGTTCCAAG -CGATTGGCGGCCTGCTATTTGGGTCCGAGTGTTGCCGGCCATGGAAAACATCCTCTACGG -TGCCAACATCAGCAACATCCTATCTGCTCACCAAAATGTGGTACTCGACATTCTGGCATG -GATTCCTTACGGACTCTGCCATTACGGTGCGCCGTTTGTCGTTTCGCTAATCCTTTTCTT 
-CTTCGGTCCCCCGGGAACTACCCCTCTGTACGCTCGGACTCTCGGATACATCAGTATGAT -TGCGGTCTTCGTTCAGCTGGCTTTCCCCTGCTCGCCACCCTGGTATGAAAATTTGTATGG -TTTGGCCCCAGCCGACTACTCGATGCAGGGAAACCCAGCTGGTCTTGCCCGCATTGACAA -GCTGCTGGGTATTGATCTCTACACTTCGGGCTTCAAACAATCCCCTGTTGTTTTTGGAGC -ATTCCCTTCGCTCCATGCTGCCGATTCCACCCTAGCAGCACTATTCATGAGCCACGTCTT -CCCGCGATTGAAGCCTCTTTGGGTCACCTACACTCTGTGGATGTGGTGGGCTACCATGTA -TCTTTCTCACCACTACGCAGTTGATTTGGTCTGTGGTGGCCTACTCGCCACAGTTGCCTT -CTACTTTGCCAAGACCCGCTTCCTGCCCCGAGTTCAGGCGGACAAAATGTTCCGCTGGGA -TTACGACTATGTTGAGATCGGTGACTCTTCCCGCGAATTCGGGTATGATCTTGCTAGCTT -GGATGGCGATCTCAACTTGGATAGCGACGAGTGGACCGTAGGCTCTTCTTCGTCCGTTTC -CTCGGGCTCTTTGAGCCCCGTGGACGATCATTACACCTGGGAAAGCGAAACTCTCACCTC -GAACCACGACATCGAGGCTGGACGCTAATCCTCAAGCCTCATTTTCACATGTGGCTCGGC -ATCGTCCATTCTTGCATCTCCTACTTTCGAATCGGCGTTTTATGTTTGACGACAATGTTT -GGGAACATTATCTGCATCGAGCAATTTTCGGCACCTTAAGGTTTTTAATAAAACCTCCGG -TTCACGCGGTATCTCCGCCTCGAGCTGAAGGTACTCCTCACCTTCTGGCTGTACATGCCT -CGTCATCCCACTTTGGGCTGGCAACTCCGTCTATAATATTAATACCAGTGTCGGGAGGGA -TCGGGGCCTGTTCACCTGCCTGCATTCGCCAATCCCTTCCCCACCGCCACTTTTGGCAAC -ATTCGCTTGTACACATTCTCGGACGCCTTCGGGCGATCCCCTTTATTTTTCTGACCCACG -TCTTTCGTTACGACTTTATGAACTGGGCGAGCCACTTTTCTTTGCTTTCGGCTCGTCCAG -ATGAACAAACATTTTTATCATTCGTGTCGCTCGCTCATCATTGCCGCCTGGGAACTTTAA -ACCGGTTTTTTTTCGGAGAGATGAAAAGTTTCGGCGCTTTCTTCGGTGCTTTTTGCTGCA -CCACCATTCGCTGTGCACTAGAAGGATTTTTAAATATTGGCTGTTTTACGACATTTTGCA -TTGGCACAAGGGTTTTCGAAAAGGTCGTCTCGATCTACCCTTTGTGTCTAAGCCGGGCAG -GTGGTTAATCGGCGTTGACGTGTGGATTCTTTCCGTGTGGATGCACGAAGAGATCCGAGG -AAAGGAAGGGGAGGACATTGAAAGCTGGGACTGTGGCTTTCAAAGCTAGAGGCACCATGG -CAGCTAGGGTGAACGGGATTCGGTACTTTTGACGGAGGCCGTCCCCCCTTGAGCCTTGCC -TATTTTCTCCGCTTCGGTGGTGGATGAAGCTGATGCGACCTAATGCATGCGCGTCTCCCT -TGACATCCTTACGACTTGCTCGTTACCTCGGATTGGATTGCCTCTGTTGCCTCTTATCGT -TACGTTTGGCGGTGAGATCCTGTTGGGTCCACGGATATTGCTAGCATTATTTTTAATCAG -CGTATTGAGTCAATGAATCATTGTACATTACCTACATATTTGGGGTTCTCCTATACGTAG -TACACACATATTGTAGTATACAGTAGTCATGTTAACTTGCAGCTATTCACCCCTTCAGCT -TCAACATGAAGAAAACGGGCAACCCCGCCTGTGTGTGATAAAGGCTATCCACACTGGACG 
-AAACATGGTGTGATCCTTGCCTTTCACGGCCAATAAAGAGTCCAGCTCCGTCGATCACGC -GGGATCAGTTGCTGGTAGCTTGCATAAGTCACTATTTTTAGGGCTAGGAAGAAAGCCGTA -TCCGTGCTTATCCGTGATACTTGGTGACATGGTTTGTTTGAGTATTCTAGTATCATAAGC -TGGTAATTTAATCAATAAATATTGTCACAGAAATATAATGTATTCATATAAGAGTTTGTC -ATGAGGAAATGTACAAGAATGCTCAAAGGAAAAACCCCGTCTAGAAAAGTCCCAGATGTC -TAAAAAGAGGTATGTGTATCAAAACGCGAAATACAAAGAACGATTATCCAGTCATAAGGA -GACCCCATTAGATCTATTCGTCATCAAAGATTTGTACACGATCCGCATACTCGCGGATTT -TGGAGACGAAAGTCTCTTCGATCAAATCGCCTTTGCCGCCAGCGACACGAAGAAGAAGGT -CGTTGAGTGAGGCGCGACCTAAGATATCCACGTGGTCACCTATATACAGTCAGTCGTCTG -TCATACGAAATGGGAAAGAGTAGGCAACGGAGCAAAGAAAACATACCAGTGTTGGGACCA -CCACGGGGTGAGAAGCGATCAGGCTCATGAGGCATCTCAAAGACCTTGATCTTTGTGCCG -GCTGGGTTGTAACGCTTCATGCGCCAGCCTTTGGCGCACATGTAGCCCGTGCTAAGCAGG -TTCACGGTGCCGTCGCCCTCACCCATTAAAACGCCGTGGTCAACGCCCTCGTTGTTGGTG -ATGGTAGTGTCCATGCTGATCTTGAGGTTGACCAGTGGGTCAGGCTCTTCCTGGTAGTAG -TAGCTACGCTCTGTGGGTTTGCCCACGCCATAGAAGCAATAAATCTTCATGTCTGGGGCA -AGGGGTAGGCGCGACTCTAGAGGGTTGAGCCAGGTACGCGGGTCCTTCTCGTTGGCTTCG -ACCTCGCGTTTCGTATGCGCGACGCCATGCGAGTAGCTGTTCAGAACCTGGTTCCGGTAC -CAATCCTCGCTCTGGTCCAGGAGAAACTGCAGACTCTCTGTCGTGGTGAGATTCTGTTGT -GTCAGGGATGAGTTCGAATCTTGGAAGCGGAGTAGATTGCCAAAGGTCAGGGGCTGGCCT -GGTTGGTCATCTGGTGCCCAGGTCGAGTTTCCCCAGATGGCCTCACCACCTTTTGGTAGC -ATGCTGGAGATACCGGGCATCGCGCGGAAGATCTCCGCGCGTTCCTCTTTCGATAGGAAT -TTCTCCAGTCCATACACGGCAAAGGCGTTCAGTTGCGCAGTATCCCGCATTTCTCCGGAC -AGAACAGCGGTCATTCCCTTGACGGCGCCCAGCATACAGCCACTGATGTTGACCCACGAG -TCAATATGTTTGTTAACCCAGTCTTTGCCGCCTTTTCCGTGATCCTCGCTCTCTACCCAC -TTGAAGAAATACAGCACCACCTGAGATCCCATGCTGTGCGATGCCAGCGTGATTTTCTCG -CCCTGTACCTGCACCGCAGTTTCGATGTAGGACTTGAGTCGAGTGAAATACCTATCACGA -ACTTCGAGATTGAGGTATGAGAGTCGCCAGTCGTATGCAGCTGTAAATGCATTGGTTGGG -TCATATCCAATGGTAGCCAGATTCTCCAGGATTTTATTCCATATCCAGTATCCCGTGATA -AAGAAATCCGTGGCATCGAATCCTTGAGCCGCACGGAGCTTAATGCCAGGTGGGTCAAGG -CCTGTATCCCGATCGAGCATGACATGGTTCTTCCATTCAGCTTTGTCCATGACTAGCGCT -CTCATCATGCTCCAACTGCCCCATAATCGTCGGCGGAAGTATTGCCGCGAGGTTTCCCCT -GTGCCCCAGCTCTCCAAGCCAGTCGAGATAACTCCGGGAATCATCACGACTGGATGTTTG 
-GCTTCGACGCCCTGGGCTCGAAGTTGTAGTCCAACAGCAAACGAATCATAGCTGACGGTA -TCGCGTTCGTGTTGCTATATCGCCGGAGGAAGTCAGATATATGGTAACCAAATGTAGAGA -TAAGAGCACTCCCTTGGAAAGCACGTACCGAGAATTCTTTCACGTCACTGACAATTCCCT -GTGGAATGGACTCAATCCATGAGTCGAGATTTAGGTCCATGAGCGAATCCAGGCTTATGA -CCTCATGCTGGTTCGCGAAGAATAATGCGACGAAGATGCCGAAGATCCCGCCCAGCATGA -AGATGAACCCATTGCGACGTTTACTCCGGGGCTTGATGATTATCTGCTTAGTTTTTGTCT -CATGAAATTCCGCACGAAGCTGTTCTTCCTGGATTCCTGTATCTCCGCTAACTTTGTGAG -AGGGCGGGACCTTGTCCTTGCCCGTGCGGCGGCGGAACATAACGACCAACCGCGAGGAAG -GGAGAAGAAGGAAAGGACGTAATCGATTTTCCTGGAATCTTGATCGGCAAAAGCAACAAA -GTTGAAGAAAAAGAAAGAAACAAGAAAAGGAAGGGGACTAGAAAAACACGGAAATGCATA -AAGTCCGCGAGCGGACGGATCGGCATCACATGGCAGAGAGTGACTTTCTTCGACAACCCC -CCACCCTCCGTCAACTCCAACCATCAGCCCACTTCAATGATATCGCTGTGATGGGCAATG -CTCCCGATCTTCGCTACGTGCGATACGAAAAGTCGCGCGAAAATGAGTATGTCGCTGCAA -TGCGCCAGCTGATCTCCAAAGATCTGTCCGAACCTTACAGCATCTACGTCTACCGCTACT -TCCTATACCAATGGGGCGACTTGTGTTTTCTCGCGATGGACGATAAGGATGAGATGGTCG -GCGTCGTGGTATCGAAGCTAGAGCCACACCGTGACGGTCCGCTGAGAGGGTATATCGCCA -TGCTGGCCGTGCGCGAAGAGTATCGAGGTCGGGGAATTGCAACTAAGCTAGTGCGCATGG -CTATCGATGCTATGATTGAACGGGATGCAGATGAGGTATGTTTACACAGTGGGGATAGCT -TTAGGTCCCCACACGGAGTGGTCTTGAGGGTATGGTGGGCTGACTTGACGTGTTTTGGGC -AGATTGTCCTCGAAACCGAAATCACCAATACCGGCGCAATGAAACTGTACGAACGGCTGG -GCTTCCTTCGTAGCAAACAGCTCCACCGATATTACTTGAATGGCAATTCGGCCTACCGGC -TGGTGCTCTATCTCAAAGAAGGCGTGGGCGCGATTCGGACGGGACTTGTTGATCCATATG -GCCTTCCACCATCAGTACCTGGTCTATCTGACGCGTGTGGAGGACATTCTCATGCAAACA -TGGGGTCTTTGATTTGATTTTTGGTAGTAAGCAGCGCTATGAAGTTTGCTCGTTGCTCTG -TGAAGCTGTGCTGGCATGACATGCGATGATAATTATGGGAAATGACCAGAATAGATACAC -CTCACGGGAACTGGCGTCCTTGTGGAACTTATATTGTATAGAAAGATCAATTGTCTGAGA -AGTATGTCAAATAGCCGCTGTGCTCTGGTTGGGAATATGTCATATAAAGGCAGGTGCGGT -ACCTACTTCTCTACATCGTATCCCTATGATGTGTTACGCGCGGACGGAAGCAAGACCCTC -AGTACCGAATATAAGCTTATTACCTTCAGTGAAGTTCCAGGATTCTGTGGTGCTGCAAGC -TTCAGCTCCCTCATTCATGGAATTATGGGTATGGCAGGCTGGGGCTCAGTTGAGCCATCC -AGTTAGCCTGGCTAGTATCTTGATAGCATCAATGGGATAGAGTACTCACCTTGGAAGTAT -CAGATAGATTTGGTTTTATTATCCTTGTCATCACAGAGTTGGGAAATATTAAAAATGGAA 
-CGCCAGAAGAAGGCAGATAACACAGTGTATTCTATCCAACATGGCTCATTATAACAAAGA -TGTTGTGCCCGCAATATTATTCTTAACACAAGGAATGTCAATCTTATTATCTCAATCAAT -TTCTACTCAAGATGCACCCTCTTAGTCGTTATACACCAACCATAATTGGCATCAGCGTGT -CACTGTCGTTTGTGGCAACTGTCCTCATGGCCCTGCGATTTTACGCCTATTTCTTTGTGG -TCCGCCTTAAGAGTGATTGGGCTCTTACTTGGGCAACTTTGGGTTGGATACGCTTGATTT -TGGAAATCAAGAGTGTGTACAATGACCTCGCTAACAAGATGAAACTTGAGGAGTATTGAG -TGCTATACTATACTGCCTTTCTCTCTACTATCGAATTATCACTGCTTCGTGGATCTTTGA -ACTATTCAGGACCTTGCTGGTCTCTGGACGTTGACATTTATGAAGATTGCTATAACAATG -TATATTCTTCACCTACAGGATAGAATCTACAAAATGGGCAGATGGGTTTTACTAGTCACT -ATGGTTTTGAATGTGAGCTCATGGCATTTCCTGTCTCCTGATCATTTAAAGCTTCCCAAA -ACTTCCTTTGTATTTGCTTTCGTTTTGATGGCCTCGTGGATATATAATGGAGGGATTGGT -GCCATCCCTAGGGTATATTCCACTGGAAGTACACACGAGCATAATAAATTGGACGCATTG -TTATGGACTAGAAGCTATTATGATATTTGGGGGAAGAGGGAGCTTGAGGTCAGCGCTCAT -GTGGTATGTATGCACACCGCTTCTGAATATGTATAAGCTATGGCTATCTTGATAAAGATA -GTAACAATAATTGGCCGCTGGTGCGGCCCTGTCATGTCTGTTTTGGACTTTTTAGATGCC -TCGGATATACTTTAGATATATTTTCCAATGTGATAATTTCCCCTTTATTGACGCTGTAGA -AAGATAAGGATATAAGCAGTGGAAAAGGAGTAAAAATGTGAGATTTTTATATGACATGTG -ATATAACATGTGATAACGACTGTATAAAACAAGCTAACACCAAACCATTCATATATTCCC -ATTTGGATCACCGAATCTACCTGTAACCACATCTTTCGACCTGTCGGATATCCCGGTTCT -GGGCTTTTGTCCTAGAAGCATGGATCTGATAGAAATGTGGAACCTGTGGTTGTTATATAT -AGCCACCTGCCGAGGTTCCCCTGGCTGTATTTCAACATTCCATAATCTAAAATATTTTCA -TCCTCAATGGATCCAGCGACACTCAGACACTTCCAAGGCCCATCTGGGATAACAAAAGCA -AATAAGCAAGGTCGTCGACCCGGCAAAATAGCATGCACAGCTTGTCACGCCCGCAAAAAG -CGATGCGAGATCGCCCCGCCATATCACCAGTGCACGCACTGTCGCAAAGAAGGTCAATTA -TGCATCCCGCGAGATTCAATTGAGAGGTGTGTCAAATCAAGAAAAAGAGTAGATAATAGC -AAGACTTACAAAACCGTTGGAAGACGTACCCGGCAACAAATGCCCAACCGCAACCACAAT -AGCCAGAAAGCTAGTAAGAACGACGATCATCTCCACGTCAATGGCTTTCTGCCAAAGGGG -ATTGACCACGAAGTACCCCGCTGGAGTGCAATGTATTCGTCATATTGTGAGATACGTCGG -TTATTGCCTTCTCTTACTTCCACTTCCCCGTCCCCTTCGCCCGAGATTGAAGAGGACATC -GTACACATGCAAACTGTACCAATTGAGAGAAAGCAAAATGTGTCTTTGTCAGGGCGGGAC -CCTTCCAGACACCGTCAGTCTGAGAATTCATATAGGGATTGTGCAGAGCCGGCTTCTATA -AAGGGCATAAGCATACCTGTTTCTCGGTCTCGATCTTGGTCTCTGTCTTCGGCGCCTCCA 
-GATGAAAGAGTTGGGTTGGAGAAGTCAGCAGCTTGCGATGCTGAGATTCTGCGTGGAGAC -CTTGCTTCAGTGGACAAAGTGAAAGTTCCTAGTGGTGGAGATTCTCATTCTGCCCATGAT -GTTTTTATGACCGATCTCGATGCTTTGCTTAGATTCTGATGGTGTCCATATCAAGGGAGG -AAGATATTCGCTCACGATTCGATAGATGTATATATATATATTCAAAAGAGATGATTCTGG -AATTTGCAGCTCTATGGTTCTATATTTTCAGATAATGTACAAGTATAAGATACAAGGTGA -CAGGGAAGTGGGAAGACTAGAGTATACCAAACAAACACCAACCAAAACAAGCCCGCCAGA -TAGGATATTCATGCAACACGCTAGCGTCAAAAAGGTAGAAGGATCGGAGAATGCACAATG -ATGCGAAAGGGAGGATTGAGAGATTAttggtctttgtctttctcttgatctcggtcttgg -tcCTTACGGACGATGGGAACTTGAATGGCGTTTTGCATCTTCCAAGTCAGTGGCCGTGGT -ACACCACGCCATTCCTTGTCCAAGATGGACGCTGCATCCAGCATAATCGGGGAAGGATGC -TGGGGGGCATAGCGGGTTGCACCAAGAATGAGGTCGCGTTTGATGAGGGCATTGGCAAGG -CGGATTTGGAGATCACGCTTTTCACCCCGGGGCTTCCATCCGATAAGGTAGAGAACAGGA -TTGTGACGGAACTCGGAGGGAATATCGTTCAGAATGGAGTCTGGAAGCCTGTAGCTTTCA -TGGTGCACGTCGATACCGATAATGACCTTGCCGGCGTCGGAGCTCCACAAATCTTTGTCA -AGCTTCGTGGATTTACTGAAAGAGGCGTTGCGACATAAAGAAGCCAAAACAGCCTGCTTC -ATCTTGTAGGTTACGGTTCTCTTCATGCAAGAATACCGCCATATAACCAGACAGAGGTTA -TACTGGCGGCGTCTGAATGCGGCCAAAAACAAATCCTCCCAATTTTGTGCAGTCACCTCG -AACAAAGGGTGCTCGAAAAACCGCAGGGTATAATGCAAAGCCACATGGATGTTCGCGCCC -AGGGCTTGCACAATATGGCTGAGCGGATAACTGTCAACGGTGAGATTGTTCTCGATGCAG -ATTTCCATGAGCCGGTCTATGGAGTCGAAGTCCTTCAATCGAGCGACCACACTGAACATT -TGGCCGATCACGGAAGGGCTGAACCAATTGGCCCCCCATGTATTGGTCATGAGGCCAATG -AATGAATCTATATCTTTCCCACTGTCTAAATGTACCAAAAGCGAATCTTGGATTGTCTCC -TGGAGCGCGCTGTGGATGGTCGAAAGATTGGACATGTGACCACGCTGGACCATATGTTCA -ATGAGTTGGGCTTTTTCGCTGGGGACGACCAGGGCGCTGAGCAATGCAAGCCATGTTTCC -GGGTTTGGACGCATGTCTAATTTAGGCATTGTCCGCAGGAAGTGACGAAAAACGCCCACA -TCCTGACGCCGCGCAGCAGCCCGCAACAGGATATTATATGTATCAGTGGTAGCTACATGT -TCTGTGCATGAACACAAAGCTCGGGCAGTCTCAAGGAACTCATGCCGACAAAGAAATTCC -AGCGCGTTATTCAAGGCTGCAGTCGAGATGTAATTGGAGTATGCTCCTGCCATGAATAGC -GCCTCCAGATGTAGTTTAATCCGGACCACGTGCGGAAGCTTTGACAGAGACATGGCCTTC -TCATCAAGCGTTGGTTGCCTTGCAGATGTGACATCCTCGATGTGCTCGTTGAACTCCTTC -ACAGTGGCAATAACCGGACGAGGCAGCAGGATCTCGTCCAGATTAGAGCTGCCGTCCGAC -GTTTCACCAAAATGCCAATTTCCACGAATCAACGGCACAGGCAGCTTCGAGCGCTGCATC 
-GCGCCGCGAGAAAGGCACATTGGGACCGGTGGTTTGTGGATATCAACAAGGGGATCCCCC -ATGGCTTGTAGGCTCTGCGCGCGGGTGAAATGCCCCTCCACCAGCGCCATAATATGGTCC -GTGGCACTCAAAACGACTCGACGGAGTAATCCCTTGCTCTGACTTGCAGGTAGGATGTGC -ACACGACATCCATTATGGATCGGAATGTACCAAATATTATCTTTCAACGTATAACGAGTC -ACACCAGAGAACAGGGCGAGGGTCTCCTCCGGTACAAGTATCTCTTTCTTGTTCAGACTC -TTCTTGGGATTCTTCCGAATATCGCGGTACACTTGATCCAGAATGTCCTTAGTATCAAAC -CAAGCATTGTCGTAGAGCTTTGCTGAGCGAGCTTGATATTTTTTCTTCATATATTCGCGA -AACCGTAACTTCTTCCGATCTCGAATCGATGTATTTCGTAAAAAAAGTTGTTTCTGCGTA -GGAGACGAAGGTTTTGCATGCGGGGGTTTGTTGTGCAGTCGACCCTTCTGTATTCTCTTT -GCTCTCGTCAACGCGAAGCCACGTAGTGGTATCCGCGGAGTGGCGTGTTCGACATCTGTG -AGCGCATTGGCAGGTTGTCTCTGGCCTGCGGACAGCCGGCCTCCACCACTTTTATTCGGA -TCCTCAGAGCCAATTTCCTTGCCTTCGTTTATCTTAAGGTGCTCTCCAACCAATGAGGGG -GAATATTGCAGATTTGAACCGTCAGGAGCCCGAGAGGAAGACCCAGAGGAAATCCATTGG -CGCAAGTGTCGAGAGAACGAGGGGTAGTGGCGGACTGCTGCGCGGAATGGGGAACCGGAA -GTATGATCTATCCTTGCTGGAACATAAATCGGTTTCAGGGAAGATATCCGTCGGCGTTGC -AAAGCAGGCGCAGCCCAGCGGGGATAGAGAAACAGAAGAGGCTCATCGGGGCCAGTCGCG -ACAGTGTCCAAGACGGCAGACACACGCCTCCTTAGACACGGGCAGCCACGCAAGAGCATA -AGGGACGATGCTCTAATGGCACCGGAGACATGAATAAAAGGACGAGAATTGGGCGCGTTG -CTGAGGTGTTTTTCGATGGGCCTTTTTGCCCCGAAAATTGCTGGCCGCTGAACGGGTAGG -CGGTGGCCGGAGCTTGGCTCTGCCGGATGGTCTATACTTTGCTTTCACCAGAAATGATAT -GTATTTCGCTTACTCCGTACACTGAGTTTTATTACATTAATAGATGTGGCCTTCACGGTC -AATCTCGGCAGCCTTGGGAATGCCCCTGAATACCCGCCTCGTTGACCTCGCTCGCTCCTC -CAAGACCGGTCGCTTGGACATTAATGTATTTTGCTTGGTTACCGGAGTCAAGTCTAACAA -TCCCGTTGCTGACTCTCCATGATCACCTTTAAAAACATCCGGACCGGTAATATGGCGCTT -GCAAGATTCTGCAGCGCGAGCGAGATTATCTTTCTTGCGTTGGATTGCATCGGCGTCCTT -GTCAGAATCGCTGGGAGTTTGAGACAGCTCACGAATGACACCGTAAAGATATGTTACAAA -TTGCCAGCGGGAGGTCTGGAATAGGAACATGGGCATTATATCTGGTATCTGATGCGCGTA -AAAGTAAAGTTAGCCCGGTCTCAATATACAGAGGAGGAAATCAATCCAGCTCACCTTGTC -ATTCAGATAATAGAGCTCGTCGCCCCCGTCCTCAATAAACTTTTTTAGTGCATCAATCTT -CTCTCGATCAATGGAAGCCAATTTCAAAGCATGCATCAAGCTGGGTATATCCCCAATTTC -ATCGACATATGGCTCCTCATCAACCGTCATGCTCGCACGATTGAATCCGGATTCCACCCG -GGACAGTCCAGCAGTGCTCTGGTCCCGGCTTCTGACCTTGGACGAATGAATGGTAAAATA 
-GTCCTCAGTAAGCATATGCGCTTTCTCAAAGTCCGAGCGACCAAGGGTCCGCCGTTGATG -AGCTTTAGTCCGTAGTCTCACCCACCGTCTCCTCCGAACAAATGAATGAAACCACGGTTG -TGTACCATGCCACGAAGACGAGGAAAACGAGTAAGAATACTGCCAGCCTTGCTCGTCGGT -ATCACCAGACATGTCAGCATACCATGTCTTCCACGCCCATTCCCAGCTCGGATCAGGCAG -CTGGGCATTCGTGATATCGACGGGACTGGCTCTGCGGTCCTGGGTCATCCAGGCGCTGGG -ATCAAAATTCAACAGCGAGCTCTGCGAGTAACGTGGAACACCAAAAAAGAACCAGCCCCG -CTGGTTCTCGTAGAGAATGTCCATTTCCGAGTACGATTTGGTCTCCTCACGGCTGTTGGT -CTGCTCTCCTACTCCGTTCACCTGGGCGCTGCCATTGCTGTTATGAGTTGACACTCGCGA -AGGCGCAAGCTGTGCTGTTTCGACGTCTCTGCCCCTGCTGCCTTCTCCAGTGGAGCTTGC -TGAGATTGACCCTCCGCGCACCTGGGACGATTCTCGACTGAGCGAGTCGTTTGTGTCGGG -TGCGATACCAAGTCTGTGGGGTTGCCATTTTGCATATTTCCGTTTTGCGAGCTGGCTATG -GACGGATGCACGACTCAAGTGCTTCGTCAGGCTTCCTTGTTGACTGAGGGTGGATGTGCG -TGGTTGGAGTGCTTCACCGGTGGGTTGCTGTGGGACGGTATTATCCACGAGGGAGATGGA -AGGCGAGTTCTCCATGGTATGTGTGAGGGTATAGAAAGGGATGTACTAGATAGTGTTACC -GCATTCCAATAGCTCATCACAGAATATGCTGCACTCGCATTATCTTAGGGGGTATCAGAA -AGGAGGGATATAAGACGATCGATGGAGATGGATACTTGTAGTTGTTCTGACAAAAGAAAG -AAGAGTAATAGATAAGACAAGAAACAAACGAGAGTGCAAAACAAAAAACACAAAAAGCAA -GTCGATGCAACATCGATGACATCGTTTGCGGGGATGATCTTGGAACGCCAAGGCCCATCT -CCTGCAACAAATCGCGGGAACAAAAAAAAACATCAACAACCACAATCATGGCAGCACCAC -ACTACCGAAAAGTCGAGCTCCAATCCCCAGCAGACTTCACCTACCTCTATGCCAACACAG -TCGCATTATCACGCCGAAAACTCGACCTCTACTTCCCCCCTTCCGCAACAGACAATGACA -CGCCCGATCCGATGCGCGAACGCGTCCGGGAGTTAATCGACGAAGTAAGATCAGAACAAC -CACCTACGCACCACCTACAGACCCTAATTTCAAAATAGTACATAAACCAAACCTTTGAAT -CAGCCTCAACCTCAATCAGCATCAACGGCTTAGACTCCACTTCACCACAATTCCCATTTC -CGGCAGCATTCACCGCGCCAACAGAACAAGTCGAGTACGAGGCATTCGATACGGATCTTG -CGTCGCGTGTGACGTCGCTATATGCGCAGCTGGAATCGCTCAATACGACTGTTGCACAGC -AGAGACGGGATGCGCCGAAACGTGCGGCGAAGGAATATGCCGCGCAGTTgaagaagatga -ttgaggaagaggaagaagaagaatatgaaAATGAGACGGGAGGAAACGACGCCGATAACG -ATACTGGGATGGGCGAATCTGCGAATGTGCAGCCGCCCCAATCAAATAATGAAACTGAAA -ACACTACGTCCGGCCATGAGGCGGATGGTAGGGATGTGGATTCTACGAATGGAGCTGGGT -CAGCGCGTTCGCGGCACGCCCCTGGAAACTACGATCCTGCGTGGACGTTGAATGTTGCAC -TTGGGACGGAGGAGGAGCAGGAACGGTGGAAGAGTGGGGATTTTGCGGGTGTCTATGAGG 
-ATGCTTTGCGGATGCTGCTGCGTTTGCAGGGCGAGGGCAATGTTAATACCGACGCTGGTG -CGGAGGGTCATGCGTTAGCAACTACAGTGGGCAAAGCCGAGAGGGCTGGGCGTGCTGCTG -AAGTGGTGGAGAGTATGAATAAGTGAGGGCTTTGAGGACTTTAAGGGTTAGTGACTAGCG -TTTTCTTTGTTATGTTTTTTTTTCCCTCTATGTTTGGGTGCTATTGCAATCTTGGGGGCG -TTTCGGGGCCCTGGGCTTGTCCTCGCGATCATGATACCATGTCTTGTTTATTCTTGATGG -CTCCGTTGAGTCGAAATTTCATGTTCATTCCTTCGGACTGAACTATCATCTGCTATCTCT -ATAGTTAGCAAAGGCGGGTATTGTTGCTTATTACTATTGCCACTTCTACTGCTACTACAT -CGCAATAAGTTAAAATAAAGACCCAGCTTGTGTCCTAGACATGACATCTACCATTTCGAT -GTGATCATTCGTGTATACCCACTACCAACAATGAACAACCAGTTCAAGACATCTTCACAC -CAACTCGCTTTTAAATTTCATCACGTCCTGTCCCTTCTATACTGAGAGCAGTGGCAGCAC -ACGCTGCTTATCTTGATAGGCAAAAGCAAAAACAGAAGCAAGCTCTCCATGATATCTAGA -TCAGAACAAAAAAGAAGGCAAAAACGCAAACAGCGAATTTAGAGAAAGGAGTCCACATGC -TGTGGGGATTGGCGAATAGGTGTTGAAACTACCAGAGAGAGTTTCCAGTATGTCTGGGGA -GATTGGGTTGTGTTGAGGAGATGTCGGAACCCCCAGGTAATATTTTTTGAGGTTTTCAAA -TTGATCGGGGTTGAAAAATGACCTCCAACATCCCTATCTGAAGTTATGTTTTCAGTCGCC -ATATAAAAGGAGATTGTCGGCTTGTTGGAGCTCCAAACAAGCCAAGAAAAAGCCCTCAGC -AAGCTTAAGCGCTAGTCTGAACACCCTCCTGACCCTCGACATCCTTCTTCTCATATCCAG -GGTCCTCATTCAGAACAACAGGCTCAGCAGTCTCCTTAGGGGCCTCCTCCTTGGGAATGG -CCCATTTAGGGGGACGGGCCCACTCGGCCTCCTTCCAGGGGCCCATATCGGAGAGCTCAC -AGCCACCGGCGCACTTGCACGAGCCACCGAACTCAACGGGCAAGTTCTCAGCAGGGACCT -GGGAAAGAAGCTCCTTCTGGTATCCGCTACCGAGGACGTGGATCTTGCTAACGGTGACGG -GGTCAAGGAAACCCTTGACGGCGCTGAAGACGCTGCTGAATCCCCAGGGCGCGTTGATCA -GGTAAAGCTTGCCCAGACGCTCGGGGTAGTAGTTCTGGGAGATGTCGGAGGCCTGCTTGA -CGTATCCGTAGACGGACGGCACACTGGTGATGCCCACGCCCTTCAGATCCATGATGCTGC -AGCAGGTCTCGAGCAGCTTGCCGGCCTTACGAGAGCAAGCGGGCAGACGTGGGTCAGTCA -GCTTCTCGTATTCGGTCACCAGGTTCTGCAGCATGCGGTCGGCGGTGGTGATCTTGTACA -TCTGGTTAAGGTCGATCTTGCCGAGCTTCTCGATGTAGACGGGGCGGCCATCCTGTTTGT -TGTGAGACTATGTCAGTATTGCATCCGCAGCGCAGGGACATGAAGTCAGGGAGGACCAAC -CTTGTCCGTCTTGTGGTAGTACTGAGGGTAGAATTTAGAGACCTCCTCCTTCTCGGGGTA -CTCGAAGTTGCGAGCCAGATCATCGGTGCCGAACTCCTTGCGCCACTTCTCGCTGGCAAT -GAACCTGGAATTTCAAATTAGATTGACAGGCCTCGATAGAAACTGGAATCCAGAGGCTTG -GGATAGAAAGCTTTCTGGCAACTTTCCACAAAAAAAAAGACATCTATGCCCTCCGGCGAG 
-AATCGAGGAACGTACATAGTCTTGGCAGCCTCAACATCAAACTTGCGGGCACGCAGGAAT -CGCAGGAGAGTCAGCGTATCCAAACGCTCAGTGTAACCGAGCTTTTCGAGCTCAGCGCGG -AGCTGGTGCACCTTTGCATCCTGTTCGGGGGAGGTGTGGCCGGGGTGGCCAGGCTGCGCT -TCGGGAGCGGTGGTGGGGAAGTCGTACTGGTCGTACTTGGGATCGGTAGGCAGCTGGGAT -TCTGTGGCCGCCATTTTGATTGATTGGAGGTATATGATCGAGGGAGAAGAAGAGAGTGTT -TGTTCTGGAAAAAGGGCGGGAGGCGGAGGGGGGTGGGGATTGGCGTAAGTGGCCACTTTT -GAAGAACAAAAAAAAAGGCTTGGGGATTCCACAGTGGTTTGGAACTATTGAACAATAATA -ATAACAATGGAACTACTCAAATGGCTTTTTTGCTCGAGATTTACAGGGGGAGGACTGTCG -TCATTTTGCAAATTGACTGGTCTGGTGAGTTATTGGGTTGTCATTTGGGTCTAAAGGATA -TGTTTTGGTATACGGCGTATAATATGTATACATCTAGATATGAATATAGGGCTTCCTGTC -TGAATTCTCGACTCCAATACCGATTTTGATTTTAATATCTTGTCCTTACAGACATCAAAG -AGAAGATTTCACTTGGGAAAAGGGGCGAGGGGCAAACGTGGCGGTGTCGAGGTGTCAGTG -ATCAATCCACTCTGGTGGGCACCAGCATTCCCATGGGCATATTTGGCCGAAGGCAACCAT -ATATCAGAGCTCAAATTGCTAGACACTTCTGCATATAAATTCACATAACATACGGGTATT -CAATTCAAAACAGTCCAACCTTCAGAATCGCAAACAGGCATTATTATACACAGCGCATGA -GGACCGGAGAGGCCTGGGCGATCATAGAAAGACGCAGAAAGACATGCCGGGGGATTAAGG -ACTCGGGGCCCCTATGAAACCGGGGAATAAACAAATGGCAACAACTTTGCGCTGGGTATA -GACATTGAAAAGGATGAGGAGAGACATGGGAAAAAGGCAAGGCATAATACGAAAGGGGAA -CGAACACAAAAGGATATACCAAAAATCACAGGATCATCGACATCCCCCGTACCAGCGGAG -AGAGGGCCTTGATCATTCTCCATTAAGAGTTATGTTCGTGTTTCAGCAACATTCACAAAG -GGCATTCGCCCGAACTGCATGGGCGGGGAGATTGCAGTCATGAGCAGTCAACCAATCCGA -CATTTGGGATTGGGGGGAAGGGGGGGCGGCGCGATGGAAAAGCATGAGATATAGGCGTCC -ATCATCATCGCAAGATTAAGTGCCTAGGGCACGGTCGATCGTCGGGTAGAAAAGAGGACC -TGGCCGGGGCAGAGCAAATGGGGGAGCTGTTCGACCTCTGCTCGAGCTCATCATTCATCA -AAAGGTGGCAGGACTCTGAGGGGCCTGGGGCGTATAGCGCCTTTACCACCCCTTCTTTCG -TGCAAATCGTTTGTTTAGGAGGGCAAGCACCTTAGTGACGTTGAACTGGACATACTCCAG -ATCAAGGTCTTCAAGGTTGTCCTTTTGCATCTCCAGGAGATCATCCAACTTGAGATCAAG -CTCCAATAACCCACGGGCTCGTCCTAAATGATGCGTTAGTAATGATGTTATTTTTCCCTC -CCAAAGCTCAAATCCATGAACTCACCTTTATAGTGCAGACTGATGACAAAGGTGCCTGGC -AAGGGACTCTTGAACATAAAGTAGATGTTCGCCCGTCGATTTTCTGGCACATTGCTGCGT -CGGATGACACCTTCCTTCTCCAGCTGCTGATGGGTGAACTTGTACGGGCCAAGTTCCTGG -TTCTTTGGCGTCTTGCCGCCGCGGGTTTTGCCTTCAGACTGGCTACGCACGTTGTGGAGG 
-TAAGACTTGTATGTCTCCAGCTGGCCAACCAGATAAGCATTGTGGTCGCGAATCGTCGCG -AAAACTTCCTCGAGCTTCCGGGTCTCTTCCAGGACTTTCTCCTTGAGTGAGCCCAGGTGC -ACCAACTCCTGCTCCACTTCGTCACGAAGCAAACTGAACTCGTCGGAGCGGTCAATCACA -CCCATGTCTTGTAGCTGGCTGAGGAGTTCCATGGTTCGGATGCCCTTGCGGACCATCACG -GCATCATTTTTCAAGGTCGCTGCAGCCTCGGCAATGCGGTCGAGGCGTAATGGTCGGCGC -GCGACTGAGGTGTTCTGAGGCAGGGATCGCAGGATCTGGACAAAGGTTGACTTGGCCTCC -ATGAAGAATACTTCTTCCTGGGTGATATCGAGAGCTGCTGTCAAGTCATCAAGAGCAGTC -TCCCATTTACTGAACAGAGGCACAGTAATGGTTCGGTTCTCTTTGCGGGGGACCTGAGGG -GGAGCACCACCCAACTCTTGGAGCAGCACAGACAGGTGCGAATATTGGTCTTGGGCGAGC -GCTCCACTGTGTTTCTCGAGGAGACCGTGCATGACGTACATTTCATTAAGAGTAATCTGT -AGTTCAAGGTCCCGCTTCGACAAGGCCACATAGTTGTCCATTTCGAGACTCTCATAGAAA -TCCTGAACTTCACACAGATCGAGCAGGAACTTGTTGACACGTTCTTTGTTCTGCTGAATA -AAGGGCTGAAGGCTAGCCATGTAGGGCTCCTTGGCGTAGGAAGGCTTGTTGGCCAGATTC -TGAATCATCTTGGCGATCAGCGTCAAAGTACGGCGAGGCTTGTCCGTGGGCGTGGCATCG -ATCAGCATGTAGGAGCGAGGAGTAACAATAGCCGGGTTGATGAAGCGTAGGAAGAAGAAA -CCGCCAATGAGAGTACAGATGGTTTGATCCTGAGCATCGGGATACTTTCTGCGCGACAGA -CTGCGAATCTGCTTACAGATCCATCTAATACCATAGGGGGCCTCGTCTACCGATTCGATG -ATGGTTGTGATGAAACCGTTCGCGATATCCGTCAACATCTTCAAACGTGGCGTAATGATA -GCTTGCACTTGCGCATTATCGGCAGCATCTTCAGCCGTCACCGATCGTGCCAAGTAATCC -GGGAGGGAGCCGGTTTCTTCTTCGATCTGCTTGATCATGCTTTCATAAACCTTTAAGGGG -TTGATCTCTAGATCCACTTCCCGCAACTCAATCAGAGCGTTGATCTGGTCTGCCAGCACC -TGCTTCAAATAACTTTGACCGGGACCACGGCGCGTGTATGTGGTCATCATGCGAGAAACT -GGGGTGTTTTGCCGTAACAAGGACGAGTACTCCGGTGTGTTATCGAACTGATAGGTGAGA -ACGGACTGGAACATAGTAAGTAGGAGATGCTCTTCGCGACTCTCGTACTGGTTTCCGTAG -ATGGTGAACATGACGGTCTGGAGGAGGGAATCGATCTCGGCCATGGAAACTAGTCGGCAG -AGGTGGGCGATGTGACGGGGTTCGGATTGCAGGAGGAAAAGTAGATTGCCGTACTTCTGG -GTTCGTTCATCGTTGGGGAAGAAACCTTCCTGCGGGTCTGCGGTATCGTCGAGATGGCTG -GCGACTTCGTTTTGCTAGAAATATGAGACGTTAGTCACTGTATGCCGATTTTCGACGAAA -TTCTTACTTCTTCCAAGGCCATTCGATTCTGGATAAGCAATGCGATTCGCGAGTCCAAGT -ATCGCACATCTTTTTCCAATACGAAGTTCTTCTTCGATTGTGATGAGATCCTCGATTTCA -AATCTCGTAGGTGCTTTTGTGCTGTCGGAGGATTAACGCGGGTCCATTAGACGCCAGAAT -GCAACGTACCTCGTGCCAGGTCATCCGAAATCTCCAAGTCTCGATCCTTGGCGGACATGG 
-AAAGATACAAGGCCGTGACCGACATCCGCTTGGATTGACGGAGCGACCGAGTATCGTGCG -ACGACATTGTGTTCTGTCGTGCTACGGGCTGAAACgaggaggaagaggaggaggaTGCCC -GCGACTGTCTGGCAGGTTGGAGCATCGTCGCAACAGACATTGTGAGATAGAACGAAAGCA -CAGGGGGAAAAAAAAGGTTGAACCCGTACCGAGGGGTTTCGAGACCGTAACGGATCAGGG -GAAGAAAACAAATCAGAGGAGGGGGAAAGGTCCCAAGGTTAAAAGAGTTAAAAGGGTTAA -AATGTCAGGGAAGAAATATTTTTTTTTTTTAATTTCATCTGGGGAAGTATGTATTTTGTA -TGGTATTTTTTATTTTGATATTTACATATATCCTATTACCTCGATAAAATAATATAGGAA -TACGTATGTTGTAGTCATGCAAGGGTTTCTTTTTTACCCCTGGCCCAATGCACTTCGCCA -CCCTTCCATTTTCTATGGATCTGGAATGGAGTTTAGACACTAACTCCATACATTTTGCAA -GTTCATACTTGACGTCAGATCTATCCTTTTTTTTTTGAAAAGCAAAAAAAAAAAAAGCCC -AAAACAGAGCAGACGAAAAATATCGACAACATACAGAAAACCACAATCAGAACCATACAG -GAAGGTGACACGGGTTTCATGTCTGCAGGGGCTAATCGGTACCTGCGTCGATGTAAAAAA -AAAAAACGACTTGGACTATTCAGGCTAAGCCCACGAGACCTCCATGTGGATGTCTCGGGA -ATGTTCTTCTCTCTATGCTGTAGATGATGAATCGGAAGAATTGAGATATGAAGATGGATA -CAACGTTGTTCGAACTACTCACTTTCGGGTTAAATTCAGATCTGGGTAGAGTACCATATA -GGTAATCAAAAGCTGATTTCCACTCTCGGGGTTTGCCTTATCAATTAATTACGCGTCTAA -TCTTTGAATGTATTTAGTATTCAGTACTTCTGCCTTTCAAGTCATACCACCTATCGAAGT -CTATTTGGATTGTCTTAGACCTCGGCTGCATAGCTCTTTTTCTTATACATATCAAGTAGG -ATCTTTTTCTTTTTCTTTTTCTCTATTGACTTGGGAATACCAAAATAGATCCAACTGCTC -TACTTTCATATACAACAATGTATATCACCTAATAATTAATATCCAGATAATATATGATTA -ATATCAAAAGCCATGGAAGCAAACTATGCGGGGGTGCAACCCACCTTGATTTTTTTTTTT -TTTTTCCCTAAGAGATTGGTCTATGCTTTTTTTTTGGCTCAGGGGCAGAGAGTGGAGTCA -GTGGGTTCTACTTTCCCATACATATTGTGGAATGATATTGCACTGTATTTCTAGCATAAG -TAGTCTACATATAAACAATTATATCAAAACATAGATATCAATGCCAGCATGGCTTCATGA -CTATCGCAATATCATTTCCATCCGGCCGATCATTCACACGGGCCAATATGTATCCCATCC -CGTCCTATATGAAACACGCCAAAGCTCTGTAGATCACTTACCAACCCCTGTGTATCACTT -CTATGGTTACTTGCATATACAATTCGAGTGTAATCTATAAGTACTACACATCCATGTCCG -GTTGGAATCATCCGGGGTTCCGCTAATTCGCGCTCGGATAGGAATCCTTCGAAATCATGT -TCATCAGCTGCTTTCATTGCGATCTAGTAGGGATCGGCAATGGGTGTGTGCATGTCCTTT -CAATGGTGTATGCACtacatatatatatatatatatacaacatataGACTGCGTATGGAG -TATGGAGTAGCACGTACTGAGTTCCTGGATCATTACTCTGTTAGCACACAATAGTTATTC -CAATGCACACTCATCTTCCAAGTTTACGCTCCGAACTTATCACTACGCCTCTTTGGGCCC 
-CCGTTGAAGAAGATCCCATGGAACTGTACAAAGTACTAATGTTCCCGTCTATCCGTTGAA -CTTTGTGTTTAATGGCTCTTTTTCTTGTTATACCCTGTTTGTATAGCGGGGAGTGGTCAA -TGCAGAGGGGGGTTAGATCACAAGCACTAAACTACAATAGTTATAGATCGACCTGAACAT -CCTTGACCATTCATTCCATGCGTATTATCTATATCAAGACCTGCGATTTTGAGCCAACTC -GTGAGCTCAACTGAAACATTGTCTCATTTCCTACAGTCACGTCTTTGGACCTCGACCAGA -TCAAAGAATCTACTTTATTTTGATCGAGTACGaagacaaagaaaaaaaatcacgagatga -gcaaaaaaagaaaagaaaaTACAACCTACGTGGATTTGCGCATTTAATGTTAGTGTCGAG -AAGAACCTGATATTCTATACATGTACATACAAGAAGTAGAATCCCAGAGCCTCCAACTGA -CGCACGGGTATCGCTAATGCTAACTAAACAAGAAAACCAACCAAAACCCCATTCCTAAAT -GCTTTTGTGACTCCGGACCAGCCGTCTATTGGCGGAATGTAAGAGACTTGCCGGGGGTTT -CCCGCTTGGTCTTCCGCGCGCCTGAGGGGGCTTCCATCGCTTTAGTGTACGGGTTGAAAG -CCTTTTTGCGAGGGTGGGTGACTGTGGCAGCAGACGGTGCGTTGAGGACTGCTTCGGCAG -TGCCATAGTCAAAGGGAGCCTCGCCCTCGGCCTTCGCTTTCTTGGCTTTCTTCTCCTTGC -GAGACTTTTTGCTCTTCTTTTGCGTGGAGTCGGGCTCGTTGCTTGTGTCGTCGAGGTCGG -CTGGGATGGCCAGGGCTTGCTCGGGGTTGTTAACCGGGGTGACCTTTCGTTTACGGGGAC -CGCCGAGGTCTTTGACGGTGAAATACTTCTTCTCCTCTGGCTCCTGGggagctggagcag -gtgtaggctcaggagcaggagcgggTTGTTTTGCGGTGCCGAGGTGGTCACGGGTTTCAG -AGACGGTCGTCGGCATGGGTGGCAGAGGAAGAGAAAGGCGTAGAGCTTCAGCACCGGCAG -CAAGCGCATACGCTGGAGGAGGGGGTGATTCCCTGGCTTCTAAGACGTTGCCCCAGAATT -GGGACGTCTCGTATCGTTCAGCAATGGGGAAATCGACGGCTTCCGGGACGGCAGCCTCTA -GAACCTTGGCCATGTGTTTTGGGGGTGCAACGTCGCGCCATTCTGGGCCATCAAGACCGG -CAACCTTGGCTTGCTTGATAACCTCCAAAAGATCATGTGACCGTTCCTTCGTAATGGGCG -TCATGGGTGATAGGGTTCGGAATAGAGATCCCATATCAAGCGGCATCATTATGGCAAGCT -TGAAGAGCATGTGCTTGGGGAAAACACACTGCCAGCCCTCGTCCTCGGCGCGCGCTACTT -CGTCGCGCCATTGGTGAACAGCTTTGAAAACGGCGAATTGTTCCTTGGAAAGGTTACCCG -AATTGCGCCACAACAGATCGTACCAGCCTCCAGGGCCGTGTCCATTGACGGCATCGTAAA -CAGGACGTTCATAGCGCTGCAGGGCTTCATGCTTAGACCGCTCATTGACGTAATCAATCT -GGCTAGCCTCTTCGGTTGAGTTGTCGATCAAGTCGTTTCGTAGACGGTCGTAGATGTAAA -GAAGATAATGCGTGTCTGATCGCGCATAGTCGAACATAGCCGGTGGAAGTGGGCGAGCGC -GCCAATCAGCCGTCTGATACCGCTTATCAGCTTCGAAATTGACGAACTTGCTCAGCAGGA -ACTTCAAGCTTCGCTTCGGGAAGCCCAATGCAGATGCAGCATGGAAAGTATCGAACATTC -CAACCACATAAAGACCCAAGTCACGTTGCAGCCAGATGATATCCATGGTGGAACCGTGCA 
-GGACCTTCAAGATCTTTGGGTCCGCGAAAACCTCGTTCAGCATCTGCAAATCTTCTCTCC -AAGGTTGCAAGGTATCCACAACCCAATCCTTATCCCGGGTGCTGATTTGCATCAAAGATA -CAAGACCGTGGTAGGAGTGGACGTCGTGGTGCTCCAAATCGATAGCGATTTCCTTCGCTT -TCTTGAGCTCATTGAGCATTTCCTTGACTCCATCCAAAGTATCGACGAAAGTAGCCGTGG -TGCTTTCGAAAGGCAAATACTCAATCGGAGGAGAGACCTGGTAGACCGACTTCGGATACT -CTGAAGCAAGAATCTCCGCCTCGTAGGGGTGGGGGTAACCAGTTCTATCTTCGATCTTCT -TCGTCTTCAAGCACTTCTCCAACGGAACAATGGCATGAGGCTTAGACTTCAGAAGAGGCT -TGAAGGGACCCATGTCAGTGTTATCCGGTTTGCGATTGAATAGCAATTGCGGTTTGGGGA -TCTTGGAAGGGCCATAGTCGTAGACAGTAGGGAAGTTGGGAGCTCTCTTGATCGGCGCCT -GTTCTTCCTGCGACGGGCTGAGCTTCTTAATCACGCCTGTGAATTCATCCAAGCATGCAT -CGGCCTTTTCCAAAAGCGAATCAATCACATCCACCACACCACGCCAATTGTCCTCGACGG -AATCCTCATCGTGAAGGGTAGGCCCTGTTACATCGGTGCCGGCGGTGGCAGCTTTGATAA -CGGCGGAGGTCAGTGCAATGATGCGCGCACTCTGTTCATCTAGTGACTCAGAAAACTCTG -TGCTGGAGGTGCGGTGGAAGTTTAGGTCTTCCGACGAGAGCTGTCCGACGGTTCGAGTTA -CCTGCACCAGAGCGGAGGTTAACTTGTCCTGGAACGGGGAAAATTCTGCAGAGTCCATGG -TGACTAGCAGAGCTCCAGTTCACTGGATCTAGCGAAAGGAAGAATGACGGGATGAAACGC -GGAAAAACAGTGACATGAAGATCCAAGGTCCCGAAAAAAAAGTCGCGGCGAGAAAATGAC -AATGGAGAAATGGTAGGCCCGCCCTGACCGCCTTCCCAGAGTGGCGAGATTTGTATGAAC -TTCCGTCGATCTTTAAGTCGTCACCGCATTCTTTTCCTTGCATTCCTCTAGATCACTCAA -ATTTCAACTTAGGAAGGAGCGTGCAAGGCCTGCGGCATAATTTTTGATAAATTATCTTCT -ATCATATCCAGAATGGCAGCTTCTATTGCCCCCGAATGTAATGATATCAAAGAGTGAGTT -ATAGTGAACATACCCAGCTTCTACATCTTATCCAAAAGCTAATACATTACCAGGAAATAT -GACACTTGCTTCCTCAAGTGGTATAGCGAGAGTATGCACCTATGCCCTTCAGTCATAGGG -TTGAACCGCAGCAGCTAATTCCAAAACAGAATACTTGCGCGGCAACACTACATCCAATGA -TTGTGAAGCATTGTTTAAGAACTACAAGGGGTGTTTGAACGTGAGTCCTGCCTGATGTTA -TTCCTCGATATCTTTTTATACATTTTGAGTGGTTCAATTTGTCTGACAAAGGTATTGTAA -TGTTTGCAGAAAGTTCTCAAGGAACGAGGCATTGACGCTATGGTGGAAGATGCCCGAAAG -AGCGGCAGCAGTGAAACAGACGCTGAATTCCTGAAGAAATCTTGAAGACAGACGCCTGGC -TCTGAGACTGTGAATCGCTCTTCCTCGCGGAGAATTGTACCATCAACCTGTGTCTGGCTT -CATCGGTTATTTTCAGCGATAACATGCATTTTCCATGTACAACACAGTAGTAAACGGCGG -ACCTGTGAATTACTACATTCGGTTAGGTTTGCCTCTTTGGGATTAGCGCCCCAATACGCT -GTCACTTAACCTACCTTCAATGCCGCCGACATAGAAGACCCGTCTTCGCTGCTCATCCTC 
-CCCTTCTCTCACTCTTTCATTCTTCCTACCTCACTATTCTTCACTCATCGCCGCCCCCTA -TCTTTTACTCGGGCCTCCACCGACTACTGCAAATTTGCTTTCAGCCCGACGCACTGCCTC -TTTCATTAGATATACCAATGATCAGCTCCGTCATACAGAGAAAAACTATCTCTTCAAGTA -TAACATGTGATATATCTCTTTATCCTGAATCTATTCATGTTGTGTATCTCAACGTTTTGT -CGAACAATTCCATCGCAGCAGCAGTCCAGCCAGAAATAGGGTATCTCACCACGACAGCGG -AATCTGATACTTCCCGCAATTTTCAAAGCGTATCAGCATCCTTGAACTTCTCAATCACAC -CCGCAATGCCATTCTCATCCACCTCCCAATCGCCCGATTTAACTCTTTCCGCCCTTTTCA -GTAGCACTTTATGAAGCTGGACACCATGAGTTTCCAAAACCCATTTGAACCGCAGACCTG -ATAAAATTTATGGGAGTGAAAGCTTCATCAAGCACAGACTGTGGGTATTGAATATATATT -CCAGCTGATTCCAGCTGATTGGAATAGCACTTGACAACCAACTGCATCGAGGAGGCGAGT -GAATGTAGTGATTGACTTGTCGACATCTCGCTGCGTGCATTGATGATACTCTCATGGGAA -GAATTGGGAAAATGCCTCCTTTACTTCAAAGGACGTCTTCACTTTCCCCTATTCAATCAT -CTCCAAGGAAGCCCCGAGTATTTGCTGGAGTGGTATCCAGCAGCTTGCATTACTGATGAC -AAGTCTGCTAAACCGCTAGTCGGATGTAGGCAGCTAGTTCAGTCTTTTGGTCAAATATAG -CGGGAGGATTAGTATTTGGTTATATTTGCTTTTTTAAAAGAAGGGCTCACGTTATGCCAA -CTTCATCACCACAAGTTTAACAACCAGTGGCTTCATATATATACACGAATCGGTCATCCT -CATCATCGAGCAAAAGGTACTGGAGCAAATTGTCGCTGATCTGTAGCCCCCTTAAAGAGT -AGAAAAGCGAAGCCCTGGATCATTATTTGACCATCCCCTTGATGGGATGGAGTAGATCTA -ACTGAACGGAGGAACTGAGCGAGCTTAGGAGTTAAGCGTGACTCGAGGCCGACTGGGGAA -ATATTCCTCCCACCAAGTTGTGGACGGGAGCGAATCAAGGTTATGATTGACACTAGTCCA -TGCATACCTCAGAATTTCATCGGAAAGTTCAGCGGATCGCTGGGGGCCACTGCCAGGTCT -AATCCCACCTCGCAACTAAATATATACTAATATGTCTTCATACATCCTACTTGATCGACA -TAACAATATCAAAAGTATTATTTTAGGATAAAAACATTATCAGCATTATTTACAGGAGGG -GTTGGTTGCTGGATTGGTATTAGCGATGACCCCGTCTCCGAAGATCACCCTGACCAAAGT -TCGCCGGGACCCTAGGCACTGGATTACTTAGTGATCCAATACCTCCGCCCATAGACCCAG -AGCCACTGCTTCCACCACTGCCTTTCTTGGACTGGGTGAGCAATGTCCACACCTCTGCCA -TACCAGCCCGACGGGCAGGTAGCATGCTGTACTGCATGCTATAGAGGTTGCGGCGGTGGA -TAGGTGGCACAAAACGACCGGTCATAAGGCAGAAGATGACACGAGGCGCGAGGTAGTGTT -TCCGGAGCCGGTGACCCAGCTGGAAGTACTGGTCGAACATTGCACGCAGGGAGAGAGGAA -TGGACTATAATTCATGTCAGCTTTTCTGGGGCAATGTTTTGGAGAACTACGAAGACATAC -CTCGAGATGGATGTAAGAAACAAGCGCAGAAGAGTCCCTCTCCGTTGGGTTCTTGTTTGG -CTCATTCCGAAGCTCGAATCGGATACCTCCTGGGAGCCGTCGTGAGTCTTTCACGCGGAG 
-CATCAATGCGAAGAGCGGAAAATGATTCAAGAATGCGAGGCAGGTGTCAAGCGCTGCCTT -CATTGAGATAATTAACATGCGGGCCGAGGCAAAAGTTAGGTAAAACACCACGACCGTAGG -CAGTAGGAAGAATAGGACTGTGAAGAGGATAGTGCCAAGGAGGAGTTGGTCGAGGTCATA -GTCACATGAATCGATTCGGTTGCGGAGAACGTTGCGTTTCTTTCCACGGAAAAGATGGAA -AAGGGAAATGATAATGGTGAGTTGCCAGTTGAAGATACGAGCGGAGGCAATGTAGAAGGA -GTAAATGTGTACCGTCAGGATCGAAAGAAGGTCAGAAAAGAGAGCGATCGGCATGCTAGC -TCCCGCAAAGCTTGAACATCCAACAATGTAGATGACAGCAGGAAGGTAAGGTTGAAGGTT -GGCAATACAAGCGGCCCAGTTTTCAATGACCCATAGGAACAAGTCACCGAGGAACGCAGC -AAGCTCATTGTTTAGTTTCAAGCCCGCTGGCCAATCCATTAGCCATGAAATAGTGCGCTG -TAGTCCCTCCACTGTCCAGCCAGTCAAAACAGTGTTGATTTGAGAGGCGACCCAGTTTGC -ATTGTCAATGATATAGGACCCCATGGCAATGCCAATGATCACGTCGTTGGCGACAAGCCA -GAGGCTGTTATAGAACCGAATGTAGTCTGGATGACTGGTCGTGACACTCTCCCAGTTGTC -CTTTCTTTGCCGAAGCTTGACGTATTGAATCGGCCAGTAGCAGAATTGCTGCAGCCTGAT -ATCAAATTGCTGGGCCGTTGCTGAGATATCCTTGAGGGCAGCAGCATCAGGCCGAGCACG -CCACTCGAGAACCCGCAAGACAATTTCCGCAACAGATCGGTGGCATACAAGCCCGACGAC -GAAAATGCGGGTAACGACAGGCCAAATCCATTGCCAGAAAATGTGAGCAATACAGAGGAC -AATAAACCCCCATAATGTTGTCGCCGATTCCACGACACGTTCTCCAACGCTCATACTCCT -TTTCGCACGAATTCCAATCAAATGGCTATTTTTGTGCATGAGCTGCCCCATCTCGTAGGC -GCAATTGATTTGGTTGATAATGAGTGAAAGGGCTTGCTCCTTCTGTGATGGGATGTGCTT -GACGACCGTGTGAAACTTGAGCTTCTCAACCAACAGCTTAGGCCCAGCCTTGTCTTCCTC -CTCCGATTCGATCTTGTCTGGCGATGGGCCCCATATCTCCGTCTTTGTCGCTTTATCACC -CAAAGCCAGGGAAATCGGGTCGAGCGACATATATTCCATGCGTGTTGGGTGCGGGCGGTG -ATACATGATAACCTGGACAGACAGATTTGCTTCTGGTGGGCAAAATATGTGAGGGGCTTT -CAGAGATGAAGTTGTAGTCACATCAAGGTGGGAAGGGTTGAAAGATGTCGGAGGCTCGTG -AGAGTTGGTCGAGCCGAGGACATGCATCTGAGAGCGTCCACATAGAGTGAAGATGCGCAC -AACTGGGTGAGGGCTGTTTCGAAAGAGAATGCCCGCTCGAAGCGCATTATCGACATTTCG -AGGCTATTTAATCACAAGGCTAGTCAGCTTTATCAAACGAGTAGAAAAGGCAGTTGCAAC -CACATACCTCGACATCTTCCAACACCGTGAGCACAAACAAATCAAGCTCCGAGTTCCGCC -ACCCGACGATCACACCCGGTGACGAGGATCGCGGGAGATCATATGGCCAGAATACGCGCA -ACAGTCCATCGCTCATGAGCATATTGTCATCCTCCACCTGAGTCTCAGAACGAACCTCGA -CGGCGCAACTGTTCGCGCGCAGAGACGCCGAGATTCACGTGGCGTGGATGCTACATGTCC -CGTCAGCAAATAATTCTAAGCCAACACCAAGCGCTTTATCTTTCGACCGCTGCCCAAAGC 
-TTTGGTTGTTCCGGTGTTGCTCACCAATTGGCCCTCCTTAGCAGCGAGATGATGGATCGG -CAGGACGAGAAGGAAAAAGAAGACGGAGAGAAACAAAAATCCTTTCGCAGTTCGTGTCTA -AATAAGAACAATTAGCAAGGTTGGGTAGGCTAATGCGTGCTGGCACGAAGCTGATAAAGT -CGCAAGCGGAGGGGGGCTGAGGACCGATGACCGAGGGCCGGAGGAAGACATACCACGACA -CCGTGGTCCCAGGCAAAGATTGCCAATTCGTATAGACGGGTAACAAGGTCCCACCTGGCG -GTAATATAGCCCGCGACAAAGATTCCAAGGGTGAGGACAGAAAAGGGGTGCATGAATCGA -GGTAGTTCAGGCCGTCCGCGTTTAGTTTGCGCGCATAGCTAGATGAGGCGGTTGAGGGTG -ATTTAGAGGCACGGCGATAGCACAAGCCCAGAATTAAAAAATTTTCTGTAAACTTAAGCC -GCGAAAGAACGAATCGAAGGCAATAAAAAGAACGATCGCTTGGATTTTGGAATAAAGTTG -CTTCATCCCAATCCCGCGTTGATCGTCTAGACCGATCTACACGTGACTGGCCTCCGCATT -AGGATGCGCGAAGTAGGATGCGCGAAGTATAACGACAACCTGTTGGTTACATTATAGGAT -AACGGAATGTATAAGGAGATCCCCAAAGATTTCAAAGTAGAAAATACCCTAGTTCATCAT -AAAATGTAAAAGAAGAGTATACAGCGATGCCTTTCTCAAACCCATGCCTTCCATGAACGC -CAAATCCCATGAATAGCTCACATCTTATAGAGCCTTGATACCCTGGAACACCTTATTTGC -TTGTAATTTTTCATCACTCTGGGCCAGATCACGCATTACTTGTCGCTTTTGCTCCGCCGT -CATCTCTTTGCCCTCCGTTGCCGCCTTCCATTTTAGCAGCATTCCCTTGAGGAGCGCACT -CCCCTCCTCCACCTCTTCTTGGGAGTATTTGTCAAGCGAGGGCGCAGGAACTTGTGTCTG -TCCATTTTCGGTTGTCGATGTGAAATCCTCAAGCTCCTCAGCCTTCTGACGAGCCATATT -GACAACCTTATCCGGGAAGCGCACGAGCTCGGCGACATGAATACCGAAAGACTGATCACA -GATTCCTGGCTCCACACGGTAGAGAAGGGTGACTTGACGCTTCTTATTTTCCTCGGTCTC -AGCCTCAGTCTCGCCCTCATTGCCGTTTCCAATGAATGCCACGACGTGCAGGTTCTTGAC -GGCTTTGGGATAACGGTCTGCCAAGGCAGTGAGCTCGTGGAAGTGAGTTGCGAACAAGCC -AAAGCAGCGTATCTCAGTGACAATGTGTTCTGAGATAGCCCAAGCCAGGCCAAAGCCGTC -GTACGTGCTAGTTCCACGGCCAAGTTCGTCGATGATAATCAGTGACTCCGAGGTGGCGGA -CTTAAGGATGTTGGATGTTTCAAGCATTTCTGCCATGAACGTCGATACACCTTTGAGCTG -CGAGTCACTGGCTCCAACTCGAGCGAGAATGCAGTCGAAGATGGTCAGCTCAGCCTCTGT -ACAAGGGACGAAGCAGCCGGTCTGTGCCATGAGTGCAATCACACCAATCTGTCGGATGTA -TGTAGATTTACCGCCCATGTTGGGACCAGTGATGATAAGGAAAGATGATTCATCACGAAT -CAAGGAAACATCATTCGTGATGAAGGAGATATCGTCCTGCATTTCCATGCAAGGATGCCG -AGCTTCTTTTAAAACGGTGTTTCCAGTTCCCCGGGGGTGCATCTTTGGGCGAACATATCC -CGAAGGTGCGTGCATTGAAGCATGAGCAAAGCTCACAATGACATCGAGGTGTGCCAGTAC -ACCGGCGAGTTGCTCCAACACCGGGCAATACGAAGCGGCAACGTTGACGACCTCGTTAAC 
-CAGACCGGTCTGGGTTCGGTTGTAGTTCGACGACAGTTGGTCATGCTCTCGGCGAAGGGA -CTGCATGTTGGATGTAGTGAAGTATACGCCGTTCTTCTGCGTCGAACACTCCTGGTACTC -TTTCTTATTGCGAATGCAGCCCGCCTCATTTCGAGTGAGACGGAAGCACCAACCGTGCAC -ACGATGATTCTCCATGAAGAGCTTTTTCTCCATGTCTTGATTCAAATCCTTTGAAACTCG -GCGGTGTTCCATATCCATGTCGTATCGTAGTTTGTCCAGCTTTTTTCTAATGACTCGCAA -GCCGTCATCGAACTCAGGCTTGATAATGAATTCGTGGTTTTCCAGAGCAGCAAGGTCAAC -CGTTGTCTCAACCATTTCCTCGAGCATGGACAAGCTGTCAGAATAACCACGGAGCTTAGA -CGTGTATTCTGTCTCTAGAGGAGTCTGGTACTGCTCATCCATAACATCACTCAGCGCGGT -CACAAACCCAGGAAGACGAATTGCAACCTGGTATGCTCTGACAACATCTTCCAGGTTGGC -CTGTTTCCGCTGGAAACGTTTCGCCAACCGGTAAAGATCCGGAATGGAACGCAAATGTTC -CTCCTGCATAGTCTGACGCAGCTCGGTGTTAACGACAAATGCCTCCACCAACTGCTGCCG -TTTCTCAATCGCGGCAAGATCCATCAGAGGCTGCTTCAACCATTGGGACAACAAACGACT -ACCAACAGGCGTCTTGCAGTGATTCAGCAAACCGAACAAGCTCATACTCTTAGCACCGTC -CCGCGGACCAGGCATGAGGTTCAGCGCCCGAAGCGCCGAGGCATCAAGCTTCATGTACTG -CGACAGATCATGCTGGTAGAGCTGGTACTGGCCAAAGTTGGTAGGATCAGTCAAGACATT -AAGATACTTGATCAGCGCCGATGCAGACCCCATGGCCAGTTTCAGCTCGGTCTGAGGCAG -TGTACCGGCAGACCTTTCATCCCTCAACAATCTTGTAAGATCCTGTTCGATATCTTTCAC -ACCAAAGTCGGCAACAGGCCGCTCAGAGATCGCAATCCCACAACTATCCGCAATAGCCCT -GATCTTCGCCAGCTCGACATCCTTGCGAGCCACATCCATGGTCACGAGACACTCTTTCAC -GCCGAGCTGGATAACCAACGACTCAAAATTGGAGTAGATATCGTTATCGAGGAATTCACT -GACGCCCAGCTCGCGCACGCTCGCATCAGCAAAGCACACGCCGACGTTCCGGGCCTCGGC -TGCGCGAGCGGAGATCTTCACTGCAAGAATGATAGGCGCAGAATCCATTGACAGCGCGCC -CACACTGCCGAGTTCTTCTTCCACATCTTGCAGATTACCTGGACTGGCTTGCTTCGCCAG -CTTCCAGTTGCTCTTGCCCGAGCCGGCCGAGGCGAAAATCTCAATACGCTTGTTCAATCG -GAAAAGGGCTTCACGGAGGAAGTTTCGGAAGACTGTGACGCTCATCGTGACCGAGGGGAG -GCCGCCTGTTTCGCTACGGCCTAAGTTTCGGAGCACGGAGGTTGTTTTGTAGACGGTGCG -TGCGATGAATTCGGCATCGGCACCGTGCGAGGAGTACCAGTCGCCTCGGTCGAACACGCG -GATGGTTTCATTATTATCAGGTTCAGGGAGGCCACGGTAGAAGCCAATGAAGCCGACTTC -GTCGTCGACCTGTGTAAATTCAGTATTCGAGTTAGTGGATGCTGGATTGGAGGTTTAGAG -AATCGATATCCCATTCATACCTTTAAGTCAGGTCGGGAAGACATGGTGTGGATGTGAAAA -GAAATGCAGCAAAGGACAATTCAAATGCTGGTATCAAGGTGGTTTGGACGAGTCCAATTC -GACGCGCTGAATTCGCGTGGTCCGTGCACGCGTCGCGCGTTAAGTGCCCAGGATCTCCGT 
-GCTAGGGGTCTAGAGAAGTTTTTTTCTTCAATTTCATTAATCAAATATTATATGTGCATT -TTGAACCATTTTCAGTCTTTATTACTATAAGTACTAAGAGAGAGAAAGACCTTTCTGTAA -GAAAGATTACATTTCAGGGAATATCTGAGAAGAAATCTAAAAAGATATCCCGCAAGACTT -ATCAAGTTCGCACAAGACAACTATACTTGTCAAATGCTATACAGTAAAAGGTCGTTTCGT -ACATTTTGAGCAGTTTTACTATACTTGACATAACAATAAGATGATGGATGGGAGGCTATG -AAAACACCTTTGGTGCCGTTTTGAGCCTCGCACTCTAAATAATCTTAATAATTTTCAGGG -GCTTCGAGGAAATTGTGATTTTTTTTATTTACTTAGTCTTGATGATCTCTCATGACACTT -TTCAAGTAGATGTTTTGTAAAGATTTAACAGGGAAGGCAAAAGACTACCACTGAAGTGCT -CTCACGAATGAATATTTAGGAGAAATGCTATAGCTACCGAGGGTTTATTTCCAGGGCCCT -CTGCTTGGCAGATTCATTTCCTATGTCCGCCACCGGAAGGCTGGGTTGCGCCAATCTAGG -TCCTGAATTTGCTTCTGCGACATTCCAGCTAAAGCCTGATCAATATCCGCCTTGTCGCGC -TTTTTGTTGTCCCATTTCATCCACATCCACATCAACGCACCCAGGATCAACATCAAAGAC -GATGTCGCCAGGTTCAATCCGTTTCCGATATGGTAGTCAGGGCCATCAAAGGGGAGGAAC -GACCAGGTAGATATCAATCCACCGATATTACCCATCATGACATTGATGCCGATGGCGCTA -GAACGTGCTGTATCCGACACTACATTATTAGAAACATGCGCGTTGCAGAGGGCGCCGAAG -ACAAAGGCACCACTGGCTATGAGGAAGGTTGCACCGTAGCGCACCATGGAGTCCTTGCTA -ACCAAGAACATGATATATCCCACCATCATGAGAGGAGGACAAATAACAAAGTAGATCATG -CGGCGGTCGAAAAATCCGCTCAGCAAGGGGAGAAGCACGGTAATAAATGCACCCACAACG -TATGGAGGTACTGTATGTAGCTGCTGAGAGACAACGCTGTTCTCTGGGTAGATGGTGCGT -ACGATGGTAGGGGCAAAGAACGCAAGACCCTGCACGGTAATGTTGTCGAGGAGGAAAATA -AAGGCAGTTCCAAGCGTGACTGGGCAAAGAATTCCCCGTTTCATCTTCGCGATGTCCATC -TTATCCAAGACTTCAGTCGTAGACACACGCTCAGACTTGAGACGCGCAATCGCCAGGTCC -TTCTCTTCCTGGTTCAGCCAGCGTGCAGTCTCAGGACGATCCGTCAGCGTGAAAAAGGCA -ATGAGACCAAGAGCGATAGTGATAATGCCTTCAATGGCAAATATCATGCGCCAGGTCTTT -AGACCACCGAAATGATCGAGTTTTAGAATGGCCGAGGCTAACAGACCTCCAAATGCACCT -GCCAGTGGAGCCATGACAATGTAGAGCGAGATGCGGAACGCTAACTCACTGCGCCGATAC -CATCGTGATAGGTAATAAGCAATACCCGGCATCATCCCAGCCTCGAACACCCCCAGTAGG -AATCGGACTCCCGATGCAGAGTGTATATTTTGGACGAAGGCGGTTCCAAGAGAGCAAATA -CCGAAAAGCAGGGTGGTCAATGGAAGGAACCAGCCCGGGCCCATCCATTTACAGACAATA -TTACATGGCACCTCGAAAATGATGTATGAAATGTAGAAGACGGACAACACCTTATTGTAA -TCGTTGCCTTTTAGTTTCAGGTCTTTCTCCAAGCCGGCCAACTTGGCATTTCCTGATGCG -CGACGAGGTGGTCAGTAACTTGGAAGAGGTATCATCGCTTGGGTCGGCGAGGCTATAGAG 
-GGTGGGATAAAAATACCAATATTTGCGCGGTCAATGAAACAGAACAGATATAACAGCGCC -ACGGTGGGCACAATATATAAGTCGATCTTCATTCGCAATCGCCGCTCAGCTAAGCGATCA -AGTTCTACCAGTGCGCGACCATACTGGTCCGTGGCAGCGTGTCCACGCTCTGCTGCGATG -TCCGTGGATTGCAAAATCGAATCCTCATTGTGGTAGGAATAACACTGCTTCTCATCGGCT -ATGAGGGTAGCCGGGGTTAGGGTTGATGCCATATTACAATGTCTTGCAGTTGAATCCAGA -ATGAAGGCTTATAGGAAAGGCAGTCGGGCAGTCGAAGAGTAAGAGCCCTGGATACAGCAG -ATCCTTGAATCAAACAGAAAGCTCGATTACTTATGGATCTCCAGGAGCTTACATGGTACA -TCCCGGTAGACTTTACCGAATAGATGCACGGAAACTGTGCTCGGCTGATCTTAAGGGATC -GTGCAGACCCGCTAGATGAATGCGGAAAGGCTGTAGAGAGATAATTCGTGGCAATAAAAG -TTATAGCTGACTTGGCACTTCTTTCTTTGTTCCCTTCTGATAGCCAGTCCCCGGGTGTTA -CAGGCTATTCTTGCATTTCGTGACTTGGCAAGATTTCCAAGGTTGGCGAGACCTCATAAT -GCCAGGATTATCTCACGGGGATCATGCCAGTCACATGCCTCATGCTCGAGGACCACAAGT -GGTGCTCTATACAGAAGACGTAGACCTACCCCAAATGTCGGGGAAAGCTGGGGAGTGGGT -CAAGCACCGTTCGTATACCGTTCCAGCCTGCAATCATAAAGGGGTCCAAGCAGTTGGTTA -TCTGTCGGAAGGACTTGTCGGGAGTTAACCACATAAGCACGGCGATTTCACTGGTGCAAA -GGCCAATAAAACGCCACATGAAGTTAGCACGGCCTCGTACATATATCTTGGCGTGGAGCA -TCTCCACAATATGAGAAAATCCGGAGTAATATCAAACGAGTAAACATGACAGGTTGATGA -AAGGAAACTATAAAACAATATAGCCACTGGCACGCCCTCAAGCAAATTACCCCAGGCAGT -AATCCCCAGGCTGAGAGACTTTACCGAGTAGCGTAGCCGCTCTGCGGTTTCATGCTTTCT -TTTTACATGTATTTTCTTTTTCTTGTGCCTGTCACATGTAATAATTTGGCATACAGATGT -GGGATTGGAAGGGTTGAGATTTAATGCGTGGACTATGAGAAGAGTGTACTATAGATTTTC -TCTCTCTGCTAGGACAATGCAATCTTGCTCTATGTATTGGGAAATATTATATCAGAGTCA -ATCCTAGATTGGATTGTTTGTTTTATCTCGGATCCGTATATGATGCTATGATTCTTAAAG -CGTGAAAAATGATGCTTGCGAAGTGTTACCCAGGCAGGACCTGACAAGGAACTGCGCCTA -GCAAGCAGATTTCGGTGGATTTCGCAGAGCCGTTGTCGAAACCGTAGAGCTCATTATCAA -CAAGTTGTCTATATATATCTGATAGCCCAATTGAAGCCGTTTGTTGGATTCCATAGATTT -TATATATCAGGTATACCATATCCCACCAAGCTCTGCAAGGAGTGATTGAGTTGCCGGGCT -TACATGCCGATTAAAGCCCAATTACTGACTTAGCGGGCTGTATGCAGCCTTCGACCGTTA -ATTGCTCCGAAGAAGGACCGACATTTCGTCCGTTAAATAGGATTGAATGGAGTGGGGGGG -GGGGAGCATGCGGGTACGAGGCTATGTAGTCCATTCAATGCAGGATAATTGCGTATCACA -GTATCTAAAAGAGTCTTTGATGTCAGCGAACACATATCAGGTCAAATATCTATGCAAAAG -ACTACATATAGGGAAATATATGAGAAGACGTCTAAAATGACTCACGAGGCCGCATAATCC 
-AACTACTCTTCTAAGGCCATAGATGGGGCCGTCCTCACATCTGCTTGGACAACTCTAACG -TGGCCTCTGACTTCAATGAATGTCCATTTCAGAGAAATTGCTTGTGGTATTATGATTGGG -TGTCCGAGGACCTAATTGCTAAGTACTACCTAAGGTAATGTTTAATGCTAAAAAGACAAA -TGGTAAAGTACAGGGTATCGAGCGCTGATGCAAGACCGCGAGTAGAACAGCTCTGCGGAT -GTGAGGTATCCAACAATGAAAAAGAAAAAAACACGCCAAAAACGTAAATCACAAGAGAGT -AGTCTGCACACAAGATGCAGACTTCTCCCAATCAGCCTTCAGCTGAGCGGCTCTCGCTCC -AATGTCTTTGGCAGAGACATCTTTGCAAGACTTCAACACGACAGACTTCATCTCGCCAAC -AGTAACCATGCTAATCCAGTCACCAGCTACCTTTTTATCCGACAGAATCTCAAGACACAT -CTCCACCTCCTCACGGGGAAACTCGCTGCGGACCGAGTCACGGACATGCTCGACCATCAC -ATCGAGCTTGAAGGGCTTGCGTTGGGCGGCAAGCATAGCTTTGATGCCAGTACCCACATA -CCCAGCGGGACGCAGCAGGGCCAGAACACCAGCTACCTCTTCTACCCGATCGGCGGCTGC -ACGTCGCACAAGCTCTTTCTTGGGAGGCGGAGGAGGTAGCTTGGACTGGCGTAGCTCCTT -ATTCTTGATGCGATCTAATAGGCTTTGGCGACGGCTGATCGTTGCTGTCCGCTCCAGAGG -AGTCGTCGTTGCTGACTCTGAGCGCATTCTCTCAGCCTTCAAGCGGAGCAGACCACCCTT -CAGATCCTGGAGTCGTTGCTGACCTTTCTGGAAGACCGTGAACGGGGTGAGCGACTCGTG -GATAGTGGAGAGACCGAGAGACTCGATGAACAGAGTTGCTTGGTCTTCCTCATTGGCATT -CCGAAGTGCCTCTGGGGCCTTTTCCCACAGCAAATCCATGGTTTCTTCGAAAGAATCTTG -CAAAGCTGGAGGCTGGACGTTGACCTGCACAGTACGCTCCATGCAGATTTTGCCGAGTCC -ATAATTTGCCAGACGATATGAAATGTGGTTCGCTTTCGGACTCTGTTCCCAGACCCACAC -AAGACGCTGCAGGTCCTTCATCTGTACCTTGCGCTTATTCCAAAGGCGGGTCACGCTGCC -AAGGAATTCTCGGAGGTCTGCAGGGGTATTTGCGCCGTTATGTGCCATGTGAATTGAGAA -CGCCTGCACAAAGGAGGAGTGAAGATTCAGAATATCGTTGAAGAATGCAGGCCGCTCGAT -AATTGGTGTTTCCTCATGCACTGGAATAACCTTGTCGAAGTTCAGGGTTCGTTGCTTCGA -AGGGGTCGCGGGTGTCGAGGGCGCGTTCTTGACAGGGGCAATTCGTCGACTCGGTGTCCG -ACTCGGGGTCTTCGCTGGCGAGGCTTTCTTGGGCGACACATAGTGGCCGCTGCGTGGTGA -GGAAACGGATAATTCATTCAGTCGCAAAGTTTTCGAGGGCGTTCCTTCCTCCTCAGTCGG -CGTCTGTCTGCTACCGTTGTCGAAGTTTTCGAGTTCCTGTAGTTTGCGTTTCTTGGAAGG -CGAGACCGGGAGAGACGTGATCTTCTTTGCAGGTAGTTGTGGAGTAATGCCGGCCTTGGT -TGCCCGAGTGAAGGCTTGAATGGCTGGCTGGGCAACGCAGCTTGACCGGGAAGTGCGCGG -CATCGCGGCTGCTGAAAATTTTTTGCCGGTCGAGGGATGATAAATGAAGAAAGGGCAATA -GATAAAATTGTATTATAATAGACAAGGTAAGAGATATCAAGGTATGGAAGCGAACAAGAT -GGGGGAAAGTGCAAAAAGTGGAAATTAGGAAGTGGGAGAGATGTCAGAGAAGGCACAGGG 
-GAGAGGACGCGTTCGAGGGCGCGTGGAAAAATCCGCACTAGCCCTTTTGGGTTAAATGGC -AACGCGTGAGCGGGAACTATAAGAACTTCAATTCAAGTATGGACGAAAAGACTTTGAAAA -TGGAGAGTGAAGTCTCAATATAATCAATTCCAGGAGAATGATTCAATACCAAATATGGCA -GAGCTACAATGGCATTCTCGTCACTACGGTAATACTCCGGCATTGGAATGCTTCGTGTGT -CCAAGCTCAACGCCTGGACACTTCTCCGGGCGCATGAGATGCCCGATGCTGCTGTATGGC -TAAGAATCAGTTCCAGATCGGCCTCTGCTATTGTGGAAGGATTGGACGAGAAGACGCGGT -ACGGAGTACGCAATTTCAGTTCGCGCCCTGGTAGACCACACATTACCGGGCGCATTGAAT -ACTTGGCATACATGAACTCCAGCTACAATGACATTTACAAGGAACACGCATAAGTTTTAT -AGACATGGATTTCATCATAATATTCTCGAAACCGACCAGCGCCGTAGTATACTCATAACA -TTAAATCGTCGAGGCTCCAAAGTAAAAAGAGCTTATTTCTTGCCACCGAGCCAGCCAGAG -ACAGCCCCCTTGGCCTCAGCAGCCTTTTGCTCGACATCGCGATCGATCTGATCAACACTG -TGGTTGACCTTCTCCCGGCTGGCATTGAACTTGTTCTTTGCTTCATTGCGGAGCTCCTCA -AGCTTGTCCTTACCCTCCTGGACTTCAGCACGGGTTCTGGCGATCTAGAGTTGGCTGGTT -AGTATGACATGTCACATTGTCTCAAATTATGGGTGGTGGGGCGAGTATCGGGGTTATGCA -CGAATGCTTGGGTGAGACCATGGATGGAGTAGACTTACAGCATCGTCGATATTGGCGCCA -GCTTCCTTGCCGACATCCTTGCCATACTTCTCGGCGTTATCTGTGCCTGGAATCTTCTCG -CGTGCCTTTTCAGCGTCGACTGGAGTAGAAGAGGAGAGTTAACGTCGTGTTCTGAAATAT -CTCAATGGCAAAGGAATTGATCAGGACACGTACTCTTCATCTGGTGCTTTGCTGCTGAGG -GACTGCCACCCGCAGAGTAAAGGTAGTATCCGCCTGCACCCGCCGCAGCGAGGCCCAGCA -TCATTGGAACGCGGCTCTTCGACATGTTGTGTGGTGTTGTAGTTGATTGGGATTATGTAG -ATGGTGTAATTGAAGTGAATTCTGGAGATGACAGGTTGGAGTATCCCTGGTTTTATAGGT -AGTTCTCGAGCCTGGGGAAACAACCTTGGAAGACAGCCGGCTATGACGTCTTGTTCCCCG -TGCTTGTCCGCATGGGCTACAGCTTGCCTCAACCCTTTTAATTTTTCTTTCTTTCAAATT -TCTCATACCCAAACGCTGTGTTACTTCTGAAGTATCTATTAGCTATTTGAATATTGTAAA -GAGTTGTCTATTCCAATTTCCATTTACTAGATGTCTACAATATTCAACAGTCATAGGGTT -ACAAGTGGAGTCGGGAAAGCTAGACTTCCCGTCGACGTCACCTGCAAGTCATCGCTGCCT -CATTCCGATTCTTGTTTCTTGTGTATGTAATAGCATAACTTCTAGACCTAATTATACATT -ATGCTAGCACATTCTCGATCTGATGTGGACCCCTGCTTGACTTACCTAAGGCGCTACCTT -TTCCAAGCTCTATACTTGCACATCATCTATGTTTTTTTTTATAGGATATGGGGACTTGTG -AATAAGAGAAAGGCCGTGTCAATGCATTCCAGTCATATTCAGATCATAACATATAACAAA -TGTCCTCATAATTGTAGTCCGTTATAGCATCAGACCCAATCCACTCCCCAAAAAATATAG -TAAGACACATAACGAAACGAGAAGGTGAGATAAGGTGTAGGAGGGAAAAGACTCGGAGAT 
-GGAGCAATGAAGAGTCCATGGAGAAGGCAGAGAGTGAAATGCAACATTGAAGTCATACCG -CAGACCACCAAAAATGAAAGAGGAACCAAATGATCGATAGACAGGGTATCGTCATGCACA -GGACTCCATACAAAACTTCCTATGCGAACGCCAGGACATGCAGCTCACTAGAAACTCATC -CGAAAAAGATTGCCATCCAGAACCGTGCAAAAGAAATATCATAAACACAAGCATAAGACT -CCCGAACACAAGTAAAAGAAAGATGGCCTATGTTTCCCGGTCATGTATCAGTCATCAAAG -TCGTCAGTTCATCAAGGGCTTCACTATCATGAGATGGCAAGTATGAGAATCCCCGAAATA -TATAGAGAGAAGTAGGAAATAAGTGAAAAAGCCGAGAAGAGCCAATGGGAAGAATATTGG -AAGTAGAGTGGGCAGAAAGAATGGTTGATTGTGGTTGATGTGGCTGGATGTGTGAGATAG -TCGGGGTGGAATGTCAAGTATATGATGATCGCTCTCCAGCTAGTTGCAGGAAGCAAATAT -GACTTTCATATgtagagtggtgtagagtggcgtggaatggggtTGTCCAAACAATTTACA -AAAACAACTTGGCTGCTCCTTTGTTCTCACTTGTAGCGACGGCGACAAGAGCCTCCAGGC -GACCCATGCCCGAATTTTGGGCAGAAGTGTCCGTGAAGACGCCGGACCTGGGCTCCCGGT -CGTTGTGGGCTTCGTAGCCATCTGCGTGGGAGGATTGACGGCCACAGATCTGTGACGGGA -TTAGCAACCGGGGCGATAGCTGTTCGTTCGAACAGGAGTTCCTACCGGTGGAGGGTGATC -GGAGGAATCATTAGATTCGATGTATCCGCTGCTGTGCACAATCGCGGGATGATAATCGAG -CGATGCGGTAGATGGGGTGTGGACACCTTCGCTACTACTGGAATCCTCGGGGGAAGTTCT -CGTGCGACTCCCCGTTGAAAGGCTTGGGCGATGTCCCCGATACTCTTCAGGACTTCTTTC -GGCCGTTGGGCCGAATGTAGGCTGGTAAGATGACGACGGACGGGAAGAAACGTTTTGAAG -TTCTGCAAGGGTTTGTTGACTCCACTGAGAAGCAGATGCAGTGGATGGACGTCGGTCTCG -AATATCCAGTCCGGTAAGGTTATGATGCAAAGACATATCCCAAGATACATGACCTCGTCG -GTGACCAGGGCCGGAGATGGGGGGTGGTGGACGGTGAGCTGGCTGTGTGTAATTGAATCC -CGATCCAAAGCCTGCATTAGGTGGTGGGCGATGATGACTATCAAGCTGCATTGGGCTGGC -TTTCCGAGTGGGAGGAGTCATGGGCCGTTGGACTACTTTATCGAAACTCTCAATCCCGGG -GAGCCGGGGCGGATGCTCAGTCGAATCATTCGTGCCGAATGAAGGCGGGACATTATCAGG -TGTGTGATATTGGTTCAGGGCACTGGTTCCAGGACGTGGAAACCCTGTGTATGACGAAGG -GTGCCACGTTCTCCTCCGCATCTCCGCCTCAGCCGCGCTTAAGGTACCCTCATCACGAGA -TACAGAGTAATGTGTGCTAGTGGGGCTTGCGAATACACCAGCAGCATTTGGATAAGATGC -ACCGGCGGGGGTGTGATATGCCGGAGGATATGTGCTGCCATGCTGTGCGAGGAAAGGGTT -GCTGCTCGTTGGCACTGAAAGACGACGGGCAGCCGTCTTTCCTTCCCAGAAACCATGTGT -CGTACCAGCCATGGGCGAGGCATATTGGGGGCTTCCACCAGTGGCAGCAAACATATTGGA -GGTGTATGGCGATCCAGCTGTCGAGGGGCCAGGAGCCCCACGGATCTGGGCTGGGGGTGT -GCTCGGAGGTTCTCCCATGGGATCCATTGCCAGTCGGGATCGAGCTGCGGGGTCGTTGGC 
-CATGATCAAAGGTGGGGGTCGTCGTCGAAGTTCCGGAGGTTGGCTGTATGTTGAGGTGGT -GGACGCAACACTCGAGGTTGACAAATTCCTGCTATGGGCTCGGGTGGGACCACCGAGACT -GCCTCCGGTGCCTGCCCGCGCCCGGCCTGGGGGCCGCACTCGATCTGTACGGACCTGACG -CTGGAACCGTGTGCCGGTGGCCGCCAGCGAATCCCCGGGAATCTCCTCGTTGACGTGGAC -TGTTTGAGCATGCTGGCGGAGGTTGTCGAGTCGGGAAAACCGCCGAGAGCAGTGGCACTG -GAAGGGACGTTCGCCGGTGTGTTTTCGAATATGGCGAGCCAGATGTTCGCTCCGTGTGAA -ACTCAGATTGCAAGGAGGGAAAGATGTGCAGAAGAACCGTTGGCCTTTCTTCTTCTTCGA -TGGTGGAGCGTTACCCCCCTGGTTTTTATCATGGTCGGAATCGTCTTGTTCTCCGTCGGA -ACCGTGCATTTGTTCGTCAGACGTTGACGGGGTTCGACCGTGGGTTAAGGTTGTGCGGGA -TGAAGCGTCGCTACCGCGGCTGTGGTGTGAAACTCCGTCTGGAAACTGCTCCGTCGAAGA -AACGTCCGTGGCCGGCACGCCATTTGTGGTCGCACGAGGCGTAGCTGCTCCGTCCATCGC -GTGATCTTCAGGGGCCGAGTTGTTGACAGGAAGAAAGGAAGCCATGACGACGGAAGAGGG -AGGGAGGCGGGAGTGATGTCAGGTCCCCGGGGAGCAAATGTTCCTGACCACCAGTGGTCG -CAAAGCCGACAATCCGATCGGAATAATCACCCCAAGACCAGTCGGGGTTCTATATGTCTT -TATGTCGAGTCGTCAAAATCGCCGTACTACGGAGAAGTCGGAGATCAAGACGCGGAGTCT -GGACCGGGTTGACTGTGAACATTTGCGCAAACTCAGTAACCTGGACACTACTAAAGTGGG -CaagaagaagaagaaaagggagagaagaGACCAACAGAGAGTCGGAGGTTCACGTACGAT -TTGAAAGTGCGCTTACTCTGAAGAGAAAGAGAAAGGAGGGGCGAAAATAGAATGTGTACT -AGGATTCTCAAACGCAAGGGTGGGGAGAAGTCAAGAATGTAAGAGGAAAAGAAAAGGGAA -GGTTACTTGATCGCGATCATCTCCAGGGTaaagaaaaaaaaaaaaaaaagaaTAGATATC -CGACCAGCCGGAaagaagaagaagatcaatagtaagaaaagaaaagGTGTATATAGGCAG -TATATACGAAGTACGGGGAGTATAGAAAATATAAAAAGGAAATGGTGACGATGAGAAGAG -GCTTATCGGAAGCGAGGGGCAAACAGGAACTCAACAGGAACTCAAAGACCTTCCGGGGCT -ACCGATGCGAATCATTAAGTGTTTCACCAGATATCCGCGTGTCTCCTTTTAAAAGGCACC -AGGCACCTCCATGATTCATTAATGAACTTGATCTTTACTAGAATTCGAAATCTAAGTCTA -GACCTAGATCGATATACGGAGTACACTTGATACACTTTATATATCAGGGGCTAAACCTTC -CAAACGAACTTGGGGTTGACGTTAACACATCTTTCCCAAGAACCTGGGAATAGACATTGC -CAAGGCTTGATAAGTTTGGGTAGAAAAAAGGTCGAGACTCTTTTGGTGGAGTTCTTAGGG -TTAACCTCGATATACCTTCAGACCCCTACTAAATCAACCCCCTTGCATTCGCGTGGATGT -ATCGCACCTCGTCATACTCCGTACTCTAAAGAATGGTTACGCAGGGAAAGAGTTCCGAAT -GGTCCCGAAGTCTTCATCTTTTGCTCGAGCCCTCTGGAGAATACAGGATTGGTCCGAGCC -CATGATAATTTTAGACGATAGAGCTGGGGGTGTTGATAAGAGACCTTAGTGCCATCTCAA 
-TTCGACAATATACCTTCTAGAAATATATATTCTAGAATACATGAATGATCTCTGATGATC -TCTGATTGGATCTTCCAGATGTACCGTTATACAGTACTCCGGACTCCAAGACCAAAAGAA -CTCGTCACCTGGCCGATCCGACAGCTCCGTTTATACCGCATTCAGGCAACTCTTCTGAAA -CTTTCCTCGCCCCCTGGGACTAAGCCCCGTCCATCGTGCCTGCGTCCCCTAAATTCCCCC -ACTTGCCTCGAGATGTCTGCAAACGGTCACTCCCTTCCAAACGATCTCAGACCCTTGGTT -CTCCTATACTTTAATGTACTCTATTTGACTTGAGGTCAAATACCTCATCAAATCTGACGC -CACGATGTGGATGATGTGCTTAGACCATTGAGTTTGATCTATATTATAACAATCCAAAAC -CATGTTTCGCATTGGCCCCAGATCGGCCCCTCCCCATGGAGACTTAGAGGTTCGGTTATG -ATCCAGCAGACACGATTGGTGGGGAGGTAAGAGGCGATACACACTACTCTTTCTGGAGAC -ATGGTTCCCCTCCTTGAGTGCGGCATATTAATCTACATTACGTATATATAGTTTATGATT -CAAGGATATGAATTGGTGTTATAGGTGAAATCACTTGAAGATGGGGCTCCTGCCGCTACT -TTAAAATGATCGACGTTTGAGTTGACGTCCACGGGAAAAGTCCCATACGGAATACTCCGT -ACGGAGAAATACAAACCCATGTATGTGGGGGTCACCGAAGAGCATCGCTAAAAGATCCTA -GAAGAGGGGAAAATCCCAATCCCCGGGGACCTGGAGATACATCTGCAGGACGCCACGAAG -GCTGGCAGGGAGGGGCACGGTCAAGTAGACCCAGGTACCGAGCTGCAGGGCTTATCGATA -AACCTGAAATTATAGCCAGGTATGCCAACATCTTGACTTGATCTACATATTCTTTCCATA -CCTTCCCATCGAGATTTCTGTTATTGGGTACTCCGCACACTCTCACCCGTTTTTTTTCGC -CCCTTGGAGAATTGATCCGTTTGCCTTTTTAGCGGTCTGCTTATCTGACTGGTCATTGAC -CATGTCCAGGTGCCGATGATTTACTGATCTGTCCAATCACAGTCCATGTTCTGCATCTCA -TACCAGGTCCATCCATGTAAATTGTACCTGATTATGTGTCGCGGTGCGTACCTGAGGGGG -GTTGCCACGTAGGAGCCGCCTGATGATGTGACCATATGAATTAATAAAATCATACTGTCC -TAATTCTATGATGTATATTAATAGATTATGGATATTCTTGGTTGTCTCGTATGATATAGG -ATCTAGTGCGAGAGAGTATACTCCGTACATGTACCAACACATCCATCAATAAATCTTACC -CCCTCAGACATCAAACCCCATACACAGACACATAGTATGGAGGAAAAGTACTAGGAGGCT -GAGAGTCCAATTCACATGTGAATTCCAGACCGGACGAAACGGACAAAGCGCACTACCTCT -GCATTCTCTAATCCATACCCATGCCTTTTCCCAAATCTAATTGGACCCCTCCCTAACAGC -TAAAACATGCATCTACCCCCCTCTACATCCAGGGCATTGAGACAACATGCAGTGTGCACA -CCAGCTCTCAGCCCTTTTCATAACTGCCTAGAGATCCCCAGCACCGTGGTGCACCCTGTG -GGAGTCATCTGACTCCTCTGAAATACCCACCGCGTTCAGGTACTTGTACAACATACTTGC -AAATGTAGATCTACCCGCTTATCCGCGGCACATCCTGAGGTGTACCTACAATATACTGTT -GTACGCATTTATACATGGAGGTTATTTCCCGATGTTCGGAGGTTTTTGTGGATTCTGTGC -GAGCTGACGGTTTTGGCCCCGATAACCTGGTGTTGTGGTGAGGGTCCCAGGGTACCGACT 
-GGTTTCTTTATTCCTAGTCAGGTACGCTAGTTGATCGCGCCTTGGCTTTGGGCGTGGCTG -GTTGCTGGTCCTGGGACTTCTGGCCCTAGGGTGCAGTTTTTGTTGCTTCCAAGGGTCTGT -TGCCCATGGTGCGACAACATGGTTCGGATATTTATAGTAGTGGTAAACACCGACAGCCGG -GGGGATATTATAGAATATTTGATATTATTACAAGGTTGGACACGGAGTACATAAGAGTGC -TCACTGTCAATAACCAGAATGTAATCACGAATTCTGTTTGTAGCAACTGTTGCTAGCGGG -GAAGCGGAATTTTGAAACACGATGTTGCCCCTTTGTTCAAATATAATAAAGCATAAATCT -GTAGGAAATGAGATAGTTCTGTCAGCAAAGAGGCACGGTTTTATTTCCTACAGCCATATA -TGTTTGGTAGACCCACATTATAACGTCGGAGGCTCAAATATGGAAAGAAGCCCATATTTA -TAGTTACTTTTTCCTAAGGAAGCAATCACAATCAACAATATAAAGCTAGAACCTAAATAA -TATGGGCAGAATTGAAATATCGAGGCGCTGATATAGTGTGGTAGTAATTTACGTATAAAA -GAAAGCAAAAGTACCATCAGACTACAGTCAGCTCCCCAACCTCAACCTTAGCTCTGTGCT -CAAGGCTTTCTTTCAAGGTTATGACCCTCTTATGGAAGACCTTCTTGTCATCATTCAGAT -CAGAGTGTGAGTTCTCCAGTCTTTTGATTCGAGAACAATCGCCCTGCCGATCACTCTGCA -GATTGTGCATTTCCTCGGTCATCCCCAACTTCAGAGAACAGAGGTCGTTTTCAAGTCTTC -CAACCCTCTCTGCGAGAGACTCTTTGTCTAGTACTTCTCTGTAACAGTTGATACCGCTGC -GAATTGCCTTGCGCAGCAGCTTTGCGAGGGTTACTAAGACAGCATGGATGAGCGCGAGGA -GAGTCAGGAGACTCATCAACATGGTCACGTTACTAATGCTCCATGGGCCTTTTTGCTCAT -CGTGGCCAGCTGGATAGCAAGGTGGGCACATCATGGCTGGCCATGGGATAATTTCCTGGG -AAGTAATAAGTATGCGTAAGCCAATGCTAATGGCTAGTGACGGAAGCTAGACGCTGACAT -GGGACTGATGTCACTGACGTCCACCTCTATCTGGCTACGTAGACCGGCTCTCCCTGAGCC -TCAGTAAATTCAACTCTGCAACTTCATCTCGGAGACAAGATAGCCATGGGTAATGTAGAC -CCAGCCGGTCGCGGCTTCCTTAGGTATCTACCGGAGATAGCCACTATCTACTCGCAAGCC -TACTCCCTAGGAGTAGCTTATAGGGTAGCTTCTGTAGTCACGTGGTACGTAGTACCCAAA -TAGGTAGTTGTTGGTGTAGATATTATCCGATATAAAAAATTGTATTATATATATAGCGGA -AACAACCTACCGGATGGGTCTGCTTAGGGCGCATTATTGATAAATTGAAGCGGAGGTGGA -AGGTGTTGACCGAAACCAGGAAGGATAGAAGGGCCAAAAAGCCCAGTAGGAATTAAATTG -ATTATGTATTCTGATAGTAAATTGACCAATTAAGTAGTACGTTGTGGTGTAGTAAATGGA -GGTTATATTCAAATGTATGAAAAAAAGGCTGCATATGAGTGGTATGTTGCATATGTATAT -ATCCATAGAGTGTGTTCAAGTGTATTCAACAGGACCAGAAGCTTCCGCCTCCTCTACCTT -CCTGGTGTCTTCTTTTCCTCTTCAGCTCACCAAGTCACTCTATCTTTCCTGCCATATTTA -TATTTCCTCATCTCTCACGCCATCCCCCTTATCCTCGTGTTTTCCCTTTCCTCTCCCTCC -CAAGAGCTTATCACCTCTTGTGCCCACCCATAATATACTCACAGGAGTTTCTTCTCACAC 
-ATCATGGGTGTGTCCCTGCCTTGAAAACATATCTCCAAGAGAGGAAGCAATTACAACGCT -GAACACTTTATCTGTGGAATCTGCAACGCAACGAGCACCAGGAAGGCTAAAATCCAAATT -CCTTCATCAGCTTCGTTGTAGGGGAGACAAGCCAAGATCTAAGCGGACAAGCACGTGAAC -AGTGAGTTGTATCTTGTGAATCAAGATAATTGCAACAGTTTTAATCCCCTATTAGCTCCA -CCGTCCCTAGGCACAAATACCAATAGAGATCTTTCAACCTTGGTTGCCAGGTTCCAACAT -CGATAAACGATCCTCTGACCTTGTTTGCCTCAACCTTGAACCACGCTGGTTCCTCAACTT -CTTTTGCCTCATATGCTGTCTCTTTCTCTTCATGCTCCTCTGTGCAGACTTCTGACAAAA -TCTCTGAGCCACAAGAAATCTGGGATATGTTCGAGGTGATCATGATTCACCTTTCAGAAG -TCTAGCAGCTTGGTCTCGATCTTCTGTGTACAGTGGATGATTGGAATTCCTTGAGCAACT -TGAAAGCCTTGGCGCATCCCCAACGAGACGTGTCTCCGATCTGAGTGTTATCATTGACCG -ATGTTCCATTCTACACACTCTGCATTTTTTCTGGATACAACCATTCTCTACCATTGAAGA -TTTGTCTATCCGACCAGTGCCAGCCATTCAAACAGTAAACCTGCAACGATGGAGTGTGGG -ATCAAGCACAAATCATATCTGGCGTGTCGAACAATATCCCCTGCTTCGAAGAACCGAAAA -TTCGTCTACCGATAATCCTTATCGTCACCATGTCATTCATGGAAGACTTTCCCATCGTGA -CCAACGGCAGCAAGGCAATCCCCTTGGCGATACTCTCTGATATCAACTGAATCGTCATCA -ACCAACTGGCTTTTCCAGATAAGAGCATCAACATCGAGACCTTGCTGATAATTCTGCCTC -ATATTAGGGAGTGCATCTCACATTGGCACTTGTCCTAGTCTATGGCGCTGTTTCAGTGGC -GATTAATCAAACAGTAATGCTGTTAATCAAATTTGCCATACCATCAGCGATCCTCTTGGA -TTGATATGATACAAAATACCAAATCGAGCCTGTCACGTTGGATAACTCTCATATGTGAGA -GAACTTTGTCCTGAGCAATCTTGGAATCGCTGCCCAAATTGTTTTACATTCTTCCGCCCA -AATCTAAATATGACTCCAAGGGCCTTTGAACATACACCGTGCAATTTGATATTGTGGTCG -TGATGGTTCCTCCGTTATCTTATATCGTTCCCCAGAAGTATACGAGTACAGCACTCTTGT -GTTTCCCTTGCACCAATACTGAAATTGCAACGGAAGTGGCTTCCTATCCAGGGATTCCAT -TGAAAGCGATTTCATCACAAGGACGACGGCAGGAGTCAATCGTGACGGCAGGTGAAGGTT -CTCCTTGATTCTCCAGGCCCGTGAGTCGGTGGCATCAGTCGTTCTCTCATAATTCGTCCC -TGCTGGCGACTGAGAAACGACAAGAAGAGTCAGACTGAGTATCATACTGTGTCTAGCGGC -AACTGAAAACCCAGACTGTTTCTGGGAAGCTTGTTCGACGGAGTACGGTCAGGTGGATTC -CTGTGCGCCACGATCACAACCGGGCGAGTAAGTTCCATGGATTTCACTTGGTGGACTTTT -CCAACATATACTTGTACAGATTATTCGCCTCAGAAGCGCCGTCTTGCATGATCTGGACCA -ATATAAAAAAGCAAGGTGGTCTATATAGCCACTCGAATCCGTGACCACGACTTCGACTTC -CAAGACAGAATGCTTCCTACATCCTGGATTGTTGCACAGAGACTCATTTTCAGGCTTATA -TATTCTTTCCCCGAATGAACAGATCAGGCTTCAATCGCGGCGGGAAGTGCTCCTTCTAAT 
-CTCCATCTATTCCAGGATATAAAAATCCAGGAGGCACCCTGACGTGATCAAAAACAGGAA -TTTATTTTCAAATGGCTGATGCCTGGTTGTTCGAGTCCCGCAAATGATGCCAGCCCTTAT -TAATTCTGCTTTTGAAAGGATGTTATCATATTTTCTCTTTCCCATCCTTTGGTGGGCTAG -CAGTCCCAACAAAGAAGTTTGCTCTCGGTGATTGGCAACGAGACCTACAGTACCAGGTCT -GCTTCCCCATGGCTTGTCGTGCTCAAGTGGCAATTACCACGGACCGGAGTAAGGGGAGTC -CTAGGTCTAATGCGTGCTAATGCAAAGCAAACCAAAAAGCCCTTTTTCTCCGGGAAATCG -CTAAAAAGAGGAATCTGCCATCATTCACTTCGAAGGGGATATAACTAATATAACTACCTA -ATGCTCTCTATTGCAATTAAAGCCACACTTGAAAAAAGGCTGGCCGCCTTTTTTTTGCCC -AGGCAGATTTGAGGCTGTTGGAGTGATGGCCCCATTATCACTGTTTCATGTCGTCTTTGA -AGCGCATGATCGTGTGATATGATGTTGTGACGAGATGGCTGCGTCGGACAGGAGACAAGA -CATGGTCCATTGGTTGTGTGGCACTTCTTAAAGCCCCTAGAAGCCAGTGGAATCCAGTTG -TCAAATCCGATGATGCACATTATCTCATAAACGCTACTGCAAAGCAAATCGCCCAATTCT -TCATGCTGGAGAAACCTTTGAAAAAGAAGAGCTTCTGTCAGCATCCCTCACAAGAAGAAG -CTAGTATCTGATGTTCTCTATTTCAAATCAAAAGGGGAAAATGTCTTTTCATCTTTTGAT -TTGGACAAGGATACCTCCCGGCCCCAGTCAATCGAAGCAAGAAACTGGAGGGTGCTATTA -AAGCCGTGACGGAATTGAGTCCCCGTCCTCGGCCCCGACAAGCAATATGGTGTGGCTGAG -CTGCTGCAAGCAGGTCGCTCAAAAGCGTCGATATTACTTGACAAAAACAAGCAGGTAATC -AGGGTGTGGAGTGAACCAGGTTTGCAGCAGACTTGGCAGAAAGGATCCAGATGGCAGCAT -CGTTGACTGGCGTGATGGCGCTATACAGCTTGAAGTTCTTAAAATTTGGATCCAGATGCT -TCTGATGCCCCATCCACCCTCGGTCAGGCTTCCACTCGTCATCGATTTACCTAAATCTCT -TGTCCTGATTGGCCGTGTTTGGTCACGTATGAAGCCCTGATCTAACGTGAATGCTCGCTT -CACCGGGAAACATGGACGCTGTCCGGGTGAATATGATTGTTTCATTTCGTCGCCAAAAAA -TACAATCCGACAGCACGGTGGGATATTTCTTGATACGGACCTTCTCTGGCTCAAGCATGT -ATGATGTGGCTTGAGGGACACTTGTAGATCTCCGAAAGTTCGCAGTCTCCCAAATTCATT -TCCGAACATGACGAGCTTCGAACTACCTGTGTCGCTGAGCACCTCCAACCTGGGTGATGG -AATTCGCAAAGAACTACCAACCTGCCTCCAGCGAATCCGTTGTTGGCTCTTTGCTCGCCC -ACTCAGTTGATGATCAGACCGATGAGCCTTCTGAAACGACAATGAGTGATGATAGGACAC -TCATGTGGCCTGGCTCTGCGACTGAGATAAAATCTTGAAAGATACGGAATGACTCAAGTC -TCAAATCCAGTGATGACTCTTCTCGATGGTGGTTGGTTGGCTTCCAACCGAGCTCAATTT -TTGCTTGGAGCATGATATATTCTATGGAACGACTCACTCGGAGAGTGTCTCCCTACATGT -TGTTCCTTTGGGCCTTTGGTGCTCCTGGGTTGCTGTTAGCTTGTGATGGCTTGTCGAGGT -TCACAATCTCCTTCAAATATCCCATGAATTGGCGTTTTTCCAGGAGCTTTCCGACATTTT 
-CTCAGCAGACTTCCATTCTGGCGTAAATGGAACCAGAAGAAGTATTCGATTGCACCGAGG -AGAATGGGGTCACAAATATTCATTCAATGCTCAGCACTTATATTGCAGCGAACAGAGATC -AGCGCGGGCCTCCACATTGCGATCGATATGCTAGGTTTCGAAGATATTCGTACAACATCG -TGGAGGAAAAGAAATCTTGACCAAATACCACCTGCATGGTGGATGGACAAGCTGGGAACC -GGATATCATCAGGACTATGAAGAGTTTGATGATATGGTTCAGAGGCACATGGAATCCAAA -TGGCCCCTATCATCTAGCGAAAGAAAAGGGATCTGATTTGGAATCTTTTGACGAGATGCG -AAATGTCCGAGGTCGCGAAGGAAAAAACTGGCGATAAGCCAATTCGAGAAAAAGTATCCT -TGCCTAGGAGGGAAAACGCTACAAAGCTGGGGGGATGGGGACATAAACTGATTTTGTAAT -GTTTCTCTCTGGACAGAGAGTCCCGGTTTTTTCATAGCTCACATAGCCCAAATGATTTCT -CTTTAACGGTCTTGCCGACTCCTTGTACCCTGTCCCTACATAGGTCGATAGATGATGCCA -TACAATTATATTTCTCATTAGAGCTGAAGTCAATCCTTGCGACTAACCTCTTTTATTTCG -GACTCAATCTAGAATCTCCCTTGTAAAGACCTAGGAACATATAAATCACGCTTGAGGTAG -CTACAGTATCACCGTCCCAGAGGACCAACGACTGAGCTCACCATCACGGCAGCTTCGCGT -TCCCAGTTTCCAACCACCACCATGTCGGGTCCTGGTCGACCCTTCTCGGCTAGCAGGCTG -GAAGCAGATACCTGCGATGCACGAAGCACACACTCTTTGATCTCGGTTCTTAATCGATCA -CGGTTGGATCGCAATGTACGGACCGAGACACCGCCTCGGTTGGCGATTTCCTGGACTCGC -GGGGCGAGAGACGAGGCGAGGGAGCGCCAATAGTAGGAAGGAAGTGATGTGGTGTCTAAA -GCCTGGAGAAGAGGCTGAAGGAGATTATATTTGGTTGAGGTCGCTGTTTGGGCTTCGGTG -GCCGGTTCATCGATTGTGGTGGAGGCCAAGAGAGGGTGTGGGGGTGGGCGGAGATTTCTG -AGAATGCTCTCCAAGGCGACTATATCTTGGCATCGTGCGGATATGTCGGTGAGGGTTCGG -TCCATGGTTGGGAGTTGGGTAAGAGCCCGGGCAAGGGAATTTACAGATGTCATGATCGCT -GTGTGCATGAACCCTTGTAATGTTGACAACAAGAGTTCTGGCTTGTAGTCAGCTGCAGTG -ATTGTGCTATTTGGTGGAGGCGAAAGAAGATATAGAGTTGTGGCGGCGGTTGCTAGTCGT -GCTCGGGCATCGCGAGCTTGCTTGTAGCCAGATGGAATGGATGACTGGCCATTGGAGCTT -GTCGAGTTTGAGGATAGAGAGAATCTATTGATGGTCTGTTGCGCTCTTGACTTGATCATG -TTCTCTGCGGGAATGACAAATTCCCCGCGTAGAGTGCGAGTGACCTTCACCCGATCCAAT -CCATAGCCTTCCTCGCCCTGTGTCGTGGCTGAGAACATTTCTCGCAGGTACAAAATTGTG -TAAGCTGCTCGCTCCAGTGCACGGAAGCCGTCTTTTCCAGTTGACAGTGATGCTGGTCCA -TCCGGTCCTGCGGAGAGGGCTGGCCCTGTTGCGCCTCGACCTGTGATTTCGGACACCTGA -CTTTCGAGCTGTCGGCCCAGGGTTAAACAGCGTGCGACGGCCCTTGCTAGGCGGACAGTT -TCAAGCGAGTTTTCAGCAGCAATACGGGCTTCGTCGGCTGCTTCCCATTTTTGTACAACC -TCTTTTTGGAGGCGCTCGTACCCTTGAGTCACAGAAGCTATCTGCGTCCCTGATTCTTTC 
-AAGATATTATCGGCAGCAGCAGTTTGACTTTGGGTGTGTGAAAGCAATGGGAGTGCTGAT -TTAGTTGTCAATGCATGGATGTGTGTGTCGATCTCCTGAAGATCAAAGAGTACCCGTGAC -AATGGTGTTGATAAATCCAACGGGGTATCTGTTGCATTGTTTGTGCTTGTCACCAATGTA -TTGGCGAATGAAGCTGGTGAGAAGTCTGGGTCTAGGAAGGCCTCATAGTCTATGTATGAC -GGATCTCCTTCCGCCATGATGGGTCTAGACGTGACCAAACGTGTCCAATCTTTATTTCAA -TGGAGCCACATAGAAGAAAAGAGCAGAGTCACTAGAGAACACCTTTGAACTAACCAATCA -GGGGCATGTGGAGTTCAGAAGCAATATTTCTTCGGCTCAATGTTTTGACATCAGACTATG -TAGGTCATTGGAGGAGAGAGGGATATTGGCCATGAGATAGGAAGCCCACATGGCTGATAA -CAGAGGTGTCACGTGCTGTTGATTAGTCACCCTGTACATTGTTAGTAGATATACTTGGAA -CATAGGTAATGAGGCCATTACTGACTATGAATAAGATAAATAATACATTATGAGCAAATG -AAGGTACTACATTCAATCCAGCTCATAGTCCAAGACATATAATACGGTCGTGCGGGCATA -TTAAGACATTGGAGGCAAAGAACCCCCAGACATTGACTAATGTTATCCTATCACGAGAGA -GTCACCGATATCATGGTATATAGAGCCAATGAATATGTTCCATTTCGAATATCGAATACC -AATTGCGTTGATATACTCTCTACTCTACTGCCAGCGCCTTCCAACGACAGAGCGGGGTCC -ACAGATAAGAGTTCGGAGGTAAGCCCGAGCATGAACATGAACCCTCTTTAACAGCACAAT -TCCTGTAGAAGATAGTGCTCCAAAAATAATCATCCACCATGTCGACATCTACAACAACAC -AAGAGCAGACTGTTCGGGTGATCAACGAAGATCAACTGCCTCCATTACCACCAGCGGCAA -ACGATGCATGCTTCCAGGGCCTTGCTCGTGGAGGCCGGAATGGCACCCTGAAACTCCGTG -GCATTCCCACATTTTCAGACCTGACTGAGAAGCGCAAGTGGATGAAGGAGCACATGGCTG -CTGCATTCCGGTTCTTTGGTAAGCTAGGGTATGGGGAGGGTGTCTCCGGGCATATTTCGA -TGCGAGGTAAGTGTGACATTCTCTTTTCAATGAATGATGCATTATGGAGGAGCAATAGTA -CTCTAACAGCCCACAGACCCGATCCTCAAAGATCATTTCTGGATGAACCCATTCGCCAAA -CATTTCTCGACTATTAAGGCATCGGATCTCGTTCTTCTGGATGCTGATGGATTCGTCGTT -GAGGGTGGAGCTCAGCTGCCTGTCAACGAAGCTGGGTTCTTGATTCATTCAGAGATCCAC -AAAGCACGCCCGGATGTTGTGGCTGCAGCACACACGCATGGTATTCATGGAAAGACGTGG -AGTTCATTTGGAAAGCCAATCGAGATGCTTACACAAGGTAAGCCACTTTTCTTCTTCTCC -CAGGTTGAATATTGAGGGCTTTACTTATTACTAACGAGCTGGACAAAGACGCGTGTAGCT -TCTTTGGCAGAGTGGGTGTTTACGAAGATCATGGCGGCATTGTCCTGTCGGCAGATGaag -gaaaggcaattgcaaaggcattgggcaaggaaaaCATTGCATGTATCTTGCAAAACCATG -GGTAAGTCCATGATCGGCTTTTTGTCTTCTGTATGCGTCAGGTTCTATCTGGCTAATCTT -GTGTTACTCATGTACAGCCTCCTAACTGTCGGTCGAACTGTTGACGAAGCAGCGATCCTC -TACTCCATGCTGGAGCACGCTTGTCAGTCACAGCTTCTTGCTGAAGCAGCAGCAGCCAAT 
-GGTCTTCCAAAGAAGATCATTAGTGATGATGTGGCCAAGTTCACCGCCGATGCTGCTCAA -AATCCTGTAAGTGGAATGTCGTACATGTCAAAGTGCTTGAGATTATGTATGCTGACAGCT -TTAGCACAATTTCTATACCGAGTTCCAACCTGAGTTTGAATTACTTGTCGAACAGACAAA -TGGAAGGTTCCTTCAGTAACTTTTAACAATATATGTCATGTTCTGCTTCGGTTCAAAGCT -CCTTTAATGTGTAGCCTATGCATCCCATGATGTCTAGGACAACACTAAACACCCAACCCG -AAAGGTAATATTGAAAATCACTTTTTACTTAGCAAGGCATAAAGGAGACGGTTTTAAAGG -GTAGCACCTAGTCATCTTTCCCCTAACTGACAGAACACTCTTTCAATCAAATTGTATTTG -AGCTTCGTTGGACTAAATGTTAGCTCTAGATTCAGGAGAAATAATGTTCCCTTTCAAGAA -ATAGAGATTGTACATGTGTTTTTTCATGTAGCTCATCAGATAGTCTGACTGAGACGGTAT -TAATGTAAACAAGTATGGAACAAATAAATATGGAAATAATTCTTTCAGAACAAACGCCAT -CTATACAGATGCAACTGCCGCAAGGGTATACAAAATGGAAAGAATATATCAAACAGTCCA -TGGTAGACCAAGGCGTCGGAAGAAAAAAGGGTGTAACAGGGTAACAAAAGGTCGTTCATA -GCCCCAACAGGGAACATCAACGATGGGGCGAGAGGGTGAGCGGACAGGGGTTCATACAGC -ATGCTCGCCCGTGTGCTTCATATCATCCAATTCATATTCTTGCAAGCTGCGTTCTCGACC -ACGGCCACGGCCAAAGCTCGACTCCTGTTGGCTATCGCCGTCAGATGCTTGTTGCGACGG -GTCAGCTTGAGTGCTACCACCGTGTCCACCAAACTTGATAGGCTCCAGCTCCCGCCTTTG -ACACGCCCACATTCCAAGGAAGGTCATCATAGCCGCACTGGCGAATTGAAGTCCCCACCA -GAGCAAGTTCGAAGGGAGGGAGTGGGAGTATAGAGTAGTGGCGATGAGGTGGAGGAAGTG -AATCGTCAGGGCGAAATCCGGAACCAATTTTGAGCGGGAAACAAAAATAAGTAGAAATAT -GACGCTAAATATCTCCCAAAGGAGATTAAAATTAGCTGCTGATCGGAATACTGCTTATAA -TGTCGAACCAGTGGACAACTCACCTAACAAAGCAATTCAACAACCACACCAACCCCAGCA -TCCATCCAATGGTCGTATCGCCACGTAGGGAGTCCCATCCAAAGACTAGATCTAGTGAGA -ATGGTGCGCCATAGACAGCTGTCGTGAACAAGATGAGCACCGTGGCGCAGACATAGTATG -CAAGCTGCAGCAAGAGGATCTTGCGGACGATCTTCAGCGGAGGGAGTTCGGTGCGCGACC -CCGCTGGTGGACGCCGGCGCGCGGGCATCAACTAAAAAGACAAGGGAGGTTCTTGTCACC -CTCGTGAAATTCGTTCAAGGTCGAGGTGTCGAGGTGGTCAGATTGGGCTCGGCGCTCAAG -GCGAGGATAGAATGTGGGGGCGAAGGGAAGGCGTTCCAAGTGAAACCCAGAGACAAGGAA -GTATCCAGTGTCAGAAGCTTCAGAACTACATGTAAAATCCAGAACGTAGTGCGTCGAAAG -TGAAAAAGGGTCCGGAGATGGAGATCGTCTGCCTGGGATCAACGGCCTTGGCGGTCACAG -GCCTGACCCGCCAACAACTGGACATCTACCGTCACTGACACCTTTGGCTGCATAACTCGC -TACAAACCACACGATACCCTACTCATTTGAATTTGACGAATCAATTCCCCACCCGCCTCC -TATTCGATATAATTCATTTCTCCAAAATGGTCTGGGTACCTTGGTCTTCCGGCGATTCAG 
-ACAAGAACGCAGCCTCCGACGGCGGCCGCGTTGCTCCGGACCGAACGAGTCGCGCGCGCT -GCTGGGAAGGTCGTGATCGATTTTTTGCCTGCCTTGACAAGAACGATATTCTGGACGGCA -TTAAAGATGATAAGAAGGCGCGACAAAAGTGCGCGGACGAGGTTGTGGAGTTTGAGGCGG -CATGCTCACAAAGCTGGGTACGTACGGTGTGATTTTTGATCATTTGCTCAAATTATCGGC -TCTTGGTTTTCTAGCTTGCTCTACTTTATATCTATTCACGGGAGATTGCTGACTCGAGAG -TCTTGGTACCTACAGGTGAAATACTTCAAGGAAAAGCGTGTGATGGAATTCAACCGGGAC -AAGACGATCGAGCGCATTCAAAAGGAGGATGCGGCCAAGGTGAAGGAGTTAAAGCAGCAA -GGTTGGAATTCGAAATAAGAAGCAAGATGGACTTTGTTGGCGAACTTTTGTACAATATGT -AGCATATCCTCTCTTATCACGTACCTTGATGACCACTGCATTTCGAAGGTACATTGCTAA -GCTGCAAGCCGAAGACGGCTTAATTGCTCAGTATTACTTTGTATTACCTGCACAACTCAA -GCTCTAGGATTGAAGTTTCGTCGCTTTTGATTTGGCCAGCCTTTGAGCGCTTCCAAAAAA -GCGGCACTAAGAAGCCTGTGAAAGCCGAGAGAATATAGCAGAATGAGCTAGGCGCACATA -CTTTAGTGTCAAAGGAAAAGATGTCACAACAAATGACATAACCCAGAATATCTTGGAAAC -ATCCGTATTATGTCCGGAAACTAAACCAGTAGTCACAATGAGCAAGAGTCCAATGGTTGA -AAATACATGTCAACTCCATCCTAAATACCGCCTTGTCCTAATATCTACCTAATATCTTAC -TGCCTTTGTGACATATGAGAAGCAACGAGCAAAAAAAGGAAGAAAAGGAAACGCCTAGTG -TTGCGAGAACTGGTTTTTGGCCAACCAGAAGAAAAAGGAACCGAAGCAGAGCACTGAAAT -CCACCAGTAGGAATGCAAGAACGCCAGGCTCCAAAAACATGAACAATGACGAAATGAACC -TTAAGTTCGAATGAGAATGATAGGTAGTAAGAGAGTCGTAAAATCATTGCTTCCATTTAG -CCAGGTGCTGCATCAGGTTCTGTACTTGGGGACCGACGGGTCCCTGACCTTGCTGCTGTT -GTCCACCTAGTCCTTTGGCCAGGAGCGAAAGCAGGGTTGGATCAGTGATGACGGGTGCAT -TTACAGGCTGGAGATTGAAGGGTGGGTGGGGAAGAGGTTGCTGCTGGGTGGTTTGCATAG -GCGGAGGACGATGCGCGTTGGTTAAGAGACTGGCGAGGTCGGCCGGAGGTGGGTTTGGTG -AGGAAAAGGGCGACTGAGTGGCGGACACGGGCTGCGAATGGGGAGGTCGTTGGAGCGCCG -ACAAGAGAGTTGAAAGGCTGGGACCATCCAGTGAACCAATGAGGTTCGCAATGTTGGGTG -GGTTTGAAAGCGCAGGCATACCAGGCGGAGGACCCGACATGGGCTGTGCGGGAGGGACGA -TGAAAGCAGGGCCGGGTGCAAATGCGGCTGGGGCTGCTCCTCGCTGCATCGCTTGAGATT -GGTGGCTTAGGAGCTCCGCAGACATATTAGGCTCGAGCTCAGGGTAATCTGTGGGAGGGA -AAATAAACATGAGCCAGGCAGTGATAGAGAAGCGAAGGGAGAATGCGCTTACCGAGGAAG -CGGACGTTATCCAATCCGGCGGTTCGATCAAAGAGCTGCAGAGGAATCTTCCGAGAGACT -TGATTCGGGCGAGAGAGGCGAACAACCGCAAGAACACCTTCAATAAACTGACGATGCACG -GCAGCGCCCAGTGGGATCCGTGGACCAAGTACCAGGACATCCACTCGCAATCCACGGTTG 
-CGGAAAGCGTTCTCTACATGTAGGACGAAGTTTCTTCGCAATGTCAGTGGCTTGGTCCAA -ACACAACTCCATGTATTCAACCCACCTATCAAGTTCCTCCAGTACGAGGAGCTGCACCTC -TGGCACATCTCGCTGAGCTCGTCGAGGCACAGGCAAATCCGCTTCACCTTCATAAACTCC -ACGTAATCTTGGGCTGATACTGCGGTAACGTCGATCTCTGGAATAAGGAGAGCGAGGAGA -ACGCGGAGAACGGGAGCGCCTTCGCTCGCGGCGATCAAATCTTTCTGGGGTCCTGTCGCG -CGACCGATATCCATCTCTAGACCCCCGAAAGGGTCGTGGAGAAGGCGATCGCGGCGGGCG -ATAATCGTCGCGTCTGCTGCGGTGAGAAGGTTCATCCCGGAAATCGCTAAATGGAAGCCG -ACTGGATTCATATGATTTAGACCGGTCATATCGATCGACCGGGCCCCGGGCGTTCGGTCG -TCCAGAACTGGCACGGCTGAACTCCGGTGACCGGGAGCGGCGTGGCGGCGGTACACGGGA -AGCTTCGGGTGCTTGAGCAGGCCGAGTCGATCGCTGAGGTTTTGAAATTTCGAGATCTGG -GGACTATTAGATCTTCGAGAGAGATATGCGAGATTAAGGCTTCTGGCACTCACGTATCTT -CCTGCCCCGCACGACGGCTCCCTGTTCAACGCCGAGAGCCGCATGGCAGGAACTCGCCTC -CAGAAACTGTATGAAGCCGTATGCCTGCTTAATCGAGATCTGAGCTAGTTTGCCGTACTT -ATGGAATATGTGGAACATGTCTCGCTTGGTGACCCTCTCAGTAGGGAGATTACCTGCAGT -TGAAAAGACCGTTAGATGGACGAATGGTGGATAGAAGCGACAACGATACTGACCAACAAA -CAGCCTGGAGCCTATCGGGAACCGATCCCAAAGACCTTCCGTCACATAGATCCGCTCATC -GTGCAGGAACTCATCATATTTCTTTTGGACCTCTGGACCCCAGGGCGCATCATCGTCAGC -AGGCTTGATAGGCCGACCATGCTTGTCAACACGGCCTTTCTTGGAGGCTACAGAAAACTC -TATGAAAGAGCCTTGAGATTCGGCACCGGTGGATGGGGTTTGCGGGAGGGTGCCATTAGG -TAGTAGATTGTTAACGGCCGAGTTCCCCCCAGAAGCAATGTTAAACGGCTGAGACTCGTT -CTGGGGGTGAGTATTACTAGGTTGGGCTGAATAGCTAGGCGACGTAGTGGCAGCAGCATC -ATGGGCAGACGGAAGTTGGTGGTATGCCTCGCCATTTGGAGTATAATGCGTCTGAATAGA -ggcttggctttgggcttgagggtgggcatgggTACCCTGGGGTTTGAGGGACTCGCCAAT -GGGTGCTTGGTGGAGAGACGAGTTCTCGGCTGAAGAAGCGGCAGGATTGGTTGCACCAGG -GTTGGGAAGAGAAAGGTTGTCGAGGAGAGTCTGGAAATTGACTCCCGCATTATGTCCCAC -GCGCTCCTCAGCAATTAGGCGGTCCGGCATCTTCGATTGCTCCGAGTTGTGGGCATGATC -TACTTCGCTCCCAGTGGCAAAGCTTTGGGACACCGGAGCAACCGGAGCAGCGTGAGAATA -AGTGAAATCGGGGTGAGAGACATCTGAGCTCGGTTGGCCGGGATTTTCGGACCCCGAAAG -TGGGTTGGGAGGGTAACATGCAGAAGCAATTATTTCATTCTGCATCCCAGGATTCGACTG -CAAGCCTTGAAAACTACCAGCATGTTGGGTACTCGGTTCCCCTTGCCCGTCTGTTAACCC -TCCGTGCGACCATCCATGTGTTTGCGGGACCTGATTATACACCGAGCGTTCATAGGTGGA -GGTATCGTTGAAGACGGGGTCCATTTGGTTCTCTAAGACTGGGATGTTCGATGGCTCGGG 
-AATATGTAGAGGTCGAGGGCTTTCCGGGGTCAAAGTCTTTCCGCGGAAGTGAAGCGCTTC -ATCGGGGGGACTGGGCGTCATGTAGACTTCGAGAAGGAGGTGGTTTCATCTACGGAGAGA -AAAGTGTCGTGAAGAAGAGGGTCCCCCGGGCCAGGAATTTTCGCAATCGCAAGCTAAGGT -GGGGCGATGAAGAAAAAAGATGGGTCGGATTTCTGCCTTTACCCAAGAGGGAAAGTAGAC -AGAGTGAAAAGGAAGAAAAAATAGGCGATGAAGAAGGAAATGGGGAATAGGTGGGGGGAT -TTGGGGAGTTCCTAGAGGTGAGCAGTGGAAATGGACAGACAAGTCAGGTGAGCAAGATAG -GAGGCGTTGAGAATTCTGGATCTGACTTATTTCTTCAGTATCACGAGGATGTAAGTTGTT -AGTTTTTATTTTAATTTTTGAATGTAGCTAAAGTGTAGAGCAATGGTTGATGTAACCACT -ATTATGGACTAGATGAACCACTATATCACTATACAACATATAAGATACCCATCATTTCAA -GTCTATCACTACAAACAACCTTAGAAATTAAACACTATAATCACTTGAATCACTTGAATC -ACCCTAATATGATCATCCCAGTCAAATAAAAGCTTCTGTTACGTACAGCGGGGCCAGACG -GTCTTTTTTTATCTTTGGATTTTAATTAGCCACTCCACCTCTTAACTTCCTCATGTATTC -CCACCGTATACGGTACTTGAGGGCTCTCTTCAGAGCACCCTTGAGAGGATTTGCCTCTCA -GGGTGACCAAGATATACAAATAGCCAGAGATTGGCTCAAGACCTTTAATTCCAAAACTAT -CCCTCGGAACATTTGCGAGGTCTCTTTCAGTCGGTCTGGCGGTCCAGGTGGGCAAAACGT -GAACAAGTCAGTGTCCCCACTTGAAGATCGATGGTACGGAGTAATGAATGTCACAATGCT -AAGAAAGGAAATAGGGTCAACTCCAAGGCCACCTTGAAAGTCCCATTGGATGCGTTGCTA -CCCTTGGTGCCCCGTCTGATACATCATCCACTACGCGCTTCGCGATATGTCGCCGAAAGA -ACCCAATGTCTAGTCATCCAGTCAGACGAGGAGCGGAAACAGTCCATCAATGTGGAATCC -TGCTTTGACAAGCTCAATCAGCTGTTGCAGACCTCGGCCAAGGAGGTGATTCCTGGAGAA -ACATCTCAAGCGCAAAGAAATCGCGTGCAAAAGTTGTATGTTTACTCTTTTCATGGAAGC -GTTCGAGCAAATCTAATAGGGAAAAAACACACAGACAAAGGGCTCAGAATGAGGGTAGGA -TTAAAGACAAGAAACAGCACAGCAACAAGAAAAGTAGTCGTCGGGGAAGCAAATACGACG -ATTGATAATAATCATGCGGACCTGATGAAAGCATAGGGACAAGTTCGCCAGTGGACCCTG -AATTTTGATAATATCCGATCAACGCCTATGTGTGCTAAGTACAAATACTTGGGTATGCTA -TGTATACTACTCCTGTGCTGCGCGCTCCGCATTGCGGACAGTAGCCGCAATAACCTTTTG -CAACGAAGGATCCAGGCCTTCTGTTGTCTGAACAGGCTGAAGCCAGTTGATCACTTGGCG -TGCGAGATCCTTACCCATGGGGTACTCCCTGAGACGGGCAAGCGAGCTACCAAGTGCCCG -TGGGAATTCGGCCGAAGTGGGGGGGATTCCAGCGGTCGGGAACTTGATGTAGCAGATCTG -CGTCCGTGGGCTGGAAGGCTTGTTGGCGCGCATCGTCTCAATAACATGTTCCAAAATCTT -GCGCTGACAAGGCGCATCCTGGAATTCCCAGACCATATCAAAGAACTCCTGACCAAATCG -ATCATCGTGGTCAAAGTGCTTTGTGGCGGTATCAAAGATGGCGAGCGGGATAGCACCGCT 
-CAGAGCAGGCGTGGAGTTCATGTTTTGTAGCGCAACCTGGTCTACTTCTCCGTCCGCTCC -TGTGCTGGGGTTGATGTCCTCAGCTGTGAGTGATGGGAGGGCAATCATGTCGGCATCGGT -GTCGTCGACTTCTTCTTGAATGGGCTTAGGTTTCTGGCTCTCCTCATCAAGACCGAGAAT -CCGTTGGCGAGCGACCAGCTTTGCGATGTAAATCAATTCCAGCTTTGCATATTGAATCCA -GATTGTTCTCGCGCTCTTGCAGAACCGTAGACCACGCTGCATGTAACTCCGGGCCTGTGT -CATATCTCCGTGATCATCGAGAACGTGCTTAGCGGCATAAACCCATAAGTCGGCGCTGGT -AGGGTGTAGTCGTAGAGCATCGGTGAAAATCATCTGGAGCTTCTTGTGTGCCTTTTGTTT -GCGAGCATACTCGATGTACTGGACCCAGAGGTTGATGTCGCCGTGGAACTTGCGTGTAGC -TCGGTCGAGAACGAAGAAAATACGTCGCTGGCCATTAAATCCTGCTGACCGAATCCCAAG -TCGCTTCACCCGCTTCTTTCGCAACACATCGAGGTTCATTTCATATTCAGCGTAGCGCAC -AAAATCCACCGGCTGTGCACCACGTGCGTTGAGCTTGTGCTCAAAATCAGATCGTCTTTT -GACGATTGAGGAAATTTCATCCTATTTATGCTGATCAGCTGTCATTCTGATGGAAGAACT -TCGCTTTGCGACATACCTTGGTGAAAATCTTCTTGCGCTCATATTCCTTGAGCTCAGGCA -CTGACTTCTCGAGGAAGAATCGAGCCTTATCTGTTGCGGCGGACATGATTGCTGTTCACT -GAAGAGTGTTAAAAAAAAAAGTCGGCATCCCGCATTGAAAAAACACCGATTAGCCCCAGA -GGGCGCTTAGCCGCATTAGGTTAGCGTACAGGGCGCTCAGAACCACCGAGGGATCCGGAA -GTTGTTTTCATTTCCCTCAGGGGCCATTTGTTCGTGACATCTACTTGTTTAAGCCATTCT -CTCTTTCGTTTATTTTCTTAACGTTCTTTTCTTTTTCAATTGTTTTTTTTACCTTATAGG -TGCAGAGATTGCCTTCCCTCTGTGCCACCATGGCTCCCCGATATAAGGACGGAGATGCCG -TGGTCGCCGTAAGTTCTAAAGCGCCATTCAATGCGGATCGAACGCCACTGACCGGATACT -CTTTGCAGATCAACGGCAAATGGGTCGCCTGGACTCACACAGTCTTCGCATATGCTGCGT -TTATCAGCGCGTTGATTGTTGGCGTTGCTTTGCACTACCACAAGATCGTTCAAAACGAAT -ACTACGGCTACCCCATTGAATGGTTCCCGTCTGTGTCTGCGACCATCGGTGACCGATATC -CCGAACGGTCTTTTTTCATGGTCTTCATTGCTATTACTTCCGGCCCTCGGTTTGCCCTCG -TCTTTCTGTGGTACATCCTCACTTCGCGTCCGAACTCGACCTTACCGAAGTTGGTTGCCG -GCGTGGGCATCTTCCGGACATTAACATGCGGTGGTTGGACCTATGTTACATCTACTGATG -ACCACGACTGGCATGACATCTTCATGATCTCGTATTTGGTGGCCACCTTGCCGTGGACTC -TGGGTTGCCTTGCGCTCAGCCCGAACAACCGCCGTGCCGTCAAGTACCGCAAGATCATGG -CCAGCTTGTTCTTTGGAACTTTGCTTCCCTTAATCTACTATTTCATTCAGCACAAGGTCC -ACAAAGTTCCTGGTGGTGAGTACTTGTCCTTGCGAGGATCCATTGAAAGGTTACCACATG -CTAATCAGTCAATTACTTAGCCTACACTAAGTATGCATTCTTTGAATGGTCCCTAATTTT -GTTCGATGTCGGTTTCGATGCTATCACAGCTCTTGACTTTGAGAGCTTCGAATTGGTGGT 
-GAGAGATGTTAAGGGCGTTAGCAGAGGGTATGTACCGTGGCTTCCGAAATGACACTGTCC -ACATCAAATGTATAGTGCCAGAGGCTATATTTGTGTTTAACAAAGAAAACATTGTCTAAG -TTTGTGCTCATTAGGCAATTGAAAACGACTGCGGATGCTGTCTTGGAAAAAGAGTGTGTA -CAATTCCATAACGAACTACAACCATACCGAAAATTAACCACGTGCGTAGGAAGGGCAAGC -CAGTGGGCAACACCTTTGGCGAGGGCTTCTTCTGGGCTGAGATCATTGATGCTGCCTCGG -ATGTCTACAATGGGGTAAATTGTACAAGTCTACCATTGGCCCCCTGCGTTACTGACAATG -GGCTCTAGTTTGTTTTCTGGACCATCGTGACTGCTCTGCCTGTCCTTGTCTGGTGTAAGT -AGAATCTATTCCCCGATACGAATGGTCATTCCTGATTTTGATAGATTTCCCTCTTTGGCA -TATGGGTATCTCTGGCTACGAGGTTGCCATTGTTTGCTGCACATCCCCGTTGTTTTACGT -CATCCCATCCCTGCGATATGCAGCTGCAAAGAACCTTCGCCTTCTCCACCTGGTCTCACT -TGTTGGTCTTCTTGGATACAAGTTCCAGGATCCTGCAAACCGCCTCTTTGTAACTGGGTT -CGCTCTTTCTACTACTTGTGCAGCATGGACCGCTAGCTTCTTTTCCGAGAGGGCCAACAC -TCCCCGTCTCGAAGCCCGTATCATGGCTTGGGGAATCGGTCTGATCATGTCCGTCGTTGC -CAAGTTCGCATGCAAGACAAACAATCCCCTCTGGCCGATTATGCACGCCGAGAATGGTGG -CTGGAACAAAATCGGAATTTTCTTTGCTCTGTTCGCCGTTTTGAGGTCCCAGAGAGGCTC -AAACACGAGCGGGGGTGATTACTTACCAGCAGGTGGTAAGAAGGGCTCGTCCATTCTTGC -TGGTGTCGGATTTGGCGGTCTGATGTTCGCCATCGTCTCGCTTTTGACTGACTCGAGTAC -AATGATTTCGTGGGTCTGGGACGGATACCCAGTGCGCGGACCAATCGCAGCTCCTCACGG -CGCTCTGACAATCTTTGCAATGGGAGCCGGCCTTGTGTACGGCATCTTCCACCCCGGAGT -CGCTGGAAGCTGGACGGCCTTCGGTATCGGTTCCATTGGTGCTGCTCTTCTTACTTGCTA -TCACCACTGGACTGGATTCTATGGTGCCCTCGTTCTTGCGTTCTATATCATGGCCGTTGC -CCCTGTTCTGATTGCATCCAGCGTTCGTCATTCTCCTGCGAGCACATTCGGCATCGGATT -CATCGTTTATGTGGCACTCTTGCTCTTCCACGTTTGGGTTGTCGCCTACGCCTTCGTGCC -AGGTGGTCCATTCCTTCGTGAGCACACTGACTGGCTCATGACAACTATGATGCTGTCTAT -CGGTGCGGGTGTTTTCTCTGCGGCGACCTCGAACTCGACTCGCTCCCGCAAGCCTCACAA -GCCCGTCAACCCCAACAGCAGGAAGCAACGATCTTACTATATCTACGTTCTTGCTGTGTT -GCAATTACTTTCTGTATCCATTGCTTACCTTCGCTTCCCGACTAATGACTACACCCCTCA -TCACAAGGAGGACAAGGTTGTCACTGCTGGTATCTGGACTGTCCACTTCGGTCTTGACAA -CAATATGTGGTCATCTGAGCGCCGCATGCGCGACGTCATCGGCGAGCTTGAGCTGGATGT -CATTGGTTTCCTCGAATCCGACAACCAGCGTATCATCATGGGCAACAGAGACGTCACCCA -GTTCATTGCCGAGGACCTTGGATACTACACTGACTTCGGCCCCGGTCCCAACAAGCACAC -TTGGGGCTCCGCCTTGCTGTCTAAGTTCCCCATCGTCAACTCTACCCACCACCTTCTCCC 
-CTCACCTGTGGGTGAGCTGGCACCTGCAATCCACGCTACGCTCGATATGTACGGTGAGAT -GGTTGATGTTGTTGTGTTCCACTCCGGACAGGAAGAGGACCCTGAGGACCGCCGCTTGCA -GAGCGAGTATCTGTCAAAGCTGATGGGCGACTCCCCGCGTCCTCTGATCCTCTTGAGTTA -CCTCGTCACCAAGCCGCTCGAGGGTAACTACAACACCTACGTGAGCGAGCGTAGTCAAAT -GAAGGATATTGACCCTACCGACTGGGACAGATGGTGTGAGTACCTTCTCTTCAAGAAACT -TCACCGCACTGGCTACGCCAGAATCAGTCGTGATACCATCACGGATACGGAGATCCAGGT -AAGTGATGTTGTCTCTTCATCTCTTCTCCTGCGTGGGCTTAATTTCTGACTGTTTTGTGA -TCTAGGTTGGAAAGTTCGTCATTGGCGAGCCCGAGCCTGAAAATGAGATGCGTATCCCAG -AGGAGATGGTCCCCATTGGCCGCCGCTTCCCTGCCTTGTTCCGCGGCCAGGGAGTGCGCG -GACACCGCTACCACGTCTTCGACGAGCCCAGGTACTGGCAGTAGACAGTCTGTCTGGGTG -CGGAGCGTGAACACGCTTCGTTGCGCAAAAGTTCGCCCAAAGAAGATTGACTTTTATGAG -TCTGCCATATCGCGACATGATTACGGACCGAATAAGGCAGACATTTCACTATTCTTAGGG -CTGTCAATATACCTAGCCTACTTGAGTGACGAGACTATTTTTTGTTGATCGCTTTTTATT -CTACTGTGTTTACTTTTGACTCGTGCTTCTCGACCTCCTACATATTCGTACATCGCATGG -CCCAAGGGACTGGGAATTAATGTATTGTATCCCACATGGTGTATAGGATAGGAGATCCTC -TAATTCTGTGGTTCTATCTCTCAAAGGATAGTCACATGTATATTCTCACCAATCTCCTTT -CACTAGTGCTAGAGCATTTTCTTCCTTGCTGCTTCAATGATTTTGTCAACTTGGAGATCA -TGAGAGAACAAATGAGCAAGCTACTAACAAAACTTTTTGCAGTCTCGACTGGGATGTGGG -AACCCCAGCAGCAACCAGATCGACAGGTGCATAATACAGCATCTCCGACACCACCAGGAC -ATCGGAATGTTCTTCTTTGTCTTGAGATGCTGGAAATTTACAATCCATCGCAAACTCGCA -GGCTTGGAAGCTGACGCTTTCGTGACTGATATTCTTTGTCCTGTGCTGACGTGCGTTCGC -TAATGAAAGCCCTAACCACGATCAAAGATGACTCGAATACAACCTAGTCAATCAGATCAT -TCCAATCAAGGAAGTTACCTCGCTTCCTTTTCTCTGTCAAATTAACCTTGCCCTTCAGCA -TTTAGTATGGAGACATTGAGTATTCAGCATAGGGCAATGAAACCAGAGTAGCGTGGCTGC -CCAGGTCTGAGTGCATTTTAGCCCTACTTAGCACTCGATCTAAACAAACGGACTTTCCAG -GACTGTACGAATTCATTGATCCCTAAAGCCCTTTAAGTGGCATTGGTCAAATTCTAGTGC -CAGATCTAGACAAGGGGTGAAATACACCTCGCTTTGTTTCGAAGGCACTTTCGTGATAGG -GAATGAGGGACCATTGCCAAATACAGACACGAACTAGTTATCGGTCACATTTGGTCAGCT -CTTTTGCCTAAAAATGACTGATATCCCTACTTGGAAGCAAGCCCTAACTCGCCCCTGCAG -TGCCATTTCTCATGAAAATTTATCTAATTTGCTACATGGCTACAGGGCTAATTTATTGGA -TTTCAAACACCAATGTTTGAATGTCTCGCCCAAATTAAGCAAGATAGGAGGAATTACTCA -AGATGATCAGGAAAAGTGGCTCAAATCCCGTCATCTGAACTCGCAAATTTTTTAAGAGAT 
-CTTGTTTCTGGGTCTGATAATCCGGGAACTACTTCCATTCTCTTTGATATATTCATTGCC -TAATTGAACATCGATCTTTGGCTTTGCTACCTGTGGTTTCTGGTCTTGTTTATTGTTTTA -CATGGGCCATTCGAAGTCTGATGATATCTTCTCCTTTTCTTTGAGCTGCCCGGTCTCGAT -AGCTTGGATTCCTTAGTCTGTCCTGTGGGCTTCAAGCTGGTATTGCTCTGCTGCAGTCCC -TAGAAACTTTTGATTTGAAATGATCGAATACCGACGGAGGGTCGACAAACCGTATACACC -TGTTACGATACAGCTGCTAGTAGCTCTGGGTGAATTGAACCAAAGTCACCTCCAATTGCA -CGAATCTGGACATTGTATCGAGATTGACCTGAAACGCTCTTTGTCAAGGCTCTTGGGCGT -ATCTGCAATAAGCTAGGTGCGGTCAACAAAGAAGAGGCCTCGGAAAGATATTTTTAGTTT -TCTTTGCGTTGGCAAGTATCTGCATGTTTTTTGGTCGCTCTTTGCTTGTGCTTTGTCTGC -TGACGAGAATAGGTGACCCCGTCTCTGTCTTCTATTAATATAGACCCAATATGTCCAAAT -CTATCCTCGACCACGAACACAACCCCGACTTTCTTCACTTGAATCTTCTATCACCAGATC -AATCATTATCACTATAACAACAGCAACTGATAATAAGTATGGTCCTCGTGTTATCGTTTC -ACATATTCTCTAATTAATGAATCAACACAGACAGGAGGACCATCGATCAACTTGAAAGGA -TGGTTAGAACACGAGGAATGGCTAAGGGTACCCGGCCAAAGGTCCAGCCTCTACCGGCGA -AGCCTACCCAGGCCGAAAAAGCCGCCAAAGCTGCCAAACTAAAGGCTGAGAGGGAGGCAA -AAAGGGCCGAGAAGGCTGCAGCGAAGGCTGTAAATGATGCGAAGAAAACTGCAGTAAAAA -CAGAAAAGGACGCAATTAAGGCCGAAGCGAAGGCCAAGAAAGATGCAGAGAAGGCTGCAG -CAAAGGCTATAGCAAAGGCCACGAAAGTTTCCAACACCAAACACAGCACAAAGAGCACAA -AGAGCACAAGTAGGATATCTGCCGCAAAGGCTAAAGCTACCAAAATCACAAAGGCCACAA -AGGTCGCCAAGGTTGCGAAACCCACCAAGAAGGCGAAAGCTGCAAAAAGAGTTAGCGCAG -ATTCTGTGAAGCCTCCCACAAATCAATACgaagaagagacagatgatgatgaagacgaag -atgaagagaaggaagatcacgatgaACCACAGGCCAAAAGGCGCAAGATTGCGACTCAGG -CTCTTCGCAGACCTTATGTCGAAGTCAGCGCGCCTAAAACAAAGCGTCCGAGCTTCCAGA -TTGGGGAATGGAACTATATGAGGAGAACATATATTCCAGAAGGTCTGGAGAACGAAAATA -AAGAACCTCTACCAGACATCTATATCGGAGGCACTGCACCACAGCCTCGTTGGGTGATCA -CTCAAAACGAAGACATACAACTTCCTATCAAGCGACAACCTGCCTCCTCAAATATGGAGA -GGAGGGAATTTCAGGGTTTACAGTGGACCTTTGAAGAAGGCGATTTCCAGAACTGGGCCG -GGCTGGAATACCCAGCTCTTTACGAACTTGCGTTCTTGTGCCTAGAATATACCCTCATTG -ATTCAAAGATCCGAGAGCAGATCTATGATTCCCTATCTCAGGGCCCCTTGCCCAAGGAGG -CAATCGAAGGGAACATATGGCACCCAGCTCTGCCGCCGATTCCCATCACAAGGAATTTCG -TTCCAGGGTCCTCCGCGCAAACTAGTCGAAAGTCAAGTGATGCGACCTTTTATTTTGAAA -GCTATGCGCGCCCAAACATGGCAGTTCAACGTCGTGATGAGCGCCTCGCGCAGCTGTACC 
-CGCAGGCCGTGGCTTCCAGTCTATATATTCGACCGACATCCGCGAATTGGCTGACTGCTC -CGCGCTCTGCAAGTCCGGATCCAGAAAGCGAGGATGGTATCGAAGAGAATCAGTCAGTCA -AGTCCAGGAAGTCTGTGAGAGGGAAGCTTTACCGGGAACGAAATTCAGTGGACACTTTCT -CCGAACCGACAGCATCTATATCTCTTCAAAATGAGCAGAGCATTAAGCGAGACGTTGAAC -AGGCCAGCGAGCAAGATGGCGAGCAGGACAGCGAGTATGACAGCGACGATAGTCAGTCAG -CCAGGTCCAGGACGTCGTCTGTGTCCAGGAAGCTTGACCTGGAACTAATCCAAGTGGGCA -CTTATTCGGAACCGACAACCCCTGTGTCTCTTGGAAATCATGAGAAAAATGCACGATTTG -GTGAGTTTGAACATGCCTAGTGTAGTTGGTTTTCTCATTCTCTGCATCTTTCTTTGCATT -ATCCTGTATGCATTCAGGGTGGTCAAATAATTGAACCACGGGTGTGCCAATGGATCGATG -TATGCGAGTGTGTCATCTTTACTGGAACACAATGAGCTACATAGAAGCAGCCCGATTTGA -AGTGTCTAGCTTTAGTCTTTGGACTGGTTGTCATGTTTTGAGGAACAAGAGGGCTGATCA -CTTTCATATTATGCATTTCTCTTTACCGGTTGCACACTGAAGATATATGCGCGCAATATG -CTAACCGATCGGCTCCAGACACAGCCTCGGAAGATGGTTCAATATTTGGTGACGGTGATG -ATTTCGATGATAACGCTCAAGCAGAGCCTAGCTCAGCTCGAGACTCTCGACAACAATCTC -CTAACAGCCTTTTCTCTGGCTCCGGGTCCGACAGCGAGGTCAGCGCTTCTCCCAATAGCA -ATGAGCAAAGCAATGTCAACGTTGCTGACGATCATAGCGCCCAATCGACGAATAGCCTGT -CTCCAGTATGCTGTGATGGGCTCTTCAGCCCTGGACAGATTGATAGGTATTATCGCGAGA -GAAACAGAGTTTGTAAGTAAATTTCCCAATTGAGATATCCTCAGCCGTTTGATTATTTGT -CCCCTTAATCAATTATGCTAATGTTTGATTCAAGTTGAGCAGGAAAGGCTAGGCGGACTT -GAACATCACGAAGCGTCAGGACAAAAGAGCCAAGCACAATGCTCAAGCTCTGGGACGGAT -ATATGGGACCCAACCAAAGCCGTAATGGGAAAAAGAAGTTTTGAAGAAGAGCTCGCAAAT -TCAAGTAAAAGATGTCGAATTGAGTCGCCATCAACACCATCACTACCTGCCACTCGAAGG -AAGACACCGGCTGATATCAGGGCGGACAATGTTATGACATTCGCAGAGATGCTACAAAAG -CAACGGGAGGAAATGCGAGCCACTGCAGCAGAGAAAATATCAGACAAACCGCCGGTGGCA -CTTTTGAAGAAGGCTACACAAAATCAGGCGGGTGCAATATTGCAGGCGGACGCACCTAGT -CCTGAGATTCTGCCTAATCTGCAAGACAAGTTCCTTGAACAGACGAAGGCATATCTTAAT -GATCCAATCCAGAGGGCAAAACATTTCCCTGACCCTCTTGCTGTCCGCCGAGCTAGGGTG -TGGGCAGGGGAGGAAGTAGTGGGCCATGAGATCAAACCCACAGTCGACGCTACACTTCCT -CCTTGGGCTGGAAGGCATCGGCCTTTCAGCCAATGGCAGGATCCGGTAAGTGCCTCGTTC -ATTGGGCTTTTTGAGAACCACGCTGATCATAATCATTTAGATTGGGGTAGCCCCTGGGCC -TGCGGCGAGCACAAAGGTCTCGAGATCGAAAAAACGGAGCAAGGGACAAAGTAAGAGAAG -ACCAAGACCAGCATCTGCAAAGGAGCAGTCAGTCAGTTCAGCCCGGTGGTCACCGTTTAG 
-ATAAATGGGTTGTGTGTATCCCCGTGGTTTCGTAGTAAGGCTTAGATTGTTCATATGTTT -GATTGTTCGTTCTGGAGTTATACAGATTAGGAGCCTTGGATGCCCATGCCTTTTCATTTG -AGTCAGCTTCATTTGACCTTTAGCACATTTATATATCTTAGTTTGACCGTCATCATTGTT -TGAATTAGTATTCATTATATTCTCACAGCATTAGATAAAAACAAACCATGTATATTGTAC -ATCGTGGTTGTATGCCTATGAGATTGGAGCAGAGACCTCAGGACCCCAGCAGGATAATTC -CAAGTTTGTTAGTCTTTCATGTTGTCCCGTTGTGGAATATGCCATTATCGCGTGGCAACA -TCCCATTGCACGATATTCAAGTAAATCTCATGGTTATAAAGAACCTTTATCCACAATTAG -ATAAATTGTATCAAAAAAATCTCTTACAGGTGTAAGTGAAGGACTTGATCTATACAACAG -GTTCTACGGCATTGATAGGCAATGCTAGGTGTCACGTGCAAACCGAACAAACAGCTCACC -CGCCGTCTGGTATCTTTCGTCTTTGTTCGCGAGAGTCGGATCAATTTGACACCCTGGGAT -AATGGATCAATCTTGGAATTGAGATATTATATCGAGGCATACAGCTGCATTGATCGTGAC -CGATCTATCGTCGCTGGTCCAGAACAATCGCGGCTGCTAGGTTTGCCCTAACGGCAGACT -CACGATGTTCACTTTCACTCCCCTCCTTGGCGCTCAGTCGTCATCTTCAAGGGCGTCGCA -GTCTATCCTGGAGCTTGATGGAGGCATCAAGATCCTGGTGGACGTTGGCTGGGATGATAA -GTTTAATACCCTTGATTTAGCGGAACTAGAGAAGTGAGCATGCGATCAACGCGTTCCCTT -TAGATTCAGATGAGCTAACATCATTTTCAGGCACATTCCTACACTTTCTCTCATTCTCTT -GACACACGCGACGCCGGCACATATCGGCGCTCTAGTCCACTGCTGCCGAACATTCCCCCT -CTTCACGCAAATCCCAATCTATGCAACGAATCCCGTCATCGCCTTCGGCCGTACGCTCCT -ACAAGACCTTTATGCCTCCGCTCCGCTTGCTGCCACATTCCTTCCGAAGGCTTCTGTTTC -AGAGCCCGGCGCATCGTCAGCAGGATCTACGACTGTGTCAGGCGGAGATACCGAAGCCGC -TGACAACACAAACCGAATACTTCTTCAATCGCCGACAGCCGAGGAAATTTCTCGATACTT -CTCTCTCATTCAGCCACTCAAATATTCCCAACCACACCAACCGCTCCCATCGCCTTTCTC -TCCACCGCTGAACGGACTTACTCTCACGGCCTACAATGCCGGTCACACCGTCGGTGGAAC -GATATGGCATATTCAGCATGGGTTGGAATCGATTGTCTATGCAGTGGATTGGAACCAAGC -TCGTGAGAGCGTGGTCGCAGGCGCTGCGTGGTTCGGTGGATCTGGTGCCAGTGGAGCAGA -AGTCATCGAACAGTTGCGCAAACCCACGGCCTTGATCTGCAGTACCAAGGGAGGCGATAA -GCTAGCACCTTCGGGGGGTCGGAAAAAGCGAGATGATCTTCTGCTCGACATGATCAGAAG -TAGCCTTGCGAAAGGTGGAACAGTCCTGATCCCAACAGACACAAGTGCGCGAGTTCTTGA -ACTGGCCTACGCACTAGAACATTCTTGGCGCGATGCCGCGAATGGCGACAAGGAAGATGT -TCTCCAAGGAGCTGGGTTGTATTTGGCCGGCAAGAAGGTTACCAACACAATCCGACTCGC -CCGAAGCATGCTGGAGTGGATGGACGAAAATATTGTTCGCGAATTTGAAGCGGCGGAAAG -TACAGATGTCACAAACGGACAAAGGACAGGTGGGCAGGACAAGAGCTCTAGCAAGGGTGG 
-GGGACCATTTACTTTCAAACACCTCAAGATCATTGAGCGCAAAAAGCGACTTGAGAAGCT -CTTGGCAGAACCCGGACCTAAAGTAATTCTTGCATCCGATACCTCATTGGACTGGGGCTT -CTCAAAGGATGCACTTCGACAAGTAGCAGAAGGACCGAACAATCTATTGTTACTGACGGA -GTCCTTCCGCATGGACATGCAGACTCAAGAATCTAATTCATCACAAAGTTCGCAAACAAT -TGGAAGCATGATCTGGCAATGGTACGAGGAACGAAGAGATGGTGTGGCTCTAGAGAAAGG -GTCTGATGGCGAACACATCGAGCAGGTCCACAGTGGTGGGCGTGAGTTGTCTTGGACAGA -TGTGCAGCGAGCTCCTCTCGATGCTGGGGAGCAGTTAATTTACCAGCAATATCTCGCCAC -CAAGCGCCAACTCCAGGACACATCTCAAACACTTGGCCAGGAAACCCTGGAAACCGCCGT -GGATGCCCTCGATGATGGATCGAGCTCTACATCCTCTGAGGACTCAGATCCTGAACACCA -GGGTCGCGCGCTGAATTTCTCGGCCTCACTTGCACACGCTACCCGAAGCAAACTTGCCGT -CAGCGACGAAGATCTAGGTATCAACATCCTTCTTCGTCGGAAGAACGTTTATGACTACGA -CGTGCGCGGCAAGAAGGGCCGCGAGCGCATGTTCCCTTATGTTGCACCGAGAAAGAAGGG -AGACGAATATGGAGAGTTCATTCGCCCAGAGGAGTATCTTCGGGCCGAAGAACGCGAAGA -GATTGATATGCAACAGCGGCGTACCGATGCCGAGACTAAACTGGGCCAAAAACGACGATG -GGATGATGCAGCTGGTCCCCATGGACGCAAGTTGTCTGGCAGCGCAGCAGGACGGAAGCG -TCCTCACATTGATGGCAAGAAGATTGACGATGACGATCTCAGCCTTGCCTCCGATGGTGA -AGACGCTGATGTAGCTGCGGAGTCCGAAGATGAAGTAGAAGGCCAGTCGTTCGAAGGTCC -TGCGAAGGTCATTTACCATAGCCAAACCATCACCATCAATGCTAGGATTGCGTTTATCGA -CTTCATGGGTTTGCATGATAAGCGCAGCTTGGAGATGTTAATTCCACTGATCCAACCTCA -AAAGTTGATCCTTGTTGGTGGCATGAAGGAAGAGACGTCAGCACTGGCAGCCGAATGCCA -GAATCTGCTGACAGTCAAGCTCGGCGCTACCAGTTCTGACCCCACGTCCGACTCGGCTGC -CATTATCTTTACTCCGTCGAACGGCGAAGTCATCGATGCAAGTGTCGATACAAATGCCTG -GATGGTCAAGCTGAGCAACACGCTCGTCCGACGCTTAAATTGGCAGCATGTCCGCAGCCT -CGGCGTGGTTGCTCTCACAGCACAATTGCGAGGACCGGAGCCTATCGCTGCCGAGACCGG -GGACGCTGAAATTGCAGGCAAGAAAATGAAGCAAATGAAAGACGAAGCAGCCTCTTCTGC -TGTCACCCCGACATTGGAGCAAACAGACACAAAAGCAATTGACAAGGTGGAGACCTATCC -AGTACTCGACACTCTCCCAGCTAGCATGGCCGCAGGAACCAGGTCTATGGCTCGCCCCCT -TCATGTTGGTGACTTGCGATTGGCTGATCTCCGCAAGCTGATGCAATCCGCTGGTCACAC -TGCCGAGTTCCGGGGCGAAGGAACGCTTCTCATCGACAAGTCCGTAGCTGTGCGCAAATC -CGGAACGGGTAAGATCGAAATCGAAGCAACGGCGCAGTCTTCCGTAGGTCGCCCTACTGG -TCGTGGGATCGGGAGCTTCCTTGCCGTCAAAAGAAAAATCTACGAGGGCCTGGCTGTTGT -TGCAGGCAGCTAAGGTTATGCACGTGATATGTATTTCTATCTTTTCCCGGTGTATAACTT 
-CATGATCAGCAAAGTATTCATAGCCACTTCTTTCTTACAAGCACCAAGAGATCTATAATC -AAACGACCTTTATTCTTCTAACATCTTGCTCAAGTCAATACCGCAGGTAGTATTCTATTC -AAAGACCAAACAACTGTATCATCAACGCACAATTTTAACCCGAAACCAACATGACCCATG -GGTCTTTATAAATGCTCGAATCTCCAACAAACCGAGCATAATCTATATCTAATTCTTAAG -CTGAAAAATTGTAACAAAAGTCGTTGTCGTGATGCAGGAGAAGAATCTGCCATCTAAACA -GTGATATCGCCTGACTCTTCGGAATCATTGCCGTCGTCCTCGGTGGGAATTTCTCGTATT -ATATCATTGGTTTGAGGGCCAGTGTACTTTCTGCAGAAGGAAATGTCAGTATCCACTTGA -ATCATGATAGACGAACTGTTGATTTTTGGGTCTCTTACCCTCGAGCTGCAACCCACCACC -ATATCATAGCAAAAGCTGCAATAAAGCCGCCAACACAAACGGCGTAGTTCCTGTGTAGAA -CATGTTAGCATGGAATTTTCGCAACTGGTCATAAACTAGGTTCAGGAGGCTTACATATTT -GCAGGTGTCACAGGAAGCTGTGGGGGGAAGAAAAGAATTGTACTGATGAACAAGACCCAG -AAAATGGCAATGTAGTTGATAGAAGTACCCCATTTGCCGAGCGTGAAGGGACCTTCAATA -AACTTGACTTTCGATTTGTAGAGCTGATGGGCGAGAATGACAGACACGTAAGACAGATCT -AGTGCCGGGGCAGTGATATTGAAGATTGCAGTGGCTGTTTGCGTGGAGCCAATGGCGATG -CAGTTCAAACCTATGCTGAAGATGACCACGAACCACACTGCATTTACAGGGGTATGGGTA -CGAGAGTTGACCTTGGATAGGAACCTATAATGTATCAGTGTGGAACTGGATCAAAAGCAA -AGTTTAGACAGCATCCTACTTTGAAAATGGAAGCGCATCGTCTCGGGCAAAGGCAAAGGC -CATTCTCGTATCTGCCAGCATGGCTGAGCAGCCCGTGAAGAATTGGACCAAAATGACAAA -TGCCCACATGATTGTGCCACCTCGCTTACCACCAGCATTGAGGAAGATCTGGGCTGCAGG -CATACCGGTAGGAGAGGTTAAAATTCCCTCAAAATCTGTTAGACAGAAGCACATTGAGAC -GGTCAGGACCCATCCGAGCGCTCCGGATACTAGGACGGCTGTTTGGATAGCAACTGGTCC -TCGTACGGCGGCATCATGGGTTTCTTCAGACATACTACAATAAAGGTAAACGTTAGAATG -TAAATTAGTGGAATATCGGGGTCGCGTACTGTGTCGTTCCATCGTAGTCGGTCATTGTCC -AGGCAACAGATAAGAACCCCAGGAAAAAGGAAAAGACCCTTGAGCCCCATCCTGAGCCAT -CAGTGAAATTGCCAAACACCCAGGACGCTGGCTGTAGATTGGGGGTAAGGGTTATCAACG -CAATACATATACATATAGTCGCCCCAACTGGAGCAAAGCGAGCTCGTTAATTCAAGTTCA -ATAACTGAGTTCCTTGATACATACTGTTTATGGGAGCAAACCAAAGGATGATTCGATGCA -ACGATTTCGTGGTCAGTGAACAAATCACCCCAAGACTGCATAGAATTGCGATTGATACCA -GAACGGTCTCCAGTGCTGTTCTGGAATGATATCAGATAGAAACACAGCAACTAGAGCTGG -CTAACTTACGGTGCATACAAGTACTTGCCATCGACAAGATCCGAATTCATACTTACACAG -GCCAGCAACATCTGACTGACCGTGTATGCCACACTGGAGACCCCTGCTGTCTGTCCGAGG -AGGTTACACCAGCCCTGGATCCAGGAGAAGATAGGGACCTGGTCGGGAGGCACGACGTGT 
-TTTGTGACAAAGTACATCCCTCCAGCCGTGGGGTATGCTGATACCAACTCTGCCACAGAG -CTTCCGATGCACATAGCCATGCATGAACCGATCAACCAGCACCATACAGCTGTTGCTGGA -CCGCCAGCCGCTAACGGTGCACCAAATGTTGCCGGCACGGAGCCGAGGACTCCCAGAATA -GAGATGGCATAAGAGACGGTAGACCATTTGGAAAACTCTCGTCGTAATTCCTAGTCCGAG -TAGATTGTTATCAGATTGTGCCCCGCTAGGATTGCCACAAGGGCTTGGGCGGAGAATACC -TGCTTATAGCCGATGCGAGCCAGCAACAGCTCATCATCATCGTGAATAGTCGCCATTGAA -GGTTCAAGAGGAGCTGGTTTATCGCCACTTTCTTTCCTGGGCTTGCGATCCGGGTCTGGT -TTTACGGTTAGGCCCATGATAAGGAGTATGTGACCAATGTATATGCAAGGAGCTTTAGGG -AAGCAAAGATAAGGGGGAAAAAGAAAAGAGAATTCTCCGGACTTACAGGAGTCCTGTATT -ATATCTACGATGTACGGAGTATAGTCGGCTCAGGAGACCGAGCTAGAGGGGAGCCATCAA -TGCCGGGAAAGAGACGGTGAGATGAACACGCCAGAAAAAACAACACAAACCCTCTAAGAA -GCTCGATCCGAAGCTAGGATTGGACATCTGACGTGATTGGAGGGAATATGATAAGCGTGG -AACTGGTCAAAATTGGCCAAGCCATGCAAGACCAATTGCTGTGCATGAGGTGATGACACG -ATAAACAGAAGGGTTCCATCAAAGACACTCCGAATGGGATGGTCTCCGAATCGCTAGTCT -GTCTCTAACACGGAGTTACGTAGAGTGCATAATAGCGGGTGACACTATATTTTATTAAGG -ACAGATTGACAACTTCAAAGTCATTTGAGTATGCTCTAATAGTAATATACAAAGAAGTAC -AAGTAAAGGGAATATAGCGAATCCAGCGAGTACAAGACCCACGTGAAGAACATCGGAAGC -ACGTAAATCCGAGGGAGTTTAGGTGGTCAATCCGATATATCGTCAACAGAGTCTCTTGTT -ATTTGTTATTCCCTAATTCCGTTCCGTACGTGATGATTCGTTGAGAATTTGTGTCTTTTC -TCTCTATTTCTCAATTGTGTGACCTCTCTCGTCCTTTCCCACCCCGCGGCATGATCCACC -ATGGCCGACCCTTCAATGTATAACACAATGGGTCAGGGATCTACCGACCCCTCCAATCCA -CAGTACATGGCTCAAGCTCCCTCTCAGCAATACCCTGCTGGGTATCCTCCTGGTGCCGCG -CCTCCACAACCAGGAGCGCCTTACGCCAACCCAGCGCCGAATCAGTGGCCCGCCTACGGC -TCACCACAACAAGCGGGTTTGGCGAGTCCCGGCAGCGCATACAATGCACCACAGCAGGCA -ATGGGTGCAGCAGGAGATCCTGGCGTGGCAGGTTTATCCTCTCAGATGAGCGGGTTGGGG -ATTGCAGCAGATGCGGGGGCTAGGACACACCGGAAGAAGCACCGTCATGCTCACCACGAC -ATCGGAGGTGGTGCAGCACCCCCGGTACAAGGATTTAATACTGGCATAGATCAAGGAGGC -TTgcagcagccgcagcagcaacagcagcaATCGCAGTTCCTGAACACGGGATTGAACCAA -CATGCTGACCGACCAGTCTCCCCGGCGGTGGGGTTAGTATCTGGGCAGTCTGTCCCTGAT -ATACCTGGCATGCCGAGCGGTGCAGGCTCTGTACCTACTTCAGGTCGAATCGACCCAGAG -CATATCCCCAGCATCCCGCGATCGCGCGATTTACCCGCTCAGTACTATTTCAACCATGTC -TACCCGACAATGGACCAACATCTCCCCCCACCAGCGGCGATTCCGTTTGTGGCTCAGGAC 
-CAGGGCAACTCATCTCCTAAATACGCTCGTTTAACGCTCAACAATATCCCCTCCGCCTCT -GACTTCCTTACCTCGACTGGCTTGCCATTGGGCATGATCTTGCAGCCCCTCGCGCCTCTG -GATCCTGGCGAACAACCGATCCCCGTGCTGGACTTCGGGGATGTTGGTCCCCCGCGATGC -CGACGATGCCGAACTTATATCAACCCCTTTATGTCTTTCCGATCTGGCGGTAGCAAGTTC -GTCTGTAATATGTGCACCTTCCCGAACGATACGCCTCCTGAATACTTTGCGCCCCTGGAC -CCATCCGGTGCGCGGGTGGATCGCATGCAACGTCCTGAATTGTTGATGGGAACTGTGGAA -TTTACGGTCCCGAAGGAATACTGGAACAAGGAGCCTGTGGGCCTTCAAACCCTCTTCGTG -ATCGATGTCAGCCGGGAGTCGGTTCATCGGGGATTCCTCAAGGGAGTTTGTGCGGGTATC -AAGGATGCTCTGTACGGTGCTGATGATGAACCATCAGAAGGCACTGAAGGCGATGAATCA -TCACGAAAATTGCCCGTTGGAGCGAAGGTTGGCATCGTTACATATGATAAGGAGGTGCAC -TTCTACAACCTTACCGTAAGTGTTTCTCGCCCTGTGATGCTTTCATTTATTAACATATCC -ACAGGCATCATTGGATCAAGCTCAAATGATGGTTATGACCGATTTAGAGGAGCCATTCGT -GCCATTGAGCGAGGGTCTCTTTGTGGATCCATATGAATCGAAGTATGTGATCAAATATTC -CCCGTCTCCTACGGATCGCTAACAATATTTAGGTCTGTGATCACTTCTCTTCTTAGCCGG -ATACCCAAAATGTTCTCTTCTATCAAAAATCCTGAGTCGGCTTTGCTGCCGACATTGAAC -TCGGCGCTCTCCGCCCTGCAGGCTACAGGTGGCAAGATTGTCTGTGCGCTAGCTAGCTTA -CCTACTTGCGGTCCGGGGCATTTGGCAGTGAGAGAAGACCCTAAAGTTCATGGAACGGAT -GCGGAACGGAAACTTTTCACCACAGAAAACCCAGCTTGGAAGAAAACGGCAAGCAAGCTG -GCTGAAGCTGGCGTGGGCTTGGATCTGTTCATGGCAGCCCCTGGCGGCACATATCTCGAT -GTGGCAACAATCGGTATGTTGAAAGCGTCATTGCACCCCATACAGTCTATCAGCTGACTT -GATTCATAGGACATGTCTCTAGTCTCACCGGAGGCGAGACCTTCTTTTACCCTAATTTCC -ATGCTCCACGCGATCTTCTCAAGCTGCGGCAGGAAATCACTCATGCCGTCACACGCGAGA -CGGGATATCAGACCCTGATGAAAGTCCGATGCTCTAATGGACTCCAAGTGTCCGCCTACC -ACGGAAACTTTGTCCAGCACACCCTCGGCGCTGACTTGGAGATTGCTGGTGTCGATGCTG -ACAAGGCCTTCGGTGTACTCTTCAGCTACGACGGGAAACTCGATCCCAAACTGGATGCCC -ACTTCCAGGCCGCCTTGCTTTACACTTCCGCAGACGGTCAACGCCGTGTGCGATGTATCA -ACGTCGTGGCGGCTGTGAATGAAGGAGGCATGGAGACGATGAAGTTTGTCGACCAGGATG -CCGTGGTCTCGGTTATTGCCAAGGAGGGTCAGTTGAAAAATCTATTAGAAGCCATTGACT -GAGTTTACTGATACCATTTTAGCCGCCTCGAAAACCCTGGATAAGACTCTCAAGGATATT -CGAGCCAATATCTCAGAGAAGACAGTCGATATCTTCAGCGGATACCGCAAGATCTTCTCC -GGCTCGCACCCACCCGGTCAGCTAGTTCTACCTGAGAACCTGAAGGAGTTCTCGATGTAC -ATGCTCAGTTTGGTCAAGTCGAGAGCTTTCAAAAGTGGACCCGAGTCATCAGACCGACGG 
-GTGCATGACATGCGCATGATCCGGTCAATGGGCTGCACGGAAATGTCCTTGTACCTCTAC -CCTCGCATTATTCCCGTGCACAACATGCAACCGGAAGATGGCTTCGCGAACGAGCACGGA -CAGCTCCAAATCCCGCCCTCCCTGCGAGCCAGCTTCTCGCGTATTGAAGAAGGTGGTGTC -TACATCGTGGATAATGGCCAGGCGATCATACTCTGGCTGCACGCGCAAGTCTCACCCAAT -TTACTGGAAGACCTATTCGGGCCGGGCCAGGACTCCCTCCCGGGTCTGAACCCCAACACT -TCGTCCCTGCCTGTACTCGAGACGCACTTGAACGCACAGGTGCGTAATCTGCTGCAGTAC -CTGTCGACAGTGCGCGGGTCCAAGTCCGTAGCGATCCAATTGGCCCGACAAGGCCTGGAC -GGGGCAGAATACGAGTTCGCCCGACTTCTTCTGGAAGACCGGAATAATGAGGCCCAAAGC -TATGTGGACTGGCTGGTGCATATCCACCGCCAGATTAACTTGGAATTAGCTGGTCACCGC -AAGAAGGAAGAGGGCGGCGAGGGCGCATTGGCTAGTTTGTCGGCTATGCGCGCTCCGTAT -TGGTAGATGCCCGATCCGGAATACTCCGTCTAGACCCCGTCAGGCACCGTTTCCGTTGGT -TCCTGTCAAGACTCTGCAAGTAAGCTGCAGCGGAGGAAAGTGGAGGGCTAATTGGTGGGG -GCCATGGGGGCAGGGGCCCAGCTTATTCCAAAACAGGTCCACGGGCGACTGTGGCGCTGA -CTTCAGTGGCTTCTCGAGACACCTAGGCAGTGGAGCACCCCGGGCTCAACAGAGTGATGG -CTAAACAGAGTAATTGTGAGATATGGGAAAATTCATTTGTGTATACCAGGGGTAGTTTCT -TTTACTATTGCGAGTTTGTGTCACAATCTCCGTACACATGGCGAATGCCCTAGTCATACA -GTTCACAGCTATCGAGGCCTATTCACGATAGTTCAAAACTAAAAATCCCGTAGACGATGG -GAGACCTAATCTGGGATCACTTCTGAGTTTTGCAAGAGTACAATGTAGAATGCCAAAAAA -AGAATCCAGACTCGATCGCCAGGTACGGGTCAAGGTACGGTCAATCATATAGGCCTGGGT -CAACGTCAGAGGCGTCTAGTGGGGCCCCGGGATTACTAAGGGCGCCATTGGTATGTACAG -TCTTTGACACCGAAGATTATTCTAATTTGTATTCTAGTCTTGTATGCAGGTAGATATTTT -TATTGTAAGAGGGTCTCAACTGTGGAGTGTCAGCTCCCACCTACCTAGTAGGGATATGTT -GTAGGTATATCAGATTTATCGTGCGTTGGATCTGGGCCACAGGCCGGTTGGATTGGTAAT -TATGTGTAGTAATTTGTAGGAAATATTATGACTGGCCCAATTGATCTTGAGAATATTGCA -ACCTATATTGCACAATTGAATTGTCTGGGAATGTCCTGCTTGCCACCATTATGATGTTAA -ACTATACAGCGTATACACGTTTCTCAACATTAGTTGCTCATATTTGCCTCCTCTTTCGAC -TTGAACTTAGATCCTGATTTCTTTCTATTCTACCACCATTAACCCCGAATGTAATCCGAA -GGAATTGGATAGAAAACTTCCATGTGGTGGCCAGCACCATAAACATCCATAGAACGAGCT -CTAATCCCCCAGTTGTCCCTGGTGCAAATGCCAGCTCAGTGGCTGTGCAGCCACGCAGCC -GAGCTTCTCCCATGTTCTTTGCCATAGAATTTTGCCGGTTCGAAGTTTGTGAATCTGTTC -CTTTGATCTGTATGGAAGAACCGCGGATGAGAACGGATGGACATGACGTGGAGTTCGGGG -CAAGGAGTTTGTTCGGGCGACAATTTATTGGAACTATTGCCACGATGTTCTTGCGGTTTC 
-TATGTGAGGGTGGGGTGGTTGATAATGGTAGATTGTAGAAACACGCAGAAGTTGCTACAT -CCACGCACCTAGCCTAGACTATGGTCTACAGGGTTGTAGCGCACATATACGATGTCCATG -TAGCGGTGATGCCTTTGGTAGCAATGCGTGATGCAGTTTGTGGTAGATATCTTACTCCGT -ACATATCTGATATGTAGCATTGTACATGCCCAGAATATGTTATATGGACTATGAAACTCG -AAGAAGCCCTAGAGGCAAGACGCTCTTGGTTCCACTTTTCTGGAAttcttttttattctt -tttattctttttatttcttttttttttgtttttCGCCTTTTTGCCTTTCGATTGACCCAC -ACCTCCCTGAATCGATCCATGGCTCCAAATGTGTGGGTGGCTTTGTACGCTTTCTAGGTA -TAGGCATCTCCTCGTTCAAAACCTATATAAACCTCCCCGATCACCCCGGAGATTTCTATT -CTTCATCACATTCACTCCAGTACAATCAGTACATTCTTTTATATTCACTCTCTTCAGTCT -CGAGATTGACTCTCGTTACACTCTTTACATCTGTCGATCAGATTCAAACTACCAACTACA -TCACTCTCTACATCCAAAAATCTTTTCAAGATGATGTTCACCAAGTCTCTCGGCTTTGCC -GCTCTCTTCACCACCATCGCCTCGGCTCTTCCCCAGGACATGATGGTCCGTCGCCAAGGC -ATGGTCCGTCGTGACGGCGGCGGTGTCAAGATCACCAACAACCTCCAACAGGACGTCTAC -GCCTGGTCTGTGGCCGGCGACGTCGGTCCCATGCAGACCATCACTAGCAGCGGTGGCACT -TATTCTGAGACATGGCGCACCAACCCCAACGGTGGTGGTATCTCTATCAAACTGGCCCTC -GACCCTGACCAGAACGATGTGCTCCAGTTCGAGTACACCGAGGCCGGTGACACCATCTTC -TGGGACATGTCTTGCATCGACATGCAGGCCGGAGACAACAAGTTCACTGAGCTCGGCTTC -TCCGTCGTGCCTTCTGATGAAACTTCCACTTGCCCCAAGGCCATTTGCAAGGCTGGTGAC -ACTGCTTGCTCTGCCGCTTACCTCCAGCCCACTGATGACCACGCCACCCACGGCTGCCCC -ATCGATACTGCTCTTGAGTTGACTCTCGGCCAGGCCGCATAGATTGATCCTGTGCTTTTT -TTGATTTGCCGGTTGAACCGCGCATCCTTTCGTTTTCATACCTTTCACATTTGCATCTAG -CCCTAGCTGGCAGATGATTTTTTTTTTCTTTCTTTTCATATATTACCCAAGATGGTGATA -GTTTTTCGAGATGAGATTGGTGATTTTTTTTTGTTTTTCAACATGTTCGGTTGATTCGAC -ATGTCTTCTCCCCATCTATAGCAGGTTGATTCCCGCTGGGGCAGAATGTCTCGTCACGAG -CTATGATTTCTTACGGTTTTGGAATACCCCCCGCACTTGAATGGAATATGGAAAAATGGG -AGTTATATACATGGGGGAGATATATAGACTTGCGAATAGACATATGATCATCAGATCTTG -AAGATTTACTGATTACGAAATATACGATCTTTACCTTTAAATCCTTCCTTGATCTCGTAT -GTTAACTTTTCACTTGCCTGTAAAAGAGCAAGTGAAGAATGGAATTTGAAATTTTTGAAA -ATCACTGGGGCAAAAAAGGAAAGCCCTGAAAAAGACCAAAGGCACAACAAACAGGTAAAA -ATCCCGAGTCCACAAATGATCACCTAAAACATACGAATAATAGTGGAACCACGACGACGT -TGTCAAACACACAAAGAACTTGTATAATCACGATCTAGAACAAGTGATAGACAGGGACCC -TTAAGGGACCCCAACGCACAAATCTCCTATTCGTAGATCTAGCTCCACATGTCAGGGCCA 
-TAGTCGAGAATGTCGATCCTCCGCCAGTCGGATGGGCGGATGGCGTTTAAGATATCCACT -GAGGGGTTGCGTGGCTTCAAGATTTTCGATTGGAGATTTCCGACTTGAATCTTCAATTTG -ATTTGGTCGATCTTTTTTCGCTTTTCGGTCAAAGGACCGAGATGTATTGATTTGGTCATG -ATCTGCTTCGATCTGCGCTCTACGCTGTTAAGCAATATATTTTACGGTGTTGTCATTCAG -GTTTATTGCGCACATTTTATCATATTAATTTCGTCATAGGGCTATAGACTGCTATGTTGT -ACATGGTATGCTGTACAAGGAGTCTAGCACACGTGGGATAAGCAGGTACAAAACTCAAAG -TGCCGAGTTATCCATTCATGTCTATCAAACACCGTTTGACAGGATTGTTCCTATGAAGAG -GCGATATTGTTTATTTGGTCGAACCACCCGTCCCAAACCATGGAAGCCTTCGCCTGAATG -TTTGGGAAGAACTGACGGGGGCGAACCGGTCGAAATAAAAAGCTGATTATTAAGGTTCTT -CCATCCAATGGACTTGGTTATGCTTAAACTTTTTTCCCACCTCTTTTGCCGTTTAGTCGT -TTTGAAATGTTTCCACTCCATCCGATGCCAAGACCGCCGGCCTTCACTCGGCATGGCTTA -ATTCTTGGTTGCGCAGTCTAGAACCACCGTTCAAGGGTGGGGATTTCCTTTCGGCCAACT -CTATCCCCATTGAACACGACGTACGGTATATTCAGCAACCCGAACTGCCTGTTACGGCTA -CCTGGATGTACCGGGTGCCCTCGGGTAGAATTGTGAGTCACGGGCTTCACCGTAGAGAAA -GCAATAAGGCAGGTCGTATTTGTCACCGTGTCAAATTTCCATATTAGTCAACAGTAGGCT -TCTCTTGGCTGTGTGGCACGTACACATAGCACCCTTGATGCCCAACGACTTGAACATGTT -GATCACGTGACCATCTGTCAACAGCATCCGGCTTGAACAGTATGTTGTAATGTTATCTCT -TTTCAAATTATGATTTTCTGGCATCCAAAGGTTTCTTTATCTGGTGATTTCTACCAAAAC -GGCTAAAGCCAAGACTTGAGGCCTCGGACTGTAGACATCAAGCGAATCTGTGTTGTAGAT -TGAAGTTTTTTGTTGCCGATATTGAACTAGAACAAAAGCCTTTAGGGTCCAAAATCAATC -ATAGGATCAAAATTATATCCATTTCCGCatatattgatagattaaatagatatataagaa -gataAAGTGCATTGGAGAGAAGAGGAGCTACAACTAAGATACGTAAAATTAGGGCACGTG -ACCATGAAAATGTTCGTTTGCTGGAAAAGAAGTTTGTGCAAAAAGTAATAAGAACACAGC -ATTAGTTTAGGATGAACAGAAAACTGGAGTTGTAGGTATAATGTGTGAGAAGGGATGGAA -CAACGGGAGGAAGGATATAGAGAGGCAGATATCAACACTGGGTGGGGCCTGAATACCCCA -AGGCCCAACCATGATATGAGGCTCTTCATTTCTCTCTATAAAGGAGGTTAAAGGAAGGGG -AGTGATTGGCGCGAGATCAACCAGAGTCAAACCGGAACAGATAGGTAGGGGAAAATCATA -GGCACTTCCCTCGTAGTACACATGCAATGCATGTTACTACTAGGCCACCCACTGTTGAGT -ACCAATTTGGAGTTAGATTGCTCTTCCTTCTATCTTACAATTTCAAAAGCTTTGTTTATA -CAGAGACACATTAATCATTATGCGTATTCAGGGAAATTCCATAGTATTAGCTTCTGATCC -TTGGACTGGGTTTCTTAAGACTTCATTATCATTAAATGGTCGAGTCTAGTTTGGCCATCT -GAAAAAGGAAGCAAACAGCATGTGCATGCAGCATGGGTATCTCAACTATGTACACATACA 
-GTAACCAAAAGAGTGAAATCCAGATCTACCTAGCTTTCTTCTGAGCCCGCCACTCCCTCC -ACTGCTCCATAAGCTTATGCGATCTCATGACCTTGATAAAAGCATAGGCAACAACCCGAT -AGATGATCGTGCAGACACCCAACGCCATGATATTCATCGGCCCACTCTTATCCAGATTGT -AGAGCTCAAGCACCTGCTCACCTGAATCAACAGGACAACTCCCATTAGCAAGCCTCTGAG -CACCAGAGCAGTGAAACACCTGACCATGCATTGAATATGGCGCCAGATTAGCAATGGAAT -ACTTGATCGGCGAGAGATGGTTCAGCCCCTGCAGCACATTGTTGACATTAAGACTCATGA -CGCCGCCGAGAATGGTTGAGATGGAGAGAAGAATAGACGTCACATTAACGGCGAAGCCGA -CGTGCGAGAATAGCGTGCAGAACATTATGCCCACGGATTCGCCGCAGCTGATTATGCAGA -AACAGTTGAACGCGCTGATGAGGAACATCGTTACGGTGCGTTGTAGGTTATCTGCGAAGG -CAGCTAAGGTGCCGAAGATTAGGGATGAGATTATTTCGAAGGGGACTTCGATTGCGGTGT -AGCTGAGGATGAATGTTGAGGCTGTGTAACAGTTGTCTGCTTCTTCGCGGTAGAAGACGT -CGCGCTCACTGGGGTAAATTGCGATGTTTTGGAGCATGCCTGTTTGGGAATATTATTATG -CGTATGAAATTGGAGAATCAGAGAAGAGAGCTGGGGAATATAGATCAAACTTACCGACAA -AGTATAACGCCGCAAACTCCTGGATAAACCCCATGCGTGATTGCACAGCCGCATAATCAT -TCTTCAGCGGCGCAAAGAAAAGCGCCATGATAATCGCTATTCCCGGGATCTGCATCGACC -GCGCCATAATCAGCGGCGGCTGGCGCCAGAAGTTTATCATGGATCGGCGCAATACCAGTG -GGAAGGTCACACGGAATGGCAACATGTGACGCTTAAGACTTCCCAGTTCTGCGGGCGTGG -CGATCTGTGACGTCTGCCGGTCTAGCCCTAGCTCGAGAGGATTATCTTGCCAGCAAGAAA -CAAGACCCTGTACACGCTCACGCGTGACGAGCTCACGGTCTGCTTGCTGGAGATCGACCG -TGATGAGATCCAGGACGAAGTCTGCGGGGTTGGTTGTGCGTGGACATTCGTGACCTTGTT -TGGCGAAGTGCATGAGCATTTGCGTTCCGGGCCCTGCGTAGACCGGGTAGCCGCCGCGGG -CTAGGAGGAGAACTTGGGAGAAGTGGGTGAAGAGGTCTGAGCGAGATTGGTGGAGGGTTA -GGATTAGAGTGCGGCCTTCGGCAGCGAGGCTGTGGAGGAGCTCAATTATGGACATTGCTG -TGAAAGCGTCGAGGCCGGAGGTTGGTTCGTCGAGGAGGAGGACTTTAGGGTCTGTGAGGA -TTTGAATTGCGATTGTTACGCGCCGCTTCTCACCGCCGCTGATGCCTTTGACGAGGTCGC -TGCCGATGAGGTTGTCAGCGCATTCTTTGAGTCCCATCTTATAGAGGATCTCTTCTGCGC -GGCGGCTTTTTTCTTCGCGTGACATCCAGGTTGGGAGGCGTAGACCAGCGGCAAATCGTA -GACTCTCCCGGACCGTTAGTGAGGGCATCAGTGCATCGTCGTCTTGCGTTACGAAGGATG -TCACTGAGCGGATCACACTCTCTGAGGGTACGGCGCCGTTGTATAGCATATTACCACGGA -CGCGGTGTTGTGTCCCTACAGACCCGTGCAGCCTATGAGCTATGGAGTTGAGCAGCGATG -TCTTGCCACTTCCAGATGGGCCCATGATGACATTCAGCTCTCCTGCCTGGAACTTAGCCG -TAATGGGTCTCAGGATCTGTAGAGTCTTTGCAGTTTCCCAGGGGTGTTGTCTCTTTCGAA 
-TGTCCAACGCGTACTCATCAAGCGATATTGCCACTTTGCGCGCCTCCTCTGTTGGCCGGA -TAGCGAGCTTAGATTTCCCACTAGATGGGTCTCCTTCGGTTTTTCGGGCCTGGGCAACAT -CTATGGCGAAGCGGTTGTATTGTAGCAGCAGACCGGCGAAGAGATAATGACTAATGGCAA -ACGCGACCAGCACCACGATAGGTCGCCAGATCCAGTTTGACGGCATTCCCAGGCTTTCCA -TGATATAGCGCCCAGTGTATTGCTTGCAGGCTGGGTCTAGAGGGTCATCAGAATATGGAC -AGGCATAAAATTGCCCCAGTTCTGAGCCATTGGGTCCGATAAATTCGTTTGCACAAAGGG -CTCCAAATATGTAGAAGGTGTAGGCACACCACTTCAACCACCTCACGTAGACCGGGATCT -GGTTAGATTGGACGAAGTATCCACACGCAAAGGACTGCAACGTGAAGGATAGGTTTGCTA -CCAAACTAGCTCCCGGGAAACTCCGCGCTACACCAATGGCCACCGCAGCAAATGTCACCG -CAATGTAGTGCGTGAGCAAGGTGAGGATGAAGAAGATAAAGAACTGCCCCGGATCCAGAC -GGTATCCCACCATGAAGTAGAAGATGAGAGAGAAAATCAATGGCACAGGAAGATCCTCCA -ATGGTAACCGAGCTGCTCGTCGACTAAGAAGGAAAGCTGGCACACTAACCACGCCCTCGT -TGCGTTCACGGTCAAAGAGGCGAATGTCAATCGTCAGTCGGTAGGTCTCGTACAGTAGAA -TGAGATAGCCATTCAAGCTGCTGGCAGTGTATAGACTGCCCTCCCGCGAGCGAATCCCTG -CCTGGCTTTCGTCAAGCCGTAGGAAGATCCACCCATTAATGACAGCCATTCCAACGGCCT -CAAGGAGACTACCAGCAACACCCATCGGGTCACGAATGGTAGTCTTAAAAGTGCGAGACG -TCAGAACACTGAACTGTCGACGAAATGAGACCTGCTTCGCAAGTCCAGTATCAAATTTAC -CGACGTCAGGTTTCGGCGTCCCATCTTCTCCTTTCTCCTTTGCCTCGGTGTCTGTAGCTA -CAGTCTGCTCCGACCGCCACAACTCCCTCAGCCGCTCAACCCGAAGATTTGAAGCAGCCT -CAAGCGCCTCCGTCCGATTATCGATAGCAGCAAGATCAATCAAAAACTCCGCCGGATTGA -CGAACGGGGGAAGAACATATCCGCACTTCTCGAAGTGCGCCAAGGAGTCTTCCACTGAGC -CACTGTAAACAGCCGCTCCCCTAGCCAATAAAACAACATTATCAAAAAGACTCCAAATCT -CAGAGCGCGGCGCATGAATCGAGACAACAACTGTCCTCCCATCCAGCGCGAGACGCTTCA -AGGTTCTGATAATCTGGAACGCACTAGTAGCATCCAAACCGGTCGTAGGCTCGTCGCAGA -AAAGCACAGATGGATTTGCCAGGAGTTGCACGCCAATACTTGTGCGTCTTTTCTCACCGC -CACTGCAGCCCTTGTGGGCTGTCGTACCGATGCGGGTATCAGCACACTCTTTCAGACCTA -GTTCGAGGATGACTTGTTCCACGATTTCGTGACGCTCTGCTGGAGTTGTTGGTGGAGGGA -GACGAAGGTCTGCCGAGTAGCGCAGGGTCTCGCGAACGGTCAACGTTGGGATGAGGACAT -CTTCTTGCATGACGTAGGCGCTACGAATGCGCGCGATATCGGGGTCATCGTTAAAAGTTG -TTGCTCCGCATGTGTTTGCTCGAGAGAGACTCATTCGGTTGGCCATTAGGTTGAGTAGGG -AGGTCTTGCCCGAGCCACTGCTTCCGATTATGGCGGTCAGGCTTCCACTTGGCATAGAAG -CGGATACATTGTCCAGGACTGTTTTGCTTGTCTTTGAGGGCTTTTGACCCCGTAAACGCT 
-GCCAGATCTGTGATGGGGAGGTCTCCCATATTGGTGGGGTGGTGTTGACTTCGAGTGATA -GGTTACGGACTTGGGTGCATACTGGGTTGACCGCGCGCAGAGAGAGATTGTATACGGTGT -CTCGCACATCTGCGGGGAGGACCATCTGCTTTGTGCTCAACTCGGTAATGATCTCGTCTT -TTTCCATTGCTTGAGACTCGGCTCATGTAGCCCTGGAGATACACGTCAGACTTTCCTGTT -GAGATATGATAGGCTTGGATTGAGGTCTCGCAGAATGACCCCGGATTTGAGTGGGAGAAA -AGTCTAAATTCAAATTAATTCTCCTTACCTCGGATCCCCCCAGACTATACTTAGCTCAAC -TTTTCATGTCACTCCAAACGTGGTCCTTGCATAAGCGAGGAGCGAATCTGGAGGTTTGAC -AATTGTGATTCAATACTTCATATTCTAACATGGACCTCTCAGAAGTAACCTATTTGAAAT -CAGAATCCTAGCATTAATCTGATCACATTACAGTCAAGGATATCCCTCACCACTAAAAGA -AACAAAGAAATCAAATGAGATGCATAATCTCAAGGTCTAGACTCTGCGACCCTGTCCAAT -ACACTTGAAACAACGATACTATACAAGGATAAAGACCCAATTCCTATTATGCCACCGCCA -AGGAAGATTTAAATATATACAATCACCTATACCAAGTCAAATAACAAGAGACCAAACCAC -ACCCCTCCAGACATAAGAACGCATCCAACTCATGAGATCATCCAGTCGACCTCTTCACAT -CACTCATCCATAGCACCAATTCGGCCTTTTTTTTAAAAAAAGAAATCGGAAGGCTTGTTG -GAAAGACCGATTTGGCACTTGTATCTGGCAATGCAATGCAATGCAGCCAGGGGGTACGCC -TTTGACACGGAGAGTCCGCGACGCACCGATGCTTGCAAGCCCAGAGAAAAAAAAAACATA -AACGCAAACGTAATCGTAATACTCCCCTTCAAGATGCAAATATGCATCGCAGTCACAGTA -ATAGGTAAATAAAAAAAAACGCCGCATAGTACAGCCTCGTCGGAGGAGTGATCATCGTAT -CCCAAAGGTATGGGACAAAGGGCGGAGAATCCCCTTTCCAAGACGGTAATAATCAACAAG -AAGATCAGAAAAATAAATAACGTAGAGTAAATAGAAAATGACAGGGTATCAGAAAGGTAA -AAGGGGAAAGATGGGAAAGAAGGTAGGGTATGGGAGAAAGGAATCAAGTGATCAAGAAAT -CCATGTTCATATCATGGATGATCTTTTCCCAGTCCCAACTATCTTGCATCTGCGTATCCA -TAATCTCCCAAACCTTCGAAACAACCTTGAAAGCAGGCTCGATGTTTCGAAGGTTAGAAT -ATGACTTCAGATTTTCGAAGCCCTGCGTAATACGTTCACGCTGATCAGACAAGAACGCAG -AACAACCAAGCAGGAACAGCGGCATCAGCACGATACTGAAAGCAGATGCATCCTGTGGCA -GCTGATCTAGGTAGTCAAGTCCGTCGTTGACGACCTGGGCGATTTTCTCGCTCGGTCGCG -AGGGCCGGATCGTCCGATACAGGTACACCCAGGTTGACTGGCGGTATAGCTGGGCCAGAC -ACCAGTTTGGGGTGTCGGGCTCATGTGAGGTCTCCCAGGTACGGATGGCCATATCAATAG -AAACCGCCTCGCTCAGGGTTTGGTAGTCCACTGCTGGCTCATAGCCCTCGTTGAAGCGTT -CACGGATACGGTCGCGGAGCTGGGTTATCTGCGAGATGTAGTTGAAGAGCCCGTCGAATA -CACCGAGGAACATCCCTGCTGAGACACCGGGTACGAAGTTGGGCATGTGCAGGTCATTGT -GCAAGAGGTTGGGACGGCGGTCAAGCGAGGTAAGGGAGTTTGAGACATCGTGGTACTGGA 
-AGAACTCGACGATGAACTGGCGGAATTTTTCGTTCCGTGGCTGTTGTGACAAGAGCAAAT -ACCTGGCAGCATCCATGTGCGGTCGGTATTCACCATTTGTTTCACCTTCGCAGATGGTGT -TCAAGCTGAGGGCGATGGTGGATGCGATGATCGGGTCCTCGACTAGGAGTCGCTCATCAT -CGTCGGGGCTCTTAGCTGATGATGACGCATTGATATTATCTTTTAAATCTTGAATAGCAT -GGTGGAAATGAAAGTACTTTCGCTCCTTCAGCTTTGGTTCAGGATCCAGACCCGAGAGAT -GCGATCCGGACAGGCACATCAGCGAATGCATCAGCCCCCTATGTTGTGTCGCCATCGGGA -GAAGAATCTCCTTGAAGGGATTTGAATCATCGTTGATCAGTGTCAGTACGCGACTGAACC -CGTAGACAAAATGATCCAAGAAGCGACGGTCGACATCCGTTTCGATGCCGTCGATGAGGA -GGGGTAAACTGCGCGGTACGGCTGTCAGAGACTGAGATCGAGCTACTAAAAAACAAAGGA -ATTAGCGTATCATTCCTAGCCAAAATCCAAATGTCAGATGCGCCTACCATCAGCGAGTCT -CTGTCTCCCACTCTTCCAAATCTCCTTGAGTGGGTAGCCTTCGCAAACGACCGCATTCTT -CTGACAGTTTTGACAACTATCCCCTATTAGACAAGGCAACTCACTTGGACGGAATATAAC -TCACGCAGGTTTTGTCTCATCACACTTCTTCTTTCTCCTTCGACAGGTAATACAACCAGT -CTTGGAGCGACCTTTGGTGATCTTGGCCTGACCTCCCGGAACAGGCAGCGGATGTTTGCG -TGGGCGACCACGTTTCCGTGGAACAGGAACGGGAGCGCCCGACTCCACATCTGCCTGTTT -GATACTCAACAAGTCATCATCGAGTTCTTCCATTTCTTCCACCTTGGGTATCTCCATAAT -ATGTTCCAATTGCGCCGGATCAAAATGTGAATATCCCTTCAACTGTTCCTGCTCTCGATC -GAATGACGGTGATTTCTCGCCATTCCAGTCTGAATCGACCGAGGCATCATCAGTGCTAAC -ACCAGTGCTGGCACTGGATGTGCTGGCTCGGATTACGGAACCCCGAATTCCTAATACAGG -TATATCCGATCGGTCACGACTCTCCCTCTCAATCTCATTCAAATCCGGGGGAAAGAATGT -ATGCATGGATAACATTGAAGAAGAAGCATCCGATAAGAGTCTAGATGCGCAATGCTGACA -AGAGGGGGGAGATTATCGGGTATAAATGGAGGGGGGGGAACGGGAAAATGTGCTTGGTCC -ACCACAGTGCGTGTGATCAGAGAATACACTTTTAGATCCAACAGTTAAAGAGGGGGGGAA -TTCTAGTGCAGTCTGAAGGATATTaagggtataaagggtataaagggtataaaggatata -aaggATATTAATATAGAGAGGGAGAGAGAGTCTAGGGTATGAGTGAATCCAATGTGTAAT -GAAAACTCCACATATTACTTTCTAGCTATGAACGCATTAGTCTCTAAATTTATAATCCTA -GTTAAGAAAGCCATTTGAGTGATTTAAAGAATATTCTAATTAAAAAAAAAAAACATAAAT -ATTCGACGTACTTATTCTCTAAACTTTCTATGCGGGGGAGGCTGCCAAGCATGTTGGATC -TGACCTGATTTGCAGTGAAGAAGCCCCCCCCCCCAAAAAAAGCAATTTGACAAGGGCATC -GTTATGGGGTTTTGCGTTTTATTTGCTTATTCTTCTTTTTTTTTCGGGAATTGACTCGTT -GCCTTTCAGATTGACCTTTGGATCCAGAAGTCCTACCTCGTATGATTCTGGGCCAAAGCT -TCATCTAGTGTAGTATACAGTACGGAGTAGTATACATGTAGTATACAGGTAGTATACAGT 
-AGCATACAGATCTTCGGGCATGTTACTCCGCACATATATCCCTATAACGTGGATGATTTG -TCACGAGTTATGTACTCCAGATATCAAACCTGTAAGACATTCCCCCTTGATCCATTGGAA -GGACTTGGGGCCAAGGAAAATAACCCCGGATGTGATGATGCATGGTCCCCGGAGAACCTT -TTGGTAGATGATGTGTACTAATAGATATTACGGAGTACTTGGCGATATATTTCTCTTCCC -ACGAACCATTAAGCTGGAGTATAAGCCACTCCCCGCACTATCGAAGCTTTTCttcctttt -tctgtttctttcttttctttttctcttGTACAATAAAGAAGCGCGTCCGGTGAATCTGAT -GGGGAGATCTCCTGTCGGGGACCATTCCCATCATTCGGGTCTAAGCGCCCCCGATCCCCG -TCCATATTGGAAATTTCCCCTAATTTCGATTATTATCTCCTTGTGTGTAACGCCAACAAA -TCTGGGGTAAAGAGTGGGCCAGATCTTCTCGCGACCAGCACGGACATCCGCTGCAGGGAA -TCGACAAGGTTCGGAATCCAGAGCCCTTCATTCCGCCTATGGAAGATACGCAATGTTCTG -TGGAGAATCCATCTGTCTTTCTTCAGTTGCTTCGTCACTCCTCTCTATTTGTGTTTGGCA -TTTTTATCGTGTTCTTTCTGCCTGTGATAACCCAAAGGGGTGGAATGTGGCGAGGTCACC -AAAAGTACACACTGCGCAATGCCCTCCGTGGATCTGCCGGGAGACTAACAAGGAAACACA -CTCGTCTGGAGTTAATAACGGGCTAAGGCTGGGGCCCTCTTTTTTTTTAACCCTTTTAAC -CATTTTTGACCGATCAGCTGAGACGCGGCAGAGAGAAAGATTTTCATACCCCTGTTAAGT -TTCCGACGCCCGAGCTTCTCCGCTTACGTATGAGGGTTTCGTATGAGCTCCATGCACTAG -GGGTTAGGCTATGTGAGTACAAGGTCAATTAGAAAAAAAAGTCTTTAATTTTGAATCGTG -CAAAGATCGACTATGCAAGTCCATTGATCTGACGGGGGACTAACAATCAGGCCAATGGCT -CGATGACGGTATGTGCCGCTAACGAGTCAGACAATCTATACGGATTTCGCTAAAAATATG -GGGAACGCGAAGTCCTTTTTGGCGTCCCATTCAGATCCCTGCCCTCCGTACGGAGCACCG -TCTTTCTTTACTATATTGTACGTTGTATACAATCCAACAAAGAAGTATCCGCTTCCAAGG -TTCAAGGCGGGTCAACGGTTATTCCCGCTCAGTGGGGCCGAAGTAAAACAAAACAACCTT -GGAGAGAGATCCATGAATGTATTCAATATAATCTTGTTCCTGGTGAGTAAGGGGTATGAA -GCAATGAGGTATATGTATCTGGGGGAGGACGGAGTACGGGGTACGGAGTACGAAATCGAA -AACTTACCTTCGGGTAATAGGACGGGGACCATATGACCAGGGCGTGGATCAATACAGTAA -CTAAGCCGAGGTTCAGGTCTGGACCATCTTTTCCGTACTTTTTCACCCTCTCGGCCTCGC -TCTCTTTCTCTTGATCTTTGACCTCCCCCGTACAGATGTGGAGCGAGTAATTGCCTAATC -AAGTTACTCCCATCCATCGTCGGGGGTTCTCCGAGAGAAGACTTCCACGCCAGTGGATGT -GGACACGAGGAGAGATTGTTTAACCGGCGTTCGCCGCATGATGGAATTCCCGCCAGGCAC -CACCGGAACTGCTGCACGACCATTCCTGGGTCCGATGATGCGGAATATACCAATCAAAAA -CTAATGGAACGAATTGCCGTCTTGCGCAGGACACTAGTCAGATCATCAAAAAAAAAAAAA -AAAGTCGGCTTCCACCTTCCTATTGGATGCACACAGCCTCCATTCAACTCGTCCAGCCGA 
-TCTCTCCAATTAAACTATCCAAAATCAGTAGTTGATGGCCTTTTCAATATACTTTGTACT -CCGTATAAATGTACATTGAAAATTGACCTCGTTTGTGCTCTACCTGGCGTCGATTCTGTT -TTTCTTTCAGCCCCAACTGCTTCAGTAGGAAAAACCCAGAATTTCGTCGGCTTTGGTCAC -AAGTTCTTATTATTCTCTTGACTAGGTTTGGGTTTAAATGGGAAAGTTTGGAAATCTGAA -ACATTTCTCTTTTGAGAAAATATGACGCATGCAAAATAGGAAAAACATAACTACAACATA -CATATTGTACTCCGTGCCTGGAGCAAATAAAGCTCAGACCACTCGAATGCAGGATTGACA -ACAAAGGCTACCCAGACTATATCACAATCCCTCTGTTATTGACCTTCTTGTGGCTACATC -AGGCAAAACGCGTAGGTGACTGCATTTGACTTAGGTAAAAGAAATGAAAGATATCCCTTC -CGGAAAAGAGATCAAGGCAAGAGATGAGTGGTTCTTGTTTTTCATATGACATGGGCTAAC -ACATAGTATGATCGACACCCAGAACAAAACTTGTGGATTGTGGATACGACACTTTTTTTG -CTCCCAGATCGCCGAGCATCATACCAGAGGTTCAACCAAGACTCGGCATATTTTGCTTGC -TTCTATTGTAAACTTAGCATTTCGATAGTTCGGGATATGGGACTATAAGAGCTAACATCT -TTTGGTTATAATTGGACAAACCCACTTTGTATTTGCCAATTCTTGTTTAGCAATTCTTGG -AACAATGCCAGATCTCAGACATACATTACTATTGTCTGTGAGGAATCTATTCGTTCTGTG -CGTAAAAGTGACTGGTGGGAAGTCTTTCTGCTTGACGTTCATTAGACATATCGATATCTG -CCCTCTGGCACTGGCAGAACCCTAGGTTGTCCATACATCAGCCCAACCTAGTAGCCTAAA -GTGCAGCATGGCTTTTTCTTTCATGCCACATAAGATGTCATTCTTAACAGCTTTTATAAG -ATTCAGGGTATCTCGCATAGTCCCAAGCCACATAAAATCACAGGTAGGAGTAAACTTGTT -TTGAGCTTGGGGCGTAGAAATGACGATCAAGGCACCCAGACTACTCCCATCAGCAATATC -ACCCAAAATCCACTCGTCTATATCATATCTCGATGGGTTCAAACATGCAAGGAATCAAGT -AGTAGGGAGAAAGGGGTTCTCTATACTAAGTAAGCACAAATGCCTGAGGCACTAACTCTG -ACCTGAGGCTCTAGATTTCTCTCCTTTATATTTCTATAAAGAATAGGAAAAGAGGGGAAT -TAGCGCAAGATCGACCGGAGGAACCGGAATAGACAGGTAGTGGAAGATCCTAGGCACTTC -CCACACGCCATGCATGTTACCACCAGGCCGCGCGCTTTTGAGTCCCCATTAGACACGAGG -TTGCTCTTCCTATTATCTTGCATTTTTAAAGTTTCTTTACAGACAAATGGTGCTGGCTAT -GTCTATCCGAGACATACGTCTATTTTCAGAATAAGTACATGTTGTCATTCGACCTACTTC -AGTTTCATATGGTTGGTAGCTCCTTCCCAATTCACCAGGACTGTTTAGAACCTTTTCACG -GCGCGAATCTTCCGAGCACTACTCAAACTTTGGCATAGGGCCGAGCGATTCAACGGGTTT -TGAGCTTCTGATACTAGTTGCGGGAGTAGAGGACGGGTCTGGGCGCTCCCGTGGAGTACA -ACAGCACTGTAGAGCTTGAACATTCCAGCAGATGGCAGTACATTTCAATCTTCTATTTCT -CTTTTAGAAAATAGAGACAAATCAATATGAACAACATATGTACTCATGTCCCAGGGAACA -GCATACGATCGTTCTTGGGCTTCATGGCGCAGTCAAAGTCGACCCAGTGGACGTAAAATT 
-ATATCGGTTATTTTGCCAGCGAGGTTGCCTATTCCTCGGTCTTTTTTTCGCAGTCCTCTT -CGCGGCGAAGGCGTAAGTATACCTAAGTAGGCCCAGGACAATGTTAGATTGATCAGAGAT -ATTGCTGACAATTGTAATCGCCCCCGGAGGTAGTGTGATTACCAAACTGATTTGGATATA -GTTAGCACCTTCCTATTTCGAGGATTTCCACAAAAGAAGAGGTTTAATTGTTCCCGGGGC -AACTGGGCAACTAGATTGAGGCATTTTCTGTTGACCTAAGATGAGTATCAAATCGGCCTC -CGTTTCAAAATGAACGCCCGGGGCGTAGTGGCTGTCACTTCTGGTGGCGTATATCCCGCA -TAGTGGACATTCAAGGGCCATCTAACCAAAAGAATCAATTTCTCCGTTTGGAAATAGCTG -GAATAGAATTGGGGCGGGAGAAATTCCGCACAATCATGAAGACAGCCTCACTCCTGAGTG -ACACCACGCACAGAATCAAGCGAAAGGGGCACATATGTCATTTCCTGGGTCGATCGTCCA -GGTTATTCCAGCTCGATGGTGACGCCAATGGTGACGCCGAGGATTGCACCGATAGACAGA -TGGATCCTATCGATTCCACTAGTTCCCAAGGTTATCAACGCAGATCGATCGTGGCGCGAA -CACACCATTTAAGCCCCAATCTTTAGTTCAATTTGAACAAGGCTATATTCAATGGTCGCG -GAGGCCCTTTGTTATTTTCCCATTCCAAGATCATCTCCGGGGCTCTTCGGGTGTGTTGTC -AGAACATGTCTTGCCCAGTCTGATGGGCTTGATAAATCCTTGGCATATCTCCTGGAAGTA -AACTCTAGAATGGGAGGTTGGTTTCAGGTTGAATTCCAAGGAAATCCATGTCCTGCAGAA -TAAACTTCGAAGTTCTGCGGGCTCGTCTGAAGGATTGGTGTTATTGAATCGACAGATCAA -CTTCCTATTTCTCGGCGCCAAGCTATTAATGAGCGAGATTGTTCGGCAAATCTGCCTTTG -CATAACCAATATGCATATACCCGGCCCGGTGAAAAGCTCTGGGGGAAAGGTCGGAATAAT -TACGGAGTACACCCCGAGTACTACGTATTTGTGAGGTTGGACTCCCTACGAGTTTCTCCT -GGCGTACAGCATTTGGTCTTGGGTGTCATCTTCTCGTCCCCTTCCCCAGATAGACCTGAG -TTGAGGTTCCTATCGGATACCTGTGTTATAGGTAGTCCTAGAGAATGCGCAAATAGGGCT -ACATCTATATAGCTTGAGTTCGAAATTGTTAAAGTTGATGATACTTCACGCCTGCCGCAA -GCAAAGCCAGTATACAGAACGCGGGAGCTCTGCTACATACATCAGTACATGATGCGAAAG -TAATGAAGTAGACATACCGATGGCGGGTTTCTAAGCTAGTTGTAAGGTGCCTACGACGGG -GTTCTACAAAGATATGCATCCAAGTATACGAAGAGCACGATAAAAATCTCAGAGACGGTA -GCAAATCAATTGACAAGACACAAGTAGCCCATTTAGAAGTAATAGCCGTATCCTAACCGT -GTAAACCGATCCCAATGCATAAGATGAAAAAGTTCCATCGAGCCGGTAGCCGTGACCCGA -TCACATATGAAAAGCAAGAATTCGAATTACCCCGACCCATGCAAGATGCTAATGATGACA -AATACAAAAATAAAACAACAAGGATATCACTAAAATTGCAACAGGAACAAGACACCCCTC -TCTAGGGAGCTGCAGCAGGTGAAGAAACATAGACATGTAGACAAAGCGTGAGGCAAAGCT -GGGTATTAGCCATGTGAAAGGGTTCGGGTTTTAAGCAAGTTTTCTGTTTTCATTGAACAC -TTTATGCGATGAATGCGCGGTCCAGTTGGCGCAACGCCCTCGTGTCCTCGCCTCCGAATC 
-CAATCTTCTCGTTGGGGATATCGCGTGGCCCCTCGGCGAAAGAGCTCTGTACTGGTGCTC -CATTCGGGCTCGCGGTGGGGCCACTAGGAGAGACAGCTGAGGAGGAAAGAGGTGCCAGCG -ATGCACCGTTCATTAATGAGTTGATTCCAGGAACTGCATTCCCGGAACTCGTGGGATCGG -AAGATTTTGGTGACGCGACGATTGGGTTAAGTGAGGGCAACGCAGCGGATTGCTCCTGCT -TCACTAAGTGGGGGCTGTTCGCGTGTCCATTTGATTGCGCAATGGACTGGACTGGCACGC -GCGGCGATGCCATGCGAGGATCAACAAGAGCATGTCCATAGACCGGTGGGGTGCGTTGCT -cgtgagcgtgtgctgcgtgggcgtgggcttgggcttgggcttgggcctgagcgtgggcat -gggcgtgtgcgtgagcgtCGTGTATTGAGCGCGGCGATCGATTCATGTAGCTAAGTTGTT -GTGGCGGTCCAGCTGGAGGTAATGTTGATTCCATATGAGGACGATCATATGGGCTTGTCA -TCCCATGTGGAGGCGGGGGAAGTCCACCAGCATGGGGTGGAGGCTGATGGTGTGGCGGGC -CATAATGATGTCCATTGGCACCTACCGGGGGGAGATAAGAAGGACCAGGACCGTATTGTA -CTGAGACAGTCGGTGGTGTGGCTAGAGGAGTTGGCGGCCAAGCATAGGCAGGCGGGTATG -GCTGTGTATGAGTGACGCGCGGGTATGCGCCAAGGGGATGAGGGGTGATGGGGTAAGCAG -CGCCTGCCATAGGTCCACGGGTAACGACAGGGAGATTTTGCTGATCATTGACTGTCGATT -CGGGATAGAGACCTTTCTGAGGTCGAACATGGCGCAGCGCAGGATCTGAAGTTGGTTGTT -GCAAGTTGGCGGCAGAAGGATGGGAACGTGAGTAATAGCTGATGAGATGCAAGTGTTGAC -CGTTTGATGTCGTGATACTGAAGGACTGTTTCATCAAACCATCTGGCTTATAACGATACC -CATCTGGGCCCTCGTCCGTGCCATCAGCACGATCATCGTCACTGCCGCGACTCTCAGGCG -TCTTGCCGCCTCGAGAGGTAGAGCCTTGAGAGACACTACCGCCTCCACGCTTGCCTTCCA -TTTCGCGATAAGTCAAGAAGCTACCAGATACACGACTGGCGCTCCATGACTTTCCGTCAG -TCCATCGCCGCATACCCGCTTCTCGCTCATCCCATACAAAGACCGAGCCGGACCGAATTG -ATTGTCGCTCTTTTTCTGATAATCGTCTTTGCACGCGAGGTAAAAGGCCGATGCGACAAG -CCTCGAAAAGAATGATAGCGTCCGCAGGTGTGCGAACGTGCCCGTAATACGTCTCCATCC -AAGCTCTCGAGTCAACCGAGGACCCCGGTTGATTTGGTCTGATTGCCAGGTTGAAGGATA -GAGGGGGTTTTTTTTAAGTGGGAGTACAGGTCTGAGGGGAGTTAGATCTAGAGGGAAAAG -AAAAGAGGCGCAAGTGGAGAGGAAGAAAATAGGGTTCAGGTCCTATAAAAGGAACCAGGA -TCTGACTTGGGATATGGGGTAGCCTGTAGACCGGCGTGGGTAAAGGCCGAGCTTTTACGT -CACCCAAGGCGGTTAGACCGGCTGAAAAGTAttatttttctcttttctttttctctcttt -ttcttttttttcttttttctttttttGATCCAACTTTATCCCAAAATGGAAGAGGGAGAA -CTTCAAGAGATTGATGTGGGCCAAGTGGATTTGAAGGCGTAAGAATGCCGAACCAGAGGG -AGAGAGCCAAGGCTGAAAAGACACAAAGAGATAGAGAGTGGGGGACCTACTTTATGACTC -CACTAAGTTTTTTGAAGGAATGGCTTAGCTGTTGTCTCGGGAACTTTTTTGGGAAATGAT 
-AGTGAGGgagaaagagagaagaagtggaaaaggaaaaaaaaaagagagagagagaaagaT -CTTTCAACTAATGCAAGCTTATAAGAAACAATGAAGAAATCATAAAAAAAAGAAGGTTAA -ATAAGGGAAGCGGAGAGAAGAAGAGCGGATCAAGCGATCCAAGCGAAAAGATCGAACGGA -ACTTGCTAAACGCCGAGATTCCACGAGGatttatttaatttaatttactctatttacttt -atttaagttatttaatttatttaGCTTGAGTTTACCTTGGAGTAATACTGGAACCTCTTT -GAAATACAAATTTATGCATGGGGGAACCTTGGTAAGAGATAAGGTCTATACAAAGGCTTC -AATATTGTGTAGCCTATGTATTTTATGTGCAAAAGTATAAGTAAATCCTTAACGCCCATC -tttaaagtctcaaattctatattgaaatttaaattCTATTTTCCTGTCACGGGGAGTGAA -ACAGTTTTCCGGATTGGCTAATTCCACGTTGGTTGACTCTTGGCTCGGCTTTACGGGGTT -TGAAAAACCTTGGAAGGCAGTAAGGTTAATTTGGCCCCCCCTTCAAAGGCCCCATTCCTG -ATAGATTAAGCTAAAATGGGACAAAGGGTAAAAGGATCAGCAATAATAGATAGGCAATGT -GCACGGAGTGCTCTGTACGCAGTATGGTAATAATACATAAATCGGAATAAAATTAAATAA -AGGTATGTACATAAAATGTTCTGTGTCAGAAAAGTGGCGCTCTGATCGCAAAAAAGAAAA -AAGGTTAGTGCGGAACTCTTAAGCTTAATCCAACACGAGGAGGCCGGCGCTGAACTTTAT -GTTTGCTCGCTTACAGAGGCCCCTCCACCGTGGTTTATTTTCATTCTTGGCAAATGGGGG -GGAAACTTCATCGAGTCTACAATACTCCGTCTATTGGAGTAGTGGGTTATCCTTTCAGTG -TATCAGTGTACGTCTTACATAACATAGTTAACCAACCTTCCTTAAAAAGTTCCAGGATCG -GAAAACATATTTACGTACAACATAGGTACATAGCATCTTTCGGACCCCCCAATCGGACTT -CCGTCATAAAAGCTCAATCCCAATTGCTATTCTCCGACAAGAGTGCCTTGTTCATAGTTT -TGTTTTCTTACGAAAAGAAACAAGTTGGCTACCCGGATCTAGCACCCAATGAGGAACTTG -CGTGATCACCTATCGATTAATAATAGCCTAAACAGGGTTTGAGCTGTTCCGTCCGATGCG -CCCTTATCCAGCACCAACTCTCTTGACAAGTCCCCATACGAGTCATATCCAAACAAGGTT -GACCTTAGCGAATCTGGCCGGACATTTTAATTAACATCCGATATCCGATATCCCATAGGC -TAAAGATGATAAGAAGAAGCCACAGTGGCCATATCCTCGTGACTCGTACTCGTATCGGGT -CCAGTGACGCATCGTTCTGATTTCAACTTGCGCTGACCACAAAGCTTGACAAGATACTAG -TCCAGATCAAATCCATGATGCAAAGCAAAGGAAACAGGAACTAAAGCGACGCAGTGTACT -TTACACGAAATATATGTGACAAGAAATTTCTACAATATAGGTCTCAAGCCTGTTGATCTC -ATGTACACTATTTGCGCGCGTAAAAGTATGCGCGGGGAGAGCTAAATAAGAAGCCAAGAA -GCCACGTAACATGATCGCCACGGGCAAGCATACAACGGATTACGGCCGGCTATACTCCGT -ACTATCTACGGGCTCTGTATTGTAGAAAGTGCACTGCTTCGTATCTGACTTATATTGCTC -GTGTTCCGACTAGGAACAACAATACACATTGGCTTGAAACTAAATGATTCGCCACAACTA -CTAAGCGATGGGCCAAAACATGATTCGTCTAAATCATATCTGCTGACACATTGACTCAGC 
-GATAAGGTTAACTGGTCCGAGTCTGCGCTCCGTGCCGTTCATGTTCGCCGTGCAGTTAGC -CTTGTACTTAATTAGCCTTGCAGGGGGCCTCGCATTCTAGATCCAATATTCGACCCCTGT -ACATCGCATGAGGCGTGAAAGGTACCAGAAACATTCGGAATTCGCCGCCACATTGACATC -TCTGGCCATGAACACTTGAAGTTGCACTATATCTTTATCATAATTTGGCATGTTACGCAC -GGGCCAAATAATGCAAGCCAGTGAAAGCGTATCGCCAACAAAATCCCCCGTTGTACAAGG -ATCACTCTAATAGGGTTCGGGGGAGCTGGTCAAGCTTGGCAGCTGATTTTCTCCTTGGCT -TCAGCCCTGATTCAGTTTTGGGGTTTGGGAAGCTCGGGGGCCCATGTGTTTCTATTCTAA -CAAGAGAGTCAATGGTATCAGCGTACTTCGTACATAGACTGTATCGCCTGCATTACATGT -CTCACATAGCACCGTCCTGTGTGACTATAACATCACAGTGTTGACCTGATTTTCCATCTA -CCAGGTCTGCGGAGTGTGTAAGGTCGCACTGCTTGCTTTGTCGAAGTTCAATATGATGAT -AAAACAGGCTCGGCCGATTAAACAGAAGACATACAGAATAGTGTGTATGATCTTACATGC -AAACTATATGATTAGCAGGCTAGTACTCCGTATAATCATGCTGTGTTCTTGTGCTGCCAC -GGTGAAAATTGCTTTAATGGTTAATGATAGGCTCTGGGCAATAATCGAGGTTAACAACCA -GACTGGCGCAGCTGATCATTTGTAATGTCGAAAAAGGAAGTTGGAACGGTTACATTTTCG -CCGTGTCAATCAATTTAACATAGGTTCTCTTCCAATCAATGCGATAAAATAAGCATAACC -AACAGCACGAATCACCACATGTATTCCCTTACCTGAAGCATGCAACCATCCATGGAGGTA -CTCTATACATGAATTCTATTGGATATTAGTGTAGAAGCTCAGGCCCCGACCGAATTTTTA -TTTTTGAGGATCGCCTGGATCCACGCCCGATAGCCATGATCCCGCAAGTTTTGCTACATT -AGGATAGTCTGGTCTCTCATTTTCCCCAGGCGGAAAATCATCTTCGGTTTCTCCGGTATA -TGTTGCCCCCATTATCTGAAGCTGAAGTATAATAATACCAATGTCCACATTTGGACTTTT -CTAATCAAAAAGGCCAGCTTGTCAGCTTCGTACGAAAAAGAAAAACATTGCGTACCACAA -ACGTCCATCCGACTATCCGAAAAGACTCCTTTTTCCAAGTGGATCCAGCTCCACATCTCG -CTAACTATATCTCACCTACTGATACACATAAAGTAACCTTGCGGGATAAGCCGACTGTTT -CGCCGACCGTGGTTCGTGCGGACCTACTGCAACTGTATTCGTTAGCTACTAATCTATTTC -ATAGTCTCGGACTACCTTCGGATTAAAGACAGGTACGGAGCTGAGGTATAACTCCGCTAC -ACAGCATTTACTTTTATAAATAATCTCCGATCCGAGATGCTCATATGGGTGGCCTAAGAT -TATGAATGAGAGCCTCAAGTCAGCCGAGGTATTTTAATTGGATTCCTTTAGTCAGAGGCA -CATAGAAGACATCTGACAGATAAAGTGGAAAATGACGTGCGATTCTTGTATTGTCATGCA -TTCGTTGCAACCCTAGAATTAAAGGTTGCCAAGAATTAATTTTGTGAAACGTCACACGAG -CTTATCTCCACTAGCATAATTAAGGTATCCATTGTCTGTATGAATAGGGAATGTGCTAGA -ATGGGCATTACTTTTTTTAATTTTATTTCATATCGTATTATGGGTAGGTTTCATTGCATG -GGGTCAAAAGGGTTTCATTTTATTCTAGTCCAATGTTTGTAATATTGACTTTCAACCTTG 
-TATTCCAACCTTGGGGGGCTATCATATTCATCACATCCTAGGAGTCTAGAAGATGGCAGT -GCAACAAATGCTGGTGCCTACCCAAATGGTAAGAATCGTAAATCTGGAGGCCAAGAGTTA -TGAATGCCAAGATGACAACTTTTTGGCTAGGCTTATGGGGGCAAATGTCATCGAGGCTAA -AATCCAGTTTTTCTTTCGGGAACCATGGAAAAAGTGGCTGCAAATTTATGGTTAATGTTA -ACTCCGTACCTTTATTTTGTACTTAATTTGACTTTACTTTACCTGTCTTCCATGGTTCAC -AATTTACTTTGACCCAGCTGACATGGGCATAAATCCATGTGGCGTGCATTATTTTCTCTC -GGGGAGTTTCTGAAGCGAATACGAAGATCTGTAATATTGGAGTGTAACCTTCAACGGAGT -AGGTAGGTAATCCTGGATCATCATTAACGGACCAATTACTCGAACGTACTAAAATAAATA -GGATATTTCTATTACTTACTCCGTACTGGCTACCGTAAATCACATCTTCATATTTGCAAT -GTGATACGTCAGGCAGGGGTGTACTGCATGACTCGTACAAGGCGCGCACTGCCCAAACTT -CTTTTTGCTTATGCTTAACCCTGGTAAGGTTGGGGCCCCTTGCCTGCTGATCCTGGCCCC -CTTATTGGCATACAAGTTGCGCTCGCTTTGTCAAAGTCAAATATATTAATTCAAAGGCTG -TGCCTTTTCCTATGAGGCTGTGATCTTTCTAAATTCTAATCTGATGTACTTTCTTTTCAT -TATACAATTGCTCCTCGCCACAATAATAAGGAAAAGTGGCAAAACAAATCTATAAGCCCC -AACTGAAACCGGAATGCTCGGCATTAATAAGCGAGGTCCGAAACAGAAACTGAGCCAATA -CTCCAAGATACTCCAATGTCCCTCGGCGGACCACGTTTTCCCCTGGAGTATGCCGAGGCT -CAATTCCTTGAATTTCTTTGCCGATGAACAACCTGTCTCGCTGTCTTGTTTCCTAATTAG -GGTACTAATATGTATCTATGTTGTAATATCAAGCAATTAACCTACAACAGTACTCCGTAC -TGCCCCAAGGGCTAGTGCCACGCCCCAATTTTTTTACGTTTCGGTGGGACGCCTCATTTC -CAGACTACAGTGCTTGGCTTTGCTTGGGGTCCGGAGCTCCGGTGCATAAGTCAGGGATAA -ACTCTGCCGCTCATGTATATATTGTACGAGGTACACATAAACGCCAATATGCTACCCCAG -CTATGTACGCATGTACCAAGAAGTCCGGTCTACTCCGTATAGCTACTTTTCTCGTGAAAC -GATCGTCTACGCCTGGCGCTCTGCTAATGCAGCAAGATCTGTGCCTTAATTTTATTCCTC -GGTCAAGTTAAACCCCATACCTTTTTTTCGTCCGTGCGGTGCTTCTTGACGTTGAATGTC -TTGCACTCTAGTTCAGCCCTGTATGTGAATATCAAAGTATGATGAAGTGAGCATACGTGA -TTCTCTGACCTTTCAGACTGGTGTTATGTTTTGATATCTTATAATCATGTGTTATTCCCT -GTTTAGTACTAGGGCTTTGGACTCGGATATTTTTTTTCTCTAGTTACGTAGCATTCCTTC -GGGTTTTTAGCGTTGCGCTTTGTTCTGGCTTCTTCGGCCGGTTGTCTAGGATGATCACTT -GGCTGAGTCTTGGGACAGGGGCCAGATCCAATAATGCAAAAGGAACAACTACATCGGTGG -ATGTGACTATAACTTAGTCTTTCCTTTTCGCCCGAATGCCCATTCATTTATTTTGAGCCT -TCAGGTCGCAATTTAAAATTAGAATCTCGGGCTAGCGAACGAAACGGGGAAATGGCCATA -TAAAGGCGGGTAGATCAATAAGCTCTACATATAGCCTCTGCCCTAGTTCTTATATATTCC 
-AGTGATCTACAGAGTACACTCCGCACGGAGTATGAAGATAATTTAGTCGCTGGGCATTTT -CTTCCCCTTTCTTTCCCGTGCCCTGTCACACGTCAGAACCATGGAGTTCATAATGCCTGT -GCGGTATGTATTTTTATCAGATTATCTTGGGGGGCTTATTATTTGTGTGGGATGGTTAAA -GGAGAATAAAATACAAAGTAAGCTGAGCCGGGCACTGCCGGTTAGTATCTGCAAGCCCTT -TTTTTTGAGCTCCAGGTGGTAAGAGGAATGTATTCTCACTAGTCGAATTTGGCCACAAAG -TGCGACGGAGGCTCCGTTATGCGGAAGTCTGCGGAATAAGTGCCCCTGTCTTTGTCCTAT -TCGGGAATATGTCGGCAACATTTTCCTGGTGCCGTTGGGGATTCAGTGTCGCTCCGCGGT -TGATAAGGAATGCTTCATATGTTTATTGTCAATAGAGCAAAAGCCAGGCCCAGTTATGGG -TTACAGAGTGGATCATCTCATGTCGCTTTTGGATTGTTCCCAATTTTATTTCACTTTATC -TATGTAAATATGTATTTAGTTTTCATGCATATTATATTACAAATCTATCATACACCTAGC -CAGATGATATCTTTGATAAGCCTCAGCAATCAGCACGACCGAAGTATCGCGGGAAGGCTA -CGATATCTCGAACGCTGGAGACACCAGTCAAGTATCCCAAGAATCGATCGAATCCAAGCC -CAAAGCCTCCATGAGGAGCAGAGCCCCAGCGTCGGAGGTCAGCATACCATTGGAGGTGGC -TCAGATCCTCGTCAGGGGCGAGGTGAGGATACAACGCGTCGCCGAGAGTTAGATCACCAG -ACTCGGGAATTTGTCGGGGCTTGATCAAACCGTGCTCGCGCATATTCTGGATGAGATTGG -GCAGGCGATGCTCGCGCAGGGAACCCCCGGCGACTTCGCTGACTTCGGGGAGGAGCAGAT -CGAAACATGCTACTGTCTCCCCGGGTGTGTTGCTGCCATCGGTGGTCTGCGAAGGAGCCA -TGTAGAACGGCTTGACGGTTTTCGGGTAGTCCGTCACAAAGACAGGACGCCCTTGATGGA -GAACCTCGACGATATACTTTTCATGTTCAAGTTGTAGACCTCCATTCCATGTCGGAGGAT -GCTCGAATGTTGCTCCGTCCTCGGCAGCAGCACGCTGGAGCATCTCCACTGCTTGGGTGT -ACGTGACACGGTGCCATTTTGGCCCATCCATTAGATTGGTCCACCTTTGCCGTAGAGACC -CCGTCTCGGCGCTAGTTTCCTGGCCTGGCTCGCCAGACCTCTTGGCCGACAGGATCTCTT -GGCCCACCGGAGTATTATATAGACGACGCGTCAGATCGCGTAGCAAATGTTCCACTAAGC -CAGTAAGTGAGTCAAGGTCGGACATAAAATTGACCTCGGCTTCGAGCATATAGAACTCGC -TGAGGTGGCGCGGTGTATCGCTCTTCTCCGCCCGGAACACAGGTGTCAATGTCCAGACAT -TGCCAAGCTCTGCCGCATAGGCTTCCAGGTGGAGCTGCGACGATACAGTGAGGTATTTTG -GTGCTCGGAAGAAATGCTCGTTCTCAGTGCTAGGTGTAGCGGCACCACGTGGTGCCAATG -TGAACGTTTCACCTGCGCCTTCACAATCCGAGGACGTTATAAGGGGAGGTTGAACTTGTG -TAAAAGCTCCGTCCGGATGAGAGTGGAAGACATTGCCAAGTTGGAACATGCACTCGGATC -GGAAGCGCGAAAGGAGAGAATTGAAGGGTGTGCGAATGCGGAGGTGGGGAATCTGGCGCA -GAAAGTCGGGGCTGTGGTACTTCTTCTGGATTGGGAACGTCTGCAACGATTGATATCATG -GTTAGTGTGAGGTCTGGCGACAGCTGGGGGAAAGCTCTCGGGCTCACCTCTGGGTCCGAT 
-GCGCCTACAACCTTGACATCGGTTGTTTGTAGCTCATGAGTTTGCTCTTTTCCCGGTGGA -CAAGCTTTCCATACGCCCGATATTTCGATCGCGGTTCCAGTTGATAGACTAGAAAAAGGG -CGCTCGAGTTAACCTTCATTGACAAACAAGTCAGGGATCACAGCTCGCGCGCAGTCGTAC -GTACTCTGCTGCTTGCGCGGGTTTCAGGAATGCCTGCAGTGGCTCCACAGTCGAGCCATC -GGTTATTTCTGCAAAGGCAAAGCGTTTCTGCTTCCGTACAGACCGAACGAACCCGTTCAC -TTTGATTTCTTGGTTCTCGAAGCTACAGACTCCGCCATTCTTGCTAGCTTGGAGGATCTG -CGCGCACCGCACGATAGATGGACGAGCATTGATTGAGCTATTGTTAAGCCTACCCATAGA -CGTGGCGAAGGTCCGCCGCCATAGCTGCATTGCTGAAGGTTCTGTGGGTTGAATGATCGG -AGAGTTCGGAGATCGCGGCCAGGAAACTTATCGGCGTTGATGTCGGTCCGATTAGCGTTC -TCACCTGGGCAGCATTTTACTGACTTCATGTCAATCATCGTCTCGGTTCATGGTGCTAAA -GTATTGTGTTAAAACATGATTTATTTCGAAATAATATGTTAAATATATGTCGTAGACTAC -ACGCTCGTTATGTCCTCAGAGTTCTATACAGCCGTTTATATATAATCGGCCCCGCCCTAC -CAAATTATATAACAAGAATAACCAAGAAAAAGAATAGAAAGACCACTCAGAAGGTACATA -AATATGAGGTCTTGGAGCAACCCCAGCAAGTCATTATAGCAAAATCAGAGTTCAGCACCA -TCGAAATTAAAGAGTACCACACAAATTCCAGTCAAGCTAAATCGCCGCTAAAGGAATGAA -TGGGCCGTTATACGACCCTGTGTAAGAAAAGCAAGCAAATATTCCATGTCTCACTGTGGA -GGCCGCCGTTCATAAACCTGTCTTTCCTCTTCAATCATATCAATCGCATCCATGAGAACA -CCATCTACTATAGCCGCGTACACAGCCATCCTCGACCGTCCACCCTCCTGCCCAGCCCCG -TAGCCCATGTCGATAAGAGCAGAAACACATCGTTCGATTGCCGCGTTATCAGTCTCGGGA -ACACTGGCATCAAGTTTGTCAGAGAGTCCGGCCCCCGTCGTGACATCCGCCGTGGTATCT -TCCACCGGGAAGCTTCCGGGGATTTGTTCGTGTGAAGATGCAGCACTTCGTTCTCGCCTA -GATAGACGGGCCCATGTGTCTGTTTCCGCGGGACCACGTGATCTATCCCCAGGACGACGC -ATGCTTGAAGAATGCTTGAGGGGATTATGACGGCCGGTCTCCGCAGCGTGCAAATGATTT -GATTTATGAGTTGTGTGCTCAGTCGCTCGGCGCTTCAGTTTGGGCGCATGCTGGGGGTGA -CTGAGACCTACACCTGGGCGCATAGTCTCGTTGGAAGTGGAAGCAGGTTGAGATTGACCA -AATCCCTTCGGAGTAGGATGCACGGGTAAAGTGTGAGTTGAGACTCCCTCGCAGTGTTTT -GAAATTGGCGATTGACCATCCAAAGCACCAGTTATAGGCTCTGTTTCACGGCGTTCCTGC -CGATTTGCAAGAAGCTGTGCCTCCCATTGAGACACGGGTGGGAAAGGAGACTCCGTGTTC -GAATACGAGGAAACACCCCGACCGTTCAATGGATCGTTATGCTGTGTGCTAATTTCGGGC -AAAAGCGAAGGGAAACGTGCGCTGAATGCAGAGTCGTCGCTTGGAGAGTCAAGTAGTGCG -GACGATTCTGTTTGGGGTGAACCAGAATCAACATTTTGCGTGAGCTCTACGAAACTGACA -GACTTTCTGCGATTCTCCGTTTTTCCAGGCCGAGACGAGCTCATTCGTGACTTGACAGCA 
-AGCGGGGACGCAGAGGGTCCCAATGCGGTATTTTCTCGTTCAGAGCGAGCATTTTTTACA -TCGTTCATCCCAGACTGAGGGGTTTTGGCCTGAGTATCGCGTGCGGTTGTAGCGGCATTG -GGTGAAACCTGCGCCGGACGCACACTTTCGATGGCGGGATGATACATAGGCTCCAGGTTT -ATCGAATGACCGTCAATGACAGCACCTTGCAATGCCTCCATTGCAGCGAAAGCCGGATAG -TCCGAGGGGAAATTTACGTAGCCAAAGCCTGCATGTCTTCCTGATGTGGTATCTAGCGGG -AGATCCACCTTGACGAGGAATCCCTTGGCAGCGAACACATCCTGTATCATCTTGTTGGTA -ACCTTGAAACCAACATTCCCAATGAATAAGGATTTAGTTGCTGATTTGTTTGTTGACTCT -CTAGACTGCACTTGTGATGTCCGTCTAGCATCCAACGGGGGATTGAACACAGCAGGAGGC -GGAGGCGGAGGTGCAGGCGGCCAGTAATTCAAATTCGGCAACGATGCCGACGGTAGAGGA -GGACAGGGAGGCGGGGGTACATTATAAGTTTGGAACGGATTCCACATCGGAGCAGGCCAC -GAGGGAGGTGCCCAATGATTTGGGTGGGTGTGAATAGGGTGACGACCATGTTGACTTTGG -ATGGGGGTTACTCCTGCTTGTGCAGTATGAGAGAGGTTCGATTGGCCTGGGCCCGCTGTT -GATTTCTCGTTCGTGGCTGTGCTTGAATGATGTCCGTCTGGGGTTTGGGAATACATAGTG -CCCGTAGCCGCAGAGCCATTGCTTGCAGTAGTGTTAAAAAATGGCCCTGACGGTCCAGGG -GTTGGCACCTCGGAAGTCGAGGTCGACGTAGAGGCTGCTGATGTAGCGCGGCCAAACTCG -CTTTCAAATGCGGCAAACAATGTTCTTCCGACCTCGTTGAGTTCGGAGGCCATCGTACGA -AAACCATCCGCGGCATTCTCGGCGACAGGCCTTCCTGCTTGGAAGGCATCTTCTGCCATT -TGCCTTCCGCCATCTGGAAGATTATTAAATGCAGTTCTTAGGTGAGCCTCAACAGTGGCA -AGTAGTGTCCGCAGTGAAGATTCCACGTTCTCAGGCAGAGATCTTTGTGCAGCCTCAATC -TGTCTCTGAGCATTCTGTAGCTGTCGCTGTAGCTCAGGAATTTTTGATCTCCATTCAGAT -TGTACCATAGTAGCGCCGTTTATCAGCTGGTCTAGAACCTGTGCCGCTAAAATCTCAACG -GGATGAGGGGATCGCCCTGAATTTGTGTTGGCTGATGGTTCAGTAGTCGGTGGCGCATCA -GGTTGAGGCGCTCTACTCTCCAAGGACCCATCCGCATTGAGCATCCCCGCTAGCTCGGCC -TCAAAGGCTTCCATCAAAGGCCGAGGTTGATCCCCAGAGATAGGCGGCGAGTTTGTGATA -TGAGGCACTGGCACCTCATGTTGCTCTTCGAGGACAATAGCCTCCGAAGTAGCTTGTGTG -TCTTTCTTCTCACTAGCCGTGCCTTGTTTTGAAGCGTTGTCTTCAAATCGCTTCCACAAC -TCGGTCACGGAGTTTGATCGACGAATGTCAAATATGTGCATCGGACTGATGTCATTGGGG -GAAAGCCGGACAGATTCGAGTTGTGCGTTAAAATCTAGGGGCTCCTCAAGCCGCTGAGAG -AGCTCGAGAGCTGATCCAACCTGAGGAAATGTGTAAGATTTTGACCTCAGCATGGTAGAT -CGCGGACTGTTGTGAACTTACAGTGATTTTTTCGTCATCATCATCGTCATCCATATAAGT -TAGCCTTGCGCGGGGATGGTTTGACGCTAAATCATCGTAGTCTGTAGCAGAAATCTGGAC -CACTCCCGAAGAAGTACAAGAATCGGCAGGTTGCTTGGGTTGGTGCTGAGCGGGTACCCG 
-AAGGTTTCTGATTTGGAAGGGCTTCAACATGGAGAAGATGCACTTTGAGTCCTGTAGATT -AGGACCACGAACTGATGGGGCGTTACAGAATCGAAATCAAAAAAAGGATAAATTAAAGTA -AAGCTGGTAAAGATATGAAATGGTTTGGAATTATGATGCCTTTGGGGCTAGTTTGAAGAT -TGGGGGGGGGGGAGTTGTATAAACGGGGGAAAACGATCACGAGATTCATATGTTGCTGCC -TGTGGCAACACTTTCAACTTTTCAAGTTTGATTCTTCCAGATGCAAGGATAAGAATCCCA -AAAATCAAGGATTTCAAGTGAAAAAAAATACATGGGAGTTTCAGGCAACAGGATAAGAGC -AAGCATGGTGAAGCTCTCCGGAGAATCCCCGCCCCGATCACATGACATCAAGAACTGGAA -TTTTAGGCGTCAACTCTGATGTTCTTGACATTCAAATGAATTCAATAGGCAATGAATAAT -AATTGAGAATAATGTCCACATAACATCCCCAAATATCTGCGCGATCGTATACTCTCTCTT -CATCTATCAGCAGCTCTCAGATCTACGATAATCCCAATGGCTGGTCTATTGGAGACGGTA -CATCGCATGAACGCCCCCAAAACCTAACATTCCGACAACTTCGATATCACGTCGCGCCGA -TTGAATCTCCATAGAAATCAATGGCGACCTCAAAGTCTGCGCCATTGCAGACTCCAGCAG -ATCGCAATGTGAACAACGTTGTATTGGGCGATCTCTTGTTCAAGCCATGGAACCAATCCA -TTTACCCCGAAGATCTCGTCGCCAAAGACGCCGATCGACTCTACGTCTGTCGTTGGTGTT -TTCGATATTCATGCGATGGTGATCTGTTTGAACAACATAAACGCGCCTGCGAACATCGCA -TAACCCCACCAGGCACGAAAGTCTACGACCATGGCGGGTATGCAGTGTGGGAGGTTGATG -GGCAGAACCACAAACTTTTCGGACAGAATCTGTCCCTTTTCGCCAAACTCTTTCTCGATC -ACAAGACAGTGTTTTTTGATGTCGCAACCTTCCTGTACTACATTCTCACTTTCACCGACC -CGGACGACTCCGATAGCTACTACGTGCTCGGTTTCTTCTCAAAAGAGAAACTCTCCTGGG -ACGCGAACAATCTTGCCTGTATTCTGATATTCCCGCCTTATCAACATAAACAGCTCGGGA -AGTTGTTGATGGGGGTTAGCTACAAGCTCAGCGGATGGGACTCCAACGGGGGGTGCATCG -GTGGACCAGAGAAACCACTCAGTGAGCTGGGCCATAAAAGCTATGTTCGTTTTTGGGCCG -AACGGATTGCGCGATTCTTGCTCTGCGGGAAACCTAGCGGCAGCTTCATCGAAATCGACC -AACAGAAGCCCAGCTCAACCCCCAAAGGTTCCCGCAAGAGACCCTTGCGCGAGACCATTA -CCGTTGAAGAACTTGGCTTTGGAACTGGTATGTTGACTGAGGATGTTATCACTGCACTCA -ATAGCATGGGGTTATTTGAACTTCAGGCTATCCCCAAAAAACGTAAATCCACCCGTACAG -CCGTGGAGGAAAATACCGCCCCACGAGGTCAGATGGTTACCATCTGCAAATCTGATGTGT -GGGAGTGGGCTCAAGCACACCATCTCTCCCTAGATGATCCAGTCAAGGAAGAAGGCTTTG -CGGGACTCTGGCCCCCAGAGATTGTGTCCGATGGGAGTGGCCAAAGTGTTGCCTCAGACG -AAGACTGAGTCTACCAACGAGACAGCTTCCAATCTTATGTACGATAGCGCTGATTATAGT -ATTTCATCTAACGACGACAATGAGAAGCAACGTGAAGTTTGTGTATTTTTGAATAGATTG -GCTCTTGGCCCCCAATTCTTTAAGGAATCAGGTCTTTAAACCTTTTAAACTTTACATGAA 
-CCGCCTGATTTCGTAAATGAGAAAGTGCATTACTCTTCCTGACCCCCGGGGCCACCAGCA -CCGGATTTGCTCATCTGCCGAGCCTTCAGTAGCTCATCACACAGAAGGAGGTTAGAAGCG -ATGCCTGTGCTGGAAGCGATGCAGTTCCGGAGGACTCGGAATGAATCGAAGACACCCTCT -TGGACGGGATCCATGGGTTCGCCAGTGACCAAATCCAAACCAACAACGTTACCATCAGTT -GCCTCGTCGTGAAGAGCAGCCAGAGAATCCTGGATATCGTGGCCTGAGTTGGCAGCAAGA -GTTTTAGGAATGATCAAAAGGGCATCAGCGAACGCACCAACACCCCACTTGGCCTTACCC -TTGACGGTCTTGGTGAAGGCCTGGGACTGCAGGTGTGCAGCACATGCGACCTGGAATGCA -CCGGCTCCGGGAATGACACAGCCATCGACAATGGTGTTGTAGACAGAGCGCAAACCATCA -CGAACGGCATCCTTGACCTGGGTGATGGTGTGCCCATTAGGACCCTTGATGAGGATGGTC -ACAGACTTAGGATCCTTGACCTCTTCAACAAAAGTGAATTTCTCCTCACCAAGCTGATGC -TCGTAGACCAGACCAGCCCATCCAAGAACCTCTGGAGACAAATCCTCAACACTGTTCTGA -GCGGTTCCACCACAGACAAGCTGCAGACGCTCCATGTTTCTGCGCTTGGCTCTTCGCAAA -GCGAAAATGCCATTCTTGACCAGAACATCCAGACTGAGGGGATCGATACCCTTCTGGTTG -ATGATGACGAATCCCTTATTGGGGTCGTTTCCACAAACTTCCTTCTTGAGGTCCACAATC -TTCTGCAGCTTCGCATCCACAAACTTGCGCTCACTCTCAACTAGCTTATCTCTCTGCTCG -GCGTTAGAGTAGTAGAATCCGGAGTTAATTTCGGACTTCTCGTACTCCAGACTGACGTTC -AATGTCAGAATGAAAGCGTTCTCAACCCGCTTGGGCATGTCGGGGTGTCTGGCACCGTGG -TCCAAGGCAAGACCACGAATAAGCTGGGTATCAGATGAGGATCGGTGTTGCATCGTCATA -ATTTCAACCATGTGCAAATCGGGCTTCTCGGGGGCTCTGTGGATGGCGAGCACGGCATCG -ACGATGTCGGGGGTAAGCTTCTCAGCGAGAGCACCGTTAAGCTTGGTGGAGAGAGAGGTA -CGGGCGACGGAGAGCAACAGCTCTCGGTCAATGGAACGCTCGAGCTTGAAGTTTTCAAGG -AACTGTAGGTGATGTGTTAGATCCATGTCGAGATCCGAGTTTCAATCTCCATTACATACT -TTCAAGGCCTCGGTCTTGGCAATCTCGTAGCCATCAGTGATGACACGAGGGTGCAGACCC -TCAGAGATGTGACGCTCCGCTTGCTTCAAAAGCTCTCCCACCAGCAAAACGACAGATGTT -GTTCCATCACCGGTAATATCATCTTGTGCCGTCGCAGCGCGAGCGATCATAACCTTTCCA -TTCTGTCAGCAACATGTAGGAAGCTCACGTAGAAATCCAATCTTACCGCCGTGGGGTTTT -GAATTTGCTGTTGATTGTTAGCTTCAAAGCTCGAGGCCCTGGGATTCTGGGGGCAGAACG -AACCATCTCCCGCAACAAAACATTTCCATCCTTGGTCAATTTGATCTATCATAGCAAGCC -TGTATCAGAAAAGGGATCGTTCGTGCCGTTCGAGAGGAATTGGAGATTGCATACTGCGCC -TGCACCATCAACCAGCCTAAGTTTTGTCAGCTCCATTGCCTTTGTATGTGGGGTGTATTC -GTAGGAAACTCACATCTTGAGAGTTCCCGAAGGACCCAAATTGGACTTGAGAACATCCTG -AAGACCTTCACCGGCACCGATGTTCACCTTCAAGGCCTCTCCTCTCCGCTACGTGAAATG 
-TTAGTGTGCGGGCATATCATTGAGGGGCCATCTGGGAATATTTTCCATACCCTCGACTCC -GCCTTCGGGTTAAGTAGTTGAGTTGCAGACATCTCCCAGGTAATGAATGGCAGGGTATCA -CGTCCAATAACTCAAACAATATTAAATTAAAGGGATGGTAGAGCGCTTGTTCCACGTTAG -GTCCGCGGCCAAAAAGCCCACATTCGAGCATGCTCTATGCTCACGTGACTGTATCAGCGG -AGCGGGTAAACATATGCTCCGAGGCCGACCGTCATACCTCCGGTTCGGTGCCTTAGGATT -CGCACTAGCGGCGAGAAAAAAAAATTGTCCATTTCCAGCCGTCGCCAACCTCACCAGCAA -TCCATCGAATTGAACTTTTCGAGGTTTTCTGGCCGGTGTTGCAAGATTTTATCATTATTT -TGCGATTGACCTTGTATTGGTCCTAACACAATGGCCGATTTCCTACTTTTCGAGGGCCCT -ATGGGTTACTCGCTATTCAAGGTCGCCCATCAGGGCGACTCTGTCGGCAATGCCTTGAAG -GAAGTTCAGGAGGGTGTAAATGACCTTGCCAAGTTCGGAAAGATGGTCGAGCTTTCGAGT -TTCCTGCCCTTCGAGTACGGTTTATCTGTCTATATGAAAATGTTTTCATGGCCCACTAAC -AATTTTTAGGAACAACAAGCAGGCTCTTGGAGAAATTAATGACATCTCCGAAGGTGTCGC -ATCCGATACTCTCGTTTCTTTCCTTGAATTGAACCTCCCCAAGCCGAACAAGAAGAAGAA -GGTGATCCTGGGTCTTGCCGACAAGGCTCTGGCAGGAAGCATCAAGTCTGCTTTCTCATT -CGTCGACTGTGAGACTGGTGATACCAGCGAGGTCGTTCAGGACCTTCTCCGTGGTATTCG -TCTCCACGCTGGCAAGCTTCTCAAGCAGCTGCGCGATGGCGATATGGACACTGCCCAGCT -TGGTCTCGGTCACGCTTACTCCCGCGCTAAGGTAAAGTTCTCCGTCCAGCGTGACGACAA -CCACATCATCCAGGCTATCGCCATTCTGGACCAGCTCGACAAGGCTATCAATACCTTCTC -CATGCGAGTTCGCGAATGGTACTCCTGGCACTTCCCCGAGCTCGTCAAGATTGTCTCCGA -CAACCAGCGCTACGCCGAGCTCGCCCTTTTCATTAAGGACAAGCAGGCCCTGACCAACGA -AAGCCTGCACGACATTGCCGCCCTTGTTGAAGATGATGAGGCTGTTGCCCAGAGCATTAT -CGATGCTGCTAAGACTAGTATGGGTCAAGAGATTTCCGAGGCCGACATGGAGAACGTTGT -CGCCTTCGCCGAGCGCGTTGTCAAGCTCGCCAAGTACCGCAAGTCTCTCTACGCTTACCT -TGTTGCCAAGATGAGTGTCGTCGCACCTAACCTTGCCGCCCTGATCGGCGAAGTCGTCGG -CGCCCGTCTGATCTCCCACGCCGGTAGCTTGACCAACCTCTCCAAGTACCCCGCCTCTAC -CGTTCAAATTCTTGGTGCTGAGAAGGCTCTCTTCCGTGCCTTGAAGACCAAGGGAAACAC -CCCCAAGTACGGTCTGCTGTACCACTCTTCCTTCATCGGCCGCGCTGGCCCCAAGAACAA -GGGCAGAATCTCCCGTTTCCTTGCCAACAAGTGCTCCATTGCTTCCCGCATTGATAACTT -CTCCGAGGCCCCTAACACCAAGTTCGGTGAGGCCCTGAAGAGCCAGGTTGAGGAGCGCTT -GGAATTCTACACCTCCGGCGCTGCCCCTACCAAGAATGAGGTCGCCATGGTACGTTGAGC -TCTACTTATCCTATTAAATGGCTATTTATGCTAATAAATTACAGAAAAATGCCATGGACG -CCGTCCTGGCTGACCTTGATGTCGATGCCGACCAGAGCGATGCTGATATGGAGGACGCCG 
-AAGTTGCCGAGGCTAAGGCGGAGAAGAAGGAGAAGAAGGAGAAGAAAGAAAAGAAGGAGA -AGAAGGAAAAGAAAGAGAAGAGAAAGTCCGTTGGTGGTGAGGGCGAGTCAGACAAGAAGA -AGAAGCGCAAGCACGACACGGATGCCGAGCCCTCCAAGAAGAAGCAAAAGGCCTAGATAT -GCAATTTGTTTGGACTTGGGAGTCTTTGTCATGGCGTTAGGTTTTGCTTTTGTCTTGATG -ACTTGATTGGTACGGCTTGCAGGTATTTGTAAATTATTACCAGGCCTTTTGCCTCAGCAG -CTTTGACTATCGCTGCTCTGATATTTTTCAATTAAGTGTTTTAAGTTGTTCATGTGGCAC -TATATATGGAGTAAGGATGATATTCAAACCAACCTGTTTTTATTCCTTGCAGAACAGAAT -CAAGCGGAATCCCTTGATGCATGCGCGGCCTAGTCGGTGGCGATCATGTTGTACCCCACG -TCTGATTCAATGCCGAATGTCCCCACGACCAAAACCCCTCTCAGCCCTGACCTCCGCGTT -GTGCAAGCTCTCCAGTATGCATACCTTCGTTTCTTGAGAGCTTGGTGGTGGATACCCTAT -TCACCTGCAAATGAGATCCATCGGGTTGCTCAGTGACGTCCCCACTACGTCCGTTTTCAG -TATGATGCCGATGGGCTTACCTTCATCTTTCTCTCCCAAGCGCAAACGAGAGCCCTCGGA -ATCAGACTTCTATAGCCCCTCAGCCTCACCCACATCAACAACCTCCATTGATAGTTTCCC -GGAGGTTCCACTTCGGGACGAGGACGAGCTGGAAAGCTACAGTCCTCGCGCTGCAGTGGC -TGGACGATTTGGGGAGTTAGCCATCCGCGGGGACCTGTTCTTGGACCCTGGAACTTTGAC -TGGCAACTCTAAGCGAGGATCCCTGGCTCCACCGACACAAGCATGCGAGCCTGACAGCCA -CGAGCTCAAAAACATGCCTGAAGCCCTCCCGGCCACTAGCAGCGAACCCGGTGACCGCCA -AGCTCTGCGATTCGAGCCGCCTACAACACAGAATCCGACCTCCTTGCCGACTTCTTCGCC -CATTTCTCCCTCAAAGAGGAAGTCTGCAACTTTCTCCCGCAAACAGTTGAACCCGTCATC -GCCATCTAAACGCAAGCAACGTCTTTCCCCTCCTCTTGCGGGCACTCCGTCCGAAGAAGA -CCCGCTTGTATGGCATGACTCGGAAATAACAGGCCACAGCCCATCCGACCCAACGGATGA -TGGATACGGAATCAATGGTATAGGCTTCAAGCCCACTGCCTCAATTGCGTGGGAAAGATC -ACAAAAGCGACAAAAGCAAGTGGCAGAATGGAAGAATCGCGAAGCCCGGGAGGCTCGTGA -AAGACGACGCGAAAGGAGGAGGGATGGCGCTGAGTTAGACAAATTGCATGGCATAAATGA -AGGTGCCATTCAGAAGCGTGTGAAATTCAATTTCTAGCGGAGCCGGCATAGCTGTGCAAC -ATGCAGCCACATTTGGATATCTTTATTTCGGCGTTTTTGTCTTGGAAGGGCAGGCATATG -GGCGGTTTTCTTCTTCTTTACTTTTCTCTCGGACAAATGGGAGGGTGCGGCGATGAATAG -GCGGACAATATGAACTTTGCAGAGAGCTATGATACACTGAGATGACTTATGACATGGCGT -CGATCACCATTTGATCCATTGATCAGAATACATATACAGTTTTCGGAACCCCTCATAAAC -TCCATGACTCCTTTTCATGAACCAAGAGCACTACCAGAACTTGTAGTTGGCAGGATTGGT -CCAACTTTTGCTCTTCCTTTCCTGCTCAGCAAGACGCGCGCGCAACGCCGCTTCTTCCTC -CGCCGTATCCTTCTCACGTTGTTTCTTGGCCTTCAATTGATTCATCAGATCGAGCGCTTC 
-TGCCATGCCACTCAATTCGTTATTTCGTCGCCAGCGGCAGTATTCATATGAGGCTAATGA -AGTGATCGCAAACATGCCAACAGCCCAGTTTCCTGCAGACCATAGATAGTACCGTCCTGC -ATGCACATATTAGACTATGATTGCAGTGTTCCAATGTTAAAGAGTGCAGCTATACCTTTA -ACCAGGCCTCGTAGACCACCAACTCCAAATCCAACACCAATACCAGTCAACAAGGATTCC -CGAGCACACGGTCTTTTGTGAAGGGTGGTCATTTCAGAGACCGGGAATGTTCCAATAATC -TCGGAGTATGAGACATCCTTAGGGTTTTTGCCCCGGGGCTTATAGGTTGCTTTGGCGAGT -GCATTTGCGGGTTCCTCGGGGTTTCCGAAGGCATCCCATAGCTTTCCAGTTTGTGACTGA -GATGGACTGTATTTTGGTTTCTGCGAATCGGCCTGATCATTGGGGATTTGTGTGTTGCTA -CCAGCTTCTGGGAGAGAGCTGGGGTCTCGTGAGTCTTCCGACATTGTTGTGTAGAGTGAA -TTGCTGCTAGACTTCTACTTGCTGTCTTCATCACTATTGAAGAAGATCACCTATATCGGA -GGAATAGATAATCCATAATTTTAAAATGGATAGTTATTCCGGAAGGCTGACTAACATTTT -CAACGCGCGTGGGCGGTACGCGAACGCGTCGAGACTATCGAAATCGATTTGATTGTAACA -GATTATAGGCCATTGCTGGACCCTCCTAGAGGCACAATTAGCCCCAAAACCAGATCATTA -TATTTGCGCCTAACTGAACCTTGTGTTTTTCTTTGCTTGATCTGAAATACACCAAGTGCG -CATTTCATGTTCTACACTGGCCACTGGTCCCGTCCTTTGAAAGAGGTACTTTAACAAGTC -CATCTGGGCGCAGCGTCGCAATGCCTTCACGAAAACCCAGCAAGTATGGAAACAAGTTCA -GGTCTAGTGCTGCATCCTCGAATCCTCGGCGCTCCAAAACGGTCCAATTCAACTCCTTAC -GCTCGACCGAGGCAACCTCCCAAGATGAGAAATTCGAGGCCATTCGGTTGGCCAATACCA -TCGACGAGACTCTAGGCTTCCCGCGATTCGAGTCCGGCGAGACAAGGGCCGGCTGGCTGG -TCAACATGCACAGCACATTGATTGAGGACCCGGACGTACCTGGCGGTCGTGCCGGCGTGG -ATTATTACTTTCTGCAGGACGACGGAGGAAGTTTCAAGGCTACCGTGGAATACGATCCTT -ATTTCTTGATTTCGGTCAAGAAAGGCTATGAGATGGAGGTAGAAGAGTGGTGTCGCCGAA -CCTTTGAGGGCATGATAAAAAAATTCAAGCGACTTGAGAAAGAAGATCTCAACCTCCCAA -ATCACCTCCTCGGCTACCGGAAGACCTACATCAAGCTCAGCTTCGCCAACGTGAGCAGCC -TTTTAGAGGTTAGGAAGACTCTCCTTCCTCTCGCAGAGAAGAACAAGAAAAATGAGAGTG -CGATGGACGCATATGTTGAGATGACCAGGTTTGTTCTGTATTACATGATTCGATCTGAAT -ATGCGCTAATGCATGGCCCCATAGTGCGACTGCTGGTATCGACCTTTTTGATGACGAAAT -AATTAATGAGCAACGACCTAACGGCAACCTACAAGCGAGTGATTTTATTGTTGATATTCG -AGAATACGACGTGCCATACCATGTCCGGGTCTGCATTGATAAAGGTCAGTATTTGTGTCT -CCTGGCGAGGCCTCATAGCTGACGCATCGCAGACATTCGGATTGGAAAATGGTACAATGT -TCAGGCAGACCATGGCGTAATATCGCTGACTTGCTTGGAGGATCGGCTTCAGCGTGCCGA -TCCTGTCGTTCTCGCTTTTGATATCGAGTGTACGAAGTTACCGTTGAAATTTCCCGACGC 
-CACGATGGATCAAATTATGATGATTTCATACATGATTGATGGGCAAGGGTTCCTAATTAC -CAACAGAGAGATCGTGTCAGAAGACATTCACGATTTTGAGTACACCCCCAAACCAGAGTA -CAACGGACCTTTCATTATCTTCAATGAACCCAATGAAAGAGCACTCCTCGAGCGATTTTT -CGAGCATGTCAAAATCTCAAAACCTACCGTTATTGCGACATACAACGGTGACTTCTTCGA -TTGGCCGTTTGTGGAAACAAGAGCCAGCATCTTGGGACTCGACATGTATATGGAGATCGG -TTTCAGGAAAAACAGCGAGGACATCTACCAAGCCGACAACTGTGTGCATATGGATTGTTT -CTCCTGGGTCAGCCGTGACAGTTACCTCCCTCAAGGAAGTCGTGGTTTGAAAGCCGTAAC -TGTTGCAAAGCTTGGTTACGACCCGGATGAACTCGACCCAGAGCTTATGACACCGTATGC -GAGTGAAAGGCCGCAGACATTAGCCGAATATTCGGTTTCCGATGCTGTTGCAACCTATTA -CCTGTACATGAAATATGTTCACCCATTCATCTTCTCGCTTTGCACAATTATCCCCCTCAA -TGCTGACGAAACGTTGCGGAAGGGAACTGGAACTTTATGTGAGATGCTTCTCATGGTTCA -AGCCTACCAGCATGAGATTGTCTTGCCCAACAAACACAAGAATCCTCCAGAAGCCTTTTA -TGAAGGTCATCTCCTTGAGTCGGAGACCTACGTTGGCGGCCATGTTGAAAGTATTGAGGC -TGGTGTCTTCCGAAGTGATATCCCTACACATTTCAGCGTTGATCCTACTGCGATTGATGA -ACTTCTCAAGGATCTCGATCACGCCCTCAAGTTCAGTATTGAGGTTGAAGAGAAGAAATC -CATGGACGATGTCGTGAACTATGATGAGGTTAGGGCACAAATTGTGGAGCGTTTGCTTGA -CCTCAAGAACAATCCAGTCCGGAACGAGGTGCCTTACATTTACCACTTGGATGTCGCCTC -CATGTATCCCAACATCATGATCACAAACCGATTGCAACCGGACTCGTTGATTGATGAGTC -AAATTGTGCCGCCTGCGACTTTAACCGACCTGGAAAAACTTGTGATAGACGCATGCCTTG -GGCATGGCGAGGTGAATTCTTACCTGCCAAACGTGATGAATACAACATGATCCGACGTGC -CACAGCAAACGAACGTTTCCCAGGCAGGTACCCAAAGTCCCCAATGAGGTCTTTCGGTGA -ATTGAGCATTGAGGAGCAAGCTGGAGTGGTCAAGAAACGGCTACAGGATTATAGCAAGAA -GATTTATCACAAGATTCACGACAGCAAGACGATGGAGCGGGAGGCTATCATTTGCCAAAG -AGAGAATCCCTTCTATGTCGACACTGTGAGAGACTTCCGTGACCGACGATACGATTTCAA -AGGCAAGCAAAAATCATGGAAGAACAAAGCCGACGGTTTGCAGTCATCGGGCGGCTCAGC -TGCTGAAATCGAGGAGGCAAAGAAGATGATTATTTTGTACGATTCTCTTCAGCTTGCACA -CAAAGTCATTCTCAATAGTTTCTACGGATATGTGATGCGAAAGGGTTCTCGGTGGTATTC -CATGGAAATGGCCGGTGTCACTTGTCTGACAGGCGCTCACATCATTCAGATGGCGAGAGA -ACTTGTTGAACGTATCGGCCGTCCCCTAGAATTAGATACCGATGGTATCTGGTGTATGCT -TCCAGGAGGATTCCCCGATAATTTCTCCTTCACTCTGAAGAACGGGAAGAAGCTGGGTAT -CTCATATCCTTGTGTCATGCTTAACCACCTCGTACACGCAAGGTACACCAACCACCAATA -CCAGGCGCTTGTCGATCCAAAAACCTTCAAGTACGAGACTCACAGCGAGAATTCCATCTT 
-CTTCGAAGTAGATGGACCGTACAGGGCAATGATTTTGCCAACGTCGAAGGAGGAAGACAA -GAACCTGAAGAAACGATATGCAGTTTTCAATCACGATGGCTCCCTAGCTGAACTCAAGGG -ATTTGAAGTCAAGCGAAGAGGAGAGCTGAAGTTGATCAAGATCTTCCAGACGCAGCTTTT -TAAATTCTTCCTCGAGGGCTCTAACCTTGCCGAGACCTACGCCGCTGTGGCCCGAGTTGC -AGACCGGTGGTTGGACGTTCTATACGAACATGGAACAACTTTGGCTGATGAAGAGTTAGT -CGATCTCATTTCGGAAAATCGAAGCATGTCAAAAACCCTGGAAGAGTACGGTTCTCAAAA -GTCCACATCCATCACGACAGCAAAGCGCCTGGCTGAGTTCTTGGGCGAACAGATGATCAA -GGACAAAGGCCTCAACTGCAAATACATCATTTCATCACGCCCACGGAACACACCTGTCAC -TGAACGAGCCATTCCTGTGGCTATATTTTCAGCTGAAGAGAACATCAAGCGATTCTTCTT -GCGCAAATGGCTAAAAGAAGATCCCGGCGATATGGATCCCCGAACCGTGATCGATTGGGA -CTATTACCTAGAGCGTTTGGGGTCTGTGATCCAGAAGTTGATTACCATTCCAGCTGCGCT -TCAGAAGCTTCCAAACCCAGTGCCCCGGGTTGCCCACCCCGATTGGTTGCAACGTCGGAT -CAACACGAAGGAAGACAGGTTCAAGCAAAAGAAGATGACCGATCTATTCACCAAATCCGA -AAAGACGCCCCTTTCGTCGACAAATGCCAATATCCTAGATCATCGGGTTCAGCACGCTGG -CGACCTAGATGAAGCAATCGCAGGTTCAACTCAAAAATCAAAATCGCCTAACAACAAGGC -TTCCCAGAAGAGAAAGCATCCAGAAAATGCTCCAAAGACAGCTCTTGATCCCTTTGCTAG -CCTGCCAGCAGTTATGCCATCTATGTCAGAGAACTACGAAGGGTTCCTTAAGTATCAGAA -GCAAAAATGGAAGATCCAGAAGCAAGCACGACTTCGCCGTCGTCAACTGTTTGGCGAACG -AACCAACGTTGCCTCAGATTCCTTGAGTAACTTCTTCCGCAATCAAGCTCAAATGCTGTA -TATCAGCACATGGCAGGTCCTGCAGCTTTGCGAGACGGGCGTTCCGGGAGTTGTGCGAGC -TTTTGTACTGATTGACCAAAAGATCCATGCCCTCAGCGTTAGAGTTCCACGGCAATTTTT -CTTGAACTTGAAACGGGACTCTTTGCCGGACGTTGACGTGCCAGACTGTGAAGTCGAGAA -GGTCAACCATACCCTACCCAACGGCCATCCCTCGGTTCACTTGTTCAAGCTGTCTTTGTC -GGAAGAGAAATATCTAGAAGAAGCCGACAAGATGGACATTCTTTTCCAACACCCTAGCGT -TGAGGGCGTTTACGAAGGAAACATCCCACTCAGCACTCGAGCGGTGCTCAAGCTAGGAAG -CCATTGTACCTTCGATGAAGAGCAACGTGGTGTTCTGGGTGACGGCTTGGACAAGGGCTT -CGACCTTTCAACATTACTTCACACATCTTCTGAGCAGCCCTATCTGATGGACTCGTCATT -GGTGTTCCACTATTTATACCACATTGTCTCTGGAGATAGACAAATCTTCGCAATATTCTC -GACAGCCAAGAATGAGGCGCATATCATTGTTCTCAACCGCACAAGAGATGTCCAGGGTCT -TCCCAACGTCGATAAAATCTACACCGACCTCCTCGCACGTAAGATGCAAGCCGCATCGGG -TGACGAGTCGCAGCATGCGTTCGAATACCAGGACAAGATTCATTTCAAAACTACCCAAGT -GATGACGCGAAGGAAGGCTCACCTTGAGGTTGGTGATTTGATCAGAAAGATTCGAAATGA 
-TGAGAGTCAGCCAGCCGTTCTTGTCATTCAATCCCAACAAAAAGACCGCCTGTGTCACGA -CATTCCGATCTTGAAGGAGTACCCAGTATTATCCGTGAAGCCCGAAGCATCGGACATGGA -ACTTCCCCCACTAGGCTGGCAGGCATTCATTGCAAGACGTCTAGTCACACGCTATCTATA -CCTGTCTTCTTGGATTCAGCACTTGACCATGCTCGCTCGGTTCGGTGATGTTCCGCTTTG -CAATCTAGAAAGTGAAGATCCTCGGTACCTTATTGATATTTCGTACGCTAGGCGACTTCA -ACAAAACAACGTTGTGCTCTGGTGGTCATCCGGGCCACGTCCTGATCACGCGGGCTATGA -AAGGGATGACATTCTAGGTGCCTTGGACAAGGTCAACATGCCATCTGTCAATGTACCAGG -TGCCTACTCCACAGTGTGCATTGAACTCGAGGTTCGTAATTTGTCGATCAACACAATTCT -GACATCTTCTATCATCAATGAAATGGAAGGCAATGATACGCTCCTGGCTTCTGGGGATGG -GGAAGATACCGGTGTTTTCTATTCAGAGAAAGCATTTGCCTCTGCCGGTGTTGTCGTTCT -CCGAGAGATGGTAAAACACTGGTGGACAGAAGCCTGTTCAGGGAACAATATGGCCGATAT -CATGGTCCAGCATCTGATCCGATGGGTGGAAAGCCCTGTCTCCTGTCTGTATGACCGGTC -TCTGCACAATTACGTGCGACTACTGTCACGGAAGTCATTCCAGAGATTGATGGCAGAGTT -TAGACGTGTTGGCTCGCACGTCATCTTTGCCAGCCCTACACGCCTCCTTCTCCAAACGAC -CAAGACAGAGGTTGGTAACGCCTACGCCTACAGTCAGTATGTTCTCAAGTCAATTCGCGC -CAATCCTTCATTCCACTTTATCGATCTAGAGATCAAGGAGTACTGGGACTATCTCGTTTG -GTATGATGAATACAACTACGGAGGCAAGGGTTGTCGAGAGGTCACCGAGGCCGACGAACA -GGATCTTGAAACAGCGATGCACTGGCAGCTCAGCCGGTTCCTGCCAATGCCCATGCAGAC -TATCTTCCATGATTGGATTGTTGAGTATATTGAGCTGATGCACGGCTTAAAACGGCCAGA -ACTCCAAGACGGGGATCTGTCTTCAACGCCTCGGCCAACACAAATTCCCATCGGACGGAC -CAACGAAGAAGACGACGACGAAATCACTGCCATTCTCTCGGACCGATTCTCAAAGCCATT -GAAGAAGCAAATCTCGGGATTGATCCGCAGACAACGCGACGAGATGCTTCATCCTGAGCT -TGCATCTGACTATGCCTTCCCCGTTTTGCCAGGAGTCCTCGTCGGCCCCAAGGACAATCG -CAACCCATCTCTGGAGTTGACGAAACTGCTGATGCAGGTTCTGAGTCTGTCCAAGACAAC -AACTCTGGAATCCCGGTTGCTGCGGCGCGAACTTCTTGCCCTTTTCGAGGTCCGAGAATT -CAGCAAGGAGGGCCGATTCGAGAATCCTAGTGCTAGCTTGAAGCTCCCGGAGATGTCCTG -CAACTCCTGCTGTCTTATTCGAGATCTGGATCTTTGCCGTGATGAAGATGTGCTGCCTGA -TCCCAGCTTAGAAGCCAGCAAATCTACCCAGCCCTGGCGATGCCCCTTCTGTCACGCCGA -GTACGACCGATTGGCGACCGAAGAATTCCTCATTGGTCAAGTCCATGGCTTTGTTGTTGA -ATGGCAGACACAGGATATCAAGTGCTCAAAGTGCAGTGGTCTTAAGATCAGTGATTTCAT -GGAGCACTGTTCTTGCAGTGGTGCATGGACCGAAACAGTTGATCGCAAGGAGATCGAGAA -GAAACTACAAGTGTTAGAAAGCGTGGCCAAATTCCACAAGCTGCAACTACTGGAGCACGT 
-TGTCGAGGAAGTGCTGGGGCGATTTTGAATGTACAATTCTTTGATTTTTTCTTGATTTAT -GCCCTGTTTTTCCTTTGTTTTGCATTGGGCTGGCGTTGAGTGAAATGTACATAGCTTAAA -TAACCAGGGCAAAGGAATGCCTCCAATAATCGCGCCGTCAATATTTTTTTTATATCCTGG -CTTTGAATTCCAATATTTTGCAAGATTCTATTTTCATAATCTTGGAGTTGAGCTGTTTCA -TCGGATGTTTCAGCTCTTCCCAATAACCCCCCGCCAACGATTCCCCAGGGATCTTCAACT -TCGAACCCCCGCCCTCGAACGTCAGACCGTTGCGCATACAATATACCCATATAATCATCC -CTTGTCGCCCAACTGTAAGTATTCTCCGTCGTCATACCTTGCGTTCTCGCGCTCGGGTCC -TGTGCCACTCAAGGCTGATTTTCGACTGCGTAGGTACATAAGGAACAGGCGTATATACCC -TCGATTCGAACCCCTCTAGTACCACGCTCAATCTTTCACACGCGCCTTGAGTCACATAAC -TCCTCCACCCCCCACGTTCTGCGTTGCGCATCCCGCACTTGGTTCTATTTGAAATGGCGG -CACAACCCCCGAGCGGCCAGAAGCCGAAGGTTCAGCCGTGCCGATACAAGACCGGAAAGA -CACTAGGCGCTGGTTCATACTCTGTTGTAAAGGAATGTGTGCACATTGATACCGGTCGCT -ACTATGCGGCGAAGGTCATCAATAAGCGTCTCATGTCCGGGCGGGAGCATATGGTGTGTG -CCGGGAGATCTACAGCTGCGAGACAAGAGCATGACTGACGCCTGAACTTTACAGGTCCGG -AATGAGATCGCGGTATTGAAGAAGGTGTCCCTGGGCCACCAGAATATCCTCACGCTGGTC -GACTACTTTGAGACTATGAACAACTGTGTGTTTCATCCCTCTATAATAATTACAATCGCA -CCCCCCCTGACAGTAATGCTGTCTAGTATATCTTGTCACCGATCTCGCCCTAGGCGGGGA -ACTCTTTGACCGCATCTGTCGCAAAGGCAGCTACTATGAATCCGACGCTGCAGACCTTAT -GCGCGCAGTGCTTTCTGCCGTGGCCTATCTGCATGATCATGGGATCGTGCACCGTGATCT -AAAGCCCGAGAACCTACTTTTCCGCACACCAGAAGACAACGCCGATCTGTTGATTGCCGA -TTTCGGATTATCGAGGATCATGGACGAGGAGCAGTTCCACGTTTTGACCACGACCTGCGG -AACACCGGGGTACATGGCACCAGAGATCTTCAAAAAGAGCGGTCATGGCAAGCCAGTGTA -AGTAGCCCCTCAATGCAATTGCTTATCGTTTACTAACAACCCACCCCTATCTAGCGATAT -CTGGGCCATCGGTGTGATTACCTACTTCCTGCTATGCGGATACACTCCCTTCGACCGGGA -CTCCAATCTCGAAGAGATGCAAGCAATTCTAGCAGCAGACTACTCGTTCACGCCGGTCGA -GTACTGGCGCGGTGTATCACAGGAAGCGCGCAACTTCATCAAGAGCTGTCTAACAGTCAA -CCCACAGTCACGGATGACAGCCCACGAAGCTCTCCAGCACCCATGGGTCAACCTACCCTA -CGACAGCGGCAAGGTGGGCTCGGGTGAAGATCTGCTGCCAACGGTCAAGAAGAACTTCAA -TGCGCGCCGCACGCTGCACCGTGCTATCGACACTGTGCGTGCTATTAACAAGCTCCGCGA -GGGCGGTGGCTTCATGATGGACGGCGTTATGAGCGTTGATCCCAAACCTGAGCGTGTCAA -TGGTGGTGAGATCATTGAGGAGCAGCATCATGCTCCTCCGCCTGCGTCGGCCAATGCTGG -TGGGAGTCAGATGCAGATTGATAGTCGTGGTAATGCGCGTGGGCAGACTGAGGAGCAGAT 
-CCGGGCCCAGGAGCGGAGAGTTAAGGAAATGGTTGCTGGGTTGTGGAGTCGTACTCCTCG -CTAGAATGCATGAATATGTGTCCTTTGCTTTTGTCTATGCCGTGTTACGGTTTCGTCCGT -GGGGAGGTTCGAATTCAGATTCGGATTTGGGAGTACCACTGGCTGCTCCTCTCTAGTCGC -GGGCCTTTCCTTTACTTGGCTCTTTTTCAATGTCTATGTATACCGCAACAATATTATATA -CAACCTTGGAATGAATGATGTCAGTATTTATAATCAATAATTATCCCAAGGTCAGATGAG -TCTCGATTTAAAAATTCTGCTTCACTATCTGATAGAGCTGATGCAATATCAAATTCTCAT -CATGACCGTACACGGTCTAGCCATTGTCGCCCCTCATTAATGCCCATACCGAAACGTTGT -TAATCAAATATCAAGGAAAGAAGCAGTATCATCATGGTCATTAAAATCATCAGTCCCACT -TGAATTGGCGAGATGTGGGACGGAAAGCAAAGCGATCTAAAAGTGAATGGCAATCAATCC -GCGCTTTCCAGGATCTCCTCCCCATCATCTAGAGAGAGAACCCACTCTCGCCATTCCTGC -CACTCTGCGTAATCAGCGCTTGCCTTGAGGCCCCGATACCTGACAGTACGGAAATCTGAG -ACTTGGTCCCAAGGTTTAGTGGCACTGGAGTTCCCACCAGCTGCACCGCGTAGCAGTCTC -CGTGCAGTACTGCCACCACCATCCCAGCCTGCCTTACGCATCAAGCTCTCCACTGTCTCT -TCTTTGGTCCAGCCCTGCTCGACAGCAACGTCGGGGAGATATGTAGCGCCATAACGACGA -CCACGGTGGATGAAGGAGATCCGCAGCCCGTGAGTACCCAGAGACCAGTCCATGGCATTT -GTACACGGCTCGAAGGTCCCAAGCAGTGTGAGAGAGCATGATAGTGAAGGGATGAGGGAT -TTAGGAATGGGTGCAAAACGGGTGTCGTCAAATGCCCTAAAGTAAATGTGTGAGTTAATC -TGATATGTAGAAGGTAGGAATATACCACTGTTTGTATTGTATCGAGAGAGGGGTATTCAA -TAGGAACCACGTACGAGGTCAAAGCGTAGGACTTCAACCCCGCTGCTAGTTCCTGCGCCT -CGAATGTGCCTATGCACCCGCGTAAAGACTTGTGTCCGCTTCGTGACACGGTATTCCATG -TTACGAACAGAGGATATCGCTGATCAGACGATCTCGGGTGGGAGGTCTCGGAAACGGCAC -TTGCCGTGGTGACATTGGTTGAGCTCGATTGTAGCGAATTAGAAGAAGGTGTCGATGCGG -AGGATGAAGATGTCGCTGCACTGGAAGAGGCGGAATTTTGGCTCTGAAGACGGCTGATGC -TGCGGAGTTGCAGCTGCTTAGGCTGACTACTCGGCTGGGGCGATTGGATGCTATCGTCAT -CTTCGTCCTGCGCATTGACGGATTTTCGGTCTGTATCATCTTCTGCTTCTGTTTGTTCTT -TAAGGCTCGACAACTTCTTAGTCTGCTCGTGCTGTTCCCATAAGGCCTCAACGGCTGCCA -GAGTGATGGGCTCGCGGTTCTCGAATGAAGCTAGGAGAGACTCAAAACAATAGTAGCATT -GAGCGGTGTTCGCCATATTGGGAGACTGTCTATCCTTTGATCACCAGTGTGTTCTTAAAG -ATGCACTGCACTGCTTCAGATTCGATCTTAGATTGGTGTATGCTGGAGGTGTGGTTCTCC -GCAAGAGTCCGGGGCGATAAGCACTGCCTTATCGATGGGCCTAGAACTCCAGCCAAATTT -GAAGCAGGAGAGCGTCAAGCAGTGCAGTAACGAGGACTAAAGGGCATATAGAACATAAGA -GGAAACAATGGGAGAAACAGAAATATAGAACGACGGAAGAGTTATCCGTTCACGTGGGCC 
-GACGGCAGTCTACCGCCAGTGCGTCAGCTCAGATTCTGCCTGCAGTATATAATGAGATTC -GTAGATAATATGAGCATTGCAAGTATACATCTAATGTACTACCTAGGTAATCATGAACAA -TGATTCCTAGCTTACTTGTGATCGTTTCTAAAGTCCTTTATCGTGGAAGTCTCCATTTAC -CTCCTTCGCCTCGCTTGAGAGGACCCTAATCCATAGAGGTCATTATCTCACCAACTCCCT -ACCTAGGTACAATTACAAAATCTGTCATGTGAATGTTTCCAATAAGACGAATCGATGTCA -TCGGCGCTGTTACATGAACTACAGAGTATCGTCTGTGGATTTCTCGTAGGTTTGGGGGGA -CCAAGGTGCGTTGGATGCCCGCTGCAATAGTGTCATAAGCCTGTCATTGACCTGATGCAA -CAATGATTGAAGATTGAGAATGAAGGCCGGAGTCTTATCTCGCGGACGGATCTCTATCAA -GTTATGTCAAATACAAATTATCCCAATGAGGAAATACAGCTTTATTTCCGAGCCTCTTCT -GTTACCTTTCACTTCCATCGATTATTTTTTCTGTCTCTACTCATCAATTTCAATATGGCA -ACCACTGTTCTCGAAGCACCAAGATACACGCAGTTAAACGTGCGGGTTTACCAGCACAAC -GGCTCTCTTCTTGCCGGTAAGTAACGACAGCTTTGCTAGAAGCCTGGGCAATAACTAATG -CAGCCTCGTTCTAGCTCTGGAAGTCCCAATTGACCCACGAGATAGATATGTTTTCGCTGA -GATGCTATACAGATATTGTTCTTCCATATTCATCTTTCCTGACGAAGACAAGTGGGCTAT -TTTTAAATTGCGGGCAAACACCACTCCAGGCGCTACACTCCGACCTAGTGGTCGAAGCTT -GGTAAATCCCGGAAACTACATGGTTCTGGAGAAAAGTAGGTAGGATATGTCTAGCATAAC -TCGGGCTCCCAATGCTGATGATTGTATAGATCGGAACCCCACCACGGTTGACTTGACAAC -TAACTGTGCGCCACGCCGTGTCAGAACCCTTGACACCCATACCCAATCCCAGGGCCAGAG -TGAACGGGACCCTCTGGTAGAGTCAACATTGGAATCATGAGAAGCATAAGGACGACATGG -CTAATTATTTCTAATTTCTAAAGCAAGCCACATTTCGCAACACCCTCCGCCAAAGAGACA -ATTGCTGTGTAATTACCGGGCAGACTAGGGCAACCACCTATGAGCGACCTTTCCTCGGTC -TCGATGCTACGCATGTATTCCCTATCAGCATGCTAAATTAATGGAGAAAAGGTGGATATC -GGCAATATATTATCGACACTCGCCCCGATAGCGACATTGGAGAGTCTCGTCTCTATTCTG -CCCAGAATGAGCTTCTCCTCAGCGCAGACATTCACGCTCAATATGATGAGTTTCAACTAG -GAATTGATCCTGACGTGAGAACCCACCCCCACCTCAATATTGATATGTGTTGGTTCCTGA -ATAAATACTGAATGCTGTATACATAGTCTGGTTACAAGATTATCGTATTCGGAGCGGACC -CTGCAAAAATGGGCCGTACTCGTCTCAGAAGCTCTGCCCAAAACGGGACCGACGGTGTGT -GTCCCGAGCTCCTTCGTTGGCACCTACGTATGTGCCTGTACAAAAGTCTGAAAGCCAATG -TCGAACCACAGAAAATATGGGAAGAAGATCTTGGAGAAAATCCTATGGGTAGCACTCTGG -AGCAGTCAGATGCGGCTGAACGAATGGAAGTTGAGCTTTTCACGCGCCTGGGAAGTTTTG -TGGCATAATACGTTCAGAAAAGCAGACCATTGTTAGATTATTTATTATCAAAGTAAGATT -CTTAACAAATGTTGAATCTCCACGGGGATGTTTCATTTATAGAGATGGACTTCCACTGCT 
-ATTCACATACAAAGTCCCTGACTATACATAGACAGGTCGTCTCTTCCACTTCTCTGCCAT -CTGGCTATAAGACTGAGAAGGGGACAAAATATCTAAAGGAATAGGCTATCAAAAAGACAT -AATTACGCCGCATAAGCGGCTGACTGTGCTTATTCCCAATAAGTAAACGTAGACAAGCCA -ATAATAATGCACCGCGTTACATCACGCTAGCTATAGGGCCAGAGCCAGGTAAAACTAGTC -ATTAAAATGCAACCCACAGGGATGTATATGTGATGCAGTCTTAGATAGAATCGTATTTGG -GCACATAGGCAAAAGATAAGAAGAAAATAGTCCTTGGATTCTATAGATCCTTGTGTTGAG -AATAGATTGTAGAATGTACATAGGACGCCCAATTAATGTTGAACTTTTCCCCTTTAGAAA -AGATGTGAGCTCGCTGTTGAATAAATGCGAGGCCCAACCGCCAAAAATCACAATTGCTCG -TTTCACATGCAAAAAAGCATTCCCATATCCCAATGACCCGCGTCAGGTTGAAAAAGCAGT -CCGGTAATTAACCCCTAAAAACGCACCCATGTATTTTTCTTTTGTTATACGAAATGATGT -TGAACTCGTGGGATCCTGATTCAGATCCCTCTTTATTTTTTTTCCGTAACTCATACTTGA -TCCTCAATGGCCCATTTAAACGGGGAGAGGGAAGGCGCTGGCCCACTGAGCAATCTCCTT -GCGGAGAGCAAGGATCTCAGGAACGCTATCGTCAGAAACCTTGGCCTTGAAGTCCTTGAG -CTTGTTGGCCTCCTTGGGAAGCTCACTCTGGATCTTCTTGCAGAGGGCGACGGACTGGTC -AATGTAGCGGGCAATGCGCTTGAAGTCCTCAACACCCATACCACGAGAAGACATAGCGGG -GGCACCAATGCGGATACCGCAAGGAGTGAGGGCCGACTTGTCACCAGGGATGGAGTTCTT -GTTGCAGGCAATGTTGATCTGCTCCAAAACGGCCTCAACACGAGCACCATCAAGGCTGTG -GGCGCGAAGGTCAACGAGGACCATGTGGCTGTCGGTGCCGTCAGAGACGAGCTTGTGGCC -AAGAGCCTTGAACTCCTCCTCGAGAGCCTTGGCGTTCTTGATGACCTGCTCCTGGTACTG -CTTGAACTCAGGGGTGTCAACCTGCTTGAGAGCAACAGCGAGAGCAGTGATGGTGTGGTT -GTGGGGACCACCCTGGTGGCCAGGGAAAACAGAGAAGTTGATAGGGTTCTCCAGGTCGTA -GAGGATATCCTTGCCGGCCTTATCGGTGCTGCGGACACCCTTGCGGAAGAAGATCATGGC -GCCACGGGGGCCACGGAGAGACTTGTGAGTGGTAGTGGTAACCACATCAGCGTACTCGAA -GGGAGAGGGAATGACACCAGCGGCGATGAGTCCAGAAATGTGGGCCATATCAACGATGAG -GTAGGCGCCGACCTTATCGGCAATCTCGCGCATGCGCTTGTAGTCGATCAGACGGCAGTA -GGCAGAGGTACCGGCAACCAGGCACTTGGGACGGTACATCTCGGCGTTACGCTCGAGAGT -ATCATAGTCAATGATACCGGTCTCGAGGTTGACGCGGTAAGGGAAAGTCTCAAAGTAGGT -GGAAACGGCAGAGATCTTGCGAGAGGGGGTCTGGTAGCCGTGGCTCAAGTGACCACCGTG -GGGAAGATCCAAGCCCATCAAGCGGTCGTGAGGACGCATGAGAGCCTGGTAGACCTGGAG -GTTGGCAGGGCTGCCAGAAAGGCACTGGACGTTGACGCCCCACTTAGCGGGGTCCAGGTT -GAACGCCTTGAGAGCACGCTGCTGGCAGGTAATCTCAATGGCGTCAATGTGCTGGTTGCC -ACCGTAGTAACGAGCACCGGGGTAACCCTCGGAGTACTTGTTGCACATGGGGGAGCCAAG 
-GGCATCGAAGACAGCACGGGAAGTGAAGTTCTCCGAGGCAATCAGAACGATTGACTCACG -CTGACGCTGGATTTCCTTCTCCTGTACGAGTAGAAGAATTAGCAAAGCTGGACTTTCAAT -GGGTATTGCATTCAATTGCAGGTCTCAATGATGTATAGTTTCGACTTACCATGATGGCAG -CAATCTCCGGGTCGCTGGACACGAGGGAGTTGTGCATTTGCTGTAGATGAGTCAATAATC -TATATGCTGTAAATAAGGGCAGACATGATACTTACGTCCCGGTGGGACTCGGAAAGAGCG -TAAGACATGATGGAAGATGGTAATCTGTCTGGAATAAAACAGAGAAGAGTTGATGAACAG -GAAGGAAGGGAGGTGAAGAGGGAAAAAAGAAATGGAAGTGGTGGGGGAGGGGGATCTGCT -ATTTGCCTCAGGCCGCTGAATTTTAAGCCAACGATATCGGAGACAGCTGACCAAATTAAC -TTTTTAGGTTATAAAATTCGTAACTTGGCAGAAATTGGACAGGGATTGGCTAAAAGTTTT -AAAACGAGATATATCCAATGGAATTGGTTCTTTTGAATCCATTGAACGCTTCGAGAGCGA -CAAGATGGTATCAATTGCTATTTGCCACACCGCTCATGAGATGGCGCCTTTGCTGTTTTG -AAATCGCTAAATAAACATCTGGGTATTTCCTTAAAAAACGCCAAATAAATCAATCTGATA -ATCTTGCATCTATCTCAATTGAGAATTCCCAAGAGTTCAAGCTGCATGTTTCATGCTTCC -ACGGTCCTCGTATTGTCCTCACGTAGCATTCCTGCTCGGTCCCCAAAGTCAGAGATGACA -GAGCGCAAACGTCTCACACGGTCCTCTAGCTCTGTAATCTCTCTTCGTTGGTCATCAACA -GACCGATGCACATCTGGTAGCCCTTCGACGACTGTACGAGCCTTCTGGATTCGAATCCTG -ACAGAGCTGGTTTCGGTAGGGAGGGTTTTCACATCGAGCGACGGACGAGCGCTTGAATCA -AGGACCGATACTCCTGATACAGCATGGGAAGATGTATTGTTGCCATTGTGCCCACCATTA -GGGATGATGGAAGGTGCTTGTTGGGCTTGTGCTTGGGAAGCTCCTGGGTCCCCTGTTGTA -TCTCCGGGTACGTCCGATGAACCAGGTTGCTGACCTTTGGAGAGTAAGCGAGAAAGAATC -CCATGCAATGATGGAATTATCTCAAAGGTCTGTGGTGGAGGGAAAGGTACTGTTTGCGGT -GTTGGTGTGGACGCATCTTTAAAGGGTGTCTCGGGCATAGGGGATTTCAGGAGCGGGGTG -CCCACTGGCGGAGATCGAGAGGCCATTATCGGAAATTGTGATTTAGGGGGTCCCGAGGAA -AGATATCTGAAGGACGAGTAGATCCCGGCTGGGGTAAATTTTTAGGGAAGAGTCGGAAGA -CATACCCCGTAGACCTGCCTCCGGAGCCCGCAATACATGCTTACATCAGCAAATACGCTC -CTGGCACCCGGTGACGTCGGGGCAAATGCCCCTCCTTCACCGGCGGCCTTTTTCTCTGCG -TTAAGTGGTCTGATTCTCCTTTCGATATCTTCTCCTCTAAACAACTATCTAAAGGTAAGT -AGCTCTATTTCTTAAATATCTGTTGGCTCAGCAATTGATCAGTGTCCGTATAACTCTAGT -GATTCAATCACTATAACGAACCCCACTATGGCTTCGGTCGACCAAAAAGTTGCTTTGAGT -ATGATTGACCACCACCCGGGTCCTCAAAAGATGTGCAGGCTAATATAAGCCTTATAGTTG -TGATCGATGGCTGGGGTATTGCAGGCCCCAACTCCCCACCCCAGGGAGATGCCATCGCCG -CGGCCGATACCCCACACATGTCCGGCTTCGCCGAGCCTAACTCAAAGACCGCCCAGGGCT 
-TCGCCGAGTTGGATGCCTCCTCGCTCGCTGTTGGTCTGCCCGAGGGTCTGATGGGCAACA -GTGAGGTTGGTCACCTGAACATTGGTGCTGGCCGTGTTGTCTGGCAGGATAGTGTTCGCA -TTGACCAGACCCTTAAGAAGGGCGAGATGGGCAAGGTACCCAGTATTCTTAAGGCTTTCA -CCCGTGCAAAGGAGGGCAATGGACGTCTGCACCTTCTGGGCCTAGTCTCGGATGGTGGTG -TTCACTCCAACATCACTCACTTGTTCGCCTTGCTCGATGTCGCCAAGGACATGCAGATTC -CCGAGGTCTTCATTCACTTCTTCGCCGATGGACGTGATACCGACCCTAAGAGCTCTGTGA -AATACATGCAGCAGCTGCTTGAACGAACCCAGGAGATCGGCACCGGCGTGGTTGCCACTG -TCGTTGGCCGTTACTACATCATGGACCGCGACAAACGCTGGGAGCGTGTTGAGGTTGGCC -TCAAGGGTCTGGTCACTGGCGAGGGCGAGGACAGCTCGGACCCGCTACAGACCATTCGGG -AGCGCTACGAGAAGGGTGAGAACGATGAGTTCCTGAAGCCTATCATTGTTGGCGGCAAGG -AGCGCCGTGTCCAGGGTACGTGTGGTTCGAGCTCAAAGGGCAAAGTCACGACAGCTAATT -GAGCTTATACAGATGATGATACCCTCTTCTTCTTCAACTACCGTTCGGACCGTGTCCGTG -AAATCACTCAGCTGCTGGGCGACCATGACCGTTCCCCGCGTCCCGACTTCCCCTACCCCA -AGAACATCGACCTCACCACCATGACTCAGTACAAGACTGACTACAAATTTAATGTTGCTT -TCCCTCCTCAACACATGGGTAACGTGTTGGCCGAATGGCTCGCCAAGAAGAACCTCCAGC -AGTGCCACATCGCTGAGACTGAGAAGTACGCTCACGTCACTTTCTTCTTTAACGGTGGTA -TCGAGAAGGATTTCCCCGGCGAAGTGCGCGATATGATTCCTTCGCCCCGAGTTGCTACCT -ACGACCTTGACCCAAAGATGAGTGCCGCTGCTGTCGGCTCCAAGGTGGCTGAGCGTCTTG -GTGAGCACAAGTTCGACTTTGTCATGAACAACTTCGCCCCTCCCGACATGGTTGGCCACA -CTGGAGTGTACGAGGCCGCCATCCAGGGTGTTGCTGCCACTGACAAGGCCATTGGTGAGA -TCTATGAGGCTTGCAAGAAGAACAACTACATTCTGTTCATCACCGCCGATCACGGGTAAG -TTGTTCTACGAAACAGGAACACCTCCCATCAGGAGAATTACTGGTTAACCCACATTCACA -GAAACGCCGAGGAGATGCTCAACGAGAAGGGCACCCCCAAGACCTCCCACACCCTTAACA -AGGTTCCCTTCGTTATGGCCAACGCCCCTGCGGGCTGGAGCCTCAAGAAGGGTGGTGTGT -TGGGTGACGTCGCGCCCACTGTTCTCGCTGCCATGGGTGTCGAGCAGCCTGAGGAGATGA -CTGGAGCCAACCTGCTGGTCAAGTCCTAGATAATTTTACCCATACATAAATGTTTGATTA -TGAATCAATGAATAGAATCTTGTCAATATACACATAATCTTCCTTTTGATTATATTGTGT -CAGAAGTCGGTTATCCTCTGGCTTTTTTGGGTGTGCTTCTAAATGGTCTTGTCAAGTGCA -TTTATAGGCGTTCTTTTAGTTAGGTAGACATCGAATGGAGTTTTCAAATCTTCTAGTCGA -GTTTATCAACAAAGGACCGAAGGCTTACTGCTGAGCAAGACCGGAGCAGTAATCTATGTA -TATACAGATAGGTAGACCTCATCATGGGACTACATCAATTGACTTCGGGTCGCTGGAATG -ATCGATCCTATTGAATAATCTTCACATTGGAAGACAAATTAATCAATTTTGATAGCATTG 
-TATTAAAATTTGGAGGGTATATCTTTAAGCTGATCTATGCGCTGTTCTGGTGTCGAGCAC -CTGCACAGATCATACATTTACAAGGTATCCCAAGTCAATTGGAATCTGGTCCTCAATGCC -CAAGCATTCCTTGAGATATCATACCTCCAATATAGGGTTCATAGTTGCATAGTGCATGTA -CTGATTCACAGTGAACCGCTTGAGTGTTGGTCCCATCCGGCGCCAGCCTCGTAGCTCACG -CCACATCTTGCATATCCGGGTGATATATCCTTCGCCTGCGAGTTCTTGGAGTCCTTGGCT -TATGCGGAGGCCGACGCAGCAGCAGCCTGCTGCTGAGCCTGGCTTTGCAGTTGGATAAGC -TGGGTGAGGGGTTAGAATGTGCCTATGGTGGGGGACTGATTGCAGTTGGATGAGGGCGGT -TTACCTCTGAGCGCATCTTCTCGCTTTTTTCTTGCGTCGTATTAATCTCGCCTTCAATTC -GCTTGCTGTATATTCTCAGTCAGCATCTCTTTTCGTTGGTTTACAATGGTCAAGGCAGTG -GCACTGACATCTCCTTCTCAATGAACTCCAAACGACTGTTCACGGCCATGGTGGCTTCGG -TCTTATCCTGCTTCAACAGCACCGGGCCAACAAGCTTATAGATGTTGGAGTCATCGTCGA -GCTGGGCGAACTCGCTCTGCACGCCTTCGTTCTCTTGTTGCTGCGATTCGAGTTTCTGGC -GAGCGTCGACCAGAACATCAAGCTCTGGATGCGGAGAAAAAGGTTAGTAGCTATTGCTTT -CATCAATGGGATATATACCCATACCGGTCTGCAGTCCCTGGAACTCTTCCGAGAGTGCTT -GGAGCTGTTTCTGGGCGTCCATGGTGAACTTGGAGGTGAAGTGGGAGGAGGGAAAATCAA -TGGCTCCCGCCCGTAAACAACTTCCACTCTCACTTTCCCCTCCAATTCCCATCAACCCTC -GGTTCTAGACCTGTTTTGCCCTTCTTCTTTTCTTCGACTCCAAAAAAGCTTCACTTTTTC -TTTTCTAGCAGGTCGCTATCTATCCCTCCAGCCGTGGCCCACTACCTGACCCTTGCTCCC -CTCTCCCCGCTTTATCACCTCTGCCTCCGCGGTCGACTCGTTACCTCTACCCCAGACACC -ATACGCATTTTCCATCATGAGCAATACAGATTTCTTGGGGCGGGCGATCGATACGGTCAA -GAAGGCGATCGAGAATGACAATGATGGCGAGTATGAAAAGGCCTATCAGATGTACTACTC -CGCACTAGAGTTGTTCATGCTGGCTTTGAAATGGGAGAAGAACCCGAAATCGAAGGAGAT -GATCCGCTCAAAGGCCGGCGAGTATATGGATCGCGCGGAGAAGCTTAAGAATCACCTGGC -GGAGGATAAAAAAAAACCAAGCGCGGTTGGCGCAAATGGAAAGGTGGCGCAAGGCAGTGG -GAAGGGAGGGTATGTACTCGACTTCTTTTGACCAGAGTTTATATTCCCAGCACTCCTAGA -TATGTGATAGGCAGTGTTAGTGAGTATACTATTGTCCTCAGTTATCCTCACTAACAACCC -TGGATCGTACAGGAAGGAAGATGACGATAACGGCGAAGATGCGGAAGCGAAAAAGCTTCG -ATCAGCGCTTCAGGGAGCTATTTTGTCGGACAAGCCGAACGTGAAATGGGAGGATGTTGC -TGGCTTGGAGAACGCGAAGGAAGCATTGAAGGAAGCAGTCATTCTTCCCATCAAGTTCCC -CCATTTGTTCACTGGCAAGCGCCAACCCTGGAAGGGTATCTTGCTCTACGGTCCTCCGGG -TACGGGAAAATCTTATCTTGCCAAAGCCGTGGCAACAGAGGCAAACAGCACATTCTTCAG -TGTCAGCAGTAGTGACCTGGTCTCGAAGTGGATGGGTGAGAGTGAAAGGTAGGGCAACAT 
-CTTTATACGGGAGCTTCGAGAGTTGCGCACTAACTCGCTTCTAGACTTGTTAAGCAGTTG -TTTAACATGGCTCGCGAGAATAAACCCGCGATTATCTTCATCGACGAGGTCGATGCACTG -TGTGGACCCCGTGGCGAAGGAGAGTCTGAGGCCTCACGGCGTATCAAAACCGAATTACTT -GTACAGATGGACGGTGTTGGCAAAGACTCGAAGGGCGTTTTGATTCTGGGCGCAACCAAT -ATTCCCTGGCAACTGGATGCAGCTATCCGGCGACGATTCCAGCGAAGAGTACACATCAGT -CTTCCCGGTCTCAATGCACGAATTAAGATGTTCATGCTAGCCGTGGGCTCGACGCCATGT -CAAATGACCCAGAACGACTATCGTCAATTGGCCGAGCTGAGCGAGGGCTACTCCGGTAGC -GATATTAGTATTGCCGTGCAGGATGCATTAATGCAGCCAATCCGCAAGATTCAAGGTGCA -ACACATTATAAGAAGGTATGTGCACTCTGTTCTCACCGTTCCGGTCTGCAGCGCCTGACT -TCACTCTATCATAGGTACTCGTCGACGGCGCTGAGAAAGTCACGCCTTGCTCACCAGGGG -ATGCGGGTGCAGTGGAAATGAGTTGGCTCGATATCGACGCAGATCAGCTTCTGGAGCCAC -CTCTCGTTTTGAAGGATTTCATCAAGGCAGTTAAGAATTCTCGGCCGACCGTTAGCGGAG -AGGATCTTACGAGGAACGCTGAATGGACTCAGGAGTTCGGCAGTGAAGGCGCCTGATCGT -ATCTTCCGAAGGATGGGGATATTCTTCCTATCATGGCCCACGAAATGTGAATATATCTAC -CTCACTTGATCTGATCGACGGCATACGTTCCTGACTCTCTAGCTTGTCCAAACAGGACCC -AAGCTGATTAGCCCGTGCCATTGCCCATAGTCCCCATTTTCTCATTTGCTTTATCTGTGT -CCATGTCAATACTTGCTTCATCTCGAATTCCTGTGTTTTTGGTGGGTGTCTCCGAGCAGT -TTGGTGTTTAGAAGCGTTTCTCAATTACTTTTCACAAGGGCGCGGCGTTGGCTCAGCAAA -TATTCCATGTGTCTATCCTGCAAGTAGCATACATACTGCCTTGAAGACAATAAATTTGTG -ATGTACTTTCGCCGTTGCTTGTATGTGTAATTAATGCCAATCAAGGCTCAATTCTTATAC -TGGCAGACGCTACATAAATGAATAACGGCTCTTAAATCATCAGATTCCTCGATGGGATGT -CACTATGCACATATCATTCGGACAACTGGCACAAGGCCAGCACTAAAAGGCCTCAAAGAA -GAACTGACAGAAACACAAAACAGGTCGAAAAAGAGCAGTATACTTGCAAGACATGTCGGG -GGAATCTCTCATGACAGAAGGTTGACGCTGGAAGAACAAGAGACAAAATCACAACGAATG -ACATAAACAGAGAAACAATCCCAGAGCTACTGGCCAAGCAAGCTCTTGTTCGAGCTATCA -CGGCGCTTAAGCATTTCAGTCTTAGCACGCTTCAATCCCTCGCCCAGTTTCTTAGTCCGC -TTCCAATCATTGCTCATCTTTTGCATGCGAACAGCGGCCCTCACTAGAGATATGGCGGAT -TTTAGTTTATGGCGTGGTGTGCGGACAGGAGCTCGGTCACGGGCGTGCACGCCCATTCCG -GAAATCATACGGAGATCAGCCTCGTTGCTGTTGGTAATTTGTTAGCTGTATTGCAAATAA -GATTCAATAGGAGAGGGGACACGAACCAGGCAACTCGGACGCGTTCGCCAAGTTCCATCA -AGCCCTTGCTCCAGGCAAGATCACGGCGGAACTTTTCCTCACGCTGCAGGCGAGCGCGGA -GCCAGATGATTTCCTTGCCTAGGCCTCGGATCTCTTTTTCATGTTTAGTTTTGCTAGTCG 
-ACTTGGAAAGGGAATTTCTCTCGAGGATTTCGTCATGGCGTCTTTGTAACAAATTGAGAT -TTTTGGCGGTTGCTTCAGCCTCGCGCTTGTGAGTATCTCGTTCTTCGCGAATCCTGCGCA -TATGTGTCTTCAATTCGTTCAGGCGTGCTTCCCTTTCAGAAAGTTTCAGGGCCAATGATT -CGGCTTCGAGGGTGGAGGACCTAAGGAGCTCATGGAAGTCCTTTTGGTCCTCCTCTTTCA -TTGCCGCACGTTCCAGCTCTCGGTTCTTCGTGTTGACCTTCTGCAGGGATCGGTGTGCTT -CAGCCACTTGTTTTCGTAGACTGCGTAGCTCTTCATGAAGCTTATTTGATGGAGAACTAT -GTGGGGAGTGTGACTCGAATTTGTTCATTATCGCATCATGAAGATCTTTCTCCAATTCTT -CAACTCGTGACTTGTAGCTTTGCGCAGAAGTCTTCTGCTTGGATTCACGCTGGAGCAGAA -TCTTTTCGCGTTTTCTAGCCTCTTCTAAGCTGTGTCGAAGGAAAGGGAGATCCCCATGTG -CACCTGATTGTTCAGTCTCCATCACAGGCTGCTGCTCAAGAAGAGCTTCCAGTTCTTGGA -CGCGACGCTCCAGGCGCTGTACCTTCGTCTTGGCCGAAGTAAGCTCTCGATCTCGGTTAC -CAGAGCCAGCAAGCAATTTGTCCTGAAGGCGATCGACCTCAGCCGAAAGGCGACCCTCCT -CTGATGCAAATCGCTCACTTTCTGCGACAAGCTGGTCCTCGAGGGACGACTTGGCGTCTC -GCAGTACTTTGAGTTCGTTCTCAAGTCTTGTGACAGTCTTGCGTAGTTCAAGTTTGTCCC -GGTCAAACCGGGTGGTGTCATCTGCGTTCTGGACTCGGTCGAGTTGAGTTTGGAGCTCGA -CGCGTTCAGCCTCAATATCCTGCACTGACGTCTTCAGATCAAAGATTTCAACATGAGCGT -TCGCAAGCTGATCACGGAGTTTTTGCCTCTCGGCAATTGACTTTTCTAGGTTTTTGTCTA -GGTCAGATGTCCCATTTTGGCGACCTTTGGCAAGTTCTTGCTCATCCGCCAGGCTCGACT -GTAAAACGATCACCTCGTCTTCCAGGGCTCGCACCTTGTCTACAAGTGCTTGCTCTTCGC -GTCTAGATACCCGAGCATCCTGCTTCGCCGTAAGGAGCTCGTCTCGTTTTTCATCAAGGT -CGCGTCTAGTCTTATCGAGGTCGTCTTTGAGATCTTGGATTTGGCGGTTCAGCATTGACT -TTTCTTGCGTGTAACGCTTTTTCTCGCTCTCAATTGCCTCCTGTAGCCTGGATTCCTTCC -CAGAGTATGAGCTACCGGCTTCCTGAAGCGTGTCGATAGTGCGTTTCAGGTCCGCCACCT -GACCTTCTGCCCATTCTTTCTCTGATTGTAGAGTCCGCTTGCTACTTTCCCATCGACTTT -GATCGACTGCAAACCGGCCCTCCTTCTCCTCAATCTCGTGGTGAAGGTTTTCAATCTCCT -CTTGCAGCCGTTCAAGTTCCGCCTTGTGCTGCCAGCGAATGTTGTGCCCTTCATCCTGAG -CGCGAGATGTGGCATCTTGCACCGCTTTTTGAAGTTCGCGGATTCTAGCTTGTGCACGAT -CGAGTTCGAGCTGTAGTCCTCCAGATTCATTCGTGAGAGCATTATGACGAGTCTGGAGAA -GTTCCTTTTCGTCCGTTCCGCGCTGAAGTTCATCCAATGCCTCCTGTAGTTGAGTAGATG -TAGTTCTAAGGTCCTGCCGGGCTTGATCGCGATCACGTCTGGCCACATCAAGGTCCTCCA -CCAGTTGATTGGCATTATCCTCTAGGCCATGCTGAATTGTCTGATAACGCTCTTCAATGA -GAGCTGCATGATTGGTTTTACTCTTCAGTTCTGTCGCTAGAGCATCGTTTTCTCGTTGCA 
-GCTCGTGTATCTCTTCTTCGAGTTTTTCGGTCTTGTCCTCCATCTGCCGGCTCAGTCCTT -TGGTCGTAAAGGATTTGTTTGATATCTCGTCCTGTAACTCCCTCAGATCCTCTTCGGCAC -GATCCTTCTCTTTAAGGGCCTGCCTTGCTGTATCCCGTGACTCTATGGTCTCGGAATTAG -CCCGGGCAAGACTATCATGGAGCTCTTGAATCTGCTCTTTGGCACGGTCAAGCTCAAGCT -GGAGCTCTGAAGCTGCACCATTCTCCTCAGAATTACGTTTCAGATCCTCGAGCTCTTCTT -CGCGTTCCTCGAGGAGCCGATCTTTCTCTCTCGAGCTTGCTTCCAAGTCCTCAATCTCGT -CTCGAAGTTTTTCAGTCTCTTGCGATTTGGTATCTTTGATATGCCTCAATTCGTCGCGCA -CTTCTCGAAGTTCGAGGTCACGCGATTCAAGCTCTTCGCGCATCATGTCCATTTCACGCT -GTACCGTCTGGTCCGCTTGTTCCCTCCATGATTTTTCCTTCAATTCCACTAGCTGAAGCT -GGTAGTCCTCAAGATCTTTCTCTGCTTGCTGGAGGCTCTTCCGGTATCGGGAAATATCGC -GTTGCATTGTCAGTCGCAAGACCTTGAGCTCCCTATTTTCTTTGAGGGCTTCGTTATTGT -ATTCCGGGCCAGCCTTTTGCAGTTGTTCTTCCATGAAGTGAATCCTCAGCTTTAGACTGA -AATTGACTTTGCTCAGTTGATCAATGGCCTAAGGATGTCAGCATTAGTCTTCAGCTCGTG -CGAAGCTAAAGGTTGGGATATGTATCATACCATAGCTTGGTCCTTGAGCGACAAGTTATT -GCCTCCATCACCAAGGACTCCGTTGTGTCCTGAATACCCGGGCAATGGTGTACTTTGCGC -ACTACTGCTTGCCGCATGTGGAACTGGAGTTGCCTCTTCCATTGTCCGACCCTCCTCGTA -AATATCAGACATGTCCATAGCAGGCAAGCCAGGTGTGTGGGCGTTGCTGCGATGAGGCTC -CCGCTGATATCCGGGGGTTTTCGGGCCGTCTGTTCCCTGGGTTACAGACATGTTCCTCAA -AAGATTGTTCTTTGTCGCGCTCTTCATTAACGGAGTAAACTCTCCCTTCCCGGGGGCTGT -TGATGGAAGATGCCTTAACGCGCTCCTCGTTGACCTGGAAGCAGCACCGGGGCGCGGGGT -GACGCGTTTGTCGAAGCCTTTGATCACATCATTGTCTTTGGATGGTGTTTGGAATGAATT -TTCAACAGAGTCGAGTGCAGACAAATGCTGCCGCCCCGCGCTGCGAAATCCAGTTGTAAG -ATAAGTCGCGTTGCCGTCGTATTCGGTGCGCGGTGTGTCGATGTAGGGGAAGGCCATGTT -TGAGGAGGGAGAGGTAGGCCAGAACCATTCAGCGCGTGACCATCACCGTCAATAAGGGCA -AGAGCGAAGTTGTGTACTGAAAGTTCACGCTTGACAATCACAGACGTGTTTTGGGCGAGT -TATCCCCGGCTATGTTAGGCTTAGCGTGGACACATCTGTTGATTTCTAACCTAATTAGGA -TTAGCGTGTGGATCACGGGATACGATCTCGCCGCTTTTGCTTTTTTTTCTATCAGAAGAA -TGCATGAGGTCATTCATGGTCTTTGACACAAGATCAGCCATGACTTCTGAAGAGGATCGC -GCATGGTTTCAATCAACCTTTCGTCCTATCCCCAAACCGGAACTTCCCGATGACGCTGTT -GACTATTTAATCTATCATATTCCGTCCTCCCCCACCTCCGCCGTTATCGATGAAGCTGCC -GAGACCCGGGCGCGATTGCTTGAGGTGCAACGCACGGCAGCAGATTTAACCAAAGATTTA -CTCAAGGATTATATCTGGCAACGTGAGGCCTTTCGATTGGAAATTACAAAAGAAGATGGT 
-ACATGAATCTCCCTCTCAAACCCTATAACTATAGTTGAAGCTGATAGTCGCGGGGTTTAG -GAATCACTTCACTACAAGGCCGCACCACCTTTGGTGACTCCATTGAAGATGAATGGGTCA -TTGTATACTTCCTCCGCGAACTGACCAAACGGCACAAAGATATATGGGTCAAAGTGGTGG -ACGGCGATGGCGAGTTTCTTCTCATCGAGGCTTCGGGCACACTCCCAGCATGGATAGAGC -CAGACGTTGCAGACAATCGGGTGAGCTATGGCTTGTTACTCTACGTTGGTTTATTAGGAA -CATCATACTAATCAGTTAATCTATGAGTCAGGTGTGGATTCATCAAGGAGACCTCCGAAT -CATCAAACCCAAGCAGGAGACGAAGAGACAGGTCACGGAAAAGATTTCACTTCCAGAAGC -ACGAAAGATTATCCAGAAAGAACCCGGTCGCATTCTTCGCTCGACAATAATTCAAGAAGA -GGCGTTCTATCGTCTGCGCAAATACCCTCAGCAAATCAGCGAGAATCTTCACTCCGCCAT -CATCACAATTCCTCGCAAAGCTGCATATCTCCTACACCAAAAGCCGGCATACGTTTCACC -TGCCATAGAGGCGTTCTATGTTCGTGATCCGATCGCCTTGCGACCACTACGATCGAAGGA -TGCATCCGACCTTGTTTTCAAGCCAGATGACTTGGTCGAGGTCAGTGTTCGGTTCACCCG -TGTCGGATATGCGCAACTCAAAAGCCAGGAATTCCCCGTGCCTAATTCTTGGGCGGGCAA -GCTGCCCTCCACTGAGGATAAGAAGGCATATGAGCGGGCGGAAGCGGGAATGAAACTTGC -GTGTGGATTCGAAATGCTGTTGTACGACCCCCAAAACCAAGACAAGCCCGCAGTCCGCGA -AATGAAGTTCCTGCTCGAGGATGTGGAGACAGGGGATGAACCCCTCCCCACTGATCAAGA -AATCCAAGAGACCTGGGAAAAGCGAGAAGATGATGAAAAATGGCTTGATATCAACTATGA -AGACCTGGAGACAGAGCTTAAAGGAAGAGGCGAAGGAAAGGGAGCAGATGCTGGCAAATT -CGGTGACTCTGGTGCCCAGGAAAATCTCCAACGGATCGTGGCTCGCTTCGAAGAGTTCCT -GAATGACAACAAAGCAGGCTTCGATGGTGCCGATTTTATTGATGACTTTGATACGGATTC -TGACGTCGATGATGAAGATGACGAGGAAGTCGATAGTGACGGTGAGGATAAGGACGCGTC -ATTCAACGAAGAGGAATTCTCCAGAATGATGAAAGAGATGATGGGTATGCCCTCTGCACC -CGATTCCATCCGCAAACGAGTGGAAGAACTCGACTCGGACGATGAAGATGACACGGAAAA -GATTAAGGAGCTCTCCCGACTCATGGAGGCGGAACTGAAAGGGACGGGCGTGCTTGATCT -CAACAGACGCCCGCATGAATTATCTTCTGGTCCAGTGCGTGCATCCAAAGGAAAGGCTGC -CAAGGTTTCCAGTGAAGATGAAAATGATGAGGATGAGAACATCAACATCAATTTCGCTAA -GAATCTGCTGGAGAGTCTTCAAGGCCAGGCTGGAGCCTCTGGTCCGGCCGGCAATATGCT -GTCAATGATGAACATGCGCATGCCTAAATATGATCGCCCTTGAAGATGACCGTACACGAT -CGAGCTATGTGTGTGAGTAATCATTCGACATGTCTGAGAAAACAAGAGCAGTCCCAAGGT -CTGAATCCTGAATCGGCAAATGCTTCATCACCAGCCCCTTTCGTTTAACTGAAGGAATGC -CTCCTTTGCAAGACAATAGCAAGACAAGGCGATCCGTCCATCCAAGAGAAGGGTTATTCC -TATTCTGGTTGCCGGTGTATCATATCGCTTTACACACCCGCCGCAGGGAAAGGGACAACG 
-GGGCTGATGACGCAGTCGAACCAGCATCTTGCAAAAGCCCTGGTGGCTATAGATGGAAAA -CAAGAAAAAGCACATCTGATTTTATCTTTCGGGTGGTCAACTTGTACTCCGTACCCCTGC -CCAACTTCCATTGGGCAGTCACCAGATGATCATGGGTCAGACTTGACAATTCTACCCGTA -ACAGCTACATGAGGCTTTGTTGCTCACAATCTTGATATATCTTTAGGTTGAGTAGTCACA -TGCCCTCTTACAGATATGATGAATTATAGGTCTGCTCAACCCCTCACATATGTGATGAGA -CAGACTATACTTATACTTCAATGCTTCATGTACCTTCTAGAGTCTGTAAATTTAAGTTAG -GAAACTCGTTTAAGATATGTTCCACTCTCCTAAAATATATGGTTCAACGGGACGGTCCCC -AAGTTTGACATAGGCCTAGTGGGGTTAAGGCCATCCATAAAGGCAGGCCTGGAACTGCAT -AACATGTATAATAAATACTAGTAAAGATCAACCCGATAACATCGGAAGCCACGTCGGCAG -GGTGGCGACGGTATCAAGAGGCGTATACCAGGTAGGTATGATCAACGGTGTCGGCTATAG -ACCCACGGTCCTGTCCAGGAGCCCGAATCTGATGACCTCTATGAAGGCTCTCTGAATGTC -AACTatatatcatgtatgttgtatgctataatgtaatatgtatgttgtatCTAAGATCAG -CAATCTAGTACTTTAGGAATTCCACGTGGCTAGATCGTGTCTTGGATCGACCCAAGTGGT -TTTTAGTCTGTTCCTGACAAATATAAAGATGTCTCATACTGGATCCACTTCTGCAAGCCT -TAGAACTCACATAAACGACCTTCCCGAATCGAATGTCAATGTTAGAAACTCAATTACATT -AGACATTTTCGGCTTGGAGTAAAGTTGGGACATGGACCCGGTAGAAAGAAATCATCCGGG -TATCGGCAATCCATCAAAAACCCGTTAGAGTGTCGTGTGCCTCTGTGGCGAAAGGGATCC -GCTCATGTCCGGTGGGTTTACCGGTGATATATTTTTTTTTTGTTTTTCTTTTACTAATGT -GTTAAAATCCCTCACCACTAAACTAAAATGTATCTCCAATATATTCGGTGTGATAGGATA -CGAGAATAGTGAAAAGAAGGTGTTGTATAATCTACTCCGTAGACAGCACTTGCTATACAT -TATACACTATTAGTGTACTTAGTGGGCTTAGTGTGCTACTAGTGATCCAAGGCTGCCTGA -TAGGTACTTTGTACTCCGTCTACTCCGTATACTCCGTACGGAGGGTCACCTGGCTCCGTC -GGCAAGGATCTAGGATTCACGATCCCGGCACAATGCCGGGGCAACTGCCATTTCGGAGGA -TCAATGTTTGTTTCATCTTTGGGGGAAATCACCTAAACATGGATCCGGTCTGGAGTAGCG -AACATGCACATAAGCCGCTCCGTTTGCCAGACGGGACGGATGAATGGATCCATAACTTGA -TTTAATGGAAGTCAATTCCGGAGAAGATCATCCTGTGGGAGATCTTTAGTGGACCAGCCT -CCAGTATATTGAACATTGCATACATCCGTACAGCATATAATTTTGCATACATACATGATC -GAATATGTATGATTCAGAAGCTAAGATAGCAAAAAAAAGGGTAATATCCAAGTCCCGGAG -GCTTGTCGGGAAATCCCAACACAGAGTTTAGATCCCTGCTAGACACGCTACTTCCCCCGA -TTCCCACTTTTCCACGGAACTGCTTTGTGTCCCGCTCAGGGAAAGATGAGCATGTTTTGT -ATGAGATCGTCTATCGCGAAGGCTAACCGAACTCCTGTAATTATATGATACACATCCAAT -ATGACCTCCATGTTCAACAAATCGGAGGTCAGGGATATCTTCCCTTCACCTCCTTACCGT 
-CCCCCTAACACGGAGTTATCTCCAGATTACTCCCCGAGGAAACCCGGGCTGGAATGGAGA -CGGCTGTCGACTGATCTGACACATGCTGATTTTCGAGTGACATGAAAAGTCAAAAGAAAA -AAGAAaaaagcaaaaagcaaaagcaaaagcaaagcaaaaCCCTTACAGAATATAACCGAA -ATAGGGAGCGGTGAACCCCTGTTCTAATTGATAGATCATATAATTCTGATTCAAGATGGA -AATGTATTACGATCCAAATCAGAGAGAGTACGAGATTAATAATGTTTGGTGAATTAGCGG -CACGATCATTGGTATATTACAATGGAGCCCCTGTTTGCAGTAGATATCCACTTCTACGCA -ACGGTCCAAGGGCAGATGAGAACAGCCATAAGACGCTAGGCAAAGAACGGATATAACGGA -TATGAAATATTAATGGGCCGATTCTAGATGACTTTCACATGCAGCAATCCAGGAGAAGAG -CGTAAAAAGAATGTGATCGGAACACATCCGGAAGGGTGGTGTAACTATATAATAGCAAAG -GTACGATACAAAGAGATGAAGAGTAGAAGCGTATAGAAGTAAAAGAGAGAGGAGAGCAAA -GGAAAAGAAATACAAGAAATACAAGAAATAAAAAGGTGGAAAGTGGATTTAGTGGAGAGG -TTCTATTCTTCCATCGACCTTCCAGGAAGTTAAGGCAAGTCCCGGCCTTCGACCGAAAAA -GTTCCCTTTAGCCCCTTCGACTATTATATCACTACACGTTCTCTCCCTTCTTTTAATATC -TCACCTCCCTTTTCTTTCTTCCTTAAACCTTTCCCAGGAACCTGGTCACTGTGTGGACCC -GCATACCAGTCTTTCGTCATTATTATCCAGTCATGCCTGGCGACAATGGATTAGAGATGC -CGGTTATTGCTTGATCAGGTCCACCATGGATTTCATTGTAGGTAAAGACGAAGTTAAAAT -TATAGAGCTTGCACTGACCTTTGGAAGCTTTCTTTGACGCACTTCTGCGAGGTGCATGGT -CCCACCTCTATCATTTGCTCGCAGGTTCTACCTTTTTCATGCCCGCAATGTCACCCCGAA -TCCGACTCTCCTCAGAGTACTCCAGCTTCGGAGTCTCGCTTCGATCAGCGATCTCACAAA -TCTATTGATTCCAAGTCGCCCAGAATCGAAGATCACCCATACTTTCTTAAAGGACACCCG -GCCTCCCCCGAGGACAAATCTAACCGAGGTGGTGCAAACGGCGACACCTGCGCCAGTTGC -AGCTTGTCTCTTCCGGAAACTGTGAGCAAACAGCTACCTCCCGGTGCTCCTGGAAGTCGC -GACGCCAAAGGCAATTCTACTAGTCCGGTGCTCCGCTCGCGTGAGCTCGTTTACTCATGT -GGCAACAATCATTCGGAATTGGATGATGAGACCGATTATCACACTCATGCGTCTCCTCCC -GACTCACTTCACTCTGCCTCAGTGGCCTCCGATACCTCATGTCACACACACATTCACACC -TATCTCTCCCAGCGAGGACCGCCTAAACCCGCTGACTATGCACTTCTCCGACGCTCCTCA -ATTCGTACCCTCAGCTGTGAGCTCTTGCCCCGAGGACTTTCCTCCGGCCCCATCTGCTTC -GGCGACGGAGTCGCTGGTTACACAATCGCCTACATTTTCCGTCTTCCAGACCCTATGGCC -CGCGGCAAGCGACGCAGTTATGCATTAGTCGCACTGGCGGGACGAGATGCCGGAAGAGCA -TTCCGCGCATGCCCCATTATCTGGCGCGCTTTCGGCCGCATCGCAACCAGCATTGTCAAC -GCCGCTGAACGCTTCCAGGAAGACGAGAAGACCCGCGATGAACAGAACAACCCCGGAAGA -CAGCCCGGCCGCCAGTATCCTCCTGTTTCCTCCTTCCTCACTGGCCGCGGTATGGATCCG 
-GCCGGCCGGCGTGCAGCCGGCCAGATTCGTGCCCGCAATTTGGCCGAAATCGTTGGCAAT -GAATACATCTTTACCGAATTACATGCTCATTTTGTCGCACTTCTTCAGCAGCTGGGCTCA -ATGTTTGGCGGAATCCCTCTCAACGAGGAACGCTTTGTCTGCAGCACTATGGGGGACGAA -GAAAGCGCTAGTCCTTCTCAGCCGGTCCTTGTCTCTCAGGCAAGACAATCAAACGGCGAT -CAAAGGTTTGACGGCAACGCCATCGATGTGTCAAACCTCGAAATTTCCGCGGGACCCAAG -GCGATCCCCATTGCGCCTCGCCGGCCTGTCATGGCTTGAAATATTCTTTTCATCATTTGC -ATGAACCTCTTATTGAGATACCCGCCTGCTTTGTTCTATTTGATTTTCAGGCACTTTCAA -CCTTGACCTTTCTTTTTTTTCCTGTGCTTTTTTCCGGGTTAGCAAGGTGTTTTGACCGAT -TTCTGCATTGCTCATTCTGCACAATGATTCGGCCTTTTTTTTTTTTTCTTGCGTTATGGG -TATGGGCATAGACTATGGCGTCTGATAAACACGGAACGGAGTTTCTTGTTTTACCAGCCT -TGATTTCAAAGTTTCCTCTGGTGGGGGATATGCCTTTCTTCTCTTGGGCACCTCGTTCGA -TCTCCACAGAGTTCAACGATTCAAGCACGTCTGTTTAGCGGGTTTCCATTCTGGTGACTG -GCGCTAGGTTATGATTGAGATGTTATGATCGCACGAGGCATTCAAGAGATATGTATTCTA -TGTATTATTTGAACACGGATCCATGGCCAATGGCCACTTTTTTCTTTTTTTTCTAGAAAG -TCGAGTTGTATTGTAGTATAGTCCGGAAAATTACGGGTAAAAGAACCCTTTTAGGTTAAT -CCGGCCGCGGCCACAATCAAATCAAACCTTAGAAAATCCCACCAAAAAGAACAGCAGAGA -CAAAAGACCCGCATCACCTCCCTCCTCCCTTTAGCAATATCAAAATGGTCGCCTTCGACA -AGTGTGAGACCCGGCCTGCCCATATTGAGGCCATCCTCAACGGCCTCGACCGGTACAACC -CCGAGACCACCACCGTCTTCCAGGACTATGTGTCCCAGCAGTGCGAGGACCGGTCATTCG -ACTGCTACGCCAACTTGGCCCTGCTCAAGCTGTGAGTTGGATTCCTTTCTTCTGAAATTG -TATTGTGAAACAAAATATCTACTATGATGAGTATCTTCTAAGCGTTTCCTATTACCTGAC -ATCCCCATAATCTACTTGGGCATGTTGAGAAATTCTGTGAAGATACTGAACGGGAAGAAC -CCATGCATTGGTCAAAATTAGTAGCATCGCACGCTTCTCACAAAAGCCTCAAACTGTCCT -GAAATTCTCGATTTCAAGCTGACAACATTGCTCTTCGAAAAATCCAGCTTCCAGTTCAAC -CCCCACCTCATCCAACCCGACACCGTGACCAACATCCTGGCTAAGGCCTTGACGGTCTTC -CCCTCGCCCGCCTTCTCGCTTTGCCTCGCCCTGCTCCCAACAAGCACTCTGCCCTTCCCC -TCCACCGCCGAAGCCCAATCCGCTTCGCAGACCTCCGACTTCGTCGAGTCCGTCCAGAAG -CTCACCCGTCTGTCCACCCTCCTCGAGTCTGCCCAGTACGCCCAGTTCTGGTCCACCCTG -AACTCAGACGACCTGTACGCAGACCTGACCGCCGACGTTGCCGGCTTCGAGGAACTCGTC -CGCATCCGCATCGCCATCGAGGTCGGAAACACCTTCCGCTCCATCACCGCCGAGATCCTC -GAGTCCTGGCTCGATATTCGCAGCCGTGACGCGCTCGACAAATTCGTTGTCGATGTCTGC -GGCTGGAAGGTTGATGGCCCTCTTATCCGCGTGCCTACCAACAAGGAGAACGAGGCCCGC 
-TCCGAGGTGAAGAGCGAGCACGTCGGCATCGAGCAGTTCGGTCGTGTCATCCGTCGTGGC -TTTGAGTTGCCCGCATAAATGCTTACGGAAACTCATTGTTCTTGATTTTATCTTCTCACG -TCGATTCGAAATGTTTCACTTCTTTCCGATTTTCGTATCATGTTTTTTTTAAAATCCCCA -AAAAGCTGTGCGAGGGAAAATTCGTTTCCCATCCATGACATGAGTTCATGAATCCCACCC -CCGATCAAGTGACGAACCAACGATAGATTTCACATGGAGGAGGGGCTCGGAGATGAGTCG -GGACCTGGCGCATCTGGCGtttttttctttttttttccccttctatttctcttgtctgta -ttggattcttttttttttACTTGTCCATGCAAATATTTGCACTGGATAATATGATGAAGA -GAGAAAGATGTCATGTGCATACCATAACATAAATTCAGTCGTGCCTCCATGTAGGGCCGG -CTTTGCCCCCAATATAAAAAATGATACAGAGCTAGTTCTAGATGTAGAAGCAGGGGAGAT -ATAGCACGACGTAGTGTCACTATAGAGGATACAATCTTTAATCACATCAATGTTCAAAAT -ATTCAGCTCCTACAAATCATCACTAACTCAATCACAATTTAAATTGATTGATAGGAGGAA -CCGGTCCAGTGAAAGCCTCCCCAGTCGCCGCCACTGTCTTCCCATTATCAGCAACAACAA -TCTGACCATTCATCGCCGATGCCAGGTCCGAAGCCAAAAAGATAGCGACATTGGCAATTT -CCTCCGGTGCACTGCACCGATTATAATAGTATTCCTGCTCTTCACCTATCCCAAAGCCCT -CAAAGATCCTAGCCGAGCGTTTCTGGATTTCCTCGACCGTCTTTTCAAGTTTATATTCCA -CGCCTTCTTCAGCAAGTCTCGAAGTGGACATTAAGCTTGTAGCGACAGGACCAGGCGCAA -CGGCATTCACGCGGATATTGCTAGAGGATAATTGAACAGAGCCGCTTCGCACGAGACCAT -TGCAGCCGTTCTTCGCTGTTGTGTATGCCATGTCTGCAAATGCACCGCTGAGTGTTGCAC -ACGATGAGGTCACTACTATATTTCCCTTTGGAACGGGTTTGTCTGAGGATGTCACGCTCA -TGGCTTGGGAGCCGTATTTCAGTGCGAGAAAGGTCCCGGTTTGCATGACTGCCATTGCGC -GTTCGAATTGTGACACCGAGAGGTCGTTCAGACCACTCAAGCACTAGTCAGCTTTACTGG -TATCGAGAAATGGAAATATATGAGGGAAATCACTCAAGACTTACCGATATATTACAAAAC -CTGCATTGGCAAAATAGAAGTCCAATCGACCATAATTCTTCAATATATGTTGTATTAGGA -TAAGCGTCTGTTCCTCGGATGAAATATCGAAGACATGACATTCCACTATAGTGTCACATC -CGGTGGCTTTCACGGTCTCTTGGAGATCTGCAAAGTAGCTCGAGCTGATGTCGCAGGCGT -AGACGACTTTTGCTCCTGATTTCGCTGCTGCAAGTACTATAGAACGGCCAATGCCATAGG -CTGAGCCTGCGCCGGTGACAACACCGACTTGTCCCTTTAGTGTTGCTTCGACCATCGTGG -GTAGATTGGTTGCATTCGGGGTCTTTTCGGAAAACTGGAATCCAGGTCAATCTTGCTCAA -ATATATAATGGGTCGACGGGCACTTGATGTCGCACAACAATTAGACCTGCAGCGGTGAAT -TATTCAATCCTGTATCGATTGCCGAGAATAGGTCTGGCTGCATCGGACTAGGACCGAACA -GAACACCGTCTGCATTGTATGATTTGCTGGATCGGCTTTCCAGTTGACCCCAGCTCATCA -TAGCTTGAAAGTGGTACATATGTATGGTCAGAAAGGTCTATGTGGTTGTGCCCAAAACTC 
-TTCCTGGTGTAACTTCCTTCATATGTATAAGAACGTAACTGGTAATAATAGTGTCCAGAA -TCTATCGAGTAGGGTCTGAGCACTCGAGGTCCTTCTGCTACATCGGACCTTCATTTTCCA -GGATGGTGCATTACCTGTGTGGTTATAGCGTCGTAGGGTTACCAATTGTCAGCCTTACCT -CGAGATATTTGGAATGTCATAAATTTTCTGCGCTTTCATCACCCTTAAGATGGAAATCAT -CTTGTGTATGTGGAGTAGATGAGATTACTTACAAGTATTAGGTACCAACAAGGCGATACC -CGGAGAACCGAAATGGCAAAAAGAACAACTGATGGTAGCGATCTCATACTCAACAATCGG -GAAATGGAAACACTTACTTCCCCACTAACCCATGGCGATAAAAGACTCTGTTTCGTCTGC -TTCTGGTCGAAAATCATGATATCGTTTGATATGGTATCTGCCATATTTGTCAAATTCAAA -GCTCAAGCCATTTGATTAGTTGGGGCGGCTTCTTACAGCCAGTTCACCAACCCAATGTCA -ACATTTTTGCAGCATGTCACAAACATTGGGCCCTTATCGGCCAACCCGCTCTTAGATGCA -AGGGCAAGAACATCCGGCACAAGGCTCTCGAAAGCAGTATAGCTATCATTAATAAGTTGA -CAGGCATAATCAGTTGCCTCCTGTGGCCCCAATCCTTGATGATAGACGAGCAAAGGAATT -ATATTATCTACATGCTTGCACCGCTAGACCTAGTCAGATGTTGAACTTTCATATAACTGT -TGACAGAACTCACCAATTCATTGCGAGCGGATACAATATCGTTGCTCCTATCAGGGATCT -TTTAGCACGAGGTCTATATTACTTCGACCATATACCGACAGTGATACCATGATGTTGATA -TGTCTCATCATTTCCATTGGTACAGTATGGTTGATAAACCACGCTGGAAGAGCGATATCC -TGTAGAATCCTGACCTGAATTTTAGTAAGTTGCAGACGAAGTAATGACCGAGGACTCACA -AAGCCTGCCCTATCACTGCATAGACACCTGAACCTTCCATCCTGTCTTTTAGATAGACAT -CTAGATTCATGATTATACCATTCTGAAATGCGTCTTCGAAGTTGACTACTCTCGTGACAT -ATTTGCAAAGTTCATTGCAGAAGAAGTCTTGAAGAGGCTCTGTACAATTGGTTAGCCAAT -AATCCCTCGAAGGTTGAACGGGGAAATGAAAACATACCAAGCTCCGTTCCTTCTTGAAGC -GGAAACCCTATGCCGTAAAACGATTCAGCCAGAGTAACGGGGACATCTTTCCAGTCAAGT -GGTTGTTCTATGTCAGTCTTTCCTAAAAGACCTCTCTGAATGGTTCTTGACATCTCAATT -CGTATTAGCTCTCGTTGCCTCGGCCCAACACTGAAGCTGCCTATATCGAGAGCTGTCTCA -CATTAGTTACAAGCAACGAGATCGCGAGATATGAAGATACCGTCATCCCAAGCAAAAAGC -TGTGGTGTGTTCTTTGTTAGTCAAAGAGATAATGAGGATGCTAACTCCAGTACGTACCCA -GCTAGTAAATCTTGCTCCAATCTGCAAGTTTGGCAAGCTGGCGTCTGGACTTGCAGCTCG -CGTGAAGTAACCACAGTCGGTAGCCAAATTACGCTTGTAAGCAGTGGGCTCCAGTAACCA -ACTTTGGGTTTTTAGCATGATTGAAATATATCATTCGACTAAATTTGCATACCGTTGACG -CCATAATTCTAAATTCTCCTCCAATTTTCCATCCAAATCTGGAACCACTCTGGGATGCCA -GGAAGGGGACAATGAATACAAAGATGGGAGTCGGACCACTTGACCCTTGAACTTGTCTAA -GATCTGTTCGATAATAGACATCTTGACGCTGCTTTGACTGGAGTATAGAGCAAGTAGAGT 
-AAGTAGACAATGTTCATGGACTTCAGGAGAGAGGATACTGGGACTATATATAGAGAGTCC -TTTCCCCCCAAAGACTAGTATACATAAGTCAGTTTGTAACTGTTGCGCCGATCTTGTCTA -TTCTTGTTAGCCATACACTCAGCCGTATAAGGTTTTTTTTTTCTTTAGACATCAGGTGTT -ATGGCCACTGCATGTGGCACAACATGACGATGAACTATAACTCTAATCCAACGATACTAT -TCAAACGTTGTTGTTATAAACATGCTTTTTAGGAACACGGAGAATCGTCGTACAACTTTG -ACATTGATGCAAAATTCGACATAGGGATGACATTTCCCGTATTAGATGTTATACAAAAGT -CACAAGGCGTTCCCTCGACAAAATATTGGAAACGCTGTGTAAAATTAACTCTGTATATAG -CTAGCCAATATTTGATTTTCCCGGTAACATTGTTATGTACAACCTAGGGTAGGCAAAATA -CTCTCATCTAATGCAAAAATATTACACCTACCGCATCTCCTCCCACTGCACGCCAGCCCA -TTCCTCCCGCCACCCAGTAACGCCCAGCTCCTGCGGCGTACAGTTCTGACCACAGACTCC -CATGATGATAGTGCCCCACTCCATGCCCGCATCATCTTTTCCAAGACCAACACCAGCACG -CCCCTCCTCAAGCATGCTGATAGCATGCGGCACAAGCTGGAACTCAAACACACGCTGACT -TCCACAGTACTCACAAGCTGGGATGCTCCCTGCACCGGTTGTGGTGACCTTGGCATTAGG -AGCACTGTGAGCATGCAAACGGGCACCGACTTCATCGCTAGTCGAGTACAGCAGGGGTGT -ACCGCGGAACTCATACCGCAGCACTTGCTCCGGGTTGTGCTCGAGGCGCATGGAGAACTT -CATGAAGTCCTTGTCAAGCTCGGACTCGAAGGTGTCCTTGAGCTCGGGGCCGCCCGCACC -TTCTTCTTCCATTGGTTCGATCTGCACATTATCTGGCACTGTGGGTGTAGACGGATCCGA -GAGAGTCTCGTATTCTGCGTCTAGGAAGAAATTCTTATACGGCGGAGGGAAAGCGGACTG -CGCGGGCCAGGGCGCCGTTGCGCTTGATTCTGGTGTCTTCTTTGTCGCGGCCGAGGGGGC -TGGCGAGGAGATGCGCACTTTGTCGGCGAAAGTTGAGGCCAGGGTGCTTTCAGACACGGT -GGGCTTCTCGGTTGTAGGTGCCGTTGCCTGGACTGGTGTTGCGAAGGGGTTGCTCCCGGC -GGGTGTTGGGCCCGAAGCTGATGAAAATGGGTTCGGGTTCGAGGTGACATTGCTTGTCAG -GTTTGTTGCGCCGAACAGGCTCGCACCCAGGTCCTGCTTGGGTTGCTCTGGTGCGACGGG -CTTGATTTCCTCTGCTTTCGCTTCAACCTGTCTGTTTTGTTCGCCGACGATGACCTTGCG -CGCTGCACGGAATGCTCTGATACTGCCTGGTTTGCGGTTGCAGGGTTTTCTGGGGCAGCC -AAATATGTATAGGCGTCGTTCGTCGGTTGGGAAGTGATCGGGCAGGTCGCCGTGCAATTC -GAGGAGGAGCAGCATTGGGCTGTTGCAGACTTTGCAATTAGCAAAATCACCAGGTGCTGG -AGAGGCATCATCAAGCCATTTCTAAACAAGGAATTGCGATTGTTAGTTGGAATAGGTTTA -TATAGAGTGATCCCTCGCAGTGGTTACACATACAGGCCAGCCTCCAAGATGGCTGATTAC -ATCTTCGACGACATCCTCAGCAGCATAGCCTAGAAGAACACCGGTATCGGTGAAGTCTTC -ATCCAGACTGGAGTCACTGTCGTAGGGATCCATCTTGAATCAATTCGGAGTTAAGTGAAT -CAAATCTGTGAAGCGCCCCTCTAAAAGATAAGCGATATGAAAATGCACCAGTCCTAAAGT 
-AGACTAGCGTGACATTGATACGTCTACCAAGTTGTCCGCCCTTGACACTTCGACCTCCAT -TCATCCTTTTCATCCATGGGCATCTCATATTTGATCTGTATCTAATATTTGGCTAATCAG -TATTGGCTACTCAACGGCAAATTAGCTCTGCCAGCTTCTAGTACTACATAAATCATGCGG -TCTATGCGACCTCTCTCTCTCTTCCTATACTCCATCCTACTCGTCTCCTTTGGTAATGCA -GCAGAGCAAGAGCCATTGCGAGGAGCCGTTAGTGCAGCACCCAAGCGGGTGGCAGTAATT -GGTATACCTCTCTCTGTAAAATAAGTATGAGACATACGGGGCTAATATCCTTCATAGGCG -CCGGAGCTGCAGGCTCGTTTACTGCATATCAACTGCGCAAACTTGCAGATGAAGCCAACC -TTCCTGTCGATATCACAATATATGAGCGTGAATCACATGTCGGGGGGCGCTCGACAACAG -TCAATGTATTCGACAACCCAGCGTACCCTATTGAACTAGGCGCTTCAATTTTCGTCCAGG -TCAATTATAACCTGGTCAACGCTTCCCGTGACCTTGGCCTAACGGTCCATAGTGCTGATC -ATGCAAGGCCTAGGGAGACCGATGAGAGCATCGGGATCTGGGATGGAAACCAGTTTGTCT -TCACGATTAAGAGTTCGTATACCTGGTGGAATATTGGCCGGCTATTCTGGCGGTATGGAC -TAGCGCCACTACGTACTCAAAATTTGGTGAAGAGTGTTGTTGGCAAATTTCTCCGCCTAT -ACGACGAGCCGTTGTTCCCTTTCAGCTCTCTCACTCAGGCAGCTATTGCGGTTGACTTGA -TCAATGCCACTGCATCTTCTGGCAATGTGTTTCTCCAGGCAAATAGCATCGACTCTCTAT -TTGCGAGGGAGATCATCCAAGCGAGTACTAGAGTGAATTATGGGCAAAACTTACAGTTGA -TCCATGGCTTGGAATCTGTTGTCTGCATGGCTACGGATGGGGCGGTTTCTATTGAGGGTG -GAAACTGGCGCATTTTTGATGGCGCGCTGAAAGCATCTGGAGCAAATATTAAAGTGAATA -CCACGGCCACCGAGATTTTACGCAATGATGATGGAACGGTACGGGTATCCTCGAAGCTAA -ATACCGCTTCCAAGTCTGAAAATGAGGTATTCGATGAAGTTGTCATTGCCGGTCCTTTGC -AGTACTCGGATATCTCCATCTCCCCTCCATTGGAGTATACTCCCGATGAGATTCCGTATT -CCAATCTGCACGTTACGCTCTTTGCATCCCCACACCGCATCTCACCCAAGTATTTTGGCC -TAGATCCAAATGCTCGAGCGCCTGAGACCATTCTGACTACTCTGCCCGAGGGCCTTGATC -TGGGTTCGAACCCAGCTGGAGTTGGGCCTAGCAGCTTCTGGAGTATTAGTACACTCCGAA -CAGTAGATCATGCTATTCTGGAGTCTGAGGATATACAGCAACATTACGTTTACAAAATTT -TCTCGCCAGAGCGGCCCACGGCTGGATTCATTGCCCAAATCCTGGGGCTGGAGAGTGACA -CTGCGGGGAACAATACCACCATTGGAGATCTCTTGAAAGATGATATCAGCTGGTTCCATG -AGAAGCTATGGAATCCATATCCCCTTGAGTATCCGCGAGTGACATTTGAGGAGACCCTCC -TGGCTCCAGGTGTATGGTACACAGGTGGTATAGAGAGCTTCATCTCGACAATGGAGACCA -GTGCGTTGATGGGGCGGAACGTGGCAACTCTTATGTTCCAGTCGTGGTAGAAGCAAGGAC -ATCACATAGTGATACATCTGGAATTTAGGTACTTTGAAGCCTGTTTATTTCTTACCAAAC -CAATCAAACATTATACATGTAAAGTCCCTTTCTTGCTTTCAGCCTTCATTCGTAAACTAT 
-ACCTTCATCTTACATGAACCACTCAATGTGAATGTCGGACATCCTTTAATCTGCCAGTTT -TCTCCCAGACATTTAATCTCAATGCCCATTAGATGCATAGTTTCTTTCTTTCTTCGCGCA -TCTACTCCTATTTCCTAGTAGACCTCTAGACCTGTTAACTCGATAAAACATGAAAAATAG -TCAAACCCCGTTAGGGTCGACCCCACTTTTTTGTAGACCTCGATAGTTTTCAAGAAATTT -GGGCTAAGAAAGCCGTCTGCTTTGGGCTCGGTGATTTTTGCCGCTCCGCCCCTGAGTGGC -TCAAAAAGCTGCAAGACTCTTGGGATCAGACCTCTGATGTGGAAAATGTAATGGGCTGTA -TGATCCAGCATTCGATGGCTCTCACTATAGCTCAACTTTGCTGTGGCAATGAAATAGTGC -CATTACCATTACCTTCCCAGGACCCGGATTATACAGAGGTAGCAGAAATGATCCTAAAGG -AGAAGGAATTCAAAATTGGTGGACCTCACGGCGCAGGAGGATTCGCGGAAATCGACGAGG -AATCAATCGTGATATCGCCCTTTGCTGCCGCCCCAGTCAAGCAGATCATCGCCGATCTTG -CTCGTCCTATACTCATTATTTTCACCGGCTTTGATGTTTTCAATGGCAACGAGTAAGTTG -ACCTTTATCAATCCTCGAATCTAGCTAACTAGGGCACTTCATAGGAAGTCATTGGCAGAT -GCTGAATCTCCGAGAACAAGACAAATGTGACTTGATTATGATATCTGTAGCCTCCCAAGC -CATTCTGATATTATTGAAATAAGCTGTGCATTGAAAGGATTATATTTGTATCATAGACGC -CAGCAACCACTGGATTCGCAAAGAGCTTGAGCTCACAGAACTTTATATCCAGGGATTGTG -GATCACAATGGCCTTGAAACAAAGTTGAGGGTATTTCCAGCATGTGTCCGATTGGTAGAC -CAGAATAATATGAGGCTGCTCCTTCCGCGCTTCTCAGTCCATGCACAATTGATGCACGCT -TGCTTGATAGGAAGCTTCCTAGAAAGCTCACCAAAGTCATTACAAAGCAATGTTTGGTCG -GGCTCAATAGTTACACCAACGAAGATGCAGAGGTTAATCATTTTCTTGGAGATCCAGCTT -GGATGGTGGAAGTCTCTCGGAACCATTCTGTGATGTTGATTGTAACATTCAAGAGAGAAA -ACGGGCGAAGATACTTCCTGCTTGGGGTGGGTGGTATGGCAGAGAAATACACCAGACAGA -GTACCAGTGTCATGTAAAATACGATTTGTCTCTAATATTGGAAATCGGGGGTCGACAACA -CCACCTTTCAATACTATAAATGAATATGAGGGTACATAATATCCGATTTTTTCAATCCTA -CCCGCATGAACGTCTCCGCTTTAGCTGACACGCCCTGATTACCTAGGTACTATATAGGGC -GCCAAGACGTTTCAAAGCTTCAAGGGTTGAAGTGTGATTTGGTCCCCGTACTTGCTTGTA -CCCAATTCATGCTCGCATAAGCATTTTCTCGGCCTCTTCCAGCTCGCTTTGGTTGCTAAA -CAGCACCCTGAGGAATTCCGCAGCGTCAAGTGTTGATTTGTGATCCGGTAGCGTTTTGTA -CCTAGCTGGCTTTGACTGTAATAGATTTGATTCCTTCCTGTTTGGGTTAGTTTTACTAGT -TTAGGTTTAGCCTCCAAATGCGCCCGACATGGAGACCTTCAGCGGTAACCCTAATTCAAC -TGGGATGTATGTTGTAGCATGTATTCTGACTCCTGGTAAATCGAGTTTAGCTACGGTGCA -GTTTGATGTTGTTATCACAATCCGAAGGAATAGAGTATTGTGTCTGAAAGCTGTTGCATT -AGACTTCTATATAATTGGATAGGAGCTAGTAATCTGCCTCGCTCTCGCTGCCGGGGTATG 
-CCTTCTTTCTCAACTTCCATAGTGACTTTTCTCGTCGCGCAGCAGCTTCACGCTGGTGGC -GACTGTTATCTTCCACGACAGTCATCCAAGGCACATTGCTCGCCCGGCGGACCCTCTCTC -GCAGCAGTGCAAGCGTTCGACTATGCTCAGCCGGATCGAGGTCATGTGACTGTTCACTAC -CAGAGTCTCCAGGTTTCTTCGACCCAGATGTCGATTGCTGGTAGCGGCGCTCTGCCGCAC -GAGCGGCGTAGTATTCCTCACTGATGGCCCAATTCGGGTTATCAAGCTGACCACGGACCA -GCACAATGCCATGACCTTGCTCGATGGCTCTTTCTAGGTGAAGCGAGGGAATAGGAGTAG -AAAGCTCAAGCTTGCGTGAAGAGGTATCGATCGAACGGACCAAAGCTAGACCAAGACAAT -TCGACGCCTTGGGATCTAATGGGGTGCAACTGCCGGAACCAACAAAGAGGTAAGGAAGGT -CTTCGGATGTTCGTATGATGCTGTCAGCGATGGAAAGCTCAGCTGGATTACCCGCGCCTG -GATTTTCTGGATCCATGTCAATATCCCCATGGTCTTCGCCATTCGACGTCTGTGATATTT -CCATCCCACTCATGATAGCATTCGGCGATTCCACTCCCACGACTCCTACGATAGAACCTT -CAAGCGTTTCGTGCAGTAGATCGGGGTTGATCTGGGATCCCATGACCATCACGCCCAGAA -TACCTTGTCGTGCGCCAGCATAGTCCACAGTAATGGGCCTCGTGCGAGATAGCGGTACGT -CTGACCAAAGCGAAGTGCTGAGATCATTTGGATGGAACATATGGAAGTATGACTGGATTT -GCATCGCTCGCAATTGAGCGCTTGATCGGCTCACGAAATCAGTGGGCTGAGATGGGAGGA -TTGTTAAGGGAACTTGAGCTTCACGGGCAGCCAGACTTAACGGATCCACAACTTCTGAGG -GGCCCTTTTCACTCATATATACCACATCAGAGAGACCGAGAGATCTGACCAGCCAAGTTG -CAACCTCTAATCCTTGCCCAAAGATCCAACCGGGGTAGTTGATAATCAGGGGGCATTGGG -GATGTTCCTCCAGCAACGAGCGGTATTGATCCATAAGATCCATGACGGCCAGTGCATAGT -GATCTGGGTCTTCTTTTGGTGATGCTGCGCCGATATGATGCGCCCGCACTATTTCACCGT -CATGGGACTCATTCAATGATGGGTGAGTGAAAGGCGGACCGAAGAAGGGAGAGCGCAAGC -GTGCAAGATAAATTTGACCCATGGGAGCAAACTCAGGCTGACCTGGGTCTAAATCCAGGA -AAGCAACTCCATCGGTATTAGTATATCCATTCCCGGACTCGGGTGTCGGGCTGAGAACGT -GATTCAACAAGTAGCGACTAAAAGTGGACTTTCCGGATGCTTTGGGGCCACATATTAGAA -CACGAAGTTGTCCTTCACGCTGTGAGAGAGCTTTCATTGATGCACTCCATTTTTTATCAA -GGTGAAGTGGTCGAATATGTCTATTTAAGGAATCATCTGCCGATGTGTACAACTGTCAAG -TCAATTAGCTTTAAACACAACTTGAGCTCGGAGTTGAATGTGATGGCATACCACTGAGAA -AGTCCGCTTCGAGTCTTTTGCGGCTCCTTTCAGGGTGACCTTATCCGCGGCGGTGTTATC -GCCAGCCCAGATCCTTTGATACAGTGGCGAGAGATCACGGAGGCGGGAAATGCCACTGGT -GCAGGACTTGATCTCCACTTCTGCGTCGCCATCAATACCAGCAACGCATTTGATAACGGG -CAAAGAGTGAGTGGAGGGAGCATAGACTCTGTGCAGCTTTGAGGATGGATGTAATTTCGC -GCCCATCAGACTGACAACACCTCTCTTGACCCAGAGGTCATAATGGCCCAACAACGCCAA 
-GTTCTAAGTCAAGTTAGCATTCACCCCCGAATGACTGGAAAGGCTCCTCCGTACCGTTCG -TTCCTTGATGCGAATGCAAAGTGTGCTCTCATCACTGTAGACGATATTGCTTTTGTTCAA -ACGAGCTTTGGACAAAGGAAAATTCTGGATTTCTGATGGTGTTTCTGCGGGCGATTCATA -GCCATCGGCGTCACCCACAACAGATGCAATATCATCCTCATCCATCTCTTTCGCATCCTG -GGCTTGATCTTTTTCAGCCTGCTCGTCTTTGGCCTGCTTGGGTATATCCTCCACTTCGGG -TTTAGCTTCAGATTTTTGAGGCCGGGTAACTCTTACTGAGCGCTCCGGTGTTTTCTTTGC -GGGCAATGGCTCATCTTCTTTCTTTGCTGTCTTTGTTCGTGTACCACGACGCTCTTCATT -TGTTTCAGTTGTCCCAGGGCCCTCGAGAGACTGCCGTGACTTTTTTGAGGGTGGCTCGCT -CTCAATGATAGTTTGTGGCACATCCTTTGGTACATCCACGCTTTGTGCTTGCTGTTGCCG -AGCTCTGCGCGCTGCCAAAGCGGCGACGGCACTAACAGGGCCTAGACAAACAACAAGGTT -AGCTTCGAATTGGTGTCGCATAGCATGGATAGACTTACCAGCTTGCTGCTGTTTTTCGGC -CTTTCGCTTCATCATGATATTATACAATTCGACTGCACGAGCTTTGAGCTTGTGTTAAAA -AGAAAATCGATCTCGATAATGCGATCCAATGCCGATAAGCAAAAAAAGTTGACTGGTGGT -CCGTGCTCCCGCGGAGCAGAAAAAAAAGATCGTTGGACCATGAGCATCCTCACTCAACGT -CGATCCAATTAATTTTGCGAAATCGGCTGTGCTGAAACTTTGCATATATATTCTGTGCAT -CATTCAATTTTCCAATTTCATTCATCTTCTGCATTCCGGGAGTGCTAAGCGCCGCGCATC -CTTGCGAAAGCATTTTTAGTCTCAACTTCGAGTTTGTCCGACACATTGAACTGCCGCCAT -GGCATCCTCCGACAGCGATAGCAGCTCGAGCAGGTCGACCTCGCCCGAGCTTATGACTCA -AAAAGAGAAAAAAATGACGCAACATCTGAAGGCGCAGGAATCAAGTGAAGATGAAACCTC -GGACTCAGGTTCAGACTCGGATAGCGACTCCGACAGTAACGAAATGAAGCCCAGCTCCAG -CTCAGGCAAGAGGTAAGAGATTATCCTTATATAATTTTTGGTAGTAACTAATAAATGCGC -CCATAGGGTCGTCATTTCCGGTCCTCAGCCATACAAACCACCAGCGGGTTTCAAGTCTGC -GAAGAAGCAAGCCCCGCCATCCTCCAAAGTTTCCTCCCTTCTCTCCAACCTCAACGGCAA -ACAAGTCCTCCACCTCACAGCACCAGCCTCTCTACCTTTGTCCAAGGTCAAAGAGGTGTG -CATGGCCAAGATCATGCAGGGTGAACCGATCATCACCCACGAGGGTGTGAACTACGGTAT -CCCAGTCGAGGCTCTTTCCGAGGCCGATCCCGCTACCAAATCGTTGCTTATATTCGACGA -GAAGACACAAAAATATTCCACAGCAGCCCAGAGTGTCCCAAGTTACCACGTCCAAGAAAT -GATCGGTCTCCCAAGCACATCGAAGAAGACTGACGCTGCAGTGGCAGAATTGCGCAAATA -TGTCAAGCCAGCCCGGCCGCAGCCGAAGAACTTGAAGATGCGCTTCCGACCTGTCGGAAC -AACCACTGCGCCTCCAGAGACTCTTGGGTCTGACTCGGAATCCGAGGCTGAGGTGCCTTC -ATTCAAGGTTCCCAAGGGAGAGGAGCGGAAGAGGAAGCTTGATCATGACGAGGCTGAGGA -TGAAGCTCCTCAAGCGGCGGCGCTTCCTCGGAAGAAGTCAAAGAAGCACTCACAAGAAAA 
-AGAGGAGGATGATAGTCAGAGTCGGAAAAAGTCCAAGAAGTCCCACAAGGATAAGGAGGA -AAAGAAGCGGAAGAAGTCCGAAAAGGCATGATTTGGCGTTAGTGATTTTTGAGAATAGCT -TTTGTTTAACGACGTCCCCAACACTGTACAATCTACAACTCGATCTTTTACTTGAAAACT -GGTAGCTCTGATAGCCAGCTGCAATGTGAACAATACTAAATGTTTTTGGCTTTTCCATTG -CCTGTATTCTCAACAGTCACAGGACTGAAGAAGTTAATATTAGTCTGATGAGGTATTCAC -TGAATATCGCCCTAACCTGCGGTCTCCTTTAACACAAAGTGTTGGCGCTAATTTGAATTC -GGCATTTAACATTGAATGCCAACTGCCATCCGATCTCCAAATACTTGGCCTGACCATGTC -GACTGTTTTTGGGAATGTATAGCAAAAGAACAACCGAGTCGAAGGATCAAGTCAAAGTTT -GAAAGATCCAAAAATAGTTCAAGAGACACAAAAGAAGGTAGAGCGTACGCATAGACGTTG -TATAGAGATGATCTACAAAATTTGCCTCAGGCACCTAGCTGACATAAGGGCGATGGAGTC -ATAGATCACATGCATGCGGAGAGAGCCCAGGTGATGTTTCTCCCTTTGTGTATTCTAAGA -TTTCTATGAACTCTTGGATAGATTTGCGCTTTCCATTATTTGATCTCCTTATTTGCAATA -TAGATCATGGCGCTTGGATAATTTTGTGGTTTGACAGCTTCATCCATATCTACCTAGGTA -GTGTCCAAGCTTCAAAGCGCTTATCGGCCTAGACCCTGGAATCCCGTGCGTTGTTCACCG -TTTACACTTATCACGATCTGGCACCTGACCGCAGTTTTGATATGACAAAGTCACTTGGTG -TCTAGGTTTTTTGTGTATTCTTATTTGACTGGTGCATACCTGATCGTCATAGTCCTATCA -GCGCCGAGCTGTCACCTGGCTCGAGCTTCTGACTGCCCGACCGTTCTTAGTGATTCACCA -CGCCCTAGCATCGTTTACTCCCATCCTTGACCTACATTTTGCGTCGATTGTATCGTATCG -GTATACCAGAGATAAGGATGAGACCTTTAAGAGATGATGCCAAACGCCGGGCAGACCGGC -AGTTAAGCGCAAGTATGAAACCTACTGCGTCCAACCGTCAATTTTCAGACCGAGTGCCAG -ACAGATTTAAGGATGGGGATGATGCCCAAGTTGATTTCACAGCTCCGCCTCGAGGTATGG -GTTCACGGGATGGCAATGTGCATTATATGCAACAGTCACTCTTCTCCATGATTGCTGCTG -TTGGGTCCAAGTCAGACTTTCATGCCCGGTTTGAGGAATCAAGCGATAGTGACGGAGAGA -TGGAACAACACTCGCGGATGAAGCAAACCAGTGGAAAATCATCATTTCCGACGCCTGTGC -CATCCTTGGACTCCCACCACCTAAGTAAGATATCAGAACCAAGTCCGCTCGTTGAGCAGG -AAAGAGGGCGCCGCCATCAAAGGGCACGATCAGACAATAAACTTTTGCAGAGACTATCCA -TGTTAGATTCACAAGGGGGGGTAAACGAGTCATCAAACAAAAGGGACTCGATCCTCAAGG -CACCTCTAAGGCGAACTCGCAGTGCGACACCGCGAGCAGCCCCAGTCCTAAGTCGCATGG -TTGAAGCACAATCTCACTTTGACCTGACATCGGCAGCTCCACCCTTGCCATCCACCCCAG -ACAAACTCATGGAAGACCAACAGCAATCATCTGCATCCGCTCTTTCTACGCGATTGATGA -AGATGTTTGAGTTTCCGAAGCCTGAAAAGGTATTGGTGGAGTATGCTTGCTCGTTGCTCC -AGAGCATGCTTCTCCAAGGCTACATGTATGTGACCGAAGGTCACATCTGCTTTTATGCCT 
-ACCTTCCTAAGAAGTCCAACGTGGCGATAAAGTCGGGATACCTATTCAAGCGTGGCCGCA -AGAACCCCACATATAATCGCTATTGGTTTGCTCTGAAGGGTGATGTCCTCTCATACTACG -CAGACCCCTCCAATCTCTATTTTCCTAGCGGACATGTCGATCTTCGATATGGGATCTCTG -CGTCTCTTGCAGAACGGAAAGAGGGTGATACGAAAGACTTTCAGGTAACCACTGATCAGC -GAACTTATCTATTCAGAGCAGACAGTGCTACTAGTGCAAAGGAATGGGTGAAGGCTCTAC -AGAAAGTAATCTTCCGGACACATAACGAAGGTGACAGTGTCAAAGTATCATTCCCAATAG -CGAATGTTATTGACCTGGAGGAGAGCCCAATGGCCGATTTTGCGGAGACATTCAAGATTC -GAGTTCTTGACAGCGGCGAAACATATGCAATTGACGAGGTAGGTCGAAGGATGAAATGTC -ACTATGTTCATTGCTGATCACTTCACAGTACTTCTTCTCGTTCTTTGATTCGGGGCAGGA -CGCATTCAACTATATCAAAGGCCTGGTCAATGCTGCGTCGGCTACCGCCGCAATGAAAGA -GCCGCAAAGCCAACACGACATCAAAGATCAACCTGAATCAAGTTGTGCTGCCCATAAAAG -ACAGTCACTCAGTAGCGTTCGAGTATCTGATCAGAGACAGGAAGGGTCACCACGCAAGCG -GAGCTCCAGCGTAGGAAATGAAAACCAAGGCTCAGCAGATTCATTTGCCGAGCAAGGGAC -TGGATCATCGCCGATTATACAGTCGATGACCGACACCACTGAATCGGCCAGTCAGATACT -GAACAGAAGCGACGTCTTTCAATCACCAACTATGCATACACTGCAAAGGCGCCCATTGGA -TGCGAGAGAAACTATTCGACGACATTCTGACGAGACGACTCCTTCTGCATCTGCGCGGCT -GGACCTGGGTGCTGCGACAGGCTCTCCATCTGGAACACTAGGTCACAATGAGGCCACAGA -AGACACACGATATACCACTGGCCAATCTAATCCAATGGAATCTTCAAGACCGAGCTCTGT -CACTCATCTGAACGAGCTTGTCAGAGCGGGCGCATATCCCCTTCAACGCGCCGCTGGGTT -TGCAGAATACTTGAAGAGCCGCTCAAAACAAATGAGTACACTTTTAGCAACAGAGTCAAT -GGGCTATATAGAAAAGGTCTCGGGTATGTGGATTGGTGGCCAGAAGCATTATGGAGAGCG -AGAAGGGCCATTGCTTGAAGACCAGAATGTTGATCCTGAGGACAATGAAGGTGCATTCAA -TTATGGGGATCGCTTCCGTGCGCACTTTGCTCTTCCGCCCACCGAAAGACTGCAGGCGAC -TTATTACGCCTATTTGCATCGCGTGCTTCCGCTGTACGGCAAGATCTATATAAGTCAGAA -GAAACTTTGCTTCCGCAGTCTGATTCCCGGTACTCGGACCAAAATGATCTTACCGCTCAA -AGACATTGAGAACGTGGAAAAGGAGAAAGGTTTCCGCTTTGGATACCATGGACTTGTTGT -CATCATTCGTGGCCATGAAGAGCTTTTCTTTGAGTTTAATGCTGCAGACTCACGAGATGA -TTGTGCAGTCACATTACATCAGAATCTGGAGTCTATAAAATTCCTGGTGGAATCGGGCTT -GCTAGCTGATGAAGAAAGAGACGAAGTTGAAGCAGCCAAGGCCGAGCACCGCATGCTTCA -GGAAGCAAGACTGGACAGCCCTGAGGAACACGATCCACATCCAACTTCTACTGAGGATTC -ATCAGAAATATATCCGTTCTTTGATGATCCCCGCGCTTCCATTATCAACTTCAAACCTTC -CGAGCCTCTAAGAATCACCTGTCTGACCATCGGCTCCCGGGGCGATGTACAACCGTACAT 
-TGCGCTTTGCAAAGGATTGCTTGCGGAGGGCCACAAACCGAAGATTGCAACGCACGCCGA -GTTTGAGCCCTGGATCCGAAAACACGGAATCGACTTTGCACCAGTTGATGGCGATCCTGC -GGAGCTTATGCGAATTTGTGTGGAGAACGGCATGTTTACATATTCCTTCCTCAGGGAAGC -TTCGTTGAAGTTCAGGGGATGGATCGATGATCTTTTGTCGTCCGCTTGGGTTGGTTGTCA -GGGTAGTGACCTTTTAATCGAGTCACCGAGCGCCATGGCCGGAATTCACATCGCAGAAGC -TTTAAGGATTCCGTACTTCCGCGGGTTCACCATGCCATGGACAAGAACACGGGCCTATCC -TCATGCTTTTGCAGTGCCAGAAAATCGCATGGGTGGAGCGTATAATTACATCACATACGT -CATGTTTGACAACATCTTCTGGAAAGCCATCGCGGGGCAAGTGAACCGCTGGCGAAATAA -CGAGTTGGGATTGAAGGCCACAACCTTGGACAAGATGCAGCAGAACAAAGTTCCGTTCCT -TTACAATTACTCGCCTTCCGTGGTGGCTCCGCCTCTCGATTACCCGGACTGGATTCGCAT -TACCGGCTACTGGTTTTTGAATGAGGGCACAGATTGGACTCCTCCAATCGAACTATCGAA -TTTCATTGCACAAGCCAGAGCGGATGGCAAGAAGGTTGTGTATATCGGATTCGGATCGAT -TGTGGTCTCCGATCCGTCTGCCCTGACACGGACTGTCATCGAATCAGTTCAGAAAGCAGA -TGTTCGATGCATTCTCTCAAAAGGGTGGTCCGATAGGCTTGGTGACCCAGCTAGCACGAA -GAGTGAGATACCTCTACCACCAGAGATTCATCAAATCCAATCTGCACCCCACGACTGGCT -CTTCTCTCAAATTGATGCTGCAGCCCACCACGGCGGAGCGGGAACCACCGGCGCAAGTCT -TCGAGCCGGCGTCCCCACCATCGTCAAGCCATTCTTCGGAGACCAGTTCTTCTTCGGCTC -CCGGGTGGAGGATCTAGGCGTTGGCATCTGCATGAAGAAACTGAATGTCAGTTCCTTCTC -GCGAGCTCTCTGGGAAACGACACACAGCGAACGTATGATTGTGAAGGCACGGAATCTGGG -CATTCAGATTCGCAATGTGAGTTACTCCCAGTCTATGTAGGCTAGGTGAATGGAGTGCTA -ATTTGTTATTCTCTGTTCAGGAAGACGGCGTGGCCACTGCAATCCAGGCCCTTTATCGTG -ATCTTGAGTACGCTAAGACGCTGGCCCGACAAAAGTCGCTTGCCTCGTCCACCCCATTCT -CTCCTACGCCTACAGCTAAGACCTCCCCAGATGGGGGCGATGACGATCTTGATGATATCG -AAGAATGGACCTTTGTCGGCGAAGAAGCTGGCTTAGACATCTCGAAACGGATGCGTGAGC -GAGCTGCTTCTGATGCTGAAAGGCTAGGAACCAACATGTTCCAATGACTCTGTTCTCCTA -AGTTGAATGTTTAGCATTTTGTTTTCGCTTCTTTCGTTGAGATACAGCAGCGCACTGTCG -AGACCTTTCACCATTTCCAAATCCCCGTTCTGTCTCCAACAAGATAATGTACAGGCGCGG -ACTAGGAGATGTGCTTTGTGAAGGTAGCATCCTCAAGCTTCGAATATTTGGGTGATCGGt -ttctttctttttttttcttttctttttttcgtttttGACGATTTTCTACCTCACGAATTA -TGACATACGTCTCTTATTCTTCGTTTTACATGCATAGGGCATCTGGTTTAGAGCTTTTCT -TCTAATATCCATTTCATTTCAATGTTTCTCTATTGCATTTTATGATTCCTTATTTTTCCC -TTGACATTTCACCTGGAAGATAATAGACCATAAATTTTTATAATATTAAATCCCTCCAAA 
-TTCAACACATTCAGCCACCAGGGCGCTTATTTTTTATTTTCCTCATCACCCTTGCCTTCA -GCCTTGTCTTTACGTCTTTCTACAACGTCCCAGTTGTACACACGCGCCGTAGCAACCTCC -AATGTCTGCAAAGAACGTTAGCAAAAGTGCCATCCCAACCCCACACCTCTCGCCGAACTA -ACCGTAATCTGCTCACGCAAGAATTCTAGATCCTCCTCGCAATTCTGCAAGCTCAACTGC -GCAGCATTGAGCTTCTCATCCAGCATAGTCTCCGCTTCGCCGAGCGGATACGACAACATG -ACATTCGCCCCAAGCCAGAGGTAGACCTCCTCGGCATCAGCAGGCGAGATAGAAGCACGA -GCGTAAAGGGTATCGTTGAGTTCGAAGTTAGTCTCGAGGTCTGAGTCGGGGTTGGACTAG -AGCAAGTAGAGCGTCAGTGATATTCCTTTCAATTTCCAAGTCGTCTTTTTTCCATGGATA -CGTTCCACGAATGGCGTTTTCCCTTGGCTATTATGGTAGAATCACAGGGAAAATATGTAC -CTCCTTGCGCAGCTGCAAAAACCGCACCATCTCCAGCGTCTTCTTGATATCGGGAATCTT -CTCGCGCAGGCCCTGACCGCGGCGCTGCGTGTTCACTTCCATGAACTGATACTTGGAGAT -CATCTCCTGGAAGGAACGGAGAGTAGGCTCGACTTCTTCCCGAGAGGAGACATAGTCGCT -GACATTGTCGATGAATGGGGCGACGGGGATCCCGCGCGGGTTGGTTTGTGTTGTTACCGT -GCTGGGGTGGAGGGGTTAGTTTTCTGCGACTGCGGACATGGGATTGAGGCTGGAGGGTGG -AAGCTAGAAGGGGGATTGATGCAAATATATGAAAAGGGGATTGAAACTTACCTCTTCTTT -GAGTCCGCCATAGTCGCTAATGTGTGAGAGTGAGAGAAAGAGGAAGATGACGATGTGGGG -AGCTTAACTGCCACTTTTTTGGGGGGTCTCCGCTTTATCTCCCGATGAATTCAATGCATT -TCTACTTATCCTAACTTGTTGAATTGATACAATCATGGTCCTCCTTTACACGACAGAGCG -CATTCTTGCCGCGTTCGAAGCAATTTCAGCTTCGCGCCACGAGGAACTAGGTCTACCTAC -TACACTCGAAGCACAAGGCCCAATAGCCCACGAGCAACTGATCCGACTCGCCAGATACCT -CCAAACCGACACAGAATATAAAGATCAAGCCTCTCACACCCCAACCATTCTCAGCTACCT -CCTCCACGGCACGAAAGTCTACGTTCCTCCGCCGCCAAAGAAGCCAGAGCCCGTACGTGG -TCTATACACAAATACATCCCAACCCGTTCCATCCCTTTCTTTCGACGTGAAACCCCATAA -CGAGCCAGGTCGGATCCTAACATGTATGAACAATCCAGAGCGCCGAATACCTCGCATCAA -AAGCCCGTCTCCTCGCCGCAACAGAACAAGAATCCTACAAACGCCTCCTAAACCCAAACT -ACCAACCAAACGCCGACCACGCGGACCCGCATACCTCGCCGTATACAAGCGACGGCCACG -TAGAAGAAGACACTCTGACAATATCGCTTGTCTCTAGTGTCTTCATTTCGGTGTTGGTGA -CTGGGTTCGCCGTGTACGCCGCGCTCACTCGGTTCCCGACGCCGGAGATCTTGGCGAGCG -AGGCGGCTAAGGTTCTTCTTGGGCTTTTCGCGGCCCTGGGTGTTGCTGTTGCGGAGTCGT -TCTTGTATTGGACCTATATTGAAAAAGTAGATAGGGCGAAGGTTAAAGAGAGGGCGGTTA -GAGAGAGAAAGGTTGTTATTGGAGCTGTTGGGGAGGTTGACAAGGCCGTGGACGAGGGTG -TTGAGATTGGGACGGAGAAGGAGGAGATCTGGGGGAAGGGGGTTAATGGGGGTGTAAGGA 
-GGAGGGTTAGGGAGAAGTGGGAGAAGGGGAGGGATGGGGAGGATGTGAGTCTTTGATATG -TAATTGTACAGTATTCTCGAGATCAAGGATAATGACTACACTAATGTATTTTTTTGAATG -TGCTAATTTGTTCAAGGGACTTGGATTCGGTTGCTTTTGGTATTCTATCCCTAAAGCTAC -AATCCGGTGTCATTAATCTAACGGGTATGGATGCTGTGCGTTCATTAGTGCTCATCAGAG -GGTTCAATCTCACAGGTCGAGATAGAGGTGTTAAGTGGCAGAAATATAGAAAAGAGACCA -AGGCGCACTGCCGGAATATCAACTAGTCCTAAGCGTGGACAAAAGAAAGAAGAAACAAAA -GAGAGGGAATACCTCAAACACGTTTTTCAAGAAGTAGCTTGCCGTGCGGCGATTGGCTAT -CGAAGCCCTGTTCACGGCTGAAGATACGTTCACCACGCAACCAGGTCTCGCGGACCATGC -CGCGCAGAGTACGACCTTGGTAGGGGGAGCATTTGTTGCGGAAGAGCATGGTGCTCGGCT -CTACAATCCATTCGGCAGCGTCATCGAAAACACAAATATCAGCATCGAAGCCGACAACCA -GGTCACCCTTTTGTGACTGCAAGCCAACTTGGGCAGCAGTATTGGCACAGCAGAGACGAA -CGACGTCCTGGAGGGCGTTCTTGGTGTTCTCGTCTTCGGACGAGGACGTCAGGCCCTTGC -GACGGCTGAGGTCTGTCCACAGGATAGGCAGACCCAATCCAACGGAAGAAATGCCGCCCC -AGGCAGAGAAGAAGTCACCAGTCTTCTCGTCCTTACCGCCAGCGCAGTGGCCGGGAACAT -TGGCCGGGAGCATCTTCAGGTCCGGTGTGCAGGGGGAATGATCGGAGACGACAGTCTTGA -TGACACCGTCGGCGGTGTGCTGCTCTAGCTCTTCCCAAAGACGATCTTGATTCAGCTTTG -AGCGGATAGGCGGGCAGCACTTGTGGCGGGTGTCGCCGTCGCGTACTTCCTCAGCTGCCA -GGGACAGATAGTGGAAGCAGGTCTCGGCTGTGATATTGACACCATTGGCGCGGGCTTCCC -GCAGCAATGGAATAGCTTCCATTGCAGAGAGGTGAACGATGTGCAGGGGGAGATTGGGAG -CAAGATGAGCCAAAGACAGAATACTTTCGATGGCGCAGGTCTCGAATGAGGAGGGCCTCG -AGGCCAGGAAAGTTGCATACGCCTCGAGGGGACCGACAGGAGCGGCAGGTGCATCAGATG -TCTGAACCTCGTCACCCACTGATGCGGTAATGGGAGGCATCATCTCGGCGTGGAACATGA -GAGTTGTTGGTTCATCGGCCAGCTCGGCCATGACCTTCTTAATGTCCTCTCTACCAACCG -CAGGGAACTCGTCAACCTATGGAAGAGAGTTAATACTAATTAGACTCACGATGTCGATAT -CATTCACTTACACCACTGTCAATCAAGAAACCCTTGAAGCCGCGGACACCTTCCTGGACA -AGAGCCTTGAGTTCGCCTGCATTTCCGGGGATGATACCGCCATAGAAACCGACATCAACC -CAGCACTTGCCCTGGGCAGCCTGGATCTTCTCTTTCAACCCATTCACAGTGGTGGTGGGC -GGGATAGCATTGAGAGGCATATCAATGACGGTAGTCACACCACCAAAGGCAGCGGCTTGA -GTGCCCGTATAGAAGCCCTCCCATTCCGTGCGGCCAGGCTCGTTCAGATGAACGTGTGCA -TCAACCAGGCCGGGCAGCAGGACATGGGGCGAGTAGTCGGTATAGGGAGTATCCACAGGG -AAGTTCGATGCTGGAATCACGGAATCGAAGACAGCGGTGATCTTACCGGTTGTCTGTGAG -ATCACTACAGTGGCAGAGGTCAAACGGCCGGAGATAACGGCACGGGATGAGGCCACAACG 
-GCAATGGAAGAGAGATTAGCCATGCTTGAATGTGCTAAACTGGAGAGTATGAATTGGGAA -TATAAGAAAGGCGCTCAGATAAGAAAGATAAGAGGGGGCAAAAAGTCCAAGATAAGGGAA -TTGGGCGAGATATAAATACGCCAGGGATACAATACCCCCAAAGCCCCAAGGGCAAAAAAA -TTAAGAGTCTGGGCCTTGGGGTCGGATTTCACTTTTCAGATCGAATATCCAGAACTTCTA -CACCACTACCGAGACATTATTATTCCCGGCATTGCAATTGCTACATAGTCAACCGGTTCA -GACAATCACAGCCCCGGGGAAGACTTGGAGACAATCCCTCTCTTCGATATACTAACATGT -TCACAGCGTAGCAGAGTCAAAGCGAATGCCGTCGGGCACCTCCGACATGCGGACTCCAAA -CGCTTATCAACGGGCCCGCTTTATCAGATTACTCGATCGAAACTGTCCTGTCCGCGTTCT -TTTTACTCGATCAAAACTGCTTTCGGGAAAAAAAACTCTTCATGGAAAGGGATATGCTGC -CTCGTGACCATCTTGGCCCTGTTTTCGAGGGGATTTCGACAATTTCGCCATCGCATATGC -CGAGGATGTTAAGGAGAATGAGAATAAGGGAAAAATTATTGAAGAGGATCAAAGTAACAT -TGAAGAAAATAGGTAAGAGAAGAAGTCTCAGGGTTCCCAAACTTCCAGGGATTCTTCTAA -GACTGATGCCTGCTTGGATTTCACCTATCACTGGTCCGTCCTCAGCAGTGTAATCCACCT -AGATGCTTAACACGACGTTCCGATGTATATGCAAATGTAAATATCTCATATATTGATCGT -CATTCTAACTCCAGCCGGTCAATTAACGGCTTCGTGATACAGATGAACGTCGCGTTATGA -CTCGCATATATCATAGCTTTCCTTGCGTTCTAGTGCTCAGCCCTCGCCGCTTTTTGCGTG -AGACAGCACCTGTGGCCGCCCTATGCATTACATAATCATCAAAACCCAGATCACGAGGTC -AGCATCACCTGACTATTATATAAGCAAAGCAACTTGCTTTGCCCACCGAACGCCACTCGA -CGGAACAACGCCAAGCGCCATATACTAACCACCGTCTTGCTCCGAGTATTATGACCTTGA -CGTTAGTCACTAGCCTACTCTAAGGTCCTTTTTTCTATAACCGATAGCATTATATTGCTT -TCAATATCCATAGGTTCCAAAAATACAAAAATTGTTTAGGCCGTCAACCTGTTCCTACTC -AAATGAGTACAATATACAACATGCCCAAGAAGTATATTTTTTTCGAAGGATCCTAAGCTT -AAAGAGCTATATTCGCCAAATTCAGGACCAATCCTTCGTCCCGGATGATGTGCAACAGCA -GACCAACTGAGCTACTGATATATCCTGGTCCTCGACGACCGCAGCAGCGTCATATCCTCT -TTGACTCACTCCTCTCTTCTCATCCCATACCCTAACTAGCAGCTCAGACAAACCACCACA -GCCCACTCGTCATAGGAAAGCGAGGTAAAGCTATTGAGCACCATCAAAATGAAGTGTGAT -GTCACCATCCGCAGACTGGAACTCCCCTGCCCAGCGGCCTCGACACGAGTAACTTTCCTT -ACTACTTTAACTTCACCAAGGATAGTAAGGTCAACTCGTATGTCAGGACAGAGCCACCGA -GGGCAGACCGTATGATACTGATTACCAGTAGACTTTGTCATATCAAGATAGAGCCTGAGT -ATGTCCTTTGCTTCACTACGGATCACTGCGAACTCAATATGTCGTTCTACCACCATCCGT -TCTTTTCTGGCTTGGACATGATCCACCTCAATCAAGAATGGCTTCTTGCAGGATGCGTGG -TCAGCCCTAGTTGGTCCTCAATGTTACTTCAGTTGGCGACTGGTGTCATCATAGATAGAC 
-ATGTCTAGATTCTATCCCTGTGTATGAGCGCGACAATCTATGGACTATAAGTACACATGA -TATGACCGGAAGATGAGCATGTCATCCCCCGAGTACTCGTCGATTTTGTGAGTGTGGGTG -TTGAGTTGTGACGATTATCAAAATGAACCCAGCGCGAGACAGTCGTCAGACTCCCCTTGG -TGGAGATTTCGATGAGAATGTCTCCAAGACTATGGACTACTGGAAAATTCCAGGTATTGC -AGTAGCAGTCGTCGATGGGGATACCACATGGAAAAAGGTAGATATCTTTCCACAGACGAA -AGACTCCAAGTTAACCAAGTGTAAAAAAAAGGGATACGGAATTGCCGACCAAGCATCGTC -TACCAAGGTGGAATCAAACACTCTGTTCTACGGAGGAAGCACTACCAAAGCATTTACAGC -CGCCATAATGAGTATGCTGGTCCAGGACAATGCCAAATACCCCAAGGTGCAATGGAACAC -GCCTGTCAGTGAGTTGATCCGCGAAGATTTTGTCCTTGACGACAAGTGGACGACGGCAAA -CATGACTATTGAGGATATACTCTCTCATCGGACGGGAATGCCAGGCCATGACTTGTCGCT -TGGTAGTGTGCATCCCGGCAACCAGGCAACTGTTCAAGATGTAGTAAGGAGCCTGCGCTT -CCTTCCATCTAATGCACCCCCACGCACAACCTACCAGTACAATAACTCGATGTATATTGT -GGCATCGCATATTATCCAAAGAGTTATGGATGATGACCTTGGAGCTGTCTTGGAAAGGGA -AATATGGCGCCCACTCAAGATGAGCAGCACGTATTTTCGGCTCGCTGATGTGCTTGAAAA -AGATAAGCCATTGGCAAAAGGCTATGCCTTCACCAACGGTAAATATGAAGAGGTGCCTTG -GAAGAACAAGCCTGAAATTTCCGGTGCTGGTGCGATTATCAGTAGTGTGGAAGACTATGC -GAAATGGGTTCACGCGCTACTGAATCAGACCGGAACGAATCTTTCCTCTGAGAGCTGGAA -AGCAATCTGGACCGCAAGAACGCTCATTCCAAACAGCGAGCCGTTTTTAGCACCGATGGC -ATATGCCCTTGCATGGAACCGTTACGTTTACCATGGAGTGGAGATCATCACCCATGATGG -GGGGATTGACGGATTTGGAGCTGAAATTGTAATGATTCCATCGCTCAAATACGGAGTCGT -CACCATGGCAAACTCGACCTATTCGTCGAATTTTGGGGGGACTTGCCTGGCGTATGACCT -TATCGATTCAAAACTGGACATCGCAACGGAAGCAAGGTTTGGATGGAAGGAACAGTATGT -CCCTTTTCTAGCTCTATATATGATTTCCAACATTCAAAGAACCAGATGACTGACTAACAG -GGGAGCAGCTATGTGAAAGTGCTCCAGCAAATGGAAGAGTTCAACAAGAAGGCGGTAGAA -CATTTCTACCCCAATCTTCCATCTCCTCCTCTACCGGGGCCGACATTCCCTCTTGAAGCT -TACACCGGCACTTACTGGCATGATGCCTACGGCCAGCTCGCTCTATTCCTCGACACTGAC -AAAAAGCTCCACGCGAACCGAACAAGTCCTACAACTGCCTGCTCCCTTACCTTCGAGCAC -GTCATTGGAGACTACTTCATTGCCACGATGCATGTAGTTGGAGCCGAGACAGTCATTCCA -GCCGAGTTTTCGCCTGGACCAGAAGGCAAGCCAAGATCTGTCGGTATAGGGTGGGAACCA -AGACTTGGGAACAACAAAATTTGGATGCGAAAGGTCGATGATGGAGTTGAGCCGGTTCTG -ACCAGTTTGCAAGGAGCCCAGCCCCTGTCCTACCAGGCTTACCAGGCCCCCCAGCTGCCT -GAGTTTTTGGCCAGTCAAATTTTCATGTAGGGCTGGAGTTATTATAGCTTATTCATCTAA 
-ATTTATCTTTTGATCTCAGGCTGGCGGATTTGGAAGAGCTTAAAAAAAAAGCCATATCCT -CAATGGAAGGATCTCGATCAAGCAGAAACAGTCATATGACTCTAGCTAGCAATCAAGCTT -ATACAGAACGGGCAACAAGGCACAAGGTCATATAGTAAGACGTAATCGAAACAACAGGGA -AACCACTCCGTTATGGCTAGACGACCATCCTGACTACAGAGATATATGGCCCTTTGTCAT -ATACACCAGGGCGGCAGCATCTAGAGCGGTGTAAATCCGCTGCAATATCTCCTCAGCCTC -CTCCAGGCCCACGGAAAACCGGATGATAGTCTCATTTGTTCCATTGGTAGCCGCCTGCTT -CTTACCTGTCTGCCCCGTCATCTGCATATAAGGCGACGCGATGCAGACATCCGCACCAAA -TGAGAGACCCTTGTAGACGTACAAATGGTCAAAGAAAACAGAAGAGCTTGCGACGTCAGC -GAACTCCATCGTGAAGACACCCCCATACCCCGGGCGAAGCCCTTCAGGCCAAGGAGGACG -CATTTGACGCTCGTAATACGGACGGGATGTGCACACGGTCGGGTAGAAAATGCGGGTCAG -CGGGCTTGAGGGATCCGAGACGAGAAGCAAAAGCTGGGTGACAAGGTACTCTGTGGTTTC -GTTGATTCGAGCCATGCGAGCCAGAAAGTCCCGGCTATTGAATTCCAGTCGAATCGCATC -TTCGACAAAGAGACTGTTCTCGTACGTGGCGGACATCTGGTTCCGCAGAGCGGCGTAGTG -CGTCGAATTGGGATTCAGAATGGCACTGTATGCGTGGAGAGGAGAGCATCAGTCACTGTG -TCTGTCTGTTCGCGCCATGGTTCTTGCCTTACCTTCCTGCCATTAGGTCGGCATATCCAC -TGAAGTATTTAGATAGGGAGGATACGACAATATCCGCCACTCCCAACAAATCGACGTTGG -CGAAGCTGGCGACGCTATCGTCCACCACGACAAGAATGTTATGTTGATCCGCCAGCCGAC -GTAACTGCTGCAAGTCGACCGTCTGCAGTATTGGGTTACTGGGACATTCACACCAGATTG -CTTGGACCGCGCGAGCATCTTCGGATCCCAGCTCCAGCTGGGCAGCGAGGTGATTCAAGT -CAGCCTCCGTTCCTAGGTTGTAGGATCTTAACCCCGGTCCTTGTGTTTGCAGAACCCGAA -GGGTCGGCTCATACAGGAGGCCAGTGTTCACGGACTCCCCGGCCCGCCAGTCTAGAATGG -ATTGATGTACGTGGTAGATGGCGGCCATTCCGGAGGCATATAAGAAGACGTCGGAGGCAG -CGACCTGCTTATTGATCTGAGGTTCAGCTGCCCGGTGCTGCATATATCCAGCGATTCGAG -AACGGAGTTCCTCGTACACAGGATGAAAGTCGGGAGGGGTGCTCCTTGTGGTGGTGGTGG -TAGGAGCAGTCAATTCATGGAGCCGAGGTCTAGAGCTGTACTGCAGACATTGCTGCGCCA -ATCGCGATGATATGCCTGGCCCAGCGCGTTGCCAAAACAAATGAGCATCCTGAGACGCAG -CACTGGGGTAAAGGACGGCATAAAGGGTTGGCCCAGACGAGCTAGTGTGGTGGTCACGAC -ACACTGAGCTGCTGAAGTCCACCCAGTATACCCGAATTGAATCGGCGAGGATGGGCTGAA -CCTTTGTCTGTGAAAGCAGAATAATATATTGCTTGCAGGAAAGCGCCGCCCGGGCACCGG -AGAACAGTAGAAGTGAAGAATGAGTGCAATCTACCTCGTGGACCTTTGCCAGGATCGTGG -AACCGAGCTTGAATCATATAATACCATCATCAGCCCGGGAGGCCCAGCTAAACGTGATCA -GAGAAAGCTAAGGGCCTTACCAGACGGATATCGGGGTGCACAACGGACCTTGGATAGCCG 
-TTGTTCATCTGAGCTTTTCCTTGCCCAAAATCCACCATGTCAGCCCATGTCGGGATTTGA -ACCAACAGACTGTTTTCACGACCAGGATACGGCTCTCCCAGTCGGAGTGTCTGCGGTACT -GTCATCTGGGGCGAGACTGTGATAGTATGTTCAATCCAGTTGACGGCAAACAAAAGGTGG -ACTGCCGGTTGCCAGGAACGCCAACCAGCTTCATTAGAGATTCTCTTTCATAAGACAGGC -TAACAGTTACCGTGACTGGGGCTGGTTGTCCGTCGCATGCCGGATGATCTCGCCCTTACT -GCCACATATCTGATCCAGATATACAGTACAGTGGGACATACACGGATATAACGAATATTT -GACTTTCAAGAATACCAGGGCATTTAATCCATAATACTAACGAATCTCCTTGCGGATTTG -GGTAACTTATGAAGATGCATTGACGATCCCGTTATTATGCACGCACGATACATTGCAGCA -TTCAATATCACATTGTCAGATATGTGGGCTAGCTGAACCACAACTAATCTGATCTCATTT -CGCCATTTACTCGCTCCAAGGCTGGGTGCGTGTCATATCTAATTCCTTCTATCTAAATTC -AGCTGCGCCTTGTTATCCTTTCTTGGTACGGTGGTTGACTTGTATTGGTCAGGTTGCCCT -CCAAGAATATTACATTGCTCCAGCAAGCACATCCTCTCGACGTGAACATAAGTTCTAAAT -GGGCCTTACTGTAGCAGTATGTCAGGAATGTATCCACAGTGGAATAACCCCCAGATTCTG -GGAGACGGTTACACTGCGCCGAATCATTGTAGAAATACTCTGTAATGGACAGCTTAGGAA -GCATTTCAGGGCGCTTGATGACATCCACGATATACAATTTACTGTTCAATTCAAGACTAC -TACTGTAAGACCTCCAGGCATTCAACGTAAAGGTCCACTTTCAATTTGACAGCCTCGCAT -CAGGCTAGCTCAGACACACACATCACATGTAGGAGTACTTTCATGCTCTTCGGGTTATTT -CACGTCCTTCTCTTCTGTCGTAGGCTGCGCAGGGGAATTGCTGTCGCGCGGCCCCTCCTC -TTCGGCGGCTTTCTTACCAGGGACTCGCTTCCACTGCATAGAAGCCGATCCAAGAAGCGA -CAGAGCGCACATGGCGACGCCGACGTAAAATGTCTGCGTAACAGCCTTGCTATACGCCTC -AAGAACTGGCTGCAAAAGCTCCGACGAAACACGATCCCGGAGCAGCGTCGCTCCTTCTGC -AACAAGAGAAACATCCATGGAGGGGGCGCTGCTACGCAAATTGCCCACCAGCCGGTTATG -GAAAACAGTCTGCCCAATAGTCAGAGCGATGGAGCTGGACAGCATTTGCAGAAAGCAGAG -CGTCGCAATGGCCATGATTACATCGTCTGGAGCGCAAACCACGGAAGGAACAACCATTGC -ATTCTGGGCTCCAAGTCCAATGCCAAAACTCAGCACCACTTGGAACCCTAGGATGTTCGC -GAGGCTGGAGGACGGATGCAGCGAGCTCAGTAGACTGGCTCCTGCTACGGCCATCAGCGA -GGAGATGATCAAAAAGGGCGTATAGTACCTGAAGACGGTGACCAGGATTCCCCCAGTGAG -AGAGGCAATCACCATCCCTAACTGGGTGGGTAGAATTGCGAGGCCAGACTTGGAGGCAGA -ATACCCCTGGATGGCTTGCAACCAAATTGGCAGCTATGCGAAGATGACCCAGTCGTTAAT -CCGGGCTACAGAGATTCCAGCTAGAGAAAAGAGGAAAGGGATGAAGGGTGAGAACATACA -TAGTACACAAAGACAAACAACCCTCCACTGTTTCCAACAATGTAAGCCATGAGACCCATC -ATATTCCGATCCGAAACAATGCGAGGGGAGATGGTTCCTTTATCTCCGGCGCGAATCTGA 
-ACGCCAGCAAAGACCATCAGTAACATGACCGACAGGATGAAGAGCATGATGACTCTCCAA -TTGTCCCAGGAGTACTCCGAGCCTCCCCATTCCAGGGCCAGTAGAAGCGTGAGGATTCCA -GGAATAAAGAATGCCGAGCCCAAAAGATCCATAGATCGGACCTTTTCTTTCCAGCTCAGA -TGTCCTGATGGGGGTTTATTAGTAGATAAGAAGAAGATGATTCCCAGGATGGTGATTGCT -CCTATAGGCAGATTGAGATAGAAACACCATCGCCATGAGGCATGGTCCGTCAGTAAACCG -CCAATCCTAGTTTGTCACACGGAGTTAGTTCCAGGTCTAGTCCTTGAGAAGAGTAAAGCT -CACAGGGGTCCCGATACGCTGACTATCCCGTGAACACACCCCAGACACCCCAGGTAAATG -GGCCGCTTTCGCGCAGGAATAGTGTTGGTCACGATCACAATAGCACCGGCGTTGACGCTG -CCTGCACCCAATCCTGAAATGCTCCGGCCCGCAATCAGGGCGCCCGAGGTGGGAGAAACA -CCGCAGATGAGAGATCCTATCTCGAATATAGCTACCCCTGTAAGAAACACCCACTTGGCA -GGGTACATCGTGTACAGCTTCCCCCAAGCCACCTGAGTCGCTGTGAACACAAAAAGATAC -GAACTTCCGTACCACCCTACATCGTTCAGAGAGTGAAACTGGTTTGTGATGCGAGGGATG -GCTGTAACGAGCACCGTTTCATCCTGTAAGATAAGTGACAATATGCATATATCAGCATGG -TGAGTCTTTGATTTAGGGAGCATTTGCACCGAGAAACAAGACTGCTACTTACCAAGGAAA -CGCAGAAAAGAGTGAGGCACAGACTGATCGTCACCAATAACGACTTTCGGAACGATGGAT -AAATGGATTCGTCTTCCATGACAGAGCGTAGTTGCTATTTTCTGGCCGATTGCAAGAATT -TAGGGGATGGAATGGCGAGCTTTCGCCCAGGCAATGCCTATCACTTGCTCAAGCTCGTGC -AGGTCCAGTGTGAGAAGATGGGGCTGGGCATGCAGCAATAATCTGTCCCAGGCAAGGAGA -AATCAAGACAAAATGGCAACTCAGGTAAGCTTAGGGTTTGATATGCCCGGTCTTCCCGGG -AGAGAGAGAGTAGGCATTTGACTGGGAATTCGAGATTTTGGGACCTGTTCACTGATAGAT -TATACAGAGTTCTCTGTTCATACATACTTATGCAGCAAGCCTCAGTGGAGACGGATCAGA -TATTCCAGCGAAGGAGCGCAAGGGATGTGCGGGAGCTGCCAGCGGAACGTGCGTCAAATA -TGACCGGAGTACTCCGTAACTATGGAGTAGTTAGATGCATCATGATCTACAATAAATAAT -GCCAACCCTGTCGACTACTTCTATGAAAGTCTCAGATATGAGACAGAGACGGAGACGAAT -TCGGGGTATTCTTCGAGATAGAAGAATACTCTTCGACACAAGTCTGGGTTTTAAAGAAGC -CCTGTAAAGCACGCGCAGACTGGTCTCGAATAAAGCATCGTAACAGAACTAAGAATCAGT -GCGTTAGAGAGGAGACTGCTAACATTCCACGGAGCAACATCCTCAGGCTCTGTTCAATCA -GAATCAGCATTGAAGTTGGGACGTACTTGTCGATGGAATGTAAAGTGGACGAGACTATCC -ATTTGTTTGCTTACAGACTGGATCTTTTGACGAATTTCTCCTAAATAGCAGTGTAAAAAT -GCCCTGTTTCTCTACAGATTTGCTGACTCGGCGACGGTGAAGGGTCAATCAACCGTGGTG -TAAAGCATCACAATTAGTTTGCTGGCCAAGCTATAGGCCTATCAATTCTCCTGTGACTAT -GCGCTGGCTTTTTTACCTCAATTTTCATTACTGAAGGAGCTGAATTCATAGAACTGAAAC 
-TAAATTAGGTCAAAATAAGAAAGAGAAGAACAAAAATAAACAACAACAATACTATATCCC -GAAAACAGTAATTTCACACGGTCTCAATTTGCATGCACAAAGCCTTCAGCATCGTGTCCA -GGGTTTCCTGGCTGTGTACCTTGGAGCTCGCCGACACCGATATCTCCCAGAACTCTCGCA -CCGGCTTCGCCAGGATGAAGATGAAATTGCTGTCCACCGACCGATTTTTCTGCTCGTATG -GTGCCAGTGTACTGCTCATCCCTTGAATTTCAGTGCCCTTGTCGTGCTCTAGGTTGTCGA -GGTTCTGGAACTGAATAACTAGTTCGGGAGCTTGCGAGTCCACGGGCCAATTGGTGCTAT -GAACAGCAATCTCGCTGAACTGGCTAGTCTCGAAAGCTAAGCCCTGAATATGCTGGCTGT -GGACCGTGGCGAGTTGTTCCTCCATTGACTGGCCTGGTTTAATGCGCAATCGGATGGGGA -TGGTGTTGACACATGGCCCGGCAACGTTTTGGAGTCCAATTGGCAAAGAAGAACGACCAG -AGACAATGTTTCCAAAGACTACATCGCTGAGTCCCGTCATGTCTGCCAGCATGCGGGCAC -AGAGGGTGGTGAAAACCGTTGCTGGCGTGAATCCCGGTCGATCCCGCACGGCAAGCACGG -CGGTCTTGGATTGAACTAATCGGCCAAAGTGCTGATTTGGCCGACAGCTTTCATCGACAA -CAGACATCTGTCTGGTCCGAGTGACAGATGAGCCATGCAAGAGCGTGCGCCAGTAGGGAT -AGGCAGCTTGTCGCTGCTTCTGCACATGCCGGATGTAGCCCGCAAACTTCGGAGCCGGGG -GCAGCGCATCCTGGCCCCGATAGAAAGTTGCAAAGAGAGAGAAGATGACAGGCAAACTGA -AGCCATCGTACTGGGCGTGAGACAGCCGGATGGTGAGTCGGGCTCGGCTGTCTGCGGCGG -AGACGATGAAGAAACGAGTGAAGGAGCGGCCTAGGCTCAACGGGTTATGTAGATCGCAGT -TGTAGGCTTCTTCGCATGCTGTTGCTAGATCCCCCTCTGTGTGAAGCATGGTAATTTCTG -GCTCCACTTTGTTGAGGACGACTTGCAATTGACGGCCCTGCTCAACAATGAACACAGTAC -GAAGGATGTCGAGATGATGCCACAAGTCAAGACAGCTGCGACGGGCGCGTGGAATGTCCA -CCCCCCGAGGAAGGTCCATGTATATGTACTCGATCTCCCTCAGATGTGTATTTACTGCAG -TAGATACGTAGCTGTTCTGAAGGTCCGTGGTGGGATATACGTCGGCGATATGGTGCTGGG -GACTTTCCAGGAGCGGACTGATGCTGGCGGCCAAGTAGTCTGGCAGCAGAAGGCCCTTGG -CTGAGCGAGGGGGTAGAAGGGAGAGGGGATAAGGGTCTTGGTAAGTGGCCTGGTCTTCAC -GAAGGAGGAGAGCCAGATCACAGAGACGGGGCTTGTGGAAGACGTCGGTCGCGGTCACGG -CAAGACCGTCCTCGCTCGCGCGCTGACACAGGCGGATCACCGCAATTGAATCACCGCCAA -TGCGGAAAAAACTGTCATCGGCAGCGATGGTACTAGCTTCGAGCTCCAAAACCGCTGCCC -ATAGCCGCTGAAGCCGCTGCTCCATTTTAGTAGTAGGGATCTGCCGTTGGCGGCTGGAGG -GCTGCATTTCAGCCAGTTCGCTTAGTGTGTATGATGCCCCAGTATCAGTTAGACGTTCCC -GATCAATTTTACCAGTCACGGTCATTGGGATTTCAGCGACGGGGATGTATGCATTGGGGA -TCATGTGGTGGGGGAGCTTTTCTGAGAGATACCAATCCAGCCCACTAGTATAGGTCGCCA -GCGCCTCTCGAATGCGGTCAACTGGGCCTGTGACCAACTGTCCTAGACCAAGATAGACGA 
-CCAACATTCTGCTGCTGCCATCACGCGGAGTGATGACCCCAGCCACGACAGTGAGATTGC -CTGTAGTCGGCGAGATACACGCGATGCCCTCCTGGACATGGCGCTCAACTTCGCCTAGCT -CTACGCGCTGACCGCGGATTTTGACCTGCGAGTCTTTGTGTCCAATGTAGACGAGCCCGC -CATCGTGTCTATAGCGGACTAGGTCGCCGGTGCGGTACAGCCGGCCGCCACGGCCTAGGT -GTCCGGCATATCCGGAGCTCAGCCAGCGGGGATTCTCCACAAACCTGTCGGCCGTTTGCT -CAGGGAGCCCCAGGTACCCAAGTCCGATGAGGGGCCCTTCCAGCCAGAGCTCTCCGATAC -CACCAACTGGGACGAGTTGGTCGGCGTCGTCGGGGTCCACAATCCATGTGTTACAAGCAG -AGCCCACGCCGAGATTCGGTTCATATATGGTGTCCACTGGGATTTGATTCACCGTGACGC -ACCCAGCACACTCCGATGGTCCATAGATATTGATGATCTCGTTGGTGTGGGCAGCCCACC -GTATGATATCAGCCTGCGACGGGGGTTCCCCAACGAAGGCAAGGGTATCTAGGTGCGGCA -CAGCAGCAGGGCTGAGCAGACGAGCAACACTAGGCGTGAGAGTAGCATAGTTGACCTTTA -ATTGGGACATGGCCATCTCAATCCCTTCCCGTCGCTGAGGCTCGGACGGGATACAGAAAC -AACCACCAGCAACCAAAGTGCGGAGTAGGTTGGACCAGGCCACGTCCGACGCATAGGACA -CGAAGTCGAAGACACGGACGTGAGGACCAAATGACAGGACCTCCTCCTGCTGGATCAGGG -CACTGCTGACATTCGCATGCGTGACAATTGCTCCCTCAGGGATCCTGGTGCTGCCAGATG -TGAAAGTCACAAATAATCGGCGAGAGGGATCAACTGTAGGAAGAGTCACACCCTGGCTGA -TAATTCCCAGCCCAGAGAGCAATCTCTGACCAACAGGAACAATTTGGCAGGCACTAAGCC -AGCCAATCTTTTCGTCGGTTGCTTCCGAGCAGAGTGCTATCACGGGCTGCGTTTGCTTTG -TGATTGAAGGCCATCGCTCTTCTGGCTGGCTGGCGTCCATGAATATTGAACAACCCCCAG -CCTTCATCACAGCAAGCATGGCCACGGGCATCCACATGGATTTCCCAAAGCAGAGAGGGA -CCACCGTGTCCGAACATACCCCAAGAGATAGCAAGTGATGGGCTAGGTATGTTGATAGGT -CATCTAGCTGTCCGTAAGACAACTCGCCATCCCATGCACAGATTGCGTTGGCTCCGGGCT -GCTCCTTTACTTTATCAGCAAGCAGGCTGTGCACAGGAACGTTGAGTGTAGGCGGGACAG -AGTTGTTCCAGTTCCAGATAGTCTGCTGGTCATGGCCTGTTGACAGATTCACCTCGTGCG -CCTTCTTAAAGGGAGTGTGAATGATTTCCCGGACCGCTTGCAGCAGAGATTGCACGATGC -TGTCTGCCTGCTGCTCTAATAAGAGGGACTTGTAATAGTTGAACGTGATTGCTGTTTCCT -GTTCTCCCAGACCAATGTTCATCATGATATCGTACTAAAAATGGCCATGTTAGGGGACAA -GCGTCCATAAATCCATAGTAAGGGTTGGAAATACGTACTTCAGTTGGGTCGTCACCGTCT -TGCTCTTCAAGACAAAGATCACTTTCTTTACCCTGGCTCCTCTGCTTCAGATCCATACCC -TGGACCGAGATGACGCTGTTGAATAGAGGCTCGGCCAGCCCGTCGGAAGAGTGAGCTATC -TGCGCCAGAGAGCAATGCTGGTGATTGAGGCTGTTCAAATAGTCTGCTTGATTTTGTTTC -ATTACATCCATTAGAGTGACACGGCCTCCTAGCCTGACTCGGCTCACTAACATGTTAATG 
-AATGGCCCGATGATCTTATCTGCGTTGTCAATAGGAACATCACGGCCGGATAGCAAATAG -CCAAAACACACTGTCTCGGAGCCAACAAAGCACCGGAGAGTAAGGGCCCATGCGAGATGG -AAGACGTTAGAGGCAGTGAGCTCGTTTGTTTTGAGAAAGGCTTGAAGGGTGGCGTTCGTT -TCTTGGTCTAGCATGTGATGTGTAGAGCCTCTTGGATCCTTATCCTCTGCATTCCGGCTG -TTCGATGCTAGAGTCGGGAAAATGCAGGAATGCACACCTTCGAGATATTGCCGCCAGTAG -TCTGTCCCCACTACTGCGGGTATTTCTCGAATGTGGCGGATGTAGTCACTGTAAGAAGTG -CTCAATATATCCGGGAGGGGTTGGTCGTATGCAGCCTGAAGTTCTCGCTTGAGTAGAGCG -GTCGAAGCGGCATCCGTGATTGCATGATTCATTTCGAGCTCACAGAAGACCTCCCCGGAG -GGTCCTTGGCTGAGGACTAAGGAGTGAAGGTGTTGGTCTTTCCGTTGCACTCGCTCCCAG -TGGTTGTCAAGAGCCGTGATGGGATCTATAGCCTCCCGACCTGATATCACATGTATCTCG -GGTTCGCTGGCTTTCAGGACAAGCTGATCTTTTAGACTGTTGGGAGAAAGGCCATCGATG -AATCTGGTACGAAGCATCGGATGTCGCGCAACAACCCTCCTCCAGGCCCGGGCCAGTCGG -TCAGGATCTAGAGGCAATGTCGACTTAGAGGACAGGGCCCGCCACCGAATGCGAGTCCAA -TACAGCCCAGGGCGCTTTGCTTGGCTCAACAACATACCCTGCTGAATAGGCGAACAAGGG -TATATATCCTCAACCTGGCCATAAGAGAGCCCAAGCTGCGGAAGGCTAGATCTAAGAAGA -ATCTGTAAGGCTGGCTCTGTCAGGCGAAGAAGTGGGAAATCACAGCGTGTATAGCTTGGC -TGGCGCTGCATTAGCACTTCGCAAGACTTTTGTAAAGCATGCTTGCACTGGTTTACCCAT -TCCTCAATGCTGTCCTGGTGCTTCGTATGACGATTAAACAAGAAGCTGAACTCCAGGCAG -CCCTGGGTCACCGATGCGGAAACCTCGATCAAGGCAAAGCGATGTATCATCTTGTCAACA -TCCGAGATACTCCCTGGGGGCAAGATAGCAGGCTGAAGTAGGGAGCTATCCTTTTCAAAT -TGCTGGTAGAGGCCCAAGTAGTTGAATGTAATTTCAGGGAGACCGTGGGCCTTAAATGCC -TTCTTCCCAGCAGGGTTTAGGTAACGAGATGCAAAGTATGGCCAGCCATTTCCTGGCACT -TGACGGCGACTATCTTTCGTATGACGAATAATGTCGGCTAAAGTATCATCTAGCCCCATC -GAGGTCACAGTTGGAAGTAGAGTGGTAAACCAGCCGACCGTTCGTGACAGGTCGATTGCG -GAATCCCATGGCTCGCGCCCGTGCCCTTCGCTGAACACGGTAGGGGGAGCCCTATCCTGG -AAAACTTGGGTGAATGAATAAAGTAAAGCACCCTGTAAGACCTCCACGGGCTGGGTATCA -AAGGCCTTATTGGCGGGCCCTAAGAGAATGTTTGTGACATCTTTGCTGACCGTGAAGCTG -CGCTGAATCGCGTTGCTAAATATGTTAGTTTCTAGATCAACTCCCCAGTAGTCGCGCTGT -GGTGCAGGAATCTCGACGGGTAGGGCCATTTCGGGGGTGAGATGATCTCGAGCATAATCG -GACTGGAGTTGACACCAGGCCTGGAAGGGCATCGGCGTAAAGGAGGAGGACGCGTCGGCT -GTCAAGTGGTTCTCTATGTCTGCGAGGATGACTCTCCAAGACACCAGATCCACTGCCAAA -TGGTGCGCCACTAGATACAGATATTGGTCCTTGCTGCTGCGGATCTGGAGCAGATCCGCA 
-GAGAAGAGCGGTCCCGTGTCTAGATTAAGGGAGACTTGGCTCGCATCCAACACCGGCGAG -GCCTCTGCGAGAGAAGACTCGACATATTCACAGTACCGGTAACAACCTGGGGCGTAAGGC -TTGACCTGTTGCATCCAGATACCGCTCTGGTCAAGTGCAATGCGTGCCCTCAACATGGGA -TGTTGAGTGACAACCAAGTCGAGGGCACGCCGCACATCCTCTGACCGGACCGGACGTGTG -ATGCGTAGAAAGAAGCTCTGGTTGAAATGATTGCGCTCGTGGCGTACCACATCGATGAAC -ATTTGCTGAACAGGAGCAAGGTTAAATGGCTCGCCTTCTCGGGTCTCCCAGGTGTTCGTC -ATGCGTCCCTTCACAGTTGCTGCACTTGGTGTCAGTCGGGCAATTGTTTTCAGTTTAAAT -ATATCAGGTACTGTGACGCGGAAGCCTGCTGAGCGCAGCTTCGCCGATAGCTGCATGGCT -GAGATCGAGTCTCCCCCGAGGGCAAAGAAACTATCGTGGACCCCGATACATTCTTGACTG -ATGTTGAGGACCTCTGCCCAGAGCGTTTGGAGGAGCCTCTCGACTTCTGTCTGAGGAGCC -TGCTGCTCACCTCCCTGGGGGGGTTGGATTTGAGCCAGTTGTTCCATCGTAAGGGATGAC -CCTAATTTACGCAGTCGGTTTCGGTCGATCTTGCCCGTCGCGGTCATCGGAATATCTGTA -ACGGGGATGTACATACTTGGGACCATGTACTGGGGGATTCGAGCGGAGAGGGAGGTTGTG -AGGTCCTCGGTGAAGGGTCGTAGCGCTGCATGGATGCTCTTTAAAGGAGAATGAATAGAC -TTTCCAACGGCAAGATAGGCGACGAGCATGGCATTGCTGCTACCCTGGGGCTTGATAACG -TCACTGACGACTCCAACCTTTTCAGACAGGCTAGCCTGGAGATTATGCTCCACTTCTTGC -AACTCGACACGCTGCCCGCGGACTTTGACTTGATTATCCTTGCGCCCTACAAAGAGGATA -CTGTTATCGGGACCGTAGCGCACGATATCTCCAGTGCGATACAATCGGCCTCGACGACCA -GGAACGCCGGGGGCCCCGCGAACGAGCCATTTTGGGTCGTTCACAAAGCTGGCGGCAGTC -TGTTCCGGCTGGTCAAGATAGCAGAGGCCAACTAAGGGCCCTTCTAACCACAGCTCTCCC -ACAGTGCCCACCGGGGCCAGGCTCGTCTCATCCGATATGTTTACTATCCAGGTGTTCAAA -CCAAGTCCACGCCCAATCTTGCCCAGTTCCCGATCTCCACGATGTTCCGTGATCGTGTCC -ATGACGGCGGCAACAGTACACTCGGCAGGACCATACGCAACACGAAGGTCAACCCGGGGT -GCCCAGGTAGTGATGTCATGCTGGGACATAGCTTCCCCGCCCATGATCAGGGTCTTGAGA -GTCTTACATTGATCTGGGTCTACGGTACGGGCCATCGACGGGGTGAATTGAGCATGAGTC -ACTTTCATCGCCTCCATGGCCTTGACAGGGTTGTCTCGCCGCATGGATTCAGATGGAATA -CAAAGGCATGCTCCAGCAGCAAGCGTATGTAGTAGATTAGACCAGGCAACGTCGAAGGCG -TACGAGACAAAGTCGAAGACGCGCGAATCAGGATCTAGTGCGAGAAGGCTCGTCTGGTGC -ACAATAGCACTAGCAAAGTTGGCATGGCTAATGATTGCTCCCTTAGGCGTGCCGGTACTA -CCCGATGTGAAGACGGCATAGAGATAGCTCGATGGGATGACTGTCGGCAAGCAGAGCGAC -CTAGGATGCGGAAGCTGCGAAAGGAAGGCCTGGTCAATTGCCAGCACGGGCTGATTGTTC -GCCAATTTCTGGGCAGCCTTGAAATTTGCACTGGAAGCCAGGAGCATATACGGCTGTACC 
-TGGTGCACAATCGCCTGTAGACGCTTGTCCGGCTGGGTCATGTCAATGGCCACGGATGCA -CCACCCGCCTTCATCACTGCCAGAGCAGCTACCGGCATCCACATGGACTTCTCAAAACAC -AGAGGTATGAGTGCAGTTGGCCGGACTCCTAGATCCACAATATACCGCGCGAGCCGGGTA -GACAGGAGGTCCAGCTCTCGATATGTGAGCGCACCATCCCATGCATAGATTGCAAGTGAA -TCAGGCCTGTCGCCTGCTTGACTTCCAATAATATCATGAACACAGGCATTCACGGTCTCT -GGAACGGTTTGATTCTGAATCCAGAGACTCCGGAGGTCTTGCTGGCTGATCACATTGACC -ACATGGGCCGTTTGATCTGGGACACGGATGACATCAGCGACAACCTGAAGGAAAAGCTCT -GCAAGGCTTTGTGCTCCCTGCTCAGATAGTGTATCGGCTGTGTAGTCGAATAAAACATCC -GTCTCGTTTTCGTCATGGGAGATAGCAACGGCAATGTCGTACTATTGTACCAGGTTAGTG -ATGGTCAGCCATGGAGAATTGTTGTGATATAAACAATCATAGAAATAAGAATAAATATAT -AAACACATACCTCTGGTGAGTCTAGCCCTGTCTGGTCTGTCAATGTGATTTCATGTTGGC -CTGGTTCGCTAGAAGTAGCCCCGCCCTGGACAGATATTCCGGTGTTGAATAAGCCCTGGC -CGGACGTTCCGGAAAGCTGTAGAACCTTGCTGAGTGGAGTGAACTGATACTGTAGGGCGG -CGAGATAGTCTTTCTGGACCTTCTGCATGGCTGACAATATAGAGGTTTCCGTCCCGAAAT -TGAGAACACAGACAAGCATGTTAATGAGAGGACCAACAATCATGTCAACTCCTGGGATAG -GCACGTCACGCCCGGACGTGAGATAGCCAAAACAAACCGTGTCTAAGCCAGTGTGGGCAC -GAAGAAGAAGTCCCCAAGCTAGATAGAAAACGTTCGAGGATGTGACCCCATGCTTTTGGC -AGAAGGCACGCAGCGCTTGATGATTGCCCGAGTCCAGCGATATGGATAGCGTCATGTGTG -TGTTCTGACTGTCCGGTATATGTGACCGGCCGAGTGCTGGAAAGTGACATGGTTGAACGT -TGTCAAGGTATGACTGCCAGAATCTTTCACCTTCATCGGGGATAAACCCCTGCAGATGCT -GAACATACGCCTTGTAGGACGGTGCGGGCGTAGTTGGCAGGGTTCCAGTATATGCAGCAC -ATATTTCCTGCCTCACAAGGCCTAGAGAATATGCATCTATCATCGCATGGTTGATTTCCA -AGTCGCAGAACACGCCTCCAGACGCTGTTTGCGTCAGCACCAGGGAATGCTGCGGGGTGC -CCCTCCTTTCAGAGAGATCGGTGTGCCATTTAAGCCCAGTAACTGGTTGTGCAAGCTGCT -GCTGCTTCTCAGTATACAAGACCTCGACATCCACCGGAAAGTCTTTGAGAACCACCTGGT -CCTTCACATTGCCTGACCCGACACTGTCAATAAATATCGTTCTCAGAATTGAGTGGCGCT -TTACGACCAGTTGCCAGGCACTCTTCAACCGACTGAGCTCGACTGGCGATGCGCTTGTTG -ACTGCAACATCCACCGCACTCGAGTCCAGTAAAGCTCCGGGCTCTTGGCCTGACTCATGA -GAATACCCTGCTGAATCGGCGCACAGGGATATATATCTTCGATCTGGCCGTAGGAAAGAC -CCCAAATCGGGAAAACCCTAGTAACAAGCTCGCCTAGAGTTTCGTCTGTGAGAGAGAGCA -AAGGAAAGTCGCAGATAGTATAGCTTGGACTCTGTAGCGGGAGTTCCTCTGCTGCCGCTT -TAAGCGAGCATTCATACTTGCTGATCCATTCAAGTATCATGCCCTGCCCTTTCATGCTCT 
-TATTGTATATGAAATCAAAAGTCAGCTGGCCCTGCACAACAGATGCGGCGACGTCAATCA -AAGCAAACCGAGAAATAGTTGCCGCAGCAGTAGGGGGCTCTGTGACCGCAATATCGGACT -GCTGGAACAGGGAATCGGGGCGTTCGAGCTGCTGATACAAGCCCATATAGTTGAAAATAA -TCTCCACAGGACCCTTGATCTGGAATGCCTGCCGTCCAGCGGGATTAAGATACCGCGACG -TAAAATAGGACCAACCATTGTGAGGTGTCCGCCGACGCGAATCCTTGACACAGCGTACCA -CCTCTGGAAGACTGTTCCTTTGCGTAATCAAAGCTGCAACTGGGAATATAGTGGTAAACC -AGCCAACGGTTCTAGAGAGGTCAATGGCAGAGTCCCAGGCTTCTCGGCCATGCCCCTCGG -TGAAGACAGGCGGCACATCGCGGTCCTGAAACGTATTTGCAAACGCGTGCAACAGGGCTG -CGTGCAGGATCTCAACTGGCTGCGTATTGAAGGCGGAGTTGGCTGGTCCCATAAGGATGT -CGGTCAGACGCTTGTCAATCGTGAAGCTTCCTCGACCAGCATTGCTCAACGTATTTGTAT -CGTACCCAGGGACTAGTCCCCAGTAGTCATGTCGGGGCGGACTGACCTCAAAGGGCAACG -CAACATGTGGAGGCAGATGATCCCGAGCATAGTTTGTTTGCAGTTGGCTCCATGACTGGA -ACGAGAATGGAGGGAATCCGGTGATTGTCCCCGTGAGCATGTACTCCTCCAGATCTCCCA -GGATAATTCTCCACGAGACAAGGTCCACCACCAGATGGTGGGCGGTCATGAAGAGATACT -GCCCATCGTGAGACGAATCAATCAGGTCCACGACCAGGATGGGACCTTGGCCAATATCCA -GCAGCTCCTGGCTCCTATGAAGGAGAGGCTCCATTTCTTGCAAGGTTGTAATATGGTGGC -TGCGGAATACATAGCAGCCGGCTGTATCGGGCACAATCTTCTGCCGCCAGACGCTGTCCG -CACCGTGCACAAATCGAGCCCGGAGCATGGAATGACGTGCTGCCAGAACACTGATTGCCT -GCTGGAGCTCGTTTGGGATTAAGGGACGGGACACCCGAACCAGGAAGGACTGATTGAACA -GGTTCTTTCCATTTTCCTGTGAGGCGAAAAATAGCTGCTGTATCGGGGACAGCTCGAACA -GGCTGTCGGTGACTTCTGGGAGCTGCCTGGAGGCAGTCGCTGTATGCCCTGCATGGCTTG -CCAACTGCGCGACGGTCTTGTGCCGAAAGAGATCGCCCACTGTGATGTAAACGCGGCCGG -ACTGGGACTTGGCTGACAGCTGCATTGCCGTGATTGAATCGCCGCCGAGCGAGAAGAAGC -TATCATCTATGCCTACAAGGGCCCAGTCGATGTTTAGCACTTCTGCAAACAGTTTCTGCA -GCTCAAGCTCCTCTGGGGACGATGCTGCACGCCCTTTGCCCTCGTCGGACTGGAGGCTAG -CCCATTCAGCCAGAGTGCGGCTGGATGCCATAGCTTCGAGTTTCCTGCGGTCGGTCTTTC -CATTCGTAGTCACGGGAATCTCAGCCACCGGGACGTAGATGCTCGGTATCATATAGGTCG -GCAGGTTCTTTTTCAAATCATCATTGATGAATGTGGTGTAACTGCTGAGAGTTGTGCGAA -CCATGTCCAAAGAACCCGTTGCCCGATCACCCAGAGCTAGATAGGCTGCCAGAATctgct -tgttgcttcctttggggctgacagcccctaccatcacagtgaccgccgctgttgttgcta -ctcctgctgatgctgcggctgctgctgcGTTCAGCATAGCCTGCTGGATGTACTTCTCTA -CTTCTTCCAGCTCTACCCGCTGACCATGGATCTTGATCTGATTGTCCATTCTCCCAATAT 
-AGATAATTGAGCCATCCGGCGTATAGCGGGCCAAGTCTCCCGTTCGGTAAAGCCGGCCCT -GTCGACCGGGGCTATGTGATCCTCCTGCTCCTTGTACCAGCCATCGCGGGCTCTCGATGA -ATGCAGCTGTTGTTAATTCCGGGCTTTGATGATACCCAAGACCCACTAGTGGTCCTTCCA -GCCAGAGTTCACCGGTACAGCCAACGGGGGCCAATTGATCAGGTCGACGAGGGTCAACAA -TCCACGTGGTGCAGCCACCCCCCTTTCCGATGGAGGGCATGGTCATTCCCGAGTTGAGAT -GCTCAAATGTCACCCATGGAGTGGTTTCAGCAGGACCGTAAGCGTTGATCAGATGGACAT -GTGGGCTCCACTGTTCCATATCCGCCGGTGATGTGGGCTCTCCACTCAAGACGAGGGTTT -GGAGGAGCGGTACATGGGAAGGGTTCAGAAGGCGTGCGACAGATGGAGTTAGATGCGCGT -AATTTACCTCCAGCCGGCGCATTGCTGCACTGATATCTCCGCGACGCTCCGACTCCTCGG -GCACGCAAACGCAGCCACCGGCCGCAAGGGTGTGAAGGCTTGAGCCCCAAGAGACGTCGA -ATGCGTACGAAGCAAACTCCAGAACCCGCGAGGAAGCAGAGAGTTGCAAGGGGTGCCTTT -GGTGAATAATGGCGCTACTAAAATTTGAGTGACTGATTGCCACTCCCTTAGGCAACCCTG -TGCTCCCAGAGGTAAACACCACATACAGGGTATTGGCAGGATCGACTGCGGGTAACCAGG -AATTACATGGCGTGGTCAGCTGGGCCAAACTGGCGCCATCGACCTGAAAAATAGGGCAAG -CTGCAAGTGCTGCTGCGCTGGCGTGAGTCCCGGCCGAGGCAAGGATGATATGCGGCTGGA -CCTGCTCGATGATACTTCGTAATCTGCCTTCTGGCTGAGAAACATCCAAGCCGATCATAG -TTCCACCGGCCTTCATTACTGCTAGACTGGCAATGTTCATCCACCGCGACTTCTCAAAGA -GCAATGGGACATTGACCCCCGGCCTGACACCAAGATGTCGAATGTGGTATGCCAGCTGTG -TGGAGAGATTGTTGAGCTCTTCGTACGTAAGATGGCCATCCCATGCGCAGATGGCAGCGG -CTGTTGGTTGTTTCTCTGCCTGCTCCATTATGAGTCCGTGCACGCAGCGGTTCAGAGGCT -TCATTACAGCGGCATTCCACCCCCAGATGGTCTGTAAGTCTTGTTTGCTGATCAAATTTG -CTTTCTGGGCTGTTTCGTCCCCGTGCTCGACGATATGTGAGACCGCGTGCAAGAACAGGT -TGAGGGCACTCTTTGCATGGCTTTCAGATACAACCGACTTATTAAAGGTGAAATTCAAGC -CAGTCTCCTCATCTCCGGCCCCGATATTTATCATAATATCGTACTAAGTCTCGTGTCAGC -GCATTAATACGAAAAGAAATCGTGCTGTATCTCTTACCTCGGTGGGGTCATCCCCTCCTT -TGTCTAGAAGCGATATGGTCGACTGACTGTCCTTGAGCCCATACCCATCAGCTTGAACTG -ACATGGCAGTATTGAAGACCTCCTTCTCGGGCATGTTGCTCAAGTGGAAGATCTTGGCCA -GGGGGTAATGTTGAAACTCCAAGCTGGTGAGATACTGCTCCTGGTCCCTCTGTACCAGAG -TCATCAGCGAGTCCCCGCCGGCCAAGTCCACACGGCTAACAAGCATATTAATGAAGGGAC -CAACAGTTCTATCGGCCCCTTGCAGCGGAATATCCCGGCCAGATGTGAGATAGCCAAAGC -AGACAGTCTCCAAGCCCGTATAAGCCCGCAGGACCAGGGCCCAGGCAACATGGAGAACGT -TGGACATTGTCAGTCCATGATCCATGCAAAGGACGCGCAATGCCAGGTGTAGCTCCCGTT 
-CGAGTATAATGCTGATACTGGCTTGCGAGTGCGGGCCTTTTTCTTGGCCGCCCAAAGGTG -GAAATATACAGGGTTCGACTCCTCGGAGATGCTGTTTCCAGTATGCCTCGGCCGGGTCAG -TGGACAAGGACTGGATGAAGTGGATATAGTCACTATAAAGTGGTGCTAGGGAAGAAGGAA -GAAGACCATCATATGCAGCACACAGTTCCCCCTTGAGAATTTCCAGGGAAAATGCATCAA -TCAGCGCATGACTGATGGATAGCTCACACAATAAGCCCGAGTCTGTTGTGGACAGAGTAA -GTACTGAGCTCGCTTTACCCTTTGTTGTGGATGTGCCCTCAACCATTTCCTCATCGGTGG -ACTGGACGACATGGACATCTGCCTGGAGATCCCTCACCACCACTTGATCCTTTAGTCCTT -TGCCCGAGATGCTGTCGATAAATATGGTGCGAAGCGCTGGATGGCGGTCTACCACCTGCT -GCCACGCCTGCTTGAGCAGGCTGATATCCACGGCTGTCGATCCATGTCCAGGCTGTGCCA -TCCATCGGATCCGGGTCCTGTAAAGGTCCGGATTCTTGAGCTGGCTCAGGAGTATGCCGT -CTTGGAGTGGTGAGCAGGGATATACATCTTCCACTTGCCCGTATGTAAGGCCTAGCTCCG -TAAGCCTATGCTGGAGGATCAACAACCCAGGGGCCGTCATGCGAAGAAGTGGGAAGCTAC -AGAGGGTATAGCTCGGTTGGAGCAGGGGAAGCTCCTGCGCTGCGGCCTGAAGGGAATCTT -TGCATTCTTCGATCCACGCGCGGATGGAGGCCTGGTGTTGCATGTGCCGATTATACATGA -AGTCAACGTGCAGACAGCCATCCACCACCGACGCCGAGATGTCCACCAGCGCAAAGCGAA -ACAGGCTTCCGGAAACATCAGACATTTCATCCACCCCTTCTGGCATGCCGCGTAGGTGGA -AGAGAGCGTCCGTCCGCTCTAGTTGCTGATACAACCCGAGGTAATTGAAAATCACCTCAA -CGGGGCCATCGAGCTGGAACGCCCGCGTTCCACCCGGGGTGAGGTGGCGGGCTGCAAAGT -ATGGCCGGCCATTACCTGGTATCTGGCGTCGGCGGTCCTTAACATGGCGCACGATCCTGG -CAATACTATCCCTCTTCTTTGCCGTGGCTACTACAGGGAACAACGTAGTAAACCACCCGA -CTGTGCGGGATATATCGATTGCAGAGCTCCACGGCTCACGACCGTGGCCTTCGCTGAATA -TTGTTGGTAGTGTCCGGTCGTGGAAGGTCTTGGCAAAGGAATGTAGGATGGACGCATGGA -GAATCTCCACCAACTGTGTATCAAATGCCACATTTGCAGGCCCTAACAAGGTCTCTGTTA -CCTCCTTGTTGAGTACTATGCTACACTTGACTATATTATCAAACGTGTTTGAGCTGCATA -CCGGACCCCAGTAGTCCAGCTGTGGGGGTGGCGGAAGAGGCGCCCCGTCTGGCAGAACAG -CCCCCAGATCGAAATGGCTCTGGGCGTGGTCCGCCTGTAACTGGCACCAGGTCTGGAAGG -ACATTGCGGTAAAGCCGGAGAGGGTCTTGGTAGTGAAGTGTTCTTCCAGGTCAGCCAAGA -TGATTCTCCAAGACACTAGATCCACGACCATGTGGTGGGCTACTAAAAAGAGGTATTGAC -CCTCGGCTGCATTGATGAGATCCACGGCCATGACTGGACCATTCTGGATGTCCAGCGAAA -TCTGGCTTTGGTTGATCACCGCCGAAGCTGCCTTAAGGTCCGTAACGTCATGCTGAGAAA -CGCAGTATGTTTCTGTGGTCCCAGGGGGCAGGATTCGCTGGCTCCAAATGTTATCCTCGC -TGCACTGAAAACACGCGCGCAGCATGCCATGATGAGCCACCAGAGCATCCATTGCGGCCT 
-GGATCTGAGGAAAAGTGATTGGCCGACGAATGTGCACAAAAAAGCTTTGGTTGAAATGGT -TGCTGATCCCATGCTGCGCATCGGCGAACATATACTGGATTGGGGACAATGCAAAAGGGG -CACCCTGGCCCTCCGGCGGATGTTCCATTTTCAAATCCACAGTCGCGGAGACGCCGAGAG -CTAGTCGAGCGATTGTCTTATACTTGAAGATGTTCGCGACTGTCATCTGGAGGTCCATGG -ATCGGCTCTTTGCTGAGACCTGCATGGCGGAGATCGAGTCCCCCCCGAGGGCGAAAAAGC -TGTCATGAATGCTAATCTTGGATGCACTGATATTCAAGCACTCGGCCCAAAGCTGCAAGA -TCTGCCGTTCAAGGTCCGTTTTCGGCGCTTGCTTGTTGCCCCTGGTCGGTTGTAGCTCCG -CCAGTTCTTCCAGGGACTTTGAGGCCCACGTCTCACGGAGCCGCCTTCGATCTCTCTTGC -CCGTGGCCATCGTGGGAATATCGACCACTGGAAGGAACAGGCTGGGCAGCATGTAGCTAG -GCAGCTGATCTGCCAGGCGCTCCCTAGCCCCCTGGCTGTATTGAGCCAGCTTTACTCGGA -CAGTCTCCGGTGGCCCTAGTGCAGCCTCCCCTATTGCCAGGTAAGCGACCAATAATGGAT -TGGCACTGCTCTGCGGAATAATGACCTCGACCGCCACAGCGACATCGATGGCAGCCGGGG -GCAAAGCTTGGTTCAGATAGTGCTCGACTTCGGCCAGCTCCACGCGTTGTCCTCGGATCT -TGACCTGGGCATCTTTGCGGCCAACGCAGACCAGGGCTCCATCCAAGTTATAGCGCACAA -GGTCACCAGTGCGGTAGAGCCGGCCGGGTCGACCTGGGAATCCCGGCCCCCCGCCCTGCA -GCAACCAGGAAGGGTTGTCGATGAAGCTGGCAGTATTGAGGTCCGGACGGCCCAGGTATC -CTAATCCGACCAGAGGACCCTCCAACCATAGCTCGCCCACGGCCCCGTACGGTGACAAAT -GGCGGCCTTTCGTTGGCTCGATCACCCAGGTCACCAGTCCTCTACCGGTACCAATGGTTC -CTGGGTCGAGGTCATTGTGACCGATATCGAACGTGGTCGAGATCGCACTGCATTCCGCGG -GGCCATAGCCATTCTTGAGTTGGACGTGGGCTTTCCACTGAGTTACGTCCTGTTGTACCA -CCGGCTCCCCGATGAGAACAAGGGTCTGGAGGTCTGGGACGAAGGTGGGATCAATCAGGC -GTGCGGTGGAGGGCGTGAGGTTTGCGTAGGTCACGGCCATACGGCGCATACATCCAGCAA -TATCACTCCTGCGTTCTTCTTCTAGTGGGATGCAGAGACAGGCACCGATCGCCATCGTGT -GTAGGAAGTTCAGCCAGCAAGCATCGAACGCGTATGATGAAAAGTCAAAGACCCTAGCCG -ACTCGGTAAACCCAAGGGCCACCTGCTGATAGTGCACTGCACTGGTCATGTTCTGATGGG -ACATTGCCGCACCTTTCGGAATCCCTGTACTGCCTGAGGTGAAGCTAATATAAAGTCTGT -CCGTCGGTTGAACCATAGGCAATCGGCCGTGCTGAAATTTGTCCAGCGGCGGCGCCTGTA -GGCTGTCTTGAGAAACAGTTTGCACCGGTGTGTTGGTAAGACGCTGTGCCAGCCCCTTTG -CCTCCAGGGAGCTCAGGACTAAAGGAGGCCTGACCTGCTCCACAACTGTCCGGAGACGGT -CCTCTGGCTGCGTGATATCCATGGCCACCGATGCACCGCCGGCTTTCATGACTCCAAGCA -TTGCCACGGGCATCCACATTGATTTCTCGAAACACAGAGCAACTACAACATGAGGACCGA -CGCCTAGTTTTGCTAGTTGATGTGCCAGCTGGGTTGAGAGCTCGTCTAGCTCTCTATACG 
-TTAACTGTCCATCCCACGCATCAACAGCCTGGGAGTCTGGCCGCCGCTGGGTCTGCTCGG -CGATCAGATCGTGCATGCACGCTTCCACTCTCACTGGGGCCGAGCAATTAAGCGCCCAGA -CCCTCCGGAGATCATCTTCACAGATGGCCTCGATGTGAGATACGGGAGACAGGTGCTGAG -TTTCATCGCAAATTAGTCGCACCACATGCTCGAACTGCCGAGCTAGCTGCTGGACCTGCG -GTTGGGCCAGGACATGCGAGTCGTAGCTTATCTGAATGGACACCCCAGTGGCCGTCAGCT -GGCAATTCAGAAGGAGGGAATATGAATTGAAGCTGCTGAGGGCTTCCTGGCTGATAGTAT -CGTCCTCTTCATAGAAAATTGCGACGCGTCCATCCGTCGCCGTCGGTCCTGGAGCTGGCT -GCACCACCAGCAGTGTCTGAAACTGGCAAGCTCGCTCGGTATCTGCGCTGAGCCTTCGAA -TCCGCTGCAATCCTACCTGCTCGTGTGGAATCATATTGACAGACTGCTCTTGGACCTGGT -CTAGCAACCCTGTGAGGGTCATATTTTCCTGAATGGTGACTCGGACCGGCACTGTTGCAA -TCGTCGGACCTTCAACGAGGTCAACGCGATGGACGGGAGCCTGCCGTCCGGTCACAGTAT -ATCCGAAGATGACATCGGTTGACTGAGTATAGTGAGCTGTGAGGATTGCCCAAGCCGTGC -GAAGGGTTGTTGCTGGTGTGATATTATTCTGCAACCACTGGAGCCCCGATACGTGGTGCT -CTAACACATTCTGGGCATCCGGCTGGTACAGTGGAGAGGGAAGGGCAGGGAATGGCACCG -CCACTTGACCATCCAGCGTTCCATGCCAGTAAGAGTCGGTCTCAGCACCTAGCTCCGTCA -CATGCTTGATCAATGTCTTGAAGGAAACGAGGCCGTCACTCGCAATTCCGCAATAGGCCT -TACTGACCTCTTCCAGAAGCAGTGGCAGAGACCATCCATCATAAAGTGCATGGTGGAGCG -TTAACAGGAAGTACCGGTGCAAATCCCCATCCACGCTGCCGGCGTCCACGAAGCCAAATC -TGGCCAGGGGAGTACCAAGGGTGACAGGGCTCTCCAGATCTTTTTGAATGAGTGCGTCGA -GACTATGGCCATGGTGCCACCCGGGTGCCTCGCTGGTAACTACTTGGACTAGCCCTTGCC -CGCGGAGATCCACGATGCGGGTCCGAAGAATCGATGTAGCCATGGCCACTTGGTTCCAAG -CGTTCTGGAAGCGGGGAAAATCCACCCCGTCCCGCAATCGCAGCACCTGGCGGTTCACAT -AGTGGCCCTGGTGCTTGACTGTCAGAGAAATCAAACCTTCCTGTAGAGACGTACACGGAA -GGATGTCCTCTATAGTAGACGGGTTTAGCTCACACAACGCCGCAAGCTGCACCCTCGCCT -GGGAGACGACTATCTCTTGCTTGAGGAGTGAAAATGGAGGGATATCATCCTGACCGGGGG -CCGATCCCAGCTTCGCCGCCTCAGCCATATCGCACAGAGTGGGATACTTGAATATGGTTG -CCACTGTGAGCATGATGCTTTGCTCTGTAGCCAATTGAACAAGTCGAATGGCCCCAATTG -AGTCTCCTCCAATCCGTAGGAAGCTGTCGGTTGCCGCGATGGTGGCTGCGTCGATCTCCA -GGATAGCGGCCCACATCCCTTGGATACGCCGTTCCATCTCTGTTGTGGGCGCACGGCGCA -TGCGCTGCGATGGCTGAAGCGCTGCCAGCTGCTCTAGGGTATATGAGGATGCGACCTGTT -GTAGGCTGCGCCTATCTGTTTTTCCGTTCGTTGTAAGTGGAATCTCTGCTACCGGGATAT -ACATGCCAGGAACCATATAGCTGGGTAGCCTTTGGATCAGGCAGTGCTCCACCCTGTGCG 
-TATAGCGACCCAACGCTGCACGCGCGGACTGGGGTGACACAGTAGCTTCGTCGCCCAAGG -GAAGAAAGGCTGCCAGCACAGGATTCGCACTCCCCTGGGGGCAAATGAGTTCCACGACCA -CCGGTATATTTGCGCCGGCCGGTAGTGCCTGGCGTATATGATGCTCAATCTCCCCCAGCT -CCACACGCTGACCCCGGATCTTCACTTGGGTATCTTTGCGTCCAACATAAATCAACGATC -CATCGGAACGGGATCGGACAAGATCTCCAGTCTTATAGACTCGGCCCCGGCGTCCAGGGC -AGCCCGGTCTTCCACGCAAGAGCCAGCTTGGGTCTTCTACAAAGCTGACCGCATTCTTCT -CGGGTTCATCGAGATACCCTTGTCCGACAAGCGGGCCCTCGATGCAGAGCTCTCCTATAA -CACCAAGCGGTGTCAGGCTTTCACCCTGCAGAGGATCTACCACCCAGGTTGTCACCCCAA -TTGGGTGACCTAGCATCCTGGGCTCACATGTTTTGTCCTCGAACTGGTAGATGGTTGTAA -GTATGGTGCACTCAGAGGGACCATATGCGCCCAGCAGCTTGACGTTTGGCCATCGGGTAA -TATCATCTGCACACAATGCCTCTCCAGCTAAAACCAGGGTTTCTAAGCTAGGAGTTGCTA -CAGGGTCAATGATCCTAGCAATTGAAGGAGTAAGAACCGCATAATTTGCCCTCAGAGTCC -TGATACAGCCCGCAAGATCGTCCTTTCGCTGTCTATCGGAGGGGATACACAGACAGCTGC -CTGAGGTAAACGCATGCAGAAAGTTGAACCAGGGAGTGTCAAACGCATACGATAAACTAT -CCAGAATTCGTGCTGCGGGCGGCAACTCAATGTGTGCCTGCTGGTAGTGAATTGCAGAAC -ATGCGTTTTGATGGGACATCGCGGACCCTTTGGGGGTCCCCGTGGTCCCTGAGGAAAATA -CCATATAGAGCCTGTCCATGGGGTGGATGACTGGCAGGAGCGATGGGTTTGGCTGATGCA -TCGCCTGTAGCGATGCCCCATCCAAGACGATGACCTCGGGCCCTAGTTGGCACGCTAGGT -CCTGATAGGCCGGAGATGTCAGAATCACCCCCTGGGTCTTCTGCACTACAAGCCGCAAGC -GCTCCGCTGGCTGCTCTGGATCCATGGTAACGGCCGCGCAGCCTGCCTTTAACACACCCA -GCATAGCCACGGGTACCCAGCGGGATTTCTCAAAGCATATGGGCAATATTGACTGCGGAC -GGACGCCCCGAGCAATCAGGTGATGTGCAAGACGCGAGGACCAATCGTCCAGCTGGCGAT -AGGTCAATTCTCCATCCCAAGCACAAACGGCTACTGCATCAGGCTGCTGCTGAACCCATC -GGGCAACCAGCTCGGGTACCGAGCCGGGCAGGGCCTCCGGCACCTTCGCATTCCACCCCC -AGATCGTGACGAGATCTGGGTGACTGATTGTCGTCACCAGATCGCCAACATTTCTCTCCG -GATGACGCACAAGGTTGGTCAAAACTTCCAGGAACATGGCCGCCACGGCATTAGCTTGTC -GCTTCGAAAGTAGGTGCCGGGAATAATTTATCGAGATTTTAGGACTGCCGTCCAATAAAA -GGACTGATGCGGCAACGTCGTATTCATTAGGGTCGTGCCATGTCTCTTCCTCGAGTGCGA -TGGTAGCTGTGTCAGACTGAGCCGTCTCGGCGCTGCCACTCTGTACCGACAGAACTGTAT -TGAAGAGTGGCTGCCCCTCTGTGCTCAGTGCGTGCAGGACTTCTGCTAGCGCATAGTGGC -TGTACTTTAGACCGTGCAGATACCCTTCCTGGATCTCTTGCATTATATCGAGGATGGTTC -TCTTGCTATCGATCTCCACGTGGCTAACCACCATGTTAATGAACGGTCCAACGGCTCGGT 
-CCACACGTCTCATGGGCACATCCCGACCAGACACTAGATATCCGTAGCACACGTACTCGC -TGTTGGTGTAGCAGCGTAAGACTAAAGCCCACGCGAGACTGAACACATTTGCTGGGGTAA -GGGCATGGGCTTTGCAGAACTGGCGCAAGCTCTGGTCTGTCTTGGGGCTAATGGACACTG -ATGCAACGGCCGCCAAGTCCCTTGATTCAGTGATGGGCTCGTTCAGAGTTGGGAAGATGC -ATGGCTGAGCATCAACTAGCTGTCTCTGCCAGTATTCCTTTGCCTCTATCGTGGGCAAGG -ATTGGAGATGCCGGATGTAGTCGCTGTACAGGGGCCCTGGCTCCGCAGGGAATGTCCCGT -CGTACCCTGCACACAGCTCTTGTTTCAGGACCCGAATTGAGATCGCATCAATGATGGCGT -GGTTGATCTCCAGGTCACACATAAGCTCTCCGCTCGCCATTGGGCAAAGGACAAGGGAAT -GCACTGGCCGCGCTTGCAGCCCGCTAGCTGCTCGATACTTGCCAATGGCCTCCACCGGGT -CCCCGGCCTCTATCTGTTCCTCGGTCTGTATCATGTGGATACTAGGTGAAGCTTCCTTTA -ACACAATCTGGTTACCATGCCCAACCGAGAGGGCACTATCGGTGAATCTAGTCCTAAGTA -CAGCATGTCGTTCGACGACCTTCTGCCAGGCACGCCTGAGTCGGCCCGCATCAACCGGAG -ATGATGCCCTACTGCTGGACCGGACAAGCCAGCGAACTCGGGTCCAATATATCTCGGGGT -TCTTTGCCTGGCTCAGCAGAATCCCTTGTTGCAGGGGCGAGCATGGGTAGATATCCTCCA -CCTCGCCGTAGGCAAGGCCAAACTGGGAAAGTGTATCCAGCTGTCTCCTTGTTGCGTCTG -TCGGAGGCAGCAACGGAAAGTCGACACAGGTGTAGCTGGGTGGTGTATCTAAGAGCTGCA -AGGCTGTTGCTTCTAATGAGCGCTCAAACGCTGTAACCCACTGGGTCAGAGCGTCCTGCT -TCTCTATGCGTTGGCTGGAAAGGAACCGAAACCGCAGGCAGCCTTGAGTGACCTCTGCGG -AAACATCAATCAGGGCAAACCGGGCCATGTGCTCGGCAACATCCAACACTCGATCCTGCA -GACTATTGCAGTATTGGAAAAGAGCTTCCTTCCTCTCCAGCTGTTGATAGAGACCAAAGT -AATTGAAAGCAATCTCAGGGAAGCCATGGCGGCCAAACACTGCCTTTCCCGCGGAGTTCA -AATACCGCGATGCAAAGTACGACCACCCATTGTGAGGGATTTGTCTCCGGCGATCCTTAA -GGTAGCGGACAAACTCGGGCATGGTGAGGTGGCGGGTGCCCGCTTCCACTGGGATCGGCG -CAAGGGTAGTGAACCAGCCAACCGTTCTAGATAAGTCGATTGCAGGGCTCCAAGGCTCTC -TCCCGTGGCCCTCACTGAAGACAAGAGGAGGGTCCCTGTCAGTGAAGATGTTCATAAAGG -AGTGCAATAGCGTGGCTAGAAACAGCTCGACAGGCTGAGTTTGGAATGTATGGTTTGCTG -GCCCCAGTAGTATATCGGTCGTTTTCTTACTGAGCGAAAACCCGGCCTCCTGGATATCGT -TGTGCGTATTGAGCTCCGGCGACAGTCCCCAGTACGCTCCATAAGAAGGCTCAACGTCAA -ACGGTAATGCGGCATCTGGCGTCAGATGGTCCGCAGCGTACTTCTCCTGGAGTTGGCACC -AGGTCTGGAATGAGAGTGGCGCAGAGGAACTAGGGGCAGTACTAGTCATCATGTACTCTT -CTAGCTCTTCCAGGATAATCCGCCAGGAGACAAGGTCAACCACCAGATGGTGCGCCGTTA -AAAACAGGTACTGGCGGTCTGAGGGAGTGTCGATCAGATCGACCGCAAATAGGGGGCCGA 
-CCTGGATATTCAAAGATCGTTGACTGTCGTTGAGTGCTGGGACGGCGTCATCCAACGATC -TCAGGGAATGGTGCCGGTAAATATAGGACTCATCGGCACGCGAGGTGATCTGCTGCATCC -ACGCCCCATTGGCGCCCAGGTGGAAGCGAGCACGCAGCATCGAGTGCCGCGCGACAATGA -ATTGGACAGCGCGCCGAACGGCCTCAGGTTGTTGAGACTGAGTGATGCGGACGAGGAAGC -TTTGGTTTACATGACCTTGCTTGTCTTTTTGGACGTCAAAGAAAAGCTGTTGAATGGGAG -ATAGGGCAAAGGGAGTGTCGAGCAGTTCCCGATCGTCGATGGTGAGACCATCGGTCCTAA -TCTCCCGTCGTGCTAGACGCGCAATCGTCTTGAACTTGAAGATATCTTGCACGGTAATGC -AGCCCCCTCTCGTTCGGGATTTAGCAGACATCTGCATGGCTGAGATGGAATCCCCCCCTA -GGAGGAAAAAGCTCTCGTCCATGCCGATTTGAGAGGGATCTATGTTTAACACCTCTGCCC -AGAGCTGCTGGAGGTTCTGCTCCGTCTGATTCTGCGGCACAGCGCGCTTCTGGTCTCCGG -AGGGTTGCAGTTTTGCCAGTTCGTCTAGGGCAAAGGAGGAACCCGTGTCGCGCAACCTCC -TTCGGTCTGTCTTGCCCGTAGCTGACAACGGGATATCGACCACGGGGATATAAAAACTTG -GTATCATATACCGGGGTAGGTACTTACTAAGATGGTCCTCGACCCCCTGGATGCAGCACT -GAAGCGCCGCCCTCACCGTCTCGGTGGGGCCCACGATTGCAGCTTGCCCTAGAGCCATAT -AGGCCACCAGCACCGGGCCTCTGCTACCTTGCGGATTTGCCATATCTACCACCACGGGTG -CTCTGGTGGCTTCTTTGATGTGATGCTCGATCTCGGCCAGCTCGACGCGCTGCCCACGGA -TCTTGACCTGCGTGTCCTTTCGCCCCACGTATACCAGCTTGCCATCAGAGGTATATCGGA -CAAGATCACCAGTTTTATAGAGTCGGCCCTGCCGCCCAGGGTAGCCTGGTGCTCCCTGGA -GCAGCCACACAGGATCGAGGACAAAATTTATAGCTGTCTTCCCCGGATCCGCAAAATACC -CCTTTCCGACTAAAGGCCCTTCCACCCACAGCTCCCCGACTGCTCCCAGTGGACTGAGGC -TATCACCGGCCGATGGCTCAATTACCCAAGACGCGACGCCTACCGGAAGCCCAATGATAC -TAGGCTGGTCATTCGGACCCTGGAACGTATGTATTGTTATCACTGCGGAGCATTCTGCTG -GGCCGTACCCATTTTTCAACTCGACCTTGCCGGCCCATCTGGCCAGGTCCTCGCTGGTAA -CGGCTTCGCCGATCAAAGTCAGGGTCTTCAAGCTGGGGACCGCATTTTCGTCGATGAGCC -GGGCTGTTGACGGGGTTAGAAGAGCATAATTGGCATGCAACTGCTCTATGCAGCCTGCAA -GACTGCCACTGTTTCTCTGAATATCCGAGGGAATACACAAACATCCCCCGGCAGCCATGG -TATAAAGAAATTCTACCCACACCGCATCGAATGCATATGAGGAGAAACCCAACACCCTTG -CAGCGGAGGTCAAACCTAAGCCCTCCAGGCTATAGTAGATGGCGTTGCGAGTGTTTTGAT -GGGACATGATTGCACCTTTGGGGTTTCCTGTACTGCCTGAGGTGAACGCGATGTAGAGTC -CGTCTGTCGGGTAAACGGTTGGTAGATCCGAAGAACCAAGACCCTCTTGTGATAGGGCCC -GCAGTGTAGCCATGTTCACGCAGATAGCTTCCTCCGCGAGTCTAGATGCAAGAGGCTTGT -ATGTCTCAGAGGCTAGAATGAGCGAGGGCCTGGTCTGCTCAATGATCGACTGCAATCGGT 
-CCTCTGGCTGCTCAGGGTCCAATGTAACTACGGTACTACCAGCTTTTATAGCGGCCAGCA -TGGCAACCGGCGTCCACATACTCTTCTCGAAGCACAAGGGAATTACCGAGTTGCGGCCAG -CCCCCCGCTGCACAAGGGAGAAGGCCAGATGCGTGGAGAGGGTATCTAGTTCCTGATAAC -TCAGCTCTCCGTCCCACGCGCAAATTGCCGGTGCATCTGGTTGTTTCTGAGTGCGTTGCG -CAATGAGATCATGGACACAACCCTCGATGGTCTGCGGCACAGTTGCATTCCAGGCCCATA -CCTGCTCCAGGTCGCGCTTGCTGGCAGCCGTGACGGTGCGCACTAGCTCCTGCGCGTACC -CATCACTGCAGACTTGGCGAAGCGTCTCTTCAAACTGTCTGGCTAGTCTCTCAATCTGTT -CCGTCTGAATAACTTGGTGGTCGTAGCTCATTCGCAGGCGCATCCCATCAGGCTCGAGGT -GGCATTCCAACATAAGCGCGTAAGTGTTGATACTACTGAATTCGTCTGCAGCATCCAGAT -CAACGTTGAACAAGCGCTCGTTTGGTCGCCGATATGTTTTTGAGCTTGGCGGCTGCACAA -CCAGCAAGCACTGGAACTGCGTTGCCTGCTCTGTATCCGAATTTATCCGCCGAATCTGCT -TCAGTCCCGTCTGCTCATACGGAATCATAGCCACAGCCTGTGTCTGGATACTCTGCATCA -GACTAGCAACCGTGGTCTCCCCCTGCACATTCACGCGGATTGGAACAGTCGCAATAGTTG -GACCAGCCATATGTTCAATGTATGGCACTGTGGCTTGCCGTCCAGAGACTGTGGCCCCAA -ATACCACATCCGAAGATTGGGTATAGCGGGCTGTGATAATCGCCCATGCTGTTCGGACTG -CAGTGGAGGCCGTGATATCATTCTTGGGCCAGGAAACATTTGCAACATGGTATTGGAGCA -GATTACCGCACTGCGGTTGGTACTCTGTGGAGGGTAAGGATGGGAAGACCGCCGCCTCGG -TCCCGTTTAATTGAGATTGCCAATATGTCTGCGCGTCCTCTAATGTACAATCTGCCAGAT -ACTTCATAAACCCGGCGAACGAGGGGAGCTTATCGTAGCTACCACCTGCATAGATGGCTT -CAACCCGTTCCAGCATTAGTGGCAGTGACCAACCATCGTATATTGCGTGATGAATGGTCC -AGACAAAAAGGACCTTGCCTTGTGCTTGATCCGACACCAAGCCAAACCGAGCCAAGGGTG -TCCCTAGTCCCATGGGCCGTTCTTTCTCTGCCTCCAAGAAGTGATTAAGGTCGCTGCTGG -TGGCCCATGCCGGCTGGTCTGCAACCACGACTTGTAGGAGCCCGGCGTTGGGCAATTCCA -CAATCCTAGTTCGCAAAATCGCGGCTTCATGAATAACCTGGGCCCAGGCAGCCCGGAAAC -GAGCCAAATCCACATTAACTGGGAGCTCCGACACAATCTGACGCACGTACTCTCCGGCTT -TCTTGACAGTCAGTGCCAAAAGCCCTTCCTGGAGTGGTGTACAGGGCAGAAGATCTGCGA -TGGATTCTGCCGAAACGCTGCATTCGGCTGCAGCAAGAGCACAAACATCCCCTGCCTCTT -CCCCACCGCGCAGCAGGGAGAAGGAGGGAACGGTGGTCACGTCTATGATTTGTTCTAGTT -GGGTTACCTGCGCCATCTCGCGCAGGCGCGGGTAGTTGAAAATATCGGCCACTGTCAACA -TAATGCCCTCTTTTCGGGAAAGCTGCACCAGACGGATTGCTGCCATCGACTCCCCGCCAA -TTCGGAGAAAACTATCCCCTGTGCCTATGCTGTTTGGGTTCATATTCAGAGTTGCTGCCC -AGAGGTGTTGAAGCCGATGCTCCATATCGGTCTTTGGAGCCTCGTTCTCTGATGCTCGCG 
-AGGGCTGCAATGCTGCCAGCTGATCTAATGTTAGGGAAGAGCCAATCTCACGTAGGCGTA -GGCGATCTGTCTTCCCAGTGGTGGTCATAGGGATCTGAGGAACTGCAAGATACATGCTGG -GCACCATGTAGTGAGGTAATTGCTCAACAAGGTATTCTTCCACTCCATTCATGCAGCTGG -CAAGACTCTCCCGGGTGCTCTCCGCGGAGCGGGTTGCTGCTTCGCCGAGAGCTAGGTATG -CGACTAGGATGGTACTGCTACTGCCCCGAGGAGTGATGGCCTCGGCTGCAACAGACGGAA -TCTTGTCGGGCAGTGCAAGTTTTAGGCGGTATTCCACATCGCCCAATTCCACACGCTGGC -CCCTGATCTTCACCTGCGTGTCTTTACGAGCAACATAGACCACCGAGCCGTCCTGATTAT -ATCGTACGAGATCTCCAGTCTTGTATAGTCGGCCTTGGCGTCCTGAGACACCGGGTATTC -CCTGCGTCAGCCAGAGAGGATCGTCCACAAAGCTTGCCGAGGTCTTCTTAGAATCTCCGA -AATACCCTTGTCCGACCAGTGGCCCCTCCAGCCATAGCTCACCTGTGTGACCAAGGGGAA -CCAGTCTCTCTCCATGGGCCTCATCAACTATCCAGGTATTGCAAGCGACGCCTTTCCCCA -TACTCGAGTCCCAGGGCTTGCCGTGTGGGATTGTCTGAAATGTCACCGCCACCGTACACT -CAGCTGGGCCGTATGAATTGATCAGTTTAGCGTATGGCGTCCATTGAGCAACATCGGCCT -GGGTCATTGGTTCTCCGATCAAGAGAATCAATCGCAAGCCGGGGATATCTCGGGGATCAA -TGAGGCGAGCAACGCTAGGGGTTAGTTGGAGATGATTCACCCCAAGCCTGCGGATTACCT -CGGTGATGTTTCCACGGCGCTCACTCTCGCAAGGAATACAGAGGCAGCCTCCCGCGGCTA -AGGTGTGTATGATATTTCCCCAGGCCAGATCAAAGGCGTATGAAGCGAAGTCAAAAACTC -GCGCCGTGTTGTTGATGCCCAATGCGTCATGCTGGTAGGCGATGGCGCTACTAAAGTTGG -TATGATTGATCATGACTCCTTTGGGCGTTCCAGTACTGCCAGATGTGAAAACAATGTACA -AGTTACTTGAGGGATGAACTGACGGAAGGTCACTGCCTTCGATGTCCGGCAATTGTTGCA -GGTGGCTGTCGTCGAGAAATATGACACTGCTAATTTCGAGGCGAGTGGCCACTTGGCTCT -GGGCCGGTGATGCAAGCATCCAGCGTGGCTGTAGCTGCTTCACGATGGTTCGCAAGCGAT -CCTCAGGCTGGGTCGGGTCCAGAGCAGCAATCACTGCTCCTGACTTCATCACGGCCAAGA -TGGCCACAATCATCCACTTAGACTTCTCAAAACAGAGTGGAATGATATCATTCGGTCCAG -CACCTAATATCACCAGGTGATGGGCCAATCGAGTGGAGAGCTGATCCAGTTGCTGATAGT -CTAGCTCACCATCCCAGGCAGAGACTGCTGGCGCATGTGGCTGCCGCCGGGCTTGCTCGG -CGATGATATCGTGAACACACGTCTGCACTGCCGCAGGGACTGTGGCATTCCAACCCCAAA -TCATATCGAGCTCGTTGTTCGACGCAGCGGGCGCTGTCTCCACCGGCACATTCGCGAGAT -TGCGGTTGGAAATGGACTGTCGGACCCGCAAATCCAGGTGCTCTACAAATGCTTCCGCTG -CAGCAATAGGCGAGACAACATGCAATTCCAGGCCTTCTGGGCGAGCCTTGCAATGAATTT -CGAGGAGCGGTGCACACCTCTCCTCCTGGCACCAGCTCCTGGGGGCCTCTTCCGGTCTCC -AGGCTTGACTTCTTTCCAGAAACCGCATAATAAGCAGCGTCTCTGGGCTATCAGTTGCGG 
-GGCTGGCATCCAACAGTAAGGAAGCCGCAAAATCGCAAGCGGCTTCGTGGTGTTTGTGTA -TCACCTGGCGAATCTGGTCCCCCCGAAGTTCCGCCAATATATCCTGGATTTCCCCATCCA -GCTGAATCTTGACCTCAGGGCCATCATTCGGTTTGTCGCTCACGCTGCCTATCTTCAGCA -ACCCCTTTTTGGTTTTCAACTGCTCCACGGCCTGCAGAACCCAGGCTTCGAGGGTAACCA -GCAGTACCTCCACCTCATCCTGGCCCTGAAGACCGTCGATACGGTGCTGGACCTTCAAAC -TCCCTTTCGACATTGCTCCCAGACTCCGGCTGTACGGCCCTTGTGGTCTTTGCTATGTAA -TCGTAACTCAGACTCTAGTCTAATCGATGGATGTAGCTGTTGATAGCAACGTAATCCTGA -GGTGTCTATCAGCTTGGATGTCCAGATACTCATGTCCACTGATGGAAGAACCGACTAACC -TAGCGAATTTTAGAATTCTCTGTCAACAATGTAACTTCTATTGCACTCAAGTTTAGTTGG -TCACGAATAGACAGCCCGGTCTAATATATCCGGGTTGCTTACGGCGAGGCTGGACTGTCT -CCGTATACCTTACTCCTTTGCTATGCATATAATCTTGAGACATGGTACTCCTCACATCAA -AGACATGTGCTCTCTGCCTTACCTTGGACAATCCATGGCTGCATGTCTGGATTGCCATCC -AACTCGTCATTCTTTTTTTTCTTTCTTTTTCCATGTACCTACTCTAGCTCTGGTTTTTAA -TGATCCAGGTGCCTTAGCGGCATCTGCGGGGGAGAGAAGAGAAAAAGGAAAAGGTACAGC -ACTGAGTATCGGATCTCAAATTCCTCGGGCTTTGCTTTGTACGCAGCGGTATAGGTGCCT -TTTTCAGGTATCATATGTCTGATACCCAGTTACTTCCAGATCCAAGATGTCTGAATTAAG -ACTAGCAGATATCATGATCCCCAAAAAGGGGTTTTATCGCATGGCACCTATGCCCCTTCG -TAGCTAGGTTGGTATTTCTCGGGTAGCAACTCTAGCCTCGTGATATATCTAGCATAGCAT -AAAATGCCATCGTTCTGTGAGTCTCCTTGTGTCTCACCACATTCAGTCGAATCAAAGTTT -TCGTAAGATCCTGGTCCCATACATCAGATATGTATACCTGGTTTATACACAATCTTATGG -TCTTGGAGGGATTCTACTATCACCTTACTTCGATATCTATGTGATCCTTGGGTATAGAAG -CCTCTTGGAAGCATTATAATTAATTACTTCTCACGCGAGGGTTTGAGTCCACTTGCAAAC -AGCGACCTGCATAGAGGTGGATCGACCTCAGCTTTTAAGAGGCGTTCAGGGGCGTCAACT -AAGCAATTTCATTGGTCAACTTTCATTGAGCCTCCATGACGAATTTTGTATGGAATCTTC -AATTGGCGAGCTTCGTAAAAACCAGGTATTCGTTATATCCGGTTTGCTTCTACGGGTGTG -CTGTATTTATTCAGGGGTAACAGTCACGATGGAACTAATCTAAGGTATTCTGTAATCACC -ATCTCGCTCTCGCTCTATCTGTCGGCTGTAAGAGATTTTCTCATAGTAATTTTGCCCTGG -CTTGTCGGGGTTTCATTATCACCTCATGATCAGCCCTATGCACAATACGTCAACCGGGTC -TTTATCAAATTTATACTGCTTTTACGAGCCAGCGACACTATGTGAAGTATTGAGTTACAA -TACCGATTAGAGAGCCATATCACCGAGAGGCATCCAGTAACAGGGGAGAATGACTCAGGA -GTTTAGACATCAAGAAGTTCAGCATAGACTAGTTATGGTATACAAGGACTCTGTACTGAA -TTGAAGTCTGGAATTCAAGTTCAAAAGTATATAATTCCAAGCGGACAAAGTGTAGTATTC 
-CAACAATTGAGCCACAGGCACGACTCAAATAGGACGATTTCAAGTATGGAAAATTCATCA -AAAATTCAAACAGAATGTTAAATGTTAGGAATTGAGACGATATGTAAGTGTGAAATGCTA -CAAAGTATATAGGGAAGAGCTGCCTGCAGTGTCTGCATTATGATGTCAATGCGGATAAAT -ACCACGGATAAATACCAAATAAACTCGCCTGTTGGACGGTCCGACACGTTAAAGTCTCCT -AATGGATCCCCCAGAGATCACATGCTGGTCATGACTACAATCATATCATATAAGGTCCCC -TTACTTTGGGGGGAAAAAACCATTAAAACCCCTAAAAAAATAATAAAAACGACAAATTCT -GATCATATTTTTAAGAATAATTGAAAGCTAAGACACAGGATGCATGTAATGCACGATATA -ACACAGCGTGTATGCGGAGTACAATGGACATGTACATAGATCTAATATCATATCTAGTCT -AGATAGGCGGGTTATACGTAGATCAATGCTGACCGCCATGGTATTCTTTCCGTCGGGATT -TGCCTAGGACCATTTCCCATCAATTTCAGAACTATGCCAAGCGCCCAAAATACCACCCTT -TGTTAATGCGTCTAACCAAATGACGCTCTCCCCCAAAACGTTGGATCTTTCATTTATCTG -TTGAAATCAATATGTTTAGATAAGCACTAGTAGTATCTGTATACCAATTTACTACAGAAG -TATCTCTTACTGCTGTGGAGCTTGATACTTAGCTTCAGCAAAACTCCTAGCCAATTTAGT -GAAGAGAGAATCCAAAAGAAAACTACATAGTGTCATCACCGGTTTAAGAAACAATCGAAG -AGCATGAGTGACACCAGTGGCGCTCCGCAAAGTCCAACCCAACCTACAGCCCATTCGTGA -GCGCCCACAGGGCCCATATCATATAGCTCATTTGGGGGGTTCAACTAGTATCTTGTAATC -GCCGTAATCTCAACCTGTTGATTCATGTATAGCATTAGAATGTGGCATTCACAATTGCAA -TGGCTATCTCAATATGCATTCATAAATTAAATCGACTCCATCCGGTAGAGGTCGAGACCT -CGCAGAAATATTGATTTTGCGTAGATATATATTTTATGTATGAGCGGGAGCAGCACTACT -TTCTAAGTGAACCGCTGTAAATATCAAATTTCTAAACAGCGCGTTTTCTTGCGTTTTTAA -ACCTAATTTGTATATTTAATTTATGATACATGAATGAAAGTAATAATGTATTACCTTTAG -ATAAAGGAGAGAAATGGGGGTCTCTATACATAGTATTTGGAACATTTGTGTTGTTTGGTG -CTGTTAGACCCTAACGGCTCCCCAGTAAAAGCCCAGTGGGCTTCATTCTAGGCCTATAGC -TCACCGGTGGGCCCGAAGATAGGGCCGAAAACCCAACCTTTGAGTGGGCTGGGCTGCGTC -TGGTTGAGCTTTGGGGTTTGTGGGCTTTGCGGAGCGGTAGTCACACCAGTTACGGGACGT -ATTGTGTGACTGTGCCAGGAACCGTGTCTCTTGTTATTCCCACTATCAATCCACTAAGCC -AGCCAGCCGTGCTGAGGTCTACACATTTTGCGCCTAGGCGCTCATAAAGCGACCTAGAAA -GGAAAGGACCCGGCAGACAGTTATTATATCTAGATAGGCGCATACAATGAAAATGATTGG -GAAACTTAGTAAAAGGGGTTTATTCATTATCCCAGCAGATGCTGCTAGCTCTCTTTTTTT -GTGGTTTTCTTATTTTCTAGTGGCTTTAATCTCAGGTATGGAGGGAAGAACAGCCACCTG -CGCACGAGAAAAGAGCGGTAGATCAAGATTCAAATCAACCGGTAGGAACACCGGGAAAAA -CAATTCCTCCCCCAGACAAAACCAACGCACATCGCAGCTTCACGAGGAAGCGGGCCTACT 
-CAGAGGATTTACTCCTCTTCCACCTCTTCGCGGATATTCTGCAGATCACTGAGGTCGAAC -GGGCCAGTCTGGCCTCTGATAACATCATCATAGTGTGCAACATCCGAGAATAGAGCGTCA -GCGGCCGTGACAGGAAGACTGTCACATAGCGCGGAAGAAAAAAGGTCGTCATGGAGTGCG -GAAGGAACCGCCGGAGCAACCAGGTCAGAAGGACCAGTAACAAGGTCAGGGGGATCATCA -TATCCGTCCTCTTTCCCAACTTGGTCACTTTTCAGAATTGGAGCATCAGAAGCCGTGACA -GGAAGATTGTCACGTAGCGCGGAGGAGACAAAGTTGTCATGAAGTGCGGAAGGAACCGCC -GGTGTAGACGGGTTAGAAGGGCCAGGAATAGAATCGATACGAATGTCATATTCGTCATCC -TCCTCAACTCCATCAATGATCGAACTCTCACTGATACCGGAGATGTCATCATTGTCCCCG -TGATGCAACTTGTCATTGGAAAAGCTAGCCATCTCACCGGCGTGTTTATCCTTAAAAAAG -GTAGCCATCTTATCGGTGAGCATAACATTGGTCAGGAGGGATTTGAGATGGTTCAGGGAG -GCGCGCATTTCAACCTCGTCATCAGTGAGTTCAATGTCGTTGCCCGGTGATGCGGAAATT -ATCTTGCTATTGAGATCGGTCTCGCCAGAAATCACATCAAACTTCTCCATCGCTGCTGGG -CATAGATACATGACATTCATCCATGTCAAGATGTGTGTCACACCACGAGTGACCATGCAT -GATGACTTGGTGCCTTCCCCAGACGAGTACGTGTTACATCCACCGTGAAGGTGGTTAAGA -CCAGGCACCCGCGTGCCAATCGGTTTATCACAGATAACCCAGCCACGGCAGCGGTACTTC -ATGAACCACGAGAAAGACTTTGGATCGTTCAGGTCGTCAATATTGGTGTCGGTCTTAGCA -ATGGTCTCGGGATTGATAAGGGACAGGGAGGAGACGTTAGGGCACCAGAAACTCCCTAGG -AGCCATGTCAGTAAGCCATCCTCAACTGTCGTATTTAGTCAGATACTCACATTGACTCTT -CAGGTATGTAATGGCAGCATGACCACCTTCAGACAGGCCAATGACGTCGATTTTTGCATC -GACTTCGAAGTTGTTGCGAATCAAAATTCCTTCAAAAATGTTGCGGATGTGCTCAACGAT -GCCGTCTTTGTCAAACATGTTACGAGGACTCAGTTGACGTTCGCGGTCCACAGCAGGGCT -GCAGTGGTGACCATGATAACTGTCTTCGGTTACGGCGTACCAACCTGAGTTGTACCAGAT -CTTGTGACCGGCGTTCGCAATGATGAGGGCATTTGGCGAATTTCGAGCATTCTCGCCAAG -CAGGGCCTTCGCAAAGGCAAGGATCGAACCGAAAGAAACGCCGTCGTCGCAAACGTCACG -GTACGAGAAGATACCCAAATCTTCGATGATCTCACCACAGAAGACAACAACCCGAGAAGC -AGTGTGGACATTGCCCGACACGAGGATAGGCACATGACGCTTACTGCCCATGGAGATCTT -CTTAAAATGCATCCCCATTCCCACGAGCCGGTCAATCACGATGGTACGGATCGCCTCTGT -GAAACTGTTAGCGCCAGTTGATCCATCAAGACCATAGCTTACGATGGAAATGAATTTCAG -GGTTTACTAACTATGCAAGGCCTCAATGTGAGCCTTGTTGATCCGCGCACTCCGGTTGAT -CTTGTATCGCGGGCCATGATCTGGGGACCCGACGTACCGGATCGTGTCATCGTCGGTGAT -AACGAACCTGTGTTATTCAATCAACTGTCAGCCTTGAAGTTTGGAATGATCACGTTGAGT -ACGATGGAAAGAAAGCCTACCCGAGCTCTAGCAGGTCTTCAGCGAAGACCGTGTCGTCGG 
-GGAGGAGATCTCCCACAATGCTATTCATTGTGGAGATGCTTCAAATCGATGTGTGAGTTT -GATTTGAgaagaaagtcagagaggaaagaggggaggatgattgatggtcgaatgggagag -gaagaaggaagagaaaagaagaggagaagggaagGGGGGGGACTATCACACGCAGAAACC -AGAAGGCGCAGCGCTTTTCCAACGCAATGTCGACGGGGCAACAGTCAACGACGGCGTGAT -TATGGCATCAACAAGCTTCTCATTTGCTTAATACCCCAGGGTGACGCGACTGTCAACAGG -CTACGCCAAGGAACCTTGCCCTGATATTGAGGAACTATGCGTTGAAACTTTGGTTTATGT -CACTTAGACGATATGACCGGCGGATTATCATTCAGATCAACCTGCCGGTCTAAAAATTGA -CCGGCATAATGGACGCAAACGACCCAATTGACATGATCTCAACAAAGAAAGTTTCATATC -ACAACATCAGCAAACGAGGATCATGGCCAAAGCAATGCTTGGACTTGTCAACAAGTCAAT -GCTAACGGCCCGAGAGCAGCGGAAGAGTCAAAACGACAAAGAGATATTCGATTTTCTTGT -AACAAGAAATGCATACTGTACATACTTCCGCCCAATGGAAGTAAAGAAGAAAACGCCATC -GTGCAAACAATTCAACCAGAGGTCTCCATATATCACAGAGTGCCGGCAACAATATCATAC -ATTTACCAACTGGCCTTCCTGACTCCAGGCAATTCACCAGCGAGGGCCTGCATACGGAAT -TGGTACTATAGCAGACAGTGTGTTAGCGGTGTGATATCAAAAGCTTCGAACAAGATGCGG -CCATTGGCCGGAGCTCTCCACTCACTCTGTTGATTCTGAAATCGCGGAATACACTGCGCG -CAGTGCCTGATGCCACACAACGGTTCTTGATCTGCGTGGGGCGGGTGTAGGCGTGCATTT -GCGACAATTGAAGCTGAGCCTGCGCGCGAACTCGGGCAGAGAGGGAGGTATTGCGGATCA -TGTAGCGGAGAGCTTGTCTGCAAATGAAATGAAGGAAGAGTCAGCATTTTTGTCTCCAAT -CTCGGCAAAGATTTCTCAGCCCGAGGCATCCATCCTCGGTCAAGCTAAACATCTCGAAGG -ACAGGGTTCGTACCGTTCGGGCTCGGTCTGCTCGAAGGCCATGCGCTTGTTGTTGTCCCG -AATCAAACGGGTATTGACAAATCCAGAGAGGTCCAGCTTCTTGGGCCGGAAAAGGCCTGC -CATTTTGGTCATTTTGGTGTGTTCAATTGGGGGTATCGCTGAGTTGAGGTCGGAAAACGA -GTGAACTTCGGCCAGGCGGAGTTGGTGAAAACTGGGTGCCCGATCGGGTTTAGCGTGCTT -TGATGCAAGATTTCACGCGGAGCGGCATCAAAGTCCTACGTTACTGCTAGTATTTTGATG -GATAGCCGTTACAAACAAAATTGAAATGACACATTGAAATTAAGGGCTAACTTTAATTGA -TTGCATGGTCGCGAAGAACTATAAAACCAGCCCTAAGTGAAATAGCATAACAATGAGCAG -ATGCGAAGTATAGAGTCCACAAACCCTAAAGCTTTACCCATCATACAACATACTCCGGAA -TATTCGGCTTGAGTTACTTTTCTTCCTCTGACTGATGGGATAACATGTAGTAGAAGGGAT -ACTACGATAGGAAAGAGCGACTTGCTCGGAATGGTACTGGTACATCTTGTGAATTGACAC -CCATCGAGGTAGGTATAGTCTATATGCTTGGACAGCGAGAAACTGCCCGCACAGTAATCA -CAAAAAGTGCAAGCCGGCCATCGATGACCAGGGCCATCTCCGTCCACCCACACATCACGA -CGGCTCGTCCTGCTCCTCTTCAACGAACAAATGAGATCTCAGAATGCATTCAAAGTCGAC 
-CCGATTTAGTGGCCAGCCTTGCCAGCGGGCTGAGCGTTGGGGTTGCGGGGATCATTCTTG -TACTCGGCGGCTATGCGGATTTGAAGGTTAGTTGTTTATGCTCGTAGCACGCTCATCGGA -GTGTCGTTTCTCGGATATTGTCGCATGTGACCCAGAAACTCGTGGATGGTTTATATGTAA -CTGCGCGGAGTGGACTCACTGTTGGACAGAGCGTTCTGAAGAGAGTTGACGCCGTAGAGG -ACAATGAGGCCTGCAGAGGGGAATCGTTAGTAATGTTCCCCGACAGCCGCTTCAAGTGTG -CGGCATTCCATATTGACAATCCATGCGTTCCAAATTGTCAGGGAATTTCGGGATCTAGAG -AGTATGCGTACCGGCAGCGAAGAAGGGAGTCAAAGGCTTGGCTGTAAGTGAAGATGCGTC -AGTCATTGTTCCTCATATCGTTCTCCCACAAGTCGTAATCGAGGTCCACAAAACACGTAC -CCATAGCGCCGGGGAACTTCTTTCCAAGGAGAGACATTGTGACAGGTGAATCGAATGCGG -GGGGAATTCGGTATTCGGGAAGGTGGTGAGGCAGGAGACGGAAGAGGAGAAGTGAGGTTT -GGGAGGTTTGCCCGGTCCCGAGACCACGCCGCCTTAAAGGCGGTGTCATGCCGCTTTTAC -CGCCCGATCTATCCCGACCGTAAACAATCATCGCCAAATCAAGCTACAAATCATACACGA -TGGCGTCTTCATTATCTTCCTCAGTTACCTCGTTACAGTCCTCTCTCCAGATTCTGGATA -ACTCTATATCGATCCTCGATTCGGGTGTGAGCGATTACCCCCGTCTATGTAAAGTCCTCC -AGACGACGCGAGTATGTTGTTCTAAAGTTCCATTCCCAAAAGAATTGAAAAGATATGAGT -CTAACTATTTATCTTTGAATAGCACTTTGAGCTCCTACCTGAGCCTACGTTGCGTGAAGC -GCAGCAGTCTCTCCTTGACGAAATCACCCCCAGCATCGCCCACCTCCTCACACTATCATC -AAACCACGTTGAGAAGCTCGCCCGCCGCGAACAAGGCCTAAAGGCGAAATGCGACCTGCA -AGAGGGTCGACTTTCGTCCAACCCGGAGAGCCGGTCATCAGCCACTTGTAACCAACCCCA -AAGCAGCTCACTGCGGGGTCGCATGGCGGGGCCCAGCGCTGGCTCGTCGAGTAATGCAGC -AGCAGCCAAGGCAGCGGAGCTGAGGAGACTCGTGCAGAAGAAAGAACGGCTCAAGTTCGC -AGTCGATCGATTGGAATTGCAAAGTACACAGAAGCAAAGACAATTGCGAAAGAGTATGGC -TGCGCAGTGAATGTGGCGGTGTCACTTTGCAAGATAAGAATATCTGCTATTACGAAAAGC -GTCTGGTCTGTCAGGAGGATTTGCCTCTAACATGACCTTTGGCTGGTTTTCATCCACATC -ATTATCGACTGCGCGTGCTTCGCCAACTTCGGCTAAAAGAACACCAAGCCTGAAGCCAGC -GACGGCCTTTCTCAGAACATAAATACCTTTCACGCCCGGTGCATTTTGAAAGAACCTTGT -GTTCATTGCATGGCTCCCTTCATCCAGGAAAAGCTTCCGGTGGCTATTGGCGCTTCTTGG -GCCTGGTACTGTCCCCCCAGGCCGTGACAGAACAATGTTTCATCACAACCCAACTAGCGG -CATCCCCAATGGCCTCTACCAAGATTCTCTCATGGAAGAAAAGAAGAATGAACTCGGGCC -CAAACAACCCAGAGGATACTGTGTCAATCAATCAAATACATACCAAACATTGATTAGAGT -GAAGATGCTGTATTTACATCAGTAGAAATGCGAGAATAATATCCTAGTGTACAAGATCCA -GTCCCCAGTCCCAATGCCAAATGCTGCCAGTAGTTAGTAGTATAATACATGTTTTCAAAC 
-ATCCAAATCCACAAAAGAATTGCGCCAAATCAAACACACTTATTCGTCGGCAGAACGCTT -GTTGGAAGCCTGGCGAATCTTCTTTTTATCATTATGCTTCTTCCAACTGGGGATATGCTG -GCCCCTCTTCTTGACAGGAACATCAGGAACATCCGAGTCATGAAGCTCATACTCTTCATC -CTCCTCTTCTCCTTCATTCTCAAAATCCCCCTCTTCCTCGGAAGCCTTGGTCGCCGCACG -GCGAGCCAAACGCTCCTTCTTACCTTCGCCATGCTTATCGCCAAGGTTCAAGTTGCGCGG -GATGCGGGTCTTGGCTCCCAGAGCCCACAAATCTCCCCAATGAGACTTACGGGCATCGCG -GTAGGCTTGGCGGCTGTAGGAGCGCGGGACGGTCCAGCGCTCAATTTTGCCGGCGGGCGT -GCAGAGATCGAAGATGACATGGCCGCGGCGTTTCATGGGTGGGTATATGGCGCGGGGAAG -GGATAGAGCGCGGAAGGCCGGCTTGTTGGGAATGTCAGAATCATCGGGATCTTCAAACCC -TTCAAATGCGGCATCTGTAGCCTCGGAGTCTTGACGAATAGCTTGGTCTTGTCGTAGGTC -TACCCCACGCTGCACGGCGAGGTAGCTGAATTTGAGATCTTCGTGGTTGCGGTCTTTCGC -TCCCATGATGCGCTGCAAAAATGGAGGGCGAATGTATCGCTGCTCGAAATGGCAGAAGTC -CCTACGGCCCTTGGTAGCGCCTGAAGAATTGTGCATCGGACAGGTTGAGTGGTTGGTGCA -GGGTGCCACGATCATTCCACGCTCCTTCTCAATGACCTCGCGTTGGTCGGGGTCTTCGAG -GAAATTGTCGTATTCGGTAGAGCCGGGGCTTGCGATATGGCGTTTCAAAAGCATTTCACG -GGCACCAGAGATAGCCTCGAAGCCCTTTTGGCGGCCTTTCTCAAGCAGAATCAGAATACC -CCCGTTGGGGTTCAACAGAGACCATAGATTTTCCACGTGTTGTTTACGCTCATACTCTTC -TTCAAGACCAAGAAGAGAGTGCGGTGCGATGATGATATCGTACTGTTTGCGCTTGGGGGC -TTTCCGTGAGTCATCAATTGTAGGCTTCTCGCGTACATGGACATAATCAGGTAACCGTGG -GAGGAAGGTAGTGTTATCCAGCAGGACAGAAGCACGTTGCCGAAGCGCATCGGAGCCTGT -CAGAACAGTTGATCTTCCAAAGGGGATGGGGGCTCCCGAAGGATGATCCGGCACCATGAG -CTCCCATTCTGCGCGAATAATATCTCTCCAGGCGAGGATGCCAGCACCACCACCGCTGGC -GTCAAGAACGTTTGGCCCACCATCTTGTGCGATGAGACGGCGGATCCAATCAACACCCAA -TCGTTTGCGAATCTCGACCATAACACTAAGTGTCGATGCGAATATTCCAGGGTACAAGGC -TGCCAAGTATGCATTGGCCTCCATTTCTCCCATTTGACGCTGCGAAGCATTCAGAGCAAC -AGGAAGTTGTGGTAATTGGGCACGCGGGGGTGCAGTTGTACTCGAATGGGGCAATCCTGG -TCCTCCAAACAGGCGCCGGGCAGTCTCAGAAATGTGCTTGTTGGAGAATTCTGACAGAAT -CACCGATATGGGACCAGTGACAGTATCCTTGGGTAGATGAATGGTTGTAGGGTCAGTTCC -GAACTTGCCCTCGAGAGTGAGCGGGTGCATTCTTGGAGCAGAGTCGGGAACTGCCTCGTT -CTCGAATTCCTCAAGCATAATCTCGCCGCCCAACTGCTCTGCCACTTCTCTGGTCCGTTG -CATGGCAAGTGTTTCCCGCTCTTCTGGAGGTATGTCCATATCATACACGAGTTGAATCTC -GTCCTCAGCGGCGCTCTCTTCTAGGTCGACTTCCTCCCATCCTCCTTTTCCATCTTCGCG 
-GAGCAAGCTCATATCTTCCTCTTCCTCCTCAACCATTAACTCGACTTCCTGCCGAATAAT -TGGTTCTCCATAGAGGCGGGCATATATTTGAAGCTCGGCATCATCCAACAGGCCCTCAGG -AATCTCATCTCCAAAGCGCTGTTTGATCATGTGTACCCATGACTCTGGGGTGTGTTCGTC -TCGTTGGTCGATCAGGTCGATGAGCTCGTCTGCTCCGCCCTCCTCGCCCATAATTTCCAA -CTCTCCAAGTATTTGTGCCATTTGCGCCTCATTTTCGCCGATTCGATCGATCAGGGTATT -GATATGATCCGGTTTGATAGGTTGGCTGGGTTGTTTCGGCGAGTCGGCAGACGCTGGGCC -TGACACAAACCTTCGCTGGGGCTGACATTGTGATACCGCATAGTCGGAAGTGTTTCTTCG -ATGCGTAGGACTGCTTGACGCGATCAAAGGTGAAGCTTTTTGGACAAGAGTACGTTGGTG -TGTAGATGTCAAACGGTTGACAGGTCCGCGGAGTTGTCGCGATCCAGCAGGGCAGGCAAA -GGTGCGAAATAGTTCGCGTGATTGTGACCTACTGGCACCTGATGCTGAAGACCTGGATAA -CATTTTCCAGCTCTTGTCGACAATTCCTTCTCAGGAGGTTGAAGGGGAAAGAAAAATAAC -CAGGCCCAGAAAATTGATGCCAAGACAACCATGCGGCCATCACAAACTGTGGGCCTCTCA -TTGGCTGGTCCCTTTGGCCCCGGTTTTAATTCGGTATCAGAGAGATCTACAACATATAAC -ACTGATGGACTATTTCATCACCACAGAAATATTTATATATTGTACCCGATATGCTGATTG -CTATTGGTGCATTATTTATTATGTGTCCGACCAAAAAGGTTCTAGAGCTAGCCTTATGAT -TGTTGAAAATATATCTCAAATAACCTACTCGATACGTCCAGAGACAGAGACCATGTCTTG -AGTATCGCATCGTATCAAGCATTACAAGCAGGTCTTCATAGTTCAAATACATTAGGGTAT -CACTTTATTAATTAAGCCTCGCTCTCCGTATCTTCATCGCCCATGACAATCCGCAGCGCA -GCAACGCATCGCATCACCTCATGCTCAAGGGCCGATAATTTTGCCATGACCGCCATCAGA -CTCGGGTCCTGACTCTCTACCCGGACCTTCTCTTTGACACGAACTTCTTCTCCCTGCCAT -TGAAAAACGCTACCGGCCTCATCATGGGTTGGCGCTCGGGAGCCGAATATCCGGTCACGT -AGGGAGAACCCAGTCAGGGATATCTCCGAAGAAAAAGCATTTTGCGCATGGTTTAGGGCA -GTGGCCTCTAGAGTCCGAAGAGCAAGTACCAGAGCAGAGTCTGTGATAGAGAGATGGAAT -GAAAGATGTGGTGGAAGCGGAGGGTCAAACATCTGTGCATACATATTAGCAAGATATTGC -AGGACCTTTGTCAATGAAACTGAACCTACATTTTCGGTGGCACTGGTATCCCACCACTTG -CCTCGCGTCTCATCGCTGTCTCCTTTCAGCATTTGTCGTGCCTCGCTAATCGTCTCGAGC -AAGAGGTGTAGTTGACTGAAAATGAACCCTGCATTGAGGGGATCGCCTGTCCATGTGCTC -ACATCCACAATATCTAGGCTTTGAGCCACAAGGTCTCGAACCGATACTAGCTGTCGAAGT -ACTAATTCTGGGGCTTCGGGGGACTGGGATAGGTTCAATCGGGTACTAGGGGCACCCCGA -GGAGGCGGTAGCGATGCCAGACGAAGTTGAACATCCTAGAGAAGAATCGGGTCAGTTGCA -TATTCATTGGTCGTCGAAAAATACTTGGGATACATACGCCCTTCACGATCTTAGTGCCGA -CCCTAGTCACAAACCCCTTCACGCTTTCCGACCGCAGCGAGGACAACACCAAGGTTGACC 
-CCGGTTCTTGGGGAGCTAGGAGGGTGGCACATTCTTGGAGACCTTCCTTCAATGAGACAA -GTGAATCCTGTAGTGAATGCAAGAGCCATTCTAACTCTCGAGCCTAAGATAGAGAAGTTG -GCATTATACTAGAGAAGCTCCACAATGGATCATAGCTACAACCAACCAGGCCTCTATGTA -GAGCGCATCTGTTGTAAGGGTCCATATATGTTTTACATACCAGCGCAGTGTCCGCCTCAC -GGTTGATCTGCTCTGCGGGCAGTGGGGGGTATGCCCAGGTCGCCATTACGACGTCGTACA -GGGAAGACTCGTAGTGTGAACCCTCGGGTCGAGCCTTAGGGGTAGGTAAGGCAGGACAAA -GTTTTTTTCAAGGGCGAACGTGGGTTCAGGTAGGAACGACGATATTTTAGTCCACGTCAC -TTCTTCCATAGGTTGATGTCCCTTGTGAAGGAGCTGTAACGTCGTTTCCTTAGACGCGCT -TGACTTCGTCATGGACCACACGGACTAACCCGATATGGCTTAGTCTATAAAGTAGAGTGT -ATGCTAACCAGGAACGGCTAGCTTGTCAGGCGCAAGTATGACGTTATGATCGTCGATACT -TTCAATATTGTAGAAACCTTGACAAGAGAAGAGCATGATGCTCACTCGTCATTGTAGGTT -GTCCCCGGATACCCCGGCTGCTCCTAGAGCCTTATACCAGCTAGTCCTTTCCGCCTGACA -TTCTTCTCTTTTGTCTCTCGACTTTCTCTTCTTGAGCAACTTGTTCCCCTTGGAGATCAC -GACACAACCTCGAGAGGAAAAAAAAAGCCCCCAATCAAAGTTTAGTGTGGCCAACCAAAA -GCCAAATATGGGTTTCAGCAGTAAGTCGACCCTGATCAACGGCTCTATAGTATCTGAATC -CTGATCATCCATAGGAGCAAGTCTGGGCCTTACAGGCCTGTTTTTCATGTCAGGCGCGAT -CTTGTTGATCATACTCACTCTCTTGGGAGGTGTAACCAACACGACGCCACTGAACCGAAT -CTACTTCCTCCAAGCAGATACGGGAAATATCCCTGGTGCACCGACAGTCTCGCGTTGGAC -ATTCTGGAACCTCTGTGAGGTGAGAGACGGCAAGAGTCATTGCGGAGATACCCATGTGGA -ATTCCCATTCGATCCCCCAAGCCACCGTAACTTCGGCACCACCGAGAATGTCCCTCATGA -GTTTATTGGGTAGGTTACTAACTGATATACTTATTCACCGAGTCGTGCTAATCATACACC -GTAGAACGAATCACTACTTCTTGACATCCCGGTTCATGTTCCCCTTCATCATTATCGGCC -TGTTCTTCGCCGTCTGCTCTTTCTTCACGGGTATGCTGGCCATGTGTACCCGCGTTGCCA -GCTACCTCTCTGGTTTCCTGGCCTGGATCGCCCTGACTTTCCAGATCATTACGACCTGTC -TCATGACGTATGTACCACTTCAACTGGAATTAGGGTTGTCCCGTGTAACACGTGCTAACT -ATATCTCACAGTGCTGTCTTTGTCCAGGGCCGCCAAGCGTTTAAGCACAACGGCCAGGGT -GCACACCTGGGTGTCAAGGCATTCGCCTTCATGTGGACCGCCTCTGTCTGCCAGTTCATT -GCGTGCATGCTGTACTGCGTCGGTGGTGCTGTCGGCCGGAAAGATACCAACAGTGGATAC -AGTGGACGCAGGGAACGTCGCCGTGGTTTCTTCTCCTCGCAGCGCTCAAACAGCGTGAAA -AGCCAGAAGAAGGAGGGCACAAACTACGCTTAAATGGATGTACACTCTCCAGGAGAAGCC -CGCTTCCTCATCTCGACAAGAACAAGCCGGAGTACCTGAAAATGTCGACTCAGAACTGTC -GACCTCCGGACCTTCTCACACCCGACTTTGTCTTCTCGCGTTGATATCCCCTTTGAGAGT 
-TTATTCTTTCTTCAGGAGGCCGCTTGTTTCACGTTGATCTCGTCAAGCTACATATGATTG -CCCTGTATGCAGCCTAAAAGATTGATCTACCCACACTCGCTTGATGCTTTTACAAAACCC -CTTACGATCATTACTTGCCTGATAAGTCTGCGGCCTTCCTGCATAAGGTGGCAATTGAGC -ATTCTTCGCGCACTACTCTCTCTGCCTTGAGTCCGGTTATGTTTCAAGGCACGTGAGCCT -TTGATGATTTGTTGATGAATCCCATGGTTCAACGATTCTCCGATTTGAAGATTATGCAGG -AATTTGTGCTTTCGGTTGCGAACCTATCTAATCTAGTTGTAATATAATTTCTTTGTTACC -TGATCCTTGGAGTGTAAATTTTATACAGTCAAATATGGTGGATTCTCAATAAACAGTTCT -GGGAAGCAGAGGGGGACCTAACAGAATGAAGTGGGCCGAAAAGGAGAACCAGTATTGATT -GGTTTAATCCAACTGAGTCTATTCAAGAACTATAAAAGGAAAGACTCCATGACGATAACT -GCAGTATAAATCGTGGACAAAAGGAAAATCACAGCCTCTATCAGTCGTGCATGCATCATC -ATACCATAAACGAGATCCAGGTTTTTTTATTTTATTTCACGGATAAAAGAACCTAGGCGG -AGCCGTTCACGGCGCCGCGGACGGCATTCTGGGAACCGACCTCACGGACGGCGATCTGCA -GACCGAGATCGGGGTTGACGTGGACCATGACCTCGATCTTGCTGCCAGCCTTGACGCCCT -TGACGGCGAGCTCAGCAACGGGCTTCTCGGTCTTCCAGACAATCTCGCGGGTctcctctt -cctcctcctcctcGGAATCGAAGTCGGAGTCATCCTCATCCTGGGCGGGCTTCTCCTCCT -TGGCCTTGGGCTCGGGCTTGGTGACCTTGATCTCGCGGGCACCCTCGCACACGCGGACCA -GGACATCGCCACCCTCCTTGGGAGCGAAGTACTGGGCGATGCGGCGGGCGGGGAGGGCGG -TCTCGGCGCCGAGGAGGGGCTGGAAGTTGGTCTCGGACTCGGAGGTGAATTCAACACCAA -TGGCCTTGGTCAGATGAGGGGCGACAGTGACCATAGGGTGGATGGACTGCTCAATGTCCT -CGTGGTCGAACTCCTGGACCAGAGAAGCCTGGAAGGCGGCACCACGGGCAGACAGCTCGG -AGGGGTTGATGGCAGTGACAGAGGCGGCGGGAGCCAGAATCTTGGTCTTCTCGGGGAAGA -TGTTGCTGGCCAGGCTGGCGAGCTTGGGAGTGTGGGAGACACCGCCAGAGAAAATCACCT -CGTCAATATCAAGGACATCGAGCTCGGCCTTCTTAACAACCTGCTGAATCAAAGCGGTGA -ACTGGCTGAAGACCTTGCCGGCGAGAAGCTCGAAACGGGTGCGGTTAACGGTGGAACCGT -AGTCAACACCGTCGACCAGGGACTCAATGCTCAGCTGAGCGTTGCTGCTCAGACTGAGGG -CCTTGCGGGTGGCCTCACACTCAAGCTTCAGCTTGGCCAGACCACGAGCGTTCTCGCGGG -GGTCGGTCTTGTGCTTCTTCATGAACTCCTTGGCGAAGTGGTCAATGATGATCTTATCCA -AGGAGGCACCGCCAAGCTCGTAGTCGTGGGCAGTGGCCAAGATAGTGTACATGCCACCGC -GGGATGCAACAACGGCAACATCGGAGCGGGTGCCACCGAAGTCAGCAACGACAACCAGCT -TGTCGGTGACAGTCGCCTCGGGCCGGGCATCGTAAGCCATAACAGCGGCAACGGGCTCGT -GGATCAGCTGGAGAACGTCAAGACCAGCAGCCTTGGCGGAGGCAACTAGGGCCTCACGCT -GGGCATCGGTGAAATCGGTGGGGATGGTGATAACGGCAGCGTTGACATCCTTACCAAGGT 
-AGTCAGAGGCGGAGCTTCTCAGGCGCTTGAAGTGACGGGTGGTGATCTCGGAAACGGTGA -CTGTGTTGGGGGTCTCGCTTTCGGTGTCGCGGATGGTGAAAGCGACAGTTGAGTCCTGCT -GCTGGGGGTGGGCAGACTGATGGCAGGGAGTAGGGTCGATCGACTTGAATTCCTTGCCGA -TGAAATCGCGGAAGTACGCGACAGTGTTGGCTGAGTTGCGAACAAGCTGAGCCTTGGCTT -GTGTGCCATGGTACTCCTCGCCCTCAATATAAGAAAGGATGGAAGGAATTTGACGATCTG -AGTATGTTAATTAAAGTTCTCAAATCATGGGGCTTCATTGTCCTCACCTCCTTCCTCGTT -GGCAATGACCTCAGCTTTTCCATCCTATTCAATTGTCAGTCACCATTGTCTGACTCGAAA -ATGAATATCAACACGTACAGGCGAGATGTGGGCAATGGAACTGCACGAGTTACCAAACGA -GATACCGATAGCAAAGCGCTCCGCCTCAGCGGGGGCACTGTTGGCAGCGTCACCCATGTC -GAAGTTGTAGATATGTATTTTTTTTTGGATTAAAAGAATAAGGGTCTCACGTCACGGCGT -GAAAGTAGATTTGGGGGTATCCGAAATAACTGGTGGAAATCTCGATTGAGGGGGAACGAT -CGAAAAAGATTTTTGAAGATTTTCGTCTGCCGCAAATGTGGCTAGTGCTTATCGATAGCG -GTTGTGGCGATTTTGATTAATGCACCGCCACGCTTCACATCCCCATGCGCGGACAGTTTT -ATATGATTGGTATGGCTATGACGTTGAAATCAAATCACTGTGAATGAGTGAGGAAATGTC -AGTAGCACTCTAAAAAATCTAATATATTCATATGGCCAAGGTCATTCCTAGGGATGGAGC -TTGGTCAGTGTACATACATCAGAGTCACGTGGTGATAAGGAATGTCGTTTTCTTTCGCAG -AATCAATGCGCAAAAGCAGGCCAATCTATCTGCACGAAATAAATCCTGCAATACACAATT -GTAGCTCCTCTCTCAGATATATCTACCCTTGGGCTATTCTCCAAACAAATTCTACCCAGT -CTGGCATCATGAGTGCCCCGCTTGTCCTATTACCTAGCGACGAGGTACCTCAAGACCATC -TTCCTACCAACAGCTCCAATCCTCTACGACTAGGCCCCGGCCTCCGCATCCTATCACAGC -CTTCCTCAAAGGGAACCAATCATGTTTTAACAGCAACACAAGCTGGTCTTTTGACGACCG -ATGCGAAACGAAACACCGTCTCAATTCTTTCTTTCCCGAACCGCCGATACATTCCCACCG -TCAACGACTTTGTCATCGCTCAAATCCACCACTCCAGCGTTGATTTCTTCCACTGCATGG -TGACTCCACACACGGCCCATGCGCTCCTGGGTCAGCTGTCCTTTGAGGGCGCATCCAAGA -AGACGAGGCCAATGTTGAAGCAAGGCGAGCTGGTGTATGCCCGAGTACAGTCCGTGGGAG -TCGGCGCCGGCGCCGAAGTTGAACTCACCTGTGTCAACCCAGCAACTGGCAAGGCTGATG -GAGGGTTGGGACCCTTGACCGGTGGAATGGTATTTGATGTTTCGACTGGTATGGCGGCCC -GGTTGATTCGGGCAAGCTCTTCCTCTGCCGAGAACACGGATGCAATTGAAGGCTTGGTAG -TTCTTTCTGAACTTGGAAAGAAGTTGGAGAGCTTGGGAGGATTTGAGATTGCAGTTGGTC -GCAATGGAAAGGTGTGGGTGGACTGCTCCAACGCCGCAGAAAGTGCTGTCAAGGTCACTA -TCGCAATTGGACGTTGTCTACAGGAGACAGATCAGCAGAACCTGCACCCTCATGACCAAA -AGAAACTAGTGACGAAAATTTTAAGGGATATGAAGCTTGTTTCGTGACTATGTCTATAGG 
-ACCTGGAGTGTGGGTTTTATGACTTGCAGTTGCTTCTTATATTCTTTATGCCAGCATGTA -CACTATTAATGCAATCATGAGCATGATACCTGCCAGCACTGTGTCCAGTAAATGCTAACT -GTCGGCAGACATTTCTTGTGGAGTATTGTATGGATCGCATGAGTACCTTTGTAGTAAGTG -ATGTACAATGGCATTGTATGGCAGCCTTCAATGAATCTTGGCTTAATATTTAGAATATTT -TTTCTTGGTGCCTTGAAGAAAAATCCTTTGTTAAATCCTTTCAAGGCTCTTTCTTATCTT -ATTATTTCTATTGTATTCTGTCTCTTCCTACAACAATGATATGCATAGCTCATCGACTCC -GTACAATAGCCATGAATTGATATGTACTCGAATGGCCCCGCATGGGCGGTCAATGCCAGC -GGTGAGCCCCAATTGCCGGGTTGCACGCTTCGTGGGCCGTAAATTTTGAGGCCTCGGTCT -CTCCTTTTCATCGCGGCCTCTAATTTCCCCCTCACTCTTCATTGTCGCAGCTTATTGCCA -TGGGAAATACATCGTCTCAGCCTGAACCATACCCCGAAATTCCTGAGGATTTTGGAGATA -CTGGTTGGTGAACACACCTGATCTTTGCGCGATGTCAAACTAACATTTGCAATCTGGCAG -TCCCTGAAACTCCGAATTCTTCCATGGTGGATGCGACGAAGAAAAAGAAAAACAAACATC -TTCCATTAAAATCCAAGCCTTTATCGCCGGAAATGGGGGAAGCCAATGGTTCAAACCGCT -CCCCAGAAGTAGACTCCAATATGTCGAATACTTCAAAGCGAAAGCGCGAGTCTGAACCTG -AGCGCAAGCAAAAGTCCAAGAAGCATCGCAAATCAGACAAGTCCGTTTCCATGTCAGCTT -CACCAGAGAAACACTCACCCCCCTCTGCAATTGCTGTTCAAAATCCCCCAACCCCCAGCC -AACCCACTCCATCAAAACCCAAGGCGAAGAAGGTCCGCAAGCAAAATGGCGAAGAACCGA -AATCAGTGACTGCGTCCACTTCTCAACCTACTCCTAACTCAACGGCTGCAGAAGAACAGC -CCGTTAATAAGCAGTTGACTGTCACCAAGCAAACTCCAGCCCCTGACTCACCCAACCTCG -AGGGTAGCACCACGCCAGGAGACAGGGCCAAGGGAGTTAGAGGCTCACGCACCAGACAGA -AGGACAACTTAAGGATTGGCTTCTATACGCCCGATGAAGTGCGAAAAATTGAAGCGTACA -AACTCAATTTTTGCACCGTGCATGGCATTCCTAGTGCCAAATTCGACGAGATGGTTCAGC -ATTCTGAGCGTGGCGCAAACGGTGAATTTCCTGTCTCTACGGACGTCATTTCCAAAGGGG -ACTTCTGGGACGAGATCTACGCTCTTGTTCCAGACCGTGATCGACGATCTGTGTACCGAT -TCATGCGCCGACACTTCCAAGCCTCGGCTCAAAAGGCACACGATTGGTCCAAGGAACAAG -ACGATGAGCTTATCGAGCTCCACGCTAAGCACGGTCCAAAGTGGACTTACATTGGTAAGC -TCATTGGGCGCAGTGATGACGATGTCACTCAGCGCTGGAAGAACAAGCTTGAGCACCAGG -GCACCATGAACCAAGGCGCCTGGTCAGAGGAGGAAACCAAGATTTTCCTTAACGCTATAG -AATCAACGTGGAACACCATGAAGCCAATGCTAGCCGATAAGGCAGGCAAAGACATGTACG -AGCTGGATGAACGCCTCATTGTATGGGGTAATATTAGCAAGGAGATGGGACATATGCGGT -CTCGACAACAATGCGCAGACAAGTGGCGCAAGATCGTGAGGCAGGTCATGATCATGCGAG -CCAATGGTCAGCCAGGCGCAGTGTTTGATCCGAAAATCGCTGCTAAGAAAAGTGCTCACT 
-GGAACATGAGACTGGAGGCGCAAAGAAAAAGCTCTCAATTCGTGAATGAGGATTCTGATG -ATGACGAAGCCACAAAAATTACCACCGCAAAGCTCAATCCCAAGGGTGACGCAGCGCCAT -CGCCCATTACCAATGACAATGTCGAGTCCGGACACGGAGGGCAGTCCGAGGAGGGTGAGC -CCGACTTGCCCGAGCCTCCTAGAAAGGTTAAGAAGTCCAAGCGCAAGCACAACGAGGACC -CCTCCTCCTCTCCCATCAAGGAAGCCCCGTCGTCTTCCGCCGCTCTCACAAAAAGCAAGG -AGGAGCGAAAGCGCGAGAAGAAAGAGCGGCGAGAGAAAGAAAAGCAAGAGCAGGTTGAGC -AAGAAGCCCATGAGGACAAGGCCGCACGCAaggagcgcaagcggaagaagaaggaagaga -agaagcgaaagagacttgaggaagaggaaCGTATCGCTGCCGACGAGGCCAACAAGGCAC -CTAGCAGCGATTCAGAGGCCCTTGAGCCCCTCAAGAAGAATAAAAAGTCCAAGAAGCAGA -AGCAAGCTTCCGTTGACAGCCCTGCCCCCTTGGATGAACCCTATCGCTCTGTTGTTCCAG -AATCCCCATCGCCCGAACCACCTGCTTCTCACAGATCTGCCACTGAGCACGAAGCTGTCG -TCGATGAAACTGACTCGGACGGGGGAGATGACAACAGCAGTGAAGTCAATGTCAAGTACG -AGACTGACTCGGATGAATTGTGAAACTGTATGAAACGAAGAGTTTACCTTTGCTATCGCT -TGCTTCCTGTGTTTCGAATTATACCCTTGTTTATAAAGATATCAATTCTTTGAGAGCGAG -CAGTAGAAGAAAATGTCCATGAAAATACACAGGTCATTCACTATTCAATCCAAAATGCAA -CAAGTGTAATCATCAATGCAATCGCTTTATCTAATCAAACCCACTAAGTCAACTAGTTAT -AACTCGTCTTTGCGGTCCTCGTCAGTCTGCACAAACTCATCCCCAGCAGCAGCCTGCTCA -CCAGCAACCCGACTCGCCAAAGCTGCAATCTCATCATCATAGACAGTCCACTCAGGAACC -TGTCTCCTAGCCCTATCTAGCTTGGGCTCCTTGGTAAGGGGATTATTACATAGGTCAATA -GTCTTCGCCACATCCAAATCCTCATCGGAACACCAAGTCTCACACCAAAGCCACTCCTGC -GGTAGACTATGGATGGGGATGTGATGCTGCATGTGGTTGGGCAGATCCTGGTCAAGATTG -CTGAGACTGTTCGGATCCGCCGACAGCATTTGATATTGGCCGCGCAGACGATCGCCAGCA -GCAAGCGCGCGGAAGCGTTTCAAGTCAACAACATACAGTGCCGAAATATGGTAGGGCTTG -CCGCGGAGGAAGGTGGACCAGTAACCCTGTTTCCAGAAGCGGAAGCCTTCCATTTCGGTC -CGAGAGTCGCCCATCGGCGTGAAGCCGTAGGGCGCTCCTTGCAGGTCGTGGGTGACTAGG -TCGTACATATCTGTGCGCACAATCTGGTCGGCATCGACGAATATTACTTTGTCCAGAGAA -AGGGGGAAGAGCACGTCCAGAAAGAGCATTTTGTACCCCCAGATCTCGCGTTGCTTTTCT -TTCTGGGCGCGCAGCCAGTGCGGCCATTTGTACGTTACCATCTCATAGGAGAAGCCGTAT -TCATGGGCgagggaagggaggaaggagcggaaggagggTGAGAGGAATTGCTCGATGAAC -CAGAATTTGACGGTGTGTTTGGTGTGTCGCATCACGGAGACCATCATTATGTTCAACATG -CGCTCGTAGAGATGTCCGCTTGCCACCGAGAAGATGTTGATATCGGCGTGCTTTTCGCCG -CCGCTCTTGGACCCGACTCCAACACTGGAGAGGACGCCGGATGCAAAGTTGAGTCCTCTG 
-GCAAGATAGTCCATCTTTGAGCCTGGCTGGGCTTCTGTCTCGAGAACATCCTCCTCCTCA -TGGCCTGGCTTTCGGGACAGGCGAGGGAACAGCGTCCGACCATGGAACGACAGCAGAGCA -ACCTCGTTGTTCTCGTCACCGGGTTGCGGACGATAACCTAGACCACCAACGCTATCGATC -TTGAAAATTTTCTCGCTGCGGCCGGGCTTCAAGTTGATCTGCCACAAGCCCGGTTGTGCT -TTGAATTGGAAGTATCCCAGGTTAGCCATGATAATAGTATCGGCGAAATAATGATCGTCC -TCAGTGCCCAAGAGAAGTTGAACACCTCTGGGGGCAGAATTCGTTGTAAGATCACGAGAG -TGGCCTTCAATTAGAATGTGTTCGAGAGCGTAGATGGCGTCGACGTCAGTGTCAGGTTTG -ACCGAGCTCAACTTGATGTTATCCAGGTCGTATACTGACTCACTGGGTGCTACGAGCCAA -GATGACGGCACGTCCATACCCAGTGTCAGTAGTGACTCGACTGGCACGCCATTGAATGAC -GCTTGCGGCCGAGCTAGAGCGCCCTCGTTCGTGAAAGATGGCTCTGATTCCAAGACATAT -CGGTAGAATCGTTTGACCGGAAGCTCCGTGAGTTCTTCCTTCGGGTTCAAGAAAATCTTC -ACCTGAACGCCGGAGAGCTCTGAGAGAACCTTAAGAATCGGCAACCAACGCTGAGCGACC -TCAGACGCCGGGTCAAGGCTAACAGCAACGTAGATCCTGGGATCATCGGAGTTAGAAATT -GTGATCACAGAGTGCTCCGTCCTCCATGTACTGGACACGTCCACTCTGAAATCAGGAGTG -TTTTCGAAGATGCCATCAGGTACATCCAAGACGGTGGAAAGAGCCACAAGGGAGGTCAAT -TTGGCAAAATCGAGCGGGTTTGAGATCTTATCTGCGAATCCAAGTTGGTCAACAGCCGCC -GCAACGGAATCGAGGCGTTTCATCCGTTCGTAGGCAACCAGTTGGCTCATCTCTTTGACA -CTCAACGAGTGTTCCTCCTCAATAGGGCCGACTGCCCTTCCATTGACAATGAGTCCCTCG -GTACCAGGGTTGAATCCAAGCTCTTTGGCTAGAGTTTGGTGCACAGATTTTATAGTAGAG -GTCTTTTCAGCTTCAGTCTTCAGAACTTCGCTATCAAGATCACCGGATGCAATCTTAGTC -AGTACTTGATCGATAGACTCTCCCTTTGCAAGCGAGAGATAGAGAGTGACGAGATTCCTG -GATATATCATCTTCAGCGTCCGATGCCGCATTGTGCAGCATCAGAACCTCCACGTCACCG -TTCTCCTTTCGAAGATTGAGAGCATCGGTAAGTAGCTTCATGCCAGTCTCTGACTCGAAA -TCGCCCACAACTATCATATGAGCGCTCTCCAGTGCGTCCTTGTCCGAGGGAACGTGAGGT -ATCTTGCTCATTACATCTTTATCCGATGCAAGAATGGTAGCGATGTCCACAATCTGTACA -CTCTTAGGATCCTCGGGCATGAGGAAAGTATTGCGTCTCTCCAATGCACCAGCAAGGAAG -AACTCCGGAAGCCATGCGTCCTCCTCGAACACTCCCTCTACGATGCCTTGCTGGACCAAT -TTCAAGTCCCTGCTGACCTGCATGCTCATTCCCTGCATCCAGTTGCCTTCTCGGGAGATA -GGGATACCGTTCACCAGGAATTGAAGTCCGTCGCCGGTAAGACTCAACCGACGCTGGTAA -GCAGATGTTTGGGACACCAGAGCATTATACTTTTCGCTCTTCAATACTTGATCAAGAGAC -AACGCTTCCTGTTCGTCAACAAGCTCGTGTTCTTTAGTCGCAGACTGGAAGCACAATTTG -TCTGGGAAGCCTATTTTGCCTTTCGAAGCAGACTCTTGCAGATATTGCACAAGAGACCCT 
-ATGCCATAAGCATCGAAAAGGTAATGCGCTGCTTTCAGTTGCGCAATTGACTCTGGCGAC -GAGGCTGTAGGTACCAAGCCGAATCTCACAGGAATCCCTCGCATGACGAAGGCTTGCAGG -TTTCCAGCAACAAGCATCATGTGTTCTGGGTTAGAAAAGTCGATAGACACAACCACGTTG -TGCAGATCTCGGCCCACCGGCGGTAGTTGTCCGGGGTAAGAACCGGCGATGTACTAGGGT -TATGTTAGTATGTGTTCACCCCAATATCCATACAAGGTAAGTACGAACTGCCTCGAGAGC -ACCAGGCCAGGACTCATATCTAGAATCTTTCTCCAGATTGTTCATCCAAATAATGACCTG -GTTCCCTTCTAAATCATCCCGGTAGTCATAACGTTGCTCTTCAGCACCAGCCTCGGCTTC -GGTGAGCAGTCGGTGTGACAGTAGCTTCACGACATCGTTCGCAGAGAGACCCAGGCCGCG -GAATTTCTCAATCAACTTTCTCTCTCGGCGCAGATGTTCTAGAAGTGAGAACGCGTCAAT -CTGCTGTGTGTCCATTTGCACACCGTTGATCCACATGGCATTAAAACCACCAGGGAACAT -TTTTGACCGGTTGTGGCGGATCTCTTGCGCCATTTCTCCAGTAGAATTATGGACAGCTAC -TATAGATGAATACTTAGGGAAATTTTGGGACATCTTGATCAACGTCCCCAAAGGATCAGT -GTTGTCCATGACATAGCTCGCAGAGTTTAAGCCTAGGCGAGTCACTTCGGAGGAAGCCAA -AGGCTGAAGGTCATCTGGAGCATCTTCCTTCGTCTCGTCCGGATCGGTTGGAAGACTTTT -GGCGCTCTTTTCTTCAAGCTGCTCTGCATCCCGATCATCGATAACAATGTAGTCCGTTCG -CTTTAAAGCAAGCTCCACGCCGTACCCAGAGACAAACAAGGGACGGGAAACCCAATGTTG -GGGAGGCCGATAGCGTACACGGTACGATATCTGTCCCTGTTTTGCCATGTCGCTCAGGGT -TTCATGGAAATCCCTAAACATAGGAGAGCCGACGTCTGCGTAAAGAACAGCTGGAGGTAG -CGATGCATCCCCTAGGACGCGATCAAACGGTAGCTCTCTAGGATCCCTGAGGCACAATTT -CAAGTCAGCTACCAGAGTCCGTACCCCACAGCCTAGGATTTACTCACAACTCTCCCTCGA -CATCCTGTTGTGCGCGTTCCATCGCAGAAGAACAATACTGCTTTCCTTCTGTGTGAACCC -ATACAGGACATACCGCATCTTGGGCCACCATCAACGACTGCTGAACTGAGGTGTTATAGA -ACTGGAAGTGTGCCTCAATACGAGGCGCCGATGATCGGAGCGAAAGCGATAATTTCAAAG -ATGAAAGACTCTCCGGAGTCTTTATGTGACCATCATCCTTCACAACTGTCAAGAAGCGGT -CGTAGAGCTCCTTCTCCGTTGTGAGGTCATCAAACGTCCCGTCGGCGATACGGTCCAACA -AAGGAAAATAGGAAGTAGAGTTCTCTTCGGCGGCAGACTCCCTGTGCTCAAATCATTAGT -CCCCACTTGTATTGGCAGGTCTATCCCTGGCGGGGGATCATTCAATGTGCAACGTACAGG -AGCTCGACGAGGTATGGGGCCGAGTCGAAGGAGGCTTGGAGCGCCACGTTCACAGCTGGA -CTAGCACTGGCAATCCAGCCCGGTAACAGAACTCCGACGGCGGCGTACCACCGCCAGGAA -ATCTGAGATCGCGTAGAGACCCTCATTATCGCGCAATGCAGTGTGGGGGGCAGTTGGGTA -TATTTGTTGTATTGTATAGGGGACAGCCTCGAGTCGGGCAAGTCCTATGTAATGTGAGGT -GAGATTCCTAATTAAAAAACATGCAAGTCAATCCATTTGCAAGCATCACGGGCCAAATGA 
-GCCAGAGCTCTCAACAAAGATGGTAGTAAGATCGAGGGTGAATCGGCTGATCTGGACACT -TGGAGAGTATCTTATCGCAGCTTCTACGGCGGACTGAGCGGCAACTGGCGCAGTCTCCGA -GATCAAGATGGCAGCTTAACTTGGCACCTTGTATTGCATTGGCTACATGTGTGAGTTCCT -AGGTGTGTTTACATTCAATGGTCTAGAAGTGCGaataatgatgatgataacaataataat -aatgatactaataataataatTTTGGGTTCAAATCCAGTAAGCTGGACGCTATCGTAAAA -ACATTAAGTCGTAAATCAACACCCCCATATGCTTCTCCCTCGAACGCCTTTTTTTTGTTC -CGAAACACCTAAGGTATGTCACATGAATGCAGCCCGGCTGGCTCTGCAACAGAGGCCCGG -CATTGGCAAAGTTGGGAGCGAGTGCTCCTTCTGAATTTTGTGCTCGCAGGGTGACGACTG -CAGCAAGAATGAGAGAAGAGTAAGAGATGATCGAGTCGAATGTAAAGAAGTCAAGTGAGA -TGTGGGCAGCCTGTCCAGTCCCCTCGtttgttttatgttttatgttttttgtttttcgct -tttcgcttttcgcttttggaattttgtttgctggtttCCTTTGGTTGAAGTGGCAGAAAG -TTGCGATTAAACAGCCTGGCCGATATTTTGAGTCGCGTCGATTGCTTTGCGAGCTTGGGC -TTCATCGATCAAACGCCGGCGTTTATCGTCAGTCAATTCGAAGGATATATTTGCGTTGAA -CTTGGCTGGTGTATTGATGCTGCTAGCCCGACTGTTGAATGATCTGTTGGACTGCATCTT -CTTCAAACTGCGCGCAAACTCCCTATCCTGCTCCTTCTTGGGGTCTTTCTTCCTGCCCCA -TCCCGACAAGGAAGAAAATCGGTCCAGGAATGAACCATGCTTGCTGAAGCCTGTGTTCTC -CTCTGGGAAGTGGGGAAGAGGTTTTTCGGTGTCTGAAGATTGGGAGATAGCAGGACAGGG -GCCAAAATCAGGACCGAAGCTGATGCCCGAGTCGGTGCGGCGCTGCACGCCGGACTTGCC -AATCTGACAAGGCTCGTTGATCAAAATACACTCGTCGATCATTTCGAGGACTGCGTTCAG -GTATCGGTCAAGGGTTTCATGCAATGCAGGCCACCCATTCTCCTCATTGGGTCGAGCGCC -TTGTGTCATGATAGCTTCCAGCATCGAAGGATCACCCTTGTAGTTGGTTATGTATTTGAA -GAATGTGTCACGATGATGTCCAAGGGTGGCTGGCGTCAGCTGCTGTGTTGGGGTAGGGCT -GTCTCGGTGAACAGGCGGCAGTAGAGTGTTAAGAAGAGCAAGGCAGTTGGCCTTGTCAAA -CGGCTCAAGCATCATCTGATTGACCAGAACCTGAGGAATAATGTTAGACACAGAACTTAA -TGTTTGAATATCGGGACAGAACTTACCCAGCGACGCAGGCCGACATGGATTTGCGTGTAC -ATAAAATCGATCAGGCGTGCCTTCTGACTGATGTCTTTGTACCAGGTCAAGACATTAGGA -TACTTGCGAGCCACCTCCGAGTGCTGAGGGTCACTCGCGTCCTTGGGAGTAATGTCGAGC -CGATTAAAGGCGGCATCAATCTCCTTTTTCCAGCGACCTTGGATAGCCTTAAGGTAATCA -AGGGCAGTCAAAGCCTGGGCAAAGTCGACATCGGACAGCTCCAGACAAAAACAACGACGT -ACAAAGGAGAGAATGAAGCTCGGAGGGAGCGAGCAAGGTTGAGAAGGCAATTCGGAGATA -GCTTTAGTCAAACAAGCACGAAATTCGGCATTGGTGTCGTAAGAAGGAGTATCAAGAGAG -GAGTTGGAGTTGCGAGAGTTTTTAGACCAAACAGAATTGCGTAATATGTCGGGTGACTGA 
-GGAAGAAATGACGTCCCCATTGTGTTTTTCGATGGATGGCGATTGAGTTCGTATGCAGCA -CGAAGGGAAATTTCGTAACTGTCGGCGTAACGCAGGCAATGATCGAAGATCCACGTCAAT -GCTGACCCATCGGGAGCATCCAATGTCTCATAGTTAGGAGCTTGAGACATGCTATGGGTG -TTTAGCAAGATTGGAGTGTCAGGACTGCGATCAACACAAGAGAGTGTGTAAAGGATAAGT -TGTAGAGAAAGTCGTGATGAGAAGAAAACAATAGAGGTCCAGTGGCCGAAGAGCAGTAGT -TATGGTTGACCCAAAGAGGCGGATCCGTTGTGGCGGCTAGGATATTTCAGAGCCCGTCAA -GGGGGTAGTGAGAGACCTGATAGGCCCAAATAGGAATAAAAAGTTGATGGAGGGACCCGG -TTGGTCGGATCAGCCCTGTAGTTTGGGCTTGAGTCACCAAGGAGGCCAACAAAGAAGAAA -AGCTTTCGCGGAAGGCGAGTACTTTTTTTCCCTCTAGGATATCCCTTGAGAAGCGAAACC -AATCACAATGCAGGATCTTTCGGATCTCCTGCACGCCAGATCTGATGTATGCAAAAAAGG -GCAACGAGCGGCTTCAATGGCTGGCTTCCGTGCGCTGAAACGTGAGACAGGTCTTAACCC -CCTTCAAGGTTCGAGTAAAGAACGTGAGACGGATAAAGGGCCAAAGTGGGGGATATGAGA -CCAAGCGAATGTAGAAGGAAGTTAAGAGAAGTTGAAAAGGTTCAAGATCCAGTGGAGGGG -AAAAGGCCCATACAAAAATGCGCGAGAGGTTGCCCCATGGCTGGACCAGTCTACTAGGTG -GGGACTGACTCGAACGCAAAAATCCGCGAAACAAAACTTTCCGTATTTAAATTGTTTTTA -AATTGTTTTTTAATTGTTTTGATCAATAAATTCGATAATTTAATTAATTAAATGAAGTCA -ATCCCCCCTCTATCTGAGTCCAGATCCACGGATTCTGCTTTTTAAGGCTACTCCACCTGC -ACCGACCCGCCGGAAGATATCATTAGCCCCGATTCGCGTGAATTACAATTGGTTTGGAAT -GCTTCAGAGACAGCCGTTGGCGCCATTGAGACCCCACTAGCATACGTTGAACACCATTCC -TCTTCAGGTAGGTTCTAAGCCAAAAAGGCTAGCTCACATTTCTGATTGGATTGTCATCCC -CGTGCCTTTCCGGCTTGAGGCGGACTAGAACTGTCTTGCATTCAAGGCTCCAGTGCAGGC -GGAACTCTAACATCGCGACCGGGAACTGGGGGAGCCGTTACGCTTAGCAATGTTTTCGGC -CCTGTCAGATTCATTAGCGCACCGTGGTACTTAATATGCAGAATGTGTACTTTTCCGTCG -GACTGGTCAATTTACGGGGTAAACCCTTTTTGGAGATCCGCCTCGGCTATACCATCGCAC -TCAGCTTGTATCTACAATATGTACGGAGTACGTACAGGTTTCGACAGATTGTGGCTCATG -TGTGGCCACGTACTTTTGTCTTCGCCTCCAGGAAAAAATTCAAACACAACATGCATCTAT -TATTATACTATATGAGCCATCCTCCCATGTTTGCTATTTCAAGAAACAAACCTCCCATTA -ATAGCTCTTAGATCCGATTGCGATTTTGCTCGCTTCGATCGCCTGTGGGGCTACCACATA -ATACCTACGCAATACCTCTTGTGTACTCCGTACTCCGTATAATATTGTGTTTATACATAT -CCCCGGTGATGAAACCACTGTGACCGTTGCCTTGGGTCCACTCATAAAGAGCCGTTGGAG -CAGTCACACGGAACAAGAACCCAAACTTGATGTACGATATAGGCAGAAAGCCGTGATAAT -CTTCAAGGTTGTGAGGCGATCAACCACACGTAAATTTGCGGATAAATAAACAGCGGTAAT 
-TATATACTTGACCCATGGTTCAGGATGTGTCTTTCATGGTAACCTGACTGCTATACCGCG -GTTGTCCAGTCGGACGGGCCGCTTTCTATGCCCAAAGTGCCCGAGGGCCGTTATCTGCCT -GAGGCGCTTTTCGTTTGCCTGATGACAGGTCTAATGTACTTTTAGCCTCCAAGGCTTACA -CCCCAAAAAATCAGCTACATCGACTCCGTATTAATTTGATACATAGTTAGTTGCATATAT -AAAATTAGGGATCAAATAACAAACATGCGCATATCCGGAGCCTTTCATGATCTAAGATAC -ACCGACGCTGATCCACATATCCGCCAATTTCGGGACCTTCCCAAACAGGATCACCTGATC -AACATTCAGCTAGAGGCGTTGTGTCGATCCGTCAAGCCCAGATACAATGATCGGCAACGC -CTCTCTTTGCTTCCGATATTGTATATTCCGTAATTATATACAACATAAAGTACAACATAA -GGAGTGCGTCAACCAGTCTCAGGGGAAGCTTTGCCAGATCATTAGGGAGCCACGCCGATC -ATTTCATCATCGCCGTTGTGGTACGGAGAACCCCATAATGGGGATATTGCTAGCTACCAG -CTACCAAATACTCGGGTGGCCTCGGTACTGAGATGACGACAGGGGTACTTTCGAAGCTTT -GATCGGTTATTCTGATGGGTACGGGGTACAATGTACAACGTAAGCTTGTCAAAAGACCCC -TTTTAATCTGTACAGTCAGGGCTTTCTTATCCTGTGACACCCTCTGCCCCTGACTCAAGA -TATAAAGAATGATCTATAAAGACAATTCACTATTGGAGAAGGTGCCCAAGGGGCTCCTTC -CACAGTTGGGTATCTACAATATAGGAGAAACATCTGGGCAGGCTGTTCATATCGATATTC -TCCACTTTAAACTATCTTGTACAAAGAAACCCCTTTCCGCATTAACACATCCTAAATGTG -AAGGCGTCTCCAATTGAGCAATCTCAATGCGTATACTCCGTACGGAGTACGGTGTGCTCT -GTAGGATGGTCACACAATCGGGAACCCACTGGGGTCCGCTGGTTAGAAGCCTCACTCAGA -ATCATAGCAAAGATTATACTGCTCATCGCCGGTAAAGATCGGCAGTCGACAGCTCACTAG -TCTCCTGGCGTGTAATTATGAGAGACTATCTTCTCTCCAAAACGCAACAAGATATTGCTG -GTATCATTAGGCGACAGGCATGCTCCAGGTCCTGGAAAGCCCATCCCATCAAATGGGATT -CCTGACCCACCGAGGTGACTGACCATGTTCATAGTCCATACGGAGTACGGAGTACGGAGT -ACCCAGACCCGCCTTCCCAAGCACAAAGTACGTTGTATTGCTCAGTATTACACCAGGCCT -AGTGATTATTGACGAGTTGTGCTGTCGTGGAGTGTCTGTGCATCGGCAATTTCGAAGTGG -GCAGTCTGTAGTAACCATATGAAATATCAATACCGTACTCTGGCTTTCCATTTCTGCACT -ATGGGACTCTAACTGACTAACAAACACGACGCCCGCTTGTCAGATAACATAATCAGCCAA -GTAAGCCCGATCAACGCCCCCAACATAATTAGGCAATGCGAAGGAGGCCTTTGGGCCTAG -CTTCCTTTCTTTTTTTCCATGGAAGATAGATAAATAAGACATCACCGATCTATAACACGC -CGCCGGCCTTCCGCCGGCCTTCCGTACTTTTTCCGTGAGCGAGTCGCTTTGTGTTCTCTT -TTACCTTCAGACACATTGGTGTGTGGAGCTGATTTGCTAAGAAAGACCAAGCTTTTAACG -TCGAATTGGTGAAGGGCGCTACAATATAGGTATCGTCCATGGATATATTTGCCGATTTAC -TTCGATTCACGGAGTACGGAGTAGATACAACGTATAATTCCCATACAACCTTGGAATGGA 
-TGGAAACTAAGGTTGCCCAGGTGTTCAACAACCGATGACATAAGCAAGCCGTATCGGGCA -GGCGGCCCCACCAGCCTGGCAGAAAAAGGCAAACGATAAGACGCGCAGCTACAATACTTG -GACCTAGCTCGCAAATTTCATTCGTGCAACGCACACATCAGTCATGGCAAAGGGGTCATT -GGAGGCCAAGCTCAAATTTGAGAAAAAGAACAAAGACCTTATCAAAGTACCAAAACGTGA -GCCTTCCTATCCGGACGCCTAGGGTCAATGACCAATGCTAGAAACGAAAGATTACATGCG -CTGACTTTTAGAGCAGATGCCAAGGTAGAAAAGCGCCCTATACCTCATGCGCCAGTCGCC -TCCCCATATGCCGGTGCTTCAGTACCCAAGATTGTCTACGTTTCCAAGACGACACCGTTC -ATGAGCGCCGTCAAGCGCGTGCAAAAACTCCTCCTGCAGGCCGAGAAGCGTGCCACTGCA -AATATCAACCTTGAGGACACGCGCAAGAGCGATAAACAGATACTCGAGGAGCTATCAAAG -GTGTCAGAGAAGCGAGAGGAAGTTTTTGTGAAGGCGACAGGGCGGGCTATTGAGAAGGCG -CTAAACATCGCTAAATGGTTTGAAGAAAAGGAAACCGAGTACACAGTCCGTGTCATGACT -GGGAGTGTGGTTGTCGTGGATGATATTGTTGAGGATGAAGAAATGAAGGCAAAAGAGGAA -CAGAAACAACAACGGCAGGAAGAGCAAACCCCAACTGACCAGGGAGATTCTGCTTCTAAA -TCGGAATCCAAGTCGGCTGCAAAGAAGAGGAAACGTCAGGCTGCTGCGGCGGAAGATGGT -GCCGAGCTCCCTGAGAGCCGGACTAGGTGGATCAAAATGGTTGAGGTCGCCGTCAGCCTT -AAGTGATTTCACCTACGATCAATTCGACAAAAACTCCTACGATAGCTGGTTATCCGATTA -TCGACCATCATCGGCTGATCTGAGGGTAATTGGCTGCAACACTCCACATCTATGCCAGGG -AAAAGGCCATTTCATAGGCCTGGAGCCGAGCCCAGAATTTTCACCTAGGACATTGGGCGC -ACGCACATCCCGCTGAATGATTATGATTCAAGTGGAACGCCCATGGTGGCTATACACAAC -CCAGTAACCGGGCTCAGACGACTTCTATCCTGAGTGCCTGTTCTGGGATGTTGCTGATGG -AAGCAGTCAAAACCTCAAGTCCTCATTTGTGGGTGGGTGTCCTCTTACCACACTTCGTCG -CATGCATATACCCAGTACCAAATACTCTACAAGTGCAATATCCACGTTACAGAGGCCCGG -AAGGCCTTGAGCAGGATCATAGAAGGAACAATTCGTTTGCCCTTGGGTTGTTAGCCTAGC -ACAAAATCATGTACATCGAGATTGTCCTTGAGAAATGAACTCTGATTTACCCAGAGCGCA -CGCTTTACCCGACCTCAAAACCAGCCCAACACGCTGGAGATGCTGCACAGAGTTGTATAA -GTGAGGAAAGAGAGAGACAGGACTAGAGTAACAGTAACAGCACTACTCCAATCAAGAAAG -AGTTTTAAAAAGAGGGAGCCTGAATCATCGAATCTGCCAGCGAAAAGAAAATGGCATCAA -TAAAAAAACGAAATAGTACGAATAAAAACTCCTTGATACGCTCACAAAGCCCTGTGCTGA -CATGACATGATGAATGATAATGCCTGAAAATCCTGCGTCGAACGGAACGTTTTAAAGAGA -GGCTGCAACTCATGAACGTATCCTTATTTTCCCCATCATGTACAGACAAAAGAGAAAGAG -AGTTGTCGATTCCCGAGGGGACGACAGGCTAATAGTAACATAGTAGGAGGTagaagaaga -acaacgagaaaggagatgagaagaaagagggggaggaacggacgtcgaaaaagacgacca 
-agaaggagaTCACAGAGGAATCTCGAGGAAGTCATCAAAAAATTCGTGAAATAAGAGAGA -CAGTGTGGGCTGAAGGAAAAAGGCTCCGAGTATGAAGACAAAGAAATGATTCTCAACCGA -GAGGGGAGCGAGCAACTAGAACATTCATCGCCAGAAAGTCGTGACTGGCAAAAACTAAAA -TCTGATGAGAAAGGTAGGAGACATAGTTAAACAATGCAAAGGGCCTATGCAATAGAATAG -CCGTCAAAAGAACATGGAGCCTGAGTAGTAGCATGCGAGGTCAAGCTGGATCAAATGTTT -ATTTGTCCATCTTTTGCCAACCGTTGAGATAGGCCATCAGGCCTACTCCTAATAAGACCA -CGCCAAACATGGACGCGTAAGGACTTGCTTCTTCCACATAGTTGCGTCGATAGCGTTGTG -TTGCAAAAGCTGTGGCTGCAGCTGCAGCATCCACGTCCTTGTTCTGAGAGGTAATCGGAT -CGGGGCCCTGCATCGACGGAGTGTTCATCTTGGAATCCTTCGAAGTCTCATCGCGTGCAA -GCAAGTGCTCCGACAAGCCATTGCTCTGCTTGCTATCATGTGTATCAGTTTTCTTCTCAT -GCACCATCGCTAATTCTTTCTCAGCGCGTTCGTGGCGCAAGGTTTCAATCATTTCGTTCA -GGGACTTGCGAGATTTCGAGTTCTCATCTTCGGCCTTCTCGGCGCGATCTTTGAAGGATG -CCATTTGCTTTCTCATCTCTGCCATCTCCTCCATCATGTTTTCCAATCGAGTCTGTAGTT -CAGCAGTCTTATCCTCGACAAAAGCCTCTGCCTGGCTGTCTTCGGGAGCAGGAGTCTCCG -ATGGATGGATACCGTTGACCTGCACGGTTGCTTCCGTATCGCTCTGCTCTTTCTCTGAAG -GTGCAACAATGCCTTTGTCGACTTCAACTTTTCCGACATCCAATGCGGAGCGCATCTCCA -ATTCATTGACTTTGTCTTCTGCCAATTTTCGGGCAGCCTGCTCCTGGAACAGCAAGCTTT -CAAGTTCCTTGACACGTGTGCCATGGGTTTCGATCTCTCGCTTGGCAGACGATAGCGCCT -CAATGAGATTCAAGATCTGGCTGGAATCGCGCGAGACAGGGGAACCTGTACCACTCTTGG -GCTTTTCTGTATCGCTGCGTTTCAGTGGCGAGAAAGATTCCGAAGCACTTCGTGATAACG -CATCGGGTTTCTCTGGCAGAGGTTGTTGAGGAGGAGGGGCTGGAGGATCCGAGAATCTTG -AGAAGGAATCGACTCGAGGCATTTTAGGCCGGCCGTTGATCTGGCGATGAGCACTGTTCT -CCAAAGGAGATGGCTTGTTGAGCTTTTCTTTCTCTGCGTAGCCAGGTTTCATGATCGTCG -TCAAAAATTCATCAGCTTGAGAAAGATCTGTTGTTTGTTGCTTTGCAGATTTCAGTTCGC -CCTAGAACAAGTTAGCAATATGTAGTATCGAAATAAGGTGTGGTTCGCACCGTAAGCCGC -TTGACGACCTGTTCAATGGAAATTGGCGAATTCCAGTAGTTCATTTGCCGTTGTTGATGT -AAAGCATTGAGCTGGTTACTGGTAACACTCTGAGCACTTCTGCTACTCGCGTTGCTTCCG -GCTCGACCACGCATGTGAGAAAGCGGTTGACTTAAAGGAGAAGGGAGAAGACCGCCCCCA -GAGGCAGGGTCCAAGTCTCCGAAATTCAGATCAAGGATATTGGGGGTAGTTCCATAGGTT -CCGGCATGCTCAACCTTAGATGAGACTTTGTGATGGACGATCGTGGATTGATCCTCGCTG -ACAATATCGATACCCAGCTCTAGGGTATCAGTTTCTCGCAGTTCGTGCGGCTCTGACTCC -CGGTTCTCAGGAGACAGTCGGTGGCCGTTCACAAATGTGCCGTTTGAGGATTTGACATCT 
-CGAATCCAGACTTTGCCAGACTTATCGGCCCACACCTCCGCATGCTGCCGAGAGAGAACT -TTAGAGTCAAAAAATCCATTAAGGGGCGTAGGCACTGTTTTGGCGTTTGTTTGTCGTCCA -ATCCGAAGGACCTCGGGGTAGTAAGGTACATTGATCTGTTTCCTGTCAAAGGTGCCGTTC -AATGGCAGTAGAACCAAGATTGCCGGGTTGTCCGAGGGTGGACCAATTGGCGCACGGACG -CCATTCTGCTGTGATAATTGAGGCATCTGTTGCGATGGGACAATATTTGAAGGCTGATGC -ATTGCGGACATTGTTGATGATGCACTGGGGCCCGACGTTAAAGCAGGCACTGCCTGAGAT -CGCGCGTTCGTTACCCCTGATACCGGCTCGGATTTCGAGCTGGGCCAAATATTCTTTGAA -TACTTCTTTTTTGATGATGACCAAGTTTCTGGTTCGCTGGCGGAACTCTGCGCGGCATGG -GTATCTTGAGAAGAGGTGCTCACAGTAGACGTGGAAGTGGTCGAAGAGTTCGACCCCAGA -GAGGAAGAGGAGTTTGAGCGTTGAACGTGTTTTCGTGGCATGAATATCCGTGACACGTCG -TCTGCGTTCACTGAAGACAAAGCTCCTTCTCCTCCGCTGCTAGAGCTGTACCATCCCAGA -CGAGACCCCGATTCGACACTAGGTGGAGATGCCACAGCAGTCATGGAGAATGATGCGAAT -GGTTCAAGGAAATGGGGTATCTAGACGGAGCACGGAGCGCGTCGACGGTTCCGAATTGTA -AAAGGGGCCGTATTCTGAGATAGTCTGAAGGGTAGAAAAAGGACGCCTGATAATGTTGGT -TTTATGTCCAAGGGTTGGGAATATTGAATGCGCGCAAGGGTGATTGGAGCTTTAGTTGAC -TAAAATGAAGACGGTCGCGTTAGTGCTGTTTGGGGGGAGAGACAAATATAAATTGGCTCC -TAGGTCGAGGGTAGACCTCGACCCCACAAAAAAAGGTTTGTATATAGAAAATAAGGAGTG -AACTCACTTCTGAATTGTGAACTGAGAGACGTTGAGGAATGAGTAAAATAGGTGCTTCAA -GACTAAGGAGGTGTATGGAGAGGGTCAGAAACCGTGAGCGACCGGTCTCAGCCAGGGCAA -TGATAGAAGCACAAGACGGGACTGCCGCCTCCGCGCAGGCTTCAGGCAGCCCTTTTAGAA -GATGTAACTGAAAAGAAATCAAAGATCGGGGATAGATGCCATGAGGGAACCCAGGCAAGA -GCGCAGGGCCTTTCGTTTCCTACTCCCTTGACGCCGCCAGGCAAAATGGGTCACGTGCGA -AGTCGGCACGTGTGTACTTTAAGGTCCGATTGGCTCACGTGATCAAGGCGGAGAAGCATC -AAACCGGGGAGGAAAAAGAGCTCGTTCCTTGACTATCACTTCGGGATCTCTAATTCAGCT -CATAGCTGTTACCTGCTATTTTAATTCTATTTACAACCTTATTATTAGCTTCTCATAATG -CCTGTGGTTGCTGGACCTCCCGCCCAAGCGGGCCCAGGGGGTGCTCCCTCAACTTTCGAC -AAGAGTATGTCCTCTACAACCCAGTGATGTAGATCACACCGCGCTTGGTTCTTTGAGGTG -TTATATTTATGCGGATCAAATATGCTGACAGATTATTAGTGAAAATGGGCTGCATGATGG -GTTCCAGTATGTGATCGCCAACCCACTGAGCCTTGTTCACCAAGAGCTTTTGCTAACATA -GCTCCCCTCACAGCTGTTGGTGGCATCATGGGCTTCATTATTGGTTTGTGCCACTTAGTT -CCAAAATGAAGATGGATCGGATAAGCTAACACAAGGCAGGAACTGTGACAATCTTCCAGT -ACGGTGCTGGCCCCAACGGTGTCATGCGGACTCTGGGCAAGTACATGCTGGGTTCAGGTG 
-CTACTTTCGGGTATGGTTGACTCTTTGGGCTTGGAAACTAGACTATTTTGGTTGAGAATC -ACTGACTAATCAAATTCCCTCAAATAGACTCTTTATGTCCATCGGTAGTGTCATTCGCTC -TGAAGGACCTCGCAACGATGCCTGGCTCCGTGCTCGAGGACCCCCGATGATGCTCCCTCG -ACAGAACCCCCTCCGGCCCATGAGCCAGTGAACACAATATTTCTCGCAGTCTCAAGGCTA -TATTCGAGAAACATGCGACATGACCACACCCTGGCGACGTCCCTTGTACGATATAATCTC -TATGGTTGAGAGATACTTTTTCAGGTGGTTTCAGGAGACAGGACAACCACTAAAAGATGG -CTGTGACAGACAGGTTCATGGGTACTTGTTACTTCAGCACGCGATGTACATAATCTAGCA -GTACCCAGAACATGGATATCAAAGTTCAATTGTCTCAGTTGTTCCAACCATGTTAGGCGT -TCTTGTGATAATATATCCCTCTTTTTTTGCGGCTTTCCACAATCCAAGTCCTATCATTGG -AACCGTCGCTCCCCTGCTTGTGAATAAGCCTAGTGATATACTAACTATTATCGGGAAGAT -ATCTATTACTCTGTTCCAGGCCAAAGATCCTTGTGTATGACTTAGTCTGATTATGGTACC -TTCAAAGTTGGGGCAATAAATCCGGTTGGTTCTGAACATCATCACGTCGATCAAGCTTTG -AATAAAGGCGCCGATTATCTTAGATGACCGAGGCGCCTATTTTGTTTCAGACCCGGCTTT -GAAGTTGCGGCTCAAATGTAAGCCTAGGGACAACTTCAATGCTCGCGCACAATAACATTT -CAAATCTCGTCTTAACCTGGAAATACTCAACTTCCTCGTCTATAGTGACATGTTGTTTTC -TGCCTTCCCTTAAGCATGGTCATCAAGCTATTAAAAACATCCTCGGTCACTTGGAAGAAG -ATCTCAAGTCTTGACTTGCCACACAGTACCGGTATTTATTTCTGTTTGTTTTTGTGTTTG -TCCGGTGAACTCCGAATTTAGATGCGGACGTACTGGATTGCTCTTCTAACTGCACTCGGG -GAGTTCAGGTTTTTGATCCCAGGTTATCCAAGTTGGGAACTATACTATTTGGCTCGACAT -GTGCAAACGGGTATGTACCGTCAGAGAATAGGCAACGGAGGCGATTGGATGGCACAGGTA -CGGTCGCAGTATGAGAATAGCATGAGGATGACTATAGTGTTACTCATAATCATCAGCTTT -ATTACTTTATTGCTTATCCTGAAAAAGCCACTGAAAATACAGTCAGCCCCCTTATATATC -TGTCCACTGTACCGAGTACAATGGGAGTAGCATGTCTACACACTCGCCAATCAAAGCGCA -ACAAACGGAGTAAGGAGCATCAACCTACGGTGTGAACGGCTGGATTCTAGTTCGATCCAC -AGATTACCCAATAATACATACTTTCTTGCTCCAGGCCTAAGATCATAACCCATTTTTGTT -TCGATAGGCGCTAGGTCACCAGCCCCCTGGCACAGCCAACTTGAGATGCGCATGATTGAC -AGCTCCATCCAATGAGGAATCACTTAGAAAATACCCCAAGGTTCTTGATATAAGCCCCGG -AGTTGAGCCTCAAAATTAGGGAATATATAAGAAACCACGTCTGCTAGGGATTGTCCCTTA -AGCCTTCCTAACCTCCCATCATGACTACTCAGCTAGCATCTCGGGCCCTTCATTATAGCT -GTACTTTGATATCAAGTCTCTACATTGTCATCTGCATTTTATTTAATCCGGTAAAGAAAA -GAAAGACTCGCAAACGCCGCAGTGGACCTCGAGTCATCTTAACTACCTTGGTCATTTTCA -CATATGTCAGTCAAACAGTTCTCAAATTCATCACCGATGACTTGGACAGCTCTCAGCCGT 
-TCCTTGTGCATATGACCTCTCAAGCGGTTGTATGGACTGCACTTTGGCTACGACAGAATC -CTTCCAAGTACGAAGTGGCTGGAGTCTCGTTGGTCACCGCTGGTTTTGAAGTTCCACTCC -TTGCGCTCTCCTTGTATTGCAAACCGAGACAGTGGGTACCAATCACGCAGATCACATTCT -CCGCAGTCCGGCTTGTCCTGCTGATCTTTGTGACGGCTGCGATCCTATCTGAAACCAGAC -AGAGGAAGAAAAAGACATCAGAAGAGTCGCGCCCCTTCCTAAATGGTAATCACACAAGAT -ATGGTGCAACGAATGGCAATGAGTCAGACAATAGCAATGTGGATTCCAAGAGCGAGAGCG -ACTCGGACACAGACAAAAATAAGAATGATGCCGCCACACGCAGGAAAAGACTCCGCAATG -GGAAGTTGCAGTCGATTGGGGGCTGGTGGGATTATCTGAAAGGTTTCTCGATCTTTCTGC -CATATTTGATCCCCAGAAAAAATCCCAAAGTGCAGCTTTGTATTGCTACCAGCATTTTCT -GTTTAGTATGCCACCGAGCTCTGAATATTCTCATTCCTCAGCAACTCGCAGATGTGACCG -ATAGCATCTTTGCCAACAAAACCCCATATGCTTCACTAGGAAAATGGGCGCTGCTGCAGC -TCATCCGTGGGGGAGCTGGGCTAGGATTGATCGAGGCGCTCGTCAAGATTCCGATCAGGC -AGTTTTCTTACCGTCAAATCACCAATGCTGCCTTTAGCCATGTCATGAATCTATCCATGG -ACTTCCATATCGAGAATGACTCGGCAGAAGTTATGAAGTCCATCGATCAAGGTGGAGCGC -TGAACAATCTGTTAGAAGTTGCCATTCTAGATATCATCCCGACAGTGGCTGACCTGGCTA -TTGCATGTGTTGTTTTTTATTTGAAGTTCAATGTCTATGCCTCCCTTCTGGTGGTGATAG -TATCTATTGCTTACGTTGCGGCTGAGGTCTTCACCTCTAACTGGAACATGGACGCCAGAC -GTGAAGTCACACAGACACAGCGAAACGAAACTCGCATTATGCATCAGGCCGTGCAAGGCT -GGCAGACTGTCACATACTTCAATCAATTCTCGTATGAAAGGTCTCGCTTTGGAGAAGCAG -TGGACCTATGCTTGAAAGCTAGCGCCCGTTTCGGACAACGCCGAGCAATCGGCAAATCAC -TCCTAGATTTACTCAAGCCAATTTGTTTCGTTGGTCTCTCCTCGCTTATTGTACACGAGA -TCTCCGTGGGGCGTTCTTCTACGGGAGACTTTGTTTTCTTCATTCAGTATTGGTCATCAC -TCATCTCTCCTTTGGCCTACCTCTCCGCACAATATCGTTGGCTTGTCTCTGACCTAGTCG -ATGCCGAGCGACTACTCTTCCTGTTCCAATCCAAGCCCAGCGTCAATGACAGGGAAAATG -CTATGCCATTGAAGTCTAGCGATGGCCACGTTTCATTCCATCATGTTGACTTTGCCTATG -ACACGCGGCTAAAGACCCTCAAGGATGTGGATATATCTATTGAACCCGGGACAACAGTTG -CACTGGTTGGTATGACTGGTTCTGGGAAGACAACAATTCTCAGACTTCTTCTCCGGCTTT -ACGACGTTACTTCGGGCCAAATCAAGATCGACGGCCAAGATATCCGTGACATAACCCTCA -GCTCGCTACGACAGACTATCGGCGTCGTCCCACAAGATCCCGTCTTGTTTAATGCGTCTA -TCATCGAGAATTTGCGTTATGCCCGGCCCTCAGCCTCAGACGAAGAGGTCCACGCAGCAT -GCCGCGCCGCAGCTATTCACGATAAAATCCTCACTTTCGTTGATGGATACAACACCACAG -TCGGAGAGCAGGGTGTCAAGCTATCTGGCGGTGAACTACAGCGCATAGCCATTGCTCGGG 
-TTTTCCTTAAGAAGTCGCCTATTCTCCTATTAGATGAAGCAACGAGCGCCGTAGACTCGA -ATACGGAATCGGATATCCAGGTTGCGTTAGATAGATTGAGAGCCAAGCGAACTACCTTTG -TGATTGCGCATCGACTTTCGACTATTTCCAGCGCCGATCGCATCCTTGTTGTGCATGAGG -GACAGATTGTGGAGAGCGGGTCACATCACGAGCTGCTGAAGAAGGAGGGTGGAAGATATC -AGAACCTCTGGCAGAACCAGTTCGGGGGCATCAAAGGTGGAAAACCTGCGCTGTAGGCAA -GGAACTGGAAAGGTTCGGGGTTATAGGATTGGGACGATAATTTAAAAGAATCGCAAATAA -TGAATTGACATGTTTGATGTTCCTGGGCTTTCAAGATTACATACGATCTTTATTTGGAAT -GCTAGAGCGTATAGAACGATAACATATTCCATTCAAGACCAAGCGGGACCAGTAGTTTCC -AGCCCCTGAAGAGCACAAACCCTCCCGAGAAACATCATGTGTTTTGTCCAGTCCCAGTGT -CATCGTCATCGTACCTTCGGTCAACTTTGACATACAAATGTGGATTTCAGTTGAAGGTTG -CTATGTAATTGACTGTGCTATATGAGTCTAATATATTTTATATACTACCTAGGAAATGAC -AGGTAACTCCAAGGACAACCATGATGTGTATTGGCCTGGATGCAGTTAGGAGTGTAGCGT -TTTATGGATCAGTCAAACTATGATGAGTACCAATATATATCTCCCATCCAGCCCTTACAA -CATCGGTCTCAAAACTTTCCATTTTCTCGGCATAGATCTATAGCCAAGGCTGAATGACCA -CTGTTCTTTCATTCATATGAACTTCATGGTTGAAGTCATGTGTGAGACTTGATTGCCATC -TAAATTTGATCAAAGTCCATGCGCGGAGGGAATTCGCCTCTTCGGCTTTGCCAAGTTCTC -ATAGAGAAATCAAGAAGGTATATACAGAGCCCTTGTACATGGCATTGTTGAGAGCGTAAC -GCATACACACTTCTATTCATACCCTTCATCTTGTTGGGTCTGCTGAATTTCGCACTGGGG -GTTATATATCGCAGGCGTCGTGGCAGGGCATAACTAGTGAGGGGATGGGCCCATAGCATA -AAATCAAGTGGAGTACGAGTATCAAGTAGACTAGCAGCAAGTAAGAAATTGTCTCAGCAA -TCGAGATGAATACAAGTACATGACCTAGTAGATTCCATTTTATTCTTCAGAGACAAGACC -CTTTAAACCTCTTGCCCTAATTTCAGTATCTAGCACTGGAGACCCTGATTGTTTAATCGC -CTAACCGCACCTGTTTTGGGCTACTCCGGACAATACAATCTCATTGCACGAAGTAAGATT -AGATTGAGCACAAGCCACAAAGAAATAAGAGAGAGACCTTTTCTACTTGGTTTTCATTGT -CCCCTTTTTTCCCCAATCCAATGATCTTAGACAGATTACTATGGAGTTCTGACTATGAAG -TTCTGACTATAGTATTCAGACTATGGAGTGAATGATGGTCGTATGTTTTCAATTGACACG -GAGTATAGCTCGTACCGTGAGGTTCGTGTTCAAAGTGGATACGGGGAACATAGGGCTTTG -CGAGCCCAATGAGTGCAGAGCCCCTGAAAAAGAACCCCTCTTTCAACATATTCATACCAA -CACAGAGAGTCCGGAGTAATGTTTTTACTAACATGTACGGAGTACTCAGATGCTATTACG -AATACAAATGACAGATAGTTAACCTTTGAACAACATGGAATGACTCAACATTCCTCCTTG -GGCAGACCCTCACACTATGGAGTAGAGATGCCCAACATAGTAGGCCCGAACGCTGATCAG -TTCGTCTTCAGGTACCTGAATGGAACTATGGAAGCGATCTACAAAACCTTATAGAGTACT 
-CCGTACAATATGTAGCAAAGGAACTTTTTCCTGATAGTGTGGGAACCGGAAATTGAGGCA -AATCTCAATCTCGGCTAACAGGGGAACCGGGATCACTTTACAAGGTAAATTGAAGAATCT -AGATCAAAGTCAAATATTGCATTGAATGTTTGTTATTGTTATGTTTAAGATTAACCGAAT -CACCAATCGTTGTTCCTGCACATGACGCCAGGGGCACGGAACTGACGGAAGCCTCCGTGA -GGGAGAGTGAACACCTGCAGTAGTGCGTGATGCTGCCCAGCGAGAATCGTCATTCTATCA -CTCTAGGGGTAATAAGACAAAATTAGGTAAAATTAGCCCGTCCATATAGAAGACGGAGTA -CGAAGTCAACAAAGGCGCGTAGGGAAACATACAAGACCAACAACCTGTTCCACAATATGA -AATCCCTCACTAGACTGGGATTGGGAAGTGCTGAAATTACACGAACCTATGGAGTATACG -TAGGCGAATAATTCTCGTATTAGCACTGGAGGGACACGGGGCTATGGGGCCAAAGGCGAA -CCCTCAAGCCTTTTCCGGCTATAATTGATTGGAATTGGAAGACTAATGGCAATGAACGTA -ACTATAGCATTTGACACCGTACGGAGTAGTAATAAACAAACTCTTACCCCTGGCGTCAAA -AGTCTGTGCGGAAGACCCTTTGCTTTTAGGGCTCTGAAGGGTCATCGGTCTGAGATGGAG -CCCAATGGATAGTTCCAGGGTCCGCGTGTTCAAGGGGCCAATAGGAAGAGCATATTCCAC -TCGAAAATCAATAATAATCTAAGAAAACAAAAAACAGAAGCGCCACAGCACTTCTTTGAT -GGACGAATCGCCCTGTTTCCCCCATGTTGTGGACGACGTCCTCGTGCCTGGACTCTTATT -AAGTCTGGGCATGCCACATCCTGGTAAAATTTCCTTTTTTTTTTTTCCCTTAGGCGCATT -AGAGATTTTCTCAATACACTCATTTCACTCATTTATACTCCTTGAGCAGTGCTCTACACG -ATCTTTACATTGAACCATTTTTGACGCTCACTCATTTGCGATCAAATCTGAAACATATCT -ATTCTACCCAAGATCTCATACTTCAAACGTTTCATAGCAAACCTTGAGATTCAATCTTGC -TCCCTCTTCCATATATTCACACTCGAAATTGCCGAAATGAAGTTCACTGCTATCATCGCC -TCTCTCTGCCTGGTTGCGGCTCCATTCGTCGCTGCAGAGGATATTGTTACCTCAACTGCA -ACCACAACCATCACCAAGACTCTGGTTCGCGTCAATTCAGTGACCCCCACTCCTAGTACC -ACCGCGAGTTCCAAGGCAAGCAGCATGAGCACAGTCTCTGCTTCGACTTCAACTCCTCTG -TCTAATGCCTCCTCCACTACTTCTGCTGCCGCAGCTGCCACCTCGACCGGTGCTGCAGCC -AACATTGGCGCTGGCATGCCGGCTGCCTTGGTTGCCGCTGGCTATATCGTTGTGATGATG -GGTCAAGCCCTGTAATCACATTCGCAATTTCGCGACTCGCAAGAAAGCCCTCACGCAGGC -AACGATCATGAATTGAGGCCATTGTCAAGACTCAAGACCGCGCTCAATTCATACTGCATC -ACGGCTGCCTTGTCAAATACCGTTCATCTACAGCAGAGCCGGGAGCTTATTCTATTTCTT -TTCCCCCCTTCTTATACTTAACATTTCTATTCCTCTCGGTTGGCTGAAGAACCTATTTTC -GGAGAGGAAACGGTTTAACGCTTTTTCAAATACCCCCATTTCATTTTTTTATTTCATTAT -GACTTTTTGGACCGACATGAGTTGATGTTATTATTCTCTTCTTCAACCCTTGTTTTATTC -AATTCGTTTGTCCATATACGTTGTCGTGGGTGACTGATCAGTCGACTGATCTGGCTGAGA 
-TCCACTTGCATGTCAACTCTTGAGTCATCCACAGAATATAGGCACTTTATTTCTTCTCTC -CTTTGATATATTGTACTCCCTATAAACAGCAATGTATATAACAGAGTAGAATAGAGAGTA -GATCATGTCGATCTAACATTCCAGAGCGGGGACGCCATTCGTTCCGGGGTCAACTGATGC -CCGTGCACGTGtgatcggatcttatcttttgatctgatctCTTTTTTTCCAACCTTTGAA -CGATTTTTTTCTCTTTCAAATATGACAGGTCTCTCAATTGATTCCAGTCGATCCTAAACC -CTTATCTCCCCCAATCGCGCTATTAGCGCGTTTGTTGACCTATCCATCGCGGGTCTCCAC -AACGCGTCGCGCAACTCTGAACATCAGATAACCCCGTGTTTGAGCGGAACCCCTAATGGT -CTAGGTCCAAAACGAGAGACTCGTCTGAACATCGCGAAACCGAAATCGCAATGGACAGCG -AGATCCCCCTCCCACCGCCGCGGCGGATCCGGCATCGCTCACCCGCAACCGCGAACCCCT -CAGCGACAGGCGCGGTGTTCTCCTCATTCCAGCGCGCACAACGACTGTCGCGCTTTGATG -ACCGCTCTAGTCAGCCATCCAGTGATCCTGCGCTCTTCTCCAGTGACGATATTCCCGCTT -CTGGGTTGGAGAACTACCATGCGACGGTGTCGGGCGCGGGTCGCAAACGGCGGTATCGGG -GGACCTGGTGGGGGGAGCAGGTGATAGATCCGAAGCGGAAAAGGGCGGACTTTAAAGACA -AGAGGAATGTTGATAGTGGTGTGTGGATGGGGAGTGACGAGTCCGGTGCTGAGTCGCTTT -TGCCGTCAGAGGATGGCTCGGCGTGGGGGGAAGACCTACGCAAGTCTGTGCTTGATCCTA -GGAAACCTGGAAGCTCTACGCCTTTCGTGAATGAAATGGAGAACATGCCTGCGCAGACTC -GCGTGGCGTTTAGAAGTCCTGAAGAATCCGATGCTCATCGATTCGCGCGGGAGGTTGTCA -GCGATTGTCTTGATAAGGGACATGAGAGCATTGATTTGGGGTATGTTCTGGTTTCATCTG -ATGTCTAAGTTTCATCAAATATCCCGTTGTTTCGAGTTAATGGTACTAATGGCTTTTTTT -TCCTACACAGTGACTTCCATCTAAGAACTATACCATCCGGCCTCCTACGACCTTTACAAC -ACCTAACGAAGCTTCCATCAGTCAAAGAAGCCCCGGTTTCAGAGAACGTTTTCACATCTC -TGCAGCCGTTTCTTCACATATATCTCCCCAATAACTCCCTATCAACCCTGCACAACGATC -TCTTCGAGCTCAGCAACATCAAAGTCCTCAGCTTGCGAAACAACAAGCTGGCCGAAATCC -CATCTACAGTCCGCAGACTGATAGCCCTGGAAGTTTTAAATCTTTCTGTCAACCAATTGA -CCTACCTTCCCTGGGAGGTCCTCAAACTCATGCAGCAAGGCGAACTGAAACACCTGACCG -TACGCCCGAACCCATTCCTTCCAATCGAAGAAGCGCAGATCGCAGAATGGCACTATAAAC -CGACCAATGAAAAGGAAATAGACAACGACGAAGACTCATCAGACGAGCAGTCATCTCCCC -CGCTCCAATTTCAAGAGCAGGAATCTTCTCTCAACGAGGGCTGGACCCCTATCCACGTCG -CAACAGGCCCGATCACCTATATGAATATGGAGGGCAATCCCATCAGAGACACTTCATCCC -GCAACAGACCCACTCTCACACCATCGGACCTGGCATCTCCTATAAACAACGCCCCATCCC -TGCGCGAAGTCGCGCTCCGAGCTGTGAGTAAGCTCCCCTACCTGGAGCAAACCACCGACG -AAGAGCTGGCCGAATATCCGGCGCTGATTGTGCCCTTGCTGCAACAAGCGAGGGAAGTGC 
-GCGCTGCAGGTGGACAGTCTTGCTCTGTTTGCCGGCGCGAGTATGTGATTCCTCGCACTG -AATGGATGGAGTGGTGGGATTTTACCCCGTGTGAGAATGGGATGAAGATGCCTCGCTGTC -CGGGTGAGAAGCTTCGGCCTTTACCGTTCCGGCGGTTTGGTTGTAGCTTGTCGTGTGTGC -CAGTGTGTTGAACATATCTTGAACTTTTCGCTCTTTTGAGTGATGAGTTGTTTATTTCGG -GTTCTACTTTGTATGTATCTGGCTTGCATGTTTATATTGTGGTGCTGGGATCTTGTATTT -TACTTTTCTGTCCGGGGTTTATTGTATAGAATTATGACTATATCCACTACCTTGTCCTAT -ACTCCGTATTATAAGGATATAAATCTTTTAATATCCGTGACTACCTTAAATCCGCCCATA -AGTCCGAGGCTTGCAAGAGGAATGTGATATAAAAGCGCGGTGCTATCCTGGCATGATATC -CTCCTAAACCATCAATCCTTAACAAGTAGACATCACAGAGTCAAATAAACACCACAACCC -ATCCATCATCTATCACCCAACACCACTCACAAACCAACCATGGCAGAACAAACCCCAACA -TTCGCCAAACCCAACCGCCCCCTAACCTGGCTAATAACAGGCTGTTCCTCTGGCCTAGGC -CTCTCACTGGCACGCGCAGCCCAATCAAACGGCCACATGGTACTCGCAACATCACGCCAG -CCCTCCCGCTCCCCAGAGCTAGTATCAGAGATTACACAAAACGGGGGCGAATGGCACGCA -CTAGACGTTGACAGCCCAACCGCAGCTTCTGAGCTGCTATCTACCCTCGAGTCCGCAGGA -CACAAGATCGACGTGCTGGTCAACAATGCTGGATACGCGATTCTCGGGGCCGTCGAGCAA -TTCTCAGATGAGGAGGTGCGTGCACAGATGGAGACGGTGTATTTTGGACCGAGTCGGTTG -ATCCGAGCGATTGTGCCTGGGATGAGGGAGAGGCGGTTTGGAGTGGTTGTTAATGTTAGT -TCTGGGGCTGGGTTGGAGGGGAGGGAGCGGATGGGTGCTTATGCGGCTGCGAAGGCTGCT -ATGGATGGTATGTTTTTCTCATTCTTTCCTTTCCTTCTTCTTGCTCTTCTAGATGCCTCG -ATTACATACATGTATTAATGTCTATGCAGGTTTATGTAAAGTTCTCGCTAAAGAAGTTGC -GCCTTTCAACGTCCGTCTTCTGACGGTGTGGTTGGGAGTGTTCAACACACAGTTCGGACG -CGGATGTCTTTCTCCTGCTAAACCTCTACCGACGGACTATGTTGGCTCTGTGGCCACGCA -GATGATGGAGGCTATAAAGAGTGGTAAGCATGTTGCTGATGGTGATACGGATAAGGGTGC -CAAGGCGATCTATGAAGTCGTCGTTGGTGAGGGCGTTGGTGTTGGGCGCGAGAATGAGGA -ATTCCTGCCTCTAGGTCGGGATATGATTCCTCGTGTTGAATTGGTGCGTGATCGGTTGAT -TCATACGCTTGATGTGTTCGGGGATATTGCTGGAAATGTTTATGTTGAGAAATGAGGCAT -TAGGTCTTAGTCTGAACAATGCTTTTGCCAGTTCATTATTATAGCGTAATTATATGGTTA -CATTAATGGATTAAAGCAATGACAAGTGCCAATTGAAACCATTCAGATATCCTTACCAAA -GAACATAACATATAAGTTTTGAAAACGATTGACCCGCGGTATATCCCAGCCGCTTCAACA -ATGCTTGCAGGCACTTTGCAGAACCGCCGATCAGTCACCCAACGCCAAGCAGATTCAAGT -CATAAGAAAATCAAAATCATCGCTAAAACAGTATGCTTCGATACAAGGAGAGCAAATCCC -ATATGGAGAGAGGAGTGAAAAAAAAGAGAAGAAAGAGAGGATAGAATATAGACATTAGCG 
-ATTACGGTCCGCACGAGCCTGGTGCCATATCGTATCGCCACTCAGAGCGGAAAAGCTAAG -AGTAGCCAGGTTTCGCCAGCTTGACTGTGGGGCAGAATTTGCATGGTCCCCAAGTAGACC -TGCATTTTGTTCAGCATCTAAAGCATCGTCATTGATGCTTTCAATTTGCACTGGAGCAGA -GGATGTACTGCCTCGAGAAACCGACTGGGTGGATTGGACATCATCACTGGACTGCTCACG -ATGACCCTGAGAGTCATTGGATTGACAATGGGACAAGGAGCGGACAACATCGCCCTTACA -AATTGGGCAAGTTCGTCGACGAGTAGTCAACCAGGGTGTACTGAGAAAAGAGGTTAGTAA -AATGTCAAAACAAAGACCGAATTCTTTCGTATCGCTTACATGCATTCCACATGGAATTCA -TGGCCACATGGCAATCTCATAACCCGGCTCAGACCATCGACATATTCCTCAAGACATACG -ACACACTCCACTTGTCTTCCTGTGTACTTGCGTCGCCACAAAGTAGATCCGTTTTGCATA -TTGCACTTCTCTTCCTTGGGCGAAACAGACGAGTCTAGCATAGTGCCCGAAAATGTTTGC -GAACGTGATCTATTTGATCGGGATGCGCTAGGCCGGCTTTCATTGCCAAGAAGTGGGGAA -GTCGGAGAGGGAGGGCCGGGACTAGGAGGGCGAGTTGAGTTGGACGAGGAGGAAGAAGTG -GTAGCAATTGTATGGTATGTTCGAACAGGAAGTCGTTCTATGACTGACTTCGGGGCACGC -CAACGACGACGGCGGATTCGGGACCGGAGCAGCAAAAGAGCATAAACCACCGTCAGCGTG -AGTAGTGGACTGACGACAAGGACCAATAGCGTATCGAAAAAGGGACTAGTGGACATGCTG -GTAGGCGTCATGGTAACCCAAAGACCTTCGTGCTCGAAAGTCTCCGCGCTTAGACGGCTC -GAGGTGTGAGGGCTATCTTGCATTTTCTGGTTTGATGCTGTGTTTTTCTGGAAGGAGGGC -GACGAAGAAGGCTGAGGGCCTTGATGAGTATTGTCTCCCCATCCAAAGTGACTCGAGAAC -CAGCCCTTCGTGGAAGATTCAGGCTTCCCATGAGTTCCGATGGAGCCTTTCTCCGATCCC -TTGAGGCGCATGTCATGAGAACCAGCACTGGATTTATCGATGCTTTGGTATTCCCCACTT -CCTGGTGTAATGCTACCACCCGGAAGACCATTAGGCGAGGAAGCAGCATCTCTCTCCGCA -CTAGCAGTCTTGGAGGTGTCCTTGCCAGTTACAGAGGTGCTAGCTGTCGGGGACGCAGTA -CTCTTAGGGGGCAGCAAGTCGGGATCTCTCCAATCCTGAACCCCGATGACAAAGTCATCA -TCCTCAAAACTTTCAGGGGACATATCAGCCTTGTGGCCATCGAATGTGTCACCCTTGGTC -TGACCACGATTATTGGAATCTGTTGATCTCTTTCGGGCGCTTGACGAGGCTTCCTTTTCA -TCCCATGAGCCAAGATTGATCCAGTCAATGTTGCCGCTGCTAGGCGGACGTCGGCTGTCT -TCCAGATGCCGCGAGCCTCTATTCTTATCGCACAACCCCAGGAATGCACCAATTGCGCGG -AAAATCCCGTCATTTGAGTGAGACGTGGGCGCAGGTCTGTCTTTGGTTGTCGAATGACTG -CTGGCAGTCGGAGTTGGAACAGTTGTTGCTGTGGTAGTTACCACCCGTTCTTTATCGGCA -TCAGCTTGTCCTGCCAACTTTGCCTGTGAAGATTTGAACATGTCTCCCAAGCCGGCAGTC -CCACTGGCATGTCCAGGAACCAATGAAGATAAAAGATGAGCAGTAGTGTAGGATGTGAAT -AATGCCGGTATCGTGACATTCGATGTGTCGCCTCGTGCGTACATAGTGACCAAATTGCCA 
-CCTCGAGTATCGTCGCCGACAATCAAAGCAACACCTCCTCGTCGTTGTGCCCACTTGGCC -TTCTCCAAAAACCCACAGCCACCTCGACTCAACAGCACCACCTTACCCTGTATTTCGGCA -GTTTCCTGTAGCGATTGGATGTCGGCATGCGCAGGTGGCTTGCTTTGGACGTGATCTCCA -TACTTCTGATTTACCTCAGGAAGTGAATGATGTAGATGATCGTCGGTTCCATCGTTCGTC -AAGTGTGGGTGAGTTGGGTCATCTCTATCGTTTTGAGGTGTGGTATCGGCCTGCGACACC -GACCGCGGGCGGTTTGATAATTGATCATTCACTGTATTACCTCCGGAGATTCTGGAACTC -TGTTCTGCGCTCCTGGAGCTTTTGTCTTGCTTCTTATGGTTGGCGCCCTCCCCCCAACCG -GGAATATCGGAGCAGCCTAGTTCCCCTGCCATACTAGCTAACCCACCCTCACCAAAGCCA -CTTCCCACCCATAACTGACCGCTGAGGCCTTTTGTTGGCAGCAGCGGGCCAAATGCCGCT -GGCCGGGCGAGGAAGAAGGTCGAGTTGTCATCAGTGAGACTAATAATGGCCGACGGAGGG -AAAAGGGACGATGGTATGTTGAAAGAAAATAAAGCGCGTAGCCCAGTTGTCTGGCCAGCG -AGCGTATTGGGCGTTGCTACATTGGGCGATGTAAGGGCGGAAAAGAAGGTGAGGAGGATC -GGAAAGAAGATTAGGCAGAAGAGGACGATGAGCAAGCGTGGCGGTCGCATGTCTCTGATG -TATGATCTCAAACCACTTGAATCGAGAAACGACTTGTCCGGAGATATCTAATCACAGCAA -GGACGGGTGGATCGAGTGAATGATTGACGATGCCAGCGGCTCCGCTGGACCACAGTACTT -TCGGGTGGACTCTTTATGACCAATCCAACGGTTGGCGGCAGGCTAGCAGGCCGCCACTCA -TCGAGTGCAATGAGCAGCACCACGGCTGGTTGAAGGAAGTCGACAGATGACAGTCAACGT -AAGCGCAGGGGAAGTCAAAGCgagggagaggagaaagagggaagggagttcgaaagaagt -cagtcagatggagaaggagaagTGAACTCATAATATGGATTATCTAACCCCGGGTCGGCA -GGTGACGGCATGCCCGCCGTGTCATGTGTTTTAAAGCCTGAGGCATTCTATTGGCATTGA -TGTCTTGAATCACATGACGCTCTTAAGTAGCTGTAACATACATCCATGGATTGGAAAACA -TTGGAATGGCCACAATATTGGAGCTTGATCCTATGGTGACTTATCTAACCACTTGTATAG -TTCACTTCCCAATTTTCTTTTCAATCAAGTGTCATATGCTCGTAGGTCCAAAGATGCAAT -ATTTGCTGAGACAAAGACTCAATATTGGCTAAAACATAGACTCCAAGTGGTGTGAGAGCG -GGATGGGAATTTAAGAAACTCTGAGCAAATTCTGTAACTCATACCATGTCATAATAAGAT -AACAAGGCGGGAATGCCTCTCACTCGGACCTTTGAATCTCGCAATAACAGGCATATATCA -TTTGATATATAAAGTTGATATTTCCGAGGTTCTCGCCCAAAGCTACCGCCGATCATGCCA -TCACTGCCAGGCAGCTACGTAATCCACCGCCTTTTCGATTTTTGATTTGAATCACGTTAA -CGTAGTATATTAGACTGTTGTAGTAGCCGATCGAATCATGTCGTGTATGTGCCCTGCATA -GTGAATATGATATCAAAGAATATCAAAGTATATCCAAGATACACCCGTAAGATATTTTTG -GACATAAAGCTTGGCCCACTTATCATATATGGACTAGCGTGGTAATCTACCCCTTGGGAC -ATCTATGACGTATGCTGACCTAAAAAAAGGACGTTCACATCGTAGAATCATTTAGAATGT 
-TTCTCTGTGTACTACTGTTTCTATCGGAGTAAAGGGGTCCAGGACTCAACAATTGACGCA -GAGCAATATTAGACTGATAGCTAAATTTCCATTGCACACATAATGCTTCGGAGAATTAAC -ACTCGTGGAACCCAGATGTGGGAACCCACTTATGTAACATGAAATTCCAGCCCTGCAGCC -CTTGGGTCAATGGATCTACAAGGCATCCTTGCAAGAGCTGGCTCAAGTGGCGTTCTGGGT -TTTGCCGATCCGGGACATGACGACTGATGACATTGATAGCTCTCGTTTTCAGCCAAGAAT -TCCAATCTGTATGTGTAAACCCGTTTGGAACGTCATGACAACCTTGAATGGGACCGTCGA -AGAGCTCCAGTCCATGTTTCACCGGTAATTGTACTCGTGGAACCCCGCGTTATGGCGTCA -CTCAGTCTCCGCAAGGATTGCTCTAATGACAAGGAAAGACAAGCATATATAAACTCCTCA -CTGAATGCTCTTTGTTCTACTCTTCTTCCATACAAAAACATCTGACTTCACTATCTTCTC -TGTTCTCACTATCTTCACTTTTCAAAGCTCCTACTTTACCAAAGCATAATTACCACCCAT -CATGTCAAGCATCATTCACAAGGTCAAGGATGCCGTGACCGGCCATCATGATAAGCAGAC -CACCCATGACTCTTCCAAGGCATCCGACAAAATTGATGGTTCGTTCTTCAAGAGACATTG -CACGCCGTGATACTAACAAGAGACTTGAATTCATATAGATGCCCGAGACAGTTACAAATC -CTCAAACAACACCACAGACAACAGCAAATATACTTCTACCAGTGGAGGTAACAACGCCAA -CACTTACGGCTCTGGTTCTGGCCCTGGCTACGAGACCACATCCACTACCGGCGCTGGAGG -CTACGGCTCTGGGGCAACCACTGGCTCTACTACTGGCCAAGGCCACAGCTCTGGCACTGG -CACTGGCCTAAACTCTGGCTCCGGCTATGAGACTACACCCACCACCGGTGCTGGAGGTTA -CGGCTCTGGTGCGACTACTGGCTCTACTACTGGCCACGGCCACAGCTCTGGTACTGGCAC -CGGCCTAAACTCTGGCTCCGGCTATGAGACCGGTACCTCGACTGGGGCTGGCAACTACGG -CACTGGCAGCACTGGTCACGGTGCCCACAACTCTGGCACCATCCATGACGCCCGCAAGGG -TCCAATCGATGTAGGCTCAGGAACATCCGGCAATTACGGATCCAATACTGGCTCCAGCAA -CATTAATGCTGGCCCCCATGACTCTAAGCTAGCCAACAAGGCCGACCCACGCGTCGATAG -CGACCTCGACCACCGAGGCTCCAGCAACCCCTTGTCTGGCAACAGAGAAACCTACGGCAG -CAACACCACGGGCCGTTCTACTGGAGAGACCGGTTATGGCGCCAGCACCGGTCACTCTTC -CGGACAGACTGGTTATGGCTCCAGCAACCCTCTGTCGAGCAGCACCACCGGCCACTCCGC -TGGCCACTCTTCTGGACAGACCGGTTACGGTTCCAGCAACCCCCTCTCTAGCAACACAAC -CAGTCACACCACCGGACAATCTGGCTATGGCTTCAACAACCCTTTGACGGGTAGCGATAA -CTATGGTGGCTCCACCGGTCACTCCACTGGTCAGACTGGCTATGGCTCCAGCAACCCTCT -GTCGAGCGGCGCTACTGGTCACTCCACCGGACAGTCTAGCTATGAGTCGAGCAACCCTTT -GTCTAGTAGCACCACTGGCCACTCCGCTGGCCACTCTTCTGGACAGACTGGTTACGGCTC -CAGCAACCCCCTCTCTAGCAACACAACCAGCCACACCACCGGACAATCTGGCTATGGCTC -CAACAACCCTTTGACGGGTAGCGATAACTATGGTGGCTCCACCGGTCACTCCACTGGTCA 
-GACTGGCTATGGCTCCAACAACCCTTTGTCCGGTAGCGACAACTATGGTAGCTCCACCGG -CCACTCCACCGGTCACTCCACTGGACAGACTGGCTACGGTTCCAGCAACACTGCCTCTGG -AACTGACGCCTACGGCAGCACTGGCCGCTCCACTGGACAGTCTGGCTACGGCTCCAGCAA -CACTGCCTCTGGAGGCGATAACTACGGCAGCACTGGCCGCTCCACCGGACAGTCCAACTA -TGGTGACAGCAGCTACAACCAGTCAACTTCGAAGACTGGAAAGACTTCTGGGCCCCACGG -CTCTGACCTCTTGAACAAGCTTGACCCCAGAGTCGAGGAGAATGCGTCGTCCCTCGGCAC -CCAGCGCAACTACTAGGCGATTAATGAGTGACCTTATGACCTATTCCTGAGTTGTATTCC -TAGCTTTTCTTTGGTTTTTTGATGTTTAATGAGAATGACCCAACAATATCAATATCCTTT -ACCGTTTATCTTATTACTCTAAAATGAAAAAAATATATCACCCAAAAAATTAGGCCAATG -TTTCGTATTCATACAATTCTCTTTTCTATGACAATCATAATAGCTTTTTCCAACAATTTT -GCTATGCTCATTTCTTCATGTGCAATTATTCATTCCCACACTGTCATTTTGCACCTCCGT -AAACAAGCAAAGTAATGCACGAGACTATTTTTGCCCTCATTTGGCTTCTTCACAATAGAG -TTGGTATGTAAATATGGAAGAGATCAACTATTTAGGACCATAAAGTGGGTTCGACAATTG -GAGACTAAACTAGAAATGGCCAGTCTATTCCGCACAGCTCAGTAAGCATAAAAGGAAAAA -AGATGACTCGGTTGTTCTATCAGTCAGTTCAGGACAGCCTCAGGTGCTATGTCCCCACCC -TTGCTAAAGTGAGTAAGCACGAGAATTTCAAGCATTATACGCTCCTCCGTCTGACAAACA -AGCCTAGCGCCCATTACAAAAAGTATTGTGCTTGATACTGATAAAGAAGTGGCAGACAAT -AAGGGCAAAAAGAAGTTCATAGACCCGAAACTGGGTACACTGCCTCACATGAAATAGAAT -AGATAGATAATCGTCATGAATAGCAAAGGGTAGTGAAGCAACCCCAAGGGAGCAAAAACC -GACAACGCAGGGGCATCAGTGTATTGTATGGAGTTGATGAAGTCAATCGCCAAGACGCCG -TGTAGATTTGTTCATTGGGTATGCGGATGCATATTACAGAAAGGGCAATATTAGGACAGG -AGAAAGGTACGATGCCATGAAGGTCCCCGCACGAAGGTATCATGAAGTTCGGAGCTGAGT -TGGAATTAACAGGACACGAGGTTTACATGATGGTCGTTCAGTGCGGCGCAGGCAAGCACA -GTCCGGTGGGGATGGAAGGCAGTGCTGGCAATAGGCGAAGAACGGTTGTGATGAAGGAAG -CTTGAGTATGGCTCGAACGTAGAAAGGTAGGTGCCATCTACATTGAAAGTCTTGACTTCA -TGGCGATTTGTACCCCTGCGTCATGTTAGCTTTGCACAAATAAGTTATCAAAACAACGCG -GTGAGGAAACGTACACGCTGAAGACGGGTGCGTGCTCGTGAACGCTCAACGTTCGCAGCG -TGTCTTTGGTGGCATAGATGGTGGAAATCGGGCTGTCCATGCGAAGATCCCAAAGTCGGA -CCTCACCATTACGGCCACCTGAGATCAATTCTCGTAAACCACCACGCTGCATATGGACAT -TTGTGATCCATTGCTTGTGTTCACGCCAAACCTTGACCATGGATGTGGTTGGCTTCAACC -GTTGATCAAAGACTCGCACAGCACCGTCACCGAAACCGGCAACGAATATGTTGCCCGCTA -CTTGATCAGATGTCAAGGAAGTGATGCAGGAACCTGATCGTGCAGGAATATCCTGTTATT 
-GAACTTTAGTGAGGAACGCGATACATTTTGTGAAACTAGATGTTGAAGACTTACATTTGT -GCACACCTCAGTAGCAGCATTCCAGACACGGATGACCTTGACATCGCCCGCAACTAGAGC -TTTGCCCTGACCCTGCTGCCAATCAAGCACAAGGCCTGCGTTGCGATTGCTTGGGATCAG -TTCAGGGAGGGCCCTGAAGGCTGTGACCACTTCAATATCCTTGTTAGTCTCGTAGTGGCG -GAAAAGTTTGATTACACCATCAGAAGATCCAGTCATGAGCAATGCCTGGTCATCCTCATT -GATGTATCGGACTTCGTTGATCTTTGACCCGGCCGGATTGCCATTTGAGAACTGATTGAG -GCGCTTGTGGCTCTGCCAGTCCCAAATTCTATACGAAACCAATGGTTAGAAGTAGATGAA -GAGCAGATGACCTGATGGTACTTACGAAATTGTATCCCGATCATCTGCGACAGCAATGTG -ATCCTCGAATTGATGGAAACACATCTTATGGGGCTGGCTACCATTCGATAGCAAAGCAAC -CGAGTTATCCCACCGGCTCGCTCCAGCTTTTCCTTTGAGGGGTTGGGTCTCGGCGATGAT -CTTTTCGTTGCGGCCACGTCTCCAGAGGCGCTGGTTGTAGTCAGAGCTACCAGGCTCATC -TGGCTCGTTGGGTTTCATCTGGGGCTCCCGGAAATACTTGTAGATGGTGAGTAAATGTAG -GTAATAATAGTTGTGGGACTTGATGGATTCGAAATCCGGGATTACCTCAGTGGACCAGTC -TAGGAATCTACTCTTTAGAGGAATAACTGGCATAACATCAAAGCCTTTGGGTTCGAACCC -TCGGGATGTGGGAATCGGAGCCTGGTTATATCCCGTTGTGGGAGCAACATGGTCGTTTAC -TTCCGGAGGTCGGGTCCATTCCGGAGGGGCTCGGCCGCGTGGGGTAATTGTCATACGGCC -GGTTGAAGGAGAAGGAGGGTTTGAACTGTCCGATGATGGGTCTCCTGTTGAATTACCAAA -AGCAAGGTTCTTCAGGGAAGCAGCGACGCTGGCAGTTCGCCTAAAGCCCAAAGAAAGATA -GCCTTCCTGTTTTGAGGCTGGTGCGCCAGACACGGGAGGCGTTGAAGGTGGCGCGGTTTT -CTTTGCCTCAGTTCGCTCGTGATGTTGGAGTTTTGTCAAGCCTGTGATTTCCAGAATCTC -GTTCTGAAGCTTGTTAGTGAGGGGCGCCATCGGGGATTCCAACAAGGCAAGGTGTATGTA -ATCCACGATCACGGCAGCATCCTGGGCGATGTCAGGATGAGGATCAACTGAAAGAGCCAG -TAGCTGCTTCCAGATAGCCCCGAAAGTTGTATTGCGAGACAGTTGTGGTTGTTTCGGATG -AGAAGATAGTGTCACGACCTTTCCTTCGGAGCGGCTGGTTTGAGAGGTTTCGTTCTGCCG -GCGATGGGACAGGGTTCGCTTTTCCTCTTCAAGCTCCTCAAATGCCGCGACCAAGAATTT -GTTTTGGTGGCGTCTGACGAATGTTGAGAAGAATACTAAAAGCTCCTTCCTCACAACCAC -ACTTCCATCGGCAGACATGGGCAAGACCGCCAATGCCAAGGTTTCCTCGATTTGTGCAAC -CTGGTCTGTCAGATCTGGAATTCCCAAGAACGTAGTCACAGCGTGGAGCATAGCAGCGCG -AACTTCGGGTACGGGGTCGAAATTCAACTCGCAGAGCCTAGCGGGGGCTGCGCATCGAAT -GCCCATCCACTTTGCTTCGGGAAACTCGCACCATAGCATACTAAGACAGAGACAGGACCA -CTGCCGTAGCAGAGGATTTTCAACATCGCCAAGATGCCTGAGGCAAGAATCAAACAATTC -ACCAGAGAGGCAGACATTCTGTCCTTGAGGGTAGTTTTTGCAGAAGATCGAAACTATGAA 
-AGCACACATGGCCCGGTGTTCGCTGGCATTTCCCACAGGAATGGGCGAAGACGGGTTCAA -GATTGAGATAAAGTAGTGAATACCGTTGTCTTTAAGTAAATCATTTTGTACAGTGTGGTC -GACTGCCATGATTCTCGCCCAAATGAAAACCATCACGGGTTTCAATTCTTGAGCTGCTGA -TTGGAGCAACTTGACGACATAGGGGAAGATGCCAATGCTGAGGGCAAGGTGAACAGCCCA -TGGGCCGAGGTCTAGGAATTTGCTCAAAAGAATCAAAGCCCGCAAGCGATGTGCTTGACT -CAGGAGGACTTGCAGGACGATGGGAAGCTGATCTGGCGGATTCTTCTCCGTAGGACCGGA -CGAGAGATATATCTCAAAAGCAGTGAGTTGCTCAGCGAAGAATGCCGAGTGCTGGTACTC -GTACTGCCGGCGCCCTTCCTCTTGATCGATAAGGGCGGGTAACTGAGACAACACCATTTC -GACTGCAAGGTCCCAGCTTTTCCATAGAGGGTGATGATGTGTGTCTGGCAATTCAGGAGA -AGAGATCGGGTGACACTTGTATGTGCGCATGATACGTTCGCTTAGCAAAAAGTTGCGGAA -AAGAGCGGCGACCATTAGATCCTGTCGAAAGAGCTTCTTGAACAAAGCCCGGGGTAATGT -GTTCCATGCAATCGTATCGGTAATTGCAGTGAAGATCCAATTCAATTCTCCAAGAGGGCT -TCGACGATCTTGAAGTCGACCAGGAACTCGGAAGTCGTCAATGGAAATATTTGTGCGTAA -CGGGTTCTGCAGGATGAAAAAGCGCAGTGCGATTTCAATCGGTGTTGTTAAGCAACATGT -GAATAGGTCTGCAGGGAGGTCGGGATTGGTCGGAAGGGACTCTGTCTTCTGGCACGCGGC -TAGGAGGATGCAGTCACCGTAATTCTGAATGACAGCATTCGGGTCTCGTTTCTTAGTCTC -AATATTCTCTTTCTCATGTTTCTCCACGAAAGTGTGGAAATTTTGGACAATGTTGCCCGC -ATGTGAAACATCAAAGACAAAAAGACTAGGCCCAGCTAGCCAAGACTGCAAATCATAGAG -TGGGACAGGTATGTACTGGGTATAGTTCTTGTTAAAGACCCATATTTCACCCGACTGAGT -TGGCAACGGCACTCCGTGTCCATTGTAATGTAGAAGCACTCGCTCGTCCTTGGCGTTACG -GCGAAGTGAAATGCAGAACTTCTTCGTCTCGTCAACTGATGGATCTAAATACTGCTTGTA -TCTTGTCCGGAGACTCAAAGTCTCATATTGCTCCTGTAGCTTCTTACCGATCTGCTCCAT -GATCTTATTTTGACCACCGCCTGTGGTAGAAGTCGGGTCTACCCAACATTCCAGCTTCGA -CGTTGGGTTGGTCTTGACCACATCGGGAGGGTCGACTCCGATGTTCAAACAGATGGCCAA -AGCCGCAGACACAGTTTTCAGGCGATCTTTCATGCGCCAATCCTGACTTGGCCAACTCCC -AACCGGATCCCTGGGAATGCCGTTCGTCTCATGACGTTTCTCGGTGAAATACATGTAGAA -ATTCTATATCCAGCGGGTATTGTGTTAGCTTCCGAAACTTACAATCAGACTGGTCATGCT -CACCGAATGCAATATTTTAAGATACTCGCTAGATGTATATTCCTCCTGCCACCCATGTCG -CATCGCAAAATCTTCATCGTCATCGCCCACCACTTTATCGAAGGTTATCCGACTTGGCCC -ATAGTCACTTTTGGCTCTCTTCAACATAGCCGGCCGGCTACTCTCCGATCTTCGAGGCGA -TGGGCGGCGGGGTGGAGTCTCTCCGATGGCACTGTTCTCTCCAGATGAGCTAGTTGGGCG -CCCGTGTGGTGTCAATGTGGGATGCTTATGTTCAACTGCTGTGAGCGAACGCTGATGGCC 
-ACTCAACGGAGCTGGAGATCTTTCGCGGGTAGGAATCGTGGTCTCTAAATCAGGACCAAA -TGCGACGCGGACCGATTTATTCGGATTGCGACGCGAGGCCGTCGACCCGTTGGTCGTTTG -TTGTGAGTTTTGTTCCATATCGGTATTGGGATCCCTCATGTTGTATGATCGTTCATTGTG -TTACCCGAGTATTGAGGGGAGAGTAAAGGCTGAAACGTATGTCAAGGTAGGCCGCAGGTA -TCCCCGGTCGGTCACCGGTCACAACTAGTGAATTTCCTCCAGTTCGCTCGCGAGGGTCTG -GATGTTATCGACTGAACCTGGGAAGAGACCGATGATGAAATGGCCCGGGTGATGTTGGAG -GTCGAATTTATAGTAAGCCAGACTAATCTCAAATTGCGACTGCCCTGGAGATATTCCAGG -AGCGCGGAGAGAACTCAGCGCACAGCCTATGGATTCTTGTCCAAAGAGAGCTTATCTGCG -TCTAGAGCGGATGTGGTTCGGAAAGGCGAGGTGCTATGCGGAAACACAGATTATCCGGGG -ATTACTCAGAATGATGTCTTGGCATTTGAGGAAGTATAAACGTTTGATCAAACGTGTACG -GAGGACAATATGTAGACTTTTCACAAAATCGTCATGACTTGCACGTGGCTTATCTTTTTT -TCTGTTCTAGTACGGTAGGTTATGGCTGTATGTAGAGTCCATGCATCTATATCATCGAAT -CTAGTGGTAATAATGTTCCAACATGGGTATCAGTATTGGTTACATCATGGAGTACAACAT -AAGGAAGTCTATCTTCAGGACTGCCTCGGTGGGACGAAGCCCCTGTTAGTCAGCTATATA -TCTTCTAAGCAGCATGTCTTGCGATATTTGCTCTTATATCATAATGGCGACATCGCATGG -TTGGAAATGTCTAAGACAACAATGGTAGGGGGATATAAAAGGTTTCACTGGAGGAAGGAT -GGACATCTACAATCTGACACGTCAATTTTTATACAACAAACAACATAGCAAACTATAAAT -ATACAATATACTCAGACATATACAACATAGTATGTAGCCAATGATAATAGATTATAGCCA -AAAAAAAAACCGGTCAGTGTCAAGAGGTTCATCCCTACAGCCCAACCGGAACACGAGAGA -TCGCCCGTATTATTTCGATAGGCATGGAACCCCCTTTTTTCTTCCCTCTCTCAGCTTTAG -CACCCTAACCTCTTCCCCCCCAGGTATTCCAGAGCTGACATTCACCAGGTCACATGCCTA -ATTATCATGTTGTACGGCCTTCATCCATGAACAATTGGTATTTCTTCATATTGTGGGGTA -TGGATCGCCGTGTGGAATTGGGGAATGCAGTGATAGGATACATTATCATTGGACCTGCAA -TTGTTCAGGCTGTGGCCGTGTCCACGGCCCGGCATATAGCCATTGGAGTGTGATAGTGTG -GTGATTAGGGTTAACAAGCCTGTGAATGGCATTTCAACCCTTGAAAGATAAGATTGAATG -TAGTCAAATATTCTGAAGACTTTGTAACATTGCTGTGAAACCTTTCTTTTTGTTCTACTT -CCTACTTCCTTTGACATATTAATATCTATCAGAAGGCTAAAGAAGTGTGGATATACCATA -CTCAATAGTGGAATGTGAAACCCCCCTCGGTTCCTGGAGGCTGGTAAAAAAAAAGAGACT -GGGAAAAATGAGGAATAGTACACTGTAAGCAGTACGGAGTGCATAGATATATACATATCT -AGTAGAACAAGCATTGAATTAAATCAAAGTGGAATGTCTTTAAATATTCGAGTAATAGCA -TGCCCCTGAGTATGCACGTTCAATGAAACTAAGACTAGTTCGACTTAGAGCATCTCCGGG -AAATTGAAACGGCTTCCATCTTCGCTTTTCACATTTCCCTACCACCTCCCCCTTTCTTGC 
-CTTGGCTTTAGCCGCTTTGATTTTTCGAGTCCCCAGTGCGAATGCTTCTGTTTAGAGGCC -TCCCCTTGCCCACTTTTATTTTTTTGGGGGGACGTTGAGCGGCTTGTGCATGGAACGTTT -TATGGTTCCTAGTGGTTCCTGGTTCCCACTTCACATCTGAAATAGACAACTTCCCCCTAT -CTCTTCGGATTTTCCACTTCATCCCTCGGGACGACGCCATTCTTTTGATTTTCGGGTGTT -GGATCGAGACCGCACTTCGGCGCAGCTTGACTTTGCGCCAGATCCCCAGATACGACCGAC -TACGACCAAATAATTACGATGGCCTTTCTGTTTAAGAAGAAGGCTCACGCCCAGCAGAGT -CAGCAGTCGGCTGCACTCCCCCCGGCGACTAGAAATGTCCACACTTCCGAGGGTGCTCCA -TCTACTGGATCGCCTAATATGGCCAACGGGGTCAAAGGGGATGGCATGCATTCCCAGACG -CCAACTCCAAGCGGTAGCTTCAGCAATTCCCTACATTCCGCCACGAGTCCGACGAGTCCA -GACGCAGTACGATCGCGGCAGAGAGCAGATTCGGAATCACAGGTGGGTATCTGTTATGTG -ATCCATTGGTCGCTCTTTGACGAGGACAGAGCTCTAATGTCTACAACAGATGCAAAGATC -CCAGCAACCACCCAACGGGATCCCCCCTCCAAGCCCCAGTTCATCCCTTTACCCATGGTC -GCAACGCCGTTTGAACTTCTCCTCTCCCCAAATGAACCCGTTTCCTCGCTATGGAGCAGC -AATAAATTCGGTGGCATCTAAGGAAGGAGACATATATATGATGGGAGGCCTGATCGATGG -ATCAACGGTCAAGGGTGACCTATGGATGATTGAAAGCAGTGGTGGTAGTTTGAACTGTCT -GCAAGTTGCAACAGTCTCCGAAGGCCCTGGCCCGCGAGTCGGTCATGCCAGTTTGCTCGT -TGGGAACGCGTTCATTGTGTTTGGCGGTGACACCAAAATCGAAGAGAATGATTCTTTGGA -TGACACTCTGTACCTTTTGAACACATGTAAGTGTGGTATCAATAAATTCATATCTACCAT -GCGTCTGACAATTTTGTTCTATAGCTTCTCGCCAATGGTCCCGCGCTATCCCCCCAGGCT -CCCGACCTTCCGGGCGTTACGGCCACACTTTGAATATCCTAGGCTCCAAACTCTACGTCT -TCGGAGGGCAAGTGGAGGGCTTCTTCTTCAATGATTTGATTGCATTCGATTTGAATCAAC -TTCAAAGCCCAACAAATAAATGGGAGGTTCTGATCCCGAACAGTCACGAAGGTGGGCCTC -CACCAGGCCAAATTCCCCCAGCAAGAACAAACCACACCATTGTCAGCTTCAACGAGAAAC -TGTACCTATTTGGAGGTACGAACGGCGTACAGTGGTTCAATGATGTCTGGTCTTATGATT -ACATTTCCAATACCTGGACCGAAATTGATTGCGTTGGCTTCATACCGGCTCCTAGGGAGG -GTCATGCCTCTGCTTTGGTTAACGATGTGATGTATGTGTTTGGAGGCCGCACAGATGAAG -GTGTGGACCTAGGAGACCTCTCAGCTTTCCGCATCTCAACTCGACGATGGTACTCCTTCC -AAAACATGGGTCCTGCGCCGTCCCCTCGATCTGGGCACAGCATGACCGCTTTTGGCAAGC -AGATCATTGTTATGGCAGGCGAACCGAGCTCGGCTCCTCGGGACCCCGCGGAGCTTAGCA -TGTCCTACATCCTTGATACTAGCAAGATTCGGTACCCCAATGATTCACAAGCCGGAGGAC -GCGCCCCCGAAGCGGCGGCTAGAAAAACAAGTGTTGATAAAACAGGTATTTCTTCTGGGC -GCACATCTCGGGAGGCACAAAACGCCGGCCCAGAACAGCAAATGCGACGTGGACCGACGC 
-CATCGCGTGAGAGCATGATACAAGGCACGCCTGCTAATAGGCCAGGGGAGTTCGGCTCGA -ACCCAAATTTGGGACCTGGATCTAGACTACCACGGGCATCAATTGCCCAAGCCCCTGCCG -GTCCACCCCCTCCTGGACAAGCGCCGTCTCCTGGCCCTCGTGGCACTGGCCCTCAACCTG -GTGCGAATCCCCGCAGCAAGACCCCGACCAAAAATGATCGCGGGTACACCCCAACGGTTG -ATGTTCGTGTAGCGAACGGCGACGGCGGCGAAGAATCACCAATCATGAAAGATGGCCCAC -AAGATCCCCAAGGACCTCCATCGGCTGGTCGTCGAACCCCTACGCAGCATCAAAAGCCAT -CAGCCAAGGCCATGGAAGCAGGCGAGGCGGCTCCATTGATCAGTGCTCCAGGCCGACAAC -GGTCTCTCCGGTCTCAACGACAGCGTGGGTCTATTGACAGCGCCGACGAATCAATCCTTG -GCCGGCAGTCTAGCATTGAGGGTTCAGTGGAATCCCGAAACTTCCGCAACTCCAAGACTC -TTGGTGATGAACCACGGTCTCCTCGACTTACACCCCACCAAGAGGCGTTGATAAAGGAGC -TGGAGGTTACCAAGGCCCGTAATGCCTGGTATGCCTCAGAACTCGCACTCGCAAGAAAAT -GTGGCTATGTCCCCAATGCAACCAGTAACCCCGCGCTAGACGATCGGGCAAATGAGGCCC -TGAGTGATGAGGATCGCCCACTGGTTGAGGCATTCTTGGCTATGAGAGCTGACCTTAGCA -AGATGCAAGCTACTGTGGATCGACAGGCGGCCATCGCATCGAAACGAGTTGCTGAAGTGG -AGCATCAACGCGATATGGCTATCAATGAAGCCGCATACTCTCGGGCCAAGCTCGCTGCCC -ACGGTGGTAGTCAGCGGGGAACCCCTCAGCCTGATGCTTCTCGAGACGGAGATGACGAGC -GACCTACCGACATGGGACGTCGCCTTGCCCTCGCTTTGGCATCCCAGTCTGAGTTGAAGG -CTAAATTAGATTTGCAGACCACCGAACTTTCGCAAGAACGCCAAGCCAAGGAACTTGCCG -AAGAAACAAACGAGGCAACCAGGAAACATCTGGCCGAGCTTGAGATGCAAAGTAATACCC -TGGAAGCCGAAAACTTGCGCGCTGAGCTTCACCAAGCAGAGGCTCTCTCCAGAGAAGAGG -CCTTCCTGCGTGCAGAAGCCGAAGCTTCGCTCCAACAACTCACCCTGGACAAAGAAGAGT -TGTTGGCACAGATTGAGGATTCCTCGATCCGCCTGAAAGACTTTGGCTCCAGCTTTGATG -GCCTCCGAGAAGCTGTATCTGTTTCCGCCGCGAAAACAGCCCTCGTTGAGAAGCAATTCC -AAGAGGAGCGAGAGCGTCGTGAAGGTCTGGAGAGAAAGCTCCTGCAGCTCCGCTCAGAAC -ACGAGGAACGCACCACTGAAGTGGAAAACCTTACTCGTCGCCTTAAAGATGCGGAGGAGT -TGGCAGAAATGCACGCCAAAGAAGCCGAAACTCACAAGATTGCCTTCGTTTCTGGCCTGG -AACGCGCTGCTTCTACAGACTTAGATGATTCGTTCCGATCTCGTGCTGACCAGCGAGTCG -CTGCTTTGGAAGCCCAGATTGAGCGGTCCACCACTCTAGCTAAGGCCAACCAGGCAGCGG -CCAACGCAGCCTCAGACAAACTCCGGCGCGCAGAGGAACGGATTGCTGGTCTTGAAGCAT -ACCAGGAGCAGGCCAGCCGCGAAGGCTTACAATTGCGCCGACAGCTTCAGACTGCTATGA -AGGAAAGCCAGTTGGCCGCTACTGAGAACCGGGACTTGAAGACACAACTTGAGGCTCACC -AGCGAGAGGCTGGAGCTTTGGCGGTTCAACACGCCGCTTTGAAGGATCTGCTTGGTGAGC 
-GGGGAGTGAGCAACGACACCAGACGGTCCCCACGGCTTGACTCCCCCGGATCACGATTCG -GTACTCCAGAGCAAGGCCGCCTACGCGAGCTTGAGCAACAACTCTCAGCTAGCATTAAGG -CCCATGACGACCTGAAAAACAGCTTTGAAACCCGTGAGCAGGAAGCCGACCGAACCTTCC -GAGAGAAACTCGAGCAACTTGAGAATGATTACCAGTCCGCGGTTCATTACGTGAAAGGCA -CCGAAAAGATGCTGAAGCGTATGAAAGATGAGCTGAGTCGGTATAAGACCCAAAATGCTA -AGATCCAGTCTGAATTAGATGTGGCTCAGAAGAGCCTTGAGAATTCAGACGGCCAGGCAT -CTTCCGCTGAGTGGGACGCTGAACGGTCCCGCCTCGAGCTGTCGATTTCCGATCTCCAAG -ATCGGACTTCATCTTCTATCACTAGCCTTGAAGGCCAGATCACACAGCTCAAACAAGATC -TGGATCAGGCTAGGGCCGAGAAAGAGTTGTCGCGAAGTGAACATGAGCGTCTCAAGCAAG -ATTTACTCACCGCGGCCGAGCAAAGTCGCACCGAGCTGGACCAGCTCAAGCAAGAAAACA -CCCACCTTGAAACTCGTGCTTCGGATGCTGAGCGCAAAGTCAACATGCTTCTGGAGCAGG -TTCAGACCTCAGTTGGTCATTACCGCCGTCAATCCCAGCATGGGCAAGGTACCAATGGAA -TCTCACGCACACACAGCAATGCTTCTAGCAACACAGTCAGTGGTGGCCTTGGGCGGGCAC -GAGCTGATAGCAATGTCTCGCAGGACTCTACATTCCCTGACAATCGTGGTTCGATGGCTC -TGGACTCCCTTGCCAACGAGTTGGACGCCTTGCGTAGTCACTGGGAAAGCACCAACCGCA -ATTACCGTCTGAGCAACCAGCTGGACCTGGATCGGACTCCCACTAAGGATAATAGTGATG -GACCTGCACTGATGAGTGATAGCTTGGCAGAATGGAGGAGGAGACTGGACGATGATGACC -GGGTTGGGTTTGATGTGCAAACCAAGCCATTGAAAACTGGAGTTTCACACTCCGAACACC -CCAACGTGATCTAAAGATCTCCTCGTTGTTCTACGACCATGTGCCCTGAGATATGTCAAT -TTGCACCGTCTAAGCGTTCTGCACGTGTATTTGCCGACCCTTCTTTTTCCCCCATTGTGT -GTTCTACTCTCGTTTCTATGTATGATGGGTGCTTTTACCCTCTGTGTTCATCTACGCCGA -TGCACAGCATATTTTTCTGGTTCTGTTTCACCACTGCGCTCATTTGTTCCCCCATTTCTC -GTTTTTGATTTGTTTTTAACCCCCCGGCTGCCCTTTTTTATACCCCGGTTTTCTGATGTT -CACTGGACTCTGGCTGTACCACTAGCATATTCCACGCGGATTCCTTGTTCATCCATAAGT -CATAAAATCGTTCTAGTTGCTCTGTGGAGGCTCTGATTAATTTTTTTTATATTTTTCTTA -TTCAAGATCACCATGACTGGGATATGGGATATCCTGACGTTCAGTACACTTGATACCACC -AGGATGTCCAGGCCCACTGGTGGGAATGATGACCTGTCATCAGCCCGTAATCATAGGTGC -AAGCTTACTTCCTATTGACGATATTTCTGGTGTAGAACTAGCCAATCTAGAATTCGATTT -CATTCATAGGAAGTAACTGGGTGCAGGCGTAATACCCCACGTTTGCCCTAAATAGATTGA -GACAAAGGAAAATCGCGCATACTATAGGATCTGGTATTGTTGATTCAGGATCTATAAGTG -CGAACATGGTGGTCACGGCTTGCAAGAGGCAATAATGTGAGTGTATATGATTCAAATGAA -AATCGTAGCTATCCAATATTGAGCATCTCTTCAAATTCGAGAGCCTATTGATACCTGGTT 
-TACTATGGTTGTTGTTCAGGTTTCATTGATCACATACCAGTCAGCGGAGGGACTGCATCG -CCACCAGTGAGAACGGTGTAATCTTACGGAATACATATGGTTGAGGACGATGGAATATCT -ATACATCACTTTATCTCAAATTCTACATGTCAAAGTATATATGTCATTTTTATATGAATT -TCATACATGTACTCTGTCAAGCTGTCAAAATGGTATAGATGCCCAATACCAATCTTACCA -TACCACGCCCAATGACATACAAGACCCGAGGTCAGGCGTGTCCCCTGCATATTAAACCAC -CTTGGAAAAAATACCCTCACCGACTCCCTTCTCCTTCTCATCTCTCTTCCTTTAAGACGT -TTAGCTTTTGAAGGCTTTGAAAGGCTTTGAAAGACCTTTGCCCTTGATATCTCTCTCTCT -GTTTCTCGAAGTTAACCGGATCTTCCCGGCTCTCCTTTTCTTTGAGAGCAATTATTTCAC -GGTCGGCAGCTCTTTTGATCCCGTTATCTCCAGGTATGTCTAGACGCTTTTCTATTTTTT -ACCTTTTTACCTTTTGACTTTGCATTATTACATTTTCCTATTTTCGTTGCATCATTCTAG -TGTTTGAAGTTGATTTGGCCCTTCCCAACATTGAAAGTTGCCTGAGGCTTGCCAACCCCT -CCCATTTCCTGTTTCCCTCTCACGCCTCTCTTCCTTAACCTCTATCACACCCCTTTTACC -CATCTCAGCTTTGCATATCCCGGATTCGAACCGCTAATATTCTCTCTCTTCTAGCAAGTT -TCTCACAGCTAACCGCTGTGTCCCCATCGGTTCAATATGACGACCATGGTCAGTGTTAGC -CACCCCCGATTTCTTTCTTTCTTTCTTCACTCGGAGACTTCCAAGTGACTTGAATATTAC -GCTCGGATCATACCATGACCGTATGATGATCAGCACGTCCCAGGTTGTACTGACTTTGCC -TGCAGGATTTGCGAGTTGGTAACAAGTACCGTATTGGCCGTAAGATCGGAAGTGGCAGTT -TCGGTGACATCTATCTCGGTATGGATACAAATCTAACTTTGTCCCTGCACAACAACTCAT -AGCTCTTAGGAACAAATATCATCTCCGGTGAGGAGATTGCCATCAAGCTCGAGAGCGTCA -AGGCCAAGCACCCCCAGCTCGAATATGAAGCCCGTGTCTACAAGTCGCTCGCCGGTGGTG -TCGGTATTCCCTTCGTTCGTTGGTTCGGTACCGAATGTGATTACAATGCTATGGTTATCG -ATTTGCTCGGTCCTAGTCTGGAGGACCTTTTCAACTTCTGCAACCGCAAGTTTTCGCTGA -AAACCGTCCTGCTTCTCGCCGATCAGCTCATTTCCCGCATCGAATACATCCACGCCAAGT -CCTTCATTCACCGTGATATCAAGCCCGACAATTTCCTCATGGGCATTGGCAAACGTGGTA -ACCAAGTCAACGTGATTGATTTCGGTCTAGCGAAGAAATATCGTGACCCCAAGACCCATT -TCCACATCCCTTACCGCGAGAATAAGAATCTCACTGGTACTGCCCGTTACGCCAGTATCA -ATACCCATCTCGGTGTCGAGCAGTCTCGACGTGATGACATGGAGTCTCTAGGCTATGTCA -TGCTCTACTTCTGCCGAGGCTCCCTCCCCTGGCAGGGTCTGAAAGCTGCTACCAAGAAGC -AGAAGTACGACCGCATTATGGAGAAGAAGATGACCACCCCCACCGAGGTCCTGTGCCGTG -GTTTCCCCAACGAATTCGCCATTTACCTGAACTACACCCGTTCTCTGCGTTTCGACGACA -AGCCCGATTACTCCTACTTGCGCAAGATTTTCCGCGACCTGTTCGTTCGCGAGTCCTTCC -AGTACGACTACGTCTTCGACTGGACTGTTTACAAGTACCAGAAGAACGCCGCCATGATCG 
-TCGACGCTTCCAAGAAGGACAAGGATGCCGAGGATCAGCAACGCCGCCAGGCTCTGCCTG -CTGCTGGCCCGATGGGAGCCAGTGCCGCCGCCAAGCCCGGCGCCATCTCCAGCCAGCGCC -GTAAGGTCGTCGAACGGGGCACTCTCGACACTCCGGACACCAACCGTGCCGTGGGAGGTA -GTGACAGGATGTGAGTACATGAAACCTGGTTCCGAGTAGGGAACCCCTTCTTCCTTAACT -CCTACGTTGAGTGATTTGCTTTTCGTGACGCTGGCGTGGAGGACCCTTATGTTCTGAATC -TGACTAATCGCATAGTCTTCGGCGTTAGGCTACGTTCTGCCTCGAAAGGTGCTGCTTTAG -GTTATGGACCTTCTGGTGGCCGTTCGAAGCGGGATGAAGGAGCGGAGGCCCAATGGTACT -AATCGTGTTTACATGACAGTGAGGCTATGCTTTGCGTCTGCTCTCCTGTCTGTTCACATT -CTTGTCTACTAAGAAACCGGGGACCCGTTCATGTCGCTTTGGAGCGGACCATGTCGCATT -CGCCTTCTCGGATACTTCTTTGTTTTTCTTTCTTTCTCGGATCTCTATTCATTCTGCACT -CCCCGACAGCCCTCCTATCCCGTTTTCGATCAGATTTTGATCTCAGCCGTTTCGATTCGC -CTCCGTTTATCGCCTGCTCGCCTTCAGCCCCATGAAACTTTTGCTAATTGGGTGGTCAAA -TTTACGCTGAGCCTGCGAAAAACCTTGTTGACTTATCTCTGTCTTTTCGGATTTTTTTGT -ACCCGTTCTGCGGGACAACCGATCACACGTCTCGAAACGCTTCTGATATTGTGGCTTATT -TTTACATCTTCGCCCTTTGGTTTCTCTTTACAACCTGGCATCTTTGGTCAGAACGACCAG -AGGGCAAGACGAAGGGAGATGCAAACGACGGCATTAGACCGCAATTCTTTTTGTCGGATT -GTTCAATCGCTGGTACTGGAGATCCTTGAATGCATCCTCGGATGCGCTGCCCATCATTGC -TTGCCCCTTTTCTTTACTCATGACGTTTATTCTCTACCCTCAATCTTTTGCTGATTTTTT -TTGCGTTGCGTTTGCCTGGCGATTAATTTGTGAACTATCGACGCTCAATGGGGACGAGGT -GTGGCCTTCAATCTGGCCAAGGGAGGGGACAGGGAGACAGCTTTCGGGAGTATTGCTTTG -CAGGCTTTCTGGATCTCTGCTCAAGCTACGTTGCAGAAATTTCTGTTACAGAAGGTCTCT -TATGCAATCCCGCTTGCGGGGGAAGCTGGTGTGGTTGGCGACGGACATTGGTTAGGGTCG -CTTCCGGGGTGGTATGCGTCGATTTGTGTCAAGTTATTTCTCTTAGCTCTTTCTACATTG -CACTCTCAATTTGCTTACTCTCTGGAATGCTAATCAACTGGCAAACCTGGAAGGCCCTTC -GAGCATCCGTACTTCCGGTCCCAAAAGAGTAAAAAAAAACGGTGAGATCATCGAAATTTG -GCCCATATACGAAGTACCGAGCATAAGAATGGATATTTTTGAAAGGGATTATATTCGATA -TGTAAACCCTGGAAGTATACGTTGAATCGGGATGTACGAGTGTCGTGACATAGAAATCAT -GTACATTGTACACGGAACACCGCCGGTACATGAAACTTGAGCAGATAATAGATTTTCCTC -ATTTTTCCACCTTCACCTCCTAACTTTCCGACAAGAGTTAAAGACCTTGGGAACTGTCTA -ACATTCCGTAATGGATTACTCTTTCTACGACCCTCGATCACAGCCCTCCGGCTTCTCGCT -CTATGGACTGCCCACCCCCGACCAACCCCATGCGCAACCAGATACATTCGCCCCTCTAGT -GAATTCTTTGTTCCCCCATTCAAAAACTGTCGCTAATGGACTAGAATAATTACCAGAACT 
-TCCCTGACTATAATCCCTCATTCCCCATCGACTCCTTTGTTCCGCCACCGCATTCACCAC -CAGAATCAGTCAAGCACTCTGCTTCCAGCAGCGATACTGCCAATCATCATACACGCCCCT -CATTTGATCGCGATGAATCTCAATTCGCAGACCCGACCCTAGGGAGGAGCAGCAGCGAGG -AAAAGGAATCAGCGCCCGCACAAAGCAAACGCAAGGCCCAAAACCGAGCAGCGTATGTGA -TATATCAAACCCACGTGGATCCACGACACATCCCCCCCCCCACGCAAACACTAACCACTA -ACATCCGTTACACAGTCAACGGGCATTCAGAGAGCGCAAAGAACAGCATGTCCGCGACCT -TGAAGACAAAGTAAACACCCTTGAACAAGCATCCAGCACCTTACAAGCCGACAACGAGCG -TCTCAAGCGCGAACTAGCCCAATACACCACCGAAAATGAAATCTTGCGCGCCACATCACA -AGTCAACCGAGGACATGTCAGCTCAAATGAGGCCCCTATGCCAACAGTCACAGGTCCAAT -GAAGTTTTCCCCCACAGACTTTCACACTACCTTCATGCCGGACGGCCCTGGTACGCCCCG -AAGCCCGCATCACCGTCTGACTGTGTGTCCGATCACCGGTGAGAGATTGCTAGATGCGCG -TGCGACGTGGGACCTCATTCAGAAACACGAGCTGTTTGAGCGCGGCCAGCTTGATATCGG -TGATGTGACGGATCGGTTGAAGGGTATGTCGCAGTGTGATGGGCAGGGACCTGCTTTCAA -AGAGGGTCAGGTTCGTCAGGCTATTGAGGAGAGCGCGGCTGTTGGTCGTGATGAATTGAT -CTGATGACTTAAATGTTGGTGGTATTTTCTACGTGTACCGCCGATATGGTTATGAATTTA -ATGTTTGCTGAGCGTTGGGTTTGATGGGATGGATCAATAGGATGAATGGCGATTGGATTA -TCTGATTCGTTAGTTCAGTATATACGATTTGGTTGTATTTAGGAGCGTTTGAGTCATTTA -TACTCGAATTTGGCTTTGTTTATTTGCGGTCATTGCAATTGCCTATCTGGTTGGCAAATT -GATCCCCTGAGCATTGGTATTCGACACCTAGTCGCCATTATGAACCCTAGGCTGGTGATC -CGTGCGTCAATGGGATTTGTTGTATGGATATCTAGAGACCAAATCCAAAGCCTTCCACCT -CCAACTGCAGATATGGATATAGCAGATTTCGGAAACTCAATTCAAATCGAGACCGACAAG -CGTGTTATGTCTTACAGCATCGTTCTCTTGGCTCCTGCAAACATGGAAAGAGGGTCATCA -TTTTTTGGATACTTTCATAATATACAAACAAGTCTGATCATAGTATTTGCAGGAATCAAT -CGCAACCTTGCAGGAAATCAATCCCAGTCATCATCGTAATCTTCAACGTTCAGGTCATCC -GCCTTTTTGTGGTCGTCCTCGTCATCATGTTTTTTGAGACGAACATCGCTCTCCTTCAAA -GCACCAATTTCCACGAGTAAACGCTCAATATCTTTACATTTGATTAGTACATAGCACTTC -TTTGATTCCTGTCTTGAACAGAACTCACCTTCAATCTTGGTCCGGGGACCCTTCAACTGC -TGGAGCGTAATAAGCTGCTTCTTGATCTCGGTATCTTTGTATACCAAGATTGTGGGTGTA -TTCTTCTCTGGGTAACCCTCGATGCACATATCGGCACGAATCTCGCAGAACTTGATGTCG -CCGAATTTGGCAGCAAGCTCCCGCCAGAGCTCCGTCAACACGCGCGACTCAACGTTCCCA -CCCATCGATGTGAGGTTGACAAGTACATAAGCACTGTTGGATGCTTCTGTTACCTCACGG -GCATAATCGACTTTTTGAAGTGGGAAGACCTGGTTGAAATGACTGGTTTGTTGGAGGGTG 
-GAGAGCTCGGCGAGTCGTTTTTGCCTGTTTAATTCGACCGTGAGTAAAGAGGGACCCTTA -TATATACATGCATGACCAGACAAAAAGCTTACCGATAGCTTTCGAGGAATTCTTCGTCTT -CTTCATCCTCTAGTGCGTCCAACTCGTCGAGATCCTTGTCCTCAAGTCGGTTCTCGTGCG -CCCTTTGCTGCGCGGCGATCAGGGCCTCTTGAATCAGAGGTTCCGGGTCTTTGGGCTTTT -CGGGAATAATGCCATGTTGGCGGAGGATATCGTTCCTATATAATATTCAATAACTGGTTA -GCAACACTACATTTAGTTTATACCCCGGCGGGGGTATGTGCAAATAATAGGCTCACCACT -CGGTATCCTCGTTAGGGTCGACTTCAACCTGCATAGTGGATTGAAATGATCAATGATCTT -TAGATCAAGTTCTATTCAAGCGAAATATGGAGAAAATGAGGGGAGCTGGAGGGATAAATG -CGCTGCCTTCAATCATGATGCCACCTGTTTGCAGTGTTTGGTTCGGGCCAAACAACACAG -AAGGCCACCTTCAACTTGTCCCTAATAGCCAAGTTCTTTGGAGGTTCGAGATTCTTTACT -ACCTCTTTTACCTCAATCATATCCTTGTATTTCTCTCCAATCTCCAACCGTTCGGCTCTC -AATTCCTGGGTGGCTATGCGAATGCTGTAGTCGCCTACACCGCCCTACCCTGCTTCTGTG -TACTCGACCCGTGTCCCAATTGCACCAACTCCACCTTTCTAATACGACAGCAACCACTCA -TTGCTGGTGTGATACCAGCTGCTGGGCAAGATGAGTTACATGAAGAAAAACGAAGATGAG -GACCAGGTCATGATCAAGCTTGATCGGACCTCCGTTTTCCAAGATGGTATGCAATCAACA -AGACCCCCTGGCGCCTCCATGCGCTGTACATATTCAATGCTGACTATATAACCGTCCCAG -CACGCCTATTCAATTCCTCTCCTATCTCGCCGCGAACATGCCGAACATTACTCACCAAGA -TCGCTGTCCTCCTGTTCACCGGGGAGCAGTTCCCTACGAATGAAGCCACCACCCTCTTCT -TCGGAATCTCTAAGCTCTTTCAGAACAAAGACCCGTCGCTGCGGCAAATGGTATATTTGA -TCTTGAAGGAGCTAGCTAGCACTGCGGAAGATGTGATCATGTCCACAAGTATCATTATGA -AGGACACTGCAATGGGAAGCGATGTTTTGTACCGGGCAAATGCCATCAGAGCGCTCTGCC -GCATTATTGATGTAGGTCAACAGCTGTGAGGGCCTCAATGGTTGAGTGTATTTGCTGACA -ATTGCAAAAGGGTTCCACAGTGCAGGGTATCGAGCGACTTATCAAGACCGCCATTGTTGA -CAAGACCCCGTCGGTGTCCTCCGCGGCCTTGGTCTCTTCCTACCACCTCCTTCCCATTGC -GCGTGATGTTGTCCGGAGATGGCAGAGCGAAACACAGGAAGCCGCCTCATCATCGAAATC -ATCCACCGGGTTCCTTGGTTTCTCCTCGAGCTCACAGAGCCACGCCATCTCTAACTCGAA -TTTCATGACACAGTACCATGCCATCGGTCTTTTGTACCAGATGCGCTCGCACGACCGCAT -GTCTTTAGTCAAGATGGTACAGCAGTACGGTGCTGCAGGTGTGGTCAAGAGCCCGGCTGC -TCTGGTTTTGTTGGTACGCCTGGCGGCCAAATTAGCTGAGGAGGACCAAGGTCTCCGGAA -ACCCATGATGCAGATGTTGGATGGCTGGCTCCGCCACAAGCATGAGATGGTCAACTTCGA -GGCGGCCAAGGCTGTTTGCGATATGCGCGATATCACCGATGCCGAGGCAACGCAGGCAGT -CCACGTTCTCCAGCTCTTCCTGTCCTCTCCCCGGGCGATCACCAAGTTCGCAGCCATCCG 
-TATCCTACACAGCTTCGCCTCCTTCAAGCCACACGTCGTCAATGTCTGCAACCCCGATAT -CGAGTCCCTCATCTCCAACTCGAACCGCTCTATTGCCACGTTCGCTATTACGACACTGCT -CAAGACCGGTAACGAAGCCAGCGTCGACCGCCTGATGAAGCAAATCTCCGGCTTCATGGC -CGACATCACCGACGAGTTCAAGGTTACTATTGTCGAGGCTATTCGCACTTTGTGTTTGAA -GTTCCCTTCCAAGCAAGCCGGTATGCTCGTTTTCCTGAGTGGTATCTTGCGCGACGAGGG -TGGATACGAGTTTAAGCGCACTGTTGTTGAGAGCATGTTCGATTTGATCAAGTTTGTCCC -GGAAAGCAAAGAGGATGCGCTCGCCCACCTCTGCGAGTTCATTGAGGACTGCGAGTTCAC -CAAGCTCTCGGTTAGAATCCTGCACCTGCTTGGTACTGAGGGCCCTAAGACCAGCCACCC -CACCAAGTATATTCGCTACATTTACAACCGCGTCGTATTGGAGAACGCCATTGTACGCGC -CGCTGCTGTCACCGCACTGGCCAAGTTCGGTGTTGGCCAGAAGGACCCCGAAGTCAAGTC -TAGCGTTCATGTTCTCCTCACCCGTTGTCTTGATGACACCGACGACGAGGTGCGAGACCG -TGCTGCGCTTAACCTGCGTCTAATGGGCGAAGAGGATGAGTCGGCGAGCGCTTTCATTAA -GAACGGTAAGACCTTATCCATGATTGGCAATAGTAAATTTACTAATGGCTTTAGACTCAA -TGTATTCACTTTCCACATTTGAGCACCAGCTCGTCATGTACGTGACATCTGGGGACAAGG -AGACTTTCGCTGCTGCTTTCGATGTTGCCACCATTCCCGTTGTCTCACACGAGCAGGCCC -TGGCCGAAGAGAGGACCAAGAAGCTTACCTCGGCTACTCCCACTCTCAAGGCCCCTTCGA -CTGGTCCCCCGAAGGGCAAGGCAGCCGGTGGTATGGCCGAAGCGGCCAGCGCCTCTGCCA -CTCAGAAATACGCAGAGCAACTTATGCAGTACCCCGATATCCAGGCATACGGTGTTTTGC -TCAAGTCCTCTGCACCTGTCGAGCTTTCCGAGAGCGAAACCGAATATGTTGTTACGGCCG -TCAAGCATATTTTCAAGGAGCACATCGTGGTGCAATATGATATTAAGAACACACTCCCGG -ATACCCTTCTCGAGAATGTTACTGTGGTTGCCACGCCAGAGGAAGAGGATGTATTGGAAG -ACGACTTCATCATTCCTGCCCCCAAGCTGCCCACAGATGAGCCCGGTGTTGTCTACGTCG -CTTTTAAGAAGCTCACCGGTGAAAACAGCGTCCCCGTCACCTCCTTCACAAACATCCTCA -AGTTCACCAGCAAAGAAATTGACCCCGCAAGCGGCGAGCCAGAGGAAAGTGGCTACGACG -ATGAGTACCAGGTGGAGGATCTCGAGCTGACAGGCAGCGACTATATCATCCCCACATTCG -CCGGCAGCTTCGACCACGTCTGGGAACAGACCGGTGCCAATGGCGAGGAGGAAAGCGAAA -CCCTGCAGCTCAGCAACATGAAGGGTATCAACGGTACGTACAACAAGCCATATATCTCTC -TATTTTCTATAAACACTTACTTACTGTCCCAACAGATGCCACAGAACAACTGATCGCAGC -GCTATCCCTGCAACCCCTCGAGGGCACCGATATCGCCCTCAACAACAGCACACACACGCT -CAAGCTGTTCGGCAAGACCGTCTCCGGCGGTCGTGTCGCTTCTCTCATCAAGATGGCCTA -CTCCAGCAAGGCCGGTGTCACTACCAAGATCACCGTCCGGGCAGAAGAGGAGGGCGTTGC -TGCTGCTGTGATTGCTTCTGTGTCTTAATCTCCCTTTGTTTCCCTTTTTGTTGTTACGTG 
-CTTAATCCTGGTTGGGGGGGACAAGAGCGTGTTCTTTTTTCATGTGTCTTGGTTCGTTAG -ATTGCGTCGTCCACCCCTCAAAACTTGTCAATGCAACTTTTCATCCAAACATCATATCAT -ATTTATGTCTATTCGTGTCTCTGGTCTATCTTCGCTGTAACAGTCACTGGTATATAGACC -TTGGAACTTGTATGCTTTCTTCTTAACGCTGCCTGGCTGTTGTCTAAATCAGAGCAGACA -ACAAGCTGCCGAGTCATCAGATTCTAAGGGTTTTCTCTTTGTACTCCCAAAAGTAAGCTA -GTACCGTCAAAGATGTATATATGAATGGAAGAAAAGAAAAATATTTTGTTTTTATATGCT -AGTGATGTTATGAGATAGTGCCGTATAGTGCAAAATCAAGGACGCAGACGCTGAGGGTCA -CGGGGGAACAGCGAGGCCAAACGGACATTAGGCAAGCCAAGGAAGAACATAACAATACGG -TTAAGACCGAGACCACCACCAGCATGAGGGGGGCAGCCCTGGCGGAAAGCATTGAGATAA -TCCTCGAAGCCCTCCTGGTTAGGCTCAAGACCCTTAGCACGCATGGACTCCTCCAACTCC -TTGATGTCGTTGATACGCTGGGCACCGGACATGATCTCCTCGCCACGCATGAAGAAATCG -TACGAGTTAGAGAAGCGCTTGTCTTCGGGATCGGCCTTGGTGTAGAAGGGACGAACTGCC -ATGGGGAACTTGTCGAGGACATAGAAGTCGGTGTCGTACTTCTCGCGGATGATCTGGCCA -AGCTGCTTCTCCATCGCGGTGGAGAAGTCGTTCTCGAAGCGTTCTTGCTCGGAGACGTCG -ACGCCGGCTTCCTTCAGAAGGGCGACTCCGTCCATGTAGTTCATGCGCAGGGCCTTGCCA -TCCTTGGGGAGCTTGAAGTCGCCGGCCTTGGGGTAGGACTTTTGGATGGTAGCGATTTGG -GAGGCGTAGCGCTCCTTGAGCTGGGTGAGGATGAAAACGAGGAGGTCTTCGGCGAATGAC -AGGACCTCGTGGTAGTGGCCGCGGAAGGTCTTCTCGAAATCGAGACCGGAGAACTGTGGG -ACATGTTAGAAATAATCTGTCTTGCGTGTTTGAGCATATATATCAGTAGATCATACCTCG -GTCAAGTGGCGGTGGGTGTTGCTGTCCTCCGCACGGAAAACGGGGGCGATCTCGAAGACG -TTCTCCATATCACCGGCGATACACATCTGCTTGTAGAGCTGGGGACTTTGGGCCAGGTAG -GCATTGCGCTTGAAGTACTTCACCTCGAAGACACCGCTACCACCTTCCGTGGCGGCACCG -ACCAGCTTGGGTGTGGAGATCCATCGAGAGCCACTCTTAAGCATGTACTCAGCAAAGAGT -TGGGAAACACCGCTGGAAATCCAGGTGATGGCCTGGCTGGTCTCGGTCTGCAGATCGAGC -ACACGGTTGTCGAGACGGGTCTTAAGAGTGACGATGGGAGTACCCTCGGAGTCAACCTGA -GGACCTTCCTCGGTGGTCTCGGGAGGTGGGCGCTCAGCATCCTTGACTTGCATAGGCAGC -ATCTGCGCAGCCTCGGAGATCATGTAGACCTTGCTAATGTGGATCTCAAGGTGGCTGATA -GAGGCGGACGAGATGGGGATCTCGGGCTTCTTGACAGTACCGGTGACCTGCACGATAGAG -TTGACGTTCAAGCCGCCGGTGTACTTGATCATCTGTCGCGAGATGGGCTCCGCGGCCGCA -ATGACGGCCTGCACCTTTTGTCCCTGCTGGCGCAGCATCAAGAATGCAAGCTTGGCACTC -TGCACACGCGCATTGTCCACACGCGCCACAACTGTCACTTCCTTCTCGTAGTGTTCCTCT -GAGAGTTCGGTGAATCGTGTGGCTGGGACGAGGTCCTCGGTCGCTGGGAGGGGACCATAT 
-AGATGCTTGGCCGTGTCGTTGGCATCCTGGGCGGCGGCCTGAGCCTTTTCTTGCGCAGCA -CGTGCCGCAGCCTTTTCTGCCTTAATCTTATCCTTCGCGGCCTTCTTCAGGGCGTTCTTG -CTTGCGTTCTTGGTCTCACCTTCCGCGGCGGGACCGGAGGCATCCTGGGCCTCGGGCTTA -GGACGGTGGGGCAATTCGGAGTCGGCCATGGTGCTACGAGGACAATTAGAAGGGGCAAGG -TGTTGAGTTGGGGGCAGTAGATCCTACCTTAAAGATTTTCTGTCCTGAGAAATTTGCAGG -ACTTAACGCAGCAGGCTGTTTGTAAGTAAAGCTGCAGAGAAAAGTGTTCCGTTGACTCAC -CCCGCCATGCCCTAAGAGGGGTAGGCGGTGTGGGGATTCGAACTAGATATACTGTCTACC -AGCTTGATAGTATACTTGCATGTAAATTATTCCTATGTACAAATCGGTATTACTTTTTAG -CTTGATTGAAATGTATTTGACATGCTTATGTGGCTTTGTGCTGGTAGCTATTCAAAAAAT -ACACACATAAACACACAACCAGCGCCATATTCATGCTGATCTTCATACCAATATAGCGAG -TCTAGTGTTCCTTGAAAATCACGCTGCAGGCCAGCTCGATCGCTTCAGTGCCCATTGAAC -ACCTTTGTGCACATCATCAGCAATTGTCTGAGGGATCCAAGCAGGCTCTCCGCCACTGTA -TACACCAGTGCGGACAAGGATCGAGTGCCAGCTACTGCCAATTTCACTGCGATAGGAGTT -GGCTCCTCGAATGTCTGACTCTGGATTATCACCAACCATGTATACATTTTCCAGGGGCGT -GAAATTTGCAGAGCTTCCAAAGGTTCTCAGTCGGTTCTGAAGCAATTGACGTTCTGCAAA -CTCGTAGGTAAATTGATACGGTTTGCCGATCACGGTCTTCTTCAACTCAATACCCTTGCT -GGGACCACCTGTAGTCGCAGCCCAAACGCCCTCAAGTGCTTCTCGGAATCCTCCCTGACC -TAGACGGGGCAAAGGGTAAGCGGCAGCCCACCACAGATCTGGATTAGAGAAGTACAGATG -GGGCTGGCCATCCTGCTGATATCCATGGTTGGGAAGATCGAATCGGCCGTTCTTTTTAGA -CAAAGTGCCCATGCGGCCTTGCGACGAAAGGAGGAGATCCATGATGACCTGAGTGTCCAA -AGCCCAATCACGAGGATCATTGAACACAAAGACGGCGTCGATCTTCAAACTCTTCGATGG -ATCTTTGAAGTTGATGGGGGTCGGAAGCGGTCGGGCAAACTTTTTATAATAGTCGCTGAA -GTTCTTCGAGAACGGCCAGACAGCGGGATAGGCCATGAGAATATCGGCCGGGGTGACAAC -ATTTTTAAACCCATACCTATTACCATTATTCAGCATATACCATTCAATATCAGGAAGGGG -CCATTGACATACTGTTCTGCCACGCGTCGACAGTTATCCCCATCACCTCCGGCTACTAAC -ACACACTTATGTTCCAGAGCGTTCGACTCGTCCGATCCCTTGACCAGCTCAGCAAAGGGA -GAGTGACTCTGTACAATGTCTGTGGCATCTAAGGGGACTCCTAGTTTCTCGCTAATCTCT -GCGACGCGCTCTGTCTCGTGTTTACCGCCGCCATTGGTGAGCAGAATGAAGGGGATGCCT -TGCTCCTTCAGGAGAGCCAGCGAGTCGGCGGCACCGGGTATAGGTTTCGATGCGCGGAGG -AGGACGCCATCAATGCTGTATCATGTGAGAAGAAGATCAAGTGGAATGCGTCTGGTCAAA -ACTCACTCAAATGCAAATGCGAAGTTCGGGATCGGTCGAGAAGTAGCTATGGGACTAGCT -GTTTGAAGAGCTCGGTAGGTGATTGGTTTCCGATAGGAGATAGATTGGCGCTGTAGTTGC 
-AATAGGCCGAATGACCGCCAAATGGTTTGCGATCTCATGGTGGAATGTGTACAGAAGGAA -ATGTACGTCATTGCCAGGTTCCCCGGGGCCAAATCGGCCAGGTCTTCTCTCCGAACTTCC -CCACTCTCCACTTCACTGCAACTTTCAGATTTCTAAAACTGCCCTTTCTATTTAAAGCTG -GAAAAAAATGCATGATTTGAATCAAATACGCATTAGAATCAATTAAACCATGACTTAGCC -AGAATTTTCACCACGGTGCTTCATTATCCTCTTCAATATGTGTCTTTTCTTCCACCAATA -CTCTCCATTGAATCGGGTTTACGGGTTGTGATATACTACGCTCAGAAAATAGCTGTTGGG -ATGGTGATATGACTTATGACAGGCTGATGTAAAGAGATGTGTTACCTAGGTATGCAACCT -TGAAAGTCTACCTATAGAGAATGGGCGCTTAGCAGATTGGCTCACTATCCATGATAACAA -GACCCATGTCAGAGAGACAATCGAGTTCCTCCTCAGAACACTTATCAATGTTCTCCCAAA -CCCACTCGTCAAGTTTGGCATGTGAAGCTTCAGGTATTCTCTTGAGACAGCCAGGGCTGA -TACCAAGATAGCACACCTCAGAAAGAGTACTTTTTAAGGCAACACAAAGATCATCGGGAG -CAATCAGATCAGAAGAATCGTCGTGATATGGTGCCGTGAGCTCTAGCTCACGAATTGACG -TAAAAACACTCATTGGGTCAAGCCCAGGAATAATAAGAAAATGATGAAGCAGATCGATGG -GTATCCGAAGGGAAGTCAAAGCAATGAGGGTCGGTAAGACCTCAGATGGTCCCGCCTCAA -ACATCTCGCGGTTGGAAGGGTGAATGACCAGTCGTGTAAGCGTGGCTCTGATTTGCTCGT -TGAAAAGGATCTTTTCCAAGACGGGAGTGTCTAGATTCTCGCAACCACACAATGTTAAAC -TATGTAAGTTAGGGGGCCACCTGAAGGAGGACATCTTCCCAACATCAAATCTCCCGCCAA -TCTGTAGGTTTTCCAAATTGGATGGCCAAACTTCAGCGGAACCATCATCCGTGATAAACA -TTGAAGAGGGAAGCGTAAGCGTTTCCAGTTTGTCGAGACCGCTGATGGCTTTCTTGAGAG -CAGAAAGCGGAATTGGCTCAGCTACAATGTCAAGGCCAAGATAGGAAAGCTCCTTGCACT -TGGCTAAGGGCGCAAGGCTGTTGATCCTGAAGAAAAAGGTTGGCTTAGGATTGGATTGGA -TGTTGTGTTTGTCAAGATATACTTACGAAAAGGAGACCCTTGGAGCAATAAACGTCCTCA -GATTCTTCTTCACTCGACCTAGAAGCCGTGCTGTCAGGCTATTGGAGCTCTGATGTACCA -ATGATCCGAGGTTCAGTTCCTGAACGAAGGAGCCTAGGTCTACTTTTCGTTCTCTGGAAC -GAATTGACGGGCATACAGTGTTGGTGAAAAGCAAGAAGCTATTCCCTTGGTGAAGTTGGG -GTCGGCAGTAGAGAAATTCAATGCCAGCAGAGTACCACTGGCGAGAGACCGAACAGAACC -TTCGAAGTGTGGCCTGAGATCTGGGGGTATTGGAAATGAATTCGGCGATTTGTATTAAGA -CCTCTGTAGGAAAGTGAACATATCGGCGAGAGGTCATCACTTCGGAGGGATCGATGAACT -TTGACATTTCGCAAGTCAAAGAGGTCGAACTTCCTAAGTAGACATTGTAGAGCACAAGTT -TAGGCGAAATATCATGTAGGATGTTTGGTCACAATCAGGGATGCCGATGAGAACCAAGAA -GCAACAGAGAGACCGAACCACTTCCGTGAAAGGCAAATACACAGGTCATCTCAAGTGAAC -CGGGATTGTTAGGCAGTTTTTCCTTCACTCAGGTTGATGATGGAGCAAAAGAAGTTTCCA 
-AATGAGACCAATAAGAAGGGGTCCAAGGAAAATAAGTGGAAAGTAAGTGGTGAAAAAGAA -ACGAAGGTGGGATAAGAGTCAATAATTGAAGATGTAAAGATAGGCGATGACGATCCTTAT -CTTTTTCTGCCAGCAGATGCATGTCCCCCGCCTTCGATAAACAATTGTTTTACACCGCCT -TAGGTTTACTTGTATACAGTTCCAATATCGTATGTTGTATGTAGAATTACGATAGCAATA -TTAAAAATACAATAAACGACTGAAAATGCTTCCACATTTTTTTATCTGTTGTCACATTTA -GAAAATACATGTAGGTCGCCAATTCCATGTTGTTGCAAACACCATTGAGCCGCTGTCTGC -AGTTGGCCCAAGCTTCTTCGATCCTACAGGGTGAAGTACCACCTTCACCTCACCATGCCT -GACAAGATAAAAGCCACCATGTCGCCCCGCCCCTTGAAACGTCCTCGGGTATCTTATGAG -GATCAAGATGAAGCATCTTTGGCAGCATCTTACGAGCGTCCCCGTCAGCATCCTCTCTAT -GGTCAGAAGAGCGCATTTCCGGGTTTGGATGATGAAGGCGATGAATTGCTGTACGATGAT -CCAGAGGATGGATTCGAATATCTACGCATGGTTCGGTATGTTTACCATTTGGTCTCAAAA -GCCTTTTGAATCCCCTAAAAGATGCCGAATCTTGCCCATTTCTGCGCATGTCGAAGTTTG -AACTAATTTTTGCTCGATAGATCCGAAGCAAACTCACTACCTGTGCTATTCTCTGCGCCT -ATCCAGACTGCAGACCAGTCAAATTCAGCATCGAACGCGCAACGCGATTTGAAGACACTT -GATCCAAACAACCCTCCTTTACCAGCAGGCTTTTATGAGGACGAGGCCTATATTGCTCCT -GCGGGAGATCTGTACGAAGACAACGCCGCACACTCCCCTTCAAAGCTCGATGAACTCTAC -CCCGATGCACAAAGATCCTACAACAATCTTCTCCGGCATCGGTTCCTCCTCTTGCGATCG -ACCCTACGATGCAGTCCACCTGCTGGTGCCATCAACGCCCTCGACAATGACCACCCTATC -AGTCTCCCCCGAAAGGCCGCAGCTGCACACAAAACCTGGCGCCGGCTCTTGGTGACAGTC -GAACCACGCATGGTTCAACTTGCAAGCATGGACATGGAAAGCGTGCTAGAAGTCCTGGAA -ATACTGGCGCGTATGGTGTCTGATGTTGTGCGAGGCGATGACACCCAGCGCGTACGGCGC -ATTGGAGCTTGGGCTTGGGGCTTGTTGGGGAAGTGCCGTGAGGTTGGGCAACTTTCTACT -CAAGAGGTAGGGATTATTCGAAATCTAGGCAAACGAGCTGCGACGATTTTGCGCAAGGTG -CAAGAGCTAGAGAACGACGAGTACGAGGAGGAGGCCGATTCTTCTGTCGCAGACCCAACA -AAAGAGGAAATCCAGCAGAATAATCTCGTCGAGGATGAGAAAGTTACCAAGGACAACGTT -CAACAGGGGTCTACAATTCAAAATGAGGGAGTGCCCAACCAGGAAACCCAACCGGAACCT -ACAGCCCAAGGCGAGGAACGGACCAAGCACGGATATCAGCTGGAGACTAAATTTCTGAAC -AGGGAGGAAAACAAGCAAGAAGAATTGCCAGATACTGCAGAGGCAAAAGACTCTAGCATG -CCTGATGCATTGCTAGAACAACCCGAGTTGGAGGCGGCTAAGGCACGCTTGCGGGCTCAG -CTGCAGAACAGTCGCGACCCTAATGAGACACCTGCTAGCTGCGAAGAAGAAGAAGACGAC -TGGTCGATTCAAACCCATGCCCTGCTCGACATGATCATGACGGTGGTTGGTGAGTTCTTT -GGCCAGCGGGATCTGTTGGCTGAACGAGAAGTCTGGGATTGAATGGATTTAATTTGTATG 
-TATGAGATACCCATAGTTAGTTAATATTAATTCCGTGTTACAACGTGCCCATGTCTTGTG -TTCATACGTAGTGCTGATCATCCCGAGGTCATGGTGCTTAGTGGCCTAAGCTTCCAAGGT -TTTGCGTCATGCACCAGCAACCTCTCCCCTAATCTCTTTTGTTTTCCCCAACTGTTGAGG -GTAATGATAGTTCCGTTTCAATTCTAATCAAAATGGCGCCAAACGTGGAAATAGCGATCC -CCAATACCACCCTATCAAACACCTCCCCGCCATACACCGTCTACCACATCACCCTCCGCC -TCCCGCTCCGCTCCTTCACGGTCCCCAAACGCTACTCAGACTTCTCAGCCTTCCACAGCA -CTCTAATCTCCCAAACAAATGCCCCACCACCAGCTCCATTACCTCCAAAAACCTGGTTCT -CGAAAACAGTTTCGAATGAACGATTACGCGAAGACCGTCGCCGTGGCCTAGAGGAGTACC -TTCGCGCGATCAATGAATCTGAAGATGGGCGCTGGCGCACTTCGCCCGCGTGGCGTGCAT -TCCTCAATCTCCCCACGGCAGTCGCATCAACGGGTAACGGATCAACAAAGACCTCCAGCC -GACTCCATGCCGCCATCACAGACCTAGCCGCGACAAACGCATCTATCACAGACCCGACGC -TATGGCTCGACTACTACCGCGACATGAAGAATCACCTCCACGATGCACGACTACAACTAT -CTCGCAGAGACCAGGAGACAACGCCTCAGAAGCAACACGAGAGCTCTGCACAGGCGAAGG -GCAGCCTTGTCCGTGCGGGGACAATGATCACCACTCTGGACGATGGTCTGAAAAATCTAG -GACGCGATAACAGCTCCGCATCATCCGGTCTCGGTGGAGGCGAGATCCGCCGACGAAAAG -ACCTGCTCATTAATGCGCGCAAGGAAAAAGACGGCCTGGAGGATCTCTTGCATGCGATGG -CAACCAAGAGCCGGCTTGACCATACTGTCGCATCTATTCAAGATAAAGAAGCTTTGATGA -ACTCTGGGGATGCGGCTAGTGGGCGTAGGACGCCGGCGCGAGCGGGCAGGGTCCTTGGCA -AAGAGACGGAGCGGACTCGAGAGCTTGACAATGAGGGGGTGCTGCAGTTGCAGAAGCAGA -TGATGCAGAGCCAGGATGATAATGTGGATGAGTTGAGGAAGATCATTCTTCGACAGAAGG -AACTCGGCACGCAGATCAATGAGGAGCTCGAGGTGCAGAATGATCTGCTGAGGCTGGCGG -ATGAGGAGGCTGATATGTTCGTTCACATCCCTTTCGCACTTTTTCCAGTTGCTTTGATCC -ATCTTGCTAACGAGTTGCCTAGATTGAAATCCAAGATTGATATTGGAAGGAAGAGAATTG -GAAAGATATCTTAATTCTAACTGCATGTGTTCCAGTACGTGTAGAGCACATCTTCTATCT -ATACCTTTGAGCGGATCTTTTCAGCACCATTCAAGCCAATTTATCTGCAAATAGCATATT -GGTCCCGTATATCAATCTTCTTACTTGCGCGGATATCGTTTTGCCCAGTAATAATCCTCT -AGAATGGGTAGAAGGTGCAGGTTTCCATTCTCAGTCGCTTCAATCTGGGCCCGGTCAAGT -CTATTCTTCAACTCCCCCAAAGGAATTTCTTTCCGATCAATTGCATCAAGCATGAGGATG -AGCACTGCTTTATCGCCCAGTCTCACACATGCTAGGAATGGATCATCGTGTCCGTCTTGG -CCTGTCAAACTCATGCGCTGCCGCACAAATTTCACCATTCTGCTGGGAGGACCGTCATCC -TCAGTCCCGATATTAGAACTAGGGATTGACCGTCTACTCATGAATGGCGAGTCAGAATGA -GTGTGGGCATCACTTGTCTCTGTCACGAGTACACCCCAGTGGTCATGCTTGGCGAAAGGA 
-TCTGCGCCTCTCTCCAACAAGAGTCGTACCTCTGCTTGGTCGCAAACCCACTCCTTTTTG -GCATCGACATGCATGACTATCCGGTACAAGGGCGTCATGCCAGGATAACACCGTTCCTCG -ATGTCAGCGCCATGGGCTAGGAGCAGATCAAGAATGGCTGTAGCATTACTGCTACCGTCA -GTGCGAGGGGAATATGTTGCAGCGACCAGCAAACATAGCGGAGTGTAGAGTGAAGGTACT -TTGGGATCGACGCCTCGTTCAAGTAGTCGCTTGACCACTCCAGTCATCCCAAATTGGACA -GCGTGTACTAATCCTTCTTGTGCCTCGCTACTCTCAGGATCGATTCTCAACAGTCCCCGA -TCCAAGACCAGCTCTGCCATCTCTTCTCCTCCCAAAAATGCCTCTTCTATCAATTTCTTG -GGAATCGTTAGGAAGCTAGGACCTTGGTGGTTTCTAGGCCTCTGTTGCTGCTTGGGTAAC -GGAGGCATTTCCAAAGCAATACCTCTCTCCATCAATATTTCCACCACATCCACTGCACCG -AAGTACAATGCTCTGGTAATAGCGTTTTCCGCCGAAGATGGATTGAAAGTGCCATCAGCA -CGTATTGCAGTTTTCGTCGGATCAGCTCCTCGATCAAGCAACATGCGAAACATGTCGGCG -GATACCATCGCAGCCTCGGCCAAAGCCGTTTCTCCGCGGCTGCTTATGCAGTTTATGTCA -GCCCCGTGGTCGAGTAAAAGCTCTGCAATTTTGACATTGCCTAATTGGATTGCCAAGGAA -AGTGCGCCTTCATCCATGATTTCGCCGTCGTACTCCTGTGTATTGGCACCATGCTCGACG -AGGGCCGTGATCTCTCCCTGGTACTCTTGTATATTGGCACCATGCTCGACCAGAAGTTTG -GTGATTTCAATATGGCCATTGTCGATGGCTTGGAGTATCGGCCTCTCGTACCGTGTCTCA -CAGCGACGATAGACTCCATCTGCGGAAAAGCCATGCTCCAGAACACGCTTCACAATATCA -TCCCACCCATAGCCAATGGCTATATAAAGAACCTCGGCTTGTGCTTCCTCATCTTTGTCG -TTTATCATATCCTCAAAGTCGGCGAAGGTTCGAAAAAACTCGGCAAAAACAGCCTTTTTC -TGCCTCTCAACTAATCTCCAATTCGGATCCCCGATTATCGATGATAACAAGGCGCATTTT -TTCGTTAGTTCCTTGGGGTATATGTCCAATCCTCGCGCCAACAGAAGATCAAGGATTGTC -AGTTGACCGTGGATAGCTACATGGTAGAGTGTACTCAGCCAACCTTGAACCATATCAGGA -TCGGGATCTGCTCCATGGTCTAAGAGCACTTCCACAATCTCCACATGGCCATGGCCAGCC -GCACATGAGAGGGCCATTGAACCACAGAATCTTTTGTTAGGATCTGCTCCCCGTTCCAGA -AGCAATCTGACCATTTCCAGGTCCCCATTTTTGGCGGCAGACATGATAGGCGGCTCCTCT -TGAGGCGAGGGCTCGGCCCCTCCATCCAGCAAATATTCAACCAATTCGGTGTTGCCCTGA -AGAACCGCAGCATGGAGTGGTTGCTTATGCCAGTATACGCTGTCTGCGTCGTTCAATGCG -TAGCCAGCCTCGATGAGAACTTTCACAACGGCTAGATGACCATTTTCTACTGCTGCAAAT -ACAAGATCGAAGCATGTTTCACCGCAGCAGTCGTGGTCATGTTCGAAAGTGACGTTATGG -GCAACTAAGAGCTTGACAATCGCGACGTTGCCACGCACTGCCGCCAGATATAATGGATGT -AGGACATTTGAGGCAGTCTCGGATCTCTGATCTTCACACTGGCCGCAGCTCTGATTCTGG -ATGAATATACCAGGCTCTGTGTTTTGTTCCAGTATCAGGCGCACTATAGTCTCACTGCTG 
-TTGTCCACTGCCGCGACGAGCGATCGTCTGAAGTTGTAATGACAGGAGTGTGTGGAGGCT -CCAGCATCAAGTGCTCGATGGATGGTCAACTCGGAGTTATGTTTTGCTGCCCATACAAGG -GGGTACCCGTGTAATAAGCGATTGCTGTTCTGATAGAGAATTGGGTTTAATAACAGATGT -AGCCCCGAACATGTTTGCGCCAAAGAATTGAGGTAACGTGCTTCTCTGAGAAACTTGACG -ATTATGTGGATCAGCTCAGCTGGGAGATGTGTGAGAGACATGGCGGGCATGTTGAGTATA -GAAATCTATGTGAGAGAGGAATTGAGTTTCTCGGAAATCGCAGACAAACAAGTGAAGTGT -CTTAATATTACGAAGGTGGGGGTGTAGGCAGTCACTTGGCGGCCCCTCTATGCAAGTAAT -CCTCGAATTCCTCTACTCCGTTATCTTTATATATATCTATGGAAAGAGACATACTATATC -TTGCTTAAGTATCTACGCGCTGAACCCACTTTGAGAAAGACATGTATGGCGTTATGTACT -CCGTTCGCCATACGTATATCAAACATGGGTAGGTGCAGGCTTGGGTCAGAGCAGTCGGGT -TATCTGGTCATTCCATGCTGACCTGCATATCCGACCCGGCTGAGTTCAGCTTATGCAGCC -CTAATGCACTGGCCCAGCACACAAGGCACATCTCTCGAGACACATTTATGGAATATATTA -TTATTGAGTCAGCCTAGCCATATGATTGATGGTTTTCAGTGTGTTTTTTTTCTATCTCAA -ACAAATGTGACGTTATCTACATCACCGCTCCACAGACGTTGACACATCAACAGATCGGGT -CACTCCTATGCCACTCAAGGTCAATCTCGGCAGATCTCCGTGTTCACTATTACCGTGGAT -GACTGTTTCGATCTTCGGATTCCCCTGTGCATTGTACATGAGCACATCTTCGTCTGAGCC -TGTGCCTCTCACACTCATTCGATTCTTGCTTCCGTTCATTGTGCCCTGGTTTCGCATTGT -GCTCATTTTGTAGTCGGACTCGAGGGTGGCTTTGTTACTGGAATATTCGTGGTGGATGCG -GATAATGAGAGCCGAGAGGGAAGGGAGACAGGTACAGATAATACCGATGGAAACTTCCGC -GTTACTAAAAACAATTTCTCAGTTAGAATGTTGAGTCAAAAGGATAGGCTCAGAGGAACA -CACCCTAGCATGTTGATTCTCATAAACGCATGAGAAAGGTCTTTCGACTGTCCGGTAACG -ACAATAAGTGCCAACCGGATGATACTGGCTGCCACAGCCAGACCCCCTGCTCCCAAAATA -CCCATCACCCGCATTTTTTTCTTTGTAGACATTTGCAGAGTCAATGTCAATGGCAATGGT -AAGATGAGAACGATCATATCACTGACCACACTGACAACAGCATCAGCCAAGATGATCGAT -GTCTGGTCTAAACATGTCCCATGGATGCTCTGGTCCCAGAATTTGGCGATGGGGTTGCAG -ATTCGGATTTTGACAATGACAGCCGGGATGTAGTAAGCCAACATGACTCCCATGAATATT -TTAATAAACATGACGCATTTTCGGAAGGGACTGAATACACGAGTCATGATCCAGAGGAGG -CACACTTTCGTGAGATAGGCTGTTGGTCCATACATGACCAGTGTTGCGTAGATAGTCTAG -AGAGTCAGAAGACGGGATGATCAAGTATTCCAGTACTTTTCATACCTTTTGGAAAGGAAT -CTTGTTCTCATCGGGGACGTTCCACCAGTGTAAACCTCCTCCATATTGACCCACTATCCT -CGCGTTAGATCGAGCTCGTCGCATGTGGTACTAGAGACATACTGAGGAGGGCAATGACTG -AGTAACAGACACCGAGAAACTACTCAAATCATTTGTTAGCCTAAGTTTCCCCCTTCTTCT 
-CACATGTTAATCTTACCCAGGCAACTGCACATGTCACTATAGAGGCTGTTAGCATTTCTC -TGTTCGGTGCGCTTCCATCCAACAATCCCACTCACAATCCTCTTTTCCAAATCCGTTGAG -AATAAACAATCGAGTATAAACACGAAGACCGAAGAAGGCTGTCATCCCAATGATACATAA -TGACTGGCAGGCGTAGTTCCATTGCTGCACACTCCGATCAATGGACACATCCCCAGAGCC -CCGTTCTAACAGCAACATGGAATGCATCACGGGTTTTAGTAAGTCTAGATTTTGTAACGG -GGGACGGCTGTTTCAAGACAACCCCGTCTTTTAAGCAACAATATGATGACCTCGGAATCT -AGACAATAAATCGGCTCAAAAACTGGGTCAAATTTCAAACTGACCTGCTTCCACGAACTG -CATGGTTGAAATCGGGCTATGATCGCCCACGATCGGCTCCCCTGAGATAGAAGAAACACT -AAAGCTGTCAATGAGAACTCTAGTAAGGTGCCAAGCATACCAACCAACACCGCTACCCTG -GTGTTGGTGCGCCCTAGCAAACGCCCAATCTTCGGAATTATATCTTGTATAACTTGCCCA -TACAATGGATACTTTGGCACATAAACATTTTTCGAATCTTATTGTCCTTTGAGATAGTTA -GGAGGTATAGGAGGGTGTACATAGTGTAAAGTCTGATTCGTTTATCTAAACTATGGTAAT -CAAATGTAAAAGCAATAGTCTTTTATGTATCTAGCACGTTTCTACTCCATATATGTAAGC -TTTTATGTATTATGATTCTTGAAACGCTACACTTCTTTCATGTAAGATACATGAAATGAA -TCGGAGTATTCAATCAATTAAGCCAAATTTATATTATATAAGTATCCTTTCTCCCTCTTA -ACTAACTAGTTAGTGTGTGGGTCAAGGAGTGTTAGTTCATACCTGTGAAGTGTTTAAGTA -CTATTCTTTGTATAGGTAAGTCTTTACCTAATTTCCGAAAATTGGGCGTTTGCTAGGGCG -CACCAACACTAGGGTAGTGTTAGTACTTGGTGTGCTTGGCATGGCTATTTGTTCATAAAT -GGAAACATGTGGCCGGAGCCAATTATCCATGCATAAAGTCTGTGGAGTGAAATGCTTGAC -GATGACTCGTTCCTCGGGTATACACTTCGCCAACCCTTTTTTTGTCTTGTTTTGACACTG -TATAGCCTTGATGCACGGCGTAGAAGCCGCACATAAAGTACATCGTGGCTTATATTCAAG -CTTTTGTCCTGTCATTTAAATAACACATAGTACATACAAGACCTGTCACTCTAGATATCT -TACAGACAAGCTACTATATGTTGCACATATAAAATGCCGAGATTAAGTCAATCAAAGTGT -TTTCGGCAATAGCTTAGCCCAAACATCTATAAAACTAAGAAGATATAGGCACCTCTGATG -GTAGGTAAGGTTTTAATTACAGACCTCGGGAATTCTGGACATCATATTGAGTTAGTGGCC -AGAGCTAGTATACGCGGGAAGTGGACCCCAAACGGTCTCGTCAACCATGGTTGCGTAAAG -CAAAACGTGATTTAACTTTCGACCCACGGCATTATCTTAGACAAGTGCAGTTTCGTCAAA -GTGGAACTTGAATTGTGGAGCACAGGGTCTATCCTCTGGTGGAGAGTCCAAGTTCTAGTC -TAGCCCCGGAGGAAAAGCGAGGTCAAAAATACGAGGCAATGTCCACATTTGAATAGTGTG -CTCTCTTTGACCCTACTAAGTAATTGAATCGCATATCATTATTCACGGTATACTAAACGG -CTATTGGAGTAGTTTGCCGTGCCGATATGTATTCTCTGGTACGTTCGCCCCGAGCCTTCG -ACCCACAGTCCATAATGTAACATATATATTGTACTCATGACACTTGATTGTCCCCGAGCT 
-ATCTTTTGTTGCCCCCAAAATCGCTTCTTTTGCACAACATAGTGGGGCTATCATGTCATT -TACACACTTGGAGCGGGACGGCACCACCCGAGTCAATTGCCGGTCAATAGGGTTTACGTA -CTAGGATTTCTGCCGAAAATGACTATTCTTGATTGAATATGTGACAAAATAATCCGTACT -CTCCTCGGGCCTAAAAAATAGCCATCATATTCGGAGGCGAGGATACTCCACCGGGCATGT -TTAAAGGCACGGATGCCACGAAAAAGCTCCACCGCTGGAGTTCCTGGCAGGTGCGTGCCA -GTGCCTCAAGATCGAATAGCTCCCCTGTTGTTTGTCAGTCAATATAATGGATCTGCAGAT -CTTGGACAAAAACGCTCCGTCAACTTACCGATCGGCATCCCCCATCCAGCAAGCACATAC -TCATGGAGGAAGATGTCAGGAGATTGTGGCGGGTAGACCTGTGGGTCCGTCAGTTGATTT -TCTTGATCGACTCGACACATGACGAGAATATGGTGGAATATTTCTCTTATCACTTACCTC -CCAACTAATTGCATCACTAGCGATGGCGGCAAATCCGGTGTCCCACAGCCAGCGGAGAAC -TTCTGTGGTCGCCTCAACTCCAGCATGCTCTGGGCTGGGATTATCCGCATAAGCTTGTTT -TTGGTCGTCTGTCATGGACGTGTCCCACTCCTTAGTCACTCCTACACGGACGAAAAGGAT -ATCACCCTTCTGGAACGTGATATTATTCTCCTTGGCAATCTCGAGGATATTAGTGAACCG -CACTTGGTGGGTCGAAAACGTGGTGTATTTGATCCCCTTTTTCTCGGCCCATGTAGCGTA -GTCGATCAAGACTCCACGTCCTATCGGTATTGCATGAGATAATATCAGGGGATTGATGTG -AAAGGATGAGTACCTGCAATGCCTTCTCTGGCCCAATGCTGAAGACCAATTCGATCATTT -TGGCGATCATTGATTTCTTCGCTGGTTGTTCCACCATAGAACAAACGCTCAGATTTACCA -GGCACTGTCTGCGAAAAATGGCGGAGGCCATCCCACTGGCTACTTTGCTCTGTCAAGGAG -TCAATGTCTCAAAAACGAGAAAATTAGAACTCATCTCAACTCACGTGGATTCATAGTATA -GATATCATCGAATGCCACACCCCCCAGAAGAGGCACAATTTCATGATGGCATGGCTGACG -GTTGAGATTAGGATAATCCAATTTCGTCATCTCCCATCCCATCGTCACACGTCGACCACA -TCTGATCTCGCTGGCAGAAGCCGCGGCGACAACTTCAGGGGTAAGAATCGCTAGCTTTCC -CAGTCCCTCCTCATAGCTACCTGGCTTTCCTAACCAAACCTGTTTTGGATTCGGAAGCTG -GTCAAACGGGATGTCAAAAGGGGATTGGGCCATTGTTCCGATGTTCTGTCGTGTCCGCTG -GTCGGAGTTGTAAAAAAATATATGTACTTTTGAGTCCCGGGGAATGCCGTAGTCTCGTTC -TAAATCAAAGGAACTAGCATATGGAGTACTCCGTACAACGCATAGGCCTTTTGCCGACGC -AGTGGGGATGTAATTTGTGAACTCAATAGTGTCGGACCTGCACTCCCATGACTCCACATA -CGCATCCTTGGAGAATACAACCAACGTAGTACATAGAAAACCATAGATGATATAGAGCAT -AGCATCTTGGAAACAAATTTCAACAAGCGAAGATTAGCTTATGAAAATATTTTGAGATGT -GTCTGGACTCAAACAAAGAAGTTCCCGGTGATCGGCATACTAGTGGTATTCCTACGAACT -TTCTGCAGCCACAACTCCACAGAGTTAGTTCCCCTGCTCAGCTCCGTACAAAAATGCTGG -AATTTCTCTCACATTCCATTCTGATGCTGGTATCCGTTGTTATATACCCTTGATTTATTG 
-AGGGTCAGAACTACAACAGACCATTATGGGTAGATTCGGGTCTCAGACCAGTACCTATAA -CAGGCTGGTTATTGTTTTTGTCGCGATTGGGTCAATGGTAAGCTTGAGCATCTTTTTGCA -TGACTATTTACTGAATTTACACGAATAGACATATGGATATTGCTCTTCAATCATCTCTAG -TACAATTGGCCAGCCAGGATGGTATGCCTACTTTGGCCTGCCCGCTGAGGGTGAACCGGG -ATACGCAACCATCACTACCCCTGCAATCTCAACGGCAAATGGTGTCTTTAGTGCTGGTGG -TGCTGTCGGGACACTTTTTATAATGTGGTCGTGTGATTTCTTCGGCCGAAAGGCGAACAT -CCAGTTCGGTGCCTTCTTTTCACTGTTCGGAGGAGCACTTCAGGCCGGCTCGAATTCACT -GGCGTATGTCACTTCTGCCTTATTCCAATTCAACTATGGCTGACAACTTCCAGCATGTTT -CAGGCTGGTCGCTTTATTTGCGGTCTTGGAATCGGTATTCTTGTAACCGTCTGCCCTATG -TATCTTTCCGAGATGTCGAGTGCTCTGCGCCGCGGCTGGCTGGTCGGCCACCACGCCATT -TTCCTAGTTTTTGGGTATATGCTTTCAGGATGGGTGGGTTATGCTTGTTATTATGCAACA -GGCTCCCTTTCGGAGTTTGGTTGGAGATTTCCTCTCGCCTTGCAATGTCTGCCTGCCTTG -GTGCTGCTTTTGGGGTCCCCTTGGCTCCCGCGCTCTCCTCGCTGGTTGATATCGAAAGGT -AAACTCGATGAGGCGCAGCATGTTCTTGAGCGGCTCCGTGAATCACCAGATGACCCCAAC -AACTTGGCTGCCAAGGAGGAGTTCTTCCAGACTAAAGAACAGATCAAACTCGAGGCTGAG -AAATTGGCCATTTATGGCAGCGTGTGGAAGGCTGTCTTCACAAGAAAGACATACCGAAAG -CGAATGGCTATCGGTTTCCTTACCCAATGGGGCGCTGAATTCGGCGGACCTTTGATCATC -GTAAGTGTGGCAATACCTAATTCAAATTATACCCCACTGACAAGGGCTAGAACAACTACG -CCGTGCTTCTGTATACAAATTTGGGCATGGAGGGTGGTATGCCTCTACTCTTGAGTGCTG -TCTGGCTGACAACAGCTGGTGAGGATAAACACCCAAGATCTACTATGTACCTCGTTTACT -AACCTAGCTCAGGTGTGATCTACAATCCTCTGGGCGCCTGGCTCCATGATAAAGTCAATT -CCCGGCGTGGCATGTATATGATCGGCTTTGTCGGCATTATCATATCTACATCGTGCCTGG -CTGCTATGACTGCCCAATATGCTGGCACAACCAACAAGGTCGGAAATGGATTCGGTATCT -TCTTTATGTACCTTTATCTGGCCTTCCAGGGGTGAGTCAAGTTTAAACATCAAAATAGCG -ATATGTCGAGTTAACTTTTCTGCAGTACTTTCTGCGACACGACCATGTACCTTTATGTTT -CGGAGATCTTTCCAACTGAAATCCGTCCAATTGGCATGGGCTTTTCTCTCTTCGGTCAAT -TTGCTTGTAAGCTGTTCGTTACAATTTTATCTTGGAATCCTAACTGACAAACGTAGCCAC -CCTCATCCTCCTGCAAACCGCCCCCATGGGCTTTGGCAACGTTGGCTGGAAGTATTACCT -TGTCATTATTTGCTGGTCTGCATTCTTCATTCCGGGTATGTTTTCTTCCCGATCAACATC -CGCCCGTCAACTAAAACCCAGTTAACATAAATTGCAGTTATCTACTTCTTCTTCCCCGAG -ACCGCTGGTCTCACTCTAGAAGAGATCGCCAAGAACTTCGGCGAGGAGGTTGCCGTCAAC -CTCACCGGTGCCACAGATGAGGAAAAGGCTCGGCTCGACCACCAGCTAGCCACGGGTGGG 
-AACATTGAATCACCTATGCGCTCAGAATTGGAGAAAGAGGCCAGGCTGTCGTCGGGAGAT -GGGGCTGAGCAAAACGTGCCGAAAACCGACTAAACACATCGAGTATGTTTAGCTTTTGAG -CAGAGAGTGGCATGCAGGCAATAGATATTTTCCGGCTTGATGATTTTGAAATCACAGCTA -TGTTTGGCGCATATATTATGTTATTTTGAAACAGGCAAAAATCCCCTGCACTCCTTTATC -GGGGCTGGTTGGGATTGGTTATATTTTAGAGTTACCTAGTTAGTGATAACTTCAGCTTAT -AAATCATCAGGAGCCTCCCTTTTCTCGGGATCGAAACTCTTTGACTCTTGTACCTACAAT -TCTCAATTATTTCCCCCAATCACAATGTGCTCAGACCTGAAAGTGACTTTGTTTCCTTGG -GATCCCAACTCGGACCTTCATGTGAGATGTCTTGTGTCGCAGCGAGTCGAATGCAGCTGG -GACCAAGAGAAAGTTGAGATCAAGTGGAAGAATCAACAGCTCAAAGGCGAGAAGTGCATT -TATTGGATTGTGAGTTTTATTCGATATTACATATTTTTTCGTTGTATTCAACAAGCTGCA -TGGCAGATATGCTTACATTTTGTGAAAAGGTCCTTCCATCAGATGAGTCACAAGCGATAT -CGCTAAAAGTAAACATTGGAAGCCAACCGCGCAAATTGAAGAAGTAGCTAATATTTGACT -GCTAGAAAGATGAAATACTTCAAGACACAGCTACAACAATCAATGCAGTCCCACGCCAGC -CGACAAAAGAAAGCTTCATCCCTATTGGCCATATTTCACTAGATTCTAAGAACCCCGATG -CAGAGCATCTTGAGCTTGACTTGCCACCCGGGAACGTCTTCTGGATAAAGACATTTTATG -TCCGGCAATCCATCCAGGGCCAGGGAATCGGGCGAGCTGCAATGGATGAGGTCGAAGCCA -TGGCAATCCGAAAGCCACTACTAGCCAAGACATTGATGCTAGACACGGTTCAGAAAGACG -ACCAGAAGAGAGAAGAATTTGCGAATGCAACATATGGTGGTATTCCAAAGGTAAATCTGA -CTACACAATTTGAGGTCCTCGGTCAAGTAACTTATATCGTTTGCAGACTACGAACGAGGA -ATGGTATGCTCGTCGAGGTTACCAATTGATCAAGACAGTGCAGAACTATTATCGAATCCT -AGATAAGAATGGGAAGATCTGGGACACTAGGACAGTTTTCATGAGAAAGGATATTGCGGA -GGATCAATAAAGGTGCAACATGGTATATATCGCATCGATTGATAGAATTTCTGCGTTATC -TCTCTCAACACTATACATGCCTTGAAATTAAATATGTAGAAAGTCATAGATTAATAATCC -CAAGTTTTGCCATCGAATTACTGTCCACGTACTTGTCTCATACTTCGAGTTTCCCAGGTG -TGACACGCTAAGCCTAACTTGGACGAGGCAAACTGACCACGGGTATATTTTTCCCTTTCT -TTTTTTTCTGAATCATTTACCACTATGCCTTGGTCCTCGAACCATGAGAAACGACGATCG -GCGTTCTGCATCGCCTGTCACGCGTAAAAATGGCACTTTGCAGTCGTGTGAACCTTGTCG -GAGATCCAAACAGCGATGTGACCATGACCGGCCGATTTGTCGCCGATGCACTTCAAAAGG -AATCAGCGATAAATGTTTTTACCATCCAGCTCCAATGACTAGACAAAGACCTTCCGATTC -AGCAAATTCTTCGAGAGTTTCTAGGCGACTGACCAGTCAACCAACCTCAAAGTAAGCGCC -TCCTGTAACGGATGATAGACCCATAACTAATATGTAAAAACAGTGCTTCCAGCCCAGGCT -CAGGCACACTGGCGAATGGGCGTTTCTTCGCGCCATTGGAACCACCCTCAGCTCTTCCTG 
-GATATCTGGGCTCGACAAGCTACTCTGCAGTTCTGACTGAGCACCGCAGTGATATTTCGT -TCGAGCCTGAAGATGGTACAGATCCCTCTGCAGGGTGGGTAGTAGAACCCGATCGACTCC -AATCCGGGCTTGATGTACTGCAGTTTCTGTACAATCTCACTCTGCGTGATTTGCTAATTG -CACGGTTCTATGCTCGGACATTGAACATCGTCGTGCCCAAGATGGTAATGAACGAAATCC -TGCGCTCAGTGCGCCAAATCTTCGACGAGTTCTCTAGTGATCCAACAGCACAACTGCAAG -AGTTGGCGAATCAAATATTTCAAAACACCTCCCGCCCGATGCGGACGCACAAGTCCATGT -CACCCGAGGAGTATCTCTCCTCTTTCACCGGTCGCAACCTCCGGTGGGAGGCTCTCGGAT -CTATCCTCACTCTCTGCGGAATGCAATTGGCCATTACGCCAGATAACGATCCTGACATTA -CGCAAGGCTCGGATGACCCTCGAGCAAAAGACCGCTTATTAGAGCAAGTGACTGTGATAA -GTACAGTATGCCTTGGCCTCTGTGACCAGACATCGTCGGCAAACGAGATGCTGGCAATGT -TGCAATTCAATGATGTTATGTTACGAACACAACAATATGGTGATTCCAGTAAGACATTCT -TCATTATCCTGGTCCCTATAGATCCACTGACCACCTGTTAGGCTATCAAGCATGGCGTCG -ATTGGGTGACCTTGTATCCACGATTTACGCAGCTGGCCTACACTTGGATACTAATGGTAC -TGAGAGTTCTGAGAGTTGTCCATTCTTTCTTCGCCAGTGGCGGAGGCGCTGCTTCGTTGG -CGCCTTCTACATGGATAAAATGATAGCAACATTTGTCGGACGACCGCCACTGATGAATGG -TCGATTCTGCACACTCGCTGCTCCATTGGATCTGAGCGACGAAGTTCTTGTTGCAGGCGG -AGATGTCCTCAGCAAGGCTATTTCCGAGCTAGACAGTGCCGGGTGGAATACTGAAGGGAA -AATACACCTAGTGACCCCAACTCGCCTGCGCTATCAGCTGGGTATCATTCGGGAAGAAAC -CCTTGAGGTTGTACTGGGGACTTGCGAGCAACACGATCTAATTCAGAAGTCCGAGTAAGT -ATATCTTCTCCTCCAACCCACATGTTTTCTTGGTACTGAAGTGATTTTTTTTTTTTTCAA -TAGAGAGATCCAAGCAAAATCCCGCGCCATATGGGCAGCTACTCCGGATCAGCTTCGGTA -CGACCGGCGATCGAACGACCATTTCCATGATGCGTGGCTTACCATTGTCTACTTCTATTT -GGACTATCTAATGACCTGCTTTTTGCTCTACCGAGCAGTCGTCAAGCATACGAACACCGG -ACAAGCCGATCTTTGCGATGTATCTCGTCGGGTGCTAGCCATTGTCATTCAGATTAATTC -ATTCCGAACCCCAATGGTTGATCTTGACCGGCACTTTTCCTGGATAGTATGTCGCCCATG -GTTGCCATTGATAGAAGAGATACATGTGCTAATAAATACAGGTCCTCACATACGGTGTCC -CTTCCGCCAGCGTCCTTCTCCTTGAACTCCTGCGCCAATCGCATGAGCCCGGTCCACACG -CTGTTCCTCTCCCGCGCGCTGAGTTGATCCGCAACCTCAGCGTGTTTATTTCCTTCCTAT -CCTGGGTCGCAGGCCCCGGTCACGGCAACTATCATACCTGCAAACAAGCCGAGAAAAAGC -TCTCTCGCATTCTTGATCAGCTACTCGATCCAGAGCCCGTGCAGCAGCATGTGGTAGACG -ATGTTACAACAGGGCTCGACAACTTCCTCAATTGGTCGAACTACAATACGATCTGGGACT -TCAACGCCGATTATATGCCTCCTGCCGAAGGCTTTGCTCCTTGAGCATTGGTCACTATCT 
-CGGTAAGATCATTTGGAGGTCAAAAATATATTCATTACGTCGTGATTGTAGAAACCCCTC -TGATCGCTGGCTGGTATCTGCTCATACCATCCACACATAATCATTACGGGGCGGTGTTTC -GGCCACATAATAGGCCCGATGACACAGCGGGCAATTGAAGTGGTCTCCCAAGAACCACTT -TTCTAAGCATTCCCGGTGGAAAACGTGCAAGCACCGCAATTCGCGAATCTCCTGTGTGCG -GTCTACTTGCTCCAAACAAACGGCGCTGGATCTGATGTCAGTCACTGCTCTCCCAGTAGA -GCATGGTTAAAAACATACCACACGAATTGTCCGTCGACGGCCTCAGACAGTCCAAGATTC -CCTTTTGTGGTCGTCCACCACTTCTCGAGCGTGCAGGTGGGCGAGACCTTATCCAACTTG -CGCAACCGCTCACTGGGATTTTTCCTTGATCTTAGGAATGACTGCCTGTCCCGGCGGTTG -ATAAGTATTGTCAGACTGGATGATACAGTTAGGATATGAGGTGGATTGGAAGGATTCATG -TAGCATTGACGGAGGGAATGTATCGGTAAGAATGGGGCGGATGGAGGGGACACAAGAGAT -GACATGCAATAATATATACAAGATGCAAAGAAATCGAATCTCACTTACATCAAACTAAGT -ACGATGAGTAGCACCACCCCAAGTCCGATGCCTAACACGATCGCCATATGGTTCTTCTTC -AGTTTGCTTTCTTCAGCCATTTCTCCGAGTCTAGGATTCTCTGTTGCACGGGTCAGAGTT -GCGTCGAAGAGATAGTGACCAAGGTCGAAAGCGAAATCCAAGACCGCACTATCAAATGAT -CCTGAGTTGGTGATCACAAGCTAAAAGTAGGTGCCCAGGTGTGACAGCCCGGTCGCCGTA -CCGGTTCAGAGAGATCGCAGTATAACAGAATAGATTCTCCCCCCAAAAATAGGGAGAAAG -ACACAAAAAGAGCTGGAAGACCAGTTAGAACCGGAGGAAATGAGTGTCGGGTCTGAGGAT -ATCAAAATCCAAAGAAGATCAGAGTTGTAGCAAGGAATATCAATCTGGGGATCTCCATGT -CCAAGCGTCAGAGCGGGGAAAAAGATGATCATAGCTAGTATTTGCTAGCATTTGCTATTT -GGTTAATTTCTTGGGCATTGAGGTATTGAACCAATCAACATTATGATATGAAGGGGGGGA -ATGTTGATCGGCTGGATTGTCCTTTTGTTGTACGGGGGGTTAAATAAGTACGGAATAGAT -ACTCTCTTATTATTCGTCTGTTGTATTAGCCTTCGACTTCGTATTTCAGATTGGTGCCAT -CGCAATGTGCCAGATGACACTGTTATAAAGGCTAATGTGCCTCCCTACATTATCATTGCA -AGGGTCGTATTGTAGATGGACACATTCCTCGAAGATTCTAGCTGGCAGTGTTTCACTCTG -TTCTGGTATTCTAGACTCCTACTTTATTTCAAGGGACTGTTAGGGTCTAGATCTATGTCC -GACTTGGGGGATAGGTCCAATACGCTCAGCAGGTAGAAGAAAAAGTCAATCAACGTTGAA -GAGCTACCCCGGCCCGGACCCGAGCTAGATGTTGTAAGTACTTATTGACCTCCATACTCT -GAATCCATTGCAGATACGCTTTGCTCTTTTTTGTTGAGTAGTTGCCGGTGGTGTTATGCA -TATGTGCTATGTCACTGCTTCAAGTGGCTTGCTCAGTGTGCGTCTACCGAAGAAAGAATC -AATACTATTGGTTTCTTGGTGCACACATTTGGCTTAGTAGACTGGGAGGAAGATGGGACA -TCAGAAGCGATGACATAGCCAGTAAGCATGCGCCCCACTCTGATCAGTTATAAAGTAGGG -GCTCTACGCAGCTGCTTATGGGGTGTCGATCAAACATAGGAATGCTCCTAATTGGGTGAA 
-CTGGTGGAAGCGGTCTATACCTGAGTCAATTTTGTACAACATATGAAGCCGATTACAACT -GTCGAAGGAAGCAGCTTTGTTTGCTTCTTTTCTCGGTCGATCCTCACAACATGAGACTAC -TTGATACGTAGGATAGTATCCTAGATTTGAGGTTGAAGGTAGCTGGGTAGGTGGGTATAT -ATATACAACCTCGACTATGCGCTAGGCCTATGCAACTATATTTCCCCAAACTCGACCCGA -AGTTTTGGTTTCCGAAGGTCAAGGGGATACGTTCATATCCGACCATTCCGGTCTGTTAAA -TATGTGGCCATCCCAGTGAAGCTTTTTGCTTTAAAAAATAGATGGAGGTCTACTTCACCG -ATGACCCACTGTGCACTCGATTACAAGTTGGAGTCAAACTGTAACTGCCCCCTTTGAGAA -TATAACAAATCTGGGATCTCCTTTCTGAACTAATGCTCCTTTAACTCCTTGCAAAAAGTC -ATCCGGGAGAATACATTCCACCACGTGTAAGTCTTCCTCGTGGGGTCACGTGAGGGCGCG -TAACCACGCGTTGGGTGCTCACGGGAGGTGAAAGAAGGCAAAGAAGGCAAACAGTATGAG -TTATCCTGTGGTTCAGTCCGAGGAGAAATTTCAAATATGTCTGTATCGAACGCGAGGCCA -CGCTTCACGCTTCACGCAGATAGCTGTATAGACTACTGCAACCTCATACCAAGTGTGGGC -CCAAACGTCTAGCAGCCCAACGCGTCAATGTAGACAAATGTAGAATAAGCATTCGTAGGA -GTCGAATATATAAGAAATCTATAGACTTACAAAGGAACATGATCAAATCAAATTAGAAAA -AGATTCGTGTGCCCAAGTACCAGGTACCTCGGAGGAGCACTTTGTTACTAGCGCGGGCCA -CGTTCTTTCTACTCCGCGTCCACACCGCGACGCGATTTGATACAGTTTTCTGCATGCCAG -CCTCACGGCTTTTCCATCTTTTGTTCTGTTGTCTTTTTTTCTAATTATTGATCTGGTGAT -TCGGTCCTGTTTCTTTCCCTCTACCTCCCAACCACTCCATGCTCTCTTAGGTATCATGTC -TGATCCGGAGCCCTCGGTGGCAATCTCATCCTCGCCCTCTCTCCCGGCCACCGACTCCAA -TCCTCTTAATTACGCTTTCCTCGTGCATTCACAAAAAACCCTCACACAAAATCTTCCTCC -TCGTGTGGACAATAAACTGTTAGCCCGCCAGAAGCGTCGTCGAACCAGGTATGTTATTAT -GGTTGTGGTCCATACTGTCAAACGCGTCAATTAACCCGATATGAACTATTTTATAGTCCG -GAGGATCATGCAGTCCTCGAGTCCGAATACCAACAAAACCCTAAGCCAGACAAAGCAGCC -CGCACGAGTATCGTGAACCGCGTCTCACTCGGTGAGAAAGAGGTTCAAGTATGCGATGCC -AAGAGTTCACGCACAAAAACCACTTCTAACTGGCTCACAGATTTGGTTTCAAAATCGCCG -CCAGAACGATCGTCGGAAATCCAAACCCCTTCAGCCTCACGAATTACTTGCCCCGCGGTC -GGGCACGGTCGATTCATCGGGTCTGCCTCTAAGTGATGATAACGCGTCGACGGAGCCGGG -GTCTTCAAGTGGGGCAGAGCAGTTTGACTACAACGACCACCGCGAGGAAATGGCATCAAA -ACCATGGCACGGAGGACTGCTGCAATTCTCAGATGATTCGGATCCAGTGACTGAAGATAA -AGATGAACATGAGTTGCCCCAGAGCACTCAAACAAGCGTGGCAACAGAGGCTGCGGAGGA -AACTCCGCCTACCACACAGGAAGAATTTTCCCAACTGGTGAACGACCACTCCGAAGACTC -GCTTCTGCAGGCACCAAAAAGAAAACGTTCAGTGTCGGATATTCGGGGCGAGGGAGAATA 
-TGAGCAGCAGACGACAACCACACAAATCGCACAAACCGTACAAACCACACCTACCAAGTC -ACCACCATCGCTGCGTTTGTCGATGTCGTTTGATGGAGAGGCTTTGTTAAGAAAAGAGGG -CGAGTTAACACCATCGCCGCCCAAGGGCCGCAACGCATTGCGTATTGCGATGTCATCGGA -TGGAAAGGCAGTCATCCGCGGAGAGAACGAACCATCACCATCCAAGAACCGCGTGGCGAT -GTTCTCCGTTCGCCGAACGAAGATGTCAAGCCTGCGACGGAGCAGCAGCGCCGTCTTCCC -AGCCACGCCGCGGGCAGGTGTTGCCGAGAAAGATCGGGCTTTCGGTCGCTCTCGTGACCC -ACGTAACTGGGAATCATTCTGTGATACAGACGCAAGGAGTGCCCTATCAACACCAACCAG -CGGGCCGAATGGTTCTCCCGGTCTCTTCCAATCTCGCAGCCAGCGCTCATTACCGCGGAG -TGCCTCAGGGCGGCACAGCCTCGCTGCCCATGGCGACCCAAATACACCAATCTCTCAATC -CATGGGAGAGAAGAGACGAAAGCTTGCGCGCACAGTTTCATCCCTGGGACGACTGGAGAC -AGGTCGAAACAAGGCGAACATGAACCCATCCAAGCTGTCTAAGCCTCTATTTTCCAAATC -CGGAAAACCGGATCTGGAACAAGACCCCGGCGACTCTGATAAAGAGAATTGGGTTCCAGG -CACACGATCGACAAATATTCGTCGTCGTAACACATCACACATCCCACGACCTGTCCTCCG -CGATGCGAACCGTAATCGTGGACTCGGCATCTCGACCAACGGTAAACGCAATCGACCACT -CCATACCGGCTCACAAGGCAAAGCCCCGCCAGAACTCAGTGCAGAAGTCTCGGCTTTCAT -GGCGGGTGGCTCAGGGGATAGTCAAGAAGATGACCTGGACTGTGTCCAGGGTCTCCTATC -GTTGAGCCAGGGGGCCTGGCGATAAGGGATGTATATGATTGATACATTGTTAGTCCGAAG -CGCCACTGCCTGTCGTTGCATCTTGATTATCCATTTCCCTTTGTGATACTGAATTTTCTT -TGTTCGGAGCCCTGTATTTTTTTATCTTTATTGTCTTCATGATGTGTGCTTGCAAGCGGT -CTTTGCCACTGTCACGTGAACCTTGTCCATGATATACTTTCTATCCGGGCTACTTTGTTT -CTCTGCATCTTTTATGCATCTGGATCTACATGGTTCTTTATCTTTTCGTCTGCTTTACTC -GGACCTGTCTGGTCTTACTGGATTTTCATGGTTGAAGAAACGTGAAAGCGAATCCCGAGA -AAGATACTAATCCTTGAACGAATCCAACGACCTGAAATACATTATTTAAGTATGAATGAA -AATGTTCGACTTACTGGCTCGATTTTATAAGACAGATCATGTTTTACAGCTATGTAGGGT -TTCTATCTCTCTATCACGCTATGTTTCCAAGCTATAATTTTTTCAAAGACCATCTAAAAA -CAAACCACCATCCAGGTCTATCTATATCAACTCACATAAAAATCATACAACCCCCTATTC -AAAGCTTATCAGCAGAAGGCAAGGAATCATCCTTAACCCCCATCCCATCAACCTCTCCAA -TCCCAAACCCCCGACAAACAGCCCTCGCAATCCTCTCCCCACTCCAATACGCCCCAGTAG -TAGTCCCAAGCGCCACAAACGGCGCCGTATGCTCTCCAGCAAACCAAAGACCCCTACTCA -ACCCAATCCCATCCCCAGACCGAAGAACCTCAATATCGATATCCGCCTCCTCAATCCCAA -CCTGGAAATTACAATACGACCCATTCCCAGCAAACACATCCTTCTGCCACTGCGTAGCAA -GCAACGCAACCGGCACACAATCCGGGTTCTCAGCGCTATACCCAGGAAGTCGGGAGTAGA 
-AAGGCTGTAGAGTTGAGATCAGGATGTCTCTGTATTCAGGTGATGAAGGTTCATGTCCGG -AAAGACGGTTTATTATCTCCGCGCCGCCTGAACCGTATGTGTAGAAGAGGAGGGTTGAAT -GGGCGTGTGgggatgggagggaggcgagggagagaCACTCTTGGTTCCATTCTGTGTTTT -CGGGGTGAGTGGTGTAGGATGGGGTGAGGAATTGGGCGAAGACGGTGTTTTGTGATTGCG -TTGTTGAAGTGCTGGTGTTATTGTGCCAGAATGCCTGTGGGAATGTCACGTATATCTTTT -CAAGTCTGCCGTAGGAGATGTCGTTGATTGCTTTTTCGAGACGGGGTGGGAGAGCAGGTG -TAAATGCTGAAGTGTTTTGTTTGAGCCAGCCTAGTGGACAGGTGACGACTACTTCATCGA -AATTATAAATTGTGCCTGTGGCTGTTGTTACGGTAACTTGGTGTTTTGATTGGGGTTTGC -GAGGTGGTGCGTTGATCTCTGTGACTGGTTTGTTGAGACGGATGTCGGCTTTTTCAATGG -CGGTTTTGGCGACGTGCGCTAGGATGCGTTTGTATGTAGATGCCACGACGAAGTTGCCTT -GATTGATGTGTTAGAAGGGATATTTTACAGTAATGATTCATGAGGGATTTGCTATACATA -CTCCCATCAACGCACTCTTCCAGACGGAAGAATTTCAAGCTCTGCTTGTCGACTTGGTCG -CCGACATATGTACCCCAGAGCCTACAGGCATCGAGACAGAGTTCTTTCTCTTCATCTGAG -AAGGTGGTGTTCTGGACTTGATCGCAGAAGAAATCGAACAGACTTCGCTCGACGGGAATG -CTTTCTCCGTGGTGATTGCTGTACTTGAAGGCTTCTGCGATAGTAGTCCATACAAAGTCC -TGGACTTTAGTGCTCACTTCATCGCTGATTGGGTGGCCGTCTCGTGAGTAGGAGATATGC -CTGCCACCTTCCGGATCATACATGACGGTTTCCGTTTCTTCCGCTATGGTCATGATGGGG -TTCTCACCCGCACCATGAATCCAATTAGGGCCTCTGGTTTGAGGGTTAGCTTGTGTATAC -TTCCCGAATCAAACTATGCAGGGGTACATACAGATCTACCACATGGTCGCCAACTGTTGA -TTGGTGTATCTAACCGATGAAAGTTAAAAATATGACTTGGCTGCAGTACAGGGAAGACAT -ACTCTCCCGCCAATGCGATCTCTTGCTTCTAGGATTGTTACTTGGGCCCCATTCTGGATG -AGCACTTCGGCGCATCGGAGGCCTGAAATACCTGCGCCGATAATACCGATATGCGGACTT -AGAGTCATGGTCAAAAATCAATGATCTGTTGATATGTGCAAAATGATGATGAACATGTGA -AGATTCTGGATGTCATTGGGCGGTGCTGCCGAAGTCGAATGATTGGCTACCGTTATCACC -CTGGAGCCACTTGGGCTCTTATCGAGACATCTACAAGTTACAATAGGCCTACTAGGTTAT -AGAGAGGTACTTTTTTGGGACTCGATCGCATTGAGAAAGCGAGTCTAAAAGACATCGCAG -ATGCCGGGTCAAATAAATGCTCGGCAACAGATCGAGCTCAGCTTATGACCAGCATAATAG -ATACAGATAAAAGCAAAGAAGCCATGCGCCTGGTCGAGCCTCCTGAGCCGCTAGGAGCGA -GGACATGGTGAATACCACTGTCGCTGCCGTAGAACGCCGTGACGAAAGCGCCTTTTGAAA -CGAGAAACAGACGAAGGGTATGTACGGATCGGAGGTAATGGAAGGGTATCAAGGAAAGTC -AAGAGAACGGAGCAAAGCGCCGAGTAAAGAAAGCCGATAAAGAAGTAGTGGAGATCAACT -CCACTTTGAGACAGTGCGAGAGCACAAGAGGTGCTTTGTTGTAGATGAGGCGCGGATAAT 
-GGGTAGACTCGTATTAGACTGAGACACACCCTTCAGTCTTCCCAGATGGTCTCCAGGTCA -AGATCCTGCCGTCGCTCAGCACCACAGACACTCCATCGCTTGCGCTTATCACTCCGGGCT -GCATATCCGCCCGAACCCCCGCGCATTGTACTGAATGAGTTGGGCGCATCGGATGTCCCG -CGCCGTCGGTTGGCATTGGTTGCAGCGTCCGCTGCATCAGCCGCGGCTTTCAACTCGGCG -ATGCGATCCTCTTCAAGAGCCTCATTCTCGGCGTCAGGGATATCGGGGATGGTTTCCAGA -GACGAAATGCTGGGGCTTCGGGAGCGAAGGCTGGTCGGAGTTGAGGGGACGGAAATGGAG -GAAACGGAGCCGGATGGACCGGGGTAAGATTCGTTGTATTTGGAGCCCAGAATGGGCGAG -GAATGTAAGGGGGGGCTGATAGGAGTCTGGAAAGAAAGGGGTGAAGTCGGACGCAGGCGA -CCGATGCGGGGGAGCGTCTGCGCCATGAGGACAGGAGACGAGGCTAGTGGAGATTTGGGA -ATGGTTTCCAATTCTGCGTTCTCGGAGATGGTTTCTGAGATGTTGAAAGAGCCCATACGG -GATTCGATATAATCGTCAGGGCGCATCATGGGATATCGTTTTGTGTCGCGGCCATTGACG -AAGATATAACGACCTTGGGAGTCCAGCCCGGGGGAGGAATAAGCGCGCGTCATTGACGGC -ATTCCCGGGATTCCAGCCAGTGACCGAGAGCGGAGATGCGATCGAGCCAAAGGGGATCGT -GTCCGAGGTGGTGCCTGTGGCTCAGAGGAGAATGCGATCATGACGTCCTCCTTTTCCGGG -ACAACCCCGGAGTTGGTCCCGGATTCGTTGCGCATTTCTTCAGGTCGTGACTCCGTAAAG -AAATAGAGAGGAAAAGGGGGTAAGCGCGATTTACCCTCGCATCATAGTAAAAAGGGAAAA -AGAAAACTCTTGGACCTGCCAAGTGAAAGAAAAAGGGGAAGTTCCGAAATACTGGTGAGG -TTGACGTTGTGAAAAGATAAAAAAAAAGGAGAGGCGTAGAATCTCCGACTCTCAGAGAAT -CTCAGTCGGAAAACCACCCTTACTCAAACTGCTGGGCGCACCAAATATCCTCGGCGATTT -CTCGCAGAGCCTCTTCAACAATTTGTATGTCTCTTTCCTTTGAGGGTGTTCTTCCGACAA -AACCCTCAAAAGAGATGATGAGGGACCCGGAAACTGTGACTTGGCTACCAATTTTTTCGG -ATGAGAATATTCTGAGTGCTTCCCTGGGAATCCCGTTCGTATTTTTCGTGTTGGATCTCC -GGTCTTCTTCGATTGATTTTCATTGCCAACGCAATGCTGACGTTCCCCCGTGCCGGGTCT -ACCCACATGTCTATATCTCGACGTAGTCGAGCCAGAGAATCCGAAATGGCACTTTCTAGA -ACCATTGTGCGACTTCTGGGTAATTTCCTTGGGACCCAAGCGGCATTCGGCTGTTTCGTT -CCAAGAGGGTGTTCGTGTGGCACAGCTGTATAGTCCAGGAATACGCGACGCAGTCCCATA -CGTTCTAAAGCCTCTTTGACTGCGAATCTGAGAGAACAAGCCCCTTGGGCATTTGCTGGT -GTTATCGTTTTGACGATCAACTTTTTCTTTTCTTTTTCGAATGATGTGAGGATAGACCGG -ATAGGACATTCTTCGGAGTCGAGGAAGTCCCGCTGGTAAGTCGCATTGTCAATGTAAAAA -ACAACGTATTCATTATCGAGAGCCGTAAGATTGTCGAGAAATTCATCCTGAAAATGGTGG -ATCATTCGAATGAATTGTTTCACCCCTTAATATAGATATGTCCTTGTTGTCAAAGGTAGA -CATAGGTCTTCCAGATCACCATCCGAAATGACATTTTGGCAATTATCCTCCACGTTTTAC 
-TTTTGAATTTGATTCCTGATATTTATTCTTGAAATCAGTATATCAGTATCACTTCAGAGA -TGGAGATGATGAGAGTATCCCATATGGATCTCAATCTGCTTGGCGGCATCTATAATATCA -GAGGTAGGTCATTGAGCATGTATCAGCATAATGGACTCGAATTTGAACTGGTGGCTTTAC -AATTAGGTATACTATATAAATCTAAGACACGGGCGGCAATTTCATATGTCTTTTAATGTG -TCTTTCTAGATATCTTTTTTGGGTATCTTCCTAGATGTCTTCCTATATCCCTCTCTAGGT -GTCTTTCTAGATGTCGTCCAAAACGTTTCATCTGTGTGGTAGCCAGGAGTATGATGATGA -GCCGTTGAAGCGCCAGAAACGGGAATAATAATACGGACCAGACTCTACGATTTGAATATT -GGCTGATCATAAGCAAATTTTGATCTCGACTGCAGGATTCCAGACACCTCCATATGATGT -CAATCATGTGCATAGCATTTGATACAACTCCTCCGAATTACCTATTAAACAGTATAGAAA -AAGCCAAGTAAATGGCATATAAACAAAAGGAAGCAAAATCATCTCACGGCTCGGCCTCTT -CCTCGCCCTCCTTGCACTTGGAAACGTTCTTGCGGACATAAGTTTCTCGCACTACATCTC -CTTTATGCTTGAACACCTCCCACTCACCCATACCACGCCCCTCCGGTGCAACACGTATAC -CACGACCACCGCCACCAACAATCACATTACCAGGACGGTACTTGGGATCCGCCTTCATAC -GCCGAATAGCAGAAACATGGATCTCAGCATTAAGCGGAACATCCCGCACTTCGCCACGAG -GCAACGGCCAGCTAATCGGCTTCCACGACCCATCAGGCTGCAGATCCATACGACGGAACG -GCAACCATTCCATAATCCGCCAGGTGATAACAGACATTCTAGGAATACCCTGGTTGAACA -TCAAGCAGTCATGCAGCAGCCCCTTTGTACTGGAGGTGTGTAGCGCATCCCGGAAGTCGC -GACTCGCAGCAGATCGCTCCTTCTCATCCGGCATGGAGCGGAAAGCATCCTCATTCTCAG -CATGAGGCTCAACGCACTGACTCGGGCGGCGGAGATTGATCTCTTCCCGGATCGGGCTGT -AATCACCATCGTACTCCTCCAGACACTGGAACTGCTTCATCTTGCGCTGATCCAGCTGGA -GTCCGGCCTTGTACGCTTCCTGTGTCATCCACACGAGCGGCGCGTGACTGAGAGCCCATG -ATTCGCCGTCTGAGAGTTTCCACCCGCCGCCGATGTCTGCGTGGCAACCTGGGAACCAGA -CCTCGTGGATGTCTTGGTCGTCTTCGTCGTCGCTTGAATCGACCTCCACAACCGGAATTT -GTACTGAGAGGGTGTTGGATTGCATGCTTTGGAAGGACTCCTTGCTTCGCACTGAGTGGA -GGTCATCCATGGATGCTTTGGGCACCGCGAGGCTGCGTCGGGGGGAGTGTGCGCGGTATG -TGTCGGGTTTCATTGTCCCGTTGTCGCTACCGGCGCGGGAGGGTGGTTGCGAGGATGTGT -GAGAAGCTGGATGGTGCGAGGCACGGCGCCCGGTGTGGTAGTATGTCTCTCCGGTACTGG -GCCCCCATTTGGGATCCTTGAATTCAGGCTCGGCCTTCTGTTCCTCCTGCTGTTCTTCCT -GGGAGTCATCATTTAACCGAATGGCCGGAACGTTGTCTGGATGTTCCTCTTGAGACTTCT -CCTGGGGTTCCTCCTGGGGCTTCTCTTTCTGATCCTGATGGAAGAGGTGCAGATGGTTTT -TGGGCAAATGATGGTGACGGCGATGTTTCTTCTTCTCCTTTGGACGCGCGCCAGAGATCA -AATCTTGGCGGAACTTTGCGCGGCGCTCGTCAATACCGACGGCATGGCGAATGACCTTAG 
-CCGAGCTGCGTGCCGTGTATGGGAACTTGGTACGCTGCATCCACGCAGACTCAAAGCGTG -GCACACTGTTGACGGTATCGAAAAGACCCATGAAACGGATACGCGAGATGGGGCGACTGA -ATGTCTCACGGAAGGCTTTCATGTACGCGAAGAGCTTTTTCTTTTCTTCGCGCTCTTCTT -CCGAGTCGTCGGACCGCTGCTGCCACTTGGCAAATGTCTTCCACGCAAACCGGACTAGTT -CTTCGTTTCCTGCTTCGAGCAGACCGATGTAGTCCAGCATCTCTGCAAGGAATCGCGCGA -TGTATGACCCCCGACTAAAGCCGATGAAGAAGATCTCATCGCCAGGGGAATAGTATCGCA -TCAGGAACTTGTACCCACCCATGACATGCTCATCAAAAGAAGACCCGACCGCCGAGTCCT -TGGCCTTTTGATAGGCGGATTTGATACGTTGGATGCGGCCATGACTAGTCAGTGACTTCG -AGGTCACATATGTGCCAATGCCCGGCTGGTAGTAATGGTACTGATGGCTCTTGCTGCGGT -CCAGCATCTGTAACTTCTTAGCACCTGTTCAAACGCTTGGCTCGCAGTCCCACTCACGCG -AAAGATCTTGAGCACATTACTATCCGACTCGTCACCAGCGAATTTGTTTCCGGTACCATC -AAAGCAAAGCACAAACTGCTTCGGCATGCCCGATAGGGCTTGGTGTTCATACCCGTAAGC -CATGATGCCACCGAATAAAAACCAAACAAAAAAGTTCGAAACTCTTGACAGCTAAGAATT -CAAGGGGGGGGCTATATACGAGCTGGGGGAAAGGCTCCAGGCTGGGATGACGCTTTGATG -CAGCTGGATACAACTAGCATTACACGAATCAGCAACCTACATCGGACCCAGCGAAGGGGG -GTAGGATCAGAATATTCAACCCACTAGCCGATCTTTGCAGATTTGTGAAAAGAATACGTC -AAACTTGTGCTGGCTGACGGGCGGGTCCCCCAGACTTGGCAAATTTCCATTACAAGGGTT -CATTTGTTTTACTGGGATTATTTTCTCTGCGGATGAAATGATTCACTGACTCGCATCTGT -CCAGGTGCAGGTCACCGTACATAAAATCTTCGAGTCTAACGGGGGCGCATTCCACCCAAT -GAAGACGCGCCGGTAATCATTGCTGGATATGCAAAGTTATCGCGAACTGGGCTGAACCTG -ACTTGAATTCTGCACGAGGGATGCGGCTTGCGCGGGCCGGACCTGGGTAGCGTGTGAATT -TGGAATCCTGGGAGATGGAGCTGAACAGTGACTAAAAGAGAGACGTGCATGTTTGCCCTT -TTGCCATTCCGTGCGCGAGTCATCATTGGCCCCCAGTTAGCATCGTGGTTGGAGCTATGA -GATTTTGAAAGGAGATGATGCTGGGTGTTCGGGACATTACTGTCGATCATTCTCCATAGA -GTCATTCTAGACACCAATGATTCAGGGGGTGTAAGGGGTGAGATGATCCTATCGAGTGTT -CCGTCGTGTCGGTTCGGCTTTGAGCGATCAGATCCGGTCTAACCCTCTGTGGTAATCAAT -ATACAATCCTTCACGGCCCAGGGTAGGGATCCACTTATGTTTGCCGCCACTTTTCTTGCG -ATTGAGAAATTTACTCAAGGTGTCAGTGCGATTGTCACTGCAATCACATACACAAAACCT -TGCCATATCCAAACCTTCAGGCGTCATCCCGCGTGTGGTTCCCTGAAAACGAATGACAAT -ACCCGAAGCGGAAGGAGAGTGGGACTTTCCATATCCCGAAAAACAAAAAAAAGGACGCCA -ATTGGAATTGGATCGACAGCCGAACAAGGGATCTCTGCAAATAATACGGAGTACGGAGTA -GGTAATGGCTGGATTAAAGACAACCTTCGAAGATATCCAATTTATGCACCTCATATCCAT 
-GGATAGATGATGCGGGAGTGACCTGGAACTAATAGTTGACCCATGGTTCGTCAAAACCAA -ACCATTCAAACATCCGATCTCATACTTTGTATCTTCCAACGTTGATCTGCTGACGGGAAA -AATGCAGTTTGTGACTCTCGCGTGGAAACTCCCGTTTGACCGACGCCACGAGGATGTACA -GAGTCGACAATAACAAAACCCTAGAAGTGGATGTCCCGAGATGGATCAGATTCTGAATAA -TTAGTGACTGGCAGATTTCAGCTGCCAATTAGAGGCGACACATTGTGGTTCGCGCGAGTG -GCCCGTTCGCCACTTAATTGCATAGTAAGTGGATGAACAAGTGGGCCTCCGACGATCAAA -ACAAAAGAAAAAAGCTAAATAAAAGTGAGTTTTTCATGGTGAGTTCTGCGTCTCTGGCAC -ATTAGGGTTCTTCCTTACATCCCAAGTGCTACAGAGTTCTGATGATGGAGCACTACCTTA -TGTTCTACACATAACCCTTATTGAAGCAAATCATAGGGCGTGACAGCACTCGAGAGTTCG -GTTCCAGCCAGCCACCAAATACATTGGAATTTCTTTCTCGGAAAATGCAGACAGTCCGAG -CAACTTACCCCACTTGGGCTCGATTTAGTTCGATCGTGAGATCCCATTATGCATCAAGTT -GCACTTCATCCGGACAACCTCGTACAGAGAGTGTACACCGCTGATTATAGTCCCGTATTA -CGGCATTCTTCCAACCATTGGTCTATTATCCGCTTCTTCCGAGCTTTATGTACATATCGA -TAGATTTGGGGTTGCTTTTCAGCTCAAAGGATTCCGGGGTTTACCATACATACCGAGTAC -TTCCACACAACCGCGGCCATGCGCTATCGAATTGAACCATGTACAACCTAGAGAGTGATC -CGTGTACATCATAGAACAGATATCCCCCTTTCTCAGATGCCGAAGGCCATAAATGTTGAT -CACTTACTAGATTGAGGTACTTCATCGAAAGCCCACTATAGAAACTCCACTCCGCTTGAG -AACTAAGCTTGTACCATGCGGGGCTGAGATAATGGTTACAGAGGGCCAGGTGCAGGCCCG -AGAGATCGACCTAATGCTCTCCATCAAATTAAAAATGCTGTCATTGCAGTCAATCAACTA -TTGGCGATAGCAAATCCGCCACTTTTTACACGCTCCACAGTCCTGCAGCTGACTTGCGTC -GCCCGCGCAGCTCGTTTTAAAGAGCACCGTCGAGCTTCTGATTTATTTCCTCCCCTGATC -TAGCCCTTGGTAGCCACTTTGTCCTTTCTCTTTGACTTGATTTTTTTTTTCTGACTTGTT -TGTGGCATCATTCTTCACAATGTTAAGGAGATTCTCTACTCAGTTCAAACGGTCCAAGGA -CTCCAAGGATCCGAAAGACTCCAATGGTGAGACCGAGCCCAAGAGCACAGATAAGAGTAG -CAAGCGCATCTCCAAGGTGTCGCCAACTCGGAAATCCACCTCTAACCAGGAGGAAAACCA -CGTCGTGAAGCGTTCCGAGGTCGTTGCTGTATTTGAGAAGTATGCGCAAGCAATTCACGC -CTCGCAGGATCCTCTACCCAACCAGACCGGTGATAGCTCCTACTTGAAGCACGACAAATC -GGCCGGTCTCATCAATGACATTAAGTCACTGGGATTCCGTGATGTCAACACCGTGAAGGA -TCTCATTGCGAGTAAAACCTCCGGTGAGCTTGTTGATGACAAGACCTACTTGATGGAGCG -CATAATCCAGATGGTTTCGGATTTGCCCGGTAACTCCAAGAACCGTGTTGAGCTCACCAG -TGTATTCTTGGATGAGCTATGGAACTCAATTCCTCACCCGCCTCTCTCGTGAGAAATCCC -CAACTGGAGACGATCATTTTACAATTGCGCGCTAACTCTGCGACTAGTTATATGGGAGAC 
-GAGTACAAGTACCGCTCGGCCGATGGCTCAAACAACAACCCTACTCTCCCGTGGCTGGGA -GCTGCTAATACCGCCTACTGCCGCACCATTCCTCCTCTGACCATCCAACCCAGCGGATTG -CCCGACGCCGGCCTAATTTTTGATACCCTCTTTGCTCGCCAGGAGTTCACTCCGCACCCC -AACAAGGTCTCCAGTGTTTTCTTCGACTGGGCCTCTCTGATCATTCACGGTAAGCCAGTT -CCACTGAAAGTTGTAATTCAGACACTAACATGCGGATTATGTAAACAGACATTTTCCAGA -CCGATTACCGCCAACAACACCTCAACAAGACCTCTGCTTACCTCGACCTGTCCATTCTAT -ACGGTGATGTTCAGGAGCAACAGGATCTGATCCGTTCCCACCAGGACGGAAGGCTGAAGC -CTGACTGCTTCTCCGAGGGTCGTCTGCAGGCCCTTCCTGCCGCTTGTGGTGTCCTGCTGG -TCATGCTGAACCGTTTCCACAACCACATTGTTACCCAGCTGGCTGAGATCAACGAGAATG -GTCGCTTTAGCAAGCCTCGCCCTGGTCTGTCCGAAGAGGACACCAAGAAAGCTTGGGCCA -AGCGTGATGAGGATCTGTTCCAAACCGGTCGTCTCATCACTTGCGGTCTTTACATTAACA -TCACACTCTATGACTACCTGCGCACTATTGTCAACCTGAATCGCACAAACTCAACCTGGT -GCTTGGACCCCCGTGCCCAGGTGGAGAAGGCCGGTGCCACTCCTTCTGGTCTCGGCAACC -AGTGCTCTGTCGAGTTCAACTTGGCCTATCGGTGGCACTCGGCCATCAGCCAGGGCGACG -AGAAGTGGATTGAGCAGATTTACTATGACCTTATGGGCAAGCCTGCCGAGGAAGTCACTA -TGCCTGAGCTACTGATGGGTATGAAGAAGGTTGAAGGCACGCTCGAGGCTGATCCTGCCA -AGCGGACCTTCGCCCGCCTGCAGCGTAACGCAGATGGATACTTCGATGACGGAGAGCTTG -TCAACATTTTGACTCATGCCACCGAGGATGTTGCCAGCTCTTTCGGTCCCCGAAATGTGC -CTAAGGCTATGCGCTCTATCGAGATCCTCGGTATCGAGGCTTCGCGTAGATGGAACGTCG -GCTCTCTTAACGAGTTCCGCAAGCACTTCGGTCTGAAGGCCTACGAGACTTTCGAGGAAG -TCAACTCCAACCCTGAGATCGCCAATACTCTCCGTCACCTCTACGAACACCCTGATTATA -TTGAGTTGTATCCCGGTATCGTCACCGAGGAGGCCAAGGAGCCCATGATCCCTGGTGTTG -GCATCGCCCCGACCTACACCATCTCTCGTGCTGTGCTGTCCGATGCTGTTGCGCTTGTCC -GTGGTGACCGTCACTACACCATTGACTATAACCCTCGCAACCTGACTAACTGGGGTTACA -ACGAGTGCCGCTACGACCTCAACATCAACCAGGGCTGTATCTTCTACAAGCTGGCCACGC -GCGCCTTCCCTAATCACTACAAGCCTGATTCCATCTACGCCCATTACCCTATGACCATTC -CCAGCGAAAACCGCAAGATCATGAAGGACCTGGGTCGTGAGCAGGATTACTCCTGGGATA -AGCCCTCCTTCACTGAGCCTCGTGTCAACCTGGTGTCTCACCAGAATGCCAAGCTACTGC -TCGAGAATCAGCGAGACTTCCGCCCCAGCTGGGCTCGCTCCATGTCTGAGCTTTTCGGCA -AGGGCGAGTTTGACACGAAGCAGCGCGAGGCTATCGGCAAGGCTCTCAACACCGAGGAGT -TCCCCAAGCTCGTCAAAACCTTCTACGAAGACATCACTGAGCGCCTGATTGCGGAGAAGG -CTGGCCATCTGGGCAAGATCAACCAGATCGACATCACTCGCGATGTCGGCAACGTGGCCC 
-ACGTCCACTTCGCCTCTAGTCTGTTTGGCCTTCCCCTGAAGACCGAGCGGAACCCACAGG -GCCTCTTCACCGAACAAGAGATGTATATGATCCTCGCGACCATCTTCTCGGCGCTTTTCT -TCGACGTCGACGCACCTCGTTCGTACGCCCTGAATAGCGCCGCCTCCGCAGTCTCCACCC -AACTCGGCCAGGTGGTCGAGGCCAGCGTCAAATCTGACACCAACAACGGTCTCTTCGCCG -GCATCATGGACAGCTTCCGCCCACATGACAACGCCCTCCGCGAGTTCGGAACTGAAGCCA -TCCGTCGTATGAAGGACGCTGGCTCCAGCGCCTCGGAGATCACCTGGTCTGCCATCGTCC -CGACGATCGTCGGCCTAGTCCCCAGCCAGGGCCAAGTCTTCACTCAGATCATCGAGTTCT -ACACCGCGCCTGAGAACAAGATCCACCTTGCTGAGATCAACCGCTTCGCGAAGACCGACT -CAGCTGAATCGGACGAGAAGCTGTACCGCTACTGTCTGGAGGCTATTCGTCTGAACGGCA -CCTTCGGCGCCTTCCGCGAGGCCAAGGAAGCTGTCACCGTCGAGGAAGACGGCAAGATCT -ACACCATCCAGCCTGGTCAGCAGGTCTTTGCCTCCTTCAACCAGGCTAACCACGACCCCA -GCGTCTTCCCTGAGCCCAACCAGGTCAAACTTGACCGTCCCCTTGACTCATATATCAACC -ACGGCCAGGGTCCCACCACCGGCTTCGGCGAGCAGATCACCAAGATCGCGCTCATTGCTA -TGCTGCGTGTTGTCGGTCGCTTGCAGGGCCTTCGTCGTGCTCCTGGTGCTCAAGGCCAGC -TTAAGAAGATTGCCCAGAAGGGTGGATACTCTGTTTACCTCCGCGGTGACGGAACTTCTT -ACTTGCCCTTCCCTATGTCGCTTAAGCTGCACTGGGATGCTCCCTCCGAGCAGAAGAAGG -CTACATCTTCTTAAGAGTCTCTCTTATGTTTGTTGGCTGAAGCTGCTTATATCAAACTGC -GGGGATGTCATGTTATATGGAGGTAATTAACTCGGGGGTTGGCTATGATTCTATGCCTTT -TTTTACGTTTTGTTTTCGACAACCTGTTTGGTTGTCAGGATATCCAGAAATTGTTGAATC -TTTGTTGTTTATAGCAGTGTAGTGTATAATTTATGAATATTTCACTGGTTTCATTCTTGC -ATGTCATTCAGTCGAGTGTGTAGACAGGATATCGGATCAGCCAACTTTTTAGGATGGAGC -TGCGACTCTGGGCGTGCATTTGACGGTCGAGAAATAGAGAGGCACATTGTTGAGATCGTT -GGCGGGTTAGGTATTATTTGTGACGGCACAATGGAAGGCTTGAAGGGTGATTTGGTATTT -GACGCAAGCTGTTGGTATTCCAATAAGCCCCCTCCCCAGCCTTTGGGAACAAAAGGCAGT -GAAGCAAACGCAGTATCAATATCAACATTGCGTGTATTCTGTATCTAGTCCCTGTTTGCA -GACGAAGGTGACTGTTGTCAGATCTCGCGCTGTTCTCTATCCAATATCAGTTACCGGACG -GGTAACGCTGATATTCTCTTCTTTCTGTGGTTAATATATCGATTTGAGCATATTAGTGAC -AAGCTAAGATTAAATTTATTTGATTCGGCGATATCATTTCCTCTAGGAGTTATCCGTGGA -GTAGAATCCAAGTGAAGTCAAGTCAGATAAGTCGCTGTCGCGTGTCCGGGATCTGCTCAC -CGCCTTCGCGCCTGTCCGCAACTCCGATGTTCAACCAAGACCTCAAGGCACTGGTCTTTG -AATCAAGCTTGAATCAAAAGATCTGGATGTTCAGGGGAGGGTTTCGGATTGTTTACTTCT -CATTCCCACCAAACAATCACCCTTTGCGAAGACATTAGACCAGACCCCATATCGTCGATC 
-GGCGGTGGGATTCGCGCCAACCCTGCAACCCGTGAGAATTCAGTCAACACAAGACCACCA -AGAGCACTCTCCCAATCGCAAAACAATCGCAAAATTGGGATCGCGCACACTGGCAAAACC -CTGAACCCACATCTGCAATACCCTATACTCTCGATGCAATCCACATGATGTCTCCGGCAG -TCTCGGTGGACCTCGTATCGGTTTAGTTTTGCCCGCGATCTTCGCTCGCATATTTGAACC -TCGACGGAGTCTACACAAGCTTCAATTTTCGAAACTACGTACGGGAAACAGTCCGAAGTT -TTCATCCCGATCTCGCGCACAGTACACCTTCCCTCTCCATTCGTGACAGCCATGGCGGGG -ATGGAGACGGCCGCCGGCGCAAACCATGTCAACGGCTCTACGGCCACAGAGCCCACAGCC -AATCCCTACGAAACCAACCCTGATCACATTCCCAGAGATGACCCGTTCCTTTCCCAGAGC -GCGCAGTATGGTCGCTACACACCCCGCGCCGACGATTTCACCCCTCGCTTTATGAACTGG -TACCAGTCTGACCCAGCTGTGAACTCGTTCTGGGAGAAGGTCGCGCAGAAATACTGTACG -CCGGAATACTCACTCAACATCTCAGGGCCGAGGGAGGCATTCGCGGCTGGGAGCGTAATT -ATCCGGGTTGACCGCGAATCGGCCGATGGAGCGGCGGCTGAGAGATACTCGTCTGCGAAT -GCAAATGAACTGTTGGCCGCGCAGAAGGCAGAGGACTCGCTCAGAGATATTGGCGTTGCA -GTGCCGGTGACATATTTTTGCGGGACGATTGAGGGGAGGAACGTGACAATCGAGTCTCGC -ATTGCGGGCGTCTCGCTGGAAGTTGCGTGGAGGTATCTCGACACGGAACAAGTCGAGCTT -TTGAAGAACCAGTGTCGCCAAATTCTCCAACGACTCAGAGCAATCGATTCACTCCCAGAC -GAACCGTCCTATGTATGCCGTGAACTCAATTCCCACATTCCGCCTTCCGTCGAAACTCGC -GAACGAGATATCCTATTCACAGATAAAACCAACGAGGAGGAACTCTGTTTGACACACAAT -AACTTCATCCCGAGCAATATCATCGTCCAGGATAATCGGGTGGTGGGGCTTGCTGGCTGG -CGACAATGTGGATACTTTGGCACTGCCAGAGCTAAGAAGGTCCACCAACTCTTCAGGGAT -CTTGAACCTGCGCCTCAAAATGGCGTGGCCGGTTCTGAGGAGTGTGCGACTTGGACCAAC -CTTTACGATGGTGTCTACGACCCCAGCAAGGGCGTTCCGCTTGTCGCAAACCAAGATACA -CCCCTTCCATCAGTCAAAACCGAACCAACAAGCTCTACCCTCGATAAATTCCCGGCTAGC -GATGACTTGGACACGAATGCACTAGGCTTCGATGGCACGGGCGACTACGCGACTTCTAAA -ACAGTGGCGAACCTCAAACATGGGCTGACATCAAGAGCGTCATCTTCCGATCGGTCCTCC -CCAGCCAATTCCGTCAAAACAGCAAACAAAAAGCCCACGACAAACACTACCAAGAAAGGT -ACTGCAAAGAAACCTGCCGCCAAGAAGCGCAAGACGAATGATCCGGAGGTAGACAGCGTT -GATGGGCGGCGTTCCAACACACCGGCCGGCAAAACACCCGCCAAGAAGCAAAATTCCGTC -TCGATAGCCGGGTCACCACCTCCAGAGGAAAAGAAAAAGCCTTCAAAGAAGAAGAAAAAA -GGCCCCAAACAAGCGACTGCCCAGGAAAATGACGACTCCGACAGTTTTGACGAAAACGCG -ATATTCTGTATTTGCCGTCGACCTGATAACCATACATGGATGATCGCGTGTGACGGCAAC -TGTGAGGACTGGTATCATGGGAAATGCGTCAATATCGATCCTCGAGATGCCGATCTGATT 
-GAGCGATATATCTGTAAGTTTCAACAAGCAGTAGCCTTCTTCTACTCAAAACTAACTTTC -AACCACTAGGCCCGAACTGCGCAAGTGAAGGAAAGGGTTGCACAACATGGAAACCGATGT -GTCGCCTAGTCGAGTGCCGGAAACCGGCGCGCGCCAAAACAAAGCCCCCGAGCAAATACT -GCTGCGATGACCACGGCCGAGAATTCATGCGTCAACAGACACAACAACTCAAGAAGCGTG -CCGGCCAAGCGAATGGTCTTTTTGAAGATCTGGGTAGCATGGGTGGCATTCTTACCGCTG -GAGACCTGAAAGCCGCCATTATGGGGGTGACATCTACCCAAGAATTCCGCAAGCTCGGAA -ATCGCATCATCTCCCCGCCACCACAGGTAGACGAGAAAGAAACCACCGAGGCTGGAATCA -AGGTGGAATCAAAGCCACAGAGTGGCAGGTGGTTGGGATTCGACGTCCATGCCGTTGATA -TGGAATACTCGCAAGACGAGATTGCGAAGATCGAAAAGCTGCGCAAGCAACGCGACGAAC -TCCTTCATCGCAAGGAGATGCTTGCTGCACGCAGTACATTCGTCTCCCTTCTCAAGCCGC -GCGCCAAGGGCATAGTAGAGAAACTGAAGCAGCACGAGCCGAAGGGCGGATGGAAAGATA -TCTGTGGCTTTGACTCACGGCTTTCGTGGTCCGACGAAGAATTCGATGAATGGCGACTCT -CCGACGCTGGAAAGAAAGCTCTTGCGGAGGGAACGGCTGAAGCTTTGGCTGTGGGCTCCT -CGGCTGGCACTGATGCAGACGGGGATACAGCGATGAACGGCGACAGCGATGACGATATTT -CATTTTGGACGCGTGGTGTCTGCACTAAGAAGCGGTGCGAAAGACATAAGCAGTGGGTCA -AAGTGCAGCAGCAAGATATTCTCTTCGAGGAGGAAACGGCAGAGCAGGACCTCGCCAGGT -GTGAGAAGGAAGCTCGGTCTGTGGTAGAGCGAGGTGTGATGAGACGATGGGCCGAGAAGG -ATAATCAGGTTTGAATGCCGAATGGAATTTTTTCCTTAGTGACTTCCATGGCCAGCCGAA -TTCCACAAAACAAAAGCTTGATACCCCTTGATCGCGAGGCAAAAAAGGCTCATGGACTGG -AGTTGGTACTTTGCAAATTTCAATATAGCAGGATGTTCGTGGCTGGGTTTTTGGCCAATT -GCATACAAAGCGGCGCTTTGCGAGGGACGGATCCGTTGTGTTTGAAAACGTTATTTTGTA -AGAATATGGACAATTGCATCGAGATCGGTCAAGATCATCTTTTTCCGGCAGAGACATGGC -AAATGGTATTATTCATTGCTCGTTCCATACTCCCCATTTACCTAGACTCGTACCGACTCG -ATCTTTGATCACTTCGACTATCTGAGCTCCTCTCACATCAGAAAGCAAAATATGGGTATC -AATGCCTTGGTCTAACGTCTTCATGAGCAAGGTTGGGAACAGGGAGGGAATATCCGAATG -GAAGACGGAGTGGATGCCATCGTCTAGCGGATGTCTCACAGCACTACCTCCGGTAGTATT -TCCAGTCAACGATTGATTCTTATTCCACTCTGGATTCATGATCCCCAGTCCATTAGTATT -GACAAGAATGATCGAGAGATGAGAGGCATGCACACGAGACAAGCGCGTTAAGGTGCGAAG -TGTAGCTGTCAGGACCGCTGCACCTCTAGCTGGGTTAGAGGCTCGAATTACACCTTCGGT -CAGGGTATCGAGGCCCACAACAATGAGGACGATGGGGTTGTGGACGAATGGGCTTGCCTC -CGTCGATCTTCTCTGTTCCCTGTTTTCTTGGATCTCGTGGAGCGTTTCCGAGACCTTACC -GATGGCTTGTACGGCATTGGGGAAGTTATGTACGGGAAGTAGCTGGACAGAGTCAAGTCT 
-ACGGATTGCATCATCCCTTGATAGACCGGGCTGGTCATGGGTGGAAGACTGTAGCTCATT -GAGAAGAGCCCTTGGGGCAAAGGCATCGAAGCTTCGCTGGTGGATAATGAATGCGCTTGG -TTCAGATGATGGGTGGATACTTCGCAGTTGGTTTGTGAGAATATATCGAGGAATCTATGG -TAAATATTAGTTTCATGACAATGTAGAGTGATTAACATCGTGTCTGTCATACCTGTCCGA -TATACTCGTTGCTTTCGTTGCTTCTCATTTGCAGTCTGGAAAAGCCTAGGACAGCTCCGC -ATCGAAAGACTGCTTTTTTGGAGGATCGGATACCCTTTTTCCAATCATTTTTGAGGTTCC -AGTCATCTTTTGTAGTTGTCTCATTTGAAGGAGCTGCATTTTCCGATAAGTCTGGTTGGA -GTAGAGAATCAGCAAGGATTGGGAATGCAGGCTCTGCCGAGTAGACGTTTGGTGCGCTTG -TCATGGTTCCCCGTTTATAAAGTATAGTCAATTTAAACGGGTCCGATATCAGGGTATCAC -AACTGTTAAAGAGGCGAAGAGCAAGTGGTTGATGAGGGACGCGAAAGTGGATATCTCCCG -ATTTGGTCTAGCTCCACCCTTCACTACTTGAACGTTCAATCCAATCTTTTTTGGTCCTGG -AATATGACTTCGATGTTAACTTTTGATCATGTGTTATGGTGAAATTATATAAAGAACAAG -ATGATACAGGTACATTTCTTGCGGAAAATAAGCTCAAGGTCGCTTCCACACTCCACGTCC -ATCATGCACCCAGCCCCCATCCCCGATGACATCGTCCAAAACGGCCAGCAGGTCCTGCCG -CATGGCTTCCACATGCAGATCACCGCATCTAAACTCTTTCAAGCAGGGCAGATTATTCAC -GGTATACCCAATCAGAGAGTACGAGGTTGCGAGTTCCATCCAAGGATCATTGGGATCCAT -AGACCCTCGTTGTTCAAGCTCGGTAATATGATTGTCCTCAAGGGGGGCCAACTGTACATC -AAGGCGACGCAGACACTGCATCCTCGCGACAGCATCCATTACAATTTGCGAGTGATTGTA -AAATGGAAACACGGCCGTGTAGGAAAAGTATGTGAGCTCGTGTAAGAGAGCTGATAGGTT -GCTTGAGCATTGAAGCGTACGGGTTGATCCGCGTACGCCACTTGTACCCCATTCTGCCAG -AACCGATGGGACAGAGGAAAGGAAGTACTCGTAGTGATTGTATGCTTTCAAAGAAGATGA -CTCGTTAAAGGTTATGGACGTCCACGGTCGTGATGCTAGAAGACTAGTCCAATTTTCCAA -GTCAGGCAGCTGGGATAGGTCACGAGTTTTACACTCTCGTTCCAAAAGTAAGATCTGTAG -GGGCATATCGAATGCCCAGTGGTGGCCATCGTTGATCGGAGTAGCTAGAGTGTTGCCAAT -GAAGACTGGCGGCCCTATTACTAGAACACGACGAGGGTCTAGATACCGGAGGACTCGGCG -CCACCAGAATGGATCGACCCGATGATCTGGGTTTTCATCTGCGATCGCGACAATAGACGT -TACATGTTGACAGAGATCCGAGTTGATCATAAAAGAGCAGAAGTCGGATTCATCATGGAG -GTTGAGTCGGACATGAGTGAATAGTTGAGGGCGCACAAGTTGAAATAGGCGCCGTGAGCA -TAGCTCGAGATGCTTGAGGTCTTTTGTTGGAGATTGGGTGAGTTTCAAACTGGGTGTACG -ATGTAGTTGTCCAAATGACGGTGGCTCCGTGGATAAGTAAGAGATAATCTGGTCTAAGAG -CTCATTGGGGAGGAGCTCCAAAGGAGATTGCACAGACATGACAAATTGTTCATGTCTAGG -GCCTCGTCCATTACTAAGATTGGATATCCACCACCGAGTGTCCAATGTAAGTATACCTAG 
-AGTGCAGTGAGTCGTCGGCTGAGGGAATGAAAGTGAAGTGCCGGAGTTCCCCACTCAAAT -CGATGGGCTGGCTCCAAGGCTCCACTTAACCTTTGTAATCGGTGACTCAGCGATTTTTTC -AAGGTGTTTTACAATGTATCGTAGTGTGTGAGGTATGTTCAAAGAGTGAAGTCAGAAGAA -GGATTCGAGGGATAGGACAAATAGAGGTAGAATAGCAAGGGACTACAGATAATGCTTGAT -GTTATAGAAGCAACCATGTGGTCCGTGCACCTATATTTTGAGACAGCCGACGTCGGGAGA -GTTCAGGGATATTGAGATACTCCAGGTCGGTTGCCCATTGTCATATCATCATGTACCAAA -TTACATCCAGAAAGTTCGATATGTCCTGTTTATATGGACAATGGCTCCTCGCATGACCTC -AACCAAAACAAGATCAAGAAATCAGATTGTACAAAGCAACTCTCACGCCAAACCGGCCCA -TGTGGAGAGCCTCAGCACTTTGTGGGGCAATGCCTATCATGGCTGGATACGATGTAGAGC -TCGGATGAGATGGCAATGGCAATGGTAGTGCCCCTCTTTTGATTTATCCGTTTCCCTAAA -TCTCCTTTGTTTCTTCTCTGGACATCAATTTAAAAAGATTTCCAAGGTAAAAACAACGCT -CATCGTCTCTTGCAGTTGATCTACATACCACATTCACAATGGCTCCTCAACTTCACCAGC -GTCCTCCTTTCCGTGCAGAGCACTTGGGCTCTCTCTTGCGTCCAGATCAGCTTCTGGACA -CCAAGCTCGCTTTCGAGGCTGGCAAGATCCCCGAGTTGCAACTGACCGCTTGCGAGGACA -AGGAGATTAAGGCCATTGTTGAGACCCAGCAGAAGCTTGGCTATGCCGCCATGTCTGATG -GCGAGTACCGTCGTCACAGTAAGAAACCCAGAGAGCCATGTAGTGAATCATGTTAACATA -TAGAGTGTTCTGGGGTACTTTCTTCCCCGGCTTGGACGGCTTTGAAGAGGTCAGCGATGT -CGATGTGGATGATTTCCGTCCCTATGCTCCCGATATCGCTGCATTCTTGGAAGCCGGTCA -CAAACCCGGTGAAAGTGTCCTGTGTACCGGGAAGATCAAGCACGTCGGTAGCACCTACAT -CGACCAATTTAAGTACCTTGCCAGCCAGGTCCCTGCCAACGAGGTCGGAAACCTGAAGAT -CACTCTTGCCGCACCTAACTGGTACCACCTGCGTTACCGTGAGGGCAAGGCTTACCCCAA -GTCTGTATACGCCAACGACGAGGATTACTTCGGCGACATCGCCAAAGCCTACCAGGCCGA -GCTTCAGATCCTATATGACGCCGGCTGCCGTAACGTCCAATTCGATGACCCCAACCTGGC -TTGTAAGATGCTACTCATCCAGCCAATATCGATTGACTAAACTAACTTTTCACTCAGACT -TCTGCTCCGAGAAATTCCTCACTGGCTTCAAGGAGGACCCTCTGAATGTGTACTCCGCCG -ATACCATGTTTGAAAAGTATATTAAGCAGTACAACGACTGTTTTACCAATCTGCCCGCCG -ATATGCACGTTGGTGTTCACCTATGCCGCGGTAACTTTGTCGGTAGCCGTCATTTCTCCG -AGGGCGGTTACGACCGTATCGCCATCAAGCTCTTCCAAGAGCTGAATGTCCACACTTACT -ACCTCGAGTACGACACCCCCCGCGCCGGTGGTTTCGAGCCCCTCAAGTTCCTACCCACCC -ACAAAAACGTCATTTTGGGTGTCGTCACCAGCAAGTTCGCCAAGCTTGAAGATAAGGAGG -AGATGAAGAAGCGCGTCATTGATGCTGCTAAATTCATCGCTGAGGGTAACAATATCTCCC -TTGAGGCTGCGCTCAATCAGGTCGGTGTTAGTCCACAGTGTGGGTTTGCTTCCCACCGTG 
-AGGGTAATGCTATCGACTGGGACGGTATGATCAACAAACTGCAGCTGGTCCGTGATATTG -CCAACGATATTTGGCCAAACCAGGCTTAGGCTTGACATTTAAGCCTTTAAAATCCTAATC -AATCAAGCTTTTCTCCTCGTTAGTTTCAACAAATTTGACACGGCTTGGGGTTTGTGTTTT -GCATGGCGCTTGTTTGAGAAGATACCTTAGACGAAGGTAATGATTTTTGGATGAGCACTT -GATGTCTTTATTCTTGAATATACTTAACGGGATTCTTTTCAGTCTTTTCAGTCTAAATTA -TTTCTCCTCTCATTTTAATGCAAGAATCAACGACAGTCTCTGGCTGTTTACATACAACGC -GAGGTCCAAAACTGATATCTTGGCTACCTCCTGATGTCAGACTTCCTGCATCAAAGACCT -TACCACTTCAAGCTTTTGAGGCATAACAATCTTCTCTTCAACCAGCTTCTCGATAACACG -CACCGTCCACCCCTGACAATTCTCCGTAAGAGCCTTATGATTTTCCGCTCGTGGGGGAGG -CTCCGAATCTGCAGCCTCTTTCACCAGTCTCGCCGCCCTAGCCTGTTCCTGAGACACAAC -AGCCAGGGTATAGACGTTTGCACAGAGACCCAGGCCCGCGTCCATGACATTGATCTTTGC -AGCTGACGGCTCGTATTTCATATCCGCTGCATCTCCGGTGACTTGATGAATATAACCGTA -GTCGGGCTCGTTTTCGTCGGCAAGAAACAGTGCCCAATGGTTTGGCTCTCCGGGGGCTTG -TTTTGCTTGGACGAGGTAGAGATAGAGATGGGGGTTTTCGACGGGTGGCTTCAATTCATC -TGGATTAGAAGTGTCCTCCGTCCACTCCTGTGCACCTTTCTTCTTGAAGAATGTACGCCC -ATCGACCTCGCACACCTCTCTGAAACCCCTTGTCTTTATAGGAGTCCCTGGAGGTGGGAA -GTCACTCATTTTCTGGGAGAATTAGGCATCAGTATAGTGTTTTCGTATGAGGGTAGACAG -CTTTGCTGGTACCTACCGTGAGCCTCGAGAGCTGATGTAATTGAGGTTTAAGTGTGTGGT -AGACATAGACAGATGACGAGACCAATAAAAGTCTCAAATGCGCAGGAATATATCGCCTTT -TGGGGCTATTGATCACATTTCTACGTCATTCAGGCACTATCACAGACACTATTGAATAAT -GTATACCAAAATGATTGACCCTCCAATTGCCTGTTCGCAACACAGAAATCGACAGATCTC -AATATGACAAGGTTCTCATGGGCGATTCCAATCATCCTACTGCCTCTCTCATGTAGCCCT -AAACATCTATATGGCAAAAGTTAATTTTGAGAGTCTCTTCTACTTCTTCAAACACCTGAA -TATCGTAAATACTAGAGAATACTAGACAGAGCAATTCAGTTCAATTTAATATCCTAAGGC -GTGTACATCTGTCACCGAAGAAAAGACAACGCGTTACTCTGCCGGTCCCTCAATGTTGCC -CTGCTTGATAGAAGAGCTCTTGTGCGTGTGCATGTTTTCGATCATATAGCGTGGCTCGTC -ATCTGACGAGCTACGCCCTCCCAAGTTTTCACGAATGACTCCGACGCTTCTGCTTGTTTT -GGATTGCGCGCCTGTTGTGTTTGTTAGACGATTGCTTGCTATGGACATGTTTACGTATCT -TATAAAAAAGGGCGTATCATACCGCCAACCGGCTTGTAACTAACTTTATCTCCTTCCTTG -TACTGGGCCTGAGGCATTGCTGATGGAACAAATTAAGATGGTGGTAGTGATAGTCGCAGG -TCTGGGGGTCGTGTAATTGAATGGTTTGTATAGATGTAGATGAGTGGACAAAACTTTCGG -TCTGTGTAGATAGAAGGTTGTTTTCCAAAAAGAACCATTTTTGCAAGAGGACTCTAAATT 
-CACATTCTATTTATATTTGCCCATGGGGGAACAAGTTCCGCTATCGTCATGTTGAGGTAG -CAGGAGAAACAATCTGACTGGGTTGGAATTGATCCATAGCTATACGAACGACTGGATCTA -ACGCGGAGGGCGGCTCCGTAGAGACGAAATGCAAATTTGATCATGCGCTTAAATCAGAAG -CATATCATTTGTTTTTATCTTCTATTTAGTGTCATCTTGTCCGCCCTGTCTCCGCTATAG -CCAACTATGTGTATAAGGAAGCGCATCCACAATTGACCGAGGGTGTATCAACAATAACGC -TATAGAGAGCACAATGAAATTAAAGTATTAAATTGGACCCAACAAGCCCATATAAGAACA -GAGGGGATATCATAGAAGTGAAAATAAACGGAAAAAGCAACGGGATAACGAAGGCCaaca -aaagaaaagaccaagaaaacaaaagCCCAAAATGCTGCGTCGTCTTGTCATCAAAACACG -GACATGAGGAGAAAAGAGAAAACACTTGTCGGAGAATGCGAGATGTTGTACAACGAAGCT -GCTCTAGGAGCGCCTCTCGAAATCACAGCGGTGCATCTTATTTCTGGGGCTGGGTCTTGA -CATTGTTTGAGAGCCAGGCCTAACAAGAGAGAGCACGGTTAGATCGAAATTTCCAAGTGG -ATAGATCAATGACACAACTTACAAGACCCTCGAAAATACCTGTTCCATCTGTTGCAACAC -TAGGCGCAACGTACCACAGCTTGTCCTTGAGCTTAGTCAATTGAAGCTGCTGAGTGACTT -CCTCCGGTGACAGGTCTAGAAGTGGGTCAGCGGACAAATCATTGAATTTCAGGAACGGGA -TACGCACGGCCAGGGATATCTTGCTTGTTGGCGAACACCAGAAGCAGGGCATCCTTCATT -TCACGATCATTGATGATCTTGTGCAGTTCTGAGCGAGCCTCCTCCACACGTGCCGTGTCG -CTAGAGTCAACCACGAAGATCAATCCTTGTGTACCCGAGTAATAGTGTCTCCAGAGAGGT -CGGATCTTGTCCTGACCACCGACATCCCAGACGTTGAACTTGACGTTCTTGTAAGTCACA -CTCTCGACGTTAAATCCGACGGTGGGAATTGTAGTCACGTCCTGGTTACTGAGTTTAAGT -TTGTAGAGAATTGCTAAAAGGAGTTAGCCAATGTTGACTGAGGTGTATAGTAAGGTCCAA -AAATCATACTTGTCTTCCCAGCAGCATCCAAACCCAACATGAGAATACGCATTTCCTTGG -TCCCGAATATCTTCCCCATCATCTTAGATACTTGTCCACCCATTTTTGCGGGATTTGACG -CGGTATCGGACGTGCTCAGGAGCTGTTTGGTGGAATATCAACAATTGCGACCCGCGTCTG -AACTGCGGGTTTAAGAGAGGGAATATAAGACACCAGACGTTTCCGTACAAAGTGACTCTC -TCAATTATGATAGCTCCAAGGGATAGAGAAAAAAGAAAAGGTGTGGTGTGTGTTGAAGAT -CTTCAATTCAATTTCCCTGAGGCGGGGGAACCTGGAAAAGGGGCTTGGGCTGAGCTTACT -CATCATAGTCTCCACGAGGCACGGAGTACACGAGGCAGTTTCATCCTAATGGTTTCATAT -ATTGAACTTGTTTCATCCCTAGATTCTTTATCAACACACTTCTCCATTACCATTCTAAGG -TTAAATGGCATCCAATCCGTGTCCCACTGAGGATTATCTAAGACGGAAAAATATCACTCC -GGGGGTAACCATGGACCCTCACGGGATATATATAACCCTGGAGTGTAGCATGTCACTGTC -TTATAAAGATGTATTTAATGACACTCACACTTTTGGATATACAATGTTATGTACTACCAC -CTTATTGCACAGGGTAACATCATACCAGTCTTGGAGGTTGATAAGACTATTTAGAACAGA 
-TCAATGTCTATAATCGTCAATTTGTCGTTACGTCATGGCCAAATGACTTCATCCATGCGA -TACTACATATGCATCCGATAGTTCAATTCTTTTTCTTAATTTTTGAATAAGATTGATTTT -TTGAAATTTCTAATTATGCCTGGCGAAGGACATAAGTTCGAGCGCCATGCGTCAGTCACA -CAGCAACGTCGACTGTCGCTACAGTACGAGCGCAACGCTTGGTTAGGGCCGCCGTCAGAT -GTATGCACATTTTCAAAGTATCAGTAAAAGCCACTTGGGTAATTCTAACAAGAGCATTAG -ACTACCTACGCCGGTATCTCGGGCAATTTTCAGGAACACCACACATCAATCGCAATCGCA -ATCCGCGATACTACTTATCTACTGGACTTCATCGAGAGGAAGTTCACGGACGTTCATGCA -TCCGCAGAAGAAGTCACTGACTTCATCATTGCCAAGCTCAGGGCATATTCCAAGCACCAC -ATGGAGAAGTTCATTGGAGTGTCAATGCCTGAAGATGTTGCAAATCACTGCCCTCGACTA -TGCCCTCGCCTTTGGGCGGAGCTTGACATCGTTCCATTGGTCCTGTCAAACGTATCACTG -ATAGACAGGATGAGTGTCGAACAGTTTGACGAGACTCAAGACCCAGCAACTACTGGATGG -GATACAAAGACCGTCGATGAACAGGCTGAATCCATGGCACGAAAAGGTGTTAGGTTGGTT -GATCCCGTTCAGTATCATGAAAGGACTCATTTTGACTCTGAGTACACGCAGACTATTTGG -ACCGGAAAACACGCCCCTGCTACAAGTGGGCTTCCTCGGCCTCGTGGAAGTGGACACGGC -TTACCATGTGAGAATGGCGGACTTGAGTGATTTCCAGACAACAGTTTCGGATAGGACATG -GTCCGCAGTGCAACAATACGCCAATGAAATGAAGGAGCGCAAGGTAAAATTGGCATTTTT -CAGCTCCACGCCTCAGGGTGGTGGCGTGGCATTGATGCGGCATGCTCTCTTGCGATTTTC -GCATTGTCTGGGCACTGACATCAAATGGTATGTATTGTGTTTGTATCTGTGTCATTGCAA -GAATAAACACTAATTGCTCGCAGGTACGTCCCGAAACCACGGCCAGGCGTCTTCAGAATT -ACCAAGACAAACCACAATATCCTGCAAGGTGTCGCCCATGAAGACGAGCGCTTATCACCG -GAGAACAAGAAGATCCTGCGGGAGTGGACTGAGGAGAATGCCAGGCGATACTGGACCCGA -CGAGATGGACCTTTGCGTCCGCCATCAGAGGGCGGTGCAGATGTCATAGTTATTGACGAT -CCTCAGATGCCATGCCTTATCCCCATCGCAAAAGCACTGGCTCCTGATCGACCAGTCATC -TTCCGAAGCCACATCCAAATCCGAAGTGATCTGGTTGCGATACCTGACTCCCCACAGGCA -GAGGCCTGGGAGTTCCTCTGGGATAATATCAAACACGCCGACTGTTATATCAGTCACCCT -GTGAGCGACTTCGTGCCGCGGAATGTACCCATAGACATGGTGGGCTATATGCCTGCATCA -ACTGACTGGTAAGGACCGAATAAACCGATGATCTGGTCTCAATTTTCATCTCACTAATTG -GGACATAGGTTGGATGGACTGAACAAGACTATGCGGGACTGGGACGTCGCGCACTATGGC -CGTATCTTCAATTCCGCATGTCGCAATACTGATATGCCATGTATCCAATGGCCCGAAGGT -TAGTAGCCGAACCAGCGCCAGCAATGATGATCATATCTTTGAAGACTGACACAAACTAGA -TTCGTACATCGTCCAGATCGCACGGTTCGATCCGTCTAAGGGTATTCAGGATGTGTTAGT -TTCCTACGAGAAATTCTACAACAAACTAGTATCCGAAAGCCCTGAGATGATCCCTCCTAA 
-GCTACTGATTTGCGGACACGGATCCGTGGATGATCCCGACGGAGTACTTATCTACGACGA -GACAGTCGAATATTTGGAGAAGAAAGTCCCAGATATCCGCCACCTGATCTGCGTGATGCG -GGTGCGGCCCTGCGACCAAGTCCTGAACGCCATTCTCTCCAAGGCAACCATCGCACTGCA -ACTATCTAGGTCCGAAGGTTTCGAGGTCAAAGTCTCCGAGGCAATCCACAAAGGCAAACC -AGTGATCGCCACGCGTGCCGGCGGCATCCCGCTGCAAGTGACGAATGGGAAGAATGGGTT -CCTGGTTGATGTTGGCGATACCGATGCTGTGGCACAGCACTTGTTCAATCTCTGGACTGA -CCACGATCTATACAAACGCATGTCAGAGTACGGCATCAACAATGTAAGCGATGAAGTGAG -TACCGTGGGAACTACTCTCAATTGGCTCTATCTCGCCACCAAGTTGTCGCGTGGTGAGAC -TCTGCGGCCCAATGGACGCTGGATCGATGATATGGCGTTTGAAGAGATTGGTGTTCCCGA -GAAAAAGGATGAGCTGCGGCTCACACGCGCAGTCAGGGTGGAGGATATGGGCTGATTGTA -CTTTTTGTTTTGACTCTCCTTCGATCACATATTAGTAATATATAAACATTTAAAATACGG -AAAATATGTCATTAGATACTTAATTGGTATCCAACGTTGAAAAGTAGTCGATTGGGGGTA -TCATAAAATCAGAAAAACAAGAGACAAACCCAGCAACCTCGTACAGCAACCTCTCCTAAG -GGCGTCCAACAACCCGATACCTCTCCCTGGCCATGCCCCCCTCCGCCCCCTCAACGCCAT -AACCCTCCCCAACCTCTCCAACAATCCTCGCTGGCTTTCCACCAACAATACACCAACTCG -GCACAACCATACCAGGCGGGACAACCGCGCCATCAAGCACATAAGCATAGTCCTTGATGA -TAGCCATACTGCCTACAGCAGCACCCTTCCCAACATGAACGTGGTTCCCTAGTGACGCCG -CCTCAACAACAGCGCCCTCGCCGACAAAAACATGATCGCCAATCTTGAGCGGGTAGAACG -AGTGCACGCCGCGGTGCAGCCGGCTCGGCGGACGCAGGATCGCGCTCTTAGAAATGTAGC -TGTATCGTCCGACTGTTATCGCCACGCTGGGCGTGCTGGGGTTCGGCGCTACTGCGGGAG -TGTTTGCATCGGTGGGATTGGGGACTGTTGTGGTGCGAAATAGGTCGCCGCGGATTACAG -CTTCGGCTTGAATGACCGTTTTCCCGCCCAGGATAATGTGTTGTGTGCCATGGATTTGGG -ACCGCCGGGATATCTTGTTTCCCGTGTCCTGTTTTTGGTGTGTTAGGTTGTGTTCTTTGG -TTTTGGGTGGGTTGGGAGTCATGTTGTGAATTGAAAGGGTAAGGTGGGATTTTGCCTACA -GTCTCGATATATTCACCCTTTGCTGCCTTTGGTGGAGGCATCTTGACAAGGATTAAGGGG -GTTTCCTGTCGCTGGATGCTTGCTGTAACTGTGGAGTTGGAGTTGGAACAAACATAGCTT -GATGCTGAATAGTAGATACGGCAGTATAGTCCTAAGGCGGTGGAGGTAAACAACTATCAA -TGTATGATGATTTTGATGGACCAATTGCAACAATTTTCCAACACCTATCACTCGTCACTC -TCTAGTACCTATCCTACAATATACACTTCATCAGGTGACTCTTTATTTGACGCACCTAAT -TCACTATACATGCGAGTAATTCAATCCCTTTGACCCTCGGCTTTCCCCACTTCGTGCGCG -GCAATATATCGTCCAATAATATTCCCAACTTCGCCAATTTAAAGCTCTCAAATCTCGTGT -TGGCACTCGCCGGCGCAAGAAAATTCTCTTCGTGGTCCACCGCAGTCAGATGAGTTCTGA 
-AATTTCGGGACCCTCATCTTTTCGAGATCCTCAGCGGCAAATAAGCTTGACTGGCTTTGG -AAAATCCCTCAGTAACTGAGTAGACTCGCAAGTCTCGCTACCTGCTTCGAGAAGTTCCAC -AAGGACTCAGTAGTTCATCCTTCAGTAAAACAATAATGGCGCTACTACTTTGCAACCATT -CGTCAATAAGTAGCGCATCGCTTAGGCCGAATCCCAAAATCGAGACGTGCGGTCAGTCGT -ATACTTCAAATTCGGCCTATAACTTCCTTTTCTGTAAGGTATACAGCCGTTGGCGTTAAT -CCCGCTACCCATTAGTGGCTGTATAGCCTTCGCACTTCCGGCTCCGACTCGTTTGAGTCT -GTACTTGTCAACGCTTGATCCGATTTCGGGATTCCGAAGAATCGACGTCTCGCTAGGCAC -CCTACTCGTTCTTTCGACGATTTGCTTAACGACACAAGTGCCGCGTCCAACTAGTAGGGT -AAAAAGCCCTTGATAATTAGACTGAATACTTTTACTTTGTACTCGCAAGGCCGGGCGCAC -GCCAGGCACACTTCAGAAGGACTCCGGCCACAGGTCTAATGCCGAGTATAACTCAGCAGG -CGACCCTATTTTGTGATACAACAACTTTTTTACTAAGTAATGTTTACAATCTGTAGGGGA -ATTACATCTCTCGGGTTTTATTTAAGTGGTGTGTCAAGCTTCATAGTAGAATTTAGTATT -CTTAAGTCATATTCACTATGCATCTCGAAGTCAATATACATATCTCCTTCTTTCTCTAAC -AGTTGTGCATTGACATCTAATTCCATTCCATCAATGCACTACAGCTTGCAGATAAATAGA -TAGCCATATATTCGACTCGAACTAGCTCCCGCGTGGTCACGCTCGATGACCTTCTCTCAT -ATCATCTTATACCAGGCCCACCACCGCTCATGATGGTAAGCTCCCAGTAAGTCTCGATTG -TAATTATCATTCATTGGTGAAAAGAAACAATGATATATCAAGGCCCAGCAGATAATGTTG -CTAAAGGCTTGCGTGGATGATGGTGCACGATGGGCTTCATCCAATTAGGATGTATTGTCT -ATTTTGCATGTTTGTTAATCTAGAAACTCACATCGTATTTTGGAAGTGTGCAGTAAGTCA -GAGACTTAAGGATATACTTACTCTGGGGCTGAATTGCATACACAACGCTGCAATGATTCG -AATTGAAGAATAAGTAGGATAGGAATTGCTTTGACCATGGTCATGTAGATACTCACGGGT -ATACAAAGCTGGAGACATGCGATACTACAAGCCCTAGTCAAAGGCAAGTAATCAGCAGAT -CAATTGAGCCCTCTGAACGGGCAGTGTGTCTAGATCGGCTCCAATTGCCTTCTTTGTGCC -CAGATCAAGTTATGATATCATTCTAGAGATAGATGTTGGCACAGGCTTGCCAAAAATAGA -ATCAAGGAAGATATTGGCGATTGTTTTCCCGCTGTTTCAGATGATACACTCCCGCAATCA -GGATGATCAGCGAGGAATGATATGCATATATGTATAATTGATGTATTTTCCAGTGTTTCA -GAGAATATATCCGGCAAGCAGGTTGGCTAGCGAGGAATGATGTACATCCATGCAAAAGCG -ATCGTCTTCCCGGTGTTTCAGAGGATATACTCCCGCAAGCAGGGTTGAGAGTCAGGAGTG -ATACATCTACATCTATGCAACGGTGATCCACTTCCTAGTGTTTCAGGGGATATACCACAG -CAAGCAGCCTTGGAGAGTGGCGAGTGAGGCATAATACATCTATATGACAAATGGAAATTT -CATTAGCAAGAACGATAACTGAGTTCTTATCACAGGGCATCGCAACCGACCTACATCATA -GTCTAGGCATTTGTAATGGACAGGCCGTTCATTCCAGATCGGGTTTCGTCCTGTGACAGG 
-ATTTGGACGATCAGCAATCGAAGATCCTTGACTGGCGGGATACTCCGTAAAGCCATGGGC -CACCATGTGGCGAGACGTTTCAAGTAAAAATAGAACCGGACGGCTCCCGGATGTCAGGTA -CAGAAGCCAAATGATCAGCTCGTGACATATGCTTGTTGTATACAGCAGTTTATATTGGGC -TGTAGGACCTGGCCAATGGTTGGAACGGAACTTATCCTATGCACGAGAAAGAGGGCCTCG -AAGCTTTGAGAGCTTGCTTCTCTAGGCAGGTGAGACTAAAGAAGTAAATGATCTCCTTCT -CGGCGGAAGATGCCGCGTCTGTAGTTCCATTGGCGATTAGGAATGTGCTCGTGCTCAACG -GGCACCTCCCCCTGTGAAGCCCAGGTAGCTGGGGCACTAGGAACTGCAGCTAATTGACGC -GCTAAGTAGGGTGAACCACTATCCCGATGTACATAGGTGCTGTACACATGTATAGCTCAA -GTAAAAGGGCAAAACAGTTTCTACTTAAAGACGTTCAGCTCTGCTATGTCCTCAAACTAG -ATATCCACTAGAAACGTCCAAGGGAGGGAAACACTATCTAGATTCAGATGGGGGTTTACG -GCTAAGAGAAGCGTGCCCTAAAAAGGAGGGAGGCGTTGGAGAGGGAAAAGCCAAAAGCCA -CGTCCCCCACCTGGGCACCAGAACGGGAATCCATATCGCTCTCATCCATCCCGTTTGCTT -AGGTTCATTAATTTGAGCTTGTTCAACTAAATTTTGAACTAATCCAAATCCAATGTACAT -TTACGCCGAAATGCAAAGAAACAGGCTATTTTTCCCAAAATTCCGGGTCCGAATCTTCGC -CTCTCGCATTACCCCAGTCTTTGCAGTATGTGGAGCTTTGACCTACCTCTAATGCCCTAA -CAACGTTCTCCAATCTATCTCACGTGGGGGCTATTTCCTTTCCTTCCGTCTTGCTAGGAT -ATGCTATCCCCTGCTTGCGCATCGATCTCCCACACTAGTTTGTTCGACACTTAATTGTTT -TGGCCCCCATGCAGAGAACATTCGACCGGCTAATTTGACCAATATATAGGGGAATAAAAC -GTCGGACTAGCCGACCTGCAGATGCTGCCTTACTATTCTCCTCTGAGCGTTTGAGTAGTC -CCACTACCGTTTGCAGGGCAGAACTCCCCACAATAGCCACAATAGCACCTCCACTAAGGC -GATCATGGCCGAAAGGAGTATATGGTTCAGTTCTCACATAATGCAACTTTGTTTCAATTG -CTCTTTCCCACTTCTACAACTTGTGTTGTACACCGTCATGTTATCCAAGTTTTAGAGCAA -TGGGGAGGGAATCGGCCTTTCATACTTTGATGGGCAGATCAACCTGCATGGCATTGAAGA -TTTTCAGTAATCCTTGACGATCATCAGGGGGGTTCCTCTTGTGACATACCTTTGTAGCCG -GAAGATAGTCACTTCCCGCCCTATTCAATTTGCGCACACACTCTTAGTTCACGATATTTA -AGACATTAAGATTACAATTGACACTGTGGATATCACAATGTGGGACATTAGTGGCAACCA -TGCACGGGCGAGAGATTAAATCCATAAAAGATGTATGAATCTACACTAGTATATAGTACT -ATGGGTAATACAACTTGGATCACCTGCATAGATGGGGTAAGAAAGATGAATGAGACAAAA -GAGCATAGACCCATTAGGAACGATGTCATGTCCATTGTGGAATAACAAAGAGACAAGCAA -AGTCATTGTCAACAAACCAAGGGAAATTGGGTGTATAGATTCAATCCACAAAGCTCTGGT -GGATCTGATTGCGGAGTTTTGTCAAAGGCTAGAGAAAGCCATGGTAAGTATTGAACGTAT -AGAAGGTCTCTGATTCCTCACGTTTCTTTCATAATCCACAGAGAGCATTTTTACACCGCC 
-AATCAACCAAAAAGGGAAAAAAACCGAGAAAACTAAAACACATTTATCAAACTTGCGCGA -TGACATCTGACTCTAGCCTCCAGGCAAGTGTATTCATGAACGGCAGGGGCCGAATTGCAG -ATCCGCACGACGACGTGCCGGACTTCTGCTGGGTTACAGTGCATTTGCCCGAGAGCATGT -CGTGTGACCGGCAAAGTGACGAGGAGATTATGAATCTAATCCTTTGCAAGATCCAAATTG -GGAATGTCATCATAGGTGACAGTGCCCTGCATAGCCTAGGATTCTATTGCCTTTCGTTGC -CGGTCAAAGGCAACCCCGGAAACACCGGTGAAGTTCTATCTTACGCTGAAACTCTCACAG -AATGGTGGCTCAAGCAGATCCGCCTGGGGAGAGTCTGTCTTGACCAGTACAGTATTTTTC -AAGCCACAACGCGCTCACTAGAGCACACTAATAGCCTAGAGGATGTACCTCATAGGTGGG -GGCTGTCGCGGTAAGTGTAGATTACTTCTCATGGAAATTGGATTGGATGGCAAAAGAGCG -AATGAAAGGGCTTTTTTTGCTAAACGGATATCCCGGAATATGGCGAATATTTGCCGGTGA -TCGACTAAAAAGGTACCCAAGAGGGCATGTCGGAAGCGTAAAAGCGACAAGGTGGGGGCG -GAAATTGGGGATAGTCTCATAGTTTAAATTTCCACTTGAGTCAGGTAATGAGATTGACAA -GAGCTTAGAGTTTGTATTTGATGCCTGTAGTCGGGTAATCTCCAACGTTTATACGCGTAG -TTGATAGACCCAGTATATCTCTCTTATCTGGTCATTTCTTGAAGATTTACTTGTCTATAC -AATCTACGGTTTAGCAGTCTAGATATCTCTAGCGCAATTAAGCGAGGTATTTCACAATAG -ACGACATGATCAGCCAATTTTGATAATAATAGCGCACCGTTGAACATGAAACTTCCCATC -TCCCCCTGCAGAGATCTCCCTTTCAGCCCCCACAAAAACAATTTAAATTTTTTGGTCAAC -CTGGTATCCATGCATATTACCCCTTGCAGGTTTTCCTGCATCAAAACACATGGGAAGTCA -TGCAATGCTTCGACATCAATGCCAAGACACTAACGGACGGAGCTCCCCCGAGGGCCTAGA -GGGTCTTGACACGTGCGGTTCACTCCATATGGTTCCACGGGTCCGCTTCAACGTTCACGA -TACCAGATATCTTGCATCTGAACCAGCGGTAGCAATTCAGATGGATCTATGAGGACGATC -CCTTTATCGTCTTAATCTGCAACACGGGCAACTCAGGGCAACACGCAGATTAGAGACCGG -GTAGAGAGCCGGGTGTCTACCCGGGCACATAGAATTGATGATTCGACAATTCAATACGGT -GGAGCCTATCTACTTTGCTCTGAAATATGGACATGATAGATCAAGACTAGCCTATTTTGG -AGACAACATGAGAAGTTCATTCTGTCACGGCCCCTGTAGGGGATGATCTTATCGACCCTG -AAGCCCATGTTGGAGGTCATAGTTCCACTATATGCTAGCGCATTTGGCCCCCAAGCTATC -AAGAGGTGCATTCTTCCACATACGCAGCTATTCCAACTCTGACTCGCTGCACCGTTGATC -TCCACACGTGTCGACCGAAATCCTGGAAGGACGCTATTCCAGTCTAGTCTAGTTCGTCAA -AAGACGATGGGCGAGTGGAGCAGCATATCCCGACTAATCTGCTGTCCCTGGTTATCTGAA -TCACAAATGTTACTATACTCCTTTTTACGTTAGATCCTGGTTTGAAGAGTGCCCCATTAC -TACTTGGGTGTTCTATTCTTGGATTCTTCTTCTTTTAAGAGCGGACGTTCGACTCTGTTT -AAACTCGCCCCGTCGATTCCATCCAAACATTCTCCATACTGTCCTTCCTTTACCCCACAA 
-GGAGACGAACGGCTCCAGTGTTTTGACTACACATATCTATTGCCCCAATACCTTGCATTG -GCAGCTGGAAATATGTCGTCCTCTCCTGTCAACGCCGCCAGAAATGCGCCGGACGCATCC -ACCGTACGGTGGCAATTCATTGACTCTTCGAATAATAGTCGATCCAACTTGACCCAGGTG -AAGCGCCATGTTATGCAAGAATATATGCGCCAGAAGAAAGGTGGCACGCGTCAAAGTGAG -AGTGAAGAGGAGCAACCACGAGCAAAACGTGGGAGACCCAAGAAGACTCGAGCTGCGATT -CGAAAGTCAGAGAAGAAAATGAAATCGGAGGGGGAAAATGACAGTCACCCCCGAGCCAGG -AGGTCAACGAGGAAGCAAGTTGCATACAAAGAAGATCTCATTGTCGAGGTTAACCGCAAT -ATGTTTGCTTCCAATCCTATCTCGTCTGATCCTCTGTTGATTGAATCGGAGGATATGCCC -TCGTTTGCCCCTTTCCGGCCTTCTTCTGCCCAATCTCACGATCTTCCTCTTCCACTGGAT -GGCTTTGTCGACGTACATCCTCAGAGTTCTCCGACTCATGACATATATTTTAACAATATC -TCATGGTCTTCACCCTCCACGGTGCCATACCAGTTCATACCATCACCGAGTACCATGTTG -AGCGATCCGCGAACCGACTCTTTCAGCACTCTTCCTCTGGATTTGGATCGAGACGGACAC -AGGGCATTTGACCCCTATGTGAACGATATACCAGCCTGCTCCTATGGACCCCATTATCGG -CCAATGGCTAACGATTATACATCGGCCTTTGTGCCTGAGACATTGAAAGGTACTGTCACA -TCCCCAAAAAACCATTCTTGTACATGGAGCTAATACTTCGCAATGAAGGGGAACCCCAAA -ATACTCTTCTTCGCAAAGACCGTGCCGTTTCCTACATCCCCCATGACATTTCCGATGCTG -CAATCATTGCCAATCTGAATGCTGCAGCCTTGGAGGAATGCGATCCTCGACCAGGCCACA -AGGAAATCAGCTCGGCACATATGAGAAATGCACGCGAGATGATACGAGCGCGTGGTGGTC -CCGCCGCATTTGCAAACACTCGTATCGGCATGATGATCAACTGGCAAAACTACATCTTGC -CAGGATACGAGACACACGGGCCCAACTTCTTCTACGAATACGACCAGCATGTGCCCGTCT -CATCTGAATCTCTAGCGTTACTACCCCATCCCACCCCAATTCCCCATTCAATGCCCTCTC -CACCATATTCCACCTCCTCAGCTTTCTCAGAAGTCTCGCCCAGCCCTGAACCTCGCACAA -TGCTTCCACCAGGCCTACAATCAATCCCAATAGACGAGACCAAATTCCAATGCGAAGAGT -TCCTCGACTTCCTCCGCCGTTGCGAACAAATCGCCCTATACCAACGAGATAACCCCCAGT -CATCGTACATTACCCGACACACTGCAATCCAAGAAACATCAATCCTGAACCAAATCCTTG -CGGCACCTCCAGGCGCCAGATTCCCCACCCCGGGCGACCGAAAACAAACCATTGCACGCT -TAACAGCGTTGATGACGCTCAATGCCGCCATGTGGGATTACCGTAACACCCCGGCGCGCA -CAGCAATCTTCCTCGACACACTCGAGAAATCCCTGGTCGACAGCGAAGTCGGCATGAGTG -GCTCCGTCGAAGCGATGCTTCAAATCCTGCTCGAATGCACAGATGGCACTCCTGATGGCT -GGTCCCACAGTGCCGACGGCTTTGCTGCCGCTGGGCCAGTAGAAGAGATTCCAGATTTCT -CTCAATACTTCCCGACTGCTACTTCGCCCTCTGCGCGCCCGTGGTTCGCGGGGCGTATGC -TCAAAATTGCGAGTCGGTTGAGTAGCTTGTCGTGGTACCGCGTGAATGAGTTTTTGTTCT 
-CGTGTTTGACCCTCCAGGTGCAAGAGCCGTCGATGGGTCTTTGGGAGGCGGATCTTCGTC -GAGAGATTCTTGATCAACCGACTACTTATTTCATGCGGTAACTTACTGTGTTATGTATAC -CACTGGCCTGGGCCAGATGACCTGTATTtgttttgatgttttgatgttattatgttttca -tgttttCATATGTCTTTTCTTACCCCCTTCTAACCCTTTTTGAGGGTTTCTTGTATATAC -CCCTCGCTATCATCTACAAAATATTGTGAATTATTGATGAATCTGGAGCAATTGTAGAAA -GATGTCAAAGTACATGAAAGTATATTTACCGGAATTTCAGGTTGCCATCGTGATATTTCC -GATCGGGGTGCCGCCGCTTAATGAGTACGTTCAAGGTCCTGCTCACTGAGTATCTAACGG -GCTTCCGAGGTCAGTGTATGCTTGGTAATGTACTACTAATGCTCAGAGTATGCTCAGTGA -TCACTCAGGTCTTGCTCACTGAGTAAATCTGTACAAACCTGAAGCTGATCGTCATTTTCT -TCTTGGCGTCTTCCATAATACTTGACCTCCCGTTTTCAACCTTCATGGTTCAAAAAATGG -TGAGAACAGCTCAAAATTTCACAGGCCTCTGCCCAACCTAGCGGACGGATTCAATGGGAA -TATGTCGACCATGAAGGCAACACCGTCATTTGGGGAAGGATGCCAACGAAAACCAACTCT -ATAAGCCGGAGGAAGGTGAACCTTACCGACTTCAAATATACAGCATGGCTCCCATCCAGC -AGGAAGAGCTCAAGCAGCTGTATCAATATCTCTGCTTTGACCTTGGCCAGCTCCCCTTTT -TGGAGTTTTATTAGTGCGGCCCAGCGGACGGCTTGGCTTGCGTGGAGCATCAGCGTCGTG -AAGTTGCCCTTCGCAGGCGTCTCCGTGCCCAACAGCGCAAAGGCGAACAGGACAAGTCCC -TTGCCCTTTTGATTCCAATCATGTGAACTGGCTTCGATGATCAGTCTATGTCTAGGTTTT -GCTTCCTTCTCACGTCAAAGATCTACCTCCAGGGATCATTCCCAGACAATGAGCACGGTA -CAGGGCCCTGGTGGATCAGTTTCGATCGGAGTCTTCCGTCCGTGATGAAGAACCTAGACT -TGATTAAACGTCTTGACAGTCCGGCGACTGATCTTCAAATTTTTGCCGAGCAGGGGATTC -CCGTCAATCCTGAGATTCGTGATATCAATGTGGATATTATGCCGAGCGGGGGATTCCCGT -CAATCCTGAGATTCGTGATATCAATGTGGATATTACAACCGACCAGGCTGAAATAAAGTC -CGACATGGAAGAGCTTTTAAGAAGTATCTATTCGACATTTGCTTATGGGAAAATTGATTA -TGGACTCCATGAACCGCTCCCACCCCCATCCTCCGAGGAGACTCCAACCTCGCAACATAT -CCAGGGGATTCTAGAGCAGCAGCAGCAATCGGCGGAAGTCCAATCTATGGACTTGAGGTT -ACTTCATCTTACATTGGGAGCTGAAAATAACATTGTCACAGTCACGGACAGCTCTTCGGG -CGGGGAATGCGACCTTCACTACTTGATCTACGTCCTGTTCCTCGCGAATGTTGACCAGGA -ACAGTCCACCCTCCAAGAGACCACTGCACGCACATTCACCACTGGCGTTATCTCCTAGCT -CCCTACCCCCCAAACAATTTATTTTGAGTTCCAAATCCCTGGCTCCTCCTTGTCTTCTCT -TATCTCCGCTCCACCCAATGGATTCGTTGTCGGCGCCTCCCATGAATTTGAGGCGGGTTT -GGTCATACAGGTACTACCTCAGATTCGCCGTGACGTCTCTGTCCGTCTGTTACCCCATCA -CCTTTTCACTATGGTTCTTGACAAGCCTGCCTTTATCCAAAAGCCAAGTGTGTTGTTCTA 
-CACCCTCTGGACTAAGCCTTCGCAATACATTGAACCACAGCCCACTGATACCGTTATTGG -GACTCTGCGAAGTGCTGGTATTCAAGAAGCTAAAAGAAGGCTTGCAATGCTTGCGGTGGA -GGAGAATATCACCGACAGTCCAAGGAGACTCACGCGGGAATTATCTTTGTCGCCGGAAGA -GCAAAAGCAGAAGATGAATCTCTAATATTTCTAGGCCAAGGAACCAGGGAAGAGACATAG -CGTTGGAGGTACGTAAATGCAACCCAACTGTGTATTTGCCTCCGACGTAGAGGCGAAATA -AGCAGGACCGACCGCCAAGTTGATAACACCAGCCTTTCGTTGAAGGGCAACGGTCAAGAT -TGCTCGGTGAACGACTGCATCTCGATAGGGCGAGTGTGGTGCGTTTGATTACCTAGGTGG -GTAAGGTGAGCTACTGACTTAGCTACTCTTTCACTTTCCAAGGTTTCCAAGGTTGCTCAG -GCTGCTCAGTGAGCACTAAAGTGGAAAGGGAAATCCATCTCGCGCGGCCTAAACCCGGTG -TATATTTATTCGCCATCGATTCGTATCTTGGTGTAATTATCTCCCATAGTATCACCAATA -CTATTGTGTCAGGATCGCCGCTACCTCTCATCAACTTTTCATCTGTCGCTTGGTTGGACA -TCTAACCTCGACCCTCTCGAGTCTATTCAGTGTTTCTGTGTCTTGAAAGTGGGCCTTGAA -TGTAACAGTTCCTTAATTCAAATTAATCCAAATATGATAATAACATATCCAAGTGAAGAC -AAAAGCCAAATTAAGTGTGAGATCTACAACTAATCTTCTCCAACAAGTTCCTAGACATAG -ATCGCCAGCAGAATCACCATCATAGTACCCACAGCCTGCATAATCGACGATTGAGTACCG -ACAGACGCAGCTCCAGTATACACTGGGCTCACTGTACTTGTCGTAGTTGAAGCCATAGTC -GTCGAAACAGGAGCCGTGGCCATTCCACTAGAGTGGCTACTGGGGTACCACGAATGAGTT -GAGGAGGCAGTGACAGAACCACTGGCTCCAACTCCAGAGCCCCGGTGTGGGGTATACACA -GGTGGGGCCGCGGTGGGCCCGGCGGCATTGGTCTGGGTCAGCACGATCGTGTTGATGTAC -CCGGTACAGGTATCATCGTCAGAGCAAGATTCAACGAGGATGGTCGTGGTATGGACTGGG -TTTGCACCTGGGCCAGGTTGAGATCCGGGAGCGACGCCGGTGGGTTGAGGGTTTGCAGTA -ACCGGATAGATAGTAGTCGCGACAGCGAATGTTTGGGTGGTTGCGTAAGAGGTCTTTGAT -CTCAGGGGGCAATCGGTCACAGATTCAGGGCAGGCGAGAACGGTGGACGTGACAGTAGTA -AAGATAGTAGATGTGGTCATCTTAGGAGCTCCGTTATCACCACCTCCAATTGCTTCAGTA -ACAGAAGGAGATGTAGTATGCTTAGTGGGCACGGCGCCGTTGGTACCAGTAGCATCTGCT -ACGGGGCAAACTGTCGTGGAGACAACCAGGGTCTCGGTGACCACGTTTGTGGTCTTTGAT -TTCAGGGGACAATTGGTCACTGAAGATGGGCATGCGGTGATGGTGGCAGTGCGAGTGGTA -TAGACAGTAGATGTTGTCAGGTCTGAGCCATTAGCACCGCCAGTGGTGGATAGAGAAGCA -GGTGCTGTCTGCGTAGGGCCGGCAGTTTCAGTGACAGGACAGATTGTGGTGGATATCAGA -ATTGTCTCTGTCGTCACAGACGTTGTCTTTGAACTGGCAGGGCAGTTGGGGACTGTAGTA -GGGCACGCAGTGATGGTAGCTGTGCGCGTAGAGAAGACCGTAGATGTCGTCATAGAAGCA -ACAGTTGTAGTGCTGCTATCAGAGGCAGAATTCGAAACGCTGGCCTGTGAAGAAGATGAT 
-CCGACTCCAGTGGTTGATCCTTGGGAAGGAGAACCACTTACTTGCGCAGGTCCAGTTGAA -AGAGAGCCCGTTGCAGACACAGTAACAGATGGTTTGGTTAAGGGAGTAGAAAGAGACGCC -GAGGTTGTCGACACAATAACAGATGGTTTGATCAGGGGAGTAGAAACAGGCACCGAAGGT -GAAGTAGGGATTACACTGGAGGAGAGAACCACCGATGAAGAGCGAACAGGACATGGCTTG -CGACTGGGCATAGGCCTAGGGATGTGAGACCAAtggtggtggtggtgatgctggtgatga -tgctgcttatggtgTGTCGCCACAGGAGTAGGAGTGGGAGTCACAGTTGGCTCAGCAAGT -ACTGTCGTAGCAAGACTGGTCTGCACAAATAATGCAGCCAAAATCCATCCTCGAACAATA -AAGGAACCCATCTTAACCGTAACCGGAAGGATTGAACGATTGACAGTAAGGCGATAAAGT -AGCAACGAAAAAGAATGACTGATCCAACCAAATAATTAAAAGGAATGACTGGTAGTTGGA -CCGGAGATGCAAAAAGAGTGTAAGAATCTAAAGATCACAAGAAAGCTACCTAGAGGAATT -GTGCTCGTATATACATTGTCGCTTAGGGTTTCCTTCCATAATTGGAACACGGAGACACCG -ACAACAACAAAGCCAGAACTTGCGCTAACGTAGGCAAAAAACTCCGGTTCCTGATTTTTC -CCCCTAAATTTGACTTAAACTATTGACGCCATTGACGCCCCCATTCGCTTGTTTGTGACT -TGTCTTGGGTTTGTACACTGTCTTGGTAAAAATAGCCGGCCGAGACTCGTACCCAACACA -AATGGGAGAAGTATTTAGTGTGAGGACGTTCCAGCAAAATAAATTCCCCGAATCATCTGA -AGAGCATTTCTGTGAGATAGAACCATGTTCATCCGTCAATTTACCCTCGCCGCAGGCCTT -CTGCTCGGTCAGGTTTCAGCCAGCACTCCTGCAGCCAGCTCTCCCCCAGCCAGTAGTCAG -GCTCATGCGCCCACAGCATCTTGCCGAACAGAGCTTGGACCTAGCTCAGTGAAGGTTGTG -CCTACCACAACTATCACTCGTACGATCCACGATCATACCCCCGTCGTTGTGCTCACAACA -ATTCTGGAAACTGTCACGGTCACCCCTGCTGTGTCGACCGAGTTAGTGACAGGCTATGAA -ACCACTACCGTTACATCCACAGCTGATGCAATCACCGATACTTTCTCAACAACTTCAACT -GAGTTTGACACTGCGACATTGACGCTGACTCCTGCCCCTATCACCACAACCGTCGCCGAA -GTCCTCTCGACTACCTCAACCCAAACCTCCACCATCGCCACCTCTGCAGGCTTTACTCCG -ATTGTAGACACTCTGCCACCGTCTGTCACTTTCAAGCGCTCATTGGAAGAGAATGATGAT -TGCTCTCCCTGGGTGGATGATTGGAAGTATCCTCAAGCAGTCGTATGCCATGAGAAGAAc -atcatcaagaccacaactgtctccactgtcactggatcacccatcacctccactgctgct -actcctaccaccacggtgacggtcaccaccaccatcaccaccagctcagtgattgttccc -tcggatgtctccaccacactgagctttagcaccacctccaccatcaccgagacaacattc -gctgccggtgaaaccatcacagccactacgaccaacacTGTTCTAGCGGGTACAACTACC -ACGTCCTTCTACGCTGCCTGTGCTACCAACAACGTCGCTGGCTCTCCTCTCTCTTCAGAC -TTCGGCCCGTATGCAGGCAAACACATCTATGCCCTCGAGATGACCCACATCCCTGGCCAA -AGTATGATAGTTGGTAACACCAACTCGGCATACGATTGCTGTGCCAGCTGCATCGCAGAT 
-GCCCATTGCGCTATTTCCTACTACTATGCGGGTCCTGGTGTCAAATACTGCTATCTGATC -ACAGGATCTACGTGCTCCCCCGGTTCGAACTATGGCATAGCTTATATTCAAGATGGCACG -ACGACCGGTGTGCAAATCTCCAATGGAAATTGTGGATATGTGAAAGGTACATTTGGAAAT -TTATAGGTGTGAATTGGTGTATAGATGATGTAACATCAGAGTATAATTGATATAATTAAT -ATTTCAACCTTTCATTGTGTGTTCGTTAAACATCCTGGATAGCCATGGGCAAAAAAAAAA -AAAAACCACCGCTTATGAACACCGAGAGATCCAGGTAAGATCTCGGGAAGTTCCGTCGGT -GCCGTGCCACTATGCCTAGGTAGGTATGTGGGCACGTGGTTTGATACGTTAGGTATTGAG -GCTTCATGTTGTCGTTGATACCGTGAACTGCTTTGGTAAAAAAGAAATGGCTGTTTTCAC -TCAATCCGTTGATGGACAACTATCAGACAACGCCGACCGTACCAAGATTTTCTGTCTTGT -GCCTGAAGATGGGGCCTGTGAGATGTTTTGACAGTTTTGTGACATCTCTCAATGCGCAAT -ACTATCAAAACTACACGTAGCGAAAGAACAACACATACATGGTCCGTGCATTTGGTGCCT -GGATCTCAAGCCTATATCGGGCGGATCGACGCCACAGTAATCTTTTAGGCACTAAGAGAT -TTTGATTTTGAAATGCTATTAAATTGCCACAGTGAAAACTTGACAGGCTCCAGGGCTATG -ACGCTCAGGGGAACCCAAACTGTTGGGAAAGGGGACTATCACATGCAAATCTATAAGTGG -GGGCCAAAATTACATGTGATCATTGTTGAGGGTACGATCCAGGAGTATGAAGTATAGAGA -GTAAGGAGAGAAAAACTAGAATGAATATGATATTTCAAATCTACAGAGAATTTCAATGAA -GAAAAAAGACGTAAATTCAGCAATGCCGGACAAAATCTGGAGACACGATAAGCCCGCAGA -GAGTACCCCACGCGTGTGCGAGGGAAGGTTAATGGTTCCCATTAGCTCTCGAAGCTCTTG -GAATCTCAACAAAATTTCGAGTTTCCCGTTTCCCTCGACATTGTACAGATTCCCATGATC -ACATTTTTACACTCATATCCCTTTCCTCCCTGATTCAGTGAACTTTGGCAGCGTTTGACT -TGTATAGCGTGATTCTCCCTCTTGACCCGAAATGGCTATCCTCGACATCACCAAGGATTT -GCCCACACTCTTTAAGCGACAGGTGCAAGCAACCCCAGATGCCATTGCCCTGGAAGATGA -GACCACGACATACACATATACCCAGCTGGACCAAGAAGTTGAGACTCTTGCCAGTCACCT -ACGAACCTATGGAGTGAGCCGTGACACCCTCGTGGGTGTGCTCCTGCCACGCAGCGCACA -TTACGTGATTGCCTGTTTAGCAGCGCTCCGGGCGGGTGGTGCCTTCCTTGTTCTTGAATT -GGCTTACCCCGCGGACCTCTTGGCGGATGTCATTGAGGACGCTAGCCCCGCCGTGATTAT -CACCCACAAGTCTGAAGCCGACAAAATCAAAGCGCCAGTCCCGATCATTGCCTTGGATGG -GCCCGTGACCGAGATCAATGGACACGCCAAGGAAGCTTCTCCGTTGCCAGCGGATGACGA -TTTGGACCGCTTGGCGTTTGTGTCTTATTCGTCAGGAACAACAGGAAAGCCAAAGGGAAT -CGCCAATCCACACCGCGCCCCGGTACTATCATACGATCTGAGATTTGCAGTGCAGGATGT -ACAACCGGGGGATCGGGTGGCTTGCAATGTCTTCTTTATCTGGGAGATGTTGCGGCCGCT -ACTGCGTGGAGCTACGGTGGTTGTCGTGCCCGACGAAACGAGCTATGACCCCGCAGCTCT 
-GGTTGACCTCCTCTCAGCTAAGAAGGTCACTGAAACGCTCATGACACCCACTCTTCTGGC -GACTGTTCTTTCACGCTATCCCCGATTCGGTACTCGTGTGCCTGATTTGCGCATCATTTG -GCTGAACGGCGAGGTTGTGACAACCGATCTTGCCCGCAAGGCCATCAAAATTCTTCCCAA -TGCCCGCTTACTCAACTGCTACAGTGCGTGTGAAACGCATGAGATTGCCTGCGGAGATAT -CCGTGATATGGTTGATGGCGACTCTATTTACTGCCCAGTCGGCCCATCGATTGTTCCTTC -ATATACCTACGTTTTAAACGAGGATGGGCAGGAGGTAGAAACAGGGCAATCGGGAGAACT -CTTCATCGGTGGTCCTCTGTTGGCGCGGGAATACATTAACCTTCCCGAAACCACTGCCAA -GGCTTTCGCTCCAGATACCTTTGATTCAACACCCGGTGCTCGTATGTACCGGACAGGCGA -CCTGGCCCGCAAGTTGTCTTCTGGCTTCCTTGAAATCACAGGCCGCGTTGGTGCAATGAT -CAAGCTGCGCGGATATTCCGTTGTACCGGCGAAGGTCGAAAGTGATATCTGCCAGTACTT -GGCTGTCAGTCAGTGCGTGGTCACTGCGTACGGCGATGGACTGGAACGGCAACTAGTCGC -ATATGTTGTCCCCGACAAGGAGGCTTCCTCCGATCGCCCTACGGTCGTGATCAACGAGTC -CGGCCACAGTCCTCGCGCGCGTCGTGTCCTTGAGAACCGTCTTGCGCAATACATGATCCC -AGCTCTCTGGGTCGCACTGGATCAGTTGCCTACTAACGAGGTCTCCGGCAAAGTTGATAT -GAAAAACCTTCCTTCGCCTCGTAGCTCCAGCCCCAATGGCAGTGAGCAAAGCGCAGGAAA -AGACCCGATTAGTCTCAATGACATTGCGGCAATCTGGGAGGCTGTTCTGAAAGTCTCAAA -GAGCTTGCTCAAGGCCGAAGATAATTTCTTCGATCTAGGCGGCCATTCTTTGTCCCTGGC -AGATCTGTCATCCAAGCTTTCCAGACGTTTCGGCTTCCGTGTTCCTATTCCTCGTCTCGC -GGAGAACACTACTCTTTCCGGCCACTTGGAGACTGTTCGTGCTGTCAGAGATGGCCACAC -CGCAGAAGTACAAGCAAATCTGCCGGCGGTCTTGCTTTCCGATGCCACACTGGACGAGGA -CATCAAGCCAACAAATACCGCGATCACCTCAATTACTTCGGCCGACACGGTGCTTTTGAC -TGGTGTGACTGGTTTCCTAGGCGCTTTCCTGCTCAATGATTTGATTGAGAACACCTCGGC -CAAAATCATCTGTCTTGTGCGTTTCAGCGACCCCGAGCAGGATGATCAGGCCGGAGGTGT -GGCTCGAATCCGTAGGAACCTCCTAGACATGGGACTCTGGCGCGATTCTATTCTGGAGCG -TCTCGAGATCCTGCCTGGTAACCTTTCTCGGCCTCGACTGGGTCTAAGTCCGGAAGAATT -CGACAATATCGCGGCTCGTGTCCAAGTGATTGTGCATGCTGCAGCCACGGTCAACCTTGT -CTACCCCTACGCTGCACTTCGGGGAGCAAACGTGGGTGGAACCAGAGAGATTCTTCGTCT -GGCCGCCAAGGGTGGCGCGACTGTGCAGTATGTCTCTACCAATGGTGTCTTACCACCATC -TGGTGAGAATGGCTGGCCCGAAACTACCACCCTAGACGTGGAAGATGTCCCCACAAAGCT -GCTCGATGGCTATGGTCAGACAAAGTGGGTTGCGGAACAGCTCGTTCTCAAGGCAGGTGA -GCGGGGATTACCAGTCAAGATTCACCGATGTGGTACCATCAGTGGACACAGTGTCACAGG -ATCTGCCAATGCGTGGGATTTGCTCACTGCATTGATTGTGGAATCCATCCAGCTTGGATA 
-TGCCCCAGACGTGGAAGGATGGCGTGCCGAGATGACCCCTGTGAACTTCGTCAGCAAATC -TATTGTCCATCTTGCCACCCAGACTCAAACAGACCAAACGCTGTTCCACCTCGGAGATCC -TACGCCGGTCGATACTCGATCAGTCTTCAAGGACCTCAAAGAGCTTGGTTATGAGACACA -GCCTCTCCCATGGGATGAGTGGGTGGCCCTGTGGTTCGAGAAGAGAGGTTCAGCTAAGGG -TGGCGATGGTTCGTTTACCGTCGATATTCTTCGCAGTGGCATGCCGACTGTTGAGTTCCT -CCGAGACATCGTGGTCCTTGATAACGCTTTAACCAGGCCTTTCCGGGCCGTTGTTGAGCG -TCCCAAGGTGGACAGCCTGTTGCTAGAGACCTACACCCGGCATTGGTTTGCTCGAGGGTG -GCTACCCCGGGCCCCATCTCGTCAGCATGCCCTTAACCGACCTACAGAGGTTGCCAAGGG -TCCACTCAGCGGCCAAGTCGCTGTGGTGACTGGTGCTTCTTCGGGCATTGGTGCGGCGGT -AGCAGCCGCCCTAGCCAAACAGGGTTGTGCTGTAGCACTTGGCGCTCGACGCTTGGATGC -ACTCGAGAGCACCAAGCGCAAGGTCGAAGCTCACGGTGTCAAGTGTATCCTCCGATCAAC -CGACGTCACAAGCAAGACTCAAGTGGAGGCCCTCGTCCAAGCAGCAAGTGAGGAGCTCGG -TCCAGTCGACATCCTGGTCGCATGTGCTGGTGTTATGTACTTCACCATGATGGCAAACAC -ACAGATGGACGAGTGGGAGCGCACAGTCGATGTCAACTGCAAGGGTCTCTTGCACGCTCT -CTCTTCAACCGTTCCCGGTATGCTCTCGCGTGGCCGCGGCCATGTCGTCGCCATCTCCTC -CGACGCCGGACGTAAGGTCTTCCCTGGTCTCGGTGTATACTCGGCCAGCAAGTTCTTCGT -CGAGGCCACCCTCCAAGCCCTCCGCCTCGAGACTGCAGGCGCTGGCTTGCGCGTGACGAG -TATTCAGCCCGGAAACACTTCCACGGATCTGCTGGGCATGTCGACAGATGCTGAAGCTGT -CAAGAAATTCGGAGAGCCATCCGGAGCGAAGATCCTCGATCCTTCCGATGTTGCCAACTC -CATTGTTTACGCCCTGACTCAGCCTGAGCATGTCTCCGTCAATGAGATCCTTGTTGAGCC -TCGCGATGAGCCTATTTAAACGATACAGGTCATTTACCACATGTTTTATGAGAGTTTCTA -TGATTTGCTGTTAATTATCTGTTGCTAGTTGTGATGCAGGGGATTTGATTGGATTCACAC -AAAAATAGACGACCATATAGAGGTAGAAAGGATATATTGTATAGAGTATTGAATACTGGA -TTTCCACACGCGCTTCCCCCATGCAACTCAAGCCCATATGCTCTCATTCCACCACCTGGT -CCGCAAAAACCGCAAAAACCGCAACAGCCAGGTCACATCATCAACGCATCCAAAGCAGCC -GATAATCCCAAAATCCTTTTACAATCTATCTTCCATCATATGCTCATCTTATTCATGCAA -AAATCCGGAGAGGGTAGTTGGGCAAGTAAAACCCAGGATCGTTAAACCGCAGATCCCTAT -CCAAAGCCGAAATAGCCTCAATTTCCTCATCCGTCATATCAAAACCCAGCACATCCAAGT -TCTCAGCAAGACGAGTCCGATCATTCGACTTGGGAATAACAGCAATACCCCTACACGTAT -CAAGAATCAGCACCAAACTCCCAATCTCCAACAAAAAAATCTCGAACCCACCTCTGCGTA -GCCCACCTCAACAAAACCCTCGCAGACGAAACACCATACTTTTCCGCAACAACCCTAACA -GGCCCCGCCTCAAACAGCACCGGAATCCCCCGTGCCCTCTCCGAAAACGCCGGCGGCAAC 
-TCCACAAAAGACTGAGGCCCGAAACTCGAGTACGCCGTCACGGCAATCCCATTCTCCCGC -GCCATCTGGATAAGATCAGACTGCACCAGGTAGGGATGGTGCTCGATCTGCAGGGCTGAA -ATTGGGTGACGCGTATACGTTTTTATGTCGTACAGGCTCTGCGCTTGGAAATTGCTCACC -CCAATACTGTGTGCGATTCCGCTGTCTACGAGGGTTTCCATTGCTTCCCAGGTTTCGCGG -ATTGGGACGTTGTCCGGACTGACGGTTCCTGATTCGTCCATCCACCATGCCTGGGTGTAG -TAGTTAGCGGGACTGTGAGGTCATGCTTGACTGTTGGGATAGTTGGATTTGAGAATGCAG -GGGGCTTACAGGGTATTCCCTCACTGCCGGGTCTATGTACTTCAACGCACAGGGAAAATG -CATGAGGAATAGATCGATGTACCCCAGACCCCAAGAGTCATTTTGTTTCTTGGCCATTTC -GATGGCATGTTCGCGGCGGTGATAGTTGTTCCATAGTTTTGTTGTGATGAAGATCTCGGC -GCGGGTTACGAGGCCGTCGGCTATGGCGCGCTGGATGCCTTTGCCGGCTTCTTTTTCGTT -CTGGTAATCGTATGCGCCATCGAAGAGTCTGTAGCCGGCTTTGATTGCCTGGGATATGTT -AGTGTGTATTTGGTGTGAGTTGGGACTGGATATTGAGGATGTACTCACGTTGTATACTGT -ATCCGCGGCTGTCTCTTTAGGGACTTTCCATAGGCCGAAGCCTACGAGGGGCATTTCTTT -GCCGGAGGCTAGGGTTGTGGCTGACTTTGTCATTATAGGACAGGTAGATTGTTGGGTTGT -CTTTTTTATTGTTTTAATTGAGACTTCTTTGTTGTTTAAATACTGCAAGCTGCTTCGTGA -TGCTGGTGGTTCTAGTTGCCTATTTCGGACGTGGGTCCTTGTATACCCAAAAAGGCATGT -TGTGGCATCGGAGCCTCAACTTGGGTGGATCCCAAGAATCTTTCCTTTTTTTTCTACATA -TGAATACATTTTTCCGACATCCTACGAGATGGAAAGAGTGACCTCGCCGAGAAACCGTAT -CTTGACCTCCGTGATAATTATGTATTTTGCGCCTATAACTGGAGATGTCGTGTTCGATCT -CATTTCGTCTCTTCATAAATTCTATGTCGTACAATTACAAACTAAACCTCTTTAGTTCCA -ATATCAATCTTAGACCCACTGTGACTGCTACCAATACTAACAGTGTCATAAGTGGGATGC -ATCGACAGCGGATGCAGCCCCTTATGTCGAATCTGCGAGCGCGGCGTGATAGCCACCGGA -GTGTTCTCAACAACAAAAGAGCGCACCGACTGCGTATCAACAAACTGTGTTGTAAAAGCC -TCCTCGACCCCCTCAAGAGAACGACCCGCAGTCTCAGGATAAAACGCCCAAACAACAACC -ATCGCCACCCAATTGAACGCGACGAAACAATAGAAATAATTGGTCCCCATATCTGCCAGT -GCGAGGGGTGAGCACTGCGTGAAGATAAGGCTAATACTCCAGTTCGTGGCGGTCGCTATG -GACGAGCCGCGCGCTCGAATGGGTGTGGGGAAGATTTCACACTGGTAGATCCAGGAGATC -ATGGCGATGGCAGTGAAGAAGAACGAGAAGACAAAGAACATGGCGATGGCAGCTCGCAGC -GCGTGTGGGTTGGCGTTGGGGTCGTTGAAATCTATGTACCGCGCCAGCGTGGCTTCGATG -CACAGGGATGCTCCCATGCCGAGCAATGATGGGATGAGTAGTTTCCGGCGCCCGATTTTG -TCGATGAAAAGCATGAATACAGTGTTCCAAAAGAGTGCTAGTGCACCCCAGACTCCGATT -ATCATGATGGATGTTGATGCTGTAAGGCCTAACGTTCTGAAGAGCCGTGGGACGTGGTTT 
-GAGATTACGGTTGTGCCTGAGCACTGAGTGAATACTTGCAAGCCAGAGGCTAGGATAATC -CTGTGTCGCCAGCGCGAGGAGAGGAGGAGATGACGCCAGGATCGTACGGCGGAATGTTTC -TCGGATGTCAGGCTCTCTTGGATTTGGAGGAGCTCGTGTTCGAGTAGGGGGTTGTTTGTT -GCGTTTTGGTTGAGATGGAGTCGGGCGAGGACAGAATGGCCTTTTCCCTCTCTGCCCTTT -TCGATGAGCCATCTTGGTGATTCAGGAAGGAACCAAACGCCACAAGTGAGAACGATTGCT -GGCACGGCGCAGAAGGAGAGGGGGAAACGCCAGGAAAAGGCACCATCTCGAAGAGAACAC -CCGTATCCCACCCATTGCTTGAAACATGTCAGTTGTTTCCTAGCAAAACTCGAGTGGTGG -GTATACTTACAGCAACCACGAACCCTAAGGCAATCATCCATTGTTGCATACTAGCCAACA -TCCCTCGAATCTGTGGTGGGGCGACCTCGCTCTGACCTCATGTCAGATAAAGCTTAGCAG -CAGGAGCGTGCGTATTTGACCTACACAATAAACCGGAATGGTAGCAGACATCTGCCCAAT -TGCCAGACCAGCGATGGATCGACCGGCGACAAGCATCGCGATATTAACCGCGCCTCCCTG -GAGGCCAGCTCCCAGAACGGCAAGCAAGCCGCCGATGAAGAGGGCCATTCGTCGGCCATA -GTAATCCGAAATATACGGGACGAGGACCGAGCCCACGATTGCGCCACCTGATCGCGAAAA -TTAGATCCCGATCGCGGCAAGATCTGACGAGTATAGAATCTCACCGGTGTATGAGGAAAT -AATGACACCAGCAGCGGTGTCAGATATGGTTGGAGATCCGAATTGGCGCAAGAAGGCCTC -CTGCTCGAGAATGGACGAGATTACGCCAGAATCATAGCCGAACAGGAACGATCCCAGTGC -GGCGAAGGCGCAGCAGAAGGTGACGAGGTAGAAAGTCATATTATGTATCGCGTTGCTCCA -TAGCCTGTGAAAATTGGCGTTCAGAGGACAAGCTGATCCATATAAGGCGTAGAGAAAGTC -GCGTTGTTATACATGCCGATCCACCCTGGGGAAAGTGGGACGTGCCACAGGGGTGGAATG -CCATGCGGGATGCGAGGGATCGTCAGGTTCGTTTCATGGATCAATGACTATATGGGGATA -GTCAACAGTCCAGTTAATACCAAGCAATTATGACGCGACAATTTGGTACATAATTTTACT -TAAAAGAGCTGTCAATCTCTTTAATGAGGTACCTCGGGAACCCTAATCTCGGGAAGTCGA -CCGGAACAGATTGTCAAAGGCTCTCGATTCTACTCGATCATAGGGTGATAGGTCATTCAG -TTTGATGTTTGCATTCCCCATGTCTCGCGTCACAGGGCTGTCAAAGCATTTTGACGCATG -CTGCCTTGTTTTGAAGACCCCTCCATATGGACCCCCTTCCTGATGACCGTGCAAGGGTTC -CAGGGCCCAGGACTGTATTTATGCCCTATACTTTGCATTTTGTGCTTATAATGCATGGAC -ATAAAAAAATGGAGTCAAGTCCTCTGCGGTCATAATCCTCCGTACAGAATACTCGAGATC -GAACGAAACGATTAGATCCAGGTGATCATCTTGGGCTGGGATTATGCAGTATGCAAGACC -TAGTCATCAAATCTGGTCATTGGTAGGAGTATTCTGCACGTGTCCATGTCCATACATGTC -CATGTAGGTACACCGGGAAAACAAGGAGAAGCCATAAGCTGGGCTATCCACAGACTCACC -CGACACGAGGCGGTTCAATTCCACAGAAGCCCTCTGGTGTATGAACATAGAGTACATGTG -TACGGAGTACTCCGTCTACCCGCAAGATAGTCAAAGATTAGATAGTCGGCAAATGTGAAC 
-AGAGCCTAGACAAATCAGGTGGAGACGGGGACCCTGAACAATGTCATCAAGTTCTTATTG -GATGGGGAAGTGTCAAAAATGCCTCAGTCCGGGAATTTTTTCTGGAAGGAAAACATATGC -ATGGAACATGATGTCGGTGTCGTGAAGCCCCGATATCCACCTCTGGGCACTCACAAGGCG -TACTTATGACTAAACTAAAAAACTTAACTCTTGTCCTATTCTTTGGGACTATTTTTTCTC -TATTTTTCACTATTTTTTTCTTTTTTTTTCTTAAGATAATATACGCTCTGATCCCTAAAA -ATCCTTCTTTTTGATACACCGCTCTCGGGAATAATCATTTCATCATCGTTGATTATCTCG -ATCTCCCCCCCTCCAAGCTTACCCTTTACCAGTACTCCAGATCTTGACATTCTATTTCAG -GATTTTCAATCGACATCTAATCTTGCAGTGTCTTTGTAATTTTGCTTGTCTTATAATAGG -GTGGTCCATGACATTTGGTGAAATAAGTGATCACTTCAGATCGCCCGCCAAGGCCACCAG -TTCTCTTTACATGAATCCGTGCACTCTACGCACCTCAGATATCCAGTCATGGCGACTACG -ACGAGCCCTCATTCTGGCGAGAATGAGGAGGATTCCGCACCCATGAAAGATTTCAAGTAT -TCGCTTGACGACCACCATGACTCTTCCACCCATGATGCCTCCCCCACAGATTCCAAATCA -CTGCCTGCACAGAAAAGGCGACGAGTCGGCCGCGCATGCGACGAATGTCGCCGAAAAAAG -ATCAAGTGCGACGGCAAACAGCCGTGCACTCATTGCACAGTCTATAGTTATGGTACGCAG -CCCCTCGATCGAAATCTCGGAAAAAATTCATGCGCCCTAACAGTCAAATTGTAGATTGCT -CGTACGACCAACCTTCCAACCGCCGTCGAAATCCCGCCCCGCAATATGTTGAGGCTCTTG -AAAATCGGCTGCAAAAAGCCGAAGCGCTTCTTCGAACCGTTCTTCCAAATGTTGACCTCG -ATGATCCGCAATTCGACATGCATGCGACAGAGCAGAAATTCGCAGCCGCACAAAAAGCAA -AAAAAGCGGCCGAAGATGTCAAACCAATAGTTGCACAAGATACAGCGCAAGAAGGCGGAG -ACGAGGGACTACTTGAAACCATGGTCGATAATTCGGGCTGCTTGGATCGGGATGACCAGG -GTCATTGGGATTATCACGGCCACACGTCGGGCGTTCTCTTTGTTCGGCGGCTGCGCAAAC -AGTTGGGTGCTGCAGATATTACGGGTCCCATGGCACGATCGCGCTCCTCGATATCTGCGC -ATATGCTGGATAGTCCAAAGTCCATGTCTGAATCACCTCAAGACACAACATTACCTCCTA -CACATGACCTGCCTCCGCGAGAGGTTGCTCGCCGCTTGTGTCACAATGCAATCGACCATG -CCTGCTCTCTGATGCGATTTGTGCATGAGCCCTCCTTCTTTGCAAGTCTGGAGCGCATCT -ACGATACTCCCCCGGAACAATTCACGAACGAGGAGAATTCCTTCTTGCCGCTCTTATATA -TCGTCATCGCCGTCGGCTGCCTGTTCTCGGACGATGGATCAGGACAAGGTACCCTAGATC -TGTCTGGCTACGAAGACGCAATAGGCCAGGGGTAAGTCTTCGTAAATCTGCCTCGATGGT -ACCGACTAATTTCTTTGATAGGTTCCAATTCTTCAAGGCCGGTCGGCAACTGCTGGAGAT -CACCGACTGTCGTGACTTGCCTTCTCTCCAAGCAATCTGTTTCATGGTCCTCTTCCTCCA -ATCCTCGGCGAAGTTGAGCACATGCTACTCATATGTGGGTATTGCACTTCGTTCAGCACT -GCGATTAGGTCTCCATCGCTCTGTCGCGGCAGACTTTAACCCCATTGAACGAGAACTTCG 
-AAAGCGCATCTTCTGGGTCGTCCGCAAGATGGACGTCTACGTGAGCACGCTCTTGGGTCT -TCCGCAAATGCTGAGTGACGACGATATCGATCAAGAGTACCCAATGGAGGTGGATGGGGA -TTTCATTACTTCGGAAGGAATCACTAAGCCCCCGTCTAATTATACACCGCTGATGGCTGG -ATGCAATGCACACACTCGTCTTTCCAACATCATTCTGAAGGTGGTGAAATATATATACCC -CGTCAAGAATGCTCGCTATCGCTCCAAGTCAGACCAGCGATACATGGTCAGCCACTCAAA -AATTAGGGAAATCGAGCGAGATTTGCAAAATTGGATGGAGGAATTGCCTCCTGCTCTGCG -GCCGGGCACAGAAGTGTCGCCCCAGCTAGAACGGTAAGTCTCTACAGTACTGACGTTTTC -AATCAATTTGACTCACATTTAAGTAGTGTGCGACAATTGCTGCGTATTAGCTATGCTCAT -GTGCAGATGGTCATGTACCGCCCGTTCCTCCACTACGTCTCTAGTGGTTCTCAGGCGCGT -GGCGTGGACAAGCGGTCATATGCCTGTGCAGCTGCCTGTGTCAGTGTTTCCCGAAACATT -GTTCACATCACGACCGGAATGCAAAAGAGAGGACTACTCAATGGATCATTCTGGTTCACA -ATGTACACGACCTACTTCGCTATCCTCTCCTTGATCTTCTTTGTCCTTGAGAACCCGGAT -TCGCCAACAGCCAAGGATGGCGTCCTGAAAGATGCCATGGAGGGCAAGCATACTTTGGCC -GGTTTGGCCATGAAGAGTCTCGCTGCCGACCGTTGTTCCCAGAGCTTGAACGTAAGTACC -CATATATTCCAATCTGGAATCGCCTGGACTAATTATATTGCAGTGCCTTTTCAAGACGTT -GCCCGATATGTTGAAAAATCGCCAGAGCTCCAAGACTCCTGTCAACCTCAAGCGGCCCGC -GCCAGTCAACACTATCCAGCCTGAGCCGAAGCCTACCTTCGAGAAAACGCTGCCACATCG -GTCAAGCACCTTCCCCACGCAAATGGCGTCCCAGCCGGTCGTTACCGATAACCAGAACCG -CCGACAACGAAGCTTGGACAATGTCCAGCCTATCAGCAATCGCCCTACTGATATTAATCA -CCAGTCCACATGGGTTTCAACCACTCCCGAGTTGCTGACAGAAACCATGACTACTCCGGA -ACATATTTCCCCATCCGGCATGACGCCCTCCATGCCCAATCAAGAGCCATCAAGTCTAGC -ATGGGCTCAGCAATTCACCAATCCGTCCAATCTTCCAGATCTTATGCCAATGATGTTCCC -ATCGGACGATCCATTTGCGTACCCAACACAGCCCATGTCCACCTTAGAAGACGATCACTT -CCGGCACGACAGTGCCGGCATGCCTTCGCAGTACCCCTTCGATTCAACACCGGGCATGGG -GCCCACCACACCCGGCGATCCCTCCAGCGCAGGGGTCTCCACTCCAACCTATGACTTCAC -AAACCTCCCGAATTTCCCTCTCGCAAATTCCTCAGGGATTAAGTCATCAGTGCCAAGCCA -CCTCCGCCCCTCCCATTCGCGAACTTCATCCCGCATCCACTCCCCGATCTCGCAGAGCCA -GACCCCGGCAGAATTGATCAGTAGTCCAGACCTGGTCTCAATCCCGAATCAGAACTTCGT -CTGGCAGGGCTATAATTTCCAGCCCAATAATCCAAATGACATGGCAACCGAGTCTGCACC -ACAACAGCCAGAAATTCCCGGTCCAAATGCAGTGCCTGATTTTGGTATGGGCATGGATGA -GAACATGGGCATGAACACGGATCTGGGGATCTCTTTTGATGATCTTTTCGGCAATAATGC -CGCCTATCGGCCTGGCAACGGGGCGTCTGCTGAGGATTGGGCTCAATGGATGAATGCCAA 
-CAGCATTTGACCTTAATTTAGTTTGCCATCTTTGTCTTCTTTGTTTTTTACGAGGATATG -CTATCATGAAAGCCAAAATCTTGGTGTAAAAATTGTTGATACCTGGTAGGCGTTTTGAGC -TGGGGTTAGGAGTCTCGCTTTTCTTTTACAATTAGCTTTATACATAAATGGATCCGGATG -TCTATATCGCTTGGTGCTTTTCTCTTTCTATTCGGAGGTCGATATCTTGCTTCATTCCAA -GTAACCGCAAGGTAAGCGATTTGAGGGGATATTGTATGTGCTTGGAGTCGAAGCAAGTGC -TGGATCAACCCTTTTCCGGCAAACAAACATACCTGCCCAACCAGCTCTCAATTCCAGGTC -TTTTTCCCTCGCTAGTTATGATCCTGCATACTCGACAGGTCACTCGGCATGTCTTCTGAA -ATCTTGAAATCAGCTTGGAGTAATCCCCATGTTCCATTCGCGCTCTATCCCTCTCACGAT -TGAATTCTACACCAAAAGGCTCAGTTTTGAACTCGTCAGCGTCAAGCCCGAAGACTCATC -CGAGGAAAGCTCGTCTGAGCAGACATTCTGCTCCTTGTTCATTGGCAAAAAAGTAGACGC -CAACTTCTATTTTTCAAAAGCAGCTACAAATTTCACTTCGTCACAGGCGATGGTTGCACT -GAGAACCTCGCAGTTGGATGAATATAATAATCATCTGCGATGTCTCGATGATGTTGAGAT -TGTGGAGGATGTCAAGGATACACCGTGGGGGTATAGACAGTTCTCAATCAAGGATAATAG -TGGGAATATCTTGACTTTTTTCAAATTTCTGGAGGGAGGAAATCCGGGACAAGACTAAAT -TATACTTAGCAGCCGTGTCTGATTCCAAGTACTAGGAGTGTGTGCACTGTGGTCACGTTG -TCGATAGTATACTTTATAAATTCACAACCAAAAGAATTCGAGCATGCGCATAACTTCGTT -CATTCATCAATGCTTACTAGCTACATATCTATAGCTAATGCCAATATCCCCTGTAGGGAT -CTTGGGGAGATTACTACTTCCATGATCCTAAAAGTTACCCGTAGACCCTTGACTTGAGAA -AGTAGACACCTCGTCCCAGCTAAGTGCATTGATCTGCAACGGCATTATAGGTAGAACCGG -TACAGCCGCTTCCATCAGAGGTACACTGCGCATTGCTTGTCAATCACAGGCTCTGTGTAT -CAAGGAATAAATATAGTTTACCTGTTCATGAGCGGGACAAGCCACTACGGCGATTAGGGG -AGCTGGACCGTCAGGATTCAAGGTACACTGTTGAGTGGACTTTGAGCATCTCTAATATCA -TATTAGCCAAATAACCCATCCAAGTCCTCTCGTGTTATCGCTTACACCACCAACAGTTTC -TGCCGCTGCGAGAGTAGCTCCGAGGGCCAGGAGAGTCAGGAAGTTCTGATAGAAATGCAT -GATAAAGATTGAATGTATTGTAACAGTGGTCACCGAGCAAATGAGTGATTTGGTCTCTCG -ATTTGTCTGAAATATCTGTGGTTTATATATTCATCGGAGAACATACAAATATTCTCTTGG -TGGCCTTGTATTACCTTGACCAGGATCAGCAGTGGCAGTCAGAGTCCGAGATTTGCTCTG -TTCTCTATCCCTGATCCTTGCCCTATCTCTTTGCGATATACATCACATATCTACCATCTA -TATCTGTGTCTGCGTTCTCTGGATATTCTCCTTTTGCTGATGTTCACGAAATGCTGTCCA -ATTACCCTCTTGCATGAACTACTCCGTTCCACTGCGCAAAGCACACGGTTCTAGGATTGT -TGAGTCTAGGCGGGTCGGAAATGGTGGAATGATTGTCAGCATATCGGAGGCATAGGGATA -AATCAGGGTGTTGGAATCACCCAGGACTTTCACTTGTTCCCTGTCGTTTTTCTTGACAGA 
-TGACCGCATCCTATTACTGTTCAGATTCCTCTTATATCTCTACCACTATATTGTCTCAAC -TGTGTAGAGATTTACTTGAGAGCAGACTCTTCGAGTTTTGCGGAGAGTATTACCGTTTTA -GGCAATGAAAAGGGAGTGAGTATACTGTGGGGACATTATTAGCAAAGGTCTTGTCACGCG -TCATATCTGATGATATGATTCATATTCTTGGATAATCTATATGCGTCATGATGGATCTGT -TTATAAATTCTCCCCACATGTGGGATTTGGTAAGGTCCATCCATGCGACTTCCGATATAT -ATATTGTAGAGTCTGACTAGGCCTAATTGAGTAGTGACTAACTGTTTCTGGCAGTTCAAC -CTAGTATTGAAGGGATATGTTCCTGGAAGGAATGAATAGTGAATTATTGGTTCATACAGA -TTTTCCAAATAATAAGTGAATGATAGGTTAATGCCAAGATCACTAATTAAAAAGTGACAA -ATGTATTCATTTACAACCATACAAAGATTGCCGAGAAGATGGCCGATGGCCGAGGAAAGC -AGGTACGTGTGGAATTACCCGGATATACAGCTCCGCGCTTCCCAAACCAGCCAATTGTCG -GTAGAGTAGTGGAGATATATGTATCCAGGCCATACTTATGGACAGAAAACATGGCCGCAG -CTGACATGGTAGCGGTTTGCGTTCTTGGCCAATCCCCCATTGTGAGTGTTAGAGCATAGA -CCAGACGACCAAGAGCAATAATATGACCAGGACACCACAAACGAGACAAATATAGTGATA -ACGGATCAGTTAGCGTTTCTGCCCCAAGTCCTCATTTTCGGTGCTTTGTGTAGAATCCCT -GCAGGGACTCGGATAAATCGCCGAAAGGTGGCTTGATTTTCATCCTCCAAAATCACATCA -TCTGTTAACACTTACGCTACAAATCCAAGTTGTCCCTCTGTAATTCTCCTCGCCATCTAT -CCAACCATCATGGCCTCCGGACTCTTTTTCGACCCCCGTACCCTCATTTTCGACCACCGT -ACACTCCTCCGTACCGCCCCACTCATAACCTCCACCTGTACACTCTGGTACTCGCTCGAC -CAGGACTTCTTCCTAAAGGTCTTCCTCCACCCAGACCACCGCACTCGCAGCAATGAGATC -CTGCCATCCTATTTCCGGGGCTTCTTCGGCTACGGTACCGTACGCGTGCTGGCCCTTCTC -ACACTGACCTTGACGGGGGGTGGATATAATATCTGGACAGAGCGACGGTCGGGATTGGGT -AGCTCGGCCTCCCTACCTTGGTATACTGCTGGCACAATATTAGCTGCCAGTCACCTTCTC -TTCGTGCCCGCTGTTGCACCCAAGGTTCAGGCTGTGATTGAGGATGCGTCGAAGGGCTCG -TCGACGAAGGATCTCGAGGGTTGGCTGACTGTTCATCGTGTACGTACGTGGACGGTTGAT -CTGGCTTCGTGGGTGTGTTTTGCGATTGGAATGTCAATCGTTGAGTGAACGATGGATACT -TGATGGAACAATAGAACTAAACATGTTGCATGAGGAGAGCTGGTTGTGCTTTGGCTCAAT -AGATCATACAGCATAAGTGTGCATACTACAATGTATATACTCAACAAATCGGATCGGTGC -ATTTGATGAGATTTGAATAAATTACTCTAGTATCGACTTCTCACAAGACAGTCAACTATG -TTTCGGACAAGTAACTTGTCTGTTATATTTGAAGTACTCCGTACTCATAGTGAGCTAGCC -CATAATCTGTTAGACCCACATATTTCGACACTCATCTCAACGTTCAGAACCTCTTCGCAG -GGTGGTCCTACGCTACACCCTTTCTTGATCATATTCATACTTACCCATGGGCCTTTTTTC -TATATAAAAACGGATCAATTGTAGCATCCGCAAGCCTCTCGGTCGCATGGAGAGCTAAAA 
-GCCCAGATCTCCCATTGTACATTTCCTTGGCCTGCATACCCCCGTGACTATTTATAGCCA -ACTCGAATGGCGTCACTTCCAAAAATTTCTACCATCTTCACCTGGGCCGTCTATTTGATC -CCTGTGTATATCTTCCTCTTCGACCCGCTAGTGCGCGGGTTCTTCCCGAGCCTGCTCGCA -CCACCCAGTGACAGCGACAACCTATTCGACGAGTCCGGCTCACCAGGACCGGGAATCAAT -CTCACCGACGACAGCTTCATCAGCCCGGAAGATGGTGTACCATTTAGCTGCCCCAGCGCA -GAGGGCTACAGCGTGCATCTCCTCTCGCACGAACCGCTGATCATTTACATCGAAAACTTC -GTCTCCGAAACCGAAGCAAACCACATCCTGGACATGAGGTAACTCACCAATCTCAACCCC -AACCAAAATCCCCCGAGGAACGACGAATTTACACCATAACACACACAAAATAACCACAGA -CCAAGCTAACCGGAACAAACCTCTCCCCAATCCCAGTGTAAACAAATACACCCCTTCAAT -CGTCTACGACGGCCAAACCGAACGGGTAGACCCCAATAAACGCCTGTCCGACCGCGCCCT -CCTTGACCGTGACGACACCGTCCGCTGTCTAGAAGACCGCGCCCGCGCCTTCCAGGGCTG -GCGTCCGGACCTGTACATCGAGCGCATGTGGGCGCAGCGCTACAATGCCTCCGGTCACTA -CCGCCACCACTATGACTGGGCCGGGTCGTTGGCGCGGGGTGGGGACCGTTTCAGCACGTT -TATGGTATATCTCGATGCGGACTGTGAGGGTGGGGGGACGAATTTTCCACGGTTGCGGAT -GCCGGCTGGGGAGGGGTGGTGTCGGTTTTTGGAGTGCGAGCAGGGGAATGGGAAGGGGGA -GATGGGTGTCACATTCAAGCCTATCAAGGGTAATGCGGTGTTTTGGGAGAATCTTCGGGC -AGATGGCACGGGGTACCCTGAGACTTGGCACGCTGCGTTCCCTGTTACAAAGGGGACGAA -GGTTGGGTTGAATGTGTGGAGTTGGTACCAACCAAAGCGGAGGAATCGGCTAGGAAGGTG -AGATGGTCAGAGAGGCAGAAACGTGCACGTGGGATGGGTTGTTATGAGCTTGGCTGGCCA -AGCCACTGGCTAAGTCATGTTCCAAGGAGCATCTGGAATGCTTGGGCGAAGCGCTTCTGA -AGAGCTTAGTCAGTATATTTGTATCTCAGGGGCGTGTCAGGAGATGTTCCAAGGTTAAAG -AATAAAATTAGACACTAGACTATGACATGTAGCGTATATAATATGAACATCTGTGTGGTT -GGATGTAAATGCCTCTCCATACTGACCCCGGAACCCGAAACTGGCTTATGCATACCGTAA -GGACACCAGGCAGCCGGATTAATCAAATGACTTGTCACACAAGAAACTTTATACGAAAGG -GATGTGAAATTGGTACCAAGACACCAAGACTCCAAGACTTGAGTCTTGGTGCCCGGAGCG -GGAATTATTGGCAAGAGGAATCACTTGGCTGCCAAGAAGCATTGCCCATGGGATGTTTTC -TTGGAAGGTCGGCTTGGCGAAATGAACACGCCACTTTGTTTTCATCTGAAATATGGAATT -CAATTCAACTATCCGGGGAGACCGTGTGTGTTTTAAATTTCCACTCAACTCTTCAGTTTT -ACCTTGAATTTACGGTATTGAACACTAACCTGATTTAGGGCATCCGGATGTACAAATTTC -AAAAGTGGTCGGAGTTAGCCCGTATATGTTAGATTACCGTACTGTACCCTACTGTGCTAC -GTGTTAGCTCTGGAATGCTCAATACACAGCCCCTCGTTCTTCCTTCGGTCCACCAGTCAA -AATCCATGGCGCCAACGCCTCTCGGGTCTTGGGGCCCGCACCGACTCTTGGCCCGGTCTT 
-GGTCTGTTCGTTTCTCTCCTTGTCCCTCCGCCAGGCTTAGCAAAGCGGTATTTTCTGGGC -AAAGGCTCGGCTCCTTTTCCTCTTCTGCGGAGANNNNNNNNNNNNNNATCCCCCCCCCCC -CTATTCTTCTCTGGCCTCTTTGTATACATATAGGTTCATCTCTATTTGATCTTCAACTTT -GTTTTCGTTTGATGTTCTCTAGGCTTTCCGCCAACAACAGATCAAGTTTCTTGGCATCAT -GACGGAGAATCATACCCCTTCTACTACACAGGCGACGTTGCCTGCGCCGGTCTCTGAACC -AGCGCCGATTCAAGCAACTCCAGCCCCTTCTGCCTCTGTCACCGCGACTGCTGCCGCCGC -CACCGCCGCGGTGAACAGCCCCTCTATGAACGGCGCCGGCGAGCAGTTGCCTTGTCAGTG -GGTCGGATGCACTGAAAAGTCCCCCACTGCCGAGTCTCTATATGTAAGTTTGATCAAACA -ATCTCAAAAAATTGGAGAATCTGGGGAGATGATGTTAATATTCTCCATATAGGAGCATGT -TTGCGAGCGCCATGTTGGACGTAAAAGCACCAACAACCTCAACCTAACCTGCCAGTGGGG -CACTTGCAACACAACAACAGTCAAGCGTGATCACATCACCTCCCACATTCGCGTTCATGT -GCCACTTAAGCCTCACAAGTGCGACTTCTGCGGTAAGGCTTTCAAGCGCCCCCAGGATTT -GAAGAAGCATGTCAAGACCCATGCGGATGACTCCGAGATCCGTTCCCCCGAACCGGGCAT -GAAGCATCCTGATATGATGTTCCCCCAAAACCCTAAGGGTTATGCTGCTGCAACACATTA -CTTTGAGAGTCCGATCAATGGCATCAATGGCCAATATTCACACGCGCCACCTCCTCAGTA -CTATCAGCCACACCCCCCACCTCAGGCCCCCAACCCGCATTCCTACGGCAATGTATACTA -TGCCCTGAGCCAAGGACAAGATGGAAACCACCCGTATGACCGTAAACGAGGATATGACGC -GTTGAACGAGTTTTTTGGAGACTTGAAGCGCCGCCAATTCGACCCTAACTCCTACGCCGC -GGTTGGTCAGCGTCTGCTGGGTCTGCAAGCGCTTCAGCTTCCCTTCCTCAATGGCCCTGT -ACCTGAATATCAGCAAATGCCTGCATCTGTCGCGGTCGGCGGCGGAGGCGGTGGTGGTTA -CAGCCCAGGTGGTTCCCACCCTCCTGGTTACCACCTTCCCCCCATGTCCAATGTTCGGAC -TAAGAATGATTTGATCAACATCGATCAATTCCTCGAGCAAATGCAGAACACTATCTACGA -AAGCGACGAGAACGTGGCGGCTGCCGGCGTTGCCCAGCCCGGCGCGCATTACGTGCATGG -TGCCATGAACTATCGTGCCACCCACTCCCCTCCAACCCAACTCTCGCCAAGCCACGTTAC -CGCAACCTCCGCTCCCATGGGAGCTGCTTCGGCCCACTCCCCATCGGTCGGCACCCCAGC -TCTGACCCCGCCTTCCAGCGCTCAATCATACACCTCCAACCGCTCTCCCATCTCCATGCA -CCACGCTCAGCGCGTGTCGCCCCCTCACGAGAGCGGCCCGGGTATGTACCCGCGCTTGCC -GTCGGCCACCGTCGCCGACAGCATGAGTGCAGGCTACCCGACCGCCTCAGGTGCCGCGCC -ACCCTCGACTCTGAGCGCTGCGTATGACCACGACGATCGCCGCCGCTACACCGGTGGTAC -CTTGCAACGCGCCCGGCCGGCCGAGCGTGCTGCCACTGAGGACCGTATGGACACCTCACA -GGATAGCAAGACTCCCGCTATTCACATCTCGGAGAGTCTGATTGACCCCGCCCTGTCGGG -TACCTCCAATCCTGACCAGGAGGCTGTTAAGCGTACTGCGCAAGCGGCCACTGAAGTCGC 
-CGAGCGGGATGTCAACGTTGCCTGGGTTGAAAAAGTCCGTCTGCTTGAGAACCTGCGTCG -CTTGGTTTCTGAATTGCTTGAGGCTGATAGCCTTACTGCTGGATATGGAACGCAGTCTTC -GGCTTCTCCTACACCAGGACTTGATGCTATGGAAGGAGTCGAGACTGCTAGTGTACGTGC -TGCTTCTGAGGAGCCCAAGGAAGAGCCTACCAAGTCTGCAAGTGAGGGGGTCTTCTATCC -CACCTTACGTGTGGATGAGGATGAGGATGGTGATTCTAAAATGCCTTAAATTCTTTCTGC -CAATGGGTATAGCTTCACGCCGTGACGACATTTGTTTATTGCTTTTCCGCTCTACACCCC -CTCTATAAGGTCTTTTCACGTTTTAATTTTTTTTTTTCTATTTTCAAACAATTCTTTAGC -TCAAGTTTGTATGATTACATTGAACTGTTTATTTATGGAAGTTGTTTGCATCTTTGACTC -TGTGTAGAACCTTGGTTCATCCCGATGTTACTGTTCTTTCAGATCTGTCAAACCCCAAAG -AAGTAAGCCAAACTAGATCCGGGGAACGCAAACGGGCAGATCTCACTTTGATCAGATCTG -GGGGATGTGTGTAAGGAGGTAAATTAGTGTGATTGAGATGTTGTACATAGTTATTTACAT -GAGTATATGTACATCTTGAAGAACGAAGATAAAGAGATACACACACATACATAATGTAGG -ATATCCTAATCTCGGTACTCCATACATCATAGAACCTATTGAACATCCAAATGCAGAGAT -GGTGAAGTCCCACTCTCTGCACTCGGAAACATCCCAATCACACGCTGAGCACGTGCTGGT -ATTGGTATTTCGGTCCTATCTCCGCCTATCGGTGACCTTTCCCTGTATACATACGCGGTT -ATGACCACAACTCCACCAACTGTTTACGGTCATCCCATCTTCATCGTCTTTTCCTACTTG -TACACTTTGTTTTCTCCGTTTTTATATTTTCTATATtctttctctctctctctgctttct -cttctTTTTTCTATATTTTCTGGTTGTTGTGTTAACACGCTACGCGACAAGGCGGTCACC -TCCCCAAACAGACGCCATTGCGCGTGTCGATCGGCCTAGGGATTATCGATAATAACATCC -AGCCACACAATATCAACCTTTCGATTGGGGGAGTTTTTTGATTAAAAAAAAAGAGTGATC -CAGCCTCTTAGACTTGTTTACAGTCTAACGGCCTTCTTCTTTTTGCTTTTATTTTTCCTT -TCCGTGGTGGCAAACCCATGCTCTGACGTTCTTCGTTCTCTGGTTCTACCTCTGAACGGT -CTTCCGATTCTGGGGGTGAAATTCCGAATCCAACCTTGGAAGAGTCGGCTTTGTTTTTTT -TCTGTATGTGTTTTTCGCCTTCGGCTTGCCATATTTCCTCTCTAGTGCCCCGCGCTTGTT -TGTGGAGTACTTCGACTCGGCCTTGATTGCCTACGCAACGGGAAAAAAAAATAATCATTT -GTCAATCTCCCAACTTCGACTTACAGTATATCATACCGGAGGAAACCCGATTTCTATAAG -ATATGCCGCTCACGAACCGCCGTCGCGCGCGGCGCAAGGAAATTCAACTGCAAGAAACAT -CCGATGCAGAGGCTCCGGACTCATCGCCCAGCCGGCCATCCGCAAAGAAGCGCAAGGTGA -GTTAACATGCTTGAGCGTTTAGTCTAACACCAAATATATCCCCGCGCAAACTGGAGCTAA -GACCCATTTTCTTCTGATCCGCAGGTTGACCGCCGCGCCTCCCCTCCCCGAAATGTCAAA -CACGAGTCCGGTGACGAAGCCGAGGACTCAGCAGGCGAACACGAACCCGCTACCAATCAA -GACCTGGTCGATCTAGTGATTTCATACTTGAATACACCTCGCGAGGAATTGCGCGTCTCG 
-CGCGACCACTCCAACACCAAGACTGAGAATACGCAGCGTATTCAGGCTTACGCTAAGATC -GCCGGCCGCAACTGGACCTATTACGTCAAAACCCTGCATGTCAATATCGGCCGCGAACCT -GATCGCGAACAACGCGCCGTTGAGCAGTCCAGCCCCGTCACAATTGCAGCTCGTGCGCTG -CCAGATGTTCATGTCGACTTGGGCCCCAGCAAATTCGTGTCGAGACTGCATGCAGAGATC -TTTTATGATGGCGAGGAGACGCCTGCCTGGCACATCCGCGTCAACGGCCGGAACGGTGTG -CGCTTGAACAATAGCATTGTGAAGCGCGGCACCGATGCGATTCTCTCATGTGGCGACATC -ATCGAGGTGGCCAATACACAAATGATGTTTGTTACGCCCGGTGACGAAGCCAATATCCAA -CCTAGCTTTATCGAGCGCGCCCAGCGCATTGCTAGCGGACAGGAGCCTGATCCGGCTTCG -GTTTCCTGGGATGCTTCGCAGCATGCCCATCCTCATCCATCGCGGAATGGCGAGGATGCG -CCACCATCTTCGTCTGGCGGGCCTTCGTTGGCGCCTGCTCCGAAGTTCCTCAAGCGCCAG -GTGACGCCTCCGCCTCGGTCGCCTGATACCGCTGGTCAGCGCACGGCTAAACAGTCTCCG -CTTTACAACCGAGGCATGATGATGGAGAGCACAGAGGAGATTGATTACAGCAAGGATTCC -TCCAAGGACTTGAAGCCGCCCTACAGTTATGCGACGCTGATTGCCCAGGCTATTTTCTCA -AGCGAGGAAGAAAAACTCACCTTGAACAGCATCTACAATTGGATCATGGATAAATACGCT -TTCTATCGGCACTCCCAAAGCGGCTGGCAAAATTCGATCCGCCACAATCTCTCTCTGAAC -AAGGCGTTCCAGAAAGTCCCCCGCCGTACCGACGAGCCAGGCAAGGGTATGAAGTGGCAG -ATCGCAGTAGAGTATCGGGAGGAGTATCGGAAGAAGCAAACACGCAAGGGTGGCACGCAG -TCATCTGCCCCGTCCTCACCGGCGACAAAAGAGCCACCATCATCCGCACGCGGGTCCCGC -GTTACCAAAATCGACACCTCGTTCTCAGCAACAGCGAAAAAGTCCCCTCCCGTCTCGTCC -CCGGGCTTCAGCTCATTTCCTGTAGCTCCGGTGGAAGCATACACACCCGAGCGTGGTTCC -CGTCTTGGTCGAGGCCTCGGCTCCGACCACCCACTGCGACACATGAACCCACGCGACTAC -GACGAACCCTCGCCACTCCCATCGCGCTCGCACAACCACACCTCCAGTATCTCTACTCAG -GTGCAGAACAGCAGTAACAACTTGGCCCGCGCATATGGCATGTCCGACAACATAGCCGGG -TCCCCTCCAGTCCTTTCCTCGTCTTACTACAACGAGGAACCCTCTTCAATGATCACTCCC -GCCCCACAACGCCAACAACCTCGTCTCCCACCCCCAAGCACAGCGCAGATTCCGTCCAAG -TTCATGCCAATGAGCTCGCCCGCGCAGTTCTGGAAGTTCGCTGACATCGGGAGTACCCCC -GCCCGCCCACCGCCGGACATGAGTCCCTTGAAGGGCGAGGTCGAGGACCGCATTATCGGC -GGCTTCCCAAGTAGTTCTCCCCCGCCGCCGAATCTCGTTAGTCCCAGTAAACCTGGGACG -TCAAACGGGCTTGGCAACGGTCGTACTCTGCCACCGCTGCAGTCGGATTCGGGGGATATG -GGACCTAATGGGAAATCCCatgtgaggaatgaagatgatgaagatgataatgGATCAGGC -TTTGATCTTGCTAGGTATGTTTCCTTTGCTGTTGATTTTTTTTACCAAGCCAGCGTGCAG -GAACTAATGCTATTCCCAGGGGTTTCGAACCTATTGGCTCATACCATCGACAACTAGGCA 
-ATGCTGCCCGTGCGGCTGCTACTTGATGTTGACTTTGGTTGTACAACATGTCCACTTTAC -AGACTGTGTCCATGTGTTTTACTATTTGCCCCATGTTTCATGTCTCCGCATTACGAATGG -TGTTGGCGGCTTCGGTTATTTTTATACTTTTTTTTTTCGAAGACTTGCTCGATGACATTT -TCATCAATTTGGGAGCAAAGATTGGAGTTCGAGTTTTGATTTAATTTTCTCTCGGGGGGT -TGTGGACGGCGTTTTTGTTTTTGCTTTTCTAGATCGCCTTATACGTTATGGGCTTCCAGC -CAGATGTATTATAACTCACAATATCTTCCAACTGCAAAGACATCGTACATTTAAAGTTGA -TCTCTAGATCAGATAAGATACAGATACATCATTTGACATTCGGCCAAAAATTCTCCTCCA -ACCAAACAATCTGCCTCCACCACTCCAAACACCTTGCCTGCATAATCTTCAACTCCTCCA -CCCCTTTAATCTGTCCCGCCATCTCATCAACAACATCCCCAATTCGATTAACAAACCCTT -CAAAGTCAACACTAGCCCAATTCTCAATAAACTTCTCCCTCAACGCACCCCCATCCGCAT -CCTTCCCATAATCCTTCTCCCCATCCTTACCCATACACTCCGCTGCAAACCTCCAAGACC -GAAGATAACAAACCTCAGTAGCCCACAACACAGCCAATCCCTCCAAAATAGAAACCCCAC -TACTCCCAGCAGACATGAACATATCAATATAAGCCCGCGTCGTCCGACTAGCACAGAAGA -AAAGATTCCCACACCTCTCCCCACCACCCTGAACCCCAACTTTCCCTCCAACATCCCCAC -CAACAGCAGCAGGTTCCGTCTCCCGTCCAGACTCATTGAACTGAGCCCCGGGCGTACAGA -TCACCCCACCAGAAGGCATCTGACATTCCCCCTGACTAAGACAAAGCCCATGGCCCGGTG -CGCTACTCGACTCATCTGTCGACGCAAGACAAGCCCCACCAGCACCATCACCACCCCCGT -TCCCGTCTCCCAGTCGTGGTCCTTCGCCCTCGCCACCCGCAATGCCAGGACAACTCCCCG -AGCCACTGGTGGAAATCCCAGCCGAGGTAGTAATATCCGAACCCAAACCACAAGAAGTAA -GCGTGCAGCCACCTTCTTCGGCGGAGATAGCTGTAAGGTCTAGGTCGTAGTCATTTGCAG -TTTTCTCGAAGAAGGCAAGTTCAGTGCGGATATTGACTAGGGCGTCGATGAGGACTGTTA -tggcgttgtgttctggggttggggtgtgggggcctggggAAGTTGGGTTTTGAGTTGGAA -GACGGATTTTCGAGAGGAGGAGGCCTATAAAGCGGATGTAGGATTGGGCGTAGAGCCTGT -CTTGGGAGAGCCATTGGCTTAGTTGGGATTTTGGGAGGGTGCCGCGGCCTGCGGAGGCGA -GGAAGGGGTGGTTTGTTGCGCGTTTGAGGGCATTGGGGGTGCTTGATAGCAGGTAGGATG -TTAGAGGGCCTTGGTTTGACATTTTTGGGGAAGTGTTTGGAGGTTGTGGTCTGTATTTTT -CTTTTTGGAGGGGAGATTGGAGATTAGAGTTGTGGGATGGGGGCGATAGTGGGATGAACA -TGATATGTCGTCAGTGGGACATAATATCAGAGGTATATCTAGTGCTAATAGTGACTACTT -CTTACGTGTAAACACGTACGTGTATAAACATGGCTCATAATTTAATGTTGTAAACATTGA -TATCAAATTGGCGGAGATTTCATTCAGTTCAGACCATAGAAAGCTAAAACTAACCATACA -TCCCCTGTTTTTAGAGTATTTTCCACATTTTCGATCTCACGCATTCTGTTCAAATTGAAA -AAAAAAAAAAATTCAGTCCTCTGGGCCCATTCATCCTTCCAAGGGCAGTCGACAAAGAGG 
-TGTTCAAACGATTCTCAGACGTGACTTCCCGGCTCCAGTTACTCCCGTCACTCTTACTTT -CGTGAATGGAGAATCGTTGAAATTGAACATCTTAGTGCCTCCACACAGGGGCATCGCCAA -TATTCTTGTCGGTGTTGTCATTGGCCTTTTCAAGCTGCGCAGCGATGGATTCAAACTTGG -GTTTGTCCTTGGGGTTGATCGTGACCTTAGGATTATCCCTGCGCTGAACCTTGGGCTCCT -CATCACTGACTTCAGCACTCTCATATCCCCACGGCTTCTCCCCAGCTACTCTCAGCATCA -GTCGCACAAGAGCATGCAAAGTGTAGTACGCATCGTTTCCCGCATTGTGCAAGTCGGGAC -ACAGAATATTCAGTATTCCTAGCAGTGCTCGAAGGCTTCGCTTCTGGCTCTCCCCAGCAT -TAACACGGTACAGCTCTGCCACGTCAAGAGTCTCACGAACGATAGAAGAGGGGTGATTCA -GATTGATGAAAATTTTGCTTCCCTCTTTCTGGAGATGCGAGATATCACCGTGGAGATCGA -AACCGACTAAGATCACAGTGCGCTCCTGATTCTTGTATGAGGGGATCTTCTCATCCTTGG -AGGGAATCATGTATGGAGGATGGAATGCAGCATCCACAACCTTCGCAATTTCAGCGCCTG -GAACGAACTCGCTTTCGCCGAATCGGAAGTCATTAGGACATCCCTGGGTGTACTCATTGT -TGACCCAGTGTCGGTACTCCGCGACACGCAGGTGGCGGGATCGAACTTGGCGTAACCACA -TCTGTCCATAATCACCGGGGACCACACCTTCCAAATCCCTCATATCCAAGGTGGAAATTC -CAACCTCAGTCAGCCATTCGTAACGCTCCATCCACTCGAGATCAACGCTGATGAAAATAG -GGGCGTCGTGAAAGGCCCATTTGACAGGCTTTAGAACATCAATTGCGTCGACCTGGCCAT -TGGCAAACGACGGCTGTGTGAAATTCGGCTGCAACGCAGGACGCAGGCCAAAGTAGGATT -GCGCACGAGCTAGGCTTTCACGCAACTTCTGAATTTGGGCATCTCGGGCCGCCTGCTTGT -GATTCTTATTCTTGATTGTGGCGATCGACTGTTTGACTTTCTTTTCAAATTCTTCGAGAA -CATGGGGCTCGACCAGCTGTGTCCAGGGTCCCCAGGTCTCCAAAGGCGAGGGAACATGAT -TCTCCAATCTTGCCTTCCGATCAGGGCTGCTGCTTCGGCCGAGAAACAGAGGCCGCGGCA -GATTCTCATTCTTGATTTCAAGGACAAGACCTTCCTTGTTGACGCCAGTGAGAGTCAATC -TGAGTTTCAGAGTCGAATTGATTTCATCGAGCAACGCTTGAGCTTGATAGGTGGGAATCA -GAAGAAATGGGGCTTGAGAAATTCCTATTGGGACAGGAAGGTAATACCTGGATGAGGGTT -AGCGTTGATATATTTTGCTAAATGATTTTAGATATCACAAACAGATCCCAGGTCCGATTC -CAGAACTTATTACCCACATAGAACCGCTCATTGACTCTCTGTGAGTTTGCTCCATGAAGA -TACTTGAAAGGGTAGCGGGCCATTGTCACCGCCGGGGTGAAGGATGCTGGAACTTGGACC -GCAGTGACCGGTGCTTCGTCGGAGATACCGCCGCTCTCATCATCGCGCACATCCGAGGTG -TCAAGGTGAAGCTCGGGAGAGCTACCGGGGGTGCGATCATCCTGATTGATCTCGGTCAAA -TCGTCATCCGACTCAACTAGCTGTCCAATAACATGGGTACGGATGTACGACAAAGGAAAT -TGGCCACTCTCAGGATATTCCATGCCATGCTCCGGATCAAAATCAACGGGGAGAGCATAT -CCACTTACTCCTTTCTTGGCTTTAATGGACTCGTCTACCGAGTCCTCTCGGTGAAGACTC 
-TGGACATGTTGAACGGCTTGAGCCGTGTTAAAGTTGGTATTCAAATCTCTGAGACTTTCA -TCTCCTTCGAGGAGCATCTTCAGACGCTCGCGAACATCCATTTTGAACAAGCAAGATGAT -AGCTGTTAGCAAAGTCGAAAAAGTGAGATGTAGAGAAAGGAGGGGAAAAAAATAAACAAA -GTCGCGCGCGAAACGCACGGCGAGCTGTTTAGATTGAACTTCTCAAGTGGGAAGGAGCGA -TGGAAATTGGGAAATCACAGAGTTCTATTTGTTCAGGTTGAATACCATTGGAGAGGAAGA -CACGAGATGATTGCTGGTAAATTTATAGCTACCTTTCTGTAGGATTCTATAGGCACGAAG -AGGCAAAAGGAACTCGCTATGTCTTCTCTTTCTGTGCTCAGAAGCTGTCACATACAGTAG -TATACGTACATTCATAAAGGCAGGTTACTATGCGGTGATTGACAATGATCTCAGCATGGA -ATTACTTCAACCTCGGTCTGACTATCTGACAGAGGAATAAGCATTTGATACTTGAGTTCG -ACTTGAAAAAGTCAAATGAAATTCGATTAACTCTTGAATATAAAAGCCAAGGCTGAGTAT -AGATATGATTTTCAGGTGTAGACCCACAGATAACTAGTTATAGTGAGTCCAATAGTTATA -TGACGAGCTAGATAGTTTCAAGACAAAGCCAGATACTCGTCGAGTCTCAGGATATATTGG -TAATTCAAAGTCTTCCATATATTAAATATAGGCAAAGATGTTTTATTTATGTCCTGAAAT -CTAGCTGGGGATATGGGGGGCTTCTAACTTATATTCACGTGACTGAGAGCTCCCCAGACG -GCCACGGAATAACTATCATCCGCTGTGTTGAGGACCTCGTGCAGCAAGGCATCCAGCTAC -GTCTGTTATCGGACGCCGAGACCCCTCGATGTCCGCATCTATCATCGCAGAATCTTAGTG -AACTTGGCAAACCTCAACACAATCGCAAAATCAAGGCCCCGAGCCCTCACAGCCCATCAA -CACCTACCACCATTGTGTTAGACATCAAACATGCCTATGACCGACGAGACAGACGACAGC -GGAGGGGTTATCTTAGATGGCCCCTTCGACCCTGACGCGCAGGCTACTGTCACAGACTTT -ATTGACTACACTGAATACCTTCCCGCAGACTTAATTCGCTCCTTAACCCTTATTCGCGGC -CTCGATGACCGATACCTCGACTCGGCGCAAGCGGTCCACCAGCTCACTCAGATCTACGGC -CAACTCCCAGACCTTCCTTCAGATGACCGACCTGGCGCAATTGCACTGCGCAAAGATATA -TCTTCTCAGGTAGACCGTGCTATCAATGCGCGTGAATCGGCATATGCCGAGGCCTGTCGC -CTCTACGACGTGGTAGATCGCCACTTCGATCGATTAGGCTGCATCCGCCAGAAGCTCGAA -GCATTGCCGAAACCTGCCTCAGCAGAACCGTCACCCCCACCTGAACCTACACCGAAGCGC -GCGCGGGGAGGCAAGAAGACCAACAAGGATGCCACGACGCGTATCACCCTTCGGTTGGAT -AACAATCGCAATCGGAGAGATAAGAATAGAAGGCGCATTCTAGGTGCTGATGGCGCTTTT -GACCCAGACTCTCCCCTTGCCAGTACCGAGCAATCGGACTTGGAGAGCGAACTTATTAAA -TCTGCCCCCAAGCCCGCACGACCCCCAAAGAAGGAGAAAGCGCGTCGCTTGAGCTCGGGT -ATTGGACAGTCAACAGCCCAGGCCCTGGCGCAGTTGAAACCGCCGCCAGCGGACGCGAAA -TTGGGCAGCGAGGACCTGCCATGGTTGCGCTTGACGGAATGGGAGATGACCAGGCTCCGG -AAGAAAATGAAAAAGAATGCTGTGTGGCAGCCGAGTGAAGTAATGATTCACCGTGAGCTG 
-GCCCTGGCAAATCGCGGTTGGGAAGCATACCGTGCAGCAAAGGCCCTGGCAGAAGAGACG -GGTGGGGATTTCATTGACTGTGATAATATTGAAGAAACCCGGCGTGGTGAGGCTGCCAAA -GACTTGGAAGAGACGAAGTTGAGCAATCGTGGAATGAAGCTTAACGAAGCAAAGAAGTTG -AAACGCGAACAGTTGGCCCGTGAACAAGCATTGATGGAAGGCGAAGGTGGTGTCTTGCCT -AGACCTTTGCCAACCCCAGCCCAAGCACCCCCACCCGCGAATCGATCATCACGGAAGAGG -AAACGGGAAGAGGTTATCCTTGCAGCGACAGAACTTGTTGCTTCCGAGCCTTCAAAGCGT -ACCGCAGTTGCTGCCCCCGTTGCACCGCCTGCCCCTGTCCCTGCGCCCTCTTCCAGGGCT -GGCCCGTCTCGGCGCAGATCCAGCCGAGGAAATGCCGCAGCCGCAGAGGTTAGCAACGAC -CTCATTCTCCCTATCAAAATAAATATGTCCCCCGCAGTCGAAGCCAAACCCTCCCCTCCT -CCCATCAGCCCTACAAGCTCCGGACAATCCAACCTACCCCTTGTCCAACCTGTCACTACT -CCCACACCGCCCGTCACTCGGCCTTCATCACGGCGCTCTGCAGCCGCTGCCTCAATTGAA -GGAGGGTCCCTCATTACCATCCCGACAGCAATAGCAGCAAGTGGACGCGATCGCCGCATT -AAAAGCGCAACACCTGCACACAAAACACCCATCCGTGAGTCCTCGAATGCGCCAAGTGTC -CCCGCCCCGGCCCGTCGGCGCAAGCGTCCAGCACCAGGACCCATTTCCTCTGGCCAAGAT -GGCGGCGCAGCAGTAAGCTACGGCCGTCGCAAAGCGAAGCCAGGAAAGAAGCGTCTTAAC -ATCCGCGACTCCCAGGACGTCCGTGTAGACGAAGACGGCGTCCTAGAACAGATCGACGCC -AATGAGCCTCGATACTGTCTCTGTGGAGATGTTAGTTTTGGAACGATGATCTGCTGCGAG -AATCAGGATGTAAGTTATCCCTACTATGGCACCTTCCAGTAGTATACTAATATCCTAATT -CGCAGTGTGACCGTGAATGGTTCCATCTCAACTGCGTGGGTCTGACCGAAGTCCCCTCCC -GCACGGCCAAGTGGTACTGCCCGCAATGCCGTGTCAAGCTGCATAAGGGTGAAGATGGGA -TCATCAAGGGGAGCTCGCGACGTTAATTGGATTTGAATGATGTTTCTTGCATGATACCAC -GAAATGCGTTTGCTGCTATTTTGTTTTCCTATCCGTGTACGGAGTACTGCCGTATGGCCT -TCTGGGATACCAGACTTTTCGTTCTTTCTATCAAATAATCCTATGTTCATCAATCTTGGA -AGCATTGTGATATCTTGAACCCCATATGGCCTTGGGGCCAATTATGGCTTTCAGCCTGGG -GCATTAATGTCACTATTGTGCAATCGCCACCACTCAATAGCTCGTCTCATTTGTTTCCGT -CCAGAGATCGTCAAATCATATCATGGCTGAAATTAAGCTCTTTCCTCTCCGTTCTGCGCC -GGGCGAGCATATCTCTTTTGACAAGCCTACTTGGTTGAGTTTAGACTCGCAGGGCAGGAT -TACGCGATGAAAACGTTCTGGATGGCGGCGACGAGGAAAGGAAGAGTGAGAGAGGCTCAT -ATCTGGGATTAACAACGCCTGTGGCCTCGCTATGTATTACCCCTGATAATAATAAGGATC -GATATCTTTTCAAGTGGAAGCTTTAACCCTAAAGTTCCCTGCCTGGCGACTATGTAGCTC -TATTTCCTTTCCAATGTGACCCTTCACTACGCTCCTCTGAAATCTCTTCCATAGCAGCTA -GCTCAGCATACCCCTTATCCAACCGTGATACTTTGGAATATCGACTGGTATCCGGCAACT 
-CTTGATATTTATCAGCCCCAGGCTGCGGATAATGAAAGTCCGCTGGCAATTCCGCAGCAA -AAGTTTTCCTTTCCTGTCCATCTTGCATCGGAGACGCATATCCAGGAGACATGGATCCCA -ACCGCACCGACTCCGTCGGCGATTTCGGCGCCCAAGAAGACGACGACATCGACATTTGAT -CCGATTTCCGCGACGCAACTGATATTCCCCTCATAAGTCTGCTCGACCGATCGGCCGGAT -TATACGTTGTCAATTTCCCATCTTCCATAATACCCTGCAAAACACCCGCATCAAGAAGCT -CGGCTTTCTTTATCAGCCGCTCTACCAGCACCTTCGAAGCAGCCTTCAATTGAGACTTGA -CGAAAGAAATTAGCGTGCGGTTGCATTCGATCTCGATATCCTCGCGCAGATAGAGTCCGG -ATCTTGGAGCGTCTGTTTTGCGCCCGTCGCGCGGTTCTGATGGCTCGTCAGGCTCGTTGC -CTGCAATGCGCCATTTACTTTTGAGGTCAATTCCCATCGGCGCGTACATGTGCGTTTCTA -GTCCCCATGCGACATTGTGGAAACAGCCTTTGAAGCTGATCTTGCCTGAGCCCATTTTGC -CTAGACCTGGGATGTATTGTACTCGTTCGGTGATTTCGTACCATGTTGAGTAGTATTCGT -CGGCGACGGCATCTCGTGGTGCCTTGACTGGTTGATGTGAGATGACGAGTGGATTTAGTG -TGATTATCTCGCTATGGCTGTGTAGGATGTCGAGGGCTAGTTGGCGGGGCACGGATGAGG -GGATGGGTGTGATATTGGTATAGACCTCTTTCCGGCGTATCATTTTGGATGATTTAGTTT -GATAGATTTTAATAACTGTGGAGGGGAGGAGTTAAGTTCAAAAGTGCAAGGCTGAATCGA -AGTTGGGCTCGTGTTTCATCAGCTTACTACTTTAAAGTTGCGGCGTTGCTGGCTATCAGG -GCTAGAACTTGCACATGACAGTATGTTTATGGGGGCCAAGTAAATTTTATACGAGGGACA -TTTGACAAAAAAAAAATACACTTCTGATTGGTATAACATGAATTCATTTACATCGTCTTT -ACTTCAGCTCCCACTTGTCGCCCTCGGAGACAATCTCCACGGGCCGTCGCTCATCCAGTG -ACTTGCGTCCTGCATCGAGAATTGCCGTTGTGGCGATTGTGTTGCGTAGAGTGGGCAGTG -GGCGGGCGTCCAGTTGGTCGAGGGTAACACGGCCCTCGTTCACAGCTGTGACAGCATCGA -TGAACTTCTCGAAGCTGATGTAACCGTATCCAGTCTGTCCGCTGAAGTTTCCTTCTTCAT -CAGGAGCGTAGCGCATATAGAATCTAGTAAATCGGAGTCAGTATGGAACCATGATGGAGC -ATTAATCTGAGAATGGGGCTCTTACGGGTTGAAATGAACCAAACCCTGCTCATCACCGGT -CACTTCATAGCCACGCTTGGCCTGGTTCACGCGAATCTCGCCGTTGGCGGCCATATCTTT -TCCAATGTGTTAGAAGAGGTCTGTTCATGACTGCCAGAAAATAGAATAACATACAGTGGA -AGTACTGGTTGGAGTGAACACCCGCATTTTGAGGTGCCGTCCAGCTAGCTGTATAGACAC -CAGTAGCAATCTTCGCCGGGTCATCACGGCGGCGCCACTCCACGAGGAGGGTGATGGTGT -CCTCAGTCTCGGGGACACATCCAAGCGCAGCAGCGGTACCCTGAGAAGCAGTTGCTGTGA -CCCGCACAGGCTTGTAATCTGGGACCATGCTCTCGCAGATGTCAATGTGGTGAGAGTTAA -GGTAGTAAGAAATATCGGAATCACGACCAGCCCACGCCTTGAAGGTCTCAAGCTGGCTCT -TAGGCTGGCTCATGTAACTATAAAAGTAATTGAAGTCACCGAGGCTCTTTGCCTTGGCCC 
-GCGCATCAGAGTATGCAGGATCAAAGCGCTTGTGGTGCTCGATGAAGACGAAGACGCCGT -GCTTGCGTGCGGCCGCGACCAGAGCTAGGTGGTCTCCTAGCCGCTGGGTTGCTGGCTTAG -TGATGAGCACGTGGATGCCGCGCTCAATTGCGTACAGTGCAATGGGGTAGTGGGTGGAGT -CGGGGGTGAAGATAGTGATGGCGGAACCTTTGGGGAGGGCATCAATGGCGGTCTTGTATG -CGTCCGGGTCGGTCTGGTCGTCCGCGGGGAACGAGGTAAATGAGGTATCGAGGTTATTGT -AGACTTGAGAAATGTTGCGGTGCAAGTGGTCGCCTAATATCGTAATTAGCTTCTTGTTCG -CTAGGTTGATCTTTCACTCAACTTACGAATACCGGGGAACTTCTTGCCCGACACTCCCAC -CATGCTCAGGTCGCCCACCTTGCCTCGTCGGCGCAGGTCAAATAGACTGAGACCTACCAC -GCCAACCTTCTTGTCTGAGCCAGATGCACCACCGCCGACGAAACCGGTGGTGTATTCGCC -AGTGCCGACCTGTGAGCTAATTAGCTTGAGATCTACTGCGGGGAGTCAGTATGCATACCA -TCAAAACGCTAGGAGGAGCCATGATGTCTGTTAATTCCGAGATTTTGATCGAATTCCGGG -GGTTTTCTGGGGATTCCTGGGGAGAAAAGGAAATTTAAGATATATCACAGAAAAAATATC -AAGCCGGGGTGCTGAATTGCTGGATATAATTCTTCCGGCGTTTGAAATGCCAACTCAATA -GCCGTATCTTATCGGGTGAGGTTTTTGATGTCATTTTTCTTTTTCCACCGCAATATTCCT -TAGAGAGATACTtatacactatacactatacactatacattatatactatagcactTGCT -TCCCATCTCAATTGTCAATTTCTCAAATGCCACAACTCATGGCCTCTACTCCATCACCGG -CTGAACTGCGCGTTTACCCGTACCCCTCCCCCACATTCTGCTGCACTTCAGTCTAATTGG -TGGAAGATAATTCTCCGCCATACAAAGTGCCTCGGACCATTCGCCGACGTGGGGGAGAAG -TTCATAAGTATGCTAAATGCCAATATGACCGGATGTTTTGATTATATACCCATCAAAACT -CCAATTAGGTCATTCTATTGACAAGAATCAGTGAACATCAGCTCACAACACAGAATCATG -TCTCTCACTCCACTCCGCCCCGGCGTTTATGCGCCGACAATGACCTTCTTCGATGCCGAG -ACCGAAGAGCTCGACATCCCTTCCATTCGCCGCCACGCTGTGCGTCTGGCAAAAGCCGGC -CTGGTCGGCCTGGTTACCATGGGCTCCAACGGCGAGGCTGTGCATCTCAGCCGCGAAGAG -CGCATGACCGTCACTCGGGAAACCCGGTCTGCTCTTGATGAAGCTGGTTTCTCCAATGTT -CCTGTCATCGCCGGCGCCAGCGAGAATAGCATCCGCGGAACCATCGACTTATGCAAAGAG -GTCGCGGCTTCTGGCGGCGAATACGTTCTGATCGTGCCTCCCAGCTACTACCGCGCCGCA -GTCGGCAACGATGAAACCTTATACGAATACTTCACAGCCGTTGCCGACGGCTCCCCCGTT -CCTATTATTCTTTACAACTACCCCGGTGCTGTCGCCGGCATCGATATGGACTCCGACCTT -ATCATCCGTATTTCTGAACACCCTAACATTGTCGGCACTAAGTTCACTTGTGCCAATACC -GGCAAGCTGACTCGTGTCGCTACCGCTCTCGGTGCCATCACCCCCTCCTCACCCTTGGCT -CCGGCTGAGCGTACCGCAACAGTCTCCAAGCCCAACGCCAAGCACCCATACGTTGCCTTT -GGCGGTATTGCCGACTTCAGTCTGCAGACCCTCGTCTCTGGCGGTTCTGCCATTCTTGCC 
-GGTGGAGCAAACGTTCTTCCCCGTCTGTGCGTTCGCATCTTCACCCTCTGGTCCGAGGGC -CGTCTCACAGAGGCTATCGAGGCCCAGCAGCAACTCAGTGCTGCTGACTGGGTTCTCACA -AAGGCTTCCATTCCTGGCACCAAGGGTGCTATCCAGTCATACTACGGTTACGGCGGGTTC -CCTCGCCGACCTCTCAGCCGTCTTACTGATGCACAGTACCAGACTGTTGCTGATAAGATC -AAGTCTGCCATGGATGTGGAATTGAGCTTGCCCGATGTGGTATAATGGCTTGGTATATGG -CAAGGAGATACATCTAAAGTCTAGTAATATAATTCCAATGCATATATTCCAAGGTTTTCA -TGATCTCTCTTTTTGACTTAAAGTCTACACCGTTTGCTGTAGTATCCCATGCGCTAGTAC -CTAGTACCCTCTTAGACCAATGCCACGAGACTACTTGAATGGTTCTATCTACTTATGTAG -GTTGTACTTTACGGGTATGTGTGATGACCTGAGGCAAGCCGTAGCCGTAGCCTGAGGTAT -GATACGTAATAGCTACTTCATCCAAGTTCCCCAGCTCAAATCCCCTTTCCCAAACTGGCA -TAGTTGAAAAAAAATACTTTATGCCTAATTTCACGTCGGCGTATATCCACTGAGAACCGA -GACGCTCACACCTATTTTGTATTGGAGGAAGCGGACGAGATTTTGGCTAGGGCCTTGTAG -GTCAAACCGAGTTGAAGCCTTAAGTAGTATGATCACCTCTCAAAAAGTTATTCTTCTCAC -CTAAATATGATGACCACTATACACTCAAAGCCTTCTTAATATTCGTAGGCAGATACTCCA -GCCCATGTGCGATACTGCCGTATGTATTGAGAGGTATATCCCGTCAGATGGAAATGTTCG -CCAATGGAATTGCGCCAGAGGATATCCCCATGCTCGATGAAGTCTTCCCAGTACGTTCAG -TTTACTCCTAACTCCGTCCCTTATGCCATTTATTACTGAGATTTAGCGAATCCTCCAGAC -CATCATCATAGCCTTCGACCAAAAATGCTGAGCCCGTGTGCCATACAGTTACTTCGATGG -TAGATTTCACGTTCAACTTACCCCGGTCTTGAATTTTGAAGAGTTTATCGAAGTCAGATC -GGCCACCTGGGATGGTTACATGGGTCAAATTGACAAAGACAAATAAAACGATATGATGCA -CTTGCTGCTTCGGGCGTGGCCTCTTGATCATCATTTGACCACTATCGCTCAACTTCAAGA -CCCCTCCTACAAACTAGTATGTAGGCTTTACTAGCAAGTAATTCCATAGTAGCTCATATA -ATTGCAGATAGAGTTGCTTGCCCTTGACAATAAATTCAATCTGTAGTAGTAAAGATACTG -TGGGTTCGCCTTTTCAGTGCGAGTACATCTGACTTGCACCCTCAACGGGGATAGGTAGCT -TTGGACTTCACTTTCGGTTGATTTCTAATCTCAAATTCAAAAGCGGAAACAATGACTCAC -CAAGAAACACAGCAAGATCAACATTCCAAGTGGCCAGTCGTTGTTCGTGCCTCAATGACG -TGGAGCCTCGTGGATTGGGGGCCAGGGTGGGGGCCAGGGCCAGTGTGATCTGGGTGGTTT -ATCTGCTTTTCATCTATCTCATTGGATTTCTCGAAAAGAACTTCCAAGCAAACAATAGAA -TATGGCTCTGTCGAATAAAAAACTTGTTCTTTTGACTGGTGCCACGGGATTCATCGGGTT -TCAAACACTGTTGGATGCCCTCAAAGCCGGATTTCGAGTACGATGTGCAGTCCGCAGTGT -AGAGAGGGGAAATGTTCTCTTATCCAAACCAGCATTGCAATCACTACAGCAAGACAACCA -AGAAGCACCAGTGGAGATTGTCGTCGTGCCAGACCTGGCCGCTCCAGGTGCCTTCAACGA 
-AGCTATTCTCGGCGCAAGCTACGTGATCCATGCAGCATCCCCGCTCCGATCTACTCAACC -GGATGGCGACCTAGAAGCGGAGTTCGTGGGCCCTGCCAAACAGGCTACGCTCGAGATCCT -CGAAGCTGCTGCCAACACCGCCTCTGTTGAGCGCGTGGTTATAACCAGCTCGGTTGCGGC -GTTGATATCACCTACGGCCTTTACGGGAATGAACCCTCACGACCTGGACCACCCCTTTAC -TTCTAGCAGTCGTGTGCCGTTAATGAAACCGCCTTTTGCTCATCCGAGCGTCGCCTACAT -GGCCTCTAAGATAGCATCTCTGCAAATTACCGAGACCTGGGCGAAAGAGACACATCCCAG -GTTCGGCATCATCAACATCCTTCCATCATGGGTGCTGGGCCCGGCTGCATGGGCCGGTGA -GGACTCTCAGGTGTTATTGGACACAAGCAATGGACTGGTACTCAGGACGGTTGTACAAGG -CATCAAGTCCCCCTATCCCCACATGGCGGGTGCTGTTGTAGATGTTCGTGACGTCGCCCG -GCTCCATGTCCAGTCATTGCAGCGGACAGATATCGTTGAAGCAGGGGGTTGTCGCAGTTT -CATTGCCAGTACTCCTGGGAAGTGGGAGGATATTCCAGGGATAATCCGGAAGAATTTTGT -CGGTGAGCTTCAAAACAATTCGAAACGTCTTACTGTTCTGGGTGAACAGACGTCGATTCC -ACTTGCCATCGATTCGTCTGATACCGAGCTCGCCTTTGAATGGAAGTTTACGGACTTGGA -GCGAATGATCACCGATCTCATCTCGCAGTATATTGAGCTGAAGTGATGGTGTCTAGGTTC -TTTCATTTGTGACTGTTATTCCCTGACGTATACCCATGTAAATACCCATGTAAACCCCAA -GCCCTAAGCCAAAGCCCTAAATCATCCCACTTCTTATCGTTGATAAGGCCGGGTATATTA -AATTATCCTCGCAAGCTCATAGGTGGAAAAACTACTGAATCGGGGTTGATGCGACAACCC -GTCCACTAGATGAAGGGGTCTGTTATCAGCTACGTAGCATGGATGTTGTTAAGAGTCCTA -CCCCACCAAATAAAGCAAACCACCCCACGCTACCCAGCCTCTTTCCAGCGTATTCATGTC -TCTCGGACGATATCGAATCCCGTATCTGCGCTCTAATCGCCCTCGGTTGTCAGTTTGCCC -GGTGGAGCGGGTCCACGGGACAATGTCCGACGGGGGGAAGCGGCTGTGAACAGTAGCAAT -ACGTAGCGCCATGGCTTTTGAGATTATATCCCGCGAAGGAAGAATCAGGGAATGACTCGC -GATCCCCGCTTGTAAGGGTCTAAATTTTAGTAGCGTTGATATATTCCCCTCCCCCGCTTG -TCGCCTGGTTTCTACTTTTCTTTTCTTACTCTTTTTGCTTCTCTCGTGTATACTGCTTTG -CTGCCCGCTTTTCCAGAGAATAGTCATTTACAAGGACTTCTTCAGATCAATCACCCATTC -TTATCACAATGCCTTACCCCGAAGAAGCTGAGGGCTTCCAGGTTGATGGACCTGACAGCT -TCACCCAGTTCCACAAGCGTCACGTATGTGCTCTTTTGATGTGTTCGCTAGGATACGCGC -TTACCTAGGTTGATTGAAATAGTTCAAGCTGAAGCCTTTCGGCGACTATGGTGAGTTGAC -CTGTGCTACTATGTTTCTTCCCGATGCTAACCGAGTATAGATGTCGACGTTAAGATTGAG -GCCTGTGGCATCTGCGGCAGTGACGTCCACACTATCAGCGGTATGGTGATTATGTTGAGA -AGTCAAGCAGAAACACTGGCTAATGCATTCCTACAGGAGGATGGGGTGATCAGAAGTTCC -CTCTGTGCGTGGGTCACGGTGCGTATTTAGAAAGTTACAAAATGCTACTGTCTCTATGCT 
-AATGACCATTCTAGAGATCGTCGGTCGGGCAATCCGTGTTGGACCCAAGGTCACTTTGAT -TAAGGAGGGCGAGCGCGTTGGAGTCGGCGCGCAGTCCTACTCCTGCGGCAAGTGCAAGCA -GTGCCGCAACGACAACGAGACATACTGCCAGTTTGAGGCGATGGACACCTACGGCTCGAA -GTGGCCCGACAGTGGCATCGTTAGCCAGGGTGGATACTCATCCCACGTGCGGACACACGA -ACACTGGGTGTTCCCTATCCCTGACGCACTGGAGACCAACTCGGTTGCCCCGATGCTGTG -CGCCGGTCTGACTGCTTACTCGCCCTTGGTGCGCAATGGTGCTGGTCCTGGCAAGAAGGT -CGGAATCGTTGGACTTGGTGGTATTGGTCACTTTGGAATCATGTTTGCCAAGGCGCTGGG -TGCGGAAACGTGGGCTATCTCGCGTTCTCGTGCTAAGGAGGCGGATGCTCGCAAGCTTGG -TGCTGACGGTTACATTGCTACTGCTGAAGAGGGGTGGGAGAAGGAACATCTATGCTCTTT -TGATTTGATTATCAACTGCGCTAACTCCTCGGAGGGTTTCGATCTTGCGCGCTATCTATC -CATGATGGATGTTCATGGACGCTGGATTAGTGTTGGTCTTCCTGAGGAGGAGGGCCAGGT -CATTAAGGCTCAGAACCTGATCTCTAATGGTGTTCTCATCGGTGCTAGCCACCTGGGTAG -CCGTCGGGAGATGCTTGATATGCTGCAGCTGGCTGCCGATCGGGGTCTTAAGGGTTGGGT -CGAGGAAATCCCCATCTCAGAGGAGGGACTCAAGGACGCCATGTTGCGCATGAAGAAGGG -CGATGTTCACTACCGGTTCACTCTGACCGGGTATGAGAAGATCTTTGGGTGAAGAGACAT -GTTCGGTGTTGAGAATTAGTATATAGCCGGGGTCAACATATAAAACTTAGGTGGTAGAGG -GCTGGGAGGTTAGGGGTGTTTAATTATGAGGAAGTTAGAAATTAAAAACATTTCCAATAA -TAGTACTGATCTACCCATGTGCCACATCTCCGGATCGAATCCGGCTGAAAGAAGTATAAG -CGGTCGAAAACCAATTCAGATCAGACTGCATTTAGGTATGTTTCCAATATTAGATCTGAA -AAAAAAAGAAAGCCTATCGTTTTATTCTTGATAAAAAAATAAAACAGTAATCGCCACCGG -ATCCAGTTGGTAAACGGTACTTCGATCCGCAGCCAATTTATTATGCCGATCCAAGTCACA -GTTACTCCGCACTTCCGGTTTCCCCCCAGGATTTGCCCCCCGATCCTTAAAATTCTGAAC -CCATGCAGGGGTGAGTACTGTAGAAGTACTCACTACCTGTTTTGATGGTACATTAGTACT -TCAATGCTAGTACCAGCGACTACTTGGCAACCTCATGGTCGAACCAATCACTCCTTCCAA -ATTCACCGCCCGATAACTCAGATCCATTCTGCTTTCTGGAACTCTTCCGCCCTTTTATCT -TCTCCTTGCGCCTTCGttgttttttttttttcttttgttattttctctctatacttcatt -cattaattcttctctctttctatcttttttttCCCCCTCGGGTTGGAGTTCGCGAACAGT -CGTTGCTATATACAATCTTGTTAGACTTATCTTGCAGAACCCCCCAAGATCAAACGTCGG -TCGATTGATAAGTAGATCGGACACAAAAAAAAGCCTTGTCACTCTTTACTGCACCAACTG -CCAGTACATATCGGCGATCATATCCATCCAATTGTTTTTTTAATTTGCCCTCAACACACC -AACAACCTCGGTTATGTCGCCACGAGAGACTAACGCCACCTCCACCAGGCGACGATCCTC -GTTGTCGTTTCGTCCGCGCCCACGCACTGCTAGCCATGCAGCCCCAATGTCGAAAGAACT 
-TGTCTTGGACACATGGGAGGAGCCATATTTGATGGAAGTTTGTCAGGATGACCATGTACC -GGTCGTGGAGGAGATCGACCCATTCCCGAATGCGGCGCCGCCGCGGCGCCGTGCTGCTAA -AAGCTTTTCGAGTTTGAAGCATCCTGTGGATGGCTTGGTTGCCCTCGGCCGCCGCTTGTC -CGTCAGCCTTCGCAACAAGCCTTCCAAGCAAACACTCCCTGTCACTGAGGACATCAAATT -GCTTGATGAAGATGAAGACTGTCACTGTAACCCTACTGTGAGCCACGCAAACCACAAGCG -CATGGCTTCCGGTAGCTGGGATGCCCGGTCAACAAAAACACATTGGCACCAAGGATCCAC -CCACCAGGCCTATAGCGTCAATCGGCGCCCTTCTTTGAATAGTGTGTCTGCCCTGCAGGG -TTTCTATGCCCCCACCGGCCGTGGTCGACATGTTCCTCCAGTTCTGCCAAATCACATGTC -CGGAAGTGCGGCGCGCGCCGCAGCTGCAGCTCAGAATGAGCAAAGGGAGGCTGCGCGGCT -CGCTAAGGCGGAACTGGAGAACAGGATGCTCGATATGAAGGTTCCGCGGGACTCGGAGAG -CGGTATATGTATCGACCTCCGCGATCGATCGGATGTGTCAGATGCCGATTTGCTGGGACT -CATGCGACTCGGTAAGGATGAACTCCCTGGTGGATAGCGCGTTTGTTTTGCTGACTTTTT -TTTTTCCTCTTCAGATCCCGTGGCTCTGCTTCCGGCCGAGATTACATCACACATCTTTTC -ATATTTGGACCCTGATTCACTCATGGACGCGGAGCTCGTGTCTCGCACTTGGTGTAATGC -GTCCTCTTCTCATGTTTGGAGGCACGTATTCCGCAATTCATATGGCCGACGCCTAGCAAG -TGAAACGGCCTCCAAGCTGAAGCTGTCATCCGGCTTGGGGAAGTCAATCCCGAATCAGGA -CTGGAAGAAGAAGTTCCTAGTTCGGCGTGCTCTCGATCAGCGCTGGGCTGATGGCAAGGC -TGCAGCCATATATCTTCAAGGTCACCAGGACAGTGTCTATTGCTCTCAATTTGACGAGTA -AGTCCAGACCATGATGTTATCATCGTGCACTGCTAACGATTTTCTAGGGACAAAATCATT -ACTGGTTCCCGTGATAGAACAGTCCGTGTGTGGGACGCACACTATCCATGGTCGTGCAAG -AAGATTATTGGGCCACCCGCGGCCCGGACTTTCCGCCCTGGTCCAGTGAACAGTCCAACT -TCTCAGGCCACGGGTTGCGCCCCCTTCGTGACTATTACTCCGCCATGGCCCACATTGGAT -GAGACAACTGAAATTACCGCACCATTGGAGGAAGAATCGATATACCACAGCGCATCAATC -CTGTGCCTACAGTTTGACGAGGAGATCATGGTCACAGGCTCTTCAGACTTCACCTGTATC -GTGTATGACATTAAGGACGACTATCGACCGATTCGTCGTCTTTCTGGTCACCAGGCCGGT -GTGCTTGATGTGTGTTTTGATGACCGGTATATCGTCTCATGCTCTAAGGATACCACCATT -TGTGTTTGGGACAGAAACACCGGTGAACTTTTGAGGCAGCTCAATGGTCACCAAGGGCCA -GTGAATGCTGTGCAGCTACGCGGCGACCTGATTGTGTCAGCCAGCGGTGACGGAGTTGCC -AAGATGTGGAACGTGATCTCAGGCCAATGTGTCAAGGAGTTCACCAGCAAGGACCGTGGC -CTTGCATGTGTCGAATTCAGTGACGATGGCCGGACCATTCTCACTGGTGGCAACGACCGC -ATAATTTACCAGTTTGATGCCAACACGGGCGAGCTCGTCAACGAGCTCCACGGACATTCG -GGCCTCATCCGATCTCTCCATCTCGATAGCGCAAATAAGCGCATCATTAGCGGTAGTTAC 
-GACATGAGCGTCAAGGTGTTCGACCGTGATACGGGTGATATGTCCATCAACTTCCCTGGC -TGGACCACAAGCTGGATGCTAAGCGTGCAGTCCGATTATAGACGCATTGTTGCGACGAGT -CAGGATTCCCGGGCTGTCATTATAGACTTTGGATATGGTCTAGACGGGATCGACCTCTTG -GAGGAGTGATCCATCTGCATTTGTTTATAAATAGCTTGCTCTTCTCAGCGCTGATTATTA -CCCGCTGTGGGCTTGTTTGGTCATTACTATTACATACATGTATAACATGTGTATTTTGCT -AGGATTATGGGAGTTTGATATGCGGCTCCTCGTCTTAGGTACAATTTGGACCAGGTCCTG -GACTATTGGCAGAATAGACTTGCTTTTGTTTTTCTTGCACATATAGCGTCAGTACAATTA -TATAGCTAGCATATATTACCTTGTGTCTTGTATCGCGATAATCAATTGAATTTCAAATGC -TCCGTGAGTTTGTCGCATGAGGTGACGAGAATTACAAGATGATCAATAGATCGGCCATTT -CTGCTAGTTAAATATGGCGGCGAGGAAGGCGGTAGAAATGGCGGTCAATCACCCAAGTTG -TAATGCATGAGTCAACAACTTGATATTTTAAGTGACAACATGACGGACGATTTGATCTAT -GAGATTTGACTTGTTGCAGTGGAATTGCGTCTCACTTGACGATGTAATAGTCTCATGCAC -TGACATACTAACTCAGACTACAATGTGATACCTCAGGTAAGACCTGACACCTCAGGCATC -AAGTCAATGATTAACTAATCGATTTGTGATTCGCCAGAAAAGGTGGGCTAGCGCCAGTGT -GATCCGTGGGATTTAGCATCAAAACAATCCCTCTGCTGTTTCCCATCGACCACACAAGCA -GAGACTACAAGAAGTCCTGGACTTGCGGTGTAATATCTTCATCGAGCTTCTTTTCCGCTT -AGCGATCATCTCAGTCTCATTTTCCGCTTTAGCGTTTCGATTGATATCTACTTCTACCCC -ACCATCATGGGTGAATCCAGGTTCGGCTTTTCTCCTATTCCCCGCCATCTCGCGACCAAA -TATGGCTTTACTGACGCAGATCAATTTATGTTTTCAGACAGGAACTGCTCGCATGGCTTA -ACAACCTGCTGCAGCTGAATCTTACCAAGATTGAGCAGTGTGGAACCGGGTATGTTTGAT -CGATATTCTCTCCAAGTTCTTAGGGCCCGCGGGGCGATCTTTTTTTCTGCATAGACCAGA -TGGGCTAACCTGATATCTTGTCTATAGTGCCGCCCTCTGCCAGATCTTCGATTCGATTTT -CAGTATGTTATCACAACTTGCGCAATAATTGAGAAAGATGGATCGGTAATCGTTAGAACA -TATACTAACTAATTGATCTCTTTGTTCAGTGGATGTTCCCATGTCCCGGGTCAAGTTCAA -TGTTAACACTGAATATGCCTACCTTCAGAACTTCAAGGTTCTTCAGAGTATGTGAGGGTG -GATTCCAGATCGGCCAACGATCCCCTTTTTAACATGGTCAATCACCCAGATGTCTTCGCC -CGTCACCAGGTCAACAAACCCGTCCCCGTCCAATCCCTTACAAAGTGCCGCATGCAGGAC -AACCTTGAGTTCCTCCAATGGGTTAAGAAATACTGGGATCAGCACTACCCCGGTGGTGAA -TACGACTCCGTCGGCCGACGCAAGGCCTCCGGCGCCCCTGCTTCTGTCGGTGCCGCCCCC -GGTTCACGTGCCCCATCTGCTGGCAGCGCACGTCGTGGAGTTACCCCGACAACTGGCGGC -GTCCGTCCCCGAGTTGCCGTCGGTGCGGCCAGTGGAGCAGCCACCGCTGCTCTGCAACAG -GAGATCTCGACACAGAAGGAAGCCATCGCAGGACTGGAGAAGGAGCGCGACTTCTACTTC 
-GCCAAGCTGCGCGATATTGAGCTGCTGCTCCAGAGCGCCATCGAAGCCGATCCCGAGCTC -GAAAAGGAGGAAGATACGCTTGTTAAGCACATCCAGGGCATTCTATACTCGACCGAGGTC -CGTTCCCTTGATTTATTCCCTGAGGATTCCCCCAACTGACGGTTACGCGTTCCCCACTAG -GACGGCTTCGAGATCCCTGCTGAAGGAGAGGAGGTTGCGGCTGACGAGCTCGAGACTTTT -TAAGTGAAATGAGTTCTAGGAGAGGACAGACTCGTTACTAATACCCCCGCGTCTTCTATT -CTCTTTGCATTGTCAAATAAGACGAGCCGAGCACATAGAAAGTCACAAATGCATCCTATT -TTTCCAGAGTGGTCTGACACTTAGGCTTAAAATAGGTTTATGGAGAATGGGATCGGCACA -TGATTCTCCAGTACAACGTATGGTGGGATTTAGTGGCACAATGACAGATTTTTTAATTGT -CATATTATTGTTCTGGGGGAACTAGTCCAAGAGGAACAGATATACCTCTACAATAGTGTA -TCTCTATTGACTTCCATGCCCATGCATGTAACCCTTGCCTCTGAAAATAGCCCAAACGCT -AAAAATGATAATTAAACCTGCATGGTAGACTAGTCCTAAACGGGACTAGTCACCTTGGCC -AGAACCTCATCTGGGCCCTGTCACTTTCTATTCGTTGTATCTGGCGGTACCTGCGTTGAG -AACTGGCTTTTGTTTGGGGGTGACTTTTTTTCTTATTCTTCTTTTCTTTATGAATTAGCA -TTCAATTTCAACCCTTCGTTTTCCTAATTTCAAAGCAGAAGAATCTGTCGGTCCCCGAGC -CTAGTTTGTCAACACGGGTCGCCTAAGGCTAGGCCTCGATAGGGGATTGTTGGGTCCAGT -CTGGGTCTTTTTCATCATTTTCCCGTTTCGCTTCTCTTTCGTGTGTCTCGCTTTTTGCTT -CCGTTTTGCTTTCTTTTTGATCGCTTTTTGGGTCCTGCTTTGCATTCTGTCTTCAATCCC -CTCTTGACACTGAAAATTGTGCCGAGAGTGTGACCATACTACCCTGAGAGCCGTCAATCC -CGACCTGCTTACCCTCCCGACATATTAAACACTCTATATTCCTCTTTTTGGTTCCAGATA -TCCAAAAAAAAAAACACAATGGATTAGGAAATGAGACTGGCCTCTCTTTTTTTCTCAATT -CCAATTTTCTTGTCACTCTTTTACTTCGGGTATGTGTTATCAATTCGAGGCTGTTTGCTG -TACGCTTGGGGATTATCCCAAATAAATCCATACTAATTCCGTCTAGTCGTTTGTACGATT -GCCGTTCCCTCCACATAACTGTGCGCCACATATCACAATATTCTTGTGAAGCAGCTATTC -ATACTACGCTTTTGTTGATCACAACGTCTGGTTTCGCTGCCAATATCCATTCATTCATAC -TGTCGACCGGGTCATTCCTTTCTCAGTCCGATAATCTATCAATCTTTCCACCCCAACTGC -GCACACGCTAGAACGTGTCTTCCTTTCGTTTACATTACATCTACATCTACCAACTTCAAC -CTTACCAGTCCAATTCGAAATACTACAAACTCAACCTCTGCGAGGATCAACGATACATTC -TTTGTTTGCTATTTTGTCTTGATATTCCATCGCATGTGATTCCCAATTGATCTACACGTG -TATGGAAAAATTGTCAAAATGGTTCATGGTCACCACCGTCACTCGCGCATTGGCAAGCTA -ATTAGCGCCAGGCGCCGCGGGGGTGCTGTTGATAAACAAGATGCGGGCTCTGATTTACTT -GAGGATGATCTATTTCAAAATCCCATCCTTCCTGCTACTGTCATATTTCCGGATGAAAAT -TGCGACTCGAAAAACTCTCTGTGTCTTGAACATGGAATGAATGAATCTGGTCCATTGCTT 
-GAAAAACGGGACGAAACTCCTTCCGCCGACCCACCTGTTATTGAGACGGTATTGCAGATA -GTCGATGCTAGCTCACACACACTCTCTCAGTCGACCGGTACTGATATTCCGATGACGATA -TCGGACTCTGCAGACGCCAGTAGCACTCTCCCGGACTCGGACAGCTTgactgcgagtgcg -agtgcgactctaattgcaactgcaactgcaactgcgagtgcCTCTATAGACTTAGGGCTG -TCCAACTCGCCGACGCCCGCCCCTGCTACCCAGCCAACAACGTCGCCATTACCCCAAACA -TCATTTTCAAGCAACTCCACGATCTCAACTCAGGCCTCTTTGACCACTTCCTCAAACTCT -AcaactcaactcacatcaacattaaaatctgcctcagcaacagtcattaacTCGTTTGCA -TTGACCTCCACTCCCCTTGTCGGTCCTTCTAGCTCTATTCCCTCTGTTACGACAACATCC -ACCCAGGTGCACAATCACTGGTCATAttccggttctagctccgactccagttccgacccc -agcttcagcttcacctctagtcccacttcaacttccacttctagttctagttcCAATGAC -TGGACGAGCCCGACGTCCAGCACAGACTACACAACCTCTTCGTCAGCTGTGTATGGAGGT -TCTGAGTCAACCGAGTCAGCGACTGGCACTGGATCCGCGCCAACTACAAGTGAAACGAAC -GCCTCGGGGGGTCCAACAATCGATCCCGAGACATCTAAGATTGTGGGCGGTGTAGTGGGA -TCAGTTGCCGGCCTTGCTCTCATTCTCTTCCTTATATTTTACTACCTCCGCCGCCGAGGG -CTCTTTATGGCAAAAATGGGACGTCCCAATATGTTAGGTGATGCAGCAGCAGGGGCTGGA -GCAGGGGCTGGGTCTCGAGAAGTCGTCGAGCGGCGCGAAAGCAACGATCCCCTCTTCACA -GCCTCATACCTCGCCCCCGCATTCATGAAGCGATGGCGCCAGTCAACGGCAACAACGCGC -TCTGGCAGCACTATCGACTCTGCCCCCAGCGAGCGCGGGTTCCAAAAGATCTCCGGCAGA -AAGCTTCCTCCCGTCTTTACCCACGGCGGCGACGGATATGGCGGCGGACTAGACGGCGAT -TCCATACCAGGCCTCACTGCCACCTCCCCAGTTACGGGACCAATGGGATCACCTTCCTAT -GCTCCGCCCCCAACCTCGCAATATGGCGGTATGCCCCTGGACTCAAAATACACTCGCGAG -GTTGAAGAGCCCGCCCCACCCCTGCGGTCCAACCCAGTGCACCTCCCTATCTCCAGTTCG -GTCAACGTTGCCACCCCGATCACTATCACACCGGCCCACCCCATTGCACAACCCCAGAGC -GCAGTACCTTTCGCGCCGCCGCGGCCAGATGCGCTCGGTCGCAGTTTGCATAGCTTCGAC -GGTAGTCGAAGCAGCCGCTTCACTGAGGTtattgatccgtaattgtgaatgtgatatggg -acatgtgaaatgATGGAGATGAAGTGACGGGTGGGGTGCATGCCCTGAGATCGGACTCCG -AGGCTTGGCCCCGATCGTGCCTTTTTTATTGTTAATAATTTCTGTTCGATTCTACCTTTT -TGTGCTGTCAAGCGCATTTCCTGTCTTTGATTGTCATTTTGTTACATTTTCTTTTTCATC -ATTGATACCCCCCCCTtctttctctcttctctcttctgccctttttcttctctcttctTG -TTTAAATTCGTTTCTCTGGATATTTTCTATCAACCCTTAAGGTTTGTATCATTGTGGATA -TATATTATATTTTGTTATTTTAGTGTGAATGACTACTGTATGAGTCGAGGCCTAAATGGT -ATCAGCTCAGATGATCAATGTAATCTAAAACTCTCGAACTTCACGTATGAAACCTTGTGC 
-ATCTGTCATGAATAGACGGTAGGAAGTAATCAAGTTTTGGCTTATGTGTAGATGACCAGA -ATTCTAAGCCCTTGAAGTTGCCCAAGTGCTCCGTGTAACATAATTTGCCTCGAATATGTC -ATGGCCATGCATGCCGACCACGTGGCAAGGTCGGTGAGACGAATATCATTGCAAATGTCA -TTTCTTCCAAGACTGCTTCATCTGACATGTGAGACTTCAATCGTGTCGTCTGAGACTACC -AGCAGTTGGGTTAGATATCCCTTCTCATATGCCTGATGAGTCAAAAGAGATGAGCCTCCT -CGAGCGCTTTGCAAACCAGCCTCACTTCCAATTTGTTTCTTTAGACGTCAAATGGCCGGA -AGAGAGACAGAGTTGATGGCATAAATTTGAAAGCAGGCGCTGGTCCTAGCATTGTTCAAG -ATTCGTGTCAAATGAGGCTTGGTAGGTCCGTACCTGTCAAAATATTGGCATCATCTCAAC -CTTATCCGCTTTTACGCGTCCTGAGACCTTTCCTCTTACCGCCATTGTCAATTGCTCAGA -TGCTGTGTTGAAGTACGAGTGCTTCTGCCTAAATGCTCCAGGTTCTCGTAGTAGCCTTGT -CTTGAGGCGAGCTCATCCAATATCACTGTCACCTATTGCACTGCCCTTCCGCGACCCAGC -TGGTGATGTTTCTTGCCTCCCGGGTCATACTAGAGCTCATGAGGCTCCTGCCATAGTGTG -AGCCGCACCGCCACATCTTCATGCGCACCGCCACAATAACGCCACACAACGATCTCGTCT -CCCTGTTGTCCTCCTTTCACCCGGAAAACATCCAGTTACTTACTCCATAGTGGTGGCGGA -TAAGTGGCTTGCAAAGCCATGTCATTTCTGTGTGCCTTGATAGGTTTCCACAGCGTCGCA -TGGTCAGTCATGACGCGCACAGTCTAGCCGCTTAAGTGGACACTTCCCCCAGAAACCTAG -AACAGACATTACCAAGCCCCATGATGAGACCTACGAAGTTTTGCCTCCTTACTCCGTACT -TGCGCTCACTATCAAACAATGCCAATACACCCATAGGAAGTATGAGAGATGAAAATTATG -TTATGACCCATGATTGGATGATCATCTGTTAGGAATCGAGGCGCAGCTCCTTGGCCCACA -ATAACAAATTCTGTGACGCTCTCACATTGGATACTTGCACTTGGCGTAGCGCTATTTGAC -GGCTGATCACAGAATTTTAGGTATGCATGTATGGACGTGAAGAAACGGGGCTAGAGTTTG -AGAATTAGAGCACCAGCGCGTAAATTCCTGGGCTTTGGCTATGGATGCCTATGAGGTTCG -CCTTCATAATATTACACGACTCTGTAGCCCCTGTTCCAGTTAACAAAGATTAACTTACCG -CGCAAAAGGGGGCTGTGTCTTGTACAAGATACTAATAAAGTGGCAACACCCTGACTTTCC -AAGTATATTTTACTTTCTGCGATAGCTCTCACTTTATTAAGTGGATATTTTATATCCCTC -TTATACTTGGCTGTCTTTCACGCTCGTTACGAATTGCCTGGAATTCGACTAAGATCCTCG -GCGTTCTGGGCAGATCCTTGGGAAGTACTACACAGCCCCGTTATCGAGAGGGGAAAGATA -AACAGAGTCTTCGGGATCCCGGCATATGAAACGTTGATATTGAATATCATACGAACTGCA -GCTCCGTTTTTCAATTCGCAAATTTCTCCTGGTCTCGATGCCATTCGCGCTCTATGTCTT -CAAACAATGACTCCCGTCAGGGCTCATTATTGAGCCCTGTTATAGTGACAGACTTTTCGG -GCAGAGAGGTTGGTCACCCAAGCCATCCTATAAACACCTGAACTTTTTCGCTGATTATCT -CATTCCACTTTCTTATTTTCCAGCGATCTATATCTACAGCCTCTTCTGCATGGTCAGGTC 
-CTACAGATATAAATCAGAGCCCAACTTCTCCTGTCTCACCAGATATGGCATGGAATCAAC -CAGACCCAGGGCGGCTGATGGTTCAAAGTGCCTCACATCGACAATCACAGTCGATTGATG -GCGATACACTGCGTCCGCGGGCGGACTCATTTGCTTCTTCTGCAGATACGGTAGTTCGCT -CGCGGGCAAACTCTGAAGTAAACCCGACGCATACTTCCAAAGATGCATATGATGATGTAT -CGTTGTCGGATGCATTGAAGCCGGATCCTCGGGATGAAGCTGATTTCCAAGTCGAGGATA -ATCGGTTTGCGTTTTCCCCCGGCCAACTAAACAAAATGCAGAATCCCAAGTCTCTAGCTG -CATTCCATGCTCTTGGCGGCTTGCAGGGTCTGGAGCGTGGTCTGCGCACAGATTTAAATG -CCGGTTTGTCAATAGATGAAGGGCGTTTGGAGGGAAATGTCGAGTTTCAGGATGTGACAC -CACAACGCACAGCTGCAGTGAAATCTTTACCAGCCAACACTTCGGCGCCGGCGCCGGTGT -CAGCAGCTGGTAATGGCTCATCTTTTGAAGACCGCATCCGTGTCTTCAGTCAGAATAAAC -TACCCGCTCGGAAATCGACCGGTTTCCTGAGACTGTTCTGGATTGCTTATAACGACAAAA -TCATTATATTATTGACAATCGCCGCTGTAGTCTCACTGTCCCTAGGGATATACGAGACCG -TGTCAGAAGGCACAGGCGTCGACTGGGTGGAAGGAGTCGCCATCTGCGTGGCTATCTTGA -TTGTTACAGTTGTCACGGCTGCCAACGATTGGCAAAAAGAGAGGCAATTTGCCAAACTTA -ATAAACGGGTAAGTATTCCCATACCAAATTCAAAAGTGTGAGCATATCTAACATCCCAAC -AGAACGATGACCGTGAAGTCAAAGTCACCCGCTCGGGCAAAACCAATATGGTATCAATCT -ACGATATAATGGTTGGCGACATCCTCCATCTTGAGGCAGGTGATGCCATCCCCGCGGATG -GCATTCTAGTTACAGGTTACGGTGTAAAATGCGACGAGTCTTCCGCCACCGGCGAGTCAG -ACCAAATGAAAAAAATCACCGGCCATGAAGTATGGCAGCAAATTGTCGACGGCAAGGCAA -CCAAGAAACTCGATCCGTTCCTGATCTCTGGTAGCAAGGTCCTTGAAGGTGTGGGGACTT -ATGTTGTCACCAGTGTTGGGCCCTATTCCACCTACGGACGGATCTTGCTGTCGCTGCAGA -CTCCCAATGACCCAACGCCGCTTCAGGTCAAGCTGGGTAAGTTGGCAGATTGGATTGGAT -ATTTGGGTACAGCGTGAGTTAAGCCTACTTACTTCCTTTTTTACTTGTACGGCTCACTGA -TGATAATTTACAGCGCTGCTGGTATTCTGTTCTTTGTTCTCCTTTTCCGATTTGTAGCCG -ATCTTCCCAATCACCCGGAGAGGAACGGTGCTATGAAAGGCAAAGAGTTTGTGGATATTC -TTATCGTTGCTGTCACGGTTATTGTTGTCGCTATTCCAGGTAAGCGCTGATGTTTCGCAG -GATTATCTGAACTATGCTGACCTCCACGTTCTTCTAGAGGGTCTTCCACTGGCAGTAACG -TTGGCTTTGGCATTTGCTACCACTCGAATGGTCAAGGAAAACAATCTCGTCCGTGTCCTT -CGCGCATGCGAGACAATGGGCAATGCCACGGTCATTTGCTCTGATAAGACAGGTACATTG -ACTCAGAATAAAATGACCGTTGTGGCCGGAACCTGGGGCTCGGATCAGGGCTTCAGTCAG -AGAACCGAAGATGAAGGTGTGAAGGGTTCAATGACTATTTCGGCCGTTTCACAGCAACTA -TCGGCCCCTGTCAAAGACTTGATTATCAAGAGCATTGCTTTGAACTCGACTGCGTTTGAA 
-CAGGAAAAAGACGGCGCCATAGATTTTGTCGGTAGTAAGACCGAGGTCGCGATGCTTCAA -TTGGCCCGCGATTATATGGGAATGGATCTTGTCTCGGAGCGTGGCTCCGCTGAGATCGTC -CAACTGTTTCCTTTTGACTCCACGAGGAAGTGCATGGGTATCGTTTACCGTGTTCCTGGA -GTTGGCTACCGACTCCTTGTCAAGGGAGCGTCCGAGTTGATGGTTGGCGTCTGCACCTCG -GAAATTGTCAACATTGATACTTCTAAAGAGAGGCCTGATGTGGAGCAGCTTTCTAATGCA -CAGAAGCAGCACCTCCTCGAGCTCATCGACGGTTATGCTCACAAATCTCTTCGGACAATT -GGCATGGTCTACAAGGAATTCACAGCCTGGCCACCGGCCGGGGCCAAGCATTCTGAAGAT -TCTTCCGCAAACTTCGATGAGATTTTCCACGATATGACCTGGGTTGGAGTTGTAGGTATC -CAAGACCCTCTTCGCCCTGAAGTCCCCGGAGCCATTCGCAAATGCCACTCGGCAGGCGTC -CAGGTCAAGATGGTGACTGGTGACAATGTTGCCACTGCCACTGCTATTGCATCTTCTTGT -GAAATCAAGACAGAAGATGGATTGGTTATGGAAGGTCCCAAGTTCCGCCAACTCACAAAC -GCTGAGATGGATGAAGTTGTTCCCCGCCTGCAAGTACTGGCACGATCATCCCCCGATGAC -AAACGAATCTTGGTCGAACGGCTCAAGGCCCTCGGTGAAACAGTCGCTGTAACAGGTGAC -GGTACCAATGACGGTCCGGCTTTGCGGACTGCAGATGTCGGCTTCTCCATGGGTATTGCT -GGTACTGAGGTCGCGAAGGAGGCGAGTTCGATTATTCTTCTGGATGATAATTTCAAGTCC -ATTATCACAGCGATTGCCTGGGGACGAGCTGTCAATGACGCCGTTGCCAAGTTTTTGCAG -TTCCAGGTCACGGTCAATATCACGGCCGTGGTACTCACCTTCGTGTCATCTGTGTACAGC -AGTGACAATGCCAGTGTTTTGACCGCTGTGCAGCTACTCTGGGTGAATCTTATTATGGAC -ACCTTTGCTGCCCTCGCCTTGTGAGTTGCCTTGCTTGTTCCTTATCTATTTGCTTGGACT -TTATGCTAACGTTTTGGGAAAACAGAGCTACCGATGCACCGACTGAACAAATTCTCGACC -GCAAGCCTGTCCCCAAACATGCCTCCCTCTTCACATTGACCATGTGGAAAATGATTCTCG -GCCAAGCAGTTTACCAGCTAGCCATTACATTTATGCTCTATTTTGCAGGCGATAAGCTCC -TCGGTGCCCATCTCAGCTCAGATCCCGAGATGCGTGCGAAGGAGCTCTCGACCGTGGTAT -TCAACACTTTTGTGTGGATGCAGATCTTCAACGAATTTAACAATCGCCGCCTCGATAACA -AGTTCAATATTTTTGAAGGCATGTTCCGCAACTACTGGTTCCTAGGAATCAACACCATCA -TGGTCGGCGGTCAAATCATGATCATTTACGTGGGCGGCCAAGCATTCTCCGTGACTCGTT -TGAGCGGTACCCTCTGGGGTGTGTGTGTTATCTGTTCTGTTGCCTGTTTGCCCTGGGCTA -TCGTGCTTCGACTCATTCCAGACTACCATTTCGGTATCGTTTTCAATGCCGTTGTTGGCG -GTATGGCTGTTGTTTTGCGCCCGCTTTCCAAAGGGTGCAAGGCAATTGGTCGTGGTATTA -GGTCCTTCTTTAGGCCCGTGAAGCGTTTCACTCGCCGTGTTGTCAACAAGCGGAAGTCAG -AGGATGAAGTCGAGTTAAACCCGCAGCCTGCAAACTCTGCTGATCCCGAAGAGGCTCCTG -AGAGACTTGAACAGAACAAGCAAAAGACTCCTGAACGTCCTACCACACCTCCAATGGTCG 
-TCGTGCCCCCTATCACAATTACGACCTCTCCTTAATTTGTCATTTTAGCGTTGTTATTAG -CATTTGGTGGCTGTGTGATATTTTATTGTTGTGCTGTTTGTTTCTAAAGCGTTGGGATTG -ATAGAGGATGTATATATATACCACCCAAAAAAACGACACATTTGCGTTAATCATAAAATT -TAATTCTCCCTACATAGAAAATATCCAGCAAATCAGTATCCTTGTTTCTTGATATGCTCA -ATAGTCAGAAAATAAGAGCCTCCAGGCAACTGGGTTGAGGTACTATATGTTTCTGCCTCA -GGCCACAACACCCCGTCTACCAAGTGTCCAGCTCCATCTCCCGTGGACTCCCCAATTGAC -TCCGCATTACCCTGTCGACTCACCTTGACACTTATCTGTGATCAATTAATCTGATCTCTG -TTTCTCTCACACACACATTGATCTTTCATTTTTTCCTCTTGGATCTCATACCGCTCGGAG -ACGTTCCCACGTCACTAGTAATCTCCCCGCTTCCGCCCACCGCACCGATATACCAAACAG -ACCAACAGGCAGCTCCTCACAAAGCTCCCTAGACCACTCGACTGGCTCGACCAACAGCTT -GATTTAGATCTATCTCTACCCTTCCGCCCTCAATCAAATCCCAACCGCCACCATGTCGCA -AATCAACTACCGCACCATCAACGTTGATGCCCTGGACCCCGAGTCCTCGACCAACTTCCC -CATGGAATCCCTCCTCCCCGCCACACTCCCTCAAGCTGCAAGCGCTAGTGACGCCGCCAA -TGCCGCGACACAGGTGCGCCAGATGCTGCGCGGCGGTGACCCGGAGGGCGCTCTGCGGAC -TGTCCTCGATACGGCGCCGCTGGGCGGTGACGACCGCGCCAAGGAGGTGCATCTTGCTAC -CGTCATCGATGTGCTGCAGGGTATCCGGCAGGGCGAGATGACCCGCATCCTGGAGGGCGT -GTGCAGCGGGGATGGCGGTGCCGAGCGGGCGGATTGCTTGATGAAATACCTGTGCGTGGA -CTACTGTTTCCGATTTTCAGAGGAACCGTATGATGTGAAGTCATGGGATTAATTGCTGAT -TTAATAATATTAGGTACAAGGGAATGTCATCGGCTGCTCCTGGTGGCGGTGCGCAGACGC -CAAAGAAGGTTGTTTCGCCTCAGAACACAGGCTTCTCGCAAATCCAGGCTCGCAACTTGG -CCGAGGGTGGTGGTGGTCAGCAGATGAGCGTTTTGTTGAGCTGGCACGAGAAGCTTGTGG -AGATCGCTGGTACTGGGTCGATTGTGCGGGTCATGACTGACCGTAGGACAGTTTGAGTGG -AGAAAACAGCATCGGATGGAGGTTCGTTTCTTATGATTTACATTTGACTGTTTTCTTTTG -TTCCTGTCTTCTTGGTAGAGGCTTGCATTCCATACATGGATGATATTTCTATGGTGTCGC -TTTAAGCGTAGTTTTACATTATTATATTGTGACAAAATATTGGGCTCTTTCTCTTTGCCC -TTTTGTTTCCATTCTTCATGGTTTATTAAAATCACCTCCCTCGAGCACATCCTTGAGGTT -AATATCGCCGCTTCCACGCTGCAATGGCTGCGACTGGCCCTCGCCTTCCTTCCATCCGAT -CATAAAGATCAACCGGAACGTCGCCGGAATACCACGGTCACCGTCCTGCATATGCAGAGA -ACGATATATAGCCTCGTTAGCCAACAGCACGTCCCGTGAAAGCGGTGCCATGTCACGGTT -CATGATAGCGTTACTCTCGCCCATCGCCTGCAAATCCTTCATAAGCGCAAAGGTATCTGG -AAACTCGACCACAATATCCTCAACATCAACAGTAAGCATCTTGAGCCCAGCACGTCCTAA -CAAGCTCCCGACATCTTTGACATCCGCCAGAGGCGACACATGCGGACTCACACCACCGCG 
-TCGCTCCATATCAGCCAACTGCAGCGATCCGCGCAGCTCGTACAGTGTATCACCGCCAAA -CATGGCCGCAATGAACGGCGCATCGGGCTTCAGAATCGTGTTCACCTGTGCTAGCAGCGC -GGGCAGGTCGTTGATCCAGTGGATCGAGAGCGAGGAGAGCACTGCATCGAAGCTGTTCGG -TGCGTACGGCAAGGTCTCCAGGTCTGGAATCACATCCCGCTGGATATCGACTTCCTTGTT -AAACGGCTCGTTGTCGTCGCGGTGGAGCAGCGCGTGCGATGTCTCCACGCATGTCAGCTT -CGAGATCTTTTTTGATAGTGGTGGTGTGATGACTCCTTCCGGCATGGATGAATCGAGATC -GGGGGATGTGATAGCTCGGGCGATGTTGCAGCTGTTCGCGCCTAGATCTAGAACGTTTGG -GAAGTCGCGTTTAATGTCCTACTCATGGTAAGGGAAGCTCCTGTCAGTGGATATGTCTTT -TTTTCTCTCTAGTTTTGCGCTTTTTTCCGCGCTTGCTTCATTGAGTGACTTACCAGTAAG -CGCTCACATAACCGCATTGCTACCTCATCTTTCAGGTAGTCGACTTTCCGACTCTCTTCG -ACATTCCGGGCTGCCCGATCTTTTTGGAGGTGCTTGACTTTGCGGTTGAAGACTTCAAGC -ATTGGGTTGCCGGGCGTCTGGCTAGCATATTGTCGGATAGTAATTGTTGGGGTAAAGCTT -TTCCTTAAGAGGCTGGCACGAAATGTCTGTAGAGATAGCATTGTGGCTGTATAATGCATG -TGGAGGATTCTAAACCCTCAAGCATGCAAGTCTATGGTCTTGCGGCGATGGCGGGCAGAA -AATGGCGGGATCTGACGTCGATTGTTGATGCGGAAGAGCTCCCTCTCCGCAAATCGTGGA -CTTTCTGCAATTGGTGTTTCCGAGACACTTGACCTGGCGTCAGCTCGTACTCCTGAATTG -TGCATACTGCGCATAGCACATTGAGTGTGGCAGAACAATTGCAACAGCTGCTCGAGTAAG -TCATCACGCGCCAACACGCTCCTCTCAGGGTTTCTGATTTGGCGCAACAAGCTTGGGATC -CTGGAAATTAAGATATTTTTACTCAAAAAGATAAGCCATCTGATAGACGTTTTATCTATC -AGTCGACCTGTCTACTATATTACGCTATTCAGCTCCCGACCCCACAAACACGAAGCATGA -TTTGACCCAAATACACACCTCGAAAGCTACCATACATCACCTCGCTCCCTCCTCCACGTT -GATATCAGAATGTCGCGTCCTTTCCCATATACCTTTATATCATGCCCCTGTGCCGACACT -CCGGTCCCCGATCCAGCGAGGAAACGAAAGTCGAGGGAGTCCCCGCAGAAGTCACCACAT -ACACCACCACCCGAGCGGGCAAACACAGACCAAGACAAGAAATTGTCAGCAAAAGAGGAG -CAACAAGCAGAAGATGAGGACGAAGAGCAAACATTTGATCCCCGCTCTCCACGGTCCAAC -TTCTCGCTGTATCCGCCCGAACAGCTTCTCTACTGCGAGGAGTGCCATCAGATCAAATGC -CCCCGATGTATTACGGAGGAAATTGTGAGCTGGTATTGTCCTAGCTGCTTGTTTGAAACC -CCGAGCAGTATGGTGCGGAGCGATGGTAATCGGTATGTACTAAAGGGCGCTTACTTATCT -TCGAATACCCGTTAACACATGCATATCTAAAGTTGTGGCCGGAATTGTTTCAATTGTCCT -GTCTGCACGGCTCCGCTGGCTGTCAGCACCATTGAGAACGCTACAGGGAACGGGTCTCAG -CAGGGTCCGTGGGTGTTGTCTTGCGGGTACTGTCTCTGGACGACGTTAGACATCGGCATC -AAATTCGACAAGCCGACGAATATCCGCAGTCAACTGCAAAAGATGACAGACTCCACTGCC 
-CCGGCTGTACTGGACCGCTCGAGGCAAGGTTCTCGCGCTTTTGGAGATCTGAAACATCCG -TTGTCCAGCTACGCATCGATTGATGAACCATCCAATGCACGCGAGCGTGGTGGAGAGCAA -GCTGCACCTAAAGACGAACCGGCTGCACCTCCAATGGGCATGGATGCCCGGTTCGCAGCC -CTGAAAAGCTTTTATCGTACACAGATTGCCGAGACATCAAACTCGCCAAATGATCATCTT -AGCTCAGAATTTGGGTTCTCCTCACCTGGCGCCCTAAATCGTCTCATGTCTCTCTACACA -TCCTCGTCACGACTGAGCGGGCTATATGGTGGCAGCAAGAAGCCCAAATCCAAGCCACCT -GTTATGCGCGAAGCCCTCACAACGAGCGAGGGACTTCAGATCGCCCCAGAAAACAACGAG -ACAGACATGATTGCACAACTTCAGTCTCCAGAATGTGGATGGGATGGTATGGCCTCGATA -GACCAGCGTGCAACCCAATCACCGGAAGCCCGCTTCGTCGACGATCTCCTCCCACTCCCA -GTTCTCCTCCGCACCAAGCGAGCCAAACGCTGCAAGTCCTGCAAGCACATCCTCGTGAAA -CCCGAGACCAAGCCGCAATCTACTCGCTTCCGCATCCGCCTCATTGCCCTTAGCTACATC -CCCCTCCCAACCCTCCGCCCTCTAGCCCTCTCTTCCTCATCAGCCCTCCCAGCCATGGCC -CAAACCACAGACCTCAACTCCCTCCCACCCCTGCGCCCAATACACGTCCTCCTTACCCTC -AAAAACCACATGTTCGACCCAGTCCGCATTACCCTCGCCACACCCCCCGTGACCCCAGGC -CAGGTTGCAACGAAAGTCACAGTCCTCTCTCCGCAGTTCGAGATTGGCGCGAATAGCGAT -GTTTGGGACGAGGCGTTGCAGGGTGCATCTGCGCCCTTAACGAGTGACTCTCGCTCCGCT -GTTCTACGCGGAGTCCCCGAGGCAGGCAAGGTCTGGGACAAGGGCCGGAACTGGACGACG -GTCGTGTTGGAGGTTGTCCCTGGTACATTGCCTGGAAGTAGTGCCGAAGGTGGAGATGGG -AATTCCAATGCCGGGACTAATGATGAGGATGTCGTGGCCGTTGCAGCTTCCAAGCAGGAT -GAGGATGTGCTTGAGATTCCGGTTTTCGTTCATATGGATTGGGATGCGGATGCACAGTTG -GATCAGCAGAATGTTGGGAAGGGGTCTAAGCTTGATGATAAGGTTACCAGGGAATTGGCT -TATTGGATGGTGCTTGGTGTGGGGAGAATTCAGCCTTCGTTGTAGATAGAGGATGTCATT -GTCTTGTTCATATTCGTGTCGACTGCTTTGCGTTTTTATACCTCTCATACCCCGAGTGTA -CTTGTTTTTTTTTTCAGTTGATAGAAGCATCCAAGTTTATTATTGTCCAGCGAGCTGTCC -ATTTCATACTGCCAAGTAAACATAACAGTAGATCATAAGCTACTTGACCTAACCATGGAG -ATATCCAACAATTATATCTAGATATCCTCAGGCAATCAATCCTATAATCTACAAGCAGTC -TAGACCACAATCCCAGCCAGACAGAACCATCTAAGAAAGAAGCAAGGTAAAGTCGGAATT -GTTGAATTGGAATACTATCAGATATTCCATCCCACCCTCATTAGATCGGGAGAGAGAGAA -CTCAACTCCATCGCCTCACCCAGAGCTCACACCGGACACAAACACTATGCAACCAGTGAC -TGGGGGCAAAAAAAGAAAAGAGAAAAAACCGTCAATGCATTTTTATGGAGATCATCTCTT -TGGAACCCATCAATGGCGTTGAATCTAGCAACAATTCAAAACTAGAAACAATGCCATTCT -CCCACGCATCAACCGAGCACATTCCAAGCCCCTAGCCCCAAAAAAAGTCAAAGTGAACAA 
-TGAATTCTAAATTGTGCATTTAGAACTCAGCGCCGCTCTGGGGGAACTCGAAGGCAACGG -AGCGGCCGGTGAGGCGGCGGTAGACCTCGCCGTAGGCATCGAGGCGGTGGTCGACACCAC -CACGCTCCTTCTCGTCGAGGATGACCTTGAGGGTCTTGGAGCCGTCCTCCTTGGTGCGGG -TGCGCTTGCCGACAATCTCGACGGGGTAGACGAGGTCGGAGAGAATGGCGTCGTGGACAG -CAGTCAGAGTGCGGGAGCGGGGACGCTTCTGGGTCTGGGTGGTGCGCGAGTTGACGGAGC -GCTTGGGGCGGGGCAGGATGCGGCGCTGAGCAACGAAGAGGACGTGGCGGTCGGAGAACT -TCTTCTCGAGCTCGCGGGTAAGACTGAAGTTGGTATTAGTTTTCGAGTTGTTCTTTTTTG -GGATATCTTCTTGGCTTGTTGGTCCGTTCGTGCTGGAAAGGATTGGTGGTTGCTTACCGC -TGCTGGATCTTGTGGAAGTTGGCCAGGAGGGGGACGGGGACGAAGACAATGATGGCCTTC -TTGCCGTGGCCGACCTCAACCTAGATAGCGAGAAGAAATGTCAGAGGGTTGTCCGGTTCA -AATTGTCGTGCACACAGAAACTGAATTTCCTCCTGGAACGGCGTCTTGCTCTCAAATTTT -CATATCCATTTGCATGTCGCCTCCATTGCTGATGATTCCTCTCCTTGTTGATATTTTTTT -GGCGTTGGCGGACATGAATTTAAATCAATCCACAAATCCGACCAATATACCCCGGAGCAT -CGAAAATCCCTCGAAAATTCAAAGACGCCATTCCAGAACGATTCAATTCGCATGTCGGGA -GATTCGGACAACAATTCTTACCTCGCGGGCAGAGACGAACTGCAGGGGACGAAGGCTGGC -CTTCAGGTCCTGTGTGTTGCTCTCGAGGTCGAAGAGAGCGCCGGCGATCGCGGTCTCCAG -CTCGGAGGGGTGCTGCCGGGACGGCGAGTTGGGGGCGATCTTGTTGATGGCAGCCATTAT -TGCGAGCGACGGGGTGCTGCGGGAAGTGCTGGTGGTGATGTCGACGGAGGGGAGGTTCGC -AGATTGAATCGAATGGGAATTTGCTGTGGGATCGGCGCATTCGCTTAGGGTGCGCTACTT -CCTGTTTGGGTTTAGTGTGGCCGTCTTAGGGCTCGTTGCTAATTCGTGGATTGCCGCTGA -AGGAGATCCACTATCCGACTCGCCTTTGGTTGACAACTACCCCAGGACCTTTTTTATGGC -GATATTGTTCTACTCTTTACTGTCATGTAGGTATGGGGATTATTCCCGATTGCTAATGCA -AATTTCGAAATAAAAATTTACACTCTAATCACATTGTCAAAACGAGGTGTCTCTCGGGGA -ATAGCACACTGTAGACTATAAGAAGACACGGAGTCTCCATTGGAGCAGGGAACaagcaag -aaatcgcaaaaaagaaagcaagaaagaCGCCTGTATCCATGTCAATAATTGCAAGAACAT -CTGCCAAAAACCAAAGAAATGCCAAAATCAGCAAGCGCAGCCCTCCTTGGCCGTGGCAGC -GTTACGATCATCCAGGTTGACCCGTGATTCTTCTGCTGACCGACCCAACGAAGTCTGGCC -AGTTCCCGCACCGGTACCACGTCCAGTCTTCAGACTGCTCTCGGGGATGGCATTGGCAAT -TGCCGTGAAGACATCCACAACATTGGTTCCAGTCTTGGCGCTAGTCTCGAAGAACAAAAG -ACCTTCCTCCTCCGCATAGCCGGTAGCCTCCCTTGTTGATACCTTGCGGGCATCGCCGGA -TACTGCATCCTGCACATCATCTTCCTCGTTGGCGCCTTCCTCGCCAGCCTGTGCCTCCTC -AGATTCGCCCTGTGCAGCGACAGCGGCCTCGGAAACATCTCCGCCGTCGTTGGTCAAGTC 
-CAGCTTGTTACCGACGAGGGCAATGACGATTCCGGGGCTCGCTTGGCGCTGTAGCTCGGC -GACCCAGTGCTTGGCTTTTGTCAGCGAGGAGGGTTTCGTGACGTCGTAGACAACAAGCGC -TGCCTGCGCGTTACGGTAGTACATCGGGGCGAGCGAGGCGAATCGTTCCTGGCCGGCGGT -ATCCCAGATCTCGAACTTGATCGTGCGCGTGGGAAGGGAACATTTTTGCGTCAAAAATGC -GGCTGGAAAAATGACGTTAGTTGGCCATCCTAATGCAAAGTCCTAATTGAGGGGTTGGGT -GGCCTGTTTGCGGGGGATAATCCCATTTCATACACAATATACTGCCCCTGTATTCCCCAG -AAAGTATATCATCTGCCAGGGACAGTATCTTGTGTCTTATGGGAGCTAAGTCCATTGGGA -AATCTTACCGCCAATGGTTGGTTCCTTGTTCTCCTGGAAATCATCGTTGACGAAGCGGAG -GACAAGCGATGACTATGAGGGAATAATAAGATATGGTTAGTTGATTAAAAATCGTGTAAC -ATGGCGAATAGAGGAGTAGTACCTTGCCCACTGCGGCCTCTCCTAGGAGCACAAGCTTGA -CGCTGCTGCTCGGCTTCGGGGCATTTGCGCTCGCTGGCTCGGACATAGCTGGAAGAACGA -AACAGTGAGATCAGGGTAACAGTCAGGCCTTCTGTAGGTAATGTGATCGGCGATCAGGCG -ACCGGATGAGGTGGCAGATGGATTCAGCTGGTAACCGCAGGTCGGGGAATGGGGCTGCCT -AGAAGTAAGACAGGAGATGGGGCACGAGTCGCTGTGGTTGCGCTCGTATTGGGGGAGTAT -TAAATTCTTGTCAAGAAATGAGAAAGGCAAAGCGTAGTATAAACGTTACCGAGACAAGAG -TGATTGATCCTGAAGTTGGAGAGAGGCGGAGGTTGATGATGATGGGTGCCTCGATTGGGC -TTAGCGTAGTCCGTCTGTCACCAGTCACCGGAAACATGTCTAAGATTGAAGTATACAACT -GACATACTTCCCGTATTTGCATTTGCTTTGCTTATACTTCTATTGAGGCTTAATTTATCT -GTATCTATTTAAAGAGACTGACATAAGAGCCAATTTCGATATTCAATCATCTCCCGCCAT -ATTGATCTCTCACATCATTAGATACAATGCACCAATGATGGCATGCAGATCACACGGACA -TAGCTGTCGGGATAGGAATATTTGAACCGGGGAGCGCGAAGACACGTACTATGACTTGGT -AAAGGGTAGGTCAACACGAGATAAACATGTTCCTGGCACCTTGACAACTGTCACAGATTA -GGCTAGTGCCCGACTGGCCTAGATGATGTCGGGTGCCAGTTAGGAAAGACCCTGGTCAGG -CTCGGGAGATATATGCCGATGCGCTAGCCAGGCCCGTATCAACAATCGGGCTGTCGTTTC -TGTCTCGGCATTAGTCGGTAAGCAAATCTGCCCGATGTTAGCGCAACCGCGGCCGGTACG -AGCGGAGTACGACCTGAGATGAGTACGAGTTTGTAAACTCGGGGTTGCAGAATATATATA -TGTACATGAAGCTGGAGTGGTCTAGACTAGCTGGTTGGAGAATGTTGAGGAAAAAAAAGT -CCATTGGCGCTCACGATTATGGTTTTGTGCGCAAGATATAATACACAGGCATCAACGTCA -AAGTGTAAAAGCACAAATACTCATACAAATAAACGCAGGAGCTGTACCCATCCAATAGTA -AAACAAGAAAAACAATCAAAGCATGCTATTGACCGCGAGGTCGAGGCTCTAGGTGCTGGT -ATCAGCCCATCCCATTCAATATTCCGTCCCGTCTATACCAAAACGCCGGTGAACAAAATA -AATAACAGAAAATAAACACAAAAGGCACTTTTTGGGATGGGGACCGTGGTATCGAGTCGT 
-ATCAGAGGTGAAGTAGAACGTGAGGTGAGGCGGAATAAGGTTCGAGGCCCGAAAAATAAG -GATAAGGCGCATGTGCGAAGACGAGAGATCAAAAGTCGCAAGGGGAAAAAAGTGTCAACG -CTTGGTATCCGAAAAGAAAATGCAGGCAGAATGTTGAGCCAAAATTAAAAAGGGTATAAT -CTCGCGCGATGCCTGCGAATAGGCAAGAAGAAAAAAAGGGTATGGCCCGAGACCGGGACA -AGAAACGATAGTGTTACAGATATCTAAAGGATTCGGTTAGTTTTGATGCCCAAAGGTGTG -TTCGGAGTGTGTCTTACCAAAATCAGGTAAAACTCCAAAACGCATATGCGTCACAAAAAA -CTCCAGCCACTGTTGGGCGCCAGACCATCCGAAACATGACCGAGTGGTGTAGAAAGCAAT -GTTGAGTATGACGAGGTTTCCGAATGAATTTATTTCGGCGGTGGTGGAGGAACGCCAGGC -TTCTTCACCCCAGTTGTGTCGTAAGCTCCTCTTCCCATGTTGAAAGCAGATTGATCCCGA -AGGATCATGTTAAGGTCTGCGGAGGAAGTCATGATTAGATGCTGAAGGTAGGATTTCGGT -TGGAAATAGGCGAGTTCTTAAATGCGCCAACGGTAGGGTGAGGAATGAGGTGGTATGAGA -CTTACCAGGTAAACACGACGTCGACGCCAGGATCTTATATAGATATTTCCGTGGGTGAAC -AACAAAATGAAGTCCCAATTCAACCCCCCGTGAAAATTAATAAATCAATTTATTTTTTTT -TAGATAAAAAAAAATAAGAAGGCCTAGACAAGAACCGGCGGTGCAGGCGATAGATATGAA -TGCAGTCGGATCGGAAAAAGCGGTCCTTTTGAAGAATAACCAGGATGCGGGATCCAACAG -AATCGTAGGGCAGGAATGTGACGAAATGGGGGGGGCCCCAACAAATGAGAATTGGAGTTT -TGAGAAAAGCACAGGTCGAAGTTGGTTGCTTTTTATTAGGAGATCGGAAGCAAATCAAGG -AATAAATGGAAATTCTCTCCAAATGATATGCGCTAGGGTGGCTGCAGCTAATCCATAAAA -GTCGCGATCCGAACCAAGAAAACGGGCAGAAGGGATATGAAGTTGAGACGGCAAGGGATC -AAAGGAGTCCGGAACTGGCTCCGGTCGATCCGGACAAAGTTCGAATCGACGTATCGCaag -ggagaagggagaggaaaggaggtaggggagaaAGATGGTTATCAGGTATGTTCAAAGGAA -ACAAGAAGGGGGGATTGCGTTGATATAGACGTTTCCTCTTGAATTTTTATTATTATTTAA -ATATCATATCCTTTTTATAAAAACCAATAAAACAAATTCAAATTAGAAATTCAAATATAC -AAATATACAAATATTTTAAAAAATACCAAATCAAAATTCTGGATATTTTTCTTTGTTTAG -CACAGCGGCTCTCCCGTCCCCTGGTCCCGGGGAACAGGAACGCGAAATGTTCCCCTTTCT -TCTCCAACGTTTCTAATCTATAAGCTTACCATATTAACTCCGTATGTTGTATATTTTCGA -TATATAATTTACAACTAAGAGAAAGAGAAAAAGGTACCCTCTGATTCATCATAATCATAT -CTGAGATACATCCCATGTCTTCATGAGTCCAAAAGTCCTTCTATTATGTATCATAGAAGC -TCTCCACTATGAAGCTAGGAACCAAATCTCCGGGCAGCCTTGGTGCATAAGTGGAGAAAG -AAAAGGGAAAAACAGCCGCAGGAACGAGAGACGGATCCCTCCGAACTTttttattttatt -ttagtctttttgttttCTCGCGTGGTCGGGCTCCATCGGGTGTGATAGTTTCCTCTAGTA -TTACTCCGTAGATAGAGGCAGCCATCACCAGTGACATATGATGCCTAGGTAAATCCCCTA 
-GCCAACCATAATATTTCGGCTATTGATCAAGGCCAGTCATCTGGTCTTTCACGCAGTCTC -CCGTGCAATACAACATATTCAGACCAGTCGCAAACTTCTCTAGTTTCGGAAGCCTTTGAA -TCACTCCCCGTTTAGAAACACATACATCAGAAGCCCATCCAAAGTACTCCGTACATATCG -CAATTCTGTCATTCCCGCTGTCTTTGTCTGATTCTCTCGCCGATCAAGGAAAAAAAAAAA -GGAAAAAAGGATTATCTTTACGATGGGCTCCATCATAGGGGGTGGAGATACGCTAAACAA -AAAGAGGCATTACGGAGCCCAAAAAAGTGAATTCCCGTCTCGGTTACGTATGAACATTCT -TTTTACCTTCTAGCTTTTCCTTTTTCTAACCCGGGGACTTGGGTCAAGGTATACAAGAAC -CTACACATTGATACGGAGTAGCAACATCTAGAAACATTCCAAGGCCCCACCCCCATACGG -ACTTCACAAGGTACCGGTAATTTAAGGGTAATCTGCGTCAGCTTCTAGATTTGCCACACT -GGAGCCGATGCATTTCAAAGCCACAATTACCCTTGACCATTTTGTGACGGTTCACGTTAG -GACTCGTAGGTCGTGTTTCAGCCCCGTGTCCACCCCATGCCAAAAGAAGGAAATTCATCT -CCCCATCGGCACTGAATCTGGGTCTTGCCTCGTGGAAATCGGGGGCTGGAAGATCTACCG -TACATATTCCGTAGATACCAAGACTACCGTTGAGAGATACATAGATTCCCAACGCAACTA -GATTGAATCTCATCTCTACATCCCACATCCTAGCACTGTGGCTTATTCTGTTACACTGTT -CGCATTAATGAAATAAGACATTTTCTCACATCTTCCATTCAGACTTCTCCTTCCTAGACA -CACACTTTCACATGTTGAAAAAAGGGGAAACTATCTCCCGTGCTGATCATCCTTTGTGGA -CTGATTTTTTGAAAAACAGAAGTCAGCACATCCTAATATATGTTAATAATTATCCCGATC -TGGAAAGCGAGCTTCCTATAGTGGAACTGGACTGTTGTACGGAAAGTACCCGTTTCGATT -ACAGCTAGAATGGCGCTAGAAACGGGAGAAAACCAGAAAGTCTCCATCAAATCCATCAAC -TCCATCAACTCCATTCTATTCACTTAGTCAACCTTAACGGCCCCAGTTGCTTTTCTCCCC -CTTTTTCCCCGTTGGGAATACCAATTCACCAATCCATCTAGAGTAGTTCATACCGAGTAC -GGAGATGTTCTGGATAACCCGAGACAAAGCCCAGCCTGCATTAGCTCCAAAAACAAGGTT -ATGCGATTGCATGAACTGGCTTCCATCGTTTCCGGAACGTTTAAGCTTCCCGAGACTGTA -TCCATGGATCACGTATTTTTTTTCTTTGATACAACACCTGGGCAGTATGTACGGAGTATA -TATCTTTTCCTCTATGAAACACTGTATTCCTTTTGGATCTGGCATAACTTGATTGTGGCG -TATCGAGAAAAAAAAAACGGTCGCGGATCACCACCCGCATTGACTTTGGGATTTGCCTTG -CAGGTGCATCGAAAAGCAGGGGAGTTCCCGGTCCGGAAGCATCCCCCAGCCGCTCTCTTT -GTTCATCTGACAGGCGATGTGGATGTGTCAACGCTCCTAAAATTGAGATATCGGCGACTT -TGTTTTCTTTTTTCTCTCTGTTGAATATGGCGGATTTCCACTTCTCTCAATCTAAATTTC -ACTACGGAGTAGAAACAGGGGAATGGCGCACCAAGACTCCGCGGATGCGAAGCCGGGCGA -TCCTAGTCCATTTTTCCGATTTCGTCAAAAGGCTCCATAAGATGGAAAATTCTGGAAGCA -CTATGTACTCCATACAACGGGCTATACTTAGCGGTGTCCACAATAGGTACTCCGTTGGTT 
-TTCCACCAGTCGAGGTCAGTTAGAGTCAAATGGGTGTTCGAAAGAAGAGATAGAAGCAAT -ATGTAAAACAATTTAGAGTCAGATATATTGCAATTGGAACCACTTTTCCGTTCCGAGACG -CTTTTCAGCTGTCTCAATTCTGAGCTTCTCCTTTTTTTCTATTTCCAACAAGTTCCAGAC -GTCTTATTCGACTCCCGAGACGTGACGGCAGCTCACAGCTGAACAAAGAAGCTAGAAAAA -AGTGGACTGAACTTTTAGACCTCTCTGGGCTTTCCCGGGTCCTGAGTGACAGGCCCAAAT -CCACATCATGACCCGGTTACCTGATTAACTCACAGAAAAGAAACAAAATTGACGATCTCC -GACAGCTCCATGGCTCCAGCACCATTGTGCCGGGTCGATCGCCCCCTTTCTTTTTTCTTT -GCTGGGTCAACCCCTGACTTCGGGTACCTATTTTAGTTGTCACGATATGACGCACAAAGG -GCGTATGACTGATCGAACCATACAAATATAATCTAATATGCCCCACGTTGAGATGGGTCG -GCTTTTCGTCGGGGGGTGATCCGCGACCACCTAGCGCGTGAACGGGTAATGTTCCCCGAG -ATGGATGCTCCTGCGCGGGAAATCACCCAGAGCAATCGTACATACCTACTGCATCACAAT -CTATGCACTTTGTAGATTGTACATGCAGATATGTACTGGACTACACTGTACGCTGTATGA -CACGCTGCCGAGTCAGaaaaagaaaaaaagaaaaagaaaaagaaacaaaaGTACATCTTC -ACTCTGCAGTGCCGATCACTGTGACCTCGGCCCTTGAACGCCATTTGATGCGATGATGTC -TGATCCTCCAGATAACAATCTAAAGATTGATAGTCAAATTGGGCCAAAATAGCGTTTCCT -AATTCGCTAGTACATTGACTTCGCTAATTTGCATGCAATGTACTCGAGGGCTTGTCGTCC -GAGCCCGCTAGTCTGCAGATCATCAAGAGCCACTTAGAACCACAATTGAGACTTCGATCC -CGAGATCATCTTCGATCGCGTCAATCACCCATTTTCCCGTGTCTTGCATACAATCAAAAT -TAACCCTGGGGACCGGGGAAGAGCTTTCCCCCCGAAGCTAGAAGCTATCAAATCCGACAC -CCTGGATGTATCACATTCACTCCTGGTGCATGTTACATTTCATCTTTATTTTTCCTTTCA -TTATTTCTTCTCGATGCTATGGAATATGGGGTACACTGATCCACAGTAAGGTAACCTAAG -CTGGCGTTGGTATGCAGGGACGGTCCTCAGGCCTCTCTTTTCCCCCTACCTACTCCGTAC -TGTTGCAGAAGTCACGTACCAGGAATGATAAATGATGCATTAGCATCGATGCATTGCAGT -CAAATGAGGCGCTAAATATGACCCAAGTACCGGGGATGTCCCAATTCCCCAATCGCTGGG -TCCGGCAATCCAGGGCTGAGAGCTGACTCGTCGGAGACTACCTGTCAACCCAGCCACATA -GAACCCTCTTCCGCAGTGGCAAGTGTCAAAAGGACGAGGCGACATCTCGTGATCTTTTGA -AATTTGACTAGAATTTGAATTTAGAATATTTGGAATATTTTCGATTGAGTATGTACAGGG -TGTACAATGTAGAGAAACTTAGATGACATTTATCAATATTATCTATCGCAAGTCAAACTA -TATTCAGCTCCCCAACCTCCTCCATTAGCCACCAAGGTTGAATATTGGTAAATAGTACTA -TTGTCACTCCCAATAACAAAAGACAAAAATAAACAAAATTCGACACCGCTTATGAGAAAT -CGGTACCGCGAGTATGCCTATTCATACAGTGATATGCCATTTCAAACTGCAAGTGTAATC -TCAAGTCAGTAAATCGAAGTATTCAAAAATGAAACCCAATGACCAGAGCCTACACCTCCC 
-ACGAGAGGTCAAGGTCACGGTCACGGTCAATGATCCTCAATGTTTGAGACAACATACCTG -TTCATCAGTCTGACAAACGAGAACTCTGGGCCCCTTGCCGGAACCCTTGGTGATATCCTT -GACCGTCTCCTCATCCCCAGGCCCCTTGTCATTAGCCGCATCATCGATTGCCAAACCTAG -ACACTGGCATTTATCGATCAATGTTCGTCTCAGAAGGGCACTCCTCTCGCCAATCCCGCC -AGAGAAGACTACACCGTCTACTTCACCGTTCAGTTTAACGTAGTAGCTGCCGATGTAGCC -CAACATGCGGTCGACGAAGATGTCGAAAGCAAGCTTATGTGCTTTGCTTGGTGGGTTCTC -CACTGCGATCTGGGAGAAGTCAGTTGTTCCTGTGAGGGCTTTCCAGCCTGATTGTTTGTT -GAGGATATCCTCCGCCTGGCGGTGTTGAATTTGATTAGTATTTCTTGTAATTATTTTCAA -CGTTGCTTCTCTGTGTTGGGTATCATACCGTGCTAATGTGCATTTCTGTCGTGCTGGCTG -GGCTTAGCTTGCCTGCTTCATTGGTATAGTGGAAGACCAATCTAGATCACCAGTATCAGC -TCGCCATTGCTGCGTTGTCACGGAAGTCCAGTGGTCCAACTCACGATGGATCAATACTTC -CACTACGTGTCGCACCAGGTAATCCTGCTAATGGAGTGAGACCCATTCTAGTCACCGTAT -TAGCAAAGCTTCCAGCCCGATATATCGAGATGTAATATAAATGGTGTCGCGTGACTCACG -ATGTATCAAGCGACTTTCCGTTCTTGATCACACACATCGAAGCTCCGCTCCCGATATGCA -TGATGATTAGATTCGTCTTCTCGGTCGGTTTGTTCAGAAATTGGGCTACAGATCGCGTGA -TGAATGAATAGCTGATTCCGTGGAATCCATACTTGCGCAAACCATTCGCCTTGGCAATTT -CCTGGTTGATTGGGTATGTGCGTACGGCCTCCGGGATTGTCTGGTGGAAAGCAGAGTCGA -AGAAGGTGATGCTTTTAACGTTGGGGAGTTCTACCTTGCACAGGCGAACTATCTCTAGTG -CGGAGAAATTGTGGCTTTGGGCGGGTTGGTGTTAGCGTGAGATTACATACAGTTGAAAAA -ATTGAGATGGATGAGCTTACAGAGGGGCAAGATCTTCCAATCCTTTCAATCGACCAAGTG -TCTCGTCGTTGATTTCGACCGCGTCACGGTAGTCTCCTCCATGGACGACTCGGTGGCAGA -TGTACTCTACATCATCGACGCTGGCGATGTCGGAAAGCTCGGGATCGCTGAAACAGCGCT -GCAAGAGATACTTAAATGCAGCTTGAGGAGTGCTGAGGTCTTCTTTGACTACCTCATTGT -GCTTTTTGTCTCCTTGGTGGTACTTAAGTTTAGGTGGAGGTGCGGTAATTCCAGAGATCG -CCGCATCTGCGATGGGTCTTGGTGGGTTCTCGAAGGTATAGAAGGTAATTTTGACGGAGG -ACGAACCCGCGTTAACGGAAAGGATAGATTTGGGCATTTCTCTGGGTTGCTACAGAGAAT -TGAACATGTGCCTTTGAGAATAGTAGATCTTAAAAGCTGAACTGGTTTCAGCGTTGTAGT -GGTGATGTCAGGATGTGCAGTGACGTCGCCAAAATGGGGAACAAGACTTGTTGTTTGATG -ACATTTATTTATTTATCTCtttcaatttcaagttcaatttcaagttcaatttcaCTCTTC -GTTCTATATCTTTTTTTGTACTTTGAACCTTTCATGTTCTTTTACTCCTGCCAAGCAACG -GAAGAATCTACAATATCAAATTCAACATGCCTGGTGAAGTCATCAGCGAGCCAAATCCCC -AACCGCTGCCATCGCATCTGCCAGACTATCTTGAACAATTGAGCGTGAAATTGGATCATG 
-AAGACCTAGATAAGAAGGCCTGTGATGCCTTGCTCAAATTCCGCCGAGCGGCTTGCTACA -TTGCGGCTGGTGAGCGCGATATATGTGTCATATATATTTCAACGTCGCGACTAACTTAAT -ATTAGCTATGATATTTTTGCAAGAAAATACTCTCTTGAAGTCGGAACTCACATTCAATGA -TGTCAAGCCTCGACTACTTGGTTAGTAGCCTTCCCAGAATTTTCAACTTGGTCATAATGC -CTGATGCCATACTTCCAGGTCACTGGGGAACATGTCCGGGCCTAATTCTGGTCTACTCTC -ATCTGAACTACCTGATCCGGACGATGGATCTGGACATGCTGTACGTTGTCGGCCCAGGGC -ATGGTGCACCGGCAATACTAGCCGCACTATGGCTAGAGGGCTCGCTGGAGAGATTTTATC -CCCAGTATTCGCGAGACACCAAAGGCCTGCACAATTTGATTTCGACCTTTAGCACAACGG -CCGGTCTTCCCAGGTCAGTACCTTGCCTGGTATTTGGTTTTCTTTACTGACAGTTATGCG -CAGTCACATCAATGCCGAGACTCCTGGTGCTATCCATGAAGGCGGAGAACTGGGATACGC -TCTAGCAGTGTCCTTTGGCGCTGTAATGGACAACCCTGATCTGATAGTGCCTTGTATCGT -GGGTGACGGAGAGGCTGAAAGCGGTCCCACTGCTACGTTCGTCTATATGTTGCCCTCTGC -GCTATGCCATGCTAACATTGGACAGGTCCTGGCATGCGATCAAATACATTGACCCAAAAG -AATCTGGCGCAGTTTTGCCAATTCTGCATCTGAATGGATTCAAGATCAGCGAGCGCACTA -TTTTTGGGTGCATGGACCATAAAGAGCTTGTGGCTCTGTTCAGTGGGTACGGGTATCAGG -TTCGCTTTGTGGAAGATATTAGTGATATTGATGCAGATTTGCACTTCTCTATGGTCTGGG -CCATCAAGGAGATCCATAAGATCCAGAAGGCTGCCCGTTCTGGGAAGCCGATCACGAAGC -CAAGGTGGCCGATGCTGATCCTGCGCACTTCCAAGGGTTGGTCTGGGCCCAAGCAGCTTC -ATGGCAAGTTTATCGAGGGCTCGTACCATTCGCACCAGGTGCCATTGCCAAAAGCGAAGA -CGGACAAGGAAGAACTAGATTTGCTGCAGAAGTGGCTCTCAAGTTATAAGCCAGAAGAGC -TTTTCACTTCGACTGGTGATGTGATTGACGAGATCAAGTCTGTGATTCCCGCAGATGACA -AAAAGAAGCTTGGCCAGCGCGTTGAAGCCTACAACAGTTATACAGCTCCGAAGATGCCGG -ATTGGAATTCCTTCTGCGCGCAGAAAGGCTCTAACGCAAGCTCTATGAAAGTGATTGGTG -CTTTCATCAATCAGGTATTCAAAGATAACCCGAGTAGCGTGCGATTGTTCTCACCGGACG -AGCTGGAAAGCAACAAACTGGATGCGGTGTTTGAAGACACCAACCGCAATTTTCAATGGG -ATCAGTTTGCCAATGCTCGTGGTGGCCGTGTAATTGAAGTCCTTAGCGAACATATGTGTC -AAGGATTTATGCAGGGCTATACCTTGACCGGCCGCATTGGTATTTTCCCGTCCTATGAGA -GCTTCCTCGGAATCATTCATACCATGATGGTGCAGTATGCCAAGTTTATCAAGATGGTCA -GTTTTCTTTTTCCCTTGCAAATGTCAATTCGATGCTCACCAAATGATCTGTAGGCACTGG -AAACAAGTTGGCACGCTGGTGTCAGTAGTGTGAACTACATCGAATCGAGTACCTGGACGC -GACAGGAACACAACGGCTTCTCTCACCAGAACCCATCTTTCATCGGCTCTGTACTAAAAC -TCAAGCCTACTGCAGCTCGGGTATACTTGCCACCAGATGCGAATACTTTCCTGACAACCC 
-TTCATCACTGCGTGAAATCAAAGAACTACATCAATCTGATGGTTGGCTCAAAACAGCCTA -CTCCAGTGTACCTGACCCCCAAAGAGGCCGAGAGCCATTGTCGTGCAGGAGCGTCGATCT -GGAAGTTCTGCAGCACAGACGATGGAGTGAACCCAGATGTCGTGCTGGTCGGTATTGGTG -TTGAGCTGATGTTCGAAGTAATTGCAGCAGCGGCACTTCTGCGTAAACTCATACCTGAGC -TGCGTGTTTGCGTCATTAACGTGACCGACTTGATGATCCTTGAAAACGAGGGTCGGCATC -CGCATGCCTTGTCAACTGAGGCATTCGATACCCTCTTCACTCACGATAAGCCTATTCATT -TCAATTACCACGGCTACCCAACCGAGTTGCAAGGCTTGCTCTTTGGACGTCCGCGTCTTG -AACGTGTCACTGTCGGGGGCTATATTGAAGAGGGTAGCACTACCACACCTTTCGATATGA -TGCTTGTCAACCGTGTCTCGCGGTACCACGTCGCTCAGATGGCTCTTCGTGGTGCTGCCA -AGGCAAACGAGAAGGTCCGGGTGTACCAGCAGGAGCTGAATGCTCAGCTTGAGGGAAGTA -TCGTCAACACGAGAAAGTATATTGTGGAAAATCACAATGGTGAGGATCCAATCTGTAAAT -TGCCATGCCATATAATGCTAACTCTTGCTCCTAGATCCCGACGGTATTTATGACACACCA -CAGTTCCACAGCTTCCACAAACCGACCGAGTCATTCTGGAACACTGAATAGTTTCATTTG -AAGCAGTCACCCTCACTCTAGACCAATAGTTAATGGAATGTTAGCAAGAAATTAGAGCCC -ACGGTTTCTTTTCTTATTTTTTGTCCATCTGTGGTCCATTAAGTTTATCCAATCAATGTG -GCAGCCAGGTCATTATGTTCAAACCAGTCCGCTGTTCCTCATATGCAGGATTAAAAATTA -CAACCCAACACACCCAGGGCTAAAGGTATTCACACGAATCAACGGATGCCCGGGTCGACC -AACGAGTAATTGTAGCTGAGTTCCAAAGCAAAGGAAGGGCCGAGACCCAGAACAGCCGCT -TTGGTGAATTCATATTCGGTGCGATTCTATCCACAATACGTCAGCCTGGTTACAGTCTAT -TGCCGCACTGCCACCACCGACTCCAGGGTCCTCAAACAATGCAGCGGATGCATCAATACG -AGAATGGAGTAAGCAAGCACAGTGTCAAGGTTGGAAACTAATCAGTGCCTTTAACCTGTG -TCTCACGTTATGTCAATCTCGGTGTCTGCTCGGCGGTAAGGGTTTGAAGCGTATCTGAGA -AGCGCGTGTAAGCACTACTGCATGTGGACGAGGGGATGTGTATATATAGATGTAGATAGC -AGTGTCATCTCCTGGACTTGACCCCACGAGGACGGGGCCGTTGCTCCCGCTTATCCATAC -AGACATCGCCCCCACTTTATGCTGTGATCGGCCTGGCGCTCCGCCACGGACTATTCTCGG -TCTGGGCCTGGGTTTCTGATCAGATAGTCAGATGGGTGATGTGTGTATATTTTGTCTTGA -TACAATGCAATGGGGGCCTCACTCAGTCAGCACAGTGCGTTTGTCCAGAATACCTCCGCG -TGTTGATATCTGACCTGTCAAAACGTGGCTTAATCTTCCTTTCGGTTGATTGTGACAGAT -AAAAAAAATATCCCTGTTCTGTTAGGTAGTTGACTAGACCTTTGAAAAATCCGTCGGGCT -GTGATTGTTATTTAGCCCTTTCGAGTATCAATGGACTGCTTTTCATGAGGTGGAGGGACT -TGGCTGTTCGATCTGCATTTGTTTTGCAATGGAGTGAAGATCGGCAACCTGGTTGCCTTT -CGCAGCATGAGTTCAGAGTATTTATATGGAGTATGTATACTCCGTAAGGAGTATGCATTT 
-CTTCCATTTTATTATCGTTGAACCTTGAGGCATGTATCTTCCGAGGTAGGCGAGGGCTGA -TATGTCTTATTCATACTTTTGACGTATAAGCATATTCTTGTTGAGAAACTATTGTATTTG -CTTTTATTCTCTTGATCTTATGATTGTCTCCCAAAGGCAGTATGAGGGGTTATTGCTTCA -AGGATTGCACTTTAGCTAATGAGAAGATAACTGAAGTCGAATAAGCAGCAGGGGCATGTG -GATCCTTGTGCATTTTCCTTGTTAGATTTTTTTTCTTCTCGTCACATACGTACATCCGCT -GCCATCCGAGACCGTGGACCTAGATTATGTGGATGGATCCGATTTCCCGAGAAAACACAA -AACACGAAACCAAGGTCAGAGCCAGGGCCAAAATCATTTGAAGCAAATTATCTCTTATCA -TCTTGAAGCGGTCTACGGAGGACCTACGATCTTACTGCATTCTTTCCAGCTGAGCCGCAG -ACGTGTTGAAGCACGGACTACGGACTACTCCGTACATACATGTCGGAAGCCGGAAGTCCT -CAAACCGCCTAATGTGCCTTATTGCCGATTTCCATGACTCATTCGACCCTATGATACTCC -GTTCAAGCTCCCGTTCGGTCTACCAACGGCTCGGAGATAAATGGAATCCCTCAGGGCAAA -ACGTAAACTTGGCGATGTCCACGCTAAACCTAAAAGGGATGACCTTGCGAGTATACCACA -GGTTGTAGTTCGGACCGATTCTCCATCAGATCTCGGCGTAttttttttgtattgttttgg -cttagttttgATCTCATAATCCATCTCGTGTTCCATATGGCATATAAGGTCCAACATCGA -TGCAATCTCATAGGCTAGGGATCCTCCACTGTGGCGGTTGGCTGCCCGATTGTGCGATAG -ATTCTCACAATATGCCTTTTCCATTCTTATACCAACCGGAGTCGGTCCGTTTATCAACCC -GCTctggatgctggaaactggaccatgaacactggagacACACTCGAATCTCGCGATTGG -TTACTCCGATGCGGGTCTTATACTCCCGATAGATTATTATTGAAGTAGAGCGTGCATTGA -AAGACCGTGGCTATGGCCTTGAAAGGTTTTCCCCCGGTGGTGGGGGCTAGTGGATGCTCA -GCCATGCTTACTCTTATTTATACACTCGTCGTCCGCTTGAATACTGAAGCCTGATTATCA -GTCTCTTTTGGGTATCTTTTTTGGTTCCAGGCAAACGGGGTGGTTATCTATAAATCTCTT -GTCTGATCGCATAGGGATTCTCTGCCCATCTTCCAATATATCCAAATATTACGGATTTAC -TGCTAGCTTTCCATCTCTCGACGATCTCCTCGATAGTGGTTTCTTGAAGTCGCTTATCTC -CCTTCGCCATGTCCGACAACAGGGAGCTACTGCAAACCGGCTGCCATGCCTATATCGAAG -GTATTGACACTTCATATGGATATATCCCATCTTTAGGAGCAGGTATTGCCTATTGCGTAC -TCTTCGGTCTCTCAATGATCATCCACACAGTCCAATTCACCTGGAAGCGACAATGGTGGG -CAAGCGTGTTTAGTGTCGGATGTTTGGGTAAGAGCTTGTGACACTCACAACCCTAAATCC -CCGCAAGACTAACAACACAAACAGTCGAAGTAATCGGCTGGGCCGGTCGTACCTGGTCAG -CTAAATGTCCATACAATGGAGATGCCTTCATCATGCAAATCTGCACCCTCATCATCGGCA -AGCTACCTTCCTACCCTGAACTATCCACCTCCAATCTAACATTCCACAGCCCCAACTTTC -TTCACAGCAGGAATCTATGTCCTCCTAGGTCGCTTCATCCAACTCCTCGGCCCCGAATCC -TCCATCTTAACGCCGAAGTTATACCTCTGGATCTTCTGCACCTGCGACGTAATCTCGCTC 
-GTCGTCCAAGCTATCGGCGGCGGTATGGCCTCCCAACAAGCCGACAAAATCGACGGCGAC -ACCGCGCCTGGCACGCACATCATGGTCGCTGGCATCGTCTTCCAGCTTTTCTCGATCACC -ATCTTCGTCGTCTGTGCGGCTGATTTCATCCACCGGGTTATGCGCAAGCGTCTGCTGCAG -ACGGTGAACGGCCCGGTTATTCCCCTGTTCGCCGCGATGATACTTTCCATCGTCTGTATC -TATATTCGGAGTATATATCGTACGATCGAGTTGTCGCAGGGATGGGATGGTTACCTCATT -ACTCATGAGACGTACTTTATTGCGCTTGATGGAGCTATGATGGTTCTTTCGGTGGCTGTT -TTCAATGTGTTGCATCCTGGATGGATGTTGCCTAGTGAGAAGCCTGATTCATATAAGATG -GAGACGGCTTCGGTTGATCGATCGGAAATGTACCAGGGTCCACCTACTCACTGGTGATTG -CTTTGGTCGATGTGCAGGTTACTTGGTTTTAGTTTCATGTTTTGGATTTTGTTAATTTTC -TCTATACCTGTCACGTTTGGTAATGATTCCCCCCGCAGATGCTAGATACTTAATCGGGCT -ATTAGATGTTACACAGCGATTCACATTGACATAGACTTATGAGTAATCTCAATTTCAACG -CATTTTGAAGATATCAGATTAAAGACAAAGACAAAAGTGGAATCAATGAACATTCGAAGT -AACGAAAGGCATACTTGACAGCCAGGGAAAAAAGAGAATTTCAGAAATTTTAGACAACGA -ACAAAGAAATTGACACCAATAGCCCCGCGATTGATCAGCAGAAGGTGTAAAAACCAATGC -AAGGGGATCGGCGTGGACATTATAGTCAGGCGCGCAATATGAGAGAAAATACTGGATATG -AGGGTATCGCCGCATTCACGTCTCATGCACCTAGTGCGGCTGGAACGAAGACCAACCTGG -AAGTACACATGCCTTCGTGCTGTAGTTATAACGTATAGTATCATACACCATCGACCGCTT -CAACCCAGCCGAAGGCTGCACCTGCAACCAGTGTCGCAAATGCCCAGACTGCTCCCCAAG -TGGCGGCACCTTCGACAGAGACATCGTTGATTGTGCCGGCCACTTGGACAATCGTGTTGA -GCAGGACTCCAAGTGCAAGAACAATGGACCGAGAAACCAAGACTCTCGTCTTATGGGACA -ATTGGCCCCAAGTGTTGCGACAAATGGCTGCAGACACACATTCACTACCTTCGCAACAAC -CAGACCTAGCAGGGGTGGTGGAATAGCCAGTAGAACTGACGAACAAAAAGTCACGCGCGG -CCCATCCAGCAGGAATCAGCGTCAAGAAAAGGACAGGCAAGCCAGCGGGACCAGCATGCG -CCGCACTGATATCCGGGAGTCCCTCGAAATGAATGACCAGCTGGGTAGGAAGCCAAGTCG -CATAGCTGGCATACAACACAACGCTAAAGATCGAGGTTGCCAGGAGCGTCGTGAAGATTG -TGGTAGTTTTGTCCTGTAGGATGTTTCGATTGGCAACTGCATCAGTAGGCGCATGCGAAA -GATTGTGGACGGACGAGGGCCGGCGCAGGAGGGCGAATGGAAGCGCTGTCGACACAATCG -TGACGCCAAATGCCAGCAGTGCAGAAGTCGGGCGAATGCCATAGAAGAATGACAAGAGCG -AGTATGTTGGCAGGTGGGTCAAGAATAAGAATGATGCCACATCACGGCCTAGTCGCAAGC -ATAAGGTTAGTCTCCGCATAATGTCCGCATAGCTGTAGGAACCTCGTGTCTGGATGCAAG -GAACCATGTGTATAGTAGTTGGTCCTTACCATCAAATCCTAGAACCCAGGCTAATCCGAC -CTCCACGGCCTTCCAGGCTATCAAGCCCCCCACCTCCCACCATTCTTCCAGGTGTTTGCT 
-GATAGGCCCGAGATCGCCCACTGTAAAGGCCGACGTAAATGTGAATAAGAGGGAGCTCAG -AGTGAGGCTGCTGAGAACGACAAGGATGAAGCGGGCTGGAGATGGGATGCATGCCAATCG -AGAGGTCTGCGCAGTGGCTGTCTTTGCAAGCTCACGCGCGGGTTGCGCCTTGTCGCTCCA -CGACACGGTTCGCTTCCTGGCAGTCTTGGTCGGCATTTTGATATTTCCAAAGTTAACGGA -CGAGGTAATTACAATGCAAAGTTCAGAGTAAAAATCACTGGCAATTGCTTCTCACTTGGT -TGGAAGAGAAAAAGAGAAGGCGGGTGGAATGTTTGGTTAGCGGTGAGCATACGATGATGT -CACTATTGCTTTTTCCAAGGTCCAAGAATTCGATTTGATGCCTCTGCGGCAAAATATTGT -ACATTTTAATTACAGTGGACATCTCATCTGTAAGATGTAGAGATGATACAAATTAATCTT -CATCTTCGCGTTCTGCCACAGCAAATTCAACAGTCCCACTCTCTCGTCCGTGGTTGTCCC -GTACAACGACGCGATACTTCCCATTGTTCCAGAGATCTTTGATAGTCAATTCGCGAGTAC -GGCCAAGACTCTCAATGTCTCCAATGCGCTCGATGGTGCCATCCAAATGATTGTCGAAAT -TTTCTAATGCGCAGATCTCTTGGAAAGCACGCTGCATGGGTATTGAATCGGCACGTACCA -TGGCGCGCGAGTTAATTGTGAAGATATTTTCGTGATCGTAAGATTGGATAAAGTCAGCAC -GGGTCATGATCTCGCGCGCTTTATGGAGCGGAATTTCTGGCAGGATGTGGCCTTGCTCTT -GGTGATAGCTTGATTCCATTTCTTTCTCAAGCTCTACCAGCGCTTTCGCCAGGACCACAG -CGCCAGACTGTGTTGTAATTTGAATTAGCTGGAGTTGCATCGAACGGACAAGAACCAGGA -GATCCTTTAGTGTATACTTACGGCATACTTCTGCTGGTCCATCACATCATCATCCATCTC -CTTTCCGAGGATCCAGCACTTATTTAGGAACCATGTTTTCTCGGCCAAACAGATCTCATC -GCAGGTCTTCATGACATCCGAAGACTTGGCCACGCGGTTCAAGAAGGATAGCCGACCACC -GACTTTATCATATACTTGCTCAAGAGTCGCAGATGGAAGTGTTTCGTTGTGGAATTGTTG -GCGATACTTCTTCAAGGCCTTCATCGCCTTGTCCTTTGGCAAATCTGTCACGGGAATGAC -TTCCATGCGGGTTGCATAACGCTTCAAGCGCTCGTAGACCCAATAGTCATCGCTATTAAG -AACTTGAAGGGAACAAAGTTAGCTATGAAAAAGAGGAGCGGATGCTCGTCAAAGTCTTAC -TCGTTGTGACGAGGCCACTAGCAGCCCATTGCTCCGCACGCTGCTGAATCATCTCGAGCA -AATCCTGTCCGTCGTGGTCGTCTCGGAGTAAATGGGTACTGTTGATGATCAGCACGAGAG -GAGTCGATCCATTCCGCCTTCTGATAAGAGCCACTTTCTCCAACTTGTTGAAGGCCCGCT -CAATGTCCAAGAGAGCCGTGGTGTCTCTTGGTCCACGGATGCTGAAAAGGCTGCCAATAT -AACTGGGTATTCCTGTTAACACTATGACCTTATGGTGATAAAGGTATAAGCTTACTCCTC -GTGGAACTCGTAGTCCAAAGCTTTGCCTAGTCGGATTCGGAATATCTCGAGATCAGCATG -CGCTTCGAATAACGCACAGCTGTTACCATTGATCTTGCGCATGGCTTCTAAAAGCATCGA -AGTCTTGCCCGTTCCCTTTTCACCGACCATTAAATAGTAGTGGCCACTTTGTCGCCCGGC -AACAATATCGTCAACTTTGGGTTGCTCATTGCGAACGACCCAATGTTCCTCATGGCGGTA 
-TTTGTGTTTTCCAGAAACAACACCCGCGACCTCGAGGGCGGGATCACCTGGGTTGAATGC -ATTGTCCATCTTGTCCAAGATCAACCACTTATAGTATTGGTGATAACCGTAACCCGCCGC -ACTTGGGAGCAGATTAGCCATAGAAACAGATGACTTCAATGATATAGGTAAACCTTACCC -TAGAATGGCAATTGATGCCGCTGTGGTTGCCGCAGTTTCAAGCATCTTGAGAATCGTAGA -TTTGAAGCTTGGGTCTGAATCGCCGTTACGGTGCTGACCCTTGCTCCTACCCTTACCCTC -ATCCGAATCATTCTGATCAGTGGGGTCTGGGGTTATTTGACCGGTGTCCCGGGGGTCTGA -GTTAAACCGCTTGGGTATATGTATCACAACGCTGCTGTTAACCTTGAGAGATCTTGGCAG -GCCCTGTCGGGCATAGACCCTCGCAGGCTGTACGAGTCGGCTCCATAGCATCGTGACTAG -GCCTCTTGGCGTTTGTACGGGGTGCCAAGTCCTTCAGCAAAAGGTGGATAATGAAGAGTG -GGAAGATATATCCTCGTCAGAGATAATTAGTACCCCAGGCGGTCTGTGCAGGACCTGTAT -CAATTTAAAGACAAAGTTCTCAAGGAGTGTACTGCTAGAATTCTAGACTTGTAGATGGTT -CCAAGCCACACCTCCAGGGAGTGACGTGTCGCTGGTGAAAGAGAGACTCAAGCCAATTTG -TCAGATGAAAAAAGACTATCAGAAAAGTTGaaaaaaaaaaacaaaagaacaaacaaagat -acaaaAGAAGTTAATGAAAGACTGGAAGAAAGGGAGTTCAAAAAAAAAAAAAACAAGATT -GTGGCTAAGTTGTGGTGTTGGCCTCAGCCTCGATGAACCAGGGACGCAAAAGGCTTGGCG -ATCGAATCAGCTTCATATATTGGGGAGACTCGATGTGCACTGGATATACACTACTCGGTT -CAATTCTCCACTAAACTATTTCAATGGCTTTGTTTTTTATTGATTTTTTCTGGAATACGG -GTGGATCCATTGAAAAAAAAAAAAAACTATGTATATGTAGTAAATAAACAAGGCATTTAT -AAGGCCAATCACTCCTATACATGAGACTGGATAGTATCGAATACACTTTTTATATGTAAA -TCATCTATACTCCGTATACTCCTTAGTCTCAAATACTCTCACTTTGGAGAGTTCGCCTTG -ATTGACTGGGGATGGAAATTCAGCCAATCAGACGCGCCACCGGATGCCCACCATGCAGTA -GCTCTTTGTTTAGGCAACATTGCTATCGTCATCATTCGAATAGTTATATCTAGCCATCTC -ACGCCTAAAAAGAAGACCCTGACCACCATGTTACCGCTATCTTTGTCTGAATTCCAACGG -CTATGGCCTTCAATTCCACTCCTTGGGCTGGTACCTCCGATAACGCGTTCGGGCCTGTTA -GCCGCCTAAGAACATTTGACTTTACGATCACCTTCGAGCAGGCTATACTGTCAATAATTC -CATCGACAATCCTACTACTGGCTGGACCACCCCGACTACTCCATCTCTCTCGATGTCAAC -GAAAGACACGATCCAAACATGACTATGTTCTGAAGCTGGTCAGACACTCGTCATCTAGTA -CTTCCCATAAGCTGGGCTGCGATTGATGTGTTTATAGATTGCCGCTTCAATCAATTTCGC -CCTTCAACTGTCCCTCTTGGTGCTTTGGAGCGTATCGGTTGCTTCACGGACATCGACCAG -CATGCCCTCCGCTATCCTGGGCCTCGCCAGTTCTGCCATTATCATTGGACTGTCCTATGT -CGAAGACATCAAATCAATGAGGCCTTCTTCTCTGTTGATTGGTTATCTGCTCCTCTCGAT -TCTGTTCGACGCAACACAGATCCGGACTTTATGGCTCACACATCGAGTCGCCATTGCGGC 
-TGTTCAGAGTGCGGTCTTGGGGACTAAAATCGCCATGCTCTTACTGGAGAACCGAGAAAA -AACATCTTATCTGAAATCCCCCTACAGGGAGTATCCACCCGAAGCAACCTGTAGCATTGT -TAATCTGAGCTTTGTCTGGTGGCTGAATCGACTCTTTTTGGCTGGGTGTCGGAAGATCAT -TGGGAGTCGCGACCTTTTCGCTCTTGAGCCTGGCCTTAGCTCTGGTATCACAGGCGAAAG -ACTGAAGAAAGCGTGGGAGAAGCATGGTAGGATATTTTCTCCAACGTTGAATATATCTTA -TGCTGACACGTCCCAGGTAAAACAGATAGCATGCTTTCCTTGCCTTGGGCCTTTTTCTGC -TGCTTCTGGTGGGAATTCCTCGCCGTGGCCTTTCCTCGAGTGTGTATGATTGGGTTCACT -TTTGCCCAACCATTCATGATCACGGCGGTTTTGGTTTACGTAGAGGATCCGATCACACCG -GAGAGCAAGAATCGTGGATATGGTCTTATTGGAGCCGCTTTCTTGATTTACTTTGGATTA -GCAGTATGATCTGTTCCTTTGCAATATTGCTTACATCTATCGCCGCTAATCAAAACACTA -GCTGTCTAATGTACACTACCGACATCGCTTCTCCAGGGTGAGCACTTTGTTCCGTGGCAC -AATGATCTCATTGATCCATGACAGGACCCTGACACTCCAAGTTGACCTATTTGCGGAGAG -CGCAGCTCTTACGCTGATGAGTACAGGTAAGTGAGTGGTGCTCATTCGCCTGTCTGAGGA -TATGAAAGCAAAATCCCGGTGTGCTAACAAGCTCCAAACAGACATTGATGGCATTATTGA -GCATCTGGAAAATTTCAATGACGTGTGGGCTCGCACCATCGAAGTGGCAATTGGTACATG -GCTACTTGAAAGACAAATGGGACCAACTTGTGTGGTTCCATTAATTCTGACTCTAGGTAA -GCAGCAAGCCTTGCATCCAAGTATGCAAAGTTTTCGTACTTATATTTGGAACCAGCATGT -CTGGGTGGTCAGAAGCTAGTGGCAAAAACAATCGGAGAGAATCAGCAAAATTGGAATTTT -GAAATCCAGAAGCGGATTCAAAATACATCCTCCGTACTGGGCTCTATCAAAGCTACCAAG -ATTTCAGGACTCTCTGGCAAAGTATCTCAGGCCCTGCAGGGGCACCGCGAACGAGAACTA -TCTGTATCCCGCGCTTTCTTCGAGGGAATCATGTGGCTGAACGGTCTAGGTCAGACAACT -TGTGCTTGTATGTTGTTCCTCCGCTGATATTAAGCAGCAAGTGTGCCTCGGATATGGTCG -CCTGTCATTACCTTCATTGTGTACGCCGTACAAGCACAAAACAGGGCGGATGACCCCCTC -ACTACCGTGAAAGCCTTCACTGCTCTGAGTATAATCACCCTTGTCACTACGCCCGCGGAA -AGACTACTCGCTGTATTGCCCCAGCTAGCGGCTGCTATGGGATGCTTCCAGCGGATCCAC -GAATACATGGTATCTGAGCCAGTGAAAGATAACCGTATTGACAACACAAATCCGTCGTTT -ACTTCAAGCGAAACGGAGCCGGATCTTGCCATCGCCTTGGATAGCGTGACTGTCTTGCCT -TCACAAAGGGCTACCCGTACCGCCCTTACTGACGTGTCTTTCAAGGTCACCAGTGGCAAC -CTGGTGATTGTTATTGGGCCTGTTGGCTCTGGTAAAACTACAGTGCTGAAAACCATCCTT -GGGGAGCTTGGTTGTCTGTCGGGTTCCGTGTCTATCAGGCCGAGCAAAGTGTCCTATTGC -AGCCAGAATCCATGGTTACTCAATACAACAATTAAGAAAAGCATTACTGGTATCTCGGAC -CATGAGGTCGACGAGAATTGGTATAAAACTGTTATATATTCATGCTGCTTGGATGAGGAC 
-ATTCGTCGCTGGCCGGACGGTGACCAAAGTGAGATTGGAAGCAAGGGTCTTGCTCTTTCA -GGGGGTCAAAAACAAAGAGTGGTATGGGGAAACTCTAGAACGCGTGTCACCCTGACGAGT -GCTAACCCTTGTAGGCCCTGGCTCGTTCGGTGTACAGTCGCCACGATATAGCCTTGCTCG -ACGATGTCATGAGTGCGTTGGATGCGCAAACGCAAGAAACTATCATTCAACGACTTCTCT -CTCATGATGGAATATTTCGTCAGCTGGGAACAACTGTGATTCTGACCACCCATAATGGTA -GGACACCCTAAGAATTAGATCTTTGCGCCGAAGGAGTGCTGACTCGTGATATCCTCAACC -ACAGCTCGACACCTTGGAGTCGCAGATAGTGTGGTCTCGCTCACTTCAGATGGTCGAGTT -GACCTCCAAACAACAGGGATTGAGGCTATTCGCAATAAAACTTTCTGGCAGGCTAGACAA -TATTCCGATTCGCAATTTCAGGATGATAATAACAAGCCCAACGATCATTCAAGCATAATG -GCTTCGAGCTCCGACCCTATTAACGACATTGAAAATATGGATCGAACTCGGCAGATCGGA -GACCTCTCAGTATATTATTACTACGCACGAACAGTCGGCCCAGTTCTCTGCGCCGTGTTT -CTTGTTGGGCATGCCCTATTAGCATTTGCCGAGAACTTTCCCCAAGTGTGGTTGAGCAAA -TGGACCACGGCAGGAGGCGGACAGCTTCCTCTCTATATATCCGTCTATGTGGTGTTGGCA -CTGGCAGCATCAGTCCTTACCATTTGGTGCATATGGGTCGTATTCCTTGAACTGATGCCG -AGGTCAGCCATTCGTTTGCACTGGTTGCTTTTGGGCACGACTATGCAGGCACCGCTGTCC -TTCTTCTCTGCCACTGATAATGGTGTGACCCTGAATCGGTTCAGTCAAGATATGACCTTG -ATAGACCTGGCCCTTCCTATTGCGCTGATGTCTTCCGCCGAGGCATTTTTTGGATGCGTT -GCCACCATTGGGTTGATCGCCACGGGGTCAGCGTTCATGGCGACAACAATACCGGCGACA -CTAATTGTGCTCTATTTGTTGCAAAAGATTTATCTCAAGACGAGTCGCCAACTGCGATAT -CTCGATCTGGAGTCTAGAAGCCCCTTGTACTCTCACTTTGCCGAGGTCCTGGAAGGTCTG -CCGACAATTCGGGCCTTTGGTTGGCAAGACGCCTCATCAAAAATCCTAACTCGGCATCTT -GATCAGTCTCAAACACCATACTATATGCTACTTTGCGCGCAGAGGTGGTTGAACCTGGTA -TTGGACATCGTGGTGATGGCATTGGCAACCGTTGTGGTAACACTAGCCGTCATGCTGCGT -AGCAGCACAAGTTCCAGCCTCCTCGGCGTCGCGTTGAACAATATCCTTGGATTCAATCAA -CTATTGTCATATTTTATCACGTCATGGACTACTCTCGAGACCTCATTCGGAGCGATAGCC -CGTGTCAAGTCATTTGTTGAGACAACACCTTGTGAAAACCAACCGGGTACAGCCGCAGAG -CTGCCTACATCATGGCCCGATAAGGGGGAGATTGACATACAGGATCTTTCTCTCCGCTAT -CCCGACGGATCCCTAGCTTTGATTAACATCTCCATGCATATAAGGCCGGGCGAAAAAATC -GGCATCTGCGGCCGCACTGGCAGGTACGTATTTATAGACTTCTGACTTTCATATATTCCA -ACAACCAGCCTACTGACCAATCCTTGACGTTTCAGCGGGAAAAGCTCCCTCACGCTAGCC -ATGTTGCGCCTCGTAGACCTCTCGCATGGCAGTATCACAATTGACCAGACTGATATTTCC -AGAATCGCCCCCGGATCAATCCGCGAGAGGCTGATAGCCGTGCCGCAGGACCCGTTCACG 
-CTTTTGGGAACCATCCGCTATAACGCCGATATAACGGGACTCTCCAGAGACTGCGAGATT -ACTTCTGTGCTGAAGAAAATTGGCGTCTGGGCGGCGATTGAAAGCCGTGGGGGCCTGGAC -ACACTCCTCGAAGACCATCCACTGTCGCAAGGCGAGCAGCAACTCTTCTATCTGGCACGG -GCAATTCTTAAAAAACAAACCAGTAAAGGTGGATGCCAGGTGCTCATTTTGGATGAAGCC -ACGAGCAGTCTTGATGCAGAAACAGACCGGCGAGTGCAGAAAGCCATGAGAGACGCTTTC -CATGACTGTACTGTTTTCAGTGTCGCGCATAGAGTATGTTTCCAGTTTCATCTGTCACCC -AACTTACATATCATATTTCTTATTGGGCCTAAGAGCGAGCTAACTGTTCAACAGCTTGAC -ACGATCATAGATTTCGATCGAATTGCTGTGTTGGATTCTGGATGTCTTGTTGAGTTTGAT -ACTCCGCAAAATCTTCTTGCCAGAGATGGTTTGTTCAAGCAGATGTACCCCGGTACAGAT -AAACAAGAGCTTCATCGCTAGTCTTGATGGACATTAGCCTCAGCCTTCGATCATGGGTTT -CTCAGCCTAACTCATATGCATCCAAAGCATCCCAGAATCATCCGCAAACATCTCTATGAT -TTAGGTCCTCATATTCTAGATGTAAAGATTTCATGCATTTATATCTCACGAATGTGTCCA -TTCAGGACAAGTGATATGTGGAATAGCCAAATATCTCAACGTGCCATTGACTTTAGCCTT -TGTTTTTTCCCCCCGGCTGGTGGCTATGCTATCCCTGATCTATATTTTTGTCACGTATAT -ATCCCAAACCGGCAAAGTTTTCCACCCCATAGTTTGAAGTCTTTTCCCACAGTCTACTTA -TTAAGCAGAGAAGAGCGAAATAGAAAAAGAAAGCTAAATACACAGCCGGGAAGTGTATAC -ATAACACCAAAATCCCCAGGGCCCAAAAAATTATTATTGTCTACGTCATTTGGACAGGAA -GTTCCCCAACATAAAAGAATAGCACATACTTAATGCTCCATATCTTTATCTCGCAGATGA -TCCTTCTTAGTCAAGTTTATATATAGATGAAGTTCACTCTGTTAGCTTCCAACTAGCTTC -ACCCAGGTTGTCCAAGGAATATATTTTGTCTTCCCAAACGGATTAACTCGTGGGCAAACC -TCTCCGGGGGAATAGCTATTAATGAAATCAAATGCCACGGTGTACACAATCGCGAATATT -GACACACGAAAAACGTTGGTACCTGAACATGATATCCGCCTAACTATATATTCTAAAATA -TAGGATCTCCAATCTACTACACAACTCTTTCTCAACAGTCCAAAAGGGGAGACGTGGAGA -AAATGTCCCTCTCCGCCCCTTCCCCGAACCCACCGCCCGAGCACTTCCGAGGGTATGCCG -CATCCGAGAAATTTTCTCTCCGGTTTTCCAGTTTCACCTATCCATTCATCCTTGCTCTCT -TTTTCCAACTTCCTGGGGTACGGGGTTTTTGTTTCTTGCTCAATTACTCTTTTGTATACT -ATACTAAAATTTCTACCTGTCGAATCCCCGCAGGTCAACTTTTTCTTTTCCCGTTCCACT -TTCCCGAATTTGATTTCTCACCCGTGTGACGGTTGTGAGATAAGGGTTACTGTGATTTCT -TCGTCGAAGTTCTTTTAATAACCGGCTCGACATAATGGCACAGACCAATGGGGAGTTGGA -GCACTCGAAAGGTGTGTGTTGGCCCTGGAGCTTCGCTGGCCAAGGTCCATTGGGATACAG -CATGGGTTCCAGGTAAAACAAGACGCAGGGTATACGAGCTAACGGAGTAACAGAGGCTCC -CCAAGCTTCTCAGGAGATTGCCAATGGCATCGTTCCCGAGGGCACAGAGGAGGACAACGC 
-GGGTGGTATGTTGGGTGACCTGACGCACAACTGCATAGCGGTGTTCTAACTTGACTACAG -GTCTTTTCCAGATCTCCGTGAAGCTTCCTCATGCGCCGCATAAGATTCAAGTGATGGTGG -GTTCGATGTTATGAAGCCCACGAAGAGGGGGAAAAATATCTGACTCGATTTTGTCTTTAG -GTTTCCAGCCAAGAACAGGTTCAGGATGTCCGCCAATCTATTGTCGAACTACCGGGCACC -TTCCAATATACTTGCTTCCACCTCGAATTCAATGGAAACCGCATCAACGACTTCGTTGAG -CTGTCGGAGGTCCCGGGCCTCGAGGCTGACTCCGAGATCGTCCTGGTCGAGGATCCCTAC -ACCGAGAAGGAGGCCCGCATGCATGTTGTGCGCATCAGGGAGCTAATTGGTGCCTCCGGT -GACCGTGTCGACAATCTCCATGGTATCTCCGCTGGTCTGTCTCTGCACGATACTATCACG -GCAGACGCCATCAAGGCTAGCGAGTCCGAGAAGGAGCACTCGCTCTCCAAATATGACCTG -ACTGGAGCTTCACCGCTGCAGACTATCCTGCCCACCGAACAGGCTCCTCTGCCCAAGACA -GTGAAGTCCATCTCGTTGTCTTCATGGAACCCCGTTCCGTACAACCTCCGCCAGAAGGGT -CACTTGCTTTACCTCGTGGTTGCTACTAACGAAGGTGAACAATTCCAGATCACTGCCCAC -GTGTCTGGATTCTTCGTGAACAAGTGCTCCAACATCAGATTCGACCCCTTCCCGAAGCCA -ACACACCCCAAGAAGGGCAGCGCCCACTCCTTGCTCACTCTCATCTCCCAACTCTCGCCC -TCGTTCAACGCCTCCTTCGAGGCTCTGCAGGAGTACAACAATGAGAAGGATCTTTTGACC -ACTTTCCCCTTCCAGAATGCCATTCCCAACAGCCCATGGCTTATCTCCCCCAGCACTTCG -AGCTTGAATGCCCACCAGCCCGATATCACCCGTTCCCAGGAGAACTACCTGATCTCTGGT -GTTGATAACGCCGAGACTCTGCGTGACTGGAACGAGGAGTTCCAGACTACTCGGGAGCTT -CCCCGCGAGACCGTTCAGGATCGCGTGTTCCGGGAACGTTTGACCTCCAAGCTGTTCGCC -GATTATAATGAGGCTGCCGCTCGCGGTGCAGTCCTGGTCGCCCGGGGTGAGGTTGCTCCT -CTAAACCCCACTGAGGCGCGGGACGCCCAGATCTTTGTCTACAATAACATCTTCTACTCC -TTCGGTGCCGATGGCGTTGGGACTTTCACCTCTGAGGGTGGTGACGAAGCTGCGCGCGTG -GCCGTGGGTAAGGACGTTCTTGGCATCAAGGCTGTTAACCAGCTTGATATCGATGGACTG -TTCACCCCCGGAACCGTGGTCGTCGACTACCTGGGCAAGCGTATTGTTGGCCAGAGCATT -GTTCCTGGTATCTTCAAGCAACGCGAGCCCGGTGAGCACCAGATTGACTACGGCGGTGTT -GAGGGCAAGGAGGTTGTGGCCACTCACGCTGACTTCGTTCCCGTCTTCGAAAAGTTGTCG -AAGGCATTGCGTGTTAAGCAGCACCCTGTCTGGGACAAGGAGAACCAGCGTCACGACCTA -GAAGGTAGCGTCGAGACCAAGGGCCTTTTGGGTACTGATGGCAGGAAGTACGTTCTGGAT -CTCTACCGCGTGGCTCCTCTGGATGCTGCGTGGCAGGAAGAGGAGGGTAGCGATGTCTAC -CCCCACCGCATGTCCGTTCTCAGACTTGAGCTGGTCGAGTCCTACTGGCGCCACAAGATG -AGCCAGTACGTGAAGGCTGAGATCGAGAGTCGTAAGGCTGCTAGCGCCGAGGCTGAGAAG -GAGGGCAAGGCCGAGGAGGATAAGACCGAGGAGGAGAAGGCCGAGGAGAAGGCGGCGAAG 
-GCGGAACAGGAGCGTGTTGATATTTCTGGATTCAGCCTTGCTCTCAACCCCGATGTTTTC -AGCGGCCAGGTGCCCCAGACGGAGGAGGAGAAGGAGCAATGGGCAAAGGATGAGCAGGAA -GTTCGGGATGCCTGTGATCACCTCCGATCCAAGGTCATCCCCGACCTGCTCAAGGACCTG -CACGATGGTGACGTTGGCTTCCCCATGGACGGCCAATCATTGACCCAGCTGTTGCACAAG -CGTGGTATCAACCTCCGTTACCTCGGAAAGCTGGCCCAACAGTCAGCCGAGAAGGGACCT -CGCCTCCAGGCTCTGTCCATTCTCCTAGTCCAGGAAATGGTCACTCGTGCCTTCAAGCAC -GTTGCCAACCGCTACTTGAACAACGTTCCCGCTCCCTTCGTCGCCCCTTGCCTTTCTCAC -TTGCTTAACTGCCTCCTCGGATCGGATGTTAACGCTACCCCTCAGGCGGAGATTGACGAA -TCTCTACGTGCAATCTTCCCCGAGGGCGATTTCTCTTTCGAGAAGGTCACCCCCGAGTCC -CTCCGGACCGAGATCGAGAAGCAGGTCACTATCCGCTTCCGCTTCTCCCTCGAGAAGGAC -TGGTCCAGCTCCCTGAGACACTTGCAGCTTCTGCGTGACATCTCCATCAAGTTGGGTCTC -CAGCTCGGTGCCCGTGACTACGCCTTCACCAAGGCCCAGGCCAAGGAGCAGGTTGTTGTT -CCTGTCACCAACAGCTCTACTCAAGAAGAGCCTAAGAAGAAGGGCAAGAAGAAGGGTGGC -GACAAGTCCCCTGCCCGCGCTGCCCCGGCGCCGGCTAAGCCCGCTGTCACTTTCACTGCT -GATGACATCCTAAATGTTGTTCCTTTGGTCCGTGATGCCTCTCCCCGCAGCGCACTGGCA -GAGGAGGCCCTGGAGGCCGGTCGTATCTCTCTCATGCAAAATCAGAAGCAGCTGGGCCAG -GAGCTCATCCTCGAGTCTCTGTCTCTGCACGAGCAGATCTACGGTATTCTGCACCCCGAG -GTCGCCAAGCTCTATCACCAGCTTTCTATGCTGTACTACCAGACTGATGAGAAGGAGGCT -GCCGTGGAATTGGCCCGCAAGGCTGTCATCGTCACCGAGCGCACTATGGGTGTCGATTCG -GCCGACACTATTCTGAGCTACCTCAACCTGAGTCTGTTCGAGCATGCCTCCGGCAACACC -AAGACCGCTCTGGTTTACATCAAGCACGCCATGGATGTCTGGAAGATCATCTACGGTCCT -AACCACCCCGACTCCATCACCACGATGAACAACGCCGCCGTGATGCTGCAACACCTCAAG -CAGTACAACGACTCTCGCAAGTGGTTCGAGGCCTCCCTCTCCGTCTGTGAGAACTTGTTC -GGCAAGGACTCCATCAACACCGCCACCATTCTCTTCCAGCTCGCCCAAGCTCTCGCTCTG -GACCAGGACTCCAAGGCTGCCGTCGGCAAGATGCGTGAGGCATACAACATCTTCCTCGCT -CAACTTGGACCCGAAGACCGCAACACCAAGGAGGCCGAGAACTGGCTTGAGCAGCTCACT -CAGAATGCTGTCTCTATCGCCAAGCACGCTAAGGACATCCAGGCTCGTCGTCTGCGTCGC -AACCCTCTGAACCCTCGCGTGTCTTCCATGGGTACCCGTGTTCAGCCCCAGGTTGGCCAG -TCCGCACCTGAAATCACCGCCCCTGCCGACCCCTCAGGTCTCGACTCTCGCAGCATCGAT -GAGCTTCTGAAGTTCATCGAAGGTAGCGACACCAGCTCTTCGCGCTCTAAGCAGAAGAAG -CGTGCCGCCACCAGCAACCCCAAGCTCCGTGGCTCCAAGCAGTCTAACAAGGCATAAATG -ATTTACTAGCCGTTCTTATTTTTTGGTTATAACATGTGTTAATATCTATATACATTGTCA 
-TTTTGGGGATCGGTGGGCAGGCTTGATGAGCGAAAAGAACTGTTGTATTTTAGATCTCTC -TCCTCTCTTTTTCTGATTTTTGGTTTCATCTGCCAGGTTGGGTTATCTCTCGGGCCGTGC -ATGGAAAAGACTCGGTTGTCTTATATGGGCTCATATCTGGTGCATTATGCGGATTTGCCA -TTTGGGTCATATTTTCTTTCTGGGTAGAATCCATAAAGTTCACATGCAAATACTTGAAAT -TCAATAGATACCTTTAAAGGACTTGACTGTATCTGGCATTGAACCTCAGGGACTATGATC -GCCATAATAGGCAATATGTGTGGCCCGCTTTACCGCATTAGTCAATGTTTGTTTTCTTTG -TTTGCCGCGCAATGCTGTCATTCGTTTTCCACTTAACACACTTTAGTCCCTCAGTATATC -TTATTTGCATTGGACTATTGCTGCATTATCGGCAATATCTGGTTGAGTGCCATCATTGCT -TCTATCATGCATCATTGAGGGGTTTGTCTGAACGGCGAGGTTGGTACAGCGCGACCCCAA -TGACCAAGAGAAAAACTTATGACAGTGCTGATTCTCTTGATAGTTGTATGCTATCAGCAG -ATATCATTATCCTTTTCGTCCTGCAATGGAGGACGCCCAAGCCACTGCAAAATGGGCAAA -CCGTGTCCTGCGCCCCTTGACCTCCATCTGCCGTCGAATCGCAAAGCACAAAGAAACGCT -GTCGACAATCGCGACCGGGTTGAAACCACCCGAGACTGCAGAGCGCAGTGATGTGCCTAC -TTTGGAAGCCCATCACAAACCAGAAAGCCGACACAACTGTTCAGGCTCGGATGCGGATCC -GGATGAAAACGACCCAACTTGGATTCCAGGAAAAAAGGCCGAGCGGCGTCGACCCAGACA -CAAATATTCGTCCAAAGCGGAGGATTCTGGGGGGAGAAAGCGCAACAGACTATCAATACA -CAGCCCGGAACCCTCACGAACCCTGCCCGGCGCTATTGAGGTCGCAACACCACTCATCAC -TGGGAGGAGATGGGAGATGCCACCGAGTGCACAGTCAGAACGCCCGGAAGGACAATTCAA -CCCTGACATCAAACCGTCACATCCGTTAGTCTTCCGTGATCGATATTCGCTCCACAAGAG -CCCTTGGCAGGAACTTCTCGACCAGTCTGGTGATCCGGGCTTCGCCGACATAGCACACAA -CCTTGATCGCGTCTTTCAGAATTTCCTTTGCAATACTGTGATATCTAAGCGAGATGTGAA -ACCTTCATCCGAAAGGAGCAGACGCGGTGCTCGATCATTACTGTCCATGGTGGCACGGAG -TCTGCCTAAATTTATTGCGAGCGAGCAAGAAGCACAGGACGAGTTGGACGGGGACGGGGA -TGAAGATATGTGCAATGCATATTTTACAGAGTTGGAAGCATTTTATGCACCTTATGGGAC -AGGTTGGAAGCCACTCCGCGAGGCTGTCCGCGCGCAGGGTATCTACTTGGTTTCTACCCT -GATACAAAATCATTGGATAACAGACTCAATTGCTTGTGCTCTAATTGAGAAATGTCGTTC -ATTTGCACCCGAAGAAAGTGATACATTGCTCTCCACATTGCTTTCTACACGCAAAAATTA -CCCATACCCCCAGGCTCTAAGGCCAGCGGTGGGCCATGACGCCTCGGATCCGATCCGATT -GTTACGAAAATATGCCCATCATGGCGTTGCCAGTCGCTCTTATATTTTCGACGAACTTGC -AAAGCTCCTCTTGCGGCGAGTGCTACCTCCGGAATGGATGGGGACCAAGTTATGGACTAG -CTGGATGACACGAGCAACAATTTCCCTTTCGAAAGAAGATGAAGACTGTCCAGCGGCTAC -ACGGTTGATTGAAGCTGTAGTTCTCTCAGCTAGTGATGCTCGTCCTTTCGCTGTTGCTCA 
-AAGTTGTACACAGAAATCTCCCCCGAAGGAGGCTGCCCGTGCCCGTGCGACCAGAGTTGC -CTCCACCACAGCGTGGGAAATCTCGAATCTCGCGCGACCTTGTCCATTACAGGTAGAAGA -TGCCCTGAGCAATCACGTCATAAGCCTCATGGCAGCGCTCTGTGGGATGCACATTTCACG -GTCTCGTGCCTCCGATGACACTGATTGTGCGGACAGAACAAAGGCGAGCTATATTACAAA -CTATCTTTATATCGCTCTGCAGCGAGAACTGGAATCAAATTGCGCCGCCCAGCGCCCTGA -TAACACCCCTCATCAATTGTTACGACGTGGCTGCATCTTACTAGCCACCAGCCTTATACA -GTGCAACGACAAAATTTTATTGATTAATAACCATCATATGATGGCGTCAACAGCCAGTGT -TGATGAATGCGCAGAGATTATAGCATCTCATTCAGACATGATTAGAGAGTTAGCATTGTT -TGTGCATCAAGCGTTTCGCTGCCTCAGAAAAAGCGCCGAAGATGAAAGTGAACATACTAG -CGGCGAAATACGTGACATGATTTCGCAACTTGCCCATTTACCCGATACACCAGCTTTGTC -GGTCTTTCTTGGGCGAGTGGCTGCAGAAGCTGCCATGGAGTTTGCAGAAACTACAGGAGA -TCCCGATGATCACATGTGGGCCGTCGACATCCAAGAGGTGGCGGTCGCAAGACAGCGTCA -GGAGGAGACTGCCCAAAGGTCATCAGAAGAACCCGAGGGATCTAGTCAAAGCACGGGCCT -GTTTCGCTGGGAAGATAGCATAGGTGAATGGGTGGCATCTACACCCGTGACCAAGGGAAA -GCCCATCGTAGTGCAGAAAGCCCAAAGACCTATGCGCATGCTATCAAGCCCAGGAGCCTG -CAGCGCTTCCTCTACAGACACAGATACAGACAGCAGCTCACCTGGGTCTGATCGCTTCCA -AGACTCTGTTTCAAGTCTGACATCTCCCCTGCTGATGGCGACCAAACGCACTTTCGAGGA -CGCCGAGGCGTCATCCATACGACCTAGAAAACGACAGCGACCCACACCTGTGGTTGTAGT -TGACAAGGGTGGAAACAGACCTCAAGCACGCTCTCCTACGGCGACTAGATACAGTTCTAC -TGTGGAACCAGTCACTCGCCGAGACATTTTGCGAGAGCGCAGCACCAATCTGACCAGACG -GACGACTCCCGCAACACAACAGGCACGGAAGGTTGAAGTCGTGATCGTCAATCACAGAGA -AAGCAGTCCATGTCAGCCTGCCGTCCGGCCCACTTTGGAGCGTACTGCAAAGCAGGTCCA -TCGTACGGTAGAGAGGCGAAGGTCTACGCGCCCTTCAGTCTCAACCATACGTCGTATTGC -TGAGCCGACCCCTCGACAAAGAGTCATTCCATGTCCCCAGGATGACGACAGTGACGATGA -ACTCAGCTTTTTCTGAGCGGGGCTTGGGCTTGGCCCTAATTGAGGTCTGTAACAGTTTGC -CCGCGTCGGAGGAATGTCGGATATTCAGAGTCTCCCGGCCCTGCCTTCGTGCTGAGGTTT -TATCTTATTTGACAGTGCAAACGGATCTCTTGGCTGTCTACTTCGTGAATTACTGAGTAT -GACTGTTTGCAAGATGTTGGTTGATTTTTTTTTTTGGGACGGAACGGAACGGTGGATGTG -GGTGGTCCGCTTTTTTTTGGGAAATCGGTGTTTTAGGATTGTTGTTTGATACCCATGGCC -TTGAATTGGTGTTTTAACTGAGGTGTTCAGACCTCATTGTTTTTCGCTTGTTATCTCAAC -TTTGGATTAGCCTTTGGACCTCATGCCAATAGAAAAACCTCAGATTGGATATTTGACCCG -AGAAAACCCTGGCTGTAAGGTTATATACGCACTTAATCTGGCACATGAGATAAAACACAT 
-TGCAGACCGACAAAACTCCGGCGGCAACACCGATGGCCTTGAATGATATAGACCTGCACT -CAAGGTTTAAGACATCGGTTTTTACATATTTTCCCGTTTCAGTGCCAACTCACGGAGTCT -AGGGCCAATGGTAAGTATCTTTATTCCAAGTCCTTTCGGAAGGGAATTCTCCGCGAGAGG -AGATGAAGGTCTGATATAAATGAAATGCCAAGAAAACAACCCCTGAAGACGGAGTTTAGC -CTCTTACAGTGATGACTGGAGCAATAGCCTATGACCGGACTGTTTCATAATCCCCCTAAA -GTGGATTACTCTGTTCGCTTTAATGTGTGATGAGCTGACATAGGCGCTTAAGAATCCCCG -TGGAATATGAAAACGTGACCCCACAAGCCCTCATTTTCTCCACTCCCAACTGTAGATTGG -CTCATCGATGGAGCAAAGGCCCCTTTCTCAAACAACCAGTCGTAGGCCAAGCATTGAAGC -GGATCTTTGTCGGGGCCTGTGTGGCGTTTGTTGATCTTCGTCAAACTCTTCCTGTTCACT -GGCTGTTTCACTGTTTCATTGCTGACCAGTCACAGTCCTTAGTCGGTATTCCCGCCACTT -TTACTCCGTAGGATAACTCGGCTCGACCTGTCGCATGTTCCAGGTCCATATTCTGCCAAT -GTGTGCTGAGGCTATCTGTGAATCGGAGCCCCTGATTCGCTGAAATGGATCTGCAGGATC -GTCTGCCTTCATACTCCATATGCACATGAAAAATCCCTTGCACCGATCTGTGGTCCCAGC -GCTAAACCCCCTCTTTCGAAGAAATTAGGAAAAGTCCCAGAAATCAAGGTCGAAATGACC -GGATTCCACATGATACGGGCCGGGTTAGAGGTGGAGAGCCTTGCTGGAATTCTACATATT -GGTCCGACCCCCCTGTCTCATACTCCATCCACTGGACTGGACACAAGGAGAAACAGTCCT -TTCTTCTATTTCCGATCTTTCATTGTCGACAGTCCTTTGAATTTTTTTTTTAACACAATC -AATATTGTCACATTTTCTTTGACATTTGTCTTTGAGACAATCCCCATTGTATCACCGTGG -ACAATTTTCCATCTTGAGCACTTTTTTTGGCAGTGACTGGATCACTAGCATTCCTTTCGT -TGCCCGTCGACCACAACCTCTTACAAACATCGCTCGCTGTTCTGTGAACATTTTTGCTTC -TCGCTGCGCCCCATTGGACTGATAAATATCGACACACGGCTTTTATTCCATGCTTTCCCT -CTGATTTCTATACGTTAGATCTAGGCATGTCCTTTATTTGACACCAGCCACCCAACAGAA -AACCCTAAACGGCTAAAGCCCTTCAATCCTTGCTTCTATCGTTTGATCAAGCAACCTGGG -GTTGTTTCGTTTGCTGTTTTACTCCTCGACAATATCTTCCGAGCCTTCTGTGGCGTCGGT -CCAGATATCAACGATCCCCTTTTCAATACAGTCTTTTCCAGCGAGCTGGCCTCGCTAAAC -AACATATCAAGCTCAACCTTGAGTACTCGACGTACAGCGACTTTGGTGGGTCGTCATGCA -ATCCTTTCTTCTGGCAATGTCCCTCGCTGGACTGGCCAAGGCAACAGATATTGAACACAT -CGAAAAACGAGGTGGTGGTTACTCTGTCTACTATGAAGCTGTCAACGATGATCTGGCTCG -CTATACAATTCTCGCTCTCGGTTGTGCTGCCGCCGCTTATTACATCTGGCAGCTCGTCTT -CCGCTTCTCTCGCCATCTTCGACGCCTAGCAAGCTTCAGCGATGACCGACAACGTTACTT -CGTCTCTACACACAATACCTTTGCCTGGATTAAGGAACATATCATCTATGCCGCATTGTT -CCGCAGCCGACACAACCAAGAAATGCAGCTTTCATCAGCTATTAATATGGGTACTTTGCC 
-TAGCCGCTTCCATGCTTTCCTTATCACCGGGGTTATCGCCATGAACGTCACACTCTGCTG -TGTGACATCTCCTTATGGATCAGAGCCAGACTCAATTGCTGGTGTCATTCGTAACCGCAC -TGGAACTATGGCAACCGTCAACATGATCCCATTAGTCTTGATGGCAGGCCGTAGCAATCC -TCTAATCGCTATGCTCCATGTGCCTTTCGATACATGGAATCTGCTCCACCGATGGCTCGG -CCGCATCGTGGTGCTGGAGAGTCTCGCACATATTTTTGCCTGGGCTATCCCCAAAGCCCA -AGAAAAAGGCTGGAAGGTTGTCGGCATGGCTCTGGGTGGAAGCAGCTTCCTGCTGACCGG -CTTGATTGCCGGTTGTGCATTCACTGCGCTCTTGCTGCACTCGCCCTCCCCTATTCGCCA -TGCATTCTATGAGACATTCCTCCACCTGCACATTGCCATTGCAGCTGTTTCCATGGGTTT -CCTCTGGGTCCATCTCAACGGTCTGGTAGCACAAACTTATCTGCTTGCCTCTATAATCCT -CTGGGCCCTTGAACGAGCCACCCGCTTCCTGATCATCGTGTACCGCAACTGCGGCCGTGA -GTCCACAACCGCAACCATTGAGGCCATGCCCGGTGACGCAATGCGCATTACCCTGCGCAT -GGCCCGTCCCTGGACCTTCCGTCCGGGCCAGCACATCTACCTCTATATCCCCGCAGTGGG -CTTGTGGACCTCGCACCCGTTCTCCGTTGGCTGGAGCGAAACCGAAGACACCAACTCCGG -CGAGAAGTCCCTCCCACTCACAAACAAAGACCCCCTCGGTGCACCTAAAAAAGAAACAAT -CTCTCTCCTCGTCCGTCGCCGCACCGGAATGACCGACAAGCTCTACCAGCGTGCTGCGAA -CTCTATGGGCTCACATATTACCCTCCGCGCCTTCGCTGAGGGCCCATACGGTAACATCCA -CACCATGGACTCATACGGCACTGTGATGCTTTTCGCCGGCGGAGTCGGTATCACCCATCA -CGTCCCCTTCGTCCGCCACCTAGTCGCCGGGTTTGCGGACGGCACCGTAGCCGCCCGCCG -AGTCACACTCGTCTGGATCATCCAGTCCCCCGAACACCTGGAGTGGATCCGGCCCTGGAT -GACGAGTATCTTAGCGATGGATCGCCGCCGTGAAGTCTTGCGCATCATGCTCTTTGTTAC -GCGGCCGCGCAACACCAAGGAGATCCAAAGCCCCTCTGCAACCGTGCAGATGTTCCCTGG -TCGTCCTAATATCGACACATTGGTCAGTATGGAGATCGAGAACCAAATTGGCGCTATGGG -AGTTCTTGTCTGTGGCAATGGCGGTTTGAGCGATGACGTTCGCCGTGTATGTCGGAAGAA -ACAGGCTCATTCTAGTGTTGATTATGTGGAAGAGAGTTTCTCTTGGTAGCCTGATATATG -CGATCTATATACGTTGCTGTCTTTTTTGGCGTATGAGCGTGTTTGTTACGATGGTAGCTA -TCGCTGTTTTATGTAGGGTTATACTGTTGTATAATTCTTTACTTCACTGTTCGGCTTTGC -AGCTGGCTTTTCTTGTATACGTCGAATTACCCATTCTCTTCTGGCAGCGGTTTTCCTGTC -ATCATGAGTATCTATACACTTGTACATATTTGCAAAATGCAAGATTTGATCAAGATTTGT -CCTTTACTTGAGCATCAAGGCTCACCAACAATGTAGCATTCCACGGAATTTAGATACTAA -TGTTGCCTTTCTTCAGGGCAAGTCCTTGGGCATGTGGCAAACTGTTTGAGGAACATCCCA -AGTCCTGTTTGCAGTCCGCCACCTCCACTTCACTCGCTGTAACATATTGTTAGAACTATA -CCTAACTTAACAGACACAGCTGCTTATGCACAATGAAATGAATCAGCAGACATTATATCC 
-CCAATACCTTTGTACTTCGCTCGTTGTGGGTATGGGCACAACATTCTATCATATGCCTTC -CCATCTTCTGGAGAGTATTTCACCGATAGACTTGTAGGAGCAGTGCCAGACTCGACCCAA -GCGACAAGGCTGTCAAATATAGTGGAGGGATAACCTCCTTTGGTGCCAAAACAGTGTCCC -ATCATTGGAGCTTCAAACAACTTATAGTACTCATGAACACTCGGGTCACTGGCATTTACC -GCTTCGTAGTATGCCCTCGTACTGTTAGGAGGAATGACCTCGTCCATCTGCATCAAAAAT -CAGCATGTGTATTTCTCTAACAGACAATATAAAACACACCAGGCCATGATATGTGAGCAT -CTTGCCACCAGCGTCACGAAATTCTGACAGATCTGGGTTATTTGTTCCAAATACTGAATT -CCATTCCTGGACAGACTTTCGAAAAATGTGGTCGTATTTTCGGTGTGTGATGTTCCTCAG -GGTGAAGTTCAGGTTGTGCTCGACAAATACCTGAGCCCATCGATTATATAGCTGACTGGA -GACCGCAGTGCATGTCCCATTCGCACAGTTTGTATTTACCGGTCTATTTTTTCCTGTCAA -TGCGGTCCCAGGATTGAATCCATACCACATGAAGTTGCCCCGTGAGTCCCGCGCTCCTGT -CCAGGTAGCATTGACAATTTTCGCAGTAGCAGACGAAATTCTCAACTGCGATCCAGTGTT -GCTACAGTTGATTTGGGCTCCCACCAGAGTGAACGGATCAAAGACACATACATCCGGGTC -CGAGATTACACCATCCACAACACCATCAGCGCTGTCGCAAGCGGAGAGTACAGCATCTGT -CACAGCGTTGATCTCGCAAGGGTATGGGTATTCATCGAGTTGGTCCATGACAAACTGAGG -CCAATATCCTCCTATGGAAAGTTGCGGCCAATTGATTGCCGGAGCAGAAGCAACAATGCC -ATCATATGCTTCGGGATATTGCTGGGCGAGCGCCAGACCCTGTCGGCCGCCTTGCGAGCA -TCCAGTCCAATAGGAGTATCGGGGTTTCGATCCATATGCACTTTCGGTCAAGCTCTTTCC -GATGACTGCTGCATCGCCGAGAGAAGCAATTCCGAAATTGTACATGGTGTCGTAGTCTGG -TGTTCCGTTTTCCAGAAAGCCCCAGTCTGCCGGATCGTCGGTTGTATGGCCGCCGTCTGT -TGAAAGGGCGACGTAGCCCTCGGCAGCTGCTGCGATCATTCCAAATTCTGTTATGGATGT -GAGTCCCGCTGTGTAACCGTTTCCTCCGATTCCTTGGAGTCGTTCGTTCCATGAGTCACT -AGGTAGAAGGACGCCAACGTTGATGGTATCATTTTTGGTGGTGTGCTTGTAGGTCACTGT -GACATTGCAGAAGTTGAGATTTTTAGCTGTCACTCCGCCGTGGTTGTAGTAGTACCACTC -GGGGATGTATTTGGATACATTTGAGACTTGGGAAGCTTGCAGTGATAGCACTTCGGCGCC -TGGTATGTGAGGATATGAAAAGGCTTGAGGCGAGCATAACGATGGAGGGAGGCTAGTCAT -AGCGTTCAGCCCAGCACTTTGGGCAATAGGCCCGGATGAAATAATTTGTTGCATAGTTTG -ATTCGGAGAGGCCAATACTTTAAATATGTCCCTGTTCTCGGTTGCTTCTTGGGTGGTACA -CGGTATACGCGTATTCAACGGACGACTGTTGTCTCCCAAGGAGTCTGAACCGTTGCATTT -CTCTGAGTTGATTCTGCATATTCAGAGATATTTATGGCACGGGAAGGTGGATGTGGACTA -TACACACTCCACTCTGAGAGGCCAAAAGCTATACGCGTATTCGAACCCGAGGCCAAAGTG -GGGCATGAGTCCGAGACCCTAAATTGGCCCTAAATTAAGAAGTGTTCTCAATTGCGAGAC 
-ATTGAATATCTATTAAAGAAAATATGCTTCATTGTATTGTGACAAGGTATAAAAGGTATA -AATAATTGTTAAAAAGTATTTGAACAGTCTATAAACAAAAGAACACCGAAAACTCCACGG -AAAAAACAGCCTATAGAGCCTCACTCCGAGCACAAGAAATTCAAACTCCTGAATAGGTGT -GTGCAAGCGGTATGCTTCCCTCAGTATCCGCATCAAGTCTCATCTGGTCAGGATACCAGA -TAAAGAAGATAACCATCACAGCAGCCATTGGCACGCTGTCAAAAGCATACAATGTCCACT -CATGCATAAGAGGATAACCATCATTGTCCATGACATATTCAATGATCCGGAAAATAGAAC -GGACCAAAATGAGGGCACTGACAGCATATAGCATGTGCAACGATCGTTGCCATGGAATAC -CAGGCTTGAGGCACTCGGCTGTGGGTGTACGACGAAGACGCTTTGCGAAGATAATAGCGG -TTACCCAGAATAGGGAAAATGAGATGAGCTGAATTGCCAACCCAGCAATAACGCAAACCT -TTCCCCACTGGGGGTATCCCAGAACCGAGAGAGATGAGGAATTACCCTGGACAATGAAGG -ACAACACGTCGCCCCAGACAAATGTTTTTGTGAGGCGTGACACCCGAATGACAGAAAGGT -GTTCTCCTTGAACTCGGCGGATGACTCGACCGAGGGTCATGTAGATAGTTGCGGCGAAGA -GCGTGGGACCGAGAAGAATGAATACGTTGGCGATGATGTACGAGCCCATTGTAGTTGTAT -TATTGTGTGCAATGCAGCGACCGATATAACCTATGATCTCGACTGTATTGCAATGTAGGT -TAGTGTCTGGGATTCAGGGGACATAAGGGTCAGTTGTACGGGGAAGAGAGTCCACATACA -CAAGCCACCCAGGGCAAATGCTATACAAAACCATGTCCTGGTTTTGAACAGTTTCCAAGA -GACAAGAGCCGTTAGGGCAATGAATATCAGCATAAAAATGACCGCCGCTGGTATCGAAGG -GGCATAGTTCCAAAATTTGTATGAACGAGAGGCGAGTTGAGACATGTTTGGCAATATGGA -CTCGTAAAGAAAGAAGGAGAGATATTGTGAATTGAGGAGACCGAATCAAAGGATTCAGAG -TCTTTATAAGTCGTCTTTGGGACATTCTATAGAATCTTTTTTTATTTTTTAGGTTTATAG -ATCGGGGTGCGGATACGCGTATAGAAAGAATCCACAACGAAGTAGGGTTAGCCAGTCATG -GTCTTGAAAGTGCAATCTTTGAGCTAGTCTTAAAAAGCGGATTAGGTACCTAGGTATTCA -TGTCAATTTGAACCTTGGAAGCTTGTATAAAACATTTCGTTTTTTCGTCCACAGCGAGGC -CTTTGTTTCTGCGCTATGCGATTCATCCGATAATTAGCTAGTATACACGTGTAAGGGAAT -CATTAAATCAATTAAACCCTCTGATCAATGAGCTATCCATGAATATTGAATTCTTCTGAT -CAGAGTAAACTTGGTGGAGATATTCAAGTTGAAGTAGCGATGTATGCGACGATATACAAT -ATTTCAAACGGGTTGTGCGGGTGGAATCCGATTGTCGTAGGACTTGAGCTTGAAGAGTGG -ACCTATGTGGATATAGACACCTGAGGTATGGTGAAAAGTCCTTGTAATGTTGTATATATT -AAACTTAAGGCCTTTTTACATCAATGTCACCAGCATTGGTCCCGACTGTTAGTGGCTGAT -TTTAGGTTTTTGTTTTTCTTCTTCTCTGTTCAGTCGCCGTTCGTTTCTCACAGGCACCAT -TACAGTTTAAATCTATATGTCTAATCTGATAGGCTATGATCCACCGACAGCATTCCTACT -GGGGGTGGAAAAACGTGAGCACATGCTTGACGGGTCTAAACAGATAATTTATGGAGCTCA 
-ACGGCTGATAGACCGAGCCAGGCACGGGTGATTGACGAACTCGTACCGGCACCTGCACCT -ACTATAAAGTCACTGTTGCGGGCGGCTGGGATACCCGTCTGCCTTTGTCTCTGAGTACCG -GCTTGGCTTGTCGCAGCAACGGCAGTGCGTGTACTATTTGCTGAGCCGGCATACAAGCAA -CACACGGTGTTGAAGCTGCCGAAGATCCAGTATTCCTAGCTGAAACGGTTGCTTCCTTTA -TTCCGACACTGACTGCAGTGGTCCTAAGTTCCAAACTGGAGCTTTAATTAGACCTTCGAA -GTATATCTTCGCTTTTCAGCTCGTGTCTAGTTGATTTGTTTGTCTATACCATTGTCATTT -GTTGTACTAGAGCTATCACGGTGAGAAGTTAGTTATTGGATAAAATACAGATAGCTGCAC -TGGGTGATTAAACAGGTTGGACGAAATATGCGATTCAGAAGGTCTTGTGTAGCTGCATCC -AAAGTAACATCGGTTCCCCAGTAACGTCATTAACATTATCTCAAAGCTGGATCTAGTGTC -GATAATGCTAAACAGGGTCCATGGATAGATGATCATTCTCTCAAACCAGCAGATTGTCTC -CTGGGTCAAGTGCGCAGGCGGAGCGAAGCGGGTAAACCCAGGAGTTTTGTTGTACAAGTT -GATGGTAATATTATGCGGAAATGGGGCCGGCCCTTCAAATATGTTTCCAGACGAACCATT -GATTGTATCATTCCCAACGTAAATGATGTCCATATCGTAATCGTAGTGGCGCATGGGAAT -ATGGTATTCTTTGTCCACCTGGAGATGCAGCTCGAGGCCTTGCTTGCCAGCCCAAGCTAA -GGTGATCTCACGAGCCTCATGGTTGACTGAGGTCAACGGTAGTGTGACCTGAATGGGGTC -AGACACGTCGGGACAGAAGTCCACGGTCAGGCAATCGCGGCCAGGGTGTCTGGGATCTGG -CAAATTTTGGCGGCGCCACGCATTTAGTTTGTATGGGAACAAGGCCGGGGCGTCTTTCTC -GGGCAATGCTTCAAGTCATATCTTGATACGCAGCTCGGGGGGTAAGAATCAGAAGAGAGG -GAACTTCATCGAAGGTACTGCCATAATCATTCGAGTGGCACTGGCGATGGGAACATGGAA -GAGAGGCAATGAGATGACGGAGGAAAGAAAATATGTGTAGAATTGAAAAGCGAAGAACGA -ATGCACCCTCGGATATAAATTGTAGAGAACTTTTGAGGAAGGTACCAGTAGGTAATCGAT -TCGGTACCATTGATGTCTGAGACTGCCAGGCCACGACTCTTTTCTCATTTAGCTTGAGAT -GATATAGGATACTATCTGCAGGTGGAAGATGTGCTTTTTAGTTTTAATTTCGAGAGGTGT -AAAAATCGGTCTAGATCCAGGCCAAATCCAAGTCTGGGGACATCGAATGGGTGTGACGAC -CTAGGTGAGCGCTATGTCCGAGTCCTGACTGATTGATAATCTGTAAAGTGTGCTGGGACG -TTAGATTTGAAACCATAAATGTCCGATGTCGTGTTAAGTCCAGCTATGCTCAAAGGGGAT -ACGCCTTCATTCTAGGTCTTAGTAGAATTCTAGTGCATCGAATGAAAAACAGGAAGTCTT -CAAAGCCAAAATCTTGGAAGGTCCGAATAGGTTCTTCGCGGATACAATTGTCAGCAAGTG -TATAGACCTCGGGACCGAAGACATAGGACGAGGTTTGTTCCCTGAATTACGGCAAATTAA -CCGCCAACCTTTGCGCCACAACGATTTTTGCCAGCGGTAAGATTAGTCATCTGCGTATAA -CAAATGATTCAATACTGCACTATATACGGAGTTCCCTATGATCAATCCACCTTGTTCATT -TGTTGAGTTTCACTATAAACATACCTTCTCCTGAACACTATGTGAAGGTGTAATTCTGGA 
-TTAGCTATCGTACAAAAATCCTAAATGACTAGAGGGTGTTTAGGGAGGATAGATAGAGGA -AATTGTACCTATGTAATTGCCAGAACATATTGCTACCTACTTGACTTGCTAAATTGAAAT -GCCTAGCAAAATTGCTGAATAAAATTGCGAAATAAAATCACTAGGTTAAATTGCTAGGTT -AAATTGCTAGGTGCAATTGCTGGGTGGAATTGTTGGGTAGATTTACTAGGTTGAATTGCT -AAGGTGGAATTGTTGGGTAGATTTATTAGTGGAATTGCTAAGTGGAATTGCTGGGTCTAA -TTGCTATGCCTAATTGCTAGGTGGAATTGCTGCCGTGCCTGGAATCTCTCGGGGCTCGGG -GAGTTGGAACTTTCGGCCTTGGAAGTTCCACGAGCTTATAAATCCGCAAAGTGGGGTCCG -TCCACGGGTGTCCCTGCCGAAGACTACTTAAACCATCTATCTGATCTATCAATGCAATCC -ATCTTCACTCATTCTCTTTTCCCTACTTTGCCAAAATGACTCAACCAAATATCACCCTAT -ATACGGCTCAGACCCCTAATGGCATTAAGATCTCTATAGCGCTAGAGGAGCTTGGGTACA -GTAGTTTACCCCGCATCTCAAAGCTTGGTCCTAACAGATTGTAGTCTGCCATACAAGGTC -GAGAAGATTGACATCTCGAAGAACACTCAAAAGGAAGAGTATGTGCTTGCACTTCCAAAA -AACACATCGACACAAATAGACCTAACCACTGTATAGCTGGTTCCTAAAGATCAACCCTAA -TGGCCGCATCCCTGCCTTGACAGACACCTTCACCGACGGCAAGGACATCCGTCTATTTGA -GTCTGGTAGCATCTTGGAATACCTGGCGGAACAGTACGACACAGACCACAAGATCTCATT -CCCCAAAGGCACGCGGGATTACTACGAGATGAAGAGCTGGCTGTACTTCCAGAATGCAGG -CGTAGGACCCATGCAAGGCCAAGCCAACCACTTCTCACGATACGCGCCCGAGCACATTGA -ATACGGTGTGAAACGATACGTCAATGAATCGCGTCGGCTGTATGGTGTGTTGGACAAGCA -CCTCGCAGAGTCCAAGTCTGGTTATATAGTCGGTGATCACGTCTCTCTTGCTGATATCGC -CCACTGGGGGTGGATATCCGCTGCAGGATGGGCCGGTATTGATATCGATGACTTCCCAAA -TATCAAGGCTTGGGAGGAGCGTATGCTACAGCGACCGGGTACTGAGAAAGGTCGTCACGT -TCCTTCCCCTCACACCATTAAAGAGCTCATCAAGGATAAGGAAGCTACGGAGAAGATGGC -TGCTGAATCTAGGGCTTGGATTCAACAGGGGATGCAGGCCGATGCCAAGCGCAAAGATTA -AATGAACCGCGGCTGAGAAGACCCTAGGACTTTGATGACAATTGGAATGACAGAGCAGTT -AATTTGAAATGAAATTTGAACCAAGAGACTCGTAAACTTCACATTTTTGAAGAATGCATG -TATGTATTTAGTGTTATAGTTCTCCGTACACTTCACACACGAAGTAACACAATTTAGTTT -ATATATACAACCAGGTATCCACCAGCCATGAGATAGAGCAAAATCGTGATTTTCGAAACA -TTTTATTCTTGCAAGTCACACTTCATACCTACCAGGACAGCTTGGGCAAGAACTCGTCCT -GACTTATGCACGCCTCCGATATTACTGATTACAGTATTCTTTCAGATGCATACGATCAAT -TGAATAGACAAAGGTTTAACTTACATAGTGGTAAGTGATTGATATTACTGCCAAAAAGAA -GAGTGGCGTGCGTGCATTAGGCCAAGCGGCCCTCGTACTGCCGCCCGTCTCCTTCAGCTA -GCCGCCTTGCTTCGGATATAGATATATCCTCGGATATCCGTAGATCCTCGGAGAGCCACC 
-GCCTACTAAATAAAGATTTTGTTATAATCACGTGCTATTGCTCGCCTCTTGGACACCCGT -GGACGCTTTCTTTCATCGCTTTCCACTTATATCACTAACCTTTACCTATTCCTTATCCAA -AGCAGCAGTCATGGCCTGGCGCAACCAAGGAATAACTGGCTCCAACAACATCCCGCTTGG -TCGGCGACGCATGGGCGGTGAGGAAGGAGAGGATGAAAGTCGCACTGCGACCCCTTCCAT -GCCCTCTTCGTCTATGGGCCCCGATGGTCCCAAGCGTGGGCGTAGTCCTGTTCGAGGTAT -GTGTCTGATTCTTCCAAAGTCAAAGGATTGAGAATATGTTAAGACTGACATTCATTTCAA -AGCCGATCCAGCTCCCCTGAACACCGAGGGCGATGGCAAGAGACGCAAGAAGCGTAACCG -CTGGGGCGACCAGCAAGAGAATAAGGCTGCCGGTCTTATGGGTCTGCCGACTATGATCAT -GGCGAACTTTACTAGCGAGCAGCTTGAAGCTTACACACTTCACCTTCGTATTGAGGAAAT -CAGCCAGAAGCTTCGGATCAACGATGTGGTCCCCGCCGATGGTGACAGGTATCCGCTACT -GTCTTTTGGAATTCATACTTTCCTGTATACTGATTTTGATAATAGATCTCCCTCGCCACC -ACCCCAGTACGATAACTTCGGAAGACGTGTGAACACTCGCGAGTACCGTTACCGCAAGCG -TCTCGAGGATGAACGTCACAAACTGGTCGAGAAGGCCATCAAGACCATTCCTAACTACAA -CCCGCCTTCTGACTACAGACGCCCAACCAAGACCCAGGAGAAGGTCTACGTCCCCGTGAA -TGACTATCCAGAGATTAACTTCAGTATGATAACTAACCCTCTAACTCCTAACCATTCTTC -CATTGCGTCTATCACGAGTCCCTACTCGACTCGAGCTGACAAGACATACTACCTTGAATG -TCCTCAGCTTGTTTCTTGCTAAACCGTGGTGCCTGATGCTGCACATATCTTAAAAAATTA -CAGGATCACTAACACTCAGCTTTTGCTATCTTTACAGTTGGCTTACTCATAGGACCTCGT -GGAAACACCTTGAAAAAGATGGAGACCGAGTCCGGTGCTAAGATTGCCATTCGAGGCAAA -GGCTCCGTCAAAGAAGGAAAGGGTCGATCTGATGCAGCTCACGGTAGCAATCAGGAAGAA -GACCTGCACTGTTTGATTATGGCGGACACCGAAGAAAAGGTTAACAAGGCTAAGAAGCTT -GTCCACAATGTCATTGAAACAGTAAGTTCCTGTATCTCAAATTGTGTGTCAAAGAACTGG -TATTGACAATTTTCAGGCCGCATCCATCCCCGAGGGCCAGAACGAGCTCAAGCGCAATCA -GCTTCGTGAGCTGGCTGCTCTTAACGGTACTCTTCGTGACGACGAGAACCAGGCTTGCCA -GAACTGTAAGTGAACACCTTTCAAATGACTTTGGTTGCGCAATCGATTCTTACTGAATAT -CAGGTGGTCAAATCGGGCACCGCAAGTATGACTGCCCCGAGCAGCGTAACTTCACTGCGA -ACATCATCTGTCGTGTCTGTGGCAATGCTGGCCATATGGCTCGCGATTGTCCTGATCGCC -AGAGAGGCAGTGACTGGCGCAACGCCGGTGCGGATAGACGTGGCGATCGTGCTGTCGGCA -CTGGTGATGCCGTGGACCGCGAGATGGAGGTAATTAATCCCTGCTCAATGACCGCTTCAT -CTGACATAATACTCACATACTACTTAGCAATTGATGAACGAGTTGTCTGGTGGTGCTCCT -GGCGAATATGAACCCCGTCGCATTGAAGCCGGTCCCGGTGGCGGTAATGGCCATCCTGAT -GACCGTGATGCGAAGCCCTGGCAGCAGCGTGGCCCGCCACCCCAAAGTGATGTGGCTCCA 
-TGGCAGCAACGCCGTGAGCATCGCCCTCGTGACGACTATGGATCCCGCGACAACTATGGA -TCCCGCGATCATCAAGGATCTCGTGACGACTATGGATCTCGCAATCAGGGTGGCCCTGCC -CCTTGGGCTGCGCAGAGCCAGCACCAAGCTCCACCCCAAAACCGCGACTACGCTTACGGA -TCTCAAAGTGGTTATGCTGCTCCTGGCACTGCTCCAAATGCTGCTCCTTGGCAACAGCCG -GCCCCTCCTGGTGGACAGCAGCCTGCCTATGGTGGATACGGTGGATATGGCGGATATGCG -GCTTACCCTCCTGGCATGGGAGCTCCTGGTGCTGCCCCTCCTGGCATGGGCGCGCCTCCT -CCCCCTCCTGGTATGGCTCCGATGTACCCCGGAGGTGCTGGCAGTcctccccctccccct -ccccccggagatgccccgcccccgccgcctccaagcgacctgcctcccccaccccctcct -ccCCAGTGAGGGACTCAGCTTCTAGCGGAAAAGTGATCTCGTTGATATCGTTTGCCTATA -CGTGAAGAAATTGGCTATGGGGGCTTTAAACAGACAATCCAGCCAGGATTGTTTTCAGAT -ACCCACGGCTCGCTAACAATTGTTCTAAAAACTCCATTAGTTTTTTCTATGCAGTTTGAA -TATTAAGTCTAATCATTTTTAGTTCATCTTATCTGAAATTATTGTATTTGCCTCGTCCAC -TCATAGATGACCGTAGCGGCCTAACTTGGAGGTTTCGGGCCCCACTTAACAGGTCTTGGC -TTCATCAGCTCGAACATTTCTCCGCAAACTCAAGCTCTGCTCTATTTAACTATTTTCTAA -CTGTCGCTCTCCTGTATTTCTTTGCAGATCAATGCACTTATCTCGCTTTTCATAAATTTA -TGCCAATTGTGTTACACTGAAGCATCGATTTGCATATGCAAGGGTTACCGCAAGGGAGCT -TCTCCCTCCCGGAACCCAACATGCACTCGTCCACGTCCACTCAACGAAAACCTTCTGGGC -AGTCGCAGCGGCGATCATCAGGGCGCTCAGCGCACCCAGATAGAACTGGTCATGATACAG -CACGTCAGTCTGATTATTTTGTGGGCCAACAAAACGGCGCTCAAGAAAACTCGCAGTACC -GTGCGTCAACGACAGCGCCAGTGCACTCGGTGCAGGAAAAATGGTGGCACGTCCACCTGT -TCCGTGGAATGATCAACGATGTCAAGCGCAGAGCACCATATTATTGGAGCGATTGGACTG -ATGCATGGGATTATCGTGTTGTGCCTGCTACGGTCTATATGTATTTTGCTAAGTATGCTC -GCTCTGGTTGGACATCAATCAATATCTCGATATCTTTTCTTTGGTGTCATGTTTCACTTG -CTTACAATAAGTGTAGTATTCTGCCTGCTCTCGCCTTCTCGTTAGATATGTTCGAGAAGA -CAAGTCAAAGCTATGGCGTCAATGAGGTTCTACTTGCATCTGTACTTGGTGCAATTGTCT -TTTCGCTATTTGCGGCTCAGCCGTTGGTCATTGTCGGCGTTACTGGTAAGAAATATGGGA -TATTTCAATCTCACAAAGGCTCAAACTAAGCATCGTAGGTCCTATCACCGTGTTCAACTA -TACTGTCTATGACATCATGGCTCCTCGAGGGACGCCGTATCTTGCCTTCATGTGTTGGAT -TGGAATGTGAGTTTTCCCTATTTTCTTATCAAATGGCTGCACTGACATAAATTTTAGCTG -GTCTCTGATTATGCATTGGATACTGGCTATCACTAATTCATGCAATGCCTTGACCTATGT -CACCCGGTTCTCGTGCGACATCTTTGGGTTCTACGTTGCTTGCGTCTATATCCAGAAGGG -TATTCAGGTTCTCACCAGACAATGGGGCTCCGTGGGAGAGACCTCTGCTTATTTAAGTAT 
-CATGGTTGCACTACTTGTCTTGATGTGTGCATGGATTTGTGGAGAATTGGGGAATAGCAA -TCTTTTCAAGCGACCCGTTCGAAAATTCCTCGAGGACTACGGTACACCTTTGACCATCAT -CTTCTTTACTGGCTTTGTCTACATCGGCCAGATGAAGGATGTTGATGTCGCTACACTCCC -TACTAGCAAGGCATTCTTTCCTACGACTGACCGGTCTTGGCTTGTGCATTTCTGGGATAT -CAGCGTTGGAGATATCTTCCTTGCTATCCCATTTGCCCTTCTTCTGACAATTCTCTTTTA -CTTCGATCACAATGGTGAGTACCCTGTATTAAGCAAATTCCAGTACATTTTATGATTAAC -ATGTCCCTTCTTCCAGTGTCATCTCTCATTGCCCAAGGAACCGAATTTCCTCTTCGAAAG -CCAGCTGGCTTCCATTGGGACATCTGGCTCCTTGGTCTCACGACCTTCGTGGCCGGAATT -TTGGGGATACCATTCCCCAATGGCCTGATTCCCCAGGCTCCCTTCCACACTGCCGCCCTC -TGCGTCACTCGCCAGATCGCAGACGAGGACGACACAAACAAGGGCAAAGCTATCCGGATA -ACAGATCACGTAGTAGAGCAGCGATTCAGCAACTTCGCACAGGGACTTCTGACCCTGGGA -ACAATGAGTGGACCCCTTCTCATCGTCCTCCACTTGATTCCACAAGGTGTCATGGCTGGT -CTGTTTTTCGTTATGGGAGTCCAGGCTCTACAGGGAAATGGCATTACACAAAAACTAATA -TTCCTCGCGGAAGACAAGAAGTTTACTTCCACGTCGAATCCTCTCAAGCTAATCGAGCGC -CGTTCCGCCATCTGGGCCTTTGTCGCTCTTGAATTGTTTGGCTTCGGTGCTACTTTCGCT -ATCACCCAGACTATCGCTGCCATCGGTTTCCCGGTTATCATTCTCCTTCTGGTTCCGGTG -CGCTCTTTCCTGCTTCCTAGATGGTTCACCCGCGAGGAGCTTGCAGCGCTCGATGGACCG -ACAGCTAGTCCATTTACCATGGAAAGTGTTAGTGGCACCCGTGGGCTGGAGAACTACGAT -GAGGCGGTGGCTAGCGGTGCCCTTGATGAATATTCTAATGAAAGTGACCGAGTTCTGAGG -AGCAGAGCTTCAGCTTCTCCGGAATCTGCGGTTGAAGATAATGATCTAGAGCGAGGTGAC -GCATATGAGCTCGCCTCTGGTGTTTCACGAAGAAAGAGTACAGCTAGCAGGGCAGATTAA -AAGGCGATACCATAGCTTATACTACGATCAAAAGTGCCTTGCATTTAATCTGGTTATCTT -CTTGCTTCCACCAGTGTTATAAACCATTTGACGAATAACTGCAGTTACTGCTCGGTATTC -ATTGATGCCTGATTTATCCGGCAGTAACTACATAAATTGGTAGTCACTTTCATGACTTGA -TGACTGGTACACAGAGCTTAAATTTTTCAATGCTTATTAGTACTTGAACTTAGTTATTTG -ACCAAAATGATGAAAATGATGTATGCATGTAGTCGATTCCACGGTCGTTCGAAAACATCG -TTGTGCCTTATAAACATGGCACCTAATTCCTACCGCTTGTCAGAGATAACTTTTTCTGCC -ACGAAATTATTTCGTCCTGATAAACAGTCGCAAACAAAATGGCACCCCCGATCACAGAGA -ATCCGGAGTTCGAAGACTCAGATGTATCCGATAATTCCGATGTGGAACAACCCGATGTCG -ACGACCGAGCCCCCAAGCGCCGTCGTCTTTCAGAGTCCTCCAACGACTCGTATGTCGCAC -CTGCGCCGCTCCCCACCCTCTCTCGTATCAAGAAGAAGAGTGAGGTAAAGGAGGAGCCTG -CATCGAAGGAAGAGGAACCGGTACTTATCAGCGATGCAATTGAGCTTGGTAAACAGAGCG 
-GTTTGTCCACTTTTGCGGCTTTGGACGTCGCACCTTGGCTTGTCTCGTCGCTGTCGACAA -TGGCGATCAAACGGCCAACTGCTATTCAGAAGGCTTGTATTCCTGAGATTTTGAAAGGAA -AGGACTGTATCGGTGGAAGTCGCACTGGTTCCGGAAAGACCATGGCCTTTGCGGTTCCAA -TTATGCAGCAATGGGCGCGCAATCCATTCGGAATCTACGCTTTGGTTTTGACCCCAACCC -GGTATGTCCACGATGGATAGAGATTGCACCTTTAGTCTTCGGTCACTAACTTGGTTGTCT -CTGTCTAGTGAGCTTGCACTCCAGATCTACGAACAATTTAGAGCCGTTTCAGCGCCGCAA -AGTATGAAGCCTATCCTCGTCGTTGGAGGCACAGATATGCGGAAGCAAGCCATTGAACTT -GCGAACCGTCCACACGTGGTTATCGCAACACCCGGCCGACTAGCCGATCACATCAAGACA -TCCGGCGAAGATACTGTAGCCGGTCTCCGCCGCGTCAAAATGGTTGTTCTCGATGAAGCC -GATCGCCTGCTAGCCAGTGGGCCAGGAAGCATGCTACCAGACGTGGAAACATGTCTCGGG -GCTCTTCCACCATCCGCCGAGCGCCAAACACTGCTATTCACAGCGACCATGACAGCCGAA -GTACGGGCACTGCAGTCCATGCCTGCAGCAGGCAACAAGCCCCCGATCTTCATGACCGAG -ATCGGTACCGAAAACCAAGGCAAGATCCCGCCAACCCTCAAGCAGACATACCTGAAGGTT -CCCATGACGCACCGCGAGGCCTTCCTACACGCGCTGCTCTCCACCGAGGAAAACATCACT -AAGCCAGTCATCGTCTTCTGCAACCACACCAAAACATGCGATCTGCTAGAACGCACCCTC -CGTCGCCTCGGTCACCGCATCACCTCCCTCCACAGTATCCTCCCGCAATCCGAGCGCACG -GCTAATCTGGCTCGTTTCCGAGCCACCGCTGCCCGGGTCTTGGTTGCTACAGACGTTGCT -TCTCGTGGTCTTGATATCCCCTCCGTAGAGCTGGTTGTCAATTTCGATGTACCTCGGAAC -CCAGATGATTATGTTCACCGTGTTGGTCGTACTGCACGTGCGGGACGGGCTGGTGAGGCT -ACCACTCTTGTTGGACAGCGTGATGTTGAGCTCGTTCTTGCTATTGAGGAGCGGGTTGGT -AGACAGATGGAGGAGTTCGAGCAGGAAGGTGTCAATATTGAGAGCCGTGTTGTTCGAACT -GGTTTGCTCAAGGAGGTTGGCTCTGCGAAGCGTGAGGCTGCGGGTGAGATTGATGAGGGT -CGTGATATCCTTGGACGCAAGCGTAATAAGTTGAAGAAGGTCCGCTGAAGGATGTCCTAT -TAGATAGATCATAGATGCTGTCTACGTTGAATCAATTGATACATTTAGCGGTTTTATTCT -CTGGAACTTGGACTTGTCGTTTCGATGATGCAGGAGCAGCAATGCAGCACGTGGTATTGC -CCTTGCATCCTTGGGTGAAACCTCGCGTGGCAGTCCCATGCCCTAGCCTAGGAAGACGGC -GATGTGGTTCTAGATCCATATACTGTCAGGAGAACTGGGCAATGTTCAAATGCACAAGTG -GTATCATTGATTACAACATAAAAACGAATTATTCTAATAATGCTGTATGCCTCCAAAATA -GTATCACTCGCTGTCATGTTGGGACTCACTTGAAGTATATCAAACCTCGATATGCTCGTT -GTTCGGCCTTGTACACCATGCAATACACTGCTTTGGAAAATAGCCATCCATTGCAATGCG -ACTTATTCACTCAGGCATGCTCCACAGAGTAAGCAATAGTCTGGAAAGAATATCAACACA -GAAGAACAAAGACAATGCAGCCAGATCATCCAGTCCAGGCGTCCGAACTTTGATTGCCTG 
-TCGTACTCATGAGCTTCAATAACCCATCGACCGCGTCACCTCGCTGTGACGGCGACATGT -CATCCACACCTCCCATACTGAGCTCAGCTCTTGGGCCCATGCTTGTTTGTCTGTCGGCTT -CACGGCGAATTACCGTGGCTGGAGGGCTACTCAGCTGCATTGGGGAGCCGAGCGCACGAG -GCGGTGTTCGTTGCGCGTGTGACTTTGTGCGTTGCTGAACAACACTGGAAGACAATGGGG -AAGGTCGGCCTGGAAGATCGGGTGTGCTGTGGGAGGGGGTGGAATGGAAGCCAGTGCAGC -CAATGCGGGGGGTCTTCATTTGCGAGGATGGGAAGGATGTGAAGTGCTTGCgagatgaga -ggggtggtgtggagaggacggagagtgagagTCGTGGGCATTTGCGACTGTGCGCCTCTA -GCTCGGAGACCCGGCGGTCGAATTGGTGGTCTGTCACATTGCGCATGGCGCTTTGTAGTC -TTGATCGGAGAAGGTTTGCTTTCTGTTCGATTGTTAGAGACTGTCCGCCGGGGAAGGTGT -TCGATATCGATATCATGGTTCACATACTTGTTGGATGAAGATCTTGCGTGCGTCCGAGTC -TAAGATGACGCTACGCGTTTCTTGCTCGGGGGTATTGGGCACATTGAAAGATTGATTGGT -ATCCATTAGATCTTGTATTTGGTCGCGTTCCGGTTGTGAGTTTTGTGGTGTGGGTGTTTG -AGTGAGCTATTCGCGGTTTATATGATTAGCTTTGGATGGATCATTCTTGGGACTTCTTTA -TCTGTCTCGCATGAGCATACCTGGCATTGCATATCATCATTGACAGTGGATTGGACGCCG -GGCTGCGGAGAGGATTCCCGCGTCTGCATGTGTCCGAGGTTTTCGTCCATTTCATCCACT -TGATCCACTTGATCCCGTGTCCGTTTTTGGCCTGCAAATATAGGAGAAGGGAGCAGCTTC -TTAGGTGAAGTAGCAAATGACATCCGCTTGACCGGGGTTCCAATGGCCGGAAATGATGAT -TTAACGTGTGTGACGGGGGATAGGCACGCGTTTGAGTCTTTCTCGCCGAGGATTCGTCGC -CCGTCCTTGATTGGACTGAATTGCACTGAAGTCGAGCTCATGATGGAGCTTTCGAGTATC -TCTAGTCTGAGCGGCGTCGAGGGGAATAGAAGTCGAAATAGTAGCTGAAGGTGCTGCGGG -GAAGTGTGATGTTGACGCGAACCAATGCGTAAAGGGAATTGCGATTGCAATTGCAAGTCA -AgtagaaggtagaaggtaaaaggtgaagagtaggaagtcgaagtagaaaaagaaAAACGG -TCGCGTCGATCGCGTTTAGTGATATCTTACTAGCCTTGTATAGAGCACGGACTGGGCTCC -ACACAGGCGTGTTGAGCACGCGTTTGGATTTGAATGCAAATGTTTTGTTACTGTACTATG -AGTACTATTACTATGACTATTACTACTAGTTCGGTTTGAAATAATTAATAGTCTACTTTT -ATATGCTTGTCATTCTCTAGACGCGTATAAGTTTTGACTTTTCTTGATATGCCCTGACAT -TTTGATATTGAAGACTCGATATCTTTGGGGGTCTCTTTTATACACTCTAATATTCCAATC -TGCTTTGCTCAGACCCCTTTATCAAGATCCCCAATATTTTCTCTCTTTcacacacacaca -cacacacacacaGCAACCCCGGCCGGGGCGCGTCTAGTCCTTTTCGTGTCCTGGATCCAC -AAGGGTTGGAGACCCCACGAACGCGGTCGATTAAATTCCTTAACAAGAATGTCATTTACA -ATTCTTTTGCTTGATTGTATGTGGGAGCTGAAGAATTTAGAACGATCAGCATGTATGTAG -TGTTAGCAGGACAAGATAAGAATATATTATCCACGGCAAATCTGATAGATGGCGTGAGGA 
-ACAATGTCTTTGTGATTAAAAACATTGTGCAATTCATGCCGTGCATGACTAGCTTCAAAT -TTCCCTTCCGTGTGTTCCCTATCAATTTCTGTTTGAAGTAATTTACTAGGTCCTATTGCT -CCGGTGGTCTGTATGTCGCATTTTAACGATACACTACGTCCCTTGTAAATACTGTCCTAT -GTCAATGCCTAGTGCTTGGTTGATCAACATAGTAATGGTCTGCTGGTAAGTTGACCCGGT -TCCATAGAAACTTTCCTACGGTATACAGCCTGTCATGATATATAAAGCCCACCAACGATT -GATTCAAGATTGTATCGAACGGACAGACAGCCCATATAGTACAATCGAGTCTGGAATTGA -GGCGTACCATGTCAACATATGCTTAAAATATGGCAATAACTTAATATTGTTTTCCCCATC -AAGTTATTTTCCACACGTTTTGTGGGTGTGTCGAGAGAATACAATGGAGGGATGCCGACC -TGATTTCTCAGGCCGACATTCTAATATAGATCACCGAAACCAGCGATCTATACTCCGTGT -TCAACCTTCCATTGCTGCTTTCGACTCTCTAATCTTTTTTAATTTATTGAAAAAATATGC -CGGATGTAGTTCAGAACAATGTATGATACCCGTGATAGGAAGAATATGCACGTTCTACGA -TTACATATTGTTCCGTATGACATCATCGGGAGATGAGATGGAGAGGCCAAAAGAGTCTCA -TTGCATTGGATCTCATTACACCGTGGAGGAAAGGACAAAAAAGCAAAGATGAGATGACTA -GATGACTATTACAGCATAAATCTCATCTCATCAACTTCCAAAAGTCAAGGCCTTTCAACG -TTGATCCCTTCCCCGCATTTCTATTTATGTACCTGAGTCCTGACATTGTCCCTGATCATT -TTCAACTTCTTCATTGCTACAACTTTCACTCATTCGCTACACTCTTTGTCTTATTGCTAC -CAATTTTATCGTGGACCAAACGCTTCACCTTCATCTCGCGTGACCATGCAGCTGTCGCTC -GTGGTCCTGGCCGTTGTCTTCCAGACAGCGGTTAGCACCGCATCTTCGCTCACTCCGCCT -GTGATCCCATTAATAGTCCGAAATCCATATCTGAGTACCTGGCTTGCCAATGCTCGAGAT -ACACCGTGGTCAAAATGGCCTATGTTCTACACTGGAGAGGAGGTGGGACTTTCTTTGATG -GCTCACCTTCCGAGTCAGAATACTGTTTACCCTCTTCTTGGCAAACCCCATGAGTCTTTG -GATGATAAGGCAAGGTATGGATTTACAGAACACTATGTGAGGATTGAGACACTAACAAAT -TCCAGCTACAAGATCAAGTTTCCTAACTACAATGGCATGAACTATGATGCCTCGACGACC -AACCTGACCTACCATATCGACACTGGTTCTGCAAACCCTGTCGACGTTACAATTTCATTT -CTTTCACCCATCACCCCAACTTCAACCCTACGTCAGTCAATCCCGGCCTCCTATATCACA -ATCGATGTCCAAGGCGATGTCGATGTCAGCATCTATATGGATGTGGATGGCCGCTGGGTC -AGCGGAGATACTGGTAGCAAGATCAAGTGGGGATGGGATAACTTGAAGACCGAGAATGAG -GAGCTTGCCCTCAATCGATGGCAAGTGCAGAGGGAGAACGAGCTTGATCTCACAGAGACT -CGCGACCGTGCCGAATGGGGCACTCTTCATTTCACTGGACCATCTGTAAGTCCGAGCTAG -TCAAAATGACATGACATCTATCTAACCACAGCCCAGGACGTTGAATACCAGTCTGGAGAT -GCGTCTCAGGTACGCCAAGTCTTCGCTAGCTCTGGAGCTCTGCAAAATATCAATGATGAT -AGATTCCGCGCTATTCGGGATCGCACGCCCGTTTTTGCCTTCTCCAAGACTTTCCACCTC 
-GGGCATTCATCCAAGAGTTATGCCGATAGCGTAACATTCACTCTCGCTTTTATCCAGATT -CCCGTTGTTCAGTATGCCTCATCCCGTGGGCTCACCATGATGCGACCCCTGTGGGAATCT -TGGTATCCAACCACCGAAGAGCTCTTGAATTTCCACTACAGGGACTTTGCTGCAGCTAGT -TCCCTAGCATCCAACTATTCGCAGCAGTTGGCAGATGACGCGTACTTGTCAGGAGCTGAT -GACTACGTTGAGATCGTTGCCCTAACTGCGCGGCAGGTGATGGGTGCAACTACCTTTTCA -GGAACCCCGGATGACCCGATCCTGTTCCTAAAGGAGATCTCTTCAAATGGCAACTTCCAA -ACTATCGATGTCATCTTCCCCGCGTTCCCATTCTTCATGTATACAAACCCCCGGTGGTTG -GCGTACCTTTTGGAGCCGTTGATCGAGCACATGCTGAGCGGGCAATACCCCAATAAGTAT -GCGATGCACGATTTGGGGACACACTTCCCCAATGCCACCGGACATCCTGACGGCAAGGAC -GAGTACATGCCTGTCGAAGAGTGTGGCAACATCCTCATCATGGGCTTGGCTATCGTGAAC -TCGCTCCGCTATGAGGACTCTGCTGCAGCCTCGTCCATCTGGTCTACACAGGGGCTGCCC -TCGCCAAGCTCCGACAATGAGAGATCTGGACCGTTCCCTCTCGGTAATTTGCAGGTACTG -TCCGGAATTGCTCATCAAGATGGCAAATGGGGTGGCGGTACTAAGGGCCAGCACGAAGCT -GAGAAATGGGTTAAGCGGAGCTATAGTCTTTGGAAACAGTGGACCGGATATCTGGTGGAA -TTCTCTCTTGAACCTGCTAACCAACGTAAGAAAACTTTCTCTATCTTTTCTGGTGGTGTA -TTTATCCAAGCTAACCTTGAAAACAGTCTCTACCGATGACTTTGCTGGCTGGCTGGCTCT -CCAGACCAATCTGGCTCTCAAAGGAATTGTGGGCATCAACGCCATGAGCAAGATCGCCGA -GGTAGCCGGTCACGATGCTGACGCGACTTATTTCAAGGTATGAACTACCCTCATCTGTGC -GTCCTGACAGTTTGACTAACTACCCACACAGAAAGTCGCTAGCGACTACATCGCCAAATG -GGAAGAGTTTGGCATGTCCCGAGATGGCTCTCATGCAAAGCTTGCCTACGACTGGTATGG -CTCGTGGACAACGCTCTACAACCTCTACGCCGATGCCCAGCTCTGCTTCCATCTAGAGGA -TACCGATACTGACTCACCTGGCTTTGTCCCACGTCACATCTACCAGAAGCAATCGGTCTG -GTATCACTATGTGCGTCAGAAGTACGGCCTTCCCCTCGACAGCCGCCACCTCTACACCAA -GACCGACTGGGAGTTCTTCTCAATGGCTGTTGCTTCCAAGGACGTGCGTACAGAGGTCCT -TGAGTCTGTCGCCCGCTGGGTCAACGAGACCAACACCGATCGTCCCTTCACGGATTTGCA -CAATACGGAGGGTGATGGGGGCTTCCCTAATCCGACGTTCTTCGCTCGGCCTGTCATTGG -TGGCCATTTTGCTTTCCTAGCATTGGAGCGTGCCTGTGGTGGTAAGGCCATGGATGGCTT -GTCTTTCCTCGACAATATCGATGAAGAGACATTGGCGCTTTGGACCCAGAGTGCCGAGTC -TGCCGCTAAAGAGTTCACCATGTTGGGGCAGAACCGCCATGGCGAGGAGCTGTAGAGGCT -TGCGTCTCATCACAGAGGGGTCGTACATGGAGAGAGGCTGTTTGACACAATGGATCTGTC -GTCGTCTCTCATCTTCATTTCATCCCTCCCCCATCCCAGAATTGTGCCACTTTTGCTATG -TTATCATTAGTACAGACCTAGAGTTAAGTCCATATTCATCCGTAACACCTTGGATAAATC 
-CGAGCACAATTTTGGTCTATTTCCGTGCTAGTCCATCCTATACATTGAAAAGCGTATACA -TGCTTCTCGAAGTACAACCTCACTGGTAGATCTATTCAGATACTAATTCTCTAGTGGATA -TAAGAAGCAGACATGTCCCGTAATTCAATTGAAGATTTCTTTGCGTTCCTATTTTTTTGT -TGACGTCTTCTGGATCGATTCAAATTACATGGGCCCATTGAATTCAGTCATCCAATTCGC -GTTTGCTCACCGAAGGCAAATGTAAACACACACACAACACATATAAGACCAACCCGGAAT -ATGTCTTTTCCCAAGATAAAAGAAAAAACTTAAAACTGGGCAGTTTTTGCATCCAGCAGA -AGCACGGAGAATCCTTCACATCCCAGCTCCCCAAGCCCAATCCCCATGGGAAAGAGAAAA -TAAAAACCATCACAGATCAAGCGACACAATAAATACCACCTCCGGGTGAACTTGCACACC -TAGCAGATGGAGAAAAGGGATGGAAGAGTCGGGAATATAAAAGATATCCATGTCTGGGAG -CACGAGTTCCCGACAAGGTCAGGCAATTTAGTTGCCGCCGAGGCTGGGGAAGTTCTTCTC -GTCGACGGTGGGGCCGGCAGGGCCACGGGCGTTGCCACGGGGGGCAGCACCGCCACGGGC -GGCACCACCACGGGGAGCGCGGTCACCGCGGGGGGCACGGTCACCACCACGGGGGCCACC -ACGGGCAGGACGGTCACCACGGGGGGCGCGGTCGCCACGGCCGCGGGGAGCGGGGCCGCT -GCGGGGAGGCTCGACGTAGCGGAGGTCGACGTCAAGGAAGTTCTTCTCCTTGCGCTGCTT -CTCGCGCTTGGACTTCTCGTGGGAAGGCTGGATGTAGGCGGCTTCCTCCTGCTTGGCGAG -CTCCTTGGCGGCAGCCCACTTCTTGTCGAGCTTGGTGCCCTCGTTGGCGGTACGCTCGGG -CTTGGCGGCGAAGGTCTCGCGCTGGGCCTTCTCGGCAAGGTAGTCGGCGAAGGACTTGGA -GTTATCGGGCTCCTCCTGCTCCTCGGCCTCGACGGGGGTCTGGGGCTCGTTCTCCTCCTT -CTTAGCGATCTTCTCGCCCTGGCGCTCGTCGTCTAGGGTCTTCTCGCCGCTCTCGGCACC -CCAGCCCTGCTGGACCTGCTTGCGGGTGTCGCTGTATTATCGAAATTCAGATTAGTTTTC -GTCTTTTTTTCGAAATTGTATGATTCCCACAATGAAATTCTCATGGTTTATCATGATTGT -TTCTTTCTTGCGGGTTTTATTCGTTCATTTCGAGGGAATCGCTTACGTCTGACCAGTGCG -GGACTGGCGGTCACCGCGGCCGCCACGGCCACCACGGCGGGCGGGGGGAGCAGCCTCATC -AGTGGGCTTCTCACGGTTGTGGTTACGGCCAGCGTTGCGGTCGCGGAAAGCTACCCATTG -AATTAGTCCGGTTGTTCCGTTAGAATCGTTTCTTGCATTCACATACCGGCCTCGTTGCCA -GCGGTGACACGGCCGCCGCTGCGGCGAGCGTTGGCCTCGGTGTGACGAGCAGGCTGCTCC -TTGGGAGCATCACGCTTTCCGTGGCGGTCGGCGGGGCGGTCGACGGCCTTCGTGGGGGGA -GGAGCGGGTCTGCTGGGGTCGAGCTCTGGGTCGTTACCTGATGATAGGTAACGTTAGCGA -TCAAATTGACCGGAAGAAACGGATGCACGGATTCGAATGGAGCTTTATTCTTCTGAATAA -AAAGAGCTCATTGTCGTCGGGAAAGATAGCCAAGAATTGAATTCAAGGGAGGGGAAGTGA -AATTTACCCAGAAGCTCGTAGAGGTTCTATATGAAATCAATTCGAATTAGTTATTGAACA -TTTCAAAAAAAAAAATCCTCGCCCCCAAATCGCTTCCAGATCGCGATTTCTTTCAGCATT 
-GCGTCGCTAGCAGCGCGCAGGAGAGCTCCAATTCAGCGTACCTTCGACCGGACGGACTCC -GCCATGACGCCTGTTGTGTTTCGGAGACCTTCCGGGGGGAGGCGCTCAACTTTAGACAAT -CGGTCTAGAGGAGTTAACGGGGCACAAGGCAGAACCCAAGGGAATCCGGCCTGTCAATTT -GATGATGATGAATGATCAGGGTGAAGAGAGGTCCACGGTTTGTGGTTGCAGGGTGTAGGG -TGACTAATCAGGAAAGGCGGCTTCTGCTCCTAAGCGAATATGCCCGCGATTGTTGATTGG -CTGCCCGCAGTCTAGGGCTTAGGGTACTTTACGTAATCATTACGTCAAGGACTGCTCTGG -GGATCTTTTTCTCATACTTGCTGGGATTATTAGCTCTTATACAATTGATATCCACCCTTT -CTGTGTGTGATCTTATGCATATTTCCCAGACCCATTCATTAGTTTACACAGAATTGACAT -CTCCCGGTGGAGTATGCCGATCTTGACTGTGGACTATACCGTTGTACTTTATTGCCTTGC -AGTAGCTATGTAATCATGGAGCTATCCGTGGCAAATATGACGGATAAGTTTGGATACACA -TTGATGTGAGTGGATATGGTTGTATCTGGTTAGCTATTGTATTGATCATTCTCTATCCCT -TGCGTATACATTCCGGGCTCGATATATGCCCAAAGCTCTAAAGCTCCCTCATACCAGAAT -TATACCCAAAATCATATACGACCCCTCACTTCTGGGCATAAACACCGGCCTTGATATCAT -CCAACAGCTTCGGGCCAACCGGCTCCCAACCAAGGAGTCCCTGGGTCTTGTTGCTTGAAA -TTGGGTTATCAAGGCTCATAACCATAGATATGAAGCCGTCAAGAGTCCCAGCGGTAGGCA -TGAAGCCGTCAAGCTTCCCCATTTCTTCAAGGGACTTAGACACAGCTGGAAGACCTAGAT -TTTGACCGATAGCTTCGGCTATATCTTTAAGGCGAACACCCTCCTCGCCCACAGCATGGT -ACTTGCTTCCAGCAACACCCTTCTCTAGGGCAAGTCGGTAGAGCCGAGCAGCGTCGTGAC -GATGAACAGCAGGCCAGCGATTCAATCCCTCATCGATGTAAATCGCATTACCCTGCGAAT -GGGCCAGAGCAACGAGTCTATCGATGAAGGCGTGGTCCTCCTCGCCGTGCACAGTCGGCG -GCAGACGGATGACACAAGCACGCACGTCTTGCTCAGCAAAACGGGAAACAACCTTTTCAT -TACGAGAGCGGATGCTCATCGATTGTTCGTAATCGATGACTTCGTCTTCGGTGCTGGGTT -CACTACCCTTGGAGAGCCCAAAGGTGCCAGAAGTGGTGATTAGAGGCTTGTTGGTTCCGA -TCAGAACTGAGCCTAGAGTTTCGATGGTTGCTTTTTCGTCGAGGCAGTTTTGAATATACT -TGCTGAAGTCGTGATCGAATGCAAGGTTGATCACGCCATCGGCCTCTCTTGCTCCTTGCT -TGATGATCTCGGCGTCGCGGAAGGAACCGCGGAGTACTTTGACGCCTTTCGCTGTTAGAG -ATGCCGCGGCTTCGTCGGAGCGAGCGAGGCCTGTAACCGTGTGGCCGGCGGTTAGGAGCT -CGTTGACGACTGCTCTGCCGACGAAGCCGGTGGCTCCAGTAACGAAGACGTGCATGGTGA -GAATGGATGTAGGGAGATTGATGAAGCGTGAAGTGTAGCAGGTGAAGGCTGGGAGATTAT -GGTATTCTATTGCTTGAAGTATATGATTGACTATGTATGATGAAAGTTATACAACATAGA -AAGCGAGTTCGCGGCCCTTTATAGATCGAGCTATCGAGGAACTAGATACTAGTATAACAC -CGAAATAAACAATTGCTAAAGTCCACATCAACCATTCCTAACTCCATATTAATGTTACAT 
-TCTAGGCCCTTGGCTTACCTCCAGATAAGTAGTAGGATTATTCATCATGCCCGAAGTGAA -GTAGAACGATCAATTGCGGCTGCAAGCTGATATTATCATCACTGCAGGGTTGCAGGACTA -CGATGTCACCTAGATTGACTGGGCAAACTACCGTACGAGTTATCCCTTTCCTTCAAATCG -TACCTTTGACAGCCTTAAATTTGATACAAGCATGAAGATGGTAAACCAACCGCAGTTGGC -CCTTTAAGCGCAACGGGCATGCTAATTATAAATTAATACATATGGAAGTCTCGAATAATG -AATACTATATTTGAGTTTTAGCCTGAAATTGGGTGAGCCCTCTTGTAGTGTTATAATTGA -CAGGTTCGACCTACTGCATACCTAGTATAAGATATTTGACTCCAAAGGCGCCCCTAAACC -TTGCCCTTGAACTTTATAAACGCGTTAGGATAAGATGAAAATATCAGTGCTATACTTGAA -CTAAACTTTCTAATACAATATTGTCCAGGAATAGGATTTACACTTCAACTTTATATGGTT -TACCTAGTAAAAGGTACAACTACATATGGATGATACATACCTTATGTACGTTTGAGTGGG -AGACCCTCTGTACGAGGAGAGTCTGTGATTCTTGCTTTGAAAGTGTTGTAAGGGCCGGTT -ACGGGCCACAGGAGCGATATTAGTGATAATCATATCATTAAAGGAGGCACCAAGGACACA -AGAATCGCGGCCTCCCGTATGTGACAGCGAACATAACACTACTAATCCGAATACTAATCT -TAGCTCGAATCTCGATCAACAGTATATGGGCTTCTCAGGCCTCAAAATCCCAGCAGATGT -AGAGTGGAGATAGCGAGTGAGGGGATATGATTATGATTTTCATTCAAGAATAGAAAAGCT -GGCTGAGTAGAAGCAAATATGCAGATGGATAAATTAGCTAAAGGCCGTTATGGGAAGCAT -CGATTCATATACAAAATGTCATATAAATATATCTCTGAAGAAACCCAGAAGAGAACATTG -GTATCAAAACCTTGCAACGCCGGGAAATGCTCATATCAAACTTTTAGGAAGAGATGATCA -TTTGCCACATAAGATGGACAGATATATGAATATAAAACAAGAGAATAAAAGAAAAGAAAA -AGACAAATTTTTCAAAAGCTTATTGCCTCGGGTTGTGCTCGTAGTTGAACGGGGTGGGGA -AGTGCATGCTAGCGCTTAACCCGAACGGATCCGTGTCCAGCATCGGCTCGTACTGGCCGA -GCAGCTGGTTTGGATTACCGAAATCACTGGGCGTGCCCTCGCCCGGTGGTTGGTTGAAAC -CGGACATATTGTGGGAGCCCGAGGTGACGGACTGCGGGGTGAATCCAGGGACATAGGACG -CTGAGCCGTCGTGATCTTCATCGACCATATCAGCCTGTGCATCGTACTCTTGCTCTGCTT -GAGAGGTCAGCTGGGGAGGGACTTAAACCAAAACAAAACAGGCAAACATAGGCTTACCGA -GTTGATCGTAGTGTTTCTGGAATGGAGAAGGATAATAAGCAGCCGCTGCGACCCGCGCAC -CGCGAGGACCCATAGCTCCACGGGATTGGGGCTTGGAGTCTGGCGGTTCGCTCTGTGGCA -TGCTGATTGCCGGCGGCGAAGTCTGACTGAAAGTCCGGGGTTGGGGGAGGAGTTGAGGCC -GTCCCTCGACAGGTGACATGGCGCCCTGGAAACCGAAGCCGGGAGACTTGGCGTGCGAGG -GCGAGGAAACGTGAGACGTGGGGGTTGCTTGGAACTGCGGCGACGGGATCCCGTATGCGC -CTTCCCGCTGCGACATGCCGAACGTCTCGGGCTTCGAGGCGATGCCTGCGGGGTGTCGCT -GGTTTGGACGAGCCCCTGCAGTCGGGGCTGCCTTTGGAGGCATCGGATTTGCCTTGGGGG 
-GAGCGCCCGGCGCCCCTGTTTTCAGGCGTAGTTCGGCCTGAACATCGATGCCTAGGGAGG -GTTAGTTAAGGTTTGGACCAGCGGTTCGGGGTTCAGGGTACCTTTTTCAAGGAGAATCCG -CTCTAGCAGGGAGTTCTTGTAGCGCAACATTAGACATTCGTCGGCGGCACTGCGATGACT -CTGCTGCAGACTCTGTAGAGATTCCTCATTGCGTTTGATGGTGGTCTCTAGCTGGCGGAT -ATACTCCGTCCTGCGTTCTCGGAAGGCCGCCTGGGCCTGCCTATTGCGTTGCTTTCTCTC -TTCTGAGGTGGCATATATCTGAACGGATTGTTAGTATCAAATGAGATGAGTAACACAGAG -GGAATGCGGGGCCCCAGATTTGCAAAAGATGTTACGTACAGGTTTCCTGCCTCCCTTCCT -CTTCTGAGTTTGCGCGGCATCCTGAGTCAAAGGTTCTGCTTCTGGATCTGGCGTCGACAC -AGTCGAACTGATCGATCCATCAGGACCAGGTTCATGTTTTGGCACGATCGAGTCCATGGC -CAGATCACTATCAACCATGGAGACCATTATATAATCTAATTCAAGCCTGTAGGCGGCCCT -CCGCACACGGAACAGACACAGCCAGTTTTCCCCGCGTTGCCGGACCTGTTGGCCGGCaaa -aaaagagaaaaaaaaaaaaaaaagaaaaaaggataaaggataaaaaGGGTAAAAGGGGAT -ATCCAGGCAAATGCACACACGCGCGGCTAAAGGGACGTGCTTTGGAATTCAAGTGACATA -AAGGAAACTGGATTATGAGAAAGAAAGAGTCAGAGGAATTGAGataaaaagaagaaaata -taaaataagtaaaacaattgactaacataaaaaaGTCTGTCCGGTCAGGAGGGGCTTTCC -TCTCGCGTACCTGCCCAGAACTCttttgatctttttttctttttttttttctcttttttt -tCTCTCTAACTTCATTGGGGTTGGGTTTTGGGGTTCCACGTTTTTACTTGAATGGCCCAT -GGATATGACCCACAATGGACATTCATTACACTCTGCTGTACTCTGTCCTCCGTATGTTGG -ATGCTGTAGGTGTGCACACTGCACACTCTGCACACCGAGAGTGTGCCAGTACAGAGAACA -ATACACCTGTACGCCTATACGTTGGCCGCGGAGAACCCTGCACAGCCGTATATCCGATAC -CGGATCGTCCCATTCGGAGTCCTCCGCCTGCCACATGACCTGACCTATTGGGGTACTTGG -AAGCGAATCACGCGCTTCTCCTCGGAGACCAATAAAGGATCGCATGACGTTCCCTTCCCC -GGACTTAGAATGCATTGCAGGATGTGACTCTCATGGGGTTTGTGCCAAGAATTCCAACGC -ATTTGGTCTGGACTTATGATCTCATGGGTATGCGGAACCATTTGGAGATCTGATATGTTT -TATTAATAGTATCCATGGATGGCTCGGACTACATCAGGGGTCTGGAAACTCGATCTCGAT -TTCAAGACCGGCAAATGATAGAGAACTTGAGTGTGACTTGAGAATATACATATCTTTCAT -CTCTACTGATAAAATCATCCGGCGATATAGTCCTAGCACATACTATAGTATCTATATACC -TCTACCTAGGCAAAATGTAAACCCTCGGCGATCGGCATTTCCCCCGCCAAACTCCCGTCA -CATGTCAACCTCATCTCCAGACTTTCAACCTCCCAAAAAAAATTAACCTTCACTCCGACC -GGGCCGTATCTTCAATTGAGAATCATCCTAGTTTATTTGAATCGCACAATATGGCCACCC -CCTTGCCGACTGAGGATCGCCCCAATGAGGCTCCCACCCTGACCGCGTCAACACAAGAAT -CCCAGCCCGCGCAAAACACTGCCCCAACCGCGTCAGCACCACCACCGGCACCGGCACCGG 
-CACAACCCCAGAAAGCAGCTGGAGATGACGATGAAGACTCCGACTTTGATGAATTAGATG -GTAAAAAGATACCCAACCTCTCCCCCCCTCCCCTCCCCCTCACTTCGCCCCGCCACGAAT -AAGACACAAATACTTACACCTCCAGACGTCCTCGACGACTTCAACAAACCCAAACCTGCC -CCCGCAAGCCCACCCGCACCATCCACCAATCCAGACCCTTCAACCGGAATCACACCCCCA -GACTTCGACGAAGATGCCTTCCTCAAGCAACTCGAACAGGACATGGCGAACATGATGGGA -CCCAGCGGCGCAACCAGCTCAGACCCCAACTTCCAAGCAACACTCGACCAAGGCGCCGAC -GTATTCGCAAAACAGCTCGAAGAGAGCGGGATCCCACCCGGCGATTTCCTCAAGCAGCTT -CTCGCAGACGTCATGGCTGAGGAGGGCGGGTCAGGGGGACCTTCGGCGGACAGTCTTGCT -GCAGTTGCCGCCGCGGCTGGTATCCCTGCTCCTGGCACTCAGGGGTCATCTAGTGCGCCA -GCTCCAGCTGCAGGCGCGGAACAACCTGACTCGTTCAACGACGCCATTAAGCGCACCATG -GGTCGGATGAAAGAAAGTGGTGATAAAGCTACTGCCGCGGCTAGTAATGAAGATGATATC -TCCGATGATATGCTGGCGCAGTTGCTGAAGGCTGTTGAGGCGGGGGCAGCGGGGGCTGGT -GACGAGGGAGATCTGTCGAAGATGTTCATGGGTATGATGGAGCAGCTTTCTAATAAGGAG -ATGCTGTATGAGCCGATGAAGGAGTTGGATATCAAGTTTGGGCCGTGGCTCGAGAAGAAT -AAGGGTACTGGGAAGGTTTCCGCTGAGGATATGGAGCGGTTTGAGACGCAGGCGCGGGTT -GTCAAGCAGATTGTGGTCAAATTCGAGGAGAAGGGATTCTCGGATGATGATCCCAAATGT -CGGGAGTATGTGTGGGAAAGGATGCAGGAGGTGAGTTACATTGACATTTTGATCGAGATC -TTGGTGTGTTTCGTTTACTGACCATGTCCCAGATGCAAGCCGCTGGAAGCCCGCCTGAAG -AACTCGTGTCGAATCCCCTCATGGAGGACCTGGGAGGTGCGATGGGTGGAGCTGGAGGAG -TGCCTGATTGCCCGCAGCAGTGATATGATTTTGAAAATGTATGATGAAGCTCGAAAGATA -TTGCCTTGGTGTCAAGGCTTGCTTGTTTCCCTTGTTCATACATCGTCCCGTCCTGTATTG -TACATATGCGCAACTCGTGTCCATATACCCTTATTTACTTTATGTATATATCCGGATTCC -CTCGCCCAACCGAAATGTTATAAAAAACGCCGTCCAATGCAATGCCGCCAAATAGAATCA -ACACTTAACCCATTGCCCTAGCCCGCTCACGACATATCCATATCCTGAGACTGCGACTGA -TCCCCTCGAGGCTCCGACAATCCCATAAAAATATCATCGTACTCGTCATCACTAAACAAC -ACACCAGGCCCATTCCCATTCGACCGCAAATCTCCTACCCTCCCAGGAACCGCACTCTGC -GTCTCTCTCAGCGCCATCTCCATAGCTTCCTCTTGCGATATAAACTCGTCAAGCGCATCG -ATATCATCTACCAATCTTCGTCAGCAATGTGCAATCTTCCATCTAATATATCCAACCAAA -CTCAGAGAAACCAAAAAACATACCAGGAAGCATATCATCCTCATCCCCCTCAAAACCCCC -AACAACCTCCTCCCCATCCCGCGCCATACTCTCCTCCCACCGTCTCCGATCAGCAAGATG -TTCCATCCGCATCATCTGCTCACCGCGCGCCTCAAACCGTCCATCGTCACGCTCATGTCG -CACGCGATTAAGAAACATCTTACGCGTTTTTGAGCGCGCCAGACTCGCTGTATCCTTGAG 
-CGGATTAGCGATAGACGCATAGCGCGCTTCGTATTTCGGCTTTGGGCGCGTTGGAGAGCT -GGGCGAGGTTTGTGGCTGGGCTATGAAGAAAGGGTTGGAGAATTGGTTTGCTGTGGGTGT -TGTTGAGGCGGACGCGGACGCGTTAAAGTTCATTGCGATTGGGGAGTGGTTGTTTTGTGT -GTGGCGGGTGATTGGTTCAGTCTGGATATTGAAGATTGGAGGTGGGTTGGTGAAGAGGTT -TGGGGTTGTGCTTTGCGACGCGCTTTGGGGGTTTCGTGTCGGGGACATTGTCCATATGGG -TGGGGTTGTGGGATGTTTACTTAAAGGGGAGGAGCGAATTGGGGTGTGGGTTTGGCGGAA -GGGTGTTTGGGGGGATTGGATTGCGAACATTGTGTAGGTGTGGATTATGGGATGGAGGGT -CTGGATCCGTCGGATGCGGGAGTCGGTTGCGTCAGGTGGATTTTGGTGGTTTGGTGGGTT -TGGTGGGTTCTCTAGGAATACGTATTTTAGGTAAGGTCTGGTCTGAGAGTGAGTATGGTG -GTTTATTCTGTCGAGGTCTAAGAGATATAAAGTGATGGCAGACACGTCGTTGAGAGTCGC -GCTAAATTAGCGCGTTTTACCCCTTGCATTGGTGGTATCGGAATTTACTTCGAATACTGA -TATTCCGTCCAATTAGCAACACTTAAAACGCTTTCCAATGTGGCTATCTCAAACAATTCA -TCCAATCATCTACAGCAATGCTAGAACCCCAGCTAGAATCGCAATGCCAGCAGCAAATCC -GCTTCCCTTAACGGCCACCGCAGAATTCTTACTACCATCCTCATCATCCTCGGGATCCTC -TTCAGTGACGACTTTCAAACATGTCAGCTGAGAAGCAGTCTTGTCGACAAGACTACTGTT -GCCCTTCCCAGTGAAAACAGTGAGGACTGGGGTAATCTTATAGGCCTCGTCAATGAGAGC -CTGTGCTGTGTAGTTGTTCTACAGTATATATTAACGCCTGCACACGTCAGATATGAATAG -AACACCACTTACCAAAGCAGTGACACTCGCAATCTCCATCAGCTGATCAGACTTGGGCTG -AATAGGCCAGCAATTCGAGGAATTCTTAACCGTATTGAGATCCTCGAACGATTCAACCGT -GAAATTCCCAAGCTTGCCACCTGCACCCGCGAACCCATCGCACTCATCGAGAGAGACCTT -TTTCAGTTCCCGCTCAAGCGTCGTACAGACATTTCCACTAGCGTCTTCGGCTAACTTCGT -TGCCTTCTCTGTAAGTTTTTCGATGCATGTCTTGTCAATAACATCGGAACAGGTCCCGCT -CGCAGCACGGGTATCTGTTCCATTGTTTTTGCCTTCCAGTTTCGGTCCGTCGAACTTCTT -AGACGGGGCTCCGTGGAAGACAACAACACAGCCGGACAAATCCTTGGTCTCTACGTCCGA -GGGTGTGCCGAGGTAGTAGATCGACTTGAAAGGGCGCGTGGGCTTGTCATTGTCCTTGCC -GGCCATACCAGCTGACACGTTGACGCCTTTCACGAGGGAGAAACCCTCAAGAGACGATGG -AACGTCGGGGATGCGGGATAGTCCGATACCAAGGAAGGTATTGTCTCCCACTGTGCAGCG -GTCATCGGCACCTTGCAAGGGACATGAGACCTCTTTGCAGCCTACGAGGCTAGAGGAGAG -ATCGCCATCCTTTGCAAGGGTGGAAGAGACGAGTAGAGAGAGCCCAAGGGCGAGCGTGGA -TGATTTGAGCTGAATCATTGTTTTTGGATTGTGTAAAAGTAGGCTGGATGTAATTTTGTG -ATGTAGGGGAAACCAGTCTGATATTGGAGAAATTTCTATATCCAACAACCCGCTGGGGGC -TTCATTATACGGGTTCTTTGTGTCAACTTATGCCCTTATAAGCTAGCATGAGTCCAATGT 
-TTGTTGGGCACCTCAATGACATCATTTGTGATCGTTTGCATGCTTGGCTAATTGGGGCCA -TCCTTCAGGGGCGACTTGGAGGAATAGTTCGTGTCGGCTGAGCGGGGCCGTCGTGGGGTA -CCGAGGGCGTTGCTTGTCTGATTGGAGGCATTTCCTGTATGAGTCTCCAACCCATAATAT -ATCTGCATGCTTGGTTGGGGGCTGTTTTTATTGGAGTCTCCCACTTCTAGTTCGCTTGCT -TGGCTGCTTTGGGGGCGGTGAATCTTAGTGGCCCCTCTCATGGTGTGGTTACAGCGGATC -TCTTGGCTGCTTATACGACGATCCCTTGGCACTCTCTTCAGGTTTTCAACAATGTTGGAG -ACGTTGCAGACGTCGTATGACCTAAATGGAACAGCCTAGAGACTGGCAGCAAGTACCTTG -GAGATTCATGCTCTATAGAGGCAGTGGCTGATTCAGGTACACCTTCATGTGACGTATGTA -GTATTCGCTCACACTAGATAGGATGTTCTCGCTTGTTCGTCTTGTTTGCTCCAAGCCCTT -CATTAATCGGTAGGGCTAGAAAACCAGGTAATCATTTCCTCCAATTGGCAGTGATCTTCG -CGCAACTACTCGCCTTGTGGAGTGTCGGGATAACCGCATTTCTACTCCGACAACCTGACC -TTCTTTGGTGTCTTCGGTATCTCTGACTATATATATACTATCCGGGTATGTTGATTAGAC -CAATTTATACCGACAACAAGTAGATGAGCTGCCGTCAGCTACCGTCTCAAGCAAGCTCAA -TTCTCACAAACCAAACTAAAGTAGCATCTCTAGGACGAATGTATGTAGAAGGGAGTTCTC -GCCGTGACTCATGAACTTTCGCATGTAAGACATCGTCATGAACAACTTGCTTTCCGGAAC -AAAGCGGATGTTTTGAATCCTTCGTGGGGGCTTTAATCTAATTCTGCATCTCCTAATACA -ATTAAGGGCCTGGGCTATCAATTTGTTTGTGATAGAAAGCCCAATGTTGCATTAGCACCA -GGGGCGAAGTGGCAAAAGGGGCCAAACGATACGATTTCGGTTGAAGTGATGTCGATCCGA -CGATGGAATAGATGGAGACAGCCCTATAGTGCTCACTGCTGGGCATAGTGAGCTTCTAAC -GAGGAGAAGGATGAGGAGAAAGGGCAAAGAGCAGAAGCCTGATCATTACCGTGTGGCAGT -CTATATAAAGTCAGCTCACCTCACAAGATCGAACCCTCATCCATTTCCAGAACAGCCCTC -AAGTACAAACTACCTACCTCAATCCGCAAAGATGTCCGTCGTCGATGCCATCGGTGTCAT -CTCTGGTATCCTTACCATTGTCTCATTTGGAATGGACAATTTCGGCGAGGGTGAGACCTC -TGGTTCAACCATCAAAGTTGCCGTTGCTCTTGATGGCCCGAACGGGCCAACAAATGCAGG -TGGTGACATCCCTGATGTGTAAGTCAAGCAATCATACATACCAACTGAAGAGCCAAAGGC -TAACAATCAGAACTAGACGAGTGTGGAACGAGGTCGGGGATTTTGTCGGTATGAAAGCCG -ACCCCGGTACCGTAGGAAACGGAAACCTGGGCGAAGTCAAAATCAACCACGAGAACCAGG -GTGTATACTCCCTGTTCTCGGCCAATGACGACGCCATCTGCGTGGCTTGGGTAACCACCA -CATGGAGCGACGACCGCGGTGGCAACAAGTATGCCGTGTCGGGCGACTACGGCGAAGCAT -GTGGTGGTTCGTGGTTCGAGAGCCACATGTACCCCAACAGCAACGAAGACTACCAGCCCA -AGTGCTTCTGGATTGACAAAAACGGTGATCAGCCCAAGACTGGATTCCAGGTCCGCTGGC -CTGCCTTCTCAAAGGACGAGCAGGACGAACATGATACAAATCCCCAGAGAATGTGCAATG 
-ATATCAACTTCGGACTGCGCGAGGAGGATGATCCCAACACCATCAATTACAATGTAAAGC -GGAAGCGCGGTGGGCCTATGCGGGCTCGTCAAGCCTCGAGCCGGCCAGCGTGGGCGGCGT -CTGAGCTGGTTATCAGTGACTCCAAGTCTCACTCGGCTAAGAGACTGTGTGAATCTGATA -CATCCATGGGACCTGATTTTGCCCATACTGGAGAGGCTGCCTTTTGTGATATGGGATCCA -AGGTCCTTTATGCATTCTGCACCGAGGATGGACAGACGGACTGCTTTGATTTCGACTCTC -AAAGTGTTTCTACCGGTACAGCCACTCGTGATACTGCGGGAGGTGTTGATACTTACTCAA -GCGTTCGTGATTGGCGCTCGGCCGGCAAATCATAAGCCTATGCTGTATTTGTTCTGATAC -CAATTGTGGCGTCATATATATGTGACGGAGTTCACCTCCAGTCAATAATTTGTTTTCTTT -CTATCCTTTTATTGTATTTTGTCAATCCTATCTCGTTTTTCCCATATTGGTAATCAACAT -CTGGGTGTTCTTCCTCAACCACATCTGACCTGTATAGTCTGACCTGTATACTCTTCAGGG -CATCTGAAACAGTATTACCATACCGTACAGGTCTCACCTACCTACCTTGACACGCCTCAT -ATCAATCTGAAGAAAATCCAGTCCTCTCGCAAATCTAACCCCCATCCAACAACTCAGTAG -TAGTTGGACAAGTGTTTGATCTGCCAGATCCAGTCAACCACAAGACACCCCAATGCTCTG -CACTCCCAGCTATACGTACGGGAGCTACTTATACCTAGCTGGAGGCGCAAGGGCGATCAT -CACATTGATAATGACTGTCGTTCATTGAGGAACAACTACTTCGGTGAACGGTGCCAATCT -CCCACCTTCAAGGGCTGGGATCGTCACGTGGGCCAGTCCACGTGGATACATATTCCGGAT -CGAGATCGAATGTCGATCTTAATCTACATCGATGTACAATAGTTTGGTACGATTTGGTAC -TGTGGATTGTTTCACTGATTGGATTTATTTGCCTTGAATTACATACATACAGATCAGAAC -AGATCAAGATAGAAAGGGCCTTTTGCAGCACGTGTCTATCAACACTATACAATCTGTGAC -GTCTGATGCTGCAAAAGGTACAAGAATAGAATATTGGCCAAGGCATAATGAAGTTCGCCG -TAGGACAACACCCGGCGCCTAGATACGGGGAATCAAAAAGAGATGACAGGCAGAACGGAA -AAGTCGTGCCGTATTTTTGTCAGTCGCGCAGGGATCGAGAATCGCGATTGCGGAAAAAAA -GGAATGTGGTGGTTGCAGACAGAAAAACGCTCAAATGAATGCGCAGATGTTCAACCTATA -AAGGAAGAGACAGGGGGTGAAAAACGCCCGAAAGAAAAAAAAAAGCAATTTTGCAACCGA -ATAAACAAACGCAACCGTGAACTCGCATATCCTGTCCATAGCGCCTGAGAAAAGCATGAA -GATCGTACAAGATTTACACTTCGACAAGTGTCGAGTCAACGCAGTACACTTAGAAGTTAG -ATGCCGCGTTGACAGGACCAGGGTTGTGATACTGGTCTAGTGTGTTCTTGAAGGGGGTGT -TGCCGGCCGGGCTGGGGGCCTTCTCGCGAGGACCGGAGCCGACAGCGGTGCCGCCCATCA -GATCATCCTCATCCCCGAATTGCTCATTGCGCTTCTTGTGAATTCTCCAGAAGACTAGGC -CTACGGCACCGATAAGGATGGCACCGCCGACACCCACCACGACACCGATGATGATCTTCT -TGTCCGAAGAAGAAACACCACTGGAGCTGGAACCCTTGTCACCATTAAGAGAAGGGGCAC -TGGCGGGAGTGGAGGTGGTGGTCATCACGTTGGCGATGGTGGATCCATCGCTGGTCGCGT 
-AGGAGGTGATGGTCTTCACGACCGGGGTTTGGGTGGCGGTGGTCTTGGATGAAGTGGTGC -TATCGCCCTCGACACTTGTCTTAGAGGCGGGATCAGTCTTCTTAGAGGTAGTTGAAGCGA -CAGTAGGCTCCGGAGTGGCAGAGGGCTCTGCTGAGGTGCTAGAATTGGCGGTGGTGGGAT -CGGTAGTACGCTCGGAAGAGGTGGAAGAGGTCGAGCTGGTAGTAGGGTCGACTGTGGGGG -TTGGTTCCTCAGAAGTCTCCTTGGAGGTTGTGGAAGATGATGAAGTTGTGGTTTCAGGCA -CCGAGGAAGTGGAGGATGAGGTGCTTTCAGTTGTGATATCCTGCACGGGGTTGGAGGCAC -CAGCACTCTGGGCTGCGACCGCAGTCAACAACAGAGCCTGGATGAAGAACGAGAGGCTGG -ATAGACGCATGATAACGAATGTGTGTACAAAGTAACGAAAGAATTTGACCAGAAGTCAAG -AATAAAGAGAAATCAGCAGGAAAATGAAAAAGGAGTCAAAGCGAGAAAGTGAACCTTTGG -GCAGTGTTTGTCCCGAACTTTGGGTGGCGCTAATCCTAATTAGGTACTGGTCTCAGACTA -AACACAAGCGCTGAGACATCGGTCACTCTGATTGGCTCAAATTTCAATAGAAAACACGTT -AAAAATGGGTCGCCTGGACGTTTTGACCGATTGGGAAGTTTTATTTTAGATATTTTAGAA -AGGTCTTTACATTAAAGAGAATCGAATGAATAGAAAGGGAGTTTTAGAGACTCTCGAATG -AGTACATGACCTACATACTACAGACTACATATGTACATAACGATATGTGTATGAGCGAAA -TCTCTGAACATGTGGGCTGAACCCCTCGCTTGCCGCTCCATAATTAGCACTCCCATGATC -TGCCTCTGATCTGCCATTTTATTGGCATTGGCGTAAGCCGTATGACATGTTGCATCAATG -CGTACAATCCGTTGTCACAAGTCTACTCCGTACTCCGTAATGAATGGTTGACATCTTCCC -ATATTACACGGATGCTATCCCATACTAACTTAGGTACGTGGCCCCTTTCGTCTTTGAACA -AAGCCGACACAGCCGGAGTTCTAAAAATTGGAGTGCTTTTGCCCCTAACCCTCATTGGTT -CATGTGGCAACCGTCCACGGGTAGGTTTCGGGGTTCCCGCCTGTTCTGGAACATTATGGG -ATCTTACCTGTCGACCTCCAGGACTCGGGAAGACCTCGGGAATGTACATTGTAGAAGGCA -ATTGACTGGGAGAACATCACACTCGAGGAAAATATCCTAGCCAAAATGAGTGGCTGTGTA -TTGATCCCTATTCCCCAGGGGCGTTAATAGACCGGTCGACCCGGTTTGCATGAATGTTGT -AGGATGTACGGAGTAGATTTGAGGCGAATGTACAACATATGGAGCATAATAAGTTCAGTT -TAGTAAAGCATTACTCTTCACAACTAGTCAACGGAAGCCCGATGCCTACGATTTAAGTCT -TTCCCGGGATTCCACATGAAACCGTGGCTGCGTCACCTGCATGGCCACTTTCCTATATCC -GTCGCATCCGCCCACACCTTCACATCTTTGTTGATTTACTGCTAATGATGAAAAATAGTA -TTCATAATATGTAATGAGTACGCAAAATAATGGTCCATCTGGGCAGCCTTTGATAAGGTT -ATTTGAAAATCGCGAGATCATTGGGGATGTCCAACATTACCTTTGTGAGACAAAGGCCAC -TGCTGAAACGCTTATCTGGGGCGAATCAAACATTATGTAAAGAAGTTCCTTTGCCAAATC -GGGAAAGAAGCTGTTTGTTCCTATTTTGGAATATTGAAGGGAGTAACACAATGGAAGAAC -CTTTAGTATCTATGGGTTGCATGAATAGAAGGTTGAGTAGACGTTTCACTTTCGAATCTT 
-CCTTTGTGGTCAAAAACAAGCGGAAGGATTGACTCCTTGCAAAAGGCGACAAAGGGCAAA -TTGCCCATAGCCATGGAATGAAGCACTCAATACTTGAGTTTGACTGTTTGATCAAGGATC -TGTTTATTGTCCCAAGAATAGAGTCTCTTGTTCTCGAGGCCACTCTCGCAGGAGATGGAG -ATCTCTCTTCCTTCTTCCTTCTCTTTTGCTCAGTGGCTCTGATATAAATGTCATCTCTTC -CTGTTTGACATTCCAATTTGACATCTGACTTGCATACATCTTTCATAATAGCCAGCTACC -TTGAGCACACCCACAACACATTCATCATTCAGCGTCCAGCGTCCAGCGTTCAGCATATCC -AAATTCGACATACCAATCCCATTCCATTCTGCATACTTCATTCAATCAACCTATCAACAA -CCAAATCCCATCATCTCAAGCCTCACTAAACATGGTTGAGACACGCTATTATCCCGAGGT -CATCACACCCTCGCTTCCCAACCTTGATATGCAAGGCGAAAGCCGATTCGAGAACTACGA -TTTCGCAAACATCACACATATTGTCGTCCAGTAAGTACCATTCCAATTGATGAGCACTCT -ACCCAAGCTAGTGAAGATGATTAACGGGAACTAGCCCATCTGATACAATGATGAAGAACA -TCAATGCCGGTGCCAAGGCCAATACCTCCTACAATTACGACAACAAAGATTATTTGAAGA -TCATGGGAAAGATCCTTTCTGTGAAATTCTTTGGCAACGACTACCAGCTAGAATGGCTAG -ATTCCCAGGGTGTTGGCAGACACACATTCATTGCATTCACCAATACCGAAAAGAAATGGG -AACGTGTTCAAAATCAGACCCCGAGTGACGAGAAGCTGGAAGTGGTTGAGGTGATGTTCA -TGCCAACCAACCCTCGGGAAGTACGAAAGGCCTTCTGGAGGCCCGCTAGAGATTTTGTCT -TCGGAAGGGTTCAGGCCAGAGTGAAGGAGAGTTCGCTTGAGATGATCCGGCTTCGAGTGA -TGTGGACGCGATTGACTGTGTCTCCCCGGAAGTTGCAGAAACTGAGAAAGGGTTCAGCCT -CTCCAGAATCGGCTTGCCTTGATATTTAGGAGTCAGACTAGACTAGCGAGAGTGACTCGA -AGTGATAATTAATGATAATTGTACTCTTCCTGATCGCGTACTGCACCGAGCTAATAATAA -TTTATTGTATAGCGTAGGCTTAGCAATAATACAAATTGATGGCAATTTGACGGGCCTTGA -TTGCGCCTGAAGCCAAGTTAGATCCATGGTAACAATCCTCTGGCCCACCCTCGAGAGCAA -TCCCGAAACTCGATATATATGCCCCAGTTATTGAATCCCAGCCGGACTGTATATATATAA -AGAGCTGGGAATAGTAGAAGTGGCAAGTTGTTGATTCAAATCAAGTTCAATCTCCGGATG -AAACAGCGGATACATCCTAGGGAGTGTGAAAATGTATCCCACTGTATACCCGGTGTCGAC -ATAAGCGCCAAAACGATGACAGAAAGCAAGAATCCAGATCTGCGAAATGCAAAAACGATA -GCATGTGCTATGTGTCATATTCAATGTGGGTATCATCAGTTTCCTTATCAAACAAGCGCC -AGTGTATCAAAATTTATTCCGCGGGTTTCCCTTCATCTGGTCCTTCAGCTCTTCCTTGAA -ATGGCTAAGGAATTTTTTGTACTCGAAATCTAACAAAAAGATGTCAGACATAGTACTTCC -CCGAATTTTGGTACGAAGCATACCTTGTCCCTCGAGTTTGGTGCCACCCTGCTCCTCCGG -AGGCACCCCGTACCTGATATGTTGGACTTTCTGGTGAACTCGGCCCACCATACGGTGAAA -GATCGGACTTCGAAGGAGCTGTGTGAATTATTAGAATGCAAACTCAAGCGCAAGCATGCT 
-AGGGTCGGAGCAACGTACTCGAACGGTGAGCCATATCTAATATGGAACAATGTTAGTGAA -TATCAGAGTGGCGATAGCAAAAGCACTTACCTCCATACTTCGGAGCAAGAGATTCAAAAC -CATCTTGCAGTTCTATAATACTTGACCATGAGATTTCAAGAATAAGTTGATATTGATTGA -GCAAGTCGAAGATTGAGATGGAAGCCCAGTCTTGGGGTCTCGATCGCCAGAAGATTTCTT -AGTCATCCTCCCCGCGGGCCCCGCGAGAGAAGCTTTCTGAATCATCCCGAGCTCGTGGAG -CATGACGGATTTAATTATTTTACAACTGACGGTTCTCGACAAAGTCACCCCTTAAACGAC -TGGAAATGGTCGTGTGGCTTTGTCAATATCCCGTCTTCGAATCCTGACCATAACCAACGA -TGGCGAGTGGCGCCAAACAAAATTCACAAGTGACTCGGTTCTTCACTGTCCAGTTAAGGA -ACAACGTTTCTCTCACAACACCCAGATAATGACCAAAGGTTGCTATACCTGTCGACGACG -TCGCATCATCTGCGACAATGGTCAACCCACCTGTCGGAAGTGTCGAGATGCGGGGAAAGA -ATGTCTGGGGTACCAAAAGCCACTTGTTTGGGTCAAGGGTGGTGTGGCTAGTCGAGGAAA -GATGATGGGCCGCAGCTTTGATGACGTTGAAACACCGATATCGTCTAGCAAGCCAAAGCG -CCAGTCTGCTACCGGGACGGCCAGCAAATATAATTCGCCTGCGACGAGCAGCGGATTTGG -CTTTTTCCCTATCGCCGAATCCCCAACCGATGCGGAATCGCACAGCTCTGGGAGCCAAGC -TAGTCCGGAGACCGATCCATGGGCTTTTGACATGGAAGCACCAATATTTGGAGGGGAAAT -AGCACAATGGAGGGCCAATTCGACCGAGGAACAGGACACCGCGGTCATTTATCTTCCTCG -GAACACTCCTCCTGCCGACTATACTCCAACTCCATGGGGACTGGTAGATCCACTGCTCAA -AGATCTCAGTCATTTCTCAAGGTACTATGTACATCACTGTGAGACTACCGCAATGAACTA -TCCGTGGCCCTGCTGACAATCACTCGTAAGACAATCAATATATGGTCAATGACTTCGCGC -TTTACTCCCAACACAAAAATCCCTTCCGTGATCTTACTGCGCTGGTCAATCATTCTCCTG -TTCTTGCTAGCAGCATCACTGCTCTTGGAGCTCTTCATTACTCACTTGTATCAGAATCGG -ACTCATCTGTCCTACCATGGTCATCCGCCAACCTATCCATGGCCGACTCGGACTTATCAG -TAGAGGATATTGAAGATATCGTTGCGCCAGCAAGTTCTCGAAAACCAACTTCGCAGGCAT -ATCATCATTTCTTAGAGTACAAGCAGCGCGCCCTTCGTCAGTTGTCTATGGATCTCAATA -ACCCAGCAATGCAAAGAGACGGCAGAACTATAGCTGCTATTGTCTTACTTGCCTTCCTAG -ACATCTTCGAATCTGGCAGCGGCGCCTGGAGTTATCATATTGAGGGCGCAAAGAAGCTTC -TCAAAGACCGACCTAAAAATGGTCCGGGACAAGGGATCCTTGATGACCTGGACGCTTTTG -CTCTTGACGGGTGCTTAATGTAAGATTTTGGTCTCTACGTATAGCGAAAAAGCTCTCGCT -GACCATTACAGAATGGAAATCATGGGATCAACACTAGCTCGCCCAGGCGCGCTCTCAAAG -CCCTTCTACTCCTCATCCATGGACCCAGCGATCCTCAAACGCCTCGAAGAAAACTCTTGG -GTCGGATGTCCCGCATATCTCCTAGAAGTAATCTTCTTCGTCCACGCCCTCTGGTACCCA -GACCCCGAAGTCACTGTCATTACCCCCCAACCAACAGCGCTGCCAAATCCAATCCAACCC 
-GGTCAGCCTCTCACCCTGGACGCATTTGCAAGCCTCCTCCAAGGAATCCGCAACTTCGAC -CCAATAGCCTGGAGCCAAGAAATGCAAAATGTCTTCTTCCTGCCAAATCTCAACTACCGT -CTCGCACTCGCAACTTCCTACCAAGACGCCGTCTACCTCTACACAAGCCGCGTCCTCTCA -CGTACCAGAAAAGGCTTCTCACCCCCCTGGACAGACGTCGGCCTCCCGCTGGACCACAGA -CTCATCGCCACAAACCTCATCACGCAAATCTGCCTTATCCCGCCCTCAGACTCCCACTTC -AAATGCCTCATCTGGCCCACTTTCATCGCTGGCGCGGAGTGTCGTCCTTCGCAGCGCGCT -CTTATCCTTGAAAAGCTCCGCTCGCTCTATGAGGCCCTGACGAGTGTTAATGTCCGTAAT -GCTGCGTGGGTCCTGCGTCTCATGTGGCAGAAACAGGATCTCAAGCGCCGTGATCAGAAC -GTTTCTGTTGAGCAGGATAATGATAATGATCACGATATTGAGTTTGACTGGATTGAGGAG -CTGGATCATTCGCGCCTCGACTGGTTGTTCATTTAATTTGAAAATAATGGAGATTGAATC -TCACCTTTATGAGTAATGTTCACATGTACCTACCTATATATATACCTGTCAATTAGTACA -TTTTGAGATCACCTTGCTTTGGATACGCGGTATTTCATTATGGAATCCAACATATGTAAC -GATTGTGGGGAGACGGTAGAATAATGATTTGCGAGTAAGAAAATCGGGTATATCAATTCT -CGAGCTAAAACAGAGGAAGAGGAAGAGCCAAAGCAGGAAAACAGGGAATTGTGTACTGAG -AGGAGAGCCAAGGGGTGAAGCAAGTAGAAAAAATCACAATGAAAATGAGGTCAAATCAAG -ATGCCACTGTAAGGAAGCAGGGAAATCGACGTAGACAATGAACCCAACGAGAAAGGTAAT -GATATCCAAGGAAGGTACTCTTTTGTCCAAATTGATACTAAAAACACCGAGTGATATAAG -GGAAACATTGTGAGGGAAATGGGGACGCAGTATAAACAGCCTTGAGTTCATATTTCTCAA -GGGTAATCAACACGAGTCAGTCCCGAGAAAATCTGATATCCCACCAGGAATAGGGAACAA -AAATGAAAAGAGGACAGAATAAAATCTCGATGTTGTGATGCTTCAGGCCAAATCAGGGAT -ATATAACAGCGGACGACATAGCAAACGACAAGGAAAAGCGGTATTCTGAATAGGCTATCA -GGAAATGAAAGAACAAAGTATGCACAGGCTCCCTTCCAAATCTAAATGCAAGAAACATGT -TTCACCAAAAACATAAACAAACAGATAAACACGCTTGTCATTGGAATATGCAAAAGGTAG -GAGAACCGATATGCTGGGATAACACCCCATCTTTGTGCGAATGCCTGTGCATGAAAGGGA -AAGTAAGAAAGAGAGCCGGGGAGATGGAGGCTGTGTTCTGGAGTCGAATGATGAGATGGC -AAGGTGCGAGGCAAGGCGGCAGGTAGTATAGGAAAGTCGGTTTGATACCAACGACAATGA -GAGGGAACCTAGCATGGGTGAATAGATAGACTGTAAGATGAACAGGTCTTCTCCAATGCC -ACTAGAACACTGCAGCGAGGCCATTTTTCGAATGACGCTTGCTGAATCGTTTTCTTCCTT -TCGTTGACTCGGTGTCGCCGGTCTGCTGTTTCACCAGGTGATTCCGACCCGACGATGGTG -GGCCTCTATCGTCTTTCTGCATGTCGGAGGTGCGGTCTAGTGTATTGGCGGGGCTGGGGA -AACCAGAGGAAAGGTCGCGGAATGCATCGTCGGAGTAGCCTGCGCTTTGTTGCTGTGATG -CCATCTCCCGGGACCTGTCCAATGCGTCCAACCACTCTTGTACCTGGACCGCTGTGGCGC 
-CGATGTTGCTTGATGATGGCTTGGGATCCTCGAAGACGTAATGTTTATCTCTCTGTAAAT -TTTTGTTAGATAGGTCCTTTTGAACGAGAAAGGATGAACTACATACCGTGTCCACAATCC -AGGATGAGACTCCTTTGGAATCTGTCGTACTTCTGTAACTTGTCCCTGGAGCAAGCAGGG -AGATCTCAAGCTTGATCTTCTTATCATCGCCGCCCGATGCGGCTAGAATCACCCGTGCAG -ATGATGTGACCATCACCAAGCGCTGTCGTTTCTTTGTGGTGCTTCCTGAGAAGAAGCGAG -AGAACTTCCTGGGTGCCTCAATCTCACCACTGCTGTTCTTGGATGTCGGGCTGTGTGAGG -CAGCAGAAGACAGGACAACCAAATTACCCAATTTGAGGATCCGCTCGTTGGATTTGCTTA -GTACCGGTGACCATTCGATATCGAGCTGACTCGGTGGGGGTAGCTCGGTTACCACCCGTG -GAAGGTGGCGGGAACTGGCACTTGGTGCGTTTGATGGAGCAGTGGAGATGACTGGAGGCA -GGCTCTCCCCGTTTTTACTGTCATTGAGCTTGATAGGGTCGCGTGCAGGGGGTACATAGG -CCTTCAAGCGGGGTGCCTTCTGCTTCCATAGATCTGGACCCCATGTGATGCCTTCAAAGA -ATTCATGATTTTTGATGTGTTCGATCTGAAGACGCCGAGCAGGCTCTAGAACGAGAAGAC -GTTCGACAAGATCACGCGCCACGGTAGGAAATCCAATTGGGAACTCGTAGTCGAGTGCCA -CAATTTTCTGGAAGGTTTGGTACTCGTTTCCAGCCTTGAATGGTGGACGACCGGCCAAGA -GCTGGTATATAATGCAGCCAAAAGCCCAAAGGTCGCTAGCTTTGCATGCATTTTTGTCTG -TCAACAGCTCGGGGCTGACATACTCTGCTGTGCCGACGAAAGAGCTAGCCCGCTCTTCCT -GTGTAAGGTCGGCCGAGTCGAGAGGAAGAATGCCATTTGAGCTAGGATCAATTCGTGGGG -TCTTGAGAATCTTTGCCGTTCCAAAATCAGTGATCTTGACATACATACCGCTGTCAAGTA -GCACATTTTCCGGCTTCAAGTCTCGGTGGATGACACCGCGCTTGTGCATATAGTCTATCG -TGTCAAGAATCTGAGCGCCGTAGAATCTTGTACACTCTTCGTCAAACGTGGTCATCCGTT -TGAGGACACCGAGCAATTCTCCTCCTTTGCATAAGTCAAGCACGAAATAGAGTGACTGCT -CATCCTGGAAGGTGTAATATAGCCGCACAATACCCGGGTGGTCGGTAAGGCGGTTGAGTG -TATCCTTCTCAATGTTCACGTATTTAACTTTCCTCTCCTTGATAATGTGTCGTTTGTCAA -GGATCTTAATCGCATATTCCTTCAAAGTCTGTCGGTCCGTTGCAAAGACGACCGTACTGT -AGGAGCCTTCGCCGAGAGTATTTCCAAATGAGAAATCTCGAACGCCCTTCTTGATATAGC -GGGAGACAGTTTGGCCATTGGCATCAACCTCCTGTCTTACAGAGACTGCGGCGCCACGCT -CTTGCCATTCCTCACTACTCGCCAGGGGGCCATTTTCTGCCATATAGGACGTTCGCGGCA -TCTGAGACGCGCCGCCAGTTCTTGCCTGTGCTCGAGACGACTGTCGCATCACCGCCGGGC -CGTTCCCCGGGATCTGCGCGGTACGGTAGGATCGGTCACTATGGGAGTTGTCGCGCGCCA -AAACACCGGCATGTAATGCCGACGATCCTCCTCTTGCGTCTAACCGCGCTTGAGACACAG -ATGTGGATGAAGCACCGGAATCGGTTCGAATGCGATATGTATTATCACGGGGCAGGACCG -ACGTTGCCGAGCCATTCACGTATACTGAGGAAAGGGGCCGGTTGGGATTCGGAGGTGATG 
-AGAGGGGCGGTGCCGAGTACGGCTGGTATGGTAGCGGTGGTATTTCGGGGGACGTTTGAG -GGTATGAAGTCTGGCGATCGTCTGTTACAGGGACTTCGGTTGGTGCAGGAGGGAAAGTAG -AGGGGTGTTCGTCATGTGCGGGATTGTGTTGTGTTGATACCGAACCGTCCGGCAATCCGA -GCGGACTGGAGCCTGTTGATCCAGCGGTCGCATTCTCGGTTGGGGGAGGCGAATCGTCCG -GATTGGCTATCCGAAGCCCACCTAGAGCCTGGGACAGGCTGAGATCCCCGTTCATGGCGG -AGATTTTGGAGGTTTTGCGGCGACCGGAGATATGCCGGAGTAGTGTTCAGGGGAAGTGAG -GTACAAAGGTCAACGAAAGACAGGCTATGATCGGAAACGCAGCGGATGCATGATCCGAGG -AAGAGTTGGGGGAAAAGAGAGAGGTGTGAGAGTGAGAGTCGACGTTGGAGAGGGGGAGAA -GGATCAACAGGAGGCTCCGTAGAAATTCGGTCTAGTGTCGAGGGGCTGAGCCTGACTTTG -ACTATTAATATTTATTCTCAAGGCTTCGAAGATTCCGAGAGAGACAGAGATAGGGAGATT -TTGAAAAGAGAAatatataaatatatatatatatGTTATTCTAATCCCTTGGAGTGAGAT -CAAAATGGGGGTTGATTTGTCCAGTTTCGAGCTGATAGTTGACAGCACACAGCATTGGCA -TATAAACAATATATTTACACAAATTTCCTATAGAAATCCAGGATTATCCGGCATTGCAAT -ATCTATTCTCAGCATGCCCTGGTCGAGGTTTCCTAAATTGCGAACGCGGTTTCCTCTCTA -AGACACAATGTTTCTATATACGGAATCTGCTCTGTGCAACTGGTAATTCAACTTTGAAGA -CATGATCAGACGGCCCCCAACTATCATTGCTCTAGACGAAAATGAGGTTGAATCTCACAT -CCAGCGGATCTATACCCGTCATACCCTAGCTGTCAAATTCGAGCAGCTGCAGTTAGACGA -GAGCTGCGATGAGATGGACCTATCCCCTAGCTCCTGTGTCTCTTCGGAATCCGATGTGGA -CATTGATACCACAGGCCTGAGCCAAGAAGCATCCTCGTGTTGTGAGCCTACTCCAAGGAA -CAGAAACTATGCTTCAAGAGATGCAATTTCACACACTCCCATGACAAAGTCCTGCCTCAA -ATCCCCAACCTCAAGCCAACAGTCCTCGTTACAAGTGACAATTTTCACTGGAGTACATTC -GCAAGCAGATCGGTCTGTTGACCCACCGCGACTGAAGGTGAAATTTGCTCTCTCACCGCA -GGACCTTGAACTCCATAGCGATAAGGCCCCTCCTGAAGAATCAGGTATGTCAACACTCCC -GAATAAACATGAACCCCACCTAACTGTCCCATCCCCGGGCAGATCGAGATCATCGAGAAA -ACCACCAGCCAAGCCACACTGACGCTATATATACATCCATGTCTATTGACTCGCCAGCTT -CCACGCCACAGGTTTCATTCTCGTCGTGTGGTGTGTCGTCAAGTCCGGCCACCGCAAGCG -TCCCACTCCGGGACCTAGCTTTAGCGTCCACGGTGTCTTACGAGGAAGTCTTGGCCGAGG -GGGCAGGTTCGCCTGCTAGTCCGGCCAATAACATTGGCGAAGATTGTGACTTTCACACCG -AAATCGTCATTTAAGAGCTCTATCAAGAAATCATGGATAGAGGTTTGATGATAGGGGCTG -CGTTCTAGCGGGTTGGCCCTTACGAGTCCACCATGAGCTGTCCCAGGTCCGTTTTCTATT -GATACCAGTGGCTCTGTATATTTATGGTATGATGTCTCTCGCCCCAAGACCCCATGATCA -TTCAGGCAACAGATGAAGTATGCAATATTTAATCCCCAATATGAGATATCGAAACAGCCT 
-GACGCAGAAGTAGTTCTAGGCCTACGAGAAATGTCAAAAATAACCCCTCCAAGCCTTGGT -CTAGTTTGCAATGTCTCACGTTAAAATGCAAAAGAAGCAATCTAGAATGTCAGTAAACAC -AAACCATCTGAAACAACGATATCGGCAACCCAAATCTATCAATAGTTCAGTGGAGATGAA -CCACAAACTTGTCTGCCTCCAGATCTGACGCGGCCTCGGCAGTGCCCTTATAACCGTTGA -CACATCTCACCATCGCTAAAACTTCTGCCTACGTCACTCTGCATTGCTTACTTTGCTTAC -TCGGCTTCTTTGCTTTTTGTTATCTGCAGGACATGCCGTTTACATTGTACATGTTCGATC -CTCGATACGACTGGTTCACGGTTGGCCCGCAATCTCTGGGGGATCCACTACGCCACCGCG -TAGAATAACCTGTGTCGGGAAATTTCCCTGTGTTTGATGGCATTAGGAGTGTTGGGTACG -GAGTATGGAGTGGACTATTCTTGAACCCAGGGCTCTGCGATTCAGGGGATTTTGGGTTCT -GGGTTTATGAATGGACATGGCTGTGTGTACAAATCAGTGATTGTCTGGTTGGTGATCTGC -ATTGTGGCGAGTGTGACTGGGGGGCGACGGCATGGTGGGGGCGTAGAGGTTGAATGGGAA -ATGCCACGAAAGGATCTGTCCATTAGGGTGCAATTTGCTTGTCCCTCATAGCTTGACTGG -ATTTTTGGAATTGCAGCCTGAAGGCCTGGTGTGGCACGGGTCACGTGGAAATCACGTTGA -TCATTTTGTGAGAGGGCCAGTGCATTTTACTTGAAAAGGTAAGAGAATCATCGGCCAATT -GAATGAATTAGAAATGAGCAATTGCACATGTCAAGGCAGTCTAATAAATTGTATCAAAAG -CAAGTCCCGTCGGCTAATAGAAGAACAAGGGAATAGGTAGATGATGAGGTGAAGAGGGTT -AGATGGTGTTAAGTAACAAGATGGAACGCCAATTGATCGTCTTCCAGATGCCGCTTCAAA -TGCTCTCTCGTGTATAATTTTTCTCCATGACGGTGTGAGCTCGTTTCAATCGCATCAAAA -AAGGGGAGAATAAAAAAAAATAGAACCCTTCACTTCAGACAAGAATGGTGGGACAAAATA -AAAAGGAAAAAGGGGAATTATCTACTTCTTGGCGAAAGCCTGGGCACCATCGACCAAGCG -GCGCTCGGTGTTCTGGCGACCGTTGACGAAGTCGGGGTAGTTGAGGGTCAACTCAACAGC -ACCGTTGACAATGTTGTCAACAGCCTCAACACCGAGCATGAAGGAGTGATCCTGGTTACC -GACCTCATAGCGCCAGCTGCCGAAGCGGCCACGGGACCAGATACCCTTCTCCTGGAGCTT -GGGGAGAATCTGGGTGAGGGCACCCTCACGCTCGAGGGTAGGGGTGGGATAACCGTGGTC -GAAGCGGCGGTGGTAGGTGGAGACAATCTCGTCACCGGGCTGGAGCATCTCGGTGTTGAC -GAGACCCTTGATAGAATCGGCAAGGAGGGTCTCGTGGTTGACGGGCTTCATAGAAGACTC -GGAAACCTCCAACATGACGGACCAGTAGGGACCATCCTGAGCATCGGTGTTCTTGGGCTT -GGAGCCGTCAGCGAGCTGAATGGTGGGCAGCTTCTTAGAGGCCTCGGGCTGGTTGTGGGG -GGAGTAGTTGGAGAAGATGGTGGCACGGTAGAATGGGCAGTCATTCTCGGGGAAGTACAG -CCAGCACTTGTCACCGATGCGGTCGGGACGGGCACCACGAATACCAACACCGATAACGTG -GGTGGAGGAGTAGAAGAGCTGCTTGGTGAGGGGCATGAGCTCGGTGTCGTTCATAGCCTC -GGCGAGGTAGTCGACGGCCATGGTGGAGACCAGCTTTCCGTAACCAATGGTGGTGCCATC 
-CGCGAGGGTGACGGTCTTGTTGTAGGCGTTGACCTTCTCGACCTTGTTCTTGGGGCCGTA -GCGGGTGTTCTCCTTGGGAAGAGTGTTGGCAACAGCAATCCAGATACCACCAGTACCATC -GCGAGCGGGGAAACGGAAGGTAGCGTTGGGACCCCAGTTACCGGCGGTCTTGTTAAGGAT -GACGTTGGTGGTCACAGCCTTGACGTTGGGGGCAGCGACACGCTCACCGAGCCAAGCGCA -TTGCATCTAATGATAATTGGTTAGTATTGTTTCAAGTCTTGGGAAAAAAGAAGTCAAAAC -TTACCTTGGTGGTGGGAACGGCCCAAACCTTGTAGTTGTAGGGTCTCATGAAGAGGTCGG -CAATACCAACACCCATCATGCGGACAATCCACTCATCGAAATCCTTGGGCTTGGTGTTGG -AGACACGGGCCTCGAGAGCAGCATCAATCATGCCGTCAATGCACTTCACCTGCTCCTCCT -TGGGCAGCATGGAAATGTTGTTTTGGAAGGGGTAGGGAACCCATTGCTCCTGGCAGCGAA -CGTAGGAGATACGCTCGTGGGTGTACCAATCCTCCTCCTTAGGGAGGGCCTCGTTGATGC -AGTCATCGAAGTACTTGTAGTGGGAGAAGATAACGTGACCGCCGACATCGTAGAGCTGTG -AATAAGTTAACGGTGTGGACATTGACAATTGAGATCGAGTCGCAACTTACGAAACCTTCG -GGGGTAACATCAGTGGAAGCCAAACCACCAGGGATCTCGTTGGAGTCGATAATCATCCAT -GAGGGGCCGTTCTGTATAATCCATTAGTTTTTGTTCATCCCCGGATACCCCTCCCCCTTT -TTGTTGGTTTTTTTTTGTGCCCCTCTCCGACACAGTAAACATTCAGAAAAAGACTTACAA -TCTGGTTCAAACGCTTGGCGGCACCCAAGCCAGTTGGGCCAGCACCAATGACGAGAACGT -CAACAGAACTGTGGAAGCAATTGTTAAGACGTGACGAATGTTGGGGTGTGGGCGAGACCT -TACATGTCGGGGTGGGTCATTCTGCCTTGTAGGAGGTCCTGAAAGCTTGGGACTCGTTTG -AATGTCTTACGGGCGACGTTCAGCATAGAAGCACAGCTGCAAGAATCTGCAGAAGATCAA -GAGGGAATGGAGGGATAATAAAGAAGGAAGAACCAAAGAAGAAGGGGAAGTGGGAGGACA -ACGTGACAAGCCAAGCTATCTAGACGCATTAACGGATCCGGTGGAGACAGACGTGCCTGT -TTCAGCTTTACTGATTCGTGACATTTCTCTTTATTTGTTTTCAATTTTTAAGATGATCCT -TTTTTTTTTTTGGGAATTAATTTCTTTTTATTTTAATAAACCCTTAAACTAGGTCATCCT -ATCATATTGACAGAGCTCAATGACAATTACAAGTACTCAGGGGGAGGCAGTGCTTCAGGG -TTCAATTTTAAGAGTACGGAGGATGCAGCACGGAGTAAAAGAACGCGTAAGGTAATCTCA -TCACACTGTAGTCATCCTAGTTTAAATCAAACAGATCACATTCAGCGTAAAGCCGTTATT -GGTTTTGAGCTTATCAAGACACCAAAAGCAGGGACCTGGAGTGCCTAAAATCTGCATTCA -CCTTTTGCATGCGGGTGAGTTGATATTGGAATTCCCTTGGCGTGTATTGGATAACATTCA -GGGTCGCCTGGAATGCCCGTTATCATGTCCCATCTCGGGACATTATTCTAGAAGAACCAA -AGAGGCGGAAAAAAAAAGGGGCAATATACAGATAGGGGCAACCATTGAATCTCGATCCAA -TATGGTCAGGCTAAAAACGCCACCGATGTAGGCGTTGGCACGTTGGTTATTGGTTAGGGT -TAAATTAGTACATGGTACTGTTTGAATACGGAGTAGTTTGTATGGACCTCCATTGCATAA 
-ATTCTGGAACAAATTAGACTATCTTCTACTGATATCTTAGAGATGGTGATCTATGTTTCA -TCTACATCGATTTTCTTGCACACTACTCCATGCATAGAGTATTGTGTATCATATGCACCA -TGTCGTCAATATAATAAGATAATCTTAGAGGAAAATACCCTGAAGCTTCCCGGAGTAACA -TCCGCAGAGTTGATCGTTCCATCTGGCCTCAAGGAGAGGAGATTTGGCGAACCCTCAATG -TACGGAGTACAGGTAGCTGGTTCCACTGATTGTCACTCCCAAGGAAGTAATCCGGCGAGG -ATCTCTTCACAACCAATCTGTATGTTGCCATTGATCCGCCCCTTGAAGTAAAAACTTCTC -GAATCAACAAAGAACCTCAACTCTTGAAACACCAACTCCATGAATCCCAAAAGCTTAGGG -GTCTCCTCCCTGGTCCAAGCACAAAGTACTTGTGGGGAGGGAAGTCCCCTGATTGCGTCA -GACACCTTTGGAAGGCGGATCGCCGCATTCTCCACCCGTAAGGGACCGAATGGGGGACCC -AGGTCACTGAACGCGGGCTCTGGTGGGTCTTGGCTAATAGATGCTTGCGTAATGCCTTAA -TTTGGATCTGCACTCTTCCGGTTTTCTTCAATTTTGGATTTGACGCATTCTGAAATGTTC -CCGATAGTATTTGACATACCTAGAGATATTGAGTTAGGTCAAAATTGGAATATGGTTGAC -ATTTAAGTATAAGAAAGTGATGACAATCCTGGAGTGGCCCGGGGGGATCCACCAGGTTAG -GTAATCGGGAATATCTATAGGGAAAAAAAAAAAAGGTAATGCCCATTTTGGTCATTTCCC -AAAGTTGGCCAGACTTTAAAATTAAGAGAGCTAGTAAGGTACCAACGGTATTGGTACTGG -TATAACCAGTATCCATATGAAAAGTATTACATAACAAGGGTGGATTCACTACGGAGGAGA -TGATCAACAAAGAGTACAGGTGGCCTTTGCAAATATGTAACGGTAATTCACAAACTTTCA -TTACCATGTATATTATTGCATTCGTACACCTTCAAGACTTGCTGTTTGCGGGCAAGTGGA -TCTATTCTGTCCCTGTGTCTATGCTGTTTTGGTTATCTGGTTCTTGGTCTATTTTCGACT -CTCATTTTCTTTATCACTTGGTTTCCCTCTCTCTGTTTCCCTTGCAAATCCCCGAGTGTT -TGGTGCCGAAAAAAAAATTACTCCCGAAGTACGTACCTCATTCCATCCCTTCAACCTTCA -TCATCATTTCTTCATCCACACTCGTTTTCTATTTATTGCGATTCTTATGGGCGCACTCGT -TAATCAGAGCATTTTAGTGATCATTGCCTTCTAATCTCCATTATCTTGTCCACTTTTCTC -CGTGGCTTCCTCCGCATTTAGTCAAGGAAGCCTCTCCCGGCCACGACCTTTTGAATTTAA -GCGCTTTTTATCCGATCAATCTAAAACCTTTACAATGAGCGACGGCGAGAAGGTCCGCGC -CTCTGGCGAGACTCCCCGGGAGCAGACTCTGCCTACTGTCAACCCCGCAGCGGAGAAGGG -AGAGCAGGCCGGCATTACATTCCACCCCGCGGTTTATGTGACGTGAGTAAAATCAAAAGG -AAAGAGGTCGGACAGAATTGCTGACATCCTGACAGCACATGGATCACTCTGAGTTCCACT -GTTATCCTGTTCAACAAGTACCTCCTTGACTATGCCAACTTCCGTAAGCCAGGTTGAAGA -TGCACCGTGAAGGAAAATGGTACTAACAATATAACTAGGTTTCCCCATCATCCTCACAAC -ATGGCACTTGACCTTTGCTACTATTATGACCCAAATCCTCGCTCGCACCACCAATGTCCT -CGACGGCCGCAAGAAGGTCAAGATGACCGGCCGAGTCTACCTCCGCGCCATTGTTCCTAT 
-TGGTATAATGTTTAGTCTGAGCTTGATTTGTGGCAACATGACCTACCTCTACCTCAGTGT -TGCCTTCATCCAAATGCTCAAGGCCACCACCCCCGTGGCTGTCCTCCTCGCAACCTGGGG -CATGGGGATGGCCCCGGCCAACATGAAGGTTTTGGCCAATGTCTCCATCATTGTGGTTGG -TGTTGTCATTGCCTCCTTCGGTGAGATCAAGTTCAACATGGTCGGTTTCTTGTTCCAGAT -TGGTGGTATCATCTTCGAGGCCACCCGCCTGGTCATGGTGCAGGGTCTGCTCAGCTCCGC -CGACTTCAAGATGGACCCCATGGTTTCTTTGTACTACTTCGCTCCCATCTGTGCTGTTAT -GAACGGTGCCGTTGCTCTGTTCTTGGAATTCCCCCACGTCACCATGGACCATGTTTACTC -TGTTGGTATTTGGGTTCTGGTTCTCAATGCTGTTGTGGCCTTCCTGCTTAATGTCTCGGT -CGTTTTCCTGATTGGCAAGACCTCTTCCTTGGTCATGACCCTGTGTGGTGTTCTCAAAGA -TATCCTGTTGGTCGCTGCCTCCATGTTCTTGTGGCAAACTCCCGTCACTGGGCTCCAATT -CTTCGGTTACTCCATTGCTCTGATGGGTCTTGTCTGGTACAAGCTTGGCGGTGACAAGAT -GCGCGAGTACACTTCTTCCGCTGGCCGTGCCTGGGCTGAGTATGGCAACAACCGCCCTGC -GCAGCGCAAGTTTGTCGTCTTCGGTGCTGCTGCTCTTATGTTCTTCCTCTTCATTGGCAC -TATGGCTCCCAGCTATGCCAATAACTCCGTTGACCGTGTCAAGGGATATCTTGGTGGTGC -CACCGCTGGTAATGCTTAATTGCACTTATCAGACCCTTGAATTTCCAGCTCTATAAATTG -GAACGTTCAAAGCTCGGCTCACTTTCGAAGCCACGATACCCCTTCTTGGATAGAGTCAGC -CGAAATGAGGGCGCGAGTGGATAAGTGCAGGCATGTAGCGGTATCCGTTTATCTCCGTCT -CTCGGTAACACAGAGACTGCCGCTTATGCTTTCAATATTGTCTTCATAGGCCCGTTGCGG -TGCAATGGGCAACGGAGGCGTTTTGCTTCTGGCTGTTTTATTACCAATCTTTTTTTTCAG -CCCGATTTCTGCGATGTATACAGTGACACGAAAGTGTTGTTACTGGTGGTGTACTATCTT -GTTTTTTTCTACGAACATTGTTGATGTTAGACTTGCACGAGCTCTGGTGTCGGTTTGCCT -TGGATTTGATCTTGTGGTTTCTTACGTTCTTTTTTTTTTTCACATTCGTGTCCTTATAGT -AACCCAATAGACTTGAAATTATGACTTCATATCATGCACGTGATCTGCCATGTACATATG -TACTTACACGTGATATGTGAGCCTCACGTGATGTGATCCTCACGGGCCACCAGCCCTGCC -TTATAGGCCAATGGTCTCTAAGCTGGATGGGTCCAGCTCGACTCCACCCGAGGCGTCTTG -GCTGCATGAGCCAATGGTGCCCGTTAATAGCGCATGTATTGATACCGTCACCGGCGATCG -GATAGCTGCCTTTTGCTGGTATATTACCGTTCGACTTTGTAACTCCGCATTTCGCGCATC -GATTCATCCACTTCTTCAGATCGTTCTTTCCTGTCTCACActcttacccttcctacttcg -ctctacttctgttcttccccccatttttcttttttcccttcccaattctctctcttctct -tTCAATCCCACCTTTCTTCCCTGTTACTATATCTTTTTCTTCCTTTGAGACACCTTCCAC -CTTTTACCTCTCGGTCTTACTTCCGTCTCTCTCCAGGTGGTTGAAGCTTCTTGTGTCTTG -CTCACCTAATCTAACAGAACAGTTCCGCTTGTCTTAAATCTAAGACTACCTCGAGGCAAT 
-AATGTATCCAATGATGCTTAATCGCCTCTCGGACGGTCCGGCCACGGTTCTCTCTCCTCC -CCAGGAACACGCCTTCGCTCAGTTCTCCTCTCAGTCCTCTGTACGCGACCCCCCGGGAAG -CTTCAACGGCTACCCGGAAAGTCCAAACACTGTATTTTCCAAAAAAATCGCTGCGCAGCC -TATCCCCTCGAGCTTCAGTTTCTCGTCTGCCCCTGCGGTTCCCAAGCCCTCCCGCAAAAG -ATCGCGCGATGAGTCCACTTTTGAAGAGGCCGTTGCTCCCAGCGCACCCCCGGTGCCAGC -ACCCAAAGTCCAGCCTATCTATGGGGAAGGCATGGTTCTTCTAAATCCACAGACTGGACT -TGCAATCTCGGCCGAAAGCCAGACCGGCACATGGTACGAACAAGAATCCGAGGCGCAACA -GGCTGCTGCTGCGCCTGTTTCCTCGCGGTCAAACGCGCTGCTTTCTAATGCTACCGATGC -AAGCCGCAAGTCTCAGCGTCTTGATCAATCTGCACCAGGTCTGGACGATATTGCGTTGTC -CTCAATCCGCCAACGTCTGGACCACTCTAGCTCCAACGATCAGCATCGCACTCTCAATGC -GGGGCCTGCTCCGCCTGCAGAGCCTCTTGTAGATGATGCCACCCGTCTTCTAGGTATTGG -CTGGCAGCGTGTCAACACCGACGGAGACATGGCTCCTGCTGTTCGTGGCTGGACCAAGTA -TATCAACAATCAATACTCAGCCCACCTGCACGACTCTCAAATGCTTCTCAAAAGCCGCGC -TCTGAATGCCTATCTTGTGACTGCCACTCCAACTTCTGGACACTCGCCTGCCTTTTATCT -TTTCACCGAAGACCTCACTCAAGGCCAGCTCGTTGCCTCCTCTTGGGAAGCATGCCTCCA -AAACCTTCGTAGCTCTCCCATCATATTTGAGGGCGCTGCTCCCCTCGGAGCTAACGATCG -ACCAAGTGCACAGCCCACCCCCTTCAACTCCATGGACACGGGCGTGCCACTTCTCCAACA -AGCCATGTCCAACCACCCTCAAACATCGAGCCTCGGCGAACTGAGCGGAGGCGCGGAAAT -GGGAATGGAGATCGATTCGTAAATTCTTTTCAAAACGAATTCGGATTCCCTTCGACTCCG -ACCCGACAACATCGATCGATCCGGCCCGCCATAACTCGACGTCCAGCCAATTCTCGCGAT -CGATCTCGGACTTTTCTCCGACTATTTCCCCGGCTGTTCCATCATATTTTTTTTATTATA -TTTTTTTCACTATTGCTTATTTAGTTATGAGACTCACGCGCCTGACTGGCCCAGCCTGGC -CCAATTCGGTTCTCGCCCAGTGGAGGGTTTGATGGGGTTCGGTGTTTTGCTTTCCTCTTT -TCTTATGGGAAATGGTCGGCGTCGTAATCTGGGATTTCTTCAAGTATCGCATGCTTTTGA -CGAACGTTATTTATCTACGACTATTTATTTCATCAAATAAAAAATATTTAAAACATTTTT -GGGGAGGTTAATATCGTCCGATTGAGCATGCGGTATGGTGTACATCGAGTTTTGTCTGAC -CCGCGAGGCACATTTCATTCGAGTTTGGTGTCTATTTCTGAATTTTGGATATTTTGACAC -AAAATGAGTCGAAGACAGTATATATATGACATGTAATTCCTATTTCTCTTGTGAATTGAG -GTTTTCCACTAGAATGGCGCAGTATCTTAGGTTTCCCAGGAAATAGAGTCGTCAGTATGC -AGTAATCCTTCTTTCTTAGAGGCTTTTGGACTACTTCCCTCTACGCGATCCAAATACTTT -CGTAGCTAATTCGCAATATTCACAAACACCGAGAGCGAGATTTCCAAAACCAACCACTAT -GGAGAAAGACCCGTTTACTTGGGTCGATGTCGAGGACCACAAGGACGGACACCGAATGAA 
-TAAATGGAATGAGCTCGACATCCTCAGAAGACTTGAAAACCTGAAGGAAGAGGTCAGTAG -CTTCAGGCAAGAAAAACCGATCGACCGAGAGGACCACTCCCTTATCCCAAAATTGGAAAT -TCTTGACGAAGAGGACAGCCTACGTCAATCCGTTTCAGAAAGTGTCGGCTGGCTTAACAA -ACAGCCCAGCATACTAGGGCAAAAAAAAAAGTCGATCAAACGAAACGATCTCACCCTTAT -CCAAAGAAGGACAGACCTTCAAAAAGAGGTCGACAGAAGAATCTCGATCAAGCGAGAGGA -TCCTTCCATTATCCGAAGAATAGCAGGTTTTGAAGACGCCGTCGAAAATCTCATACAAAA -GAAGAGCAAGGTGAAAAAGAAAACCTATGCTTATCGTGAATCTGTCTTGAATTCATGGAT -TCAAGGCCCTTCCTCAACGATAGGTGGAATCAAAAATACAGTCTACGAAGGAGACATCCA -TGCCGATATCCAGGCTATCATTGCCAAGGAAGAGACCGACCCGGAGACTGCTGAACGATG -GAAGGTAGTTTTTCTCGATCGGCATGGTCTCCAATTGGGGGGGGGGGTTGACTGTGGAAA -GGGAAATGAACTAAAGCTTGTTCATGTTGAGGTGATCCGCTTCTTCAACATCGGGGCTCA -ATTTCTTCATGGCTGGGGTGAAACACATACGAAGATACTTGGTATCTGTGATCATTGGAT -AGGTCGTTGGAGATCTGGCCGGGTGATCTACATCCCATCGGAGGAGTATTATCAGCTATG -CCAACTTTATTACTGCGGAACTTGGGTGTGATGTATCATTACATTTCCTTGTGGCTGAAA -CGATTGCTTTAACCCGTGAAGTCTTGCCTTCAATGACTCCCTGATGACTGAAAAGGAGGT -TCAATTAATGCTATGACTTGTTATAAGCACTTGCCTTCTATGGTTTCAGTCATGACCTAG -GACTGTTTGAATCGTACAACTTTGGAATAATAGTCGGCCGCTAACGCAGGCCTGTGTAGT -CGGGTAGTCTTTTTTTTTGCAAATTTGTTTCCCTCAGTCTTATAAGTACATAGCCTGGGA -CTTTCCCAGGGCAATAAGCGAATAGTAAGTAGGTGGAGGATGGAAGGTTGGTCTAATGAC -CATTATCATTGTGATCTAGGGGTAGTAGCATTTAGCAAATGCTTTGCTTAGCGTACATCG -ATGCAACCGAACAAGTGCAATTGCAACGGAAATTAATATGATAATTCAATTCGGAAACCC -CAATGCTCGTGTGGGTGAAAAACATGGTATCATCTAGGCCGTAAAACAGAAGATCAAAGA -AAGCAAATGGATGACAATCGCATCCTGATAACTCCAAATCCGCCGAAAGGACATGACAAA -TGCTCAACAGTACTCTTGAGCTTGATTTCCTCAgtcatcgtcatcgccataatcatagtc -actttcgtagttgcctgcaaatgcgtcgtcgacgccatcatccttctcgtcgtcgtcata -atcgtcttcgtcctcatcatcCTCCTCCTCAGGTTCTGCCTCGACCTTATTCACGAAGCC -ATCCTTGGCCATGGCAGGCTTTGCGTCCTGAGACGCGCTTGGAGCGGTAGATGGTGGAGT -AGGTAGTGGTGTTGTAGAAGTAGGACGGACGAACTTAGCATCATTCTTGGGATCAAACTT -GGCATCGAACTTGGCATCGAATTTGGCATCTGATGGAGTTGCACGCCGGGATCGTGCTGC -TGCTTCTGGCTCGATGCTGGCGCTGCGCCGTGAGATGACgctttggctgcgactttggct -tcggctcatgctttgtgactttgCATCTCCCCCTTCGCCGCCGTACAAGGTGTCCAGGAG -AGAGTAGTTGATCTTTTTACTGTAACCTCGGCGCTCGAGCATGCGACGTGTTGCCTCAGC 
-CGGTGTCGAGGCACGAGAGCTGCGGCCATCTTCGCCCTCGCCCTCTAGGTAGGTAACATC -TCCAAGACGGCCACGGCGGCGCTTGCGTGGTTTACGAGGTGCGCCTCCACCGTTTGCCTC -TTCCATGGCCCGCTTCAGTGCTTTGGCTTGTTGGGTGCGGAGATACTCCTTGTTTTCCGT -CACCCAGATTGACTCCTTGATTTCGACTTCGGCAGGCATGAGCAGACAATTGGCGACTTC -GGGGTCATCTTCAAACTCTGCAGCGTCGATTTCAGTGTTGTCGGAAACAGTGGTCCGCGG -CGGTTGGTCGCAGCCAACACTCTCAATCATGGTTGAGCCTTGAGCAAGCATAGACCGCAT -TTCATCCTCTAGCGCCGCTTCAGATGCGACCTGCTCGGCAGAAGGTTCTGGCGTGGGCTG -TCGCTTGCCCTTGCCCTTGGAGTGGATCGGTTCGTTTTTGATATCCTCAACAACTTCGTT -CACTGCCTTAGCAATTACTGATGCACGCCGAGGGTGACTATTCGCAGTCAAAGCTGGATC -AATCGGAAGGTTGGGAATAGCGAAGCCGTCGGCATCTACACGATGCGGCTGCACTGATTC -CGCTTCACTTTCGGCTGGAGAGTCGTCATCTTCGAGTTCGGCCGCAGTGTCTGGTAGATT -CTTGAAGCGGCTTTTCCGACCTTCTCGCCCGCGAGTGAAGGATGGGGGATCGTGTGTGTT -CTCTAATTGGACTGATCGAAATTGATCAACGGTCAATTCACCACTCTCAGTGGAGCCGAA -TTCGTTCAAACGCTGGCTGATAGTTGTTTCGGTTACTTTGACAATATAGACGACCTCACG -CACGGTTCGGCGGAAGTTATTCATGCGCGCGGCAAGAATCAATGCGGCACCACAGAGACC -AGCTGGGCGTCGACCTGTAGTCATCCAATCACGGTTCATTCGTTGAACGATTCTGACTGC -CTCGCCAGCCACCGCCATAAGAGATGGTCCGAATTCTAGCTGCTTGGCGAAACGATAAAT -CAAACTTTCGGGGTCGATGGGGTTCATGAGAAAGACGTTGCCACCTAGGCGGAGTTCATC -GAGAAGCGCCTTATATGTACGACCAAGTTTAAAGACGTTGATCATCAACACATCGGCAAA -ATCAATCAGCATGACGGTATTGCCATTTTGGCGACGGCATGCAATGTACAGGCAAACCGC -AGCTACCGTCTTGGTGCGACGACCTTGAATGAAGTTGAGTCCCACAGCAAGCTTGAAGAC -CTGACCAGCAGCCTTGGTTGCACTTTCCGGGATAGTCAAAGCTCGAGAAAGTTGAAGCAT -ATATCGGTTTCCATTTTGTTCGGTCATTTCCCGGCTCTCCATGGCGCCCCCACGCTGGAA -ACCCGGACCATAGCTGCGTACATGGGTCTGATCTTCTCCGACGAAAGATCCTTGCACAAC -AGCTGCACCGGACGAGGATTCCCCAAATGTGACCTCAGAAACAATGTTGTTCTCGCTGAT -GACTGTACCACAACCCGTACAGACCTTCACGCCTCCGTCATCCACAATATGCGGAGCTGG -ACACCCTGGGTTTGGACAATTAGATGTTTTGGGGTGTGTTGTCGGGCGGGGTGGTTGAGG -ACGTCCAATAGCCGTGGGACGATTTATAGGTGTTGAGTTGGGAGGTTTAAGACTGGCCAG -TCGTCCCACAGGAGGCCGGGGCCCGCGGGGAGGGATTGGACGCGCGCCGGATCTGGGTGC -CGACATCTTGGTTGAAGCGGAAAGTAAGAGGGTGTCAGAAAATGGGTGTAGACAGGGGCA -CTCGATTGAGGCCGAGTCCTTGAAGCTTCAGAATTTCCCGAATGAGCGTCGCGGTTTTTT -TTTTTTTTGTAGCGAAAGGTTTATCAATTGTCACGAGAGTCGCGATAAGGATGAAAGTGG 
-AAGTGAGAGGAAAAAGTATTCGACCGGCTTATGTAATATGGAATCTGCAGAGCTCCAGGG -CTGCATTCGAACATACGGCATAGAGGTCTGGAGGTGGTTGGATGCCGATAAATAAAACCT -GAGTCAAAGCCAGATGAACAATTTGCTCAATCAAATCCCTTTGTCTATTGTTAAAGGCTT -TATGAATCGACTGGATTCTATCGACGAGTCACGTGATCCTGTGTGGTCGATGGTACGCTA -GCCCAATCCGGCTTAGTACCATCTGATATTTGTCGTCCGCCACCGTGCCTGAGGTTCTAG -GATGATCCATGATCAACGACCACAGCATTATCAGACCTGATACTCACACGTGGAGTGATT -GATACTACAATGTTGAATACGGGAATCTGATCTCATTCTTGATTGCCAGCCTCACACTAG -CGGAATACTACATAGGCGTTGCAGGGGTGATCGAGATGTCTGGAATGCGGTATTTGTGCG -TGGTTTGGAATCATCACGCACAAATGTCGCATCTCAGCCATGTTTCAACCAATCCATCCA -CTTCTTGGATTTTCTTTTCTTTTCTTCCTTAATGAAATAAAATATTGGACTGCGAATTAT -ATCCCGTTCCCGGCTTTTGCTTTATATCATAAAACGCCGCTGGTCGCTCTTCCCTTGATA -CAAGACTGCTCCATGAATACATAATCCCACCGACTGCCTGTTTCCAACCTGGGTTGAACA -AGACCTTTCAGTATCAACTCCGTGGAAGTTTTTCAAAGACCGAGATATCCCACACTATAC -CCCATCTTTGAGCTGCAGAGATCTCCGTCGATCATGTCTTCAGGGTCCAAGTACCAGTCT -TCCGACCTGGAGGCAGACCACCAGAACTTGGATACCCAGACCAGTCTCTTCTTTGTAACA -AATGTGCACTGCGCATCATGTGTTGCATATATCACCGAAGTGCTCTCCGAAACACCTTCA -GTTGGAAACATCGAAGTCACAATCTTGACGCATGAAGTACGTGCAAGCCATAGTACCACT -GTTCGGCCGGCCGACCTCGTCAACGCGTTGATTCATGCCGCATTCGAAGTACACTATGTT -ACAACATTTGACCAACGAGGAAAGCCTATTGCGGAGCTCGATACCTCTTCCTGGAACTAC -CGCGGCTCCACGCTGTTCTCCTCGCCGCGAGCCTCTGTTTCTAGTATCTCATCCAACATT -AAAGAGCGAATTCAGTCCAACAGTCATAGGAGACATATTGCCAACTGCGATGCATGCAGG -AAGGAAGAACTAGAGGCCCTTTCTCGTCATTCTTCTCGAACGGAGCTGGGACCTTTGGAC -GAGAAGTCCTCACAGACACCCTTCCGGCCATTGAGCAAGCACCCCGAAAAAGACATCACT -GTTACGAATCTTGAAACAACACCAGGGAATGCTGTGCAGGAGAGCCTTCGTTCATTCACT -ACTTCAGAAACAAAAGTTCAACCTGATGTGTCTGCTCCAAACTTACCAGCGGACATAGCA -GAGCTTCCCAGTCCATCCGCTGAATCAGCCGACGAGTTCACCGCACAGATCAGCATTGGA -GGAATGAGCTGCGCATCGTGCGCCAACAGCATCACCGCACAAGTCCAACAGCTTGAGTTT -GTGAAGGAAATCACCGTTAATCTTCTCACTAACAGCGCAACCGTGATTTATGTTGGACCC -CGCGGAAATTCCGACGAGATTATTGAGCAAATCAATGACGCTGGATTCGAAGCATCTCTT -GACGAGGTGAACCAGCTACCCAAACCACCGGCATCTGCAGAGCGGGCAGCAAACTATGTC -AGCGAGATTGCAATTACCGGCATGACCTGTGGCTCTTGTGTCGGAGGAGTGACTCGAGGA -CTTGAAGAACTTCCTTTTATTCACGATGTGTCTGTGAATTTGCTTTCCCATAGCGGGAGG 
-GTGGAATTCGAAGGCCGAGACAATCTTGGTAAAATTATAGAGAAGATTGAAGACCTTGGA -TACGATGCAATCGTCAATAGTGTCTCCCCAATGAAAGTTGGCACTGAAAAGCTTAGCACT -GTTCAGATTAGAACAATTTCTATACAAGTGGATGGAATGTTCTGTCACCATTGCCCACAA -ACTGTTTTGGGGGCAGTGAAATCAGTTCCCGGTGTCACCATCGAGGAAGCACTCTCCGAG -AAGACCCCCATTCTCAAAATCACGTACACTCCACGGCCACCTTTGCTCACCCTCCGCACA -ATAATCTCAGCCATCAACTCGGCAAATGACAGTTTCCGCGCGACTGTCTATCACCCGCCC -AGCATTGAAGACCGGTCTCGTGCGATCCAGCATCATGAAAGGAGTCGTCTGTTGGCACGA -TTGCTTTTCGTCTTTATCACTACCATCCCGACTTTCCTTATAGGCATTGTATTCATGTCT -CTCGTCTCCTCTGAAAATTCTGTTCGAATGTATCTCGAGCAACCTATGTGGGCTGGCAGT -GTGACCCGCATCGAATGGGCGCTGTTCATAATGACAACTCCGGTCATGTTCTACGGCACT -GATGTGTTCCATGCCCGGGCTCTGAAGGAAGTTTATGCGTTATGGCGGCCTGGAAGTCGG -GTGCCGATCCTCCGGAGATTTTACCGTTTTGGTAGTATGAACCTTTTGATCTCTGCAGGC -ACATCGGTGGCCTACATCTCGTCTTTGGCGGTACTTATCGTCGACGCATCTGTGGGTACC -AAGTCTAGCGCTCATAGCACGACTTATTTTGACTCGGTCGTTTTCTTAACTCTATTCATT -CTTGCTGGTCGGTTCCTAGAGGCCTACAGCAAAGCCAGAACTGGCGATGCTGTCACGTCT -TTGGGAAAGCTTCGACCTTCAGAGGCACTGTTGTCTGATGAAACATTCCAGAGGGGAGTG -AAGCGTACGAGTGTTGACTTTCTGGAGGTTGGTGATGTCGTGAGTGTTCCTCATGGAGCA -TCGCCTCCTGCAGATGGAGTTATCGTCGACAGCGCGTCGTACCAATTCGACGAAAGCTCT -TTGACAGGGGAATCCCGACCGGTCAAGAAGACGGTGAATGACATAGTCTATACAGGCTCA -GTCAACGTCGGCCAGCCGGTAAGAATCCGCATCACTGAGCTCGGCGGTTCATCTATGCTT -GACCGAATCATCGCCGTGGTTCGTGAGGGACAAAGCAAGCGTGCACCTCTCGAAAGAGTT -GCTGATCTTCTCACTTCACATTTCGTCCCTATCATCACTCTGATCGCCATCTCGACTTTC -ATCATCTGGTTAGCGCTTGGTCATTCAGGTGTGCTCCCCGCAGATTATCTCGATGTCGCA -CACGGCGGCTGGACATTCTGGGCCTTGGAATTTGCTATCTCGGTGTTTGTGGTCGCTTGT -CCCTGCGGCCTGGCCCTTGCAGCCCCTACGGCGCTTTTCGTTGGGGGCGGCCTAGCGGCG -AAACATGGTATCCTAGTCAAAGGTGGTGGTGAAGCCTTCCAAGAAGCCAGTCGCTTGAAT -GCAATCGTCTTTGACAAGACAGGCACACTGACTGAGGGCGGAAGTCTCAAAGTGTCCGAT -CATGAAGTCTTGACCAGTGACCCAGAGGTTGCGAAAGCCGCATGGGCCCTGGCTCGGAAA -ATGGAAGAGAGCAGCAACCATCCGATTGCCCAGGCAATCACTGAATTCTGCAAAGCACAA -CAATCTTCCTATGTCAAATCGTCAGATGTTCATGAGATTTCAGGGCAAGGCATGAAAGGG -ATTTTCACCGTGTCTGGCTCCGAGCACGAGGAGCAGTATGAAGCTGCAATCGGTAATGAA -CGGCTTCTCAAGAGTCTCCTATCCCCCGAAACAGACACATACTTCATTTCCAATCTGCTG 
-GCGAAATACCAGTCAGCAGGCAAGTCGACTGCAGTTCTCTCACTTCGTCAAGTACACACC -CAGTCCACCGAACCATCCAACTTCATACCTGCTATTATCTTCGCCACATCAGACACCATT -CGCCCGGAAGCTGTCGAGATAGTCTCTCAGCTTCAGAAGCGTCACGTCGATGTCTTCATG -TGCACTGGCGACAACCAAACCACCGCACACGCCGTCGCAGACATGATAGGCATTCCGCGC -TCCAAGGTGATGGCCAACGTCCTGCCAGCGGAGAAAGCCAGCTTTGTTCGTCAGATTCAA -GATCGTTCGCCAGATGCAACACCGGCCGATGGCAAGACCACTATTGTCGCCTTCGTCGGC -GATGGAGTGAACGACTCGCCTGCTTTGGCAGCAGCAGACGTGAGCATCGCCATGGCATCT -GGTTCCGACGTCGCTATCAACTCTGCCAGTTTTATCCTCCTCAACTCAGATCTCAGCACG -ATTTTGCAGCTGGTCCTGCTCAGTCGCCGCGTATTCAATCGCGTCCGTATGAACTTCGGT -TGGGCGGTCATCTACAATCTCTGTCTGGTTCCGGTTGCTGCTGGTATCCTTTATCCTATT -GTTAGTGGCCATCATGAGAAGAATATTGATGGCCACATGATCATGGCCAGTGAGCACTGG -AGGCTGAGTCCAGTGTGGGCTGCTCTGGCCATGGCATTGAGTAGTATCTCTGTTGTGCTG -AGCAGCTTGGCTTTGAGAGTTGACAAAGAAAGTATACTGAAGATTGTGAGCAGGAAGAAG -TAGATATTTTGGCATTCGTTTATTAAATCTAGCGTTATGTTTATATTTAATACCTAATAC -ATGAATGAATCCAGTACTCAAGTCTGTTTCCAAAGTAACCTCGATAGGAAACCCTTCTAA -TTCAGATCCACCTCGCCCCAGACTTCCGCAATCCGCTTGCTGAGATCTTTAGCAATCTTC -TTCCGCCCTACGACTTCTGACATCCGATTCGCTATCATCATCTCTCGCTCCTTAGGCTCT -AGAGCCATGTAAGCATCCACCAGATGCCGCGGCGTTGGCCAGATCTGTTGGATCTCAAGG -GCTTTTTCGCCCGTCACACCACGGGTACACATGAGCATTTTTAAGAAAATGTCACGAAGA -GTAAGAACATCGGACTTGGAGGTGAGTGCGGAGAATGTCACGAATGAAACCCCGTACGTG -ATAGTCGGGTCTTGAGAACGGAGATTATCGAGGATGGTGAGGTAGGAGTCAGTGGCGAGA -CGGCGACCGGAGATGAGCCCAATTTTGGATGCTGGAGATGGAGGAACTGTACTACGGTTG -ATATTGCTACCGGTGTCTCCAATAGAGGGCGCATCTTCGACCCCGTACATCTTCCGTAGA -AGCAGAGTCATGCGCGCCAGGTACCGGATTGACTCATCCAAATGTTTCGTCTTTTTGATA -AAGTATTGATTAACCACCTGTGTCGAGGCTATGGCTGACGCAACCATCTCCTGGTATTGC -GCTGCACTCCCGCTTGTCGAGTCAGGATGTGTGACTGCGAACTCTTCGATCAGGTAGATG -ACATTCTTGATGCCAGAGCGACGAAGTCGGAATTTTTGCTCGTGAAAACGGCCGTCTTTA -ATGGATCCAATCAGATCGTCTAGACGCTTTCGCTCAACGATCCAGTCCAACATCACTTCG -TCACCTTCTTCGCCGTGCCTTGTGAGGAAATTTGGGTCATGGCATTTGGCTACCCACATC -GCGTCGCCGACTTCCAAGGCACGTACTTGCGGGGTGATTCCTTGCTTCATCAGTTCGCCG -GAGATGTAGTCGCGGTCGGTGGAGGTGCGGACCTCTCGGGTGTCTAATACTAGCTGGATA -GTGAAGCTTTTAGGTGGGAAGAAGATGGGCTCGATGTTGGCTATGTCGTGTTCGGATAAA 
-TTCTCTTGGGCATCTGCCGACGGATCACCATCTTGCTGGAGGGAGTCAAGGTCTGCTGTC -AAGGCTTCTTGCTGAGACTCAGTCGTTGCAGTAGCCTGGGAGCCAGGCGTGGTGGGTTGA -GCAGCGCCGGGTAACGTCTTTTGGAGACGTTTAGCTACCTCCCACCCTTCTTCGGAAAGA -AGATACTTTCTTAGTGGACGCCCATGTTCATAGACTAGGTCTTTCTGGATGAGAGTTTTC -ATTGAATTCCACGCGGTGTAGAACTTGGTCGGGTCCGGGGGTGCCGTGAATGAAGTGTCG -CAGTATGGCTGGGCCACCTCGATCAGGTGAGCCTTCGTCATCCCCTGGGATGCATTCTCA -TCTTGGGTTCCCAAGCCCAGCATCAGCGCATATGGACCCGATCGTAGTGTTGGTACATAT -GGCTTGGCTTTCCGAGGCTTCTTCACTGGTTGATCGATGCCGCCTTCGGAGGTTCTCTTT -CCACCTAGTTGACATACTTGAGTGTCGGTTGCTTGTACGGAGGAATTAACACGACAAACC -TTTGTCAGGCGGTTCCGGCATTGGAAGGCCATTTTCTTGGCAGTGAGCCTTTAATTTATC -AGTCAAGCGGTCACACAATTTAGGACCCAGTCCATTCAATTGCTGTGCTTGAGATGGGTG -TTGGAATTCTAACGGACAGGCTTTCATTGATTCATAGGCCTTCTTGTATCTGCAAATTGT -CAGATATAGTCTTTTCCGAAGGGCGCCAAGAGATACATACACTGTCACGCCTTTGCTGTT -TCGTTCTCGAGCTTGATCCAGCCATTCTTTTATCCACCCCAAGAGTAGGGGGTTTGCACA -CTCCTCGCTCATGATGAGCCGAGTGTCCAGTGACTGAGTGGCAGAACAGAAGAGCGAATT -ACCATTCAGACGAGTTTCGCCGGTTGACGCGAAGTGATGTTGCCTTTTTGGGAACGCGAC -TGGCACGTGCCGATAAGAGAGATAAGAAGGTGGTTGCAGGCAGATCGGGCGGGATGGACC -GAGGGCTGTCAATAATCTTAGGACTACTCTTAACCTATATTCTACTTAATTTGCAGATCT -ACAATGGGCTCGGCATATCATCTACTTGCATTTATTTTATTTTCTTCGTACGGCGGTTGG -AAATTAGGATTTGATAATCTCCGTTCACTGGCACTCGTACAGCAGTGTAGTTAACTCACA -CAATAGGTCGGGCCTATATTCCCGTGATATTTTTTTTACCAGTCAAATTTCTATTATCGC -AGTTGCTTCTATGTGAGCTGTTACTTCAATTGATTCAATTTTGGTCAGGGCTGTTCAATG -GTGTTGTTGCCACCATGACCCTGATCCGAGGAAAGATGACGTCGACAATCCAATCGCAAC -CGACGTGTATAAGACAGGTTCCAGCACCCATGGGTAACCTTCCCTATCAAGCCAATGCGG -CCAAAAATAAACAAACATCCTTCTCAAATTAGCCACAGAAAATTCATAACGATCATAGGC -AATGATATCCGCTTCGGTCGACGGCGCTGGACCAACTGACAGCCATGTGCTGGCTCAGGT -GGAACAGGATGAGAAAGGCCTCTCACAGAAAGCTGGCGATACAGCAGAAATCACTAATAT -CGGGTGGGATGAACCTCCAGATGCTATCGAGGAACCCTTAGTGGCTGGACTCTCAAATGA -GGATTTATGGATGCTCATTAGACGATTTGACAAGGTATGTACTGAGCAGCGCACATGAGA -ATGTGCGAAAGTTTGAGACTAACAGTGTTATTTCCCTCTTCAGCAAGTCTATCGAGTCCA -AGCCGTTCCCGATGCTCCTGTGCAGAGACTGGATCTCACACGCAAAGACGACGATGAATT -TTCCCCGGATAAACTCAGGGCCACACTCGAACGTTTCTATGTAACTGTTATTGTGGGCTT 
-CACCAATTGCATCAAACATATTGCTCGATTGCGGTCTTGGAAGGAGCCGCGACGGACCGC -GATATTTTGCACGGCATGTCTCCGTTCCAGAGAAATTTAGAATATATTTGTACCATTACT -GACTTGTTGTAGGCTTACTTCTTATCATGGCTTCTCGATCTGCTCATGCCAACTACGCTA -GTCACCCTAATTGCCCTGGTGGTTTACCCACCATGTCGATCACTTATTTTCCCTCCCGCT -CCAATCGCTCTTGTCAACAAGGACACGGGTGGTGTACAAAAGCCAATGGCGGGGATTCTT -GGATCTGATGACAGTATCACTGGGGCCCCTGAAAAATTCAAAGGCGAGGCAGCTGAGCAA -GAGGCTAGTAATCTTGTTGCAGGAGTTGCCAGCGTAGCTGTCGGAAGCGCAGTTGGAAAG -CATGATCAAGGAGTCCCTGACGATGCACCACTCGAAAAGGATGTTCCAGACGCGATGGAC -ATTGTTGCCAATACTGCCGATGCGCAAACCGCTGCCCATGGAAGGGTGCCGACAGACTCC -CATGACAAAACAAGACAGCCTATGAAGCAGAGCGTGATGGATGCCGCGAACCTTTCAATG -CAGGTGGTAGGCGACATCACAGATACCTATGAGAAGCTAGCCAAGTGAGTGAAGAGTTTG -TGGTTTTCTTGGCCATGGATCGTGGTTAAATTTATTTTATCTAGTGCTCTATCCCCCACA -TCCCCTTTTCCTCAAATGAGTCCTCGTCTGCGTCTGGCGGCTGTGCTAGGCCCAGCATTT -CTAGCCTCAACACTGACATCTTGCTACGTCTTTATGAAGCTCAGTACACTTTTGATCGGG -ATCGTCTTCTTTGGAGACCCTGCCATCAGACGAGGGATTATACATCTGAATAGCCGCTTC -CCAAACTGGCAGAAGCTCATACAGTTACAAAAGTACGCCTCCAAGCCATTAATCTACACA -CAATACTGACTTCCATAGTTCCGTCTTGAAAGGTATTCCAACAAATGCTCAGCTGACTGT -TACTCTTCTCCGAATCGGGGAGGCTAATGCGTCACCACTTCCACCCCCTCCAACGACACA -AGACAAAGTACCCTCGCGGCCAGCTTCTTTACATCATGAGGAAATTCCTCTAGGCGCTAC -TGACGAGGAAATTAAACAAGCTGCACTGGTCAAGCCTAACGACCATGTTCAGGAGGCTCA -AGCTTCGCCAGAGAAGGGCCACAAAAGGACATTGGGCTCTAGCATCCTGGGCTTCTTTCG -AGGTACTACGGCCAGTGGTGTCGAAAGCAAAAGAGGGATCGATCGCTTGCGAGCAGCGGT -TGGTTCACACCAGGCCAAAAACCGTGTCGGCGTCCTGCGTGATAGAGGCAAGAAGATAAC -ACCAGGTGGACCCGTGGCGTTTGACGCTCGATGCAAAGGCAAGCGTGGCACTGTGATTAT -CGACTCAACCAAACAGCCTCCGCTGCTATACTTCACAACTGATACACCGCAGCAAGGAGA -CGAACAGGTGGAGCATCGCAAGGTTGGATCGGTTCTGTTCACTATGCCGGTGACTGATAT -CCAGGAGATGCGAAAGTTAGGAGGCATGGGATGGAAGGGGAAATTGGTCGTTGGTTGGGC -CCTTGGAAGTAAGGAGGTGGTTGATGGTTTGCTCATCACTGGCAAGAAGCCAGGGCAGAA -ACACCAACTTACTGCCATGGGGACACGGAACCAATTGTTTAATCGGCTTATTGCCATTGA -TGGACAGGTTTGGGCGAGCTGTTGACTCGAATTCCAGGATGCGTGCTAAGAGAATCCCAG -GTTTGGAAATGATATCCTCGAGGTTTTGGTGCCGATGGCATACCGTCTGCTATGTTTCAT -TAAGCAATGTATTAAGATATCAATAGCGTCTGAGCGTGTATGAGGGAAATGAATGATGCA 
-TTTTATTTTGATTCGGCAATCTGACAATCGTAGTTAGTACCTTCTAAATTTGGCACCAGA -AGAGCATCGTATCAGTAAGCCGTAATCCGATTGATCTGAAATCTGCAATTCATTCAATCT -ACTGCAAAATTTCGGACCAAGCTGTCTTATTTCAAATGATCAAACTGCATATCATCGGTC -GTGGTGCCGGCGGCACTGTATGGGCATCCGAAAAAGGACCTGCCTACAAAAGGGAGGGTG -GCAACCCAGCACGATCCCTCCAAAACGACTTTGAAATGCACAACGGAGTACTTCAAAGTC -GACAAACCCTGATGAGCCTGAGAAACCCCACTAAAGTTCAATTCCAGATACCATACTGTC -ACAGCTTCATCAATCTAAAACGCAAGGAGTGGTAGGCTGCAAATCTAGAAAGGTTCCCAC -AAGGCTATACACCATGCAATATGATTGAAGCACAACGCATTCCACCATTTGAAGAATCAA -CCTGACACCTTCTTAATCATGTCTTTTGCCCACAAGAGATCAAACAAAATATCATCGACA -GTGAGCCGGATCGAGACTGCCCAGTGCCTAGTTCGACTATATTCCGGCAGACGTCGAACA -CACACCCGGGGTTCCCAAACCTCTCACGATTCAAAACCTTCTGTTTGCGAAATTACTCAC -TATGTGAAGATCAACTAGAGGAACTCGCAATCACAGAAGACGATCTTTATCAATATGGTC -AAACAATGGCAGAAGCACCTGCTATGATGCACTGGATTGCTAAAATAGATGTCAATGATG -TTGAGTTTGTCCTTGCACCCTGCAATGATAACGATGGTGGCCACATCGAAAATGTCTTGG -GAAGGCATAGCATATGGATGGTGGACTTTGATACATGAAGGATTATGACAACGGACGAAA -ATGATGTCGGAAAAGCAGCGAAGGCATTTTGGAGAAACGATACCTTGTATCCTCGTCCAG -GAAATACCCTATGAAACACGTTTCGAGAGTAATACATTCATACTAGTGATGTGTGCCTGG -AGCATTGCGACATCTCAACAGAATAGCTGAAAAGACCATTTGAGAGACACTAAGGCCGCA -CAAGCGGCCGTGTACCCTTGTAAATCATATTTGAACAAGCAAAAAGATCACTTTGGGCAC -AGCAGTCGAAACGCTATCGACTCAATACATTCCATCCTATTCACAAGCATTAAGTCCACA -TTAGCATACATAGTCTGTTTTTGAAATCACAATTATTCAACTTACATCTTCCAGCTCCGC -CTCCAAGATAATCTCTACCATTTTATTAATGGCCGCTCGACGCTCTTCTTTGGTCAAACA -GCCAATCTTAACGAGCTCAGTCAAAGTCTCCCCAACCCAGTTCTGGCAGTTCCAGTCTCG -ATCGTTACTGCTGTTGCGCACCGACGTGCTAGCACATGCGAGAATGATCTTCGCCTTGGT -GACAGATTCTGGCGTTGTACATACAGGAATCCAAGCTATTGGCTGTTCTGACATGGGGTT -CTCGGGATCTTTTTGGAATTTAAAGAAAGGATGTGCACCGGCAACATGAAGCCAGTCATA -TTGACCGTCGGGGTAGAGGGCGTGCAGTGCTGTGTGTCGGTATTTGGAGAAGTCTACTGG -CACGCCTTTGTACACAAGAAGGGAGATTTGTGTCGGCATATTCGAGAGACTGATTATCCG -GAGGGAAAATTCTAGAATATTTCGCCAAGTGGCTTTTTCGGTGGCTATGCTGAGAGTTGA -AGGGATGTTGGGTTGTTTAGGAAAAGCGGGCGGTGGTGCTTGGGCTGGCAACAGTCTTTA -CTGGCCGTGGAAAATCCTGGTCTATATCAAAAATCACATATGCGTTTATTGTCTCTCTTT -AACTGGTAAGATGGCAGAGAGTGTAGAGATATCAAGCAATGAATGATATCATTGAGGTTC 
-TTGAATTTGAAATAATAATTATTGTGTCAATGACTGTTGATAATCCCGCACGGTCTGCTC -AAAGGACTCGTCGCGTTTGAACCCCAAGGAAATAGCTTGAGTGTTATCAAATTGTGTGGG -CCATGACTTCAGAATTGGAACCAACGCAGAATCCTCCTTCTCTGTAAGCAGAGCCAGCTT -ATCTGCACCGCCCACGGCTTCCAATGCATCCATCATACCCTGGACTGTGACGCAGATCCC -AGGCACATTAATCTGGCGAATGTGCGGGGGAACGGAATCAGCAGGAAGACGGAGAGTGAG -AAGTAAATTTTCCACGAGGGTCTTTGGCGAGCATAGCCAGGATTTGAACTGTCGGTCTTC -AATGGGAATCACACATTTCTTGCCGTCAAGTGGCTCCCGGATCATACCAGAGAGGAAGGA -GGAAGCTGCAGCGGTGGGTGCACCCGGTCGGACGGAAATGGTAGGGAAGCGCAGAGTGAA -ACCGGTAATAAACTTTCGGCGAGTATATTCGTTCACCAACGTTTCGCACACAATCTTCTC -GGCACCGTAGGAGGACTCCGGAGTTGGGATCACGCTGTCGGTCACAATCTCTGGCAATGG -TTGACCATATACGGCCTGACTGGACGAATAGATGACCCGCACTCCTGGACAAGTGAGTCG -GAGCGCCTCGAGCAGATTGCGGGTTGCATCAATGTTGACACTCATACCCAGCTCGAAGTT -GGCTTCGGAGCCGGAGGACATGATGCCATGGAAGGCATAGACGGCATCCACGGAAGAATC -AACAATGCGATTGGCTTCTGTCAAAAGATCGGCCTTGATTGCCGTTGCTGATTGGGGATA -GCGAACGCCCGCAGGAATTGGGGGTTGATTGATATCGGTCAGTGTGACGCGATAGTCTGG -GTCATTGAGCAGCTCCTTTGCAAGCAATTGGCCGATGAAACCGGCAGCGCCAGTGATGAG -GATGTGCATGTTTAAAAAAAAAAAGGGTACAGAGTAATGAACAGAGAATATGGGAAAGGT -GTTATAAGTTTAGTTGCTCTCCGCAATTCGCATCAAGTCCGAGGCGATCTTTAGCTGGTC -CTCCACACCGGACTTGGGCCGGGCACAATTTCCAAGTGGATCGACGGGACTGCCCACGAA -TCCCCTTGGACTTTGGTCTTTCAGGGCCCTCCAGGGTCCTCGTTCCTTCGGGATTCAATC -CGGGGGAATGCCTACTGTGACCATCTGAAGTAGGCGACATACTATCTCTTGATGGAGCAT -ATTCAATTGAGTTACCCAAGGCGTCTTGAAAAGTCCACCTTCAATGGTCAATGTATAGAG -ACCAAGGTCAAAAGTGTGTGGATTTCGGGAAATTCACGCGGGGGGCCCAAGTCACACAAT -TTGTTTTTCTCTCTTCTCTCTTGATATCCCCCATTGGCTATAATCTCCTCGAATACTGTA -TAATTTCAATTTCTTTCTCGGTTCTAGGTCGCTTGACTCCGACTCTTTTCCCCCACTCTG -GAGAAGATTTTACCCCTAACCCGCATGCCGGAGTATCACCGATTCTCCAAAGCATCACAT -CATGGATACCACTCCACTGAAGCGGTCTCCTGATGACGCTGGGTTGGAGTCGCCCCGAGG -GACCAAGACACGGTCTATTCCTAAGATCTCCAAAGCTCGTGCCTGTGAGTCTTCCCCCAC -ATATTCTCCGAACGGCATTAACGGGGAACAGGCGCAGAATGCAAGCGTCATAAGATTCGC -TGCGAGTTTCGACCAGGAGAATCAACTTGCACAAAATGTACCCGCAGCGGTATAAAATGT -GTCGTAAATGACTTTTCACAGAAATTCGTTGACGATGATGGAATGTATGTGTGACCCCGC -CCCGGCCCTTTGGTGAGCTTTGGAGCTATCAGGGGCTACGCTAATTCAATGCCTCTGGTA 
-TAGATGGAAATCACAGGCCAATGCGACTATGCTCCAGCTGCAGGCGGCTGTTTCACATCT -ATTGCGACAAGGCGGACTGCCGGAGCTCTCAACCTATGCCCCCGGGGACACTGTCAATGG -CCCTAGTCCGGCCGAGTCATACCATGCCCACCATCCCTCGGTCGATCGGTCGCAGACACA -ACCCAGTCACCAGGAGGGATTGGGAGTTGTAATGGATGTTACCCGGGAGCCGTCGCCGGA -GCAGGATCTCCAAGACCCGGATTTGGTCCCTGCTCCTATGCGCAGTCTATACGAGGTCAC -TAAATTACGAAATCTGAGAAATAATCATGTCGAGCCACCGAAACAGACGTTGCTGGAGGA -AGATTTTATCTCGCGACGATTGATCTCCCTCCACGAAGCGGAGGAATTATTCGCGTACTT -CAGTCGCACTATGAACCAGCTACTGTGGGGTGGAATAATTCTTGTACACCGTGATTTAAC -ATCCGTCCGTCGCGCGTCGACGCTTCTTTCTGTTGCGGTGTTGACCGTCGCGGCACTCCA -TATTCCGAACCGCACCGAAACCTTGAACCGGTGCTACACCGAGTATGTATCGCTCGTATC -CAGTATGGCCCTCACGCGGGCCCACACTCTGGATGACATCCGAGGCCTCTGCGTGGGTGC -ATTCTGGCTATCAGAGCTGAGTTGGAAGCTGTCCGGGCATGCGGTACGAATTGCGACCGA -GATGGGCCTCCATCAAAGTTATCAGCGCCTGACTCGTGGACACACGGATCAGTATGAACG -CGCCCAGCTCTGGTATCTGCTTTACGTGTGTGACCATCACTTCAGTATTGCTTATGGGCG -ACCACCTGTTATCCACGAGGACGTGGTGATCCGCAACTATGAGACATTTCTTGCGCTGCC -GATGATTGTTCCCGGAGATATTCGACTTTTAGCTCAAGTTGCCCTCTTTATGATTTTGAC -CGAGGCTTATCGAATGTTTGGTAGTGATACTGAGCAAGCATTGACCGAGGACGATTTTGG -ACAGTTGCGCGTATACAATGTCGCCGTAGATCAATGGCGACTTCTCTGGCAACCTCGATC -TGGTGAGTGATTTCTCAGGCTAATTCTCAGCAACTATCGGTTCTAACATACCATGGAATG -CATTACAGCCGATAGCGCCTATGTGCGGACTTATCCATCCAAAGGAGTGGTTCTGCACTA -TCATTTTGCAAAGTTCCAGTTGAACTCGCTTTCGCTTCGTGCGCTGTCCCCATCCAACAC -ACCTGTCTTTTCTATGGATCGCAAGGAATCGGCCAACATCGCAATTTCCTCGGCGATGGC -ATGTCTCAATATGGTTCTCGAGGAACCTGATATTCGGGATGCAATCGTCGGTGTTCCAAT -CTTCACCCATACTATGGTTACATTCTCCGCTGTTTTCCTACTCAAGGTCGCAATCAATTG -GAACACTGCTTACCTTAGCATCAACCCCCGCCAAGTACGCCGGCTAGTTGAGCGTGTGAT -CGAATTGATGAATTGTGTATCTGCGGGAGAGCGACATCTGACTCGACATATCGCACGTGG -GTTGGGCAAGATGCTTGACCGATTCGACGCTTGGGAGGCTGCTTGGCAATTCCAAAGCAA -AGATACTGCAGTGGAAGGCAGTGAGGTCCCTGGGGGAGCCAACGCAATGGCGCAGGGCTT -CCCCCCACCAGATCTCATCTATGGCATGGTCGGCACATACGGATTTGGTCTTGATGAGAA -TCTGCTAGATCCGAGTATGGCAGATTTTGACTTTCTAACGCAATGACTATAAGCATTCCA -ATAGAAAGGCTTGAATCGATCCAAACCCGGGGTCTCAGGAATACGAAGGATATCGAGATC -TGAAACATTTGAATATCCTAGATGGACATGGAATGTAAATAGTGGGAATCCAGGGTTATC 
-CGGCATTTGGCGACACGCACTATGTCATGGGCCACGCACCGGAGAGCTTCTGGACCTTAA -CCCTAAAGGGTGTCACTTCCATGTCGTCACCGTCACTCCCGCTTATGCCCTTGGGTCTCC -CCCCCCCCTTCTTACACGTCTGATTTCTCTATAATTTCCCTTGACTTGGGACTTTTGCCA -ATTCAGAACCTTCTCTCTTTCTCCCCCATAATTTCACCCGTTCCTATCCTTCGCACTTAC -TTAATTCTCACCAGCCACCTAATTCTTTACCATGACTACCCGCAAGAGAAAGCAAGAATC -CGAGGAGGAGCTCCAGGCCCTTCCAAGTGATGTGAGCGAAGAGGAGGAAGAGTGAGTTGT -TACATCCCGACATCAATTACCCCTTTCTCCTCACCCCTGCGCCAATAATCTATGCATAAT -TCCATCGCGATTCTGTACGGCTATAGTAGCTGCGCATCGATTACGACATGATCTCAAGAC -AGCCGCGCATTGATATATATGGCGCGAAACTCCAAACGTAATTGATCTTTACTAATCCGA -TCGCGATGAATTAGATATGAAGACTCCGAGCCCGAGTTAGAGGAAGGAAGCGACGAGGGC -GAACAGGAGGAGGTGGAGGGAGAGTCTGAGTCTGATGAAGACGAGGAAGGCGAAGAAGCA -CAGCCAGAACCAAAAGCGGAAGCCAAGAAAGAGGGCAAAAAAGAGACCAAAAAAGgagcg -aagaaagaggagaaagaggaagaggaagGTATGTTTTAGCTTGAGCCCTGATATCCCCTT -GATCCCCAAACTCGCTCATGCTAACACACCCTGCTCTCTAGCCGCTCCCCCAGCCAAGAA -GCAGAAGACTGCACCTGCACCTACTGTCGTCGATGAGGACGAGGAGGACGGTGCAAATGG -CGAAAAAGACGAGGACGAGGAGGAGGAGCCTGCTGAGGAAGATGATGTGGAAGAGAGTGC -CACTGAGACTGCTGAAAAGAGTGGTCCGGCTGCTGCTGCTGCTAAGGACAAGGGTGGTGT -TATTCCCAAGGAGTCCGATCTGCCTGAGATTGAGGCGGTTGAGGCGGCTGAGAAAGAAGA -ATGATTTTGCGATCGAGAGTTTGCCTCCACATGAGTTTTCTTCGAACTTGAATGAAATGC -GCTCTTGAAGTGGAAGGGTGGTTGGATTGCTTTGCTGTGATAGCCTGCTGTATACAATCA -AAGGTGCTATGAATGTAGACTGCTGTGGGTTAGAAGTGAGCTCTAGCTTATATAGATCTG -GCTTATATAGATCCGGGTATATTCAACTAGTTCATGATGGAAGTAACCAATCCTCGCTAT -GTTCTCGGGCACTTGAGGATGACGTGCATAGAATTCGCCGTTGAAGCTCATCTACTGGCT -GAGTGCTATATTCCTCTCCACTTCAGTGGGTTCTGATGCAAATCCTAAAATAGACTGGTG -TTGGGCAATGATTGGAGGTAGAAGTCGATGCCTTAAATGCCAAATTAATACATACATACT -GCGCATGAGATCCACCCTATTTCAATCGAGCACACAACCAGCTGAACATAAAAAATATAT -ACCATCTAAGAAAACTACCTAACAATCGGACCAAGACAAAATGCAAAAAAAAGAGGTGTC -ATCAATCAAGGATGGTCATTAGCCCTGTCTGCGCCTCCGCTTTCTATCTTGATATCCAGG -GGAGAACATGTATTTGTCTTCCAACTCAATTTGTGACTGATCATTGCCAACGAATTTAAG -TGTCGCTACCACTGCTACGGAAGGTAGCGCACAGTTGACAGTACTGGTGTGTGAGATCGA -GAACGGCCTGAGGCAGGTTTGTTGCTTGGACAATGCTGCGGACCTTGACAGCACCCTCGT -CTTCAATAAGCTTGCGAACCATGAACAAAAGCTCGGGGGAGAACTGATGGTCAATTAGTT 
-ACAGTAGTCAAAGTGATATTGGTCAGGGAATAAAGCATACCTCGGAGTACCAGCGATATT -GCCAGGCGATGAGGCGAATCAATGCGCCGAGCATGCCTCTGGTGTCATTTCGCTGTTCGT -ACAACTTCAACACTCCACGCATAGCCTTGGGCGAGAATCGAGTGGCTAGACAGCTCTGGT -ATGCCTCAATGCTTTCGTCGAAATGGTGCAGTCGCTGCGCAAGCTCGCCAAGAATTTCCC -ACTCGGTGGCAGACTTCTTGTATTCCATGGCTTGTTGACGGAACTGGGCCATTTCGGTTC -GCCAGATAGTGTAGATACGCAGATCTTCGTAAAGTACCATGAACAGGTTATCCAGCCACC -TTTCACACAGTCGCTTATTTCGGAATTGCGTGTATGAGGCGTGAGATGGGTCGGGCTGAG -GCATAAACAGTTAGCATATATATTAAATACGTCCTTGATCGGGGGTGACACACATCTTCT -TTGCCAGACTTGACAACTTCGGACGCTACTGTTTGCTCCGGTCGCTCGAGGGGACTGTCA -GGGTGGCTGTCTCCCACATCTCCATTGGCAGATTCTTCCGAAGATGGCTCGGTCGGGTCA -CGGCCAGCCACAGAGCCAGAGTCATCTTCATTGTCGGGGACCTTGCCGTTCGTGGCCGTC -TCCGATGATGTAGCCGGCTTAGGAACATGCTGCCGCTCAACACGGTATTCTTCTTCCATC -ACGAAGACCTGGCTGCGGGCCTTGAGTAACTGATCCCATCCAATGGCAGCAGCGATTTTT -GTCAGAAGATTGTAGGCCTTCAAGAAAGTACCTTGGTAACCAGCAGCATGGATCTTTCGA -AGCGAAGGGTGAACATAGTCGTGAGGGTCTCCCTGCTTTGGTTGGCCTTCATCGATTTCG -TCGAGAATGCTTTCAGCAAGAATTGGGAGCATGATGCGTGACGGTTGTGGCATGCGAGGG -GTATCTTTGTCTTGATAGGTGAACATAGGGCAAGAGTTAAGAGTCAGAAGCGCCAAGTCC -CAGCGTTCCATACTGACATATACCTCTGCGAGGCGAGCCCATGTACTGAATTCGCTGGGA -GCTGCAGTAACTGCACGCTTAGCGCACTCGAGTGCCATCTCGCTTTCGCCTTTGCTTTGA -CAGAAACTGGCCTGGCAATCGAGTAGAGCATAGTCCATCGGAACGTCTTGTAAAGCATCA -TACATCAGGCGTACCGCTTGCACCTCCTCGTCTGCCATAATCTGTACACGGGCCAGAAGG -GACGAAACCTCCACATCCTTTGTACGGAGCTTCTCAAACAAGTTGATTCCAGAGGTATAG -CGACCAGTCGTGTGGATGTATTTGAGGATGCCGGATGTGAGGTGGTTGCTCACAGTGTTG -GGGACTTGGGTTTCCGGGTCAGAGCTCAGTTGGCGTCCTAAAAGAATTGTTAGTCAATAA -CACAACAGGTCATCTTGGAAATTGGCCCGATATCCATACCCAGGAAGAAAAGCTTCTCAG -CAGCATCCAAGAACTTATGCTCCATCTCTGTGTTTGTCACAGGGTTGAATCGTCGGACTC -CAACAATCTTCTTGACGGAATCTCCGCTTCCATCGTCCGCGTAAAGATAAGCCCTCAAAA -CAGAGCAAAGGAAGGTCTCAAGCCAAAGGGCCTCGGTTGCAACTCGCTTGTCACCCCGCT -CATCAATACAGTAGCTTTCCAAGCTTCCAGGGATTTTAACTTCGACGCGCATATCGAGAT -GGGAGAATGCATTGTAGCAACTGTTTAGACATTAGCACGTGGACCCAACACCGCGCCAAC -AAAGACTCTAACCAGTAGATCCCCGATACCACTTTATGCGTCTTGTCGAGGGGAGAGAAT -GTGAGGGTATTAACATAGGCGGCTAAGCTAGCGGATGAGGAGGCATCGATTCCAGTAACA 
-TGATGATAAACTCCGGTCTGCCATAGATAGAAGATCAATCAATTGCTTCCAAATAGGAGA -TTGTACCATGGATGGAAAGCTGGTATCCTATATCTTACCTGGCGTGTCGAATTGGCCTTG -GGCTGCTTCACCAAATAGACCAAGTCTGGAGGGCCTAGTTCTCGCAAGCTCTGCAAGGAC -TCCGTTCGCGCATCGACCGCAACATAGATATCCTCCTCCTCATGGATCCTGGATAATGCC -CTTGAGGTCAGTCACAATTCCTATAATTGTCGTAGTGGCGGTTGTTGATAATCTGGAACA -ATGGCCATGCATATGCAAACCGGGGAACACGTAAACGGGGACAGGAGCACATACTCAGGG -ACAGCGGGCGCGACCATGGCGCCTGCACAAGCGCGGTGGAATGAAAGATGAGGGTATCTT -AATTAATGAAATCTGCGTGAATCGTAAGACGTTTCAGGGGTTGCGCTGAGGGGAAAGTGA -AGAGTGATCTAAGAATACTCCAACTGTCAGTACTGCCCCCAAGGAATGGGTTTCAGGCCA -CAAGGCATTCAGGCACCCAGGCACTATTCAGGCCCAAAACACTTTCCCCGATTGGTGCAA -CGCGTGATAACGTGTGTCTGCCCGCGATAACTAATTGATTTGATTATTGCTGCCCGCATC -TATGGTCTCGGTATCATGCCCCATGCTGTCGCCTGATTTCTCTTTTCCATCTAATATTTG -AGGGACCCGCGTGGAGCTCACACCATGTCGAGCTGGGTCGCATTGAATGTTGAGCCCGAT -GAGGCCGTCGAGGAGGAAGTTGACGACACCAAGGAGCTTCAGATCGAGGAAGCTTTGAAG -CTTTACCACACTGCATTGAAGCTGCATTCACAAGGGCCAGACCACTATTCTCAGGCTGCC -GAGGCCTATGAAGCTCTTCTCAATTCAGAGATCTTCAAATACCCAGAGTCTATATCCGAT -TTCAAAAGGGGTGCCTTCGACGATACGCAGGATGAGATTGCTACAGAATCGACCGCCGAG -TTCAACGTTAACGACTCTACCTCCAGTCTGTTCCAAATGCTCTATCTATCCTATAAGAAC -CACGGCAAATTTGTTCTCGATTCAGTGCAAGACTCCATACGAACAGCGCCACGGACCCCC -GAATTTGAGCAGGAAATACAAAGAAAGTTGACGGAGGCATCCAGAAAAGCTTTAGGATCT -TTTGGGGATGCTTTGGAACGAGACGACTCAGATCTCAACCTCTGGAGGCAGAGCGCAAGA -CTCAGCAGTTCCTTGAACAGCTACCGCTTGTCTCGGTATTGCTTGGAGAGTGTCCTAGCG -GACGATGACAATCGCTTGGAATTACGATCAGAGCAGCTTGGTCTAGAGGAGATATTTGCG -CAGCAGCGCTTGCGAAGCACGCTGCAGTCCCTCTTCGACAAGTTATCTGCCTCGCAAATT -CCCGCCCAGAGGCCAAAGAAAGCTTTGTTGAAATATCTCAAACAGCATGAGGATCCATAT -CCCAGCCTGCCGGCACTTCCCGCCGATTTGCGCCGCCTAGATCCCTCAAGGGCCCCCCTT -GCCCTTCACGCTGCCCGCCGAGAGATAACCCCTGCTCACCCAACCTGGGAGTCAGTTGGC -AAAGCGCTTCTCCAAGCACTTGACGATGAAGATGATAACTCGCTCGCCGTTCCCACTCAG -TCTATTGGTATCATTTTGCCTTCCAGAAGCTCACAAACTCCGGCTACATCGATTGCAGAT -GCGGAGGCCACGGAAATGGCCTTGGATACCGAGGAGAGCGGGAATCAGGATGGTAATGAG -GTTGCCGATGTGGACATGCTTGAGACAACCGAAAACGATGCCAAGCCCGAACCAAACTCA -TCTCAGCCAACGAAAGAGCCCACCGAGGCAGGCGAGGAACAATCATCGATCGATCAAAGT 
-GCTGAAAAGCAATTGATGGAGTCCTTGGACCGTCAATCGACGCAGCCACAGGATCAGCAG -ACCGAGCAAGATGGGGCCCACGCAGAAGAAGTTGACACGAACTCTGCAGTGGATATCCGA -AAAAGATCGTCTGCATCGGCCGCGAACGAAGAGCCGGAGGGCGGCCGCATGAAAAGCCGA -CGAACCCGAGCCCGAGAGTCAAATGCCGACACTTTGGCACTGCCCGAAGAAGCTGCATTC -GACCAAGAAAAATACTACGAAGACCGTCTTGAGGTTTTTGTCAATGCCGATGATTGGATG -TTCAGTACTGTGGACTCTCTCTTCTCGAAGGTCGGTATTGATGCACTTGGTAGCATTGAA -GGCCTGAGAGAGAAGTGCTCCGGAAATGATTCGCCCAATACACCCCAAGATCTCGAGACA -CGATTATTTCAGGACTTCCGAGGTCTCATGAAGACATGGGATGACGAAAAGTCTCGATTG -ATGCAGCAGAAAGACGATTTATCTCCGCTGAAAGACATCCGTGGCACGAGCAAGTCAGGA -TTGGCTGTTTTCCTTGAGCACTCAAGAAAGGGTGCTCGGAAGCCAGTTATCGAGGAAGAG -CTTCCTTCTGGGGAGCAATTGTATAACTTTTCCAATGCCATCAACTCTGACTGGTTACAT -CCCCACCAAACTTCGTTTGAATGGCTAAAGTGTCTTCTCATGCCGGATTTTGGGCAAGAG -TCTTTAGACTGGTTAACAGCAAAGTCGACTTACACCTGTTTTCTGTGGCCAAAAGACTTG -AAGAAGACCGTCGTGGAACTATTAGTTCGGGAGGATGAATTCGCCTACCGGGAGTGCAGC -GAAATGATCGCCAACCTCGAAGCTCGAATCCTTGGCTCAAGCAGTAAAACTCCATTTGAA -TACCAGCCGAAAGACTTGTTCGAATTTAAGATGATACAAACCATCTATGAGCTTCATTTG -GATGTCTTTTCTTCGGTCGAGAGCCTTGACAGTGAGACAATCCAGGAAACAAAACTCGCA -CAGCGTGATCGGCTGGCTCGGTGGGGGATGCTAGCTCGGTCATCGATAGATCACTTTGTC -GACCATTGCCCATCAAGCGAATCTCAATCAAACCTCATTTTGCGCCATTTGTGGGCGTCG -GCATTCCACGTCAATCTTGCCGGTGACGCACAGCGAGAACATACCCTGCTCTGTCTCCAA -GACCTAAAACAAATACTTCAGTCCCTCGGCAATCCAAGTGTCAATCTCATCAACAACGCC -ACGATGTCAGAATTGTCTGCCGCTGTGATAGACCAGGAGGTCTTGAAGCTCAAGTGCATG -GACTTTTTCGCCAAGGTATTCAACTCAGACTCTGAGGATCCCGTGACTCTCATCGAAGCA -ATAGAGCCCATCCTTGAACCCTCGTCGATTGAGTATGCTGAGAGTTCAACGGAGAATGAC -ATGAATCATCCTACCTCACACCTCGGTGAAATGGCTGCCTTCCTTGATAGAGGTGATGCT -ACATTGCGACTATTCCTCTGGCGTCGACTCCAGGAAGCATATCAAGCTATTGATTACCCG -CCTAAAGTTGTCTCTTGCTATTTACGAAGCATTGAAGTGGTAATGGCAGAACTCGAGGCA -GCGAAACACAGCGAAGAAACAAGCCAACATCGTCAAGTAGCTCTGCTGGGTTGGCTCAAA -GCCCTCGATGGCATTGTGGCCAAAGCTATACCACTTATTCTTGAAGACTCAGAACAAGCA -TTTGATTGTATAGATATGGAACATTTGCAAACCTCAATGTCTGCTATTGCCCGCCTCGTG -AGGCTGTTGCACAGCTTTATACTCTATGAGGACTCGGTACGAGTAGGCCAGATTTCTGGT -CGCGACTTCCGAGGCTCATTGGCGAAATCGCTAGAAAACCTGAAAGAAAGAATGCGAGAG 
-CTATATGTTCGCTGCTGGATCTTACAATACACCCTCTTCGTGGAAGCGATTGCCCAGAAC -AAGGAGCTTTTCGATGATCCGCTTGAAGACCGCATTCATTTCCTTCGTTCCGTTCATAAT -GCATTAGGCGTTCGATCGATGTGCAGATACTCGCAAAAGAGGTTTTTGAAGCTTCTGAAA -GCAGAACTGTTTGGTCTTGAAACCAAGGGTGATTATGAATTTGATATGTATCAAATACTC -CTCGATCTCCATGGTATCAAATTCTCCGCGTTTGATGGGACTATGGACCATGGATGTCCC -CCCGAGAAATTAGATCGCCCTACAGCGATCATGATGATCGACTTTGTTCTGCAACAAGCA -AACAAGATGAATATGAAGGACTTGTCCAAGTCCGAATTGAAGACTACCATTGAGAAGATG -CAGCAAGCGATTGGTCCTGCCAAGGCTTCTTCGCAAACACCCCAGTTGACTTTCAATCGT -CGCCTTGTGAATGCATATCTCAAAGCCGCGCTCAACCCATCCAATCTCCTCCGTGCTGTT -CAGGGTATTACAGAGCTTTCGACAACAATAGTTCCAACTGAGAATGCGAAGATCGCTGCC -AAAGGGTGGTTCTTCCTCCTTGGCCATGCCGCACTCACCAAGTTCCGATCCTCGAAGCGC -CTCAGCCCTGGCCCAACAAGCGAACTTGATGATGCCATCGGATTCTTTAGACAAGACTTG -GATCATGGAACAGGGAGATGGGAAACCTGGTACAGGCTCGCCCAAACATATGACTCAAAG -CTTGATGAAGACATTACATGGACAGCTGACAAGATCAATAACAATCGTTCGGACTTGGCA -GCATTGCAGCGAAATGCTATTCACTGTTATGCCATGGCGGTTTCAACAGCAATGAGAACT -GCAGAGCCAACGGCAGAGACCAGAGAAGTGATGTCAGATCTGTATTCGGACTTCGGTATC -CGCCTGTACTCGTCCTCTCGCGAGCCCTTATCCATGGGAGCCTTCAGTGTTGCGGACTTC -CCTCGCCATTTCAGCAGCGAGGAGAGCCAACAAATGTACAAGGGAACGCCTTTCAAAGAG -ATGTCGCTTTATTCAACATGGAACTTTGCTAGCAACCTTCTCAAACGGGCAGCTGTCGAC -AAGTCTAAGCGATGGATGTAAGTGATTTCAATATTACCCGACAAAATTGAGACATTGAGA -GAATTCTGACATATTGGTAGGACTCACTACACGCTTGGCAAATGTCTTTGGAAAATGTTC -ACCAGCGACGATGCTTTGAGAACCACTTCTAAAAAGGTTGAAATGCAAGAGGTGATAAAT -TCTATGCTCGATGCTATTGCTGCGCTTCCGCAGCGAAGAGACTCACGCTCTGACCCGATC -TTTGAGCCACATTTCAAACTCTTCTCTTTTGTACACAAGCTTGTTCTTCGAGGAAACATG -ACGGTAAGTCTATCACTTCTGATATGAAAAACTCATTCTGATAGTCACATTAGCCTACCG -AAGGAAGCAAGACCCTCCTTGCAACTCCTTGGGCTCGTAAGATTGACCCTCCGGAGAATA -TGGAGGGATGGAACCCATACATACTCGAAGTCCTTCGAAAATATAAGAGCGCTGACAAGT -CCAACTGGCATCATCGCATGGGTGTAAAGGTAAATCATCCGCAGCTTGATTCTCTTTTGC -TCATAATGCTCACAACCCCTAGGCTGCGCATGTCATCTATGACGACCAGCGAAACGCAAC -CGCTGCTATGGCTGCCAAGCAGGAACTTTCCCAAATATTCACCAAGACGCTTACAATTCA -AGTCTGGCGGCCAGAATTTGAACGTCCCGGTCGACATTTCGTTTATACAACCCGCTACGT -GTACTTCTTTGTTGGCCTGCTTGACCAGCTTGACGATCGCGCAAGCTTAGATCAACTTTT 
-GCGCCGCGTGAGAAAGAAGCAAGGAGATTTCTTCAATCACACCAAGCTGTGGGAGGATAT -TTGCCTAACTTATGCAAAGATAATCCGCCGCGCAGCCAATATCAACGAAGGATATGAAGA -AACCATATTCAAACCAATCGGGTGGGAAGAGTTCTCTACTAAGACGGCTCGTTTGGAAGG -CCTACCTCACCTGGGTCCAGAGAGCCAGGCCATCTTAGAATTACTTCGAGACGCCCTGGA -GCTAAAGAAGCTTAACAACAACCTCATGAAAATTACCATGTTCGAAGACCTGATCGCCGA -CCTCTACGCCCGACTCTACGAGATCAACACACCTCACCTTGTCGAACAAGCAACCGAGGA -AAACAAAGAAAAGATGAAAGTCGACCATCTCCTCATGGTAGGCGACGGCCTTCCTGAGAC -TTCAACCCCtccagcatccgttccagcatccgttccagcttcTGATACTCCAGCACCCCG -TGGTCGGACAAAGGGCATCGCCCGTCGCGATATCCAGAAACGCGCCGACACAATCGTGAA -CACGAAACTGGCTCCTCGGGCCGTGGCGAGTAAAGTAACCGCTGCAGGCGAGACGGAGCC -ACCTGCGACTCCGCAGGGCGTTGAATCGACCGCTGCGGGGCCGAGCTCTGCGCCCCGTGG -TCCCGCCAAGCAGGCTTCTACTACTGGGGATGGCTCAGctgagcagagagatgagcatga -gagtgcagatgagactgagaATAGCGAAAATGAAGACACGAAGCTAGTTGACGAGGGGAC -CTCGGCGCTTTTCCCTCATGTGACtgatgcagaggaaatggggacgggTGATGAAGGTGC -AGACGAAGTGGATGGTGAGGATGATGAAGGCGGTCATGAGAATGAAGGCGAAGAAGGGGA -AGGTGACGGTGATGGTGATGATGACGGCGATGGTGATCTTGGTGGTGATGGTGAGGGAGA -TGAGGATGAGGAgatgcaagatgaggaAGCAAAGTTGGAAGAAGACAAAGAAGGGAAGGA -AACTCTCACTGCCGATCAGTCTGCCAACGACACTGAAACTGAGCATGTGGCTGACAAGCG -AGATACCGGGGAGCAAGATGCCATGGACATTACACCTTCTGAGCCCTGAGGAATTTCCAT -ACATCCAATGAGGGGCTTGCTTGTTTTTTTTGTCTTTTCGGTTTATGGAGCACATGGGGT -AAAAATCTCAAGACAGGAGTCTTTTAAACTACAGGATGGCGTTGAGAGACAGGTGGCTTG -GATAGCTTACGAGCTCGCGTGTACAATATTCACAGCATTCAAATCTGGAATTGTCAATTG -AGAAATGAATATCAACAATTATTGAGAACAAAGGATAGATTGCCCCCAAACGCCAGAGCC -AAGGGATATCATTCCATATTAACCAAAAAAAAAAGATACACCGGGAAATTAAAGGCTTTA -GTATCTTAAGCAGCGCAAGAAGGGTAAGTCGTAACAGTCATGACGGAGTAGAGGAAACAA -GACAATTTAGATCTGGTCCTCTGGGGCGAGGTCCCATTTCCGGGTGCCCATCTTTCTCGT -CTTCTTGATAGGCGAGGCTCCTGGTGACGGAGGAGCAAACATGGTATCTGCGAGCTTCAA -TTGATCCATGCGTTGCAACGCATTCTCAATATCGCTCTCGCGGCTGAGATTAGCCCGTGC -AGGCGTGGCATTCTTGGATCTATGCTTTCTTATCGGGGATTGATTGTGCATCTCCCGTTC -AAGTGCAGCGAGTTTGCGGCGAAGCTGGTCATTTTCTTGCTCAAGCTCCTCCACTCGTTC -GTTTGATCCGGAATCTTCAAAGACTGGAGGTAGAGGTCAGTTCCGGGATTTACGTATTTC -GCTTCTCATGGGGCAACTTACTTTGGAATCCACGGGATACGAGCTCAACCTTCTTGTCGA 
-TGTGCTCGTCATTGCGACCAATCTGAAAACCATCAGTATTTGACAAATTTGGAGCTGATC -TAGGGCTACAAACCTGCTCATCCCAAGCCCTCTGCCAGCGTCGCCTTTCTTCTTCCGTTT -TCTCATCCATCTCTGCCCAGCATTCTTCTCGCACATCCTGCTCGATCATGAGACATCTTT -CTTCGGCAGCTCTCAATCTCATTTCAACATCAGCCCGTGCAATCTCTTCTTCGGCTAGCT -TAACAGCAAGGCCGTGGCAATCGCGTGTGAGGCGCGTGATCTCGGCAGTAGCTCTCTGGA -GCTCTTCGGTTGAATCAAAATTGGGCGAAGTGCAGCCACTGACTGAGCGGTGACTGCCAG -AGGCGACAGACAATATAGATTCACAGACCGAGGGAACTCTAGGAACGGTAACCTCACGAG -CCAATGCGGAGTAACGCAAAATTTGGGATGTGGCGTTGAAATCACCCAGAGGGTCAGCGG -TGACTATCATGATAGCCTTTTGGGGATATTGGCCTCTGGACATCTGATGAGACGACGGGA -ATGAATTTGAAAACAGAAGCTCGGTCAACTTGCACTGCCGGTAAGGAACGAGAGCAGTCT -TTGTATGTTAGCAGCTGCGAATAAAGCAATCAGACGATAATTTCAAAGCATACCTTCATT -CCATCCTGTATGTTACTTTGCATCTGAAGGCATTGGCCGAGATACATCAAGCTTTCGTTG -ATCTTGCCGGCCTCGGCCAGTGTAGCACCCGCTGTCTTTGCCGTTCTCGCACGCTCGGAG -CCTAGATTAATTAGCGGCAGCCTACAGCAAGGATGCAGCATACTTACCAGCAAGATCCAC -AACAGTGAGGGCGTTCCCAGCCCAACTGTATTCTCCGGTTCGCTTTGAGTATGTCCGCTT -CTTGACTTCCAGGGAAAAGAAACCATGGCTTCGAGAGCTGACGCTATTCATTCCGGTACC -AGTTACCTTGCGTTCAGTCAAACCAACGTCCAAAATGGCCAGCGCTTCGTCGTAGTTGCT -GCAAGCGATTTTGCGCAGCCCAGCAACCAGTTTCCGATCTGGTGAGCCCTCGGTAGGCTT -GAAGAGCAAGGGACGTCGTCGATCTTTTTGATTATTTCCGCGAGACATATTACCCTGACC -TGAGACGATAGAGGGAGATAGAAGATCGAAAATTCGGTCATTGTATACCTCATACATTGA -CACCAAAACGACATAGTCGGCATTTGAATCCATATCCACAGTGAGGTGGCTCATATCGGG -CAGCCTAGGAAGGGCGCATGGTCGTTCACGTACGTGACGGCGGGGGAAATTCAAGGCAGG -GGTCGGTTCCTTAATGACATAAAGACGAGATTTAGCTGATTTGGGCTCAAACAGATTGGA -GTGAGTGTAGGGGGTAGGATGAGAAAATCCTGGAGTGATCCATCCAGAGGACTCAGTCGA -GATCTGGCCAAGGCCAGCTCGAGCAGCGCCATTGGGACTAGTAATAAAAGACAAATCCGC -CTCGCGGGAGCTTCCAGGCTTAGATCTCGGCCTTGTGAGAAACGATCCAGGGGTTCGTGG -TAGTGGGGTCGGGGATCCTTGGGGGGAGTTCAGGGTAAGGGGGCTTCGTACCATCAAAGG -AGTATTGGCACGAGACGGGCTCATGGGTGTTTGCGCTCTCGAAGTTCGTCCGTTTGGCTC -GCCATATACAGCTTCCAGGAACGTCTGGGCGGAGAACAGTTGCGCTTCTGAAGCGTCCGC -GGCAGCCAGTGAGGCAAGCATCACGGGAGTAATGCTGCTATCTTCGGTCTTGATTGTTGG -CTCTAGGGAGCGGAATATAACGTCAAGTGCCATTTGGGTCATTCCTCGTTCGGTTTTGGA -TCCTAGAATGGTGTGGCTCTATGCACGATCCACCAATTAGCTTGGTGGTCACGATGCTGC 
-AATATTCGAGATCATACTTTTCCGCTTCCTGTCACTCCCAGGGTAGCGACTAGCCCATCT -CTGTTCTCCTTCAGAACTCCACGTATGATAGGTTCCATACCAGTGTCATGGAAAACATCT -AGTTGGGAGGCAGCCTCCTCGAAGACCTTTGTGAAACCAAACCTCTCGACGGCGCGCTTT -CTCGTGTCATTTGGCGGTTGAAGAGTAATGTGGGTTGGTCCCGGCGAGACGATCTCTGTT -TCCTCCTGTGAAGCCTCTGGCGGCTCAACGTTCAAAAATCGGTCGGTTTCATTGGGTTTT -TGGGCCATCGGCGGCCGTAACCGGAGGTACACTTGGAATAGCGATGCCTGGGGCTGTTTG -GACGGCATCTTGGGTGGCGCGTGAGTATCTAGACAACACAAGGACAGGTGACGTTGAGCC -TCTCAAGCTGTGGGGTGGATGCAATATGCAAAACAGCTCAGCAGTCAAAGGCAAGTCCAG -AACGTCCACAGATCAACATTGCGCTTACTGCTTTGGGCTGTCAGTTACCCAGACAACGGG -TATATCACGTGCTACACGCCTATACCTCCTTCCAAGGTTGATACCGATGTGTGTCATTGG -TCGTGGAGGTTGCTTTTCTATAAGGAGTAGTTTCCCTGACATATATGACTCGACGGAGTA -CTCCGTACTTTCTTCACATATACTTGATGTTGAAGTAAATTGTTTTACGGCCCTGTTCGC -CCCAAGCGAAAACGAGTTACTTAGAATAGACTAGCGTGACAAGAAGGTTGGCAAATAATA -AGTCAAGGCTGTACGAAGTAGGGCAGTTCGTATTAGCAGTCGGCATGGCGAGGCAAATAC -GAGGTCATTTTAAGCCTAAGCGTTGACCCGCTATTCCCAATGAACCAGTGACCCTCTGTG -TTTGATCATTCTTTGTGGATCCAAACTGAAGAGCTACCCAAAATAGAGATCATAAGGCGT -TGGAAAGTTAGAGAAATACGAAATTGCCTAAAATGAACACACCGTTAATCAGTGGGCTGA -ACAAAGGTTTGACATGCAAATCATCGCCCGAAACACGTCCAACTCTTGTGTATATATATA -TCTAACAACCTGCCAGCAAGACACTCATGTTCTTCATGTTAGCATCAGTTACATCAACTA -CCCAGTAGTACTTTTGTGTGAACTCGATTACTTCACCCGGCAATATATATGGTCAATCTA -CCCCAACCATGCTCAAAGCACGGTCATCATATCGTAAGTCGTCTACTAACGCAAGCACGA -TCAAGAGACAGTCTGACAAAACAAAGAAGAGGGAAGCCTTTGGTTCTATGCACCAAATAA -AGGCGCTGCAATTGCCTTTGCGATTCTCTTCGCTATCTCTGGCGCCATTCATGGATATCA -ATGCTTGTATGTGGCCCCTACCCACTAGGAACCCGTGAAGTGACAAGATTTTCCAGCAAA -TATAAATCCTGGAAAGTCACCGGCCTTCTTCCATGGTCTGCCATCTTGTTCACGGCAGGG -TTTATCACGCGAACTATTGGAGCGTTCGGGCAATGGGGAAGTCTAGGCGTCTATATCGCA -AGCACTGTGCTTCTCCTTGCCGGACCGTAAGGTGTACCCATCCTATTATGAGAGGTGCTA -CTAAAAACTGATCTTCGATTATACTAGGCCAGTATATGAGGGTGCTAACTTTTTCATTTT -GGGTCGCATACTTTACTACATCCCATATCACTCCCCCATCCACCCTGGTCGCGTATTCAC -GACCTTCATCGCCTTGGGTGTGGCCATTGAGAGTATTACCGCGAACGGAGCGGCTCGAGT -CGCAAGTGCAGACTCTAGTGTCAGCTCGCAAAACACCGGCAAGGCGCTACTGAAGGCGGC -GCTTATCATGCAGATCGTCCTAATGGCAGGATTTGTAGCTTTGGCCGGCCGATTCCACTT 
-CAATTGCTCACGAGGAGGCGTATTGAACCACAAGATCAAGAATGCGTTACTGGTGTTGTA -CTGCAGCTGTACCCTCATTACGATCCGTACCATCTATCGCACCGTGGAGTACTTTACTGC -CGCAAGTCTGAATGCCTACACTGACCTCGAGAACATCAGTCCTGTATTGAAGCAAGAGTG -GTTCTTCTGGTTTTTCGAGGTTGTGTTCATGTATAGCAACACCACTTTGCTGAATGTCTT -TCATCCAATGCATTGGCTTCCGCGATCAAACAAGATTTATCTCGCCGAAGACGGTGTCAC -AGAGATTGAGGGCCCTGGCTATGACGATCCCCGTCATTGGCTTCAAACTGTTGTCGATCC -ATTTGATATTTACGGACTGATCGTTAGCCGAGGCAAGAAAGAGAAGTATTGGGAAGCGAG -TCTGACTCAGAGAGAGGATACCACCGCTAAACTCACTCAAAGGGTATGAGCGCGTTTTGT -GTCGGCACAACTTCTCTTTGGAGTTCAGAAATTAGTTTATTTCTTACCTTTTGTTTATTT -GGCTCTCATGTATTTAGTATCTCATATGACTCCTAGAATATTCCCTTGAAAATCGCAGAT -ATATATATCAAACAACCAAAACTCCATAGATCACCAATCTTCCTTTTCTTTACCCAGCTC -CTCCTGGATTTCCCAACTATATCGAGCAAACTTGCTCGCTCGACGAATCCGCTCTCCATA -ACAAATAAACACGGGTGGTACTATACAAAATGCAGTTGCCACTGCAGCAAGTACAGACAT -AGCATTATTATTTCCCAACCGATCAAACATCTGCGTCGTAAACAGCGGAAAGACACCGGA -CAGCAACGCTCTCATGAACGCAACAGCAGCGGTTGCACTGGCAGAATAACTCAGATAGGA -ATCCGAGATATACCCATACAGCACGGTATCCATTTCCGTTAGCGCATAGCCCACCAGAAC -GAGGGAAATAGTGGGCACAATCCACGGAGTGTGGGTCTCGGGCGGAATAGTCCAGGCAAA -CCACCATAGCCCAATCGCCAGGAATGGGCCACCAAGGCCTAGCCCAACGAGCTTGTATTC -GGGCTTGATAGGCCGTCCTTGTCTGCGGAAGTAGTTGAGAATGTAGCCGTCTAGCACGCG -AGTCAATGTGCTGAGACAGGTTCCCAGGCCAATAGCGATGAACATCAATGAAGCTTCGGT -TTTTGCAAATCCCATAGACTCGTAGATAGGCTGGAGGGCTTCTGTAAAGATGTAGATGAG -GGACATGGCGACAGAGATCATCATTGCAATGACGAAAATGATGGGCTCTTGGAAGAAGAG -TTGGGCTGGGCGAAAGAGGGATTGTTTCAGAAATATATTGAGATCCGGGATATGGTCATG -GTTGAGTGGCGGTGGAATTGATAGCATAGTAGTCATATCGGTGACTCGTTTCACCTCGCA -GGCGAGTAACAAGGAAGGTCGAGATTCCCGAATAAAGAAGAGACCACATGTAATTGCAGC -AATGATGATAGCGAAGATATAGAAGTTCCATCGCCTGGGTAGTTGTGAGAAATTGACGCA -GACCATAGTTGAGGGCTACGTACCAATCAAGACCTGCAATAATATAACTGCTCATGATGG -GTCCAAGGATGAGACCAATATTTGAAGCAACTGTCCAATAATACATAGCCCATATTCGAG -CATGCGAATTGAACATGTCCTCTATGCTACCGCCAATGATGGTATAAGGGATTGCGGAAA -GAAGCCCGGCGAGAATGCGCATGATGACAATAACTGGCATCGCACGCACGAGACCAATAA -TCATGCAGCAAATGGCGCTGAGACCACTGCTTACAATGTACATGTCCCTGCGCCCAAATG -TCTCTGACCACGGGGGAAATAATATGGTGCCTGCCACCTGGCCGAAAAGAAATCTGTCGA 
-ATGTCAGCTACGCAAAGTAGACCAAGACTAGCTGATCTCACATCGTGACAAATATGAAAA -TTGAAAGCGTTTCACTGATATTGTATTCATGTTTGGCTTGTGCAGCAGCTGCCGACTAGA -ATAGACCAGGATGTTAGCTCGGCAAATATTGTAGACACGCATACATGGATAGCTGTATAC -ACTCACACCTGCGGTGCTGGACGCTGTTCTGTAATGATTGTTAGCTTTTTGGCAAATCAA -CGTTGGCATCTAGTCTGGTCATACATAAAAAGATCCAAAAGACAGACAAGGACAATATCA -TAAGCCTTTCGTGGCCCAGACCAATTTCGAGGATGTCTTGGATGGGTTTGCAGCCATCGC -ACATATTTTCCATCTGGGGTAAGCTCAAGACGGTGGTCACGCAGGAGCTCAATAGCTGAC -GCTTTGGGCTCGTCTGTCAGTAAGGTCGATTCTGAGGAGATAGACTGCATATTGGGCAAC -CGTGGAGTAAATCATATAGAGATGATTCTGAATGTTTGACTGAGTCACCGTGATCTGCAA -TACGTCAGCCTTGAATGAAGCTGGTCTGGAGTGGGATTCGAGCCTTTATCATCTTCGCAA -ATATTCCTGCCCGGACATTTTGATGCAAAACCTTGCCCCGAGGAATGAAGCCCCTTGCGA -CTTGCGCAGTGGGGATCTTGGTACTCAGTTGGCAATACATATAACGCGTCAGTCACCTAT -ATTTTCCCCAGGAATCTTAATATGTAATTACCGAATGCCGTCTACAGATAGGTGGATTAG -CTGAAGCCAAGTCGAGCTTCGTGTTTCGAGATCAAGTGTATACCGTGAACAACCTAACAT -CTATCTAATTGGCTATTTATATAGCTCAGAGAATGATGACGATTTTAGCCACGAGACACA -ATCTATTGGCTTAATTCCTAGGGATAGCTTTCTTCAACATTCTTCATGGTGATAGCTCTG -CGCACCTCAAAAGGTGCAACTCGTGGAAGAATCAGAGAACAAGGAATAAAATCCACCTGC -CTAAAATGTCACAGTATTTTGAAATTAGCTATAGTAATACTCAAAACGCTATGCCCGAAT -CCCAGGGAAAGGAACAAGCTGTGATTTCCAAGGATATCCGGGGAGACGGGTGACCCGCTC -CTATATAGACTGCTAGGGCCTAGGATTTGAATGTCTTTGTTAAAACATAACATACCAAAT -ACCACCTTCAAGTCTATAGGAAATCCAAGGCTGCAGGGCGCGAGAAACAGGCACACCATA -TCATTGGCTATAAATGCTCGCCCTGATAGGACAGTTCTGAAACGGACATAGGATGGTCTT -CATACCTTCACACCAATCAAGCCAACAGTATCTCCACTAGAAAATTGCTTTCATTCAGCA -CATTATCGACTTCGAAGCTACTGACAACCCTTTCGGACGAATTGCGATCTGTGACCACCA -GGGTCCATGGAGGCATGGAGACCAGTGAAGAGAGCACACAGATCCACTCTTGCTCTGAAC -TGTGGGATCTGATCCGTTTGAATAGGATATCACAATTGGATACATCTTTCTCCAGTCTCG -CCCATATGCTCTCACAAAACCTGTAGAGCACACAGATCCCTTTGTTGCAACAGATAGGGT -CCCCGAGTACCCCGACAAATTTTCAAATATGAAAGGAAAATCCATCAGATAATTGATTTG -AGAGATTCTCGATTTCTGGACTAGCTAAGACCCGATACCTGAACTCACCTGTGATACTCC -CATATACACGGCGGTTATACCTGCAAGGCGGAAAGCCAGTGGATCTGCGCCAAGGCGATC -TCCGACGAGGCCACCTCTCCGCACCCTCTAGCTCTATCAAAACATAACCCAACCAACCGA -ATTAGCTGACCACGATACGTATGTTGACATCCACCATTCTGCAGGAAGAAAAAACGAACA 
-TAGTTTCCTCGGCAGTATCTCTAACCAAGAACAGTCACTATTAGAGCGACCACCACAACC -CTCCCGTCTATCCGTTGATCGCGAACCGCCCACCATGCTATCCGCTCTCTCCTCCAGCAT -TACAAATGCTCGGCAATCAGTCGCACAAGTGCTGAACTTTGCATTGGTACTTTCCACAGC -TTTTATGATGTGGAAGGGCCTCTCGGTCGTGTCCGCATCGAGCTCTCCAATAGTTGTGGT -GTTATCCGGTAAACAAAGCTATTTCGCGGTGGTCTGAACACAGCTCACATCCCTTTCCCA -CAGGTTCAATGGAGCCCGCATTTCAGCGTGGCGACCTGCTTTTCCTCTGGAATCGAAACA -CGAGGGCGGAGATCGGGGAAGTTCTCGTTTATAATGTTCGAGGAAAGGATATTCCAATTG -TCCACAGAGTCGTCCGCACTTTCCCCCAGGTTGAAGGACGTGTAAGCGCAAGGAAGGTGA -AAGAAATTACAGTGTATGTCCTTGCCCATCGCCCGGCAACCTTTATTGTGGCTGCTTCTT -TATGCTAAAGAGTATCTAGGGACACAACACCTAGCTCCCATATGCTCCTCACCAAGGTAG -GCGATCTAACAACTTGACTGGCCTAGAAAATTTCTAATAATTCGCTAGGGTGACAACAAT -TTGGCAGACGATACTGAGCTGTATGCTGAAAGTAAGCGATACGAGAGCGTGGAGGACACT -TCCGAAGATTACTGACAAGCCTAGACCAGGATTATCTCGACCGGGCGCAGGACATTGTTG -GCAGTGTTCGTGGCTACATACCTATGGTTGGATATGTTACCATCATGCTTAGTGAACACC -CTTGGCTGAAGACAGTGATGTTAGGTCTTATGGGACTGATGGTGATGATCCAGAGAGAAT -AGTGTACTATAATATATATCGGGAATGGTGGAAATGGAACGAATTCAATGTTTAAGGAGT -ATCAATGGATACTTATCAATTTCATGTGTTATGTCCACTATGATGTGCGTCCATAAAGCG -GCAAAGATATCGAACTTCTCCACTGACGTCTTTTGCCGCGCACCCCACTCGGAGCTTCTA -GCTTTGACTAAAAGCCCCTTTGTTCCTCCAGCATCTCTCGCAAATAGTGTCTGTTCATTA -CTACTCAATAAATCCCAATTTTTGTCACTGCCTTGACATATTGAAGCGCGTCTTGCAATT -AACGCGTTTGCCTGCATCCTCCCTGAACCTCCCCGCTCTTATCACCCCGCCAAATCCACA -ACTGAACACCACAATGGGCAGCATCAAGCGTATATCCAAGGTAAGTGCCCATATATCTAC -ATAATTATCATCAGCTATCAGTGTACTAACCATATATTTCAATGATAGGAGTTGACTGAG -CTCACCGAATCCCCCCCAGAAGGGATCACGGTTGCACTGTCTGATGAGTCGAACGTGTAC -GAGTGGAGGGTCACAATGGATGGGCCTGAAGGCTCACCATTCCACGTATGTCAAAACATA -ATATCTCCCTCCTTTTTCCTCATCATCTTCCACCAAGGTCTCCAAAAGGACATCCCCAAA -ATCGAACCTCACAGTTCAAGTCTAAGAAACAGTGTTCTCTAACCCCGTGACAATGCAGAA -TGGCAAGTTCCTAGTCAAGCTCTCCCTACCCACGGAATACCCATTCAAGCCACCGACAGT -CTCCTTTGCAACGAAAATTTACCATCCGAACGTAACAAACGACGAAAAAGGAAGCATGTG -CCTAGGCATGCTGCGAGCCGACGAATGGAAGCCCAGCTCGAAGATCGCGGCAGTCCTGCA -ATTTGCCCAACAGCTACTATCTGAGCCTATGCCTGACGACGCCGTCGAGGGGCGAATTGC -TGAGCAGTACAAGAACGACCGGGCGCGCTACGAGGAGGTTGCGCGGGAGTGGACTCGCAA 
-ATACGCTTGTTGATAGTGATGCTGTGCTCTGATTCGAGAGCCGGCTGCTCTATCGAAATC -GATCTGTGTTGTGTATGCTGCTGCCCTGGAACTAGGCTCGCAGAATGGCCGCATGTTTCG -AATCTTGCCCGCATCATAGATCGAGTCATACCTATCTGAGAGTTAGCCCGCTTACGACTG -AAACGCTGTCATGTTATTATTGTAAATGGCGGACCTGTGAATTACTGGTGGATTTAATTT -TGCCGTTTTTTAATCTAACGGCGTCATACATCGTTCTGTAATGTCTACCCTCTATTGCAC -TGGCACAAAAGTTCGCTCGGCCTCAGGGGTCTCCTGCTTCTTTCCTTGCGCTTCCTAGGC -GTGTAATCCGGAGCTGATGCCGGCGAACTACGGCTTGCTGTTGGGAAACCAGCGTGCTCT -CGCAATTCGGATAGCGATATAGATTAGAAGCTGGTTAGAATGATCTCGCTACCTATCCTT -GCTCCACATTTTGAACTGACCCACTCGGTATGTCCGAGTATCAATGTATTGATGAAGTCC -TAATCCTGAGATTCAACTTGCTATTCGCCTGAATCTTTGAGCCTCCGATTAGCAGTTCAA -GTATTCGAGTCTCTCAACAGAAGTCAAAGAGATATGAACAACCACCCATCACCTCGCCAT -TGAGAAACAAAGAGCCAAACCATAACAATCCCACTCTGTAGAGCAATCAAGTCAGCAACA -ATACACAATAGCAAATGATTCGACACTCAACATACCAGAGATACCCAATTCCCGATTTAA -AGCACAATAATAATCCTCCTACTACCAGGACGGGTAATCTCCAAATCATCAGCCGAAGGC -CGAAACCCATGCTCATTTAGAGTCGAACGATTAGACCTCACAAAGGTTCGTTTCATATGA -CGGGGCAATCGATTGAAGTCCGCCACGATGCTCCGGTGCAAAGATGCCACATTCACCTTG -GCCTGTAGCCAAGATCCGCGGTGGAGATTGAGCTGTGCGGGTGTGGTTGACGAATTGATG -ATTGCCAACATGGTTCGGTCATAGATTCCAATTGCTGTCTTGATGTAGACTAGTTCCTCT -AGCTCGCCGCCGAGGTCAAGCAGCTGTTGTTCGGGTGGGGTCGTTAGTAAGGAGAGGTAT -AGCTGATTCGTTTATACGTACCGCTTGTGGGATCCGGATTAATTGGGATGGTGGCATGGT -GGGTTAGGGAGCTGGGGTGATTTCTATTTGATAGAGGTCTGTTAATATGGATGCAGATGA -AGGTGGGAGGATATAGGGTTCATACTAATGTTTTGGGTTGGATGATAGAGGTTAAGGCAT -GAGGAATGAGAAAGGGTAAGGTGCTGGTTAATCAATATGACGAGCGCCTGCGATAATAAG -AGAGATTTAATTGAGAAAGACATGAGAGAAGCCGCATGGTTTGCCTATCGAGTGAACCAC -ATCGTGTCAAGAGCTCAGGGTTCGGGGCTTATTTACAACTGAAGAGGTAGGAAACAAGCA -AACATCAAGTATATTCGGTGCTCTACCAGCATCTATGGATAAGACATATGTAATTAGAAG -GTCGAGTTGATGGTGAACAGATGAAAGTGTACAAGTGAAATCCCAAGGAAGAATACTACA -TGATCTGGTCCGTGATTACCTCGAACATTGTTTTTGGAATAGAATTATCAAACAGAACGT -TACAGTTCATATTCCAGAGGAGCGGTCATAAGGACCGTGATATGGACATTCATTACAGCG -TGTCAAAATACAACGCTACATACAACCTGGACGTCCACATCCCCAGGCACGTCCAACGCT -TTTTCGGCCGCATGAAGAGAGGATATGAGGGTAATCAAAAAGGGCAAAGAAAGGGAATCA -AAGATGCGACATCACAGAGACATCAGCATTTACAGCTGGCCAGCAGTGTTGAAGTCATCA 
-AGGGCCTCCTTGACACGCTTGCTCATGGTCTTCTCACCCTCGCGAACCCAGACGCGGGGG -TCGAAGGCCTTCTTGTTGGGCTTGTCAGCACCGTCGGGGTTACCGACCTGGGACATGAGG -TAGTCCTTCTTGTTGAGAACGAAGTCACGGACACCGGTCAGGTAGGCGTACTGCATGTCA -GTGTCGACGTTGACCTTGACGACACCGTAGCTGATGGCCTCCTTGAACTCCTCCTTGGAG -GAGCCGGAGCCACCGTGGAAGACGAAGAAGACGGGCTTGTCGGAGGCAGACTTGATCTGC -TCCTTAACGTACTTCTGGTGCTTGGAAAGAAGCTCGGGGTGGAGCTTGACGTTGCCAGGC -TTGTAGACACCGTGGCAGTTACCGAAGCCAGCGGCAATGGAGAAGTAGGGGCTGATGGGG -GCGAGGGCGTTCTGGATGGCCAGAATGTCCTCGGGCTGGGTGTAGAGGGAGTTGTTGTCA -ACAGACTCGTTGTTGACACCGTCCTCCTCGCCACCGGTGATACCGATCTCCTATAAAAGC -TGTTAGTACTGCCTCAATGCAGTCATTGGCCTTTTGGAGTTGACATACCATCTCGAGCCA -CTGCTTCATGGGGGCAGCGCGCTTGAGGTAAGCGGCAGTGGTGTTGATGTTGTAGTCGAC -GGGCTCCTCGGACAGATCAATCATGTGGGAGGAGAAGAGGGGCTCGTTGTGCAGCTTGAA -GTAAGCCTCATCGGCATCGAGCATGCCATCGAGCCAAGGGAGGAGCTTCTTGGCGCAGTG -ATCGGTGTGAAGGATGACGGGGATGTCGTAGGCGGGGGCCACGCTACGGATGTAGTGGGC -AGCGGCAATGGAACCAGCAATGGAAGCCTTCTGGCCATCGTTGCTCACACCCTTACCGGC -GAAGAAAGCGGCACCACCCTGGGAGACCTGCAGGATGACGGGGCAGTTCTGGTCACGAGC -AGCCTCGAGAGAAGCAACAACAGTCGAAGAGGAGGTGACGTTCTAGGAAGAGTTAGTATA -GGATCACGACTGTATAGTAGTCGAACTTACAATGGCGGGGATGGCGAAGCCCTTCTCCTT -AGCGTACTCGAAGAGACGGAGGACATCGTCGCCGACAATGACGCCGGACTTGCGGTTAAG -TTGCTCAAGAACACCCATTATGAAGTTTGTACAAAGATGTAGAAATCTGAAGAAAGGAGA -AAAGGAATTAGAGAAACTGTAGTATTTAGAGAGCTCCTTTACCGGGAGCTGAAAGCGGGC -AGAGTTAAATAGAGGGAAGAGGGGCAAATTGGAGGGGCAAGCGTCATCGGTGAAGGGAGA -AGAGACAAGGTAGGACTCACCGGTGGAGATGTGTGGAAAAGTGAGGTCAGTGTGATTTCA -AGAAAAGAAAAAAGAAGAGATGATATAACCTCCTTATGTATGGCTGTGCTCCCGACCCTC -CCCGTGATGGCCCGATGTTGCCGTTTGAGGTCTCGGGAGCTCCTAGCTTATAGGGCGATT -CATTGGGGGATTATGAGGGGATTTTTTCCTATATCATGGGTTGCTGCTGCGTAGTTCTTG -ATCAAATCTATGACTGATAATTGATAGACAGCTAGAACAACTATAAAACCTATAAAATCT -ATACAGTAACTCCCTAGAACTCCCAGTGCCCCCGACAATCCGTCTTTTGTTCGTGGCGGG -GGTGCCGAGGAGTTCGAGGCCATTCGCTCCAATTCCACAATCCCCTACGATATGTTAGTC -ATATATTCAATCCGGTTCATAATTAGATATTCAGGTTTATTTGTCCTTCATGCTCCAGTT -TATTAAAAGAATGTTAGAGAATGTTGAATAATGTTCAAAATATGCTGTAGCTGCAGAGCT -GCAGAAAGTCAAGTGACATCATCGCCGTTGATCGGAAGTTACCTTTTCCCTCTTGTTCTG 
-ACTTGATCCGGTGGGCAATTTTCAGTCGGATTTAGTACGTTGTATGTTGTACATCTTTCA -AAATTTCCCTTCTAATTTGATGGTCCCCATTCTGTACAGAAATATGGGGATATGTGTATA -TACTGGACTCTAAAAAAAAGGAAACGAATCGGCCATCAACGCCCCTCAGTAGGGCTCGCG -TCAGGCTTCTCCAGTTCGTAGAATAAGAGGTAGACCTCCCTCTGCATGCCCAGCACGTCC -GAAGTCTTACATTCCTTGATCTTCTCGTCGGAGATGCGCCACCAGCGATCGTTCGGTTTG -CGACGGCGCCGAAGCCGCGAACTGACCCCCGTGGGGGTATCCTGGGAAGCGTTGGGTATA -CTGCGGGAAGGCCGCGGGGACGGACTCCAGCGACCCCGGTCAGTCGTCGGGCTGTTGCTT -TCCGAGGCAGGCGACAGCGTCTGTTTAGACGAACGGGATGTGTTGCTCTGGAAGGAACGA -CGCGAGCCAGAGGTCGTAGGCCGAAGGTGAGGAGGGGTGAGTTGAGTCGCCGAAGCTGAG -GTCGAGGCTCCAGATAGTGAGGTCGATGAGCCTGCTCTCGAAATGCTTCCATCACCGTCG -ACATCGGGGCTGCTGCGTGGTACAATCCGAGGACTGGGCGCCGCGGAGGGAGTTTGACTG -GCAGCTCGACTTTGCGCATAGGAGCTGAAGGCCTCGGGTGTCGAGAAAGGTGGGTAGATA -TGATTCCGTCGGAAGGACTCGTAGTGTCCGCTATGATGACTACCTTTGTGACAGACAATC -GCAAGCAGCTTGTACCACTTCTGGCTCAAGATGCTGCCCAAGGGGAGCCGCTCTGGGAAC -GAAACCTTGGCCGCGTTCTTGCTCGAGCTGCTCTGATCAAAGATCGACCGCGACAAGTGG -ATGGCAATGACCTTTGGGAATGCCGTAATGCGCATATGCCGCGCAATCCTGCGCTTGGGG -GCGGTTTCCGGCGGAGGGAATACGATACCCTCCAGCTCAGCCTCAGGGTCTGTCTCCAGA -GCCTCGCGGAGGCGGGAAATCTCCTGGTCCAACCGATCTCGACCACCTTGTGTTCTGGTG -CGGGCTAGTTCAGTAGTCTTGACATCCAAAGCATGCTGCAGGCGGCATCGATCGCAGCGG -AAATCCTCGATATATTCAGTCTTGAGGAGGCCGTCGAAGCACGAATTGAGCGTAGTCGAA -CTCTTCTGCGGTACCTGCAGTGTCAAGTTGACGAACGCCGTTTGATTTGGCTTGTATTGA -TAATGGCAGAACTGGCACTCGATCTGCGACTCCATTTTACCTTCGAACGGGAACCCAGAC -TCCCGGTCGATTTCCTTGAGTTTCGTGTCAATGATTGCAGGGAGTCCATTGGCCGATCCA -TCATCAATGCGCACCTCCACCTCGGACGAGCTTTCTTGTGTGCTGGCCTCTTCGCCTGTC -GGGCTGGAAGTAGCTGCATCAATGGCATCCAGTGCTCGTTGACGTGCACGCGCACCAGCA -TGGTACTCATCACAGAGTCGCTCGAGAACAATCTGCAAGAATTCCTGCGCGTCCTGCTGA -TTGCGGCTGATGCGCGTGCGGAACGCGTATTCCAGAGCCTGGATAAAGTCGCGGGCCGAG -ATTGTTTTCTTATAGATCGGTCGCTCATTCAATCGGTCCAGCATCTCCTTCAATGCCCGT -GTTATTATCCCCTGTTGCAACTCCCGCAACTTGTCCGACCGCGCCGGCGAAAGTTCGTCC -TCCCGAGGCAGTGTATGGTATATTTCCGGTCCGTCTAGCTGTCGTCGGTGCAGCTCGTGG -ATCAGGTAGACCCGCAGATCCCCCAGTCCGGCTAACGCCTGCAGCACCGAATTGATGAAG -CAATCATTGGCTAGGTTGGACAGACCCACGATGCTCTTTTTGCGATTGCTGTCGTTTGAC 
-TCATCTCCGTCGATTGTGTAGTTCGGCCCAAAGACATAGAACAGAGCAACGGCAGCGAGT -GAGGCCCCGGCCGCATAGGCGGCGACCGTTGTCGGCTTTTCTTGCATCAAGCGCTGTGGT -ATAGAGGAAGAGAGGCTCGGTGGTTCGTGTCGAAGCAGGCAGACATATATCTCATTGCCC -CATTCTCGTATTGGCCCGGATATAGGTAAGGGCGAGGATTGTCCTAAGATAAATCACAAG -AGAAGTCCAGTGTGGGGTCCAAGGTTGAAGGAAATAAATTTGCCGAATCGAATAGGTTGG -GAGACGGGAGAAGAGGCGAGAGCGGAGCGCGGTTCATGCGGAGAAAAGAAACAATGAGCA -TTTTCGTCCGGAGAAATCAGGTAGCAGTAAAACGAGACAAACTCCACTCAAAGTTGAAGG -CACATTCCCCTTTTTCGTCTGATACTCTCGTTTCATCCAATGGGGTTCTCTTGGCCCAGG -CACTGTTGTAGAGGGTGACCGGATTCTCCGGATATCCGTCTTACGCATCCCCCGCATATC -TGGAAGGTTGACCGTTACATGTAGTCTGGGGTAAAGCCATCTATTGTCGGGACAAAATTG -GGGAATGTCAGAGCATCTATCGGAATATGGTCTTAGGCTCTCAAAATTATGAACTTTTGA -GAGTCATGTCCGCCACCATATATGTATAGCGCCTCAGTTGAAAATTACTGGAATCATAAT -TGGCCAAAAACATCAAGTACTTCAAATAGCCCTGTAGCGTGTCACACCAAATGGAACTAC -AGAGCTTCTACGGCCCTTCTAGAAGAAAAAAAAGAAAGGAACGATTCATAATATTTTCAG -CATTTGTCATATGTCTGAACTTCCAACTTTTGTGACATTCCTTATCTGGAAAAGTTTCTA -TCAATTGCACCCAATCATGTAAATTTCTCCACATAGACGCCGACCAGAGCGACAGGCTTA -CGCCTGGAGTGACTTAGGCAGAGTAGCAACCTTCTCATCGAAAAGTTGCTCGGCCTGCTT -CCAGACCCGAGAGTTGGCCCGGTCATTGAGCGTGAGAAGGCTAGGCAAGAGAGCCTCGCT -GAACATCTCGGAGCTCTCACGGGGAAGGAGAGACGGGAGGTGATCAATGCTGATCACGCT -GAGAGGCAGACCTCCAGCCTCCAGCCCGGAAACGGGAACTGTTGGCTTATCAAATGTGGT -CGTGATGTCGTACAAAGGAATGGGGTTCAAGGGGTTGGAGCTTTTGAGAATTAGCTGTGC -ACTTGCTAGCATAGTTCGTATCTGATAACTCACGTGTCGGCGCTGACGTCGCAAATAACG -GACAAGGTTCGGTTGGGGGCAGACAATGACTCGAGGTTCACGAAGTCTAATGCAAAAAAA -GGATATTAATCAATCGAGTCGAGACAGGAGGATTTATGCATCAACCTACGTGGAATCTTG -GCGGAAAGGTAGATGCAGTTCACGAAGATATCCGCTTCATGTGAGATTTCCTTGAAAGGA -CCACCTTGGGAAGTTGGTTCATGAGCTTGAAGTTTAGTTGAGGTAGAAGATGATAATTAA -CTCACCTCGCGCAGTCTCTGCCATATCCCACTCAATGATGTTGGACTCGGGAATACCGAC -ATCCTTGGCCAGCTGGACGGCACCGCTGCCACAGCGTCCGAGCTATTGCATTGTTAGTAT -GTATATCGCTTTTTGTGGATCTGCGCAAGACTCACGGCTCCAATAACAAGGATCTTGGGT -GCCTTGCCAGCAACCTTCATTCCTGCCTCGAGCGACTCCTTAACGGACTCGACCAGGCGA -GTTTGGTTGGCGTAAGGAACCTCACCAGGCAAGGTCTCGTTGGGGTGTGTCAACTGCCAA -GCCCAGTTCTTCACAGCCAAAGCGGATCCTGTACCACGTCTTAGTATTGATTGTGTCAGG 
-TAATGAGCTTGACGGATAGATACCTGCGTAACCGGCGGACCATCCGAAGGCTAGGAAGCA -AGCCAAATTAGCATTTGAGTCAAAACAAGAACACTTCAGGATATGAGTCTTACCAGCAAC -TCGGCGTCCAGAATCATCGGTCAAGAATTCAAGGTCGAGCAAGGTGCCGTTGCCGCGAGG -CCAGCGGCTAAGAACCCTCTCCCAACCACCCTGCTGCTTGAAGCAATGTGCGAATGTGAC -GTGGACGTGCTCCAGAGGAAAGTCATCCTCCGGCAGCTCCTTGAGACCCAAAATGACGGC -ATCCTTAGGGGCATCCTGAACCCAGGACCCCTCCGCGACCAAAGGCGCACCGATCTATAA -AAATTTATTAAATTGAGGGCGGGTTCGGGCTATAGTACCGAGTGAACCATACCTTGGAGA -ATTCCTCATCTATAGAGCCGAGTTAATCATTGAATCCAACGGCAAGGGACGGAGGTGGCA -CGGGAAGTATACATACCTTCAAAGATGCGCTGGGTAGACCGCTCGACAGTCACGTCATAT -CCAGCATCAATTAATGCCTTGCAGGTAGTTGGGGTCACTAAATGGTGTATTAGCCAATTG -CTTCCCCGCCATTGTACGGGCTGAAGCTACTCACAGGCAGACCGCGCCTCAGCGGGCTTG -GTCTCAGCGCGCAGCCAAATCTTTTGTCCAGCCATTTTTGAGAGTACAGGATTAAAATAT -GTTGAAGAGGGAAAAAGAAACTAAATAATAATCGCCCTATCACCAATAATGACACAGACT -TTCCCCTCCAGTAATAAGCGGGGATGGAAGGGTTATTCCGCTCCGAGAAATGACTCAGTC -CCGATTATCGGCCACTTGAGACCCAGTCCACCGCAAGAGTCGAGGTTTCAGGATTCAATC -CTATTCGGGGAATTCATTTAAAAGTTACAAATACCCTAGGAAGAACTTTGACCTAAGGAT -ATATTGGAGAGTAAGGAGCAACAGGGGTTTCTTCAACAGAATTTATAGCAGCACAGATGC -CTGCATACTACATACTAGTCGTGTACAAACATTTCCTTTCTTAGAAGGTGGTATGATATC -AGGAATGTTGCGCAGAACTTCTGCAGTGCTTCAATTGAACTGTTTCCCCTCCGGATTACG -GTCAGGAAAAGGATATTCTTCTTACTGCAGCGCGTATACACCCAAGCCGATGGGGACATG -AATATACACGGTTCAAAGTTGGAGCAAACATGATGCGATGGAAGTAACACCGCAAACCTG -CAAATAGAGTGACTGCAATCTATTCAAGCAGCCTTGGTCCGGCTGCGAGTCCGGCTACCA -ATGCCATCTCCTGGGATAACCTTATTCACAACATCCTTAATCTTATCCTTCTTGGATTGA -CTAGCCTTCTTTTGTCCAGCGTTTTGTGTGCTGGAGGGAGCATCCGCGGTGTCATTGGAC -TCTGTGGCAGGATTTTCGTCTGTCTTCAACTTCTTGGTATCAGGCTCATCCAACGGTTCA -GGCTTTTGTTGCTCCTCAGTTCCAGCCAGGGGCGGAGTGGGCGTCACAGTGGAATCAATG -CCACGCTTCTCGCCCGCCACTGTTTCATTCATGGGCCCTACTGAGACCTGCGACTCACGG -GCAACAGGAATACTGCCGATGGGAGATCCCGTGGACTCTGTGGCAGGTGCCTTGGTCTCT -GGATCGGGCTTGGGTGCAGCAAGCCCGGACACTTCCTCACGCTGGTGATTGTCTATATCA -GGCTTTGCTGTCTCGGTGATAGCCGGGCTATCCGTTTGAGTTGTGTCTAATCTGTTCTGG -AATGCATGCGAGTGTTCTCTGACTAGCTGGGCAGTGTCCTGATCTGTTATATCCCTTAGT -CAGCATCTCGTATGCATTTTACAAGCAGATAGTTACGCAAAGAAACAGCGCCCTGAAACC 
-ATGTATTGACATAATAGTTTCGCCATTTGAGCCGATTTTTTATCAGATTATGCAGCAGTG -CATCTGCAGACAAATTACGTACCGGAACCTGCCTTGAAGTGCATGGGGACTCCTTCAGTA -GTCATTGTGATCGATGATGTCATATGCCGGAGCAGTGTGTTATAAATTGGTGAAGATTGA -GTGGGAGGGTATTCGCAAAAGCAAAGCGACTGCAATGCTTCCTATGGTGAGTTTGTATGA -GGGAAAAAACAAGTGAAGAGGAGTTAATTTATTTGAGGGGAGGTGAAGTGAGGTCGCGCG -GTGACGTACTTGAGAGCACCGGAGACGCTACCTCCAGCTGATTCGCCGCTCCAGCCCTCT -CAACTCTCCACGGCATTCTGTCGCACAGCCTCACCAGCTCCAAACATACAATTGCCACGC -AAGGTTCTCCGCAAGAGATCTGTAAGCACAACAGTTTTGCTAGGGGCCAGCCATTGATCA -CAGAGGTTGTTGATCTTATCATAAGTTAGAAGACCGTGTATAAATGCTAAGAATATGCTC -TTCATTGAGCCTAAAGGGCTTGTATAGGAACTGGCTGGGTCACTATTGACAGCAGGTTGT -ATATATGTATATATATCATCGAAGGAGCGCTTCCGTGTATTGGTACGACGGAAAGGCAAA -TTTGAAGATAATTGATAGTGTTCAGGCGTATCGCACACCTCCAAAAGTGTCAGTTTAACC -AAGTCTTCCTGTCAGCTTAAATACCTCATACAACTCGACAAGTGGTGTATGTAAGGTCGC -CAGAAGATGAGGCTCCAAATCGGAGTGGAGTAGCGGCAGTGGCTTTCCTTTGTTCAGTGA -TTTGGAAAATGACGGGTTGTCTAGCAGCCAGGCTGAAGCAATATCAATCGTGTAAATGGT -ACACAAATGAGTGCGGGGGAAATTGTGTCTTAGCAGTGTGCCGACATGACTGCAGGAGTC -CTGCCTATGATCTCCTCTTAAAGTGGAAACATAGACAGATGTGTGTGTGTCTTGGGTGTT -GGTGCTGGATTGGATGTTCTCCGAACATCCTAACTATGTGCAAAAGACTAGAATGGTGAT -TCGATAAGTTTCAGACTAGGGAATCATGTAAAACCCCAATTCAAGCGACCGAGGTGTTCC -TCTTCAGACGATGTATGTGACTTCTCGGCAATGGGAGCTGCTGGCTGTTCCGGTAGCAGC -TCTGAAATATGCTGCTCTAGCCGGGAGATATCACGGAACCTCTTCTGGTCAACAAGGACG -CGGAGAGCCTCCGAGCCTATCTCGAAGAAAGCATCACGCCGAATTTCATTCCAAGCCTGG -AATTTGCTGGGTGAGAGGTTCGCCACGAATTGGAGGAACGGGCGTGGATCAGGATCTGTT -GGGTGACAGACAGCCACAGATGCTACCAGTAAGGACCGAGGGGACAGCACAGAAACCGTG -TTGCTGAACTGGTTATACACTAATGTCGATACCTGTCCACTCGATGGTTTGTACTCCATG -GCCATACGGCTCAGGTGAGCCCCTGCAGCTAGGAGCATCGGTTTCCTTGACTTATGAGTA -GATGCACTTTGGAAATGCATCTGGCACACACTCAGATATTGTGCCATCGCCGAGCCGTAG -CCATTTCCGAAGCAGGCTTCTGCATCTATAAAATCAACAAGAAGATGATTAAATGTCTGC -CGTGCGAGCCCTTCAGTCATACGGCCATTCTGACTGCCCAAGTCATGGCTCATGAGCATT -TGCAGCCACTGCATGACAGTACTATGCAATCCCTCCGCCACCATGAACTTGGTCAACGAG -GTAGTTGCTGACCGCAAACGCAGAAGGGATTGCCTCGATGTACCATCTGATGCCCAATAC -CAATCAACGATCCTGGATGCCACCCGAGATGTTTTCATATCAGCCGGGGACCTGGAAAGG 
-AGAAGTTGAGATTTTAGACAATCATTGATTGTGAGTGCAGTGACTGAGCCAGTTGCAGCC -ATTTGATCAAAGACTGCCATAGGTTCTTCGGCTAGTCGTCGCTGGCCCTCAAGGCTTTGC -AATGTGGTGGGTTCTTGTGGAATGGGTCTGGTCGGGATGATACGAAAAAGAGGGTTATCG -AGGATATTCTGTAGATGTTGATCGGTTGCATGTGCTGATGATTCAGGATTCGAGAGAGGT -CGGCCATGATTGTTGCCTGATGCCGGATAAGCCCCATCTAATTGACGACGGAAGGAAGAC -GTGAGGGCATTTAGCAGTTGCTGAGACTCTCTTGGAGTACGAGGTAAAGGCTGATGAATC -TTGGGCCAGGATTTGAACTGAAAAATAGATGTACCCTTCATGTTGGTCGAAGACTTGAAG -GTTATCAATGGTACGTTGGATGATGCATGAAGGTAGAGAGGGAGGATCTACGAGGGCGGT -GAAGCCATTCCCTCCCCCTTCCTAAGGGCGAAAAAAGGTGAATGACGACTAGAAAGAAAC -CACACTTATAGCAATTTACAAAATTTGTAAGTGTTCAAATAGTAGAATATGTAAGGGAAT -ATCTATAAGTACATGTAAGCAAAAGACAATTATTCTATACCAAAATATATTAGAACATTG -TAAATAAGTAAAGATCTCCCTTTGTTGTAAATCCCCCCAGCATCCAATAAGCGCCACTCG -CTAAAAGCCAGGCAACGCCAAGACTTGAAGGGAAGCAACTCAGATCCCGTTAGCTTTTGA -TCCTTTCTTCAATCTTGTCATCTCCTAACGCATTCTCCTTCTCCCCCAATTAATCTCTTT -GATTTTACCTCTTCTTTTTCGTCTCTTCTGGCCCCCGTGCCTGTGGTTCTGGTGCTGTTC -CTCTTCTGCGAGGGTGCTCCATTTCAATAGCCTTTCCCTGCGAATCGTGACCTCCTTCCT -CAATACTTCTCCATCTCACACCTTCATCAATAAACATGTCCGACATTCAGAACCGGTCCT -CCGCCTCGCGGGGTAGAGTATCCGCCCGTGGTGGCCGTGGCGGCTTCAGCTCCAGAGGTG -GTCGTGGTGGAAACAGATCTGCAAATGACAACTCAGACCTTTCATCCTTCGAAGAAGGTG -AAATTGGACAGATGAAGAAGAAGTACTCCGACACTCTCCCGACCCTTAAGGAAATGTTCC -CAGACTGGAAGGACGAAGATCTGGTGTTCGCACTGGAAGACTCGAACGGCGAGCTCTTGG -AGGCAATTGAGCGCATCAGTGAAGGTCGAGTCCCCGCCAACCCCAATCTTCCTAACTCGA -AATCAATTGAATCTGGTAGTTCGAGGGCCGTGACTAACTAGCCTTACAGGTAATGTGTCT -CAGTGGGGTGAGGTCAAGAAGAAGACCACAGACCGGACTCGCCCTAAGCCCAAGGAGGCC -CAAAATGCTTCGACCGAAACGACCGTAGCTCCCGCTCGTGGAACCCGTGGTCGTGGTGGA -ATAGAGGGTCGTGGACGTGTTCGCGCAGACCGGGGCCGTGGCGGTCGTGGAGGCCGTGCA -GGAGCCGCTACAAATGGAACTCGCGTAGTGTCCACTGCTGGAGAGACTCCGGCGCCTACT -GCTGTTCCTGCTATCACATCGGAGTGGGCTGCGCCCAAGGCAGAGGAAACTGTTGCAGAG -CCTTCGGCTGAAGGCGAATGGGAATCGACCGAGCCTAAGAGCAGTGTGATTCCTGAGGGC -ACTAAGAAGGGATGGGCGAGTCTTTTCGCCAAACCACCTGCTCCTCCGGTGCAGAAGAAG -CCCCAAGCCCCTCCTCCTGTTCCCGTCGCCGAGAAGCCCGCCGAGCCTGTGGCCGAGCCA -TCTCCATCCGCTCCTGAACCTGTGACTGCCCCCGCCGCCCCCGCCGTCCCCGCCGTCCCC 
-GCTGCCCCCGCTGCCTCCCAGAAGACACCCGTCGCCAAGCCCACCCATCCCGTAATCCCT -GCCATTCCGACTACCACAGTTAACCCTCCTAAGGGCGATTTGACCGAGACGAACCTCGAA -CAGATTCCCGATGTATCCGCTCCCGCTCCTACTGCCACCGCCGCAAGCACCGTTGGCAGT -GGTATCGACCCCGCGGCTGCAGCCGCAGCCGCTGCTACTCCCTCCCGATTCCCCTCCTCT -GCCTACCCTCCAAGTGCCACCAAGCAGGGCCGTACCCCAGGTCTTCAACGCCGTGTGATG -GAACAGCAGCAGGCTGTGGTTATGCCCGGAAACCACGCAGTGGACCGCGCTGCTGTTCAG -TTTGGTAGTATGGGACTGAACGGTGACGCTATCGATATTGATGAGAATCGCGAGGAGGCT -GAGACCCGCGCTCAGCCCCCGCAGCACTCTCCAGTTGCTCCTCGTGCCTCTCTTCCCCCC -GCGACTCAGGCACCCCACTCGATCGAGACTGGTGCCGCTGGGCGGCCTGCTCCTGGTTTG -CCCCCGGTCCCCCAGGGCGCTTCCGCCGACTCTTTTACCGATTTCGCTCGCTACTCTGAG -CCCCAGCAGAAGCCATTCGACCCCTTCACCCAACAGGTGAGCCAGCCGCAGCCGCAGATT -CCAGAGCCATTCGCCAACCAGGCCCCTGCTCAGCCCACTGCCACCACTGGCAGCGAATAC -TCTCCTTTCTACGCTGCCGAGCAGCAGCGTTTCCCCTACTACGCCTCCTATGGTTCTTAC -GGCCAGTCCCAGGATGCCCCCACTGGTCCCCGCGCCGGATTCGGTGTCACTGGTGCTGAG -GCCCAGGCTCAGATTCCCACTACGCAGCCTCCTAACCGCTACGGCCCTGTTGATGCCACT -AACAGCGGACACAACACCCCCAACCCCACCGTCCCTGGCGCCACTCAGCAGACCCCCGCT -GCTCAGCATATGCCTGGCCAGAGTGCGCAACAGCACGCCTACGGCTACCAATACCCCAAC -TACTACAACAACCCCCACTACGCTTCGTACATGAACCAGATGGGCcagcaacagcagcag -cagcagcaacagcagTACGGTGGCGCTAACCGCCCGAGATACGATGATGCTCGCCGATAT -GAGGATCACTACATGCAGCAGCAACAGCACAACCAGCAATACGGCTATGGTAGCCAGTAT -GGTCCATACGCCGGTAAGGGGGGCATGTACGGCCAGCCCCATGGTGCTTTCTCTTATGAA -CAGCAATCGTCTTCCCCTGCAAACACTGGAAGCTTCAGCCAGGCCATGCCCACCCGTGAC -TCAGTCTACGGCCGCAGTGGCTCTGCCCAGCAGTCTGACAACCAGTCTGCTACTGGCGCC -AATGCCTTTGGTACTGGGATGTCGGATGTTTTCAGCCGTTCCCAGGGTAGCTTCGGCCAA -AACCCTCCTATTGCCGGACAGCCCCCCGTTGCCACCGACGAGACCAAGGGCTTTGACGCC -CCCAAGGCCGGTGGTCCCAGCCCCTCCCTCGCTCAGAATCGTCCGGGATCCGCTACCAAC -AGCGTCCCCGGTCAGCCCCAGGCACAGACTGGTCTTCCCCCGCTTCAGGGCCAGCAGGGC -CAGCAGGCCTTCGGCAGCTACCCTCAGCTAAACCCCCAGTACGGTGGACTCGGTGGACTC -GGTCACCAGGGTGCGGCCACCCAGACTCACCAGGCCTCTGGCTACGGTAACAACTACGGT -GGAGGCTTCGGTAATTACTATGGAAACTCTGGTCGTGGTGGCTGGGGTGGTAACTACGGC -CACTAAAAAGACAAAAAGAAACGAAGGGGAGACATAATGACCCACCAAACACATATGGCG -TTGCATTTGCGGATGATTTGAAAAGGGGATTGGTTTTTTGTCCACCTTATCAGGAAAAAA 
-ACCTCATGTCTTGTCAAGAAGAACAGACAGTACCCAGGCAATTTCTCTTCATCTTTCTCT -CCTCAACATCTGTGTAATCTCTACTTCTCTTATTTTGGGGAGCTTGGTTTTCTTTACTTT -GGGGGATTGTCTTCTTGATGTTTTTTGTTTTTCTCGTTTCCTTCCCCTTCCTCTCAGCCG -TCTTGCTCTTCTTTGCTACCACTGCTCCCATCTGATGCGGTCTCTGTATTAAATTCACCA -ATTCCTCTCTAGGTTTCTCTGTTTTTCATTTTGTTTTCTCGTTATAACCTAGTCGGTGTC -TATTCGGGTCCGGTGTGTCACTTGTTTCCGCGGACGGTTTTCCAGTTTTTTTTTTTTTAT -CTACTTCTTTCTCTTCTGTTGTTCTGTCTTTGTGTTTCCTCACCATGTGCAATATTGGTC -GATGATTATGATGTATGGCTGAATGGGTCTGGTAGAGGTAGATCCAATGCTCAGTTGTGT -TCTGGGCCAAAAGTGTCATGAATTGTGTATGTCCTGTATGATCCAGTATGGCACCTATAT -ATGGAGATTGCTCTAGTATGATATACTGTAGCCTCTCGTAGTCACAAGTACTAGGCATCT -GTGATCCACGTAACATTCAACCTCTAAATTTACTTGCATAAGAGGTTCAAAGATGTAGAA -CATCAAAATCATTGAAGATACCATATCATTAAATGGAAAGGTCGGGTCGCCAAGTCCAAG -CATCAAAGCAAGTCCGCGGCTATGAGTTCAACCTGGACCGGGAACTATATACAATCGAAA -TGCAACATGACACGGCACAGAATAAAAATCCAAAAAATCAAATCTATGTGGTGCAACCAA -GGCCCCAAGACAAATTCAGATGAACAAAAGAAGAGCAACCGAGTCCTGTGATAATTTACT -CAGCGATACCAAGACCTACGCCGCCAATCTTGTGGTAAGGTGGCTGAGCGCCAAGAGCCT -TCTGAGCATCGAAGCGCCAGATACCGCCAGTCTTATCAGTCTCGAAAGTGAGATCAGCCA -CTCTAGCAAGACGCTGGATCAGGGGGTCGTCGGCACTATCCTCGATACGGTTGAAGAAAC -GGCGCACCCACTCGCGGCCCTCCTCCTTTTCAATACGGCGCTGTTCACGCTGAGCAGTCT -CAATGCGCGACTTGAACCCGGACACGGCATCCATGTCCCCCTTCTGGATGTTGGCGGCCA -CGTCGCTCCAGGCACGTCGGCTCTCGTACAGATCCTGTTGGTCGAGTGGAGCCAGAGCCA -GCGGGGTGACGGGATTTTCCTTGGAGTTCCAGGTGTCGACGACGTTCTTGGTGCGGGCAT -CCTTGAGAGAGAACTTGTCCGACCACTGGCCCTCTACTGTGTAAAGGGGATTCTTTTCGC -CCTCGCTCTCTTTGAATAGGCTCGCGCTGAAGGTGTTCTTCTTGCCACTCAACCAGCCCT -TGCCGGAATAGTCGATCTTGGCGACATAGCCGCTGCTACTGACAATCTTGGTCGACTTCT -CCAGTTCGACAAAGGGCGAGCCGTAGATCAGCGATTCGATGTGTAGGCTGGGCAGCGTGA -TCAGGTATGTCTCCTTCTCGTTGGATCCGGGCGGGGTGAGTGTATACAGCGCATGGCCAA -TCTGCTTTACAAGGATAGTGCTAGAGAAAGAGGCTTTCTGCGCGTTATATCCTTGCAGCT -GTGGTTGTATTGTCAGTTCTGGGCATCTCTCAGGGACTTGGTACAGGCTTTATGCACTCA -CCTCAACGCCATTCTTCTCATTGCGGATCGCGTAGGCGGTGGCAGGAGGATGGTGGCTGT -GATTTGATCAGTGGGTTATAGTAATACAAAAGTAGCTTCACATACCTGACTTGTTCACTG -ACCAAGACAGTCTCCCCTACATCCGCGTCACCATCCCATTTACCGATGAACAGCTCTCCT 
-AAGAACGGGTTCAAAGGCTTCTTCTCACTGCCGAGCTTCTCACTACGGCTGCAGTATTGC -TGGCGCAGTGTGCTGAGGAACCACTTGAGTACCAGGAGAGCACGCTTCGCGGGATCAGCC -TCCTTAGCCGGTGCCACGAATAGGGCAGGGTGCTCGGCCCAGTAGGCAGAGTACTCGGTG -AGGGAAGTAGTGGAGAGGATGAAGGGAGGAGCAGTCAAGGAAGATAGATCACCGTTGAAG -GAGGCGATCGACTAGAAATTTGTTAGGCTCATACACTCGCGCAAATGGGGTTGCATACCT -TTAGGAAACTTGTCCATGAGCCCTTACTATTGGCGGGGACCTGACTCTGAGGAGACATGT -TTTAGAATAAGAATGATATCTTGATCAAGACCAAAGACTTGAAATCTCCAAGACAGAAGA -AACGTGGGGGAGGGACGAAGTTGGTCGTTGGCGGTCAGAAGCGGGATAACGTCATGGACA -GGGGGGAAAAAGTGAACAGCCACAATCGGATCGGGGAATTTTCCAGATGCTCCAGTGGAA -AGACATCAAGACATTCATCTGATACACTTCCTGACATTGCCATCCAGCATGTCACACTTG -ATTGTGCAACGTCTCTCGCAGGGCCAGTGCTAGGCTATAACAGGGGAGGACCTGAAAACG -GCCAGCATTCCACCAGGGCATATTTGGTTTCTAATTTAATAGATTTAGGTACATTGTGAT -TCACATTTCCAGCCAGGATATTCCTTGGAGACTACCTACCCTCGGTACCCAAGTAGTCCG -GATTCTTTTCCGCCCTTTTGCCCTACCCGGCAAGGCTTCGGGTCTTCTCCCCGAATCACA -TTGGTCCATTTCCTCTAGATTAGGCGGATGGACCCCGAGATGATAGCAAACTCTCTCTAT -ATGGCCTCTAAGACGCTACGTCGGAGACAATCCGCCGATCCTCGCCCAAGATGATAGAAA -AAAAAGACATTCGGCTTTCTCTCTGACTCACGCTTCAAGTCTGCGGTGTATCTCGTTGCT -GTTGACTTGCTTAGACTCAATCCATTTACTTATGTTTCTTCGTTTTTTACTATACTTCCA -ATTGAAGATATCGACGCATCTATCCGACGAGCATGACCTCGGAAAGAAGCCGCAACTAAG -ACAATTGCAGAATTAGTTTTCTCGTATCTTGCATCTGCACCACGAGAGACTTGGATTTGA -TTTACTACAAGCTAGCCAGGTCTATAGCTAAAAAAACCCCCTCCCGCTATGTCTCCAACT -ACCTCGAAAGCTTCCAGCGAGGAGAGGCGGTCTCAGTCCTGGATGAATGAAACCGAAATC -CAGAGATCTCAATCCCGGCGTTCACAACGAGACGGAGCTGTCAATAGTCTTGACCATGTC -CGGAGCCATGTCTCGCACCATGACATGCCTGTCAATGATGACTTCCAGGAGATCGATGCA -GAACAATATCTACGTTTTTCACCAGCTCGCAAAGTCGTCATCGTTGGTATCCTGTCGTTC -TGCTCCTTCCTGGCACCTATCTCCTCCACATCTATATTAGCCGCTATACCAGAAGTGGCA -AAGACCTATAACACCACAGGAAGTGTGATCAACGCGAGTAATGCGCTATATATGGCATTC -ATGGGTATTGCGGCTCCCTTTTGGGGGCCCTTCAGTCAGGTCTGGGGGCGACGTCCGGTG -GGTGAAGAGCAGAAGTTCATCTTGCAGTAACAAACTAATTGGAAGTAGGTCTTTCTTGTG -AGCGCGTTCTTGTTCTTTGCGTTTAGCATTGGCACGGCTCTAGCACCCAATCTCCCTGCA -TATTATATTTTCCGTGTCTTGACTGCATTTCAAGGAACCTCTTTCTTGGTAGTTGGAAGC -TCGGCGTTGGGTGATGTATACGAACCTCGAGCACGGGCAACAGCATTGGGGTGGTTTTTG 
-TCAGGAACCCTAATTGGGCCAGCTTTTGGTCCTTTCATTGGCGTACGTCTGCCCTGCAAA -TTCAGCACTGAGATAATTGGCTGACCTATGAACCTAGGGTGTGATTGTCACCTTCCGCTC -ATGGCGAGTGATCTTTTGGCTCCAGGCAGCTCTAGGAGGCAGCAGCACACTTCTCGTCTA -CTTCTTTTTCCCCGAAACCTACCCGCATCTGACCAAAGGAGATATGGCCGACAAGACGAC -GTGGCAAAAGGCCAAGTACCTTTGGTATCGGATCTCTCCCCTCCAAGTTGCTGTCATGTT -ATTCAAATACCCCAATCTCTTCTGCACGGGTCTTGCTGCAGGCGCTCTCGTCTGGAACCA -GTACTCCCTCTTGACCCCTATTCGCTATGTACTCAACCCACGCTTCCACCTCACCAGCCC -TATCCAGACGGGTCTGTTCTACATCGCGCCAGGCTGTGGGTATCTTGTTGGTACATTCAT -GGGCGGGAGATGGGCAGACCATACTGTGAAAAAATGGATCCGTAAGCGCGGTGGTGTCCG -AGTACCCGAAGACAGACTGAAGTCTTGTCTGATATTTTTGGGTCTTGTCATTCCCGGCTG -TATCTTGGTCTACGGCTGGACAGTCGAGAAGGCTGTTGGTGGAATTCCGGTCCCAGTTCT -TGCAATGTTCTTCCAGGGAGTGGCGCAGTTGTTCTGCTTTCCTAGTCTCAATACTTATAG -CCTGGATGTCATGCAGTCCAGCGGCCGGAGTGCCGAAGTCGTTGCCGGTAGCTATCTATT -ACGGTACGTGTTCGGTGCCCTGGGGTCTGGACTGGTTCTTCCGGCGGTAGAGGCTATGGG -GGTGGGGTGGTTCAGTACCATCAGCGCTCTGTTTTTGGCTGCGTCTGGGGTTTGTGTATG -GCTCACAACTGTTTTCGGTGATCAATGGCGGGATCAGGTCGAGAAGAAGAACCAGCTTAA -GGCTGGGGAGAGGACCGACCAGACACAGCAGGACACCGCGAAGGTTGAAACTTGAAGGTT -CATCGTTGGGATCATCTATGGGAGATTTAATGATATAATGTGTTGCAATCAAAATAGAGA -CCGAAAAAAAAGTAGGTTATAACAACTGAAGCATTCCCAAAATATTTGGACTTACCCCCT -CACCAAACTTCTTAGGTACTCGTTCTGCACTTCGGGGACATTTTCATTACGGACAGGTTG -CTTCCCACTTGCGATATCATTGGGATTTAGACCACTCTCCGCCACAACACGGTCTATCTT -GCGCTGCTCGGTGTATTTTCGACAGTCCTCACGACCCTTTTTGACGTCTACAAAGCAGAT -GGTGATCATTGCCGCGGTGCAGATGGCGAACAAAAATGGAAAACCCATCCAGCTGTTGTG -GGTATCGTTTATGATGGCTTGAATAACATTCGGCCCAATGATTGAAGACTATAGGGTTAG -TGAAATGAATAAAATGCAGCACCAAATACGGCCGACTTACGGCGCGATTGGTTATCCCAA -AAAGGGCAAAGAACATATTATCGTAACCGCGTGGCATCAGCTCACTGATCATCGTTTGGG -CGTACTGGCATAATGTTAGAGATCAAAGCAGCAGGAAAATGCTAGATGATTCAATTCACC -GCGTAGTATGGAGCCTGGAAAAGCCCGAAAACGACATTGTAGAAGTAAAATTCCCACTTA -ATCGCTGTTAGCTGTAAGATCTTAGTTGGAGCCTCATACATACCCTATTGTGGTACCCGA -TGCGATTGGTCCACAGTCCCAGCATTCCCCAAAAGGGAATGAGAACGGTGAAAAAATTGG -TGAACAAAAACATAGTTTTTGTCTTGAATTTGAAATATCGCTGTATATACCAAAAACCAA -ACGTTGAAGTGATCGAGCAGGCAGCTTGGGTAATGCCGAGATATGTAATCTGCAAGAATG 
-AAAATGAAACAGAGTCATTTTGAATGATGCTGACTAGAGTACCTATTTGACGGTACCTAG -TGAGTTCTCTTGTCTACATTTATAGGTGTATTGAGTCTGATTTGCATGAAGCAAATAAGG -AAAGAACCAAACTCACCAGTAGTGTTGAGGCCATCAGCTAGAAGGAAATAGGCGAAGAAA -TATAGAAATGTCTGTGGAAGAGACCTGATTTCTCGAATGGCCAACCAGATCTGCTTGAAC -CCAATAGTGCTGTAGCTTGACCCTTTTGGTATCTTCGGACCTGGCCGTTTTTGCTGGAAA -ATGAACCACCAGATGCCAAGAACCACCCAATCTGCAGAAAGTCAGATAGTCCAACTATCA -GGAGGATTGCCCAGAAAGCCTACATGAGTTAGTAAGACAGAGTGCCAGGTTATTTGAGAA -ATTGTTGTTTTGAAGTGGAAGTAACACGCTCAGATTAAGCACGAGTGTGAGTAGGTATCC -AATATTGCTGTGTGCAGTTGAAATGTTGCTACTATTGTCAGCGACAATAGGACCTCAAAA -AAGATCAAACTCACCTGATATGATTTCTCTCTAGCGATTCAATAGCATCATATTCCTCTT -GGTCGATCTTACCCTCTCTGAGATCTTCTTCTCTCGCTTTGCGCACATGCGGGGTATACC -GCGCCAGACGGGGGAAGACAGCAGCGTAAAACACCAGCGTTGCGCCGTAGGCAATATATG -CCACAACGTACAATCCCATCGCGGTAGGCCACTGGCTCGGATGCCGGATGGACATCATAC -CATACTGCAGTGCCCAGCAGACGACCGTCAATGCCAGAAGAAGCCATCGGCCGAATGTTC -CATAGTCTGCAGCACTTCCTAGCCAGACGAATATCACAGTCATGAAAGTGAAACAAAGAC -CGTTGGCAATAAGAACAACTGACGATACGGATCTCGCTCCTCCAGTCCAAGGCACAACGC -ATGCCGATGAATCGGTGCAGGCTTTGGGTGGATCGGAGCCAGGTTGATGCCCGGCACCAT -TCAAGGCCCACTGGAAGCTTCAACAAATTAGTCGGGAAACTCCCAAGTTGCATTGTACCT -ACAGGGCTTGGCTGTAGGATCCCGGGCCGACACCGTTGTCACCATTGTAATAGAGATAAT -ATGACCAAAGTTCCTTCCTAGAGGTAGTCGGCTGTTCATATGAATCCTCGTCCACGCCGA -AGAGCCGTTCATACGCTTCTAAGGTGGGATGTTGTGAAGTAGAAACCACATCAACCGTGG -CGTCATCCTTTGCATAATTTTCATGAATCTCAGCCATGTTGTAAGGTAGAGAAGATATGT -ACATGTACTTCTACCGCATTGTTCCTGAGTGGGGCTTCCCTAGCCGCTTTCATGCTCAGT -GGAGGCTCGGTAGATCTCAGCACGGGCAGTCTGGGGGCATCGGGTCACGTGCCGAGGAAC -AGCTGAAGATCTTCAACTGATCATGACTACACCAAGTATGTATTTAGATAACAATCAATC -AGCAGAGAGATCATTCTTTACAGTCTTTCGGAAGAGCTATTACGCTGTCAATGGAAGCAT -TCTGTGATACAACACACTACGTTCGCATGCCGTTTGGAACAACCCTGGTTACCGGTAACG -AGGAAACATGGAGCAAAAGACTGTACGACAATCAAGTCAATTCAGCACATTTATTTTTAC -CCTTGTCCACTTGATATGCGGAGTTGATGAGGATTGGTAGATAGATGGCAGGATAAACGG -ACATGAACTAAAGGGAGAGAGTGGAGGACACACTGGTGGAGTGGTGGAGTGGTGGAGCTC -ATTTTCCCCCACGGCCTCTACCTTGCGCAATTAGATCTAGTGACTTGAATATAGCTGCAA -TAATGGAATATTTATCGCAGGAAGCCAATTTCCCCGAACTTGTTGACGGGGAAAATTGTA 
-GGTTTTGTCACTTCATATGGTCTAAATAGGACAGTGGATTTCGCATGCCCTCACTGTTCT -GTTTACCTATCCTCCGATTCGACATCAACACACCTCGAATATACAGATTGCGAAATTGTG -ACGAGAATGGCTGCTAAGCCAAAGGTTGCCTTCATTGGATTGGGCGCCATGGGGATGGGA -ATGGCCGTACAGTTGCTCGAAGACGGTTTCGCTGTGACCGGGTTCGATGTCAACCCTATG -GCTCTAGAGAAACTACTTGCCATGGGTGGTGAGGCGGCTGATGATCCAAAAACATGCGTT -CAAGACGCCTCATTTGTTATCTGCATGGTTGCGAACTCCATGCAAACCGAGGATGCATTT -TTTGCCGACTCAACGGGCGCATTATTTGGCCTCACACAGAACGCGGTTGTCATATTGTGC -TCGACTGTTGCACCAGGGTTTCCGGTGGAGATTTTGGGCCGAATTCACCATGTCTTCCAA -AGACCAGATGTACAGGTGTTGGATTGTCCCGTGTCTGGTGGAACCATACGCGCCGCTCGA -GGGATGCTGACTATCCTGTCATCGGGCCCGACAGGCGTTTTGAAAACGGCCCAGCCAATC -TTGAAGTCAATGAGCGAGAACCTGTATGAGATTGAGGGAGGACTAGGTGCTGCGAACAAG -GTCAAACTTATCAACCAACACCTTGCTGGTGTTCATATTGCTGTCTCCGCAGAGGCCATG -GGGCTAGCTGCAACCTTGGGGGTGAACACAAAGGAATTCTATGCGGCCGTTTTGAAAGGT -CCAGCGTGTAGTTGGATGTTTGAGAATCGAGTACCTCATATGCTAAGCAACGATTGGACG -CCTCATTCTGCTATCAGTATCTTTGTTAAGGATATGGTAAGCTTTCTGTCCCTATTATCG -CCCAAGCAAGTTTTAAATTGCTTTCACTGACACCTTCATCAGCGAATTGTGACCTCAGAA -GGTCTTCTTCAGGACTTCCCTCTTTATATTGCGTCTGCGACAGAGCGACTTTATCAATAT -GCCGCGCGGATGGGATATGGAAAAGACGACGATGCTAGTCTTGTTCGAATTTTCCTAGCA -CAAACTCCATCTCTGGTTTCGGAAGCAACACATTTCCAGAACTCTGATAATGCGCGCGCG -TCCGAATTGATTTGCCAGTTACTGGAGACTGTCCACACCCTCGCAGCGGTTGAAGCTCTG -GCCCTCGGAGAGAAACTCGGCCTTTCGATCAACACTCTCACATCAATCATTTCAAATGCT -GCTGGTGCTAGTGAGTCATTCAAAGAAGTTGCCTCAATGATTTTGGCTGGCGACCTGTCA -TCTGGATACACCATCACACGGACACGGAACAAGCTGGTAGGTCTTTCCTCTTCAGAGCTT -CAATGGACGCCACTAACCTACTCGATAGAAAGAAGTGATAATCCTTGCCCAAATGCATAA -TTATCCTCTTCAGCTCACAGCTACTATATTCCAGCTGTTACAACAGGCTGTGACCTATGG -CCTGGGCGAGGAAGGCCAAGCAGCGCTCGTCAAGCTATGGACAACATCAAGTCTTTTAGT -CGAGCCCTTGACCATCACCGAATATGACCCTATATCCCTCTCGGAATTACACTCAAAATT -GCCGAAACTTGAACACAAGTCTGAAAACCTTCTATGTAATATTCAAGATCATTTACGGGC -TGAGCAGAACTTCAAGCTCGTTGTTCTTGATGATGACCCAACAGGAACGCAGACGTGCCA -CGATATCAATGTCCTCACCATGTGGGATGTTGATCTTCTGGCCTCAGAATTCCTAACAGA -GGGTGGAGGTTTTTTCATTTTGACTAACAGTCGCTCTTTGCCTCCCGATGAAGCTCGCAC -GCTGGTTAGTGAAATATTGCGAAATGTTTCTCGGGCGGCTGCTATGACTGGTAAACAATT 
-CGAGGTTGTTCTTCGGGGAGACTCTACATTGCGTGGTCATTTCCTGGAAGAAATTGAATC -GCATATTGATACGATTGGCCTTCCCAATGTATGGATTCTTGCGCCGTTCTTTGGGCCAGG -TGTTCGCTATACCATCGATGACGTACAATATGTGGGCGACCGAGATACCCTGGTCCCAGC -TGCAAAAACTCCGTTTGCTAAAGATAGAACCTTTGGCTACAGATCCTCAAATCTCAGAGA -GTGGGTTTGTGAAAAGGCAGGATCGCGTTTTTCCAGCAAGGATATACTGTCTGTCACATT -GGAAGATATCAGACTCGGAGGAATCTCTGCAGTTGAGCAAAAATTGCTTCGTGTGCCCAA -AGGTGGAATCCTGATCGTGAACGCAGTCCAGACGGAGGATATGCTTATGTTCAGCTTGGC -TTTATTGGAAGGTAACCATCCTTGTTTTTTTTTTTTTGGTTCGTTCACAGCGCATAGTCC -TCCCAAACTGACTTATCACTTGAAGTACGCAAAAAGCACAGACTCCGTTTTGCCTATCGA -ACAGGGGCGAGCTTTGTATCTAGTCGCCTCGGGATCCCAGTCAAACCGGTCATTTTACCG -AGCCAGATCCCAACTTTCACACCCGTGAGACGCACAGGAGGCCTAGTTCTAGCGGGCTCT -TATGTTCCCAAGTCTACCAAGCAGATCGAAGCATTGATCCAAAGATCAGGGGGCAATCTA -ACAGCTCTCACCGTAGAAGTGCAAGCCCTTTTGATCGAGCTCCAGAGATATCCCCACATT -CCCACAATGCTCCGTCACTCACCCGCGTTGCTAAAGATTGTTGTGCGTGCATCTGAAGAC -CTTAAAAATGGCAAAGATGTGCTAGTGATGACCAGTCGAGACCTTGTGACACTGGATTCT -GTCAATTCTTGGGCGCCCGAAACTCCAAAAAGAATAACGAATTTGGACATCAACAACTTA -GCCGCCAATGCTTTGGTACACATTGTTCGCCACTTGAGCGTTTGCCCTCAATATGTGCTA -GCGAAGGGAGGCGTGACATCATCTGATGCAGCGACAGCTGGTATAGGAATAAAACGCGCA -AGAGTGCTGGGCCAGGCGGCGCCTGGTGTGCCCGTCTGGTGGTGCCAGAGTGAAGAAGAT -CTCAAGTGTCAAGATCAAGGAGGGCGAGATGCTAAATGGAATGAGGTACCATTGATCATT -TTTCCAGGCAATGTTGGAGATGAAGATAGCTTGGCCGATGTAGTCGAGCAGTGGGCACTA -CGATAGACTATTATGCTTGAGTTATGAAGACAATCGCCAACACCTGATGTAATTAATTGT -TGCGTATCATTTGCTGCTTACACATCTAGACTTCGAAATCGGTACATCAAACAAAAGTTT -GCCTGTGACTGGATTTTCGAATAGTCCACTAGACAGAAGAAAGCAAAGTAGAAAGATCTA -GAGAAAGACATGTAAAAGGTTGATCGGGAGACACCTAGACTTACCAGAATTTATAATCGA -TATGTAGAATTCATTTGATTCAAAAGGTCATAATGTCAATGTGTGTTTCATTCAAAAAAT -GTTGAGATTAGGGTGTGCGTGCCTAGGAGATCAGGGCCTAACCTGGGAATCGGATAGATA -CATGTAGCAAACGAACAATTCTGAGAGTTTGTCTCCAAGATACAACTCTATTCCTCTCTC -TCCAGCTTCGAGCCCAATTGACTGAATAGAAAATAGGTTTTGTCTATCTGATTCCCAAAG -TCGCTAAACCATGAACTCCTCAAAGTCCTCGTCGGGCAAATGTGCTTTTTCATTCCCATC -AATCGTACCAAGATACCACGACAGGTCGCTTCCGTCAAACTCGCGTGCAGCGAAGCCGTT -ACCAAGATAGCCAAACCGATTGGAAGTTGTATAGTCGAGGTTGTAATCTTCCCAACGGGG 
-TTGTTTCATGGTCTCAAAGAAATGAATTCTCGACCCCGGCCACATCATTACCTCGCCATC -GACCGTTCCCTGCTTGAACCATGAGCGACATGGAGATGTCCAAGCGGTACGGGGCATGAA -GTTTCGGTGGTGTTCAAAGAAATCCGCAACCGCCTCAGCTTTAGGTTCAAAGGATGTCAC -ACCTTCCGATGACATTTTCTCGAGAATCTGGAGGAAGTGTCTTGATAGGGTCTCTGTGAT -TGGAAGGAAACTTCCATGGCCATAGGGGCCAAATGGACCGTTGAAAACTGTTGAAACAAA -ATTAGTTGCCAACACTTTATTGCTGAGGGTGCACAGGGGCTCACTGAGATAATTGGGGAA -GTGTGGAACTGAGATGGAAAGATAACTCCTCGGGATATCTCTCCATTCTTCACTCAGACT -GCGAGATTTCCGGCCGATTGTCGGGTATGATGGCCGCCAGCTGACATCAAAGCCGGTGGC -ACACACAATAACATCGACCTCGTGATCAACTCCGTCTGCAGTTTTAATGCCCTTAGGGGT -AATCTCTTCGATTGGTTGACTGACAACGGAAGTATTCTCGCTGCAAAGCGCCTCGAGATA -TCCATTTCCCGGTGTAGGTCTTCGGCAGCCCACTGCAAAGTCCTTCGGAACAATCGCGTC -GGCGATTTCTGGACGATCGGCAAGCTTGGACCGCATATCTTTCTCCGCGAACTAAAGGGA -TTCACCTTGTCAGGAAACGGCCAGAAGTGTATTGAATCAGGGCTGGAATCTCACAGCTCG -CGCATTTGCTTGCTCCTCGGATCCATTGAGAATGAATCGGAAACGAGAGTTGAGCTCAGA -CTCAATTTTCTTGCGATAGGCCAAATATTTTTGAGGCTCATTCTTTAAGATCTCTTTTTG -CTTCTTGTTATCTGTTTATGTCAATACTTCGGATGGAGAGTTGATACGGGCTTCTTGAGG -GAAGGCTTACATTTGAAATTGGATCCTCCCTTTCCTGCAAATCGTTGGCCAAAGCCGGCA -GTCACCCATGAGGCACTGCGAATGAAGCATTTGAGTGATGACACCTCTGCATTGACTGTT -AATAGAGTCCTTATTTGCGCTTTGATCGGCGAAACTCACTTGGTTGAATGCTCGGGACGA -TTTGCACTGCAGAGGAGCCTGAGCCGATAACTGCCACACGCTTCCCCTGCAGTTGAACGC -TATCATCCCAGTGTGCACTATGAAGTCTAGGACCCTGGAAGGATTCTAGGCCCTTGATAG -CAGGCCATTTCCATTTGCTGGAGTACGTCAGTCTTATCATGTTACAACCGGAAAACCGAT -TGAGACTCACTTCAGTACACCACTGGCGTTGATCAATATGTGACCTTTGTCCTCAAAGAC -ATCTTCAGGATTTTCTCCTCTCTGAACTTTTACATGCCACTGTTGATCCTGCTCATCCCA -AAACGCGCCAACAAGCTTGTGTTCCAGGCGAATATATTTCCGAAGCCCGAACTTGTCGGC -AACCTCCTTGAAATATTTCAAGATTTCGGATGCACCCGAGTAACTACCAATCGGTGAGTA -TGATGTTCATTCCTTGATTCCGACAAGACGGGTGATTACACATACAAGGACGTCCAGTCT -GGAGAGGGAGCCCATGAGAACTGATAGTTGACGGAGGGTATGTCACATCCACACCCGGGG -TAGCGGTTCTCATACCAAGTACCTCCTATCTCCGGGTTCTTTTCATAAAGAACTAGTTCA -GTATCTAATGGGCTTGTGTCGATGTCATGGGCAAAGTTCAACGCGCTTGCGCCAGCTCCG -ATCACGATGATGCGGATCTTGCGACGGTATCGGCTTGGTCTTTCCACGATTTCATAAGGT -TTGACTCCATTGCCATCTCCATTACACTTGAGCTCATGATAGTTACCGATAGATCCCATT 
-TTTGGTAGAAGTAGGAAGGATAGTCAATAAAGAGATGATCAGGTTGTTTTTATTGAATCA -CGGCGATGCCAGGGCGCGGGATCGATGCATTTATGACACTCTTCTTGTAAGCACACATTT -CAGTACAGTTTCTGCTTCAATACTTAGACTAGGTGGACAATCCGTGACAAACTGTCGAGT -CGTGTTTCTTTGTAGCTCTGTGTCGGTGGAGCTCCACCACTCCACCACTCCACCACTTCT -TATCACTTAGGGCTCTTCCCACTCTACCATCTCCACTACGGAGAGAAGCGCGAAGTGGAT -GATCGTCAATCTATGGAGTCCAAATCTTGTAGTCATGGACTTAGACTTGATTGATAAGCC -ACCTAAGACCTCTAATAGAGACGATCACCCCGTGCCTCTACCAATTCTGCTGTATTCTAC -CGAGTCTGGAATCATTCCAACATGCCTGAAGGTACCATCAAGAAGCCGTTACGCATTTCC -ATTGGTGAGTATCGACCCTTCAGATATCCTGATTTAGTATTGACTAGTATAACCAGATCG -CGGGGGAACCTTCACCGATTGTATTTGCAAAGTGATCGACGGAGACGATATTCTCGTAAA -GATCTTGTCTGTCGACCCAAAGAACTATGCTGATGCCCCAACTGAGGCGATTCGTCGCGT -TTTGGAGATACATTATAAAACTTCAATCCCTCGGGGCTCTGAACTAGATCTTAAGGATGT -TGGTAAGTTATCGCCCAAAACAATATGTAGTCATTTTACTCATCAAACAGAATGGATCCG -AATGGGCACAACGGTGGCGACCAATGCGCTGCTCGAGCGCAAAGGCGAGAGCACAGCCTT -GCTTGTCACTGAAGGTTTCAAGGACATCTTGTATATTGGCAACCAGAGCCGACCATACAT -GTTCGATCTGTCAATTCGACGAGCAAAGCCGCTGTACTCGGACGTCTTTGAAGTCAAAGA -GCGTGTTACAGTTGCAATATGCAGCGACTCAAATCTCCGAAATGTGAAGCTGCAATCTCC -AGACCCGGTCGAGTCGATCGCCGGTACCTCTGGTGAAATTGTTCAAGTGTTGAAGCCTAT -CGACTTAACAAGTACTCGGACCTATCTGCAAGATATATATAACAAGGGCTTCCGCTCACT -GGCGGTGTGCCTCATGCATTCCTACATCTTCCCGACTCATGAGTTGCAGATTCGCGAATT -GGGGCTTGAGATTGGGTTTAAATACATATCTCTCTCACATCAAACCTCCCCACGACCGAA -GCTTGTCCCTCGTGGAAATTCCACGGTAGTCAACGCATACCTGACACCAAACATTGAGCA -ATATTTGCAACAATTCTCGAAGAACTTTCCTAATATCGACCAGTCCAGGACGCGATTGGA -GTTCATGCAGTCAGACGGGGGATTGGTCCCCTCGTCAAAGCTTTCTGGGCTGCATTCTAT -TCTCTCCGGACCGGCAGGAGGTGTTATTGGATTCTCCCAAACTTGTTTCGACGCGAAAGA -TCAAATCCCGGTTGTTGGATTTGACATGGGAGGCACTAGCACCGATGTTAGTCGATACGA -CGGCGAACTGGATCACATCTTTGAGACAACGACTGCTGGAATCGCGATCCAAGCCCCACA -ATTGAATGTGAATACAATCGCGGCCGGCGGAGGAAGTATACTGGCTTGGAAAGATGGACT -CATGTCAGTTGGGCCTGAAAGTGCCACCTCCAATCCTGGGCCAGCCTGCTACCGTAAAGG -TGGTCCTTTGACTGTGACCGATGCAAATCTGGCACTGGGACGGTTGATTCCAGAAGAGTT -TCCCTCGGTATTTGGCATCAATGAGAATGAACCATTGGACAGAGAGGTTGTTCTAGCCAA -GTTCAAGGATCTCACCCAAGCTATCAACAAAGAGACGGGTAAATCACTAACTTGGGCAGA 
-GGTTGCAGATGGGTGAGTAATATCCAAATCCACACATCAAGTTCGGTCACTAAGAGGATC -CATGCAGTTTCCTCCAGGTAGCCAACTCGGCGATGTGCGGTCCCATTCGAAGTTTGACCG -AAGCCAAGGGTCATGATGTTGCAAAGCATCATCTCGCGTCGTTTGGTGGTGCAGGGGGTC -AGCATGCCTGCGCTATCGCCGAAGCCCTGGGAATTCAGAGAGTTTTGATCCATAAGTACT -CCTCTATTCTCTCTGCCTATGGAATCGGATTGGCAGATCTCGTGCACGAAGAAGAGCGGG -TATGCGCAAAAGCCTTTGATGACTCTACGTGCGAAACAATATACAGCGAGATGGAAATCC -TCACAGACATCGCCCGAGGCAATGAAACCATGAAACCATTCAACATTGTTCAGACCCGCC -GCTTCTTGAACATGAGATACGATGGTAGCGAGACGTCAATAATGGTTTCCCTGGAGAGCT -CTAGGGACCCTCGAGAAGAATTCATCAAAGCCCATCATCAGCAATTTGGATTCACTCCTA -CCAACCGTGCAGTGTATGTTGATACTATTCGCGTTCGCGCAATTGGCTCCAGTGTGTTCT -CTGAGTCCTCGGTCAAATTACTGGATTCATATCCCAAACTCGTGTCTGGATCAGTGGCGC -CTGTTCCCAAATCATGGGAATCTACTTACTTTAGTCCAATGGGATGGGTGGACACACCGG -TGTACCATTTTGATAATTTGAGCAATGGGTGCCATATATCAGGACCGGCCCTCGTTATTG -ACAAAACACAGACGATCTTGGTCAGCCCACACTCAAAGGCGACACTGAGACAAGATGTAC -TGGTCCTGGACGTGAAATCTTCAGGGCCGAAAATGACAAACGCAGATGTCATCGATCCAG -TTCAGCTTTCTATTTTCCGCCATCGATTCTTTGGTGTTGCAGAGCAAATGGGACGCGTTT -TACAAAACGTCAGCATAAGTGCCAATATTAAAGAGCGGTTGGATTTCACTTGTGCAATCT -TTACACCCGAAGGGGACTTGGTCGCCAACGCACCCCATGTGCCCGCCATGATTGGGAGCA -TGGCCTTTGCAGTCAGATCACAGATTGCCGAGTGGCAAGGTAAGTTGCAGGATGGCGATG -TGTTACTCTCCAATACACCAGGTAAGTATCTCTGATAGAACATGAGAATGGTTCCGGCAG -CTAACCCGTTACCATAAAGCATATGGAGGTGTTCATCTTCCTGACCTTACGGTCATTACC -CCTGTTTTTGACTCCAATGAAAAAGATATTATATTTTGGGCTGCCTCGCGCGGTCACCAC -GCCGACGTAAGTTCTCTTTCAAGCCCCGAATAATCCACAGACTAACAAACTGTTCGTAGG -TCGGAGGCATCCTTCCTGGATCAATGCCACCCTTGTCAAAGCTTCTATCTGAAGAGGGTG -CAGTCTTCGATTCATACCTGTTGGTGCGCGCTGGCCATTTCGACGAGGAAGAACTCCACC -GCATGTTGTGCGTAGAACCTACGCGTTTCCCAGGCTCCAGTGGATCCCGATGTTTTCAAG -ATAATATCACCGATTTGAAAGCACAGGTTGCTGCAAATCATTGTGGCATCCGTCTCGTGC -GGCAACTAATCAAGGAATATTCTATGGATGTGGTTCAGGTAATTACTATTGATGAGGCAA -CTCTATAACGAATGACTAGACTAACTGAGGCAGATGTATATGCGCGCAATTCAAGACTCA -GCTGAGCTTGCCGTTCGCAATCTCTTGAAGCGTCTCGCCTATGATCACAATGGGGAAGAG -ATGTCCGCCGTCGACTATATGGATGATGGGACACCAATCATGCTCAAAGTGACCATTGAC -TCGTCGGATGGTTCCGCTATTTTTGATTTTACAGGAACCGGCCCAGAGGTTTATGGTATG 
-ATTATTCTGAGGGCTCACCACGGAAATTCCAATAATTGATTGACTGTAGGAAACTGGAAT -GCCCCAATTGCGATCTGCAACTCGGCAGTCATCTTTGCCTTGCGTTGTATGGTCAACTCT -GAAATTCCCCTCAACCAAGGCTGCATCAAACCTGTGCAGATTATCATCCCTGACAAGTCA -CTATTGCGACCGAGCTTCGAAGCCGCAGTATGCGCGGGTAATGTATTGACCTCGCAACGT -ATCGTGGACGTGATTTTCAAGAGCTTCAAGGTATGCGCTGCTAGCCAAGGATGCATGAAC -AATTTTACTTTCGGCAATGACGGGGAGAATGGGTTTGGATACTATGAAACCATCGCTGGC -GGGAGCGGCGGAGGCCCGGGCTGGGCTGGTACAAGTGGCGTGCATACCAACATGACCAAT -ACACGCATAACCGACCCTGAATCACTGGAGCGCCGGTATCCGGTGATTCTACGTCGTTTC -TGCCTCCGCGCTGGGAGTGGCGGTGCCGGGATGTATCCTGGAGGAGAAGGTGTTATCAGA -GATGTTGAGCTGCGATTGCCGATGTCCGTGTCCATTCTTTCCGAGCGCCGCAGCTTTGCG -CCTTATGGTATGGCAGGCGGCGAAGATGGTAAGCGCGGAAAGAACACATGGATCACCAGA -ACTGGCCGGCATATCTCTGTTGGGGGCAAAAGCTCGATCCGTGTCCAATCCGGAGACCGA -TTCGTCATTGAAACACCTGGAGGCGGAGGATACGGAGCTCCTGGGGCGCCAAAGAAATCA -AGTGTGGACCAGTCAACGGTCATGCCGTCGTTTGTTCCTATTGCCAACGGCAGTGTGGCC -GCCAATCGGAGCTTGGCAGAGGAGGTATAAGAGTTGACGTAGTTAGACACATTCTATTTG -TTTTTTTTTATAAGAGATAATCCTTCTATCCAGCCTCCGCAAAAATATTCGATATATTTT -CACTCAGTTAGGCTTCAACTAAGCGAAAGCCAAAGCTCGATCCATAAAACTCAACGGCGG -GGGTGGTAGAAGAAACAGCAGGAGTGAGCGCAACAAGAATTCCAACTCACTAAAACAGAA -ACTCTGAATCACATATTTTGAGAAGAATCCAAGACCTGCATTATGCATTACGCTGTCAGG -TCTAACAGTGCAAACGGGTTGAATCAGGGCTACCGTAGACTGGCCTGAGTATGCAGGGTA -CCACATATTTAACAGTCAATAACTAGTCAACTAGTTAGTTACCAAGAGGGACTCATCGTT -AACCGTCATAATCCAGAACTCCCTTGGCCCATTGTATGTCATGTCCGTTTTGTCAGATAT -TATTCCTTCAGAGTCTATAATTTCAATACCAGAGATTTTGCGCTACACTTCCATTATGGT -TGTCATCATACATACACACCATTTTTCAATATATATTTTTGCGTTCGAGCTTAGGTTCCA -CCCCATGAGATTCAGACCGAAGGATAGGCTGGTACAAGAGATAAAGACTACCTGGCTTAT -TGAAAGATTTTGTTTACAGGCAATTGCAATAATACGTTGAGTATAAGTAACAGCTACGCA -GGTTGAAAGTCACACTATCTCAGCAGTATCTTATAACCTGAAGTCCCGTTATGATAGATC -GTGATTAGATACATATTTTTCACCTTGGATGCCTGACTTCACCCAGGAAACGCTCGAATG -AAATATGACAAAAGTACCGAAATCCCTGTTCGAGCGTAAGAATTCACAGCACATAGCGAA -AGGCTGGCTGCTCGAAGTCTGTCAAATCCAAGAATTCAGAGCCAGGGGTAGCATGTGCAG -TATCAGGATCCTCAATAGCCTTCCGGTCACGTCTCTTGTTTTCCACGATATTGATCACCC -GAATGACTACCAAGGTGATTGCTGCCAGAGCATACATGATGGCGACCATCAGCTTGCCAT 
-CGAAATATTCAGGCGATTGGGTCGAGCGAAATGTTTGGGGACCAATCCAGTTTCCGGCTG -CATAGGCCACAATCTGGAGGCCATTCACTGTTGCTTTCTTGGTATACCCTAGAGTGTTGT -TGGAAATCATCACCATCACGAGACACCAGGCAGTTCCAGCGGCACCGATCAAATAATATC -CTATTAGTAAAGCGGCTTTGGCATCAAGGGGAATCACAGTCATCATAATACCGCCGATCA -TAGGAATCAGAATGGCGAATATGGCAAACAGGGTGCGATCAAAGAACCGATCAGACAGAT -AGGTCAATCCAAGTTTACAAATAATGTCCACGACGCCCATTGGCATGTTTAAAATAAGAG -ACATGCGGTCAGAAAATCCAAATGAGCTGATGATGATAGAGCCAAAAGTTGTGATTCCTC -CGTTAGGAATGTTCATCAGCAGCGAAAATAGCACGTAGAGATAGGTACGCGGGTCGCGGA -ATGCTTCAAAAAATTGAGGCCATTTCCATGTTTTGTTGCCGATTCCTTGGAAATTGCCCC -GAATGCGATCAATAGCAATGAGCTTCTCGCGATGATTCAAGAACCGGGCCTGGATCTGAT -TGTCTGGCATGTACCAAAGATACAATATACCCGTTAGGACAGTGGGCAGGCCAAATACCA -AGAAGAGAATTTTCCACACAGCGATGGAAGTTCCGTGCACACCCGCTAGGCCGTAGGCAA -TGGGCCCAATAAGCACTTGGGCCAATCCGTTACAACTGAGCCAAAAGCCCATGCGTCTGG -CTTGTTCCTCTTGTTTATACCAGCAAGAAGTGAACAAGACAAAGGCTGGCGTTACTGTTG -CCTCAAATGCGCCTAGGAAGAAACGGACACACAGTAGGCTCGCATAATTATGAGTGGCTG -CGTGGCAAACGAGTGCAATACCCCAGAGCACGATATTGATTGAGGTGTATTTGCCCAGGG -GCAGGCGACGAAGCAGATAAGTGGTGGGAAATCTGATGATAGACATTAGTAATGTAAAGC -AAACAACAGCAGGAGAGTAGGCTTACTCCCAAAGGATATATCCAGCATAGAAGATGCTCG -AGACCCAGCTGTATTGCTGCGAGTCAGGGGCCAAACGAGTATCTTCCCGAATGCCCATCA -GCGAGGCATAATTCAATGAGGTTTTGTCGGCAAATTGAATCAAGTAGACCCAGCAGAGCA -AGGGCATGAGGAATCGATCGAGTTTGGACAAAACCTTCCTAGCCTCGTCGGGGGTATATT -CAACCTCCTCTGCGTTCATGAATTGGAGGGCAGGGTCGAGACCCTGGACCTCCTGCTGCT -TTAGATCTCCATCCTGTATATCCATGACCGCCTGCTCTGTATCACGCTTTGACGGAACTT -CGGATAGCTTTATATCGTTCGACATGGTGCTGTCTCAATTCATGCCTGTACAACGCCTAA -ATGCCAGAATCACGAAGCCATGTCTTCCTCTTTAAATGCCCCCGGTCTGATACTGGGCTA -TCGGTAGAGCATTTGGCAGCCTATAGGGAAGTAGCCAAGCTCCAAGCTGTACCCCGGAAT -TTGTGGAGGTGGACATTTTCCTGGTGGAGCTCCAGTTTGTGTGGACGGGCCTTATCAGAT -TCCCGAGGGTTCCAACTGGCAGCTTATCTGTTCTGGTCCGATAGCAACGGGGATTGGGAT -TTGCTATCTCAATTTTACACCTGAGACCTGTCATATTCCAGTAATCCTATCGCTTGGATC -TCTGATGTATGCATTCAGCCATCTACCGAATATTCGGCCCGTTGAGTCCCGCGATCGATA -CCCTAGAACTCCCGCTGCCATATAGATATGATATTTCCTCTATACGAATGTCAGCTAGCC -TTCTCAGAAAAACAATTTCCGCTCTGAAATGTGCCTAACAATCAATCAGAGATATTTTGT 
-ATCAAATCCTCAAACCACGTGTCGGAAAAGATGTTTGTCAAAGACCAAATCCATACCCAG -TGTCTGAAAGGATGGGAGCCCTTGAATTGGGGGATGTTCGTAGGAGGAGCCCTGGATGTT -CTCATTTGCTGATCGCGATGCAGGCTAAATCCAAGGGTTGCCATTTTTGTCCTTGTACGT -TCCCTGTCCAATATTTACTTTGCGCGCAACTGGGTCCGCGTTGTATGGTTTGGTGAGCTC -GAAGATAGCGTATAGTTCACAGCCTCGAAAAACCAAAAAGCAGAGCTCATTGTGCCAGGT -GAAAAGTCGTCCTTTGAATGTACGTAGAATTTTCAGCGAAGTAGAGAGCATCAAAAACCA -ACGGAGTCAGTTATATTAGCCTGGATTGCGTTATGTGCCTTTCAATAGCGGAGCCCAGCG -ATGAAGACAATGTGGAGCTCCACCGGCAAATTGATATACCTGGCCAGCCGAGCTCGATGA -ACAAGAGTAGAAAGTGGGAACAATAAATTGCAGTCAGACATTAAATACTGTCGGCACTTC -ATCCATCCCTCACATTCAAAATCACGACAAAATGATAGTAAAGCCCACGTTCGGTGTTTA -TACACCACTAGTCACGTTCTTCCAGAATGATGAATCCCTCGACCTCCACAGCACTTTGGA -GCATGCGAAGCGCATGGCTGAAGGTGGCGTCGCTGGTCTTGTTCTTCAGGGAAGCAACGG -AGAGGCTCCTCACCTGGATCATGGCGAACGCAAATCTCTGGTGCGCGCTGTCCGCGACCA -CCTTGACCAGCAGGCCTATACAGGGTTGCAGTTGATTGTCGGTTGCGGCGCACCTAGTGT -GCGGGAAACGTTATCCTATATTGCAGAGGCTAAAGCTTCCGGCGCCGATTTCGCTCTTGT -ATTACCACCGGCATACTGGGTTACAGCCATGAATGCATCAGTTATTGAAAACTTCTTCCT -CGATGTAAGATTGTTTTACATCCCCGACGCGAACCCCCATCGTGTTTTCATGATTTTATA -ATATCTCAAAATCTAGGTCGCATCTCAGTCCGAGCTTCCTATCCTCATTTACAACTTCCC -TGGCGTGACGGGAGGTATCGACATCAGTTCCGACTCCATCATCCGCCTAGCCAAATCCAA -CCCAAATATTGTTGGGTGCAAGCTCACCTGTGGCAATGTTGGAAAGCTCCAACGTGTCTC -ATCGGCACTTTCGGGGACATCATTTGCTACATTTGGTGGAAAATCAGACTTTTTTCTACC -TGCGCTTGTCGCTGGGTCGAACGGGATTATTGCCGCACTCACCAACATTGCACCCAAGCT -GCATGTAAAGGTATTGCGGTACTATGAGAAGGGCGAGCTTCCGGCAGCACAAGAACTTCA -GTCCAAGCTCAGTCATGCCGACTGGGCCCTCACCAAGGTGGGCATTGCGGGTGTTAAAGC -TATTGTATCTCATCATTTCGGATACGGTACAGGTCGGGGACGCAGACCGCTGGGAAACGT -AACAATCGCGACATTAAGTCAAGAAACCGTGGCTCACATAAACGAGCTGGTTGAGCTGGA -GAAAAGCTTGTGAAGGATTATGAAACTGGCAGGGCAAACATAGTTACTCAGAAATGTATA -AATGAACATGAAAACAAAGATTCGGAAGATATAAAATGAGAGGCAATTTGCAGTATCTAG -CTAAGAGCTCGAGTTCCCGCAAAGAAATTCTGTGTCTTGGAACTACAGGGTAATTTAGGC -AGTCGGTGCTAATACCAATCATCGGGCGTCTTCTCTGGACTCTGGAGACTTCCTAGCGCT -CTTGAATTTTAATAGCCGACTCATGCGCACCGTATGTAGCCAACCTTATACCTTTTTAGG -GGGATGTCCAATCAAGAAATTAAGTAAAATTTAGATGAACCTTTGAAATATATAACTTTC 
-TGTCTGATCAGGAAGAAAAGACTCAGTTTCCCTGTCGACTGTTTTTGCTCCAAAAAGATA -GACCCGCTTCCATTCGCCCTATTCCATAAATGGCCGAACTATCCAAGCATTCACCAGCAT -ATTCTCGTGCTGTCAAGAGACGAGCTGATGCAACCTCGAGCGTTGGAAGACGAGCTGAAT -GACTAGCAGCGATGGAGAGAGCGTGTTTCATGTCTTTGCTTGCAAGCGCTGCTGCGAATC -CGGGAGATGTGTCCAGCGGTGGTGCATATGAGCCAGTAGTAATACGTTTCGAGTAGCTTT -CGAGCACCGGACCAAACATATTGCCAATGAATTCTTCCAGCTGTTGGTTGCCAATCCCTG -TCACCTCAGCAAAAACTTGCGCTTCAGCGATCACTTCCATGAAGCTGATAACGAGAATAT -TTCTGGAAGAAAGGAATGATTTAGCTTATTGTTAATGATCGTTTCAATGATTGAATGGAT -GCCTACCCAGATATTTTAAGAAGACTTGATTTTCGCACGTCTTCTCCCATGTCAATGATG -CTGCGTCCCATGACGTTCATGATAAGAGGTTTGATTGTCTCAACTGCTGTGGCAGGTCCA -GCTGCTGCAAAGATCAGCTTCCCGGCGGCTGCCACAGGGCTGGCACCAAACACCGGAGCA -GCTATGAACACTGCCCCATATTCACTGAGGCGCGATGTAGCCCATTCTGATGTGTCAGGG -TGAATAGTAGAAGTGTCCACAAAGATTTTGCCTTTTAATAAATCGCCTGCAACCAGTGCC -TTTTTAAATAGGTCAAGTAGAACATTGTCAACGGAGATCTGTTTTACTCGTCAATACAAG -GCGCATCCTGAATAAGAACAAAGTGTCATTCATACCATTGTAAAGATTATGTCCGAATTC -AGGACAACATCTTCGAACTTCTCTTCGGGTATGCCTCCCTCATCACGAAGAAGATCGCCT -TTTGACAAAGTGCGATTGCTGTACCGAAGTGGCGACAAATTATTGCTTTGGAGATATTTT -TGGAGATTGAGGGACATTCCTAGCCCCATCGATCCAAGGCCATACTATGCCAGATTAGAA -AATAGGCTTTTAAATGAAGGGTTATAACATACCCAGCCAACTCTTAATGAGGATGAAGTC -ATTCTGAATCTTTGAATTGCAGTTGGATTGTAAAACTTTCCGGAAGCCATTATAGGTTAT -TATTCGCGCGTATATAATGAGTTGCTGCCAAGTTAAATCGACTTGAGGAGCAGAACCATA -CATGTGGAGCTCCACTTGTACCTGGGCGGTTGTCCACTGTACCGTCCACCTGCCCCGTCT -CTGATAACAACCCCGCGGTTTACTCGGCCCCAGTTCCCGGAATTTCGCCCAATACAGCCA -GCATCGAAGGCCCGATCCCCCACAATGACCTTCGCTGACAAAAGGCCTTGATGATGTTGA -CAAATAATGATTCGCCTTCATCGAGCAGACTAAGGAAAGCCTCTCGATCCATGGTCCACA -ACCGAGGGTCTCGCACTCGATCATTCTCCGTGGAATTCGAAAGAGCGGACTCCATCAGGG -TGTTAAAGATCAACATGCAGAATTCTGACGTTCTTGGGAGCTCGTTGAACGTGTCCAAGT -GGTGACGGATATCGGTGCTCGCACGTTTGAGCGCTCCCCAGATACATAACAAAACACCAT -AACTGGCCACAGTGACCCATGGTGCTTCGCGGACCACTTTATCCTGTTCCTCCCCGGAAA -TGCTGGACAATGCGCCCGATAGATCATTTAAAAGAGCCAAAATTCCCTCATCGCCACCGA -CATGATAAGGAACAGGAGGTGCTGACCGCCGGATCGCAACAAGGTTCCCCTGGGTCAGGT -GTGTGGAAGCAGCCATTCCAGCAATTAAGCTGTTATCCACAATGCCTGCTATCAATAGCT 
-GGTCGGGACGACATAGGCTGATCTTGAGTGAAGAGATAAAGAGCTCGTTCGTTCTCCATA -GGAGAAATTGGGGGCTTTCACCTGTATAGGCTTGCTTCTTGACAATCTTCGTGATTGCTC -TTATTGCCGTGTGGACTTGTTTATTCTGAGCCTGGGTCAATATATGGAATGGGTTTGGGT -CGGATGGACTGGGCGGCATTTGATAGTAACACCGCGTAAAGTCCCGCACAAGGGCACCCA -TGTTCGTCAGGACGCAAAATGCGGCAAAGGGGGACGCTTCTAGCGCTGCTAAAAGTTCCG -AGTCCGGTGTTTCGGTCATGAGTTGCTGAGTAGCAATTGTCAGCGAGGCCGTCCCTAGGC -CTCGTGGTCCATGCAACAGAGTGTTTGCTGTTGTAAAAGTCGAAAGTCCAAAGTTCTCAT -TGAATCTCAAAAGCCATATCTGAGAGTTTTTAGCTTCCCACAGATTGATAGGGAAGGGAA -GTTCCCAGCCAAGTTCCATCGGTGACATCAAAGGGCGAGAATTGCAAGGTGAAAATGTGT -GGGAATCCAGGAAGACTAAAGTATACAGTGACAGTCTCATCGACTCAGTGGTAACATAGG -ACGTCCACCGCGTATAGAGAGTCTGTGAATCATCTCCGGGTGAGTTGAAATCAATTTGGT -ACATCCAAGACTGGGCATCCGGCATAGCTATACCCTGTTGGAACAGACCAATTTCATGTA -CAGCCTATATCACAAAATAAGCCTAGGCCGAGGGAAAAACTGTAGACAAGGCAGATACTG -ACATCAACAAGTTGCCGCAGCATCTTTTTCACGGTTTGAAACTGGCCCGCTTCAGTGGCG -TATACCCCATAGATGATATAGAGAAGCCACGATTGCATGACCCAGGGGTGTCTGATCTCA -CTGCAACTCGAGTCAACCTATTTTAATGTTGATTAGAACGTTGATCTCAAACGACATAGT -AGTAGGGTCCCATCATTACCATACGGCGAAGCTCTTCCTGCCCACTTTGCCATATATCCT -GACGCCATTGTGCAAGAGTTTGGTTTTCCTCATGATCGTACAAGTTGGGAGAAAATAAGC -TCCCAATTACAGTAACCGCCTTCAGCAGTAACTTCGGCGTCGAGGATACAGCGAATGTAG -GTCGATGCAGCAGAGGGAAAGACGGATGGAAGTGCTCAAAATACTGCTTGACGTATGTTT -CTGCGCTATTAGCGTGATTACTGTGTGTAGAGTTTCCGTTGTTAGACGTGTCATTTCAAT -TGATTAGCGCCCCCTCCTAAGGGGGAACAGCATCGGGCGCAATTGATACAAATAGCTCAA -GGCAAGCATACCTAGGGGAGGAAGATGAGAAACTCTGGGGTAACTCAGTAGCAAATGTAC -TGTGCTCTTCCCAAAAATCTTCGTCGGTCACAACACGTTCGGCTGGGTTGTGGTTGGTAG -CCAAAGGTGAGGTTATTCCGATTGCCTGGGGGGTATGCATACCCTCCCATGGTACGTGGG -GCTGAGCTGTCTGAAGTGGATATCCTGTCGCACGACAACGATTACCATGTCGTTTATATA -CGTCGCTGCAACACTATCAGTGTTATTGTAATGGTGGGCATGAAAATGGAAATAAGACCT -TCTGGAGAATCGAGCATGACACTTCCCACACTGGAAGTTCCTGCGCGAGGCGCCGCCGTG -ACCTTGCAAGACATCAATTTTTCATTTTTCTATGTAGGAAGACCCATGCAGACTTACGTG -CACGCTTATGCCGCTGTAGGTTTTCCTGCGGCACGCGTCAGAACTAAACAGAGAAGATAG -GAAGCTGGAGTACTTACACTCCGGGTAAACAACTTTGAGCATTCATCACAGGGAAACGGA -CGATGGTGGTCAACGCCATCGCCGCTCGCCATGGCTGAGGTGTGGTTCATTTCCTGCGTT 
-TATGGGAATTCAATACAGAGGGGAAATACGTCTTGACTGCTAAGCGCACGTGCCGCTATT -TCACCCCAACCTTAAATTGATCTGAACTCCTGATATTGTACGGAGTATATCTTGTTAGAT -TTATGTATTAGATCCTAATGACTTAACATGTCGAGGTGTTGAGAGGCCATCACTTTTTGC -GTCCTGCTTATTGTTCAGGCTGCACCATCCCCTGGGTCTCATAAGTTACCTAACCTTTCT -ACCCCCGGGACCTTGGATAGATACAGGATGTCATTGGACTACGATGATATTCCGAAGCAT -CAGTTTGGTCATGATCGCCTTCAAATACGGCAGACTGTCCGGGAAACTGGCCGGTATCCC -CGATGACTTACGGCGTGTACCCCGCGTTGTGGAGGTTCGGCCTCAAGTGCGTTATATGAG -TCCTAACTTGGCAAGATGAACAAGACACTCCCAGATCCCTGCTGGCAATATAATGACTTT -CCGTTACTAGGAAATGTCGAAGTGGTTGGTCAAGAATATGTGATTTTGCCAGTGAGCAAA -TCAGGGCAGGTGCGGGATGGAATGTCTCCAAATCGCCTGTGCCTGATTACGTAATGACAT -GTTATCAACCTTTTTCTACAAGCGGCAAGAGGTGAGCACTAAAATGGAAGTGGGAGGGCG -TCGATTGCTAGACATTAACCGGCAAATAAATTCCTTCCAGTCACAGTTTGTTTATGTCAT -ATTAAGATTACCTACTCACTAGATCCTAAGAGGTTAAGCTGAAATGTCCAAATCCATCGA -GTGGACCCGGGCAGATCAACTCCGATCCCCTTTCAACGTCACCCGTTCTGAACCATTTAC -TTGCAATCTATGAAACCGCTAAAAAGGCCAAAACAAGTCAAACTCATGACTTTCTCAGAT -CTTCAGAAGCAGGTTGGCCAGGTATATGATGACCATTGAAATCTTATCTTTGTGAATCTC -CACTGATGATCGCAGCTCTTCGCGGTGGGGTTCTATGGCTGCACTCCAAGTCCCGAAATC -AAGACTTTGATCCATGACTACCACGTTGGCGGGATCGTTCTCTTCTCGCGCAATTTTGAG -AATGCAGAGCAGCTTCAGGTGTGATATCGGAGGCTTTATCATCCTTGATTGGCTTGACTA -ATATTCTTTAGGCTTTGACACTGGCTTTGCAGAATGAAGCCAAATTGGCTGGACATAAAC -GACCGATGTTGATCGGAATCGATCAAGAGAATGGACTAGTCACTCGAATTTCTCCCCCGA -TAGCTACTCAGGTACCAGGGCCCATGGCACTGGGCGCCACGCATGACCCCGAGTGCGCAT -ACAGTGCGGGGAAAGCCACGGGGGAAACTCTCAAATTTTTCGGGATTAACATGAACTATG -CACCAGTGTGCGATATTAACTCGGAACCTTTGAACCCGGTTATTGGTGTACGCAGCCCGG -GCGATGACCCTGAATTTGTCGGCCGGTTTGCGAGTGCGGCCGCCCGGGGTCTGCGGGAGC -AAAACATTGTCCCCAGTGTAAAGCATTTTCCAGGTCATGGAGACACAGCTGTCGACTCGC -ACTTTGGACTACCTGTCATTCCTAAAACAAGGGATCAGCTAGAGCGTTGCGAGTTGATTC -CCTTTAGGCGGGCTGTCGCTGAAGGCATTGAGACCGTCATGACTGCCCATATATCCCTGC -CCTGTATCGATCTCACGCGGCCGGCAACGCTTTCCCCAGAGGTGATGGGAATTTTGCGCA -AGGACATGGCATATGATGGTATGATCATCACGGATTGCCTGGAGATGGATGGAATTCGCT -CCACCTACGGCACAGAAGAAGGCTCGGTGCTAGCGTTACGCGCTGGTAGCGATAGCATCA -TGATTTGTCATACCTTTGATGTGCAGGTGGCTTCGATCAAGAGAGTCTGTGAAGCAATTA 
-GTTCCGGCACAATCGATCAATCACGACTAGCGGATGCTTGCCGCCATGTTACTACAGTGA -AGGACAAGTTTTTGAATTGGGATGCCGCCCTTCGGCAATCCAATCTCGCAGACTTGAGAT -TACTCAACAATAGAATTGCGGAGACTACCATGGATATATATTCTCGGTCAACCACACTTG -TCCGCGATAAAAATGGTGTTCTCCCCCTGTCAAAAAATTCAATCATCATCTTCCTGTTCC -CTGGAGACAAGACCCCGGTCGGTGGTGCTGTTGATGGCGAAGGAACGGAGACGCCAGGTG -TGTACCAGTCCAGCAAGTATCTTGATGTGCTGCGGCAGCACAACAATTCCATTGCTGAGA -TCAGATATGGGGCGGCCAATCTGAGTTTTGAGCAATGGCAAACCCTCGAGGTCGCCGATG -CGGTGATTTTCGTCTCACTCAATGCAAGAGAGTCGCCATACCAGGAGTCGCTAGGCCTGG -AACTTGCTAAGCGTGTTAAATCATTGGTGCATATCGCTGCGTGCAACCCCTATGACTTCC -TTGATGCTCCTAGTGTCAAGACTTACATTACCACATACGAGCCGACTATCGAGGCTTTTT -CTGTGGCTGCAGATATCATGTTTGGAGGACTGGTTCCGACAGGCGCTCTTCCCGTGGGAA -CGAATGCTGTAACCAAAACCTCCGCCCTGGTAACGCCATTTGACGCACAAAGAGATCTGG -ATGAGGTGGTCGAAGTTTGGGCCGCTGCTCTCCCGACCTACCTCGTTCCAACTGAAAGCT -TGCGGTCTATGATAGTGCTCCCACATGGTCACCATTTCATCGCGCGCATTGGATCCCAGC -TAGTGGGCTTCTGTCTTGCTTACACAAATGCCCACGGCACACCGGACACGGTGCATATCG -CAGTTCTTGCCGTGTCACCGAAGTACCAACACCAAGGCGTTGGAACTACTCTGCTAGAAG -AAACAAGAGCATACTTCAGAACATCCTTTGGCTTCAATAATGTGAAGCTGGGCAGTTCAT -TCCCCCGATTCTGGCCGGGGATTCCTCGGGACCTTCCAGAAACAGTGCAGCATTTCTTCA -CTCACCGTGGTTTCCGCCTCAGTCCACCTGGTGCCCGGTCAGTTGACTTATACCAAGATA -TTCGGAACTTCCAGTCACCAGAGAAATACATGACTCGTGCCCGGGAACGAGGCTTCCGCT -TCGCGCCATTAAGGGCCAAAGATTACGAGGCCTGTCTAGTTGGCCAAAGAAGAAACTTTT -CTAATAATGGGGTAGGATTCCCGTGTTTTACCCACACTCAGAATTTATCAGAATCTGGAC -TAATATGTTATCAAGAGCTGGGTGGAAGCATATGTCGCATTGCATCCCGACAAGTATCCA -GAAAGTGTGATGACTGCATTCGATTCCCAAGGCCAGCAGGTCGGCTGGACATTAATGCTC -GGTCCATCAGATGCTCTGGATAAGTCTTGGGCCTTCCCCCAAACCTGTGGTCCCAACACA -GGGTTGATCGGTGCTGTGGGAATTGATGAGTCCCACCGCAAGCATGGTATTGGGTTGGCA -TTGATCTGTCACGCTATTGAAAATATGAAACAACGAGGTTTGGAAGGCGTCTTTGTAGAC -TGGGTTGCACTTGACGGATGGTATGAGCAGGTTGGTTTCAAGACCTGGCGAAGCTACAGA -CCGGGAGAGCTTTGAGTATTCCATCACTATCCCTTCTTTGTTCATTCTTAACTGTCTAGC -TCCGACTATTATAGGTGCTGGTATAAATAATCCAGACTTTTTGAAATAGAATTACTTGTC -CTGTATGGGAGGTTGAACCTTATGATATTGCTGCCATGAGCTTGGCTTGTTCCCTTGGAC -GATGTATTTTCGATTCTGTGAGTTGAAAGGATGCACTCGGATTATTTGTAGCTAGCTAGA 
-TAGACTAAGACCCACCACTGAGCCAGATGAGCTCTTGTCCACGGATGCAATTGGATGGCC -TTCGAACCGCCAATGACCAATAGATGACCAATTTACTCTCCGTGTTGAGACCTGGAAAAT -GGCGGCAAACCCTTGCATTAATCGCCAAGGACTTGGACACATCAGGGGGCACGAGTGCCA -GCTTTTTTTTTCCGTTATAACGGATCAGCTGGAGTTGTATTTGAGAAATTCTGGTATCTG -TACAAGTTCCGGCCGCTGAGCTTACAGCAGAAAGGTCGCTGATTCTTTCGAGTATCACCC -TATCAGATCTGAGAACAGCTTCACAATATTAGCAAGAATATTGAAATAATTTATTTTCAA -GTCTCGTTCGATCAAGCCTTTACGCCCTGTACAGAACCTATCGATTTTCATAAAGAGAAT -ATCTTACCGTTCATATTTCTTTGTTTGGCCCAAGGCAAAATTATCATCAAGAAAAAGCAA -TTTACCTCCTAGAGTCTTCCAATACGGTGTTAAGGAATATGCCTAATCGTTTCACACTGG -AAACTTCGTATATCTTTGCAACTGCTTATTCGTTTTACCCCTTGCAGCGCACAGATATTA -CCATCCAAGTATGCATTAGCCACGTGAATCTTTTAGTTCAAGTGTCACTACGTTCGGAGT -AGGCTGTACAGGCACCCACCCGCAAGGGCCCTTTATGGAAAGACAACTATTGGGAAACTC -GGTCATATGAGCTTTCGAGATTCACCGCTTGAATCTCCGCTTTCTTCGAACTAGTTTTTA -TTGATCAATTTGATGACTAGATATGGCTAATTTCCTGCTAGCAATAAGGGCCGATGGGAT -CAAAGCCCCCAGTGGTCGCTATTAACTACGACCGGAAACCGCTCTTCTATAGGGAGTAGC -ATGGCACAAAATCTCACACCAATCATGGTACAGAATCGGATTTTTGGTCGACAATGCTAT -CATTCTATCCCCACCAAGAATCAGTTTGGGCCATAGGATAACCTAATAGTCAAGGGGTAC -TCTGCATCCGACATGTCAATTACACTAATTTGGAAAGCTCAGGGAACTCTCAAGGTGGAG -TAGCTGTAACCAAAAGGAAACGATCTCAGTTTTCTCAATTGTAGTCCGTACTGCAAATTG -TGTCATGACTTGGGACATCAAGAGATTCCAAAAGCATCTCTAAATCTTTCCTCAAGGGAT -AACCGAATTGAGACAGAAAGCTATCCCCACATGGCAGGCTTAGCATTGCAACCCCCAGAT -ACGGGCCGCTCGAGCCTCGAAAACCATGTGCACATGTCGCCGGAGACGGTCACAATTGCG -GTCCACGTTTTTTCGGAGTGATGATGCGATCATCCTTCAATGTGCCGCTTTAGTCAGGTC -AGCAGCCAAGGCACGTTTCAGGAAGCACGTTGATCTTGGAGGTTGACTTGGCAGGATAAT -CTTTTGGATTGATTGTCGGGTTTGATGTTACATCCAACGCCTCTTACCATCACTCTCAAG -TGAGTGAAAAACGCTATCTCTGGGGCGCACGGGTTGGGACTGGGATAATCCTAGCAAAGA -TCGGGATCGAGTATTATTGGAGTGCGTCCGATATGCCTCCACGGCTTTTCATGTCGGTCT -CCGGGGGGTAATTCGTCCCCGAAGACTTAGGGTTGTATGTATTTGGCTATTCCGCACGCT -AGACTGCGCTAGCGTGGGACCAGCAAATGTCTCCCCCGTGATTTTGCCCCGGATTTCTAA -TTTCTTTTTTTTTAAGAAAAATTGCTTGGAATTTCATTTCAGGTGTCACTCATTCTATTC -CCCATAATTAAGGGAATCTAGATCTCAATGCGGTTCCAAATGGTAATTATCTTGGCCACG -TGAAACTCTAAGCCCTGTTCGCCGGAAACAAGCATGAACGGCAGGGATTACCGCGATTTA 
-AGCCTAATTTAGCGTCAATGAAGCGACTGGCCCCCAGCCATTTGCCTAAATTATTTTTCT -CAAATCACAACTCCGGCTTGTGCCCGGGGGTTACCGAAGCGCCATTCATTTTGTCGAATG -TGGCGCCAGACGCCAATATCGTGGTCTTGATACCTTGCGAGAACTACATTCCATATCTAA -ATTTGATAGGGGCTATGGATGGGGTAGGGATTGATTGCACATGGAGTTAAGAATCGCATA -GCTAGCAGGTCGTGGATTTAGGGCTTTACGGTAGAATAGTATACTTTAAacttggacttg -gacttggacgggcctggaccgggaTCCCGGTCAAGAACGAAATGTCTTGGCTTCTTGTCC -AGATGCATGTAGCGCTCGAGAGTCTCATACTTGTTTCTCGTACTTGGTAGGGAACACAGC -CTCCAATGTAGCATTTTGCCGAGCCCAAGTCTTTCCACTATGGGCTGAAGACTTACAGCA -AGAGTGGAGTATGTCTTTCTCCATTTTCTTAGGGTTTACGATTTCCCCACGGGCAACTCG -ACGCGGGACCCCCGGATTGCATCCGGTTTGGGAAGTAGATCAGAGGGAGGTCATGACAAT -AACGAGCCAATTTGGCTTTAATCGTGAGGCTTTGACGTCTTTTATTCTGTCCAATGAAAT -GTGCACCTCTTGTCAAACGGACTGGCCTGGGGAAGATCAAAAATTATATAAAGGTCACCA -CCTCCCCCAATGCCAACAATCATCATTCGCAACTTCATTTGTCTCTGGAGTTTTATCAAA -CTAAGCACTGCCGTATCTGAAGCTTGTTGGTTTTCCAACTCTTTCCCTCTATCAATTCTT -GTTTTTCCTCTTTCCAACATAAGAGTCAGCTAAGCCCACGACTCGAGTTTCTTACCTGGA -TCTTTATTAACAGGTTGACTAGAAGTATTCAAAATGAATAAAGATGATATCACTCAGATA -GACTCTGGCCAGGAGTAAGTTTTACATCCTTCTTGATCTCCTTGTCATACTTATCTCAAT -TTCATGTAGTTCCAGCGATGATCAGAACAGCGCGGAGAAGGGATGCTTTGAGCAAGTTGA -CCTTCACAACAATCTGTCTGCCAAGTAAGCCTCACTTGAGAGCCCGGTTCTCGAGTCATA -TGTCTCATCTTCCTCACAAGGCACTAACACCAACCAGGATCAAAAACCCTTTGGCCGACT -TGACCAAAAGCCAGGTGCTCCGCAACGTGGAAGACTTTGCCGAGGAGTACAACGTGACAG -ATATCCTACCCGAGTTAAAAAAAGGAGCGCTAGTTGCGCGTAATCCTACCGAGTTCGAGA -CAGTATCAGGCTTGACCGAGGTAGAGCTCACTGCCCTGCGCAACGAAGTGCTTCACAAGT -GGCGCCAGCCTAAAGCACTTTACTTCACTATTATCCTGTGCTCTATTGGTGCAGCCGTAC -AGTATGTAAGGATCCCAGAATGATTTGGGTAAATTACTAACATCTATGCAGAGGATGGGA -TCAGACTGGATCCAACGGTGCTAACTTGTCGTTCCCCGATGCGTTTGGTATCTCGGAAGA -CCCCAAGAAGAGCGCTGATGCCGAAAGAAACCTGTGGCTCGTTGGCGTCGTCAATGCTGC -TCCCTACATTGCCAGTGCGTGCCTTGGATGCTGGCTCTCTGACCCATGCAATCGGATCCT -AGGCCGGCGCGGTACCATCTTTATCTCTTCTATCTTTTGTGTCCTTACCCCCATTGGCTC -TGCAGTAACCCAGAACTGGGTACAGTTATTCGTAGTCCGTCTTCTCTTGGGAATTGGAAT -GGGTCTCAAGGCTTCGACAATCCCGATCTTTTGCGCCGAGAATACTCCTGCTGTCATCCG -TGGTGGTCTAGTGATGTCCTGGCAGCTCTGGACCGCTTTTGGTATTTTCCTTGGATTCAG 
-TGCCAACCTTGCTGTCAAGGATACTGGTAAGATATCCTGGCGTCTACAGTTGGGATCGGC -TTTTATCCCGGCTATCCCTCTCTTGTTTGGGGTTTACTTTTGCCCAGAATCACCTCGCTG -GTATATTCGACGGGGCGAGATGGGCAAGGCATACAAGTCGCTGTGTCGTCTTCGCAACAC -GTCTTTGCAGGCTGCTCGCGATCTTTACTATATCCATGCTCAAATCAAGATTGAGATGGA -CCTGATTGGCAAGAGTAACTATGTCACCCGCTTCATTGAGCTCTTCACGATTCCTCGCGT -CCGCCGTGCTACCCTTGCTTCGTTCGTTGTCATGATTGCTCAGCAGATGTGCGGTATCAA -CATCGTTGCCTTTTACTCGTCGACTGTTTTCAAGAACGCTGGCGCGAACGATACGCAGGC -TTTGTTTGCTTCCTGGGGATTCGGCTTGGTTAACTTTGTGTTCGCCTTCCCTGCGATCTG -GACTATCGATACATATGGACGTCGGACTCTGTTACTGTTCACATTCCCCCAAATGGCATG -GACACTGCTTGGTGCTGCCTTTTGCTTTTGGACTCCTGAGGGTACAGGTCACCTGGCTTC -GATCGCTTTCTTCGTCTTCCTTTTCGCCGCATTCTACTCGCCTGGTGAAGGACCCGTGCC -GTTCACATATTCGGCGGAGGTATTTCCTCTTTCCCACCGAGGTATGGTTTCCACTTTTCG -TTTCTTCTTTGGGCGTGACATAGTTTCCAGAACTGACGTTGATATCAGAGGTTGGAATGT -CCTGGGCCGTGGCGACCTGCCTAGGTTGGGCTGCCGTGCTGTCCATCACGTTCCCCAAGA -TGCTGAGTGCTATGACGGCCACTGGAGCATTTGGCTTTTATGCGTAAGTACTTCCACCAC -CATCTTAAGTTTCTCATTGGAATCTCTGACGGTGGAATTCAGTGGTCTGAACGTGACTGC -AATGGTCATGATTTTCCTCTGGGTCCCCGAGACCAAGCAGCGCACTCTTGAAGAGCTGGA -TTATATCTTTGCCGTTCCTACTCGGGTCCACATGAAGTATCAAGTGACCAAAGCACTCCC -GTACTGGTTCAAACGCTATATTTTCCGTCGGAATGTCGAGCTGGAGCCTCTTTACCATTT -TGATCATCTCGCGAGTAGCGGAGAGGATGTTGTCGTTGACCATGTGAAGTTGTAGGTTGG -TCTAGTGGACTGGCTTTTGGACATGCTTTTTTACTCTCGTGTTAGAGCCGACACTGATTT -TTTGGTTTTCTGATTTCATGAATATCTAAGTAACGAATGGAGCTCTATACATATTGAATA -TCATGGAATCATTGAAATGACTTCCCTTCAGATATTAGAAGAACCAAACCGATTCCTGGG -ATATTCAGCCGTATAGAACAACATCCAAAGTAGGAAAAGTGTTCTCCACCAAGTTGCCCA -GATTCCATGGTTGCTGGATCAGCTGATGTGGAGATATGTGATCAAGACAAGCATTGGTAT -TCTCGGGATCATCGGGAGCATTTGTATTTGTATCTACTTGAAAGGATAGAGTGAACTGAT -AGTGTTTCACGAGGTCAAGTCATGGGGCAATCATCCAAGGAGACATGTAGCCTACAATAG -GCTTTGATAGGTGTATCACGTGGAAGTTCGGGGGGATGGCAACGCTCATGTTTTGGTCCA -CTCCCGCCCCTCACTTTTTTCCTTGAGAGAGCTCAGTGTACGTTTCTTACCTCCGTTGCT -TCTGTTCGCATTTCGGGGTTGTTTGTGCCTTTGAGCCCAGAAACCTCAGTGTCGTGTGTT -TCGGCCCAATCCCAAGGAAGTCTACGAACCTATCCGTCATTGTGACCCTCCGTTTTGAAA -CCCAGAAACAAACACACCAGCAGCTGCAGTGGAGCTCAAGGAACTGCTGTGGGCTGCTGG 
-GTGAATCGAAACCCACTACCTTGACCGCCTCCCACCTATAGCTCATTCTCTCAACACCCA -AGCAGGGGGAAAAAAAATAAGACGTTCGCCTATCCTCAATTTGGCGTCACTTTTCTTGTG -TTTTTGCCCAAAGTATTTGTTGTCTTAGTTGCGGGGCAGATCTGACCCTGCGCCTTCGCT -GGACTCTTCTCCCCACAGTGCACCCTCCGTATCCCACTTAACCCTTTCCCACCATGCCCT -GCTTCAAAGGGCTTGCCGTGAGCATTCACACGCCCGATGGCCCAATCTCCGAATATTCCA -TTCAACGCCATTCTCGAGCCTCGCGCATAGGTTGTTTTATTCCTGTTCCGCCACCCAAAA -TTCCCGAATCTGGGACCGGTAAAGCGGAACAATCAACCTTCGCCATTTCCGTCACCCTCC -TAAACCCTGGGCAAGATGTTCCATACTCAACCCCAAAGCCGACGCCAGACTGCCCCTCGC -CGAAACCAAAAGTCGTAGGCAAATTGCCCGGTGAGCCAGGTCAAATCGCCGGGACGGTCG -CACCTTACCAGGGCTTGACAAACTCGGCGAACGAGACCGTGGCCGCATACATCTACTTCG -ACGGAAGACAAAAGGAGGAGGTGGCTACACTCCTGCGACGAGGAGAAGAGACATGGGTGA -ATTCACGTTGGGTCAGCGTTCCGGATAGCGAAGGCGGCGGAATCGCTGAACGCGAATTCT -TGTTCCGCGAGGTCGGACTTGAGCGCTGGCTCAATGGATTGGACCTGGAAGGCAAGGATG -CGGCGGCCAAGATTGAACGCAGGCGGCAGAAGATGGAAAAGCGACGTGCAAAGCGCGCCG -CCGAGGAGGATCCGACCGCCATGGAAATGGAAGACAGCCATCCCGTCAAATCCAAGAATA -TTATGCGATACGGAAATGATGAGAAGTCCCCTCTCGAGGATGTTTCAGATGACGACCTCT -CTTCCGATTCTGACGACGATGATCCGATTCCCGAGACAGCAGGCCAGATCAAGGTTGCTT -TGTTCCGAGTTTTGGCGTCAGGAGAAATCAAGCGGGGAGAGTACTCCCCGCAATTCGATG -CCCACGACGATGAGGAGGGTGGTCAGGATAATAGCAGCGGAGGAGATGCCGATGTTGACC -ATACCACAAGTTTCGCAAAGCCAAAGTCACTCGACCCGAAGACCATTAGTACGCAAACTG -TTACTGGTATTGACCCGACTGATAAACCATACGCTGTCTTCACTTTCATGTACCGTGGTG -AACGTGAGTATTTTCTGAGTCACTCTTTCGTCGGAATCATTTTTAATTTTACATGAACAT -AGGTCAATTGCAGAAGATGGGTATTTTAAAAGATCCCAAGGTGCAAGAGACGCCTGCTGC -CACCAAGCGCAAGTCCATACAAGCCGATTTCGCAAATCTCGGCCCTATCAAGCCTGGGGG -CTCTGTTGGATTCCTTAATTTCCGAGACAGCGAAGCCAAACCGCGGAAGGGCAAGAAGAG -CGATGATGACATGGATAGCGATGATGACGATACCGACAACCCCATACTTGGCAAGGCAGA -CGACGAAGAAGCTAAGGATGACGATCGGTTCCTGTCTCCAGATGACATTCAACGCCAGGG -AGAGCTAGCAGAGAGCCTGCGGAAGATTCGAGTAAGTACAATCCAAATTATTTACCATTT -CTGTGCTAACAATCCTACAGCTCAAGCGTCAACACTCCGCGGAACCGCCTGGAGGCAGTG -TCGGTGACTCTGCCGATACGCCGGCTTCTGGTTCTGGATCCACGCCTCCTGCAAACGAGC -GCTCAGTAACTCCGCCAAAAGCTGCCGTAGCTGGATCTGCAGGTGGGCCCTCCGAACCGT -CTCAGCTAGCCGCCGAGCCAGAAGATGGCCTGTTCGGCAGTCCACTGAAGAAACAGCGGG 
-CAAGTGTCTCGACTGCAGATGAGAATGCCCTTCGGCGCCGGATTGCGGAGTCGTCCGGTA -GAATCAATGAAGTTCTGGGCTCTTCCGCCCCTACGGAATCTACCTTTACCAGCCCAAAGT -CATTCGGAGATGGGTTCAGTCTCACGCCTGACCTTTCTGCTGGACCTCCAGCAAATGATG -AGGAGTTGTAAGCCTGCGCGGTTGAAAATAGGTCACTTCTGGGTGGTATGATCTCGAGAT -CTTGTTTGAGCCATTTGGCCTTATAGGCCACTACTCTTCGCTTTGGAAAGCACTGACCGT -TTTTGCTAGGACCCTATGGATCAACTAGGATTTTGTGAGTTAGCAGCATCCTCTTTTCCA -TTTTCGGGGTCCCCTGGATGGTCATCTTGCTTTTCTACAATTACGCCGTTATGTCCACTA -CCCACAATACAGGGAACTTGTTGAAGGAGAAATACATATGCTGCACCCATCAGGGCAGCG -TGACTATGAATTTAAATTCAAAAATAAGCGTTGTAGGAGAGAGAGTAATGGTGGTAATGG -ACTACATCGTACGGAGTAGACTATTCCAGTGGTTTCCCGGAACTGTCCAAAGCGGCTTCA -ACCCCGATAAGCTTTTAATCCTGGAAATCATAGCTCCAAAAAGATACATATCTAAACCCA -TCAACTACACATCCATGCTGAGAAGTATTCTTTCACCTGCATCTGCATTACGCAGTGTTC -TGGCCCAGCCCGGCTTTCTCCACCAAGTGAGAGATTCTGCCAAGCCCCAATTGAACCTCA -AATCATTTAACAGCTTCAACAGCCCAGGATAGCCCCAATCCGCCATAAATCAACAATGCC -GCTCGAACGCTCAACGGATCCTTTGGTCTGGATCGACTGCGAAGTACGTCCAAACCCCTA -TCTCCGATCAGAATCATCCCGCAACGCGTTCTGCAATCCCTAATCGCTAACCCCATCCGC -AAGATGACAGGCCTGGACCCGGAAACAGACAAAATCCTCCAGATCTGCTGTTTCATTACA -GATGCCCAGCTAAATCTCCTCGAGCCGACCGGGTTCGAAACGGTCATCAACGTCCCAGAC -TCCACCCTCGACGCGATGTCGCAGTGGTGCATCGACACACACGGACGCACGGGCCTCACC -GCCGCCGTGCGCGCCTCGACCACCTCCCCCTCCACCGCCGCAGACTCTCTGCTCGCCTAC -ATCCGCGAGCACGTCCCGGTCCCGCGAACTGCCCTACTTGCGGGAAATAGTGTCCATGCG -GATAAAGCTTTCCTTGCTTGTGCGCCATATTCGCAGGTGCTGGATCATCTGCATTATCGC -ATTCTAGATGTTAGTTCGTTGAAGGAAGCGGCGCGGAGGTGGGGAAGTGATCAAATGCTT -GAGCAGGTTCCGCCTAAGCGGGAGGTTCATCTTGCTAGGGATGATATTCTTGAAAGTATT -GAAGAGATCCGGTTTTACAAGGAGAAGCTATTTGGGTGATGGTGTGGCTGATATGGTGGC -TTGCTTGTCTCTGTTACTCTGAGACATGGGTCTGAGAGGTAGTTTCAAACTCTGCCTGTC -GAAATCGGTATTGACCATCTTTCTCTAAGGTGAGATACCATATAAATGTACATCATACAC -TCCATACAGATACAAATATACTTCGGAAGAATGTTGTTGTACAAGACGTAGTATTTTGAT -TCCCCCACGGGAATGATCTGCGGGGGCAATTGGAAAAAGTTGAGTTGTTTGGCAGATAAC -TTCTAGATTGGGCTGAATAGAACAAATCAGAGAGGTTTGAGCTATACGGAGCACAGACAA -ATTGAAATCACTAGTATTGTGGCAAAGTACTATAGGAGTACTAGTGGGATGATAAACACA -TGACGCCAAAGATTGGCCCCTCCCCGTGGAGCTCAACAAGTCGGTGAACTTCTCAAACTT 
-CTCCCCCCTTCCTCTTCTTTACTCCTTATTACCCTCTTTCCCTCTCATATTATTGCATTA -TGTCCGTCCAAACGGTCTCCATCCAGCCCTTCACTGACCAGAAGCCTGGAACGTATGTTG -CTCGTTGCCCCGCAATTGAACTGCTAGCTAACTTGCTCCCAGCTCCGGCCTCCGTAAGAA -GGTCAAGGTCTTCCAGCAGGCCAACTACTCCGAATCCTTCATTACTAGCATTCTTCTGTC -AATCCCCGAGGGTGTTGAGGGCTCCTTCCTCGTCATTGGTGGCGATGGCCGTTACTACAA -CCCTGAGGTCATCTCCAAGATCGCAAAGATCAGTGCCGCCTATGGCGTGAAGAAGCTCTT -GGTTGGACAGCATGGCATTATGAGCACCCCAGCTGCTAGCAACCTTATTCGCGTGAGAAA -GGCAACTGGTGGTATCCTCTTGACTGCCAGTCACAACCCCGGTGGTAAGTGATGATCGAC -AAAAAATATCTGGTGGAGACGAGACTGACCACGCGCTTCCAGGCCCCGAGAACGACTTCG -GTATCAAGTACAACTTGGCCAATGGAGCCCCCGCTCCGGAGGGCGTTACCAACAAGATCT -TCGAGACCGCGAAGACCCTCACCTCCTACAAATACCTCGATGTTCCCGATGTCGATACCT -CCACCATCGGCACTAAGAACTACGGCCCTCTCGAGGTTGAGATTGTTCACTCTACTGCCG -ACTACGTGACCATGATGAAGGAGATCTTCGATTTCGATTTGATCAAGGAGTTCCTTAGCT -CCCACAAGGACTTCAAGGTCCTGTTCGACGGCATGCACGGTGTGACCGGACCCTACGGCG -TGGATATTTTCGTCAAGGAGCTCGGTCTGCCCACCAGCAGCACAATGAACTGCCAGCCCA -AGCCGGACTTTGGCGGCGGTCACCCCGACCCTAACCTCGTGTACGCCCACGAGCTCGTCG -AGGCCGTTGATGCGAACGGCGTGCACTTCGGTGCCGCCAGCGATGGTGACGGTGACCGTA -ACATGATCTACGGTGCCAACAGCTTCGTCTCCCCCGGCGATAGCTTGGCAATCATCGCCC -ACCATGCCAAGCTCATCCCCTACTTCCAGAAGCAGGGTGTGTACGGCCTGGCCCGCTCAA -TGCCCACCTCCGGAGCCGTCGACCGTGTGGCTAAGGCCCAAGGTCTGCAGAGCTACGAGG -TTCCCACTGGCTGGAAGTTCTTCTGCAATCTGTTCGACGAGAAGAAGATGTCGATTTGTG -GAGAGGAGAGCTTCGGCACGGGCAGCAACCACATCCGCGAGAAGGACGGCCTGTGGGCCG -TCGTGGCTTGGTTGAACATTATTGCCGGTGTCGCCAAGCAGAAGCCCAACGAGACCCCCA -GCATTGGCTCCATCCAGAACGAGTTCTGGCAGACCTACGGCCGCACTTTCTTCACCCGCT -ACGACTTCGAGAACGTTGACAGCGAGGGCGCAAACAAGGTCGTCGGCACTCTGTCCGACC -TGGTCGCCAATCGCGACACCTTCGTTGGCTCTGAAGTCGCCGGCCGCAAGGTCCTGGACG -CTGGCAACTTCTCCTACACCGACCTCGACGGCAGCGTCTCCAAGAACCAGGGCCTGTACG -CCAAGTTCGATGATGGCAGCCGCATGGTCGTTCGTCTGTCCGGCACCGGCAGCAGCGGTG -CTACTATCCGTCTGTACGTTGAGCGTTACGAGGCCGACAAGTCCAAGTTTGGTCTCACTG -CTCAGGAGTACCTGGCTGATAACATCTCACTGGCCCTCTCTCTCCTCAAGTTCAAGGAGT -ATGTCGGCCGCGAGGAGCCTGATGTTAAGACCTAAGTTCAATTAGGTTGTTAGGGTTAAT -TTGGAACCTGCTGGATGGGGATCACCTTTTAATTGTCTTGCAGTCATATCTGAACACGAG 
-AAGAATGATGATCGAAAAGCAGGATCAAGATTAAAAGATTATACGTTGGCATGAGATAGA -AACTGCGTCTTTAGCATAATCCAAGCCTGTTTTGTCTTAAATCCGGGACCCCAGATCGGT -GAGATCGATGAAGTTATGCATTATTGTTCCTCGTGCCAGGTCTCAGACTAGGGCTAGTAA -ACACCAAATATCATAAGCGTCGTAGTGTCGTAGAATCGTAAATCATCCAACCCTTCCATG -CAATTATGCCTTCCACGCAGCAGCAGAAACCCTCAACTTCTCTCCCCACCAGCTCTCCCA -CTTAACAACTCTCTCATCCCTCCTCGCCTCCTCGGCCGCAAAAACAACAGCATGACTCCG -CACCGCCTCCTCAAGCGAACACCCGACAAACCTCGCTTGTGCCGTCTCAACATCTAGTCC -TCCCTCAACCGCCTGAACGGCACTGACAAATTGCCTGGCAAGCCCAAAGTCTCCACCGCC -ATGCGACTCATTCTGTCCCGGCGGTGGCTTCGGCACATCGTAAACCGTAGTCTTCGCCGT -GGCAAAATCATAAACCGAAATCGTGGTACTATCATACGAAACCTCACCAGTTGTACCATA -CACCCGACCCCTACGCTCACACTGTTTCTCTGTAGGTGCAATCATATGGAAGCTCGCCGT -TTTAGCAAGCCGATGCGGCGCCGCCTCCTCATCATCCCACGTGAGCGTCACAACCTGATC -ATCACAGACATCATTATCAGCCTCGTAGACGCACCGCCCATACCAAGGCCGTGCAGCGAT -ATCCAAATCTGACACCGTATCCCGATCATAGTCCTCACCAAGACGGGAGAGGAGCAGCGA -CTCGGCCGCCTTGTCGCCTTGGGCACGGAACACATCTTCGATATCTGGACAGACGATATC -AACCGGCCAAGCCGTGTGACCCGTCGCGAGGTGGCGATCGTTATAGATTCGAACCGCGCT -GTAGGTACAGTCACGTTCAGCGGGACAGGAAAGACAATTTGTAGCGTTACCTGCGGCGAC -GGGCTTGCGCTTCTGGCGGAACTGCGTCATTGTGCCGGCGGAGGAGATGGAGCGCGGGAA -GTGCGGCTTGTGGGCTTCTTTGGATGCTGTTTCGGGTGAGGGTGGGGAGCAGAGGAGCCA -GAGGATGAAGTCGATATCGTGGCAGGATTTTGTGAGGAGCGAGCCGTCACCTGCCGCTGT -TGCGCGTCGCCAGTTGCCCCGGACATAGCTGTGTGAGAAGTGCCACCAGCCTACCGGCTC -GCAGTGTTCTAGGGAGACTATGTCGCCGATTACGCGGTCCGTGAGGAGGAGTTTGCGAAG -GAGGATGTTGTGTGGGCTGTAGCGGAGGACATGGCCGATTGAAAATATGTTCTTCGGGGC -TTGTGGGATGTTATTTGGGCCTTCTGGTGGTTGGAGGGCTCGGTATACTGTTAGACAGTC -GTCTAGCGACAGAGCTAATGGCTTCTCGCAGAGAATGTGTAACTGGAGTGGGGCGAGGGC -CTGTAGAATCTCGACGTGGGTTTCGTCTAGTGTGCAGATGAAGACTCCGTCTACGCCCAC -AGGAGTTGGGGTTGTGTCTGCTGCGTTATCGTCGTTGTTTTGAGTCGCCCGCTTCCGTCG -CTCGAGCTCCCACTGTACCCATTCGCGCCAATCGGCGAATTCCTGGCCCGTTTGAGGAGA -TTCGGATACACCCCATATATATTTTTGTCCGAACTCGCGCCGTTTGAAGGCGTGGGGCTC -GGCAATTGCGTGGATTACACCCGGTGTTGCGGTTGTGACGGCACGACCATAGGCCTTCCC -GCGAGAGCCCCCGCCGATGACAAGAAAGCGTAGTTGCTTCGGTGAAGAAAGGGAGGTGGT -GATAGTATCCACGGGAGGAGGAATTGGTTCTTTCATGTTGGCGGTTAGTTGAAGTAGGAT 
-GCGGGGAACAAGTTGGAGCTCTGTCACGTTTTATCTAGTGATATAACGAATAATTGATAG -ACTTGACTCTGACAAGGCCTACTATCCTGACAGAACCAAGCAGAAAGAGAGATTTATATA -TAAAGTCGAGACACGAGTTACATTGAAGTTGCAATGTGAACGCGTTGTAACTGCGGGGAA -CATCAGCAACATCATCTGGGCTCTGATTGGCTGATACGCATTGTATTTCTTGTCTATTTT -GTTGTCGTCTCTTTACTGTCACTCTCACTCTCCGATTCGCTTTCTTTGTCAAAGATCATA -GCAACCTCTTCCAAAGACCGCCCTTTCGTCTCGGGAAAGAGGAAGTATATCAGAACCAAG -AATATTACCAGACACCCAAGGAAGACAAAGTAATACTTCCATGCAATCGCCGTAAACGCC -ACCGGGTTCACATATTGGTTGAAGAAACACGCCACAGCATCCACTGAAAGAGTGACAGCA -ATACCCTTGGCACGCAGCTGGAAGGGTAGGATCTCAGCTGGATACGAGATGAACAAAGGC -GCATAAGCAATGTCGTATGCTCCGATAAACAAAAATAAGAGTGGAATCACAGCTATTCCA -GCAGCAGGAAGATGCCTTTCGGCGAACAGACCAGCAGCCAGGGTGGACATAGACAGGAAG -ATAATCATGGACCCCGTGGAAATAAGCCATAAAATGCGACGGCCGTATCGCTCTGATGCA -AGTGCACCGGTTGCTGCAAGGAGGAAGTTCCAGGTATTGAGGCTCACGTTCACAGCTGCT -TGGTCGGCTGCCTTGGTGATACCCACCGATCTGAGAATCGGCGCGAGGTAGTAGGACAGG -AGGCCTGCGATTTTGTTAGTCAGAGGTTGTTCATTAGGGCGCAGTGAGGTGCATACCATT -TCCAGCCCATTCCTGCATGAGCCCAACTAATACACAGATCGCCAATCGGTGCATATCACC -TTTTGACGAGAAGAAGGAGCTCCAATTCCTGGTATTGGTAATCTCGGCTGTGATTGTACT -GCAAATCAAACGGTACTCATACTGTACCAGCTCGTCGTCTGTGTCTCCATTCGCGTGGTA -CTTCCCCAGAATAGCTAGGGCCTCGTCTTTTCTTCCTTTGGACACAAGCCATCTTGGGCT -TTCAGGCACAAACAGTATGAGCCCTAGGATCTGGAGCAACGGATACAACATCTGTAACAA -ACAAGGAACTCTCCACGACCAACTGTTGGCTATTCCTATGGTTGCATATGTAACCACTGC -CGAGCCAATGGCGCCCAAACACCAGGATGCATTGTAGAGAGCCGTGGCCGTCTGTCGCTG -TCGAGGGTGTGCAATCTCAGTGGCCAGGAGAGGAGCTCCAGTCTGGGTGATTCCCATCCC -TATACCCGACACGACTCTCGTGCCAAAAAAGGCCCAGTGGCTTTTTACTGCGGTTTGAGT -GATTGCTCCCACGATAACAATACTGGAACCAATGCAAACACATGTCTTCCGTCCGAGTCT -ATCGGCAATATGAGAGGCTGGTGCAAAAGAGATGAACGCCCCCAGAGAGGTGGCAGCAAT -TATCAACCCAATTATATTGGGATTTAGCCCGCCAATGATGGTACCAACTATGTCATGATT -AGTATAAAAATCACTGCACCAAGTTAAAGAGATTAAGACATTACACTCAGGCAGCATAAG -CAAGCTATTCACAAGATTTCCGATATATCCCGCTGACGAAGCAGTGAAGAACATGAACAC -AATACCCATGTTCAATTTACGCAGCCCCCTATCTTTGATCCAATTCGGATTTGTGTTGTT -CGGTAGGAGCTTAAATGCCTCCATTGAGGGCTCTTTCTCCATGGTATCTGTCTTCGTCGC -CATTTTGTATGCTTTTTCAAAGCTCAGGCTTGGTGAAGAAAATGAATTCGAATATAGCCT 
-TGGATCTTCACTCCCTCATGCTATACCAGCTGTTTATATCACAAAATTATACAGAACAAG -GGATCATTTACATTCCTCTGACCTAATAAACCTCCAGCCTTCCCCAGGCGATTCCCGGAT -AGTTTCAAGATAATCCTGGTGACTACAATATGAATGCACGGAATGAAGGGAGGAATGCAA -GATTCAACTACCCGAATAGGCAATTTTATCAGCGCAGAAGTTTTACTATCTCCGATCACC -TCGTCATAGCGCCCAAAAGTCATGATCTACAGCCCACAAGAACAATTGTACTCTACGTTA -TGAACCACCAACCACCACAACGACCTGAGACAAAATGATTCAATCATCACACTCCCCAAG -CAGCGAAGCATACCGATTCCCAAGCCCACAGAACCCCTCACACTGCCCACCAATTTCCCT -CCCCTTATGCAGCACGTCCTCCATAGCCCATGCAACACGTTCCGCGCCCACCCTCGCACC -CTCAAGATTTGGCGCGCCGGGCCCAAGCCGGAGAGATGCCATGCGACCTGTCATAAATAA -CGGCACGTCATCCCGCCATGACAGATCGTTCGTCAGGCACGGTAGACCTGACTTGACCTC -TATTGGGTATTGATCATTCATCGTGCGGAGCAGCGGCATCTCGCGCACGTCCGCCCGCGC -TCCGGTTGCGAAGTAGATATAGTCGATTGCGGGGAGCTCGGGGATTGGAGGGTCGGTTGT -TAGGGTCCAGGTTTGGGTGTCTGGGTTGTACTCGTGGGTTTTGATTTCTGTGCATGGGTG -AATAGAAGCGCGGTGTCGGGCTGCGTGCTGCTTTACGATTTTTGTATAGCGTGGTGTAAT -ACTGCCACCATTTCGGGCGGTTTGGAGCATCTCAAGGCGCTCTAGGAAATGTTAGCATTG -GCTTTTCGATGGCATTAGGGGCTTTCTTTCTTATACCTTCGTCGGTGTCGGCGGACCAGA -AGGCTGCTTTCTCCCAGTTCTTGTACTTGCCCATCCAGGGTAGACTGATGTCGAAATGTT -TGACTGTTGGTTGAATGAGGCATTGTTCAAAACAGTGCTTTCGGGTGAGGCTATACATAC -CTTTCATGCCAGACCGCATCAAAAACCACACTTTTGTTACGCCCTTGCGAACTGCCATAT -CGACAATCTGTGCTGACGACAGCCCGCCTCCAACAACGACAACATGAGTCTCCTGCCGTC -GCTGAATCTTGTTCTGTACATTGGGACTGGGAAATGATTTGATCTCCGTGCTGTGGCAAC -ATGCCGCAGCGCCCTCTTGCTCCGTCGAGGGCTTCCACGGATAAATCTTGGATACATTTG -CTCCGCCTGGACCAATGGCCAGGACAACAGCCCGACTATAAAACTTCTCTCCCGTGGATG -TTGTGACAGTGAACACTTTTGAGTCCGAGCTATCAGAGAGATAATCATATGAGAGATCTG -TGGCCTCGCGCTGCAATATCTGGCCGGGTTCATCCAAACCATACCGCTGGATAATCGACG -AACAATAATCCGAAAAAAGGTCTGTGGAAGGCGAAAAATAGTCTTTTCGATCTCGTTCGT -CAATCTCAGCTCCTATTATTCTGTTCACAAGGATTGAATGGTTAGAAATCAGTTTAAGCC -ATGCTTGCAAGCGCACAACTTACTGGGACTTCCCACCCTGCCtcctcttcttcttcttgt -gcttgctcatctccttcCCAACACACCCGGGAATCTCCCACAAATCTGCATCTCTCCCAC -TCTCCTGTGTATAAGCCAACATCCCATCCCGATCACCGGGATCAATATGGAAGAACATCG -GGCTGCGGAGCTGTTTGATCTCCAGCGTGCGGAAAGCATTGTTCCACCGCTGCATCCATT -TATCCCCAGAGCCATCGAGAACTAGAGTTGAAACGGATGATATCGACGATGTTGATGACG 
-CGGAGGACAGCGATGGCACAGAAGATGTAGAGTCAGCCGAGCTACGCCGTTGTCGGTCAT -CTCGACAAGGGCATCCGCGTTCACTATCCCATTTTGCAGCTGGCACCCCGTTGATCTTGC -TGTGGCGTCCTTGTACCAGACTCATTCGACCGCTGTGCTTCTTGATCCAGTGATAACGCT -GGTGTTCTTCGTCTGTAAAAACTGCTGATGGTGTTTCTTCGCTTAGGCGTGCGGCGACGG -CGAGGCCGCATGGTCCTGCGCCGATGATTATTACGTCGCGGACTTCATAATCTTCTGGTA -ATTCAGATGAGAACATTTTATGTGGCAAGGTAGGGTCGGAGTAATGGTGTGTGATGGAGA -CGTAGTGGTTATAGTAGTATTTTAGGACTGAATGAAATAGCAAATGTGTAAGAGAGTAGC -ATCATAGGACCAATATCTCTCTTTTGTACCCTACTGCCATATATATTGCTACAATCTCCT -CATAAGTCGACTATCGAGGTACTCGTCCCCAAGGTCCTTGTGGCATGTACACCATATAGG -GACAGCTTCATGCTCCACAGTTGATATGGCGAATTTGATGGATTATTGGAGTCAATCTCT -GAGGACCTTGGGATATTAGATCTTTTAAATTCAATATTTGGGCCTTTTCAATATAATGTT -GCTGGTATTTACAATGTAGGGTGGGCGACTTAAACTCCAGAGTTATATATCTTCTTGGCC -ATGTTCCACACAGAATAACACAACCACTATATATCAATAAAACCAATAAATCACACCAAG -AAAACCGACAATAATTGCGAGAAAGGGAACAGAAAAAATAAGCTCAAAGTTAGCCCTCTG -GCAGAATCTAAGTTAAGGGACGTCCATCCACAGTCTTTTCTACTATGTCTGTAGAACAGA -AAAGAAGACAAGGAAATTATATTATATGAAAGAAAACATTTGAAAGGGGGGAATAACAAA -GCACGAAAAGCCGCCGGAGAAAAAAAGAAAAGCAAGAGAAAACGAAATACACGCAAAAAC -CCGAACGCCATGAAACAAAACCTCCTGCGCCTACACAAAAAGACTTATTTATGCCATCTG -TTCTTCGCCCCAGCCTTTGCCCTAGACGCACCCCCTTCCCTCCGATGCCAAGGATCCAAA -GGCCAAACCCACCCCCTCACCCTCTTCGCATCCCTCCTCTCCCCAATTTGAGGCTCGGGA -ATCCACTTATCCTTCGCCTCCGCCAGAATTCTCTGACGAGTCATCGCACCAGGTCCGCCT -ACGTTGAGGTGGGCAGAAGGCTGATGCGGAAGGGGCACCGATTTCGAAAGCATCAACAAG -AAATACAACCCCAGCCCCGCTGTCATAGCCGGCAACGTATCCTCAGCCGGATCAGGATCC -GACGAGTCATGACCCGGAGGAAATGACGAGCTCGATGACGACGAGAAAGATCGCTCGCTG -ATAGGCGTTGCGATTGGTGCGTCGCGCGTGTGTTCATGCGCGCCCCTCCCAGCACCAGCT -GCTGTCTTGAGCGGGATTGAAGCCGCGATTTCCAAGTACCCGAAATAGGGGACGTCGTCG -CCGGCGTCGCAGCCTGCTCGCACGCAGACGAGCTCGATTTCGGTGATGATGAAGCCGTAG -CGGCAGGAGTGCTCGCGCATGCAGCGCTGGAGGTGAGCGAGTCCGTTGAGGTATTCTACG -CGTCTGTGGGGGGCTTCGTTGCGCATCCCGGTGTTCCAGCGGTCGAAGCTTTTCACAATG -CCGACTAGTCGGCCGCGCGGGAGACCGGCGCTTGTTTGCTCGGTGTCTGAGAGGTAGTTT -GCGAGAAAGTGCGGGTGGCCGTGGTTTTTGTGGCCTGTGTTTCGGACGTCTGGGGCGGCG -TAGAGTTGGAGGTGGTCTTTGCCTTGGGAGACGGCGAGGGCGGCGTTTACGCGTGGTGCG 
-TAGAGGTCACGGATTAGGCTCACCAAGTCGTTTTCAGACTCTGGCGCAAGGCTTGCTGAG -GGTACAGCCACGTGTGGCGTGAGCTCTGCTGGGATTTCGGTCTTCAGGAGCTGGATTAGT -CGGCTGTCCAGGGAGTCGAATGTCGCCAGTGAGAAGGAGCTCCAGCTACGCAGATTGCGG -ATGTCCCACCAGAAGTAATCGTGCAGGCCGCGAGTTGGAACTACGCTCACGTTGCGGACT -AGGCTGATGGGTTGGAGTGGCAGGGTCAGGTATGACAGCAAAGTTGTAGACCGTGGGGGT -AGGTCTTCCCGAGGACTGAGTAAGACTGGTGAATTCTGGGCAGCCAGTGCCTGGCGGTAG -TTATATTGAGGTGTCGGCACTGTCAATGGGGTCTGTGTGCTCGCAGGAGCGTGCTGGCTC -GTCATCCTTTGGGAGTATGATGAGTAGGGCAAGATGTTTGGGGTGATTGGGGTGATGGGG -GTAGTCGGGGTCGTCTGAGCCTGCATTTGAGGCACATATTTTGGTCGATCTCCATATGTG -GGATACCCAAATTGAGCCGAATTTGCGAGATCGAGAACAGATGCAGACACTGAGCGTGAG -TGGCCTCCGCCTGCTTTGCGCTTGTACGAAGGAGTGATTGTCACAGGCGATGACAGCGCA -GAAAGGCTGCCCTGTGTGGTCATTGGCGCCGGCGAGATAGGAGAGGCCAATGTCAAGCGG -TCCGCGACATCGCGTGTGCGGCGGCCAACAGATAACCGTGCCGGTGGGTAAGGGATAAAT -CCAGGTGGGTTCCGTGTGTTGGACAGAACCCGAGGATGACGAATGGGCCCGCCAGCAGAC -ACTGGCTCGATCACGATGTCTTGAGGTCGGATCTTAGGTAATAGCTTGGGGCCATGATGA -TGTCTGGTTGGTGATGTGGTAGGGCTAGTATTTCCAGTCATAGTCGACCAAGGAGATGAC -TCGCTTGAATAGACGGACGAATTGGAGGCATGGCGACTATGTCCAGAGCCAATGTATTCC -TCGAGAGAAAGACGCGGCGGCGTCGTTGGCCGAAAGCCAGTGGACTCGACAGTAGACATG -ATAGTGAAGTTGTTGACTGGGTGTACCACCAAAGACAATCTATCATAAGAGTAACCAACG -AATGAAGCAACTGATGTCTTGTTGACCAAGTTCAGAAAGGCATGATCCAGGAGGAGGATG -TTTGGATATTAAATGGACATTGACGGAATCTCAAACAGCCAGGCTCGACGCATATAAAAA -GATTGTCTGACGAACCCTAGCCGATCTAGAAAAGATGAAACATGGAGAAGAGCCCCATTG -TAACGCCTACGGAGCTAGAGAATCTTTTTTTGGGATCCTAGTCTCCGATCAGAGGCACGT -CCACTCGATGAAACACGAAACTGCCGACATTGTGCCTGAAGGACAAGCATGTTGAGCAAC -AGGGCCCTTGAAACTACCCCTCCGATGGGGTCCAGTCCGGCATAGTGCGGTTTCATCAGA -CTGTCGCGCGTCAAGTTGTTACTATATTTTGCATCTTCTTCAATATAATTCTTCAGATTT -CCTCAATCGGTTAGCCTGCTGCACGATTCTCACAATGTTCTAAAAACGGCATCAATTAGG -TCACATTTCCAGGTAATTAGCGGACTTGTTGGTGACTGTCCCTGCCTAAGGACTCGCGGG -GTCAGCGGTTAAGGGTTCCAAGAAAGCAAATGGATAAAAATAGCAATCGGGGATCATGGA -ACGTGTAATTCCAGGGTTTATACAGGGTAGAAAGCACTCTTTGATGGGAGGGTACCTGCT -CTAGATGCGCCAGTGGCCGAATGTGATATCGAAGTCGGCGGCTACATGTATGGGATGAGT -TTGAAGTACGCGCCGAGGACCGGCGTCATAACTGGTGATTGGACAATTTGACCATGAAAG 
-TAAGAAATTTCCCATAATTAGCAACAATTAATGCAGTTGCCCGGGGAAAGTTCATGCAAG -AGATCTCTGGTAAAGAAGTTTGAGGTCAAATTGGGGTGATCAACACATAACCGCGCATTG -ACCCCTGTGGTTTCGCTATACAATTGTACATAGTTAATACTTTTTTAGGCAAAATTATTT -AGAAAttcatctcattaatttgttttaattatttcatgacattcatttcatGTGGGTCAA -AAATGATTAACACCCAACCgaaagagtaaaaagagcaaaaaagaaaaaaaaaaaaaacga -aaGGTCTCGAACCCGGTGGTCTATGTACCTCGACAAAACACTCAAAAAACGCTCGGGGGA -ATCTCTCTGAACCGTCTAAACAACATATCGAACGGTAAAGGCAATCGCAGAACATCAAAT -TTTTAAGCCTTGATCTCGTACTTGCGGCGGAACATGTCATCCACCTCAGTCAGGTAGTAA -ACACCGGATCCAATAGTCTCGGTTTTACCAACGGGCTGGAAGTTCTTCTTCAGGTGGGCG -TGCTCACGGAGCTTGCACATGTCATCATACGCAGCAGGCTCGAGGATAGTGCGGGCATTG -AGACGCTTCTGGAGGTCAAGCTTCTCAACCATGCCGGACACATCGCCGACAATCTTGGCG -CTGAACATGGAGCTGGCAAGACCGGAACCGTAACTGAACAGACCGATGCGCTTGACCTGC -TCGGGATCAAAAGCGACATTGCTGAGCAGACTGGCCAAACCGCCGTAAACGGTGGCAGTA -TACATGTTACCACACTGGGTGGCGACCTGGAGACCAGGGTTGACACGCTCGGCGAAGCGC -TTCTTGGTCAGACCCATGAAGGTCTTCTCAACAACCTTGTCGGTAAGGGACTTCTCGTAG -TCCAGATCACGGAGCTCAGGGGCGACCTCGGTGAAAGCGGGGTGCGAGGGGTTGGCGAGG -TAGTCGTTGTAGAGCATACGAGCGTAGGACTTTTGGACGAGCTTGCAGGTAGGGGCGTGG -AACAGAATGTAGTCGAAGCGGTCCAAGGGGGTGAGGGACTCATCGGCGGCACCGTTGGTA -CCATTGCCGTTGGCCTGGGCCTTCAGGGTCTTCTCGCGGGCATCGTAGGCCTTGTAGCAG -GCATCAACAGCCTCGGTGTAGCAGCGAAGGGAGTAGTGACCGTCGACAACGGGGTACTCG -CTGGTGAGGTCGGGCTTGTAGAAATCGTAGGCGTGGGTGAGGTAAGAGGCACGAAGACCG -GCCTCGAAGACGATAGGGGCATCGGGACCAATAACCATGGCCACGCAGCCAGCACCACCA -GTAGGGCGGGCAGCGCCAGCGGCGTACAGAGCAATATCACCGCAAACCACAACGGCATCA -CGGCCATCCCAGGCGGAGGACTCGACCCAGTTGATGCTGTTGAAGACAGCGTTGGTGCCA -CCGTAGCAGGCGTTGACGTTGTCGACACCCTCAATGTTGGTGTTTCCGTGAGGAGCGAAA -AGCTGCATAAGAACGGACTTGACGGACTTGGACTTGTCCAAAAGGGTCTCGGTGCCAACC -TCGAGTCTTCCCACGGAGTTGGGGTCGATGCTGTATTTCTTCATGAGGGAGGAGAGGACG -GTGAGAGTCATAGAGTAGATGTCTACCGGGAAATATTAGCTGATTGATCTCGATCAGTTG -GCATCGGTGTACATACCCTCACGGTCATCACAGAAGCTCATCTTGGTCTGTCCAAGACCA -ATGGTGTATTTTCCGGCAGCGACGCCATCGTACTTTTCCAACTCAGCCTGATCGACACAC -TGCCATTGATATATCAGTATAGATCTCAGATCGATAAGATACAGGGACACTCACCTGAGT -AGGGAAGTAGACCTCAATGGCCTTGATGCCGATGTTTTGGGGACGAGTAGCCATGGTGGG 
-GTATGGAGGGGCAATGACAACACAAAGGGGTAAAGGAGAGGGAGGATAGTAAAGTTGAAA -GTGAAGGTTTGAAGTAAAAGAAGAAGAAGCCCAGAGAGAAGGCAATTGATATAAGAATGG -AAAGGAGTCTGTGTACTCGGTTCTACCGTGTACGATCTGTCACCACTTAGTAtttttttt -ttttttttttGGGTGGAGAGATCAACCCTCGTCGAGTGCCGTTTTTTGTTCCTTTGGGCC -GGATAATTGCCTAGGAGATACATCTTATACAACATAGTCTATAAGTATGAGTGAAGCTAA -TGTATATAAGCACTATAACCACCTGACTACAGAAGTACGGAGTATCTGGGGTAGCTCCAA -CACTTCATAGATAACATGTACCTAGCAAGTACAAGTACATACAGTGAAGATCCAGCAGTG -TACTGTGTACCTGTGTAGGTCTACAGGGTCGGCTACGTGGCGTTGTATAAAAGCTACGGT -GTAGAACTGAGTCTTATCGATCTGTCTCCACCCTCTTTCAAAGTTCCCCTTTTAGATCTT -TCCTTGTGGATCTTCGTAGTGGCATCACTTCGGACATCTTTGTCACAGAGTAGAAACCTG -CTTTATAGTCAAATACTCCTTTCAATCTCTTTTCTGATGCCTTTGTCTTTGACGTGTATG -TAGCATAATGGAGAACATCTGCATGACACTGCGCATTTCCGAGATCGTATATGaaaaaca -aaaaaacaaaaaaacaaaaGGACACTCCAGAATCTATACGGATTGAAAATGTTCGAATAT -TCTAATATTGGAAGAGGAATGTGAATGCTCGTCTATCTACAATATAATGCTCATGGTGTG -TCATGAAAGTTGCCAAGCGTGGGTCACCTCTCACTCCGGGGGCGTCTGCCTATCTCCTGT -CTCTCTCCTATACAACTCCCACCAAACTCCGGACATACTCACTTATCGGCATATGATGTC -GGACGTCGCTCATTTTCCATCTTCGTACAGATCATGATCCTCCGGATCTCCGCACCCGCG -CTACACCCATCTTACAGCTCCCCGGAGCCCCGGATCTCCCACAATGGGCGACGAGAAGTT -TGATTATCAGGCTGTCCCCATCCCTTCTTACGCTGAAGCCACCGGTCAACCCTCCTCATC -TCGATCCGATCTCGGTGATGGAACAGATGTTGAACGACAGGGCCTACTCGGTCGCGATCA -TGCCTCCACCCGTGGCTACCACCCACCCACCGTGGAATCGGCGCGCTCCAGCCTCGATGG -CCTCGAATCACTGGCCTCACGCTCAGATCGTGAATCGGTGGAGGAATTACGACGCGAATT -AGATCAAATGGACGTGGACGATTCGGCATCGGCATCCTTGACTGGCAGACGCCCCCCTCT -CCGGCAACGTTTTTCTAAGCAGCTATCGAGTCTGGGTCGTACGCTGTCCGCTATCCAACG -ACCTTTTCAACGGTATATGCCTAGATTTCACTTCACAATAAACCTTGGTGACACCAGGGC -ACGCTTAAAAGATCAGGGATGCATCATGCTTCTCCGAGTGTTCGCTCTTTTACTGGTGGT -TACACTAGTGTATGTGTTTTTCGTCGCAGATGTCTTCAATATAAACAGTCGCATGTCCAT -GGGTCAATCCTATAGCGCCGCCTCCGTCGAAAACTTCATCCAAGGTCACATCAACGAGAC -CAAGATTGCCGAAAACCTACGGCGAGTCACCAATTACACCCACGTAGCTGGCACGGAAGG -CAGCTTCGCGCTATCACAATTGATTGAGCAAGAGTTCCAAAATGCAGGATTCGATGAAGT -GTCGCGGGAGGAGTTTCAGGTCTACCTCAACTATCCACGCAAGGATGGAAGGAGAGTTGC -AATCATCGATCCACCCAGTCTAGCTTGGGAGGCAAAACTTGAAGAGGACGACGCCAAGGA 
-CTTGATCTTTCATGGGCATTCGAAATCTGGCAATGTCACCGGGCATCTGGTGTATGCTAA -TTTCGGAGCCCGGGAAGATTTCAAACTTCTCGCAGACAAAGGAATTTCACTGGATGGCTC -GATTGCCTTGGTCCGTTATTATGGAACCGAATCCGACCGTGCATTGAAAATCAAGGCCGC -TGAACTGGCAGGGGCTGCTGGCTGTATCATCTACTCCGATCCCTCCCAAGACGGATTTAT -TAAAGGCCCTGTTTTCCCCGAAGGACGGTACATGCCAAGTGATGGCGTGCAAAGGGGGGG -TGTGAGTATGATGTCGCAGGTAGTTGGCGATGTTCTCAGTCCTGGGTGGGCATCCACGCC -AGGAGAAAATCATCGTCTATCCCCCAAGGAGAGCCTCGGTCTACCCAAGATTCCCAGTCT -TCCACTAGCTTGGCGTGATGCACAGAAACTTTTACAGGGCCTTAAAGGCCACGGCTCCAA -AGTACCAAAAGAATGGGTGGGTGGTGTCCCTGATGTACAAGAATGGTGGACTGGTGGCCA -AGACTCCCCCGTCGTTCATTTGATGAACCTGCAAGATGAAGTAGAGCGCCAACCTATCTA -CAACATTCACGGCAGAATAACCGGCATGGACGAGCGCGACAAGAAGATCATTGTCGGCAA -TCACCGAGATTCGTGGTGCATGGGAAGTGTTGACCCTGGAAGTGGCACCGCTGCTTTCCT -TGAGGTTGTACGAGCCTTTGGCGAGTTGCTCACCTATGGATGGCGCCCGCTTCGCACCAT -CGAATTTGTCAGTTGGGATGGAGAGGAGTATAATCTGATCGGCTCAACAGAACATGTCGA -AAAGGAGGTCAACGATCTCCGTGCGAATGCGTTTGCCTACCTAAATGTCGACGTGGGTGT -TTCTGGGCCCAATTTCCATGTGTCTGCATCGCCCGTTTTTGAGCGCACCGTGCTGCAGGT -ACTGGGCCGCATCTCAGACCCCTATGCGAACGCGACACTGAAAGACCTTTGGGACCAAAA -GGGCTCAAAAATCACACCACTTGGTGCTGGAAGCGATTATGTTGCATTCCAGGATATCGC -TGGCACCTCGAGCATTGACTTTGGATTTTCAGGAGAGCCCTATCCATACCACAGCTGCTA -CGAAACGTATGACTGGATGGTAAAATTTGTCGACCCTGATTTCCAATACCACAAAATACT -AGCTCAATTCTGGGGGCTTCTCCTCCTCGACCTTTCGGAGAACCCCATGCTGCCGTTTGA -CCTAGAAGTCTACGGAGACTCCATTGGCGGGTGGGTCAAAGACTTGGACGACTTTGCCAA -GTCAAAGAAGGCTAAGGTAGACATGCAACCCATGTTTAAAGCTGCGACCGAGATGAAAGC -CAATGCTGTTACATTCCAATCGTGGAACAAGATCTGGCACGATACTGTGTGGGGTATGGG -AGGGTATGAAAGCAATGTCATGGCAGTCCAGCGGGTGAATCACAATGCTCGAATGGCTGC -CTTTGATACCAATCTACTTGACTTGGAAGAAGGCGGAGGTGTAAGTTGATGTGCTTGAAT -CACCCCTTAACCCTAGCTAACGACTCGTACAGATTCCCAACCGCACACAATTCCGACATG -TCATCTTTGGCCCAGAGCTATGGTCTGGCTACGATGCCTCGATCTTCCCTGCTATCCGTG -ACAGCATCAACACAGGGAATTGGACTTTGACACAATATTGGATCGACCGGGTCGCCAACA -CCATCCACCACGCCAGCGATAAGCTACTCCATTAATGTATTCTACCTTTCAGCCCTTTTT -TTTCTCTTGTCGGTCTTCTCAGCCTCACTTCATCCACGCTTATCGGATTGATCATTTTGT -CACGTTCTAAACGAACACTGTACAGCCTGACGCCTGCCACGTCGTCGGGATACCTCTATA 
-CCTGGATTTTGGGAACTTCTTGCACACCCATCATATACCATGCATGTTTTGAGCTCACGG -GACTTGGCATATTGCTCTTCTGTCGGTTTCATTCGTCGCCGCTTTTCTTGAATGTATCTA -GCTTCTCATCCTTGTCCCGAGGGACCGGCGTTCATATGGGGATATATCATATTGCGTATC -TACCTTTTAAGAGGATTTTCAGCCTGCCAGTCCGCATGACTTGCTTGCTCACGTCAAGTG -CGCATTGCTTTTTCAGAAGGAAATATTTCGCCAACCTAAGATTCAGGTGTTGCAGATCGC -AGTCTTTTGGATACTTACACCTTCATTGTCCAATTCAAAATAAGCCTCATATTGTCGTAT -TGTATGTTTTATCGTCATCACTCTCACTTGTCTAAATATATCGACCAAGCATGAAATATT -TCAAATTCAACATCAGTCTCCCCTTGGCAGCCCTAACCCTGCCAGGACCTTCATTGGTCC -AACGTCTTTCCATGCATTCAGTTGGTCATGATTCCAGATATAAATCGATCCCATCATTCC -AGCTGAATTTGCCGTAATGCCGTGGAATTCCTTAGCCCGTCGAGGAATTACCATCATCCA -TCCCCTCGTGAGGATGAAATTGTGGGGACAAGGGACGTCTTCCGTCAGCAAGCCTAGCGC -CTCTCTTGCTTGGTGCAATAGACACTGATAAATATCTAGCAGGTGCTCACCATTTTCCAC -GCAGCCGGCAGAGTCCTCAAATCTTTGTAGAAAATGCACAAAAGAAGGTCCCGTTTTCAA -AGCCGTATCATCATCTGGAAAAAACTTGAATCCTTCATCGTACCCTTCACGCTTCCCCGG -CGCTGGGATAACTTGTAAATGTTTATGTTCCCTACTTGACCCAGCTAGGACTGTACAATT -GAAAAAAACGTAATGTTCATCCTTTAATCCACAGATCGCACTCCATGCAGCTTCTATGTC -TTCAAGAACGAGAGGCTCGTGCTGCCGCTGATAGGAGTCCACAGTCAACAATAACAGCAT -GGGACGGAAGACAGGAAACTTGTTGATAACAAGATAGTGGGTGTCATTGACTGTGGTGAT -GCATATATCAGGATGCTCATTTGCTATGTCGCTGCCTGGACCGAAGGACGCGCGCTGTGT -ATTGACCCCCTGTGTCAAACTTGATTTGATTTCAGAGGCGTCCTTGGCTTGGGGCTTGCT -TTTGAGATATTCAACCAAGTGAAAGGAAAACTGGATTCTGATTAGGGGCGGCTGAAGAGA -GTCATGCGGTGAGAAAACATACGGTCATGCCGCCATCGACCACAAGAACCGGCTTGTTTG -GCTGGTAATTGATGATACCGTTGGAGATAAGCCGGTCAAATCTTGCGCAGATCTCATCGT -AACTTAAGTTCAGTGGCATTTTGATAGCCAACACAGGGATCGGAACAAAGAGGCTGCCAA -GTTCAGACAGAGGGGTAGACGATACACATGTCACGCGACTAGGGCATAATATCAGCGCTA -TAGCAAAGATTTCAGCCCGAACATAGTCTATGCCATACCTATGCGAAAGTCTTCTTTGGG -GGCTATGTCATGTACACAGGATTTATTCCTTGGGCTTCAAATCCTGGTGAACGTGCCGAG -GCTATTTGGCAAGGACTGAGCTCTGCCAATTCTAGATAAGATGGACTATATCAACATTTT -AATCCTGGCCCTTACACCACATGTGTATGAAAAGAGTTTGGCCTAACGAATGAATGAACG -AGCGTTGCCGCTTACTTTTCCACTTTCCCTCACAGATACCTTTGGGCGTCTCGTATTTTT -ACCCAGGCGCAAGCCTGGAAGTGAGATGAGGCTACTTCGTATTCCATACAGATATAGATC -AAAAAACCACATCCCTCTTACATATGCTGCTTCGCAGACTAGGCACCCTCTACTTTCATA 
-ATTCATCATATGATTTGTATCTTCTTTCCTGAAAGTCATGTCTGAGCTCAAAGACCCCCT -TGATCTGGCCGAGACTGGAGATTTATCCAAGTTCTCAAAGCTCGAAGCTACACTGGTACA -CACCGAGGTGGAGCGGCGGCCAAATTCAATATCGCAAGACGGTGTAAGTCGCCATGCTTT -GACTGCAGATCTTGGCGTCAACGAATTGGATTTCGATTTCAACCTTGCCACAGACCATTA -TCCACTAGATCATCTCAGGTCTCTCCAGCAAGTCTTTGATGCTGTGCAGTACCGTCAGCA -TTTGGCTCGAGACGGGGACTTCTCGTCTACAACTCGCGTGGAATGTTTCAAGGCTGATCC -GCAGGGAGGAGGTACGTAGGTACCCGAATCAGAAGGTTTCTGTTTTGGAAGCTGATATCA -TATCAAGAATGGCAAACACTTGATGAAACCAACATGGATCAACTTTTACAGGTGAGGTCT -ATCAATCTTATACCTGGAACTGGAGCTAAACATATCAAAGTCCTACGCTATTAACGCCTC -CTCACCTGTTAAACCACACCCTGGACGTAAGATGCTCGTTTTGTGAGTGTTTTTTTCCAC -CGCTCTGAGTATCATACTGACTTGTTTAGCTTTACACCATTGGTGCCCACAGACGGTACT -ACATACGGGTCACGCGTTCTAATGACTCAAAGAAATGCGAAACATCTCTTCGAGACATTA -CACGTCAATCCTTTGTTCCTGTTGAACATGATTGGGCGGCCAGATTACTGGGCTCCTCAA -ATGCATTGGGAGACAGCTGATGATGGGAGGCTGCTGGCTTGCGGTAAGAGTAGAAGCGTC -AGGCGTGAGGACGACCGCTAACATCACCAAAGACCTTTTTTGTCAGCATCCTCGTTGGAA -CCTTCAAGTCCAAGGAGCTCCGCTGTCTGTTTACGTGCGCCATGACATTGTTCGGGACTT -GACAACATACATCATTTCGCATAAAGAATTTGATACCTCCATCACCGCGCTCAAATCTAT -ACTGAACATTGCACTCAATGTCACCTCCACAGTCAAAAACCCAGCTGTCTTTTTGGATGG -TCCATTTGATATCCATGCCATTCTTTCCACGCTTTCCTTCGAAGCTTCTAAATTCCACGT -CAAGCGTTTCCAGCGGTTCATGTGGAAGCAGATGAACAAAGTTGACGATCACCTTACTGG -CCTCGAGTTTAGTGATCGTGCTAAGCTTGGAGACTTGACAAAACAGCTCCAAATCATGTC -CTCGAATGCAGATTCACATATCGCCAATGCTGATGTAGCAATCATCACGGCCAAAGCCAT -TCGAGATGCCCACTCACGTCTCACTCCCTCCCTCCCCCGATCTAACCCTTTTATAAACCA -GCGAGCCGATGGTTCAATATCATATATCATCTCTTCCATTGAGAAACAAAAAATCTGGTT -CTTGAATTACAAGCAGCGCAAGGACAATGCCATGTCCCTGGTTTACAATCTTGTGACCCA -GCAAGACGCCATCAACAACATTCAAATTGCTCATAGCATGAAGCAAGACTCTACCTCCAT -GAATGCAATCGCTGCCCTGACAATGATCTTCCTGCCGGGTACCTTTGCAGGCACAGTGGT -TGGTGCAGGAGTTTTCGGGGGTGCTATCAAAGAGACGGTATGGCTAATTTGGGTTGCAAT -TACATTGCCGCTTACGGTAGGAGTCATGATTTGCTGGTGGTTGTATAAGAAAGGAAAGCG -GCCAATGGCCAAGGGTAGCACTGTAATAGAGAAAGACAGGGCTAATTCCGCGGACATGAG -GATGAATTCACCTAGGAGGAGGTCCAGTGTTTTTTCCAGTGGCTTTTCCTTCCGAAGGTT -CAAGACTGGTATAGGTGAAGATATGTAGTTTAGTCTAAGTTCTTGTTCTTCTTCTCGTGC 
-GGGTATATGACATCAAATGAACAGCGTTCTTCTACCCTATCCATATGGTACTCACACATG -GGTATCATAAAACATCATACATCACAGTTCCACTAAGCTAAACAGGAGGCAAAGAAGGCT -CAATCGCCAACAAAGACGGGTGATCCGCCAGACTCTGCCAAAACCCGGCCTCGAAATCCC -ACAAATCTTGACTTAACAGCGACGCCCATTCATCATTGCCCTCAACCCCAGCTTGCTCAG -GTAGAGGCTGCGCCGGGTCCGAATGAGTCGAGTCTAGAAAACGTTGTATAGCCCGTGTAG -CCTTAGAAAGCAACGCGTAATTCGGATCGCCGGCTTTGACAATTGTCCCAATTTGGACCT -CCGCTACAAACACGCTAAGATCTTGTAATGTCCGTGTCCGGTGTGCTCGTGGCATGTGCT -GTTTCAGCATTGCCAGAAGCATTATCCCCGCAGCGGGAAGACCGTAGTATGCAACCTGGT -TAATTTGTATATTAGTTGACCGCTTGAGTTAAAGGTTTTAAGGACAGACAGAGTATATCA -TATGAAACCAAGTCAATTTCACACATACCTTCCAGATGAGACCTGTCCCAGAATTGGTCA -GCTGATCCCGCAGCAAAATAATCTCCACCACAAGAGTGAGCATCTGACCCGCCGTCTCGA -TAATCGGTAGATCGGGCTCAGTCGGTGATTTAAGAAGCAAAAGGCGAAGCAAAAATAGTA -CATGCAAATGGTGCAGTCGTACGCCAGCGACGAAGTCACGCTCAAAGGGGCCCTGTGTAC -ATTGCTTCAGACTGCTCTCGAGTCGGAAGTGGGCTGGTAGGTTTTGCCATTGCGTCTCTG -CTTGGCGTTGGATCACGCTGTGCCATTTCTTGATTAGCCATGGTTCCCACTGAGATAGAT -GGCACAGAGGTTGCTTTTGAATACCTTGCTCGTTGCGCGCATGACGGATCATGTTTGTCC -TGAAGAAGCTCCCAGATCTCCTCCTTCAGCGACGCACACAGTGCAGACCACCGCGTATCA -GCCCGGTACCCAGACTCCGCATCCGGAGTCCAGTCTGCTGAGCCTTCTTCTGCGTTCCGA -CAGGATGGTATCTGGAAGTGGCAGAAGCGCTTGTTCATTCGTGGTGGACGGCCGAGGAAT -ATTGCTATATTCTTATCTGCGGAGTAGATGCGCGCGAAGGCTGTTTTTCGCAGTTCTTTA -AGGAATGGCGGGAGTCCTGGTTTTATTCCGATGTTCTCGTGATAGCCCAGGGCATATGTG -GAGGATATCGCGTCGCCGAGTTTGCGCCAGGAACGATAGCCTTTGAGAGTTTAATATGCG -GCTCAAAGAGATATAGTAGCTGTGCTGCGGATGATTGCTGGTAACTTACTCTGGTCTCCA -TCCACGTAGGAATGAACGATAAAGTTCTCGTATTGCATGATCAGTTGTAGGTCATTCAGG -CAATCTAGAGATAGGGATATCTCCAATGCGAAGTCACAAAGTTGGGTACATAGTCTTCGG -AGTGTGTACTGCTCCTGTGCAGTCATATACAGAGACGGGAAGAAGGAAATATCAATTGTC -GCTCGACTGACAGCAGCAAAGAATATCCCCAAGGTCTCCCAACGAGAATTGTGATCAAGA -AATTGAGCCGAAAAATCAGCCAGGTCAGAGGTCTCATTAAAGTGCAGCGGCTGGGCCGAG -TTCTGCAAAAGTCGCCTTGCATAGACTAGATGCCAGCTGTCGTCGTGAGAGGTGAACAAT -TGGCTCACACTCTGCGCACATTGGTCGACAAATGGCTCTGCCAGGGCCAGGTTGGCTCCT -TTAGCTAGCCAGAACATGACGAGGTCTTTCATGGCGGACAATTTGTACGTCGTCAAAAGA -TGTTTGAGGATATCTGCGCCTTGTAAGACAAGATGATTGTCCCCGAATAAATCCGAGTGC 
-AGATCCTCTGGTACTACTTGATGACTTCCGGGGCCATGGTCTCCGTCCGGAGTGATATGC -TTGAATATAGCTGCATGGCTGGACGACCCTAGATACCCAGGATTTGGGTATGGATTACGT -CTTGGATTGAATGACGGCGATTCATCCGGTGAACCAAGGCTGCAAAAATATAATTAGCTA -ACCCGTATACAGTCCAGGTCAGTATAGAACCAACCTTTGTTTCGATTCGACTCTTTTCCG -CTTAAAGGGCGTAGTTCTGTAGATGCAAATCCCCTTGCTAGTACTACAACGCGTGCAAAC -AGGCAGTTGGTGGTCGCATGCCAGCTTAGCCTTTCTGCAGGCTTCACAGGCAGCCAGACG -CGACATGGCTGCTCTGGCGGAGTTTCGTCCGCCAGTGATGGTAAAAGAGAGAAGTCGGCT -AATCGCGGGGAGATCGGAGAGTCGGCGTGTCGGCGTGTCGGCGATAAGATGCGGGGATCC -ATCTATAAATACTTGCCTTTCTTTTTCTCCAACATCAACATCACATCCATTATGACTCAA -GATATTAAGCTCGCAGATTATCTGTTCACTCGGTTGCGCCAGCTGGGCGTTGACTCCATG -TTCGGCGTCCCTGGGGACTACAACCTTCGCCTACTGGATTTCGTTACTCCCGCCGGCCTC -CACTGGGTGGGCAACTGCAATGAACTCAATGCCGCCTACGCGGCAGATGGATACGGTCGC -ATTAAAGGCCTATCTGCGCTGATCACTACCTATGGCGTTGGCGAGCTCTCCGCTATCAAC -GGTATTGCAGGCGCGTATGCCGAGAACACACCCGTGTTACACATTGTCGGAACCCCACCA -CGCCCGCTCCAGAGCGCGCGTACATTCATGCACCACACTTTCTCAGATGGCGACTACCGT -CGATTTGCGAACATGTCGAAGCATGTCACTGCAGCCCAAGCCAGGTTAGAAGATGCTACC -ACGGCGCCGGAAAGGATCGACTATGTCCTACGCCAGGCGTTGATCCATAACCGTCCAGTC -TACCTCGAGGTCCCCGATGATATGCCCGACGTGCTGGTCTCGGCCGCGAACCTAGAAACA -AAGATCTGCATTCCACAACCACCACGTTCTGCCCGAGAGCCGGAGGTGCTGGCTCGCATT -ATGGAGAGAGTGTACAGCGCCAAGCGTCCATTCATCTTCGTGGACGGGGAAAGCACAGGT -CTTGGTATCGTCGACCAGCTCGATACTTTGATCAAAGCAACCAACTGGCCCACATGGACA -ACCGTGTACGGCAAGGGCCTAGTCAATGAGCAGCTGCCAAACGTGTACGGACTGTACGCG -GCAGCCTTTGGCGACAAGCCAGCGCAGGAGTACTTCCAGGAAGCAGACCTGGTCCTGACA -TTCGGACCGCATAACAGTGATACCAACACCTATTTCTACACATCGATCGCCAAGCCCGCA -GTGGCAATCACATTCTCGGGTAGCACCGTCCAAATCGAGAATGATACTTACCACGATCAA -TCCGCCCGCAACATCCTATCAAACCTACTCCAGAACCTCGACTCCACACGCCTAGTCAAG -GCCACCGGACCACCAAAACAAGAAATCACCTTAGCCAACATCCAGAACACAGATCCACTT -GCCCAAGACAATTTCTACCGCCTGGTAAACCCGCTCTTCCGAGAGGGCGACATAATCCTC -ACCGAAACAGGCACCGCCGCGCACGGCGGCCGGAACTTCAAACTGCCTGCTAAATCTCGC -CTCTTCGGTGCGGTGACATGGCTATCGATTGGATTCATGCTCCCGGCAACACTAGGCACA -GCGCTAGCGCAGCGTGagcaaaacaaaggcaaagaaagcaagagcCAGACGGTGTTGTTC -ATCGGCGACGGGAGTCTGCAGATGACAGCGCAGGAGATCAGTGTGATGATCCGTGAGAAG 
-CTGAATATCGTCATTTTCATTATCAATAACGACGGATACACCATTGAGCGGGTGATCCAC -GGACGCAAGCAGGTGTACAACGATGTCCCATACTGGAGACATGCGCAGGCGCTGAACTAT -TTCGGCGCTAATGAAGAGCATGTGGCCAAGAATACGTTTACGGCGCGGACTTGTGGCGAG -TTGAAGGAGGTTTTGGCCAACGAACAGATTCAGAATGGGTCTGGGGTGAGGCTTGTGGAG -GTGTTCATGGGCAGGGAGGATGTACAGGGGGCTTTGCTGTATCTACTGAACAAGCAGCTC -GATGAGGAGAAGCAAGCAAAGCAGCAAGGATAGATCTAAAATGAGACATAGCTTGTTACA -TAGAGAGAATATATGTATATACGTTTTACTACGATTTCCTTGAATGGCAGTTTTGATCCA -ATAGAATGATGTCTTACTATGATTATTGCGTGTTTCGCTCTTAGTACACGAAATAAAACC -CTTGGAGGAACAAAATCAGGTTAAGTGAACGCCGATTCCCTATGCTCCAGATCCGCCATG -CACACAGAAAGTAGAAGAAACGAAAAGGAATGAAACGAAACATTTGAAAACAGACATCTG -TTTGACATGTCACGAGGAATCTATAATTGATCTGTGTCGCCGAAAGAGGGTTGCTCTCAT -GTTGAATTATTTGCGGAAGCTTTCAACCCCCAGGGTTGGGGTTTCCACCATGCTATCAGT -GGGTACACCCTCGTCACGACGACGGGCGAAAAAGCTATCGCATTCGGAGACAATGTGTGC -CCACTCCGCAAGGGCACGGCCGGCATATTTGCTAGGCACAAGATGATCGTAAGAAGGCTG -GGAAGGAATTTCGGGTAGTGGGGAGTTCTTGGTTGTAGGTGTTCGCGGCCAGGTATCTGA -TGTAGGTTGTGTAGGGGTATCTTGGTTGGTGAGGATTATTTCTGGTTCTTGTGTATCATT -CTCGAAGTCGGACTGGCCCAGATTCTCTGATGCTGTGTCATAATCCGACTTTTCGGGAGA -ACGCCGATGCTCGATGTGCGATTGGGACGCGACAGGGAGACAAGGGGTGAAAAGACGCTT -GTTTTTAGTGGGCCATTGCGGACGATCCATCCATTCCAAGGAGAATTTGAAAAAGAATGG -CTGGTGGGGAACTTTGGGGCGGGTGATCTCGTCCATATTGTTGGTCGGAGTGTGTTCGTG -TTCATCCCTACATTTGCTATCAGCAGTCATTGTTGGATCTGATACGCCATTCTCGGAATC -ATCAGAACTGCTGCTGCTGCTGACACCATCGCCGGAACGTGCATTGGAAGAGCTGCCAAA -CATAGCCTTGAGTATTCCCCACCGTTTTTTAGGTGGTTGACCATCTGAGCCATTGGAGTC -GGAAGATCCGGTTCGACGGTGTGACATGAGTTGGTCTGAAGGAACGGGGGGAACGACTCG -GTCAAAGCTAACGAAGAGATTGACAGGAGACAGTTGATTTTCGCATCGGATAATGATGAT -TCTTCGTCCTGGGGCCGGTGTACATGGAGCAGAGGACAGGGGCGCTGTCATACCCTCCTC -GGCCTTAGATTGGAACCCGATGTAATACTCCCAGACTCGTAGGAGTCGATCGTAGAGAAT -TTCATAGATGTTGGAGTCAAGTGGTGTGTTTTCAGTATTGAACCGCCCAACCCTCCAACA -CAACAAACGATGGAAATAGGCTCGCACCATTGGACTCCAGTGAGTGAAGTAGTGATAGAA -TACCGGCTCATGTAACAAGAAATCCAGACAGATAGATGCTTTTCTTTCTTCTGTACCAGT -CCAGGTACCCCAGACGGAAAAGAGAAAAGAAAACACTCGCACCTCAGTGAGCGAATTGTG -GCTCTGCATCATTTGGCGACAGACATCAAGCCAAAAGCCCCAATCAAACAGTTCCGTTTC 
-AATGGACTGAGCGTAACGTGTGATGATTGGAATGACCTCCTCGAGAAAGTCACACAGCAA -AAAGCATGCGTTATGATCAAATAGAGACATTTTCTGCGCCGCAGCTTTGATGATGCCATT -AAATGATTCGGCAAACAATAATCGAGCACGATTCGGCTCCACGGACGATTCAGACAAGAG -GTCTCGGAGAAGGATGATCAAACGATTCTCTGCCATCGACCGGTGAGTGTTGCTACCATT -TCGGAGATGTTGAGTGGAGGCAGAGGCATCTGGTGACTCGATGAAATCATCAAAAGTGAT -TGCAGCAGCCGTGTGGGGATTGTCGGATCCTTGTTGTGCCGATTGCTTGTAGATTGTGTC -CTCCAGAACGAGTAGGAGTTGCGCATGTACTATCAACAGCCCCGGTGCTAGTACCCGTTT -CGCCTTTGCAGTTTCCGGGGGCAAATACTCGGCATACAGAATATGAATATATTTAACGAA -GACAAAGAAGAGATCTGTGTCGCGCCCAGCCCAACGTGCGACCCAAGGACGATTCCAATT -AATAGGAGTGGTGTTCAAGGGTAGCTCGGGTTTCTGGCGCAGATATCGCAGCAGAGGGGC -TTGTGAGTGAAACGCTAGGGGACGAAGGGCCGGTGGGAAGCACATAGCAAGTTCTTGCGT -GTATGCACGGATGTTTCCAGTGCGCGACCCCGTCGATTCTGACAAGATGCGTCGGAACAT -ATGGGGAGGCGTGCACCACAGGCGAACCAGGATCTCCGGCACTCCGGGGCAGAAGAAAAA -GGCATACGCGCAGACCTTGCCGCAAAAAGCGACCAGGCTAGCTGGTGCATGTCTCATAGA -CATTCGTTCGACCACGAACGCCATTTGAGAGAGGAGATTTTGGATAAAGATGTTCCGAAT -GTTGTGATGAATTGATTCAACCAGGAAATCGGAACCCGATGAACCTGATCCATCTGATGT -TTCCGACACCGAAGTTGACGCGTACTTCAATGAATCGGTAGGCCCACTGCCATTCTGAGC -CATAGGAAAGGGGATGCGCCATTCTGGTCGAGCCATGATGCCTACGATGGCTTCTAGAAA -TACCGGTCTGTCGGTTCCCGAGATCGACTGGTTATGCTTGCCGTTGAGCATCTCCAACAT -CCCAATCCACCATTTGTTGAGAATATTGACTCGGCGATCTAGGTCTTCGGGTCTGAGATT -CTTGCATGAGGCGTGGAGCTGATGGCGGCTGAGGAAGGGGAGGAGCGAGGTACGGATGAC -GTTGGCTTTCAGCGAGCTAGATTTGGCTTGAAACCTGTTTCCTGGTCAGGAATTGTAGTG -TGAATGGCTAAGGTGGTTGTACAAACTTTTGGAAATCCGCCTCCAAGCCACGAAAGATAC -CCCAAAGCTCATCCTTCTTGCGCGTATGTTCGGTACCGGAATTCGCATACGCCGCGGCAG -TTTCTGCTGCTCCCTTGGGCATAGGTGGTAAGACCGGGGTCGAAACGGAGAGTGCTGCTG -TAAACTTTTCGAATGAGGACCGGTGCCGTAGCGAATTTCCAGATGGACTATTTGGAGAGC -TGGGGCTGTCCTCGGGGCCCTTGGCCACAACAGCCAGCATTGGCCGGCGCGGGCGTTTGT -AGAGAATCGTTCGTCTGCGGGAGGGGGGCTGAATCTGATCATCTTCTGCTTTTCGGCCGC -TGAATCGAGTTGGGGATTGAGAATCTTGCTGCTTGGATCTCTTCGCCGATGGGGACGGAG -ATCGAGAGGAAGAACCCCACGGCTTCCTCGCAAGGCTGACTATCCTCCCAGACATTGATC -GGCTCTTAGATGGCCTCACAGCGGGCTCCGGAGCCCGTGTTTCGGGCACATTCGAGTCCG -TATCCTTCAAATCCTCCGATGACGACAGTGTGAAACGTGAGACGGAGGCTGACTTCGACT 
-TGTCTTTGGAGCGAAGGGACGTTCGGCGGAGAATGTCCTTCCCGGCATCCACCGTTTCCT -TTGTGGGTGACTCTGCCAGATTTGGAACAGCATAATCAGAAAATGTACGTCTTAAGCTGG -GTTCTGTGGTGGTTTGGCTGGGAAGATCGGTGTAGCTGGCGGCACGAGAGAGCTCGGGTG -TTTGGTCATCATTCGTGTAGCTCGGTGTCTTGCGCGAGGGCGAAGACATTGTCTAGGGAA -TAGCCGGGGGAGCAAATCGGGTATCGAGGGGATTTATTGGCTCATTTCCCCTTAAATCTC -AAGATTCTGAGCGTAGAAGAGACCAGCTCCTCTGTGCACCAGGAGGAATAATGTCGAAGT -AGTCAGGCACGCTGCATCTGGGGAAGAGCAAACAACGAAGTTCCGACGGTGAAACTGTGA -AACAGATAGGTAATAGAGGAAGACCAGGAAATAACGGAGTTTTGGGGTGGATAGATCCGC -TTAGAAGCTCTGATTCAGCAAGTGATCGACGGGGGGGAATCTCCCCCAAGGGCCTTTGGG -TCCGGGTGAAAGTGTGTGGGGGGGGTCTGAAGGTAATAATATCCAGGGAGAAAAGAAGAG -GAAATTGAAATAGTTCCAAGGGGCTGAATATAGAAGTCTACTTCTCTGTAGTTATTTTTG -GTTAATTAACTCGAGACATTTCAGCCTATTATCATCTCCATTATCTATAATACGAGCATT -ATGTAGCTCACTTTATATTAAAATCAGACGTTGCTTTCTAGCTGAGACAAATCCAAGGCT -ACGGACTACATATTTCCTTTTGACTCCACCTTTTTCTCTTCTCTACTCCCGGACTCCCGT -ATAGCCATCGCGTAAGACTACCATCCACGGAGGTCAGCTCAAGTCCAGGGTCTCTTTGCA -TGCACCGAGTCTGATTTGTAGAGACGTTGGGCTCTTCAGTCCCAATTACAAGGCCTCTAT -AGGCCAATCGTGTCTCCGGAACATAAACAGGGTAGAATTCACCGAGTGGGCTATGACAGC -TGTACAATCGGTACTTCGTGGTTGGTACAGATCTCGTATCTCATTCTTGGCCATGGAATG -TCAGAATGTCGTGCCGTGATTACCCTCTTTCTCTACGGGTTTAATTTTCGCATAACTTGT -GCAATGATTTCTCAATAGACTATAGGAAGAACGGTGCTTTTCGCAGGGTCAAATCCCTCG -AATTATGCACTTTCACTATCATAAATCTATGCGGGGCGCTTAAATCGCCTTGCCGAAATA -GCCTTGTAAATGGCGTATGTTCCCAGCTACCGTATTTAGTGTCttttctttttttttctt -ttttcttgttGCCTGTATCTGAGAGTGCCCTGTGTTGGCACACATCGCACAAGCCCTTGG -CAAATGTTCTTATCGCGCCCCCTGAGAAAAGTACAGTTGGACAATGGCTTGAAGACTATA -GCAATCAATTGAATAATTTTGTTCCTTGTTCCATTATACTTTGGATTTAGCTTTGCCCAT -CCCGGTTGATCCCTCCAGGGGTATTCCGTATATCAAATTGGATGTGAAAACAATGCTTGC -ATCTGATATCTCAATCCAGGGAGATTTGATACATCAGATCATAGTACTCCGTACTCCCAT -GAACTTCTGTTCGTCACTCCGTATTCCGTACATGCTGATCTATCGGCTGCAGCAGTGGAT -CCCAATAAGACTAATTAGTACGTACTAAAAGTTCCCTTAAATGGACATCCGTAAATCTTA -TGTATTCCCGAGGTCTGACCAGGCACGGCGTACTCCGTAACGGATGATTGATTTCATGGG -CATGACAATAAGCCTGCATAAGCCATGCATAACACATTTGGTAAACCAGCAAGCCCATGT -CATGTTCCGATCAAGTTATCTGTTTGGTTCATCTAGTGTATGCACGACAAGCGATTCCCT 
-AGACCGGCTAAGACTGTGGACTAGACTCGGTATATGGAGTGCCAGGGATTTATTACCCAC -AGCTCTTGTCATCACACTTGACCAGGGATGGGTCTTGCTATATTCTGCCTGTGCTGTGTA -CTCTTATCACGTGGTCAATTGTGCAGGTCAGCAATCTAGATTATTCTATGTTATATGCAC -TTGTTCTTGAAACATTACATCTGGGGACTACAGATACCGTTCCTGGTACTCAACCGAAAA -ACTGAAGATAACACAGATGACCCTAGTCTGGAGTGCGGCTGTTGCACCCACCAAAGACAG -GTTTTGGTATCAGCCACATCGCGTAGCCCTTCCAGCCTGAAAAACTTTACTCTGGAGTCC -TGCCCTGGACGAACAGATCAACGGTGACGACCAAGGAAAAAAAGAAGTCTCAACGTTCGA -AAATGTGGTCTAATTTACAAGCTTCAGGACCTCGAGAGGCGGCCACATTAGTTGCGTCAC -AGGCATTTAAAACCAGCCTTCTCCCCATATTCCCCCAGACCTTGCTAAAGAAACAAGAAA -GGGAAATTTAGTGAGCAGCGGGGGCCTCCTCATCCTCCTCGAACCAGCTCTCCTCGACGG -GGGCGGCCTCGCCAGCGGCGGCCTGGCCCTCGACCTCGACCTCCTGGCTCTCGTAGGCGG -ACAATTCGCTAGCGAGGTTGGCCTCCTCGGGAGACTTGGGGGCCTTGGGGGCGGTAAACT -TCTGGACGTGCTCTTCGGCATCACCGGCCTTGAGAGGGGTAGGCTTGTAGGCCTTGAGCT -CGCGGAGGTACAGGTCCTGGACGGGGTCGGCTGCGGCGGGTTGCGGGTTAGCGATGATGC -GACCGATGAATATATCAGGGATATCTATTTAATATGGACGAAAGTCCCCGGGTAAGATAC -GTACCCTGGCGGACGGCGGAGGTCAGGAAAGTGCGGCGAGCGACCGAGACCTGTGCCCGA -GAGGCACGGGAGAAGAGGGTGCGCTGCTTTCGCGTTAGTTGAGGGTGGATGTAGACGGTT -ATAAATCAATCAATACTCACCGAGGCCCGGAGAGACTGGGACATCATCTTGATTGAGGGT -GTAGATGAGGAATGAGGGGTGGTTCTCAACAAAATAGATCTGATCCGTCTTTCCGTGAAG -TTGGAGGTTCAAACGACTTCTGTTCGGAAAGGTGGCTTGGCAGATGACTCAGCCGGGCCA -TGTTGTTCCCGAATCTTTCCAGGACTCCAACACCTGCTTTTTAGTATAGACACAAGTTAT -CAGCGATGTTTTAGATTATTGATGGAAACTGAGCGCTTAGCTTGGATTCAAGCAAGGACA -GTGTGACCCCAGGAGTAGAGGACACAATTCTCTGAACTTTCTATCTACGCAAATCCCCGA -GACTACCCGTATCTGAGTATCTGAATCTGTTGACTTAAGTGCTTTATCTCATCCGCAGAA -GTCGACGTAAGAGTCAAATCTAAATTCCAGTTGTCCTTTCTTGTCTTTGATCTGTGCACA -GCACTCACCGAGAACAAAGAACAATTGTTTGTTATAGAACCCTGGAAAGCTTCCTATACT -TCCTCGACCAATGGAAAGGCATAGCGTCACGTGACTCACTTACTAATCCCCATCTGGACT -GCGCCTTTCTCGGGTGTCGCCCGGTCGCCCATTGAAGCCTCACTTCACATCGTCTCCCTC -CTCCACTCTCCCCCGGTCTTTGGCTTCTTCTGTCTCTCCTCCTCAGCCTGTCCCCATTTT -CGTGAGAGACTTCTTGCGGAGACCTTGACTGGTTCAACACGGCTGGACAAGCTGAGGCGA -GATCTCTGTTCTACTTTCGATCCAAGAAAGCGCAGACTGGTGAAGGGTTTTCTACGACTG -GGGGGTGACTCCTTTGAATCCCGAGGGCTCTGGCCTGTGAAAGGTAATGGACGCTTCTTT 
-GTTTGATACATCTTGGCATGACATTGTCAGAGGTCAAAGAACTAGATAACGTTGCGATAC -GGGAGACAATGAATTTGCCTTGGGCTGAGCTAATTTGACTGCGCACAACAGTGGCCGTGA -GCGACTGAAGTTTTCATTTTCGACCTTTTCCCTTGCTGAAGGGGTCCTCTGCAGGTGGCC -CGCTTTCGTTTTCCTCGCTACAACCGCCAAGCTGTATTCGCCACTTTTCCTTCGCTACTA -TAATCGAAAATACCATCTTCCCCGCATAGCGCCAAAAGTCCTTGCACGACGCTGCCACGC -TCGATACGCAAGTTACTTGAAACCCCACAATATGGCTACTTCTACTCTTGCGAAACCCGA -GGTCGCGGCTCCTTGGAAGAAAAACCTGTCCGCCCATCTCGTTTGCCCCGAATGTAAAGA -GGATCCTCCGAATCTGGAATTTCCAGACTCCCACGAGACAGTCTGCGGTTCCTGTGGACT -TGTACTCGCAGATCGTGAAATCGACCTGCACTCTGAATGGCGTACCTTCTCCAATGACGA -TCAAAATAATGACGACCCCTCTCGTGTCGGAGACGCCTCGAACCCTCTGCTCAATGGCGC -TCAACTGGAGACTTCCATCGCAAGTGGAGGCTCTGGCCGTGCCCGCGACCTGTACCGCGC -GCAAAACAAGCAGTCGGGCGAGAAAGCGAACAAGGCTCTCCTGGCAGCTTACAAAGAGAT -TGGTGCTCTTTGCGACGGTTTCAACATCCAGAAGACCGTCGCCGATACTGCCAAGTATCT -ATTCAAGATGGTCGACGACGCCAAAGCCTTCAAGGGCAAGTCACAAGAAGTCATTATTGC -TGGCTGCATCTTCATCGCATGCCGTCAGTGCAAGGTTCCTCGCACTTTCACCGAGATCTT -CGCAGTGACCAAGGTTACCCGAAAGGAAATCGGGCGCATCTACAAAGCTTTGGAGAAATT -TTTCACCACCCAAAATGTCGAGCGCCACAATGCTGCTTTGGAAAATGGCGAAACTCACGA -CTCTGCTGGCGATTACAACGCGACTACTTCTACCAAGCCAAGTGATCTGTGCAATCGTTT -CTGCAACCTTCTCGACCTTCCTTACCAGGTCACCAGTGTCTCTGTTTCCCTTTCGGACCG -CGTTACCGCCATGGGTGACTTGGCCGGACGTTCCCCCCTGTCTATCGTCGCGGCTTGTAT -CTACATGGCGTCGTTCCTTATGGGCCATGGCAAGTCTGCCAAGGAAATTTCCCAGGTTGC -ACACGTTAGTGATGGAACTATCCGTGGTGCTTACAAGCAATTGTACGCGGAACGTGAGCG -CCTCGTTGACCCTGAATGGATCAAGGGTGGCAAGGGTGATATGGGCAAATTGCCTGTGAG -CTGAACAATACATCGAATGCAACATTTTTTTCGGACACATCTTACGGCATGGCCAATCTG -CCTGTCTGCGAAGAAATTTTATCaaaaaaaggaaaagaaaaaaaaaaaagaaTTATCAAC -ATCTGTTGTCTGAGGCAGATGGATGTCTTTGAAACGTTATCATCATATTAACTCTCGTGC -TTAGGATTTTATCGAACACGCCCGCGGGCTCTATTGGTTTGTACATCATGGATAGTAGTA -GATTGAGTAACAAAACTATCATTGCCATGACATTGATCCACGTAGATGCCGCTTAAAGCC -TACTCACTCTTTACCTGTCCATTTAGGTGCCCCATGTGGCCTATCATTGTAGTCTTGTAG -TAAGGACACAACCAAGTAATTTTTGCAACTCTTTTCAAGTGTCTTTTCAGATGTATATTA -AGGTGGTGGAAATCATAAGCCGCCCAAAGGGCTTTCCTATATACAGACTCGGGATGTTTA -TGTAGAGAAACAAACCCAGGGAACGTAAAACACAAAGCCCACAAAATCAAATATGAAATA 
-TCGGTGCACCTTAGAAAGCAAAGCAGCGCCTGAACTCAAAAGCCCATAAAAATGATGAAC -CAGCACCATCTCACAACCTATCAAATCATAGCAGCCATATCACCGGCAAAGCAGGATCCC -TATTTCTTGAAAGCAGCTTCAACTGTGCCATCATCCTTGGCATGGTTGAACGGCTTATCA -TACCGCTCCTCCATCTCTTGATTGTGCTTCTTCACATCCTCGGGAAGGTCCTTTTTGGGA -TTCCCTTCTTTATCTATAACAGGTGACCGCTCTCCCTCGTGGAGGGTGGGCTTTGCCTTG -TTGGAGGGGATCGGGCGGTTGGCCGAGTCTGGTTGGTTGGTTGTGGCTTGCTCGGGGTCA -TTTGAGGATTTGGTCGCATAGCTGCGGTATCGGGAGGTGCGAGGTGCTTGGGAGCGTGCT -CCGGAGAGAGCTGTAATTGGGGATCGGGCCTGGAATGGGGTCTTGAGGCGCAAGAAGGTC -GTCATTTTGGAGGTCAGGGACTGGATTTTTTTAGGGTGGATATTGTAGGAAAGTGAATGT -GGAAAACCAGTGGAATTGACTTCCTATATACATAAGTTCCCCTATGAAATCAGGTTTCTG -GGATTGGCGAGATCATTTTGCGATGACGTACTCCGATGTCATACCCGTGGACGTAGCTAC -ACGCCAAGAGTTTCCACAACTTTATATCTATTTTTACGATTTTAGAAGATATCGGATGTA -TATGGAAGGTTCTATCTACAGTCTGCACCCTTCTTTCGAATGCTGTTCGCATCTTCCCGT -TGGTGCGAATATATAGACGACACCTGTAAAGGAATAGGAGCCCAAATAGGAAACAGGCTC -CAAACGGCATCGCTGCTTATACATTGTTCCCTCGTATCTCAAGTTACACAAAATGGTAAA -GCTCAATTTAAGAATAGGTTCAGCTGAATAGAAGTCCTGCTATGCAAGATTCAGCAATCA -TAAAATACAAACGAGCGTAATGTAGACCCATCAATCACTAGTACCAGAATAACAACAACA -TTTGGTCATCTTTGTAGCTAACCAAAGTCCTTCGAGGCAAGATCGAAGCATGCAACCGGT -TCAGCTTTCGCCCTCAAAGCCCATAAACACTGCTTGTGCCCAATGTACCCAGTTCCACCC -CGCAAAGAACGTTGAAACTAGACAAACGCTCAATGCCCAGAAGGATAATCGAATAGACAT -GAATAGAAAAATGAAGGAAAAAACCCCCGAGTAGCACAGTTAAGCATGACTCAACTTGGC -TGCTTGCAATCATATCTTATATGAGGTTTGATGCATGACCAGCATCGATGAGCTCTGGAA -TCGGGGTGATTCTTTACAAATCTCTTCGGCTGCAATCACTGGCCCTTCCTACTCAATCTG -AGGTTGAATCCAGTGGCTGCCCAGAAGAATTACCCTCACAAGTAGTGCACTCGTCAGGGC -TGGACAAATCGCGATGCTGTTGCTTCTGATCAAGCTGCGATTCATCAAATGGATCCTGTT -GACCGAGTACTGTCTCTTTTCGACGATATTGAGGTGGAGGGACCTTTTTGTGCGACCTCT -GATCCTTTCGCTTTGGGCTAGGGGGATTTATCTGCCCTGGGTGAAGATGAAAGGGTGCAT -TTGAAATATCGACAGGATTGCTCGGGATTTGTAGTTGCAGTTGGGGTTGTGACCGGGGTT -GCTGCTGCGCCGATTCCTGGGATGACTTGGGGTGAGCTTCCTGGGGTGCTCGCTGCTGTT -CTTCCGACGTTTCGGAAGCCAGATCGGGAAGATTTAACAAATATCGAGGGGGAGAATCCT -GGATCCTAGATTCAGTAACAGGAAATGTGTGTTCGAATGCCGTATCCAACAGGTCCTTGT -CAGCGGCCGTCAGGTTCTGGGATTTCATCTTTCGCCTCAGCCGCTCTCGCTCAGAGCTTA 
-TCATTCGCTCGCGATGTCGATGAGACAGATCAATTGGCCGCCGAGAGTGGCGCGTCTCTG -GCAGATGGCTTGTGGCCGTCGGTGAAAACCCACTTATTTTTGCACGAGAAGGCTGCTCAG -GAGTCCGCTGAGGGGTTTGCTGAGGTGATTTGAGTGCAACTGGGGGTCTTTTGGGTGGTG -CAAGGCCCTGGTCCTGCTTTGAGGGCACCACAGATGGTGCAGCAGGTCTGAAAACCGGCA -CAGAGAGCTGTCTCTGCAGTTCATCAGCACCGGTGGTATAGATGCCGTTGAAGATTAGCG -GGACCAGCTCCGGTGGAAAACCGGCACGGAGGGCATCCGACAACATTGACTGTTCGATGC -GCCGCTTTTCCAGTATCAAGTCAGCTTGTTGCGTCATTTCTTCTTGCTGCTTGCGCCGGT -CCTCTTCACTTTTGGCGTGAAGCCATATCTTCATGTCTTCTGTACCTTCCCATGCAGCAG -GTAGCGGGGGTAGGGGAGTTGACACAGTCAGTGAAGTTGGCAGGCGATTGAGCTCAGATG -GCGGGATCAGATGTCTGGACGAGCTCACTATGGAATTAGATATTGGGTATTCCTCTGTTG -AAGGCCGAGGTAGCAGTCGTGGGGGAGAGGGGGTCGTTGTACTGGCAGACTCTTGGAGGT -CCTTGGTCCCGTTCATGCACTTGGGGGGTTCTAAGTTGCGCTTCATGCTGACGGGCAACA -TTTGGATTGCGTAGGAACTTGAGGTGACAGAGAAAGACGACTTAAATTACAAGGTTATGC -GAAGTAGAAAGAAAGAGAGAGGACACCCACGTAGGAAGCGAAATGCTCTTTCCAAACTTG -GGTCAAATCGTTACACCATCAAGATGTCTACGTGTCTGTCTATTCCAGTATAACTTGTGA -AAGAGATGACGGCCCATCCTGTGTTGATAGTTCGGAAGTTGGATCAAATGTCGCACAACC -AGGCAACGTATTAGTCGCATATTAGCAGGGCTGGTACTCTTGAATGTGTAGGCATTCCTT -TCTCTTGGGGTACGAATCCCAACTACCAATATCAGTCTGCAGTGACAGTGAACAATACTA -TGCATGGAAGAGTTTTATTTAAAATATATACTGGGCGCATCGGCAGATGCCCACATCGAA -GCCACATCTAGCATATCCCAAGCAAGAAAAGAAAAAAGGCGCAAAGGGTATCCTATCGCT -TTATTCGCCATTTTCAAACAGCGATATCCCAGCGATTGTGGCAGGTCGCACCATCAAGCA -TCAAAGCTGGGGGAGAAGCAAAGAAAAAGCACTCATACTGCCGGGTTTATTTTATACGTC -GCCACGATCGAGTCGAATCAGAACCTCGGGCGGAACTGTGGGAATGTACTTGCGGGCTCG -TTCGCGATTCGTGCCACCAAAGCCAGCGCCGTAGCTGACCAGCATGTCTGTACTAGTTGG -AAGGGAGAAGGGAAGGAAGAGGGTGGGGTCGAGGAAGACACTATTGCCGCTACCGGGCCG -CTCTTCGTTCTTGGCTGCGACCATACTTGTCGGTCGAGATGATACAGCAGCAGAGGCTGA -GGATAGCCCAGGGTCGGGAGATGTGGGTGCTGCGGGCTGGTCTTTTGGTGGTGTCTTTGG -ACGAACTGCTGCTGCTGCTTGAGGCCCTGGTCCAGCTTCTGCCTTTGCCTTCTCTTCTTG -AATGTTCATCTGTTCGCGGTATAGAAGTTCAGGGTCATTTTGTATGCCGAGAAGGAACTT -CATTGCTTGCTTAGTCAGGCCTTTCTCGCCTACTTCTGGGCGGTATGATTTGACTGCATG -CAGAAGCTGGGCGGGAGTGAGTTGTTGGAGTTGAAGCAGGGTGGTTACAAGAGATTCGTG -GTCCTCGCCGAGCGAAGAGAAGCATTGCAGCCATTGGAGGAGTTGAATGACGGCGGCCAA 
-ATGTCGACGAGCGGCGTCAATTGTAGACTCTCCGGTGCTGATTGTCGATCCATTTTCGTA -ATGTTCTGGGACACGGTTGTTTGTGCGTGCCCAATCCTCCAATGCTGAAACGTTCATACG -GATCTGCATCGCCTTCGTGCGCGCCAAGTAACGTTTCGTGCTCATGATTCGGTTAAAGAT -CTCGGCACTGAGCCAGTAGAGAAGTTGGGAGAGGATTTGGGTCGTGATCACTGAATGAAC -ATCGTAGAGATCAAGTACAAAGAGGGTTGAGGATAGTAAGGAGGTGATGTTCCGAGGTGA -GATTTGTGCTCGTCGCCGCGGTGAAGGAGGCCGGAATCGCTTTTCGTCAGGCTCGGGGGT -TTTGCTCTTGTTTTTGCGGAACAGATTCCATTCGTTTTGGAAGGCCACATCTTCTAGTCC -AGGGATGGTCTCATGGTCCAGCATTGCCGCATCCAGGACTTTGTTCATTCGACGCTCGGC -GTCACGAATAACGAGAACGAATATTTCGTTGATCAGCTCTGCCAAATGGAGTTGAAACTC -GACCGTAGATTCTACTAAGCCTCCGTCCTTCTTCAGGTAATGAAGCAGAAGGGTAGCATT -TGACAGCCAAAAGGCTTGCATTGTCATATCCCATTGGCAACGTTCTACGACATCATTGAT -CTTCTCCATCGCAGAAGATAGCAGAGTTGTGAGGAGCTCAGGGCTGGCATGATAGTGTGC -ATAACGAGCACTGAGGAAGAGAAGATTTGCAGGCACTGGCTTTTGGGAAGGAGACCGCTG -GGGGGTCATGTTGGTGATGATCATATCCAAAATGCGCTCTAGCTCATGCTCCTGGAAGAC -AAACATCTGATCATTCAGACACTTGTCCCAGACAAATTCCTGCTGATCTTCCTCAAGCTC -ATCGTCATCGGACGGCTGTAAGGTGAAAAACTCCCCAATTAGTTCATGTCAAGCCCTCAT -TCTCACCACATAAACTAACCTCGAATTTCTCGTCAAGCCCTAGACTGGACAAATCGACCT -CCAGATTGATTGCGCTCTCTTCCATCATCATTCGCCGCTTCATCTCGCTCTCAGCTAGCT -CTTCTTCGAAACGACCGTGCGCCAATCCAGCGTCATAGAAGTCGTCATCAATATTAGTAG -ACCCAAAGCTGTAACCATTTTCATGCAAATATTCAGATATCTCACTGCCGGGCTGGGCGA -AATCAAAGGCCCTACCGCCGGACGACGACTTGATATCCGGTGATGCCCCGTTATCAAGAA -GAATCTTTGCTATCGTCTTGTGTCTGTTGGTCATGGCCCACATCAAGGCGCTCCACTGGT -TACGATCCTGTCTGTCGACGAAGGCACCGGCGTCAATGAGCGCAGCAACGACTTCTTGGT -GACCCTGTTATAACCCGTCAGCATCGCACCAATATTCTGCATATAATAGCAACCCACAAA -ACAACTAGCATAGATCAACGGCACAGTCCCCTCCTCATCAGGCTTGTTGACATCGACGTA -AGTCCTCGCTTGGCCCTGTACCAGTCTCCGTACGCGATCCACATCGCCATTACTGGCCGC -CATGTTCAAGCTCTTCTGGAGTATATCGCGTTTCTCATCGTCGGTCAATTTCTCGTCGCT -GGCAATGGTCTCTTCGTCTGCGGTGAGCGAATCCACCTCCCGATGAGCAGCTTGGGCAGC -AAGCATCTCCATCAGTCGGGCGTCACTATCTTCAATATCGCGCCCATAATGAGCCCGCAA -TTCATGCTCATTGTCGTGCGAGTGATCGTCGAGGGCTAGGCTGAAGGATAAGGGTTTTGC -GGCGACGGGCGCGGTCAGGTATTGTGATTGACCTGTGACAGATCCAACGATTAGATCTTG -GTGCTTGACAGTAATTAAGCCGGTAATGTTTCCGGCATGTAACCGTTGAAGGGGATATAG 
-TTTGGTCTCCCGCGTCGGGGAAACGGGGTAATAGTTCCGCGCACATAAGAGCACTCACCT -TGCCAAGCATCGTACATCTCAGTCTCCGGTTGGAAGACCGGGACGGAGCGACGATCGTCC -AGTGATTTGGGCAAGTCATCTGGAAGTTCCTGTTTAGCTTCGTGGAAGGACGCGGGGCTG -TGTTCGCTGTAGTCATTGGAGTCGATGGAGTCCTCTGAAGCCCCGCGCCGCGGTTCGGTC -ATGTCCATCGTGGCGGGCTGGCTGTCTGATAGAGTTGTGGCGGGACATCAAAGAGCTGAA -ATTCTGGAGAAGACTGAGGAACCCTTTCTGGGGATCTGTTCTGTAAAGAGCGCGCCCTCC -ACACGTGGGGATTAGTTGTTGGAGGGAGATGGTGGTGATGAATAGGTGGTGGATTTGGTG -TTCCGTTCCTGGACCAGCTCCCGCGATGCGACCCCGAAGACCCCGGAGAGATGACGCAGC -TTTCTATTTCCTTCGTTTGAATTGAATTTTGGACAATATGAAGGCTAATTTTCGATATAT -ATTTGAGTTGTGATCTTTACCTCTCCTACTAAGAACGTAGCTCTTTGTTGAAATTCGTGA -ACAAAACCCCTATTCACCTATAATGTGTCTGTTCGAAGCCTGGCTTGTTCTCTATATGCC -ATTGTACGATAGTATAGTCTATGATGCTACGTTTATCTTATAGCTTCAGTTAAGGGGAAC -GGCATTATAATTAGGGCAAATATTTCAGGGACCAGGTAGATTTGACTACTGCGATTGTAG -GGCCGCCTGCAACGGCCAGTTATGGACTGTTCTACTGCGCTACTCGGGAGGCATGTTTTG -GGCAATGAGGAATTTCTCAATGTATCTTCTAGCGATTTGGCTAGGATCTGACTTCGTCGG -TGACTGTTTCGCCGGAGAAGTCTAACACTTGCTCAAACTGCCACCAAGACCGTCTGGAAG -CAATCTTGCAATCCAGACTTTTGCCGTCATCTACTCCTTACCCTGGGCTCGAGCATCCTG -CGCACCTGCACTCGCCGTTGTCGATATGACCTTACCAATCTCCTCAACGTCCTCCAGTCT -TTTCAACCAGAAATTCCACCGATCCATGCTCATTGGCCCCGCCGTCCAAAGCCTCCCCTC -TCTCTCCAACGCCCAGGACTTCTCACCTTTCTTCGCGCCCTCTCTGAGTCTTTCCCCAGC -TAGCTTAATCCACACCGCCGCAGCAGGTACCTCAAAGTCCCACACTGAATCTTGGCATTC -GAGCGCCTCGGAGATGCACTCATAGCCCCAATCTAGTGGCAAGAGACCATCTTTAGCATT -CGGTCCTGGAGCAGCCATTGCACAAGATGCCTCCACAAAAGCCTGGTGCACGTGGGCATG -GCGCCTCTCTGCTCTTGTGGCTGGATCCCACGTTGCCAAAGCTTCGCGCCAGTGGCCCTG -CTTCCATCCATCTGACCACGAGTGGCCGAAACCTCTAAGGCCTTTCCAGACTGGGTCACC -ACCCTCTTCGTCTGGTAGCTGGCTAAATTCCTTGAGCAGATCAACGAGGGCAGGCGTGTG -GCTTTTGGGCATCTCAAAAAGCGCGTCTTCAATTATGCTTAATACACGGTCTCGCAGGTC -AGTCTTCTTCTCTGAGCCATGAGGCTTGGTCATGGCGGCAGCTGTCTCAGATGGCTTGAG -GTCGCCGTTTAGGTACGCTTTGAGAGCCGCAGCTTGATAGGGGTGCACCCCAGTCTCTTC -CTCGCCGTCCATTGGTGCAGTGTGTTGTTCGAACCAAGTGTCGTCGGCGACGGAACCGAA -GCCATCGAGGTCGACCGATTTGCACGTCATTTCTTGAATACTAGTGTTCATGATGACGGA -GGCGAGGGCTCCCGAAGGAAAGGCTTTTTGGTCTTTGATGAGTGTTTGAGAGACCAAAAG 
-TCACTATGCTCCGTTGCTAGAGTGGGTGATGGCTGTGTTATGTTCGAGGAAAGAAAGTGC -AAAGCGGTCGCAATACTGGAAATTTCGAAGTGGCAACTAGGCAGAACTCCGAGCTAGACA -GGACTACACGATCGAGGCGCAGTATACTTCTCATGTTGCAATAGTAGCCCTGTGCCGTGA -GTGATATCATCAGCCTTGGAGGTAGCCAATGACTGATATGCCACCCAGGAACGCCGCCAC -CTGGCAGACAAGGAGTTGACAAATAAGAGCCGCACTGGAAATAGCGAAAATGCCCTGACA -TCACAACTTGTTTAGTCTAGCCGCTATTCACAGCTGCATTTGCTATCATCTTTTCTCGGC -CTGATGGACATTAGCGAACTTGAGAAGTTTCTTTTGGCAAGATAACTAGTTAGTTAGCTA -GAATCGTGCTTATCTCAGAGTTTGCGGGGCCGCTCAATCCCGTCGACCGGGTGGCATCTG -GGGCCGAAACGATATCAAGTGGTTACAAGGGCTTATATCCAAGTACAATCACCAACCCCG -ATTTTTGTATTCGTTTGGGGGTGGAGGCAGGGTTGTATCCATGCCAGTAATATATTTAAA -GTAACAAAGCTGCCGACGGTACTTCTCACCATTGTTACGAATGTCGAGGCACTTCTCATG -GACCTTCTACTAGAAAGGGAGGATGCGCTCAACGCGTTGGGACAGGAGGAAGGGATTACG -AGTAAGAATGGGGGAGGGGAGAATGATAATGGCACCAAAGTACATAGATGGACGATGCAA -ATTTGAAACTGCGAAGAGAAGGCACAACTACAAGTATTTGGGTGATTGATATGAATGACA -TTCATGTGCAGAAACGCTTCAACTATATCATTGTCCATCAATGAGGCATATATCTAAACC -ACGCTCTGATGCAAAAAGGATCACCCAAAACCCCACAAGGTATAGCTGTGATTGTGAGAG -CAGAACGCAACAAACTAGATATAAAGAACATAGCGATAGCAGTAAAAAGTGAACCACGAA -CCAAGAAACTGACTCGGACAGGCTTGGATTAGCAGGGAGTACTGGCCCACGGAGTTACCG -CTGCCATTTCGGGTCAACGAGGAGCACAAAGGATTGCATGGTGAGTGCTATCTCTAGGCT -GGCAGATGAAATATATGACGGTGTTAGGGAGCGAACAGTGAACTGCATAAGTAACTCTGA -CACATTGAGAATGGCAAAGGCCCCCGGCCATGAAGTTACCGCCACGGTTCCCGAAATGTT -TGGACGTCGACCAATGTGAAATACTCCGGCCAGGCAGACCAAGGTATGCATGGTGAGGAC -TCTCATTGGACTGGCAGACATCAAACATTGATGGCAGCGCTAAAGAGTGAGCCGCAAACG -AGGCTATTGATTCTGGCAGACTGAGAATAGCTGAAATGTTCATGATGATTGCTGTCATTA -TGGACTGGCTGGCACATATATAGAACGAAATGGTGACGATAGGTAGAAAGCGAACTGTGA -ACCGCGTAAAAAATTCTAGCGGGTTGAATGCAGAAACGCCTGGGCATTTCCGAAATGCTT -GAACGCTGACATGCTTCAATGCGAGCATGGCTGGGATGGCTAAGGTATGTACAAAAATTG -GTGTTATTATGGAGTAGCAGATCCCAGCCATGTAAAGACAGACGGTCCTATCATGGGGAA -AGGATGATTCAACGCCAACAAGACAAAAAAAATGCAAAGCGAAAGATCTTAACCAGATGC -AAGTATTGGTACAAGATGGCAGAGGGATAAAGCAGCCTCTGGAAATGGGGTATAAACTAT -CCAATCACGGATGTGAAGATTGTAGTAAAGAAATGGATATAGAGATGCAACCAAAAGGGC -CAAGGAAAGGGGAAACGAGAGATTATTCAAGTGCTTTACTGCGAGATTGAATTTCGTCTT 
-CAGGAAGACATGCAATCCACAGGCACCGTTCGATGATGCCCTGCACGGGCTCGAACTTAT -ATTTGGTACTGGCGTCGATGAGACGGAGGAGGCTCTTAACGATTCGTGCTGCGGTGCGAT -AGCGCTCGAAGTTGACAAGTAGCCCACCGCTAGTGCTATTAACTTGCGCGGGTTTCTGTG -CATTATAAGTGAGATCGTGGACATATAGGCCTGGAAACAAAAAAGAAGTCAGCAAACAAA -ACCTAGCGCGATGTTCAAGTAAATGATTTTGAACCTTACCGATGAAGGGAATGCAGCCTT -CTTGCAAGCTGGCCGTCTCCATCTCGACACGCAGATCATGAAAATTGCGCACCGGCTGTA -TAAGACACTCCATATCCTTGAACAGCCGCTTATCCTCAATCGAAATCAACTGCCATGTAC -TCTGTAGCCGGGAGCAATCAGACGAAGACAAAGCAATGGCGATCTGGAGCATTGTGGCGT -AATTGCAGATGCGGCGTGCATGCGCAGCTGTATGTATGTACTTGACAATTGTGCGGGCCC -GCTCGTTGATGTCACGGGTCAAAACAATCTCCGACAAGATCCATTTGACCATAAGGTTGA -AACGGCCGACGACCAAATCGATACCCTTGCGTTCCTCGTACGTCAAGAACTGGACCCAGT -TTGTGACCAGTGGGCAGGCACTGCTCCATTTCATCTCGACTAGGTCTCGCCAGTCAACTT -CGCTCAGGGCGGCCATTTCGATTAATGTCAGTTGGTGTGCAAGGACCTGGGATTCACATG -CTAGAATGAAGGGTACATGGTCGGGTAAAAGGCTGACAATGGGGTCCTGCTCTGGACGAT -TGTTTTCTGAGTTGAATAACACGTCACTCGAAACCTGTTGTATGTTTAGAGGACGCGAAG -CAGAAAGGCCCGTTTCCTTGTGTCGCGGGGATGTGTCCGGCGTAAGGGGCTGGGCCTGGA -ACAAGACTGTATTCAACGGCGAAGGATAGGAAGTGAAAGAGCCCGGATGTTGGATGGTCA -TCGGCGGGGAGGGAGGTTGCGCCAGAGGATGAGGCGCAATCTCCACAGTTGATGAATTGA -GGCTGTGGGTGTCAGTCGAAATCCTTGCATCCATAGCTTCATGCGGATGAATCAGGTCGA -GCGACATCTCTGAAGACAAAGCTGAGAAGCGCTTCGGAGTAAGCTGGCCAGCATCATGAG -TAGCTGAGGTCGGGATGGTTGCGCCACGAGGGATGCGACGCATGCTGTCGGTTTCATGCA -CAATTTGAATTGAGGGGTGTGACGAGGCAGGTTCAGAAGTCTCATGGCTTGAAACGAGGG -TGAGCGGAGCACCCTGTGAGTCGTGTCGGACTGGCTCGCTTGTAGGCTGCCATTTCATGG -ATTCGTCGGTCAATCCTCTTTCAAGGAGTGGAATAGAGTTGTAAGACTCCTCAGACTCAG -TAACAGAATCAGAATAGGGTCTAGGTGGCGCCAATCGCCCCCCAATTGTAGAATACGCGG -ATTTGTCCGTCTGTCGCCTGCGAGAGAGAACCTCCCACGTAGGCTTGGTCGTTTCGAGAT -CATGAGCAACTAAAGGCTGTTCTCGGTGAGAGTGAGTCTCTTCTGCTTGTGCGGACGAAG -AATCCCGATTGGCGCTGGGTGGTCCATCCCACTTTCCCTCCAGCTTCAACAGTGTTGACT -CAATACCACCGTCGTCGTCATCAGGAATCTGTGCAAACTTCGCAATAACTGACTCAAAGG -AACGTCTAATTTGCCGGGAAGTGCGCGGCTGGATAAGTGAGTACCGGGGATTCGGAGGGA -TCAACGTTGAAGGAGGTCTTGAAGCACCGCCATCATAGGGGGCCGTTTCGTCCACGACCG -AGCTATCCCAAGACGCAAACGAGTCACGGTAGGAAGGAAGCTCGGTCTCATCTCCGCTCT 
-GCATTTGGCGTAAGTTTCCACCCCGGCGCCTGCGAAGAACCGGTCCTGGTTGTTTGTCAT -TCTGATCTCGAATCACATTGGAGTCCATAGTTGATTTAGCCGTCTCACTGCTCATTGAAT -GCCGCTGTCTCGAAATTCCGCTTTGAAAGGATGCATACTTTCTCAATCGGAGAGATAGAG -AATTCTTCTTTTTGATAGACATCGATCCACGTTCGACCGATGACGATCTCTGCGGCGTGA -CAGGGCGAAGGGGATAGAATACATTTGACTGAGCAGCTGATTCTGATTCGTCGGCATTGT -AACAGACCGGCCGAAGATACTCTCCTTCCACGGGTATCGATTGCGTTGAGGCGTCATCGG -GCACCTCACGTTCTGATATTTCATAAGTGTAATTATTAGCAATTGGCATTGGTTCATGTG -CTACCGGCCCTGATCTGAAGGGCATTTCGAAACCAGTATCATCGACAATGATGATCGATT -CGCTTCCCATGGTGATACCGCTCCTGAGTTTGGGATCGGAATTAATATTTTGCGTCGGTA -AAAGATCAGAATTGCTGGATGCAAGCTCTTGCGGGGCCCCTTGATGACAGTCAGACGCAG -CTGGTGGCTGAGTTTGGTCTTTCATTCCTCCCTGGCTCATAGCCAAGCGGTATTGTTTGA -GGACCTCATCACACAGTACATCAGCCCGACCTGGATATTTGGGAACAGCAGCCATTTTCC -TCTCTCGGTAGAACTCGGAGCCGAAAGCCAGTCCAGTTGGCATTGCAGATGTCTTCCCGC -GAGAAGGCAATGAGATGGCCTCGTGGGCATTCGCGATGCGAGCAGAGACGCTTTGTCCAC -CATTCCTCGTGTGCAATGCTCGTTTGATTGAGCCAATGATTGTTCTCATACCTGAATTAG -GTGACGACTTGGGGACGTCGGGTGTGCTCCGACGATCCGGATTCAGGTTCTTCAACGGCG -GAGGAAGGGGTGGCGATTCCGGCGCCATCATAGTCATGTACGACTCTGCAGGGGGGTACA -TCTCTCCGCGTATCAAACTAGCCGTGCCGAGTATATCTGGTGGTGCGTATCCTGTTTCGG -GCTCCATCGCAAACATCGGGAGTCGATCATCTCGCACCGAGTCCGAGAAGCTTCCGCTTC -TTTTATGGCTATGGCCGTGATATGGGTGCCGCGAGACCTTGGGAGATACGGGTGATCTGG -GTGGTTCATGAGTCTTGGACGGAGACTTGGATTTTGGGATACCCAATACCACAGGGCGGG -GGGCCTTGCTTTTTGAGGCAGACATGGATAAGCGCTTGGGTGATTTAGGTGGTAATGAAC -ATGAAAGCGCCATCAAGCTTTGCTCACTTTTGGCAGAGAATGGAACGCTTTGTGTAGTTG -CTGCAGAATCATTCCGATCATGTTGTACAGGGAAAGGAAGACTTTCGCGAAAGGTGGTGT -CTGCACGGGGCGTCATACCATGAGTGGGCACATTTCGCTGCAGTTCCTCGGCTTTGGGTA -GCTCAGTCTGGGTATCTCCAGGTACAATTGGAATTTCTGGGTCGTTATAGGCGCGAGACA -AGTCGGAGGAAGACCAATAAGCCGAACACTTGCCATTCCAGCAACGCTTCAAGTCAATGA -GAATCTTTCGATCACTCGTGCCGCCATTTTGTCGAGACCTGACCTCTGAGTATAGCTTAT -TGATGGTATCGCAGAAGTGGGTCCGGAGCTCGTAATTCGTAACAAAGTCATCAACGAAGT -AATTGAGAATCCAATGTCGCAAAGCCGCAAATGTGCGGATCCTGATAACACGTCCATCAT -CTTGTAGCCGATTGATTGCCCACTTTAAGCGCGCAAGAAGAAGAGCCAAAAGGTGTGTTG -TGGACAGGTATGAACGGAAGGTAAGGAAGAAGTCTGATACTAGCTCATAGTCCATGAATG 
-ATTCAGAGGAGATTTGAGCAACAATTCGAGCAGGCGTGGCAGCACTGATATCCTTCGTCC -CAGTGATGTATCGGACAACAGACCCGTCATCCATATCATACACCAGTGTTTCATAGATAG -ATGGCTCAACCGGCTCCTTCAATTCATAGACCATAGAATTTCTTGGAATTGGTGAAGCTC -GATTCATATCCATAGCCGCCGCGGTGCCCGAAAACATTGAAATTGCAAAGGATGGCCGAT -GTCGCGAGAAAGAGTCTCGAGGATATGGACTATGAAGTCCCGGCAGAGTGTCTGCCATGC -CTCCAAGCAGGGTTGAATTCCGAAAGTTGTCCGACATCGAGGTTCGCGTGTAGACAGAAC -TCTGCATAATTGCACTGCCTTCGGCGTTGAATCGCGGGGACCCGAGCCGATAACGCGGAA -TAGGAACCTCCAAGGCTGGTGTCGAATTTTCATGTGCAGACTTGGGCCGATCTTCAGATC -TATCGATTACGATCCGCAGCTCGCCACGCTTCCCAGCACCGGCTATCGATTGCTGCTCCG -ATATTGTCGAGAACGAGAGGGGTCTCGTGCGGTGCTGGCTGGAGAGTTCCGGGGGAGCAG -GTACTGAGAGGACGGACTCGGGATCTTCGGGTTGTTCGCGTGGACGTAACTCAGAGAGCT -GCGAGCTCGACCACCTGGATGGATCTGGATCCTCGCGGGCATGTGCATGAGGCTGCGGCT -GAAGAAGACTGGATTTCACGGTTTCGACTGGAGAGAAGGATGTAGGCTGTGGGCTTTTTC -GGGGCGGGTTGGCAATCGGCCTAAGTTATTGAAATAGATCTACTATCAGCAGGCTGCACA -GTATGTGACACAGCAAGAATTATGGGTGTACCTGAGAAATATTTTGCCATTTTGCCCAAC -ACTTCTTACAGTGAAATGACCGGCTTTCCTCCCGGGTGATCGCGCCTCTAACGGTCCACG -ATGTGTTTGGGACCGCTGTAAGTTGTTCTGCCGTAGAAGCTCATGGCTATCCTTGGTGTT -TATCCCAGTTGCTCTCTCGCGTGTTGCCTTCAATTCTAGGTTCCGCTTGACATTGCCGCT -CTGGGCTACGGTATTGGCACGGCGAAGATTCGAGATTGTAGGCCCACTAGACATTCGGCT -TGAGGCTGGCAGTTCATTGATTTGGACTTGCGGGCCCAAAAAGGCATCCTTGAACTCATG -TGGAAAGGGCTCCATGACGCGAAAGCTATAGGAACACCCGTCGCGACGGGGCCCTTGaac -aaaagcatataaaatactaggtaagaaaagaaaagaCAGCTGTTGTGGAGTTGACCCGAC -GCGGCGGGAACGGAATAACAGGAACTGGGAACGCTGGGTCCAATGTGTTTGGGAAATTTC -CCCCTTTTACCGCTTTTCCTTTTGGTGTGTAATCCGTCGTTTTTATTTAGATTAAATCTT -ATGTTGCTACGAAGTACGGAGTACCGGAGTACTCCGTTCAATATATCCCTTCAGCATTCT -ACCTGGAGATGATAGGTCACCGGAAAGACCCCCGAAAAGAAATCTTTACAAAAAGTACAC -GAAAAAAAAAGTACCAAAATCAGGCAGGATTTGGCTAAATCCACCATCATTTTGCACATC -TCCACATTTGGAGTGCGCGAAACTGGGAAATCTCAAGAACTTGGACAAACACATGAATAT -ATCTTATGTTTTACCACGGAGTCCTTTGTTGTCACCAGAATCTGTCTCATATGTCTTGTT -TTTCATTAAACGGTGATATTTGGGAAGTAATTTACCCATTGGCTGCCCTACTTTAGGTCT -GTACTAGCGCACATGACCCGGAGATCTGAGGTCTCGGAAATTTTTAGGGAGATCAGGAAA -ATTGGCAACTTTTTGTGTCAGTTTTTATCCAGCTCATGAATCTATGATTAAATGAACTAT 
-TCTGTATGGAGTGTTCCATGTCAAGTCTCCAATACGGACGGAAGACTCCGTAGTCCAATA -TGTATCCCGCATAAGACGTCAATTATCTTGCACTTTGCCGGTGAGACGAGACGTCACATG -TCAAATGTCGATCAAGCTGAGATCCATAAGAATTGACATATCCACATTAAAACGGCCCCA -AAAACCACCTAACTCCAAAGAGGTAAGGATCTAGGTGACTAGATCTATTAGTTGACTCTT -TACGGTCTTCGCTTTCTCCTACACCTCTCTCTCTTCGGATTCCGCCCGCCGACACCATTT -TTTTTTTCTTTCTCATCCTAAGGCTCCCGCTTGTTGTGGGTGCTCTGGAGATCAGCAATT -GTACGGTCTGGGCATCTTGGCTGATGATATCCTCTACTAGGGCATCGTTGGTACCCGTAG -AGTCAGCGATGGATTAGACATCACTGACAGCGGTTTGCTTTGCGGGTTTAGACAAACAGC -GCTAGTAATATATGTTGAGGCATTACTCATCTTCGGAAAGCATGCTCAAAGTTTGGGGTG -TGATGCTCATATCCCAGCGGCAGGAGGGAGTTTCAGATCACACGAAGAACTCGCCAGAGG -TCATCACACTCATGGTGGCTTAATAAGAATGGACGTGTAAGAGTATCATGAATACGAAGA -GGTAAGTTCTCGAGCAAGTGACGATGGATGGTGGATGTGCAGCGAGGGCCGGTTTTGAAA -GTGAGGAAGTGACAGCAATTTCCATTCAAGAACTGGGCGAGATGCAATGCGGCCGAGACA -CAGCTGAACACGTGTGACTAAGTCAGAATACTGGAACTATGCAAAGCTCATGGAATCAAA -TGCCATTGACGGTAAACTAATCAAAACTACCAAAGCTTGCAAGTTGGCTTCCTGTCTGGG -CAACTAAAGGCATTCTTGAACTCTCTAGAATTGGCCATAGTTTCCTGCGACAGTGGTTAG -CAAGGGCAAACTCAAGAATGCATTAACGAGACTCACGATGATTCGCGCTGATTTAGGCGC -ATGGGGGTCGTTATAGATCGCCTGTTCAGCCGCCTCCTTGGTAGTCTTCCCACACCACCA -ATTACCATAGGATATAAAGAACAGCTGCTCTTTGGTGAATGCATCCAATCCCGGGATGTG -AGGCTCAGGCTTTGCCTCATCGTGCTTCTTCCATGCGTGATACGATGCTGTCAGACCTCC -AGCGTCGGCAATGTTCTCACCCAATGTCAGTCGTCCGTTCACGTGGAGAACCTTGTCCTC -GGGACCAGTCACGGTAAACTTGGAATACTGATCAATAAAGCATTGAGCACGCTCTTCAAA -TGCCTCGACGGTCTTATCGTCCCACCAGTTAGTGTAATTTCCACTCTGGTCGTAGTGTCG -ACCGGTAGAATCAAAAGCTATACCAGTCAGAAATTGGGCAATAGTTTTGCAGGAAACTAA -ACATACCATGTGAAAGTTCATGGCCACTGACGGCTCCAAATGCGCCATATGCCAGATACA -ATGGCGCGGAAGGCCCGTAAAACGCTGGCGGCTGCATGATGCCGGCAGGGAAGACGATCT -CATTACCTGGCGGATTGTAATAGGCGTTGACAGTCGGTGCACTCATGTCCCATTCGTCCC -GGTTGGTGGGCTTGCCGAGCTTAGACCATTCGTTGCGAAGTTGAAACCGTGCCACAGCCA -CCTCATTCTCAAAGAAAGTATCTTTGGATAATTCCAGGTCCCGATAGAACTTGTTTACGT -CCTCGGGGTCCAAAACGTTGGGACTCTTTGTGGGGAATCCGATTTTCTGGACAATGTTTC -CCACCTTTTCAATACCCAGCTTACGCACATCCGGAGACATCCAGCTGGTTTGATCCAGGG -TGAAAATGAACCGCTCCTTGATATCGGACACAATCTGATCGCCAAGTTTCTTCGAGTCCT 
-CAGAGAACGCATCGAGCACATAGAAACGGCTGAGACTCCATTCAAGGCCTCCATCCAGAC -GCCCAAGGCATTTACGCCATCTCTCCTGCTTGGCCTGGGGTTCTTTGCCAGCGAGTTCGT -TTTTGAATCGCCGGAGGGGTTCAATGGTGGCATCCTCAATAACATCCGCAAACGCTTGGA -TAAGCTTCCACTGCAAGAAGAGCAAAATGGTCTCTCTGGGTGTCTCTTTCAAAAGAAGAG -ACAGTGCCTTCATATAGGGAGGCGAGCCAACAATAAGGCGATCACCCTTGTAGTCATGGG -GTGCCAAAGAGGAAATGATATTAGCAAATGATATCTCAGGTACCAGTGCCTCGGTTTCCT -TGATGCTAAGGGGGTTATAGTATTTGGTGACATCTTCTTGAGTTTGGGTGTCCGGGGTCA -CATCCGCAAGCTTCTTTTCAAATGCTACAACATCCTCGGCAGTTTCGTCGAATCCATCTC -CAGCAAGTCCCCGAACAACTTGCTTCACAACAGTGGTATAGTCAGCCACAGTTTTTGTGT -TATTATAGTACTCTCTCGCGGGAAGTCCGATTTCGCGAGGCGGCGAGATCATGATAACGA -CATTATCAGGGTCTCGATCGTCGGCCTAGGAGGAGTCATTAATGGCTCTCATAAAGGATA -GGTCGCATAGACTTACAGTGACACCAGAAGAAGCCAGAGCCTCGACACCAGCGTTCGCCA -GATACAACAAAGCACTGGTGAGTGAAGCTTGCGATCCTTTTACGAGACCTGGATTCGCAG -GGTAGATAGTCTTCAGTTCATCCAGAATATCAGTCAAAGGCTTGCTGCCCCGTTTGCTGA -CTGTAGCCTCGTCCAGGCAAGCATCGTACGCAGCTTTGAGTTTCTTGAAATTGTCAGCAT -CAGATGACTGCGGCGGTTCGGTTCGCTCCAAGATATGACGAAGTTTAGTCTGGGCGTTCT -CCTGCATGATTGTGCCTGCAAATATCGACCCCTGGTCAGGCCGCATGTCATGGCGCTCCC -GCCATCCACCGCAGACATATTGATCAAAATCAGTGCAAGGATCTATGTCCTCATAGTTGG -GATGCAGATTGTAGAGTATTTCGGATGCAGCATGAATGCATTCTTGCGACTGGCAAAGCG -GAGGAGCGTCGGGGTGCGGGGACGGATCCTCAATGGACGGAGAGCCTGATCCTAGAATTA -CTTATGTTGGCGACGCATTTTATTAGGGAGAAGTTATTCATACCAAGAGGGAAACCTAGA -TAGTAGGTAACAAAACCTGTGATCACGAATACACAGAGCCCTCGTCTCAGTGTTCTCGCC -CAACAGGGGCGAATTCTTTGTTTATTGTGACGGGTGTGGCTGGAGACAACACCATTGTCC -TCGGGATCGTAGACTTCGTTTACAGCCAGAAGCGGGCCTTTGTCGGACTCCATACTGTGC -AACGAGCAGTTGGGGTCACGTTTTCTATGGTCACTGAGCTATTAGCGCCTTGTAGCTGTC -GAGCACGACACGCTCATGAAGACATACCTGGATTATGGTGTTGGAGATACTACACTGAAG -AAGACGCGTATCGGAGAACCATGGACTAAATGAGTCTCGGTGCATTGCTCTTTAGGCAAC -AAGGAGAAGCTGGGGTTGAAATGGCGTCACACTGCCGTGACGAACTCTTGGCGGAGCTTG -CCCCTTCCAAGTGGATCCCTCTCACCAATGATGCCTCGGCTGGTTGCATTCTCGAGGTCA -CTTGGACAGGTCCAGGAACTAGGTATTAACCTGCATTTCAGCTGGGCTATATTCGGTGGG -AATTCTTGCTCTAGAACCTGTTATATTCAAGAAGCGAGGTCCTATGTTACTACTTCGGGA -GAACGAAGCCCCGGGATTAGGCGATTGTTATCAAGTGCCCTTCTAGATAGAGTCAAACAA 
-CAGACTACACTCAATATAATATACTAAGGTTTGTTGGGGACGAATATAGATGCATCTATA -TTGGCTCGGGACTGAGGTTCTTTTCAAAGGCCCTCAAATATGTTCACTTTGAGGGGGCCA -AGTTCTCGAGACGGCGGGGAGTCTAACATGAGGTGACGCATGTGCACGGAAGGTGAAATA -TGAGGGAGGAAAGAACTTGTTCCTAAGAGTCAAAAAATTACTAGTCTGTTATTTCGCGGC -ATCAAGGCAGGAACACAATAGATATTCCTCTCCTAATGACTCAGGGACCTAGTGACTACT -GCAGGCAGTCCTATCAGGCATTTGGCTATTGAATTCCATACTCACGATTACCCATGGCCA -AATATGGGATCGATAGCCTTGAATGGGACTGAGCTCATAGCAATTTGCTTTTTTTTTTTT -TTTCTTATTGAAAACGTGGACGAATCTCACAGGTCATAAGATGAGAATGCTCTAAATCAA -TGGAACCCATCAATAGTCTTCGATAAATGGCCTATATCTATCATGAGTAGTCCATGAGCA -GTGTTATAGAAGCCAGGCCTTATACGGAGAAGATCCCTGTTCAGGTTGAATGTCTGTCAA -TCACAACTCTGCTAGAGATTACAGTCTAGCCAAAGGCCCATATGCATTTACAATTGCTAG -ACTTGCAGGGCTGATTTCGGTGATGGATCTACAGCCTGTGAGCTGCATACATCGCTTGAA -ATCATCGAACAGCATCTTGAGCATTAGCTCGACTCCCTGCTGACCGTCATACTACAGAGG -GAGTCAGTATACCCGCCTCTATCAGGATCGTTGATGTAACGGGCAGCACTTACCGCGAGT -CCCCAAAGTGCGGGACGGCCAACCCAGCAACATTCAGCACCGAGAGCCAAGGCTTTGAAA -ATGTCCACTCCAGATCGAATTCCACCATCAATATGAATCCTGATCCGCCCCCGCCCAGCC -TTGGCACAAGCAGGAAGCGCATCAATTGTGGCCGGTGTCTCATCCAACTGCCGACCACCA -TGATTGCTAATGATGATTCCATCGCAGCCATATTCGATAGCAGTCTCAACATCTTCCGGG -GTGAGAACGCCTTTAATCCAAATCTCCATCTTCGTGACACTACGTAGCCACGGTATCTCC -GTGGCCCAGGAATGAGAATCCGAGTTTGTCGAGCTAAACCCATCATCGTGAGATTGTTGC -TGAATCTCGGCAGATGTTCTTTCATACATAGGATACCCCAAGCCAACTGGGGGCATGAAC -CCATTCCGCCATTCATTCCAGCGCACCCCAAGTACGGGACTATCCGCCGTCAGAAATATA -GCCTTGCAGCCTGCAGCCTCAGCTCGACGAACGATTCGTTCTTGTTTAGCTTTGTCATTC -ATCGCGTAGAGCTGCATCACATGGTGAATTGGTGCCACTTCTTTCCCGGCGACGGTGATC -TCTTCGACCGAGTGATTAGCATAAGAGGAGACACCCATGTTAACGCCCATTTTGGCACAG -GCTCTACTTGTGGCCAGCTCGCCTTCTGGGTGGGCCATAACCTGCATTCCAGTGGGTGAG -ACACAGAGCGGAAAGGTGATATCTCGGTCAAAGAGGGAAATCCCAGTATCGACGCGTGAG -ACGTCTCTCAGAACTCTTGGTAGGAGACGGTATTTTCTATATGCAGCACAGTTGTCATGA -AGTGTGACTTGGTTTGTTGCACCCGAGTTGAAAAAATCTATCTATTCGCGTTAAAAGAGA -AATAATCCCTGCAACATGGGAGCTAATACATACCTCTTGCGGAGACGGATAGCGAGTTAG -AAGCCGCCTCCTCCAGGTCAGCTATTGATAATATTTTATTTGCCATTTCAGGAGAAAGAA -AGGAGCATATGATGAGAGATATGTGAAGAATATCACATCTTGATGTAGTCGATGTGAGGG 
-GTAAAATGTGGGGGATTTTTCGATCTATAAAGGTTTACCGCTTGTTGCCTCTGAAATTGA -ACAGTCCATCACATATATTCTATTATTCTAGCAAACGTTATAATATATAAATTGGGTGAT -TATTGACATAGAAAGAGTACACCGTACCAACATCCATGTCCGGCCATACGAAATGCCCGG -CATGACAGATATGTTTTGATGTGAAGCCCTGACATTTGATTGGATTACTATTGAGTATGT -GTAGGATATTGGAAGGAGCTTGCATTATAGCCTTGTCAGATCTCTGGAAATTGGGATGCC -CACTTGAACATTCCTTACTTTTGGAATGGATCGAGGTATTTTGGCCTAACTCAAGGCTGT -AATCAAGAGCATATAGCCGAAGATTATATGCAGTATTTTGCGCTACGACTGGTTCTCCTT -TTGAAACTTAAGCACGGAAAGCCGATATACTACACTTATATACCTGACGAAAGTTTTTTT -TAATTCAGTTGTAATAACAATAAAGATCGAAGAATCTGAGGACTGACCTGAAGGTACTCT -GAGATGCCAGCAGCAATGGGCTCCATATTTAATGGGACCACTAGGATCGTTCAGCGAATG -GATTAGCCAACTTTCAGCACTGCGATTTTCGATAAATAATCAACTTGACTTGACAATATC -GATAGTTTAGTTACACTTGCCCGAGGATGGGAATACGTGACTCAATTAGTAGTTGTAGTT -ACTATTTGGCTACACATTTAGGAGATCATGATCAAACTTATTACCTGTCACTTGAGCTTT -CTTCCTCTTTGCCAAGTCAAGTTTAATATAAAGGCTACTTGCTCACCACAGCCTCTTTAG -ATTCTGGCAGTGTGCGGGGTTTTGATAGCACAATCCTCCACATCATCTTTTTGCTTCAGA -TCTCTATGATCTTCATGTACATGATAGGCTGCCCACTCTCCATGCCAAGTTAAACCAAGA -CTTCCCCGGCTTTTCATATCCTCCTCTACTTGGCATCAAGCGCAAAGTTGAGGGGTAGCC -TTGGTGGCGGCCTAGATTCTTCAATTGCCCATCTTGTTCCGAGGGTATTGTTGTTACTGT -GTTGCATAATCTCGGTTAGGATTCTTTCTAGGTGTTCTTAAGTACTTTCTCTCTATCATT -CCCGGCTCACTACTTCGCCTTCTATTTCCTCACTTTTTCAGATAATTAAGGGACACATCA -ATTCGCGACAATGGCATCGATTTTGGCCAGCCCCAAAGAAGACGAATTTATGAAGAAAGA -AGACCTGGAACTTACCCATCTTGAGGAACAAGAGCTGTCACCCCGCAAGTCTGGCAATGA -ATCACCTGTGTATGATATTGACGAGGCACATCAGAAAAGGGTCATGTGTGTTCTCTCTAC -ATTCCAACATGGTGAACAAGTCAATTCTAATGTTTAGACTAGTCGACGAGTGGATCTCCG -CCTCCTCCCAATTCTTGGAATCATGTATTCCATATCACTTATTGATAGGACGAATCTTGG -TCTTGCATTTGTAGCAGGGATGGAGGAGGATCTTGGACTTGACATCGGCAACCGATATAC -AATCGTTGTGATGGTATTTTTCGTGGCATACATGTAAGCTCTCTGGTATCATTTACAAGT -CTTTTAAGGATGCAACGATTTTCTGACTCTGTATCCCACTTAGAATATTCGAGATACCTA -GCGTATGTCTGCGCTCCAACCGGAAAATTCATTATGGATGAAACTGACCTCGGCAGAACC -TCATACTTCCTCGAGCCGGTGCCGCAAACTGGCTAGCATTTCTTGGGTTGAGCTTTGGGG -TGATTTTGATCGGCATGGGTTTCACCAAAAGTTGGACTACAATGGTAGCTTGCCGAGTTC -TTTTAGGTAAAGCAATATCCTCCTTTTCCATTGTATGCTACTAAAACGTCATCCTTGCTT 
-AGGTGTAATGGAAGCCGGCTTTCTTCCAGGTATATCAAAATGAACTGTGTCTCCATTTAA -ACATTACTAACTATATAGGTTGCACCTACCTAATTACATGCTGGTACACACGATTCGAGG -TGGGAAAAAGACTAGCCACTTTCTGGCTGGTTTCTGTTGTGCTGAACGCTTTCGCGGCAA -TTTTTGCCTATGCCTTGACCCTGCTAGACGGAAGCTATGGTCTGAATGGATGGCGCTGTA -TGTTGCTCGTATTCGTGCCATATGACTCAGCTGACTGTTTTATCCAGGGATTTTCATCGT -CGAGGGTGCTATCACTAGTGGTATCTGTTTGATTGGGTGGTTCATCATCATCGATTTTCC -GACCCAAGCAGATACGTTTCTCAAGCCAGAAGAAAAGCAATTCATCATCGCCCGCATCAA -CAATGACAGCGGAGATGCGGAAGAGGATCCAATTACTATGGAAAGGGTTCTTCATCATCT -CAAAGACTGGAAGCTGTATGTCTGGGCATTCAATCTCATGGCCTCCACCCTGCCTGGATA -TGCGTACAGCTACTTCAAGACAGTCATCCTCATGGGCATGGGCTTTAGTAGTACGCAGAG -CCAGCTTCTTAGTGCTCCGCCTTACATTATTGCTGCATTGTTTACATATTTAGGTGGCTG -GTTCACCGATAAATATCAAATCCGCGGGCCGGTGATTGCTATCCATCAACTTCTGACAGC -AGTGGGGATGTTGATCACAGCTTATGGTGGTGCAAGTGGGGCAAGATACTTTGGGATATT -CATGGGTGAGCGTTCAATCTTTTCACAATGAAGTGTAATATACCTAAAGTTATACTCCAG -GTGTGGGATTCCTACAGTTTTGCATCCCCGGTGTGCTCGCATTTCAAGCGAACAACATTA -CTTCCCACTCAAAGCGTGGAGTCGCATCAGCAACCTGTCTGATTGGGGGTGGCCTTGGGG -GTATTATTGCCAGCGTCGCCTTCAAGTCTGATGAAAGCCCGCATTATACTACGGGCATTT -GGGTGACCTTTGGTATCTCGATGGCTAGCATATGCTTGACGCTCATGATGGACTTTTACT -TTTGGAAAACAAACAAAAAGGCTCGGGAGACAAATGGCCAGATTGAGGGCATGACAGGAT -GGTACTACACTCTTTGAAGATAACATGCTGTTGGAGAAATCATGAATCAATTGGAGCTTT -CGTGGTATTTTGTGACAGCCGAATGACCCGCTCAAGGGAGCTTTAATCTATGCGTCTGGC -TTATATTCCACATTTAAATAGCAGCTGGTGGTTATACAGATTCATCTCATGTCCAGAATA -CAAAGAGAGCATGGAAAATAGTAATTCTGCGAGGATAAATTGACCATATGGCTTCCAATG -TGCCACAACTTCACATCTATGCTTCCTTCCAACTATGCCCAATAAGCTCCCCGACCTCAA -CTGCAATTGATCGACCCAAACTAAGACTAGCAAAATGCAATGCGGCATCAATGCCATTTA -TCATAGCACCCGTCGTGATGATTCTGACTCCGAACTGGTTCCGGCCAGCATCCAGATAGG -AAACCAGCTCCGAAGGATCTTCAGGGACAACTCGAGCCCCCTTCGCACCGGTAGTACGAT -TCACATAACGTTGTACTGCGTCTCGTAGACCAGGGGTGGCAAGACGGTGTGTGGTCGCAG -TGAGGCCATCAAGCACGCCTCCAAAAGCTAAGAAGTAGGATCCGATGCAGACTGCTAAGA -TGGTACGCTGTCGCGCGCTCTCCTTATCCATGTCTGATGCAAAACTGGTGATAAATTCAA -CAAGCTCCGATGGCTGCCCCTGCTCGGATTCCATATGTGGTAGGATATTGGTTGGTCGTG -CGCCAGGGACGATCAATATTTCATAGTCGGTCAGTTGAAGTTTGGCTCCAGCAAGGGATA 
-TGTCTCTCTTAACTGTCACATTCTCTTGAGATGTTGTGAGCTCATCCTCAGCGGCATAGC -TCAGGGAGAAGTCCGAGTTGTAGAGTACTGAAACCGGACCGCTCACATCGAGTATGTTGA -ATCCGGGGAAGATGACCACGAGCGCTCTTTTCTTCATGTTGCCCATTGTATATTTTGAAG -GCCACGGAGGATAGAAAACTCGGTTTTGCAAGATGTAGTTTCTGTCTGGACTCTAGAGTG -CTTCGATGACGTCACCAACACCTTGTGAGGTCTCCGGAATGAGACTGCACCACATCTACA -TCTCCGAGCAATCCATGGTATATACAGATGCTTCTAATGTCCACAAGATGGCCAACTACA -GCTATTAGAATGAGAAATCTTGGATGTTTGATACTCGAGATAATACAGTTGAATATCTAG -GTCGCTAATATATATCTGCGAAAGCTCTGCATCTATTGCTCAAGCTGGGAGACTGACTAG -GATCACCTCTGCGTCGATATGAAAGCGCCCGATTAAGACATCCTATCGCTGGAGTAAATT -TTCACTAGTCTTATTAGCTTATCCAGTCCCCTACTGCTTTACTACGTTCAAGAATTCTAT -TTGATTGCTCGCGGAAACCGCGCTGCCATCAATTTATCCAATGCCGTATGGAAAAGGAGG -CATAGATAGGTACGCTGTACTCCACAGGTAGCTCTTAGCATGAGACTAGTAATTAAAAAA -TCCGGATATATATTTTCAACAGATCCTCTCGGGGAGTAGTATGTATTCCGGTCGTCACTA -TCCTGCGGGGCCCTAGCCTAAAGATAGACCCACGAAAACGGCCACTAAAACCGTCATAGT -TCTCCATGTTTCAGTTGTTATTCCGAACATATACTCCATAGCGTCCCTCATTTGAACAGA -CTTGACGAGTACTATGCTGGACTAGGTATATCTGCCCCGGAGCTATTCCCGAACGACGTA -CGCTCGGGCATTGAAAAACCGGAAACTAGACCTGCTGTCAAATGCTGATCGACAACGTTT -AAATTGCTCAAGTCCCACTCAATGCCCCTGAATAGTTTGGCCGTTGGCGGGTGTGCCGTT -AACTTCACACTGGCTACTCCGTACGAAGTTCCTGCTCACGCAGGTGGGTAAATGGGCTGA -TGGGAATCTAATATTAGATGGTCCTTTGGATTTCTTATGTATGGCGCGTCTGGCTTTGGC -AAATCTAGATCATTTTACATATAATGTACATTTGTACGGAAATTATACTGCAAGATTTTG -AGATAAAAGGGATCTATAATGTATACAACATACAGAGTACTATCTAAGGTACTTGGGTTG -CCAATCTGATTTACCGAAGTGATAAGGCTTACGGAGTAGAGTTTCCTGAGGTACGTAGTA -TGAAGATTTGTCAGAGCTGCATGATACCAGTGGAGCGCCCATTATGCATAAATAGAAACA -ATACATATCAGTAATAGTATGTAACAAAATCGATACTTCGGGTAGGCCTTCCGAGCAATG -CTAGAGAAGAGATAACTCACACGTGGGTGATTCCTCGGAAGTTGCATGTGGAGGAATCCA -ATCAACTCAAATTCCAGGGGTTGAGATGAGACCTCCCCTTTTACATCGTACATGGGTAAG -CAGTTGGTAAGCGAGAGAAGTCAGATCCACCGAGTATTCCCTACGTATCATAAGGATAGC -CTCGATCGCATCTCTTGGATATCCATGGCGAATCTTATACCATATCGGGCCACGGAGAAT -TTTCATTCTAAGATCGGTGATGAACTAGGTCCAGGTTGATCCGGCTTTATACCCATGCAA -AATACAACGTTGTACGGATTATATTCTCAGATTTCCGGTCGAATGTGACTTGACTTTGCC -TAGGTCAATCCCTACAGGTGCATTTAACTAGTCCCAAGAGGATCACCCCCTGCCTTAGTA 
-GCACGGCGTGATGAGATTGCCCGCATTTAATGAAAATCATATCTTGAAACTGGGGACTGC -GAACCGATAGAATGGGGCAATTGGTCTTTGTTTCAGACCCTTTGGGCCTGAAGTGTTGTG -GGGGTTGCATACCATGTTGCAGACCTGAAGTTGAGTACAATCTGTCACCTCGCGATCTTC -ATAAGATCCAAATTCATACTCACTTACTCCGTACAAAAAGGACTGGCTACCACTTCACCT -GCGGCTCAAACACGGCAGAAATTGTGTGCTGTGGGGGCGAAGGGAAATGACAACGGTCCA -CCGAACATCTGTTTCTCTTGGGAATCCCTCGAAATCACCCCCCCAATGGCGTGAAAGTAA -GCCAATCTTGGATTCGAGCTCTCTGATTGGGTTGTGCAAATCCCCTAAGCGCTGTGATCC -AATGTAAGTCTTGGATTCGTCTAAACCTCGGCAGAACTGTGGCTGTCGATGTACTCTGAA -TGGTTCTCTCGAGAACACAGAATCTCAGGGGGGCTCCAGTGAACGGAGCCAACACCTCGA -AGTGGTAGAGAAGAAGAGCAAGATGCCCATGGTGATCCAAGCAGACCCTTAGTGTTCTAC -CGGGACAGAAATCGGCCAACGTTTCAGAGTGGCCCTTGCATTGGCCGCTGGCACGCCCAT -GCAGGTTTCAGCACCTTCGTGTCGATCGCTGGAGCTCTGCATTCCGCATTGGCTTGGCAA -ATGCACCGTTTTGTACGGAGTACGGAGTATATTACATACTGTGTCCTTTTCGTATTGCGT -ATATCGGATGTTATAGATATATCTCCATGCCATTTTCTTTTTCtgaaataaatgaaataa -aatgaaatAACGAAAGACAATTCGATTGTTCCTGAAGTTCTGATTGGATCAATGCGTGGG -TGTACCCGAAGTACTTTCAATAGTGCCGTTCGATGGACTTATATGTTTTTTTTCCCCTCT -CTTTCTCTCCTGCGAGACTGGCAAAGATATGATACCATCAAAAGGGTAACAGGACAATCG -GAGAATAGGCCAACAACTGAGCTACAGTATCATTTTGCAGTGTCATAAGTTCCAATTTGT -ACAAAGTGCAGACAGGGGTCAAGCGCCCCCAATGGTGGATCGGCCTAGTGGCCCCCCTAG -GAACTAAGTAAAGCCTTTGTATTTCAATACCAAGAGAAGATCTTCGGGAAAAAAATATTT -CAAATAAGATAAAAGAAGGAGTAGTTCGTACAGAGGGAAAGAATAAAAGAGAGGATATAA -TCCATGTCAAAGAATACCTGAAAATACTGAAGATACCGAATGTTGATAGACACGGAAGTC -AATGATACCGAAATAGGTATACAGAATACCAGAATATCAGTGATCACCGGCATTCTTCCA -TTCTCCATTCTCCATTCTCTCCCCGCTTGTGGACTCGCTCGTGGGACGGGAGATATGTAC -GACCGGCCTTGAATGCGCCATTCCATTTCTTCCATTTCTCACCCCCCTCCGGAGAACTCG -GCTCCCAACTTCCGAAACGTCCTTTTGTCCTCACTCGGGAAAAGCCCATATTTTGAACTT -TGGAACGCGATCTCGGCTTCCCCTGTCACCGCCAACGTCTTTCCCCCCTCAAGCTTCCTA -TTTGGCCTTCGGCAGGCTGACGCTGTGTCGCTCACACAGATTCGGCGCCTGTCTCGGAGG -ATAACCTGGTCCAATTGGAGATACAACTGTTCAATTCGCAACGGAACGTGGTCGATAGGT -GTGGGGAACAACGGAAGCGATCGAACCTTTCAAGCTTCTGCCTATCACGTCGCACACCAG -TCATTCAACATTAGTTTGCACTTTGGCATAATATAGGAAAGGATTTTGGTTCTTTTTGAA -GGTAGGCATATCTACCACATTGCGCGCAGAGTTTAGTTACCCGTTGAGTCTGGTGGAGTC 
-TAGTCGAGTCTTTTGTCTCTGAGCTTCGACTGGACTCCCGTATGGCGCAATCCAATCGCA -TCCCTGACTCAGATCGAACACACGTTGATGCGTCGAGTCTCTTGTTGCACCAGTTCAACC -TCCAATGAACCGCTGCTAACCACCATCATGCTAGGTAGTTTGTCGTTACCGGACGTAAAC -CATGGGTTGCTTAGTTTCGAAGCCCGAGGAGGCCGATAAGGAGGCTTTCCAGCGCAATGC -GCGCATTGACAGGAATATCAAGAATGATAAGAAGACGCTGGACCGGACCATCAAGATTCT -GCTATTAGGTGGGTGATTCTCGGTCGCTGGTTGTCGGCTTGCTTCAGTGTCGCATTGCTA -ACAGAATGCTCAGGGGCGGGAGAGTCCGGCAAGTCCACTATTATCAAACAGATGCGAATC -ATTCACAGCCGAGGATTCCCTGAGGAAGAGCGTCACCAAACCCGCGCGATAATTTATTCG -AATATCGTCATTGCATTCAAGGTCTTGTTGGATATCATGAATGCCGAGGAGATCGATTTC -GAGAACGAGGAAAAGACTAAGGTACTGCTTACACTTCTCCTCAAGAAACCAGCATCAGCT -AACGAGGACTGAAAGCCATTTGCGGACCTATTAGACAATACGGACCCAGATGTAGGGTCC -GATGAAGCCTTCTCCGATCTCGAGGTGCGGGATGCCATGAGGGCCATGTGGAAGGATCAA -GGTGTGCAGAAGGCCGTTGCACGGGGTCATGAATTTGCTCTTCATGACAACTTGCACTAG -TAAGTGGAAATGGCGCCGCGTTGAATGAGTCTAGGCTAACGAAACACAGTTTCTTTGAAT -CTCTCGATCGCATCTTCGCCCAGGGCTGGCTTCCTGATAACCAGGACATGTTGCAGGCTC -GACTTCGCACAACGGGTATCACGGAAACCCTGTTTGAGCTCGGACAGATGAACTTCCGCA -TGATGGACGTTGGAGGACAGCGGTCAGAGCGGAAGAAATGGATCCATTGCTTTGAGGGTG -TCCAATGTCTCTTGTTCATGGTGGCCTTATCTGGATATGACCAATGTCTAGTCGAAGATC -AGAATGCTGTAAGCACGAATCTCCCAATGTGCTCGCTCGCACCACTAATCATATTCTACA -GAATCAAATGCACGAGGCCATGATGTTGTTCGAATCACTAGTCAACGGCGAGTGGTTCAA -GCGCAAACCAATCATTCTTTTCCTCAACAAGATTGATCTGTTCAAAGGGAAACTCGCCAT -CTCCTCTATCTCGAAACACTTCCCAGATTATGCCGGCTCTGATACCGACTATGATTCCTC -GGCACAGTATTTTGCGGACCGATTCCGTGGCATCAACCGCATTCCAGACCGCGAAATCTA -CATCCATTACACTAACGCAACCGATACAACTCTACTCAAGGCGACTATGGATTCAGTACA -GGACATGATTATCCAGAAGAATCTGCATACCTTAATACTATAAGGTAGAGGCCCTCAAAT -TCCCGAGGCATTTCCTATGACCGTCTGTCTACGACGATGATTTTCATATCTTTCTTCATT -TTCTATTTCTCAAGCTCTTCAGCCTACCAGTTATTTTCCGGTTGCATTCCAGGGCGGCTA -CTACTCGTTCACTCCTTCGTGTTCATTGCCATATCTCATTTTCGTTTTTTTCTTTTTTTT -TAACGAGGCGTCTTGATTGAGAGGTTATCTGCTCTCTGTTCACATCATTGGGTCTATCAC -GTTGCATCCAAAAGCCTATCCAATTTCGCCAACTAATGCCCATGCTTCGGTTTAGCGACT -GTGTCGTTGCCACTCCCCTTTTTCTTCTTTTTTTGGGTTTAATGCAGAGTGATACCATTT -GTTTGTCTTTGCAGCTGTGTTTACTAACGACTGCTCTGTATTATCACAATCATTCTGTGT 
-CTTTATTTACTATATCTCATTTCTGAATGAACTCGATATCCTGCTCTTTTATGAGCACTT -TTTTTTCCCACTCTTCGGACTCCAGTCTCAATAGATTTCTCGAAAAAATAGGTAAGGGTA -TCCAGTTATCATGTCGTCAGGTCGCTTACCGGCGAATTCCTTGCTGTAGATCCATCACGC -GAGATGAACCATTTCTCCACCTGCGAATGGCTGCACTGCATAGACCTCTAGAACCCTCGA -CAACTGGACGACTCGACGAAATAGCGGGGCTAATATATAGAGAAGACGGGGCACTCATTA -AGTTGAATCACGCGAGTTCAAGATTACTTAAGGTGGTAAGGTGATCATAGGCTCTACCTA -TGTACGCCTCAGGCACCAATATTACATAAAATCCGCATGATCAGTGACCCCGAGTTCCGG -GCCGACTGATTTCGCCTCCAATTCAGACTCATTCACCTTCCCAGTGACTGGGTTATCTGC -CAACCATGTTGTCAACATCTTTACGCCGGGCGGCATGGACTCCCATGTCGCTTCCCGGGA -TAACTCGCTCGAGTCAAAATCTTACCAGCAGCTTGCTCGGCGCAACCCGACCTCTGCACC -AATGCCGGTATTCCTCATCCTCCAAACCGCCCGTCCCGCCCAGCGATGGCTCCCGACGAA -TGGACACCTCTGCCCCGGCAAAGGGGGTCAACTCGTCTAACGAAAAGAGCAAGGCTAACC -GCCGTCGGGGGAAGGACAGCTCTGCCCGAAACGGATCTTCAAAGTCTAGCCAGCATGCCG -CATTTTCCAACCTCCCAAGTGTTCCATCGACACAGCACCGACAGCCACATGGTTAGTGCT -CAGACCCCAGCCAGCGTTATACATTGTTTTCCCTCAAAAGAATGATCTAAAGCTAACTTT -TTTTATTTCTTTGCTTGCTCCAAGATGTCCATGTAGCATCGTTCTTCTCCATCCACCGGC -CGATCTCAGTGACGACCACTGTCCCCCCAACATCGACTACCGATGCCTTCGAAGCGATAT -TCTCGTCAAAGCGGCCGCTAAAGAACGAACCGCAGGACGTCATCTTCACCCTTTCCTCGG -CCGTGCAGTCGATGGAAACTGGCCCTCAGGGTATGTCAGAATCCGAAGACCAGTTCCGAA -GCTTCAGCGAGCAGGACGGTGAACTGCGCATGCTGGATGGTGCCGACGCCAAGATGTCCG -TGGAGGAATTCACCCGTCGTCTTCGCCCATTCCAGCCTCCTCCCCCGCCCGTCCCCATGG -ACATGTCCGCTGCCGAGACGGCCGAGTCCACCGATGCTCAGATCAACAACGAGTACCAGA -CATACTCGACCGTACTGACTATCCGCGAGGCTCGCCACGCTGATGGCCGAAAGACCTATG -AGGCGCACACTGGACCTTTCGTTCGCAGTGGAGAGATGGAAGACCCATCTGATATGCGCG -ACGAGATCTCCATCGAGGCGCCCTCTGATTCCGCCCCAGGCATGACATACATGGAGCGTC -TGCGCAACAACCACACCATGCATGCCATCAGCACCATGAGACGGCGCAAGGCTAAGATGA -AGAAGCACAAGTTCAAGAAGCTGATGAAGAGAACGCGTACCCTGAGACGGAAACTCGACA -AGTAGACTCGTGTTTGAAGTTTCTTCATTCATCTTTGTTCTTGCTTTTTCAGTTGCATCT -TGCCTGCGCCTGTGCATTTTGGGTCGAGTCTAGTCTTGTGAGAGATCAGGGCATATATCC -TTGTTTTCATACATTTTTTTTATTACTCTCTCGTTCTTCCCTACCTATTCTCTTTCATTT -ATACTCGGCCAAATATTCACATATACCTCTTCTCCCTTGATATTTTTTACTTCGAACTGC -TATACTTGCGCAGAAGTAAATGCAGTCTTTGGTTTCTGTGTTATGTGGGCCCTAGCTCCC 
-CAATGCATTGCGAATAATTGCGGATAACCCAATAGATCTTGATTACTTGATTTTCGTTAT -GGTGCCGCCCAGTGGCTGCTTTCGCCGACCTGAGTCTGTCTTATCCCAGTAAATAATACT -ATGCGGGTAGCAATAGAGCAGGGGACCTCTCGATTTGAGATGAGCTCTTGTAAGCTTGCC -CACAATATTCCAATGCCTGGGAGTATCTATATCCTAAGTAGTCAATGAGTAACAATACCG -CGAGTCTGAATGCACAGAATAGGATTAACTGTTCTACCGAACCGAGTCCAAGGTGACGAA -AGCCACTTGATCATACAATTTAGTAATGTCGAGATGGAAAAGCTGATGTCCACAAGACAA -AGAAGTCAAGAAACAAAAAAACTCCTTATAAAAGATCCTGCCTAACCACCATAATGGGAG -AGAGGACGGAGACTCAAGCCCCGAATCAGGGAATTTCATATAAAGTACAGAGTCCAGCGC -GGTTTCATGCAATGTAAAGCCAAAAAAAATCCAATATCTACCTACCCGCACCTGTCCACT -GAGTCAAGACGAATGCTTGAACAGTCCGATTCTACATTCACTCAGTCATCGAATCTTCAC -TCAAATAGGCTGAAATCGATTTATCATTTCCGATATCATCCAATCCTGCATCCGCACCAG -CTTGATCATGTCAGATTCCCCGGCCGAAGAGACATATTGGTTTGCGTCGGGGAAATGTTG -AGAATGCATATCTAGGAATAAACCAAGAATCGCGTGGTTGCAGTTCATGGGGCGATTCGA -GGTTCGATTCCCTTAATGCGGCGGGAGACCTCATGTTTAGCAGCCTCCTTTCGAGCGAGA -GTTCATGAGAAGAGCAACAATCAACCCGTACAGACCTGCGAGGTGAGGAGGGTGGTTAGA -TGCGATTATACATATTGGCATGAGGAGAAGAGTCACGACTTACCCAAGACTTCGGCGAAA -ATGAGAATCAGAATCATTCCAACATACAGTCTAGGCTGTTGAGCCGTTCCACGAACACCC -GCGTCACCAACAATACCAATAGCGAAACTGCAATCTGAGTTAGGAACAAGGCTCTAAGAC -AATGGGAGACACCCCGGTGTCGCAAACTTACCCCGcagccaaaccagccagaccgacaga -cagacCGGCACCGAGCTGGATGAAACCAGTATAGAGAGGAAGCTTCTGTGTTAGGTCGTT -GGCGACCAGGACCGACACGACAAGACCGTAGATACCAATGATACCCGCCATGACAATGGG -GACAATGTCTGGAAACCAGGTCAGCAAATTGTTGCTATATCTTCTTGCACGAGACTAGGA -ACATCTCCTTGCCAGATTGGCAGAGCTCTTGTTGGGATCGACCTACTCTTGACGATCAGG -TCGGGGCGGAGGACAGCCATGGAGCAAACTCCGACACCGGCCTTAGCAGTTCCATATGCG -GCACCGAAGCAGGTGAAAACGATAGCGGATGTGCAGCCTAGCGCACCGAAGAAGGGCTGA -GGGAAGAAAAATTAGTTGTTGTTTCAATGCATTGTGAGCAGGTGGATGACGCAGAGGGGC -CGAGACACGGGCGATTGGCTAGTCGTGCTCGGTTGTTGGGATCTAGCATATAGTAAGACT -CACCGCATAGACGGGGCTGTCAGAATTCAAATGTTAGCAAGGTTGATCCATGATCCTGAG -GAATTGCGAGCTCGATAGCAAGGTGAAGTGTCGATCGGAAGCTGTCAAGCACCCAGGGGT -TCAAGGTCAGGTTGACGACTTACCAAAGTTCGGTGGCCATTTTGATGAAAGACGACCCAA -CAAGGTTCTCGTAAAGTAGTTTGAGCTTTGAGAATGAATAAAAACACCCGAGGTCGGCAA -GCTCAGTTGGATGCGGTCGGGAGGATGGAGAGTCCAGAGGCTTGGAGGTCGACGGCAAAC 
-ATGCTGGCCTCTCTAAGTCACGTGTAATATCTTGGCATATCCACCGGATAGGCAATGACC -AGTGTATGTGATATGTAGAGATTCAGCTGTGATGGTTTTATTTACTCCGTAGTTGTTAAT -GTATTGCATTATTGTATGTATCATTTTGTTTATACAGTTTCTAGAATAACTCGAGATCCT -GGAATGTTCAGCTTTGTACACGACTCTGATGCGGTTTCTCCTTTGATGGGCTGTGAGTAG -GCAATCGCCTTCTCGGACAATGCACCAAAGGTACGAGGCAGGGAAATGATCCCATTACTG -TCCTCTTCCGTTTCATGTGTCTCAAGTTCTATGGAGTCAGCTACCATTCCAAATTGATAG -CTGATAGGCTCTTTCTGCGAGTCATGATTGCTCGAAATTTTGCCCTGTGCCCCAGTCGGT -TGTACCGACACAAAGAACTGAATATAATCACCTGTAGTAGCCACCTGTGAGAGGAGAGAT -AAATCATCTACAGTGATTCCCTTATCAATAGAAGGGAGTGTATTCAACTGTGTGGTATCA -CGATCGAAAACCTGACTCAGTGCGACAAGATCGTCTTGTTTGGCGGTCCCCGAGAAGGTG -ACCTCGGGATCCAGTGAAAGAAGGCCTTGCTTCTTTCCCCATCCACCCCCACCACTCATG -ACATGGTGCAGTTTCCCGCCTTGGCGCAGAGATTGGGTCAAGCGATCCTGGGTAGACGAA -CCTGGATCGGTCACCTCCAAGTCAGGTTTCTCAACAAGCGCCCATACGGATACACGCTGG -TCAACGATATTATGTTCTGCGATATAGCGGGGAAGCTCCTTCTCGAGTTCGGACGAAGCA -GGTATTGTGTCTGTCGATGTCGCATCCGTTGATTTTGCTAGTTGCCGTAAAATGTTTCCC -ATGCTAGTCATGACTCTTCGCCGTTCAGTGACTGGATGTAATGGCAATTCCAATGAGGTA -TTGATCGCCTGGGAGGTTGCCGTAACCATGCAGCATGTGAGATCGATCGAACTGTCCAGA -GCATATCCGTTTGAGTCTTTGTCCCAAAGCCAGCGCATACCAAAGATTGTTGTTTCCCTC -CCATTGACAAATATAGTGTTCGCCAGGCGCAGTCCAATTTCGTGCGCCGGTCCTTTGCTG -GTTTGACCAATCACGTCGGTTTGGAATGAAAACAAGAGATTGGTCTCTTCACTAGCGGAG -CTTCGAATGAGACGGGGTGGTGCAGCCTTTGCCTGTACGTCTGACTGCCCTACTGCCAGA -AGCGACATCCCTTCAGACTCACTTGAATTTGGACTTTCGGACATAGTGTTTGGGAGTGTG -TCAACCACGGCTGCAACTGCATAGAGAGTATCGCACCCCGGGGAGTTTCCGTACAAGCGA -CTGAGGAATTGCGATAAGAAGGGGCTAGAAGAATCAAGCCAGTGGGCAAAGCTTGGGGTA -ACGAGAAGAAGAGGGATAGAAGCGGCCGCAGCTGATGGGCTCACAGGCCCTGAAGGAAAC -CTGGACAAAATATCATCTGGGGTCTTTCATTTTTTTTTAGCTGTGATACCCAGAGTGCAG -AAAGATCGAGTTTGCTTACTGTCTCTTCCGCTGTCTTTGTGAGCCATGTGTATGCAAAAG -GCTGTCCATCCTTGGCCAGGCTCTGTGTCGCGTACCATCTATTTGCTCGGCCGAGGCTTC -CACGGACTTTGCATGAGCGTAGAAGCTGCTGGTGTCCGCGATTGAGGATCACCCTCATTT -GTTTGAATCGCAATGACATTAAGGTATCAACGTTAAAGCTGGTCCAATTAGGAAAACGCG -GAAGTCATTGCGGAGGATAGTGGGTCAATCGCGGAGCGGAACATCTGTTTTCACCGTGCT -CCATATAGAGACTACCAATACACGAGAACTGTAACGCAACTATGGGCAGCGATTCTTCAT 
-ATCAGCCTCCCATATGGTTGCCCTAGCTCCATCAAGCATAGGACGCTGAAGGTACGGCAA -ATCTGAAACACATCTATTCGCAGACGGTATGTCGATTAGTACGGACGGTATGGCATTGGA -GCTGAGATGTGTGCTAGACATTGGCTGGGCAGGAGGTATCCTGCACAATTATGGATAGTC -AGGCAGCACATTTTTCTCGTAGTGCTTGTGCTGGAGATGGATTGTTCCATTCAATCCCAA -CGCACTAGACTCCGATTCTCGAAAGGCCAAGGTCTTTCCGCTTACAGGACATTAGAGAAT -CTGGATCACTATTCCAGCGGCCTCAGAAGAACATGCACACGCTGCTGTCCACTCTGAAAA -AGCAGCCCGTACATGAAATCTGCAGTGTGTATGCCTTGCAGTAAATCAGGAACTAGTTTT -ACTCCCTACGGAGTAGCGACTCGCGGAGCAGGGTTAAATACATAGCCCGTCGACCGGTGG -ACCTAATGAACCGTCTATGACCAGGATTTGACCAACGACATTGAGGATGGCTAGCAACAA -CTTTTAATCGGAGTTTGTGGCTCGCCATATACGTCCCGCAGTGGAACTCCTAGTCCACTA -TAATTGATTTTGACTGAAAAACGATGTGCAAACCCGACATCCACGGTTCGAGATCAGTCT -GCTGACTACAAGATATTATGTAGGACTTTGTCCACCAAATTTGGTATAGAATGTATGTAC -ATAGTCCCTCGGATACCTGGATGCTAGACCCTTCGCATCAGCAATAGTCCCCAGCCACTT -TTCTGTTTCAGTGAAGTCATAAGAGGCCAACCACATATCGATACCAGGCGCCTTGGCAGG -TGGGGGGCAAGGGCCATAGGGCTGGCAACCTAGGTTTTAGCGCCACACACTGTCTAGTAC -GGAGTAAGTACTCCGTACTTGGCGGACCTAACCAGTAAAGATTCCCAGCAGAGTTTAGAA -GTGACGATATCTGTAGAGATCGTCATAATCATGAGACTCAGGCTGCAGCATGGGCGCATA -CATGGATACTATGTATGTAGTAACAAAGTACTCTGTACTCATACTGTACAGAGTACTTTG -TACAACATATACGGACTATAAATATCGAAAACAGAACGGGaaagaaaaggaaaaaaaaga -aagaaaaaaaTGCAGGCTAATTCCTAGTGCCAAGCTCCAGGCAGCAAGCAAAAGAATGAA -CGGGATAGGTGAGGAAACCTAAGGGAGTCGCAGCCGCCAGCACAGAAGATCATTATCTTG -ACAACCTTGGACAGGTCTGCAATTGTACTCCGAATGAAGTAGTGATTACCAAGCTAAAAT -TAAAATAGTAAGAAAATTCTAAGAAAATCACTAAACAAAATAAAACAACACATTTTGGTC -ATAGTCTAAGCCACATTTGCAGCCTGTTGGTGCCTTGATTTTACTAAAATGGTCCAGATT -GTGTTTTTTTTCCCTCACAAGCCACAAGTGGCGGCCCATGATGTAATGACCCAAAGCTCA -GGTCAGGTCCCCTTTTAAGACTTCCCTCCCTCCACTTCCCTTCCCATTTCTCACCGTTAA -CATTTCCATCAAATCTGCCCAAAATGCCTTTCACCGCTTCGTAAGTTATCCATTCTAGTC -TTGAATCTCACGTTATTATATCAACCTGTCACTCCGTCACCCTCACTCTCAGCCCAGTTG -CCGGGGCGCCGGGGCGCCGGGTCGCCAGGTCACCAGCGAGACTATGTCCAGTGTACTAAT -ATCGTTGACAGTGACATCTGCAAGATCATCTTTGCCATCATCCTGCCTCCTCTGGGTGTT -TTCCTGGAGCGCGGATGTGGTGCTGATTTCCTGATCAACATCTTATTGACCATTCTCGGA -TGGATCCCCGGTGTTATTCACGCCATGTATGTGTTGACGAGTTTCCAGCCCCCCTTGACA 
-GCTGCTGACTATATTACAGCTACATTATCTTCAAATACTAGAGATACCCACCTCCCACAC -AACCCTCGCAACCAACATTTTCCAAGACTAATACCCCTTGGAAAGCGATGCATTGCGACC -AATTTCACAGTCCTTACTTACTCTTGTCTTGAGCGTTGAGTAAGGGCCGGTGCGATTCAA -TATTCAGAATGCCGGTGTCAGCGTGATTCCGTGGAGGTCCGGTTGAGACTAGCCTAATGC -AGCCTCCACCACTTGGCATCGGGTCCGACATGAGTCCGGCATGATGTCCATAATTCCCTA -ATCTTTTGTGTGGTCGCATGgttatgttttgttttgcttcttttcctttgtcttgcttGA -TACTGCGGGTTTTGCTTTTTCGTTATGAATGATACCAGCTCATACGGTCTGACTCTGGGG -CTTTGTGTTTACCTATGCTCACCGTTGGCTAGTCTTTTATGATTCAAGGAAATGGGATTT -CTACTACAAACCATCGAGTCACTTTATAATCGTAGAGAGGTATAGAAAATAAATAGCCGA -TCGATTTCTAGGTACCGGGTCCTCCATGTGGGTACGTACCTGGAATCCATTGTAGCCAGA -AAAAGAGCAAATCGAACAAGATTTTTGATCCTAACATACTGAGTCTTGAGTCTTATGGAT -CTCAGCCGCTTTCTCGATTTAATAGCCCAAGTCATTGATGAGAGCGTTGCATGAGGCGGG -TGCAACTGCCCCGTAGGCTGTCGTACAATCACGGTGTTCCGATGGCATGCCAGGTTGGCC -CATCAGCAAGTCCTGAATACATGCCAAACAGGTAACACGCCAGGAAAAATGTGACTTGAC -CACTTGGATCATTGAGCGAGTCGAAGTGTTCCAGCAGCCCTCAAATGGATCATCGTCGGT -ATACcatcatcatcatcatcatcatcatcatcaGGCGGGTAGCCCGCCACCCAAGCAAAA -CCCACGAGATGGAGCGTTGTATTACCCAAATTTGAAAGTCACAAGCCAATCATCCACCTG -GGTTGAGAGATCTGAAACTTAACCCGGCGCCATCCACAACTTGTTCTGCAGCCTAAGCGC -TTTCGATGGCCGGGCGCGTATCAGGGCCGCTTGCAAGGACCACTTCTGACAGGGCGCTTT -ACCCCTACAGCGATTCGTCTCCGGTGTGGGGGATCTTACCATTTCCAATGCCATGCGGTT -TCACTCTAGCGCTTATACAAACATAGCCTCAAGCACTATGTTTCAGAACTGTGTGCAAAT -GCAGATTCTTGGCCCTAACTCTCGATGGGCGTCATGGTCACTTTTGCCTCGCGCCTAGAA -TGGCGTCGCGCTTTGGCTTCTTGTTTAACTAGACTGGACTCCCTCTTTTGGTTGTTTGTT -TGGCATTTACATCTTCATTCTGGCGTTGTATCCTTGTTTCGAATTCACGCCATTTGCTAT -CCAGTTCGCAGACAAATCAACTAATTATGGTAATCACATGTACAAGTTTTCGCGGGTAAT -CACTCCCATGCAAGAAGACACAAGGTTAGTTATGGGAGAAGAGCACATGACAGGTCGTCG -CTCATAGTGAACGTGTACAGGGCTACCGCAGCATGCTCCTCCATCCCATTCACAGGATAC -TCTGCGCGATCCTCCTCTTACTCGCACTCGGCTCTGCCGCCGAAGATACCTCCAGCGATG -CATCGAACTCAGCTCTATACGACATGGTCCCAGACTGTGCGAAGGACTGCGTTGATAGCT -TCATCAAGTCCGAATATACACCTATGGAATGCAAGTCACCGTCCAACATCAAGTGCCTTT -GCCGCACCCAGACAGACAGTGGCTTGACATTGGGAGAAGCAGCTTTGAGCTGTGTTTTGT -CTTTATGCTCACAGACAATATCCGCAAAGTCCAAGGCTTATCACATATGCGACTCGGTGT 
-CTAAGGCTCTTCCGAAAACACACCAGACGATTACCGCGACGATCTTTTCCGATACAAGTA -CAACGGAGACGAGTGATGCAGAAACTACTCCAGTAGCTTTGATAACTGAACACGGTTCGG -AGGCCTCTGTGACACCATCTATAGTGAcatcggcatcggtatcggcatcggtattggcat -cggcatcgACTCCGACGACAACTACTCCCAGTCAAACATCCGTGATCACATTCTATGATT -CTTCTTCATCTTCGAGGCCCCAAGTGGAGCCAACGTCCTCTACTGAAGAAAGTTCCAGTG -AATCTTCTGAAGCCACGAATAAGAAAGGTGATCATGCTATTACTCCAGCGGCAGTCATTG -GAATGTCAGTAGCATCAGGTTTGGCAGGATCTTTCATCATCGGCGTTGCTGTTTTTTTCT -GCTGCAAGAGATGGCGTCGAAAACAGCGAGAGCAAGCACCACCACACATCTTTGAGATCG -GAGGCGCCATGTCTGAGCCGTCTGACTTTTCGAAACCGATGCCACCACTTCCAACAGACG -GTTTAGGTGTAGGGTCTTCATACAGCCCAAGGAGTGGTCGTGAGACAATGTCACAGCGAC -CGGGTACCTTCCATTCAATGGCCACCGTTCAGTCCCAGTCACGATACACGCCTCAATCCG -CTGCGGCATATCACTCGGGTCAGAGCAAAGAGCCCAAAGATCAAGAACGGATAGGTTTTG -CCATTAGCTCCGACTCAGACTGGGAAACCTCGCCTCGGACACAATCTTCACAGCACAGCA -TAGCACGACTAATACCAGATCCTGCAGCTGGACTATACCCAAAGCCGCTGAAGTGGAGTC -ATCGGCCACCTAGTGGAGAGACCCTCTTTGAAGAAGACGAAAGTCAACAAGCGGCGGCGG -CGGCAGGAATGATTCAAAACAACTCCCCAAGGCTAGGAACCCAGCCAAAACTAGCGGGAC -TGCCGTCCAATCCACGCGCGTTCATAGAGGGATTCCCAGCAGGAAAATTCAAACGAAGAT -CCAGGGCCTCAAGTACCTTAGCGCCTCCATTCAATCCCAACCAAGCCTTCAGAACCTCCT -CATCAAATAGCAGCGCTACACCCAACCCAACCTCCGAAGCCCCACAAACCGCCTACTCCT -TTAACCCAAACACCCTCCTCGCCGCTCCCGTCAACACCACCCAGACCCAGAACCCCAGCC -TGTCACCAGGACGACAGGCCCCTCAGCATTGCCCCGCCCCTTCATCAACAACTCTCTCCC -CAAGCTCAGAGATAGTATCCAGGCCGCGGATCGTACGGGGGAATGACATCAAGCGTGTCC -AGATCCGCAACAGTCCACGACCGCCTAGCGAAGTCATCGCGCCATACTGTCCAGATGACT -TGTGGCTGGAGCGAGGACGCGCCCTCGCGCCGCCAAAATCCCGAGAGCCGTCTGGGGAAT -TGCCGTATCCTTCGGATATGTATTCTGGTGCTGTGCATTATCCCGATAGTCCGAAGAAAA -GAGTCGGCACCGAGTCAAATCGGGTTTCTCCGACTAGTCGGAATTTGACGCCGTCAAGGC -GGGGCGATGATTTGATTCTTCAGGTTGATTGATAGACTCATTTTGATGGGGCTGTGGAAT -TTATGTGAGAATGCGATTTTGTTGTATATTATTACCATGTTCATTGATACGGGTGGAGAG -CTTTGAATTTTGAATTTTGTGAAAACATCTGTATAGAAATTTGATAATCTGCGTTGAAGA -TCGGCTTATCTGGTTTTGATAGCTTTCGTCTCAGCTTTCTTCAATACAAGGTCTCTTTAG -TTCTCAAAAATGAGTTTGGAAATGAGAAAAGGGCAGAAATAGAAACGCGAAAAACGCGTC -AAGCGTATGAGGCCAGGTATTGGCGGGGCTTAGATAAGGACCTGAGGTCTCCCCCCAAAA 
-GAGTAGAAAACCCATCTGCCCGTGGACTTCTTCTACATCCCAATCTTCAGTGAGTTCGGA -GTTCATCCATTCTTGTTGCCAATGGCTTCAATGCAGACGTTCGTTCAGAGATCTACAATT -CTGCCGGTCTTTCCCCTAGAGCTCTGGGATATCATTATTGAATACCTATACGATGAAGAA -TCTGCCCAACACCTGAGCCGACTTGCTCAATCGTGTGCTACTCTTTATACAAGAATCAAC -TCTAAACTCTATAACCGCGTTCGCCTGTCTAATTCAGAGAGCGGTGCTCGTCTTGCTCTC -ACCATCAAGAAGCATCCAGAGTTAGCACCACTCATTCGTGAGATTCGACACAAGCAAGAT -GCTGGGTCAGAATTTTGGTCTTATCGTCATTTGAAATTCTACGAAACGGCTGCGACTTTG -CCGAATCTGGAGAACTTGAGTTTAAGAAGAAACCCGAGGCCTTTTGATTCTACTCGGTGG -GCTAGTACACATGCTAGAGATGACGCTATGTTGCAATGGGTGAGAAAGATTGCCTCTGGA -TCAGGCAGCGTCAAAGAGTACCGAGAGTTGGGATATGGGCCTACAGGGGAATCTTCGTTC -AGTCCTCCAGATCGCAAGCTTAGTTACATGCAAGACGGCGGGGCCGAGACTTTGTTCGTG -CACGCTTCTCTTTTAGACCCGCCGGGACTCCCCTCTCTGCGTGTTTGTGAGTCTTATAAC -TGACTTTATCTACTAAATCGAAAATCTAATTTCATCAGGTCACATCGGAAGCGACTATAA -TCATGACGAGCACAATCCTAAGATGGATGGCAGATGCCTACCAGTATTCGATGAAGCACT -ATTCAGCCACCCAGGTTTACGGAAACTCTGTGTCACTGGTGCTATTTTTCAACTTTCCCA -GTCTATACTTCCAGTATTAACCACATCGTCGCCTCTGGAGGAACTGACCCTTCTGAACTG -CATCATAAATCCATCGGACCTCGAAACAGCACTCGGGTATCCAAAAGCCTTGAAGCGGTT -CACCTTCAGAGGTCCTAGATCACAGGAGATGGTAGACCCTGTTGGGGAAACCCTATGTTA -TGTTCACTCGACGCTCATGCATGATAGTTCCCTGGAATACATGGATTACGATCTTTACTG -GGGTGGTGATGAGGAGACAGACTTCGGCGAACTAGTTCGCCTCAAGCATCTCACAACAAC -CCTCGCCACACTTACAGGGAGGGAATGCATAGAACTGGATCCAGCTGATGATATTCTGCC -GCCGAATCTGGAGAGCCTAACAATACGATACGACGAAGTGAAGCCATGGCTCCCTTCGGC -TATTTACGAGATGGTGAAAGATAAAAAACTCCCAAAGCTGCGTCGATTTACCTGCGAGAT -ACCGGAAATCATGGAACACCTTCCATCAATCAATGAAGACAAGTCCAATCCTCGAGCCGC -CGAAATATGTCAGGAGGGCAATACCTGGAAAAGCAAGTTTGCGGAGCTGAATGTGGATAT -GTCGATGGTCTCAGTTCCTTACCCGCTTGAAGTTTCTAAATACGACATGTGTTCTTGCGA -ATGCCTGAGCTTCTACCACCGGATGACCTACCATCCGTACAAGCGATTGGCAACTCCTTG -GGAGGAAAACGATTTGGGAGAGGAGGACGTGTTCTTTGATGACTGGGCAGATGTTGATGA -TGAAATTGACCTTGATATCCTCATGGATGATCTTGCGGATGAGTCTGGATCTGACGTGTC -GGCCTGAAGGCTTTACTCTCATACAACCCCCAGGTTTGATGCATCTCTTGTGCCTTGCAC -GAATGCCAATATACAATTGTGGCATCAGTATCTTCGATGTGTGAGAACGAATCGAACTCG -TCGGAAGTAGAAACTAAATCAAAATATCTGAAATCCAGTATATCTTCTAGGGGAATATGT 
-GATTGGTCCTATGGATCTATCCATTTGTTGTGTATCTAGAATCAATTAAAGACCCCTAGG -AGGATAAGAACTATACAGTGTAACTCAAAGCAGCCCAGTGGAGACGCTGACAGGGGTTTG -CTTCAGATTCTGTCGATCTTCATGTGTCGGCAGATACATCTCTGGATCAAGATCTTTTGC -TTATATCTGCATCCATGGGGGGGCCCTTCTGGCTTATACCAAGAAGCTTACGTAGGGAGC -CAAATGTTCGTCAGGTAAGAAAAACATTAGGTTTCGACTTGGCATGTGAAAAGCTACCAA -CAATGCCCACTTACTCAACTCAACGACACTCGCCTACCTACCGTGATTGTGACCCTCATT -ACTCTTAACAAATACAGACGCACAGACTGCGCCTAGACTCAAGGCTAAAGTCAACGATTC -AGCTTCCGTTCCCATCAAATCACTCGGGTGCAAAGTTCACCACACTGGGCTTCCGCAGAC -AGCCGCGTGTAACGCCAGCCCTAGACCCTACCCTATTCAGATTCCATCCCTTAATCAAGT -GCTGAATTCATAGAGACTGGCAAGCTTGCGGTGCGCAGGATCTTGTGATCAGGCAAAGCC -TTTATCGTGGTTCAGGGTATGCACGCACAGTGGCGCTCTGACCAAGTCTGGCAGGTATCC -CAATTATTAGGAGTTCATCTGAGTTCAGGGCATGAAAATGTTCCTTGCGCTGGGTATTCA -AATAGCCAGACTGGAACTTCTGCTCTTGACCCCTACTTTTTTGTATTCGCAAAGAAGTTT -TCATAGGGCATCCCAAATTATCTGTGGTTGTGGCTCGCCCGAAGTCCTCTCATCTGCATG -TTGGCCCGAGGTCGTATCAGCATGTCCACACACCTGGTCTAGGCCCTACTCTTCTACACT -TTAAACCCAGATTATATCATGATCTTTATGCGGTATAACGCCATAAATCTGTTCTTAATT -TGCTGGGTTTGATATCCGTTCCTGATGAGGTTCAGGCTCATGACGTCAATGGCCCTGATG -CCTTGGATAATCCCTCGGACGCTTTTTTAATGGAACTCCGCGCTTTTTACTTAACCTGTT -GAGCACAAAAGCAGGTGCAAAGGTTGAACTAGCCAGTCGACTCCGAAAACCTGCCAATCA -TTGGCTATTTAACTCGGAAATGTTCCTACTTCAATTTCCAGGAACTCCACTTTCTTCCAC -ATCTCTTATCAAATCAACGTCATCAACCCGATGCACCGGTTTCCATGCGCCAAGGATGTG -CCGTTGATGATCCTTTTTGATTTCAGATTCGGGCCTGTCTTAACCCGGAATGACGAGGTC -TGAGCCTGATGAAAGGGGTGGGACCCCATTTTGGAGGGGAAGAGGGTCGACCCAAAAACC -CACTCCGTTTGGGCCTTTCTATATGAACTGGATTTCCCTCACCATTTCGATTCCATTTGC -ACTTTATCTGCTTTAATTCTGCATATATAAAATACCTGCCCTGCACATCGCAAAGATCTC -TATCGCTTGTGTCCCAGTTAAGAGACAAGAATTATACATCCTCATCACCGCAGTGATCAC -CGCGTACCCGCACGCCCCACACCCTTCATCATGGCCCCAAATAATGATAGATTCGGACTC -CAAAATCTCCGACTTGACCTGGACAACGCTCGGGCCGATCTTTATGGCCCCAATCTCATT -GTCCCTAAGGATTTGCTGTCTTCAAATTCTTCTACTTCTGACCATATCTTCGACTCCGAT -ATGACCAAGCCAGAAGTCTCATCAACGAATACCTCACCAATCAACACACCTATGCCAAAC -CCTCCTAAGGTTACTAGTGATGACTTTGCACTAGCCTTCGACATCGATGGCGTTCTCATC -AAGGGTGGCGAGCCCATTCCTTCAGCCGTTGATGCCATGAAGTATATCAACGGCGAAAAT 
-CCCTACGGAGTCAGAGTGTAAGCTTGTACCCTATGTGTTGGTTCCTTTATGATTCCTTCC -CGGCATGAATTCCATATTACCCCTCACCCCGGGGTGAGGGGTCAAAATTGCCTAGATGAA -TCTGAAATATGAATCTAACAGCAATATACTAGTCCTTACATTTTCTTGACCAACGGCGGC -GGCAAGACAGAGAAAGAGCGATGCCTCGATCTCAGCAGACAACTCGATCTGGACGTCGAC -CCTGGCCAGTTTATCTGTGGCCACACGCCTATGCGCGAAATGGCAGAGCGATACCACACC -GTCCTGGTAGTTGGTGGCGAGGGCGAGAAATGCCGCGTTGTGGCTGAAGGATACGGATTC -AAGGATGTCATCACACCGGGAGATATCATCAAGACAAGACACGATACTACACCGTTCCGG -ACGTTGACGGATGAGGAGCACGATAATTCCCGACTACTTGATCTAGACAACGTCCGCATC -GAAGCCATTTTCGTCTTCGCAGATAGTCGCGACTGGGCCGGCGACCAGCAGATCATTTTG -GATTGTCTCATGACCAAAGACGGCTGGTTGAATACACGGTCCGAGACCTTTACCGAAGGT -CCACCAGTGTTTTTCTCACATACTGATGTGGTTTGGTCAACATCGCATGAACATTCACGT -CTTGGAATGGGAGCGCTGCGCGCTTCCCTTGAGGCCGTTTACACGGCTCTCACAGGCAAA -GACTTGAACACTATTGCCTTCGGTAAGCCGCAAATCGGAACCTATGAATTTGCCACCCGG -CTTTTGCAAAAGTGGCGAAAGGACTCTTGTGGAATCGACAAGTCTCCGTCTACCGTGTAT -TTCATCGGCGATACTCCGGAGTCCGACATCCGCGGCACAAATGAATTCAATAAGACCACG -GAAAATGACTGGTTCTCGATTTTGGTTAAGACTGGAGTCTACCAGGATGGTGCTGTTCCG -CGCTATCCACCTAGGAAGATTTGCGATAATGTTTTCGATGCCGTCAAATTTGCTATCGAG -CGCGAGCACCGAAAGACTTCCAAGGGCACAACCGTTTCCGAGCTTGATTTTGACACCAGC -CAAGAAGTTCGCAAATAACCAAATTCCGGTATTATGTGATCCTCTTTCGGCCACGGCTGC -ACTCTCAATCGATTTCCCATCAGTCTAGTCTATTCCTACGATTATTTTAATGGCCTGTTT -GATATGATTCCCAGTTCTTCTGAACTTATACAATTTGTCGCGATTCATGGCATAGGCCTA -CTTTTACATATACAGTTCCAATAAGGGGTATATATTTTCTAGTACCACCAGAACCCATGA -ATGAATTAAGAAATGTTCTACAAGAGCAATCCGAGCCAGGCCTTGTGTAGTGTTCCTAGT -ATTTTGAGATCCCATTTATTAGATATTTTCTCTCTTGTAATGGACTTTGAATGTTCATAA -CTAATTGGTATCCACGGGGAGATAAGTAAATAACTAAATAGTTAACTAGTTAGTTAACGT -AGATATCAGAAGCCTGAATACTGAGGCGCTCCGCCCTGTGGTTGACATGACCCACCATAC -GGGAAAGTCCTCATAAAAACAGAATCTCTCCTTGAACAATATATCCAACTTCCTTAGCGC -AAAAGCAGCGCCAGGTAACAAGAATGGTACGAACCCGCTGGATATATCCCGATCTACCCT -TACTCATGCGCCAGCCTAGATCCCTAGGACAGAGGCAGTGTACCTTGATGATGCCACGCT -GCGGACATTGACCACGGAGGTTCTATCCTCCCAGTCAATACCTTCACTCTCCGAAGAAGA -AAAAAGCCTAGCCAAAAACGTGCCCACGGAATATAATGCAATCATCATGCACCGCACTAT -ATTTTATGCACAAGGCGGCGGACAGCCCAGCGATACCGGCGCAATTGGACCAGTAGATCA 
-AGATCCCACATTTGAAGTCTCACTGGTACGCAAGACGCCCGATGGACGGTTCTTGCATTT -TGGAAAATTCCTAGATGCTGCCTCTCCCTTCGTCACAGGCCAGTCTGTCGTGCAGAAGGT -CGATGACTCAAAACGCAACTATCATTCCCGGCTCCATACAGCTGGCCACATTGTTGGGCT -AGCCATGCAGCTTCTGATGCCGGATAAGAAAAAGGTTAAGGCGAATCATTTCCCGAGGGA -GGCTTCCATGGAGTATGAGGGGCTACTGTATAATGAGCACAAGCCGGTGATCCAGGAGAA -GGTCGATGAGCTTGTACGGTTGGATCTGCCTATTCTTATTTCCTGGCTACAGGGTGTCGT -GCAGGTTGGCGATGGGGAAGGTCCTGAAGAGGGTTCCCATAATGGTCGTACTCGGATTGC -AAGTATAGGTGGGCTCGATCACAATCCTTGTGGTGGTACTCATGTGGCGAGGACGAGCTT -GGTTGGGTCTGTTGTCATTCGCAAGATCAGTCGGCAAAAGGGGATCAGTCGGGTGTCTTA -TGATGTTACTCCAGGAATTGAGGCATAGTTTTGTATTTGCTTGTCATTATGGTGATATGA -AATATCCGAAAATTAGTTAGTGGAGTGCTGAGACTATAGATACATGCTCTAACGCTGGAG -AAAGGCTTGATATTTAATCACGAAATCATAACGCGAACTCATTGTCTAGCCAATGCTTTA -TCACCATCTTGCCCTCTCGGAAACCATCTTTCTCGGCCCGGACGATGATAGCTGCGTGGC -TGTGGTGGTGCGTCTGTATGTGCACGGGATATTTCCGAATTCCAACAGAATTGAGGGAGT -CGATTATGCCAAATTGCTCGTCGGTCAATGCAAGCTTGGGAGAATAGGCTGAAACCTCCA -TCGTTGATGAACGTCTATCCAATGCAGACTCAACGTCGGCATCGTTTTGAGCAGACTTCG -AGACAGACAATCTAGCGGACTTCTCGGGCGCTGTGTCAGATACTTCAGTGTCGCTGTTGG -ACAAATAAGCTGGTTCTTGAATAGCATTGACATTCTCAAAGGCTTGCTCCACGGCATGCT -GAACGTCTCGGATCATAACCGGTACTCTGTATCTTCCAAAGCGCTTGCCATTCTCGCCAA -CTTCATGCATTCGAATGCGTTTCGAGCTGCGAAATGTCTGGATCCCGGCATTTATCAAGA -ATAGTGTAGAAGCCAGAGGCACGAGAACAACGAGTAGTAGCGACAGAGGTATCCAGAGGG -TTGCCGATTTCACTTTCTTCCAGAATCGCGCTCTTATGGTTTCAGGTTTTCTCTCCGCCG -GAGGAAGCACATGAAGGTCGGGATCGATAATGACATCTTCATAACCCTCGACGTAGTTGA -TATTCAGGTCTTCCAGATCCCGGAATGGGTCCACTTTGGAGAGACCGGAGGTATAGAAGA -GAACTGAGCGGTCATTGACAATATTCGCATAAACACTCCGCTGTCGGAACTTCTTCAATC -CAGTCATGAAGATGCTCCCCGGATCAGCCAGAACACCGAGCAGTGGTCGCCCAGTATCTC -GGAAGGAATCAACCAGCCACATTTGACGGCCAGATGCCGAGATCGTCCTTGGCCCTAGAA -CATTGAAGATTTGATCCTTGTACCCTTTCAATGGGGCTCTCACTCCAACATGTGGCGTGG -CAAAGGTTGTGAAGTTCATGGGTTCTAGCTTATCTAGCCAGCCCCGCGCATTGAGCAAGC -CGATCGCATAGCGCGCGAGCAGACCACCAAGCGAATATCCAATAATACTGATTTTTGTGA -TCTTTCGTCCCTTCTCGGCTAGGGATTCAAGCGTCTCCTCAACCTCGTGCACTAATCGCT -CTCCACCCAACTCGATTCCATCGTAGGTATAGTTGCCACTATTTGCTTTCGGGCAGAAGA 
-CGTAGAGATGGCTCCCATATCTATCTCGGAGTGAGGATGCGATATAATCGAGGTGCGAAG -GATTTCCCCAAAGTCTGAAGATTTGGCGCATTAGCGGGCTTATCAAGACGTCATTGGAAA -GGGGACACTAGCATACCCGTGAACCAAAACACAAAGGTGGTCGGCTTTGGAGATATCCGG -TAGTGTTTTGCTTGTTGACTCCATCTTTGAGACCGAAGTTTATTCCAGAAGAAAAGAAAA -AGGAGCCTACTCAAGGAGTATTTAACCTGCGGGTGTCTGTTTGCCCGAAGGCCGCGGATC -TGCGGGCGTCAGCCTGGAGCTATCGGCGATAAGGGACTAGGCGAATTCTCGAAGTACAAA -TTAAATTTATATATAGATCTGTACATAGATCTGTAAGTAGACATGTGTTTGCTCGTCGGA -GATCATGGACTTCTCAATCATATCGAGCAGGGCCATGAATTAATAGGAAACAACCATCAG -GTGAACCAGAGCATGTGTTTCTGCCTGAAATATCAGGCATGTGTTTGTCTTCTTTGCATC -CTTAACTTATCACTTCACTGAACAGTTTCCAATAATGTTGTCCCCTAGTGATGTCAATCT -GATCTCCTAATCCCTTCAAATCTACGATTTCTGTCTTAAGCTCCAGTCAATTTGTTTGGG -ATTACACAGTTTAGCTCTATTTTAATGGTCTTCGCCACAAGCAAGCCCTGAAGGATGCGA -TAGCGTTACTTGCAATATTCGTTTCGGTCTATTCAGTGTACAAAACAACCACAGAAGGGT -CTATTGGGACGATAGCTATACATATTCATTATGTATTTCTAAGATCTGTGATTGATTTTG -ACTGGTGGCTTTTACAGTTTACACGTGGATATTTACACCTTAACCTGTCTTCTCATCTGG -TCATCATTCATCATGTTCACATCGCTCGGCCTGTTTGGGTCAGTTTCATGCCCGCGAGGC -AAGGATTGCAGTCTTCTTAAATGCATATTCTCTCATGATAATGTGCCGATTCCAGCTGAG -GCTCAGCCTCTGAAGATTCCCGCGTCAGGTGACGATGGCCCTCCCTCTAAACGGGTCAAA -GTGGAAAGCGTACAGTTGCCAGGGTCCTCCATCCCATCTTGCAACTTAACACAATCGTCC -CTCGAAATGGGTCGGCGCTCATCGAAACCAGTCGCCCCTAAGCAAAATCAAAATACACAA -GACAGCTCAAGAGCTCAAGAAGCCACCCAGCCCACCTCAAGCCATGGCCCACCAAACCCT -TCTGGTAGTAAGCCTGCGAAGCACATCAACACCGCATCCGCTCCATCTTCAGCACAGCTT -CCCCCACGCAAAGCACCAAAAGAAACTTTGAATCCTCGCATGCTTGCTCATGCACCGGCA -ACTCACGGCACTCGGACTGCTATTCTCCAGAAGCTGCACTCCGCCATGACTGCTCTCAAT -GAAAAGCTACGAAAGGACAAAAATAGCTCGAATAGATGTTTCGTCCTAACCCCCGATGAA -ATCATCACAATGGCTCTGGATGAGGAAGAAAAGTTCGCGAGGGACAGTCCAAGCGTTTAC -GGCAATGTTATCAGGCTTCGAATCGTGAGAGTCACAAAGATGGGCATTGATGAATGGATC -AAGGAGGTGATGTCTCATCTGAATACACGATATTATAAGATAAACCCCGTCCAAAAGCCG -CAGGCGATACCCAGGCCGATCAACACAGGCCTCACTCCTGCAGAAGAAATCACAGTTGTA -TCCCAGCTAGTCACACCCCTCACTGGCCTCGAAGAACATGGATATGTCACAACACAGCCT -ACGAACGCAGAGATAGAAACCGCAAAGCGAGGTGTTGATGGGTCCAAGGGCTGGGAGAAG -TGTGACAGATGCAGTCAACGATTCCAGGTATTTCCAGGCCGCCGAGAGGATGGTGCCCTG 
-GCCAGCGGTGGCCACTGTGACTATCATCCAGGACGGCCCGTTTACCCTCAGCGCAAGAAA -ACAGACCATGTTACCGGTCCCTCCCAGCCATACTTCCCCTGCTGCGGCGAAGCATTGGGG -ACATCTACAGGCTGCACCAGAGCCGAGACACATGTCTTCAAGGTCTCAGAGACCAAGCGA -CTTGCATCCATTCTCCAATTCAAGACAACTCCATCGCAGCCAGGAAAGGGACCACTGGAA -CCAATCTCTATTGATTGCGAGATGGGCTACACGACATTGGGTCTCGAATTGATTCGTCTC -ACTGCGGTCAGCTGGCCTAAGGGGTCTGATTTGCTTGATGTGTTGGTCCGGCCTATGGGC -GAGGTATTAGACCTCAACACGCGCTTCTCCGGTGTTACGCCACAACACTATGCCTCCGCC -AAGCCATATGGCACTCCAATTCCCAGCATATCTTCACCGTCAATAGACGGGGAAAAGAAA -ATCAACCCACCTTTGCAACTTGTTCAGTCACCCACAGAGGCGCGCGATTTGCTCCTTCAG -CTCCTTCAGCCTGAGACACCTCTCATCGGCCATGCAATTGACAATGACCTCAACGCCTGC -CGTATCATCCACCCTACCGTTATTGACACCGTCCTCCTGTATCCCCACCCCAAGGGTCTT -CCCATTCGTATGGGTCTCAAGGCTCTTGCTCAAAAGCACCTTAGCCGTGACATCCAGGTC -GGGGATAATGGGCACGACTCAAAGGAGGATTCGATTGCCACGGGTGATCTTGTAAGAGTC -AAAGTGGGAGAGAAATGGAAACTGTTGAAGAAGGACGGACACAAAATCGAGGGGGGAAAG -TTGGTTCATCCGAGTGGCCGAGTATCGACAGCCACATCTTCATGGACAACGGTCCACAAA -AGAAAGTTAAGCAGATGATCGTGGGGATGTATTCCTCACTCCCATTTGCCATGATATGAT -TTTCACTTTTGTATACGATACCCATGTATGTGCACTTAACGGACGAGAGTTTACAATCTA -CATTATTGCATAGGAATAATATGAGGGTATAAACTTTTTTAGTTTTATTCGTGGCCTCAT -AGACCAATCTAATCCCCCCATGGCTCTCACTCTCAATCCAAACAGACGTTCCGAGCGCGA -TACACAGACTAGAAGCGATCGATCAACGGGGCAAAGAAACAAGACATGTCAGGAGCGACA -CGCTTATTCCTCATCCTCCTCAGCCTCGTCGTTGACGACGTTGTAGAAGCGGAGCTCGTA -GACACCCTTGGAGGTGGAGACGACACGGAGCCAGTCACGGAGCTGCTGCTTCTTGAGGTA -CTTCTTGGTGCTGATTTGCAAAAAAGGCAATGGTCAACAAAACTGCAGTCGAGCATATCG -GGAGTCACCCCGGGGAATAGATGGAACTCACAGGTACTTCAGGTAGCGGCCGGAGAAGGG -GATGTGGGCAACAACCTCGATCTTGCCATCGCCGGACTGGGAGATGACGACGTTCTCGCC -AAGGTTACCGACACGGCCCTCGACCTTGATGCGGTCGTGGAGGAACTTCTCGAAGGCGGA -GACATCGAAGATCTTGTCGCTAGCGGGCTGGGAAGCGTTGATAACGAACTTCTGGGTAAC -CTTCTGGGCCTTGCGACCACGAGCCTATACCATGTAGAATCGAATTAGCCTACTGCTCCA -TAATGTCGAGCCCCGGTGGTAATCTCGCGACTTCGTCTTCAACATCAGAGACTCGATAGA -TTTTTCAATACCGAGTTTCACAGTGTTGTCGAAGACGAGCTCGCAGAGACTCCGGTGGGG -GGAACAGCAAAGCGATTTGGACTTCAACGTACAGCAGCGGGAGCCATTGTTAATGGATTT -GGGGGGGATGGATTTTGGAGAGTTGGGAAAGGTCGCAAAGAGGTGAATATCCTGCGCCGA 
-CGAGAGAGTGGTCAAGACAACTTCCTAAATTTCACAGTGGAGCCGATTTGGTTATCAAAA -ATTCACGTCCAGAGGTGTGTCGCACTAGTCTATATTGGGACATAGGGTAGGGCAAGATCG -GGTGAGTCAGCTGGTTGAGTCAGCACATCGCTTAGGCTCGGAGCTACCTGCTATCTCCGA -TTGGCTCGTCTAATCGCGTGACTGGCATAGACCCACCGCCCTCAGGGTATAAGTAAACCC -CTCTGCCCTCATGGATTCCTCTTCTTTTCTTTCTTCTTCAGCTCTCTTTGCCTATTGGCT -TAGATCAAGTGTAGTATCTGTTCTTTTCAGTTTAATCTCTGAAAGTTTCCGTCCAGGAAA -CACTTTGATTATTCCTATTTTTGGACCCCGCGACTTGGCCCCTGTGCTTGCGCATGCCTC -GTCGCACAGTGTCATTGGGCTCACACTACCCCCGATCGACGCGAACTCCTTTTTCATCAA -CTGTTCGGCTTGTCCGGGCGCTTGGAGAAGATGCCTTATTCTTGAAATCTAGAGTCTTGA -AAATGTAAATATTCATCAACAGTTCGATCCTAGCCTGTTTTCGATTTCTTTTCGTCATAA -CCCCGAAGATCACTGGTGATTGTAGATTACATGTGTCGTACGATAAACTTCGTTGCATAC -TCTTATGAGAGCTTGGATAAGATGCCTTCGACAACTTCATCGCTTGATGTCTCTAGGACG -ATATATCTACTGTGTGTCTTTGAGGAATGGACCAAGCATCTTAGACAACCCGCAACTACT -TCGTACTGTGTACTCCGTATAAAATAGCTAGATGGGTCACATGACGTGACAGCTTCGATC -GCGGGGGGCCAATCGATAAGCCTTCCATTTTTTTCCATTCCGCATACATACTCCAAGGTA -TTGACGCAAAACCAGCCATCGCAATGGGTTCTATCGTCCTCCCTCACTTGCAAACAGCCT -GGCATGTTGACCAGGCTATCCTTTCCGAAGAAGAACGCCTGGTGGTGATACGATTCGGGC -GAGACCACGATCCCGACTGTGAGTGCCACCCGCATCCATTCGCAGGGTCCCAGACTAAAC -CACATTCCCAGGTATCCGCCAAGATGAGGTCCTCTACAAAATCGCTGAGCGGGTAAAGAG -TAAGTGACCAAGATTAAAACCCCCAGCAAACCGACCAGCGGCTAACAGGGCTACCTCCAG -ACTTCGCCGTCATCTACCTATGCGACCTCGACCAGGTGCCGGACTTCAACCAGATGTACG -AGCTTTACGATCGGATGACAATCATGTTCTTCTACCGCAACAAGCATATGATGTGCGATT -TCGGCACGGGAAACAACAACAAGTTGAACTGGGTGCTGGAGGACAAGCAGGAGTTGATCG -ACATCATCGAAACGATATACAAGGGCGCCAAGAAGGGCCGTGGTCTGGTGGTCAGCCCCA -AAGGTACAAGCAATATCCACTCTGGAATATGAATACCTTGCGCTAACAGCCGGCTTTTAC -AGATTACTCGACCAGATACCGGTACTAAGCATTGCCTCTCTCGCCGCGTGATCACACTGG -CACCGGCCAGTGTGTATCACTTTCTTACGACATTTACGACGAGCATAGACTTGGTCTGGC -AATAATACAACCTGCAAATATGTTCATGGTTCTGGAGTTTTTTTTTTTGTTTCCGGGTAG -GAGTCGGTTTTGCAGTCGTTTCGACTGGGGTGACCACAGGTCTTATACAAGCCTCATCAT -TTGAGATCCAAATCTCTTCATAAGCAACTGGGAACACTCATAAGCTTTATCAAACATTCC -GTGGCCGGTAGCTGTAGTCTGGTGTCGTAAATCATAATCACATGGCACCTTATGACCTTA -ACTTGGAGGATATTTCATTTTGCGAAAGTAGAGTATGTTCCAAGAAGCGATATCATAGCT 
-TTTCATATGACTTTCAGTCTTGAAAAATAATTGTGGCATGTTGCGCAAGCTATCGTAATT -TTTTTTTAACATGGATTTTGACGTAACGCACTTCGAACACCACATCTCGCAGATCTGCCT -TTGATTCTCTCAAATACCTATCTGCAACCACAACCCAGTAACAAAATGATTGACCAACGT -ATTTTCGAGAACCTCCAGACGAAGATCGACGAGGAGAGCACTGTCCGGGATGTAAGTCAT -ACCAACGCGTCCAGAGCTTGGTGAATTCTTTGCGCGCCAAGCCTACACCGGAACAAGGCC -CTCGAGCCAAAAACATCGACGAATAGCTAACCCTTCTGTGTAGGAGCTACATGAGATTGT -CCAGACACTCGCTAGAAGAGGTATGCTTTGCAATTCGTGACTCCGACCAGGATTTTCAGG -ATTTCTGCCCTTCCGGGGACGGCGGATTGGAGTTGCAGGCTGAACATGAGGGCTGATGTT -GGTTGATACAGGACGTTCCACACAGGCTATCCTGTCGCGAGCTCATTCCACTCCCGCTGA -TCAGCGTAAGTATCCAGTATCTATTGCAAGCAGTCTGATGGGAGTATTGGGATGTAATTC -TATTCGATCTTTGGCCTCACTGCCTGATCACTGCCCTTTtctctctctctctctctctct -ctctctctctctctctTCTTCCCCGTATAACCTTCAAATCCCTTCAAATTTGCCCCGGGC -AGCATTCAGCACTATTCCTAAATGTCTCATGGATCATCTTGCTGACTTCTACAATAGTGA -GGCCTGTGCTTGACGATGTTACTAAGGAAATCCTTGCCCAGAAGCAAGAGGTTGCTCGTC -TGAAGGCCGTGGCAGATCAACATCCCTTTTACAAGTACAACGGGCTATGGACCCGTGAGC -TGCAAAATCTGGTTTGCTTTCTGCATTCAACGTCTGAAGACTTCTGCTGAATAAATCTAT -CAGGTCGCCTCTATTGAGCTTTGCGCATGGCTGGGAGGGCTCCAAGAGCACAAGGGACCC -AGCTCAACATCTTTCATGACAATCGAGGACGTGGGCAAGTTCCTGGACAGTACGTTTCTG -TCTCCCTATCTATAATAAGCACCACGGGTGCTGTATGCAAAGTCAATTTACAGTACTGAG -TGGCATTCTATCATTAGTTCCTGTCAACCTCAAAGAGCAGGATGCCTTCCACTTGACGAT -TGAGGAGTACTTACTGGCACTGATTGCGATGGTCGAGGAACTGGTTCGTTTCCCGCAATC -TCAGAGCATTTCTGTCTGGCTGCAGCTAATTTGTTCCCAGGCTCGCTTAGCCGTCAACTC -AGTCACCCTGGGTGACTACACCCGTCCCATGCAGATTGGCAACTTTGTCAAGGAATTGTT -CGCTGGATTCCAGCTCTTGAATTTGAAGAACGATATCCTACGCAAGCGCAGCGATGGTAT -CAAATACAGCGTATGTTGAGTCCATTGCAACGCCTACTTCGCTGTTTTGAATTTCTAATT -TATCTGGAACTACAGGTCAAGAAGGTCGAGGATGTGGTCTATGATCTGTCGTTAAGGAAC -CTGGTCCCCAAGGGTGGTTCCTCTGCTTGAATGGGGGTATCAAGGACTCCCGGGCTTACC -GAATCGAGCCTCATGTTATACGGAAACGGAAATAGATCCAAGAACTGAGCAAGCGACTGT -TGCAAATTTGGTATCAATGGGCTTTGATATTGGAACATGAATTACTATTACCACTCGGTA -TAACTCCCATCGTAAACCAAAATCACAAACCGAAAAGGAGGAATATGTCACAAGCCGAGG -CACCGGGATATAGATACAGAAAAACAGATGAAGGTCTCACCATAATGGTTCAGAAAATAA -CACCTCAAGCACAGCCTTACGACAACTTAGCCTGCTCAGCTCCAGCCTCACTCTTCTTCA 
-GAGAAGCAAGAGTGCCCTCTTTCTTCGCCTTCTCGATCTCCTGAACGATGGTGTCCTGAA -GTCCTTCGGGCTTGGTAAGGGAAGCCCAGCGGCCTACAACCTTGCCATCGGCGGAGACGA -GGAACTTCTCAAAGTTCCACTTGACACGCTTCATGCCCAGAAGCCCAGGCTGTTGCTCCT -TCAAGAAAGTCCAGACCGGTGCAGCGTTAGCGCCGTTAACATCCAGTTTTCCGAGCACAG -GGAATGTGACTCCGTAGTTGACCTGACAGAAAGACTGGATTTCATCATCCGAGCCAGGGT -CTTGGCTGCCGAACTGGTTGCATGGGAATCCGATGACGGTGAAGTCTTCGGGATACTGGG -CCTTCAGGTTCTGGTAGAGCTTTTCCAGGCCTTCAAACTGGGGAGTGAAGCCACATTTAG -AGGCAGTGTTCACGACTAGGACGACCTTGCCGTTCAACGAGGAGAGGGGGAAGGGTGCGC -CCTTTTCTGTTCGCATGCGTAGTTTGGTTAGCGATTAGGTACTTTGGGGCTTGTGTATGG -GTGAGGAAGTAGATAGGCAAGGGCGCAGGGGTTATACGTACTATCGACGGGCTCAAAGTC -AAAGAAAGATGTAGCAGACGACATGGTTGTTATTTGACGTGGATGTGCGAAAAGAGGGAA -CTGTCTCTGGAGTGTTCGCGGGGGTGAATACAATGACGTCGTTAACTGGAAAGTTGACCG -GAAAGACTTGGCAGAATGTGATACAGCGGAATTTTGCAAACACGAGAACATGACTCTCTT -CTTGTAGTTCTTCTGATGGTCTAGATGAACCGAGGTCGTGTCACTTTCTTACCAATTGGC -CACGGGCTGGCCACGCGGTATCCAGAGGAGCCGACGGACTTGGAAATCGTCCGAGCCGAT -TGATGATTGGCTTTGATTGGGCTGTTTTCTGATTGGTTCATCGAGCAAACTGGGCAGTGC -ATAACGTTATTCCAAATCATTGCTGACTGTAAATGAAAATCTAGATTCATAAGGGCTTTG -GGTTAGCTCCCAATCATATATTGTAAGACTTAATATTACTTGCGTGTCTGTATTCGTAGA -TAATCAAGGCCCCCATGATAATTACTCGATTAAATAGAGGTGGACCCCAAAACGAAAAAA -ATATATATGTTGAGAACACAACAGTTCCTTAGGCAACCTTTCCTGTCGGTAATTCCACGA -CACGGAAAAATAATAAAAAGGACCAAATAGGAAATACGTACATCTGTATAACCCTGAGAA -CATGCCATTTGGCAGTTAGCTGCCTACTTCCGTCAGCTATGGTAGTCAACACGAAGGTTC -TGGTAGAAAACCCATTGAGAGCCATACAAAATAAATGCTGCCTAAGATTTTCAACATGCG -AAGGCTTCTTGACACTACTTTTAACCATGTGTCACGCGTCAATGACCATGAATGGGATTC -AAGAGCTAGAAACTCAAGACTCCATACCCTAAAGCGGATCAATATCGGCATTAGAATCAC -GTCTTGTAGTTTTGGGGAGAGTGACATTACAATAAGGCTCCAATTAATCTACAGCTACGC -GCAACGTCGAGATCAAGTTAGGAGTATCAACCAAATATGGGGTTATGTATTTCGTTATGT -ATTTCCTAAAACCTGGAAGACTAGGACGGGCAAAGGAAAAAGAGAACATAAATGTGTGGC -AATGCTGTCCCTCTCAGGCTGAGTACCTGGAAGTCACACACGGATACCGAACACTACTTG -AAATTATTGTACATGAAAGTCGATTCATCATAAAACCCCCAAAGGATGACTATATGTAGG -AAGAAATGAAGAAGTAGAAAGTATATCGTAATGACTCAGTATGGCCACAGTAGTCTCTAT -CATACTAGTAGCTCGAGAGACAAGGCTTTAACCGGTACCAACGGTACTGCTGCCGATGGG 
-AAAACCGCCGTTGGCACGGCTGGGAATGGTCTCGATGGTCTCTCCAAGTGGGGCATTTTC -ATCGCGGTCAAGAGCACTTTCATTGATAATGGCGGGAACAGCGCTTTCAAGTACTACTAA -CAAGGTTGTTGAACTGACGCTTCGCAGGTAAACTGGCACCGGTAACAGGGTTCCAGTAAT -ATCAATAGCACTACCATCCATGCTAACACCGCTTTAAACAAGACTTGTAATGGTATTTAC -AGCCTCACGAAGAGCAGTAGTATAGTCACGCTTAGGACCACTAGTCAGGCCACCAGCGGT -AGAGGTGACAGACACAACAATGTTAGCGCGGTACCCATAACACGCACCACCAACGGCATC -CTGAATGCCAGCAATAGAGCCCTCCACATTGCTGAGGCTTGGGAGAGCGGTACAATGGGC -TAAGCTAGCAAGGGCGAGTGTAATGGTCAATCCAGTGAGCTTCATTATTGAGATAGTTGT -AAAGATAAAAAGGGCTGAAAAGACTAAGTTGGCGAAGAAGCAGTGTACAGACACAAAAAT -ATTGATGAGAAGTAAATGAATAACTAGTCTACAGAAGAGAATGCGGAAATGTCAGATTAA -AAGGCCGAAAATATTATAATTGATTCCCTGGGGTTGTTTCAGCAACATGCTTCGTGCGGT -TATCCGGAGCTCCCTCGGATTTGGGTATATCTCGTGTATTCTCAAATAGAGGAGATCGCA -ATGTGAGAGATTTCGCACGGTCTAGTGGAGCATATGTGACCAAACCAAATAGTTCTTCGT -CAGAATGAGAAAGAATAACACCAGCCAACCATCGGAAAATTTGCGAAACTAAGTGAGGGA -GGTATATGAGCGCATTGGCCAGAGGACTCGGGCAATGGTCGTTATATGCTAGGCTCAAAT -GTGTTGATCACCGACTTCCCAAGGGAGATTGGAGGTAAGTGTGCCCATGTGGAGCATTTG -AACTTGAAAACAAAACAGTTCCATCGGACATCCTTGCGTTGACTCTGCATGAAACCCAGC -CCCTACCGATATTGCAGTATCCACCCTGCCCTTGGTACACTTACTGGTGTATAGAGGGAT -GCTCTGGTTGGAAGTGGAAACTGGAAGCTGTCTGTTCGTTCTTCGTTGGAAAAACGGTCT -CTAGTTAACGAAACTGGTTCACTAGAGATATTCATGAATACCTCATCAGGGCAGACTGTG -ACTACATGTTCTATTTGTCCAGACTTTGAAGGAACCACCAAAAATGACCAACTGAGCATG -GGGCCACAAGCCATTGAGAATGAAAATCACAATTGAGGAGATTCAACATTCGACAAATCA -TAAATAAACTGTTGCATCGAGCCAATTTTCTTCTAGAGCATGAAGTACAGATAGCTCATT -GTTTACACACTCTTTAGTAGATGGGTCACTCGTGTCAAGCACTGTGCACGTAGTGGCTTG -AATATCATCCCCCACTGGTATGACGATGGCCGTCGATAGTACGGATAGTTGAGTAGTACG -GATAGTTAGGTGTTGCAGTGACCACCATTGTCCAATAGTGATGTATCGGCCCCATATAGA -TGAGAGGAGGCAGGGAGGGGGGGCGCACAAGAGCCTTATTGTTGACCTCATTTACTGTCG -TAGCTTGGAGTCCTAACTAGGGCATTGATTGGTAAATTTCAAGACAGTTATTCATCAGAG -ATATGAGGTACTGTCATGTGGAGTTAGGACATAGGATTCAGCGGTGAAGACTAAAGTGCG -AATTCGGAGTTGAGAAAAGTATCAGATGTGAGCAAAGAAGTTTCAGAGTCGATGTTCGAT -TAGTGTTGCTTGTGAAGTTTGTGCTGTTTGTGCAATTTGAGCAGTTTGTCTGTTTATTGT -GGTGTTGATGTTTTGGAATGTGGAAGGCACCGATTAATGCCTGAGGCAACCGGCGTTGTC 
-ACGTCAACAGCTCCAACTAGTCCTGAATAGGTAAGGCACAACATAACAGTGGCATGGCAG -CTGCCCCAAACATCATGAACTCGAGTTTTGTGTTGATTTCCTTTTCCTCTAATCCACCTG -TACAGGTTTTTCGCATCCCTTTTGGTTTCTCTTTATTCTTAACATCCTATCCTCATAAAC -TTATCTCCCTGAGTTCGATCATGAGCGTTTAGGTCTGACTTTTACCGCGACTATTCTCCT -ACTGGCTTTGCTATCCTCCGAGTCACTGTGGAACAACAGGACCTGTAACGGTCCCATATT -GATCTATCCTCGCTCTTCATCAGCGTTACACAATCACTCCAGTTCTAGATCAACAAACAA -TCCTCAGTATGACTACCGACTTCAATCACGGCGGCCAGATGATCGAGTACATCCAGGGTT -GGTACTATTTTCTTCGGCAACTTGAATGTCTTTCACCCAGAATTGAGAGCTAAGCAAGGC -TATTATTTAGACCGTCTCTACCTTGCGTCTTATGATTCTGCTCCCAGTTCCAGAACCCCG -TTTCCCTTCCATCTGGACGCACCCAAATCCCCAAGCAAGCGCGCACGTGCCCAGCCCGCC -ACTCCGAGCAAGCGCCGTTCTCCCGTGTACTTTACCGTCGATGACACTCTTCTTTACAAT -GCCTTCCACGCTGATTTCGGCCCACTCCACATTGGTCACCTTTACCGGTTTGCCGTGCTT -TTCCATGAGATTCTTGGCGATCCAGCCAACAGCGACCGCCCGGTGGTCTTCTATAGCAAG -ACTGATGCTCGGAGTCGCGCGAATGCGGCCTGCCTTGTTGCATGCTACATGGTCATGATT -CAATCCTGGCCACCTCATTTGGCTCTGGCACCCATCGCACAGGCAGACCCTCCTTACATG -CCTTTCCGTGATGCTGGATACAGCCAGGCGGATTTCATTTTGAACATTCAAGATGTCGTC -TATGGAGTGTGGAAGGCTAAGGAGCAGTCTCTTTGCGGACTTCGCGACTTCAATCTTGAG -GAGTAAGTACCACTGTTTACACTGCCGGAGACTGGATGCTGATTTTCTTCGATAGGTACG -AGAAATTTGAACGAGTTGATATGGGCGATTTCAACTGGATTACACCAGACTTTCTTGCCT -TTGCATCTCCTCAGCAGCAGCCCGTGGCACCCATTCCTGTCAACACGCCCGAGTACAACG -CATTGCCCGCAACAATCTCTGAAATCTCTTCCTCAAAGCTTCCCATGCCATTCAAGAATG -TTCTCGCCCATTTCCACCAGCGCAACGTCGGTCTTGTTGTACGACTGAATTCAGAGCTGT -ACTGCCCGTCTTACTTCACCGCAATGGGAATCGCTCATATCGACATGATCTTTGAGGATG -GCACCTGCCCTCCCCTGCAGCTCGTCAGACGATTTATCAAAATGGCCCATGAGATGATCA -CCATCAAGAAGAAGGGCATTGCAGTTCACTGCAAGGCAGGTCTCGGCCGTACGGGCTGCT -TGATTGGTGCATATCTGATCTACAAGTACGGATTCACCGCCAATGAAGTAATTGCCTTCA -TGAGATTCATGCGACCCGGTATGGTCGTTGGACCTCAGCAGCACTGGCTCCACCTCAACC -AAGGCGCCTTCCGTGAATGGTGGTTTGAGGATAGCATGCGCGAGAAGCTAGCACAGTCCA -CACCTGTAACTCCACGAGTGTCCACAAAGAAGCGCACTAGCAACGGGGTCGTCTCCACCC -CACCAAACAATAGCCACTCCAAACGCGCAGCCCTCGGAGAAATTGATCACAACGAGGCTG -CAGCTTACCCAGACCAGGATCTTCCTGCACCAACTCCTGGCCAGCCTCGCAAGTCTCACC -GAAAGGACTCACGTCACCACCCTTATTCCAGAACTGCCTCCGGATCCCTCGCTGTTGAAA 
-ACGAGCAACGGGGCCACCGCAGCCACAGGAAGAGTAACGAGAGCAGTGAGAGTGAAGAAG -AGATCCAGCTTCGTCGGTTAGCGCAGCGGTCATCCAGGTCTCCTGTTGCCTCCCCCACGT -CACGGTCTATCAGTTACTCTGCCACAGTCACAGCCAGCTACACTCTCGCAGAAGACAATC -ACAAGGATCAAGAGAATTGGGTTGATCACTCAGCACCCAAGACTCCCGTGAGCCGCAAGA -GCGGCGCTGCGCCTATCTCGGTCAGCAAAGTTCGCTCCAGTCCTCGCCGGGCAACGGAAA -ATACCCGCAGCGAGTCTCGTGGTGTTCGCAAGCCCAGTGGCCGTATTGGTAGCAATACCA -TTAGCCCCGCACGGGCAATTAAGACCATCCACTAAACGTTCTGCTCTTTTCTTTTGCCCC -AAGGAACTCTGCATGTTTCCTCGGAGCGCTTCTTCCATTCTTCTCGTTCCGCTGGGACAA -AACTTCTGCATACCCCTTTTTCGTGATGCATCTGGATGCCTTTTAATGTTTGAGTTTGCA -AATCTCGGTCTGTTGTTGTTTTCTGGAGCGAATACTGGTTATTGGGCATGTTGTGCACGG -CGTTTGGTTCTGTTGTTGATTGACGAACCATCAGATCTCTTGCAATTGGGTACCGAGTCA -CTTCGTGCCACGTGTTGTGTTATCTGCAGGTTTCGCTCTTGTATGTGTCAATTCTACGTC -TATATGACAATGCTGTTCATGAACTCCCTCGATATTTGCATTTCTCCCTCAGTCTTTAGA -GTATTGTATCTCTTGAACTCAACGTCAGTAATTTTGAACCCAGTCATCTGGGGTAGATCA -TAGTTGATTTTGTGTTCCACGGCCATGCCTTCCACGCTGTACCATATAGTATCACTCGCA -CAGCAAGTTCTACACGGAGTGAGGTTGGCGCCAAATAAAAGCCTGAGTATTCCATATCTC -TCCCATGTTGACTATTTTATGATCCAAGAGCGAGACCTCAATCCGCCCCCACTCAACTCA -ATTGGCAGCCAAAAACTATATGCACCGTTAGCGATCGTTCCATTTCATACCGAGGGGGTC -AAATCTCACCTCTTGAGCCTCTTCGTCCTCAACAGCAGAGCTATCCACCTCACTCTTCCC -GATAAGCCCAAGCTTCCCCTTCCTCACGAACCACCATAGCTCCCTCCGATCCCCACCTAG -CTCCTCCCGCAATATCTTACCCTTCAACTGACGCACAAAGTCATTGTACACCACCGGTTC -TTCATAATCCACTAGCTCTTCGCGCATAGTACCAAGGCCTTCGATAACGCGGTCGTAGTT -CGAGTGACCGAGACTGTGTGTTACCTGGGTCTCAATAATTTTAGCCATTTGCTGCACAGC -GTCGTGGATCGCGTCAATGTTTTCTGCGCGGGAAAGTGTTTGCTTGAACTCCGGGATTGC -GTTCTCGGTGGAAATTTTCGTTCGCTTGGGTTCAAGGCTAAGGAGGGCGTCGACGTCCAG -ACCTGACAGGGGTTTCTCTGTATCGCGCTGGCGTTTACGACCTTTTGTCTTTGGTGGAAC -TATTAAACGGAATCCGTTGGTCAAGAGTGTTTTCCGAATGTAGTTTGAGAATCGGTCATA -CCTTTCTTGACATCAGCCGTAGACATCAATCTCTCAAGATGGGACTGGGAGTTCTTGACT -ATCTCTTCTGAGGGGTGTGCGAATTCAGTGAGCCGCTCTGAGGGGTCAAGGACAGGGTCG -CTGGAATGCACTGCACGGTAGCGAACGGCCGACTCGATGCGGTGCAGTAATGGGGAGAAT -GACTCGTCAATTGGGAGATCATTAGTTGGTTCGCTAACTCTTTTGATTAGCTGGTGGGCA -TGATGATCAAGTGAACCCTAATTACCCTTCATCATTCGTAGTGATAAAATCCATGCTGTC 
-CACGTAGTTACTCATCGCATCCTTCAAATCGGCACTTGGAAGGTTGCGGTGTTCCGTTAC -CACTTTGCCAGAGACAGTGATGATCTTGTCTAAAGGTGGGAACCGATATGACCGTACGTC -TTCTGCAAATGGAAGCTGTACTTCTATCAAACACTCATAATCTGCCTCGACGGATGGGGC -TAGTAACACGAGCATCGGTGGTTTGGATTCTTTAGTCACCAATCGGGCAACCGCATACGA -GTCTAATTCATAGAGTGCATGAATGAGGGAAGACAACGCAAGAGAAGCATTGTCATTTGC -ACGCTGAGGAATGATGATATTTGTGTTGGACATGTGCATGTATCGGTCATACTAATGACA -AGAATTAGCTGTGATAACAATTGAGATTCGCATAGAGACATACCTGATCCTTCTTAATGA -ACCCGATTAGCCCCATGCCGGCAAAAGTTTCCAGATTGGCGACATTCTCATCAGTCTGCT -CGATAGGAACTGCTGTACGTCCATACTCGTACCCCTTGGCGAGGTCATCGCGTTCAACGT -CAACCTTACCCCCTGGTGCAGACTCGTCTGTAATCTGATAGGTCCGTGACGTTCGCACCG -AGGCGAGAGCGTCGCCTTCAACATTGCCTTCACCCGGTATCTGACTTCCTGGTTCGGTGC -CGGGGCGTGGCACGAAAGAGCTCGCTGAGATCGGCTTGGCGACGTAGGTTCGGAAGTATC -GCTCCACGGGTATAGTGATCGCCGTGTCATACTCTTCGGGATTTCCGAGCATGAGATTTC -CCTTGAAAGATGGCATGCTCTTAGTCACTTTGATGCGGGGAATATCTAATTCCGAGACAG -CTTGCTCCAGTGTTCCATAAACACCTTCGCTCTCCTCAGTCAGATTGCGGAGAAGAGTTT -CATTTTCAGCCTGCAAAGAACTTGGTTAACAATCAATATGCCAATATATTGAAGACGACT -TCGTACCTTTCGACTGTCTTTATCTTCTTCCTTTACACCATACTCGGCGTCATCAAAATC -GGCACCACTGGATGATATTAGCATGATGTAAAGTCACATCAGTAAGAAAACATGCATTAT -CACCAATTCGATGTTGATTTCTTTCATCTTTTCGATAATGCCTTCGATATTATCATCGCT -CATCGTACCAGTTCCATTAGTCACTAAAATGATCTTGCGCTTGTATTTGTTTGTCTTGGT -GTAATCGAGGATCATCTGGATCGCGATGACAACAGAAGAGATGGCTTTTATACAGTCAGA -ATAGCAGTTTAGCGATTCTTCAACAGCCACTTACCATCGCCTTTATTAGTTGTGCTGGGT -TTGATTTTTTCGCGTAGTCTCCGGATATCAGGCATGAGGACTCTGAAAGACAATTTAGTC -AAGTCATCCAACATGGCATAGTCAATTCAGCACGCCACACATACTGGCCAAGGCCGAAGA -GAACAGAAATATTAGAAAAGTTCTCTTCATCCAAGTTATTGATGGTTTCTGTTCTAAGTA -TGAGACAGGGCTCCCAATAGGTGTTGACAGATAGGCTACCACTTACCATCTGTCCTGAGA -CCAACCACGCCAACCGTAGCCGTCTTGCGACCGGTAGCAACCTGGGATTGTTAGTTATCT -GGCGTTGAGCGAGCTCAAGTCACATACCGTGGCGGTGATCCTATCCCAGACATACTGCAT -GGCCCATTCGAGATCTGTCATAGGACGGCCATGGTGGCGCTCGCCCATAGACCGTCCCAT -GTCTACAATGTAAACTGTAGCCTCTTTCTCCGCCATGATGATGGTGCTGTTGGGTAAATA -ACAAGTCAAAATTGCTTGAGGCTCAAGACAACAGGCAGACAGGCAGACGCGTGATATTTC -CACTTGCGGTTTAGACCGAAACGCGAAGGCACATGACATCACACGCCGTATTCGTTGGTG 
-CTCGGTGGTCTGTGCGTCGCTATGAGACAGCGCCGCCTATTTGGAAGTGGAAACAAGGCT -TTCCTTAATAGCCGGGCCGTAGCGATAGCTGCCCACTTCTTCCCCTCGTTGCAACCTCAC -ATCAACTCTATCGTCAACTCAACTCTCCCGGTATTATAACATATACGAAATCATGGATCC -TCGCCCCTACCACATCCTGAGGTTTGTTTATTAGCGAAAATTACGACTCATATGCTAATA -AATGCGGCAGCTACGGTACCCTCCTAGGGACACAGTTCTACCAGGTGAGATCGATATCTG -AACTGGGCATACATCAAATTGTCCTCTCCAAAACATTTTCTGACATACGTCAAGTCTTTT -GTTGGAGGCTTCGTTGCTTTCCGCGCCCTTCCGCGGCCTCAGTTCTCTAATCTGCAGACC -GCCATCTTCCCCATCTACTTTTCACTGCAAACTGCGCTGCCTGTAGTCGTCGCCTTGACC -GCTTCCCACGGCGGACAGGTGTTGGGTCTATCTGGTCTCGCTGCACCGAAGAACCGCATT -GACACCTTCTTGCCTCTGGCAACTGTGGCAGTGACTGGGTTGGTGAACATGTTTGTATTG -CTTCCCATGACAAGAAATGTGATGCGCGAGCGCAAGCATCAAGGTGCGTTAGCAGCTGTC -ACTATGTTCTGATAAAGAATCAATTTATCAACCTAACCTCTGCAGAAACTCGTGATGGAA -AGAAGAGCTACGACCCCGCGCCCCACTCCAAGGAAATGCTGGCCCTGAACAAGAAATTCG -GCCGCGTGCATGGCATCTCAAGCTTGGTCAATTTGGCCAGCCTAATTGCTACGGTCTGGT -ACGGTGTCGTTCTAAGTAAGAGACTTGAGTAATGTAGCAACGGCAATCAACAGCAATACT -CAACAGGAGGTTATATGCATATAGCTCTTGTACATATTTCGTTTATTTTCCTCTGTCTAT -TGTATTTCTGGTCATACACAAAAAGTCACTCTGTAGGGTTGAAAAATTTTGCCTTATCAT -CTTTCAAGCTGTAAAGCTGAGACGAAGGACAAATAATTTTAAAAAGGGCATCTAAAAGCA -TATCTGAGCAGACATCGAAAAATTCTATCAAAAAACACTACCAGAAACGACATAATAATT -CCACATAAGCGGCCTAACATCTTTGTACTGTATGAACGAGAATACACAATTATCTCACCT -TCTATGGTCACGGTGATCTACCTCATGATGTGAACATTGCCCGAAGAGATGTATCTTCTG -AAGGGCTAGTAATTATATTGCCTGGTATCATGTCATCGCCACAGAGCCAGAGCTCAGTAA -TACGTCAAGACACGGCCCACGAATAACAAACTGGCACCGTCTGTTCTATATCTGTAGCAT -CTAGCCTTTATGCGCTCTTTCCCTCTTCTTTCCTTTCTCCCTTAGACTGTATATCACCTA -CTTCAGCAAGACGCTCGGCACCTTGATAGAATTCTGGCAGAGGCTCCAGTCTTTGACGAA -GAATCCAGACACGGAAATGCCACAAGGCATTCTTGATCTGGTATGAGGCATGCTTGCGAG -TCGAGACAGAGAAGACCTGGTCCAGTTCTTCCAGAGTAAGAGCTGAAGACGAAAATATTA -GCACGGTGAATCTCAGATAAATCGGGATGCTTACACTTTGTTTCAGGCACAAACAATAAG -ACCAAAAACCAGCCGATTACACACCAAGCAGCATACCAACCAAAGGCACCCTGCGGTTTG -AAAGAATCGACCAGATGCGGCCAAGTAAAAGATAAAATGAAGTTAAAGCACCAGGTAGTT -GCTGTCGCCCATGACATACCGACGTCGCGGACTTGCAGCGGGAAACACTCGGCAGAATAT -GCGAATGGGACGGGCCCTTCTCCAGGAGAGTAGAATATTTCAAACAGGTACATTCCCGTC 
-GTGACCATTGCAATCCTCTTCTTGCTTGTTTCGTTCCCTTCTTCGATCCAAAAGGAAAAG -CCAGTCCACAAGAGGGTGATAGCGAGGAAGGGAAAAGTGAATAGTAGCAAATTCCGGCGA -CCCCAGGTATCAATCGTGAAGAAAGCTGGTAGAGCAAAGACCCAGTTGAGAATGCCGGTT -CCCATGGATGCTAGTAGCGCACTAGTCAGACTGTAGCTTGCTTCGGTGAAGACGGAGGTA -GAGTAGTACGCAATGACGTTGACTCCACAGAATTGCTGCATGAACATCACAATCCAGCTC -GCGAGTGTAGCACGGCGGTTTCGTGGGATGGTAAAGAGCTCAATGAACATTGTGAAGAAA -TTCTTGCCCTTGTTAATCTTTCGTTCAAGCTCAACGCCCACGTGTGCATAATACAAATCT -CTGGCGGCCTGTATGTCTGTTGGACGGAGGATACGGAAATTAGTATAGGCTTTTTGGATT -TTGCCGTGCTGGATCAGCCATCTTGGAGATTCCGGGCAGAAGTAGACTTGAGCGCAGACA -ATCAGTGGCAGTACGACAGTGGATCCTAGCATGAGACGCCAACTGATGTCTCGGTCTAAA -CTCATGAAAGCAACACCCATTATGTTGCCAAGCATGATCCCGAAGGCCGTCCACATTTGC -CACATCATTACAAGCGCACCTCGAATTGGGGCAGGAGAGCATTCTATTGAATCAGCAACA -ATGAAAAGGAAGTCATGCAACAACACTTACCCGCGGCGTACACAGGCACAGTACTCGATT -TCGACCCGATGCCCAGACCAAGAACAAAGCGAGCAATGAAAAGATTGACCCAGGTATTTG -CAACACCTTCCCATACTGAGGCAACAGCTGCAATCACGCATGAGATAAAGATTGTACCAC -GCCGAGCAAATAACCGATTTAGGGGCTCAGTAAGCCAGCAACCGATAATTGCGCACGCCA -AGTATGGGGCCCCGACGACTAGGCCTTGAATGTTTTTGGAAGTGATGTTAAGTTGCTAGA -ACAAAAGTCAGTTGATGCTAGCATGATCAATGAGAGGAGACGTACGTCTTCAGCAAAGTA -GATAGCCTGAGCACCATTATTGACTGTTTCATCCATACCTTGTACAGCTGCAGCCAGAGA -GCACATTGCTGGAGGAGGGAAAGGAAGTTAGCTCAGGCCAATGGAGCACCTGTAGTCTCA -CACATACATGCTAGGAAGTATAATTGCCATGGCTGTGACCACTTGTGGGTATGTTCACGC -TCGAGGGCTTCCCTCTCTTCCTCCGATAACTCGGGGAGACTGGTTGCCATTGCGGGATTT -TGCGAAATGAGAGCACCCTTTTTGAAAGTCTCGATATGATCCATCAAATCGTACTTGGTG -CAAAATTCGGCAACATCTTTAAAGAGTTGTTCGCGGGAAATTCCGGCAAGAGGGTTTTCG -AAGACGGCATTAAGATTGCCAGGAGAGCGGCTCTCGAGGTCTGTCTTCTCTTCCTTGTGA -ATGTATATAGGATCTTGGCTCTCCTCTATATGCTCTACCTTCTCTGCAGTGGAGCCGGTC -ATTCCCACTCCATTGTCCTTCATATTGAATGAGCAAAGGATGACAAGTGATGATCGAATA -TCTCATGAAACGCTAGTGATGTACCGGACAATTATTGATCCCCTGGGTGTAGTTAAGTCA -GCTGAAAGACTTTTCTTATATGAAGGATGATAGGCAGGAGGTAGAGTGACCTGATATCAA -ATTTGCGGTTTGTTACAGCCTATCAAGGCAAATCTAGAGCAATGGCGAGAATAATTTCAC -GCTTTTAGGGCGAGGAGACTGAAGGGGCACTGTGTGCTTTATAGAACAAGCATCCATTCG -CCCCCAATGTTGAAATACACTAAAGCCTCGAGCTTGCATTTAGTTCGGGTGGTCTTCCAC 
-ACGAAATCAGCCGCTAGGAACCATAGAGTACCATTCATGTAACATAATTTATATTAGATA -GCATGGTCTGTATCAAATTATTGGATACTGGACCCTTGGAGCCTGAGCTAGAGGATCGCT -TAATGAACAAAAAAGCAGGGCAGAGCTTGCGGGCACAAAGAAACATCCCTGCCATTGGAG -ATTGCCCACACACTGAGGTTGCCCATAGGGAGTCGAGGTGAATCATATACGGGGTATCAA -CTATGTCATTGGATAATAGGAGCGAAGTCACGACCAATGAGTAAGAAGTATATCGTACAA -TCCGATAGCAACTGAAGCAACATGGAAGGGATGATCATGTGGATTACGTCATTGCCCAGG -AGACACCCGTGGCTTTGTGTTCCTACAAAAAGGCGGGGAAGAATGGGACGAGTCAGGTGC -ATTTACGTAAGTCATGTTCGACCAGGCGGAAAGCACAGGATGCCATTCCTCTAGTATCAT -TCTCTGATCAGATCTTCTTCGTAGCTTCCTTTGTACTTTTCAAGTACGGAGTAGATGTCG -GAGCAAAATAATAAAACCCCCAATCTTATTTCAAAGGCCCTCAAGCAAACCCAGGATCTC -CTAGTGATCGCATAAATAAACACGAGTCAGAGATAAACCCTGAAAAACAAGGGATTCAAC -CCCTGGCCACAACTATGTAGTTCACTACTCAGCTACGCTCCTTCGCACCAGTGGCACCCG -AACCAAGTCTTCCAGTGTCCCCCTCAAGCCATAAACCCACGCCTGAATAGCCCGCACCTA -CAGCTTATTGCTATACAACTCCTTAGGAAGATAAGGCTTGAAGAAAAGCTTCAAGACAAC -GCTTTGGAGAATGTCTCAAGTCTACTCAAAAGATTGAGGAGGCTCTTTATAATGCTCTGA -CCCAGCCGCGAAGGAGGCTTTCGGGAAGGATTCAGTCGGACTCGCTGAGTGACATATATC -CGTCCATCTCTACTATATTCCCGAGCACAAAAACAATTGAAGAACGATACAAACCTTACA -TCTTCATGTCACTTCCAAGGTTTCGCCAAACTCCCCGCTCTTCATCTAGACCGGTATAAT -CAAAGTATCCGCGGGAGAGGCAAAAATGAATTTTGACCATGTAAGCCAGATAAAATACCT -AAATAGCTACTCCGTATACGGCAGTATCACTATGGCAGAAATTACTGTGGCATGATTATG -GATGTACCCAATTAGGAAGGCGGTTCTGGTCCAGGGGGTTCGCGGCTCACACTCCGCTTT -ACAATATATGCACATAACCAATAACTTTCACAACAGACTGTATAATAAATGTGTCACGGA -TCACAATTTCTCAACCCCCGAAAATATTACATGTATTGAGACTTGGGCCGCGACTATCAC -TACGCAGTACGGTGAGTGGAATGAGTTCGCTTCTTCCTAGTGGTGACCAAGCTATCGGGA -CGGTGTCAGTGAGCAGTGAATTGAATAATTTTCATGGATACGAGGGCAAAAGTACGGGAG -TACAAAAACTTACGTTGTCCCGCCCCGCTCACTGGCACGGACCTGGCTTTTTTTCCATTG -ACTTTGCCCTCGACTCTCAGGCATTTACATGGTATACGTTCTATCCGGAAAACGGAGTTT -AGGGAGTCAAGTTCATACATCAGTACAGATGTACATAGCAAGTAGTAGATATCATTCAAA -TCATGGGTGGCTGACACAAATTACACCCGTGGACCTCGACCCGTTACACTGCACCTCTGA -TTGGTTGAAAAAAGCCCCTTTTTAGCCCTGCGGATCATCACACAACAGTGAGCTAATCAT -CTTAGAGCGATCAATACGCTAGCTGTTGTCGAACCAGTAATCTTTTTCGAAAAAAACCCC -CCCCCCATTGACAGTTGCACGGGATATGCAATCTAACAACGAAGGGAAAAGTACATACGG 
-GCAGTCTTCTTGATACAAAGGTGCACTTTTCCGGGCTTATGGTACAAAAGACCGTTGAAT -ACAACCTTGGGACGTAATAGCGCTGGACCCGACAACTCGGGACGCTAACAACTGCCTGAA -GACAGAGGGGTCATAAGGGGGGTCATAAGACTGCGGTGTGCTTGCAGTTTTCACTTTTCA -CTTTCTTCCTGATCCCTTCCATCTGTTTACAAGCCTGTGTCTTGTACATCCGAAATATCG -AAGATGCATCCAGAAACACACCGCGCCTCAACCCCGGTCGAGCCCGACGTTAGTATCCCG -TGGAAttttttttcttaatttttttttcctatttttttCCCGCGCATTCTCTTTGCTTAT -CTAACTAATGTCATCTAGTCTGACACCGATGGGGCCTCATTATACTCTGACTCGTAAGAC -CGGTGACTTGAGATGATTTGTATTTTCTGCTGATCGTCACAGACTTGACAATGCCAGTTA -CACCACAAGTGTGACATCGAGTGTTTTGGACTATCAGTACGTTAATGTGGCTGGAATATG -TGTTTAATGGCTGCTAATAATCGCAGATATGAAGTGTGTTTATCGCTATTTGACCACGAA -TTTAACTGATAAGGTGCAGAACGGTCGCAGGTATCATTCATACCATGAAGGAGAATACGT -TCTAGTACGTGGCCCTATCAACGTTTAGTCTCCGGCTCAGACTAATGTTCAAACAGCCAA -ATGACGAACGGGAACAAGATCGCCTTGATCTGGTTAGTTTTCACTTGTTGATTCTAATGA -TACAAGATTTTAACGAGCAACAGAGCCATCACATTTACCTAATGATCCTCAAAGGAGAAT -TGTGCCAAGCACCAGTCCAAAATCCAAAGCGAGTATTAGATCTCGGGACAGGAACAGGTT -TATGGGCAATCGAGTATGCAGAGTAAGTTCTCTGCCAATATGTCTGTCTATTCTACCCCT -AACAAGCAATATAGTACAAACCCAAACGCACAAGTCATCGGTAGGGGCACATGTCTATAA -TCCGATCAAAACGTTGACTAATGGAAACAGGAATTGACCTCAGTGCTATTCAGCCATCAT -GGTACGTGCTTATGCTACTACAGTGGCTCCATATATAATATTGACAGAAGTCCAGGGTGC -CTCCAAATTGCCGCTTCGAGATTGACGACTTTGAACAACCATGGTCATATAGCAAACCAT -TCGACTATATCCATGGGCGCGAATTGGAAGGTTCCATCCGGGACCACGATGTCCTCTTCA -AACAGGCACTCGACAACCTGAATCCCAATGGATGGTTTGAAATGGCGAGTTTCGACGTCA -ACACGTATTCGGACGACGGAACTCATCTCGGCGCAACGAACTTTATCTTATCCATTAAGC -ACATGCATGAATCCTCGCGCATGTTTGGTAAGGACATGGCTTCATCCGTGTCCTGGAAAG -AACGGATGGAGAAGGCCGGGTTTGTGAATGTCAGAGAGGATATCCACAAGGTATGTTCCG -CAATGCCTTTTTGATGAAAATGAAACCCCATGCTTATCGGTAAATAGCTGCCTCAAAGCC -CATGGCCCAAAGACCCCAAGCTCAAGGAGCTTGGTCGGTACCATCAACTCAACATGCTCG -AAGCCATGCCTATTTACACATATGCGTTGTTTTCAAGAGTGCTTGGGTGGACACGCGCCG -AGATTGAGGGACTGCTGGCGGGAATCCGGATGGAGCTGCGGGATACATCGTATCACTTGT -ACACGAAGTTGCGGGTGGTTTACGGCCAGAAACCCGACCGACCTGTGAATGCCTAACCGA -TGTGAATAGAAATTTCATATTCCGAGTTACATATTTCACGGATGATTCTAAATATAGCCT -TGGGCATGTGTCACGGAACTCGGGAGACCCTGGGGCCATAGTTAATCTTTGCTGTTAATG 
-CTTCGATATTTGGGCGAAGGCTCTGCACCTATACCAACCTTCAGCTTAGTGAGGGTAATC -AGACTCTTTTTCACTTTTGAGGTGTCCATATCCTAGCTAGAGGCTGGGGAAAAGCAATAT -TCATATTCTTTTTGCCAGAGCAGCTCTAAAATGTGCTTTCACCTCCGTACGTCGTCCAGG -TATGCAACCGTTGCACTTTAACATTCCTTGTGGATGTGGGCATGCGTCCAAGCCGAGCGA -TGGAAAAAATCGTGTTCGTTTTCTGCCTCAGTCACTCCCTTCTGTCTACTTTTTTCCGTT -CTCTTCCCATTCTTTTACTTGCTGACTTGTCGAGATCAAATGCCGATAGATAAGTGATAC -ATGTATCAAAAGGAGCGCGGTGTAATTTAATATATTAGGATATTATAAATTATTGAGGAA -ATTAGGAGAGAAAAAGGCTATCTGTATGTCTTGAAGACAGCCATATCCTCAATATTCCAA -GAAGACGCCAACAAAATGCGACCCTTGAAAAAAGAGCAACGAACGCCATCTTGCAAAATA -GTCCCTGCATGTGCTGCACAAAGCAAACGTCATTGTTTTGGAAGTGTTCATCGGTATCGT -GAGGACATAAGAAGCTTTGGAAAGCAGTCAGGGTCGGGTTGATAGGAGGAAGATCGCATA -TCTTGCATGGCCATGATCGTTGGAAGTATTAAGAATGGCATTCTCTAGGTAGAAAAAAAA -GTTGAGCTCTTTTGCCGTCTTGACGAAAAGAAATGCCGATTCCTGTCTTCAAAGCATTAT -TAGATACCAAGTTTGATCATGATGTGGTAGGCTGCAACACACTCACCATTGTCAAGCTAT -CTCTGCCCAGAATAACGCCCCGGTGGGAAAGTGTACTCGTCATTTAAGTCCTCTTTGGCC -GCCATGAGAGGCCCCCAGTCTTTTTTTATGTCCTTCCATTGTTTCTTCAAGCGCTGGAGA -GGTGAAGGATGTGACTTCTCCTGGGTGGCGTTGAGATCCTCGACGAAGGAAGACTGTGAC -GTGCTCGTTTTAGGGGTAGGTCCCTGTGAATCAATATCTTCAGGCATGATGTGATGAGAA -TTTTGGAtatatttatttttattccgatttgtatttgtaCGGTGATTTCACAGACTTTAT -ATCTATATTTCTTGAGATAAGGATGACATGTTGGCAGGGGTTTTATTTAAATCTGTCAAC -CGAGTCTCTCCCGACTGATTACCGAGATATCACCGCCGAGGTATCTCCGCAAACCTACCC -TAAGGCATTTGAGTTTTCTCTATTGAGTTGCGGCACTTCAAATCTTCATTTTCTTCGAGC -ATTACCGCATTGTTCGAAATGGTGGCTAGTATTGGATATTAGCAAGTTCTCTTGTTCAAG -CCAGATGACTGTCTCGGTTGACGAGCAGCTATTGAACCACACATATCATGCTATACTTCT -AGAGATGAACTACAGTCCATGCCCAACTAGAGAATGCGGGTAGGTATAGAGGAAGGAGAT -ATCGTGAGAGAGAGAGGGAGAGGAAACCAACCTTGAAGGGGAAGAGATGTGAGCTATGGA -TCCGCCCTGGTACGGTGGGTCCTCTCGCCAATAGATGCGGACACCTCTATGTATGTGCAA -CACAACACATAGGCTAGCTCATTTGACGCATTTCAGCAACATCTTTACTGAATAGCAGAT -TGATAATAACTCGAAATGGCAGCCGTGACTCCTAGGTGATCATATTTCCAGAGGACACGG -TTTGATCATGAGCGGTAGAAGTCTAGAGTCATCCTTGGAAAAGCTCTGACCTGTATCGCA -GCAATTTCTCACTATGAAACATGCTAGAGAGCATTGCAGGGAAGCTAGGGAAGATATAAG -TTGCTAAACGACACGAGTTTATCCCTTTTTGCCCTCTGTTTCTCTCTGTCTTGAAGGCAG 
-TTTCCATAATTTAGGCTCAACTACTTCACCTTCTTGACAGAATGAATGTTCTCCTCGAAG -CTAGGTCCGAAGCTGTATGATCACTATCTGTCGCGATGAGGCTCTTCAATGTTAGAATAT -TTTTAGCCAATGAGAGGCGATACCGCCTATCCCCCACCGGCCATGCCGCCAACCGGAGCC -CTCTAAGTAGCCCGCGGAGGGAGGAACCGCGGACTTTGCCGCGGTTCAAGTCAACATCAG -ACCATATAGTCGAAAAAGCTGTCCTTGATTCTAAAGTATTTAAAGATCTCAACAGTAGCA -GGGAAGGGATAAATAAGGAAGTCAAATTTCACTCAGGATCAATAGAGCCACTCGAAAGTA -TGAAAAGCGAGTCTGTAGTGATCATCGGGTAAGTGGGAGCTAAAACACGCGACGACTGTT -TCGGAGTCTTGGTGCTGATCGTTATCAGGGCCGGCGTGATAGGGCTAAATGTAGCCCTCG -TTCTCGCAGAGAAGGGCTATGGCCGACACACGACCATCGTGGCAGAGCATCTTCCAGGAG -ACACTTCGATAAACTACACTTCACCATGGTCCGTTAGAATTCCTGTCGCAATTTCACATT -CACGAAGTATTAACCGGAGTTCAGGGCTGGTGCCAATTTCTCGGCCATTTCTGCAAGTGA -CCCAAACGCATTGCGATGGGACAGGCTTGGATATATGTATCTTCTCGATCTTGCAGCCAA -GGATGGGAAGAATGCGTTTGTGAAGGAAACACCTTCGGTGGAGTATTGGGATGAGCTGCC -ATCCCGCGAGAAAATCGAATCCATGGCCGCATATCTTAAAGATGTGAGTTATGGATGATG -TAATTGTTTTACTTTTTTTCCAGATGTGCTTATCCATGTTGCCATTGAACAGTTCAAGGA -GATTGCCACACAAGACCTGCCAACGGGAGTCGCATTCGGCATAGAGTTCACAACAATAAC -ACTCAATGCACCAATGCACCTTCGGTATCTGTTCCAGAAGGTCACTCAAGAATATGGGGT -GCGGGTAATCCGCAAAAAGCTACCCCAATTGTCGTCTGGTTATCTTAGCGAGGATACTAA -GGTGGTATTCAACTGCACCGGAAATGCGGCAAAAGAGTTACCGGGAGTCCAAGATTCAAA -GTGCTTTCCCACCAGAGGTCAGATTCTACTTGCAAGAGCATCTCATGTTCAACAAAACAT -CATGAGACATGGTAAGGACTATGAAACATATGTGATTCCCCGACCATACTCGAATGGAAA -CGTGATCCTTGGTGGTTTCATGCAAAAGAATGTTGGGTAAGTGGGATTCATTCAGCCTTT -GCGAGAAAACCATTGGCCCATTGTTTTTTTCTCTTGGGTGTTCCACATGACAGTAACTAA -ATTATATCGACAGTACACCGGATACATTTGGAGAAGAGACAAACTCAATTCTGTCTCGAA -CAACGGCTTTGCTTCCTGCTCTAAACTCGAGCGAGACTGAGATTCTTGGTGCATTTGCCG -GGCTACGCCCATCTCGTGAGGGTGGTGCTCGTGTTGCCCGGGAAAATGTCCGAGTAGGCG -CTGCAGGACGTCACGGAGTTGTTGTCCATAACTATGGCGCCAGTGGAACAGGCTACCAGG -CGGGATATGGAATGGCGATTGAGGCAGTCAACACTGTGACCGAGGAGATTGATGCACTCG -ACATTCAGTCCCGTCTTTAGATCTTGTTTTCAATGTCCAGTATGGGTGGATTTTTTAGGT -ATAATTTATAGATTTCATTGCCCAGGCAGAAGGACTATTTTGACATAAGCCAACAAGTTT -GCAAATTTGTTCATGTGGTATCGTTTCAAGTTTCATAATTTCATAAGAATTCTATCGCAA -CTTATCCAACCTGGGAAGGTCATGTGGTGATACGATCACCGTAGCTCTGCCGTGCTATTC 
-ATCCCACTCGCTTCATAGCTTGGAGGAGGAGTTTCATCTATAGGGGAGTAGATGAGATTA -TCGTCATGATCACCATCATACTGCGTCACGATAATTTTCGGTAAAGGTCGCTCATCTTTC -ATGGGGGATAAACCTGGAAGCTCGAACACAGATATGACACTCGTCTCAGATGAGAGTTCT -GGGATCGCATGAGTGACCATTGGGACCTCTGATGGAGAGAAGAATGATTCGTCCTTGCTT -GAAGTGCTCGATAGGGAGTCAAGGTGGCCAAGAACCGTCTCGAGCTTGTCGTGAGAAAGA -AGCATGCGGGATTCCTTTTTCAGAGCAAGTTCAAAGTCACGACGCTTCCAACTTCTGTGA -GCTTTGGGCCTTGTTTGTCTAAACTGCTTCACAAATGCAGCTAAATCATTGACGGTACTT -TCGGCCTCGGTTATGGTTGAGTCGATCCACTCAAGCTCGATATCACCCAAGGCTGTCCTT -TTGTCATCCCGCAAATTTCTTGCCTGCCCCACCGTGGAATTGACGAGTTTAGTTGCTTCA -TACAAGCCTTCAATCTGCTTATCTGGAGATAAGGAAGAAACAGTGATAGAGACTGATTTG -ATTGATTTGACTTTTGTGGATGGTGACGTGGTATCGGGGGCTGGAGGTGGGCTTATTGGT -TGCGAGTTGAGTGTTCTTGAATAAAATCGTGATTGTGAAAACATTTGTTCTTGGGTGTTC -CCATCGCCTGCTGTGACATGCTGTTTTGTATCTTCAATACGGGCGGCGCGGTATAACTGA -TCTGCTGATTGATTCCATTTTGACACCGATCGATATAGCATCGATTTAGCTGACCGCTGA -CACCGATCTTTCGAATCACTCAATGTATCTGCTGATTTGTGCCATAATTCCATTGTCGAT -TGATGATAACGATATTTTGAATCATCCCACATGGCTGTTGAACGACCGACGAGGTTCCTT -GTCGATCGCTGTAATTTCCCTGTCAACTGGCTCTTTGGCCGATGTGTGAGGTCGGCTGGT -GCCATTCTCAGACAAATGCGTTTGTATAAAAGAAGAAATGAATTTGGATGTCTCAAATAA -AGTGAAATCTGTAAGGCAGTTCTGAAACTGAACGAGAAACGTGGAAGAAATCCAAGGTAT -ATAAATATCCAATATCAACCCAGCCTTACTATATAGGCTACACAAAATCCCGGAAATTAA -AGATGGCTTTCTCCCAATCAATCAAATTCAAACATGTTGGTTGCCAGCAGATCATGGCGA -TGCTGGAAGGCTGTGTGCTAGTAGGAAACGTATTTAGCACTGGTGTGTAGATCCACGTTC -ATCATTGGTCAACTTTCTGGGCATCGCTAAATTTGGCTGTGGATGACGGGGGCCAAACTG -CCTGGGGGCTTGTGCTGCCCAATTGGCGTGGGCTTAAAAGTTTGAATTCAGAGTATGTAC -GGGGTATATTGTGGTCGATACGAGGTTGTCCTCCGACCATGGTAGCCCCTTAGAATCCAT -CCTCGTTGTTCATATTGATTACAGAGCAAAAGTCCTCCTTACAATATAGGGGAGAAATAC -ATTTCCTATTGAGGTAGGGGGTACTGTAGCACGTGATATCATGTGGCATCACGTGCTCTC -TTGGCAATTGATGTCATGTACGCCGTAACTCCGTAGCATTTCCCGTAATCTCTGCCATAT -TTCCCCGGATTATCTACAATGGTTGTAATTGATGGAAATTCTTCAGATGTGAGACCCTTA -AATTTGTCCCAAATAAGGTCAATCCCGGCGGGATAACCCCGGAGCATCATATAGTTTTGC -AAATCAACTCTAAATTTGGACAAAATATAGTTATAAAAGCAGCATAATGCAAGCAGCATG -ATGCTGCCCATCAAGATATAATTCCGTTTAGCTCTAGCATATCAGACATGAGAATTCCCC 
-AGGTATTTGCGTTTGCTGCAGCTGTGACTGCAGTGACGATTCCGCGCTCCAAACATTCTT -CTCCGGCCCCGTTCACCAATGTGACCATTTTCACACCACCTTCCAACTACATTGTTCCAC -GGACGCTCTACCCGCGTAACGAGCAGCTTCCCAACGGGGATCTTCTTGCAACATGGGAGA -ATTATTCCCCAGAGCCACCGGCTGTCTACTTTCCTATCTACCGCTCCAAGGATTACGGAA -AGACATGGAAGGAGATTTCAAAGGTGCATGACACTGCGAATGGGTATGGCCTGCGCTACC -AGCCGTTCCTCTACTCCCTACCTGAGCGCGTCGGCTCATTCAAAAAAGGCACTCTACTTC -TGGCCGGTAGTAGCATTCCGACTGACTTGTCGTCCACAAACATTGATCTTTACGCGTCGC -AGGACGATGGCGTGACTTGGAAATTTGCTAGTCATATTGCTTCTGGGGGAGAGGCAGTTC -CTAACAATGGCCTGACTCCTGTGTGGGAGCCATTCCTACTTGCCCAGTATGTTCTTCTGT -TGTGCCTACAAATATCTTACTAAACCCATTTACAGCAAGGGCAAGCTGATTTGTTATTAC -GCTGATCAGCGGGACAATACAACTCACGGACAGACAATAAGTCACCAGGTCTCAACTGAT -CTGAAGAACTGGGGGCCTGTGGTAGAGGATGTCGCATATCCGACATATACCGATCGACCC -GGCATGCCAGTTGTAACTAAGGTGAGCTAAACTAAATATAACAATTCTTTACCGATTTCC -TTATGCCATATCTAGAAAATGCTGACCTTGCTAGCTCCCTAACGGTCAATACTTCTACAT -ATATGAGTATGGCTCTTTCTTTGATACATCAAGCTACTCATTCCCTCTGTACTACCGTTT -GTCGTCCGACCCCGAAAAGATCGCCTCCGCCCCGGGCCAGCGTCTGGTTGTCTCAAGTGG -TGCTAAGCCGACATCTTCGCCGTACGCTGTGTGGACACCTTACGGTGGTAAAAATGGCAC -CATCATTGTCAGCTCCGGTACGCAGAGTAGTTTGTTCATTAACAAGGCTCTTGGCGAAGG -GGAGTGGACAGAGATCGCTTCACCCCAAGAACACGGCTACACACGGTCTCTGCGGGTACT -TTCGGGGGATGGAGGTCGTTACCTGGTTGTGCATTCTGCAGGAGTGCTACTCGGCACAAA -TAATCGGGTCTCTGCAAGTGTGATGGATTTGAAAGAGGTATTGTGAGTGAAAAGGCAATT -ATGGTGGCGGATGGCTCGTACTGTATGCTGCTCAAGTGCGGTTACTAGTTGTTTGATCGT -TTTGATTAACTTAACCCATTGTCCAATATGTACGTGCTTGGTATTTGACCGAAAATCATT -CTAGATTTGGTGAAAGACTTCAGACGTTATAAACATTATGTGTTACCAAGAATCCAGAGC -GCCCAAGTTGAAGATCAACATCGAAAACTTTGCACAGCATTAATGACCTTGAAATACTGC -AATGCCATAGTACTGTAAGCATGAATATGAAGCTTCATTATGGAACTGTAGCAGTCAATA -TAAAAAAACAGTCAACATGTCGGATCCGAGCCTTGAAGAGAGCACTGAAAAGCCCATCTA -ACGTCACACCGATACCTCTGCACATGGCAAACTTGAAATAACAAAAGGTTCTCTAGGCGC -CTGCGCGGGCTTCCTACTCAAAAAACCGATCATTATCCTTGACGATGCGTTTATAGGTCT -TGACCGAGAAACCGAGAAGATAATTCTAGAGAGCTTGTTTGCTGAACATGGTTTCATTAA -GTGTTCTCGGCAACTAGTAGTTATGGGAACAAACTCGGGTCTGTATGCGCCCTATGCCTA -TCCAAACCATTCATTAAAGTTACTGTTACATCGCATCATCTTCTCTATACCAACAACGTC 
-ATCGCGCTGGATGAAGATGGGAGAATCATTCAACAAGGATCGTCTAGTGATCTCACGAGT -TCTGCTGGCTATATTAAATGCTCTCCAATGGTGTCCACTCGGTCAACAACACTGCTCGAT -CTTCCGATGTTCTATTGGATGACGAAAACTTGCGGGAATTGATGTTGGGTCGTCAATATA -CTTAGTAAAGTCGACAAACTGGCGACTGGACTTTCTATCACTATTATCTTAAGAACATTG -GTTGGCCTCTTCTGTCAGTCTTTCTTACATTCTGTGTTCTGTTTGTTCTCGATCTCAACT -TTTCTTGTCGGTCTCAACAAACCTACCTGAACATCAGGTTCCACATGTTGCTAACGGTCT -GTACAGAAATATGGCTACAGTGGTGGACCCGCGCCAATGAGCGGCATCCCAATGAACATG -TTTATTATTGGCTGGGTGTTTATGCTGCACTTGGTCTCTTTTCACTTATAACTACCTTTG -CTAGCTCTTGGTATGATTCTAACAATGTTTTTGGTCAATCGATCCTCTTGACTAACTTTT -TATAGGATTCTCATCATGATAATGCAACCCAGAACGGTCTATCGGTTCCACCAAATTATC -TTGCGTACTACAATAGGGTAAGAGAGAACAACTACTGTTGATTGCTACTGGTTTTGACCT -TCGACTCAGAGCGACAACGTCCTTTTTGACTTCTATGGATATTGGAACTACTACGAATAG -GTCCGTAGACCATTCTCTCGTGAGAGTCTGCTGATTTTATTGATCTGTTTTACTGTTTAC -AAATTTAGTCAAGATCTCGGACTCATCGACGAAGTGCTTCCTTCATCGTTAAAGCTAACA -GTCAGTGCTGCGCTCAATTGTATTGTAAAATGATTTTTGATCTTTGTTGGCTCTTCTTAT -GTTACAACAGCAGTGGTCCCAATCTGCATCCTGTTGGTATACTACGTGGTAAAGTTCTAC -GTCCAGACCTCTCGTTAGATGCGCCTTCTAGGCATGGAAGCCAGGGCACCTTTATCCTCG -CAATTCCTAGAAGCCCTCGGCGGTCTCTCCAGCATTCCAACCTATATCTGGACAGAGGAT -TACCAGTGTCCTTCCTATTTGCTTTATTGCCTTCAGCGCTGGATTGGACTTGTGCTAGAT -CTGATAGTGACATACATTGCTGTCATCGTCATCGCTGTTGCAATCTCGATAAAGGGTAGC -CCTTCCGTGAACTTACGTGGTATCGCGTTGTTCAATATTGTCCACTTCAGTGGCACCCTC -TAAATTCTCGTGACATAATGGATTGGATCGGGAACCTCCACTGGGGCTGTTTCCAGAACT -CGCTCATATGTACAACAAGCCACTATCGAGGATCGAGATTCCGAAATTGAGGTCGTCCCT -GAATACTGGTCTCAGCAAGGTGGTATTGATATAACTGGCCTTTCGGCATCTTACAAGTAT -GCCCTTAGAGGTTTCTTTGATTCCTTTGGATATATGCTCAGTAGTCTCTCTATTGCGCAC -AAAATGCATGGAATTCTAGACTTTGACAAAGTTGCTTTCATGGAAAACGGCAAAATTGTG -GAGTTCGATAGACCCAAGGCGCTTTGGTGGAAAGAGGGCTCGGTTTTTAGGGCTTTGTTG -AAGGCTTTTCATCCCAGCTCTAAAAGATAGACAGGTGGTAATCCTTTGCATATTTTCTTT -TTTTTGGTAGTTTAAGACTAGAGAAATTCCTCAGTCTCAACCCGAGTTATAGGTCTATAT -GTTCGCAAAGCCCTAGCTTGAAGGGATGAAGATATCTAATGCTATATTGTGAATAGTATC -GATGAACCTCATGCAATGGTAGAAAGTTGCATTGCCGAACCTAGCTACCGCTGCTAAGGA -CGGGAAATCACATGCTGACCACCTGGCTAGTCACAGGATCTCTGTGGCCAGGGCTATCTG 
-TATCACACATACCAGCCCTTATATCACACTGCTATACGTAGAGAACCGTGGCTATCATTA -GTTGCTATTATTGATTTCGCGTCTTAGTAGCTGCTTATGTCAACGAGGTTGAGTTCTTCC -TATCATAGCTCCGGGGATCGAATCCCGCAGCACGGGCGAAAATTTTTACCAGCATCGAGA -CAATATCTTGGGCATTCTGGACCCAGCTATACGTCTTTTCAATCCACTAAAAGATTAATT -AGTTATATTCTAGCAAGTTTCATGATGATTTATTACCTCCCCATTTGAGCGAATCCGGAT -AAACACCCACTCGTAACCGTCCGTCGCAATTCCGTACACAGACCTGTCTCGGATCTTGGA -CTGCTTTCTTTTCTCGTGGATCATCGCTATTCAGATTAGTGGTAAGTCACGTATAATACA -GATCAAGATTACTCACCCATATACGCGAGACAATGCAACATGCCACTTTTGAGCAAGTGC -GGTCTTTCCACTTCAACCATAGCCACATTGGTCGCATGTTTATTAGGCATGCCATATGAA -AGTGAGTAATCAACGATGCCGAAAAGTTTCACTAGTCGATTCTCGCTTTTCCAATTGAAT -CTTATCTTGCGTTCAAATTGTAGGTGAATAGACTTGAGTGATACGGAAGGTGCAGTAGAG -GAAGCTGACGCAACGGAGGCATGGGGTTTTGGAGCAAACTCGCGCATCATCTTGGCCAAA -GTTGTCAAGATAATCACATTGATGCGAGATCTTATCAGTACCTCGCTTCTAGTTGAGCCG -CCGGTTGCCATGTCATAGTTTGCGAGGATAGAGTCTATATAAGTTAGCGTAGAAATAAAT -TCACATCAATAACTAAACTTACTTAAATGAGGAGGGAGCGAGACCTTTTCCGTCTTTCTG -GGTGGAAGTTCGAACTCGGCAGGTGGACAGTGAGTGAGGTTAAAGTGGTGCGGAATTTCA -ATCGGATCAAGGTCGCTAATGGCGATCGTAGGCATCTTTTCTGTATTGTGCTTATAAAAG -TTCCCGGGAGTCTTCTTTCGAGAAACAATTTGCTCCATGGCTAGAAAAAGTGTGCGGCAA -GTTGAAGAAAAAGGGGACGTAAAATATATCAAGAGGGATGAAATCTGACAGTATGCCTTC -TGTTACTCTGAAAGGCTTAGTACATCAATACTTTCCATGGTTCTAGAATATATATATCAA -GGTTTGTGAGTCTGGCTTGTGTGGATGTCTTTTTGTATGTCGTTTTGTGTCTTTCGTACG -CCTCGAATATCACTTTAGATATTTCGCCCCTTCTGTTATAGCTATAAAGATAAAATCTAC -TTCTCGGGCTTTTATATATGTGCAGAAATAAGTTCAACAGTCCGATTGTCAACCTTGGAA -AGATTCCAAGGCTCCGTTTCCTAAAAGGGAAAGTACACCGAGTCATATGAAGCACGTTTG -CTCCCGAGATCGAAGTTCCATCTCTTCAATTATTAACTTTATTGGCTTTGCTTTCCTCAA -TGTATTGACTTGCACTGTCGCCTGCAATGATTGAAGAAGACTGTGCAATTATCAAGACAA -CCAAGGGACACCGACGGGTAGTGAAGGCCTTTGGCCTCTCACCTACTAGGTGGTGGAATA -AGAGGCAACTTAACGCAAAAAAAACCTGGAGCATCCCAGATCATCTCGGTCAGTTCCAGC -TAATTATCCACATGTTTAGTATTTTGCTTGCTAACAACTAATAAGCGTGCCTGTGGGACG -ATCTTGACTTAATCACCGCTCGGGAGGGTGTTACACATCCGTTGTTCCCAATGAGAATAG -AATCACTTCTCCTCGCGCTGCTCGCAGATAAAATCAGGACAGAGTCATTTCATCCTGACG -GGCCCCTAGACTCGCCCCATTGGAGCTTTCGGAAGCGGATGTTCACGCCAGGAATGACAG 
-ACGACCAAATTCTAGCCCCAGTGAATATCATCGACTACGTTTTGTGGTATGGCCACTGCT -GGGAGCTCGAAACAAACATGATTGTTATGAAGACTAAGAGTCCGGTGCGTCGGAGCTGGG -CTCTTATCCAAAATATGTCGAATATTCACCATTCTCGCAAGCTAGCAGGAAGAAACGCAG -TTATATACGGTGTCATGACTGACGGGTCGAAATGGGTATTCATACATCTAAGTAATACAA -GTCGGGTGAGTTCTCAAGGGAAGCTAGGGAAATTTGTTCACTCACACCTTCCCTAGTATA -CAATCAAGGTATTCTCTTGGGATAACGAGCGAGACCGAATTATTGCCCAGACCCAGGATA -TAATCAATCAGGCAGTTGCTCTTCACAGAAAGATACTATCACGCTCGGCTCTCCCTACAC -CTACTGTTCACCAATCTAGTAGCTGTCAAATTAAAGAGATGTCTGCTCCATGCAACGGCT -CTGATGATGAGAGTGGCCGTATAATTGAGGAGAGTACCTATGAGATGGATATAGAACCTT -TGGCCTGGTAAATTACTGGAAATTCAAGTTTGATATGTACGGTGGGCACGTTGGACAATG -GGAATGTTTAAGTTCTCTTATAGCTAGTCACCTCTCTCTAGCAACGGAATCTTACACTTC -TTTCCTTTGCCTGCTTGTTTCACAGTCTCGGAGTTTCTGGTATACATAGCACATTGTGCC -TGAGGCAAAAAGTCCATGCAAACGGGTTAAACATACCTACCTGCATTGCTGTCCAATCAC -ATGATGTCTACACCGACAGACTCTAGGTCAACAGTACTCTTCCTTTCTGCCATCCTCGCT -CAACATAGAAAATTCATCTAGATGTCTATTGCAACACTCCCCACGGAGCTTGTATGGGAT -ATTTGTGGCCATCTGGAGCTGCGTGACTGGATTGCGTTTCGAACCACCTGCCTCGGAGTG -TACACGAAGTCGCTAGATGCTTTTGCGGACCGATATTGTAAAAGTATCGGCTTGATTCTC -ACTAGTCACAGTCTTCGTCGATTGGAACAGCTCGCTGCAAACGAGTGCTTCCGAACACGA -GTTCAAGAGCTATGGGTTGTACCGTCCTTGTTCGGGGGCTTTTATGAGATGGATGTCGAT -ACCTTCGAGTCCTGCACCACTTCACATAGAGACAGGCTGCCTATTGCCCAGATCAATACT -CGGCATACTGCATATCAGGCTTTTGTGGCAGATCATCTCAGCATCATCGAGTCCGATACT -CTTAACAACGTCCTCAAAGAATGTATGGCACGATTTGAGAACCTTACTGTTATTAGAATG -CAACATAACGACTCCGAATATTTCTGGAACTCAGATTCAATTGCAAAGGATGAGTTCAGG -TGTCTCGGTTGGCGCGACGTAAAACGCCAGCTTGGATTCAATCCATTCGGATTGAATATC -AGAAAACCCATACACAACCTCTGGAAGAGCAGAGCGAAGGACCACGCTATAGCTTTCTCG -GCTCTCCTAGAGGGTGCAGTTGCCTCAAACAGAAAACTCAAAAAGTTAGACATATGCAAC -GATGAACATTGTGCCCTACCCCCTTCGGATATTACACTCAAACTCACACACAAATCCCTG -CTTTTCTCCCTGGAAGGATTAGAACATCTGCACCTCTGTATCTGTATCTGGGAGGTACAG -TCAGATGATTCTTCACTCAGATATCTCATCGATATCCCTATAATAGTTGCACCAAGCCTC -AAGGTCCTAACATTCTCACAATGGGACCGGAACGGTGTAATGAACCCTCACTATTTTATC -GACCTCTCCCATCGTGTCAACTTCACCCAGCTAGTTGAACTCGATCTTTACTGGATTGAA -ATCACCTATGATACCCTGAAGGCATTTTTGCGCACCGCAATGCCGACTTTGAGGTCTCTT 
-GCCCTACAGTCCTTGAACTTGAGGGGTGTAGCCCCAATGGACACCGATAGTGGCCATAAT -CTATCAAGATGGGACTCTCAAGAGATGATCGAGGAGAGCAGTGCTGCTTGGAGACAGGTT -TGGAATTTCCTCGGAGATGAATCCTTGCTACGGTCTCTTTCCTTGAAACATCTCGGTTAT -CGAGGCCATCGTCTGCACTTGCGCGATAATTTGAGCAATCTCAGCGGGTCTTCAGAGCCG -AATGGGCCGATAATTGCTGTTTTTGATGCTGAGCGTGCTGGTGTTCCCTTTAAGGAATGG -GTCACTCAGCTACAGACAGGGCCACCGCGGGGACCGCGGATGTGGTATGATAGCATGCCA -GGGAAAGATTATACCGCACAGATGGCTTAATCTGATATTCTGATGAGAAATTGCTACAAT -AATTACAGATTCAGCGCATTCAAACTGTAAGCACAACTTCCACTCGACTCTCAAATATCC -CGGCATAATCATAAGCCTCGCACGGTCAATCCTTCCAGCAACAAACGCAAAACCTCTCCA -CCTGCCTCAACTAAAAACAACTCCTAAGCCCACCCTCTCTCACGAGCTTCACTTGCATCT -CGACGAAATCAAAGCTCGTCGAATCAATATCAAACCCCTTCTTCTCTCCACAAACCCAAA -AACTTCGCCTCGCGAGCAGCTCCTCGCCAAATCAATAACATCAAACCGGCCCTCTTCACC -ACCTGAATATCTAAGTCTACATACTCATCAACCTCACCAGCCGCAACCCTTTTCAAAAAA -ATGCTCATTTTCTCCCAAAAGATCCAACTAGCCGCAGCAACATGAATACCAGCACCCACA -GCAAGCTGCACAAACAAAACACCAACCCCACCCTCACCAGCAACATTAAACACACCCTGA -TCCAACTGATAAGCATGCTGCTAATCTTTGTCCTGATAACACTGTGGTTACGACAGCGCA -CGCCCGCCCGGAAGCGGAAGCGGAAGATCCGCATGCACAAAGAACGCCTCGTACGCCGTC -TGCGCGCTGAGCGGCAATTCAATACCATCTCTTTGCGCCAGAGACCCGCTGGTCTGTATA -CAACTTCGTCTGTTGTCGCGAGCGCGTATGAGGCCCGTGTTGAGACGCAGTCTGCGGAAG -TCATGCCGAAGACGGCTTGAGGGGAATTAATGAACATGGGAATATGGTTATTACTATGCT -TGAGAGGTCGTGGTCCTGGAATGTGTAGGGCTGGTGGTATGGTTCGGGCTATGTGAGCAT -TTCATGGATTATGGGTGTTGCTTTGATGTGGATTAGAAGTTGATTTGGGACTGTGAGGGT -TAGGATTGGGAATAGGGTGATCGGGAGGATGATGCCGGGTGGTCTCGGATGGCTTTCATG -TTAAGGGCTTGTTGGGGAAGGGATTTGGGAGAGTACGATGTCAGAGGCAGATTGGATGGA -TGTCTGGCTTTTATGCCGGAGTTAGAGAAGTATGGCACTACAGATAGGCAATTGCGTGCC -GGATTCAATAGTTCAAGGCATCAGCAGCTCAGTTTTGTGCTTCTATGAATGGATTGAATA -AAAAATACTATTGGATGTCATTAACTATCAATGTAGTCATGGGAGGCATGTCTTGAGAGA -AAAGAAAAATATGTGCAAAGCGCAATGCTGGCTGTACAACCTCAAGAAAATGATTCATGG -GGACATGTGTATCCTCCCGGTTTCAATTCAGGGAAAAAGAAGAGCATTTACTCGACCGAG -GGCAAGTCGGCCGAAAGAGGCAACTTGCGGGACGAGTTACCGACATACACCTTCTTTTGG -GCGTCGGAGATTACCCAGTTCTGAGAGGCAACGTCCCAGTTAGAGACGTCACGACGGGTC -AGAGTAGTGGTCCACTGAAGGGTCTGGCCAGGGGCAATAGTGACTCTGTCGAACTGACGC 
-AGGACCTTCGACGGCTCATCATCGCCACCAAGGGAGACATACAACTGGGGAACTTCGTCA -CCAGTCACGGACCCGGTGTTGGTGATAGTAGCGGTCACCTGGTAGAGGTCTTTGAACAGA -CCGGGGTTACCACCAGAGTTACCGCTAGCGGGGAGAAGGTCCTGGGGAGAACCATCAGTA -GCACCCTCAGGAATGTAGTCCTTAGAGTCCATGCCAAAGTCAGGGTCACCGGACGAGGCA -CGCAGGTCTGTCGAGTTCAACCACGGGTAAAGGTACTGGCGGACGCGCTTAATTCCTTCG -GGGAACAGGTTATCCGAAGCCTTGCCAGCTTCACCAAGAACGGGGGCTTTTTTGGTCTTC -CCAGTGGTCGCTTTGTATCCGTTGTATTTAGGCGAGGCAAGAGGGGTGACCTTCAGGTCG -GAGAAGGCAAAGTTGGTATAGCTCAAACCGTAACCGAACTCGTAAATGGGCTCCGTGTTA -TCCTTGTCAAAGCGGCGATAGTCAATGAAGACACCATCCTCGAAGTCATCCTGAGGAGCA -TCCGCACCGTTGTTGGGCACGGTGAGGAGAGGAGGCCCGTAGTCCTTGCGAGTCTTGCCC -CAGGTGAATGGGGTCTTACCACCGGGGTTGATGCGACCATACAGTACGTCCACCAGACTG -CGGCCGCTCTCCTGGCCTGGCAAACCAGCCCAGACTATGGCAGTAATGTTGTCATTGTCA -TACCAGTCACTAATCAGGACAGCGGATGTGCTGTGGATCACAACGATAGTGTTGTTGCAG -TGTTGACTAGCAGTCTTGATCACCTCTTCGCCATTCTTCCACAGCGTCAGGTTCTTGCGG -TCACCCTCGTTACCGTCAACGTTGATGAATCCCTCACCAGAGTCGGCGTTCACAAAAACC -AGGGCAACATCAGCTTGAGAGGCCAGGGCGGCCATCTCCTTGAGGGCCCAGTTATCAGTC -ACAGCGAAGACGTTAGTCTGCCCGTTGCTGTATTCCAGGACTTCATTTTGGATGGCTTGC -TCGGGTGTAATCAGATAAGGGAAGTTGGCAGTTCCACTACCCCAGCCCATGGCCAAAGTT -CCGTTGTCGCAGCCACGGTCGTTGCACCCATTGGCGCCATAAGCATTAGAGCCCGCATCC -TCTCCCAAAATACTGATAAACTTCTCGTCATGGGTAAGGGGCAGCGCACGACCCTTGTTC -TTGAGCAGGACAGTGCTGTCAGAGCCAATCTTGCGGATGATCTGGGCATGGTCACGCTGA -ACATTTACGCGCTCGTTGACCTTGCCCCAGCCTTCGCTGACCATGGCATGCTGGAAGTTG -TACTCGTCGCGTGTCCAGGAGCTGAAGTTGGGAGGGGTTCGGACGCGATCACGTCCAACC -TTATAATAAGCAGCCATGATTCGGACAGCCATATCGTCGATGCGCCATTCTGGGACGGTG -CCGTTTAGAACGGAGATAGTCAGGTTGGTTCCCCAGTAGGAGTAAGGGCTACCAAGATAG -ACATCACCGGGCATAGACATATCCAGACCGGCCAGAGCAGACCCGACACCGCTGTGGTGC -GCACCCCAGTCGCTCATGACGAAACCCTGGAATCCCAGCTCGCCCTTGAGCAGCTTGTTG -AGGGTGTAGCTATTTCCGCAACCGTAGCTGTTGTTGATCTGATTGTAGGAGCACATGACA -GAGCCAACCCCAGCACGGATAGAATCCACGAAGGGCCAAAGATACAACTCGTGCATAGTC -TTGTCGTCAATGTTTGAGCTCAAACTCTGGGAGATGTTGTACCCGTATCCCTGAGCCTCG -CCAGACTGACGGAAGTGTTCCTGCTCATTTCCAATGAAATGCTTCGCACAAGCAATGACA -CCAGCATCCTGCATACCCTTGATAGTTTCGGCCATCATCACACCAGTCAATACAGGGTCC 
-GGCGAGAATCCTTCCCAGTTACGACCGCCCTCGGGGTACTTGCCCAGAGGGCCAGCAACA -GGACCCAGCTGCACATCCACGCCCTTGTCGCGGTGCTCTTCTCCCATTGCCGTACCACGC -TGGTAGGCAAGTCGCTTGTCCCATGTCGCAGCAACATTGACACCAGAAGGGAAACCCGAG -TTGTAATCACCTTTTCGAATTGCATTAGCGAATTAAACCAATTGTGTTCGGGGCGGGCAA -TCTTACTGAAGCGTATACCCAATGGCGAGTCCTGCATGCACATTCCCCACATCCCAAGTC -GGGGAATGCTACCGGTCTCACCAACACATCGCTCTTGCTCCCAGCCGGTACCGGTTGTCA -AGTTGACTTTCTCGGCCAATGTTAGGTTAGAGACAAATTCAACGGCGCGTATGTATGCAT -CAGACCACTCGCCCGCACCAGTCATCCATGGAGAGGGATAATATGGAGGAGAAGAGGCCA -GAGGCTGTTTTGACGCGACATTATTATCAGTGATACTATTAATCGACTTGAATTGAACTT -ACATTTGCATTAACCACCGACGCTGTTAGGCCGGCCACTTCTAGCCACCCGAACTTCATT -GCTGTCAAAGTGCTGTGAAAGTGGGAGAAGAATTCACAGAGGCAGCATGAAAAGAATAAA -AGGTGTGAGGAAGGAACTAGCCACAGGGAAGGGAAATACATGGACCTTAAATGTTCGATC -TCTCACAGGGGGGGCTAATATTTGTGCACGTACTCCATATAATAGTTACTCCATAAAATG -CAATAAATCGAGAAAGTGATCGACATCAAAGTATGCACTATCCTGATCATATTTACATCC -TTTCCTATGATTGGCTATGGCTGATACTCACTGGAACTGGACTTTGGAGACACCTTGAAA -AGGGGACACAAGGTACAGTATAAATAGTGGCTGGTAAGGGATCACACAGGGTGAGAAGTC -AACCCCCTTTCTTTGCTACGGTCAACAAAGGTCATACCTCGACACAACCCACTCCTTACA -TTTGTCGTAAATTCACTATAGACCGGCTATACTTCGGCTCACAACGCTACGTTACAACGC -TCATGCCTCTTCCCCGCTAATCCTAATTCTCCGTTGGATGTAGGTCTTGGACCTTTCAAA -GTTCGGCTTCTGACTCCGTCTGGGGGGATGTTGGGGTATGCATATAGACTCACCTGATCT -CGAGAGGCGACTTCCACGATTCTAATGTGGACTGGCCAATTTCTTGATCACCACTTCTCC -TACCGAATGGGGAAACCCTTCTCATTACATCACTCCGAGATGCGAAGACCTGGTCTCCCG -TTCCTACAACACTGCACTGTGTTGTATATGGCGAATTCAAAATATTCCATTTAGTTTCAA -TCAAAGAATATGCTGTGCTCTAACAATATCGGGTCTATTTTCCTACCCAGATACTATGGG -TTAAGCCATCCGCAGCTAAGCGTTTTCGTGGAGATCGGTGCGGGGAGAGGGATGCCGACT -TTTACGGCGTTTGCGCGATTAATTGTCTACTTGGCCAAGCGCATTCTCCCGTGTTCTTTT -CGGAGACTATTCATGTAGTGAAATGGTGCACTGCTAAGTGTTGCATACAAAGTTCTCCCT -TCACCGAGAGAAAGTTCCCCCACACTTGTGCCCCGGCATCGGACCAAACCCCCATATCCC -CGGTTTTCTCGGCTTAGACCAAACTGTCAGCACGGAACACGCTGACCCCGGACGGGATGC -ACCGCTAGGCCATCTTTTCCCATTTTGCGGCTAATCATCGGTGACAGGACTTAATCATCC -TTTGTTTCAAGGCGTCCACCGGTTGGAGATCTGGCAGTCCGAGCGATTTAGTTTGGGGGA -TCGTAGCGTTGAAAGCTTGTCCTTTTTGCATAGATTTCATTCTAGATAGGTATTTCTCAT 
-CAGTGATTTCCGGTTTGTGATGCGACAGGTAATCTGAGACATCTTGACGTCATTAGCATA -GTGAATGCCATTCCTGTGGGTTATACTCTTGCTCGCAGAGCGTCGGACTCGGGACCTGCA -ATCGGGGATATGTTGCCATTTCGCCGTCGGGGCGTGGGGTCCGGGGAAATATCCAGGCTT -GGTGTAGTGGCCCCACATATTAGGTGGGATTTCCAGGCACAGAATCGTCCCGCTGGGGGT -CTGAGGCCGAGATACCGTGGTACGGAACAAAATGCTTGTAATTTTCAAGGCTCGCTATTG -CAGAGAACAGGGTTGAAAGCGATTCTTGGTAGTAGTGTAGTCCTCGTTCCCATTTTCTCT -GTTAAATGTCAAGACGGCGGGAACTATCAAATATCCCAACTGAATGTGGATGCTAAACCA -ATGGCAAAGTCAGACATCACTGCTGATATCGTCTAGCACTTGGAACAGAATGTGATTTTG -CACTCTACAGCCTGTGCCATTTGCACCTCCTCGGAATCCAGACCAAATTTTTCGTCGAAT -CTCATCAACACCAATCCCTTTGGTGTTGTATTGATTGATCATTTCTTCTGCAATAGCACA -TAGCGTGGAATTGAGCACATCTTCATCCGATATCGTTTCTGGAATTTGTCTCTCGGACCG -ACATCCACACAAAGCCGGGTCTTCTTCTTGCGGGGTTTGCTTTGGTGGCTGATCTGATGG -TTGCACCACTGTCGAGGCACATGCCCTACTGACCGCTGTTGGCAGTTCTCTTGTCACAGA -TCCCTCTTTGTAGACGTGAGGTACTGCCATTGACGAATTCAATATGCGAGTAGATCGACA -TGATTGAGACAGAGTACTATTTCCCTCCGCTCGCTGAATAGCAGGAATGGCAACCTTCCG -GCTAGTGTTTGCTCCCGTATCCACTTCCTGCAGATACTGCTGGACAGAAGCACTCGACAC -TCCCAGTGATCCTAGAAGATCTCTCAAATGCTTATTCTCAGCCTCGACTTTTTGCGTTGC -GAGTCGGTGCTCGATATCTTTTTGTTGCGCTCGTTCTTTGCATACTGCTAGACGCTGTTC -TAACTCATTCACGTACTCCTGCTTCCGGGCTCGACTCTTGCGCTGGTTCTCACGCACCCG -TGCGAGTCGCTCCACATTCTTCTATGGCAGTTAGGGGTCATGTATTGACTGCAACTACTT -CAACGGATGTGACTGACTTTCTGGTCTTTCGTTGAATCCATCAGTTGCAGACCCAGGGGT -ATGGGGCATGGTCGAGTTCCCTATCAGTCGAACTTTTCATATAAATCGCTCATCGGATTG -AATGGGAGTATGAAAAGCGTGGTGGCAGGCAGTTTGACACTGTCTTTGAATTTCGAGTTC -GGAGAGAAGATGCATCATTGCTGTCTGACTTTTATGGTTGATATTTGGTGGTCACGTGAA -CGTAATTTTTAGAGCTGCAAGTAGCATTACAATTTAGTTCCATATATGGTGAAATGAATG -GTATAATTAAAATGTTCCCACAGGAGGGACAGAACTCGGGATTCTAGTGGTGCCGAAAAA -GGTACGGCTTTGTGTCGCTTTGCTACTTTCTCCCTTCCGACCAAGGCTTCTATAAAGTAC -TGGACCGCCCGAGGCATAGAGAGCAATGTCATGCTCAAAGCCGTATGAGCTATTCAGTGT -TATCATGGATACTAAATTTACATATCTAGCCAGGAAAGTCTCAATCTGGGTAAACTTAGG -AGGCGTTTCAGGGCAACAAGAATAATTAAGACTGGTAAAAGTAGGATGAACAAATGTGAT -GCATACATTGTTTCTAGCCCCAGGTATATGTTACCGTTGCCTACTCTCTCCATACAGAGT -ATCACAAAGAGAAATACTATTTGAGGAGTGACCACATAATTCTTGAGCATTTCTATAGGT 
-AGGTACAAGCCGTTTGATGAAGTCTTGCAGGTACTATGTTCTGACTGGGTACAGTAGCAG -GCAGAGTACCTAGGTATACAGTATACTACACAGAGACCACTAATTCTATAAAGCCCATCA -GCCCGTCTTGAGGAGAAATTCACGGATTTATGCAGTCGTTTTTCCAACCAAAGCAACCAA -AGCGAGGCAAGTCAATAGCCCTCGGATGCAGCCCGCAAACGATCGCCCTCAACGTCATTG -GCCGTCAGTCGTGAGCTATATTCTTTTTCTGCTAATTCGCTCCTACTTTTCCCCCCTCAC -AGTAAATATCATTATTGCAGTCCACTCTCTCTTACTCACATTATCAAACATTCATCATGG -TCTCCACGCGACACCATCCTCGCGAATTCCCCTCGCTCAACACCGGGAAAGAGCTGTCCA -AGCCACCAGCTACCACCAACGCCAATTCGCGCAAATGGACCCATACACCTGCCGCTGCGC -TGACAGTCTGGCTCGTTATTTCAGTTCCCTTGGTCATGTGGGACACAGGCTATGTATTGC -TGCGACCGCATACTATGCCCGGTGGAAGTCTACACTCTCCGATCTGGACACCTTACGCCC -TATACGGAAAGGTTGACTATATCTACGGCTGGCCTGCCTTCAACGCGCGCAATGGCTTTA -CTGCTGCTCAAACTATCATGAATGTTTTCGAGACTATCGGTTATATATCTTACCTCTGGA -TTGTCTACCACTATGGGTCGACGATTAGAAGAGCCGGATCGCAAAAGATCACCAAAGGAT -TCACCTGGTTGCTGAAGGGAGAAAAGGTGGTCGCTGGGCGCATCGGTGCGATCGCGCTCC -TGGTCGCTTATACCGCTTCTGTCATGACTCTGAGCAAGACTATCCTTTACTGTAAGCTGC -AATAGTTGCCTGGGCTTTGTGTATTGGTTTATAACCCGAGAAATAGGGCTCAACGAGATC -TTCTCCGGCTTCGAGAACATCGGTCACAACGACCCGCTCACTCTCTTCCTCTGCTGGATC -CTTCCTAAGTGAGTTCGGCCTAGTATATTTCCACACGAAATTCGAAACTAATATTACTAA -GCGGTATGTGGCTCGTGTTCCCCAGCTACAACATCTATATCCTCGGAGGTGAGATTTTGA -ACTCGCTTGAGCTCGCAACACCGCGCCAGAAGGTTGGACGCCCGAAATCCACATAGGATC -TGTCGCGCAAACTAGGGACTAGATTGCGTACAGAAATTCGACACGTGCAATGACCATAGC -GAGTGTGCTTGAGAAAATTCGGATCATTTCGGCCAATCTTTCGGTAGGTGGCATTTAAAG -GTGGCTAGTATAAGAGAACGGAAAAAAGAATCTTCTTCTGTAATGCACTCGAGATGATCG -ATGAATATCTTATTGTGTTGGCCATTTAGATTCCTCGGGCACGTAGCTGCATGCTAGTCC -TAGTCTTCTGTCAAGCTTACGATGTGCCAGCCCAGATGCCAATTCGGCTGTCCTACTGTC -ATACGCGTTCGGGTCCATTCCGAGAACATCAGACATATTACGAGTATACCCATGAAATTA -TGATGTGTGATTTTTATGGCACGAGGGCGAGATCGTATATATATTCCTGGTCCTAGAGTT -AGCTAGTATACGAGAATAGTAGGATATAGGACTGGGCAAAGACTCCAGGCATGTAAATTC -AAATCGATTGCGTACTTTCTTCTGCTTCAAAAATTTACTCTAAGCTACAAAAGACTTCCC -ATATACAACATAGATTGTGGTGGAGTCTCCGTAATATGGACAGCGTCAATGGCTGAATCA -TTGCCAAGCCGCCATTCGCCGCTGAACCTTTCCCCATATTCGGGGACCTCCAATTTATTT -TCTCCTCAATTCAGCCTCCTGTTCGCTCCTCTCACTTTCTCGGTATATTCCACTCCAAAT 
-ATACCTCCCCCGCAACGAACTCGGCCTCCATCTTGGAAGCAATTTCTTTGTTCCCGCCCG -TGCCGAAGGATGCTTCGGGCCTGGGTCCTGGGGTATCTGCACCCGTCATGAACCAACCCG -ATTCCGACAAATCGCCATCGAAAACCCCTGGTGTCAGCTTTGATCTAGGCCAGGAGGAGG -CAGCAGCTCTCGACGACTTCGATACGCAAAGCCATGCTAGCTCCCGTGATCCCGGTAGCG -CACAAACAGCGGACCCCGCGCTAGGCCCAGACAACGGGACACTAGACCAACGAGACGACG -GCCAGTCCAATCTTGAACCTCTAAACGACGACGATAAACCGGCTTGGAGCGAGATGAAGA -CCAAGGCTGGAAAGGAGCGGAAGCGCCTGCCGCTGGCATGCATCGCATGCCGCCGCAAGA -AAATCCGCTGCTCGGGCGAAAAGCCTGCCTGCAAACATTGTACGCGCTCGAGGATTCCGT -GTGTATATAAAGTAACTACACGGAAGGCGGCGCCGCGCACGGATTATATGGCTATGTTGG -ACAAGCGACTGAAGAGGATGGAAGATCGCGTAATTAAGACAATACCCAAGGAGGAGACGA -GAGATATGGTCTCTATTGGGAGATCGGTTGTCAAGCCTTCCGCCTCAGTCCAGCCGTCAA -AATCACAGAAGAAACGAAGCGCGGAGGAGGCATTTGCGGCAGAAATGGAGGAATGGACTC -GTGGGGATGTCCGCCCACCACAAGAGACGTTCCCAATGACCCGTGAAACCAAACCCACCG -ATGGGACTGGCCTTCTCACCGAGGGAGCCGAGTTCTTACCATCATTGGAGATTCAAGAGC -ATCTTGCAGAGGTGTTCTTTGATTGTGTCTATGGGCAGTCATACCTCTTATTACACAAAC -CCAGCTTTATGCGACGGCTTAAAGCAGGCACAATTCCCCCTGTCTTGATTCTTGCTGTCT -GTGCTGTATCTGCACGATTTTCCACACATCCTCAGCTCAACTCGGATCCGCCATTCTTAC -GTGGAGAGAACTGGGCTAATCCCGCTGCGGCCATCGCCCTGAGCCGGCACGACGAGCCAA -ACATTACTATCCTTACGGTTTTTTTACTTCTCGGTCTTCACGAGTTTGGGACCTGTCATG -GTGGACGAAGCTGGTCATATGGAGGCCAGGCATTACGAATGGCATACGCGTTGCAGCTCC -ATCAGGAACTTGATCAGGATCCAGTACTCAATCAGAAGAATGGCAACGGCTCACAGTTGA -GCTTTACTGATCAAGAAATTCGACGCCGCACTATGTGGGCATGTTTCTTGATGGATCGAT -ATAACTCTTCCGGAAGTCAACGGCCCCCTATTGGGAATGAAAAATTCTTACAGATACAGC -TTCCTATCAAGGAAACTCACTTCCAGATGGAGATACCTGGCCCAACAGAAGATCTTGACG -GCGATGTCCTTAATCCCGTACCTGAAGATGCTGGCCAATTATTTAATGCTAAAGAAAACA -TGGGTGTCTCGGCATACATTATCCGTGCTATCATCATATGGGGGCGCATTGTTGACTACC -TGAATCTGGGTGGGAAGAGAAAGGATTCACATCCACTTTGGCATCCAGACTCAGGGTACA -TGCAGCTTAAGCGGCAAATTGAGGAATTCTCTGCGTCTCTTCCGGTACCTTTAACCTTCA -CCTACGAAAATCTTCAAATCCATGCATCAGAGAAGATTGCAAATCAGTTCATTTTCCTCC -ACATCATCACACATCAAAACATGCTGTTTCTCAATCAGTTCGCCATTCCCTTGTCTCCAG -GGGGGCGACCGCCAAGAGATATGCCCAAGTCATTTCTCAGTAATGCAGGCCGAGCCGCAG -TGGAGGCTGCGCATCACATTTCAGTGCTATTTGACCGAGCTTCGGCCTATCCCCTCACTG 
-TTCCTTTTGCTGGGTACTGCGCGTACTCGGCAAGTACAGTCCACATCTGGGGGATCTTCT -CAAAGAACGTCCAGCTAGAGGCACGCTCAAAAGAGAACCTCCGGCATACCTATCGCTATC -TCAATAAAATGAAAAAGTACTGGGGAATGTTCCATTATATGGTTGAGAGTGCCAAGGATC -GGTATCGGCTGTTCGCTGATGCAGCTATCAAGGGCTCTGTGGCAACTGATGGCGCATCGC -TGGCACCAATGTTTCAATATGGTGACTGGTTCGACAAATATCCACATGGAGTGTCAAGAG -TACACTGGGAGGGACCCGAAACTCGACACAAAGAAACGGGCGAAGACGCAGTCATGGGCC -AGAAACCCGACCTCCAGAGCGTGGAAGATTTCTTTGCCGGCCTTTCCCCCACTCCGCAGC -CAGTTCAACCTCGCAAGTCCCGACCTCGGAAGAACAGCAAGCTATCAAATGGAGCACCGG -GTATGGACCAGACTTCACCGCAGTCAGTGATGGAATCCATTGTAGGAAACGCCCCAGGTG -GCCCAGACAGTGCATTCCCACAGCCATCCATGTTCCAGCAATCCCGGCCAATGCCATTCG -GTCAACCACCATTCGACTTCAGTATCCCACCAGACCAATTACCGCAACTAGATCGGCAAT -TTGTCTACGGTTCATTTGCTGACTTCGACCCCGCCTCATTTGTTCCAAACAATCCTCCTA -TCCCACCGCTCAACGGCGTCGAAACTCAACCCCCAGATCAAACACTGTTCACTGGCCACA -TGGACCCGAGTGCCCCTGCTGGAATGGGCGAGTTTTACCAGCCCAGCGCGTGGTTCCTGC -CCTTTAATCTTGACCCTGTTGGCGGCGGAAATGCACCATCACAAGCTCCGCCCCCGGCTT -CCATGTCTGGGGGAAACAATGGTGACGACACGTCATCTGGGATCCCTTCTGTGGATATGT -CGGGCTTTGGGGGTGCCGGAGGCATGTCTTTCTAAGCGCATGTGATCTTGGTATAAGTGG -TTTGAATATGAAACCCTCATAGCTGTGATCGGGTCGGGTTTTATTCACTGGCTGCTTTCT -CAATGACAGCTACGTGGTGTTTCAGTTTTCTTTACAATTATTGCAGAAGGTTGGAGTCGG -GTTTTAGGGGTGGGGTCTGTTACTTTCGTGTTACTGTTCATATCTTAGGCACATTTGTGT -TGGTTTCTGTAAATAGTTCGTCTCCTACATTTTCCTGAGCAATTTTGAAAAATACCCATG -TGTAATTGATTTTCCATTGCAACCTTGTAGACGATCCCGACCGAGCTCGGCAAAAAAAAA -CCTCCCCCGATTGACCGACCCTACTACCTAATGCCAAAATACTTCAGCAAGTCAAATTAC -CAAAAAATAAATTGTTGTGCATCATCCGTGAATCGAACACGGGCCTCGTCGATGGCAACG -ACGAATTCTACCACTAGACCAATGATGCTAGTTGTATGGTTAACTGTCCATTGTGGGGAA -TATATCCGCTTAATATCTATATCCACAGTGGCTCGAAGTATGAGATCAGCATTGAACAAA -ATCCTATCAATTATCTGTTTAATTGAAGAAGGTGGGTATAACTTGAGATAATTTATGACT -TGGTCCTGATTATGCTCGCATTTGAACCTTCTGTATGTCAATCTAGATCCATGAAGATGG -CATATGATATATTGTAGCGTGGATATATATCCCGCCGAGGTATGTTAAAAGCCACAAAAT -ATGTTCGTGGATGAACAAAGAAAAAAAAAGAAAGTGCAACACAGATAAGCACACAAGGAG -AATAGCATCAGAATATATTGTTTGAACAATCTGACAAATAGAAGTGGCCATAGCTGCCAC -TGGATAGAAAGGATTGTTGGGAAAGGGTATGGGAAACACAAGTCGCTATAAGACAAGGAA 
-AACCAGAGGAAAGAGACAGTGGCTATCAACTAATCATCCCACTCTTCATCCAGTTGATCT -ATGAGCCTATTCTCGGCATCCACATCACGCCCCTGACCACGTCCTCCCTGCCCCCCTCGT -CCTCCGATGGCGCCAATGAAACCGCCGCGGCCACCGGAGTCCGCGCGGGTGTAGCCATTA -CTCGCGGGGGATCCCTTGCAGCGGAGAAGATTCCCTAGCGAGGAAAAGATGATCACGAAC -ATGTCCTGTTGCACAGAAAGAAAGGAATGGTCAGCGTCATGCTCCAACAATGAGGCTCGC -AACCGCAATAGATTTAAGCACATGCAGTAGAACATGTAGTCCATCTAATGAGGCAAGATA -AGTAGTGACTATGCAAAAACTGTCTCGTATCCAGGAATCCCGAAGATGAAATGATAAATC -CATTTACAAGGGTCCAAGGAACAGCGCATTGCATAAATCAAAACATCTTGCCAAACATAT -GAAAGTGTACATGCCATGACCAAGGCAGGCGGTCGAAAACCCAAAGAATCAAATTCAaag -gaaggaaatgcaagacatgaatgaccgatgaatgataagaaccaaggaaatgaatagaaG -AGGCCAAACGCAACCGTATTCTCGAAGTGATTCGTCTCCAGACCAGCTAGAGCGAGCTCA -ATCCATCCTTGAAACAGGTAGTCCTGCCGCATTTGAATCCCGGAACTTATGTGACACGAA -ATTGCCATATCTGAATCTGCGACGAGGCCAACATGGGATCAATGCTAGGATACTTACTTT -AACAAAGTCGAAAACTCCTGCCCACAGGCTGAAGTTCGGACACTGGCGCCAGCCACGTTG -ATGCATGACCGTTCGCTGGTAGGCGCAGCCACCAATCAGATATACGGCGATGGCGATCAG -CATACTAGAGCACAGAGTCAGCATGGATTATATTGGGTCTTTTCCCAGTAGATAAAAAGT -ATACTAACATGACACCAAATACGCCGCCGGGTCCTAAGCCCCCAGCGTTGGGGTCCGCGG -CGATTCCACCACATGCCGCCGAGCTGCGAACTTCGAAAAAGTACGTGCATTCATCCATGG -AGCCCACGAAGGAAGGTGTAGCGACATTTGGCGCAGCGTCGCGGTCACAGAGGAAAGACA -TGATAGTCGATTTAGTGCGTGTAGAGACATTGGAGCTGCCCATGTTATAATCCGGACAAG -GCGAGCCATTGGTATAATTGAGGACGAGTTTGCGGCCGCGGAAGAAAGGTTCAGATGCTT -GTTGCCTGGATATGATTGAGAACCGTTAGCACAATAAGCAGTGAGTGAGTCCCAACAAAT -GCGATTAGTTGGAATGGAATTCGACCATACCCTATGGAATACACTTTATCATCCCGCTCG -TAGTATGCACTGACGTTCTTCCACCGCGATGACTCTATCCCGACCACGTCCGTCACATTC -TCAATCACCGGTGAGCAGATGTTGATTGTGAAATTGGCTGGGTAGTCGTGACCCCTGGCA -AACCAGCTGCCTTCGCGCGCATCTTTCAAGAGTTTCTTGCCGTCCTTCATTTCGGGCGGG -GACAATGCGATCGCATTAAGATCAAAGTACAATCCTGTAGTCGGGGAGCGGGCAACACAT -GGAGCGTCATCTTTCAAATGAGTAGAGTATGACGCGTTAGATAGTCCGAGGATTGAGGAT -AGAAGGAAAATGGAGTAATGAGCGGAGCTGGTGAGCTTCATGGTGGGCTATGTTGGGGTC -AACCAAGGCAGTTTAGAAAAAAAGAAATTGGACAAGGAGAGACTATAGAGAAAGAAAGGT -ATCTGCAAGATGATAGAAATGACATGGATGATGGAAGGTGTATAGGTGTAGTGTTCAGGT -ACCTAGGCACTGATTGAGGCTTACGTAGCATCGCAGCTGTGATTATGCGGAGCAAATCTG 
-CGGACATCCACTATCCGACGTCGTACGCGATCTAAGGCTTCAGGGTCTTATTTAGGGATC -TCTCCTCCCCCAAATTCCTGGATTCTTGCTCTATTCCTCGGCTCCTTCATTATGTCTACC -AAAAAGGAGTCTCAAAAATTACCTGTGCAGCCAGTGCCAGAGAAACGCGGTTATGAGTTT -GGTGGACCGTATGTTTTTCCCCCTTCAATTAAACTAAACTGAGCTGACATTTGCAATAGA -CTAGGTGCTTTTGGTATCATCTTTGGATTGCCGACCTTGATTTACCTCTTTACCTTTGTC -TGCAACGATATTTCCGGTTGTCCCGCGCCGTCGCTGCTGAACCCCTCGACTTTGTCGCTT -GAGAAATTGAAGCATGAAGTGGGTTGGCCTGAGGAGGGAATTACGGCGCTTTATGATACC -AATGTTACACTGTGGACCCTAAGCTACTATGCCTTCAGCCTGTTCTTGCAGGTCTTCCTT -CCCGGCCAGGAGGCAGACGGTGTTGTGTTGGCTTGCGGAGGACGGCTGAAGTACAAATTT -AATGGTATGGCAATGGCTGTCTCTTCTCGCACATCGCATCCAAACAAGATGATATTTTCT -GACTCGTTCATTCTATAGCATTCTCTTCTGCTATTATTATCCTCTCGGGCCTCGCCGGAG -GCACTTACCTCTATGGAGCTGATTTTGTGGTGTGGACATTCCTCTGGGACAACTATGTTC -AGGTCATTACAGCGAACTTGTTGATCGCTTCTTTTGCTGCGCTGTTCGTCTACGCAAAGA -GCTTCACAGTTCCAGCCCCAGGGCAGGCAAACTCAAGCCTACGAGAGCTGGCACCTGGCG -GCCATACGGGCAACATGCTCTACGACTTCTTCATCGGCCGTGAACTCAACCCTCGTGTTC -GCCTGCCCATTCCGTTTGTCAGCGAGGCATCACGTACCCTTGATATCAAGGTGTTTATGG -AGATGAGACCAGGCTTGCTTGGATGGACAATTCTAAACTTGTCCAATGTGGCCCACCAGT -ACCGAACCTATGGCTATATCACTGACTCGATTGTGCTGGTCACTATCTTCCAGGCATTTT -ACATCCTCGATGCGCTGTACATGGAGCCAGCGATTATGACCACCATGGATGTAATCATGG -ACGGATTCGGATTCATGTTGTCTTTCGGCGACGTGGTTTGGGTGCCCCATCTGTACAGCA -TCCAGAGCCGGTATCTCTCTGTCTTCCCCTACGAGCTTGGCTTGACTGGTATGGCCGTTG -TTTTGGGTGTCACGGCCGTTGGATATTCAATCTTCCGCGGCGCCAACAACCAGAAGAACC -GTTTCCGCACGAATCCCAATGACCCCCGCGTGAAGCACATCAAGTACATCGAAACCGCCG -CTGGATCTAAACTGATGATCTCCGGCTGGTGGGGTCTGGCACGTCATATCAACTACCTGG -GTGACTGGACTATGTCGTGGGCATACTGCCTGCCGACTGGTGTTGCGGGATACGTTCTCA -TCGAGAGCATCAACCCAGCCAGTGGCGCTGTCCAGAAGCAAGCTGTGCAGACCCCGGAAA -TCCGTGGCTGGGGTATGATCTTTACATACTTCTACATGCTCTACTTTGGTATCTTGTTGA -TCCACCGTGAGATGCGTGATGAGGAGAAGTGCGAGAAGAAGTATGGTGCTGACTGGAAGC -GTTATACCTCCATCGTCCGCAGCCGCATTATCCCTGGGATTTATTGAGTGATCCCAAAAA -CTCATTTTACGGGCCTGCGGATTCTGGCTTTTGTACATTAATCCCCAATATTTCGGCGAA -AACTATTATGATAGGCTTGCATTCTTTGGATTACAATCTTCCAAAGTTGTCGCAGGATAA -TTTGACAAAACAAGATAGAAAATTTATTACCACGAGACCATTTTTAGATATGAGTATCTA 
-CATCCAAGATTTGCCCATGCCATTCATTGCGTCACTTACTGGAAACCAAGAGGATTGAAT -CATATCAGCTTCGAACTATTCCTCGTGGATTCCATGACAGTATCCATCAAGCGGAGAAGC -TATAGGAACAACTCTTTAAGTCGATGTCAAGAGTGGATCGCAAGTTCGAAATCAATAAGA -TGACCTAACAAATACAAGATATGAACATTTCCGTTGGTCTTTCATCATCTTCAGAATCTT -CAATATCGCCACCTGCTTTCCTAACAGAATCAACCATATGAGCATTGAGGTCTTTGACGA -AGCGATCAATCTTTGTCTTGAAAGTCGGCGCCAGATCCCATAAGTAACTTATCATCCACA -TTCCTAGAAACGAAGACGGCAAATTGATGACGGGTCAGTCGTTCAAGTCGCATATCCTGC -CGTTGTGCTTTTGTTTTTTTTGAAGATATGAAGAGTTGACAAGTAGATCAAATCCTTGGT -CTCGCATGCGTCGTAGATTCGAGGTCCAGTCAGTATCCGGCATTCGGTGTCAGTCAAGTA -CCTGATGAAAGGAACAAACATATCTCGGAATCCATATTGACATCCTGTGGTATCGATGAT -CCATTTTTCGTTGTTGTCGAGTTGTGTTATCTTCAGCACCGTGTGTGGACCACCGTGTAA -ATTAGGACCAGGAACAAGCCTAGCTGGAACCCGTGGTCTTCCGATATTGACGACTATGGT -CTCAATTCTCAGTGGCACACCTGCTTTGTTTTAGCCATGGCTTTATATGGTACTTAAGAG -CTGCAAAGGCGTTTTACCTCGAAGGAGTTTTCTGGTTAAAGGTCCCAAAAGGGCCATGGC -TGCTGTAGATTGGCTTTTAACAAGAGCTGCTTCCTTGTGCTCGATGTTGTCGCTCATGTA -GTTTGGGAAGGGACCCCGTTTGAATTGAGGCGAAACCGTCCTTTGATTTTGATGGAGATA -GAGAACTCCATCTCGATACTCGATCCTAGTTAGATCTACGTCGTACACGGTCTCCCTGTA -TGTCAACATAGCTGCCTTCAGTGTCTGGGCAGCTCGGAACAGTATCTTTCGTTGTCGCAT -GCTCTTGCAATAGTGTTTGTGCTTTGGCCAGCACATATCTGGTCGTGGCTGCAGTAAAAA -AACAGCCCGCGACTCGCCGCTTTGGTATACGGGTGCATTGGAGCATCCCAAACGCATTTT -CCTAGTTTTTGTCCCACATTTCGCGCACTCGACTGAAGGCCTAGGAGCCATGTTGAAGGC -AAAAACATGGGCTTCGACCACCTTCAAAGCTATGTTGAAGACACGGGATATCCAATCAAA -TTTTGCCGAAGATGTGAGGTCGTAGATTGACAAGGCTGGGGACTAGAAAGTGGGGAGAGA -TAAAGGAGAGTGGCTAGAAGAGCGAGAGCATCTAACACTCAAAGGTTGAGAAGAATCCTG -AGAGTTTTGACCTTCCCAGTGTTGACAGTCCGTTCGACCCATGGAGGTCTGCCTGCTTAG -GCCGCAGTTTGTTTGGTATGTAGTCTCACGGTCTATCAAAGACTGATGTTAATCAGGAAC -AGCAAACCAGTCCTCTTCTCCCTTTGACAGTCCTTTCCTCCCGACATGATACTGCCCTAA -TTATCTCTGGCTTCAAATTTAAAGTACTCTGTACGGTTTATGGCTTTGAGCACTTTCATA -TGAATATCTCAAAAGATATCGAAAAAGAATGGCCGAAGACCACTATAGATACATCACAAT -GCCGCATAAGCGGCCAACTATTATTGTTGTTGGTATCTTGAAATGGAGATACAATATGAA -CACTATGTATATGTAAATGGCCTAGATTAATCTTCATCGAGCAAACTTCCATGCTCAGGA -CACAATCAACGCTTTCGCTTTTTGGGGTCAACCAAAACATCTAAGCCCCAATATTCATCT 
-CCTGAATTAGGCACCCATCCCTTGAGAACCAGTTTCGCATCTCCGATGGCCTGCATACGG -ATGTGCAATGGGTCTTCCAAGCTGCTAGGATTTTTGAAATCCTGATAGGTGGTTATATCT -GGCATGTGGTGTTGAACTGTATTGACATGCGGAGCTTCAATTTCCGCAGCACCATCCAGC -GATTCGGCTGGGGGAAAATCGGCTGGATAGATACGAAGAACCGAGGCAGGCGAAATTTCG -CGAAAAAGAGCCCAAAGCTTAGCCCGTGATAGAGACGAGGCCCCACGGGCACCAGGCGAG -GATGCCCGGTTTCCTTGCTTCACTCCGTTGATGATAGTCTCATACAAACCGGTTTTGGAA -CGGCAAAGAGTTGGCACTGACTTGCTGCCGCTATCCATGGTCATTGCCGATGGGTGGTGA -TACGAGGGAGCCAATATCCAAACGGCGCTCACATTACCTGGTCGGGACTTGCTAGATTTT -GCACCAGGCGTTGAAGGGTCTCCTTCCTGCTCCAAAGATCTCGCAGGACAAGGCACGGAT -TTTTGCTTCGCAAAAGGCCAAAGTGTCGCCACCTCTTCACTTGGCACCGAAAGAACTTGG -AATGACCGGAATCGATACCCTGGCGTGCTTTCTTTAAACTTTGTCTCCCTAGGCCAAGAT -CGCCCACCCAAAGCCTGCATCCGGCCGTTCTCGCCAAAGGAGCGGTCACAGCCAACTTGA -CTGATCTCATCCTCAGGAAGAATCAGACCTGCGAGATAGGCGTTTTCGGTAGGGGCAACG -AGGCGACTGGTATTACAAGACAGCAAGGAGGACACCTGCCGCAATGCAAGCTTGTCTGAG -CAGGACTTGCTGCGTGTAGACTCGGCGTCCATGCGAGCTGGCTTTCGACGCACGACTCCG -AGAAGAGAGAAGTGGGCCCGGCCATCTAGAAGGGTCTCTGTACCGGGGACCTCTGCGTCT -TGAGATTTTGGCAAAGGGCCTTCTTGCTGTAAGACTTCCCACGGTGTTGCATCTTCTTGG -GCTGCCATGCAGAGCTCCATACTTGCGTCGCCGCATGGAGCACATGTGCAATACATGTAG -ATATTAATGTCAGGCTGAAGCTCAAAGGGCGGCCATTCTGTGGATAATTTGGAAGTACCA -TCTTCATTCTCTTCAGTCTTTCTCCGTTGTATGAATGGAGAGCCTGAGACTTTTTCATTA -TCGGGGGTGGAAGTGTCATGGCCATTTGGGAATTGCTTTTCCCGAAATAGCACACTGTTA -CATTCCGTCAAAAGCCAATAATTGAATGCCCGGATGGCCAATATCTCGGCATGACAGTCA -TGTAGGACAAGGCCCCTGCAGCGTGGGATATGAGATGCCGACAAGCATTTTGCGCCCGTC -CTACATAAATATCAGCTTGCTTGAATGTAGGTCGCTAGCATATATGCCTTGGTCAGGTTT -GCAATCAAGACTTACGTGACAGATATGCAGGTCAGACTTTCCGAGGGTGTGTTTTCTCCT -ATTAGCGAAGTGACAAAATGTTACTCTTAGAATGAAAAGAAAGACATGTGAGGCGAAGTA -AAGAGCAAATAATGGATTGACCCCCTTCGCTATCCTCATATAGACAAGGAGTGATAGCTC -TTCCATAAATAACTCATCCAAAAACATACCTTTGACAGCCACAATACCAGTCATTGGAAT -CCATTCTCTTGATCCATCCGGAAATATGGTTGGTTTACTGCGTGTTGGCAGAGCATCGAA -ATGTGCATGAACCAACGATGCTATCCGGGACGGTAGCGACGACCCAGATATATCAGCAGA -CATTGTATATGATCATAGTCGAACTAACAATTTCTTTAAAACAACTGATAGCTTTGCGAA -AGGTGAAGAGATAGGATCGAAGAATCCAAGATGCTATACGATCACGTGAGACTATATATG 
-TGATGCCTGAGGCGTCGGTTGTTCTCTTTTGTCTTTCAAGCTTTCCGCCATTCTTCTCAA -CCCAACGTCAACGTATCTACTATCCATCATGGCCGACAAGCTCCGCACCATCCAGAACCT -CGAGGCTCTCCAGGCCCGTTACATCGGTACCGGGCACGCCGATACCACCAAGTACGAATG -GACGTCCAACATCCTGCGTGACAGTTACTCCTCCTACGTCGGTCACCCTCCTCTGCTGTC -CTACATGGCAGTCGGCATGGGCGAGCCCAAGGAGAAGGTGCGCGCGATGATGTTGGAGAA -AATGGTCAGAGGCGCTGGTAATCCGCCAGAGGTATGTGGTATTTTGCAGAGTCGCTCATA -ATATAAAGTTGGCTTGGCTAACATGTGCTCTCCAATGATTCAGACTCAAGAGTAAAACCA -CTCAGTGTCGTTCAATCTTGTCTTTCTTGATACCCCTTGTCGTGCCCTTGGCTTTCCCAA -CAGGAAGGCAGGTCCTTCGATCCTCAGACCACCCCTAGTCCAGCCCAGCAGCCTCGCCGA -CATCAGATGATCCCCTCTGTGTACCGGTCTTTAAACTGTCAAAGCCCCATTCTCGAGATT -TCGCTCGATCCCTTCGATACGGAACAACACTTCCAACCCAACATACGAACGCTTTATGCA -AGCAAGGCCTCTCTCCGGGGATTGGTATCCCCTTGGATTGCCATTTTGGCCCCTGGAATT -TCGAGCATCCTCCAATTCTTGTTAGTCTTGTTAATCAAGAGTCAAGCTGTAATTCACCAT -GCCTACCACATGAATTATGCGCTCAGCAGCAAGTTCTAAGTACCCAACCAGCATCATGGT -GGAAGACTTGGTATTGATCAATGGACATTCAATTCCCCAACAGCATATACTGCATGGGAG -AAGCTGTTCCAGAGCTGTATCTCGCTTGATTGGTCATGATCTGCAAATCACAATCTAATA -TAGATGTTTGAAGCATAAAGATATCCAAGATCAAGATCTATTCCCATGCATATTGATATA -GTAATAGACACAGGACAGAGTAGTTATATAAATCATGGACAGATTAGAGCCCAAACAAAG -CTCAGAACAAAAAATAGAAGAGGCTATCAAACAAGCTATGTAGGACTCCGTATGTATAAG -CCAAATAATTACGCTCTAGCCACCAATTAAGCACCCTCCGACTTCGCCGCCAACCTCGTA -TTCTCCACCTGCAACGAAGAAACAAGGTCGATCCACTTCTGGCGCTCGGAGCGAAGCTGC -TTGAGCTCCTTCTTGAGATCATGTACCTCGTGCTTCAAGCTATCCACCTCTGACTTGATC -TTGGACTTAGACTTCTCCAAGCGCTCTGCCTCCTTTCGCTGCTTCGATTGAATCTTCGAC -ACAAGTTTGCGCTGGCTAGCTTCAATCTTGTGCAGCTGCTTAACAAGCTTAGACTCCCCA -TTGAGAAAGTGAGAGGCTGCTTTGGAGAGCGGCGCTGGCGGTTCGGGCGTTGCCGTCCGT -GTAGATGACTTGTCTTCGTTGTTGATCTCGCTCTGCACTTGGCTCTGGCTCTCTGGTGTG -CGAGTGCTGGGGGGTCTGGGGGACATATTCGCCCCTACACGCTTGCGCATCTCGTTAGTA -TCGCTGTCTACCTCACTAATGCTTTTCAGTGGGGTTCCGGAGACGCCAGGCGGCTGTGAT -GAGATCTGAAGCTTGTCGAGTTCGCCGCGGACTTCGACTAGCTTGCCTTCGACCTCGCGT -TTGCGAATGGCTAGCTTTGCGAGATCTTTCTCGTGGGAAGTGAGCTTGCTATTCTTTGTC -ATTTTGATCAGCTCCTCCGAGGACATGTTGTTGTGCGCTGCCTCGTCGATGGAAGTAGCT -GCCTGTTCGGAATTGATCGAGGAGGAAGATGAAGCCTGGTCCCGTTCATTAGATGTTGAA 
-GATCGGGCATCAAGCGTTGTTTGATCTTGTTGAACACGTTTTGGGTTTAAATGTGCCACG -CCATCTGCATCGATATAGGAAACCTCATCCGCAGATGGGAGATCACCCGATACCTCATGG -TGGGCACCATAGCCCAGAACATGTGGTGCGAATCGTTCTAATCCAGTACTGAGCAAGCCA -GTGACACTGGCAATGAGACCATGATGCGAATGCTCGCCATCGCTGTCGAGTGATTCGTAA -TCCGAATCCGAGTCATCTGCCTGGTCGGAATCGTATTCATTCGGAATCTCCGCGGAGTTA -GCCTTGGCGCCTGCTAAATTATTCGCAGCTTCCGTATCTGTGCTTGCTTCTTGTTTAGGC -GGCTTGTCATCTTGGACAGCCCAATTGATGAATTTAGCGGCGTCCGTCCCCACGCTTCGC -GGTGTTCCCTTGTCCACCATCCACCGTGGGATGCTGCCGCCCGGGTCACTCCTGGTGACC -ATGATCCATTCAACAGGATTCATTTCTTCGTCTTCACTATCCTCATCTCCATCCTGTTCA -GGTAGCCCCTCAGGGCGTTGCAGATTGGAGGATGGAGAACCATTACTATTCTTAATTCCC -TGGTGAAGGGAGCTGCTTCCACTGCTGTTTTTCTTAGGAATCTCCCGAATGAGCTCGACA -GACTCGTACTCTCCGCGAGTATACCCCGGCTGATGCTGGATATTGGGGTGGTCACAAGGC -TTGGAGATCATTATCCAGCTGCGACCAGGCTGCTTAGAACCCCCAGCTTGCAATCCAGAA -TCGGAATTGATGACCAATTCCACAAAGTCACGCGGAGCAGTAGGTTTGGGAAATTGCGCG -GAGACGTGATAAACAATTAAATCTCCCAGAACCGACCCGTCATGGTCCTGGACCTTAATA -GTCTCGATCTTTTCTTGGGCGCCGATCCCGCGGATGCTCTTGTCGGGCGTCTGTCCCTTC -TCGATCTTCTTGCGGTTTACTTTGAGCGTTTCATCGTACTCAGTTGAAAGCTTCTTCCGC -CACCGCGAGAATGGAAGGCCCTCGTGCACACTCCGTCGGCCGAACCATGAGCCTCCGCCG -TCGTTCGCAGAAAGTTTCCACACAGTCACATCCAACGGGTTGTCCTTTGGTCCACCCATC -TTTACTGGCTTCCCCCATTCTTTTTGTAGTTCCGTGATCTCAGGGTCGGTTTCTTCCACT -CGTGCGGATGAGGGGACAATTCGTCGGCAGGAAGAATCTGCCGGAGCATCTAAGCCGGGG -TATTCATCGCTGTCGGAAATTGGGGGGTCTGGAAGTGACTCCGCTATCAGGCGGCTGTTT -TTGAATATCTCGCGCACGTAGCCACGGAGCTCGTCGGGATCTTGGGGGACTTCGTCCCAT -GATGTCGGCTTGAGGCATTGGAGGGCCTCATGTAGAGTATCCATAGGTGCCGTAGGGGGG -TTCAATTGTTTTTCAAGTCTCAGACCTCTGCGATGTTGTTTGTAGAGTCCAGCAAACTGT -ATTGGTAGTGTGGTGATGTTGTTTCGAGTGGTAGGGTTTGTCAGTTCGTGATTATGTCGG -AATCAGGTCATGGAGATGGGATAAGCAGTGGCGAACAACAAATTATCCAGAACTCAAGAT -TGAAATTTCTAACAATGCTCAGATGGCTTGCCCAGACGGAACCGCGACGGAACTGTAACT -GCTCTGTAGAAAGGCTTCAAGTTATAAATGAAGCGGCTGGTGGATTCCAGGTAAGAGGCA -AATGCAACAAATCTACAATGCAAAGGGACAATAGCACAAAAAACCCGAATCAATACGCAG -GTAGAGGTGTTAAAGGATGTGAGTGTGAGTATCAATCAAGTTGGATATGATGTAGCCTGG -ACAATGTTGTCTTGGCGGCTGACGTCATACATTGGGGTAAAATGCGAAAGTTGTGTGGAG 
-TTGATTGATACCTAGTAATGATCCTAATGTATAGATTTACTTTGACATTGAGTCTTATTT -CAAATGCTTGCCATATCTCATACTGTGTTTCTTCGGAAAGTAAAAAAGATACCCAGTTAC -ACATACAAATTGATAGGAAGAAATTGTGTACGCACTTTTTAAAACTAAGCTAAAGTGATA -CATACAATATTTGGACAGAGCTGCTGATAGATCAACAAAAAAACCAGGAGAAGAGAAGCA -GCCAGCGCAAGGACCAGCCCCTCGATCAGGCAGATCATAAAGGAAGGTATCTGTCAGATC -GAGCATCTGATTCTTGTCGTCGGGACAAAGAATGAAGGGCTCTTCCATCCAGTAAAAGAG -CCCACCGACAGAGCAAGCTTGTCCAGGAATTTCTATAATCCAGCCTGGACGATAGGAGAT -GAATCTATCATTGCCAATTGTGGTCATGGGAGGTGAGTTCGTAGCACAAAGGAAGGAGAT -GACTGCCGCTCAGACCACTCGTCGCTTGGGGCGGGATGTGGAGATCCCCAGGTCACAGAC -CAAATGAGGAAATCCAGTGCATCGCCCTTTGCGCTCGGTATCTGCCACAGGCGGATGTAC -TCGAGTCTGACGAAGGTTGAGGGCAATCATCTCTCGCACTCGTGTCCTTGATCTTCACCG -CCTCGAACCAAAAATGACCGAAAATGCGTGCTCCAACTAAGCATCACAGATGCAAGGAAG -GTGACGCTGTCGCATAAACCTATCCCCCAATCCTCCAATAAAGAGAACTGGCCAGTCCGG -TACCCAGTACCAGAAAGACAGACCTCGTAAGAAAGGCAAGAAAGCAAAAGCAATCGCTGA -ACCCAGTCCAGTCGAGTAAGCTAAACTTGCTTCCTTCCCTTTCTATCGAGCTGCGACAAA -AGAATCGAAAACGTAAGGACATTCCACGTCCAAATTCATATCCTCCACATGCCGTGAGGC -AGACCAGTCTTCATGACATATATGCGGGAATAACAAAAGCAATGAAACATAACAGAGAGC -CGACCCGCCGTTTGTTGATGCaaaagaaaaaaaaacaaacagaaaagagcaaataatcaa -aCATCGCGGACGCTGGAATCGAACATCGAAAATATCAAAACAATCGCAGAAAAAGATAAT -TCTGGTTATCATGCAAAATGCAGGAGGGAGTATCACAGATGCACAGAGTCAAATCGAAGA -GAACGCCGTCAAGAATGCGAATTGTCGTTGCTTGGAATCGGGTTCCCGGTAGGGAGATAG -ACGAGAGAGGAGAAGGGGGAGTTGAGGAGCCGCGAAGAAACCGAAAATTGAGCCCGCATC -TGGTTTCAAGAAACGATCCACGTTTCTCAACCTCACGTTGCACAAGCCGGGAGAGCCTGG -ACTTGGGAATGCTCGACATCACCGAGTCTACTCGGGGGGGCTGAGTTAGGCGATCGTGCC -ATATGGCATCGGAGCAAAGTCGCCCCAAGTAAAGCCCCCCATACCCATGTTGACAGCCGT -GCTACAGGGTTGTATAAGCATGCTCGGGAACATGGTCGGGAACATATTCGGCAACGGGAG -CGGGCTGAGAGCGGCCGGTGGGGTTGGCATACAGGTGGTCGGTAGGCATTCGTAAGCCTG -GAACGGCGAGGGAGTGAAAGGTTGGCTGTAAGACGGGGTCGGAGAGGCAAGCTGGGCATC -ACTGAAACTGAGACTAGCCCAAAGATTCGGATTAGAATCGCCCGTGGTGGCCGGGTTGGG -GCTGGGCGACAGTACTTTCCACGACTCGAGTGGCGCTGGCTGGGGCACCGGGTTCTGCCA -CCAGTTTTCCGGACCTGTCGAACTGGGCAGGAGAGGCATTGGGCGCATGACATCCGTCAC -TGGTAGTGAAGGAGCAACCAGTGGGACAAATAGCTTGCCGATAGCTGCCTTTGCTTCTTC 
-GCTGCGGCCGCTCAATAACTCTCCAGCGCCGTCCTCTTCTAGCCCTTCGAGAGCCTGAGC -ATTGGTGCGGTAGACAAACTTGTTGGCGGCTTGGACGAACTCCTCCTGTAACCTCTTCAA -CTCACCATGCTTTCCTGACCAGTTGTATGTATTGGCAGCGGCATCCACCGAGTAGACATC -CTTGTGGAAATCAACTAGGGCTTCGATGGTTTCCGGGGTCAACTTGAAGGCCACCTCGTT -ACGGGAAACCATGTCGACGTGTACAACGTAGGCCTCCATATGAATCATTTGATGGTTCTG -CAGTGATTCCCAGAGCATGTTCTCATTCTTCTCTAGCTCTGCCTGGGGGACTTTAGTGAA -CATCATGGAACACAAGGTCCAGATGGCATTTGTTGATGAGAGTGTTTCTGTCAGGCACCG -GGTGTGCTGGGTAACATTGTTCAAAGGACGCTTGACTCGATTAACTCGCTTGAGGGCTGC -CGTCAACCAAGGCTCAACCTTGGTGTTCAAGACGACATTCGGAGATGATCGTGGCGCAAA -TGCAGCGGCGGAAGATGGGAGTAAGACTGTTGGCATTATGAAACTGTGCAAAAGGTGATA -CGTCCGGCGTTGGTGATATAAAAGAATGTAGATAGAATATAATTCATAAAGGCGACATTC -GCCCACACAAAATGTTGATTTTTTTTTTTTGGATAATCGGAAATTCTCTAAATGTCGTAC -AATCCAGTATTAAAGTGCGGGCGAATGCGCAAAAAGAACAAAAATTAGAATATAAAGGAA -AGGAGATGACATGAAAGTCAGTGTTATAGAAGAAAAAGAGGGAAGTCGAGATAAGTTAAG -AGGGGAACTTTTTATGGAGATTTAATAAGAGTATGTTAATTTTTACTCCGTATTAATTTT -ATATTAGAACATATGTATGTTTTCTACGGAGTACGGTATTTCGGTATATGCAGTCTGATG -GAGTCAATGTACATCAATGGCCAACATCTCGGACTCCCCGATCCACAACTTGAAGAATCA -GTGGCGATAATAATACTTGATTGAAACATTCAAAAAAAAAGGGCGCGAGAGTGTTTCATC -AAATTTGACAGGTTCCTACAGTTCCAGTCTCACACATCCGTTGCACGTCCATTTGGCCAG -AACAGCTGCATGATGCCCTAAACTGCAACAGCTAAGCCCCAATGATCCAAATGGCTTGAA -CAGAAAGAAACATCCGAGTCTCCATAGATCATCTAGAATTAGTTTACATACTATCTACTC -CATAGCGGGGACTTGAATACATTCTCAGCTGGGCGATGTGCATATTTCAGTGTGCGCCAA -GATCTACATTTCGTTAAGAGATACTGGATCCTCAACACTAGCAATCCCATGCTTTGGGGT -GAAGACTCTATTTAAATTTGGCTGCAGCCTGCAAGCCAAGACTTAGTTTAGATTCCTAAT -TTTCAATGTGTTTCAACACTGTTGTGCTTCAATATTCAACTGGATATTAGCCTTAGAAAA -ATAACATACGGTTTACATTAATAATATTACACGAAACACACCCATAACAAGCATATCAAG -GTTTTTTTTTTGGACCTATCGAGTGCTTCTGATACGATACAGAATAGTAGACCTCAGCTG -ATGGAGGGTCGAGCAAGCGTGGGGCAGAGCCCGTCTTGAGTCATTTCAGGGCTGGGATTC -GCTGAGACGAACCGAGTCCGGCCGATGCACAGAAAGCTGAGATCCCTGTTACGATCCGCC -ATTGGGCTGAGCCTCTGATGCCAGACTTTGGAACGGGCGCCACTAGCCTGTATGTACTGG -AAATACCGTTTCTTATTCTTTTCTTGTCCACATGATTCAGGTTTTTCAATGATCTCATCT -TACTCGCTATACAACATACTTCGCAGGCGCCTCTACCTCTGCCTCTTCCCTGAAATGGAA 
-TCCCTGCTGCATATGATGTCGGTCACATGGTCTGGTGTGGGTGTATAGGGTTTGCTCTGA -GGTGTGTTGAGGTTCCATGCTAATTTGTATCTATAGAGAGCATATCACGTATGGAAAAAA -CATGCGACCGAGTGCTTTTTCCTAAATTAGATATGAGGCGGTGCAAACTACCCGCCACGT -GTGGCGGATATCTCCGGCCCCACATGTAAATATGCAAAGCCCTAGTCAACTCCCACGTAT -GTTGTAGTTTCTTGCTCTGTTCGCTTTACAAGTTTTATCAAAAACCGGCTGGAGAAGAGA -TATGAGTTGGCAAAAATGGCAAAAATTGGGAGCTGGAATCTCAGGCGGATCCTATCCACA -TCCACATCCACAGCTAGGCGGCTGCTGAAACTCAGCTCTGCCGGGCCTGGATCAAGCCTA -GATTCATGATTGTCAGGCTACCATCAGGTTGAATGGTGACATCGCCGTACTATGAATATA -GTAATATATTGAACTCCAACGTGTCTCGCAGACTGCACCGATTATGACTCAGATGGGCGC -CGATGATGTTCGATTTGAAACGAAGCCAGATAACGCCCTAACATACCACAATTGCATGGC -GACCAGGGGTCGGGTGGCTGATTCTCTATGTTTGGTGCCTCAACCATGCCTAATTTTGCA -ATATATCGTTTGTTGGACAAAGTGCTAGTTGAAGCCAGAATAAACCAGTGCCTTTTAGGT -ATGCGAACTGACTGGCTTACTGTGACTTATGTGTAGGACATATCCAAGGGGTTCGGTCAT -GAACGAGTAGGACAGCGCACTCCGGGTCTTGGACAGACCTATATTAGACTATTTGATCAC -GGAATCCATGACTCATATATTATTGCTCAGTCTTCGGAAAGCCCCGATAATATTTCCCTT -GTTCTTCATAGACATTCAAATTGTCAGGTCACACGCAAATTTCTGAGCAAGGAGTTTGGT -CGTGTTGAAAATCTCCGAACATTTTTGATACTACTTAGCAGCCTTGGAAACAATCAGTAA -AAGTATTGAGTTGGAAAGTGTCTGTGTATCGTACTGACATCGCTACCGGGGAGAAAGCAT -ACGATATCTCTAATGGTCTTAAGGAAAAAGCCATGTTTTCCCATGCTTCATGACGATACC -AAAATCTCTACATGCTAACTCTCACTTTGCTAGTCAAACTTTACACGCGTTGCAAATTCA -ATACTCGTATTATGTGTTGTATCCATATAACGATAGCCGTACAGCTCTTCTCAGTCATTG -TTATCGTCATTCATTCCATTCATTTCTAGCCCTGGAACGCTGAAGCCCTGTCTAGAATTT -CTCCAATTATGAAATACAATCTGGGGCCAGACAGTAGCACTACTCGAACATATCCTGACC -AATAAAGCTTTGTTAAAGCGGTGGATTATGTAGTAGCATAGATTGTGATACGACGAATAA -GAGGGAGAAAGCTCATTTTTTTGCCGAGCAGTGACCTGCGTTGTTGCGACACTCCCCAGA -AGATGCCATATACTATGCTAATACATCTAGCTCTCAGGGGGACAGGTAGAGATGGAAAGC -GAAGTATGGGTCAATCTGACATTTTCTCCCCGAAGTATATATAGGTATTTCAAACACGGT -GTGGTTGATACAATGAATAACTATCTCTAGTGTCATTTCTAAGATGGTTTTCTGGTACAT -CAATACTTGGAAAATAGCGGTAGCCTTGAAACTTTGTCATGGCGGTCCCAAAAACTTCAA -TGTTTGATCTGCGGGACCAACCGCCCCCAGGGAGGTTGGGTGGGGGTCAAATATATAATT -TCTAGAAAGACGCTTGGTGAAATAGCCTCATAAGCAGGTAGCTGCCCATAAGCTAAAATG -GATTATTATGAGCTACAAGCCTTGTGCAAGGCCCGAGATACTTGAAACTCGCATTGAAAC 
-CTGAAGCTTGGGGCAAGAGTTCAAATCAAGTCAGGGATTGGCTTGGCTTAATTCACCGAA -CAAAAATATTAACAAGTATGCAGTTACACGAAGGACCAAGCATACGCATTCACCCGCGAA -GCAAAAAAAAGGCCAAATTGCCATGGTCCAAGATTTCTACCTACAATCCTGGTCTCGCCG -TAGGGGAGTTGAAATGTCATCACGAGAACACCCAAGATATATACCAATACACTAATGCAG -GGACGGCAGATCCCGCATATAATGGCTTTGGGGGGCTTTCTTTTCTTGCCTGGTGAGCAT -AAGGCACCATACTCCGTATATTGTACCAAGAAATATCGGCTGAAAAGAAAACAAGGAAAA -GCATGGCGAGTGAGAAAAGATCGCCAGATAAGCAATACCACATACGAAAAGAAGATTTTC -ACACAACGGGGTTACGCTGGGGTATATAACTTGCCAAAAAAAGGTAAGAACTGCACGCGG -GATAATGGTAAGACCCGTTTCCCCAGAGAATGACTATATGTCGGCCAATCACTTTGTCCC -AACCATTCCCGGTATGAGGATCGACCAGGAACAGAATATGGCACCCCACGCACCCCTTTA -GACGTGAGGAGAACTTTGTGGGGAAGTTTTTATCGCATGGCAGTTGGATACTAGGGTTAT -TCAAGAATGGCGAACAGCTGCACTACTCCCTAAATGAAGCTTGCCATAATAAATCAACAC -TAAATTTACGCTTTATACAGAACACGGGGAGCTATCAAGTCGCCAGGAACGTGGGTTTTC -CCGTTTCCCCTGGCTAAGGACCCCCGGCGCCGATCTACACGCTAGCGCCTTCAAGCGGGT -AATGGGGGCCTACCGTTCACATGCTTTTTCCGCCTCAATGCCCGCATGGCGTGCATGAAC -AAGCCAAAGCACATAGATCTTTGGCTGATGGCGTACACAAACTATATATAACCACGGTTC -ATTCCAAGGTTGATCAGGGTATGATTAAAGATCATTTCTAGAAAGAAAGCCCCGTTATGT -GGAATTAATAGAGATACATCCATCAAAGTCGTTCTCTCCACAATTCTATATACTCCCCAC -CCCATGGCGTTTGTACTATATCCCGAGGTCAAAGATGCAGCCTACTAGAAAAAGGTGGGA -ATTGAACTCCATTGACCGGATGCCCTTGCCAAAGTTGGACAAACATAGTTATGATAGTGG -AGAACACCACTAAGCGAAGGGTGTGATTCGATATTGATTCATTGAAATGCGCTTAGCGTA -GGGCCTAACACTTCCATTTCTGGCTTCATCCAATGAGAACCATGTCATTGGCTGAGATCA -GCGGTCAGTTGAGATGCCTGATCAACACCACGAGTCAAGCGAGATGACTGCTCTGAACTG -ACCATCTGGGTCAAAAAAACAACCCAATGCCGATCATCAAGCAGGAAAGCAACCGTAGTG -CTCAAGGCCCAAATCAAGCGGGTAGCAAGTCGTATTTTCCAGTATGCCTATTCGTCGTCC -CCGCAAGTCTAAGCCACTAGCTTCGGACGGTGAACGGATCTGTTGACGAAATATGGTCCC -TGGCTTCGTTCGCGGCTTGGGGGGTCATGGATGACCCGCTTGAGTCCGCGAACACGGAAT -GAAAAAAGGCAGTCCTCCGTCCAGATCGGGTCTAACCGCCCCATTTTCACTGACATCAGT -ATAACCTGGAAAGACCACACTGCTTCACATGTGGGTTGGCTGTCCTCGGTATGCCCCATA -CAAGGATCTGCAGTGGACGATTTCCAAGGACATAAAGGAGACCCAATATCTCCCCACAAG -ATGAGTTCTGGTCTCAGACGTCAACCGCCGGGAATCCCTCCTGTCAAGTTACCTTTGTAC -AGAGCAACGGCCCAAGATGCGTTTCTCCGCCGCCTTTATTTCAACCCTGGTGCTCTCTAG 
-CACCGCGCTGGCCGGCCCGGCTGCCCAGGTATCCTCATCTGTGGGAGCTGGTGCGACACC -AGAGGCAGAAGCGAACAAGAAGGACGTTGCGCAGTCCCATGCAAACAGTGAGAGTACATC -AGATACCACTCTTGTGCAAGTAGCAGAGAACATTGTCAACACAGCTGCGCAGCAGAATAC -TCAGACTGCAACTCACACAGCAGCAGCAGTATCAACAGCTGAGTCGAGCAGCAACGACAA -GGAGAAGGCTGCTGCGGACACAGCCAGCGCAGTGCAGAAGGATATCCAGAGTACTACGAC -TTCTGCCAACGATGAGACTTCGGCCAAGCCGGACACGCCGGGCCTGAACAACATATTGTC -GACAGGTAACTCTGTTTCGGAGAGTCTTACCGGAGAGACTGCTGCATCTCATGCCTCTCA -AAAGACCAAGACAGGCTCTAGTGCTCAGGACACGGCTACCGCGACCCAGACTACAAAGTC -TGCAGAAAGCACACAGAACTCCTACACAGGGGGCGGTAGCACGGAGAAGCACTCCTCGGA -TGCTGGGAAATTTTTCGACGGAATCGGGGGTCTGTTTTCCCCTACCCTGCTGGCGGATAT -CGAGTCCTTCTTCCACCATGTCGGCTATCTCCTCGATGATGATACCACACAGAATACCAA -GGATCTGATTAATACTGCCTCCGGTCTGCTCACCAAGAGCCTGATCACCAAGGTCACTGG -ACTACTTGACAACGCCAGCGACCTGTTGACTGCCGAGTTCGTCAAGGAGACCAAGAGTTT -GATCAAAGCCGTTGGTCCCGTGATCACTCCTGAATTGTTCAAGGAGATCAATAGTCTGCT -TGGCAACGCAAACCATCTCCTGACCGCTGAATTCGTCAAGGAAACCAAAGAGCTCATTAA -CACCGTCTCCCCCGTCCTTACGCCCGACCTGTTCAAGGAGATCAACAGCCTGCTAGGTAA -CGCCAACAACCTTCTGACTCCCTCATCTGTGAAGGGGATCAACAACCTGCTCACTAACGC -CGGTGGTCTGCTGACTGCCGACTTTGTCAAAGAGACTAAGAGCCTGATCAGCGCTGTTGG -CCCTAATATCACACCTGAGCTGTTCAAGGAGGTCAACAGCGTTCTTGGCAACGCTAACAA -GCTTTTGACTCCCTCAACTATCAAGGAGATTAGTGGCTTGTTGAGCAATGCCAACACTCT -TTTGACACCTGCAACAATCAAGGAGATTGGCGGCTTGTTGGACAATGCCAACGCTCTCCT -GACACCTTCAACAACCAAGAATATCAGCGGCCTTCTCAACAACGCCAACAGCCTCCTGAC -TGCCGATTCTGTCAAAGAAATTGGTGGCCTCCTCAGCAGTGCCAGTAGCCTTTTGACACC -TAGCTTCGTGAACGAGACCCAGGGCCTGATTGGCGGTGTGTCGAAGTACCTGACCCCAGA -AATTCTCGCATCTCTCGGCCCCCTGCTCTCCAACGCCAACGACCTCTTGACGGAAGACGG -CGTCAAGGAGATCAATGGCCTACTGGGTAATGCCAACGATCTTCTCACCGCAGATTCGGT -TAAGGAGATCGGTGGCCTGCTTTCCAGTGCCAGTGATCTGCTCACGCCCAAGTTCGTCCG -CGAGACTCAGGGCCTTATTGATCTAGTCGCCCCAGCTATTACATCGAAGCTCCTAACCAA -CGTTTCCTACTTGCTCGGAAACGCTACTGGCCTGCTGAACCCATCATTCGTCAATGACAC -CCGCACTCTCATCACTGATGTCTCGCCTGTCATCACTCCCAACCTCCTCGCTGAGGTGGG -TGGACTTCTAAACAATGCCGGCGACCTCCTCACACCCAAGTTTGTGAACGAGACCCAGAC -TCTGATCGAGGATGCCGCAGATGTATGTTTCCATTTCCCTTCACTGGTCTGTTGAATGTT 
-TTGCTGACTCCGATTTAGATCCTTCCTCTTGTCACGAAGCTGCTCGGGTCGTTGTAAGAT -GTTCTTGCTGTTCCGATATTTCTTTCGCGACGAAGCATTCATGGTGACTCATGATGATTA -ATGGTAAATATGAGTATCTTTAATACGAGATAATCGAATGAAAAGAGATGTTGATACATG -ATATTTGATATTCCTAAGGCTTTGATTCGAATGTGTTGCCTTGTAGGTATGAGACTTATT -TGACATATGATAATAGCTCATCGATTCTCATAAGTTTTGGTGTTCTTTGGGTGAGCCATT -GTCGGGTTTCATCAGTCCTGGGATTCCCAAAAAAGGCAAGTGATGTATGTTCCAAGATTT -TGCAATTGGATTGAAGAGCCCTGTGGTATGGTGTGGGCTAAGAGAGCTCTCACATAGGTA -GATATCGGCGCAAAAATTCTTTAGCATGCCTGTCTCTCAGTTGCAATATGAATGGCATGT -TACATCCCCGGGACGTGTTGGCAGGGGTATAAATCCACGGTGAATGATAGAAACAATTGT -GTCACGGAATCATGGATGAAAAGTCCTTCCAAGTGTGGCTGAGGACGAATAGAGTCTTCG -CTGAGGGGTGATTTCGTGGTCTAAATAGTTGGGGAGAAATGTTTCTACCGCCTCCCTCAA -GACGGGATGTCCGATGCCTTAGTCTCTTGAAACTTAGACACAACCTTCATCTCAGTGTCG -CGTTCCTTGACCTTCTTGTCCATGGATTTTTGAACCTCCTCTCGTTTTCTTTTAGCAGCC -TCAATCTTCTCTTTCACTTCTTTGGAAAGTTGTTCTTGCGCATGGAGCGCAGCCTCGCGT -GATTTGTATTGTTCTTCCAGCTGCTTAAGCTCAGTTTGGTAATCTCGAGGGATGATTGGT -TGACATGTGACTGGATGATAGAAGTATGGTGGTAATGGTCCATCTGGAACTTTGATCTCG -GCGAGGTCAGGGTGGATTGGGGTGACGCGAGTTAATGAATCCAAAGGAACAGCCGCAACT -GAGGCTGGGGCAGGAGGGGTAGTCTCGGGTTTCACCATGATTGTGGAGTGCAAAGAGGGT -CAAGAATGGAAGTATTTTGCAAGCGAAGAAGTAGTGACAGTCGCCTGAGGATTCCGAGGA -TGTGAGGCGCATTGGCGTAAAAGTGACCCCGAGAGTCCACAGGATGTTGATTCTGCCACA -TAGTCACACTTCACCGCTCGTGGTGTCCGCGACGTATGCGGAGAGGACTCTTAGCCAAAT -TGACGACTCTATCACTGCTTGTCATTTGATATTCATGCTAGGTGATTCTTTTCTCGGCTC -TATCTTGAGATATTCCCTCGAGGGTGGTCCTGAGTGGACCAGTCGGTGTGTCTGAGTTCC -CACAACCTTGAGCCGGTGACGTGACCGTCAACATGCAGGCATTTGGTATGTTCCTTTATA -CTTCTTGATTGTTGTAAGCTAACGCACCCACGATAGTCCCCAAGAATAGACGCGTAGGTT -ATCTAATAAGGCCAAGTGAAATGCATATGTAACTGACCTTTGAACAGCCCAGGTTCGAGC -TCGGTTTAAGGGTATGCATTCTAAACATAACATGGAAATGTTATCAAGACTTCTGACATT -GAAAAGATTATCGATATCAACAATGTCCCCTTGGGCAACAGCGTTGCTTTTGTGCGATGG -CGCTTGCCATCGTCAAGCTCCACTGAACACCAAGGCCATACAGAAAAAGCAACTCTATCG -GACCACCGCGCTTACTGGGGCTATGAGAAAACCCTTCAAGTGAGGCTTACAATTGATAGG -ACATCAATGCTCCAGGAGTGTGATCTCAATTTTGAGGTTTTACAAGAGTACACCTCGTCG -CCTATGTCCGAAAAGTGCGTGCTGGGCAAAATCAAACTCAACCTTGCCGAATACGTGGAT 
-AAAACCGAGGAAGACGAAGGGATCATTCGGCGGTATCTGATGCATGAAAGTAAAGTGAAC -AGTACCGTCAAGATCGGAATCGCAATGCGCCAGGTAGAAGGTGACCGCAATTTCACGACG -TAAGCAAATTCCAATGCTTATTTCCGATTCACATTCTCAATGCTGATCATTAAAGACCAC -AGTTAAAATCTGCCACTGTCTTCGGAGGAATCACTGGAGTTGTTCACTCTACCGAACAGC -CCGTCAATGCTGATGAATTTGGCCGGCTGCCGTCTATTGATACCAAAAGCCGTGAGATTG -CCGATATGCAAGACATGTATCGGCGCACTTTAGCAGCGTCATGGTCTTCAAGAGCCTGTG -ATCTACCTGCTGATAAGTTGGTTGAAGAGTTGTTTGCTGGAAGTTCTTGCTGGAATAACG -ATGCACACAATGCCAACGCTGGCATCTCAGCGGGAGATCACCGAGATTCTCTATTGAGCC -CAGAGACAGTCCCACGACAAAGCCGCCCCGGGAAAAAGCTATCACCCAGTTTCGAGCGGC -GCCCCAAGAGTTCTTCCAGCAACCGCTCACACAGTAGCGGTAAGACTCCAGACTCTCTTG -CTGCGCTTGGGCATCAGAAAAAGGGTGGAAGTATCGAGCAACAGTTGTATGAAGGGGCCA -AGGGTCGAGCTTGGAAAGTGCCAGATACAACGAATGAGCTCTCCGAGTTCGATGTAAGGG -AGGATTTGCGCAGCTGGGAAGTTATACCAAAAGACTGAATTCCTTCCCGAGGTCTGCGCT -ATATATTACATAATAGTGTATTGCTTGAATTCCACACTTTCAAGAATGTTTTTTTCATGG -AATTGAGAGACGTCTTTTGAAAGAGCTTTGTTCTATATAACCGAATAATTTGTTCACACG -CTTTGTATAAGACCTCGGAAGATAAACCTCGGGAGATAAACCTCGGAAGATTGCGCCAGA -ATGATGATGCCTGAGGCAGGAGTTAATTTCTTATCGCGGCCTAGCGCCGCGATTATCGCC -GGAGAAAAGCAATCGCAAGATAATTTGCCTTCTCCACCTTTACAATAGACCTACTGTTCT -GTCATGCTTCGACGTTCCGCAGGGAACAAGCTGGGGCTTCTGAGCTTCCAATCGCTTTCA -TATCGGAGACCACTATTGTCATACCCATACACCGTCCTTCCCCGATACCAGAGTAACTGG -AGTCGCGATCATGAACACGATGCTTTCGATGGACCCAATTCCGAACGAAACCGCGAACTG -AAAGATAATAGAAAACGTGAAGCCGGCACATTCACATTGGATGTGGACACGCTTGGAAAG -CCAGGACAAATCGTTGTTGTGCCTTCACGACGCCGTCGAATGTTAAATCGCAATCGCGAC -AAAAGCCTCCAAAATGAATCAGGCGGCACAATTAGCTCTATATTGGATGGAATTAATGAG -GAGAGCGCACCGCCTAGCGATAAGTCGGTCCAACAGAGAATTGAAGGGGTTCGTGGATCA -TATCAACTGGGGCAGACTTTGCCTACTGCCGACTTCAAGACGTTGTGGTCAAAATTGGCT -TCCTCTTTCACATACAAGCAATTGTCGGAGTTCATCTCAGAATACAATCGCAATAATGTA -ATAGAAGAGAAAGGCTGGACGTGGGGGTCAAATAACTCCAAATCGCAGGACAAGTCATTG -GGAAAGACCAAGGGCTCCAGGGGGAAGTCACGGGCTGCAGAGACGATCGTTCGGGATTGT -TGGCAGCTGGTGGCCGAAGGCGAGTCTGGCCAGTTGGAATTCCGCATTCCCGCACATTTC -ATATCTTTGCTTCTCCATGCTGAGCATTTCTCTTTCCACGAGCTAGCTAGTCTCCATGGC -TGCGGAATTGATGTCACACAATCTGCGGGTCTGGTATCACTTACTGGCAAGCGAAATGAT 
-TGTGAGTCTGTTCGCGAGATCATCATTGATGCCACTGGCAGAATCCGCGAGGAGGATGTT -GGAATCATCCCATATACCCACGGAGACGGAATCAATCAAGTTTTTAGCGCAGAATTTTTG -GAATGGGTCAACAGCACCCACGGGGTATTTGTTGAATACAAAGCACACGGGCCGCCACAA -AAGATTCTCTTTCTTGCCGAGAATAAGTTGGGAGCAGATAATGCGCGGAGGACCTTGAAC -TTAGCCTTATCCAACACCACCTCTCCATCGACGCCATTTTCAACCTATCTGCCTGCATCG -GAACTCGCCAGTGCTTATACCTACAGACCAGAAGCTCATTCTTCGTGGTTTGAGCAGCAA -AAGTCATGGTTCCGGTGGGCTATGTCTTCCAGTCAAAATGCGGAGGCAGAAAATCTTGAA -ACTCCTTTCTTCGATAAACATCAAACACGACTATCAGATGAACTATTGAAGCTTCTCCGA -GTCACTACTCCGAAAACTGGTGCAGCGGCCGGTTTGCATGAATCTGTGACAGCCGTGGTT -GGGAAATGCCTTTTCATGCAAAAACCTTCTCTCGATAATACAGCTATCAGTCCGGCACAG -TTAGGGCGACTATCCCTTCCACGGGCTTTTACAGCTGACGTACCTCGGGTCTCGCATTTC -ATCAAATCACTCATGCCTATCCGCCCAAAGAATGAGGCACCTCTATATCGACTTCGGTTG -TCTCCAACCTCCAACGCAGCATCCCCGCCAGACCTCGAAATTGAGGCAGCCATGAACTCG -AGCTTGGATGGCATTGACATCCACAGTGTAAAGGCTATTATCACCACAAACAGCGTTGAC -TACCTGCTTCCAGAAAATGGTCTGGACTTGCGTTTCACACGAACTGTTTCACAAGATCTC -TTGTATAAGCCATATCCCCACTCAGAACTCTCGCAAAGCTCTTCCGAGCAGCTCGAGCCT -TCTTTCTCGCCACAGATCCAAGCAATGCTGCAAAGCCTCAAAATCTCCCTCGGGGGCTCA -ATCGTCAGCAGTGGATCTTCGAAAGGCCCTGTTCCACTTCCTATATTTTGCAGCATCGCT -CTTCCCTGTGGTCTTGTTCCACATTCTGCTTCGCAAAGAGTTGAATCCAAAAAAGTGGAC -GAATCGATGACAGAGAATAGCGTCACTGTCGAATACATGTTCCCTCCACTCACCGACATT -CGAGGTGCCATTGTCCAAAAATACGACTTTGACGGACGGCCACTGGACTACCGGTATTAT -GAGAGCGGTTCGCTCCTCGCTGCTCGGACAAATGAAGTCTCTCTGGGCATGGAGATTCCC -CAGACCGACTCTGCTGCAGAAAATGACCAATCTGAGCAGCTTGATCAGGAGTTCCATTCG -TTCTATAACGCGGCTTGTGGCATGGCATTCAAAATCCATGGTAATAGATATGTGGATTGA -TAGCATGGGTCACTTCTTGTTTGAATTTGTACAGCGTGCATTAACGACCATCATCATATG -GGATGGCGGCGTCCTTGGGCTGTATTTACTGTACATTATATGTAAAATATCTTACTGTTC -AGACACGTATTATGTTTTGGGCTTGGGTCTGCGTAGTCCAACCCCGTTTTTAATAGACCC -AACTACTAATAAATCATGACTTAGTTAGGAGTATGAAGTCAGTTAAAGTGATACTTTCCA -CGATCTAAACTACAGCTTTCTTTGATATATGTAAATATTCTGCATGTTATTGATGCTTAG -CGAGACAGCCTGCATTGCCACCGATTGAAATGATCTTGATCTATATTCATGTATGGATTG -ACTCCCAAATCTGAAGTTTGTCTCAAGGTGGGCCAGTGTTCCCAAGTCCTGCTCCCTGGC -CCAAGATTATTTGGCCAAGAGTCCCGGGAATGTCTCTTCGTGAGCTTGAAATCTAGTTGA 
-TTGAGATGAACGTAAGCCCAAACAGCTTCATACTCAATGTTCAGATCAAACCGTGAGCCC -TGTGCACCTCCGAATTATCCATAGATTTCAAGTGATAGGCTGAGAAATCATATTTTGAAT -CATTCTTCTCACGCTGAATATAGCACAAAAATGCATAAAAATATAACATATACCTACATT -GTATGTTGTACATTGTAGGCGTGACCTTGGTGAGGCCCCACAGCCGATCAAGGCCCGTGA -CGTGTAACACCTCTCATCTCAACTACTCCACGCGCGATACTTCCAAGGCCAGGTGCTCAG -GACTGATTAGTCGCCCTGATTGACTACGGATCCACAATCAAATATACATATATCTATTTC -GATCCATCCCACTATCGCTGCTGTGCCGTCCGATCCTACTGTTCCCGCTGCTCCCCTCAC -CTACATCCAGCATGAAGACCACGCGCCAGTGATTTACACGGCATGGAGAAAACACGGATT -GCTAAGGTTCGAATCTATATTCAGCGCCACAATCATTTCTCTCACTGACCATGTTGCTTC -AGGTTGATAGTGTGACCTTGGCTCGCCGCGGAGAGCAGGTCGATGGCACTCTCCATCTCA -CCCCTCACCACCTCATCTTTTCGCACACCCCACCCATTTCTCCAGAAGATCAGATAAAGG -GGGTGACAACAAGGCCTAGAGAACTATGGATCACATACCCTATCATCGCTTTCTGTACCC -TGCGGCCAGCTCCCGCAGCCTCCCGCCAGCTCTCTTCCATCCGCCTCCGATGTCGAGACT -TCACATTCGTCTGTTTTTACTTTGTAAATGAGCACAAAGCGCGAGATGTGTTTGAAAGTA -TCAAGCAGTGGACGTGCAAGTCCAGTCGTATCGATAAACTTTACGCTTTTAGCTATCAGC -CTCCCCCGCCAGAAAAGCAGTTCAATGGGTGGGAGCTGTATGACCCGCGCAAGGAATGGA -CACGCCAGGGCTGCTTGGATGAAGGCAAATCATGGCGACTTTCAGAGATCAACGTGAATT -ATGAGGTATTTCGCCCAAAATAGTCCACATGTACTCCACTGACCTATCTTAGTTCTCTCC -AACCTATCCCGCCCTCATTCCCGTACCTTGTTCCATCTCCGACAATACACTTAATTACGC -AGGGCGCTATCGATCTCGAGCAAGAATCCCTGCTCTCACGTACTTCCATCCTGTCAATAA -TTGCACCATCAGCAGAAGCTCACAGCCGCTGGTCGGTGTCCGCCAGAACCGAAGCATTCA -AGATGAAAAGCTTCTGTCAGCTATTTTCTTGACTTCTCGCTCTGAGCGGCCGTTGGCGAA -TTACTCGACCACAGAGAAAGAAACAGATTCTAACACCCAAGAGACGAACCAGGTGATGGT -GCCCGAGTTGAACAATGCCGAGGAGCTAGAGGACGAATTGATAGCCGCTACAACTGGCGA -TTCTGAAGATAATCTCTCCATATATGGCGCCCAGCAGTCAAACCTGATTGTAGATGCACG -CCCTACCGTGAATGCATTTGCCATGCAGGCTGTTGGACTTGGATCAGAGAACATGGACAA -CTACAAATTCGCTACCAAGGCATACCTTGGCATTGACAACATTCATGTCATGCGAGACTC -ACTGAACAAGGTGATAGATGCACTGAAGGAGTCAGATGTTACTCCGTTAGGGCCAAATCG -TGACCAGCTAGCACGCAGTGGGTGGTTGAAGCACATTGGTGGGATCTTGGAGGGAGCCAG -ACTGATCACTCGTCAAGTTGGACTTACTCATTCTCATGTTTTGATCCACTGCTCTGACGG -CTGGGATCGCACAAGTCAGCTCAGTGCGCTGAGTCAGATTTGTCTGGATCCCTACTTTCG -GACGCTGGAGGGTTTCATGGTACTGGTAGAGAAAGACTGGCTTTCATTTGGACATATGTT 
-CCGACACCGGTCTGGTCATTTGAACAGCGAGAAATGGTTCCAGATCGAAAATGAACGTAT -CGGCGGAGATCCCAACCGTGCCTTTGGTGAAGGTGGTGGTCCAGGGAAAGCCATTGAAAA -TGCATTCCTTAGTGCCAAAGGGTTCTTTGGTCGGGACAACAATAGCCGTGACTCTTTGCC -GGACTCGGATGGAGAGCTTCAAAACTATGACTCGGATACTCCTCTCAAGAAGCCCACTAC -ACCGCGCTCCGGGGTGTCCGAGAAAGAGATAACCAAACCAAAAGAGACGAGCCCTGTGTT -CCATCAATTCTTGGATGCAACGTACCAGCTACTTCACCAACATCCGACACGATTTGAGTT -TAATGAGCGTTTCCTGCGACGACTTCTCTACCATCTCTACTCTTGCCAATTTGGCACATT -CTTGTTCAACAGCGAGAAAGAACGCGTGGACTGCAAGGCCAGTGAACGAACTCGTAGCGT -TTGGGATTACTTCCTCGCCCGCCGGGAACAATTCACAAACCCTCACTATGAACCTGTCAT -CGATGACCATCAACGTGGCAAGGAACGCCTTATCTTTCCCCGGGTCAGTGAAACACGGTG -GTGGAATGAGGTGTTCGGTCGAACAGATGCGGAGATGAATGGCCCCCGTTCCTCAGCGGC -TCCAAGTGAAATCCCGGAAAGTACGGTGCTGACTGGTATTGAAACTGCAAAGTCAGACCG -TGCGAGTAAAACGACTGATGTATCAGGGGTGACAGCTGCTGTGACGTCAGGCGTATCTAA -GCTTGCATTCCCAGCGGAGCGCACCGACAAATCGACGGAGTTAGAAGTAGAGATGCAATG -AACCGAGGTCTTCAAAGGACTCTGTGGTGATTCTCAAGCATTTACAAGTTTTATGTTATG -CAGCCGCATTCAGAGCACTAATAAAGTGATTCTTCCAATGATGCCGGAGTACGGAGTAGT -TATATCAAGGTAATCTTTAGAATATTTCTGCCTTATTGCCTTATTAATGCCTAAAAAGTA -AGGTGGTCGTACTAGTCCCAATATGGGAAGGCGCCGGCAAGCGAGGGTTGAGCCCTGGCC -CAAACGGGCAAGCAGCCCTAGTCCCGAAAAATCCCCACTAAATAATGCACTCTCAGAATT -GAAGTCTCTCGACCACGATCCCCTCCGTCGACAACCCCCGATTCCACCACACAAAATGGC -CGCTGTTCAGGGTGCTATCTCCAAGCGCAGAAAGTTCGTCGCCGACGGTGTCTTCTATGC -CGAGCTGAACGAGTTCTTCCAGCGCGAGCTGGCCGAGGAGGGCTACTCCGGTGTGGAAGT -CCGTGTGACTCCCACTGTCACCGACATCAGTACGTCAAAATCCGAAAATCAATCTCTCCT -GCGTGATCGCTTGAAAATTGCTGTTGAAATTGCTGGAAAAAAGAGGAGGAAGAGAGGAAT -TTGCGATCGCGATGGGATTTGCGGAATTCGAATGGGGATTTGATGCTAACGGGCTTGTCC -AGTTGTCCGTGCCACCCACACCCAGGAGGTTCTCGGTGAGCAGGGTCGCCGCATCCGCGA -GCTCACCTCCCTCATCCAGAAGCGCTTCAAGTTCCCCGAGAACTCCGTCTCCCTGTACGC -CGCCAAGGTCCAGAACCGCGGTCTCTCCGCTGTCGCTCAGTGCGAGTCCCTCCGCTACAA -GCTCCTGAACGGTCTTGCCGTTCGCCGTGCCTGCTACGGTGTCCTCCGTTTCATTATGGA -GTCTGGTGCCAAGGGTTGCGAGGTTGTTGTGTCCGGAAAGCTCCGTGCCGCCCGTGCCAA -GTCCATGAAGTTCACTGTACGTCTACACTGGGAAAACTGCGGCCGACGAATCGATCGCAA -ACGAGTTACTGGGAAGAATATCATGGCTAATACAAGTAACAGGACGGTTTCATGATCCAC 
-TCCGGTCAGCCCGCCAAGGAGTTCATTGACTCCGCCACTCGTCACGTTCTCCTCCGCCAG -GGTGTCCTCGGTATCAAGGTTAAGATCATGCGCGGCTCCGACCCCGAGGGCAAGGCCGGC -CCCCAGAAGACTCTCCCCGACTCGGTTACCATCATCGAGCCCAAGGAGGAGCAGCCCGTT -CTCCAGCCCATGAGCCAGGATTACGGTGCCAAGGCTATTGCCGCTCAGCAGGCCGCTGAG -CAGCAGCGTCTTGCCGAGCAGCAGGCCGCTGAGGCTGAGGGTGGTGCGGAGACTTTCGCC -CAGGAGTAAATGTTTTTATCTACATTAGGCCCGGCTCTTCCTTTTTCTTCCATCTGTCCT -AACCTAACCTGATACTCTACCTCACGCAATCAAAATTACATGACCTTCAAAGCCTTTTTA -TTCGCTATTCTATAATGTATTGGCCAATACGGTGGAGGTCAATTGGAAGGTGTTCGAAGG -TCTGGTACATATTGGCAAGAAAAGGCCTTTCTTGTCATTGGATATCAGATATCATATGCT -AGAATCAAAGCCTCGATATCTGTTCTCCGTTCCTGTGCATGTTCCTCGATTACCAAGTGC -CTGCGTCCTACTTCACGTAGTAAGAGGAAATATCAGTCTTGCGTCCGAGTTCGGATTTTC -AACGTTGCTTTGGTTCAGATGTGACTGGGGAATCTTGAACTAGAACGTATATATCAAGCC -ATGGACAGACTTCCTGGTAGAACTAGGCAATTTAAAGCTTTCTGTGATAATACATACCTA -TTTTGTCTATCAATAGAAACATGTAAGGTAGAACTGCGGAGAGAGATTTACTGTGGATAC -TGATGTAGATTTATATCTACATATAGACCTACGAAAGTAGTCATGTATTCATGGATGAAT -TTCTAGATAAACTGAAAGAGTAGGAAGAGACTCAATGCTCCGTTGAGTGTCGCTATTATT -ATTTCGTAACAAGATATCATCCAAGAAATTTCCAGTACATATGCATATGCAATAACAGTC -AAACAAGATGGAGTACATGTCCTAATCAGGTCCAATCATAAAACAAATCGAATCAGATTA -TCCAAAATTCATAAAAGTAACGAAGAGGAATGAGTACTGTTAACGCTCACGATATGAGGC -AGCGGCAAGCCGACTAAACAGCAGCTCGGGATGCTCCAAGATCTCCGAGTGCCGACGATG -CAGCTCCTCGAGTTGAGCTAGGGGATTCTGTAGACCGCCATAATAAGTATGTTGCGTTGA -GGAGAGAAGAGAACACATCAACCGTGACACCCACCCATTCCTCCTGGCTAGGCACGCCCT -CCGCAACAGGGAAGGCAGCCGTACCGGAGGCCTCATCCCCGTAGCTGATGCGGTGGATTG -GGGCCGGCGTTACGGGCAAACTTGGGCGCTCAACTTCTGTTGGGAAGTCGGTGATCTTGG -TGCTAGGACGAGAGCCTGGGGCGGGGGGTGATTTGTCGGGACTATTGGAGTCGGCTCCCG -GGCTGGTCACTTGTGATTTTGTGCGTCGTGCTTGAATTGTTTCAATGTAGCGCTGGATCT -CTGGATCCTCATCCCAGGCGTTTGAGCGGCTGTATGTTTGCCATGAGTCGGCACTGGGTT -CGAACGGTACCCGGGTGTAAGAAGGTGAGGGTCCGAGAGGGGCAGGCTCGCTTAAGGGTG -AATGTTGCTCTGGTTCCTCGGGTATCACGGGGGTTTCCACCAACTCCACAGGTGCCTCCT -CGGCGAAGACGCGGGTGGGTTCTGGAGCGTGGCTCTCCCAGGGGAAGACCGTGATGGGGT -TGGGCTCCGGCTTGGCTGGAACCTCATACCACATGTTTTTCGGTGCCTCCGGGTAGGATG -ATGGGGGTTGGAATAGATGCGTATCTTCGGACATATTGTAGGTTTTCTGCTGTAAACTGA 
-AACCTTCGGGTTTCGTGTTGACCGGTGGTGGCTCGCTATGGGATATTAGAACTGTAGATA -ATATCGAGAGGTATGCTCTTACCGCGAGGGATCCCACTCGGACTTGGGCGCCTCAAAGGT -CGAAACCTCTGGGGCCGGAGACGGAGGTGATTGGAAATCGACAATTGGAGGAATTGGCGC -CAGCGGCCGTGGTTGGTGAATATGGACATCGGGGTAGtgtatctgtgtgtgtgtctgggt -ctctacctgtgcctgttcctgggtcccggtctgtgcgtgcgtttgtatctgcgttccgta -tgtgtcgtggctCACTTGAGGCGCAAAAGTGATTGGCGCAGAAGGCAATGGTGGGATATA -TGTTGAGACATGCTCCTCTCCACGAACATACTGCGGTACCATGCTTAACACAGGTACGGC -AGCATGGCGCACATAAGCAGGAGCATGGTGTTCGTGGTGCGGTTCCGAATGGCCGTGCTC -GACTGGCGGTGGTTGGTGTGATTCCTCGTGGAAGCTCGCGGGTTCTTCAACATAGTAGCG -CTGCCGTTCAGACGTGGGCTGTGTTTCATGGCGAGGGATGCTGTCATAATGAGGAGCACT -TTCAGAATGAGGAGTGCTCCCAGAACGAGAGACCTGATCATAAGTAGGGACATCGATTTG -CTCGCCTCGAGTCAGCACCGTCTTTTCAATTTCGACTGGCGCGCTCGGTACCTAGAATTG -TCAGTCCACATCTTCATGATCTTGTATATCACTTACAAATTTCGGACGATAATGTTTGTC -ATAAACGGCCCACCACCGGCCTAGAAGCTGATTGTACGGGGACTCCATTGGGATAATCTG -CCTCGGCATATTCCACGGCTTCTGGGCACCAATAAAATGGATCAAACTGATGGTACTCTG -GAAATGTTTGTACGCGGGAATGTACTGGTAATTAGCACTTGGTGTGCAGTTATACGTGAA -GCTTAGCCTATGCCAGTTCCGAAAATGCATGTTCAGCAGACCCTGGTCAGCGCCATCGAA -ACTGATACCTCGCTGGGCGAGTGCTTTTAGCGCATAATAGTCCTGTAGATTAGGTCGGAG -AACCATCATTCCACTATTAAAGCAATCCGGCCATCCAACGTCAGGGGCAGCAGCAAAGTC -TGCCTCCAGTGATAATAGCTCGTCAGGAGCTCGGAGAGCTACGACATCACAGTCAATGTA -TACGACTCGCTGGTACTGTGTCAGGCGCCAGAGCTCGATTTTGGTGAATGTGGCAATCAG -GTCTGGGCGATCCATGAGCCAGAGGTTCGCCGGCGTGTCATTTACCATCGAGCTCACCGG -GATCAGTTCGTCGTAGACGGTCTAATTTGTAAGATAAAGGTATGGGTTTGGGGAATGCTG -CACGTACCCTTAACTCATCGATGGTGCTGGATTGTAGTCTGTCTGGCGTAAAGAGTGCCA -CCAACCTCGCTTTTGTACCATTGTCGCGTAGGGAGTGGGCAAGGACCATAGCACCTATTA -AGTTTTAGAGTAATTTCACATTCCACATCCATGAGAACATACCAGGAAGATAATTGTCAC -TCAATAGAAGCTGTCTCATCATTAGTTCGCGTCATAATCATATGAAAGGGGTAGAATACT -GTGCAGTAGACTGCTCCATTGGAATCCATCATGCCACTGAAATAATGAAACGATGAATGG -AGAGATGAAAAGCGCCCCAAAGAAGGCTATATTTAACCTGTCAATTGCCAACAGTTCAGA -CTAGAGCAATATTTCGTGATACCTGGACCGGACGCAGATTCGGGGGACGAAAACAGGAGA -AAATTTTCTTCACAGCATGTTGTGCAAGGTTGAGGAGTGGCAAGTGGTAAGATTCAAATG -GGAAAGCAAAGGCCGTGAAATTAAGGTCAAAGCTCCCCGAGGGGGAAGTGGAGTTGCCCG 
-GGTAAATCATCGATCCCGCCGTCCACTTTATATTTCTATTCTTTGCAATTCTGTGCAATT -CTTCTCTACAATTCTTCAATCCGAATCGCCCATGTTTCCTTTGATAATTCCTTTTATTTC -TACAAAATGATCCACCAGAGTGACCTCAGTGGGATAACCGCTTTCCCCCTAACCGCTTTA -GAATCGGGTTGCTCCCCGCATGAAAACATTGAACATTGCCTTTATCGTGAAAAAGTAACC -TGCACACCGGGGTAGGATTCAACAATAACCGCCCCAAGGTCCCTCTCTTCGCCTTGAGCA -GCCCATTCAACAGTCTTGATCAAAGCCGCGCCGCTCTCACGTTCAAGCGGAGTCCAATGT -CCAGTACGGGGAAGTAGCAAAATTTGGGACCCCTGCACCAAGTAATCACCCATCCCATCA -AGACAAATCCTGGGGTCAAGAGCAATATCATCCTTGCCCCAGAAAATCGTCGTATTAGCT -TTGAGAGCGCCCGGGGGACCTTCGTCAAACAGCCCAGCTCCGCTGCTCGCCATTCGCTTT -TTGCATGCAATACTGTGCAAGCTAGCAACAGTCTCAATGGACTTGACCCAGGGTGCCGTC -GCCGCTCGATCACGGTAATAAGCCGCCATATGCAAGACATGTGCTAACGCGCGTTCGCCG -AGGCTGGCGGGATAGGCATCACCGTTAGGGGTTTTAGAATCTGATTCTGCGGCGGACGGA -CCCATGGTGCTAGCCATGCTATCGGCCACGTCGATGTGGGTATGCTCCTCCTTGTTGTAC -GAGGTGAGATGACTGCTTCTCAATAGTGACCAGTTGCCTCCGGTCAGGAAGTAGTAGACC -ATGGGCGTGGGTAGCTGCATGGCAAAGATATAGCCGGACAGAAGAAGCTGGCGGGCGAGA -GGGGTGATGGTTCTGAGTGCTTGCACCAACGGGCCACGGGCGTGTAGTGGTGAACGCCAG -GCGTTAGCGAGCATCTTGTTGGCCGAAAAAAGCAGACGGCGGATATTGGATTCAACCAAT -TTCATCTGAATCGTTAGTTGAGATATGACCTTAAATTAGCAGGTGTTCTAACATACTGAT -GGCCCGTTGGACAGGATGAACCGATGGGCCAGGGACGGAGCTTCAGCAGCAAGGCGCATG -GACAAGACACAACCCCAGTCATGGGCGACAATGATGGTTCTCTTCTTGTTGCTCTCGTAG -TCCGAGTCCACACCATACTGGGTGCGCAGGGTGACAATCAGCTCTGTTAGTCTCTCCAAG -ACACTTGTTGCGTTATATTCTTCCAGGCTCTCCGTTCCTCCATATCCGGGAAGGTCAATT -GCAACGAGCGAAACGGCGTTTTGAAGTGTAGAGGTTTTGATAATGCGTCGCCAGATCGCC -CAGCTATCGGGGAATCCATGGATGAAGATGACCAATGGTTTCTCGGAATTGATTGTTTCC -GGTATGTTCGGGATATCATTACTGACAAAGTGGAATTTGAATCCACTTTGCAAGGTTAGA -ATGTGATGAGAGAGACCGTCAAAGACTTTAGATAAGTTCCACAGGCGATCGCGAGCTGGG -AAGTCAGCAAAGAACTTTTCGGGAGAAAAATAGCTCACCAAGCTGTAGGTCCAGTTTCTC -GCTTTCAGTGGGGCGTTTGAAGTATGTGCCTTTCGCTATAGCAATGAGGGCGTAAAGAAT -AAGTGTAAAGAAGCCGTACGTAAAGACGACCGTGCCAATCAAGGCACGGGTTAGAGATTT -GCCCATGTTAGAACCTTGGAATCATGTGTTGAAGGAGCAAGGATTTCGCCTTGTAATAAA -TCTAAGACAAACCGGGATGCTTGGGCCGTTCGCGGGTCCACGGGTAGCGTCACACCTGAG -TCAGCGATATAAAAGCGGGCAGCTGACAGCGGGTACCTACATGTTGATATGTAGGTACGG 
-AGTACGAAGTATAAATTTAGATACAAATTCAGGAATACCATCATATTACACTGTAAATTG -TAGACAATTGACAGCTTTTCTCAATCTATTGCATACCAGACGCCGCCTCCACATGCACAA -ATATTATTGGTAGAAAACAGTCCAAGGGAAAGGGTATTAACAAGGGTCCAAAAACAAAAA -GCCATCATCTCGTCGAGGGAATATATACAGATCAAAGAAGAAATCCTAGTTGCTCGCGGT -TGTCGTCTACCAAGCGGCCATACCTGGAAAGTCAGCGAGTTAGCAAGATTTTGTGATAGA -ATGACGAGGTACTTACTCTAGGAGTAACCGGAAGAACCAAACCTCCAGATCTTCATTTCG -TAGCGTCTGCTTTGCCCAGTCCTGGCCCTGGCTGGCGATTTGAAGAGCGGCCTTTTTGCC -CTCTTCTTCTGAGGTAAAGTATCGCATGCTTTCAACGTACTCGTTGCCTTTCAAGCTGAG -AGGGACGTAATGTACCCAGGGCTTCAGCCACTCGTCGTGCCACTCGCGGAAGATAGCGAT -CTTGTAGACAAAGCTGTTGCTATGCAAAAAAGCGTGGAATCGGCCGCTAAAAGCGTTGCC -ATCGATGTCGACCAAGAACTTGTGCGCCCATGCATCTTGTTGCCCAGTCTGCTTGAACAC -TCCAAAGTATTCAGTCTGCGCATTGCAGTCGTCCGGGTCGCACTGGCCAATAAGTGTGAA -CTTGACATCGAAGAAATCACGATAGGCGTCCCGATTGGTCTCCTGTGAAACCCAGCCATC -ACCCTGGCGAGCAAGAACTTTAGTGGTGCCTAAGGCGTTGACGTTGTCGACAAATTGTTG -TCGATGTTGTCGGCGCCAGCCACCCGCTCGCGAAAACCCACCAGTTGTTGAGCCACGCCA -GTACATCCGGTTAGACTTAGCCTCCCACGCATAGTCTTTTTTATGATCGTAGGGGACCTT -GTCGGCCCAGTACCACGGACTTGGGTAAATAATATCTTGGAAGCTGGAGATTTTGCTCTG -CGAGAAAACAGGGAACAGGTCGTGGACGACATCAAAAGCGTTGGGCCGGTCAAAGAATCC -ATATGTATAGCGGAGAGAGGGGGTATTGCAGATGTCAGAGGCCGCCGTAGTGTTGTAGAT -GAAGCCGAGTTCGCCATACGCGTAAGGATCAACGTCGTCGGGGGCACCTTCGTCGAGTGA -GCGCGCTGGGCTGTCAACCGGACAAGAGACACGGGAGTTGGTCCAGGTCGGTTGGTGTGC -GAAGCGGTTGAAACGAGTTGTGCGCACCTCGTCGATGCGGTCGCCTTTGTTTAGGTCCGA -AGCACGTGGGGACCACGCATTGGTTGGGGTTTTCTTGAACGCTGATGGAATGATGACATT -TTTGGCTGTCTGCACCAATCGCTGGAGGTCTTCGTTAGGGAGGATCACGCGCGGCTCGTC -GTGAGTGTTGAACACCAAGTCCATGTCGGGCAGGTATTTCACAAAAGCACCCATCATACC -AGCCGTGGCGTCTCGCTGCCACTTGCGGTCATCGCCACCTCCATCCGTTAAGGACACCTT -GCCGTCTCGAATCAGAACACCAAGCACCGCATTGTCAAATCCAAGGGTTTCGCGCGCCCG -TTCCCGAATCGTTTGAGGTTTGACACCCCAAAAGGGAAGAAGGGAATGATAGATGGTGTC -AAAATCATCGATGAGTTGCGCATTTCTTGACTGGGCAAACTCGAACCACTTATCGAAATG -TGGCGGCGGGTGCATATTATAGCGGCGCTGGTATTCACTCACGGCCTCGGCCAGGGTCTT -CGATTGCCGCGACTGCGCATGATCGAAATCCCGTTGAGCATTTCCAATGAGACGTGTGAT -CGGGTGTTCCTTCTCGGGGAGGTATGTGTTCTGCCCCGCATGGAGCTGAGGGTGTTGTAT 
-CGGTTCGGGAGGAGGCAATTCTATGGGCTGTCGCTCTGGCTGCGATGATCGTAATAGTAG -GATCGAAAGACACGCAAGAACACCGATCGCAAGTAATACGATCAACCGTTGAATGCGCAT -GCTGCCATTGGGGAGAGGCATGAAGAAAGAGAGTGAGTCTTAAGGAGGGTTCTCCACAAG -TCTAGAGTTCCTGGGTTCCAAGACCCTCGGGCTATTAACCTTATCCGCCAAGCCTTTCTA -ACGTAATCGCAATCTAGGGCTACAGGTACAACGAATCACATAGTACTCCATACATAGAAC -CCTTTGTATTGTTACAAGGGTTTGAAAGACAAAGCGTACAATTATACGGAGTACGGAGTA -ATATGGTTTCATCTGTGCGGCCCCCAATTCTTGACGGGGTCCATGTACTAAGTGGCGTTG -CCTAGATCCAGGGGAGGTTGCCCATAATCTAATTCAATACGTGTCACCAAAGGCGCTTGG -ACTACAACATACCTCGATGTACAATATACATTGTACTCCGTACAAAACATCAAGAAACTC -GTCCAAATATTGGTTGATAGATATCTCAAACACTTGAAGCAGTAACAAATCTGGTCATCC -TCAAATATCTACCAAAAGAGCGCCCATGGTAACGATGCAATTATTTCGGACCCCTTGATG -GAAGATCAAGGGGCAAAAGAAAAGGGGCTGAATTTAGTGCACTGTACGGAGTGTAGATTG -ATGTCCTCTCTGAAAACAACCTCTAAACGAGTATAAAACAAAATAAATCACAAATTACCG -CTTAATCGAACCTTGAAAGTGATTTAGGCATTGGGATATGTTATTTGCTCTACTATCCAT -AGATGGATGGATGAATGAGGCATGCCATATAAGCTCATATATGATTTTGATACCTATCCC -TGGAAAAAATATCAAGATGCTGATCAAAGGAATAAAAACACGAAACGATCATCAGACATG -GCAGTATAGACGCCGTGTTATATGGTTCCAGCCTACACTAACACAGCAGCCGAGAGCATA -CCCAATTAAGCACAGAAGCTGCTAAAGTCTGCAAAATGTACTTCTGCGCCTGCTTCACAT -GAGAGTAAGCCGCAACACACAATAACACCACCAAAGATACATGACCAAAGTAAAACATCT -TTCTTCGTTCTTGAATGGTACCCTGCATAAGCCGAGACTGGAGTCCATTCTGGGAAGCAG -GGCGCACGAAGAGAACGAGCAGCACAGCCGTGAAGATCGCGACCAGGCCACATTGGACAT -CCAAATCCTGAACTCGCTTAAGGAGCAATATGCCAAATTCAGGATGACTGGTTTGCACGA -AGAACAGCGTATACAGCGCCGCATGAGCCAGGAGAAGAGAGACAACCGCCCGACCAAAGA -GACGGTGGTACGGCGTCAGTGCTCGCTGAGAGATACCAGTCAGAACAGACAGTACAGATG -AGGCTGCTGGTTGAGAGATGTTTGCCGGAGACATCAGAACCTGTAGTGGCAATTGGGAGA -GCCCAACCCGACCCAATGCTTTGGTCAAATGGAGGTAGTCTAGCATGTTAGCTGGTCCCT -GAAGGTTTTTGGGAAATGAAGACATACCATCACCACTGTTCCATATCGAAAGGCCTAAAA -GCCATAGTAGCCAGAGCCCGCAGACAAGATACTGCCGGGTCTTTCCAGAAACCGGGCCAC -CGTACATAACAGTTGATCTTGTAGCAAAGCGAAAGATTCGAATGGCGACTATGACGATAA -TAATTGACCATTGTGCGTAGTACCCTCGAAGATCAAGCAGGTCCCTGCGATGCAGCTTGT -CGATTTCGGAGAGAGAGACGAAATGGTACGGCCACAACATGGTGACTGAAGAATGAGAGA -AATACTGTAGAATTCCAACCCAATTGACAAGCCCGCAAGATACAGCGCGGCTCAAAATAA 
-CATAATTGAAATGTTACTATTGGTTTCACAGATCGTTTTTGATATACTGTCACTGAAAAA -TAAGCAGTCAAACCACAGATATGCCCTGAACGCCCCCCGAGAAGCCCGCGGCTTTTTGCT -GACAAATAAAATGCCGAACAGTACCTGGGGGCTACCTACACTCCAGCCTCCAGGCCTTAA -ACCTTTCACAGATGACAACATTCAATGCAACTGCCCTCTGCTGCGAAATTTACATCTCAC -GCGGGTTTACATGCAAGGTCTTTAGTTGCACATTTGTTTTTGACTACCTACCCTGGCTAA -ACGTTATGTAAAACTTCTACATTAAAAACGCCTGAAAGAAAAGGACGCTTGGCTATTTTC -TAACCTGACAGGATGCCTTGTTGGGTATTCGCCGAACAAGTACATAGTTAAGGGGAGGTC -TGCATCTCATCCTGGTTGCCACCGGCAGTCGTAGCAAAGTAGCGGACATCCACATCGTCA -TCTTCCTGCAACTTCTCCAGACTAGGCACAACCCTTGATTGAATCAACTCCAGGCTCTGA -GGTGAGGGCGTGATAGGTTCTTCAGACTTCTCCACCTCAACCAAGGTCTTATCGGCCGGC -AGACGACGCAGTGTGGCGATGAGAACGGCGTAGGACTTGGCGACATTGAAACGGATGTTG -GGGATTTCATCCGTTACCAGGCGTTCCAGGATAGGGAGAAGGGAATTCTCAATAATTTCG -AGGTTGACAACGGGAGTGAGAGTCTGATAATTAGTATTGATCGTCGCAGAAAGAAGGGAA -ACTTACAGAAACAGCAAAGCACGTGGTCATGCGGTAAAGATAGTTAGGGTGTTGACCCAT -GGCCATCACTTTGGGAATAATAGAAGCTTTTGACCACTCCACGCCGAATACCTCTGTCAG -TTTCTTCAAGTTCTCAGTGGCGGCCTCTCGGATGGAGAAGACAGTGTCTCCGAGCCAGCC -CATGCAAAGATCGCTCAGCTGCTCGTCAAAGAACTCGACTCCAAGTTGACTTGCAAGGAG -TGGGATATATTCAATGATAGCAAGGCGAACACGCCATTGCTTATCCTCAGCCAGTTGAAC -AATGGCGGGAAGAAGAGACTGGGAGAGAAGCTGAATGCCAATGACTGTAAAAAGTTAGGA -GATAAACATTCATTTTAAAATCAACGAGTGCTTACCTTTGTTGACTTGCTCGAGCTTCGA -GATGATGTGCAGACGGACGTCGGGGAATTCGTCCTTAAGCATCTGGAGGAACATGGGGAG -CAGGTGGGCAATAGTCCTGGACGGTCAGTGCCAATTACTCCTAGCAGATATGCAGGATAC -TTACTCCTCCTTGCCCAAGATAGGCGCGAGCCCGCTGATCTGAGTACCGAGGGAAGCCCG -GACGTGCTGAGATGGGTCAGAGACAAGGTCTTCAACACTGGTCATGATCTCATTGAGGAG -AGTGTCGCGATCGATCAAATTGCAGAAGCCTAAATATGTCAGTCAGACATCAACTAATAA -CCACTAGGTGGATCATACCCGGAATTTGGCCAGCAATTGCAGTTCGAACCTCGGCCTCTG -TATCTTTGAGGAGCTTGACAAAGGCAGGCACCATGTCGCGGGTAACAACCTCTTCGTGGA -CAGCTTTGGCGATCTGTATACATTAGTGTCGCCCAATAATTTGGAAGTCAGCAGGCAATT -TACCTTCTCATATCGATCGGCAACCATGTATCGGACCCTCCAGCTCTTATCCTCAAACAG -ATTCCGCAGCGATGTTAACAAAACACCGTGGCTGGGTTGCTGTTCTTTGGGAATTTCCTC -CGCAATCGCAATAAGAATGTCAACTGTGAGAAGACGGACGCTGTCTTGATCATCGCTCGC -CAAGTATTGGAAGAGGGGAATTATTTCATCGATGACGACCTGGCTTTGCATCTCCTTGAC 
-AAACTTGGCCAGATTGTTGGCCGCCTGTCGCCGCACCATCGGAGTTTCATCGTGTACCAA -AACTCCAAAATGCTGTCGGAGACTCTGTTGCAATGCAGGGCTGGCCTTCTTGTATGGTGT -AGAATAAAGACCAGTCGCAGAGACCTTCGAGGTGAACCAGTCTGCCTTTGACAAGCGGAG -GACAAGCGGGACAAAATACTCCTCAACTTGGCTCTCCGAGAGCTGCTCGCATACTTTGTT -GAGAGATTCAACGGCCTGGAACGTCAGCATGCGCCTCAGAAACTTTTAGCTGGTGTGGTA -CCTTCTCCCGCACCAACGGCTCTTCAATAGCCGCCAAATTCTCCAGCGGGGAGAGCAACA -CGTGCCCGTATTCCGGACCTCCAACATATTCTGTAAAATTGCCCAGCTCTTCACTCAGTG -CAGTCAAAACTTCGTCCTCGTCCTCGACGGAATCTATCTTGGTTAGACGGGGCTGCTAAG -CGATTGAAGGCTGGACAAACCATCAAGAAATGGGATGAGCTCCTCCCGGGTTCGGTCGGG -CCCAAGTGCAAGTGCGATCGTGGATAGACGGTGAATGGCATTGAGTCTGAGAAGGACATC -GTCGTGCTAAAGAAATGTGAGCTCAATATTGTGTGCACGACAGAGGGAGATATACCTTTA -GCTCATCAATGAGCACGGCGATGGGGTAGAGCTCATCGTTCTCGTTTTGTCCCTCCATTG -TGGATTTGAATATGTATTAAGGAGATGCGACGAGTCCCGGTCCTGGAGAGGGAAGAGGGT -GGCGTTGGACCTGACAGCGATTCGGTAGAGGAGGTGTGAGAGGCAATGAGATGGCTGATA -AGGATTAGTTCCGACTTCGCGAGATGGCAAGGGGATGAATGGTGGTAGAGTGAGGGTGGG -GTTTTGGGCCGCATGGACACCTTCAGCAGAAGCATACCTCGACAATTGAAGAGTCTCTTG -ATCAACAGCAAATTAATAGATTTGTCACAAGTTCGTAAAGAGAATAGAAAAATGCCCCCT -TCGATGATGCAGCAACCGGAGCAGGGCAGTCGAGCTTTGAGTGTTTACCAGTGTAAATTA -CATACTGGCGGTGTGCACATCAATATTTGAATATGGAGGGGAATTACCTTACCAATACCG -TGGTCACAAAAGAGGGGGGATCTGTGGCGGTGGTGGCTTGCGGGCAGAATTTCCACCAGC -ACAAGAGTCATCCAAAGACTTTGTGCACCGATCACGACTTCCTGAATTTCCGTACGACCT -TTCCCTTGATACTGTCGGAGTCGCCTACTTGGGTTTTACTCTGGCTTACCGGACATATCG -CTGATTGACATCAGAATTCACAACCCCCACCATGCCTACCATCTCGGTCGACAAGGCCGC -TCTTTTCAAAGAGTTGGGCCGAGAGTACGTCCCTGCATTTTGTCATTGAACTATGCTAAC -ATAGCTACAGGTATACCACGGATGAGTTTGATGAGCTATGTTTTGAATTTGGTGGGTTGA -CTAGGAATTCTTTGCGGGGCATGACTGACTCTGAGCAGGAATTGAGCTTGACGAAGATGT -ACATCCTCCGAAATCCGATTATAACCAATGCTGATATTCTCAGACTACAAATTCGGACCG -CCCTATCGTTGATGGAAAGCAAGAAGCTCCGCAACTCAAGATCGAGATCCCCGCCAACCG -GTACGCCTGATATATGAAAAGAAGAAATGGAAGGACCTAGACTAAGAAGATACTAGATAT -GATTTGTTGTGCTTTGAGGGAATTGCGCTCATGTTGAACATCTTCTTGGGCCGGAATCCC -CTGCCTGAGTACAAGTTGACCCAGCCGGCACAGATGGAGCAGATTATTGTCAAGGAAGAT -GTCAGTGCTCCAACACTCAATTGCTCAAACTGAGGGGTTCCGCTGACCAACATAGACCAC 
-TAAAATCAGACCCTACGTGGCTGGTGCAATTCTGCGGAACATCAAGTTTGACCAGGCTCG -TTATGCGTCCTTCATTGCTCTCCAAGACAAGCTCCACCAGAACTTGGCTCGGCAACGAAC -ACTGGTATCAATCGGTACCCACGATTTGGACACTGTGAAGGGCCCCTTCACTTATGAGGC -TCTCCCTCCCAAGGACATCAAGTTTACCCCGTTGAACCAGACTCAGGAGATGGATGGCGA -GGAATTGATGAACTTCTACGAGAAGCACCAACAGTTGGGCAAGTACCTGCACATCATTCG -TGACTCTCCAGTCTATCCCGTCATCTACGACTCCAACCGGACTGTTTGCTCTCTGCCACC -TATCATCAACGGTGATCACTCGAAGATCAGTCTGAACACCACCAATGTTTTGATTGAGAT -CACAGCGCTCGATAAGACAAAGCTGGACATTGTGAACAAGATGATGGTCACCATGTTCTC -CCAGTATACCCTGGAGCCATTCACTATCGAGCCCGTCCAGATTGTGTCTGAGCACAACAA -GGAGACTCGTGTCACCCCTGACCTCGCTCCCCGCACTGCTCAGGCTGAAGTCTCGTACAT -CAACCAGTGCTGTGGCTTGAGCCTCAGCCCTGCTGAGATCTGTACTCTTCTCACAAAGAT -GGCCTTCCGCGCCAAGCCTTCGACCACTTCCCCAGATATCATCAACGTGGAAATTCCCCC -TACCCGTGCCGATGTTCTCCACCAGTGTGATATCATGGAAGATGTAGCCATTGCCTACGG -TTTTAACTCGCTGCCCCGTGCATTCCCCGACATGTCCGGTACAGTCGCTCAGCCTCTCCC -CATCAACAAGCTTTCCGACATTGTTCGTGTGGAGACTGCCATGGCCGGTTGGTCAGAGGT -ATTGCCGTTGATTCTGTGTTCGCATGATGAGAACTTTGGTTGGTTGAACCGCAAGGATGA -CGGAAATACCGCTGTCAAACTCGCCAATCCCAAGACCCTTGAGTTCCAGGTTGTCCGCAC -CAGCCTGCTCCCAGGTCTGCTCAAGACTATTCGTGAGAACAAGCACCACTCTGTTCCCAT -GAAGATCTTCGAGGTTAGCGATGTTGCATTCAAGGATCTGTCTCTGGAGCGCAAGAGCCG -TAACGAGAGACACTATGCTGCTGCCTTCTACGGTAAGACTAGTGGATTCGAGATCGTTCA -CGGTCTTCTTGACCGTATCATGGCCATGCTCAAGACCAGCTTCATCGCCGAGGGTGAGAC -TCCCACGGGCGACTCTCAGTACTATATCAAGGAACTAGACGGTGTGTACCCCCTCCAACC -CGACCAGAAGATTTACTAACTGTGTAGATCCTACCTACTTCCCTGGCCACGCTGCTTCCA -TCCACCTCTGTACTGGTGGCAAGGACCAGGTCATTGGTTCCTTCGGTATTCTGCACCCTA -CTGTGCTGGAGAAGTACGAGTTGAAGTACCCAGTCAGTACTCTGGAGCTGAATGTTGAGG -CTTTTATGTAGAGCAAATAGGCCTACAGATGAGTGGCTGTCCTTCTGATTAGAGATATGA -CCAAAAGCAAAAAAAGTACATAAATATAAGAAGATCCTAGTAAATGACATGTCAGAAAGG -GCCTTGCCTTTGGCGATCACTGCGCACAACGTTTTTCAATATGGAGTATGTACAAGTCAA -ATGAAATGAATATGTATTTGAACAAAAAAGTACAAAAAAGGGTATCCGAAAGGAACTGTC -CGGGCTACCATAAAAATGGTTCCAGGCCTTATGGCGGTAGCTTCGGCAGAGGTAGACAAC -GTAAAGGTAGGCAACCTCCACAATGTAGTGCTAATGATAAAGGGAATGGACAAGACTACT -TCTGTCCATCATCGAGCCGCTGACCGCGGAACGAATAATAGTTGGTCTGCTTGATGTGCC 
-TGGCCTTGCCAGTTCGTTCATCGTAAATACCAAATCCATGGTTTGCGGCATTGGGAGGTG -TGTAAACTTATCAATTAGCCTTGGCTCCATTCATTTTCGCAAAAGATAACATACGATGGA -GTGAGATAGCAAAGTTGTCATGATCTGGATTGGATATCCGATGCAGACCAAGCTTGTCAG -ACATGTAGGTGACCTGGTTTTCGCCGTAGGTTGTCTGCTTGGTGAGCTGTGGTGGTGAAA -TTTGGCCATTCTCGATCTGCTCTTGGTCGGGCCATGAGTACAGATCTTCTTGCAGTTTGC -CTTTTAAAACCTATTCAAAATTAGCTTCCATCTACCTGTCAATTCAAAGACGACGTACCT -TCATCACGCAGTGCGCGTTGGCATGGTCATGAATTGTGCTCCCTTTCCCGGGGCTCCATA -CAAGAATCAGCTGAACGAGTTAGTCAAGCAATCGTTGAGGGTGGCAATGGTCATACCAAA -TTGCTCTTGCCGTTCCCCTCATCAATCAGATTTCTTGTGTATGTCCTATTCGCATCACCC -AGCGCATATGGAGCCCATTCATCCGAATTGGAGACGTATCTCTCCATCAGTATTTGTATC -TCCATGGGATCGACATCACTAGAATCGAGACCCGAGCTTGGACCTAGCGCAGAACTAAGG -TCCTCCACGAGCTTCTCAAAGTCAGGTCTCTCGGCGGTCGGTTCGGACCCGTTAAGGAAT -GGCATGACGACCGAGCGTTGTGCTAGACACGTTGGGGATGGAGTCCATGTATGTAACGAG -GAAGAAAGGGTATCGGAAGTTAGCCGAATGACGAAATCGAGAGTGTATCCACCACAACTG -AAATGTAAACTAGCATAGGGGACCGCACAATGCGTGTGTTTTCTTATGAACTTGAGACCC -AAGTATAGTAATTCAATGTTCACGAGTATGTAGTTGCCCGTTGCCATGCAATTTGGCTAT -GATAGCGATAAGAGCTGATGCCTCAGGCTTTAAATGACATAATTGCCCTCGTCGCCTTAG -GCGCACACCCCGCCCTGACTAAAAACTGCGGTGCCACAATTCAAAGATTGTCGCCGCGGT -TGCATTTCTCCTTCAAAAGTTGTCGCTCAACTCGCTGGTTCTTAGCGAGGGACATATCTT -AAGTCCCCTAATACCAGCGATTATCAATCGTTTTCTTTCCATTGATTTTTTGATTGCTGG -GTCAAACCGCCAGAATGTCGACCACTATTGAAAAGGTGAGTGCGGTCAAGATTTTTGAGC -ATAGCACCAAACAAATTCCTAACAATGTCTCTACAGATCAAGGAGGTCGAGTCCGAGATG -GCTCGAACTCAAAAAAACAAGAACACCTCGTATCATTTGGGTACGCGCACACCCACGACT -TTTCCCCGATGACGTCTTCTGACAATATGCAGGACAATTGAAGGCGAAGCTTGCTAAGCT -CAAGCGTGAACTTCTGACCCCGACTGGAGGAGGTGGTGGTGGTGGATGTGAGTATTCCAG -ACCCGAACACTCTACTAGGTGCTAACTTAAGATTAGCTGGTTTCGATGTCGCGCGTACTG -GTGTGGCCAGTGTGTAAGTAACCCGAGAAGATTGCGATCGCCTATACTTACAAGTGCAGT -GGCTTCATTGGTTTCCCGTCCGTCGGCAAGAGTACACTGATGAATAAACTGACTGGCCAA -CATTCAGAAGGTTGGTTAGGACCTCCTTAACAACCGATTTTTCTAACAATCTCCAGCCGC -CGCCTACGAGTTCACAACCCTCACAACTGTGCCCGGTCAAGTGATGTACAATGGCGCCAA -GATTCAGATTCTCGATCTTCCGGGTATTATTGAAGGTGCCAAAGACGGCAAGGGTCGTGG -TCGACAGGTTATTGCGGTGGCCAAGACTTGCAACCTCATTTTCATCGTGCTCGATGTGAA 
-CAAGCCATTGGTTGACAAGCGAGTTATCGAGAACGAATTGGAGGGCTTTGGCATTCGCAT -CAATAAGAAGCCTCCCAATATTGTATTCAAGAAGAAGGACAAGGGTGGCATCGCCATCAC -GAGCACTCAGCCATTGACTCACATCGATAACGACGTAAGATTGAGTTCCCACATAGTCTT -TGACGAGTACTAACAGATCCCAGGAAATCAAAGCTGTAATGAGCGAGTATAAGATTTCAT -CCGCTGATATCTCTATCCGATGTGATGCCACGATTGACGACCTGATTGATGTCCTGGAAG -CGAAGACCCGCAGCTACATCCCTGTCATTTATGCGCTGAACAAGATCGATGCTATTTCCA -TCGAAGAATTGGATTTGCTGTACCGGATTCCAGACGCCTGCCCAATTAGCTCGGAGCATG -GATGGAACGTCGACGAGTTGATGGAGATGATGTGGGAGAAGCTCAAGCTGCGCAGAATCT -ACACGAAACCCAAGGGCAGAGCGCCTGACTATTCGGCACCTGTCGTGCTGCGTTCCTACG -CATGCACAGTCGAGAATTTCGTAAGCATTGTTTTCTTCATTTCTAAAAATTATACTAACG -ATACAGTGTGACGCTATTCATCGCACAATCAAGGACGATTTCAAGCACGCCATTGTGTAT -GGTCGCTCAGTGAAGCACCAGCCGCAGCGAGTCGGTCTTTCGCACGAACTCAGTGATGAA -GACATCGGTAAGGCCACTGAAACAAAAACCAAACATGAATTTCAAAGCTGACCAGCTACA -GTGTCGATCGTCAAGCGGTAAAGGATTTATGTCAACTCCTTGACCCTCATAGCCCAAAGC -GGCGAGATGATCCAAGCGCTGGGGTGGTCTGTGCCAGCCTGTCTTTGCGCCTCCGCGATC -TGACAGGTTTCTAGGACATCCGTAAGACGCCACCGTATCCGAGCGTGGTTGGATAGACTC -GTTCACGGAAACATAATCAAGGGGGAGGGGAATGTCTTAAGTCCACCTTTTGACAATCAA -AATCAGTCATGCACATCACTGCTGATCTTTGATGCATCTCAAGGTATAGCTCTTGATTCG -ATGAACTCGAGTTAGGCTTCAAAGGTTACACAAGCCCAGGGGTACGCACGAGGCGAAAAG -CCATCGGTCTTTCTGATATTCTTGGCATCATTGGTAGTTTGTGGCCCTTGAACCAGTTTA -TATCGTTATGATTACCCTTTTTTTTTATGATTGAGGTTATTTGTCCATTCAGCGTCCTCA -AAAGTTCAGATTAACATGGCATCCTCGTATTGTCTAAAAAATAAAAGCCATCAAAAACGG -TCTACTCGTTCTTCCCTGCAATTTCAACATTGAAAAAGACTGTGTGTGCCTTGTTTCCCC -CCATATAACGAAAAATTCATCATGTCAGACCAATCAGCTCCTCTGATCGATTCGTATTAC -TTGAGTAAAAATGAATGCATCATGCGACTTCTTATCGCCATAGGATTATAGACCCTATGT -ACAGATCCGAGAACGCCTGCATGGACAATTTATTTTGATGACATTCCGATTGGCATGTTC -CCCTTGCCATTACTTCCCGTCCTGCACAGCATCCTTGCCAGTTATTCTAATGTCTATAAG -ATTGTCACTGGTGGCTTGACTTCTTTGTTCGCATTGCTTTCCCCCCGGCCCATTTTTACT -TTTGAGGCTGACGAGTCAGACCATAAAAGGCCTAGGCCTCTGTGTTTGCGCCGATCACAG -CACTTTCTACTCTGTAGCTGTCAGCTCGAAAGCTCCGAAAAACGAACTTGATAGCTTCGA -CGGAAGAGCCGGAAAGGTAATTATGGTATGCATACACCGGACTCCATACTCCGGTGCAGA -GAAAGGCCGCGACTCAACGCATTGACCTACTGCTAATAGCTTAACGGTGATCATGACGGG 
-GTCGCTTACTCCGTTTATGTATTAAAGCCACAGGCTTTGGGTTGCCATTACATGTTAATA -AACCCGGCATTACCAGTCTAGCTTCCGATAGGCCTAAAGGGGGTATTATTACTATATTAA -GTAGAAAATGATAGGGGAGAGATCAAAATGGATCTGTCAACTGACTAGCACGTTTTCCCT -AGCTTCTGATCTCTTGGCTTCTGGTCTGCAACTACTTACCGAAATTTGAGGGCGGAAAGA -AGAAAAAAAAAAAAGTACAGATCTGCATATGCACAGGATTGACATGGATGCAAACCAACT -TATTTCAACCAACCAGAAAAAAGCAAAAAGTGAGGCAAGGTAGTGGAAGAGATTTTAAGA -AAACAAGCGGTGAATAAGAAATATTGGTCGAATTATGAACGTTGATCTGATCAATTCTCA -TCCACTCGGTACAGATGGTACATTTTGACGGGAGTGCATTATTTGCCCAATAGGGGGGTT -TTCCAGTTTACGTGTACTCCGTGATTCTCATGAACCCTCTGCGGAGACAATTCTAGGTGG -AGAATCTCCCGCACATCGTCGATTTAGTGAAAATGAGAAAGGGGGCTGTTCACAATCTGG -TCCGTTCTCAGATGCAAGATcggcgtacggagaaggtacggagtacggaggactgaggta -tgaggtatgaggtgtgtggtaATTTCTTGTATACTTCATCAGGTGACTGTGATGGCACCG -ATCGCCCGCATACCTTTAACGTACGTTGTACACTGTTCTCTCGTGGAGGATCCATCGGTC -TTCCCCGTCGCATTGCATTGCATGAGCCATGCTCTTCCCCGGAGTCAGCTTCTTACTGTG -GTTTTCTTGGTCTTGATATACGGGAGCCCCCTGTACGGAGTAGATGATGGTGTTTCGCCT -AGAAGAGTAGTATTCATCTGATATATTGAGTTCTCGATACAAGATTCATTCCGCATCAGT -CTACGTTCCGGAGATACCTCGGGCAGTATTTAAATTGCAGCTCAATCGGTACCTGAGTCG -AGTAACACTGACCCTTACAGTTGTGTGTCCCAACCCTAACAGGAAATTTCTCCCCTTGAA -AACAGCCAAGAACGGCCCAAATTGAAAGAAGCAAATATACGAATTGGCCCGAGAATTTTG -ATCTTGACCGGTCTAGACTTGTTCCTGCTCCTTCTCCGAAATAGCGAAAAGCAAGGCTTT -GCCTTCAACAAGATAGAAATCTCTCCAATAATGGCGTCAGTCCAGGATCTACCCCGACTG -CCTCCCTAAGCCCCCGTTAACGAATGTAATTTTGAAACTGTGCTAAAATTAAAATCGTTT -AAAATTCGTCACACATTTCTTTTTTTTTTTCCTCTTGATTTAACTGGCGGAAAGCCCAAG -AAATATGACGGTAACAAGCATAATGATTGGCTCTGCGACTAAATCAGCAAACATATGAAT -CATACGAATTGAAAGCCCTGCATTGTTGGGGATGATTTGTCAGACCTAATACCTTTTGTT -TTGCTTTTTTCGCTTTTTTGTTCAGACGTCTTTCAACGAAATTTTTTCAGATGTAATCAA -GCAGGTGAGTTCTACACTCGGCTCCCTCGTGGCGATGTCGAACAAGCGGGGTGTTGGATG -TTTGGGTAGTATTCACGTCCAAAAGGGACCCGTTCTATAGTAGAGCATATGAGAGACGCT -ACTCCGTACTACTTCCGGACTTTACGGAGCACACGCCTCTCCATCAATCTACTACTATAT -TGCTCTATTGCtctatagtctatagtctatagtctatattctatCTGTTTGTCTGTTTGA -AAATGTTGAAGGTTCCCGGGCCGGAGGGGACCGCAGTGGGTCGTAGTGGAGCAGACGAGG -GCTGAACTTCCAAGGTTCAAGAGAAAACCATCTTCCCCTCTTTCTTGACGACTGTGATAG 
-GACAATCCTCCAACTGTTCGGCCCTTGTATCGGACCTCGTATCAACCGCTGGTCAATGGA -GGCGGTAGATTCGTCCATCCAAAGATCGTCTTGTTGCTCCTCATTTGCTGGAGCCGTGTC -ATCTTGGGTCTGCGATATGAACTTCTATGGATCCCCTCTAAGTTCCTATGGGCTTTGTTC -AATCTATTCGTGGGTTTGCTTTCCTTTCCTCTGCTCTGCCGTATGATTTGTGCTCAGAGA -TCAAACCTCGGACCCGGTTTCTTCACCTCCAGGTGAACCGTGCCCCCAAAATACAACCTA -TGGGGACAGGAATCTGGAGGTTGGGTCTTTGATTCAACGGTTTGCTTTTTGATCCAAATG -AAGATATCTCCAATGAAGTCTCTTTTCCATGGGAGCATAGTCGATAGTTTGTCTGCCGCT -ATCCTCCCGGTCCAGCCTGATCGTCTTCGCCCGCGCATGGGTATTATGCGGGCTTTAGGT -TCAGCGGTTTATCGATCCGCTACCCGGCTTTCAGCAAATCAACTAAGAAAGGAGCTACAC -TGTGAGCGAACTTCGATCCTCTCACAGATGATCAGCCCTGATTTGACCCTGATTTCTAGA -GACCCTCAGCCTGGTCAAGACGAGGCAACAACGCCCAGCATGAAGGGCTATACATCTGGT -CAAACATTGGTCAAGGATTCGAAAAGGAGCGGTCACGCTGTTGCATCCATTATTTCCCAC -CCCTAGGCTACCTACATTGGACGCTATGTTGGACCTATATTGGATAGGTGATCCAGTATA -TAACATACTCGTACCGTCGAGTCTATCATCCTGGAAGGTCATACGGGAGAAAAGAAAACC -AAAAACCAAAAAACCCTAGAAAATCCCCAAAAAACAAGAGTATACCGCGTTAGCCCTGAG -AACGTAGATTTTCGATACCTTTACACTGTAATACGCAAAACGAATTGGGCCTGACGATGC -AACCCCCTGCCGCAATATATCGCGCAGCCAACCAACCAAACGCCGTTCCCCCTCTATTTA -CAGGGATCATTTTGAAATTTCATGAAATTAAAATTCTTGTATTTGGTCTCATTTCTTTTT -TTATTTGTTGGTTTTTTTTTATGGAATTTTTTTCACTTTGGATGGACTTTTTCACTGTAA -CCAATCACCGACCAGGTCGGCTGGATAACTCTCGGGGGAAGGTTATCATAGATTTCTCCG -GACAGTGGTCTTTCTCGTGCCAGTTGCAATTGAGATCTGTAGTTGTTCTGTCAAATCCAC -GCGATACTGATCGACCTACCGATTTTCCTTTTAACCCCATTGCCTTACCCGCGTGCTTTC -TACCTTCTTGCATACTGTATACTTTATACCTTTTACTTTGCCCTAGGTAGTCTATAGGGT -TGTTCTTCTCGGGACATTACTCTCTGGGTAATTTTATACAGGTCTACAGAAGCTGTGGAA -CATGTATCAGATTCTATTGTCTTACCCTTAGATTGTTTCATTGTTCCCCGGGAtttcttt -tttgtctctctttctttgatttcttCTCAAATCGAGCCTATCACGACAACAACTTCCCAC -AAGCAAAGGCCTGCCTCCATTGTTCTTGAGATCCATTGTCCATGGAGGAGCCCACCATGG -AAACGGAGACTCATCATCCGCCGGAGAGCGAATACCCACATTCGCCATGGGAAATGGGCA -CATTTACTTCTCCCCAGCACTCGCCTCCTATGCCAGAATATAGTGGGTTTGACTACGGTC -AATCGCAGCTCATGGCCGTTGACTCATATGGCATGTCAATACCCCCGCCCTATGCCACCA -TGCCACTACCTATGCCTTCGCATTCCTGGCCCAGTATGCTCACAACCCACTCTCCGTTTG -CTGCATCTGGGCTAGCTATTTCCACCCCGACGTCGGTATCTCCTTCGGCTCCCATGCCTC 
-CAGTACGAAAGACCTCGACTGGAGGGTCAACACCGCGCAGGACACTCACCGACGAAGATC -GCCGGCAAATGTGTATTTATCACGAGGAAAACAAGACTGCTAAGCAGACTGATATTGGGG -GTACGCAATATGTCTTTCTCTGTGTTCAAGGGACTAGGCTAACTCTATATACAGCGTTGT -TTGGAGTCGAGAGAAGGTATCATTGCAACCTTAGTTTATTCGATTCAATCTAACATGTAA -CAGCACTGTTTCCAAAGTCCTTCGCCAGAAGGAAAAGTACCTAAACCCTGAAGACGGGAG -TCGATCTCCCATCAAACGGGCCAAGGGCAGAGTCCCGGATATTGAGAAAGCTCTATCCAA -CTGGGCAAGGAACTACCAGCGCCAAGGATATCCCCTGAATGATGAAATGATCAAAGAGAA -AGCACTCTTCTTCGCCAGCACCTGCGGCTGCCCCGAAGGCAAGGAAAAAGTCTGCACTAC -AGCTTGGCTAGAAAAGTTCAAGCACAAGAACAATCTTCTCGGCGCAAAAGTGCGCAGAGG -ATCTACCGAAATTCGCAGCGGATCAAACAGCCCAACCCACCTCAATACAGATTTTGGGTC -TGCTCTTCAAAGCCCTAGCGGACCCTCGCCTACATCTCCAGTCGACGGCTTCGTATCCCC -ATTGTCTCCCATGAGCCAAGAAGGGTTCAAACGGGACATGGCTGAACTACCTGATCTGAC -AGGGGGCTACCAACACGGGTATTCCAAGAGTACAACCTCAATCGACACTTCGGCGGGTAT -GATTAGTCCAACATCCACTTTAGTCTCTGAAAGTCCTTTCACGCCAACGAGCCAATCCCG -TCTCCCAATGGCCAACAATAGCACCAACCGACCACGCAGCCAGACCTTCCCGCTCGTTCC -AATCGACCCAACACTTCTATCCGCCGACGAAGTGACGGACCCGCAGCGCCAAAAGCGCGA -ACTTCAGCAATCACTTTCGGTCTCCATTTTGCAATCCCCAGTGGAAATGGACGACCCTAA -GCCCGCCACGTGCCCGGTCCACCAGACAAACGTCATCAAGCGCAATCGCAGCAACCCAGA -AATCAAGACCCAGTCCATGCCGCCGCCTTCAAAGTCTACTACCATCTCGCCAATCAGCTC -ACCGGGATCACCGACGCAGGACGAGGCTCGTCGCGCCCTGGAAGTCGTTATGACCTACTT -CGACCACCAACCGGCCGGTCTCGCCGCACAGGAGTACATGACCATCGGAAAACTAATGGA -GAGGCTTGCGCTCGCCAAGAGCCAGGCCGGACTTCTGTTGGGTGGCTTGCCGCGAATTGA -TGAGCATGAGGATATTCCCATCCCGCGTGTGACGAAGAAGAGGAGCATCCACAATTTGGG -ATGAGTCTTGAACAGAATTGCATTTTGTGTATCTTTCCTTCCTTTTTTTTTTTTTGCGTT -TATTCCCCCCCTTCCTTTTGCATCGTTTGATTTGCTACGGAAGCTTTTCCCCTTCCCCCA -TCCCCAACATTATCTGATTTACCCTGATTCGTGATTCCCCATTCTCTTCTTCCTTTCCTT -CATTCGGTGGTCTATTTGCTATTCTGTTCTTTTTCTCTGGTAACACTTGTACGAACCTTT -CTTTTATATCCATTTCCTATCTCTCCGGCCAGCAATGGCAACGAGATATAAAACATGGGC -AACCAAACAAAAAACATACACATATCCAGAGGGCGACCTCTACTTTTGGAACTCTTGGGA -TTACGGATTTGGGAATAGGCGCAGACGCCCTGGCATTCCCAGTCTGGACGAGTTGGATgc -atatggcataatgggcacatggcaatatgtgatggtatatggaattgggatCATCGGCCT -TGGACTTGGGAGTTTTTGCCGCTTGCTTTCTTCTTCGGGttttttttgcttttttttgct 
-tttttttttCTGGCTGTTGGTTTGCTATATCCCTCACCCCCTTGTACTGCATCTCTTCTC -ACACTCTTTCACATTAATTTTTTTCCCTTTCTTTTCTTAAAAATTGGGCTGCTCACGCGG -AAAGAACATAATTTCTCATTAGGTTCACTTATTCACCTATACATGAACCACGTCAGATTA -GTTTTGCCTATTATACCCCGATATCTTCCTACAATCACACTGGTTTTTTAAAGTTTGGGA -CGTATGCTAGTCATAAGTAAGGCATATCTCATTAATTAAGTCTGGTTATGATAAAAAAAA -ACCAACAATATGTAATCTATACAAATGCAGGCGGTACTGACGCCACCCCACGATCCTCCA -GTAAATCTTCAACACTGCGCTCAGCCATTTCAGGGAGTACCTTGGCCAACACTGTCTCGA -TAATGCTCCAAATAAGGTGTTTATTGCAGTAATCATCTGCAAAGAGATCTAGAAAATCAC -GCTCGATGATATCCAGGAGATGCAGTTCCTCGCAATCAATCCCCGCAGATGAATCGTCTG -TGCGCTGCCGCTCAGCGGCAAGGTGACCACACGTATCTGGACAAGGATCCGAAGGCGAAG -GCGAATGCGATGACGCCAAATTTGCCTTCTCCTCCTGGAAAGCTCCATGGTGCGCACCGG -GGGAAGAGGGCGGGGATGTGACGAGAGAGCGCGAACTGTTGGTGCTGCAGGTACCATCAC -CGCTGTAAGGCGGAGGCGCCGACGAGGGAACACCAAGGAAAGTTCGTGCGACGGAGCGCG -GGATCAGGGCCAGAAGACTAGCTGCACATCGTCGCTTGATAGCGGCGATCTCCGCCCTAG -ATGGGCTGTTCGGTTTTACCGGCTCGGCCGGCTGATTTAACTCTGGCGTTGGGACATCGT -CTGCGACGACAGAAAGCGATGGTTGGCCTGAGGTAGCTGTTGAGGCACCAATATTGCCGC -TGTTGCTGCTGTTCCCCGGATTATTGATTCTACTCATGATATCAGTCCTATCAAAGCCCC -TGACTCCTTCCGTAATAGGAGGAATAGGTGAGGCAGAGGGCGCGAGGGCTTTCTGACTGG -GCGTCGGGGTCTGCACAGACTGCAGAGCCGAGGCCGGAGCTTCAACATGCCCGGCCGCTA -TTTGAGATGTTGGACGGGTATTGGCCGGGAACAAGGCGGTCCTTGTCGCAAGAAGAAGGT -TGGGCAACAAAGTGGGGGTCAGCACGTACTCCTCAATGGTTTCCCGAAGGAATCTGTGGG -AGTTAGCAAAAAGGTCTCAAATCATGACGTCGGAGCAAAGAAAATAATAAATTAAAGCGA -TAATCTCGACTGTGGAGCACCGGGACGTTGTGCCTTTTTTGGCGCATCATAAAAAAAAAA -TATGTCCAAGCATATAAGAAATAGAGGAGGCGGAGCAAGATCGGCTTAGGCATGAAGGAG -CTTGGGCCAAGGGAAGTAGAAAAATATCCAACATCCCGTACCTGGGGCTTCATGCACGAA -CCTCGGTCAGATTACGAATTCGAATCCTTTCCGGCAGCAATTTCGCTCATTGGACTGAAA -TCAGTAATTCAGTCTTGGCTGCTGGTCTTTTTTTCTTTTCTTTTTCAGCCAAAAGAGGCA -TGGCCTTCACCTCCACCGATGGGGTGATCGGCGGTTTTTTTCTCTCTCTCTTTTGCTCGT -GCACGAGATTTGAAAACCCTTCTTGCCAAGAATGTTTGGATCGCCTAGATGTTCTCATTC -TTGATTTGATCAAATCAAGTCCATTGAAAGTTGGCACATGTTGGAATCTTGAGATTCGCA -GAGTGGAGCGGCACGGAAAGGCCCGGGCCCCGGCGCTATCACACACTTCGAAGAACAGAG -ACGGGGGACATCCGCGGGATACAGTCATCCCTGTCCGAAAAAAAATCAAGCTCGCTCGGA 
-CCAATTTGAAGGCAGCCAATCTCAGGCCAGAACAGCCCAGTAAGACGGTAATAACCGTAC -GGTAAGATGACATCGTAAGGTTGTAAGATGACCGTAATCTCTCCGGGTATTGCAGCCTTC -CAAGAGGTTACAGCCAGGTCAAGTGACCGACACGGCATGCGAGCAAGACGCTTCATGTCC -AACTCTAACTCGCACAAGTGACACTTGACCCGTAGATCGTGTTTCGGAGATCGTGCCATT -GCTCCACAGTCGAGTATTTCCCAAGGCCTGAGCCCTCGAGAAGGAGGGGGCATTGACATC -GCAAAGAAGATGGAGATAAAAGAAAAATAGAGCAAGACTCACCTATCTAAGATTCCATTG -GTAGCACCGACCCTGCCTGGGCCTGCCAGGATAAGATGTTGGAATAGGGCGAGCAAGCCA -CTTAGCCATGGCATTCTCGATGGTATTCCCATTATTTGCGAGACCATGGAGTACATCCGG -TAGTCGAGAACTGGGCGTTTACTTGTGCCATCTGTCGATTCCATCTCATCTCTTTTCTGG -TTGATGGGTAAGGCAAGTGAAGCACCGACACCGTGCGAAGAGGAGCCTGGACTCGATGCG -ACACGAAATAATCCCGTCGCAATGAAACGCAAAAAGACGTAACCAAGATAAATCCCCTGT -AGAAGGCTCCATATCCAAGTGGCCGAGAATGACGCCGATGGGTCTGGGAGCTCATCGTCG -GTTGAAAGCAAGCCAAACTTTTCTAATCGGTTGATCGGAGAGCCCGAGGCGGATTTTGTC -TCATCTTGGGTTTGACGGCGTCCCACTTGAGCGGCGAGTTTCCCTATATTTTCCCAAATG -AACCATCCCTCGCATGCCCTGCCACTGACTTGGTTTCCAAGTATCAGATCTGCTATAACA -TCAACCACCAGTGCTCGAAGACTCCTGTTCTCGAGGTCTTCTGTTGGAAGAAGAACTGCG -AGGATACCGTTGGCGAGTAGTCGTCGATAGACTGCTTCATTCTCGCTCTGCTCGGCGATC -GTATCTGGATCCCCTGGGTCAGGCACCGGAGAAAGACTGGGATTTGGATTTAGCTCATGA -TAAAGAACACGGTACGACGTCGGTAGACCAGAAAGGTGTGACTGTTGCCTGGCCTGTCTG -TACGCTGGATCATATAGTCAGCAATGTTGCCATCTCTCAGCGGAACCACCATGGATTCCG -GCAAGAAGAACTTTAGAGTGATCAAGTCTCACAGAGTACATGTGCTTCCACCAGGCCAGG -GATTTCGTCCAGGGCTAATTGTGCAACGTCGATTTGGCGCAGCCTCTGCTCGAGTGCACG -CGTACAATGGGCGATTACTTGTAAAACTTCACTAACAAGAGCCTGGTCGGGTGTAATCTT -TGAGTACCACGAGTAAACAAATTCTTTTATTATGATGGCGAGAAGGGCGTAAAGTTGACG -ATCGACGTCATTGGAGCTCGTGAGGGGAGGTAACAGCTCTTCCGGAGGCTGTGGGGAAGA -TGCCCCGTGGTTGCTTGTTTGGGGGCACAGGACGCGTCGGATCAGGGCAGTGGTGGCCTT -TTCACTAACATAGTCTGGGTCATCCTTTCCAGGGTCTCTCTGTGTACGGAGTCGAGAGGA -AGGACGCGAAGGAGGCTGAAGCGTGGCAGAGGGCAGCGCGGTGGATCTGGAGCTGGAGGT -TGTCGGCCCAGCCTTGAGATGAGATATCGGCTGAAGACCGGGGCGGAGCGGATCATTCAT -TTCATCCCATAGAGCTTTGGAGTCGGTTCAATAGCTATCAAGGAAGCGTTTTCTGGTAAT -AGCTGGACAAATTAACCTTTGAAGCGTTGATGGTTTGTACCTGGCCCAATTTGGATATTA -GGGCTTAGGGCTTGGAGATAGGCCTTATCGCGCTCCATTGATGCGGGGATCTTTTAGATG 
-GCCAAATGTACTTAAGTGCCCAAGTGGCCTCTTGACTCTCAATTGTATTATTTGTTGCTT -TGAAATATACGCACCATGTTAGACCAATTTAACCAGGATAAACCTATCCCCTTGTGGTTG -GATTGTGATCCTGGTCAGTCCCCCATGTTCCTCTCGCCTTCCTATCACTGACCTCGGGAC -TTTTAGGCCATGATGTAAGCCTTTCAAGGTACATGTGTATTCTAGAGAGATATTTATTGA -TACGATATAGGATGCTTTTGCAATTTTGATCGCCGCTCACCATCCATCTTTGGAGCTGCT -TGGAATCAGTACTATCCATGGAAATGCATCGTTAGAGAAGACAACCGCCAATGCCGGCAG -CATTCTGGAGGCGATCGGCAAGCCCGACATTCCTGTATATCCCGGCAGCAATAAGCCATT -TTCACGGCCTGCTCTTCATGCGCCCGATATTCACGGTAAGGAACCCTCGCAGTGCGAGCG -CAATGTTTCAATAACACTTCCTAGGCGAATCTGGTCTTGATGGAACCGACCTTCTTCCCA -AAGCCACTAGGCCCCCCGTCACCGATCAGAATGCCATTCTAGCCATGAGAGATGCTCTTC -TTGCACAGCCCAAAGGCACTCCCTGGGTAGTTGCAACGGGTACCTTGACTAACGTCGCAT -TGTTGTTTGCAACATTCCCCGAGGTGGCTGAACACATCCAAGGGCTGAACATTATGGGTG -GTGCCATTGGTGGAGGATTTACCGATGCTCCTATGAGCAGACTCCCAGGTGAGAAGTCTA -GGATTGGAAATGTGACACCATGGGCTGAGTTCAACATCTATGTATGCTTTGCAATATTTC -ATCCCTCTCAGGCTCGACTAATGGCGACGGTTTAGTGTGACCCGGAGTCTTCAGAGTCAA -TCTTGAGCAACCCAGTTCTTGCATCTAAGACAACTATGATCGGTTTGGATCTTACACATC -AGGTCTTGGCCTCACAAGCCGTCCAAACACGCATCTTGCATGGCTCGATTCACTCGACCT -CGGAACCCACTGTCCTTCGACAGATATTGCATGCACTACTCACTTTCTTCGCTGCCACAT -ACGAGAAATGTTTTGGGTTGATCACTGGGCCTCCACTGCATGATCCCATTGCTGTGGCCG -TGATCTTGTCGACGCTGAATCCAGCCTTCGCAAAAAAGCACCCTGACCAAGCTCTTACTT -TTGATGATAAAGGCGGCGAGCGTTTTGCTGTGAGTATTGTCACGGATGGACAGCACGGAA -AAGATGTATCCACAACAGGTCAACTAGGGCGCTCGATCGCTACTCCTGTTGATGGTGCGG -GCGTTGCTATTCCCAGGGGCATCGATTTAGATGCTTTCTGGAATATAATTCTACAATGTG -TCCAGCTGGCGGATGATTGCAATGCTGCACGCACTGTCTAATCTGCGATATCTTTGGACG -ACCTGTGAACATTTTAGGGATTTTGGTTACTTGAGACTCTGGTCTTTTAGACTCTAGTCT -AAATCTTGAACATAGAATGATATCTCGGTGGAAGTCAGATAGGGCCATAGACCAGGATTT -GCGACATGTCTAAATAACTCAAGTTGTACCTAGTACTCATATTTGACTCTATCCATTGTT -CATAAATGCCCTGATTCGATAGTTTGACGCCCAAATGCAAACTTTCACGTATACAGGGGC -AAATCTCAATTATCTCATTTATTTAGGGTATTCAAGATGCCCTTGGTGGCCTGATCCTAA -ATGTTGCTTATGAGTCTCGAATCCCACCGACCTGCGATAGTGATAAGGCATGTCTCACTC -ACTCATGAGAAGGCAGGACGGCCTTTTTTTTAAGGCTTTGATAGATCACTTTTCAGATGC -AAGTCGGCATTTTGTGATGTTTTACAGGGGGCCTCCACGTTCATGGCTTTCAGGGATATA 
-TATGCATAGAATAATCAAGAACAAGTAAGAAATCTAGATTTGCCTAGAATCCCCTTGGAA -GTTGAAGTTCAAAGGCCCGGCTGAAAGGAAAAACGTCATATGTTGGCCTCTCCACATCTC -ACACCAAACACTTGCTCGTGGACATCTTTTTCCCCAGACTTGATTTTGTTACTCATATTT -TGGTTTCTTGGCCTTGCATGGACGTCGATCCCGCACACTCCTCGACGGCGCATTCCGATC -AGAATGCCGCCTCCTAACGCATGTTTCATTGCAATCCATCTATGATATGTAAACTAATAT -CTCCTTTAGCATAGCACGCCACCTTCTCCGACGGCACCTGACCCTCCGCGCCGATCGATT -CCCACTGCCCCTGTGGAGACCGATAACGCCCACGATGACAACGTGAATTTCCTACGATCG -CGCCCAGCTCTTTTCAATCCGTTATTACCCTCCTTGAGCCGACAGCGTCGCCGTCGTTAC -CCGATTAACCCGCCACCGCAAAACGAGGATCGCATGGAGGTGGATGATTCCACCAACCCG -CGCACGTCCGTTGAACTCAACCGTCGCATCCCGATTGTCCGCCGCCAGGAGAGATCAACC -GATATGCCGAATTACGAAGGACGAATGCCCAACTCACGGTCGTTGTATGGATGGGCGCCC -GGATCTGATGATGAGGATGGTGTTGCGCGCGAAGAGTCAGAAGACGATCAGATGCAGCCC -TTCCTCCATTCCACCTCAAGTCGTGATACACCCCCCGCGCGACCTCCGCGAAATGAGGTC -CCGGGCCCCGCACAACTGATACCCCATGAGTATGAGACGTTACCAGGCAGCAACCGGGCT -CGCACATCGGTCTCCGCAGTCGCGGCGTTGTTACAATCCGCAAGACGACAGCCTCGGCTT -GCTCGAAGTCGCACATTAGAGAACTACATCATCGATCGTTATTCAGAAACCACTCCCGAG -GAGGACGCCGAAAATACAGTAGCGGTCGCAACTCGTGGATACCGATATCGTCCTTCGTCG -CGCGGGGAGTCACATCATACCAACCTTACCCACAATGACCTACGCGCTCGTGCTACCGCT -CATCGCCAACTACACTCAGATACCCACCTGAGCAATATACTGGGCGAAACGATAAAATAT -CTTGAGCGCATTCGGTATTCAGACTCCCTTGAGGAAAGTATGCTTGCCGTGGCAGACAGT -CGCCTACCCATCTCGATGGACAACAAGGCTTGGAGAGATACAGATTTCATTCTATACACC -CCCAACATTGCACCACCTGCGGCGTGTTCATGGCTTCAATCTGGAATGGCTTTTTCAGGC -TGCCAACGAGCTGCCAGCGCCGGATGTTCGGTGTTGTCGCAACGCGTCACCAGTCCACAT -GGCCCCAGTGAGCCTGTCATTATAAATGGAAGTGATACCACCCGAGTCAGCGTCTCCACT -ACAAGTGGTCGCCGATACCTAGCTAATAACCGCGATGAAAATTGGCCTGTCAAAGTCACG -ATACACCAAATCAACCACGACGACATGACTCTATCCGGCACCATGGAGGCCTACAATATC -CCCGATAAAACCTCGCCCACTCACGATGCACATATTGTGACTTTTTTGGAGGGTGAGATC -ATCGATTTCAACAATCACACGCTTGAGACCAAGAACTTCAAAGCAGATGCAGACATCGAT -AGCACATATTGGCGTGAACTACAACCGTTCAAGGGTCTGACAGATGCTGAGATGGCACGA -AACCTTGTGAGTCGCAAGTGGATTACTGAGGAATTGTCCAAGGGATGGATCTTGATGAGA -TGGAAGGGTAAGTAGCAGAAGAACATATATCCTTTTACCGTGTATCATCTTTGCTAACAT -GCCCTAGAACGATGCTTCATCACCCCAACCGATGCTCGTCAAGGTCTTACAATTTCTGGA 
-TTCTACTATATCTCTCTTCGCCGCGAGGACGGACACATCGAAGGTCTATACTATGACCCA -GGAAGCTCGCCTTACCAGCAACTCTCTCTAAATCCAGAAGTATCGAAAATGGTCCGCCCT -TCTTATGCCTTCCGCTGAAAACCCTTATGTGCATATCCTCCTCTTCTTAGTTTTGCTTTG -GCCCAGAATAATCTTCTGGTCCCTCTATCTGAACCTTTGGTGTCTCTGATTTAGCGTTTT -GGAAAATGTCAGATGGCATTTAAAATGGCCCTTTGCATAGCTCGGCGGTGTTCTTCTTGT -TCGGTTCATTCGCTTTTCTTTGCATCTTCTTCCTTTTGAATAGGTCCGTGGCCACCAAAG -TGCTACACTCATTCTGATTTGTTTCCTGAGATCATGACCATGTCATATTTGACCTTTTGG -TTTCTTATGTCTCTGTATCATTGGACAAAATTGTTATCTTTTTGTGGCTATCATGGATCT -AGACATGAGGAAATCCCACTTATTCCAAGGCCCCTGCAAATTACTCCCGTCTTTCTACTT -CATTCATGTATAATCTTGTTACGTATCAGATCAAATCTCACCTCCGATCTCATTGAGACG -GCAATTGAGGTGTCTCAGGAGTTAGGAATTGTTTGGCCGTGTCCAATGCAAGACATCGGC -AAACGTGACCAATCGTCTCAATGAATCTGCAAAAAAAGAAAGTCATCATGAGAAAATTGA -CCGGGGGTTTCCTATTCAGCTCACCTACCTTAATAAGATTATTTGCTACATGAAGTACGA -GTAGACTCTTGGCCTCACGCACCCTGATGCGCGACTAGGATTAACTTGTAGATCACCAAC -ATCCAACATAGGTCCATTGACACTAAGATGACCGTTTAAGCGGCAAGTCCGAAAAGCCAA -CCCTGGGAAGTGCTGCAATATTACTCCTGTGGCTTGAATGAAAACCGATTATCCAATCGT -ATTTCCTTATCCGCCGCATACACTTGAGCTGCAAGGCTAGAGTCGAATTGAAAAGCTAGT -TGGTGGGATAAATGCGGTCTCAGTGAGACCCACCGTTCAGTACGGAGTAAATATGAAATG -ATATGCATGTACGTCCTGGATAGACGAGTCACATAAGGCCACATAGGACATATGTATAAC -ATTTCTTGAATTTCAATTGAAATACTCCGTACTGGAAATTCAAATGCAAGGACTCACCTC -AGCCGCATCTTCATAGTGGGTCATGGTGCTGTAGTGGATATGCATAATATCTCACCTCCT -CAACCTTCCTGCATGCCTAGGTACTCTGTACATGTGCTGTATTACTTTGTATAGGTCGGT -AAGATGTGCCGAGAGAGTTGATCTTCTCTGATCGTACCCCGAGATATTGTCTCCCTGTTT -GTTTCCTAAGCTGCTCGAAGTCCGACTCTTAACATGCCTGCAGATGAGATATTGGGCGAC -GCCAATGCTCAGAGACGGGCACGTTCTTTTGACCACTGAAGTCCCGGGCCACACAGTTGA -TGATGGCAAGGATTAAGAGGAAGGACGGGTAAACAGGGTCTTTCTAGCGGTTGCATTGAT -AGATTAGGCGGTGGTAGTACCAGTCGATGAAGACATCCAACCATAATGTTACGTAAAGGC -AGTTACCTCGTGATGACTGACGGTTTATTTGTGACGACAACAGACAAAGAGATTGGAAAA -GTCTTCCAAAGATCGGACATGGGATCCTTGGCATGTATCGAGCTATAACTTTCCTGTCAC -GCAAGTCACTAAAAACTCCTTCAGGGGTATATCATTGCCGATGCCAACCTTGATGGAAAT -TAGATTCTTTATAGGATCCGGCGGCTCTGGGCTAGTCGATGTGCAGCACCTCTTTTGGTG -ATTAATGAGTTATATCTTTCAAGACATACGATTATCCCAATTTCTATTCTTCAAGAAATA 
-AGTCTTGGATTTTATGCCTGAATCATATTATGCAACTATTTGGCCTGTCTTGTGCATTCT -GGTCCCAGTACGGAATATTTCTGCAGGAACCGGAGTGGCGGTCTATCTACTACCCATTCT -ATAGCTTTGTCAGGACGGAGTCGAGGACATGCTTGGACAACGATTACTCCCTATTATCTG -GTCCAGCATCTCAAATCAATTATTGTCAAGCAATTATAGTGCTAACTCTACGAACCAACG -TCCTCCGGCGGAGCCTGCATGGGGTATCTCAAGCCGACGAGGCTTGAGGATTAGTGGGAC -CTTTCAAGCAATGGTGTTTGGTCTTGTCTTCTGACTATTTACTACAGCTTATACAGAGAC -CCACAACGCCAATCGCTTCGCGAAGTCATTACCTGAGCAAATCCAGTCGTATGTTCTACA -AAACTTTCAGCACGGCTTTTAAACCATACGAGGTACGTATAACCGTCGAGCCTCTAGTGC -CAGAACCAATCAAATATCGTACCTTTCCTGTGCAATATTTCAGCGGAAGGCTCCGACAAT -ATTCAATATTATGGGTATCGACGATAACGAGTCACCGTAGACGTTATGTATAGATTAGGC -CATCCCAAACTGGGTTGAATAATATATGAGACACTTGGGAATATATTGATATATCAATCT -AATCTGGAATTAAAAACGTAATATGAAGGGGTTGAAAACAAAGGAGACTGAAGATATCTC -CGCAGATACGTGCCTCTAATAAATTATATGCTTCACTTAGCTTCACTTAGCTTCCCGCTT -GTACTTGCCCGATTGCGGAAAGGCCGAGGATGTCGGGGTTACATGACCTTTTGTAGATCT -CTGCTTATCTATGCAGGTACAGCCTTCAATGATTATTTATTGCAATCAACTGACCCCATT -TTCGATCTCCAGGGATCCAGCATGCATTACCTTTCACTTTTGGCGATAGCAGGCGCTATC -CAGCCTGTCATCGGTGCGCCTCGCCAGATTCGAACTGAGGGAACATGCAAGAAGACTAAA -GTTGCAATCTTGTAAGCACTGGACTGCTTATAGATTGATTTCGAAACTCACAAAGTATAG -GGGAGGTGGTATGGCTGGTATCACTGCTGCTGTGGGTTTCCATCTTCGTTTATCTGAATG -TTATTGACATTTTATAGCAAGCCCTGACGAACGCTTCCGTGCACGATTTCGTGATCCTCG -AGTATCGCGATACTATCGGCGGTCGGGCTTGGCACAAGCCGTTTGGCAAAGACAAAGATG -GGAAACCTTATAATATTGAAATGGGTGCCAACTGGGTATGCATGTATCAACCGCACGAAA -GAAATCTGCTGACGGCCAAACAGGTGCAGGGTATTGGAAGCGAAGGAGGCCCGCAAAACC -CCATCTGGGTCTTGGTAAGTCTCTAGATATCGCTGATTGAACACAGACTGACCTCGATCA -GGCTCAAAAATACGGGCTGAACACTGAATTCTCAAATTATGAGAATGTATCTACCTACAA -TAAGGATGGCTATTCTGACTATAGTGATCTACTCAGCGCCTATGACGAGGCCTATGATAT -TGCCAACCAGAGGGCAGGTGAGATTCTTACCCAGAATCTTCAGGATCAAAACGCCAAGTC -AGGCCTCGCCATAGCTGGTTGGAACCCTAAGGCACATGACATGGAAGCACAGGCTGTTGA -CTGGTGGTCGTGGGGTAAGTGACCTGTCTTGTGAATGCAGCTAACACGGCTAACATCTGG -ACTAGACTTTGAAGCTGCTTATTCCCCGATAGAGAGCTCATTTGTCTTCGGCTGTGCGGG -CGACAACTTGACCTTCAATTATTTCAGCGACCACGACAACTTCGTTATTGACCAGCGTGG -ACTGAATATTATTGTCAAGAAGCTTGCATCAACTTTTCTCACTGACAATGACCCCCGCCT 
-TCAGCTGAACACCGAGGTCACAAATATCACGTATTCGGACCATGGTGTTACAGTTCACAA -CAAGGATGGCAGCTGTGTAGATGCAGATTATGCTATTACCACATTCTCACTGGGGGTCCT -GCAAAATGGCGCTGTCAATTTCTCTCCGGCACTCCCGGACTGGAAGCAGGAGGCTATTCA -GAAGTTCACCATGGGAACGTACACCAAGATCTTCTTCCAGTTCAACGAGACTTTCTGGCC -TTCGGAAACACAATATCACCTCTACGCCGATCCTGTGACACGTGGCTGGTACCCGATCTG -GCAGTCATTATCCACTCCCGGATTCCTCCCCGATTCCAACATCATCTTCGTAACTGTGAC -CAATGAGCTTGCCTATCGAGTCGAGCGCCAGACAGACGAGCAGACCAAGAAGGAAGCAAT -GGAAGTCCTGCGCAAGATGTTCCCGGATAAGGATATCCCCGAGCCAACGGCATTCATGTA -CCCACGATGGACCTCGGAGCCCTGGGCCTACGGCAGCTACTCCAACTGGCCTCCAGCCAC -ATCCCTGGAGATGCACCAGAACCTTCGTGCCAATGCTGGACGACTGTGGTTTGCTGGCGA -GGCTACCAGTCCAACCTTCTTTGGCTTCCTTCACGGCGCATACTATGAAGGGCTGGACGC -GGGCCGACAAATCGCAGCTATCATGCAGCACCGGTGCGTCAACGCAGACTCTGCCAAATT -GAGAGAGTGTGGACCTCGCAAACATTATGAAACCCTACATGGCACCTCGCCGTATTCCGA -CTACACCATGCTCAATGGATGGGCCGTAGATAGTTCGATCGATAATAACCCCGAATAGGC -GGAGATAGAGAAACATGGAGCAAGGAGCTTATGTACAGGCTTGCAAATTTTGAAACCAAG -TGATCGAGTTTATCCAATATTATATTTGTGCTCTATATTGTAGATTTTATATGCCGTAGA -GTTAGAGATATAATCAAAATATTCCAAATCACCCGGCAGATCTATATTAATGATATTCCT -AAGAAATGAGGCACGGTTTAGTCGGGAATAAGGGCCCACGTGGGCCCCCACAATAAGCTG -TGGGCCTAAACTCAAAGCCTAAAGCCCAACCCATTCGTGGGTTGGACTTGAGGCTTTGGC -TCTCAAGAAATAGATTAAGCGATGAAGTTAGTGGAAAAATGAGGTATGTGGTCGCGAGAG -CAAATTTCATCTGGCCTATCATCCCTCAACTACGATTATCGTCCGCAAATCGCTCCCCCC -AGGCATTCCGAAAATGAGAAGTCGCCCCAACCTCAGAAATCTCATTGGCTCCTCCTCCGT -CAAATTCACATCCAACACCTCCTTATACTCGTCCAACCTCTCCACCTTCTGAGTTGTAGT -CACAGCTACAATATTACGCTGAATCAATTAAACAAGAAGCACCACATTCTCAGTCGTCTG -ATGTGCATCCGCAATCCGACCGAGCGGGCCCTTGAACGGTCCCTCTGGAACTCGGAAAGC -CGGAGTAAGGCCTTTGAACGAGCTTACCTGGATGTTGTTCTCGTGCATCCAAAAAATTCC -GCAAAAGTGAGGCTCTCGCAGGGTTTATCTATTCTATTCTTTTTGGTTCGCAAAATTGCA -TAGCTGCATCAATTTCCATGAATTGGCCTAAATGAGAAGGTGACCTTCTTATTTAGGTTA -TTCTATGAGCTTGCGGGTTATACGGGCTGGTCTCGGGTTATGACGCAGCCTAGGTTGGGA -TCGGAAAATGTCATCAGTTGAGTAGCCCTAATTTGAGGCTTTTCTTTTGGGATCGAGACT -GCGAGAACGAACAGTCACGTTGACTATTCATATGTATATTGCTCATGGACCTGGAAGATC -TTGCGGCTCATTCTCTGACATGGAATGTACTCACAAAGCCATTTCAATCAAGTCAACCTA 
-TGGCGCTTTCATGGGAACACTTTGCTTTTTCCTCGTAGCTGAGGTCCATAGGCCCTGAAT -ATGATCGGAATAGGGATCATTATTGCGGCTAAACAGCCAAGCAACGTGCTAGCCCATTGT -ATTCCCATATTTTGAAACATCTGCCTTGAGAATAGCGGAAAGCTTGCAGCTACTGCGGAT -CGAAGCATGATATTCGCAGCAACAGCCGAAGCAGCTCTAATAGGTACGTCAGTATGGTAG -CTTGATTTGAGTAAATAGGCTGCGAGCTTACATTGGCACGTATGCATCCACGATATAGTT -GAAGCATGGAAGAAATATGCATAAGACTGCAAACCCGATACATATCCCGGCAGAAGCAGG -TACTGCCCAGTGAATCTTGGGTGTGAATCCAGTCCAACCAAACCTGATGAACGTCCGGTT -AGCTCACATCATACATTCTAACTGCTTAAAATTAGATTACCCACCAAAAGAGACCAATTG -CCAATAGAGGCGCTCCTAGAAATGTTGGAGCGAGGCGCCACTCGGGAACAGGGATGCCTT -TATTCTCTGCCAGTTTCCTTGCATAGGCCAGCTGCCCAAAAAGAATAAACCCACAGGCGA -GCACCATGCCTATGATGAAGCTTATGAACATCAGACCAGCAATAGCAGGAGTCATTCCGT -ATACATGCTCGAAGACCAGAGGATATGCCCCAAGAAGCGCATAAACAAGGCCATATATGA -AGGACATGTAACTCGATACCAACAGTATAATAGGCTCCGTGAAGAGCAACTTCAATGGAC -GTGTGAAGTATTTCTCCACCAGACCCGCGATGTTGACCTCAATCTCCTCGTGTCTGGCAA -GAATACTCGAATTTTGGGACTCTTTGCGGATAGCAGTAGCCTTCATCACGAGAATATAAG -GCGGATATGTCTCATCCAAGAAGAAAACACTCAACAGCCCATTCGCCAGGCCGAAAATTG -CCGGGATGTAAAGAGTCCACCGCCAGCCAAGATAACTCGACACAATGAAGCCACCCACAA -AAGGAGCCATAAGCGGTCCCACGAAAACAGTCAGCGCATATAGCGTAATTACTGCACCGC -GAGTGGTGTTGTTATATAGATCAGCTAATACACCAGGGACAACAGTCATTTGGCTTGCGC -CGAACATGCCAGCGAGGAAACGGCATATGATCAAGGTCTGGATATTCTTGGCCACTGCTG -ATGCAATAGTGAAGATTCCACCGCCTAGCATTGCAATTGTCAAAGGCCATTTTCTTCCCC -GCAGCTCTGATGCAGGTGCCCAGATAAGAGGACCTGCTGCAAAGCCAAGAACGTAGAGGG -TTGTTCCCAGTATCCCAACTTCTGTCCCAACCCCAAAAGCTTTGCTTGCTTGATCAACTC -CTGGTGCAAAGATGGCGCTGGCTAGGGAGACGATGAAGGTTCCTGAACAGGCTAAGATAG -AGGCGAGAAGCCTATGTACAAAGTATGGTCAGTTGCACTCGGGTTATCCAGATTCGCTAA -CTTGGACTTCTGGCTTACTTCCTAGACAGTCTCCAATTCTGAGGATGCTCGGGGTCATCG -GTTCCTTCGAAATCAACATAGCTTGCTGAGGTAGAAAGAATAGTCGGAAGTGCGTTGCTC -GCCCCAACCGATAGCTTCTCTTGCATCTTGATACAGTATGACGAGCAGCCGCGATGTCAA -TGAAATTTATGGTTTGAGTTTCAAAAACGCTAAAATCAGGCCAAGCTTGTTTTCGGTAAA -GTGTTGGGATATTGAATTAAATATGTAGTGTATCTGTACATCCTTTTCACAGACAGCTTT -GTGGAACATGTGGATTCCCAATGTTTTACAAGCGGGTATTGCGGAACGGACCGCTATTTC -CGTAAGGCCTTCAATATTTGTACATGGCAGTCTATTGCCGCCAGTGCGACAGTTCAGAGT 
-TTGCCTGTAGGGCTTTTTATAGTCTAGGAAAATCGCGTATATGATTGAATCAAGTCTATA -TAAGGACTAGAAATCCAAGGCGGACTCTCGAATGAGATTCTTAATTTGGTTGTCATATGG -ATTTCTGATGCCTAAATCCTCCACAGTCTGCTACGTTTCTACACATCTATATCGCATATT -ATGTATAGAAAAGTCGTAGTTGGCCACTGGTATGAAAAGACTGTCTGTTTTGTAACACCG -CACTGGCTTTCAGGGTTGATATTCGAAAGTGAACCTTCTCGAAAGATGATTCAACAAACA -TCGATATTTCCAAAAGCCTAGGCGTCTTTGGGTAATTACCTCATTCCGATCCACCTAAGA -AACTTGGACTACCAATCATATCCGCATTAAGAAATTGATCAAGGTCATACCTTTGCAAAC -CGGGCAGATCAGAAACTAAAGCCCCAAGCCATGAGTACTCTTCTGCGCTTGGAAATTCTT -GCAGAGATGATCCAATTGGACTTCCTTGCTCCTCCATATTCACGATATCATTAAAGATTG -CAAGGTCATCTTTTCTTCGCTGTTGATGAAGCATAACTAACAAGAGAGCCGCACTCCAGG -CTTCTCGCATGGTGTTTGTATTGTAAGTGGATGCAACGAGCTTCAATGTCTGCATTGCTT -CAGTGAGCATCGCTAGTGATTGTCGAGTGTCCATCACGAGCAAGACACATATGATGTGGA -AAGGTACATTTGTCATATGTTGCCAAGGGCTGCAGTCTCTGATCATAGTGCGTGAGCAAC -CAAGTCCCTTCTTCAGCAACGCTAAGGCTTTTTCTGCGAGACGAGGAGATATTTCAAGGT -TTTGTGTGTGGATTCTGCGCAGAATGCAAAGGACAAGATTACATTGCGCCATGGCCGAGG -GAGGCTCATTATGAACTCTTTCTAGGATCTGAGATAGCGTTTCCTCAAGCTTCATTTCAT -CCTCTTTCGGTTTCCCTGGGTCTAGGCTTGCTGATAACGGAAGAAGTCCCAGCAACTCAT -GTGTGTGGTCACCCTCGATGGTTGATGGAAGCAATGGCAAATCATTCTTTCGGAATGATA -CTCTTGATAAACCAAGATCATAAGATGTCCACAAATTAAGATGTAGTGCCATGCCAAAAA -TTCGTCTCTGGAGGTTTGGATCAAATCCAGTGTCTATACTGCTTGGAAATACCGAATCAG -TGGCCTCTAGATGGAACCCCGCAGCTTCCACCAGATGCATCAACTTCGAACTCGCGATCC -ATGTTGCGTGTGGTAGATCTGTCATTCGCATGTAGATGACTCGTAACGTCCACCCCGCGA -GAAGGTCTACTGGAGGAACGCCAGACAGATAGTGCGTGTCTAGAATAGAGATTGCCGATC -CTATGAGGTGCACTTCGGTGATGGTCATGTTTCGTTGGGAGAACAAGCAACCTAGCGCAG -CGACGCCACCAAGAACACTGTCATATAGACCACCGGCCATGGGAGACTGCCATCTAGCAT -CCAGGCGCTCAAAAAATTGAGCAGAATCAATAAAGCCATAGAATGGATCAATCTTAGCGA -AGTAGACCTGAGCGAGTGTCCTCATGTGTTCAAGCGATGTGATATCCGTGATTGACACCA -CAGGGGGAACAACTTGAGACTCAGACGACAGTTGTCGCTTTCCAACATTCCAACCAAAGA -GGCTAAGTTTTGGTGCTTTCGCGGGGTCCATATTTAGGCCCATCCTCCGAACAAATGCAG -CACCGGAATTTGCCCATAATCGACGATCCAGCCCGTACGTATCCACTGAGTCAATCTGCT -GATGAGGCCTTGACGTTGATGGTACTTCTCTTGGGGACTTTTGTTGGGGAGCATGATGCT -TCCGTCGTGTCTGAAGTTCGTAGTGGCAATCGTAGCCCCATTGACGGCAAGTACTGCAAG 
-GCTCGGCGCTATCGCATTTTCTTTTGCGCTCCCGGCATGGTTCACATGCAATTCTGGCAC -GCTTTCGTTTCAAGTCGGATTGCGGCATATCTTAGCAGAATAAGCAGTTTCTGTGGAGAT -CCCGCAATTTCCGAGGAGAACGAAGGATAGGATATTGCGGGGGAGATGAGTGGGGAGGCT -AGTGGATATCGCGGGTTTGTCCTGGAGTGCCTGAGGCCTCCCAAGGCTCTGAGGTCTCAG -ATGTCTGATTCTTTTATCAGGTTCTCGTCAAGCATTGCTTTCCGTATCCCTTTCCCAAAG -CGTTACAAAAGTGGCTGCATTAGCGTATTCGGCTAAGTGGCTGTCTGTGAAGGTGTCAAC -AAGGACATACCAATGGCTGCTTTTGATGATGTTACTAGAAAACCATCCAATCCGACATAA -ATGATCTTCTATTTGCGCTTGGCACGGTTGGTAGTGGGCAATGCGGATATTGCGGATACT -GCGGAGGTGCGGATCGTGATTCGTGACGTCGACGATGCCTCGGCAATTCCAATGATTCAA -AGCAAACTTGTTCCCGTGCATCTTATCTTTTCGCATGGAAATCACCAAATTCTCACTAAT -AATCTTATTGAAACATTGTACAATTGCCATCTATCTCGAGCGTCAAAACAATGGCAGCAA -GCTTCCTATCACGGTACCCATGGGTATCTAGTCCATTTGTGGTCAGCGCCCCCATGAAAG -TAATGTCCGGTCCAGCACTGGCAGTAGCCGTCTCTCGCGCCGGTGGACTGGGATTCATTG -GACCCGGTGCCAAAACACAGGACACCAGTGATGATCTCGAGAAAGCATCATGGCTCATAC -GCCAAGGGGTCAGCACAGTACCGACTCCCGGCTCGACACTACCTGTCGGCATAGGGTATC -AGCTCTGGGCTGATGACATCAATGTGGCCGTCGCTGCTATCAAGAAACATCAACCATGTG -CAGCCTGGCTATACGCCCCACGTCAGGGGCCCAAGGAATACGATGACTGGTCTAGCAAGA -TCCGTCAAGCATCTCCTAGTACCCAAATCTGGATTCAAATTGGCACCGTGAAGGAGGCAA -AAGAGCTTCTTCAAAACCTTGAGCGACCAGATGTTGTTGTTGTGCAAGGGGCTGAATCTG -GTGGCCACGGTCGGGCGAAGGATGGAATGGGGCTTTTGTCGCTGTTGCCCGAAGTGGCAG -ATGCTATGGCAGGTAGCCAGATTCCTTTGTTTGCAGCTGGTGGAATTGCAGATGGTCGGG -GTGTTGCAGCGGCTATGTGCCTTGGCGCTGCGGGTGTGGTGATGGGGACACGATTCCTGG -CTGCTAGTGAAGCACGCATTAGCCGGGGTTACCAGGACGAGGTAGTGCGTGCAGCCGATG -GGGCAGTTTCCACCACGCGTACGCTACTGTATAATCACCTCAGGGGCACTTTTGGGTGGC -CAGAGGAATACAGTCCTCGCACTATTTTGAACAAATCGTTTATTGAGCAGCAGGAAGGAA -AGTCTTTCGAGGAACTGAAAAGGCTTCATGACGAAGCTGTTAAGACAGGCGACAACGGCT -GGGGGCCCGAGGGGAGACTTGCGACATACGCAGGAGCCTCTGTTGGACTAGTACATGACG -TGAAAGATGCAGAAGATATTGTCCATGGGATTCGAGAGGATGCACTGGAGATAATTCAGC -GGCTTTGTCCCCGCAAGGACTAAGAAGCAATTTCAAATGATGGTTGACAAGCAATCTTAC -TCTTTACATATGATCAGATGGATAAGTTGCGACCTCTACAGACATCCGTGTGGTAATGTC -TTTGAACGTGTCGAGAAAGGTTTAAACAAGTCCAACTTCAGAGCACGGTCAAAGAGAAAC -ATTCCCATTGCCATTCAAGTCCATTCAAGTCTCCATTATCCTCAGCTAACTCACGCATTA 
-CCAGCTGTTATCCAGTTCCTCCAGAGCTATATCAATTTAAATACCATTTCCTCCAAGCTT -GACATCCTCCGCAAAGACTATCACTGGCATAAATTGGCGTTGCAAACCCCAGAGCAGCAT -TGGCTGCCCGACATACAGCTCATATTGTTCAATCCACAAGTACCATCCCATTCACAGTAC -GCGTTAGTCTCATGTTGAAGACTGCATGACATATGCGGTGGGCAAAGCTTTTCAGCAGAG -CACTGTGTGCCGCGGACATCCTCGTTTTTCATGCAGACCATTTCGACATTGGAATCTCCA -AGCAAGTTAAGAACACGAGTAGGACGGCACCGCAGGCCAGAAGGACACTCAGAGCCCCGC -GGGCAACGCAAGTTTTCTGCAGCAGCCATACACATTCCACTCTGACAGTGACTGTTGACA -CATTGGCTGTCTTCCTTGCAAAGGGCGCCAACTACAGAAGTTCGCTGGCAGGTCTTGGTG -CTTGAAGAGCAGTACATGCCATAATCCGGGTGGGTTGATTGACAGTTCTGTTCACTGGAA -CAGCTGTCATTGGGGCAGCTACTTGGACTCTTGGTAGACGCATGATCATTTGAGAAGACA -TGTCCCCGTCTGTCAGCAGGCGAGTGGATCGTCACGCATCTCATGCTTCTGACATGCTTG -CCAACACCATCGCCAACTGTTCGTCTGAGTAAATTCGCATGGGGTCGAGTAAATGTGAAC -AGCTCGTTGTTGCAAGAGTTGTCGCTAGATGATTCAGCACTAGTTGCTTGTATTTAGAAG -TGCTCTACTTACGAGTAAATTGAGCACATCATGTCTTGCTTGACCTCAGACGGGCATGGG -ACATAGATTGATCCAACATTCGTGCGGCTAATACTTTGGTCAATAAAACTCGATCGGTAT -ACAAAGCATGTTCAACTTACTACTCGGGCATGTTGTCGAAATTCATGCAACTACCATCAG -ACGTAACAGACATTCCCTTTCGACCGGCGTTCATTGCGAAGTCATTCTCTTTGAAAATCA -TGACCTAGACATAGAATGAATATATTGATCAACTGGTAACACACGTCATAGCCGTACCTG -ATTAGGGACCATCATGATCGGGGTGGTAGCTAGTGCCAAAGGCACAAGTGCAAGAAGATT -AACAAAGCTGGACATTCTCGTTGATTGCGATGGCTTAGAATAGTCAATGGTGAGTAGTGG -ATTGACTGCTGGATGAGAGGCAACCTTTATATTTTCGGATTTGCTATCTGAGCGTGCTAG -TTTGCCTTTCAAGTTGTACCTTTATTTATGTGTATTCCTTTCGCTGTCGGACTGCGATAC -AGGAGGTAGGCCCATGTCAAGTCTGCATGTCGTATGAGCTAGATATCTCTCTTTGCCACA -GATCCGGCATCGCCGGGAAAAAACGATAAATCCAACAGTCAACTCACTTAAGCTTCACGC -TGTGTACTTCCAGATCTGGGGTCTCTGGGCTAGACCGTTAAGAAAAGGCCATATCCGAGT -CTTGCCTGCAAATGTGCAACCTTGGCTCATGCCCGGATGTCTCGATCTTAGTTCCGCAGT -TCCAACGTTGATTGTTGTCTCAGTTCCAGAGTTGTACACATATAACTTATGGATATTAAG -AATAATAAGAACGGATCTTTGTAGGACAACCTTATACATGTATGTAGAACGAATTTACGG -CAAGATATACATCTACGGCTCCCTTAGGCAAATAAACTGAGACAAGTCTCCTCAAGTGAT -TGTCCCAATCAGCGATCCAATTCATTCATCTAAAATACATGTATATAGAATATCGAGACT -TCTGTTTATTGGCAGATAGTGACTTGCTATTCAATGTTTCCTCACAAACGTGATGGGCGA -TTCCGTAGCTACCTAGCTCGGTACCTTGTATAGCAATCCACAGCACTCAGTTACACTGAA 
-AATATGCAAAAGTACGGCTAATTTGCAACATATTCAATGCTGTGCTATGGATAGGGTTAC -CATCAACGGCTTACGTTCCCATATATTGTAAGATTTCTGGATATTCCAAGCAGCTGACCT -TTCATCCATAGTCATGTCAAAGCTCCAGAAAATCTTGGCCGCAATCAGACGCATTTCTGC -ATAGGCAAGACTATAGATAGATACAATTAGTCCAATCCTCGAAAATATTGAAAATAAAGA -CTCACTTTCTGCCAAGACAGTTACGGGGCCCAAGTGAGAATGGTTGAACTACTGCCCGAC -TGTCCGACTCAAAGCGTTTGTCTCTGCTCTCCAGCCATCTTTCAGGGATAAAAGAATCCG -CTTCGGCAAAATGAGACTCGGAGTGATACGTTGAATAGTGGGCCCCAGACACAGAAACCT -GTAGAAGTCAATATACATGAAGATCACATATGAAAGTAGAGACGTACATTCTCCGGAATG -AATTGTCCATCGATCAATGCACCGTCTTTAGGTACAACCCGTGGAATGATAGACGGGACA -GGTGGATAAACCCGTAGACTTTCCTCGAGTGCTGCATTCAGGTAAGTCAGTTGACTGACT -GTTTGGAAACCTATATCTTCGTAAGATTTGAATGATCCTCGGATCTCATCCACGAGTTTC -TGATAGGCTGAGGGATTTGCGACGATATAATAGGTAAATCCGGAGAGTAGACTGGCGGTC -GTTTCACTTCCAGCGAGGATTAACAAAGCTGCATTTGCTTCTATCTCCCGTATAGTCATG -GACCTGTCATCATTATATCGCAAAATGTAAGTCATGAAATCCGGCCGTTCAACCTTGCTC -TCCAATCGACGGTGGACTTTGTTTGTCGCCAAGGCGAATGAGTCTTCTCGCATCTTCTTT -TGCTTTTTCGGTAAAAGCATCCTGACAATGGGCGCCAAAAAAGGATAAACTAACAGTGGT -CGTGATAAAGCAAAAACTTTCATGCTCACAAACATCATCTTGACCCAAGGGTGATATTGG -CTTTCCCGTAAGCATTCAAAGGGTTCGCCAAAGGCCAAGTCACCGATGATGTCAAAAGTG -GTGAAGTTGTACCACTGAGACATGTCAACTGCTTTCCGTGATGACGAACTTACACCCTTC -AGTCGCTCCACTAACATGTCAACATAGGACTGAACTAGGGTTTCTTGCTCCCGCAGGGCC -TTTTCAGAGAAGGCATGTGAGAGCAATCTCCTTTCCCTTGAATGATCCTCATCATTGGCG -TTGATAATGTTTGGCCCTTTCGGTCCCTGCAGGTAAAATTTTGGGTCTTTTAGAAACGAC -CCAGCTCCGGATTTTCGGTGGCCGTAAATATCCTTCCATGCTTGTGGGCTGCGATAGACA -AGAGAGTTGGGAGCTGTCCGAACTACATCTCCATACTTATCATGGAGCTCTTTGAGTTTG -TAGTGATACTGCCCTGTTGATGTCCAATATAGATTTTGGAGATTGGTACATGCGGCAAGT -TTCGGTCCTGGAAATCTCGACAGTGGGCTAATGTAAAGGTTGTAAATCACGGTCCATAAA -AGGTATCCTAATGGCTGGATTATTGTCAGTACTGTAACTCAGATGCGATGAATTGGAACA -CTTACAGTGATGGCCACTAGATAGATTATGGGTATCATTTTCAACCCCTCGGTGTTAAAA -TTCCAAGATAACACAATAAGAGTACGCAATATTTGGACAAGCCAAGGGATATTTCAGGTT -CTTAACAATCAAGCCGTCATGATTTGCAGAATAGGCCACCTGATCAGTAGCGCCAAATCA -CATGAGATTCCAACGTTCTGTTCGTTAGCGTCCATTCCGAGGTGCAATTAAGCGAAGAAA -AACGCCTTACGCTACTTCTTTTGCCAAAGTATGCAAACCCCAGTGCTCTTGACTGACTAG 
-AAACTACGCTATTTCTTCCTCTCCATACTTACAGAGCTTGTAGCTATGTGGACTATTCCA -AGCATGCTACAATGGTATGGCGGCATATATGATATCCCATATATGATATCTTGTATAGAG -CTATCTGAGCCCGTCTCACTTTCTAGTCATAATTTGGGCAACATAGCCCAATATTCATAG -TGACCCAAGGAATTCCATCATCCAGGACTCGTCAATCTCACTCAAAGCCATAGCATCAGG -GAGCATGGCCTCAGATTGGACCATACCGCTATCGATGCCGTTGTTAGCGGGTATGTTTGG -ACCCCATATTTCCATCAACTTCGGCTCCCAGTTCATTCTCAGGCCGTGAACAATTCTCAG -TCCCTTTTCTAGGAGATTCATTTCCCTGTTTTCGTCGAAAGTGAGATACTCAGGGTATTT -CCTGATGACGGCCTCGTTTCTGTTCAGGTATTCGAGAATATTCGCGGTGTCTCTTACCAT -ACTCTTGGACCATGCCGGGTCTTCCAGAACGGTGAGTTTGTACAGGGTGACAATGGCTCG -AGTGAATTGGAAAAAAATGATACATGATAGCTGAAGATAATCAACTCCTTCAATTTCTAG -CCACACGTCTAACCAGCCTTTTGATGCTTGGAGAGAAGTGTAAAGGCTATCGAGGCGATG -GAGATCAGGCGAATTTGCAACGGCGGGGGTCCCTATTGATAGTTCGTTGATTATAATTTC -GGCATTGTACAGATACATGAGTATGACTCCTTTTTTTTCTTGTTAGTTCTGAGAGTGGAG -ACTGGAGAGCGCTGGAAACCTACTGTTCTGTTCAAGGTGAATCGGAATTTCGCTCTTCAC -TATGTCTAATTGTTTTTGGAAGGTATCCAGATAGAAGGCTAGCGAGATAGAAGCTTCCCC -ATCCCGCCGTAGTTGATAGACTCTGTCTAGAATCAAATAGATCTTGACTTGTACAATCAG -GAGCTCATCTTGAGGGCACTCTTTGGCCTCGGCGAGTATTGAAAGACTTTCCTCCATATG -CGGTGTCCAGCGCATAGCATCAATCCTAGACATTGCCAGTGCAGCGCTGAAAGTATAGCT -GTTAATTTTTGTTTAGAGGGTTTTTTTTGGATATTTGCTATTTACCTTGATGAAATCAAA -AAGCATCCCAGCACAGCCCTCCTCTCTTCGAGTGTTCTAGCTGTGGGTGGAGTTTGCTTC -ATTCCAATAGCGACTTTGAAAGCATGCATGGCTGAATACTCATTTGGTTCATTTTTATTG -ATCCCAAGTTCGGCGACAACTGCCATTAACATGTGTGCATAGACATTGAGAAATGACCTT -TTAGTGTAGGTAACCCTGCAAAGAAATCATAATGAGCAAATTGGTTTTGATAAGGGCACA -TTTCTTGCTCACCATGATACGAAAGTCATGATCCCAAGGAGAAGATCCATGCTCGGCGGA -ATGTCTAACATAACTTTCTGATGAATATAGTTGGTAATCCTTCCGAAAGAGTCGCCTTTT -TCTGGATTTGTCGATGTGCTCACTTCCATTATGCACACCCAAAGGAATGGATATTTCTCT -CGCAATTGGTCAGAGGTTATATTAGAAGGAATGTAGACAAAGGGAAGATATTGCAGGTTT -CGAGTTCGAAATTGGTCGAGGTACTTCTGAGCCATGGCGGGTGGGACGTAGATCAGGGAC -AAGATCTCCCTATCATACGAGGGTATGAGGTGGGCAGGGTTTCGTCTTTCCTCGGGAGTT -GAAAGAGAAGAAAGACTAGTTGGGGTTTGAGTGACTGGTACAGAATGGCTTTGGCCATCC -TGTTGACTTGGTCCTAGGTTTACAGGTTCCCAATTTGGAGGAAGACCGGGGGCGACGCCA -CCCTTCTCAAACGCGGACATGATCCAATCAAGTTTGGCCTCGAGCTTGGCAGTATTAGAG 
-CTCGAGGGCGGCTTTTTACGCTTGCGGACTGTCTGCGCAGGCTGGCATTCCTTGCTCAGG -CGCTGGCATCTACTCCGATGCTTAGTTCGATTTCCATACTCGCTCGCACGGCCACTCACC -TTTCGCAACCACTACCAGTATGAGAAAGAATGCATTTGCACTTAGCGCGGGAACAGTTCA -TACATGCGCCGCCATATGGCGCAGGAGTCCCACCTGGGATATGAGCGGATTTCATGCCAG -GtgggattcgggattgggaaatcagattgggattaggatGTAGGAGGGTGATCTCGAGAG -CAGATAGGTACCTATTAATGCGAGGATGATAAATAACAATACCCACCTCTCAACAATGTT -AGAATCTCTGAAAAGAGAAGAAAAAAAAGGATAGGCTTCCCCTTTCAGAGCGTCCCTTCA -AACACAGTCGGAGTTAGGAAGTTGAAAAGTTGCGAAATCCGTGCTGACTCAGAGTATCCA -CTTCCCAGCCCTACCTAGGTACCTGAAGTATACAGTGCTAGTGATCAGTGATACTGGGGC -TGTGGTTTCTGACAAGGACCTGCCCTGCATGTTGAGGGCTTACGCTCGAAACATGGTGGC -TTCTCGTGGCTACAACACTTGCGCCATGTGATCTCCTTCGTTTGTGGGATGGGATGAAAG -GACCGTGAAAATACGCACACAGGTGACCAAGCATTTACAAGAAAGCGCACAGGTGCTATC -GCAATCTAGTTTATGATTTGTCCAAGAAGGTAGACAAGATGTCCAGTTGCCATTGGCGCC -GCGAACCCGAGTAAACGGAACACGGCTATAGACTAGGTGATTCGAAATAATAGGCTCAAA -AGACTAGATAGAGATATCACAAAGCTCCGCCAACCAAGGTAACTCAGAGTACACTCCGAG -ACCAGCTGTTGGTTTCAACAATTGATATATCCCTCATGCATTCCTCCTTTTGATCCCCAC -CACAAATCCCCCAGTCAACCCGACGCCTGAAACGGGACCTACTACTCACAGCCACTCAGT -AGAGATAATCTACTGCTCGGTCTGGAAGTAGTTGAGGAGAACGGAGCGCTCCTGGGACTC -CTCACCCCAATCCTTGACAACGACGCAAGAGCAGTTGACAACCTTGCGGGCGTTACCCTC -ACGGTCGAGAGTGCCTAATGAATAGAACGTTAGCAAGTCCACTCACAAAATGACAATTCC -GTATCAATGGAGGGATGAAACTTACAGAGACCGACCCACTCGCCGAGCATCTTTCCGTCA -GGAACCTTGATGAGGGGGATCTTGTGCTCAGAGCAGAGAGCGACAACGAGCTTCTTGTAA -GCCTCCTCCTCGCAGCCCTCGTTCAGGACACACATGTGGGCCTCGCGGCGGTCGAGAGCC -TTGGCGGCCTCACGGAGACCACGGGCAAGACCATCGTGGATGAGGGAGACGCGCAGAACA -CCCTTGAGGGCGTCCAAGACGGACATCTGGCCGCTGCCGGCATCGGCAGAAACCTCGACC -TCCTCGGCGGGGGCGATAGGGGATGTGGGCTCTTCTCCGTCCGACTAGAGAATTATTGTT -AGTATTATGTGCTTCAATCATTCTATTGCGTATATCGTTGTCGTGACTCTTGAGGCTTGA -AATTGGACTGGGCAGGCGCAGCGCATAGGTTATACGGCCGACGAGGGGTAGCCTCTACTG -CGGCATTGCAATCGCTGTCGGGCGATCATGTCCATTCCAGCATCTCCGTGACGAAATCCG -AGTCGGGGTATGTGTATGTTGTGAGAGAAATGTGCAAATATGAAACATACCATTTTGTTG -CTTGTTATGCTTGAAATCGGATGATCCTCGAAGAGTAGAGTTGTTGGAGAAGGAAATTAA -AAGGTGGTGCGGGCGGGCTTGAAATTTCCCGCTATTTCGACTTTTTCGGCTGTGTGTGCC 
-CTACGATTATGTCATGTGGTCCCCGCAAAGCCAGTCACGTGATATGTCTATTCCATCTAA -ATCATGGGCCTTGGGACAAGGACAAGTCCAGTTTGATGACGCTTTTAATTTACATCTTAG -GTCTTTAGGATTTGCTGTCGCGCTACAAATGTCTATATTGATTATGAGTTAAGTTGAGTC -TGAACTTGGTGTTATTTGACACGGGATGCATGATTACATTCACGGCCACAGTGACTACAA -AAAGATATCACAAACTACCTAAGAATAGTAAAATCATCACTGAACGCAAGAGGTATCATA -ATCGTACAATCAGAATCCGCCCTGTACATCACTTCGCTGGATTGCTGCCATCGCGCCGGG -TCCCTAGCGAAAAGACGCTGAAAACAAAACTAGACGTATGTACTAGACCGAGAAGGGCCG -TAACCATTAAGGAGCCCGGTCCCGTTGGTTTCGACTGAAGATTCTGGGCCATGGCCATTG -AGCTGAGGAGCGCCGAAGCCTGCGCCAGCCATCGGCGGCTCCGGATCGATGTGCATGCTC -AAGAATTGCAGTTCTAAGCCATGCAGGTGGGGGATGTGGGGGACGAGGTCTGTGAAATCA -GAAAGCTAAGGTATTTGTTAGCTGTGGAACATAACTTGCAAAGCTCAAGAGACATTACCC -GGATCCAGAAGACATATTCCATCTCAAAATACTTGGGGCGCGTGTCACAATCCTTTTGCA -TGATCTGTCCGATGCTCGAGCTTTGGAATCCAACATCGTCTGTATGGGTCAACATGCGAC -GAGTGACTCGCCACAGATTCACACGTTGAGTCGGGGTCGCCTTCCGCTTCTCCCAGCTGG -CGTATTGAGAAAGTTGAAGATCATTAGTAGCCAGAAGAGGCGCAACCAAGTAGTTAGGAA -TCCACTTCTCGTCAGCGACATCCACACCACAGCTCGGATCAAGTTGACGAGTGAGAGCAG -TAACGAGAGGCATGAAAGACTTGAGCCAGTCATGACTTTTCGCGCGGGGATTGTTGGAGC -CCACTAGGTTCGCTGCAACACGAAGTCGACTCGGGAGTGCCTCACGGCGTCGATGCTCTG -CTGCCTCGCGATCACGGCGCATTTTAGCAGCTCGAGCGCGTTCTAGCTCCAACCGCTTCA -AACGATCATCTTCGGCTTGTTTGGCGCGTCGTTGTTCAACCTCCTTGCGTTTCCGATCTT -CCTCTTCGGCTTTAGCATGGGCAACCTGAGCCGCCACTCGTTCGGCTTCTTCTTGGGCCA -GTCGATCGCGTTCGGCTTCATCTGCGATGCGCTTTTCGTTCGCGGTGCGCTCCTCTTGTG -CTTTTTTGGCTTGGGCTTGTATACGCTCATGAGCTTCAATCTCGATACGTTTCGCGGCGG -CCATTGTGTCTTCCATAGTGTCATTTCTGGGCTCATGAGATTCCTTTTTGGTATCGCCAT -TCTTTTTAACAGGATCTTGACGGGGTGAAGTTGGGCGGGGTGGGGGTTTTTTAGGAGCGT -CGGTGGATTTGTGTGGTTGAGGTTCAGCAGCCCGAGTAGAGTCCTCCAGGGGTCGGCGAG -GAGTGAATTTGCTGGCGTCATCAGGTTGCCTTGGTGCACCATGTGTAGCGCTGGAAGACT -TGTCTTCGGAGACAACGCGTCTCTTTTTTTTACGCGACTCAGGAGATAAGCGATCAACGG -CTGTGCCACGTGAACGAGATCGTTCTGGGGATCCATCAGCGCGAATGCGCTTCAAAGACA -ATGCGTTGTCGGGTGCTGTATCAATAAGCGGCTTCGGTGCCTCTTCACGTCCAGAGAGCG -AATCAGACGAGAAGAGGCTGGGCCTACGCTCCTGAGGGCGACGTCCAGCAATGAGTCGTC -TCTTCCGGGTGTCTTCGTTGGGTGTCATCCCACTAGACTCGCTGTCTCGTCTGGCGGATA 
-ACGAAGATTTGGGTTTAGAGGAATCCGTTGGTCGCGATGAATCGTGCTCCTTTTGACGGC -GCTTTTCTTTGAAAGAATCCATGTCTTTCAAAGAATCGCGATGTCTGCTGGCGTTCGGAC -TGGTGTTGATTTTTTTGCGCGCGACAGGGGAGGAAGAACCTCTACGTGGCGGACGTTTCT -CCTCTTTCTCTTTTCCACGAGCACGGCGGGGTGAGTCGTCCTTGCGAGCCTTGTCGGAAC -CTGCGAACCTTTCATACGCCTCCTTCAGAATCTGATACTCGTCCTCCCAATTCTCACCTT -GTCGACCCCGAGACAATTCAAAATACGTTCTATCTTCAAACAGTCGTCGCGTGGGGTTAA -ATCCGGGTTGGTTAAGGATGAGTTGAATAACAGCGCTGTTACCTCGACCAATCGCAGCAA -GCATGGGCGTATTGTAACCCGCTTTTTGACCACGCAACGGATTTGGATCCGCATCTGCAT -TGCCCATAGCATACAAAAGAGAAAGGACATCGTGGTGACCACCCTTTGCCGCGGCAATCA -AGGAAGCATTGTCGGCTTTCTGCAAGATATTCAGAATGCTCACCACCCCAGCCTCGTCGC -CTTTGGCAGCAAACTCTCGAAGGTTCTCGAATGTGGGTTTCGTCCACAGGAGGTCATTTC -TCGTGGCTTCGCTTCTTCCACCCTTGCGTCGGTTTGCCGCACCCACTGGAGGGGGGCTGC -GCATCGGGGGTGAATCTCGGGGGCTAGCAGCCGTTCCTCTGGCTGTTGACGCCCGTCGCG -AAGAAGGTTCTTTACTCTCTGGTCGTGTATGTTCCTCTGAGCGTCGACCAGGCCGCAAAT -TGGCCTTGGCATCAGCAAGCGCCTTCCGCATCTCTTCATATTCATCATCATCTACAAAGG -ATGGAACCAACTCATAAGGTTCATCACCTTCTGCATTCACAGTTCGTGGGTTTGCACCTG -CATCAAGCAGAAGTTTGACTACCTCCACGTTCCCGTTTTCCACAGCATCGATCAACGGGG -TGTCTTTGTCAATGTTTTTAGTGTTAATTTCACAACCTGCCTCTAGCAGGAACTTGACGA -TTTCAGCTTCTCCAGCAAGTGATGCGATCTGCAGGGGGGTGTTGCCTGCATTATCGGGGA -TATTAATGTCTTCCGGTCGTTCGGCATGGCGCATCATCACTTGGTCATGTTTCCGGTCCG -CACACGCACGCGCAAGAGGAGTCCGGCCACTCTTGTCACGTAGCTTCTTCGGCCCCATTT -GCTTAGCAGGCGACGCGGATGCACCGACTCCAGAGCCAAGCTTTCGCAGATGCGCCCCCG -GCAGTGGAGAACCGCTCTGAGAGGAGGAGGTCGATTGTCTATCTTCAGAGCTATGTCGCC -CAGAGCTTGTGAGCAGAGGGGTTGGTGCCTTTTTCTTCGTCGGGAACTGAGAGCCTGAAA -CAGATCGTTGATGAGACGGTCGACTAGGTGAAACCGATCGCGAGTTGGATTGTGGTCTGG -AGAGACGCGTGTTATTGTTATGCTTAAGTTCCCGCACATGATGGGATGCTGCGTGTGAGT -GTGCGCGACGACGATCTCGTTGTTCCTCGGCACTGTCACTTAGGCTTCGCTTGCGGAAAG -CACGAGGTCTCTCTGACGCACGGGCGATTCCGTTGGCAACGCCTTCCGTTTCTGACATAT -GAGAATCATCGACCTTTTCAATCTTGACGACCGACGGCGAAATTGCTCGGTGAGTTGGAG -AAGGAGGACTCCGTTCCTGTTCTCGTTCGCGTTCTCGAGTGTCGCCAAACACACGCGGTT -TCTTTCGCTGATGTCCATCGATGCCGTTAGGTTCATGACGACTGGGATCGTGCTTGATAT -GCTTTCTTTTCTTCTCCGGCGACAAGGATTCTCGACCTGATTGGATTATTGTCTCCGCCT 
-CCGAATCCGCCCTAGGACTGGGGGGCCCATCTTCCTCCCCTTCTGAATATACACGTGTCG -ATCCATCTCTTTCTACCCGTGGATGAGAGCCTGCACGAGCTTTCCCCGGTGATGAGATAC -CAACGGGGTCTGGCTTTACCGTAGCAAGCCTATTCTGACCGACGTCCAGGGCAGCTGGTA -AAGAGGCTGCCTCGACATCACTCATAGCGATAGCACTCGAAGACTTAAAATGCCGCTCGT -CATTTTGGGGGGGTAAACGCGACCGACAAAACGACCGTGGTCGACAACGCCAGCAGTTGT -GCGCGGACGCCGATTCTCCACCGCGGGTTTCACCCGCAAAGAGAGTCTTGAGGGTCTTAA -AACGTGGGCAATACCGCTCCGGAGTAGAGCGATAGAAAATGGTCGAAGAAGTTCGAATAT -TGGATTTACTGGTATAAGTATATAGAGCGAGGGTGAAATCGAGGTATGTTGGAAGTTGAA -TGGgggaagaaagagaagggaagaggaaaagcgggaagggaaaggaatgggaCTGGATTG -GGCGTAGGGCTCCTCACTGGGTCAAGTTTTGGCCACGGTGAATCATAGCTGCAGATGGGA -TTTTGGCTACTGAAAAGATGCTTGTAACTAAGAGGGGTCGAGGTGAGAGCTCATAGAGCT -TTTCATTTCTTCCTCTTATTCCTTGTTTTTCCAATTTGAAAGTCTTTCCTATCGAAGGCG -AATATACTACGGGAAGACAATACGATACCGTTGATACCGTGATACCGTGACTAAAATGGG -GGCACAGACCTCTGGGCGCTAACCCTAATTCAGAAAGTGCAAACAGGGCAAACAGTCGAA -ATCACACTAGAACTAATACGGTCAAGTGGATTTGCCTCCAAAAATCCTCCCAGTCGCCAC -ATCAGCCCATATATCGACCTGTCTACAGAGTACGGAGGTCATTGTTTCTAAAATTATATA -TCTATACCTAGACTACATATTTACAACATTGTATCGTACTTTGCTTGAGGACATCCTTAT -CAGTCGGCTTCAATTGCCCCAATCTCCAAGTTAATCCCGACGCCGAACTAACTCCCAACG -CCCCCGTTGGCGAGAGCTGAAGGATCGGCGGGGGGCTTTCTCCACAGTAAACACCTCCCT -AAAACACCTCACTGATATCATCACAATATTAATGACACTAAATTGAAAACAATACGAAAC -CGAAACGACGGGTAAGAAAAAAGGAGGAGAAAATAAATCAATCCAAGAGCCATGTCTCTA -ATCTCTCAATGGGTTGATTATACTTTGGAAGCCACCGGCATGGATGCCGTGGATGCTCTA -CGCGCGTTCTTCCTGCTGGCCGCCGCCACGGTACGATACGCCCTGTGATTGTGGTGATAG -TGAACACCTCCTGCAGATGAACCCAACTAACGCTGACCCCTCTATGCAGACCTTCTCGAT -CAGCATCCCAACCTCATTGCGGTCCCGCTTCCTAgcctacggcccacgctcgacctcggc -ctcgacctcCACTAGCTCTGCACCACCTAGCGCCCAGAACTTTTCCACCGAGAGCAAGGG -CTTCCTTGACTATCTCGCTACTTTGCAAGTGCCACATAGCTATTTCACACATTTCTACGT -TGTATCTGTCCTGTCATCCGTTTTCTGGGTTGCGCAGTTACTCTCGCGGGGTGCGGTGTT -CCAGGCCATCGCATCCCGCGTTAGCGAGGACCACCAACGGCACTCCATGTCCCTGACTCA -GTTGGTGATATGCTGTGTGCTGTTGGCTGTACAGGGATCCCGGCGCCTATGGGAGTGCTT -CATCTTTGCAAAGTCTTCGTCATCGCAGATGGGGTTTGCGCACTGGTTTTTGGGCTTGGG -TTTCTATCTAGCGGCAGGAATTGCAATTTGGATCGAGGGCTCGGGCACTTTGTTGACCAA 
-AAAACTGACCATTGCTCATTTGCAGATGACCAATGCCCCAAGTTTGCGGACTTTCTTCTT -CATCCCGCTCTTCCTAGTGGCGTCTGGTCTGCAGCACGACTCCCATCACTACCTGTTCTC -GTTGAAAAAATACACCCTCCCGGAGCATCCCATGTTCCGCGGGGTTGTTTGTCCCCACTA -CGGCGCAGAGTGCATTGTCTATCTATCGTTGGCGCTGTTGGCTGCACCGCAAGGTGAATG -GGTCAACAAGACTATGCTGTCTTGTCTGGTTTTCGTTGCGGTCAACCTAGGTCTCACAGC -TCGGAATACCAAGCGGTGGTATGCGCAAAAGTTTGGCAAAGACTCGGTGCAGGACTGGTG -GTTGATGATTCCTTATCTCTACTAACGTAGAGCTAAAAATAAATAAATGCAGTGGAATCG -AAAATGGGATACTCGTTACCTCCCACCTAAAAACTTCAAGGTATTCCCCATCTTCATGAC -ATGAAGCACGTCCCTATACGTGAGACTCGTTCTCGTCTTCCGCCCATACCACCCACTCTG -GTACGGTTCCCTCTCCCGCAAGAGCTCCCAGTTGCAGATAGAGTGGTGGCCCAGGTCCTC -GTCTATCGTCGTTTCCTCGATGCCATGCAGATGATCAGTGTACATCTTGCCCCTCGTGAT -CAACAGACTTCGTCTCTCCTGTAATATCCTGTATTGAGGTCTTCGGTTTGTGTCAGGTCT -CCCGACAACGTCTCTTCCGGCCTTAGCTGGTGTGTTGCTTTCTTCCCTGTCGCCCTGCGT -ATGCTTGTGGTACACATTGAGCACAATTGGGGCACCCAGACTCACTGTCGCAACTAGAGG -ATAGTACGCTGCTCCATCTTCATGCGGCATGATTCCCTGTCCAGGCTGGTACTCGTTTAC -TAGGACATGGTTGGGCTTTTGATGTGGCGCATCGTTGAAGATACGCAGTGCCTCAAACTT -CGGCTCGACAATCGGTGATCTAAGCCAGGCTGGTAGGGGCGATGCGAGAAGCGCATTACT -CTTTGTGAGGGCTGATGGCCAGATTTGAAGCCTGCGGTGCGAGAGCTGAGTCCAACGCGG -TAGCGGTGCTGATTTTACCTATGGATTGTTGCAGTGAGTGAGATCTGAGCATGACTAGGT -GATATGCAGCATCTATGGATTGTTCAAGAGAAATGACAATGAAAAGCACATAAATTCGTC -CAATATTTAACAACAACGGCGTGAGTCGTAGTCATTAATAACACATGGTCATAAAGGAAA -TCATTGGCGCTTACCTTCTGAAGTAACCAATCCTCCTCGTCCTCTGTAATGAAATCTGAA -ATATAGAATGCATCCTCGGGCAATGCAGTGATGCGTACAGCTTCAAGATCGAATCCCATT -TGCTTTCCAAGTTCCGGCTGGTACTGGACAGTCCCTAGTCCCCCAATATTGCTGACTTCA -GTGGGGTTTGGTATAAGCAATGGTTATTGGTGAAAGGTTTGCACTGTTGAGTGTTGACGA -AGGCGAATCAAAGATGAAAAGACACCTGGCTTTGTATAATAATTTCATATGCATAACTCC -TTAGAGTAGAGTAGAAAGTCTATAATACACCACGTCACACCAACCCTTTCTGCCTTGCAG -CCTCAGGCAGAAATATCCCACCCCACATCAGCTCAATCCGTGCAACCAACGGTTGAGACA -GCGATAACAACATATTGTTCAACTTCTCCTCTACAATGGCCCCCAAGAACAATGCCAAAG -CCGCCAAGGGCAAGGGCAAGGATGCATCCGCCGATGACGCTAAGGGCAAAGGTGGAAAGG -GAGGAGCTGTGAAAGGCGCACAGTCGATTAACGTGCGACACATTCTTGTATGTGCTTTTG -CTCTTCTCTTTCTTCTTTCAGATCCTCTGCCTATAGTACTGGAAACTGGCGAATACACTG 
-CCAGCTCCTGTGGACAGGGAAAAGGGTTGCTATGTAGCTCATTCTAATCATGTCTAGTGC -GAGAAACACGCCAAGAAAGAAGAAGCCCTGGAGAAACTTCGCAATGGCACCAAGTTTGAT -GAAGTTGCTCGTGAACTCTCAGAAGATAAGGCCAGGCAGGGTTAGTGGATCCCTGCGGGA -CGAAATGGACTGTGCTGATATTTTTGCTGTAGGTGGTGCGCTTGGATGGAAGACCAAAGG -CGGCCTGGACCCTGCATTTGAGAATGTTGCTTTCGAGCTTGAGACTAGCACAACGGGAAA -TCCCAAGTATGCCGAAGTCAAGACTGGGTTTGGGTATCATATTATCATGGTTGAGGGGCG -GAAGTGAACCTCATCGCATGACTTGCGCTCTATAGAAGATAACCACCCACATCCAAAATA -TAGCAACAAGAAACCCGGAAAAAAAAGTAAAAAGAAAAGCCATCTGGTACTTATGCTTTA -TAAATTTTGGAAATCTAAATGCTCTTCAGATGAAAACTTATCTACAGAGCCTAGTATAAC -ATCTCACAATTCCTTCTTCGCAGCCTCAATCTTCGACCGCTCTTTCGCCTTCTTCTCTGA -TCTTTGTACCACTGCTTCGGCTCCAATGCCTCCGAAGACCAGACTGATCCCAAGCCACTG -GCCATTGGACAGAGAATGCCCAAACCAAAAAACGCTGAGGAGCATAGTCAACATCTTGCG -TGTAACGGTGACAGTGACAAGCAGCAGTGAAGAGAAGCGAGACAGGGTGTGGAAAATGAA -AAGCTGTCCAACAGCTCCGCACGCCGCGAACCCAAGGACATGCTTCAGTGCCTCTGGGTG -GCGGGAGAGGAACGAGACGGCACCAAACAGCTCAGTCTCGTTGGATGGGGGGATGGGAAA -GGGCAAGAGGTTATGTAGAATCCCGCTTTGTGAAAGATGCGGCATAATCAGGAGGTACGC -AGAAGTCAGAGTGGTCGAGAGAACATTCTGAGCGACCATCATCTGGGGACCGGTGAAACG -GGTGTATAACTTAGGTGAGGAGAAGACGTGGTCCTGCGTGGTGTTGGTCAAACCATCCAA -CAGGAGGTTGATGGACAACAGGAAGATGCCCCAGCCGGAGGATCCAGACTGATCCTTGGC -AGCAACTTTCTTACTAGTTCCTGGGTGGTGCAGACTGAAAGTTGCGACGCCGAGAGTGAC -CATCAAGACAACACCGTATTTATACAGTGGGTATCTCTTACGGAAGATGGTCAGGTGCAG -AACCATGACCGGGAGGAGCTTGCATGATTTGGCAAGGATGAAGGTCAGGTAGTCGATGTG -TTGGAGACTGGCGTAGCCGAAGGGTGACGCAAGTGACGAGGAGATTGCGACTAAAAGTAG -GGGAAAGAGGATCCGGCGTGTGGGGAAGATAGATGGGATCTTTTGGCCACTCGGAGTCGA -GAACAAAAGGTATGTAAAGCCGGTGATGGCCGCGAAAGAGGATTGGACCGTGTTTAGCAC -CAGCGAGTAGGTGAACCGCTCTGTTGGAGGATTTGGCTCCGCGGCGGTTGCAGGACGGAC -GGGATAGCTTGTCGTCGTGATTGCCTCCTGTAGCACACCCCATGAGAGGCTTTTTTATTA -TTAGTTCCTTGTTGCTCTATCTCAGAAGTCAGGCAAAATGTGGGTGAGGCACTTACAATG -CAGCATATATACCGAGGACACAAATCACAAGCTGCGACAGCCCAGGGCTGTCGGCAACTG -TCTCCAGGGTTGCCGATGCTTTTTCAGACACGCTTCCATTGGCGGCTGGAGGTGTGTTTG -TGTTGCCACTTCTAGAATTTTGTGAGACACCGGAATCCTCTGATAGATTCATCAATTGCG -AAGAGGGGGCTCTCTGGAGCGGCGCTGCTTGCTTTTGTCGTCCCATTGCGACAAATCGAG 
-GCGGGATTCAGGTTGGAAATGCGAGTAACTCGAACAAGAGGCAGTTTAGAGAATGTGAAA -TTGGAAATTGACCCTGTGTCGAGTATAGTTCTAAGTTGTCACGAGGTTCGAGCTTCCTGG -ACCTGAAACCGAAAGAATAACAAGATGCCATGTGTTCTTACTTCGCCTTGTCTATGATCG -CCCACCCCACACAAGTTTATGGCTTAAATAATTTGAAATTTTGGTTTTCTTTTTCTTTTC -TTAAATTCAAGATGAAAGGAAGAATCGACTGGACACTCCTCAAGATATCATTTTAAATAT -CAATTCAACTAGCAACTTCCAAGCTTCTTATCTCAACTATACAATCCAATTAAGAAAAGC -GGTGTAAGTAAATCAGATCTATCCGGGCGGTGAACCCCCAAAAATCCGAAGATTTGCGGT -TTCTCGACCGGAGCTCTCTCCGTTCAAGCCCTGTAAAACAAAAAGCTTCAAAATATCCAG -GATGGCAGACAACCAGGAAGCGACCATTCGGCCTAAAGCGTCGGCGATCACCGAGCACAT -CACCCAAAAAGGCTACGAGTCTCCCCACCTTGCAACATCCGCGCAAGTCCAACACAACCT -CGAGCACCAACACCTCTGGACCTCTATGGTGGGAGATATCATTCCTGGACAAGCAGACAA -CTCAAACACATTAAAGAAAGACCCCCGCCAACCCCCGAAAGAACCAATCCCTCTGCTTTC -CGGCTACCCTCCACACCGTGTGTACACACATCCTGACGAGCAGCTGTACATGCTAGAAAA -CGGGATCCGCGAAGACGACCTCGAGCCAGAGCGCATGTTCGTGGTACCAACTACACAGGG -TCAGCCATGGACCTTGCGTCATATGGCTGCTGTGTTTGATCGGCTTCCTGAGTTCAAGCA -CAAGGCTGAAGAGCATGATCGATCTGTCGATCCTACAAGCATGCAGTCCGAGGCGGAGGG -TCTGGATCCTCAAAAGACAGAGAAACTGGCCCAGTACTATGATAAGAAGGAGAAGGCTCG -CCTGACCAAAGAATGGGGCTCGCAAAGGGTGTTGCTTGCTATGGTCGACAAAGGGTTGGG -TGGAGACGGGACGGTGGCGTATTATGTTGTGCATGAAGGCGAGGTGAAGCCGAGGCAGAA -CTGAGTGCGTGGAGCCGGATTGTCCCCATATGTACACAATAACACCTATTTGCATTCTTA -CCCCTCGCGGGTTAACGACATGATGTGACGAATACTCATTCAAAAGAGCAGTGAAATGAC -ATTATCCATATGGTATCCAGATTACATTCAAAGGACAATTTGCACGATATGCGCCGAAAT -CATACAAAATAGGTTTTTCACATGATCAGCTAGCTACCGGGTATAGTAATGAGCAAGAAG -CAAGAAATGCTAAATAGGCGTGCAGAAGGCTTGGCGATATGTTCAGATAGACGGCTAGAA -ACTGCTGTGGGAAGAAAATCAAGGAATTAGGTCACCGTGGTGATTAGATATGACCTGTTC -TTGCTCCATTAGACAACTCGAGCAATACTCCTCATACCACTGCTTGACGTTGTCTGCGTG -CTCGGCAAAAAGACGTTCGCCTTTGGCTCTCTCGGCAGTCGGCGAGTTCTCGCTCTGGGT -GGCCTCTCCGCGGAGAAGAACCTCGACCTGGCGGCAGCGCCGTATCTCCTCGCGAAGCCA -ACGCTCCCATTCGGTCTGGATGACCTCTTTCTCGATGCTATTCACAACTCGCAATGCAAC -AAGAAGGTTGTGTCGATACAAGCCAAGACGCTCACGGGTTTGCTGGAAGCGTTTTATGGC -GCTCTTAGTCAATGCATCCCGGGGACTCGATGTACTGAGCGACAGAGGCATTTCGGAATA -TCGCATGGTCTGCTCTCGGAAGGTAGCAAAACAATCGCTGGCATTTCCGGTACTGGGCCA 
-GATTGTGGAGTTTGCAACTGCATCATCAATATCCCGCATGTAGATAGATTTGCTCATAAC -GAGGTTTGGCTGTACGCCTAAGCGTGCCATGAAGTTTCCTGCATGTCTCTCGTGCCACCA -GTCCCAACTATCTCGTGAGGAATAGTATCCATTTAGAATTCCACTTGCCGCCAACATGAC -CAGAATGACCTTGTGTGCACTCACGGTCTTCCAAGACCAGCGGACTGCGTCAAAGGCCCA -CATCATTAGGAAGGAAACACTAGTTTCGAGGAATGACAAGAAGGTCTCAAGAAGAAGCTC -ATGCATGGCACGCTGTTTTCGAGGCTTGCGGGAATCAAGCTTCAGGTTGGAACCAGCGCC -ACTAAACCGGGAGACTTGGCTTTGACGGCCCACATGGCCGAAGATCGTGATAGCTTTTTT -GGTGCGACTGTGTGCACCTAGTCGGCGTACTTGGTCACTGACTAGATCAACAAGATCTAA -GGAGTCTTGTTCTAGATCACCACTCGCTTTTTTGTCGATCACATCTGGGTATATCAGCTT -ATTGTGCTCTGTTCAAATAGGAGAGTTGGGAGGAGATTCTCACTCTTGAGGCCCCAGGGA -GACCACAGCCATTCGACTTTCGTGTAAATAGCAAGCTTGCTCTTGGATTTGGCCTGGAAT -GTAATGACAACCTTGCTTACCAACCGGAACGAACGCCGGAAGGGGAGATGCCATGGTGTT -TTCTTGTCTGTGATCACGTAGCACAGATGGTCGTTGAGAACATCGATAATCTGGTAGTCC -GAAATTGCATTTTCGTGGGTATTACCTGTACAAAAATAAGTCACTGTGTGATATTAAGAT -CATCGGTAGTCAAGCCTACCAAAGAAATCTGTCGTGTCGATATTGTAGTGGAAATCACGC -CTCAAATGCCGCGATTCTGTACTAGCCCATGGGCCTTGCTTGATTTCTTTGCGGTGTCAG -CCATGTTAAGTGATTGGGGAGCGAATCGACGAACCTCGAGCTTGCCTCTCATGTAAAAGA -AGCTGCCACACGGCGCTTTTGTCGCCAAATAATATGTGGAACAAAGCTTTGGGGCTTACA -TCGAGAATCTTCTCGGTTACTAAAACAAGATCACCTTGTGGAACATACTCCACGGGTTGG -GTTGGAAGTCTGAATTTGGCAGTATCTCGACCACGGCCAGACCTGCCTCCATCCAGCTCA -AGATCTTTGTCAATGTAGATAGGGAGTCGAATGTCCTTTGCAAGACCCAGGTCTGAAGCC -CTGCCCCCATCTTGTTTGATGTCTGCAAGTCCAGAAGAAAGATCTTCCCAGCTATCTGCA -CTGGAATTCCGGGCTGGAGATCCAGTGTCCATCTTGATCAGTGCTTTGATCAGAGATTCC -AGAGACAATTGCTCGACGGCAATGGAGCCTTGAATCAAGAAGTTGAGCCGTTTTTGAAGA -AGTCGAAGGGGTTCAAGGAAAGTTTTGATCGTAACCCGGCCAGGGGTATCACTGCCGAGG -GGTGGAATAGTATGCAGGAAAAGGAAGTCGCAGTCTCGCCCAGGAGCTGCAGTTACCTCC -TTAATGCTCTCCAAAGACACGCCGGTGGTCAAAACCAGTCCGAAATAATGACTGTAAAAG -TATATGTTCCTTGCTGTGACATAAGCTCGTCCGGGGAACTCTTGTTGATCGTTGGGGCTC -CAGGTAGCTCTGAACACCAAAAGCAATGCGTCTTCTCTCTTGACATCTGGGAAAAGCAAT -CTGAATTGAGCATCTTGAATCCTCAACTGTTGAGGGTAGTAGCTGGGATATTCATGCCCG -ATACCCAGTGCAACTTGCGTGAGCTTAACATCATCTCCATCAAGACTTACTGTCTGTCTG -TGGCGTGGGCCCGATTGCTGTCGGGGTGCCACTAAAGTACCTGCTGCATCTGGCTGCGCG 
-GCGATGTCGTCTTTAGAAGTATCACTCAAGGCGGAGGGCTGCCGGCCCCCTGGGGTGTCA -TCAGACGCACTATCGATCTGGTGTTGTTCCCGTTGAAGCTTGTTCATGAAACCCCAGTTA -GAGCTACCCCACAGGTTCGCCATCATACCACTGGGCATGCCGCCTGTGCTATCAGAGACA -CCTAGTCCAATGCCTCTTTCATTGCTCACAACCACTGCCGCCTTGCTCATGCTTGTTGGT -GCTGGGGGATTGGCGAGCGTAGCAGGTGCAAGACTTGAGCCAGGCTCATCCACGTTGCTT -AGACTGCGACCTCGTTTCCCAGCATCGTCACTCACGTTGAGTGAAGCGGTGGCGCTATAA -GGGAGGATATTATGGCTGGCAGATATGAGGCTGGCTATTCCACCTGCAGGGCCCGGGATA -GATGGAGATATGGGAGCCGCATTGTTCGACTTTCGGTGAAGATCCAGCTTTTGGATGAGT -CTAGAAGTGTGCTCTCTTGCATGTTCTCTTGCGGAGCTCTCACTATCCAAGCCTGTGGGT -CTTCTACCGCTTCCAACGTCGGTACTGTTTCTCACAACATGTCCATCACGATCCGGGACA -GGAAGCGTCATACTTCGATCTCCACCTCCGGATTCATCGTGCGCATTGGGGGTGAGAGAG -TCCGCAGGATCTGCCGCGAACTCAGAAGCCGGGGGCTGAGAAATTGAAAATGCAGGGTCT -TGAACTGTGACTTTTCCAGAAACAGAAAGACCAGTGCTCGCGGGATTTTCCAATGCTTTC -TGCTTGGCAGCCTCGAAGGCACCAATCCATTCCATCAGCTCCTTCTGGTTATCTGCCTGA -AGCAGAATGGTGTTGTTTTTGGTCTTCACTTGGAAGCAGAATCGGCGTTCCTCTTGGAAT -GCAGGACGAATACTACAAAGCAGTACGCCGATTCGATCACTTTCTTCAACGCCGCCGGTA -CGAGAGCCCTGTACAAGACACCCAAATATGCCGTTCTTGAGGAAGGCCCACCGCTTAACC -CACACAGTGCGAGTTGGCTTTCCTGACAAGGTGCGCAAATACAGCCATCCTTGCTTCTCT -GGTCGTATCTCTTTGCTCACATTAAGCGTCGAAATAGGACGAGAGCCGAGGTATGGGACA -GTGGAGATATTATAGTCCTCCAGTTCACGAGACGGTCGAGACGCAAGCTCCGCGGCTTCT -TCAATGTGCTTTCGGGTGGATAGTAGCTCTCGTTTGGATGACTTCTCACTTCCTTCCATC -TCATGAAGCCAACCCTTGATACGATCCATCTCGTGGGACCATTTCATGTAGCTCCCGCTG -TTGTAGTTGTGGAATTCCCGGAATTCCCGCCATTGATCGCAGGAGACTTTAACTAGGAGT -CTATCAAGTGCGTTCCGTACCTGAGGACCCTGTACCGAATAGTCCATTGCCGCTTTGAGG -TAGGCTTTGTGGGCTTCGTGCAGCTGGAACGCTTCTTCTCTTAGGGCCGAGGCCTCCTTA -GACTTCGATTGAGAGGCATATCGAGCTTGGAGGTAGTCGTACGATTTCTGGGAACTCTCG -AGGTTGCGTCGGATTTCCTGTAACCGTGTCAGATGATCCTACACGAGCACAATTTGGTTC -GGGGTCTTCTGTGTACCTTGAAATGCCGCAGATCCTCATTAATGAAATCGCGAATAGGTT -CGGATATCAACGACTCCATCTTTCTAGTTGTTTTGACGAGTCCACCCCAAAGATCTTTAG -ATCCTTCACCACACCTCCTCATAGCCAAAAGTGTATAATCATGGTCAACAACCGCTTCCG -AGACAACGAGGGGATGTGTCGAGTAAGAAAGAAAGTTGTTCACTGCACTCTCCATAGTGG -TCAGCTCACAGGAGAGCCTTGTTGCAGCCTTGGCATATCCGTCGAGCCATTTCTCTATGA 
-AATCGATCTGATCGGAGAAATGGAGGGTGGTGGCACGAAAGGTGGGCGAGTCGAGAGCAG -CTTCTTTAAGACCGACCGGGCTGGAAAATAGGAGTACACATTAGTATTCAAAAGGAGGAT -AGGAGGATTCAAATAGCAATCATATAAATACTCACACAACGCTCACAAGCTTCCCAACTT -GGGGTAACGGGGTTTCTGCTTGAGTCGACATCTAGTGTATGTCTCCAGCCATACACTAGG -CGTGAGCAAAGACCGAGACGCGAGGGGTGTGGATGAGCCTCCTCTCGTTCATACCACTCA -ATTACCACTTCGACTTCAAGAACTTGAGAGAGCCCACGTATATGATGTTTTTGACTTGTA -GCTGTCTCGACTTTGCTTGAGACCTCAGCCCTGAGCCTCTGGGGACGAGATGATCCTAAT -TGGCTACGTCAGGTTCATGACTTGGTGCCTGATATTCAAGGCACCAAGAGGGCGGTTTTC -AGTAAGGCGGTGTGAGATTTTTTTTCTTTCTGCCAATTTGGGATTTCACCGAGTCGGCCT -CTAATCATATCCTGATCACATAAAATGGAGCCTCCTTCTAAGAAGATGCGAAAGCTCTTG -GACCAGGATAGCGATAGCGACTCTGGGGACGATGCTGGTGGAGTTCTCCTCGGCAAGCAG -TCGCCTGATACCGGATTCAAGATCAATGAAGAATACGCTCGTCGATTTGAACACAACAAG -AAGAGAGAGGAAGTTGCGAGACGTACGTGCTTATACTCGAAATTGTTGTGCTTTGACCAC -ATATGCTAACATTTCCTTGGTTAACAGTGGAATCAAAGTATGGCAAGTCCTCCAGTCTTG -GAAACCGCCCAGGCGAAGATTCTGAGGATTCCGACTCCGAAGAAGAGGATGATGATGCAG -AGCTCGCCACGGAAGCTCTAGACGCAGAAATTGCGGCGACGTTGAACGCCATTCGCTCCA -AGGATCCTCGCGTTTACGACAAAGATGCCAGCTTTTACACCAAGTACGACCCAGAGCAAG -CCGATACCGGAAAGGGCCCCAAGGAAAATTCCATGACGCTGCGACAGTACCATACGGAAA -ATCTTCTGAGTGGTGTTGACCCAGCTGAAGAGGAAAACATCCCACGGACCTATGCCCAAG -AGCAAGCCGACTTGAAGAATGCTATTGTCAAGGAGATGCACGGAGCGGCAGACAAGTCCG -ACGAAGATATGGAAGATGTCGACGAGTTCATGGTCCGCAAGCCCGGTCAGGATATCTCGA -TGCCTAAATCTGAAATCAAGGTCGATGTTGAGAATGCGGAGAAAGACCCCGAGACTTTCC -TGTCCAACTTCTTGTCAGCACGAGCCTGGATCCCCACAGACAAGCCAGAGCTGCATCCGT -TTGATTCGGATGACGATGACGATGTCGAGCGCGCGGACGCTATGGAGGAAGCGTACAACT -TCCGCTTCGAAGACCCCAACAAGCTGAACGAGGTGATAGTCACCCACTCTCGTGATCTCA -CCAACCAGCAATCGGTCCGTCGCGAGGAGAAGAGCTCCCGTAAAAAGATTCGTGACGCCG -AGCGCGCACTCAAGGAAGAGGAGAAGAAACAGCGCGAAACTGAGAAGAATCGTCTTCGCA -AGCTGAAGACCGAGGAACTCCAGAAGAAGGTGGAGCAGATCAAGGAGGTTGCGGGACTTC -GTGCCTCACAATTCACCGACGAGGACTGGTCTCGTTTCCTGGACGCTTCCTGGGACGACT -CAAACTGGGAATCAGAGATGCAGAAACGGTTTGGCGAGGAGTATTACGCCGGGGAGGAGG -GTGATGACAGTGAGAACGAGGGAGGCAAAAAGAAGAAGCATCCCAAGAAGCCTACATGGG -ATGACGATATCGATATCAACGACTTGATCCCAGACTACGAGGAGGAAGCGAAGCCCACAG 
-TCGATCTATCAGGATCCGAAGACGAAGATGACGACGAGGCTTCCGGATCTAAGAAGAAGA -GTAAGGCCGAAGAGAAGCGGATGCAGAAACGTGAGGCCCGCAAAGAACGTCTTCGCATTG -ATGATGCGGTTGAGCGCAACCTTGATCTCGATATCACTTTACTCCCTGGAGCTACAAAGA -AGAACGCCACAAAGTTCCGCTACCGTGAAACCTCACCAGAGAGCTTTGGCTTGACGGCAC -GGGATATTCTCATGGCAGACGACACACAACTCAACAAGTTCGCCGGTTTGAAGAAGCTGG -CCACCTTCCGCGAAGAAGAGCAGAAGCGACGCGATCAGAAGAGGCTAGGCAAGAAGGCCC -GGCTTCGGGAGTGGCGCAAAGACACTTTCGGTGATGAGAATGGACCGGAGTTCAAGTTTG -GCGGTGATGTCCAACCTAGTCAACCTGAGGAGGCTGTTGGTGAAGCTAAAGTTGATATCA -GAGAAGGGGATGGCTCACGTAAGAAGAGAAAGAGGTCAAAGAAGCATTAGACGTGTTGCA -TCTGTCTGCCATTGTTCATAGCTTTTTTCAAATTAATTTATGAAATGAACCCAAAATCTC -TCCCGTTCAAGTTGAACAGTCTACTTGGGTATGTAAGAATATTCCCTTGTAATATGTAAA -TATTATTAATTCGAAGTTATTAGAATGCAACAAGAAAAATGCAGATAGACTGACTGGTAT -AGGGAGTGGTAGACGGTGATACTTCTCATGATATATATATTTGCAGCTTTTCCAGAGAAC -CGGAACAAAAAGCATAAAATTGCGACTCTTTTTTATCGCCAATGAGCCAAACATTGTATC -ACAGTATACAAATATACAAAGTTCTATTAAGAAAGCATTATTCTCTCGCATTCTTTCAGG -CCCATATACTATACTACCTGTAGGACTCGGTAGCTCCCAAAGCGGAAAGCTCCCATGCGA -TGCGGGTCCACTCAGGCGCTAGCGGTTGCTGGGGCGAATCTCGGGGATATTTTGATTCCA -ACATGTGCAGCTATAGTAGGCTTGCAATACTGCTATTATAACTTTAACAGCTAAAAAAGC -ATGGAGTTGTACGAAGTAATCTGTAAAGACGTATCCTCAAGCACCAAGAAAAGTGGAGCC -AAGTTGTGCCATTGAGTGGCATTTTTTTTAACATGTGAAGTACCTAGGTACATTGTCTAT -ACGGCGATCTATCACCTTAAAGTTTGTACCCGCGACATCAAGATAACTTGGAAGGACAAA -AATGTCTCATTTCAACATTAACGCCCAGGTATCTCTCACATGTATCAAAGGGTTGGGAGG -TATCACATTTGCTCAATTGAGTTGTTCATGCTACTCCGGACAACGTAGATCATCTCAATG -AGAAATCGAAATAAAGAATATGGGCGCAGAGGATACTTCGGGAGAATCCACTCGTCCGAG -ACGAACAAAGGCTCCTCTCTCTGTACGGAGTATACATACTAGTGATATGGGGTCCTTTGA -ATTCTAGTTCTAATAACTAATCAAGAGCGACACATTTGAGACCACTCAAGCCCAGATACA -CAGGGCAGACAGGGAATAGCTACGTCGGTGGCAAGACCGAGATATAATTTTTATGCCCCG -TTTGCCATCGGATACCCGGCCTGGGAAAAGGAGGTTGCCCCGGGGAGTGGAGGTCCGGCA -TTGGACAGGGAAATATACACCGTACCCCTACGGTACTATGCACCGGCGCTTGGACCGTTG -CTCGGGACTACAGGAAGTCGATGATCAAAGTGAAAAGATGAAATGTCGACTCATTTCTGA -GGCAGAAAAGCCAATTGATGGGACGTTATATTGTAAATCGAGACAGAGTGATCCATTTAA -AAGACTAAAAAAGAGCCCGATATACCCATAGCACCGTCCAGACCCAATATCAAGACCAGC 
-TGGCCAGATTGCAGAATTTTCAGTGGATCGGGAGATATCGGCAGGAGGCAAACCAATACA -GTATAATGACCGTAATATACAGTAATATAACCCCCTGGGCTTCCTGATTGGGTATTAGCG -CCCGGCGGTCGAGTATTTTTTTTTTCTGGCCCCCCTTCCCGCCTTCTCTCTTCCTTTTTC -TTCCTCATTATCCCTGGTGTCTCTTTCTTCTTGACACTCTTTCCATCTCCTCTCTTTTTT -TTTTGCATCCCGTTTCTTTTGCACTATCTTCAAAATGGGTTCCGGTGACGTTAACCAGGC -CGAGAAGTCGGTCTTCGGCATGCCCGTGAGTATCTCCCTGTGAGCTGTCCCTGAGGTCGC -GTGAAGATTCAGAACGAGATGCGATGAAATGTCCATCAAATTGTCGTTGAGTTTACTTCC -GCGATCTCAAGGATCAGTCATTGCTATCGCACTGTGAATTGAAGCAAGTCAGAAAGCTAA -CCACAAAATCTACAGGGCTTCGTTGTCGACTTCCTGAGTACGTGTCATGCTTGATGCCTG -CAACTATTAGCAATCGCTAGCTTGATGAACCCGAATATTGATACTAACTGTGTATTAGTG -GGTGGTGTCTCCGCCGCTGTCTCCAAGACCGCTGCTGCCCCCATCGAGCGTGTCAAGCTC -CTGATCCAGAACCAGGTTGGTTACCACCCGTCAGGCACCGATTGGAACGGGCTAGAGACG -GAATATCACGCTTGATTGTATCGCAATTCACTTTTGAGGTTCAATATTGGACGCAATATG -TGATTTTGCGTTGATCACAGCGGGTGAAAAATTTTCTCCCTTGGCCCGTGACAGTCGGGA -GCTCAATATCTCCATTCATTCAGCATCTAACCAGCCTCAATCAATAGGATGAGATGCTCA -AGCAGGGTCGTCTCGACCGCAAGTACAACGGCATCACCGACTGCTTCAAGCGCACTGCCG -CTGCTGAGGGTGTTATGTCTTTGTGGCGTGGAAACACTGCCAACGTCATCCGTTACTTCC -CTACCCAGGCCCTGAACTTCGCTTTCCGCGACACCTACAAGTCCATGTTCGCCTACAAGA -AGGACCGTGATGGATACTTCAAGTGGATGATGGGTAACCTTGCCTCCGGTGGTGTATGTA -TTTCTAAAAATTTTCTCGCGCTTGGATGACAACTAACAAGACCTCTAGGCTGCCGGTGCC -ACTTCCCTTCTGTTCGTCTACTCCCTGGACTACGCCCGTACCCGTCTGGCCAACGATGCC -AAGTCCTCCAAGGGCTCCGGTGAGCGCCAGTTCAACGGCCTCGTCGATGTCTACCGCAAG -ACCCTCGCCTCCGACGGTATTGCCGGTCTCTACCGTGGTTTCGGTCCCTCCGTTCTCGGA -ATTGTCGTCTACCGTGGTCTCTACTTCGGAATGTACGATTCCATCAAGCCCGTTCTCCTC -GTCGGTCCTCTTGAGGGCTCCTTCATCGCCTCCTTCCTGCTCGGCTGGACCGTCACCACC -GGTGCCGGTGTTGCCTCTTACCCTCTTGACACCGTTCGCCGCCGCATGATGATGACCTCT -GGTGAGGCCGTCAAGTACAAGTCCTCCATGGATGCTGCCCGCCAGATCGTCGCCAAGGAG -GGTGTCAAGTCTCTCTTCAAGGGTGCCGGTGCCAACATCCTCCGTGGTGTTGCCGGTGCT -GGTGTCCTGTCCATCTACGACAAGGCCCAGCTCCTCCTCCTCGGCAAGAAGTTCTAAGCG -ATTCACAGAAATGTGAGTCTCTTGCCACTGTAGTTATACTGGAATAGGGGATGCACACCT -CCCCCAGTGTAACCACTGGGCGTGGAATTCGAGGATGTGGGATGGTCAAAAGATAGATGC -GTCAAGAAATGGGTCTGAAGGATCTTGAAGCATAGAATGGTTTATTCTTCCTTTTTGTAC 
-CCAAATGTACTTTACTGCCTGCAAACTCTTTCGTTTTTTTAACTTGTTCCTTCATAAACA -TCGTAAAAGTAAACCATTCCATCAGGCTATTCATGAGGCCTGGAGAACTTTGGTCCTCTA -GTTCATCGTCTGGACCTCCACACGTTGAGTTTAAAGTAGATTGCTCTCAGTTAAGTCCGC -ACAGCTAAACAACAGCAAGCAGATGGTCGACCATTTAAGCCTTACCCCCTGCTTTATCTT -GATATTTCAATACCTCACAGGCAACAGCCAAAAACTATCAAAAGTGTCCCGATGCGGCCG -GGCTCCACAGCGGCAATAGCAATGACGTCTGAGCTCCTTGAAGCTACAAAGCCACTTCAC -TCTTGACATTTCATCCTCCCATACCATCTATCCAGAATGTCTTCACCATCCGGTATCAAA -ACCTACAACTCAGCGCCCGTAAATTCCAACCAGGAAGAGGGCACACACACAACAACCTCA -CAGCCCGAACCCGCCACCGTAATCGAGTCACTAgcaaacgcaccctctcaacctccacaa -gcacccgcaacaacaacagcaacggcaacagcTCCCTCCAACCCTTCAATCTACACAACC -GCCGAGCCTGACTCTTCAGCACCACCGCCACCCCAACCAGGCGCCACACCAGTACAGCCC -ACACCAACAGCGACAGCAACAACCATCCCCGAAGCACAACCACCAACAACCACCTTTCCA -GAACAAGCAACATCCACAACAAAAACCCCACCCGCACCTCAACCAGGCGCCCTCCCATCC -GCAGAACCAGAAGCAGCAACAACAACACCCACATTCCACCCTCAACCCCCCAAACCCGAC -GACCCCATCTACACACCGCCACCCCCATCAGCCCAGCAAACAAACCAAAGCTACACACAA -TCATACTCCTATAATGCACCCACCGTAGGTCCACAGACGGCAGCGCCGAACTCGACCTCC -TCGCCATACTCGTCTGTCTACCAGACGAACGCGGGCTCGCACGCTCGTCCACAGACACTG -CCGTTGCACCACGGTGCCGGGACTGGGTCTGGGGGTGGGGCGAGTAGTATCTTCCCTGAA -GAGGAGGAGGAGGGGTTTTTGGGAGCGGCGAAGGGGTGGATGAAGTGGTCTGGGAATAAG -TTGGCAGAGGTTGAGAAGGGTGTTTGGAAGAAGATTAATGATGTTCATGAGTAAGAGGAT -TGGTTAGGTTTGGGGGTTTTCCATGTCTCCTATGTTTTATTATGGTTACTGGTTGAGGAT -TTGCAGGGTGGCTTAAGGTATTCCATTGTGAACCAGTGTCGGGTATCAGAGGTATGATGG -GAGAATCCAATTGGGTAATGGAGTTTGGGTTTGTGTTTTTGAGTCTAGTTTGATTTATAT -CTTGTATCGATGTTTTGATTGGAATTACATCCCAAGATAATTAGTGGCTTTGGTATCAAG -TAGTTGAGACTGCATTAAGCGTCCTGGTAAAGCTCTAGTATCTGGACTCGATTGTAAGAA -CGTCTAGCCTACGGCCAGGACAGGTCAGTTCACAAGCGAAAAGAAGTGGCAACTGAGCCG -CGCAGAATCTGCGACGGGTTAGTTTCTAAACTAGATAGGTAACTCATCCCATCCGCGGCT -GAAGCGTTGAGTATTCAGCCTCAAAGACTGAAGGCCAACCCTCAGAGGCAATGCAAAGGC -GATGAACCTGCAATGTACAACAGACTATGCGTAGAACACACAAAGAGAAACGCAGACAAA -TAGAGATAGGGAGAGGAAGAGAGAGGAAGAAGACAACATCTCAGTTGGATCAGTGAAGAT -GACTGAGATTCCATACAAGTCAGAAGGGTCAGTGCAATCAATAAAGATGACCAAAGCCTC -CCTCTGAATGGCTGAGATCTTCCAAGGATGATTGAGGAGTTATTAGAATCGAAAGCCCAT 
-AAGAAAAAGGAGACAAAGTAGAGTATTAGAAAAGTCCGGAGTATGAAGAAAGAAACACGA -GTCTTGTCGCAGTGGTAGCCGACTCGACTAATGCAAAAATAAACGCCAGGGTGAGTGGTC -GACAGAAAATAATACTGTGAGCCAACCCACGTAAAAAGTAAATAGTGAGCAGAGAAGAGA -AGGGGTATTTGATGTTGTCATAGTGGTAGCAACGACGGGACTGCTACCCTGAAACGTCAT -GACAAGTTGTTTCGTATCAGGCATAAGAGAAAAGTGAAGACTTGAACGCCATCTGGGAAA -CCCAATACCACAAAGGTCCAAACCCAACGCCTATCAATCGAGGTAGTTGCACTGCAAGCC -AACCAAATTTCCTGGTTTTGGATTTTTTTTTTTTTGGAATTGTCGAAAAATCACGGCCAA -AAGCGGTCCAGACCAGCTTCGACGGCCGTGAATATGAAGGGAAAAAAAGTGTATCCCGCA -AAGAGTTCAAACAAACAAACGCCAGTCATGATGAGGGTGGGGGCTTTACATGGAGCCCAG -GCAAAGGAGCGCCTCCACAGCCGCGCGTTCCTGGATGTCAAAGGTGCGGGGTGGCAATTT -AGCCACAGATGGGCAGGTGGCGGGGAAAATCCCAGAAGGTTGAGGCTTTGCGCGCGCAGC -GACGTGGCTGTTGATGAAACTGCCATTGTTGTTCATCGAACGGGCGCGCAAAGCGTCGGC -ACCAATTTGTGCCCAGTCCTCCTCATCGGTATCATCGCCTTCAACCCACTCATTTTCTTG -AATTTCATCGGGAGCCTCCGACGAAGAGCAGTATTCGCCGTCGTCACCATCGAGAGACAT -TTTGTCCGCCTCGTGCTCCAACATGTCGAGGTCCCCTTGCATGCCGTAATCGTCGGTGTA -CGAGAATGCCGTGGATTCGGCAAAGCTCATGCGGTATGGATGACGGTTGCCACCCTGCCA -GCCCGACTGACCCTGAATCTGTAGGCCACGCTGCTGGAAGGAGCCTGCGACACTCGAGGG -CGGCGACGACGAAGTGTTCAAACCACGATCGTGAGCAGGTTCACCACGACGTCGCGCGTC -CCAGCGACCCCAACCAATCTTCTCGAACACGACGTCGCCTTGAACACCTCCTGCCTCTCC -TCCATTTCCTCGACCTTCCAACGCAGCAACAACCAGTCTGCGCGCCTTGGCGGGGGGAAT -ATGCGAAAAGCCAGGGACCGAAGCTGTCAGATGACCCATAACGTGGCGGATCGCCAAAGG -TCCCTGCTGGAGTAAGATCTCAGGGAGATGGTACTTGGATAGCATGGCAGGGGTGATCGC -GCCGGTACTGTCGAGGTCGTCGACAGGCAGGGAGACATGATGATCGTTGGCGTGTTCCGC -ATCCTCGAGATCAATATCAGAGTCCAACTGGGGGACAGTCGATAGCGGCGATCCGGGCGA -CAGGGCCACTCGTTGCTTGAAGGTCTGAGGAGGGAGGTTCGGGGAAATGGAGTTGGGTGG -CGTGGGGAGCATCGCAGGCATCCCGGGTCGCGTAGGCACATGGTTGCCACTCAGGCCAGA -ACGGCGCTGAGGGATATCAACAGAAGCAGTCTTACCAGCAAGAGCCATCGCAATGCTGGA -GTTTGTCTGGAAAGCCATGATGGATTGAAAATGGATTTGTGAAAGGATCCGAACCTAAAG -CGCGCAAACCGGGGTATATGAGTCAGAAGACCTTCTAACACGCGACAAAGAAGTACCACG -GATTGATCGGGTGGCAACAGAAATAGTTTCCGAATGAAAGAGACCAGATCCAGTAACTAA -CCTTTCACACACAGTGACAAAGAGACACTGTAGCCGGTGTAGCGAAGTTGGGGCCAACTT -GTTTAGGGCTTGAATCCACGTAGGGGATGCGACGCGGCAAGGGGAGAGCGATAGTCGCGA 
-TAAGAGCAGATTCCAAGGTGAAGGAAGAGGTCCCAGGAGTGCGAGATCGAATATCACAAG -ATGTGTGAAGAAGAGAAAGGGAAAGATACGTTTGGAGGGGCACAGGGCCCTGGAAGTGAC -TTACATCTTTGTACTCCAGGTCCAAGGGTACCTAGGTCATCTCAGGAAAGCGCGTGGATT -GGCGCCTTGTCACCGGGTGCTTCCGGCTCTATACTATTGTGATTGTTATCCTACGACatt -attactctgtattttatttttatttttattGCTTTGAGTAGATCAGTATATACAAATTAC -TATTCTAACCATTTTATTCGTTTCATCCATGCTATACATGCTACAAATACACCTGATTTT -CACACTAAGATTCATCCCTGAGTCGATCAGGTCAATCCAGTCGTAGATATCTGAGTAATA -GAAAAGAAAAACCCCTCAATGCCAAGTTGTTATCGGCCACTCATGAGGATCCTCAGTGCT -CGTGCGCCTTTTAACGCTTGCAGTGCTCTCCAGGGCGTAACCCTATGCTTTCCTATCGGC -TGATAATCAGGGCACGTGGACCATTTGAAGTACACTTCCTTGTATAGTCCACGAGTGTCG -ATGTCATTCTGCTCTCCATTGACGTTTTGAATTTTGTGTCCATTGAACTGAGTTGAATGA -CCGTGCAGCTTGTACTTATTTTGTACATACTCCGAACATTAAGTACTAGGTACATCCACT -TTCCTCGGGTCCCACTATAATCTGGACTCTGGCCCTTGACGTCACTGGTACCCGTGACAA -TCTTGGCTTGAGCGCTTTATTCCCAGGTTATTCCCGTTTTGTGCATACGTATAAGGAAGA -AGATACAACGCAATTCTTCCAAGTCCTTTTGCCTTATCATGGAATCCTGCATAGCTGACA -CCTTGGTCGCCTAGCCTTTGCCCCGAAGAATGCATTGCCCTAGCGCGAGGTCCATGTACG -ACATAGCCAGCACACTTGTTGACTTGTGGGTTTATTATTGGAGTGTTCAAAAATGTGGGC -TTGCGCAATTCGGCCATTCCCTTCCCATTGTGCGGAAAGTAGATGCGAGCTCTGCATGTC -ACCTTCTATACCATTCTACAATTCTACAGTATACGATAACCCCTCAATCATGATACACGA -TTTTACTTGCCCGTTGTTGGGTCTTTTAAGTTGTCTGTGCCTTGATTAGTACTGATACCG -CTTACCTTGATTTGCTCCTCCAGGGCGACATCCACGTTGGGGATTGTAATTTGGTCATGC -TTCCCTTAATGCTTCTAGGTACATGTAATCAAACAATATTTAGTCAATAAGCTGACTCAG -AGTAAAAATGAGAGGTGCCGATGACCATGTTGACTCTGCGCTGTACATTCTATAGTTTCT -TTCCTCTTTCGGATACCGAAGTCTGAGCACCCCACGCCTCCCTCAATTATTTCTTCCTGC -CCGTCTCCCCGACCAAACCTTTAAATCAAAAGACCCCCCACTTAGGTCGCGCATATTGCC -CACGAAGAGAAACACGTACCCCTCTGCCTGCATTGGAGATTCGCCGTCCGTATACCGAAT -TGAGCATGCAACACTTGTATAGCTAGGCCCCTCATAAGCCGCTAGTTTTTCTGCGTGCTC -AATGGTCGGTACCTCGTACACAGCCCCTGTGACGGTATCTCCGGGATCGCCAGAGAGTAA -GGCTGGGTAGTGTCCCCAGAGCTTACACTTGTACCCCTCGATGTATGCGGGTCGTAAGTG -GGAGGCGTGGGTTAGACCTAACAACTCTATGAGTAGACTTTGGTCTTGAAGTGAGCCGTA -GAAAAAGTATGGGCCTTTTGGAACAGGACCTTGGGGTTGAGGTCGGGGGATCTGGGATTG -AAAGAAGTGGCTGGGAGATGTGCTTCGCATTTTTAGGACTGTGGGTGATATTTTTGACCG 
-AGGGTTCTGTGGGGGTGGGGGTGGAGGAGGTAGACTGCCCCTTGATCGTGTTGAGTCCTC -CATGACTGTGCTAGTACAAGGAATCGAGAATCTTGAGTGCCGTATCTAGGGAGATGCTGT -TCATGCTCATGTCTCAGAGCCTTTGTGGGGAAAATCCCTGCGGGGGATCACGGCTGCTTT -CACTACTCTGTACACACATTGTGGTCTATGATTCTAGACAATAATCGGCTACCGATGCAG -CTTTGTGATATCTTTTGAGCATTCTGAGTCTTTTCATATATTCATTCTACGTGTCATTTC -AAATTTCTCTTAGTTCTATAGGCGGTAGAAAAGGGGGAAATGTAAACCGTACAGACAGCT -TCTCTATGGGATAAAACAAGCTTTGTATCCTATCAAACCAAGATGTTGAAGATGAGGCTA -AACCCGTCTGATCGGATAAATGGCCATCTTCACCTATTTTACCAAGGCCATTCTCCCGAC -GCGGGGGTTTTATGTGCCTAAATCATGCGACAATTCGACAATTAACCAGAAGAAAAATAG -TTAAATAGGAGTAGAGTGTGCTTCCCATAGTTTAAGGGGTGGTTACTTCGAATCATCACG -TACTTGGCCATAATGGTCTACTATGGGTTAGCGATTCTCTGTCTGGCAATCACCGCAAAC -GCAGCAGCGAGCCCCAAGTCTTTAAACTCCGATCTTTCCATTCTCATTCACAATGACCTG -CTAGGTATGCATATTCACTGAGATCAGAACATGAAATCAAGGACTGACTTTCAAAAAAGA -AACCGAAAGTCCACTATCAGATTCAGGAATTCTGGTGCTCGACGCGAAACCATGGCAAGA -AGCCACCAAAAGCTGCCAAAAGATTGGAGAGGCTCTTTGGGGCACTGATTCCAGTTACAA -AAAGATTAAAAGTGACCTGGACTATCTTGTATACCAAGGCAAATACACTCGCAATCAACG -TTTTTGGACTGCCTCGGACCACCGCAAACTGTCGACCATCGATGCAAATGGTCGGGTGAA -TAAAGCCTCTGCAAACGAAAGACTACCCGTTCTTTGCACACAAAGCGCTCCCTTCTCCAA -CAGTACATTTCAGGATACAAGTGCTCAGTGGCAGGTGACTGTTCACTCCAATGATGAATA -TCTTACCGGGTAGGAACTAGTACTTCCATGACCTTGTAATCTATCCTAACAAACCACAGA -TTCCGGGATCGATTCAGTTTCCGCTTTCTGGGAATCCGCTACGCTGAACAGCCCCGACGA -TGGGACTATTCCCGACTATATAGAGGGACTGGCAAAAAATCTTCTGCTCTCGACTATGGC -TCTGACTGCACACAAGGCACAACTGGTTCAGAAGACTGCCTGTTCCTCAATGTTTGGTCT -CCCTACCTTCCCAAAAGCCGCAAAGTCAGAAAGACCCATCTCAAACCTGTGATGCTTTGG -ATTCACGGTGGAGCTTTCACTGGCGGCACTGGCAGCGACCCCACGTTCGACGGTGGCAAT -CTGGCCTCGAGAGGCGATGTTGTGGTCGTTACAATAAACTACCGCGTGGGAACACTGGGT -TTTTTTGCATTAGATGATGGTAAAACCAACGGTAATTACGGATTGGCAGACCAAACCACA -GCGCTGGAGTGGGTTCATCGAAATATCCAGGACTTCGGCGGAGATCCAGACAGAATAACC -ATCTTCGGCCAATCGGCTGGTGCAGGCTCAGTACGGGCACTACTTGCTTCTCCAGTAGCA -CGAGGTAAATTTGCTGGTGCAATCATGCAGAGTAATCTTGGTGGCTTGGCTTATGGCACA -ACCTACTCGAAGTATTATACCATCGCGCAGGAGATGGAAGTTGTGGGGAAAGCCATTCTC -AACGAGACAAACTGCACGGATGCTGCTTCACCCGTCGAGTGTTTGAGAAAACTTCCCGCG 
-TCAACGATTGCGAATCTTGCTGACTCGGCGCGGTATCTGGTTGTCGATGGCGTTTACCTC -CGAAGCTCAGAGCTGGATTTGATAAACCCAGCATCGACTGCCAATGTGCCTTTGATGATC -GGTACAATGAGAGACGATGGCGCTGCCATGATTGGTTATCCTGCAGTGGGACAAACTTTG -AAATCCTTCCTCAATGAATCGGGCTTTCCTGACTCAGTTAGCCCAAGCCAACTCTTTCCT -GTACCATCAACCGCCAATGCTACACTCGATATCTTCAACACGTCATCTCGCATAGCCACA -GATGGAATGTTTCGATGCATTGACCAAGCGACTGCACATACGGCCTCAAGGACTCATATC -TTCCCCGATATCTTTTACTACGAGTTTAACCGCTCATACCAAATGCCAGGCTGGTCACCC -AATGCCCCAGTGTGCAACGCTCCTATCACAGAGGAGTTCCCAATTGGTGACCCCAGCAAG -GAGTACTTTAAATGCCATTCTGGCGAGCTGTTCTACGTGTTTGGCAGTTTCCGTCGTCAG -GGGTTGCCGTTCCGGGATGAGTTCGATTTACCTTTCGGGCAGTATGTATTAGATTCTTGG -GCGAGTTTTGCGAGAGTAGGTAGACCTACTCCTGATTTGGCCCTTTTGAAGGCGAGAGGA -TATAGTAATACAACTCGTGAGATTGATGCGACTGGGTTGTGGGAGCCATTTGGCAAAGAA -AGTCAGCTTCGATTGCTCCAGTGGCCATCTGGGATGAGGGATTTGGTGGAACTTGAACAA -TGCCGTGCGTTGAAGTTGTCTTTAGATTACTTCGATCCTTAGGGCTAGGCAAAAAGCATT -AGTCCTAAGTACAGACAAAAGGATCTATCTTTTTTTTTTTTTTTGGCTCCCTAGATCTAT -ATATACACACGGGGCTATATTAATCCAATCAACGCTTCTTTCAGAGACAACATGAAGAGC -CTGTTGGTTCATTTGTTTGTGACTTTTCAGTGGTAGCCTCGTTTCAAGTGTCCCTTCAGA -GTCCCTTTCCCAGTCCCTTATCTTCAATCTCTTTGTAGTGTCTTTGATGGTGTTTCAGCA -TGATTTTTATAGATACAAAGCCTTTGTGTGGTCTTTGTTATGTAGGTGGCTCTTTGAATG -GAAATAATTTGAGCAGTCTCTTCAGTGTCCGCTTGTTATAGTATAAGAACCAGGAGGGCC -TACCTCGATGCCGAGACATACTCATTGCTCCCTTCTCGAGCTGCCTTTGCTGCGATTATC -ACCTACTATAAAGATCCGACAGAGCTCCCCTTTCTGTATCAAGATGCCTGGAAACAGAAT -CAACGAAGACCATTTCAAGAAGGCTGATTTCACAGCGAGCGATTATCTGGCTAATATAAA -CCCACAATATATGCAATGCTCTCACAATATATGCAATGCTCTCACAATATATGCAATGCT -CTCACAATATATGCAATGCTCTCACAATATATGCAATGCTCTTGGAAAACAGGTAAATGG -GTAAGCGCCCACGGCGATAATAAGGGGACAACTCTCGGCGCTTTTAGACTCCAGAGCATG -AAGGCAACTTCTGAGCCAGAAGAGTATCTGATTAAGATTTTGTATGTGAATGTCAAATTC -TTGAGCTGAGCTTTGCCGGGCAACCGGAACAAGGACGGCTCGACTAATCCTCCCACTTCC -CAGGATGTCATGAAAGCTCTGGCAGTCGAGCTCAGCAAGATCAGCAAGTAGAGAAAACAT -ATCTACAAGTTCGAGTATAGAGATATTCCGCAAGACTCATCTGTTTCTGCAACCATTTTC -AAGACTTACCTTTAACTTCGGGTGTGTTGTGTGAATCGAATATCTTTTCAGGCTCTCCCT -AATCTTTGACTGACTCTCATATCTTGCTTTGATGGGATGTTCGCCAATCTTTCTTTTTAT 
-AAGCTGTTGATATGTCCTTTGCTTTTGATTATTTTGCTTTATCCCGCAGTTATACAATTC -CAGCCTAGCTTTACGAACATTAAAGATCAGTTTATCCCTCTGGAATATAAGTAAATTTTA -GATTAAGAACTGTAACCTTGATGAAACATTTCAGATTAAGCTGAGGTGATAATTGACTGG -GGGTAGATACCATCTAGAAGTTAAGAGCATGAATCCGGTATTCGAACTTCTTTGATGTAA -TAAGCTGATAGTGTTAATATTTCCAGGGGACTTTTGGGCTCCTTTGGCTTCCCTTCTAGG -GGTTCAGCGCCCTGTCAGATCACTTAGAAAGTCTAACAAAGAAATCCAACATTAAAGATA -CTCGTAATTCTTGGAAATGAATATTTTTGCTATCACCGCCATGTTTGGACGTTGTATGTC -ATTATATTGACAGTTGGCACGGCATTTCGCATGAAAGAAACCTTTCTTTTCATCTTGTGC -TTTGCCCCCCTATTTTTGGTGTCATTTTGACTCTGTGTCGTCAAATAATTGTACTGTTGC -TTGCCCTCTTTCCAATTTCCAGTCGTCTCTACATCTGCTCTACTTTTAGACCTTTCTTGA -TCAACATCCTGAGCTGGCCTGATCCCACATTTGTAGGGATTGACACGCTCTTTGTCTCAT -GATGGAAGAAGAGACCAAAATTAGGATGAATTCACCGAGTAAGCAACCAGTATCGTCAGT -CCACAGCTTGTTGATCTAGTCTAACCTGTTCAGATGCCCGTATGTGGCCGGTAACAAGAT -CACGCTACAGCTAGAAGGTCGCCCGGTTCAAGCCACAGTCAATAGAACCTTCGAACCCTG -GACTTTCTCATGTACTATGATTTTTTCATTTGATTCTGATTTTTCTGCTCTAGGGATCAA -GGGGAATGTGGCACTGAAGCTATATGATCGGCGATTTTCAAAGCAACTACGACGTATCAG -AGCAATGGACCCCTAGGCACCGGGAACTGAGAGGGATCTCTATGAGTATCTTCGCTCAGA -CGAGGGAATGAGGATCGCAGTTTATATTGAGAAGTATTGCACAAAAGAACAAGAAAAAGA -TCACTGTTTTGTGCCAACGATCATTGAAGACGATGCCGGCGAGGAACATGCCCAGCAGAA -CGCCTGTAAGCAGGAAGATACCAATGAGACCGACAAAATCATTGAGAACAACAATGACAA -TGGAGATGGCGATGCCAACTCCAAATGCACACTTTCCAGCTTCTCCATGCTCTCGATCAA -GCCCCTTGCTCCTCTCTTCCACGCATGCAAGATGGCTTCCTAGGCGCTCTTCTGCACCAT -CAACTTTTTTCGCACTAGACTTACTGCGCTCATCCCCTGGGGACGCCGTAAGGTCCTTGA -CATTGAGACCCAAGGAGTCCACAGAAACGATGAAGCCAGCGGAAACAGTCCTAACCCCAA -AGACCATGAGATAGCTAGGTATCAACAGACCAAAAATCTCGATGCAACGGTTGAGGATTC -AGGGAAATCTGGAAATGAAGAAGAAGGAGAGGACGAAGATGAATGGGATTCCCTATGTGA -AGAGATGTATCTCCACTCTTACAGGCAGTTCCATTACGAATCAGAGATCCATGTCCCCCG -AATTTTTGCAAGCACCAAGATTCTGGGGCCAAAATGCACCCAAGATCAACCCTTTAGTCG -GTTTTTTGAGCCCTCTGGGATTCTTATGGAATATATCGAAGGATTCCCTCTGCCCGACAT -CGCAGATCATGCCCCAAGGAAAGAATGGCAGCCCATCATTGACAATGCTATCGAGATTGT -CAACAGCATCATGCGCCATGGGGTCATCAATGACGATGCGAGTTCAAGATGCTTTATCAT -GCAACCCGACCCAGCCAACGATTTGGATTCCGAATACAAATACAAAATGACCATAATCGA 
-TTTTGGTCATACTAGAGTTCGTACACAATACTCACCAAGAGAGGACTGATGCTGGTGGGA -AGCTCATAACGATGCAGAGGGGGCGATTGGACAAGTGATGTGTCGGACATTGGAGGCGGA -CTATGGCGGAGACTACGTCTATACACAGACGCCCTATTGTCAAGCGCTTCACAATGATTA -TATGAAGGAAGAAAATTACACAGATCGTGACGAGAGGCCTATTTTAAGCAATGTATGATG -CGAGGAGAAAGACCTGTATGTGCATTTGTTATGGAAAGGAGGATGCCCATATTAAGATAA -TCTGTATGCTCTCTTTCTAGCATTATTCCTCTCTTCCCTCTCTCTCTTCTTAGCTAGGAA -CAACCGGTGATTCTTTTTTAAACTTTAGCGCATAAAGCTCTTGTATTATATTATTATTTA -GGGCGAGAATGAGGATATCAGGCAAAACTGTCGAAGTATAAGCATTCTACTGAGGACATG -TCTATTTCATGCACTAGAAATATGTCAAATACATGAATGTGATAAGGACCTGCTGGAATT -TACCTGTGGACTTCTTATCTGCCATAACATCCCGGTTAAGTGCACGATTTAGTGTACAAA -TCTCCGAATGGATGACTATGAGTATCGTTTGTGGGTATGGATGTCCACTTCCGATGCTAT -TCCGAAATGCGGAGCACCACCCGTGGGCGTCCAACTTCGTGTACTATGTAAGCTATCTAC -TCTTTCAAATCTAGCTGGACCTCACATGCATCGGTATCTATAACATAAACTTGGTGTACT -AATATCTACTCATTGGTCCATACAAAAATGTAGAAAGCGGCCTCAAGAAACTTCGATCTA -TATAGTCGATAAGATCGGATGCGTCCCTTCCGACATTTCCCCTCATGAAGTAGACATATC -CCGCAAGAATTTATCATTTTCTCATTAATTTTCAACAACCTGCAAATCTAGCTCCTTATA -AACCCAGAATGTCTTACCAAATCACCTACGACACTACCCGTGCACCACCGCGCGCCGCCG -CGCTCGTCGCATTCATGGAAGACTTCTATCGCACCAGCGACACCGAGTCCCTGCACGAGA -AATACGTCCAGTACTTCACCCAAGATGCGACACTCATTATGGGACCAAAGGAGGCCAAGG -GCGCAAGCGGTATGATGCTATCCAATCTCGGTCACAATTTCGACTTTGATGCATGGAATA -TATATACATTTGTGTTGGGTTCCATTCCAGCTAACAAGACCAATCCCATAATAGCAATTC -TCACGCTTCGTCAGGGTCTCTGGACACATGTTGCTTCACGCAGACATACGCCAGTACAGA -TCTACTTCGGCAGCGAGGATGAGATCATGTTGTACGGTGGTGTGAACTATAGACTCAAGG -CTAACCCTGATAATGATGTTTATGTGCCTTGGGCTGGAAGGGTTGTGTTTTCGCCTCAGA -AGGAGGACGATGATCTCAAGATGAAATTTTATCAGGTCTACTTGGTGAGTACATCCTTGT -GCTGCTAGCTGTGATAGTTGTGGTGCTGATTGGTTGTCTTCTAGGATACCGCGGCTCAAA -CTGGAAAGAAATGAAGCGGATTGAAATGTGAGCATTAGACTTGTCTTGTACATATATGAA -TAAAAGTTGCATAAATTCAATAGCAATGATTGGATATAAATTGGGCTTTTGGGAAACGTT -GATCTATATAATGTGCAAGGCTAAAGCTCCACCAAGTAGCTAAATGACAACAGACACCTA -CATCTACACCTACAGCAGAGAGAGGACGGAACAGTTTTGAAGTGTTTAGACATGCTCTTT -ATTTCCAGGAGACCACGAGAGAAAAAAGGGACAAAGACGCCGTAGGGTTGATCGGGGTCA -AAGAGGTGAGGGAGATCATAAAACATGTCAAGGGGAAAAAGGATCCCCTCTCCTCGACAA 
-GAGAGAGAGGGGAAGGATTGTGTAGACCGTCTCGTCCGCTCTCTTCTCTCATTCAAGTCT -CCGGATAATGATGCCTCATCCTCATGCACTCTGCACCGCCCATATCTCTCTCCGCTAGTA -GCCCCGTCTGGTCGTCAGATAAAAAAAATATGGAGTTGGAACAAGAAAAAAAGAAACAAA -CGTAAGTCGCGCACGGGACCGCAAGCTCCGAATTTTTATTTTTGTGTCGTCATTTCCAAT -ATCTTGCCGCCGTAGCGAAGACAAAACCACAAAAACAAACGACTTATCCCCGAACTATTA -AAAGCCCATTGTTGCGATACGTTTATTCCTCATCAGACTCCTCCTCCTCATCGACGGTGG -TCTTGGCGATACGGTCCAAATCACGCTGACCACTCTGGGTGATGCGGCGGCCACCCTTCT -CCTCATCGATCTCGAGGACACCGATCTTCTCAAGAGCCTGGAGGGCCTTGCGGTCAACGG -AGCCGGAGGCATCAACGTGGTGGGCGGGACGGGCACCACGGTTCTTGGTGGAGCCGTGGA -CCTTGCGAAGACGGCCGACACCGACGGTCTTGCGCATGTAGATGTGACGGGCACAGGCGG -CGGCGCGGACGTAGAACCTTTAGAAAAATCAATGTCAACTGGTTGTTCGAATTTTGGTGG -CTTGTTATTTGATGCGGTCGTACCAGTCAGCATCCTGGGGAGGAAGCTCGTTGGAGGCAG -AGGTCTTGACAGTATCAACCCATCCGGGGATGGGGAGCTTGCCCTGACGCTTCAGGAAAG -CAGAGTAGGCAGAGATGAACTTCTGCGCCTGTGTGGGTGAGAAAAGTGATATGTTAGTTG -TGGCTCATCAGTATACGTGTGCATCGGTCAAAGTGTGTGGATTTGCAGATGTGTCGGATT -CGTTTCTGGAGCAGTTCATCATTCGTCGTGTGGAATATTCACTAGAATGCAGTCGTTTTA -TCCAGTATTCCAGAGAGAATACCAACAGATAGACAAATCTATCAAAAATTCGCCCCTCAA -AAACATGCATGCACATCCTTCAATATTCGACGTATTTCCGATAGGCAATTAAACAATTCG -CCTATTGCGTGCGTAATCCGTGCGAAGCATTTCCATATTGGCGATATGATCCGACCGATG -TTGTGGTATCCGTGCATCCAGAAACGAACCGTCCACATCATGCAAACCAGAGCTGGTGAC -CGATCACCTAAAATCGAGCAGGACACTTACGTCCACATCGCGAACGGTAACACCACCCAT -TTTGGCGACTTGTCGAGGGGTCGAAGGGTAGGGATTAGAGAGAGAGAGGCGTTGGGCGTC -GAGGTGGGCAGGGTCGAAGTCGAAGAGTCGCTCAAAATTTCAAGACACACACAACTCTGG -GCGGGTTGGGTCTGTGTGCCGTTCGCTTGTCCATTTTGGGTTAGCCGTACCCTAAGCTGG -GTCATGTGATGTGGCCCAATTTATGCCTGACTCTTAAGGCTCCACAACCACACCGCTTGA -TGCCTGATGTTCCAGGCGTGAATTATTTGACATACAGTCCGTGATGGAAGATTATTCTGA -ATAGTTCATTTTCTCATGTTCTAGTGATGCACCAATTAACATCTATTCCATGGCGGTTTT -CATTATTCGTAACTTTGAAACAATTAAAAGATGTACCCCGTTTCCTAAAACCTTGATCCA -TAGTCTTCGCTCCCAGTTCCAACGCCCAAATCATTCTTAATTACAGTGTGGGCGCTCTTT -GCAGTCCAACGCAATACCTCACAAACAGAGTGTCAATAAAATGACCATGTCGGATACCAG -GAGGTCGGCACTGGTTAAACTCATCAATGGCATCTTGCACACCAAAACCAAGATGCTCGA -TCAAATAGCAACAGATCAAAAACCCGGTACGGTTGTAGCCATAGTGGCAGTGAACTCCGA 
-CCACTGGACGTGGCAGAGACTTCTCTTCATCCGTACGCTGCTCTATCTCCTCATCAATTT -GCTTTTGCAGTCTATTGACTAGAGCGATAAAGTCGCGTGCCTCATCTGGAGTTGGAGGAA -TCTTGGATACAGTCGGGTGCTTGTAGTAGCGGATTCCACCCTTCTCCATTTCGGCCGGGT -TGTAGACAGGACTTTCATGACTGATGTCTATAACAGCGTAGATGCGACCACGGTACTTTG -CCGAGAAAGCCGTAGGGTTGTGTTCCTCGTCAACCTCGCGCAGCATCTTCATCGCAACAA -ATGTGTTGGCAATTCGTTCCGAGACTGGGGCGACCTTTTTCCATTTGGCGAGATTCTTGA -CATCCCACTTCCCAGATGTATTCATGTGCTGCAATTGCCAGCCCAGACCAAGACGTTTGT -CAATATTTGACGAGAGGAAATCCTGGATAATCCCCGCAAGAGTGCGATAGGTTGCCCGGT -CATAGAGAAGTGCATGAGATGCGGGCGCCGGCAAAATTGCCGATTTGACCACCCGCTTCC -GCTGGCCAACCTCCAGATGCTTCTCAGAGATCTGCACACCATGCACCTTCTCGTTGACGA -GATCGCCGGCTGAAGTTGACATCTGGTCGTGGACTCGAGAAGCGTCTGGAATGATGCTGC -TATCACTGGTGTCGATCGTGGTATGGGCCGAAGCATCACCAAAGAATTTCAAGATCTTCT -GTACCTCCACTGGTTTGGTCACAGCATCCGTTTCTCCCGCGACTAACAGAACTGGCATGC -GAATCCCTGCCCAGACCTCCTGACCAGGAAGTCCACCACTCGGGTGTTTGCCAGTATCGT -AGCTGGGAAGCGTGCCCCACGCCATGCGTCGCCAAACAGGCGTCCGACTTTGCTTGTTGT -AGCGCACCTGCAGATTTCGCGTTTCCACGTCGGCAGCGACACCGACCATCCGCATAACAC -TGGCACTTTGTTCGCCTCCACGTCTGTCCCAGCGACGCCAAAGATCAAAAAGGACACTAG -GGACATAGAGTGCTCGGCGAAACTTGGTAGTGTCTTCCGCAGATGGGGGTGAGGCTCGAG -GACAGATAGCAACAAGGCCAACTATGTGCTCCTTCACCTTTGCACCAATAGAGGACGCAG -ATGAAGCCAAAAGAGCGGACAAGGAACATCCAAGACTGTGTCCAACTAGAATGATTTCTT -GGCCAGCCTCCCGGTCCCGGTGCTGATCAATGGCTTCGGCTAATAGTTCGGAGAGAGCCT -CCACAGTATATGCATTCCAATCCGTAGGCGCAAATTTGGACAGGCCGCACCCAGGTAAAT -CAATACCAAAGCACGAACTGACATTAGATAGACTGGTTAGGAGGTGGTGGAATTGTGCTA -AGGACCCGCCTAACCCATGAACAACCACGATCAGTGGGATAGGAGTCGGTTTGGTCGGAA -GTCGAGCTAGGTGTGGATGCGGGCTGTAGAATGTGCGAATAGCGGGATAGGTAGCTACCG -ATGTAGTGTAGGAGTTGTATGTGGTGAATTTTCTGAGAATGCCAGGGTCCGTTTCATATG -CTGCGCGATTGTTAACCGAGGTCCCAAGTGAATTATGAGCTTTTTACCTACCATCAGAGC -TTTGCTGCTGAGTGTCCGATGGCCAATCTTTCTTCTTGTAGTTGGGATTGGTTCCGTGTC -GCAGCGATGCGTTGCTATCGTAGTCTTCATCTTCGTAGTCTTCCTCCTCCGAGGAGCTCA -GAATCTTCGAAGATTTGGTCAAGCTCTTGCGAGCGGTGGTGCGCTGCGCATGAGAAGCGT -TTGACCCCGGGCGCTTGCGCCGCAGCAGCGAAGACAACAACCCACACCCAGTCATGGCGA -CTACACAGGTAGTTAGGAACACAACCTGACCCCAATTTACCTCGTCATTGCGGTGAAAGC 
-CGCGGAAGTAGTTGGCGATGGAGGAGAAAGAAGCGTTTTGGGCGGCCGCGCGCGCTGAGT -GGAAGAATTGTAGAATAGCTGCGACACTTGAGGATTTGGATTGAGACGTCAACGAGTCGA -AAAGGGCGGAGATAGCGTCGGACCCCATGATTAAGTATCGGAGGGGCGAGAGCTATTTTC -AGAGCCCCCCGATAGATATTTTGCCAAGAACCACGCGATGGCGCCCCCGATGCTCCTTGA -GTAAAGGGCAAATAGGCTTGAGACCAATAAGGCAAGCTAGATCGACAACGTTGAAACCTT -TGAGTGGCGAGGATAAGAAAAAGTAGAGAGATCAGCTTCGGCCACTCACAGCCTCGGGAG -GACCGGCGTGGGGGTGAGTCATCCATCAGCCCCACTTGGGTCTTGCAGCCACAAATTTGG -GCAGGAACTTGCTTGATTATGGCATTTACTATGGGAGAAGAAGTACTTTTGAAGTATACA -CTATTAGGCATCATCTATGGATCTTCCATAGTTTAAAATAATTTAGTCTTCTTTTGCCGA -TAGTTTATACCCCTTACTCCCTTATAGCCTACAATGTAATACATAGGTATACGATTCCCA -AGAGGTCGATATTGGACTCCATTTCTTAGTACCCCTGGGGCCAGCATGCGTGGCTTCTTA -ATTACTACTGGCCAATCTCAAGCGGGCCCACGAAGATCATATCAATATCGAATGATTATA -CCTTTTCTGGAATGTGGTAGAATATATCATAACATCATTTCAATTGCTTCGATATATGCT -AGATACATTCACACTGGGTGGGATATCATAATGATTCATTAAAAAAGATCTGTCATGGCT -CCCTGTGATGCATCGCTGACCAATCGTGCATACTTGGCCAACACACCGCGAGTGATTTGC -GGTGAAGGAGGGGTCCACAGACGCCGTCTCTCCTGAAGCTCCTCATCCGACACATCCATT -GAAAGTGAGTTGGTCTCAGCGTCGATAGTGACGACATCTCCATCGCGAACGAGCGCAATT -GGGCCACCCACGGCAGCTTCAGGCACAATGTGACCGACAATGAAACCGTGGCTGGCACCC -GAATACCGTCCATCAGTAATGAGCGCCACGTTGGTCAACTTGGCTCCCATCAGCGCGGCA -CTTGCCTTCAGCTGTTCCGGCATTCCGGGCCCACCCTTGGGGCCCTCGTAACGCACGATG -ATCACAAGGTTCTCGCCGCGTGGGATGCGGCCTTCATTTAAGGCGTCGTTGAGTTGGTGT -TCCTTATCGAAGACACGGGCCTTGCCGACGAATCGGAGCCCTTCCTTGCCGGTAATCTTG -GCCACGGCACCGCCTGGTGCAAGATTGCCACGTAGAATCTGCAAGTGGCCGCTCGCCTTG -ATTGGGTTGTCCAATGGGCGAATCAGATCCTGGCCCTCAGTCAGGGATGGGAATGAGGCA -ACATTCTCGGCCAGCGTCTTGCCAGTCACTGTGGGAATGTCTCCATCAAGCAGACCGGCT -GCGATCAGCAGCTTCTGGACAGATGGGACACCTCCGATCTCATAAAGATCGGCCATATAG -TACTTTCCACTAGGCGAGAGGTTGGCAATGAAGGGGATCTTGTTGCTGACCCGCTGGATG -TCATCCAGTGTTAGGTCAACACCAGCCGTCCGAGCCATAGCCAGGAAGTGAAGAACACCA -TTAGTGCTACCGCCCAGCGCCATAGTCATGACGAGCGCATTTTCAAAAGACCGCTTCGTC -AATAGGTCCCTTGGACATATATTCTTCTCCAAGCAAATCTTGATAGCATCCGCAGCACGC -ACACACTCGCGCATCTTGGATGGCGATTCAGCAGGCGTGGATGATGAGCCGGGTAGTGAC -AGACCCATTGACTCAATAGCAGTGGCCATGGTGTTTGCGGTGAACATACCACCACAGGCG 
-CCGGCACCAGGGCAAGCATGCTTTTCCAAGTCATCCATGATCTCATCTTGGGTCTTGGTT -TGGTCACCACCGTCATCAGGCTGGCGCAGGGTACCGTAGGCGTAGGCACCCGCAGCCTCA -AAGCATGTTGAGACATTGATGGGTTTCCGCAAGTGTTCAGAGTAGCCGATTCCAATAGTT -CCGCCGTAGATCATAATGGAGGGGCGGTTATGGCGAGCCATGCCCATTACGACACCGGGC -ATGTTCTTGTCGCAGCCTGGAATGGCAATACAGGCATCATGGTATTGGGCACAGGTTACG -GTCTCGACACTATCGGCAATGACTTCTCGGGTCTGAAGAGAGAATCGCATTCCTATCGGG -GGTTAGCTATATCGATTCTAGCGGTGAGCATTAACAGTCAACCAACCTTCACTGCCCATG -GTGATTGCATCCGAAACACCAATCGTGTTGTACTGCCACCCAAGCATTCCCTGTTCCAAG -ACCGACTTTTTGACGGTCTTGGCCAAGTCCATTAAGTGCATGTTGCATGGGTTTCCTTCC -CACCATACTGAGGCGATACCAACATGTGGGCTCTTGGCCATCGCTTCCTTATCTGGGACA -CCGGCGGCATAGAGCATAGCCTACAGCGATTAGTAGTGAACCTCACGATGTTCTTATTTT -CAACGTACTCGTGCGCCGGGGAAATCATGGCCTCTGGTGATAGTATGGGAGTGACGATTG -AGTGCGGGGGAGCCATCCGGATGCTTCGCATCTGCGGGCAAAGAAGGGAAGTTGATATAT -TTGGCCTCGCTGAGAGTGCTAGACTCCGTTGGCTTAGAGAAATCCATGACTGCGAAAGTG -GACAGAAACAAACAAAAAGAAGAGGCTGCAGACGCAATTGACTAGAATTATAGCCCACAC -AACCTCGGGAGCCTCGCACCGATGTCCGACCCCCAGAACCCCACGTTCATGATTGTCCAG -TCAGAGGCGGCCGATTTCTTAGCAGATCGGAGTTGGCCCTAGTCTCCGGATCTACAATGT -ATAGGGCTGTATAGCATCAAGAATCTATCAATCCTTAACGTTGGGTATTATACTAGGTAA -AATAAAATATAGTGGTCGTAACAAGGGAGCCCACAAAGTCCGTCCGCATCCGTCTGTGGC -ACTCACGTCATACATATGCTACCTATAAAAGTAACATTCAGGAACTTCAGCTCTATTTTC -TTAATCCATTCTAAATACAAGAGAATCCACATAATGGGCACAGACCTGAGAATCGCCCAG -GCAATTGGGACGCTTGGCTGTGCAATGGCAGCAAGTAAGTAGCACTACACAACAGAATGA -CCAGACAACTCACAGTATACAAGGCGGAATTGCTTCATTGTCGATAATGAGCATCCCCAA -CATCCTCATTCCAGAGCGTCGCCCGCCCGGTGCGACACTGCCATTCGAAGACACCCCAGG -AACCTCGACCGCCCACTTGACACACCAGTGGTTTGATTTGTATGATCGTGGCAGCAAGAT -TTTCCCCGGTATCTCCGCAGTATCTTCCCTTGCAAACCTTTATGCGCTCTGGGAATTGCG -GGACTCTCCTACCCCTGTCTTGGGTTTCTTTGGGTCCAACTGGACCACATGCTATCTGGC -CGCTGTGGGCGTTACGATGAGCATTGTACCTTTCACTTTGACAGCTATGAAGAAGACAAA -TGCCAAACTGAAGGCTCATGCAAAGCGCGATGATGCGGCTGGCGCTGAGGGTACTGAAGG -CATGGTGGTTAGTCCGCAGGAGAAGGCGAAGCGGGCTAGGGATGATAGCGAGGTGGTTGA -ACTTGTGCAGCATTGGGCGAAGTTGAATTTGATCAGGGCTATGCTACCACTTGTTGGGGC -TGGAATTGGGTTTTATGCTGCTGTATCCAGCTGGGCAATTCCTTGAATGGGAATACCAAA 
-AGTTTTGGTGGGCTGAATGTTCACTGTTTTTTGTTTTTTTTTTCAAATTTATCCGTGTAT -GATTATGTGTGGGAAAACTTCAGTTCAAGTATGGGTATCGTCAAATTTCTATCGGGGTTG -CAAGTTGTATATGTACTGCGGCTACCCAGTTAATCGTATAGTTATAAACAAGTCCGAGAA -GAGAATGCCAGTAAATGTCCCCAAGTGTATCCACAGTGTGAAATATGTGCCTCAGGGTTA -CTGTCATGTTGATGCAGGCTTTTGGTCCAAATCCTCGTTCAGATCTCTTGCAGCATCGTC -CACTGTAGATTCTTTCTCAATGGTGGACCATTGCTCTGAATGTTGCAGGGCTTCATCAGC -GCTAATCTCCTGCAACGGGCTGGATTTGTCCGATGAGGTCCCTTGGAGAAGGTCTGTTCC -ACGGGTCTCGTCATCTGGCTCCCCGTCAATGGCAAAGCCAACCATCTTCCCAGTGCCCGG -CTGTGACCGTGCCTCTTCTAGCACATCGCCATTCCAAGGCTGGCATTTCTTCACAAGTTC -ACCACACCGGAGGCAAGACCATCCCTGGCCTTCTTCTGCTTCGACTTTCTGCACAATACA -GACAAAGCAATAAATGCAGCCGCAAGGGACGGTCTCATATGGGTTTGTGATATCGGTTTG -CGCCGATCCCACGATACCTCCTGATGCCGAGGCCGCCATGGCGTCAGCTTCTGTCGTTGT -GGTCGGGTTTTGATCGCGGTAACAGATAGCACAGGTTCGCTCTGGGAGGAAAGCCAGCTC -CCCTTGTTTTTCGGAAATCTCATCATCATCCCCGCTTGATCGGAGAGAATTGACCATCTT -ATGCCAGGCACGGGCGATCCATCGCCGCCAACGACCAATTCCAACAAGAGGAAGAAGGAA -GAGGAGGAATTCGGTAAACGCATGCCAAACTAGTTGACGGTTTAGATATTCGAAGGATAC -TTCCCGGCTAGCTTGTGCAGACGGCGGAGTGAGGCGGATGCGAAGGATGCGATCGACCAA -GGTACGGTATCGGCCATTCACCAGGAACACTAGGAAAGAGACGAACGCGGCAATCGAGTG -CGATGTCGAGATCATGTCCGTTAGTCGCGACAACATTCGGATGTCCTGTGATGGCTTTGT -AATATCTGTTAATAAAAATTTGCCAATAGAGAGCAATGTGTCGCCAACTAACCTCATCAT -AGCCACCTTCTTGCCCGACCAACCAGCTCTCCCACTTGTTCCAAGCATATCGTCCTCCAA -CAGTCAATAAGCCATAAATACCCTTCTGCCATTTCGTCGGAGGAGAGTAAACAGGTCCTT -TGCTTCGACTATCGGTGTATCTCAGACCTTGTAGTGCCGCACCATATGATGCATTATGAT -CCCATATTGATAGTTTGAAGAGGATAGCCCGTAAGGCGAACAGAATCTCGTTTGACCAAT -CTTCACGCATTTGAGGCTATTATCATTGTCAGTCGCCGGCCATCACAACACGTCATGGAC -TCATTTGCCAAGCATTCACACATACCCCGAAGTATTTGAGGGCATCCCCGACCTGGCCCT -TCATCAGACCAAGCAGCTCCTCGTCCAACAATTCAGCATCTACCTGGCCCACTCGAAATG -CGGGGCGCGTACCCTCCCGGCCATTGATAGAATTCCATAATTGCCACCCCGAACTGGGCA -ATCGGTTCAATGGATATGGCAGACGCTGAACGGTGGCCGGGCTGATTGGAGAGGCCCGCT -GCTGTGCTGCATGGCGGGCGCGCGCTTCGGCCTCACGCTGGCGACGACGTTCAAGAACAC -GCTCTTGTGCAGCGGCGAAATTTGTCGAGCTCATCGCGAAATCATGACTTTGTAAAGGAT -AGGAGAGAGCGGTTTGGGGGTTCGTGAGAACACGGGGAAATGGGTCCCGGAGTTGCTCAT 
-CGCGCTCGGTTCCCCAGACAGAAGAGCCGAGGTATCTCACGTGACTTTGTCTCCAATGCT -CTTTTGGGAATTGTGAGTAGTAAAAAATATATATAGATTCTTAGTGGCATAAGTGGAACT -ATCATTTATAGCTGCCTAGGTATCATGTTGTATTGTAAAGCGCTGTCGTCGGCTTGGCCT -TCGGACATACCCTTGACAAGCAGACCTCAAGAGTACGCAACTCAACACTATCTGTCAACC -TACAAATATGGCCCCCTAGACCTTCAATAGAAGCCTTGGTTTTGTTATGGTCTACTGGTA -AAATAGGCTGTATTTTGCTCAACAATATCTCTTATGCATATCAGTCTCGGTGACTGAATT -TTATCAAGGCATCCATGACCAAGGCTTATCATCAAATCCAATGTCCGGTGACTGCCTCAA -AGAACTGCAGATTCACACAGATATCTCGGCATTCAATATAAGCCATTCCAAATGATTCAT -GCAAGTTAGCTTGGCAGCTGATTCGTACTGTCTCAACCATATTGCCCAATCAAACCCATG -ACAACTCATGTCGGAGACTGCCCTTGCATTCGGGACCAAGGGGTAATGATTGGGTTCGAA -ATGGCGGCCAAATCGAAATGCTTCGCTGGAATTTCTTATAAATCAAACCATCATCCAGTC -TTAAATTTTGTTTCTCGGTTTAACTAGTGCTTTTGCTTGATTCGTTTTGGTCAGATTTGT -CAAGCTGGCCAGTATGGCACCTATGGTTGACGCATCTATCGAGACTCGGCTGTTCATTAA -TGGTGAGGTATGAAACAACTACACTTAAAGCATGCTTCTTAATACTGACCTTTTCAGTTT -CGAGCATCGTCTAGTAGCAAAACATTCGAGGTGGTATATCCATACACCAAAGAGGTCGTT -GCGCAAGGTACGCCCCACTCTACGCAAAAGCCAAATTTCCACTGACACCTGCTAAATTAT -CAAAGTCCAAGAAGCCGATATAAAAGATGTAGAAGATGCCGTTACTGCTGCTAAGGCAGC -TTTTCCCGCCTGGCGAGACCTGGGCACTGAAAAGCGTGGCGTATACTTGCGCAAGCTATC -CCAGCTCATCTTGGAATCAAATAACGAGCTGGCAAAGCTCGAAACTCTATCCACCGGCCG -ACCAATCTCGCAATTCTTCGACGCTAGTTTCGCAGCCCAGTTCTTCGGGTACTTCGCTGG -TGGCGGTTGGACCGCGCAGGGAACGGCAAGCCTCAACACGCCAGACCACCTTAATATGAC -CGTCAAGCAGCCGTACGGTGTAGTAGCACTTATCATCCCGTGGAACTTTCCGCTTATCAT -GTTTGCCTCAAAGATGGCACCAGCACTAGCGGCAGGCAATACAGTTGTATTGAAGAGCAG -TGAAAAGGCGCCGCTGACGGTTCGTGCAGTATATTTGGAGTATATCACTTGAACATGCGA -ACTGACACTTTGTATAGTCTCTCTTTGTAGCCAAACTTATTGAGGAAGCCGGCTTCCCAC -CCGGCGTTGTCAACATCATCTCTGGCCTTGGCAACCCTACCGGAGCGGTACTGGCATCAC -ACATGACTGTCCGCTGCATCAGCTTCACTGGCTCCACGGCCACGGGCCAGAAGATCCAGG -CGGCAGCCGCAAATTCAAACATGAAGCGTGTCCATATGGAGCTGGGCGGAAAGTCACCGG -CGATTATCTTCGAAGACGCAGACCTGGAGACAGCCGCTGCGCAGACACAGTTCGGCATCC -AGTTTAATAGTGGACAAGTGTGCTCGGCGAACTCACGGATATATGTGCACGAGTCAGTGG -CTCAACAATTCACTACGTTGTTCCGTGAGAAGTTTGCTGCTGTCCGAATGGGTGATCCAC -TCGATCCTGCTACCTCGCATGGTCCGCAGATTGATCTGCTGCAGTATAACCGGATCAAGG 
-AATACCTTAGTATCGGGGAGAAGGATGGAACACTCAGTCTAGGCGGTGATGCGAAAGACG -GGTTCTTTGTGCGGCCAACTATCTTTGAGGGTGTTGCTGAAGACTCGCGGCTGATGAGAG -AGGAGGTCTTCGGCCCAGTTGTTGCGATCAATACTTTTTCCACCGAGACCGAAGCGATTG -AGAAGGCCAATAATTCAGAGTTTGGCCTGTTTGCTGCTGTCTTTACCAAGGATATTGATC -GCGCCGTTCGTTTGGCTAAGGCTCTGGATGCGGGCACTGTTGGGGTGAACTGCACTAGCC -CGACGGGTGCTATGGATAGTGCATTTGGTGGGCTCAAGATGAGTGGCAATGACCGGGAGG -GATTTTTATATAGCCTTGACAACTTCCTTGAGACCAAGAGTATCTTAATCAAGACAGCAC -GCCTGTAATAGATACCTGGCTCCACCAAATGCATGGTTGACAGTGATCGCGCAATATTGG -GGCTATAGAGAGTTGAGTACATTAACCGCAGAAGAAGAAAACCCCTCTACCGAAGAAGAT -GTAGAATATACCAGTACCCATATGGCATTAAATTAAACCTAGCCCTGGTTATACAAGGAA -GTCATTTTCACAAATGAAGTAATTTCTCTTTGTCCAACACGACATCCTTCTGGGTGGTTG -CTGCGACTCTCTTGTGCATTTCTCCTTGCAAGACACGCTGCCGTCAACAATCCAAGCTCT -CAAGCTGCATAAAAGCAGATTGTCCGCTGTCAATTCTGACCTACCCGATCAAGTGCAAAA -GCTAGTGGGCAACAAGTTTCCATATTTGAGGTCCATCACACTCGAGAGAATGGTGGTTCA -TTATGATGGACAAATTAAGGGTCCATATTATGGGCTCAAAGAGATTTTCGCTGCAAATGG -AATCATTTGTCAGATAGAAAACGGTGACCAAGGTGCTATTTCGAGGATCTATGGGTAAAA -ACAATCTAGATGCAAGAAGATGGACCTTCGAGGTTCCCAAGGACTTCTACTCCGCTTAGC -CGAAGATGCAAAAAATACCAACAATGGAGATGAATGAATAACAGCAAAAATCAATACGGC -GGTCAAACCCACTATTAGAAAGTACTGGTATGCTGAGCATAGGCTAGGAGCATATTTCAG -TATACTGCAGAGGCGATATCAATATCGGGTTTGACATGTACTTTCGCCTGTCGGCGGCCC -AAGAGTGTTGTACGTTCTCGGCTTCATTCAATATGCCGATGCAAATGCCACACCAGCGTG -TTATGGTACATTCTAAGCAGTCTGCTTCGCAGTCTTTTAATAGAAAAAGCAATATTAGGA -TGATAACGCATTTAAATTCATAGCTAAGAATTAAGATATCAGCCCGCAGATAGCCAATCA -GTCAGGATCGCATCTGCAAGTTTCCGGATGATAAGGACCATTGAACCATAGCCACACCCG -ACCGAACAGTAAGAATGGTTGAAAAACATCTTAATCAGAAGCTGTAGGCAGCGATTGCCA -GCAGGACTCCGACCACGACAGTAGAGATGGGAGCCTTGGCAGAAACGCCAGCGCCTTTGT -GACTAGAGTCATCCTCTGTAGAGTTGGTCTTGCTATTCGACTCAGTCTCGTTTGCCCGCT -TGGCGGTGACCTTGTCGTTGTAGTGAGTCTTGGCGTTCCAGGGGTATGGCTTGGTGTCGT -ACGACGGCTCCAGGTGTCCATAGGTTTTCTTGACAATATCCAGGATACCATGACCGGCGG -AGCACTTTTCATTGCTATCGGTAATGGGAACCGGCCACTCGGGAGTGAAAGTGGAGGTAT -GACCGGTGGACTTGTCCCCAATTGACAAGGGACCAGGGTAGGTCTGGTTGAGCGTGAGGT -GGGTTGAGTCGACCTCGTAGTTGACGTAGCCGGTCTTGTTAGCGACAATCTCGAAGATGT 
-TGGCACCACAATCCCAGCGACCCAGGGAAGGAAGACCCCAGTTGGCGGAGACACTGGCAA -TGCTAGAGTAGTGGGTGTAGAAGGTGTCGTCCTTGGTACCCTTGAGGTGTTCAGGAACGG -CGCCACCAACGAGAACACTGAAGACACGGTTGTCGCCCGTGTAGGTGTCGTCTTCATCGA -AGGTCAAAAGGAGAAGAGTGTTGTTGTAGAAGTATTCGTTCTCGAGCAATTTCGCCACCC -AAGGACGCTCCCACTTGGCAGCAAAAGTGATGCTGGTGTCATGCGCATCGTTGGTCATGT -TGGGAGTGATGAACGCCCACTGAGGAAGCTTCTTGTCCCGAAGATCATTTTCGAAGTCAT -CAAAGCTCTTGATAGCCCGTGCACGAGTGTTGTTGACCACAACGGAATCATAAAGTACCA -GAGGGTTGTGCTTCCGAACATAGTCATTCTTGTAGGTTTCCTGATTGGAATAGTTGTAGC -CCTGGAAGCCGGCATAGGGAATGTGCTCCTGGTATTCAGCCCACGAGATGTGCTTGGTGT -CAAGCACATCAGCAATGGTGGAAACGTTCTCCGGGACCTGAATAAAGTCATCATTATCCA -TTCCAAACGTGTCACCGCCAGCGGCAGCGCAGTAGTTAGGCTCGGAAGGATGGGTGACAG -CATAGTAGTTGGTCCTAAAAAGCATCAGAATGGGTCATCTCAGTCAGCGCCAAAGTCAAT -GGAAACGTACAACAGAATACCCTGAGAAGCAAGCCACTGCATGTTGGTATCCGCAGCAGC -TTTGTCATAGTCCTATGAGCCTGTTAGTCATGTCTCAACAACTATATGAGTTGGATGTTT -TGGGTGTTTGTCACATACAATGTTCTCCAGCCAGACCTGATAGAAGCGATCAAAAGCCAG -ACCCTTAACATTAGAGACGGTGACCTCCGGCTTGGTTGTTGCGGCAGCCGCATGAATATC -CGCCACAGCGGGCTCAGAGGTAGTGGCTGTCTGAGCGACTGCCTGAGCAGCCACTCCCAT -CAGGACCGCAAGAACGGCCTTCATCTCGAATGATCAGGAAATCAATGAAAGATTGATGTA -GCTGATGAACCAAGCAGGCAAACTGAGGAGATTGAAGGTaagaaagaagaaaaaaagaga -agaaagaagaaAGGATCGTCAAGAGATTTATGGAGGAGGAGTCACCACACGTGCTTCTTA -CATTCGCTCGGAGATAAACAAGGTATACCTGACCGGTACGTCAGCTAAGTGTGATCTTGC -TAGCTTTGTTCAAGTCCCATGTAGGTACTATGTCACTGATGTTGAGTCGAAGTTTAGAGT -GTTGTTTGTACGACGTTGGCACATGTCGCGACGGAGACTTCTGATTGGTCGATATTTACA -AGGATTCTAAATGAGGGTTCGCGCCAAGACCAAAACGAATGCTTAGCTGATTTCTTTGTA -GCCCAAGGCAGTTTTGGCGGTAAATGGCCCTAGGCATAGTCCTTTCTATATACACTCGAT -CGACACATAGAGGATATAGTAACTATGAAGAGATGAAGTCTACCCAATTTTTTTGTAATG -TCGGTTAACAAGACCCTGAAGTGGAGACTTGTAGGACGCTAGCCCATGTGGTTTTCTGGA -GAAAACCTTGGAAGCTGAGTCGGATGCTTAGCCCGACGCTTAGTGAGTGGGGAATCGATG -CCCAGGCAAATCACTGAGCTTGGCCTAGGGGCGTGCTGAGCTTCACCCAATGATCAGAAG -GTCATTGATAATAGCACTAGAACAAAGGAATAAACTCTAGGAGTGAAAAAAAAATCCAGC -GACATGTGTGGGTAGTTCATCTAGGCATCATCTTGGGCCTGTCTAGTAGGGCAGTTTCTC -ATCGGAGTAATTGTCGGGGGGTACAAGTAAAACCAGCAGCCATTTTGAAAAGGGACGAAG 
-AATCCCCGAAGAAACCATATAAAACTCACTGTGAATTTCCTATAGGAAAAGCTTGTAAGG -CGTGTTATCTATCTAGATGATGTTACAGTGTTGGGGCGATTTCGAAATGTATCCATTATA -CTAGTCAGGAAGGTAACATTGCTAAGGCTTATTACTATACATCTAAGATGACTTTCTAGG -GCTGATGCCTCGGTTAGTCGGGTATAAGTCGGGACTTCGACAATATTAATGAAAATTCCA -ATGTTCTAATATCGTACAGTCATCAGGTATCATTTGTGATATGTATCTTTGCGGTAAATA -TTTGGGCTTCATATATCGAGGTTTCGAAGAGCTTCCTGGTTACCACGACGGTTCTCGGTT -TAGATCTTGTCTTCCGGACCACCAATACCCTCGAACTGACCTGCCTTGGCGAGTCTAGAA -GGGACGGAATGTTAAAAATGCGAACAATGGACATACCACAGAAGCAGTATATAGCTTACC -TGTTGCGGTCATCACGAGTGCCGCCCTCATCCTCGGGGATTACTCGGTGGTCGCTGCCAG -AGGCAGTTGAGCCAGGGCCGTACTTCACACCTGTAGGCTGAGAGAATCGCTGATCGACCA -TCTCGTCAACACCAGACTCGACAGCTTGAGGAGTTATTAGTACAAATGATATCGGTAAAG -GCCAACTTCTCGACGTACTTGAGTCACTCTTCTTGCCAAGACCTTGCTTGGCCTGGTAGG -AGTTTAGATCCCTCTCGGCATCGAAGGCTACCTTGTTCAATTCTTCGGTGTTGGGCATGT -TGTTTAGAGTTTCTTCGTTGTTATGATAGAAGGTAATTTGATAGGTGTGTCAAGGTAAAT -TTGAGCTTATAGATTACTATGTAAATTGAACTGAATTGCGCTTTATATACATCGCTCTGG -ATCCCTCTCCACGTCCACATCATACCTACTTCACTTCAGGTTAGAGATTGAGGTTGCATC -ATGATACATCATAGGTGTGTCACCAGGAAGAGGATAGGGACTGTTGTATGTTATAGGTGA -ATGAAATTCTCTTGTTCAAAAACTTGAAAGCTGTCAATTCCAGTGTGATAACATCCTAAG -CTTGGAAGTTGGTCGAGCCTGACTTCATTGTCTCCACCAGGGGGAACGTCAGGAGCCCTA -GCAACAGATCTAAATCGGGACTAAAGTTAGGGTCCAGCCCTAGGGTTCAGCGGGATCGGA -GTATGGATTTTCCGGAGGCGGGGAGTTTATGCTGCAGAGGGACCTCGGGAGCTCCGACGA -GACATGTCAGTTCTTTGATCAACATCTTGGAATTCGCTCAGAGTTCCTTTTCATGAATGC -CTAGTGATTCAGAGCACACCAGTGGAACTCTGATCTGGTGCTTGTTTTAGCAGAACTACG -GGCTTTGCAGGTCTTTTTTTCACGCGTCGGTCAACACTGCTATTCGCCATGGCGTTTGCC -ATCGAATCTCAGGGCGAGACCAACCCACTTAGCCCTCAGAATGTGCTCAATACGTTAGTC -CTTGCAGCGAGCTCGTCTCAGCAGCAAGTTCGAACGGGCACCGAACAGCTCCAGAACTGG -GAGAAACAGGGCATGTACTACTCTTTTCTACAGGTATCGACACTCTTGGTTGTTGTTGCT -GTTTCAAATTAATAACACATTTTTTCACATACAGGATGTCTTCCTTGACCACTCCGTACC -GAATGAAGTTCGCTATCTCGCCATCATCCAACTAAAAAATGGCATCGATAAATACTGGCG -CAAGACAGCACCAAAGTATGTCACCGGGATATAGCCAGGATGAGGACATGCTAACACTCC -TGGAAGTGCGATCAAACCGGAAGAAAAGGAGCATATCAAGATTCGGGCTCTTGAGGCAGG -AATCGTGGAACCTGCGCCGCTGTTGGCTCTCCATAACTCGCTGATGATTGCAAAGATCAT 
-GCGATTTGAGTTTCCTCATGACTGGTACGGCAAGATGCAGCTGTGTGTCTTCTCATTCTA -ATTTCGATCAGGCCCGATGCTATTTCGTACATCATTTCCTCACTCCGATCCTCCGTCCAA -CCTGGAGCAAATCCACTCCAGCTTCCCCGAACTCTCCTCATATTGCTACAAATAATCAAG -GAATTATCCACGGCCCGAATTCAACGAACCCGACACAATCTCCAATCTGTATCCCCTGAG -ATATTTCAGTTACTGGGAGGCATCTATGTCGATAAAGTCAATCAATGGGGCACGATACTG -GAGCAGGGCGGTGCGGGCGAGGGGGAGTTGCTGGAGACACTCGATTTGAGCTTGGTGGCT -CTAAAGGTCCTAAGACGCTTGATAATTGCTGGCTTCGAACACCCGAGTCGCAGCCAAGAA -GTACAGGGCTTCTGGGTGCTGACGCATTCGCACTTCAGCAGGTTCCACTCCCTCGTGGAC -GGGTCTACCAATCTACCCGAGCCAATTCATCGGGCTATCGAAAAACATCTGCTGCAAATG -TCGAAGTTGCATGTTGAAATGGCCAAAACACACGCCGCATCTTTCGCGTTGCTGCCAGAT -AGTATCTCGCTTGTCAAGTCATATTGGTCATTAGTTGTCACACTCGGCGAGAAATATGAC -AGCCTTGGTGCTGGAGGCGAAGATGAAGGGAAATCTTTGACTGAGAAGACCGGCCTCCGG -GCTCTGCTTTTGATTCGTGCTTGCGCAAAGATGGCGTTCAACCCTGCTCAGAGTTTTAAG -TATCAGACACCGGAAGACAAGGAGGAGCGGAAACAATCCGTTCAGCTCGTCAAAACGCAA -CTGTTTACCGAAGAATTTGTGATTAATGTCATGGAGCTACTCGTCACGCAGTTTTTCAGG -TTCCGCAAGATCGATTTCCAAGACTGGGAAGAAGAACCAGAGGACTGGGAGAAGCGAGAG -GAAGAGAACTCTGACGCTTGGGAGTTCTCAATCCGTTCATGCTCGGAAAAACTTTTCCTT -GATCTTGTGATCCATTTCAAGGATTTATTGATTCCACGTTTGTTGACCGTATTCTATACT -TTCGCAAGTGAGTTATCGATGAGCTATAGTTGTGCATGCAGAACTCTAATATATATACTA -CAGGCACCGACAACCACAATGTACTGTTGAAAGACTCGTTGTATTCTGCCATCGGCTTGG -CTGCGGCCAGTCTTGAGCAGCACCTGGACTTCAACGCATTCTTGGAGCATACCTTGGTTG -CTGAGGTCCAGTCTCAGGAACCAGAGTACAAGCTTCTGAGGAGAAGAATCGCGCTCGTCC -TTGGCCAGTGGGTCCCAGTTAAGGGAAGCGCATTGAACATGAACGCAATTTACCAAATCT -TCCAGCATTTGCTCGGCAAGCATGATCCATTGAATGACCTGGTCGTCCGCATCACCGCCG -GTCGACAACTTCGGAGTGTACTCGACTCATACGAATTTTCGCCTGAAGCATTTATGTCAT -TTGCGCCTACTATCTTGGAGAATCTCATGTCTCTCGTCCAAGAAGTAGAATCATCGGATA -CCAAGATGGGGCTTCTGGAGACGGTTCGCATGGTAGTGGTCAAGATGGAAGATCATGTAA -GTTGTTTTCTCGTGCTTATTGGTGGCTTCGTAACTCATAAGACTAGATCACGCCCTTCTC -TGATCGAATCATTTCTTTACTCCCACCGTTGTGGGAAAGTTCAGGCGAAGAACACTTGAT -GAAACAAGCCATTTTGACTTTGCTTTCATCTCTGATCCAGTCGCTGGGACAGGAGTCTGT -GAGGTATCACTCTCTCATTCTTCCACTGATCCAAAGCTCCGTAGAACCCGGCTCGGTAAG -CCTTCCAGTGAATTATGCTGCACATACCAACTTAGCCTGACCTTTTGCCCAAGGAAACCA 
-TCGTTTACCTTTTAGATGAAGCATTGGATCTCTGGCATGCCATTGTCCAAGCTACGCCAT -CTCCTGCCTCACCGGAGATCATTTCACTCCTCCCATCTTTGTTCCCCATTCTCGAAGCCG -CAACCGATAGCACCCCACAGGCTATTCAGATCCTTGAATCTTATGTGTTCCTTGCGCCGC -AGGAGATTCTCAGTGACCGTTTTCGCTTCCAAATCATGGTAATCCTTGAATCACTACTCA -AATCTACAACCAAACAACGCCTTGGTGTGATACCTCGCCTTGTTGATCTAATCCTTCGCG -GTGCCGAGACGGTCGACGGCGGTAGCGAAGCAACATATGGCATCATTTCCCAATCCCTTC -TAGACAGCTCACTCCTTGCTTCCCTGCTAGAAGGCCTTTACTCTGCCTACGAAGTGAGTC -AGACGTCTGGTCCAAACAGAAAGTCCTCACCTGTGGTAGGTGTCGTCGAGAGCGATTACT -ACTCTGTGTTAGCCCGCCTTGCGCTGGCCAGCCCTACCATTCTAGCCTCATCAGTCGCCG -CTGCGACTAACTCATCCGAAGAACAATCCTTGACTTGGATCTTGACCGAATGGTTTTCGC -ACTACGACAACATTGGAAGCATTAACCAGAAGAAACTTCACGTACTCGCCCTCACCCAAC -TCTTGGCCCTACAAGGAGCGCCACTCGATCCGTCTTCCCCCCCTCCACCCCCTGCCTACA -TCCTCTCCCACCTACAGTCCTACTTGAGTGTATGGACAGATCTCATCACTGAATTAGCAG -ACGGTAGTACTGACCCCAACGCCGATTATTTGGTCTGCTGGAATGCACCAGCCGGGTCAG -AAACCGCGATGCCAGAAAGCACCCAGGAGGTCGAATCGCCTGAGATCATCCGTCGCCGTG -ACTGGCAGTCTGGGGATGCTATCCATCGATTCATGATCCGAGACTTTGTGCGCCATCGTC -TACAAGAGGTCATTGCTGGCTGTGGTGGGGCACAGCGGTTCCAGGATGAATGGCTAGTCA -ACGTTGACACCGAAGTTGTTGCGGCGTTTGGAGCACTTGGTTTGCTCTGAGTCAAGGAAG -AGGATCTTACTCACTGCTACATCAGCATAAGCATTTATTCCATTTGCATTAGCATCATTC -TGTTGTACATGCTTATAACACCCCTCCCTATCTTGTTCCACTTGCATGACCATCAGGTCC -ATCCATACCTGTTCCACGCTTAGCACTCTTGGAATTTCAGCTACATACCCAATAAGATAT -TTTCTCAAAGTCTCTTGTAATCGCTCAATTATAGGTTTCTGTGGTGCTTGGACTTTTATA -CATCGTGAAGGATAAATGAGTTGCTGGGATTCAATGGCTTTTTTTTCCACCTCTACGATG -TATCCATTTCTCGCAAGGGGAGAGAACACAAAAAAACATAACTTTTGTCTTGCCAAAACA -CCTCCTTCTCTAGAGAATACTGGGGGTGTTCTGAGGTCATCGAGTGATAAAGTTAAGGTT -GCCCGGTAGGCTCCACGTGTACCAAACCTTGCAGGGTTCCAAGGTTGGGGCCAAAGTAAA -ACTGCCCACGTGGCTATCGTTCAGTACGAAGAGCAAAGAATCTTCAAAGAAGCAAATAAT -ATCAGTTGAATTAGTTTCACAACCTAACCAGCAAGCCCTAAGAATGAAGCACACAATAAC -ACCTTGCAGAACTTGCTTGCTTGATGCAAGCCCAACAGACAATTAAACAATATAAATCCG -AGCTGGACAAACAGTGACAAAGAAAGACTCGAAAAACTTCAAAATAAGATGCATGCTCCA -CGCAACATTTGAAAACCAAAAAGGGCAAAAAGGTACGTAACTAATAACGCTCAGCACTGG -CCGGGGAATTCACGGCTAGTGACAGGGTTGGGGAAGACACCACCAGCGATTTGCTGCTGC 
-ATATCCCACTCAGGAGCACCCTCGGTCTTCCAGGTCCAGTAGAGCCAGCCGGTACCTTTC -TCATAGGCATCGAGCTGACCTTCAATGAAGCGACGGAGGTTGCTTCGGTCATTCTCAGAC -AGAGCAGCCACGGATCCAGTAGACTTGCCCTCACAGCTGCCGATGTGGCTGCTACCGGAG -AAAGTACCATCATAGCGGGCACCGATACCCTTTCCGTTCAAGTACTTGGCACAATCAGTC -ATGGCGCCAGTCCACTCGCCAACAACAGTCCACTTATCAGAGGTCTGAACGTGGTCTCTG -GCAAAGCTGCAGACGTTGTTGACGTGGGATTGGGTATCCATGGCGAGAAGACCGCTGTCA -AAGACTTCGTAGTGGTGGGTATCCATCATCACATACCAGACACCGGTAGATTCGCTCATG -AAACCGTTCCATGATTCAGTAGGGACGAAACCGTCGTGCAAAACGAGAGTGGTGTTCTGG -CTTGCCTTGCGGATACGGCCCCATGAATCATAGTAGTACTGTCTCAGACCATCCTGGCTC -ACTCCACCTGGGATGCTGGGCTCATTCAAAGCCTCGATGGCAGTCACGACGTCTCCGTCA -CCTTCGTAGCGTGCGGCAAGAGCCTCCAGGGCATCCTTGGTGGTCTCAACAGTATCACCC -TGCTGCCATGTAATAGCGCCGCGCTTTCCACTGTTGTCGAACCCATTCTGAGAGCCAGGA -GCTGGATTGTTGGTCAGTAGTTGTCATACAAAATATAGTTGCATAAACGTACCTCCGTGC -AGGTCAACCATGACCTTGAGACCGGCGGAGCGAGCCCAGCCAATGGCCTGGTCAAGATAC -TCAAGCTGACCATTGACGTATGGGTCACCATCCACGTGCTTCAGGGCCCAGTATCCAACA -GGGATTCTGACGTGGTTCATTCCTGCGCTAGCGATTTGGGCCATGTCGTCGGCAGTGATG -AAAGTGCTCCAGTGATCGGAAAGAACAGAGCGACATCCGTCCGCGCCCAGAGTTGCGCAA -AGCGACCACTCATCAACGGCAGAATCACCAGCTGCGTCAAAGATGGATGGAGTGATCCAT -GGCTCTAATACGAGCCAACCACCGAGGTTGACACCCCGGATCTTGCTGACACCGTAGTCG -TAATTCAGACTACTATACCCCGATGTATGTTAGTATGTGTATAAACTGGGCACAATAGAA -ATGTGATTCGGATAGAGAGGTACCACTTACACTGCGGGTGTGGCCTGCACCAAGGCTGCC -AATAAAGACAGCCCGAGCAATGACTTCTGCGAAAACTTGGACATGGAGGGCATATTGACA -ACCCTTACTTGGTCAGACCCAGAAAATCAGGAGGCTGGGGTtgaaagatgagaaatgagg -aatgaagatgaggaatAAATAGATTTGAGTTGAATGACTAGAGGTCAGCTACTATAAGTA -ACTCGACGATGGGTGGCTGATCAAGCCGGATGCAAGCAAAGAGTTACTTGGGCGAGAACG -GAAATCCCCACGGGGCAGAGCAATTGCAGTAAAGCGTACCTGTATAGTATTGTTCTGAAT -CAAGAAACAAAGGTAGAGTGGCCGAAAACGGTGGTGGATCAGGGTATGGGGTTTACGTAT -AAAGGCACAAGGGCTTTGTTCTCTTGGAAGGGTATATCGGGAAGCGGCTTTGATATCCGG -GGTGGAACTGGAGACGTGGAGACTGTGAAAACGTCTCAACTTCTTCAATTGGTGCTCTAC -TTGTTCTAGTTGGAAACTGCCACCTAAAGCAGTAATCTGGGGAAGCTACACTCCATATAC -CTCCATATACGTCTATTCGCCCCTACTCTATACCAATTGGATTTCTCCTGCCACCTTTCA -ATTCGATTCTCATATTGACTGGATACTTCTAATACAGACTAGACGTATGACATATTTAAG 
-GGGAAATGTGCCAAATGTTTGCAGCAAATTTTCACATTCTTTGGGGTGATGTTGATCAAC -ACATGGCACTTGGACGCTTTTTATGGAATGTAATCTGCTGGCTTCTGGACGTCCAGCTGG -AATTATATTTACCATGCTGGTTTAAATATGAACTCCAGTCATCTCGGGTCATACAGATCA -AGGTACGTTGTTTCACGGACCCGTAATATTCTGGGCCTGAGAACGTACATTGCTGTATTT -TTTGATGCTCAAGCCTTTGGAAGATCCAAGGGTTATTCAAAATCTACTATACTGTCTAAT -GCACTGGGAACATGGAGTTGGTATAATCAAGGCAAGGCGGTCGAGGCCTATCGTCCTTGA -TACTTCCGTTTTCGGCGAGTGCATTATATCCGAGACAAAAACAACCAAAATTTATGACTT -GCATTGTACCCCAGAAAAGTTTAATGTTACACTGCTCTACTCTCACATCCGTCTATTCAG -GGCCATTATCCCGTCAAGTAGGGGTCTAGACTTGCGCCCGGGATTTGAATTTGGAACCCT -GAGATCACAGGCAGGATTTCTAGGCTGGAATGTGAATGTCAACATATCGGAATGTACATC -CACATGTTTCGCAGGTCTCGGCACTTGCGGTCGTTCAGCCAAGTCACTGGACTTTTAGGA -ATTAAGGTAGCTCATCTCAATGAGGTTAGAACAGAGCCAAAACTGGAAATTATCTTCAAA -AGTAACAAAGGAATTTTCGATTAGCGTCTAGACCAGGCGCTCATTTATAGCGGGCGCCGC -GATTTGGGCTGAAGCATACACCTAATTCGGTCCCTCGTTACTATGAAAACCTCTCTGGCC -TTGACATTTTGACAGCTGTCCGATCGGAGACCAATGTATTGCGAGAATTTATAACGTAAA -AAATGGAAGAGTTGAGATTTCTGTACAAATTCTATACTACAAATCCCTTCCAGAGAGCAT -CAATTGTCTCTAATTGATCTCGGATTGAGCCTCGGGGACTGCCGCATCCAAAGGAGCGGA -AAGTCCGACCCGGTCAATAGTGCGAGTGTAAGCAACACCGCCATTGACTCTGTCCTCGTG -CACAACCAAGTCCTTGCGGTTGGTGTCGACAAGAAGACGGATCAAATCCGGGCGCATGTA -GTGACCGCCGAAATCGGCCAGAGACTTGGTCAGATGGATCTCGTCGAGGTCAATCTGTTT -AGAGGACTGATTAGTTTGGCATTCTGGTCAAGACAAAAGCATCATCCACTTACGTCAACG -TAAAGAAGTCCCTGGAAGTCCTTGGCGGGGTGAGGAACGAGGTTCTGTCCATCGGGGCCA -TAGATACGACCGTTTCCATTGTAGATGTTGGGATCCTCGAGCTCCTTTCCAGGAGGGGTG -TTGACCTTGATTCCCTCCTCAGTGATTGTCTGCCAAGGTGCAAGAGTGAACGAGCCGGTC -TCGATCGCATAAGCAGGGGTAACAAGCTGAGCAGTCTTGTCAGTAATCAGATTAACTTAT -CGCATAAAGAAAAAAAGGGGGAAATATACATCTGCGTTTGCCTCAGCCACATTGGTGTAT -GGGTCGGGATACTTGAGGGTCTCCTTTCCGGGATACAGAGGCCAAGCAGCGATGTGCACC -TGTTCGCCGAGCGAGGCGGCGTAGGCCTTCATGAAGGGGTTCATGTTCTCCCAGCAGTTC -AGATGTCCAATGCGGCCAATCTCAGTGTCCATAACTGACTCGGTGGTGTCGCCAGTGCCA -TCACCGAAGACAAGGCGCTCAACGTGGGTGGCCTTAATCTTGCGACGGTGGTTGATGACA -TCTCCAGCTGGGCTGATCATGATTTGCGTGGTGTACAAGCTGGCGAGATCAAGCTCCGAG -TATCCAAGTGACACCCAAATCTTGTTGGCCTTGGCTGCCTCGCGGATGCGGCGCATCTCG 
-TCCGAGTCGGAGGCGAGACTGTTCTCGCGATACTTCTTCAGAAGGGGTAGGCTCTCCTGG -TAGTTGACCTTCCAGGCCCAGTAAGGGTAGCCAGGGATCCCTGTTTAGATTTATTAGTAC -ATTGTACCCGGTAGCTGGGACTGTGGTACTAACACAGCTCGGGGAAAGCAATCAGCTTGC -AGCCCTCCTTTCCTGCTTCATCGATCCAGTGGATAGTCCGGCGAACGGATTCCTGGAGGT -CGAACCAGCCAGGCTCAGCGTTGACTGCTGCGGCCTTGTACTTCTTGAGAACGGGAGCCA -TTGTGAGATGTTGATTGTACTGTGACGAGTGTTTGCCTTGAAGAATGTTGTGTAGAATGA -AAGTCTCTCTTCAAGCCTGGGAAGAACCTCAATTTATAGCAAGTCGAGCTGTCTTGTGTG -TTGGCTTTGAAACAACGCCCTGCACCTCCCAAACCCCGAACTACAGAATGAGCAACATCA -TGAGATTCGGATGCTCTCGGATTCGGGGAATGCTGAAATCACTGAACAATGGTCATCATA -GCAGTGAAGCCCAACTCTGAGCCACGGGAAATGATAACGGGATTTGCTTCGATGTGGTAT -CATGCCAGAGGTGAGTTCCCGGAAGTTCGACATTACGCATCAGTAAGCATAAAGTACCTA -GCCCGGGATCTGTTAGGGTTGTTCTAATCACACCATGATAGGTGGGCCTTATTGTTGCAG -AACTTCGGTGATCCGCCCTAATCCCGGGGCCCTACAGTGCTCTCCCGGATTGTTCCACCC -AAATCCCGACATCAATTCAATCCCGACAGAGACGATCAGTCACCATTCGAGAGAAAACAA -GGGAATTGCTTTGTTTCAGTCTTATTGCATCCCGAGGATCTTGCATCTCGATAGGATTTG -ATTAAACAATCGTATATGACCAATCGCCGCTCTCAGATTCATTCTAGACACAAATATCTT -AAGTAGGTCATTTTGCACCTACGCCGTCATATTTCAGCTAATGTTACACTTTTCCTTAGT -TTTTCGGTCACCTTCTATTACTGGGCTGGATTTATCGTCGATTCCGAGGTAATTGAACGT -TGGGTGCTGGTATGTCATCCACAAGAATCGGTAGTACTAATGTCGTCTGAGTTGACTAAT -TAATGAATAGGCCTTTATAAAAGGTTTAATGACTTTACTCAAACATGTCCCAGCACAAAT -AGATGCTAGGCAAGGGCTTGAAGGTCGACTAAATCAAGGACATTCGAGTTTCAAACCTGT -CTACGAAAGTGAGTATCGCGACAGTGACTCCCGTAGGCGATTTGATTGCTTATTCTAAGG -ACAACCGAAATATGTTCCTCTCCTCAATTCTGGCCTAAGTGCAAATGTCCGAAGAAACGA -CGATTGGCCAGTGTAACACCGCGGGCATCAAGCTGTATAAGCCTGCGTTCAGCACGTTGT -CTAACATCGAATCCTGTTGAGCAACTTTTCTATTGAAAAAAAAATAAGACACACTTAATA -GTATTCATGACAAGTCCATTAATTATCTGTCTCTCTCCCCCATATATCCTTATCGTCGGT -CGCCGATTGCTTGGCGGTGGTCGCGTTTTCAATCATTTCATCAATCCCTTCGATATATGA -CAATCGGGGCAACTTTGTATCCTTTTCAGCGAGTGAACTCTCCAATTTCCTATCAAAAAG -ATGGTCCGGAAGATCCTTGACTTGCATTTTTATCCCCTTCTCAAATCCATAGGTACAAAG -CGCTTGCAAGTTTGTTGGGAGATGATCTACAATAGCAAGTGATGCATCATCGGTCTGATC -AGCACCAATTCCTCGCGAAAGGTAGTAGAGGGTATGGATTCTTGACTCAGATTCTTCGGT -TTCGTGAGCCCACGAAGAGAGCAAGGCGATGAGTTGACCACCATGCTCCAGCTGGAGCTC 
-TTGGGATTCATCCGCCCACTACTGCTGATAGGCAAGCTGGGATCTCGGCGATAGATTATC -CATGGTGCCAAAGAAATATTTTCCAAAGTAGGGTCGGATGTCTCCGCCAGTGAGGTTTCC -GCTTCAACATTCAAATCCAGATGTACCGACAAATCAGCATGCAAGAGAAGCGCCCGGAAG -ATGTGATCCGGGTAGGGCGTGTTATAATGCCATCTATCGAGCCTCAGCCACCGATCGCAT -ACTTCAACTACTCCAGCCGTTTAGATGACTCAATGACCCGCATCAGATGGTAATACTCTA -CATTGGAAACCCAAATGGTTAGCCAAACGGGCGAGACCAGGCGTAGTCTGCCCTGTTTTC -AAGCATATATGATCATCCAGGTAGGGACGCAGAGGAAAGCTGGGTGACCAGAAAGCAGTG -TTTCTTAGTCCTGGTAGATGTTGAAGGAACGCGTCTCTTGGCTCTTAATCCGGCGCGGCA -GACGATCCTTATCAATCTGTTTGGGAAATTTGAGAACGACCAGAAGCTCTCTGTTTTGCG -AGGCTTTCGCATTAGGGTGCCAACCCGACAATTTTGGATTACTACACCTACACGTTTAGA -TCCCATTTTTCGCCTTTATCCAGAGGTAAAATTTCTTGTTTCAATCAACAGATAGTAAAT -CAATACCAAGATATATAGATCACGTTTGAAACCGCGAGAAAACGCGAGAAAACGCGCGAT -TTAGAGATTTCTTAGACTATAGAGGTTTATCTAAAAAATGGTGGTGCTCCGGCATAGATC -GAAAATATATATCTGATGTTAAAGTATATATGGATCTTTTTCCGACCTCCACTGGTTGGT -TAGTTTTCATCTAAAATTAGAGCATACACTTCAATAGTAATGAAGTTCCAAATTCTCAGT -CTAAGGTCTATACAATTAGTAGGAGGGTATCCAGTTGAACCCCCATGGGGGCCTTATGGG -CCCCCACGAATGGGCTGTGGGTTGGGATGGGCTTTGCGATTCACTAGTTCAAATTGGGCT -TGTTTAGACCATGTTTCGCAATCGGACTGTCCCGTCTGTATAGCATTAGCTGTTGTGATT -GGGCCCGTCACAAATTCTTGATTGGAAAAGCACTGCCATTGTTACGTATCttgttttttt -tttttttttGCAGAAAAGATGGCCGAGTTACCAACTTCTCAAAGCTATAGATTGGCCTAC -CTTACGTACAATACGCAGGTGATCTGGCACTTATAGGTAGTCTTAATCGGGGCTAGTAGA -ACCAACTAATTGACCAACTGACCTACTATTTGAGACTGGTTCTAGATACTCCACATTTGT -GGTCAGTCAAATTTTCAACGGGATCAACATCCCGGCAGCTTTACAGTTCCTCAACGGCAG -TGAACAATGGGCCTAATGTAGTTATGTTGTGCTCGTTATGGCGAGAATTGAGATGTTATT -CTTGTTGGTGAGCCTGAGATGGGATAAATACTTAACCAGGAAGGAAAAGCCATAGTTAAC -TAGATCGAGTTCGAATGACATAATGGTCTAGTCCACCGGGATTTTCCTAAATCAAGCAAG -AATTCCTTTGCCTCAACGCTTTGCTATTCCGGTTTTCTATCGCACCATTCTTCAAGTTCA -GATCTGATTGGAAGATCAAGTGGACTTCGGGGCTCACCGTAAGCCCCGCTCGTAGAACGC -GGGGAAGAAACGTGGGCGGAGCATCCAAGCCCAAAAGACAAAAGACACCGTCATGAGCTT -AGGCGGAGGCAGGTTCAACGACTCGCCAATCCAGAAGGGGGGATCGTCGCGCCAAGCCAG -ACTCGAAGGGTCAAGGCGGTCTTTTTTACTCTTAAATTGAAAGCTGATACAACCTTAAGC -TACCACGTCAGAGACAGTAGGTGCAACAGTCATCCGTGAAAGGTGAGTGAATGATAGATC 
-GTATGCCATGGCCAGACAGACCTATTTATCAAATATCCCGCTCAAGGTGGAGACGCTGGT -GTGCCATGCAATCCGGACGCAAGGGATTCGATAGAAACGCGATCCTTGCTCGTAGCAATG -CGAGATCAAACCATTCCAGTAAACATGACTTCATTCTTAGACTTGGCCTGGGCATACGGA -TGAGCTGCATATCTCGGCCAACGGATGAGACAGCAAGCAATACTATTTCCCTGGCACGTG -TGATGCATGTATCTGGTAAAGGTAGCTCGGCTATCGTCCGTGTTTTTGCGATCGGCGTTC -AGTTCTCGCGGTATTGGCCAGAGCAATCGGAACAGACCGAACATTCCGTCGCGTGAGCAA -ATGCCCAGCCAAGTTAGGATCATTATTTTGCTGACGAGCAATGCTCCACGGAATATCCAC -CACTGCCGTGATTTGTCACTCCCTAGAGAAGGGACACAGGAGTCTAGGAGTGAAAACCTC -GGGTCGGCTGTGTTCGGATAATTTTGGTTTGTCTAGAGAACCTACAGCTTGAATGCCAGT -ACGACATAGCAGACAGCTCACCGTCTGTGACGGGTTACTTGTTGACTATGTGCATATCTA -CATCATAGCTGCCTGCCTTTTGGCTGCTGTACTTGACTCCTGCGCTGTCAATAACGCCAA -GCAATGACTAATTTTCCCTGCGATCAGACACCGACATGCTCGCTGTCCCTAGTGAATCTG -AATTTCTGGGCATCCTGTGTACTTCCTGCGGGGTTAGCCCTTGTGGAGACGTCTCTGCCA -AGTGGGCATGTTGTATGCAGGGAAGCACCGTGCCATGAGTCTGTTAGTCCGGCGTACGCA -GGGTGCGATGTATTTAAATCCAGGGACCGGCCCAATCATGCTCAGAATGTGATATTTTAT -TCCGAAAATCTTTGGTAATATCGCTGTGGTGTGCGTATACTTCTGTCGAAGATCTGAGTA -CACATCCGATCAACATGCATTTGGCTCGGAGCTTCGTTTTGTCGTGCCTGGTGGCAGCTG -GTCTAGCTCTGCCTGGTAAGGAAAGTACCAAGCACCAGGAAAATAAGGCTCGCAGTGCGG -GTTACTCTCTGAAAGAACCTCCCTTGACAACACCATGGACTGATAAAGTCGGAACGAAGC -CATGGCCCGAATATCCGCGGCCATTGCTGCAGCGCTCTGAATGGAAGAATCTCAACGGGG -TGTGGAAGTACCAAAGTGCTCCGAGCCTCGATGCTGTTCAGCAACCACCTTTTGGACAGG -AACTTGCGCAGGATGTTCTGATCCCGTCGTGTCTTGAGAGTGGATTATCCGGTAGGTTTT -GCCTTGCTTGCCACGTTATCAAACTTCATTGACAAATATTCGCAGGAATCCAAACCGACT -CGGCTCTGTACTCTTGGTTCTCAACGTCATTTGAAGTATCTTCTTCCTGGAAGGATGAGC -AGGTACTGCTCAATTTTGGAGCGGTAGACTATGAAGCCACAGTCTTTGTCAACGGCAAAG -AGGCAGGCTTCCACCGTGGCGGATACTTCCGCTTCGCCGTAGATGTGACGCAGTACCTGA -AGTTTGATCAGCAGAATGAGCTGTGAGTAACTCGTGATCTTGACACAGTTGACTTCCATA -TCTGACTTTTGATACAGTCTTGTCTTTGTACATGATCCCACTGACGATGGGGACTATGTC -ATCCCCATTGGAAAGCAGACCTTGAGACCTAGTCACATCTTTTATACCCCATGCAGTGGT -ATATGGCAAACTGTTTGGCTCGAGGCTTCACCAGCCAACTACATTACTCAGCTCGATCTT -GACGCAAACATGGATGGCCAAGGTAAGTGAGAGGTGACACTCCGGGGAAAAGTTTGATTC -TGACTGCATGTAGTCAACATCACTGTACACAGCTCCGCTACGGATGAACAGGCCCAGGCT 
-GAGGTAATTGTCCACAAAGATGGCAAGACTGTTGCAAAGCATCAAGGCCCCACCAACAAG -CCATTCCAATTTACCGTATCCTCGCCCAAGCTCTGGTCCCCGAACTCACCTGAGCTGTAC -GATGTGACTGTCAAACTTGGGAAGGACAAAGTGGAGAGCTATACTGGCTTCCGTACAATT -TCTAGGGGTAAGATTGACGGGGTTGAGCGACCATTGCTCAATGGAGAATTCATCTTCATG -TTCGGAACCTTGGATCAAGGCTATTGGCCCGATGGATTATACACACCACCCAGTAAAGAG -GCGATGGTGTACGATTTGAAGATGCTCAAGAAGTTGGGCTTTAACATGGTCCGCAAACAT -GTGAGTGGCGCTGTGTTACCTCATATACCATGATCTCCGTGGCTCACACTGAAAATTAGA -TCAAAGTCGAGACCGATTTGTTCTATCAAGCATGCGACGAGCTCGGTCTGCTTGTCATCC -AGGACATGCCTTCCCTACGTCCATCGCAGTCCAAGAGGGGACCAGATGGCAACTCCTATA -CCATCCTACCGGACGAGAAGCAACAAGTCGAATTCGCTCGCCAGTTAGATCTGTTGATTA -ATCAGCTCAAAAGCTTCCCCAGTATTGCAACTTGGGTAAATTAATGTTTTCGGTGAAAGC -CAGGAATATGCTAGCGCTGACCACTATCTATAGGTCATTTACAACGAAGCCTGGGGCCAG -ATCACAGACTATTACCCCGAGTTTGAATTGACCGACAGGGTTAGGCAGCTGGACCCCACT -CGGCTTGTGGACTCGACGACTGGTTGGGACGACCACGGCGCAGGAGACTTTAGTGTATGC -ACCCTTCCCTGGTAGCTCACCCACGTGGCTAACCAAGATCTAACAGGACAATCACCACTA -CGCAAACCCACAATGCGGCACTCCCTTCTATTCCTTACGCTCGAGCCCATATGATCCTAC -CCGGATTGGGTTCCAAGGTGAATTTGGCGGCATTGGAACTAATGTCTCGATTGAGCATCT -CTGGAACAACCAAGCAGCCATCGACACTATCGACCAAACATACGAGATCGACGAAACCAT -CGAGGCATGGAACTATCGCAGTCATCTCCTCCTGAACGAACTCGAGGATCAGGTCCGCAT -GTACGCATGCAGTGGGGGTGTGTGGACGCAGACAACCGACGTTGAGGGTGAGGTCAACGG -GCTGATTACCTATGATCGTCGCTTGGCACGAGTCGACGTGAAACAGTGGCAGGCAGATAT -TAAGGCCCTCTATGATGCTGCCGCCGCACGGAGCGAAGCGGATTGAAAAGCTTGCTGTAC -AGTCTGGGCCATCCGAGGCTATCCGGATGAATGTGATGAAGATGGAGTATATAATCTTAC -TGTGGCTCCTGTTCTCCAAATGGAAACCCGTCACAGCAAAAATAAATGATCCTGACCGTT -CAAAAGCCCTAGAACCCAAGATCGGCAATCTACGACACGCATTCGTCACTTTCTTCAACC -CGCCTGTTTCATCTTGACTGGTCGATCATACTTCTGACAGCTGTCACCAAATCTCCCCAG -AAAAACAAATTAAATTGACTTCCAGAACCATGGAATCAAGCCGTTAGAATCATGGTAGGC -GGCAGTCTGCTGAATCACCCTTCCAAGATAGGCCAAGACTCCTGTTCTAGACCCAAACAG -TCGGAATCATCGTCACCAAGGCGGCAGTGATCGTCTACCGCGCTTGTTTTTGAAACGTCC -CAGATGGAATCCAAAAAACAAAACCAAAAAAAGGAAATGCCGCTCTAATCTATTCGGAGA -AATAAAAGAAAGATCAAAGCTCGAGGGATATGGACTCAAACTATTACCCCTTTCGCTCGA -GTGAAAAATTTTTCATTATGGGGTTATGTATTTTAGTCGACTGATCATGCCACACCCACC 
-GATCTAGCCCATCATGCCATGCCATCACACGATGTGGACGGAGTAGATTTAATAGATTTA -TGCGGGTAGATGGCCCATCGTAAATGGTTCTCCAATTTAGCGTTGCTTAACATGCATGTG -TTTCCTAATTTTTCTACGCCATCGCTCGTTAGCTCCGATTGCCCCTATTGCACATATTGC -AAATTGGGTATTCCTTCAAGACCCACACATCCACCTCTTTTCTAGAAGCATCAAAGTATT -CAGGGGGCGTGGTCTTTTTTGTTGGATCCAATTGTGTATATAGAGATGCTCAAAGTTCAG -GTTGATCTCATGTAGATCTATAAGGGTTCAAGTCTCAATGCCCGATCTACTGATGTATAA -TACTACGTCCTAAGCTACCCGAATATCATCAGGTCTCAAAGTCGGACCCGAACGGAACCC -ACTTCGTCGCGGGTCCTCAGCGAAACCGTTGAGCCCGTAGCGGAGTTCCGAGTGTCCGTA -GCCAGTGAGCGAGAGCGAGATGCAAATGCCGAATTATTTCAAGGTCAATTTAATTAATTT -GCTTTGTTGGCACGGGCTATGGTTCACGGAATCAAAATTGAGGATCGTGGTGTTCCATTT -CCAGCATCTCAAAAACGCCATCTGATCGTACATATATCGGGATGACATCGATGAACGGAG -GTCTGACAGCCAGCCCACATGCCTGTTGCCTGTTAGGTGCAGCATTCGTCACGTTTCTGG -ATCGCTTGTTTCAAGATGCCAAGTCTAAGAGGGTGCCACCTATGAACGCTACCGTCCTTG -GCTTTAATTTTTTTTTTCTTAGCTTGGAGATTGCAGGGATCAGCCCACACCACATGCCTA -GATAGTCACTCCACTCTAGGTACTTGACTAAATCATCTCACCCTGTGGTCTGATTCGACC -ATGGGCACTCTAATTGCTTATCGAAATGAAATAGTTATGAAAAACCTCGGCTTGATATTA -CTGTGCTTGCATGAAATTGGGTAAGATCGCTATTCTAGGTAGTAGTCACGGACCAGCTGG -TTCCAGGATTACCCAGAGCGGGTTCCGAATAGGATGTGGTTTACAGTTTAGTGGCTTCCA -ATCTAATCGTAGTGCATTTCAAAATATATGAACGGTGAACGGAGTATAACGGGTATCGCG -CATCCATGGTATAAATACTCGATCCGCCAGCCTTGCCACGAGGTTGCCCCAGTTTTATAT -TCTCTCTACTCGGTTTCCGATCTCCTCTCCATACTTCTATTTTTCCCCTTACCCTTTCAA -AGGAAACTATTTAGTCAGCGACTGCATACCAAGAGGTATCAACTTCAGAAGCATGGAGTA -CAGCACAGCATCGGGTGTTGAGCCCATGCATGTCGACCGCAAGCTGCTCTACACAAGCTT -GGAAGAGCGAATCAAGTATCTTCACCACTTCCTCGACTTCAACTCAAGTAAGTTCTTACC -ATAAACCCCTCTCATGAGTCATAGCCACTTACATTTCCTCAGGTGATATCGAAGCACTCT -TGACCGGCAACAAATACATCAAGCAATTGATCCCGGCAGTGGTCAACCTCGTCTATAAAA -AGCTCCTCCAATACGACATCACATCTCGGGCCTTCCACACACGGACAACAGTCGACGAGA -CCGAGCTCGAGGAAGACTATCTCCATGAAGAAACCCCAAAAATCAAGCGACGCAAGATGT -TCCTGCGGTGGTACCTGACGCGTCTATGCCAAGACCCGACCACAATGGACTTCTGGCGGT -ACTTGAACAAGGTTGGGTAAGCTCACATCCCCCTTTCCTGTTCCTAGATTCCCGCTCCTA -GGTTAATTTTCCGTGCTTGTGCAACGCGCTAACGAGGGGGTTTGTTACACAGCATGATGC -ACAGCGGACAGGTCCGCATGTCACCGCTTAACATCGAGTACATTCATCTCGGCGCATGCT 
-TGGGGTACATCCAAGACATCTGGATTGAAGCAATGATGTCGCACCCGCACCTGTCGCTGC -GACGCAAGATCGCGCTCGTGCGCGCCGTCAACAAGATCCTCTGGATCCAGAACGATCTCT -TTGCTCGGTACCACTCGCACGACGGTGACGAGTTCGCTGACGAGATGTCTGACTTCGCCA -GCTACGCTGAAGATCAGGAGGGATACCTTGGCAACAAGCGGATCCTTGGCAGCAGCTCCG -GTAGCTCAACTGAAGATGACCGGTCCAGTATTTCTAGTGGTGTGGCGCCTTCTATCGACA -GTGCTGCGCCGTCGACGATTGGAAATGCGTCAAATTCATCCAAGGTGTCGGTGTGTCCTT -TCGCAGAAATGGGCAAAGCCAGTAATTCGACTGAGACAAAGATTTGGGCCAATTGAGGGG -CTCCCTGTGTTATTGTGCGGGTCACAGTTTGCATCTTTCAGGCGCTAGGGGTTATGGGTT -TTGGTGATGGGGCTGGACTCTGGAAGTCTGGGTTTTTTAGGAAATACCTTTGATTGGCAT -AGACAGTGTTGGAAAGTAATTCAAAAAAGAACCTTGAATATATTGACATCTTGGCCTCTT -GGCATCTATCTCATCTTCATCTCGGCGGAATGCTTCCGGCTTTGTCATCTTTTTTTTCCC -CCCCCACAACATCCGCCGGCTCTTGTTCAAGGTAGTTGAAGAAGGCGTTAGCAGTTTATA -TCCGCAGGCAAATCGGAGATAATTGAGTAATTGTATAGCCCCTTCGGATATAATCGATCT -ACACAGGACCTTAATCCAGTAAGACATTGAGCTACAACATCAGACTCTACCACCCCCTTT -GTGTCCCCACCACCTCCGCCTCTCTCTATTTGGTTAAGATCAGAGGACCAGAGTTTCTCT -TTAAACCAACTCGGCCCCTTCCACATAGACTTGTCCCACCTCCGTGTTAAGATATCTGTA -TTATGCTTATTCAACTCGAATCGAAGGGGGTGACCCCGCTAATAATCAACAATGAGTCCG -TTGAGACCGATATCAAATTTGCGGTTCACGCCCCCGCCACTGGCGAGCTGAGTGGTTATT -GTGCCGGAGTGTCAGTTGATGATGCCAATCACGCAGTCGACGCGGCCCAGGCTGCCTTCC -CCGCATGGAGCAAGACCAAGGCAAACGACCGCCGCGACATCTTGTTGAAGGCCGCCGACA -TCATGGCCTCTCGCAAGGAGGAGTTGATCCAGTACCAGCGCGAAGAGACCGGTGCCGGTC -GGCCTTTCTCAGAGTTTACCTTCAATATGGGCGTGCTCTTCATCAAGGACTTCGCAGCCC -GGATATCCTCGATCGAGGGTGTCGTGCCGAATGTCAGCGGGGAGGGCGCAGGAGCCATCG -TATACAAGGAGCCGTACGGAGTAATTCTCTCAATTGCGCCATGGTAAGCTTCATGATCCG -ATCTGTGACATACACCGCAAATTATCAGCTTGCTTAAGACACTTTTTCTAATTGATTGGC -AGGAATGCCCCTTTCATTCTGGGTACGCGCGCTGTTGCGCTCCCTCTTGCGGCTGGAAAC -ACCGTCGTTCTCAAGGGCTCCGAGCTCTCGCCCAAGTGTTTCTGGGCTCTGGGCGACATC -TTCCGTCAGGCTGGTCTTCCTGCCGGGTGTCTGAACGTCATCTTCCATCAGCCCTCTGAC -GCCCCGGCCGTCACCAATGCACTGATCGCCCACAAGGCTGTGCGCAAGGTCAACTTCACC -GGTAGCACATTTGTCGGCTCGATTATTGCTTCTACCGCAGGTAAACACATTAAGCCTGTG -CTGCTCGAGCTGGGTGGCAAGGCGTCTGCGATTGTCTTGGACGATGCGGACTTGGACAAG -GCTGCCATGAACTGTGCCATTGGATCCTTCATGCACGTATGTAGTGTTGATTCGTCCGAG 
-ACAAGGGATATCACTTACTAACGAACGATACACAGTCCGGTCAAATCTGCATGTCGACCG -AGCGGATTGTCGTGCAGCGTTCGATCGCAGACGAGTTCCGGCAGAAGTTTGCCGAGACAA -CTGAGAAGTTGTTCGGCAAGGATGCGCCCGCGCTAGTCCTGGTCAACAGTGCGGCCGTCG -CTAAGAACAAGAAGCTTGTAGCGGATGCTGTGTCTCGCGGGGCCAAGCTGCTGTTTGGCG -ACGCCGATACAAGCGAGTCGATTAACACAGCCATGCGGCCGATCGTCGTTGATGGCGTCA -CCAAGGAGATGGACATGTACGCGACCGAGTCGTTTGGCCCGACCGTGTCTTTCATGGTGG -TGGACACCGAGGACGAGGCTATTGCACTAGCCAATGACACCGAGTACGGTCTGACTGCGG -CTCTGTACACCAACAATCTGTTCCGCGGGCTGCGGGTGGCTAAACAAATTGATTCTGGGT -GGGTTTTTTGAGTTCCGGATACAGGTGGAGATTGGAGGCTAACATGTTTTTTTTTTTTCT -AGTGCCGTGCACATCAATTCTATGACGGTACACGATGAGACTGTCCTCCCTCACGGAGGA -TGGAAGAGTAGCGGCTTTGGTCGCTTCGGAGGAATTTCTGGATACGATGAATTCCTCCAG -ACTAAGACTGTTACTTGGCAGGAGTAGAGGTGACTGGTCTCTCACAGGTATATGCTTAAC -CAAGAGTAGATTGGTGTCTACTCCGTAGGTACATGACAGACCTGTTGAGTTGGCCCCGTA -TGGGTCCGTCTGGCCCCCGGCAGCATTGGGCCGCCACGGGTGACGGTTTCCCTTGTTGTA -TTGACAGTGTTCTGGGCTTGTTTGGAGCCTAGTTGGGGGCTTAGTCGGCTTTGGTCCGAA -CCTTGCAATCAATTCAACCAATGGCGGCGCCTTCGGGCTTCGCCTAGAGGTGTGCTACAA -GGATGGGGTCCGGATACCCGGCTTATACTCCGTACGGTACTACCGTATAGACGAGTTGAA -TGACTCGCGGGGAATCCATGTGATGTGTGATTTGTGTTGCATCGGGCTTACAGCGTAGTA -TATGGCTATACGGGATAATTTGGTGTTTTTCACTAttatttttttattttattatttttt -ttCGGTACGGAGCCTCCGTTGGTCTGTGTCACTTTGGCCATGAGGCTGAAGGTCATGTGA -TCCGACTCAATTTCTCAGATGAAATCTAAAAATACCTATTTCGTAACAAATACGGAGTAA -AGGGCTTGAATCCATGCTTGGAAAAAGAGAGATTGCAACGCCGTGTAATCATTCCTTAGC -CGGCTCAAGTATAATGATGCCTGAGGCTCGATCGGGCTTGAACGAAAGTAAGAACGGTGC -TATCCACAAAGCCATGTTGGCAGAACACTTGGGCTCGCGCCTTCCACCCCGACCAGCACA -AGGCGCATTTTGAAAATATGCTCCAGGCGCTTGCACGATCAAAGAAACTTCATGAATATT -TAGCATGCCTAAAGCAAGCCAACCGCCGACGTAATCCGGGGGTTTCGTTTTCCACAGCTA -CTCTTCAAGTGAGCTTCGTTTGTTTTCTGCCGGACAAACAAGTGGGCGGCCGTGGGGCTA -TGAACTGTCCGGAGGAAGCTTTTCCCTACATTTGAAGTCTCGAATCCCAATACTTGATTT -AATACCAAATAATTTACCGAAACGAAGTCTAGTTCAAATGCAGCCTCAAGACCCGATCAA -GAAATATAACCCCAATAGTGGCGGCGAAGGCGCTGACCACAAGTCCCACCCTAAATTCAG -GCACAGAGCTGTAACAAATCACGCAAACTGCCGGGTGAGACAAAACCCattatttatttt -tgatttttatttctcattcttaCATGTCTTACTTCTACACCGTAAGAAACAAGCGATTAC 
-ACTGTGCAAACAACTTAGGATACTGCTATGATACTATGATATATATCGTATTCCATATAT -GCAGGTTAACTTTGACCCCCCCTGAATATCAGGCTGTTGATCCTTTCCACAGAGAACACA -ATCCCCGACGAACCGGTGGGATCGGCGAGTTTGATACTGCCCTACAGGCCACAGACCTCT -GTGGATCTAGGTGGTGATAGTCGCGATATGTGATGAATATCAGTGATTATCAGAATCTAC -CCCCTGCAGGTTAGGTTTGAAAACCTGTTTGCCCGTGGAAAGACACATGGTATGTACCTG -CATGGCCCAACGCCCTGTGCTTTATCTGTACAATACGGTGTATGACGCCCGTTACAGGGC -CTGAGTTTGAGGGGCGCAATCTCGTAATTCTGCAATCTCCCAATTTCGTAAAGGATCTTC -CTGTTACAATGTAGACGCGAGTGGCGTAAATGAGGAAATCAATGGCTTGAGTTATATACT -AAATATATCTACGTACATTATTCACAGAATACCTACACCTATACTTGGAATAGCCGAGAT -ATTCCATTGAGGCTACCATTGGTATGGTATGTACCATGGATAAACTTCGGATTAACTTTC -GGAACCAAAGATTTCACGGGTCCACGGTCTTGAGGGGAATCTTGGAAGGCTTAAGAGAAA -CCTTGTAACTAACTAAATATCAACTTTGCCTCTATAACATAAGCATTGGACATGACATAT -CGGATTTTCGAATTTTCTGCTTTGGCCCCAGTAAATTGACCTAATCTTACTTGGGACATT -CTCTACATTAAAGGGTCAAGCCGTTATTAGGAATAAAATTAAGTTCCGGATCGCCCACCA -ATTAGAACCGGTGCCTGGTCATTATGATTATTACTTGCCTGGTAACACAAAGCCTGGCAT -TATATTACAATAACATCTGAGCCACAGCCGACCCGCATGCCCCACGAGGACAAGATCAGA -ATTGCAGACTATCCGACAACAGGTCTGCAGTTGAGCAGTGATGGGTATAATTCGTATAGA -GTATCCGGTACTTACAACGTATTGCAGGGATCAAGTCAAAGCCACGATTAGTTTAAATTT -CAGAAAAGTAAAAAGTAGCCGAGGAAAGAAAAATGAGTTATGGTTGGTCTGCGCGTTTGG -TCTGTTGACTTAGTCTAGTCTAGACCCACCTTGGATGTACCAACATCCTTGAAGTGTACT -TTGCCTCGCCCTTCTTCCTCTCTTTATTATTCCTCTCATTTCAGTTCACTCTCTACATTC -TTTTTTTTTTTTCGTTCTTCAACGATAGCCATTCTTTATTTCTCCCACTCTAGTCATTCG -TTGTCAATCTCGACATAAAATCAACATGCGTTTCTCCATCACCACCGTCTTCTCCGCCCT -CGTGGCTCTGACTGCTGCTGCCACCACTCCTGACTACACCAAGGATCCTTCCGGCAATGC -TATCATCTATCCTGGTCTGAACGAGATCGTGCCTGAGGGCAAGGCCTACACCATCAAGTG -GGAGCCTACCACCGTCGGTCCCGTCTCTCTGATCCTCCTGCGCGGTCCTACCGAGAACGT -CCAGCCCCTCAAGACTCTCGCTGAGTCCATCCCCAACAGCGGCGAGTTCAAGTGGACCCC -TGGCTCTGATCTTGAGGCCGACGTCACCCACTACGGTCTGCTCCTTGTTGTCGAGGGTAC -CGGCCAGTACCAGTACTCCACCCAGTTCGGTATCTCCGcagctcctggctccggttccgg -ttccagctccaccaccaAGGCTACCGAGACCTCCACCTCTGAGGCTACTGCTTCCACCTC -GTCCACCGTCATTGAGAGCACCGTGATCACTACCACCATCTGCCCTGAGACCGAGACCAC -TGCTGCTGCCACCACCACCTCCGCCCCTCAGTTCACCAAGACCGGTGTCTCCCCCGTTGG 
-CCCTCCCACCAGCCGCCCCTGGACTTCCATCCCTCTCACCACCAGCGTCGCTCCCACCAG -CACCACCTCCGCTACTCCTTCGGCTTCCTCCCCTGTCTTCAACGGCGCGGGTCGCAACGC -CATCAGCGTCGGTGCCGTCATGGCCGGTGTTGTCGCTGCTTTCGCCCTTTAAGCGCATTT -CCTCTGGAATCGCGTGCCAATCGCACTTTCTTTCTACTGCAAGCCGGAAATAATGTTTCG -TTGTTTTCATAAGGCTTAATTTTTATCAATATCTTGATATGGGCCTCTTGGATTCTGTCT -CTCGGCCGAGTTTATGTATCTCATACCATGTTTTTCTTCTTATGTGCTATATTTCCCGCA -TCTCTATCAGCTATGCTTGTGTAATGTGAAGTTCATGTCCTTATGTTTCTTCCTACCTCG -TAGTTTGCTTGATCTGGGGCATAGACCGAGTTTTCACTCGTGTAGACTCGTAAATATCTA -TAGTTAGAGGCAACAGTGAGAATATCAAATCTTCTGTGGCCTCAGACACTCATGCGTCAG -TAATTTTGATACGGCCACACCCCCAAGCCAGCACCATCTACTTATAGAAAAAAGATAGTA -TAATCGATAGACTAGTAAAAATGGATACACCTCAGTGCCATGTAGCTATTTGAGTAGCTT -CATAAAGCCACAGAAACAATCGAGGTCTGACAAACACTACTTAAGCAAACACCAAGGCAG -CAGTCATCCAGGTGGGCAAACGCCTACGTCTCGGTAGAACAAGTAGCAAACAAAATGCAA -ATTGATTGCACTTGCAGATGCAATATTTGTCATTTTACCTTGCAGCTTACATTTGGGGCA -AATCTACTCCGTATACCAGTCAGGTGCTTTGCTGTGGACGACAACTCAAGAGTAGACGGC -CGCTTATGCGGCGTTATGATATCTTACAGAGCTCTCAAGTATTCTTTTCAAGTGGGCTTC -TCAGTTATTCTTCCCCATTATCTTGAAGATCCCCTTTTCTAGTCTTACAGGTAGACGGCA -GAAAAGAGCGGGAAAAGCATACTTTGTGTACGAGACTTTTCTACGAATAACTTTTTATCT -AAACAGACTATCTGATAGGGGACTTCTTAGCTTCTCATTGGACTGCTATCTTTCTCTATC -CAAGCACCTCTAGACTATGTTATATCTTGATGCGCCATCAAATTTAGCCACACGATAAAT -CGCGACTTAATGCCACCTCCAATTCCACAGTCCTCTCTACGCATTATCTATGACCCCCAG -GTAACTTCAGACTGTCCTGTCTTTTCGATACCGAAAGCTTTGTCGGACGCTGCGATTACT -GCAGCTTCAAGAGCACCGGGCTATACGACCCTCTAGAGGAGAAATGACGAATATTGCGTC -CAGTAAATGATCGAAATTGATGGCAATGTAGACCTCGGATGCCTTGTCTCCCCACCATGG -AGTATTTCCCCGGAAGTCATGATGCCGACCTCTACTTTGCTGTTGATCGAAAAGTGGCCG -AAGTTCACGCTAGCTTTATCAAGCGTCGCAGTACGTGCAACGGGGTTGTCATTATTCACA -TCACCATCCCGAACTCGGTCGTCGAGTACCTCTCTACACTCGAACTCTAGATTGTCTATT -GGCCAAGCGCAGAATAGAACTTGCTTATTCTTCATTGTCGACAGGGGAAAACGCTCCCCT -CTGAACTCTGGAAGTACAAGATTGCTCAGCGTGTAATTGGAACCATTTGTGACAAGCCAA -ATGTCTTGATTGGCAGTATGCAATGTCCAGATGATACCACGGAGCAGATGGTTCTAAAGA -CTAGAGATGAACGCAATGTCGTCCATCGCGTATAATTAGCCCATCAATTACCATGAAATT -GTGTATTGCTAGCCTCTTGATTATCGAGTCAATATCTGAGCGTTTCTGATAGTTCAGCTA 
-GCTAGTAATGTAAGGGAGAGCACCCTAACGGAAAAGAAAGCCATCCGGATCGGCCTTAAG -CCCGGTTTGTAGGTATCTCGATCGCATTCATATTCCATTCTACGCTAAAAGAGATATTTT -TAATTCTATCAAGTTGTGATCAAACCCAAGAGATGCCCACGAGCATTTTGCGCCTGTGAA -AGAACAATCTCAGCCGCGGGATATGTTTGTAGTAAATGTCCAGCCGCTTCCTCACAAGCA -TACAAGTTCGACATGGCCTGCATCCTCACAATATCATCCGAAACCTTCAGATCAGGAAGG -CTTGCCTCGAGAATAATAATCAATATAGTAACAGTGCTTCCAGGGAGAAAATGAAGCATC -CTGCAGTCATGCAGCTCATTGAGAGTCTCATTGACTTGCATAGTCAAACTTCTCATTTCC -GAGTTAATGGTAGTCTCGAGACTTTCCCCTCCGCTCGACGCTGCTTGAAAGAGTATGGAC -ATTGCAGTAAGATGGAGGAGATACAGGAAAGCCTGGTGGACGAGGAGAACGATCTCCGGA -GACTCTGTATGGCAATGGTGCAAGAGTGACGGGGAGTAATAATGTGCCGGAGCAGGAAGA -TTCTCAATCCAGACTTTCAGAAGCTGCTCACATTCCGTTATGGTAGCTTGCGATATCTTT -GATTTAGGTACCATGGAGTATTCACAATCTCCCGACGGTGATGGCGACCAAGCATCTTGG -TAGAGAAGACTGATGATGACGCCCACACAGTGAGCAAGCTTGATCTCCTGAATAAACAGA -AGCATCATTCGCTCTTGGTTTTCCCAGTTTAGCATAGCTGATGCTTCGCCCAGCATAGCT -AGCACCAGTTCATGGTATGGTTCGGAGGAATAGTATCTCGTAGCTTCCAGTAGCGACATT -GTGACATCGCTGTTTGGGATTCGTAGTGGTCGACGAAAACCGAGGGAGAGAACGCGGTCG -CGCACATAGAGACACCACCACAGTCTGTGCTTGTGAGAAAAACTACCTGTTTCTAAACTC -CCAAGGAGACCGACACGTTCTGCTGTCGAATAAGCGAGATCGAACCAGTATGTTGGGACT -TTGTGGCCGTCTCCACTTCCATGCCACTGCATCATGAGTAGCAAAGCTTGGATCTGGGTG -TATGCGTTGGATTCGGTCTCGAATTCATAAAGAAGCTGATGATGGGATCAAAATTAGTTT -CAACTTGTCATGGATTCTGTCTGCGGATATCTCACCTTAGCTCGTCCAAATAATGTTCTT -CGCAATGCTTTTCGTGACGAGAACCCAGCTTTGACGGCAGTTTCCTGCTCAATGAATGCG -CTCCCAGCTAACATGACAGCTTGATGAAGAAGCGGGCTTGGAAATTGGGATTCATCCTCA -ACCATGACATTGAATAAAAACCATTGCAAATCAATGACAGGTAGTAGAGGATGGACATAT -TTGATATAAGCCCCGAGCAACTGATTCCGAAGTTCGATAGGAACTATTCTGAGAGCTCCT -TTCAAATGTAGATATTCTTTACTTTCTTCAGAGATATTCGGAAGCCGGACCTGAAAAGTT -CGCTGCAGTTCAAATGACTGATTGTTGTCTCTTAGAATAAGATCCGGGATCTGTGGCGAG -AGTAGCTCATTTATCGTCAAAAGGTAACTGTCTAAGGGATATGTCTGGGGTACATTGCGG -GTCAAGGTCATATGTGGTTGGCCAGATATCGGCTTACTCCCTGATGCGACGAACTGCGCA -AAGGGATCTAACAGTGTCAGTGACGATGGTCAGATTGGGGTAACATGCTTGGCACATGTG -TCGTACCTTGCTCATCCCCGCCTCTTTCTAACCGATCTACATCCTCGGCATTGAAGGCCT -CGAGAAGCGTAAGAGAAGACGGCAGATCCATCGGCACGGACTCCGGCTCCTTGGTCTGCT 
-GCTAAAGGGATTAGCGCTAGGGCAACATCCACAGGCAATTTTCTCTTGGAACTTGGGGTA -ATTACCTTGATTCTTGTAGGCACCACCAGTGAGAACAACTCGACCGACTGCGACAGACAC -GTGAACTGAAATTAATTTCACACACAGCTTTGACCGACACACTCTGAGCGTCGTTGAGTC -CCTTGTTACAGTGTTATAAATGTAGCCCGAATTGTATGTGCTATTCTGGAGGATTGTCCT -CGATATGAAGCCCAGGGAACACGAGAAAAATCCGCGTCATCCGTGGGGAAAGTCGGGGCC -CCGCATTTTTGAGGATCCGAGCGAGTCTTTGATGATAATTGACGATAATTGGTTGTTCCG -AGTCCCGATTTTCTAGTCTAGACCTCTTGGAATGCTTAGAAATCGCGGTGAAAAGACGAC -CCTCTTCCACCGGACACAGGTGACCATTTAGGAACTTCCACCGGAACTTGCGGAGCCAAT -CACATTGTTCGAGATGATCTCCATCAACCCAGAAACGACTTCATTTTTTGATAATTACTC -CACAGTTCAAAGCTATCTACGGCGTACAAGCTATGTCCATCACAAAGTCCATCCAATCCA -TCATCTGCCATCTACCAAGGCAACTGACCATCCTTATTTATAAACCCGCCAGCATTCCCA -TCCCTCTCCCCAAGCACAATCCCCAGAATCCCTCTACCAGCCTCCAACGGATCCCCAGCA -GCTCCATACGCACTAACAGAAGCCTCATCCTCTCCCCTCAAATTTGATCTCACAAGACCA -GGACAAAAGGCAAAAACCCGCACACCACGACCCTGCAACTGCTTATGCAGCTGGACAGTC -ATCATGTTCAACGCCGCCTTACTCGCGCGGTAGTCATCATAAGGCACAGTATAATGCTCA -CATGTGGGATCATTTGCCAGACTCATTGAGCCCAGACCACTGGAGACCTGGATCAGATAT -GGTCTTTCTGATTTCAGGAGGAGGGGAGTGAAAGCCTCTGAGACCAAAGATGCGCCAATG -ACGTTGGTCGAGAAGATCATGTTCAGCTTGGCGCGGCCGCTGCTGTTAGGTGCGGCAGAG -CCGGCGTTGTTGACTAGGACGTCGAGTCGCTTGAATTGTTCGTTTACGGTGTCGACCGCT -TTCGATATGGAGGTGTCGTCTTCCACGTTCAGCTGGATCAGGGAGAGCGTTCCCTTGAGA -TCTTTGCTTTGGAGCTCGGAGAGGGCTTTTTCGCCTTTCTGGAGGTCACGGCAGGCCATG -ATCACATGGTGATTTGGGTGTGCGGCTAGAGTTTGGGCGGTTGCGTAGCCCACTCCTGAA -TTCGCACCTGGATATAGTTAGGTTCTGGAACTGGGGTGTAGCTTTTTGTTGGTGGATTAC -CTGTAATGAGAATGACGGTTGGTTGAGACATGTCGTGGTTGCTTCGTTGGTTTGGGGGAA -AGGGCTGGCAAAGAGATTGGATCGAGGCAGGGATACTTTTCCCTGTGAGATCACAGCCTT -TGAATTACTATATTCGGCTCAGCCGCTCTCTCCTGGTATCACTATTTATAGAGGACATAC -TCCATGTCGGATGTAAACCCCCCCCTATCTCCGTGAGGCTGTCGGTGTTAAGCCGGAGAC -CGAGAATTGCTAATCCTACTTCAAGGACAACAAGACTATAGAGACAGTCATGATGTCATA -TGAGCTCTGTGATACCTTGCCTTGCATTACTGGGTCTATCACAAGTCGACCAATCACTTT -TTAAGACTGTATGTCTCAGAGTTACATTGCATGCTAAGCTTTTTGTTCGGACGTGGGATC -CGAGTGCTGTCTCACGGATAGAGCCGCATGATATTCAGGGGTTTAGGCAGCAATTACGTG -AATTAGGTGTAAAAGCCGTCATATTGGGCATTGTATTTCTGTTCGAAATATTTCTTATAT 
-TTTCCAATTTAAAACAAGAATGATGATTCAATACATCGCCAATGGAGGTATCATCGCTGT -GCAATCCATACACAAAAAGAGCGAAATAAAAAGAATTCAAAGAGAGACAGGATGTGAGGA -CGAAGGCTAGTATGATGACAGTGAGGACAGAAAAGTATAGATCACACTTTTGCAGAACCA -CTGACGAGTCTTTAGAGATCAAAAAGAAATGTAGATCGATAGTTATACACACCCAGCCTA -AAGGAAGAGGCAGATTTTGAGCCACATCTCTAAAGGAGTATAGACTAACTGCGAGCGGCC -ATGCCAGTGACCGCTTGTTTGTCTCTGGTAGCAGCATACACAGGAGAGGGAGGGATTAAG -AGCATAAGGGAAAGGGGGGGAAATGACAAGAGAGAACGAAGATAATGATTTTTCTTTTGA -TGCAGCCGGAAAGCACGCGTCAAGGACGTGACCACGGTCCTTGCATTGGAGGCAGTGGGC -CTCCAGTAACGGAGGGGGACCCAGAAGGGGGACCAGCAATCGCCCGTCCGGCGGGTGCCA -CAGATGTAGAAGAATACGGTGGACTTGCAGGCCAACCAGGTTGGGACTGAACGGAGTCGA -GGATTCTTCCCTGTGGAGCAGGGCCCGCCGAGGTCGGCTGCGTGCCTGAACCAGATCGGG -CAGCATCGGCGCCCGACCACGCCGTGGTGCCCGGCTCGGATGCGGGGAGGAGTGTGGGTT -GGGTGGAGCGTGGAGATGAGGGTCTTGGAGGTAAGGCGCTGAGGGACATTGTGCGGCCGA -GACGGTCGCGATAGAAATCTCGCTCGGAGCGATAGTGGTCTCGTTGCTGGATGAGGGAGC -GGAGCTCCTCGCTCTGGCGGCGGACTGTTTCTACGCCGCGCTGGATCTCATCCTGCTGTG -CAGTGAGACGTTGTTCGAGCTGCATCTCATTGCGCTTCCGATTTCGGAACCGTCGGGATG -CGTCGCTGTTGGCTTTGCGCTTTTCGGCTTGGCTGGAGGAGCCGGATCGCATATCTAGTA -CGCACGGTATCATGCCCAGTGGAAGGGGGCTGGCCTGTGGTGTTCCGGCTCTTGAAGGGG -CTTCCGATTGGTGTCTGGGGGCTTTCGTTGTGGGGATGGGTCGGGTCAGAGGGTCCATTG -TCATATGTGGAGGCGCAGTGGGATATGCAGTGCTTTGTGAGAGGGAGACGCTGGTCACTG -CTGGGGATACACGGCCAAAGGGATTGTAGGTCGAATGAGACGTGCTAGGACTTGTTTCTT -GGGAGTTCGACTTGGTGAGAGGCCCCGGGCCGGCACTGGTTCGCCGAGAATGTAGCGAAG -TCAGAGGCTGAGTACCAGCGATAGATGCTATTGGGCGGAGCCCCGTCTCTATGGGCAAAG -GCGAAGTGGTGACAGGTAGGCGTGAACCCATCAGAGTCTCATGAGGAACTAGCGGTGACT -GTGCAACACTTGACTGACCAGAAGCCCTGCCGCCTCCGCCAACAAACCGTGCCGAGGGCG -ATGCTGGATTCATCAAAGGCCGTGAATGGGACCTGGGAGACAATGGCTGCGAGACCAATG -GATACCCTGAGCGGATCGGCGAAGTCCCTTGGGGGCGAGGTGAGGAAGATGGACCCGGTA -GGCTCAACGGCTCCCGACTGAGTGCGGTCAGATCAGCCGCTGCCTGGGGGTGGTTCAAGA -TGGCATGGACACCAATGGGTCGCGATGACATGGCTTCCAGTGGTGGCTCCACGGGTCCAT -ATGGATGCCATGACTGGGGTCGAGGTCGTTGAGGTGGACGGCGGGTAGGATTTCGGGTTG -CAAATGCACGGTCATCGCTCGCCCCATCTGTAGTCACATCTCTGGACAATGAACCTGCAC -GTCGAGAATCCTGCGACATAGAAAGCAAATCTCCCACGACGGGACGGGAGTTAGGGCATG 
-ACCGAAACCCCCCTCTCCCCTCGTCGCTGTGTTCTCGAGTGTGAGAGAGGGAGGGTGCAG -GCGAAGGATACCGATCTGAATAGAGTGACAAATTGGGTGAATGTGAGGAATATGGTGCGT -TCAACACACTGATACCTAAGGCAGCCACGATGAATAGGGGAGAGAGATATATAGGAAACT -CGATCAATTGATCGAGGCAAGATAATATAAGCTAAGGAAGCTAGAAAAAGCTAGGAAAGC -TAAACGGAAAAGGTCCGAGTTAATAACTAGAGAAAAGTATGAGATTTCGAAAATGGGGGC -GACAAGCTTAAGAATTGAATAGCAGCGGTGAGAGAATGATCCATCACCTGGTCTGGATGG -CTCCAAGACAACTTAGTTGTCTCTCTGTCCGACCCAGTTTGGCGGATAGTTGGGCAGGAT -CTCCTATGGAGACCACCCTAGTCGACCCCCGGAGTATTGGGCTTTCTTTTTTTTTTATTT -ATTTTGGTCTTTTAGTCTTTTAGTCTTTTCTCTTTTTATTATAATTTAGATATATTAGGA -AAAAAGAAAATAAATTCAGAGAATAAGTCCAATATATCTTAATATAAGTCCAATAGGTCC -ATGGATTTAGATATCCCCTATATTCTACCCAATTCAGTTATAAAAAGTATACAAAGTACC -CGGGGAAAGCGATCATGCACAGCACAAATCATCGGGAGCTGCAAGTGTGCGCCGTCACCA -TGGACATGGACAGGTAGTGGAGtcctttcctttttttttttttttaattacttttctttt -tCGTTCCACTTATTATTTCCGAAGGTTTTAAAGTGGCTTGAGTGGATAGAAATCCAGCAA -GTTCCTCTGAAATGAGACGCCAAACTACTCTGATCTATACCCACTCAGGTATAGGCATAT -CCTTGCCTGGCCGTAGCCTCTTTTCGAGAGTTTGATGCTTGGAAACCCCTCTAAGCCATT -ACAAGTAGCTATATAACGAAAAGGTCTCTCTCTCTCACACAAAGTTTATTGGAGATGTTG -TGCATCACTCTTCCCCGTGCGGCAAAAAGGGGCCCCGTTGACCGTTAGGGGTAGAATGCC -GTTCCTCGGCTAAGAAATTGGGCTTTGACACCTTGAGAACTTCAATTTGACGTCATGGAA -GGTAACGTAAGAATCCAAATTGATCATTCGTGTGACCATTCAGAAGCCATATAGGGCTAA -TCTTGTTTTCGCCGAGTCCCGTAACCCAGCTGACAACTCTCTATCAGTTTGCATTTGATG -CTCCACGATCGGACCAGACGGAAAGGGATCCGGTTTCCCCTAGCCTCATCATTGTGAGAT -GCATGAGAAGTGTGGATAGCTCTATTTCCAACTTTTTTTTCAAGCTTGTCTCAATTGGGC -ATAAAAGATTCCGCGTGGCAACGGTCAAATAAGCAGCGGAGATCGGTTTAGTGTTTCGAA -TACAAAGTGGAGGGGGAAATGGTCAACACAAGTTGGTGCAACTGAAGCGCCTTCTTGGCA -TTGGTTAGTATTTGTTGATCCTTCGATACGGCATCGGAAATGTTTCATGATCTCCGGATC -CTGATCTGGATAGTGAGTGGGTAAGTTGCGATAGATCTTCTTTGGTTGATGTGGCACAAG -TCCCGAGGTCGTTTCCCGATATTTGAAGGGGAGCTGTCAGTCTTTTGTAGCGTACCCGCT -TGTGAGATTTTCTTTTTCCTGCTCCTGGGCGAATCATCGGCCATGATAGAAATACAATTG -CGGTCTGATAGATCCAGTGATTTTGACTGGTTCCATCGAATGGGGCTAGAATATGAGAAT -TGGCTTCCTATTCTGTCTTTTCTTTTTCTCTTGTGGCTTGCAATCTAAAACAGCCCCTAG -TCCTGCCCAGGCAGGTTGGTATATTGGTACTTTGGAGATATTGCCATATGCGTGTCATCG 
-GACTATTTGAATGGTGCAGAAACGGAGAAGTTAAAAAAGATATCCGGTAGAATGGATTCA -TCGATCACTGGATGCTTTTTTTTTTTACTTTTCTTCAGTTCGCTCTGAACTGTGGAGACT -TTCCCCTTTTCATGATCTTCACCTGTTCACCGATCTCCGATTATCTCGTGTCCGCTGCAT -CCGCAAGTTCCTTGGAGAAAAAGCAAAAGAATAGCTTTGTCTAATCTAGATCCCTCCGGA -TAACTCCCGGTTGACAGGTACGGTAGATCTTCCCTCGAACCCACCTACCGAGTCGGAAAA -GGACAAAACCATAGCACATTATATGTACAATTACCTATGACTCTGTACGCCCTGCAGGGC -GGATCCAAAAAAAGAGGTGGCAGTGCTAAACCTGCCCACGCGTTCCCTGACCTCGAACTC -CAGCCGAGTTTGAAATTGGACGGAAATCAGGGGAACCAAGTTCGACATTGTATAGTATGT -ACAATATATGACATGATATGAGATCGATATGCAGAGACATGCAGAGACATGCAGAAAACA -TGCAGAAGCTGATTCGATCTATAAACCTGGTCTATAAACCTGGCTTTTTTTTTTGTCCCT -TGCGTGAGTGGAAAAGGCTACCCTGACCCAAGCTGCCGGACAATGACAGCGCGGACCCGA -CGTGCTCGCCCGTTTGCTGATCACCGATCTACACTCAGGTTTGCACTGCATATCCGAGTT -TCTTGACTGCGCCTTTTGGTACtttttttttttttttttCTCGTGCGAATGGAGACTGCC -ATTACCGCCTTTTGAACCCATTTTTCGGCGCGGGAAATGCAATGTACTAGTGATGCACCA -TTGATCCTTCTGCTCGACGCACATGTACAGTGCGTGAATGCGGGCATAATTACAAAGTAG -AACAAGAATAGACCAAGAATCAAGCATGTACTCCCCGGTGGTAGGTATGTGCTATGTATG -TACGGAGGACTCCCTGTCATGTACTATCCATGAATATATGCACACGATACGACACAATCC -GTCAGACCCTACTGAAACTGAAACTCATTCTTAGGCACACCGCGGACCCCAACATCAACC -CTGAAAACCCGACCCCCATATCTAACCGACTGTGGCAGCCGGTCGTCATCACGATCATCC -TTGGCGGTCGTGATGAACAACTCAGTACCGATAAAGGCTGGACAGGTCACGTTGCGGGTC -GGGAGATCGATCTGACCGATAATCTTTCCGGCCGGGGAAATGCGCAAAACCTTTCCGCCG -CCGTAGACAGCGGTCCAGATGCAGCCTTCGACATCAATAGCGAAGCCATCAGGCTCTAGT -GCGTTACCGATATCGAAGTGAACGCGACGGTTGCTAATGGCACCAGTGGACGCATCAAAG -TCAAAGGCAAAGATCTTGCCTGTTGGTGAATCAGTCAGGTACATCACGTCGTTGGTTAGG -TTCCACCCGATCCCGTTAGGGATCGTCACCGGTGCCAACATAGGGTGCACCGTCTGATCT -GGGTCTAGTCTGAACAGAACTCCTTCTGCTTCAGGCTTCTTGACCTTCGGGTCGTTCATT -GCTCCGGCCCAAAACCGACCATGGCTATCGATTGCGCCATCATTGAAACGCATCCTACAT -TTGGAATCAATGTTAGTCCACGCTCTTAGAGTGAGCGATATACGTGGCGATATCTTTCTT -CTCCTTTCGGGTTAGCCTACCTCTCAGCCTTCTCCTCATCTCCCCAAAGTGCATGGGTAT -CATGGATATAGGAGAGCTTCCCAGTAGTTTGGTCCAACCGGGCAAATCCATATTTGGCCC -CAACAATGAGCTGATTGGACCTTGGATGAGCTATATTAGCGGTCACGCTAAAAGTCCACG -TGGTACCCCGTCAGCCAATTTAATTTCCCACGGGAGATGAAAAACCCACCCAACAGAGGC 
-ATCGGTATCAATGACCGTCAATGAGTCAGGTCCCTTTGCCAGATCGAGTCGATAGAGTTT -CTGGTCCCAGATATCCACGAAACGGAACTCATTCTGAGCCACGTCGTAAAAGGGGCCTTC -GAGTAATGTTCCTGGAATGTCAAGGTACGGCTGATTCAGTTAGCTATGATTGGCCTTGCT -TTTTTTCTCTTTCTTTTTTTTCCCGTTCTCTGACAGGTTCGGGACAAAATAATCACATCT -GACCGACACGAGAACCAGAGaaataaaataaatataaatataaataaagaaGCGAATCAG -GAGATGTCGAATCTCATAGAATACTCACCTCAGTGACCGTCCACCTCTGAATTTCAGACA -TGCCGATCGGTGAGTGGACACCGCTATTTGGGAAGAAGAGGGGTGAGATGCTAGACTCGC -ATGAAACGGTTGACACCTGTTGACTTCTAGCCGATCAAGGGGAATTGTCGGTGTCCCTGG -TGTTCCGTCAGTCCGTATATCCAGTTTTCAGTCAGTTCAAGTTGGAAAGAGAAAGATGAG -AGAGAAGGTGGGAGATGCCAAGACACCCCCACGAGGGCTTACTATCCTTGTACCTTGACT -ACCTAGTATGTATCATGTACCTTTGTAATCTACTCCATACTCCATACATCCTAAGCCATT -TCTTTTTTCCCTAGTTCCCAGGACAAGGGAGAAATTCAGGTTCATTGGGCAATTGAAAAA -AAAAAAAAAAAGACTTTCCAATCCCATGCGCTATTGAAATCTCCTGCAGAGATCCCGTGT -CTGCAATGCGAATCATCACATAGTTGAGTTTGAAATTCACATGAACTACGGAGCCGTACG -GAGGTGGTCCGTATGCGTAAACTTTCGACTCTGCATCAATTCGTGCAGCTAGCCGACAAA -ATATATCATTCACACCCGATAATCAAAGGATATACCAAAGTACATATCGCTTTCCAGGGT -GGCTTTTCACATGACCAACTCTGCATGCAGACGCAAGAGATTCCTGTATGTGCTGGCGCT -AAGGCCTTGAACCTGGAACCTGGACTTACTCTTGACTACCAATCCCGGAGATTGAACCGT -CAGTTCGACTTTCCTGCCTGACTCGAGAATCGGGCGCCAATACCGGAGTTAAAAAAAAAA -TCAAGAGTTTACCGTTCTGATATGCATGGAGAATGTGCTTGCAGCCGACTTGTATCACCT -TAAGTGATGGATCATGCAAGTGGGAATTGGATTGTCGAGGTGGACAAATCAGCCTTTGCA -ATGAGATAGTCCGAAATATGTAATAGAAATAATAATTCAAGTGCTTGAAACTATGAACGT -GTTTCGTGCACGGCTCCTGGCCAATTAGCTTCAACGGCACAGCTCAATGCTCAAATGCAC -CCTCCCCCATATGGTACAGGTAAGGCTTGCATGCAGATAAGGCCTGCAGTCATCGTGAGA -TGAAGTTGGGCATCAATTGTGTTCGGGTCCAGTCTAATACGAATGTACGGTGATGAACAT -AAAGCACAGCGTTGTTTTTTTTTTTCCCACTTTTTTTGGGGGGTTGCGTATGGGACAAAT -TTCAATTGGCGGGGACGGGGGCGCAATTATTCACCGTCATGTCACTGCAAAGTAGGGCCT -GATCTACAGTTGCAAGTGACTTACCCGTGTCCCCACGGGGGGGGGTTTGAGGTTCCCGTT -TTTCCTTTTTCCCTTTTTTGCGGTTTTCGTGTGGATAACCGAAGAGCGAGAGCGAGAAGG -GGAAGCGATGCATCATGATCTCCAGCGAAGTGGGATTTAGAATAGATAGGTGCAGCACCA -GGGACAGGTACAATGTATCAGATCAAATACATACAATGTATAGACAATGTTTGTTGTGGT -TTGCAGAGATATGGACAAGAGAAGTTGGTCATTAAACATAGCCCGGTCAACTTATTTACA 
-AGAGATCACCCATTTTCGCATTAGTAGCAACAGCTTACAACTTGGTGCCCATGTTGACGT -GGACAGCCTTGACCTGCGAATATGCGTCCAAGCCAGCTTCACCGAGCTCGCGACCGATAC -CACTCTGCTTCACACCACCGAACGGCACGCGGAAGTCACTGTCATTGCTGCTGTTGATCC -AGACCATGCCGGCTTCGATATCGGAGGCAACACGGTGGGCACGCTCAATATCCCGAGTGA -AGACCGCGGCGCCAAGGCCGTATGTGGTGTCGTTAGCACGGGTGACGGCCTCGTCCTCAG -TGGAGAAGCTAGCAATAGCAACGAAGGGGCCGAAAACCTCCTCGCGGTAGATGCGCATCG -AGTCCTTGACGTTGGTGAAGATCGTAGGCTCGATGAAGAAGCCACGGCCATCGCCAACAT -TCTTGTGTGGAACGCCGCCGGAAGCGAGCGTCGCGCCCTCGGCCTTGCCGGCCTCAATAT -ATTCGAGCACGCGCTCGTACTGGGCCTTGGTGACCTGGGGGCCCTGGAAGGTGTCGTCGG -AGAAGGGGTCGCCGACCTTGCTAGTGGTCCGGACGACCTCCTTAAAGAGCTCAACGAACT -TGTCATATACAGATTCGTGCACAAGGATTCGGGATGTCGCAGTGCAGACCTGACCCTGGT -TGTACATGATGCCAATGTGTGCCCACTTGGCGGCCTGCTCGAGGTCCGCATCCTCAAAGA -CAACCAGTGGCGACTTGCCGCCAGTCTCAAGGGTGACGTTCTTGAGGGTGCCTGCGGCCA -TCTTCATAATCTCGCGGCCAGTGAGAGTGGAACCAGTGAAAGCAACCTTGTCGACGCCGG -GGTGGTTAACCAAAGCACCACCAGCAACACGGCCGTGGCCGTTTACAACGTTAATGACAC -CCGGGGGGAAACCAGCCTCCTTGATAAGACCGGCCAAGTACAGGATGCTAAGCGGGGTCT -GTTCGGCAGGCTTGAGGACGATAGTGTTACCGCACGCAAGGGCGGGGCCCAGCTTCCATG -CTGCCATTGCTAGAGGGAAGTTCCAAGGAATGATCTGGCCAACAACACCAATCGGCTGGC -GGAGGGTGTATGCGAATTTGGCCGGGGTGGTGCCAATAGTCTGACCAAATACCTTATCGG -CCCAGCCGGCATAGTAGCGGAGAGTGTTAGTGACCTCACCGAGATCATCATTCAAAGAGA -CCTGGTACGGCTTTCCTAGAGTACATGTCAGTTCCTATTCCTCTTGATACTTGACCATCT -CCCTTGTGACTCACCATTATCCCAAGTCTCAATGGTGGCCAGTGTCTCCTTGTGCTGCTC -AATCAAGTCGGCCAACTTGAGCATCAAGAGACCACGATCAGTGGCCGGCAGGAGCTTCCA -GGAGGGGTCCTTGAGAGCCTTGCGAGCGGCCTTCACGGCGATATCAACATCCTCCTCACC -AGCAGCATGCACTGACGTAATCTCGGTTTCATCGCTGGAAATCTGGTCAGCTCGTCAAAG -TCCCACAAGACACTTCAGAAGTACATACGAAGGGTTCACAGTGGAGACCTTCTCGCCAGA -CTTAGCAGCCACAAACTCATTGTTGATGAAGAGACCGATAGGCTGAGTGTAGCTGCGCCC -ATTTGGGGCAGTCAATTGTACGGAAATGTCGGACATGACGAAGGGGGAGCTGTGGAATGT -ACGGTGCGGAGCGATAAACCGAGTGAGATTTGATGAAGCTTTGTACAAACAACGGAGGTG -ACTATTGAACATGgagggggagagagagagagagagagaagagTAATATAGGAGGGGAAT -GGGAGACTGGAAGTGACCTCCCGACGTCCAAAGCTCCCCGCCTGATAAGCCGATAAAGAT -GCTCCACTCTCCGGATAGACTTACTCTTCCCAGTCAAGATATCGCCATTCCCAGCTTGTT 
-TGTTTGGTTTGCATTCCTCTTCTTCTCTGGTTGGCTGGACTGTCCGTATTCGGCGTGGGA -GAAATTCTTTCCTTTCTTGGAGCAAGCCTATAGACCCGATTTGCTCTTTATGACTAACTC -CAGTTACAGTCCTGCAGGAACCTGCATAGCGTCAAACACATCAAATACTTCATTTGCTAG -CTCCTGTCCAAGCCGTGTTCTCCGTGACTCCTTGCATATAGCATGCATCAACCTGCAGGG -GCCGACGGGTGTTTCCATGTTTCCATGTTTCCATGACTGAGGTAAATTCTGACACAGCTC -GTCCCGGTGCAGCTAACGGTCGCATATTTCGAACGGTAGATCGGACAACCTGGCGCGTGT -GTGAGGACAAATCAATGCAACGCAGCGACAAGGGTAATGTGACACCATCCCGTGTCTTGT -CAGAGCATTCATAGGGTTGTGCCGAAACCAGGGTAGCGTGCCAAAAGAGACTGATCAGCT -AGGAATCAATGCGATCAGTCCAAATGGAAATGGCATGACTGCAGTCGCAATCAATCCAAA -GAGAATCAACCTGATCCGATGAGTTTATACCTTGCACATATTTCTAGAGGACACATCGAG -TCAAGTAAAATATGGTTCAATTCGCTCGTGCAGATAACCCATGTGCAACCGGCAACGCCA -TTGATAGAGGAACTTAGGAGAATTATATGGAATAAATCTCTCTGTGCATCACTTGCAAGT -AGCGGATTGCTTTCTCAGGCCTGTTCCCCCTCAGCACTGGACTCCTTCTTAGTGAACTCC -TCTTGCGGTTCCTCGCCTGCCAACTCCAGTATGTATTTCCATTGCGCTGGAGTCACAGAA -GACACGCTAAGCCGGGATTGCTTAATCATTTGAAGATTCTCTAGTGGTTTTCCTGTCTGG -CCATGGGACTTCAGGTCCTGGAGTGTGACTTGCTTCCCAAGTTTGCGGCGATATTCAACG -TGAACGACAACCCACTTTGGATCGTCTCGCTTGGACTTCGGGTCGTAGTATGGGTGGTTG -GAATCGAAAGCGGACTCTGTCATACGTCTTGGATAAGCAAACTGATCAAGCAGTGTAATC -TCGAGCGACAAACCATCCGTAGAATGTTCCTTCACAATCTCCATCACTCCCACAACCCCG -GGAACCTTGCAGTTTGAGTGATAGAAGAATGCATAATCTCCCTTCTTCATATTGCGCATC -AGGTTTTTGGCTGGTTCGCTGTGAGTAAAATCTTCGTGGTCAATGTTACTATAACTTTAG -ACGGGGGAACATACCGACATGGTTGCGCACACCTGTTCCACTGTTAGCAATCTGACCATA -TACATAGTGTGCCAGATAGGCTAGACATACCATCCCAAGGCTCCGGTGTATCTGCAGTAG -CCAGGTCATCAATAGAGAATTTCACGTCCTTGCCCTTCTCTAGACGAGATTCAGGCTCCG -CCTTCATCAACCAGAAAGAGCGGCCAGTTTCAGGGATGACATCGACCACAGACTTGGCCT -TCGACTCGGTTTCAGCCTCTCCATTTGGCAAAGGGTTCTTGCGGGGTCTTCCACGACCAT -CCTTCTTCTTGATCGTCGGTGTGGCGGGAGTCGCATCGCCTGATAACATGGTGTATAGTC -AGTGTATACCACCCAAAATATGTCAGGATATTTCCCATACCTGAATCCTTGCGGGGACGT -CCACGGCCGTCCTTCTTCTTGATTTTCGGTGTGACGGGTGTCGCATCGCCTGACATCACA -GTGCATGGTCAGTATATACCACCCAAGGTATGTCAGTATATTCGTCATACCTGAGTCCTT -GCGGGGACGTCCACGGCCTCTCTTGGGACCCGCTGGAGCTACAGTCTCAGATTCTTGCTC -AGAAACAATCTTCCGTGGGCGACCACGACCACGCTTAGGAGCGTCAGGAGCAGGCTTGGG 
-AGTCGAGCTCTCGGGGTATTTGCGTGGGCGACCACGACCTCTCTTTTCGACTGTCTGAGG -TGTAGCATCAACCTATTACAAGTACGTCAGCTCTGTCCCAAGTTTTGGGGTGGAGAAGCC -AGGGACGCGACGATCAACCCCTGCATTATGCACTTACCACCGAAGCCGATGCTGCAGAAG -CTGGAGTTGCATCTACAGTATTTCGGAGCCTTTTGGAAGGCGTTTCAACAGACTCATTTG -CACTCACAGCGCGTTCAGACTTGCGCTTGGGTGGCATGACTGGTCAACTGCTTTGTGGAT -GAGTGGGAGTCAAAGAAGCAAATATATGCAGGAAATGGTGTAGAGTATTGCGAGATATTA -AAATAAATCTGTACAATCAATGGGGTTGTGCTCCCGTGAGCTGTTGAAGAGACTACCAAG -TTAAGCGTAGCTTCCAGGGCTGGAATGTTTGGCACTGAGCTTGACGCGGATGTGGGTCTG -GGGTCACACGTGATGGGGTCGATCACGATCGGCACTAAAAACATTCCAAGGACACTGCAG -GTAGACGATCTTGAGGGTTTCGAGAACATTTTTATCGCCCCAAAATCTCTTCACTTATTT -CCAGCATTGACCATCATTAATGTTTGAGGCATAATTAGTACACTTGGATCTCCCCGAGTA -CAAAAACCTTGACAACTGTCCCTTTTTCGCGGCTATCAAACCTAAGCTACTTGTTTAGTA -CTCTTTCTTCGACGAAATCAATATGATTAGAATGGCAATGGCCTTGCTTGAAGAAGATTT -TCCATGTTGGAAAGATATCTCAATCCTCGAAGGAAAATATGCTATACGTATTAGGATTGG -CGGGGAGATATATGTAGTCTCCTTATATACTCTTGGTAGGTGACTAGTGTTGAATCATGA -CCGTAAGTAGTCTAATCTCAACCTTGGCAGCTGAAGCTCAGATTTATGTGTGTCTTGCGA -ATTAAAAAAAGACTACACAGAGAGATATAAGTAGATAAAGAAAGTCAAGAATATTTGTTG -GTCAGAGCTACCTCGAAATGTCTCAGGTAATCAAAATAGTGTTTATTAAGCTTATCACAG -ACCCCATTAGCATGGACGAGATTGTACTAGCCTGACCATAATTAGCATCTTGCTTGGTCC -ATCTTTTCAAGCCAAGTCTTTGCTAGTGAAAATACTCTTTGGGATTCTCCAAGGCGGCTT -TGGTTGCCGTTATAACATGAAGGTAGTATTATTGTTGAACATAACTATAGAGATAGAGAG -ATAGAAGCAGTACATAGACTACATCGTACGATGTGTGTGTGGTACATGCAGTAAGACCCA -CTATATACATACATATGTGGGTCAAATGCTCATTAAGTTACCATTCTTTAGAACGACAGA -ATGTTGGATTATGCATTACGGAATATCATGCTAACATCTGCCTGAAGAACCACGTGAAAC -AACATTGGAAGAGTCTCTAGGTCACGTTACTGAAGACATGTGGACGACAAATTAGTATAT -AAGGTTCAGGCCTCTGGAAAACGGATCACTACGTAGTGCGCATACTCTAGCAGGCTGCAT -TGTAACCAATCCAGTCTTCTTACTGAGGAGACATAGAGAGCCATGCCAGGATGTATACCT -TCCTGGAACGAGTCACCGAATGCTGTCATGCTATGCGAGCTTCATGAACCAGTGCGACGT -TAAGTAATATCCTACGAAAACACATTAAACAAAAGAAAAGAGATAGCATGATATTCATTA -GATATGCTCTATCTTTCTGGAATCATCTTCTGAGACTACATTTACTCCTTAAGAGTGGTG -AAGCGAAGGTAACTACAGCATGTTAGTATCCTGTTTCAGATTGGCAGTTGGGCAAGGAAA -CTCACGGCTTCACTTCACGAATCTCGCCGAACTTCTTCTTAGCATCCTCGGTGCTAACAG 
-TTGGTGGCACAATAACATCGTCGCCCACCTGTGGTCAAGTCAGTATTATAATTCGCATAA -TTTCAGGGTCTGGGCGCTGAGGATGGGGACCATACCAGCCAGTTAACGGGTGTGGTGACA -CCTCTCTTATCACCAGTCTGCAATGAGTCGATCACACGCAGAACCTCGGCGGTGTTACGG -CCAGTGGAAGCAGGGTAGGCCATGATCAGGCGGATCTTCTTGTTAGGGTCGATAATGAAG -ACGGAGCGGATGGTGAAGGCGATGCCCTTGCTGTCGACATTTTCCAAATCGAGTTGATCG -ATCATTTGGTAAAGGTAGGCAACTTTGCGGTCCGCATCAGCAATGATTGGGAACTTGACT -TGAGTGTTGTTGACTTCATTGATGTCCTTGATCCAGTCGCCGTGGGAACTGAGATCGTTG -GCACTCTGTAAGAGAAGGAGAATGTTAGCCAAGGCTCAACGATAGTTTCCCCAGGGAAAG -ATGGTAGGAACTGACCAATCCGATCATCTTCACTCCGCGCTGATCAAACTCATCCTGAAG -GCGAGCGAAGGCTCCCAGCTCGGTGGTGCACACCTAATGGTGGTTAGTTGGTTTATATCG -CGATGAATCGAGATTTGGCATACCGGTGTGAAATCGGCCGGGTGTGAGAAGAGAATAGTC -CAACTACTACCAATAAAGTCGTGGAAGTCAATCTCGCCCTGGGTAGTAAGAGCCTTGAAG -TTGGGGGCAATGGAGCCCAGGCGGAGACTGAGAGTCGGTTAGCGGGAGTAAACAAGAAGG -TGGGGTAAAGCGTACGCAGCGTCAGTCATTTTAGATAGGGAGTTAGGGATATGAAGAGGG -AATTGAGAGATGTGAAAGAGGTTGGGACACAGTTCAAATATGTATGTATCAATGTGATCA -CGCCATCGTATACGGATAGAATTTGGACGATTGGTGCATATGTGGTGGGGTTGGAGATGG -AGGGGTAGCTCCTCCGTACTCGGTATTTGATACAATTCCACCCATGAGAACTCCTTTCTC -ATCCTTTCTTTCTCAGGTTCCCCTTTTTCTCATCTATTGTTGAATTCTATACTCTAACCC -TGATCGGCCAACGCTGATGTTATCTACGTTGTACAACATAGAACAAATGTCGGATGCTGA -TCACGTAGCCAAGCCTTACCCAAATGACGCCATCTATACCATATTATACGAATGTCCTAT -CACGGCAGATCCCACACTTGAACACGTCCATGCACGCTTACAGCGACTACGCAGAACTCT -TAAGCTTGGCCCCACACATGATTATATCATATTCTGCCGGCCAAATTATCCGCTCCAGAT -TTGATAGCTAGTATGGTCTGCTAAATTTTCTCCTGCTGAATAGAAGTTAACCCCGGAATT -TGAGAGCACAATTCAAACGGGTGCTTCGGATATCTTGCGCACTGTATGCCTTATAGGTGA -GCAATTGGTAGGCGGTAGAGCCAAGGTACGGGCGGTGTGATGGGTGGGTCTCGAGGGTGT -CACCTATAACGGTAAGACTTTTGCGTAGTCAGTGGTCACTGTATATCTGCCCATTGTCCA -TAATTCAACAATTCGAAATAATTACAGACGAATGAATGGACTGGATGCTCATTCAAATTA -GCCTCTTTATTAGAAAATAGGCAAACGGGAAGGCATGGTATGTAAAGGGGCTTAGTTAAC -TAGTTGACTAATTAACTGGTTAACATCGCAGATTACCCTTTGGAGGCACTACTCATCAGA -ATATGAAATTCCAAAAATACCAGGAGCTAACAATTTCTGATAACATATTCTCGCAAGTGT -GCACCCCGAGGACGCTACCGGAGAGCCAATAAATGGCCAAAGTCATTAGAAGCCCACCGT -TTCCTACGAGACGTGTGCAGAAATGTTTGAAACCAAGGACGCTACCCACCATCATATAGA 
-AGACAAGGGACAATTCAAACGCCACTGCATCTGAAACTTGTCTGGGAAGCTTATAAGGCA -CCTTTCTGCGAACGGGCGAGGGTGTGGAATAGAACACCTCAGGATTGGACCCGAGATGGA -CCTGGATTCGAGCTCGCCGTTGAATGCCCTCGACTTCATACAGTGCCACAAATATTGAGA -TGAAAGCAATTTTCGAGACAAAAAAGTCTCAAGACAAAAGCAAAGCCGCAGTAACTGGGA -GTCCTTTAAACAATAACGTGCCTAGTTCACAGCATAATCGATTGCTAGAATAGTTGTTGA -CTCAACGAGAGCGGTGTTAGCTCCTGTCACACGGACTCACACGGTCACCCTACTCCCTAC -TCTGTTAAGACTTGGTCAAACATTTTCTATCCAGATAGACCTCCATGAAGCCCCCCGCCT -GTTCTATCCAGATAGAATGAGATATACGGGCTCCATACTGGAGCAAAGGCGATACCTGAT -TCATAAAGCTCTAGACAATGTACCTAGTTCCGAACATGGTTTATGCCTAGCACACTGGAT -AATGTAAATTATTTCAGGCTCCTCTATATCTTCGTATATCACAATTTAAGACATACCTTG -AATTTGATATCCTCTTTCAAAAAGCTGAAGACAAGGTCCAAGATTTTCTCATTCAAGAAA -TGGATATAATATCCAACCTGATTAGTGGAGGGGCCACATGTATCGAAAGAAAAGCGGTAA -ACCATTTTTTGTGCCGATCTCCAAACTCGACGGCTGACACTTCGGACATGACTGTGCGCT -CATCCAAGATACGAACGGCAGTAGTTTGAACTGTCTAGTTTTTGTTTATCAGTCCGAAGT -TGCGGCGACGCGGGGAACCACCCCCTTTTTCGGCTCCCCACAAACGACCCTTCCAAGCCG -TACCGGACACACCTACGATCAGCGCCGGGAATATTATCCGGGAATACTGCACCTAAAGAC -GCCTCTTCGCATGCGAGCGACACTGGGCGAATTGAAGCAGGAGGTCAACGGCTACCGAGA -CGATCGTCTCTGTTGAAAGCGGACAGGAACTTCGCCCACCATGATGTCGCGAAGTCAGTC -GAGTCTGGGGTATATGGATTCAGGACGAGAGGATTCTGCGCCGGGTGCCTCTCCGTCTCT -GAACCAGCAGTCTACCTCTGGCCCTATCAATCTCTCTGGTCTGGTGTGCAACGTGCGCCG -AACGACCGGCAAGGAACCCCGCCCTCTCGTCGGTGCCACTACGACCATACTCGGGGATAA -GCTATACGTGTTCGGAGGCCGGATCCTCTCGCGCAGCCGTCCTCAACTAACCTCCGACCT -TTACGAATTGGATTTGATTCGACGACATTGGACGAGAATCGAGCCCGCTGGAGATGTTCC -ACCGCCGCGGTACTTCCATAGTGTGTGTGCTCTGGGCGATAACAAGTTGGTTTGTTATGG -AGGCATGTCACCCGCACCGAGTACGCCAAAGGACCCAGAGAATCCCGCAGAGCCGCAACC -GCAGTCACAATCGGAAGTCGTGGTTATGTCGGATATTCACATCTTCGATGTACCATCACG -GGCATGGACTCGTGTTGCTGCCCATGAATCTCCTCAGGGGCGCTATGCACACTGCGCTAC -GATACTGCCCTCGAGTGCATGCTTCACATCTGCTAGTGCACCACTTTCTGCCATCCACAA -TAATCCTGAATCATCCACTCATCAAGGCACGCTTGGCGTGGATATTGACGGCTTTGGAGG -TGCCGAAATGGTGGTGGTCGGAGGCCAGGATAGCTCCAATCACTACATCGAACAAATTAG -TGTTTTCAATCTGCGAAGTCTAAAATGGACCAACACTAGCCCCCTGGGAAGGAGCTGCGG -CGCATATCGCAGTGTTGTAGCTCCTCTGGTGGGAATGGATGTGTCCGAAATTGGATCCTC 
-GGCCCCAGATCGCGACCAGCACGAGCCGATTCAAGACAGCACCGAATCGCCAGGTTGCCC -TATGCTGATATACTCCAATTACAATTTCCTTGACGTCAAGCTGGAGCTGCAGGTGCGCCT -GCCGGACGGACGCCTTGTCGAGCGACCGATGCAGAGCCAAGCATCCCCCCCGGGACTGCG -TTTCCCAAACGGTGGCATCATCAATGGCCACTTTGTTGTCAGTGGTACTTACCTGACTTC -TTCAAAGCAAGAATATGCGCTGTGGGCGCTCGACCTGAAGACGTTGACTTGGGGCCGTAT -TGATGCCGGAGGATCAGTATTCGGACAGGGTAGTTGGAACCGAGGAGTACTTTGGCCTCG -CCGTAATTCTTTTGTGATTCTTGGACATCGGAAGCGAAGCCTTGTCGATGACTATAACCA -CCGGCGAATCAACTTCTCGCACGTCTGTCTGGTCGAGCTGGAGGCATTTGGACTATACAA -CAATCCTTGTCGAACCGCTCCAACCTCAGGATATAAGTCTTATAGCTCCTCTTCTGTCCC -AGCATCTTTACAAAAGAAGTTAGTGCAGTTGACATCAGGTGGCCGGCCATTGTCTGCGGC -ATCCGATGAACTAGGCAAGCTCGCGCAATCTTTGCCAGAAATGGCCGACATGGAGTTGCA -GGCTGTGGGGGGAGAACGTATCTCCGTCAACTCCCGCGTATTGACTCGACGCTGGGGCCC -TTACTTTATCCAACTCCTCCGTGAGTCGTCGGATGGAGGTGTTGCAGACATCGGAACTCT -CCGTCCGGCCCAGATACACCCGAGCCGCAACTCGAGTATTACCATCACTCCCTCAATAGG -CTCCGATGCCACAACTCTTGTCAATCAGTCTGTTCCTTCTAAGTCTCTGCTTGAGAATCT -TGAGGTTCCGTCTGCTCATAGCCTCGCCCCAACCTCCCGGCCGCGTGTGCTGTACCTCCC -CCATACCATTTTGACTCTGCAGGTGCTGGTTCAATACCTCTACACCTCAGCCCTTCCATC -TACAGGCACCTCGCTTTGCACACCACAAATTCTCTGCTCGCTTCTACAGCTTGCACGCCC -GTATCAGATCGATGGACTACTAGAAGCCACCGTCGAAAGACTCCATCAAGTTCTCGATGG -TCGCAACGCCGCGGCTGTATTCAACGCAGCTGCTATGGCTGCTGGTGGTGGCCGTGGTAC -TGGATTCAATGGTGGGCCAGGGGGTACACTCGAGGCGCTGAACGGAGCTTATACCGGCCA -CAACGGCACTGCACCTACGATTGGCGAGAACAGTAACTCCCAGCACGGTGCACTCGCATC -AGACTCCTCTGACACAGAGCACGGTGCTGCCTCCGCTTCAGCAGCGAGCAGCACAGGCGA -TCTCACCAATTTCGCTCGAGGTATTCCATCTCTCCGCATCGACACCAGTGTCTCCCAACA -TTCGCGTCAGCGCAGCGCCAACCGCGACCGCGAGGATTCCATGAGTAACCCAAGCACAGC -GACATCCGCGTCTAGTGCTAGTTTCAGTCAGTCTGACTCCGAATGGGTCAGTGGCGATGA -AAGCCGCTCGCAATCTCGCTCGCAATCTCGCTCGCAGTCCCGCTCTACCCATCACCGCCG -TGACACAGATGCCGAGTTGCGTCGCCCCCAACGTGAGCTCTGGACCGGTGACTTGAGTAG -TGTAATCGGTCTCCAGAAGCGCGGCTTGCGTGGTCTTATGGAAGGTCGGCGCATGCGCGA -GCGTAGTTCAAAGCCCGCCAGTACTGGCAGTGGCTCGATGTCTATGCCACCGTCGGCTGG -CTCAGGCGAACAGAACGCTCCTTTCCAAGGCGTCGCCACTTGATGGTGGCCTGGAGCTAA -TTTGGCGTGCTGTGACCCCTATACGATCTCTTCATTTTCGCCTTATCTTCTGGTACTTAG 
-TTCTCTTCGATGATCTCTCCCACATACCGTATTTGGATTATACATATTTTTTTTTCCTTT -GCTTTTCACCCCAGCAGGCTGGCAATGTGGGTTGGATTTAGGAGAGGGCTTCCAAAATTG -GGATGCATCTGGATGTGGTACCTTTCTTCTGTTTATGTTATTTGGGTTGGGTAGGTTATT -CTCGCCTACACACAGCCATGGATATATACTGAGACTCCGGTTTTGGAACTTACAACAAAT -ACAATGAGATGCATTAAACGCGAATTTATAATTTTCTTTTTCCTACAGAGACTTTGGGTA -GAGATGAGAAGAGACAGATTCTTTTGGGGCTTATTTACAGTTGAAAATCCTCTCCAACAC -CATGCAGGTTTATACCGTACATATAATTCCGCTTTCAAGGAACAGAAGAATAAGTTGGAG -CACATCAGTCCTTTTTGACCTGATCTCCAAGCCGCTTGGGACTCTTGCCACCATAGACTA -AGGGAGAGATAGATTAGCTGTGCACCACAAACTTGGATATACAACCGAAGAAAAGATTGA -GATTTCCTGGGCATTGAAAGACTTACCTCGCTCATAAAGAATCCACGACGTGTACATTAG -AATGGGTAGTCCGACAATAGTCGCCGTCCATCTAGAAACAGAAGGGGTTAGGTTTTTCGA -TTTCGTCATGTATCGTTCACCAACCAAAAGCAAAAGCAAAACACTGAAGTACAAATGTAA -ACAGATGATGACACCTACCTGCGTGCAGCAGCTTTATACTCCTTTGTCTGCCGAATATCA -TCCACCCCTTTGGGAAGCCGCGGAGAAGCGGGTTGCCGGACTGGGCGTGGAGGCCGGGTT -TGGGGGTCCACGGATCGGTATCTGTGCTGGTCTTTAGGGGCTGCCATTACTCTTGACTTT -GGTTAGATATATTTGAGCATATATGAATTTGTAGGGGCGGAGTTACATGTTTTGGTCGGT -GTATTGGAGTTTATGAATTCTGTCGGGGAAGATTCGGGAGAGGTGTTTGCGCCTGAGGCA -TCACTATAAATACTACCATCTCCAATGAAGAGTTATAGAATAGACTAAACTACAACCACT -TATTATGAGTAGATAGATATTATACATCAGAATATAATTAAGATCAAAGCCCTTTTCAAG -GTATGAAGCTTAGAGACGTTTTTTTTTTTCGGTTTTCTACCAATGTATAGGTCTCTACCA -CGTCTGTTAAGCTCGTATTACTTACCCTGCAGTTCAGTTGCATGAAAATCGCTCTGTATC -CATAGCATACCCAAGGTATCGAGGCAGTCAGTGTCGCTAACCGACATATGTGGGTTTGAA -TTTCTGAATTTGAGAAGAATAGGCCTATTCGTTTACAATGCCTTCCAAATGGCTAAAATG -CTTAGATTCACGCTTCTTTTCTCAATTGAAAATATGATTGATAAATCATATAGATGGAGG -AAGAAGAGATAGTCATAATGATTACTACTATAAAAGGGAGGTCAATGATTTATGAAGATA -GAGTTGCTGCCGAATTGTACTTATCACCGATCTTCAAGTAGACTGTGTAGTTACAATGTA -TATACAAGGAAGCTTAAATTGGCAAGCATTACTCGGATGGCAGGGCTAGTTGTGAAGTAT -ACTGCTGTTTCTTCAGCATGTAACTCTATATGAAGATTCGACGAATGGAGGTCGGAAAAA -GGTTACCGTATACTTTGATATTAGACATGTATTTAGGACTAGACGGGAGTACCACCACTT -TTTAAGTGAACCTGTAACTTAGAACTTATCTGAACCGCGCCTTTTTCTCGCATGTTTGGA -TGTAATATATAAATCTCAATAAGTACTCTAACTTATCATTGAACTAGTAGAATATTACCT -CATAATGAAGGCTAAAAATGGGGTCTTAGCAAAAAGTAATAAAATAGTCACGTTTCTCAG 
-GATTTTTGCATTATTTGGTGCTAATTGAGCCCTATTGGGCCCCATCCCAAGCCCAGTGGG -CTTCCAACGCCCGGCCCATAGCCCACCGGTAGGCAGGCATGCTATGCACACAGAGGGCCA -TGGAGGAAACACTTGCCTTTCTATGATATCATGCAAGTTGTAATAGTCGAGGTCTGTACT -TGCATCGCACATGAGTTATTTTCTTTCACTCGCACTGACATATCCACCAAGACTTCCATT -CTGGGAGAGGATCCAAAAGCCCCGAAAAGTAAGCCTGGATACATTGGGGGATGTATACGG -TAGTTGACACCCTCGACGAAAGGAAACGACTGAAACACATCATAGATGGTTACGAGCCAC -CAGAGTTTCAATGCTCATACAATCTACACACAGGAAGGGTCGACTGCCTGGGAAACTGTG -TGTCAATGATTCTAGATCACGAGGAATGTCCATATGAGAGGTTCTACAGTGCGGAGCGGC -GTCTTTCTCTTCTCAATACCCGCCCTCTGATGAAATCTGTCTTCTCTAACCCCGACCTCG -CTGCGTTGAATGACTTTCTCAAAAAAGAAGATATAGTTCACAGCCATTCGTGAGTAAATT -GTAATGGTTATGGCCAAGTACTCATAGAAAACTAATCCATAAATGATAGAAATCTCCTGA -GGAAGTTGAACGAATGGCATTGCTCAGAATTAAGAGAGCTCCGATTCTGTGGAATATTCA -TCAGCGAAGGCTGGAGGACTTAGACACAGCCCATGATCCTGTTCTTGACAGTGACTATAC -TGCTAATCATAGTCATTGCTGCTAAGTTGATATTCTACGATTGGGGCACAGCATGGAATA -TCGGTTGCTTCTTCGCCACATTGACAACAATCCCATGGGTATGGCCCACGTGGCCGCTTG -AGAAACCCTGTGCAGGCAGACTTAAAGTCGTTGTAGGAAGGCTCTGGTTAGTGACAGCAA -GGCGTTCTGTTGCTTGAAGGTAAACTAACATGGATCAATGGTGGTTTCGGGCACAGGTCT -AGAAACAAATCGTGAGGGTGCATAGGAAAATCCGCTCTCGCCACGCTTTTTCCATATCAA -TGGGCCTCGAAGTATCTTAGGACGGTATCATCAAAATACAGATCTCCAAGAATACAGTGT -CGGAGATGTCCAAGAAAAGGACCGGTTGCTACGCCGGATTATTTGAAATTTACCTTAGGG -TTTGCCATTCCTAGCCCTCGTTGATATAGATACTATTCTTTTTAATTTCAATATTACGAT -TAAGAATAATATATTGACTTGTCTCCTTAAAAGCTAATTATCGACCCGATATAGACTCTA -GCTTTACCTATCTAGGGTCGTAGAGTCCGGCTCATCTATATCCTCTGGTTCACCCTCCTA -AGAGATTGTGGGACCGCAAAATCTATACTTCGCCATGAAAGACAATTGGGATATTCAAAG -TTTCATTATTGACACTTTCAAAAGCAAGCAGAATTATGCTTCTTTGATTATTGATGCATG -TGTAACGATAATTTAGGCATGTTAATATCTTTTGTCACTGTGCAAAAAATACGACATTCA -GACATCAACACCATGTAAAAAGGCGTTTTGAGCCCTGGCCCCTATACGTCTTTGTACAAT -GTGCCACAACCTACAAATGGCTCGTAAGACAAACCAGGATACTTTATACCAGATCGATCT -TAGCGTGATGAAATCTTGATCTTAGTCTGGTTTCAAGTCTAGTTGACTAGATTTGAGCTA -TCAAACTGATGCGACCCTTTACAACTAAGATCACTATACGGAGTAGCTGCAATTCACTTC -TTAACTAAAGTTTGTACATCATACCTATCACAATTCCTCATCAGTAGGGAATGCCATGTT -TATTACACAAAGTAACCATATCTGAGATGTAAGTCAGGAGTACGCGGAAAATGAGTGAGA 
-AATGAGGATTACGGAAAGGGCAAATGACCACGAAGTGAAGAAATATGAGTGGACCACAAA -GACAACTGAAAAATTTCACACTTCACTATGAAAACTCCATGGAATGCTCCCGCGGGGCTA -ATCATACACGCTTGGAACGCCCGCTGGTAATCATACAAAAAAGAAATAGCAACACAAGAG -AGAGTGAATTCTGATATCGATCGCCCGGCTTACGCCAGGCGGAAAACACCCTCATCCTGC -CCTCATTGGGGCCTCACACGCCCCGGCGGCGGCGGCGCTTGTTGGTAGATGCGGGGGACG -GTGCACGTGAGGGGGCGGGGGAGGAAGGGGGCTGCACGGCGGGGGCTTGCTTGGAAGCAG -AGGCCTCATTCCCGGGGGTCTCAATCTTGGACTCCGGCTTCAAGGTCAACCGGAGCCGCT -GCACCTTAGGTTTATCGCCTTCGGTAGCAGCGGGGGTGACGGCGGCAGCCGGGCCCGAGG -ACTGGACCTTTGTCGCAGTAGTCGGAGAAGAAGTAGGAGAGGTAGGGGGAAGAACAGTAG -CAGCAGGAGCGGTCTGGCGACGGCGACGTTGTGACTTACGTCCACCGGTCCCGGTCCCGG -TCTCAGTAGTGGTGACGGGGGCAGCTGCAGCGGGCTCATCGGCGGCATCTGCGGCCGCGG -TCTGACGGCGCTTTCGGGCACGGGGCTTCTTGGTTGCAGGGGCTGGCGCAGGGGTTCGAG -CAAGAGCCCTGGCCTTCTCCTCTTCGACGTTTTCAGTGACAAAGGGGGAATTCGTGTCGG -ATGGGAGATTACGGTCTCCCATGAGTGGAGCGAAGCTGTGAGAGATATCGGCCTCGGCCT -GCTGGCTGTTTCTGGCGCGCACGGGCGGGAAAGGAAGACCCTCGGAGCCGTAAGTATGAG -CTGGATAATAGGGGTGGTATTCGTCGGGAATCAGAGCAACTTCTTCACCGCTGGAGTTGA -CATAGCCAGTCTGAACCCAGCCATCGCTGCTGACCTTGGTGTCAGACTGCTTTTTGTAGC -TCAATATGCTTGAGCCGTCGATGACGGTGGTGGGTGGAGTAGGAGGAGAAGGAGCAATTT -CAGGCGCTGGGGGGTCTTGGTTGatcatatcgatatcaccatcaacatcaatatcaCTCG -GCGAGTTGAGCTTGGTGGCGGACATGTCGGTGCCATATTTCTCTTCATCTTGGAGACGGA -AGTATTCACTTCGCTGCTGCTCCCAGAGCACGGAGAAATTCAAAGGGAGGGTGAACTCAG -GGCTGAGAGCCTGAACAGTCAAGTCAAATAGCCGTTTGCCAGCCACTTTCGGGTCAATAT -CGAACCCGATTAAAACAGCATTCTTTCGCATGTTCAAATTGGACATTCTTTTTGACTTTC -TGGTTGTCTTCATAGGCGTAGGCTTTTTCGACACATTGTTCTTCGCAACCTTGGTGGCCA -CGGGGGGAGGAGTTTCCACGGTCGGGGTGGATATAATCGTATTATTGCGGCGAGGTTTCT -TCTTACATGATTCGAAAGATTCAATGGCCCTCATGGTGGGTTTGCGAACCCGGGATGAGT -TCCGCATGTCGGAAGCACTAGTAGGGCTTGAGGGACCGTCAACTGTTCCTTCAGATTCAA -AGCTACCCATGTACTGGTCAACCGTCCAGCTAGCGTTGCCAGAGCTATCTGGAAAGTCGA -TATCCGAGACTGCTGGCTTCGGCGTTTCAAGCGCAGCTGTTCGTCTTGTCCGTCGACTCC -CCGGGGTGGTGGTCGGGGCTTCTAGGGCTGGAGTGTGCACAGTTGAAAGACGTGACGGAC -GAGATTGAAAGGCTCGCCGGCGTGAAGATGGTGTTTCAGGAGATGTACCACATGAACCAG -GACTTGGAACAGGTGGGGTTGGAGTGTAAAAATGAATATTGATAGAATTGTCTGGTGGAG 
-AGGCGGATGTAAGGTTCAACGTTTTGGCGGGCTGAACGCTCTCATTTGTACTTTTTGACG -GGGGAGGTACTCGGGGCGAGCGCCGAGAAGGTGTCAATGTAGGCGCTTTATTTATGCGCG -CAGGCATCGACTTACGCTGACCCTTCTTGGGAGGCATCGTGAATTAAAGCTAAATGGCCC -GGAATTCGTCAGTATAATCTATTCATGCAAATACATAGGCGGGAAATAGAGCCACTGAAG -CGTACAAATGAAAAACTAGATAATACCTCCAGTTACAGGCAACTAGGGTGCGGGAATAAG -AATATATGGGGATCATCAGATGAAGGGCAGGGTCGACGTCAGAAGACCGCAGTGCACGGA -AAGGGAGAAGTTGGGAATTAGGGCCCGACGTATAGACGCAGAACTCACGATATTAGAGGT -ACAACCATCCGCAGTTAATTTCTAAAGGTTTCATAAACAAGCTTGTAAGGACTAGGCGGG -CGTTGGAGCTGTATTTGTGGCAGATAAAGCGAAACAACGTGATGAAAGATGTTGAGGTCG -AGCAGAAAAGAAATTAGGGCTACGGTTGATCGGCGCTTATCGGCTTCACTTGTCCGCCTT -ATTCTGCAACTCCTCTATCCTCACATCATCTCCCCGAGCTCTCTACAGCTGTTCTCTGTT -CTGTTTTCTTTTGGTTCTTTTTATCACAATGCATATGTGGCAGGCCACCGTTCCATTTTG -CTTACTGGCCGCGGCTGCCTTGCCCGGCGCCGCCGCCACAGCTTGGGGCTTTACTGATGC -CACCGTGGCCGTTCAACCAAAAGGTGCTGGTATCAATGGAGGTTTCAAGGAACAGTATGT -ATTTTGACCCAGATAGAGGTACATGAGCCTAATTAACATATTAATATAGATTTCTTGCAT -CCAAGCCGCTCTCAAAGCCCATCTCCCTTGTCGGTACCGATACCCTACGCGTCACCTTGA -CAGCTCAGGAAGACAGCTCTCCAAAGCGCCCAGACCAGGCTTTCCTCTTGCTCAAGGATA -CTCAGACTGGATTGGATATCTCCTACCCTTTCACTGTCAAGGATAATGGCAAATCAAGGC -TGGAATTGGTATGCAAACCCTCAAAACGAATTTCCGCGGCGCAGCTAACACATGCCTCCA -GACCCAGAAAGACCTGCCAATTCAATTCCTTTCGCTATCTGATCCAGTTGATGCGCATGT -TGTCATTGGTGGATTTGGAAGCTCCGATTCCTACGACAGCTCTGCGTTCAAGTTGTCGAT -TGATCGCAATCCCGAAGAGGCCGTGCCGACCGTTGAAACTGAGCGATACGGCAAGAAGCC -CGAGATCCACCATATCTTCAAGGAATCTGCATCTAGCCCCCCAATTGCCATCACCCTGGC -ATTTGTCGCCATGGTTGGCGCTGCTATCCCCGCTCTTGCTGGCTTGGTAAGTTGATCTGA -TTACTGTGCCAACAACGTTGCTTATAGATTGTCTCTAGTGGCTCTTCCTCGGCGCCAACA -TCAACCATCTACCCACCGCATTCAAGTCTGCCCCTCTTCCACACGCTGTATTTCTCGGAT -CCTTGTTCGCATTTGAGGGGATCTTCTTCCTCTACTATACCTCGTGGAACTTGTTCCAGG -TCCTCCCCGCGATGGCTGCTGTCGGTGTAATCGCATTTGTCAGTGGAAGCCGTGCGCTGG -GTGAAGTACAGGGTCGCCGTCTTGCTGGTCTTCGGTAAACCTCTGGAGCATACATAAATT -CATCTACCGTGGTCCTGTTGGTCCTCTCAGGGTGCTCCTAAACTACTGTCTGGTCTTAGG -TCCCAGTGGTATTGTATCTTAGCTGCGGTTATACCCGGTATCTTGCAAAATTACAGATTT -CCCACTGAAACATCACGAAAATCTCGCTGATTTTCGAAAGTAATCGTTTCCCTTTAAAAT 
-GTCCAACGGTTGGGTTGGAGGTCCCCAAATCCGCATTCGTGCCAACATACGTTCCCTGTC -AAAGCTAGTCAGCTTGACATATTTATATATTTTGGACCAGTACTCAGTTGCTCCGCCTCA -CACAGTCTCTCTGTCTTCGTATTCTCAAGGAAGCTGTTGACTACATGCATGTATGTTCCT -TTGACGTCACAAGACAGGACAGGCCTTCACGGCACAAACGGTGATATACATATCTGGATA -TCTTACCGCTCCCTTTACCGCCTACTCAGGATGGTACAATCCTGCTATGGAGATCAGGCT -ATATACTCGACCGACACTGTATGAAAAGAGGGCTTCTTTAGTATAATTGATCATTTATAC -CGTTTTGCATGATCTCGACACTTTGAAAAGGCATTAGCCCATATAGCTAATATACTCCAG -CTTAGAAATCAGCCAATAGTTTTCGACTGTTCTGCTGGCAAAATACCGAAATGTCCGCGG -GTCAGGCCGTACGATAAGATCGAACGCCAAGGCGATTTCGGGACCTCCGCTCAAAGCTCC -GCAAAAAGCTGCAACGTCTTCCACAAACACATTCGATTCCACAGCTCATCATTTCCAATG -GGTTCAGCTGCTTCAAAGCCTGTTAAATCAGCGGCTGGTGCAGCCTCACGGCGCCAATAC -CCCAAGCAGCCGGCTCCTCCACCAAGGGGCCCGCGCAAAGCGCCGAAAGAAAGCAAAACA -GCGTCCGCACCTGCGCCTACTCCTGCAACCAAGCCCAAGGCTAGTCCCTCGCCTCCTCGA -GCACCCGCACCTTCCTCTCAAGGTCCAAAATACCACTCAAAGGAGAAAGCGTCCGGCGTG -AAGTCTGATGGTACGTACACTCGCCGCCGAATCTTTCAAGTCTTGACCATGCAACAACTG -CAAGATCAGCAAAGAGGCATTTATTAATGAGTCTATAGCAATTGATATGGACGGCCGGGA -TCCCGATTTCGCCGCCTCCCTCCGGTCAATAGGCCCCGTCGACCCATCACCTACATTCTC -GAACTCAAGCACCGTCAACCGCCCCACCTCAACGCAGACTGTCTTTCCACAAGCCTCTAA -CCCGGCATTGCTAGTTGTCACTGCTCGCCAACGTCTCACCGAGGCCGCAGAAAGAGAAGC -TGAGCACTTCGGTCGTGCGGGTCACCCGGGGAGATCGTTCCTGGATGCCCTGACTATCCA -GCAGGTGTTGACTATGCGGGATAAGCAGGGTATGCGCCGTGGAGATATTGAACGATTCCT -TGGGCTGAAGAAAGGGGTCCTGGAGCGATTAGGGAAGGACGAAGTTGTGTCGCGTATCAC -ATGATCTTCTTCCTCTACCTAGGTGAGCTGTTTTTGTTTAGCGGTCTTTCCCCCAAAATA -TCATTCGATATCTTTGGCGCATTCTGGATTATTGTTTGGCGTCATTCTTCCTGGCTTTAT -GTCTACATTGTATAGTAACGTGGTTGAGTCAACCCCACTCATGATAATATCGATTTATTT -GCTTACAGTAGCTATAAATGTCTTCTTTGACACTTTTCAACTTCTGGCTCAACGGACCGA -TAATTACAAATCATCCGAGTGACCCCAGGACCACTTGTCAAAATGGTCTACATTTTCAAT -ATCAAGTATCGGTAATATGGAGGGCCGTGATATTCGAGCGATACGAGGTCAAGTCGATGG -AAATTTGACGTCGACCACAAGCTTGTTGCAGAGCAAGTGAATCCAAGAGTCCCAGGTCAG -ATTATTTTGGATTTTAATCCCCATAACCCTTTTCACCAATTCCCCCCAAATTCTTACTCC -TTGATCAAGCACGCTGTCACAACTTTGGATAGCAGAAACGGAGCTGTCAATGGTTACTCA -AGCATGGCTCGGACAGATGTGACATGGACGACTGAATATTACCAACTTTCCTTGGTATCG 
-TGAGCTATACAAGCTCTGCCGGTCGCTGTTTCTTTCCAATGGCAAGGATGAGGGTGATAT -TGACTACATGAAATGGGGTCGGGTACCGGAATATCGGAAGTAGAGAGGCTCACTGCGAGA -CCAAGTGACCGAGCTAGGTATGGCGGGCCCTCAGAAACTCCCCTAATACTACTCTACATC -AAGATATTCCATCAAAGGGAGACTATAGCAAAGTGAGATCGCTGTAACACTTCCCACGCC -CCAAGCCCCAAGCCCGAAAGTAGGCCAGTTGAATATTGAGATTCAAAACACGGACATCGA -TATTGGTAAGAAGGTGGATATAGGATCATCTTTGTGTTGAAACGCCGGTATTTTCTCGTT -ACTAGACATGCAATGTTCCACGACGAAGAAGGGTCCCCCGTTACACAATTCATGCCGCCT -TAGTTTCATGATGTGGGTGCATCAGCTTCACCATGGGAATAGCCGTTCTCGCGACCGGGA -TTCGCGACTTGAATGCTTGTTCAATCTCTGCCAAAGTCTTGCCTTTCGTTTCGACCATGT -AAAAGAAACTGACGACAGTTGCGACCGCAGTGCAGCCGCCGAACAGAAAATAGGCACTGG -GCACGCTTTTATCAAGCAGAATCGGGCATACAAAGGCGACAAAGAAGTTACAAACCCAAT -TAGCTCCTGTCAGACAAGTTAGTCAAGCAATGCATTGTCAGGTGTCAAAACAAACCATAT -GCGAGACTAGTTGCCTGGGCGCGTGTGTGCTGAGGCTGGATCTCTGGCGCCCACACCTTG -ATGCCGATGCCCCAGGTCATGGCCTGCACCATGCAATAGAGATATATAGATACAATCACC -ACCCAACGACCAATACCCTTCGTGGGAAGGACCGCATTGCCAGCATAGAGGCTTCCAATC -AAGATTAGGAGCACTGTCAAGCCCACTCCGCCCACCAGTGTGCTAGTGCGGCGACCCCAT -CTATCGGCTAGCAGTGTCGCAGGGATACTAGTTACGACTATCAGGATGGCAGACACACCA -GATGCCTGCACCAGGAATGAGATCAGCTTTGTATTGCCGCTTGGGCGGGGGGAGAAGACA -AAGAACCAAGCACGAACCAAGAATGATGCTTCTTGAGATTCCAGGCCGGCCTGCTGGAAA -AGTAGCGGGGCGTACTGGAAAAGGTCAGATAATCCAAGGGCAAACCATTGAGTATAGGAG -CGTACATAGAGTACAGCATCGATGCCACAGAGCTGGAGGAATCCCATCAAAAGCACTGCC -AGAGATGTCTGCTTCCGCACGTCTCGGGCCCATAAACTCCGAAAGTTGACAGTCTTTTTA -CTTTCTTCCTCAGGTGAACTTGATTGCAGTGAAATTGCTGTAGGGTTCTCGGGTGTCGAT -GTACGGCCATCGCTTTCACGGTCTTCCTGTTTGACTTCTAGCTCGTCCCATACCTTCTCA -GCTGCACGATAATCTCGATGTCTCATCAGCCATCGCGGAGACTCCAGCAGAAGGAATAGA -TTGCTTAGCAGGTACAATGTAGCCATCGCAGAGATGATGATGAAGGGGAGTCGCCATGAA -AGTGAACCTGGTATCTTCACTGTACCATAGCTGATGAAATATCCCACTACCAGCGCCACG -CATGTCATAAACTGCGGTCCTGAGGCGAGTGGCCCTCGGGCTTTTGGGGGTGATATTTCG -CAGATGTAAACGGTCTGCGTGCTGAAGTAGAGGCCATAGCCAATACCCTCAATTACGCGT -CCTACGGCGAACATCGAAAGACTTACTGCAGCAGCCTCCAAGACGGTTCCCACGATAAAA -ACCGCTGCCCCAAATGCCATTGCTTTCGGGCGGCCTAGTTTGTCCGCCAGTGTGCCAGCA -AATATAGACCCAAGGGCAGCGCAAAGAAGAATGCAGGACACAAGAATACCATGGATAGCA 
-GAAGAGTGCTTCCCGAACTGCTCGATGAAGCTGTCCATGATTGTGACAGGTCCGATCACG -CCAGTGTCCATTCTATCATAACGGCCAGTCAGGATGTTATAGAGCTTGTATTGATGAGAT -TGCCAGGCAGGCTTACCCCAGTAAGAAGCCCCCAAAGGAGCAGAGGACACTAGCAGTTGT -GTACTTCGCAGGCCGAAAGCGAGCCCATGGCGGGAATACAACGCCGAGTTTCATTGTTTG -TGCTTGCTGAACGTCTTTTAATTTGACATAGAGATGCTGTCTACCTTGGAAAGCCCTAGA -TATTGAGCGCCTTCGCAACCACAACAAGTTTTGGTGCTCAATGAGAGGGAATGTAGTCCC -GTTTATATATTGATATGCCTTGGGCTCACTTGAAATAAAGGAGGCAGGAAGATTTAGGTA -GTAGATAGGAGAGGAAAGAGGGGTATATTGGGTGTATGCAGATGATCAAGTGAAGACTGA -CATTTCGCTGACTCCTACTCTTCCGCCAATGAAGAGTGTTGTTCGCAGCAAGGATGCCTC -CCAGTTTGCTCTCCATGACGATTGCCAAAGATTTACCAAACAGCCATAGGGGAAGAAATC -TTACCAGAGGTAGTCTAACGCGGGTCTCTTGTCCTATTCAGAAGCGAAATGTACTATATT -TCGACTGAAGACATCTGAGTTGATGAGTGTCTCAATCTATTCTCGCTTACTGGACTCGCA -TTATTTCATGGATGGAGGGTAGCTAGAGTCAGAAACCGTCGTACTCCAGCCTCTAGTTCT -CTGATCACCCACCAGTATTTATGGTCATACACCAACATGACAGATGAGTATAACTCAATC -TAGTAGGTTCCTCACGGGCCTCTAATGTCCCAAAATGGCAAAATTACGATGGCGCTTCGA -AGCTTTGAATGGCAGCTTTACGGCAAACAAGGATACACAATAGCAGCAAGAGTCCAAGAC -ACAGAATTAAGCGTCATGGCAGGGAGTAGGTACAATATTGATCACTGAAATTGAATTTTT -TAGCAATAATCTAGGCCATTTACGGGTTACTAGGACGGGAATCTCTAACTTAGCCGCTCA -ACTACTGATCGTATCCCCGCCTGATTGAAATAAGCCTATATATATAGACACTACGTATGT -TGAAGGGATTGTGTAAATGGAACTATACTGTATATTGTACGGAGTTTTAGAATGGGTTTC -TGGCAATGATATTGAGACTGTTGAAGGTAAAGTGAGTAAGTTATTCTCCATTCCATCTTC -GGGATGGAATTTCCCAGGTCATTTCCAACGACAGAACGGGTAATAATTTTGAGATCGAAT -CTCCAAGCTGATATGCAATTTAAAATACTTTTATAGGAAAAATGGTGATGCTTCGGCACC -GACTACCAATCACATAACAAAGATACGCAGTGACTCAGTGGGATACGATCTAAATTACCC -CATATAATGATAAACTACATTATATATATGCCTACCTCTCTCTTTGTTACGCACTCTGAA -AACAATTGAACACATTCGCTCCTCCATCCTTTCTGAAGAGTCACGGCTACAATGTATCTC -CGCGCCGTCCATGCCGAGGGACAACTCCCCGTCCTACAGCAATTCGTCCGCGACAACCCA -CTCGGAATTCTAACAACGGCGATCAAATCCCCAATCCACTCATTCATCCAATCAAGCCAC -ATACCATTCGTCCTTGATGTTCCTCCCTCGACTAAGGACGATGACACAATCCCGACCGGC -ATCTTGCGCGGCCACATGGCCAAGCAGAACCCACAAGCAAAGGCACTGATGGAAGCTCTT -GCTGAGCAGAATGCAGCAGGGAATAATAGACTTGAGCTCACCGACGAAGTTCTTATTCTC -TTCAACGGCCCGCATCACCACTATGTGACGCCCAAATTCTACACAGAAACTAAACCGGTT 
-TCTGGCAAGGTTGTCCCGACGTGGAACTATTCCGCCGTGCAGGCGTATGGCAAGATCAGT -GTATACTGCAACTCCAAGGCAGAGGAAACGGGTGCGTTTTTGCAGAAACAGATTGAAGAT -CTGTCGCGACAGTCTGAGACGGGTATCATGGGGTATACTGGGGGTGATAAGAAGAGCGCG -TGGAATGTCTCTGAGGCACCAGTCAATTATGTGGAGCTTTTGAAGAAGAATATCATTGGC -ATTGAGATTCGTATTGAGCGGTTGCAGGGGAAATTCAAGATGAGTCAGGAGATGGGCGAA -GGGGATCGGGAGGGGGTTGTGAAGGGGTTTGAGGAATTAGGGTCGGAGGTTGGGAGAGGA -ATCGCTGATGGTGTTAGGGAACGTGCTGCCTTGAAGAATCAGCAGAAGTCATAGTGATTT -ACCAGAGAGCATTTGTATCTAATCTGGCGGTTGCCGTTGCATATAGACCTGATGTGATCA -TGAGAGATGAAAAGCATACACAGGTGATATAGAACATTTTTGGAATTGTCCGTGTTTAAT -TATGTCCAAGCTTTCAAGACGGAGACTACATAAGTACATGTGCACGGTTCTCATACGTTT -CACTCTGCATTTTTGAGTTCTCTACTCATCACCTTGATCATACTTTGTGTTACTTGGGTT -CCTCCAGTCGCATTTTGGCAACCAAAGCCTAGGAATCATGCCTACGTCCAACATGCCGAG -AGGCTCTGACTGTTTCCTAAATGAGAAGCGAACAATGTAGGCCTTCATTTGTTTGTAAGG -CTTGGGGGGCATGCGTCAAGCCTCACCACCAAATCAATACGATGGCTCGGATTTTGCTGA -AAAGATCCCACTTGACCTGTATATCCATATTGCCATAGTGGTCACATGAAAGATACAGGC -CTGACGTACAGGTCTTTCTCCAAGCCTCCTGCGATATGTATCGCCAAGCATTTCTAGGCC -AAGACCTGCAGAATTAACCTCGAACACCCCTGGACTGGATTATGGCCGAATGCGTTGCGT -ATAAATATATTGCCCGGCAGAGTGTGTTTTATCCAACTCGGTACCTCCCCACAATTAGAT -CTCAGAGCCACGGGATCTTCAAGATCGGGCCATCATGTATGAGAGAGCAATGCCTCTAAG -TGGAAGTGGGGGCGAGGAAACGGGGAATTGACGCAAATCACTTTTCCCCGGATTTTACCG -TTAGAAAGGGACCTCGGGAGGGCAAACATGTTACCCCTGAACCCTGGGACCATGGAATCT -TGTAGCCTAAGATTAGAATCGTTCCGGGCGCTTCCCGAGGTCTCTGGTTTTTCAATTGGG -TTCCCGTTTTTTCAATTGTATTTTACTGTATTTCCGGTTTCACCGAAGGAGCGAATCATT -TCGGAATAGTGCAATGCAGGGTCCACTTAGTCATTAACGCATAACCTATACAGCCGCTTG -TCTCTTCCTAGGTCCACTTTTATTCAAAACAGGTTGGGATGATGCTATTTTAATATGGAG -AAAAGAGGGACTAACGTAATAATAAGTGTGCGGAAAATGAAAAAAGGGCGAAAAAAGGGC -AAGAAAAGAAAAGAAAAAGGGAAAAACAAGCATTTGGGGTAATTCGGAGCCGCTTAGTCT -TAGTCGCGTGCACAATTTCCTATCGCCTAATTGGTCTTGTTCTGTTATTCTATCTCTAAG -AATAACCCTTCCCTTTCCTCCCAAGCTCCCTTCTCTCTCTCTTCATGTCACCTTGTCTCT -CGTTTACCATTTCGACTTTGATTCCGTATTCTCCATTGCCAACTGCGCTCCAGTACAAAT -GAAATGATTCCAACATGTCTATCTTCGCCCTCTTACCACCTGTGCCGCCCTTCCTTTTCT -CTGTACTCGTCGTGAGTGGCCTCGCCTCCAAGGCTTTACACATTGCGCTACATTTTCGGT 
-CCTTACCATTTCTTTACTTCGTCCTCTACTCGCCCACCCTTATTCTACCAGATGTGTTCG -TCATCTTTCTCGTGCGAGTCTTGCTACGCTTCCATGCGCCGGATAGTCGATGGCAATGGC -TCTCGACTGTGTTGGGAGGGGCACTGTCGTATGTTTACCCCGAATACCCAACTACCGAAA -GGGAGCACAAGGGGAAAGTTGCGCAACATATTCTAACCCGAGTACTAGCTTGATCACATG -GGGTGCTTCCTCCATCCAGTTCGGCTTCTTCATGCAAACCGGTGCCGAGGTCGCCTGGGC -AGCTGGTGGTAGCTTCCTGAGCGATCCGGCGGCCATGAAAATTCTCCTCAGCGGCATCTC -AACTGTGACCGCTGCGGCCACCGTCCTCGGACTGGTCGCCTGGTTGCTTCATTCGCATTT -GTACAACATCACCGGTCTCGGCTTGCAGGCAATCCGAGACCTGTCCGCGGGGGGATACAA -AGCGCGGTATACATTGATAACCGCCCCTAGCAAATCGTGCATGGCCTCAATCGACCTCCG -GTCCCTCCGTCGCATTATTCCTGCAGTCGCCATTTGTGTCTCTCTCATCTTCTTGGAACT -CACCAGGCCTGCTGTACCATATGACCATCTGTCCGGAGCTCTGCCACTGACACTGCTGGA -TGCTTTTCAGAAAAAGACGGTGACGGCAGAGGGATGTCGCGAGCCCGCCATGCCATTCCC -TTTGTTGATCACTGATGAGAATGGGGTGTCCCGTCCTCAGTTCCAACCGGACTCGATGTT -TGTGGAACAATGGTCACGAGCTACCTGGCTTCCCGAAAATCCACCCCGAGGATTCAGCCG -CTGGGATCTCAGCCCGAGCGATCGTGCAGAGAAGGATAATCATTATTCTTTTGTTTGCCC -TGGGGATGAGGGATTCTACGATCCGAAAAACGACCCGCTAATGGTCACGAATCTGGGCGG -TGAAATTTATGGGCCCCTGCAAAAGGCCTTCAAAGAGCACTCAGTTGAGGTCAACCATAT -AGTGCTATTAACGCTGGAAAGTGGCCGCAAGGAACTTTTCCCCACTCAACAGGGCACCCC -CTTGTTCGACGGTCTTCTCGCCTCGCATAAGAAGCAGAACAACGGCAAGGCAATCGATCG -TCTGGTCAACATGACCCTGGTTGCTCAACAGTTGACCGGCGAGTACGCAACCGATAGCAA -GGGGAACAAAATCGACTTGAGTCACTCCCCGTGGCAGAATCCACCACAAGAAGGTATGGG -TGGGCTGAATGTCCGTGGCGCCATGACAGGTAGCTCTTTGACTTTTAAGAGTATTTTGGG -GAGTCACTGCGGTGTGCACCCTTTGCCAGTGGACCTGCTGGAGGAGTCCCTGCTCGAAAT -TTATCAGCCATGCTTGCCACAGATCTTTGATTTGTTCAATCAAGGCAAGTCGGGATCGAC -GCAGCGCCAGCAAAACTCTTCTTCTCAGGGCAAATACACGACTCTGAAGAATCCCTGGAA -GTCTGTCTTCATGCAGTCAATTACGGATGACTATGATCGTCAGAACATCATGAATGAAAA -TATGGGCTTCAAGCACAAGATCGTTAAGAGCGATTTGGAGTCCCCCCACGCGAAATACAA -AGTCCAGGGCGAAGAGATCAATTACTTCGGTTATGCGGAGACTGAGCTCCGACCCTACGT -CTTAGATTTGTTCGAGGAAGCTGCAAACAACAAGACACGCATGTTCCTCTCACATGTGAC -CAGCACGACCCACCACCCCTGGAGCACTCCAGACGATTTCAAGAAAGAACCATACATGGG -CGACCAGGGAAACATCAACCACGACCTGATGAACAACTATCTCAACGCCGCCCGCTTTGT -GGATAATTGGCTTGGAGATATCATGAACATGCTGGATCAAACGGGAATTGCCAACGAGAC 
-TCTAGTCGTTATCGTCGGAGACCATGGCCAAGCCTTTGGCGAAGATAACAAGGAAATGAC -CGGCACATATGAGAATGGTCACATCAGCAACTTCCGTGTACCTTTGGTCTTCCACCACCC -GCACATGCCTCGAGTGGACATCACCGCCAACGCAACCGCCCTCTCTATTGTCCCCACCAT -CCTAGATCTTCTCGTCCAGTCCAACTCCCTGGACGAGCGCGACTCCGAGATCGCCTCTTC -ACTCCTCCCAGAGTATCAAGGCCAGTCACTCATACGTCCCTTCCTCTCTTCTATGGAGGA -ACCACACCATCGCCGTGACTCCCATCCTCCACCGGATGAGCAGCCCGAGCACAAGCGAGA -AAACACCCAGCCCGTATCCCAATCCAACACTGAGGGCCCAGCCAAGCGTCTCGTTTGGAA -CATAGGATTGATCAACGCCGGTGGTTCGATGCTCTCGGTCATGGCAGCCGATATCCCCTA -CCGTCTCGTCCTTCCCCTCAAGGAAGGCTTCGAATACACATTCACCCACCTAGGCGAAGA -CCCTGGTGAAACCCAGCCCATCAAGGCATGGACTCTTGCGGAACTTACCCAAGACGTTAT -GCCTAAATTTGGCAAAGAGTCTGTTGAGTGGCTCGCGGACGCTGAGCTAGTTGCCAAGTG -GTGGGTGAGTGAACAAAAACGAATCTGGGGTTACCGTGAAGCATAGTCATACGCTAGCTC -CAGGGCCTCACCAAAAACAAAACAACTCAAAAACTAAACATGCGCCCGACTTTGGTGATT -GGTCTGGTCTATTTCACATTGCATTACTCCCCTTCTTGTTTCGGTTTTGTGCCCCTTTTA -CATTTCATGATATCTACTACATTTGCCTTGGAATCGGCGTTAATATGGAGTTATTTGATT -TCCACTTATGTGTGAATGCTTGAAATCCCTCATCTCCTTGAAGACATTATTCTACTGTGG -CTAGACAGCCCTTTTTCTTTATGCTTTCCCTTCTCGCGTGAATTTTCACATGATGTGATC -TCACTTTCGTGTTTTTCTTTTCTTTTCTCCCTTGATGTTACTCAGAACTGAATTTCGGTA -ACTGTACTTGTATGGAAACAGGAAGAAGAGGACATTGTACATAGCACCGAAGGACCCTTG -GGTATTTTGGGGTGACATCATGAACGGTTGAGGATCAGCCAGGTTTTTTTCTTAACCCAC -TATGTATAATTTACATAATGCATTCGACTTCACGTCTGTTTCCCTGAAACGAAGTTATAG -AGTCGAGTCAGTCGACATTAGGAATATAAGACCAGAGCGGTCTGGTAAGTAGGTATGATG -TTCTGGATTTTTTTCCTCCTCGATTATGATCATCGCAATCTACCTCGTACGGATGCAAAT -GCTTATGTATCCTTGATCTAGGCTTAACTTACCCGTGGACATTTAGGGCCCTGCAGGGCC -CGCTTGATCCACCGACTTTCCCTCGGGTACTCCGTACAATGTTTCACATCGCATATGGAT -GAGACTTGGCGATATCTCATGGGATATCCTGAAAGGCCAAGGTCAAGCTTTATTCTCAAT -TTAGGGCTAAGCTTCGCGCGTTGAAAATTTTAATTCAAGTGTACCACGATTCCCAGATTT -TACATGAGTTGTGGTTTCCATCCCTCAAGACTGCTAACTCTATATTTTGTTTTCAACCAC -ATACATGAATAGATGATGAGATGGTTCAACGCTCATCGTTCAAGATAAAGGAGCAGGTCA -GACTTTTCTATAATAGCGGCTTGATATGTAAGAAATCCCTGTATGCCGTTTTCGAAACAT -GGCCGTTTGTAGCAAGACAagaatgaaacagcgaaagagaaagagaaagagGGGAATGAG -AAATGCAGTGAATGCATCAAGGCAACACAGTGCTACACAGATCACTTTATAATACTACAT 
-GTTAGACAGCAAATACAGAAGGACTTGGCCACATGAATGCTTAAGATAAAATAAAGGGTC -CGGCGGCAATTGAGGAAATCCCCAACTGATGAGATGTCCATTTCTTAGCCAACCATTTTT -ACACCGACAACATTGACCCGTGCATATTTCACGAACAGCTCTTGTGGGTTCAACCCCCGA -GCCACTCGGGGCAGGTCCGAAAAGTGCACGACAGGGGGAAGGAAGAACCGACCAGACCAG -AGTCTAACAAAGGGAAAAAAAATGAATGCACAGACCACAAATTTACCGATCTATTTCCCT -TTTTTTTGGCTGCAGTGTATTACCCGGAATCGAGAGTGTGGGGGTTTCGTCAACACCGAT -AGAAGCCATCGGAGCGCTGGGAACCCGGAGTATGCAGACAGAGTACACACCGGTAGAGCT -AAGGTTAGATCCAGGGGGAAGGGAGATATCACGAGAGCAGAAAAGTGGAAACCTCGGAAT -CTCCCCATTGAGTAAGGAGTTGGGAATGGAAGATTCTGATGGTCACAAGGTGTTGGCTTA -TCAATAGTGCAGGGTTAGAGTCAGATAAGCTATATAGCATCGTGAGTATTGGCACGACAT -GAGTCTGGACTCTGAAATGGAATCGAGTCCTAATTTTAGTTCATGTACTCCGTACTTCTC -CAGAGAATCCGGAGTACCCCGATCTCCGAGCTTAGCTGCTTTGCATCACATTGTATAATA -CTCTTTACTACTCCGTAATCATATCCTAATATAGGATCTAAGATCTTAGGTTTAGATTGT -TTTCAAGAAAAATTTTGAGATCCAAATATAGGAATTGACCAATGAATTTCTCCCGCCACC -TTGGAGCCGGGGCGATCAGGAACGAGACAGGAACAGATATTACACGCCATCTGTACTAAT -GTGATTGGTACAGATTCAGTCGATCTGCAAACAGCCACCTGACCCTAGCACATTTTAAGG -CTTGGAAGATTTCTAGAAGACCCCTAAGGTCGGCAATATGTAGTTTTCTGGTCACCTGGA -TGAATTTTTAAGCAATAAAGAAAAATGTTGAGGTCAAAAATCAGAGTGGCGTTGGGAGTT -CCCCGGGTTCTCAGTTCCCAGACGGCCCCAGACGGGTTTCCCGAAAGAGAAATCCGGGCC -CCGATCCTTAGGTTCCGAACTGCCAGAACATCGTGGAGTTCAAGTCTGGTGACTTCTTAC -TTCGGTGATATCCCTAAATTCATTCTTTTTCCTTGCTAGAAAAAATAAAAAAAATTAAGA -GGCAAATTTAAACAAGGATTCTGACAAGAAAATACATGTGTTTGAAATTCCTTGATCAAT -TGATACATCCTAGGACTATACTATGTAGATTATGATTGTATGCGCTAATCGAGTCTGTTT -TGTAGGGTTGCGTATGCCTCGCCACAGCCGTTTAACCCACATTCCAGGAGGTCTGGACTC -CATTCGCTCTGCAGGACCCTTGGCCCATGTCTGGCTTTCCTTTTTGGGCTGTCCTCTAGT -TTCTTTCTGCTCACGAGACACCTCTGGGGCGCTTTGGTCTTGGTCCGGCTTGAGTTTGTC -GGTCAAAAAGATATTTAAAGGAAGTTGATTCTCTGGCTCATGGTACTCCATCACCTGAAT -CATCTTCATTCTTCTTCGTCTGTACACAAAGCAATCAGTCTAATCACCTTTTGACGTCTT -TAATTATATCCCTTCGATCCTCACTACTCTCATCTAATACCTACTCCATATCATAATGTC -TGTCTCCATTGAAACCATCCAAGCTCCCACTGCTCCTATGACCGCCCAGCCCGTCGTGAT -GGGCCAGAAGTCCAGAATGCCTGAGTTCAGTCTCGCCGGAAAGGTGGTTCTGGTTTCAGG -TGCTGCCCGTGGTCTTGGATTGGTCCAGGCAGAGGCACTCCTGGAAGCCGGAGCGAAAGT 
-ATACGCCCTTGACCGTCTGGAAGAGCCGGTAAGTTTCTACATCCCTTGGGCCTGGTGTAT -AGGATCACCAGCTCGTTTTGTCTCTATGCACTCAACTAACATCACTCGATTACCAGGCCC -CCGAGTTCGAGCAGATCCAGCAGCGCGCAAAGGAGCTTGGCACCGAGTTGCACTACCGTC -GTATTGACGTGCGTGACACAGAACTTCTCAATAGTGTCATCGAGACCATCGCCAATGAGG -AGGGCCGTATGGATGGCCTCGTTGCTGCTGCAGGTATCCAGCAGGAGACCCCAGCGCTCG -AATACACGGCCAAAGACAGCAACACTATGTTCGAGGTTAACGTCACGGGAGTGTTCATGA -CCTCCCAGGCTGTTGCTAAGCAAATGATTCGCTTTGGAAATGGAGGTAGCATCGCTATGA -TCGCCTCTATGAGCGGCACAGTCGCCAATCGAGTACGTTCCCACTCCCCTCACCTCGTTT -TACCCTTTGTCCTTTCCACCCCATTTGGCATTTTAAGTATCGGTATAGGCTTAGTGTCTC -CCTGCCCAAACTTGATATCTTTTCATACATTATTCCCCCACTTTATGTCACATTCCGTTC -TTCTATTTGGTCAACGATCTAACCTATCTTTCCTATAGGGTCTTATTTGCCCCGCATATA -ATGCTAGCAAGGCTGCTGTCCTCCAGCTTGCCCGCAACCTCGCTTCCGAATGGGGCACCT -ACAACATCCGCGTCAATACCATCTCCCCTGGCTATATTGTCACCGCTATGGTTGAGAAGC -TGTTTGTTGAATTCCCCGAACGCCGCGACCAGTGGCCCAAAGAGAACATGCTTGGCCGTC -TCTCCCGGCCGGAAGAGTACCGTGGCGCCGCTGTCTTCTTGCTCAGCGATGCTAGCACTT -TCATGACTGGAAGTGACCTGCGCATGGATGGTGGCCACGCCGCATGGTAGGTTGATTATG -GCCTTTGTTCGATTTTGCCTTTTCAATCACTGCGCATGACCATCTTCTTTGGTCTCTCAT -TGCATGGCGTTCTGCGTCTTTTTCTTTTTGGGTTATGGGCCGGAAATAGCGTATGGGCTT -TGTTTTTTGCGATATTGTTATTGATTGATAGCGATGATTGGCTTTGCGGATTTATGTTCG -CGGGCTCCAAAGCTGGATGCTACGATTTTAGACTACAAATCGCTCAAATAATAATTGTTC -AACTGTTAAAACTATTTCTGTTGACTTGTTACATCTCCGTAGAGTTTAGCTGCATCTGAA -CTCTGACCCACTTCGAGTTGAAGATCCACACTCTCATCTCATTGTTGTACAATGACATCC -ACTCGTCATACGTAAGGATGAATTACCCAACCGGCACCAAGAAGCCGAATCTCGAGAACT -TCCCACCAAAAAAAATAACCGCGTGCAGCCATCCAGTCGGAGCATACCACATTTCCAAAC -CGATGGTATTCACCACCTGACCTCTCAACTAGGCTGATCAACAAACTTAAGGTTCGATCC -AAAGATCAAATTCTGGGGATACACACGTTGAAAAAGGTTGTTGCTGCTAGATCTAGTAAC -CCGAAGAGAGTACCGCGTGATAACTACGTGGCCGTTGTAATCGTCATCAGCGTAATCAAA -AATATCCAGATGTCATTATGTTTTCAGCCGCCATGGAAACCAATCCGCGACTGACATCAA -GTCTGGGGAAGATGCGGAGAGCTGGGACCAATCAGAGTTGAGAGAGCCGGGATGTTGACT -GTTTGGAATCCAATAGGAATAGAGTTGGTAGCCAATCATGTTGCAGTTAGTCATGAGCGG -GAATGTTAGGCTTCTATGAGGGTAAATACCTACCTAGTCATGCTCACTCTTCGGTATTTG -ATATCTTAAGTTGTAGTGCAACTAGAATTATGCGCGGGTATAATCGCTTACTGCTTGGAT 
-GCCTTAATTACCTGTTGGGTACCCAAACACCCCGGACTCATGCCCGTGTTTACAACAAGC -AGTCACCTGCCATGCATCGGACTTCTAGGAGCATGGATGGAATACGGATAGGTCCTGGGA -GTCGACTCTCTAATTTGTCTAATGGCGACACCTTAGCCCCAAAAGGCAAGTATACGCAAC -GCTGATTGCTTGGTTAAGTACCGTACGAGCGGATGAAAGGGGTTTGTTGGTAAATCTTGT -GTAGACTGCTACTGTCGAATCCGTCCCAAGTCTACACAGTATAGGCCAAGGCAGGTCTGT -GACATGAGACAACTGCCATCTTGGCTTGGCATTCTACTTGTTCTTGGGGGTTTTGTGCTA -AAACATAATATATCTCATTGGCTTGTTAGTATCACATGCGTAGTCCCGTTGCTACCTTCA -GGGTTCTCTTCAATTTCTGTGCACTGGTATCTCAGTACCTGCTACAATCCGTGCACTTTT -CGAGGAGCCTCGCTCGAAAGGTCACGGAGCAAGCGACGCACGGTGATTCCGATCTCTAAA -ATATACTAGTAAAATTCAAAACACGTGAGGTATAGTAAGTACATAGAATAGAGTACCCTC -AAACCCCTGCCCCCGCCCTTAATTTTAGCATTATTGACCTTCCAAGGTTGCTACCGCAGG -AGTAGACCCTAGATAATATAATTTCTCTAGCGTAGGTAGGTAGTGTTTGATACACCAAAT -ACCCACCCTAGACCTACATCTTGCAACCCTACCTAGATATGACCCTCGCTTTCTTCTCTG -AGGGTTTCAAATTTCCACGAGGTAGTTTTTCAAGCAGTCACTCGTGAACATCAACGGTAC -ATTAAATGAGATTTGAACGGGTTAATTAAAGATAAATAGGAATATTCTAACTCAAGGTCA -GCCAAACATGTTGTGGTATCTTAGGACATTCCAACGACACACTGGAAGCGATTATCAAGC -TTTTTACAAGCTTTGCACTAAGACGTCTGCCGAGGTCCATGTGTAAGCTTGCTGCTGGAT -TACCCTTCCTTTATCTCAACTTTACCTAACATCAATCGAAATTCAATTGAAGCCCGCTTC -TTCCTTCCCAATTGCTCGAAAGAGGGGCTGTATGATTGATCAATCCTGATATCTCATATC -CAGTTGTATGGGGCTCAGTTCGACGGCGGGAGCAGATTGTATAGAATCGAACGCTCATGC -CATTCAATTCGTTTCATTCCTCCTTCTCTTCGGTGAGACAAGGTCCCAAAAAAGGAACCA -GATATGTTATGGCCTCGCGAAGTTACATAGCACAGCATGTTACATAGTGAGGCACATATT -GCATCCATCGTGGTCTTGAATTGCTGCTTTGGTCTACTAATTCGCCTGATGCTAAGCCAG -CCTTGTCTTGCCATGATGGTATCATAGTTGAAATTAAGAACTTGCATGACTGCTATTAGA -GCTATTTCTGTTTTATGGGTAACTGAAGTGAGAGACTCATCAGGTTATTGCCCCCTTGTC -ACTAGCACTCACTTCGCCCAAGATGTGAATTTCTGTCTCGTTTGTTTACTCCTCTCCCCT -CCGTTTTTTCTTTTTTCTTTTTGCCCCCTTTCACTCGCGTCCCTCCCATCGTCATTTCTT -CCATATCTTACACTAGTCTCTATCTTATCCAAGATGGCGGCTGAGGCACCAGCCCTGGGC -TCGGACAAGCTCAGTATCCATGATGTCGAGCCCGTCAAGCCTGATGGGCGAAGTGAGCCA -GTTTCCATTGAGAAAACGCCCCAGAAAAGTATCGTATCCGAGTCATACAACCTGGGTGCT -GGTGATGAAGACCCGACCCCAGACGAACTTCATGGCCCCAACGCCTTGCGTCGTGTCTCG -GCTCCAATTCCATGGGCTGTTTACACCGTTGCCTTTGTTGAACTGTGCGAGCGTTTCTCC 
-TACTATGGAAGCCAGGTCCTCTGTAAGACAATCACGCCCCACATGGAACCACATGAGAAA -CGATACTAACACTGCAACATAGACTCTAACTTCGTGAACCATGCACTTCCTCTTCCTGCC -CCCAATGGCCCTCTCGGATCAAACCATGCAACTGGCGCTGGTGGACCTAGCTCCCAGGGT -GTATCGGGCGCTTTGGGCCAAGGTGTTGAGACTGCGAGCGCCATCAACACCTTCAACACC -TTCTGGTGCTACTGCCTTCCCCTAGTGAGTGCTTACATCGCCGACGAGTACTGGGGACGG -TACAAGACCATCTCCTGGTCTATTGGAGCTGCCATTCTAGGTCACATCATTCTAGTTATC -TCTGCCATCCCACCAGTAATCACAAACACTGATGCTTCCTTCGGTGTGTTTGTGCTGGGT -GTGATAATCATGGGTTTGGGTACTGGTGGATTCAAGCCCAACATATCCGCCCTGGTTGTC -GAGCAGATACCTCTTGTCAATCTCAAGGTACGTACGCTCAACTCTGGTGAGCGTGTTGTC -ATCGACCCTACTATCACCCAGAGCCGTATCTACCACTACTTCTACGTGTTTATCAACCTT -GGCGCATTGATAGGTCAGATTGGCATGGCATATGCGGAGAAATACATCGGCTTCTGGTTG -GCCTTCCTGCTGCCCACGCTTATGTTCTTGACTACCCCATTCGTCATGTGGTGGGGTCGT -CACCGCTATCATCAATCTACACCGGCAGGTTCGGTAACTTACAAGGCCATCAAGACATTC -TTCTTTTGTATGCACGGTCGCTGGCACTATAACCCATACGTTCTCTGGAAGCAAACTCAT -GATGGAACTCTCTGGGACACCGCAAAGCCCTCTAAAATCGAACCCCAGTCCCGACCCAAG -TGGATGACCTTCGACGACGCGTGGGTGGACGAAGTGCGCCGCGGATTTGCCGCTTGTGCC -GTTTTTTGCTGGTTTCCTATCTACTGGCTAGCCTACAATCAATTGACAAGCAATCTGACC -GCGCAGGCCGGTACGATGACCTTGAATGGCGTCCCGAACGACGTTGTGAACAATCTTGAT -CCCTTGGCGCTACTCATCTTCGTCCCGCTCTGCGACACGTTCCTGTATTCCGGATTGCGC -AAAATGGGAATTCGATTCACGGCCATCAAGAAGATCGCTCTTGGCTTCTGGCTGGCTTCA -CTCTCCATGATTTGGGCTGCTGTCATCCAGTACTACATCTACGAGACTTCGCCCTGCGGT -TACCGAGCTAACAACTGCTACAACGACGACGGCAGCGTCAACCCGACTCCTATAATTGTC -TGGGCTCAGGCTGGCTCGTACGTGCTCGTCGCTTTTTCGGAGATCTTTGCCGTGATCACC -TCGATGGAGTATGCCTATTCCAAGGCGCCTCGCAATATGCGTTCTCTGATCCAAGCTATC -AACATGTTCACCAATGCCATCTCCGCTGCCATCGCTGAGGCACTGATACCTCTTTCCAAG -GACCCATTGTTGATCTGGAACTACGCCGTCTTCGCCATTTTGACGTTTGTCGGAGGAACG -ATTTTCATTATCCAGTTCTGGGGTCTGGATAAGGAGGAGGACGAGTTGAACAACTTGCCC -GAGGGTCATGTGCAGGCAAACGAGAACCGAGATAGTCAGACACAGGCACAGTTTGAAATT -GCCGAGCTTGGCCCCGGTCGTGGTTGAGATGGTGGGAAATCAGCTTACTTTGCTTTAGCT -TGGTACTGAGTGTCCAGAGTGGGGTGAAAGCTTTAGCCCAACCCTTTTCTCGAATATTTC -AATTCAACTGATAACTTCACAGGTTCTCTTGTCATCTGGTAGTAAGCGATATTGAACTCC -CGTCGTAATGCCTCGATTTGTGACCATCCATATAGCCTTACACGCCACAAAGGCAAAGTA 
-TAACACTATACCTGAAACTGTAGAATATAATTGGATTGTACTGCATAATCTTGAGCTCAA -CCAATTACAATCTCTCATGAATTTTTATGCCATATCCATCTCGGACCTTAAGTTGTCCGC -AGAACCGTTTGGAGAAATTTGAAATCCACTTTCAGGCAATGATGAACCTCCAATAATGAC -GACTGAAATGCGGATGCCTACAGAACTGATATTTATTCTCTCGGTGTAAGCACAATGAAT -CACATGATGACCTTAGGATCAGCCCTTAGCCCCTTTGGGTAGCCTTACCTACTGTATCTA -GGAGCTGAGGTTTGACCTGCGCCTCCAAATAAGAGAAGAATGTGATTTTTTTTCTATTTT -TTCTATATTTAATATTTTGCTGTGTAGTGTAAGACGGAGCCCCCGGCATCATTAACTTCC -TTCCGCTCCACTCGGCAGCCATGATTCGCGATCCCTACGAACATACTCACCGTACCAAGA -GAACTACTCTGCGACATTGGGGGGCTTGGCTATCCATCCGGCGGGCCCGGCGCATAAGTA -TTCCCCCTTGGTGCATAAGAATCTTCCCCTAGGACATGGGAATTCATTCCTTTCTCTTCT -GCACTTCACAGTCTGCCATCGTGATTTAACCGTCATGAAGACTTTTGCGCTGATCTCAAT -TGCCCTGGGCTTGCCCGGTAAGTGCTTCTCCTCGCGTTGGGTTTTTAACGGGGACACCAT -TCTTGGCGTAATTGGACCTTTACACCCGCAAAATAATAGTGTTCCACACTAATCACCATA -TTCCCGCAGGCGCGATCGCTGCCCCTGGCGGGTTCAATCCCGATGATATCACATTCGCAG -GTGTGCAACGAGCACTCCCCAATGCGCCCGACGGATATGTGCCGACATCCGTCAGCTGCC -CAGCGGACCGACCGACAGTTCGCAGCGCAGAGAAGCTGTCCTCAAACGAGACATCATGGC -TGAAAGTTCGCCGCGGAAAGACCCTGTCGGCGATGAAAGATTTCTTCGGACATGTGAAAG -TCGGTGATTATGACGTCGCCGGCTACCTCGATAAGCACTCCAGCAACTCCTCGAGTCTTC -CCAATATCGGCATTGCCGTTTCGGGTGGTGGATGGCGTGCGTTGATGAACGGTGCCGGCG -CCGTCAAGGCTTTCGACAGCCGGACGTACAATGCGTCGACGACTGGGCATTTGGGTGGTC -TGTTGCAGTCGGCTACCTATATCTCGGGTCTGAGTGGAGGCAGCTGGTTGCTGGGTTCAA -TCTATATCAATAACTTCACGACGATCGACAAGCTGCAGACGCACAAGGATGGTTCCATCT -GGCAGTTTGGGAACTCGATCCTCGAGGGTCCTGATACTGGTGGTATCCAGCTCTTGGATT -CTGCCAGTTACTACAAGGAACTTGCTGACGCTGTCGACGCGAAGAAGAAGGCTGGCTTTG -ATACCTCTCTCACTGATCTCTGGTATGTTAGTCCTGATTATGGAAAGTATGTGCATGTTT -GCTAACGTCGCTACAGGGGCCGCGCTCTCTCTTATCAAATGTTCAATGCTACCAATGGTG -GAGTCAGCTATACCTGGTCTTCTATTGCCGACACTCCCGCGTTCCAGGATGGAGACTACC -CCATGCCGTTTGTCGTTGCAGATGGCCGCAACCCCGGTGAGCTGGTTGTTGGTGGTAACG -CGACCGTCTACGAGTTCAACCCGTGGGAGTTCGGCACCTTTGACCCGACCATCTTTGGTT -TCATCCCCCTCAAGTACCTCGGCTCTAAGTTCGAGGCTGGCTTGCTTCCGAAAAACGAGA -GCTGTATCAGTGGTTTTGACAATGCTGGCTTTGTGATCGGAACCTCCTCCACTCTCTTCA -ACCAGTTCCTTCTCCAGCTCAACACCACCTCGCTCCCTGACTTTGTAAAGAATGTGTTCA 
-ACAGCATCTTGAAGGGGTTGGACAAAGATCAAAACGACATTGCTTCGTATGATCCAAACC -CCTTCTATAAATACAACCCGCACACGTCGCCGTATGCCGCGCAAGAAATGCTGGACGTCG -TGGATGGCGGCGAGGATGGCCAGAACGTCCCTTTGCATCCTCTTATCCAGCCCGAGCGCC -ATGTCGATGTCATCTTTGCTGTCGACTCCTCGGCTGACACTGACTATTTCTGGCCAAACG -GCACATCCCTGGTTGCAACCTACGAGCGCAGCCTGAACTCCAGCGGTATCGGCAACGGTA -CCTCCTTCCCTGCCGTTCCAGACCAGAATACCTTCATCAACCTCGGCCTCAACACTCGGC -CATCCTTCTTTGGTTGTGACAGTGCCAACCAAACTGGATCAACTCCTCTCGTCGTATACA -TCCCCAATGCCCCTTACTCCTACCATTCCAACATCTCGACCTTCCAACTAAGCACCGACG -ACACTCAGCGTGACAACATCATCCTCAATGGCTACGAGGTTGCCACTATGGCCAACAGCA -CCGTTGACGCCAATTGGACATCCTGTGTTGGCTGTGCTATCCTCAGCCGCTCGTTCGAGC -GCACGGGTACCACTCTCCCGGATATCTGTAACCAGTGTTTCGATCGGTACTGCTGGAACG -GCACTGTGAACTCCACCAGACCGACAAACTATGATCCAGCTTTTTACCTGGCTGAAAGTG -TGGCCTCGGTGAATTTGTCCACCATGCTGTCCACGGTTGTCGCAGCTGGTCTGGCAATGC -TCATCCTGGTATAACAGTATCTCATGAAACCTTCTTTTTCCCGCTGCTATGTTGCAGCTA -TTCACATTCGAAAGATGTATCTTTGGCATTTTGATACACTTCTCTTTCAAGCACGTACGA -TGTGCCATCGAGCCATCCTCGTGCCTTTTTCATTCTTTCGGTATCTATCTAGATTTCTTC -GGATTTCAGGTGTTCCAATTCAATAGTAATGCATATATTTTCTCGTTCAAGAAACATGTT -TGCGCTGAGTGCAGCGTTATATTTCCCTGCACTCAATGTTATTGAATGTTATTGCAAATA -AGTCGTCAGAAATTTACGTTCGTCAAGTAAACATGCCCTGTCCCTCTGCGACTTGAATTC -GTGCTGGACTTTGTATACATACAACACAAGAAGGGCAAAGTGCTAATACCTAGGTTGGAG -CACTATTCTACTCAGCATTTATCGCCAAAAAAAATAATATATAAAGAGGGTTGGGAACCG -TTGAAAGCTATACTGAACAGATAGCTGGTCAGAATATATATCTTGTGGTATACAACTTAT -GGTGCAAGTTTGAACTTTCCGATTATTTCGTCAATGGATAAATACAAGGAAAGACTTTTA -CCTTGCTTCTTGTCATGGCTCGTATCTGATCTGCACTTGAATGAATCAAATGTAACATCC -TTGTTGATGTGTTTGCCGAAAGAGGCGTCGATCATATCACGTGTCCGTGTTGCTATGGGA -AACGGCACAACAAGCAACCTCGGATCGGCCGCCACCTCCACAGTGGGTTGTTTGCTTTGC -CTTGCTTCCTTCTTCATGCACCTAATTCGACACTCTCGGTAACCTGCGACCTAACGACAT -CGGGCCGATATCACATTTGACCGACCAAATCCGCCATGAGCGTCGCATGCCAACACTAAC -AGCGACCACCAACGACGACCGGCAATATCAATATGGCACGTGCTGCGGTGATCCCACAAT -CCCCTGCCAAACGGGTACGAACTTCAACCAAGTCGACAGCTGCTGCCGATGCGAAGAGGA -AAGCTCCCCGAAAAGCAACACCAGCACCACGAACTGCACCTCCGGCTCTCGATGTTGAGT -CCGACGATGAGCTGGAATGTATCACAAGTAAACCCGTCAAACCCAGCGTACGTCCTTCTG 
-TCCGCCCGGCTACCAAACCAAGAGTGACCGCGACTGCGACTGTTCGAGGAAAGAAGGCTT -CGGACTCTCTCGCCGAAAGGTCAAAAGCTCAAAAGATCACCGGCGAAGATGGACTAGGAC -AGCAAACTGAGGCGCCTAAAAAGCGTGTCGGACGACCGAGAAAAAATCCGTTGCCCGCAG -AAGCTGCTGCGCCCAAGCCCGAGGCCGCACCGAAGACTAGAGGACGACCAAGAGCGGGAA -CAACAGCGAAGGCCCCTCAGATTCAAACGGCGGCTACGAGCAGGCGGACGCGAGCGGCGG -TTGATACAGCGGATGAGACGAAACCGAACCAAATTAGAATTGCGACCAACTCGACAACGA -TGCGATCAAATCTTCTGCGTGGGCCCGCCAAGAAGAAGACCGTGAAGTTCCAGGATGTCA -CTGATTCAGAAGCCGAGGAAACTGAGCCCGAGGTTCCTACCGCTGGCCGTCGACGAGTAG -CGACCAAGGCCACGAGCAGTGCCAAGGTCGGCCTTGGAGCATCCGCAGTTCGTAAGCCTG -TGACCACTGGTACTCGGGGAAGGAAGGCAGCTGCAGCGAAGAAGGATGCGACTCAACCAT -TATCTCCCAAAAAGGCCAAGCAGGTAGCCAAGTCTCTATCAGCATACGCAAGCTCCGATG -GAGAAGACGATGAATTGAACACCATCAAGGATGACAACACCGATCCGCTCAGACTCGTTG -TTCACTCGCCTGTCAAGCATGGCTTGGAGAATACAGGACTGAGCTCTCCTGTGCGGAGGA -TCAACTTTACTCCCAAGAAGGCATCGAGCTTCATTGACGAAAATGGCGAACCTAAACTAC -CAACACCAAAACACGGATCAGACAGCATCGGGCTTAGCTCACCAGTCAGGAAAATCAACT -TTGCCCCGAACCGCAGCCAAAACACAGTCGCAGATAATGGCCACCTCGCTCTTCCACCTG -GAAATTCAATTGAGTTCAGTGATTCTATGTTCATGTCTAGCCCTGCGAGACGACCGCCGC -CGTCATCTCCCTTCCTATTCTCCATCAGAGAGACCCCGAATCGTGGGGGTTCCCTCTTCC -GGGATAGCGTGAACCCGATTCCCGCACCTGATTTCACACCCGGACGCACTTCTCCACTAA -AGATGTCACCCAAGAAGGGATTTTTGGGTGCTTCCTTTTCGCAGTCGCCATTCAAGGCCT -CTACGTCGGTAGCCCCTGCTCGAACTCCTCTGTTTCAAAGCCCCGCAAAGAGAATCGCCT -CTCCATTCAAAAACTCGATATTCTCTGCGCATGCATCTGTGGGCACCTGTCAACCCACGG -ACAATGATGGAACCCCCTCCAAGGTGGCTGAACTCGCACAATCTACAACCCCCAAGTCGT -CGCCAAGGGAAAGTCAATCCGACGCAGGTTTTGAACGGGACTCTGAAATGGTTGAGGATG -TGGCTCGAGATATTTTCGGCATTGAGCTATCTTCTGATGGAAAGACATCATCGACTTCTC -CTATTCAGAAAGATATTACTGCCTCGGAGGTGGCTAAAGACTCATCTGATGATCGAAATA -TCGACTCTTTCACTGCGGAGATGGAGGCCGAAGGTGATTCAGACTTGAAAATAGATGATA -GTGAACATCAACAGCATGATGTCCAGCCCGAGTACGACGAGCCTGAAACGATTTGCTTTG -ACGTCATGGAAGAGGCTAATATGGCAACGGAGGACTGTTATGATCAAGAGCCCCAGGAAG -ACTCAGGCGCAGGAGTTGAAGAGCAATTTGAGGAGCAGGCTCAATTGGACCGCGAAGAAG -CCGACACTATTTGTTTCGATGCCATGGAAGAGGCCAATATGGCAGCGCATGATCTCAATG -ACCAACAAGTCCAGGAAGCTTCAGACCTAGAAGAAGAGGAGAAAGAGGAGAGCGACTTTG 
-AGTACGAAGAGCTCGACACAATTTGTTTTGATGCTATGGAGGACGCCCAAATGGAAGCGC -CTGATCTTCATAATCAACAAGTCCAGGAAGCTTCAGACCCAGAAGACGTGGAAGATCTTC -ACGACAACAATGAATCTGAGTCTGAGGAGCTTGAAACAATTTGCTTTGATACTATGGAAG -ATGCACACCTCGCGGAGAATGACGTGTATGATGGAGAATGTCAGGTGACAGACCCAGTCA -TGGAGGACACGAATGATCCCGAAGAAGAGCTTGCAGTACTAGAGAGCGAGATCGAAGAGG -AAGAGCCTCAAAATATTGATACTGGTTTCCAGGCTCAAGAATTGACACCGGAACGTCTGC -AAACTACGGAGATGCAAGAAATAACCATGACAGAGCAGCCGAATGAACTTTCTCTGCTCT -CCAGATCACCATCGAATTCAATTTATGGTGAAGAAGCCCAGCCAGCTATTGAGATGGAAC -ATGTCTTGGCAAGTGAAGGTGAATGGCAAGAAGAGGAAGAAGAAGCCATTCATTACCCCg -aggaagaggaagatgtgatgggtgaggagagtgaggtggaagaCACCGAGTCTACTCTGG -CTGCGTCTGAGTTTGTTACAATCTCGCCAAAGCCTGCTTTGCCACATGGGAGCGACTATC -TTGAGGACAATGCAACGCCGCCTCCAGGAACTCACGCGATTACCCCACTACAGCCAACTC -ACACAGGCGGTGAATCCACTCCCTTGTTCTTGGATCGCCCAGACGACTCAGAGAATGACC -ATGACTCCACCCTCGATGCCAAGAATGCCAATAATGTGATCATTGGCACGCCCCTGCATG -AAGAAAATAGCTTGGCGCCAAACACTCAGGTTGATAGCCCTGTATTAATGGCACCAAGTC -TCTTCAACACACCAAGTGTTGTTGACCAGCACCAGTCCCCTGCCGAGGCTAGCTTGGGTT -TTACGCCATTGGCACAAAAGTTTGGCCGCTGGGAACAAGATACGCCTTCTCAAGCTAGAT -CACTCCGACCGCGTCGACGTGGTGTTTTCTCGTTAGTCGGCCCGTTGGACCGTACCAACA -CCGAAACTCCTACGAATTCTGATACTGTGTCGTATCCTAATTTGCCAAAAAGTGCTTTGG -CACAGACCCCCTCTTTGTTTGCCGAATTGCCGCTTCAGCCCCAAAGCGATGCCACCTGCG -TGAGCCCTGAGTATGAGCGGACTCCACGGCCATCTTCCATCTCTGAAGATCACCAAATAA -TTGACTCGCCAAGGATAACCACAGATATTTTCGAGGATCTTGATTTGGAAATCATTGAAG -CAGAGCATACTTCTGCCGCAGAAGCTCCGCAACATCAGGGCCAAGAGTTCGATCTATTCG -ATGACAAAGAGAATTGTGGTCCGGTCTTGCCGTCAACTCCAATGAAGGCTCCAATCCGAC -TCGATGAGTTACGCACCATTCATACAGTGTCAAAGGTGCCTTTGAAAGCCGAAGGCGACG -TGTCACCTCTTAAGCTGTCTCGCAAGAGAGGTCTATCCCTCTCAAGCACATCGCCCACTC -GGTCTTCTCCGCGTATTCGCAAGCCAGCATTTATGGCACTCAATGACACTGCGCCAATTA -TGTCTCCCTCCCGGGAAAACCCCCCGGTGAGCAGAAGTCCAACACCAAAACGCCGCTCGA -TAAGTGGTAGACGCAGCAGCGTAAAAGCACCAGTGATGGCTGCGCCTAGAACTCCTGCCA -CAACCGCCAGTCCTTCGAAGAAGCCGCGTTGTAGCATCAGCACTGAACAGAAGACTCTGC -ATGGAGCAGTCGTGCATGTGGATGTTCATACCACCGAGGGGGAGGATGCCTCTGGTATCT -TTGTGGAACTTTTGCAGCAAATGGGCGCCCGATGTGTGAAGTCCTGGTCCTGGAACCCTC 
-GTTCCAGTCTCTCGCCAGTTGATGATGTAGAGCCCAAAGACTTCAGAGTTGGCATAACCC -ATGTTGTGTACAAAGATGGTGGTCTGCGGACTATGGAGAAAGTCAAGAAGGCGGCGGGTC -TTGTCAAGTGTGTTGGGGTCGGTTGGGTTCTCGAGTAAGTTTATGTTTTGTGTACCTTTC -TTGACATGGCTTTTCTGACTGTTATTTCTAGTTGCGAACGGGAAAACCAATGGCTCGATG -AAACACCATACGCGGTCGACAGCTCTATCATTCCACGTGGAGGCGCCAAGCGCCGTAAGA -GTATGGAGCCTCGTGCACTCAGTAATGTGAATGGTACACTCGTGCGCATTTCTGAATCTC -CCGCGCCGTCTCCCGGCGGTCGTCGCAGTGGCATCAATCCCGGTGCGGTTGAGGGTTTCC -GGAAGATCACACCCCCAACCCACCAGCAGAAGATGCCCTCCACACCACCTCAACAATCTT -CTGCTGATAGCTATCAATTTCCTGCCACTCCAGGATATAACTTTTCCAAGCTCGATGCCA -TTGGCATGTCACCCGCTACACCAGGATTCCTTGGGAATCGCTCCAAGCTTGTCCAGCAAT -CTTGCCCACCAAAGCAGAGCAACCGAGGTTTGTTCCCGAGTGCTAAGTCGTCCAGTACCC -TGCTAGACGATGGCCAAGATGAGGAATCTCGGAAGCAGCGCCGCTTCCGTATGGAAGCTG -CCAGGCGCAAAAGTCTTATTTACAAGCCTGCTGTCGGTAGCCCTCTTGTACCTTGAGTTT -AGTCATTGCGTTGCGCATTTTTGTGAATGCATACCCAGTCACTGTAAATATGGGGACTCT -TTTTGTTTTGGCGTTTTTCTATTTTCGCCCCACGACGGCGAGGTGGATAGGTTGGACTGT -TTTTGTGGGAGGCTAATGCTCTTTACTTTTTGTTCGAATCTCAGTTGAATCAATTCAATT -CCAGATGATTTAAAATACTCATGTCCGTGATAAAGTCGCAACACATGATTTTTAATTTAG -AAACTCGATGATAATACCCGGAATATTTTAGCTGACCCGCTTCAACTTTCACATACGTCG -TGTCCGTTCGACCACGACTATGTCACTCCTGAGACTCTTCACGGTAGAATCCAGCAACCA -AGAAAGCGAGTTCCAAAGCCTGCTTTTCATTCAAACGAGGATCACAGAATGTTGTATAAC -GCTCCCCAAGCCCATCCGCTGTCAGTCCAGCGGCACCACCCACACACTCAGTGACAGCCT -CCCCAGTAAGTTCAAGATGCATACCACCAAGGAATGAGCCCGCAGTGCGATGGATTTCCA -ACGCCTGCTTCAACTCAGACAAGATATCCGTAAAGTGGCGGGTCTTCACACCAGATGATG -TGCTCTGCGTATTACCGTGCATGGGATCGCATTGCCAGACGGGAATATGACCAGAGGCCT -GCACAGCGGCAATGTGACCAGGGAGGAAATCTGCGATTTTGGAAGCGCCGTAACGGGAGA -TCAGAGTGACCTTACCGATCTCGCGATCCGGGTTAACAACATCCAGCAGCTGCACCAGCT -CCTCCGCGGCCATGCTAGGTCCGATCTTGATTCCGACGGGGTTAGCGATGCCGCGGAAGA -ACTCGACATGCGCACCGCCGAGTTGACGTGTCCGGTCGCCGATCCAGATGAAGTGGCCGG -ATGTTGCGTAGTAGCCTGTTTCGTGGGTGGTGCTGTTTCCTGGGTACGCTGGATGGCGAA -GGAGTCGGGTTAGACTCTGCTCGTATTCCAGGGAGAGGCCTTCGTGACTGGTGAAGATAT -CGGCTGTCTCGAAACCGCGGTCTTTGTCGATTCCTACGGTGTGCATGAAGCGTAGGGCAT -CCTTTACCGCGCCTACGGTTTTGGCGTATTGCTCTTTGATCGAGGGCGTGATTACATGGC 
-CCAGACCCCAGTCTAGTGGGGAGTGCAGGTCTGCCAGGCCTGAGGAGAGAGAGGCACGGA -GATAATTCAGAGTTGCCGCGGAGTGGAAGTAGGCGGAGACTAGACGTGATGGGTCTGGTT -GGCGGGATTTGATGTCTGCTGGGAAGCCATTAATGTTGTCACCGCGGAAAGAGGGCATTT -CCACTCCATCGACGACTTCTGTTGGACTTGAGCGTGGCCTAGTTTTTTTTTTGTCAATTT -TATACTTATGGATACATGGGCGAGGGACCTACTTTGCAAACTGGCCTGCAATGCGAGCAA -TACGGATGACGGGCTTGTTGGCACCTATCGGTTGGGTTAGTTGTCATTCTCTTTGCTTTT -TGTGGGGGTCTGACACACCCCAGATGAGAACCAAACTCATTTGCAACAAGAGCTTCACCT -TAGCTTCGATCATTTCTTGGCTGCAGTAGTCGAAAAGCTCCGCACAGTCACCTGAAGGAC -ATGGTTAGCTATGTTTCGTGGATATTCTCTTGTGCAACGTACCACCCTGGAGGACGAATG -CCTTGCCCAAAGCGACATTTCGGAGATTTTTCTTGAGATTATTGATCTGTGAAACAAGTC -AATATGGCAGCTAAGACTCAGTCTAGGGATAAGACATACTTCCTGTGTGGTCACAAGTGG -AGGAAGCTTCTGTAATTTCTGCAGCGAGTCTTTTAACCCTTGAAGATCTTCGTAGATGAC -ATCTTGCTTGATGGGCTTTTGTGTCCAAGCTAGATATAGTTAGTGATGCCGGGCCAGAGA -AATTGTACAGAATTTTGCTCTTACAACTAGGGCTCCATTCGGCCATGGTGCGGGCAATTG -ATCCCGTTGGTGGGGGATGATATGAGTCGCGTCTCTTCCGTTTGACCAGCTGTAACCAAC -AAATGCAATTGATACCAGATTAGAATAAGTAGTGGAATAGTGCGAATTGGCAACAAAGGA -CTGAGCGAAAAGATATCAAAGGCACACGTAACAAAAGAAAAACATTGTGATGTGATACGC -CGCTTATCCCCGGTACTCGGATGACTCATTCTGATGCCTTACCCCTCAGGCCGAATATTC -ATCCCACATTCCTCAGTGTGTGACCTCGGTGTGAATACAGGTCAGGAGGTTGCTGAAAGT -TCCATGCAGCGGACTTCAAAATCAAGCAATTTTATTTGACCGGAAACAAGGGCAAAGCCA -TTGAGTATGCACCACTAGGTCTGATTGAGCCCTCAGCAACCTCCTAACCTGACCTTCATT -CCTCAGAGCCCAAGCAAAAAAAAAGGTTTAATGTTATTGAGTAACATCCTTGACATGTAA -AATATTAGATACAAGACCTGTCACGGCCTAGATGATGAAATCTGTTTTATCTAGGGACCT -GAATGGGTTTGAAAGGCCGTGGATTTGGGAAACGGGCTCTGTGATGATGTGAATGAGTGC -CTGTCCTCGAATATGGCCTGTAGGATGCATTGAGTGATGAAGGTTGCCTCATTGCAAATA -GCAGCGAAGTTCTCACAATCGAGTGGATAATCATGCGGGATAGGACTGGCAATCACGAAC -AGCGCCTCGTAGGTGGCATCATCTGAAAGTCTCAAGCTTTCTTTTTTAGTTTCTCAAAGA -TCCCTTTATTGCCTGACTTCCGAATCTTACAATCGGTTTGCTGGACTATGGGAGCCCCCT -TTTTTTTGCAGCACTGGGTACATACATCCTGCACGGTGGCACTGTATGTTACCTATATAG -CATGTCAGATGATTAATCTTGAATAAGACGAAATATTCGTAGAGCCGTGCTTCCTATGAC -CGCTGTAACGGGACTATTATAGCATAGGCTGAGCGCAAAAGTGAGCAGATTATTCATAAA -ATATACCTGGGTTCTAGACGGAATGAGTGGGATTGATTACATCGCAGAGGAACATCTTAG 
-GCAAAATATCCTCACAGTTTGGAGTCATTGCAATATTATTATTGAGTATGGTGTTTCACG -ATACGAGGTTCATTGGAATTATTTATATTAGTGTTACTCAATAGATAGTTGTCTGCCTAA -CCCTAATAACCAagtatggggtacagagtacggagtactccgtgcagagtagcagTAGGA -ATATGTATCATAGTCCGGTCAGAAAATTCACGAAACAATTCAAAACGACCCTTGTGGTAC -ATGATTGGCCATAAGATCGGCCTATAAGATCGGCAGAAGTAAGACCCTGGAAACTGATAG -ATGTGTTCACATGGGATAATGTTTGGAGGCAACGTAGATTCTTATATCAGTATAGTTTCT -GTAGTATAAAACACAGATTTGAAGGTAGAGTTCTCCCTTGGCATAGAAAGCCCTTCACTA -CCTTCAACTAAATTTAGAAACGGATCACGGAAACAAAAAAAGGGGCAAAATGGAAATTGG -AGACGGAAAGGCGAAATTAGAAGTCATAGCGTCTGATTAGTCTCATATAATACAAGGCAA -CTCGGTCGAGAGCGGTCTAGCGGAGAGGAAATTTGGTCTAATCGGGCCGAGCTGAAGGGG -TATACGAAGTGTGATTGGTCAGATCTCGGAATATCGATTTAGCTTTTGCTTGTTTGGCAC -TAGTGTGACGCTATAACAAGTAAAACGTGGAACCTTCAAGGCAGAAGAGGCTGAATTCCC -GGATTTCCTCCCCAAGCGTTGTATTTAAGCCTTCTGCCATTGTTTTTCTTTCTTCTCTTT -ATACTCCCACACTCGTTTAATCATGCAGTACAAGATCCTTGCCACTCTTTTCTTCGCCGC -CACGGCCCTGGCCGCCCCTGCGGATTCTACTGCCACATCCTCAAGCACCGAGACTACCGA -CAGCGATGACACTCTCGATCTCGAGGACGTTCCCAGCTCTATCATGAGCGTTCTAGCAAC -AGCTATCCCGGAATCGTGGTACAATGACCTCATGAACCCGGCCTCCCTCTCTGCGATCAA -CAGCGCCGCTTCCGCTGGCACCTACCCAGCTTGGTACAACGCATTGCCTAGCGACGTTAA -GTCCTGGGCTACTGAGAACTTCGGTGCCGACATCGTTGGTGGTGTCTCGGCAACCGCTGA -TAGCAGCTCCACTCCGACTAAGTCTGCTGGTAGCAGCGCCATCGAGACTGGCTCTAGCGT -CACTGCCAGCCAGATCTCTTCCAACGCCGCCCAGACCACTTCGGCTTCCTCTACCTCAGG -CTCCAGCTCTGATTCGAGCTCCAACTCTGAGTCCACCTCCAGCTCTGCATCATCTTCTCC -CTCTTCTTCGCAGTCTACCGGTGGTGCTCCCGCTCCTACCGGGGGTGTTGCCATGGGCGT -TGCTGGCGCAGCTGGTATTTTGGCTCTGGCTCTTGCCCTTTAGAGCATTGCTGGATAATT -CATTTGCCTGCCTGGCAGTCTATAATGCACTACTTCCCTGTGTGAAAGGAATCTGTGCTT -AAAACACCTCTTTCATTAATATGCACCTATTTAATACTATACATAACAGATGCAATAAGG -TATATGACTTGTGATCTACGTATCTAATTATTGCGTGATGTAAATTGGTCATATTCTTCT -CCTGTTATCCAAAGTTATGCATCTGGTATCACTCGCTGTCTTTATCTGATCCCAAGTGTC -ACCGAGATTCTGAAACCATAAGAAAGATAGGGTAAACCGCTCGTATCGCTCGTTCGGTTT -ATGGGCTGTGTATAATGTGAGATGTTAGCATTCCAAAGTCAATAGAGGCAACAATGTAAT -CTAAAGAATCAGACCATAGAGCTCGTCCAGTCAGGTATTGGGGTCATTTGAGATTGATGA -CCCAATATCATAAGAAGGGTGCGGTAAGTCGATCATTTCCTTTGGAGGCTGACCATAATC 
-TTCCGGATTAGAACGTTCAAAGTCAATGCCAATAGATAGTAATGAAATCCGACAAACCAA -GCTATCTATCATATCCGTCTGGCACTCGATCTTCGATACAGCCCAGCAGTCTGAGAAGGC -GTTTGTAAATTGTCCTATCACTCCCCGGGCAATTCGTGAGCCTCCATGCCAACATATTCA -GCTGAAAATGGAGATTGAGGCAATCCCGGATGAAACCAGTGGCGAAGTAAGTAAGAGCCT -CTTTTCGCACAGTCTGACAACCTGGAACGTGCTCAAACATGGCCTGTCCTGTGCTTTGGA -TATTGTCCACCTCAATTGGAATTCGAACCAATTTTTCAATAGTTGTTTGGCGATCCTCGG -GGGATGGGTTGCTCCAGCATAGCCTATCAAGTACTTCCACAATGCACACTTGAAAGCGAT -GCCACGCGTTGTGGTAGGCCCGTTCCCATACCGACACGACGTCAGTAGGGGGGCTTTCTC -TGGCTGGAGGCATTGTGAAGGGTGGTGCTCATGGTTGATTTGTGGGGTGATATGTGACTG -TAGGTGAATCAATGCTCCAAAAAAGACCAAATTGGTCAAAAGAGAGGATAAAGATGTAAC -CATTCAGCGGGTTGATAGATCGGATGGGAGATGGACGGAGGAAATAGCTCGAGGTGGTGA -TGGTGGTAATTGGAGCAAAGCTATTCATAGGATTTACAATGTGTTGGCTGTATCGCATAG -GCTGAGAGGTAACAATAGCAATTACGAAAGCGATTGTGCGTATTATAGTCTTAGCCAGAG -ACACTCGAAAATAATTGGGGACCTAACAAAGGCTTAATCGTCCGAACCAAGACCAATACG -GTCCAAACTCGAAGCCGTCAAAGAACACAAAGGCAGTTGCTTAGTCCCCTTGTCTCTTTA -TGTATGCACTGCCAGCTATGTTTGGAGAATCTAGTTACTATGTGTATGTTATAGGAGTCT -CACAATTTCAGTCTCCACGCTGGAAGCTCTACTCGTTATAGGGCTTCGTCATGAACCATT -TCTGACGATGGGAATTGTCTAATATTTCTTCATCGTTATTACAGCGCGAGACAAGCTGTA -CTCTGCGTCTAGTCGCACCTCAAAAAACTTCTTCATTGCTATTCTATGGCGAGTCATATC -AGTAATGGTGCGCTTTGGATTTCTTTGTGCCCAAGGTTGATTGATTCACCAGACCAGGGC -CATATGCCTTGGCTGCCTTCTCGTCTCAAGGTCATGCAGTCAGCTCGCCACCATGCTGCA -AGGGTTAGAATAATTTAGTTGTATATATCATTTTTCATGTAGCCCCAGAGTTTTATGACG -ACAGATTGTAGCGATTTCCGAGATTCTTTCGGCGTGTTCAAAAGCGGAGCCTCAAAGTGG -ATATTATAAGCTGTGGAGTCACCCGAAAGAGTCTTAGCTGCGAATGGGCACAAGATATCT -AGAAACATACAATACCAAGTCGAAGATGAAGATCAAATTGTTGGAAGAAAAAGAGAGGTC -GAATAATCCTGAAATAAAACCCAGACTTGCTGTCGAGTCGGGAACAAACAATTACGTCAG -CACTTATCTGAACTCGAGCTTTATCAGTGACGTCGTGCTGTCAAAACACAGATCAAGTCG -CTATTGAATTCATCCTTTAACGCCCGCCAAGCAAAGAAAATATGTCTGATGCAATTCCTT -CTGCAACTGCTGACCCAGATCCCGTCGAAAAAAGTCTCCCCGCAAACGCTGAAGACCGCA -AAGCCGCCGCCGCTCTCTCCTCCCTGAACACAAATGAGATCGCCACGGAGGGTGCCTCCA -CTGGAGCACCACTTACTGCGGACCAAGAGGCTCTTGGAAGAGCAATGAGCCGACTCGAGA -TAGCTGCTGGGAAAGATGCAGGCAACAAGCAGACGTCTGAGGGACTGAAGACTGATGGGG 
-CGGTGAAAAAGAAGACAGCCAAGGTCGCTCCAGCTGATGTTACGCTGCTGGTATGTTTGC -CATGTCTATGGCTCTAGTGCTACTCTTAGGGTGAGTTTATTAATGTGACTCTAGGTTGAC -CAGCTGGATTTGAGTAAGATCAAGGCCACGGAGTTGCTTAAAGCTCATGATGGGGATGCC -GTCAAGGCAATGAAGGCGTTCATTACCCCGTCTATTCGGGCGTAAACACATAAAATATTG -ACCGCTAGCTTTCCATAATGTCATGAGTTCAGTGGGAACGAAGCAGATATGAATCAAACT -TCGGTTGAACATACGAGACTCGTCCTTTATGCTAGGGTTCTAGGTTTGTCACGGTTTTAC -GGAGTGTAGACAATGAAATTTAGCAACATGTCAAAGTTGAGATCACTCCTCATCCTCTGT -CTCATCTTTCTTCCGTTCAATTCCCTTGCCCTCGGAAATACTCCACAGCATCACCCAGCC -AAGCAAAGACTCAACAGCCAGTGCCCACCATACTGCCCCAATCATGTTATTCTCGAGGCC -CAATCCCAGCCCCCAGCCACCTAGGAATGGCCCCAGCGTCCGCGCTCCACTAGAAATACT -TTGCGCCACTCCATGGACTGTGCCTAGGACAGACGGATCCGTGACACAGTTGTTCACTAG -GATGATAGCAGCAGGTAGGATGAATGTCCGCGAGATGACCTGCAGGCTAACGACAAACGT -GAAAGAAGGCCAAACAATCCATGGGATGCGTGGAAGAATCACAAGGAAGGGCATCAGCAC -GTATGCAATGGGCGAAAAGGGAAGGAACGTTCGGAATGACGTGAGTGTGCCAAGCTTTGT -TTGTATCCGAGGATAGAGGAATATCTGGAGTGGCAGGCCGATGAAACCAATGATGGCAGT -AGCGAGCCCAACCCGGGATGATGGAAGACCCAGTCCACCGCCAAAATGGAAGAGACCTTG -CCTGGAGTTCTCTGGAGCACGGGGTGCGGGCAAAAATACAAATGTCATAGAGTTGAAGGC -ACTTGTGTGGAAGGCAAGCAAGAACTGAGCCACCAAAGTCAAAATCACGTTGCGTGTCCA -GATTTGGCGGAATCCTAGTTTTTTATGTGGCCGTGGATGCACGCGAGAGCGGGCTGGACT -TGATGGCGTAGACCAAGTACCGACACTGTCCGTGTAAATAGAGTCATCATCTCTGGAGCG -TGTCAGTGGACGATAATGCCGGGGTCGTCGGCTCCAAGCTCTTGTGATCCGCTTGCCCAG -TTTACGACCCCAGTCCCTTCTATAACGTGCAATTTCATGGGTCTAAACGGGTGCGTGGTT -AGTGACGGTGTGAGTTTGGACAGTGATGATCTCACCTCATCGAGCCCAAGAAATACTGCG -AGAAATGATGCGAATATGAAGATTGCGCTCAAAAGATTTGGCAATGCATAGGGCCAACGC -TGCATCCACCAGACACCTTCTTTACCTCCAAAGGTAGATCCTGGGCCGAAAAATTGTGGA -AAGCTGTTGATGGGATCGGCCAAAGAGCCTCCCAGAATTGGACCTATGATTACACCAATA -TTGAAGCACATCGGGAGGAGCAGAAATGCGCGGGATTGATACCTGAATGGTTAGTCTAAA -GCAAGTCGGCGATTGATATGAACATGCCGACTTACTTCTTCTCGGCAATGATCTCTGCAA -CCAGCGTTCGCATCACTCCCACATTGCTGTTCAATATCCCGCCCAGTGTTCGAAAGACCA -TGGCTGTCGCAAAAGATCGAGAGAAGCCAAAGCCCACACAAGACAGACAAGTGCCCATCA -GTCCGATCAACAGCACTCTTTTCCGCCCCATCCATTCAGCATCCGCAAGACGGCCCCACC -AGACAGCGGTGATAAATTGCGCGGCTGTAAAGCTACCCTGAAGGATGCCCGCCTGCGCGG 
-AGATCTTCGAGTCTGGTAAAGATGGATCGAACGATTTGAGCTGATAAAACATGTATGCCT -GCAAAGAGGTCTGCGTCAACGGTTCTGAAAGTCGCGCTAAGGTTAATATTGCCAATTGAC -CCTTCTTCGGTAGCGAGCTCCAGGTCACTGGTTTCTCCTCATCTTCAGCCGGCCTGAGCA -CGCCGTCAGGTTCATGATCGCCATCATCTTCATCGGCGGAGGGCAGTCGTCGGTGATTGG -TTATCAGCGGTGCGGACTCCAATCTCCTCGTTGAATGAGAGTTGATTTCAAAAATTGCGG -TGTCATCGCTGTCGAGATCCGTGTCGCTCTCAGAACTCGACAAGTCTGCCTGAATACCGA -TCAGATCAGAAGTGTCCTGGGCAAGATAAACATCTCGGTCCTGGTGGCCTTGGCCAGGAT -TCTGAGTTTGCGGAGAAGGCCCCATCGGAGTTCCAAAGTTGGCTCGGGAAGCCACGCGAC -CTCGGCTGGGGTATATACCGGGGTGTACACTGCCCTTACCTTGGCACACTTTTATATCCT -CCGTATAACAGTGACTCTTACTTGAAGAGAGAGAAAAATAGGAGTGGAAAATGAGTTATG -ACACCATTTCACCCCACATTTGCGTGGGGTCGATATGGAGATGTTGCCGAGTTAACCGCG -AAGCCAAAAACATGTCCCCGATCATATCAAGATTCTCTGCAAGGGTTATACCTCCTCACT -TTTTCCGCCGTCTACTATGAAACTGAGAGAAGCAGAGAATATTTGAAAATCTATTAAAAG -ACTACCCGGCCCCTACTGTTCCCTTCTACTAGGCAGAATACATAAAAAGTACATATGCGA -ACTCTCGTTTTTGTCGACCATATTATATAAGGATCTTTCCTCTTAAGTACAGCACTAATA -CTCTAGTTTAGAGGCAAGTTTTAGGGGCACCTTTATAGTTACATGTCTCATTCTAGGCGC -GTAGTAATAGTGTGGAGCTTTAAGGTCCCATATTAACAAGCCCCTAACTTACCCTGAAGA -ACTCTGACGCTCCGACGAAGTTTCTATGGAGTTAATACATCTTGATAGGACCAATAGTCT -TGGTACTGGGATAGCATACATGCAAACAAGACCAATCCATTTGGGGCGTGTCAGATCAGC -TTATTACACCTATTCCCCGCCGTATACGTAGGCGGCAGGCCCCAGCGATCTTGGGCTGAA -ACAAAAAACCGTTTCTTTGTCACATATTAAGAGTTGCCAAAGCCCTTCACGACCTTTCCC -CAGTTTACAGACATTGATACGTCGAACAACCAGTCTCAAATAATCAAACAAGCTTCGAGC -CACTTTCGCATTGCCCACCTAAAGCGCACCAGATACACTGGGCTGAGCACTATGCCGCCA -ATGTTCTCCAAGATAGAACGTACCCTCAGTGAAAATATTTCTGTTACTGAGTTCTCAGCA -CAATGGAAGACGCCTTCGGACATTTTCTCTGTCCTCCTGCTTCTCGGAGGCGACATTGTT -AGCCGCGCGCTTGCGCAGTTGGCAGGATCGCGCCTCACTCCAGTCGCATTCTCTTTTGGT -ACGAGATTCTTAAGCCTCAGATTTGCTATGCTCTTGAGTATCTAACCTATTCTCTTCGTA -TCATCTTAGGCTGGGTAGCGTTCGCTGTCACCACCGTTGTCGCAGTTGTGGGAGAGAAGA -AGTTAATGCCACCCGCCGACTTCCCCTGCAAGGTGATCAATGGTGAAACCGGCTATATCC -GTGATAATCGAAGCTGGATCATTGGTCGTCTTGTGCGGGATTTCGATAATTGGATGGACA -AGCAACCGCGAGTACACGCCCAGCCAAACTCAACGCCAGATTCGAATCCAAAGCCCGTCC -AAAAGTGTGTTCAAGAGATTACCGACGCAAAGTGGAAGGCACTTAAGGAAAGTGCGAAGG 
-AAAAAAAGCAGCCAGAGCCAGGGCGGCCGCGTAAGGCCGGTCTCTGCGTGTCTATCTATC -GGGCCCAAAGGGCTATCAAGAGCTACCCAGGATACGATGCCCCGTACATAACAGGAATCC -TGACCACTATTGTCCAGCTTGGAATTTCAGCTATTCCATTTGGCCTCTCTGGTAATTGGG -GTATTTTTTTGGTTACAGCTGTCGGTATTGTTCTATCTTTTTCTACGGGAGCCTTGTCTC -AGTGGTCAAAGGAAAAGTGGGCTTGCCGAAGCAGCACGACGAAGACATTTATTCTAACGT -CAGGAAATGGGAGTCAACATGCTATTGTCATTGAAAGTAATGGTACGGGATTAGATCTTG -AGGATCTTGCCGCCAATGACCCGGGAATGGTTGCCTCCTACAAAACCAGATTGGCGATCA -CCGCTCTAGGTGCATTGTGGATTATTCTACTTATTACTGCCGCAGGAATTCAACAAGATA -CCTGGTTCCTTCTTGCAGTTGGAGCGTTGGGAATCCTTCAGAATGTGTATGCCGCTGGTG -CAAGCCGCTTACCAATGGCTTTTGGTGTGCCTCTGGAGTTTGTGGAGGTAATAGGTGACT -TAAAAGTCATGGACACTCTATTCAGAGTTGAGGACCTCTACCCTTGTCTGGGTAAAAGTT -TGTTGCCCGTTTTCTTTCCAGGCGAATTGGATGAGCGGGAGCGAGTAAAGTGGAAAGGTT -TTGATAATATTGCGAAGCTAAAGAGAGAGTCAAAGAAGCAGAAGAAGGAGGGAGCTGCAA -GTCATCGAAAGGGTTCTCAATCTAAATCAGAGCCAAAGAATTAATGAGAATATGTTAACG -AGGCCTATTTCAACAAGCCGCTCGACTCCCTCCATCAAAGGACCGATGTTTCGGGTCATA -ATGAAGCAAGCTAGATCCCATACAGGCATAGCAAAAGCTAACTGCATTGTGAAGCACCAT -GAGTGGCAAGGTCCTTTTAAATGATTATTTGAAGTGAACATCTGCCTGCCCGCTGCCAAG -CTTCCTGCCCTGATACCAAACCGCAAATTCGGTAGAAGCTCCAGAGGGCACCATCTCTAA -GTCGTATGAAAATTTCTTCAGTGCTTGACCGTTTTCCCCCTTGTAACTCTCCAGTGAGCT -GTATGATATACCCTCAAGGCTGACGTGAATTCTGCATAGTTCTTTGACACTCGTATTATG -TCTGGACGGCGGATCATTGTCGTCACACTGCATGATGATCGTCGTGATCATGTCGTCAAA -TTCATCCTCGGATTTATAGGACCGATAGAAATCGTGCCTTACAGGACGCGCCTTGAGGAC -ATCATCTCCCTGGAGCAATATCAGTAAATCAGAATACAAAAAAAGGGGACTAGGCAGAAC -TAACCTTTCGGAGATACCACTCCATCTGTTTCGAAGCTCTCCAAATGCCCTCATGGTTGT -CCCAATACCTATCTCTTGCAAGATGCACTGCTTCATCGTATTCAGTAGCGTATACGACCC -CGAGACTTTGGCGTGCAATCGTTGAAACCACACTTAGCGGCGCATCGGTGGCTCCCGTTG -CATGGGGCGCATCTAAGAAGCCCTTGACAACAGCTCCACGACAAATTGCTGTGCGCCTAA -TACCCATAGAAAGGGAGTATGTGAGCTTCCAGATAATATCGAGAGTGAAAAGAGTATAGA -AAGGAAAGAACTACTTACGGTCTCATGCCATTAGCTTGCATGATGTCTATCCGGTGGCGA -ACGTATCTTGAACGCAAATGATCGTAAAGATAGGGACTGGAACCAAGCCCCCCGACCAGA -ATTATTTTCTAGTCACTGTGAGAATAGACGACGAAATTGGCCTCATAATAACCTACCTTA -ACAGATAGACCTTTCTCATTGGCACTGCGTATCTGCTCGTCAACGAGCTTCTCGATGTCG 
-GAGAAGACGGAAGTGAACGCTTTTTCGATGTCCGAACTAGAGAATATTAGTTGACAGTCA -GCGTGGGCAGTAATCCACCATACCTAGAAAAATGGATTCGCGCATTCTTTATGACCGGAG -ATCTAGAAGTGTCGTTTATGCTTCCACCTTTGAAAGCTTCGGCCGGGATTGGAACAATGA -ATTCCCTGCCATCATCTCTCGGTTTGTATCCAGCCTTTATACTATCCTCCCACTCAGTTT -TCATGATACTATTGATTCCTGTTTGACTCAGGTTGCCCCACCGCCGACCTAAGCGGTTTT -TGCAAATGGCTTCGAATGCTTCATCCACAAAGATACCACCGCAGAGGCCTCCTAGTTTTC -CCAAAGTCATGAGCTGTCTGGTTCCAAGTGCACACAGATTGAAGATTTGGTACGAGGTGA -GTGAGATGCTTACCTGTTCCGACAACCGCTTCGTGTATCTGAATCGGTTCCACGCTTCCA -ACTCGGTAGGTGATCAGATCCTGGATATATCAACATTGATCCCGAAAGTTGATACGTAGA -TTATAGTTCCATCCCACTTACCACCGTGCCTCCTCCGGCATCACAAATTAGATACACGTC -ATCTGGTTTCATATACTTTTCCTGCTCGCAAAGTGTGACGAGAGCAGCAGCCTCGGGTTC -GGGAGCGAAAGCGAGAGTCGTGGCCCCAGCAGGGCGGTCTTCCAGGATACCGGCCTTCTC -AAAGGCCTCCTCCATAGCATCGCGGGCATAGCCCTTCCAGATGGCAGGGACAGTGACAAC -CACATTGAATATGTAGGCGTCTACCACAGTTTCGGTGCGTGTTTTGTATATAGTTTCCAA -AGTATGTTGCCAGAGTCCCTTGAGATAATCTGCAACTAGATCTGTAGCCGATTTTCCCGT -CTCTCGAAGCATCTTGCGCGCCCTCAGCACAAATTCAGACTGCCTGACCTCGTCGGAAAG -ATCTTCCTCGCGCAAAAGCAGTAACTTGAACCACCGGATGGGGTCCGCGTCACCGGGAAC -GCCGAATCCCCACGTCACCTTTCCATCCTCGTAGAGCAGCTTACTTGGCGCCTTAGCTTC -ATCGCGCCCGTGATCGGGCCAGCTTGTAATCACATTGATTTCATTGTTTTCGAAATCATC -TAATGTCGCCCATGCTGCACCAGAGAATCTACATACCGGTTAACTCTGGCTGACAGGGCA -AATTGTTCAATGATACTAACGTGGTTCCAAAGTCAATGCCAATGACCAACACATCATCGA -CCTCTGCGAGTTCCTTGATTCTCGCGGCCATCGCTTTCATCCCCCAACGTCCGGTGGGGA -CTTCTGAGGTCTTAGTCATATAGGTACTGTCTTGTACCTCTCCGGTCTTCCATTTAGTCA -TGATGTGTGAGTAAATCGGTTGAGTGTGGGCAAAAGAAGACTTAGGGCAATATGTGAATC -AGTGACAGAGAGTCTCGCTCAAGCAGAAGTGTAGATCGTAGCAGGCCTCCTTAGTAGGCG -TTTTAAATAGAAGATCTACCGACAATAAAAGTTTCACCCTGGCGACATGATGGCCAGAAT -GACCGTAAAAGCGAGCTTCGCCCCACAAATAACTCCTGCGCTATCAAGAAAATGCCTTGT -AGATGAAGGCTAACATAGACATAATTTCTTGGCTACGAACGCCGAGTACTATACAAACAA -CATAGTAAGCCTGGCCCCCGGTGGGCTTGGCTGTCAGCATTTGTTGAGGACATTTGGTAT -TGGATGCCTCGTCCCGATCCTCGCAAAAGCCACGATAAGATATTGGGTAGGCACGCAGCA -CAGTAGAGACTAAGCTGCCTAGCTCATTGCTTTAGCTAAATTTTAGAGCCGCGCTTCCCA -GCTTCTGGACGTTGTGGCCCCCTGTACTCCATATAGGGGGCTATGCAACGCGCCGCGTGA 
-TGGAAAGCCACAATACGAATGCGACTAGTTACTTTCACACATGTATTCAACTCTACTCTA -GTGTCTGTCTCAGCACGAGCAGTATCTCACTTTAATAATCCCATACGATATATATCTCTA -TCGCTCTTTTGACTTCATTCGGTGTGAAGTACGACTGCGAATACCTCTACGGAGTATCAT -TAGAAGGATTGACAACATGAGCATCAATACAAAACTTCCACCTGGTAATGATACCTCAAT -GGCACAGTTTGAGGGAATAACATCCCCGGCCGGAGAAAGATTGGATGAAGTGGCCCCATC -GGAAGCTCAGTCCTTGGCCGAAGCGCGTGCCATTCCAACTTGCGACTTCGCGATGATTGT -GGACGACGCACAAAAAGAGCTTGAGAAAGAAAGGAAAGGTCATCAAGCTACTAAGGTGGC -CCTAGAAAAAGCGCAACAGGAAGCAGAGAAGTACCGCAGGTCTTGGAAGCAATCGGTGAA -TGAATTGAACCGCCACCTACGACAGGGCCAAGGCTTTAACCAGTTGACCGACGAGGAGCT -CCTTCGAGATGTATCCGAGCTGCGGTTTAATATTCGCAGCTTCGCGGTACAGCATTTTGA -GCATGAACTGGGCAGCGTCACCATCGTCCCGAATGATTACGATGTAATAAATAGATACGT -ACCCCTTCATGAGCAAGAATTCGAGGCCTACATACGAAACAAGACTATGCGTCCTGAGCT -CATGCAAGCTTTTATTTGGAGGGTACTTTTCGTTTTGGTATTCGACCACCAGTTCCGCTG -GGCTGGGAATGAGGCGGGTACTCTAATGAAGGAGATGGTTGAATTGTTCGGTGAGTCTCT -AACCAGCTAAATCTGTGCCGCCGGAGGGCCCGGAGATGCGGACAATAAAACTGATATACG -CATTGCAGATCCACTGGTGAAAGCAACAGAAGACAAGGTCCCAGACCCACTGCGCAAGTT -TCACTCGTGGCGAGCGAACACTAGTAGCATGCTCATTCATCTCAAAAGCTTGGGCGAAAA -CAATTCACAGGATGACGTTCATGTGTTTGCTACACGTCAAAGCTTGGCACTGAACAAATG -GCTGTGTCGTTTCTCGCACTCAGATCCAGAAATGCTCCATAGCCAGTTGACCGACTTGAT -CAAACAGACCGTACGCTTAGATCAGGATCTCAGCAGACAAGTCGCCTCAATTCGATGGTC -CTTTGCCAGAGGCTCGTCGCTTGAGTTTAACCCCTCCTCCATGACCCTTCCGTCCAACTA -TCAGAAACCCAGCGAGGCTCTAAAAGTGAGGCTTGTGCTAGCACCAGGTATTACCAGACG -TGGAAGATCGTCAGGGGACCAATTTGATAAGATAGTTCTATTGTTGAAAACAGAGGTCAC -CTGTGAGGACCCGAAACCTGGACCAGCAGATCTCTCGAGCGATGGAATCATGGAACGAGT -GCGGAAGTCTTACAGACCGTGAAATGGGCAGGGTAGAAAGGATATTGACCTTACGAAAAG -ATACCAATATGCTGAGACTATGTATCAACTCATGTGACCGTCCACAAGATATAAAGGGCT -CTCAAGATGGTATATGTAAATAAAACTCGCTATTGGTGTATCGCCGCTATTTAGTTTGAG -TCATACACTGATTCATAACATGATATGAATAGCTAATACCATATTACAACTGACTACAGG -ATCTAAGATCAATACTTCCCCGGAATTTGTCCACTACAGGTCTATTTCAATTGCTTAATG -CCATTACTGGCATATATGTCAGATTCCGAATACTACTCCGGGTCGGTGGGCAAGGTTTCA -AGAAATTCAAGCCTATGCAGATATAAGCCGCCACCATGCGGTCTACTGGGCTCTCTGGCT -ATTGATTTCCAAAACAGTGACAGCACGTATGCATCAGCTGGCACTAAGGCTATGCCATTC 
-ACCCGGGCAAAATACAAATGAGGGGCATCGAACGTGGCTATGATCTTGCATGATCAAAAT -CCTATATTTTATCCCCTGAGCCTCAATCTCGGAAAATTACGGAGACCATAGCGGTGGCAG -CGATATCCTGTCCGATAGCATTGAGTTTTTAAGTTATGCTGACCTTTTGTACTGCCGCTC -TTTTTGAGGCAGCATCCTAGCAGAATAAATGGGAAAGCCCAGTTTCAACGCTATTATTGC -GGTTGGTGCCTTGGTGCTCCAAATGTTCCTTAGATAACAGATGAATTTTTGGGGCACAGC -GACCTAGTAAGACTTCGCATTTAGTTAAAGATACTTAAACCACAGTGTGAATATTAGCTT -AGAAGGAAAGAAGCTTTACAATAGCTGAAATCTCTAAAGATTATGGCTATGACGGTATAA -GCCTCGAGTAGCAGTAGTACAGCAGCCTTAGCCTCACCTGCGACGGCGAGTGAGGGTATC -GTAGATGCTTCACATCCAGCATTGATGTGGATAGTCCATTCAAGTTCCACCCGAATAATC -GTAACCCTGCCTACCATGGCCGAGATATCTAGCCATATGGCTGCTTGCTTTTGTCTGATC -ACAACATTGGTGCCTCGGTTAATAACTGAGTAGTCTTTTCATCTTTTATCTTATTAGCCC -CGGCGTCAATTAGTAGGTCAATCATCACACTAAACCCTCTTTATCTACTTATCATAGTCA -CGGAAGGATATCAGCATCTCCTTTATTCGCATTTTCGGCTGCTAGTAATAGCCTAAGGCC -GCTGGCTTCCTATTGAAGGTCGCGTAGTAGAGTGTCGTTGCTGTATCATTTACTTCGGCC -GGGAGGCTGACCTAAGTCTCGACAAGGAAGCGTACCATCTTCTCACGGCCATTCTTACCA -ACGAGGTGTGTGATACCTGGCAATGGAGACTGTGTCTATTATCCACGCTTGCCTCTGGTA -GCAGCTTCACGGTTCCCGTTTGTCTTTGGGTAGCAACGAGGTTGAGCGGCATGCTCTTAG -TAATTCAGTCGTGCGCAGAGACAACCCCATTCCTATCACTgccatcctcgtcactatcct -cgtcgctatcctcgtcactatcctcgtcgctatcctcgtcgctatcctcgtcgctatcct -cgtcgctatccccgtcgctatccccgtcgctatccccgtcgctatcctcgtcactgtctc -cgtcaccatcctcaccgccatcctcatagtcatctccattgccaATTGAAATGAAATTGG -AACCCTTACCACCACGATCACTAGCGCCTTTATTCACAAAGAGATTGATTTTGCTAGACC -CGAGCTGCAGCTGAAACCCTTTGTTGTGCCCGTCTCGATAAGCAACTCTCAACTGATCTT -TTGGGTCTATAGTACGAGCATCCATAGAGGCACCGGCGTCAATTAAGAGACGGGCTACCC -CCTGATGTCCACGGTGTGTAGCATAGTGCAAGGGGGTTTTTAAAGACCAAGAATTTGCGA -GCTTGTCGGCGCCAAACTCTAGCAGTAGCCACACAATTGCTTCGTGCCCGCCAATGGCAG -AAACATGCAGGGGCGTCTTTCCCGACGTACTTCTTGCTTCTTTGTTCGCATTTGCTTCAA -GTAGCAACTGTACAATTGCTTCGTGCCCATGATGGCAAGCTATAAGTAGCGGCGTGGTGC -CCGAGGTTTTCCCACGCTCCGTCTTAGCGCCATTCTTGAGCAGCTGCTCGGTAAGTCCCT -TACAGCCTCTTTTCGCAGCCCAATTCAGCGTCATTGTCTTATATTCTTTGAGCCAGTGGT -CGTCTTGCCAAAATAACCACTTGGTTGCCGTTTCGAGTTCGTTCTCTGCCGCAAATCTTA -GTGGTGAGAACCGATCCTTCTCAGGAAGGGTTAGATATGTATCAATATCCCTCTCTATCA 
-GCAACTTCGCTATGCCACGGTCATTGTTGATAGCTTTGCAGTGTAGGGGGGTCCAGTGCT -CTCCGGGTCTTGAGACGCTATCAACGCCTTCGCCGGGTAAAACGTCCATCAGCTCGCCAT -GCTCATTTATTTCTGCGAACTGTCGCGGCGTCCAACCATATTTATCCTTAGCCATCCTGT -CAGCGCCTGCCGCAAGCAGAGTATTGACAATTAACCTATTCCCACTCATCGCGGCGATAT -GCAAGGGGGTCCAGCCCACCAAGTTACGTCTTGCATTGACTTCGGCGCCATACTCGACGA -GGAGTTCAACCACCTCTATGTTTCCGTTTCCGACTGCACAATGTAGCGGCGTCAAACCAA -TCTTACTATCCCTGGTGTTGATATTGGTATATTCCAGTAATCGAACCACTTCTCTGTGGC -CCCATATGGCAGCAAAATGTAGGGGTGTCAGACCGCGACGAGCTGCAGCATTGCTAGTGA -TGTCATTTATGAGAGATTTTGCAACAGATGCAGCTCCCATCTGTGCTGCAATGTGAAGAG -CGATGTTTCCATCCTTGTCCTTTGTCATTCCATCAATTCTGCTTCGCAGAAGGTTCACGA -CGGCAGTCGAGCCTCCCCAAACAGCGCTATGTAACGCTGTCCGCCCTTGGTTGCACTGTA -AAGTTGAGGTAACTCCAGAGCGCAACAAAACCCGAACCATTTGCACATCTCCTCGTTCCG -CTGCAAGATGTAGTGGTCGTCGCCCTATCTCATCTGAGAGATCCAAGTGCTTCCACTCCA -TGAGACAGCTGAATGTATCCCAATATCCCCTCCATATCGCACTGCGTAACCTTTCAGGTT -TAGAAACACAGTCAGAAACATCAACCTTGTCAATGCTCAAGTCCTGAGCAATGCTGGTGT -CTATCAAGACTTCCTCTCGATAATCATTAATATAACTTGATATTATCACTTCTTTTTCAA -CCCAGTCATCGGTGGCTGTGTCTAATCCCTGACGGAAGTACAACACACCAATGTATTGAT -GGCGATTAGTCCATCCCGTTTTTCGAACAAGCAAAGCCCTTTCGACCAAGGAGGTACTGC -ACTCGGCTAAGAGCCGGCAATTGCCAGGAGATTCGAGAGCGTGTTGGAGTTCAAGCCTTA -TTAAGGCATGGGTACGACCCCACCAGCAAGTGCTTTCAAGGTCTGCTCTCACACGACTCA -AACGCCACTTCCCTATGACATATCCAGTTGGGGAAACAAACAAGGATGGAGGCGAAGAGT -CTAGAGGAATATCAAATAAGTTGGCCGGACACCAACTGAACCCATTAGACATAGTAAACG -AGTTGTGAAATAGGTGACCGGGTGCAATCCAGCTGAATTGCTTAATGATACTCTTATATG -TCTCTTGTTGCTGGTCTTGAGGCTTCACGCCCACTAAAAGTGCAGCGATCATGGCCATAT -CCTTTGGCCAAGATGTATATCTTGCGCTTAATATACTTAAGAGGTCATTCAGGCTTGTGA -TTTTGTCCCGGAGATGCTGAATGATTCGTGATGCAGACTTACGGGGACCATCAGGCTCGT -CCTCCTTTGCAAGGATTTCTTCATCCAGGTCCTTGAAAACAGGGCCATTAGACCCCTTAA -AGATGACCTTGACTTTGCGTGACTTGGCTAATTCAAGGGCCGTCCAGCCTCGACTAAACC -AGGGCGACATAAGGATGGCAAAACAAGCATTTTGAGGATCCCACCGCCAGTTCCGTAGAA -AACAGTCGTGTACTAGGGTAATTCGCGCGTTCTCGTAATTGGTTTGGATCTCTTGAATCG -CTTTATTACGAGTAGCCTTCTCCCGAGGAATGCAAATGGTGTCCCACCATATCCCTTCAC -ACTGGAACCGTTCCGCAATTTCTCTGAAGAATGCATATAAACATTCATTGACAGCGCTAT 
-CCGTCCAAGCTCCAATCCCGGTCCCATCAGACCAAACATGAGAAATGGCCATGTATGGCA -TAGGAGGCTGAAGCAAAGAGCTTCCGTCCAAATTCCAGACCGTTGAGCTTCCTTCGCTGG -CAGCACGCTCCAACTTATACCTTAGAAATCGACTTCGTAATCGTCCACATTTGGCGTCAT -GGCACTCGTGACGCTGTTGCACCGTAGTAAAATCAAGCTGAGAGTATTCACAGAAATTGG -ATGTACACTCCCCATGGTCCTTGAGGTTTGAAATACCCTCACGCCGATTTGACAAATACG -TGTCCAAGTTGAAACTTGGGAGAACCTGTGGTAAATCCTCCTTTGCAACCGCCCAAAGGC -GATTGGGACAGAGCTTGAGCTCTGTTGCTTTCTCAAAAGTTCCAAGGAGCTTACTGCAGT -CTGTATCCATGATGTTGTACAAAGGTTGACGTATGGTGTTTAAAGGTTCATGGGATTGGA -CAAGATCTATGAGGCTCTCCCCGCAGAATATGGGGGCTGACTGACCTACCAGGTTGGGCG -CTAATTCCAGCAGCCCAACATTATTACATACATTTGAGGCTCTCTCCCATTCGATTATCT -CCATCTACTGCTATACATGATACCAAGCTGAGGTCCGAGTGAGTCCAGTCTGCAGTGAAG -TCCGGAGGATCTTTCTGTATTGTCCCTCGCGCCCCCCCCCCCCTCCTCTCGCCTCCTTCG -CCTCAATAAATTAAGCCAAATCTTCGACATGTACCTAAAGTCTCTTGTCTCGCTCGCGCA -GCGCACCATCTCTAGGTCAGCCACGAATAGTTGGTGGGCGCGGGCTGCAGTTTGATACAA -CTGAAGAGTGATTGAAAGATTTGAAAAAAAAATAGATAGAAGAGAAACGAAGCAATAGAG -AAAAAAAAACGAAGTGACATGAGAGAAGACAGGGCTATATAGATACATAGACATGAGAGA -AAAACACAAAAGGAAAAAAAGGAACTTATGCGCCGACCGGGGTTCGAACCCGGGCTGCCG -CTTGATAGTGGATTAATCCATTATGGGAAAGCGGAGTCTTAACCACTAGACTATCGGCGC -TTTGTTTCAAGAACATATCTGTCAAAATGAATTTCACCACACTCATCTTCACTTGCTATA -AGATCGGCATCAACCATGGTTGAGCTGTAAGTCCGAGGTAAAGTCTTTATCTACTTCATG -GGAAGTATGACCCGAAATTGGGAAGAAAAATACACATTTTGAATACCCTGGAGGTGCCAA -TAGAAAATGTCATCAAATCTATGGTCAAGGCGTGCCAGCCGTCTAAAGGAATAATCATGT -CACAGCCATTGTCTTGACTCTCATTTAGGATGTTGAAGTATTGGAAGACAGATCCCGAAG -AACGCAGTGGCCGATCAGAGTTTTGAATGGCGGTAAAGACGCGAGGAACCTTACAGACAC -GATCATAGATGGTCTCGCGGTTTTGAACGACCAAATTGCGAGACTCGCTGAGTCTGTCTC -GTGAAAGAAGGGTCTTGACCAGATGCTTATTGTATATTGGCATGGGTAACAAAAACATTG -ATAGGATCCATGTTGGTGAACTGCATAGAACGTGAGGAAAAAGGGAAGAGAGCAAAGCGC -GAAGGAGAGGGTTGTATTCACGATAAACAGAAGCTTTGTAAATGCATGGTAAAAACCTGA -CAAGTCAAGCATCGGGGCAGGTGTGTCTCAAAAAGATTATTCAAATTTGAAAGGGTTGTA -ATCCATGGGCCGAGAAGCTAACACGCCCATCTCCAGCAAAGGATCTCGAATTTTATCTAT -ATTTACTCTATTTTATTATCCCTGGTTATTCATAGAAAATCCTCTGTGTGGTCCTTTCCG -CCTCGCAGCTATAGAATAGAGAAAGGAGAGCAATAAAACAAATACCTGAGAAGACTGCTA 
-CCAGGATTGCTAGGAAAGCGGCTATCAAAGATACTAAAAATTAATTAAAGCGTCGTAAAT -TGTACAATCTAAACAGGAATTTCAAATACACAGAGCATTATGTTATCAACAGGGCGAGCG -TGCATGCTGCTCTTGGTCAAATTGAACATCGCATTGTTTCTCGAACGAGCTTGGTGACAC -TTTGCTTATCGATGTCTCAGTTCCAAGATCCTCCTTTCTGCTCCATGTCTCAAGGGGGAT -AGCTTCAATACGATGCATATCGTTGAAGAACTCTGTGTTGGATCTGTTGATGGGCTCAGT -ACTCGAGCGCTTGGCTCCTCTGTACTTGCCTGGTGTTATGTTTTTATTTCCTCGGTAGTT -CCACATGCGAGAGAATGCAAATGAAGGAGATTCTGATTTGCTGGGGTATTCGTTCTTCTG -GTTATCGGAATTTCCAGGTCTCTGGCCTCGGTTGCTGTTGTATTGAAATGCCTTAACACC -AAACAGCTTGCGCAGTAGTGGCCACCAGCACATGAGGTTGGCGACATAGATTGCTGTGCT -GGCTTCACGTATGTACCAGATTTGGTAGGTGGTTGTGAGCGGGGAGGCAAAATTAAAATA -TTTGTTCAGGATAGCAGCAATTATGGTGAACAAGCCCAAACTCATGACACCGATGAGGAC -GACCTTTCTGCGCATCTTGAGTTGGGCTTTGATGAGGAGGGGAATCGGGACGGCGAACAT -GGCGGCGTCAGAAGAAATATTGAAGACGGCCTGGGTTATCGAGTAATGCTGGTATGTCGC -ACACTGCATGTTATCCACGGGCATGGCCCAGTATTGTGAGAATGGCCGGCAATAGACAGC -ATAATAGGTGACCATTATGACGACAAATCCGACGGCGACATAAACAGAAAGAATCTTGAT -ATAGAGGTTCGATTTCAGATTCTGCGTGATTCGCCAATAAAGCATCAACATGCACGTCTT -AACGCCCCATGTTGATACCAACATGCATTGCTCTAGCCCGATGACAATTTTGCTTCCATA -GACCCGATCGCTGACTTGCTTTGGGTCTGCTAAAACCTTGTCGTAGTCCTTTGGATCCAT -GAGGTTCGTCGCGTAGCGGGCAGATATTTGAATCAATATCAGCATTGTCGTGTATATTGC -CTTAGGATGAGCGGTTAGGTCAGTGGAACGAAGTGACCTGATTGTTGAGAGTAGAACTTA -CGAACGTTGCCGTCATCACATAGTCGTCCACTAGAGACCTACTGGTCAGTAGAATCAAAA -AGGTAAAGCCTAGAAGACTTACAAAACAAACGTTTGACTCCCTGTCCATTGCCAATTATA -CGAGCTAGGCTTTGCTGGAGTTAACATTGTTATAATTCGGTACTTGCTTTTCAAACTTAC -ATTCTCCCGACATAAATAACAAGTCCTATAGCCCAGAAGACTATGGCTTCTGTAAGGAGC -TGGCGTGGAGTGGGCGACTCGTTCCCCATCTTGATGGATAGAGTGACCTTCGCCTTGAAC -CGAACTTCAAACTGCCAGAATGCTACTGGACGAGTCTTCCATGTATGAGAGATGGCAATC -TATTCAAAGGTTTTGATGAGGAGCATCAGTGATAGGTTTGGGTGCGTGCCATGGCTTGTT -GACCGGCAGCTCGGGTAGATCTATAATAGATGTATTCGGATCAAGCTACTGGGCCGAGGA -GAGCGATTCGGTAAAAATGGCTAAGCTAGATCCCGGGAATAGATTTTTAAAGCTGCCAAC -CGAGAGCAAGGCTGTAGCATCATGCGTCTTGGTCCGTGATGTGTCAGCGCTGGGGTAGAG -TTGGCTGACAAGCAAGGAAGAATGCATTGCAAATGCAAGGGATTGCTTGCCTCGAAGCGC -CAATCCGATCAATGTCAGTTGGGTTTGGAATGTACAAAGTAGGCACATAGTACGGAATAG 
-ACAAGGAGAAAATGGGCTATACGCATCACATTCCTCGTTTGTGCTGGGAGTTCTTCCTTT -TGTTGGCTGGGCCGAGAGGCTGATAAAAGTTGCCTGTGACACCGAGTCTTCCAATAAATG -CCCAATGTTGTGCAAACTGCGCCGCAACTGTGCGCGATATCTGTAATAGGATGGGTTTGG -CGAGATGGGTGCTACCTAATCCATCCATATTCAATCGCATCGTTTCGTCATAGAATTAGT -ATTCTGATTTTGAGGCATAAAGTTCCCCGTATGCACGTTGCCGATAGCTTTGCCTCGAGT -ACGCAAACTCTTGACAGGTCATATCCTATAGTGGATGCGTACCAGTCGAACGGCTGATGT -TATTGGGAAGATCGGAACGATGATCTGCATGTCGATCCAAGCGATATTCAATAGCTTTTG -GGCACCGTGATCCATAAAAATACACTTGCGCGATATGCGATCCAAAGAAAGGGTGAGTAA -ACACTCACATGGTAGGCCAATCATGGCTGAGAATCGTGCTTGATGTAAATGTTCAACCTT -CTGCGTTTTATCCCTAGGGAACCTTGCTTCCCAATTGATTTTTCACGACGTCACGTTTTG -CAGGCAATTTTTGTCATTTCGGAAGCCAAAAATCCACTAGTGCATAGCTTGTACTCCATA -CTGTCAACTCTTCATGGCCACGCCCCGCTGTATAAATGTGACGGCCCAATGCTCCTTGGA -AGACCTAGACAGGCACTGCAATGATGACCTTTGCTATTATCGGTCCGGCTCATTTACCCA -GAACTTGGAAAGGTCTTTGCATAACGAAGAAAAGAAGAGCAAGTCGGATGAGCGCTACTC -GTTGTGGCTGCGTGAAAACATGTTAAATATATTCAACAATCTCATATAGCGATGTGAATA -AATCTTTTAGAATGCTTGATCTTTCAGGTGCTTTATCAAGGTGTCATATTTTGACAGGTA -AGGTCCGTAAGGGGACTCTTCCTTGGTAGGCATCTGGCCATTGTGGAAGCAAGGCGGTAT -CGCTGGACTGGAATTGAGAACCAAACTCATACCCCAAGGGTGCTGGTGATCAAAAGGCTA -TATTTTTCCTCGATATGAGGCACACGGTATGTCTGCCTGTACCAGCCTCTCCAAACAATG -GAGCAAATGAGTGGTGTTTGGGGTCGCTTGTAACAATTACCTGGCATTCAAAAGAAGAGG -GTAATACTACTATGTTTGTATTTGTAAATGTCCAATCTACCCTAGAATAATATAAATTAG -TTCCCGACCTCTTTGCAAAGGTCGCAGAGGGATGATGACTACACTGTCACATATACCTAG -GTAGAAGCAGCTTTGTACCACAGAGTGCTCCGTGCTCGGCGAACAGACAGGACGCAGAGA -CGTCATTGCACTTTTGGCGTGTGATATACCTACCGAAGTAATGCAATGACATGGATAGAA -TTGCTTTTCCTTATATCGTAGCCTAGTTAGAACTAATTAGCCACGTGAAAAGCCTCACAG -GCTACGATCCCTTGGAGCAGTATTGAGCACACTAAATCTAACGGCCCCGGACCTATCTTG -CGTGCTGTAAGAATATAGAATGCTTTTATCTAGCAAAATGAACCGTGTAGCCATGGTCAT -TATTAGGGAGATGTTTGATATTTCGAGCTATTCGAGGGGAGGTATTATGCATGATTACAT -GCACTAGATCCATTGAATCCGCCTTTTTGTTTAGCTCTGACAGCATTGACTATATTGGCG -TGTTCTTGAGTGTTCGTCATTCGGTGGTTTCTCGAAAAAGTGCAGAGCTCCGCCACATTA -GCCAACGACCATTGAAGTTGAGAACATCTACAGCCACATGGGCGACTGTTAGGAGCTGCG -TCATGATTTGGACGAGCTTACATGCCAAGTGTAATGTAAATAACTATCTTATGACCGGCC 
-TCAGAATGACAAGACCTCTATACTCTCTAGGATCAGTGCATAATGGCGTGTTTGAATGTT -CCTTTGATAACGGAGGTTCTCCCCGTCCACCGGTAGGCTTCTTTCAGATTATGATAAAGA -AAGGGAATCTTTAACCGACGTCGATCGCTAATCAGGAGGGGGTCGTGTCAAATGCCGGAA -CACCCGCTATGCCTGCTTAAAGCATTCTACCGGGTGGGACAATGATCCAAGAGTATAAGG -GGCTGTTGGTGGCGTCGATACTTGTCGTTGAACCAGGACACCGTAGAGAGGCAACTAAGA -ATGGATAATCTTGATTCTTGGATGCTCTTTTATGAACTCCAATAGAACTGTAGATTTGCA -TAGTACCTGCTTTGTGTTGTTTGAACATTTCTCGATAGCGTTGGATTTTGTCCAGCATAC -CTATTTCCCTGTAATTTGGCTCTGCCAGGCCAATCAAAATCGACATACTCTCTATCTTTG -GTCTGTTTTCCAAAGGTCTCGGGCTGGAGTCTCGCACCAGTCGAAAATGAATTATTATTC -TGCAACAGGAGAAATGAAAATATGTGTTGCTCATTCAAGCAATGGGGGACTATGGAGTAA -ACCGTCTTCATGTACTCCGTAATCATCGATTACCCGAGGCCATGTGGGCGCCTCATTAAG -GGAATCGCGATCGTATGCAGGGCGCAGACCAATTGGATGTTGTGCAGGACACCCCTGACA -ATGCTGGAATGTAGAATATCTCCACAAGCGAACACCTGAAACCTGTATAACCTCAAAGCC -AAGATGACAATAGCATATTGCAAGGGTTTCCCTAGCTGCTGAATGTAGTTCGCCTCAAGC -CACTCCGTACAGATGTTGTATTTTCCACTCCGTAATCTACCTACAACATACACGTCGGAT -ATGGTTAAACCCATCGGATCGACGCCAGAAATTCCGTATTCGCCACGAAACCCCCTTACT -TAGCCTGCGATGTATCAACTCAATTTCGTTTTGGGAAGGTCATAAGAGAAAGAATTTGTA -CAAGTCCAATTAAAACAGGATTGTAATACATCTTGATCGTATGATTCGTAGATGCGCATG -GTACGGAGTATGCTCTTCTGTTTCAACAAAGAAAAGTCCCACCATCGCCAATAACAACAC -ACCCAATTCCATGTTTCATCGAGGGCAACAAAGAAATATGCATTCTTTCGGAGAAAATTA -TAGGACTAGTCATCCTTGCATGAATTCTTGGACTTAAAGAATACATGTGGCAGCCATGAG -GTTATTAGCCCTCACATCCTACATTCTTATTGCGACACGCCCGATTGGGCCTATGCACCC -CGTACGCTCTGTCAAACTTAGTGAACATAGTATACGTCGTGCCTCCTTTCCTCTCTCCCC -ATCTTGTCATTGCCATGCATGAAACTATTTCTTGAATATTCGCTCCAACACCGCATGATG -CACTGCAAAATGGCGGCATTTTCAAGACAATCCTCCGGGGACATTGAAAAATCCCATAAA -GACCATGCCGGAATGGATGAGGTACGGCTAGAATCTACACTTGGGCATAAGCAAGAGCTG -CCCCGGAACTTTGGACTGTGGAGTTTGACCAGCCTTGGGATCGTCATTGCAAAGTAAGGC -ACTCGTCAATCGGCAATTGCAATGATTCAATTTCGATAGACTAACGAGACCAAGCTCTTG -GGCTTCAACAGGGGGTACAATCGTCGCTGCGCTCAAGAATGGCGGCCCAATGGCTGTTCT -GTACGGGTTGATTCTAGTCAGCGTCTTTTACACGACCATCTCTGCCTCTTTGGCTGAACT -CGCCTCCTCCATGCCTTCTGCCGGCGGGGTATACTACTGGTCATCAGTCTTGAGCCGGAG -TCGAGGCCGCACAGCTGGATTCTTCACTGGGTACCTAAATGCTTGCGCGTGGCTGCTATC 
-AGCGTCATCGATGAGTTCCATGTTGGGGAACGAGGGCGTGGCCATGTATCTGCTCAGATA -TCCAACCGTGAAGTGGCAAGCCTGGCAAGTATTTATTGTATTTCAGATTGTCAATTGGAC -ATGTTGTGCCATTGTGTGTCTGGGAAATCGGTTTATTCCTCTCCTCAACCGCATTGCCCT -TACACTTTCCATGTGTGGACTCGTTACAACTGTGGTGGTCCTCACGGTGATGCCGAAAAA -GCATGCCAGCAACGCCCAGGTGTGGACTGAGTACCACAACAACACAGGTGGATGGTCAGA -CGGAGTTTGCTTCATGACTGGTCTGTTGAATGCTGCATTTGCAGTTGGGGTCCCAGACTG -TATTAGCCATTTATCAGAAGAGGGTAAGTGAGAAAGAGGAGAAAATGAAGCTACCCACTA -ACAATGCGGTTTAGTCCCCAAACCTGAGATTAAGGTCCCGCAGGGCATAATGTTGCAAAT -GCTGACGGCATTTGTCACATCTTTTGTGTACCTGATTGCCCTCTTCTACTCCATTCAAGA -TTTGGACGCCGTCTTCAATAGTAACATTGGTTTTTTCCCAACCGCCGAGATCTATCGACA -AGCGACAGGTTCTAACACAGGCGCGATCGGTCTGATTGCGGTACTTTTTCTCGCGACCTT -CCCAACGCTTATTGGGACATTCGTGACAGGCGGTCGAATGTGGTGGAGTCTAGCACGCGA -TAACGCAACGCCCTTTTCGAACTATTTCGCCCAAGTGCACCCGACACTAAACTGCCCCGT -GCGTGCCACTGTTGCTATGAGCGCACTCGTCACTTGTCTGGGGTGCATATATATTGGCAG -CACGACAGCATTCCAAGCGCTGATCTCGTCGTTTATTGTCCTAAGCTCGCTGTCCTATTT -CGGAGCCATCTGTCCTCATGTTTTGTCGGGTCGTGGAAATATGGTTCCCGGGCCATTTTA -TATGGGACAGAAACTTGGGATGGTGGTCAATGTTGTTTCCTTGTTGTATATCTCTGTTAC -TGTTGTTTTCTTTTGTTTCCCCTTGGTGTTGCCGGCCACAGTTCAAAATATGAACTATAC -CAGTGTGATTGTTGTTGGCTTGATGGTTCTCACTGCCTCCTGGTGGGTATTGCGTGGCAG -ACGCCAGTACTGTGGTCCACATTATAGCTTTGAGGCTGCTGAGCGGTTGGCTGCTTTCCA -AGAAGGCAAGGAGGGCCGTCGATCTTTTATTGATACTGTGGGTGTTTCGCCTGGCGGTAC -TGAGTACGGGAATGCTTCATGATAAACGAGGAGCGGACCTCCAAAGGAATGAAAGTGGGC -ACCGCGAGAATTGTGAATATTGTTGCTTGTTGATGACCTTGTCAAAGCGTGTGTATGTAT -AACTTGGACCAATTTTGGATGAATATAATAGTGATATATGTCAAAGTACTGTCATCAAAC -AAGGCGGGGCTAGAAATCACATACGGCCGCATGATTAGCGGCCCTTCAATTTGGAATTGG -GAATAGGCATCTTTAGGTCATTACTATCATGCATGAAGATCACGAGATCACTAGTAGGGG -ACAAGATATCCTCTCATCTTCATAGTCGCGACTTAATGCTCTGTCCACCTTCCCAAGCCT -CGCCAGCTTCAAGGGTTTGCCAACCAGACACCGAGCCAGCCTCCACACAGATCATGTTTT -TGTAGGCCTCGTCTGGGCCGAAATCGGCCATGCCCTTGGCCTTTTCAATCCATGGGTTCC -AGACCACAACATCAGTCAGAGCTTCCCGAGTGATTGAGAACAAAGGCTGGTCATCCTTCG -CAGAGGCAACGACGATTGGAATAGCCGGGTCAAGTTGTTGGTAGATGCGATCGGTTTCTT -GGGTGATTGCCAAGGCGGGGGACGATTCAGTGTGAGAAGTCCCTTGGAGGGTCTTATCGA 
-GGTACGTCTTGGATTCGAGGCCTTTAACACGGATGTCGGAGATGTCCTATTTGTCAAATC -CCAAATTAGCGGAGGACTGGGAAAGGGTAAAACCTTGGAGGTGTTTCATCGTACCGCAAT -GTTCAGATACGTGTGCATCAGCACCTGGAACTCGAAGTTCTGCTGGCCCTCATTGCGAAC -CTGCAGCGCTGTCTCCAGTGAATCCGTGGTCAGCGTAACACTGTACACCAGACCGAACTC -ATATGGCCAAGACTGGCGGAACTCGTCGGTCAGCATAGAGTGCGACAGCCCGAAGTCCAA -CTTCACAGTGAGATCGGTTTGGTCGCTATCTCTTCCGGGCGCTTCCGAAGAGGACTTGCC -CAAGAACTCCCAGTTGGAGTTGCGAGCGAAGCCATGTTGTGGTAGAGATGAGGTAGCGTG -GTTCTGGGGTGGGGGACCGAAGACCTGGAGAATAAGCAGAGCGGTTAGCTGTAGCTCTCG -AGCGCGATGGTGGAGTTCACATACCGGGAACACCAGCGGAATGCCACCACGGATAGGCTT -GGAGCCATCGAGATGAGCCTTCTCACTGACAAACAGCTGCTCCTGGCCGCCTGTCTTCCA -CGAGGTCACAGTGGCGCCATACAGGTTCACAGTCACTGACTGGCCAGTAGGCAAGGAAGC -TTCAATGATGTTATCATTGATGGAGAGGGTGGGCTGAGGGAGTGAAGCTCCCAATCCAAT -TGCTGAGGGTTTGTTTGATCGGACCATTGTTCCAGGGGGTTTTGGAGAATAGAATTGAGT -CAAGAGGGAGAAGAGGTGCTTGACGTTTCGGCTTTTGAAGCGCGGGGATCTTATCGGTCC -CCGCCTGTCTGTGACGGGGCCACTCCAGAGCTACCTAGGATTACCCTTACCGTATAGTGT -GCAAATATTTTGATCAGTACTTCCATATGGACTATTCTTATGGCCTTTCTAGCTCCAGTT -GTTTGTCTCAATATACAAAAATGCCACTTCCTCCTTTGGTATTCTCCATGTAACATACTA -AAACACTTTCACCCCCCAACCAACGCTCACGCCTCTGGATAAGATAAAAAAGACGCACGC -ACAAGAATAGACGAGCCCACAAAGAATTATCAGACCAGCATAGTCATCCTGGCCCGTGGA -TGTTTGCACTTTTAAAAGCTCGCCGGCAATTGGAATGGCGATGAGTGTGCCGATACTGAC -CAAGCAGTAGGTGGTACCATAACGCTTCCCAAAATCTTCAGTTTTGGAGATTTGCGCAAC -ACAAACAGGCGCAAGACTGATCGCAGTGCCGCTCCAAAATCCAAAAAGTACGGCGAATGC -CAAGATGGCCGCCTTGTTCGACTGGGATGGAAGCCATAGGGCGAGAGTAATCGCGGAACA -GATGAACGATGTGATGACCATTATATTGAAACGGCCAAATCGGTCAGCAAAATAACCAGG -GACGACACGCCCAACGATGGACGCAGCATTCAAGATCGACGATATATTGTATGCAAGAGT -ATCTTCCATACCACGAGACTGAGCATAGGATGGCAAATATGTCAACGGGATCAAGAGAGC -GAAATCGATCAAGACAATGGCCGCGGTCAATAGTGAGAAGCGTAGTTCCTTTAAAGCTTC -GATATCAACGGTTCCTTTTCGTGCCGAATTGAAGGGTATTCTCGTCCTCAAAAGGGCGAC -ACTCAGACAGCAAAATGCTGCACTTATAAACCCAATAATTCGTATAGCCCAAGGGAAACC -TAGACTATCGGCGAGCTTGGATATCAATACCGGAAAGATGATTCCCCCAATACCCCCAGC -AGTCGCAGCTATACCGGTAGCCAGGGCACGACGCCTGTGAAACCAATGAGCTATGCAAGC -CACGCTAGGGGTGAACACCATTGATGAGGAGATACCGCCAAGGACACTGAATCCTAGCAT 
-GAATTGGTAATATTCTATACGAATTCTTATTAGTTTGGACCGCACCAGCTCGGTTATCAG -AAAGAATTACCTTCTGCAACGCTGAAGATCATCAGTGATGCGATGAGCCCAACGCATCCC -GGGATCAGCAAAGGCGCAAGACCATGGGCATCAAAAAGAGGTCCTATCAGATATCAGCCC -TACATATCACAGCGAATAGATTGAATGGGCTTACCAACTTGTACACTTCCAAAATAAAGG -AAAAACATATACAGGCTGAATATCCAGCCGATGGAAGCTTTCGAATAGTCTCTCAATTGA -TGGTCCGCCAACCAAGCCTCAAACACACCAATGGTGTTCATAAGTCCAAATGACGGCACC -ATCGCGCACCAAGAGCCCAGAACGACACCCCATGCGCGCCAGCCACCATCTGGAAAAACC -AAGTCGCTGTCTTGCTCGATTTCATTCAGAGAGTCGCCCGAACATTCTTCGGTGTTCACC -TGTTGAGAAGATAAAGCTACATCGTTGCAAAATCTTGGCTTCATGCTGGCCTCACAGAAA -AATGAGAGAAAAATCGGTGAAGGAGCTCTACCCCGAACTGCATCTCCCCTCTACCAGGGT -CTTCATACCTATTGAGAAGCTAATCCGTTTTGGGACGCCGGCAAGATCCTCGTCCTCTCC -ACATCTTGCTGAGTCAGGAAGAGCATGAATAGGTATATTCAATGAGAAACATGGAAGTTA -TATACACAAACGATATACAGTTTATAGCTTCGGCTCCGAGTGTTCAGTGTCTATCCTACC -ACTCTTCCGGGTCGAGGATGACTCCGGCACGCCAGATTCTAATTCAAGCACCTCGACGTC -TGGTACCAAGTCATGTTCTTTGGGTTGATTGGCGATCGTCACTTCAGTGCCCTCGGTTCT -CCGTTGAGGTGGAATAAAGATATTGTAGTATCGTGGAAACACCAAACCGAGCAGAACCAA -GAGCAAACAAAAGTGCGCCAAAATAAACCCGGCGATCATCATTGGGTCGTATCGCAAGGA -GTAGAATTCAGGATACGCGAAGCTGAAAACAATGGCCAGAATAGGCGCGCTAACGTACCG -GAGTAGAAGGGGCCAGAAACGAGGGATATTCCAGTTTCCATTGCCGCCAACGACGCCGTT -CAGATCGCGTCTTAATTGATTACCCTGCGCAGGTTAGTTTCTGTGGTGAAGAACTTGCTG -ACAGCGGTGAACATACAGAGTAGAATGCTAAATACCAAAATCGACTGAGAAAAGGGTTCC -GCCCCCAGAAGCTGGCGGCCTTGATATCCGGAGTCGTCGCGATGAGGACGGAAATCATTG -AACAGACGATATACAGCCCAAACCCGGCCCCAGCGCCAGCTGACGGATTCGAGATACCGT -GCGCGACGGCAACTCCAACAATCTGGCCACCGAAGTATCCGAAATTATACACAACAAACG -CAGGCAGGCCTGTCTGGCTCACCACGTCGTGCCAGCGATAAACAGTGGTTGCACCAACGA -GCTCCGACCACACAACAAAGATCAATGCTACATTGTTGATCCATCGGTCGATTCCATCTA -GCAAGTAGAACCCAAATTGAGTGCAATAGGGCAAACACATGAGGAATGATATAATTGTGA -GGCTCGTCACAATCATCGGTCGGGACATTTTCATGCCCGAATCCACGGCCAACGTAGCTA -CTGCGTCGAGCATCACAAAGGCTGAACTAAACCCGAGAACCACCAGGGTAAAGAATAGAA -GGATGGCCCACCAATTCGCACCAGGCATGTGAAGCACAGCTTCGGGGAGCGTCAGGAAAC -CGACCACGAATGCACCTAGTCTCTCGCCATCTTGTGGCCAACGTCGGAGGTAGCCTACAA -CGCCAAATACGGCAAAGGCCGCAATGTTCTCGAACAGGACGTTACTTCCGCAGATGAGAA 
-TTGCATCCATGACAGCATTGGAGTGTTTGGAATTATAGGAAGCATAGGAGGTGAAATAAC -CGAAACCGATGCCGGTAGAAAAGAAGACCTGTCCGACAGCGGTTTGCCAGACAGTTCCTG -AGGCTAGTTGGTCGCTTCTCCATGTCGCCCATAAGAGTCGGATACCCTCGCGCGCATTTT -CTAGTGAACAAGAGCGACCGACGATAATAAGGGTGGTGACAATGGGCAGACCCATTGTCC -AGTAGACGACGCGACCTGTCAGACCTACTCCACGGAAAATGGAAATCCATATCAAAAACC -AGATGAATGCACTCCAGCCGACCGTCTCGCCGATAAGGGCAACACCAGGGTACACGGTAT -AGTGGGCAACGGCACCGTTACCAACAGTGAGATTACCCACGACCGGATCGATATTGGCGA -CGACGTTGCCCATGTAAAATTCCTCAATTCTGCCCTCCCATGGCAGTGGACTTCGGAAGG -AGTTGCGAAAATAGTTCATGATCCACGCCAAGTTGACCGTAAAGTATTGCACGACAACAA -AACTGATCAAAATCGGGCCCATGCCAATGCCTTTCAGCCGAGAATTTATATTGTTGAACG -CGATCACCGTACCGCCACGATATGCTTGTCCAATCGAAATCTCAAGAATAAGAGCCGGAA -TTGCGATGACGAACACGGCCAACAGGTAGGGGATAAACCACTGCAGTCCATAGTTATTGT -AAACCACCGAAGGGTATCGCAACAGATTTCCCTGACCGGCGCAGCCTCCCATGGCGGCTA -GTAGGAACGCTGCTCGCGAGGGCCATTGATCACGTCCATCTTCCGACTTTTGCGGAGGAG -GAGCGATCCAACCCGCGACCTTCTTTAGATACTGGGCGAGTACCATAATGGTACCGATCA -AAGGAGGAAGATCTCAGCAGTTGAATGGTTTTTGTATCTTCATACGGCGAGAATGATGCG -ATGTTGCAAGATGAAAGCAATCAAAGAAAAGCAAACGGCTGCCCAGAAATCTCAGTGGTA -GATCATGGACGCTTTCGTATGGCCTCCGACTGATGCCAAGGGGCTTGACAAGCATCTCAT -CCTGCTGCTGGGCTGATGCCAAGAGGCCTGGGGAGACATTCTCAGCCTGGGTCCTTAGAT -TCGCGGGTTGTGCGATAGGAGAGCCATGCCAAGAGTCTTGGAAGAATCCCACTGTGGACG -GTACGCTATGTCGGGTTATTGTTGGTCAGGTTGATATTCCAAGGTTAGTAGCAGATACCC -GCTAGCCTGGTTCAATCATGTGAGGTGACATGTAGGCATCTATGATCTCGAAGAGCGGAT -GCCGGGTCCGGAAGATGGCCGAGCGGAAGTTTCTATCTTCTGAGAATCAGTCGAAACCCC -TTCAGTTGCCAAGGGCGCTGTTCTTGGCAGGAAATTGCGCAATTTTATCTGACTGTTATA -TATCCATGGCTAGTACTCACCCAACGGTTCCCCTCATGCACGCAAAATGGCTAGACAGGA -GGAGTAAGACTATATTGAAGACTCGGCGGAAATTTTGAGATCAAATAACTCCAAAGTTGA -GTCTCTGGACTGGGTCAACTCCCCAGTTGAATTGCAGACTTCGGGTACAGAGCGAATATG -CATAGAACGAATAGAGTGATAGAGGCGAGGTCTTGGTTCCCATAGTATTGAAGGTGTGGG -TTATGTATATACACGGCCATCCATGTGAGGATAGAAGAAAGACAAATTCTATCCAATTAG -ATGTGCTGCATGGGCTGAGCTTATAATCATATTTAGTGCTATCCCTCTGTCGTATGTACT -GAGACCACTTAGGGCATAGCGAACACGCGACATGAACATGATTATCCTGTATCATGATTC -GATTCATGGAGGCTGAAATAAAACAGCCTTCCTACCATGGAGATCTCTATGTTTTGGTCA 
-CATGGAAGTGCAAAAGCTCATTGATAAGCAGAATGAATCACCCCATGAGCGAGGGCCCTA -TGCTACAATGCTGCTACAAATTCAAAGACCTCAATATAATTGGCCCATTCTACAGAGTTC -ATATAGTCTGTTGCCTCTCCTAAAGATGTACAGCTATGGACCAATCGCATGTTGAGCCGA -TGCGGCGATTGACTATATTAGCACATCTCACCCTCAGCACTGCTGTAAACGGAAGAGTCA -TCACAGTGATTGGATTCTGAAGGTACAACTGCGGGGTAGCATAAAGCCACACTATGTGCA -TTGGGACTCGAGCAAGTCACTTGAGGCTACTCCGTGCACCGTCCGCGACTTCTTACTTGT -TGCCTCATTCTTTCAGTTGGCTGCGAGTTCATGATAAAGTTCCCAGGATGCTGGCTCCCG -CAGCGAGTATCTTGGCACCCCAAGGATTTGAGGCTCATCTGATTGGAAAGTTAAGAGGAT -CCAACGATAGCCACCGGAGCGACCGGAGTGCGAAGCATTCCCATGCAAAGGGTATATATA -GTAGGTGGAGTTCGTGAATGTGTGTTTTCTCGCTCTCATACCAGCTTCTGTCTGTCTCAC -CCTCTTGCCATACAAGTATCCAATAAGGACTAGTGCCTCGGCTACTCAAACAACTCAGAA -ATATTATTAGTCTGCGGTCTCAGGCGCAATCATGTTTGTTTCCCCTAGAGCAAGATGCTC -ACCCCAGGCTACTCTTCATTATAGAGCTAATTGGGTTAAAACGAAGGTGCATATACAAGT -TGAAGACTACCTCAAAGGGTACCATATATCTTAACACCAACACTGAGAGTTGGCCAGACG -ATGCGGAGACGGACGGCGCGGAAAGATATTCGGACTGCAACCAGTCAGACTGCTCATTCT -CCGACAGTAGAGGCGATTAGGAAAGAGCTTCGACTGCCGTGCTTTAGTGGTTACAGGTGG -CTGTTCTGCTGTTTCAATCCTTGGAGGTACCTATAGCAGCGTACTGAGGAAGCAGAAAGT -CAAGGAGAATGTTAAGCAATATTTCTCGCGTAACACAAGCCGATGGGCCGTCATTCAGCA -GGGGTCGATACTAGTAGTTCTGAAATGTATAGCCATCTCTCGAGAGCTGCAAAGTACTTC -CTTTTTGTTGTTTGCGAGAATATACAGCATGTATGGTAGAAGACCATTTCTGGGACTTGG -AACCCCTAAAAAGGGAGATTGAGGACGCCAATCCTGTATATACAAAGTACCTCTGATTTT -GGATACCCGTGCTCCAACAGCAGAGTCCTAGGGGATTCTCTGATCACAATTCATGGACTG -AGTCTTCTCTCTTACATGTACTCTTCCAGCGTAAAGCCTGCAGCGAATCGCGGACCAACT -TACCATTAAGTAACATGGGCAATAAAACAATTGTCTGAAGCGGGTGGACTAGATGGGTTG -AGTAAAAGCTCCATGAGTATGAGTGACTGATATCATCTATGTATTACTCACCCGTGCTGC -ATCAATGAGAATCTACTACCCTGTAGACTGACTCCTTGAATCTGTTTTATCTCATCTTCT -GTCAGCCGCCATCCGAGTGCCTGCATATCCTGTTCCGCTTGACGAGCATCACGCACACCA -ACCAGTGGCAGAACACCCTTCATGATGGAATAATTTAGCCCAACGGCAGGAACAGGCACA -CTCCGTTCCTCGGCGATAGTCTTTAGGACATTGGCAGTAGGTTCCAGCACATGCATGGGA -TGACTGCTGAAGCGCCGCCTTCGCTGGGGCTCATTGGAGCGCGAATATTTGCCGGAAAGA -CGGCCTTGAGCCAGCGAGGCAAAGCCCTGGAAGCAAATACTCCGCTTTCTGCATTCTCGA -GTCAAGCCACTCACTTCATGCAAGCGGCGGACGATTGAGTACTCGCACTGGCAGACAGAG 
-AGCGGGACGCCATGCTTGGCCAGCTCATCAGCCATCTTGATCATCTCTAAAGGTGTGATA -CTTAGCCATTTCAACCGCGCGCGCCATCCCGGATTTTACACAGTCGGCCATGCCCTTGGC -GACAGTGGATATCTTGATAGGGTGGATAGGCCCGTGGACGAGGTAGATGTCGACGTAGTC -TAGTTGAAGACGCGAGAGCGAATCTTCAAGCTTAGACTTGGGCCCGCCGGCCTGAATGCA -TGTGTTGGTCATGTCTGGTGTCGATAGCCATTTGGTCTGCACCACGAAAGAGTCGCAGGC -CCGGAATGGGGTCCCGCAGATCCGCTCGCTCTCACCGTCGCCGTAGGACTGCGCTGTATC -CACGAAGGTCAACCCAACAGACTTGAGCTTGGCCCACGCTGCGAGGATGTGCGGTAGGTC -TCATGCTGGGTTGTAGTTAAAGCTTGCTTTGTCGCCCCAGGACCATGCGCCGATACTCAG -GTAGGGAACGAGTATATCCTTACCATTCACGCTACGGAGGGTGATTCGGCTTTTTGAGTC -TGGTTTTAGGTCGGATGGAAGCATGCTGATCGGGTCACTGAGATGGGGGTCTACCGGGTT -GGGGCGCGGGGAGTGCATGCCCTGGGTGGCCGACATGCCAGCTGCTAGGCCTGCTTGCAT -CATATACTGAGTGCCTGGTGTGGCCATTGTTCGGAAAAGTACGAGCTAGGATGTATATAA -CTGCTGAAGCTTTGAATGAGATCTGCGTCGCCAACTTGATCGCAACTTCCTAGATTGAGC -GCAGGTCGACATACTATTCAGGAAGGAGTGGCAACCAGCGTCATCATAACATTCAAGCGA -ACTGTACCTGCCCAGGTGAGCATCCTCGGGTCAAATGCAAACTGGCCAATATGGAATCTC -GTTATTATTATAGTGGCGACGGCGGCTGACCGGGGGAAACGTGCGGAGGTGATTAGCTCA -GTGAGTTAGACACATGCTGTGTCTATCTTCTTACTTTTGAAAGACAATCCATTGTGATAA -TGTTCAATGTTTGGATTCTATTATACCCGCATGAGCCCCAAATGCGGAAGACATAACGTA -CAAGAAAAATGATTCGGGTCCAAGCATGTCAAGCAATAAAATAGGCGCTGTGCTTCAGGG -CATGAAGACATATAGGATGCAGGTGGGGAATAATGATGAGACAGGGTCCAATATAAAAAA -AAATCATATCCAACAGGTTTCGCTGTAAACGCCATTCGCTATGCTGAAGTATTGTGTCGC -GTTGATGCATTTTGTGCTTCGCAACGGAGATTGAAGAGAAGATCATCGACCTCGCGGAAG -GTGCTCGGGGACAAGGTCGTTGTTTTCATTTGACCCAGCAGGTTCTTGAGGGATTGGGCG -ATGTCTTGTTCGCTGATGCCTGATTTTGGATCGGAGGGTGTTTCTTGGGTTTGGGTACCC -GCAGCGACAAAAATTTTAGGTTGGTCGACCATTCCACCGGATTGTTGTGCTGCATCCATG -TCGATGTCAGTCCCGATGTCAGGTTTCACTGAAGGCTCAGTAGACTGCTCCATTTCAGAG -TTTTGAGTGGCAGTTGCCTTTTCGGTAGAAATAGGAGAGATGTTCTTCCCCGCTTGATCT -TCTGTAATCAGGCTCTCGGCGACCGTATCAAGGCCGACATGAGGTGGCAAGTGGTCAGTA -TTGCCATGTTCCTCTGATGGGATGCTTGAAACCTGAGGTTGAGCCGCATCAAGTGCATGT -TGCTGAGACCGTGTTTTGCGGGACTGGGTATCTAGAGGGACACTTTGGGCACTGCTTTTG -ACCACAGACTTGTCCTCGAGACGAATGGACTTGCGAGGCCTTTTGGATGGCCGAGGAGTC -TCAGCATCTTGTGTTGGTTCTTGTGCCACCGAGTCTTGTGTTGAAGGGATTGTGTCCATA 
-AAGGCATTACCGGCCTCTTTGCGTTGTGAACTGCGGGTAGATCGGCGAGTCGCAGTTGGC -GACAACGACTTAGATGCAGAGACAGACGGAGAAATGGCAGAATGTGTAGGCGAGACGTCA -GGCTGCAATGACTCGACTTCTTTGACGGTGGACAGCCGTGTTGACCGGCGCCTGTCGGTT -TTTGCAGACGAAGAAAGTTCCTCTGGATCCCGCTTTCTCTTCCTGGTGGGTTGCTCATTG -GGAGAATTTGCTCGCTCCAGGTGAGTCCTATCGTCCATGTCCACTGCTAGCTCCAAGTCC -TGTCCAAGCTGTGATGCTATTTGTGTATCTGTCTCCTCGCTCGAGTCCATGAAAGTATCC -GGAAAGCTTTCGGGGAGCATGTCCTCGGTCTGCTGGGGCTGATCGTCCGCAGGCTTAGAC -TGAGCTTGCTTTCGTTTCTTCTTGGAGCTCGAGCTTGGCTTACTCCTGGGAGCAGGTTGT -TGGACAGGAGTTTTATCATTGTCAACCAGTAGCTCTGTAGAAGGCGCTGCTGTATCTGAG -TCTTTACCCAGCGCAGATCGAAGTCGCCGACTGGAAGTTACTTCATCTGGTCGAGGTGTA -GCGTCTGCATTTTCGTTATCATAGTCATGGAGATCACTCGTAGGTGTGTCTATGCTCTCT -GTCGCAAGACGAGAAGTAGCAGGGCTGTTGACTGTGGACTGTTTTCCGGAAGCGCCAGCA -AGAGCCTTTCTTGCTTTTGCAGCCCTCCGACGCGCCCTTTTTGAGCGACCCTTCCGGGGG -CTTGGGCTTCGAGAGTGCATCTCGGGAGGGGATGATGGAGGATCTATCCCCGAATCTGTA -AACACGTGGCTCGACCTCAGAGCGTCGGAGGTCATGGCAGGTGTCGGGTCTCTGGTTCCA -GGTGTCGGAGACGAGCCGATGAATCCTTCTTCAGTCTCAGTAGGGGCGAGAGTGGGCGTT -AGAGGAAGTTCGGTCAGTGACCCTCCATCTAGTTTATGCAGATTGGGAGGAGTATCGAAT -CCAAATTTCCGTGGAGTACTGGCTGGTGATGAAGATTGAAGGGACTCAATCATTGAAAAC -ATCTCACGGTGACTTTTGCGCTTGTGTGAAACCGAAATGCCATCAACAATTTGCGCATCA -GTGGGCTGCGGAAGGTTACTAGGCAAGTGGGGTTCACAGAACTGGTCAGTTGGGTTGAGG -ACCATGGAGGAGGCTTCGGCCCGTATAACGGGAGACGAAAGCTGAGTGTCCTGGGAGGGT -TGATGTAATGATCGATCTATCTTGCCTTGTGGAGGAGGGGGAGGTAGCATCTCAGTATCC -TGGTCACCGTTTTGCAGAGGCAAGGTTGCGGTCTGGACCCTAGACTCGGCAGTTTGTACA -GCTTCCAATATGGTGACTGGAGTGACAAAAGACTGTGAACCAAAGGTCAAGCCCCAAAGT -TCGAGATATCTCCTGGCTGTCGAGATGTGCGAAGACTCCAAGCCGGCGCAAATAATAGGC -TCAAAGCACTTCAAAGATTGTAGATCATGGGAAACTGATGTTTGCAGAGTATTGATAGTA -GCAGACGAAAGCGCACGACACGATGTCAGAATTCTGCTGTCAACACCGCAGTCAACGTGG -ATCTTGCAAGATTTGTCCTTTAACCAAATACTAAGTGATGGCTGAAGAGATTGAAGGGTT -TGGCATTGGAACTGCGGAACACCGGTCCCCAGGAAAGAGGTCAATGCCTCAATAAACTCG -GCAAGACCTTGAGATTTTGATGCGCTGAAATTTTCGTATGCACCGTATAATGTTCGGGAA -ATTGACTCCAGAAGTTTATGAGGAAAGATAGCATGACCAGCTGCGATGTCGCTGCGATCA -ATCCCTAGCCCGGTTCCTTGCATGAAAGGGATAGACAAGGAATGGCCAAGCAGTGAAGTT 
-GAGGGTAGGTAAGTATCTTCAGCAGAAAGATGCATGAGGCGCTCGGCCGTGGGTTCCACA -ATGATGGTGGCAACCGCTTGGTTCCCTTTTTCTGTTCGAACCACACGAACAAAAGTTTCA -AGCAAATGAGACCACTTTTGAAAGGCCGATGGGAATGAAAGGCCGAACGAGAGAATCTTG -ATGACATTTTCGTAGTCACGGGATACAGAGCCATCGCGCCCACGAGCGGACTCCATGGGG -AACGATTGCAAAGCATTCGCAGCCGCCTGGGCAGTAGCATTCCAAACCACCTCCGATAGT -AGAGCCGCACGAGGAATAGCACTGGTCTTCGTGGTGCTCAATTCTGCACACTGTTGGAGA -AGTTCCAAACGAGATCCACGGGAGAATCTGCCATTGCAAGCTGCTTCAATGTTACCTAGG -ACAAGGTTTGTGTAAGATTGGCTCGGCGCTATTTCCCCTGCAGCGCTACTGATTATTCGA -AGAAGATGAAGAATAGCATTTTCTGGAGTCGTATTTGGGTGTGGATGTGTGGGAGTGGTA -TCGGTTTCGTCCGCGTTCCTCAGGAGAAACTTTTCGGTAAACGGAATCCCCCCAATAGAG -ACGATCATTGTTTTAGACAAGAATCTAAATCTTTCGAAGAAAATGTCCGCGGTGTCGGCA -TTCAGCGAGAGCGGCCCTGCAGTCCAAAGACGGTGTAGTAGACCCCAAACGCTAGCAACA -GCTTGCATCGACTCCCCTGAAGGGGTTATTTCTTTGCTGGACGCGAGCGACAGAGAACTG -GACAGACTGCTCCAGGCCAATGCCACGTGAGATTGGCCAATTGCGTCATCAATCCAAATA -GAGGCTTTGAACATACGCTCAAAAACCTTCAGAATTGACGTAATTCTTGATCGAACCCAT -CTGCTGTCGGCTGCAGGAAGTTCCTCTACTTCCAATCTGGTACTGTCATTGATTCGATTC -TCTGTCCATATTTTAGCCTGCGTGCTCCATAACAGGTTTGTCAATATGCGAGAAAGACAG -TCGGTGAGAACTGGGACAGATGAGAAGATGCTTGAAGAGGGAAGGGCCACGTATTCCTCC -CAAATGATGTCCAGATGCTGATAAGAAGCCGAAGGACGGAAAGTGTAGTATAGCAGATTA -TAGTAGCTGGTCAGAGCAAGTTGAGTGGGCGGTGCTCCAGACTTGTCGGTCTTTTTGCGG -TCGAATTGAGACAGTACTGGTTTGCCTAGCATTTTGAGGAGAGACCGGCTCGTCGATTCG -TTAGGGCTGATAGCATAAACAAATCTATTCCAACCGACAATAGCCTGCGCCTTAATCACA -GGCTCGCTGCAATTGAAGCATTTTTGGAGTACCAAAACCCATTCCTTAAAATGATCCCAT -TGCTCCAGATTCCATCTTTTGCTGCGGAGAAGAAGGATGATGATGCTCCAAATCTGAGGA -ACATGCTCACCGCTATCCACCGTCGGCATCATCCGAGCCATGCGCTCACGAACCTCTGTT -ACAAGCTTCCGGTCGTGCTCTAGTGGCCGGTCGAAGAGATCTCGGATGTTTTTCGACAGT -ATAGGATTTGGACCTGCTTCTGCGGCAACCTGGAAGCCAAGTGCAATCGCTTTTGACCTG -GTATCCTTCAGATGGTGAAGCAGCCCGAATATCAAGTGTTCCATCCATAAATTTGCCTGG -GAGATGAAAGCGGACTTGGCTTGGCCCAGCAGCCGTTGGTAGATGTTCAATCGATGGGAA -ACAATTGCGTTGCCACTGATGTTCTTGGTTATTTCTTGCAGTACTGTCAAAAGACGCGCT -ACACGTGAGCCGGTCATTATCTTGGGACCGAAGTTTTGAGTGGACAATATAGACATGTAG -TGCGTCAAGACGGATTTTGGGGCCTTGGCCTCATGGAGGCAAGTGATGGAGTGGTCGACC 
-AGAAAGATTTTGAATTCCTCCGAAAGTTGTGTCGAAATTTCTGGGTGCCAAACGAACGCA -GCTGATAGTTTGAGAGCCTGATTGGCAAGGTTCGTATCGAGAGGACCGCTGTTTGTAAGA -TCCCGATTCACATCTCGCTGGATGAACTCGGTGATGAGATTCAATTTTTCAGCAATCTTC -TGACCCGCCGGAAGCCCATCATACGTTCGCAGCGCACCAAAGAATTGCATGTAGGCATCG -AGTCGGGAGCTCATTGACTCGCCTGCCAGTTGTTGGATGACCGATTCGAGCAGCATGGCA -AGCGATTCGCTGGTGAATGTATCGACATTGGGTGACCAAACGGGGATCGGTGACTGAGTT -GCTTTCAGTATCGACTTTGTAGGCTTGGAGTCCGTGGAAGGAGGGATCTTGAAGTCGGGG -GCCGATTTCATAGCACTAGCAAAGGTTGGTGGTTTGATGTACTTGGGCCATGGCGAGAAA -TTCACTTTCTTCGAACTGCGGCTGGATGAGCCGGCTCCCACCGATCCCAGTGCAGAGAAC -GACGACTCCTTAGGTGTTTGTATGGCTATTGGTGAATCGGTGTTCTCAATCACGATGCGC -GAAGTTGTCCTTGGGGGTGTGGGCGGACGAGCAGAGAGCATCTCCACCATGATTTGGATC -AGACGAAAAGTGTGAAAAAGAAAAAGAAGCAATAGTCAATGATCATTGCTGAATGAGATG -CTGTTCAGGCCCTGTCCATGATCTGTTTTAGATTGAGGAATCAAGAAGCCACATCAAGCC -GAGAAAACTAAGTTCAACTTCCGACGCGCCGCGATGCGCGTAAAGTGTCACCGCCTGAGG -CGACGAGGCACCAATACGCGGACTTTTCATGCAATTTATCTTTAGCCTCAATGATCATTG -GAGAGGATTGTAGTGTATATAATAATCCACAATGATTGTATCAGCGTTCAGCAGTCAGCA -TTCAGCAATGGATAAAAAGTTCATATAGCGGCACATTCTCAAGGCAAACGGGGGACTTCA -AACTTGGCGCGCCAAAGATCTAGCACAAAGAGCTTTTCTCATAATTATTTACTATGTGGT -ATTATTGTATAGATTTCTACGGAGTAGGCCGAATATGAGAGAAGTAAGTGGGGGACACCA -AATAATGTATGACGATTTGCCTGACTTCCCCGCGTGGCTCCATCTCCTTTTTTTTTTAGC -TTGTCAGTAGCTACTCCTGCTCAGGCTACTGGTCTGGTCTTGATATTATACTGTGTAAGG -GACTCCACTTCCACAAGAGCGGATATAACACCATTCCTACATCACCCCCTACTCAGAAAT -ATTAAGCCGGTCCGGGCTGGGATCAGCATGGCGTCTCTTCAGATGCAGGGAACAGGTATC -TCCTGGACATTGAATGGCTCCCCCTGGCAAGCAGGTCGGCTAATTCGACTTGCGCATACA -GCGAAGCTACAAGAAGTGACACAGCTTCTGAAGTCTCTTGAGAAAGACCAACAAGAGAAC -AACCTCTCAACAGCAGGTAAGTCGGATGCTATAGCCATTATACCTGCCTGGCACTAATCC -ATTTTACTCTTAGACCGAGTCCAGACCCTCCTCCAGCTTCGACAACACGGCACAAACCCT -ACCAACGCAGGACCGATATATTCACAGAATGTATGAGCACAACAGATAAAGCTGCCCGTC -GAGAATTACAATTGGTTCTGATCATTTCCAGGGCATTAATACCTTGGTACGGTATGGCGT -TGAGGGGGAGACCCCTGACGTACGTCGAGCAGCATTACGATGTGTTGCGAACGCGCTATT -ACTTGACCCCAAAATGCGTCAGGCTTTTGTTGACACGGGATACACTGGGAAGCTAGCAGA -AAAGCTCAAGGTACACTCAAGCAGATACAATCCCGGTACTAAAGCTGATATTTCCGATAG 
-ACCGAAGACTCGGAAGATGAGATGCTCACAAGTCGGATATTGTTCTACGCGACCTATAAT -ACCACACTGGACTTTAAAGACTTGATCAAGAGCCACGCATTGGGCGATAATGTCAACTAT -GTGAGTGGATTGTTGACTTGAAACTTGCTAGGTGTGCCATGCTGACTAGTATAGCAATTG -AGTCGCCATGCGAAACAGTTCCCCAAGTCAGGGAAAAAACCATTGTCACAAATGGATGAA -CTGGCATTGTCAGACACACTCAAGTTGATATACAACATCTCCAAGATATTTCCAGATCTT -GCTACCGATTTCTCCCTCTCCATCCCTCATATTTTGAAGATTATATGTCGCATTGATACC -CCAGCAAAGCCCCTGGATGGCTTACTTGGCGGCTTGCTCAATACATTGTCTATCCTTGAC -TTGCAGGAAAAGAAAGGCAAAATTTCCGAGAGCAGCCCCCTTTTTCCGACTTTCGACCCC -AATTGCAATGTGAGTAAGTTGATCAGTATCTTGGATCAGGCCGTATCCATATACACCCCC -AGCGAGCTGGAGGAAAAGGCAGTCCCATTGCTATATTCACTCATCGCTATCCACGAAGTG -GCACCCGATGGACCGCGCGAGCATATGCGGTCCCTTCTTCTGCCCGAGAACACTGATCGA -AGCTTGCCAATTGGGCAGTCTAACACACTTTCTTCAAGACTTCTGAAGCTCTCAACAACC -CACTTTGCCAATCTGAAAGTCACAATTTCCGAGCTGATGTTTGTTCTGTCGGACAAGGAT -GCCGAAAAGTTCACTAAAAATATTGGCTACGGCTTCGCTGCTGGGTTCCTGGCAGCGCGA -GGCATTGAGATGCCGCAGAATGCAAAAGAAGCATCAGCTAAAGATGACCCTGAATCTGCA -CGCAATCCAATCACTGGACAACGATGGGCCGCTGAGCCACAGGATTCCGGGCCCCCTATG -ACCATGGAAGAGAAGGAGCGTGAAGCAGAGAGACTCTTTGTGTTATTTGAGAGGTATGTT -CATTCTACAAGAGATATATAGTCCACTTGTTAACAACCATCACAGAGCCAAAGCAAATGG -TCTGCTGAATGTAGAGAACCCCGTGACCCAGGCTCTTCACGAGGGACGATTCGAAGAACT -GCCGGATGATACCGATAGTGACTGAGCATCATTTTTGACGTGGTTGTTCAGTTTGGCAAT -GTCAGCTTTTTGAATTTGGTTCATTGCAGCTGCATACACGGTGTATGACATAGACCATAC -CCTCGATAGTAGCTCTGTAGCGAGATGCTGGCATTTTTTAAAGTATATGGGGGGGGGGGG -ACTGTCTGGTCAATATACCAAAAAGGCAACTTTGAATCAAACAGCAAATTGAACATGGGC -GGACATCGCGCTGATACAATTTATACCTGTCCACGGCGTTGCGGGCGGAATGACGTTATG -CAAGTCGGAGGCTGCTCTCCGCCCGTCAAAAAAATTATGCGCGGGGCACTGAACACCTGC -CACTTGATTTCCTCCACACTCGTATCACAGCTTTCTTGCGTTCACACATTTCAAAGTATT -CACTTCACACTCAACCGTCAAAATGTCAAATCCATTCAACCCCGAGGAGGCGGAGAATCT -GGAGGATGTATGACCCCGCGTCCACATGGTCTAAAGAACGAACATTGATCGCTGACAATC -GTACAGATGGAGAAGCAGTTTGCTGTCAAAGGTCAGTTACGTATCAGTCTACATGCCATT -CACACCTTCGGGCAAGCTCACAATATTGATTACCTTTCACGTTACAGCGGTTGAGCATCT -TATGACATACTGGTCTATCTTGGAGAAGGTTAAGGGCTCGCAGCTTCGCTTGACCAAGCA -GGACGATGATATCTACGAGTCATTTAAGGAGGCATTCCCTGATTTCGATCCCGCCACCCC 
-CCTCGTTGAGGATGAGATGAAGAGTAAGGCCGGCAAGGAGAAGTGGCGCAACTGGATGAT -GCAGTGGGACAAGGTTGAGGACTACAACTTTGGCACCATGATCCGTACCCGTGCAGATGC -CGAGTATGACCAGGATACCACCATTTTCGGTGTGCGCATGCAGTTCTACGCCATTGAGAT -TGCTAGGTAAGCACTGAGTGGAGTTGCTTGAGTCCCGCATGGAAGCTGATGGAGTCTTCT -AGAAACCGCGCCGGTCTGAACGATTGGATCTACGAGAAGGCCCAGAAGGCAAGCTCATGA -TTACAAAACATGTCTCATTGCTGCTGTTTTTGAAATCGAGGACAGTTCCACATCCGGGCT -GATCTGGCCGACGGCATTTGCCTGTTTCCTTGCGGCGCAGATGAGGAATGGCCTCGCCTC -TGTGATACTGATCACAAGATACCGCAGTATCATGGCTTGAGGTTTACTTTTACGATAGAA -GCGCGATCACACCCCATAAATATGTGGCTCTGATTTAGCATTGAACCGTCAATCGCGACT -CAAATGTAGATCGGCTGTCCGTGAGGACTTGGACCGTCTTTCAGCATTGGGTCTGAAATA -TACCCTGAATAAAATGTCATTTACACTGCTCCTTTGTCCATTACCTCGGTCGCCATTGTC -TATGTGGACAAAGCCTTAATTATGGGTGATCGCGGCCATTGCGCATCCCACACTTAGCAA -GTTCACCCTCTAGCCTCAGGTTTATTTGACGGTTAGAAATTATCCATCGATGGTCAAGGC -TAGGATTTCTGTTTCAAGGATGAGGCAGATGAATGATACATTCATCCCAGTTCCGTTTTA -CAATTTCGGAGTCTTTGTTCTGCACCCTCCTCAATGTTACCCCATTGGGGAATTTCTTAG -TATTGTTCTTTTTTTGTTGATTATATTTGTGGAAGACACTCAAATAGTCCATGATGACTA -TTCCATGTCGATCTTCATCATCTCCTCTCTTGCTACGCTACTATGCTACTATAACGTGGC -AAGAAAAATCTCCGCAAGCCATATCGGCCTTTCAATGTGACGCTTGACTCTCTCGCCAAA -ATGTTCCTCAAACGCAAGACTCCAAATCAGGACGGTATCAACAACGAAAATATTCCCGAG -GACAACCCGGGATGGAAATCGCTGTTCGGATTCACGACTCAAAGGCACCTGCCTACTCTC -ATATTGGGATCTATCTTCGCCCTTGTGGCTAGTCTTGTCACGCCAGCACTTGCAATCTTC -CTTGGCAATGTGTTTGACTCGTTCAACTCTTTCGGCGCGGATCAAACTGATGCCAAAGGG -CTCCATAGCCAAGTACTCACAAATTGTCTGGGGATGATTGGATTGGGAGTAGCTGGTTGG -TTTTTAAACGGTGCATATTATGCCCTGTTTGTCGCATTTGGAGAAATACAGGCCTCCGTT -ATTCGCAGCGAAGTGTTTCTAGAGTTACTCAAACGTGACCTTGAGTGGTTCGAAGTCAAA -AGCGAAGGCTCCGGGGCTTTGTTGTCAGGAATTCAAGCGTGAGTTTGCGACTATACTCGT -CGAAGTGTTATATCTAACATGGCATTGTTTTCAGACATATTCATGAAATGCAGATGGCGA -CATCGCAGCCACTTGGGCTTGTATTGCAATATTCTTGCCGTTCACTGGCTTCACTGGGGC -TGGGCTTCTATACATCGTGGAGCCTCTCTCTGGTAACACTTGCAGGAATTCCCATATTCT -CGGCAATTATCGGAAATCTTTCTTCCAAAATGAGATTTAGCATCACAGCACAGCAGGCTG -AACTCACCAAGGCTTCCAAGGTTGCTAACAATGCAATTACCAATATCGATACGGTGAAGT -GCCTCAATGGGCAGGCCTTCGAGCATCGGAACTTTGCTGAGCACATTGAGAGATCTGCCA 
-CACACTACCTTCGACAAGCTCGGTTAAATTCCCTTCAAATCGCTATCATCCGGTGGATGA -TGTTCGGAATGTTTGTTCAGGGATTCTGGTATGGCAGTTCGCTTGCACGCGCCGGGAAGC -TCACATCCGGAGAAGTTCTCAGAACTTTCTGGGCCTGTTTAACCGCGGCTCAGTCTATTG -AACAGGTTTTACCACAGATGATTGTGATGGAAAAAGGAAAGGTCGCTGGCGCCGCATTAA -AATCCATAATACAGAGTCGGAGGGAAAGCAAAATTGCGAGCGAAGTAAACGGAACTCGAT -ATCCGGAACACTGTGAAGGTGATATCAAGCTCAAAAACGTAGGTTTAATATTTCTTTCAC -TGGCTTTGGTCCGGATATTGACTACGGATGATGCAGGTATCGTTTTCATATCCTAATCAG -CCTGATCAATGTGTTCTTAAGCCATCAACCTTCTTCTTCCCGGCCAGAGAAACGACGTTT -GTCATAGGGAAAAGCGGCTCTGGTAAAAGTACCCTCAGTCAACTGCTGATGCGGTTCTAC -TTGCCAACTTCAGGCGAAATCTTAATTGACCGCCATCCAATGCAAGAACTCGACATCAAC -TGGATTCGCAACAACATCACTCTTCTAGAACAAAAAAGTGTTCTCTTCAATGAGTCTGTG -CTGATGAACATCGCCTTTGGCCGCCAGGACCATGTAACCGTCACCAAGCAAGATGTCAAA -GATCCCATCGAGTTGGCCATGCTCGAGAATACAATCGACGGTCTTCCCAAAGGACTCGAC -ACTTGCGTTGGTCCCAGTGGAAGCTTTTTGAGCGGTGGTCAAAGGCAGCGAGTTGCAATT -GCCAGAGCGAAATTACGTGACACGCCTATTTTGATCCTGGATGAACCCACCAGTGCTCTT -GATCACACCAACCGAGTTACAGTAATGAAGGCAATCCGTGAATGGCGAAAAGGAAAGACT -ACAATTATCATCACTCACGATATGTCACAAATCATGGAACAGGATTTCTTGTACATCCTC -GAAGAAGGTTCGATTGTGGAATCTGGCTACAGATATGCTGTCGAGGCCAGCCCGAGCAGC -GAAAAATATTTTCACACGCAGGTCGACGGTTCCTTGGAACCCAAGACCCAGAACAAGAGG -GCTCTGGATGATATGGACAATATGTGGGAAAGCTCATCATACGGGAGCTTGAACACATCA -AGACCCCAAACAGCTGTCCTCAGGCATCGTCGAGCCAGAGAATCATGGGCACAAGGCCAC -ATCCCCCCAAACTTTGGGTCCGGCTCACTGGACTTGACGGCAAGAAGAAATTCGAAATAT -GCGGTCAAAAATGGAGAGTTTTCCGGATCTTCCTTGCAAGATCGGAATCAACGTATTTCT -TTCATAGCCCAAGGGTCTGGTCCCCAATGGGATTCGAAAGAAACAACTATCCAACTTCCT -GCGGAAGAAGTTGAATTGGTTCATATTGATGGCCATGGTTTGAACTCGGATCCATGGCCC -AATGAACAGACCGCGAAAGACCCATCAGAGGCAAAACGCAGCCGGCGATCAAACGGACGC -CGCAAGAAGTTGAAAGAGCAGCCTTCGCCGAAAGAACGAATGACTCCGCTCTCACAAATC -ATGGGCACCATTCTGTCGACTTTAACACTCAGACAGCGAATCATCCTCTTTCTAGGATTC -GCTGCAGCACTGGCCCATGCAAGCGCAACCCCGATATTTTCATATTGTCTGTCGCAGCTG -TTCGGAACCTTCTATGAAGAAAACAACAGTGAGCATCTGACAATGACATGGTCACTTGCT -GTACTAGGGGTATCTTTCGGAGATGGCCTTGCGTCATTTTTCATGCATTATTTCCTAGAA -CTTTGCGGCGAGGCTTGGATAGATTTTTTTAGAAAGAATGCCTTCCAACGCATCCTTGAT 
-CAACCACAGGCCTGGTTCGAGAAAGATGGCAACGGATCTCTCAGACTCACGTCGTCTCTC -GATCAGAATGGAGAAGAAATGCGAAATTTGCTGGGCCGTTTTGCCGGTTTTGTGGTTGTT -GCTGCTGCGATCACCGTCATGGCAATTATCTGGAGCTTGATTGTTTGCTGGAAGTTGACG -CTTGTAGCTCTTTCCTGTGGGCCTGTCATTTATATTATCACTCGAGGCTTCGAAAGAACA -AATGGCTTATGGGAAAGACGATGCAATGATGCTAATGGTATCGCTGCCGATGTCTTTACT -GAGACCTTTTCCGAGATCCGTACGGTGAGAACATTGACATTGGAAGGTCACTTCCATCGC -AAACAAGCCAATGCGATTGCGCACTGCTTGTTGCTGGGGTTGAAACGTGCTATCTACACA -GGCATGCTTTTCGCTATGGTAGAATCGACTGTCATATTTTCTACCGGTAAGCTAGTTCAT -ACTTTTACTCATTTGATGTTGTTCTGACTATCATGAATAGCTTTGATCTTCTATTATGGG -GCAGTGCTCACAGCATCTCGCGAGTTCAATGTGAACGACGTGATGATGGTATTCTCAATG -CTACTCTTTAGCATTGGATACGCTGCTCAGATTCTTTCATGGGGTTAGTAGGTCTTGAAT -CACGTCGAATTACCCTACTAATACCCTGATAGTTCCCCAAATCAACACTTCTCGCGAGAT -AGCTACGCAGCTTATTCGCCTTGCAAGACTTCCACAAGATGAATCCCATGAGCACCTCGG -TAGTTTGACGGTTTCAAATCTCACCCCAATCAAGCTCACGCACGTGAATTTCCGATATCC -CTCTCGGCCAGACACAATGGTGTTGAAGAATATCTCGATTTTCATCCCACGAAACTCGTG -TACGGCACTCGTAGGACGATCCGGCTCAGGAAAATCCACAATCGCGTCGCTGCTATTATC -GCTCTACGAAGCCCCAGCCTCGCAGACTGGACAACCCACAGTGACCCTCGGCGGTGCAGA -CATTCTTCGACTGCATGTTCCTACCCTCCGCTCACAGATTTCCATCGTATCGCAAGAACC -GACTATTTTTCCTGGCACCATTCATGAGAATATCAGTTACGGGCTAGATCAACACTTGCC -TGTAGCCTCATCTCATAGTGTCCGCACTGCGGCTCAAGCAGCAGGAATTGATGAATTTAT -CTCCTCTCTCCCTCGTGGGTATAATACTGTGATCGGAGATGGCGGAATTGGTCTTTCCGG -TGGCCAGAAACAGCGTGTGGTCATTGCACGGGCTCTTTTACGTCAACCCCAGATACTGAT -TCTCGACGAAGCCACGTCCAGCTTGGATCCCGCGGGTGCCGAGATCGTGCGGCAGACCGT -GCAGCAGCTGGTGGCCGAGCGTCAAGAGCTTACTGTTATCATTATCACGCACGCGAAGGA -TATGATTGAGATCGCGAATCATGTTGTCGTTCTTGATCAGGGAGCTGTTGTAGAGGATGG -GCCGTATGAGGATCTGGCCAAAATGAACGATGGCAAGTTGCACACCTTGCTCAGTGACCC -TGAGGAAGTGGGTGCTTGAATCTTTTTTTTTTCAAGGTTTTTCCGGTGATCGCTGGTCGC -GAGGTGTACATTTCAACTTCTCATTTCTGCTTATTGTACTTATTGGTCAATATCATTCCA -GGTTGTGTCAATGGACTCAATAGAAGTAGATCTGCAACACCACTTAAATATGTCACTGCT -ATGGGCAATGAGCAGTTTAATAGAGCACACTGATGTGGATAATCTCAGATGTCTTAATCC -AGACGGAGAGAAAAGCGGCAAAATTAAAGTGACTAAACTGAAGTCCCGCATCAATTCCAT -ACCCTCTGCAACCTCTCAGCCTTGCAAGCTTTTATTGTATAAAACAAGCGCCTACGTCGC 
-CTTCACATAGCTTTCACATGAACCGAGGCGACAATGTCAAAGCCAACTCTGATTTTCGCG -GCGGGCGCGTGGTACCCGCCCACCGTTTTCGACCCAATCATCAGACAACTCGCCGACTAT -AGCTGCCACACCGTCGCATTTCCCTCCATCCAGCAACCCAGCTCGGTCGTGGACCTTCAA -CCAGATATCGATGCCGTGAGATCCATCACTCAACAGGAGGCAGATGCTGGTCATGACATA -GTCATCATCGCGCACAGTTGGGCCGGGTTACCAGTGAGCAGCGCGTTGGATGGACTGAGC -AAATCAGAGCGCGAAGCAGCCGACCAAAAGGGCGGTGTTGTGAAACTCATTTTCATCGCT -GCTTTCCTCCCTGGTATTGGCGAGAGTTTGATCGGCGCGTTTGGTGGGATGGCCCCGCCG -TGGTATGTGCGCGATGTAAGTTCTGTCCATTTATATTTGCTACTTGCGAGATGTCCATGT -GATTGTTGACAATTTTCCAGGAGGAAAATGGAACTGTTACTGCCAGTGATCCGCTGGTAT -TATTTTTCCATGACGTTGCCGATGGTGCGGAGTGGGTTAAGCTTCTCAAGCCACATTCTT -GGGCGACTCAGAATACCCCCGCTACTGGTACAGCGTACATGAACATTCCATCTGCATATC -TTTTGACTGAGGAGGATCGTGCGACTCCGTTGCCGGTTCAGCATGTGCTGGTGGAGCGGG -CACGACGGAGGGGGGCACAGATTGAAACTGAGAAAATCAATGCAGGACACACACCCTGGT -TGGCTATGCCAGATCAAATGGTAGATTATATCAAAAAGCATGCTATTGAATGAACAGGCA -AGATGGGTTTTGTTCACAATGATGCAGGTCGTCATTCAATTTGGAAGTACGTACCTTACT -ATCTTACTTGGTGCAAGGGATATAAGGGAGAAACACGGAATCACATTGGCCTACTTCCAA -CAAATCCAAAAAAGAGAGAATCACCGAAAGTGACGTATATACAGTAACATTTACAATATG -CGACTACATGACCAAACATCAGTGAAAGAGACCTAGATATCTCAGGCGTCGCTCTTCCTG -CGCGACAGCTTATCCACAAACCTCCTCAGCTCATGGAAGTGGCTCGGCTTCTTTCCATTC -ACCTTGTTTTTTCCACCCTCCACGGGTTGCTGAACTGGCGTAGCATCACCTTCAATGGCA -GAATCCTTGACAGACTTACCAGAGCCGCTTCCAATAGTGACTGTCAAGAGCTTTGGATTG -CCATCAAAAACACTGCCAGACTGATCGAGCCCATTGTTAGGTGCAACGTCATCATAGGGG -AATGCATATCCCCGAGCATCAAGGCTCACTTCATGACAGATGCGCGAGTAATGGTTTGTC -ACGTCGTTCTTGTAGTAGTTTGAAACCGATTCCCCGTTCGGCTGGTTGCTATTTATCAAC -AGGGTTGAGCGATTGAACGCAGCCGCGAGACGAGCTGTGATGGCATCTTTAGTTCCGTTC -CCAGAAACGGCAAAGGAACCAGTGCTATTGCTGAAAATGTCTCCAGTAGACGGCTTGGTG -TAAGAAACCCCTGAAACTGAGAAAGACAATAGATCGGAAGCGACTTTGGCTGTAACATCA -CCGGCGTCAGCCTGTGTATTGACCGTCAAAGTATTGTTGGTGCTGTTTGCATAGTGAGAC -CAAACTTCATTCACGTAGCCCTCGTAGTAGTTGTTGAACAAAGAATTGTTGAGGGTTCTT -CCTGTGTTGGGAGACACGGCCCGCAAGTTGACACCGTCTCGCTGGATTATGAGCTTGTCC -CAGCCGGCTTTATCCTTGCTGTTCTGGGCCTTAAGAGCATCGCAGACTTGGTCCAGACCA -TTCGCTGGCAGACCCTTGACGGTTTGAGTTTCACCAGCTTTGTTGGTTAGGGTCATGGCA 
-ATGGGCATCGACACGAAGTCAACGTAGCTGAGATTTGCGTAGAGCTGGTAGTCATTCCAC -GTGAATTCGCAAAAGGCCCAGGTCAGATTATAGTTGGGGTCTGCGGGGTTGGATACCGAT -GGCTCAACAAGGGCAGGGCCCGGGTTCAGATAGAATGTCAATTTCTTCTCTTGCGCGAAC -CATAACCGCGCGCCTGCAAGATGCGGAATAGTGATGGTGGTGGTATTGCCAGGTGCACCC -AGTGGAATTGCACACTCCTCGCTCAGGTTGGTCATAGTGTCAGATGGTGAGGTCGGGTAG -TAGGCTGTCTTGCCATCAGATCGCAAGAGGAAAAGCGCATTATGGTTGTCCAGCGCCTGG -CCTGTAATATAGGCGTAGACCGTGTTAGAACCGGTGTCGTTCTTCAAAGCAAAACTCAAT -GTTGCCATGTTGTGCGATTATAGTGTCTTTGGGAGTTTGAAGAAGAATTCAGGTGGACTG -ATGTCTTGAAATGATTGGCGCCAAAGTCCAATAAGAAAGTCGTGTCAAGATCCACAAGGC -TGCCAAGGGGTCTGAAGTTATTATCGACATTTCATTCTGCGATAGGGGCCATAATATATA -CTATCGCTCAACGTCTTTTGTTGGATTTGTGCCAAATTCATAGCCTCAGGTAGGGCAGGG -TGACAAGACCCTGTGATGCAACGTGGCGATGACCTCAAGTTTCGCGGAGATGATGGCTTG -GGCCCCATTGGACCGGCTATCTGATTAAACGGATATGTCGATCATAGTTTAGAACAGGCG -GTACATCAATATCAACTATATTATATACCAATGTCGATCTAAGCCAATACAGAACGGCAC -GATCATTCCGTGTCTCCAGGTGATCCAATTCCAACCTGTGGTGATCTTGGCTCTTTTCGC -GGGGCCACTTTGGAGCTGAACATTATGTAAAAACTCCATCTCGGGATCTGAAAGGCTGGA -GACTTGGGGGTCGAGAAATCTGACTTGCGATATTGTCGCTCGGTTCTAGCAAGGGGGACG -CGTGAGGGCCATTGGGTCAGCATCGGGTCATCCATGCATTTATTTTATTGGTTAATACAA -CCATTCCATAATTCCTATACCGCCACGAGATGATCATTAGCATTTTTCACAACCTTAAAA -GGTACACAAGTGTTGGATTTCTATTCAGAGATCATATGACGACGTGAATCGTGGGCTTTG -CATTCCCGAATCCTGAGGCTGAAGAACCTCGCGACGCACAACAGAAGCCCCTACGCCTAG -GAGTTTGAACATGGTTGCACAGAATGTTTTGACTCTATGGCTACGATGGAATCATGCTTC -TACAAATGGACTCTTTCCAGAGGAAAGGAAGGTTACCACGCGACGATAAAACATCGAAAC -GACATAAGAGGACTAGATCGTAGCAACTCCAGTCTACATATCCTCTCTTGGCTAAGTCAA -ATGTTTGATTTTCCATAATTTGAATTGATCTCAAGCAGCTGGACTCAATTTCTTGGAACC -CCTGCTAGATCGCTTTCTGTGTATTTAATCAATAGGTTAGGATATAGCAAAGGTCCAGTA -TACCCCAATCCGTTTCCTGATCCACTCACATGCCATTGTCTAGCAATATCCAAAAGTAGG -AAATGAGAGAAATTCTCTTCCACGTGTTTGATCTTACGCCATCATGCATCGAGAAAAAAC -CATCCGACGACCATCTGTTCATTTGGCCCGGGTGTGGCACATTCCTTAGCAAGAACATAT -GGCGATCTCGATTCATATGAATCTGAATACGCAAAATTGACGCACCACCCGGAGTTATCT -TTCACCTAAAGTGATGCCTAGAGCTTTTGCCAAGTTTTATAAATGGCGGTCTATCAATGT -TGATGTCTGTACGACAATTGTCAACCGAGGGTGAAGTCAGGAATCATAATCACACTTCAG 
-ATTGATCTTAAGTTGCGCCAAGAACTCGGTTTGCTAGACACCCCGGCTGTCAATCCTAAC -TGCCGTAGTTCCATATCGTCGCTGACACTAATGAGAAAGTCGCACTTTATTCGGTTGAAC -GGGCCCTTAGTAGACATGGTGACCAACTCTACTGGAACAGCATGACGAGTATATTATGCA -GGACTTTAGCGCATACGGGTTGCCTTATACTTGGCATTATGATTCAAACCATAGTTGACG -AGGAGTCATCAGCATCAACATTGGGCGATCCCCTTCTACATCCACGAAGTCAGGACACCT -AGGCATGGGAGATAGGACAGAAAGATACTTTCGGCCTGATGGGCATGGACTTTCTTGGCT -CGCATGGTCTCCAATGCATCCAGCGGATTGCTCGACCATGCGTAGGAGGCAACTATCTCC -ATCTCATTCCGACTAACCCAAAGGCGGGAGGGAATGGGGGCAAGCTGACAACTGTGTTGT -CCGCAATTTACCATAGCCTCGTCTCAACAGGCTGCCTTAACCAGCAGCTTCTATCTTATG -CCCTCCGATGAAACCCATGAATTGTCAGAACGTGGGATATGAGACTTAGGTTACGCATTA -GCAGGTTTCGCAGCGTGCCCGTGAAGCAAACAACTATCATCACTTGGGTTAAATCAAATT -ATGGGATTGCTGGAAGTGAAATGGTTCTTCCGTTTATGATGGGGATGTACCGAGGCATAT -CTGCCAGACACCATACTGGGGGCCATATCACAACAATCAAGACGTGAAACCTGAGGCTGA -AGTGAGACCAAGCTCTGGGCTCAGGATTCGCCACCCAGGCAACGTTGGGATCCCACCCAA -TCAGCCGATCTTCGCCTATCGGGAATTTACTGCGCCTCACAAGCCACTGCGAGCAACATC -CATTCAGAAATACTGAGGGAAATAAGATGATGGTCCAATGGCGTTGCTCTGCCGATGAAA -GCATGAAAATGGTCCCGAAGTGTCCATGCTGATGTAGACCTCGGCAGCTAGCTCCTTCTC -ATACTTTGAGTATCACGCCATGCTCTCAGCGACACAAAATCAAGTTGAGGACATGGTCAA -AGCACTATGAACATTCACTCGATAACACATGCTATCATTGCTTCTCTTTGTGGCGGGCAT -CGGCTGCGACGGGACCAATTCGAGCAGCAAGACGGCACAATAAAGGGCGCATCCGCTGTC -GTCCGTATGTAAGTTCCCGAGGTAGACGATGTGGGATGGCTTAGATCCACAGACCGAGGG -GAATTTGACGGTTTGCCGGATAGCCACCGAGACAAGCTGGTGGAGCCGACATGCAAAGGG -GGCGTTTCAGAGATGGTAAGGAGGGCTGAAAGGGAGCAGCACTGAAAATTCAACACTTCC -CAAAATTACATCTCACGATTTGGGCTGTGCTTGAGATGTTTTCCACGCAATCACAATGGG -ACTCTGAAGAGCCCTAGCGGTGGAGACGTGACGATTCACGATTCCAAGGTCGATATTCCA -TATGTCTCACGATCTCATACCGGTTGTTCCCGAACGCAAGGGCGTATCTATTTTGGTTTA -ATGTCTTCCCTTGCTGCAAAGAGACTCGTGCAGTTTATCTATGATCGAAAGTCAATAGTG -AAGTCAATAATGCATACGGAGTAAATTATTGGAGATTTGAATCTTGCAGAGTTGAAGGCT -CTAAACAGCTAGGGATCTCCCGGCAAAAACGATGTGGAAACCCTTCACCTGAAGAAAATG -ATTGGTTGACCATTGCCGATTACAGAGATCGACGATTCTACACATAAGTAAAACCTGGTT -TTGATTGAAGGAGAAAGCCCAAACCAGCCTCCACTCACGCCAGCGTTTCAGTGCTTGGCG -TATAATAATAGCTGGCCAGCCACCTGTCCAAGGCTGACGCGCTTTGTCCTTTTCACATTC 
-CTCGACTCTTTCGAAATCCGGGGATTTCTTTCCATGACTTCAATCAAGGATCTCTAGGGC -AACTGTCCAACACTATGTACATTCCACAGAAACGTCAGGATGACAAAACAAATAAGAATA -ACATATTTATTATTGTATGTCTCTTCGACCCTATCGTCCTTCCCCTATTCTGACTCGACT -TAGTGCGCCGTGAGCATCATATTTGCAGTTGCGATCTGTGTGGGCGCCTACTTCCTATCA -CGACAGCTACGACAAAAAAACTACGAACCGAAATACCTTCCGGGGAAATATCTCAAGCAA -AAATGGAAACAATGGGGCACAGGGAGCGCCTCATACGGCGAGGTCCCGAACCAAAGTAGC -CAAGATACCTCTTATCGTGGAGCTAGCACAGCACCAGAGATGCAAGCCAATAATGGCGTC -CGCCGCGATACGTCGATACGTTCGATCATGACCCTGCCCCCCTATTCTTTCAGCCCAAAG -CCGACCGAACAAGTCATCGCTCGGGAAGGTGAACGTGGTGGGATGGATATGGTCGTTGAA -TACCCTGAGACTGCGGAAGAACAGGAGACTCGCCGTGAGGAACAAATGGAATCTTTGTAC -CAGATTCGCCTGCAGCGCCGTCAGGAACTAGCCGATCGTGAGGCACGCCGTCGGGAGCGC -CGAGAAGCCGAAACGCGTGGGGATGTCGCTCGTCTTGAACGTCTGAATGCCGAAACTCGC -GCTCGGACGCAAAGTCGCCGTGCACGCACAAACAGTGCAGTCAACGTAACCGCGGTCCTA -GCCGAACATCAGTCGAGAGAGCGGGACCGACGCATCGCTAGTGTTAGCTATGCTGAACTC -GGACGTGTTCGCCATGATGGATCACGGCTTCGAGCCGATTCGCACAATTCTGACTCGCAC -GATTCGGATCGCCGACCACTGTTGCAAAGTGACGCTGTGAGCTCCACCGCGCCATCTTTT -GAACTCTCCAACGTCAACTCTCGAGGCCAGTCCATTTCCTCGATCGAATCACATCCTCCT -TCCGAGCTTGAACCACTATCTTTAACTCCGACCACGACACACGGCTCGAGCCGACCCGTA -TCACAAACTGACGATGGGGATCTGGGGACGCTCAACATCCCTCCACCTCCCGATTACGAG -CATCTAGACTGGGGTGATGCTCCTGCGTACCAGAGTCCTGTTACGGAACAGAGTGACCAC -GCGCGGCAGCTCCCAACCATCAACCGCTTGCCTTCAATCCACGTCAATGCACCCAGTCCC -ATGACTGTGTCTCCCGTGACGCCAACTCAGTCGCAGTTCCAGAGTACCAATAATGACGAA -CCTCCAACACCGACAGAGCAGACGCCGGGAATCCGAGTGGTGTCAGCAGAATCGGTAACT -ACCACCCGCCGTGCTCCATCACTCAGCCAACCTCAATGATTCGATCTCCCTTGGAAGCAA -CCATTGGATGAACCCTACCAGGAAAAGCTCCTCATAATATATCACTCACCACGAGCACGA -TTCACGACCTGTATAAGCATTAGTTCTGGGGTTTATAATCCTCTTTCACTCGTGTGATTT -TGTCTTTTTTCCCAGCGCCTTTTCCCTTTCATCTCATTTTCTCGGCGAGCAAGGCGTTTT -GAGTTTAGGTGGCTAGCGAAGAACATAACCCATACAGATATATCACGGGATCTTTGCGAT -CAATGTATCTACAATATCCAATATATATAGGAAATGAAGAGCAGCAGTGCACTACTCATA -ATCGCCCCGTTCCTATACATATCTAGTCCGCCGCACCTCTCCAGCCTATGTTGAATATCG -ATCCATACAATCTCAACTGCGTCACATACCATCAGATCAGACGAAGGTCCGAATAAGCCC -AAGGTGACTTTTTTGTCTTCTCAAGCTCTTCGGAGAAGTAAAGGCCCCGCCTCTAGTATC 
-CAGTGCGTTCTTGTCGCTTCGCCAACTGTCGATACTGCATGGCTCCATTATGGACTCTCT -TCAGTTCATGATATTTACTTGGCTAATTCAGACTGGGATTTGTAGGCACATTTGACGGAT -GCAAGGTTCTAGGTATCTATCTATGTGTTGCGGTGCGATGGCCCCTTGTGACGGCCCCGG -GCTTACAAGCACGGAGTAATAACAAAATACCAATGAAAAGAGTGTGGCATGATATCTATG -ATATCTAAACAACTGAATTTGCTTACTTCTTAGATTTGCCAAAACTCCAGCATGGCCATA -TTTAGTTCAATGGGTGGCCAAAACGAGAACAGCGGTATAGGTTGAAGCAACATAAGTATG -AATATGTTCCACACGCAAGCCCGCGCTCGCTGCAAGTGACTCCCATTGGTGCTTCGTCCG -CTCTTTACCAGCAAAGGTCATCATGGAAAGATCCGCCACGGTCGATTGCCAGTTAGCGCC -AGTATCCGGCAACACGGCATCATCAATCAAGATCCGCGAGTCGGGGTTCATGGCGGCGCG -AATATTGTGGAGGATCTTTGCAGCTTTTTCGTCGGGCCAATCATGCATGATTCTGCGGAG -GTAGTAGAACTTGGCACCTAAATTAAATGAAGTATTAGCTTTTGCCCCCGTCTATCCATT -AGATACAAGATCTTGACAAACCAATGATAGGCTGCGCTTGGAAAAAGTCATAAGCCTCAG -CCCTCACACCATCAATGGGAGCGAGATTTTCAACTGCCTCTGGCAAGTCCTGGAAGACGA -GGTGACCTAGGAGGTTGGGATACTTCTTTCCAAGTTCAATACATTGATGGCCATGGCCGC -CACCAACGTCGACGAGGAAGGGTTTCTCCGGGGCAGCGGATAAAGCCTTCAAAGGAAACC -GGCGGGCTTCATTGTCAAAAACTTCGAGTCCCTCGGTCCACTCTGATCCTTGAAGTGCAG -TCATGATCTTTTGCAGGTTTTCGAAGTGCTTTGGCTGCTGGACAAGCCACTCGAATGAGG -TGAGTTTGGTGTTGTGGGCTTTCTGGAATGGTGTGTTGGTATTGAGCATGACATCTTCAT -ATTTATTTTCAGCGAAGAAGGCAGGCATTTCTTTCACGACAGGTCCGACTGTGTCGAATC -TGAAGTTGTACCAATGAGGCTAGGTAAGCAGGGGTTTTCGCCAGGTGGATGGCCAGGACA -GAAGTGGAGGAGACTATATAGCTTACCCGTGGTATACCATAGCCTCACCCTTGGAGCTAG -CCAAAACATGGGTGGTCCTGTTTGCACAATATTCATTAATTCCAGTCTCTTTGATCATGT -CTATTGCAGCTAGGTGACGAAGAATTCTCTCTGAGCAGAACAGTTATTTGAGAATCCGAA -GTAGATACGATCATCTCGAATCTCTTACCTAGAAATGGAGATGATGCACCTGTCAGGTCT -GAAATTTCCACGACAGTCAAGGATGTATCACTGCTCGCGAGAGAGCGAAGGAGTCCTAAA -TCCGCACTGATTCGGAGCATTGCATTCGTTAGGCCCTACAACTGTCAGCACTCGATAAAC -GTGGGTTGTAGTAGATGACCACTAACTGGTACTGCAAGCTCTATGAAGATATCTTGCGGA -GACTGAAGTTCATTTTGCACTCTTTTGAGAGACTGAATAATCTTCAGTCGGCCACCTTCA -TCAGTGTTTTGGGCGATGGCTTGGATCTGAGCAATGATGGAGTCCATTTTCACAAAAGTG -ACGTAGAGATATCTTTTGGATAAAGAAAGAAGAAACTAAAGTCTACCTGGTTGTGCATTG -TACATTTCCCAACGAAAAAAGGTTCTTATATACCTGTCTCTCACATAGTTCTCATAGTAC -AATCCACGGAGGCCGCGGAACTTCCGCGGATTCGCCGACCTGTGATTGGTCAACTTCGTC 
-AACCCCCATTTTCTTCTAGGAGACAGACCCCTAGAGCTAAGCACACTCTGCCTCATCAAG -CATCCAGAAGAGAGATGCATCGGGTCGAAAAAGGTCGTGTGAGATCTTTACGCTCACGGC -CTGTATCTTGCCACTTCTGTCGATCGAGGAAGCTGAAGTGTAGTCGACAATTCCCATGTT -CCAACTGCACCAGTCGGGACATGACTTGTCAGCTCTATCCTGCTACAACATTGCCTGACA -GCCCTCAAGATCAAGCTGTCGAACTGCCTAGAAACGATTATACAGATTTACTGGCCCGCT -TACGGAGGCTAGAAGAGATAGTGATCACCAATCCACCAGCATCTAGTCCCCTGGGGCAAG -CTTCCACACCGGCCTTGTCGCTTCCTTCTCCTCCTGCTAGAAAATCAAATCAGACAACCC -TGATTCATGAAGGACAGTCATCTGCAGAGGCTGTAATATTGTTGGAAAACGAGATCACAT -CATCAGGTTCAATGGTAGGCCCACCTTGGTCGAATTAAATTATGGGGAATTGGCTGATGA -CGAAAGGCCACAGGGTAATATTTTGAATCATGAACTTGAATTCAGGACTTGCTCCATTCG -TACTGCAGTGGGATCTGAATCAATAACTCGTCAAGCCAACTCTACGAAAAGATGTATCTG -GCTGCCACACCACAACGAAGCGAAAATAGTAGTGGAAAAGTACATCGCAGAGATCACCTT -CCTTCATCATGTGGTTCACGTCCCCTCAGTACGTACCATGCTAGATGAACTCTATTGTGA -TCTGAGAGAAAACAAACCGGTCAGAATTGGTTACGTCTCATTGCTCCTCAGCATTCTCGC -CAGTACCACGTCCTTCTGGACTGAACGCGACATGTGCAACGCTGTGTTTTCAACTGTTGA -AGAAGCAGTTTCCCAGTCTACACAGTGGATGAGACTTGCGATGGATGTGGTCGATTACTC -GCGTTATAAACACCTCGAGTCAATAGAGGATATCCAGGCGATGATCATCATAATATTTGT -GACTGCTAACATCGTGGGAATTACATCCCAAGCCCGGCATATGGTTTCCACTGCTATATC -AGTTGGAAGAGAACTTTCCCTGCATCGAATTGATCATCCATACAATTCCAACCTCGATGT -GCCTTCACCGTCCTCTGCCAGAGCAGAGATTTGTAGGAGAGTATGGTGGTATCTGGTGGC -GACTGATTGGTAAGATTCCTCCTCTCTCGCCCTCGTTCTAGATTGTGAGTTCACTCGATG -ACAGGCAGATTTCGCAAATATCTGGCCCACAGAAGGGCACCTATTCAATTGATCCACGCC -ACATGATGACCAACAAACCTTGCAATTCGAATGATGAAGATATTGTAGATGGAATGGTCG -GGGTTGGACAGCCACTCACCAACCCCACATCGATGTCCTACTGTCTTCAACGGATCCGAT -TGGGCGAGTTTTGCCGTGAAATCACAGATTGTGCTCCATTTGGGATGTCCGATCCAGGAA -GCCCAGACTATGAGCATACGAAACAGATTGATGGCAAGATTTGTGAGTTCGCTAAGAGTC -TACCCTCGTTCTTTTCTTTGGCCCATAGTTCGAACGAGCTGCCAGAAACCGATCCTCGAA -ACTCTGCTGGAATAATCATACAACGATATATCCTCAACTTTTTGCTTCATGCCCAAAGGT -GCAGGCTTCACTTGCCTTACTTGTCACTAGCGTCTAATGATCATGCATATGATTACTCAC -GGAAAGCGTGCCTGGAAGCAGCTCGAATGGTGATCCGAACTGAACGCCAACTCTCCCTAG -AGACGATCCCCTTTGTTCCGGCGCGACTTAGGCTGTCAGGCTTGCTACATTGTGTTTGCG -TGGCGATCATTGTTCTCCTCATAGACTTTTGTGGGGGTGATAATCAGCGAGAAAAGGACC 
-TAACAGAAATTTTGGAAGCATTTCTAATTCTAGAAAAGGCTAAAGTGGAATTCCCTTTAG -CAGGAAGATTGCTTGAAACCTTCAAGATAGCTCTTCGCCGGCACGGCGCCTCAGGTTCAG -CTGTTGGAGAACATACAACAACGCAATCCACCTCTCAAAACCCTGAGAATTCACTGGGAC -TTGTCTGCAGCTCTACGGCTCCTACAAGCATGGTATATCCAACAGGTAGGAACATCGATG -ACTTCCCCATGGACCCCACATTGCCGACTCTAGATGAGCTCTGGCAGGTATTCGATGATA -ATGTGGACTCGGGGACAGTGGACTGGAACAGCATCTTTGCGGATTCGGACACTGGGTTTT -TGTCAATGTAGTAACAGTAGTGATTAAATCAGCCGTGGGTTCACCGGGATTTGGGACCCG -CAGCATTATGACGCATGAAATGTCAATTGATTTTATGTGGAGGGTTTTCGTCATTTCTAC -CATATACTCATCCCTTGAACTAGGTACTTTGGCTACCCGGTCATGTCCATATTGAGATCG -ACATGGCGCATTGGGTAATAATTGGGAAACAATACTAACGGGTGGAGGCAATAGGTGCAA -CAGCAAAGAAACCCACCCTGTACGCCACGATGCACATATAATCACAACTTAGGGACAAAG -ATAAATATGGGGTCATAGTAAACAGTGGCTTTCTATATCTACTTTAAGTCTTATATCTTA -TGTTAATCTCAACATTACAGGGTATATCCAAATCCATGTCACAAGATCAAATGCAGGCGA -CCTGTTCTAATAGCAACGATGCAGCTTTGCGATCAATTGATCGGGGTGCTTATGGAATCA -CACTGCCTGGTTCCTGTGCACATTCTTAATTTACCCTTGCACAGTGACAATGCTTCTATG -TCTTCCTCACGAGAGGTCTCACTGTATGATGATCGTCTCCATCAGGTAAATATAGACACA -CGTCGTGGACTATTACGCAGATCCACTCCAAAGTATACTTAATTTTATATCGTACTTTTT -GCAGTCCATATAATTGTTCAAGTACCAGACATGACGCGGGTAATTATTCCATGAGATTAT -ATGTTATTTGTGTTAGATATAAACGTACGTTGGCCGGTTCGGGGTCCACTTCCACCTGGG -CACATCCATTGTACAATGATGATTAAATTTCAGCTTCACACAATCCGATCCAATCTCTTT -GTTTCTTTTTTGTCTTTCATCATGATTGTTAAAGTGATGCTTGATGTGCATCTACCTATG -TTACACTGTTCACACTTTACTGACAACTATTTTTTATATATATCATCCTCAATCCTACAA -GCCCAAGAATAAAAGGTGCATTGTTATATGTAAATATGTGAGACTCTAGTTAGAGATCCA -TTGTCGATAGTGATAGTCTTACCCTATCTAGTCATATTTATTTCTTGGTTCTTATACTTT -TATACTGTATCTAATACGAGTGTACAGATTTGGGGGCTGTAGCTCAAGGTATACGAAGTA -CTCCGTACATTACCACACAATTGACTGTTTCAGTGACTGTTTATATGTATAAAATGTACT -TGATGAGAACTGGAGTACTTTTCCGACTACCTGGTACTATGAATGTCACTTTGTTCGCAT -AATTCAATGTTGAGCGATTTGAGCCTCTCGGAGGACACAAAGAGGCCACGATCCATGGGG -AAATGAATGTTGCCACTTTCAATCGGGTTGCCTAGACCAGGGACAGTTAGTCCACATTTC -TCTCAAACCCAGCCAATGACCATTGTCTATTCTTCAGATTCCGATTTTAAGTGGAGAAAC -AAACACCCTTTTATACTCCATCTTACTCTGTACCTCAGTGAATTGAAGCTCCGTTCAGAT -GACGTTGTGCATGTTGTTCCGTACCTTACTCTGGCTGGTTCGGACCTAGTATTGTCTACT 
-CTACGTTCCCAGGACAGTTGGCCCATCGGAGGTCTTACCCACGCTCAACCGCATGAATGG -CCGACAGCATGCAGTGTCTATTGTTTCTTGATGTTTTCATCTCGGGGGCTTTTGCCGACG -TGTGTTGTATTTCTCTATGATTCGTTTGGAGCTTGAACCCACCGCTTCTTATAAATATTC -AGTCTCCCGCAAAATCTTGTCCTCAACTGAGGCCATATCTGCCACCACAACTCTATACTG -GTGGTTATCATATCGTTCAATTGTCCACACGATCATCTCAAAAAAGGGCGCTGTGTTCAT -TCACTAGTTGATACACACACCTACGTTTGCTGAGGAGCAACGATGAGGAACTTCATTGCC -CTCGCTGCCTTTGCTGCCGGCACCAACGCCCTCGTTGGCCGTACCGATAGTTGCTGCTTC -CACCTCACCTCTTCTGGCGGTGCTGCCGGTAAACTTGGCCAGCTGGGTGATGGCCAAAAC -CGCATTGGTGACAATACTCTGCAACCCGCACAGTACTGCATCGATTCCAACGGCGCTATC -ACTGACTCTACTGGGCGCGGATGCATCCTCACCCGTAAGTAGACCAATAACTTCCCTGTC -TATCAACTACCAATGGATATATATCTGACTGTATGACAGCTCCCACCACTCAACTGCAAT -GTGACGATGGTGTGTCTGCTACACCCGGTTTCTCTGTCAACTCACAGGGTAAACTTGAGT -ACCACGGCAGCCCCGACTTCGTTGCCTGTGCCACCGGCCAAAATGGTGGTCTGAATGTCT -ATACTACCCCCAACAAGCTGGATGTCACTGGCTGTGTGAACATACAGATGTCTGCCGACA -CTTGCTCGAACTCAGGCGCTGGTGGTGGCAATGGTGCCCCATCTGTTGGTGCAACTCATC -CTTCTAGCGTGCCTCTTGCCCCGGCTCCTCAATCCAGCCCTGCTGGTCCTGGCCCTGGCC -CTGACGTTGGCGGTCCCGGTCCTGTTGCCCCTGGAGCTGGTGGCCCTGGAGCTGGTGGCC -CTGGAGCTGGTGGCCCTGGAGCTGGTGGCCCTGGAGCTGGTGGCCCTGGAGCTGGTGGCC -CTGGAGCTGGTGGCCCTGGAGCTGGTGGCCCTGGAGCTGGTGGCCCTGGAGCTGGTGGCC -CTGGAGCTGGTGGGCCCGGTACTGGAGGTGGAGCTGGCGGACCTGGCCCTGGTCCTGGTC -CTGCTCCCGGGCCCGCAACCGGCATTAAAACCGTCACTATCACTGTCACTGACTGTAGCT -GCCCTACTTCCGGTCCCCCTGGTCCTGCCCCCggcggtggcggtgttgttgggggcgctg -gtggtggGCCGCAGCCCTCTCCTAGCGTGCCTGCTCCATCTGGCGGTGCTAGTGTCAAGC -CTCCTGGTCCACAGCCTGCCCCAGGCGGTGGAGGCGGCGGCGCTTCTCAGCCTGGTGCCT -CACCTAGTGCACCTGCCCCCGGCGGCGGTGGTGGTGCTTCTCAGCCTGGTGCTTCTCCTA -CTGCGCCTGCTCCGTCTGGCGGCGCTGGCACTAAGCCTTCTGGTACACAGACTGCTCCCG -GCGGTGGCGGTGGTACATCCCCTAGTGCGCCTGCTCCCGGTGGGGGAGGCGGCGGTACTA -CGCAGTCCGGTGCCTCTCCTAGTGTGTCCGCTCCAGGAGGCGGTGGTGCCACTCAGCCTG -GTGCCTCAGCTAGTGTTCCCGCTACTGGCGGCGGCGGCGGTGGTGGCATCTCCATTAGTG -TGCCTCCTATCTCCCCCAGTGGTGGCCCTCGCCCAACCGGCTCTTCCAACGCATCCGTCC -CTGCTAGCTCTGCGACTTCCAAGCTCTCAGCCTCTGGTACATCTGGCGGTGCTTGTCCCA -CTGATCTCTCGGGTAACTACGAGATCCCTCACCTTATCATCCCGGTCGATTCTTCCTCGC 
-CTAGCACTACCGCAGGAACTGGGTTTAACGGTACCGTGAGCTCGACTACCTCGACCTTAT -TCAACTTCGACATTCCAGCCGCCGACGCTGGCAAGACCTGCAGCCTGATCTTCCTCTTCC -CTAAACTTGAGGATCTTGAGACCTCGTCTTATAGCTTCAGCGGTGATGGAAAGATTGACT -TTGCTAAGCTTTCTTCCACTGCTGATTCTTCGACAAACTTCGGCAACATGCCCTCGGTGT -CTCAGGACCTCGGAACTATCACTGTCAACCCCGGTAGTTCATACCTTGTCTCTACATTCT -CTTGCCCGGCTGGTCAGGCTGTCGCTTATGAGATGAAGAATGCTGGCACCACTCACTTCA -ATTTCTTCGAAGACTGGAACCCATCTCCGTAAGTTTTCTGCCCCGCGTTCCCTTTGGTTT -GTACATTAGCTAACACTCCGTTTAGTCTTGGTCTATATATCACCACTTGCTAAATGAGGA -ATCTCGCCACCATGGGCATGTGATTTTTGGGACCATTTTATATGCCTTGTTACCTCCATT -GATGTCTCCTTTACGTCGATACGTATTGTCTTGATATTGTTTCTTCCATGTTGGAATCTC -TTGGTCCGGTGTCCATAGGGAAGAAATTTACCTCAAAAGCTGACTCGGCCCGATGTGGAT -TAACGAACATTGCCATATCTGGTTGATGTTGTAATTGAGGCGCATGCACATAGCATCAAC -CTTGGGCTTGTGGATTAGTATATTTAATCACTTGTTCGGTTTCAGATTTTCTTGAGGCGA -ACAACCAAGAAAGTGCATATGTGATTTGTCTTTGGTCCAGGTCCAACCAAGTAGGTAGTA -GTTCACGTAATACTTGGGGAGATATAAAAAAAACAAAAAGAATACTTCGATACCGGGAAT -CGAACCCGGGGCCGCGGAGTACACACTCTTGAAGAGATAATGAAAATCCGCTAGGTTAAC -CACTACCCCATATCGAACTTGGATGACAAAGGCTCATTTAATTTTATTATAGTCTTGATC -AATGTAGTAATTCACCTTGATAGTCAAGTATGGATGAGATCTACAAAAAATATTTCAAAA -GAGAAAATCATGTGTTTCCAATATTTATTATAAAGATGAATGATATACGCCCATGTTACT -CCAGTAGGCGTACCTTGATATCTGAAGTACTATGAAATATAAAGCCCTGAGGGAAGCCAA -TCAGATACCTCAGAGCGTGCATGGGGGCCAAGCGCTATTAATATGCCGATGCCAAGCAAT -GCCTTTGACTAGGCAGAGATGTTTATTTCCCTTTTCCCCAGTTCTCATCCCCCTTTCAGT -TCTCTTAGAGGATGTCCTGGACTCAACATGGAAATTCCTGACAAACTCCCCTTCTCCAAG -CGGCGGCTTCGACCGCGTGTTGTTATCTCGTGTGTTCCCCAGTTTCCCCCACGGATTCCT -GAAACTAACAAATCATCAGCTACATCTTTGACTATGTGATCATTATGTACGAACATCAAT -TCCCCCTTATCCATTCCGTATTAATGAAAAAAAAAAAAAAATAGTGCATGCGCTATCGGG -TTCTCAATTCTCAACAAGGTCGAGCCCTACCACCAACACTTCTCCCTCAATAACATCTCC -ATTCAATACCCTTATGCCGTACATGAACGAATCCCTATGGCTTACGCCCTCTGCATCTCT -GCTGCGTTTCCACTCGCCCTCATTATTTCTTACACCTTGGTCCTCGATGGATTGTTTTCG -CACCACAAACCACAAGACACAGTGTCTGGGAAGCGGAAGCTGAGAGGTCCGCATCGCTGG -AAGGACAGACTTTGGGAGCTCAATTGTGGTGTTCTGGGTCTTTTGCTGGCGCAGGGACTC -GCGTTTGTCATCACACAGGCGCTGAAGACCGCATGTGGCAAGCCGAGACCCGATCTCATC 
-GATCGTTGTCAACCACGCGCTGGTAGCAAGGACCTGTTCCCCGGGTTGTCGAACTCGACG -ATTTGTACAGGCGATCCTGCCTTGCTGACAGATGGATTTCGGTCGTGGCCGTCCGGTATG -AACCTTATTCCCAACTCTGCTTGTGAGAGTCGCAAATGCTAACCTAATTTCATCCAGGCC -ACAGTAGCTGTGTGTATACCCCATTTATGATTCACCAAGCCTGTCCTATTACTAACACCG -TTCAATGAATAGCCTCCTTCGCCGGTCTTGTTTACATATCCCTCTGGCTGGGCGGAAAGC -TGCATGTTATGGACAACCGCGGCGAGGCCTGGAAGGCCCTTCTTGTGATGATCCCTCTAC -TAGCAGCAAGCCTCGTGGCCGTATCGCGTATCATGGACGCGCGCCATCACCCCTTCGACG -TGATCACGGGGTCAATGCTTGGGGTGGTCTGTGGCTTTGTCGCATATCGCCAGTATTTCC -CACCGATCACCGAAGCTTGGCGCAAGGGTCGAGCTTATCCCATCCGAACCTGGGGGACTG -AACCCGCAGGACCCGATACCGTTCGATTTATCGGCTCTAAAGGCGAAAGCTCGACGGCTC -TCCGTAACCATGAGGAAGAGCGTATGGACCCGCCAGATCTCCCCAGCTCGGAACATACCA -GACCAAATGCTTCTACCTACCTTCAGTCTTCCAACCCATATGTATCCAATATGTACAATC -GCCGCCCACACGACCATGATGCCGATGGCGGTGCTTGGTCGTCGAGCGAGGATGACGTTA -GTAACGGGTATGAGATGCGGCATGGTCAAGCTATGGGGCAGAATACCGGTGCGGGTCACC -GTATGCCTCAGTATGACTCCAGTAATGCATATGTATCCCAGACTCAGCCGCTGACGGCTG -ACACGGGAATCTATGCTCCTGACACGATGGCGGGTCCGGGGCGTCCGTTGACGAACATGC -CCGGTCGGGCTATCTAAAGATGAATGATGCGATGCGTTATTGATGATTGCAGTCTAGTCC -AGAGCGGTAATCGCCCAAAATGTTTGCTTATGATTTGAATAAGCGATGTATTATTGCGGT -GGCTGGTGTTCCAACTCTGTACTAGTATGATAATGAGTGTATTGGCGTTGAAAAGAACAA -TGCATTTTTTGCCGATAAGATCAAGAGTACGGAGTAGATTGACTAATTGACTAGTCCTAT -ATCTAGAACAGAAACAATAATTTGTACTACAATGAAGTCAAGTCCAATCCTCGTAGGGTC -CCCACTTAGATGTGGTTGGCTTGACAACGATCATCTATATCCTGCCCTAACCTGCCCTAA -TCTGTAACCTTTTATTCCGTAGTCTATGGAGTAAAGAGTCAATCAAAAATGCAAGACATA -GATTAAATGATGACGGCTCATGCAAGTGCCCAGACTAATCAATATATAAAAAGAGTCGAG -ACTAACATGGACGACTCGGGTTACCCCAGCCACCCACATCGTGCATTGTGAGTCGCCCAG -CCTCCTCAGCCCAAGATGACGCTGAATATCTTTTCTTTCTCTCTTATTTGCGCTATTTTC -CGTATTATTTTTCTTTCTCTTTTGTATTCTTCTGCATCTTCTACCCCGTGCATCCTGCAC -TGTAATCGCGACTGCCTGTGCTTGCTTTCCAAGCTCGTGTAGCCCACAATCTTGTGTACA -TCTGACGTACCGAGCACATTGGGAGAGTTGCACATTCCAGAGATCTCAAACCCAATCTTT -CTCTGAGAATTCGACAAGATGATTGGCAGCGTGTTTTTCATATTCAACCGCCTGGTTGAG -ATCGTCTTCCTGATCCCAATCATTGGCATGCTGGTGTGTCCCTCCATTGTTGTCCAGTCC -AATCAGCAATACCCCTCCGAGTCTCCTGCTAACCAACATCTCTTTAAATATAGGCCTACT 
-TTGTCAACGGCTACCTAAAAGCAAATCAGCTGACTCCACCCTACATCTTGGTCCTGTTCA -TCGTCAGCGTAATCGCCATCTTCTGGGCCCTCGACACGCTAATCCGCTTCTCAACAACAA -AGCGATCTGCAATCTTCGTCGCATTTGTGGACATGATATTCTTCGGCGCATTTATCGCAT -CCGTCTACCAGCTTCGCTTCATTGCAAATGCTGACTGTGCCCACTGGAATGGTGGCTCCG -TCTGGATCTCCCTAGGTCCATTCGGGTCCTACGGCCAACGCACAGATAATCCGCTGTCGC -TGGATGTTAATAAAACTTGCGCCATGCTCAAGGCCTCCTTTGCAATTGGTATCATGGAGA -CAGCATTCTTCTTCTGGACTGCGCTGATTGCGCTGTTCTTGCACCAGTCCCATGACAAAC -GGACTGTTGTCAAGGAGACAACGACTGTTCGGAGACGCAGCCATTCCAGCCGTCGCGGTC -ATGGATCTGGTTCCCACTCGCGCCATCGTAGCTCGAGCCGCAGGCCTGCGCCTTATGTCG -tttgattttgttatataggatttcctttttgtttggtttttgttttttgttttttatttt -ttACGGGTACGTCTCATGATGTTATGAAGATCATATGCCTTAATGAGATACGGATTGAAC -ATTTCCTTGGATGGATTGAGACCGGAACAATTGTGAATTGCAGCTATGGGTACCATATTC -GGACTTCTGTATGTGCTGAAAATATGGTCATTTCAGGCATTCGAGATTGCGGTCTCCATG -TCTTCTTATACTGTTTAGTGTGTTGCGTTTACAATGCCAGACCCATGACGATTAACACAT -GATACTCAAATGATATCTCTGCAGTTCTAGTCGTGTCGACATGCTATAAAGTGCCGTCTG -GGAAGTTCCAAGCCGCTGAACCTACATGATTCGTGAGCGCATGCGATAGCAAAGTACGTA -CATCCGTCGCAAGTGAGGAACTAGGGCGTCTGGAGAATCATACCCAGCAATCGATTGAGG -CATACGACGATTAAAGTGTCGCGACACAATCTCAAACATCACTAAAAGGTCCAACATGCC -TTCTTCATACGCAATGAATCGTAGTTTCACACCCAAATCACCTTCCTCCCCTTGACTTCA -CTTGTGTTTCACCTTGACTCTTACGCTTCACCCAAGTATTGATTCATCAATACTTCTACG -CTCATCCTACAATTTCAACATCACTGTTCGCTCAATCCTCTCATTGCATGTTGTCTTATT -GCCTTCACGCCTTTTCCCGCCTGTAACCACAATGTCCTCCAAGCGCCAATGGGTCGATCT -AGACGAAGGCCCATCGAAGCGCCTGAGGGACGCTTTGAGGCCGAGAAATTCCCCGACAAT -GTACATCAGACACGTCAGCGGATGGTACAAAGATGGCTTCTTGAGAAAATGTGACGCTAT -CGACCGGATTGCGACCAAAGCAAGACACATTAAGATGATCCAAGAAGCCCTGGACGACGC -GGAAGCCTACTACCTTATGTCGCTGCACGACCGTATCTTTTCCAAACACAGAATGGAGCC -AAATCACGTTTTCCAAATGGCGGATGATCTGGCTAAGTGCACAGCTGGCTACAAGGTAGA -ATACGCCGAAATCATCGACCAAATCAACGCGGAGAAAGATGACAGCCTCAAGGAAGCACT -TGAGACGGTGCTTTATGGAGTCCATCTACCAATTTCAGAGGAGGCGGCTCCCACGCAACA -TGTCGGTTCAGTGCTCCCGCTCCCCCAGGCCGTCGTACCAACCACCGCGACTGATCCTAT -TGCTGTCGAGGAAGATGACGGGGCGGCAAGCTCCACGAGTGCAGATGATTCGACGGTAAC -TGCCCAAAAGGCCACCGCCCATTCAGATAAGGCTGTCGAAGGTGAAAGCGACATCGACTT 
-CGATTACCTCAATGGTCTCCAACCGAACGATGGGACGGAATCCATGGGCAACCATTCGCC -AACCAGCGAGTCTCTCCAAGCCTCCGCGGATCAATCAGCCAGTGCATCTGCCTAAGCTCA -TCTCTGAATAACTGGAGCACTGAAGCTAGCCAAGGACAATCGGTCAATCGACGAGTCCAG -AACGATGCGTCCGTGGACGGCTAGGGCAACCGTGATATTTTTTGATTACATTTCGCTATT -TTCGCCATCTAAAGGTAGACTGTCTCACTTGGGACCTCTTTTTCTGCAACTTTTTTCTTT -TCGTTCTTTGCATTCTTTGATTTTTCTGGCAGTTCCGACTGCGGCGGGATGCATTCTGGC -TTCGCCATTCCTTCTTTCACCAACAAGGAAGTTGGCCGGATCTTTTAGTGGGGCATTGTT -GCGGGCTAAGATCTCTTTCTTCAGCCTTAGCTTTCCCAGTTCGGTAGATAGGAATCATTT -TCTCATCCCCCAATGGGCTTCTCAGGCAAGTAACTGGCAATTTATAGTGGGAGCTAGCTA -GGCAGAAAAGGACCTCGGCATCAGACATGCATGTTCAACAATGCCCCTGGTTTAAGTCAC -ATTCGTATCGAGGATATTTTGTGGTTAGACCTAATATGAAGTACGAGGAGCCAAGATCCT -ATTTTGTCAAAGATCTTCTCGGCCTTCCGTACTTGGCCCGGCGCCCCCTCTCTATAAATA -ACAAGTATTGGCTTGCCACTCATTGATTCTCAAAAAGATCTTCTCTCACCATGCTTTTTA -TCAAACATTGGGCATGGGGCTTTAAGCCGGTTGCTCTTTTGATTGTATTGATTGTTCTGG -TGTTCAATACCCAGCCTGTGGAGGCAACGATTCCTACATCCAAGCGCTTTGATTTATCAA -AGCCTTCATACGACCTCTTCCGCAGCAAGTCCCTGCATGATGTGACCGTCCAGCAAGGCT -TCGCATTTGACAATACCAATCGCCGACTGTTTGTCACCCAGCGACGTGACGGTTCCTCTG -GGACTTCGGGAGACCTATGTGTCACCCAACTTGACTTCGACGGCAACTATGTGGGCCACA -TGCATCTAACTAGTTTTGGTCACGGAGTATCCTTTGGTGTTCAAGCAGTCGGCTCATCAT -CCTACCTATGGACGGAAGTGGAGGCCAATTCGAACGGATATGGCAAGAAACTCGCTCGGT -TCAAATTCGTTAGCGGAACTACGCTCTCGTCATCGTCGGCATCACTCAAGAAATTTGTCC -CGATCACAAGTGCTACAGAGCATACTTGTTCCATTGATCCCGTGAGCAACCGTCTCATTG -TCCGCGCCAATCTTAGTGACGGTAAACACATTTCCGTGTTCGACTTGGACGAGGCAACGG -ATGGGGATTTCTCGAAACCTCTCGCCAATTTTAAGCATCCATCGTTGAACACAGCGTCAG -ACACATTCCAGGGGTATGCGGCATATGGTCAGTACATATATCTTTTGTACGGCAACTCGT -ATGATGTCACCGATGGTAAGGTCAATTCAGATGTGACCGCTGTCAACATGAATACCGGAT -CTGTCGTTCAAGGTCCTCTCATTACAAAAGCCGGATCAACGCTATCATTCCGAGAGCCTG -AAGGACTGGCTATCTATCAGACTGCCGCTGGGGAAGTTCGTCTGTTCTTGGGATTTGCTT -CGGGTGAAAGTGGTGATAGGCGATCGAATCTATTCTACAAGAATGCTTTGATTTAGTCTA -TGCATCTCGATCGAGCAGGGATATCTAATTTGAATTTATTTTTGCAACAGAGTCACCCTC -AATCAGGAACATGGGCATTCAGGGGAAGATATAAATTCACAGGGAAGAAAGACAAGACAG -ATGGGAGAGAAGGAGAGAAGGAGAGATGTATATGGCTATGCTGCTCAATTCTTTCTCCTC 
-GTCTTTTAACTCAGAAGCAATTTCCTCTTCTAGGCAAATCAACCATGAGATAACCCAACG -CTAGTAATCTCAGCCAGCTCGCCATTATTTGGCATCAAAACTACCCCATTCGGAAGCAAG -ACACATAGCTCCAAGCACTCTATATTCAGCCAATCCTTTGGGAGGAAGTAGTGTCGCTGC -ACTTGGTCTGGACTGAATTCAGAGCCAAGACTCCAAGAACATACCCAGTAGCTTTTATCA -AGGAACACAAGGCGTTCACGTGGGAGGATCCCTAGGGGAATTTCAATTTGGCGCTGGATT -TCCGGTGGGATGGTCATGGGCGTCTCGGTCGACGAAGATATGCTTGATATGGTTGGTAAT -CCGGATAGCGAAGATGTTTGAAATAGCAATGTGACGGTCTCTCTGTGGCCGCTAGTCAAA -AGGATGATCTGTATAAGAAAGTGAGAGCCTGAGTGCGTGTGGCAGATGCGCTGTATCTTG -GCGTTCTCTTCATAATGCTGGCGGGCTTCGACGGTATCTTGATTAACCTCTAAGTCTAGC -CCCGATACTAACGGTGGTCTCTCCAGATCAATGACAGCAATTTCGGAAAGATCGTCCCAT -TTGTAGACTCGAGTATGACTCGGGCTGAAAGACAATAAGAGTTTTGGGTCTGTTGGATGG -TTTACCCACTTCGTATCGGGTGATGTAATAGTGAAAGGCTCTCTGCTGGATTGAGTCAAC -CTCACCGGTTGAACGGTGACGGAAGGCCCGTTTTTGATTAAAACATTGGATCCATTGCCA -TTGAGCAAGACTTGTTGAATGCCTTCAGGGCTAACATTCATTGTCGCCTCAAACACTTGT -TTCACAGACCAAGTTCGGCGGCTGCCCGGATGAATCTCCTTCACGACGACCCTCCCTGCC -ATTTCGACGCTGGCTAGGAATCTTCCGTCGGACGACCAATTTAAATGCTCAATTGGAAGA -TCTAAAGGTGATCTCCAAAGCTCAGATGTCACATGTTCGGTATTCGAGTCTATCACGCTG -ACCACCCCAGCATCATTCCCAACAGCGTGATATCGCCCTCCGAATTGAACCGCAATTGCC -GTTATTTGTGCCCGTACATCCGCAGAAGCCTCGGAGACATTGGCTGATGGAATGCTGGCC -GCCTCGCTCCCAAGGTCACTATCCTCGCCGTTCTCGTCACCCGCAAGTAATGCATTTGGC -TCCCAAACGTTACACCATGGACCACGGAGGTCATATATGCGCTTACTGTCTGGACTGAAG -CAAATATCGGTGATGGTATGATCGCAGGAGAGCTTGTAGATCAGCGTGAAATGATGGAAG -CTATACAGCTTGATCGTTCCCTGCCTGTCACCTACCGTAAAGAATTTGCCCTCGGGACTG -CATGCAATAATTTTTGCGTCGGCCTCCAGCTCCTGTTGGCTGTTCGTATACGGGTCCCAT -CTGAATACTTGCCCTCCCATGTATACGCCGAGAATTTCCTGGGATCTTGGATGCCAGATC -ATCTGATCTACAACTGACCAGGAGTTTCCTGCATATTCATGGTCACGCATACATCGTCCC -AGAAGCGTCGCAGTTTCCAGAGACCAGACACATAGAGGAGATCCGCGATACGCGACAGCC -ACACAAGTCGCGTCAGAATTGAATGCCATACGCCAAGGGACGTTGTGTACAGGCCTATCC -AGAGCTGTGCCCGTTTTCAAAAGGTTGGGATGCAAGGTTGACCAGGCGGGAGCTTGGTCC -GTCAGCCGGGCGACTCTAAGGAGTCGATCCGTTGATCCGATGACAATCTGGGTATCCCCG -GTAGTGAATACTACGGCCATGGCCGTGCTGCCGATAGGATTTTTGATTCGGTGCATGACT -TCTCCCGATTTGACAGACCAAACCTTTGTCGTTCGGTGGCCGTATGTCACGAGAAGGTGG 
-GAGCTGCGACTGAAACTCATCGCAGAAACCCCCTCGGTGTGTTTGAGCACATGTCTAGTC -TCAAATGTCGTAGAGTTATACAAAGTAATTATTCCTCCTCCAGTGAGCACAGCAAAATGA -TCTCCGGCACAGGCAATAATGTTCCCTTGGGTTGCTGGTTGGAGGGAGATCTTTGCTAAG -CCGTCATCCCAAGTGGACTTTGATAGTCCCTTAACCACCAGGGAATATGGGCGGGGGGCA -AATTGCTCGAATTGGCGGTATATCATCGAGGCCTTTGGACAGAATGGCGGGATTTTCTGG -TAAATAGATGTAGGGGACTCGGTTAAATGCAGTCCGAACTTACCGATGAGTTTGAGCAGG -TCTGTTGCCCATGATTCCACCAAGTCAAGTTCTTGAAGCCGATGAGCCATTGGGTGTGTC -ACAGCGTTGCGACCTCGTTTCCGGCGAGCGTATAGATTCATGCTCTTTGCTGAAGACACA -AGAATCCGCAATTTCCTCGCTTCGGCCAATGCCACAATCCAGTCGAGAACGTGATTTCCC -TTGAGGAACTTGGCCAAAAGGAGGAGCGGTACATCCGAATCAGGGGACACTTTATCGAGG -TGATATGCCCACGAGGTCATTGCATAGTAGAAGAATTCTCCCCTCAGGCTGTATTTCCGA -TTCCCAGGTTCAAGTTGCAAATCTCGAGGTTGGTGATGCTTCTCCAAGACAGTTAGGCAC -TTAGTTAATAACTTCTCATGACCTTCAGGTACATTGACTCCAAGTTCTGAGTCTGTCGCC -ACTATGTGATCTCTCGCGGTCTGGTGGACCATCACTAATCGACTAGTGGAAGAGTCAACG -ATGACAAACTGCCCACATACACGGTTGATGAGGAGGCCCATCTCCACCATCGGCAAAAAG -CCCGGCTCCAATGCTTGTGCCAACTCACTGAGTATCAATGGCCGTCGGGAGCATGTGGCC -CACATCAGGATTCTCTGTCCTAGCTTTCGGTCGCGTGGTTTCGTGTTTGCAATGATTGTG -CGCTCCATCCGCTGATATAGCTGCTCCATACCACCCGGGATGCCACCCAGCGTTTCGTCG -ACGTCCTCTTCTGTATTGCACTCTGCCACCTCGCGCATTGCAAGACTGGCCCACAGAAAA -TTACCATTGGCACATTCAACCATCTTATCCACCAAGCGCGCTTTAAATTCGGGCGCAGCA -CGCAAATATTCCACTTCCCGTTCGACATACAGCTTTATGTCTCTCTTGGTATTCTCCAGT -GGCAAGTATGTCACCAAGAGCACAGTGGACAAGCGGTCGAATGTTGTGATGAGCCCCGAT -GTTTGCCGACTGACAAGGATTACCTTGATGCGTGTCAATGAATGAGGAACACTCTGCATA -AGTTCTACCAGTTGCTGTGGATTGTCGGTCTCGTCCAACGCATCGATAACGCAGTATATT -GTTGTAGAGAGCCTCATTTTGAATAACAGTTGGACAAATATTCTCTCCCAGATGGCCTTC -GAGTCTGTTTTTTCCAATGTTTTGGTGGCAAATGACAATTTTTTCAGCGCGCGGCAAAAT -TCGGGCAGCTGCTTTGCAATCTGAAAAGCAATGCTCTTCAAGCAGGCACTCAGTGAGCGC -TTGGACTCGTCACTAAATCGGAAGAAATAATATACGCAAACAGAGCGTTTTAAGAGGTAA -TTTGTAACAAACGCCGATAAAACCGATTTCCCACTCGCTGGCAACGCGTGCATCCACAGC -ATGGTTGGGGCATCCACGGCTTCATCCATCCAATTATTGAAGTCGGGATGAAACAAAATC -CAGTCGCAAGTTCCAGGAGTCCACCTGGAATGAAAGAACTTGAGATCATCATCAAATGTC -TCTGGAACTGCCAGCAGAGTCTGCAACTGTTCAAGCTGTGTTTTCCCCTGGTCTCCAAAT 
-AAGGATTGTCCTGTTATATCCCGGTCAGTATGTATTTGATATCAGTAATGACACAAGGAC -AAGTTGGACTCACCTGCTGAGCGAATAGATGAGACCAAAGTCTTCAGGGCGTTTCTAACG -CTAATGTAATTAGAGTCCTGGGTGCTATCAAACTTGCAGACCGTATGGTGATCAGCAGCC -AGGGCCTTTGAGATCTCCTTCGGATACCCGAGGGTAGCTGAGTCTTTCTCGACCACCATC -TACCGGCGAGTTAGCATGTATAGAGTAAACTCGTGAAGGAGGAGTCCTACATACCATTTT -CTTCGGGCCAATTGAAGTCTGCAGGGTTTCATAGAATGAAAGTATGTCGAGGTTTGGAGC -AATGTGTCGAAACTGCTCGTTCAGAGCCGTTACAGTCTGTGAACCGGCACTCAGCTCCGA -TACGTATAGTTTGGGACTCAAGTTGAACAGTGACACCGACACCGTTAGGATCTTGTCCAA -GATTTCGGCTAGATTAGATCCGCGATGTGGAGTCGCCAAAAACAGTATCGCGCTGATCGC -TGACACGATGTCCGAATACTGATTATCGTTCTGGCCAAGAATATACGCCTGTTTAACGAC -AAGACCGCCCATCGAATGAACAATGAAGATAATTGGTCTCTAGTGATGCAAGGCGTGAAC -ATCTCTTTTGAGATAGAGTAGGGCAAAGTCGAGGACTTACCTGGCCCAGTTCAAGTTCTT -CCAATCCTTCACCCTTCCCAAATTTCATCCAAAACAATAAGTCTTTCGCAAAGTCGGTAA -TACCCGTGATGGGAGTTGGGCCAGTAGATGCCCAGTTTGCATTGTATCCAAAAGATAGGA -TTCTCGCCATTCGAATACCTGGCTCGAGAGGAAGCCATTTCTCTGGCCAGAAGTACTCTG -GATCTTGTCCTCGTGCCCACGTAGCTCGGCTAGTACCGCCTAGGCCATGAACAAAAATGA -TGTCAAGAGAAGGCGAGGATTCTGGCTCATAGACAACAGTCAAGCCGAGAGGATCGTTGC -GTCGATTGGTGGACACCGATTGACTCCGCAAAGGGAGTCCGATCATTGTCCGAGCCAGAG -AGGGCTTTCGAATCAGCTCCATTGGAGTCTGCGATGGATCACCAAGCTCATTGGGGTGCC -ACACTACTTCGGGGGCAGCAGATGAGGGAGAAGGAGTCACATGCTGGCCTTGTGCGGCTG -TCTCGTCAGGTGGAGGTGAAGTCTTCGAGTTCTTCCTGGGAAACTATAGACAGGTTAAAA -TCATAAGTCGGAAATAATCTTTTTCTATTATATTTTAACCTACGAGTTTTTCCAAAGGGT -TGTTCGACATTTGTGGCAAAGATCAAACGTTGGCAAAGGTTCAAACGTTGCATCTCTCTT -TTGGTCTTGTCTTTGGGATGAAAAGTATTGCCGGGGACTAGACTGGGGGGCAGTGACTTT -GGGAACCGCCGGGCAGCTTGCGGCTCAATCAGTCTGCAGGTAGACAAAGGAGTACTTCGT -ACTGTGTACCCCGTATTTCAAGTATCCTTGCTCATTTCTCTCCAAATTAGTCTACGCACA -GGCTGTGGCGGCTGATTATGCACAAGGGTAGGAGCCGTCACACCCGTGGACCACAGCATC -CAATCCTCGGTATTCCCGCCGCCCATAATATTTATGAAATTTACCTCGTTGGGCGCTTTG -ATTTATTCTTTTTACATTGGTTTGAAAGTCATTTCAATACTACGTGGCATGGTTCTGTCC -ACACGTCCAGGATCGAGGCCGCTGGGGCCAGTTCAGGGGCTCTGCTTAACTTACCGTTTT -CTCGTACGGGGTATATCATCAGGCCGGGTTATCTGATTTGCTTTTAGAGGAGCTTTTCTT -CACATGGCTCCTACATGAACAGATTCAGCCTAGTTTTACTATCATAACCAAGATGTCGAC 
-AAGTGAAGCCATTGTTCATGAATGGACTCCCCCGACGGTTTCGCTGGGAAGCGTGCCTTT -GGTCACGCCCGCTCGAAGCGGCGGCCAAGATATTGAGGGCGACTTTCCGCTGGATCAGAT -AGTAGTTGACCGTGAATTCTCCCACTGTTTTCTAGGAAATACCTCTTAACCTGACTTCTC -AATTACTTTAGACTTCCCCCCAGGCACCCAAGTCCACTCCTGTGACCGCTATGGAGCTTC -GGCCTGGACAGTTACTGCCAGAATCATAACCACCCTGGCGAATGGGGATCCCAAACTCTA -TTTCCTCAAGGTATGGGAGCATCTGATAGCATAAAATGCAAATTGACTGAGGTTTTCGTC -CACAGTGTGCTGAAGAAGATCAAGGAAGAGCCATGTTGGAAGGAGAGTTCAATTCCATGG -TTGAACTGTACAAAACAAGTCCCAAATTTGTCCCAGAGCCTTACGCATGGGGCCAACTTA -GTGTCTCCCGCCCTGCCACGTATTACTTCTTATGTGACTTCATCGAGATGGACAACAATG -ACCCAGATCCCGTCCAACTCGCCACCAAACTCGTACAGTTGCATAAGACAAGTATATCTC -CTACTGGCATGTTTGGCTTTCACATCGATACGTGCCAGGGCAGTCTCCCTCAGCGGACGG -CGTGGAACCCAAGTTGGGTCGACTTCTATATCCAGCTAGTCAAGGGGGCAATGGCGCTCA -ATACGGAAAAGAACGGCAATTGGAAAAACCTGGAGCAGCTTGTCGATCGGCTCATAACGC -ATGTAGTCCCTCAAGTGTTGGGACCCTTGGAAGCCGATGGTCGGGTTGTAAAGCCAACTT -TGATCCACGGCGACCTATGGGATGGAAATATTGGAACTAGCCTCGAAGATGGGGAGCTTT -ACATCTTTGACGCAAGTGTTTACTATGCACATAACGAGATGGAGATTGCTATGTGGCGCG -CAAGATTCTGCAAGACGCTGAGCGCGAAGGTCTACCTCAATTCCTATCTCAACCGAATGG -GCATCAGTAAGCCCGTCGAGCAATTTGAGGACCGTAATCGGATCTACAGTTTTTACATGA -CCCTACATGAATCAGCTTGCCATAATGGCTCGAGCTTCCGGGAAGAGTAAGTGGTATTTT -CAGAAAACTGTTCCCAAGTTCTTACTGATTGAATGGCATGACAGGTGCTATGAGAACCTC -AACTACCTGGTCAACAAATATGCTCCACTTCAGGAAGGAAGTAGTTGAATTGGCACCATT -GAAGTTCCAATAGGTATCATCTATTATTACATGCATAGATCCAAAGCAATCTCAGATTTG -TGGCCCTATACCTGGCTAAGTGCTCTCTATGCCAACCTAAGTGCAGATAGTTCACTCGTG -AGTGTGCATATAAATGAATGACCAATAGAGACTTGAAATCTCGAAAATCCTTTCAATTAG -ATATAAATACCGGGAGGGCTTTCGTAAAGCGTGGAGGCAATGCCATTACTTGTAATGAAA -GGGCACATACGTAAATCCGCATATCCAGTCGAACAAATCACGGTGAGCGCGTATATATGA -TATAATCCCTGCTTTTTTATACCCAAATGATTCCTATTTTACCCCTCACGGTACCTAATA -CTCGGCCCTAAGCCTCATCCACGTAGTGCATTTGTATTTAAAGCTAGTGTTATTTATTCA -GGGCTCAGGCCTACCTAGCCATCTTTCACCCCAGTCTTGTACGGGGTTCCACCAAAGCCT -CACTATTATATACTCCTCTAATAAGAATCTGTTTACCTCTGGTGCAGATGTCTACCATGG -GTAATGAAGAAAACAAACCAGGGCATACTAGAGCTTCAACCGAAAAGGTTCCTCAGCAAT -GCTATCCTTATACTCCTGCAAGCATTTCCCGACATAACTCTCAAGTCTCGCTCCCGCCAA 
-AAGAGGCGAACTCGGAGCCTCAAGATGAAGCATATTCCATCTTCAGCAAAAGAGAAAAGC -ATTTAGCGGTTTTTCTTATCACATTCGCTGCAACGTTCTCTCCCTTATCCTCGTTCATAT -TCTTTCCCGCCATCAACGCCCTCTCCGCGTCCCTCAATGTTTCAGTGGAAAAGATAAATC -TCACTGTCACGTCCTACATGATAGTTGCTGGGGTTGCACCGGCAATCATTGGTGATCTGG -CTGATATGACGGGTAGACGTAACGTTTACCTGCTTACTTTGCTCATTTATGCTGTTGCCA -ATATAGGATTGGCCGTGCAAAATAGTTGGATAGCGCTTTTCTTGTTGAGGATGATGCAGA -GTGCTGGCGGCTCTGGTATGTTGACTTGGAATCCAGTCCTTTGCCCTATAAAGGTGATTC -ATGTAGATAGTATCAAGTTTGCTAACTAAGAATAAGCAACTATTGCGATGGGGTACGGGG -TGATCTCTGATATTGCTTCACCATCTGAGCGTGGAGGTTATGTTGGCATGGTGCTTTTAG -GGTAAGCCCACGTACTCACTCTCTCGCCACCAGTCATCAAAGTCACACCATAACTCTGCT -GTTTGGTAGCCCCAATATTGCAACTGCAATTGGCCCAATCCTCGGAGGAGCTTTAAGTGA -AGCTCCCGGATGGCGTTGGATATTCTGGGTACCCGCTATAGCCTCGGGGATCTGTATCTT -GCTAGTCGCTCTGTTTCTACCCGAGACAGCCCGTTCTATTGTCGGGAATGGTTCGCGAAA -TGTTTCAGCTCTTTATCGCCCAATTTTCGGCTGCCAGCGGTCGATAAAGCCCACCTCCTT -GCCAGCCCTTCACGAGAGGGAGAACACTCCACGGAATTTCCGGATTCCGAACCCTCTCAC -AACATTGAAGATCCTCACATCCAGGGAAAGTCTTCTCATCACTGCTATATATGGTATCTA -TTATATGAACTTCAGTTGTCTTCAAGGCTCTTTGTCAACACTTTTTATCGACATCTACCA -CTTCTCGGAGCTTAAGGCAGGATTGATTTACCTGCCCTTTGGTATCGGGTCCTGCATTGG -CGCATACTGTTCAGGTAATGTTCTTCGTTTCCCTTGATTGGAAGAAAAGGTCGCTGAACT -ACTATTTGGCTAGGAAAAATCATGAACCGTGACTACCGCCTCACAGCTCGCGCCCATGAT -CTGGTAATAGATACTACTCATGGCGATGATCTGACCACATTTCCCATTGAAAAGGCCCGG -TTCAGGAGCATATGGTACTCCATATGCGCGACAGGGATCTCCACCACTGGATACGGATGG -GCATTGCAATCCAGATCTGTGAGTAACCTCAACTGTGCAGAATATCAAACGTGATAGATG -CTCATATAGCCCTTCTACACATAGCATATAGCAGTTCCCCTCATCCTACAGTTCATAATA -GGCTTCGCCATAGCGATCACTTTCAATGCAAGTGTCAGCCATTACACATATGTTGGCCCT -TCTTAGGAGCTTTACTAAACGTTATTGTTATAAACTAGTGTTGCGGTACTCTCCTAACAG -ACCTGAACCCCAAGTCGCCAGCTGCAGCGCAGGCAGCAAACAATATCGTCCGTTGCTCAC -TTGCAGGTGCAGGACTTGCCTTCCTCCAAAGTTGTCTGGATGCAATGGGACCAGGATGGA -CGTTCACCTTATTTGGTGGACTATGTATGGCCTGTCTTGGCGTGGCATGGTTGGAGTGGA -AGTATGGCAAGATGTGGAGGCAGCAATAGTCCAAGGATAGAAGTATTTGTTTAACAAAAA -CATAGCGAAAATTACCATACAGTTTAGTTCATTCTAGTCAGGATATTTGAAGCCAAGCCC -CGTCTTTTCTTCCCCCGCATGAGCCCGAGCCTGGTCAGCTGTTCGAGACTGTCAACTTCA 
-TGCATGTCTGCGGCTTTGCATACACTTCTGTCCTATACTCTGTATTCCCTACATATATAG -AGACTCGCCCGGATACGGAGCACACATAAGCCACTGGGCCTAAGCGGGCCGATTTAATAC -CGCTTGTTGTAATTTTAATTCGAGGGGAAGAGGCGATTCCTTTGTGATTTCTTCCATAGG -GCTCAGGAGAATAAGGATTGAATCGAAGACTATCTTTCCATACCACCTTTATACAACAGT -ATCCGTTCTTGAAATTTCTATGTAATGATACAAGTGTAACCACATTCATGGATGAATTAG -AAGTGCGAGCGCTCCGCACACACCCTTCAACGATACTCCCGCCAAGATCCCAAAGATGAC -ACCTTCAATGTAGGTAGGTCTATGTGCAGATGCCAAGCTCTCCACTGCCCCATACTGTGT -TAGTGTACTCTATAGTCACAACCCGGGTGATTGTCCTCAATAATATCTCAACCCGCGGAG -CCCCATCTCCTGTTGTATAACACGATGTCACCCTCACCCTGGCACGCGCATGTCAAATAG -CCAACAAGTTAGTGATGCGGTACTTGGGTGGCCAGTCATTCTCAATATTTCGTGTATGGT -TTCCTTTGTCTGCGGCAATTTATAATCATATGCATCGTGTTCTTCTGTCTCTTTATCCTT -CGAAAAGTCTCTAGACACTACAGAAATGGCCTACGGGGAAACTCATTGCACTAATCAAAA -TTCGCATTTAATATAACAGATTTTTTTTATCTCAAACTAAGCATTTAGAACAATACGACC -ACATGTTTGGCGAAACTAAGCTGAGGGAATTTTCACATATTCTCCAGATCAAACCGCATG -TACAACTTATGCACGTCAGCGTCAATGTGTAAAGTAATGATTCTCGCAATTCGCTTTCTC -CCTTGACCCCACGCACACAGAACAAATGCTGCAACAAGTTTGCCTATACCGATACCTAGC -AGAGCATGGAACTCAGACGAGGAAGGCTCCCAGGTCTGCTCTGTGCTGGACGGATAACGT -ACTCCTTCGCGGGCTTTGTATATCTCTCTAACACACAATAGGGTGTTCTTTTCGTTGACA -TCGGAAACAAATACATTTTCCAAGCTATCCAAGGGGAAATCCAAATGGTAAGCTGCCTTG -GTAAACTCGGATATGTAGGGATCGTTGCTTCTCTTAGACCTAAATATATCATCAATAAAA -ATCACTCCCGGCCCGGTGCGTCCAATCCACCACACTGGGGGACCTTCGTCACGCTGCGTG -ATGCTGATCTTTCGATAGGTGGAATTCGCATTTTTCATGCTAAGGGCAGTTACTAACTTC -GCATCTAGCTTCACAGATGTGCCATCGACAATATAGTCCTGGTGTCTGGGCGTCAAAGTG -AATGTGGAAATGGGGTACTGGCAATCCGCTGCACGATTTTCAATCCATTTCTGTTGGATT -CGTCCCCTCATCACCCACGTAGCACAGCTGTCGGCCATTTTGCTCTGTGGTAGCGGCCAA -AGGTCTGATAAGATGAAGTAATAGAGAAGAGTCTGTAAAACTGCTGGCTTGTAGACTTAT -TGGTATGGTATGTTTTTTCCACGGGATTTGGTAGTGGTAAATGCTGGCTTTTATACCTCG -GAGACCGCGACGAGGCTGGGATTAAAGGATACTCGATCAATGCAATCAACCATTATTCAA -GGACAATGAAGCAAAAGAGAACAATGTGATGAAAGGATTTCAATGAAACAATGAACATTA -GAAGCCGTGAAGGCTCCCATTTGTTGAAGGAGTTTTCTTGCTAGCACACTTCAGTCCTAG -ATTGGAATTGGTACTCCGTAGATATATGTAATAATGCTCGACTATGATGATCAGTTATGC -CTCGCAGATCGTTGATAACGTTAGACCAATTAGTGGGTCTTCGGGGTTCCCAAGGTGGAT 
-CTGCCACGCTAGTGGACCTCATTGTCAATGTCAGATTGGATTCGAGGTTTTCAGTGACTC -ATCCGCACCTCAGAAACTTCCTAAGATATCAAAATCGAAAGCAGGTATCATTATGGCCAC -AGAAGAGGAATTTGTGACTGTGAAGGCATTCTTGCCCACAGTCCCGCTTCCCGCGAACAG -TGAGCGGCCAGCAATAATAACTGATCGCTTGCTCATCCGCGCTCTCCAACCAACCGACTT -GGAAGCGTTACATGTCCTGCGCACACAAGAAGAAGTCATGAAATGGACCAGTGCTAGGTG -CATCGATGAAAGCATTGAGAAGACCAAATCCAAACTGGATCCTTTCCTACCACCAAACGA -TCTGATCACCGCCAATTTCGCCATATGCCTGAAGGAAACGGGCGAGCTTATCGGTATCGG -TGGCTGCCATCTTTATCCAGGGAGACATGGATGGCCCGAAGTTGGATACATGCTTCGGAC -GGATGCTTGGGGGAAAGGCTTTGCGACGGAGTTCTTGACGGCCTGGCTTCGGTTTTGGTC -AGAATTACCTCGGACAGAGCGAGAAGTCAGTGTTGATAAAGATATGGTCATTGGAGAAGG -CGTGGTGGATGGACACCTTATTGCTATCACGGAAGCAAGCAATAGTGGAAGCCAGATGGT -ATTGTTGAAATGCGGGTTTGAGCGATTCCGGGATTTTACCGAGGTGGATGGTACCGAGAC -CGTAAACCTCGTTGCTTTTCGTTTCCTTACAAAGCGATAGATCTCATCCTTACATCCAAC -CTATACGAGATAGATAAGAAAGCGTTGATATCACAATTCAGAGATCACATCATCGCTGGT -GTTTATTTGCACATAAAAGAGAGCGGAGGAAATGCCGCCTTGCCCTTCAACTAAGATTCG -ATCCGTCAACCCCGGGCCCACATCCATTCTAAGGCCATTCAGGCTGGCTCTTCAACGTTG -CAGATCTTTGATATGGGAGGGGAAATGACACGTGAGCGGCCCGTCCCCGTCCCCGGTTAT -AGATCACGATCATATAAATAGAGATGACTCCATGAGCTGATGAATACAGTAGAAACGTCA -CTCGAGATACCAAATTAGGGAGAATTACAATGGAACATTTTGACGTCGCAGTTGTTGGGC -TTGGAGTGCTGGGAAGCGCCGCGGCGTACCAAGCCGCTCTAAGAGGCAAGAAAGTGATCG -CATTCGAGCAATTCGAGCTGGGTCATGTGAACGGAGCGTCTCACGATACATCGCGAATTG -TCAGGACAGCAAACGCAGCACCTCAATATGTCTCCCTAGCCCAATCCGCCTACAAGGACT -GGGCTGACCTGGAGAAGGCTACAGACCAAAAGCTGCTAACGATAACCGGTGGTGTTATGT -TTTTCCCCAAGGGTGCACACCCCCCACGGGTTGACTTCACGACAAGCCTCGCCACCAAAA -ACGTGCCATACGAGGTTCTAGATTTCAATGAGGTCAACAAGCGTTGGCCCCAGTTTAACA -TCCCGGACACCGTTGAGACGGTATATACGGCCGACACCGGTATTGCTCATGCGGCCAAGA -CAGTTCTAGCTATGCAAAGTCTAGCAAGAACAAATGGCGCAACTTTAAAGGAGCACACGC -GAGTGGACCGTGTGACGCCAAAGAAACCAGGCACGGGCGTTGTGATCGAAACATCAAACG -GACAATTCCACGCAACCAAAGTCATCCTTGCCACCGACGCATGGACCAACAAGCTCCTTG -CTCCTCTTGGCATACACATTCCATTGACAGTGATGCAGGAGCAAGTGACATACTTCAAGC -CGACGGACGAACAAGCCTGGGATCCCAGCCGTTTTCCCGTATGGATTTGGGGCGGCGATC -CCTGTTTCTATGGGTTCCCGTCTTTTGGCGAGCCGACTATCAAAGTTGCCCGCGATACTT 
-CGAACAATTTTATGACACCTGAGCAACGTACCTATGTACACTCGCCGCAGTTGCTAGAGC -AGCTTTGTTCGTTTATGGGCGATTTCATGCCAGACAAAGGGCGTCAACCACTTCGCACTA -TCACTTGTCAATACTCTATCACTCCGGATCGACAGTTCATTATCAGCCCATTGGAAAAGC -ACCAGGATATTATTGTTGGTCTGGGGGCTGCCCATGCGTTCAAATTTGCGCCTGCCTTCG -GTCGTGCTCTTGCAGAGTTGGCAATTGATGGGAAGAGTACGGAGGACCTTTCGAAATTCG -GCGTTCCAAAGGATGCTACTTCTACTAGCAAGCTTTGAAATATTCATGATTTATGATATT -TCAAATTCAAGCCTGAGAACTCTGGGATATCAGTGTATCCCTCAATAATGGCACATAGTA -CAATCGGAGTTTCCATCCAATTTTATTGAAGAAGCTAAGGACCATGAGAATATCGGCACA -CTCCAGGGCTAGATAGGGGACTTGGCTAAGCTATGGCATCAACTTAATCCAAAGCTCAAC -TAGGCAAGTGAGACCGTTCTCGTCTAGAAGAATGCCCGATGTCTATATAATCCGCACTAT -TAGCATATGCGCAAGCAACTATAGTATCGCAGAATGACGCACTATGTGTGTTTTCCTCAC -ATAGTCCCACGAGATTACCCAAGCGGGTTTTCATTCCAGCGCATATTTAGAACTACTCCT -CCTTTCTCCCTCTGCTCATTTCCCGTATTTTTCATATCATCTATTCTTATATATATAACG -TCTTACATATGCTATCTTTTATTGGTATTTCATCGTCAATTATATTTCTGGGATTTTGAA -ATGTGTCATACTCATTGTTGTCGTGATGGTGCGAGCTTTTGGCAGGGTAGTTCCTCCTTC -GTCCATGGCTCAAATTTGATCCTTGGGAGAATGAAGACACCATGGTTATTTTGGCGCATA -CTCTCTCGGTAGACTCCAGCTACAAGGTTTGAGACTTCCCAAAGACCTCAGAGGATATCG -TTATCTAAGATAGTATGATTAGGTAGATGGTTGGAGCCAACCGCTATCTTGAGGCTACGT -GGATCTCTTTAATGGCTTCGAAGCTTCTAGGCATACAACATTCAAGCTTTTTATTGTGCC -GCTTAGCTTGCTATTATGCAATATGGTACCCTACCGGCTGACCCCAGGTTAGAAGATGTG -GCCAGCAGGAAGCTCTTTGGCCTGGGTGCCATCATAAGTCTCATCCTACATGGATTCAGC -GACGACGATATGGACCTTGTCGAGAATTCCCAGATCATTGGGCTTGAGTAACGGCGTTGG -GAACAAAAATTTGTCTGAGAAGATGACTGGAGAGGGCGGATGCTTCGAGGCTTTCAGGCC -TGCTTCTACTACAAGATCTACGGTTTTACATGACAATCAGGCTACCATCAATGGCGAGCA -TCTCGAGATGCTTACAACTACACGACCCTAAGTGGACGAGCGTGCGGAAGACCGCAGGAA -CACGCGAGAACCAACACGTCTCCCCCTCAGCCTGCTACCATTATCAAGTTGTTTTTGGAT -CATTACCTCAGGCGAAATTAAGTAGAAAAAGAACCAAAATTGACTCAAAGTTGTTAATAC -ATATCTCAACACCAATTCCTGGAATCATCTATACAGTACAGAGTCCCGTCGTTTCAATGC -AATCGACTGGATACTTAAAGGAAATGCAAAATGCAATCAGAAGACAGACATAATACGCAT -CGTCCTACTTCTTTTGCCCCTCGCCACCAGGAACCATGCCGGTAAACGGATCAAGGCGCA -GTTCTTGTGCTGTGATAGGAGTATAAGAAGTCCGACCCTCCTGTGCCATCTTCAACCGAC -GCGCCTGACCCGCTTCACCAGCGCGACCAAGAATTCCAGCAATAAGACAAGTGGCCCGTA 
-GACTGTCATCGTTCGAGGGAATCGGGTAGGTAACACGCGTAGGATCCGCATCAGTATCAA -TAACACCGATAGTAGGGACATTGTTAAGACCACACTCATGCAGCAAGACAACATTTTCAA -GCGGGTTAAGACAAATAACCAAATCGGGCTTGAGCGACGGGCGATCAGCAAGATCAGACT -TGAAGTTAGGCAATTCCTCGTCCAGCGCGTTAACAACCTTCGTCTCACAGTGTCCAAGAA -TCTGCTCGCCGTTCGTCAGAGAGCCCGGAATCCACCGTTCGAAGATGTGGTAGCCCTTTG -CCAGCGACGCCGCCTTGACAACGGCCCGCTTCTGGCCCTTCCGCGTACCGGCGAAGAGGA -TTAATCCGCCGCGTGCGGCAACTTCCTCAACTACTTTAGCGGCACGGCGGAGGTGCGCGG -CTGTCACGTCCAGCGAGATGATGTGCACACCCTCGCGAATACCGAAAATGTATCGCGAGT -TCTGTGGGTTCCAGCGCGAGGTCGAGTGGCCCAGGTGGGTTCCTGCTGCGAGCAGCATTT -CCAGGGAAATTTGGGATGGCTTTGGGGGGTTGCGGATGACATCTTCGGGGTCGTAGGAGG -ATTTCACGGTTGTGCCGAGCTTTTTAAAATTGCTTCCTGGAGGAGAAGAAGAAGAAGGAT -AAAGTTAGCTGGTATGTTGGAGGAATCACCCAAAAGAAATTCGCGGCTCACTTTGCGGCT -CGTCTGATGTGAATTCTTGTTGTACGGGTGTGCTGGCCTCATGTTGCGCCTGCACCATCT -CAAAGAGGGGATTTTCTGGACGAGGGATCTCATCTCTGATTGAAGTCTGCAGTGTCTCGA -CTGGGGTTTCGGGCAATGTCGATTGGAAGCGACTAGAAACCGGGGCTCGTCGGCTCAGAG -CAAGGAGCTGGCGGCCTGTGGGATGTTTTTAGCAGGGGCAATTAAAGATGATAGCGGCAA -TTGGACTACACTCACTCTGGCGCATACAAAGCTTCCGTATAATCATTTTGGGAATGCTAA -ATTGCAGATGTTGTCTGGCAGGGAGGTTCATAGGGACTTGTCCGCTCGAACTTTTGTCGG -GCTTGGCAGTTGATGCCTGAGGCCTTACGCTATTCCGCTTGGGGTGGTTTGGTCTCTCCG -CCTGCTACACTACTTCTCACAGACGGCAGTGTCTATCATCATTATAATCTACAGTGTATT -ATTTGATTTACTGTTTTCAAAAAGGAACATTTCAGCAAGGTAATCACTTTGCAATTCTGA -AATTATTATCAGATCACATAAAAGGTGATTTCCGATATCAAGTACTGTCATGGTACAAGC -CCTTCAAGGGCAAAGAAAAAGATAAAAGTGACCTGGCGGAAGAAAAAGAAAATGCAGGCC -AACCCGTTGCGATAGAGAGCAAAGAAAGAAATTCATGAAAACCGTGTCATCAAGCAGAAA -ACGTGAGAGAAAGGTTCATCAAAGACCCTCTATTTCTTGAAGAAGCCACCGAGCTGATCC -ATCATGGAGGGGCTGCTGCCGTTGTTGGAAGGAGGACCACCATATTGGCTGTTGGGGCCA -CCGTACTGAGGAGGAGACTGGCTGTTGCTGTTGCGGTGCTTCTTGACGGCGTCTTCAGCG -ATGCTACCAATGATGGCTCCACCAATTGTTCCGAGGAAACCGTGGTTGACCTTGTGGCCA -GCGAAGCCACCGGCGGCACCACCGGCGAGGGCACCGGCAAGACCACGCTCGCCCTCCTGG -GCCTCACCGGGAGCGCCCTGCTGCTGTTCGTAACCACTGTGGTCTATCAGTATATGATGA -ATGAACTGAAAGATGAGCGAAGTACATACCCCTGGCGGTTCTGGTCGTGCTGTCCATAGC -CTTGCTGGTGTTCCTGGCCGGCATAGTAGTCGGATGAACCACCCTGTTGCTGACCGTAGC 
-CCTGCTGGTGTTGCTGACCGGCGTAGTAGTCGGAAGAACCACCCTGCTGCTGGCCATAAC -CTTGTTGACCGTAACCCTGCTGAGGTTGGCCGTACTGTTGCTGTTGGTCGTAACCCTGCT -GACCGTAACCTTGTTGGCCACCGTAGCCTTGTTGGCCGTAACCTTGCTGGCCATAGCCCT -GCTGGCCATAACCCTGCTGGCCGTGGCCCTGGTTGTATTGGTCGTATCCGGACATGATGT -TATATGGATTGATTTGGTAGAAGATTGATGGAAGGTGAAAGATAGGAAGGGTGAAGAGAA -GTGAAGTGAGGGGGGCAACCTTTTATAAGAGGTAACTCACAAGTGTACGGAGTAACCACA -GGGCTGACGGAAAGAACCCCACATGGCAAGCGAAAACTTACGGAACGAGGGGCGCAGTAA -GCGTCAGCCTTTCAGCCTTGTAAATCCTCTGGTATTACCATATTCTTACTAGTTTGCGAA -TCAGAACCGAATCAGAACCATGCACTCCGCCCGATTAAGTGACACCGCCACACTCCAGCA -TAGTACCTTACGTATTCAACTGTGGCTATGCCAGGCTCATGTTTGAATATTACCGGATCT -TCAAATAGATTACACGGATCCCTCGGCTTCCACAGATCTTCCAGAATATCTTTCCCCCAC -TACTGCATAGTGCCTCTCGGCATTCCCGCATTGGGGCATTTTATTTGGTTGCCTACGCTA -AGCCCCTTCATTTCCCGAATAGACCACCCGAATTTGGACTTTGCTACTCCGAACTGTGGC -GTCACCCGAATATGACTTTTGACATCAGCTTTCTAGTAAGCAGATGGAGTATCTATGTTC -TATAATATAGAGCATGAGCCCACCATTTGACATCTTGGTTAACCAATGTTCTTAAAACAA -GTCAGATTGCTTTGAGATCATAATGATGACATGTTCAAGTGATCTTCACATGATGGGCGA -ATCGGGAGGTAACAACGTATAGTGTAAACAGTGTGTATAATTTGACTTTGGGGATGTCAG -TAATATATATATATCTACTGACTGAAGAACATGATAGCCAAGATACAGGATATAATAGAT -ACATCCTGTCGGCAATGAATACTAGCAAATGAGACCCTTTTCTTGTGAACTTGACCACAG -ACCACCGGTTGATCCCGTGGACCACTCAGATGCTCTCACCTCGGGTGTGTAGGATGATCG -ATGAGGATCGACGCATGACTAGTTTAAACATTGTATTATGACATGGGACTTGATCGGATG -ATAGTAGCTTAATCCATCCCCGGGTAATGTACATCTTACGTTGTACAGCGTATGCGCTGA -CTGCGAGTTCGAATCACATCGGCAATATACTCCGAACACACGGTCTCGACCGTCGTGGGA -CCTTGATCGGCTCGAGCTGGGGATGATATAGGAGATGTGAAATATTGTTATATTGTAGTA -TCGATCAAATGTTCTGTAGTTAAGTGCATAGACCGCGTAAAATATACGATGTAGTTTAAA -TATATAAATTAACAGAGTGCAGCAGGCATGCCGACTATGCTAGACCAATCAAATCGCCGA -GCTATAGCAAATTGAACGAACGCTCACCTGTACGGAGTACGGAGTACATATGACTAGTTT -GTCAACTGCTCATCTGAGCTACAGCAGTTCACGCCTCATCAGCCCTGGCGCTCTCACTGC -CGGATGATCATTGGAATGTTACTCTGCCGATCCACTTTTTGCTTTGTTTTGAATGTGTCC -GCCCTATCCTTCTCCTCGGTGTGAGACTCCATAACAGGAGCTAAATCATGGTTATCCTGC -ACTTTTAACTCGAGCTAGGGGTACCCGATCGGCATGGGCAAGTTTTCATGGAGGTTTTGC -TTTTTTCACACTTATAATTGTCTCTAGGGATCATTCTTGGTTCTTCACTGCTTCGTTGTC 
-CAGATACTAGCTATTACGCGCTTACTTGTATATGACTGGGCTATAGTTAGTCCACGTGCC -AAAAGAATGCTGCTGCAATCAGTCCTCTCCGCTCTGTGCTTTTGTCTCAGCATCGCATCG -GCCAAATCACACCCAACGGTCTATATAATTCGACACGGTGAAAAGCCTCGGAATCCAAAC -GAGCATGGTCTGAGCTCAGACGGGGTTAAGCGGGCACAGTGTCTGAGGCGTGTTTTCGGT -CAAGACACGGAGTACAGCATCGGGCATATTATGGCACCTCGTGTGAAAAAGGGTAGGTTT -GCAACAAGGACCGACAGGAAACTAATGCGAGTGCAAAAGCGAGGCGCAGGGGTGTCGAAT -ATCATAGATATGGCTAACAATATGGTCCCTGCCTGTAGATGGAGCGCATGGACGTTCGTT -TGAGACCGTTCTGCCCCTTGCCAACGATCTCGGTCTCGCAGTTGACACACACTGCAAGCG -AAACCAGGTGAAATGCGTCGCTAAGGCGATCAGGTCATATGACGGACCGGGCAACATTTT -GATTGCATGGCGGCACTCGAAAATGGGTAAGATTGAGGAGGAGCTTGGTGCTCTCGAGTC -TATTGAATATCCGGATGAGCGGTAAGTTATGGTATTTTCATGTATCTTTGGCTCATGTGT -ATGGGGTTTGCAGTAAGAGAAACATATCTGTGGGAGACGGTTGTCTCATGGCATCTTCGA -CTCGCCATTCTCTTCGACAGAAGCTCCTTTAATTGCCTTGTGTTATGTTTCGGTGCTGAC -TGTCTCAGATTCGATCTCATTTGGTCAGATCCTTGGCCATATGGGAATGTGACGAGTATC -GAAAGCGAGAAATGCCCGGGACTAGATGTTAGGACTGCTCTCGTGGATCAGTTTTGATAC -ACGGAGTAGAGATGGAAAGCATATGCACTTTCTTTGTCATTAAGCGTATATTCATGAATT -GAAATATAGTAATACATATGATTAAGAAATCAATAGAATAGAAAGGTGGAATAGAGCACA -GTCAAGGGAATGAATCGAAAGTACAAGGTCAACGGAACAATCAACCGACCTCCAGGTAGC -CTCTCTATCAACGCACAGACCAACACAGCAAAATTGTTTTGTGAAGGCGGTGAACCAGCC -GAAGCGGAGACAATTGTAGCGCCAAGATTTCCTCCCCCCAAATTTTTCGTTGCGCTTGAT -TATCGAGCTTTCCCCTCCAGATTACAATTTCTGTTTACGGGAAAAGATCATGGATGATAT -CGAGGTTTGTGCACTCGGAAAATGCCCCGCAAGGATTGCGAGCTGACTGCCCCTAAGGAA -CGGCTGCGCAGCCATGCGCAGGCATTTGACGGTCTACTGTCTTTGATCCCCGCCAAATAC -TATTATGGTGAAGAAGATACAAGTGTATGTAGACCTTGTTGCATTTGATAAGCTGGCCAC -GTCGGAGTTGGGGTCAATATTGTGTTTTTCCCATCATCGTTCATTCTGAAGATCTGTCTG -ACCATCACCTTCATCTTTAGGATCAATGGCAACGCAAAAAGCAAACCAAAGAGCAAGCAC -GTGAGGCGAAGCGGGCGAAGCTGAACCCCGAAGCCGCTAAGACCGCCAAAGATGTGATGG -ATGAAAACGCACGCAAGAGAAAGCGGGAGGACGGCACCCTCGAGTCCGACTCGTCTGACG -GAGAGTTGGGCACAGAGACCCCTAAGGAAGGACTGCAACGTGGAACCGCCAACGTCAAGA -AGCAAAAGCAGGTCGAAAAGACTGATAAGCCAGACGCTGCTAAGACCGCAGAGGCTGAGG -CACGGAAAAAGCAAAAGGAAGAGAAGAAAGCCCAGAAGAAGGCCGCCCAAATGGAGAAAA -AGAAAGCCAAGGATGCTGCCCGGAAGGAAAAGGTTCAGCAGGCCAAAAAGCCATCGACTA 
-CCCCAACTGAAGATTCGGAAAAATCTGCATCCAAGCCTAACGGCATTGCAACCAAGGACA -CTGAAGTGTCCGAGGATGAAGAGGATGACGACCAAGAAGATGGCGGTGTTGTTGAGGAAG -GCTTCTCCGTTGAGTTCAATATTGAACCGGAAACCCCTTCTTCTGCCCCATCTACCACTG -ACTCCCCTAGCCTCGATGCCTCAAATCCTCAATCGAGCACCTCCTCTACTTCCTCCATCG -TTCCCCCCTCCGCCTCCACCGAAGCCAAATCCTCAGAGCCCAAACCTCTCAAGCACACAC -CCGAAGAGCTGAAGCAACGTCTACAGAAGCGCCTGGATGAGCTCCGGGCTAAGCGCCATG -CAGATGGACTCAATGGCAAGCCTGCACGTAACCGCCAAGAGCTGATTGAAGCCCGCCGGC -AAAAGGCCGATCAAAGAAAAGCACACAAGAAAGAACTGCGTGAAAAGGCCAAGATTGAGG -AAGAAAAGAAGAACGACGAGGCCATGGCCCGCCGCTTCTCGCCCGGTGGCTCTGGATCTC -TCCTGGCATCCCCCCGCTCCCCTGCCGAGTCTGTCGGCTCTGCCAGCAACAGCTTCTCTT -TTGGCCGTGTGATGTTCGCAGACGGCCAGCAGACAGATGCCACTGGAACCAACGTGCGCG -ACAAGCCGAAGACCAGCGGTGCTCGTGATCCGGGTGCCCAGCTCAAGGCCGCCGAGGCCA -AGAAAGCCCGCCTCGAAGAGATGGACCCTACCAAGCGTGCCGATATTGAAGAGAAGGACA -TGTGGCTGAATGCCAAGAAGCGTGCTCACGGCGAGCGTGTCCGTGACGATACCTCTCTGT -TGAAGAAGGCTCTCAAGCGCAAAGAGACAGCGAAGAAGAGATCCGAGCGTGAATGGGGAG -ACCGCATGGACGCCGTCGCCAAATTCAGGGGAGAACGCCAGAACAAGCGCGAAGAGAACC -TCCGCAAGCGCCGCGAGGAGAAGGGCACCAAGGGCGGCAAGAAGAAGTCCAGCAGCGGTG -GAAAGAAGAAGGCTGCTCGTCCTGGATTTGAGGGCAGCTTCAAGGGTGGTGGCAAGAAGA -AATAAGCAAGCTTCCATCTGGTGGCTTAATTTCATTTCATTTCTCACTTGCTTTTACATC -AACCTATCTTCTAGAGTCTTCTTTTTCTTTTCTCTGCATACCCTTCAAACGTGTCTTTGT -CTCTTGGATGGTCACATCGCTGAAACAAGAAAGCACACGGAAGCCCCCGAACCAATTAAA -AAAAGTGTTTTGAGACTTTGACTGGGAAATATCCGTCCGGAGATCAAGGTGCCTGGTGTT -TGCGTTTGTGTCTTTGGCAGCTTGGTCTGTCAATTTATGTCTATGCATGACAGCATCGGT -AGCGATTTGATTTATCTGTGAATACAATATCATTTTCTTATCGAGCTGTATACATGTTTC -CTGTGTCTTCATGTCTATGTAACCCTCTAGATCATATGTAATATATGTATGCGGTTAGCG -GAAAAGTGTTTCGCGCAGATATAAGATGGAATTTATGGAATTTATATATTTTGAATTGAA -TCTCAACCAGAAGAAAAGAGCTGCTAGATTCCATAACATAGGAAATAAAAAGAGACAAGG -GAGCCAGTGTTCCAGGAAAAAGGCATCTTCTTCAGGAATTGACTAGTACAAGATGATAAC -ACAATCGAGGGCAAGTTATGTACAGTGATGTGATGGGGCTTAAAATCTTTCTTGTTTTTC -AAGGCACTAGTAGTGTGCCTCTTGAATGGGTGGGTATTGTGACCAGCATCTTCGTGTTCA -CACATCTAAAATATGAAATAGATGTGGATTTGCCTGAAGATCTCAATCAGATTGGAGGAA -AATAGATTATCGGTAAGGAGCATCTGCGCGAGCAGCAGCAGCAATTTCAGGCTCCCACTT 
-GTCACCGTTGGAGAGAAGCTTCTCCTTGTTGTACAGAAGCTCCTCGCGGGTCTCAGTGGC -ATACTCGGCGTTGGAAGCTGAAAGACAAAGTATTAGTAATCCAGTCTGAATGTCAATTGA -GATGTGACCGGAGGATGTTGGACTCACTCTCAACAATGGCATCCACAGGGCAGCTCTCCT -GGCAGTAGCCACAGTAAATGCACTTGGTCATATCAATATCGTAACGGGTCGTCCGGCGGC -TTCCGTCCTCACGCTCTTCGGCTTCAATGGTAATAGCCTGGGCCGGGCAGATGGCCTCAC -AAAGCTTGCAAGCAATGCAGCGCTCCTCACCAGTGGGATAGCGACGGAGAGCGTGTTCGC -CACGGAAACGGGGGGAGATAGGTCCCTTCTCGAAAGGGTAGAAGATGGTGTAACTAGAAC -ATGGCAAGACAATTAGATGATGCAATTCGAAGCTATTAATTCTGGACTGCAATCCGCATC -GCCGGCAAGGTGTGGGACAAGGACTTACGGTGGTCTGAAAAACTGCTCGAGCACCACGTA -CATGCCGCGGAAGATCTCAGCCATGAGGAAGTATTTGCTGGCCTTGTCTAAGGCGGACTC -GGAGTCCTGATCCCAGCGCTTGGGCGGGGGCAGGCGGAACCCGGCCTGAGGGGGTCCCGC -TGGACTTGCTGCGCGGTAGGTCGAGGAGGAGAAGTTTGCGGTTGAGCCCAGCAAACCGAA -GGGGGTCGGCCGACGGTAGGCGATCAGCCTCGCGACCGAGCGGGTGGCGGCCATGGTTTT -GGAGAGGAGAGTGAGTTGGAGGGATGGGGAGTCGAGGTTCAATTGGAGGTGGTAGGGGAG -CAAGTCTTAGGTTGTGGTGGTTTGCAGGTTTCGGGCTTAGGTCATTCCTTTCTGTCGGGC -GTTGATTGGCCCGCAGGATTTTGCTGGGCGCGACCTCACATCTTTCCTCTTCTGCGGCAG -CCGTCTTCCTCCAATCTTTACCCTTTTACTCTCCACCTTTTCTTTTTAATACTTCAGATC -TTTTCAGCCTTTGCGCTAGTCCGATCAATCTATTTCATCATGTTCCGCTCCGCCATTGCT -CGTTCCTTGAAGGCTTCCGCCCCCCGTGTCATCAAGACCCCGGCTCCCTTCCACATCCGC -AGCTCCCCCATTGCTGCTCAGGTCCCTCAATTCACTCCCTGCTTCGCTCAGGGTATCCGC -CTCTACTCCGCTCCTGCCGGTCTGAGCAAGAACGAGGCCGAGGGCCGGATAGTCAACCTG -CTCAAGAACTTCGACAAGGTGCGTCATGTTGTTTCCCCGGGGAATCTCGCGATCTATCGG -CTATAGACGAAAATCAAAATGCTAACTGGATCTGTCTAATGTTTAGGTCTCCGATGCCAG -CAAGGTCAGTAATCATCAGTCGGATGGACCGTTATGACTATTCTATCGCTATACGCTATA -GGAGGGGCGACCGAAAACATTCATCAACTAAACCTGTTTGACCTACAGATTAACGGCGCT -TCCCACTTCTCCAACGACCTCGGTCTGGACAGCCTGGACACCGTTGAGGTTGTGATGGCC -ATTGAAGAGGTACGCGCCCGGTTTCACTCGGAAAGCACCAACTGTATATTCTATGAAGGA -TATCTGACTGTTTCAACAGGAGTTCAGCATCGAGATCCCCGACAAGGAGGCCGACCAGAT -CCACAGTGGTATGTTAGAACTTGGTGCACTTGTATTTCATCATCAGAGCTAACATGAATT -TTACGCATAGTTGACAAGGCTGTCGAGTACATCATGGCCCAGCCCGATGGTATTTGAAAC -CCGTAACACCTCTTGGCCCTTGATCTTTGCTGACTTCTTCATAGCCCACTAAATATTTAA -TTTGAAAAGTTGGGATCGGTTGGAGAATCGGATAGAAAAAGAATCAGGATGTTGGCGTCT 
-GTGGGGTTCGCTGGACACAGTGACGCTCAACTGACATGCTATTTTGTATGCTGAATACAT -GCTAGTGGCTTTTCTTTCCTATATCGTGTATATCATTTGAGTGCAATTCATGCAATTTTG -AAACAGTATTGACAGTATTGCTACTACATGGTGATAATCACCGAATGCGTCTGGCTTTTG -ATATATGGAATTTTAAGTGTATGTTCCCAGCTTAACGGTCCATTGGGCTACCTTCGAAAG -ATACATACACATACATGTATTCAGCTATATAGTTACCTTTATCATATGATCGTAGTCACG -TAGACATGCATGCAGAGATAGCTTCGCGAATGTCAGTTTCACAGTGAACACAAACAAAAC -ACAAAGTGCATAAGCATATTGAAATGGAGTGAAGATCTCGTAAGTATATGCCAAGTAATA -TAGATTGCCAAAATAGATAAAAAAAATTAGAGCTAGAAAATAGTACCATAACTGGGAGAT -TACCATCCCGATGTCCTTTGACGGGCGTCTACGCACCCGCAAATACTCAAAGAGCCACCT -CACCGAATGAACTTGCTTTGACCTTCACACTTTCCACAACTCCATTGCAGTCGCCTATTC -CTCTTGTATTGAGGCGGGTTGAATTGAATATAACCGACACTTTATCGTTTTGGCTAGCCC -ACTCGGTCCCTGGTTTCCCCTGACACCGAAAGTCCCACTTCAACCACAGACTCTCGGTCT -TCACGGTCCCTCTCAACATCGCACCATCCAGACCATTCAAACCACCGGCATATAGATCGG -AAGCATTTGGTCATACCGGGCCATATCTCTCTCTGTTCCTTTTTGATACCCCACCCCACC -CGATCTCATGGCCTCAGCAGCCGGGGGCTTGACTCGCCGCAGAGGCGCCGGTCGAGTCGC -TGGGGCAGACGAACAAGATGACAGCCGAGTGTCATCTCCAATCTCCCGAAATGGCTCGGC -GATGGATACCCGTGGGCCTGAGACGTCGTTCACCAATGCAGAGAATGGCCACAAGATCGC -GTTCGATCCCCGCGACATCAGCGAAAACGAGGAACGGGGCAAGCAGCCCAAATTGACTCT -GATGGAGGATATTTTGCTGCTGGGTCTGAAGGATAAACAGGTTAGTCTTCCTGGATAAAT -CTCATGTCCGATTGTTCGATTTGTGGTTCTGATCGCTGACCTGCCTGCGCCGCAACAGGG -CTATCTGTCTTTCTGGAACGAAAACATCTCCTATGCCCTGCGAGGGTGTATCGTCATTGA -ACTCGCGCTCCGCGGCCGTATAAGCATGCAAAAGGATTCCTCGCGGCGACGTTTCCCCCT -GGCCGATCGCATCATTGAGGTCATCGACGATACACTGACTGGAGAGGTCTTGCTGGACGA -GACACTGAAGATGATGAAATCAAGTGAGAAGATGAGCGTCAACTCATGGATTGATTTGCT -GAGTGGTGAGATGAACCTGAAGCAGAAATTTGGGTCGCGACGTATTTACTGACCGGGCGG -GTTGTAGGCGAAACCTGGAATTTGATGAAGATCGGATATCAATTGAAGCAGGTGCGGGAG -CGACTGGCCAAGGGTCTTGTAGACAAGGGTATTCTACGAACCGAAAAGCGCAATTTCCTT -CTCTTCGATATGGCGACGCATCCCGTGGCCGACGGCGGCGCGAAAGAAGACCTCCACCGA -CGGGTACGCAACATCTGCAGCAATCGCACTGTGATCATTCCTCCTAGCCCGTGGCTCCCC -GAAGACGCTGAATTCCGATACCTTCGTTCAATCACCATGGTCTGTGCCGCATACGCTGCT -AATGTCCTTGAGAACGCCTTGGTCACCATGAGCCATGAGGCCCGCGAGAGAGCTTTCGCA -CAGGTGGATGAATTGCTGGCCGAGTATTCGCAATGGCCGTTTGCCCGGAAAACTGGAGGT 
-TCGCAGGCGATTGGAGCTAATTTGGCTCAAGTGGTCAATGAAGAGATCAATCGAAACAAG -GACAAAGAGCTTCAGATGGAGGTAAGCTGCACTCCCCCCGGTTAACCACTTCATATGCTA -ATTCAATAGATTGTCGCCGCATGCTTGAGCGTTTTTACCAGACTTGATTCTTTACTTTAA -CGGAGGTACCGCGTAATACATTGGTTTTTGTTCACGACTtcttctttctcgccttctctc -accttctctctctctctctTGCCTTGCACCTCGGCCACAAATCATCGCGTTTCCTCTGTT -CATGTAGTAATCTGGGGACAGAGCGTCGCTTTCTTTTTTTTTATCATATGGGATTGTCGG -GTCAGAACTTATTCTTATATGGGATTTCTCATCGACTTTGATTTACTTCACCCTTTGTTT -TATTTTCTTCTTTTCTTGAACTTGTTGCATTGCGCAAATGGTGGCCTACTGAGTAAGTGA -TCGTGGCACAATTCTTTCAAATTGATACACCCTAGGTTGCTCTCCTCACTTTTGATATCT -ATTAAAATGCATAAACATGACAATCTAAGTACTGGCGACACTATTAACCCTAACAACCAC -CATTCCACCACGTCGACTCCCCGCCGCCCCAGCAGCTCCAGAAGCCAAAAGATCTGTCGC -ACGCGCAAGATCAACAACAGTAGTCTCCCTCCCGCAGACCAGATTCTCCTCCACCAACTC -CGCAATTCGCTCCATAGCCAACCCATCAGGATTCACAGGCACAAAACGGCTCTTAATACC -ACGCGTCTTCTCACCTAGCACATCATTCTCAGGAACAAGCGGAGACTGCTGCGCAACCTG -GCCATCCACAACAGTCAGTACAGCCCCGTAGTCCTTGACGACACCGGCAGCATGGACAGC -TCGGAATACCTCGCCGCCGGCACAGTCGAGCACGATATCAACCGGCTGCCAGCGGCGCGC -ATGGAACACCTTGTCGATATTACATTCGTCTGGGGATGGGAGGTGCGGGATGATGACTAT -CTCGTCCACTTCCCAGTCCTTCTCAATGATATCAACCTCGACTTGCGTGCAGGTTACACA -GATCCACGGCCGGACGGCGGTAGGAAAGAGCGAATCTGCTCGCAGAATCTGCACTGCGAC -GCGGCCAATGTCGTTGTTGCGAGCGTTAGTGATGAGGACTCGGATTTTGGGTATTCTTTG -CTTCCCGTTCCCGTTAGAATCTGTGCCGCCAATTATGCTTCCTATCAGGCTGCTACGCTG -TCCAGGGCTAGTGTTACCGCGTACTAAATTCACCAGGCTGGCGCGGCGGCTATTGTTATT -GGTATCGGCGTTAGCATTAGGATCGACTGTGCTTGCACTGCGGGTGGATGGAGCGTTGAT -GTTGCCTTTGATCAGACTGATCAAACTGTTGCGACGGACAGTGGCCTTGGGATTGGGGTC -AACAGTGCTAGCAGCGCGGGGAACGGCTTTGCCACTGGGATTACCGCTGATGAGACTGAT -TAGGCTTTCGCGGCGCACATTCTTAGGATTGGCAGGGGCATTGACTGCTCTGGCAGCGGG -GGGGATGTTGTCAATGTCACCACTATTGGTACCGGCAATGCCACTGGTATTGTCGTTGGG -TGCGTCGCCGCGGCGAGTCCACTGCCAAACCCAACGCCCGTTCCCAGTTCCGTTGCCCGC -ACCACTGATAGCATCATTTAGAAGACCACCGTTCTCGCCTCCAGCACCGCCCCCAGCACC -AACGACCGTGCTTTTGCGGTGCCCATAGCCATAATCCTTGCTGCCGACCACGGTATCACG -GCGTTGGGGCTGCCAAAGCCACCGCCCCATCTCATTTCCAAAAGCATCATCACCCTCGTT -TATCCCGCCAGGAACGTCGGGATCAAGCCCCGCATAGCGAAAAAGCGCCTGCCAAGCCGT 
-CAATGCCGGCAACGCAAGAGCTGCAGCCCCAGCAACAGTGATATTGTCTGGTTTCAGCGC -TAGCTCGCGCTCAGTCGCGACTGTATAGTCTGCCGCTCCGCCGTCACGCGTATAACTAAC -CACACCGAAGACTAGGTCGCCAATCTTGAACTTCGGGCCTTCTTCATGCTCGTCGTCTTC -ACGCGGCGTGCTGATGACTGTCCCGCAGAGACTGTGGAGTGGGATCTGCGGTGCGGCCTT -TGGGGGATTGAGGAGCTCAGCTAGACGGATCTCGTTGTGACAGAAGGCTGCTGTGGCGAT -CTTCAGCAGGTATTGCTGAGGAGAGGGCTGTGGGGTTGGAAAGCCTGTGTCGAACACGAG -GTCTGTTCCCTTCTTGCGAATAACGCTTGTGTTTGTGTCGGGTGACATAGCGTCACTGAT -TTCTCCTTCTGGGATGCCGTCTGGTGCTTCGTTTTCTAACACACCTGGGGTGCAATAGAG -GGCTCGCATCGATAGTGGTGTGAAAGGTGAGTCGTTCAGTGTTGACGAGGTAGGTTTTGC -AGCATGTGAAGTGGGCTTCATAGCCTCTGAGGTGCGCTTTGTTGCTTCTGATGTTCGCTT -TGAGGGCTCAGACGGCTGCTGCATGGCTTCTGCGATGCGTATTGAGGGCTCCGATGGACG -CTTCGAGGGCGCGTGTTCCACGATGGGTTGCGTGGGAGGTATGTCTGACTGCATACTGTC -GTTCGAATCTTGATCCATCTTTTGAGTTTAGAAATTTGAGTCCAATGAGTAGTAAGAAGT -CTGGGGCCTCAGAAGTAGAGTATTCTCTTTTGCCATCTCACTTTACTATATATCGGAGGC -CATTTCCTAATCTTGACGTATGTAGGGCTTAAGTCAACATCGCCACTTGACAATCCCGAT -CAATTCCATTGCAATGAACAGCAATTTGATCTTATATCTCTATTTGACTGGATTTGACAC -GATAATTAGTATATAGAGTCTTTTATACACGGGAGATAGCATGGTGAATTATCAAAATCA -AGGCCCTATAGTCACGGGTCTTATAGACTGTGCCTTGCCCTGGCCATTCAAGGTACGAGA -TAACTTAATCTAGCGGGTTGGTAATTGTGTATTCCTTATCCCCCAAACTGCTTGCCTTTG -ATGTATTTTTGCCTTCCGTATCGCTTAGACCCGACGTACCGTTCTGATCGCCGGTCAATC -CAGGTGACGACTATTTTTTATTTATCCTTATATTCACTCTCACACCAACAATTCGGAAAG -GCAAAAGAATTCGAGAGAATATTATTCACAAAATAAACAATCCAACTACATACGTATGTA -CAGATTAATACATCTATAAAACTTGGTCCTCCATTCCCAGTCCATCAACAAAATTATACA -ACTCTTCCGGATCCTCAAAGACCGTCCAAATATGCGATGGTACATATTCAAACGCCTTTC -TAAACCTCTCGGGCTCATTCTTCTCTTCCATATGCGCCACATCCGTAACTTTGCGAATAA -TTATAGCATGAATCCAGTCGGGATACCGACTATACATCTCCGCATAAGCCTCCGGATCCT -TCTGCGTCGAATCCCCAATACAAAGCACCCTCCGACGTGGGAACCATCCCCGAATCTTCT -CCATCCGATCAACCTTGTACTCCTGCGTCTTCTCAGTGAATGACTTGATAAGTCCCGTGA -TATCCATCCATGAGTAATCGCGTAGCACCAGCGTACCCGGGCAGTAGTGTTCATCTAGGA -AGTCGTGCAGGAAAGGGTAAAGATTATATGGCGACGCAGAGAGATAAAACCACGCAGGAC -AAAGTTCAGCATGAATATATCTGTAAAGCTCCGGCACACCAGCTATTGGCGCAGGGATGT -CCGCGAACGTCGTGCGAATAACTCCCGTGGGATCAAGCGTCTTGGTGTGTTTGATCGTAT 
-CGTCAATATCCGAGACGACCAGCCAGCCCTCAGGCGCAGCAAAAGTCGTACTCATCGACA -CAAGCTTATCGCCGAAATGATGCAGTCGCGGCCGCAAAACCTCCCCATCATAGATCCCCA -CTCGGCCCATATCGACCGTCTGACTGACAATGCCGTTGCGATCGGATGGACCGAGTCCAT -GCGACTGTGCCGGCCGGGATATGGGCACCTGCAGCGTGACCGTGCGCGCCGGCGAGACCT -GGTTGAGGAATGGCTGGACGCGCTCTGCGATGCGTTGGCGTGCTTCGTCATCGCCGCCGA -CCTCACCATCCAGCCCGATATAGTCTGCGATCGCAGCGACGAGGTTGCCCACGTCTTTCC -GTCCGTCTGTTTCAAAAATGCACGCGACTACTTCAGCGTGCCAGCACAGCTGAGTCTCGT -CCGTCTCGTCATTTATCGGCTGGTACGCTGTATTGTTTAGCAACCATACGGTATGCTGGC -GTGGGTCTGCTGGGTGCGCGAGCGGGTTCTTAGGACCGAGGAAGGATGAGAAGAAATTTA -TTAAGGCTGAGCCGCGCTTTTCTTGCGCTGGCGGTGGAGATGTGGAACTTGATGCCTCAT -GAACCTCCTTTCTTGCATCAGCGACCGACAACTCGTCTTGTTTGTGGTGCGGTTTGTGGT -GCGGCATGATATTGGCGCCAAGTTAACTTCCAATCTCAAGGATGAAGCGTTTATATCACA -AAATAGTAATATAAGCAGCCCCGAGGAAATTCCCCGAAAAGCACATGTCAGGTCGCAAGG -TTTAAAGAAATAGGTGAAACTCATCAGGAGAGAGACGACGATCATATACGGAGTAGCTAG -CCCGCGTTAGTTGACGATCTAGTGTTAGTGCCATCCGGTGCCCTCTAGCCAAGAGCCTCC -ATTCCCACCTGATCTTGTCTGCCTTCGAAGGCTCTCGTATTGATGTCATATCTGGAGTGA -AGACAGATCATATCAAAGGCTGACAGGGGCAGGGCCATATGCCATATCAGTTGGTCAGGT -CAGGCGAACGGAGCATCCGTTGCGGGGGATGGCTTGGCAGTAGGGCTGAGCTTCCGAACT -TGATCTTGACGAACTGGTCTTTTTTCCCCCGGGTGATCGGACGCACAGCCTCGATGTTTT -GAAGGCCTTGGAGGCTCTGGAGTTATGACGTTGCTTTTTTCAGTCGCGTGGGAAAATGAT -CGATCTTTTCATGTAGCATTCTCTGTGATCTCATGCAGAGTTTATTTTATTTACAACTAC -ATGGTTTCTATCAAGGGCACAAAGAATGAAATCCGCATAAACATACACCCCGAGAATTTA -TGAATCACATGTACCGTGTCTATATCTAGTGACCATGTTGACTTCTACGAAATGTACTCT -ATCCAGACATTTCAAATCATTCCTGCAACGGGTATCCGTATCCAACCAACATAGCAGGCT -CATCAATGTTATGATGCTGGTAACTGCAAATAGGAAAAAAGGTCAACTGTTGGGCCAGAG -AAGTGGGTTAAAAGAACATTACCTGAGGACCCCAACGGCACCAGGGTCGAGCATCAGAGA -AAGAGGGAACTCCATAGGATATCCTAACCAGCGCTTGGTAAAGGCACGAAGCAAATGTCC -ATGTGCAACCAGCAGGATATCAGAAGGAGTCTCTCCGTGCATGTTGCCCTTGTGGAACGT -GCGAATCTCTTTAATGAGATTATCAAGACGGTCCGTCACCTGTTGCGCAGACCTATATTG -ACTAGGTTAGATATAATTGATCGTAGGTGACTGGACATTATTCCAAGAAAATTCCTTCTT -CAAAAGACTTACTCTCCATTTTCGCAGCCGTCTCTCCAGATATCCCACTCGCTCTCGGTA -TCCAGACCGTGGTCCTTTCTCAAGGCCCGGATTTCCTTAGTCAAAAGACCTTCATACAAA 
-CCATAGTCCCATTCGGCCAGTCTATCAGTCTCAGACACCTTTTTCGCATCATAAAGCGCT -TGCTTATCGGCATCAGAGAAAGCAATCTCAAAGGTCTGCATAGCTCGTCGACGAGGACTG -ATATAGACATGCGCAAGTTTGGCTGGGTCAATCAGTTTGCCGGATCCGACAATCATCTTT -CCGGATGCATTGACCTGGTTAACACCATCTTGGGTAAGTTCAAGCTCAGTGATACCGGTG -TACCGTCCGTTCTTGGACCATTCCGTCTGTCCTAGAATCTATTTAGACCACAGAAAGAAA -ATATTGGCATTCTGGAACATACCGTGTCGGTAGAGGAAAACACGGGGTGTGCCAGAGTCT -TTATCGGGCATTTTGGACTACTGTTCAAGGGTATTCCAAGGGTGGTGGTTGAAAGTGGGT -TGCGGGATTGTGGGAAAAGTGGGCGGCGTTTCCGCCCCGGTGAAGGTGAGCTTCACCCGC -CACGTGTACGAACTCAACTTGGATAGAACATTAACCATTTATACTCAATTCCAAGGGGGC -TAGAGAGTATTGCTGAGGCACTTGATATCTATGACCTTGGGCTTTTGCCCCGCCATCGCG -GGGTTTGGTTATAGCGCGGCGAGCCTGAGAGGAGTGAATGATGCGTTCATGTTACTCCAC -ATAAAACCATTCAACATTCAGATAGATGAGACCGGCGTTTATATTGAAAACTTCAGTCTG -TATAGCCTGGAATGGAAAATCCTTGTTTTATTTTTATTTTCTACATCTGTGCCACTGAAC -GAGCTTTAGGATACTAATATTGTCATTGAGAAGCAGTGTCTCACCAAGAATTTCCTAATC -ATATAGTGCAACTTGTTGCGGAAAGGAAGCAGATGACAGCTTGATCCCGGCCCCTATACT -CCGTAGGGAAGAAAGCCTTTCCCAGTCGCGTACTGGCTGGCCTTCGTTTGTCATTTGTCG -TTGAAGTAGGTTGTTCCGAGATGTTCATTTTAGATCTATATAGGAGTACTACCTTTTTCT -TAGTAATCTCTATAATTTCTAAATTGTGAATAGCGTGTTTCTCGGGGTTTTAAACGTGAT -TTATGTAAGTACTAATATACTATATAGTTACTGATTAAAATAATAGAGTACTACTCTAAG -ATAAAGGCGAGAAACGAGGTCTATACATAGGGTTTTTAAAAAAGTCACGTTTTTGAATAT -TTGTACTGTTTGGTGCTTTTGGGACCTTAATAACCCCCCATCAAGGCTCAGTGTGCTTTC -ACTCCGGGCCCATAACCCACCGATGGGCGTGAAGATAAGGCTCAAAGCCCAGCCCATTTG -CGGGTTGGGTTGGGCTATGGGCTTTGTGGGCTTTGCGGATCGCTAACTATGAGGGCCCTA -ACCTAGGTCCTAGGTTGGGTCCAGGTTAGGCCCAGGTCTGGTTAGGTTCTACAGCAGCCT -AGGAATGAGATAGATATACATATTCACTTCAGCTTTATTTATTCATCCACGTGTAAGTTC -TACAAAAGCCTCTGTAGTACGCATCCCCAATGGCTCCTATACTATTTCCCTAGAAACGAC -AAAGATTCAACACGCATCAGGCTGTTGAATCCGGTTTCTTTTGCCCAAGATCCTGAATAT -GTATAAACCTTGTAACTAACGATACCTTGCATGTCCACTAAAAAGGATTTATCTTAGCCC -TTGCATATTCCAGGTCTTGCGTTTCCTAGGCATCCATCCTACCGTGACTTTTTATTGTGA -TTACCCTAGTATGGGAAACACAAGTCTCAATCAATTTCATATCTCAAGCAGATAAATCAG -ATAACAAACCTATTTGCATACGACACGAGACTTGGGTAAGTAGGCATATACATACCCGCG -GACGTCCACAGATCGAGGCCAATTCTAACTAGTCTATACCAGGTAGATATTCCTCCAACA 
-TGTCTTCAACAAGTTTACCCTTACTACCCACTCGGTCTACATCTCAACATCAAAACGCCT -TCAGATACGCCCCTAGGCGGAAGAACCGACCCAGCCGAGCTCCCATCCGTCCGGTCTATA -CAGATGCAAACCCGTTCATCTGTCACACTTCCTCCCACTCGTTGACGCATTCTTGGCCCA -AACATCATGCAGTCGATTGCCCGCGATGCAATCGGCACCATAGTGTCAGTTTCTGTAGAC -AGCAACGGCAGTGAGAAGGAATCTGAAGCATGTGAATTTGGAATCTTGATTTTGGGATAT -AATGTGACCGTGGAATAGCTAAGCTAGGCAAATATAAAGATATATTTTTCTGCAACCTAA -TTGGCCATTTGACTATTAAAGTCTTGTTTCAATAGACGAGATGGTTACTTAACTGAACTC -ATGCTGCTCGGGTCTATATACCAGTATCCGGCCGAGGAGATGAGAGGTGCGTCCTCGCCA -GTTTCTTGCAAGTATTCAAGGATCAAAGCAAGCTTTACATTTAGTTCCTCGAGATTGCCA -GCCATTTTTCGTTTCTGCGCCGCAAGTTCGGAGATAATGCTCGTTTGTTCATCAATAATG -CTTTTCTGTCTGTCGAGCTTGATTGCCTGTATTTTGAGCTGGCACGATTGTTCATCGAAC -CGAGACAACAGCTCCGCATATAGCTGTTCAGCATTTGACTCATCTTCAAGATTGAACATC -ATATCTGAGTTCTCGTGATCTGCAGTCCATTGTTCAATGTTTTCCACCAGATCCACTAGG -GCTTCGTGGGTTGGCTGAAAATACTCCCAATCGTTGGGTGGCATATCACTGATCAGTTCC -TCTTGATACTCGAGCTCCAAGAACATTGTGCTGGCAGTGTCACCAAGTGCTTTTTTCATC -TTCGTATCTCGCTTTTCCAGCGCAGACGCTATTCTGGTCTGTTCCCTAATATCACGAACT -AGCCATCCTTTTTCGCATTCGAGGTCGGCAATTTTCTTTGCCTTGTCCTTGAGCTTGCAT -ACAATTCTTGCCGTGTGTTGCTCAAGCTGGCTCGTGACATCTTCGTGGCGTTTTGTTAGG -CGGATGATGACCTTTTCCTGTTCTTTCATTGCGTGCCTCAGCCGTGCATTTTGTTCCCGA -AAGTCTTTGAGCATGGCTAGCTGTGCATTTTGTTTTTGATAATCCTCGGGCATGTCGTCA -TGATGTGACCGCATCGAATCCCACGAAGACCCAAAATCATTAACCATGTGCTGCGGATTG -ATTGAGGAGTCTATCCACTTGAATTCTTCCAGGGGTAAGCAGTTTATGTTTTGGTTCTGC -TCTTCAAAATTGCGTGACATTTTTGTGTCTTGTACTTTGAGCCTAAGATTGATACTCGCA -TTCATTTCGTTGGTATGAGCTGATATATCATCGGCGTTCGCCTTTTGATGGACAGCCCCA -GTGAGTAATATATGCTGTTGTATGGGGTTTGAGGTGATGAATGAGAGTTCGCAGGAGATA -TTTGAAGGTACCTATTTTAAGTGATTATGAGGTGTATCTGTTAGGTAGGTGATTCTTATG -AGCCGAGGGACAGAAGGTGTGATACTGGTTGACCTTGCTCCGATTCCGTATTTGGCCCTG -ACTCTATGTCATCTGGGGCTAAACTATGACTATAGGTAATAAACCAACCCTGGAAAAAAT -AGAGTGAATTCGATGTCATCCATAGATGATACCTGCTTTTTGTTTTCATTTCCATTATAT -TCCCTCGCCGAGTACAAAATAAGACATCGTTGTGAGTCACAACACGTAGAGAATGAGAAC -AAACATATTTGCCCTAACAAAGGGACGGGTATAGTTGCATAGGTCACATGGCTCAATTAA -AGAGAACTTGGTAATGCCGCTGACCGCGCACTTTCAGAAAAGTTGACCCTTGAATGGCGA 
-GATCGCGACTGCTAACACTGGTAGCAGAATGTTAGACTCAAATTCAACGGACCAAGTGCA -CCTGCTAGGGCAGTGGTCGCAACCAATAGAGCTGTTTGTGGGCGTTGCAACAAGAGAAAC -ACCCACAATACCCCAGGAAAAGGTGGTAGACTTTAAAGCAGGAAGCTAATTAAAGCTCAT -TGAGTGAAAAGGTGATCGAAAGAGAACGTCACTTTCATGCTTTCTTCGGGGCGGAAATCT -CCGCTCGAGAAAATGAGCCCGACAGCACCTCAAGACCTTTACGCTGCTGAATCGCCTTTA -TTCCCGTGCGATCAGTGACCGTCGTCGCGTTGATTATTTTGTCGAACTCTTCGTCCTCGC -TTCCCCCTGAGCCGGACTCAGCATCCGAGTCGACATACAGCTGCTCTGGTCGAGAAATCT -TCTCTGGTTGTTGCTTTTCATCTTCTTTCGGATTCTTGAACGACTTAGAATTGAGAATCT -TGCGGCGGATTTCAGCCATTTGCTCTTCATCTTCAGAGTCATCGGAAATCGCGAATGGAT -CCTTGTCCTCATCTTCTTCATCGCTGCCCTCGTCTTCGTCATCTTCGTCTCCACTTTCGC -CGTCCTCATCGTCTGAAGTTCGTGCCGGTAAACGATTGGTGTTGCCTTTGGTCCAGACAT -CATCACCTCCATCGACGCGATTGTCTTCATATTCGCGTTCGATCTTGGCTAACCATTCCT -CTTCTGCCCTCTTTACATCCTCTTCTGTTTCGCAAGCGGCATAGCGCGCTAGGAGTTCCT -TGTTGATTTCCAAAAAGGCCGGTCCGTAGCTGAAATGTCTGAGCAAATCTTCCGCCCATT -CCAAGCGACGGCAGATGGCGAAGCAAGCGGCGAGCGCTTCGACACAATTTAGACGCCATG -GTTTGCCGTAGTTAACCGTGTTGGCGGCGATAAGGTAGGGGACTATAAATGAAGATAGTT -AGCTTTCCATAAATTCAGTTCATCTTTGGCAATTTCCTATCACATACGAAGTCGTTCACA -TTTTCCACCGATTCGCGACCAGGGTACTTCCTTGACTCGCACCCAAGAGCATTCCACCAC -TGCAGCACCGTGCTGTTCCATGATATCTCTGTCTGCAGGAGAAATAATTCGCTTGGCATT -TGGACTAGTCAATCGTCAGTATTTAAGCCAGGGGAACAAAAAAAAAAGCGCAGCGCAACA -TACGAGACGATCACGCCGGGGTGCCGTTGGCCAATGTGAAGCTCCCGCATCAGACCAAGC -TTCATCAATCGCTTTCCTGAACATCTCTTAGGATCGCAGTGTCCCATATCCCAGCAGGCA -GCCTTGTAAGGAGGCCTCGACGAAGGCGCGCCTTCAACTTCTTCATCACCGCGGGGCACT -CGAGGGCGGGGGTTGTTGAATTTCTTGCCTCCTCGCGAGAAGTTGTCTTTTTTGTGGCGG -ACCATGGTGGATGATGTGATAGAAATCTAAAAAAGAATATTCAAGAGCACCCTGCAGTAT -CTTGTAGTATGCCGTAGTGGTAAACGCAGCGATTGAAAAAAAGAAGATCCAAGGCCAAGC -ATAAGCGCCACACGTGCAGCCTAAACGGGTTCTCCGCCCGCCTCCCCCGTCCGCGATTGT -ACACATCTCCTCTGGATCCCTGTGCTTTCAAGTACCTCTCTCTTTATCCGATTAATACCC -CTTTCCCTCTCCTATTTCCCAGCCCACCGTCATCCTGGAGGCTGCATCTGCACGAATGGC -GGCTAGGGACCGCTTCGGTGTCTACGCTGAGCCGGGCGCTTCGCCATTGCAGCGGGCAAT -TCGTGAGTTTAGTGTTTAAATTTAAGGTAACACAAATGATTCAAACTAATGATCAATTAT -CCACAGGAAATGCCTGCGATCCTCAAAACTACGAGCCAAATCTGGCCCTGAACCTCGAAG 
-TTGCGGATCTAATCAACTCCAAGAAAGGCAATGCGTGAGTGGGAACCACCCATGTTGTAT -AGCGCAGATTCATTGATTCCCTCCTTAGACCCCGAGAAGCTGCATTTGATATCGTTCATC -TCATCAATTCTCGAAATCAGAATGTCGCATTGCTGGCGTTGGCGGTACGTGGGGAGCTCA -AATGATAGCTGACGCGGGGGAGACTGACAGCCGCGGTTTCTAGCTACTCGATATTGTCGT -GAAGAACTGCGGATATCCGTTCCATCTCCAGATAGGCACAAAGGAATTCTTGAATGAATT -GGTCCGCAGGTTCCCCGAGCGGCCACCCATACGGCCCTCCAGGGTTCAGCATCGCATTTT -GGAGTCGATCGAGGAATGGCGCCAAACTATCTGCCAGACCTCGCGCTACAAAGATGACCT -AGGTTTCATTCGAGATATGCACCGTCTGTTGCTTTACAAGGGGTATATGTTCCCGGAGGT -CCGTCGAGAAGATGCCGCGGTTCTAAACCCAAGTGATGTATGTGACTCCCACGTGGTTTT -GTTTTTGGAGTCAGATTAACATTGCCTAGAATCTCCGCTCAGCCGATGAGATGGAAGAGG -AAGAGAGAGAAGCACAATCTGCCAAACTCCAAGAATTGATTCGTAGAGGCACCCCTGCCG -ATCTACAGGAAGCGAACCGGCTCATGAAGGTTATGGCAGGATTCGACAATCGGCATAAAA -CTGACTATCGAGCAAAGGCTGCGGAGGAGGTTGTGAAGGTGCAACAAAAAGCCAAGATTC -TGGAAGAGATGCTGCAGAATCAACAGCCCGGGGAAGCTCTCCCCGAAGGCGACGTTTTCG -AGGTAGGCGATATCAAATATCCTATGTGTCTGTTGGCCAGACGATTAGTCTCTGACGCTT -TTACCAGGAACTTGCGGGTGCTTTGCAAAGCGCTCACCCCAAAATCCAGAAAATGTGCGA -GGAAGAATCGGATGATCCCGAGGCCGTCCACAAGCTATTGGAGATAAACGACAGCATACA -TCGCACAATTGAGCGGTACAAGCTGGTCAAGAAGGGCGATTTGGATGCTGCATCTCGGAT -TCCCAAGGGTACATTGGGCACCACGACGGGAGTCTCAACAAATGCAAACAATGAGCTCTC -CCTTATCGATTTCGACCCGGAGCCTGAACCCAGCTCCAACGGCAATGAGGCCGGTCCATC -CCAGGGTGGTAACGCCTTGGAGAATGACCTGCTCGGCCTTTCATTAGGTGAACAGGGTCC -CTCTCCCGGACCCGGCGGTGGTATTTCTCTAGGATCTTCCAGTAAGTATCGCGGCACCCC -TAAATCATCACCGCTGACCAATATCAAAGTGAATTTCCCGTCCATGTCAGCAACGTCAAC -ACCGCCTGCTCCATCACAACCTCAGCAGCAGGCGCCAACAGCCTTCAAGCCCAATTATGA -TATCCTTGCTTCTTTCAACAGTTCGCGTCCCGTTTCTCAATCTCCCACACCTGTCATGGG -GGTCTCGCCTCAAGTCCAATCCAAAGCGACCCCACCGCCACCAGCAGCTGACCCATTTGC -ATCACTTGTCTCTGCCAGCCCACGCGCAACATCTAGTCCCTTCCAGCCACCAATGCAAAG -CCAACCTGCTCCTGGTTCGTCGTCATTGCTTGACCTGGTCGGAGGCACTAGTTCATCCCC -GCAACCTGCGCAGGCGGCCGCCCCAGTAGAAGATGATGAATGGGACTTTGCATCATCCTT -GCCAGCGAGCAATGCGCTTCCATCATCCAACAAGATCCAAGTCCTCAACTCCCAGCTCCG -TGTTGAGTTTGCTGCCCGCCGGGTCCCGAACCAGCCGCGCCAGATCCACGTTGTAGCCGT -CTTCTCAAACACAACCAGCCAGCCCATTGGAGACCTCCATTTCCAGGTCGCTGTGGAGAA 
-GGTAAGCTAATTTCCCACCATAATGCACTTTCTAAATCAGCTTACTGACAACAAATTAGT -CTTACACGCTACAGCTTCGGCCACAATCAGGTCGAGACATTGCACCCCAGCAGCAAAATG -GGGTCCAACAGGAGATGCTTATGGACGGAATCGATGTTGGAAAGGGCAACTCGGTAAAAA -TTCGCTTCAAGGTGTCTTACAAGCTTGGCGGCGAGGCCCACGAGGAACAGGGGATGGTGC -CACCTCTGGGCATCGCCTAAGCCAACGCCGATCCCAGAACCTTCTCATACTTTTGTTTGT -GTCCGAGATTCTTCCCCTGATCTGTGCGATGAGAGTTTGTCCTGAGCAGATCATCTAGCA -ACCATTTCCATTGGATCGTGGTCTCGTGTATTGTCGTTGGCTACTCTCATCCATCGCAGC -ATCATCGGCGGACTTGACTCTTCTCTGCTCTTGATGTCCTTTTCCCCTTGGCTTTGCACT -CTTTGAGAACTTAGCGTCTTTTACCTATTCATGCTGTTTATAAACAAGATTTTATTGCAA -TTGTTTTGCTCTCGTGGCGAGGGGATGGGGTGTGTGTGGATGGATTGATTGATTCGCAAT -GATACAGATACGAAAGTAGTCAGGAACGAATCAATGAATCCATCCCCGTCCTCTCCATGT -GACATGGAGAGAGGCCGATGCCCTTTTACCATATTCTGTCTAATAGCATCGCGTCTCCCA -ATGTGAATTTACCGTCCCAGCTAACTAAGTAACGACTGCGACAACCGAACACAACCAGCC -CCACTGATCTCGACGCGGCTTTTCCACACTCGTCTTGGCGGAAGTATACGACATGAACCA -TGAGGTGTAGTTTGGGGTTGTGAATATATATTGGGCACTGACTGTGGCTGTGAGTGTGGG -ATTTCTGTGAAGTACATCAACACCGATGATTCTCTGAGAGGTCGACCAGTTTGCCTTGGT -GAAATCTGAGGTTCTATACTCATCCCGACAGTTACTGGAGATATGAGATGGGATTGATTT -TGATATCTCATGTGAATTTCATACTCTCTCATAAATTCTATGGAATCTACCTCATGTATG -TACAGAGTAGGCGATCAGCCCGGGTGATCGGCCTGATCGCCGAGTCCCCACCTGGGCAAT -GTGTCAACCGGCAACGCCGGCAAAAGTTCAATTGAAACCTGCAACACGCCAATCATACTA -TGAATAACTAGATTTATGTGACAATGGCCTCGGCATTTGGGCTTGAAATCGCCGATTCCC -GGCCGGCTTTGGACCGATGGTAACCCCCTATCGACATTTAATATGGCGTGGTTTGTTCAT -GGGAGATAAGGCTATGTCATCTGAGATACCCGTATTTTCCGGTGTGTACGCTGGCAGCTC -AGCCCATATGTTGTATGGTGTAGTATGCGATGTCGGCTATATCGTTAGTGATGGCTCGTT -ATGGTCATCTCTGCATATCGTTAGGAAGGAAAGGGTGCTGGGTGCCTCAGGCTATCACTC -TCGTAGTGTGCATAGCTTCGGGGATTCCGACCCCCCTTTCGATCCCTCTGTCTCTTCGTT -TCCAGGGTCCTACAACGTAGTAATCAGAAATTTAGGTATCATGTCAACTGTGGGGCTGCA -TTTCATGCCTCGGTTAAGAAGTACTATAGATTGTGCAATATGATCTTACGGCTCGTCAGA -TTCAATCCCAAATATAGGCGTACTTTGTAGTTCATGTGAAAAACCGAGAGCTACATAATC -CAGCTCCTGTGCCAAATGTATCTTTTGGATAGGGGATACTAGTCATGTACCTGTTATATC -CAGTGATTGGTCCTGTTCAGAGGCGATGATCGACGATCGAGTATTTAAAATACTGCATCA -GATGTAATCTGCAGTAACCTTGTCCAGAGCTTTCGACAATCTTTACGCTAACTACCTCTC 
-GTTAGCTATAATCAAAGCCTTGAAATCCAGGTTTCCATTGGACCTTGAAGCTCAGTACTG -TGTGGTCTATAGGTCTAGAACCTCCCTGATCCGGATTATCACGTGGTACCAATCTCGTGC -TACGGAAGACTTCTCACAGAAGAGAGAAGAAAGAAAAAAGGGGGGACGCTTCAGGAAGAT -GATCTGCGAAAGTAACGTCGGGAATACAGTCGGGATCTGGAGTACAGGGTAACCGCTCCC -CACTTAATAACCAATGGTCTAGACCCCGATATCACAGCACATGGTATACGTTGCAAAACC -TGGAAATATTATCCGTAGACCAAATCGTAGTGGCGGTTAAGGAAGAGTGCACAACCATGG -GAGACCGTAAGTTTTCCAAAGTGCCCAAACTGTGATCGGCCCGTGGTTCTGGACCCGAGG -GCTGAACCTTGCCCCAAAGGACTTCTCAGTATTTAACTTACCTTTCCCCTCTTCTTTTGT -TCCTTCTTACATAACTTCCCATCCAATATGCCTTTGTCAGATTTAGACCAGGATATTACG -CCTATCCCCCGTTCACGGGAGGAGAACCAGGAGAGGTAGGTGATACGTGAACTAGATGGG -GGACCTTGTTAACAATTACCCAGAGCCTTTATTGCTGCATCACGACGAAAGGATCGTAGC -TTAGATGCTCGCTTGGAATCTGCTAACCGGGCTTCTATGTTACATAAGAAGCGCACGGGC -AAGGCTTTCCACATCACCAAGGAGATTGTGGATAAAGAAGCCATGTATGAAGAAGTCGAC -GAACGGTACCAAGAAAAGCGCATTCTCATGTTGCAACATCAGAACAACGAAATCGAAAAG -CAATTCACCAACCATCTACTAGCTGCCTTTGCAGCCCGCACTCAATTCAATACATCTTCC -ATCCACAGCCGGCGAGCCAGCCACATGACCCCACGTCCATCACTCAATCGTGGCCCCCGC -AAGATGAGCTTGGATCTGTCCAATATCCGCTCCTCTTTCTCAGGTCAAGGCTCAATGGAA -AGCCCAATGCCCACAAGTGATAGCTACGTACTATCGCCCACGGCATCATACGATCCAAGC -ACCCAGCCCTACATGGGCTGCATGGACGGTTCCGACACGCCGTACTCCGGCATGTTCTCT -GCCTCAGGGGACTCATCGGGACAAATCCCAGCCTACATGAACCAACAAGTCTCAACCCCA -GCGTGGAACTCTCAAGTACCCGCATGGGCCCCGATGCAGCAGCAGAACTCGACTCCTGCT -CAAACCCCCACAGACACGCATGCCGTCCAAATGTGGCAGCAGCAAATGATGCAGCAGGCT -CAGATGAACGACACCGCGGTCCAGATGCATCAATTCAGAGATCGTCTTGCGTCCGCACCG -GAGTTGCCCATGCAGCAGCATACGGCTCCAGCGCTTCCCTCGGCATCCATGTCGGGCCCC -CCCGGCGGCTCTACCCATGGTCACTCGCGCGGCCAATCCCAGCCCACGAACAACTTCCAC -GACCTCAATCTCCTGACGCAGAGCACGCAACTTTCCAACTCCCAGCTTGCTTCCTCGGGT -ATTGGCTCTCCTAAGATTGAGGCCCTGTCGGCGGGCACTCACTCTACTCCGGACTTCTGC -CCCACTCCCAACACGCCTTTGTCGCCGACTGCAACTGCACATGCTACCATCTCGCCGGTT -GGGAAGGTCGGGGGTGATGGCCTTATGGTTTCGCATGAGGGGATGGATCCTGATTTCACG -GATTTCAGTCAGTTTGCTTCCCACTTGGGTAACCCGGCTGTTCCACTTTCCGATCGTGAT -CAGCCTTTCGGCTTTGACGATTTTATCGCTGTGGATGATTTCACTAGCGTCTCTTGTTAA -CTTCATCTGTCATGCCTCTTATGATCTATGATACCTGATATACCCAGGTAGTTATTGGTT 
-TTCTCTTTGAACCTCTATTCTCATTTCGTCTATATATGGGGTTGCTGTTGCACATTTGCT -CCTGTATCATATTTATCACTCATTTTAGTATTCTGAACAGGGGTCAGGATTGGAAATGTC -TGTGTCAAATGAATATATTTAATTTTACTCTGTTTCCCTTCCATGTACAATAATCCGGAA -CAAATTCATTCTCTGAATCTACTATCCACTTTGTAAGGTTCTCTTTCAATGTTCATACGA -AAATTTTCTTCTGGTGGATCTGACGTTGCATCGATCCGCAAACAACCATCGCCAAGTCCG -TGTCCTAATCCGGCTGTCCTCAATTTTTTGAATGGTTTAGATTGGGTCCCTTGCCTAGAG -CCCATTGCACGATTCGTAGCACCCTATTCCCTTTGTACGGAATTAATCGGGTCTAGGACT -ATATGCTCCACCGGGCCGTGTCCActatagctggaggtagagctgtagctatagctTCCC -CCAAATCCACTCAGACATTACATAAGATTTTTACCTTTCTCTTCCCTTGAACAGTAGAGT -CTTTAGCTGGACTAAAGTTTAACCATTGGGTTCACCTTCCCCCTTCAAAATGCCGCACTC -TATGAAACCGTGGCAATAAATACCTAGCCCTAACTACGTTCCTTTTGTCTTCCCACAATA -AAACCCACCGAAAAAGCCTTCAGGGCTATGAGGTATTTGAGGTCCCAACCCAGTTCCTCA -CAGACTAAGTACCTAATCTAATTCCAGCGATCAGACCATCCTCAACCGTGGGTGTCCAGC -CTTCATTTGTCAAACTATAGCCACCCACGGGTGAACGGCGCTATGTCATAGCCCTACAGT -CTGCAATAGATCCCAGGCCAGGCCCTGGGTGAATATGGAAACAGGTACTTTTGTAACGGG -TTCCGTTGATCATTTAATTTTTCATGATCTCGCTCATTCTCGGCCCCGTGTGTCACATCA -GGATTGTTTCCGACTCGTGTCGATTATGCGGGGCGAAATAAGATTTCTTAGCTGATTTTT -GATTGAGCTGACTGGTACAAATGACCTGATGCCTCGTGTAGAGTAAATTTTGGCGTGGAG -GGAAAGTAAATTAACATTCAAGTGCCCCGTGTGCACCGTGCGCTGCAGAATTGGTTGTCC -CCCCTGCTGAGGCGGTTCCGGGATTATGGATCAACGGCGATCCTGCATGATCGGCTGATC -CTTTTCTTATTTCTTTTGTCACGGTCAGTACATTATTATTCTCTTAGATTTTTCAGGCTA -TTAGAAATAATTGAGTAATTTGAGATAATCTCTATACCTAAGCTTGAACCCATCCCCaac -gtaacagtaaacgaaaatgaaaatgaaaatgagagcgaaaaACTTGAAGGATTCAGATTT -GCGGGGTTAAGGTTGCAGCTTGGTCCCAGATTTTAATTGGCCACCAAATTAGCGCCCGCA -CTGACCCATGAAATCTATAGTCCATCACTACCACGGTCGAACGAGTCCAAACGAAGTGTA -CCAGCGGTTGAAACCTGGGTATCATCGGCACGCAAATCAATACAAATCATGGAAAATTCA -CCCATTCAATGAGCGAACTCTAACAGCTCTGGAGTCATAGGGATCTCCGTACAGATATAG -GCTAGATGCAGCCACCCACGGGTTGTGTGAAAGCTTGCCTGGATTAATTAGGTTGCAGTT -CGATTGGACTGCAACGCCGATACGTGCACGTTTGCAGAAGGGTTATTGGCTATTCTCTCG -TGTTCTTATTTGCTTATCTGCTTCCACGCTGTGTGTGGCTTCTAAGGGAAATTGGGTGGC -CTGTCAAACTGAGAGTACTATGTGGGTGACTTTCGAGTCAGTGAGAGGTCCATGGTAATA -TAGATGATGGATGTAGACCCTGATAGGCCTGGTAGGGGATAATTCGCGAGCTCTGCCTGC 
-ACCATCTCGTCGTTTCTAGCCTTGTTAGGGCTTGCAGTGCCTGCGTTTGAGGTATTTTTC -AACGGTGGCCTGACTATGCATCCACCTTTTATTACCGATCAGGCCATGGAGTCCAGATTA -GTCGATTTGGAAATTGGATTGGATCGAATGGTGCCATCGTCGTGATTAGCATGGTTGATT -CGTATTGAGATTGATTTTGTAATTTTTTCACACAATACAGGGTCAGTAAACTAGGTATGT -CCGTACCTCAATATTCTCCGAGGCTGTAAATTTGGCCGTTGAAATGACCTGAGCCTGAAC -CTGGGCGATATCCTCTCTGAACCACCAAGTGTTTCCAAGGAAACTCCAACACTATCATCC -AATCACATCCTGTAATCCGCCCGAAAATTCCTCAAGGGATATGTCGATGATCAGCATCCA -GTCCGGGCTCTCGGTGCGGGTGGAGCAAAGGTTGGCTCAATAGGAGACCCGGTTGGATGG -GCGAGGTGGGGATTAGGTGAAGTGGTTCATGCGAATGACCCCAGATTCTAGAGCGTTGAC -GATCTTGGGCTAAGCTCATTGTGCTTCTATGATGAAGATCATTGTTTAATTCTATCTATT -GTTTCATTAAGGAGATGACTATCTAGTGGAACGAAATCCTGCAGTACTCACTTATCTCCT -TTAAAATCAATATCTTCGTACAATCTCGGTAAAAGCAATATCATTTGGCCTAAATAATTT -TAACCAATCAAGACACGGTGACCTCAATCCGGCGATGTCGCACATGAAACTATATATGAT -GGGCCAGTAAAAGACAGGAAGCCGAATCGCAGAAATGAACCAGCATACAATATACACCAG -ATCTACGCCCACACAATAAGCATTCAGGAAACCTGCAGCTGATCAACGCCACTACCTGAG -TCAGGCTCAAACCCAATCCACGAGATACTTACAGAGATGACAGAATACGAGGATCAAGAG -TGGTCCTGAAGGTACCGGAGTGGATATCAGTGCGTGTGTGGTAGTGCTTGTGCCGTTGGA -AGGTATTGGGTGGGACAGAACTACTTTGTAGATTCCGGATATATTTCTCCAGGTGATGGG -GCTATTTTACAGTTTGATTTCTGTAGGGATTGAAAACTGAATGAACATTCGGCATTTGCC -TGATCAGCGCCGAGACGTGGGGTTGACCGGCCTTGTCCGATAATGGGCGATTTTAGTGGA -CCTATAAATCTGGCTCATCTGGTAAGGTAGAGACTGTAACTCCAGAAGTAACAAATCACA -GAGTCTACCGCAATGCACGCGAGGGACTATGTAGGGACGAAGTAAATTGAAGTTCCAAGG -TTCTTCTTGGTAACACTAATTCACGATGGAAATCATATCCGCTGCTAAAGACGATAACGA -GAGGCTGATCGAGTCTGGAGATCCATCCGCTTGCGTCCTATGTTGTATATGGAGTGACAC -ATATTCATGACTAGTGAGGCGAGCATTGGGTTTCTAACAGAGCCAAGCACGTCGGACAGA -GCGATTTCGTATGTTGGAGTTTCAAGATACAGGCGTACTGCGAACATGCTCTGCAATGGA -GCCCAAGTGCAAAACGCAGTGTGTGCTTTTACATGCCACGACTGAAAAGCTCCCGGCCAA -TGTCACCACTGCGGCGGACACTCTGGCTCGAGGAACTACTGCGTCGGCGGCGGCTACGGG -ACTCGGCGGGGTCAGCTGGGTTCAGCCCGCTGGCTCGGGCATGCAAATTCAGGATGTCAG -TCAAGCTGACTACTCCGACTAGGCGGCCGGAGAGCCGGGCTCCGTCTAAGGTCGACGAGG -GAACAGAATGCGATATGGGCGATGGTACTGGCACTGACACGCCTGGAGCTGGATATGTGT -AGGGGAGGTTAGGAGCTGGTAGGTGAGGGGCGGTGTATTGAGAGGGCTGTGTAGAGGGTG 
-TCACAGGTAAAGAAGGAGATGGTGGTGGTGACTGTGCAGTAGACAATCCGGGAGCCGGGC -TGACGCTGTTGGAAAGAGGGACGTGTACTGAAGAATGCGATGGAGTTGGCGGTCCTGATG -AGGTTGGAGATAATGGATCGGTGACCCAGAGCCTACAGCAGCAGTCAGTACATCTTCTGC -TTACATTTCTACATTTCAAGTGTGCAGTAGTTACCGGTGTGATCGAGTTGCCACAATCTT -TGCAACCGTATGAGCCAGTGTCGATCCAGGATTGACATGGAATACCGGGAATGAGTCTTT -ACCCTCAATCAATCCTCGTGTGGACAGTATGACCGAGATGAAGTGGGTGCAGGTGTTTTG -GAGGAGTGGCAATGAAGATGATCGGGTCAAAAGCTGCAGGATGGTGAGTAAATCTTGCCA -GCCGAGGGGGGGGCTCGTCACATACCTTAACATCCGCTGTGGAGATATTCCCAAGCACGT -TTAAGTAGCTATCAACAACTACGATTGAGGACATACCCTCGTTGTTCATCATATGAAGGG -CATCAGAGAGGGGTTTATCTCCACTGCAAATGACACAATGGCGGTTAGAAAGCTTTCCAA -GAATTCGCCAAAATGACTTACTTGATGGAGATCACTTCATGAGAGCCAACCCCCAGCTCA -CGCAGAGCCTTTGGGTAGAGCTCTTCAATGACCGGGAAGCTGCGACCATTTTCCCAGAGA -AATTTGACCAGTCGAAACTGGCTGAAGATCCCAACAACTTCTTGGTCATCTGACTCACTA -ACTACCACGACCCGGTGAACACCACCACCAAAGATCTCGACTGCGGTCAAGACATTGGCT -GATGCCGGCAGTTTAATCAGCGGTTCCTTTTCCATACCGAACTTCTTGACATCTCTCAGC -GGAATAGGCACGCCGTTGTGAGCTTTCTTGGCTAGCTCTTCATACGACGCTTGATGCGCT -TCGTCGGGGTGTGTCAGGCCCGCTGCCAGTAGGAGATAAGAGTTGAGATCGGCATAATCA -AATGTACCGACGGCAGAAGTGGCTTCAGGGGTCTCGCGGATCAACAGTACGGCCGCACCT -GAATCAATCAAGATCTAGTTGGTACGAGCATGTTAGTCCAAGGACATGGAGCCTTGCCCT -TGTGGTGTCCTCACGTTTGTTGCCTCTTCAATACCGGTGTCAAGCTCCACAAACTTCAGA -TCAGCTGGGCTGACCAGCTCGCCAATTGAAATCTCTCTCCAATCACGACCAACAAAGGCC -GGATCACTATTCTTCGGTGGCGGGGGATTGTCAATAAGGCTCTGGATGGCGGACTGTGGG -AGGGACGGCTGTCGACGGGCACGAGGAGAGCCGGGTGGGCCACGAAGTGACTCGCTCAAG -CTTTGTCTGTGCTGGTGGTTTGCTGGCGGTATGTGTGAAACGCGGAGTTGGTTTCGTGTA -GGGGGAGACCGGGATTCGGTGCTGGATCGTGGGGAAGCAATAGATGCATTGTTAAGAGAA -GTGTCGGACCCTTCGCGGGAGTGTAAACGATCCATTTCAAATATTCTCCCCAAGAGGTTA -GTCAAAGACAAGGGTTAATATGTGCGGAATGTGATTTAATGGGTGTTGCTTGACCCTACT -GAGGTCATCAGGTGATGTTGATCATATTAAGCGGGTCCATTGGCGGGGTTGACTGTCGAA -CCAATCAAAACTGCCCGATATCAATTTGATATGTACTATACCCAGGTAGGTCATGTTAGG -TCGGTGTGTACTCCAGAATATCTTCGGAGGCTCTCCAGAGCGTCTTCAGATATTTGTTGA -AACGTGCTTGAAGTCTTGTAAAAAAACAATCGAAGGCTTTGTCGGTATCAGGGATATTCT -CGGAGTTTCGGATAGGTCGGAGTTACAGGAAGCTGGAAGTGCGCAGTTCGTGATGACGTA 
-TTGGAAGCAGTACACACAGTAGAGAAAAACAGATGGAGCAAGGGTCGAGACTTACATATG -AGGCCTCCATTACTGGATGATCTTCGAATCTAGTAGGAAGTTCTATTTTCATTACTGCGA -GGGCTGTAGGCTTCAAATGATCCCCATGATCCCCATACAGACAAAGTCGAATGATATTAT -TGCCATTACGAGGGGATGAGAGGACGGGGCGTTGAAGTCGAAGTCGGTTACTTGGGTAGG -GTAGTTGTTCTTTATCATGAGATCACAAGACCACAGTTGCTTAAGCTGTCCGTGTCGGTA -ATTTTACCACCTTATTGTCACCCCCCGAATGGCTTTTGCCGCATACAGCTCCATGCTCAC -TACGTTAAATACATCAACGTTGACAAAGGTAACTCAGGCGGAGTGTGGAAGCCCGACGAC -AGCTTCGATTGCTTTACCGAGCTCATCTTTTTCAATGCTCTCAAGGGTTTGATCCTTGGT -CTCGGGTATCAGCAAGGTTATAAAAGCAGTGAGCAGCATGATCCCCGAGAACAAGCCAAG -AACACCCTCCAATCCCATTGCATCTTCAATAGTCCCAAAAGCAAATGAAGTTATGATTGC -GCCGCATTTGCCAGCAGAGGCTGATATTCCATATGCTGTGCCTCGAACACGCGTTGGGAA -AACTTCTGCGGGAATCAGGAACGTCGTGCAATTAGGACCAATGGTCAGCAAAAATTGCGA -GAGTGCAAAGATGATCATCAAGCCAGCTGTCGGAGTGTGCGCGCTCTTTGTGCTAACTCC -AGCCCATATGGCATAAAGGACGCACACCATGATACAGGTCCAGAATTGTTGGCGTACACG -ACCAATACGATCCGGAAGGAAGATGCCGACGTAGAATCCGGGGAGATATCCTGCTGCCTG -TATGATTATATTACCAACAGCGGTATTGTACAGAGTCCTCCATGCTGTTTCTCCATTCGC -ATAACCAATTCTTGCTAGTATAATACTTTGGTTGAGATTGATACCATAGTATGCGATATC -GCTATGCATATTAGTTTCGAGATTCTTTTCTCAAAGGTCATACTCACTTACAACAAGAAC -CAGGAGGCTGCCGTTGCGAACAGAACTTTGGCATGTTTCCAATGACGGAAGTACTCGCGA -AAGTCTTCCCACTGCTGCTTGAATGTACGTTTCTGTCCAGCAGTGTCATCGCAGACATCT -AAGCGGCATTAGTGATCTTCAGAATACAACACTGCAAGGGGTCTCACATTGCGTGTAGGG -CAGTGTCTCTGTGATCTTTAAGCGGGCATAAAGAGTGAAAACCGCTGGCACAATTCCAAT -GCCCAATAGCAAACGCCAAACCCATTCGAGATTATTCAAGTTGTGGGCTATCGACCCTTC -GAATGCCTTGAGCAGGATCAGAAACACGAGGCTGGCTGCAATATTACCGAGACCGATGTT -CGAGAAGACGGTCAACACTTGCATGGCACGGGAGCCAAATGGTGACTTCTCTACCGACAG -AGATGAAGACAATGGATAGTCTCCTCATATTAGTCCGCTATGCCTCTCTATCTCAAACGG -CCATGACTTACCAGCTCCAATGCCTACGCCAGTTACAACACGAAAAATTGCAATCCATGC -TGTCACAAGCTGGGCGGACGTCCGGTTCCATGGTAACAAGACTACCATCAGGGTCCCAAA -TATTGTGAGGAGGAGCTCTTTACCATATATAGTGTGTCGACCCCAAACGTCACTCAGGAC -CCCAAAAAGGAGCTGACCAGCAACCATGCCCAAACTCAGGCCGCCTTTCATGATGTCGCT -CTGAACTGACGGTACCTTGCCATCTTGGAAATAGAGGTAACCCAGAATTGGTATTACTGG -TTAACATTAGTCTTCGATTCTATTGGAGCAATTGAAGCTAATCACCAAGTCCAATTATCA 
-GGTTGAGATAGCCATCCGCGAAGAATCCAACACCCGCAGTGACCATAAATCTTAACTTGG -CTCCCAGAGATGCCATTGAAGCGAACCAACTAAGGGTAAGAAGAATATAAACGATCAAGA -CAAAATCGCAAATTCAGCCTTGCGCACGGAAGAATATACATTGTCTCGAGTAGTGAAGTT -ATATACTTTATTGGAATAGGCATGGTTACTCCGTACCCAAGACAGTATTTCGGAGACTAA -TAGTCTTTCTATGTTAGACAATGTTGGACATGGAAAAACTAGAACAATGAGCAACATCAA -AGTCACCACGGGGTCTAAAGGAGAGGCCAGCCAGAGGCCAGTCAGGATTTCACATTAACC -CCACGTGGGGCATGGCGGGTATTCCCGTATCTCTTCGGTGCGAAGTCCGCAAGATCATTG -TGTTGACGAAGGTCTGCTAAACACTACACTTGGGAATTTCAGACTGTGCTGCCTACCTAA -AGCACATATGGACATCGAACTAGCCTCTCCGCCGGCCGTGAGTGCCTTGCGAGAATCATT -TGGCGATCGAAAACCACCCGACATCACTCGCAAGATTACTGCTTGTGTAGCATGTCGAAA -ACAAAAGGTAGATTTCATTGCTGGAGATAACGAAGCAATTACTGATACTCCGTGAAGGTC -AAATGTCATATGCGGGAGGGCCAGATGCCATGCTCGCGATGTAAGAAGAGAGGTCTACCC -TGCAACGTGAATCGGAGCCTGCAGACATTACTAGAAGACGACGTTACGTAAGTACGAATA -GTCCTTGCACTGACCTTTTCTAATATAATTTAGATGGAAGGGCGCCGTGGTACAAAAGAT -GCGACGGCTAGAAGAGGCAGTTGCGAAAATTGCTGCCAAAGTTGATGTGCCCGAGTTGCA -AGCTTCGCAAGCTGCTCCTGAAATACAAGATGACTCTTCGAGTCCGCCAATTGAAGTTAT -GAATGAAGAAAATACCAGAAGGGAAACCTCTGCTCCAAGTCAATCATCTCAGCAAAAAGA -CAAGCATGAGCCACCACAGACATGGGAAGTAATAATGGATCCTCCAGGCGGCGCAGCTTC -CATCCCAGCCTCTTGTGTATCAGAAGGTGGGAAGGCGGGTCTATCAAACAATCTATCACC -TGTTCGTCCAGATTTGATATCCAGAGGACTTATCTCTTTACGCCAAGCCGTTGCATTATT -CGAGACATATCACCTCAGACTTGATCATTTCCTCTATCGCATTCTTGGCGACCATACTAG -TTTGGACTCGGTTCGAGTCGCGTCCCCATTATTAACGGCGGCCATTTGTACCGTTGCTGC -TTTGCATTCTCAACCCCTCGGCCATCTCTTCGAGGCCTGCTACAACGAATATAAAGATCT -CGTCGCAGCCCAGACTTTCTCAAGACACGTAAATGAAGATGACATCCGTGGCCTGTGTGT -CGGTGCGTTTTGGTTGCATGAGATTTCTTGGGCCTTGATCGGGAACGGTACGTGGGTGCA -ATCTTCTACGGAAGCGAAGCTAACAGAACTAGCTGTCCGCATTGCATCGGACATTAATCT -TCATAGTGGTATCTACAAAGCCCTTAAAGGAGACCGCAATGGCTATCTTCAAGCGCGATT -ATATTATCTCGTCTATGTATGCGACCATCACTTTTCCATCGCCTATGGCCGACCTCCAAT -GTCGAGAGAAGGGTTTATAATCGAATCGGCCAGTCGAATCCTAGAACTGGCACATGCCAC -AGAAGACGATGCTAGATTGATAAGTCAAGTCAAGGAATGGTCAATCCTGGGTCAAGTGTT -TGACACCTTCGGTGTTGATATTGATACCCCTATTGCACCTCGGACACTACCTCGATTGCG -TCGATACTCCATTTCTTTGGATACGTGGTTCGCAGATTGGAACGAAAGCTTCAAGGAACA 
-TCAGAATGTCGGAAATTATCCACAAAAGGGTGTTGGTTTTCACTTTCACTTTGCAAAACT -ATACCTTTGCTCGCATGCATTTCGTGGTGTACCGGTATCGGAGGATTCACCTCGATACGT -GCTGCCTGAACTGGAGGATATCGCCAATGCCGGTGTATTGTCAGCTATGTCAATCCTTCG -GATGATTGTCAGTGATGCTGAATTTCGATCGTTTCTGAATGGGCTTCCTTTATATTTCGA -TACTATGCTCGCCTTCGCCGTTATTTTTCTCCTCAAAGTTGCCACAAAGTATGCATTCAT -GGTCAGGATTGATACAGGCAAGATCATTTCCCTAGTCACTGAGACTGTGGTTGCTCTTGG -AGATATTACCCAATCTATGCACAAACATCATCTACTTGTGGTCATCAGCGAAGGCTTGGA -ACGGCTGTTGAGTATATGCCAGGCGTCTAATCACACAGTCCACGAAACAATATATCGTCC -TTCTACTATCCAGGAACAGCCCCCGCTTTTTGATGCAAGTTGGATGGAAAACATGGCAAG -CTTTGATTTCCAAACTAACTTCCCTGATATGGATGACTGGTGGCTTCATTATAATACATC -GATGGGTCCTCTTTGCCCGGAGGCATCAAGATAGGCAAATATCAGATTAGCCAGTTGAAC -ACAAGATCAAGTCATTTTCTTTCATCTTTCATTTCAACAAGTAACACAAACCATGACTGA -GCCGTGCTGGCAGGCTGTAAATATTCTCTCTAATGTAGGGAATCTTTTGACCGGGAGTCT -AGATTGATGTTCAGTGTCTGTATTGATTGATAGAAGATCATGATTACACCCACCAAGAAG -ACATTGATAGGCAAGTTCCTCATAGGCCTCCAAGAGAGGAACTTTAAAATTTGGCCTTTT -TGCGAGATCATCCCATCGTCTAACAGAAAGTTTTGCCTCGAGCAAAGGATCCTGTTGCGC -CTCCAGGATCTGCTCGGTATTGTATCCACCACCCTAATAGCCGTCAACATACTTCAGATA -CGGAGAATATGAGAAGTCAAAAGTGACATACCTGAAATTTTAATGTCCGCTTACTGGTCT -CACTCAGTGCATCATAATAGCTTTGATCTGTCGCGACCAAATAGCGCTTTGCCATCACAT -GCGCACCAACCAACTTGCACACCTTCTCACTGAACCCAATCTGGCGCAGGTAAGTTTCTC -CCAGAGCTTCATGGCTTTGTCTTCCAATATACTGGCCGTCGGGGGTGATCATTTTCTCCA -TCTTCTCCGCCGCAGGGATGAACCGTCCGACATCGTGCAAAAGGGCAGCCAGGATGACCT -CGGGGTCATTTCCATGTTTGGGGGATTGTGTAGCAAGATATGCACATTGTAGCGAGTGCT -CTAGTTGAGTGACTTGCTCACCTAGATAGTCACCCTGGCCCTGTGCGTGCAAAAACTGGA -ACAATGTTTTAATGGTTGCTCGGGTATCTTCCTCAGCCGTGGCGAGGGCTTTCTCAGTTT -GGGGGGTTATCATTTTCAAGTCCAGGCTGATGGGTGTTTGTTGTTTCACCTCGATGGTCT -CTTATAGTGCTAAATACATCCCTTGGCCATTTTGTGTTTGGATGACATATTAGACGTCAT -TGTGGCATTGGCGAAATTGTACAGGGTACGAAGTCCGTCTTGCATTCGCTCTATCAACTT -ACCCCTCTAATACCTCATAGTATACTTCAAGATGTGAGCCGGCATGAAGCCCGTTAGTCT -GTGCTGCATCCCAAGCGTTTCCGTCTGGCTTCGGGACGAACACCTTCGCTCAATTATCAT -GCAACAGAACTTATTGGGAGATACTGCTGGGCTTACTCGGGTGATCGGGGAACCTCCGCA -GTCTAATTTTGAACCTATTAAAGCGTCGTTGGTTACTGCGTTTCGCTTCGGTCGGTTTTC 
-TTGAATAATCGGATATTCCTATTACATGCTACTTAGTTCTGTTTTTTGATCTAATTACTT -AAAGATGTCACCTCTCACCCCCGAACAAATAGCTTCTTACCACGAGAATGGTTATCTTCT -CCTACGCGCTGACGAGCATAAACTCGTTGACCCTACAGAACTCAAACTATGGGCGCAAGA -AGTTCAGGCCTGGCCTCGTGTCAAGGGCAAATGGATGCCCTATGACGAGATCAACATTAA -TGGCGAACGCCAACTGATGCGCACGGAACGATTTATTGACTACCACCCAGGATTCAAGTC -ACTGGTTTGTGGCGAGCAGCTGGCCGAGATCCTGAAGGCAGTATCAGGCGATGTATCTTC -CCGAGGTTCCCAATACCGAAGTATCATTAGAAGATGATGCTTATACCTGGCTTTCAATAA -TAGGATATGCTTCTCTTCAAGGAGAAGATCAACTACAAACAGGCCCAGGGCAACGGCTTC -CAAGCGCATCTCGATGCACCAGCCTACGACCACATCGGCCGCATCGAACATGTTACCGCT -AACATTGCCATTGACGCCGCGACTCCTGAAAACGGATGTCTAGAAGTTGTGCGCGGGTCG -CACAAGATGAAAGTTGAATTCGCGGAAGCTGGACGAATCAGGCCTGAATGGGAGCACGCA -CATGAGTGGAGTTCTATCCCACTTGAAGTTGGTGATATGCTGATCTTTGGGTCACATCTT -GCCCATCGGTCGGCTGAGAATAAGACCGATAAGAGTCGGTCGAGCCTTTACGCTACCTTT -CACTCCAGGAGTGATGGGGAGGATCTTAGGGAGCAGTATTACAAGCATCGAATGGAGATG -TTTCCACCGGAGCATGGTATGTTATTCTTCAAAAATTCATATCTTCAATTGCTTGACAGT -TACTGAGATATATCTCCTAGAGCGTGAGGAAGGCAAAGATTACTCTAAAGGCTACGAGAC -TTATGGCTTCGCGGCGCCTTTCACCAAAGTGCAGGGAAATGCTACTGCAACTGCCGTGGC -TTAGTAGGCGCTAGCCTGGTTATCATTTTTATGTTTTTCAAGTGATTGAATCCTCTGTAA -TTAAATTTTGGTTGCTTAATTTGTTAATGCTCATTTACTCCAACTGTGGAATGTGCTTAT -TTCGCTTTTCCAAGCTCCATTCTCGATACTTTTACAGTCTGATTGTCGATATTATAGCTC -ATTGTTTGTTCCATACAAGACCGTGACAGCTACAGTCATAATTCATGAGTATTTGCACCC -CCGTACTTCCTAGTGCTTTACAATAGAAGACTCTATTGTAGCCTGCACGCTGGAGCTTCT -ACCACACATATATATAAGCGTGTATGTGAGCCTTCTAATTACTAATTATTACTTAACGCA -TTCAATAAAGTATTGACAAGCAGAAAGGAAGGTGGCCGGGACATTCTCGTAGTACCCGGG -TGCAAATACTCATGAAACGCCACTGTATAATCGGTCAAAAGTACCGCAAACCCTCCTGTG -GCACACTGGCGGTCGGAGACTGCTGTGACTAAGGCCTCTGTATATTTGCCAAACCTCTCG -AGTATAGTAAGCGGAGTATTTACCTCGAACCAGCGAAGACAGATCACATTAGTAGCTAAA -AAAAACTCGACATTCCTGAGCAGAGCTTCTAGTGTTCTTCTTTAGACTCTAGACAAGTCA -CAACCTGTTCCATTTTGTACTATGTATGTACGGCTATATACAAGATGTGCATGTGATAGG -TCCTGTCAAACACCAAAGTCCACCCAGAAATACGAGTTGAAATCCCACAATCTCAAAAAG -AAAACATTCAGATTAATCATCAAGGTATGATGGCACGACCACATCTCCTACCATGACCGT -CCTGCGATCGGGTAACTCCGAGTCAAAACCCTCTAGTTCTTCGGTAGAGAGTGCAGAATA 
-TTTGCCAAACGAGACCTTCCCATCGATATTTCGCGTCTTAGCAAGTGTTGAAAATGGCTC -ATCACGCCGAACGCCAGTGAACTGATCTATACAAACCATTTGGCATCGTTGGCATGTGTC -GAGAGCGTCAAACCGGAGTTGGTCTGGTCCAATCCGAAGGGAGGACCACCGGTCCTCCGC -ATATGGTTGTTCTACATCACCACGGTGAGCAAGTCGTTCGGCAACGATGATGTTACCCCG -GAAGACGTCTGCTGCAACAGTCTTACCGCGGCCATGCTTGCTGTTTGCTTTGATGGTTTC -GTTCAGTCGGTTGACAGAAGAACGCGAGACTATTAGAATTGGCGACTCGTTGGATAACGT -GATTTGTGCCTGGTCACCCCCACGTGCCGGGGGAGGGAAGTCAGGTGGGAACGAGCCTGG -CATAATCAGCTTGCGGAAGCGGTTCTTCCATGTATTCGGCGTCCGTTGCATCCGCGAGTA -CCGGCTGGTGGTCTGTGTAGGGAACCGTGCAAGAGTACAGGGTACCCCCAGGAAGTCCGA -AAAGAACGCAGAGACCACCGGAGAGGTGTAGGCATGGAGAGAAATCCTGTCGCCGCAGAC -TGTTGTAGGCTTTCTGGTTGAGCTTGGGCAGAAGGATGTAGAAACAAGACTTGTGTCCTC -CCAGCCAAGCGAAATCTCGAGGCTGACCTGATCTGGCGCAGCGATTGCACCACAGGTAAT -ACGGAGAACGCCTCGTTCAATCTCAATGGAGGGTCGTATAAGACACATGCGGGGATATCT -ACTACCGTTAGTTTGGGAATTCAGACTTTGGTTTAGACACAACGTACCTTTTCTGGTTCA -GGGCTGTGCCAGTCCCCTGATGAACCAAACACCATTCTCTATCCCAAGCTAGACCCTCCT -TCTTCACTTCCCAACGCTTTCCCTCGGGTATCTTGAAAGCACCGCAACTCTTGATTGGGA -ACACTGCTAAGCTTTCAACATAGAAATGAGGAGCAACGACCTCATTGTACTCGGTTAGAG -GATCCAGAGCGAGTATAGGTGGGCACTTCTCCACGTAAAACTCCTCTATGAAGTTTACCA -AGGAGTCTATATCCTTCATGTTTGTCGTGGCGCCGATACTGACACGAAGAATTCCTGTTG -GTCGTCCTCCGAGAACATCATGATTATCACCACATCTTAATCCCGCAGAGTAATGCCGGC -GAAGCTCCGGTCCAGTCCACCCAAGCGATGAGGCAGTTCCTCCTGGATTGCAAACGGATC -CAGTGCGTATCTGCAGGTTCTGCACCGTGGCCAGTTTTTCCACCTCAGTTTTAGGAACCC -ACTCGCCGCGACTATTGCAAAGATTGAAAGCAATGATTGGCCCTTGGGTATTTGGATTGC -CATATGCCGAGGTGTGCGATTGATATATTTGACACACCTTTCTCTCATTGAAATGTGCCA -ATGAGGAAAGCCTATCATAGACTTCTTTGGCGAGGAACCCAGCATGTGCCGAAACGTTCG -CCATGGAACCAAATAAGCGCTTGTGTGTGTCTAAAGCCGCGTCCAGAGCTATGATGCTAT -GAAATGGGAGAGTACCATCTTCCAGCCGCTCATGGATTGAAGTTTCCTTTTTCGCATGCC -ATTGTACCCCAGTGGCCAGGACCATGTCAACGGTGCCTCCGCCAAAATACTTTCTTCGCT -CAAAGGCCCGTGCAGCACTTTTGCGAACAATCAAAGCACCCAGATCGGGAAAGCCGAAGA -TTTTGTAGAAGCTGAGGACAGTGAAATCTGGAGCAGATGAGGACGGGCCTAGATCAAGTG -GAGCGGTGGATACCAGCGACGCAGCGTCAAGCAAAGTGTATACATTTCCTCCGCTTTCTT -TTGCTGCGGCTCGGACTTGCTCGCACCATCGAATAGGCAGGCGTCGCCCGTTCATATTCG 
-ATTGAGCTGGGTATGCAAGTAGTCTGGGTGCCTTCGACTGGTTGCCGGCCAACTTTGAAA -TCTCAGCCTCCATCTCATCATCATTTTGAAAACACTGATAGCCCATCTCGGCCAACTCTC -GTACACCAACAAGACTGGTATGGGCATCAATGTGATAGCCATACCAAAATCCCCGATGGT -CGGAGTCCCGAAATGAGTCTGCGACGAGCTTGATGCCAGCAGTTGCATTTGCCACAAAGA -CAAGGTCAAACTCATCCGGGTCCGCATTGAAGAATTGCAATGCGCGGACTCGAATATCTT -CTGTTCTTTGTGTAGAAAGCTGCGATGATACAGACATCGAATGAGGGTTACCAAACAGAT -TTGATGTAAGATCACGCGAGAACGACTCGATCATGGACTTTGCATAGGGTGTTGTCCCCG -CATAATCCAGGTATGTTGTTCCTAAGAGAAGGGTGGTTAGCAAGGATTGAGTTATAGCAA -TGGGCACTCGTACCATTCAGTAGAGGGTACTCTTGCTCTCGGATTATGTCCACATCAGCG -GAGTAGCCTTGACTATAATGGGGATGGATAGATCCCTGAGAGATTGATTCTGCCATCTCT -CAAAGTTCAACTTTGGACAAAGGCACTCCAAAATTTCTTTGTCGTAGTTTGTTTGAAATC -AAGGTGGCAAATCGTGGAAGGACGAGCTTTCTATATGTATAACACAATGGTCAGCACACA -ATGAATAGAGATATGCATAAACAGACTGTACATTCGCACGACTGTGCAATATCAAATAAA -TTCGAAGAAAAAAAGAGGAGGAGAGGGGGGGCTTACCACGTATAAGATAGATATAGTCTG -ATCGGAGGTTACCGATACCAACAGATCAACTAACCCGGGTATGATTTCCAGTCCTCTTCA -AAGGGATAAGCTATCGGAGTTTTAGAAAAGATGCACATTCCCAAAGTCTGGTAGACGGCT -TTTATAGGTTGTTTCCAACTATTCGAGGCGCGCTGTGAAGTGATGAAGAAGAGAAGGTGT -GCAGATGAGAAATCCCCTCCTACGTGTCGGGAATATATGGGACGTGGCAGTACGGAGGAG -TCGTTTCCATAACGGACCTTATCACCCCGGCGCACCCCGGTTGATTGTTCTCATTCTATC -ACACGCACTATTCAACATTATATACACACTGATCTACATCCCGTATAGGGACCCACTACT -ACATCGTGACGTTCATGCACTACATGCATAATATGATAATTGCCTATGCCTTTTATGGAT -ACGGTCATTCTCTCATTCTCATCGGGGAGATAGCTTCGTATCAGCCGACAAATTGATTAG -TACTCCGTACGATTGGAAGTAATTATAAGCCCCTACACGATAGTACACAATGGCATTGTT -GGTTGTAGATGACTCCCAGCCGCCTATTTGCCCATAAGGCGCTTCTGTAATTATATCTCA -CGCGTTCCAGTAGCGCCCCCTGATGCACAATGCGTAACCCAACCTGCATGCGTTTCAGGG -ATTCTACTGAAGCTAAAGCACTGCGTTCAGTTTCATCACAATGTCAGCTTACACATTCGG -CTATATTGCTATTCATAGACGCAGAGAGCGCCTAATACTCGCAATGCCCTAAGCCCTGGG -CTGAGAATCGATAGCCACAAGTTGAGTGGAATATACTGGACAATTCATCCATATCTGACT -CGAGTGCCAATGCCCTTAGGAAAATTGAGCAGAACATGTCAAGCCTATAATTAGGAAAAT -GCAGGCCTGAAACAGGATAACTACAGTATTCCAAATTGTTTCTAGGTCTAATTGATCAGC -ACGATGATCCTCGTGGGGTGCTTAGGCCCTTCGTTATTTGCGACTACTGTCATTGGCTGT -TTGAAGGTTCTTTTTTTGAGCATTCCCCTCATTCAACCTGCTTTTTCTGCATGATAAAGG 
-CTCGCAAGATTGAACATGTTTTGCCGAGAGCAGGTTGGCCTCTGATTGGTGTTGAGGATA -ATCCATGATAAACCGGCTGGATGTGTGTGAATGATTTTCTTGGGTACCATTGGGTGATGT -GACCGTAGTTGTATTGTACAGATGGGTTGATAGTATACTTCAAAGACAAAACTCCGTCCT -AAGCTCCTGCTATGGCTCGTACAAACAGTGATACCTATCCCGGTTTGCGGAGGGATCAAG -AGAGCACTGAAAAGCCGGAATGAACAAATTGGCCAAAATAAGGAAGTGAAAGGCGATGCT -GTGATATGTAGCGGTAAGGCACCCCGATTACCGCTTGGGCCATTGTTTCCAGCCACGGCC -GTGACCTCTCTGGTGCGCCGAGTCAGCATTATTCCCATCACTTCAAATGGAAGGACCAAT -CGATTACTAACCTGCTTCCACATACGGACCATCTCGGGCATACCCTCCATTGCCTTGCGG -CGATCCTCGAGACGTGATTCCATGGTGCGCTCCCACTTGTGACCCTTGACCTTGTGGCCA -ACACCAGTTCCCTTGACCTGCAAGCCACGCTCTGCGCGCCGTGTCTCTTTGAATTCAGGT -GATTTGCGACTAGGAGGAAGAAGTGCGTGCACATTGTATTTGATGGCCAGCTTCATCAAA -TCGGCTTGCTGCCGCAACCCATATTTCGGTGCCTCCCATTTGCCGTATCGCTTATGCGGG -AGAAATGGATTGCGGAGTGCATCACTAGTCACGAGGAGTGAGCGCGAAGGAGTCCAGCCA -TGCTTTTTCTCTCCTGGCGCACCCTTGGCATCGCGGTCGGGGGTGTATGGTGATTCCACC -TTCATCGGTACATAGCCCTCACCGTCGGCAGGGGCAGGGCGGCGGACAGCTGCGGAGTGG -GATTGTGGAGGATACCGAGCAAAGAAATTTACCAGACGTTGTGGAAGATTCTCCAGCAGC -TTGCCTGACTGATTCAAAGCCGACATGATGGATCTTAGATGGTGTTGGATCCTAGTGTGC -ACAGGTGGTTCCAAGATTGCTTGTGAATGTTCTGGTCAAAAAGTTGATACCGCCTGGGAC -TCATGCCAAGGCCGTGGGATTGTGGAGCCTTATTCCTAAGGTGTACACGATTCTGCCCGT -GGCAGAACCTTGGGAAGGCCTCCAAACCTTACCTACCTAGGTAGGTACCCGTCTAAGCCA -AGCTTAGTCATTGCTCTCACCGTCTCAAGCCACACCGCCCTCCAGCCCCGGCAGTGGGCT -GCATAGCCAACGATGCTCGCCTCAGGCATCAGACTTCCTATCTTCGGCTTTTCAGTCTTG -TTTCAGCCCCAAGAATTTCCTACCTTCACAAGGTTCATTGTTACCGTCGCTGCTATTTTA -TTCATATCTGCAATTTCATACCTATTGTCTTACTCTGGGGTCTCAACTTCGCGCGCGCAT -TCTCAGCACTGCGACCATGGCCGACTATCAGTATGGAGGCTCCGAAGAGGAGAATGCGGA -GATTAGAAATCTCGAGACCGAGCTGGTACGTGTTCTATATGTATTCATGAGCGTGTCTCT -CGATAACTGACGTGTCTATATCTTATAGCTTGATGATCCCGACAACTTCGAAACGTGGGA -GAAGCTGGTTCGCGCTGCTGAGGGCTTAGAGGGTGGAATCAATCGCAACTCCAGCCCCCA -GGCAATCACAACCGTCCGCGCAGTCTATGATCGCTTCCTCGCCAAGTTTCCTTTGCTCTT -CGGATACTGGAAGAAGTATGCCGACCAAGAGTTCTCCATCACTGGCACTGAGGCCGCTGA -GATGGTATGCAGACACACTCCTTGGGCACCAGCCTCTTCCTTTTCATGGCTTCATGGCTT -CGTACTAATTTTGTCGTTCTAGGTCTATGAACGAGGCATTGCTAGCATCTCACCATCCGT 
-TGACCTTTGGACCAACTATTGTTCGTTCAAGGCGGAGACTTCGCATGACGCCGATATCAT -TCGAGAGTAAGTGAGAGTAGAAACGGCCATTTTCCCTTTCCCTATAACCGTTACCGACGC -AGCCCACGAAAGTTCTCGTGATGCCAGTCACATATGTTTCCTGGATATTTCGATATACAG -CCAGTCTTATTGTTCATGGGTGAGAGCCCGCTCGCCGCTTTACTTCCTTGACCTAAGTGG -AGTCTACTTCTCACCCATAGCTACTCAGGCTGCATTCCTGCTTTGATTGGCCGAATCACC -ACTTTTTTGCATTGGATGCCAAGGAATAATACTGCTCGATGGCTCTGTGCGGAGCATTGG -ATGCCCACCTATCCCTTAGCATCATCCACCGCACTCATTTGATTACTTGGGTCTTCACAA -ACGGACAGGGTTGACTACTCGTGTTTTCCATCTCCATTACCCCTGTCCTGTTTCCTTACA -CTCCTGTCCCAGCCACCGAGTCCATGTGTCCGCAACTTGCTAACTTAAATACCTCCAGGC -TTTTCGAACGGGGTGCAAGCAGCGTTGGTCTTGACTTCCTATCCCATCCCTTCTGGGATA -AGTACATTGAATTTGAGGAGCGGGTCGAGGCCCACGATAAGATTTTTGCCATCCTTGGGC -GTGTCATCCATATCCCCATGCACCAGTATGCCCGATACTTCGAACGGTACCGTCAGAGTG -CGCAGACACGCCCGTTGTCCGAGCTCGCCCCAGCAGATATCATGGCCTCGTTCCGCGCTG -AGATCGAAGCTGCATCGTCCCAGCCGGCTCCTGGAGCCAAGGCTGAAGCCGAGATCGAGC -GGGATCTCCGACTTCGCGTGGATAGCTACCACCTCGAGATTTTCACGAACACTCAGACAG -AAACAACTAAGCGCTGGACATTCGAATCGGAGATCAAGCGTCCTTACTTCCATGTCACTG -AGCTCGATGAAGGCCAGTTGACGAACTGGAAGAAGTATCTTGACTTCGAGGAAGCTGAGG -GTTCGTTCGCCCGCGCTCAGTTCTTGTATGAGCGGTGCCTGGTCACTTGTGCCCATTACG -ATGAATTCTGGCTCCGATACGCTCGCTGGATGGCGGCTCAGCCTGACAAGGAAGAAGAGG -TGCGCATTATCTACCAGCGTGCCTCGTATCTCTACGTCCCCATTGCAAACCCCACTGTCC -GACTCCGCTACGCCTACTTCGAAGAGGTGTCGAACCGCGTGGATGTGGCGAAGGATATCC -ATAGTGCTATTCTCATGTATCTCCCTAGCCATGTCGAAACCATTATCTCGCTCGCCAATC -TGTGTCGTCGTCACGGAGGCCTTGAGGCTGCCATTGAAGTCTACAAGACACAATTGGATT -CGCCAGAGTGCGAGATGGCTACAAAGGCTGCTCTTGTTGCCGAATGGGCCCGCCTTTTGT -GGAAGATCAAGGGCAGTCCTGACGAGGCTCGCAAAGTTTTCCACGAGAACCAGCATTACT -ACCTCGACAGTCGACCCTTCTGGGGCAGCTATCTCGTCTTCGAAATTGAGCAACCCACCA -GCGCCGCTACCGAGCCGGTTCAGTATGAACGTATCAAGCAGGTCATTGCTGACATCCGGT -CCAAGAGTGTTCTGCAAGTCGACGCCGTCAAGGAACTTGTCCAAATTTACATGACATACC -TTCTCGAACGGGGCACCAAAGATACTGCGAAGGAATACATGACCCTAGATCGAGAAATCT -ATGGCCCTTCGTCTGTGGCTTCTGCTCGAACTGGCGGGACTGTGGCGGTGCACCCCACCC -CTGTCGACTCCCAGTTTGCTTCAATTCCTCACATTCAACCCACACAGGATGATGCCGCGG -CTGCACAAGCATATGCTTACTACCAACAGACCGCAGCGAACGGCACAACAGCTTGAAACC 
-CAAGAGTCATGCGCACGTCCCCCAAAGCCGATGACTGCAAGTATGGGTATGTTTGTTAAC -GAATGGGTTTGATTCTGGACGTTCTGCTGGTACCTTATCTTTCGGTGTTTTTGTCTTCGT -TCCTTACCCTGGCATATTGATTTCCCTGGGTGGGTGAGTTCCTTTTTCATGTAGTATCAT -TCCCGTTGCCCGTGCTGTGCAACATTCCCTTCTAGCGATTTTGACCCAGCTTAAAGACAG -CTTCTTCTCACTCTTGTCAGATGTGCTGGTGTCCAGACCCTTCTTAGGGCTCGAGACAGG -TTTATTATGATCAATTCGAGATTGGATCCTTTGGGACCAATTGTATTCTAAGGTGGCTTG -TCTTCCCACTGAAAATTAATTCAAAATGATTCAAATTGATTCGTCTCCGATTTCGGGTAG -TCTTGTTAGACTTCTACTCAGCAACTAGATTCTTCAGTGTGATAGGATGGGAAAAAAATG -CGCTTGTTCGAGTCGGAGATTTACATGAAGCGCACAGGCGGAGGCCAAATCCCATCCTTC -GACCACCGTGCTGCTTTATGAAGCATCTTCAATTCTGATTTGTTTCGATACGGCCGACTA -TTTGCCTCTGAGGAAAAATCTCGTTGGGTGAGACGCGCAAAAAGTGTCGGAGCGATATGG -TTGATATGCACCTTAGGTGGCTTTGCATTGTTTTCATTGTCAGGTTCTAATAACCAGACG -AATGGTCTAGGAGCGTCACACGATTGCAAGGCATCTTGATCCACATAGAGAAAGGAGTCC -CGTCGTAGACCAACAGGGAATTCGTTGCTGTCTCGCATTAAGGCGAATCGATCCCGGAGA -TCACCAGTGTCATCACGGGTGCCATCATCCTCTACCCAATGATGACCAAATTTTGCAATA -GCACCCTCATACACTAAGGTCCTAAAACAATGTAAGCCGTGTTGAACGGCAGTTTGGATT -TGCTTCTTATACAAGGGCCACTCCGGCACATCTGGAAAAAGGATAACTAGGCCGCAGACT -GCTGCTTCGGTGAGACTTAGGACATACTCCTGCCACGGCACATGCATAGCATATCGAATA -CTCCGTTGATCGTCATCATTAAGAATATTAGAAGCTACAGCACTGGCGTCGCTCATTTTT -TGATTCACAGCCCGGGCATTCGCTTGGACGTCTTTCAACTCTTGTTTGTAGCGGCATCAG -AGGCTTTGTGGGTGAGATCTCGATCAGCTTCGCTGAGTCGCACCCGCTCATTCAACAATC -TTCCGGCTTGGCCATGGACGACGCCGGCTACCACTAAGTGAGTCTCTTGGTATGTTAGGG -AGTTCTGTCTACATTGACTGTGACAAGACCATCCTACATTTGGAAAGACATGGTGGGTTG -ATGTTAAGAATGTAAATCCGAAGTGTGGAAGAGGCTAGACCTCCACAAGGGGCCAAGAGC -CGGGGCCTCGCCGCGCCTAAAGTCCTACTTACTAAGAGCAACCGCTCACCTAGGAGCTTG -CTATGCTTAGCAGAATAAGGCCCGTATAGGCCTTCGCCCCCGGCGGTCGCTTTTGGACAG -CTGGTAATAATTTCTTTAATCTATGGACTAGTTGAATAAAGCTTCTATCTAGGATATCCA -TGGATCTATAGATACTATAATTTAACTACAAAACGCCTTAGTTGACCTTAATAGACACTA -TAATAGTATCAGGGCCTAGAAATATAACCTTGGTAGATAGTGTGAATCCTATATACCAAA -ATTGTCTACAGGCGCCTACGGCGCCTACCGCCCGTGGGCGCATAGGGTGGTGCCCACCCA -CGGGTATACCTGTAGACCTATGGTAGCCCTCCAGCTCCACCCTGGAGCGGGCACCCAGCG -GAGGATGGGTTTACTATATAATACTGAGCATGGGTAACTAGGTACAACTAGTTAACCAAC 
-TAGGTACGGAATGGTACTGACATACGATTCGGTTACGGTACCCATCCCCAGTTCCTATAT -CTGGGGATGGGTAGCGTAACCAGTTTATTATATGATAATTGCTATTTACAAAATCATCTT -TATCACTTTAATTTACCTCCCCTCTTCGGTTCTTCGGCCTATCTATTCTTCCACTACTCC -GTGCAGTATACATGAGACATAGTACCTATAGAGAAATCAATTAATCACGCTGATGATATA -CCTTTTCTAGCTATATTTTCGAAGTGATTTAATCTCAATCCTCTAGTAGATACTTATCTA -GAGTAGAAGTAGCTATTCTAAGAATTCCTTATTAATTTAAGTGATATTTGTATAGCAAAA -TGATATATATATAACTGTGTATATAACTGTATTCTACCACCACCCTTCTTACTTTATTTA -ATAATTATAATATTTACCTTATAATAGTCATGATTGGTGTAATGTTTTCTCTTCACAGTT -AGAAAACATGCCCAGAAAGGTATATTGTAAGAATGATTAATTGATTTTTCTAGACAATAT -TTTATGTATACTGTAGTGGATGAATTGATAGGCCGAAGGACTGAAAGAAATACAGGTGAT -AAAGGTGATTTTGTAAATAGCGATTACTACATTATAAACTGGTTACGCTACCCACTGCCA -GGTACAGGAGCTGGGGATGGGCAGCGTAACTATATCACATGTGGGTACCCATGACCTAAA -CCCATCCTCAACTGTGATCCGGACGGCGATCATATGTAGATAGTGGATAAATAAGTCCAC -GTGACATGCACGTGACCACGCGTCTGGCTCTTATCATGCTCCGTAGAAATTCAATATCAC -TCCCATCTCGTTTCTAGTCTGGTGGTTTCATTAGCCCTCATTTCTCCCGCAGACACAAGT -CATGGCCAACGGCTCTGAGGGTTTTTCCCCGTTCCCTCCCCCAGATCCGATCGAGCGCCC -GCCCAGCCCGCCTCCACCTCCTCCAGAAGATTCGGCCGCACCTCCACCGCCACCAGACAG -TATCGCCCCTCCACCTCCTCCCGATGATGTGCCACCTCCGCCCCCCGCTGAGACTCAGAA -GAAGCAGAAACTGGGATGGGGTGCAAAGAGACCTACTGTCACTCCGTTGAGTGTGGAGGA -GTTGATACGCAAGAAGAAAGAAGCAGATGCGGCAACAGCGAAGGTAAGTTTGTCCTTTCG -GGATGATGAAATATGACACAGGATTCCGTGGCTTTTACTGACAGTCATCGTGCAGCCTAA -ATTTTTATCTAAAGCTCAACGAGAGAAGCTAGCGTTGGAGAAACGGGCTCAAGAGGTTGA -GGCAGAGCGACAAGCCAAGGCCCGCGCCAATGGTGCTGATCGCAACGGTTTTACATCGGA -ACCATCGTTGGCGAGGTCGCAAGACTCCCGTGATATCCGGGAATCAGCGGTCCACCATGT -TCCCACCGGACCACGATCAATGCGCGACGGGCCCCCAACCGGACCTGGCGGTATGCGGAA -GGGAGGCCGACAACATAATCAGCACCTTCGCGACCAGGATATGCCACCCCCACCTAAGAC -TAAAGGCGAAAAGCGTCTTACTGAAGAAGACGAAGCAGAAGCGCTGGCTAAATTGACCAA -GCATCGATATATGGGAGCCGACCAGACATCAAACTTCTCGGCAAAGAAGAAGCGCAAGCG -TACCGCAGACCGCAAGTTTAACTTCGAGTGGAATACCGAGGAGGATACAAGCGGCGACTA -TAATCCTCTATACCAGCACCGACATGAGGCCAACTTCTTTGGCCGGGGCCGTTTGGCCGG -CTTTGGCGATGATGTCGCCGATGAAGTCGCGCGCAAATACGCAGAGGCATTGGTGACCCG -CGACCAGGAAGCTGGTAGCGCACGAGCCAAAGAAATACTGGAGATGGAACGCCGGCGACG 
-CGAGGACAGTACCCGCACACAGATTGATAAGCATTGGAGTGAGAAGCGCTTAGATCTCAT -GCGCGAGCGAGACTGGCGTATCTTCAAGGAAGATTTCAACATTGCCACAAAAGGCGGCAG -CGTGCCCAACCCCATGCGCTCATGGGAAGAGAGTCAATTGCCGAAGCGTTTATTGGAGCT -TGTTGACCGTGTCGGATATAAGGACCCAACCGCTATCCAGCGCGCGGCAATTCCCATCGC -TATGCAGTCGCGTGACCTGATCGGTGTCGCCGTGACAGGTTCCGGTAAAACGGCCGCGTT -CCTACTACCTCTTTTAGTCTACATCTCGACGCTACCGCGGCTCGACGAGAACCCGTCTCT -CAAAAACGACGGTCCATACGCTATTGTCCTAGCACCCACACGTGAATTGGCGCAGCAGAT -TGAGATCGAAGCCAGAAAATTTACACAGCCCCTCGGATTCAACGTGGTCAGTATTGTCGG -TGGCCACTCGCTCGAAGAGCAAGCATACAGCCTGCGCGAAGGCGCCGAAATCATTATTGC -CACACCTGGTCGTCTAGTCGACTGTATCGAGCGTCGGATGCTGGTTCTCAGCCAGTGCTG -CTACGTTATTATGGACGAAGCAGATCGTATGATCGATCTCGGCTTTGAAGAACCTGTCAA -CAAGATCCTGGATGCCCTCCCCGTCTCCAATCAAAAGCCCGATACCGAAGAAGCTGAAGA -TTCAAGCGCGATGAGCCAATACAAGTACCGCCAGACTATGATGTACACAGCGACTATGCC -TGCAGCAGTCGAGCGTATTGCTCGCAAGTACCTCCGCCGACCAGCCATCATCACCATCGG -TGGCGTCGGTGAAGCCGTCGACACAGTGGAGCAGCGTGTGGAGATGATCTCCGGCGAAGA -CAAACGCAAGAAGCGCCTCGCCGAAATCCTGCAATCGGGCGAGTTCCGTCCCCCGATTAT -CGTATTCGTCAACATCAAGCGAAACTGTGACGCTATCGCTCGCGAGATCAAACAGATGGG -CTTTTCCTCCGTTACTCTACACGGATCCAAAACTCAAGACCAGCGTGAAGCCGCGCTCGC -CTCCGTGCGCAACGGCTCCACAGACGTTCTCGTCGCAACTGACCTCGCAGGAAGAGGTAT -CGATGTGCCAGATGTGTCACTGGTTGTCAACTTCAATATGGCGACCTCGATCGAAAGCTA -CACTCATCGTATCGGTCGTACTGGTCGTGCTGGAAAGAGTGGTGTTGCTATTACCTTCCT -GGGTAGCGAGGATAGCGATGTCATGTATGTTACACCCGTTCTTCGTCCTTTTACACTTTG -TCGCGGTCTCAAGTTACTAATGCTTCTCCAGGTACGATCTCAAACAAATGCTGATCAAAT -CACCCATCTCGCGTGTCCCGGAGGAACTGCGCAAGCACGAAGCGGCGCAGTCCAAGCCCA -CACGCGGTGCTGGTGGCAAGAAGATCGAGGACAGTTCTGGGTTCGGAGGAAAGGGAGGAT -GGGCATAGGCCTCTCTGTCTTTATCTGCCATGCCTTTTTTGTTTCTGTATTATCTTTCAT -TGATACCACCTTTTTTTTTAATATGGTGTTTTCAGTGCATGTTTGGCGGCCAGGTTGGCC -ACATGAGGTGTTTTGCTGGAAATCAAATACACCATTTATGACCCCAGGAGGATTTTATGC -CATTTGTCTCAACATCTACTTGTCTATTGCTCATCAATGTAGTTAGTTATTGTACTCAGG -GCGGATATCTATATACCTAGCTAGTCCTAGTCCAAACGAGCACTTTACAAGAGGTTGATT -GATTGAGTAGAGTATGATTTTAGCTCAAAAAAGCACATCCCGCGTGCTTTGTGTAGATCA -CAAGGTAAAAATAATGTGCAGTCGAAGGAAAGTCCGCTGCGAGCCCTGAAAGACAAAGAA 
-TCCTGGTGTTCCAAGAAGTCTATGAACATTGCTCATACAATTTTTTTTTGTTAGATTTAC -AGCTTGCTCTGGAGATAGCTGTTGCTGCTCAGCCACTGCTCAACATCGTCGCGGTCACGC -TTGAGCCAGTTGATCTTGGCACGGATGGAGTCAAGGCTCTGCTCAACAGCACGGTCGAAG -CCCTGCAAAGAAAGATCCGAGTTAGTAAATTTCACAAAAAAGATGGATGTTAAGTATAAA -ACTCGGGGAGGGCTTGGTGAAGCCACTAACCTTAGTATCCTTAGAGGCGAAGAAGTCCTC -AACGTCCTTCAACTGTGCCTCAGTGCAGAAACTGGCAGTTGTCAGCTGGACGACAGTGCC -CAGCATGCCAAGACCGGGAGGAAGACGCTTGTAGATGTCATCCCAGTTGGACTTGAGCCA -GTTCCAGCGGCCCTCAATACCAGCTGGGTGGCTGCGGAGGCCACCAAGAGGCATATAAAT -ATCCTGGCTCTTGACCTCCTCGTTCATGGCAAGGCCAAGAGTGCGGGAGATAAGCTTGGG -ATCCTCAGAGGCACCGAGGCAGCGCAAAGCAGTGGTCTTCTCGTCTGAAGTCGGGGCGTG -GCGGAACCGGTCCAGTACCACTTCATACTCCTTGGCACCGCCGTGCTTCAGGGCAATGGT -GAAAACACTGCCGCGGATGTTAGGGTGAATGGCGGAGAGGTCGCCAGCAGCGAAGCGGGC -GAACATGTCTAAGGCGGCCTTGACAACAATCGGGTCCTCGGCCATACCGGCAGAGCCGAA -CATGAGGGCCTTAAACTGTTGCAGAATGTGGTCATCGTTCTCGGGGAACTCCCAACCGAT -CTCGTGGGCCTTGGGGCCAACCAGCGAACGCTGGAAGGCCTTGAGGGCGTCCTTTGTCTT -GTCGTCCTCGAAGAGCCAAGCAGCGCGCAGGGTGCCGATACGGGTCAGGATCTCGTTCCA -CACGACGAACTCTGACTCGGTGTCGAGGCCCTGAAGCAAGGAAAGCATACCAGAGGTGCG -CTGGAAACCAGAAGCAGCGAGGGCACCCGAGTCGGCAATCATACCGGCACGGTCTTCGAC -GGTCAGGCGACCCTCACGAGCAGCCTGTCCGAGCTTCTTGAGGCGCTCAGGGCTGTATGA -CGTGCGGAAGATAGCAGAGTGATCGGCGTTGAGCTTGAAGAAGTCCAGGTCAGGAATGGG -GAAGTCACGCTCGCGCTCCGTCAGCATAGTGTCTTCATCGACACCCTTCTCCGTGCGCAG -TCCGAGCATCACAGGGAAGAGGGTCTGATCCTCCTCGGGACGAACGTCACCGGTGCGCAG -GAAACGGTTCTGCTTTACGTGAATAGAGGAGTTTTCCTTGTTTTCCGTGACACTGAGGAC -AGGGAAACCAACATTCTTGGTCCAGATATCCATGACTGACTGCACGGGCTTTCCGCTAGC -ATCACCAAGAGCAGCCCACAAATCACCGGTCTCGGTGTTACCGTAGGCGTGCTTCTTGAT -GTAGTTGCGGACACCCTGAAGGAAGACGTCTTCGCCCAGGTACTTGGAGATCATGCGCAG -GACAGATGAACCCTTGGAGTAGGAGATAGCATCGAAAATCTGGTTGATCTCATCAGCACG -CTTGACAGGCACCTCAATGGGGTGACTACTGCGCAGCGAGTCAAGGGAAAGAGCGCTTTG -CAAGTTATCGATAACGTAAGTCTGCCAAACCTTCCACTCTGGGTAGAAGACGTTGCAAGA -GTACCACGACATCCAGGTTGCGAAACCCTCATTGAGCCAGAGTCCATCCCAGAAGTCCAT -GGTGACTAAGTTTCCGAACCACTGGTGGGCCAGCTCGTGCTGGACGGTCTCAGCAATACG -CTCCTTTCGAGACGCGCCACTGTTCTTCTCATCCAACAGTACATCGACAATACGGTATGT 
-GATCAGACCCCAGTTCTCCATGGCTCCCGCGCTGAAATCAGGCACGGCGACCATGTCCAT -CTTTGGCAGAGGGAAATCACTGTCAAAGGCCTTCTCGTAAAATGCTAGAGTCTTGGCAGC -AAGCTCCAATGAGAAACGGCCGTGCTCGATATCCTGGTCGGGGGTGGCATACACGCGGAT -GGGAACCCTGAAGTTCTTTGTCTCGATGTAGTTGAGGTTTCCAACGATGAAAGCAACGAG -ATAGGTGGACATGAGGGGCGAAGTGGTGAATTTCACAGCCTTCTTCGCACCACCCTGGAC -CTCAGTCTCAGTGTCAACATCCATATTGCTCAAGCAAGTCATGCTCTTATCTGCGACCAA -AGTCACGGTGAACTTGGCCTTGAGAGCAGGCTCGTCGAAACAAGGGAAGGCCCGACGGGC -ATCCGTAGGCTCCATCTGAGATGATGCAATGTACTTAGTCTCTCCATTCTCCTTGTATGA -AGATCGGTAGAATCCGGCCATGTTATCATTGAGAATACCAGTGAAGGTGAGCTTCAACTG -AGCGTCTGACCCCGCAGAGATAGCCTGACCGAATTTGATGGTTGCGGTCTGATCATCCTT -CTTGAAGGAGATTTCGGGCTTGGAGTCTACCACCGAGCCTTTCGCTGAGATAATTGCGCT -GTGAATTTCGATTTCGTTGGAATTGAGGGTGACGAAATCGGTGTCTTCGTTCACATCGAG -GCTAGAAGTAGATGTCAATGACTGTGATAGAGAACTGCGCACAAATGACATACTCGATGA -CGACTGTTCCCTCGTATGTGAACTTCTCAAAGTTGGGTTCAAGAGTCAAATCATAATGCT -GGGGTTTGACATTTGTAGGTAGGACCTCACGGCCCTGGACAACTGTTGACCCGGCGGTCT -CACGGCGGGAGCTGCACATTCTGCGGTAGGAGCAATATCGTTTAGAGGTCTGTGATAGCG -GAGAAGGAGAGCGACGTTGAATTGATCTAGGGACTGAAGCTTGAGAGCGAGCGGAAACAA -AGAGGGGGAGGGGGGATCTGGCGGAAAGAAGCGGTGGCTTTGCTTGCTGCAGAAGCACAC -GCGTGGAGAAGCGGCGCATCAGCTCCACGAACCCGAATACCGGGagaaagaagaaagaaa -aaaaaaagaaaagggggaaatgaagaGATATTAGTATTAAATAATTAAAAAGAACCAAAT -GTGAGAACAATTGAACCACAGACTTGATACCAACGGTCAAAACAGAAAAACATGAGATCA -GAGTCCGGCAAATGCGAAGGCGGTGAACAGCAGTGGCtctcctacttttcatcctttttt -ttcttcttcctcttttcttttcttcAACACGCAAGTTCACCCAGTGGATGTCTAAATTCT -ACATGACAGACAACTGATTGGAATGGCTTTCATGCTGTTATCTTGCAAGACAATATACCA -GCAAGTTTGAGGGGCTCATGACAGTTTCAATCGATTCTCTTGGGTGGGCTACCATAGAAG -ACTGGAACATACAGCACCGCGGATCATTGCTCTGATGTCACAGTATGACAGGACAGTTGG -TAATGAGGCCTCTTCAATAATGACAAATGACAAAAGCTTCTATTCCGCTATTACCTATTA -CTTGTAGCGTCTGGTGGGAATTCCTTCCCAGTCACCATGACAAAATCTATCTCCCGAGTA -CATAAGAATCCAAAAAACTAAAGCAATCCCATTTTTATCTCAACATCCAGTATCACTGTG -AGTGTACTGCTTGAGCCTGTGGCAGTCGCGTAAAAACACTGCGTGACCATCCGTCGGAAT -CCACGACGGACTCCTTGACGAGAACATACCCACCAATCCGAGGCATCAACACGGCCCAAA -TAACATAGTAGATTGCTCCGGCTGCGAAAAGCCCCAGGGCTACCACACAGTGTAGGTAGT 
-ATGGCAGCTCCTCGTACACATTCTGGCCACCGCTGGGCGGCACGTATGGTGCAATGGCGA -GGTACAGATTGGACAGCATGAAGAAGATAGTGACGGGTAGGGTAGCACGGATTCCGGGCG -TCCAATTCGGGTAATTTTTCTCTTTGGTGAGGTAGATATGAACGAGGCCAGCGGCTACGA -AAAAGTTGACAATTGAGAGGGGGTAAGAGATTAGGCTGCAACAATTGGTCAGTAATGATC -TTGACTGTGCCTTCAGGAATCACATACTTGAGGAGGAAGTTGTATGCGTCACCGGGAGGG -GGTGCGAGCATGATAACGACCGAGACGATATAGTGCTCAAGCAAGCCAGCGGCTGGGGAG -TGGAAAGGCCAGTTACTAGCCCAGATCTTAGAGAAGGGCAAGACACCTTCACGGCCGAGT -TCTTGAACAACTGTGTCGAGCTCATATTAGTATATCGTTCAAACTTGCAATTGATAATAC -ATACTTCGCCCTTGTGAGAAGATGACGGACAGCACATTGCCGAATGCAGAGAGAGCCACA -AAGACCGACATAACTTTCTCAGCCTGTTTGCCGAACATGTTTCTGAAGAACACGGCGGCC -ACGATACTGCCAGACTCGAGCATTTCTTTCTTCGATACGGCGGCGAAATAGGCAATGTTG -ACAAACATATACAAAATGCCAACCGATATGATAGCAATGGGGGCTGCAATCTTCAATGTG -CGGGCGGGGTTCTTTGTCTCGCTCAAGGCCTATAGATGTAATTAGCAATATGATCCAAAC -TCAGAACCCATATCTCCATGTCCTTACGTAGTTGGCATTAGAGTAGCCAATGAAGGACCA -GATGACATTGTACAAGGCCATGACAATTCCATACCCGCTTCCCTGAGTGCCTTCAAAGGC -ATTGGTGAAATTATGCGGCGTCTCGGCCTTCGTATGGCCACCGAGTGCCACCCAACCAGA -AACAATGATGAACAAGACGATAATCAACTTGATAACACCGAGTACATTCTGAAGGCGGAT -ACCCCATTTCAAGGCAACGGAGTGGATAAGAAAGGCGGCAGTGAGACAGGCCATGCCAAT -TCCACGCTGGTTCCATCGTCCGACCTCAACATCAGCTGCGTTGAGGATATATTGGCCGAA -GACAACGCTATTACTTGCTGCCCATCCGAGTAACACGGCGTAGGCGGCAAACATGGCCGT -GGCCAGGAACTTGGGCTTTTTATAAACGTACTCGAGGTAGTTCTTCTCGCCTCCGTTTCT -ATACACAGCCAGGTCACTCAGTCATGTGTTCCCTTTGGTACTTTCGTCCTCAATAACATA -CCTTGGAATTGCAGTACCCCATTCTAGGTACACTGCCGTCCCAGCCAGAGCAATTATAGT -GCCGATAACCCACATGAACAATGAGAGACCGACACTACCGGTCAATGCAAGGATAGTACT -CGGTGTGGCAAAGATACCCGTGCCAATAACCCGGTTGAAGATGAGGAGGGCCGCACTGAA -AATGCCAATCTGACGACGGCTGTCGGGTGTCGCTTCGAAAACATTTCCTTCATCTGAAGG -TGAGTTAGAAGTGTCACTCGGCCATCATTCAATCCTCAAGACAAACATACACAATCCAGA -GATACCATCGGCTCGCCGGAGGCCATCATCTCCTGCTGACTTTGTATCAAGTGCGGGTGA -CTTGCTAGAAGCCTCGACAAGAGGATCAACCGGTTCAGTCATTATCTTGGTTATACAGAT -CGAGCCACTCTCCACACGGTGAACCTGATCGACGAGTATTTATATTCAGCTGAGCGTCCA -GAAGGCAAATTCAGCACGGCTAGCAACGGCGTAGAAATGAGGTGCTAGCACTAAATGTGA -TGCCATAGGTCACTGCGGCTGGGCAGAGTCTTATAGTCTTATATTACCTTGGAAATCAAA 
-ATCACAAGTTGCAAGCGAGGCATCACCAGATCGAGGCAAAGGTCCTAGCTGGATTTGTGG -CTTTTTTTTCCCCTAGAATGCGGCCCACTCGTTTACAGTAATTCCAGAAGCTTTGGGGAA -GCCGAGGAATTGCACCATCACGCACCATTGCCTGGGCAGGCCGAATATCATTTCTACCGT -TACATAGGAAGTTGGCCCAATTAGACCCTCACATTTGCCTTGGAAAACAAGAAACAACAC -ATGATTCATGGCGCCTGTATTCACCATCATGGCGCAATATAAAAAAAAGGAAAGAAAAGC -CTCGGACATCGGGGATGCCGGGGTTCAGTGGGCTGCCGAGAATCCATTCAGGGCCGGGCC -GTGCTAAAAAGGTGTACGGTACGGAGTAAATGTGGTCGATATAAACCCATCGTTATGTCA -TTTGTTTAAAAAAAAAAAAAGGTACAGGTTAAAGTACATCAGAAGCTGTAGGTCAATGGC -ATCCCCATGGGACAGCATGGATGTTAACATCAGAGAATATCCATGGACATGAGGCAAAGC -CAAGAGAGTGCCGAGTTAAATAAAAATTGAAGAAATGGGAAAACTATGACACATGGTCAA -GCACGCCATCTGTAGGGTTATTTGCGTTTGATGCCAAGCTTAGATCCATGTTACCCTACT -CCACCAGGGACGCTCCACTACCCTACTTCATAGTCCATGGCTTGATATAATTTCTTTCTC -TATGTACAATACATCTGGTAATATCTGCATATAATCCGGGTGTACAACGCCGTGACCATA -TCCCCAGCCCAAGAAGCTCAGTGCCAATCTGTATACCGTGGGTTCCCGGGTCAAACGCCA -ATAACGAAAAGATAAAAAAAAGCAAGACATGGCATAACAAGAAAATAAGAACTCCCCTGG -TCTCGTGCTGTAGGATATATGGCAAAACATGAGCTGGAAAGAGAAATTGACGAATATGAA -CGAAGAAAGGAGATCTCCGTGGAGAAATAAGTGGCTGATAAATGGGAAAAGGTAGGAGGA -ACAGGGCCCTGGAAGCTAACCAACGGTTACCTTATATAAAAGGACGGGGGGGAGCTCAAT -GGTGAACGTCGCGAGTCAAGCCTTACACATGAAAGGCTATACTCATGACACTACTGGATT -GAACAATATCGTAAGAATCCCAATCAATTGGGCGGAATTTGGGGGAAGACCAAGGTGCTC -ATCTAGGCGGTGTCACCCCCGTAGGATATGCAACAGGGGAAACGGTAGAGGTAGGTGTCT -TGCCCGCACTCCCCACAGGAGTCGGCACCGAGGCTCGCCTGGATAGAACAGCTACCTTCT -TCCGGACAGATGCGAGACGGCCACGCATACTGGATACAACTGGATCAAGGTGTGAGTTTG -GGGTAAGCTTCGGGGGTGGATTTAGAGAGAACTCACGTTTCATAACGACCTGCCGATCTT -CGACCTGCAGCTCGTCCAAAATAACCTTGTCGTTAGGTTGCGAACCGCTTTGCCGTTCGA -GATTGCTACCTGGCATGGACCGCGTTCCGTCATCCTCAAGCACAGCTTCGAGCATGCGAA -TCGCAGTGACATATGTAATTTCACAGTTGGCAAGATCTTCGCCAGTCAATTCATTGATGG -CAGCCGCCCGGCTCATCTCGAGTGCTCGGTCGTACATGAGCCTCTCGGCTGTCACGCCAG -GGCTCACCACAACGTCGGCGGTTGAGCTTGAGCCAGCGGTGGAGCCAATGGAATGGTTAT -CCGGATGGCTAGGGTGATCGGGTGGCAATCGCCGTTGCCCCTCGATGAGTTTGAGACGTA -CAAATTCGGCTTTTTCGATGACCTCGTTGAACCGACTGCGTACCCATTGCACCACGTAGT -TGATCCTGGTACCGGCAGCTGCGCTCATGCTGTCTTTGCTAGGAGACTCCCCGTAGACCT 
-CTTCGCGGTTCTTGCGCGACCACCAAGCCCCGGCGATATCCATTGATTTTGCAAGCAAAG -ACAAAGCCTTGACATATAGTACCAAAGCCTCCTCCGATAACGTAACAATAGCATCCACTG -TCAGACCATCATCGGGGTCGGCAGAGTCTGGTTCAATGTTTGTCTCCATCGGTCTCGCAC -ATGGGTCAGTTGGTGCCGAAGGTGCTAGCGGGATAAGCTGTTTGTATTTCACCTCAGCGA -ATCCGAAGACAACGTCACTACGCGTAGCACATTCCTCGATTTCCTGAACTGCTCTGTAGT -CTTCGTCGACAGTTGCATTGGGTTTGGAACCATCTCCGGTGATCAGAAGACTAGCATTTG -CAGCAGGATAGGCAGGGAAAGGGTTGTAGGCTAGAGGTGGAGATCGCCCGCCTTTGGTGA -TGGCAAGGGGAGGAGAGAATCCCATTCCGAACAGACGACCGCTCGCCATGTTAAGTGCTT -TCGAGATTGCCGAGGTCGCTGAAGTAGGACTTTGCCCGTAACGTCGCTCGTAGGAATGTT -GGCGCTGGTGTGTGGAATCTGCTCGCGAACGACCCGACACAACTTGCATGGCTTTGGCCG -CATTGGAATGGGGAGAAGATGGCGACACCGTAGATAGACCCTGCGTTGTTGCCCGACGGC -TTCCGGCGCCAGGCTGTCCTGGCCGAGAAAGCCCGCTTTGAAGGCGTGGGCTGTGGGCTA -GTTCATCTGCAAAGGCGTTGACCTCCACGGCTCGCTTTTCGACCACAACATAGTCTCGTT -CAAATGCCACTTCTTGTGCAGCGCGATCTCGTGCCTCCTGGGCACGGATTGTTGACTCAC -TTACTTTGGGAGAGCCAGTGTAGGTATTTCGATTTCTCTGCCGATCCATAGCCGCGGTAG -TTGCGTGACGGTCGATGAGCTCCTGGCGTCCTGGAGCAGTGGCCAGCGTGACAGGGGCAG -GTCGTTGAGGTGGGTTACCTCCCGGTTGTTGCTCTCTAGAAATGCCTGTAGCAGGCCGGT -CAGCGCTTCCCACACGGCGCATTTGTGTAGCGGTTGGTGGCGTTCCTGATTTTCGACGCG -TTGAAACAGGTCTTTGGCCAAGAGGAGATGCAAGTTTGTCTTCAGGCCCTGTATCATAAG -GAGATTCTTCGGTCTTTGCGACAAGCTGCCTCGGACGGGGCGATTGTTGTGGCCCTGGAG -CGTCATCCGCCAACAAGCCAGGAATTGGATCCTTTATCACATTGCTGTTGAAGAAATCAG -GGAAGTTTAATCTCTCTACTGGGTTCCGTTTCAGAAGACCTCGAATTAGCCTTTTAATAT -CGTCTGACGCGGGGTTATCTTCAGGAAAGCGGATTCGATCTTCTCCCTTCTCAATCTTTC -GCAGCAACTCGACGTGATTTGTAGCCCGGAACGGGGGTCGGCCCACCACCATCTCGTAGA -GAACAGTACCAACCGACCACAAGTCGGCTTTGGCATCATACTTCTCATACCGCAGAATCT -CTGGCGCCATGTACAATGGAGATCCACAAAGTGTCTCTGCTAGTGAAGTGGCGGGTAAAG -ATCGAGCAAAGCCGAAATCTGCGATTTTGAGCATGGGGAGGGACTCGAGCCCAGTCATGG -GTTCGTAAGAATCATCGTTGCCTTTATAGGGAATCACCTGAGCAGCTCCATTCCGATAAG -ACGAAGGTGAGGGACACAATAAAAGGTTCTGTGGTTTGATGTCTCGATGAATGAGGTTTC -GGTCTCGCAGGAATTTCAGTGCGCTTGACAGTTGTTTGAGGAAGTGCCGGGTGACCACCT -CGTTGAGCGAGGCGCCTGGTGCATTTGGATACTTTGCAATCATATCCCGAGTGTATTTAT -GTGATCCAAGAGTATCTCGGCGCTTGATGAACAGCGACAGATCCCCCAACGCACAGTATT 
-CCATAACCAAGTGGATGTGAGAGGTTGATTCATGACAGTCGATCAGGGCCACAATATGTG -GATGCTGGAGGCCCTTGAGGATATCGATTTCGGACGAAAGGTTCTCCTTGAGCTTTTTGT -TAAGCTTTGACAAGTTAACGGACTTGATAGCTACATATGTTTTTGTTTTCTGTGTGACAA -CAATCGATTAGCATCCATCACTCAACATATGAGAATAAGGCACGACTCACCGTATGAAGA -CCTTGATAGACCGTCGCGAAACTACCTCGCCCGATTTCATCAAGCCGCGTATAGCGACCG -ATAGACATTTCTGGACGAGAGGGCTCACGCGATCTCCTTGAGTTATGAGTCGCCATAACG -GCGATGGGGACGCCAATGCCTGTTCTGTCTCTGGTAGACTTGCGCTGGGAGGAATAGATA -CCGAGTCACCCCAGCAACCACAACTACCTCAGCAGCAATGTAAACGTTGGAGTCGAGAGG -CCCATCAGGGTAGCTATGCAAAGAATGACGCTGCTTCCGGGAAGCCTGGAAGGGTCGAGG -AACCACCGGGAGAATGGGTCTGGGAAGGTACTTTCAAGGGGTGCCGTGGGACGAGACCGG -TCAACCCGGGTAGATTATAAGACGTGAGATCGATGTAGAGGATGTACTTGAAGGAAGCAA -ACAAACGTTGATCCAAAACACTTAAAGCGGTGAGAGATGTTGATGCAGAATAGCCAAATG -AGGGGGAATGGTTGGAGATTCGAGTATAATAGAGAACCAAAGATCTAAAGGGGCGAAATG -ATAGTACTCATGGAAGAGCAAGATGATGTGATGGTGGACGCGGGAAGAGAACCCCAAGAC -CCAAGGATACTCCACGATTGGGGGGGTGTGACCCAACGATAACGAGTGACAAGCGGCGAT -GATCGCGTAAGGCGGTCAGAAGGGCGGAGATCAGGAATTCCCGATAAGATGAAAACGTAT -GGGAAGAAGCTTAGAAATCTAAAGCAAGGGATAGATGATAGACAGATGATGATTAAAAGG -GATCAAAATTATGAGAAATTTAGGATACAACCCTCTTCAAAACATCAAACTGCGAACAGA -AGTCCAAATCAGAAAGATATAGCCCTGGAGGTCATGTGCTTATCGATTAGATAAGGCCGA -GATCGGCACAGATACCGAGAGATCTCCAGTATTCCTCACAAAGTACCGGAGACTTGTGGG -TAAAGAACATATTCTGCGGGGCATGACTCAATATATAGTGTCTAGGCTCTGTCAAGGCTT -CTCTACTCCGTACATATGTATTCCTCAATATTCCCGCATCTGCCTGTACTTGCAAGGTTG -CAAGGTGACTGATCACAAAACCTGGTGAAACTGTATGGTGACGGAAGCTTATCTGCAGCC -TATGAGTCCCAGTCATGGCGCAGCAGTTAGTCGAATTGAGGAATAAGGGGGGCAAGCTAA -GGCAAGGCAGGTATGCAATAAAGCGCCGCTAGTATAGGCATCCAAAAGCGGTCTCTAGTT -GAGTAGTCGAGGTCAATCTATACATCCTCAGGCTATGTGCCCCACTATACTATACAAATG -CCGCAGGAATATGATAGGCAGATAATCACGCGCAAACCTCCGAGCAAAAAGAAAAAATAA -GGGCAAGAAATGAAAAACAAACAAGTCGCGCAGTTATATACATCAACGTGGCAAATCAGA -CGCCGAGACCCCGCACCAGCAACCATAACACCACTGGGACCACGACGTTGTCATTACAGC -CAGTGAGTATCGCCTCGGTGGCGCTCGTTCCGGCTGCCGCCAGAACCGCCTTGAGCAAAG -TACTGGCCCAGAAGTGAGCGGAAAAGGCTGTCCCTTCGGCTGATTCGAGCCCAGACACAG -GCCAACCACCTAGCACCAACCAGGCACGAACCGCAATTAGTCCGCAGGTTACAGCTGCGA 
-CAAAAGCTACGCTGCCTTCAAGTGATTTGCCGCCACCCCAGAACCATTTGAGCCGACCGT -ACCTTCGGCCAACAAGGGATGCGGCCGCGTCGCCCATGCCAACACAGATAACTCCGCTAG -CCATGCTCACATCTCGTGACGGGATCTCCCAACCAGCCCATGGACCTAGGCCAGTCCTAG -GAAGATCGGAAAGCGACAGCCAAAGGGGTATCGAACATCCAATTAACAAGAAGATGTGTG -AGATGATGACCGGGCCACGGTGATCACGGCCGTCAACATACGGGGCCAGGAAATAGGTGA -GTGGTCGCGAAATTGGGGGTAGCTGAGACGCGCGAAACAGGTCTAATAGAAGGAAGATGG -ATAGAACCAAGGCTAACGCCATGGCGCAGAAGGCTGGATCCACAAATATAGCAGGAAGGA -ACATAAGCACCATCATGCCGTGGAAAATCTTGCGGCGCGTATCGACCTCGGCAACAGAGG -TGAGTTTAACAACAATGGCCAGGCCGGTCAGCAGTACCAACACGCAGTAGCCACTGAGTA -GCAGTCGGGTGATGGCAGCCCCAAAGGTAACCTGGCGAATGTGTTCCACCGATCCGAGGA -AGCAGGACGCACTCAGAATATCGGCATCAGGGCGAGGTGGCAGTTGAATCCAGTACTCAA -GGTTCAACATGAGGACCCAAAAGCGGATCTTCGAGAAATTTCCCAAAAGATAGCCGATAG -CCCAGCCAAACGGATCCTGGCCTTGCAGTGCGTGCTCTGCCACGTACTTTCGGATAGGTC -CAAGGATGATGATCAAGCACAAAACATAAACGTAAAAGGCGTACAGCCATTTGCGGACCT -GCGCCTGGGCTGAAGTAAGCGAGAGGAAGGAGGCCAGGCCAGGACCCATCATTTTCTTGC -GTCTACCACCAGGAGTAGTGCGAACACGTTCTTCTCGAACAACTTCATCAAATGCAGATA -TGGTATGGCGTCGTCGATGCACATTGCGGGCATGCTGCTCGAATATCTCTTCCGTTGTGA -TCTTGTCAACCGCAGATGCTGGCTCTGCCGGCGGGTGGGAAGAGCGACTCTCTCGCATAG -CCTTCGTCTTGCGCGGCCTTGAAAACGGGAACCCATTGGGCCCATCGCTGTCCGACGAAG -CATCGTCCGATGGAACGTGCCCCGTTCGACTCAACTTCTCGCAAAGTCGATGGTCCATCA -TGTTCAAAAATTTGCGGAAAAAGTGCGAACCATGCGGGGCACGGCGGAATTTCCAGCTAG -GAATTCTGGCTAAAGAAACCTCCCAACGCAGTATATGTCGACAAGAAGCAAACAAACACA -TGCCACCAAGCCATAACAAGGCTTTTAGTATCTCCATCTGGGGAGAGGCCGCAAACAAGT -ACAAGTTGATCAAAGCAGTTGTCAAAAGCTGCAGCTCAGCGGGAAGAATGCTGGTCGTCA -ACAAAAAGTCTAAAGTGGGTATCAACGCTTGATGCAAAGGGAAAAGAAGGATTAAAAATT -CCGCGTCGAGCCCGCGAAGCGTCAGTGGTTTGGGCACTGTGTGGTCCAAGGAGAAATGTT -CTGCAACAAGAATGGGAATCATCGAGATCATCCAATGGAGGGTATTGAATCCATGAGTCC -TGTTCTGGAATGGTATAAGAGGCGCCGGTAAAGAGGACAGGCTGAGGATGATATTGGGCA -TGATAAGTGGCGGAAGATGGTTGGACAGCGACAATGAAACGAATATGGGGATAAGAATTG -GATAGAGAAGAGGAGCCGGATCGAAGCTCGAGGGTATAGAGAAGTATGTCCGTTTCGTAG -ACCAGATGCGCCAAATGCGCCAGCTTCGGCCCTGCGATGCACGAACAACGAGCGGATAGG -CGGCATAGAGGCCCAAGACCACGAGGCCATGAGCAGCAATCTCTGCAAATCAATCAGCGC 
-AGTCCACTTTCTTAGTGGTATTAAATTTCATTTTCCACTTACCCTTTCGCCATTCCCAAG -CTAAAGAGCGCGCTCCGTCCTGGTACAAGACCACACCACCCACAGACAGCAGAAGCGTAG -CCTCCATCAACCGCCGGAGGACCTCTACCCGTCTCTCGCGTCTAAATCGCAGTCTCTCCT -CAAATGCTTCTTCCTCAAGTTCCTCCTCCGAGCTCCGCCGCGATGGCCGGAGCATAGGTC -GCACAAATGATGGCCATGGTTGCATGCCAGGTAGCCATGAGTCGACCTCGGTGGGACCAA -CCCCGCCAGATCGTAGGCCTTTTCGAGGATGGGGTGGTGGGGCAGGTAATCCCCTTAAAA -CACCGGTGCTTTCATCGTCTGCCTCTGTGCCACTGTCGCTGGTTGTTCGCGAGGATCTGG -AGTAGGAGGAGGGCGGGTGGAGACGGTCGCCGTGCTCGGAGGGAGTGCATGATCCGAGCC -GCCGGCTTCTGTGATATGGATGGGGGGAGCGATTGAATAATCGGACGTCCTCGGCGAATG -CTTCGGGGGTGAGAGGTATGCTGCTGCGAGGCGTCGCTTCTTGTTCTTCTTTCCACGGCG -AACTCATGGCGCCATGCTTAGGTCGATGAATACGTCATGTCGCGGTGGGATAGTGATGTA -CAGAGTATATGGAGGATGAGAGGACTCCCGAGAAGAGAGTGGAATACCTAACAATTAAAT -CCCGTGTATTTTGGGTGTCGGCAAATATTGAAAACGTGGCAGAGGGAGAATGACTGTTGC -TATCTTTCCTGAATGCGGAAGGGATTTTGCCTATCACATGCTGGCGGACAGTGCGCGCCG -TACATCGATGTACGGAGCATAACGCGGACGATCGACGATCTATGATCGCTAGGTATTATC -GAGTTTGGTACTTTGTCAAATCCAGTTGCTTTATTCTTTGTTAAGTAGTTCCTAACTAAG -CTGCTGCAGGACCACACCTGCACTCAACCTTCGTCCTTCTCGCCCCCATACGGATCTGCA -TGGCACAACCTGTCCTGGCCACAAGGCTTCAGGCACTGCGTGTGTGATGTATAATACAGC -CTGAGATAGACTGGAATCTCGACAGTGAGCCTCTTTCACTGATCTCAATATTTGCTTCAA -TGTATCGTCCGACGAACCCGCGTCGATCTCGATATTGGTATACTTGTAATGATTGTCCAT -GGTAGTGGAAGACGTCCCACCTTTAATGTCAAGCTTGACCTGCTCACATCGGCCTCCCAG -CCCTTGCCATTCAATATCACTTCCTCTTGGTAGTTCGTCCACTTGAACAGCCAAGAATGG -TGGAATGCAAGTGTGAGACTCAACAAGAGCAGTAAACTTGGGCAGACTCGGCACAGGGGC -ATCAGTCAGCTCCTCTGCCTGGCGGCCATATTTGATATCCCACGCATCCAGCTGTGGCCC -ATCGTCATCATCCTCATCTTCATTGTCTGGCAAAGCATGCATTCTTTCCCAGAGGCGCCA -TGCGACTTGCGCCTGGGTATTAGCGTGTTCTCCGCGCGCCAAGAAGGCCACCGCACCGAG -CCACCAGTTTACCTGCATGGCTTCACCAATTCTCCAGAGGTGCTGGAGGGACAGTACAGC -TCGAAGGCGGTAATTTTCAAGCCACGAACCCGGCCCCTCCACTTGCGCCAAGTCCATAGA -AGCAGGGTCTAGAGGAATCTGGCCAGCAATGTAAACGAGTTTGCTAGCGTTCTGTAGCGG -AACTGACAACGCTTGGGAATATGGTCCAATATTTGCGGGGGCCCAGTACGACCTGGATTG -AACATGCAGGCCCTGGCGCCGGTCCCTCGCTCCCAGGTCGACCACAAATGTGACCATCAC -CTTGACGTTGGACGGGAGTCGGTCTCCGCATGCCACTGTAACTCGTGCGGGGGGGTTTGG 
-TTTCTTGAATAGTGAGACGTAAATATCATTCATTGATCCAAAGTCGGCCATTGAATTCAA -AAGCACCGTAGCGAAAACAATGTCGTCTGTTGATCGGGAACCCGACTTGGGGTGAGCAAA -AGACTTGAGAACTTTCTCAACCTTATCTGAAATCGCTTTCATCTGCTCCCCTGCCCCAGG -ACCAGCCTCGGGGGCAACAATATTCGAAATAGTCCACGTAGAACCAGACAGCCGATGGAC -CGGCTCACAGACAGGCCAGTCTGGCGACCGAGCAGTACTCGTCAACGCGCGCCAGTTCGA -ATCATCAAATGACTCAGAAAGATCCAATGCAGATCCCAACACACCCTCAAAAACTGCATC -TAATAGTCCGGGTCTCCTAATATCACTCGGCACAACCCCATCGCCAATCTCCCACGAGCT -CTTAGCCGTACACCTAGCACCCCTCAATCTAACAAACGCAACACCCCCTTCCCCAGCCTT -CTCCTCCCGACTACCAACCTCAATCCTCTGCTTCCACAAGAAACCAGGCCCATCCAGCGC -AAGCGTCTCATACTCTCCGCCTTCACCCAACACCGCACCACGGATATCTTCCGCAGCCGC -AAACCGACTCATAGCCTTAACAAGACACCTTCGCATCATGCGGCCACCACTCCCGGCACC -AGACACATTCTCCCACAGAAAGCCCTCGTCCAGACCCCCGGAGGCGACCTTGATAATACG -AGCGTCACATCTGGCGGCTGCCATATCCTCCAAGAGCCCCGCCTGGCGTATTGCCAGAGT -ATCTGCCGATCTGGCAACGGGAGCTGGAAGTGACGGATACATCCATAGCCAAGCGAGGGG -TACAAGGCCCAGACGGTTAGCGACATTCTCGATTCGTGTGCGCTGGTATGTAGAGAGGAT -AGCACCTGCAGAGACGGCGTTTGCTTCTGGATGGGCTTGTTTTATGCGTGTAAGGAGCGG -GACGAGAGACTCGGTCTCATCTTTATCGGTTGTTTTTGTTCCATAGATGCGGGATGTGTC -GACTGCGCCGCCTGTGATGGGTTGGCGGTAGAGGGGTATTTGGAGGGCTGTTTCGTAGAG -AGGGATTATGCTGTGACCGATGGTTTGGTACATGAAGCTATCGATGTCTTCTTCTTCATC -TTGGGTGGGATTCCTCTTGTTCTGCGGTTCTGGGTAGAGGTTGGCTAAGGCGACCACTTT -ATGGCCGTTGCGAATGCAATGTAGGATCGAATAGAGGGAATCTTTTCCGCCGGAGATCAA -GGCGATCACGTTAAGCCCTGAGGCTGAAGGCTGTGAAGACATGACTGTATCCCCAGAGTT -CTAGAGTCCAAGTTTTCGCAGATTTGGATGGGTGCGGGTTTGATGGGCGGGGGGGACTTT -TTGCCGTCCCCACCTTTGCCCGGGTGAGTCTATCGACAATCGATAGACCGATAAGATGGA -AGTACATCTCGAGGAATCGAAGGGAGTTAACATGGATGGAGCATGGCTTGTTCATCATGT -TTGACTATGTGGGCCAAAAGCCCTAATCCTCCTTTACTATTACTACACTGTTCAGGTTCA -CAAGAGGGTAAGAATATTTTTTGTAATTTGATGTATGTAAACCAACATCTAAAAGATGTG -ATCTGTAACCAACACAAACGAGCGTTGCAATACAAGAGGGTATGAAAAAAAAATAAGGGC -AAATGGGGGGTCAATTAACTTATACACGAATGTGACCAGCACAAAGCAGATAAAAAATCA -AAAAAAGGTACCAAAAAATATTACATCGAATAGACAGAACCAAGCATGAACGAAATGGTG -GAAGAAAAAAAATGTTGAAAAAAGTCACAGAGAAGCGAAGTAAACAGGGTACACGAATGG -AAGAAACCCAAAGGTTCAAAAGTGTGAGGCACATTGCTCACGCCGAAAGTGATGGTCCAG 
-GAGAGATTGGAGAGaacaaacataacaaaggaaagaaaaaaagagaaaaaaaacaaGGTG -ACAAAGGAAATAATTCCACACCATCCCCAGGCCCTATCAAAAGATAGAGGCCATTCGCTT -TATATTTCGTTTTTTTTTTTCTCCCTGGACTGTTCTGCGCCCATGTGCCTCCACCAAAAG -AATCGATTCGAGGTTATCCCAGTGTGTTCCCCGTGCGATGTGTCAAAACGCTGGAAAGAT -AAAAGGAAACCGAAGTTGCTGTGCACCGATGATCCAGTTTCCCGACACCAGTGTAAATGC -CCGGACGCCAAACCCAAAGACCAATATGATCGAGCTTCCAATGAAAGACTCCGACCGAAG -AATGTGTGTATTATATATGCAGTAGGAAGTCTCTTGGTCTCGTAAAAAAAATAATCATGA -AACGCAAAGTCCCTTAGGACAAGTCACGCTTGCGCTTCTTCGCTGTTCGGCGATCCACAG -AGGGGGCACTGGTGATTTCAGTCGACCGGGCAGCCTCGTACATGACTTGGCCAAGGGCGC -CAGCATCATGTTGATCATCAAATGATACAGAACCAGCATCACTTGCGTCCAGGATGCCCA -TGACAATGGAGCCACCACCGTGGGTGCGGGCGTGCTGGGCGAGGTTGTCACTGCGGGAGA -ACTTCTTGCCACACTCGTTGCACTCGAAAGGCTTGTCCTGGGTGTGAAGAGAGCGGTAAT -GGCGCTTCAGGTGTTCCTGACGACGGAAGCGACGGGAGCACAGGGTGCAAACGAAAGTCT -TGGAAGGATCCTCAGTCAAAGACTGCTTGCGACCCCGGCGGTTGACCGAGATGGGAGCGG -ATTCTGAGTCATCCACCGATCCATCATCAGAGCAGCAGTCGCCAGAGGACTCGCAGGCGG -CCAGCGCCTTTGCTTCAGCCTCTTCATCGGCGGTGATCTTTCGGGACTTGCGGTTGCTCC -GCTTCTTGGTCTTGGGCTCCTCGATGAGTTCCTCCTCGCTTTCGATCTGCGGCTGGGCCG -TCTGGGTCCACTCAAATGAGGGCAACCCGTTATTGGGCATGAAAGTCTCGGAGTCATCCG -AGTCTTCGAGGCTGTGCTCGCTCAGGAATCCATCATCCTCTACTGGGTAGGCGTGCATAC -GCTGGCGCTTGTCACCCATGTAGATGGCATTGGCGGCAGGATGGAATTCCACCAGACGAT -TGAGCTCATCATCAGAATCAAGATCGGAAAAGCTGTCAAAAGTGGGCAGCGAGCTCAGCG -GGTCCTGGGTCGAGGAGCTGAAAGAAGGAGACGGGTTCTCGTTGACTGGGAGCGTCACAG -ACGCACTGCCAACAACGGCGCGAGGTTCCTCCTCCTCACATGAGAGATTCGGTAAAGGAG -GAAGATCCTGTGCGTGAACATTGACACACGGCTCTACGGTGAGCTGGCGGGGGTCACAGA -AAGACTGGCTAGACCCAAGAGACTGGGAGGCAAGAATGTCGGTGGGCACgggtgaggggg -atggggaaagcgaggggcaggAAGTTGCAGACAAAAGGTCAGAAGAATGGATGTGAGTGA -GAGAGGGTGGGTGAATGAAGACTGCATCGTAATTAGCCTTTTGCCCTTGCTTCCCAGTAT -CCACAATTCATATCAAAGTGTGATCTATAAAAACTTACCAGGCGTCATCGGAGGCGAGTC -AGAGCGCGCCCACTGGGGGGTGGCTAGGATTTCCTGGTGAACATCGGTCTCACATCCTTC -CTTAACGCCCTCGACCTTGTCAAAGGTGAAGAACGTGTCATGAATCGGGGTTGGTAGCGC -GCCATGGGCAGATGGGGGGCTGCTGATGGTGCTTCCCGAGGCGGATAGCGGAGGAGTTGA -TGGGAACGAGTACAGGTCGTGACCAATAAAGCGAGTATCCAAAGGCATCAACGAAGGAGA 
-GCCTTGTTGCACAATGATGGTGGGCTTCATTTGCGAGGGCTGGGGGGAGGCAATTGGCGT -CAAGCTCATGGTAGCTCCCCGGAAGGCATTGGTGGTGGCCATCTGGTGCATGTTCATCAT -GGGCTGAGGGGAGTAGAGCGGTTGCTCATACTGCATTTGGTTGTAGTATGGGACGTCTGA -CTGGTAGTGACCTTGTCTCTGTGAGTCGTGGTAGTATGCGAATGGGCCTTGCACCGAGGT -AGGTGCCATATATGTTGAGTCCATTGTGAGTTGTGCTATACCCTGTGTTAGAACCGCAGA -AAGGtatacatatatatatatatatatacataCCTGTATGAGATTTGAGTTCCAATTGAG -AAAGTTGGGTGAGATTAGTAGGATTATAGTGATTCAAGTCCGAATGAATGAATGGACGAT -TAGGTGTAATGAGTGTCGAAAAAGGAAGGACAGGTTCTGAGGAACCGGGAGACAAGAGGA -AGGAGAGTGATATAGAAAGGATGGAACCCCAAAATAGTTAAAATCAGAAAAAAAATAGAA -AAAGAGTGTGTGAGTATAAGAAAAATAGACAGCGGGGAAAAATAAAAGAGAGCAGGGAAA -GATAGCAGGTTGCGGTGGATAAATGAGAGTGATCTACTATTTTCTACTTGCCGGGTCACT -AGTTGAGGTGTGGGCCAGGAACCGGGTGCTTTTTCTTTTTTTTGATTTAATTTGTCTCAA -GAAATATAAAATGATAAAAATGAAATATCCCATATCCAGTCCGGGGCGGGATTGGGAAAG -ACACTGCACATCAAAGTGCTCAGTCCCGTCAAAAGAGACCAATCGCATTGGAGGACTTTA -GTGACTGGAGTGGATCCCGGATTTTATTTTCTTTTCCAAGTATGGCGTTATGGTAATCTA -TACGGAAGATATGGACATTTATTTCGTCGAGAGCTCTGGAAATTCTGAAAATTCAAGCTA -CCGGTCGAACATACGGAAGACCTTGGGGATGTAGTCCAAGACCAAAAATTGAGCCTGGTT -TAGGTGACAGGGACAATCACTTACTCGTTTAGTCTTATCGCAATGTCGGACAAATAAGAA -GGCTTGACACCCAGAAACTCTTCTTGTTCATGACTTCGGGGACCAATAGCCCGGGAGTAA -TAGAGGTCCACGCATCCCGTCTAGTCACCCGGGAGTACCTATGTTGTATGGACTAATGAT -TTGAAAAGACCTCGACCATTTTCCCTCACGGCATAGCGATCAACAGGTGGGAACCCCAAG -AGCCAATCAGTGACCACTTTGAGCTGCACATCCCAAGTCGCTCAGGACTCGCCGATCAGA -CTACGGTGGCGACCACCGTGCAACCTTCTGACTTCGATCGTAAAACGGTCGGCATTGGCT -GTTGCTAAATACCATTTGTCCCCAATTTCATATGATCCAGCCCATACCTCGGTGACCGTA -AGAAGATAACCCCGACAAACTCTCCCTCCCGTCAACTAGTCTGAAATCCAGATTAAAGAT -CGTATATTGTATGGAGTATAACGTGGTTCTCCAGAGTTCACATATATGGATACAGTTTGT -AGTTTACTCTCTTGCCACATGGGTCTTATGTTGCAAGTAAACAGCGTGTTTCTTTCCGGT -CGCTGCAAGGGCCATGCCATGACCCATACAAACAAATCCCACATTGTACACGTCAATGGC -ATCAGATTTCAATATCCAGGCAGAACAGAAGAAAAAAGAAAATTATGTTGTATATGCAAG -CCCAAATAGGCTGCGAGATTCCAATTCACTTGTGCGGCCTACTGCAGGAACCAGCTGTGG -GGAGGGCAAAATCACACAGGTGCATCTAACACGCTTATTGGGTACTTGGCTCTCAAGGAA -AAGGCAAGGCTCAGATGCAGTTGATGCAGTTTCCTTTGAAGCCCAAACCATCGAGGGTTT 
-TGCCGGATGGAGATTCCGGTTGCCCTAAATCAATTGATTGGAGTGTCCCCCGGACTATCT -AAAGTATCGGGGGCGAAACTAAAGCTAATAATGATAAGTGTGCTTATTTTGCCCCAAATT -CCAGTGTCTTGATGTGCTTCATAGTCTCACCTATGATGCATCTTATTGGATTGATGGGCT -AGCCATGGTCGCTAGGTGATGTGTTCCGATTGCAGGGTGATGTGATAGCACTTTTTCTTG -TCTCACGGACCTGACCTGTAGATCCGGGGGAAAGAGTGTGCTTCGTAGTGTCAGCGATAA -AAAATTGATTATCATTACTTGCAAAGTAGTATGTAGTGACTGTGATTCATCTCATCCCCC -TCAGGTACGGTACCTTGGAAGCACAAGCAGATGCATATCCGACATATCGACTAATACAGC -CCAATGACATACAGCAAAGTGGCCACGCAAGCAAAGTCCAGTTCTATTCTTACCAAGTCG -TGGCACTTCACATTAGAAATAGACTATCCACCTTAACCATGCAAATCTTCCAAATTTTCT -CTTCCAAATCTTTTGAAATCTTTCATAGTCCCTTTTTTTCCCTCTTTGAGTCCAGACCAG -GGAAAAAAAAAAGTAATATCCCGCTGCTCGATGATGTCGGGGGCCCGGAGCATCTCAACG -CGTAATTGGCCGCTTCTAAAGATAGTGTGTCCCTCTGATTGGCCTCTAATCCACTCGCAA -GCAAAGCGCCAGCGTCAGCGGAGACTGGTCTTTATCCGTCACCAATCAAAACCCTATAAA -TCACTCCAATGTGGGCTTGGGGAAATTGGGGAGTCTCTAATCGATATTCATTACCGTCCC -ACTCTACAAAGATAGCCCCCAGTCAAGTCCTCCGTACAACAGACACACTGGAGATACCCG -GTCCTCCGGGCCTCGAGTGTTTCCTTGCTAGGCTTAGATACTTCCCGTAGAATTGGACAG -GGTCATGCATGGAAGGCTGTCAGGTCCTTCTTCGAGAAATCCTAATGTCCCACTTTGGAA -TATTGAAGCTAATTACCTTGCGTCACAATTTTCATTATTAAGCTTAGTAACGGAATATCT -CAAAAATTTCGCCATTTACTTTGTGATAGGCAAGAATCCTTACCGCCACCGTTGTTGCAC -AGACGAATCGACTTTTTTTTTTTTGCTTTTGCGAGGTCCACGGGCTTGCTATCGACGCTT -AGAGAACAATGGAAGCACACAGTGCTCCGAAGCCAGAAGCAAGGCACTATTGGATTGAAG -TCCACCTTTTCACTTTTCCTGATCATCGGGCCTTTTCCCTCCAGATCTTCACTCTAACAG -CTCCGAGAAAATTCCAGAAATAAATCCAAGATTGCACACGGGATAGGAGCTACTTCCAAA -CAGTTTTTACTAACGGCCCTTCTAGCCTCGACACCAGGTTTTAACAAGAGTGGAGAATAA -ATATGGATATGGAGGCGGGCGGGCAGCTCTTAGTTTCCAGCTAAAGGCCGCGGAAGGCCC -TCTAGTGAAGCCGGACCCAAGTGGCTCTCGAATGGAGACCCGAGTGGATGAAGTGTACGG -AGTACGAGCACAAAGTACAAATTACACTCTTCTTTGTCCAGGAAAAAACAAAAAAACAAA -GAGGCAAGATCATGCTCGATCTGTGAACGCTAATCGTAAGGTAATAGGCTAGAGATCGAA -GAATCGACGGTGTGATTTGAAAGAAAAAAAAAAAGGATCCATGCTTGGTCTGGGAAGCGA -GCATCTTCCCAGTTCCCCGACGAATCATCGGTTGCTGGCGGTTAACTATGGGGGTGTCTG -TGTGAGAGCTCCCGTCTGCGATGCGCGAAACCGTTTGGCGATCATCATGCCTATTTCCGA -GTTTAAGCCCAGTCATTGGAGGCTGACCAGTCTATCTGTCATGGTTGACCCCAGATGCGA 
-TATCTGCATAGCATCAAATAAGTTTTACCCCAGGGACCGAGACCGAGAGTCCGCAGCAAG -GGCTTCGTGATAGGTACGGGAATACCGTTGGGATCTCCCATGCAGCAGTGGGCTTGAACG -AAACGTCTTGGTCCACCAGGATTCACCGCACACATTCTACACATCCAAACCCACATTTTG -TAATTTGCTTTGCAATCCACCAAGAAAATGCAATTTCACAAACGCACACGCCTCAGAGGA -TTCGGGGTTTGGACAGATCAGACCATATGAGACTGAATATTTCGTAAATGTTACAAAAGA -GATGTGGAGAGATCCGCAAGGAATTCACGGCCATCTGTTCTGCGCTTGTGTATCTGTCAA -CGGTTCTTGGCGGACCTACAGGGACTAGTTTACCATTGACTAAGACTATTCGAGGTCATT -CACATGGGACCACCGAGGCTTCAGATAAAGGCAAAAAAATATTGCTTGTGTGGAGTGCAC -TCAATACTCCGTGCTATGGACATACATAGCACATCGTACGTTTGTGGGTTAATTGAAAGA -GTTGGATAATCATGCCATTGAGAGAAAGAGAGAGAGAAAAAAAAAAAATAAAGGTGCATT -CTATTCGAAGCCCCAGTTCTTGTCATGTTATGTGGGCAGATCCAGCCTCTACTGCTACGG -AGTAGGGTTATCAACGCCCCGAAGCCAGCACGGGTACCAGCCTCGGGCTTGAAACTAGAA -GAACCAAATACCAATTGCAATTCCCATTTTTCCAATTCAGGTTACCAGAACGCAATGACC -CCGGATTATCGCCGTTGGCCACTGGACGCTAAGGAAGGAACAAATTGATCCTTGGCGCCA -CTTGGACGGAGTAGAGAGAAAAAAAAAAAGAAGGTAAATCCGATAGCAAAAGAAGCGAGA -GACTTCTCGCAGATCTCCCAACTGGGAATGGACTGGGGGCAAATTTGGAGAATTTGATTT -GTGTTTTAGATTCGCAATTAATTGCCTCTGTATGTAGGATAAGTCGGGAACTTCCGCACG -TTGACAAGGCTAAAATCTGAAACACAAGTTCAAATGGTATCTAGGGCAGAACGGGTAAGG -AACACGCGTTTTTTGAGGCTGCTGTGGACGCTAGTGTACGCCGGGTTCTTGGCTCTTGGC -TTTTTTTCTCTTTTCTTTTTTGGTCTCTCGATGGGTCTCAATATTTTTTGGCGTAGGATT -ATTGTCGATGGTCGCATTTTGAGCGGAGCCTATACTTTGGACTTTGCGGTGTACCTCACT -CTTCTGGACTTGCTTCCGTTTTGGGAGAGCACTGGCCTCAATATGGCCTGTTACGGTAAT -TTGCTGGAATCAATAGAATGTTACAAATGCTAATAGATTAATTCGAGATGAACTCGGGTA -TGCCTCACGGCATGATACTTCATTCATATcagtctccggtctccagtctctagtctctag -tctccagtctcTAATCTTAGAAGGCATCACCTGGGCAGATATCCTGTTTCAATTTGTCTT -ATCCGATACGCCTGGTTTTAATTTAACGCACTTTTGCCACAAACGCCAAAAGTGTTAGCC -GACACTGCCACCCAGGCGGTTTTGCGCACGGAACGCCCTAAACCTGAAGTCCGTACGGAG -TACTCCGTACAAAAAGGATTCCATCGCAAAACCTCCAAGCGAGCAAAGTGCGGGGGGGAA -ATCGAGTTCCACACAAACCCATGGTTGAGAGCGCTGCGGGTAAATCGTCATTTTCCTGTG -GGAGTTTAGACGGCCCACCGTGAGAACATAGACGGATGTATGGAGTATTGCCTATGGAGT -ATGTGTTGGAAAATCTCAACACATTCTGATATCAGAATCATTAAAGCTAGGAATGTGTTT -CAGATATCTATACTCTGGACTCTAAAGCCAAACCATCTCACTCTCAAAGTTCGAGGACCT 
-TGTGTAATGGGATATCAGACCCGAGACACTAGACCTAGTGTGGCATACAGACCCTGACTT -GTCCACAAACTGGGTTAACCTTCAATTGATGGGATCGGTCGCAACTTGCTCTTTGGAGAT -CTTTCGAACTCTTCAGAGCACGTGTCTCTGAGAATATACGGAGTACAATATACTCACTAA -CTATAAAGTCAATGCGAACGTGGCTAGAAGAACGTGGGTTCGTTTCAGGTAAGATCTGCT -GTGCATCAAGCTTATGGTCCACTATGGCTATCAGCTTCTACTTTTGTGGTTCGACCATCT -CCAACCGTGGAAGACCATCGCTAACGCTATATCAGGTATAGTGGGAATAGCCAAAGCTTC -CGAAAGCTCTTGATAGCATTCTCCTGCCGCTTCAGCCACAGAGTTTGACTCCGTACAAAG -CTTCGAATGACAGAACGAGGAATGGTCTGCGAACACTATCTCCACTTTCCACTGAGGGCA -TAGATCGGCCTTCGGTCCACAAATGCGACAGTAGAACCAATGAGGACTACCAAGTCGCGA -TCGAAGGATTTTTGAGACTCGGCACGATCCTGGGACAAAAGAAATTGATATTGCGGAGAT -AGATTACCCAGCCCGTGGTGGAAATACGCACAATTTTCCCTCGCCAACCATATAGCTCGC -TGGATTTCGAGTCCCTTTCAGATTCTGGAACCGTCTCCATCTGACCCCAACATCACCGGA -AACTCCAAAACCTTGGCCGAGATACCATTCCATAAATTGGATAGTTACCCATCGGGGATT -TCCCAATGAGGCCAAGTGGGTCGGAGAAATAACTTGGATCATCGTATCTTTTCGGACCTG -GATCTCTGGTACGAATGGGGAGCGTGCAAAGACTCCCGAGGTCCGATGTGTTTTCCCCGA -GCGCTATCTGATCTAATCTGCCATAGGTAATTTCCTTATGTATCTCTGGGGTACATATTC -CCGAGGTTCGATCATGTCATATGTGGTTCATGATTGCGAAAGGATATACTTTCCGAATAG -CCATGATGTGCAAAACTTTCTCCCACCACGCTATATCCCGTAGTTTATGAACATCTGTCT -CCACCCAAGTCACGAGCCCCCGAACACACCAATCCTGTTGGTCACAGCTGCAATAATATA -CAAATTTGCGTAGCGTGCAAATCATGCGGAGTATGTACGGAGTATGCAGAATGCACTTCG -TAGCCACTATTGCTGGGCGTATCCAGCTCGGTCCGCTTTGGCGACTACGTACATATCTAC -ATTCGTCCGTGGGTTATCAGTGATCGACGAAGGGCTCCATTTCCCATTGTATTTAGCCCA -CGGCTCAAACCAGGGAAGTGGCTCTTTCCAACGTTAAACTCAGGCCCCGGCAAAAGCACA -TCATTCTTCCCGCGCAGTATCTGGTACGTAACAGATTTCACTGATGGTTTTCTTTACTTC -GGCTGACGGCTATGACTCTGGGCTAAGAGACAAGTCCACCGTTCTCCGCATACGCTGACT -GTCGCTTACGGTCCAGTGTGTGTGTAGAGTCCAAAGCGGCATCAGCCACGGCTTCGGAGC -TGGTTCTGGATCCTCGAGCTGAAACCCCAGCCTGAGCTGCAATGAAACTTGACCAGCAAA -CCTCCAATGCTGGACATGTTTCACGAAAGCACTCGAGATGTGAGCAACCTTTTCACGAGA -CAGCGGTTCACTGACAGGCCGATGCCGTGTCTTCCTTTGTCTTTTCTGTTGCGCTTGGCC -AGCTGAAGCCTGTCCAATCGAAGCCACGGCGGAAAGCACGTCTCCGGCAGTGGATTCACA -GTCCAAACTGCGTGTTTTGTTCGCAGGGAAACACTGGACGGTGCTCATCTCGGGGTGGGC -TAGAGCGGGTCTTGGTACCTTATACGGAACATCGATCCAAAAACGTGGTACCGGTTAGGT 
-ACCTTGCAAGACTCGCTACAACAAACAACGCAACTGACTCTTGGTCTTAATCCAGTTAGT -ACAAACCTCTCCATGCCAAATCCACATATATCCGACCACCATAAGTATCCATCTTCTACA -ACATACCCACACGAAACAAGGGAACCTCACCAGCAATCTTTTGTTGAAAAAATCATCGTG -GGGTCCGGCAAACCCGAATCTTGAGACAGCGATCATGCCATACATCGTACAATGTCTCCA -ACAAGTACATTGAATATTGATTCATGAAACAACCACAACCACAATCACAAAAATACAGAT -TAACACAGATTAAGCGCATCTACGCCAAGTCCTCATCAGGTGTGCTTGGGCCAGCCAGTC -CATGGTCCTGCAGAAAGCCATCCTCCGCCACTTGAAAATACCGAACCTACAGTGGATATG -GACATGGAGTGGGGAGAAAAAAAAAAAACAAGAGATACGATCTCGGCTAAGTTCCCGTTT -ACTCCTACTGCAAATGGCCAACGAGACGAAACAAATTGCCTCGCGTAAACACATAGAGTG -CCAAGAGCTCCAACTCCATATGCGGCCCGCGAATAGAATCACCGTGTCCATACGGAACAC -GGCTGATTGCTATATCCCTCGTTTCTACTTCGCTAGGTATGGTATGTGCGTTGTGAGAAG -GCGAAGATGTCGGGTGCATCCGGTCGCCGGCCGGTTCAGTGTGGCTTCCGGGCCGAATTT -GATATTGTAACGGCTCCGCGTACCAGCCTTCGATCCTAATATGGGGGCTCTGGTGGTTTG -AAATTTGCACCACATGGTCGATTGCTTTTCGGGTGGATGTTTTTATATATTACAGCAAGC -TTTCGGCCGACATTTTGTCACCCAAGGTCCTATCCTCAGGGCTTGGGTTTGTGCTGGGAT -ATTTCGTCTATCTGGTACGGTCCAAGGAGCTGGAAGATATTTCGCACATGAATTTGAGCT -GAATGTGCTCTTGAATGCATTACTAACTCTAATTGCGGAATTTGGAGACTAGAACGCAAT -GCCAACATGAGCGTTGGGAAGCAACCAGTCCTATATTGAAGCTCGTCCTAGTTGTAGGTC -GAGCTCTCAAAGAAACCAGATATAAACAGCAAAGGGCCTCAGGAATGGTTACATTGAGAT -CACGCTCCCATAAGGAGTCAGTCTCTTCCTATCATATATTCCATAATTCTCATCACCGAC -CATGTTTATGgaagaagaagagagaggaaaaccaaaatgaaaaaaagaagGTTCCTCGCA -TATGGCTTACGTAAAGTTTCTGTGTGTGCCGAAAACCTTGGAACCTTCCGTAAAAAGGTA -GACGGAAAAATTGAATTTCCGGGAATCAGCATAAAAGGAAATCTCAACCTGCGGCATTCC -TCTTCTTTCTTTTCTTCTTCGTGCGGCGGCGCAACTCGATAACCCGTGTTGCCTGGACAT -CTACGGCCATTAGTCTGGCTTTCAGTGATGTACCCCAGGTGTCAGTTGCGCCACAGACGA -AGGGAATTGCACATTGATTCTTACCTTTATGGGTCTTTCTGATTTCGCAAGGATAAGACC -TTCTGGTGGGAGTCAAACATACTTTTTTATATTGTTTTACTATTATTGAGATTTTCCATA -GTCATAGAACAAATCAAATCAAGTGATGAAATTTAGCACAATCTTTACAGCCAGCTGGTT -CTTTCCATTAAGCGGTATACTGTACTCTCCTTGGCATAAATTTGACACTAAAGATTGACC -CAACCAAGCAAGCTCTGTCACTGAAAATTCAATCCCGTAAGACAGAAAAAGCGCTCACAG -TTCTTAAACTCCATGGCGTATAGGCTTGTTTTCTCCAAATTCTATGTTTACTCGCCGGGC -TCATCACCAATCTTCTCCAGCTTGCCCATAGATCGGTTTGCACCTGCCTCGCCAATAATA 
-CGAATGCGATCAACGCGCAGTTTCTCGCTATCCCCGTCCCCATCAACGAAGAACACAACC -AGCGATGTCACATTCTGGAAATTGACAAACCGCAGGGCGACCGTAGCAGTGCCAGTCTTT -GCATCCCAATCACCAGACTGGATCTCCACCTTCTGCACAGGGGGAATACCATCCGCCTCA -TCGAATCCAAGCACATGCGAAGTATTCTTGTAGATGTGCAAGTTCCTAGGCCGCATGGGT -GTCTCGTCATCGTCGCCATCGGCGGGAGGAAGGGAGGTAATTTGAAGCGAGTGGACCTTC -AGTGTCGAGTTGAATGGGATGTACAGCATCAGCTGCTCGTCGGTATCGCTTTCAGTCCAG -TCTGCCTGGCCAGCAGCCTTACCCTTACCCTTTGCTGCCAGTCCGGATGGCTTCGAGGTG -TTGAACAGAGCACGCGGCTCTGCCGTATCTGTATCCCGGTTCAACAATTCCAATCCCTTC -ACATCAACCTGGTCTGTGATATCGTTGTAGCCCTTGGCCACCGCGCCACCAAGCCAGAGC -TCGCCGGAGCTGCTTCCTTCGCCGGCTCCTGCTGCGTCGTCCGACTGGCTTGCCTCGGAT -GCAAGCTTCTGGACAATCTGGTTCAGCTGGGCGGGGTTGGCGCCCTTAACTGTTTGGTTC -ACGCGGCCGCTCTTAAAAACGATAAAGGTCGGCATTCTAGGTATCGTGTGTGTTAGTAAT -TGAGTCGAGTACTGTATATTTGGACGTGCAGGTCGGGCTAGGTTATAGATGCTTGGATAA -ATTGCACATACGCTGTGACACCGTATGCTTTCGCAAGCTCTTGTTGGTGGTCGACATTGA -TTTTGGTGAATGTGATTTGATTTGGGCGGGAGAGCTGGGTCGCCAGCGAGTTATACGCTG -GTGCAATGGCCTTGCATGGTCCACACCAGTCGGCATAGACTAGTCAAATATCAGAGCAAG -GTCAGCGACTGCTCTAGTGGTTCAGAGGAAAAGCTAGGATCGCAGCGGGGCAGGATCGCG -CGGCAAATACTTACAGTCGGCAACAACAAGACGGGAAGAAGTAAGGAGCGATGAGAATTG -CTCCTTTGATGAGATCTCAACGACAGACATGGTGAAGTTGGGAGGGTAATGAGTGTGAAA -TGGAGGGTGGAAGAGGATGGAAAGACGTGAGGTTGTGCTTTGCTGCCTCCGCATATGTCA -TCAGCCCCTCTACACAATCAACAACACGAACCACGCCAGGGAATGTGTATTGCCTCAGCA -TTTTTTAGGTTAGCTGGCGATTATTGGGTCCGTTTTGGAATTGCAGGATTATTCTTATGT -CAACTCAAATGCCAAGTGATCAAACATAGTCCCCACAGAAATATTCGAGATATATATTGT -TACAATGCTCGCCATATGATCGAAATTGATTTATGGTGAGCTTTGTCTTATTTGATAGTT -TGGTATATGCCAAGACATAGATACATACACAGAAAGTATAATTTCATCAAGATGCAGAGT -TAAATGTTGAGAAGTAGCTATATGGTGCAGCATTCAGTCTCATCATTGCATGCATGAGAT -TGAGCCGGGCCTAGCTCGAAGATAAGAGGCCAGAGTCAGACTTCGGTGGCGCTTGAGTTT -GATCTCGATCAGAATGAGATGGGTCCAAAAGCAGATATACTACGGAGTACTTGCTGTCCG -CTCTTGGTATACACCAGGCCCGACGAGGCTGGTATGCACAAACCAGGAGAAGCTGGTGAC -CCAATATGCAAAAGACAAACTAAGCCCAGAAAAAGGAAAGCCCACAAGAGCAGAAGAATG -AACGTCGCAGGCTGTACCCTATCTGGACGAACGAGCCTTGTCCCCGGACGAGGCATAGAC -TAACATAGACACAGGACCACGGAATGCCTTTTATCGCACAGCGACAAGGCATGAGGTGCA 
-CAGTCGGACATGAGATTCCACAGAATCCCGGAGCAGAGATGACTACCAGAAAATGATGAG -CTGAAAACCATCTCTTCATCCTTCATCTGCCTCAACGGCAAACAGAATTTTCAACCAGAG -ACTGCACCCGCGACACCCGATGATGACCTCGCCATGCTCAGCCTCCCTCTCCAGATCTTC -CTCGTTGACCAAAAACCCGCGCTCATCACCGCACCGACATCCGCGATACCAGCAGTCGCC -GTCGCCGCCTTCCGCTTCCTCGCAGGCAAGGTCCTCTAAATCCACAATCTCCAAGCCAGT -ATGGAATACGTCGCCGGTCTTCTCCCGTCCCGCAACCTTCAACCTGTCCAGACGTAGAGA -GCGATCATATTCGGCGCTGAGGACGGGGTCCGACAGTGTCTTGTAGGCGGTGGTGATCTG -GTCGATGGTATACAGTTGTGACGAGTTGCCATCGGATTTGGCAAAGAGGTTCGGAGTAGA -TGTTGATGCAGACTTAGTCGAGCGATCTAGCCCGGTCTCCGCTGCCACAGCACCCGCCTT -GTCAGGGTGGTGTTTGAGCAATGCTTTGTGGTATGCGATCTTGATCTGCTGTTTGGAGAG -GACTGTCGACGAAGACCCCGTGAATGGCAGAGATAAGATTTCATAGAAATCCGCAGCGTG -TACCGTTCTCGTCATATTGAAATGATATATTTGGCTTTCTGACCAGGGTATGTTCGTTGT -ATCTGAGTGGTTGCGCTGTTTCCGTGTCGCGCCGCCGAGTCGAGCGACCAGTCGGAGGTG -CGCGCCAGGTAAGAGTGACGCCTCGAAGTCTAGCGGAGAGGTCCAACCGTGCGCATTCCA -GGGAATGACGATAATCTGGGGTAGCAGGGATGCTGCAAAGCGGGGTTGTTGAGCTTCGTT -ACTCGTAAACGAGAGGTTTCTAAAGAGTTAAAAATGGCGGGTCGCCCTTGCACGCCACTT -TGGGCGAGGGCCTTCCGGGCCCATGTTGATCTTGGGCATTGGCTCTATTTCTCTCCTTCA -TTTTCTCGATACTGGGTTATTTTGTGTATATTTCTTTTACAACATCTATGACGTGGAAGG -GTATTCCCTAGATGCGATACAGCTTTGTTAAACTTCACCCGCCGGCCCGATGTGAACCAT -GTTGTGCTACCTCCTACCCAAGGCGAAAACCTACATGCCGCGAACACAGCTGGAATGCAA -AATATGGAAGGAAATCTAAAATTTCAATGGCAACAAGAAAAAGAATCATAGTGAGTAATG -ACTCGGATATCAAGACAGGTAAATGAAAGGAGATGGGAGTATGAGGCACTTTTTGAGTAG -TCCCTTTACTCAGAATCGGAAGTCTTGAAGACGATGCGATCGTCAGTCTTGAAGGGCACA -CCGAGCTCAAGAAGCTTGACAAAGACATCAGAGAACTCCTTGAAGAAGGCATCGCTGTCC -TTGGCATAACGCTCAACGTGCTTCTTGAAGCCCTTGTCCTTGACAAGAGCCATATCAGTG -GGCAACATCATCAGAGTGCCAGTGGTCTTGTCAGTGAACTGAGTCGGGCCATTCCACTTC -TTCTGGACCCACTTCTCATCGGCAAGGAGACGGAAGAATTCATTGGTGAACACAGTAGGG -CTGAAGTTCCAGGGGCCGTCGAAGCCGGAACGATCGGTGTGGCAGCGACCCAGGGCATGG -GCGCCGATCAGTGCAACCATCTCGCGGTCATCAAAGCCCATGCGGGAGAAGACATCGCGG -ATGTGACGCTGGTCCTTGGTGGCGTCAGGAAGGCGGCCGTCAGGAGTGCAGGCCGCGACG -TCCCTGTCCTCACGGCCAGGTCTCCACGGGATGGAAGGACCACCCAGTTCCTGGATGGCA -CAGGCACCGGCGAGAGTCCACAGATCGGAGTAAGTGATCCAGGGGAACTGAGCCTTGATG 
-GGATCGAGGAAGTTGCGGGCAGTCTTGAGACCGGCGTTAGCGCCGTGATCAGATTCCGGG -GCGAATCTCATAGTGGCACCGTTGCTGCCTCCAGTGCCGGTCTCCTTGTCATAGGTACCA -CTTGCGTGCCATGCGAGACGAACGAGGACCTATCCAGAGCAACAAGCACGTTTAGGTTAG -CGCTTTAACCTTGGATTATAGCATCAAGCAGAGCCTGGGAAGTGACTGCACTTACAGGAC -CATAGCTGCCGTCATCATAGTCGGTCTCGTCGGCCAGACGGCGAGCGACAGCATCATACA -CCTTCTGGTAATCGGCTTGGGTGGGGACGAAGGGTCCCTTGGGGGAGGAGTCGCCGTTCA -GGTACAGGTAAGCTCCACCTGCGGCACCGAGGGCACCGAGTCCAAGAGCCCAGAGGCCAA -AGGATGAGCCAGACTTCTCAGAGCCAGCTTCTGAAGCATATCCGCGACGACCAGAGGCGC -GGAGACCATTGGCGGGGATGGCGAAACGGGTGCTGCGCGCAGCAGGACGGAAAGAGGAGG -TCGGGGTCGAGCGCAGGAAGGCCCGGGAAGCAGATCTGGCAGCCGAAGCCATTGTGATAG -GATAGAGAGTGTGTGCGACAAGGAGTCTTAATGGGTCAAGAAAAGGAAAGTAAAGGTGGG -AGTACAAAGAAAGAAGGGAAGAACCGGGGGGATTTGTACTTCTCCTGTCCCATTTCCTCT -AATATGCAGTACAACCCTGAGCCAATACTTCGGTACATCGTACAGAAGCTGATGGAATCA -TAGCCAATCAAGGGGGCCACATATGCCATTGCCTCGGTGTTTTCGTCTCATCTCCGCATT -CCGGTACCATCGCGGAACATCTCCAGACCTTCACTTGTATCAACTGTCCTTCTGTCTTCA -AGGATATTGATCAGAGATTGTATTAATAGAATAATACTCCGTGGTATATTGAGTGCTACA -TTGACAGTACAGAGCCACAGACACAGTCAAATGCCCAGTATCTTGGAAAAGTATTATTAA -CAACCAAGCGCTGTTTGGTCAATGTTGACCCACCCTCCCTTCCGACATGCTTACCCGATC -AAGCGGCCTCCAGGCTTGCTCGGTAATAAAGAAGGTCTAGATACTACTGGCAATAAGCTA -CATGGATCAATAATATCTAAATATCTTGGCGAATAAAGGTTTGATCTTGATCTTATGCAT -GGTTCTAGCCATCTGATTACTACCTATATCCATGGACAGTACGTATGACACGGGATTTCT -TAGATGCCCATCTTTACATTAGTTATTGCACTGACGGAGCAGGAGTTGTTTACTAAGAAA -ATCAGTATGGATCTTGTTCGAGATAGATAAAAAGAACAAATATGTCTCTCTTCTCTAATC -TTGGAATTACCCTAAAGATTATACTATCTCTTTGTACGGGGTAGAAGCTTAAGCTTTCAT -ATGGACTATGTTCTCACATTCTCTTTGCCTTTGCAACCAGTGCATTCAATGAACCCTCCA -GACGCATTACAGTATCATGTGCTTATAGCTTAAACTACTGTATCAATTTCGTAATGAGAA -ACTACACTGCTCTCTCCCTTGCCTCTTGCATTTTAGTTTCGATAATGGTCTCAATTTTCA -CCTGCTCTCCTCTACTGAGCACACCGAATCGATTCTCGATCGCTATAAATGGACCGAGGA -ACTTTGCACCAATGACCTTCCAATAAGTTTCAAACATAAAACTGTATCGTAAAGCGAGGC -ACAACTAACATAGTCCAGGTTCTAGCCACTTTTCCATGGCTATTGATAGTCTTTGGAATT -GTGATAGAGAGCCGTCTAGGATTTTTCTGGCCTCACAATCCTTGAGAACATCGAGGAATA -TGTGAAATTTCGATATGAAGCGTAACAGAAACCTATTGAGGTCAGTCTCCCAATGCTCTA 
-GGGTTTCCAACAAAGGCCACCAAGGAGCGGCGAATGTAGCTCAGCAGGGGCAGTATAGGT -AAGCTCCTAATCAACAATCACAATAATAATAACCTTCATGTATCAACAAGGGATTTCTGT -AGCCGGAGGACATGGCAACAAAGCCAGACAAAATTATGTTCCTTGGATATCAGACGTTAG -AGCTCGCTCGCGAAAAAGAAAACGCGGCATATACTTCTTTCGGCAGACACTATCACATAT -AATTGCGTCATTGCGTTATAGTTCAAGAGGGTGGAACTATTGATGAGCAAGCTCCTCGAG -ATAGTCTGCTGCACTTCTGAAACGTTTCGGCTATGAATATGGTGGGGAATATTAGGGAAC -TGGATAGGTCGGTTTACATCGAAAGTGACAGGTCGTTTCTGCATAGTTTACACCTATCAC -CAGTGTGTGTGATATCCTATTGAGACGACGCGGTGGCCATTTACAAAGGCGATGCGGTGA -CAGATATTGAAAGCATTTTTAAAACCGCGAAGCTGTTGCAGGTTGACTATCGCAACACAT -ACCGGGAAGCAAGCTCGCCCGCCAATGCCTCTGATGAGTTTCGGAGCATATTGTGAACTC -ACACCTGAAAGAGTCGATGGATTTAACTGCTACACTGTACTGCAAAATTGTCAAATTCCA -TTTTGTATGCCCTGAAAGAAGGATATGTGAAACCAAGGGTCAAGGGGAGAGTACTCTATA -TGGAGTAGAGTGCAGTGCATGCTTCGCGTAGGTCTCTGAGCCAGGGTCACGTGACAACAG -CGCAGAATTTCATATCTTAGGATTATTTAATTACTCTGGAACCTGCAATTATATGGTGGA -AATGTAGAATAAATTTCAAAGATATGTGCATTATTAATTGGGATGTCCGTCGTAAAAAAG -TCCTGCTGTTCAATAGATATGAACACCAGGGTGGAGTAGACGCAGCTCTGCTAAAAACTG -TTTGGTTTTGATATTCCACGAACGAACGATTAAAAAATCACCCAAGAGACGGTAGTGCCA -CAGGTAGACACCACGCGGGTTATGTGGTTTGAGATTTACATATCTGATTGTTGGATCCGA -AGTCTTCAACAATCCTTGACATTAAAGGATTGGATCTTCCAGTCTCCAACAACACATGAA -TTGTTGAGGAAGAGGAAGAAGTGTGAAGTGAAAAAGTTTTTATCTTGATTTGGCTGGCAC -GTGATTTAACTAGCGCCATCACATGACCCGGTCTTTTTGTATCGAAAATTTCCGCGCAGA -TTGTGAACTTTTCTTCTTTACACTTATTCGAACGCATAATCAACCGCCAATTCCTCGTTC -ATAGGATGGTTGGTCATATTGGCGTACTAGAAGATATTGGGTAACATCATGACTTCTCGA -CGGCTCCAAAAAGGCTTCGGCCTCTAGGCGCATTGGTAGATGCGAGTCATTGTTGTCCAG -ACATGCCATTTTCCCCCTAGTATTATTAATATACACAACCTGTTGACTTTCGGAGCACGA -ACACCGTATGAGCCTAGGCTCGGCTATAAAAGCCTTCGTAAGACTATTTAGAAATGTTCT -AGAAGAACCTACTCCGGATGGAGACAGCTATGCTAAAAGAAACACGGAATATACTTTGGT -GGTAGCAGAATGGCTTGTTAACTCAATTCAACTCAGGAACACTGGAGGTAGATACTTTAA -AGTCTGACTTGAGTAGTTCCCGACTACTCCGTACTTGGCTCTTTTCTTGAACTATACTTC -AATAGTGTACATGTATGTCTCATTTGGAATGATCTGCCCCTCGGACGTCAATCGATCTTT -TTTCTCCGCCCCGGGAACCAGAATTCTTCCCCTATTCCACTTTCTTGTATTGCTGCCACT -CCTTTTATTTATTTATTCTTGTAATTACTTTTTTTGGAATTATGCCCACTGGCATCAGCA 
-AGCGCCAGCAGGCGCGGAATGAAAAGACCCTGGCGGAGCTGATCCGGACCGTTCCTGGCA -ATGACCGATGTGCCGATTGCGATGCCTTGACCCCAGGTTCGTCCCGAGAGATCCTTGTTG -TTGACCACACTAATTCTTATCTGAATAGGATGGGCAAGCTGGAATGTATGTCACCACTTC -CTTTACCTTCGAGCGAATCCACAAACTGACTGTTCGTCTCTGACAGATGGGCATCTTCCT -TTGCATGCGATGCGCCGCACTCCACCGAAAACTGGGCACACATATTTCAAAGGTCAAATC -TTTAACCATGGACACATGGACATCAGAGCAAGTTGATGTGAGCATAACTCGGCGCGCAAT -AGCAAAGATTACGAGCTGACAGAGTGGACTTTATCAGAACATGAAATCGCACGGTAACAT -CCTTATGAACAAAATGAACAACCCACGGGGTATCAGGCCGCCGGTTCCCACCGACATTGA -CGAGGCCGATGCATGCATGGAACGGTTCATCCGGCAAAAGTATCAACACCGCTCGTTGGA -AAATGGGAAGCCAAAGCCCCCGAGCAGAGAGGATTCGAGCTACTCCAACCCCAGGGCCCT -GTCCCCGGTGGAAACAAGAAAGAACGACTATAACATATCTCCCGAGGGCTCTCCTCCCCC -ACTTCCGCCCAAGTCTGGAAGGTTTTTCAAGTTTGGTCTACGATCGTCATCTTCTACCTC -GAACCTTCGTCGATTCGGTGGCAAGAGCAAGGTGACTTCGCCAACTTTAGATGATAGAGC -CTGGTCACCACCACCATTGCCATCCAGGAGGACAACAGGGCCTGCGCCTCTCGCCGATGT -GACAACAGCATCTTTCGAGTCTAAAATGGCGGCATTGCAGGAGATGGGATTCACCAATGA -CCAACGAAACGAGATGGTTCTCAAGGGACTTCATGAGGATCTCGACCGAGCGGTCGAAAC -ACTGGTCAGGCTAGGCGAGGGAAGCAACCCTGCGTCAAGATCCAGGACTCCGGTCGGGAC -CAGCACTGCTGCATCTGCACGTATAGTAATCTCGAAGCCTGAGACCAAAAACCCATTCCC -CGTCACAACCGACAACACATTCCCGGAGGTTTCAAACAACCCATTTGACAGGGCAGTCTC -GAATCCTGTGCCCCAATCGCAAAGGCAGAACCAGCAACCACAGACCGCTTCGTACAACCC -ATTTGATCAACTAAACCCCAACCCGACATCTACACAGCCTTTAGAATCGTCTTTCCAGGG -TTTGCAGGTTTCCCAGCCCTTGTTCCCCCACTCGACTGGTGGATACCCGAACCAGACAAG -CACCATGCAGCACTCTGTATATCAGCAATCCTATACGCCTCCGATTACATCGACATTCTC -GCATTCTCCCTATGTTACCTCACCTCAGCCAATGGACAGCACCTACAATCCATTCAACCA -GACCCAGCCGCAGCCGCAGGGTGGCTCGGCCAGTCAGACATATCCCAACCCCAAATCGCC -GCAAACGAACCCTTTCTTTAACACTGCGCCGCAAAATCAACCCATGCAGCCGCAACAGCA -ACAAATGACCCCGTTCGCAACAAACCCAGGGGGTTTAGGATTCCCTAACCATGCAAATAC -CATACCGACCATGTCTTCCGCCTCGTCATTTGGACCAAGTGTTTCTTCAcaacagcaaca -gcaacagcaacagcaacagcaacagcaacagcaacagcaacagcaacagcaacagcaaca -gcaacagccacagcCATCTAATGGTCTGGGATCCTACAACCCGTTCCAGCAAGGCCTCGC -GCCGAACACAGCACAAAATGCCGGTGGCTACCCTAGCCAACCCCAGCAGGCACAGCAGCT -CATGCCTCAGCCCACTGGAATGGACAAGAATAGCATACTTTCATTGTACAATATGGGTTC 
-TGCGCAGCCTAATATGACATCCATTCCCGAGCAACCTCAACAACCCCAGCAATACCAGCA -ACCTCAGCAGTCCCAGTTccaaccccccacaatgaacccctacacccaactatcaaaccc -atacacctccatgcctcaaacccagcaAGCCACAAACTTCCAGCATCAGCACCAGAACCA -GCCCCAGCAAAATTTGCAGTTccagcaacctaccaccaacgcccagcccgccacatccaa -caaccCATTCTTCGGCACCACCCCCACAGGCAGTGGATCCGGCCTTGCTGCACAAGCAGG -CATCAACCCATACCAGCAGCAATCCTCAGGGCTAGGACTTGGCGGCATGAACGGCCAACC -CGGAGGCGGAACCGCCTCCACTACTAATTCGGCCACGGGAACGAACAACTCTCCATTTTC -GGGAATGAGCAGTCCCCCATTTGCAGGAGCGAACAGCTCCCCATTTGCTGGAGGCAAGTC -CCCATTTTCAGGCCCGCCGCCCACAAACTCCACGTTCCAACGGTCACACATGAGCCAGCC -AAGTGTTGATGTGAGCGGGCTGCAGAATGGTCGCCATAGCCCTGATGCCTTTGCTAGCCT -GAGTGCTCGACATGGTTGATTTGTATAACTCCAGCCTTGGATAATAAGTAGATAATGGCC -GAGTAATGTGAAAATGGATATTGACATGGACATTGATTTTTATACTGATCTCGATATAGA -AGACTGACTAGACGTGTTATGGTTGTGCATGTCCGGCGTATTTTCCTTGTCTATTTGCCA -TTCGTTATATTTGTGTTTCTTTTTTTTCTCATCACATTCTCTGGTCACTTTTCTTTTCTA -TGTCAAAAAAACACTGTGATTTCCGATTAGCGAGTCAAAAGATTAGTATAGAAATGTGGG -TTCTTGTTACGCATGAGTTCCAAACTGAAATATATTACTGGTGAATCAAGACAGATTATC -GTACTGTAAATCTAGGGCTATATGTTCAAAGAAGTGAAGCACACCATGTAAAGAACAAGG -AAGTGTGGAAGACAAGAAAAATACTCGCAGCACCACATATATGAACAAGGATGGCTGGAG -ACTAAAAAAGCCAGGAACCCGTGATCTCAAGACCACAACTCTGCTTCTGTAGGTCTCCTT -TTAGCTCCTCCTAACCAAGTTGGCAACCCGGGAAGCCAATCTGACACCCAAGAAGGAAGC -CAACCTGGCTTCCGCTTCGAGAATGGCAACTTCGCTAAAAAGGCCTCCCAGCGAGTGGGC -GGACGCCAAGGCCAATACATCGATGATTTTGGACGCGAGAAAAGAGACACCACTGAATTT -CGCCATGATGCAAACAACGAAGGAGGCGACGCGGACCAAGGAAGCAAAGAAACGCGAAGA -GACATCCACCCCTGCTTTTTTTGCTTGCGCCCTTCGTTCCTGAACGATTCCACAATTCGT -TCGAATTGCTTATCTCGGCATTGCTGGACTTGGCGTTGGGTCTCGGAGCGCCAGGCGTCC -TGCTCTACTGTGTCCAGACACCAGACAACGATATCACCGCGACGGCGCCAGTCGTCGTGC -CAATGAGTGTTGAAGGTGCGCCAGCACTCACCGTAATGGGAGCCGCGTAGTAGGGTGTAG -AGGGTTGGCTCGAGCTCAGCGAAGAAAATGAGGTAGTCGGGCCATTCATGTTGTGGGGTC -TTATTGGGAGTCTTTTCTGATACAGAAGAGTGCGCGGAAGAGATGTACGTAGGTCGGCGA -GGGAGGTGACGGAGGCCACCAGCCATATTCTGGCGTAGGAATTGGGACGGGTTGTCGTAG -AATTGGTCTGCTTCGTCTCGGTAAACGGCTTTCTGGGTTGCATTAAGATCGATCGGTGGC -TCGCAGGACAGGGCCCAAGCATTGATTGTCGGGTAGACTAGATGGGAGCGCCAGGGTGTG 
-CTGTGACAGGGCATCAAAAAGCCGGCAGAGATTCCAGTTTTGGACATGGAGTTTGTTTGT -CTTTTTGTTGTCGGCACCGAGTGTGCTTGGTGCTGCTGACGGAGATAGGAGAGAACATCC -AATGTTCCCGATGCATGATAGCGAGTTGTGTATATTGCAATGACCACGTTGACAAGCACA -ATAAAGATGAGGATCAACCTGCGCGGAGTATGGCGCGGGGATGCCCGCGAGTAGATGGCT -GGGTAAAAAAACTGCACCAAAGGCGGCGCAGCTAAGATATGCAGACATGGCAGCAATGGG -TAGATGAAACGCACTTCCTTATGGGATATCAAAGAGAGTATAAATGGCATCACAACGCAG -ATGATCGCTAACTGCACTCGGATTCCAGCCTGGAAACTGTTTCCCACTGTGGAATTTCGA -GTTGTGAAGGTTCTGTATAAACCCCAAAGTGCAGCAGGCAACATAGTGGTGAGGAGAAGG -GGATAACCTTGTGAGATGTAGTAATGCCAGTCATTGCTACCGTAGAAGACAGCCAGTGAC -TGCGCGATGTTGAAGTATAGGAACCTCAGGGGTGGGAAGGTCCAAATTCCATAGAAGAGA -CGGTCCGCAATTGAAGACACCGCGAGAACAGTCGATCTATCAGATTAAAAGGTCAGCTGG -TCCCTTATTAGGAGGTCGGATTAGTTACCCGCGAACTATAACCTCAAAAACGAGGACCTT -CCGCTGGGGCCAAGACGTCCGAAGCCAAGCAAGGCCAGCTAGGGTTGCCCAAATGATAAT -GTTCGTCGGACGCAGTATACATGCGAGTGCCGCCAGCGGCAGGCATTTTTGGAGACTGCA -AGCAGCGTCAGTGGCTGTTGTCATGAGATTGAAAGTCCAGTCATACCCAATAATCACACT -TCGATCCTGGTCCGTGCCCCTCCTTTGGTTGCCAGTGTTTTTGTCACGGCCATCGCCAGC -CGTAGAGCCTACCGACCATTGCCACGGCCATAGTTCCAATGCAACAATGGTCAATGTTGT -CTCAAGGCAGTTCGACAGTGTCCTAGTTGAGCAGAACCATTGCCAGGGATTCAGGACGGT -TGCGACCAACTGTTGAACAATTAACATAGGCTCTTTCCTGCCAACCCAGAGAAATCCTCC -AAACCCGACATATAACCACATACCGTTGTCCACGATCCACGGGAATCATTTCCATAGACG -CGAAGTGCAAGTTTCCATGTGTAGAAATCACCAATTGCCGCCACAACGGCCTGCGCTGTC -TTCGGAGCGGCAATCAGGAGGTCTGCGCGCACTGCGGGAGACACGCCGAGGGCAAGAGCG -AGGAAGTCGGCAGCTTTATAGATAGCGGCAAAGAAAAGAGGATGTAATGAGGATCGAAGT -TGGTGGCGCCATTCCTAAGAGCGCGGTATTGAGGTCAGCCAAAGCTTAGTGTCGACGCGA -TCGGATTCACCCACCCATGTTACCCAGGCGCCTTGGCCCTGACCGAACGCGATTTCCCAG -GCTGGCTCGAGTGACTGAAAGTATTCATCTGGCTGGAAAAAGGTCCGAACTGCAAATGTA -TTGAGGAGACGGAAGGCGATGAGAAAGAGGAGTATGTTTTGAATGGGCGGATGGATTTGA -GGCGCGCGACGCGGAGAAGTTGGTTTAGACATGATTACGGGTTAGAATATTGCGGACATT -CATTCACTCCCTCGTACATTTCGCTCCAAGCCTCCAAGAAAGCTGATCTCAAGTGCATGA -ACATGGACAAAGGAGAAAGCGGGAGATATTCCTCCCCCAACTCCGAGACACGACCTACTC -AACCTATTCAATTCGACCTCACATTCCTTCACAATCCGACCTTTTGCCTTTATTTTTTTT -TTTTGTTATAATAAGCATTGGCATTCATTTCTGGTCGTTTTTCTCATATTTTTAAAGTTC 
-AAGTATGTCCCCCAGCTCCTTCAATAACCGAAAGTATGACGTGAATTTTCTACCCCGGTC -TCTTGGATTTAGATTAGACCAATAGTTAGGGCCGAATGTGATCCTGGGGCATGTATAGTC -AAGGAACGAATCAGAAATCGTGGTTATCAAAGCTAATGGGGTCCTGTAGGGTAATCCTGC -GCATAGGGAAAAGCATCGGCACGACCTGTACTTAGCGGACATCCGGTCTCAAGTCTAACA -CTTAACGGCATAGCGTACAAAGGAGAACAAAAAAAATCCCTTTATATATAACATTCTCTT -GCCCATTTCTTGGTTGAATTtcttcaattctcttcgatctttcttctcttctctCTGCTC -TAACCCCATTGTCGATCCCTGTTACCTCTCCAATTTTCCCCTCTCCCCGCAATCCACCAT -GTCTACAGGACCTTTCTCCCACTACCATGCTGCCTCTTCTGTTGAGGCTATTGAGTCTGA -AGAGCAATATGCTGCTCATAACTACCACCCTCTCCCTGTTGTCTTTGCCCGGGCTCAGGG -TACTTCGGTCTGGGACCCTGAAGGTCGTCACTACCTGGACTTCCTCTCCGCCTACTCCGC -TGTGAACCAAGGCCACTGCCATCCCAAATTAATTGCTGCACTGGTAGATCAAGCTTCAAG -AGTCACTCTGAGCTCCCGTGCATTTTACAATGATGTTTTCCCTCGCTTTGCCAAGTTCGT -TACCGAATACTTTGGCTTTGACATGGTGATGCCCATGAATACTGGAGCCGAGGCCGTCGA -GACAGGTATCAAAATTGCTCGCAAATGGGGATACAAAGTCAAGGGCATTCCGGAAAATAA -AGCCGTTATCTTGAGTGCAGAGAATAACTTCCATGGACGTACAGTGAGTGAATCGTGATC -CCCTCTTTGTTACACCCGTGGCGGTACATCAGTGGCTGATTCGGACCCCTTGCATTAGTT -CGCCGCCATTTCCTTATCTTCCGACCCCGAGTCACGCGAGAACTACGGCCCTTATCTGCC -CGGTATCGGATGCACAATCCCAGGAACTGACAAGCCTATCGCCTACAATGATAAGGCAGC -TCTGCGCGAGGCGTTCGAGGCAGCCGGCCCTAACCTGGCCGCTTTCCTCGTCGAGCCTAT -CCAGGGCGAGGCCGGCATTGTTGTTCCCGACCCAGACTACCTGCAGGAGGCCCGGGCACT -CTGCGACAAGTACAATGCCCTGCTGATCTGCGATGAGATCCAAACCGGTATCGCGCGCAC -GGGTAAGCTTCTGTGCCACGAGTGGAGCGGAATCAAACCCGATCTTGTTTTGCTCGGCAA -AGCTATTTCGGGTGGTATGTACCCGGTTTCTTGCGTTCTGGGTAGCAAGGACGTCATGCT -CACGGTCGAGCCTGGTACCCATGGTTCGACCTACGGTGGAAACCCACTTGGATGTGCCGT -TGCTATTCGCGCGCTCGAAGTCGTGCGCGAGGAGCACATGGTCGAACGTGCAGAGAAGCT -CGGCCACTTGTTCCGGGATGGGCTGTTGGCTATCAAGAGCCCACTCATTGAGACTGTTCG -TGGAAAGGGTCTGCTCAATGCCATCGTTATTGATGAGTCGAAGACGAATGGGCATACCGC -TTGGGATTTGTGCATGTTGATGAAAGAGAAGGGGTTGCTGGTATGTATCCGAGTCCCTAC -TCGTTGAGGTTCTTTTGGGATATCGCTGACTCTTATACTCTCTAGGCCAAGCCCACCCAC -CAGAACATCATTCGCCTTGCTCCTCCGCTGGTCATCACGGAGGAAGAGATCCAAAAGGCT -CTTGATATTATTAAGGAGGCCGTGACAGACCTCCCCAACCTCAAGGGCACATCCGAGGAT -CAGGTCATCCCTCCCCCCGAGAAAAAGGTCAAGATTGATCTTGAGAACTAGATGAATCTA 
-TGAGGAACGTGACCTTAGGTCCACATATCGCAGCCACAATCCCAGAAGCCGCGCGGGGAA -TCTATCTGCCGTAAGCCATGTATACCTGATTTTATCTGGGTTCGGGCTGGCTTGAGACTC -CCATTGGACACAAGCTTGAGACAATCGGAATGTCCCTGGCACATTTCCAACTGTCTTGGA -TCTACTTGTTATCCCACGATGTGACGATAGAATGACTTGACTTTGTCTCTTTTTTTTTTT -TTTCGGGGATCAAGGCTTGTATGTCATAGCAAACAATAGAACACAGTTTTCAAAGGATAT -GGAGCACCGGGTAATTCCATAAATGAACATGACCGATATCAACATTACTAATCCAATTGT -CGGCGAACGGAAATCACGAGGCCCCGCGATGCCGACTGCCGACTACTCCATACAGGGATC -TGAGATGAAGTTCTACGCTCTAAGCGAATGTTCTATTCTTTTATCTCCGGGATTTGACTT -TTATGGATAATGACAGGCCTAAAAGGTCCGAGAAGCTTAGCCTTGTTAGGTGGATATATC -GCGGACAATTCTACATTTTTCTTTGCTTACTCCCATTTATATAGTCCGATTAAATCCTGC -ATTCTTCTTCTTTCTTTAATTATTAAACTCCTAGAGTCCTAATATCCGATATCTCCGCCT -TCCGTGTTCAGCATCTCCCCATATCCACTATCTTACTCTTCAGtttcttttcttcttttt -ttttttttttAAATCCCAGTTTCTGGGCGTACTGGGTACAATAATATACCCCAGCGCCAT -GTCTGCGCCCGAACCGCTACGTGAAGTAGTGGCGGATAAGCTCGAGACCAGTACCTCGAG -ATCAGCATCCGACGAAGCCAAGACTTCCAATCTGGAGCAGCCTGCGCAGAAAAAATGGTA -TCGCAGACTCAATCCGCTGCGATGGCAAAAAATACCTCCTGTCCCAACAGAGCGATCTGT -TTCGCGTGAACATGGCGCTTCGTTTTTTAGTGTGATTTCCTTCCAATGGATGGCCCCGTT -GATGAAAGTTGGTTATCTTCGTCCACTCGAATTGCAGGATGTCTGGACTGTCAATCCCGA -CCGGACCGTTGACGTATTATCCGGTCGATTAGATGCCGCATTGGAAAAGCGTACTGAGAG -CGGCGCTAACCGCCCGCTTGTCTGGGCGTTGTACGACACGTTCAGATTTGAATTTCTGCT -CGGCGGTGCCTGTCAGTTTCTAAGTTCGCTGTTGCTGGTTTTTGCGCCGTATTTGACTCG -GTACCTGATTTCTTTCGCGACAGAGGCTTATGTCGCACAAAAGGCTGGTCACCCAGCGCC -TCACATTGGTAAGGGCATGGGTTTCGTCGTTGGTATCACCTGTATGCAGGCACTGCAGAG -CTTATGCACGAATCAGTTCCTGTATCGAGGCCAAGTGGTTGGTGGGCAGATTCGAGCAGT -GCTTATCTCTCACATCTTCAACAAGGCTATGAGATTATCTGGCCGTGCGAAGGCAGGAGG -ACAGGCTACCCCAGAAGAACTCAAAGCCTTAGAGGCAACCAAGGAGGCTCTCTTAAAGCC -CGAAGAGAAAGGGAAAACACAGAAGCCTGAAGCTGCACCGCCTTCATCTGGTGGCGTTGC -AGGTGATGGCCGTGGATGGAACAACGGTCGTATCACTGCTCTTATGAGCATTGACGTGGA -CCGAATTAACCTAGCTTGCGGCATGTTCCATATGATTTGGACAGCACCTCTCTCCATTAT -AGTGACACTCGTCCTTCTCCTGGTCAACATTGGTTACAGTTGCTTATCTGGGTACGCGCT -GCTGGTAATCGGAATACCTTTCTTGACCTTTGCTGTGCGATCCTTGATCAACCGCCGAAA -GGCCATCAACACGATTACAGACCAGCGTGTATCTCTTACCCAAGAAATTCTGCAAGCTGT 
-GCGCTTCGTCAAGTACTTCGGCTGGGAGAGCAGTTTCCTCGGCCGTCTGAAAGAGATCCG -CGGACGAGAAATTCGATCCATTCAGACCCTGCTTGCAATTCGAAATGGCATTCTCTGTGT -GGCGATGTCTATTCCCGTTTTTGCCTCGATGCTGGCTTTCGTTACCTATGCGCTGTCAAA -TCATGACTTGGACCCTGCGCCGATTTTCTCTTCGTTGGCGTTGTTCAATTCCCTGCGAAT -GCCGTTAAACATGCTGCCAATGGTCATCGGTCAAGTGACCGATGCTTGGACTGCCTTTAG -TCGAATCCAGGAGTTCTTGCTCGCCGAAGAACAAAAGGAAGATATCGAGAGGGATGAGAA -CATGGAAAATGCAATCGAGATGGAGCATGCTTCATTTACATGGGAACGCCTTCCGACAAG -CGATAAGGATGCTGAAAAAGCCGAGAAGAAGGCAGCTGCACTCCCCGAGTCAACAGAAAA -GTCAGCAACCAAGAACGATAACGATGAGGAGACTCCCACAGAGCCATTCAAACTAAATGA -CATGACTTTTGAAGTTGGTCGACATGAACTGCTGGCCGTAATTGGAACGGTCGGTTGTGG -CAAGAGTTCTTTACTATCAGCTCTGGCGGGCGATATGCGTGTGACTGGTGGCACTGTACG -TCTGAGCACCTCGCGTGCATTCTGTCCCCAGTACGCCTGGATCCAAAACACCACCGTGCG -TAACAATATTCTATTTGGGAAGGAGTATAATGAGACATGGTATAAGCAAGTGCTTGACGC -ATGCGCTCTGACCCCAGATCTTGAGATCCTGCCGAATGGAGATCAAACCGAGATTGGAGA -GCGAGGTATCACAGTGTCTGGTGGACAGAAACAGCGTTTGAATATCGCTCGAGCTATATA -CTTCAATGCGGAAATGGTGCTCATGGATGATCCTTTGTCAGCTGTCGATGCACATGTCGG -TCGTCACATCATGGACAAGGCAATTTGTGGTTTGCTCAAAGATCGCTGCCGAATCTTGGC -GACGCATCAACTTCATGTTCTTAGCCGATGTGATCGAATCATTGTTATGGACGAGGGACG -GATCAATGCGATTGATACATTCGACAACCTGATGGGCGGCAACGAGGTGTTCAAGCGGTT -GATGTCTACTTCTCGACAGGAGGATATGCAAGAGGCAGAGGAAGAGGCGGTCGACGAAGT -CGGGGATGAAATAGACGAAAAGGAGCCAAGTCTCAAGAAGGCCACTCCAGTAAAACCTGC -CGCAGCCCTGATGCAGCAAGAAGAAAAGGCTACCGCGTCCGTTGGCTGGGATGTTTGGAA -GGCATATATCCGAGCATCTGGCAGCTATTTCAATGCCGTCATGATACTCTTCTTGCTCGC -CATTACGAATGTCGCCAACATCTGGACCAGTCTGTGGCTATCTTACTGGACCTCCAACAA -GTACCCGGGGCTCTCGACTGGCCAATACATTGGAATTTACGCTGGCCTGGGTGGCGGTGT -TGTTGTCCTGATGTTCATTTTCTCAACTTATATGACCACCTGCGGTACGAATTCTAGCAG -GACTATGCTTCAGCGTGCGATGTCTCGTGTGCTCCGCGCACCTATGAGTTTCTTCGATAC -TACGCCTCTTGGACGAATCACCAACCGCTTCTCGAAGGATATTCAGGTGATGGACAATGA -ACTTTCGGACGCAATGCGTATCTATGCCCTGACGATGACTATGATTATCTCTATCATGGT -TTTGGTCATCGTGTTCTTCTATTATGTGAGTGCTGCATCCCTTGAGGTTTCTTAAATATT -CGAAATGGGCTCTAACCCCCTTTAGTTTGTCATCGCACTTGTCCCGCTGTTCATACTGTT -CCTTCTAGCTTCGAACTATTACAGAGCATCCGCCCGCGAGATGAAGCGTCATGAGGCTGT 
-CTTGCGCTCACTGGTGTATGCCAGGTTTGGAGAAGCCATCACCGGTACTGCATGCATTCG -AGCATATGGTGTAGAGAACCAATTCCGCCGCACCATCCGAGACTCGATCGACGTGATGAA -CGGGGCCTATTTCCTCACTTTCTCCAATCAACGTTGGCTCAGTGTCCGACTCGATGCCGT -GGCGGTCGTAATGGTTTTTGTCGTCGGTGTTCTAGTCGTCACTTCTCGGTTCAATGTCTC -TCCCAGTATTTCGGGATTGGTACTATCTTATATCCTGTCTATCGCACAGATGCTCCAATT -CACAGTTCGTCAGCTGGCCGAGGTTGAGAACAACATGAACGCCACAGAACGTGTCCACTA -CTACGGTACTCAATTGGAAGAAGAGGCGCCTTTGCACCAGGCCGTGGTGCCAGCTAGCTG -GCCCGAGAAGGGTCACATCGAGTTTAGCAGCGTGGAGATGCGCTATCGCGCTGGGCTTCC -ACTCGTCCTTCAGGGTCTCACCATGGACGTGCGAGGAGGCGAGCGTATTGGTATTGTTGG -CCGCACCGGTGCCGGAAAATCGAGCATCATGTCCGCACTGTTCCGATTAACCGAGCTATC -AGGGGGTAGCATCAAGATTGACGACATCGACATTTCCACCGTTGGCCTTCACGACCTTCG -ATCTCGCCTGGCCATCATCCCGCAAGACCCAGCCTTATTCAAGGGCACGATTCGTTCGAA -TCTTGATCCGTTCAACGAGCACAATGATCTCGAACTATGGTCCGCGCTCCGCAAGGCGTA -CCTCATCGGCCAAGAGCAAGAACTCGAAGGGGAGGAACTGCAAAGTGGGCCAGCCACCGG -AACTACAACTCCAGTCACTGGTAGTGACGTGAAAGCGCGTCCGGCAAACAAACTCACCCT -TGAGTCGCCCGTCGATGACGAAGGCTTGAACTTTTCTCTGGGCCAGCGACAACTGATGGC -CCTCGCCCGGGCACTGGTCCGCGACGCACGCATCATTGTCTGCGACGAAGCTACTTCTTC -CGTTGACTTCGAGACAGACCAGAAGATCCAGTACACTATGGCTCAGGGATTCGATGGGAA -GACTTTGCTGTGTATTGCGCATCGCTTGCGCACGATTATCCACTACGATCGGATCTGCGT -TATGGACAAGGGTCGGATTGCTGAGATGGATGCCCCAGTTGTGTTATGGGATAAGGCAGA -TGGCATCTTCCGTGCCATGTGTGAACGCAGTGGGATCACGCGCGAGGATATTCTCAACAC -TAACTGATAAAATTGATGCTTCAATCAATCAGTATATAACCTCTTCACATCGTATAGCAT -ATAGCATATAGCATATCAAAGCACATAAAACGTGAATATTAATACATATTATTTCTGAAT -TATGAATCTTCTCTATGTAATACGAAAAACTAAATCACTCCTTATCCAATTCACAAACCA -ATTCCATCCCCAAATTCCGCATCATCCCTTCAAACCCAGCCTCCCTCCACCCGGCAACAT -CCCTCTCATCCAACTGCACCTTCTCTTCCACCTCCGACAATGCCCCAACAGGCAACCCAG -TAAACTTAACCGGCAATGCAGGCGCCGTGCCAACAACACTCATCCGCAGACCTGTCAAAA -TACCACGCAGGTGATCCGCCGCCTTACCCCCACCCCGTCCTCCATACGAAACAATCCCCG -CCGGTTTCCCCTTCCACTCAAAGAAAAGATAGTCAAGCGCATTCTTCAGACTCGCAGGAA -TACTCCAGTTATACTGCGGTGTGACGAAGATAAAGGCATCGTACTGCCGCACTACCTCGG -ACCATGCCCGTGTGTGCGCTTTGCTGTAATGCGGGGTGGGGTCTGCTGCTGGTAGGCTGG -CAGGGATGACGGATTCATCGTAAAGGGGCAGAGATTGCTTTGCCAAGTCGAGGATCTCGA 
-AAGTTATGTGGCGCGGGTCTGTGTCGATTGTGCCGCTGTGGCTTGTTGGAATCGTCGGGT -CGGAGGTGAGGACGTCATAGACGTACTGCGTTATCGTGGGGTTGAGGCGAGGGCTGCGGG -TGCTGCTGGTGACTATGGCGACACGTTTGGGGGCCATTTTGGTATTGGTCTCCAAAGTGA -GATTTGAATTTGGATTTCTTGAAAACTTTTGGGAGAGGGTCGAAGCTGTTTTCGGGGTTA -CGGACATGGTAGTTCGGTGAATCGGAGATTAGAAACCTACACACTTTGACTTTCATGGTC -CTATGGAGGGTTTTCACCGGCTTTTATCCTCAGCGACTATTGAAATTTCATATGTGTTAT -AAAGATCATATATAACAGAATTGTGGGAAATGTTGGAAAGTCGCGGTCAATTATGGATTT -GAATGAAAGGAAGGACATATTCAACAGTATTTATCCAAGACACGGAGTGACTGGCTCCTA -TAGCTCCCTATTATTTCTGAAGAGCCAGGAGTATATACCCAAGAATCAAGCGCAAGACAT -AAAGCAATGAGTCTGTCCCTGCGTGCCTATCTCTTCCAACGCGGGATCCGCTCTCCGCAG -CCAATTAGCCAGCATGTTTGTACAGTGATCAACATGTTCCAGATCCCCCAGCATGTTATC -CAGACGAGCGCCACGCATGTGGACGTCGTGGCCCCTTCGAAGAAGATACAGACAATGGAC -TAGATGCTCACGAACAGCAGTCCAATAGAAGTCCGGGTTATTACCGAGCTCAGTGTAGTC -CTTCATCTGCTTTGTGGCAGCCTCATCGTAGAAATAGACAAACGGCTTGCCGTTGTTGAA -ATGCGCAAACTCATCCGTGAAGTCATGTGGACATTCTTTGTGCAGCCAACAGCCAGCAAG -AGGATCATAGGCGCAACCGAGGGCTTCAGCCTCTTTGGTTGAGTTGCCACACGAGTATCT -TACTCGGGGAGCCGGTGGTAAGCATTGTCTTTTGCCTTCGAGTGACTCAGGGTATGCAAA -GGAGTCAGAGGATGGTCGATTAGTGACAAGAACTTTGCGGAATGGTTCCACGGCGATGGC -TGTGATTACAGCCATCAGCAGGAGCAACAGGACCATGAGTAGGCCGGTGATGGTGATTAC -TGTGAGGTTTCGTTTCAAAAATTGCTGCAAGCCCCGGCGAGAAGGCCTGTTTGCGATGGT -GGCATCGCTGTCGCCATCGGCATCCGCTTTAGGGACCGGGTGGTAGTCGCGAGAAAAGGG -TTGGTCCATGGCAGAATTGATGAGGGACGATGTGGTAAGTTTCTGTGGGACCGTAGCTAA -GAAAAAGATATCCTTTCCAGAGGATAAATCCAACTAGTTGCACCTGATGCTTATTGAGAT -TGTAGTCCTATTCATGTCGTTTTCGGGACAAAGAGATGAACAGTCGAGGTCTATAGATGG -CCTGTTTGGGATTTCAATAAGACTCGGAAGAAATAAATCAATCATTTTAGGCCCGAGCGA -ATACTTCAATGAGAGACTGATGGCAAAAGAAATGAATGATCCAAACAAAGAAACCACAGA -AAACCTCCATTACACAATCCAGCCCGCCGATGTAGCAGGTCCGTGCTTCGTCCTCAGTAT -ATACCTAGGTAGTATATCATTTTATCAAAGCCTCATTCATAGATGAGGCCGATGCTGCTA -TACACACTGTGATCCTCATACCTCTATGGGCACTGAAAAGCTCTTCATATCTTTAGCATA -GGTGTATTGCTAAATATATCTGCTAGACTAAACACAATACTCAAACTTTTATCAGCAAAT -TCATCTAACGGCACACCCGCCAATTAATGATGAAGGACTGGACGCACAACAAAAGGCAAT -TTACAAAAGAAGGAAAAAAGCAAATGTACAAAGCAAGCGCAAAGCCGACTTCAGATTCTC 
-AATCTGTCATCAATCTATACAACATAGACAGTCTGGTTGAGAGACTCCAAGAATGCAGAA -GGACCGGCCTGAAACCCAACAATCAGTATACTTGAATTCCAGCAAATGTCAGGGGTTTAC -TATTTACACCAATTAGAGCAGCATGGGCGACATTGATCTGGCTTTTTCCGAGAGTAGCCG -AGGCCGGCACGGTGATTGTGAAGTTCTCATATGGAGGCAGGCTGGACTCGTGACCCAGGG -GGTTAAAACGCCCATTGTATAGAATTGTTCCTAGAACCTCGTCTGCGTCTCTGCAAGGTG -AGGATTTGCATGAGGAGAGACCGATTGTTACGCCCATCTCCTTAGACGGACTCAAAGAGT -TCTGGACTATATATCAGCAAAGCAAATTACGGTGACGAATATGGAAGAGCTAAGTAGCCC -TAAATCTGAAACTCACAGGACGCTGTACTTGGACTACGATTTCCTTGCCAGAAATGATCC -TCTGATCCTTAGTAGGGAGTCCAATTGCGGCTCTCTGGGCAATAGTCACGGCGATGAATA -GAAGCATGGAAATGAAGGACTTCATTTTGATGGATTAAGAAGATTTGTAGAGTAGAAAAT -GTGCTGGTAAGGTGATGTTATGGTGATAAGATAGATGTTCTATTAGAGATTCGTGCGATC -TGGAGTATTTATATTGAAATGTGGATAAACAGTTCTAATAATGACCCATGGTTATAAAAG -CATCTACTATGATGATTGGTCTTGTCTCTGGAAGTCTCAGATGCATTTCAAGCCCTGAAC -AGCCGCCTGGCACTAAGGTCAATCAGGAAAGCAAATTGGAAATCCCCGGATTATTATAGA -TTCACCTTGCATATCAAAACAAGTAAGCGGTCTGCTGCCTTACTTTGAATGGCTGATTCA -GGCTTGGTTTCAAACCTGTCCCCTATCTTATTGGTAGGTGCTTGAGGGCTCTAAAGGTGC -ATATGGGCGAGCTCTCTCACATTCAAGCCCACAATACTTCTTGAACTGATCAACACTTAC -ATAACAGGCCGCATAGAATACATTGGCTAGCTTAGCGTCCTCCACAGTCCCAGAATAGAA -TGATCTCTAACGCTGAGAAATGAGGTACGGTTGGTTCAGCCGCAATTTAAGGCTGAATCA -AGAAGATTCCACGAGATGGGCAAACGCTAGAGCCAAAAATAGAATATTTAAGACGGAGAG -TATTTGCTGCTGGCAGCTAGAACGTCTTCCTTCATGCACACGGGAATAAAATGGCCCGTA -GGAATTTGTCAACTGCTGTACTTTTCAGCCAGCGGAAATTGTCCAATTGAAGCGTCAAGT -ATGATGTCTCCTCTGCTTTAAGCTTACTGCCTCTCTCGTTTCCATACTCAGATGAATTGT -ATCTCCGACCGGAGTGGTTTTGGCGCAATGCATCAACCCTTGCTTCGTTCCCATGGGCTA -CATGTAGGCCATGGAAGCACAAAAATATCTTGGCTTTTGTTGTCTTTTGTTGTCTGTACG -TGAAATGTAGTGTAACTCTTTGATTGTTGAGTGATCCAATCTTGCTATTGGCTAGCGAAT -CAATGATAGCAAATCGTAGATATGAATCTTAGGTCAAGTGTCTTAAATTTGGCTTGTCAT -CGTGTTTTGATAGTTTGATATTCACATTGCCATTTTAAAACTGTTGAGATTGCTAAGGGC -TATACCATTAGAAGAGAGCAGCTACATCTTAGAGCTTATATGACAGAATTATAGTCAAAC -ACCGCATGTATTCCAATTCACTCCATCCCATTTATATGCCCCGGCCGTATATTTTGTCTC -GATATCAATGAACATGCACAAGCCTTTTTTTGGTCTTTGGATTATCTACGGCCACTGTTT -GAAGTGGACAAATCACAGCAAAAAATAGCGAATATACAGCATCCAAGTGCACACAATTTC 
-TTTCAAATAATCAAGTTCTTCATCTGTCTTTTCTACTTCCTGTAAACTCTACCATCTACC -CAAAATAAATCTTCATGCTATCTAACAGTATTGCAACAATCCCTTATCAATCCTCAGAAG -ATTTTTTTAACAGAAAATTGTTAACAAGTTACAGCGACCGTAGAGTGTGAAGTTGAGAGA -AGAACCGCCAAGAATACGTGCGAGCCATCGTGTTCCACGGCATAGATCGATCCACTTATT -TGACGAGCAAAACTGTACCTAAGTAATGATATCTTTCACAAAGTTTGCCTCATTCTAGAT -TAAAGATGGAAGCTAATCCTAATTCTATCGATTTGAGGCAACTCCCACTCATCTTCCTGG -TCCGCTGACGGAGCCGACAATTGTGAATTCCATAGGTGGTTATGAAGCGGTGAAGGGGAC -AATAGTGCCTCAGTGGCAATATTGGAACAAGGGACACAATCCCATGAATCAAGTGCAGTG -TATGAGGCAGGAAACAAGGGGGAACGGGGGTGAAGTAGACTGGCCTGGCTGTATGTAGCG -ATTTCCAGCCGGATCCACCAGACATCCATGGGGTGTGGACATTGATGTTTTTCAAAACAT -CTGCAGCGTGAGGACAGAAAGCCTCTGTGAGAACATGCAAATACCGAATGCAAAGAATGA -TTTCATGAGCATGGCTTCCACTCAAAATGAACAGAAAAACTTTAGCCTCCTCTTGACAAA -AGACTAGACATTGATTCTCACCTGATTCTTTGAAGCACGGTCACTTCTCTGGCTATCAGT -CTATCTGAAGACTGCTGAGTCTCTCTTTCTCGGCCTCGAGAGCTGCTGCCATCCGGGCCT -GCTTCCGCCTCCACTTTCTCTTACGCTTCGTGTTCTGCGGGTTTCGGAAACATTCATCGA -CGGGATGCCTGGTCAGACGCTGATGGAAAGGACAATATTGCTTCGGAAAACATTCAATCT -GGTTCATATTGGCAACGACAGTATCTAGGCTACCTAGGCTTGAATTTTGGTTTCCTAGTC -GATGTGCCTCGAACTCCAGGAAATCTTCAAAGGCTTCTTCAAGTATTGATGCCGGGAGAG -AACCTTTCTCAAACACCAGGGACTCGATCCATGGGATTGTGTTAGGATTCACACTAACCG -CAGCAAGGAACTGATTCAAGACGAAGATTGTGGGAATATGTGGGTGTCCAAAGGTCTCCC -TCATCTCTATGAGGGCTTGCTTCCACGCATGCACAAAGGCATGTGGTGAATTCGACGAGT -AGTAGACCGTGTGAAGCCAATTCATGTATTTGCGATACGCGACAAGGCACTCACGGGATT -TGCTCTCCGGTATATCACCAGGCTCACCGGGAAAAGAGGGTACCATGAAAGGATTCGCAG -GAACAATGAATTCAGAGGCCGGAAGCGGGGAATCTATGATCGGCGGGCCATATGGAGGAG -GGCCCGGCGGATAAGGGTCAAACATGACGGAGTAAACTGGCAGCGGCCTGTATAAAAAAC -GTCCATAAGAAACGGGACTAACAGGTGGGGAGCCATAGTACCCAAGGTCGACCGGGGCTG -GAGCTGGGGCCTCGATAACTGGAGTCGGGTTCAAGAAAACGGGCTCGCCCTCAAGCGGCG -CTCCGGGATATCCTGGGCCGGAGATGAGCATTTTGGCTGCGGTCAGTGTAGTTGTACCTG -AACTGGTCTTTGTGTTAGCCTCATTCGGGATTTTGTCTTGGGTATGATGGTGTAGAGATC -GCGGTACTCAATGAATGTAGCTAAAATGCCAAGAAGGACACAGCTAGAGCGGTTAGAAGA -TCGAAAGCTGTACGAGTCGAGTTAAGGTTGGCAAAGATACGTTGAAGGATTGAAACATAT -GCTTGTCAAAGGCTTAAATATAGGCTTGATCCGGTTAGAATCCTGGGGAATGTGGCATTG 
-GGTTGATGAAGCGATGTATGTTGGATGCCCAGGAGCTCCAGTAAACTATGACAACCACGG -AAACACCCCTTACACGGAATAACGAAGCGCAAGGCATAACTTGAATATAGCTTAGAGAGA -AGTGAGGTCAATCCTAAAGCTTTCTATAGTGGCATATATAAAAGAAATTCCAAACCGATC -TAATGGTGTCCCCGAACGAATTTCCTTCCGTAAATGTGCAAAACTCAACCAGTTGCCTAC -GCCCTGAGCTTACTCATCTCTTCAATATCACTTTTCGTTTGCTTTGACTTTGATAGGCTT -AATTTATGAGATCAGAAAGTCGGTGTGGTGAATGTCAATTGGCAAACATGACCATCTACG -GAAATATGTTACGTCCCGGCGTTAATTCTCAAGATTTGATCAATGGATATAATTCATATA -TTCATGTATGGTGATATTAGATATTTACCGCAGAAAGGATAAAAGAGAAGTGTTTAATTG -GGCTTCCGCACGACTGCGGGGCCCCCGAAACCCCCATATTAGGCCCAGTACGACCCAGTT -TCTAATATGAAAGGGGATATTGGCAGTCAAACACCAGCTAGATCAGCAGGATCATCATAT -GTCTATCCGATACTACACACATACATACTGGAAGCGGGTGGCCTGATGTAATCAAACTAG -GATTGCCAAAACAATCCGCGGGTACGGAGTATTCGGGCGGACACAGGGAGTATAGCACTG -AGCACGGTCCTACGCACCCTATTGAACTCTCGGGAGGCTCTGACCTGGGATCTGGCATTG -GGTCTGGTAGTGCGGAAGAGGCGGTGAGGTAATGACCAATGAAAGATTTATGTGCCGGCT -GGCATTCTTAGTCGTCCGGAGTACACGAGCTTGGTCAGGCCCCGAACTGCGGGCCACAGC -CGCATTTGTATCAAGGCCCCCGATCGAGCCGCACTGAGAACCATGCGGCTCATTGTAGCT -ACTACCTGTACGTCATGTAGACCTATTTTCTTAAACTCACACCTGCTTCGCCTTTTTTCG -AGTCCCAATGTACGCCCTCCAACGTTTAGTCTACGGATTCCCTGAACCTGCTCCGCACAA -ACGCAGTAAGCCCGTCGAGGTGCTTTGCTTGGGCTTACCACGTACCGGAACCGAGTCATT -GAGTGTGGCCTTGCGAACTCTGGGTCTACAAACATACCATGGCTGGGACTTGGCCTTTGA -GCCAGATGGCAGCAAGTTGCAACACTGCACTGAGCTAGTGCGGCGGAAGTACAACGGTGC -TCGGGATGGCGACGTGCAGATTACCAGCGCGGAGTTTGACATTCTCGTCGGGGATAGCCA -GGCTGTCGTTGACTCCTTATGCATTTTATTTGCTCCCGAGCTGCTCGCCGCCTACCCAGA -TGCCAAGGTGGTTCTCAATCTGCGTCCCGATATGGACGCCTGGTACCGCAGCATCAACAA -AACCATCGTCCAAGGGGTCGATCAATCATGGACGGTATGGGCGATGCAGTGGTTTTCCCC -AGAGATTCATTGGCTGTGCAGTCTTTACTTAAGAGATGGATATCCAGGCATATTTCACAG -CACCACCACCAAAGACGGGATCGAGCGCAACGCCAAGTGGGTCTATCGCGATCACTGCAA -CATGGTTCGGGGTATGGTGCCTAAGGATAACTTGCTTGAGTGGTCTGTCGAAGATGGCTG -GGAGCCGCTTTGCAAGGTATGACTTAAACAATCTAGAATGATCTCAAACTATGGCTGATG -ACAGAGATAGTTTCTCGGCAAGCCCGTTCCCGACGAGCCCTTCCCCCGAACCAATACCCC -AGGTAGCTACACTGAGCGCGCAAATGAACTACTCAAAAAGCACTTCCTTCGATGTCTGCG -CAACTTCACTCTCACCGCCGTGACTCTAGGCGGGATTACTAGTGCCATTGTGATGTGGTG 
-GCAGGGACGGATCCCAGAGATAAGGAGGCTGCACGATCTTCTCGGTGGATTCACTAACAT -AAAGTAGGATCGTTATATGGAATGGAGTTCTTTTAGGCCTCGAATATCTCTCAAGTCCTC -CATCAGAGACCACGCCCCGTATTTTGACTCAAGAATCACGACTTTGCTTTCATGACCGGC -GCAGACCAAACGGTTGTCCAGTTCCTAGTCCCATTGTTTTGCGCCATACTCATATAGTTT -TCTCATAGCTTTCATAAGCCCGGTTTCCAGCATCTCCAATGTTTCCGCTCCATACTCGAG -CGTAGTATGTACGAGTTCATGTAATTGCATGGAGTTTGTAAAATAGCCATTGATAACAGT -TATTTGTTTCACCGAGTTGGCAATGTATGCGTCTATTTTAGGTAGATTTATGTAATTTTA -GGTATAGCAGGTATCCCCGCGATATGTTTACCTATAGGCGATAGGCGTTATAGATCTGGA -GGTACTTGAGGATCCAAATTGCCTCCCGCCTACCGTAGACGGGTGATAGGCACCTCTCCT -AGATATCTGTAGACAGGCAGTGTCGCAGTCTAACTTTGACCGGGACCTTTCGTTTGTACA -ATGAGGCTAACGAGTCTCCGAACATAGTCCACATCTTCGTTGGGATATTTCGCTCGGTAA -ATGGCCCAGAAAGTCATTCGGCTGGTATGGGATCGTCCGGGTTTGGCCCTCCGAAGGGTA -AAAGGGGGAATTTAAAGCCTGGCTCTGAGTAAAAACGTGCAAAGGTTTACTACCTGTCGT -GGTGGTTTCTGCATTATTGAGTGTGGCCAAGCCTCATATGGGGATACCTCGGCGCTGGCA -TGACTTTGCCATGGGCCGAGTTACGCGACCCGGGCTGCAGGCATCGGAACTATAAAGGCC -CTGGGATGATCGGCACTTCCATTTCAAGATAAATTCTAGATAAATTCCAGAAGAAAAATG -GATGAAACTAGCTTTCAACGTTTCAAACAGTAAGGTCCATGGTCCTCCAACTATAGGGAA -TCGTGAAATGGTAAACTTACCAGCACGTATATTTTTTGAGCCGGGCTAACGAGAAATCAG -AGGCCTTGGTAGAAGCAAAGAGGCGATATCAAGGCAAGTCAGAAAATCTACTGTATCGAT -ATTAAACAGCTCTGTCTAATTTGATAGAAAACCACATGGGAACgatggcgtggatggagt -ggatggagtggatgcagtggaACCGTGATCTGATAGCCAGCCATTTGAGCACGTTCCACC -CACATTATGCTGCGTCTGTTCCTTATTGTTCCTTATTATCAGGATATCTAATTTTTTTGA -TACAATTCTATCAAACATGCCATTTTTTGTCTTGGTCGATCTTTTTATGTCGGGATCCGC -CCGATCTACCCTCACACCTATCCCACCCTAGCTCTATAAGACACCGAAAGTCGGAATGAA -CTCTTTCTCTCCTGATTTCCAGGAGTGGGTAGATCGACGCCAAAGCACCCTATCTGCCTC -CACCGTGTCGTCTGCGTCAACAAGTATCCTCTTCCCGAGCCCTGCGACATCTCCAGGGTC -CAGCTCGAGGAAGCAATCCTGGCAGCTCTACGATGATGTCGTACGGCTCGAGCTAACTCC -CTCCGTGCAAATGTCTGTTCTGAGAAAGGTCCATTCAGACAAGAATGTGCTGACATGTCA -GTAGCTCCTTTGTCAAAAACGGTCAGCTATTCAAGCTGAAATATACATTCATTGATATCT -GTAAAGACTCAACGGGTGCATTGCGGTGTCTCGAACTTGGGGGAGGACTAGGACAGCAAA -CACCTTTTGTCCATGCCTGTGAGTTGTCCCACCTATCTCCAAGAAATAACATGCTGATTA -CTCCAAATCATGTATAGTCTCCAATACAAAGCTGCCAGTCCCTCATCTAGAACATCCCAA 
-ATCAGGCGACGACTACCCTCTTCGCGTATCCTTTATGGACCAGCAAACGGTTCAAACGGC -GAACACAATATTCATGACGCAGCTATCATATAAATTTGATCACTGGGATGGTACGCGACC -GATATACCTTGAGGTAAACATTTGCTTACGCTTGCAGATTGTGTACGATTCCAAGAACTG -TTACTTGGCTCCAAGTTAGTTTTTATTGGCGGGATGGCCGAGGCCAAGTCCAAAGGCCGG -GGTGAAGAGTGCATCAGCCAAAATCTTCGCGTTCTCCGCGGGCATAATGGGAAACGTGTG -ATGCTATTCTTTGCAAACTCGCAACGGAGGGAGCTCAAACGATACGTTTCCATTCCCTGT -GAGTCCACTTCTAGACGAGCCCATATTCCCCTGTCACAGTGTTGCTAATAGCCCGGCTAG -TAAATTGTATTGCGGCAATTAAACCTCCGAAGAAGGCTGGCAGGCCAGCCATAATTGATC -TCAATCCAAACTTTGAGATTCTGTCTCAAATGAGAAATTTTACGATCCAGTTTCTTGACG -ATGATGGTCAGTGCACATTTGAATTTCTGTGGACCAACGCTTACTCCTCTAGACTGTGTA -GCATTCTGTCAGATGTTATCATACGACTTGACTATTGGATGAATCGCTTACGAGGGGGGA -CGGGTATCACTAATTTCTCACCTATATGAATTATGCATGGAGTTCTGGGACACATTGGGG -GTCATATCATGCACATAATACCAATATACCAATGTATGAATATACCAATATACCCGGGGT -ATCACTTGATGTATTACTTGATTGAAAATAGATCATGGAATTGAAATTTGTAAGAGATGG -GCTGATATTCATTTGAAAATGGCTTGGAAGAGCAGAAATAAAATAACCAAAGTTCCGACA -TGTCGACAAGTCAAGCACCAGAGACGCCGCACGTGGAGAGATAAACCTCAAAGCTTTGGC -TCTAGTCCTTGGCGCACTGGGCATTGTCACCTTGGGTTTTTCTCTTCTGAACTATTAACT -ATTGGGTATCTCTTCTCATCTCCGAAATGTAATTGTTTTTGTCTTTCAATTTTCTATCCA -CCATGGCCAAATTCCATGCCCGGTTGTATAAAACCGACCGTGCGCGGGACTTTGACCACG -AGCAGGTAAGGAGTACCCGATGAACATTGCATGATTATTAACAGTTCTATTACAGGATCG -ATATGTTCGTATGCGACAGATCTATGAGACTATCGACCGACAGGGCTTTCAATGGATTGT -CGTGCTGGTCGCTGGTCTTGGGTTCTTCCTCGATGGTTACACTGTGCGTTAACCCACGTT -CCAGGTGTAACATATTGATTTGTACTCGATGCATATACGGCAACTAATACTTGATCTAGT -TATTCGCCAGTAACATGGCCCTACCGATGATCTCCTATGTGTACTGGAGAGATGACACTT -CCTCATTACGACTCACATGCATCAATATCGCAACACTAGGTGGAACCCTGCTGGGCCAAG -TAGCATTTGGATTTCTGGCCGATAAAAATGGGCGGAAGAAGATGTACGGTGTCGAATTGG -TACTCCTCATCACAGCCACCCTAGGTGTGGTCATGTCCTCGACAGGCGTGGACGGCAGCA -TGAACGTCTTCGCATGGCTTATTTGGTGGAGGATCTGTGTTGGCATCGGAGTGGGTGCGG -ATTATCCACTCAGTGCAGTGATCACGTCTGAGTATGTTGATCTCCAAAGTTTCAATCCTA -GAAAGACTGACCTCGAAATGTCAGATTTGCCCCTACCAAGCATCGAGCCCGCATGATGGC -TACCGTTTTCTTCATGCAGCCACTCGGTCAGATTGCAGGTAATATTGTCGCACTGGTTGT -GATCGCTGTGGCTCGTGGTAATAGCAGCCCGGGTGAAGACATCACCCGCACTGTTGATAT 
-CATGTGGCGCTGGGTGATTGCCATCGGAGTCGTGCCCGGTGCAGTCGCTACTCTTTTCCG -CTTCGCCATCCCAGAGAGCCCTCGCTACCTAGTCGATATCGTGGACGACCCCATCACTGC -GGAGTTTGATGCAACCACGCTTTTCAGTGACACGCCCGGTATGATTGATTGTGTAAGCTG -GGGAAACACCGCTACATGCAGTGCCGGTGGGGCGATTCAACTACCACCCATTTCATCTAT -TGCTAGCAGCTCGATCACTGATAATGATGACGATGATGAATACACCCGCTTGCCCCCCGC -TACCCTCAACTCCCACTGGCGATTGGCACGTACCGATATCGTGCGATACTTCTGGACAGA -AGGAAACTGGCGCAGCCTGGCTGGTTGCTCTCTCGCATGGCTCCTCCTTGATTTTGGCTT -CTACGGTATTGGATTGTCAAGCCCTCAATTCTTAGCCAAGACATGGGGCGCACTCCATAT -ATCCGAGGCAGCACCGACCTGGATGACTGACGATACACCAAATGCCAATGTCTTCCACAT -GTTCATGGAGACCTCTGTGCGTGGTCTGATTGTGCTCAATATCGGTAGTTTCGTCGGTGG -CGTACTACTCATCATCTTTGCCCATAAACTGGATCGTGTTGCATTGCAGAAGTACATGTT -CCTTGCTCTGGCGGCTCTGTTTATCGCGCTGGGAACCATGTTCGTCAGTCTTTATTCTGG -CCCTCCTGCTGTGGTAGCATTGTATATTATTGGACAAATCATGTTCAATTTTGGTGAGTT -GAACTCAGGACCCCACGCATCAAAACATTCTAATCGACTGCCCACAGGCCCCAATGCAAC -TACCTATATGATTCCCGCCGAGATCTTTCCAACTCGATACCGCGCAACATGCCACGGCAT -CAGCGCCGGGGCAGGGAAACTGGGCTCTATTCTCGTACAAATCTTCTCAGCATACTACCA -CTTTGGCTCAGGGCCGGGCAACGAATCAACCCGCAAGCACGGCATTGTTCTCCTCGTTTT -CAGCGCCTGCATGATTCTTGGCGCGGTAGTCACGCACTTTTGGATTCCACCTATTCAGCA -AAAGCGGAATGGCAAGACTAAACTCTGGGGTGGTAAGCCCGAGACTCTTGAAAGCTTGTC -TCTTGGCCGAATGGGACGCAAGAGTCGAGATGCCGATTGTCGGAAACGGACATCACGACA -TCGGGCGCTTTCAATGAGTGCTTAGGTATTTCCAGTTTCTGTATCTATCTTTAATTATTC -TGTATAAAGGTTGAATATTTACTGTTCGCATTTGGGGGTTTCGGTTCTGTGAATATATAA -TAGCTGCGTTTGGTGCACTGTGCGGAGCATCATAAATACATCCCTCATACTCGAGAACGG -TCATCAAGCAGCCGCCCCGTGGCGACCTCATCAAGATAAAGCGCAACATCCATGCCCCCA -TCCTGAACCCCACTATCCATCACCTGAATACAACCATCCATCTTAGGCATAATAGCATGC -ACATAGCGTGGCTTAGTCCTGCCCACAAAGTCCACTTCGTACAGCTGCAGCCGCAACCAC -GACGTAACAAGCGTATGCTCAGAACCCAGGAACGCCTGCCACAGCCGCTGTGGCGACAAC -TCATGCGCAGCATCATGCAGCATCGCACCCACAGCATCGGCCGTAAATCCCTTCATTGTC -TCTCGGATCCGACTCGCTATCTCGCTTAATGATTCCTGGCATACCGAGGAAGCGGGTCCA -CCGACATAAGTTAAAAACAGCGGCGATCCAATGAATGTATCAGGGAGAGCGGGCAAAACA -CGCGGCCGCACGCCAAGACTCAAATTCATATATACTGGTCTTGAAGAATCGGCGTACCCG -CGTGCTCGCGTAATGGATTCCCACAGGTATGCGAGTAGAGCATCCAGTCTAGAGATATCA 
-CGTCGACTCCCTGGTGTCTCCAGAGCCGTAGCTTGGAGCTTTCTTAGCTCCGTGCCTGTG -AAGTGTAACTGTGTGTATTTGACTGCTTTGGAAAAATCCCATGAAAGCCAAGGCGCTGGT -TCTGAGGGCGAGACTCGGCTGTGGTCGATTTCTGAGGGCGGCGGCTTGGAGTTCTCTGTC -GTCGGTAGTAGGAATGACGGGTAGCCCTCATCAGTGGAGTCCCACCAGCTATACCGATGC -AAGGGCAATTTGCGCGCTTTGTGCACGAGGGTTTGCTCTGGCTCGGGGCTATCGATATCG -CCAGCGGCGCAGCTGTCCAGCAACGCGGGATCAAATACGGGCTCGCCCATCAGCGGGGAT -CCGGGTGGGTTTCCAAAGTGCTTTTTGCTGTTATATGCCCATAAATGCACGAATACCATT -AGAGACTGCGCGTCCGCTAGACAGTGCGCGAGCTTGATTCCGATGGCGTAGCCTCCTTCC -TGGAGAAGACTGAGTTGAACTTGCATCCCTGGCAGACCGTCATAGTCGCGCAGATTGGCC -AATGCCAGTGGGTCGGATGATATGAAGAGACTTTGTTTGAAGTCATCGCCTGCCCATGCT -CCCGGTTGAGGACCGGTGGTTGATGAAGCGCGTTCCTCTGCTGTGGGGACAATTTCATCT -GCTCGGAGGGGATGCTTAATTACTGTCCATTCGACACCTGGGTCAGTATCAGTCCCGTAT -ACAATCAATGGGCGATTAAATCGCTCCGTGTGGTTTCCTTTGGGGTTGACGGGTGCCCAT -TGGAGTTGGCCTGCCCATTGGGGGAATTTTGATAGAGTTTCAGTGAAGGAAGTTCGTAGA -TTGTCGATCAGGGTAATCTCATCCAAGTCTATAGGTTGATCGAAAATCCAAACTGCTCCT -GTTGGAGAGAATCGCGCAACTGTTGCGTCGAGGATGGATAATGGGGTTGATGTCGGTGTT -TGTTGGTGGGCTGGTCGCAGCCTTGTGGACTCGATGATGGACATGTTGTATATGCATGAA -AAATATGCTCGGTGCCTATTGTACTTAAAACAGTACAGTCTCGTCCATTTTATCTTTTTG -GATTCATGAGTCAACATGCGTCACTGTGTGCCACTATGCCTCAACCCCACGCTTGGCCCC -TAGTTAATTGAACTAGCACAGAAGAGCCCTACTTGCGCCACATCTTTGTCAGGTACAGCA -GTTTGAATATCTAGAGGAACTATACGCAGCTTCAGCTCTACCAGAATTTTATATCCCATA -ATGCGAAGACTTAAAAGCTATCAGGGCCCAAACCCCCTTTTTAGAGGGGAACTTGATGCC -TGTGTGCCTGAATTACGAGGCCCCTTTATCCGATTTTTCTGACCATTGTTACTATATTGA -AATCGGTTATAAAAGTATGAGAATCTGATATCAAGAAACTAGTAAAGTTTCCTCGGCCGG -TTTCATTGTTGAAAGGGATCAGGAATCTGGGGTGGCTTCAGCCAACTATAAAGCCTCGTT -TCAGGTACAGTTATGATCAACAGCTCAAGTCGAAAAGCCAAATATTTCAAAAAGGAAAAG -GGCGACGGTGGTACTAAGCAACTGCGCTTCCTTGAAATACCCTTGTCATGACGCCTTCGG -CCGTTTCAGCGGGTGTCCTCCAGCATTGAACACAAACTCATCCAAATTGATCAAGTCTGG -AGAGCTAAACATGAGGTAGAAATACTTTAGTGTCTGCGACATCCACATGCTGTCCATCGA -GTCTTTAAGGGTCAGTCCACTGCCCTCGTCCGTGACCGCAACGATGGCCGCGTTGCCATT -CTTCGTCTGTGTCGCATTCTGCATTGCCCGGAACATATCCCAAGCTGCACCAAGCAGATC -CTCACGGCCACTGATACGGTAAAGAGTAAACATACTCTCAATTGCTTCCGGGCGCAATTT 
-ATAAGCGGTATCCGGGATTCCGATAAACCCTTTTGGCAGACGGTGGTCCTCAATGAATGT -ATCGACGTTGAGGTTTGGATTTGGCTCTGGCTCGAGGCTATTCGCCTTCTTCCAGACCTC -TTGCTTCCAGAGCCGCTCGCTCCATGGGCAACTTGTTGGAGACGCACAGGGAACCACATC -AAACGCTTCGGGCATAATCCCCAAAGGTATGACCCGGTGAGCCCATATGCATCCATCGAC -CAACTTGTGGGAAATATCTTGGTGGGTGGGAATATTGAGTAGCGCACCTCCTAGTGCGAA -CACACCTCCTGCAAAACAAGATCGGTAGTGCATTTGCGCCTCTAGGTGTGTTTGCAAGTT -CCCATTTTCTGTCATGGTCACTCGAACAGGTCCAGGAAGGAGCATGTCCTTGCCCTGGGG -GTTCATCGGTCGGAATAAATTGTGGACCACAGCAGTGAATGTGGCATCCTCATACATCTT -GCGATACATGGGTATTTGGCCTCCTAGCAAAGCGTATGCCCGGGGCAGCGACTTGTACAG -AGAGCTTGCTTCGGCACCCAGTGTGAAGACATCGTCGTCGAAAAGCTCTTCTCTGGCATT -CACAGAGACTGGCCAGAGACCTGCGAGCTTGGTGGCGCCCTGCTGTCTGTCAAACAATTT -CATGACTCTATGGGCCGCGTCAAAATATTTTGGGTCACCGGTGATCTGAGACAGCCTTGT -GAATTCCAATGCGAATGAGCCTAGTTCTGCACTGAGAACATTTTCTTCTGCGAGCTGCTC -CTCGTGGCAGGTAGCTTTATGTGGGTTCCAATGTATGATAGGTATTCTGTTCGGGGTGTC -GAATGCAGCGTAAAGCATATCGCCCAGCTCAATGGACTTTTCCAGTAATTTCCAATCACC -GCTAAGATCAAATGCCGCCAGCAGGCCTCCGAGTAAATGGATGTTGACATCATGGGTATT -AAGTTCGGGCGACTGTGTATTTTGAAACTTGATGTGCGTGACGGAAGCCACAGCCTTCTT -GAACTCATTGTGCATTCCCATAATCCACAGCGTGTCGAGGTTGTCAAGTAACGTAGCACC -CCAACCCCCCAATCCATTCTCAGGGCTACCACTGAGAGGCGCAAGCTCATCTGCACCCCA -AGCAAGTGACTGGTATGACTTCCAACATCTTTCAAAAGCTTTCTTGGTGGCCGACTGTCT -GCTAGTTTGGCGATCGAAAATGAGATGGGATACTTTGAAGTCTGTCGATTGGACCTTGGG -TAGTGCCTGGGGCGGTCCAAAGACAGGCAATTGCTGAAATTCAGCAACTGGGTATCGTAC -TGGCAGCTTTCTCCATGGATATTTTGGGTTGATTGAAACGTTCAGGGTGACTTTGGATGC -GTCAAATGGAGAGGTTGAAAAACGCCTGTTGTCGTTTGTGCGATGATAGTGGACCTGGTA -AAGCAGAAGCAATAAGAAAGCTCCCAGGAAAGCGCCTAGGACCCGACGCTGTCCCCGGCT -CATGATGACTTCAGGTTATAGTACCACCGACATATTTGATGCTGTGAAGATAATATGATT -ATTGGTCAACGACAGAGTTACTCGGGGAAGGTGTAGAGGGGAAGACACAGAGTTACACAA -AAAGGTGGTAACTCAGGTCTCGAGGCGTGTTCGTCGAATTTTCCAAGGGTGAAATAGAAG -AATTCAAGTTAGACTGAGTGTTCTATGATGACCTGCTGTCCTAGATCTATTAGCAAGTTG -CAATTTGGTGTACGCTTTGTCCTCCTAGGCTGCATGAACTGCCGAGTATAGAGTTGTCTT -TGTATTCCTTGGACGATAATTATGCTAATACCATACCCCTCTAATCCTGTCATCATTTTG -TTCAAATTGTTGAACATCTCTACTCTGCGGAAAACTATTATTAGATATCAAACTCACGCG 
-ACAGGATGCTACCGTTTAGCTTAATTGGATCAGAGTGGAATGACCAACTTGTCGGGCTTT -TGAGGCTTCCATGCAGAAAAGTTAAATTTAAAATTACAAAACTGTATACATGCACAATTG -GCTGAAGATATATACTTAAAAAGGaattaatgattaatttaagaagatagatacttaaaa -aatatttaataattaatTAAAAAAAGAAGGTATTGTGCATCATCCGTGAATCGAACACGG -GCCTCGTCGATGGCAACGACGAATTCTACCACTAGACCAATGATGCTAGTTCATGTAAAA -CGTTCCATTATTATGCAATATATTCATAACAACATGTTTCCTAGTCTCCCAACGACCAGC -GAAACTAACAATCCAATCTTCCATACTGGAAGATTTGGAGAACCAACAGGCAGTGGAAAT -TAAAAGCCTCATTGGTTTCTTTGTAGACCAATTGACGAAGATAGAATATTATTGGTTATA -AATGGCATTGCCTGTTGGGCAACTACATACGTAGCTCGGTGTGAAATATATATTTCACAC -CGGTCTGTCACACCACATTATGGTTATACACACGTGACCTTTTTTTTTAATAGAGAAAAG -CTGAGGTCAATCATACACGCTATACCAAGGGCTATGAATCTAAATGTACAGCCAAGTCCA -TTTAAAGGAGACATTGAAGTGAGATGTTCCTGTTAGGGTATGAAGACGGCAATTGACATA -CAACTCGGCCGAGAAGGTCCCCGCGTACCCCGCGTTCTATATGAGAGGTTTAGGTGTGGA -TTCTCAGACGGAGCTACGGAGCAGATGGACGTTGTAATGTGATCTGGCAGTTTCCGTTGT -ATGCAGGATTACGAAATCACAACTGAGGGGTTATTGTATTGCACCTCTGTAATTTGTGTT -CTCAATATTTTCATCTGCTACCCAAAACATCTAAATCAATATGGAAAATTCCAATGATCT -ACCGCCATCGCGAGAGCGAGTTGTGCCAGGTTCCTGTAACATCGTACCTGGTCAATTCCC -CAAGACCATGTCCGAATCCGAGTCGGATCTTCCCTTAGACCCAAACTCTATTGCAGACAA -CATAGTCAACCAGCTAAACTCGGGTCTCTCAGCCAGAGACAAGGCAGCTGTTGCAGCACT -GTTTCTCGATAATTGCTACTGGCGCGACCACCTGTGCTATTCGTGGGATTTCCGCACCCT -CGAGGGGAGCCAATCCATCGCTGCCTTTGTCACTGAGGCACCACCCAGCAAAATCGAAAT -CGACCGATCGTCTGACACCAAATCCCCACACAAGGCACCCATTGATTCCTTTGGTGATGT -ATTCGGAATTGAATTTTCCATTAAGGTCACCACAGAGAATGGACTGGGCAATGGTATCAT -GCGATTGGCGGAACAGGATGGCAAGTGGTACATTTTCACAGTGTTCAGTTCCATGGTTGG -GGTGAAGGGTCATGAAGAGGATACGCAGAGGCCTTTCGGGGTCCGACATGGCGAGCAGAA -AGGTCGAAAGAATTGGAAAGATCGGCGTACCGATGAGATCAATTTCGAGGGAAAAGAACC -GGCGGTATTGATAGTTGGTATGTGCCGATGTAATGTTTCCCTCTAGAGATATAAATCTGA -CCCGAAGAATAAAGGTGCTGGCCAAGGTGGCTTGACTGCCGCTGCGCGCCTCAAGATGCT -CAAAATCGACACCCTTGTCATCGACCGAGAAGAGAGAGTTGGAGACAATTGGCGTCAGCG -ATACCACCAGCTTGTTCTTCACGACCCAGTGTGGTACGATCACATGCCATACCTACCCTT -CCCGTCTGACTGGCCCATCTATACACCAAAAGATAAGCTGGCGGATTTCTTCGAAACCTA -CGTGAAGTTCCGAGAGCTCAATGTTTGGACCCAAACCGAGATCAAATCGTCATCCTGGGA 
-TGAGAGCAAGAAGCAATGGACAGTTGTTCTCGAGCGCAAGACAGACAAGGGAACTGAGAC -ACGCACCTTGCACCCACGCCACATCATCCAAGCTACTGGCCACTCCGGCAAGAAGAATAT -GCCCGTTTTCAAAGGCATGGAGGACTTCAAGGGAGATCGCATGTGCCACAGCTCGGAGCA -TCCTGGCGCAAATTTCGAGTCAAAAGGGAAAAAAGCAATTGTCGTTGGATCTTGCAACTC -GGCAAACGACATCGCGCAGGACTTTGTCGAAAAAGGCTACGACGTGACCATGGTGCAGCG -AAGCTCCACCTATGTAGTATCGTCCGAGTCTACCTTGAAAATTGGCTTTAAAGGGCTTTA -TGATGGGACCGGTCCGCCAACCGAAGACGCCGATCTTTATCTGTGGAGTATTCCATCACG -TTTATTCAAAGCGCAACAGATAAAATCTACAAAGCTTACAGCTCAAAATGACGCAAAGAC -CATCGAAGGGCTGGAAAAGGCAGGCTTTAAGGTCGACCAGGGGCCCTCGGACAGCGGTCT -CTTGATGAAGTACTTCCAGCGCGGAGGTGGGTACTACATTGACGTCGGTGGAAGTCAGCT -CATCATCGATGGAAAGGTCAAGGTGAAGCATGGCCAGGAAATTAGCCAAGTTCTTCCCCA -TGGCCTCCAATTTGCGGATGGTTCAGAGCTTGAAGCCGATGAGATTGTGTTCGCCACTGG -ATATCAGAACATGAGAACCGAGACTCGTGTGATCTTTGGTGACAAGGTCGCTGACCGCGT -TAACGATATCTGGGGACTCGACAAGGAAGGCGAGATGCGGACTATCTGGCGGCGGAGTGG -TCACCCTGGATTCTGGTTTATGGGCGGTAACCTTGCTCTTAGTAGATACTACTCGCGTCT -GCTGGCATTGCAGATCAAAGCGATTGAAGAGGGGTTGACTACTTGGTAGCTACTTCTAGC -CAGAATTCGAGAAGAACGATCCTTCCACACTATGTTTACCCTATTACCCTATATGTAATT -ATCCATGGATTGGGAAGGCGCGTCTCCACCTACGAGTATAAACCCCCCCTCCATTTTGCG -CTCTTCTCTGTCTCTTCTTTTATCTCTCCTCTGCCTTTCAACCGTAGTAGTCCACAAATA -CCTGCCCGTTCATGGCTGCGATGCGACAGTGAATAAAATATAAATACCCGTAGCATCGGT -GCTGTCACATCCCTAGTAAAGCAACTAGAACTTCGTATCCATCAATTTTTGATCGATAAT -ATAGAGACTGTTCATAGTAGATGAATTTTTGCCTAGGCTATTAAGAACAATCAACCCAGC -CACCCTTCTTGCGAAGCCACCGAGACACCCAACGTCAGTACAAGAGTATACAACCAAGTC -TCCAAATCCGTGGAATCATTAGAAATAGGCTCCGTGAACGCCCAACAAGTCCGAAGATGA -TCCAAGATCGAACTCTTCCTCACGGTAATCTCGAGCCCACCTGGAGTCATTGTAGCAATA -CGACTCTTCCGGCGTGCCTTACGGTCAATTACCACAAGAACAAAGTGTTCGCTATCTGTA -ATCCCAGATGTCCCATTTCCATCTCCAGAGACTGCCCGTTTCTCCCACTGCATAATCATC -CGGCCCTGCTCGGGGGTGGCTGTCGTCTCTTCCTGAATAATGACAAATTTGTAACAAATG -CTTTTCTCAGGTCCTGCTGAGGTGGTACTGGCTTGCCAGATGCATTTTGAGTCGCGGAAG -TGGATTGATGGCATGGAATTGCCGATGTTGTTGGTTTGCAGTTGGCACATTGCAGCCACA -ATATCTCTGTCTTGGATGTCTTGGATACTGTTTAATTTTTTATCTTTCTCTTGGCGCTGC -CCTTGGCGCTGCTGCTGTTGATTGGTGATATATGGCTCATCAAGTGTTGCGTAGATATCC 
-TTTGTGCCGAGCTTTGGGCGTTGAGGGAAACATTTTGTCAACTTTGATTTGCGGAGTGAT -GGCTGCCATATCTCCAGTACAGGTACTTTTCGATGGTTTGGAGATAGTTGTTGGATTTGA -AGCAGTAGTTTGGGGCTGAGTCGAAATAGTTGACTTGATTTTGGGGGCGGCCGAGCCATC -TGGAACTTGGCCTGGATTTTGGTATTGACCCTTTGAGACGATGGTCTAGAGACAAATTAG -ACATGATTCTCGGTAGAATGGCAGACTTGATATGCCCTATTTTTTTGTGTCGGGAGTATC -TACATACCGTTCTTCGCTTGCTGTGTCGCTGTCATAGTTTGGCATACTGTCTGGGGATTC -CATGGCGATGTCATACGTCTTGTCGCGTCTCCAGATTTGAAGGGTGAAGAAGGACACAAA -AACAAAAAGATGAAAGGCTGAGGGCTAAGAGCTAAGAGCTAAAAAGTCTCAAAGGTTTTA -TAATAAATAGCTAGTCAGCCTTGGCCCTACGGCCGTACTCGTATAAACGTGCACGAACCA -GGGCAACTGGGTCCAGCAAATCTGCAAACTAAAACTAGTACGAATACGGGCAATTTTGGT -GTACAACCTACCTATAGAGACAAATATCTCGGCTGCCTGGGACATTTGACACCTATTATA -AAGTTCTTACATGCCTTCTTTGATAATTTTCTAACCTTAAAAATCTTAAGCTTAGGCATC -ATCAATTCCTTTTTCCAAAGCCATGAAAGTGATATTCAAGGCACAACCGATCTTTATCCA -CTATGTACTCTATACTCCATAAGTACATTAAACTATGTATAGGATTTGTGGATCGCCGTT -TTTATGTTCGGTGTCTGGGTTTTCCGAGCCGTGCGCCCGATTGGGGTCGGTCAATTCACA -GCCTCAACTCCTTAGAAGCAACCTTGGAAGTACTCATAGCCACCCCATATGATGAAACCC -CGCCCGGGTGTAAATTACCTCACAAAAAAGGACCAAGTTTCGGTCAAAGTGCCGAGACGT -GGTTATCACAAGGTAAGTATAAAAAGACACGCCCTGAAAGTCGGGCTGGCGGTCCAAAGT -TCCTCCGAATGCCTGGGCCCGGGCCAAGCTGCCGATGATGTCAGCGGAGGCTCGGACCTC -GGGAAGGCCGATTCCGGGAGTTATGAATAGCGGCATATATCATGATGCACACCCGTGCCT -TGTGATCGAAGAAGGTGCATCCTGCTGATATTGCTTGATGATATTGCGAGGTTCGTGAAG -CCAATCGGATTCATTGGGAACATTTAAGGGTTACGCTTCTTTGATGACCGCAAGCTGTCG -GTTGTCATATCGTTTTCTCCATCCGTGATCTAGGCCGAATACTCGCCCTGTCACTATCCC -CTTCAATGTCAAGATCTGTATGACTATCCACATGATTACAAGCAGCAGCAAAAGGGCAGT -GGAAAACACATCAAATGCGGGAGAATTCATAAGCTTCCCAAATTCAACGGCAGCATTGGT -GAAAGTACCCTGGAGAAATCAAAGCCTTGTTAGCAGAACCCTTCAGAGAACCAAAGAAAC -TACGATACGCACCCACGGAAAAACCATAGACCAGACAGACATGGTGAATCGTGACTTCTT -CCAGCCCCCAGGTTGTGTGTGCAGGGTATAACAAATACTCAGGATAGCAAAGAACCACCA -AAAGATACCATACCCCCAGATGAGGAGACCCATAAAATGGCTGATGAAAGCAAAGGGATC -GGCCGCCTTCGCAGTCAGCAGCTGACCGCGATCATACGCCGCAAAGCTACCGGCCTGAAC -AGCCTCGCCCAAAGCCTGGAACGCAACCGAGGCCTGCCCGCAAGGACCGCACATGATCAT -ATCCTGATACACCAAGTCCGTTGTCTGGAATTTGCGACCGAAGTACTGAGCGAGTATAAT 
-GGTATCGAACGCAGCAACAAGAGCAAGGCCGACCCCAAGTTCAAGGTAGGAGACGATGAT -AGCAGGGACCTGGAGACGGGGGCTGATACGCATGACGCGACATATAACCCCGCCACCTGC -GGCAGATGTTAATGCCGCAAGAAATGGCATCAATGTGTCGGCTTGCATGTGTCGCTTGCC -CGACTGCTGCATCTTGAGATGGACGAATGGGATTACCATACAGGCGAGTACGGCCATGCC -GGTATTTACCCACCAGAGAATGTAAATGGCCAGCCCAGCTGTGTCTCCGTATTTGAAAGC -GGCCATTTGGAGGATGGAGGTGAAGGCGATGGAGATGCAGCAGAGGCAGGATGTTTCGAT -GGTTTTGTTCCGCAGCTGTTGTAGGACATGTTTTGGGTGTAGAAATATGCGTAAGAGGTA -AAGGATGAGGAATAGACCGAGCTGCACGATAGTGTAGATCCATACGATCGTGGCGAGGGT -TTTGAGCCCGTTGAATTGGTATCTCAGTCGATAAAGGATCACGGCTATTATACCCGTTCC -TTGAGGAACGATGAACCACAGCGATGAGAAATTCCACAGGGCGCGAGACAGCGGACCCAC -CGGTGGAGGGAGGTCGCCCTGATCTGACATCTGTCGCTTTGTGTAATTTACCCCGAATTG -GAGCGAATGACTGATCTGCAGCTGCTTTCGATGAATTATGAGGGTATATAGGGTCGGGGG -AGTCTTGACGTCATTAGCTGAAGCTCGGAGCTATCTCCAGATCAAATATGTGGACAATTT -CTTCATTCTCATTCAATAGATTCGATAAAGAGCAACACTAATGTGAATTTTCGATACAAA -TGCAAGCAAATCCACCGTTGAGCCATAATTTCGAATGAGTTGCGCGCAGACACACAGGGC -CACCCCAATACCATAAAAAGTCTTAATTTCAAGACAGCAAATTTTCAAAACCACCATGGC -CGACCGGATGAGAACATTTCAGAACATATCCTTGGTATTTTGAAACCAGGTGCAGCCATT -CACTTCTCAGTGCTTTTTTTTTCCCTGAGAAGAACCCATGCTATAACTCGCTGTAATCCG -TGTAACGCCAGTTTCCCCCATGTGTAACACCCGTGTTCACTCCGGCAAATATCCATGTAA -TTTGTGAATGTCAAAAAGAGAAGAGAAGTTACGAGCTCCCTGTGTCTCTCGTGGTGTCTT -CGGTGTAAGTGTGGGGGCGATAGTAGCTCGGCCCCGCTGGAGGATCTAATCAACAATAGA -GGTCAGCGAGATTATCAGATCAAGAAATCAACAGGCACATACAACGTGACTCCAGATCTC -TTCCCCAGGATATCCGCCTCAGACTGTCAAACCCTCGAGCTCGCACTTCGGCCATATAGA -GCTTCTGCTCGTAGCCCGGACTGAACGGGGTCTCTCCATCGGCAGGTTTGTAGTGTTTGA -AGAGCGATGCTCCCAATGCAGTTATTGCGAGACAGGCGACGAGAACGATGACAATGATGG -CTGCGTTGGAGAGCCCCATTGGGAGTAGAGGGAAAAAAAAGCTTAATCTTGAGGAATTGC -AGTTGCGAAGACAGTTTGGATTGTAAGAGGTGGGTCAGCGCATGGTGGAGGAGAGAGCCT -CCGCAATCTGGGAACCGTGCCTCGCGATGCTAGATGGGCTGACACTGAGATGAAGTGAGA -TAGACGAAGAGAGAAGGGGGGATATGAAAGAAAAGACCAAAAAACCTTAACAGATGTCGA -TTGCTCGGGATGTGCATCTGATGTGTTGTCTCTATACGTGGAACGGCCGCGACCGCCCCA -TTGGCCATGTTCTTAATACATTCAATGTCTAAACACTCTTGGCCAATCATTACAATCATT -ACAACGTTGTGTCACATATGATGAAGAAACTCCCAAAATAGCTGACGCGTCTTTGCTATC 
-AGCTCTATGGGTTTAGTTCAAAAGCTTGTCATCCAGACTGAATATGCACCTTATTGGCAC -ATTCAACCATAGGTGCCCAGATGTCCTGAAGAAACAAGTCCAGCATGAATCAATTACAAG -ACCGAAAAAAAAAGAGAAAAGAGAAAAGGGCCATCAAAGAGAACGGAACAAAGACCAGTA -GGCAGCAGCCAAGGGTAGGGAGCCAAGGGTAGAGAGGTACCGAGGATCACAGAGAAAAAG -GGCTATGGGCACAATAATCACCAGTTCGCAAGTCCGGTTATCCCGAGGTATTATCTGGCT -AGCCATGTCTATACTTGAATAGGTACGCTGATCAGTTTTATTATCTTGATCCTACGCGAT -GTCTACCTCGGTAAATCATAGAATAAAAACTACGTTGGATTACAAAGGGTCAGTTTGACC -TGGGATCCTCAAGATTTGTTCATGTTATAACTACTTCATGTAGAACACTGGGTGAAAATA -CACCAATAAAAATGTTAGTAATACCTAATGAGCGATACTGACATCTTCAATATCAAATCG -AAGGTTATATTCCCACCGGTCTTCGCCGGTATGAAATGTCACAATGCGTTGAATCCGCTT -GACACCCTGACCATATGCACACAAGACAAGGGCCGCGATGACTTTACCAATGGGAGTACC -CAGAATACCGCAAAACTCCGGCGAAGGGGACTCCCAAGTTTGCGGCTCCTTGGGCGGAAA -CCCAAGCCCCTCCCGGGATAGATAAATATGGTCCCGAAGAAAGGGCACGGTTTCCTCTTG -AATAACAGTGGTAACAAAAACATACTTCAGGCTTTCCAAGGGGAAATGCGACTCATAGAA -AGCCTTAGCAAATTCGGACATGTAGAAATTCGCGGGGTCCTGGGCTCTCCTGATATTGTC -TATGAATATAACACCTGGTCCAGTTCGCCCAATCCAAAATGTAGGCTCGCCCTTGGTTTT -AAGAACCGATTGGAATCGAAAAACAGGTTCCTTCATGATTTCTGGGTGACCAACTGTGAG -GCCAAAAGTGTCAGCCCAGGGCTTCCAATTGTCTGTCAGGAACTCTGGGTTGGAATGGAC -ATACCAGAGGCCATGATCCATGTTTGAGATTGTAAGCGTAGTCTTCGAAATGGGGCACCC -GGGCTCATCGGCACGCTCCATCCATCGCCGTAGACTATCTCCCTGTGGCTCAAACTTGGG -GTACTTGGGAGCCATTGCGATGAAGTTGGTTCTAAGCTCACCAAACCAACTGCGGCGGGG -AGAATATGTAAATTACTTACCTGGCAGGTCAATTGCTATTGAGAGCTTGTTGGTGCTTTT -TACTGCGCAATGTTGTGTTGCCTTTGGCTTGAAGAAAGTAGTGCACTAAGCATGTTTAAA -TACTAGTATCAGCCAGCTAAAAATAGGAATAAAGAAAAACAAAGAGCACAATCAAGGCCA -TTGTGTACAGATGCATGACAGAGATCAATAGGCTTGGAAATTAGTCAAAATACAAGACCG -AAAGAGTAAAAGAATACTTTGACGTATGCAAAAAGGATATCAGGAGGTATGGGAGCAGTC -TGAAATAGCTCTCTTCATCAAGTCATCATAGTCTAGGGCATTTATTACATGCATTTATCG -ACGATGATACTGTCGAGGCATTCAGAATTCTCCAGAACATTGTACCTTCGTCCGCTATCG -TCATATATAGAGATAATGACTAGCTCATTGTTGAACTAATCGAGTCTCATGTTAGCTTAC -TGGAGAGAATCGACACTGTTGAGCTTCCCATCGCCACCCTCAGATACAGAACAGGAAAGA -CCAACTGTGACACTAATGTATTGTGTTCCTTCGGAGCCTGTTAGTGCAATTAGAAGGCAG -TAACTAGTACGCAGAAAGGACAGGTTTCATTGTGCGGCTGGCTTTGGGGACGGAGATGGC 
-CAACACTGCCGCTGGCAACCCTTGCATTTTGACACTCCGGATGCTTCTCAATATACGTAC -CGTTATGAGGGAACCAGGTTAGCCTGAGAGCTGGATGCCACCAATGCACTGTGGGTCTCT -TTATGGGCCAATAAAGGGAATACTAGGGTCGTCCAAGAGATGCTGACCCGTATGAGAGGA -GTGGATAATTGCTTTAATCCGACGATGGTTTGACTGAGGGTCAAGTTCACGATATCAATA -ACTTGGGGATAAAAAAGAGACCCCAGAGGCTGGCCAGTCGTCATATAGGGTAATTGCTGC -AGCTGCGAAGTTCTGACAAACTTATTTGGGACGTATTATTCTAGGTTTTGCCTTATCCAG -CTTGAAACAGTACTTGACCGGGAGAATTTGACTGTATGTTGTAGCTTTTTATTTGAGATG -GCCGGCTCCTTTCCCCCTTTAGCCCAGGGTAGCTTGGGGGGCTTCAATATTGCCTAGTTT -CTCTAGCATGTGTTCCAAATGAAGAAGGCATCATCTCGGACGAGGTTGTGGATGTCGGTT -TCGCATGCTGTTCAATCAGGATGTGCTGTTTGCAGGTGTCGTCATTGATATGACAAGCTA -CAAAATTCATAGTAGACTCCTCCATGAGCCCAGGAATTTGGACCTCCGTGTCATGTCGGA -GGAAGAGCTCGCCGGGTCCCAAGCCTGAAACCCCATCCATCTTACAAAGTGCTCGCCTGA -CGTTGATGGGCCCGGAGACCCCCAGGAGGATTGTTCCAGCCATCAAATAGACGTGTGGGC -TTTCCGCCTCGGTTATTGTGATTGAATCTTTGAAGGAGGAGCAGGGTTTTTCGGATTGGT -AATCGATTGGAAACAAAATGGAAACAGACAGCTGGGGTTGTCGTCACAAGGCATCGGGCT -GTGAGCGTCCCTAACATGCGTCGGCCTGGAGATGGCTATAGACAACCCTGATAATTGGAC -CATTTATGTACTGAAGACTGACAAGCGCTGCATGGTTGCCCCGATGAATAACCGTGTATG -TTCCCAATGAGCCCGGTATTCTCATCCGGGGTGGCTGAGTCGATTCCCACGGAAACACAT -CAATGCAATGACTATTTCGACGACAAAGAATTTGTCATTCCAGTATATCATCCAGACAGG -GTGGAAGGTAGCTGGTGTCAAATCTGGCTGGAGAACGGAATGCCATATAAGCCGAGAGAA -AAATTAGACAGCCAAGAACCCAAGTGTATTGGGTTGCCTATGGGGAAGAACAGAGAATAT -GAGAAAGCTTCCCATGTCTAGCAATGGATCAAATACAGAATCTCGGAATCATGCGACATC -TGCATCTAAAGGTCTGAAATTTAACTAGAAAGACTACAGCTGCGTACAATGTATGTGGTC -AGACTTATGTCTTATATCCCATGTATCTCTTAGATGCTCTCTAGTGCGAACGTCAGGGGT -ACAGAATTGAAGTCACTCATTCAATAGTAAGTCCCGAAACACAGAAATCTCAGTTGTAAA -ATGCAAAGCCAAGTACTAAAACTGTGTAGTGGGTTCTTGGTCTGTTTACCAAGGTTAAGG -CTGAATAGTCGTTTGGTCCCTTTTCAGAAGGCTCTGCCATTGTAGATTGAATGCTCAATG -GGCTCAAAAGTAAAGAACAATAGCTGTTTGACAGTCGCGCCTGCCTAAAGTTCAATGGTT -GAGACACAATGTGCCCATCGTTCCTTCCATTTGTATACCTCCTGTTTTGGGACACAGGGT -CTCCCATAAATtctctcttgctcacttttctctttctttcttctctctctcttctctctc -AAGTTAACAACATAGTTTTCAACAGCGCCTCCTGCGCTGTGACCCTTTCTGCTCCCCCCA -CGTCCCCGGCCTCGCCGCCGGTTGTGCTTTTCATTTTCATTCAATAAAAAAAAAGTAACA 
-GTCCTAAAACAAGCTCCCCTTTCCTGACAGACTTGGCTCGTATTGACACAAATGCTAATT -AATGCCTTTGCCTCCTTCCTGTACAGaaaaaaaaacaaaaaaaaaGCATGTTGATCCCCT -TCTTGGGAATCTTCGTGGGCATGACTCTCTGGAGTCAGTGGTTTCCCCTCCCAGGGTTCA -CTGCTCCAGAGCAGTCGTTCATGCGGCGTCTGTCTGGTACTTTCCAGCTGACGATGGAAT -CTTGGGCCTTCTGTGACCTGGTGAACGCTGCCGTCTCCGGGCGCGGGCACGCTTCCCAGC -GCGAGCATCTCGTATGGGAGTCGCGTTCTTACGTTGGCCATGGGTACGCAAACTCCACAG -ACTTTCTGGTTCTAAACCTTGGTATGGATGTTATCGTCCAGCCTGGACTGGTCGATGATA -TGCCGCCCTGGTTTCCAACCCCGTTCCTGGTACCCATCCCAGTCTCGCTTTGGACAGGTG -ACGGAGCGACATCTTCTGATCCCGAGGAACTTGTCGACTTCCACTCGCCTAGACCAAAGC -CTGGGCAAGGTTTCATTGCTACGTTGTTGTTCGTCGGCATTTGGTGTTTTGTCCAAGTCC -CCTGGGCCTTGATTGCTTACTCACGGCAATTGTTCTCCGGACTTGATACCTTGATTGAGT -GGATTCTGAAAGTTCTCGAGGGCCAAACACAAATTGCACGCGTGCAATTTATGGGTGAGG -GGGATTCATCTTCTTTTGACAAGAAGATGGATCTTCTAATCACTGGCCCAACGGAAACGG -ACGAAGAAATTCTTTCTTGGGTCATGGCTGCAGACAATCGTCCCGAGTCTGTGTCTCATG -TGTCGGTTCTCTCTCAGTCTGGAGGTATCTGGGTTCACCGGATCACCAAAAGCCTGGGAG -ATGCGCAGTCACTGGACCCAGAATCGGCGTGCAATACCACCGAGCCGGCTAGCACTACGG -ATGGCGCTCGCGCGAACTTGGTCTCTCGGCACCCTTGGAACTTGACTGATCTCTACGTTC -CCGACCATGAAGATTCTCTGGCCTCTGCCGATGGTCCGACTGGAAACGGTGCGGAAGATG -AGCGTCCTGATTCCACCCCGGCGTACTTCAAAAGGAAAAACCGTCCGTCGCAAGCAAAGA -GACGTCGCAATGCAAAGAGGGCGCAACAGGAAAAGAAGGAACTTCAACATTCTAATGTTT -TAGCCTCCTACGCTTCCAGTGCTTCTCTTGCGATGCTTCCGAGCTCGACGCCTTTGTCGG -CTGTGGCTCCGGTTTTTGTTCCTGGTCATCTGGCGGAGCAGGGTGACTCGCAGAGTGGTC -CTCTTTCTCCAGCTCCCTTGTTCGCAGATGTGGGCATTGATCCTTCAACGGACCGTGGTA -CGACTTCGCCCGATGACAACGCTCTTCCCCAGCGGTCCCGCCACCGTCGTCGTCATCGAC -AGCATCGTGACTAGATGCTTCGATAGAGGTAGGGCATGTATCAGCAATGAAAGTGTCTGA -GTGCGACACCTTGGAGGAACTCCTCTCTTCGGAGGGTTGTCTAGGTCCTTCTCGGCTGTC -CCGCAGCACTTTTGTTTGCCGACACAACGCTGTGTTGGGGCCGCGTCAAGCGTCTTCGGA -AGCTTTTCGCTGGGGGTTGGGCTTGGAGATTGAGTTGACTGACCAATATGTAAATAAACC -CCGAGCCCCTTTTGAGGCACCAGCACTGACAATCTTACTCTAGGCACaaaaaaaaaaaaa -aaaaatacaacagaatacaacaatacataaaGCAGAGAATTCAACAAAAATATGAGAGAG -AGAAGGGTGACGGCAGACAATCATGATAGCAGAAAATCGCCAAAAAAAAGCTCTGCTCTC -ACTTTGCTCTCACTTTACTCTGACTTCTCACTCTCGACAGAAGTGAAAAATCTCTCTTGC 
-GACGTTCTAGTCTGATGCCGCGTTTGCCTCGTGTTTTGTTGTCTGGGCTCCGGAATTTGC -CCATTTGATCTCTGTTTTTTCGGCATCGACCTCATCAAACATTCAACTGACATCAAAAAG -CGTCATTTATTCCAAAAGGCCTCTGGGGGGAAGGCCTGACTGGACAAACTCTCCCATTCT -TTGCATCGGAGCTTCTCTATCTTTTACGTCAACAAACCCAGTACAAAATTGGCAAACTCG -ACAGGCCAATTCTACATGTCTCTCATGAGACCCGGTGGAGATCTCAGCTGGCTCATCATC -TTGATGGTGGGTTCAGACCACAATGAGCCCTGCTTGCGCAGTTGACTTGGGTACTATGAA -CACCATTTGCCATGTTTCGCAGCGGTCATCTCACCATGCCTTGGTTGTAAGTAGAAAAAA -AAGAAAAGACTTCCACTGATTTTCCGGGATGGTTTCAGTTGGAGGAAAATTGGATTGCCA -CACATCTACCAAATACGTCGGATTCAGCTGTTCTTCTCTGGAATATGAGACAAGCCTTCG -CTGGATAAAAACGCCATGACCACAGATCAGAACTTCGACTTCAATCATGCTTCCACTGCA -TGTCAACAATGATCATGAGTCGATCACTACTCTAACTCTGATTCAATCAGCTTGATTTCC -TTCCAAGGCTTGCCGGGCACAACAATAATGGGCGAGGAGCCGTCATCAATCCTTTTTTAC -TCTGTGCGGCTTCAGGGAAACTTTCAAATCACAAAGCTGCTCAGTAACTCGCTTATCAGC -ATCCTCGAGACTCTTCCGGCCTCTTTGTTTTTCTTTTCATGAGCCTCCAAACAAAGATCT -GTTGTAAGAGCCTCTGGAAGACCAAGGACATGTTTTTACCTTCACTGAAAACCCACTTTC -TGTGTTATCCAATTTTTTCTCGGCATCTCTCCACCTTTCGGACCCTGGAACAGGTCTCTT -TCAGACAGACATGTCGACAGGAATGACTTTCTACCTTCATCTCCCTCGGGCAGCAAAAGA -AAAGCTGTAACGAGGAACAAAAAGAAAAAAAAAACAATATTCTTCCCGAGAAAGCTAAAA -TTATGGTATGTTGTGTTATCATTACTTCGGATTTCCATTTGCTGACTATTAGGGCTTTTT -TAGGACCCGTGACCTTCCATCAATTCATCTGTCCTCCCATCGTTTCCTTTCTTCAGCCGG -TTAAACCCCGATTTTGGTGCATGGTCTACTGGTAACTATGGGATATTGAAGCTCGGGAGA -TGGGAAAAGGGGATATCAAAACAGGGATGAATATGCATGGCATGGTTCGGGCGTTGTGTG -GGTTCCTTGATTTTGTTTGTACAGATCTAGCGGCCATAAAAATTGGGCAGATCCTTTGTT -GTTTCAGTGATATCCCACAAATCGTCGCCTTTTTTGTTGACTTTTGACTATCTTGTCACC -ATGATCAATTAAATACTATTGTTCCACTGCCATTGATGGTCTTGTAAAAGATTCGTCCAA -GAGCAGCCGTGCTCCAGCCATAGTAGACTTAGAATAGAAAGCGAACATGCAATTGAGAGC -TAGCAACTGGTGAACTCAGAATGGAGTTTTTGAACACTCTTGGGTGTATATAGGAGTGAC -ACTGCGTCATATAGAAAACGACGATATACGGTCTTATTTCCGTTAAATTCGTGGCGACAT -GGGCCTAAAACAACTTCCATGCAGATGTTATCAAGAGGTAGTGAGTACGAAGGAAGTATG -GGCTTCAAGAATTTGAGATGATGGTAACAATTCAATACTTTCGCCGGCTCAAGAGAATAG -CGCATATCCAATGAGATATCACTCGGTATATTCGAACTGCTTCCCGTCATCGAAACAAAC -GCCAGGCAATATAGATGGTATTTAGGCAGTACTACACAGCGCCACTTGATGGCATCCCAT 
-ATGGTACTTTACTTAACACAAGCAAACAGCATATGAACATTTTGCTGAAAACAAAGATTT -ATACTAATAGTTTGACAGTTATCCTAAATATAGTGAATTGGACATATGGTAGAGAAAACT -ACATAATATGGCACTTTCCCCTTCGCTTACGACCTCGCTCCTTAATTGTGTTATCGTTCC -AATCCTGAATGCGCCGGCGCCACTCCTCGTTTGTTGGTGGCGGTAAGCTGGAAGGTTCGG -TTGAAACTCGCCTGTGGAAAGACCGTTGCCTCTGAGGTATCTCATGCTCATCATCTGCTG -CAGTTCCATGCCCATCTTCATATGGTTGCGGGATATGGAGAAGGTTTGGCGATGACACAG -AGTATGGGAAGTAGGCATCTCCAAGTGCATCGCCCAGCGCTGAGGCCCCCAAGTATGGAA -GAGGCATGGTTTCTTGAGGAGGCTGTTCAGCTGGATATACAGCAAGTGTAGGCATGGGAG -TTTCGATTGCTGGGTGATGATGTGCTAGAGATTCTGCTGTAGCTGGATGTGAAGCAGATC -CATGGTGTTCTGGTGAGAGGTTGTTTGCGAATGAATCTTGGGAAACCGGTTGTTCTGGGT -TGAATTCTCTGCCGATGGAATTACCTCTGCGCCTAGCATCAGTCATAGGGGCTAGTCTCT -GTAGGTTGACAGATTGTGGGCGTGGGCGGATTTGTTGCTGCGTGGCACGAGCTGATTCTT -CCCATTCAGGCCGAAGGGCAGAATTGGCGACATTGGCTGGGGAGAGAAGTGTTCTCTGCA -AAGCATCTGGCAAGGGGTCTACTGTCTCACGTTTATAAGGAGGTGGTGGTGGGATCCAAT -TATCGGCGTCACTTTCATGAGGAATGTGTCGACGGTTGGGAGAATTGCGATACCGAGGGT -CGTATCGCCCACGGGTTGATGCAGCCGCAGTGGCAGCTCGATGGAGGACTTCACTGGAAC -GAGGCTGCGTGTTGTCATATGGTAGGTCAAGTACATCGCCTTGAGAATTCTCGCTTGGCG -AGGTAGGAACTGCAAGACTATTGCGGCGGAAGCGAAGCGGTAAAGCTCGAGATTCTCGAG -TAATCGCTGAATGACCAAGGCCTGCAAATGTTGTAGCAGTGTCACCTCGATGTAGTCTTG -TTCTTCTTCTTTCGAGCTCAACTTCTGTGGCCAAAGAGGAGTCGGGTTCCATAAGAATCT -TGGGTGCCATTTCTCCGACTTCAATATCAAGCTCAAGCATCCCACCGTTCATAGTAGAGC -GCTCAAAATCAAGTAAAAGGATTCGACCACCGTCAACTGGGATATCTTCTTCACCAAAAG -CCTGTGGATGCACACCACCGGGTGCTACCGCGACGAGTCTTCGACAGCGCCTTGTATCTG -AGCGTACCCAATGAGCGGAAGTAATACCGTGCATGTCACTCACCCGCCGCCCCCGAATCA -ATATCTTGGGCTGTTGAACGATCTTCTGAAGCTGTGCCAGATTGTCGCGAGGCGCACAAC -CTCCATAATGATGACCACCACCAATCTGGGTACCAGTGATTTCACTAGTACCCAATCCAG -GCATCCCGTATATCCATAGTCCTGAATCTCGAAACCCCATAGCAGAAAGTTCCCCCTCAT -CATCCGTGGCGGGTCTCATGCTTGGCCGCATTTCCCGCGACTCATCTATGAAAAATGGAC -TGGCAAAATAAACGATTCCCTGATGATTGTTCAATCGAAGCGTCCTGAACGCTCCCAATT -GTTTGTCATATCCCATAACCCAATGATCATCACCTTCCTCATGACCAGTAATAAGGGAGG -CGTGAGTTATACAGCAACGGATTGTTTCAGGAAAGAGGATTTGTGTTGTCCACATCCGCA -TCTCGAGTTCTTCGGGCGAAGCGTCGGTTCCAGTCTCAGTATAGAAAGGAGGAGTAATTG 
-TGACGATATCCTCAGCCGTAAATCCAAGAAGGATTGCTCCATCGGGAGAAAATGAAAGTG -CATCGACACGGAGACAACGCACCGCCCGACGGTCGGTTGGCAAAGCCTCTTCGCCCACAG -CGTATACCTCGATGCTATCCTCAAAAGCTAAGGCCAGCACACTACCTGCCGGAGAGAATT -TGATGTCCCTTGGAGCATCATGCAGTGTGATGACTTGGATATGCTTCGCTTCGTCGTCAG -AGAGCCGGTAGATGTGAACTCGATGTGTGGAAGACAACACAGCAATCAAAGAGCCATCAT -TGCGAATTGTGGCTCCTAGTGGCCGCCTTCGCGTCTTAAGCTCATGCTTGACAACGACTG -GGTCGGTGGCCACATCTAGTATGGCGATGCGAGAAGAACTTATGCAAAGAACCGTTTGAC -CATTTGCTGAAAATGAGAATCGAAATACTTCACCCTGTGGGAATGCCGTAGAAGAGCTCA -TAGAGCTTGAAACTAGTCGCACTAGAGTTTTTCGAGGCCGTAAAAATGCATCAAAGGCGT -TCTGCCTGACCTCCTTGACAAATTGGCGCCTAAGTTTAGTAAGGCTTTCCCCCTCAGAAA -TAACCCCGCGAGCCCAGCAGAACGATGGACATTTTGACAAATGATAGGCATACAATGCCG -CGGAATCGGACGCATGTTTCCATCTTTGGTTCAACAGAGATAATGATGCGAATGTATTGG -CATCGGAAGCATACAGTATGCTATTAACACAGTAAGAATTACAGTTAATGGCTGACCGTG -ACGCAAAATGTCGGACAAACACAAGGTCCCGACCTCAGATCAGATAGAGACTTACTGCTC -GATCACATTAAGTGGTAACCGCTCAAAGGATGGCTTTAGACTGGAAAATGCAGAAGAGCC -CCGTTGATGGACGCAAGAGTCCTCGCAGAGGGAGCTATCCTCCATGATTACCGCTTTTTA -CCTACATATAACGGATTAAGCACACCGCGGGTATTGTTGAGAAGATAGATGACCTTGTGG -CTGTGCAGTTTCTCACCCCATGAGGCAATGCGGGACACAAGCGTTAATGAACTTGTGCCG -ATCGCCTGCATTGCCCACGCCAAGTCCAAGGATATAAAAATAGAAAGAACCTGAGGCCCC -GAGGCCGAGATACACTTATGTCAGCAATATTTCACGTGAAGCCTTGGAATGAGTCATCAT -TCCCCACATGCCTGATGGATCCATGGATGCCCGCAGTCGCAATTTTTGATAAGAAAATGA -GATCGTCGGATTTTGATATCTGGGGAGAGTTCGTACGCAATGATGTGCGTATAGATTTGA -ATACAGCCCACTAGTCTCTAGAGCTCTCAATTTGATTCTCGCCCTCTTCTCCCTATCTTG -CAGGCCTGGTGCTCATACCCCTCACTCTGTCCAACCGATAATCGGTCGACTCCGAAAACA -CCCCGACCAAAATCACCAAATCACCATCCTTTGCTTTCCCTGTATTAGCAATCAGCAACC -TCCCACACTTTTGGGCGCGCGCATCATGTCACCTGCACCAGCCCAAGTCAATTCCTCCAA -GCTTCCATCTCCCGGATTCAATGCCGGCGCCCGACCCTATCGTTCCCACAAGGTCCGCGC -CTGTGACCTGTGCCGCAAACGCAAGTCCCGATGTACTGTTGACATCCCCGGCCAATCCTG -TCTTCTGTGTCGCGTACAAGGTGCCGACTGCCACTACCAAGAGGAGGCCAACGCGGATGC -TTCGATCTTGAGTGCCCCAGACTCAAAATCTTGGCCGGGGCCTCCAGTGATGGACGCCAT -GCCTGGACAGAAGCGTAAGCGCAGTTCCGATGAGCTAGCTCACTTGAGTCAGAATATCCC -GTCCCGACAGCCGATAAATGGACCCAGCCCGCATGGAACGACTTCGTCGGGTGGACGTCG 
-AGGAAGTGATCCGCGCCGGAAAGATATGGACGATCATCAGAATGAGTCGGTTCTCATTGT -TGGCCCTATGGTTGCTGAAGATGCCCAAGTCATTGAAAAGCACATGCCTCCGGAGCGGAC -GAGTCAATCGGAAGAGCCAAAGAGCCACCCGTACAATATTTACTCTAGCGATCCGCGGAA -ACCAGTGCTCTACACTACAATCTCACGGCGAAGGAAAGGTATGCGGAAAGGAACGCCTCC -TGGTGAGAACCAGAAAGAGGTCCTCGAACAAATACTCGGGCCATTTAAACACGACCTAGT -GAGACTGTATGTTGGGCGTTGACTCCCCGCCCTCATTTTCTACCCTACTGATCCTTTTCT -CTCAAGCTTCATTGACCGTTTTAACGTTTCTTTCCCGATCTTCGACGGAGAATCTTTCTG -GGAATCATACATCGCCGAAGACCTACACGACCCACCAGCTGCACTAGTGTGTCAAGTTTA -TTCCATGTCCCTCGTATATTGGAAACACACCCAAAAACTTGCTTGTCATCCCAAGCCCGA -CGTTCGATATGCGATCAACATGACCGTCGCTGCGTTGCATGAGGAATTCTCAGCCCCCGG -GTTGGACACAATCAGCGCCGCGCTTATTGACCTGACTGGACGACCCATTTTCTCTATGAC -TGGCAACGCCGTCAGCTGTGGCCGGATGTTGTCTTTGGCGAATTGCCTCGGCTTGAACCG -TGATCCTAGCAATTGGAAGCTCTCACAAGCAGAAAAAAACCAGCGGATTCGTCTGTGGTG -GGGAGTTGTGATTCACGATCGTTGGTAATATAGTTCCGTCTTCCTTCTGGCTTGCAACAC -ACCTAACAATGAATAGGGGGAGCTTCGGTCATGGAGTCCCACCTCAGATTTCCAAAAATC -AATACGACGTACCTCTGCCCACTGTGGACGTATTGGTCCCATCAGACCTCCGCACCACAG -AGCGTGTGAGAGCAGCACACTGTCACATCTACCTATGTCGATTAACCGAGACTTTGGGCG -AGCTGTTACCACTGGTTTATGGGCTGCAACACAAGCCTGCACGCGAGACATCCAAGAAGC -TCAGACAAATACGAACAGATCTAGATATTTGGGAAGACTCCCTACCGGACTGGTTGAGAG -GCCCCACAATCCACACCGGAGAACGAATTTATGGAGCAAGCAGCTTGCAACTGGCTTTCT -TGGCTGTCAAAATGCTTGTCAGTCGGGTCGAACTAAACGTAAGCCGAGAGGAAGGAGATT -GAAAGGAGTGATCACTAAAAACATTGCAGGAAGTGAACAACTCTGAAACGGAAAACACCG -AGGCACGTCGCTATTTTCAAACGGAATGCAGAAGGTCAGCAGAGGAGATTGTTGGATTCA -TGACGTCTTTGCGAAAAAGCAATTTCACAGAGTTCTGGCTACCCTGTAAGTTGAAGCATT -CTCTCTGCTCATTTTTTTTCCTCCCTAATATTTTTTGACAGATAGCGCCTTTCACCTCAC -GTCAACGGCGACATTGCTAGTTCGTTGTGCGTTTGAAACCACCGACAGTGAAGTTGCACG -ATCCTGTCTTTCTAACGTCGAAACCTTGCGCTCAGTCTTGCGTCGTGTGCGGGAAGAAGA -AGACTGGGATGTAGCAGACATGTGTCTGGATCACTGTGAACGCATTATGCACCGGCTTCC -TGGAACCGGTGCCACGATGGACCAGGCATTCACAACCACAATGGCAGATAGCGGGCTGGT -CAACCCGGCCGCGATATCGCTGCCTGAGACCCAAACTAATAACGATATCGTCGACGACAT -GATGTCGATATCCAACACTTTTGGAACAATGGACGGCTTCCCCTTTGACATGACAGGAAT -ATGGGATGTCTCCGTCTTCCAAGATGTGAACCTGACATGATCAGATCATTGATACAATTT 
-TAGATTCTTATTTCAACAATATACCCTGGGATCCAAAGCCCAACGACCTTGATATTAGTT -TAATCTCAGTCTTTCTCATATTAAAATTTCTGACTAAAATTTTGTACAAATAAACAAATC -CCCAAACGCTATTTTACACTCGTTCATTAATGATCAGAATGAACCCCTAATGATCTTCTA -GACAGGGAAAATGGTAATTGGCGCGTGCCGATTACTGATCCTTTGAGCCAGCACTGAACA -AGCCAGTGAAAGAACCCTTCATGCTCTGCATCTGCTCCTGGCGGGCCTTCTCCTCCTCAG -CTGCCATCTCAGCGAGCCACTTCTCGCCGTTCGCCCGGATCTCCGACTCGATCATCTCGT -AGTTCTTTTGACCCCGCTCACGGATCTGATCCCAAAGCATCTTGCCCTGGGCCAGACCCT -CGGAGGGTGTCTGCTCGCCGTTCAGAGTGGCAGAAGACTTCAAGCCAAGTGCGGCAGCAA -AGTTGCCCAAACCACGCGAGGGCTTCTTCTTCTGCTCCTCGGCAAGCTCTTTCTCAAATC -GCTCACGCATCGCCTTTTCGCGCTTGGCAAACTCCGCGGGAATTGGAGTACCATCAAATG -ACTTCAGCACGGTACGCACGTCTTCGACGCCCATACCAGCAAGGTATTCCAGGAAGGGGA -TGAGGGCGACTAGGGTTTTATCTTTCGGGTCTCCGGTCCACTTGTCCAGAACAATGGCGT -TCTCGGGCTGGTAGCGGGCGTGCTCCTCCTTGGTGTCGATGAGAATAACTTTGGAAAGAT -CACGGTTGAGATATTGCAAATCCTAGAATACAATTAGCGCTATTTCGCTAGGACGAGGAG -AGGGGGCAGAAGTTCGCATACCTTGATGTATTCTCCATCCTTGTACCGGGTCGCCTCGCG -GAACAGGGGCCAGCGGATGATGCGGTAAGGGTCGAGTTTGCGTAGAACTTGATCAGCCAT -CATACTGGGCACACTGGTGAACAGAACGAGTTCGTAGTATTGGTTCAGATAGCGGAGGAA -GTAGTCGACACCAGGCCGCTTGGCAACCCGCCATCCGTGCTCACGGGTCCATTCGCTGTG -AACAAGGAGGTCTTCAAGACTGAGCACCAGGGTGTATGGCTGACGCATCTCAGCGTCCTC -ATCGGGAAGCAGCTTGGGGAAAGCAGGGTCCTTGTAGTAGCTGGTGAGATCGCTAGTACG -GGCCTTGATACGGTTGTACCAAAGACCAGGGCTCCATCCGGATGGGATCTCGGGGTGTTG -GCGCTCTTCCTCTTCGGTTTCCCAGTTACGACCCAGGTAGCCCATACCAGCAATACCAGC -AATCAGCATGGCTGCATATACTATGTTGGCCATGCGGGCCTTGCGACGGTCGATAGATGT -CTCATATCCGCCTTTGGGGATGTCACCACCTCCTCTGCCATCCGCACCGTACTCTTCTGA -CTGGGTGGGATCTTCAGTGAGATTCAATGGTCCCTTTTTCATACGACCTTCCAATTCCGC -GGCCAGCGTTGATGGAATACCTTGTGTCAGATCGGGGAGAGGCGTCTGTGGGGCTTGTTC -GGCCTAGAAGACAACGTCAAAAGGGATTTATTTCTGTAGGATGTAAAAGGAAAGCCTCAC -GGGTTCTGAAGGTTGTGTAGGTTCGGCTTTTGTACCGAACTCAGGTTGCTCTGCGGCGAA -GTCTTTTTGATCTGGGTTGGGAGTATTCGGCTTCGATTTGTTCGACTTCGCCCTTAGCTG -GATACCTGGGGAAAGCTTGTAAGCAGCGTATGGCGAGGGCCGCGCCGACACGGTTGAAGC -CCGAACGACAGAAGCGAGGACGTTTGGCCTCGTCAGAGGCAGGACTGCACGACTAAGCAT -TGTGTATTGGGACTTTGAACAGAACTGTCCAGAATGATGGGATGGACGACAGTTTCCTGT 
-TAAAAGTCGGCGCGACTCGGTACGGCGGCTCATCCGAACCATCCGTGCACTTTGTTTATA -TGTCGACTCCCTAGGCCGCCGGTCcttttcctttcttttcttttcttcttcAGAAAGTTC -ACCCAGACATTAATTTGACCTATCAAGCAATTCTTCTCTGCATGCCGGGTTGTATTAGCT -GGGGAGACCAACGAAGATATTCTTCATTCCCACTACCAAGAGCGCAGCAAAGGATTTTCC -CGTGAAGCTGCGCTCCGATCGTGGGACACAATTCTTTTTAGCTTTTTTCGTTTTCTCTTT -CTACCATCTTATTACCAGAAATATGATCTTCCTGTCCAAATTTAGATTCGCTTGTATCTT -ATAGAACTCATCTACATACATTCCAATCATGGGGTTCAAAAAGAACATGCTCCGTTTCGA -GGAGCAACTCTCCGATGTCGACTCTGAACCAGACGCCAAACCTCCGATTCTACTGCGATT -CCCTTTCCTCCATCTACCGTCCGAGATTCGCCTACGCATTTATCATTATGTCCTGTTTAC -ACCGACCCGGAAAAGTGCATCAAAGCCTACTGGCAGTGTTGGGTCTTCGGCGAAGAAGGG -CAAACCTCTAGCACCATCCTCGCACCGCATCGCCCTTTTCCGTGTCTCCAGGCAAATTCA -CGACGAAGCCACTCACTACTTTTATTCAACCCAGATATTCCGTGTGTTTCCTGTACAGGA -CTTCTTGCGCATGCCCACAATCCGGGCTTTGCCTCGGCGCCACCGCCCTTCGATCACCAC -AATTGAATTGATTGTGGGATCAAGCTGGACTGCACCCCCAAAATCATGGATTGTAAACCA -GGGCTTGGGGTTACACGACATGGAGCTTGTCCGCACATTGAAAGTGTTTGTGCAGTGTGA -CCCTTCACAACCTGCGTTCGAAGGCTTTCGCAAATCGAAGGATTACTATACCGACTTCTG -TGGCAAATTGTTGCACCAAATCCTGGAGCGTTTGCCGGGTTTAGTTCAGGTCGAATTCGA -TGCCTGGCCCTCTGTAGAGAAGAACGGATCTTTAATGACTCGCTTGCTTACGGAAACCCG -CGATGCTCAGAAAAAAGTCCTCTGGGGCCCTGATAGAGGCTGGACCGACACTCAAAATGA -CATCCACGAGAAAAATGACTTCAATGACGTGGATGCTGCCTTTGATGGCCTGCGGGCCTA -CTCTTCTGATATTGTCGAAGCCTTGAGCAACTCGCTTCAGACTGTGAGGTTGGAGTCATG -ATACTCCATTGTTCAGCGGATATAGTTCATACTGTTACGATTTATATACATTGCGCGGCG -CATAGATTTCATGAAACGAAATTATAGACACAGACCTACATCAAACTATAGCCCACATGC -CATTCTTGTTGACAATAAATTTACAGTCGTCACGTCAAAATTTGAATAGATTTCATTTAT -CTGTTGGTCTGGTTTGGTAACATAAAGCCCAACCAATCAGAAATAATTAAAGTGGTTTGA -TTGGCACACTTTTCTACCTCTCTTTAGAGATCTTCATCAACTTCCATGGCAGCACCGTTT -GTATCTTCTTTGGATGATTGCCGTGCCACCGAAGCTTCCTTGTCAATCATCTGCCGCGAG -ACGGCAACAATGTTATCCTCAATCAACTGTTGACTTACGTCCTCGATGTCTGTAAAGCGT -TAATCTCTGTTTTGGGGGGTTATTGTCACAAGGGGGAAAAAACATACGCTTCTTGGGATC -CACCTTGCCGACGTATCTCGTCTTCAATTGGTCCTTGCTCAGCTCCGTCTCTTCCTTGAT -CCGCTTCTCGTAGCCCTCTGCAAGAAGAACGAGCTGCTTCATGCGTTCCACGTTGTGCTG -ACCCTCCTCATGGAAGTCATTCATCTCCAAAGCCTCGGTCCATACGTGCTTGTGGAGGTT 
-CATCAACATATTCTCTTCCAATCCCGTCTTACGATAATCGATCGCGATGCTGTAATAGTG -GCGGTTGAGACCGTGGATCAGAGCCTGGATCGAAGGCTTGTTGAGGTGACCCAAGTTGGA -TGTAGTCTGGCGGGGCTCCTGGCCCATAACCACCGTCTGAGGCTGGATAAGACGGAAGGC -GTCAATAACAACCTTGCCCTTCACTGACTGGATAGGATCGACGACAACGGCAACCGCACG -CGGTGTGAGCTGCTCAAAGGATTGCTGAGTGTTGATATCGACCGAGGAGAGCCAGCAGCC -GAAACCGGGATGCGAGTGGTACCAGCCAACGACGGTTTCTGGTCTGTACGAAATTATCAA -TGGGGGAGTCCTTTAGTATATATCAGCTGATAGGCTTACCGTCCCGTCTGCCGGAGCATT -TCCATCATTCTAGTTTGGAACACGGGGTCGACTGCCTCAACACTGACACCGGTGCCACTC -TGAGGCATAGCAAAGACGTCCACTACCCGGACTGTGTATTCATCAACGAACTCGCCCAGC -ATCAGCCCCATGACCTCCATCGGGACACCGGCGCGGCCATGTCGTAGCATCTTCAGCAGA -GCCAAAGAGGAGATGTGGACAGTTTCGGAATTATCAATCAGGTTTGGGGTATCCTAAACA -CGGCTCTCATTAGTCAATGTAGATTAATATGGTGAAGCTAGGCTAGACCGTTGCCATTAA -TGCCTCCTAGAAGGGGATTGGAGGACTAATCCAACTTACACCACCGGGAGCAGCACCTCC -CATCCCCATGCCTTGAGCGGCTTGGAGCATCCGGTTTAGGCGATCCATAGTTAAGAGATA -TCCGACAAGACTTGATGAAACGAGAGTAAAGAGGACGAAGCTATCGCTACTTGGAAGCTC -TGGAGCAAGTCGAATTGTCTCTAGTCGCTTGGTATTGCCGAAACAGTATTCAAAGAAAGA -TACAAGATAGCAAAAATCCCCGGCAGATGTGATATTAGCGAGAAGATAACAAGGTTGTCG -AAGACGTGGTAACTGATGATGAGCAAAGCCGTGGGAGCTTTGCCCGACGAAGGCGGTGTG -TGTTTCGGTCGGCTTCCGCTCCTTCGCGGCCGATTCTCTCTCCTCTATCTATTCTTCGAA -AGCCTGAATGAACAATGACATTGGCTTGCTATTGTGATTGAGTTCTACTACCTAGTCTTC -GCATTGCGGGATCTGGTGACAATAGGTGCCATCGCTGCCTATTTACCCCCCCTGGGAGTA -TATTTGCATTGCGCCTGCAACTAACGCTACTTCATCATTCCCCGGATCTCACTTACACTT -CTTGAACTTCACTTTTTTTTTCACACTGTCAACCAAGTGACCCTCTGTCACTCTCTGCAA -CATGGCTTTGGAACCTATCTCGGATAGGCTTCTCGGGTCGAAAAACAAGAAAGTCGCTTA -CTTCTACGACTCCGATGTGGGCAACTATGCCTATGTCTCGGGCCACCCCATGAAGCCTCA -TCGCATACGCATGACACACAGCTTGGTGATGAACTATGGCCTCTACAAGAAAATGGAGAT -CTACGTGAGTAGACTTATCTTTTTTGTTTCTGTTTAACCTCAATACTAACAAAATCAGCG -TGCGAAACCTGCCTCCAAGTATGAGATGACCCAGTTCCACACTGACGAATATATCGACTT -CCTGTCCAAGGTCACACCCGACAACATGGACCACTTCTCAAAGGAACAGAGCCGGTACAA -CGTTGGAGACGATTGCCCCGTGTTTGACGGTCTCTTCGAGTTCTGCGGAATCAGCGCTGG -AGGCAGCATGGAAGGCGCTGCGCGACTGAACCGCAACAAATGCGATGTTGCTGTCAACTG -GGCTGGCGGTCTTCACCACGCCAAGAAGAGCGAAGCAAGTGGCTTCTGCTATGTCAATGG 
-TATTCTATCTCGGTCTTCAAACGTCTGTCTCGGAGGCTAACATCCATGTAGATATTGTTC -TTGGAATTCTGGAGCTGCTGCGCTTCAAACAGCGAGTACTTTACGTCGATATTGATGTTC -ACCATGGTGATGGTGTCGAAGAAGCCTTCTACACAACTGATCGCGTCATGACCTGCTCCT -TCCACAAGTACGGCGAGTACTTCCCTGGTACCGGTGAGCTTCGCGACATCGGTGTTGGAC -AGGGCAAACACTACGCAGTCAACTTCCCTCTCCGTGATGGCATAACTGATGTCACATATA -AGAGCATCTTTGAGCCTGTCATTAAGAGCGTAATGGAATGGTACCGCCCCGAGGCCGTGG -TCCTTCAGTGCGGTGGTGACAGTCTGTCTGGTGATCGCTTGGGCTGCTTCAACCTCAGTA -TGCGAGGACACGCCAACTGTGTCAATTTCGTCAAGAGCTTTGATCTGCCAACATTGGTTC -TTGGCGGTGGTGGCTACACCATGCGCAACGTTGCACGGACATGGGCTTATGAGACCGGTA -TCCTTGTCGGGGAACCCCTAGGATCGGAGCTGCCATACAATGATTACTACGAGGTAAAAT -TCTTGTTTTTTGCAGACGAACTAATACTAACTGATATAGTACTTCGCTCCTGATTATCAG -CTTGATGTCCGCCCCTCCAATATGGACAACGCTAACACCAGAGAATATCTGGACAAGATT -CGCAATCAAGTTGTCGAGAACCTCAAGCGCACGGCATTCGCTCCTTCAGTCCAGATGACG -GATGTGCCGCGTAACCCCATGTTGGATGGCATGGACGACGACGCTGATGACGTTATGGAT -GACCTTGATGAGGATGAAAACAAAGACAAGCGGTTCACACAACGACGATTCGACCAACGC -ACGGAAAAAGCTGGCGAGCTTAGTGATAGCGAAGACGAAGAGGAAAATGCTGCCAATGGT -GTTCGTCGCCAACCAGGCGCCACGCGTCGTCGCAACCAAGTCAACTACCGAAACCTCGAG -CCGGACTCTGGGCTGGATAGTGGCATGGCAACCCCGCAAGATGGCTCATCTGCAGCTGAT -GGAGAAATGGATTTCGCGCTTGATGCTAAGATGGCCGATGCGCCACGCACTGAACCCGAG -GCGCCTCTCACAGCCCCGGAAGCTTCACACGCTGATGAAACCCCCATGACTGATGCTGAC -AACATGGCCATGGAAAGTGAGGAACAGGAAGCCTCCACTAGCTCCCAACGCCAGTCTCCT -CCCCCTCATGATGAAGATACTACCATGGAGGACGCTGCTGCTACTGTCGCTGAACCCACT -ACCTCGGAACAGGCCCCGGTACAAGCGTCAATGATTGAGGAAAAGTCTCTGGTCGAGGGC -TCCACCGTCTCCCCTATGCGGGCCCAGTCCGTTCCAAAAGAGGTTCCCGATTCCACCCTC -GAGAAGACTACCGAAGCCCCTGCTCCTAGTGCGGACACTGCGGCGGCGACGGCGACCAAG -GTAGAGAACCTGGAACACATAACAGAACCCAAGAAAGAGGAGTAATCGACTGTGATGCAA -AGACGACAATATCGGATCTGGCGCAACGGACAATGGCAGAAAGGTGTTATCGGTTTAATA -ACATGCTCGCATCTTGCTTGACACTTGGGGGCATTACTCGTGGCGGAATGGCAGACTCAC -ATCAAGGTTTAACCTTGAAACGTTTCGGGGACGGCGGGTTGCTATTATGAACTTTCAAGG -AATCATTTCATGTTTATCTTCATCCATGTGCTATCTGTTTTTCTTTTCGCCTAGCTGTTT -TTTTTAAGGTTGCAAATGTATTATTCAGTGAAAAACAAACTTACAACACCACACATTTGA -ACCCTAGAAGTATACGATTGATGCCTCAGGCATATTAATTTGCCATTCCAAATCGGGTTA 
-GCATTAGGGCAAACTTGATGGAGATGAGCAGCCAATCACCTACCAACGCCAAGATTTTAC -GCACTTTGCCCACTAAGCGAGAAAGCACACCTCCAGTTTGGATCCTCGACGTTCGACCTC -TTGTCGCCAACCTTTTTCACCCAACTTCGTTTCCGTGATCATCAGCCATCATGTCTAAGA -GAGGGTGAGTCTCACAGCGATTCCCTACGTGGCCCCATTCGATTCCGAACCGATGCACAA -TTTTGCAAGCATGAACAAGCAACACAACACCTTTGTTTCGAGAGGATAGAATGGGACTCG -GATTGGGAAATGAACGAAACGACAACGAAATTCGCTAACGGTTAAACAGTCGTGGCGGTG -CCGCCGGCAACAAGCTCAAGATGACTCTCGGTCTGCCTGTGTACGTCTACAAATTTCTTA -TCGTCTAGTCTGTTTCGAATGCGCCTTCTCTTGCCCGCCGGATCCCACATGTTATTTTCA -GTCGATCAACGCCCTCAGCAAAACATACATCATGCAGCAAAACGAAAAGATCGGCGGGAA -GAAAGGGAAAAGAAAATTGGACATCCACTTTCAGCATTCGAAACACGACGTCTTCCAAGA -TCTAGAACAATTCACTAACGGGTCCTTTTTTTCGTCAAACAGTGGCGCCGTGATGAACTG -CTGCGACAACTCCGGTGCTCGCAACCTGTACATCATCTCCGTCAAGGGTATCGGTGCCCG -CCTGAACCGTCTCCCCGCTGCTGGTGTCGGAGACATGGTCATGGCCACCGTCAAGAAGGG -AAAGCCCGAGCTCCGCAAGAAGGTCATGCCCGCTGTCGTTGTCCGCCAGAGCAAGCCTTG -GCGCCGCCCCGACGGTATCTTCCTGTACTTCGAGGACAACGCAGGAGTTATCGTCAACGC -CAAGGGTGAGATGAAGGGTTCCGCCATTACCGGTCCCGTCGGAAAGGAAGCTGCTGAGCT -CTGGCCCGTACGTTATACACTCCACATGGAAATGGGTTTCATTCCATATTCAGATCCCCG -AACTAAGACTAACACCTTCTCTAGCGTATTGCCTCCAACTCCGGTGTCGTCATGTAAAGA -GAGAAAACCCGAATTTTAAAGAAAAAGAACGATACCAAACATCAAATCTGCTTGGGACCT -GGAATGCGGTGCATATGAGACCATAGGATTAAAATGGGAGCTGGACAGGTGGTGTCCATT -TGATGAAATCCAACTGTTTATTTTAAAGCCGGTTTTATCCTCCATCTAGGTGTTTTCTCC -CCCCTTTACTGGCAGCTCTCGGAAGTATTACTTTAAATCACATCCTTTTGCAATGCCTTG -CCCGTTATCTCGAGGGCTGGCCTAAGGGTTTTAGGCATGCTGTATGGGAGGGGACATGGT -CCTGGTTGTTTTTTCTTTGTCCGATATATGAACTCTGATATGACCACCAATGGCTCTACG -TAGTTGCTCCATGGACCCCCCGTAATCGTACTTCATGTTCCATGTTTGCACAACTATATA -TATAACACGACACAGCTTTAGTAAGGTCACTATAGTTCTTGAATATACTTTAGGGTCAGC -TATGAGAATATATATTTTCCAATATGTATCCAAATGCAAACCGCTCATCTATCCAATGCT -GCTTATATCATTGAAAAACAAATCTACAACCCAATTATTTCTTTTGTCCATTCTGAGTCT -TATGTTCTGATTGAACTTTCCGAGCAGCCGGCTTTGCCTTTGCCTGAGCGCCACGAGCTA -AGAAACAAAGCAGAGCCACACCCAGCGTGACTAGCACGAGGATATGAACTGCCCACCAGA -TATGCTGCACACCAGCGGGACACATAGTGATCTCATAGCTCCACTCGCCGGCAGCCTCAT -TAAACCACGCCGAAAGAAGACCGGCATTACCGCCGAATTCTCCCATCATCGAACGGAGGG 
-TAACCTCCCGAATAGAAGGGTCGTCGATGTGCTGGCTCTTATTGAAGCGCTTCGCAGCCC -ACTTCCAAGCCTGAGACAAATCGTCGTCATCGACTGTTTCAGTGCGGTTGACGAAATGAA -CAGTATCACAGCCCGTGTGGTCATGGCCCGTAAGAATGAGCCCGTTCCGGCCGTGACCAC -CAGCAAGCGCATTCGTGTCGCCGCTCAGCCCAAAGATCCCTTGTAGGATTGCGCTAGCGC -TGAGTGTATCGCTGAGATGGTTCTGCTCCTTGAGGCCACCGTCAAGCCACCGGGGCACAC -CGTTGGGGCCATCCTTGTCGTCAGAATCCCGGAATGAGAAATATGGCCCGTCAGTGCACA -CACCATCTTCCTTATGCAGGGGTAGATGTGTCAAAAGCAAAGTAAAAGCTGACCGGTCCT -CGACCGGCGAGAGTCGATCGGCGATTAGCTCGTTCAGGTAAGAATACGTATGACTTTGGA -CCTCTGCAGAAAGCGCAGGGGTATCGAACATGAGGGAATTGAGGTTAATCAGATGCAGAG -TAGGGGTAACAGGCTGGTTACCAGCCGGCGCAGACGAAGACAAAATGGGCGGGTGTTCGA -ACCGCACATCCCAATTCGCACGGCCAAACCCACGCTCAAACCGCTCCATCCGTGTCTCGC -TAATGTCCCCCGCATATCCAACATCATGGTTTCCCGCGACATTGATCACGCGCCTGGCCC -AAGCCCGATCCGCGCCTAGGATTTCTAACGGCGGCGTATTGGTGCCGTCGTCATGGCTCC -AACCCGCAGTGCCGGTTCTGGTGATGTTGTCGTCAACTCGCTCTCCGCCGCGGAATACAC -GGTTCCAGTACCGGTGTTTGCGCCGGGCGAATTCGTTGTCGGAGATCCATTGACTCCCCA -CCAAGTCGCCCAGGACGGTTGTGTGCGTCGGCCGGGTCCACCAAAAGAGCGTGCGGTAGA -TATGTGCTAGGTAGTAGTCATTGCCGAAGAGGTCGAGGCGTTTCAGGCCCGAGCCGAAGG -CGCGTGGGATGTCCTCGGTTGCGAGGATCTTGAGACCGGTTGTGATGTTTGAGATGACGT -CCTCGTTCAGCAGAGGTGAAGTGGAGGCATTGCCAATTGCTTCCTGGACGGCGCGCCAGT -GGGTCTTCATGCGTGGGTATAATTCATATTCGGGGAAAGGGAGTGAGGAGTCGCCTTCCA -ATTGGGGGTCTGCGAGGACTAGGAGTCTGAAGATGGCGGGTTGATTATTTGTGTCTGAGG -TTGAAACGCCGAGGTGCTGGAAAAGTGTGCTGAGCACTGGGTTATGATATAGCTGCTTCT -CACCCGACCGTGAGGCAGACTGTGGTAGAGGAAAGGCGCATCCGTTGAAAACTGGGTAGA -GATAGAGGTAAACAGTGGCGGTCACCGTGAGTGGAAGAAGGAGCGCAAAGGTGCGGGAGA -GCAGCCAGCGGAGGGACATGAGGAATCTAAAATGACAGAAAGAAGAAATGTTATAAGAAC -CTATACAGCATATTGAAGATCGTTGCTTTGATAGGTGGAAAGGAGAAGAAACAAAAAGGT -CCAGGAGGACATCGCCAATCAACAGAGATAGCGCGCACATATATACCTGGTGACATGGCT -ATACGATCAAAATAAAACAAAACGGATAAGTTCTAGTCAAGTCATTATAAAATATCCCAG -AGACATTGATCCTACTATATGGAGATATTTAGGGAACTCGACTAAGCTATTTATATACAA -CGTGAGTAAGATGGTGGCTATATACTGCCAGTAATAAGAGCTGGCGTTTTATTGGGCGGT -GAGTCCCGGGGCAATGAAGAGAAGTAAACAGCCATGGTGTAAGTATTATTATATCTACAT -TCGCAGACGAAATTAGTATAGGGAAAAACGTTCAACTCCAGTCAACTTCGTGTCCATAAG 
-CAAGTCAATCGTATACTCCCACATAAATTCCCCGGTGACCAACGTACCTGCAATGACAAA -ATTGGCCGAGTCAACAAGGGAAGAAGTATGAGCGAGGGGTATATCACATCAAACGCCAGC -AAAAACAAAAAAGAAACAAAAACAAGAATGATCAAAATAGAGTAACCTTCGCATAAGTTC -AAGCAAGGTCACACAAATGAATATTCACTTGGAATCCGTCTTGGCTAGAGGATCCGGAAA -CAAGGACGAGTGGCCGCCGTCAAAGCCTCCGAGAGGATTAGACAAAGGTCGTTCAAGACC -TTTATCTATGCTTTCATTGTCCAGACTGCCGTTCGTGTCCTCGAACGAGTTGGCATCAGA -GGGCTGACTGAGGTTCAAGCCCTCGATTCGGAGGCCTGACTCTTCTTCTTCCTCGTCGAT -AGTCTGCTCGGTCTTGGTGCTGCCCGAAGCAGGCATCAGCTCCGGCGGCCGCTGGTTCAT -CAACCGGGCTTGTTTCAAACTTATAGTCTTTTGCTTGTTGGTGGATTCCACTGCCTGTCC -CATATCACGCTCGATGGTCGCTTTCATCAACGCTGCCATCCACAGACGACCTTGCTGGAC -ATTGTCGACCTGGAAGTAGTGGGTGGCCGGTTTTGTGAACTGCACGGTGCGGGAGTTGCC -TGTCTTCGGGGGGACGAGCTTGAAGAAGAACGGCCCACTGCCGTCTTTGTTGGTCAGCGA -TTGGATGACCGAGGGAGTAGTGAGCTTCGTCCCGCTAGAACTCTCCACGGTGTTGTTGGC -ATTGGCTGGTAATGCGGTTGAGCCGGTAATGGTTGCGTGTAGAGCGATGATCGGATCTTG -ATCTGCTCGCAGGACACGGTGGGCGGTGATGTCGATCAAGCCTCTTTCCTCGGTGTCATC -CTCAGAATAGTAATAGGAGAGACGACGACCGCGCAGGACGAACAGACGGGGCTTCCAGGT -GGTCATCAAGTTAGAGGATCTCTTCTTCATCCATCCACTGTAGTCACAGCCATCCATCTG -CTCACGCGGTGTCTTCTTCTCAAGACCCCGAGTATATGCACTGGTCTCCTTCTTGGACTT -GACGGTCCGAGGACGGACTACGAATCCTTGTCCGTCAGTGGCGGCCTTGCCAGACCCATC -TGTACTATGCCGTTCTGAGCTCTTAGATGTCGCCGATGGAGTTGTAGATCCAGTGCGAGC -AGAGGTGGGATCAGATTCCCGAGGCGAGTCAGGCAGAGCAGAGGTGTCAATCTTCTTTCC -AGCAATGTCGGATGTGGCACGAAGGCCCATGGCTCGGCGGAACTTGGGCCCAGCATTCTT -CAGTGGCTGGAATTGCAACGGCGAGGCGCGAGAAGAAGAGCTTTCCGCTTCACCTTTGCT -GCCGACCAGGGGGAAGTTGGCAAAGAACCCACCACCGTTACTGGTTTTGCTCTCGGTGGT -TGATGACTGACTTCTATCCGTTGAGACGCGCGTGCTCAAGCTCCGCAGCCGACTCTTCGG -TGGCGTGCCATGATAGGCCTGGGCCGCGGGGGAGACATGCTTTGACGCGTCACGGATCGA -GTCGGCGCTGCTTATGCGAGAGTGGCGCTTCGCTGGAGATATCACTTTGTATGGATCTTC -TGCATAGCTTGACTTTCGCGAGTGACTATTTACACCGACTCCGGCGGACACCCGTTTCTG -TAAAACCCGGCGGCTTCGGCGCGAATCAACCTCGGTTCCCGAGAAATAACCCCGATCTAG -ATCATCAATCTGATCCGGGGTACTGATATAAGAGTTGGCGAAAGAGTCTGAGTCAGCGGT -TAGCGGGCCGCCCCGGAAGCCACGAGGACTAGTGCCCGTCGAGGTTCCTGGGCGGGCGGA -CATGGATTGAGGGCCAGAATTGAGAGTCCAGGACCGGTCGAATGATGGCTTTTTCTGATG 
-GTGACCAACGAAGACCGAATCAGACATACCGCGTTGAGTGGCATCAATGGAAGAATGACG -ACGTGCCTGGTTGAGGTCACGGATCGACGCCGCAGAAATCCTCCTCGACGAATCATGGCT -GTACGGAGGGGTATGCGGTGTTATGGGCGAGCTGTTGTTGCTGGCCAATCTGGGGTGCTG -CGCCGGTGCGGAGGCTCGTGGATAACCAGCGCCGGTCTTCTCTGACAAGGTCGGAATCCG -AGGGAGAAGTGGACCGGTGTGTCCAGCGCGAGATAATGTACGTTCGTCCGAAGGTCCCGA -GAAGCTGGAGGATATGGAGTCTCTTGGGGCTTGCTGCTGACTGGAGCCTTTGGTAATTTC -TTGGAATGCCTTAATCTTGTGCCAGGTTTTGAGTCGCTTGCCCATGACTCCAAAGTCAAA -TTCCTTCATAAAGATGAAATCCTGATCCATATCAAGAAGCACATCACCGGTAATCTCCTG -TTCCTCAAATATGTCGCAGTGTTTGGGGTCGATACCGAGCCCGCGCAAGCACCTTGAAGT -CTGTGCTTCATTCCATTTCCGCACCTCTTGCTCGGTTGGATGATTCTGCTCCTCGTCGTC -GGTTTCATTCCCATTGATATAGGAAGCTCTATGGCCAATATGACTGCTATACTCGCTGCC -CGAATCATTGATCGTCCTGGGTTCTTGGGTGGACATGCTGTGACGTGGGGTGTTCATATC -CGTGATGTGCTCGTCAATGACGCTCAAAGTCTCATTCATCACCGGACTTTCTTCGCCCTT -CATATGATTATCCATGGAACCACGAAAAGCTTGCTGAATATCGGCCGCCAGGCTTGGGCG -AGGCAGCGGAGATGAGGTAGATCGATGTTGTGTGGGAGAAGGAGAACCGATTTGCGGTGA -CTGCATATCAGAGCTAAGGTGGCGCGCTTGAGGTGTGCTTTGTCCACTGCCGAATGATGC -GAACGATGTATCGGGCGCGGGGGTTTCTAATTGTGTAGGCGACTCTACAAGTACTGGCGA -CTCGGCAAGGGTCTTTCTTGGAGGGCCAACTTTGGTAAAGTCTGGTCGCGCAGAATATAT -TAGCTGGAATAATAGTCGGGGGGGNNNNNNNNGGAGATTCTTGGGGGGAGGCGCATACTC -GCAGGGAAGAGACCAGTTTGTCCCGTCCTAAGGTTTTTGCCTAAGTACCAGGCATCTCCA -AATCCTTCATCCAACTCGACCAATTCGATCTTGTCGCGGGGTAACATGACCAGTTCATCT -TCTCCGCGTCCGCCAAATTCATCTAGTTTCGAGAGTTAGAAGCTGGGAAGAGTCACAGAT -ATCAAGGCGTCGCATACGACGTCACTTACGAAGAACTTCCAGAATGTCGCCGGGACGAAC -ACCCTGTGGTGGGCCCATGGTAACCATTTCTCAATATTCCGTGTTTCTAAGGGAAATCAC -AAGACATGGCACGGCTCGGGGGAAGTACGCAAATTGCACGCAATTTCGCGCTTATCCCTC -TGGGCGAGAGCAATGTGGACTTGGGTAGAAGGGAGAGAGAAAAGGGAGAGAATGTGAGGA -AGCTAAAGAGAGATTGAGATTAGACAGGATTAGGTTAAACAACTGGCGGGGCTCGGGAGC -ACGAACCTCTTGATCCCGAGGTTTTTTTTTTTTTGGAATATGTGGAAATAAGATAGATCG -CGATATCTATCAATATACCAGATCGAAATGTTCAAGTATTGGTTTGATAAGGAATGAAAG -AGGCACAAAGAGATTAATTATACTACTATGAAAGATAACTATCAAAAATGTGAAAAAAAA -GGTCAAAGTGACGTTTGCAATTGTAAGAAGTTCTGGACTTTGACACTTTCCGTGGTTTTG -AGCTGAGAAGCACAGATTTCACGGTGATTACATTAAATTACATTAATTACCTGTAATTTA 
-TAAGAATTCGAGTAACTTGATGCGTAGTATGATGAACCCGGTCATTCCAAGTACCCAATC -tacattggatttataacattatatatgatatattatcatacattattttatagtatcata -tagcatacattacGATATCTCACTAGGATTTGGCTAATTTTCCTTAATTAGTCTATATAT -CATGTATCATGTACTATCAATGTAGCTCTGGAGGTATGTAATCTCAAGGTTTCCTAGTAC -CTGCATCTATTGATCTATTCCTATTATGGCATCTAAGCGCCAAAGAGTTTCTTTATTCGC -TGTTGATCAACTTAACAAAAGACATATATGCCTCGATAAGTACTCCATGGTATCACCGAA -ATCAAGATCGCACACTAGTGCGGATCATATTGAAGAGTCTAATACTACCGCCAGTCTATT -TCTAGGCGGCACGCGGAGGAACTGGATGGGCCCGCAGGCTAAAGCCCCGGAGACGAAATC -GGCTACGCGAAATCCTCGTTTTGAACTACCAGACCCTCCCAACGCCTTTCGCAAGCTTTT -AATGTTCCCAGTCACGCCCGGTGAGACCCCTCCCTCATGTGGTCCTGTTCATGCAAAGAA -GCCCGACACACAAACCACGGCTCTTCCTTCACCCGTTCTCAGTACAAATTCTCACCCAAG -TCCTGTTGGCGCCGATGCGATCATCTCAAATAGGCCTTACACACTGCCACCCACCCCTAC -ACAACCTACAGCACAGCGTGGCGATGATGAGCGCATGGCGGTCCAACCTCTTCCCAGCGG -GCCCGCCACGTCACGACGTGTGACCCAGTCTCCTCTAACGGGACCTAACTGGACCAACAA -CTCACCTCTACCCACAGGCACACAACCAGGTGCTAGCACAGAACACACGGCCTCGGGACC -GCGACTCAGTCCCCATCAAATTGGGGGAGCTGGGGCTGCGGTCATTCCTAATAAGACATG -GAAACTATGGTCTGCTCAATTGGATGCCTTGGTAGTTGATTTGCGCACAAAGGGCGCATT -GCCTCATCCTCACCTGGGGGTGACAAATGTCATTGCGCCTCGGATCAACCTGCTGCGCCA -GGCCATTGGTCGTAAGGATGCTTTCTATTTGGTGATACATCAATTGTACTGCCGATATAG -CATTGATCCAACTACGTTGGGTCAGATTAGAGCCTCTGAGACGGGCATGGAATGGCTTAA -GCGTCTTCTGGAAGACAATGGGATGATGCATGATGCCGCTACCGTTGGGTTTGCCCATTT -CCCGGAATCTCCACAACGGTTGATGCAGACACAGTGGTATCTCCAGGTGCTGTCCGGAAT -TCCTGAGTTTCTATCACGGCTTGAAACGGAATGGCAGGGCATCTGGTATCAGCCCCCCCA -TCCCCCATTGGTGTCTCATCTTTGGGAAGCACTTGCTTGTCCTTCTCCGGTTTTGATGTC -GGTGATGTTCATGTGTGTTGCACGGCGCCTTCACAATGAGAAAGACATTGAGCCCTTGCT -GGTGTTGTTTTGGAAAAATTTTACAGCTTTCAAGCAGTCTATGGACCGTAAACTCCCCGG -GACGACTCAGCAGGATATCAACTGTCATTTTGCCCAAGAATATCACAACTTCCCACGTTC -GTCCGAATATCTATCACACACACCATATTCTTCCCACTCTGCACTTCCTAGATCTCAGAT -CCAAACTGTGTCTCAAGCTGCTCCAGCTATACCACCACCATATCCCATATCTTCGGTTGC -TGGTACCAGTCAGCCTGCGACAAATTCATCTGCTTTAAGCCCACTGGGTGGACCTCTGCA -CCCGTTTACCCCGAGCCCAAATCCACCCCAGGGGCGGGCTGTTGGCTCAGCTGCTGGTTC -CTTTCAATTTGGCGTAAACTCACAAGTGCAGAATACCCCACAGCATCATTATTACAACGC 
-CATGTCacaggggcagggacaggggcagggacagggacagagacagggacagTGGTATCC -TGTGAATTCACATATGAATCCAAACATGAATCCAAACATGAATCCACAGATGCAAATGCA -CACGCAGACAGCGCTTGGACAGGTACCACCTTCTCGGAGATCCCAACAACCGCCGGTTTC -ACATGGTCAACAAATGCAGCGCCGGACAATAATGCCCCAAACTTCAGTCGCGCCTGTCAA -CCCCTCCCTCCAATCAAGTGCTCTCACCTCTACACAGTCAACGCAGTCATCTCACGCTCC -ATCGCCCATATCCACGACGACAACTACAATGAACCACGTGGCGCAGCACCCACCACCACA -AAGGCCTAATAACCGACAGCATTCGATAATCTCGCAATCCTCTGCTTCGCCGATTAGATA -TGCGCCTCAGAGACAAAATATCCAGCCTTCACCTAACTCTGCTTTCCTCCCATTACCTGG -CTACAGAGCACCAATGATGGTGAATCCAAATCCGACGCGTCTCGGCCTACACCTGGTTGA -TCTACGCGACCCTATGAAGAAGCTGGTAAAACATAGCTCAGATGGAGATGACACTGAGAC -TGAATTGTTCACTTATCCCAACACCTTCGTTGTATACCCAACTGTTGTCAGCGTTGATCA -GCCGACCTACACCTGGATGTTCTCTTTCACGCACGATGAATGCCAAAAGTTCCCACATAT -ATCTCACGGACAAGAGGGCCAACGTTCGGTCTGGATCTTCGGGCCTGGTTGCCGGACTAT -TCGCCTGCGATGCATTCGTCTCCCTGGAGATCTGGATACCGTGACAGAGCAGACATGGTC -TACCACGAGCACATTCTGGCCTAGTGTCTTTTACATTACTGTAAACGGAAAAGAGCTCTA -TGTGCGCCGAAAGGTTCATAACGGAAAAGATTTGCCTTTGGACATCACTGAATACCTTCA -AACTGGCGAAAATAGCGTGCGGCTCGATATGATCTTAGGACAGGATGAGTGTAAGACCTC -CAAGTATGTCTTTGGAGTGGAGGCTTTGGAGATCGCCGAGTTCGACCAAATACTCAGCCT -GATCCAATCCATCCCAGCGGCCGACTCTCGTGTAGCAATCCAAAAGCGGCTTTCTCCTAC -CACCGACGATGATGACCTAGCAGTTGTCACCGACAGCCTCACCATCGACCTAGTGGACCC -CTTCATGGCACGCATCTTCGATGTCCCCGCTCGCTCGCGCCACTGCAATCACCATGAATG -TTTCGACCGTGACACTTTTATAATAACCAGGAAATCTGTATCCGGCCCGGCACCAATGGT -CGATAACTGGCACTGCCCGATCTGCAAGGCAGATGCACGCCCGCAGTTCTTGGTCGTCGA -TCAGTTCCTTGCCGAGATTCATGCCGAGCTCACCCGTACAAACCGACTTGACGGGGTTAG -AGCAATTCAGATTAAAGGCGATGGAACCTGGGCCCCCAAGTATGACACTAATGAGACCTC -GCCCGAGGAAAACAAACACTTTTCTCCGAAACGCAAGGCCGAAGACACTCCAGGCCCCGT -TGCCATGCGCCCTAGAAAAGATGTGCCTAATGGTAGTCGCAGCCCGGCTGTTCGCACGCA -AGAACCCACAGTAATTGAACTGGACTAATGAGTCCAGCGAGGTTTCTAACCAAAGTTCTT -ACCTTATTCTCCTTTTTTAGTGGCTTCGGCCTCCCGATGTCTTTTGTTTATCCTTGCATA -CATTTGAAAAGAAAGATACCCATTATTGCGTGCTCTCCAAATGCATTTCTTCTAGGTCCT -CTCATGATCTGCCATTGCTGGCATTGCGGGACGAGGATTTGCGGCGTCGTCCAGCTTTTC -TCTTTGTTTACCTCAAAATGCCACGAACAGAATGAAAACATACTTAGAAAATGAAACTAA 
-AGCTCTATGCTGAATGAAATATCTTGCTTTAGCTCTCAGTGGTGTAGACTACGAATGCGC -TGATTTATATTCCAGATAAACCTTGGAAGAAGTTTTTCTGGTATCGACATTAGATTATAC -AAAATATATTATATTGGGCAATTAGAGTACACCAAGTGCCCGGAGTAAGGAAAGACTACT -TTCTGACTCTCCTGACATGGCTAACGAGGTTCTCAATGACACCTTCGAAAGCCTCTTTTT -CGCCAACCAAGCCATCTCGCCGCTTAAGCCAGTGCTCCAATACTGAGCTAGCGCGCTGGG -TGACTTGGTTTAATTCCTGAACATGCAGCTTCAACTCCGAATCTGTGAGACCATCCAGTG -CAGCATCGGTAGTGGCGCACAGCCGGCGGACTGCAGAAAGGTCTATTTCCGGTGCCGGTG -GCGCGCCGCGGCGGGGAGTATTCTTCTGTCGGGAGGGGGTTTGTTCTTTTTCGGTGTTCG -CTTCGTTGCCTTCTTCTTTGCGGACTTGGCTACCGGAGCGAGCGCCGGGGCTCTTTGCGC -CGGGGCCTTCAAGATCAATAGAAGGGAGAACGAGCTTTTTCTGTTTGGATGCCGGAAGGA -TCTCGTCCAGATCAACAGGCACGCCTAGGGATACGAGGAATAGTCGGCGAATGCGTGATT -TCACCCAATTTTGGGGCTGGAGCGGTGGCGGTGCCACAAGCTGAGACCACAGCGAGAGGG -ATCGCTCGGTGTTGAAGATTGCTGCGGAGTCTGGGATTGGCTTGACAGGGGTGAGAGAAC -TTAGATCTTGGGATGCGGGGAAGAGGAGGTTGAGGGTGTCTTGGAGGGAATCGAGGAGTT -CGGAGGTGGATTTGAATGTGTCGAAGTCAATAAGGGGTGGAGGTGCGCGAGCTTGCTGAG -ATGCCGCGGTTTCGTCTGTGGGAGCTTCCTCTGCACTGGGCTCTTGGAAGCCGTCGTCAA -AGTCCCCAAAATCATCATCTCCCATGTCTTCCTGCTCTTCTACAAATTCATCAAAATCAT -CCCCCGCTTCTTGCTTTTCATTTCCGTAGTCGTCGTTCATGTCATCCCCCAATGAGAGGT -TATTGTTTTCCGGTTCAGTATCATATATTGAGACATCCGGCACGGTTTCAACCGTATCGG -GTAATGTGTCACTTGCTGAACTTGTATGGGCTTGTGGGTGTGGTGACAGGTCTTCTTCAG -CGGGTATTGTATCCACCCGCTTGAGTCGTGTCTCAGGAATGTCTGTTGTAGAGCTATCAA -GCTCAGGGTGTGTAAGGTCAAGTGCTGGCGGCGTTGGGCTGCCCTCGGAATCTGAAGCCT -TGATCACCACGTCGGGCACAGCATCGGCAAGACGCTTCTCATGTGCCAAAGTCCCAGGGA -TTTCGCCATGGCTAGGTTCATCTGGGTCCACCTTCTCAACCACTGTGCGCGGAATCGGCA -AGTCTTCGGGACTCAAGGCTTGGTCCTCTGACCCCGCGGGCGAACACGTCTGGCTGTGTG -AGCCTTCTGGAATAATTTCGATCTCATCGGGAACTGCATCCTGTTCTCTCTTTTCGAATG -CTACCGTGCCGGGAATCTCGCCATGTTGGGTGGTGCTGTCAACCCTCTCGACTCGCGTTC -GCGGGACAGGGGAAGTGCGACCGGATCGTGGGGATGACTGGAGTTGGGGATTGCCTTCAC -TAGCATCTGAGAAGTATTCCTCGTCGCTTTCAGAAACTGCTAAAAAAATGTCAGCTTCAA -ACGAATATTCAAAGTATGTTTAAGGGTCATGTACTATGGTCTCCAGCTCCTGGATCGTGA -ATCTCTGTAGTCCTGCTGGGGGGCTCTAGATGCGAACTAGACAAATCGGACGGCCCCTCC -GGCATGATTGTAGCTTGAATTCAGGGTATTAACATGAAAGAAAGGGCACACAGAAATTGA 
-ATAATTCAAAATGCAATGGGGCCGTGTGTAGTGTGCAGTGACGACTTGTTTCCCTGCGAT -CGATGTGAGTCCAGAACACATATGCCTTAGGCACCATAATTATATCAATAAGTTGGCCAC -TTTATTCAGGCATATTTGTTGCCGTTTATTCCAGGCACCAACCTTTCAGCGCGTGCTTGC -CAAACTCCTTCCAATTATTATTGTCTATTGTCTCGGCTTTCTTGTCCTGCCTTTCTCATT -ACACTCTTGGACTCATTGTCTCAGCACATTTGATCAAGAGCTCCATTCTCTCATTCTCGG -ATCATTTGGTCCATTCTGCAGGCCACCTCCCTGCTCCGTGTGTCGCTGGTGCTTTCTTTC -CATCCAGCTTTTTCCCCATCAGTCCTCCAGCTCGATCACCAACACCATGTCGTTCTCTTT -CGGAACCCCGGCCTCTACTGGCGGCTCCAGTGGAAATCTTTTTGGCACAGCGGGCAATGC -TTTCGGCGCCAACAAGGATACCAATGCGCCTAGTGGTGGACTCTTCGGTAACGTTGGAGC -TTCGTCCACCGGCAACAGCGCATCGCCGTCGATTTTTGGAGCAAACTCCGCAACAGGTCA -AAGTACACCAACCTTTGGCAATGCAGGCACTTCCAATGCGACCACAAGCGGTACATCGGC -CTTCAGCTTCGGCGGCAAGGCTCCATCCGGATCTGGCACGCAGCCTACCTCACTCTTCGG -CGCCGCTGCCGGCAGCCAAACTCCCAACAAGCCTACGGAATCGACCACACCTGGCCAGAC -TAGCAATTCACTCTTCGGTGGCGCGGCCCCAAAGAGTATGTTTGGCAACGCAGCCCCAAC -AGCTACCCCTGCTCCCTCTGGAAGCGCTCTGTTTGGAAACAACTCCACCACCCCTGCTGG -TCCTCCGCCAGCGTCCAACCCACCTAGCAGCTTATTCGGGGGTGCGAAACCTGGAGTCTC -GACTACTCCCGCCGCAGCGCCGTCATCCACCACGCCTACTACCTCGCAGGCGCAGCCATC -TAACAATATGTTTGGAGGGGGGGCAACCGGGTCACTCTTTGGAGCTGGCGCAGCAGCTCC -TGCGACTAGCGGAGGTCTATTCGGTGCAAATAAGCCGGCCGATTCAACCGCTCCTAAGCC -TGCCGCAGATACCACTTCCAAGCCACTCTTCGGTGCAGCTGCACCTGCGAGTGGTGCGCA -AACCTCGCAGACACCGTCTCTGTTCAAAAATATGGCAGCCCCTGGCGGCGACTCCGCTCC -AAAGTCTGCATTCCCGGCTCTCGGTGCTCCCCCTGCTTCCACCGCAAACACAACTCCCTC -ACTAGGAGCGCCCGCCACGTCGAGTGCTACCCCTCAAAAGTCTCTTTTCCCTTCCACCGG -AAGTGCCACATCCACAGCTGCCCCCTCGACTACTCCGGCAGCGACTCCTCTTGGAGGAGG -ACTGTTTGGGGCCTCAGCCCCCACATCTACTCCAGGTACTTCGGCCACTGCTGCGCCTGC -TACCACTACACCTGCTGCGCCAACGGGCGGCCTGTTCGGCCAAGCTGCTGCCACGCCTTC -GACCCAACCAGCGAAACCCACGAGTTCGGCACCCCTTAGTGGACTTGGTGGTCAAACGAC -CGCTGCCCCGACTCCTGCTTCGACTACAGCCGCTGCCGGCGCTGCTCCAACTGGGCCCTC -CGTCCTTGGTCAGTCTACCACAGGACCCGCCCCTCCAGCCCAGTCCCGCCTAAAGAACAA -AACCATGGATGAGATCATTACCCGATGGGCCACAGATCTCGGCAAATATCAAAAGGACTT -CCGCGAGCAGGCGGAGAAGGTTTCCGAGTGGGACCGTTTGCTAGTGGACAATGGTACCAA -AGTTCAGAAGCTGTACGGCAGCACTGTTGATGCGGATCGAGCTACCCAGGAGGTTGAGCG 
-CCAATTGTCAGCAGTCGAAGGTCAACAGGATGAGCTCAGCTCATGGCTGGATCGATACGA -GCGTGAGGTCGACGAGATGGTTACCAAGCAGGTGGGCCCTGGAGAGTCTCTTCAAGGACC -TGACCAGGAGCGCGAGACAACGTACGTATAACCCGGAAGACAGCACTTATATTTATGGTT -TTGCTAATATGGTTATCAGCTACAAACTGGCCGAGAAGCTCTCGGAACGCTTGGACGAAA -TGGGCAAAGATCTTACCAGCATGATTGAAGAGGTCAACGGTGCCTCATCCACATTGACCA -AGACCAACAAGGCAGATGAGCCGGTACGTCGCAAACCAGTACCGATCCTGTTTTTCTTCG -GAATTATTTTACTCATGCATCTCTATAGATATCGCAGATTGTCCGCATCCTCAACGCACA -CCTATCCCAGCTGCAGGTTATCGACCAAGGCACTTCAGACCTGCAGTCCAAGGTTGCCGC -CGCCCAAAAAGCAGGTCAATCGATCTCTGCCCGTCTCGGATACGGCTATCCGAACAATGG -CATGGGCAACAACAACACTGCTGATGATTTCTACCGCTCGTTCATGCAAGGCCGGTAAAG -AGGCGGATGATTTTTTTAATGATTTAAACTCGGAGTTATCGGCTACTTGGCACCTCGATG -TGCTTTGGGGGAATGTACTGTTGTAGTCAGGGAATGCTAATAGTCGATCACAATAAAATG -CTTGCATGCGCTTTGTGATTCACAACTTTGAAGCTTTGTAGAACCAATGCAATAGGATTG -AGAAATTCTTCTCATGTGTGTATAAGTGTGTCCTTCTGGTCACTATTACTCGGACCGAGC -CTTCAGAAGATCCATCACTTCTGTCTGCCTTTTGTCTGACTTATCTGCTGTTTCGGATAA -TAGATGTACAGGCAATTGACCCTTGCGATTCTTGATCAGTCGGCCAGCGCCTGCCTCTAG -CAGCAACTTGACGAATTCCACATCATTTCTCTGCACTGCCACATGCAATGGCGTATTGCC -ATCTCTGTCCCTTGGTTGTACATCAGCGCCTTTCTGCAGCAAATGCCTTGCGGAGTTCAA -AGAGCCATTCTGCGCTAATTTGTGCAGCGGTGTCATTCCCTTCATAGTTTTCAGCCCTGG -GTCGGCACCATATTGTAGCAGTATCTTGCAGAGATCTGAATTTTCCTTCGACGTCGCACA -ATGCAATGGTGTGATGTTCCAATCGTTTCCCTGCCCTGGATTGGCACCTCGTTCTAGCAG -GAGAGCGGCCACAGAGGGCGAATTCAGGTCTTTGCATACAGTGGTCTCCCAGAGTACCCC -CCGTAGCAATGGTGTATTCCCTTTTAACGATCTTGCCTCTATTTTTGCACCCTGATCCAG -CAAGAACGCTGCTGTTTCTACCCGGCCATTCCAGCATGCCACATGCAGTGGTGTCTCACC -TTTCTCGTCAGTTGCGTCAACATTTGCTCCTTTCTCCACCAGCAACTGAGCAATCTCGAC -TCCATTGGAGGATGCTAGATGCAGGAGCGACATACCAACTCGATCCCGTATCTCCAAGTC -TGCACCGCTTTCTATTAAACGCCGGGCCACTTCCACGGAGCCAGACTGACACGCCACCTG -CAGCGGAGTTTGTTTCAACAAATTCACGGCTTCTAGCTCAGCACCTCTCTCAGTAAGAAC -TCTAGCCACTGCCTCGCGACTTTCGGGACATTTTTCAGCCGCGAGATGCAATAAGCTGGC -CCTGGAGCCCGTTGAATTCACATCGACGCCGTTATCAAGCAAAATGCTAGCAATCGCTGC -TCGGTTTCGGATGACAGCCAATGTGATTGCATCTTGTGATCCCCAGCCGCCTTTAGTGTG -CGGGTTGGCTCCATGTACCAACAGCAACGCAACAATGTCTGTATGGCCACAGCGAATGGA 
-TTGTATCAATGGCGAGTTTCCTGTTTTCCTGTCCTTGGATTCAATTTCCGCTCCTTGTCG -GAGTGATTCTTGGGCCGTTTTTAGTTGTCCTTTGGTTGCGGCCCAATGGAGTGCGCCCCC -TTGGTGCTTTTGGACGTTGTGGGCATAGAGGAAGGGGTTTAGGAGGATATAACAGCGGCG -ACTGGCTCTTGCAAATGCATTTATGTCATTTTGGGAGTCTTGGGTCTCTGCTATAGCTAG -CAAAAGCTCATTTGGTAGGACGAAGAGAGTCATGGTTCAAGCTAAGGTTGGGTGATTTGA -TTTAGATTTCCATTGCGAACATGGTTATAGGAAAGAGCCCCGTAGTTGGACGGTCCCGCT -TTATTTTTATGTGGAGACGGTTGGCAACCCGTGGATAGCTTATATTATTGCACGGAGTAC -GGAGGATAATGTTGACCTTTTACGCCTGGTCGGCGATATACATTCACGGCAAGACGTGTT -CCTCGGGTTAATTGAGGGCTTCCTAAAGAAGCCATAAGCATTACACCAAAACATAACAAA -GACTTATCTTAGACAGGGACAGTGAAAATGCCCATGTAGAATTTTAGGTTTACAGATTTG -AACACAAGCTAATATTGCCGAGACTTCTCAGTGATATTTCCAAGTCACTGGGAATTATAC -AGACGATAGCGGAGGAACTCCATATGCTATGGCAGAGAGATGTCTTTCTTAAGAACAGGC -GCTGAGGACGGCACGATCTCAAGGCAGGCAACCATGATGATTTGTTGGAGCGGATATGGT -GTTATACACAGCAATTGGAGTAGCTCTTCATATTAAACATAAGATAAAATGGGTGTATGG -AGCCTATTTGCGGCTCAAGCCATCGCTAAAGAGAAGTTAGTCAGCCATGCTGTTCTGATG -TTCAACGGGAAAGGGAAATAGACAGAGGGCTTTCACTTACATGTGGGTCTCCAGCTCATC -CAAGTGCTTGCGCTGCTCGCCAATCTTCTTCTTAAGCTGTGCAAGCCTATCAAATCAATT -AGCTCTCTGCTCCAACCTGATTGTTCTTGGCTCTGCAGAACTTACTTCTCAAGTTCCTTC -TCACGGATGTAAAGGCTCTCTTGGGCGGCCTCGCGCTTGGTGAAGGCATCACTGGTAGAG -GGGGCATGTGTTAGCGATAGTACCAATATGATTTCCAACTACGGTCAGGCAGACGGTTGT -CTTGTTGTAATTGAACTGGGGGTGTAGGTTTCGCAGTCAAACCGCATCTCACTTACCCGG -AAGAGGCACCACCGCTAGAGCGGGGGGCACCGGTATCACCTTCGCCCATGCGAGGGACAG -CTACGGAGAAGGAGCGGGTGAAGACTGCCCGATTGGCAGTGGCAACGGGACGGACAAATT -GACGAAGCATGTTGATTTTGGGTGGTTGAGGGGTTTTAGTGAAATATGTGAAGTGAGTTT -GGGGGTTAAGTTGACAAAGTACGAGGAGAACTAAAGCCACCTGGGCAAGCTTATAGCAAA -TGACGTCTCGCGCCAAGAAACATCATGTGATGGGTCCAACCTTCCAAGGTTGGAAGGGCT -CAGCTACTTGGAAGCTTATGTCAGAGGCTAGGTCATGTGAACAAAAATTTCTGATAAGTC -GCGGCCTTCTCATTCGACTCTGCACTTACACCTACATGGAGGCCTATGTACTTGATCACA -GGGTCATTAATTTGCATCGGCTCTCCCTTCTTCTCTTAAAGGATTCAATATGATGGTAGA -GCCTCTATGTCATATTCTTGTTCTAGACTTCGAACACATCCTGTCTTGAATGAGGCGCAA -GTAAGGAACCCCCCGGGCGATTGGCGGATGTGGAATGGGGGATGACGCTGTAGGAGGCCC -ACCGAACCACACAGAAGCGATGAGAGAAGATTTGTGTATATATATGGCCAAAAAAGTAGT 
-CTAATCTTGGATATTTTGGATGTTTTATATAAAATTAGTTAATTAAAAAAAAAAGACGAC -GTTCAAGTGACCTTAAGTACCCTTTTTTTTTTATATTTCCTAACACAAAAGCCCGAAGCA -GAGTGGGTTGTTCAGGTAAATCCCAACAACACCGccttctgcccccctcctttcctccta -cctcctctctctcaactccctactctctcACTTCCATCTTCCATCTTGCTATTTCTTATT -ACCTTCTTTTCCCCTTTCACAAATCAGGTCAGTTTTTGAATGTTTCCTTGAATTTGCCCT -CTAGCCTTATTTCTATGCCCCGTCATCACCGCAAAATCCACACATCAACATGTGGAAACT -CTCTGAAAACTACAATTCCCCCAAATTCAAATTCCAATGGAATCGCATCACAAGTCACGG -AAACTACTTGTTCTCGTCTCCTTGTTTATCTGTTGAGCCGTTGCCGCGACCGCTTTCCTG -CGATAACCCCCAACGACATACGGGAAGACCCAAGGGCTCTGTAACCGCCACGATCGTCAT -GATCGGTCAAATATCTGGATATTTGGTCTTTCGTCCCAATGTTTCGATCCAGAGGCGCTT -TACATGATACTGGAAACTCAACTTGGCCGTTCCCCGTATATTTGCAGAGGGGATCGGGGG -TCGATATCTACCCCTCGCCATTCACTACTTGCACTGTCCCGATGGCGTGATTTCCAGTCT -TCATTTCAAACATTATCAAACCGAGAAGGAAAACGTATATGAGAGGAAAAAAATTTGGCT -AACATAGTTCGCATTTTAGAAAACACACATTCACCATGGGTCACGAAGACGCTGTTTACC -TCGCCAAGCTCGCTGAGCAGGCCGAGCGCTATGAAGGTTAGTTTCTTTCGGGTCCAAATT -GTCTAGTTGTACAGATTCCTGACAATGCGGTGCAGAGATGGTCGAGAACATGAAGGTCGT -CGCCTCGGCCGACGTTGAGCTCACCGTTGAGGAGCGCAATCTCCTGTCTGTCGCCTATAA -GAACGTCATTGGTGCCCGCCGTGCCTCGTGGAGAATCGTCACCTCTATTGAGCAGAAGGA -GGAGTCCAAGGGCAACGAGTCCCAGGTCACTCTCATCAAGGAGTACCGTCAAAAGATCGA -GGCCGAGCTTGCCAAGATCTGCGACGATATCCTGGAGGTTCTGGACGAGCACCTCATCAA -GTCCGCCCAGAGCGGCGAGTCCAAGGTTTTCTACCACAAGATGAAGGGTGACTACCACCG -CTATCTTGCCGAGTTCGCCATTGGCGACCGCCGCAAGGGTGCTGCCAACCACTCTCTGGA -GGCCTACAAGGCTGCCACCGAGATCGCCTCCACTGACCTCGCCCCTACCCACCCCATTCG -TCTCGGCCTTGCCCTCAACTTCTCAGTCTTCTACTACGAAATTCTGAACTCCCCCGATCA -GGCTTGCCACCTCGCCAAGCTTGCCTTTGATGACGCCATTGCTGGTATGTTGGAATACAG -ATATTGCCAGGTACATTGGCCAAAACTAATATTTCTCTGCAGAGCTCGACACCCTGAGCG -AGGAGAGCTACAAGGACTCCACTCTCATCATGCAGCTCCTCCGTGACAACCTGGTAAGCG -TCGAAATTACCTGTTGATTCGATTCGTTCATACACTGACCCGCATTTCCCACCAGACCCT -TTGGACCTCGTCCGAGGCTGAGCCCACCTCCGAGAGCGCTGCTCCTGCTGATAAGAAGGA -GGAGGCCCCGGCCGCTGAGGCCGAGAAGCCCGCCGCCGAGTAAATGATTAACACCTAAAA -AGACCTTGTTGATGGACACGCGTCCTCTGGACGGATGGAACAACCGAGAGAAAGAGAGCG -CGCGACGGGTCTTCTAGTACAAGAACATAGATTGGAGACGAAAGAGGAAGGACGATGAGA 
-TTTCTTGGCCCGGTTGTCGGTTTATAGTATCGCAGAGACTCGCAAGGGTTGGTGTGAGAA -ATATTCTGACCTTTAGGTGACGATGCTCTCTCGGCCATTTTTGCTTTGAATCTTCTCTCT -ActtgcctacttgcagtttgcttgcttgcttCGGCCCGGCGCTTGTGTTCTCTGCCTGGG -GGATATCCATCCTGTCTCCTCTTTTCGTTTTTTTCATTCTCCTTGTTATTTTGTCTACTT -TGCCTCTTAACTACCCAAAGAGTCCCCTTGCGCGCACTCTCCCAGCCCAACACACATACA -CACAGAAGTTCTTCAAATCAGTTCCTCATCAGGACTTCAACAAGGGCATCCCTGCTGATC -CCTAACCTGGCTTTTCACCTTTCCTATTGATCACTCCCCGTATATGTGCCATGTGGCGTT -TTGATTCTCACTAGATGTGATTTCGAAGATAATCTCTCGTACGCTGTTCGAGTAATACTT -TACGAATCGTTGTACTTGAAGCATGGATGGCTGTGTATGAGTCTCTGTCCATCGCCCGCC -CTAGATGCCGCAGAGTAGATGCAAACTGTCATGTCCGAGCAAAATAGACCCAGCAGATGA -CAGAAGCAGCCGTATTAACCTCCCGAGGACATCCTACCTGTCTCTATATCAACACACTCT -TATCCACAATACCTTCATTCTCTACCCGTTAGCAGTATGATCACTAATGAGCAGATCGAG -GATACCGATCGGTGGACACGAGCTCATTCAATTCTCTGCTTTGAATAATCGACACTTGAG -CGCCCCATCACAGTGATCTGAATCAGAGATCCATACCAATCAGCACCCAAGTGAAATGAG -AGTACTCCGTACTCCGCGCGTGCTTTTTCGATATACATCAAGTTCTACAAGGTATGACCA -TTCATAGAACTGATAGGACGGATAATTGAAAAGATATATCTAATCTACAATATATATAAA -TCCACATACTTTTTCGAAGGATGAATAGGCACTATATATCTGACCGGGGTGTGCAGACAT -GCACATGCCCGTGTACTGCAGACGAAGCCGCAATGATATCAGTGGCTATATCAGTTGGGC -ATTATCGAGCTTATGTGCACCTGCCTGTGTCACGTCGCAGTTGATCGTCGCTCTCTTCGC -TAGGCATGCATGCCGAACTGTTTGTTCATTTGCCCCCTGTCATGACTCAACAATTGATTG -GATAACTATTTCGTCTAGAATTGCTCTTAATTACGTTTCATTATCTGTATCCTGGTATAA -CAAAATTTTCAAGTTTGTGAATATTGATATTGTACAGAACTACAAGGTACAGTCTCAATT -GAAATATTTTCCATTGGTATGGTCGAAACCACAATGCTACCAGCCATGCGGGCTCGATGA -AAAGGGGTATGAGGACAATAAAGGGAAAATGGAAGACCTGAGACATGCGCGCTCCAGGGT -ATGTCGAGTGCAAGAATCAGAAATAGAGCCACAGTAATGACAAAGATGACAAAAAGCGGC -ATCTTTGTTTCTCCACGCGGAGACACCAGTCAGCCCGGCGGAGACAGTAGAAGAAAATGG -AGGGGAATAAAAAGTTGAAATGCAGATGTACAGAGCCCCCCGAATATCGAGATGGTATGT -GAGAAAAGAGAAGAAGATAAATGGACTGCTCCAAGAGACCCTTAATATGCTTTCCACTTC -CCACGCCGTTCTGTTGTGGCGATGACCGTCACCATGCAATGCCCATGAAGAAACATTTCA -ATGCTCCAGCAAGAGCCATTCAATCAAATCGAGTCTAGTCCCAGTCCCCGAAACGCCCAA -TGTGCTATGCATCGAAAGATAACAAAAAGAAAAAAAAAAATACAAGTGAAACGTGTTCAC -AGAGCCACCACAAAAAAAAAAGAAACGCCGAATGAAAGAGTATGAAAAAAAGAAGGGCCA 
-ATAATATCGACAGGGTTGAAGAGAGGTAGAGTGGAACACTGCGTTAATAGGATAAAAGCC -CTGGAGGTCGCACACCGACAGGATGATGTAATGAAGACCCCCGGGAAATCAAATGCCAAG -AAGTCAGGTGGCAGCCATACAAGGCATGCGATGATGTCACGAACAATCCCTTGAAGTTCC -CCCCTTGAAATAATTCGGATTCAGCCTCCCCAGTTCGGTAGATCCTTGAACAGGGGTGCT -GTGGAGATGACGACTTTGTGGCGTTTAAACCGAAATGTGGTGTGTTCCGGGGCCACTCGA -ACCAGTCGGGTGGCAGGTGTGATATGTGAAAAAAAAGGCCGTAAAATTGCCGGACAAATG -CTGACCCCATCGCAGAGAGAGTTGGGTCCATAAATTCCCTGACCCTGTTGTTTAATGAGA -CCGTGACATGCTGACATTGGGCATGTAACTCGTTGAACCACTTAGATATGCGGTGAGAAG -AGAAATTCGTTGACCAGGGATTGTGCCGAGATGTGTAAATAATCCGAATCTTGGTAGAAC -CGGTGACCGTATGTAAACGCATGGAGCCCCCTGCCGCAAAATAACAGGCGGTTCTCCCCG -ATTGATGTTCACGGGACGAGATATCCGGCCACCTTGCGACAGTCTGATCGGAAAGTCGGT -TTATTTCTGTGCTTTGCGTGCTTTCGTACTGCGGGTCATTTCACTCTTGGTCATGTTGCG -AGTGAGGCCACCGATGTAGTCTTGTAAGATTTTGTTCTCGGTTTCCAACTTATCATGGTC -CGATTTGACCGCGTCGATGCGTTCAGCGAGAGCTTGAAGTGATGATTGGAGAGCCTTGGC -TTGCCTTTTTTTTTCAGTTCATATAGTTAGCTACCGAAATCTTGAGATTGGCACACTCCA -GTAGAATATCGCCTTGAATGGCTTTTCAGACTCACTCCTGTAGGGTCTGTCGGGCCTCCT -TGCCCAAACGCTCCAGATCGGCTGAATTGCGCCGTGGACTCATTGCGCGGGCGTCGTCCG -GGGGAAATTCCTCCTCCTCTATTTCAATTTCGGGGTGGTCCCGAGGGACAAGGATCGAAG -CACTGGGTTTGCGGCTCGCCATTCGCGGACGTTCTGATCGGCCTCGACGGTCGGTTTCGG -CGTCGGATATAGAAGAAGATGATGAGAGAGATGAAATATGCATGCGGCCACCGTCGTTTG -GGTGCGGCAAAGTATCAACAGGAGAGGCTTCAGTCATTTCTGTGTCTGATGCCATAAGGG -TTACTGCTCGCGACACACAGGATCGAAAGGAGGTCCAGGATGGCGGTAGAATCAAGCTCC -CACGATTGGGGGGTTGGCGGGTTAAAAGAGGCCGGAGGAAAAAAAAAAGGCGGCTGATGA -ACGACCACGGTCTACCCCTAGCGATGTACCCCAGATGAGAAATCTGTGTTACGCTTGAGG -TAGCTACCGGGTCGGTGAAAAGTCGAAAAGGGGAGGTAGATGTAACTTGCAAGACGACCG -GGGCCGACTTTTAATAAGGCGAGTAAATTCGAAAAGCGTGGTTTGAGATGAATGCAATTC -GAGGTTGGGAAGAGGGGAGAGAAAGGTAGCCGGAGTGGGTTCAAAGGGAAGGTTTAGGAT -TTTTTTTTTCTTCCTCTCTTTCCGAAAAAGAGAGGCTGGCCACAAGGCAGGAATCGAGGT -TAGAGATATTGGGAATGAGTTCAAAGAGAAGAGACAGAAGAAAGAGAGAAGGGGAGTCAG -AACGAAAAAGAGCTTTGAGGGAAAAAAGGAAGAGAGTTTCGGTATCGGTGTTACGGCAGA -GGGGGTACAACAGAGTGACCTGAGTTGTGGTGCGACGGGTACGGTATCCAGTGGTATCCA -GCTGTACCCATAGAGTCTCCAGTATAATTGTCTATCTTAGCACTTTTTCAGTAGGTGGAG 
-GAACCTATCTCCCTTGGATCATCCCTCCGAGAGGGTTCCtttcttctatttttcttctat -tttaccagattttttcactttgattttattttattataattcatCTAGCTAGAATAACCT -GTAAAATAATCAAATTCCAAATCTCGAATTTATAATAAACTAAAACGGAAAAAGGAAAAA -AGAGGGGATTTTCTAGAAATGAGAGGCGTTCAGGAACTAACGGGTGGCTCCTTAGATTCC -CCTTTCCACACACCTTCCCCCGACAAGTACCCGGTACACCGCTGTTCCCTATTTGCGCCG -GCTCCGATCAAGTACTCGGTACATTATAGGATGTTCTTTGGTTTCGCTCCACCGCCATAG -GATTTGACACGTATTGTGAAAATCATAATATCCTATGTAGTCTACATATCTTCCAAGGGT -TGTATGTTTATCTTATCCGGGGAACTTGGGTCTTTCATTCTATTTCATCCAGATGTCCCC -GTCGATAAACTTGGAACTTGGAGAAAAGATTAAATCAGATCTTATATTTTGTATACTATC -GTATTATACAATATTGCACATGTGGCCATACCCCCCGTCCTTCAGGGGGGGGGGCATGCA -ATGTCGATCGAGCCTCAGAACCAAATGACGCCATATAATTGGCTCCTTCGACGTAATTCT -CACGCGGATATACCTCGTAGTTATACAAAGTACAAAGTATGGAGTATACCTAATATAGTA -TGACGAAGAGTACTTACTTCGTGCTCCGTACTTCATACCCTACATATATAATTATATTCT -CGGTATATTGTACGGTGACGAAGATTCCAAGGGGTAGATACAACCGCCCTTGGGCAAGAT -CGACCGAGGAACAAAGGTTCCCGGGAAACGGCACTTGTTTATGAGATATTCCCAATCCTG -AAGGTCCAACAAGGGTGTGTCCTATATGCCACCAAATGTAGGTGGTATTGAAAGCAGGAA -AATAGATCCGAAATAAATCCCCGTTCGCCCCTCGTTAGCCGCTTCCACCGCCGCCAAAAA -GAAAACAGTAGATTCACAGCACTTAAGAATACTTATGGGGAAAATTAACATAAGAGAAAG -AGTTCAGTGGCACTGCAGTTTGTTGCTCCGCACCCCCCCCCAATGTCTGTCGTAATATCC -CACAACCAGTCACCATCTCCCGTCCTTAGCCCACTCCCCTCCATAACCAATGTGCGCCCA -ACGCTTGGGCTTCAGACGATCCATTCCCGTAAATTTTTGACATGCTCCCTTATTATTCGA -CAGATCGCTAGCCGAAAAGGAGGTGAAACTGAGTGCCATCATAGACGCCGATCTCAGCAA -ATCATTTTCAGTGCGATGAATCTGTTAGGCTCCCCGGTCCCGGCAAAATTATGAGGTACC -CTAAATCATCAAAATTGCCGACGCCACCTTTAAGGTCCGGGGATAGGTACTAGTGCTCCG -CCATCGACATCGGTTTCCTAATGTCGTAAGCTACTTGAGTCAGACCTAGGATAACTATCA -GAAAACACCAAGGTTGAGAAGATGTGTTCGACTATAACGTACATAATCTCTGCCCCCAAA -GTCTTGGCTTTGCTTATGCGGTTGGTGACAGCCAACTCACCCCCCCTCTATCAAACCAGA -TGCGTTCCTGGATCCCGTTGTCAGATGACTTGAAGCCCTAAGCAGTTATACGCTCAGACC -TAATACCGTATCTACATCTCTGAACAGTGTAACTGAAGGTGTCTTCACCGAAGCCCATGA -GGGGTGTGCGTGTGACATCCTTTTTAGGCCGTAATCTAAACGATAAGAACCTATAAAATC -GGTGTATTTACATAAGGTATTTAGCATCTGAATTATACTGGTTCATTTAAATTTATATTG -CTTCAAATTGTGGTGCCCTAAAACATGTCGGGGCAACTGAGACAGATCAATATCAGACCA 
-CTACAGACTTTCATAATGATTATGAGACATCGATTCCGTACAACATATCTCGCCTTTTAA -GATTGCTTTTTGGTTTGACTTTGAGAAACAAAAGCTAATGGTCAACTCAAGTACCCGCCG -ATCTGCAAGTGCGTTTTGGTTGATTCGTGCCCTACGGACTCCCGAACACCCCACCTATAC -GAACGCGTTTTTCCCCCATTACGGAGGGGGCGTCAAGGATGTTTAGTCGCACGCTAGAGT -GGGTTTTTTTTTCCCCTTCTTTTTGCTTCTTTTCGCCCCTGTGTTTTAAGTGGCCACCTT -CCTGTCTTTTTTTCGATGGATATTTTAGGCCCAATCATCGTCATTAGAATTCCTTGAGAT -TTTAATATGAGGTACATCGCTCCGATATTTCCGTTTATACCGTAGGCTTGATCCGGATCT -AGTAATATATCATATGTGTGTAGAGACTTCTTCCGATTCCATTGAACGTTGAATTTTCCT -GTGGTAAATACCCGCCTACAATAGAGTTTAATATCCATGTTATGCATAGCCGGCATCAAC -TTGTCTAGGACAGTGTGCCGAAAGACTATGTCTCACATCGTCTCACATTATATAGGCTTG -TGAGTAAACATCGGCGTGTCGTTGGCAACCTAACGGGGCTCGCCGGGGAAGAAACAATAT -GAAAGATCTTTCATTGTGTAGGTTTCACTCCGGGAACACAAAGATATTGTTAGCTTCTCG -TGTATATACTCTACCTAGCCAAACGGGTGCTGCGTAAGCATACCCAAAATGGCAAGAGTC -ATTAATCTTGTGTACCTATCGCCAGGAAAATGTCTATATCGTACGTCAGCCTCAAGATGC -CTTGCAATATCTCCCACAAAACCCAATTTAGGTAACACATAACAGACACTTTCCGATCGC -CTACCTGTTGCAGATCCGACGATCAAGTAATCCGGTTTTTTGAGATTTGCATTCCTGTTT -TGTGTATGCTTTGTGGAGATGGATCTATCGGGAGCTGGGGAGCTGGGGATCCATTTCTGC -CAATCAACCTGTTCGGAACTTTGATCAGGGCGGGATATGAGATCCATAGAGGATCAAACT -AAAAGGCTAGCGCTGAAGTACTATGTTGTTTTGATCGCCAATTTCCAAGTCTCGAGAAAG -TTGTCAAAATATGAGAGTTCCAGCTGTTGTACATGATGGCAATGCCTAGTATGGGCGTCG -AGATTGTGTATAGAGGCATGAGAAGCGAATGTGGAAACCGAAGGATATGATAATGATTTC -CAGTAATTGTAGACTGCATAGGTTGTACAAAAGAGCCCCAAAGCCCTGTATGCAATGAAT -GGTGCCTATTTACCACCAAAGCATCAAAACACCAACCACCAAAACTTTTGAGTTGTTTTG -CTCACAGTGCATCATAGGATTGCCATTGATACAGGGATCTTAATACAGCAATGCCGCCGG -ACTGTGTTTCAACCGTGCGGCCTCGACCACGATCGTTGTAGTCCTCCGGTGTGGACCCTG -TAGTCCAGAACTTGGACTCATGATCGGCATTGCGGTAATCAACAGCATCATCTGGGATAG -TGCTGCTATCCACGATCTCGTTGACATCCAGATGATGATATGAGCGCCCCCACCAGGTAC -CAAACTTGCCATCGCTGTCACGAGTCATCAAAGCGGCTTGCGCATTGTGTTCAATCCAGG -GACCATATGCGCGGCACTGCTTCATATGCTGCTCATAGATTTCGGCCCAGTCATCATCTG -CTGCATGTCTGTGGATCGCGTCCGCCAGGAACTGCTTCTCTTCAGGCTGAAGATGCCGAC -AGAATTCGGACAGGTGGTGGAAGAATATCCCTTTGAAGGTCTGTCCATCTTGAGAGCAGT -CTCCACGAGAATCACATGCGTCCTCGAGCACACCTCCGCGACCAAGTCCCTTCCATTGCT 
-GGCTGGAGGTATGTGGCCACCCGGTAACCTGAAGTACTTTGTGGATAAGCTCGTGACCGT -CGCGCAGATAGTCTTGCGAGTTTGTGGCTAGCCAGAGTCCTCTCAGCCCGCTGAGAATGA -CTCCTTGATTGTAGGTGTAGATCATAGTGTTGAGCTCGTCGCATTGTCCTGAGCCAGGAT -TCGATGCACTTTTCCAGCCACTGATATGGAAACCATCTGCATAGAGACCGCCGATTCCAG -TCATATTTGAGTTCTTGAGCCATTCATACCCGTCAATTGCAGCCTTCAGATGATTAGGGT -CGTGTGGGTTCTTCCGTAGGGGCTGTGGGCCCTGCGAGTCGACAAGGAACGGGGCATCAA -TAGAATCTCCGGGAAAGTATAAATACATCTCGATGCTTGCGGATATATAGAGCTCGTTCG -TAATCGCATTTTTGTAGGGTATCAAATATGGGCTCCAGATCATGCCGCCCTCACAGAGGG -TTTTGTCCCAACCAGCGGAGGCGAGCTCGTAAAAGATGCGTGCTCTGTGTGCAGCAGAGT -TCTGGAACTGGGTACCGTACCAAAGTTCTTCTGTGCTGTTGTTTGAAGCGACATAGTGAA -GGTCTGAATGAAGATTTTGGAACTTAATATTTTCCAACCACCCTAAAACAACCCATAGCA -TGTCATCATATGCCTGGTTTCTGAGACTGAAAGCATTTTCGCCAAAGTAGAACGCGGAAG -TATGGCTGAAAAACCGATTGACCATATTTTCAAGCCTGGAGAAGGATAATCCTTGCAACT -CATCCGTGTCGGGAATATCAAGTGGACCTGAGGTTAGAGACGAAAGCACTGCCGAAACGT -GTGTGCCTAACACTGCTGCAGTCCAATCGATGCTGGTCGGCCACGTGCCCTGCCACAGCA -CAAAGTACCTGTCTTGCATCACCTGCAGGGAGGCAATAAGGTCCTGCAGGGTGTTTAGGA -TGGGGTCGTGAATCGACTCTTCCAGCGCCGATCCAGCACTCATCAAGACAGTTCCCCTTA -CGGGAGCATCAATCGAATTGGAAGATGGAGAGGTTCCCATTAGTTCCTGGGGTGGATACG -CCAGCGCAGTGGCCACAACTGAGAATGATAGGAAGCATAGCATAAGAATACAGTATAGAG -ATCGTGATAGCCCCAAGGGGGTCAGAGATGCGTGTTGGGGAGCCATTCTGCATCAGGAAA -CATATCTTCTCAGAGTTAGATCTCCAGACACAGAATTCTGGAGTCCGTCTATAAGAATAG -GGGGTAGATAATACGCAATGAAAGCGACTCAAAAAGAGGAAGAGCGACAACCGCATGCGC -CATGCGGGGAAAATGAGGAGCCGGATAGGGTTACGCTTAGTCACCCTGATGCCCCGGGTC -AGGGCCAGCTGGGCAGGTGAGGTCTGTAGTGGTCAGGTGAGAACCTCTACAACTATTTGG -TGTGGTATTAAAATGCGAACTCCCACCGAAGCCTCCTGACCCCAGAAAACTAGCTCCAAC -GTGCAAGGATTATTTATCGACCCAACTCTGACGCCACATGGCTGTCGCATGATGCATCCC -ACCCCATAAGGGATGTTACATAGATCTACTACAACGTATACTACGGAATACTCATGTACT -CTTTTGGTTTCGTCCTGAAATGATTCTCAAAATATGTGGACAGGAGAAAAATCACTCATA -ACTAAGAAAATCCATTCTTCATAATGTACATTACTCCCAGAAGCTGGGCTTTGGGGTCCA -GGGTTGTCTTTATTGACCAGATGCTCAAACACCAATGTTCAATTTTCAGCAAAGGGCCCT -TCATTTCTGAGTCCATATCAGCAAATTTCTAGTCAAAGCTTAACAGCGAATTTTCATACC -ATTATGGCAATCTATCGTGGCGCCGGCACATACCTCTTGTGTGTATGTGTCCCATGGACA 
-CCCACACTTGACCTGTGGACCCATACAGATTCGACTGAAGAGGGGTCGTGCCGGCGAATA -CTAGATGATATGTTCATCGTTTGTAGACAAGTTCTGGTAGGAAGGATGGGCAAAAATTCG -AACGAAATAGCGTGAGGTATTGAGAAAATGAGAGAGAAAGCAAAGCCCAGGTATCAAATG -GAATATCACAAGCAACGTATGCCGGTCAAATTTTGTTGTTCCCATTCATCAGGAAGGCTT -AGTCCCGGTCACCGCCAGCTTCAGGGCTGCGGCTTCGGTTGGGGCTTCTGCTGCGATTCC -GTGTAGCCCGGCCTTTTGGTCCTCCATTCTTGTTCTTGCCTGATGGAGGTGGGCGGACAA -AAGCATAGTCGACACGAAGAGTCTGTTCGAGGAGCTTGTAACCGTTAAGGTTCTGGATAG -CTTCACTGGCTTCGGGGAGAGTGGAGTATTCGATCAATGCGTAGCCCTGCCGGAACCTTG -TGTTAACAACCATTTCATTGTATGTGAGTAGGTAAAGGTTCCCATATTTTACATGAGGCG -TTCTCTATGGTAGGATGCATACCTTGACATAACCCGTTCGCCGATCAAGGTTCAAGCTGA -AGTTTTTGATCTCACCATACTCCGCAAAGAGTTCAGTCACATCTTCCTCTGATGATTCCT -CGTGGATATTAGATACGATGATAATCCATCCTTCGATACTCCGCACGGCGACCGCGTTGC -CCTGGGTGCGTGGCTCAAAACTGCGATCTGACTGCTCGGGAACGTCGTCATGCGCGACAG -GAGGGTCAATTTCCATCTCGTTCGACATATTGTGAGGTAGGGGTGCAAGTCGATTGTGCT -ATTGTGTTGGTAGTATGTTGGGAGGTTTAGTCTGCACGTTTGTAGCGGTCAATGAGCCTG -AATCAGGGTCGCGATCTGAAGTCTGCAGAGGGCGGAGGGCGGTATCATTGGGCGGTCTCA -TGGGCGGTCTCGGGCAACCTCACCTTCGAGAGAAAGCTTTCAGTCACATGACCACAGTCT -GCCCTTCTTATGTTGTAGATCGATGTGCTGATAAGTCCGAGACCCTTGTACATGTATGTA -AACGTTGAAATCATGCTGATCTCGTTTCGGAGTATTATCGAGTTAACAAAATACCACGTA -GTATAGTGTTGTAGAAAACGAAAGCCAGTCGTGGTATTGAATCTCTACCAATAGTGATAT -TGTTACGCATGATCACCAGAACTAAATCCCGCCGAATAGTTGACGAACAGGTGACTGTTA -CTTTCTTCCTTATTTTTGCAGGCTGGAACGTCTTCTATTCTCTTCCAAACACGGAGTTTA -CAGACTCGAATCGTTGTCAGCTATATTTTTATACGCAGTAGAATTGTCAAAGGAGAGGAG -ATTGTATTCGCTACCCCATCTGTTCTATGTCTCTCAGCAAGTAGAAGACAGCGATCAACT -GCCCTTGCCCGTATGAAGAAGCATCAAGGTTCAAAGACCAAAGAACGGTaaaaaaaaaaa -agaaaaaaggtcaaaaggtagaagggaggagataaaTACACGGTTAGTTAAATACAGGGG -CTGTTTAATGACTGAAAAAAAGAGGATCACCAATTCTGGTGTTGGACAAACCGAGGTCAG -ATTATCCAGTTAACAAAGTGGATATAAAGTAGCTTAGCGACGGTTATTGCCCATGCCGAG -GATCTGGACCATGCGAATAAAGATGTTAATGAAGTCCAACTCGAGGCTGACGCACTCGTT -GACCACATCACGCTTGATCAAGCCGCGCTCGGCAAGACGAGCATGGTGCAAAATCTTCTG -CACATCGTAAAGAGTGAAACCACCGAAGACAGCAAGGCCACCGTACAGCCAGAGGCGCTC -AGACCACATTAAAGTGCGAACAGCAGTAGCAGGGAGGACCATGGGAGCGAGTCCAGAGAG 
-AGCAACGATAGTCACACCGGCAAGAAGGGGACCGCCCAGGTAGAGGTACTTCTCCTGCTT -GGCAGTGGCGCCGACAAAAGCGATTGAGCCCATCATACCAACAGTGTAAAGACCAGCGCG -AGCGAGAATGGCAGGGTTCATGAACATTAAAGGTGAAAGAAGAGCAGCCTGGGTAAGGTT -GAAGACAGTCCAGCAGCCATACTTCATCATGTAGCTAATTGAAAGGTCAGTGTCTTTCTG -GAAATAAAATAGAATACAAAAGTGGCCAAGAACTTACTTATCCGGGGAAGTGTAGAAAGT -GCCGTACATGGAGCCGATGCTGCCAACGAGACCGACACCCATAACGAGCCACGGGTTCAT -GGCCATGAGACGATAAGACCAGCCGCTCATATGGAGGGCACGAGCAGCAACACCAATGAT -ACCAACACCAAGGCCAGTGTGCATGAAAGTTTGATTCAAGTAGCTCTGCTCGTAAGCAGG -CATACCGCCGTCTTCTCTCGTTTCACGGTTGAAGATGAAGTTGGTTGCCAGAACTGTACC -ACCAATAATGGCGGCGCCATACAACAGTTTCTGCGTCATGTTACCATCCTGGGCAGGGTT -AAAACTCTGCTGCATGTATGTACGGCGGAAGGCATTTTGGAAGTTCTGTCGGGACTTGGC -GAAGATCGATGACGAAACGCCCTGAGGCTTTGTTGCTTGGGTAGGCTTGATGGGGGAGTT -GTGAATGAATCGGGTGGTGTTTGTGAGCTTAGGGACCTGGCGCAGTGCCGAGGATACAGC -GAACGGGCGCCGTAGGAAGAAAGCCATGTTTTTGATGGGGATAGTCGAGAGTCGAAAGTG -AGGAGACGTTTAAATGTCGGGCCCACGGGTGATATGCAGGCGATTACTCATGTTCCGCCG -GCGACCGCTTAAATCCGGAAAATTCTAGAAGTAGTCACGTGCATCAAGAAGTGGGATGGG -TTTGTAGGCTCACAGTGAACCTCCAGAGCTCACTTGATGTTTTTTTTATCAGATCGGCCT -GATTACAAATACGTCATCTTCTTCGGTTCGGTTTTAGTTCGATTCCAGTCATGGCCTCAA -ATCTAGAGACAATCAGCCTGCCACATCTACCATTGGTGCCTGTGCATGTTGCTCTATATC -GCGATGTTCAAAATGCCGCCTCTCTGAAGGGTCAACTACTCGCCGGCCAAACAGATTTTG -AGTATGCATTTATTGATGCAAGCATGGTATGTCAGGTTGTCATTGAGAGCTCAAATCAGG -AACTAGTCCACTAACACTAAGCGACAGGTTCTGTCAAGAGCTCACGCCACTGCTGCAGTA -TTCAGAGCAGTTAATGATTACATGCACAATCGACTGAAATCTCATAATGTTCATTCTGAG -ATCGTGTTCTCATTGAATCCTACGAATAATGTAGGTCACCAATTTTAGGAATTACCAGCA -ATTTGCCACTAAAGCCGGGACTAACACGGAGTTAGATCACGGAATCATTCCGTAGATTCG -GTATTTCCGATTCAACCAGAGACTTGTTGGTGATCAAGGTCTCTGTGTCTCCGGAGATAA -CACATGAATCCGTTGCGGCCCACTTGGACAGCTCGGTTGAAGGAACACCTGCACGCTTCG -ATGATCAGACACTATCCGAAATCTCTGATATCAACAAGATTAAGAAAGCGTACAAGCTAG -GGGCTTTGCCTTCGCCAACTGCCAAGGAAAATGATGAGGTTAAGCAAAGACTCGAAAATT -CCTTGCTGAGTGCAATCGCATTGAGGGGATCATGAGCAATTGCCAATATCAAGCCACCAA -ATTCACTGTGCCCCAAATGCCCCAGGTCTGCTATGGGACGAGTGGGGCATCTTGAGCTCC -GAAATGACTTTCAGAAAACACTCATCATCAATCACAAGCAGATCGTATGGTTTTTCTGAG 
-GTCTATCATCACTTAGCTCCAACCTTGGGTAGAGATCACCTCAGATCTCTTTATCGAGAA -AGTGCACGAAGTCAAAGATCATGATGAATTGAATGAAATATTACAGAGGCGCAGGGCCTG -TATATAGTTCCACGCGTCTGGTCCTGACCAATTGTATGTTGTGATACATGGCAGTAAAAA -TGGTTTATTCAATTTCGGAATTGAATGATCCCATTAATTAGTCTGCATTATAGAAACCTA -CTCAGCTGAGGTTGAATGAAAGCAGTTGGTTGCCCGGCCGAACTATGTAGCCAACTCAAA -ATATTGCCAAGACCGCGGCCGATCCGCCCTTTTAGATCGAAACATCGAACTCGGATCTCC -TAACGCCACTTCTCCAGCATGTCCGACTTGCATACACTTGATACTGCTTTGCCTTCAGTG -GAATTTGCCTAATGTCTGATATAGTAGACTTAAGCAGTACACGCGACTCTCCCTGATCAA -GCAAAACACCCAGAGACGCGATTCGCGACGATGGCCAGCATTGCAGATGTGATCATTTTG -TCATCCTCTCCAAATCCACCACAGTCGCCAGGGCCAGCTAGGCATGATGCGAAGCGGGTC -ACTCACCTCTTGCCACGAGGCGAAACGCCACCCCCAATACCCTCGACAGCCGATTTGTCT -AAACCTCATACGCATTCCCGTTTTTTCCCCAATCCAATTCCGAGCACCAAGTGTCCCGTC -GACGTGACAAGACCCAAAAAACGATCGACAAAGAAGGATCCTACTTCAAACCCCGGCAAC -CAAACTGCATCGAGCAAACCGCGGCGGAAGGCGAAGAAAGCCGCAGATGCGGCAGGAACA -ATTGCGCTTGGGGATGCCCAGTCAGAGACTGTAGGAACGAAAGATGAAGCAGCGAAACCG -ACCAGAGGTCGTACTCGCAAGAATGCCATGAGCAAAGATACAGGAAATATGACTCTGGCT -GGGAAAGTTACGAAGACGAGCGCCAATCCAATAGCAAAGAAATCCAACAAAGGTTGGAAG -CAGACCGTTGCCACAAAAGTATCCCCCTCGAAAGACATGCTTGAGGAACTTACCCTCAAG -GAACCAAATGCTTTGAGAGAGGATGAGGTATTGCATCTGGATGAGGCAATGAGGAGAAGG -ATGGACTGGACACCACCAAGGGATACCGCTTACGAGGAAATTGCTACCGTAAACGATGGT -GACAGCCAGGATAAGGACCGTGACTCGAAACTTGCGGGTGGGTTTGGCAAGCGGCTATCC -GATTACAATTATTCTGGGTCGGCTTTGAATCCCCGCGGCCTTGTGCAGAACGCGACTGGC -GGAGGACCAACAAAGCGTCGACGGATAGAGCTAGTCAATCCTGAGATAAAGGCATTGTCC -AATGGAAGGTACTCTGATTCAAGTGATCAAGCATCTGGGCAAGGAGAAGACATGGCATCG -GGAATGCCCAAAAAGACAATCAAGGGCAAGCCTAAGAAGTTTACCACCTTGACGGCGCGC -ATGACTGCACAATATTCAATAATCGACACAGAAGACGATGAGCTGGTGGTCAATCCTCTT -CCGGACATCAGAAGGGCAAAAGCAAGCCGAAGAAAGACCAAAGACACAGAGAAAGAATCT -TCGTTCACTGTACTTCCCCCGGAAGCAGCTGTCGAATTTCTTAATGATCAAGATCTTGTG -TTTGGTACCTGCAGTCAATTGGAAAGAGATGCTTCGCCGCGGACTTTGCGAGAAACCAAG -CAAGCCATCTGTGCCTCCGAAGGTCTTTCATCTCCAGAAGGGACACACAACAATAACTCT -ACAGCGATCCAAGAATCATCTTCTTGTTTTGTATCAAGGCTGGCAGGAACTAGGAATCTG -TGGTGTGTTGGCGCCAGAGACACCAAAGGATCTTTGATTCAACGAGAAACACTCAACATG 
-GTTGACTTGACGGGTGGAGGAGAAACCCCCGCAAAGAAGAGTCATAACACGGTGGAAGTG -GAACGAGCAGGACGAGCAGGCCACGAAACTCTTGCGAGCAACTGGTTTGAGCTTGAATTC -GCCGACATAGATTCACCCTCAGAACAGAGATCATCATCGCTACCATTGGCCCACAGGGCC -TTGGGTTCAGATATGCAGGCTCAAGCTCCGGTACCTACAGCCACAGCTACTACCATGCCC -AAGGCTAGGGACAAAGCTGCAAAGCCTGCAACGGGAACCGATGATACACCGACTGAAGCG -GCTAGTTCCCAGCCAACTACCTCGCAGGCGCCTTCCATGCCACAATACTCCGGATTCACA -GACACGGAACTGTCTAAACAGGTTTCCACCTATGGCTTTAAGCCAGTCAGGGGTCGCAAG -AAGATGATTGATCTCCTTCAACAATGCTGGGAATCGAAAAATGGTAGCAATGCTACCTCT -GGCAGCCAACCTACTCACCAACCTAAACAACAGAAAGAAACTGTGTCCAAGATGGACAAT -GTTCCAACCCCCACCATACATGCGAAGCCAAAGGCGACGGTTCAGTCTAGATCTACAACA -TCTACAAAGAATCGAAAATTACTTGACACGACAGAATCACCGTCTATCTCGCAATCCAAT -CTCCAGACAGGCCCGAAGAAAAACTTCAAAGGGAAGCCTATCGCTGAGACCTCTAAGTCA -TTCATTGATGTAGAAGAGATCCAAGACTCAGAAGAAGAAATCATTCTCTCTCCAAGTCAA -GTACAAAGGCACTATACCGACATCTATTCAAATTTTAAGGCAGACAGCCAAGCACACCAC -CACTCTTTGGAACTCTTCACAAAAGCACCATCACCAAGTCCAACCAAGCGGAAGGTTGTT -TCTTCCAAAATTTCAGTCAAAGGACCAGCACCCTCTGCATCCCTCACAGCCACACACTCA -GCAGAATCCTCTAAAGAAATCAATATGGCGGATATATCTGCACAAATCACCCAAGCTATT -CGGGCCCAGCCTCGGTTCTCGCCACTATCATCGTCTCGTGGTAGCCGTATTCGACCGACA -TGGCATGAAAAAATCCTCATGTATGACCCTATCATCCTCGAAGATTTTACCGCATGGCTC -AATGTCGAGGGACTTGGGCTTGTGGGTGAAGATCGCGAGGTTGGCACTGCGTCTGTCCGG -GAATGGTGTGAAAGTAAAGGAATCTGTTGTTGTTGGAAAAGCAATGCCAGCTGGTAAAAT -CGCAAGGGTTATTTGCATTCATTTTTATTTTGGATACCCGGTTCACGAATCGGGATCACA -CTGCACTATACCCACTGACATGTTCTTTTGGAAGAAGATGAAAAAATCTGCTTTTCTCCA -TCTACTGTAAGAAGTCTCTCACATCCTCTACTTGACTAAGTGACAAAAAAGTCCACAGTA -TTCAAAATAAAAGCATTTGTGTACTCAAACAATGGATCTTGTGGCCATGTGCCATTTTCA -TTTTCTTGATCATACATTTCATATTGGACAGAACAGGATAGATGCAAACAGCAAACCAAG -CCCTTGAGACATGATACCTTGTCTAAAATTAGCATGTGCGAAAAAGGCGTCACACTCATA -GCAGACTATGGTGATGATTTCCGGGGTGAAGATCCGGCTGGCAATTGAGTTAGCCCTAAA -TCAAAGAATCAGGACATGGATAATCCAATGTAAAATCAGAATTACAAAAGCGGGAAAAAA -GACATACCTCGTAGTGATTCATAAGATTGGCTCTGTATGAATCATCCAGACAGGCTTGTA -GCTGTGTGTTCTTCTGCTGAAGCTGCGTGACCTGTTCCTTCAGGTTCTCGGTCTGCGCAT -TGAGCTTCTCGATAAGGTTCCGCATTCGCCCAGCCTCACCACGCACCCAGGTCTCACCGG 
-TGACACGGTCAGTGACTTTGCATGGGATATTGAGGACAAGGCAACAGCCGCAGCCTTCAG -GATCGGGCGTACACTTCATCTTCTTGAGCTAGAAATACAAATGCGTCAGAGAGGTGTTCT -GAGCCAAACCCAACGCGGGAACAGGATATGAATGGGCGTACCTTGCAACGATCACAGGCC -TGGCCGACACGGTGTTGAGCCTTCGTGACCCTGGCACGTACCATGGCCGACCAGTCGGGC -TCGGGGTCGTAGCGGCTGCGGCGAGTCTTCTTGGGTTTCGGGACATTCTCGCTATGAGCG -TCGTGGACGCGCTCGTCGTCGGGGATTGTAAAGTCTGGAGTATTCATGATGAGACTGGAT -GGTCTTCTTTCTGGAAATGATGGTTAAATGGCTTCTCATCTCGTCTTCTTATGCAAAGCA -CCGAAGAAACATATTAAGTCTGTGTTTGAGATCTTGGAGTACTTGCCTTTCATTTTCTAG -GCTACTTCCAGGTAAGAATCACTCTTTCTTGTCTTTTTCTTATCTATCCAGAGAAAACAT -CCGATCCAGGTCGCACTCAGATCACAAAGCCTTTGTCTTTAGTAGACTCCCTTTGTTAGA -CTTTGCTGTCTTAACATGGCGCATAACAAATGACTGGACTGTTTCACGCTGCTTGCTTTG -TTAGACAGGAGAAGTCCTCACTCGGAGTCTCTGGGTCAAGGACAGAGCCATTGTTGTATA -CACGTCCTCATTGTCCTGGAAATTGTCTCGAACTTCTGGCTTTTCTTTTTCTTTGCCTTC -TTCCAATGAGTTCTGCTGATCTTTTTGTTATATTGTCAATGGCTATACGCCTTCTCTAGG -AGAAAACTTGATCCATTCCATTACAAGATGTCTCTATATGGCCCAATGACCCTTGTCATG -ACAATAGAATTTTTTCAAGTGGCTGTGCCCACTCACCAACCCTTTATCAAGCCTAGTGTC -CTCATCAGCTCGATAGGTCTTCAAGATTTTCCCCATGTGGGTTATTTATTCATGGTCTAC -CATTTCCAGTTATGTAGAAAGCAAAATATAAACACATCCTTTATGAATGAGGGTTGTGGA -AATTCAATGGCTCATTGCATGGCTTTAAGTATCATTCGTATTCACGGCATGATAAATTCT -GGTTGGGTGGGAATTTAGCCTTCTATTGTATAGAAGTATCACTTTTTCAAAGGAAATATC -CAATAGGTGACTTGATGTCCAAAAATAGCTGCCCATCGATCTTTAGCTCTTCACAGCCCT -GGGGCCGAGCCACTACTGCATCTTCCTATGTGCTACATCAAACTTGGGAAAAAATTCAAC -AGCTCATTGCAAATGAATGGAGAATGTGAGAGCAATAAATTGAATTTGAGATAAAATAGA -GGAATAAAATATATAGAGAATGGTGTAATGCCAATTTCTCCCAGCTCTGCAAACACACTT -CGATTCCAAGGGGGCAAGAGCTGGGGGCGAAGAGTCGATCCGATTGATCTTACAATCGTA -AGATCAATCGAACCAAAGAGACCATGTAAACAAAGTCAAGCTGAATAGGAACGAGCAATA -AGAAATGAGAAGAAGTGGAAGAAGGATGagaaaagaaaagaaaagaaaagaaaaACGCGC -TTGAATCTATAAGGCCGCTTCCTCATTGTCAGTCTCCAGCTCTAAAGACTGAGCTCGAGG -TCGTTTTCGAGACACAGTAGGTGTGGTAGGAGCAGGATCAGACATGGCAGATGGCGATTC -ACTCGAGCGATCACGATGTTGTTTTTTCGTGTCCAAAAGTCGAAGTTGAGTTCTGCATTG -ATCAGGTGAGTACAATCTAGTACCACCAAGTTCTTTCATCTAAGAAGGATCAAAAGACAC -ATTAGAACGTTTCCTATATTATACGAGCAAAAAGAAGCTCACCTTGTCTGAGAGGAAATG 
-AAATTTATCTTTTGCCCAGATCCCATGGGCGTGGATTAGGAGTTTGATCTAGTAGAATTG -CGTCTTAGCATGGGTGCTCAACAGGTGTATACTCCAGCTCTTACAGCTTGTGTATACCAT -ATGTTGCAGGAGGTACATACTCACATCTGATTCTTCCCAGCGTACCATTCGCTCTCGGAG -TCGGCGATGGAGACGCATCTGCAAGCGCGCCTCGGAGGCATCTTTATTGAATCTTTCCCG -GAACTCTTGACGCACGACCTTCCAAGCCGTTTGTCTATCACGCAGCTCCTCTACGAATGC -ATCTTCGTCTTCTGCTCGCGGATCACGTCGCCCGCGGCCTGGAGCTCGGACCTTAGGGAG -TATATGTGGAGATGGCTGGGTATTGTGGCGATCTTGCTCCATCCGGAGAAGACCCTCTGG -ATTGGGAGCGATATTGATTGGAGTATTGCTTGGTTGCTCCCGCGAGGGCGCGTCCGAGGA -TGGGTGCCTGGCGCGCTTAGCTGGTGGTGGGCCCCGTGGCGACTGGCTAGGCGTTTGTGC -CACGCTATGGTGGAAATCACTTGGACCCGGCGCGTACAAATAGGCGGAATCGGGTGCTGA -AGACCGGGGGAAATCACTTGGGAACGGTGCAGAGATACCGAGACCCACTGGGGCTGACAT -GGGGTCATTCATCCCATATTGGTATTCTTGTGTGCCGTAGCCATGTGAGTGATGACTGAT -TGCTGGGCTGGGACGAAACGAGCTGTCCGGGAGACTGATTGGTGTCAGGATGCTGGATGG -AGCTATAGGCTCCGTGCAGTACGGGCTGGTGGCTCCGACATACGGGGTCTAGATCGTATA -TGGATCAGCTCATGTTGAAGCATACAGTGTATTGTATCAGAAGTACATATGGACATACCG -GTTGGATAGGCCAATGAGCTGGGATACCTTCATAGTACTCTGACAATTCCTCGGAGGGGG -GGATACCCGAGCGATTAGACATGATGTTGACTGGTTGCACTCAGGCGGAACATAAAACAC -GGGGAAGATTCACGAGTCCACTGCGTTGTCTGAGTGGGGAGAATAAACGCCGAAGCAGCA -GAATATGAAACGAGATGTGAAACTCGATGTAGGGGAAGATGTGGAACTGCAGGCGGCTGG -CAGGCTATGGTGGTTTGCTGAACAGTTAGGGTTGTATGTGTATATCAATAAGGCCTTCCA -CTCCGAGGATGTTGAAGAAAATATTCACATTCCCAACCACCGACGATAGTCGATTTTATA -AGATTCATGCGAGCAAGAAAAGAGGGGAAGATTTAAAAGCAAGAGAGATGCACAATTCCG -CCTGTCCTCCACTTTGAGCGATATCACAGCCAGCACCGGGGATGCCTCGTAAGGCACGGT -GTGAACAATCAGTAGAGGTATGAACCATAGGCTCAGAGAGCACCAAAACAGAGATCAGTA -CCGCCGACGAGCGCTGTGCCGCATTAGAACGTAGGGAAACTCATAAAGACTCGACGAGAG -CCTATATTTAAGCGCTGCGGGCTCAATAGTCCAATGTTGATATCCATATTCCGCCCCCAT -CCCTGGATGGGGGGCTCAGACTCTGCATGGTGAATAGGAATTAAAGTCAGTCCATGACTT -CAGACCCACTACAACGGGTCAAAAAATAAAAAAGAATAAATCAAACAGGAATTACTGCGC -TGCTGAGGGTTGTGGATGATCCTCCACAAAGGAGACAAAGCGTAGCGCTTAGAGCTCTGG -AGAAATCCAGTGGACGGATTTTGGACCCCATTGAAAATTATATTATCCATACTAGGCCCA -TCAATTATATACAGATTTGAACATAGTCAGACATAAACGTCCAACTGCCCCCCCAAATCC -GATATTGGCCCCTCTTCATGAAGATCTCCAGTCAGCCTTGTGCCTCTCTACGCTAATATT 
-TTATTTTGGCAACCACGagcagaagacagaagaaaagaaagaaaaaaaaaaaagaaaaTC -CCTCAAGGTGACGCTAGATTCCAACACACCGGTTGTGGGAATGGGAGGGATCATGATCCT -TCTCCATGCATTCTCCATGCAATCTCCATGCATTCCCCAGAGGCGTCGCTCAAGACGGGC -GGCTCTTGACTGGCCATAGATGGCCCAAAGCCCTTTGAGGCTGAGAAGACCCCACGCGCC -GTGGCTTTGTCCACGGAGAATTCATGACCGCTGGTTAAGCGCTGATGCAGACACCCGGTA -AACCCCGGTCATCTCGAAGGTGCAGAACAACCTTAGAACATTCTTTGTGATAATTTTTTT -TCCATTTCTAATTTTCTTGCTCCTGTGGGGATCTCAAGCAGGGAAAAAATAGAGAGAGAG -AAAAAAACAGAGAATAATGGGAGGGAACTTTTTTTCTTCTTCGATCTCAATTTTCCACCT -CTCCGGTGATCGTTTGCTTCCGAGGTCACCACTCCGAGCCACATCCTGCGTCTAAGCGCC -CGAGAAAAAGCCAGTGCCTGGACCTTGAATAACACGCTCACATTTTGTGGATCTGCACAT -CGCAGTCGAAAAACATCGGGAGTTGATAGCTCCCATGTATCTTTCTTCTGGGGGACTAAA -CAGTTGAATCTCGGGGCATGTACGGCGTACTCCGTAGAGGCTCAAAACATCGTTCCCAGG -TGCAGTAAACGCTTGTATTTCCTAGATGCATGTCGACTACTGATTACTGACTATTGGCTC -TTTATTTTCTCACTTTCTCGTGTAGAAAATCCTGTCTCTGGCTTATCTTATGTGATAGCT -ACTTGCATATTGATTCACCATACTTTGGGGGCCAAGTGCTCCTTTAGGTATGCCGAGTAT -ACGGGGGCTAGGCTAGTCGAAACCCGTGGGCCCTAAATGTAATACTTTTTCACCCCGAGA -ACGTCCCCATAGACAATCTAGTAGAACTATTTACCGGCCGATTTCCCTACATAGGGTACA -AGAAATCAGGATCCATAGAAGGCTCCTAGATCAAATGAAATCAAGTGGCCCCCATCAACT -TGGAGCTCGTACGGTATATTTCATACATTTTCTCATTGCTTGATTCTATCCAAAGGCCCG -AGAATCAAAAGGTGGGTGGGTATCAAATCCTGGGAGATGACCAGACTATAACAGCAATGG -CAGGTCTTCCCGTGCAATTAACCCATAGCGAGGCCGCGAGGGCTCTGCTTAGCATCCAAA -AACGAACTTGACTGACATATACGTGTCCCACGCCGTGAACGACTCTGCCCATGGCAAATC -ATCACTTAATGAGAATTGTTCACCCGGTGATCATCCGTTTCGTCTTCGTCCCCCGAAAAA -CCATCTAAGCTATCATCATGATGATGTCCCTTGTTGTGACCATTGGTTTCCAGGGTATCG -TCGGCTGCTATGGAAATACCACGAGCGCCCCATCGGTTTTGGTTTGATTGACCATCCACA -CCTTCCTCCTCGTCTCGAAGGTGCTGTTCTTCGTCTGCAGAGCGTCGAGATGTCCGGAAA -TTCATCACACGATTCACCAACCAACTGTTTCCATAGTCCTCGGAGTCAACGACACCACCA -TTGCGAAGCTTGACTCGCTGGCGCGAATAGTATACCCATGCAAAGTCGACATAAAAGGCG -GTCTGTACAATTCCAAACACGACGGCGATCCAATCATAGAAGGGCTCCTTATTGGCGAGG -CGGACTATCCAATTGAGGATGTAGAATGCCCTGTAAGAGCCCAGAGCAAGTAGGTAATAT -GAGTCGATCACCGTTGGGACAGTGGTCTGGCGAAGGAGAAGGAGCTGCGGCAAAACACAC -ACGGATTCAAGAATTATGGAGAAAGCCCAGCAAAACTAAGTGATTGGTTACGTTAAGCTG 
-CAGAATATGAATACTTTGGGCTTGTCCAACTTACTTCAAGGAACCATGTATTGTAAGCCT -CTTTTTCGAGAAGCCAGATTGAGATAGGTGCCAAAACAGCGGACCCAGCCACAGACCACA -AACCGAGTTTCCAAGCTCGCTCTCTCTCGCGCGTCCGCGGGTATACCTTCATCATCAGGT -ATATCAGGTATGCGGATGAGATTATATAGAAGAGCTTGAAAGAGACATTCCAAATGCCAT -GCCATGTCCCACGCGTGTACGTGTGGGGATCAAGCAAATCAAGATAGCGAAATATGAAGA -CGGCGCCATAAAGAAGCTGTGTGAGGAGTGAGACGCCTAAATTAATTCCACGTGTCAGTC -ATCTTTGTAGTATACCTAGGTAGCTCCAATCAGATTGATACAAGGATGTACGAACCCTCA -GCGCTCTTATTTTTGTGAATAGCCCAAATTAGGACACATATTGACCCTAAGTGGGACAGA -TCTGCAATGATGCGAAAGATATTCCAGGCCACCATGGCTGCACGTGGTCGAAGATTCAAA -CAGTAAGTTGGACGCGGGGTGTGGACACTTGTCAAGCTATCACCATCGAGGTCGGCGGTA -AATATCTGGTGGAATGAGAAGAATCAGATGTCGAGTAAATTGGAAGAGAAATGAGTTCTT -TGTTTTTATTTCGTATATAGGTTTCCCCCTCCAATGAGAATAAGCAGGGAGATGCAAGCT -TGTAAGGATATTGGCGCTGTTGGAGTGACTGGGAGTTCGTTATCGGTGATCCACATGAGC -TCGGGCCGCGTGTCCCTCTCACGTGAGCTTGACGTCAGCGTGTATTGGAAGCTCTGGGGT -AGTTGGCGAAAAAATATGGTGAATACATTTTGTTAGATGACGAAAGTGCTGCATACCTTT -CGGTTCTATCTTGTTGCCTAGTGTAGTTATAGTTGACATGTGCTGTTCAACACCGCTGTT -CCAAGGTGTATATATGCGGGGGGTACTTACAAAGGGTGGAAATTATATAGAAACATATAT -AATTCGATGATTGAACTTCGAGGCGCCAAACATACTGTGAGAGATGTTGCTTTTAGCAAG -CTATATAAACCAAGGCTCACTCAAAACCATCAGAGGATATGATATGAACTAAAGAAACAA -AAGATGCAGGGATGGTTAGGAGCAAATGCCACGAGGACGCAAAAACTTACTCAATAGATA -TCGAGTTATCAAAAGTTGGTTCGAGGCTCTTCTTAAGCTTCTGGAGTAATACCAATGAGC -TGGGATTCCAGTTGAAAGATCAATATCTGTTTATTTGCGCTTGTGGCCGTGGTGATTTCA -GGCCTAGATGCTCCAGGTGATGCAGGCGACGCGGTAAGATTCCCAAAGCAGATGTAATCG -CTGCAGCATGAATAGTACCCAATCATCATTTTATTACCGTTAATATTCCCAGGAGTACGG -AGCAATGATCACTTATTTAGTAAGAACGGGGTTTGGCCGGGTGGTAAGGACTCCATTTCA -TCGTAGAACGGTTGAAACGATATGACGGGATACAAACAGCAAGGAGTTGGTCATTTCAAA -GAGTCCTTTTATTGCATGGGCCTAGTATTCCGGAATATTAGCATCTTCTATGGAAACCCA -TATCTTCTCGGTTGGGTTGATCAAGGAACTTGGTATCCCTAGATTTTAGAGAACCACTTC -CTGTTTCTGAAAACTGGTGATGCAAGTCATAAATACCCCCCCCCAGCAACCCTTGGCAGT -ACCTATTTGCATGGATAATATACTGCACCAGGACACTCAGCACACCTGATCCTTTGCATG -CATCCTCAAACCTAACTGGAGACATTCCACTTGATATCTCCTAGTCAATGGATAATGCCA -GGAAATCCAGTCGACCTGGTCGGCTTGATCATCGACAAACTTGTCCATTATCACCAATTG 
-GAGATTTGACAGGGGGGATGAACTGTGTGTGACAAAGAAAAACACCGGCAGGATTCTGAG -AGGCTGGGCTTCTTGAATATATACACGTCTTGGATGAACCCCGCGCGGTACTAAAAACCA -AAATGAGATGCTGATAGGGTCCTCCAGACCTTCTACCCCATACCGTGCATGCTCTCTATA -ACGACCTGCATGGGTGCAAAGAGGGGATGATACGCGGTGGCTTCACACGAACCCGTCTTG -TGGCTGATCTAGCCCGCCAAGTGTCGGGTTGGACGGAGACTTCTACTTCTGTCGATTGAA -TGTTGGTGATTGCGGGATGAGTTCCCGGGGCCACTGGATGTCTTTGCAATAGTTAGGCCC -TCGGGCGCGTCGTGAACTCGCATTGGGCGTGTCCTGTCATCCTCGCACGGTGTTGTGAGA -TCTCGCCGGGTCACCGGATTTTGGAACAGGCCACTCGCGGGTCTCTTCGGCCCAAATGTC -TCATGGACTCTGTCGGACGTGGTGATTCTTGCTTCTGCATCTTGATGTTGGTCGAATTGG -ATCACCGGATGAGCCATCAATTGTGACAGGACCCCTGTCTGGGCATCGGTTCCTAAGCCA -GGGAAGAACGGGTTGTGGAATGCACCGGAGGGGAGGAAAGTGGTTGGAGGCGCATTTGAA -TCGACACTGAATTTTAATTTGATTGCATGAATAGTCTACATTTGATATAAGGAGAAAAGG -CCAACAGCAAAGGGATATAAAGAAAGGTAATAGAGGGGTTCTTAGGAAACAGATTAGACT -TGaaaatcaaaacattaaatcagattaaaacagagagatttaaatcagaatcaaaatcaa -aatcaaaGCTATATAGATCTCAACTCGAAATATCTCAAGTGCTATTATTACTTTGTTGTG -ATATGAGGTGACTCCATCCCCGCTCCTTTCCTCACACTTCTCctcttcctcctttctctt -ccctctgtaaccttgtctctcttcctctccctctctttctccttgtccttttttttTGGC -TCAAGTCTTCGGGGTTTTTTTTCTCTCTTGGGGGTTCCTCTCCTCAACCTTACCTCCCCT -CTAAAGTCAACCCGTCCTCCCAGCCTCATGTTCTCCCCGTATTCGGAAGAATTCGAGGTC -CCCGAATCGACGCTTCGACCATGCGATTTTGACCTGTGAGCTTCCCCTTGGCTGCCCCCC -CCAACTACACAGGACTTAAAACACTCCGGTCGGAAGTCTGGACTTCTCTGTGTCGCGATT -TATACCTTTTTATTGTCCCTGTTACTTTTGTCAGTCTGGTTCTGCCTGGGATGCGCCTGT -GTCATCGTCTTTAGTGTAACGCTTCCCGATAGTTGACCAAGGCGCGCCAGTACCAGATCC -TGCTAACCCCCCTACGCGGCCTTTTTGTATGTACAGATGAATCGGCACAGTGTGATTGCA -AACGTGTGTCTTTCACCATGTCGTAAGTCCCTTCGTCTGTGATCTCCGTCCGGCCTCCAG -CTGTGGGAGGGAGACTGGGTTGGAATTCAAGTGCACTTGGTCGCGATCTTACCATCTTGC -TTCATCTCCTATGTTTCTTCTATTCTCTGACTCTTTACCCGATTAGTTCCCCTCCGACAT -CTAAGCGCCTCAAGACGTCGGCGACTACCAGCGCGCCCCCGCATATGCTTGCTCAACAAC -AAATCCATCCATTCCACCGCGTCCCAACCTTTGAAGGCATCCCGATTCCGACTGCTCCTA -TACCTCAACAAAACCCCGGCGCTTCGCGCAAACGTCCACCTTCTCCTACGGGATCGTCAG -CCATGATGGCAGCCTCAGGGAATCCTACGGGTGTAATTGATGACCCCGGTGCCATTCCCA -CGGTGGAGTCTACACCCAAGAAAAAGGGACGTACAAATACCCCATGGACTGCAGAGGAGG 
-AGCAAAGGCTTAAGACAATGCGCGACGCTGGGCGCAGTTGGAGCGAAATCGCCAAGGTGC -GCGCACGGCTTCTTATAGGTTGAAGCTATGGGTTGCTGACCTGGGTCTCTTTTTTTTAAC -CTGTAGACTTTCCCGACCCGAACCGAGGGTAGTGTCAAGAAGCATTGGTATAAGGTACGC -AATATCAAGCGAATTCAGTAATCCCATGTCTCGTCTTACTGACACCTTACGCAGGACATG -CACTACGCGGAATTTGCCGAAGATGAGGTATGATCCCGATTTTCTTTTAATGTCCGTTGT -CGCATCCGCCTCTGACGTCGCACAGTCTGTGAAGCTTCGCGAGGCGATCAAGGAGTACGA -GGCAAACAAATGGAAGGTGATCGGTCAGAAAGTGGGAAAGCCTGCCAAAGTGAGTATCGG -ACTTTCCTGCATGTCATTGTGCCATCTAATCTCAATATTTTCCGACAGGCTTGCGAACAG -TATGCCAAAGAGCATTTCAAGAACGTCTGACCATGACGCCTCTGATGAAGTGATGATATC -GTTTTCTTAGCATCTTTTACGAATTTGGCATTCTCTCTCCCGGATTCCCTGCAATTCCAG -AGCGATGCAAGTTTCATCTGTGGCCGCTGGAGGTCACTGATTTTACAAGGCAGATGGAGT -TGGTGATCTGGATCTCTTTCAATTTTATTGGTTCTCATTCTATGTGCGCTATGAACCCCT -GCGCCTCCATGTTTTCTTTTTTTTTCTCCCCTTCAAGGCGTCTATTTTTTGACAAGCGCT -GATTCTTTTTGCGCTTTTGTGTCCATCAATACCATATGATTCCTACTTATTCCAGTAGGA -AATTGCATTAGCAGAAGAGCAGAAGGAATTGTACTTTGTAAAGGTGTATACCATGGATCT -CTCCAGGTAAGGTGGAGGCGGAAAAAGGCCAAAGCGGGCGGATCTTATTTTTTGCCCCCG -GCAATTGCCCCCCCCCCCATTTTTCCCCCACTGGAAATTGGATGCTTTTCACAAGTGCAT -TGTTACTTGGTTTTTGAATCGAACCTCATATTACAAGGTATACCGAACCCTGTGATTACC -TTGATGTTACCAAGTTCGCTGACGCTACGGTGTTCATAGTGACTCGTCTCGGAAGGTAAC -ACAACCTCACGGCTACCATGAAGACGAGTAGCGTGGTATTCTCTCACGATCAAGGTATCT -AGGACAAACCTCCTGCATGAAGACCGGTGTTCGACAATTGTCGTTCTGAGAATATACGGT -TAGACTTGATCTGGGTAATTCCAGCGAAAGGACTCAATGCGCTTTTTCTCTCCTCAATTA -GGTGTGATACTAACAGAGACGCAGCATGGCACCGGAAAATGATATCACTGCCCTAGAGGT -ACACCCCAACAAGAAAGACCTGACAACTGAAACTCCTGGTGTGGCCAACAAGGATGAGGG -AATCTACGACGAGACTACGACCAATCGCCGCAATCCCGTTGGATCACGATTAGGCTCCGT -CATCAAAGCCTTTGAGAATCAGCTCATCGAGTATAACTTGGAAGCGCGGGGGGTTGAACG -AGTTGCTCCCGATGAGCGCATGAAGCGCAACACGTGGATGTCCTATCTACAAGTGTTCCT -ACTTTGGATCTCTATCAACCTTGCACCCAATAACATTACCCTCGGGATGTTGGGGCCTGC -CGTGTATGGTCTCAGCTTCCGGGACACGGCGCTTTGCGCTGTCTTTGGCGCCTGTGTTGG -TTCCATCGTCTCCTCTTGGATGGCTACATGGGGGCCCGCTTCCGGGGTTCGGACATTGGT -GCGTACTACATATGGAAGCTGTTGGATTATTGTTTGATAGAGAATGCTAATTGGTGACCA -GGCCTTTGGAAGATATTCAATGGGCTGGTGGCCCAGCAAGATTATTGTTCTATTGAACCT 
-TGTTCAGATGATTGGATACGGCTTGATCGATTGCGTCGTTGGCGGACAAATCTTGTCTGC -TGTCTCCCCTGGCGGCATGTCTGTAGCTGTCGGTAAGTGATTCGAGAACCAGAGAACTCT -ACTCATTAACAGTGGTACTAATCCTGATGTTTCAGGCATTGTGATCATCGCGGTTCTCAG -CTGGGCTGTGGCCACATTCGGCATCCAAGTCTTCCACTACTATGAGCGGTTAGTTTGACA -GCTTATACCACGAGGAACAGTGTCACTGACTGGTCTCAAATTAGATTCGCTTTTTTGCCC -CAAGTAATTGTGGTCTTCATTCTTTTTGGGGTATCATCTGTGAAGTTTGACTTGTCAACC -ACCTCTGTCGGTGATCCCAGGACATTAGCTGGTAATAGGTGAGTCTGCACCTTAGACCTT -ATCATCAAGTCCCCAATCCTTTCTTACATGCTTCCAGGCTATCCTTCTTCTCAATCTGCC -TGAGCGCCGCAGTCACCTACGCCCCACTCGCGGCCGACTTCTTCGTCTACTACCCCGAAA -ATACCTCTTCCGTGAAGCTATTCAGCCTAAGCCTGGCAGGCCTCCTCGTCTCCTTCACCA -TGGCTCTAGTCTGCGGTGCTGGGTTGGCCTCGGGCATTCCTACCCACCCAGAGTACTCTG -CCGCCTACGAGCAAGGCCAAGGAGCACTCATCGTCGAGGGATTCGGCCCGCTACACGGCT -TCGGCAAGTTCTGCTCTGTGATCTGCGCCTTGGGCCTGATCGCTAATACCGTCGCACCCA -CGTACTCCGCCGGAATCGACTTCCAGATCCTCGGTCGCTACGCCGAGGCAGTACCGCGCG -TCATCTGGAATACCATTGCCGTCATTATCTATACCGTCTGCGCGCTAGTCGGCCGCAGCA -ACCTCTCTGAGATCTTCACCAATTTCCTCGCTTTGATGGGCTACTGGGTTGTCATTTGGA -TTGCCATTGTCCTCGAGGAGCGGTTCATCTTCCGTCTTCGCACAGGGTACAACTGGGCGA -TCTGGCGGGATCCGTCAAAGCTGCCGATTGGAATTGCGGCTTTCGCTGCCTTTATTATTG -GCTGGGTTGGCGCCGTTTTGTGTATGGCGCAGGTGTGGTACATCGGACCCATTGCCAAAT -TGGTCGGCGAGTATGGTGCTGATGTAAGTTGTCCCACTTCCTACAATGTGTGTCTTTTGA -TCCTAACTTATCGCTCGCTAGATGGGAATCTACGTCGGGTTCTGTTGGACTGCCCTCTTA -TACCCTCCTCTCAGATTCATCGAACTTCGCTTCGTGGGTCGCTAGGCATTGGCCTAAAAT -AATGTCCTGTCCAAACATTTGAGTGATCTTGGTTCTTCTTGAGCGAGGCTTTGCGCAGTG -TTTAATTATCTACTTCCCGGGGTATTGAGGCTGAGATCTACTAACCCACCGCATTTTCCC -AGATACATGAGATTAAAAGTGATATAATGGATAGACTTTCCTTCGGCACCACGTGCTCGG -TGATGGCCGAGAGCCGAAGTGCCGCCAATTAATTCATCTAAAAAAACCGCGTGATTTTCT -TTTGATACCGGTACTCCTAACATCAATATCAAAGGTCTAGCAACAGGACCTGTAGCGATA -TCCCATCACGTAGTAATTACGAACCAGAAAAATTATAAGTCAAAAGTAGAAACCCCACCA -ACACTCTCATTTCATCTACTAGAGCACTATAGTACAAAagagacagaagagtcacgtaga -cagaacagagacagacagacaCGCAGCATAATGTTCACACTGTATTGGGGTTTTCAAGCA -ATAACCAAAGAAAACCGCCGCGCAAGCAGAAAATCGGCATTAAAGATTTAGCGGTACCAG -TAGTCGGGCTTAACGTAGTCGGCAAAGGTCTTGGGCTCGACGTGAGGGGGGTGGTGGTGA 
-CCGTGGTCGTGGTCGTCTGATAGAGAGAATGTTAGCATGACCAGTGGACTATCATATACT -ACGGGCCAAAAAGAAAAAGAGCGGGAAACATACGGGAGTGGGGGTCCTCCTCGGGTCCGA -CGTAGTTCAGCTTGACCACGTTGCCGCACTCAAGGCAGCGCTCGAGGGGGCGCTCGCGGG -AAACCTAAAAGGCAGATAGTCAGTCTCTAGAGCTGGAGGTATCGACACCGTTGAGCTTGA -ATGGGGTGATCATACAGTCAGCCAGTTGACCTGGTGAGAGTCTACGGGGAAACCGGTGCA -ACCGGCGTACTGCTCATCACCGGCGCCGTTGACGATGATGGGGTCTTCGAGTGTGCCTGT -AAAATTCCACCGATTTGGTTAGCCAATGTCTCGGTCTTCATCCGAAAGTAACGATTATGA -GATTCCGCTGATATTTGCTCATCCATTCATGATTTGCAATGGAAGGTTCAAGGATAGGGG -TCATACCCTTGCGGGAGGCATCAAGGGGACGCATGTCGAAGATGTCAATGCCCTGCATCT -TTCCGATGAGTTCCAGACGCTCAAGACCGGTGGCCTGCTCGACGTCGGTGGGAATGGTAC -CGGGCTTGGCACCGGGAGCAACGAGGTCATCCTCGACCTTGATCTCTATATGTAGACAAA -ATAAATAAATCAGTATTGTGTTTTGTAGTTATACACTTCATGTATAATGACGAGCCTGGC -CTGGCCCGTGGCTCGGGAAAAGTATCCGCATAACCAGCACCCGAAGTTTCTGGCGTCCAA -TTGCACGCAGATTCTCGAGTGGGCCATCGTAGGAATACCTCTTATACGGCAAAGTCCAGA -TGAAAAGCAATTGACCGCTATAGGGTCAGGGAAAGCACTCACCCTCGAAGGTCAGGATCT -TGCCCTCCTGCTTGGGAGTCCACTTCTTCTCATTGGCTAAAAGAGGTTCGCGTTAGCTGG -TAATCATAATAGGTATTTGATATCAACTGATTGTAAATGATCGTTTGTGGTTGGTGTGCG -CGGGTTATTGGAGCAGTTGTGCGTAGACCGGATCAGGCCATGTGAGAAGTCACGTACAGC -GAATGATGGAGGAAGAGAAAGGGCGCTGGGCGAGGGTGAAAGCGCGGGCAGGCGAGCGCC -TTGCGAGAGCGGAGGCTGTACGTTGGAGGAACATTGTCCTGGAGTTAATGGGAAGAATGA -CGCGGGGCGTTTGGGGCAGGAGAGGGAGTTTGGAGGTTTGAGGTTGGGCGAGTTTGAAGC -CGCTTTGGCCCGAGCGCCTGTGATTGGTTGATAGGCCGTCCAATATGACCACGACTAAAG -ATGAGTCAGCCTTTTTTTGATTGGCGGGAGCTCTCCACGGGCGGTCTGTTCATCTAGCGG -CAGAATGTCCGTTTTTGGAATATATAGGGCTTCTCTGTGTAGGGCATGGGGTGGCATCCA -TTGATAATGTCCTAGGCCAACACCATAAGTCCCAGTATAATAAATAAGTTCTTCTAAACT -AGTAGAATAAAAACTCCAATTTCATCCAAGCTCTTTGGCTCTCCGCTGACACGTCGCTGG -TGGTATACCAGGCCCAAGCTTCCGGGAAATGTGCGTCCATTTGTTGTACTCATACTCCTC -CAGCGCTTTTCTCAGCTTCTGATCCTATCCAGACTTAGCACAGTCCCGATTTTATTAGTT -GTGTGGGGATGATATGTACATACCAAAGCATCATCCCAAACCACAACCCTTTCCCTCAAT -TTGGTACAGTAACGGACTTGCAATGTTTCTTGTTTTCGCCCCGGAAAGAACCATGCAATA -TCCTTCCAGGAGAGCTGAGACTCCTCCTTTAGTTTGACCAACAACGCATCTTCCTCAGGA -CCGAATCTGGTCTTTGGTACTCTGGGCGCAGGGAAAGGTTTAGGCATGCCTGGACGGGTC 
-AAGTGACTACGGTCGATTTGCGGCAAGTATGATCCCGCTCCAGAGCCCTGACCTGTAGCT -GCAAATGATTGGGGTGGTAGCCGATGGTGGTGATGCTGTTGCGGAAGGGAACTTTGGAGG -TAGTCCTGGCTCGATGTCTGGTGAGAAGAGGAAGCGCCGACATGATCAGCGTACGTCAAG -TCGGTCAATTTACCTACTGGAACCGACGGGTCACCCATGGGTAGATTTTGCAAGATAGTT -CGATACCCATAGTTGGGATCAGTGCCCTCGCTGGCGTCAGGCTCATTATCGATACGTCGT -CGCTTTCTGCTACCCTGGATTGGAGAGAGTCTATATTCGTGTGTATGGTAATGCGAGAGC -TGCCGCTGTAGCGGGTGGTCCGTGATAAAAGCCGGTGGAGACTCATAGGCTGGCTGGGTT -GTATCGGAATAGCGAAATCCAGGCCGGTCTTGGAAAGGCAACGGAGGTGCGCCGTAGGAA -ATGATATCCTCTCGGTTCGCATCTTTACCGCCGTCATGAGGACTGTAGTTAACCATGGAA -CCCTGATAAGGGTCCGCTGGTGAAGCAGACATATGTACAAAATATACCAAGGCCTGCAGC -CTCGGCAAGTGGCTTGAATGACTGCTAGGAGATGAACGATCCAAGGTGTGTTTATTGCTT -AGGTATCTCTCTGGAATTGAAAGGTAACCAAAAAAAAGCCTCTGCTGTTGCGTAGGCGGA -CGATGTGAAGAGAAACTTGCCTGTATATGTAATCGCAACAAAGAAGAGGAAGGCCTCAAC -GCGTTGACGTGCAGAAGAGAATAGATAGGGTTACTTTGGGTCAAATTGTAGGCAGGCCTT -GCAGAAGAATCGGTCAGTCATAATGCATAGGATGAGATAGCACAGATCTCCTAGACGGGT -GAACGGGAAATGAAGCGATGGAGACATCTATATCATCTGGAGAGCCCCCTGAGAGAAAAT -CCGAACCTGTGCCGGTGACCAACTTGGAGCGTGCGTTGCCGGGCTGATTCTATTAGTCTC -CAGTGGACCAGACTGCAAGATGATACTGAATCACTGTCAAGTTATTCGGGATAGGCAGGT -TTTATCCCATCTTTCCGGCCTCCATAGAAGTTTTTCCCGCTTTGCTTTCGCACAGACTCC -CGACTCCCAACTCCCAACGTCAAGGGGAGGGGTTtaataataaataataagaaataataa -aaaCCTTGGAAACGGTAGAACCTCCTCCAAAATTATTGTTGGAAAAGCGCCCATTAATGG -GGATAGACATCAAACGCAAAACATAGGACCAGGGTAAGCCCAGGGTCAAGATCAAAAGCT -ATGACAAAAAAAAAAGATATCGCGAACAGGTGAAGCCTGTCAGACTCTGGGGACATCGCA -AACTAAACAGCTAATTCGTCGGACCGGTCTTCCATGTCGTCTTGCTGTAAAAGAGGCGGA -CATATCGTATCGGCTGATTGATACAGTATAGGTTAGTCAAGTAAGCGATGGCAAACTATT -CTATTGCACCGGGTTTCTTCGGAGCCAAGGCCTGCGGGAGTCACAGATATCCAATAGAAC -AGAACAAGATAACAGGGGGTTGAAACTGAAAACAATACTGGCGTTTGGACTTGGGTTCTA -TGTTGGAAGAAAGAGCTAACAATCAGCGAAAGAGATATTGGGGTCAAGTTGGTAGTCACA -TCGAAGTCATACCGAATTTTGAGCCACACTATCACACTACCCTTTGCCCGAATTTGTCGA -ATAATACAGATGTGATTCGGATGGACTGCCGAAATATACTAGCTGCATGCGGGCCGTCAT -CTGTCATAAAGGACCTGATGCATCAAAGCGGAGATCTTGGACATTTTAATATTGCGCAGG -TATGTTTTGTTTTCTATATTTCACCTCCGGATGTTCGGCTGATATCTAAAGTTTTATTGG 
-CCTATACAGCTTTCACATCGCGGCAATCTTCAAGTGACGATATCAACTAGTGCCACATTC -CGAAGCCAGCCGCATCCTCGTATACTTACAGGAGAGTTATTGGGAGAGAAAAGATTCAGC -GACAGAAAGTGTATTCGCAAAAGGAAACTTGTATTGTACAAAACACGATCCACGGCTTTC -CCCTAAGATTGTTCGCTGCAATACATGAATTAAGGATGTCGTGATACAAGAACCCAGATA -TAAAAAAAAAGGGAGGAATAAGACAACGCTGGGAAAAAGAAAGAAAAGACTTTGCAACGC -CGGATCAGCTGCGCATGTTTATGCAAAGTTGTCAGACCGGTCAACCCAAATATCTTCAAG -CTCGGCGAAGTTTTGGAAGTGCAGAGGGCGGTGTTGGTCATCGTAGAACTTAAGCGGGGT -GTCGATCTTGACGTAGATAGGTCTCTTTTCCACGCTACCAAATGAGCTACCCATGAAGCT -ATAGTTCCAGACGTTATCCTCGGGAACAAGGAAATAACCGCGGATCTTGTCACTGAGGAG -CAACTGACACTTCTCACCCATGTTGGTGGAGAAGCCTTGGGGCTGATCTGAAGTTGTGTC -CTTGTTCTCTGCGCCCCACTTGTAGCCCGCTGGAGTGAGACCCCAGGCGGCAAGTGAAAC -CGATCCAGGAGTGAAAGAGACGGTCATGGTCACGGTCTTCTTATCCCAGGAAGGATGGGC -GTTCATAAGGCGTGCGTGATGTGTGACATCCGCCGCAGACATGTACGGCGGCTCATTGCC -AGAAACCGTATGGATGATACCGAGTGGCTCCAAATTTTGGAGATAGTCGTGCTGCGGCAA -CTGGTGTGGCAGTTGAACGTCCCGGGTGTTACCCACTTGTGGAATCATGACAATGGTGCG -GACCTCCTTGACCTGGTCATTGTCTGGAGGAGAGCTACCGTAGATGTAACCTGTGACCTG -CACTCGAAGATCGGCAATGGTGATAAATCGCTTGAGAACGTTCTTAGGCATGATGTAGGT -GTAGCCGTCATCCCGGATGTCGTCAGAAGAGACATAAATGTTGTTGGCCCTCGTCCGGAG -GTTGGAGGTCGCGATTGCACGAGTACGCCACTCTGTCTTGGATGCAAATGATTGCTGTTC -GTACTGAGACGTAGTGGTGACGATGATGTCCTCACCACGAGCGTTCTGAGTCTTGGTCGT -CACGGCAGTGAGCTGCTTTTGCTCTTCTTGCTGCTTCTCAATCTCCGCCGCCTGCTGTCG -CTGCAAAGAAGGCGCACTAATCTCCATACCAAGAATAATATCACGAACTTCGCTACTGGT -CAGACTCTGCACGTTGACGTTGTTCTTCTTGCCATAATCGTTTAAGATCAGATCACGCAA -TTGCACTTCAACCTTGATCCAGTCCTCATCACTTAGGGATGGCCAGATGTGGTGTTCTTG -AGTGATGACGGTCTTATCGGGGCGTAGAATGATCTTGGCTTTATCGATATTGACGTGCAG -AGCTCGAAGGATGAGGATAAGTCGAGAAAAGGCAGTGTACGGAGAGATAGACTTGAGCCA -CTCATCATACAAGTTGAAAAGTACCATCTGAGGCTCGGTCGCACGGAGAATCATATCGGC -CAGTTTCTCTACCTTCATGGCAGCCTGGAAAGGCAGCTGGAGCTCAGAGGCACGAATGGA -AATGTTAGGGAAATCGAGCAAGTGGACCTCGAGAGGGTCAAGAAGACCCTTACGGGTGAC -AATGAGCTGCTTCGGTTGCTCTTCCACGGGGAGAGACCGAATGAGAGCGGCAACTTCTTC -GGCTGTTTTCCACTTGGCCAACTGACCCAGACGCTTTTGGCCTGCCCAAACACTGGTGTG -GATGATCTTCAGGAACAATTGTCCAGTCCGAGGATTAAAGATAAAGATTGCACCGTTGAT 
-GGGTTTTGTGGTCAAGTTACCCTCGAACGTCTTGTGGATCGTGACACGGTAGACGTTAGT -GTCATCAATAAAGAGCTGAATCTGCGGGCTGAAGAGCTCAGAGTAGTTCTGAGAATTGAG -GAACTCCTGGCTGCTCTCCGAGGCATACAGCTGGAGACCCTTGCGTATACGCTCACGCAG -GACATACAAAGCTGGGTTAGCTTTCATGATTTTAGCCATGGCTTGTTGAACTAGAGTCTT -GAGGCCTGGGAAGTACTGTCCATAAGCTGAGAAGAGGTTATATGCAAGATCAATGCCAAT -CATCAACCCAGTGGCGGAAGGATAGATACTCATGCTGTCCGTAGTGTAGTCGAGGTACTT -AGCACGAACGTATCGCTCGATGTCGTGAGAATCGTAGTCACCATATCTGAGCTGCACATC -GATCCAGAACTTATTTGTGGTAGTCGGCTCGTAAACGTCCTTAGTATCGAAGACAAGTGA -AGGTCGGGTAACATTCCATTTGTTCGTCGCGAAGAGCAGGATATCCGCACAAGAGCTGTT -CATCTTGTAAGATTTACGGGGGTGAATAGTCTCCTTTTGAACGGCCTCGATGCCAAGCTG -CTCCAGTTCCTGATCCAGAACTTGACACAAATCCATAACGACCGATTCGTGGATCTTCTG -CCACAGATGGGCACGGAAAATCTGAATCAACGAAATTTTCAGGGTTGGAATCTTGCCGTG -CAAGAAGATACCAGTAAGATCAAGTTGCACCTGGAAACCAACGTAGACGTTGGCGCGGTT -GATGGTCGGCGACCACCAGAGAGTAAATCGACGGTTAGGGATCTGATTCAGACCGGACCG -CTGAGCGTTGGTGAGCTTCTTGAACTTCATGCTTTCCTCGAAACCGCTGTTGTGCAAGAC -AAGGTAGTCATGGCGCAGATAGAGCTGATCTTTGTCGACTCGGAAACCAGCCCACTCGGT -TTCCTCAGATTCGAGACTAATGTGCTCGATCTTGAAACTGTGCATCTGAGGAGCTGAATT -CTCGACGCCTTTCTCCACTCCAGATGTAAAGGCGCTATGCAGTCCTCTTTCCTCGGCAGA -AAGGGCAGCAAACTCGGCAGCAGTCATTTCTACCGTTTGTCTTTCCGAGGATTGGGAATC -ATTGGAGGCCACGTCGTCTCTATAAAGCACTAGAATATGATTCGGCGTCACCACAAGATC -TTCTTTCTCCCCGCCGATCTTGATGCGGTACAGGCGGTCGATGCCATTCACTATGTTAAA -TGCACGGCGAGGCTCTCCGTCGGGACCCAGAAGTTGGTCACCTTCGCGCACGTCCTCGAC -ATTGACCTCAGTCCCATCGTATCGCAACAGACGGGTTCCCTTGGCGAGACACGCTTTTTC -CCAGAACAGACCTTCCCATGATGGGAAGGCTGTGGCCTTGAACAAGGTGTGCTCAAGGAT -TGTTTCGACACCACCCAATGCTTGGATAACATCAGTGCGGTAGGCATTGAGGTTCCAGAG -TTTACCATCGTGTCGCTGGCTAGTCCACCAGAATGGATTACTCTTCATCAGCTGGTATTG -CTTGAATTCGGCGCGGAGTCGGAAGCCCTTGTCGAAGCTGAGGGTGCTTCGATCCTTCTG -GAAGAGCGTGTTGATACGGGGAAGACCGCGATCCCAGCTGTCCTCCAAATCCTCTAATGT -TAAGCGGCGGTTTTGCTGCTGAGCTTCCATACGCTTCTGCGAGTATTCCATCCAAACACG -CTGAGAGTCAATGAACTCTGCTTCCCAAGGAATGATGTACCGGAAAATGTTTGGGATCAA -TGTTTCCTCATCATGAGACATACCCGCTCGGAAGTGGGTGATGCCAGTGTCAGTCTGCTT -GGACCAGCGCTTATCACTGGTGGGGATGAGGATATGAGATCCCGAAATCATACCAAGACC 
-ACCAAGCTCCTTGGGGGTGTAGAAGACCGCCGGAGGGAAACGAGAAGGCATCTTAGAATT -GAGACCAATCTTGACTCGAGTTTGAATCTTTGTTTCACATTTCACGATGGTATCAAGAAG -ATTAACAGTCGACACTGCGGCCTCGCGGTAGTAAGTGAAGAGAGCAATCAGAGCAGTGTT -CCACTTGTTCGCAATCTTCGTGAATGTAGTCGAACCAGAAGACATGAGAATTTGTCGAAT -ACGGTTGTTGAACTTTTGAACGTCCTCTTCAGTAACCTGGAGGAAGGCATGTGCGGTGCG -TTCCTTGGTGGAGTTGTCAACCAGAGACCAGACACTGTCTTTGACCGAGAACTCCTCGTT -CATGTTCCGCATCTTAGGGAGAATTCGAACTTCAAATCCATTCATCGAGAAGAGCAAGTT -GGGGTTGTCTTTGCTGTACACGCTGGCGAAGGTATCGTCCCAGTCAATGGTGGTCATGGA -CCTGGGGAGGCGGTTCTTCATGTCCCAGAAAACAGCACGGCCAAGGTTGACATCATGGCG -CATCAAGCGCATACGACAATCGCGAGGCCAGCACTTCTTATTCTTGTAACCAATGACGTT -CTCGAAATTAGGGTCAGGATTTTCGGTTAGGAACCGTTGAATCAGATCGCGGGAGTCATC -GGCCTGGAAGCGGAAGAAGACCCAAATCTTGTCGACATAACGGGTATACAAACGGATGGG -GTGTCTTGTCTCAGTGGCACGATCTCTGAATTGCAAAAAGTCGTTGGGACTTCCGGGAGG -GCCAGCCATTTCACTCGCTCTCTGCAAGCCAAGAAGCAACAAATCGATCATCAACCCGTA -ATACTGGAACACGAAGCCCGAGAATTGAAGACCGCGAATCAGTCCATAGCTGTTGGTATG -ATTCATGTCTTTGTAGCTGAGCTGGACATTGTTCTTCGCGGTGATGTAGTCAGCCAGATT -GTGGTCCATGATCAAACGGAGCAGCCGGTTCAGCAGAGTTAGATCAATCTTCTCATAGAC -CTTGGACAGCTCAGTCTCGATCATCACGTTCGTCTCGCCTTCCGCAGTCTCCCAGACATT -GGACAGGTTGTTGATGCCTTGTGTCCACTTGTATGTGAGTAGAGGAGGAACCTCAGAGTC -GGAGGGCTTGATCCAAGCGGGGAAAAGATGGCGCTGCTCCGCTTGGTACCAAAGGTACTG -ATCCAGATAAGCGTCGGTAATTTTTTCGATGGGCTCGATATCGTAAACAGGATTGATGTT -GTTGTAGTTGTCATTCATGTCAATCTTCACCTCCTTGAACGCTCTCTGTGTAAGGAGGAA -CCGCTTGATTCTCGCCAATGTTGTACCAGGGGAATCGTATGCTTGTTCGATAAGAGCCAA -TTCCTCTCGCTGGGTTTGGTTCAGTCTACCCTTCACTGAGTATGATTCGCGCAGCCGCTC -GAGCGCAAGAATCAGGATTTTGGTGTCGTGCTTGTAAGAAACACTGGGGAAGGGAATTGG -GGAGAACTTGCGAGACTCCAACCAGTGTACCATGGTGGTGTAAATTGCAACACCCTCTTC -AGACGAAACGTAAGGGCCGTCCTTCATATAATTGTGCTGACGCTCTTGCTCGGACTTGAG -CCATAGACGCGTGAGTCGACCAAGGTTCTTCTTTGCGACAGTCTTGTCAACTGTGGCACC -ACGGCGAATTCTCTCCCGGTTGTAATGAGCGACAGAGATCCACCAGTCGGCCTTGCTTTT -GACGTAACGAAGAATGATGTTTTCAATTGGTGCGGGCAGGCCGGGTACCTTCCATGGAAT -GTTACTCTTCCAACATCTCCAAGCTTCAGATAAATGCTGCAAAACCACGTTGACTTTGTT -TTGTTTGATACCCTCCGGCATCATGTCCATAAGATCGGCCATAACCGAGGCACGCAACTC 
-AAGATCGAAATGAGATTCCACACGCTGTTTGGTGACCGTTTTAGCAACACCCTTGCTATG -ACGACCCTCAAACTGACGAGAAAGCAGGTTCCCAAGCCAGCGCTCAAGCAGGGGAATGAT -ACCACGCATGAAGAAAAGCCAAACTCGCCAAGCGGGCGCCCAGAATCCACATCCAGGGCC -CTTACCAACGGGGCCAGAGTTGAAGCGGTAATAAATCAAATGCTTTAGATCCTTGCAGGA -ACGGATCTGGTGCATGAGCTTATACTTGTACCGATACATACCGGTAAGCTGGCCCACGTG -GTTGAAGGCATAGAGAATACCATCTGCGAGTTGGAAGGCATCGATGTTACCGAGACGGTA -TTGGACTTGGGCATCGACGATGAGCTTGGTCAACCGGAGAATTTCACGCATCAAGTGGAA -AGCATTTCCAAAGCGAGACTTCTTACGCTCCTTGGTGGTCAATGTCTTAACAGGCTTCAA -GTTGAAGTTGTAATCAAGATGAAGATAAGTCAAGTTCTTGCGGTGAATCAGCAGATTCAG -CATATTGAAACCCTGGCGACAAACTTGCAAACCGGCCTCAACCCAGTCGATTGTTGTTTG -CTGGAAGAACTTGGTCGTTTTGAGAGTCTTCATGAGATTCTGCTTGCTGTGGGCCTTAGG -TGCACCCTTGTGCAACTCATTGAGAACATATGTCTTGAGCATCTTCTGGTATGAGACTCG -GACTTTGACAGGCTGACCCTGTGGACAATGCTCCAAGTACCATTGCTTGACCAACGGCAC -ATCCTGCGCGCGAACCATCTTGCCAGATCGCTTATCGAATGGGTGAGGGGCCCACCACAG -GGCGATGGCGGACGCGGTGTCGGATGTGTAAAGTTCTTCGTCGTCAAAAAAGGGCTGAAC -CTCACCCGGAAGCTCAAATTCGTCGTCTTCATTATTGCCAGGCCCGAAGATTTCGTCTTC -GTGGCTGACGGTGATATTTTTGGGCGCGACGGAGCGGGAGGAGATCGGGTTGATGACAGG -ATCGAAATAGAAGGCCGGCAGGTTGGGATCCTCAGTGCGGACATAGACCACCTGGGGATG -GGAGTACCAAGCGACCTTGACGCTTCGAGGTAGGGTATTGTAGAGGAAAGGGAAGGTGAC -CCGGTACTCAGTTCGAATCGGGGCACGGAAAATAATGCGGTCAATGGCATTGAACTCGCT -GAAGTCTTCGTCATTAGGATCGATATCTTTGTACAAAGGCTCAAAACGGGGTCCACCAGG -AATGGCGACGTTGAGGGCCTTGGCTGTGAAGAAGCTGTTCAAGTCGAACATGTGATAGTA -ATTCTGGTCAACAACATCACTCAGAATCTGGTGACTGAGCCGATGGAGAGTGGCCATCTG -GGGAAGCGACAAATTCCACTTCTTGTAGCTCGGGCCATTCACATGAGGGGTATCAAGAAG -AGGACGGTGGTCGTAGAACCACTCATAAACTGCGGCATCTTCTTCTTCATCCAGTTCCAT -CTGGATGGGCTCCAACGGTTCGACATCTTCAATGTTTTCTGACCACGAAAGAGGCGGCTC -TTCGTCATCGAATGGGGGGAAACGCATTCGCTTGAAATGTCTTCGATCACTCTTCTCTCG -ACGCATGCACATCCACATCGTTGCCCATTGAGCATGGAAGACAGGCTCGATGACACGAGG -TGTCTCATTGACTAAAGTCAGACAGCCGTTGACGTGATAAAGCACCTTGACTTCTCTTGC -TGACTCCCAAGGCATCGGCATGTTTTCCAACAGCTTCAGGACGGCATGAGGCATGAACTT -GAGCGCACCGAGGTAGCTTCGTTTTTCGTTGGTGAATTTCTTCTGTGAAACATCGCCAAT -ATCGCGCACAATCTTCCGTAGGTGCTCTGGTGGCATATCGGCCTTTTGTGTCTCAACAAA 
-TCCTGCTTTGCGCTTCTCGCCAAACCGATTACGTTGCGTGCGCAGCCATTCATTTTTCTT -CTGCGCAAATTTCGCGACATTCGGGTCGGCCGGCGGCTGGTATCCCGGGGGCGGCGGCGC -CATTGGCATAGAAGGAGGAGCTGCACCCCATCCTGGAGGGGGAGGAGGGGGAGGGGGCAA -TGATGCCATGATGGTGATAGATGGATGGAACCCTCCCTCGTCGAGGTTGTGTTGGTTCCA -AGTCGCGTCAAGCCAGGTTGAAGGTCATGTGTAGCAAAACTTGTGGCTAGTAAAAAAGGA -ATATAACTTCAAGAAAAGAGTTGGTGATGGGAAGATGAAAGATTAAGATGCAAACGCGAC -AAAGATAGAGATCCACGATCCCCTACGCGATGTGACTATCTCTGCTCCTGTCTCCGTTGA -ACTTTGAGAAGTGGCGGCTCCGACTGGCACTATTCCGCATTGGGACTAGCGCAACCTCGC -CAAATCTGCGCTAGCGTCGGCTGAAGCAGAAACCCACACAAACCCATCTCGACATATTTC -ACTCTGCTTCACGACAACTCCGACAACTGCCGCCCTTGACCAGTTCAAGTACAGTCAAGA -TGACCAACAACAGACTGCAATACGTGAGTTTCATCAGAAAATCCTGATCGGAAATTATTG -ATCTGACTCTTCTCGATTTTCAGCGCCGCCGGAACCCGTACGTTCAAAATTCTCTCAAAA -GACCAATCCCTCGCACCACCATGATCTATACGATGATTTTAGTAGCGGGATGGCTATCAA -ATTTTCGAGCGATTTATTGACGGGTCTTTCTTTTGTTTCAATAGGTACAACACGCGGTCC -AACAAGGTTCGCATCATCAAGACCCCCGGCGGCGAGCTCCGTTACCTTCACCTGAAGAAG -AAGGGCACTGCTCCCAAGTGCGGTGACTGCGGCATCAAGCTCCCCGGTGTGAGTACTCCC -CCGGCGTTCCTGATGCATTTTTGAACGGTTGTGTGGGATATGTGCGATTGTGTGTGGGCC -AAATGAACGACTTGAAAAAAAAATCTGTCTGGATGGACGGATATACAGACGAATGGCCAA -CCGAAACGCTACAATTTCCATACGACCGTCTCGCCGAATTTTTGAAGTTGGCTCGACTGA -CGATTTTTTTTTTTTTTACAGATCCCTGCCCTCCGTCCCCGCGAGTACTCCCAGATCTCT -CGCCCCAAGAAGAATGTCAGCCGTGCTTACGGTGGCTCTCGCTGCGCTGGTTGCGTCAAG -GACCGCATTGTCCGTGCCTTCCTGATTGAGGAGCAGAAGATCGTCAAGAAGGTCCTCAAG -GAGTCCCAGCAGAAGGCCGCCAAGCGCTAAGCGTGCTTTTTGTCGGTGTTGTCCATTTAC -GGTGGAGCGAACGTTATGGGAAAAACGGCTGGGAATGGTTCTCTTAAAATCTTAAAGATG -GACAGGAACAGCTAGCACGCGAGTCCAAAAAAAGGCTCGTGGCTAAGCAAATGAATCAAT -TTCATTTATCTTCATGAGAGATTCCCTGACATGTCGAATCAAGTTGTCTGAAATTATCCC -CGTGGTGTCTTGAATTGGTTCCCGAGTGGAAACCTCGTACGTTCGCCTTTGTTCTAGTTT -GAACTCGTGGCACTCGATCTCTAGCAGTGATGACGCATTTGGGTAAGTCCATGGAGGCTG -GGCAATATGTTTCGAGGCCAACAACTTTTGTTGATCTGCACTAGGTATAGACTCTCATGC -CCTCTAGCTCGGACAAAACGTAGTCATCGCTGGGTAGTATAACGGCCTTCTCTATGTTAG -CCACCGTAGCTGAGTGTAAGGCGCCTAGTATCCATGGCTGAAGTATATTTGGTTAGTTTC -TCGGCCTTTTGGTCTCGATCTCATATAACTGCGCTAGTACATACGCTTCATTGCATGTAC 
-ACATCTGATCAGGACCCCACTTGGTCCAGTCAATTGATCTAGGCCTTATGTTGTGTGGGT -GTGAATAATATGTGGCGGAGTCGTGTCAACTCACGTCCTCTATGTGTATAACCCTCCAAA -GTGGACATTCTCCCGCAAGAACGTGACACTACGTAGCTGCAGGTTTGTCCTTTTCTTTTT -GCAATATGTACTCAACATTGTAGGCTCTGTTTGGTGCCTGATAGCTCAGGCTTCTAGCAA -TGGTGCATCAATGTGTTAACTAGTTGGTGATCTATGCATTGTGATTATATAGTATGGCTT -CCCGTTCGACAGAAATCCAACTTTATCTTGTTCACAATCATTGAGTACTTTAGAAAAGCT -ATTGTCTTGGGTGAACAGCCTTTTGTGGGGGATAAAATTTTAGACTGTACATACGTATAT -GAAAATGCATATCACTTCTGACATACTAATCTTCCAATGTCAACATAAGTACGTAGGTAG -AGGACACTGTCGATACTATAAGCAACTCTGTAGCACAAGCTGTAATTATTAGTTTTTTCC -CAGATTGAACTTGGCTCAGATTTAATCAGTGACGCGATGGTCTGGCTAAGTAGCTGCACT -TCGTAATCTCGGTAGCATAACCGAAAGGGGAGAACATATGTCACCGTAATAAGGATTATT -AGCCACTACTGTAGATAACTATGAAAGGTCTATTAATTTCATTGTGAGGGGTCTATTTAT -CCGCGGAGTTTGGCACTTTAGCCGATTGAGACCCTGACCAGTCATGCATCAAGTTTAGGG -TCAAACGACACCGCTCGTCGGCAAGGCCCCTAGGGTCTTTACTGTAGAATACCTCACTTA -ATTTGAGGATCGATTTCCCACACTTGTTGTCGGTGATAAAGTTACATTTATACACACGTA -ACATGTTAGCTGGATCTGGGTATATTTATGGGGCGCGGTTATCGCATTCGATGAGAATTG -TTCTCGCTGCGCTGCATGAAAAATATATTATAGGGGGAAAAAGTATAATATAGTGGCATG -AATATTATTGATCTTTGATATCTTGAACTTCTATTTCAGGCAAGAGCTCCATCCTGCACG -ACCTGTTTGAAGCTGCAGATGATATTGGTTAAAGATACACTGTTGATAAACCTACAGCCA -TGCCCATCATTCTAAGTACATGAACTATATTATATGGATTTTCAGTCCACTTCTAAGATT -CAACTAAGAATAAACTAATATCACTATGCTTGAACGTTCTTGTAATTCTTACCAGTCAAG -ATCCAAGGTGAGCTAGTGTGTGACAAAACAATAGATCTCCCTGAATGAGATCTAGCATCT -CTGTACCACCCATTCTACACTAGCTCGATATATAGTAAAAGAAGGGTCAAAGTGCCGCTG -AGAGTGTACCAATGAAGATCGATCCCTCTACTTCCATGCGTTGTTTTACTAAGTACGTAA -CATGTATGTAAAGTACGCGACTGCTGAGATTCGTTCAATCCACGGTGCCTTGAAGCAGCC -AATAGAACACGTTATCGGTCCCTTTCCGAGGGTTCCTTTTTTTTCTATTCTACTTTGAAA -AAGCAAGGAAGCAAGGAATTTTCGCCTTCATGAGAAGTCCCGGGAATACGGATGACAACA -GACAATGGGATCGGCCCCGGAGGCTCAAAAAGCGGTCAAGCGATTGTTCTGTGTCGCATT -GCTCCCTGGAGCTACCCATTTTCGAGGGCGCTTGTCAAAGAAAGCACGAGCGTATGCATA -GTTGACAGCAGGTGAACCGGGCCGTTCCATCCCACAAGCGCTCAACGGGAGGAGGTTCGT -GCGGTTTCTACGAGAGGAACAGAAAGTTAGTACACAAAGAAAAAGAACTACTAGTATGCT -GTAAATGGAATGCCACATCTATCTCGAATACTTTCAACAAAAAGTGAAGGGTAAAGAGAA 
-AATCCCGGGGGCTTACCGAAATGCATCTGCAAATCCCGGATTCCAGCAGGCTGAGCAACC -TATCGGCTACTGAGTACATATCAAGTGACACTTCCGTACCCGGGTCGATTGATGTCATGC -AGCGTGAAAAAAAAATCAAGAGATCTACGGAGTACCCCTTTCTCCCGTTCGCGACTTGCC -CTTCCCAAGATAAACGATAGATCAAAATCGATCTAGGATGACCCCGAGTTGGGCCTGAAT -AGATACCTATAACATGAAAATTTGGTTGAACAGTGGGGCTGTGCCTGACAGTACTTGTAC -TTGCAATCGGGACTATTGATACTGGCTGCCTTTACCCTGCATAGAAGACCCTAAGCCTCT -CCACAATGATGAGTACGAAGATCTTTTCGGACGACGTGCATGATGGGCATACACGTGACA -TATGGGTATATACGTCAGAACAAAACAAAGCAAAGCAAAGGAAGAGAAATAAGACTAAGT -AACGAAGTAGACTGTACAGAATACTTCGTACAGTAGGAGGATACAGATTCTGGAGGACAT -TGGTACTATCACAGCGGGAGCGGTGCCTCTCGCTGGTTGATTGGATTCATCAATAGCCTA -GGAAATTCATCGTAAGAGGCCAGCCCCCCTTCAACAAAGGAAGCTAGCGATGCCACTGTA -CGATTGGCAACAGTGCGGGACGCACATCCGTCGGTTAGAGGGCCTACTACTCCACTCACC -CTCAACATAGTACATACCTAACCGCGAACACCCACTCAGCACTAAATCACCACTTAAGCT -GCAGACTGTCACAAGAGGATCCCCTACCATATTTGGCTTAGGGAAGGGTCCAAGCGGGTG -CTAAAAATTGGTATTAAGGACATTGGTGTACAATATACACACTGAGGACATGGAGGCCAC -TTGTAATGTACTTTGTACAATAGCCCTAGATAAGGAGTTAGGGTTAGTAACCATAAGATT -AAACCCCCTTCCGATCTATCATCTGAGAACAACACGAACATTTTACCCAACATATAATAG -AAATGAATCACTGTCGGCACTGCCCGTTGGGGGACTTCGAAACTCAACTTCAGATACTAG -TATATTCAGCCGCGTTGTCAACAGCATTGCAGCTGCCATCTGTATGACGGTGCACAGGTA -CATACTGTCTACGGTGAGTGGCTCCTTACTTACAAGCCTACTGCACCGCACGCTCCGCGG -ATGCAAGTTACCCATATCCTAGTGGTATTTTTTTCAAAGTACCTATGTATCCATGTACTC -CGTAGCTGGGGAACATTAAACCGCATATCAGAGCAAAGAATGGCAGTTCATAAAAATCAC -TATGAATATCAAGACCACCGTTTTCTCATCTTGTTCTCTTAGTCTCTTGGTTTGACACAT -GAAACATATTAGGCGGCGTTGTTCGCCTCGCTCCTTCTGCTGGCGTCATTGCCGATCTCC -TTATTACGCGAGCCTGTCGTAGGCATAAAATTGCAATATAGAACCTTTAGTTAATATGTG -GATAAGTGTCATGATAGAGACAGCGTAACTTAGGGAAGGAAATTGAAAACACAGGGCATA -AAATTTGGTATTATGCTCCAATTATAGCATACCTACAATTGAGCTCCTCTATCTTGCCTG -TTGCCTCAACAGGTGTTTCATTCGTGAACAAACCGTAGTTAAAAGAAAGCGAGATCTGCG -TGATCCCCATGATCTGCCATCTCAAAAACGAGGGTATCAGATTCCCCTTTCCTGGGACAC -CAACAAAAAATGAAGTGGGCATTGACAGGGAGGGAACATGATCATGGAAAAGGCAACACG -CAGCTGCCGAGGCAGCCGATGCAACTAAGGGTTTCCGCGTCACTTGAGAACCCTAGACTA -CCGTATCAGTGGGTAGCCTGGTATAGCGGGGTGTTGGTGGGGTTCTCAGTGTGGCTAAGG 
-ACAGAACTGGGGTGTCAAGAGATAACAAAAAAGACTAGCAAAAAAGCCCGAAATTCGCTC -TGCGATACTACGTAAGAAGCAAAGCGCACTGTAATGCAACCAGGCTTGATGGAGGCATGC -AGGTGGTCACTTAACAATATATCCTGTTACTATGGTTGGAGGCTATTCCCGTCACTTGAG -ACACCGTTGTGCAAGCCTATGAAGCATTGTCAGCAGCACAACATGTGGTCTATGCTGCGG -TTAAGAACAATGTCAATGCATTAACACGAAGGAATCACAATTGACCCGACACAGAGATCT -GGATGCTAAGTAGAGACTTGAATAATTTTAATTCCTTGGCGTTGTTAATGCCCAACATGA -TAAACAATGTGCAATTGGAACTAGTATCATATGGGACTCGAGAGAAGCGAGATCACACAA -GATACAAGCTTAACATCAAGCGTGTCAGTGAGTACCCATTGACACCATCTCTGCCTTTGT -GATAGATCTTGATCAGAATTTCATAGATTCAAACTTCACTATTAACCTATGGTACACGAA -AAGAAAATATGCGTTGCTCATGCCAGAATGACTGAAAAAGGGAGAAAACTTAAGGCAAAA -AGCAAAGTAGATACTGAGAAGAAACGAAAAAGCAGATAGATCTCCTTCGCCATGAGACTC -GCTCTGTGAAACGCCCGCCAAATACATTCTGAATACAAACAGCAAACAAATAGGCACTTC -AAATAACGCCATGCCTCTAGTGAGCTCAATGGCTAGTATGATCTTGACCAATGGCTCGAA -AATTAGCAAGAAAATAAGGCGTAAAAATCACGAATCCTGAGAAAAAAAAATAAAAAAATT -TGCATGCACGCCAGCTTCTCCGCGCTTTTTCATCGCTAATCTTGTCCTATCACCTCCACC -ACACCTCCCCCTCCATTTTTTTGTTTTGCTCAAATTTCTTTTTCCAAATCCCATGAGGAT -CGACCGGGACCTTCGAGTGGGTCCTGGGCCCTGTTAGAGCCCGACTCTGGAGCATACCAA -TCATGACTATGGCCCACGTCAAACGCTGACACTGGTTCCATGGTATCGATGAAGACCGTA -TTGTTTCCTGGTATCATGGGGTGGCATCCGACAACAGGGCCCATGGGGTGTTGCATGTAT -CTTTGCATGACTGGAAGACTCGTAGGACCGTACTCATGTCCTAGACTAGGGTTGGTGGTA -GGAATTTGATGATGATATGCTTCGGCGCGCTCGTTGCGGGGACTATTCGTGCTGCGTGAT -TTGACAACTTTCGTGGCGTGGACCCCCTTGGACTGTGAGGGCTGATGTGTGGGCCCTCGG -TGGCCTGAGCCCATTCCTTTCTTTTGGTGTTGAAGCTTGCGGTAACGGGTAGTTGATTGG -ATGCCACCGCTGTGGATGGCTTCCGGCGTCAGCCTCCAGAAGTTCACTGCTTTTTTTCCT -GGGCCAACTTCTTCCTTCACTGCCTCGAATCCCTAGACTTGGGATTAGTCTTGATATTAA -ATGTAGTAGATATTGAATGCATTGTAGCAAAGAACCAAGGGGACAAATTCGAAGGTGAGA -GTTCTAGAAGTGAGATATCTAGAAGTGAGAGTTATGAGAAGTGAGACATACCGCGTTCAT -CGAGAGATTATGGCGAATACTGTTCTGCCACCCCTTCGCGTTTGGATCCGCTCCCTTGTC -CGTGTTTGCTTCAAACCAGGAGTAAATGCTTTGGAGTTGAAGCTTGTGCCCCGGTGCATC -CTTCAGAGCTTGATAGATGAGGACTGAATAGGGAGGGTGTCCGGGACGTTCCTCTCTATT -ATCACCAGTGCTGGAACGTGGTGAAAGACGGTCGGTAGATTGGTTCATCGTGGCTGGGCT -CGAAGCAGCGAGGGACTGCGGAGAGTGATAGGAGGAGACTTCAGAAAGTGGCGACGACGG 
-GGAGTCTGGGGGTGGCTGATAAGGTTCCAACTGAGGAACGCTGTATGGCTGGCCTTTGAA -GGCATGGGAGGGTTGCAAAAGTGACCAATTTGCGTGGCCATCTCCAGTCTGAGGGGCCAA -TGTCTCCAAGGTTGTGCCAGCTATGGCGGGGGTGACATGCGTGCGGTACGAATTAAGATA -CCCATTTGGATCCCCAATGGGCTCCTGAGCGAACGAACCCAAGTCGTACGTCTCAGTATC -AGCCACATGAGGAAAATTGCAATCTTCAAATTGGCGAGGAGGCATATGAGCTGGCTGGGA -TCTCCATCTCGCCCAGTCAGGTAGACTGAAGTGGTTGAACGTTGGGGCATTCATGGTGAA -ATCCCCATTGCCCTGAGATGGCGCTTCTAGTGATGGGATATCATTAGACTCTTGGCCTGA -AATTTGAACCCCGTCGAAATGGTGAGAGTGGAATGAGAAATCCATGTTTGAGCAAGTCAA -CGTTCGAAAGTGAGAGTGACTTGATGGGGACGATAGAGTAGAAAAGTGAAGAGAAAGAGA -AGCATGAAAGTGCGGGGATAGGATATGAGAAGGTCAATGTACGAGGAAGAAATAAGGAAA -AGAAGAGACAAAAGTGCTCAAAGGATGTGTGTTCATGGAAGATTTGGTGTATAATTCCAA -GATCCACCAACACATGTAATCTTTCTCCAAATATCATAAATTTCCTAGACAAAAGACGGT -CTGGAACTAAGGGCATTTGTATTAAAGGTCTTCGTATTTAAAAAAACGTCCTTAGATGCC -CAAGTCGTGCAACGGAGCATACAATGGTCGGGAAGGCAAAGTGGCTGGATCTGGTGCTAC -ATGCTACATGCTACATACCACTAGAACGAATAGGACCAGTCGCTTAGCCAGAAGGGGTGG -TGGTCAAATGGTCAAATTGGTCAAATGGGAACCCGCAGGAAATCGCCATCTCTACACCGG -CTTCATTCCGGGCCTTCTGGAAGTCAACTATAAATTGGGGTCTGGTTTTTGCCATCCAAT -TGTTTCTTTTTCTCATAGCCTGCATCTAACCAGCCGGAAGTCAGGAATACGTCCTGCTCC -TGCAGGCCGGGATAGTGTCACCCAAAAGGACACCAAACGTCCACGGTGTTGCGAGTGACC -CCGGTAATAGaaaaaaaaaaaaagagagaaaaaaaaaaagagagagagagaTGGTGGTGA -TACGCGAGTCATGACCGAGCCGAGATTTGGCTTCCCCCCAAAAGACCAGCATGCACCATA -GTGATGGGCAAAAACAAAAGGTGACAGGGTCGTCAATTTTAAGAAGTGGTTGGATCAAGC -ATCTGATATCTCCAGATATCAATTCCGGCTTATTCTGCGAAAACCATGGGGAAATATTTC -CAGTCCAATCAATCCATGGTTGGTCTAATTCACTGGATATCGAGTCACTGGAAAATACAA -CCCCGATCATCTCCCTGCATACACTATAACACACTCTATCTCCAACCCGATATGACGCCC -ATGTGGATTGGTTGCGCATGTGTCACTTCCGGTAATAGATCTAAGACAACTTGTTAGTAT -TGTCGGAAACCaaaaagaaaaaaaaaaagaaaaGCGACTTGCCCTTTTGAATTGGAAATG -AAGTTGAAGGATTCCCTGAAAGAATGAAATTACGTCAGTTGAGAAACAGGGAGATCATTT -AGTGGCACAGCATACCTTGTCAAATGTACTCTTGAAGCCCCTCTCTCGTTTGGCACTATA -ATCCCATTCTTTGTCGACAATTCGAAAGGCAAGATCTTCGTATGGAGGTCCAGCCATGAA -CCGAATGAGACAGGTGTCCTCCTCACCCGCTTCGGCCGATGACTCGCCACGCTTGCGACC -GTTCTCACGCTCAATCCGGTAAGTCGGGGCTTTGGTCTTGTCGATCAAGTCGGGGTAGAA 
-GATGTTGAATTTGTAACCTTGTACAACCTTGGGGGGTGGGTTATCATGGTCGTAATGGGT -CTGATTGTATTTGTTCCACTCGTAACCCATTTGGACTCGATTGAAGTATCGCGGCTTTCG -AGGCCGATGCTTACCGGCCCATTGAGGCTGCGAGCTCGTGCTCACGGCCTCTTCGCCGGT -GAATATTTCCTCGTTCTCGCTGACACCCTTGGCGAGCTCCCTTTCATATAGTGCTTTGGT -TGCTTGCGAAAAGTCCTCGTTCGGGATGGAGGAGAATCGCGTGGATACAGAGGCCACGGG -AGCGTTGGTAGATGTCGGATTCAACGCTACCGCAGAGGACTTCTCTGCTGCTCGGTGCCG -TAGTGGGACAAAGCCCATACGGATAATTTTCTGGCGTTCACGGGCCTGTGCAGTCGATTA -GCATGTGGATTGGGGTAAATGTGGGGAAGCGTGAAACCTACCACTTGGTCAAGGAATGCA -GACTCGTCCATGATCTCCAAGCCTTTGTCCTCCGGGCGGACCTGAAGCATTGGATCTGGA -TCCAGGCCTTCAAACTCGACATGATTTGTTGTATCACCGGCAGTGACAAGGGGTGCGAGA -GGCGCAAGTTTTTCCCGAATTGATTCGGCTTCCTGTGTTTGTTGTCTCCTCAAGTCGCGA -ACACGCTCATCAATAATGGCTCGATAAACATTTTTCAGACTCGCCCTAGCCTTCCATACA -GTCAGACTGCGCAGCAATTCTTCCCAGTAATCTGTATCGATCGGCTCGTTCGAGTTTAAC -TTTCGTTTAACTTGTACTTCGAGGTTTTGGAGCTGCTCGTAGGACTTCGGGCTTAGTAGT -TTGTTGATATCGGTAGCTACAGAATTCAGTGCGCGGCCCTCGGGTGTAACGGTCTTTTGG -CGATCTCGACAAATCACTTTCATAGTCTGTGGGTGAGCGGTGGTCAACAGGGGTGTCCAG -TGGATAGCACAGGGAGAGAGAGCATTACCTGCCAAAATTCACGGTTTTTGACATTTGTCT -CAAGGTTCAAAAATGTATCGATGTCCTTCTCCAAGTCTCGCAGCTGAGTCTGCGATAGTC -CTTCAAACACACCATCCGGGTCCACGACATCCAGATCGGAGTCTGCTATCTCGTCATCCA -AGGGATCGCGCGTAGGATCGATGATGCGGAGAGTAACCGTGAGCCAGTCGATAGGCTTTG -CGCGCCCTTCCTTGACTCGAATCTCTGCCTTCTTCTTAGCCTGCTTCAACACGAACAGAT -CCTCCTGCGCAACCCACTCGCGCTCTTGTTGGTCTTCCTGTATCAAGTTGAGGCGAACTT -GATCGCGCATGTTACGAGGTTGTTGTGAAGGTCCCGATCTATCATTGTTCCATCTGCGAT -CTGAGTCTTGTCGATCGCGCCATTCTCCATCGTCCCGACGAGGATGTTTCTGAGCCTGAC -GCCACGAGGATGGCGATCGCGACCTGCTGCGCTTTTGGTCTGATCGGTCTGGGAATTGCG -CAGACATCGTGAGGACCGTAGGATTATAGCGAAAAGTAAGGATCAGAGCGAACGCTAGAC -CTAAACGTATGTAGTTTGGATGTCGAAACGCGAAGTCAGAAGTTCGGGAGATCAGCGGGG -AAAGTCAAGCGGGCGGCAAACCTTCCTTACGGCCCCAGGCAGCTATGAGACAAGCCTGGA -ACTTCGTAACTTCACAACTCCGCATACAAGTCATGCCTTCAAGATGGCAGCGTCTTCATT -TGTCCCAACAGCAATTACCTCCATTGCTCTGCCAGTATATCTGGACTCGGCAAGGCTATG -AGATCCACATAACAGACCTCACCTACATATGGTCAGAACGGCTGCCCAATAAAGCCATCG -CCAAGAGAGCAGAGGATGATGCAACAACGATCGATCCCGGAGAGGACCCAGAGCAGCTCA 
-TTGTGCTCCTGGAGAAGATTGGGGAATCTTTACAAAAAGGCAATGAAAGCGTGATCCTAA -GAAGTGGAACGCAAACCGACTCCCTCGAAATCATGATAACAGCGAAGCTACCCGCCCCGT -TAAAGCCTCTTAAATGGTCTTTGAAGTTATCAAAAGAACCTATATCGTCCTTGACCACTT -ATCTATTACTTCCACTGCTGAGAGAGGAAGCGGAGTGGGAATCCCGCCAACGTCTGCTCT -TGAACCAAATCAAACAGAAAGACTACGTTCTTGGCAAGTTGTTTGACAAAATGGAAACAC -TTGGCGTCGACCTAGGGTCTGTATTCCCTAGCGCTGCGGGATCACGCACGTCTCGCAAGG -CTATTACGCGGTCCGAGGCTGCTAAGTTTGTCAAGGGCATTGAGCCGTTTCAGGAACAGG -CCTGGCTTGCTGAGACAGGAGTATCGGACGGGGCGGGATTAGCCACCAACTTACTCCAGG -AAATTTCGCAATCCAGTGATTCCTATGATTTCAATACATTCACCCAGTCACGAGGCGAAT -GGTGGCATCATGTTGCGAAGCGCTCTGAAGCTAGCGCCAGAGAAGCATCTATCGACGAGG -AAATTGCCGACGAAAAATATAGAGAGAAGAGGATTGGAATCAAGAATAGCGACGCAGAAG -GTGATTCAACAGCAAGCAGCGATGTCGATGAATTTCAGGTATGCTGAGACTTATTTCTTA -TCACTAAAAACAAGTATTGTTTAAACTCATTCATCTAGCGACAAGAAACCCCTCCCAGGC -TAAAATCCCAACATGACAAAGGCAAAGCATCACTACCCAAGCAAAACCCTAGAGGGGAAA -GTCCTCCGCCGCCAGTATTTGCTTCAGAGGAAGACGAAGCCACAGCATCAGACTCCGAAT -CCGAGCCTGAAACAGCACCGCGTCAAAGACGCATTCCCGGCGTTTCAAGGTTGCCAGAGC -CCACCGCCACGTCGCCAGCAGCCCAAGAATCGCTGAAAAAACCAAAGAGTGGATTGGGTA -GAATTGGCGGCAAAAACAAGAAAGAGACAGAACGTCGGCCCTCGCCATCTCCCTCCCCAG -CTCCTTCTCCTCCAGCGCACGTTCGAGAGGCACCGAAAAGGCCAAAGGGCGGATTGGGCA -CGATAGGCGGCAAGAAGAAACAAGCAAAAGAACCCTCGCCTTCATCAGCCCCTTCCCCTC -CAGCACTAGCCCGCGAGTCTCCAAAATCTCAAGAACCTCCAGAGTCGCCAGACGAGAATG -AATGGATCAGCAAAGACCCTCTATCCAGATCTTCGACCCCAGCATTAGCAGCGGCCAAAG -TTAAACGGTCCGGAAAACTTGGCATGATAGGCGGCAAAGCCAAAGCCAAAGCCCCCGAGT -CTGCTCGAGACAATTCTCTCTTGTCTACATCTGACACGGTCATGTCACCTGAGAAAGCCG -CGCCTGCTAAACCGAAGGTTGACCAACAAGCTCCTAAAAAGGAGGAGTCTCCATTGCCTA -CACCAGCGGAGAAAGTCCCACCTGAAGAACCGGAGACTGAAGACCAGCGCGCAGACCGGA -AGCGTGAGGAACTGAAGAGGTCACTCGAGGCTAAGGGCAAAATTCCTGCAAAGAAAAAGC -GGAGATTCTAGTCGAATGGATGCGAGGATTTTATGTTTGAAGATGTTTTAGGTCTTTTTC -CAAGCGTCCGACGGGAATGGCTGTATGTGTGCGTACGACACACCCTATCTAGTACTTGAC -TATGGAGAAGTTCACATCAGGGCTGTACACTATACGCATGTGCTTAAATCTGTGGATTTT -CTCATTGAAAATATCGCGATTGGGGAATCAaatagaaggagaagacaagaaagtgaacag -aagaCATAAAGGTAGAAACACGCGCTAATAATAGTACAAATTGAGCGTTACCAAGAATCA 
-AGCTCAGTCCCTCGAAGGAGGGCCCGTGCGGGCCAAGGCAACACTTTCCTTCTCTTGCTC -AGTCGGGGTGACAAGTCGTACCCTTTTAGGCGACATGGCGCCGGGCGAAACCGGCGCTGG -AGAGGAAGTATTGTTTGTCAAAGTGATCGGGCCAGCACCACTGCCACCGGTGGCAGCGGC -AACACCAGCAACACCCAGCATGCCGCGGACCTTTTCGACAGCCTTACTTTCACCCTTGGT -CCAGGGATCACGCTTGACAAGTCCGACACCGGCGGAGCGACTAGCAGAGTCGCGACGCTT -GGATTCCGCTTGCAGGTCTTGGCGGTTGAGTTCGCGACGTGGACCAGGCCCACAGTCATG -CGAGCAGCCGGCTGTGGCACAGCCGCCTTCGCTGGTTGAGATGTCGCCGAGCCAGGTTTT -GATACAGGCTATATGGCCGCCGTGGCCGCAGCCTTGGCACCAGGTCCAGAGGGTGGAGCC -TTTGGGCCGAGATACAGTGAAGGGCTCGATCATATCGATCTCCGACGCTGGGACGGGTTC -GGTTGGAACTGAGGATTGTGCTGAATGGGTTGAAGCGCTTCCGGAGTCTGGGTCGGGGTT -CATGAGGGATGAACTGCTTGGTGAAGTTTGCTGCTCTAAAACCCACTCTTGGGGTGGATC -AAGGCTCATGCAAATAGAGCATGGTGCTTGAGGCGTGCTGCAGCGATAGCATCGGCTATT -ATCTTGGGTAGAATTTTCGAAGGGTCGTTGGCACGTGTAGCAGTAGACGTTGATGAATGA -ATCTGTCTGGGCATATTCGTAGATGGTAGGATATGTCGATACGCAGAATAGGCGGAGCTC -TGCTGCCTCGAGGTCCATTGAGTGCCGAATCAGGTGCTCATTGTAAGTTTTGAAGACCGT -TTCACATTCTTCATATGGCAGAATTTGTTCGCAATCCCGGAAAAGAACGTGCAGCTTTTG -GAGAAGATGGGCCGCAGCCTGTATATCGACTAAGCTACTGCTGCTGTGATAATATCGGAC -TGCTTCTTTCAGAATCGCTTCAATACCCCACGGTTTCAAGTTTGTCCCATCTGGCGTGAG -TGGGATCTGGACTCGCGAGACACCGCCCATGGCTGAATAAGGTAAGCTGCCAAGGTCGGG -TGCATCGTTTGGTAGGAAGGATTGCTGATGTCTTCCAAGGGGGCTTGATTCTGTGATCAA -AGGGGGAGGGCTTGATGGGCGCTCAAGATGAAGTCGATCAGTCTCTGAGACAACCTGGTC -AAAGATCTCATCCTCGGATTCCTCACGTGTAGATGCTAGAATCGACCCACTTCTCAGCGA -CGCGGTAGATTGTCCAGCATCCAGAGATTTGGATGCCTCAAGTTGTCCAGTAGACGAGTA -CGACGCGCCAATAGACTTGGGGGGATGCGAACTGTCGGTGGATGTAGAGAACATAGGAAA -ACTCTCCGAAGAATCATGCCGAGTATAGCGGTCGGTGCGAGGAGGCCGGTTAGATTCCAT -TGGTGATTCAAGAGTCAAGGGTTTCTTGGGAAATTGTTTGTAGTCACGAATTGCTGCACG -TTTATCCTCCAGTTTTTGGTCATAGTCATCCTCGGAATTTTCCCGAGGAAAATCTGGGTG -ACCACGACGACGCCAATCTGCCTTGCCAGCAATTGCCCGAGGCGCAGACCTCTGCTCTAT -GTCCAAGTCTTGTTGTGTAGGGTCCCATAGATACTTAGATGAATGAGTATCATCAGTTGA -TGATAGTCTCCCAGGCGGTAATGGTATGCCGTCTGACATCCCCCAGCCATTATGATCACT -CAAGACTGATGGTGGAAGAGGGTCAAGGTCAATCGCCCCATCGCTCAACGAGGGTATTTG -GGAATCAGTGCTCGACCATGAGTCTTTCGGAGAATCGGGCAAAGGTCGTGCAATAGGGGT 
-GGTCATATTGGACGAGCTCTCGGCCTCGGGAACCGCCTCCATGACTCCTTTGAACAACCG -ACTTTTGACTCTGTCCTGTCGGCCATCATCGGGTCGTGGCTTGTCTATCAAGCCTCCATC -CTTAGATGACCTATCTTTGAGACAACCTTTCGATGGATGGTCTCTAGCCCTGCGTTGGAG -TTCTTCAAGAATAGCGAATTTGACAATTCTCCAAGTCTGAGCCAATTTGACCAGGGATAC -CTCATCTGCACATTCAGCGTTATGGTCCAGTGACTCAATTAGAGTGTTCAGGACGTCGCT -CTGCGTTCTATGTTCACTGTGCTCATCCATTAACGGTGTATAGTGGCTTGCCAAATACCG -AAAGATTGCTGAGTCGTTCGTGGCACCCGGAATTTTTGTAATGATTCCCACCTGACTGGG -CATTGGGGTTGCGAGTTTGGCTAATGCTTGTTCTAGGGGGAGCACAGGTATGAAATCCAC -TGCTCCGGGTGGAGTACTACCCAAGGATTTGGATGGTCGAATCGCGGCCGATTTGGTATG -CCGATTCCGAAGGGAAGTGGCCAAAACGGCCTCATCAAAGATGTCATCTGCGGGACTTTG -ACTTAGTGATCTCTCGCCAGTGCTGTCCTCGTCTTCTTCGGGCCGTAGAAATTCGGCCGC -ATTGACACCTAGAGGGCGTCGTCTGGGCCGTTGTTGACTGAAAGCAACGAACTCGCCATT -GGGACTCCAGGCCACGGAGCACATCGGGCGGCGATTGACCACAGTTGGTGCAAACCGAAC -ATCAGTCTGAGTAAAAGCCCCAGCCTCGCCGACCGTCCACAAAAGATCCTTGGAGCGCCA -AAGCAGATCTGATGCGGGCGAGTCATACCGATCGAACTCCCGGAATGGAACATGAGGACG -ACGGAGATCCCAAAGGTGAACGCGCGGGTCTTCTTTGTCATAGGACGTGACAATTTGAGA -TGACTGCCATTCTCCCGAGGAAGGAGACTCACCAACCCAAGAAGGAGGCCTCCACCGCAC -ATTAGTGACAGCCTGGGGCGTTCGGAACTGGAATGCAGGCTTCTGCCGTCGCTCTGCAGA -TGAAGAAAAGTCCCAGACCTTTACCTGCCGATCTGTGCCACCACTGACAATGTGCTTTCC -ATCTGGGTGCCAGTCGACTGCGAAGCATGGGCGATCATGCGCGGCAATTCTCATCAGTGG -GGCGTTGACCTTTCGGTAATCCCACATCTGGATTGCGCCGCTGTCTGAAGCAGTTGCAAA -TATGACTCCATCGTTGGGAGACCATCGGACGTCTCGGATGGCATCGCTATTGCCCTGATA -CGAATGTTTGCTGCCGCACGTGGAAGTTCCCCTATTTGCAGAGGCAGATCGCAGATCCCA -CATCCGAATTGTGCCATCTTGACTGCCAGACAGAAGCCAAGAAGGAAGATGAGGGTTGAA -GGCCAGCCTGTGCACCTGGCGGTTGTGACCATGGAAACGACATAACTGGAGGCCGGGCCG -CTGGAGGTCGTAGACCACAATCCGACCATTTGCGGCTGCTGTTGCGATTATTCGGTCATA -GTTGCCATGTGACCATTTTACGTCCTTTACATTCAATTGGTCCTTGTGTGGCATCGAAAC -GCCCACATCGTGGTGGGTAGATGCATAGCTGATAATTGCATTGCGAATATTGAATTCCTC -TGACGAGCGGTCTGGCGAAACCTGGATGGTTTTGAGAATCTCCTTGCCTGCGATAACCGC -ATGTGTCCTCTCAGGCGAAATGTCCAAAGCCGTGATTGCGATGCCTGTTTTATGGGTCAC -AGTCTGCGGTTCTCCTGGTCTTTACAGAAGCATATAATTAGAAACAGAAGATTCCATTGG -CTCAAATGTAATTTCCCCGATTACACTAGCTCCGATGACCAAATGGCGGGGGTGGCCACT 
-GCATTCGTGGGAAACAAAGAGGGAAAAACATGGGTAGTAGTCCAGGGAAGTAGGGGTATC -AGAGCGGTGTGGGAAAGCTGCAACACTTACAGTGATTTGGATCTGATCGACCGGGGACCA -TCCGCACGACTGCTTTGGGGGCTAGGTGGTCTTGAGCCCGCGAAGAAGGGTTGGGCGAAT -CTAGCAAGAGCACTGGCGGCGCGGTTTGAATACTTCTGTggtggagggggaggaggagga -ggGACCGGAGATCGCGAACCTGAGAGGTCCGACATCGTTCACAGTCAGGTCTCCATTCTG -GCACACCGACTTGGAGGGAGAAAGATACGGAGAGTTGGAGATCGTCAATATGCAAGCGGT -GAGGGAAGGATCCCTGAGGCACCGATGATCCATGCCTGAATGTGATGTGACAAGATTGAA -CAACGTACAATGTTTGACGTTGCAACGTTGTTACATCGTTATGCCTCACATATAGGGCAC -CAGTCAGGTAGTTGTACTCCGTACTACTTTCTCATGATTGATATTATACTTCTTGAAGCT -TTCACCTCTTAAGCCATCGGGCCGGAATACATGTTTAAGAGCGGTCTAGAAGACAACACA -TGAGAGAGCTCAGATTAGGATATGATCTTATGAAGTAATTTCTCCGAGGGAACGTGTCAA -ATGACCCAGATATCAATATACTATCTATAAAGCACAAGAATGCAATTCTAAGTTGTCTTA -CCTCAACCGGGGCTTCGGGACGTGGGACACGAGACACATGGGGAATATATATTCATAGAT -CTTCAAGCATTTATGACACATTTGATGGCTATTCATAAGGTGGCCAACCCTTTGAACTGC -GCCTCAAGAGTGCCACTGAAAGTGACTAAAAGACACTAAGAGGAGGCAAATATGAGACAT -TCAGCTAATCACGTACATTTAACGGGAAGCCTAAACAACCAATCTAAGCTGAAGGTCACA -CACATCGCGGAGAAAAGGAAGAAAATCTAGATTTCAGTGCCATAATGACCGAGCTCAGAT -GATCTCGTTAATAATTTCTTTCATGGTTTTATTTTCCTTGGCAGTTAAGTGAAGTCATAT -CTGCGTGGATTGTTCTCGATCTATCGATCGACGATCCTTTGATTCCCCCCCCCCTGTTTT -CCCAGAACGATTTGAACTTGGACCATTGGGCAGGAAAGGCTCTAGATTGATATATAGAAT -TCAACTATCGGAGACAAACAAAGTTCCAAAAGAGGAACTCGGAGAAAATTTATATATCAA -AAAGACATACCTCGGAAGCTTTGGGTGAGTTATATATCAGTGGATCCCCTTGTTTTTAGC -TGCAGCAACCTGCAATCCACACCGTCATTTTAGACATCTGATATTTATGTACGAAGTACA -TTTATATATTCTTCGCCTCTTGACTTTTCCTTTTCCTTTTAAAAAGCTCCTACCCTTTCC -TTCAAATTACATTTAAAGCATTGGaaaggaaaaaaaaataagaaacaagaacactgtagg -gggaaaaaagcataaggaaaaataaggaaaaataaCATTATATTTTCTTTTTTTTCTTTC -TTATTTCTCTTATGTTTGCTTCTGCTTCAGATTCTTAATATTTTGTTTCTATAAACTTCC -ACGTGGAAACCTGTTGAGGTTATCTTGGAAGTTCCCCATTCCCGTCGATCTTTCGCCATA -CCCGTCGTTCACACCCGTCCAGTCCTTTCCGATCCGAACCCGGCCTCCCTTTCCTACATT -TCTTTCGAGGCACTCTTGTCATGGACTTTGTCTGAACCACTCATTTCTGGGACATTTAGT -GGGCAATGGATCCCCATGCAGCCCTTGAGCTGCTGGAATACTTGCGCACCTCTTACCCAA -TCTTTCTTCTTATCCTCTTTGTCGTAGCCTTCGTGGCTAATACTACTGTCGCTGCGAGGA 
-AAGCTAGCAAGCATGATTCTCAATACTTTGGGCCCGGAGGGCGACCCCTCCCTCAACGAA -CTCGAACCGTGCCCGCCTACAAACCCCAGGAGTTCTCGCGGAACGTCAAACGAGTTTTCA -ATTTGCTCTCCGTGGCCATTCTCCTGACTTTCCTTGTGGATGCAACTATCTATATCGTAC -ATGTGATGGTGGCTAGGTCAGAGAACTGGTGGCGAGGTCAGTCCTTTGTGGTACGTCCTA -TTCAAATGGTTGAGTGTTTCTCGACAGCCGTTGACACTATCCCAGATCTACGTCGTCGGC -TCGTTCTTCGTCTACGCAGTGCTGCTCATGGCAATGCTAGACACATCTCCCTCGCCCACT -CTCGCTCAATTTGTCTGTTGGTCGATCGCCATTCCGATCGAATTGGTTATTGTTAGCACG -TCCCTCTCTATCTACACCTCGGTTCACCACGAGCCAATTGTTGGAGACCCAGAAGGGGGT -CGGGTCCGCAGAAGCATTACATACTGGGAGTCTCTGGAAGTCGGTTCGAACAGTGTACGC -ATCCTTATCTTGTTTGCACTGGTTCTTCTCTACGCCTGTCAGTCATTCAACATGAAGAAT -CACCAAAAGGCGAGCCAACGGGCCAACGGAGCCACAGAAGTCACGGGGCTTTTGGATTCC -GGTGGCGCAGAaaatggcaatgggaatggacatggaaacggaactgcgaatggTCACTCA -TATGGCTCGACGAACGGAGCTCCTCACCCTGAAACTGCCAAACCTACCGATCCATGGGTT -CGTCCGACGACTATTCCTTCCACTAGCTGGTGGGAGTACCTGAGCGGATACTCACTATTT -TTCCCATACCTATGGCCGTCCAAATCCCGCCGCCTGCAACTAGTTGTCACATTTTGTTTT -GGGCTCCTCATATGTCAGCGAGTTGTTAACGTCTTGGTGCCCTATCAAATCGCAGTCATC -ACAGGTTCTCTTACCAGGGCTGAGGGCGAGAACCTCCAAGTGCCATGGCTTCAAATTTGC -CTGTATGTGCTGTACCGATGGTTGCAGGGCAGCCAGGGCTTACTCAGCTCCCTGCGCTCT -AGCCTCTGGATTCCTGTGAGCCAGTATTCCTACATGGAGCTCTCCACCGCTGCATTTGAA -CACGTCCATAGTCTCAGCTTAGACTTCCATCTCGGCAAAAAGACCGGCGAGGTCCTGTCT -GCGCTAAGCAAGGGCAGTTCGATCAATACTTTCCTTGAGCAGGTCACCTTTCAGGTAGTG -CCAATGCTTGTTGATCTCGTGATTGCGGTTGTGTTTTTCATGGTTGCATTTGATGTCTAT -TATGCTCTAGCTATCGGGATTTCCACCTTCGGTTATCTCTATGTCACTATTCGCATGGCC -CAGTGGAGAGCAGAGATCAGAAGACAGATGGTGAATGCCTCGCGACAAGAAGATGCCGTC -AAGTAAGTCAAAACGAGACTATACAGGATACAATAACTGGGACTAACTCATGCTTTTTTT -TTTAGGAATGACTCCATGGTATCATATGAGACAGTGAAATACTTCAATGCCGAACAGTAC -GAATTTAATCGGTACCGGGACACTGTGGCTGACTTCCAGAAGGCCGAATACCACGTGCTA -TTCTCTCTGACACTACTAAATACCTGTCAAAACACCGTGTTTATGATGGGCTTATTGGTC -ATGTGCTTTATTTGTGCATATCAGGTTGCTACTGGCCAGCGCCCTGTCGGCAAGTTCATG -GCTCTACTGGCTTATATGGTCCAACTCCAGGGCCCGCTCAATTTCTTTGGAACCTTCTAT -CGATCTATCCAGTCCGCATTAATCAATGCTGAACGGCTGTTAGAATTGTTCCGTGAGCAG -CCAACAGTGGTCGATAGCCCTAGTGCTACCCCGCTGGCTATGTGTAATGGTGACATTAAG 
-TTTGAGGATGTTGAGTTTGCTTACGATGCACGCAAGCCTGCGTTGAACGGCCTTACCTTC -CACTGCCAGCCGGGAACCACCACCGCCTTGGTTGGTGAGTCTGGCGGAGGTAAGTCCACT -GTCTTCCGCCTCTTGTTCCGGTTCTACAACTCCGGGAGTGGGCGTATCTGTATTGATGGG -CATGATGTACGGGACATCACGATCGATTCACTCCGCCAGAACATTGGTGTGGTGCCTCAG -GATACTGTACTTTTCAACGAGACTTTGATGTACAATTTGAAGTATGCCAAGCAAGACGCC -ACTGATGAGGATGTTTTCGAGGCTTGCCGCGCTGCCAGCATTCACGACAAGATTCTAGCT -TTCCCCGACGGATACAACACTAAAGTTGGAGAGCGTGGCCTACGCCTTAGCGGTGGCGAG -AAGCAACGTGTGGCTATCGCTCGGACCATCATCAAGAATCCGCGCATCATTCTATTAGAT -GAGGCGACCGCAGCTCTAGACACAGATACCGAGGAGCATATCCAGCGGGCGCTTTCTACC -CTTTCCCGGGGCCGCACCATGCTGGTAATCGCTCATCGTCTCAGCACCATCACTACTGCA -GATCGCATCTTGGTCTTGTCAGAGGGTCAAGTGGCCGAAAGCGGCACCCACGAAGAGCTC -CTGGCTATGAAGGGCCGCTATGCCAGCATGTGGCGTAAGCAGATCCGCGCCCAAAAGGCT -ATGGCAGAAGCTCAGGTCCTAGCCGAGCGAATCCGCACCGCAACCACAAGCGATGACAGC -TCTAGTCAATCGGATGAAGACCGCAACATTGTCGGTCACCGAAGCAACTAAATCAGTAAT -CCCTAGACCACTAAAATAGGGACAGGGAGAATGACTCTTGTTTAATCTTCAACATGTGCA -TACGTTTTTCCATATTCTTACAGGAAAAGTTCCAAAGAGGCAGTAATGAACGGGGGCAGC -ATGAGATCAAGTTTGATGTCTCATTTCAAGGGGTGAGAGCTTCTCTTTTTTCTTGATTAT -TTCTCAATTTTCAATTCGCTCGTCCTTGCAGGTATCGttcttgttctttttttctttctc -tttttttttGGTTGGCGTTTTGGGTCTACGACGTACGATGTTATCATCTCCCTTGTCCTT -GCTTTATATGAATCCAGGCGCAGAGGGTCAGGGCTGGGACTCCTGCACAATCAAAATAGA -TTAAATTGCTAGGATCAATATTAATATAAGAAGAATTCAATCCAAGCGGATCGTGACTAC -TTGCTTGCGAAAGACCGGGGTCCCAACTCAAGTCCATGCTCATGCTATACAACCATTCTT -GGTTTTCATAGAATATCTAATCGCTTTCTCTTAAACATATAAATTGTTGCCACGCGTTAC -TGCGTCATCTTTATAACTCATGCGGCTCGAAGTCTAGGTCCTCTCTTTGCCAAGACCTAA -GGGCCAGAGAGCTTTATTTCACGTGACACTCACCGTTGACCTTCCGAACCACTACATCAC -CAGGACAGTATACAACACAGAACCAACCTGAACCAGCTGCTTTGCCGACCAAATCCCAAT -AATGCATTCACCATCGCCGCAACTCTTATGCGTTCTGCGCACCTCCCTAGTCAACACCCG -TCGCACTTCGAACTTGACTCGATTCTGCCAGATCACATCACCAACTGCAGTACCATGTCT -CGCCTCAAATCACAGCAACAAAATACCCTCAACACACCGAAACTACAGCTCACCCATTCA -ACCAGCCCGCATGATACCTCGGTCACACGCGCATAAACCCGTCAGCCGCGACCGCGGACC -AAAGTCAAAAGAAGACACCCAGACAAACTTTGATGCATTGAACGTGTTGGGCAACATCCC -TGCTCCAACGACGGCCATTGATGCATGCCTAGATTCTGGGTTCCATCTGAACAATGGTGT 
-CAAGCTTACCAACGGCGATGGGTTGCTGCTGGTTGGCGGTGAGGCATTTGCGTGGCGGCC -GTGGAAGGCTATCGAGGGCGTAGAGAGCGACCGTGCCGCTAAGGATGCTATGCTCAACTC -TAAAGGCCAATTTGAGCTTGATGAGTCGGTTTGGGGGCTTTTGAATCTTGTCTGGCCAAA -GCCTGGTAAGTTCTCTCTTCTTTTTTGATGCCGGATATGTGCTTCCTTCGGATTAAACAT -ATCTTGAGCCTGATCACCAAATTTCTTTTCAATTTCCTAACAACTCTGCCACAGATATGC -TCATTCTTGGTCTTGGTGGCTCGATGTTCCCGCTTTCACCTGAAACTAAGCGTCACATCA -ATTCGTTAGGCATTCGGGTTGATATCCTCGATACTCGAAATGCGGCCGCGCAGTTCAATC -TTCTTGCTACAGAGCGCGGTGTAGCTGAGATCGCCGCTGCCATGATTCCTATTGGGTGGA -AAGCTAGACCTCTCTAGGTGTTATAGGTTGTCTTGAGAATGGTATGTATTATGTGGATAA -TTTGAGTGTATAGCTTAGACCGATTCAAAATGAAATAATTCTTATCCTGAACTCCCGGTT -GCGGTAGTACAAAAGAGCACATGTAGTCCTTAAACATTGCATAATCGCATGTTACCCGAC -CTCTAGGGCTGTGATGCAGGAACGTGACCTCGGGACGTATCATGTGAGCCCTGTTATCTT -ATCCTTATCTCCTGAACTCCGCCCCCACGGCTCAGGGTCAGGATCAAAACAACCTGGTTA -GGTCGCAATCGAAGCTCTCGACCTTGTAGATCTGTCCCCACCCTTTCCGCAGTATATTAT -ACCGAGGTCAACCCGATTTGCTTATTTTTACCGATTCTGTCTTTACAGCTATGGCGCGGG -AAGAGTCTCTTCTTGCTCCTCGCGTGTCCTCTGAAGATGGTTCGTCGATCCGAAATGACG -GGGAGGAGTACGCTCTCCTGACCGGCGAGCGAACGGCCACCGAAAACAAACGGCAAGCCT -GGCCTTCCTGGCGGGAAATCGGAGTGTTTTCTTGGGCCCTTATTGCAACAATTTTGGTAA -TTGTGCTGGCCGTGCTCTACCCACACAAGTCAGCGGAGTCTCAACCCCATCGTGGGTCAA -ACACGACCTGGGGCCCTGGAGGCAAGCCGACGGGCAAGCGGAACATGATCTTCATGGTCT -CTGACGGCATGGGCCCTACCAGTCTCTCGCTGACTCGGAGCTTCCGGCAATACACGCATG -ATCTGCCGTTCGATGACATTCTGGTCTTGGATAAGCACTACATTGGAACTTCGCGAACAC -GGTCAAGCTCTAGCTTGGTAACAGACTCTGCCGCTGGAGCAACCGCATTTTCGTGCGGTC -GCAAGAGTTACAATTCTGCCATCTCAGTGCTCCCCGACCACTCGCCCTGTGGGACCGTGC -TCGAGGCGGCTGCGTTGGCGGGCTACAAGACAGGGCTGGTTGTGACTACTCGCCTGACGG -ATGCGACTCCAGCATGCTTTGCCGCGCATGCCAATCTGCGAAACTACGAAGATTTGATTG -CCGCCCAAGAGGTTGGAGAACACCCTCTCGGCCGGGTCGTAGATCTTATGCTCGGCGGAG -GACGATGCCATTTTTTGCCCAACTCAAAGTCTGGCAGCTGTCGTGGCGATGATCACGATT -TGATCAAGGTTGCTTCTCAGAATGGATTCGGTTATATCGATGACCGTGCAGGCTTCGATA -GCCTTAAGGGCGGCGATAATGCCACCTTACCGCTGCTAGGCCTTTTCGCCCACGGAGATA -TCCCGTATGAGATTGATCGACGTACTCAAAATGAGACTTATCCCTCTTTGGAGGAGATGA -CTCGCACTGCCTTGAAGGCCTTGAGTAAGGCCACCGAGGACAGCGAGCAAGGGTTCTTCG 
-TCCTGATCGAGGGCTCTCGCATTGACCATGCCGGCCATGGCAATGATCCTGCCGCCCAAG -TTCATGAGGTACTGGCATACGACAAGGCATTTGCTGCCGCACTCGAGTTCCTAGAGAATG -ATTCCACCCCTGGCGTAATTGTGAGCACATCTGACCACGAGACCGGTGGTCTTGCTGCAG -CACGACAGCTGCACAAGAGCTACCCCGAGTATTTGTGGCTTCCGAACGTCCTGGACAAGG -CAAGCCACTCTGGCGAATACCTTGCTGCTCGATTAAAAGAATACAATTCCGGGCCGGGCA -AGGACGCCAAGGATTCGACTAAGCAGTCCTGGGTCCGGGAAAACCTTCTCAAGGATGGGC -TTGGCATCGATGATGCTACAGACAAAGAAGTAGATGCCTTGCTACACCCGGATCCTCTAG -TCACTCCGGCATATGTGTTCGCGGATATGATCAGCCGCCGCGCCCAGGTCGGCTGGAGTA -CTCATGGACATTCTGGTAAGATAGTCTCGAGCAATGACCTGTGACAGTATGCTAACTTCT -CTCTAGCTGTTGACGTCAACATCTACGTGTCGTCCAGCAAGGATGCTTGGCCATTAGTTG -GAAACAACGAGAACACCGATGTCGGAAACTTCCTCGCTGACTATCTCGATCTCAATGTTG -AAAATGTCACAAAGTTGCTGCAAGACACCAAGTCTGGAGCCTTGCAATCATACGACTGGA -TGGGTGACCGCCTGGGATTGAACTTCCATACCGATGGATTAGACACTTACCATGGAGACT -TTAGGAAGCGATCAGCCGATGATTGTGGATGTGGTGCTGCGCACTAAACGATAGTCGTCA -CTCGACATTTCTCATATTGTATTGATTTCTGTATTAGGTCGCACTGTGAATATTTCGGGG -GTTCATCATCATCATACATACCTACATGCAATTCAGAGCTTTCATTTATTCCGATACTGG -TCTTCCTAGGAATAAGCATTGGGATTTTGGGTGCTCTCAAACTTCAATGTTCCCTTGCAA -AATTTTAATAGACATACATGCCACTTCATCACCAAACAATATCTTATCTCCAACGCATAT -TCCGAAAAAAAGGAAGAATCCAAATCACAATGCCTTGGTATTACAACCAGGACAGCTTTC -GACCATATCCATCATTTGGTGCACAATATTACTCTCCAAAATCACACTCTGACAATTCGT -ATCCGGACGAAGGGTGGTTGGCTTCATACTGATTGTGTATGGCTGGCAGGTGGGACAATT -AGGGTCAAATGGCAAAGCGTAGTCTCTACGCTGCTCCGGATCGTTGATACGGTTCAGTTG -GCGGAGGGTTTCGCGGCAAAAGAGGTAGAGCTCGAAGCATGTATGCTTGCAATTGGAGTA -GTGGGTTTCAACAAACAGACACATGGTGAACCAGCTTGGCAGATGTCGCACGAGGCTAAT -TGCTTTTTGTATCCGTCTGTCTTGAGCAAGCTGTATTGGAGTGTTGGCACGCTCAAGGTA -GAATGTCTGTTTGTGCTCACTGTTGGTTAGGTCGCCTCTCTGTGAGGCTTGAAAGGGTGA -TCAGTTGATTGTGGTGTAAAATCAGAGTTGAGTATTGTAGTGTGAGACTAGTTAGGATGA -CTTTTCTGTACTCAAATGGCAAACGTGTAGGTTTGAGTGAGAGGTTGGAAATCCATATAC -GAATGGGGCATTGCGCAGTGATAAAGTGATGCCATTGATAGAGCACAGTGAATAAAGCCA -GGCTCTGGCAACGAAGCAAGAGTGGGTTAGGCCCAGCATTTCGACGTTGCTACCCTATTC -ACTCCATCTATGTTTTGTTTATGAAACTAACCGTGAAAGGGACATTTCAGCTGGCCACAG -GCCATGTACAAAGCCTGTGTTACGAGGCAAAGCTTTTCATCGAAAGTAAAATAAGTTGAA 
-TGCCGACTGCCTGGAAGAGTGTCCTGTATAGACCATATGTTATACAAGACGTCAAGGGGG -AAAATGGAAGGATTGGACGCCAGCATGGCGAGACAAAGAGGAACAAACAGATGGGATAGA -CAAAGCCAAACACTAGATCCTCAGGTTGTGATAAATATTAATGTTCTTCTCCGTGGACAT -CAAATATCTTAAGTACTGTATGAAGCGATCATGAAGATCCAATGTACTGCCTGTCTTGCT -CAGCTGATTGCTACCGCCTGTAGTCCCAATTGTCATACTGAGGGTACAACGATCTTTGAC -AACAATTATCTTGATTGCTATGTTTCTATCCACTGAAATATAAAGACCAGACTGCTGATT -TGTAGCCATCTCATCATAACTAAAATGGCTATATAACCTATTCCATGAGGGAAGAACCAT -GGGATTTGTAGATATGCTGAATCACCAAAGGCATCTCTCTAGTAAAGATAGCCTCATGGG -TATGGGCAGACATGCTCTGATTCCACCCCATAGCCCAGCCTAGGCGAAATTCTGAAATTT -CTCAGACACCATGTGTACAGGCTAGGTGGCATCATTCCAGCTGGAACAATCAAAGGTCGC -AGCTGCTAGTTTATAGTAAGAGAATCTAATGTCTACCTCCATTTGTTATTCACGTGTCGA -AGTGGGTTGCTGTTCATTTACATGCAGATAATCTTCACTTCACCTAACCATCTTGCTCCA -TCACTAATCTTGATCGAATCTGTCCTCGATATCTCGATTAGCATCAAGCTCAATTAGCTG -GTTGCCCCATCAGTAGACTGACTCCCCTGGGGGAAGCGAAAAACCTAAAGATCGACAGGA -TTCTCCAGGACCCAGAGAGGGAGAAGCTCTCGAAAAGGCATAGTCAGGCCCTTGAAGCTG -CCGAGGCCCTCAAGGGCACCCTGATCAAATAAAAAATGACACGCGAGGAAGTATTACAAC -GGTTCAAGTGTCGGCAGAAGGTGAACGAAGACCAGTCACGCACCCGAGGTAGCGCCAGCG -CGGTGTATGTTTCTTTTGGCCCTCCCATTTGTCCACCATGTCTTGCTCCGCTGAAAGGCC -TGAATAAGATCATGATCACATATTTGCTTCTTGAAACTCACCACAGGGGCCCTTACATAC -TACTCAGATCAATCACCCAGGCAGAAAAGATGGCTGCTATGATGGTCATTGTGGAAGATG -AGCAGAAAAATGCGCTCCCGCTCCAAATTTGGAATGAGAAGCTCCGCTGCCAGGATGGCT -CTGTCGAAAAAGGACAGATTTTGCTTGTGAAGGAGCCTTACTTGATTTTGACATTCGAAG -CCGAATATGGAGTCAAAGTTGATCACGTTTCTGATATCATGTTCATCTCAAAGTCCAATA -AGATCGTCCCGTCATCTTGGCGAGAGCGGCTCCCAGAAGATGAAACCTCCCAGTGGATGG -CCGGAGATTGGCTAAGCATGGGTAACGAGTATCTCAACCAGGGCAAATTCTATGCTGACA -CTGAATGGGTAAGTCTCAGTAGAACTTTTGACAGCGAAGAATGCTGATCAGAATGACATT -TACTCCAAAGGATTGGAGTGTTCGCCGACAGAGGAAGAGACGCATTACTTTCTGTACTAC -CGGGGCCTGGCATTTTTCCGGGTCGATGAGTGGGATGCATCTATACGTGATCTTGACGCC -GCCCCAGCAGGCCCAAAATCCGAGAAAGCTGTGAGGGGTAAAGCCCAGGCCCTGTACCGT -CTGAAAAGGTTCCGAGAAAGCTGCGATGTATTCATGGAACTCTGCAAGAAACACCCAGAA -GACGTCAGCGCCAAAGAAGATTTTTGTGAAGCCATTGCACGTCTTGCAGAGCAAAAGAAG -GGCGGATAGAACTTCGAGAAGATGCAAGAGAAAGCATCGGAGACCTACCCACCACTCTTG 
-GACCATGCGACATAGATTGGTCCAGTGACAGTCCTAAAAACCAAGTCCCAAGGACGAGGC -CTATTCACAACGGAGGCAGTCAACGCTGGTGGCTTAATTCTCTGCGAAAAAGCCTTTGCA -CATGCAACTGAGCACTCTGGCAAGTTAAAGTGGAGCAGCACCCTCCATATCAACACCGAA -ACTCGAACGATCACCAGAGGAGGCCAAGCTGCGCTCGCATCGTCAATCATTGAAAAGCTG -TACAAGAACCCATCATTAGCACCAGTAATAACCGATCTCTATAGCAGCGGATTCAAGCAT -GTCTTAACAGGCCCCGTTGATGGCAAGCCAGTGGTAGACACGTAAATGAGAAACAAATCA -CTCTGTGTTAAACACACAAAGCTAACAAATGTCCCCTCCTGCAGATTTCAAATCGCCCGC -ATTATTTCCCTCAACAGTTTTGGCAGCCAAACCTCTTCAAGAGCAGATCATGTCCATGAC -GCGTGCGACGCAATCAGGAACTCCAGAGCGCTGCACAACTGCGGTATCTGGCCTTATGCC -TCAACAATCAATCACTCATGTATGAGCAACGTACACCGCGCCTTCATTGGCGATATTATG -GTCATCCGCGCCGCCACAGATATCCCAGCAAACACCGAGCTAAAAACTTGGTATCTGCTT -CCCGCCCCTGAGAACCAGCCCATGGATTTCCGCCACTGGGGCTTCGAGTGCTGCTGTGCC -ATTTGCGTCGACGTTAAGACAACCGAATCCCACATTCTCAAGACGCGGGTCAGACAGCGC -GTGCAGATCGCTATGGCACTAGAAAACAGCTTGAGTAAGTTGAGCCTAGCACGGGCTGAG -ATGCTCATTGAGTGTACGGCCGAGACATATGCGCACCCGCTGGAGCAGGTGTCGCGACTT -GGGTTGTGGGGGCCGCTTACTTTGATCGCTGGGTTCTATGAGAGACATAAACGGCCGGAC -GAAGCCGTGAAGGCTGTGGATCGGGCTCTCGCATCGGTTGGATTTGTGCTTGAGGGGGCC -GCTTGTTGTCAAGAAATGGGGGCTCACTATGGGTGAGCTTGTTGTTGCGTTGCCGATCCT -GCACAGATCTTTACTTGAGACTGCACCTGAGAGGGCAATGCAGGTGGAACGATATGCTCG -CAAGGCTTACTTGATTTGCGTTGGGGAGGAGACAAGCTTTGATATAACCTATGGAAAGAG -CTTGAAGGCGGCTTGTGAGCCTCGTCCTAAGGATCAAGTTGCAGGATGGGTTGTTTGAAG -CTGCTTGTTTTGCCTGTATTAAGTGACGAATATGGAAAAACATGTACTTTTGTAAAAATG -GAAACCATCTACAACAAATGGTTATCCATGTACTCCGTGCTGTAGGCGAATTCAAGAATG -GGACTATGAAATTCGGGAAGAGATTACTCATATATAAACAAAAGATCACGAGACCTGGCA -GCTACTTTGCAGTCATGTGCAGGGAATCACAGATTATTATCAGAATAGCGGTTCATCGCA -TGAATTGGCCAAAAATAAACGCCGGATAGGTATACCAAACAAAACATGGAGGAAAGCTGC -AGATGCGAATGCAGTCAACCATTGAACATCACAACCTAATGGAAGGGGGGAAAAGATATA -AGAACCTTGGGATGAAAGGTATTCATTAGGTGATTTGTTCAAGGTCAGTCAAGACAGACA -TGAAGCGAATTCGCACAGTATATGGTGAGCAAAAAAAGTACTAGATGCTATGCAATAAGT -ACGGCAAGAGTGAAGAAAATAAACATGATACAACAGTGATATATAGGTCAAATCAGCAAA -ATATTTATAGCGTCACGAGGACAACAAGGGCTACCAGGACTCCAAAGAGCGATCCACGAA -GGACACGCTCATTCTGGCCGGCCGCACCGCTGTCCTGAGTGGTGGTTCCCTGGGAGAAGC 
-CCGAGGAAGTGCTAGTCTCCTCGCCGTTGGTGGTGGAAGTgccggagccggagccggagc -tggagccggagcTGGATTCATTAGAATCATTGTTGGTCATACCACCGGAGCCACCCTTGC -TGGTGGGGATGCTGTTGGACACGGAAGTGGATGATGAGCCCTTGGAGGTGGAAGAGCCTG -ATTTATCAGCACCGAGATCCATATCGAGACCAGTGGCGCCAAGGGAAGCAAGAACGGTGG -AGTTGTTGGTGATGGCGATGTCGCCCTCCATGGCAGCGGAGTTAGTGTAGACATAGGCCT -TGGAACCCTTCTTGGAGGCACCGGAAGGAGGATCATAGCACTTCACGGTGACCTCACCAA -CGGTGGCGTAGTCGTATCCGACATTCTTGATATCCTCGCTGTTCCAGTCAATCACACCAC -CAGCCCAGTCGATGGTACCCTGGGCGTTGCTGGGCTGGCCGGCGGGCCAGAGCGACATCT -GAAGACGGGCGGGTGTCTGAGGGAACTCGTACTGCTTGGAAGTCTCGTTGTAGGTATCGG -CCTTCTTCAGGGTGCGCGCCACAGTGCCATCGACGATCCAGTCAACCTTCTCGGGAGTCC -AGTCGATCTCGTAGGTGTGCCAGTCGTTGAAGGTGTCCTCGCCTTTGACGCTGATATTGC -CGCTGTTGTGCCCTAGTTTCTCAAGTCAGTATTGATGATTCGGACTGGATCTTTTTGATT -CTTGACTTACAGTCCAGGACACCCTGCCAGTAGTAGTTGGTTTGCACGGCGGTCAAATCA -GCACCAACCCACTCGTAGTCGATCTCATCCTTGACATCGGACAGAAGAATGAAAGCAGTC -ACGACACCCTTGCCGCGGGAACTCTTGAATTTGCCGGAAACCTTGCCGTACCAGATGTAG -TGGTTGTTGGCGAAGAGAGTACCGACACTCTTGGCGGGCATGGTCATGATCAGGTTGCCA -TCCTCGACCTTGACCTCCTTTCCGCTGTAGGTCCAGTCGGCAGCGCTGGAGTTACCGAGA -TACTTGTCCTGGGTGACGGAATTGTCGAGGTTGTCCCAGTTGTAAGTCTTGCTCTGACAG -ACGGGCATAGGGGCGCAGGAATCGAGAGAGAAGGAGGCGAGAGGGTCGCAGCCGCCGAGA -CAGAAGGCGCCAGTACCGCACTCACCATATTCTGTATTGAATGAGCCATTAGCTACCGAT -TCCGATCTGAAGGGCGGTCTCACTCACGAGAGCAGCAGGGGTATTTCTCATCCTTGCACT -GCTTGCTGGTGCTGCAGGATTGTGCGGCCAAGGCGCTGACTGACAGCGAGGCCAGGACGA -ACGGTGTCACAAAGCGAACCATTATGTACAGTTGGTATGTACAGCGGAAATGACAGGAAG -AATTTGAATTCAAGCCACAGGTAAACGAGGGAAAGTGTGGGGAAGCAAAAAAAGGGGTTA -GGTGAAATATGTATCAAAACGACAGGCAGCAAGTGAAAAGGATATTCCACTGCGAGGATA -AAAATTAGAAAAGAAGGTCGATCGGAGAAAGAGGGGGGACGAATAGTTATTAATACTGAT -ATTCATCTCATACTGAAAAATATGAAACCATGAAAACCATGAAAACCTGAAAACATCAAA -ACGTAAAAACATAAAAACATAAATCAACAGACACTAACCGGATTAGGATGGACTCCGCTG -ACCCTAGAAGGGAGGGTCCACGGGTCGGAATTAAAAAGGGTCGGTTCGCACCTAAGGGGG -CCTCTGATTGGCTGGTCTTTCCTTTTGATTCTTATTTTATTTATTTTCTGTTTTCTTCCC -TCTGGAGAAGTTTGGGGCATTTCAGTCTTGGGTATAGTATTAGATGGTATTCAATGGTAT -TGGAGACTTTTGGTAAGCAATCTACCAAGGTTAGGGGGTTGATATGTCTAATAAGTCCAA 
-GGCTTCTCACAAAGGTGGTATGACAAAGAGTCTCAAGGTAATTTGAAAACCAATCTTAAT -TTTGACCCAAATTAACCTATCTAGCCAATGATTCTCAAATACCCATGCGTGACCGTTAAT -TGAGACATCCCCAGGTGTTTGTCGAAAATGGATGTATGGATACAATACTAGCGTGAGCTT -GGGTACTTTCCGCTTTCTCTTTCCCTTGATCGGCGCATTTAACAAGTGATCTGCAGTTTA -GAAATGTATCCTCCGTATATGTACTCCGTAGTAACGTACAATTCCGTTACAATTAAATAT -CCACTACTCCACGCGGAGTATACAATGTTGATATATTGCAATTTTGTACAGGTACGCTTG -GATACCCCTATAAGGAGTAGGGAGTGACTCGGACTTCCGATGTCCGCTGGGCGTCTTAAG -CCGAACCAATTAGGTTTAGCGAATTCCATCTAGTTCACGGCCAAAAATAATGTGGCCATT -AAGACAAAGAGTAGGTATCAGCCCCAAAATTTGATAATCTTATTTATGCGAAGCCTCACA -TTAGGCTGCAAGCTTCCAAGACCTCGCTACACAATACTCTCTCTCGGCGCACTCGGCACC -AAGGACCCGTATTCCGCACTAGGAAGCTCACTTTCGCCTCCAAGACTTGACCTGCGAAAT -CTATCCTTATTAATATGGCGATCCAAGACTAGCTAACCTTATATGCCTCGGTTGTCAGTT -GCCAAGCACCAGTGGACCAAGCACTGCACAGGAACGCAGATCGCTAGTGTCGGGCACGGG -CGTAAAAGGGAGAGATCGTGGTCCTGTCTGATTTTCGTTCTCGGTTATGCGTGGCTGAGT -GCGGTAAAAGGGGGGAGAACTGATAACCGATAAGGATATCCCTGTGAACAGGTCCTTTTT -GCACCCCCTTGCCACCGATTGTTTTCAGAAGGTATTTTTTCTAGAATGAGTTGTGACAAT -CTCCAATCTATATATGGAGTACAATCTATCAAGTAGATCCAATCCGTACCTCGAATGTTG -CTATGGACGCTTTGGCGGCCAGGAATGTGTCACGAGGGGATGCAAGGGGATTTGATTATT -GGTTTGTTTTCGTTGTCAAACAAGGTCTGATCAGACCGCATTGTATGATGCCATTATATG -ATGCAAGAGCAGATCTGGTTTGGTTTAAAACATGCGAAGACACGTTTTCTCCATGTTGAA -ATTGTGCGTCAAATTCCATGAGCTAATGTGTAAGATTGCCATTTCTGGCTTAGTGCCCCT -ATGTGCTCATATAGATCTAAGGTCATGGGTGTAGTGTTTTCTATCACCTGCCATTGAACA -GCAAAATTGTCAGAAAATAACCAGGAAGAGTAATTCAATATACATTGCCTCTCTCTACCA -CGCATGTATCGAAGGCCAATCCTGTTTTGATTTCTAATCGGTCGCCATCTAGGCAGAGGA -ACATAACTCCAGCCGGCTTTGAGCGATCTGCAAGCACGGATGTCTGTTGATGTACTTGAC -GGGGCCTTCCACATCCCTAGTTGTTTTGATGGGTTATGTTAAGTTTCATCCTCTAGTCGT -GGGACTAGGCTGAGGTCTTGATGGATGTTGACTGTGATTGATATCTTCAAGCTGGTGAAT -GCCAATGGGAGAGGTTGCCGAGCAGTTTGGTCAAACCAGCCAACAGCAGACTGACCCCAA -GATACTCGGGTGAAGGGAGAAGCCGCGAATGGCGCACCAAATGATGCAGCACGTCCGGGT -CCATGTTGTTGTAAAGTTGTGATCTACCAATTGGTGGCTCTCACGGATTAGACATCGAGC -CGTGGCGAATTCCTTCAGAATGTGCTGGATCTAATGGGTCACTACTACATTTCAGTACGG -CAAAATGCTGGAGTATAGGTAGAATTTGATGTTGAGGGTATATCTTTTGATTGCTGAGCA 
-CCCAGGCGAAGAGAAGCGCTGTGCATAATAGATGGCTGAGAAGTGAGGGGTATCAGACGT -GAAATAGTTAGACAAGGAATTCACATTGCAGAGACGCAGTTGGAACTATCAATAGGAGGC -CAGGAATACAGGATTTGAGCATTAAACGTAACACAACATGTAATATGACTTGTAGACTAG -TTTTTGGAGCAAATTTCATAATATTGAAATCCGTGAACCTCTAAGATCGTAGCGTTCAGG -ACCTCAACGTATACAAGGTAGTGTTTATAAGAAGAAAGACCTAGATAAGGCCGAGAAGAA -GCTTATAGTAAGAGTCTATAGGATCAGATCTAACTCAAGTCTACACTGGCAATGGCTCTA -TAGAGCTGAGGTATAAAACAGGCCTTCTTTAGATCTTCCCTCGACTCTCTCATCAACCAT -TGCCTTAAATTGCATGTGTCCGGTTCTCAACAATATGCTTCAACGACTGCATGGACTATT -TGAAATTCCTCGTTCTTTATTTCATTCTTCACCTTGTCACCACCAGCTTGCACGTTTCGC -TGCATTTTCTCGGCACTGAGTACCCAACACCCAAGATCTACATATCTCCATACTTTCATT -GCACCTTTACTTCAGCATAACATCATAACTTACTCAGAAAATGGCTGCAAGCACTTTGAA -CAAGATGCGCAAGGCTCAGTCCCTCATTCGCGAATGCCATAGCTGGGTAAGTCATAGCTA -TTTCTCTCAGAGTTACTTCAATATTTGTGCCAACTTTCTCGATAAACAGTTCGTACAAAA -TGACGAGATCATGCTGGCTGTTGAGTTGTGCAAAATAGAAGGCAGCCTCGCACTTTTTTT -TGAGAACAAATGTTTCGCCCATTCTCCCTGTCCTGGACTTCAAGATGAGGAAATTTCCCG -CATTGTGAGTACTGAGTGCTCTATAAGAAATGATTATGTCTTGAATAATAACATTATATC -ACCCACAGCCTCCATCTGATAGCCGTAGGCGTGGAAACCACCGTGTGCCCTACAGTGGCA -GCCGCAGACCAGATCCAACTATCAGGAGCAAGACTGAAGTCCCTGCAAGCCGCGCTAACG -GCGGACAAAAAGCCCCGGCTCAGGCAACAGCAAGTAAACCTAGAGGCATAGTGAGGAATT -CAACCTCCCCTACTGCCGGCCTCAAGACCCCTGAAGCTCGAGCTACCGTGACCAGATCAT -CGCCGCCAACGGGCGTGCTCTTGGATTTTGAAAACGACGTGGAAGTTGTCAAACTGCCGC -CAAACATGGTAGCGGACATGGAGCCCGCAAATACAACCAATCTCGGACCGAAGGAAAAGC -AGCACTTGCCAGAGTGCCCCGCTGCTGTAGATATCAATGAGCCAGCTTTGGGGGGCTTGA -TCCGGGTGGACCGTGATCATAGGCCCCTCCACCAGACCGTTGCTACTGCGGCTGGTGTGA -CTTCAGCAGGTCTGGATCCTCCTATCGGTAGTTATTGTCCTCCCCGGTCGGTCGAAGAAG -GTGTTGTACCAGTCTCATCCCAGCCACAGTTGCCAGATTCGCCCGCGTGCGACTCCCATG -AGGGGGGGTCACCATGTGGTCAAGCATACCAGCCGCATGGAATTTCCGAGAACCAAAAAG -GAGATTCCAGTGAGGAGTACTCTGTGATGATTAGCGCAGGGGGCTTCACCGAGGGCAGGT -TGGTTGTTCTTGACGACGATGTGGCTGTCATTCTTCCAAGTGAAACCGACACAGATACCA -ACAGCACTACTCATCCAAAAGAAAAGGATGTTCTCGCGGGTGGCCTTGACCCCCGGACTG -GTCATAATCCTCAACCTGAGACTTTGGAAGATAGAGAAATCATTTTTCAAGAAAACGTAG -CAAATGGCATCCATCCAGATTCTAAGGCTTTCTTCGTCAGTTCCGATGATGTCTCTCCAA 
-CTTCCCGCCGTGCGATTGATCTTAGAGCAATTCTCGACGAATATTCCGAAGTGATTATTT -GCCAATGACCATTTCTTAGCTCTTTTCGTTTGTTTTGCATTCTACGTAGGTCTCTGCACA -TTGCTTCGTTCTATTTTGTTTTTCCTGTCATTTATCTGAATTATCTAAAGGCGAAGAAAC -AGCGTGGTCAGGATGAGATTTGTTCTTCTTCTTTCCAACCAGAGCTGTTTTCTTCTTGCC -ATTGTCTATTGATTTTAACTTCTGCATATCCTGACCCGAGCAACCAAATATTAAGGTGCT -TGTGAAAAGAAAATTCATGATTTCTGTGGCTGGAATGCACCGACCCTTAAGTCTTTCTTA -ATAGGTATTCGTCTTGCGTATTTCCTGGGGTTTTTTTATCCGCATCGTGTATTTCTTGCT -AAGGGATTCAGGTCTTGTTCATGTGATTTCTCTAGTAAATGGAGGTTAGATCATACATAG -CTGATATGATATGTGTACGCTACTACATATGCGAGACGAGGAGGAGCTAAGATTCCATTG -CACCCAGATAAGTTAAGAATCTCGTAAGAGTACGCACGATAACTCATTAACCAAAATAGC -AATTGTGGGATTCAAGCAATACTGCGAATATATCGAAGGTCGGTTTCTATTTTGCAACAC -AAATCAATTGTTCCAAAATTAGATTCAAGAAATCCGACACGAAAAAGTGCAATGGGCTAC -CCGAGTGTATCAAGGAGCCGGAGCCCTGAGCTTTGCCATTATAGGGCGACTAAAGAGACC -AGGATAAGGATGGTTCAAACTGACGATTGCGCAAAGTATGAGGAAAGGGCTGGGCTTTTG -GAAGGGGCATGTTCTGTTTAGGCTCGGATGATGCACCGTCTTGATCACTTAACCGATGGA -TCGCACGAGGCACTATATATAACATCACCAAGTAAATATCATGAAAAGTATATGAACGAC -CAGCGAGAAGATCGAGAGCCGACTACCCGAAACAACATAAGACGGACAAAGAGAAAAGAA -ATCCAGGCCTTGTGAGATACATAGAAGCCAAAGACCGGGTATAGACAGTATGCTGAGTAT -GCTGGATATCAAACTAGCCGCCATTGTAACTACCTATGACGTTGGTGTCTGCTGTACTTC -AAACTAATGTATGATTTAATGGCAATAAGTTATCATATAACTATAACGATGCATGTGGTT -CTCAGCAATGAGTTCAGATTTCCCCACGCTGAAAAGGTCGATTGAGAATACTGTATGGTC -ACACCAAGGAAGATGGAGACCTAAGTATAGATAGGAAGATCACGATGGCGAGGAGAAAGG -TATATCAGAGCAAAATACAATGAATTATTTGGCCCTTTGTCTTTATCTCAATGAATGTCT -AAGACCGGTGGAGACAAAGCCCTGCACAAGATGTTAAGAAAGAAAAGGGACCACAAAGCA -GCCAAAAAGACGCCACAAAGGTCATTGTTCGTGTATAAATTGAAGTTTCTAAAGCTGGGA -GAGACAGGTTGCGTCCATGCTGACATTCTAAATCTTGATCATGTCAGACGTGATCTCGTG -AGTCATGACAGTCAGTGTCGGCAGACAGGATGGTATAGTCGACTGCCCAGTCCGTAATCA -GGCCTAGATACGAGTTATACAGACTCTCTTTAGTTCCTTTGTCACTGAAGATGGCGTGCA -CATTAATCATTAGCAATTCATTGGCGTAAACGCCCGCGCTGTCAATGACCAATGGTTATC -CGACAGAATTATCCCACAGTTGGGACTTGCAGTATGAGTGATGGGCTTCAGTCAGCGGTT -GGTGATGGGAGATACGTTAGCTTGTGGCGGATTGGGTCGGCAGGAAAGACTCACGCCTGG -CACGCCGGATCGGACAGGGAGATAAAATACCGTCAACACTGCCACCTCTATTATTATTGG 
-AAATCCCCATCACATGCGATATATCAATTCAAGTTTGTTGCTTAGGTATTCATGGACATC -TGGGCACTAATGGGCTTTCAGCGAAACTTATCCGAAAAACTATATGGACCTATCGGCTCG -TTTGGGGTGTTCAGTAGATCTAAATAATATTACATACCATCAACAAATTTTCTCATAAAT -ATTCCTTTCAAATTAGGTCCAGGTATTGAAAAATTGTTCTACAGAGTTATTGAGGAGAAG -ATCGTCCAGTGACTGCCAGCCCTCCATAAACCCAAGCATATCCTGGGGCAGGTCGGCCCA -CTCTACAGAGCCGACATCCTGAAGCGCAAATTCCGAGTTGATAACCTGAGTACTGTCGGC -TGAACTCTCAAAGTGGAGATTGCCGCTAGGTGTGTCATCTTTCTTACCCCCATCAAGCCG -GTCCTGTATCTGAGTAATCCGCTTTCGAAACTCGATAAAGACATCCCGATCGCCATTGTG -ATCGACGCTGGGCGTTGACAAGCCTTCCACCCGCTGCCGAAGCAATTCTAGGGTCCGAAG -ATACGCCTGTGTTCTCTCTGGGCTTGAAGCTGCAACCGTATAGCGGTACTCAACGAGCAT -GGCGAATCCCAGTTGTATCCATTGCGTGTTGGTGAAGCCTAACTCTAAAGCGGGTGGAAG -CGAGATGTAAAGACTGATAATTGATGCTGCTGCATTGACAGCTGCGTGTGACAATTCCTC -ACGCCAGGTTTGGGAGGGTGGAGCTATTAATGCTGAGGGAGGTTGTTGATCTGTCATGCT -GAGAGCTATTTGGTAAAGACAAAGACCTGTAGAATGGAAGTGAAATGCAAGAAATGCTGG -AAGGTCATCAGTCGGGCATCCTGAATAATAATTTGATGGAATGCGAATCACCCACGAGAG -TCATAGATATTGAAATTTAATCGTTTATGAAAGGTTGATAGGTCGTCTTTTACGCCTGCC -ACGTAGAGCTCTATACCCGACCCTGGTCGCAACAGTTCGTCCCCATGATCAGAGGGCAGA -CGATCTATTTTCTCGGCAACATGCTGTAGCTGCACAATGTGCATTATGTATCGATCGTAA -GAACACTCGCCTGCATTGTGGAGAGACTGGCAGCAGTCTTCGATATATGGAAAATAGGGG -AAGGTAGAGGGTTTTTGAAGGAAGGTCGCGACCCTAACATTCCGTCAGTATCCTATAGGT -TATAGGCCAAGACCATATTAACTGACGATGACGATAGGTAGTAACAGCCTATGACAGCTC -TCTTTTCATCATTGCTCCAAGATGCCCGCTCAAAATCATCTTGATTTCCAGGCTCATCGG -CTCGAAACTTGAGACCTGTTTTCCATGTTTTGGTATGTGGTGCTCGGTCCAGCCGTAGCT -CGATAATAAGACTTATCGCCAGTTGAAGATACTGCGTGTAGCGATGTGGCTTTGTATGGT -AATGCGACCTATCATACTGTCAGGTAAATATGATATATAATTCTCGGGTTGGTGGAAATG -CGCATTTACCATGCCAAGAACACCAACAGCCCTTGAAGCAAGTCAAAAGAGACCTCTCCA -TTCAGTATCATGCGAGAAGAAACAGCTTTCTTGACTTCGGCCCCGAGAGATCGCTGCAGC -GGCATATTTTCGTAAGATGCAGTAGCAATGATAGCGAGGAACAGGAACGGCCTCTCCTGG -CGCAGGTGGTTGACGGATACCTCTGGCGGAATGATCACGAAAGGGAAATGGGGAGTCATC -ACGGTCCTAAATACCTCCAAATAAGTCTGTGCCGTCTCTGCGGTGAGATATTCTCGATCG -ACAACGTCTTTCCTGTCATTCTCGTCGTCGGCAATAGAAACACTTCGCGAACTGGCCTCC -CGCTCCGATGGGACTCGGGAAGTATCTGCCGACACGGCGCGGTCATTTAATAGCTCTTTG 
-ACCCTCGCCTCAAGGGCCTCAATCTTCCTACAAGACAGTAAATTATATGCCCACTTGCAA -CTATTTCCGGGAATATGCATACTGGTCTTTTTGAAATCCATGGAACCGGCGTCCGGTTTG -GCGATAGAGACACGCCTGACCGAGGCGAAGGCATCTATAGATCGATCAGCCCTACGTTTA -TCCCACAATGTCTTCACTTCGGAAGGCCCTACCGATTACACACATGTGGATTTTCCGCAT -TACGAATGCAGCGAACTTTTGAGCTTGCGCAACTCTGACAGGTCCTTGACAGGATGACCA -TGATGAATACTGGTTATTGGTAACTTTGAGCAGGTGCCATTCGATCCCTGGTCCGATGTG -ATGGCGAGAAGATTCATAAGGGCGTTTCACACCCTTGATACCAAAAGGCAGCCGCCTGAC -AAAAGCCTTGAGGTATCGAAACGGGGCTGTGCAAAACAAGCTTACTGTGTCGATCGCTGA -GTGGACGAAATAACACGCTTGTCCTGACATAATCCACTGCCGAGCCGCCGGCTGGATACT -AGATCGCGATCACGCCCCAACGCGTCAGTTTCCTTGAATTACACATCATGATGGACAATA -TTCTATTAGCAGAGCGTGGTAAAGTAATACCTCAAATAGTAGTAGGGGCGAGAAGGAGTA -GATGCCAGTTAGCTAGTGGATTGCCCTAGGAAGGAAAGAGTGTCAAAATTGGTTGGTTTG -ATATGTGAAACTTGAAAAAAGTGGATGTGGACTATGTACAATTTAGGTGTGTTATAGGTG -GGAATTTAACTGTAGAAGGAAGTTTGACTACAGCATGAAGGCCAAGCGCCTGGTAACCAG -AAGGACGATAAAATGCTGTCTCGCCACAGGAATAATACAATGTTGTAGATCTGTTGTCGA -TCGGTTTCAGCTCCAAGAACAAGGTGGGTCTACGAAGCAGTACGGAGTACGGAGTATACT -ATCTACCAGGGTTCGTCCTCTATTTTTAGGGGGTTGATGCTATCTAAGCTCAGATCAAGG -GTACAGCCCAAGAATCAACAGAATCAACTTCGTGCTCAACCCTAAATCATGCAGGGACCA -GCCCCTAACGCCAGGATTGACTCAACCTTGACGGCCTCTGATTTATTCTTGCGCTACGAC -TGAGACGGTGGGAGCGGAATAAGCAATACGGAGTACATAATTGCTTCTCTGCAGTAGTCT -AAAAGAGTCCGTCTCGGACACTGATGCTCTTTTTCATACCAATGCGTAAAGATCATCCGT -ACAAGATACGGAGTACGCTTGCTTCTGCGACTCAATTTCAGATTCCGTGCACGCCGACGC -CCATGCATAGCTTACGGAGTACTCCGTACAGAGTATGTCTGTAGAGCAACATTTAGCCCT -AGTGGGTCGATTCTCAGCTTCGTGCCACTGCGCGCACGGACCCGGACGAGGCTGAACTCC -AGGCTGTAGTTGTTCGGTAAATAACTACGTTAGATTGAATGTATAGAGAATGGTCGAGGT -TTGAAGTGCGAAATTAAGACCAAACCCTAGTCTACCACCTGGGAGCCGGTCCTGTTCCCT -GGATTTGGGTCGTTTATTGGCTAGAAGCGCGGGCAAAATATGATGCCAGTAGGGATGTTG -ACAATCAGCCTGGCACTTCGAACCAAGTATGACCGAACCAATGGGGACAAAAATCAACCT -ATTTACGACTGTTTAACCAGGGATAGCAACCCCCAAAGACTTCCAAATGGCAAAGTGGTA -CCGGACTGAGATTTGGAGCCGGCTATCGATGCAATTTAGCGAGAGAAGAGGGTGACATGG -TGACAATATCAATCAATGATTACTACAAGACTTACAGGGCAGTCAAAGCATCAGGATCTG -ATGAAGTCGAAGGACAATAGTCATGCGGCTAATATCGAGGAGATCCAGAGCGCGAGATTA 
-AGGATGCTGGGAGGTTTATGCCACAACAAGTAAGGGGGTTACAGCCTGCAATATGTGTAT -CTGTGTATCTTAATAGACTTGAAAATAATATTCAAAGGTTAGAAGGGGCATTTTTACCTC -TTATCTTACTGTATAAGCAACCTTCAATCGTCGGATCGGAGATAATTCGACCATAGTTAT -ATACTCCATGCAGAAAATAAGAATGCGTAGATGTACTATATAGATAGAGGCTTGATTATT -TCAGAACGCTCTGCCTTTAGGCATAGATAATGAATACAGAATACACCTGAGATGAGAGAT -ATCAGAAAGATGAATGTGTTGTTACTACAAGAGTGGATGATCTACTCCGTACAGGGGATA -AATGTAAAGCCCCACACCAAAACCTCTGGTGGCGACCTGAAATTGGACTATTTAGGCATA -AGCACGCCTTTCAAAGATTCCTATGGCTCCACTCCTATAGAGACCCTGGTTTGAAATATT -CGACCCCTAGCCCCTAATACTATGTAGCTCCGATCCGAGATGCTGCTGTTTGGGGATATT -AAGTCTCAAGTTTTCAAGCCAAGTTTTAATAGCCCAGATAACGAATAACAACAGTATTGC -TCACGGGCATCATGACGACTATGACCGCTATAGCCCACTCATCTAGTATGTAACTCCTCA -GCAGCACCAATCTTCCCGCGAATAGCCGAGTCATCCAAGGGCCTTGTAACAACCATTGAC -GGTATTTTTAATCGTTAGTCGCAACACAAATTCATCACACTCCTCGCTCCAGCCCAGACA -CTCCAACAGACAAAACCTACCACCTATCAGGCGGCGCAATCACTGGAATAGTAGTCGTAT -CTTTTGCAGCCGTCGCAATTGTGGTGCTCATCATCTTTCGCAAGAGAATCGTGGGTTGCT -GCGGCCGTGATAAGACCCCAGTGATTGATAGCGAACACGCCTTGTCACCGCCCGCATATA -TGGCCCCGCAGGCGCAGTCTATACTACGAAATGGTGTTTTAGCGTGGCAGGCTTCGGAGA -TGTCATCAGAGCCAAATGCTATACATGAGATTGGGTCCATGGACGCCAAGTCAAAGGTGA -ATGGAGGGGTTATCCATGAGCTTAGTTGAGACTCTGGGGCTGGGGATGGCTGTCGACATG -TATGGGGCAATATATGATGGAGATAACGATGATGATGAATGCATTTGCCCGATGTAGCTT -GCTTCAACTATGTACTGCCATGGGGGTATGCTGTCAGGGTTGCCGATAATTGAATAACCA -TTAATGCATCTTCTAGATAAAAAAGTCAAAGTAAATCGTAAAAAGGTATCGCTTGAGCTT -TTGTTGATTATTCGGGTGCAGGTGGAGTAATAGACGCCATCCCAGTGATCACCAATCACC -ACTCCTGCCAGGTCTATAGATGGCCTATTACTTCAGAGTCCATTGTATGTGCTTCTGCTA -CTACTCAACCCATGGATTATCGATTCATATCGTCAACCAGTGAACCGGCTAATTGCCCAA -AACTACCAGGACAAAATAGGAGAACAATAGGTAAGGTGAATAGCAGGAGAGAGCAAATTT -TATTTCAGTCGGGAATGTTGCCATTTTCAATACAGAAATCATCATATAATGATCGATGCG -AAAACAGCACATGGGACCTTTTTAAGTATACCTCAATTTGCCCTCGGCTCTATCAAAAAG -TGTTTTATCTGCCACTTCAATTTCAACCATTCAGTACTTCGGCCTTGCCCCCTTCCAAGA -ATCGCTTTTGCATGTTTGAAATTTTTTGTCGGTCGATCAATCAATTCTTGTCTCTGCTTT -TCCTTTGATTCTTTGTTTGAGCTGCAAGAGCACTTTTCGCAATCATGGCATTGATTTCTG -GTGAATCATGATTTGGATTGTGGGGGTCTCTGGATCGTTACTCGAAGAAATACCGAATTT 
-TCAAAAATTGGTAGCAGATGCAAAGTCCTCCCATTCGTCACAGACTTTTACCTTTGCCTG -GCAATCATCTCTGATCGCCTTTATTGCATCGGCTCCTAATGGAAGCCTAGAGGGAAGTTC -CTTGCCTGCACATCGGCCCTGACCTCTGACAGCGTCCACTATCAAAGTAGCCGCTTTCAG -AGGATCTCCGGGCTGGTTCCCGTCCGCGGCATTACAACCTGCCTTAATCTGCGAATTCAT -CTCCACATATTCCGGTAATGGGTTCGGTGCTCGGTACAGAATATTTCCTGGTGTCATAGC -ACTGGTACGAAAGTAACCAGGCGTGATCATGCACGTTCGGAGACCGAATGGTTCGATTTC -CAGGCTGAGGTTAGGCACTAGCCCTTCAAGTAAGCCCTTCGAACCTGCGTAGGCGCTGGC -GCCGGGTGCACCATAGTATGCACCCACGGATGACATGAAAAGAATAGTTCCAGACCGCCT -CTCCCTCATGTATGGAAGGAAGGCACGTGTCAGATTGAGAGGACCAATGGCGTTGTTGCG -GAGGGCCTGGGTCAGGAATGCTTCACTGGAGCTGATGTTAGGTTTTGTACATCCATTCTA -CACGTTCTGTTGATGGCTCACTCGATCTCCTCGAATACACCCGCATCAATGTAGGCAGCA -TTGTTCACAAGGACGTCAATTTTCCCATATATTGCCCATGCTTCTTTGACTTTAGCATTG -AGCACTTCCGCTGACGCTGTAACATCTACTTCCATCACAGCGGCTCCGGCCTCCTTGAGT -GGAGCTATGCGATCAAGAGCATTTGTGCTTCCTGGTCGAGCTGTTGCAATCACTTGATCT -CCCTGGTTGTGATTGATTAGCAAAATCGGATTGACATGCAAGTAGAACTCAGTATCGTAC -CGCCGTAAGAATCGCCCGGACCAGACATTCTCCCATGCCAGAGGAGCAGCCCGTCACAAA -CCAGGTGAGCTTTCCAGTCATGTTGCTATGTTGTCTAGTTCCTACGTACTCTAGAAATGA -TAAAATTACCGAGAACTAGCAGAGCTGATAGCCATGCTCGATTATTTTGAGGCTACTCCT -TACATTGGGTGCCCTCATGCTAGGCTTGATAAGTATGCGACAGAGTCAAAGCGGATCCAT -TTTGGGCACACGCGCTTGCAGATCATTCACAGGAAGCCCACAAGTCCGCGACTAGGTAAT -CAGCTCCTCCACTAACTTGGCTTTCCACTCTTCCAAAAGCCCCTCGACAAATTCACGCTC -ATAAAAGCCGTCATTGAAATGACAAGCGAGATGTAGTTCATCTTCACGCGCCCATAGATA -TGTTTGCAAAATCCGGTTGATAATTTGAACACCTAGCCACCAGTCATCAATTTCCACCGT -AGTTGCGGGGCCTGCATGCACTGTTGGCAGACGGTCGTCTATAACACCAAGTGAGCTGAG -CTCGGGATGCGCGGCACCCGGTGCTTGGACTGCCAATTCTAGCGGTGCTGCCAGCAGTGA -ACTGATGCCCTGTACGCAACATGACATAATTTGGAGAAGTGGTTCTAATTCGCGTTGGTA -GTGGGAAGTCAAAGTTGCAGCAATGGCATCGTAATCCTTGTTGGCACCTAAGTCGATGCT -GCAGGGCCGGCCTGTATGATACAGGCCAGTCGCTCCCTGAGGCCCACTCCATGGCGCCGA -AACCCGATTGCGAACATCATATGTGTTAAAACAGACTAGACGACCGTCCACTGGAATTAG -ATGGGGTCGCACAGCAAGGATAAGGGCAGATTGGACAGCTGCTGTCACCTTCAGACCGCG -GGGCTTACTAGCCGCGATGAGTTGTTGACTGAGGTCTCTGGAGAGACGAGTACTTAAGCG -ACGGGAGTTTCTTGCTGTTGTATTCTGCAAAGTCTAGATGATAGAAGAGGGCGCTGCCCC 
-ACTGGCAATGACGGCCAACTCAGCTTCGGTGGCCACTTTCATTTCGTCCGTAATGTCAAG -TGATACACCAGCCGCTTCGTCCAAGCTTGGGGATATGTGTGACACTTCAGAGCCATCAAA -TTCCAGGTTAAATCGGGGACCTTGGGCTAGTAGGGTCAAAAAGTCGTGCTGTAGTAGTAT -CATGCCCGTTCCATCGGAGCGCCAATGCGGTGTACGGAAAAACAACTCCCTTGAGCGTGG -TAGATAATGGAGCACGAATAAGGAGGATGGGGGGAAGAATTGCATCAAGTCTTTCGGCGG -AGCGAGTCTCACTGGAGTGCACAACAAACGTCTTTCGTAGCCATTCTTCAAGGGTCTCCG -GGGACGGAACCGTATAGGTCAACCGAGAAGTTGGTTCGTCAGAAATAGCTGCAATCTGCG -GGTGTTGGTACCGTAGTACCTTCCACGCTTGTTGTACTTGTGTCACAGAAGGCACATTTT -TAAGCTTAATGACACTGGATATATGATACTGTTCCTTCTGGAAACGCGTGCCGGTTGCTG -CAATGCCGCGATATAAGAGCTCGAGCGTGTCGAAGGGACGTTGATACGGGCCAGTTTTGA -CCTGCTTTAATGGCATTTCATTAAGAGTGGAAGAGAATGAATACGATGTAAGACTTTCTC -AAGGAAAGGGAACAAAAGAATGTACAGAGTCGAAGAGAGGACATCTAGAAGTCCCTCATA -GGAAGGGGATCTCATCGGTCGCTCACGTTTCTCTCCACGTAACAGCCTCGCCAACAAGCA -GGCGAGTCATAGTAAGCCATAATTCACGCAGTCCTATAGCGCCCAGGGCTCCCCCCTTTT -TGGGGGGCTTATTGATGCATCAATGCTTTATTTAGTACCTCTCTACACCGCTCTATACAA -TTTACTGTAGATCTATTCATATATCAGCGTACTTATGTGATGCGTTTGATTTCTATACTA -ATCTTCGCATATTTCTGTCTTTTGTGCGGCTTGTGGGAGAAGGTTGATGGCGCCGGGGGG -TGGAATGTGCAGCAGCCAAATGGATGTCAAAGAAAATGCAGTAAGTTTAACTGACGCAGC -TAGAGCTGTTTCAGTGCTTACAGAAGTAATGAAATACCATTTTGCATTTTCAAATTCACT -TGATATATCCAAAATTGAGTTATCCGACGGCTCGGTCATAGGTACTCAACTGGAGAATGG -CCAGCGAGTCTATTCACCACTACCTACGAGTGCTATAATGCTTGGCTCCAAAATAATTGA -GAAGAGGTTCCAGGCGGAACAGATAGAGACACATGAAGCGCTCATTTTCAGATTCAGAAG -TCCATTCCCAGACCATTGGTAGATCCAGAGGACCAGCCCGGGATCAACAGTTCTATAGGA -GTTGAATGATCAGAGAAGCAGTGTGAAAAAGGCTGGTAAATGCTAAAACCGGTACAGATG -GCGTCTTATATGGGCTCGTTTCTCAGAGATTCGCATCTCCAACGGGACTTGATTGATAAT -TCTGGGAGTGAAAGGCGGACAAAGTATACGAAGCTGCCTTCTATTGAATTAAAGCCATTC -TCACTGTTATTAGTCGCTTATATTCCTTAAATATGATATTCATTAATCACATGAATTTCT -ACATTTAATTTTACATACAGAAAAAGATAGCACCCTATAATACAGCAAATGTGAAATAAG -ATAGATGCGCAGCATTATGTCGTAAGAAAAGATAGATAGCCCAAGAACATTGAGTAATAA -GGAAAAGCAATAAACCCCATGTACTAGATGCCTTCAGTCTTTGAACTCCATGCTTGGCCT -GCCCATGGGCTAGATCCGAAATCCCCACCAGAATGTAAAAACATAACCTGCAGCCCAAAA -GATTGTAATCCAAAAGTGAAGAAGATTGTCCAGGTAAATTCAAACGCCGAGATCAGTTAG 
-CGATGGCAAAGCAGGAACTAGAAAACAATGAGCCTAGAAATTAATAACAAGAAAGATTAG -GAAGTCGTACATTATGTGCCAAAAAAATGTATCATGCAACTTCGAGGATGGGTGTCACCT -CAGGCGTCGTTGTGGAGGGGGGGAAACGGTACAGAAACCGGAGTAGGAGTGTGCAACGAA -AAAGGCCAGGCTTTGTATGGGCTTGCTTGTTCACCGAGGAACCTTCTTGTTTGGGGCGAC -AGTTTTGAATTCGATTCTCCGCTGGTAGGGGACGGCACCACCGTCCTCTAGCCAGGGTCC -GAGATTTTTACATTGACTGACATCGAAAATCCTGAGTTGGTGGCAACCCTCCGCGACAGC -CTCGACGCCAACTCCAGTAACTCTAACGCAGCCACGGACCGATAAATGTTGGAGAGAAAG -TAGGTGAAGACCAATGCTCCTGAGAGAGGGATCGGAAATTGCAGAGCCGCAGAAGGCCAT -GTTGAGATACTTCAAATGTGAGCACTGCAATGCAAGCACCTCGGTTGCTGTATCTGACAA -TGCGCAGCAAAAAGTCTGGCAGACATTAGTAAAACATGATATCTCGCTTTGAGTTTGACA -GAGCTCAGCAAAGACTCGACTTACCAGATCTAACTCTTCTAGGTTTTTAGCCGCGTTGGT -TAGATGGACGATGGCGTTGTCCGTCAAATACGTGCAATCCGCCAAGCAAAGCTTTCGCAG -GTTGGTGAATTGCGCATTCCCCCAGTATTTGAACCCCTGATCTGTGATAGTGGTGCAACG -GGTCAAATCCATTTCCTCAATCCGACCAGCGGCATGAGACGCAATGTGATGCATAGATCG -ATCGGTGACATGTTTGCAGTACGAGAGGGTAAGTTTCTTCAGCTTTGGACAGCCGTACAC -AGTACATTCTGCTGTCTGGATTGTGGGTTTGATCGATGACTGTTTCTCGCCCAACTTCTG -ATCGGCAGGCACTACCCATCCAATGATCCGGGCAAGAAGTGTATCTCCAACCTTGCGGCA -ATTGCTCAAGTCTACCTCCTGCAAATTAGTGGCCTTGCTAGACATGTCGAGAATGGCAGA -GGCCGTGACATCCCACACACTCTTCATCTTCCACACGGTGGTATTAGGGGCGCAAGTCGC -AGACAATGCCGTGAAGCCTTCGTCGGTGATGTGAAAGCAATTGTTGATATTGACCGATCG -GGCTCGATTGCCGACAAATGGACAAATAATCTTAACAAGAGCGTCATCTGTAATCTTACG -GTTGTAAACGCTCAAATCTAAATCATGGACTACGTCCGCAGATTTTGTCAAGAGCTGTGA -CCAATGGGACGACACTGCCCTAAGCCGAAGAAGTTGATGGAGCTCAAGGAATTGAAAGAT -GTTCAGCAGTACACTGTCCGGCAAAAATCCATTGGAGCCATCGATCCGGGGCCGTTCGGT -CTGATACGATTTGAAGAGTTTTTCAGTTTCGGGAAGGGCGCTGGTTGAAGAACTGGCAGA -GCGTGGCTCCGTAGAAGATGGCCGCTGCTGCTCCCCTGATACCTCCTCGGGGGCTAACGG -TGCTAGCGAGCCGCGTCGACTTCGCATCCGGACTTTATCCAGACCTATGCTGAAGTAAGG -GTCTGGATCGGAGGGCCGTAAAACAGCCTGTTCTTCGTCTTCAACGCGCGAGGACTGATG -TGGTCCATCCAATTCCGTTTCAGTGAGGGTAAGTAGGGGCATCGACCGTCGCCGTGATGA -TGCGACCTTTACTCTGTCGTCGAGGGCGAGATCATTCATAGATGGTTGCGGCTCTGCATC -GTCAGACATGATAACATCACCGTCGGCTGCCCCCTGGATCCTTGCTTGGGCAGTGCGTTC -GACTTGTTCCCGTACGCCACTAGGGCATTTCTCCCAAAATTCAGCAAGAACGTCGCCGCT 
-GAGCATCAAGCACTCAACTGAAGTCACAGATCGAACTGTAGCGGTTCTCCGAGGTGCTAA -ATCTAGACTGACCACCTCGCCAAAGTACTGGCCTTGCTTCAGCCTAGCTTTGACCTCGAT -CCCGGGCCGTTCAATGAGCCCATTTGGACTGTTTTTGTTTTCTGATGAATGGTCCTTTTC -CGTAAGAACCTCCACCTCGCCGCGAACAATGAAGTAGAGCTCACGGCCCTGGGAATCTTG -TTTTATGATGTCGGTAAATGGAGGAAAAGAACACGGTTGGGCGTTGAGACCAAGGTAATG -AAGAATATCGGCAGGCAACCCAGAAAAAAGTGGCAGCTCCTTGAGTAAGAGACGTACGTT -GACCAATCCATTTGCCAACGCACTGGAGCCATCCCGCCGACCGGGGCTCGGTGACTTCCT -CTTCTTGTACACGGACTTGGCACTCGAGTCCCCATCATCCTCTGCAACGGCTAAATCCTT -GGAAAAGCTCTCACGCAGCCTTTTTGAGCCCCTTCGCGCCGGGCTCGGTAAATCCAGTGC -GGGCGCGGAGGTTTCTTTTTTTTTCTTTTCTAAAATCATCAATCGCTCCTGCGCCTCCTC -CCGAATCGCCCGCTCAACCTCCGGAAACCGAGGCAATATATTCCGAAAGTCCTCTTTTGT -CAGCACGACCAGCATGCATCGAGTCCGGGCGATAATGGTTGCAGTGCGAGGACGGTCCAT -GAGCAATCCAATCTCCCCGAAGAACGCACCGGGTTTAAGCTCGGCGTATATACTTTCACC -ATCCCGAGATGTGACCGAAACGGCACCCCGCACCAGCCAGTATATTGCCTTTGCCTCATC -ACCTTCCGTCAAGATGTAATCATTGGGTGCATGGAGCTGTGGGCGCAGGTGCTGTCCGAC -CTCGATCAAAAAGGATTCTGGTGTAGATTGAAACAGTGGGAAGGATCGCAACCGATCTAT -CAGTTCCAAGGGTAAGGCTTTGATGTGGGAGGAGGCCAAGGGCGAGGGACGCGCGGGTCG -GTTTGGGTTCATGACCGAATCGAACGAACGTATGAGGGATACGGTGTCGGATAAGGGCGA -GACGGCCTTTGTTGGACCACCACTGGACCTGCCATGGCGACGCATTGTGATCAAGGTATT -CTGAACGAGGTTGTGGGATATGGGATGGATTCAATTATTTCTCCCAGGCATAAAGAAGTC -AATAAATCGATGACGTCGACGAGAACAGCGTCGAACTTCGCATCGGCATCTTCGCCCCAA -TGGGTTGCTGTCGCAGAGTTTAACAGAGAGGCATGGATGGTAGGGTATAGAAAGAAGCCA -AGTGGAAAAAAGAAGGTCAGAGATGATCAGTTCGAGCTAAGGTCTTCAGTGAGGTGGGGG -GTTTTCCGAATCGACGTCAAATGCGGTTTCTCCACCGATCAGCTGGGAACCTTCGGAAAG -CCTATGGGAACCTCCACCTTAGTATTATTCTGGATATTTCGAAGTTATAGGTCTGGTAAC -ATTCGATCTTGAACACttttcttttcttttcttttttttttAAAAAGTGGGGGATATCAC -TAAGCGGTCAATCGCATCCTTTGAACCTGCAGCACCCAGGCCGATCTTCTCTATCTTTTT -GCTTAAACCCGGAAACAAAGAAAAAATGGGGAAAAATCTACGGGATTTTCATATTGAATC -ACCTGATTTACTATATTCAATAGAACTTGGTATCGTGACAATGTCTAAAGGTGACTTTAC -GTTATATTACATTACGTCCAAGTACTCTATAACCTAATAACCCCTATCTTGACTTGTTGA -ACTTCAAGTACTGTTTATAAAATCTGATAGGATTCTAGTCCAAATGCAATCTAGTCACGC -CACAATGGCGTTGCCAATCGATTTGAGGCCTGATCATACTCCAACACAAGTTCCGGCGAG 
-GGTCACGTGTCGCTATGTAGCAGCCTGGGGGCAAAAATGTTCATCTTGTTTGATGCTAAA -GTGGGTATCAAAAAGAAATGCAAATAATACAAGTTACCAGGAGTATGGTGTGAGCGTTGC -TGTCGAAGAACACAATTGTAGGGGAAAAGGCCGGGATGGTCACCGGACTCAATATCTCAG -TGGTTTGATGAGGACCTAGATATAGTGGTCGGGGCGGCATTGACTCGGATGCGCGGCTCA -TCCCGCCGGTGTCGAATGCCTACCACTTGGAGCAGCATATATAGCAGCCATTTCTCAATC -GAACGCCGAAGGACGTCCTAAAGAGTGAAGTGGGAGAGTCTGTAACATTGGTTCTCGGCA -TTGACGTCCTCTCAATCGTGCTGCAAGCCTGGGACCTCCGCGCACAGAACAATCTAGAGA -GCCATTGTGACATGGGCCCACCTATGATCAGATCGATCCTTCCTTCTCAAGCTTGGACCG -ACGGTTACCGAGGAAGAAATCGACATATGCAGGGCGGTTTCCTTTCAAGGTCTCAAGGAC -ATCTTGCAGAAGACCATAGGTTAAGCTCAATTTCAACCCAAGCTTCGCTGTACGGGCGGC -AGTGTAGACATCGTGACGAGCTGTGGGCGAGCTAGGTCAGCTTTAGTTCATCCGGTAATG -ATCTCCAAGCAGAAAGGTATCCTTACCTAGTAAGCTGTATATTCCGGAGAATGACAACCC -AGCAGTGACTGTCGATAGAAAGTCCCTCTTGTCATGCCGAGCATAGTCCACTGTCTCTTC -GAATAGACAGAATGCAAGAGCGCCAGCACCAAGCTTGAATCCCATCTTCATGCCATCATG -AACGCCTCCCACCACACCAACATAATTCTTTGTCTTATGATACTGGAACCACCCTGTTGA -AGTCGTGGGGAAACGATGCGCATTCTCCGCCCGAAACACAAGCCCTGCCTTTTTGCTGCC -ATGATAGTATCCTAGTGCCAGCCCAGTTGAGAATGCTGACATAGTAGCATATGGCAACCG -CTTGCCAAACTCAATGCCTAACCGTGGAGGGCTGTCGGATTCAAACAGCTTATCCCAGTG -GACGAACTTAAGCACGATGGAATTATGCTGTGAGTCGGGAGATGGGGGGTTATTGACCTC -CATGATAAAAGCTGTGAATGAAGAACGAGGGAAATAGCAACAATATTCACGGTTTAGCTG -TAGAGAGCGTGGTGAATGTGAGGAACTCTGGCAGAGCTCAGAAATTCAGAGCGCGGAAGT -CGGAGTCCCGCTGGATTTTCCGCCTTCCGTCACTCTTCCAAATCAATTATTACAAGCACG -CGACCGACCCTTTAATTCTCGCGCAATCTTCAAGATGGTATGGAAATTAGCCTTTCGAAT -TGAGTATGATCAGATACAGGCTAATTATCGAGCAGCCGCCCAAGCAGTCCAAAACTGACT -CGCGCGCCGTGACAATCCTCTTCAAGAAGCACAAAACCACTGTCCTTCTAATGCTCCAAC -CACACGAATCACTCGACTTCACCAAAGAGAAGCTTCTGGGGGCTTTGAAATCGCGAGATA -TAAAGGACTTAAATGGCGACCTTCTTCCGGACGACTCCTGTGACATTGAGTTCGGTGAGC -CTGTCGACCGAGCGGACCTTGAGAAAGGCTGGAAGCGCCTCCAGGCCGATGTAAAGGTAC -AGAATGAGTCAGTAACTATAATGGAGGCGGGTCTTCAAAATGGACAATCGATTGCCTTCC -GCTTCCACAAGTCCAGTGAGGATCAGGATGGCGGTTTTGATATGGACCTTGATAGTGAGG -ATCCAGGGTGGGATGTCGTCATGCCGACCTATGAAGACGACCAAGCATGATAGGGTCTTT -ATGGATGACTTTCAAGAAAGAATTGGGTCAATATATCCAAATCTGTGGCTTTGGGCGTTC 
-CAGGGGAGGTTTTAGCAATTCAAACCATCACATGGCGTTTGTTCAGGGGGAAAATGCTAC -TAGGACATCAACACCGATATGATGTTTTACACTCATACTTTACAATAACTCTATATACAC -GCCTTCTTTATACAAACAACTGGCGTTGCTAATTAGTGCAAGTACTTTTGGGGAATTGCT -GCATCGATAGTAGATGTCATGATAATACTATCGCCTCCCGTAACTTTTGCTTAAGCATCC -TGAGGATAATACTATAAAAAGCCTAATACTAGCCACAATTTGACCTTGATCTTGAGCATG -GGAAATAGTGCGTGAGAATAAAGCAAGATGGGCGTGTCAGTACTTGATACTCCCAACATG -CTGTGTGACTGGTAACTTTAAATGTGGAAGGAGTAGTATTGCCACTCTCGAATTACCAAG -CACCACCACCAAATGGTTGCTCGGAGCAACCACTAGGACAACATGCACACGCATCAACAA -ATGAAATCACTTCAAGGAGCCCCCCTCGGAGAATACTACTGCACCCCCTACCCCATACAA -CTTTAGATCCATTCTTCGACTTTTCTACATTCCTTATTTTTGACGAATCGTTGAGCTTCC -CCCTTGGCCTAGCCATTGCAGGGTGTCGCCAACAGCCTACCGACCCCGCAAAACAGCTTC -GCGATTTGGGATTGATCCGATCTGACCTTCCAACATGGCTGCCTCCGACGGCGCGTCCTC -GATCAGCGTTGCAGTCAGAGTCCGTCCTTTTACTATCAGGGAGGCAGCCCAGATCACTAG -ATGTGACGAGGGTCCCCTGTTCCTCGGCGACGGTTCCCTCGCGGGTGCGCCAACACCGAA -GCTTAACCAGAAGGGTATACGATCAATCGTTAAAGTGATTGACGACCGATGTCTGTGAGT -ATTCTATACATTTTGAATATTGGGTCCGACATCCTGACTAGACTAGAGTGTTTGACCCCC -CAGAGGACAATCCTGTTCAAAAGTTCTCCAAAAGCGTAGTGCCCAACGGGAAACGCGTCA -AGGATCAGACCTTCGCCTTCGATCGCATCTTCGACCAAAATGCCTCACAGGGAGAGGTAT -ACGAATCTACAACCCGAAGCCTGCTCGACAACGTGCTCGATGGATACAATGCGACGGTAT -TTGCGTATGGAGCTACCGGATGTGGAAAGACACATACAATCACCGGAACCGCCCAACAGC -CTGGCATCATCTTTCTGACCATGCAGGAACTGTTTGAGCGGATTGATGAACGAGCAGGCG -AAAAATCGACCGAAGTTTCTCTTTCCTACCTCGAGATCTACAACGAAACCATTCGTGACT -TGCTTGTCCCTAGTGGTAGCAGTGGAAAGGGTGGATTGATGCTACGGGAGGATTCCAACC -AGTCAGTCTCAGTTGCTGGGCTATCAAGTTACCACCCGCAAAATGTGGGGGAGGTAATGG -ACATGATCATGCAAGGCAATGAGCGCCGTACCATGTCTCCTACCGCAGCAAATGCTACTT -CGTCTCGATCGCATGCCGTCCTTCAAATCAACATCGCACAAAAAGACCGAAATGCGGACG -TCAATGAGCCTCACACCATGGCCACCTTCAGTATCATAGATTTAGCGGGAAGTGAGCGTG -CCAGCGCGACACTGAACAGAGGAGAACGTCTGTTCGAAGGGGCCAATATCAACAAGTCTC -TCTTGTCTCTGGGAAGCTGTATCAACGCACTCTGTGATCCGCGAAAACGCAATCACATTC -CGTACCGAAACTCCAAGCTCACTCGATTACTTAAATTTGCTCTTGGTGGCAATTGTAAGA -CAGTCATGATTGTCTGCGTCAGCCCCTCAAGCCAGCACTTCGATGAGACACAAAATACTC -TTCGATATGCGAACCGAGCGAAGAACATCCAGACCAAGGTCACACGAAATGTGTTCAATG 
-TTAACCGACACGTGAAGGACTTCTTGGTCAAGATCGATGAGCAAATGAATTTGATCAATG -AGCTGAAAGCACAAGCAAAGGATCATGAAAAGGCAGCCTTCGTCAAGTTCCGCAGGCAGG -GCGAGAAGAAAGACGCTGTTCTACGCGAGGGTGTTGCCCGGATTCGCAATGCTTACGAAC -ACACGTTGCCTGAGAGGCAAGAGAAGACTAACAACATGCTGAAACTAAGGCAAATCGGTC -GCAGAATCGGTATCTTATCGTCTTGGATCGCCGCCTTTGATAACGTTTGCGCCGCGCATG -AAACTGAGGAGGGACTATCCAACCTTTATGCCGTTCGCAAATCTGCACAGGGAATCCTTC -TTGAATTGGAAGGTAGCAGATACCATTACAACCAGAAATTATCGAAAAGCACCTGGGACC -GAGCGGTGAACTCAGCGGTAGAAAACGCGGCAAAGCAGCTTCAAGAGTTTGATATTACCG -ACAACAGCGACTACGCCAATTTAAACAGAGAAGCAGATCTTCTTCGGTCCAATGCAGAGC -GCGAATCCTTGACAGCCGTGCTAGAACAGGACAAGGCTGGAGAAGCCGCTGCTGTGCAGC -TTCTGTTGCAAGCTCAGTTTGAGATGATGAACTCAATCGAGGAAATCATGCAGCTAAACG -CAAACGAGGCTATTAAGAAGGGGCGGTCTATCCTTACAAAGATGTTAGAGGACTGTTCAG -ATGCAGCAACGAACCTCGTCAAACCCGATGGCACCATGCCTTCTATTCCGAATGTCAGCT -CTGCCCAGCCTGCAAGCCCCGCAAAAACAAAGAAGCGGTTCAGCTTGGTGAGCTTGCCAC -CAGTATCAACTGCCAACCCTCCAGTCACCCTCACACATGTTGCTCCTGTTTCACCAACTA -AGGGCTCTCCGCGTCGCCGTAAAGCCACCGTGGGCCGGAAAAGTGTCAGCTTTTCGCCAA -AGAAAGCCCAGGTGAAGGTCCCCAAACGATCTGTCAGATGGAAGGACGATGAGGACGAGG -GCACTCTGACCGAGTTTCAAAAGACCCCCAAGAAGGTCGACTCTGTTGATGAAGCCAGCC -TCGAGAAGTCATGGTTGCCACCTCGCTCTGGATCTCCCATTCCACGAAATATTCCAAGAG -TTATAAGCCCCTCGGGCTCTATATCTCCCACTCCCGACGAGCCTGTCGAATCCACCGAGG -CTACCCTAAACGTTCAAAAGAACAACAGTCGCTTCAAGGCAGGATTTTTAACCAAGAGAA -ATGGTAGCTCACCATTAGCACCACCACCATCATCCAGTCTTCCTCTTTCAAGGAGAGAAA -GCTCCCCTCTCCGTGATATCGAAGGGAGCAGCTTTATGAACCGCACCTTGATGGACCGTC -CATCGCGGATTGCAGTTCGCAGTCCCAGCGGCAACTTCTCAAGCAGTCCTGTATCTGAAA -ACAAAGACAACTGGAAGTCAGACAAGGAAGAAGCAATCAAAATCAATTCGGCTATGCGAC -GGATGTCTAGTGGGCGGATTGCTAGTGGACAATTTAGTACTCCATCCTCCAATGCCCTGC -GAGTTCACCGTCGTCGCAGCCCAACTTCCAATACCCACGCGACCTCGCCAGCAGACAACC -AGATGTTCACCGCATCACAAGCGCGGCGCATGGTCAAGAGCGAGAGAGAACACGATGCCA -AGCATCGGGTCTTGAGCCCTCATACTCTTCCAGTCATGAAGCACACAGGACGTCGCACAA -CAATGGGTGGAGACGGTCGACCGCGAAATATCAGTCTATCCAGCAGAGATGCCATCCGTC -TCAGTGCGATGGCAGCGCCCCCTGCTGACCACAACTCACAAGCGTGGTAGTAGACTCTGG -TGAATGGCTTTAATTGAATTGGGAATTGGCGTTTGGCGTAGCAGATACCACATTTCGTTT 
-TTGTACACAGAATTCTTATGTTTGATTTGCATTAGGGCATTTGTACAGAATATCACTAAT -TTCGGGATGGAGTTGCCGAGCCCTGATCTTCAGCCAATCCCTGTCGAAATTCCCCGCTGC -TTTGTAAGTTCTGAAATGACGCGATCGCGGACCTCCAATTTTTGAGCTTTCTATTTCTTC -AATTTTTTCTTCTTGTTTCTTATTCAATTCTTACATCTACTCTGAAACTGCTCCCTTGCC -ATCGACAGCAGCTCACACAACTTGTCTAACACTCTCACATCTGAAATTCGCGTGCATCTC -CACTTATCAAGTACGTCTCCACCAGAATTTCCGCAGATGACTTTCGCTGCTCCGGTTGCT -TCGATCCTTTTCTCACACGTCTTCCGTCCCTTCTCCAGCTCAGCTTTTCCCTTAAACCTG -GCACAATCTCGCAAAATGTCAGGCACACAGATCGCGACCGTAGCAGCCGGCTGCTTCTGG -GGTGTTGAGCACTTGTACCGCAAGAATTTCGGCAACGGCAAAGGCCTCCTTGATGCCAAG -GTCGGCTACTGCGGTGGCGCAACTTCATCGCCATCGTACCGAGCTGTCTGCTCCGGTTCA -ACTGGCCGTAAGTACCACGCTATCTGAGACAAAAATTAGCGCGAAATTGCTTATCGCGAT -CTCATCTTCAAATGCTTATTTTTTTTCTCTTTTGGCCTCCACAGATGCCGAAGCCCTCCA -GATAACCTTTGATCCCTCAATTGTGACCTACCGCCAGCTCCTCGAATTCTTCTACCGTAT -GCACGATCCCACAACAGAGAACCGACAGGGTCCGGATGTTGGCACACAATACCGCAGTGC -GATCTTTACGCATGGGGAAGAGCAACAGAAGATCGCGGAGAAAATTACAGAAAAAGTCAG -CAAGGAGTGGTATAAAACGCCGCTTTCTACTAAGGTCCTGCCGGCAGGCCAATGGTGGGA -TGCGGAGCAATATCACCAGCTATACCTTGAGCATAACCCTGCTGGGTACGAGTGTCCTGC -TCAGTATGTACCCTTGTCCTTCCGGTTCCATCTGATCGGGTCTAACTTTTTGTTTCTAGT -TTCATCCGACCTTTCCCTCCTTTGTCGGATTGACTGGATCCCAAGTTGTAATTTGTTCTA -TTTTAGCCATATCTTCATAATGTGTACCCGAAGAAGATTGAATACAAAAGTCAATATTTA -TAATGAAAATGAAAATTCGCGCTTGAAGCCCTGGAGATCCACATGGACATGATATCTCTT -TACGTAAGGCCGCCTTTGGGTATTTTTGTTCGACCATAGGAGAGTTAGTTACGTTACTAG -TCCAATTCAACCATCCGTTGCCCAAGTGTCTCGTCGGGTAATTGTGCAATGAACATGGTG -AGCAGCGTGAGGTTTACACTTGCAAGTGCCGCAAACTCTCCGCTGGGCTATCGCGAACCT -ACTGGATTGACTGTCTAGAGCCAAAACACACGATTTCGCTATTGGTAGATATCTTCCCAC -GAGATATCGTCGACTCGGCAAGGAAAATAACATGATCAGTGCTCAACGTGGAGTTAGAAA -AACGCAATCGCATCCCTGCCACTCGGCCTCTACAAAAGATTGCAGCAACGCTTGTGCTCA -TTGTGTAACTGCTAGGTCGGCCAGGTAATTACCACTTGGATGATGTATATTATACAACAG -AAAACTCTGACAGGATAAGAAATATATGTATTAATTTTAGTATCGCTAGCTGGCCCGGGG -GCGTCTAGCTGGCGCCGGTGAGTGTAGACCTGTCACCCGTTCTGTGGAGATTGTGTGTAA -TATTGTTTGCGGTAGAGAGACAACAGCTGATAGAAAGCGAGTAGACAAAAAAGGCATTAA -AATTGTACTCTTGTGCAGGGATACATATTCTAACTCAGGCGGGTATCCCTTCCCACTGGG 
-CAGTACACTTCGACTACTGAAGGTACTACTTGTAAGCGGGCTACTTTTAAAGGTAGCCTA -TTGATTTCCAACCCCATTTTATCTAATCTATAGAGTAGGTCTCAGTTATGTATAATTCAA -GGAAGATTATAAGATACTAGGTGAATTTATCATGAGCCACGTGATCATAATAGCACGTGA -CTCATATGACCCGAGCTGTAATTTGGGGCGAGCCTTTATCTTATCTAATCTTATCCAGCA -CACCCGCCGTCAACACGGCAGCCTCGGTCTGAGCCTCGGTCTCAGCCCTCGAGCACTTTC -TCCCCACAAGACGTTCTCTTCCTGACCTCTGCTTTGGTGTTTAATTTTGCTAGCACCTCC -CATAATTGCTCTAGAAAGCATTTCAAGGAGTCAATACTACAACTCATCACCTTACTTATA -TCCGACGAACTTATAAGTTTCAGGGGGACTTCCCGCAGTCGCAACCATGGCGCTCTACAA -CCAAACTGTGGATTTCCTTTCCGGTGCCTTCTCAAATCACCAGACTCAACTTCCCTTGCT -AGCCGTGACTGGGGCTTCATTTACACTTGGTCTACTCGCTCGATCGGTGTTCCCTCGGAA -CGAACCGCTTCAAAATGTTCTCGTCTCGCCTCGATCGACAGTCTTGCCCGGCATATCTGA -GTCGGAGAATCGCCAGCTGCCTCTGCCGGCTGATGTTCTACCCGGTGCTCGTGATGTCGA -TAGCCCGTACGGATCGATTCGAGTATACGAATGGGGCCCGGAAGATGGACCCAAGGTGTT -GTTCGTGCACGGTATCACCACACCGTGTATTGCGCTCGGCGGGGTGGCCCATGCGCTGGT -GGACCGTGGGTGCCGTGTGATGCTATTTGACTTGTAGGGTAACACAACATCCAACACCTT -GAAGTATTGCTAACCCGAAGAAAATAGGTTCGGAAGAGGATATTCAGACTGCCCCGCTGA -TCTTCCACAAGACGAGCGCCTCTTCTCGACGCAGATCTTCCTCGCTCTGACATCATCGCC -TATCTCCTGGACAGGTGCAGACTCCGGTAAATTCTGTCTTACTGGGTACTCGCTAGGAGG -TGGTATCGCCGCTGCCTTTGCATCCTACTTTCCTCACCTTCTATCGTCATTGGTGCTCCT -AGCCCCTTCAGGCTTGCTTCGCGACTCGCAAATCAGTTTCCAGAGTCGACTCCTTTACTC -AAAGGGTCTTATGCCTGAGAATATCTTAACATCCTTGGTTAGCCGTCGACTGAGGGCAGG -GCCGTTAACAAGGCCCAAGCCCAAGAATAAGAAGCTCAATGCTGCGGATGCGCTCACCGA -GGAACTACCTTCACAAAACGCTGCAGCAACCCAAATTCTGTCCCGAAGCTACCCCCAAAT -CAATGTCCCTTCCGTCGTCACGTGGCAGGTAATTAACCATACTGGGTTTGTCCATGCATT -CATGTCCAGCATGCGTTATGGCCCAATTTTACGAGAGCGCCAGTGGAACAGATGGGCACG -TCTTGGCGAGTATCTAACTGCACAGAACGGCACCTCAAGAAACGAAAATAAGAGGCTGGC -AGATAACAAAGTTCACATTCTGTGTGGTAACAATGATTTAATCATTGTTAAAAGTGAGCT -TGTTCCAGATGCCACGGCTGCCCTGGGTGGCAATGTCATCTTCAAGTTCTACGACGCGGG -ACATGAATTCCCCAGCACCAAGTACGAGGAAGTAGCTTCTTACATCTTTGAGTTGCTATG -AGCCAGGAGTTCCACGAAATATATACACTCTTGATCGGGAGATTTGGGATATATCCAGGT -TCATTGGACTCTCGTTCAGTTCCATTTGGTAGATCCACTATACATAGAAACTGGTGTCGA -ATGATACAATTTTTTTTTGTTCAATTTGCTTTCCTTCCCTAGCTTCCAGTCTTCATAATC 
-TCCTCCATGGCATTGAGTTGACTTGACAGCCCCGTGAGATGTGCCGACCTTTCTGGCATC -CGTGCACCAGCAAAGTAAGGGCCTTCGGAGGCTGGTGCTCGTGCAAATCGGTCCACACTG -CTGCGGCTGCTTCCTGGCACTTGGCTGACGTTGCCTTGGCTTACGGCGTTTGTACTGGTG -CCATTTGCAACACTGTGGGAGAAGCTGGTTAGTTGGTGGATAGAAGATGAGTTTGAATAT -AGGTAAACATACGTTGTGATTCTTGGCTCCGGAATGGACATGTTGTGTGTGTAGATGGTT -GTTGGTGATAGATATGAGGCTCACACAGGCGGCTCAAAGGAATCAAAGAGTCAAGTCGCA -AAGGAAAGGCCCAGTAGCAAGAAGGAAGATTGTGATAGTTTGGTATTCAAGTACGAGGTA -CGTGCTTTGCTTTGCTCGGTATGCTTAGTGATGGACTGTAGAGGGGATGAGCTGCTGGGT -TGATGATGTCCTTGATAAAACCGAGGGGAGCTCCTATATATACACTGCAGGTCTGGCGTC -ATGCATGCGGACGGCATGCCTTTGCGGCCGGACACGCGAGCTTGAGATTCGCACCATAGC -CCACATGGACAGTATGAGCTAAGCGAAGCAGGAAACGAGGAAAAGATCGTCATCGACTGC -ATTGACTCCACTTGTGATGGCATCTCGACCCTTGAGCTCAACGTTTGCCAAGGCTGAGTC -TGCATGAACCATGCCGGTTCTAGGACTGCTTTAGCGGGAAAGCCCGAGGGCCCATAGCCA -AAAGCACATACCCGTTCGGCTCATTTATGCAAAACAAGCGACAATCTTGTTGTTATTGGC -TGAAATGCAGGTAACCGGGGCTAGGAGGCAGGTACCAGCCGCATAAGGGTGCGTGGACTA -AGACCCGCTTTTACGATCATCCTGAGCGTAAATACAACATTCTTTCGTCAGCACATGGCC -AACCACATGTTTCAGGTCTGTATCTAGACTCTCGACCTATGTGCCGTGGAATGAAGCGAG -GGGGCGCAATACAAATCGATCCCGGTTACCGTAAACGGATTTTCGATGGAGTTATCAAAC -AAGGGACAAGATCACGATCCTGAAACGCACTTCTGGAGAGGCTAGAACGCGTTGAAACAA -ACAAATGCCGAGCCGTCCTTGGCGGCAAATGCGCTTCATAATGGTTGGCATTCTCGCGTG -GGATCACTACGACGCAAACTTCCAGGATGAAACAATGTCGGGGGCCAACGTGGTCCATCA -CATGGTCATGTTGACCCAGGGGCTGAGCTCTACGAGATCAAAGAGAATAAATTAAAATAA -AGCTTACGATGCTGCATTCCCCGGGCCACGCGCGCCTGCTGCATGTTTCCTTCATTTCCC -TTTCTTGATGTCAACGCTGCAGATCTCTTAATACCGAGCTGGAATTAGGGCACGGCACTC -TTGGCAGAAGATATCTCGATAGTAGCAGGTCCATACCGGCCTTGAGCCCTAAATTCCTTT -TTGCATAATCATAAATTTCAAATGTAGGAATGCTTGGATTCTGTACTTGAAAGCCGAAGA -TCTCCGAAATGTGAGTAGCTTGTTTCCAAAGCCCCCCAATTGGAGCATGTCTAAGAATAG -AGGGGAAATGAATTGCCTTGTTAAAGCCAATTCTGATAAGTCCGAAGGCCGGAGCACCTC -AGGAAGGAAAGCATCAGTAATAGCATTTTGCCTTGGAGTGCCATCAAGTCCTGGACCCAT -TCGCTGATCAAAGGGCCCAATGCTCAAAGGCGTTTGTGCACATGGCACCAAGCTTGGACA -CCTCGCTAAAGGCTACCGGGTGTGCTAGCGCTGATCTCTATGGTGTAACAAAGCATCTCC -GGTAAGAACTACGGAGATCCGCTAGACTTTGCGGACCGAGGGTAGTGGACCGTACCGCAT 
-ACATTAACTACGCAGATACACGCGCATTGGCTTTTTTTTCCCCTTGAATCCTTGGCTGAG -ACTGAGCCACAATCGCCATATGGGAGCTGATGTTTCAAGACAAAATTTAATTTCCCACGG -GTCTAAATCTGCTCATTCCCTTAAGAAATGTTAGAGGTCTCCTGTTAGGATATGTAAGGC -ACGCCTGGCGGAATTCCAGCAACCTTATCTTCATACGCTACGGCGTCGATCACCTCGCTA -AATCCGAACCCCCAGGGCCGATCATTCGTCTCAGGTGAGCTGACCTCGTACAACATGGGG -CGTTCTGGTTCGTGATATGCCCCCAACCTAGCGAAGATCTCGCCGCGGAGAACTTGAAGT -CAAGCACTTATTCTTATTTTAGACTGAAACATAGCCCCTGAAGCTTACCGAGATTCAATA -AGACGGCTCTTTTCTCCTTGTTACCTATCATAAAATTAAAAACAAAAGCTTGTCCAAAAA -CAAGTCATCCAAGGGGGAAATCGAAAAGGGCGTTGACAGAAGAGGAAGGGATCATCCATA -CATATTCCTGTTTGGGACATTTGACGCACCTTGCCAATCACCGATGCCTCAGTCTGGGAC -GCAACTCAATAGTTCAACTTCTACGGCGAGCTCCGGCACGGAGTATAGATACCGAAAGTG -TAGCCGCAGCTTGTTACCATCGCAACCCATGCTCCTAATACAGCCATGCCATAAGACTGA -GGACTGCGGGTTGAGTCCGAACAAATGAGAGATATTGGCTCCTAATGTAGTGTTTTGTGC -ATGAGTGTGAACGTGGCCTATGCAGCCGGCTTATTTTATTCAACGGCCTTGCCTTTGCCG -GTAAGCCACCTTACAGAACTGCAGATCCCGGTCGTATCTTCTTCGGCTGCAATGAATGGT -CATTTCTCGGAAGTTGATCGTACTCGTATATTCCGTCTTGATTGAATATTTATTTTTAGG -CCTTCGGATGAAATTGTTGAGGTGTGTAAACGAGGATCGTAATCTGCTCAGCCTTGGCTA -ACACACATTCATTTTGACATTAGGCTCGCAGGTTGGTTGCATGAATCAAGGGATATATCG -GCACAGCCCCATTCCCCGAGTGCCCTAAGACTCATCATGCAAGCGGTAGGACCTTGAAAA -TGATTCTGATAATAGTTCATTCTTTTCTTCACTCTTACTGTAGACGCTAGGGTGAAAAGG -GAAATCAAAAGTGATAATCCAATCCTCACATTGCTGAACCGGACAGACATGTATCACGGC -CACTCATTTTGAAAATATGTCGTGTGAATTAAATGTGAGGAATCGATGCTAACCAACTCA -CCGTGATATAAGAACCAACGGGCAACGCCCGGAGGGATATGAACATGAGAATGATACATC -CCCAGCGTGCAGTTGCATAGCAACAAAACCAGACCAGGATGAAAAGAAGGTCAGGTGCCT -TGGACCAGCCAATACTTGAGATACGCTTCTCCTTGTCATCCAATGTACGCCGGAGCCAAT -AGAATGACCAAAGGTATTTGAAGTACAGAGGGAAGGTTATGAAGAGTGAACATAGCGAGA -GCACTCTCCATACTATTTGCTCCTCTTTTGTTGGAAACGTGGAGTTCCATCTACATTTCT -CGGTTCTCAGCTTTATCCAGCATTGAGAGGAATATCAAAGGGAATGGTAATACGTACGCA -GCAACATGTATTCCACAAAAGACCAATGCAGCGAACATATAGAAATGAGTGATATTGGTC -CATTCCATGATTGTCATTTTTTCGTCGTCTTCTGGTGAGTGTCCCACATGTCGAGACTCA -GATGGCGGCTCGATTTCCTGATGGGTTTCCACATCTCGGTAGTCAATTTTGCTGGCTTCG -GAGGACCTCACCGGTGGTTCTTTTCCTTGGGAAGTTTCCGTCACGGTCAATATTTCGTCA 
-ACTTTCGACCCATCTCTTGGCTGCTGGGACCCTGATGCTGCTATAATATCCCGGGGGATT -TCTTCATCTCGATGGTCACCATTGATGATCAATGGCTCTGGTTCCGGCCCATGGCAGGTT -TTCCTGTCTATATGGCTACCGTTCTTGGTTTGACATGCGTTTTCAGTCTCCTGGGGATTT -TCCTCATCTTGGTGGCCACTATTGCTTTCATCAGAATGCCTAGCTGGTGGTTTTCGAGCC -TGCCAATCTTGTATTATCACTGCCCAATCATCCTTGAACTCTTGCCAGAACCCCTTCCAT -AACCAAGGTGAAAAAACCAATAATAACACAATGCGTTGGACATAAAAGCCTGCGACAATG -ATTTGCCATACCGGAGAGCCAATGTCTCTATCGAAAGCATCTGAGTGGACCCAGTTCTCA -ACCTTTTCGTTTAGGATATCTCTGACTTGAGGCGGCATATTCTCGGAATCGCGATCGTAT -GGTAGATAAATCGTGATTGGTGTTACCATATCTTTCGGTTTATTCCACCAAGTTATATAT -GTAGCGGCAGCACAGGCGACATACGCCACTGTAGTAATCTCGAGAGCTGTAATTGGCAAG -CTGGACGCTTTTCTGCCAATGACATTGCAAGTCACCCATAAGGCTTGGAGCAGAGTAAAT -CCTTTGGCAAAGGAGTCAGCTTTGGCGCGGTCTTGGATGTCTCTAGTGCTCAAATGAGAT -GATTTGATGACGCGTGTATTGATTAGGGCAACGACGTTGCTACAGTGTACTGGATATGTC -CAATTATCCTTGGTCTGTAGAAGCACACCATTCATACCGAGGCAAAACCCCTGGATAGTG -TCCCAGCGACCCTCAGACGTTTCTGTCTTTCGTATTTTTGAATCTTCTGGTAGTGATGGG -TCTGCACCCCCTCGGCCGCCGTATACTATGCGAAAAGCAGCATTGCAGCGTGCTGCTGTA -GATCTTGCCTGCCATAACTCTTCGGCTGCAGTTCCAGCCATGGCCTCCGGGGCAAGTATC -GTGCATATCCACGCCACTGCCTTCACCCAAAAATACCCCGCCTGTGTATCATCACGTCTT -GGGACAGAAATATGCAGGGCTGTCCATGTGCATGCAAGGATTGTTGAAACACAAGTCCAG -AAAATGTCCCAACTAGAGCGTGTGTTGTCATCAAATTGCCAGCCTTCCAAGGCCGTCGAA -TTAGCCTGGGCGAGTGATTGGTTGACGAAGCCAAGCCATAGAAGAAGCGCAAACATGGGG -TGTCTGGATTTGATGATGAGAGACGCAGAAGACCCAAGAAGATAGATAGGAGGAAAACAG -AAACGGGTTTAGCTCGACAACTCAGCCACACGGTGTGGATTGTACAAAAGAGGCGCGATC -TGAGGTTTCCTTTGTTAATCTACAAAGGTAATCATTTTCTACACCTACTTGACAATTTCA -GACAAGGAAAAACTTAAGTAGTAAAAATGATGTTTGTGGTAGATGTGAGAAAAATACAAT -AAGATAGTTCTAAGAAACATAGTATGTATCTGGCCCAATGCCAAATTTCGAAATTTTGAA -TATTTACAAAAGCCACCTGAAGCGCCAATGCACCAAACGCAATACAGAGTTAGAGATCTT -TCGCTTCAGAACCTTTGGAAATGCGAGCACaaaagcagaacaaaagaagaaaaaaaaaaa -gaaaaTGGATATCAATCCTCACGACTATCGTCGAAGGTGGTCATAACTGGTGGGGAAGAG -CGGCAGCGAAGGGTCCACAACATCATTATATGGTCTTCTGGCTGCCATTGACACATCTTC -ATCACTGGGTGGAGCAGTGACCACGCGCGGGGGATGTGGATGGGCTTCTGCATCCTCCTC -GCCTCTTGCTATCTTCTCGAGGGACGACAACTCGGTCGTCGTTCCAGTGCGAGACCAGCG 
-GAAAGAGTCACGGTCGTCACCAAGAGGTTTGTATTTGGTGTCGGGTTTCTTGAGGAAAAT -TGCGTAAACAATACCAACCAAAAGGTAGATTAGGAGCACAAGAGTGAATATCGCATTCAA -GATGAAGAAGAGCACGCCCAGAATGCCAGTCATAAGGTCAGGTTGGTCGAAGACATCCGA -GAAGACAAGCAGGAAGATAGTATTGAGAGAATTGAGTACCGCCGCGGTGATTGCAACGAC -ATTTGCCCCCTTGCTCATGAATGGCTTGATTATAGCGGTTGCGAGCATCATGCAAAGTTC -CAAAATAAGGAGAACGATCGACTGTGCGACTGGTGAAGGCTGAGCAAAGGCGATAACCAT -TCCTTTAACAATGGTGTAGACCAGAGTTGGGACAAGGAAGTAGTAGACCGGAGCGCGGTA -GTAGATATACAGGAAGCCCCACTGGCTCAGAGTGGCAGGGTCAGAATAGAGCGTGTATGC -AGGTGTGTTGTGTTGCTGTTTCGATCGCATCGCGCGTTGGAACACCTTGAATGCGGCAAA -AGCTAAGACGGCGGTCGTCACGAGCCACATGGTGATGGCTAGGACAATTTCAGCGGCCGA -GTCCCGACGAGTCAGTTCCCAGAGGCACAGGACACACATTTGTGGGTACCCGATGCATGC -CAGCCGGAAGAATGTATTGCGGAGAATGGTTTTCCAATCTGCGGTGACGCCGAGGGCGCG -ATCCAATTTTGGAAGCGAGGTCGATTTCTTGGCTAGGACAATCCGGACCACCAGGACCAG -ACCCACGCAAGCGACGATGACTGCAGTGATAAAGTAATAGAATAGATACCCGGTCATGAA -GATGTTGGTTGGTTCAATGGATTCACGAACTCCTACCCGTTCAATACCGCGAAGAATAAT -TTGACTTCCTTTGTCCGTGGCTCTGCGGACGATCTCAGAGGAACGCTTCTGCAGGACAAC -AGACACAGAGTTGGAGGAGGTCATCACGTTCGAGGGTGTACCCCCGGTGGCACGTTGAAA -CCAGGTCGCAATTGTCTGCAGAGCATCCGACTGCACAATCCCCAAGCTCCATTGCATGTT -CTGGGTCCATGATTGCACAAAAGGCGGCATTTTCACTGAAGTCATTCCAGCCATAGCCTG -TGTTTGAATGAATCCGAGGAAGGCAAGTGATCGGAAAGCTAGATGGGTTGCCGTGTTCGA -ATGACCGAGAAGTGATATGACCGTAGAGGTGACAAGACCAAGACCGGTAATGACGGCGAG -TGTCCAAGACACTCCCGCCTGGTAGACTGTTTTTCCATTGCGGATGCGGGTCTCCAAGCA -GGTGATTTGTTTACCTCCATTCTTGTATATGAGCCTGAGGCGAATCACGGCATCGATATC -GGGGACACCATAGGAGATCGCTATGACGTAACGGTTAGTAAAAAGACATTTTTTTCTGGT -TCAGGTACCCGGAAAAGATTGCAACGCATCGAGGAACTGGTAACGAACATGGTATTTGTG -AGAGAAGGCTGTCGCTCAATGTTTCAGTGATTGTGGGAATTTGAAGTTCGATGGCTTTCA -TAGGACAAAATACCGCCAAATCGAGACTGCATGGATCGAACGACTTGGTGAACGCTTTGT -ATCCGTACACCAAGAGGTCCACCTCAATGAGTACCTCTCCGGCCACTTCAGAATAGCCAT -CAAAGATAATGCTTATGGAGTTGTTGGCTGGTGTGAAAACCGCATTGAAGTTATTCACGG -TCACTTCGGCACTCTCGGAACATTGTGTCAGCCCGGTGGCTTGGAGGAGATTCTCTGCAT -GTACGAGATGAAGACTCTGCAGTAAGATGAGAATGAAAGCTCCGAAACGGGAGCCATTCA -GTTGCATGGCGACAAAACGAGTGAGTGTCAAGAGAGAAGATGGGAGTTTCTTTCTATTTT 
-CTTTTCCTTGATTTCCTGATATCATGGAACTCCACAGCACGACGTAAGCGCTCGGTGGAT -CCAGGAGATCGTCCAGGGTCGCGCAGTGAAACTCCGCTCTCAGGCAACCACCATTCAAGG -AAAAAGCGGACTTGGCTTTATTATGTGATGTACTCCGTATGTTATACAAAGATTCGAATG -GTGATCAACAATGGTGTTCCCGTGTTACCTTGGTTGTTCCCAGCCTTGCCCCTCTACACT -GCCCCAGCCTCCCAAGAGTCTAGACGATCACGTCGATTCCCAGTGCAAATCTTTATTTCG -GCTTATTCTCGCAGCTTTCCCAGGAGTCCGGAGTACGTCAATGCAATAAAACTTTGGTCC -GCCACCGGTCAGCACGTGGTCTCAACTAATTCCGCAAACAGCCATCTCCTGAGGTCAGCC -CTGAGATGTAAACAGCCTTGACATCCACCAAGACATAATTATAATTGGCATATCCGAGAA -TATTTAACTTTGAGTTTATACAATATAACGTTGATTTGGTCTTTCCGCGAGCTTCCTCAT -TATCGGGAAATCATTTCCAGTTTGGGACACCTTCAGGATCTGATTTCCTAAGTCTAGCTA -AATTTAGAATGGCCGCGATTGCGTTTGAGACAGCTCATGTCTCTCATTGGGCAGTGTCGG -GCTGATGTTAATGCCCAACCAATAGAAAAAAATCTCACAATCCGTAACTTAAATCAAGTT -CATCAAGTTCATTGCCCCAGCTGGGGGAGACTGTGATCTGGAGCTATCTACCCATCATTT -TAGCAAGATTACGGAGGAGCCAGGACCACATCACGTGTGGAAATGATTGGTCATGTGTTT -TGGGTAACAAAGATCTTTGGGACCCCGGATTTCAATCAACGTGGGTTTTCCCACACATTG -CAATATCAAAATATTCGGCTATGTGACGCGGGGAAGGAGACTTGAGTTATTGCTTCTATA -TGATCCTCACAGAAAAAAAAAATTGATCAATCAGACTTACTAGCTGATAATCCCTACAAT -AATCCTCGCATCTACTTGGATACGCCCAAGGTTTTATGCTACTGTGTATGACTTGGTGAA -AGCAAGATGGTCCATTTGGAATAACACTTTGCGACAGGCCTTTTTTACCGTGAGCGACAA -ATTTCAATACGCACTGAAAGGGCTGGAGAACCACGGATCCCATTTGACTTGACAAATGTC -GTTCGTCTTCAGTATCGTGACGGGGTTGATCTATACTCTGCATGGCCATCTCGCATGACG -TAACCACCAGCCCCCAACCTTGGAACCGAATCAGTCAAGGGCAACAATTTTCATTCATCT -GAGTTATTTATGCCAGGTGATATCACATGTAAGCAGGGGCGCGTATAAATCTTGCTTTGA -TTGCCCAACTCACTTCGGAGTCCTCCGTATGCGTGTTCGGATCTTCGCCAAGCGAACCAA -GAATCGCCACCTAAATCATTTTACGTGCTACCATAGCAATGAGAAGCGTCCGTACTATGG -ATTTATGTGTGTTGAGTATGTTGGATCCAATTCTTCTCTGCCGTTACGCCTACCATCACA -GCGGCGCATGGTTCGACTGCCACTGAGGCGGAGAATTGGTCGGAAGTTTTCAACAAGACT -AAACGATAGAGAATTCTGCGCACAGGTAAGACAAGGAGGCGATCTGAGGAAACTCTAGTG -TGAGGCGAAAATGGAGGCTTGTGAGATGGCTGTCGCCATGTTGCAGCGGGATAATGGCGG -TTCGACTTGGATTATCAAGACGAGGAAGGAGTTTGCTCGTTGATTCTTGTCAGTTACCAT -AGATTGCGTGTGAGAATCATACAAAATCTCCTTCAACCAAGTGGCTAGGTATTACGTACC -GCCCGATCTATACAGACCCCCGTGCTTCACAGTCGTAGTTGATCCCGTTGTTTTCTTTTA 
-AGGATAACAGTATGTGTCCATAGATACAAGAGTGGTAATATCCTGACCATTGGAGACATT -GAAGTTGACGAAGGGAAAACACCCGGGGGGGGGGATACAAATACAGATCTCTGCACTGCG -GCTCCGGCTTTTCTGCTGGGTCGCGCTACCATCACCCAAAGGCGGACGAAGTACTTCACT -GCTAGGATGTACGGAATACTAAATACTGAATCAGGATACACTATGTTTTGGTGCAAATTG -AAAAATAAAATAAATCCATGAAAATGCATGTCAATCTATGCAGCTGTCTATACATTGTGG -CCGTCTGTGCAAGGCCGATAAGCCAATCTATATACCCCCATTTGTAACCGTATCTCTTAT -TTTTGTGTCTCTTCGTCATAATGTGCAGGCGCAGTTGAACCGGTGTTCTCCTCATGGTAC -GATGTCTCCTTGACATCGAAGCGGAACTGCTCCTCCTCTTCCGCGAGGGCAGCCTTGAGC -ATCTTATTTGCTCTCCAGACAGGTTTGCAATCAAACAGACGGTCGACCTTTTCAAGAGGG -ATACCGCTGGTTTCGGGGATGAGGAAGAATGCAAAGAAGAATGCCAAAAATGACAGTGCA -GCGAAGAAGAAGTACACGCCGTAGTGCATCTTGGCGAACATTTGTTCGGTGAAGCGCGAA -ATGAGGAAGTTAAAGAGCCAGTTGCTCGCAGTCGTGGCAGCTTGCGCCAGGGAACGGAAA -TTGGGGTCGAAGAATTCCTGATAACCTGTTAGAAAATGGCTTAATGTAGGGGGATAATAC -AATGACTTACAGAATTGATAACCCAAGGGGTACCATTCCAGGTAGGGGTATAGACTGCAG -TCCATAGATAGAAGAAGACTACGGCCGCAATGCCTCCTCCGTCCAGAGACGTGCTGGGAT -TATCCTCGGGCTTGACAACACAGATATAGGCACCGATGACCCACATACAGATCGATCCAG -TCACGGCACCGACAAGGAGCAGATTGCGTCGTCCGAATTGATCGACCAGGAATAGCAGCC -ACACGAAGGTCATGACGGCCTTGATGACTCCGAAGATGCCGGTCATCAGGTTGACGGTGT -TAGGGTCGATACCAATCTAAGGCACACAAGTCAGTATAATTAATGGACCATTGGGAATGC -AACAGATGAAGTGTAATCATACCGATCTAAAAATGGTAGGTGAGTAGTAGTTGATGGCGT -TGATACCGGAACCGTTTTGCCAGAAGAAGAGCATGCAGCCCAACATCAGACGGTACATAA -CGCTCTTGTTGGTGGCGAGAGCCTGGAAGGGTTTCCAGAAACCAATACCGATGGAAGTTT -TCTGATGCTCAAGAGCCTGGTCGATAGCAGCCACCTCTTCGGTTATGTACATGTCGTCAG -CTTCCAATTGGCGAATCCAACAAAGATTCTCCATCGCTTGGCGGCGACGGTTCTTGAGGA -AGAGCCAACGAGGGGATTCCTTGATCCACAGGGCACCGATGAAAAGTAGACCGGACGGGA -CGAGCTGGATGGCGAATGGAATGATCCATTGCTCGTGATCTTGGGGCATGTGTTGCTCGA -CACCATACTACCATAGCTAGATTAGTATTTGCACGCGAAGATAGCACAGGGGGAAATAGA -GACTGGGGAGAGGCAACGAGACGCACATTGATCCAAAAACCTAACATTCCGCCTATCTGC -CAGCCGAGCTCATATAGACCGACGAGCCGTCCTCGGATGGCAGGCGGCGCTAGTTCGGAA -AGATATATAGGGATGATGTTCGAGGCTATGCCGGTACCCAAGCCGGACAGCACACGACCA -GCGTACAGAATGCCCAGGCCTCGAGCACTGTTGGCACCAAGTTGTAATCCAGATCCGAGA -AAGAACACAGTGGCGGCAACAGCCAAGCTCCATTTGCGGCCAATGAAGTGGCCGAGGGCG 
-TATGTTCCCAAAGCACCGAAAAAGGCACCAGCAATGAAAAGTGAGACGATGTTCTCGCTA -ATTAGAGCTCTCTCTGGAGAAGACATTTCGTCGAGGCCGAACTCCTTTTTGAAAGAGGCG -AGTGTGATAGTTGTACCGATGAAGGCACTGGTATAACCGATCATATTGGAACCACATGAT -GCAATGCCGGCGAGTGCATATACACGCCAGTTGTAAACGGACTTGGGCGTTGGCCGATCC -TCAGCAAGTGTGAGAATACCCATGACCGAAAAGATGACGGCTCAAGCTGTGAAGTAGATT -GGAAGGCCGGGATGAGAAGACAAAAAAGTTTGCTCGAAGGCAGATGGAGGAAAGAGAGAG -TCTCAGCAGGTGCTAAGATGGGACTCTAAGTAATATCTGCCGTCCTAATGGACTGTGGAT -GTGGAATTCCCCCAACTGAGCTCCCCACAGCACACGTCGGCTAAACCCCATAGACTGGGG -CAATGAAATATGGGGTTCCTACGGGAGGCTGCGAGACGGCGATGATTACAGATGCAGATT -TCCTCTAGTGGGAACATTGGCCGGGGTAGCGTTTTCCCCGGAGGAAACATTCAGCCGACA -AGTAAGCTTCATACAGATCCATAAAGTAGATATATCAACCACGGAATCCTACTGCGGTGA -AATCACCCAGGGTGCACTATCTTATTCGAGAGTATTTCTATGATGCAGTAGATAGTAGAC -ATCCTTTTGACATTTTTCTGCAGTGGAGACAAGTTTTACACGTTTAGCCGCTCAAGTATG -TTTATGATTTTATAGGGATGCCGGTGAGCTCATCGCCACCGAATTACATACGTATTATTT -TCTCAAGTTGGAGAGGAACTCAGGTGCAAACTGTAGGAGTGTGGACAGACTGGCACAGCG -AAGTCCCGCGTTAAATTCATACCCCGTAGTCATTCTAGCGAGAATATCATCGGGGAGAAC -CCTTAACCATGTGTGGAGCCGGAAGTTTGCCGAGCTCCCTGTCCGAAGAGCCCTGAGCCT -AACACACCGGCATACGGAAGAAGGGGTAAAAGTCTATTTGATCCCGATTTCGGGATATTA -ACCGAAGACGAACATTGAAAAAGAAGAAATCTCAATTGTAGTACTCCATACACACTGTGC -CAATAGGGATTATTCAACTTGCATAGGGAATGTCCTTGCCGGTAATCCCTCCCACAGTGT -CCCCCGGAAGTGGCGGTTCCCCATGACTCGGGATAATCACTCACCATAATGCACATTTGC -ACTAAACGTAACTCCGGGATTTACATGGAGCCTCGGGTTCGAATTACCCAAAATTAGGGA -TATATGATTGATAATGCCAATCCGATAAAATTACACGGTTTCTACACTGTCCTAATGTTG -CCAGTGGTGTTTTGAGGACAAGGAACTATACGAGCTCCTTTCGGCCCGGTTCCGGCTTGG -AAAGATGCCGTAGCGTAGCCGCCCGGCCTTGGTGCACATGAATGCACCATTAAAACGACG -TTCAAATGGCGAGCAGTTTTTGTCTGAAAGTGATATTAAGTTTTCAATGAATAACTTGTT -GGCGTCAAGCTGTCCCGGCCCATCAGCGGTATCTCAGGTGCGTTCAAGAAATTGTGCTCC -TGAGCTCACATTATCGCACAAGAAGTTCCCGATCAGTTTAGCAAAATATCTGGTATTTCA -CCAATTTTTCTGTGCGCTTGTGCCTATTGTCCATGGTCCAAAAGATTTATGATTGTAGCT -CTTGCAGTAGAATTGACAAGGGGGTACGTGGCGTGGGGGGTGAGAACGCATGGTGTATCA -TGAGTGCCGTCCTCAATCTGGGTGATCTTTCGAAAATTGGACCTTTGGATACGGAGTACT -TAAGGCATATTGCCCAGAATAGTGCAGGAGATTTGACCTTGGAGTTAAATGACTGGCATG 
-TCTTCGCAACCCTGCATCCCGATTCCTTCATATTGCTTCCCCTGTTGGCAGCAGCCCCCG -AGGTGAAAGTTGTAGGAGCTCGGATTTTCAACAGAGCATTGTCCCTCTTCATGGCTACAA -GGACACGTCTTGAGATAGTCCATGTTGCCCAAAGAGTGGGATAGTGTTTGGCTAACCAGC -TTCTGCGGCAGCAATTTTCAGACTTCTCGGCTTGTAAAGATGGATATTCAATAGAATCAT -TGGCATGGCTTGGCCCCAATTTCGTAGAACAATATCAGCTGAGAGTGGAAAGCGATGTAA -AGTGTGTGTAGAATGAATTTTCTGCCTGTATCCGTCGATTGGTCACCCTTTTGGCTGATT -TAGAGAATACTTCAACGGCTAAGACTCAATGCCGTATTGGCGCAGTCAGTGCGTTACAGG -ACGATCATGCATGAAGCTCCAAATTTGTGAGGCAGCTTCAAAGTATGTTGGACGGAATCC -ACTCGGCGAGGTGACTCGATTGTTCCGTGCATATTCGGTCCAGCTTGCAGGTTGTGGATG -GGCAGATCTCAACATATGGATCTCTAAGTCACATTCTCGATAGATATCAGACTTGGGGCT -GTCTTGGTAGAATAAAAAGTCTGCATGCGCCAGAGCTGGGTGAGAAATGTGCACAACGGA -CCACAGTAGCGTTTGTTGTGGTGAGTCTGGTGATTGTGGTTTTTGACACGCAAAAGAACT -TCGGGTAAAGTCTTACAAATGAAGCGATATTAGGCGATCCAGCAAGTTCGCAAAATTACT -CAATGGTCAGCTTACACAGAATATTGGAAAGTTGATGCTGAATATTGAGAGACTAGAGGA -TATCATTATTACTTCAATTTTCAAAATGGGGAACCTGAGGCATAAATATCTTTTTTTTTA -TTAAAAAATATATTTCGCCTGAGGCAGCAACACGTGCCGTCAACCTTCGGTGATTCAAGT -CACGCTATGTCAACACAAACCGTATGTATCTCCTTGGAACTCAAACGCTTGACTGATAAG -TTTCAAACATTGCCCAGGTGTCTCTGGAACCGTGTGGATACGCCGTTTCCCGGCTCATGA -CCACATCTAAAGCGGGGCATTTTCAAACTGCGAGTCTTATCAATTGAAGACATGCGAATC -TCAGCGCGTGTACAGTAATTCGACACGGTGTGTGAAGATCAAGTGTATTTTCTTAGGCTT -TAGATAGTTTTACGAGCTAACCATATCCTAGAAAAATCATCAGAAAGACAAGCATTAGAA -TAAAGAAACGACTCGTCTCTCATCGCGATGGAGAATCGAGTGTCTCTCGGCTCTATCCAG -AAGGCCGTCTGGGACGGCCGTTTGCCACTGGAAATTGTTTTGGCCTCATCAGAAAGCCGA -ATATTTGATAAGACCGATCCATATCTTGTATGCATGCCCTTCGCTCGATATTTACACGTG -CTGATCTTAGTTGCTAGATCGCTTATCCGCGCATCTCATATCTTCCCTCGTTGCTACCCA -AGCTCCGAGCTTTCTTCTCGAGCTTTCTGATTGAGCCAAACTCTCAGTCTCATGATGGCT -GGTTTGAGTTTGAGGGTGTGCCTCTGAAATGGCACTACCCAGTCGGGCTGTTATATGACC -TCTATGCGGGAGCAGACCCCGCCTCAAAGACTGCCGCTGGAGGCAACGAGTCCACCGATA -GTGGATCATCGCTGCCCTGGCGTCTTATAGTGCATTTTAGCGACTGGCCGCATGATCTTG -TTCGGCTTGATGCTGATGGAATGGTCATGAACGACGCCTTCATAAACAGTGTGAAAGAGG -CGGATTTCCTACGAAATGGTACAGCCAAAGGCATTATGAGTCTTTCAAAGGAAGACTCGT -CGGGTCTTTGGAATGCCGTTCAAGAAGGTGAGTCAAGCATTCTTATGTAGAAATTCCCCT 
-ACTGCATCAATGGTGGAGACTATCCTAACCCGAAGAACCGCAGTCGATCTCCTCTCTTTT -CAGCGCATATCAAGTATCCTTCTTCCTCAACTATCACAGCCTTTCCGCAACGTCCCGATC -CGAATTTTCCTTCCACTCCCACCAGGTGCCGAGTCTTCTTCCCTAAAAGTAGTGCAATCG -CCACTTCCTCCTTTAATACCAGCATCAAGTATGCAGTCGAGTCAAATGCTGAGCTCGCGG -GGCGGTTCCACTACCCAACCTCAAACGATCGGGACTGTTCTGCACATATTGCTACCAAAT -CTATTTCCTAGTCGGAGAACACCTGTCCTTGCAAAGCCCGTCTTACACGGTGCTGTGATA -CCCATGTCTGCTCCCATTGAGGAGGTTGTGAGGAGCTCGGCTTATGGAGATGGGTGGGCA -TATCTTGTCGTTCGAATGATGGGCTGAGTCAATATGAGAAGTTTTTGTTAGACAACACCC -ACAAGGCTGTTTTTTTGATCTCAAGTGTTGAATTTGATATGTATATGCTACTATATTGAT -TTTCAAGAGCTACTAAGTTATCTATTGAATCATCAAAACAGACAAAAAGATAAATGGCGC -TTTTAAAAGCTGATCCAGGCCTGAGCATGGTTTCTATGTACATCCTTTGGAACAGAATAA -AAATGAACGAGAGATTCAAGTGAGATCGAACCGCAGTCCAACCAAGAGGAATGCTGGCGC -TATGTAGTGAGAAGGCGTATGTGTGTGTgagagagagagagagagCAAAATGCAAGAAAA -CAGAGAAAAAGAATCAGGGGGAATATAAAAGCAAACAATAATAGATAAAAACCAGACACC -ATAGAGTAGGATAAAGTAGTACGTATCTTAGCAAGAAGAAGAAACAATAGACGGGATATC -CATGGACGTGGTTGACGCCCTCGACCCCCAGATCGAGTAATCTTCACCAACCAAAGTCGA -TCCCGCTATCTCACCAACATCAGATCCAATATCGACCGAGTCGATAGTTGTGGCCAGGCT -CCTGCGTTGGTGTACCGCTGGCAGCGACTCCAACCTGCCCATATTATCAAACCAAAGTAC -ACTGGCTGGATTTTTGCGAGCCAGGTCTATTGCTGTCTGACCATCTCCTTTCACTTTGTC -GTTGGTTGGTGTTTCTCCAGCGTCTTTGATTGAAGGATCTGCACCCGCATCTAACAGTAT -TTGGAAAAGACTGTGGCGGCCTCGCTCTGCCGCTAGATGTAACGGCGTTCTGCCGTCAGA -TGTTCTAGAATCAAGCGATGCGCCATTCTTGATGAGCAGCAATACCAAGGACTCGCGTGG -CATTTTGGCAGTCACTGCCATATGAAGCGCCGTCTCGGCTTTGTAAGAAGCCGTGGGGGT -ATTGATGTCAGCTCCATGAGACAGGAGCAATGAAAGGAAAACAGCTGTCTTTTCAGACAA -AGGGTCTGGGGAGTTGAATGCATTTATCACAAGATGATGCAGAGGTGTCAGGCCATTACA -TGTTGTTGCATTAGCGCTGAAACTAAATTCCAGTAGTGTTTCGACAATGTCAAGGCAGTC -TTGATTGAGAGGAGGCGTGATGTGAAGAAGAGTTTCTCCCTTGGCAGTCTGAACATCAAG -TCTTGCCCCCTGATCTACCAAGAATAAAAATAATTCTTTGTTCCCAGATGCGGCGGCGGA -CTGTAACGGTGTATAGCCCTGAGAGTTGAAGGCCTCTATATTTGCACCTTGTTCGAGCAG -ATGTGCTGCAATAGAGGGCGTGGAAGGAGAAGGTGCACGGGCCTGCTTGTGAAGCAATGA -GTCGCCTTCGCTATCCCATGCATTAATATCTGCTCCTCCAATTACCAGCAAGTCAAACGC -GACCCAATTACCCTCTGACGCAGAGATTTGTAACGCAGTAGTTCCTCGGGAAGAGCGAGT 
-TTCTAATTGAGCGCCACGTTCAAGGAGAAGAGCCACTGCTTCACAATTGTCTGTTTTGCA -AGCAAGTTGAAGGGCCGTCATGCCGTCTCTGTTCAGTACATTGACGCTAGCCCCTTTCTC -CAAAAGGAAGGCTAGAATACGTGGCCGTCTCAGTTCCACTGCCAGATGGAGGGCAGTGTT -TCCAGTAACTGTGTTTTGAACATCCATGTCAAATCGGCTTTTGATCAGAAGTGGAACGGT -TGCTTCATCATGGGTTGTCCCAATGGCTATCTGAAGAGCCGTTGCTCCCTGGTGACTTGC -CGTCCTAGAGTTGACTCGTCGGTGAAGCAAAAGCTTGATCAATTTGCGGTGCCCTTGTTC -CCCGGCTATGTGCAGAACAGTTTTACCGTTCTTGCAGGTAGCATTGATGTTAGCTCTGGC -ATCGAGAAGTAACTCCGTCATCTCTTCATTGCCTTGATATACTGCACAATGCAAAGGAGT -GACGCCAAGCTGGTTATCGCAGTCATTGAGGTCAGCACCCTGCTCAATTAAGCGATTTGC -CTTCCCAGTGATGCCAAAGTAAGCAAGCACATGCAGAGAAGTCCAGTCGGGAAAGTATTT -TCCGAACCCCAGCTGCTTCGGCAAGTCGAATCCGAGAGTGCAAGATTCTGTTTCGGGGGG -TCGCCTCCAGCAAAGTTTGTTCAAGAAGGCTCGAATCAGAACTTGCGTTGCCTGTTCATC -CTCGCCCACCTCGCGGGCGTGATTCCCCCAATAAGTTGTGGCATAGTCGAGCAGTTTGCC -ACGTGTTTTGCGAGGGCTTGTTCCTTGATTGATGTAGCAATCATCAATTACTTCATCTGG -GGTTATTACCGAGAGACAGATATCTGCAATATTTGCCTTAGCAGTTGGGAAAAATACTCG -AGCCGCCGGTCCACTGAGGTATTCTTTAGCCGTCCTGTGAACCAAGTGAACGGTCCCATT -CATTGCGTCAATGGTCAGTAATCCAGCAGCTTCATAAAGTATGGTATGCGCAGAGAAGGA -AGACTCTTTTGAAGTGCCGTTGCCTTGCAGCTCAAAGCTTGCTGCAAAAGTTAACTCTGA -GACTGTCAGAGGCCTCTGAGCATACAAAGTCCAGTAAAGGCAGCGACTAGCAAAGGGGTT -CTGGCTGACAATGCGGGTCATAGACTCGCCGTACGCATCGCTTAAACTCTCGGGTAGGTG -AAAGAGAGACCGTTGTAGCAAGTTTCCATCATTGCATCGGGATAGGAGATCAATTTGTAG -GCGGGCCAACAAGAACCTAATTCAAAGTAGTAAAATATTAACAGGGGGATTCTTGAACAC -TGAAAAGCAATGTTATTCAAAGGGGAAGGGGAATTAATATCTTACAAGCCATGTGACTTC -TGAGCAACTTGTTGAACAACGGCAAATTTCAACTCAGGGGCGTATTGCTTCAAAAGAGTA -CTCAGTCCTCCATCTTGCTCAATGCGGCAGCGTGTGTATGTTTCAAGATCCCCACGGGTC -GCCAACACAGATATATGCTCATCCTTGGGTGCATATTTTGCTTCCCTGGTGGTGACCAGG -AGTTGTGCATGATCTGGAAGCTTTTGAAGACGATTCAAAATCCGTTCCTTTTCCACGTGC -ATGTCGATTCCATCAATGACGAAAAAAACTCGTGAGAATCTGTTGACCTCAGCGCGAATC -GCATCTTGATATGTCTTTGCTGAGACATTTCCCTCCGCGAAAGATTTGGAGTTGTACAAA -GAGGCAGTAGTGTAGGATGGCGTCCGTTTTCGATATACAAGCTGGGCAAGTATCTTGTCG -AGGAAGCCCACAGAGCTTGCGTTATCATCTATTTCATCGTGGCCAAAAACAAAGACTATA -GCCACATCTGCAGATGTAAACGCATTCTGCAGGCTATCGATGATTGTGGATCTAGCTCTG 
-TGTTAGCACCTCAACGTTGCTTCGATTTGCGGTTGACTCACGCAAGAAAAGTCTTTCCAG -CGCCGGCTGTTAACCTGATTAGCTTTGGACTTCGATCTGACATTTGGATGAATGAAGTAC -CTGGCCCAACGCAGTAAAGCGTTCTGTTGCAACCAAACAGCCACTTTGTATACGTATCAT -TGTTAAAGATCCATTCGCACGTTCCTTTTTGCCGTCTATTGCAGAGGTCTGCCAGCCTCA -TGAAACTTTTAGTGGTCATACGACTTGTGATATTGTGATAGTCCTCGGCTTTCGGTGTGC -TGGCTCTGATTGAATCTAAAAACTTAAGGATCAGCTAAAGAACTTGAGATACATACTTGT -TTCAACTGACCAGAGTCCATCTCGTAGAATTGCCGGAGTATGTCTCGGAGTTTTGCTTCG -ACTCGGGATTTCTCTCTGTGTAAAAGTAGTGATTTTAGTGCTGATCTGTAGCCCAGGACA -ATATAGGGACCGCTAAGCCCCCTGGGCAACACATACCGAGACTCAGGTTGCATGGTCAGA -ATCATCATAACCTTGAAATGCTCTAATCTGGGAATGAAAGTGGTTTCCAACAAGCGCTTC -CTGAAAGAGCGTGCGCTAATGCCACCCGGCTGGAAGTATATCTCGATTGACACAATTGTC -GACTCGAAATAACGGGTGACTTCGTCAAGGTTACGCAAACGCGAGGGATCCACCACCCAT -TTATCCTCCGCCCAGTTTAGTTGGTCTTGAAGTCGACACAGCAGATCAAGGAGCTCACAC -ATGATCAACATAGCTCGTTGTGCGCCTTCTTGGGACTCTCTGTTTCCACCGAGACAGCGC -ATGACTGCCCGGATTGACCGAGCAACTCGGGCAAGGCGAACCCGAATCACAGGGCGATCG -ACCATATTGGGAGGCTTCATGAACTAAAAATTAGATATCCATATCACAGTGATAAAAAGG -CTAGAGATAACATACTCGCAGTCAGAAATACGGAATAGACGGTAGAAAGAGAGTTTAGTG -TGAGGCACACGGTGTCTGTAAACTCCATGGAATGGTACCACTCCTATTCGAGTTAGGACG -TATTTTTTGGTTAAAAGGGTCAAGTGACGAACCCCTGTATGGTGAGTGATGTGCTATGTA -TTCAAAGAGATATGATAGCCGAAAAGGGACGTTTCTGTACGCCTTAGCGCCGACGGCCAC -TGATGGCGCGACATATAACTCAGTGCCGCTACTTCGTCGCAAAAAATTATAGCTGAAGAC -CGCTCCGTACATTTATTATTGTTCAAGTAACCCCCCCCAAAACTCGAGCTGGAGCCATCA -AAGCAGGAGTCCTCGACGAATGACTGGGGCGGAGAAGTGTTAGCGCCTGCATTTAAGTCT -TGTCCACGGGGTGCGGGGTCCCATCCACTGATAATGGTAGCTTAGCCGCTGGGGGCTGTG -ATATGTGCATTTCCATGCATATCACACACATTGACCGCCACAAACGAACAAATTGTACTC -CGTAGGGAGGCCAAGCTAATTATAGCGACAATATCTACCCACATTATCTACGCTGTTGAC -CCCGGATGCACTCCGATTGTTGATTTACGTATGTACGGTGTAGGAGGGGTCCGCAGCCGA -ATGCCAGCATTCCAAGGCACAACATATATGGTCATTTTGACTTGGACTTTTAGACATTAT -ACTTCGTATGGAGTATCACTAATATTGACCTCAAATTTCCATGATAAATACACTTGGGTC -GTATTTATAGGTATGATTCAACTCCTAAGGCGACTAGGATAGCGCGAGCCTTCCACGGTC -TGCCGGGTATTCCTCTCATTCTAACCAGTAGCGAAAGAAATGATATGACAAAAGAAATGT -GGATAAAAGTATAGCAAAGCAAAGGAAATTCGTGGCGGCACATCGTTAAGTTATTCAAAA 
-ATCATTAGGTGCAGACAGAGTACTGTGTTGACTCCTGCTCATGCAACCACGGGTATCGCA -AGAATAAAGTGCAAAAAAACGAGTGCAAAAAATCCAGACTTGTCGTGAGGGGTACGAACA -AAACAAAGACTTTATATCATATTGGCTCCATTGAAGGTGAGCTCAGCGCCACTGTCTCGG -CCACGCTTGAGAGCTTTGGCCAGCATAAGCTCTCGCATCCAGTCAATGCGTTCCTCGCGG -GACTTTACAGAAAAGTGGTGCGACTTCCCACTATTTAAAAACATCGTCTTTCGGTCAACA -TAGCCGCTCCCATTGAGTGTGGGAATCAGGGAGAATGAAAAGGCCGACACATCCTGTGGG -TTATCACTGCGTTTGAGGACAGTCTTCCTGAATGCTGCTGTCAGCTTGGACCTTGACGCA -AAAGAAGAACAAGCCACTGCGTAGTCATCGACATCGATATACTCCAGTGCCTTTGAGTTC -CTATGGATAGACGCATCATCATCATGCATTAAGAGCTGTGTGCCCTTCAGGGTAAACAAA -TGGTCTTCCCAGTCATGGCACAGCAGTTTGGTGGTCTTCCGTTTCTTCATCCACCCACTG -TGAGTTACATCGCCTTGTTCACGTCTAGTAAAGGGGAACAAAGTATTGTTGCGCATTTGA -GGTGCACGGGGTGTTCTTTGTAGATCCTCTGGGGTTTCAATTGGGCTCATGGCACGGCCC -TCTTGAAGGGGATCCATTGTGGCAACGTTTTGAGTTGAAGGGTATTGCCAATTGATTTCC -GCCTTGGTTGAGGATGACCGAGGGATTGGCTCCTCTGCCACGTATTTGCTGTTGCCAAAA -CGCATGTTTGGGGGCACTGATTGCGACTCCTCTCGTGACACTGGTCCAATTGGAACTGCA -GTTATTGGCACGTCCACGTCTACATCTGAAAATGGTGTTTCTGATCGATAGCAAGGATCG -GATCGGTAGTAGTCTGCTGGCGAGGCTGCGATCTCCCGGGATATGAGCTCTTGTTCGCCA -GAGACAGCGTAGCGATCATGAAAATGGTTCCTTCTGGACAGAATAGAAGGTGTGATAGTA -CGCAGACCTGACATGTTTTCAGTAAATACAGACTCATCGCTCGAATCATGCATGCTTGGA -ATTCTCAGCTTGGGCAGGTGTCGAAGGTGCTCAGCCAGTGTAGGTGTCACTTTCGCCGAA -ACGCAAGACTCTGACCCAGGCGATGAAAGAACTCCTCTCGTGGGAGTTGCAGAATGGTTG -GCTGTTTGCAATCGGTCAAGGCTTTGGAAGTTCAAGAAGTTCCGCACATTCTCTTGGGGA -TCCCTAGGTTTTACGTCTTTCAACTTGTCCTGGGATAATTGGAAGGAAGATGGTTTTCCG -GGTCCCATAATATCAGAGGACGCGATAATCGAGGGTACAAATTCGGACTTCGGGGACTTG -AGCAGGTTTGGGGCTGTCTTCGGATTTCCGGGGTCCCCTGCAACGATACAGCGAGAGCCA -AAATGCTCGAATGGTAAATCTCGCGCGAGACGGGCAAGCCTTTCTTGTTGCCTTTGCCAC -TTGCCACATTCTTCGCCCTTGGGGCACTGATGCAATTTTGGTAGAAGCTGTTCGATGGCA -ACAATAGATACTGAATCACCAGGAACAAGGTCACTTCTCTTGCGGACATTACCATCCGAG -TTTTGGCGACGAGTCCGATGCTTCGCTCTACTTGCTTCTCTCTCGACGGTGGAGGAGGTG -TTGCAGCTATTAGCTGAAACCACAGATGTCTTTCGCGTAGATGCCGCATTGCTAGTGACA -CTCGAGTTGCGTGAGCGTGAGCGTGAAGAGGTAGTGTCGGCGCTAGTGAACATGCCGCTG -TCTCTAAGCTTTTTGATATGGCCCATCACTTTGTGTCGCTTGCCAAAAGACGCAATCTCA 
-AGCTCCTTGAGGTCTTCAGATTGAAGTTCGAGCAAAATAGAGCCGGAGATGTCATTGACC -AAAAATGTATCCACGATATCATCTTCAAGCCCAATACTTTGCATCCATATAACAACATCT -TCAGGTGTCCAGTCTTGGGGCTCAATATGGTGAGATGCTGAAGCCTGTCTAGCTATATGT -GTGGGCTGTTGGGGCTCAGGTGTGGGGCCTGATGAGCGATTGGGAACTGGTGTATCCCTT -CGGAAAGGGTCAAACTTCGAGGCTTTGTGATAATAAGGACCCGTGGTACTAATGGGTGTT -TGCTGGAGTATTGGATCAATGTCTGTAACTGATTGTAGATCAGTCCCCGTAAATGATCCA -AACAAATGGGGACCGCTTGGGCCTTGAACACGGTTGTCATCGATATGGAAATCGAAAGCT -GTGAGTCCGGTTGAGTTTGGCGTTGTGACATTGTCTGGAGTAGAGGCTGTGGATATACTC -CCGGTTCCAGTTCCAAACTGAAGTAAGGGAGGATTAGCATAGATATTCACAGCATATTTC -AAGGCCTGAATGACTGTCACTCACAGAAGCCACACTTCTCATAGGTGAGCTATCTGAGTA -GCTATGATATGCATCTTCCTCAAAGATCTCTGTCGCCTCTGAAAAAGGCCGATGTATTCC -AGTCTTTGGAGGGAAAAGTGGCAGGGGCTCCTCTTTCTGGTTCACTAGAAAGTTCATGTG -ATATGCATATTGGGTCTCCTTGATTTCCCGTTAGATATTTATGACGTTTCAGATGAGATT -CAACCAAGACGAACCATATTTTCACTGGAGATCACCCTAGCTAGAGTGCCGGTACTAGAC -TTCGGACTGTTGGAAGTGAAATAGCCAACAGTGGAATGACCGGCACAAAGGTCTTCAATA -AGACTAGGCGGCTTATGTTGGTAATTGCGGGGCCAAGTATGTAACCACGCTATAATAATA -TGATAATGGGGAACAAAGGTCAGAAACGAGTAATTTGGGTGTTGGATGAGTTTATGTACC -AAGTTATCGTCTGGGAATTGTTACCCGTTCGCCTGTGTGGTGTGGCCACCAATGAAATTG -CTCGGTCCAAGGTGCTTCGGACGATTTCAAAAATCCAGCGAATGAAACTTACAGCTCAGA -CAACGACTGGAGACATGGCTCAGCGGTAAGCACCGCGAGGCGCCCTTGATGATGCATCTA -GGTGGTCGACAAGCTAACCTGGAACACTAGCTTGGAAGGCCAGTGGCCAAAGACCCAACT -TTTCTATCCAGTACGGAGCAGAACTGAGTGCTTCTCAAGCTGGTTGGGTGAGATGCAACC -CAGTTCACATCGAACCTGCATGCGGGGTTGAGTAAGGCGGGTGATAGCGGTCTGTGAGAG -GCTACTCATCACGGCGCTACTGCTGTGATGATAATCTTGGCAACAATTCCACGGGCTGGG -GAGACATCATAAATATTCCTGGCGCGTAAAGGGGTTTCAGGCTCAGTTCTATAAATTGAT -AGCATTTCCTCGACGGTCCAAGGGCCTTGGGGCCAAAACAAAGTACATAGAAATGGGCGT -GTTGACATGCCCGTTTTCTTTGAGTCGGGAAATTTGTCTTGATTTGATAAGATATAATGA -GGTGAAGCTCTGAAGCTTGCTCAAAAATACTACTCTGTACTGTAAATTTCTTTATTTGTT -CCCAAATCAGTTCCTTTCAGAACAGTGAACTAGGATTTCTGCCGAGCCAGGGATGAAGGA -TTCAAGTGGTTAAAAGTCACACAATCATCCGCAGTAAATTCGGACCCATCTGCCCGCCCA -TCACACTGCAGATCACTCTTTATTCGGACTGTCCTCCCAAAGCGATTTGGTCCTTATTGG -GATACCAACCTAATTATGTGCGTTGAAGCGAAGGGTGTAAAATTTGATTTTCAATACTTG 
-GGAGATTTTTGATGGAATAAACTCGGCTTGCCTTAGAGTACAAGTCACTTATTACAGAGT -ACAAGAATCTATGTGTCCAACGTAGATACAAAATGGATACTATTGTGGCTGCGTGCCTTC -AAAGTCACATCGCAGGCAAAACTCTTTAGGTATAGTGCGTTTTGGTCCATGCAATCTGCA -ACTTGGGATATCCGCCTCATACGATGGCTACCCGCGTATTCTTGACCTCCCAAACTCAGG -ACCGGTCATTGTCCCAATCGATGACAGCCATATGGACTACTCCTTGTTGAAATGAACTAT -CCTGGCCTCAATTAAAATTAGGTGCTCATATCATTTCTTCTGCGTATGTTGTACAGAACT -GAAATATTCACTACTTCGTAGAGATCGCTCTCTGCGTAATCCGTTGGCCGTCACTCTTAA -AGCCAACATGAATCACACCGCGTGCTCATGCACAGCTGGTATTTGGGACACAAAGTAGCT -AGACTGATTTACTTACTCCGTACAGAGTACGGAGTAGTCTCACCGTAACGTACTTTTGTC -CTAAATCTAGAACTCTCATATATATTGGCCTTGGGACCCTGGGGGATGGCGAATTAAAAT -TTCTTTATTAATTCAACATCGGGCTGAGGCTGGTTCAACCGATAGGTTCTCTACCTGATC -TATCACTCTGAATGCCCTATGTAATTTGTCATTGGGATCTTCTTTATGTCTGAAATCTCC -CGACTCGTCACCAAGCATATGGAAACTTGGCAGAGTTGGTGGAAGATACACTCGATGGAT -CTAAAAAAAAAAATTTTCTTGCACTGACGGTACTTTGAAATTCCCTTTCCATGGCTTAGT -CTTCCATCTCATGAAAGAGAATGTGGCTCAGGTGGAAATCTACAACATATATATTCACAT -ATGTCGGAGAATCTCGATCACGTTACTCCACATGGGGGTAGTCGGTGATGTCCAATAGTT -ACAAGTCGGTATACACTTCGTCTCCGACCACGAGCACTTGATGAACCTTAATGCTTGTCC -AAGATCATGCATCAAAATTAGTTGGCAGGTGGAATATTGTATCATGTCCCTCTTCGCATG -GGTTTTCCAGCCGGTTTGATCATTCCATATATAGCATACTGAGTACAGAATGCATGTTTC -CCTCATCAACTTTGAAGTTTCGTATAGTCGATTATATCATTTTAAGTAGGACTTTGACTC -TTATGTTTGCTTGAAGCTAGACTAGGCATTTATGAAGGTGGTCAAGTAAACTCGTTACCC -GATCAATATGCAATATTCTCGTGCCCCGGTGTGCAGGTTCCCGTGATACAGGTCATTTAG -AGCCATGTGACGACTTCTGGTCTATATTAATTCTACTTTTTGATTGGTCCGTACCTTATA -CAGGTAGGCAGGAGCTCTTGTCTTTGACATTCCTCTTGGCAACGGTCTATCTGCTGTACT -GATACGGTTGAGAATGGTCCGTGTTTATTTTCCCTTTGATAAGGCCTAACGCTCCAAAGT -TCCCGGAGGAAAACAAGTGTCAATTTTTAATTTTGCTGGGAAGCCTGGGCAAATGAAGTC -CATAGACTCAGCTACGTGTGAGGTGTAAATGAACATTTTGGGAAGAAAATATCCCCTGAT -ATTGTACAACCTATTCGTCTATCAAGTACCTGCAGATATGATAATATCAGTAATAGGTTG -AGCCCAGTTAGCTAAAAAAATGACACATGGCCACAATCAAATGGCAATCCAATATCGACT -GTGGTATTCCAATATGCTAGCCTAAACCAAAGAGAAATGGTTCAACCGGTAATAGTAGAT -TAGCACAAAATTTGTTTTTCCTTCTTTTTTTTTTCATGAGCCGTGAATCAGTCAATTATC -GCGCAATTCGCGACTCGCCTTTTCCCTATTGTTCCTAGATGTGTCTAAATTAAGCACATA 
-AAAAGTCCTGGTTAGTCCAAATCACTCCCAAATTTAGAAGAGCAAATTTAGTCAGAACCA -AGTGCGAAGTAGATCCTTCCTCATCCAGATATCGTATACACAATATACATTGTAGTGCTA -TGTATCCTAGGGTAGTATCCAGATCAATGAGGACTTTAGAGCATCCAGACTGCATATTGT -GCTTATTGCCCCACTCAAATTGATACTAATTATGTCCATGAGTGTTCTTGTTTGTTGTTT -GGATGCTAATTATGGGACAAATTGACTATGTGTACCCCCAATAATGCCTCATGTAATTAT -TAATAGAAGTTTAGAGCGATTTTTACTCACCTGATCTATTAGAAATAGGATATCATATGC -TGTCTCATTTTCAGGAACAGTATAATTAGCCTTAATTTATGGCGCTTAAGTGTTGCTAAT -TAAGGGACTCTTTGCTTGATGTACCCTTATCACAGTCCGAGCGCCAGGGTTGGCGCATTA -GCGTCCCCAGTCACTGCCCGGCCCGGCCCCACGGTTGGCTATGAGTCCACCGTTCTGCAC -AACACCGATGACACTAAAACTGAGTCGGTCTTAGGGCTTATGGTCAATCGGCCAATAAGA -GATCAGAGAATAGAAGCTTGGCATGCAAGAACTGCTTCTTCATCACAGTCTAAATCTGCA -TGGTTTTCACCAACCATTTACTTTTCGACCAATACCATTCGAGTATTCCCACCAATTTGG -GACTGAGGGTGGATATTACCTGACTCGCAAAGATGAGCTGTTCACCTATCATTTTGGCGA -TATCGATTCTTTTTTCTTGTTAATTCCAACAAACCAACAACCCCGCTTCTCACCAGCATA -GAGATATTCTAGCCCGCGGTTACTATTTTCAGATCTTGTTGGTTAATTGTGATGTGCCCT -GTCAATGCTGATACGTTGTAGAATTGGAATCGTCTATAGATTTTCGTTAGCCAAATATAG -AAGTTTGACTGGTTGAAAGTCTGAACGTCAAAAAAGCAAAAATGGAGTGAGTTCTCGGCG -AAGGGCCTATCAGCGTCGGAATACACCTTGCCCAGAGTTGCACTGAGTGGTCAGTGAGGT -GCTTGAGAGTTGGTGATTAGTTGGGCACTAGCAGGGATAATTCTCAGGTCCACTGTAACT -CTCGTGCATCATGGCGGCCCCAGTTGTCCTACCAGAGGTTATTATCTGTTAGCTGCAGGC -GCCGCTCGAGAGGATCGGCCCTGGATACCTGTCGTCCGTCACTCTTCGTGCTTGCACCCC -TGTCGTGTCCCAAAATTAGTCTGAGGACCCGGTCAACAACCCGTGTCTCGAAAATGATTT -GCCGCTGGGGCTGTATTCCCCACCCGATGTTCGCATATTCTTCCGGCACAAACTAGGCTC -CTCGACCGCTTCCAACAAAATTAGAAGCATGCATGCCAGGGCTGCTGATCGTAGCTCAGA -CTTATAACTCGCCGCATCCGAGATCTCAAGATATAATATAACCTTTCAACCCGTCTCGAA -TTTGTCTGTTTTTCAACCTTCGTTTCCATCTTCTACTATATCATACCCTTAATAGAGTGT -ATAAGCTGCTGGTCCAGCTTTACCATTGCGCCGTCTCTTTAGTTACGAATAAGGTGGCTG -GTCCTCCTCATCAACTATATTCTTCTTTCGGCCGCGCCGCTCACCCCCGCCTTCAACTAT -TTCGATCGCTTTGGTCATTGATTGACCTTTTCTCGCTGTCAGTACTTGATTTTGAGAATT -ATATTGTGAAAAAATGCCAGTTTCAAACACTTCCGGCACCGCTTCAACAAACAGCATGAA -TTCTTCTACTGCGGAGGGGTCCATCAACGGGCAAGTAGACAAGTCTGGACGATTGCATAA -GAGATCTCGCTCTGGTATGAACATTTATACTTGATAATTGACTAGATTATTTGGTTTATT 
-GACATTGTCAATCTATATAGGCTGCTTCACCTGTCGCCTACGAAGAAAAAAGTGCGATGA -AGGCCATCCCGCTTGCAAAGCCTGTGCGAACCTCTGCATCAAATGTGAATATAAACGACC -TGTGTGGTGGACCAATGCAGAGCAAAGAAGACAACAAAAGGAACGGATCAAAATCAAAAT -CAAACAGACCAAGTCATTGGAACGGAGTGGAGCTCTTCAGGGTGCGTCATAATCAATTGC -TGATCGGTCTCAGAGTTTATTGGTCAAGTTCTAATATCTTCCTTAGACTATATGAACCGT -GCCCTTTCATCGCCAATTTCTGGAGACGACTTTAGTCGCCCTATTCATACCGAGAGACAC -GATTCCGGCTTCGCGGCACACTACTTGACTCCTGGATTCGTACCGCTATCTTATTCTCAA -CATTATCCTTATGAGGTCGATGTGAAAACAGAGCGCCAAACATTCGTGAACGATGTACCT -TTGCGACATGACTCTTCGGTCTCAACCTTCAATACAATGGGCCCGCCACAACTGCACACC -ACACTTCCCACTTTCCCGTCAGATGAGTGGCTTCAAGAAGAGTGTTTTGACTCTTCTCGT -CAATTCTCCCAGGCAAATACACTTCTGGGTGGTCAACGTGTTGGACAGTCTCAGGCCTCC -CTACAAGCCAATATTCCTGTCGATGACTATGACCGGCCGCTTCTGGATCATTTCGTGGAC -AATGTGTTGCGTATGATTTTCCCAATCCTAGAGGTACATCAGCGTGGTTCTGCACGGGCT -CGGTCTGTCCTTCAGTCTCTGGAGACAAACAAGTCATATTTTCATTGCTGTCTTAGCGTT -GCCGCAATTCACCTTAAAACAACAAGTCGATTGAATGGTGAACAGATTGACCACGATATT -ATGCGTCATCGCTATGAAGCCATTTCCGAATTATGCAAAGCACTGAATAAAGATGCTGAC -CATGAACAAATCCTGGATGCCACTCTTGCCATGATCTTCTTTCATTGCTCGGTTGGCGCG -CCCGATGATTACCTCCCTGATATCCCCTGGAACGATCATTTTCAAGCTGCGTCAAACTTG -GTCAGCAAACTGGATTTAACAAACCAAATGATTCCATGTGGTCCGATGCACTCAGTGCCC -CCCTTCAGCATGTCTCTAACCTCCTGGATTGACATTCTGGGTGCCACAATGCTTGGGAAA -ACACCCCAGTTTGCTCACTCATATCGCGCTAAGCACCTTAGTGGAACATCTTCTGGTCTT -CGAGAACTTATGGGATGTGATGACCGTATTATGTACCTGATCTCCGAAGTGGCCTGTCTA -GAATCGCTTAAAATGGAGGGCCGCATTGACTCCATAGCTGTTTGCACACACGTGTCTGCT -CTGGGCAGTCAGTTGGAATACACCGAAGATGCGGATACGACTTTAATGAGCCCTTACTCT -CCGGGAACCGGAGCAATTCGCCCTGATGCATTGACAAAAACGATGACTGCCATTTTCCGC -ATTGCCGCACGCATCTACCTCTGCAGCCTTATTCCTGGAACCGATCGGAACCAGGCCAGC -ATTATCAATCTTGTTAATGCATTAACAGATGCCATGTCTTACATTCCAGCAGGACCGTTC -GGATTTGATCGCTCTCTCGTTTGGCCGCTTCTTATCGCTGGTGCTTTCTCTACTCCTTCC -AGCTCATTCCGTCAAGTTTTGGATGATCGAGTGACTGCCCTAGGAGATCTTGCCGAATTG -GGAAGTATTGGTCGAATGTACCAATTGTTGAATGATTACTGGCGGGTATCCGATGATCCT -GTCAGCCCATCCTTCCCTTCACAAGCGAACCAGCCCGAGACAGGATATCCATCTACGGAA -ATGAGACAAGATGATCTTCCTTCTGTTCCTCTTCTGTCCCCTAGCATGCGAGAAATCAAG 
-AGGCAACAAGTTCACTGGCGAGACATCATGAACCGAAATGGCTGGCGCTATCTTCTGATT -TGATTGTACTGCAAGAGCGGGTTACAGCCCCCAAGTTTAACTTCATGTGCGAGTCAAAGT -ATTAGATCATTCAGTGTTACCTCCCAGACTCCCATGGAATTTTGCTGGTTGAATAATACC -CAGGCTTGCGTATATGCTACCTAGTCGAGCAAAGGGTTGGTGTTTTTATTGTTGTTTTGG -GCGTTAAGGATTCGCGTTGGATTATATCCCCTGACTCTCACTTTTTGTTTTTGTGTGACT -TTTACAAGCGCTGCCTAGTGGTATACAGTTGGGCTAAATCAGATTTACCTATTGTATAAA -CTGCGGTTTTCTCGATTATTTTGTAGCTTCCAGCTATCTTAAATCTGTCACTCGGTTGCG -TGGCCTCCGACAAGAAAATTCCTGACGTATCCTTCTCGTTAGGAGCTGTGGCTCTTCTTT -AAGAGCATTTATTTTGCATAGACACATGCGTTTTCTTGTTATATGGTGTCAGCTCTTACT -TTCTCGGTTGTATATTGTAGACAGGATTGCCCTACAGAGGATGTCTTTGTGCTTCATTTA -CCCTGGGCCTAGAGTTGAGTAATATAATCCCTCTTCCATTTATTCATGTCGTTACTAAAA -TAAACATTTCTCAGGGCCTTAACAGAAAATATGGCACTAGGTCAATTCTACGACTCCTGC -ATTGCTAGTATGGTCATGTTGCATAATATAAGAGGCGCCCTTTGATAATTTTATCCGTGG -CCAGCAGGGAACCTTGTAATCAGGTCAATGGGCGGTAAATGATCAAAAATTGCTCTCTTT -CGTCTGATTATATCCATACAAATTTCGAAAGCGATGTTCACTTGGCACTTACCATGCTAT -CAGAGCAGCGCGTCTTGAAACACGGGTCATATCCTACAGCGACTCATTGTGGAGGTGAAC -TAAATGAAGTGTCAAATATTGTATGATTTACGATAAGAAGCCTGTGCCAAACGATCCTGC -AGAACTGCCTGCGGGGGGTGTCTGCGGGCTAACCGCTTGTCCAGTGAGAACATGATAGGA -CAGTAATTGAGCCACTGGCCCCAGCGATTGATGAAACAAGTGCTATGTTAGCCCTGAGAT -TTTTGTTCCACCGAGTTTCTAAGGTTGACTTTAGAAGCTCACCGCCAACTATTGCCTTGA -AACTTCTGATCTGTTGTACAATATCGAGCCGCTACTGTCATGGATGCCGCGCGCCCTGAC -GGTAGCAAACCAGGAGGCTCGATATGCCGAAAATGCACGGTTGCTGAAGAAACCAAGCCG -CCGGTCGACGCCTCAGTCGCGTGGAGGATATTTTGGGAATGAAGAAAGGCTGATGCTCTT -GATTGAGCAGGCGCGCGATAATGCGGCCCTGTTTTACCTTGAGCCTTTTTATTTCCAAAG -CGGTTATTATCTGGGGTCGCTGATATATATAATAGACACAAGTAATCCTCTTGGTCGATT -TGTACACAGTTACCTTTACTCTGTTTTTTTTTTGTTTCTTTGCCTTATTTCTCATCTTTC -ACCTTGCTGGTGTCATTCATTCATTTTGTTAACCAAGTGCTGGTCACTCTGATAAATACT -TCATCCAAACTAAACCATCGTTGAGGCTTGGAGTTGTTGGGGGCGAATCCTGTCGTTCTG -CTCAGCCCCCCTTAGTTTGTTAAGTTCCGACAGCAGGCCTGGTGAGTCCTTCGGGCCAAA -TATGCGATTGGCCACGCTAACCCGTTTTAACCTAGTCGATCACCGGCGTATTGACTGAta -ctgtccagtacagtacagttgttacggttctgtacaataccgtccaCCTTCGTGTCCTAC -TCCCCCATATAGAAAAAGATCTCCTCGGCCCTCCTGAGTCTTTGCTTCTTGGTAAGACTT 
-ATTTAACTTCCTCTTCGACTTCGTCCACTCAATCAATATCCTGACCTTCGTCTCCATCTG -TACTCAGACGTCTTGTCCTTCTTTGATCAAGACGCGAGCGACCCGCTCTCGCCTTGTGTT -TGCGACACTAGTACATCCCCCCCTCAAAGCAACTTACCTCAACCTTTCTACGCCTTGTTC -TTCCAACCTACAAGTATCCTACTGAACAAGATGCCGGTCCGAAAACCTGATTTGCATCCG -CTGAAGACGTCGAAGGCCTTCATATTCCCTTCAGAAATACACAAAGATTCAAAGACCCTC -ACCGTCGATGACATCAAGCGCGAGGATGGAAGCGCAACGCCTATCACACCCCCATTGGCA -TATACCGATTTTCTCAACGCGTTAACACCGGTATTCACAAGCCCGGTCAGTGCTGGGGGC -TTTCCAAAATTTTCAATCGAAAAAAGCGACCCGTCGCCAATCTCGCAGTCGTCTACTGCA -ACAAGTCCCGCACTCTCTTCTGAGGTTTCTGTCAAATCGCCTACGACGCCAGTTATCTCC -CTCCCACCACCCTCCCCCTGCACTGTGAAGTCAGCGAAGGCTCCACCTGCGCTACGAAGA -CTACGAATTCCCCAGTCACTGAAGCCATTTTCTTCAAAAGACTCCCCGCGGACTGCCACC -CCGTGTAGTGCGACTCCATACAGCGCGGGTCCCTATAGTGCGACCCAATGGAGCGCACTT -CCTCGAAGTGCTGCCAGTCTTCGGTCGCCATTCTCCCCATCAGACTGGAAGCTCCGATAT -CTTGAGGCACCAAGAAGTGCAACCACAAAGGCAGTCAGTGTTAGACAAGTCGTGACCCGC -ACGATTACATACAAGCGTACCCCGCTTGATCCCCCGCCAAAGGCGAAAAGACGCAAGACT -CAAGAATGTCGAGATGTTTGAGCTGGCCTCTTGCTCTCCATTCATCATGTGCGGCGTTTT -ATCTTGAATTCTGATTTCTGATTATATGCGCAATTTGCGATGGGGCTTTGGGGGAAAGTG -GTTTGACCTTTGACCTTTGAGCTCTAAAAACCATATCAAATACTTTTGCCTAATGCTGAA -ATCTATGAGTTGATACTTCATGATTCGGCTTCAGCATTTTATCTCTTACGTTCTATTTCG -AAGATGAATTTCCTACAAGCGCTAGGAACGATGGGGAATTGGGTATACGGAGTTGGTTGG -GTTTGAATTTACGATTATGGCGTTTTGGGCAACATAACGGAACTTGAGAAACAAATTTTT -GTCCTGTACTGTACTGTCTGTCATCAGAGGACTTCATTGGCACATGTGCAGTTGGGGGAA -ACACGCTTGGGTTCTGTTTTGTCTTGGCCCTTCCTCTTATATTAGGCGTCAACATTGGTC -ATTTGACCAACCTATTAGCATTTACGGCGAATGAATTTTGGTGCATTCATGTCTTACTAC -TTGTATCCTACTCCGTACATGCTTCTGCGAATCCTGCCAGATACAGTTTGCAAATATACG -CCGTGACATGCTATGGTAAAAAGAACTGTCTTATGGTGTACATTACTATGTACATCTTCA -AAGTTGATGAGCTCCTTGGTAGATAGGGTACTTTTGTTTGCTATGGAGGTCCATGAAGCT -TAGTGTGAGCCTCATCAACCCGTGGGTTTATCTTGGGTCGAGGCCTTGGGTCTATCCTAT -CATAAGATGATCCATCACCGACAACTTACTAAGGCTAAATTTCCAGCCCCTCCACGTGTC -GAATGCATTTAACGGTAGCTAATTAAGAAGAAAGGAAATCCAACAAAACATCCAACAACC -GCGGGGAACAGGGAACATGCCCACTAGCGCTATGCCGGCTTACATTCCGACCTGGGCGAG -GGGTCTCTAGTGTGCTCCCTGAGCTTCGGAGATTTAATTATGATTAATTAAATGCAAACG 
-CTCTGACATATACCTATCTATCTATCCAGACCATCAAGATATTCAAGCGGTTTTGTTTTG -CAGCCCTGTGGCTTCCAGTTTTGTAAGAATGCAGAGGTTCAAATATTCGATTGTGATCCA -ATATAGTGACTATATGGCGTCAATATAAGTTTTGGTTTGTTGTCTGGCCTCACAGGACTC -ACAGTTAGTCCATTCCTTGTGAATCCGCCCCCAGCTCCTGGCGCCACGAAAAAGATATAA -AAAATGCCCAAAAGGGCATTGAGCAGCTCGCTAATATCTCAATGTATGATAATAACAGCT -ATGAGCATTCTCAATATTGAGCATTATTCTGTATAGTCTTTATTCTTAGACCCTTTCCTT -CTTGAGGCTGAGAGCTGCTGAGAGCGCCAAGCGGTTGGTTTCCTTCTCCCAAGTCAGCCC -TCGACCGCCACTGTCTGCCTTGCGCTACACAACACACGATCCAAACATGCCAGTGGATGA -GGTATCAAACACTGACTCGCAGGATTGTAGCTGCAGTGAGGCTCAGCGCACAGTCATCAG -TTTACAACGCCTGGTTTGCACTAGTATTTCAATACTGCATATAGGGCTTGACTGTCTAGG -CAGATGTATTATGGTAAACCAGCCTAAAGCTAAAGTTACCTACAATATACAGACACCAAT -ATACGTATTAGTCTCTCAGTTATAAACCCAGATATAGATACGGTGCCACATAACAATTAT -TTCACTGGAACTAACGCTAGATGAGACAAGTCGATCATAGGTTTCACGGCTCGACATGGT -TTATGCTATCAAGTTAGGTATTTTGATAGACCCCCGCTTTTGTGAAAGAAGGGAGATTGG -CCAGCTTCCAAGGTTTATGAGTCTACTTTTTCGTACTCTGTATTGCCCCTCTTCAATACA -TCCAAATGCGTATTCGAGAAAGTGGGTCAAACAATACCGAGTCATACACGTATGAAATTA -TCTTCGTACTAGTTTCAGTTTCCTACATTCACTCCAAATCAATTGAGCGAATCTAGGACA -TACAAAAATAGTATAGCTAATTCTAAATCAATGTGCGACTAGCAGATAATTCGTTTTTAT -TTTATTTTAATATGGGCCCCATCGGTTCAAGTCTATCGTGCTCAAGGCTGCTACTGGGTG -TATTGATACATACAAACATAGTTACAAGGTTATTGTACATACTTGCTCTGACGAAGTGCT -TATCCTAATGAAGGATTAGGATCAAGACTCAATCACTGCTCTGAGAAGCCTTCCCTCTTT -TCACATCAACTTAAATCAAGTGATGCGCTCAGCTATGACTGACCATTGAGGTTGCATAGG -AGAATATCTGGATCATTTTACTCAAAGATGCTTGCACGCTGTTCCACATATGGCTTTGTT -CCGGTGATATCCCTGCAACCTTTAAGCGCCTACGCTATTCCATATTTGTACTAGTGTCTA -CTGGTGTCTATGAAGGTTGGCGGGTCCGGGCCCGTCGAGCCCCGTTATTGACGATGGTTG -GCTCCAGAAGCCGCTTGGACCCACTGTGGTGTCTTCGCTAGACTGATTTTTCTGGTGCAA -CGTGGCGACCGATCTTGCTCAATATCATGCATGCTTGAAACATATCACAGCTCAAGATCG -GCTCAATTTAACTTCACATGATTTAGATTCGAGTGAGGTGAATGTAGTACGGAGTATTAA -GAACACAATTAGGTAGATTCCCACGGCTCGAAGAATTCAACGGCTAGCTATGTTAATGGG -CTTTTTAACAATGTCATCACTTCCTATCTTAGCTAGTATAGCAAAAAGTGCTTTGTAACA -AAGGTGATGTTAGTTATGTTGATCTCGGGGTAAAAGCCAGAGTGCCACTGTGCAGTACCC -TGGTAAGTAGGTACATACTGACTGTACTGTAGGTAATCCGTAGTGTGAAGACTCTTTTAT 
-TAGATGTCCATACCTCAGGCCGCATGTGAGCAAATGGTGATATTCTGACCAATGATGGTG -AGAGAATAGACGCTGTGGCTTTCTGTGCAGCAATTGACTAGCCTCCTTGGGAGTTGCCCA -AATCCCAGGCATGCGTGATCAAATATTTTACCTTTGCCAAGCGAGTTCTAATTTTACCCT -ACAGCGGGAGAAAAGTCTAGCTTAATTATGTAATGTTATTTTTAGAACCAATTTGCCGGG -ACTTCAACTTCATATCATCGATGGTATTCCGTATTCGATGATTCAATATATATCAATATA -ACTAACACCTCTAGACCAGGCTAATAGCAAAGGCCTTGTTTGCCAATTTGACGTGGCGCG -CTTTCTCTATTTTGTTTTAATTGCTATACCAATGGCATCATTACATTGCACATCACCTTA -TTGATCTATGACAGTGGCAAAATTCGATCAGCATATGATTCGTAGCTTTCCGTGTTCCTT -TTTAGACATCTTGTCCTGATTTTCGCGTCAATCATTACAAATATCCAGGGATTAGCTGAA -ACTTTACTAATAAATGCGGCCGAAGCATCTCTTATCTCCTTGTGAGACTATTCACAGGTG -AGCTTCGTTTGTTCATCATGAGTGGCCAGAAAGATGTCTACCCTTCCAAGAATAGAACCA -TATCAAGCTATGATACTTACTGCCTTCCAGATCAGCAGTCTCAGCCCTGTCAATAATAGG -CTCAATGGCAACATCGTGGGACATCCAAAGTGCATGCTATCTGCACCTCTAGTCAGTCAA -TCTTCGTCTGTTATTGGTCTAGGAAGACTTGCCCCGTCAAATCCCATTTTTTCTCCGCTT -CAGGGGCATTGAAGTTACTAAAGTTAGCAACGAGGGGTTTTTGATTGCACATCTTCTGCC -CTATACCACAGTTCCTGCGACTTGAAGGACGATCGCTACCACCATTCTTATTGTATCGAC -CAATTACAGTGCTGGGCTATGAATAGGCTGCATGCTCTCTTTCAGTAGTGGCTTATTCCC -TCAACTGTCAGGTTATTCTGCCACTGATTCCACTGTCTGTGGAACCGAATGGTTCATCTA -TAGGTGTCCGATGCGCGAGCGACGTGATAGAACTGCAATGTCAGATCCATCCCTCGCTGA -AGTGTCTATTTTTATTAATTCTCCTCCTCTGATACCCTCTTGGTGTGTATTTATCCCAGT -CGAAAGCCGTTTCCCATCTTTCTGTATCATACAGTATGATACCCATACCTTTGTTGACTC -GGGTGAAGACCCTATATCGGATAAAGAATCTAAATCAGGAGCATCATTTGCGACATCATG -CATGCTTGTCTATATTGAGATCAAGACAGGTTGTCATTGGGTAGTACAACATTAGAAGGA -TGGGTAGCATGCTGGCCCTCATCTACTCGTCGTTTACTCTACCCTCACAATGTGCTGAGT -CTTCTTTTAACCTTCCAATATTGTTTTGTTTGCTTATGACAAGTACAAGCTTACAATTAT -ATCTCTTCAGACGATCCATATTCCACAACTATGTTGTGTATTGTATGGTTCGCTATTGAT -GCTCTGACGTTTAAAGATTTGACTATGATTATATTCCAAGTTGCTACATGGATCTTGAAC -CAAAGCCTTCGATTGGTGATGGATATAAATAGAATGCATTTTCTATGCATGAGTGCGGGG -TCTATCACACTATGTGTCCTTGGGCCTCCATGATCTATTTTTGTTTATTGTTGTTTATTT -TTGTCCATTTTTGCTGATATGCTGATATCACGAGGGATTTTCCAAGGTCATGGTTCATTA -CCATTGACATAAAAGGTCGGTCTCCCAATTGATCATGTTTTCCAAATGAGTTGTTGACTT -GAACGCAATGCAGCCGTGGTTCTAAATTTAGAGGCGGGGGAAACAAAGCCtgggatgcga 
-tgtgatgcgattgatgtgaCATGCCCCTGTACTCTGTAACCAGCGAATGACAACGGACAA -AAAGCCCCCCTATTTAGATCGGTTAGGTATTCCCCTCGAAGCGGGCAAAGGATAATGGAG -AGCACATATGTTGTAGAGGGATTCCCAAAATCCCGTGATCGACCCGCCCAAAGTCCTTCG -TCCCTCATTCAGATGCGGTTTCACTCCTTTTCCCCCTAAACCGTAACTGCGTCTTTTGTA -TTTCCTATCCCTTGAGCCGTGCGTCAATCTAAGCACGCTCCAAGTCAATCGTGTTCACAT -TCTGTGCACCCCCACGCCCAGAAAGTACGGTGTTTCGATCTCGTCCATAGCCATGGCTCA -AGCGCCCCCCACTTTCACCCCAGACAAGGAGTTTGCTGACCTCGAAACCCCTTTGGGTGG -AAGCCTGTCAATAACTGCGTTGGCCAGGTTTGAATTCGAGGCAGGGAAAGGGAACGATGG -CACTAAAATATTGATGATTGAATGGGAGGACGATGATCTCACTCGATCGATAACCGAAGG -GTCGTGGCATGTCTCATGGGCTGGAAAGACCTCCATACTCCCCGCCGATGAACGCCCTAG -TGATAGCTTACGCCGCTTCTACTTCTTGCTCCCGCCCAATATCACCATCCCGCCTGTCGT -CACGCTTTACTATGAACCGCGGTCCACCCCATCAAATACGACTGATTCCGCCTCCCCTCC -GCCTTCGAAGCCGCGGGATGCGCTTCGGCTTAATCCCCTACCTGCTATTTTTCCAGCTGA -GCTGGGCGCCACCGGGCGTGCAGCGGGCAAAAAGGGTGTATTGCATACCATATGGGCAAA -GAAACGGCTCCGGGTGCTTGAGACGGAGATTGCTGCCGAAACCCGCAACAACATTGAAGG -AATTGCTTTGCACATGGCTATTCAAGAAAAGGAGTGGATTGAGGAAAACTTTGGAGTTGG -GTCGCGCGCTGTCGACACGGTTGCCAGTAGTCATGACTCCTCAAATTCATCATCCATTTA -CCCAGCCACACCGGTGTCGCCTATCAGTGGCAGGAAGCTAAGCGACAAGCTCAAGGGACT -CAAATTGCAGACAAGCTCAAGGGACCTGGCCGTGAATGATGGTATGTATCCTTGGTCCCA -ATCTTGCATATGCTTGATACCATTGAAGCTAACCAAAGCCAGGCTCTACTCCTAATACCG -CTCATCTTCTCTCACCTCAGTCTCCGGATGTTGCTGTCTCTTCGTTCAGCTCATTCCGTA -ACGTCGCGCATATGCATATCCATTCCATGCCAACTCCAGATACTAATCCCACCCCAACTC -CAAGCTCTCACATCAACCCTTCTGGCCCCGAATTTCTTAAGCCGGTTGCATTCTTCCCAC -CCTCGGCTCTGCAAGAAGCCCAGCAGAAGAGCGACCACGACGGATTTGCATCTATGGGCG -TAATTACACGCACCCCGAGCAACGATTCCGGCGAGGAATTGTTCGCCAAAGCGCTGAGCC -CACGCTCACCCGATCTCCCTCGGAGCCCGTTCTCATTCCGTTGAACTTGAATTTCTCATG -CATGATATGATACCATACCATCTTTCACGACCTGCGCAATGATTCCCCTTCTGCGGTGCC -ATACCATATGTCCACATTCCATACATCTATGATCATTTGTCTATCTTTCTTATTCATTCC -ATACTCGGGTGCATATCATGCCATATTCCTGCATTTTCATCTACCATCTTGTTCTTGTAT -GGGCGCGCCATACCATATCTATGGATCGCATATGTACATAGACCGAATTCAAATCTCACA -ACTCCCTCGACACTTAAGAAAAAAAGGCATCTTCGATGATTTTCTTTGCGACAGCAATAC -CCCGTTTGCGTGGGCGACTAGACATAAAATAATGGCTACCAAGCAAAGCAAGCAATCAGG 
-CGCACCACCCCACCACCTAGTACCCACATCCCTGAGGCAAACACAAACGAAAGCTATAGA -AACAATCAACAACTGGAATCAATTGCTCTTCTTACGCTGACTTCCCTGCCGACCCGACTT -TCCAGCAGTCGCGGTCGACGAACCGGAAGCAGGGGCAGTAGCGTCGCTGGACTCCTTCTC -ATTCTTCTTGCTAGTAGCGATTTCATCCTCCTGCTGATCCTTGCGCTCCGCATACCACTG -TCCAGCCTGCAGAACCAGAATTCCAACCAGAACAAGCGCGAGAATGGTATTGCTAGCGGA -AAGAACCTTGAGCGAGTCCTCACGACCAAGGATCTGCTCGTGCTCGTCGTCAGTGCTAGC -CAGGTCGGCACCGTACTGGCCTGCAGAGGCGAGGCGCAGGCCCTTGACGATGTTGGCAAT -GTAGACTGTCACACCGCACATATAGAGGACCAACGAGGCGCCGTCAAAGAGCATGTTTGA -TTCAGAGGGCTTGTAGAGCTTGGTGAGAAGACCAGCTAGACCGAGGAAGATTACGATGTG -CAAGATGCGTGGGATGAGCGGGGGCGAGGCGTGCAAGAAGCGGAGATGCGCCTCGAGATA -GTCATAGTGAGAGGTGGGGGTTGACACCGTGGACCACAGGATGGGATAGTCGTAAGGGAA -GAGGGAGAAGATAATGCCCAGGAAGAAACAGGTTGCTGGTTGGGGAAATTGAGTTAGTTT -GTTACAGTCTCGTGGCCTCTTTGCCGGGTCCGGGGAAGTTTCGCGACGCGATCGACTTAC -GGCAGACAATGAGGAAAGTCGCGAACGACCCGCTTTTGAAGGCCATTGTGAGAATTGGAA -AGACAGATGAGAGTATTTTGAAGAGAAAGGAGATGACCAGGGGCAAGGCTGAAGTCCTCC -AGGTGTCGCTAAGCGGGTGCCAAGCCCTTCGGGGCGTGAGATAAGGACGTTTTTACAAAT -CTCAAGGTTACAGAGGTTACAGAGGTCCAAGCATTTGTGATAACGTGGGAACATTTGTTG -GTTTGATGTGAGAGAAAATCGATTTTGATAAATAAATGTATATGATACAGATTTCAACGC -CTTTAGCGCCTTTTTATTAACGATGCGAATAGTGGTATCTTCCAGAACATCCCGTAAATG -CAAATGTACATAACATCATCGCTGGTATCGAGGCAACGACTCGTCTGCATGCGCCGTCCC -GCCGACCAGTTGGTCATCCTCATCAAAGACTGGAGCACTGGGCCCGTCGTTGGGCTCCTG -AGTATTGGGCTCGTCTTGGGGTGCGCTCGCCTGCTCCTGGAGTCGGTGACGCTCAAGCTC -TTGTTTATCGTCTCCTGATGCACGTAGTTCGCTGGGCCCATCGCCGTTCACAACAACGGT -CTGCAGAGATTCCGCAGAGCGGACAGCATGCGGCGCCCCGCTTGTATCTGGTGTTGGTAC -ATGCTGGTAGTCTTGGAGATGATGATCATCTGGTAAAACCGGCGCGGTCGGGACCTCCAT -TGGTACTGTAGATGGCCCTGCCTCATCTTCATCGGGAGGTCGACTAGGTAAGAGAGCTTC -CTCATGACGCCGTACCCGCGTTTTCTCGTCAACGGGCTCCTCGACTTCTGGGGGCGCGAC -CATAGAGTCCATTGGCTGGAAATGATCGGTGTATTCTTCAGGGTACTCTGCCCAATCCGT -CTCTTCAGGGAAATATTCCTCCGATGTCCCATGTTCCCCATGAGAATTAGGCATTGGGCT -CGCTTCTGGCCATTCCTCTCCATCAACAGGGTTAATTAGAACTGCAGATGCAGGAGCTGC -CGTTTGTGACGCGGTCGAGTCACCAGCTGTAGATGATGTACGTTTCGTCGTTTTCCTTCG -TTTGGAATCACGCGTCCCAACCACTACTTCAAAGGCCACTGAGATCACCCCTTTTTCACG 
-GCGGATTGAATCGGTGTCCAAGATGTTGCCAGCCCAGTTGGCAGTAACGCCACTCGAATC -GGTTGGATTGAGAACCTGCCCGCTGGGCGAAAATGTAGTACCACCGGAGACCATGTTGAA -ACGCGGGATGAAGCGATCGGGTGAAGTAATTTTACCTCGGAGGTCCGTTACGACCTCCAC -ATAGTACCGAAAATTGATCATGGCTCCCGGTACTCGCGTGATAGTGGGAAATGCATCTTC -AGGTATTCGAATGGAAGTCTTTACAATTGCAGTCATGGTATTTGGATCAATGATCAGCGG -GGCAAATGTCTGCGACAAATCTTTGCGAAAAGTGCTGCTTGTCCGACTTGTTCCCAAGGT -AAGTCCTCCAAGTCCTGTCCGTGATCGCGGATAGCAGTCTTCGTATACCGGCTTTTTTCC -GGCTGCCGTAGTGCCAATGGGGATTGCAGGATGGAGATCAATGCGACCTTGGCGGTATAA -TGTGACCATAATTCCATGCGCACTGCGCACTTGTTTACTATGGTTGATCGTGACTGTGAT -GGGTAGGGTATCGCCGGGGAGCACGCCGGCGCGCAGAAGCTCAGCCTTTGCGGTGATAGT -TTTATCTGAGATAGACGGGGTCGCACTGCGACTCTCGCTATTCTGTCCACCCGTGCTTGC -GGATGAACCGAGATCGCTGGTGATTCGAAAGCTTTGGCTGCTTTCGCTAGGCCGGCTGGA -TGAGCTGACATTGCTTGGCGAGGGGCTCAGCGGTGGCCTGGTATCTACGGAGGGTGCTGC -ACCACTAATAGGGGTATCGACCGAGGATGTATCTGGCCCTGCGGTTGCGTCGGAACTTGT -CGACTTGGCTTTCCCTCTTGATTTTGAGCGCTTCGAGATAGGTTCCAGTGTGACTATACG -AGCCCTAGGTGGTGGGAAAGATGCGATGTCGATGTTTTCTAGGATATTGACCCGTCTTCG -ACATGTGAGTGTAGGGTTCATGGTGGTCGGTCGGGTAAGGGTAGAGGTAAGTGAGTAGCC -GATGATTCCACGTTCAAACTGCAAGTATCCATTTGGTTAGCGTATTTTTACAAAAGAGGT -CGCATCAGCAAACGCACATTGATGCTGCTGGGAAGCGCATAAGGAGGTAGTCCCATTTCA -AAGCGAAACTTGTAGATACCCTCCTTCAAGCGTCCATCGCCACAGAGAACAACTTCATCC -TCAAACAAGGTTGCTAACCCATTCCCCAAGTATTCTGCACCTCGTCGACCCCGCCCTGGT -CCTAAAAACCCCAAGTCGGGGGTTGCTTCTCCTGCCGGGATCGCATTTTTGAACACCTTG -ACATAGCCATGGAGACATACGACTATATGCGTGATGCGAACGGGTCGTACAACGGTCACA -ACGACGGTACCTTTGATGACATCACCGGGAAAGTAGGAGCGCCATGGGTCATCCGGGTCG -ATATAGAAGTCGGTGATGCCTCGATTTCTTTGACCGAGCGGGGAGCGAAACTTGGAGAGG -AGAGAGTTGCGATTCCGACTGGCCGGCAACGTAGGTTGGGTAGGAGGGTTGGCGGACATG -ACACACAGCCGATGTTTACTGCATCTATACACGACAACGTGGCAGGGGTGAGAGGGATGG -TGGGGACGATAGACTGCAACCGCTTTGTGACCGATCGGATAAACACGTCGGTCTGGTATT -TCTCTGGGGCAGCACGGAATGGCGGGGGATGGTTGGACAGACCGGTAGGCAGTCAGCCTG -CGTCAAGAAATCCAATCAAGCAACACAACAAGTAGATCAATCGCCTTTGTCCAAGGATCT -GGCGTCTAGAACCAATGCACTGAACACGGTCTTATTCCAAACCTCCAAGGAATGTATACT -GATCCACGCGCAAAAGATTCCCCTGGTCCAAAGAAAGGAATGGGGATTATGATCGAGGGC 
-CACTGCACCACATTGTTTCATGTGACATCGAAGCGCGAGGCAGGATTTCAACGGTACCCC -CGCCAGGAACTCGCAGCGTATCGGAACGCAAGGTCAGGTCGCAAGATAAGGAAATGGGTA -TTGTCAAGAGGAATGACAGGCTTGAAAGGAGCCAATGGGTCAGTATCACCGGATACTATG -CTGTAGTCGAGAATCTAGAACGGGATCGGGGGATCGGGGTGAGAGGGTGAGAGGCGCAAC -TGGAGATGGCGCGAGATCCGGAGGAATGTGGGACTCGAGAATGCACGAGCCACCGGCAAA -GTGTTTATGGTCGAAGAGCCGAagaggggaaagaggagaggaaagggaaagggagaaata -agagagatggaggaggagagagagagagagagTCGCGAGCTCGCGATAAGCCTCCGAGAG -GCGGAGAACAAACCAGAAGGCCTATTCGCGTGTTGCACGGTCAAGCTTCTCTTGGTGTCC -CAGGTGGCTCAGCTAGTTTTCCCCTTTTTCTGGGCAATGAttatcatttcttttcttttc -atttctttttttttGACATGTTTCATGTTTCACATAATTTCCCCAAGGTTGCCATGTGCT -AAGCAAACCTAAGCTTTTCTCCCTCAAATCTCTATCCTATCCTAGTTCACTAAGACCCTC -TCCATCCACGTATTCGTCTCCATAACATTCGCGCAAAATGCTCTCAATGATTTGTCGATC -TCCACCAGCTAATTTGCGCAAATACTCCTCTAGCGTATAGATATCATCTGTCCACTGCAA -TGCCTGGTGATGGAACTCCGCTAGCTCCTCTTGGACCTGCTTCACACCCTTACTGCTGAG -CGAATCAAGGGGACCGGAGAGTTGGGCCTCTGTAGTCCGTACCCGATCGATTGCAACCTG -GAGCTGGGCATTTTTGGTCATGAGAGATTCCCTTTCATCTTCGTCATCATTCTCCTCTTC -CCGACGCATGGTCTCAGCGGCCAATGCCGCTTCCGCATCGGTGCAGGACTTCCGGACTTT -CTCCACCTCGAGAGTCACTCGGGCCAACTGACGCTCTCGCTCGAGCTTTTCGTCGCTACT -GAAACTCCAGTACCAGTTCCCACTGCCGATCTTTTCGACTCGGACTTTACCCTCATCCGT -CAGAGCTTGAATGTATTCCTTGACCTGGATGCCATTGATGGATGCAATTGAGGGTAAAGT -CTTCTCTAGATCCTTGAGCGTGTGACAGGTTCCAGTGGCGCGGAGATGAGTCAAAATCAA -ATCTTGTTTAGCGCTTTTGGTTAATTTGGGCTAGTGGTGCGTCAGTAGTCTCATCCGGAT -AATCCTAGACCCTGGCAATATGCCAAAGCCACGTACCATTTTGCACTGTCATATCCCCTA -GGGTAAATTCAGAAAAAGCTTAACAATCCAGCCATCCACTCATTGGGCAAGTGTGAGATG -GCTAAGCATTGGTGGAGGCGCAGTACTGGATGAGGTTGTCCAGGTAGGAAGAGGATTACC -TGTCACGCAAACACAATCAAAGGTTGGAAACCCATGCCTGACTCAGTACAAGCTCATCTC -CCATTTGACAATTACTTTTTATTTCCTCTCAAATTATAAGGCTTTGTGCTTTGTCTGTAA -AGTTACATTGCTTTTTTTGAACAGCTTGTAAAAATTATGACAGTAGAAACAACTAACGCT -GCTCACGATTACCCTCGGAGTTAGCACGAGACTGCATGAAATTCGACCTATCCTTTTGGG -ATCCAATCCAGGCAATTCCGAGAAGACCCTACTTGGTTCAAACCTGTCAAGTGAGACAAG -ATTTTGCCGCAACAGAAACGAGGTCCAGCAACAAGATGTCGTCTCGAGGCCAGCTATAAT -TAAACCAAATTTGCTTCCGGTTGCACAGCATATCCAATATTGTACTTTGACTCGGCATGA 
-GCGCGCTTTCGACAGTCCTGCCAGCCGGCTAGATATTGGCCGGTTTAATCACCGAAATTT -CTCTCCCTGGAGGTAGTGCTTTGTGTATAGCTCCCTTCGGGACCTTCGGTGTAGCGGGAG -GCACAGAAGAAGGCGCAGCAAACTACTGTCTTACAGCTTGAAGCATGACTGTTGCTTCTC -TGTATGCCAGAGAGTGGTGAAAGGGACTGAGATATGTGCCGATACCATGTGCAAACACCA -TGTGCAGCGTGGCTGTGCACTGTAGGACATGTGCAGTACATGGGGTGACCCGATTCGCTT -TCAAGGGGTTTGGCTGAAGTTTTGCACGATATCGCGTAGCTTGATACAGCTGCAGGCATC -AAAAGCCTCCGAGAAGATCCGACAGCGGAGAAATGGGTGCAAGCTGATAATAAAGTTCTA -GCGCCATCTCATCCATTCATCGTCTTTCATCAACTTCGGTGTCAAAATCTGTTGTAACAG -AAACAGAACGAGACAATTTGCGCCTTTTTGATCTTTGCCGGGCAATGCTGCTAGCTCGCT -TCCTTCGTGCTGCTCGTGCTGTTGACCTAGTTGTGATGTCACTCTCGAGACCTGTATTAT -CCTCCTCTTCCTCCGACAGCTCTTGTCCTTCAACCAAGGCCAGATCCCCGGTTTCACCCA -GTTCCTCCTTAGCCATCTCCTCGATCAGGATCCCCATTGCAAGCAACGCTGTCTCATCCA -TGGTCTGGTGGTCATCTTTGCCGTTGTCTGTCGTAGCGTATTCGTAGTAGTCGGCCGCAT -AGGCATGTATTGTCTCAAGAAGCTCGGAGTTCGGGAGAGCTTGATCGGAGGAAAGATCCT -CATGCGCGAAGTAAACTTTCTCGTCGACTGATTCCGCCGGTATATTCTGTCTGCGATATA -AAAGCTCCTCTGGACGCAAACTTCGAGCGCCTGCCGAGTTTGTGTCGCGATATTGGGAAG -CAACTTCACGGGGTGCGTTGACACCGCGGGTGCTGACACGACCTGGTTGCAATTAGATTG -CGTTTTCCCTAGCATGATAAGCTAAGAAAGCTTATAAAACATACCAGCTACCGCATCATA -AACATTGGCGCGGCGTTTTATGTGCTTCGAGGGCTTGTCGTCCAGTGGTTCTTGATCATC -TTCGAATGGCTCGACAAAACTGTCTTCGTCCGAGTCGAGTGACCTGGCAGCAGACATACT -TCAAATTCGCCAGGAAGACCAATCAAGGTTTGGCAAATCGAATAGGTGTGCAGCGGATAC -ATGTAAGTAAAGATCGAGGGTCTCCGGTTGCCAAATTCGACAAATGCGCCGGCACAACTT -TAATCCGGCATAGCATAGCATCATCCAGTGCTCTAGAATTCTCTATGCAAAAGCATCAGC -GCTGATGAATGAATGAATTCCTATCTCCTCAACAGAGCCCTTGGGTCTGTAACTCCAAAT -GATTTTCATGTTGCTCAAACAAGCCGGCTAGTTCACAACCTTGAGCCTGCTCCGGGTATA -CGCTTCTCCAGTCGCAGAGCCTCTGCACCAACGGACCGGTCCGAAGACGATGTGGCGCCG -TTAGATCAAGAAGTAGGTGTTACGGCGGAGGTGTTTGCTCACAAGTCCGGCGTGAATTGC -CTGGCTGTCGATCAGTTTGAGGGCCGATAGTGCGTGTGCAATCACTTAGCATCCGACGGT -TTTCGATCTAACATGAAGGTAGTATGATATCGGGAGGCGCCGACCCATCGATTCACCTCT -GGGACCTTGAATCAAGAGGCTCAGAACTCGAGCACATCCATCACTCTTGCGCGTCCGTCA -GCAAATCCTCTTTCGCCGATGCACATACACATGCGATTACCTCCCTTTCGATCTACCCGT -TTGATCCCGTCCCATCAACGATATTCTCGACAGCACACGATGGTACACTAAAGCTGTCGG 
-CGCTACAATCGCCCTCTATCACTCCCATACATACATTCAACCTGGACTGCACACCATACT -CACACTCGTTCTCCTCCCAGCCTGGGTCAACACTACTCGTAGCAGTGGGCACGTCTGAGC -GGTCGGTCCGATTAGTAGACCTTCGATCTGGACTTTCGACACAAGGTCTACCCGGGCACA -ATGGTGCAGTCTTATCAGTGGCATGGGCGCCCCATCGGCCCCATCTCCTAGCCTCTGCAT -CGGTTGACAACAGAGTAATCATATTCGATGTGCGTCGCGGAGGTCACAACTCTGCCATTG -CTACCCTAGATATGGACGATCCAGTAGGACTGGTGGTCCCAGGAACTGCACCAGTCTCAT -ACGAGAGTCGTCCAGCCTTCTCCCGGCATGCCCGTGCCCACAATGGGCCTGTGACTGGCG -TACGATGGACCTCTAATGGTAGTCACATAGTGACGACGGGACAAGATTCTCGAATCCGAG -TCTGGGACTCTGGAACAGGCGCCAATACACTTGCTCATTTTGGTCCTCGGGTTCGTAATG -GCTCTTCCTCGCACCTCGCTGAACGGGCACCCTTGATCGTGCCGAGAGGCTCCATGGGTC -CAGGACACGAAACACTACTGTGGCCGAATTTCAATGAGCAAGATGATCGTGGTGAGATTT -TTGTCTTCGAGCTCCGCGAGGGGACATTCGTGAAGCGTCTCCGCGTCCCAGGTCTCGCGG -CTGGGTCGCAGAATTTTCGTGGTCGGTCGAGTGCTTTGAGCGCCGGGCGTATCAATGACC -TTGTTTGGCGAGGGAACGGTGCCTCGGGCGAGGGAATAGAGATGTTCTCGGCGCACGGCG -ATGGAACCATCCGGACATGGGTGTCTCACGAACCGGATGGCGAACCTACTGAAGCAGAAC -AAACAGCGCAAGCCGATCGCAAGAGGAAGCGAGATGTTCTTGACGAGATCTATCAAGGAT -TTCTCGGGCCTGATCAGCCCGGGCTACGACTTTAAAACGATTTAATAGTCATACCATACG -CAGGCAATCTAAACAATTTTTCGTGAATAGGATTGTATTTTTTACATCTGACTAACGCCA -TTCATAACATCATGCACAATAGCGCGAGCTCAACAGCCATCATTTCGAAAAGCTGCAGGA -TCACAACGGTGTTTGAAGAAACCTTTCACTCCTCCTGATAGCGCACAAACTCACTCCGGT -CGTACTTGGAATGCTTGAGATCGCGGGTATCACCATGGGCAGCATGGTGATTAGCGCCCA -TATAGGCATGCTTGGACTCGATACCGGCCGGCATGGAGTTGCCCGTGCCGGTGATGACCT -CGCCGCTCTGGCCCACATCCCGGGTCGAGCGTGGCAAGTCAGTAGTGTTGTCGGCAGCCG -TCGCCGTTGAGGGCTTAGCGATGCCTTCATTGTCGAAACCGAAGTGTTCGGCACGCTGGT -CTGGGCGTGGCACCGAGGGGATTGTATTCATCTTGCCGGCATCATTGGGCATGGATGTAC -CTTGGGCGGCCATTTCGGAGAGGGTGCCGCCTTGGTGAGGATCTTGTCGAGCCATTTTGA -AGGTGGAATTGGTTGTTGGGTTTGGAGTGGTTTGATATCCAGTGGAACTTGAGATATTGA -GCGATGGGTGAAGGCTTCGGGTGGTTGATTGAACGAGACGAGACTACAATTGCACTCTCT -TATATTCCACTCTAGAGACCGCGTCCTGTACTGCTGGTACTGATGTCACAACGTTGAACG -TTGGCGAGGGATGATGGGCAATGATATCACATGGGGTTGATTTGTTGGTCATATTATTCA -GGTAAAATATCTACAAACAACAACAAAAAGATGCCGTCGATTAATATGCAAGACTGGAAA -TTGTTGGAAGTTGGGAGTCAATATCTTGAAATTCAAGTTGATTTTCCTGTCATTGACTTT 
-GAGGATTTGCAGTGGCGAAACGAAATGCCCACTTCTTATCAAGGCAACCTGACCCACCCT -ACTGGGGTCTAGATGTTCGGGATCAGGAACCGCAAATAAGATTCGGTGTTGCACTTACTA -TGCATAGGATTATACAATTACGCAGTGTGAATTGGGGGGCAGAGATGGGTCAACACGGAC -TGTTTATCTCTGGATTATGACATCTTTGTCGAGTGGGTGGAAGATAGATTTGTACTGCTG -CAGAAATGAAACATCATTGAAGATACACTAGCCATTAAAGATGGGTCAAATTGGTAAGCT -ACAAAAACTTGTTATGGAAAAGGCGGCTGTGAAGGGAACATGAAGTGCAAAAGCAGAAAC -GAGTGACAAGAAAGTACTGATAAACCAAACACTTGAATGTAATTGCCGGGCGTCTACAAG -ATGAAGGCAAGGACACCCATGACAGCGGCCGCGGCGGAGATGATTCCCGCAGAGAGCCGG -GCGGCGTCACCTTCGAACATGATTCCATGGGAGACAGAAGACGACGCGGAGGAAGAAGGA -GAAGCTTGCTGCTCAGCGAATGAGCCACTGGGCGTAGCCCCTGACAAAATGCCATGCGAG -ACAGGGGTTGCCATGGGCACATGAACGGGGATCATGGAGTACGAGCCAGCTTGGCTCACA -GGACGTTGAGAAGCCTGGTTGTAGGAGGCGGGGGCTGAGTTCGCCAAGGGGTCAGCGCCA -GCCAACTGCTGGGGAGAATGGGAGAAAACAGCGCTAGTAGGGACTGGGGCCGAGGCGGGG -TTGGGGTTCATCATAGGGTCAGCCCCGGCAGAGTGCTGGGGAGCATGAACAGGGATGGCA -CTAGCTTCAATGACAGGAGCCGACTGGTAGGCGAAGTGTGTGGCTTCGGCAGTCTTGTAC -TGGGTCTTGGTGACTGTGCGGACAACTTCATGCACCTGGGCAGCGCACTGGGAAGCCTCA -ACGCTATCCTCCCCAGAAGTACCGGCATTGTCGTCTGCGGCAGGAGCATACGCGCGGCGG -CTGAACGAGATGCTTCCAGCGGAGGGGCCCTGGTTATTGTTCTTCAGCGATGTGAGATCA -TCGTCGACGGTGTTGTGGGGCTCGTAGTGGGGCTCAGAGCTAGGGCTGTAAGAAGGCTCG -TAGTGAGGCTCCGGGTGGTTCTCGGTGTGAGGCTCAGGGTGGCTGACATAGTGAGGCTCG -GGGTGTGGCTCGTAGTGGGACCCAGTGTGAGATTCGTAGTGAGGTTCAGCGTGAGGTTCG -AAGTGGGGCCCAGGACGAGGCTGGGAGTGGCCCTCAAAGTGAGGTCCGGGGGCGGTAGGG -AACCCACGGCGACTGAACGAGATGCTCCCAGCGGAGGGACCTTTGTTATTGTTCTTCAGC -GATGTGATATCATCATCAACTGTGGTGTGAGGCTCGTAGTGAGGTTCGTAGCTAGGGCTA -TAGTGAGGCTCATAGTGAGGCTCCGGGTGGTTCGTAGTGTGAGGCTCGGGATGGTTGATA -TAGTGAGGCTCAGGGTGTGGCTTATAGTGTGACTCGGTGTGAGAGTCGTAGTGAGGCTCG -GGGTGAGGTTCGAAGTGAGGCCCAGAAGGAGTAGGGAATCCACGGCGACTGAACGAGATC -TTCCCAGCAGAGGGGCCCTTGTTGTTACCCTCGAGCGAAGTGAGATCGTGCTCGAGTGTA -GTGTGGGGCTCATAGTGAGGCTTATAGCTAGGGCTATAAGAAGGCGCATAGTGAGGCTCC -GGGTGATCCTCAGTGTGAGGCTCGGGGTGGTCAACATAGTGAGGCTCGGGGTGTGGCTCA -TAGTGTGACTCGGTGTGAGAGTCGTAATGAGGCTCGGGGTGAGGTTCGAAGTGGGGCCCA -GGAGTAGTGGGGAATCCACGGCGACTAAACGAGATCCTCCCAGCAGAGGGGCCCTTGTTG 
-TTACCCTCGAGCGAAGTGAGATCGTGCTCGAGTGTAGTGTGGGGCTCATAGTGAGGCTCA -TAGCTAGGGCTATAAGAAGGCGCATAGTGAGGCTCCGGGTGATCCTCAGTGTGAGGCTCG -GGGTGGTCAACATAGTGAGGCTCGGGGTGTGGCTCATAGTGTGATTTTGTGTGAGACTCG -TAGTGAGGCTCGGGGTGAGGTTCGAAGTGAGGCCCAGGAGGACTAGGGAATCCACGGCGA -CTGAACGAGATCTTTCCAGCAGAGGGGCCCTTATTATTACCCTCGAGCGAAGTGAGATCG -TGCTCGAGTGTAGTGTGGGGCTCATAGTGAGGCTCATAGCTAGGGCTGTAAGAAGGCGCG -TAGTGAGGCTCCGGGTGGTTCTCGACGTGGGGCTCCGGGTGGTTCACGAAATGAGGCTCA -GAGTGAGGCTCATAGTCTGAATCAGTGTGGGACTCGTAATGAGGCTCGGGATGTGGCTCA -AAGTGACCAGCATGCGGGGCGGCGCCGTGGGCAGGAGGGCCATGGTTCTCGGAGCGACGG -AAAGGGATAAATGGCGGACCGTGAGGGTGCACATCCTCATGCTTCACTCGGGTAGAGTGA -TCGTCTTTGTTGTAATCGTTGACCTCGGTGCTGGACGCATCAGTCACAGACACATGGAAA -CCCTGATCCTCATCGTGGTGAGGGTGGTGAGGGTGGTCACTAAAGACAACACTCCTAGAT -GGCTCCCCACGCTTTCCAAAGGGACCCATTCCAGGGACTTGAGGCCCGCCCGAAAATCCG -GGGTGACCAAATGGCACAGGCACCACCGAAGGGCGAGGGTATACGTCCGTGTGCTTCAGC -TTGACGGAGTGGTCGTCCTCGTCCCACTCGTTGACTTTGGTATCGGCGTTGAAACTAGCG -GAATTGGGGCTGATGAAGCCGCCCTCGCCATCATTGCCAGAAGGCCCACCGATGGCGTTA -CCACGAGCTTGCTATTCATTCGGTTAGCTGATGCATCCGCATACATCGAGGACTGCATAC -ACTTTCAACAAACTTGGCCTGGGCAGCGACTGCAAGAAGTGCCACTGGGATTCCGCCGAT -GAAGCCCTTCATGATTGCGGTGCTTGAGAGATATTGCTTTTAAAAGGCGTATGCTGCTGG -CCGTAGGAGGTTAAAAGAGTGACTAGTAGCGAAAGCAGAATAAAAGAATGGCTCGATTGA -TTGAATATACTCCTTTGAAACAATGAGTAGCGAATAACCCTAAGGAAATGTTCAGACCTT -TAGTAGTTTCTTTACATTGATGATCAACAGCTGAAAACGCAGGCGGGAATGCCGTCGCAT -CAACAGGGACAGCACCTTGACACGAGGCCAAGCGGTGCGTAACGCCATCAGAATGTTGCC -GAGAATGAAAAGCAACACCAACAAGTGAGAAAATGTCCGTCGATCGAGGTCGGATCGTAA -ACAGATCCCATGTAATCCCTAGAAAGGTGGTGGTGTCGCATTCCGCGAGCTGAGAGGGAT -GGATCATCGTCTGAACAGGAACGCCATTCAGAGATCAAACAAGCCCCTTGTCTGAGCTGG -GATATTTTCGTGAATATGTTTACATGTGTCGCGCTGGAGTGATGGCATAATTCATGCTGG -CGTCTCCCAAGGAGACTCAAGTATGGAAGATCCTGGTACGGCTTGTCGAGAGTTCACTCG -AGTTGAGCTTCGAGAGGCTAACTTGGAAAATCACCAACCAACCAATATGGGATTCTCAGT -GATGTCAAGGATTGCGATCCCCTGGCCACTTTTGCATCCACAAGCCTAGGTAACTCATCC -GTCCACATTACAATAAAAGTATTGTATAGTGTGGCCTGACTTTTGCGCTAAGACTTATAC -AGTATTACGCTGAATCTGTTCAGTGTTTGTATCTACCCAATCTACACAAAAGACCTTCAC 
-ATACATCAATGCGTTGATCACTACTCTCGCTTCAACTACTAAAACAGAGTTGAGGCGAAG -GAATGTGTCCCGTCTTCGACATCCAAATCCCACCGCCCTCACTTAAGAGGACACAGGAAA -CGAACTTCCTATCGAGGACAATCTATCTCCTGAGCCAATCAGTCCCGAAAGACCGAAGGC -AACACATCGCCGTGGATTTGTCGTAAGACAGAACGATGGTCGTGGCGGGCTCAGTTGCTT -TTTTTCGATAAAGAAGAAAAGAAAAAGAGAGAAGGGCGAAAAAAATTGCGGCTTTTTATA -ACGCCAGTGGGGTCATCGGATCGGATTTACAATCTTCATCTCGGCACCCAGGTCGGTCCG -ACCCGGTCTGTTTTATGGCCTGCATTTTAAGGCTGTACAGGCTTCATCTCATGCTTTTGG -ACAGAAACAAAAAGAAAATAGCTTAGGATATGATACAACGGTTCCAGCCAATTCCATGAC -ATGTCCAATTCAATACCCGCGGGATCCTGATCACGCCGGAATGTGACATGTCATTTCTAT -GGGCAAATGGAATAATCTCCAATTAAGAATGACAAAATCCTCACCCTCAAGCTCCGGCTT -TCCATAGTACCTTGAAAAGAGAAATAACAGACAAAACGGGTGATCTAGCTTAGGTCGGAG -AAGTCGATGAGGTGAATCATAACGAATGGTGATCGGGCTGCTTGTTGCAGTGGTAATATG -GTATCGGAACCTTGCATGAACCGTCAACGATGAATGCAGTGGGTGAAAAATACTCGAAGC -ACTCACTTATCCCCCAGTTGTCGTTTTTTCGGCACTGGCTCTCCAACCTGCTCCTCCCAC -CGCCGCAGTAATGCCCAATGCAATTGCAATCATAGTTCCCACTGTCACCACAGCGACGAC -TGCCTTCAATGCCATCCGTTGCTTGTTACTTAGCGACTGCAGCCATGTCAATGGCTTGTT -TTCTTCCTCCCACAACTTCGATCGCCGAGGACTATCGGTTCGGCAGCTTGCGTATGGGCC -CATGCTTTCCAGATCTCTCAAATGACTTCTCCGATTGGCATTCTTAGTTTCGAAGGTGAG -TTGTTCATGCGATATAGTTGGACTTGAATGAGGATAGAACGGCGAGCATGGCTTTGCACC -GATTGAAGGATCAAACTCCTGGCTGTGAGTCCCAGGCGAAGACGTAATGGAACATCGTGA -GGGTGGGATATTAGGCGTCGTTGGGATTTTCTCGTCGGCTTTGATCGCTGCGGCCACCTG -GGCCCCAATCACCCCAAGTCCGTGTAAACTCATCGCGATGTGCCAATGTGACTAGAAGGG -ACAGCCGATTCCCAGATCAAAATATCTTGGATAGTGACCTCCTCAAGCTTTTTCGCTTAG -ACTTGGATCGTTGTATGGGGCGTCGTTCTTTAAGCGTGGGTGCTGGTGTTATGATTGCCT -CAACGGGAATGGCAACGGTCGAGACACCGCCGTACATAAACAATGCAGTATCAACCTGGA -ATGCGCTCCAACTCCAAGCGGACCGCGGCAGTCTCATCACTCGAACTGATCACAAGAAGG -AATATAATTATTGAAAATAGTGGAGGAGCGAATCCGTGCCCGTCGAAATGATAGTATGGT -ATAAAATGGAATGAGTGGTGGCTTCCTCCCAAAAAGCAACTTGATGTTTTGGCAACTTCA -CGGCAACAACAATTTGGATTTTTTTAAGATCCTAGTTACATTCGGTAGCACCTCGTAGTG -AAATGGACTATTTCCCTGGGGATGCAAGGCCTCTATCGGCTTGGAGCCAGTTTTCACCCC -TAGATTGGTGTCTCGTGTGCCTTCGAGCATTCCTACCCTTTACAATGTAAATTATCTTCT -CCTGCAGGAAAAGGAGCCCTGAAATCCTACTATTGGCAAATCATTGCTTAGATGGCATTC 
-TTATCGGTCGATCATGCAATCCCCTGTTCCAAGTCTTCTCTCCATAGGCCCTGTCTGTAA -CCCTGTTGGCCACTTCAATTTGTGGATTCCACCCTAGCATATGAGGACATCTGCCTATTG -TGTGGTGTCACTCGGTGGGACAAATCATAGGACGACAGGGGCATATTTGAAATCGGGCTC -CTCCGCCATTATTCTATCTCAACATAGTTCACAGACCCTAGGTCATATGTAAGTAGGTGA -TATCTTGAATATGTATAAACCGCTGCACGACATTGTACCATTCGTGCCGGCAAAGCTTGC -CCTAGTAGAGTTGGATAGCATCAATCATAGCCCAGACGGCCATGACCCTGTCAGGTTCGA -AAATTATGGGCAAAAATTAGCGCTGTCGTATTGTTGATGCCTATGCGTCATATAAACTAT -GTAAACAGACCTAATCAAGGGACCGAGCCCTCTACCAGGAACCTCAAGATTTCCCATCTT -CGGCTACAACTAAATCCCCATCGAAAGAAGCAAGCCTTTTGTTCTATGAGGATATGGAAG -TAAAATTAATAGGAAATATTGTCTACGGGCTATTCAACGATAGTATGTTGAGTCATGGCT -GAAATATTGTAACGATAATGGCTTGTTCAATATATATATCTTTCATTGACATGATTTGTA -TGACAAATGTTGGACGAAATATATTTACGGAGTACCTACTCCGGAGATAGAGACAAAACA -CACCTGGGTCCAATTTAAATGAGTCTCAAGATCATACAACATATGAGATCTTAAAGAGGT -ACTTATCACATGGGTAGACCTATATCTATTCATCGGTTTACTCGCAACGTTGATTCACCT -GCCGAACGTAGGTGTGTTTTTACCTAAGACTATGTAATTCCGCTGGGCTGCATCACGTTG -ACAAATAATAGGGATGTAGATATAATACGGAGTATAGAGAGAATCATTCTCTTTGCACTT -CAGCTAATTAAGCTTTCCATTCGGTCCATACTTTGACTATAGTCTTTCACCATTGGTTTG -ACGCATCAGGGGTAATACTACTCTGTCGCCAAGCATACCTCATGATCAACCCAACCAGCG -TTTACAATGACTTCGGGATATAAGTTAGCGTTGCTGTTAATCGAATACTAAGCCAGGCCT -TCGTACTACTCATGAGTAATAGACCACATTGTCTATGTTAAGTGTAACACATCTCTAGAA -GTAGGCCCTCCCTAACCTTGGAATCATCGCGATACAATAACTGATCAACTGTCGCGCTTT -CCCAAATCGCCTTTGTGGATCGAATCAGCACTTCATAATTTAAAAATAAATCAATTTATA -TCTCAAGAGTAACTATTTTTATTTCCAAAATCCATAGGAATAAATATTAATGCCGAGCAC -CACGATATGCGGTGTTCGTCCAGCAAGAATAGGGAAATATAGGATGAACATAATATGCAA -AATATTAACTAAATCAATCAGATGATCGCTGCCAGGATAAAGGGGGAGGGAAGTGAAGAT -TTTTGGAAGGTAGAAAGAAAATCACTCCAAAGAGGCATTACCCCTGGATGCCGAACAAAC -AAGACCACTAAGGCGCCAAATACACACAATAAAACTCGAATGTGTACCTTTATATGGAGT -ACAAGACTTGACAGCAAATGGCCTTAGAAATAATATAGTTCGTATTATGTTGCTCCGTCT -CTAAATTGAGGCTGGTGCTTTGGGCCAATAGGACCCTTCCCTCGCTCTCCCGCTATAAAT -TACCCATCAATTTCCTTCACATCAGGGCTATCATCTTTCCAGGTTTGATTTACGCCTCGA -TTGCCTGGTCTGGCGTTTTGCCTTAAAAAAAAAAAAAACTTCAAGTGCAGTGGAGCTCGC -CTCCTTCATGTGTATGGcaattctattcaaatcagtatcaattcaCCACAACGCAACACA 
-GAAAGCGACTTACTTGGGGTACCTCGGTAGTTGTCGAAGGCGCATCGGTATTTGTCACAG -TAGGGGTTGGCTGACTAGTCATAGTCTCAGATGTTGTTTGATCAGTGTGAGGTTCATCAT -GCTCATTTCGAACACCTGAACAACCAATCCCTGGCACAGCGATGTCGTAAGCATATACCT -CCAGTGCAAAGTACTGAAGGGTTTCTGAATCATGCGTTGACAGCGTCGCATTTTTCTTCG -CCTGGTCAACGACCTCTTCATATCCGTCTGCAAAATGGTCTATCCAGTTCTGCCCGATGG -CAGGAACGTGATATAGACGGTGCAGTAGATCAGCGGCCCAGAAGGTATTGGTCTCAAATT -TCGAGACTGTATATCCAAGGCCACACATGGTTGAGAGAGAGCGACGAGTTTCGTAACTCA -GATCGCAGATGACTGTCTCATCTGTGCCATTTTCGCCTCGCCAATGTCCGGCATAACCTA -TTCGGTTGAGATGTTTAGTACACGGTCCCTTACATAGAGCAAGTTGTATTACCTTCAATG -CCACAATTCCCATCTGGATTATCACAGCGAAACAAAACGTCTTTCTTGTCGCCGTTGACT -ACGATATCAAATGCCCCGATGGCTTCCATGGATGGGCGATCACCGAAGTATTTTCGGTAG -ATTTCACTCTCATTTTTCCATCTCAAGATATGGTCTTGGGCATGTGCAGCAAGAGCAATA -GTCTCATTAAGACCTAGCTCAATCTGGCGACGCTGGGTTGCATTGCAGCTTGAGTGGATA -GGAAATTGATTCACGGCCCCGGCGTTCCAAGGTGCGAGCGCAGTCCGAGAGAAGGGCTGG -GATTGATTGCCTTCGGGGCTCTCAGGGGTAGGAAGAGCAACAGCAATTGCCGCAGAGCAG -AGAATGAGGTTTAACAAAAGCATCTCTGTATAATCGAGGCAAACTGAATAGGAAGCATGG -AAGCAAATTTATAAACACGGTAAAGGTAGCTTTTATATGCTTTTTACCATGCTTTCTTCA -TTTCTATATACCACTGTTTTAGTTTTTCTCATGTCTTTGCCAAGTGTAGCTTTTGTCCTC -ATTTTCGCCGTGTCTGTAGTCTACGCAACACCCAGTAAAGGATAGAAAGCGGGATGTTTT -CTCCATATTTCTCGGTATACGGATGATGCAATCTGCCAAGAGGGGACCTTGAGAGCTTCT -TTGCTTCTCTCACCAAAAATTTCTTCTTTCTCCAGCAATCAGAACAGAAAATCCCAGATT -TATGCCAAGACAAAGGCTCGGCGGAGGGTCGCCAAGACACTTTGGCGGGAGTACCTTGAG -AGAACAAGAAGTAAGTGGGCGATATTCTCCGGAGGAAGCCCGGATTTGAAGTTCAGACCT -GCTCTTGGCGAAAATATGGAAAGATCTTTCGCTTGGCATTGAGAACCTTGGGGGTAAGAC -CCGTCCGCATTGCTTTTACTTAATCTACCAGGCCTTTCGGAGAGGACTCTACCCATCATT -CTATTGAAAAAATCTCGACTTGTGGCATATATTTTGCATCAATTCTAATCCTAATCCCAT -TGTTCTCATTTGCAGAAGAGTAGAGCACTAGCGCTTTATCATGCTCATTTCCAGACCTTT -CCTTATATCACTGGGCATTTCCAGGGTATATGCAGCCACAGCAACTACATCAGCACAAAC -ATCGTCTATAACTGGCTGTCACACCCACGGCTCAAGCATTTACTGTATTGATGGAGATGG -CCACGAAGTATTAGTTTCGGCGACATCTACCCCTACAACTGGGGTACCAGCACAATACAC -TGGGTGTCACTCACATGGAAGCGAGTCGTAAGTACACACGTATGTGTCTCTATGACAATG -TACTAACTTACGAAATGCGAAGATATTGCATGGATGGAGATGGGAACGATGTCTTGATCC 
-AAGAAGAAGAAACGGAAACCCATGGGACCGAAGACAGTCATAGTGAAGACGATGGGCACG -ACCACGAGGAATCCTCAAGCGAAGCCAAACAAAACTGCCACTTCCATGCTGGCGTCGAGT -AAGCTTGTCACGATTTTCCCAAATTTGAGCGTTATTGACTTTGGCTATTCTAGACACTGC -GTGGGTGAAGGCGAATCGGAGGAACACAGTGGTTCATCGTGTGATATCAAAGCAAGAGAC -TACAACGTGCCTCTGAGAATTGGTACACTTTTCGTTGTCCTTGTCACTAGTGCGATTGGT -GTCTTTGCCCCTATTCTCCTAATGAAACTTCCATTTGCTTCCATCAATGGGGTGGTCTCG -ACCGTCATCAAACAATTTGGAACTGGCATCATCCTGGCCACAGCTTTCATCCATGTAAGT -CAACTCAGCCTTGACATACGTGTTGCTTTCTGACTGTGTTCCTTAGCTATATACACATGC -AAACCTTATGTTCACCAATGAGTGTTTGGGAGAACTCGAATACGAAGCGACTACCTCTGC -TGTAGTCATGGCGGGTATATTCATTGCCTTCTTGTTGGAATACATTGGCCACAGGATCAT -CGTTTCTCGTAACGGCAAGAGCTCCGCAGAAATAATACCATCCGAGGCCCAGCAAGGAGA -GCAGAAAGAAGAGCATGATCACTCTCGGGATCACTCACAGGAACAACAGCAGCAGCCCAC -GTTAGCTTGTCTTGGACACAATCACGGCTCATTCGATCTTACCGGCCCCAACAGCAAATT -TTCGGTTATTGTCATGGAAGCGGGTATACTTTTCCACAGTATCTTGATTGGTCTCACCCT -CGTCGTTGCTGGGGACTCATTCTACAAGACGCTTCTAGTGGTGATTGTGTTTCATCAATT -CTTCGAAGGACTGGCACTCGGTGCTCGCATTGCAACTCTCCCTGGACCTATATTTCCTTC -TAAGGCGTTTATGGCGATGGCATTTGCCTTGATCACACCTATTGGCATGGCTATCGGACT -TGGCGTTCTGCACACCTTCAATGGTAATTCGAGAAGCACTTTGATTGCACTAGGAACTTT -AGACGCGCTCTCTGCTGGCATTTTGGTGTGGGTTGGTGTTGTTGATATGTGGGCGCGCGA -CTGGGTCATTGAGGGAGGTGAAATGTTGCATGCGAAGTTGAGCAAAGTCTTCACTGGTGG -TATTTCCTTGATCAGCGGGCTAGTTTTGATGGGACTGCTAGGGAAATGGGCTTGATTATA -AGTGACATAGCTGCCTGAGACCTGCTTCCTTCCAAGCTTGTGTTCCATTTGTACATAAAT -AGCCTATCCTTTGCGACCAACGTCTTGGACATTATAATCTGCTCGCACACGCCAGATTTC -ACTCCCTGTATTTCGTATTCAATCTGAATGCCAGTGGGAATAAGATGGACCGCTCCGCAG -GCGTTTGTCGACAATATCGAGTGAAATGTAGAGCCTTCTCGATCTCTGAGAATTAGCACA -GCCCAAAAAAAGTCTAAAGCTACCCGATTTGGGCGGATGGATGGACTGATCCGGAAGTGA -GATATTCGTCCATGACTGCCTTCCAGTCGTGAATCCCAGGTTTACCAGTGCCCATCCCCA -GCAGAGGCATGCAGCCGGCAGTTGCACTCTCGCCCGTGGTGTTATATACTCAGAATGATC -AAGTTTTATAAAAATAGACGTATAGCGGACGAGTTAAATTGTGTTAAATCAGAGTACGAT -CGTTGAAATCAGAACACAAAAACAATATAAAAGTGCCTCTGCAGCTCTCGGTTTCTCCAA -CAAGATTTCTGGGACACGGAGGGCTTATCAAAAGGTACAAGAGGACTTCATCCGGAGAAC -CTAATACCACTTGCTAAAAACTTGGGATATAGTCGCTATATTCGGTTATTCCATCATCTC 
-GCGCATGCTGCCAAAGCAATCAAGTATGACAGATACTGAAATGCTTGCAATTTAACGGGG -CAAATGCTAATAATTAGTCATGTCTGTCACTCCTGAGGTCAATCTCCTTTCCTGCGCATC -TGTGTTAATGAATGGTATCGGCTGTTGCAAGCCTTATTCCTTGGACTTGGGGTTGCGAAT -CATGCAATATAACTGGTATAGCGTGCGTCAGTCATCTGATAAGAAACGTTCCAGCCAAGA -GGACATACCTTGCAAACGTCCCCGCCATCCACGCACTCAATCTCCTTGACCAATTCAAAG -CCCATTTTTTCATAAATACCCACATTTGGGTACCCCTTGGAGCTCTCAAGATAGCACGGC -ATATTCTCCTGATCGGCCTTTTCCATGACATTTTCCATGAGCTGCTTGCCCACTCCCATA -CCACGCGCCTCTGAATCAACGCAGACAACGTTGCAAAAGTAGTACCCGCGGGGGTCCTGC -CAGATGATATCGTGCGCATTTTGCTGCACTTGTTTCCAAATCCAATAGCGGTGCACATTC -AGACCACCGCGACCAAGGAAGCGGATATTATTCATGAGCTGGCGGAAGGAAAGAAGCCAG -TCTTGCGCCCAAACAGACCAGGTTTGTGTCATGGAAGGCCCTTGTGGTGAGTACCACCAG -GCAACGCCGACTACAGTACCGGCTGGTAAACGTATCTCGCCATCGCTCTTGGTTTCATTT -GCGGCGCGAGTTATCTTCGCGACAGAAATGGGGCAGCCGCAGTTGATACCGTAGCGAAAA -TGAGCAGCGAGGGAGGCTGCATTTCGTTGGATGTTGAACTGTCAGATTCCGTTATTACTT -GATTCGATACCCACTTCGTTGTGTTCACTTCTTCTTTGCGCGTTTTTCTCAAAGCCACGA -TGAAGTCATCGGAGCCAAGGTTTGCGGAAAGATATATCTCATACCTTGGATGGGTCATCA -AACGCCCAACGAAAATATGGGTCGTCGGCAAACGCTTTCTGCACACAATTTACAGCCCCA -GGTATGTCGGTCTGAGCCAGCGGGACAATTTCGATTGACATTGTTTGCTTGTCACTACAC -TTTCTTTTTGGACAGCGGCAAGTTTCTTAGCGAACTGGTTTAGATGTCCGAATACTTTTT -CTACTAGCTCTGCGAATAGCTTGACACAGAGATATAGTATAGGAATGCCAATGAAGCCGT -TGGGGACAGTGAGAGACAAAGACAACCGCCATCTAAGTAGAATTGAAGTGGTGATGGTCC -ACGAGTTGATGGAGGAGAAGGTGAGGGAAGGTCTTTGTTATTGTAGGGGTGTCAGCAATC -GAGAAGAAATGATGAGTAGAAAGCCACTTGCCCTGAGGAATGGGGTTCATATAGCTCGCT -CGGCGATGATCCTCAAATCCCCTTGATTACTATGCTTATCTTATCCTTATCCACTGCCTA -CATTCCAAGGTTGCATGTCGCTAGCCTTTCTAGGCAGCTCCAGCCGGAACAAGTACATAT -GCGCGGGTGTCGCATACCATCCCTCCACTTATATCATCGTTCCGTAGTTCCGGCAGTAGC -CGCAGATAGACAAATTGTAGACCGATTGTCTACGTAGTTTGAGCATGTACATAGTACAAA -GATGTGCTCGAATACCTCGAAGCAAGATTGTCGATCTCTGCTTTGCGGGGGGACGGCTCG -TGTTGGGTCATGTGGGGAATCTAATCTAACGCTAATCCTTATTAGGCAGGTTGTTGCGTC -ATTGGTCCAGATGATTGTGGGGCCCAAGCAGGCGGTTTATAAGAACTGATAAGAGGTACG -GAGTAGTCATAGTCAACTACCTAGGTATACATCTAGTTGATCATGTTTAATAGTAGTCTA -TTTGTTTCTTTATAGATCAGGGAAATTGGATATAAGGAACGGAAAATTAATCCGGCTGAG 
-CCTTGAAAATCATGAACTCGGTAAAACCATGAAACAGCAGAGCGAATCTGCTTCTATCAG -ACGAGAAATACATCTTGAGAATCGTGCCCACATCTCACATAATATTTAAATTACCGAGCA -CAAGTTTCATACAAATGTGGATGTTTACGACCTGTCTGGCGATTGATCTTTTGTTTGGTG -ACACATGGTCACCTGCCACCGTTTGTTCTTTCTTAGTAGCCCAAATACACCCATGCTTTA -TCTACCCTTTATATACTATTTTACTCTTTGTTCACCGAGTCCACTAATTTGATCGACAGG -TTTAAACATGGTAGAGTTACTAAAAACTAGTATTTATCCTAAGATGTCTACATAGAGCTT -CCGGCATTGTTACCACGTGAGCTGTGCCCTGTCTTTCGTATTTCGACTCCTATGCAATTC -AGAAAAATTCCAACAGGTTCTTCCCCAAAACCAAAGTACACAATAATAGGTCTCTGTGAC -AGACATAATTCTCTTTCGATAGAATGTAAGTGGTGTCTGTTTGTGAGTACTGTGAAAGTG -ATGACTCTCTCCTGATAAAAATAACCTGGAATCCCGGAGTCTTCCTAGTCGAGTTTCAAT -CTTCCGTTTATTCCTTGAGAGATTAAAAATACAATCTTTGTTATAATTGCTGTCTTTGTC -AAATACATCAACCAGGGGATAGATTAGTGCCAACTGTACTCTGCACAAATGCATAAATAA -CTCCCACTATCTTCTCAATATCACAGACATAGCTCGCAAGCAATCCCTGAGAAAGAATCA -GATACTACATCCTCCGATAACTCAAGTATTCAAGTGCACTTTGCTTGTCCAGGAATCATC -AATAAATTCCACCTTCAAGGCACACTTGAATCACCTATCGAGATTTGCAACTTTGAGAAG -CTTCTCTGTCACCAGTCAAGTCTGATCTTCACCTATCATGCCTCGCCATGATATTAAACG -TATACTAGCGGATGATCCCTACAATGAGCGGCGCATCTTCGTTTTGTCATGTGTGCACGA -CGCAATCTACTATCTTCGTCATGGACCGCATCGTCGTAACGATGTCATAGCCACATTAGA -GAAAATTTCTGACGAGATGGGTATTGCTATTTCAAGCTCGGTTGTGGAACACGATCCCCA -GTTTGAGAGGTTATGTCGTAATCTTGAGCTGATGCTGCCTGTCTTGATCAGAATTCTCAA -CAAGCACAAAGACCGAGTGACCGACAAAGATGTTGCTGATGATCTCATCAGCTGCATCCC -CAGCGTTTTACAGCAGCCTAAAAAGTGGAATCCGGATATCGAAGAGATTTCCGACGAGGA -AGAGTTTGAGCAAGAGCAGCGAAGGCAAAAAAAATATCCTTACCATCACCAATAGATTTG -ACTCTCTACCTAGGGATACTGAGATCGAGTGTGGGCTAAGTCGTGACAACGGTACTGGAG -ATTGTTTGGTTCTAGTTTTTTTTACTGCTGCTTCTTTAATTCGACTTTTGTGTTTTTGGT -TCGGGGCTACCCAAGTTTCCTTGATTTTTAATAAGCCGGACATTTTCTTCCCTCTATGGA -TCATTCAACACAAGTGCCATGTGCCATGTTTTCTAATATCCGATCCTGCTTCATCAACTA -AGATAGATCCACGGAAGTTGGTCATGAACTATTACAAGGAATATTGAAAGTGTCTCAAGC -TTCACGTCTCTTGGTGTCAATAGCCGGGAGAGCTTGCGTCAATAGTAGAAGAAGTGCAAG -CTGGTCAATTCAATAAATATCAGCAACATCATATTACCACCGAATACATTAAACTCCTCT -TCAACTATCAGGAAAACTATCAAAAACCGGTAATATCAAACTAATCTAGCATCTATCAGG -GAACAACCAACCAATATCTCAAGTCTACTCTTCCACAAATCAACCAAACAACCTAAACAC 
-GATTGCGTCCACGATGAAAGCGCGGCAGGAAAGCAGGCGCAGGCTCAGTGGCGGTCTCAG -TCACAACACGGGTAAGGTTTACTGCACTAGTCGTAGTATTCGAGATGATAGTCGAGTTAG -TGAAGTGGGCTGTCGCCGTCGTGTTGCTAAATATCTGTCGGCGAATAACATCGGGGTTGA -TAATTGCTCGTGGGGTGACACGCGAACCCGGTGTCGAGGGGGATACAACTGGAGGTGTTG -ATGTAGGCAAAGTCACGAGGGTGGCTGGTGTTGTGGGTGATCCTGTAGTGATGGTACGCT -GTCTGGTACTGCTCGGTACGAGAGGCGTCGATGTTCgggtctgagtcaaggtctgggtct -gggtgttggtttgggttctggTGGATGAGGGTGTCACGCCACTGCCACTTGGGGATACTA -CGCTAGAGGTAGTGACTGGGGCTGTCGCAGTGCCAGGTGGGGGAGCTGCTCCAGACGTAG -TCCCGGGGGCTGTTGTGGTGCCAGCTGGGGGGACTCCACCACCTCCAGCACTACCTCCGC -CAGCACCTCCGCCAGCACCTCCCCCAGCACCTCCTCCTGTGGGGCAAGGACAGCACTCTG -TTCCCGGGCTCAAACCTGCCCCGGATGTGACTGTGACAGTGGCAGTGCTCACTATGACGG -CAGGACAGGTGACCGTTACTGGGACGACGGTGGTCTCCTTAGATGTGGAAGGAGGAGGGC -AAATCGGTGGTCCTGCAGTGGTGGGGGGTGGGGAAGGAGGTACACTACCGCCACCAGTGG -GAGATGTGTGGCTGGTTCCTGGTTCAGTAGTAGAGGTAGGAGGCTGAACAGTGTTAGTAG -TTGTAGTGACTACTTCAGGAGGGGGGGGGCTGGAGGGTTCCACTATGGATGGTTNGGGGG -GAGGCAGGGCCTCTGGGAGATGGCCCGCTTGGGGGAGGACTAACATTGCTGGTAGTTGGA -GCAGGTTCTGTTGTTGAGCTAGTAGTGCCTCCAGCTGGAGGAACACCGCCAGTTTGGGTA -GTTGTAGTACCTCCGCCGCCAGTGGGTGAAGTGCCGCCTTGAGTACCAGAGTGAGCAAGA -AGAGTGCTTTCAGAAAATGAGGTTGGGGTTTGCTGACCTCCTCCACCAGGGACTGTACCA -GGCACTGTAGTCTCACCTCCTCTGCTAGGGACTGTACCAGGAACTGTAGTACCGCCTCCG -CCGCCAGGGACTGTACCACCTCCACCTCCAGGGACTGTAGTGCCGCCTCCACCGCCAGGG -ACTGTAGTACCGCCTCCGCCGCTAGGAGCTGTGCCGCCTCCGCCGCCAGGGACTGTGCCA -CCTCCTCCGCCAGGGACTGTAGTACCGCCTCCACCGCCAGGGACCGTAGTGCCGCCTCCG -CCGCCAGGGACTGTGCCACCTCCTCCGCCAGGGACTGTACCACCACCTCCTCCTCCTGGG -GTGGTCTCAGGCTTTGGAGGAGCATGTGTCGTAGCTAATTCCTCGGTCAAAACAGAAAGC -AGCGAGCTCTCGAGATCAGGATGGTTGTGGCGCTCCTGCAAGCCAAAGTTACGCTCTTCT -CGACGACGATCCTCAAGGCCAGGCTGCTCTAATGCCACATCCGGGACAGCTAGGACACTC -TGTAAGAGAGTCCAGGAGCAGGCCAGTAGAATCAAACACCTTCTGGTAGACATGATTCCG -ACCAACCCAAGTATGTGCAGGGCGATTGAGAAAATGTGGCCGATTAAGGGAGAACAGAGG -CCAATCCAAGTTCGCCGGTGGTTCCGGGTGTCAAATTGACGAAGGAGCTGATATAGAAGA -CAGTTCTAGGTTCTGGATTCTTAGCAATTTTAGCAACGAAGCAAATGCCCACCAGACGAG -TTCGAGACGATAGCAAGGAAAGCAATGTCCCAGGTGAAAAGGACGTTAGGAGAGGGATGC 
-GAGCATAGCGTCGTATATGTAGTGAGGGCCTAGACAATTCAATGGTGATGGATTGATGAA -CACTCGATGTCATCACAGCGAGAGGATTGGCCAGGGGTGATGTCGATGTTTGGTTTTTTG -CTTAAGCTTTCGTTGTTCTTATAGATTCCGTGGTAGGTTCTTAAGTTGCTCCTATTGGAA -ATCATTCCCAATGCTGAAATCCATGGCTGGATAAGGTACAAAGACATCATTGTGACTAGA -TGTTTACATAGATTCGGTGCCTGTCGGATATCTTATGCGACATGTTCTGCCCTGTTCAGC -CCTGCCCAGGGGGACCTGAACCTGAGTTACCAGCACATCCATGCATTTGATATTGCCTCC -ATGGCGGCAAATCTTTATATTTTTATATTTTCCCAATTGCCAGTTCGACAGCGACATTCA -TTGGTACAGCTAAAGGGCCAAACCGACTATGTCGGAAACATTGTCCGATTTTGTGGTTCT -AATCACGAATTGATAAATATAGACATTAATAGCAGCTATGTATGACAAATCACTCTGTTA -TGGACGCCAAATAGATAATGCAAGTTGAGGAGGAAAAACTCTTCTATCTGCTTTTTGAAA -TCGTGAAATCGTGTTGAAATTGCCAGATCAACCAGCGTCAAAATCCCGAATACGAACCGA -AAACAAATTAATAGATCGCGAGAGGGTGACGCCAGCCAACTTGGATAACCTTTACGGCGG -TGCAGCATGCAAGACCGGCTGCATAGCAACAGACAGTAAATGTGATCAGGCCCCAGTAGG -ATCCATTGCAACGTGCCAGGATCTCACCTGCGATAGGGATTCCGGTGAGAGTTCTAGTGG -CAAAAGTATGTCAGCTATGGATGCGCAGGTGACATCACAAAGGGGGGAAACACTCACCCA -AAGCTCACTATGGTATAGGCGGTTGCGTAATATCGGCCGTAGTTTTCAGTTTTGCAGAGT -TGACCGACACAGACTGGTGTCAAGCTGATATTGCTGCCGCTTGAGAATCCAAAAATGACA -GCATAAACGACCAGCAGAGCCATACTATCTCCAGCAGGAAGCCACAGGCAGGCGTTACAC -AACACGCAGAGCGCTACCGTCGCGATCAAAGTGTTGAAGCGACCAAGGAAATCGGCAAAG -AAGCCAGGCAACCATCGCCCAAAGAATGAGCCAGCATTCAAGATGGCAATGATTTGGTAC -GAGAACTTGGAAGAAAACCCATGGTCGAGGGCGTAGGAGGAGATATAGCTGATTGGCACG -AAAAGACCCCACTCAATAAAGAAGACGCTGGCGGTGGTCAGCACGAATCGCGGGTCTCGG -AAGATGCGGAAGTCAGGAAGCACGTTCTCCTTGGAGAATGGCTTCTGCGGGAGTCGGGAC -CTAATCAAGAGATTCGCAGTTGATAGCGTAACTAGGCAGATCAACGCAACAACCCGAGTG -GCCCAGGCCCAGCCAATCTTGGGAAAGAGGGCTTCTAGAACGAGGGGGAAAACAATGCCT -CCGGCAGAGCCACCAGTGGCAGCAATTCCAGTTGCGAGTCCACGTTTCTCATAGAAATAG -TGGCCGATGGCCGAGATCGCCGGTGTGAAAATGAGGGACGCTCCTACTCCACCCAGTACA -CCGATTACCAGCATAAAGTGCCAGTATTCAGTGCAGAAGCCGATGAGGATCATCATGGCC -ATTTCAAGTACGGAACCCGCAAAGACCAGAAGACGTGGGCCTCGAGCGTCGAAGATCGGA -CCAATTTGCACTCCACAAAAGAAAGTGAGAAAGGCAAACATTCCAAAAATCCATCCATTG -GTTCCTGAGCTGTATTCTTTCAGTTGGTGGGTTTGGATATATGCTTGGAATGTGCCAATG -GTGTTGACCAGGCCCAGCGACCCAAAAAGACCGAGGAAGCTACCAAGCACTACCAGCCAG 
-GCTTCAAGACCCCCCTCCGGATAGGTATTCCCTTCGGCATCTGTGGTCACTCGACTAATG -GGGTCCCTTCTTGTTGATCTTTGGGACGGTGCCAGAGTAGATGCTGTCCGATATACATCC -TCTGGGCTGGGTTCTTCCTCTGGTGCAGCCTCCGTATCAATCCTCTCATTGACCGTTGGA -ATGGTGGTCTTCTCGTCATATGCAGTGGTGCTAGGGGCTGTGGTGCTCTGGGTATTCATA -TCAAATGCATGATTGGTCTTGCTGCTGGCAGACCCGACGTTGGTAGACATTCTATGTGGA -AGGAGCTATATTCTCCACTTGCATCAATCCTAGATTGCTCAGATTGAGGATTTAGAGAGA -TGTTTGGAGGAGAGTTTCATATTAGCAATCAAAACTTCCAGAATCATATGCAAGAGTGGC -CGAGAATTTTCGGGCGGAATTATTCTCATTGGTGTTTAAGTGACTATGTAATAGTCACTT -AGGCAGTGAAAATTTCGCAGCCCGTGATCATTGGGTAGGAAGGTGGATCTATGTCCAGGT -CCGGACCCGAACACCTTGGGGATATGCTTTGGTCTATACGCCAACATGTTGTTTTATAAT -AATTCCAGAGCTATATGAGGTTTTTCAATGAAGCCTACGGTGGAAAATTCTATCAGTTCT -GTCTTAGTACGCAATCATATAGAATGATGATCTGTACTTGCCTTATTTATCCACGTGGGT -TTACATGATATAATTCAACGTATTTGTCGGCTATAGCTCTGCAGTTCAGCAACGTGGGCA -AAATCCGAACTTCCGAATTTCCGACCTCCGTGAGACTATTGGACTACCCGAAGCATGCCC -GAGCGCATGTATCATGTCACATTCGGGCATCGTATGAACCATTCGTATGTAGATATGAAT -CAGCTTATATTTAGTTACACGAGACTGGACTTTCCGGAGACAACGTGTTCAAAATTTATT -TACGGAGGATATCAAACCCATAAACCCATCTGTGAAATCGCACGAAATCAGATACGGGTA -GACAAatatatatatatatataaatacatatatatatgtataATGGCAATCGTTTTTTTT -TTATATCAAACGGTACCGCTTGTTAAATTGATATACAATCTGTGTAACGTCTCAGGTTTT -GGAAACTAGAAACTTGGAAAGGGTGAATCTATCCAATGATTAAACGAAAACTTGAAAGTC -CTACGGAGTAGATTGGGATTGTGCTCAAGTATATACGATTCTTGTGTAGGCTTATGAGTT -ATTAAATGGATGCCTCTTACATCAAATTGGGCTTGTGGTTTAGTGGTATAATACTCCCTT -AGCATGGGAGTGGTCCAGGGTTCGATTCCCTGCAAGTCCATTTTTTTTGGTTCCTGGTTT -ATATTTTTAATATAAAATTCGCTGTAAATTACATCAATCTAGAATACATCAGGCCTTTCC -CCGTGTATCTCATACTTAACCCTCGGCGAAAATAAGGTTCGTCAAGAGGAAGCAAATTCC -CCTGATCAGCCAAATATCTTCAGGCCAGTGACCGCGAAGCACGCTGGAAAGCTTGGTATG -TCCAGGATCTATACCGTAGAGCCGCGAGAAGCCATGCTCATGTACTCCGCACAACAATTG -TCTTCATGATAACCGGACCTTATATATGTATACCTATATGTACCAGGTTTCAGGCCGCGG -ATGCAAATTTCCAGACCGTATTGCCGCTTTATGTGGGCGCTCACGTATCTTATCCCACAG -CACAGGGTGACCACTCAGTGACCACCTGCGTTTAAATATACGTAGTAAGTATTCGGTGGG -ATGAAAGCAGAAATGGGCTCCTATGTCGAGTTTACTTGCCACTGGGAGTCGTCTATTCCG -CTCTGTCGATGGCCTGAGTAAAGATCGTGATAGAATGTTTGAACTGTCCAAAATTGAATG 
-GAATGTATAGATCACAAGCTCACGCCTCCATATTGATAGCCTCCTGGGCCTCCAATAGCT -GCAGATAAACACCCGCAACATTCCACCGTACATCAATACTCATGAACCCCTCACGTAAGC -TCCTCATCACCTCAAGGCCATGCGTTGATTTACTGCCGATTCGCATATGGGCGCCACAGA -TACGATAGGCAAAATACAGACCCCACGGAGACATGTCTTCTGGATCTCGACCCAAGAAGC -ATTGTCGCTCGATTAGATTTGCGTTAATACGCTCCGTGATTTGTTGGAGGATGAAATTCG -CTCGCCGTGGATGATGGGTCCCGATTTCTTGAACCGAATCGGATAATAACCAGGGTGTGT -GTAGGGCAACCAGGGCACTAAGTTTGGTGTGAAAGTCTGTACCAGATAAGGGCGACTACT -GTAGTACTAACCTATACACGAAGGTGATTTGATCATAGTCCGGGCAATTAATATTCAATG -CAGCGGCTAACATTGACTGCAGGGTACGATCGAGCTGTATCCCAACATCATCTTCCCTGG -TGAAGTCGGTTGAGGTGATGTAGAACCTGGCCCAATAACCTGGCGGCTTGACACAGCAGA -GCAAATTTACTCATATGTCCCGTCATTGGAGACGAGAGAGTTGAGAACATGTCGGGTTTG -ACGGTCTAATCTTAAGATCAGCACCTGAAAGCCAAAGATCATTAAATATTACCAATTAAC -ATACCCCCTGGTCCCAGGCAGCATCATTTGCCGGTAAAAAGTCATCCAATTTTGGATCTG -CAGTTGCAAAGGGCCGCCCCGGGCAGCCGATGCTCACAAAGCTTTTGCGTTGTTAGTACT -TGAAATAAAATACCCATGGTGGACAGCGACGAACCGGTCTAGAATGACAATAGCCCACCA -GACTCTCCGTCGCTCCTCAACCTCCACCAAAGTGAGCACCCTCCGACTCGGCACCGTGCG -ACTGACATTTATACCCAATGCATAGGCATATCTCGCACAGGTCCCGATTGACAAGAATGC -ACCAGGATAGATCCCGTGTCCTAGCTCACAAAGAGCAACAAGAACTCCAGCTTGCAGTAC -TAAAATAGAGAAACACCCCTCAACGTTTAAATAAAAATGTTTAGTTGAGTTATACAAGGC -TGTCCGGGGACTACGCGAACCAGCCGGCGGGAAGGTAGTGATGAGCGTCTGTGCCAATAA -AAGAAGAGCAACATCGGGTCGCGAGCGAAACGAGGGCTGCAAGTATAAATCATAAAAGCG -CTTTTTCGAGATGAAGGGCATCCATTGATGAACATGCTCGAAAAACCGCGTTGCCGTGAG -TTAGATTTCATCCAAATCGCCTAGTAGTTGTAGAATACACTGCGGGACAGTCGCTGTCGC -AGGTGAGGTCTCTACTAAGCCGTGCTGTAGCAGAGCCGGATCGAGAAACAAGATGGTTGG -AAAGTCAATAGTCCGAACATCTGTTTGCTCTTAGAGGAGAGGGGTCCCATCCTCGCTCCG -GGATTGAAGTTGTGGCTGAGTGACTAGGCGCATCTAATCTCCAATACTCGCAATTTCGAT -TACGCCTGCGCTGGTCCGCGTTAGCTCTGTCACCAAAGTGACTATCCGTGCTTACTTAGC -ACAGAGTGAGCATCTGGGGAGTGCTTTGTCGCATTTCCTCTTGCGAGACCTGCAGGATTG -GCAGGACTGAGGGGCTACGTCGGGGCTGGATTCATGACTGGGTGTCTGCATGATGAAGGT -GAAAGGTGGACAGAGTACGATCTCTGCGGGGCTTGACGGCCGCAGAAAGGGGAGACTAAG -GGAAAAAATGACCACAAATTGCGGGATGTGTACAGAGTATAACCGGATCATTTGCGACCG -CATTTGCGGCCGCATTAACGGCCGCAAACTGCGCACATGGGCCGCCCCGGGGCCATAGAC 
-GGAGAAAGTGGAGGCCGAACTATACAAAGAATGGACTCACACAAAAAGCCACATTTTAAC -TTCATAGAAGTCATCATTTCTTGATATTTATCTGCAAGTCTTCTCCTCTGTTTCTCAGCA -TGGGCTCGCCAAGCCCCTACGCTGCGGCGCATCTCGACCCCCAAGGCGCCGGAGATGCAC -GACCAACCACCCTTCAAATTATCAAAGATGAGGGCGCCGGAGGAAAGCTGGCCGGCAAGG -TCATTGTCATCACTGGAGCCACATCAGGTATTGGTCTCGAGACAGCTCGTGCCTTGAAGG -CCACCGGGTCCACTCTGTTCCTCACGGCCCGCAACCGATCTAAGGCCGAGAAGAACCTCG -CAGGGATCCTAGAACCCGGCCGAGTCTCGCTAATCGACTTGAATTTGGACTCATTTGCGA -GAATCTGAGCTGGAGCCAAAGAAATCCTCGCTGCATCCAAGGGCCAGGTCAACGTTCTTA -CCAACGGCGCGGGTCTTATGGGCATCCAGAACCGCACGCTCACTGAAGATAATATTGAGG -CGCAACTCTCTGGCAACTGTATGGGATTCTTCCTCCTCTTTCAGCTTCTGAAGGAAGCTT -TGCTAGCTAGTGTGACTCCCGAGCTCAATTCGCAAGTCGCGGTCGTGGCTTCATCTGCCC -ACCGAGCTGACACTCTCCCAAGTAGTGACAACTACAACTTCGAAAAGAGTGAATATAACC -ACGAAACTGCATACAACAACGCTAAGCTTGCTGCGGTATATTCGGCCAACACCCTTGATC -GGTTATATGATTCCCAGGGACTACATGCTACCAGTCTCCACCCCGGCGCCATCAACACTG -ATATATCCCGCAATATGCCCCCGAAGTTCTTAGAGGCTATCATGACCAACCCCTACATCC -TAAAAATTTTGAAGTCTCTCCAACAGGGAGCGGCTACCACGGTGTGGGCGGCCGTGGGCA -AGGAGTGGGAGGCAATGGGTGGTAAGTGTCTCGAGGACTGCAAGAAAGCTGACCGTGGTG -AGGAAGATGGCCAAACCTTTGGACCTAGATGGGTGAAGCAAACATATAACCCTGAGGAGG -AAGATCGTCTCTGGAAGGATTCCCTTAAGATAGTCGGACTGGAGTGAAGTGACCATTGTT -CACGCGTTGATAGATGCTATTGTTATAGCGCTCAAGAGAGCTAGTATGGCAATGTGATCT -ATGCACAATTGGATTCGATTTCCAGTATTGCGATATCTTCTTTGTTGTTTTGAACAGTGT -TTGGTCCTTGTTGTTGGCTATGCGCCATGACTCTCAGCGTAGATAGCGGTTTCTCACCTA -AATATCCTTGGTAGCTGTCGAGGGCAGTAGTGGGGAGAAAGTACGCCCTGACTCCTGACT -CACTGAGCACCCGGTGATCGCGGTTCAGTAGCGGGATCCGGCAGAGGAACTAGTTCAAAG -CACCATCACCCGTTAGAACTGGCCGGGGGGTTTAAGGGTGTATACAACACCATTTATAAG -CGGAAGACTACCTATTTGGGCCTGTTAGATCCCAAAATATTATCTGAAAAGGTCCAAGGT -GAAAACTTGGACCCTCCTTGACGTCTTGGCGGGTCTAAATTGAGGGTGAGTACCAAGTCC -ACCGCCTGTTCCGAAAAGAATCACAAACGAGGTCAAGGACTGCAGTGATCAGGATCCTCA -GTGAACGAGTTCAATGATTATGTAATATACAGGGGCGTCTACGGATGCCCAGTCTTAGTG -AGCAAAGGGCGACATAGGGTGAGCACCGAAGCGGAAGTGGAAAGCCAAGTCGTGCGACCT -AAATCCCGGAATATATGTATCGGATGGCAGTATTTCCCTGCATGCCGAATCTAACTCCGG -AAGGAGATAGTCAGGGGCATGATCGCTCGTGAGGTTTTCACCGGAGGACCGAGCAATAAG 
-AAGTCTGACATAAAATGAGTTATTAAGAGGATATCTCGCCTATCAGACCGTTACAACCGC -CGTAGGCCGGTTATCCTTACCTTCCCATCAAGCGGGGCTGCCGCCTGATAATATGGAGGC -CACGATGCTTAGCCGGGATAAGAAACATGCCAGACCAATTGCGAGCTGCCCCCGCTTTTC -ATAACGGAGCTTGTTATCCCCAAAAGCGACAGATAACTGTCGATCTCACCGGAATCCGTC -GCTCGTCCGTCACAGTGATAGATGTGTAAGGAGTACAAAGCGAATCTTTGCAATCATTGA -TGCCATCCCATTATCCTATAAACCTACAAGTACCCAACATCACGGAAAAAGACCATCGGG -ATGCACAAAGTTGACTATCGACGAAACGAAGTAGTGGCAAGATTTACATCTTTGAAGCTG -AGCTTTCCTTTCTGGGGCTGGCCCCGCCTGGGATGACGGTGAAGCCGATATCAGTGGATG -ATTGCTATCGAACAGGGGGTTGTGGCGTAGAAGAAGCACCATCCGGCGTGAAGCTAAGGA -TTCCTTCCAATCATCCACTTGGAATAACTCAATCTGAGTTTTTATTTAATTTAAATGGAC -TTACCGATCACATAATTAGGGGTTATTGACCGGCCTTTCCGGAGTGTGTGGTATATAGCT -GGGTCCTCCCTGTAGGGGCAACAAAAAAAATCCCACCTCTCTCTTACCCATTGATAACGA -TTTAGGATGATACTCAAATCCGTCCTGACATCTGCTCTCCTAGCCCTTTCACTGGGGTCG -GATCTGGTGACTGCTTCCAAACACGGACGCTTTGCAGAGAGAGCACGTGCACCACAGGAG -AAGGCCAAACGGGCAGTTGAGGCTCGTAATAACCACAAGCATGAGAAATCACCCAAGGAT -TACCGTTTCCTGAACAAGAAGACTAAACGTGGGTGTCGAGTGATATCATTCTTGGCAGCA -AAATCCTAACAGACTCATAGATTACCTCGTGGATTCACTGCCGGATGTTCCATTTGATGT -CGGCGAAATGTATTCTGGTCTCGTACCAATTGATATGGACAATAGCTCCAGAGCGCTCTT -CTTCATATTCCAGCCTACTATCAACGAGCCTGTAGATGAAGTAACTATTTGGCTCAACGG -AGGCCCTGGCTGCAGTTCGATGGAGAGTTTCTTGCAAGAGACTGGTCAATTTATATGGCA -ACCTGGTACTTTTGCCCCGGTGGAGAACCCTTATGCTTGGGTCAACCTGACCAACGTGCT -GTGGTGAGTACTGATCATGCATAGGAACCCAGAAACTTGCTAACGTCTCGTAGGGTTGAC -CAGCCTGTCGGAACAGGCTACTCTATCGGCACCCCAACTGCTGTTTCGCAAGAGGAGACA -GCACAAGACTTTGTCAAGTTCTTCAAAAATTTCCAAAAGATTTTTGGAATCAAGAACTTC -AAGATCTATGTCACTGGTGAGAGTTATGCTGGACGTTATGTGCCCTACATCTCAGCTGCG -ATGCTCGACGAAAAAGACAAGGAGTATTTCGATCTAGATGGTGAGTCCTCGCGACGATTC -ATATGGCTGGGATGCAAAATCTGACGATCTATCAAGGTGCGCTGGCATACGACCCATGCA -TTGGACAGTTCGACTATGTGCAGCAAGAAATTCCCGTTGTTCCCTTTGTCCAGAAAAATG -CGAACCTGTTCAACTTCAACGAATCGTTCATGGCGGAGCTCGACAAGAAGCACAAGACCT -GCGGATACGAGGAGTTCATTGACGAGTACCTCACTTTCCCGCCACCTGAAGCCCAGCCGC -CCATGTTATTCAACTACACCTCAAACGCCGACTGTGATCTGTTTGATCTAGTTTACGACG -AAGTATTCCACATCAACCCATGCTTCGACCTCTACGAGATCAACCTGATGTGCCCGCTCC 
-TCTGGGACGTTCTCGCCTTCCCCACGTCGCTTGACTACCAACCAGCTGGAGCAACCGTCT -ACTTCGATCGCCCCGATGTTAAGAAAGCGCTACACGCACCTAAGCAGGCTTGGAACGGGT -GTTCTCTAGAGCCCGTGTACGTGGGTGGCAGCGCCGGCCCCGAGCAAGAAGGCGATATTT -CGGCGAACCCAATCGAGCATGTCCTGCCGCAGGTAATTGAGGCTACTAACCGCGTTCTCA -TTTCAAACGGTGACTTTGATATGGTTGTCATCACCAACGGTACGCTCCTAGCCATCCAGA -ATATGACCTGGAACGGCCATCTCGGGTTCCAGGAGAAGCCAAGCACTCCCATCGATATCA -AAATGCGGGATCTCTTGTACGTCGATGTCTTTGCGGAGAATGGTGCCTCTGAGCTCGATG -GCCCTCAGGGTATCATGGGTGTTCAACATTATGAGCGTGGCCTGATGTGGGCTGAGACTT -ATCAGGCTGGTCATATGCAACCGCAGTACCAGCCTCGTGTGGCTTATCGTCACCTCCAAT -GGCTGCTTAGACGCACCGAGAAACTCTAGTTTCTGGTTCTTAAAGAAGGTGCCTCATATA -TACAGAGTTTATAGCAAGACAATATGTATAGAAAATATAATGAATACGTTCTTGTATTTT -TAAGGACAAGTCGCCTGCGCATTACCCCAGAGTTTGGAGATGGGCTATGTACCTACGTGT -ATGTCTCCATGTATTCTGAGTTCTTGTGGGGTATAGCGTCGACCTTTCGAGATCAGGTTG -GAACATGTTCTATCATTTCCTATTTTTTTATTTCTGATAGCGCAGCCGCTGGGAACGGGC -TGTCGTTGTGATAAACATATGCCTATCTGGCGATAGTATACCGGTTCAATGGACCGAGCT -TGTTGATGGCGGATTGAGGTACATCTTCCTATTTTGCCAAGCGAAATTATGGTGTTTGAA -TTGTTGCTTGCGGTGAGAGTTTTATATTTTGATCATGATGATAGTTTAGTATCTTGCCCG -AttatgtcttatgtcttcttatgtcttcttatgccttatgtATATTGTAGTCTTGCACAT -GGTATGACGCAATTAAACAGTGATGCAGAACAATTGCAGCAGCGCCCTACCGGCAAAAAC -GCGCACAACCCTCCGGCTAGTGTAGACATCGTAAGCCGCTAACCAGTAGTGTGCTTCGGA -AGTAGAGTGGATCTCGATATTTGGTATGGTTTTAGTCAAGTCCAACTACACTGCCGGTGA -AAGCTCGCACATCATAATATACAACATACGTACTCTTGACTCCATTCTCTTACGAGTAAA -TTGCGAGCAATGTATCTTTTACGGAGGTAAAAATTACTACAATCACATACCTTGTGGCAA -GACAGTATGCCGAGAGCTACCGAAACATAAACAAGATTCGGTAGAGTTTATCATAGACTT -CCCAGTTTCCTGATCAATATTGACTTTCCAAAGAAAGAAATAGCCGAGTTACGTTTTGCA -ACATAGCCGAGACTCTCCACTGTGTAGAGTCTTGATCACAGCCAAGTGAAGTGGACCAGA -AACGGCAAGTGGCCGTAATTTATCCGAGAACCGAATATCACATTAGTTATGATTGGCCAC -TAAGAGCGAATTCCCCTGACCTGAATGCGGCGCCTCCGCCAAGATTGCTTTCAGACGTTA -GTTCAACCCTCTATTCAGTCCTCCATTGTAATGTAGGTTAGTTGGCTTTTGATGCTCTAG -GGGATTGCTGTGATATAGATCTATGTAGGGTTATGTATATAAGAAAGCCAAAGTCTGGAA -TATGGGAAATAGAGAAGAGCAAAAGAGAGTGACGTTCAAAACAGTTTCTTGATCTCAGTA -TTTTGCCCCAAACAAGCGTAGTTGAGAGGGTAAAGTGCAAAAATTTTCCTGGTGGATCAT 
-CGAGGCCAATAGGGCCGCGGCTCCCCAAAGTCAAATTTGTTCATTATACGGGACTATTTC -CGATATAGAGAATCTATTCTTTCCATAACAGATGCTGATCTATGGACATGTGAAACCGTG -TGAGTTCTGCATACAGAGTGCTAAATAGCGAAGAGTCTAGGGACCCTTGAGGCTGAATGG -ATTTGCCTTGGTGGCGTTCACAACCCTAGATCGAAAACTCGTTCAACCACTGACCTTTAG -CGATTTCTTTTAAGCTATTCCCTGTGCTGCAAATTATTTAAATTGCTATTAACAATTTTA -TTAATATAATACATGGTATGATGTACACTCCGTATGGTGGAACCTGTTAGGTTCCCGCAG -ACCGTGGATTCTGCCAGTACTGCCAAGTAGTGTTGACTCTATAAAAGTAATGCAATACAA -TGATGTATGAGGATTTTGAAAGCCATGGAGCCAGCCTAACAGAAACCGGGAGATAGATAT -CATAATATTGTCAGCATTGGAGTATTCAGGTTGGCAAGCTCCTCACTAGCTGAGGATAGG -TAGGCATAGGGCGTATGAAGCCGAAAACATCCCAGCTCCTTGACTACTGTAGATTCTCTA -ATATTGTACCCACCGAGGAATATGTTGAGATATACAAGAAATATAGTCCATAAAGCACCC -CGAATACCCCAATTCCGGGGCCGAAGTCACTTTTCTAGTATTTGAATTACAGGGCAGAAA -ACCACTCCAGTCCCGCTTCTCGGTCTTGACCAATTATCGCATCTGGTCCTAGACCCTTAG -TGAAAAAAAATGATGAAAAAAAAACCAGAAGCATATTGTCTGTTAGGATAGTCTTAAGAA -GACAACATATATAGCAACTCTTGAATCCCACACTTCCTCTTTTCTCTTTCCTTTTCTGTC -AATTCCCAAACTCCCCTTCCCTCGAATTTGAGGTATAGCTACCCCAAGCTTTCCTTATAT -CTTGCTAGGGCTTCGTCTCTTAGTCTTTCTATCTTATATCCTTGCAGAACTTCTGGTTTA -CGCTTTTGGATAATTTCTATTATTCATCACCCTGTGGAACAGAAATCTATTTCAACAGAG -GTCAAGCACGAGGTACTTCACATCAGTTCTTCAGTAGCAACCTTGCAACATACCGGCAGC -AACAATCACATCGACAGCGATTGCGAAAACTCTGGCATCCAAAGCGACAAGCCCATTCGA -AATCGACAGCAGTATATATCGACCATTGTCCTTGCAGTACTTTCGGCCCACAGCGAATTG -CCCCCGGATCCACGATATATGACCCCATTGCAGCTTCTCCCCACATCTGTAGATCCACTC -TCGACCTCTCACAGAATTCCCAGCGACACTTCAACGGAATAGACCGAGCGAAAAGCGACA -TATTCGACAGACAGCAAATCCCAGGCAGTTGACAACCCATATTCCAAAGCGCCATATATA -ACCCTCATCATGGGTATCCAAAAATTTAACCCTTTCAAAAAGGACAATCAAAGCTTCCCT -AGCGTTGTCATCCCACTCGCCGATGCACCTGCCCACTCTCTCTCGGAAAAGGCAGACAAG -GAATCTAGCCAGAGCTTGGACGGCTCGTCTTCCTCGGAGAACGGTGCTGCTGGCTCCAAG -GATTCAACTCACCTGACCCTCGAGGCCCTTCGTGCTGAAGTCGAGGCTGATATTTCCACA -GCTACTCATGACTCTGCCTATGATCGTATGTTCTTCCTGGATTCTTGCACTAGCCCCGTG -TGGGAGCCCCATGTACTTGGGTGTTTTTGATCTGACTCGACGCAGGCAAGGCAAAGGTGA -TCAACAGAGCCCTGCAAGATATCGGCATGGGTCGGTATCAATGGGAACTGTTCTTCCTCT -GTGGTTTTGGTTGGACAGCAGACAACCTTTGGCTCCAGGTAGGATCAAAACTAAACGGAC 
-CACAAAGATCAAATCTAACCATGTGTAGGGAGTTGCTCTGACACTAACGCCTATCTCCTA -CGAATTTGGACTCTCCAGTACGGAGGTGCGCTTTACCACTTGCGCTCTATTTTTGGGTCT -CTGCATTGGTGCCTCATTCTGGGGCGTTGCCTCTGATATTGTTGGTCGTCGTCTTGCTTT -CAACGCCACCCTGTTCCTGGCTGGTACCTTTGGTCTGGCTGCTGGCGGTGGCCCCAACTG -GATTGGGTAAGTCTATTTCGAAACAACGCATTTTGTTTCTTTCGCTAACTATGAATGCAG -AACATGTGCCCTGTTCTCTTGCTTGGGCCTTGGTGTGGGCGGTAACCTGCCTGTTGATGG -TGCCCTTTTCCTGGAGTTCCTACCCTTTGTCTCGGGAAACTTGTTAACCATGTTGAGTGT -TTGGTGGCCCGTTGGCCAGCTGATCGGCAGTCTTCTTGCGTGGGCTTTCATCCCCAAATT -CAGCTGCACTGGCTATGAAGGTTGTACCAGGGAGAACAACATGGGCTGGCGTTACCTGGT -CCTCACTCTGGGTGCCATCACTTTTGTCATGTTTGTCCTACGATTCTTCTTTTTCCACTT -GTACGAGTCGCCTAAGTACCTGCTCTCCCGTGGCCGCCAGGAAGAAGCCGTTGCCTCTAT -CCATGGAATTGCCCACAAGAACGGCACCAAGACCTGGTTGACCACCGAGATCCTCAACGA -AATTGGTGGTCATGCCGAGGTGCATGAAAAGGAGAAAGGGCTCACTTACACTCAAATCGT -GGGCCGCTTCTTCTCCAAATTCTCAATGGAGCGCATTGCGCCCCTCTTCGCCAACAAGCG -CATGGGCTGGAATACTGTTCTACTCTGGTTCTGCTGGGCCACCATTGGCATGGGCTATCC -TCTCTTCAACGCTTTCTTGCCGCAATACCTTTCCCAAACTGGCGGCGAGACAAACTCAAA -CTACATTACCTACCGCAATTACGCCATCACCTCCATCATTGGTCTGCCTGGCTCGTTCCT -GGCTTGCTGGACAGTTGAGCTCAAGTATATTGGTCGCAAGGGCACAATGGCCATCTCCAC -CTTGATCACCGGCGTGTTGCTGTTCTGCTTCACCCAGTCCACAAAGTCCGATATCCAGCT -CTTGTGCAGTTGTCTCGAGGCTTTCTTCCAGAACATCATGTACGGTGTCCTCTTCGCCTA -TACCCCCGAGACCTTCCCCGCCCCGAACCGCGGTACTGGCACTGGTATCTCCAGCTGCTT -GAACCGTATCACTGGCCTCTGCGCTCCCCTTGTCGCCATCTATGCCGGTACTGCGGACCC -CAACGCACCCATCTATGCATCTGGCGCATTGATTCTGGCTTCTTTTGTTGCCATGTGTCT -TCTACCCATCGAGACTCGTGGCAAACAGACTCTTTAAGACGTCACGAGTTGAATCTCTTG -TTTCAGTGTGTAAAAACAGTCTTGCATACTGTAAAGTCTTACAGTCTTCTGCTTGTGCTG -GAAAGTGCAAGACGAGAACACGTCCAAAAAAAGGACAGCACATCCACATGTCACGGATCC -ATCCCCATGCTACTGCGGTCTGAACGAGTCGGCCGATGAGTTGACACATGCTCAAAGCAC -TGGGGAGTATATGGACCGGATAGAGTGTTGTCGTGCATATCATAATAATCCAGGGACCAC -TGCCAGGTGGCATTGGAACTTGAACATAAAAGATACCACCAAGATACCAAGTTTGCTTTT -CTCTTTTCTTGTTTCCGAGTTAGATTCCAATGTTAGATGATCTGCTTGGCTTAATAAAAA -AATTTGAAGTTACCACTACTCTCTGTCTGGGCGAAGATAATCTCAAGTGCCTAACCTATC -AAGAAAGCTAACCACATTACACCAGCGGCGGGAATGAGTTCGCTACATCAAAAGAAAGAT 
-TAAATGAAATATTTAAGATTCAAAGGTTCCAAACCTTGGAATATTGAACAATATGCTTAT -TGAATCCATAGCAAATATGAATGGACTAATCTTCAAGATAATCACAGCAAAAAGGAACAG -CGAACGTAAACCAGTAAAACTCCATTCCAGTCCAAGCAAGCTCCCGATAACTCAAAAGAA -AGCGAAAAAGAAAACCCACTTCACCCAGTTTCCAATCGTGACTATGTAGCTTCCAGGCTC -CTCGCCGGGACCGATCCGATAACGCGCGAAAAGCTTGAAAAACGAGGGAAAAAAACAAAG -TGAAGATGGCCATGGAGGCAAAACGCAGACAGAAAGATATCCGTGAAATTATCGCTCATC -AACAAAGTGCAAAGAAGTTATGAAGACATCACATCATGACCCTGATGGAAGAAATACATT -CCGTCTATGCCACCGGTTGATTACGACGGCCACGCTTCTCAAGCAATGTGGAGCCGTTCA -GAGTGAGTTCAAACGGCTGTTGATGCTTCCGGATAGAAGTCTTTGCAGCATTAGCGACAT -TGGCATCATGCATCCCGTATCCTTGTTTGGGCTTGGCCTGATTGGGCGGTTGTCGTGCCT -GTGCAGCGCTGCTTTGCGCGCCAAGGTTCGTAGATCGTAGCTTGGCTTCCATTCCAGCCA -TGTCCCCTTGGTTCTTGCCCACTTTGGCTGCTGGTTGGCCTCCGAGACGGTGGCTGTATT -CGGGCTCATCGTCGTAATCTTCGTTGAGTGCACCGAGTGCCGACTGGAATCCTGCCGTGC -CACTGCGGTTGCTTTGGCGTGCGTTTGCGCGGCGTTTTCCTTCCTCTTCTTGTCTGTGTA -CTGCATAACCGACATCGAAAACTTCACGGAGGTTGAAAGCGCCTGGAGACCGGAAGTCCA -TGCGGCGATCTGCCGTGACAGATGGTTGGTCGAGGAAGCGAGCACTGGCGGGCTGGTAGG -GTCCATGTTCGGCGTTGGAAGAGTCAAGTTGTTGTGGGCCTTTCTCGGTGGTTTGGAGGT -AAGGTGTTGCTAGAGGGGGTGCGTCGGCGGCGGCGGTAGTCTCCTCATCCGTACCATTTA -TCCAGGGGTGTGCGAGAAACTCCTTGATAGTGTATCGCTTTTCAGGGTCCACGGTGAGGA -GATGCGAAATCAGATCCTGGGCCGACTTGGAGATGTCATCCCACCAGGGTGACAAAAAGG -TATATTGGCCACGCGCCACCTTCTCGGTAAGGACTTGGATGCTCTCGTCATAGAACGGAG -GGAACCCACAGAGAAGTGTGTAAAGCACACAACCCATGGCCCACATGTCCACACTTTTGG -AGTAACGCTCATCCTTGACGATTTCTGGGGCTGTGTATCCGACAGTACCGCAAGGCGTCA -TCGTCTGGCTATCCCAAATCACCTTGGATAGTCCGAAGTCTGCAAGCTTGATCTTGCCAA -TACCACCAGCACCAACACCAGGGATGAACTCTCCCTCATCTTCCTTGTCTTCGTCCCCTG -GTTGCTGAGGTTTGGGATTCTTGCTGGGAACGTAAGGAGTCGGATAGAACAGGAGATTTT -CCGGTTTGATATCACTATAAGACCGTTAGTTTGCGCAAGAGCAACTTTACACGGGATGAC -GAACCGATGAACGACACCTGATGTCTCGTGCAGATATTCAATTGCTTCCGCAACCTGGCG -AATGACGTGACGACTAAGATCTTCGCTGAAATAGGTCAATCGGACGATCTGGTGGAATAA -TTCACCCCCGGGGCAGAGTTCGAGGACGATATAGTAATATTGTCGTGACTCGGAGAAGTC -GATCAATTTGACGATGTTGGGGTGGTCAAGATTGCGCATAATCTGCACCTCTTTGAGAAT -ATTTGCACGCTGTTAAAGATACAAGAGTTAGCAATGACGAAACAAGATACGCAGCCATGC 
-ATAGTCAGTAGCATCGGATAGTGTATCGTCTCGCCGGCATACGCAAGTCACCATTGCAAG -GAAGCGTGTCGATCGTCAGGAAAGAATTGTTTGCGCACCTCCAATGCCTTTGGGACTTTT -TTGACCGTCGGATAGTGAGGATCGTTCTACAGAACGTATTGGCCATGTTTATGGACATGT -TGCAAAGAAAGAAAAAACATGGCGCAAAGGAAAAGAAAGACACCTTGCGGTAGGACCTAC -CTGATTGCTGTTCATTTCAAATTTGCGGACGACCTTAATAGCCACCTCGCCATATTCTGT -TGTCGAATCCTTGGCACGATAGACGTTACTAAAGGCGCCATCGCCCATCTTTTCCACCAA -GATCCAGCGCTCTAGACCGGGATATTTGGGCATCTTCGATCGGTTCATGTTCTCCTCTGC -GACTAGACGTTCGATTTCGACAGAGCGATCCTGCCTGACCTGAGAGTCGGGTTTGTGGAT -GGCATTGGCTCGACCTTCAGGGCCAAAAGCCTCGAGAGCCGGTGAATATTGGCCTATCGG -TTGGCGTTGTGGCTCTGCATGGATGGTGGACACGTCGGTGGTGGGCTCAGAATGAGTAAC -AAGACGGGCCTGCTTGCCATGTCGAATGAAATTCTTGATATTCTGTATCGTACTCATGAT -GAAGAAGAATCGTTATTCATAAGAAAACAGATCTGGGTTTTCGGAGAAGAGAAAAAAAGG -ATGAAGGGATTTTCAAGGACTGGTTTCAGAGTTGAAGAATGGATTTACCCCACTAAACGG -TGCAAATCACAGAGGAATCGAAAGAGACGCTCCAGATAGAAGAAAAATAGGGGAGAAAGG -AGATGATGGCGGGGGAGAGCTATGACGAAATGAGAGATCTTTCCCTTGCGATGTCATCTG -CCAAGGCCTCCATCCTGGTTCACCCTCTCGGGACGTCATGGTACGCCACTGATCCACCAT -GGTAATTGCGCTCAATCTATATGAGAATTCCCTTACGTTGTACGGACAGCGATATAGGTT -CTTTATTTCTTTTTGGATCAATTTACCTCCTTTACCccagagaccagagaccagagacca -gagaccagagaccagagaccagagaccagagagaccagagaccagagaacgagagcccag -agGCCACACCCAAGACTCCTAACCTGGCCCATTCTCTTTTTTCTTTAATTTTTGTGGGAT -TCCTTAACAATGTCACCTTATCAAGGCTCTCGTACAGAATCCTGGTTTGCCTGACCTGAT -CCCAAGCGTCTTTAAGCCCTGGCTTGTGTGGGGGGTTGATATCGTCATCATCCGTTGCCA -CAAGCTTGAAATCGTACTTGGAATTCCGGATTTACCGATTCAAGGATTCACTTGCTTTTG -CTACTCTTATCCTATGATGCACATGTTACCATTATCGGAATCGGAAAATCCAACAAGCTT -TAAGTCTCTTACTCGTATGGAGCATTCCGAGATGCAGATGGGCATCTCAAGCCTTGGAAT -GCCACCTGCGAGCGCAAACATATCAAGCCAGGTGTGCGCATAAAAGTATGAGATGGCACC -CTATTCATATACAACATAGAAGTATCAGGGACATCTTGATATATCATACTTGGCCGACTC -CACATGGCCATGTAGATCAAGTCATTACGGAGTAGGAGAAGCTCCCAAATGGACCCCCAA -TAGGTGCATACAATGCACATACATAGCTCTGGAGGGCCGGGCCCCAGTGACGCCGTGATT -AGTAACAGCCCTGAAATTCAACTCCACAACATGCCTAGCTAGGTAGATATTTACCTATGA -AGGTAACACTGCAGGATTGAACCTTCTCTCTTGCGAACATCAGTCCAATTTATAGAGTCT -AACCTTATATTCACTATGAGCATATTCATATAAGATTACCGGCTTATCATCCACAATTTC 
-TCTTCCCTAATACCTAATTGTGGAAGTACACTGGATCTTATTGAGAAATACCTACATATG -ACATACATCATACATTTCTCATTCACAGACTCAGCCCCGTTACAAATGTAAGATCTCGAT -ATTCAATGATCTTTCCAAGATGAAAACCACCGAGCAAAAAGATATAGGCCTTTATATATC -CAAGAGAAGATCGGTTACTTACACAGGATCGGTTTTGTGTGAAAGTTGGAATAGGAGACA -GAAATGTGTTTACAAAATTTCACTGCGATGCTTTTATCAATAATCCCCTAATCATGTTTA -TACTGTGGTGCTGGGCAGCCCAAGATCCTTGGTCCTTTGGAAATAAGGATGTGCATCGTA -AGGAATGTATGTGTTTTAGACCTGCATCAGGGACAAAACATGCCAAGTAAGGGCTGCGGT -AGAGCCATAAGATTTCGAACAGATCTATGGAGTATGCTCTTGGAATATTTTGACACATTC -AATTTCGAGTCTTCAGATATACATTTGTACGAAGTAGATGAAGCCGAGCGTGTAGGTACC -TCATTGCAGGATGCAGCAACCAATCACTGCTTCACACCCAATATTGAATCTATACCGCCG -AAGATGGTCTTGATAAGAGAACAATTCCAATTCATGGTGGGATCAGAATTTAAGAAGTCC -CCCATATCGTGAAAGCTGGTGTGATGGATCTGCAGTGTGATCACCAGTGTGATGAGGTCT -CAGTGAGATGAGGCTGCAAGGCATCAATATTACCAAAATAGGAAACAGGCGCTAATCAGC -GGGAAACTGGGGCACGAAAACCGCTCGTCATATTATCAAACCTTTCACACGAGCAATTTT -TCGAACATGGATATTGTTTTTGAATTCTTTTACCCTATATTCCGTATAATTTGAATGATT -ATTCTTTATTGCTTCATACTTAGCTGAAACAAGGCGATAAGAGGGCTTGAGTTTGGACTC -CCCAGCCACTCGGCAATATACTCCACCTCCACCCTTACAGCTCACATTGCACAGAGAGGT -CGAGCCAGAGTCTTCGTCCGTGAAAGCATATGCCCATGCTCCGTACTCCACACTAAACCG -TTGTTTGTCATTTGTGGCGGACTCGAACATACCACGGACGATCGGTTCCCCCCTGCATCA -TAAACGAGACAAAGCAAACAAAGCCTTCGGGCTTTAGGATCCGTTGACTTTAACTATTGG -TCGCATCTTAGTCGCGAAGTGCTCGCACGCGATGCGACCCTTGCGACACTCTGCACGAGA -CATCCTACTCGTACTGTGATACCCAAAGGTATCTAAACGCCAATCGAATCCATAAAAGCG -ACCGTGGATGTTACGGGCCTCCCTGGGTGTGCCGTTTCCGAGATGCCGGGCGTGGAAGTG -ATGGGGTCGTCGGAGGATTTCCAATCATTTGAACGGCCACTTCTAAGGTCTCACAAGACG -CTCCCTCGGCGGGGTGTAATTGTTCCTAGCATCGAAGTCATCCAACCAGTTTCCTCGGAT -CAGGATGGGAAACATACTGCGACGCCGTCGCTTCCCTTGACACCGCCGGTTGCAGCCCAC -GAAGAACTCGTCGACGAACGAACGCCCCAGAAAATTAATTCTTCTCTGCCAGAAGTATCG -ACATCTGGCATGTTGACCCCTCGCCGTCCCTCTAAACCTCCGACCCCGGATGTCACGCCG -CCTCGAACAAACTCGAGTAAGCGACCTGCTTTGAACCAATTTAGTTATTTCTCGTCGTCG -TCCCGAGCCGATTCGTTCCAAACGGCCTTGGAGAGTATCTCACCAGGAGAAGATATGGTC -ACCCCGGTCAACATATCTCAGACTGTGACCCCGACAGCAAAACAAAAGAGGCAACCATCA -AAGCCACCCGTCTCAAATGAATATCTTCCAGACTTGTCAAATATAAAAGGATACCCACCG 
-CCTGCTTCTCAACGAGAGGGCGAAACCGAATGCGACACTAGATTCGAGTCTTTTGATGGA -CATTGGGCCGCAAACCCGATTGATGGGTCGCCAACACCGTTAGCTGGAAAGCGAAAGTCT -TCCCAGAACCACACTCAATTGGTCTACAGGCCAATCCCTGGTAGAGAAGTGTTGGATGTA -AAGCAGTTGGATGCGTCTTTGATGAGAGAAAAAAGCTTGCGGGACCGCGTCAATGGTGCG -CATGAAGCCGAAGCAAGTACTCCAATCGAACACTTCCGTGAGAATATTGGGTCATCATCA -TCGGAGGGTCTAGCAAGGCTCAGTAGACCAGACACCCGTCCTTTGTCAATCATCTCCTCG -ACATCCACCGTCGAGGCTATGATCATCGATACTCCCAAACGAGCCCAACGAATGCTCCGG -CATACTGAGAAGAAAAGCTCTCTTCGGTCTGTCAGCTCTCCCATAACAAGATCGGAACGT -ACATCTTATGGCTCCATCCCTGAATCCCAGCACCGCCTGGTTCACAAGGCTGCTCGCATT -TCCAACCAGGATTTCCAAAGCAGCACACCGGAGACGTCGTTCTCGGCCAAGACTAGCAAC -AGTGTAATACAACCCAATATCGAAACTATCAACGTGGTGGTCATACCGGAAAGAAGTTCA -TCTCTCAAGTCCCGTCCAAACAGTCATGTTTCTTCTAGACCTGTCTCTCAGCGATCAAGC -CGTCGTCCTCCGACAGCATCAACGGGCCGCACAGATATTCCAGGGCAGAAGAAACATATC -GTGTCTGATTCAGTGTCAACTCGGTCTCGCGAGACCGATCCCAGGGGTTGTCCCATGGTG -CGGCCTGTCATTCCACCCCGGGCCTCATCACTTTCTGCGCCGACGAGCCGGAACAATTCT -CGCACGGCTTCTCTCACTTCTAACAGCCTGCGAAGTCGTCCCACGGAGCGTCCCATCATA -CCACCACGAAGCTCGTCGCTTTCTGCGCCAACGAGTCGGAACAATTCTCGTGCGACTTCT -CTTACTTCCGACAGTCTACGAAGCCATGACTTAGCCATGGACCTCGAGGTACACAAGAGT -CGTGAGCATCAGCCGGTGTCCCCGCCTCGCCACAACGTTTTGGCTTCGCCTGATCGCCAT -GGCCTACTCGAAGCTCCCAATATGCACACCCTCTTTGCTGGCTCGGATGATATGGCAACC -CTGCGTCCTCCCTCTTTGCCTTTTACCCAAGGCTCAATTCCATCATCTTCGCCAGGGCCC -ATCGAAATTCAAGAGGCCACAGCCGTCAGTCTGTTCGCACACAATAATCGATCACTGCTG -CTTGTTGACCCTCGGGTACAGGCCTCCTCAAGAGACCCTTTCCATGCATTTGGAATCTCA -TATGATCTTCCCCAACCCCCTCGCACACCCAGTAATGCACCACAGATCGCAACATTCAAT -GCCGACTCACCTTTAAAAAACCCTCGCCCCCCGCCCAAGCCACCATCTGCCAAGCCTCTT -CCCTCGCTTCCATTACATGAAGCCCAAGATGACAACAAAGGCCTTGGTCGACGATGGAGT -TCCGTCCGTCGTTCCTGGAGCGCCCGCCCTAGGTCGGATTCCTTCAACAATATAGCGAGG -TCATTCTCCATGAAGTCGGCCAAGAACCGAACCGCAGGAATGGAAATGGACAGTCGTCTA -TATCCGTTTTGGCGACCCCGCGGGTTCTGGGAGGACGTTCCGGAGTCCCCAGAGAAAGAA -ATCTCGCCAACGCGGCAATCTTTGCCACGTCCCGATGAATCCTTGATTGTAAACAACTCT -CTAGGCTTGCCACAGCGGCGCATTATCTTTGATGGCCCACCTGCTCTGGCACGCCGCAGC -CCAGAAATGAGACGCTTGTTCAATGGAATGACCAGCAATGGTAGCTTGGTTGATCCGGGC 
-ATGTTCCGAACAGGATCCCCATTGAACCCGGGTCGCTTCCGGTCTCTTTCTCGTTGGGGA -CTGCGGCTCCAGTCGATGTCATGGCGTAATGTGCGGAATCGCCTGCGACGTGTGCGCCAC -CGACGAGACGAGAGAAAACGGGCTGTCCGGAGGGAAGCCCTCAAGCAGAGCATTGGTGGC -CCTATCTATGTGGCATCTAGTGCTACAGCTGAGGTGGCCACGAGATGACTTGAATGATGT -CCCTTTCCCCTTCTCGTGTTATTCCCATCTTTATGATTTGATATGTTTCAAACACTTGCC -CGACGATTTCATGATGCGTTTGCTTTTTTTTTTGGTCTTTGAAATTGCTGCGATGCCCCG -AGATGATCTTTtgatatgagatctatgatgacgatgaaacatgaacccatgatgatgatg -atATTATTGTTTTTATGCCTGATTTGTCAATTTTCTTTATCTTAGATGTACCATTAACCT -GTTTTGTTCACTTCTAGCATGCATCCGTATCGACTTCCCAACTAGTTCATCAGCCCAGGA -CCGAACAAAGATCTCAAACCCCCCTTGCTTTCGCTTGTGTCACTTGTGTGACATAATATG -GTCTACAAAGCAATACCCCTTAGCTAGATGTTGAGTTTATTGAGTAATGCATTGATATAT -GAGCACACGGACCGGGGTGTGCATCAAGTTAGCACCAATTATTCTGATCAAAGAATTATG -GGCAAGAAATGAAGCCTAGGTGTTTTACAAAGCAATCGGCTAATGAAATGATCATACTTG -TCATATTGACCAAATATGGGGTATCTAAAAATATCCAGCTCATCCCAATCCCATCAATAT -CAGCGTCTACCACCCCAGTAGACATTGCCTCTTCCCCTTCCTCTCTTTCCACGCCCACGT -ACACTCGACCGGCCCCGAATAGCCATCGACCCATACTCTGTCTCGCGCTCCACTTGTTGT -CTCATTCGCTCAGCCGATTCATCTTCCTGGTCCACCCCCGAGCCTGGTCCATCATCTACT -GACTGTACTGCCAGTCTCTCGCGGTTGTGCTTGAGCATCGCTTCCCAGGTTGGTGGGTCA -AGGAAAGGTTGTGGCGGCATAGAGAATGATACTTCAAAGCGCGGGGGCTGGGGCTCATAG -TCTTCTTCGTTTTCGACTGTGCTTTTATTACGCTCGCTTGAACTTGTGGGCTTCTTTCTT -TCTTCGGTGGCCTCTTCGTATGACTGTTTGATGCTTATCCCTGCAGGGATGGCGCTAGGA -TTGAGGACTTTGTATACCTTCGGACCTATCTTGTTGGTTGGTAAGGACAAGAGAGCGCGT -TTATGTGGAATTAGCAGAGGCCGGGATTTCTTCGAGGAGGTTGATAAAGCCATGACTTCG -AGATAACTTCAGGTAGGCATAGACAAAAATGAAGAGATGAAGTTGGGACAGTTGGAGATA -AGCGAAGTCAGGACCTTATCGGCTTGTCAGACACGGAGCAGTGTTTAAGCGTTTAGGCAA -TTTGTGTACATTTTATTTCGATATTATTAGCACTACGTTGATTCCAATAAATTTGAATAT -GTCCCGGCCGAAACACGATCAGAATTCTACCTGTTGTCTTTGGCGTGCACCCAGACATCC -TGGTCCCTCCCCTTGAGATATCCTTGCTAGCGCTGCCGCAGAGCAATTTGCAGCCCGAAG -TTGGGCACATAGAACTCTTCGAATGAAAGGGAGCCAGTCAAGGAGGTAGAAGTCGAATTA -GGCAGTAGTATATCATACTGATTCAGGATATGCGCGAAGGCGAGTTTCATGAGTGCAGTA -GCCATCCATCGTCCAGGGCTGCAAACACGAGCAGAAATCAGCTGATAGGATCGGGTCGGT -TGGCTTTGAAACCAGTGCATGAAAAGAAAAGGGGCCAGCCAGGAACAAGTCTTACCAGGC 
-ATGTGCGCCCATTCCAAAAACCAGATAGTCAGGGCCCAGGTCGACCAGCCGGGAGTCCTT -GGCTCCAGCTGCTGCACGCTTGACGAACCGGAAACCGTCGAACCCGTCGGGGTTTGCATA -GTGCTCGCTGGATCGTTGGATGATTCCACTTGGGACCGCAACGTGGGAACCCTGCTTGAG -GGTGATGCCTATTCCATCACATACTTGATCAGCTCGACATCTCTTTCTTAGCCTGGTAGG -AGATTTGCGAGAACAAACCATTAGATAGTTTGAAGTCAGTGATGCAGACCCGATGGGCAG -AGACTAGAAATTATTGCATTAGCCATGCACAATTCCCATTTTTGATTGCATATAGACGCA -CGTGCTGAAGGGGGATGCAGCCGCTGGCATTCCTTCAAGAAGCTATCTGTCAATACCATG -CCATCACAGGCTGGAACGGTTGGTCCCATTTCGGAAAAAACCAAACTAATCTCCTCACGC -AGGGGCGCGATATACTCGGGACGCTGAGCTAGCTCAATCAAGGCGCCGTAGAGTCCCTAT -TGGTGGCCAAATCAGCAGGTTGCACGTCTATGACATGCGTATGGAATACCTACATAGGAC -GTGGTGTAGATAAAAGCAAATTGCAAAAGTAGAATCTCCCCTATCACCTCTGCGTATGTC -CATCGCTTCCGAAGAACACTGCTCTCATACAGCCATTGGAAGATTTCAACCTATGGTTTG -TTCAGCCATGGTGCTATTGGGGGTGGAAGAGGAGGCAGGGATACCGGATTTTCTGATTCA -TCGAGATTAATCTCACTAGGTCCATTGTCACTAAATATCTTGGTCTGATATTCTGCCGCG -CGTGCGTCGAAGTGCTTGAGCAGAATCTTGCGCGAGGCGTGCATATCCCAACTAGTCCCA -AGAGTGGCAAGGCGACTAATGAGGCCACCAATATAGCTATATGTGGTCTTAGCCCATGGG -AATTAAGATTAGAGAAGCATTTAACTCACGGCTTTAGGAAATCTGGAATGAGAAATTGTG -CACGAGTGGTCACTATCATCCCAAGGATGTGATCTTTGACAGCTTTAAGATATTCCTCAT -TCTGAGAAAGCCGCGAACCGACAATAGCAAAGCTACCACTGCGGGCTACGATCGAGTATG -CCAAATCAAAACAACCAATGCTGCATTCTAATTGAAGGACGTTAGAGACTGTCTCTCCTA -AGAAAATGCTCACACTTACCCCCGTCTTTGAAGTTTAGGGTCTGCAGCTGCGAGATAATT -GCCTGATCCATCGCAGGCATGTACCGATGCAGGTGTTTATAAGCTGTGACGCGAATCAGA -CCAGGCGTAGTTTTAGCAATCTTTGGCCATCCCATCCCCTTTGGGAAATAGCCAGGCATA -ACATGAGAGATCTGAAGGAGCTATGTGGAATATTTGTCAGCGCAAACGTCGACAATGCAC -TGCAAAGCCCTACCTGATCAATCCATGGCTCGGGGGAGACAATAGTACTCCGTTTACTTC -TTAGTTCTTCAATCAACTCTGGCTGGCATAATATGACCTGCCCGAAGCGGAATGTCCGCG -GAATCTCAAACAAGCCATAGGTGAATTGTTCATAGCCCTTCTTTATCATACCTGGTCCAT -CCACCACAAACTTGTACTGTTGGTATAAAATCGAGAAATAGTCTTTGCGTGCAGGAAATG -GAACGTGGTTCTGCCGGTTCAGGAAATCATATCAGCATGTGAATTTGTTTCTTGGTCAAC -TATCACCAGGTAGATGGGATGTCACCGACGTTAGCCATGGTATATGAAGTGAACACCAAA -GCGGCCGCAACCACAGAGCAAAAGATAAAGGGAGTAAGGAGGCACAGCACTATAGCCGAA -ACTCGAGCTTCCCCCTCGGTCCAATCGAGCTTATAAAGGAAAGCCATTTTGATATAAAGA 
-AATAATAGGAAGTAAAGATATGGACAAGAAGGGTGAGAAACCGAGATTATATAGTAGGTT -TCGTCCAGTAGAAAAGGATAAAATGAGAGAAAAGTAATAATATCACTATGGCAGGCTGAG -ATTGCTTCCACGGAGCGAATCTGAGCCTACAAAAAGCCACAAAAATAACTCAGTGTGATA -TACCTACATAATGTACCGGACTTTGTAATTGCAGACTATACTCTGGATTTTCAGACAAGG -AGCTTTGGCCAAATATAGCATCATATTGGAAAGAAACCCAAAAGAAATATAGACGGATAT -AGAAAGAACATGGAACTCACTGGGAAGTATCAGCGATGGAATGTGTGGTATAATGATACA -AGACAATGCAAGCCACGCCTGCAACCTTGAGGGGGGAAAAGCAAAAAAAAAAAAAAAGCC -ATAACGCCAAGTAATTATTAGGCAGGCCAATCTATAAAATGCCCTGGGATTCTATCCCAC -CCATTGTGGAGTAAGAATAAGGCAAGTCTACCAAGCCGTCAAGCAGCCAGTTCAACGGAT -CGAAGACTTCGTAATTAGGTTCAATGAAGCCACTTGGGAGTCCAGTGGGTTGGGTTGTGG -ATGGTTGAATCCCAAGACTAGCCTGAGATAGCAGAGGTGCAGGGGCAAGGCTTGGATCGT -TCCCAGAGATTCCAGGGGTTAGTGTAGATGTGTGTGCAGGCCCGACAGATGAAGTGGCCG -AAGAGTCGGCTGCCGAGTCAGGGTTTGTTGGATTTTTCAAATAGGCTATAATGGAAGGTT -AGAGATGTATGTATACCTGAGATTATAAAGGGATACTGACCTTCAATGTTTCGGCCCTTC -GTTCTGGCATCCTCCCGCCAGCGCCACACAGAATCGAAAAGCAGGGACATGCTCATCCGG -CAGCGTACCTTGAGTCGCAGGGAGTCATCAACATCGGTACTATTGTTAGACCGGTGAGTC -GCCCCTCCCAATCTCCACATCTGAGCCAAAACTTCAGCAAGCCGTTCGGGCAGGTCATTG -CTGGATGCTGACACCCCCCGGATGGCCCATATTGTACGGTTGAATAGGTTCCTGGTATAT -TCCATGTCTATGTGAGAAGCGAAGAAGCTCTTGCAGAGCTTCAGAAGAGTGCAACCGGCA -GCAACCATCATCTGGTAGACATAGTATGGGGTGTAAGACAGAACCGGGCCAACGTCAGTC -TCGAGGTTCAGGACAACCTCAAGGAATGATGTCGTCGCGACATACAAAGATAAGAGGCGT -TCGCGATAATTCTTAGTGGTAGGCTCATCAAAGAAAGCCGAAAGGTGCAAATGGAGGTTA -GCAGCACGGAGGAACAAGTCAGTAATACCTGGAATCGTTAGCCGTGCATCCTATGTATTC -CAACTTGAAGTTTGGAAGCTTACAATCATTCTGCGGTTTGAGTTGTTCTTCTAACTCCTC -GAAGTCTCGCGACAAAAAGGAAATCATTGTTGAGCGTTCGTGGTCGCTGGTTAAACCAAC -AGGATCTCGTCGATTTGTGTAAAGTGCTCGGGTGACTCTGTCACAAAACTTTTCGATCTC -CAATCTTGGCCTTATATCTTCAGGAAGCTTGAAGTTCGGGTCTAGCGAATCGCTCGAGGA -CAATGTCCAATCGTAGAGTGTAGATGATGGCTGTCCATAACCGGTGGAAACTCTGCAAAG -TAAAATGTGAGACAAAGGCGAGCAGTAATTGACCATGGGTTTGTACATACCTTTGAGCAA -CGATATTGCAAATTGCCCAAGTTCGCACCTTGTCCTTCAACTCATCTTCAATCAGTTCCA -CTCGGAATTTGCTAAAGTCCTGGGTATGAGAAGGCCGGTGGAGACCAAGCTGCATAGCAA -CCTGTACCATCATTCCACATAGCATGAAGGTCGGATCTGTTGAGGTGCTGCTTGTCGGGA 
-ACGGCCATGAGCACAGTAGAGCGAGGGCCTTCACAACATGGTAGCTCTGGGGGATATCTG -CTAGAGTGCTCCAGACCAGCCGGGTGACGGGACCAGCGAGAGAATTCAACAAATTTGAAT -CGCCTTGATAGCGTCGCGCACCGACACTGATGACTATCCAGAAAAGCAACGAAGAAGAGC -TATAATAATCCTCGGGGGACTGGTTTCGGTCCAAAAATGGAAGGAAAGGGTGGTAGTATG -TGAAGAATCTGGCAGAAACAAGTTAGGATCCATCACGAGGACTTGACAAATTAGGACTCA -CATATTAAACAGTTCTGCGATTTTTTCGGGGACAACCATAACATCCTCAATTCGTTTGAT -TTGTCCACCATTCCTCATGAAGTTGGCACCATCGAAGCCAGAACGAAGGTCCAGGAGGGA -CGCAACAGCTTCTTGGGATCCCATGTACTGATCTGCGGACATGGCTGATGGGGTTTGATA -GACTGCAGGGCTGACTTGACTCGATTCTTGCTTGGGGGTCAATTGTCCTGCGGTCAATGC -TCGTTGCTGTGACATTCCAGGGACAGATTGGCCAGTGGCAATCTGCTTCCTGAGCTCCAT -TATTTCCCTCTCCATTTCCGCATTTCGGCTGCGCTTCCCAACACGCTTGAAGTTGGATTC -AATTTTACAATCAAGCCTCAGTCGACGGCATCGCGAACAGTCGGTCCATGGATCTTGAAT -AACATCGCATCGGAGCTGTGGCAATCCGAAAAGGTTAGTCAAACCGAAGAATCGCGAGGA -CTTTGCAGTGATAAACTAAACCTACCTTCTGTTGTCGACATTCATTGCAAGCGCGCTTCA -CTGGTTGGCGTCGCTTTTCCTCGCGCGCATCATTATCATCCATATCCTCCATGCTCCCCG -GAGAACTGTCGATCCCCGCAATTGCCATATTGCGTTTGCGACCATGCACGCCATCGGCGG -CAACTGAGACTGGCAACTGATTTACACCCGCCATGACGGGACTGGTGCCAAAAGGAGCAT -TATCAATGCTCGAAATCGGTTCAAGAGTTTGGGCCTGGCCCTCTCCGATGCCCTTCGATC -GCCCGAGAGAAGCCAGGCTGAATGACGTTTTTGCTGGTCGAGATCTCTGTTTTGAACGGC -GAGAGAGCTCTGGACGGATTCACAGACACCGTTGAGCCTTGATCCAGGTATTGCAATTTG -GTTCGATCCTTTCTTTTTCCTTGTCTTTCTCCAATAAACCAGTCTCAACTTTCTTCGGTT -TCTCACTCGGTGAGCGCACTTCTTCCCGGAATATCAATGACTCAGCACAGAGGGGCATAT -CACGTGACTATATATGGTTCCATATCCCACGTGACAGTAGACGGGGCTGTACATTGAAAT -TATTCTTGTCATGTCATCATCAATACGAAGTACAAGCATATAAAATAGAGACTAGAAAGA -CACTAGAAACTCATGTTCGCAAAATGTAGTGCATCTCCACACTTGATCAGCAAAAAGTAA -GCCTTGGGGCCGGACAAAAGCCACCAAGGCAGCAAGGCGGTGCAATCGCGCCCGGCTCCG -CCCTAGATTTTGTATCCAATCCAGTATGGGATTGAATAAGACGTTTTCAGATCATTGGCT -TGGAAATATAAAATGAAGCTATTCCTTCTCCATAAAAGCTTTGTATTCCGAGGAGGCTTG -GCCAAATGTGGTAAAGGTCTGAACTATCCAGGGGAATTAGTGGCTATCATCATCGTTACA -TCTCGTCTAACTTGTCCATTTTAGGCAATGAAAGATCAGACAGATAAACGCCCTGCTGAC -TTTTCTTCATCATACTCTACTCAGATACGAATGAGATACTCAGTGTTCACCAACTAATAT -AAAAGCAAGCCCGACCACACCTGTCAGACCCCAATAGCTTAATATCTTCTTTGTAACATA 
-CTACCTCTATTAAATGCCAACATGGGCGTGTCAACGGATAAAAAAGCCAAAGTCGCACCC -AATCAATCGGAGGCTCTCAGCGAGTCTTCCAGTGGGCGACCACCTTCCTATTCGGCCGAC -AAGGTCGAGCAGCAGCCGCCATTAGAACTGCCACGGTTGCACCTTGGGCCCCCAACATCT -TCTTCAACTACCGTCACTCGGGATCAGTGCGTTGCACATCTCAAGCTTCTGGCTGTGTTC -GCAGACCTGCGAGATACTATCTCTAGTGATGATGGCCTCTTTGGGATTAACGATGCCGAA -GCTGAGCGATTCCCGGATTCGCTGAATGAAGCACGGGCTCGTATTCGCGAAAAGCGCTGG -GCGGTATATACAGCGCGTGCGGTCGACCGATATACCAAGTGGTGGTCTATTGGCCTTCCC -CGGTCAAGGCCGATGGCAACTATCAGTGACTTGGAGGATATTGGCTATGAAAATATCATT -AATTGTGATACAGTAGTTGCATGGAGCCCGGACAATCTACCCCCGCTAGGTAAGTGTATT -AATTCTTAGGCGAAAGGGCCATCGACTAATTACTCTATAGATATTTTAATGGTCTGGCAT -GCTCATTCGTTGAATCCCCGGAACTTCCTTGAGGATTGCATTCGATATGGCAAGATTAGC -ACCTGGACAACGGGATTCCCATGGGAAGTGATAGATAGCTGCATCAACAACCATACACTC -GAGTATACAGTTTCCGACCAAGCCCAGAGTCAGTTCGAGGAAAAACTGAATCTCAAGTGG -AATAATCTGCACGATCAGCCAACCAAGGAAGTCAACTGCCCATGCTGCCAAAAAGTAAGC -GCTGTCCCGTGGACAGATGCCTACTTTGGAGACACTGTTGCCAGCGCCTTCAAATTTGGT -AATGGCTACGCCGATAATTCATTTGAAGTGAAATGCGTCAGCTGTGACCACATCATCGAC -CACGGCCGGCTCAAGGTGGCAAAGTTCCGAAAAGACCTTACAGAAACCCTTTACGACGGC -TTCCCCATGCCCGGGGCGTTCACCAACCTCCAGGGAATTCCCGAGGGTCCAAGTGTTCGG -TTGAGCCACCGGGCCATCCGGTATATGTTATTCCCCACACGAGTTCTCCAGGCTGCCGGA -AAGAGCTTAATGGAATTCACTGATGGCCGAGTGGATTGGTGTCAGAACGTCACCCGCCTG -AAGGAGGAGCTTGAAACCAAGCTACGCGACCAAAAGATTTTGATGAGCGCGCATGGTACC -AACCACACTGTCATCCGGCCTGCCGAGAGGATCCACTTCCGACGAATGATGTCTCGATAC -TGGGAGAATCTCGGTCCCTTCGCGCTAGACCTTGTGGGCGCCGTGATCAGACAAGGCACC -TTCGTAGATAAGATGGACCAAATCGACTGGCTACACTCGCCCACAGTCTTCGACACCATG -GACCGACTGATCAAAAAATACCAAGTGTTCTTCCAGATCATGATGGATTACCCAAAGAAT -ATGGCCGTGCCCACACTGGACGTCGACCTGGCCTGGCACACACACCAACTCAGCCCATCG -TGGTATTATAATTACAGCACATTCAAGTCAACAGCTGCTGGACTTCGTGTATTCATCGAC -CACGACGACAAAGTCGACGAGGGAAAACTCTCCGACGGCTTTGCCTGGACCAGCAAAATG -TACCGCCGTGCCACCGACGGCGGCATATACAGCGAATGCACCTGCTGGTACTGCGAGGCG -ACACGAACACCAGACCTGTACGACCGCTTCATCACTGTTGGCTCAGCCTCTCGTGCCCGA -GCCGCTGCAGACCTCTTGCACGATCGCCCCGATATATCGTCCGACCCAAACAAAAACCCG -CATATCTCTTCACACAACGCTGTCCGGCCAACAAACCTCTCCAACTCGACTGAGAGGGCT 
-GGCGCTCTGAAACATATGCGTTTACAAAGCAGCTACCGAAAGGCAGCTCGACGTGCTGAG -AAGCGTGGCCGTCAGCGATCCGACTCCAAGTCATCTAATAATCAGGCCGACATCTATCCA -TACTACATGCCTTATGCATATGGGTACCCTATCATGGTGCCATATTATGGACCGTATATG -ATGGACCCGTCCATCAACTGTGATTCGTACGCAAGTAACCCTGGCTGCATGAATGTCGTG -GCAGGGGCCGCTGGAAACTGTTGTGCCGGCACGTGTGGTAAGTTCTTCTGGCACCCTTGG -AGAAGTATTAGGTGAAACTGACTTGTCAAAAGGTGGTGCCGCAGCCGCAGGTAGCTGCGC -AGGAGGTAGTGGTAATGGAGGGTGCAGTAAGTCTCTTTATGAATTTCGATATACTACAAT -GAATGCTAATGTGTTTTCTGTATAGGCAgtggcgcttgcggaggaggaggaggtggtggt -ggtggtggtAAGTCTTTCTCATTGAATTCGATAGACATATTTGCACATACTGACTTGGTT -ACTTAGACggcggcggtggcggcggtggtggttgtggaggaggtggtggaggtggaggag -gagggggctgtggtggaggtggaggtggATGCTAATTCAATGACTGCTAATACCCCTGTT -TCTGTATTTGCTTCTGCTGTTTTATACTTGTGCTTCATTCCTTGATTGCTTAACGAACTA -GCTCAAGTTATGCTTATTGCTCATAAGTCCAGAATGCACATGATCTCCTTGATATTGTTT -CGCCATTGTTTTCATTAAGCTCCACCATTACTATACATGCCGTTGGGTGCTAAGTTGCTT -AGGAAGATGCGAAATTCATCGGACTCACGCCTTTCTGATTgaggaggaggaggaggagga -ggaggaggaggaggaggaggaggagAGATATATATGCCGACCTGTAATCGGTAGATGCCA -CATGGATACCCAGCTAGGTACATCCGAACACCCGCCTCAGGTACTAGAACACAATTGAAT -CATTAGCAACATGAAATCTGTATCATTTTGTAACTATCTGCAAGAGACAACGACAGCGTT -CCCCTAAAAGAGATCAACACCCATACTAGTTTCTATCAGAGCCGGGTTGTCACACATTCA -TTCAACTTCATAATTTATCCTACGTCTTCCTCTATGATGAAACAGAGATTCCACAAAAAA -AAGATGGACCTTTACCCCGTAGTGGGTCGTTGAGTCCCCGCTTATGATATACAGGTACGT -CGAGATAGAAATGACATAAGGATCTACATTCTAGAACAATATACGTTCATGAGATATATT -CATTGTATGCCCCGTCCGAGGGTTAGTTGAGTTCACAAGAGTGAGGGGCCATAGGTGAGA -GATATAGAAGAGATTGGGAAAGGAGACAGAGGGAAAAGAACGGACGAGATACACATGTGG -AAAAAGTAGGGAAGCGGATCGTAGGATTTGTGGGTTTGGTATATATGTGCGAGGTGAGAG -ATCAAAACAAAACAAAACAAAAAGACAAACGAGAAGCATAGAGGACATGGAAGGGTGTCA -TGTCCTTATGGATATAAGTATTTAATGGGGATAATAAGTATGCGCCGGCCATACGGAGTA -GGCCATTTATGGGGGTTTCCAGAATAAGCCGcaaaacaaaacacaaagaaaaaaaagcac -aaAGCGGGCCAAGGAGACATGATTGATACATGGGATCTCAAAAGGTAGAGAAAAAAGGTC -CCGAGAATTGTGAAACACTCCCGTTGAGAAAAGAGCTCTGCACGATTACTCGACATGAGC -TCTATTCTCCACCTGGAGTGGTTATAAACATGTAAACAAGAAGGTTCAAAAAATTCGCTC -AAGCTCGTACATGAGAATCGTGAAGGGGTCATTCGATATAAGCCGATTTGACCCAAGGAT 
-TTGTTTGGAGGCACGCAGACGCGCAGACGCAGAGGATAATGGAAAATTTACTCTTCCATG -TGGCCAAAGACACCGCCATCGTCGTCATCGACCGGCGCGGAGCGGTTACCAAAGTCTACA -TCGGCGTTGCGACGTTGACGAGATTTCTGGGCATCCTCATTACCAGCATCCTCATTACCG -GCAGTCCGAGCCTCACGCTCATCCGACACCTTGTAAGACAGCGACGGGTTGAGCGATCGG -GCCAGGAAGATGTTGACCTCGGAGTCTCGGCGAGCGAACTCCTCTGTGGGTCGTGATCGA -GGGGAGAAGAGACCCTGGTTGCTCGGGTGGCTGCCTGAGTCCAAGTAGCGCGCCGGCAGT -GGAGGCACTGGTGGCACGTCGGGAGTCATGGAGACAGGCCCCGTGCGAGGAGTCCCGAAA -TTGCACAGGCCGATGGCCGCCGCAAGATCTGCCTCATCATCCTCGTGTAACCGTGTGTCC -GCAGTCGAGGGACGGGTGTCGATGCTCGAGTGGCGGAAGTGGGAGAATGTAGGCGAGTGC -AGCGGAGCACTTTCGTTGTACGAGCTGGAGGGAATGGACTGGTAGGAACGAGAGAAAGCG -GAGGCGTTGCTGACGCTGGGGCGTTTGGCCCACTGTGCCGGCATTCCGGACATCTCGAAG -TCGTCATCATCCTCCGAGTCCATTGGGGGCGGCGTGGTCTCAGTGGAACTGATGCCATCT -CGCATTTCTGATAGGGCGTCCGGCGATCCAGACGACAATTCCGACTCCAACTCGGCGGAC -TCATCGGGTGGATCATCGTGGTTCATGGTGACCAAGACGGAGGCGGCCTCCAGCAGTTGA -ACCTGCTGATGTTTGGAGATAAGAAGCTTCGAAGTAACGGACCATGCTGGGTCGTGTTCC -CACCTAAGATACCACGGGAAAAAGACACGTCAATCAAATGTTGCTGAAAGGGATGGAAGA -AAGGGGAGCAATGACGAATGGGGAGTTGAATAACCACGAGGGCGACCTGCACAGAGCCGA -CGTGGCGCATATTCCCGATCGAGCTAGAGGAAGACGGTCGAGCGGTAGGGGGGATACACA -CATATGCTTAGACAAGCAGCTGCCATGCTTGTACCCTTTCCCGCAGCGATCGCAACGTAT -GGCCGTTGAGGCACGCTTCCCTTCGGTCTTGAGTTGGGCACCTTCACTGGCTCGGCGGTT -TCGGTTCTTGGGCTTGCCATCGGAGTGGTCCGAGCCCGGAGAACCTTTGCGGGCGACCGG -TGATTCCTGACTGGGGCTGTTGATGCTGCGGGAGAGGTAGGAGCTGAAGCCAGATACGTT -TCCGACGGAGCCGGACTCCTGGCCCTTGCGGCCTGCGAGGCTGCGGCGGTGCGCACCCAT -GGGGAGCGAGGTAGCCTCCCCATCCTCCATGGAGGCAGCGACTGCGGCGGCAGCATTGGC -AGCGGCGGTGGTGACACTCTTTCGGCGAGTTACTCGGTGGTTTGGATTGAGAGCACCGAG -CGAGATCGAATGCGCGTGTTGATGCCGCTGGCTCGTATTGGGGAGAGGGCGCGACATGGA -TAATTCCTGTGAATGGCCCAAAGAGGCCGCTACATTAGTATGGTCCGAAGAAAAAGTGTT -GGGCAAGATTGTAATGCTGGACGCCTACCTTGAATGAGGCCATGGTAAAAAAAAAGTTAG -GAAGCTTCCAATGGAATTGATCCTCTAGGACAGTTGCCCTCAATTCAGGGAAGATGAAAT -AAGGTAGAGTTCCGGGACGGAGATGTATAGGTGAAGAGCAATGCGAGAGGAAATGAGGAA -AGGAAACAAAAAAATGTGAGGGAATCCAAAAAAAGAAATGAGGAAGGGGAAAAAAAAGAG -AAAGGAGAGAAAGAGGGCCGGTATGAGCCAAAATCGCTCAGGTTTGCGGGGGCCTTTCCC 
-TTTGAGCATGCGGGGGGGGGAAAAGAGGCGATCACCGGTCACACATACAGTTGGTATTTT -ACTAACTTGGAAGTTATATATGACTGTGGACTCTATTGATATCTCTATTACTTTGCTACT -TGGCTACTTTGCGATATTCCTATATTCCCCATCTTGTAGAAGTGTCGCGATGACTCTGAG -AGTAGAGTGGATCCACTTATCTAAAGACATACATAATGTCCAAACCCTTTGAGAGCGAAT -TCTCCTATGGCGAATGTTGGGCATAATCTTTTTTTTTTTTATCAATGTCTTATAATACCT -GGCGCTCGGATGGCGTTCCAGGGGAAGGGTAGTGCCATGGATCCAGGCTCAGGGTATAAG -AATATTATTTCAATTCTGGATATATCAAGGTTATTATATAACATTCAAATGTTACAATTA -TATATGGGAACAGAAAAAAAAAAATTCTCAGTTGCCTGACGATCAACTAGGGTCACAAGA -TTGAACAACTGGACTCTGGGTCCAAGTACCAATCAAGTTACTTGGTTCCCACCCGGGTCA -TGACGACTATGTATTGATAGGAGATGACCGCAAGAACCTGAACTAGTGTGACTTGGAAAG -TACCAAGTTCCCATGTATCAACCAGATCACCTGCATAGCATCGGCCGGTCTAGTACGTGA -AAGACCACTGGATGTGGTGGAGCCAATGATGTCATCCGGGCTTAGTCAATGGAATCACCT -GATAACGGATACGGATATAAATTATATAATTTCGATTTCCCTACCTGGGTAGTTATACAC -TTTgattacggacaggagcacagggagtccggagtacggagtacaggaagtacATAATCT -GTGCTGTGGCAAAGAATTGTGGCTTATCGTCTAGCCCAAGGCAACATCCGTTCATTCCTG -GAACCCGTGGACAAGTGTCAGTTAGGTAATCGCTCAGGTCACCATATTCTACGATCAGGT -CGCGCCATCTCGCACGCTAACGCCTGCAAGAGAGGATCATTACTCCGAGCGTCACACTAC -CCTGTTGCGCACGAGCACTTAGGCGGAATTATTTTTGGCGCCCTTCAGGGCCTGCAAAAG -AGGAGGGCCATCGAACCCCCCTGGCCTTTCTTGATTCTGGCACATTCCTAATGTTCCTGG -CCCGCCATTCTTGCCCTGCGGCTGAGACCCCCGGGACATTCCTCTTGTCAGTATTATTGG -GCTAAAAGGTCAGATCATTGTCGATCCAATTCTTAGAGCTGCCGGGAGGTACCGTAGCTG -CTGAAATTATTTTATTCAACTTGTTTTTTTTTTTACCACGGAAACACAACCAGTCCAATG -CAAATGAATTTCATATGCCCCATTATTCTTTTTCTCCAGTTCTGTGTTCGCATGACGTGG -TCGGACCCCCAGGGCTGAAAAGCGCCGAGGTGGAGGAAATCTTACCTAATATGCCAAACA -GTGCAAACATGTAAGAAAATCCTCATTGGAGATAAGAGCGATAAAGATTAGACCCATGGA -TCCCGGTCGCAACAGCCTCACATAATAGGTCAATTGACTTTTCAATACAAACTCCCGGAA -GTCTACGTTGCATACCAACGCATCCAATACCACCATAGAAAACACCAAAAGACTTCCTTC -CAAACCGCCGCAATGCTTCATTCACGTCTACGGCCCCTCCCCCGCCGCAACCACCCCTCC -ACACCCAGCGCCCCAGCGCCAGAATTCCAAAATCAAGACACCAACAGCGAACCCGACGAA -GAACCCGAAGATCTCTTCGGTGCATTCCTCCCCCATCTCTTCCCAGACGACGCTCCTTCC -TTCCACGGCGACCCAGGCCAACACCTCCTCTATAGCTCCCCGCGCTATGGCGACCTCAAC -ATCATGGTCCCCAGCTACCCGAGCTCAAGCCAAAGCCAGAAACGCTCCGAAGAGATCGCT 
-GCAGGCCAAGCCAAGCCAGATGGCTCAGTGAACCAAGTCGACGAGGGACGCAAGCTCTTT -GCGCATTTCCTCTGGAGCGCGGCGATGGTCGTTGCAGAGGGTGTCGAGGAAGCGGATACG -CCTCTAGCACCAGGTGAATCAGAGACAGAAGCGCAAAAAGAGAACCGGGAGCTGTGGGGT -GTCAAGGGCGAGAGTGTGCTCGAGTTAGGAGCTGGTAAGTGGTGATCGGCCTTTTACATT -CGTTACAGTTGATACTGCACCCCTCAACTCTCAATATCAACTAACGCATTCATATCATAT -ACAGGCGCCGCTCTTCCCTCCGTAATCTGCGCCCTAGCCAACGCCTCAATGGTTACAGCA -ACTGACCACCCATCCTCGCCTGCGTTCAGCGGCGCAATCGCTTTCAATATAGAGCACAAC -CTAGCGAAGCGCACGCCCGAGGTTACGGGGAAAGTCTCAATGCACCCGCACGAATGGGGT -GTGTTGGATGACTCGTTCGGGATGGCGAATAAGGGTGTCTTCACGCGCATTGTCGCGGCG -GATTGTTTCTGGATGCGCTCGCAGCATGAGAATCTCGCGCGCACTATGCAGTGGTTCCTT -TCGCCGGGTGGAAAGGTGTGGGTTGTTGCTGGGTTCCATACGGGGCGCGCGATCGTTGCC -GGGTTCTTTGAGACGGTTGCTGAGTATGGGTTTGTGACTGAGAGGATTTATGAGAGGGAT -TTGGTTGCGAGGTTGGAGGATGGGGGTGAGATTCGTAGGGAATGGGCCCCGGTCAGAGAA -GGGGAGGGTACGGAGAACCAGAAGAGGTGGTGTGTTATTGCTGTTCTGAAAAGAAAGGGG -GAGTAAATTATACTTGTGACTTTGATGATGGTATTAGGTGCATTTTTGAGCGATCTATCA -TTTCCTTTGCATGATGAGATGACGAGTATACATAGATGAGCATAAAATAGAGAAAACTGT -CTTGGAATGATTGAAATGTTTCTGTATTATATCCATCATTTCTATACTGGCTATCTGCAC -TCCATCCTTCTTTGCTTGGGGGTATAGTTCGCATGGTATTCCGCAGACCCACTCAAGTAT -CAAGCATCGCCAACCCTCTGGAAACATACCGTACAAACTCCCTGTCCTTCATGTCCTCCA -AACCGGAACCGAGGATACTCAACTCGTCCCCAAACCGCCAACCGATCCCCACTTTTCCGA -TCACCAGTGAACCAATGCACCAGCTCTTCCTCAGTCCAGTTGCAGGTAGTAACCATCAAA -AATCCACCACGACGCACTAAGCGCCGAGCAATATCAGGATATCTCTCGCAGACACGAGTG -GTTTTCGCATCATCAGCTAGCGAGACGGCATCGAACGTCCCCTTGTCTAGCACAATATCG -AAACCACCTTCCTGGTAGGGAAACCAGGGGAGTGTTTCCTCGGAGGCAGAGGGCGATACC -TCGAGACCGGTCTCAGAAAGAAGGGCTTTAGAGCCGAGGATATCCCACTCCTCAAACCGA -ATGGATGTTTCTTGTTCCGGTAAGTCATTAGATCcagcctcagcctcgacctcagcctct -gcttcctcttcatcctcctcttcatccgaatcctcagAGTCACTTCGATAAGCAGAATGC -CCTTTCACACGCTGCAACTCCCTCGCAAGCTCAACACTCTGACGCGAGTAATCAACACCA -ACAAGACGTCCCGTATACCCGCCCCTCTTTCTAAGCAGCGCAAGCATACTCCCATTCCCA -GTCCCAAGATCTAAAACACTCGGCTGGCGGACGCCCTTTTTGTGCGCTGTATTGCGCGGC -GCCAAGGGGAATGACTTGCGTGTGAGGAAGTGGAGGACTTTTTGCGGGGCGTTGTGTTCG -GAGAACCAGGATGTGCCAGGGTCATCTTCTTCGTTTAGGTCGGAAGTGGTGGATGCGGCA 
-TCGTCATCATTTTGATCTTTGTCATCGGTCTTGGGGTCTTTGCTGGGAGCTTCGTGCGAG -ATGTGGGCTAGTGTGCGGGCGTAGTAGGATTCCCAGCTAGATTTGGATTGTTAGCTTGCG -TTTGAAAAACCGGTTAAATATCAGAGGTAATTTTATATATCAATTGGTAGCCCAATTGAT -CTATTGCTGCATTTGCTGTGGGAGATTCGTTGTATACAGCACCCATAGAGGGGCAGAGCG -GGGGTTGGGAATTCCCCGTTCTTGCGTCGAAGAATAGCAGGGGGTGCGGCTAGAAAGAAC -ACATACTAGTCTTTTGTACCCAGCTCGCTAGGCGAGAGGTATGCCGGATGAGATTCTTCA -GCCATTCTCGCTAAATGTGGAGAGGATGTGCAAGGGAAAATTTGGGTTGGAGGCCCCGGG -GTGTCCTCGGGGCTTATCGCTATCTCCGTTCTAATCAACAAATAGCCCGAAGCCACGATT -TGGATCTACTGGAACTAGATTTGATGCCATACGTGTTTACAGAGTCATGAGAAACAATTG -AAGCTCAACGTCGAACTCTATTGTGAGACTGCGCAAAGGGGGTATTCAAATGCAATCCCT -CACCCATAAAGCATGTCACGAGGTCCAAGAGGAGCCGCGCAGCTTCACCACACGGATGCT -GAACACACAGAGAAAGGCACAGAAAGGCCTTATCTAAGTGGCAAGACAGAAAGATAGAAG -AATAACATGAAACAACCAAGCCAAAGACGAAAGCCAAAGTAAAAGCAGCCAAAAAACAAG -GATCCGCCGATAATATTACAGGTACCAAGCGGAAAGAAAGCAGACAGGTCTGCTTCATCT -TCCAAGACCTGGCCTTGGTATCAAAATCCGTCCGTTTTGGACACCCCAGACGCCATGGTA -AAAAAAAAAAAGAGAAAAGTTTCAAGCGACGGTGGCCAGTAATGGCGTTGGTAAAACCCG -TAGGTTCAAATGACAAAAAGAAATATGCAGTAAGTATGCAAGTTGGGAAAAGAAAAATGC -GATGAGCGACAGCAAAGCCTGCTTTGGTCATGAGTATCATCAAGCCAGCCATCTGATTGA -GATGGGTGGAAGAAATTAATAAATGAGGATAGAAGGGTGGCATTGACTCCCGGGGCATGT -TGCCCGGGGTCCCCGGGGGTGGCTGAAAAGATGGTCTCCAAATCAGGAAAGAGACGCGGG -AAAATATGTCGGGTCATGGAATCGTTCAAGTCTGGTCTCAAAAAGGACACAATGCGGGGA -GATCACAAGAATTCATCAATTGATCTCATGATCAGGGGCTCGCCGCCGGATGGAGGCGAG -CGTGGGTCTACCAGTTCGGCGGCAGACCGCAAAGCGTGGGGGATGGGAGGTGGAGAGAGG -GCTGCGGAACGATCGATCCTGTGGAATGAGGGTGGCGCCAAAGGAGTTGGCGAGAAGAAT -CCGTGCGTCCTTGGAGAAAACATGCCACTAGAGTCTGGCGCATCACGTACAAACACATGT -GTTTGAACGAGCGGGTCACAACTGCTGACGCTGCGCGGCAACCCTCGTGGCCGACCCGAG -GCAGGTTCCACGGACTGGAACTTTTGTCGGAACGGGAGCGGTCGGGGGGACAGACTCCGC -TTAGGGGAGTAGGACTTGAACTTTGTGTAGCTACTGCTGTCGTCGCTCGTAGTGGGACGC -ATCGAGGTCGTTTCTGGGATAGAGAGGCAGCTGGCTGAAGGTGCCTGCGGTAGTCCGCGG -AAGGATGGCCGACTTGTTTCGTTGGATGCTGGGCGCTCAGCTCGTGCAACCTGGGGCTCT -GAGCTTCGCCGACCGGTTTGGACGAACCACTCCTGGGGTGGTGGCTTAGATGTAGATCCC -CGCAGCAATTTAGAGCGGAAGTCTGTGAGTTGATAGATGAGACTCATGTTCGGGCCAACC 
-CACTGGCTGCGTCCCTTGACTGCTTCGTACATCGAGTTGAAATCCAGATGTCGGTTCTTG -TACAATCCGTAGGCAATTACCAGCGATGCTGAGCGGCTGGCGCCTAGCTGGCAATGCACG -AGAACTTTCTTTCCGTCAGCGATGCGGTCGTCAATCAGCTCACATAACGGGAGCAGGTCT -TCGAGAATCTCAGAGTTGTGGTCCCAACCGACATGGAGGTACTCCGGTTCCGAAGTTGTA -GGACTTGAGGTCGTAGGGGTTGGCGACTCAGAATCCGAGGGTTGAAATTCCCAGGCAGAC -TTGAATGAGAATTCGGATATCGCGGTCTGGGGCTCCCCGCGGGAGAAGCGTTTCGAGGCC -ATGGAGGCGTTCCGCCATGTGCTCATCACTGTGTCATTCCGCTCTTCCTTGCTAGCACTA -AATGGATTCACCACTTCTTTGGCGACGTTGATCACCACGTCGAATTTAGACGCTTCCTCT -GCTGTTGGCTCTAAGTACAAGTAGACACCAGAGTCGTGAATTCGGATAGGTCCATCCGGG -TAGCCACGATCCTTCTTGCGGGTTGACTCGCGCGAGTCGGGGTGGTCATCCTCTTCTTCT -AAACCATGGAGAACACGGTGAGAAAAGCCAGATCCATCGACTGACTTCGTGGAGGACACA -TCCTCAGACGCACTAGACGAACGGCGCATACCAGGATGTGACATTAGCCGGGGAAGCTGC -ATCCCTCCCCGTGGGGCGAAAGAGGGCGATGTGATCGTTGGAAGTGAAGGTGAGGATTCT -GCGTGACGCAGGGATGGAGTTGGGGGCACGGCATCAGTGATACCAGATAGCGGCTTATTG -AATGTGGGGGTCTGGATGGTGAGATTGGCGCGTCGCCTCGTGCTCTTAGAAGGGATCAGG -AGGGGAGATGGTGCGGCGTCGAGGGGCTGTTTTGACGTCTCCGTCATTGATGCAGTCGCG -ATAGGTGGGCGGGAGTGAGATGGGGGCGGCATTCGCAATGACAAGTTCTTGAGATTACGC -GCCCGCCGATTTGGGGAGTCTGAGCGCGTACTATGAGGGATCGGCTGGGACAGACGCGGT -TCACCGACCGGCGCAAGGCGCGGGGGAAGTTCCATGGGCCGTGTGGGAGGCATGAACTTT -GGGTATGAGACAGACATAGCAAAGGGAGATTCCGGGGACGAGCTGGGAGAGACGTCACCC -CCGCCGGGCGAATCAAAGGGCGAGATGGTGGTGGTCGGGGAGGACTCGGTCGATGCCGAG -GTCATAGATGAAACACTTTCGCGATGGGGGGAAGGAAGTGGACGTGTATTGACAGGGACA -TGGGTTGGGTATTGAAGAGGATGAATGGTGTTTGCTTCTTCTTTGTATCGTGGAAGGGGA -CCACCAAATACAGACATGAAAGACGGAAACGAGTCCGGAAAGGACGGCTGGCTGGTTTGC -ACTGGCATTGTCCGCGGAGTTGACACTGTGAAAGGAAGTTCCTTTCAATCCGGCTCTCAA -GCTCAAGAGCTCGAATGGCAATCGAGATCTCAAAGATACACCTGCTACCTGCGCAAATGG -GTCTATGGGGAAGAGAGTTAGATTGAATCTAGGGCAAGTGAAATGCAAGGGTGCTTGAGA -CTACTGATCTGATGTAAATCACAAGGGGAACGATGAGAAGAGAACGAACATACCAGATCA -CAAAAGAAAAGAAAATGAGAGATGATCAAGTTGAAAGTGGTTCCTCCAGGGAACTGGAAG -GAGAGAATAGATGTAGAAAGGGAAAAGGGAAAAAAAAAGACTTGGACTTTGGAGATCAAG -GTAACATACAATACAAAAAAAAAGAGGGGACCGATCCAAAGATGGAGATAACACCAATCA -GTGGCCTTTGCTCTAATTTTAGCCGCCCTGGAATGTTTAGTCGGCGTCAAACGAACGCAA 
-ATGAGAGGCTCGATAGTGTGCCTCTTCCTACCTTAGTGTATATTATAATCTCTATCCGTA -TTTTAAACATCCACATTCGGAAAATAAATTCGATCCCTTTGTCCAGTTTTGTGCAGTCAC -CACGGCTTAGGGAGACGGGATCTGGTGGAGATATATGGAGTACGGAGTACAGAATACGAA -GGCAAGAATATTCCAAATCGGAAATCCCAAGTTACATTACCAAGTTACGGAGTTGGAGAC -AAGGCAGAATGTGAACGGGTGAACGGTGGGCTGGGCTGATCCATGGATTAGATGCCATTG -GACCATTCAACACTTTAAGGAGAATCCCCAAATAGGAATACCATGATATCTGATTGGATG -ACATCAGGAGGAGAAGGCTCATCCTGAAACCCCATGGATCTGGGTTTGGCATATGAAACG -CACCAATTTGGCACTAGTGGGACACTAGTGTATCTTGGCGGGTTGGGTCGATGTTGACGG -GGTATACAAGTGTTCATTTCATTTGTATCTATACTCCATATAAAGAGGAGAGGTACATTT -TAGTACATTTGATGGATATTCAGTGCCTACTCCGTACTCCGTATCCTCACCTTCATTGAG -GCGTACGGCCCACTACGTACAGTTTCAATGAAGGATCGGCCCTTTTCCACGTGGGAACAA -TCCTGAAACTGGATCTTCAGTTTGCGATATACCGAATCTGGAGAGGATATTCCAAGGTTC -AAAAATACACACATGATATAACCCAAAATCTGGTTTATGCTTAAGTCCCAGGTAGATGAC -CTCCTTTCGCTGGGTTCCTGAAATCAGGGAAGTCCAGGTGTTTCACCATCAAGTTCTAAA -AATAAGTGTGCATGGCGATAGCAGCTTACGGTAATTCTCGTGTTTCAGACTTAAAGGACC -CTCTCATTGGGTTGTGGCAAACGAGGGCTGAAGCTAACTTACTCCGTGGGGATGAAGTCC -TGATCACGCCTTTGCTTATTGATATAATTTGCAAATCTTATCATTCACCCAGCCCGTCTC -CAACATGAAGCTCTTCAGACTATTCCACCACTAGAAACCGAATGATTCGACAATATGAAA -ATCCAGACAAAGTACCAAATTAGTCTTATTCCATACTAATTGTCCGGAGTTAGCTGAGGT -AGATTGTAATTTCCACATGAATTTGTCACGCTAACTCAGTGGCAGTGCCCAGGTGGAAGG -ATTTAGATTACCAGGTAACGGAGTACATCACTATATGACCCTGGCCATAGGGATCTAGAC -AAAGCATTATTTATGTGCCAGGACCGACCGTAGGAATCCAAGTTGTGATGTTAGCGAAAC -CACAAAGAATATGATGTACACTAGAATAGAGATCACTAAATAAACCCCTTTATTGGCACC -TTGCTTATCTCCAAGTATAGCCCAATATTCTCACCCCGGGAGGTTGAGGACCGCACTACT -ATGGAGTACTCCGTACTCCGTACCTTGTTGCTTGACCTTTGGACAGCATGATCAAGAACA -AGTTATAACACCCAGAAGACTGTTCTTGGTTCAAGCAGCCTGCTTTGTCTTTTACCATTC -CTTCCAAGAATTGTAAGCAGCGTCTCATAATAAAATCGCATCATAAGATAATGACGTTTG -TTCACACAATAGGCCTACTATATGTATTGTAGAGTGCAATCAAGGGTCCCCGAAGAACCC -ACTCTGCTTCTTCCAAGATTCAACTTGGCACCAAAGTAATATGCAGATACTGCCTAGGTA -CCTCGATATAAACTTGCAGAACTTGAAGTATACCACATAGAGCAATTTGTTTGGGAAAAA -ACCGTTTCTGTCAAATTCAGTATTTGTTTCAAAGTAACCATGGTCTTCGGTGAGGCTGAA -CCATGGTTCAATGCTttgtttttttccttctttttcttcttcttctgcttctctgctcct 
-gctccttctccttcttttcccttttgcccttcttgttccatctcttcATATCTCAAATTT -CATGTAGAAAAGTCTTACAGAGTAGAAGCTACTAGACTAGAATAGGTACATAATGTCCAA -CATATTGGGTCTACAGGCCATTCAAAGTGGTATTGTCCATTCCCAATAAGATCCAAGTAA -ATGCGAATAGTGAATAAAGGCTCGTCAATATTGAAAATTTCCCACACAGTATACCCGCAT -TGAATATTCTGACAGGTCAAGTCCACAAGTCCTTACTGACATCCTGCAACCATGAACGGT -GCATATTTCTCACCCGTCGGAACTATACAATACTAGCTTTCCCCAGAATCGTTACTTGAG -AGTACTTCAGCCCTCTGTACTCTGTGCAGAGTAAGTTGTACAGTGCTACAGAGAGTACAA -AGTACAGAATGGCTGCAGGTTCCCGTGACCATTTTCAGCCCAACCAGGTCAATCAGCAAC -AAAATCAGCAACAATTCCAACATTTTCCTCAATCGCGTTCTTTGTCGTTGCACACTCGCT -TCGACAGCCCCACCGATGAAACAGCCAATCTAACCACCAAACGCTCCTTATTGCACCGCG -CCCGGCGATCCGTCCTAAGCTTCAGCTCACCCAGCCCCCAAGCTTCTGAATCTCCCTATG -CCTCGAAGAGGCCCAGACAGGACGAATTTCACCAAGATCGCCCTGATGTGAAAAGAACAA -TGCCATCAAGTCTGGCTTATTCTGGAGCAACCAGAACCTTAAATGACAATGCTCGTTCTG -TGAACGGTCTAAACGGTACTCGGATCGAGGACAAGTCTTCGATGGACGAGAGCCGCCCAA -AGAACGAGGATGTGTTTCTAAACATTGCGAGAGCGGATTCGGGCCGTCGCGACTCGATAG -GTCGCTCGGATTTGAGACGGGTAAGTGAGAATAATATAAGGCTTGAGCGGATCGCATTCT -TGATCGCTCGGGTATTCCATCGATCGTGGCATCCGCTCAGCCCATTCTCGCGACTTTCCG -ATAGCTAACGAGAATCTATGAACTAGTCACGGCTTGGTTATTCTAGCCAGAGTCTACGGT -CGCCAACGACAGAACAAACCCCCTCACCCGATCAGCGATACAGCAATATCGACAACCCCT -TGCATCTCCAAAATGACTCCCCCACTGCCCCCTACAGCTCCATGCACACACCGGCGTCAT -CTGCCCATCCTCTCGACGACTCAAGTCGCCTCCGCTACAGCAGCCTCGGATCTGGTGCGC -GATCCGTTGTCGGTGTTCCACGAAGCCGGTTCAGCCGTACGAGCCCAGAAACTTCACCGC -GCACACCTTCTGCTGCCGAGGAATTGGAGCGTCGATCCTCTCTAAATGATCCGCGCATCA -ATCGCCATTCGGGACTCTCCACGATTCGAAGCAGTCGACAACCTTCAGGCTCTGAAGTGA -CCGGGCGACCGCGAGCCGATACAGAACGATCGCGCGCTGATGGAACCGAGTCAACATTGT -CCACCACGGCCCCCTCGACTGTCTGGGACGAGCTAGATGATCTCAAAGATCGGATCAAAA -AGTTGGAACTGACAGGGAAGCTACCTCCCTCATCTTCGGCGGCGATGTACACCCCTACCA -ATGAGAGACCGCGGACTGCAAACACTGCCGTGACCACCTTATCCTCTTCGCCGAGGCAAC -GCCGCAAAACCAGCTCCTCAACAACAGATACCGAAAATAACCCAGTGCACCCAATTCTGC -AATCCGCATTGGCCAAGGCCAAAGTTGTTCTGAGCGGTGACGTGTACGCCTCCCTCGAGG -CGACGATAACCGATGCTTTGAACCTTTCCACCGCGTTGGGCGTCAATGCCGCACCGTCAG -GCACCATGTCCGTTGTGAATGGAGGATACACTTCACCTGAGCGACATGCCCGCCGCAAAG 
-CGGACAGTGTGTGCCGCAGCCTGACCGAGCTTTGTCTAGCCTTGACGGATGAGCAGCTAA -AGAATGTCCGACCGTCGTCCAGTCGCGAGACCGGCGTGCAGCCCCAGTTATTGAATGGCA -CGGGGGTGGATGCTCGTATGTCCATACCAACCTATCAGCGAAATGGGACTCAGGAGCCCG -AAGCCATTGAGCGGCGCCATAGCACGACTCGCATCTCCAGTCGCCTTGAGGCACGTCGTG -CATCTCTGGTCAATGCCAGCCCTGGAAACACGACGGATATGAAGCAGAGTCCAAGCCAAT -CTCCCGGCATGTCCAACCCAACCACCCGTCTCAATCGCATGTCAACTTCCCTCCGCAGCC -GCAGGCAGACCATTGGTGAGGAAAGTGGAGAGACTGAAAATCCACTTAGTCGTTCAGTCT -CCAGAGCCAATACAGTGATTGGCACACCGCTTCCTGCGCAAACCATACCTCCTCCTCGGC -AACGATTCTCCCAAGGTCAGACTGTGTCCCGCTCCATTTCCGATATGCAGCAAGACCAGA -GCGGTCTAGGATACTCCCCCCGCTCCCCGCAATACCAATCGTCACAAATCCCTCAGTCGC -AGACACAGCCATCAGAGCCTCGGACACCGACTCTCTCGTCCAGTCTCTCATTCCGTAGAA -GCTACATGAGCCCTGCCTCGTACACCCCCGCGACATCCCGCTCCAACATCCAAGCTGGGT -CTCGCCGTTACGGTCTGACCCCTAGCTTCTCTTCCAATAACATGCAGGCATCCCCCGTCG -AAGAAACTCTGCGATTGCCCCAATTGGAACCATCCCAGACCAGAATCTCCACCCCATCGA -GCAAAGTGGCAACCAGTTACACGCCAATCCAAACACCACGACTTCGAACAAACAGCTTGG -GGGCACGGAGATTCGGCCTTCGAAACCGTGTCTCGGCTACTCTCAACAATGTAAACCTCG -ACGACAGTATCGATTAACGTCTACCCCCCCCCATATCCTCCGGCTGAATCCCATCATGTA -TGCCGTCGACAAATCCGGTTACGTCACCTTCAAATCTTTTACGCAGCGACCCCACCACAT -CTGGTTCTCTTTCGGACATCTGCTTCGACATCGGCATTCTCGATATATGCATGTCTCATC -AGTCACGGTCACCAGTCACGGCAGTTCACCTTTTTGTCTGTCCACGGTTCCACAACATTG -CCCTATGCATTCTAACGAATCTCCTCTGTATTCTTTGATCTGAGACGAATTCACGCTGTC -AGCGTATTTGATATTCGTCTACGGGTATGACAGCAACGACACACCATCTCACCACCGTGG -ACTAGCATGTCTACAGGTGATTTGTCAAATTCATGTAAAAAACCTACCATTGGCGGCGTC -TCTTTGTCCATAATACTCTACCGCGTGGCACTATTTCATTTGTGTACCCGCCCTAGGGGT -TTTGATGTCACTTTACTTTCCCCTGTTTTTCTTTTTTTTTCCTGTCTATTCTTTTGTCAT -ATCTCTTACTTGCAGGCACGTTTTGTATCTAGGTCTGTTGCCTGTAGGCTGGTCTTTATT -TGTTTTCTTATCTCTTTTGTCCGTGAGTCACTGATGAgaatttacatgatgatgaaatgg -aatgaaatgagatATTCAAGACCTTAAGCAAAGTCGTCTTTTACAAAAAGGAAGCTCGAA -GAGAGAAATATATAACTCGATACGGGATAGGACGCCTCTAGACAAAAACGTACCTAGCGA -AGCGTCTGTAATCTAAAGCGATGCCATTTCACAACTTGTCCCTCGGGCTCTTACTACGTA -AGTAACTATATATAGAAGCGCAATAGAAAGTGGAGAAGAATAGCCGCAAGGAAGAATATA -TTATATGAAACGATATACCGTATCCCCCTATTAATATCAATTTCTATCCATCACAGTCTT 
-CTGCCACTTAGATGGGTATTGATCCAAAGGTCCTCGGCGTTTTAGAGGCGCGACTTTTTC -TAATATTTGATTGATAATATCAGAACCCAGGGCTCTTAGCTGCTTGAATTGAAAGCATGT -CATTATATCCTAGTATTTTGATCTCGAGGTCTGCGATTCAGCCCATTATATCATGTAGTC -ATTTTAAAGACACTATTCAAAGTCGACAGTCGTGAATACGCTAGGCATGCTCATTCGCAG -GGCAATCTACTGGCCATCATCTCTGCAGGGTGCTCTATCGAGCACCAGAAATATTCCATG -AAGCCATTTCATTTTGGGGATAGCCACACAGACCAGTTGACAATATATGGGCCATAAGAG -AGCCCCATCAGTCGATGGCAACTGCGGTTCTCTCCATCTCCCGTCCTCATGTCTCCGAGC -ATACGGCACTCCTCGTGTGGTGGCCAAATGTCAACCTATTACTGCTGACTAGAGCCACTC -TTGCCAACCTAAGGACCTAAGGTTATATGCCCAGCTGTCAGTAAAGAATGACAAAGGCGA -TAGTTAACCACGAAACTGTAGTACAATCTTAATATGAACCAGTTGGACAGACCGTTCGTA -CAAATATATCTTGGTAGATGTACCGAAAGGTCTACCTACATATAAGAGATCACTCCCTGG -AGTATTTCACGGGTTCCAGATAGTGGCGATGTCTTGATTGACATCTAACGCCAGATATTC -ATATTTGAGAACAGCCAAGGTAGTACGCAGCGAGGAGATACATGAAAGGGTAATAAGGAG -ACGGAAAGCGTTTTTTTAATTATTTCAAGGTTTCGTTTTCAAGGGTCAAGATACACCACA -ATAAGGAGAGGGTAGGGTTGGTAAAAGGTGATAGCGTTGCAGTAAATCAGGGATCGGAAG -GGAACGAGAACAGGGTACATAAGTCGAGGAGATAAGGGTAGAGGAAAGACAAGGATCAAT -ATACGTCACCAGCTATGAGCTGGTGACATACAAGGTGGAACGATGACAAGGAGGAGGTAG -AGACCGGGATGGTGGCGGAGAGGTCTGTGATTCAGGAGTCGGGGTAAAGACGGATTAAGG -ACATCACGGTGACAGTTCAGCTGGACGTGGGTAGCGAGGAAGGGAATGATGTGAGGCTGT -TGTCATTGACAAAGTTGAGGGTTAGGATCGAGAGACATTCGGGCTGCGATTCAATTCAAC -GCACAATGGTCGATAAAGACCACACAATCTTGGTATCTAATACGCATGTGTACTCTCCAT -CCATCGCAGTGAACTTTTGCCTTTTTTGTGAATGAGACGTGTGAGGCTAGAACCCCAGAA -GAGAGCCGAGGAAGAAGATTTAAAGCAGAATGAAATAGAGGAATAAAAAGAAAATAAAAG -AAGCGCTAGCACATGACCGCCGTTGATCGTGACCGACAAGTCTCCAAGGAAGTCGTAAGG -TCAGAGAACACAAAGGAGCCCTGCTAGGACCGTCTCATCCGCACACACACACACACTAGA -GAGGATCCCAGAGGATTTACTCCTGAGGAAGCTGGCTGCGGCGAGTCTTCTCAGCCTCTA -GGTTTTCATAGAGAAGGTACAGGGCCTTTTCGTTAGCCTGAGCACTTCCCATGATGGTGA -ACATCCGCTCGCCGGTCTCGTCATGAGGAGCCTTGGCGATCGAAATGCGAGCGCCAGAGC -TTCGGCGGATCTCCGTGATCTTGGTTCCGCCTCGGCCAATGATGCATCCAACCATGTCAG -CAGGAATGCTGATGTTCTGGGTCTGGATCTCCTCGCCATCTTCCGTAACCAGGGGATAGC -CGCGGTTTCCAGCATCGGAGTTAGACCGGCGGTTGTAGCTGCCATCGCTGAAGTCGGCAC -CATTGCCAGTGCGGTTATACTGACGGCTGTTGCTGTTGTTCTGGTAACCATTTCCAGCCG 
-GGGGGTTGTTGTTGATGGGCTGCGAGCCGCTGAGAGAAGCGCGCACTGCGGGGTTGTAAA -GAACAGTGCCGGTTCCACGCTGCCAGTCGTCAAGCAGACACTTGCCAATTTCCCAGATGG -CCTTCTCGATGCCCTCGGGGGTACCCTGGACCTCGACAATGCGCTCTGTAGATTGGGGGA -GCATTTCCTTCTGCGCGACCATCCGAACGCCCGATGCATCCTGAATGTGCTTGATCTTCA -GGCCCTGTCGTCCAATGATGGTACCCATCTGGTTGTGGGAGATCAGAAGGCGAATGGCTG -AAGAAAGTTAGAACAGGCTCGGATTGGAACGGGGGTTTTCGTATCCTCTGTATGGGTCCT -TTGAATTTCCCCATACAGAAGCATGAGTCCAAATCAAGATATCGGAGAGACAGAACTAAA -GACATCACTTACGGTGGGTCCCATTGTTAGAGACAATGCCGCCCATGCCCATTTGCGGGG -CACCCTCCAAAAGACCCTTTGCCACGATCGCATAGGCCCGAGCAAGCGAGCGCAATTGAC -CAGTAACGGTCAGGACACGGTCATGGACACCGGGAACTACCTTGCTCACACCGGCCTTGA -CACCGGTCTCGTCGCGGAGGTCGGCAACGTTCTTGCCAGCCTTGCCGATGATGACACCAG -CCTCCTTGGAAGAGACAATAGCACGCAGAGTCAGCTCAGCGATAGCATACTCCTCCTCAG -TCTTGAGGGGGGCCTCGAGGTCACCCTCGGCGACGTTGTCGTTGAAGTTATCGGTCATGG -GCTCGTCCACGACGGTGGTTTGGTGAATCTCAGTAGAATCCATTGTTTTTGATAATTGAT -GGAGGTGGGAGAAGCGGTTTGAAAGGGTCGAGTGGCGGGATAGAGGGTTTCTTTAAGACC -GTACGGTCGCGGGATGCGGACGGTAGAGACTTCACATTCAAATAGTGGCAAAGTCAGCTT -CTCAACAGAGGTAGAGAGAAATGCGCATGAGATACACGCAGATGCGCAAGCTGAAGTGGG -GGAAATGCGTGAAATGGGATCGAACTGCTAGAGGACTCACAGAGGGAGATTAGGAAGTGG -AGAAATGGATGGGAGGGGAAAGCGGCAGGATCTCGGATTTagagagagagaagagaagag -aagtgaataggaaaagcacccgatgagagaagggaaagggggagaagagagagagaaagg -aagaaggtgaggagaaagatggagatgtgaaagagTTGAAGAGGTTGAAGTTGAGTGTTT -TGCTTTAAGCGGAGACCTATGAAAATCTACCTCTTTTTTTTATATGACCTCTTGCTTTTT -TTATGATTTTTTTTTTATTTGTATTCTTCTTTTGCTTATTCGATTTTATATTACTTTTTG -GTGATATATAGAGGGAAATTGAGATTTATACCCCCCCTTAGGATAAACGTACCCCCTCCC -TGCCTGAGCTTTCAGGTGGAAGTTCCTAATTGGGTACCGCTCCTGCCTTGACACCTGGTT -CTGCCTACCAGTTATTTTACCGCCTTTTGACTCCAGATACCGCTTTTTGTGAAAGACCCC -ATGTAATCTAACTGTAGCAAGACAGCAAATACCTGTCCTTTAGATCTAAATCACCAAGGA -TATTAAGTGTGTCATGCGAGCAGGGATTTCTTTATGTTCAAAATGTAGGGCTTTGTGCTG -TGTAAACCAGCATTAATAATGGGCAAAATACATGGCAGAAAGAGATATGGAGTGGCAGAA -ACAAGAGCAGATTTTAAATTAGGAAGGCGGTATATATGATCCAGACAGGGGGTATCTGTA -TCAACACCCTACATAGTCAGATATTCTCTACTGGTCAGCCTGGAATATCACCTTTTGCAC -CTGCATTTTGTTGAATATCTCATAGCCCTCCACAGCCTGTGAGAGAGGCATAACATGATC 
-AGCCATGAAGCCAAGCTTATGTTGATTCCTCTTGAGCACTTCTAGAGCCTGAGGAGCAAC -AGACCGAACAGGGCATCGCCCCATCTGGACAGTGAGGTTCTTGGAATAGGCATCATTGCC -ATCCCAGGGAATCTGAGTATCTCATTAGCAGAAATATGATGGCCGAATCCAGATCTACTT -ACCTCTCCATTGTGAACACCCACACTACTAATGGTTCCCCAAGGCCGCAGCAGTTCGAAG -CCAGTGCGCAAGGCAGGACTCAGCCCAACGACTTCAATGACAGCATCGGCACCACGTCCA -TTCGTCAGCTCCTGAACACGCTTGTCTAGACCAGCCCGATCCAACTGGAAGTTCCATGGC -TCAGCACCGAGCGACTTAGCAAGCTCTAGTCTCGACGGGACCGAGTCCACTGCCAGGATA -TGCTTGGGTTTGTATTCCAGAGCATTGATGAGCGCACAGAGACCGACAGGTCCACATCCA -ATTAGGACTACTGTTTGTTCCGCAATCTGCTCTGGGGTCGAGTTCTTGAATGCATTGTTG -GCTGCGAACCAACCTGTAGGGAAGATATCTCCCATGAGCACCAGGTACTTTTCGTCAACG -CCCTCGGGGGCTTTCATGACAGAACCCTCAGCATTCGGGATTCGGGCCTATTGAGTCTTG -ATATCAGATTATTGAGCCAATGCATGATGTACTTAGAAACTCACATATTCAGCCTGTCCA -CCGTTCAAGCTGTCACATCCAAAAAGTTCGTTCTTATCACAGCGAGAAGACCACCCTTGC -TTGCAGTAGAAGCATTCGCCGCTATCATGATATTAGCACAACGTCCCTATAGTTGCGCAG -AACGAGCATCTGGTGAGCATACAACTTACCAGGAAGTAGTAAACGCAGTGACGACCCGGT -CACCCTTCTGGAGAGTCTTGACGGCACTGCCCATCTCAACAATCTCGCCCACGAACTCAT -GGCCCATGATGAATCCCGTTCCTGCAGGCTCCACGCCACGGAACATATGCAACTCACTAT -AGAATATTACGAAAATTAGCACCGATGGCCCTCCCAAATGAGTATAGGTCAATCCCACCT -TCCACAAAGCGCGGTATATCCCACTTTGACTACAATATCCTCCGGGTTTTGCACCTTGGG -AATAGGCCTCTGCTCAACAGCCACCTTATAGGGGCCGTGGAACACGACCGCCTGCATCAT -ACCAGAATCTGAGGGGGCGGGAGTCATTCTGACGATGTCGATCCCCAATATCTTACAAAG -AAATTCCGGTAACCGAAATCTGATAGCAGAGACTTACTTTGGGATTTGGCGGGGGAATTC -GGAGGTTGACCTTTGCTACGATGATAGTGAATACTGCTAAATTTCTTCAATATCGAACGA -TCAAATTGCAGACGGATCCGAATGGGTTAAAAAGAAGACTATTGACTAATTTCTCCAGTT -TTAGACTTCTGTGCGAATCATCTGGTGGGGCTGGTAGACTGTGCCCACAGTGGTTGGTAT -CCACTGAAAGCGATGCCGAACATAGGATACAAAGTAGATGGACATGGTTGATTTTCCGAC -ACGGCAGTCCCCTTCTATACGATATATGTGTTTGCAGATATAAACTCCAAGCAGACTATT -CAATCTTAAAGGTTGTGGATGCTACGTGGAAGAGAGATAAGCCGTCAAAGTGTATTCCAA -GGACGCGCCAGAAAGCTCAGCATGGCACTCAAGTCCTGCTCTGCATGTGTAGCTGGTTCC -GGTTCTAATTGGCCCAAGGAGACCAAACAAAAAAAGCCAAATGCTCCTTGTAACGCCTTT -CACAAACACCAAAGCTGAAAAAGAGGAAGCAAAGCAAGCCGCAAGAATCAGTCTCGAAAG -CATGAAAACGCAAGAATGCCCAGAGAATGCAAAGAATGCAATCCCATCTTTATCATAAAC 
-CCGAAAACTGGCCTGTCAGATGCACGAATTGAAGATCAAAATCGTCACACATCAGGGCCA -TCGCATGAGTGGAAGGAAGTATCACTCCCACTAACAAACGACGCAGAATAGCTCCTCCTC -GAGGATGAACTTGCCCTCCTTGTATTTCTCACTGCTCTCGTATGCCTTGTCGTATTTCCA -TCCTACCGTTTCCTTGCACTGTCTGCAGGCGATGTCGCGGACAATATGTCGACCTGTCGT -CATGCTGCGCTCTACAGCTTCGCCTTGCGTGACGTTGACAACTTTATTGAAGAGATATGC -TTTGCCGTGCTGGCCTCGGAAGTTCTAAGGAGTCGGAGGTCATGGGTCAGTAACAGGTTA -TTCATAGCAGGAGGTGTGTAGGAAGAGTAGGATGTGACGAGTTATCATACCCGACTCATG -ATGTCGTTAAAATCTGCCAGATGGGTCTTGCATCCCTTACACCCAAAAATCTTGTTGGCA -GTCAAGTAGACGTTGTATGAGAGACCCATAGTGGCTGTAGTGATGCGGTTATTGCCGGAG -AGGCGTAGGATCTGTTTCTGCGGGGATCTGAGAGTGCAGTCGTGGTCCTGAGAGGTGAGT -TCGAAGCAGCAGAGAGTTGACACTTGAATCTAGAGGGGGATATGCGGGCCCTTTATTGAA -AAGGGGCAGCGTGTTTGCCTGCTTTCCTCGATATGGAGTAGTAGTTGGGCCCGCCCTGAC -TTCGATTTGGGAGGACCTCTGCCGTAGTCCTGCTGTTGACCCCCCTAGTGGCTTTGAGGC -TTTATGCTTTGTCAAGTGTAGGGTTTCACAATTTATAAGAAAGTATGCTCCTGGTCTTGC -AGAGGGTCTATGGAAGAAGGAGGATGTGTCGGACTTCACAGCCCTGGAGCTGACGTGTGG -ACGATAAGCCGATAAGCGGATCGGAAGGAGAATCGGGATTCACGGAGACAGGAGATGGGC -TAGAATACTTTTCAAAAGATCAGCTGGCTCAGGGTGTACAAATGTTTCAAAGGTCAAGTG -AGCTGATCCCAAGTGAGTTCGGAGGCTTTAAGTAACTGTGCCTAAGGCCCCGCACTTGGT -CTCAAACCCCGAAAGCTCCCTTTGTCACTTCACTTGAGGTGTTAAGATCTACATGGGAGA -TACGAGACTGCACTGGGTATAGACAAGTTATTTTTGGGGGTAATCATCAATCCAGAATCC -CTGTACATAAGGCGCCGGTAACATGCGCTCCTCCAAGTATGCTCAAATGACAAAATTACA -TGTCTGAATCAGCTGCATTCTAATTTTTACAGAAAGCCCTTTCACATCTCTCTGCTTCAC -TCGTAATCCTTCACATAACGACCAGGCAGCTCAATGTCCCACATAGGTCGCTCCACCACT -GCAACTTCCAGCATCCCGCTCGGGCTCTGGTCGTCGGAGCCCTCGTCCGCCAAGGTGTTG -AGAGGTACGATGCCGAGGATATCACGATACTTATAGGTGTGCCACCACCGACGTGTAGGT -TGAGCTGCGACACCGTTAACCTCCACCAGTTGCTTCTTCGGGGTATCGCCAGCGCCATTG -GCACTAAGAAGTGATTGGTGCTTCCGAGATCCCTTCTGTGGGGTTCCATTTTCAGTTCCA -TCTCCACGTCGTAGGCGGGCGAGGGTTGACTCATTGGTGGCTGCAAAGGTTTCGTCGTAC -TCGTAGGCTTCGTCATCTTCGCCTGGGACGCGGGTACGTGCCTTCTCCAGCGCGATCCAT -TCTCCCTGTGATTCATCGCTGCCGACGATCTTGCGGACCTTGGTACCAAAGTGAATATCC -ATCTGTGACTGAGGGATCCGGTCACCGGCACCGGTATTGTGCTTCCTTCGCTCGTCTTCA -TCGTCATCGAGGGCTTCGCGCTTGCGCTTCGATGCCTTGGCAAGCTGACCCGCGGTCTTG 
-CCCTCTTCGATTCCAGCTTCAGCCTCCTCGGGGGAGGGGAAGTCTTGGAGCAGATCAAAC -ATCCACAGCCATGATGCTCCGTAGAGCCAGAGGCGCTCGCGGCCCTCAAAGAGATCCCAC -ATGCAGCCCATAGCTCGGTCCTTGACTCCTCGGAACTCTGCAGGAAGGTATGCCTTGGGA -TTGCGTCTTGACCAATCGGATAATTTACCATCCCGAGCGTCGAATTCGATGAGCTGGTGC -TCACTTGTGAGGACCATCAGCCGAGCTTCGCCAGACTGATGCGTGGCATTAGCCAGACGC -TTTTGTGTGGGGGCAGAATTGGGACGGAAGGACAACAAGGTAATGCCAGACTTCATACGA -GGAATTGGCGATTCGGTGGCAGCGGTAGTCCAGCGCTCGCCCTCAATTATGACATCCTCG -TCTTCATCATCGGATGAATCATCGTCGGAATCCGAGGCTCCGTTACGCTTGACCGCAGCG -TTTGCGGGTGCTGTCGCTTTTTCCAGGACCCAGGTGTCGACGCAACCAGAGATATCACCA -CTTGCCAGGACCTTGCCGTCATCCGAAAACACTACAGAGCGTATCGTTCTCTCATAGTCA -CCCAATGTTCCGTGGGATGCCTTCTCGTGTCGTACGTGACGTGTGGCTCTCTTGAGCTTT -GCAAACTGCGGCATCACCTGAGGCTTCTCATTAGATACAGAGGTTTGCTGGATCCTTGCC -ATGTATATGTCGCTGTTGGGGCGAACAACCGAAAGCCATCGGCCGTCAGGGGAGATGGAC -ACAAGGCGAGCGCCTTCATCGGAAAGCACCTTGGGAACTTCGATCTTCTGGATGCGCAGA -GCACCCTTCTCATCGCCCTTTCGCCGACGAAGAGAGAACAATTTGACCTCAGCAATGGTT -GCGATGGCGAGTGTCTTGCCGTCCGAGGACAGCATTGCGGATGAAATGTTTTCCTCGCCC -TGGATCAAAACTTTTCCGACTAGCCTATGCCGTTGGCCATCAATGTTCTCATGCAAAGAC -GCAGGGCCTCGAGACACACGCCACAAGCTCACTTCTCTATCCCAGAAACTCATAACCAAA -CGAGACGACGGCGCGGAGGTGAGCTGGGGAATCTGCGGGAGACTCGAAAGCTTGCGGTGA -TGTTCCTTTCCAAATTCACGAAGAGGAAGAACGATCGGTGTTGCGTCGGGACCTATGGGC -GAATTAGGTTAGCATCGCTAAGGATACCTCAAATTACAATGGAGAAAGTTAACATACCTC -CAGACACGGCAATACTGATGTCCTTTGTCTCGTAGACAGCAAATGTCTTGACGTCGTGGG -TGTGGTAACGACGATGCATAATCTCGGCCCAGCGCGCCTTCTTGTCGCCCTTCTCGCCCT -CCTTCTTGCGGTAGACGACCGTCCTCTGATCGGCACCGCCACTGATAACAGTGTCTCCAT -TGGCACTCACTGCAATATCAAGAGTATCGGCGAGATGTCCCTGTATCCGTTGAACGAGAG -AATAGTTCTTTGCATCCCAGAAACGAATTTCACCAGCAGAGTCACCGGAAACGATCGTGC -CATCCGGGAGACATTTAACCGACCAAACAAGCAACTCCTTCGGACCACCGGTAGGACCTT -TGCCTAGAGAGATCGTACGCAGCTGCTGACCGGTGCGAATATCGAAGAGACGGATAGAAC -TATCAGCATAACCGGCAACAATAGTATTGCGGTTCTGGAAGGTAACACTCAGGACGCGCG -CTCTTTTCGTTGAGGGACGCATCAGGCGCAAAAACTTGAGATCATTATCGGCAGTAGACA -AAATGACAATTGACCCATCTGCACATCCTGCAGCAAGGTGCTGACCTGTGAACTCGCCCT -CGGCGGGAGGAATCATCTTTCCATCTTTGCCTTTGGTCGCCTGCCAGCGAGGCTGCGCAG 
-CAAGACACCAAATCTCTCCATAGTTTCCACTGGAGTGGCGCAAAGGGCGGCCATTTTCAA -GATCCCATTCGGTGACGGCCGTAGAGTATCCAATGCTAAACAGACGCAGTTTTCCCGGTA -ATTTGGCGCCATCTGGCCCATCCTCCTGGGGATCCAGAGTCCATGCAAGTCCTTCGATAC -TTCTATCCTTACCTCCACGCATCACGGTCTCCTGGAACCAGGCGCCGCGCATCGGATTCC -ATATTTCGATATCACCGTTCGCACGACCGATCGCGAGTCGGAGGGTGGGTACGCCACGTC -CAGCTAGATCTGCTGACGGGGGATGCGAGAATGCAAGTGCATTGATCGCTTGTGGATTAT -AAGAAACGAAACGGCAACGGTGGATATCCATGTTTGAAGATAAGGGCTTCGAAGCCAGAG -AGCTCGTTTCGGATGTTTTTGAACGTTGTGCAGATCAGAAATGGGGTGCTGAAGATCAAT -TCAGAAATAATTCAGCTCGGGCTGTAAGACCAGCCTTTAGACAGTCATCTCTTAAACACA -AATGGGTGATTATAGCGTGGTGCCGAAGCTTTCCTTGCGACTGTTGCGCTGCCGCTTTTT -TTTTTCTGTAGATAATTTTTAGCGGCCGGAATATCTTTACTAGTCTGGTATGTTTTCCCG -AGGTCACGTGTGCATGCTGAGTGGTATGGTGCCTTGTAAATCAGGCCAATATATAACGGA -ACACTCATGAGAAGAGCAAGCAATAATGATGATATACCTCTGTGGATAAGAAGTCTCCCG -TCCACCAGGTGTCCCATTATTAGATTTTCGATTCCAAGGCGGAAAGTGAGAGTTTCCCGA -GGTCTAATATCTAGGCACACACCAGGACCTCAGATTCAATATTGAAGTATACACTTGTAC -AAGAACAGAGTTGCCACACGTATGGTCGAGTAAATTCCAACAGCTACTATATATCCAATG -CACCTTCCTTGCCCACTGAAAAAAAGTTGTTTCTAAATACTGTGCTTATGCAAAATCACG -AATGTTAGGAGATATCTTTGAACTTGCTATTGCAACTCCCCATCACAGAAAGAGAAGAGT -GGATTTCAAATCAAATAACCGACAAGAAGCGGCCAATGGCCACTCTATCCACTCACACAC -TAAATAGGAAACAGGCAAGAGTTAACATTCAGCAACCTGACTTGATCATGTTGCAACCTG -AATCAAGGGTATGCAGCGACACCATTGGACACGCGAGCAAGTTTCGTCGGCGTAAGTTAA -ACAACTCGGGATGTAGCCCAGTTTTGAATTTCAGTGGCGTATCAGAAACCGCAGTGTGCT -CTTCGAAGCCATTGTATGTATATTGAGATTATCATGGCAGCTGCAAGGGAGTCAGCGGGA -AACCTGCCCCAAAAAATGCAGGTTCAGCTGAATATTTCAGCGCAAGTAGACAAATTGGGC -CACAAGCAACGCAATCTGCAGCCAATAGCCACATGTAATCGCGAATGGGTGCGACAAGTT -CCTATCGGTATCTCCATCCACAGAAGGTAGAGGCCGTTCATCGACGGACTGCCACGTCAG -AAGCAGCCATCACTTTTTTAGCTTGTCATATTCCATTGTGGCTGATTGGAAAGTAACGTC -TCATCCTTGCTTATGTATCAAACTTCTGAACCCGCTAGACACACATGCAGATAAACTCTC -TGATAGGTCATGACAGTCCTTAGCTCTGCAGCGGACAACCCACATCTAATCGCCATGATA -ACATATCACTGGATGGGCACGAACGAGCTAAAGGGCAGGAGAGGCTGAGATGGATTTGCA -CACCAACATTGGACGATCTCGAATCAAAGTGCGGGTAAGGCAACCTCGGCCAGACCGGTG -ATGACATTTTGCTGCGGCAGGCCTGATGGCGCAGTTCAAATTCTATGTGAGTGTGCATTT 
-CGAGTGTATACCGTATGCCGTATTTCTATGCCGTCATGAAACATTGAGTTCGCAGAAAAA -ATAGCCTGCGGGTTGCAAAGCGGGATTTCACCCACAGGCAACAGCTGCGGGGGTGGCTAT -TGGAGACACCCCTGTCAGCATGGGCTCGGACCCATCTTCGGGCGCATTTCGCTTGGGATT -TTGCTGATATGAGTGAACAATTGTCCTGTCATTGATTTGGACGATCACGTTCATGTTGTT -CGGTAAAAAGTAGCCATGGATCTGGTTTATCCTCCGGGGTTGTTGTTGGCTTGGAGCGAA -TTGCGGCGACCAAGGCCCATCAAACTCAATTAGATTGTTTCTCGTACTCCGGATGCTGCT -TGATTGTCCCTGTTTTCATGGGAGGTACAAGTCAGGTCCACATATGGGTATGTGTCCAGC -TGAAATGGATCGCGATATTGGAAGACATCCCGTCCATAGTGGAGATCTTCGGTAGACCCG -ATAGTCGTATGTTGTACATTGTGCCTAGGCCACATGGGTGATTATTTGTCAATCTGGTAA -GCCCTGATAGCGCAGCCTGGAATCGCGTGCTTGGAGTTGAGGGCACAGACACGGCGTAGT -GAGCTTATAGTTCACGCTTCCAGGGTTCCAGTCGGGCTATCATCTTCATCGCACTCTTGG -TAATTTACCAGGAGGGCAGCTGGGCCAATATTTGGATTCATCGTTGGATGAAAAAGAAAA -GATGGTAGATCGGCAACACATGTTGTATAGAATTGATACCAGTCCAAAGTGATAAAACAC -GAGAATTGACTGGGATTCGCGCCAAGATCGAACACAATCGTCCCGTTAGGTGGACGTTGA -CCGAGCGGCCGACCGCATGTGCCTTTTCCAGTCTTTCGTGACGAAAATCAAACAATTACC -TCGTTTTTCACATATGTTGAAAGAGAGCTGCAGTGGACTGGAGACTTTGAACCCATCTGC -AAACTTTCACTATCAAGCGAACGCGCAACACAGAAAGTGAGAAGTGTGGAACGCGAGGCA -ATGACGGCATAGACAAATTAATGCCGAGGCTATTGATACCACTAATCCCTAAGAGAGACA -TGGCGCACAAATGTAATTCGCATTAGTTGTACTAGTTGTTTTGTAGGCGTTCAGATTCAA -TGTTCGCATCAAGTCTGTAAAAGGCAATAGCGTACTCTGTACAAATCAATTTATCCTCTT -TCGGACAGCATACTTGTCTATCGTACCTTGTACTCCGTACAGTAACCGGTACGTACATTG -CAAATTGTAAAGCTTCAAAGTGGATCTTCTTTCCTTACACGGGTATTCTCCACTATATTT -TATGCCCTAATCCCCGTCAGGCTTAGCGACGTAGCAGCCTTAAAGCTCCGGTTACGTTTA -CAGAAAGAACTGTCAAGAGATCTCTATCAAGATATAACATAGGTCAATTCCAATTGATCC -AATTTGAGAGAGTTCAATGTTACGTACGGAGTCCTTGTCTAGCCAGATGATGATAGACAG -TGGCAAGTTCAAGAAAGAAAGCTAAAGAAAAGGGGCGGGGAATAGGGGAGTGAAAAAAAC -AGACTAACCATGGATTGAGCCTCTGCCCCAAGCTGTGTATGTTGAACCATACCCAAAAGA -GTCATGTCTCGAATTCCCGATCGTGGATACATCATTAGCATTATGCCCCGTGGAGCTTAA -AGATGTAGGTGCACTTGAAGAATTGAATTCACTTTGGAAAATGCTAGTTAATATGTTCTA -GATACTACAGAAATCCGCGAATAGCTATGCTACTTCGGGGATTCCGAAATGAGTCCGTAA -ATCCGAGGCACTCCTTTATATTAGTTGCAGAGACCTTGCGAGTATTTTAGATGAACTTCA -TGCACCTTGTCCACTACCCTGGAAATTTCTCCAGCTTAAGGATCAAGAAGAGAAAAAAAC 
-CTGTGTAGTGGAGCTGTTTGTCACTAAACACAGAAAAACCTTTGTGACAGTTGTGATAAC -TCCCCGCAGAGGGGCCACGAAACATCGATTTTTCACCGGTCATGGATGCAGGTTTTTTGC -CCGGTACGCACAGAAATAAATATGTTTGAAGGCAGAATTTCGTTGTGACATACAGCCCCG -AAGAGCCTCGTCTGAAATTCAGGAATTTCGATATCTAGACGGCAATTCTAAGTTTCTAAG -TTACCTAGGAAATATAACGAGAGATTATTAGAGCCACTCAACGCCCACCGCCACTCTACA -CATTAAAACCCCATCGGAAAACAAAGCCACAAATTTCAGCCTGTGCGTATACGGTATCCG -CACTATCCGACAACAGCCACAATCCCGCCACCTCTTCTTTTCATATCGAGACGGTGGAGC -GGAAACCCTTCGAACATGGCGGGGCGGAAAAGAACATGCCCTACTCCGTACAGGTATAGA -AAGTATACAGTCGAATGCTGCGATGGACAGAGAGAAAAAGTGGTGGAGATTCAGTCTTTT -CCAACAGGTACCATGGTTGATACATCCCGGCACGGCCCTGGAATAACCGTTGTTGAAGGT -GGAGTTTCAGTGCCCATGGTACTGTGTATTCGAAATTTACCATTCCACGGTTTAGCAGGT -GATGGGTGATGTCGGATATTCAAAGTGGAATGGATCCATCTGTGTGAGGCGAGAAAAAAA -AAAGATCGGATCCCACACTTCCGTTTTGAATTCTGTTATGGGTTATTGTGGGTTGTATTA -TACGGTGTTTTGTATGAACTGCAAGTCTTGGGGGTCTGTAGATACTCTGTATGGAGTATG -GGATGTATCGATATGGAAGATCAAGTAAGAACCAGATAGTGCTTGTATTGGTATAGAATG -CACAACGTATGGATCCACTCGAAGATGACTACATATATCAGTTCCCAGTGATGACATTCA -ATGCAGAACATGCAGCTGGTACATGATTTGATGAGGTATCTGACATCCAGTCCACACCGC -TCTCTAACAAACGAGTAATGTCTACGAATTTAACATTGATCTACATTGGAAGCAGAAATC -CGAGATTGCTTAAGGTCTCCTTGAACTAATGGACTCCCTAGACGGAATACAGTATACATA -GCGGAGATCTTCTCCTTGCAGAACGAAATATATACCCACAGGCTGCAAGTGAAATAAGCA -ACGTGGGAAATCGAAGGGTTATATTACTGTATTCTGATTTGCAGCCATGAGCAAGGGAAA -CCACATCAACTCTAAATGGCGTCCCATCGGGACGAGATCTTGCAGCAACTGGGGGAACCA -ACCGATGGCAGGCTATTTAAGTTTTGGCTCATAGGCAAGTGGACACCTCCTTGGAGACTT -CGACTTGCATTACTGGGTGGTCGCCCGTGAAGCTTTATTTACCAAACACCAAACACCTGA -CATGGATTAACTACCCCCCTTGAACTTACTTAAGCGCTTCTAGCCAGCAAGTCTCTATTC -CAAGAATCATGTCGAACAGAATGCCTGTGATTAAAATAAATGAGAGGTTCAAACTCACAC -CCAAAAATAGATAAAATTGCAACCTATCCGCTGCGGGAAGCTAAAGAGTCCGGGGCCAAT -AGACGTTCCAGACCAAGTCCAGATTCAAACCATGTATGCCCAACGCCATATGCGCGCGCT -CTAAACTTGCCCTCATCCTCTGGAATTATTCATGCTGTCGGTATTTGAAACATGGGATGA -GACACGAGCCAGGAGACGAACATGGCAACACTGACATGCTGATTTGCAAGGCTCCTCTTC -CCCTTTTCAGGAGGTACAGTACTACGCCGGCTGGGCGCATCGGAAGTACATCGGCCAAGT -ACTAGTTTCCAAGCCATCTAGGCAAGCTCTGTGGGCACATGTACAGTACCTCTTGTTGAC 
-TAAGTATGGGCTAATCCTGAGACTATGCACGGCTGCAGCCAGATATGTCCCATCGCGACC -ACGTAATTACTGCATGAGATCTCTTACTGCATGGGCTAATCTGTCCACAGAGCCCCTGTT -TTTCACACATAGCGTTTTGTCCAATTGTTTCCTCGACTTTTTATTCCTAAACGAATGTAG -GCGACTCTACAGAAAAGATCTTATCAAGTTTGTGCTTAGGGTGTAGGACAGAACGATAAT -ATAATTGTACTTTAATATCGTTACTTTTTCCTGGGTGGTACAGGTCATCCCCGCCATTTG -GGGGCGGACTACTCAAGTAGGCAACAGCACGGAATACGGAGTAGGGGAATACAAAATTAG -GTAGTGCTATCCACTTAAGATGGATAATGTACGCAAATTAGCTACGGAGTGGACTTCCGG -TTACCCAAGCACCTAAGTAGGAATGTGCCTAGATTGAGTGCTTGGATAGGGGGTGTCTAG -ATTTGATTGACGCAATATGTACTTGGTGCTCGGCTTCTCAGGGTGAAAGGCCCTCCACAA -ATGTAAAGGCCAGACATGCTCCTTTATGCAGCTGACATGTTACTAAGAACCAAGAGGTAT -CGAAGGTCATGTACTCCGTACTCCCTACAATTATCTGGGACACACATTATTACGACCAAA -GCCTTGGCGACTAATTTAAATGCTGGTATGATACTCCGTCCTCCGTACCGCAAAAGTGTC -TGGCATGCGATGAGATATGGGTTTCCAAGAAAGACCAAGTGTGTTTGCGATCTGCTTCGG -CATATTAGCTGTGTCTATTAAGTTTCTCTTATTTGAGAGTCCCTTGCTTTTGCTACGTTG -TACAATCCCGATATCCTCTAATGGTGGCTTATCTCTTCCCCCTTTCTTTCCCTGCTAGTC -TTGCACGAGGCGGGAATACTCCGTTGTTCATAGGGGAATCATTGCTTTGGCAATTGTCAT -CCCCGGTTTAAATGAGAAGGCGCCAACGAATGATTTGATCTGCATTCCTGGAGTAAGTGA -AACGCAGCGATCCACTCCGTGATAGATGTAGAGCCTTATGCGCTGTCGTGTATCAAGCTG -CACCACTCCGTGTATATCGATCAATTGTATCCGTCTGGTATGAACATCTTGTGCATATCA -ATGAGAAACGGCCGAGTCGCATGATGGATGGTACAGGGTTTGTTGATGTACGATTGAGTG -GCTGAGGTTAATCCCTGAGGGACCTTACAGAGGGTTCTCATTGAACTGCATTGTAAGGGG -ACAAAGTAAGGGGAGGCAGTAAAAAGCTCCGAGTCGCAAGTGTGTCCACAGGTATACGAT -CAGAGATCGGCCTGGAAAATTTACGCGTCCAGAAACAGCATACTCACAACCAGTGAGTCG -AGACGTCTACAAACCTTATCTCGGGTCGAGAAGCTGGGGACAAATCAAGTGCATCCAGTG -ACAAAGGCAGATGTAGGGGTATGAGTAAAGTGACACAGTAGACTAAATTACCAGATCCAA -ACAGGGAAGATGACCTAAAGTAAATCTCACGGATTAGCCAACTCCGACACAAGAGTAGAA -TTAGAAAAACGTGGAAGGTCATATTACTGGGGGGAGAACCTGCTTTTGCGACCGAAATCT -CCCACCCCCCCACATGGGCCAGATCCCTGCAAGACAAAGGAGGCTCGGGGTATCAATTTA -CCAGCTGAGGACGCGAAACGCGAGATGAAGTCAAGGGCAATCCCAGGCTTCTGAACCAGT -CAGCGCCCACAGGTCGGAGACCCTGGAACGCGCGACCGTATCGGAGGCTTTGTACGGAGT -TAGAAGGAGAGCCACTCCAGCCCATTGCAGCACTAGTAACCAAGGTGCTGGATGACGAGC -CAACAATGGCTGAGGATTGGCCTTTAATACTTGTCGATCAGGAGCTTTTTTGGGGCCGGA 
-ACTCGCGGTTGTTCTTGGCTTGAATTATCATTTTTTACTTTATTTAAAATTTGAATAGAG -GAATGGAATTGAATATCGGAGAATTCTATATCCCACAGTAGAAAGATCAAGGAAAGTAGG -GGGAAATGGCATTGAAAGGTTGAAATGTCTCTCGGTAGCGACGGAGATCTCTCCACTGCG -AGAAACTCCTTCAAATTACTAGTTTAAGAAAATGACTGGGTCGCGGCATACGGTTGAAAA -CGCAACTGAGTTTCAAAGTTAGATGTTAGATCTATCGtagaaataggaaaggaacaggaa -ttacgcgcaatagaaatagaaatatgaatttgaatatgagtatcaataCTCGAAGTAATT -TTCGGATTGGGTTAACAGTGTACGAATCTCCTAATTCAGTATGTACAGGGCATCTCAAGT -GTATCAATTCCACCGAGCCATGTTATAGTACAGTTTCAAGCAACAGGTTTCAACTCCTGT -CTTAATATCCTTGCAGAGAAGCCCTATCAAAAATTACCTGTAAATTATGGAACTGTAAAT -TTACTGTACGTACTAACCTTCAGCCTCCACTCTGTGTCTCGTGGATTACCGTACCTCTTG -TGTATCTTCTCTCTTAACCTCTCCCTCTTCCTTTCTCTTCCACACCCTGGAGAGAGAAAG -TCAAAACcctcctccttgtacctctctccttctcttcatacatcctcttccctttcccct -cccccctGACAACCCCAGAGTCTGGTCTACTGGCTGAGTTACGGTTCGACGACATTCCAG -TCGACGCAATCTCATCAGCCCGCTGCACTAGCACTCATATCAAGTCGAATTCACCTTCGA -ACTTGACCTTCCAATTCGACGTTTGTTAACTCAAACGGCGATCCTTCGTTGTCCGCCCGG -TGCCTCCCTGATCCTCGGACCTCGACGACTTGGACCCTCCTGTTCCAGCTCCTGCCGGTT -AGACGAGCGACAGTAGGAACCCCATTCAGGTTGACTTTTGGTTGATACGAATTTGTTTTC -GGTATCAGATTGACATTGAGCTTGAAAATTTGGGTAAGAAATTTCTTTCTTTTTTTTTTT -GGTGCTGGAGGTGTGCATGGTGTGAATGTCGGAACGTCCCGATGGATAGACGAGCAACGG -ATACTCGAATGTTCTCAACACGAGCCAAAAGATTCAATACATTTCCACACGAAAAAGAAA -GTTGTCTGTCCAGAGTGCATGGAAGTTTGGCCCACCGGGCATAATGGTATGGCCTTGAGG -CCATGTCCAAGCATCCAGCAGTCCCAACCCCATATACCTCCGATTTTACGTCATGTTCCA -CCCAATATAACACACGCCGAGAGCCAGATGTGAGGGCAACGGTCTATTGAGCGATAAAAG -TCGCCGTCCGTGGAGACGGGAGATGATGTTTCAAATACAATTGTGGAATGTGTATCTTTT -TGCTTGACATCCAAGGTCTCGCCAGGGGCCAGACTGCTTCTATATTCCCCGTCTTGGGAA -GATTTAAGGGGTCGGCGAAAATCCTATACTCATACTGGACATGCCGAATGTTGGGGTGAC -GATCAGGCTGCAGCACCACATACCTCTCGCACTTGATCATTGACTGGCGTCAATTGTTGA -TTGGCATTGTCCTTTGGACCGTTGGCTGCTACGAATGGACTATTAACGAATGTATGACTG -ACATTGTTGCCTGGTATAGGATTTATCTGTCTGAATCTTCGATATAAACCCACTTGTCTC -CTCCGACGTGGACTTAATATTTGAATCACTAGCATTCTTTCCATCTAGCAAACTCTCATC -AGTCTCTTTACTGAGCCTTGTTCATCCAATTCGTTAATTCGTGAAGCGTGAGTTCTGACC -ATTCCGTTACCAAGACATCCCCCCTGATGATTGTGACCCCCTTCATAGGGTTATTCTCCT 
-GCACTCCACATTCCGCAGCACTAGCCTAGGGGTGAAGACGTTGACTCGTACTTCATCTCT -TCGTTGGTATCCAACAGAATCTTCATTCATCGGATCATTCCTGCCTAGATGCCAGTCTTG -CCAACACGGTAAGGGTTGCATTGTGCCCTCTGCCCGAAGGCTCTTTCTGGGTTAGTATTT -CTCCGAAGGACTTATGTCCCTTTCTCCTGGATGAATACAACATACATCTCTACAGTTTAC -CTCGCTGTGCTTCTTTTCCCACACTCTCTTGCTGTCCCGTCACTTGAACATTACAATCTT -TTAATCATTTCTTTTTTCGGCCTCCGCTAATATTAATTATCTCAGCCTTAACCTTCGATT -ACTTCTTCCTCGGACCTCCCGGCCACCTTTTCCCAGCATTCGAACTATTATTCTACCGCT -GATTCACTCTTTAATCGTTACAACTTTTCCGTCACTCATTCCAATACACTAATACCACGA -CTGTCACTCTCTTAATTCCGTCGTTGCCTGATAGACTCGTTCTTCAAGCTTCCTTTCATT -GAAACTCGGCAAAAcaaacctcatacgtcaacttgtccttcatacttcGGTCGATCCAAG -TACTTCCGCAATCATGGATTATCATCGCAACTACCGTCCTTGCCCCACCTTCTTCGTGGA -AGAAGAGTACGGCATGGAAAGTCGGATTCGGCAAGAGCGGCAGGCGAAGCAGGAGCATGA -CAAGCAGATGGCTCGTGAGCGTCAGTATGCCATGGCAGATGAGCTGTCTAGTCTCACATG -TGAACAGCTTCGTGACGATGTCCTGAGCCACATGTTGGAAATGGATGTGCGTTTAACCAG -ATCCCCGGATGCCTTCTTTCCAAAGATTACATTTTGCTAATTTAATAATCTTATAGAACC -AAACCCTGCCCGACGTCGAGTCTATTGACATCCAAACTGAGATCCAATGGTTCATGCGCC -CCTACCTTCTTGACTTCTTGATCGAGGCCCACACTGCGTTCCAGCTTCTGCCATCCACCT -TGTTCTTGGCGATCAACCTTTTGGACCGATACTGCTCCAAGCGTGTGGTCTACAAACGTC -ATTACCAGCTGGTTGGCTGTGCAGCTCTACTCATTGCGGCGAAGTACAATGACAAGAAGG -ATCGTGTTCCCACTATTAAGGAGTTGAAGTCGATGTGCTGTTCTCTTTACGATGACGATA -TGTTCACCCAGATGGAATGGCATGTCCTCCAAACTCTTGGCTGGTCAATTGGACACCCAA -CTGTGGATGCGTTCTTGCAACACGCTGTTCTGGACGATGCTTACGACCCCGAGGTGGAAC -ATATGGCTCTGTATATCCTGGAGATTGCCCTGTTCCACCGGGACTTTGTCGCCAAGCTAT -CATCCGAGCTTGCTCGTGCTGCCCTTGCGTTGTCACGCTGCATTCTCAACAGACCGCAGC -CCCGTCACACTGACTGGTCATCGCAGTATGACTCGGCGACCCTTGTTGCCTTGTCTCAAC -AGTTGCACCAGCCCTCCACTGTGGTCGCCCGCAAGTACGCTTCCCCTCACTACTCTCGGG -TCTCAAAGGTCATGGAGCACTTCCTCCAACGTCAGGCTTCGTTGGCAAACTATGCCCGCT -GCACCCCTCCGGTTGAGACTCCTATCGATTCCAAGCCGTACAATGGGGAGATTGGCCTTG -CCACGCCTCAGAAGTCTCAGCAGTTGACAGCTATTCCTCATGGCTACCTCACTCCTCCCA -TCACCCCGGACAACGAGGCGCTCGTGCATGCCCATGCACAAATGGCGAATCACGATCTAT -CTCGCGGAATGTCCACTGTCCACCATGGCTGTTCCCCTTCTCCCGCCCCATCCGCTGAGA -TGCAATACATGAACAGTGAAGCATACCAACAACAGCAGGAGGCTCTGTACATGGCTCAGC 
-AAGCAGCGCTCCAGCAGTTCCCTCTGGCACCTACCCACATGGGGATGAGCCAAACATATC -AGGGTGGGATGTAAAAGAAACGACGGAACGAACAGCAATCCAAATTCCATTTGGTCATTG -CACATATTTCCAGCACTTCATTTCATTTCTCTCATCGTCATATAATGACTCCAAGGAGCA -TCCGACTCCGCATTGATTCACCTCGATATCTCATCTTAATGCTCAATGCGTCACACCAGC -TGTCGACCTGTGCCATATTCCCCGAACTCAACTGCATCCACTTTCGTCATTTACGCTTTG -CATTCGGCTTTGATTTTACGAAACCAGAGTCGAGACCAGCCCTTCACTTCCTGTTCTAAA -AATTGCATACCCATTTGCTCCCTATACCCATTAGTTTTGGGTCATCAGTTGCACTTGATT -TAACCATCTAGCATTGCGAGCGCGAGCCATGTCGATTTATTTGTTTCAAGAATACATCCT -GCTGTTTTTCACTCCCCTCGTCATATTAACTTCATTTCTATCAGTCAGGGAATACCACGC -ACCCTCGACATCTTGTATTTTTGCATGCCAATTGGCGCTGCGTTTTTGATATCCCCAACC -CCCTGCCCTCCACGATCCTTAGCAGCATGACTGCCTTCCCAATTGCGCCCCCAATCCCCA -ACCCCCACCCCCTCGCCCTCAAAACCCTCCAAACTCGTTTTTCTCGTGCCTCGTTTCTCA -CGTTGACTCTTCCGCCAATGTGCATGAACCGGTCTCCTGTCCAACGACATCCAATTGTCT -CACGCACCAGGAGAGCAGGTTCATCGAGATCACAAAAAAAAGCAAAAAAAGTGTGAAAAC -CCGGAAAAAATTATATGTCTTCTGGGGAGGAAAGGCCACTGAGTCCATATTTTGGTTGAA -CTCAAGGTGCCTTTTCTTTTCCCTCCGCTCTCCTCTTCCTTTGTATCTAGCCCTTGACAC -ATGCCCTGAATGCCTACCCGTCATCCATTATTGGAGATGGGAGGCCAGGTGCCTTTTTTC -TTATCTCTTGTGTGGATTTGAAAATGGGGTTGTCGGTTTTGCCGTTTGGCCCTGAATTAT -ATCCCATTTTCAGTGAAATTTTGGATGCTGTAGACTTGTAGTCTTTTGGTCTTGTAGAGT -ACTGTGCATTCATTTCCTCGGTTACTCCATGGTAGCAATGGTTCTATCTAATCCGGTTTC -TGTGTATTCTTTTCTAATCTCAACTTCGATGTCCATTCCCTTACTCTGGATATTTTGTTA -CTGTTGGTCCTTTCTGTTGCCATGATTCTCACCAATCCTCCTTGACCACCTCCGGTCTCT -CAAGAAATATATCCAGACATATTCCTGAACATCCTGGCTTTAATATCATCTGTTAATAAT -CTCAAAGCCTAGGGTAATATGTGGTAGAAGCGGACAAAGCTCTCGTTCAAACCCCTCTCT -CTTTGCTGCTCAGATCGAGTCTCATTTATATTTACCGTGATCGAGGTTTCTCGGCATAAC -ATATGTGAATTTGTCTCAATCACAACTCGCACGATATATTATAGATTATTCTGTTCGCAC -ACACATGACTTCCCCGTCTACGCCATATACATACCAATTGATGCTGCTCAGAAATTGCGA -GGGAAAAGAAGAGAACAGGGTTAGGGTTGAGTGCTGCATATGACCAACTCCATTCCAAAG -CGAGCTAGGGGCTCCCATGGCTGAATTGGAAGTAGGTCACTCGAAAGACGCCATATTCCT -CCAGAATTGCTCATAACAGTAGGTTTCGTACAATGACAGAAGATAGTATGCTTGAAACTA -TCATTAAGCAGGTAGCTTCTAGCGGTGAGGCAAGCTTGGGGTATATATCTCGAAGGCTGT -CAGATTATGTATACAACATGGTCTATGGATATCCAATCTTCTCAGCTCGAAGTTATTGGT 
-ACAATTGTACGAAAGTAAGAAAACAGCCACAAATAAAGTGAAGATTAAATTAGATAAAAC -ATTGATATTTCTCTTGAGAATACAATTTCGCTCATTCGCAATAAATCCAGATCCAGCAGA -TCCAACAATACCCTCCACGCTATGCATCGCAACAAACAAAATAAACAAAAGGGCGTCAAG -CCAATTAGCCCAGCATGACCATGCAGCTAGCTGCTAGGCATCAATACCGAGAAGCAGACA -CGTGTTCAACAGTAACCAGTAGAGTTGGAAGGGTAGAAGGTAGAAGGGACAAAAGAGAGG -AGTGAAGAAAGGGAGAGACAAATCGGACTAGGAGATCACATAGAGTGACTCCTAGCAGGG -TAGAGGGGAGAATGACAAGAGACAAGAGAAATCAAACAGCAAAACGCCTTCAGAACTCCA -AGATAATGATACAAGGGGAGGTAGACAGCAAAGAGTGGAGACCTGAAAATCCGGGGAGAC -GCAGGGATTCAGGTTGGCGAGACAATAGGAGCGATAAAGGGACCGGCACAAGAGCGCCGA -GAGTCCGAGAAACCCAGAGAAAGTCCTCAGAGTAGCACGTCCCGATTCGGTCAGGGGCAG -AGAGAAAAAGGGTGCGGTGACGAGACTCGCAAAAAAAAAGGGGGTTTTGCGTCCAATGAA -CTCCCCCGATCATTTCTGCCGATCATCGCCTCACTTTTCTTATTTTCTGGCCCTTTTGGG -TCTTTTTGTTGGTCTGGACCTCTTTTCGCTCTTGTAACGCGCGTGTAAAATGCTGGGATC -CGCGTCTCCTTTGGCGAGCCACCTACGTATTGCAATTGAGGCAAATGCATAGGTCGGCAG -ATCTCCGTTGTATGAACAACTACTTCCAGCGTTGTTTGTCCCAGTCAGAGGGCCGCGTTT -CATCGTAGATTCGGCCGCCCTTGTTGTGAATGACTACCGGGGGTTTGCGGGCAGGATTGG -TTTCTGGGATCTGCATTAGCTTTGGCTGAATCTGGGGATTGGAGACAGGGATATGATGCT -CTCGACATACTCAAGCTCCGGACTTCGGTAGATACTCCTTGGTCATTGCTGCTCCCTCTG -CTGGCTAGCGTGGAGTTGGAATCGTTGACCTTGTGTGGCATTGTGAATGGAGAGTAGCAG -GAATGGTTTGTGGTTTGAGGGTCAATGGAGATGAAGTTGGAGGAGAGGGATATTAGGGTA -TTAAAAGTGGTAGAGGATCAAGAGGTCAGCAGACGAAGAGAAGTATAGAAAGCGTCAAGC -TTGTGGAAGCACACAGTGTTGAACTGTGAGCAGATTATAAGAACAATCAAGATATATATG -AACAAAAGATAAAAAGAGTGAGAAACACAAGAATATAAAAAGAAGACAATAGTGATGGTT -GTTGATATGTTGTAGTTTGGTTGATAATGAATCGATTGTTTTGAGGGCATCGAGTTCTTT -TTGTATAGCAAGCCATTCCACATTGAGCTTCACCCCGATCTACACTCTGGGGAACTTCGA -GCTTCCTAGTGTCAAAAGAGCTCGGAATGGATCATCCCCCCCCTGACGTATTTGGTGCCT -GGTAATTACAACTGCCACGATTGAAGCATTGATTGGTGTAGAGAGTAGATCTAAAGTAGG -GCAGCCAGTGAGGAAGAAGAGAAGCTACTAGGGCTGGAGTGGAAATGGGCCAAGTATGCA -AAAAACGGTAATATTACCAATAATGATGAGTCAGCCATTCCTAGAGCCCTGGAGCTATTT -ACAGAATCGGACTGGTCGTGTAACTAAAAAAACTTCCTCATTTTAATGAATGCGTTGGGG -AGGGATTCCTTCACCGCTAGGATTACTGCGCCAATGGCGGTACACAGTGCGTGTATTTAG -CCATGGAATCATCAGTTAGGGGCTTCTTCCACACCGCAATTGGAACACTCCACTGTACTC 
-CATAGAAGTACATCGTCAGGTCTCGATGATTTTATTACACTCATTTTTTAGGCCAATAGT -CAGAAGGTCAGAAGACTACCTAATAATATTAGGCTCCATCGGAAGATCTGCCCGCCCCGA -TTCATACAGGGTCATCCCATGGGGAACAATTATGAAACTTAGGTACATATACATCTACCT -CTGGATGCATAAACAACTCATTGTCTCGTTGGTAGGGACACACGGATGTGCGTCATTCCA -TCCAGACCGAGATTCTAGCCCTGGAGGCAACCCTTTCCTGGTCAATCACTTTCCAAGCTA -AGTTTGCCCCGGGGCTTCCCCAGCAACGCCTCTGAACCAGCAATGAGATTGCGAAGCGTG -TGGATTGTCTGACCCGTCTGACCTAGTCTCGTGGACACGGATAATCCATCCCGGATTCTA -GACTAGCCTCCCCCGAAATCGTAAATTATGTCAGCTCTCCTATCAACCCTCTCGTTTCAT -CCGGTCTTGCCACCCGTTCATGCTCCGCCGAATTCCTAATGTCCGTCGCGGTGCTTGTTG -AATCATTGACATCGTGGTGCTGTGGAACGTCGATACCGGCTGGAACCCGCAGGTGGAGCT -TTCGGGTTTCACTTCGTCTGCCCCGGCGGTCCCGCCTGCGGATGTATTGTTGCCGCTCTG -TTCCTGCGTCATCGACATCCTGGCAGGTTTCACGGTTGGTTTCGATGGAGTTGCCAGCCA -CGTGTAGCCATTGGAGTTCGTAGGCTCCAGGGTCGGGCGCGTTGGAACAAGTGGCCGTGG -TTGAACACGGTGAGAGGAGAGCGGAGCTGGAACAATGCTTCTTGCCCCTGGACGTTGGTT -CTGGGCCGGGGGCACTTCGTATCTGCGGGATGGTGGCTGTGGGTGCAGAGCTGATTCTCT -CTTCTGGAGGGATCCTGGGCCTGCAAAAAAGTTGGAGATGCTTGCTTGTGTGAGTCCATC -TTTACGGCTGGCACTGTTAGGCGGTTTCAGTCTGTTTGCCCCAGCTGCGTCTGCGTCTGC -GTCTGCCGGAGGCTTCGAGCCTGACTGCGCGGGGGGCTCGGGGTTGGCCGCTATGTAATC -GCGGGCAGATGTAAACCCCGATGAAACGGTTGTAGGAATTGTAAACCCTGTCGAATTATT -CATAGTATACCCAGTTGAAGAGACGTAGGTAGTCGTTTTTGGGCCCTGGTTCATGTCTGA -CCGTCGTCGCTTTGGATTGGGCTCGCCAGAGGCCTCATGAGTTGATGTGCCATTGATTTT -GCTGCGGTCTTCCTCACCCTCGGGTGTCCATCGGTCGTCCAGCGTCGATGGAAGAAACTC -GAGCCCCTTGTACATGTCTTCTAGAGATGGCCGAGGGCGACGCAGAATATCGGCAATCGC -ATACACCACCTTCTCGTGAAGAGACGGCCCCGTCGCACGAAAGCGTGATTTAGTCAACTT -GGGAGGTAGGAATGGTGTCAAAGTGGTCGCACCATCTTCTCCTTCTCCTAACCGGGGTTG -TCGAATAGGAATACTCAGATATAACAGGGCCTGTGCTCTTGTCATTGCGACATACAGCAA -TCGTCTCTCCTCTTCTACCACCTCCGCACGCGAGTGAGGAATGCTCCCATTGTATACAGC -GGGGACAAACACAACTGGCCATTCCAGACCTTTGGCAGCGTGGATAGTTGAAATGGTGAC -TTTCTCTGTCGGTTGGTCTCCTTCCTTGGCCTCTTCTTTGGTTGTGATCTCCGTAGCCAA -TGCGACATTTGCCAAGAATCGAGTGAGAGCCTCCTCGCCAGAATGTGCCTGCTGCTGCTG -CACACCTTCGATTACAGGGAGATCCATTTCTTTGTCAACCTCTTCGTCTCCAAATGCTGT -GACTTCACTGGCTTGATGCATCAATTCCTTGACATTCGCCCAGCGACTGTCTTCATCAAG 
-GGGATAGGTTCTGATGAGATACTCCTGGAACGACAGACGTTCGGCAATGAATTCAATCAA -AACACGGGGAGCTGATGCATCCTCACACTCTTGCAACTTTTGTCTAGAGGCTTCAATCAA -ATTGACTAAAACACAAAGTCCCTGCTCTGCGGCCTTTTGAAGTTTCTTTTCAGTCGCACG -GCGTCCTTGGATGACATCTTTGATGAAGTTCCACAGCGGGATTTTGGCCTTCTCCGCACC -ACTAGTCAATTGCTTGATTGTCTCGTCTCCAATCCTTCGTGGCGGAACATTCACAATATT -CATCAATGCGTCCGAATGTCCTGTGTGGCTGACAACCCTCATATAATTGAGAAGAATCTT -GACTTCAGTTCGATCGAAGAATTTTCGTCCTCCAACCATCTTGTATGGTATACCCGCCCT -TCCAAAGGCGGTCTCGATTTGTGTAGACAGTGAAGCCGACCGAAGCAGGACGGCAAAGTC -AGACATGCGTAATAGACCGCCGGTCATCGCTACACAACGCTGAATCTCAAGTACCATCCA -TTGCGCCTCTGCGTTGGGATTTGGGAGCCTTCTTAGTACCGGCAAGGTTCCGAAAGTGTG -CGTAGCCAGGAGTGCTTTTGCAGGCCGACTTGTATCCTGCTCAATCACTTCTTGGGCTGC -ATTCAAGATAGAGCCCGCAGAGCGATAATTGTCTTCCAAGAGCACAACCGTTGTGTTTGT -ATATCGCCGCTGCATCCGCTTGAGATTTTGGATCTCCGCAGAACGAAACCCGTATATGCT -TTGATCCGGATCTCCGACGATAGTGATCCGTCGGTTCTTAGATGCGAACAGATTCATCAG -CTCATATTGTATGACATTGGTGTCCTGGAACTCATCCACCAAGACAGCCTGCACATTTGA -AACACACTGTGGGTGGTCTCGAAGCAGATCTCCACATCGCAGAAGGAGGTCATCGTAATC -CAAGAGATTGGAGGCCGCCAGTGCCGATTCGTAGTCACGATAGATATCAACAAGCTCCAG -CTCTTCTGCTTTGGCTCTATCAGCTACGGCATCCGGAGTGAGTCCATGTGCTTTATGGCG -AGAAATTCGGCCTTGCGCTGCCTTAACTTCAATGCTAGACTTCAGTCGTTTAATGGTTCT -CTGAGGTCACAATTAGAGAGGAGAAATAAGCGGACATTATTCAGGAATTTGAATCATACT -CTGATAATTGCGCGAGTATCATCCGAATCCGCAATGCCAAACCCCTTCCGCAGGCCGATC -AAGTACCCGTACTTGACCAAGTAACGGCGACAGATGGAATGGAAAGTGCCGAGAATCAAC -CGCGATTCGAGTTTTTCTCCAATAAGTTTTGCGAGTCGCTCGCGCATCTCACGGCTGGCT -TTGATTGTGAATGTGCAACAGATAACATCTTGAGGTTGGTATCCATGGTGCGAGAGGAGG -TACGCGACGCGTGCGGTCAAGGTTTTTGTTTTCCCTGAGCCCGGTGGTGCCAGAACTTGG -AGAATAGAGCCGGATGAGGTGACAGCTTCTCGTTGGGCATGATTCAAGCCATCTAAGATC -GGATCCATGACATGTAGGGGGTTTGACCAATGGTCGGTTGCTTGTGAGGAGGAGACGCGA -GAGAACGCGACGTCATGTGACACTTCCGCTCTACATTGAGGAAGTCGATTATTAAAAGAA -TCGATGGAATTAGAAGCTGCTACTTGTCTCCAAGTCTAGAAGTATTGATCTTTTAGATGC -TAGAGGACATCGAGGAGCAAATTGTGAGGCGGCGATCCTCGAAGGCCTTCACAGAGTGGA -TGGTTGCGGGTTGAATAGAGGAGGTTTGGCCGGAGTATTTCAAGTCCAATCTTATGAGGT -ATAGACAATCTATATGTCTGATTCGGACTAACTTAGCCTCGTAGTATATGCTGATGACAT 
-TGAAGGTTCCATTTTTACTTCAAAAATGGAGATTCGAATATTTTGTTCTTCTGATACGTT -AAATGTAATTTTTCTTATTTTTAACCTTCTTCCTAGGTTCTTCTTGTGGGCAGTACTATG -ATTTAGTTCTTTTCGAATATAGGTTAATATACATGGATGATATTTTCGTAAACAAAAGCC -TAGTTCATGCTCTCGGGGGTTGCAATGCTTTTCACTATCTATACCATAGGAAGAACAACT -AGAATGAAGATCCTACATAGCACTTCCAAACAATTGACCTTACACTAACCGAAGTCCTTC -TTGGTACTCAATTGGCGCCATATTGGGACCAGCTCTTTGGTTCTACTCCATACAAGACTG -AGGACTCCACCATGGAATTACCGAAATTGGCAGGTGCTGCCTCAAATTCCACTCCATCCA -AAGAAAAAAAGGGAAGAACGAGGCTCATATAGTTGAATATCCGATCAAATGCCAATTGAG -GTAGGTTGTAATATACCAGCCTGGTGTGATGGCAAGTACGGAGTAAATGTGGGCTCCAAC -GGAGCACCCCAGGATATATTGCATCGTTACAGCCAGACTCATAGAGTTGAAGATTAGCAC -AATCTCGGGTTCGAAAAAAAAAGCAATCCATCTCATGCCAATAATCAAATCGCTTGGCAC -ACAAATGTGTCCATGCGCTACCAAGAACGGCACGGGATATGTAAAATTCATGAAAATGAT -TCATACGCCGGATGATAGGCAGAGTTTTCCATACCATAGCCTCGCTAATACATTGTCGCC -CCGGGATTTGCTTCTCGGTCGAATTCTAAACCGAATTAAGATAATGGGTGGTCATTGGAT -CTAGATTAACGAAGCACTCGGAGTCCGGAGTCGGAATTGGACATTTGTACCCTAGCCAAC -TAAATGGAAACCCGGACCTGAAAAGTGTTCCCCCGTCGGAGGTATCATCGCATGCCTCGC -TTACAAGGTCCCCGAGCTCTTCTGTTGACGTGATGTTTCTCTTTCAATCTTCACCGGCAG -TGGGCCCGATGTCTCCCGTCGTGCCGGATTCCGACCTGCCTATCTGATACAAGCGCTCGA -GCAAAGCGGGACAGGCCCCATGAGAAGAGGATCTGGGAATGAATGTTAATTCTTCCTTCC -GTTGACCCAGAGGAATTGGCGATTAGACCACAAAAGGCAGGTCCGGTAAAGAGATAGGAT -GAAGCTCGCTTGCATTTTCGATTTAGGGCTCACTTCTCGCCACTGTGTTATCACTTGAGC -ATAATTCCGGTCATATATACATATTAGGCTGCACAAGGCCTGGGCCCGGGGTCTATCGAT -CTTATGCATGACATAACATCCTAGCTGTATACATATGCATTTATCGAGTCCACAGTAAGA -CCTGGGTATAGCAGAACCTGATCCAATTAGTTCAAGTTCAATATGTCTTTAGATGAATTT -TTTTGCGAGGCAGGGGTAAATCTAAGAAGTGCAAGCGTTGCTACCTCTCATACCGGGTTA -TTTTTTAAATATACCGGAATCTACGGTGGGACGAACTTGACATGTGAAGGTGGAGTTAAC -AGGTATCAATCAGAATCTATAAAGCAGCTTTCTAGTCACAAATTCAATCAAATAAGTGTA -CATGCTATGATGATGCCACTATTCAAGAGAAATGTAATGAAGGTCACTGTGTATAGTGCT -GTGCCTGGAATTTTGTGTTAGAGTGGCTGAAGTTCTACAAGGGCATCTAACCGCCTTAAG -AGAACAAGGCTAATTTCGGGGCCTGTTGTCTTGGCCAGTAGAAAATTTCCACACGTAGGC -TGATTAGAATAGAGAACACATGCAATGTCTGTCGACTAGGTAAGAGAGGGCCAACAAGTT -ACACAAGATACCCGGCACGTCCACGGGATTTAAAGCAGCTAGAAACTCATGATTCTCAGA 
-GCGTCATTTGTGAGGCTATAGATATACATATTGGCTTTCTTTTAGTAAAACAAAACTAGC -ACACGAAGACAGACAAGTCGCCAATGTTGGGACCCTAGGTAGATATTGAGTTAGTATAGT -TTGCTGTGGTTAGCTCACAGGATACTGCACTTACCATATAAACAAGATTCCGATGAACGA -TACGCCAAGTACCAGTAGACTGACGTTGCCAGTTATCCTGATATTGGCCATAAGCATAGG -CTACCTCACTCGCCTTGTCTCCTTTCCCGAAGTGGGCCGCACGGTAGTATGTCACCGATT -TGGCCGTGCTGGGTGAAAGTACATCGATTCTTTGGGTACCAAACATATGCTGCGTGGTTA -CACATGCCAAGCTTGTGTCCAGTACAGATTGAATGTTTTTCAGTGGGGTCAACACGTTGA -GGGGGGCCGAGTAGTTGGCTACCGCATCAGTGGTAAAGACCTTGCTAAGGGCGTCGAAGT -CTTTGCCATCAATAGCAAAAGCATAGAGGGCCAGGGTATTTCGGATTGCCTCAATTGTAC -TAGCATCGGTATAGGGTTTAGAGGTGTAGCTACCGGCAAGGGACGAAATCTCGATAGGGG -CGACACCACTAGTCGTGGATGCCACGGCCGAGCTGGCCACGGCGAGAGAAAGTGTGCAGA -AAGAGAGGAATTTCATTTTGTCACAAGAAAAATCAAGCGGACAAAGGCAGGGATGGAGCA -AGGATGTTGCTCTTTGTAATAGGAGACTTTGTACGGAGTACTCCGTACTTCATGATTTCA -ATCTCGGTTGTGCCTCCCCCCTGAGATCCGAGATGAAACGAACCGACTATTTTCGCTTAT -ATTGCTACTAATGAGTTAGTGCCGGCCGAGAAGTTGGACTAATTGCAAAGCTTAGCGTCC -CATATTGTCTAACAGTGGGACGATATAAATCGATTCCATTATGGAGAATGCTTTGACAGG -ACGAAAGGAAGCCTTTCGTCCATTAGACCTCCATGTGGGGACAGTAAGGTATCGGCTCGA -CGCGACCCCCAAATTGACAAATGGTGTTTCTAAGTCCATCGGCGGTTAGCTGTGGCTGCA -CCAGAGATATTAAACGGGACAGAAGCGCAATTCAAAGACTCTAGGCGTATGCTAGTCTGA -AAATGTTCTTAAAATACCTCAACTGTACTCTGTGCTCCTTGCCTCCTCTCGGTTCATGAA -ATAATGAATCAACTGAGATGTTAATCCAGACGACGGAGTGATCATTCCCCTCAATAAGCC -CTTCAGCAGCCCTTCAGCAGCGGGTAAATGCATCGTATCCGGGCCGAAGCGAAGAATTTC -CCCCAGGATAGATCTGGCCCCCACATATCTGGCCCCCAAAATGGCTTTGCTTTGGAGCCC -CAGAATGAGCCCTAATGTGAGTTCAAATCCATACCCCAGGTATTTTGATTTGAATTGCTA -CTACCATAGGTTTACGGCATACCCTGTATATTGCCTCATGATACACATATAGTTCATGAT -AAAGCTTTAGCCAGGAATGCATCGTTAAACAATAGCGATAACGAGTCTCGTAGTCCTCAA -AATGTCACACGATGCTCTTACCACGCACAAACGTGATGTCGCCTGATTCAAAATAGGAAT -GGATTTCTCGCATTATACATCATGATAGCTCTTTCTGGTGCTGCCGTCGTGAATACAACA -TATGGGTATGGTGATTCATTTACCCCAAATCATCCCTAAGTTATCCGCGGACTACAGCCA -CAGAATTAAGAATCAATAATACACTCCCTGTGATCTACACAGCACGCAATATGTCTTTTG -CTCTTTGCCCAATATAATGACTATGCACGGGCCATTTTTGGTTATCCACATATGTATGCC -AAGCCCGCCGATTTGATATCCGACCATCACTGTTTGTGGGATGCCTAAGAAACACCCAAG 
-TCTCGTATCAAGCTCTTCGAGAAGAGATCAGCCATATAGCTCCCAAGAGATCAGATTGGA -GTCCATGAAATACACCAAAAAAACGTATCTGGATCAGCAACGACCGCCAAGTCGGTAGAT -AGTAATTCGATGCTAACCAGCAATTACTATCTGTAAACCCCAACACAGGGTTGAGTCAGT -CTCAGATTGTCCTGAACCTTACACATACAGGAGGCAGCTAAGATGGGCCAGGTATAGAGC -AAGGTCACTGCACAGATGGATGGTACGTACGGTCTAAGTAGCTATAGATTTCTCGGTCAA -AGGGAAGACTCTGTATTAAGAATATCATGCTCGCTTGAGTAACTCTTGTCCTGTATGCAA -ATACATCATCCCAGCTACTCCAATATATGTTTAATCGAAAATCACAACACCAATCAATAT -TCGCAGGAGGGTATCACGCCAACATATGTCCTCTTGAAATCAATGAAGAACATTCAAAAC -AAAAAGAAAGCAAATAGATCAAGAAAAGATGCCTAGGGGCAATGTCATGGCCACCTCATA -GTTGGGCACGTTGACCACTTCAGCGATATGCAAGTCACCAGCAACGCTAGCCAGGATGTA -TGCCTCACTGCGAGTGAGACCCTTGGTCTGCACCAACCACTGTATAAGATTCCGAGTAGC -GCTTCTGGTAGCCTCGTATAGGTCAGGAGCAACACCGAGAGCACCGTATGTTCCAAGGTC -GGGAAGGGGGTTTGGTATTTGAGCGCGGGGGGGAGTCTGGAATTGAGGAGCGGTCATCCA -GGGTTGGTTCTTGAGGAGCTCGAAGCGGAGGGTAGCGCGAATGGGGGTTTCGATGGCGGT -GCCGCAGACCTCCCCATGACCCTGAGCAGCGTGACCGTCACCGCAGCTGAATAGGGCGCC -AGCAGTCTGGACGGGAAGGAAGACAGTGGAGCCGACGCTGAGTTCTCGGCAGTCCATGTT -TCCGCCGGCGTTTGTGGGTGGCACAGTGGAGAGTTCTTCGTCGGTGGCTGGGGCGAGACC -CATGCATCCGAGGAAGGGACGCAAGGGGATGCGCATGTCTTTGAATTGGGCGAAGCCTGC -TTCGCGGTTAAGCTCCCAGATCTTGATCTCTGGCTCTGGAAATTCGTCTGCAAGCAGACC -GAAGCCTGGGATGATGGCGGACCAGCCCCAGTCAGCGACCTCGAGGTCTAGTACTTCCAC -CTTTAGGACATCACCGGGTTGAGCGTCGTTCATATAGATTGGGCCGAAGACAGGATTGGC -TAGTTCAAGATCTAACGTCTTGATGGCCGAGGCCTTGGATGACGTGTCGAGTTGACCATT -GCTACTATCGATAGCATCGAAAGAGACAGTCTCGCCGGAGGAAATGGTGAACTTGGGAGT -GTTTCGTCGAGACCACTTTTGGTGGCTACAGTCTCTTCCCACATGAAAGTTGCTCCATTC -CTTTGGTGCCATGCTGGATGCTTAAGTCGGATGTCTCGGGGTTCCTGTCAGGGTCGTTGA -AGAGATACAATAGATAAAAAGATAGATCTAACCAATAACGAAAATGAGAAAAAATCAGAT -CGGGGGAAGAAAAAAAACCGCTCGTGGAATAGACGGATGATTTTGTATCTAAAGAGCCCG -GGAGATCCTCTTCAATATTGACCGTTCTGGAAGGTGGCTACAACCGGATCTTATGCTTCT -TTTTACCAGCTCCACCCCGAGACTGAGTCGCCTGCCTAAAGTAGTTTGATAGGTTTCTGA -CAGATTGGTCCCCCCCCCCCTCCCCCTGTCGGAGCCTCTGGGTTTCCAACAGTAGCTCCA -TTGGAGTATGAAGATTCCCGATGTCTGGTTGGATAGAACAGGTAAGCGAGGAAACCGTAG -GGGGGAAGGTCACGGAGAAGCCGATAACTTTTATACTGTTAAAGTTTCAACTGTAGATAT 
-AAAATTCAAATTGTATCATTTAATAGAAAACGGACCTTGGAAAAGCCGAGCACCTAAGTT -TCCTTGGACCTTGGAACTGTCTAGATTTCCCTGGATTTCTCCGTATAAAAACGCCACAAA -CTGAATTGATTGGTAAAGGGTTTCTTTTTTTCCCTTTTTCACTTAGGCTCGGACCAAGAA -ATGAAAAGAGAAATAGTTAAAATCATTTATGAAATATTTATAATCAGAAAATGGAATTAA -ATATTCAATGAAAATCCAGAAAATCCAGAATAGTCCAACACCACCTTCTTCATTGGGTAA -AGAGCAAACCGTTGGTGGACTATGAGTGGCACAGCTGCATAACATTTTCTCCAAGTGGAA -CTTTTGTTTTACTTAGGTTTAGGTACTATAGGACTTCGGACCACCTATCTGGACATGTAG -CATCGACTATAATCTATAATTCACGATCCCTGTCCGTCCAACATCGCTACAATGCACGTT -GCATTGTCACTACGAGGCTTGTTAGCACTGTTGGTGGCGATTTCTTGTGCAATTCCACTC -GCTCGTTTTCCCCGCATCGATAATTTCATTACGTGCTGAACCAGGTTCTTGTCGTCCACT -CGATCGGTAACTCCGTCGGACACCACCAGTAGCAGGTAACGTCGATCGGTGTTCAATGTC -CGCCGCGAGGTATATGGATCATTGGATAGGAAGTTGCCCCGAGATTCCGATCGGCTGGTA -GCCGATGAGGCGGGGTCTAGTATAGCGGTGTCCGTGAAAGTATTCACCGGATTTTTATAC -TGTAGGTCGCCTAGTGCTCGAGACATGTTCAAGGAGCCTGTCTAGCGAAATTAGTTGTGT -TTTTGAGGGCCGATCTCCTCTTTAACCCACCAAGTCGCTGCGTGCCGCTGCGGTCAACAA -CTACACCGCCTGCCTGCTCGATTCGGGTCTTTTCGCTGGGAGTACCGGGCTTGTGAGCCT -CTGTGAGACGTCGCTAGCGTGTGTCTGTTAGCATCCATGCTCAAATCAACTCAATTAATC -ACAGTGCATACAATGTGATACGGGTGCTCTGTTTTCCGGTCCCGCTCGGCCAGGATAACA -TGCGTGTCGCCCAGGTTGGAGACTACCAGCTCACCTTTGGTGAGGTTAATGCAGCATATT -GCCACTGTCGACCCTGAAATGGCAGGTTCGGCAGATTCATGCGTGAACCTATCCAACAGA -AGGCTGTCTTCCTCCGCCAGTGCCGCCTTGATAGCCCCAGGCCAATCGCCTTGTTCGAAC -TCCGGACGCCGTCCAAGCAGGTGGTGCAAACTGTTATTAACATGTTCTGAAACCAATCCA -GAGCCACTGGGGGTATTAATTAGTTTAACACTGGTCTAAAGGGTACATCTCGAGGCTTTC -CTTACTGCCCATCGTAGATTGCGAAGAATGCAACTTTATCCTTCATGTTCGAAGGGAACT -GATCCGGGAAAATGACTGCACACCGGTCCTCTTGATACTTCCTACCACCCTGGGACTGCC -ATTTGATTATTGGTAAGTATGGTTGTTGTGCGATGGCCTTGCGCTTTGACACTCACTTGA -CCAGCCCCAGCATCAAACAGCGTGATTGCGTCCGAGGTAGACATTGTTTTACGGAACATC -TGGTCCCTCATATCTCTGTGGGCCACAACCATCCTTGGGCAATTGATGTGCAGAGTCATG -TCCTCGTATCACTAGTGAAGGGCGTGGGCAACTATAGATGATGATCCACTGACATCATGA -ACGCCTTCTCTCCTTGACTGCTGCTTGGTTTACCACATATATCCAATTGGTGGCTTTCAG -TGAGTCTATTCCATTCCTTTGCCAGACAGATTCGCGAACGATATCTGGTATTTGACTACC -GGCAATGTATTTTTCTTCATCGATTACCTGAGCAAATAGCGCATTGATTTCTCTTATTTT 
-AGAAATCTCAATATTATGAGCCTAATTAACCCCCCTTACTAGTTAGATATCGAATGTCTA -ATAATACACATTTAAGAGCTTCTACTGAACAGACTCTGAACGCCGCATAGATCTTATTGA -TAGCGCACGGGCCAGTTTTCTCGGCGAATCTGCAAGATCATAATTCTAACCAGGAACAAC -CCTCTGAAACATCCTCCTCACTTTCAGTACATTTATAATCTAGGTCGGTATGGGCTGGGT -AGGTGTGGGCGTGCTTATTGCAACCACAGGCTACTTTCTCTACCGCTACCCTCCTCGCAG -CTGGGCCGAGCCTCGTCCTTTTGCCCCTCCGGAACCAAACCCCGACACGTCAGTTGCCGC -TACCACCGCAGACGAGCCTGTACAGCGGAAGCATGGCTTAGAAGCCTCAACATTACCAAT -AGAGACAGAAGAGGAGGAGGATTCCCAAAGCACACCAAAGGCTGCGGCTTCCAGCGCACC -ACCCTCGGTCCTTGCGGTCCCAACATTCAGCCTAGACAGCGGCGAGCGCGAGACATCGCA -ACCCGCCACGATGCCGTCCATTTCACAACCTATGAACGGAGCCGCAGAGCAAGTATCTTC -GACAACCCGTGATCATGTACCAAAGTTCCAACCGCCACCACAGCCATCCCTCGCGCCTCC -CCCAAAATCACAACCATCATCTACACCTCCGAAGGCAACAACAACCGGCGCTACTTCCCT -TATGCCTCCACCCCCAGTGCCAAGGTCTCGACCAGCAATTCAAACAAACCGCCTCGCCCC -CACAAGCACACTTCAACCACCACGGCTTAGTGGAAACTCCCTTGGCCCTCCACCATCAGC -CGCCGCCGCTCAGCGTGGCCTGGGCGCACCGAGTAGCGGCAGCCGTCTGAGCAACAGCAC -TCTCGCCCCGACACAGGTATCTCTGAAGAAATCTACATCGCGTCAAGTGGTTCTTGAGCC -GGGATTCTCGCCCCTTGACTGGGCAGCGCTAGCAGCGAACCCGAAGAACAAGCTCCGCGG -CGAAGGCCTTCCCCCAGGCTTGTTGAGGGTCACGCCGTCAATGCTCAAGGAGCAGCATGG -GCGTAAGGGCCGGGATGCCTGGACTTCATACCATGGAAAGGTGTACAATATTTCGCCCTA -CGCCCCTTACCATCCAGGTGGGAAGGGTGAGCTCTTCCGCGGCGCGGGAAAGGACTCTGC -ACAGCTGTTTCAAGAGATTCACCCGTGGGTAAACTGGGAGGGAATCCTTGGCGAGTGTCT -GGTCGGAATTCTGGTTTCGGAGCATGATGTCCAAACGGAGAATGCTTTGGATGCGATGGA -TTGATACTCGACCCACTGTTTAATTACTATTTTACTTTGCTGTAATTGTTCAGCCGGTTT -GTATACCGTGTACATCTTACCATATATAGATGAGATACCCAGCTGCTATATGATATACCT -TGCATGCGAAAAGGTGTATATTTGTTCTCGGCCGAAAATAATGGCCTGGTCATCGGAACC -ACTCCCAGAGAAAAACATGAGTAGATATCAAGCACCGCTCATTCAAAGCTCATCCTTCAC -ACCATCAATACCGACTAACTCAGTCTCAAACACCAATGTCGCACCACCGGGAATAGGGCC -AACACCGCGGTCGCCGTAGCCGAACTCAGGTGGAATAGTCAGCGTACGCTTCTCGCCAAT -GCACATATCCAGCAAGCCCTGGTCCCATCTGCCAACGAACACATAAATTAGCTTCCTTAA -CCAGGACAATGAGGTTTGTAGACAATTGCGCGATTGACATCCACAATCAGCCCCAAATAG -CTTCAACTCACCCCTTGATGACACGACCGGTACCCAGCTTGAAGGTCAAAGGCGCTTTGC -GCTTGTAGCTGGAATCGAATTCGGACCCATCGGACTGCAAAGTGCCGCGGTAGTGCATGG 
-CGACTCCATCGCCGTTGGTTGTCTTGCGGGTGCACTCAACAGCGTGGGTGACTTCGATAC -CAAGCTCGGCGGCAGCCGCGAGGGAGCCGAGTGCTGCGACGAGGATGGAGGTGAGACGCA -TTGTTTGTTTATGGGGTCGGGAGGGGAAAGAGATGAGTTTGGGAATGTGGACAGATAAGG -CGGAGATTAGACCGAGGTTTAGCGTTGGTCTGTGCTTTAAGCTCATGGTTTCAAGATCCA -TGTTATACCTCGATCGTTGTCAAGAATTTCTTGTAAGTTAGTTATTGCTTATTACATATC -TTTAATCAAATCTTTGAGGCCAGAAGAGAGAGGGGCCTCGCAATATAGTAATCGGTGTAC -TAAATATACCAAACGAAGCAGGAGTTGGATATATATATGTAGCTAGTTGTAACAATGTAG -TGGTTATGTACTTCAGGTTGGCTGAATATTGTGTTTCATTACTAAGTACGATTATGCGGA -CGTCATATTGTGATCATACCTAAGCAACTGGGGTCTGAGAAGCTGCTAGTCAAAATAATC -CGGGAATATCATGACATCCTCTTTTTCCGAGCTTCGGCCATCCGCCGATTTTCGTGCACC -GAAAGCCAATTCAAGCCTTGACCGTCTGGTTTACGAATTTAGCATCATCATTAGAAAGAC -CTGGGGCAATGACCTGATCAGCAGCCGCTTGCAGATCGTCCTCACCCATATCACCATCCC -AGCCGTTAGAAACAAGGTAAGCACGGAATTTCTCATCGCGCACTCTCAAATCTTCGCCCG -TCAAATCAGGTACAAAGAAGAAAGTAACTAAAATTCCAACAACACCGCAAATAGCAGCAA -CAATAAAGGTCCACTTGTTCCCTAGATGATTCTTAATAGGCTGAAACGCTTGCGTACCGA -CAGCAGCACCCGCCTTCCCAATAGCCGCAGAGAGTCCATAGCAAGTCCCCCGCACACCGG -TAGCATACGACTCAGACGAAATCAGACCTAGCATATTGCCCGGGCCGAAATTGCCGGAGC -TCTGCATTAGTCCGTAGAAGACCACAAAGAGAGGTACGATCTTAGTAACGCGGTCGTAGG -CGCAACCTATAATCAGGCCAAAGATTAGATATCCGCTGAAGCCAATCATCATGATGGGCT -TGCGGCCGAGCCGGTCGCAGAGAATTGCGCCGAGGAAGACACCTGGTAGCGCGATTGCGC -CGAGGAGGAGCTGCCATTCGCCTGTTCTCAGGATTGTGCCGTCGTGCACGACCGAGGAGA -TAATTGTGGCGGAGAAGACACCATTGGGGAAGGTGACAAAGTCTATATACGTGAGTTTCC -ACACAGACCCGAAAAATACTGGAGAGCTCTTTTACCGTAGAGGAACCAAGCACCGCATGT -CCCAATCAACGATTTCCAGTAGTATCGTAACACGAGCATGTAGGGTACATGCTTCTTGAT -CGCTCCGCGTCGATATAGAGCCGAGTTAAGCATTCGGATTCGGAAGTAGAATACTGACAG -CGGCCAGATGCATCCTATGCCAAAGCACACCCGCCATACGGTACTGTAGTGGGTCTGCTG -GCAGGCCAAGAGTACGATCAAGAAGATTGAGACCGCGAATGGGCCCCCAAATGACAGGGG -GAAGTTGGTCACCATGATGAATGCTGGGCCACGGTGCTTCGGGGTTAGATCATTTGCTGC -TTCCGAGGCGGAAGTTGAAGAGGCCGGATACTCGCCGCCGGCGCCTGGAGTTATTTAGTA -GAGTAATCAAATTAGAATGATGTTTTTGGGACATGCAAGTCATGCAAGTATGTTGAGGTC -CTACTTACCAAATCCGACAACGCCACGGGCAATCGTCATCATCCAGAACATGCCGTCGAT -AGAAACGCCATGCGAGGCTGTGGCGAGTATACCTCCGATGACAATCATAAGTGTTGTGAA 
-GACGATGGCCGTCTTTCGACCAAGATAGTCGCAAGTCAATCTGGTATGATATCGTCAGTG -GTCAAAGCCCTTTCATAGGTCCAATCTCTAGTAGTGTATTGTTCGTACCCAATAACAACC -TGCCCAATAACTTCGCCAACAAGTAGAGCGTTCGAAACGCGCGTGCTGACGGTGGTAGTG -TACTCCTTGGGATATTCCTTTTTGAGAAGAACATTGGTCATAGTCCTTTAAGATAATCTG -TAGTTAGACACAGTCGTGCATGGGCGCAATCATTCGACCTACATCAGATTGTTTTGGTAT -CCATCGCTAATCAGAGCGAAGCCGCTTGCAAACTGTTAAATACCACCCGTTTGGTCAGTC -TATCACTTGCCAAATTAACCCACGAGTGCAGGGGACACTGGAAAATCACCTGGCATGGAT -AGGCTGGGATGATGGACTCACAATTGTGAAGAGATCCGACAGACTCTGTTGCCTCAACGG -CGTACCCGATGAATGAGTAGGGTTATCTTGGTCGACAGTGTCTTGAACCACAGTCTTGGA -CTCATTGGACTCAGGGGGTGCGGTTAGACTGCGCGCTGAAGCCATTTTGACTTAAGAGTT -GGGTTGGGAAAATTCTTGGACTGTGATCCTTTCAGGCAGGACCGGGGAAAGGTACCAATA -TCCCTGGGAATCCCGTGCTATTTATTCCATTTTCGACCTGTTCTACATTGGAGCGGGGAA -CCCCTGTCAGGAATTTAGCCCTATAGAGGAAAAAGGTTAAGCGTGAACTAACAAGTAAGC -TAATGGCTTATCCACGGTTAGCTTCAGAGTTGGCTTGTATACCTTGGATGCCAAGGACGT -ATAGAAGACCCGTATGCCTCAGATGCAAAGGACATGTAAACCACTAATACTGCCAAACCT -AAGTCTACTACTGGTTATGTTTTCTATTGCTACTGACTGTCCGTTGAATGGTTCTGATAA -TTTTGTTCATTTCAAGTAGATACATTCGCTAAACGCCGTATATGGGTATGGCTAAAACTG -GCCCGGATCCCGTGGGTATCACAGATGCAGAAACAATATGTGACCATAAAATAAAAATTT -GACAAAGAAAAGCAAGGACATGACGCATTATTGGGGAATGATGTTCCCATTTTTTTTTAC -CATCACTCAAGCCGCAGATACCTCTATGCCCATTTGCCGAGCAACGCCATGAGCGCAGCT -CCGGCACAAAGAAGGAAGAAGGCAATCAGCACATCACGGATGGGAGCCTTGCGCATAGAG -TGGCTGAACATGAACTCATGCGCCATCAGCTCAACGAGGGCAGTATAAATGAGAATGCCA -GCGGAGATAGAGTCAAAGACACCGTTGACAATGAGGGTGGTGCGCCCGGTGGGTGGGTAG -CTGTTTCGAACACCCAAGCCAATAGCAATGGCAATTGGAGTCGAAATTCCAAAGCCAAAA -CCCAGGTAGTAGGGTGTGTTGCGTTTTGATTTGGGCCAAGGAATTGTAGCTAGACGAGAA -CCAAGTCCAAGTCCTTCAAAGGTTTGGTGGAAGGCAAGCACAATGTAAAGGGTAGTGAAC -TCGGGGCCTGATACTGCGAGAGTAAGACCAATGAAAACAGAGTGAAAGATGATACCGAAC -TCCAGAATGAAGATGGAAGTGAGTTGGGCGGCATAGTCCTCAGTCGCTTGAATGTCATTT -TCGGAGTCGCTGTTATTATCGTGATGCTCACGGGAATGACCAAGGTGATCCTCCCCAGGC -ATAATTTGCTTCGGGTCAGTGGCTTCGTTGACAATAGAGTTGTCATCAAGGTGGGTGTGG -GTGTTGCCTTCCTCGTCGTGCACATGCCCTTGACCAAAGTGCGAGAAACGCATTACCATA -AGCTCCACAAAGAACAGAACAATGATGGTCATGAGGATGATGCCCTCTACCCAGCTGTAT 
-TCAGTGATTGGTCCAGTCAGGCACTCATTCTTCAATGCCTCCTCCGCCGGAGCCAATAAG -TGAATGAAAGCAGTGGCAATGATCACACCGGATCCGAAGTATTTGGCGACGAAGAAGGCC -CATTTGGGAAATCCGCCTTTCCTGCTAAAGCGACGGGCGAAGACCGGGAATAGAGCACCA -AACATAGAGCCGGCCATGATCACAAAGATGGAGGAGATCCGCAGGCCCATCCGGCCGTCG -AACTCATTCCCGGACTCACAAGCATCCTCACGCTTGAGGATAGTAGAGTAAGATTCCATT -GGAGTCAATTAATGGTGACACAAGAAGTATGAAAGAAGCGCAGAGGTTGGACGAAAAGAA -CCTCTGCGGTAACAAATCAAGTGCACAGAATATGAGAAAGGTAGATACTCATGCAAGTAT -CCCAGTTGACAGGAGAAGATTTATGATCTAAATGAGATGAGAAGAATGGGTTTGATGACC -GCAAACTTGTGGCGGGGGGGCGTGTGGGAAGCTAGTTGACCGCAATCCCCAAGGTCCTCG -ATGGTAGGCGAGATTCGTCCCTTAGTCCACTATCCGCGCATGAGATCACCAGGATATTTT -GACAATGCCAAACTTTGTGAATATCAATTCAACAGAAGGTAATGCGCAGGAAATGAAGGT -TTCCTATCCTGATCGCACGTGTATGAGTCGCACAGTGAGTAGCACGTTGAGTCGCACTGC -AATCTTTCGATAGGTTTTCTCGAATGAGTACCCTGGGGACAAGATCGGAAAATTCAGCCG -ATGTATAGATAAGAATCAATGACTTGCTTCAAATAAGGGGCATAATCAGAGTGGCATCAA -AATCTGCGGGAGCTTTTGCTTATATCACATCAATCACTGGTGGCGGATGGATCCATGAGG -ACCTTGGGAGCGCGAAGACGTCAAGGACGGAGTTTTACACTCTATCGCACGAGTACCGGG -TACCTCTGTACCAATACAATTGTAGGTGTACCCGAAAGTACCCGGTACCTAAGACTAGCG -CGTTGAGGACCTTGGGAGTCAGTTTCCAGGGTTCCGATTGCGGTGTTTAAATCACGTGTA -CTCCGTACAGGCGATAAGGTCCTCACTGTGTCCTGATGTTTGTTCCATTTTTTCGTATTT -GATATAACAGGTCTAGGAATATTGGACGATTGGAAGGGGGTGAAGGAACTGTGGTTGATG -GCCCCCTTGAGTTGGAAAGATTCCTGCCAGCCCTTTCAGGCGTCTCAATCGCCAATCAGA -GGTTCGGCGGCAAATGCGCTTCGCATGACCTTGAGGTCATAGGCATTTCGACCACGCCCT -TGCGCAGCTCATTCGAACATGGCTCCCATTCGACGATACCTCCGCATCAGCAAATACTCA -GTATTGGAATGCCGCATCTATCTGGAATCGCCTTCTGACTCACGATGGCTCCTGGACTCT -CGCGATCCTGTCCTGCCCCGCGTCATCGCAGCCGTTCGACCGTTGGTGCTTCCGAAGCTG -CGTGAAGAGAATGAGCGGTTGTTCATGCGCAAGAAGGGCAAGCCGGTCAAAGACGTCATT -GCAGAAGGTATAGATTCCTTGATATTTCTCGATAAACCAACTATATTAACACATTACCTA -GATGACTTCGAGGTGGCCATCTTCCTCCGCGAATCCCGCACCCGTCATGCCCTCCTCACG -CGTAGCAAGACATTTCAAGGAAAGGAAAATCACAATCAAGAGTTGAATATGGATGTGAAA -CCAGACCATGTATCAGACGATACGGTCGACAACCCGGTTAGCTCGGCAGATGGTGAAATC -ATGATCGAGAGTGACAGTGAGCCTGAGCTGGAATTGCACAATATCCCCGAATCTGTGGAT -GAAGCTGCGCTAGGGAATGGACGTCGGTCAAGCACGAGAAGTCGGACAACTAAAGAAAAT 
-CAATTGCCCCTCGAAGACACGACTGGCAGTGACGAAAAGAAGCTTCGTTTCAACACCCAT -TATGAGAGCTTTAATATCTGTGGATGGGTGTTATGTCTGCTGATAACCCGCAAGGGCGAC -AAAAGCAGGCAAAGTGCCGCCCTCTCTGATACAAATCGGCAGCCTTTGATGGAAGAATGG -ATTTCAACTCAGGCTCAGGTTTCTATTGACGATTGATCAAGGCTAGGTCATCGGCGAGTG -TGGAGCTGGCAATGCTTAATGTGGCCGCCAGGAATCTTGCTTACGGTTAAGCAGTTAAGC -TTTCCCAACCTTGTACTGTGAGTAGCCGTGGTGGGATGATTAGCCGCAGAAGGTCTCGTG -ATCCTGCCAGCCCGCAATGCTACCATAATTATTGGTGATATGTTTTGTTTCAACCTGCAA -ACCCTACACGATCGTGCTTCAATTTGCCCGCCTCGCAAATAATGTGTCAGAGACTTTGTC -TCATCTTGTCAGATCATAGGCTCAGGGATGCACGTTGGCTCCAGAGTCTACGAGGCTTGG -TGTCACTTGGAAAAGCACAAGACCTTCACTACCGGGCTGTGACGGGACTCGCACCAGGTA -TTGTTGTGATCGGCAGGTGGGTGTCACCGAAGATCAAGCAGGTTACGCAAGTGGCGGGTT -GTCATACATACTCCCTGGATGACTCCCTCAGGCACGGTGCCAGGTAATAAGGGCCACTGA -GACGAAAATGTCAGTCATGCATCCAAAGCATGCAATCAGTGACGTCTTTATGATCTGCAT -GCTATAAGACATGGTCTGATGTTGGTTTTTCTACATGGCCTTTGGTTTACATGTTTCGGT -CTGATTCATGACCCTCAACGACAATCCGGGCAAGGGGATGAAACTAGTCTCGTAGATTCT -CAATTCGGAGACATTTCGATTGCCAATTTCAAAATCCCCATGTGGTACGGGCTCTCCCAA -CCTCGGGCCGCAACTTCCAGAAGGCAAGCTAGAAGATTTTCAAGTGGAGCTCAACTTGAA -ACAGCCCGCCTCTCGTCCCGTGACCCCAACACCTCAATAGAGAAGTATTTTATTCCTCCG -ATGTGCTTTCGGAAGCACTGTCCAGTACTCCGACAAGTACCTCGTACTAGAATTGAACAC -AAATACTACGATTGATTTTTTTTTGGTACGCTCCGTACTGTTTAGTATATTTGTCTCCCA -AGCATCAGAACTCATATCTTGGCCTCCAAAGTAAGAACCAATAAACAGGCAATATGAATA -TCCCAAACCGATCGTGCTACGATCAGGGGGAATTCCCGATCTGTTAAACAAGTCATAAAT -CTCCGACTCTCCACAGCTAATTTTTTTCTTTCCAACGGTGCGGAACGGTGTGGAATCTAA -GTGGAGCTGACTTTGCCTAACTGGGATTATTGAAGTTTGCGGTCTGCTAGTTCATAGAGT -TTTTCCCATGTGTTTTGCCCACTTTCTTTTTTTGCCACTTCCACTTTTTCTTTTCCCCTT -TTCTTGGTTCCCCCGGCCTTAGTTCAGGTACACCCGTGTGGTCGCATATGTTTGCCACGA -AACTCCACTTATATATATGCCTTGCCTTTTCCAACACGGTAAGCTGGGAATTTAAATGAT -GTTTCTAATATAGGTTCCAATTATTATCTCAGTATTTATAATCTTACCTGCCCCTTGGAA -CATATGCGGGGGTTTTGTTTTACAAATCCCGAGTACCCGTTATCTTGTATAGGGTATTCC -CGAAGGGCATAACGTTAGCACGGTATTACTACATAGGGCAGGGTGCATTGATGTGGAATC -CATGCGTACTATGTCTACTGTAATAATCTATCTGCCTGGTTTTATATGCCGTGATTAAAA -AAAACCTGAAAACAAGGAACACGGCGGATGTAACGGGAGAGCATTACATTTATTTAGGCC 
-CGGATAATTTCCGGGGAAGACTGTGAGCATTCCGAAGCTAAAGGGTCTCTACGTAGTACA -TAGTAGCAGTCTGTCCTCCGtatatatatatatatatatttacacatctaGTTCACGCAA -GATGCTCACACAACACACACTCAACGGAAGTAACCACTACCTATAATCAAGCACGCCATG -AACGCCACACCAGGGGGTGAAGTAGATCTCCCATCGTGAAACTTAAGGCCCAGTGATGTG -CACAACGTAACCACTAAGGAGATGGGAGGGGAAAATGTTGAACAAACCCCTCTCGATGTA -ATCTTCACTATCAGCCAATAATCAGGAGGACTGGGGTCATCTATTGCATACAGGGGTCCA -TTCTTCTCCAGATGTGCTACGACGCTGTTCTGCATACTCCGTACTTTCGAAATGATGATG -TGCAATGCACAGTCCTCTGCAGGAAGACGATCCTGGCTAGGGACTATCAGCCACAATGTT -GCTATAGGCGGAGGTTAATCGACTCTGTACTCCGGTAGGGGGGTGTGTAATTTTATACGT -TAAGTGCTAAACAACTCAAAAATCTGACTTCATTTGTATTACATGCTGTTATTAGAACCC -CGGAGGGCCTAAGCGGGAGTAAGTGATAAAAGATAAGGAAAAAAGGGGATTATGGATGGA -GTACGGAGTACGGAGTAGTATGGAGTACAATATGGTAATTTGGAATAATAGCCGAAGTGC -ATGCAACCGGAGTACGGAGTACTAAAAGCGAAACCCGTTAGAACTGCCCAATATCAATTA -CCCCAAAGGGTTCTATGTGTATTCTTGTCTCAACCATGGCTTGATCTAGTTTGGTAGGAC -GGGTAAAAATACCACGGTTCAGGATGGGTCCTCGATTGGCGACTCCAACcctctttcatt -ttcctttggattttctttttgttctccttttTGGGTTAACCCCTGGGACCGTACCACTCT -AGTCGATATTGTCTAAATGTATACGGAGTGTGCATATGTACCTGGCTAAGTTGGCATAGA -TATGTGCATGATATGTCATATATCTGTTCAAGAAAGTTCGGGAACGAATCCGTCGGATGA -ACAGGCTTATCCTTATTCTGTATGTAATCCCAGCATTCTGGGATCTCGGCTGTCACGCTC -TTAGTGTGTTATCAATACGAATCGGCCATCCCGTTTGGCCCTAAGTCGACGGAAAAAAGA -ATAGAAAAATcttctagtattttctttttcttctagtttagttttctttttacttttTTT -TCCCACTAACGCTGCTCTCTCTCGGGATCTGCAAACATGCACGTGTTGTGATCTGACGTC -TTGTTCTCTCCTATTGAGTCATACAGGATCCGGTGTGCTTGGACCTATATGGTTTATCAT -GTTCCAATTAGAGGTTGTACAACTAATAGAAAAGAAATATTAAAAAAAAAGAAAAGAAAA -TAGGAAAAAAAAGTCGTTCTCACCGCATCAAGGAACCGTCCCTATGGAGCCTTGGTGGGC -CCACGGGAGATTGCCCTTTACCAGTTGTCATCTCGCGTGGTTTCATGATTTACTGTACAT -GGGGGGATTGTGAGACCTAAGCAGAAAATTAGAAGAGCTGCATAGTTAGTCCATGTACCC -TAAACTTCATCTGTACTATGTATATATTGAGGTAGCACGACATATATGTTTGATTACTAT -GTCTCATTTTCCCTCCACTCTCCTCCACTAGTGGTTCACAGGAAACAGAAAGCGCCACAA -GAGAAGGAGTGGGGAACTTCTAGAAAAAAATGGGAATTTTCTATTGGATTACCACTTGGG -GCTGCATTATTCATTGCCTTGCCCGAGGGCCAAGTTGAGCAAGTAAATCGCATTAAATTC -CTACGTCAGATCAGGTGGATACTCAGCCTCCTGTTCATGGCAGGGATGTTCGATGGGGAT 
-TTTAGGGCCTAATTTAGTCTGTAATAGTGACGGTTGACCGGAAAGGTTGGGGGGGGGGGA -GAGGGGAGACTTTTATTTCTATAAGAGTCCTACTCCCGTAAGCTTTTTTTCTTCCCCTTC -TCTCCATCCACTTTACTCTACTCTATACCTTATTCTGCAACCTGCCAGCTTCTTGGTTTA -TCCAATCTGTCAACCTTGAAGAACTACTGTCAACTCTCAATCTTCAAACCTTCCAACCCT -TCCAACACAATCCACAATGCCTTCCGTCGGATCTTCTCAGTTCCCTCCATGCCCCGGTCC -TCCTCCTAAGGGCCCTCTGCCTCCGCTTCCGAAATAAACCGTCCACAGTCCCCTCTTCTC -AACGAACATGATGACCCCGGCGCATTCTGGTCCACCAGATCCGCCCACGACCTGGACCCT -CTCAGCATGGATATTATgacttcgacttcgacctagtttcgatctcacctgcgattcttt -gttttcgattcggacttTCAACACCTAATTTACACCATGGTTCCGCCTAAACAAAACCAC -GCCCTTGCTTAAAGTTGGTCTTGATACGGCAACCCAAAGACCGCTAGCTTTTCCACCGTC -CATGCTTGCCTCTTGGCATTCCAATGCGACAATGCATCGCTAGCCACAGCTTTATGCGCG -TGGTAAAAAAAATTTGTTTTGAATGGGCCTCGGACAATTTTTTAATTCTCATTTTTGGGA -TTCACTCTTCTTTTTCTTTGTCCTCTCGCTCTTCAGCAACCCTACTCGTCCGCAATGGGA -CCCAGCGCCGGCGCTACCATTCCCTCGGGCTGCATTAATAGCCTTTCTATTTCTTGCTCT -TTGTCTCACTTCAACTCTCGACACTATTAATTACCCCTCGGTCTTCTGTGGGGCTTTCAT -TCCACCCATTCGATCTTGACCTACCTGTGACGCGCAGATGCGGAGAATGGTGCAGCTCCT -CAGCCGCTGCACCTCAATTGGACAATACGAACTTGTCTACCACCCCTGTCTTGGTGCCTA -GGATTGACTGGGGTTACCGTGCGATCGTCACCATAGCACTGATCAGGCGCATAGTGTCTT -TTATTTCTTGCTTTTCCTCCCCTGTCTGCCGTTGTGACTTGTCATTCTGACCGTTTGAAC -GTCTGATTAATTCTCTTTTTTTGGTTTCTTTTTTCCCCCTGTTTTTCTTATGTTCTCTCC -TGCATCTGGCCGAATCTGAATCGATATGTATCATAGACTTAGTATACCCAACAGAGCAAT -TTGTGTTTTATCTACAACGTTCCAAGTATTTGATTAATTGCTTACGGTCCAAGCATATGT -CGTGCATAAGGAAGACCGGGGACTGCGGGTGACACCCACCTGACCGCCCGTTTCACTAAC -CAGCCGGCATCCCGCACAAAATCACTAGTAGCGAAGCTAGCAGGGATCTCACGGGGGCGG -ATGTCGCGGTAGTCCATTTGACGCATATTTCGCGCACAAATGAATACACCAGATACGAGT -GATTTAGAATCTATACGTCAGTATGACTCATTAGTTAGGGGCAGACCCATTTATGAGACC -CCATGGTGTATACGCATCTCTTCCATCATAAGATCTGAGGTAGGTCCGATCTGACGCGAT -CCTTATCGAGCCTCCTATTAGATTTACAGCGCATTTGCCAGATCCAGTCCAATGCATGGC -TGCAAGGCCGATGCATTGGACATTCCGCATATCGCCGCATGGCTGGAACTCATATATGCG -ACTCGAACCAGTCTTAAATCACACCATACTATCGCACCCGGTTGCATGCATACACCCAAA -ATGGGGGGGACGTTGGCCACACTAAGACATATCGGTAATATCCCTAGCATGTGCATATTT -AGCTCGTAATATCGACGGCTAAACGGTAAACGGCTGAGCGGAGCAAGCAGGAAGCAAGAT 
-ACGAGTTTGGCCCCGTCCGAGCGCCTCGGGTTTTAGTAACCATATCATTGGTGTCCTACA -CCTTAATGTTGTTACGACTATAACACAATGGAGATTAGCTCATTTTTTATATTACCAGGT -ACAGTAGCTGGGCTAACATATGCTCCATGCTTGAGTTGTTTCTCCTAGTCAATTGTCAAA -AGCACACCCGAGCAGTGGTTCCAAGCTATTTCCAAGGAAATCCGATCATCCTATTGATTA -GGATATGATCACTGTGGCCTGCAAATGCATCCAGGCGTGCTAGGATCCTCGTCAACCGTT -GGCCCATCACTTGACTGATACAGTTGCATGTCGTCATTGAAGAAATCCCGACAATCTGTG -TCCAATAGATCTCGCAGATAGTGTTCCAAGGTTCCTTCCCCCGCTTCTACCCCTCAACCC -GATACACTTTGTTTATCCATGGGTTATCCTTGGATTTTCCATGGATATCAAATTGACTTT -TGTTTCAATTTCCAAGCTAGGCTATCTTGGTGGGATCAAGTGACCACAGTAAGCTTTCGG -CTCAAATCCATGGATCATGGAGTTTTGACCTCTGTCTCAGGCCCCAGGGGATGTCTGTGC -TACTTAAAATAACCTTGCAACCTTCAAGGAACCGGGTTGACAACTCTGTGTGGCTGTCAG -CCCTGGGGCCCTAGCATATGTAGCCACGTGCATCAAATGAGGTCATTCGCTGGCTATCTC -ACTCTATATAATAATCGAGTACTTTATATGGACCCGTGATGTACTGGGCCAAGGATTATG -TTCTAGGCAGAGACGGACCTTGCGAGAAATAGCGGTATACAATAGATGCGTTGGGCGGGC -TCTCAGCTCCCAGGGCCGTGCTGTGAGATGCGGTGTATATGAATATTGAGGCTCTTAATG -TTCTAGATGGCCTGCGTCGTATGACTGAGCCCCATTGAGCTTTGACACCGATACCTTTTG -CCCGGGCTGAGTGAGATGGATGCTCCGAGTACCAGGTTGAGTATGGCTTTTGGGCCAGGT -TGGAAGATTTATCCGATTCGCTCCCTCTTGCATTACGTATATCTCAGCCATTGTTAATCT -ATCCAAAACCATATACAGAGCTAATTATCAATGATTCTGGTGTAATGAAATGGCGTTGCC -TAGCTCGTAATTCAAGCCACATCTTGGTTTCTTGCTGACTTCCAAGAAGCTTGGATTCTA -GCGCGACACTTCGTCGTATTTTTATTTACTTCTATTTTGACTTTCAGGTTGATAACGGAA -TTTAGTCGTGGAGGATACATGTAGCTATACATCCTAGTATAGATAAACGAGAGCAAACCA -ATCTGGAGAAGAGAAAGGAATCTCTTTTCATTTTTTATCGAGAATCCGTCATTTATAGAC -CATATACAGACCCTATACGCCTCGATTAAAATGAATAAACACATTGTATCTTGTAGTCAG -TAGAAGCCGAATATCTTATATCAAGAAACCGAGGATAAACACACGTGATTCGCCCGCCCA -AATCAATCACGGCCGGCCCTGCTTCCCCAAGCGCAAAGCCACCATGGTACTACAATTGCA -TGTATGGGGCCCGGCCTTTTCTCTGCCCTCAATTGACGCACAATGCCTTGCTGCAATTGC -ATACTGTTCCGAAGTCCTCCCTCAGGACTCGTGGGAGCTGATCGCGAGCAGCGATCCCTC -CGTGTCCCCAACAGGTCAGTGACGTGACCGGAACAAAAAAAAGCATCACCAGAACCAGAC -AGCTAACAAGACACTTAACAATAGGTGAGCTCCCAGCTCTCCAGAATGGCTCTATCTGGG -TGAGCCGGTTCCGCAATATCGTCGACTACCTCCGCCAATACTCGGAGGGAGCATGGAACC -TGGATCAAAACCTAGACGACGTACAAAAAGCCGATAGTGTTGCGTACGCACTCCCTACCC 
-CCAAAATGGCGCGAGAAAGACAAAATGCTAACAATAAACCCTCCCCCCAGCTTCTCCTCC -TTCATCGAATCCCGCGGCCAATCCCTCCTCGATCTCTCCCTCTACGTAACCAGTCAGAAC -TACTACGCCAACACATCCCCAGCCTACGGCTCTCTCCTCCAATGGCCAAACCAATGGATC -CTCCCACCTAAGCTCCACAGCGCCGCCAAAACCCGCACCGAACACCTCGGCCTATCCTCG -CTAGACCTGCAAGCAATGGAAGACCAGCGAAAACGCGAGCACTCTGCCGCCGTAGCAGCC -GGACAAATCCCCAGCAACATGATCCAGCAGCCCCGTGACACAGTCTCCAAACTCCTCGGC -CGCACAGCGCAAACCAACCAGTTCCGGCTGGAAGCCCTCACAGCCGATTTCTTCGAGCCA -CTCGAAGCAATGCTCGCCCGCAGCAAGATTTGTCTGCTCCCCGCCGACGACAACAACAAC -CCCTCCTCACTGGATTGCGTCGCGCTGGGCTACCTGTCGCTGGCGCTTGTACCGGAACTA -GCCTTCCCTTGGCTGCGCGACGCGATGCGCGCCAAGGCCCCGCTACTGACAGCATACACG -GAGCGCATGCGCAGCCGCTGTTTCGGTGACGCACCCGTCGAGGCTAAGCACGCTTTCCAG -CCGACGGCGCCAGCTTCTCTGCCGTGGCGCGCACCCGAGCGCATCTCCGTCGCCGCTGTC -GGGACTACCCTCCTTGGCACGTTAGCGGATAACACGCCATTTTTGCGTGAGGTGCGCCAG -AATAGGCGGTTGAAGCAGGCCGTTGAGGCTGATTCGGCGTTTAGCTCGGTCGAGAAACAG -GTTCTGTCTTCGTATGCGGATTCTAGTAACAAGGATATGTTATTGTCTATTGCCACTGCT -GTGGCGGGCACGGCTGCTCTCGTTGGGTATATGGTACATGTAGGCTTACTTTCTTTTTCG -ACTGGAGGTGTCCAGGAGGAGGAAGAGTTTGAGGAAGATGCGGGTGTGTTGCAGGTTGAT -CCTGGGTCTGCGGCTGACTTCCTGGGCGCGTTCTAAGCATTGAAAGCTCCTTGCGTCTGG -TCCCGTGGACGTTAGGGGATTGATGTTTTTTTTTACTATTTGGGGTAAAAGCGAAGGCTG -TACGATAGCATTGCGTTTTGTCCTTTGTACTTGTGTGTTGACCTCCGGTTGCTTGCCACA -CCAAAATTGCAGGTCTTGGGTCTGGCCCACTACTCAAAACATCTCCTTTGTTTTTAGCTT -TATAATCACATTCAGATTGAGATCAATTTGTTTACTATGATAGATGATTGAGTCCTTAAA -GAAAATCCTGGTGACATTCATCTTCGGTAGATATCACTGCATGTATGCTGGCATTCTTCC -GAAGCCCTAACTCTGAACTTGACATCTCAGATTCCGCCACGGGAATTCTTGAAACTACCT -AAACAGTGTACTGAGGTCATAACTTTATGCATAAGTTTAACGTTTATGACATGTTCAAAT -TTGGACTTCATCGGTCAAAATGGCAATACTGTAGCTAAAACATGCACAGATCATTAGTTG -AGCCTCATGCAAGCCATACAATTTAGTGCCGAGGGATCATAACATATCGGATTTGGGGTA -GTGTGACTTGGCAATAGATACGGTTTTGTTTCAGGTTCATGATCGACCAACTGGCGTCAG -GCACTAATGATCCTGAGAGACTAGAGGGGATAGTGGACCGATGGCACACTGGGGTAGATG -TCAATTTGTAGGTATCGGCGCTCGATGGCCATTCAATACAATAAAGTATACATAGTAACT -AGATTTAGATGAAAGGAAAAAGGAAATGATATCAACTCTCAAATCAGATGGTATATGTAC -AGAGAAGAAAGACGAAAGGACGCGAATCATGACCACAAAGATATGTATGAACTCCTAGAC 
-GGGAAAAAAGGTATCCATCGAGGAGCCGCTCGAAGATGGAAGGATGTGTGGCCAGGATGG -TTGCTGGGGCAAGCGACCTGCGCACGCACACATCGAATCAGGGAAAAGATACAAGGACAC -CTTCGGCGTTGAACCATTTGCGCAGGAAAGACGAAGTCTCAGTAGAGAGGATAAAGTTGC -TGAAAATAGGACGAGATGGGAACACTTGGCGTGACCGTTTAAGAGGACCAGCCCAGGTTG -ACGACGAGGCGGACGGGACCAGCGGACTGGGAAACGGCCTTGTCGGGCTTGCCTTGATCG -GCGGAGGTGTAGGCGCTGACAACGCTGGAAGCACCCTGGGAGATAGAAGAGCACTTCTCA -CCCTCGTGGTTGCAGATCTGCATACCAGCAATGTGCATGTCGGCGAGCTGGGCAATGATA -CAGGAGACGTCCCAGCCGGAGAAGCCGTCGTTCTGGGAGTTGCTCATGTCGAACTCACCC -CAGGTGGAAGCGTACTCGCCGACGACGTTGGTAGGGAGGCCTTCGCCTTCGGCGGCACCC -CAACCACCCTGAGAGTTATCATCGATGGCGACGAAGGTGCTGTGGCCAGGCTCGACAGTG -AAGCTCAGAGCCTTGTTGGGGCTCCAGAAACCGTTCAGACCACCGCTGGGGCCGTACGAG -TTCCACATGACGACGGTCCAGGCCTTGGAGTTGTCGTTCTCGAAGCGGATAACGTGCTTG -TACTCGCTGGCGGTGTCCTCTTCGACGCGGATCATGTTGCTACCCCATGGGCTACCGATG -TTGCCGATGTAGTCCCAGTCGAGGGAACCAACGCGCTTACTGGCAGTGGAGGAACCGAAT -CCCTTGGTGGAGTACTTGCCGTCGGAAGGGTAGTTGGCCCAGTTGCTGGAGCCAGAAGAG -CTGGAGCTGGAGGGGCTAGAGGCAACGGGGGTGGGCTTGACGGTCTCAACGGCCTCCTCG -GTGGTGGACTGGGCGGCAGCACTGGTGGTTGAAGAGGGAGCAATGGTAGCCTTGGGGGCC -TCGGTGGTGGAAGAAGCATCAGCAGAGGTCTCACCGAACCAGTTGTTGATCCAGTGGACA -AGCTTGCCGTCAATGGTAGCGGCAACAACATCTTCACCACGCTTCTCAACATTGGCAATC -TGAGGGGCAGCAGTGACAGTAGGCTCGACGTCAATGTCGACGACGTCTCGGAGGTGGTTG -TGACGGCGGTCGTGGCCGTGAAGACGAGCGACAGCCGACCCAGCGGTCAGCGCTGTCATC -AACAGCATCGAGTTAGAGAAATGCATCTTGAAAATTGAAGTAATGAGTGACTTGAGCGAA -AGCTCTGGAAATTTCAATTCAAAGGAATGTGTGTGAGGATGAAGAGAGAAGAGAGAAGAA -AGTGGACTGGGGGCCTATTAGTACAATACCAAGTTACACACAACGGGGTGTTAGGCTGGG -ACTTGAACCCAAAAAAGAAACTCCAAAAAATCAGGGGCCATCGATTTATGATTGGTCGGT -GCCTGGCGGTTCCTGGCCGTGCACCGATTTTACTCGATTTGATTGGTTCCAAGACTGGCT -GTCAAGGGCCCAGACTCTATTCTCCTTGGTATTGTTTACTTGGCTATACTATGTCTTCCA -GGCTGTGCAGGTGGGATAAGGGGTTGTCTCTCCTATATTCGGTATTTTTATATTCTCTCC -TACTCCGTGCTCCGTATTATTATAGAACATATTCTATTATTATGATTCCTATTATTGACA -TTTGTCACTGATTTATACTTGCCTAATGTCAAACACGTGTCCCATCGGAATGCTAAATAT -CTCGAAGACGTGGACACTAGAGCCCAACGTCCATATGAGGGTTTTAATACCAAGGCATTT -GTAAAATATCGATACGATGCACAAAGAACGTTCAAACTTACACATACACCTTAAGACGAC 
-TATCCCCGTCATCCCTAATTCCCCACATAAACCAATCGGCGCAGGTGGACAGTAAGGATC -TGAACTGTCTTACGAATTCCAAGGCAATCTGAAATAACAACTCACACTACAACATACGTG -GACACTCGTTAACACCTTATAAGTAGTTTAGTACCCAACTCATTACAACATACAGCATAC -AGCATACAGATTACATTGTACTCCCCGTTACAATTAATATAAGGATTGACCTCACTCTGT -TAATCGTATCCACGGATGTGCGCATGAAAGGGCGAATCTCTGCCGTTGATCTGCGGCCAG -ACCCCTACGCGGCCGATGGGTCCTCGATATTTTTCTGTAAGGCACACCCATTTGTATATT -CACTGGTGGCGTCGAAGCCGTTCTCCACCCACATGACGAGTCTCCGGATTGTCTCCATAA -TAGCAGGCCGAAATGGCCCGTCGGGAATCCAGGCCTTCTAACTCAACTCGGGCTGAATTT -TGAAGTCGTTTCATGTGTGTCTCGTCTTACTGAAACTTTGAACACACTCGGCTTTCCAAA -GATTCCGTATAAAGTACTCAGTACTCGGTACTTGGTATATCGTAGTATACTGTATCATCG -TATCAACAGTGTACGGAGTACGATTTTGCCACAGGCGTTTCCTGAAACATTAGGGTTATT -CGTGCAGGAACCCCTGGCAGACACCTTGATGCAGGTACCCACGACTTTGAACACTTAAAT -CACTCTAATCACATTAGACCCCTAAAGCCCCTGGTGAAAAAAAAAAATTGTAGAAAATGG -GCTTGTCAATAAACAAAACCAAGGTCGACACGGGAGGATCCTCGGTACGATCGATCTACG -ACTTGTCCTATTGGCTGTGGGAAATGAAATCTGCTGTCAGTGTACCCGCGAACCATAACC -ATAAAAGAAAACCACATGGCTCTCTATATTTGCATGGTAATCTAAATTAGACCCAGATAA -TGCACATGTTGTTGTAAACAGAGTATACAGAATAATGATTTCAGTCTCTCGAACATCATG -AATGTGGCTGAAAAATATAATCATACGACGGGTGGAGACGATACTAGTATTTACAAATCT -TCTCCAAAGGGGAAGTTGCCGTTTTCACAGGCGTCCATTTATCTCAACTCCACGCCACCG -TCAAATACAACACCACCAGCGGCCGCATCCAACTCCAAATTATCATTCAGCCCACAAACT -CTAGACAACATCTGGGGCGTCAACTCCTCATCAAGACCAGGCAACCCATCGCGTTTTAAT -CTCTCCAACCCCTCCCGCATAGCTAAGAGAGCCGGGCCCAAAGTCGCAAGTGGGAAGATA -ACAATCCGAAATCCAATCTCCTTAGCCTCCTTGGCAGAAATCGAAGGCGTGGACCCATGT -TCCACCATATTCAACAACAAAGGCCACGGCGCAAGGTCGGCCACAACCCGGTGGGCCATT -TCCCTTGACGAAATGCCCTCTAAGAACCCGACGTCCGCGCCCACGTCTCTAGCCGCCCGC -AGCCGCGCCAAACTCTCTCCATAGCCATGCGGCTGCAACGAGTCTGTCCGCGCAATGATC -ACGATATCACTGCCTATGCGTTCGCGCGCCTGGACAGCCGCTCGGATTTGGCTGGTGTAG -GTATCCGTGTCGACGAGCTGCTTGCCGAAGAGGTGTCCACATCGCTTGGTTTGTACTTGA -TCCTCGATGTGGAAGGCCGCGACGCCGGACCGCGCGTATTGCTCCGTCGTGCGGGCTACC -ATGATCGGCCCGCCGTAGCCGGTGTCGGCGTCGGCAATCAGCGGTGTGTACGGGGAGAGA -TTTGCTAGCATATCTGCGTTCGCTCGCATCTCAGTCAGCGTGCAGATTCCGAGGTCTGCT -TGGCCATGTACCGAGGCTGCTGTTCCGGCGCCTGTCTGATTGGTGGTGTCAGCTCGGGGA 
-TTTTGACGGGGACCCCTTAATCATCTTGCATTGCATACCATATAAAGCGCATCGAAGCCT -GCCGCCAGTGCTAGTCGTGCTGACACTCCGTCATAGACTCCTGGGGCTGTGATGAATACC -TCAGGATCCTGCAGAGATCGCCGCAGGCTGGTTGCTGCTGATGGTAGACCCATGGTTGTT -GTCTTATGGTAACGGAGATCACAGTGACCTGTCGAGCTGATCCGACATAGATATAACTAT -TATCTAGGTGAAGACTTAGGTGTTGACATACGCTCACCTCAGAAAATGAGCCCCACAAAT -AGCTTTCTCCCCGAAGGTAGGACGTCATTAAAGGCATTGGAAAGCACGTGAGACAAGCTT -TCGCTCATCATCTCTACACTCTTGAGTTCCTCTAGGCAATTGCAATCCCTCGATGATGCG -CAACTCTCCACTTCTCCAGAGCTACGGTGCCGATATAGCTCTCTTGTACCATCGCCCTAT -ATCAACGTTCCAGGGCACATGGACAGACATCTAGAAGATATACCTAAACTCGGTATTGAT -TCGACGAAAATTAACTGGCGTAGATGTGAGTGCCAGAAGTTGATTGCTGCGGTATTGAGT -CAATTAACCTAGGAAGATATTCAGGCCGAGGATCTCCGATGTCACAATTATCAGAAAGGA -ACTCAAACGGCTTGGGAACGTTGAACATATAGGACTATTATGAGCTTCGACATCTCAAGC -CGGGTACTTACCCGAATAGTAGAACTCACGCAATTTCCACCTATATAGTATAGTCTATAG -TGGTGATATATGACCGAAGCccttccatcctcctttgctccttcctatttgtccgttcct -tcctAGTAGAGAGCGGGAAACATGCAAGATTGGCCCCGCCCGCCGATCTGCTTAGTCATG -AACTTCCGTCGGTCCCATCTTGAATTCCATTTATCCCATCTAAAATGAACGTTTTGGCGT -TCATAATTCTTTAATTCCCGGTCCTCTACCCAATTATCTACCTCGATGGCTCGCTCTTCC -GTCGTTCAAGAGTATGCATCTCCCTCCTCCCCGAAGCCGACCCTTACTCTCGACCAAAAG -TCCACTGCCGAGCTCCAAAACCTACAAATAGCTCGTCCACAAGGCTATCGGGTATCATGG -CACGCCAATCCAGCCGTTGAGCCCCACCACTTTGGCCAGTCACATCCAATGAAGCCATGG -CGCCTCACGCTCACCAAACAGCTCGTCATGGCGTACGGCATGCACCACGCCATGGACCTG -TATCTCTCGCGCGCAGCGACCTATGAGGAAATGGCCGACTTCCACGAGAGAGACTACCTC -GACTTCCTGCAACAGATCATGCCCGGCGATATGGACAAGCCCGAGCAAGCAGACAACGTG -GTGCGCTTCAATTTCGGCGACGACTGTCCCATCTTCGACGGCCTGTACAGCTACTGCTCG -CTATACGCCGGCGGCAGCGTCGACGCCGCGCGCAAGCTCTGCAACAACCAATCCGACATC -GCAATTAACTGGTCCGGTGGCCTCCACCATGCCAAAAAAGCCGAAGCAAGCGGCTTCTGC -TATGTCAACGACATCGTCCTGGGCACGCTCCAGCTCCTCCGCCACCACCCGCGCGTCATG -TACATCGATATCGACGTACACCATGGCGACGGCGTCGAGCAAGCTTTCTGGTCAACCGAC -CGCGTACTCACAGTCTCCTTCCACAAATACGACAAAGACAACTTCTTCCCGGGCACTGGT -GCACGCGAAGACACAGGTCCCGCACACCCACTCAACCCAGGCGCCCACCACGCAATCAAC -GTCCCGCTCAATGACGGCATAGACGATGAGTCTTACATCGCCCTCTTTAAAGAGGTAATT -GGCGCATGCATGAAGACCTACCAACCAGGCGCAGTCGTTCTACAATGCGGCGCCGACAGT 
-CTGGGCTGTGACCGACTAGGCTGCTTCAACCTGAACGTGCGCGCGCACGGCGCCTGCGTC -GCATACACTAAGACCTTCGGTCTGCCCATGCTAGTCGTTGGCGGCGGCGGCTACACACCG -CGCAACGTCTCTCGCGCGTGGGCGCACGAGACGTCGATCCTGCTCGAGGCAGATAAGATA -ATCGACCCCACTATCCCGGACTCTGTCGCGTTCCGCAATCACTTCGGTCCTGATTACTCG -CTCTTCCCGCCCCTGTCGGAGATGCGCAAGCTTGATAATAAAAACACGCGCGCGTACCTT -GAGGGCCTTGTCGAGGCTGTCCATGAGCAGCTGAGGTATATCAAGGGTGCGCCCAGCGTG -CAGATGAGCTTTATCCCACCGGATATATTGGGTTTGCGTGAGGACACCGAGAAGGAGATT -GAAGAGCAGACAATGCTGATTGAAGAGGAGAGAGAGGAATTTGAGGGTGGTGGATCGATG -GCTGCTGCGGATGCGGCGGCGGCTGGTTCTGGCTTGGCTTACACTGGCTCTGCGGGTGCT -ATCCCGCGAGGTAATCGGCGACGGGATCTGGAGCGTGGGGCTGGATACAAGGGAGAGTTG -TATTCATGATCTTTGTGCAGCCTTGTGACGGGTAAGTCACTCCCAGGAGTAAAGCATGGT -TTGACATGGCGTTTTGTATGTGTTTGGATATTGGAGGGGAGTTTCGGTTGTCTTTGTATG -GTAATATCTGGTGGCTGGGTAGTTGTCATCTATTGTGATATCCCTCTCTTCTAGGAGAGC -TTTGTATAGTAATGGACAGAGACACAAAATTAATTGATCAAAATTCAAGCGGACTTGAAC -AGCAGAGGCATGAATCTTGGATATCAAGAGGTAAGTGAAATTTGGGGAGATTAATCAGAG -AGTTAAACACCAGACCCTAAGTGTATAAGGTGCCGAGAGCACCCAACTTCTTTCATATTA -TGACTATTTTTGTGTTTTGATGGATTATCTCATAACACATTTACACTGCATTAAACCCTG -CTATGTACCTCTTGTGTCATATATGGACGTGTAGCATAGAGGATATACTCAGGTTCAGGT -GAGATTTTAGGAAATTGTTTGAGATTTTTTGAATTTTCAGATTAAATCAGATCGGGTATC -CAGCAATGGGAGCGGCATGATGACCGAGAAAAACGCTGTGTTGAAACATCAGAGGGGCAG -GGGGGAACTATCAGCCGCCTCGAGTGGCTGTGCTGGGTATATCTGCGCAGGTTGACTACA -TCAAGGCagagagagagagaaagagggagcgagagggagCGTGATATCACTCAATGATCA -GGTGAAAGTGAGAGAGGTAAGGCGACCCGAGCAGCCGGGTTCGATTATGTCGGCCATAGC -ATGGACAGGGGCCAGAGGGTGGACGCATGGGGCTTTTTGGCGGCTTATGATGTTTCCTGG -GCGTCTACAAACTTCTCCGCTCTTGATGTTTGAGGATCTGTTCGTTCCACGGTGCTTTGG -GTGGCATCGGGGTTTGTCTCGGAGCCTGGGGGTGGTGGCATTAGCTGGGTGTGGCGGTAT -ATGCCTGTATTCCTAGTTCGCGGAAGCGGAAGGATGAGGGTGATGCGGCGGGATTTGAAA -TTGAGATGACTCCTTCTGACCTTGGACGATAGGTACGATTGAGAGGATTTGGTTGGTAGC -GACCTGGGGATCGCTCATGTCACTTGAGACTGTTAGTTATTAGTCTTTAGGGTTTGAGGG -ATCGTGCTTACACTTGAGCATTAATGATCCAATCAAGGTCTTCCTCGCCCTCATCGGGAC -CACCGGGACCACCTTGGATATGACGGAAAAGTCGAGGACGCCATTCAAGTCCCTCATCGG -CGCGCTTTGCCGCCTCCTCTCGCTGGCGGTCTTCGATCTTGGTCTTTGCGTCCGTTGCAG 
-CTTCATGATCCCGGGCAATAATCGCTTGGACAGTGTCATGCCATAGTCGCTGCGATTCCC -GCTCTCCCTGCTCCTCCATGGGGCGGACCAAAGGAGGGGTAGCCTTCGCGTGTGTTGCAT -CGAACAAGATGTCTTTTTTATGGGTCTGCTATCGGTCAGCATTGAATCCGGAAGGAGCCT -TGAGTATGCGCAACGTACGTGAATATCCTTCAGGTCCATCTCCCCATTCCACATTCCAGA -CAGTTCATATAGCACCTCTCCAGTCTTTTCGTTCTTGATAGTTCCCCCAATGGCGTTATA -AGTACCGGAGAAATATCCTTTGGTTTTGAACTCGATATCGGCCACGAGGTGATTCTCTGG -ACAACGCACGTAGCTGTGATCGCCAAGTTCATACCTCATCTTTCCGAAAAGAATGCCGCG -CGCATACATGTTGGGTTGGGTCAGAATACTACTTTTTGTCAGTAACCGAAAGGACTATAA -GGACCGACGCAACATACTATCTTTCGCCCTTTTCTTTATCTCCGTGGTTCAACAGCCGCA -GAATGGCGATACCCTCCATCATACTGGCGGCAGAGTTGCCCAGGAATTTACTGCGGGGCT -TGAGCGTTCCATCAATACGAATTTTATGCTCCGGTGCTGCAAAGAAATAGCTCGACTTGG -GTGGGTGGTGGGAGGTCTGTTCGGAGATGTAGTAGGCGCGGGTTCCATCGGGATAGTCCC -AGTAGCAAGTGAAGGTCTCTCCCAGAATGGGATTGAGCGGCTTCTTCACTCCTCTATGCA -AATATTAGAATATATTCCCAGACAAACAGCAAGCGGGCGACATACGGTGGTTTAATATGC -CATCCACTCAGATAAAATTTAACCACGGAGACAAAGCGCTCAAGGGGATCGTCAATGGTC -GACATAGGAAGGAGTGTTTCAGGGTGGGCCATGAAGCTATAGAGATCTTTTTGTCAGTTG -ATATATCTTTACATGTTATCCGGTAAGCCCCAAGCTGCCCGCCTGGAGCTGCGGGGTGTA -AAACGTACTTGGTGATCCGCTCCAGCATAGACCGGGGCTCGAGAATGAATGTCGGCAAGA -CGACACGACTCAGATCAGCACCCGGCCGCAACTGAGAAATGATATGAGAAAGAACTAAAC -CGATATATCAAGTCAGCTTGTCTCCGGGGACTAAGTGTGGATTTGCACGTACCGTTTCCC -TGATCGGGCTCAACGACCTCATCCTCTCCACTTGGCGGGGAATCTTCCATGGATGATCTG -GATGATCTGCCTGGATCAACGTTAGTTGACATGTTGGAATTATTTGATGTGTGATAGTGG -GATGAAAAGTTACGGGAAGACAAACTTACTGGCTTGGGAAGCACGGCGGAGATGCGGAAC -CGGCATGACTGCGTTCGGAAATAAGCTATACCACGGAATTCCGCTAAGCTCAAGGCATTT -ATGATAAAGAAGATATTCAGAGGAATAGGAAAAAATAGAGAAACAGATGCCTAAATAAAC -AGCTGACTTGTTTTGGCGGAAATTCCGGAAACCGGGCTTTTTTTTTTTTACTATGGAGNN -NNNNNaaaaaaaaaaaaacacgaaaaaaaaTAGGTTTGTGTAGTTCACAAGGAAAAATGA -CAGCACAAAAGGACACTAAAAGACAACATATTTAATATTCATACACATATTAAATGCTTA -GCATCATGGAGGGGCTAGTGACGTTTCTGATATTCTGATATTCTGATTTTCTGACTGGGA -TCAGCTGTAAGGTGCTTATTCCACCTATACTTACGGAGTACATAGGTAGCTATACCTAGG -CTATATCTGGGGTATCTACACTCCCTTAGTGATCCAAAAATGATAGAAACATATACTTCC -TCTAGCTTTCTGTTTCATTGAACGTTATCCCCTTAAAGATCGTCAAAGAGGGGCAGTCTT 
-GGCAGGTAATTCCCGTATACAGGCCGGCCCATGTTGTAGACAGGCCATAATGCATAGGCA -ATGTACAGAGTACCTTGCACTGAACATATGAGAGAACTTCCATTCCCTTTCTTTAATCTA -TAATCTAGTAAACAGTGGACATTCGGTCAATCAGTGGTACATCTCCACGGGTGATCTCCC -TAACAAATTCCATATCGGGCCGATGGAGCATGACGTGGAAGTGTTCTATCCCGGGAACCG -ACTTGAGTGATTTCCAGTTCTTGAACCAGACCTATGTTGTCAGCTTTTCATCATGGCAAT -GCTGACATCTCTCATACCTTTTCCTCTACCCGTGAGAATGTCTTCTGCACATAGCTCTCT -ATGGCCTCTCGTGCGCCTGGCGTCAGATCATCTGTGGCTGGATCATCTTCCAGTTGGAAC -TTGGTCCACACTACCAAATGGACAATGTCCTTATCCACGCCATATGGCCAGTCATTGTAT -ATGATTCGAATGTCCTCTATACTTTCGTTAGTATCTCACGTGTTCTACCCCCAAACCAGG -TGACCTGTGCCATACCCTCGTATTCAAAGGGGCCACCTCTTGGCTTGATGTCTTCCACAT -TACCATCCCCCCAGCCTAGCCTTTCTTTCACCACAAATCTCATGACGGACCTATACTCCG -CCTTGATCTGCGCCATATATTCCAGGTACTTGCGCAGCTCGCTTGGGACACGCTGGAACC -GATCGATCCGATTGGTGCCTGGTGACACTTTATAGTTAGTTAAGTATACCACTGACAATC -AGAATCCTGCACTTAACTTGTGATCTCCTTCACCTGATCCCAATTCTGTCGGGTATAGAG -GTAGTCAGGGGTCGAGAGGCATCGGATATTCTTCGGCGTTTGGTCACGTAAATAACTAGG -ACATTCGGCCGTCCATTGCGACCGAGGTAAATTGACGAGCCAATATGGGAGGCTTTCGGA -GGATAGGCTGCGATCGTTGAAGTGCAGGGCTGCATCGGCATCGGTCAATCTCCCATCAAG -GATGTTGATTGGTGCCGTTGATAGATTCATCGAGTGAATCATCGCGACTAGATCAAGTCC -TGAACTGATCTAGTTCGGTCCGTACTAGAATGTGATCTATGGCTTGAGTGGTGAGTGTTG -AATCTCCTCCCTTGGGACAACGTCACTCTGCAGTACAGTGCCGAAAGGGGGCATGCGGTT -CCGGCAGTCACTTGTCGATCACGTCTCCACCGATCTATTTTTATACATATACTACTGACT -TTGGAAATTGTTTTAAACCCACAACATAGGGAGTATACTCCGACACTGGGGGCTGTTTGT -TTTCTCTAATCTGAACACGGTGTGTGGAATCATGAGAGAAATCATATAGGGTGACTTTGG -TCAGGGCTATGAATACAGAAGTAACGTCTCTCCTCCCTCTCTTGTCCATCTGCATAGGGA -TGCACACACTCTCTCCATAGCGAACTCCTATTGTGTGACAATGCTTAGGGTAACACAATC -ACGTGAAGATATATTGCTTTCATACACAGTGCTATCATAAGATCTACAGCTAGGCAGCAT -GGAACTTGAATCCAACACAAGACACAGAAACATCAACATCCGATCGGGAAGATTCACCTT -TGAATAAAGATAGGATTTAAGAGAATAAGACAGATCAACAATTCCCCCCCAATTGCCTGA -TGATGGGGTCGTCTATAGAGATAATCGACACCCCGGCAGTCGCAAGTTCCTCCTTGGTGG -CATCCCAACCAGCAGGCACGACACACTTGGTTCCTTCCTCGATAACCCAGCTTTTGAATC -CAGTCTTGGCAGCATCAAGAGCTGTAGCCTTGACACAATAATCCCCAGCCAAACCCACAA -CAAATACATCTGTAACACTTTGGGCTTTCAGAGTTGTGGCAACATCTAGGCTGACCGAAT 
-GTGCGTTGGTTCCATGATCACAGTTTCCGAAAGCATCCGCAAACACAGAGTACATCTCGA -CTCCTGGGAGCATGCCTTTGTGAACAACTAGATGGTCGCTGGAGGTGTCAATCTCGGAAA -TAATCTCTGCGCCGGGGGTACCAGCCACACAGTGGACTGGCCAGAGTTGCTGTGGTTTTG -TTTCTGATTCCTTTCCTACGGCGGGATTGTTTATTGTAACGAATGAGGTAAAGGGCTGGT -TGTCAGGCCCTGGATGGTTACTGGCAAAAGAGATATGGTCTTTTGGGTGGTAGTCTTTGG -TCATGATGCGTGTGACAAATCCTGGCTTTGCAAGTAGAGTGTTGATTATCGAGGCAATTT -CGCGGCCTCCTGTCACGGCGAGTGACCCATGCTGTTCTGTTTCTCAACATATGTGTCTTC -TGTAGTATATATTTGACTCACTGGAGGGCAGAAATCTTCTTGCATGTCGACCACAATCAG -GGCTGGCCTGATTGCATCATTTGCTGCCATTTTGGGGCTAGAAATTCGTGAGAGTACCTG -CTGGTGGAGGTTATAGACAGGATTATGAAGAGGCTTTAAGTCTTGGAGGTGGTAGATTCT -GATTCTACTCTCTGGAGGAAAAGAAGGGTGTAGAGACCTGATCAAGACAGAGAAGCTTCA -AGAAAGGGATGTGTGTTGTAAACGATTACGATAAGCTGATATTTGTCAGGCAGAATAGGA -GCTCCCATGAGCTCCGTGTCCAGGCGGAAGGATAAGCGGTATGCCAATGACTAAGCCAGG -GCACTAACAAAACGAAGCTTTTCAACTTCCACCAAACTCGCTTTTCTCTCGTCTCTCAGG -GCTACGGCCCGTGATAACTGGTATGTGTGTCGTGCTACCCTCTCCCTCCCTCAACCTGAT -GAATACCTTATTGGAGCTGTCTGAGCTTTTGCTATGCGGATAAAACTTATACGCTTTTTT -CTGAACTTGTGTTTTCACATCTCATGTCACAAAAAAACCACTTTTAAATCAGTCGCTAAT -TACCTCTGATAGTCAAACAGGATATTGTAAATCTCAAGAAACCCTCATGGTCATCGATTC -CAAAATACCTAGAAGCACAAACCCTTAGTCTCTCTCTATTATCATTTCCATAGTCTCCCC -TCAAAATGGCATATGATCCTCGCCGAACAAACTCGGGGacctcaactccccaacctcctc -cccctccccctccaccaccAGAGACCTCCACACAAGCCGACATGGCCTCAGCAGCCCAAG -CCCTAGAAACATCCAACCCGGCCAACACAGCCACAGACTCGTACAAGCTCAAGTTCTGCA -CAGTCTGCGCCTCGAACAACAACCGCTCAATGGAGGCTCACCTCCAGCTCTCCGGTGCCC -CAACAGCCTTCCCCGTGATATCCTTCGGCACAGGTTCCCTCGTCCGCCTCCCCGGCCCAT -CAATCACCCAACCAAACGTGTACAGTTTCAACACAACCTCCTACAACCAAATGTACGAGG -AGCTCTCAAGCAAAGACGAGCGTTTGTACCGCAGCAACGGCATCCTGAACATGCTCGACC -GCAATCGCCAGCTGAAGTGGGGACCCGAACGGTTCCAGGATTGGGTGCCTGGCGTTCCGC -GTGTCGACCATCTTTCCAAGGGTGATAAGGGTGCGATCGGAACAGAGGGCGGCGTTGTTG -ATGTCATCATCACATGTGAAGAGCGCTGCTGGGATGCTGTTGTCGATGACTTGATGAATA -AGGGAACATCGCTCAATTACCCTGTCCATGTATTCAATGTCGATATTAAGGATAATCATG -AAGAGGCCTTGACTGGTGGAAAGGCGATTCTTGATCTGGCGAATCGCTTGAATGAGGCTG -CTGTTGCGGAGCATCGTGTTCATGGCTCTGAGGGCTGGGAAAACGGGACCGGTCCTGCAC 
-GCCAGAGCTTTGATGAGAAGGTCCCCGAGATCTTGGCGGATTGGCAAGAACGATACCCCA -ATTTGCCGGCACTCTGGACTTTAGCTTGGCTTTAGTTCTGTGTCTGCGTTGCGACCTGGA -TATCTATTTACCTACGGCCTTCCTAAGGGAATGGTTTAATACCAATGATTTGTAGGTGCT -ATGGCTATGTTATCCAGATGAATTGTTTTTTTTAGCGTGGTCAGATATCGCATTATTAAA -TGCATGTTATACAACTAGAAGGGGGGTATCAAACAGGAAGGAATAGCAACAAAAGCAAAT -CCACTCAAAGATCCTGGCTCTCCTCTTTGGGGCCCTTCTCATCTTTGATCCATTCATCTT -TCACATCCATCTCTTCAGGAAGCTTTTCCTCTTTGGGCCACTCCTCTTCAGGGGTGTTTT -GGCCCGCAGACAGTAGTATCTCTCTCCTCTCTCGATACCACTTGTCGAGACAAGCTCTCC -GCTGCGCCTGGGTCATACCCTCAACAACGAGCGCAAACTCCGTAGCATCAGCGTATCCAA -ACCGCCGCGCCATACTACAGAACTGACGACGTATAACATCCTCCGGAGGATGATACTTGA -ATCGCACAGGTTCCCCTGTCGGCTGAGAGAAAGGCTGCGAGGACATAAAGACCTGGCGGT -CACCTGCCGTGTCATTTGGCGCCAGTTCAGCACGACGACTGAGGTCTTCCTCGACACGTG -AGGAACCGATCACCTCATTGTTGAGGTGGGTGTATTCCACGCCTGCGCTTGCGAGAATCG -CCTGAACGGGGTCGTGTTTACGCGGTGTCGGAAGATTGGCTGAAGGCTTATGGTCTTTTT -TGTCATCATCTTCGCCACAGATCATCGCCGCGAGTTGGCTCATTGCTTCATCTTCGGATT -TCATTGGTCGATCGGTGTAGTCGTGGGCTTCTTCTTCTGCTTTGAGGTCTATGTCGACAA -CTTGTACTCCCGCACGGCTTTCGGCGACATTTGTTCTGTTGACGATGTCGCGCAGAACGA -TATTCGTATTCTGATACCCGAAGAAGTTCTTAAGACCAAAGATTTCACCCTTTTGGTCTT -TCTTTTCTTGAACGCCTTTGAAGTACCGACGCTCGGAACTTGCGTTGTATCCAATGTTTG -CCTGTTGCTGCTTGTAGATCTGTCGAGCGTATACGATCTCTTCGATCGTGCCAGCAGAAA -TAAGCCTAAAGACTTCCACGTCGCGAAGTTGTCCTATGCGGTAGGCCCGATCCTGGGCCT -GCAAATCGTACGACGGGTTCCAGTTTGGATCAACGACTACCACTTTGTTTGCCGAGGTGA -TGTTCAACCCAACACCGCCAGCCTTGGTTGATATTAAAAAAACAAACTGTCGCGAGTCGG -CATTGAACTCGTCCACAGCTTTTGCACGGTCATCGAGACTCATAGATCCATCCAGGTAGC -TGACGTTGTAGCTAGTGTGATGGAACAGCATTTGCAGCATCTTGAGAAGACGCACGCTAT -GCGAGAAAACAAGAACTTTGTCTCCATTTGAATGCCACCATTTAAGTAGCTTCCGCAAGA -CCTTCCATTTGCCGCAAAATTCAGGGTTCGCGTAGTTGACGATCGAGTCGCGCGAACGGT -AGAGATCTTCCCATTGTTCAGGTAACGCAAGTTCCAAATAATCTCTGTCTTTTTCCTGCT -TCTCTTTTGGGTCAACTCCTTGTGGGATCAGAATTGCCAGATGATTGCTCAGCTTTTGAA -GCACATTGATAGCAGGGAACACATAGGATTGCCAGTTTGGGGGGTCCCTATCGGGGATCA -AGCGGCGACAACACCAGCCGGACTTTTTGCCCGATCCACACTCACATGGTTCTGTAGATG -TCTTTATATAATCGACGATATCACTGTCAAGGAAGTTCTCGTAGGCCTCGGCTTGTGTCT 
-CAGTCAGAGGACAAAAGACAACCCGGTCAACCTTTTTGGGCAACTGATCCGCGATCAAGG -ACTTCATCCTCCTGATGAAAAATTGGGGGAGTAGATTCTCGACCAGCTTCTTGGCTGTTC -TTCGTGCTCTGCTGAGCTCGCTCAGAGTGGCATCATGCGATTGTCCGATCTTCAGTGGCT -CTGATATCTGCGCCTTCCAGGTAGACACCGGACCCAGAACACCAGGGTTCGTCCAGTTGA -GCAGGGTCCATAATTCTTCATACTTGTTCTGGATGGCCGTGCCGGTGAGCCCTATCCGAC -ATAACGCATTGATCTCGTTCATAGACTGGGTGGTCCCGGACTTGCGCTCCTTGATCTTGT -GGCACTCGTCGGCAATCACGCAGTCCCATTCAATCATATTGACAGCGTCTTTGTTGTTCA -TATAAGTGGTATAGGTTGTAATCAGGATTTCCACCCTCCCCGACTTTGCCGCATCGAGAG -CGAGGTCTTTATTCTCCCCGTGGTAGCTTTCAACATGCCACCACCCCCAGCGTGCAAACT -CAGCCCTCCAATTTGCAATAAGGGTCCCTGGACACACAATGAGCGTGCGTGGATACCAAG -CGTCGTCGTCGGATCTGCGCATCTTTCGCATGCGCTTTGCGTCCCGTTCGTCACCTGTCT -TTCCATAGGCAGCGGTCAAGAACGCAATCACCTGAATGGTTTTTCCGAGACCCATGTCGT -CGCCTAGGACACCACCCTTTTGGTATACGAATAGCTCGTGCAGAAACGCCACTCCCTCCA -CTTGATACTGCCGCAACCACTGGGCGATCGGAGCAGGAATGATGCCCATCGAGTATGGAA -GTTCGACGTCCTTATACTCCTGTTTTCGGGTGTTAATCGGAAAGACAGGTCGTTCTTGAA -GTTCTTCAAGCCGCTCGTCGTCCGAGAAATCCACATTCTCATAGGTGGGTGGGAGCTTCA -GTCCATGATCTTTCAGATGTTCATACCTCCGGTCAAACTGGCTGCGACGGGTTTTGAGAT -ATCTAGGTAAAGTGTTTTCCATCAGCTCATCCTCGGAAGATACCTCGCCCCATTGCTTCA -ACGGCCGCTTAGACAAGGGCGTTGGTCGCTCTTGTTTTACTCGACCAACAATTGGATCTC -CCCTGGGCATTTTCGCTTTGCGAGTTGCTGCAGCTATTTGTTTGCGCTTAAGGGATTTGC -GTTCCTTGAATTTCTTGTATGCTAGTTCGCCATCGTTGGCGTCATCCTCATCGCTGTCCA -CGGGCTGGATCTTTACAGCGCTTGATGACCCGGAAAGAAGGGTACGCGATAGATCGGACT -TCATTGGTGTAATTGTTCCGTGTTTCACACTAGAAACTCTGGCTGCAGGTAATGTGAGCT -GTCTGTCAGACGCGTCGGAGAGGTGTCATACTGCTAGCAGTCGGAGTCGCATAGTGCGTC -ACCTCTAAGTCGTCATCATCTGAATCAATAATGATGGGCTCCTCAATATTCTCATTATTC -GCCATGATCCCTGCATTTCTGAAAATAGCGCTCAAAGGGTGACTGAGCGCAAGTCGAAGT -GGAAGTACCAGACTGTCGCGACACAGGGTAAATTACAGCAGGCGATAACGGCCACAAAGC -AAATGTGAATCCGGTCTCTTAAGAAAGGCTCAATTGGGCAAATGTTGTAGCATAAACTTC -GTGTTCTGTTTCTTTATCTATGCCAAAAAAACCCCGAGTCTCATTTATAACATCTAACTA -CTCAAAGAAGTGGTTGAAACTGTTCTACATTTATACTTGTTACACCAAGTCACGTGCAAG -AGAACCAGAAAAAAAGATGCTCTCATTGGTTTGGCATTCGATTCTTTGCATATGCGATAA -AGCCAACGTAAGTCGAGGATAATATTCTACCTCTGTCATTACTCGATCAAGACACACCCG 
-ACTGACCTAAGCCCACAGAGAACACAAAATAAAGCTTGAAATCCTGCTTGACATACCTTC -TTTTCACTCAAGACAACTCTCGTTCTTATCAGAAGCTCAATTCAACCCACAAAGATGCCG -TTGGACACCTCGACAACATATCCCCTGACGCGGCTGCGTCTTGACGGTCGCCGCTGGAAT -GAACTCCGTCTGCTCCAAGCCCAGATCTCAACAAACCCCGCCAGCTCCGGCTCTTCATAC -CTGTCAATGGGCAACACATCAATCATGTGCTCTGTCCACGGCCCTGCCGAGGGTCGCCGA -GGAGATGGCGGCGGTGGAGCTGCAGGAAGTGGTCATGCGGTTGTGGAAGTAGACGTCAAC -GTCGCCGGGTTTGCCGGTGTAGATCGCAAGCGGAGAGCCGGTGGAAGTGACAGGTATGTG -CACCCTCTTACGGCTTCCCATTGCCTTGTCCTGCCACAGGGAGAGGATTAAAGGGACAGC -CGACCAGGTCCAGATCCAGTCAATTCTAATCTCTGCGGACTACAGACAATCATCGCGAAT -CGCTACAACCCTCCGCGCAGCCTTCCAGTCTCACCTCCACACCTACCTCTATCCCCACAG -CACAATTAGCATCCATGTCTCTGTTCTCTCTGCCGATGGCTCATTGATGGCCGCGGCAAT -CAACGCCTGTACTCTAGCGCTCGTGGACGCTGGAATCCCCATGCCGGGACTACTCACTGG -CTGTACTGCCGGTATGAGCGGCAACGCCTCTACACCTCGTGACCCGCGACACGACGAGCT -TGATCCTTTGCTGGATCTGTCGCTTCCAGAAGAACAGGAACTGCCTTCGCTCACTGTTGC -GACGACAACGGCAGTGCCCGTTGGTGAGAACAATATGGATGAGGATGATGAGGCTATGAA -GGTGTGTATCCTTACTATGGAGTCTAAGGTTCACGCTACGTACCTTGAGACAATGTTGGC -GGTCGGAATCGATGGGTGCAGTCAGATCCGCGAGTTGTTGGAAGGTGTCATCAAAGGGTC -CCGGGGGTGACGCAGAAAAAGAAAAAAAATAAACAAATCAAAAGATTCAAATGGGAGAAT -AGTTCAAGTTGATACCCTGTCTTACGTTTAATGTTACCAGTTACGGCCATTCCGAGAATT -TGCTCAAACTACCTCCCACTAGGCTTTTAGGGCCAAATGTATAAATGTGCTCATATTGCT -CACATTAGGGCTCATGGAGGAACTACTCCATTTTCTGTTTCCTTTATTTACATATCTTGT -ACTGCTCTCATCCTGTTTTGTTTCGGTCAACACGAGAATATCTCGACTCAGTGCTCCCCA -AGGTCCCCAGTCAGCAATCATGGATATGGATGGGATGACTATGCCAATGGCCACATCGAC -AAGCTCTGCAGCTATGGCCAGCAGTACCGGGATGAAAATGGGCGGAATGAGCATGGGAGG -TAGCTGCAAAATCTCGGTCCGTCTAATATCAATCCCAGACTCACTATTCCATACACCAGG -ATCAAGCTCACGCATAGTCACGATCAGATGCTTTGGAACTGGTACACAGTTGACGCATGT -ATGCAGCCTGATACCTCTCTACACAAGCCCACAAGCTAACTGATCACGCAGGCTTCCTCG -CCAAATCATGGCACATAACCTCTCGCGGCATGTTCGCCGGCTCATGCATTGGAGTAATCT -GCCTCGTCTTGAGTCTCGAGCTCCTCCGCCGTCTGGGTCGAGAATACGACTCGTTCATTG -TACGTCGAGCCCGTCTTCGAAGACTCTATATGTCGGGCTCTTCCACTGCACAATCCGTCT -CAAACGTTCCTCTGCGGAGTGAAGAAGACACCATCAGGCCGTCTGGCAACTGCTGCGGCG -GTAACGCAGAGCCAGATGCTACGTTCTTCAATGCAGATGACGACATGATTACTTCTGTGT 
-CGGGTACTCCCCAGAACGGAGCTTCGAAGAAGCAGGCTTCTGCTGCGTCGGCTAATGCCA -TGCAGGATGTCGGTACTCAGCGCATCGAGAGACAGGAGGGCATGTTTGCTCCGTATCGTC -CGTCTTTTGTTGAGCATACCGTACGTTCTTTGCTACATATGGCGCAGTTTGCTGTTGCGT -ACATCATCATGCTGCTTGCTATGTACTTCAATGGGTATATCATCATCTGTATCTTCATTG -GTGCTTTCCTGGGGGCGTTTATCTTCTCCTGGGAGCCGGTGGACTTGAGTAAGGAGTAAG -TCATTCTGTCTGTATACTAGCTATTTAACTTTTTTGTGCTAACATGGCTTTGTTTTTTTC -AGGTCAGATGCTACGTCTGTCACTAAGTGCTGCGGTTAAACAAGTGTTGCATGGCGTTAT -GTTGTCTTAGCCTCAGGCTCCAGTGACGATTCAATTTTACTCCAGCATTTTCAGGGTTTG -TTGTCCCGGTATTTTTTCTTGAAATTCGAAGAGGGCAATATGCAGGTTTCTGTGTCCAGG -GTTTGCTTCCCATATGCCATGACGTATGAACCATCTCTAAGGATCTATTTTCCCTCTACT -CAATTGCTTTCTCCCTTATGAGGAAGACCTTAAACACGGCGTACATGAAATTAACACTAA -TATCACACAATTCGCTTCAAAGGTCTTACTGGAAGGGAAGGCGGAAATCTTTTCCATGAA -AAAACCTGCCGTCTCACGCGGGGTACGGTGATGTAGTTGCGGACCCGCGATCAAAGCCCT -CGACTCACCAGAACATTTAGAGGTTTCTCCTGGGAAAAGAAACGCGAGGGCTTGTAACAA -AGCAGTCTAGATGATAATTTAATGCTTTAGGGTAGAGACATGAGATGTGCATCGGTTGAG -CTGATTTGGGGATAATGGCTTTGCGCCTACATGTCACTAAAAAACAAACATACTCGCTTT -CACAGAGCCTCTGATCTACAAATGGAACATCAAGTCCCCACGACAGAAAGGCATGTATAA -CTAGCTTAGTAGAAACTTTAGATTACAAGTCGCAAAAGGGAAAGCCACCTATGCTAGTCC -CACGCCGCGCTGAAGCGACGGGAAAAGAACCACGTTCATGACGAAAAGAAAGACATTCAT -CCTCCAAGCACCCAACTAAATCTCCAGAGTGAGCTTTTTAGTTGCCACCGCAGTTGACGA -TGGTGGGCTTGATAGCGGAGCGGATGGAGCCGGAGGAAGAGCCGAGAGCCTCGATCTCCT -TGACAACGTTCATGGACTCGGCATCAGCGACCTCGCCGAAGACGACGTGCTTGCCATCGA -GCCACGAGGTGACAACGGTGGTGATGAAGAACTGGGAGCCGTTGCTGAAAAAAAAGGGGG -GATGTTTAGTATGGTTCTGTGGACCAGCTGGATTACAAAATTGAGACTTACGTGTTGGGG -CCAGCGTTGGCCATGGACAGAAGACCAGGGCGAGTGTGCTTGTGGACGAAGTTCTCATCG -GGGAACTTCTCACCGTAGATGGAGCGACCACCGGTACCCTATTCCGGTAAAATAAAAGTC -AGACACCGAGCATTGCAGCGAGCATGATGGTTAGAAGAAAAAAAAACTCACGTTGCCACG -GGTGAAGTCACCACCCTGGAGCATGAACTGGGGGATGACACGGTGGAAGGAAGAGCCCTT -GTAGCCCTCACCCTGCTTGGCGTTGCAAAGCTCAGCGAAGTTCTTGGCAGTCTTGGGGAC -CTCGTTGGCGTAGAGCTTGAAGTTGATGCGGCCAGTCTTGGCTAAAGATGGAATCAGCAT -CAATTCCAATTCCAATTAATGTAACATGATATGAGATTGCCAAGCTTTAATACTGATATT -GAAAGCTGGAGGTTGGAGGTTGGAGGCAGCGATGTAAGAAAGGCAAAAACTCTCTGCCCC 
-ACTAGGAATGAGAATAACAAAGCTGGAAGAACTTACGGGTAGAGGAGCCAGCAGGGGCGT -ACTGGACCTCGAAGAAAGCGGAGGTGTTGGACATAGCGGCGGTTGATGAAGCGAAAGTTC -GGGCAGCGGGGTGGAAAGAAGTGCAATTgaagagagaagaagagaagagggaagaagGTG -GGGTTATTGACCTCAATCGGCAGAACATGGTGAGGGGCAGGGCGGAGCGGGTGAACTCGG -GTGCCGGGCTGTTTTGGGGTTAGCTTGTTGAAGTTCACCTAAAGACCACTTGTGCACTTT -CTGCCTGAGGCTCCACTTTAGAGCTTCTCCAGACTTATCCAGGGGGAAAACATGCTTTTG -TTGCCTAATGATTTCTCTGGAAGAGTCTGGAGGAAAGTGAAAATGCCCTCTAACATTCAG -ACGACAAGGTACAAATTTCACTTACAAAGGATATAAATTGATGAGGAATGCGAACTTAAC -GAAGTAGAAAGAAGGAAAAAAACTGAAGCTCAATGCCGAAATGCGTCTAGAGCTCCGTGA -TTATGTAATGACTTCCGGGATATCCCTGCCGAACTAGAGCCTTGGCTACAAGAAGATATG -CATCTTTTAAGGGGTAAACTCGTGTTTATTGCTATAGTCATTGAATTGCAGATATACCGA -GCACTTTCATTCCACTTGTATACATTCAAAATGCAAGGAAATAACAGCTCGATAGCAGCA -AACCGTCGTAAAATAGAAAAAAGAATTGAACAATCAACACGAAATCGGGCAGACATGCCT -AAAGGAACAATGGAGAAGGGCTAAAATAAAGTACAAAGAATAAGAGAATGTGAAAGAAAA -AAAAAGTGACGGAAGGGAAAGAAACCCTGTAGATGCCACAGATATCGGAACATCGTTTCG -ACATTGTCCACTCGTTGCCATGTATGAATAGTCGCTGAAAAGCAAGGGGCAAAGAGGAAT -GAGTATAAAGAAGAATTCGAGAAGAATGGAAAAATGGTAGCCCCGATATCATGTCATGAG -AAATAATGCCTTGGATCAAGCGCATCACCGGAAGTCATCGTCGTCTTCCTCGGTAGCAAC -TACCGATGTAGCCTCGAGGCGCTGAACAAGGTCTTTGGTCTGCCGAGCGATCTCAAATGC -AATGGGAGGCAATTTGTTCGTGAGCTCACGCAATTGCTCAGGGGTGGTAGCGTGTTCGCC -CTCATCAGCAGTGGCCGCCAACCGGCCGCGGCACTGGTCCAATGCCGCGATAACTGGTTC -AGATGTCTGCCGCAAAGCGATGTCTCCGCTGCGTTCACGCATCAGGTGCTCTGTAGCGGA -GACCACATTCGCAACAACATTCGAAATGGCAGAGACGTGCGTCTGAACAGTGTGCAGGTT -GTCCTCGGCTCGGATACTAGCCACCAAAGCCTGAATTTGTTGGACCATGCCATCCGTCTG -GTCCTCGACATAGAGCTAGGAGCACCTGTCAGTAAAGGATTCCTCGCAATATGACCCTTC -TCGAAGAGACTCACCTTAAGCTCCAACACCTCATGGTCTTCTTGCACGTGCCCCCACCCA -TGATTGAGGTCATTAGTGGGGCCATTGTGTACAGGGAACTGAGAGTCATCCGGACGGCTT -ATAGCACTGTAGACGGAGTTATTGCTCATCCGCTTCTGACTGGCAGATGTATCGAAATAG -TCAGGGTATTGTCCCTGAGATACATCTGCCTCGTCATCTTCATCCTTCAACTCATCGACA -GGCGATTCCTGGATCTTCACCACCCGTATCAGGTCAACAATAGCTGTACAAAGATGGGAG -GCCGCCGCGTCAAGGAGCGACACCGGAGAGAGGCCACTGGAATTGGCAAAGTTGCGCGTG -GCGGTGATCACATTGTTTGCTGTCGTAGATACCTGGCCCTTGGCCTTTGCGCGCAGAGCA 
-GCCGACCCATCCACCGGGATTGGGGAGTGTTCCACGTCCTGGAGAATATGGCGAACTGTT -ACAATGACCATCTTGACCTGGTGAAGCACGACATGCGATTCTTCGAAACGCGCAACTCGC -AGGAGCTCATCCACAGACAGCTGGAACTTGGTCACATGGAAACCCTTGACCACCCCGTCA -GCTTGCACCAATTCATGTTCTTTGGAAACAAGACTTGCGTCGGGCCGCGAGTCGGCGAAA -CCACCAGACGAAGTGCGGAGTTGCCGCAGCTGCGCCTTGACCTTCGCGTAGCGGTTCTTC -CATTCATTGGCATCTGCCTCAAGTCTGTGGACCTCGTTCGAGAGCTTTTCTTCGCGCTCC -CAATTGGAGTGGCTCTCGGTTGAAAGAGTCTTCATCTCCATTAGGAAAGTTGCTGCCTCG -CGGCGGACTTCATCGGTCACTTGTTGCTGTTGCTGCAACTGTACTTGTAATTTCTGGTGC -TGGGTCTCTAGGTCAACATAGCGCGCTTGTAGACCCGCATCATCGACGCCCCGAGAAGAG -TCATTGAGCTGGCTGCGGAGATCTCGTTCCATGGTGTCGTGCTCAGTTCTCACCCGCTCG -AGCTCTAGCTGAAGCGAGCTATTGAGATCCTCGGCTTTTGATACCTTGCTCTCAAGGTCG -TGTCGTAATTCGTCCCACTCCTGACGTTCGTTATGACTGATCCCGGAGGAATCTGCATCT -TGTGATGACTGGGCCAACTCCTCATCCTTGGAGCGGACCAATTCTTCCAACTTTTCAATC -TTATCTTGGAGAATCGATACTTGTGTCTGGCTCTCCAGTAACAGTCTCCGTTCACTGTCA -CCCAGGGTCGTTGTAGTGCCCCGTCGGCTCAAAACAGCATCTAGCGCGAACGCATCACTC -CGAGCATCATCCTCATCCTCGGGTCCCATATCATCGCTATCCTCGACCATCGTACTTTTA -TTTGGAACCATCGTATTGCTCTGGAAAGACCGAGGAAACGGTCCATCGCCATTCGCACTC -GGTGGGCCTCCAAGAGAACCAGCACGCGGAGGAAATGGACTGCCGGGGGGACCGCCAGAT -GGGTACCCTCGGCCCATACGGGAAGGAGGTCCCCGAGAAGACTGTTGCGAGGTACGAGCG -CTAGGAGGGCCCATATTTGGAGGTGGCCGGGCGCGGTATTCTCGGCTGGTGAAATGCGGG -AAGCGGCGCTCCAGCTCACAGAAGACATCGGTCGCCAGATCACGGAAACGCACGTGGTGG -AGCGCAGCGAGCTTTTGGCGGGCATGGTTGCGCTTTTCGTGGAAGTCGGGGCGAGCGGGG -AGATAGGGAGGTACGTCTGGACGAGGTGGCCGTCCAGGAGGCGGCGAGGCCTGCTGCCGT -CGAATCAATTCGTCGTAAACATCGGTGGAAAGTTCATGGAACTGGGTTGGCGATAGTCGC -AGTAATTTGTCTCGGGCTTTATTCGACTTGCCACGGTCATCGCGGGCTGTGCCATTCAGG -AATCTTTTCAAGACCACATAGTGCTGGCCCAGTATCTCCTCCATCTGACGATATCGTCTT -CCATCGCGCTGGGTGTCCCCTAGGGTCCCGGAACTGGATCTTGCGGCGACCGAACTGGGT -GGGGATGGGTTGCCGCTCGGGTTGGAGGGTGACAGCGAAGCAGGTAAGCTACCGGGGCTT -TGAGGTGCGCTCGGAATGCCGGAGGTCGGAGGTGTGGCTAGATTGGCTCGCGATGATGGG -AATGTTGGTGAGAATGGTGAGTCCGACTTTCCATACTGATTGAGTCCCGACCATTCGCTA -CCATCAACAGAGACAGGCGACATGGCTCCCGAACCCCCATTCATATCAGGATCAAGCTTG -AGGTTGATCCGACTCCAAATATTCAATGTTTGAAGGAGCTGTCACGGGTAATCAAGGTCA 
-CTTCACAGAATCCCCCGTATTTCTCCACTAGGATTCCAATCTGAGACACTGGAGCGAATA -TGAGCGCTGATTCGCTCTAGTTCGTAACTCTACCTTGGAAATCTTTGGAACCAGATCCAA -AGGAGTGAAGGACTGGGATATGGGGGAAAAGGGAAGTTGAACTTCCGGACTTTCTTTACG -ATACCCGTTCCTTACAGCGTTGTGCAGATGTATCTATGCATCTACAGGGGCCGATGCGGG -TGGATATGATTTCAGATCTTATTCCTTTTTCATTTTGAATGAGTTTTTTCTTCTAGTCGG -CTCAATATGTATTCCCAACCCATTCTAAAGCTTGATCAGTAGTCAAGCAGAAAACTTGGC -TATACGGTACTTGTATTCGATACTGCCAAGGCGGGGCCTGAACAGGGGGGACGAATTTCC -ACAGGGGCAGACCATAACATAGAGATACGGACTATGGAGTACGAACGACGGAGTACTCCT -TACTACTGCAATATACTTTTTAGATAGACTATACAATCTTGTTGGATTGATATGCGAAGA -TTTAAATTCAAATATCTTATGAGATCAATAGAAGCATCCAACAAAATAAAATTGAGTAAT -TCTAGACCCTTGCCTGCCCCATAATTTTGTACGTGTACCTGGTCCCACATATTATTTGAG -AAAGGCTTGAAAAACAAATAAATTCTTTCGGTCATATCTATGGGTAAAAAAAAAAAACAC -CGAAACAATTAAGTACACATAGAACAACAGCTTCAATACCCGCATGAATGCTTTGAAGCG -TTGGTTCTAGGGCCCTCAAGGGGACGCCTTCGTTAGGCCTCATTCGGATCTTGCCGATCC -ACTGACCAAGGGGGAAAAAAGTCAATCGCCGACGCTATCCGATCTTCGGATAGGACAGTC -TTGCAGAACGATTCCGGGTCAGTTGGGAGAGTCTCCCATCATGATGCAGAGGCAAAAACA -AGGCGATGGGTACGCCACATCTTGCATCGAAAGTGAATGGGATCGGGAGAATCATGGGCC -GTATTCACACATTTACATTGGCCCACAGCGGCGGGCCAGGAGCTAGTCATGTCATCGTTT -ATTTGGAGGTCATGGAAAGGCCACGAGTGGCGCTGACGGCAGAAGACACGGGGGTGGAGG -ACCGTGTGGGGGCTGCGCTGCGGTTGTGGGCCATGTGTGCCTTGACGAGGTGACCGGCGG -CAAGTGCAGAGCACAAGCTGAGCTCACCGGCAAGAACGGAGGCAGCAATGATACGAGACA -GCTGGCGCGCATTTTCGCCGGGGGAGGTCGAGTGAGCACCGCGGACACCCAAAAGGTCCA -ACATGGCACCTTGAGCTTCGAGAATGGTACCACCGCCAATAGTGCCCACCTCAATAGAAG -GCATAGACACGGCAATCTGGAGGTTGCCATTGTTGCTGTTTGGGGAATCGTTGTTAGCAT -CTCAAATGTCAAATAGTAGTGGATGAGAAAGGCTTACTTCTTCATGGTGGTGATACAGCT -GCTGCTCTCAACGTTCTGGGCAGGATCTTGACCAGTGGCCAGGAAAATGGCAGATACGAT -GTTCGAGGCATGGGCGTTGAAACCACCCAAGCTGCCCGCCATCGCACTTCCGATCAAGTT -CTTGCTGACGTTCAGCTCAACCAGTGCATCGACGTCGCTCTTGAGGACACTCTTGACAAC -ATCCCCGGGAATGATAGCCTCTGCCACGACGGACTTACCACGGCCATCAGTCCAGTTGAT -CGCAGCGGACTTCTTATCTGTGCAGAAGTTACCGGAAAGCGAGATGATGGACATGTCCTC -GAAACCACACTCCTTGTGCATGACGTCCAGCGCCTTCTCACAGCCCTTAGAGATCATGTT -CATACCCATGGCATCACCAGTGGTGGTCTTGAAGCGGATGTACAGGTAGGTTCCGGCAAG 
-GGCAGTCTTGATACTTTGAAGACGGGCAAAGCGACTGGTCGAGTTAAAAGCCGTCGTCAT -AATATTCCGGCCTTCCTCGGAATCGATCCAAACCTTCGCAGCGGCAGCCCGGGCCAAGGT -TGGGAAAGTTACACAAGGACCACGGGTCATACCATCACCAGTCAAGACAGTCACTGCACC -GCCACCGGCGTTGATTGCCTTGGACCCACGACTCGTGCTAGCCACCAAGACACCTTCGGT -AGTGGCCATGGGAATGAAGTAGTTCTGTCCATCGATATTGATAGGACCAGCGACACCAAG -AGGAAGGGGCAAGTAACCAATAACATTTTCACAGCAAGCACCGTGGACAAGGCCATAGTT -GTAATCCTTGTATGGGAGCTTGGAGTTTTCCAGAACGGAAGTGATAGCAGCGGTAGCAGG -AGTCCGGGAGACCACAGACCTGCGGATCTTGACAGCACGAGTAAAGGCATCAACGCGACT -CATCAAGTTCTCGTCCTCCATGGTCTTCTCTAGCGCGTAACCCGGAAGCTTTCCGCGGAG -AGAAAGATCGATCAATTCCTCATCAGTAAGCATCGGTGCACGCTTTTCCTTGAGGAAAGC -CTCGCATTCTCCCTGGGTTCTGGATGATTGTTGTTCATTTGGCTCAAACTTGGGGTAGAC -CTTCGGTGCGACCACAGGCTCCGCAGGAAGGGCCGGGGCAGTCTCTGGGTCCTTGATACT -CCAGCGGGCAGCATTGAAAAGATAGCCGTTGAGAATGATGCTAAGTGTCAAGGCGGCGAT -GATCCACTTGCTGATGATAGGGTCTTCGACACTCTTAAGCAAACTTTCGAGAACTTTGCC -ACCCACCGCATCCAAAAACTGGTCGGTGTACTCGATTTCAAATGAACCAGAAGCGGTAGG -GTCTGCGTAGTGAATGGAGGGGTACTCCAACTTGTACTTGATCGGGGAAAGGATGGTAAC -AATAGTCTCCTGCTTCTGGCTCTTGGCAGCCACGTACACACTGTCCAGGCCATTCTCTGC -CACCTTGAAAGGATCAATTGGAGTAGGAGACAGGACGCTGGAGAACCGGGAGAGCAAGCC -ACCGTTGCCAGTGTTCCGGAAAGGAATGGCTGACATGTTCACGACATTGATCAAAATAAG -ACCACCAACCATAAGGATCTTGAATCGGCGCACATTGCTAGACTTGATCTTGCGACCAAA -GATGCTGGTATCCGCCTCACCAGATCCGGCGCTAGGCCAGTCGTTGCTAGAAGCGACATT -CTCAGCAACGCTGTGGGTGATACCATCTTCTTCCAAGGCCTTGCGTAGCGCCACGTGGCG -CTTGATCCGAGTGATCTCGAGCTTGATACAGAGAATTGTGGTATAGAAGGTGAAGAGAAG -CACGCAGTCAAAGAACAAGATCAAGGCCGCGAGGAAGCAGAACTGCTGGAGTCCTCCTTG -AACGCCAGAGGCCGCACCCAAAGCCAACAGACCAATCTCAATGCAGTAGTGCTGGACGAT -CTCGAAACCTTGGTCCTTGATCGCAGCTCCGATGGAGTCCTGAATTGATCTCGGTGTGCT -CGATGCCAACGGGCGAGGGGCAGCACCAGGAACAGGCCGGCGATTGTCCACAGAAGCCTT -CAGAACGGCCCGAGTGAACATGATCGGCTTCTCGAAACCAATGGTGACAACCAAGAATGG -GAGACCTTCGGACAGCAGGAGGAGGTTGATAGGCACACCAAGTCTAGTGGTCACGTGTAG -ACCGAAAAGGAATGCAAAAGTACCAGAAAGGAGCACGGTAGCCGCAAGCCAAAAGTTAGA -GCCCAATCGTCGCATCGAGAAGAAGAGCGAGACGAAGCTCAGGTGCATCGAGAGGTATCC -AAGAGCCATGATCACAATGTCAATAGTTTCCGCGTGCTTGATCAGATCCACAAAGGAACT 
-CCATGCATCTGCAACCCAGAGCTTTACAGCTCTGCCAGATCCGTAGGCCGGGCCACGAGC -AGCTCGCATGATCCACTTCTTAGATTCCTCCTCATCCTCATCCGTGGATGGGGTGGGGAT -CTCCTGCACGGCCCTCAAAAGTTCCGAGATTTGGTCAAAAGGCACGGTGTAAACCAAAGA -AGCGTCATGAGTGAATTGCGAGAAGAGGTTGGGGGTGTACGGCACCGAGTTGGCGGAGAT -GTTGGCAGGGATCGGGACATCATCAGCTGCGGGGGCAGTTGAATCAGAGTTGGTGGAATC -AGGGAAGATTAGGGTCGCCAGGGCGAGGTGCTGGGCAACAGGCTTTTCAGTCTAATATTG -CATTGAGTTAGTATTTCTTTCTCGGTGCCCGAGAAGAAAGGGGGGAGTGGGCCCACCTGG -GAGTCAGTCCAGGCGTCTTCCGCCTGCCACTTCCACGAAGTTGATTCTCCGAGGCGCAAA -TTGCGGCTTCCCTGGAGCAGGGCATCGGCATCAACCTGTCCCGCGACACTTCGCAGATCT -TTAGATGTATCAAGCAGGCTACCTTCGAGGAGACCCACGTAGGTGGTGCTAGCGAGGATT -GCAATCACCACGAGTGTGTGGATAGGGTGCACACAGGCCCGGCGAGAGATGGACTGGAGG -CCGGACACAATCTGACGCCGGAACCAGCTAGGTTGGATTTCCCGCTCTTCCTCGGTTGAG -CGAAGGCGTTTGGAGATAAACGAGGTAGCCATGACGGTTCGCGAAGGAAGCGGTGAGGAC -GTCTGTTGAAGGCGTTGAGGGGGTGTTAAGACGGTCCGCAAGGACCAGGACCTGCGAGGA -AAGCCGTCGTGGGATGATGTTTTACACCAAAACCAAAACCAATGCGGAGGGAAATCTTGA -ATGATCCAATTCTATGACAGCATGTGGGCTGCACAAGTTAAGGATTGGGCAGTTTCCGGG -CTATAGAAATTTTTTTCGGGATTAGTTGAGGTTACCAAACATTGATAAGAAAGCACCTAC -CGAGGGAAGAAGTCGAGAAGCTCAAGGCACCCTTGGAATTGGAGAAGGGAAAAAAGTAAG -GGACCCCCCTGAAGTAAAATAATAGTTGCCCGAGACTGGAATTAAGTAACGCCGACGCCA -AGCCACACCAGTGGGGATTTGCTTTTGCGAGTAATTTATCCTTGGGTATTTTATCACAAA -GAAAATAATAATATAGGGATTAAGAAGAATGAGAAGAACAGTCATCAAGGTCCTCGAGTC -AAGCTGACGTTAAGGAGTAGTTTTCTAGGATTGTCTAGGATCATATTGGATCTAGGCGGC -AATATTCTATACAGAGCCTACATATGATACCTGAAAATACCTTGAACATAGTACATATTG -AGGGATATTGAGGACTAGTCAGGCGTATTTGAACGAGTGTTTTGAATCTTACAAAAATAT -CAAGTGAAGTACCTAGGTATGCAATACAGACCCTTTGGAGAAAATTTGCAGTGGCAATCC -TTGTACAACCCAAAGAATCTGGAGACGCACAGGCATTTCTTTATCTTTACCTATGCATGT -CGATCCTTTGACAGAGACACCGCACCTTGAACATACGTGttgaatcttgaatgtctcttg -aatcttggatgttTATTGATATTTATCCTGTCTGAGCTGGACGTACTTACGTGAGAAAAC -CATCAAAGGCTCCACCGGTTTCGTCTGTTACTTAACCCTGTACTCCGTACTCCGAACAAA -GTAGAGGTTTCCACGTCAGCCAACCCTCAAAAAGGAAAGTCGAAAGAGACACTCCGTAGT -CGGCACCTGCGTCAGAAAAGAGTCCACTCAATTTCCCTGTGTGGAATCCACCCGGTGGAG -CTCCGGAGAAATCTGATAAAAATCTGATAACTCTGAATCAAATCATCTGCTCGGACTGAC 
-CCGAAGCAGACTCCATGTATCCATAATAAACTCGGTAACCGAGCTTGCCAAGCGAAACAG -GAAAAAGATAAGATCGTTTGCTCGGTGAATCCCGAGTGTTCCGGTAATTCTGCTTGAAAT -TTGCACTTCTCATCTCCTTACGTAGGTGCCTTAGGTGCCTCAGGTGGCTTTCCCGTTTTA -GTGCTTGCCTCTTGCTGCTTTTTTGATCCATAAAACTCAACACGGTCAGCCCCGGCGACC -TCCAGGGCTACAACTACCCGTGGACCTGACCTCGGGATCACTCACAGTATATAAGCCCAC -ATGCTCACCTGGGCAGGACTGCGAGCTGCATTGCAGGCCTCCAAGTGGCTGCTTACTGCG -CATGTATCTTTCACATCATTCGTGACTGTCATAGATACGGTGTGATTTTGATCCTTTCAT -TGCCTTATATTCTATTCATGGCCGCATCTAGCTGTTTTCCTATTACAATCTCATAGCTTC -CATCTTGCTCTCCCTCAACGAATCAAGGGCGACGTATCCCGATATGTCGACAGCAGGAAC -GATATATATCAATGTTATGTATGCGAAACGTCCAAGGTCATTTGGGTCCCGCTCCATTAT -TCTAGCATTTGGGTTATTTTGATCGGCGTGTTCTTGCTCACGTAAGTAAATCACAATGTC -ACATCCTACCTGCAGCAGACGCCGCATGCTCATAGATTTTCATAAATATGGCACCATTCA -TGGATAGAATAAGTTTCACCCCGGTGAGCCCTTCGAGCTGACGTGATATAAGCATATAAA -GAAATCTTATCTTTCGAAACATACATCGAAGTACATTGACACGAGAGTGAGTCGGACCCA -TTGGCAGCTTCAGATGGTATATCACGGCCTGGATTTCTGGGTTATGGCAAAAGTAACCTT -ACTACAACATGAACTCCGTAGAGAATAGCCGGGCTGGAGTGAGTTGAGACAATGGGATTA -GGTACCTATGTCGCTCAACTACGGTCAGCCATGAGATACAAACCTTGCAGGAGATGGAAG -TGACAAGACCTTTCAGAATGGCTCAAATGAGTGCTGGTATCTCTGAAAATGATATTGCTC -ATGGCACCAAAGGCGAGACGCCATGGTACCATCTGATTGAAGGGCCACGATCAATATGGA -GCGAGATATTCTGATCGCAGATTATTAGACTGCATCGTCGCCTCGAGTAGACTGAAAGTA -GACCGATATGCTGAATGACTAAGATTGATGCATTATACAGTTGAATATGTCACCATTTCT -ACTGCAGGGCAAGCAATGAAGAGTAAAATCGGGTGCTTCTGCCTGAATAGGTGCATTAGG -GTTGCGAATGCAACCACGTTCAGTAAGGTACGTACTCGATCCAAACCAGAAAGCATTTTT -CCGTCTGGCTTTTCAGGCGTAGGCCTCTGGGCGGGATCACAAAAGTGTAGTTTGGTATCA -GATGACCCCGAGAATCAGCATCTTTCAGATGTCGATCTAGATCATCAACATTGAGGTAAT -GCTATATCCCATATCCTCTATCGGGAGCAGAGCTGTTCAAATGAACCATTGATGTTTCAA -AAATTATTCCCAAATGACCTCCTCGCTTTGAAAACAGGCTCTCAGTTCATCACTTCAAAA -AATTGATCCACACACCTCATCGAGGCAGATTTGCGAGCCAAGCAGGTCACTCAGCTGTGT -TTCAGAGATTTCTCGGATAGGATGTGTAATACTGGCGGCGCTAAAGTCTAGGCCACTGCA -ATATGAAGCTCATCGAGTATGTACATGAACAAATACCGCCGCTTCAGTGTCTACATAGGT -TAGTCGTACCTATTGCGCATTCTAGCTCCCTTAGACTGATTCCATGCCTGGTGAACTCCT -CAAAATCAAATTTCTGCCATTCGTGGGACTCACACGTGCCTCAGGCAGCTCTTCTCCAAA 
-ATCATGCATCTTAAAATCAGATGGCTCGATTTCCTACAGACAGGCCAATAAGCCTTCGGT -GTTCGGTTCGTAGCAGCCACCAATCTCCCAAGCTGTCCTATTAAGAAGTCTCGCCAATAT -AGTGTTAGCTTTACGCACATCGGAATCTGAGGGCTGGTAATCATGACTGTCCTATCAATT -TAATGGTACTTCGTACCCAATACCGAATATGTGCATTGGCAACAAGGTCTTTATTCTACA -ACAAATTTGTCAGAAGCCTTAGGCAGCGACTGTGACGATAAAACACCAGTATCTCAAGAA -ATGGAGAATCTGGGCTGGGTTTTAGGCATATTTTCTTGTGATGGATCTCTAAGTAGTTCA -AAATCAGAGGTGTCATTTATGTATTCTGTCCAATAAATAGGCTAATAGGGGTATTTACAG -TGGGTGATCAGGTGAAGTGTAAAAATGCAAACAGAGCAATCTTCAGGAAATTAAAGCGCT -TAAGAGGTCTTCTTTGTTTTACTTTCTCCTTCAGAGATTCAAGGGGGTTTCTTTCGTTGA -TTTCGACTGCGACGGCGAAGCGCCCAGGGGCCCCCAAATCAGCCAATAAGCGTGCTCACT -GAGCCAATGGACCTGCAGAGATTCACAACATACTTGCAAGCTTAGAAAATCCAACACCTA -GAATCATGCAAATGTCACAGCTACGCGCCAGGAGCGGAACCAAGCATGTCTCAAAATACT -CCGTGGCTCTAGATATCTCCCGAGAACCGAGCCCTCTTATCCAATCATTCCCTGCTTTGT -CCAGAAGCCGGCAAGGGCATCGGGCTTGTATCTCGGTCTATTAACAGTTCTGATTGTAAG -ACGGTTTGGTATCAGGTTTGGAGCCTCTCCAATTAGACGCGAAGCCTCGAAGTCAGACAA -TGTTTCCTTTATGTGTGACAGATACCCTTGAAACTTTTTCATACCATTCCGATCCTTCTT -TTCGTTGTGGCCTGGGGGTGTTTGGACTAACAACCCATTGTCTATTCACACCTATATTGT -CACAGGTCGATCATTTCAATGATTCTATTCATCTCGACAAAGCCCATTGTTGCAGCCTTC -GACCTGGCCGCTCCATCGATCTTCTAGACTCAATAAATACGGCACTACCCGCTTGAGAAC -CCTCACCTCTGTTCCATATCTTTATCCTCAAAGAATATCAATATTGCAATACCTACTCGA -GTTGACGCCGCCGGTCGCCGTCCTCTTCAAGTCCACAAAATCCCATTTTTACAGCCTTTG -GCCTCGAATCCTACATATCTAAACCCACTATCACGATGTTGATCATCGTATTGATAATGC -TCTTTGCTCTGTACAGCCCAGCACACGCTCATTCTTGGGTGGAAGAGCTCACACTCATCG -CCCCAAATGGAACATTCGTCGGCACTCCGGGATATGCGCGAGGAAACTACCTGCGTACGG -AATCTGGCTTTAGTGACACTGCCATGACTTACTTGATTCCGCCCAGCACAAGGGCAAATG -TGACTCAGATCCTCCCTACCGACAAAATGTGCAAGGACACCCAGCAGGATCAAACTCAGT -CAGAGGGAAGCCCTCGGCTTCAAGCCAGCGCCGGGGCGGCGATCGCCCTCCGTTTCCAGG -AGAACGGCCATGTGACGCTTCCTCATACTCAGCCCGGAAAGCCCGCGAACCGGGGAACCG -TTTATGTCTATGGCACCACACAACCCAAGACAGGCGAAAAATTTCTTGATGTCCATGGAG -CCTGGACCCAAGATGGCACGGGAGGTGATGGGCGCGGGGTGCTCCTGTCAGTCCAAAATT -ACGACGACGGGCGTTGCTACCAGGTCAATTCGGGGGAAATCTCAGGGACTCGGCAAGCGA -AGTACACTCACACAACAGACCCGAGGATGGGAGCAGACCTCTGGTGCCAGCAGGATATTC 
-AACTACCTTCCGATGCCCCCAGTGGTAAGCCCTACACACTCTACTGGGTGTGGGACTGGC -CTACTGCCGCGGATGTTGATTCCACATATCCCAATGGAAAGGCCGAGGTCTATACAACTT -GTATGGATGTGGATTTGGTGAACGTGGTCAGCAAGCAGGCAATCAAGAGCGATTATGAGG -TTGACCAAGATATTAACAATGCTGCAATCCCTTCGCAGTTCGACGCACTCGGCCGTCCGA -TATCCTCTCAGTCAACCACCTTTGCCACATTGTTAGCTGCGACGGGGGTTCCGATGTCCT -CTCAGCCAGCCACATTTGTCACATCATTAGCTGCGACAGGAGTTGAGCCCAAAACAGTCA -CAGTGACAGACTGGGTGATCAGCACAAGCACTGTATTTATGGCGGGGAGCCAACCCACTT -AAAATGGGGTTGACATACCAAAACACCCGAGATAGgacaatgcgaatgaaaatctgaatg -aaatgacaatgacTTTGTTTTTATGCATACTGAGTAGTTCGTTGTAGCCAAAATCGATGT -ATCAGATGCACCAATACTCAGGTATACCAGATGCCAGATATAAGCCCAATATAAAGGCAT -ACAACATACAATCCCATATAGAACCCATGAACCTTTAGTCCCAAACAGGAATAGACGTAA -CCAGACTTTTTTGAAACACGCTAAGGCCGGGTGCCACGTGACCATCGATGAGTTCAAGCT -TCACCCATTCACACACGGCCAACTCTGTGACTCTTGAGCGGCTGCCTTGCATATCAGCTG -ATTTCTGCTGCTTCCTTTACTGGAGTTCATCTCGATAAATCACATTCTTCAAGAAACAAT -AGCACCCACGATCCAAAAACCACCTACGCGCTGATCATTACTCCAGTTCGACCTTCAACT -TAAGACAGGTATGGCAATCTCCGTTCACTCACAAGTCTATTTCTCCTCAATTATCACATT -TCCCATGTGGTCTTGACCTAATCCTTGTCTTCGTGCGATCTGCACATTCTTTCCCGCATA -AGCGCTGTGCCTCTGTTACACTTATATCTGACATTAGCCCTTTCTGCAGTTTTCAGCTAT -ATTTAGCAATTCCCTGCCCTCGAGATCGCGGACCAAACGGCTAGAATAACGGGCTTGACT -TTGCCAATCTCGCTTTGAACGAATATCCTACCACGCAAACCTTGCAAACTTGCAGGCTTT -TTCTGGATTGTTGATATTGTCTTGTCTCGTCGTGAGATCTTCCCCGTCGCGGTTCGGCGT -TCCACTCGGGCCACATTTGCCCCCCCTCCCTCGTGTTGTTTGCCGATTACCCACCCATTC -ACCGTCCACCAAACCGGAGCCCCTTGGGATAAACATCCTTTTTTTCTTTTCACCAAAATC -TTGACATCCAAGAGAGGTTCTTAGTTTGAAACACACGAACCTGAAATTGTTCTTGGCCCC -CTTACAACTTAGCGCTTGCTGCATCGTACCTGAACTCGGTATCTCTTCGACCTCCGAGCA -CAAGCTATACCTCCCTTGTTCGAGCCGAAGGAACGTCCGCTGTCTGCGATTGCAAATATC -AATAACTTGGACTACGTCAATCGCAATGCGAAACTAGAGCGCCAACCACGCGACCGATGA -ACAACCCCACATTTTCCTTGCTGAGATCCGCCCCATTATTCCTCATTCCGATCCCAATTC -TGGGTCTCTCTCCATGTCAACCATGATGTCCTCTCCCACATCATCGCAGAACATTCTGTC -CACAAACAATCTTTCGGTGCTACCACCCATCCTCACTGATGGGACGGTGAAACGGTCATC -GACGCTTCCAAAAGATCGGCTCTCCACGTATTCCAATGTCTCGCTTGCATCCCAAAATCG -TTCGCGCCCAGGATCCCATGTCTTCCCCATCTTCCATACCAGCCTTCCATATGCTCTGGT 
-GCGAGACTTCGCCTATCCCTCCACCCATCCTCTCCATTACGGGCCTCCCCCTCCCCGCGC -CTCCGGGGTCTCTACGCCGGCAAGTGAACATCGGCGCCTGTCCGATCCACCACCTTCGTG -GGATATATCGCGTGGATGGTCAACAGGGCAACCGAGCACAGAACCGCAGATTAGCCATGT -CCACCAACAGTTGCCAGCCATGTCCTTTGGAGATGGACCACCATACAGCGAGGATGAAGA -CCTGCATAGCCCAGTTGTGACCTCGCGTCATCGCAAGAAATCTTCTGACATGAACGGTCT -TGTAGATGAGAGAGTCATGTCTGGAACTGGCCATGCAGGGAACAACGATCGCGGAATGTT -TGTTGGCGTTAATGCCGATGGCAGTGAGACTTATTATGTAAATGATGGAGACACGTCAGA -TGATGGACCAGGCGGTGAGTACGTGACATATCCGGCAAGTGAAGGGCGGTATTCAATCAT -GGGATACAACGCACCGGGCGGCCAGCAGGGCCACGAACACGATCTTGGATTTGAGTCCGA -AGATGAAACCCCTGGTGGAAACCGATACTCTAGAGACTATCAGTTTGCCATTGGCTGCCC -TGACGAAGAGATGCATGGAAAAGCTGTTGCACTCTTTGATTTCACACGTGAGCATGAGAA -CGAACTACCTCTTACAGAGGGCCAAGTGATCTACGTTTCATACCGGCATGGGCAGGGCTG -GCTAGTTGCAGAAGATCCCAAGACAGGAGAGAATGGTCTTGTCCCGGAGGAGTTTGTGCG -TCTTCTGCGAGATATCGAAGGGGGCTTGACGTCGTTGAATGGGGAACCAAATCCTGAGGT -CACCGGGGTCGCAGACGTCAGTCTAGACTCCACCGGGTCAGATCAAATATCTACACCTAC -CCAGGTAGAGCAGCCTTCATTTGACGAAAACGCGCAGTCTGAAAACAAAAGTACCAATGG -CAATACCTTAACTGCGGTCTCAACGGAAGGCGCGGCTGCATCTCAACCTGTGTTGGATAC -CGAAACCTCGACCGGTAAAGCTGCCGAGAGCTTCGACAAACGCTCTAGCACGCTGACCAA -GACCTGATTCAATCAAACATTGGCCCTAAATGGCTCTGTTCAATCTACGCAAGATCCTGG -TGCTCTGTGTCTGAATCTGGAGAGATGGCCTTTTCTGTTCCAAGTTGCGCCATGAAATCC -CGACAGCCGATCATCAAAGCTTGGCGCTCTCCACTTTCCTGTGACTGGGGAGGTCTCAGG -CATGAAACACGCAGTTGGCTTGTTGTGTGAACTGTTTCATGAAAACACATTTAGCACCCC -TTGGTGGATCAACCCGGACCACCTAATCTCCACCTTTTAGAGCTCCGTCTCAAAGGTCTT -TTTTCTTCCCTTCGAACCTTCCCTTCAGACGGTTTCGTCAGATATAAACCCACCTCTTTC -TTTCCAGCCTTATGTTCAACCCATCTCCCCGCGATGAATTATTACCCACAAGTTCCTCTT -TATTAACGTATATCACCCCTCCTTCACGATTCCAATTCATGTCTTCTAATCATGCACGGG -GAATCATAACCTGGGACTGGAGAGGATTGAGAACCCCTCTTTTACTACATACCCCAAGTC -TTTCGCCATATTGTCGCCTTATCTGTTTTGTCTTCCTCTCTCACAATGCCTCTTATCAAA -CTCCCCCATCGTCTTCCTTGTATTTAGTTCCCTTCAGCGCAGGCGTTATACCAGCATCAA -TGAGCATCAGCATCATGCCGATTTGAATCTATTCAATTACTTATAAGATACATTCTATTC -AGCCCAGGTCAATATTTCTACACGATCTCATGAAGTGCCAATGTAGGACCGGCTTACTAT -GGGATAAAACTACAGCACTCTCATATCGTCAGTGCGTCAGTTTGGATTTTGCCTGTGATA 
-TACATCATTTATTCAAATGCAATCATTCTATAAATTATAGTTAAGAGTTCAAGAGGAATT -CTCAATACTTAACCATATGAATTCTGATACCTAGATCCTCTACAGTCTACCTTTCTGTAC -TTATACTATGTCACACATTATATCCCGAGTGGTCTGAAGGGATGCACTGGTGGTATTGGA -CTGCCGTAGACAAAACCTGCAACAAGGCTCAGAAGGATATAGAATGACATTGACAGATTA -GGCCTAGCATCTCGAGGCCGTTCGAATAATAAGGCCCAGAGAGCAAAAGGTCGACATCAT -GGAGGCATGGGCAACGAAGAAGCCTTAATATATATACCTGGACATCGCCGGACTGCCGAT -AGATAAAGCATCTCTCGATACATCAATTCTCGGTGAGATATGGACAGTTTATTTTCAGTG -CCACTAGCCCACTAGCAAAACACCAAATATGCGAGCACAGCCGAATTCTGGCTGTTGGGG -CGTGTATAGAAGAGAACGCGAGGGACGACAGAATTTTACGTGAGGGAGAGGAAAGCAAAA -AATTTCCCAACTTGCTCTGCTGTAGTGCAGATATACTTAAGTAGGCTTGCAACTGGAGAA -GGAAGGGAGGAAATGGCAATAAAATGGAGTATACACGGACACCGGAAGAAAGAAGAAGAA -GGGGGAAAAGACCAAGTCAAATCTGGAATTCGAAATGCTAAAATCATAGCCACATATCGC -AAATGTAAGGAGCGAGCATGGACTCAGTGGGAAAAGAAGGGTAGTGGTGGTAAATAGCTT -TACTAGCTGATCAACCAAGCAGATCATCAAAAGCCGAGCTAGACTGTTGCTTGTGCTGGG -GCTGGCTAGGCGTGGAAGCGGTGCTGGAGGGAGCTGGGGCGCCCCAGATTCCGGCGCTGG -CTTTCTCCTTGGCCATGCTGGCGAGGTTAGGGCCCTTGTTTGTGCCGGTGTTAGACTTCT -GAATACCGGCGCTGGCGCTAGCGGTAGACCACAGCGAGCCGAAGGCGTCGCCAGAGGACT -TCTTGGCTGTTGCTGAGGCTGAGGCTGGCTTGCCTGTTGTGCTTGAGGGGGAAGCTACGG -AGGGACTAGCAGCGTGAGAGGACCCAGAATTTTGGGCTACATGAACGGAGGTAAAGTAGT -TGGGTGTGGCAGCTTGGTACCCGGCGGGCTTCTGCTGTTGGATCGGGACCATCTGAGAAA -GGGGAGTGGCCATCCCGCTGGAAGAAGGAGTCGGGGTAGCCGATTTGAATCCAACTAAGC -CGTTGATATTGGCACCTTGCGAGGCCGAGATTGGCTGAGGTGCAACGAATTGGGTGTTCG -AACTTGTGCTGGCGGTGGAAGCTGGAGGGGGAATAACGAACTGGTTTGGCGACTGAGTAG -ACTGGGTAGCTGGGGTTGCGGACTGGAAGTCGTCGAACTCGTCGTCGTCAACTGGGGATG -GGTCTAGCATGTTGAGAGCACCGCTGGCGGGTTTTTTGCCGGCCGAAGTTGAAGCTGTGG -TAGTGACAGGCTCGTCGTCACCAAAGTCGAAGAGGTCTGCTACAGGTGCAGGAGCTGGCT -CGGGCTTCTTTGCTTGGGGTTTCGCTCGGGCGGAGGGCTCACGAGGGCGCGCGGGGGCAT -CTGCCTCGTCATACTCGTCATATTCTTCGAAGCGGTTGGACCGACGCCCAGTGTCCTGGA -AGTCGACGCCAGCTTGCCCGCCAGATTCCCCGCCAAAACCGCCACCGTCACCATAGACAC -CTCCGCTGTATCCACCGTACCCCATACTCTCGCTTCCAAAGCCTCCATAGCGGCCCGAGC -CCGAGCTGGACATGCCTCCTCCGACACCCATGCCACCTTCAAAGCCACTGAACTTGTTGC -GGTTGTTCTTGGCCTTCTTTCTTTCAGTCCGAATTTGATCCACATCACCCAGCAACTTCA 
-CCAACTCGGACGACCGGTTGCGCACGTTGATACCCTGGTCCTTGCCGTTCATGTCGATGT -AGTGGAATTGCCGGAGCATCCGGATCAAGGACATGTGGGATCGTGCATCATCGACGACTC -GTTCGGAACCGTTCTTGACAAGGAATTCGAGAAGCTGGAGGCTCTATGATCCAAGAAGAA -GATAGTGTTAATCATGATAAACCTGTATAGATAACCACCCTGCTCGTACCTTATAGATCT -GTCGCCATTCCTCTGCAGCCTTGTCGGTGAATCGCTTGTAGATAATGGGCATGATCTCAT -TGAGTAATTGGCTGTAGCAGCATAAAGTTAGCGTGCGCGCATTAGTGGGAATACCATTAG -GCATTATGGACTGACTAGTTATGCGTTCCATTAGCGATCTCCTGCATCAACGTGGTCGAG -GCACCCCAGGGTTCATTGTTTGTAGCTTCCCGGACCTGACTTCTGTAAGTATTTTATCGC -ACAATCTTCTCATATTCACCTGCGAAAAGCCATGTTGGTTCGCGCAGAGGTGAGCGATTG -CATACGAACCTTGGATTCCATCTCGGTATAGTTCATGACAGCTATGACGGAACGGCCAAA -TCAGTTTCCGCAAATTGTAAGACACATGCAACTTGATTTGGCTTTGTTCATAACACAATC -GGACCAAGCGGAATTCGATCTTACCATTTTGAACCTTGCGAACGCCGGCCTTGAGATCAT -ACAAGGTCATGTTGCTGACCTGATCTTTGAGGCTGGAGAAATCCATGGCGAGCGGTGACT -ACTAGTCAGGACAAGTATGGGAAAAGGAGAATCCGAGAGAAGAGAAAATAGCAGCGAAGG -CAATTGGAAGCCCAGCACAGGAAGAAGCAGAAATCCCTCAGGACTAACGTGAACAGTTGG -AGGGGGGAAGATGCCCGGTTTCCAACTGGCAACCTGATTGAATGCCCACGTCACGTGGCA -ACCCCTGACCAAGCCTTGTATACTTCAGGCTGCAATAAGCAAATATTCTACTTAAATTCT -ACTTTCTGTCTGATGAAATTTCTGGAAAACCGAATTGTTTTACAATTGATACTGAATGAT -ATCATACTGTGTACATGTTCGCCCGTATAAACCGAACGCTATCGCTATCGAAATCAACAA -AATACCCTTGGTATCTTAAACAAAGACAACAAGTAAAAGGAAAANNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNAAAAAAAAAAAAGACAGAAAATAATAATATCAATAAGAATGTGC -ACCCAAATCATCCCTCATCAAGTCCATAAGCCATGACTCTCACTCGTTATAGAACTTCTT -CATGATCAAATGCAAGAAAAGGTTGACATTGACCCGCATCTGTCCATCAAAGAAGTCCAT -GAATTGCGTGTTGTTGAATTGAGCCTGCAGCAAGTCGTCCAGCGGTACCTGTGCATTGGC -GCCAGGAATCATCATGACACCGCTGCTCCCGTCGATGGTGAATACACCAACTTCGTTACT -TGAGATGGTCAAGTCGACCCGATCCCACTGCCGCTCGGCGTAGCCCTTCCAGGCAACCAG -AACGCCTCGGTCCGCCAGAGTGCGCGCAGAGTATTTGTAAGATCCAAATTTGAATACTTT -GCCAGATTTTTGCAGCTCTCGTTGATGATCCCATTGTTTGGTGAACGGCATCAGGAATCG -TTTCTTTCCCTTCTTGTTCTGCAAAGTGATCATGGCCTGTTCAATGTAGTCGTTGTATGT -CTTGAGCTGCCCCTCTAGGTAGACAGCTTGGCCACTGAGCCTCTGAGAGGTCAGCCGAGC -GCCCTCCAGTTCACGCTCTCGCTGGATTCTGCGACGATGCTTGGTGCGGATATCGATGGC -GATGGCGTTGAGCAAATCCTGATAATGGTTATGACGATTGATCTTGCCGCTCTGTTCAAG 
-TCGAAGGATATTTTCGAGTGCAATGCCCTTAAGCCCAGAATAGCTCATGGCACTAATGTC -AATTAAGCTATTGGCTTCAGAGTAGGCGCCTCGGCGTGGATTGCTGGTGTTTAGTTCTTC -GCGAACCAAAGTGTGCCATCGCTCCTCATCTTCGGCTGTCGGTGACTTGAGCATGACGTC -CATTAGGTTGGCACCACTTTGAACACGAATGATGTAGAGGATGCAGCGCTTAGTCTCCAT -AAAGAGCGTCTTTATATCTGCTTCGGGGTCTGGGAGGTAGAGTTAGAAAAAAGATCAGGT -GGGAGCTTGCTTGCATTTCTTACCTTCAACCTGGGCCAGCTTGGGATTCAATGTGAGATT -GATCTCAGTAGAGCTGACACCCATCAATTCGTTCTCGTTACTCTTGACACTGCCAAGTTC -TCGCACAAGTTCCTTGAGGAAGTCATCAGGGTGTGGGCACATATAGTTAATCTCAGAGGC -CACAAGCTGGTGGATCGAGAAGATATCAGACATCTTGATGTACAGAGTAGGTTTGGCCTT -AGCATAGAGATCGTTGAATTCATCGATGTCGAAATAAGCCTCTGCATCCTGGACTGAGAT -CATTTGGCCCCAAATACGCCCTAGGCGCTGAATTGATACTCCGATGTAGTTGTTGAGCGG -CTGAAGATAGACATTATCCGCCCCAAAGAGTCTGCCAGATGCAATCTGAGCAACAACCTT -AGATATTTCTCCAAGGTTGCGCTTTTGTTCTTGAGTGAGACCTCGATCGATTACACCATA -CTTCTCTGGCTCTAGCAGAGCAGGCTGGAAGTAATTTTTCCATACCCAGTGGCCGACCGT -CTGAAGAATGAATCCTGGATCCTCACCTGAGAATCTCTCGAGAAGGTTTTGATACATTTG -CTGTGCGAGATATCTGATACCGAATGGCATACGATACAGGAAGTCCTCGAAGCTTGAGAA -TAATTGGTCAACAATGTCACGAAGATCTTGCAAATGCTGAATGAATGTCTCTCTGGTTTC -AGGGTCTCTGATCGCTTCTTCTCTGGGAAGATCCGGTTGTCGGCGGCTTCGCTGGCCCGT -TTGAAGCTCCTCGTTGTTGATTGCAGAACGGTAGATTTGAATCGGATCACTCTCTAAATC -AAGGTCCGGGTTGTCGATCACAGACTCCTTGACGACACCCCCTAAGACGTCGCGCATGAA -CTTCCGGTCGCGTGATGACTTGGCATATGAGGCGAAAAGCTTGTTCCAGAACGAGGTGCA -GCGCAAATATTCAGGAAGAGACGTGAAGCTCTCAATATCCTCTTTGACGGATCGTGCCAG -GAGCTTAATAAGATAATACTCCTCGCGTCGTTTCTGAGAATATCCAAACAGACCCATCAT -CAAGTGCCGGATCCGCTCATATTCCTTGTCCGAGGTGTTGATCTCGCGCAATCGCCGGAA -CAAGCGAGCCAGATACTGTGATTGGGTTTGGAGAAGGAAGAAAAACACCTGGTACTGATC -CAATTTCCTCCTTGAGGTCTTATTAAGAGCCTTGAGGTCGTACGGATCCTTAGAAGAAAT -CTCGGTGTTGGATAATAAGTTGCCAATGTGGCCACCGAAGTGCTTTTGATGCTTGACGAC -TTCATCCAGAGTAATTTTGTTCTTGACAAGGAGAGCAATCTTGATATCAAGTTGACTGAT -GTATTGTTCAGCAAGCTCGTTCTGTCGCACTTGTTGAACCACCACCTTTCGCAGGCGCTC -GAACTCGATCTCTTCGTCAAAGTCGAAGTCACTGTCATTGAGAAGATGCACGAAGCCCTT -GACTGTTCCAACAGGAGGGTTCTTTCCACTTGTCAGGCTCTTATAAGCCTGGCCTTGAAT -ACGGGCTCGTATAACACTCTGGGCCTTGATGACTTTCTCCATGTTTTCATTATAGTGTTG 
-ACGTCTTTCCTCAAAGCGATTGCAGACAATGAGTCCTCTAATTCGTGCTTGAAGGTCCAT -GATAAAGTCCTCCTCTGCTTCAAGATCAGTCAGGATATCACCAACTTCAACTCGCGCAAG -CATTGCCCGAGTCAAAGATTGGAGTTCAACAACTTCTTCCTCATTGTCCTCAAGGCGAGT -CAATATACCGCCTACTTCGATCCGTGAGACAGCAGCACGGGCAAGGGCCTGCAAATCAAC -AATGGCATGTTCGTGCTGCCCTAGTTCATGTCGTTGTGCAAGTAGATACTTCCTGTGAAT -GACGCCACGGGATCGGGACTGGATTTCCAAGATAGCTGATTCTTCCTCAACCAATGCGGT -CCTCTGGGCATCTACTGATTTTCTCAAAGATTGTCCCCTGACAATAGACTGCAGGTCGAT -CACTGGCAATAGATGCTCCGCAAGCTCGGTGCGAATGGCTGTAGTACTTTTTCTTACAGC -AGCAGCCCGCGCGATAGCTTGCAGTGCAGCACACTCATTTTCGGTTTCTTCTAGTGCGTC -CAGCAGCAGAGTTTGGTGTTCCCTGGCAATCAGAGCCCTGATGGTAGACTGAATACAGAC -AATGTGGGACGTTTCGTTGTTGAGCAGAACCTTCATCTGAGACAAGTTTTGACGAGCCAT -CGAACCTCGAATAGCCGCCTGCAGCGACTTGACGGGTGTTTCGACTAGCTTCGTCTCGTG -AGAATGAGCATTCATCTGTCTTCGTTGCAATATACCACGGATGGCAGCTTGGAGCGACTT -GACGGAACTGCCGGCCTGTTTTACCTCTTGGGATTGAGCGTTCATCTCTCTTCGTTGCAA -GGTACCACGGATAGCAGCTTGGAGCGACTTGACGGAACTGCCAACCTGGCTCACCTCTTG -AGATTGAGCATTCATCTCTCTTCGTTGCAAGGTACCGCGGATGGCAGCCTGGAGCGACTT -GACCGAAGTGCCCACTTGTTTCAGCTCGTGGGATTGAGCATCCAACTGTTTGCGTTGTAA -AGCGCCGCGGATGAGAGCTTGAAGTGATTTTACATCATTTTCAGTGTGTCTAGACTCTTC -GTATTGGTCACCAACATTCATGCGTTGCAATGCACCTCTGATCGCGGCCTGGATCTGTTT -AATGCCGGACACTTCTCGGCGTGCGCGAGCTTGTAGGTGACTAGCCTGAGCACGAGCCCT -AGACCCCCGAATCAGGCTTTGTAAGTCCAGAACATCCATCTCCTGAGACTGTCGCCATTG -CTTGTCGCCTTCTTGACCACGGCGTACAATGAAGCCTCGGCTAATGGCCTGCAGCCCAAC -TGCAAAGTGGAGCATACTTAGCCTGTATTCACTAATTTGTCGGGCCCAATCTCCACGCAG -ACGGGACTGTAGCTCAATGAGGAGGGGTTCGAAATCCCACAGATCGTTCATCGTGTTGCC -CAACTGTAAACGCAACATAGCGCCTCTGATCTGAGCCTGGAATTCGCAGATCGAGGCTTC -ATTCTCGTGCATTTCTCGATCAATGCGATCTTCCTCTGATTCAAcaggctcaggctcagg -ctcaggttcaggttcaggctcTGCGCCAAAGTTCGCAGCCATACCCGCAAAGCTAGGCAT -GCTCACACCAGCCTTGTCCAGTCCCTTTTGGGTTTTCTCAAGTTCGTGGTGTTCAAACTC -AAGCTGACCAACCAAATTACCCATGCGGAAGTCCAGCATGCCATTTTTGAAGAGCAACCA -TGACAACGCATGAATACAATGTATGACCTTGGGAAGGTTCTTCTTTTCATACAAGTCAAT -GAGTTCAAACCGGAACAACTCTGGCAACTCGATCTCATCGAGGAATCGGAAAAACAAGGC -GATGTTATCTGAATGGCGATATTGTAACTTGGGGTGTCGGAAAATACGAAGGGTTTTGCC 
-CGCATACATTGCTTGCACAACCTCCGCCAAAGTGACGCCGTCTCGGAGTCCTTCCTCCAG -CTGAACGATTGGCGGGATTTGCTTTTGAATGACATCCTCAATCCACTGCTTGGCTTCTCC -GATATGGCACAAATATTCGTATGCTTGGATGAATTGTCGCTGCTGATCCATCCAATTCCC -GGCTGCCCATGAGGACGCCTTCGGCTTTTCGGGAGATACTGCCCCATTCCTTCCGGCTAC -TGATCCCGCTCTTTTAAGACGTCGTCGGCCTTGCAAACCCACCACGGAGGCTGCGGCCGC -GCCAAATTCATTTTCGCCAGACTGTGCAAACTTCGAGAGAGTTCTAAGATGACCCGTAGA -TGATTTTTGCAGCCCTTCAAGCTCGGGGGCGGAAAGTGCAGCAGTCAGCTGTGGTGGCTG -GGAACTGTAGGAGGCTGCCTTCTGGGGAGACGAAGGACGCGATGTTCCGGGCACAGTTGA -CCAGTGGTGAGGAGAGGTTGTGGGTGGGGGGCTCTGGGATTCGTAGGCCAGGGTTTGATG -ACGGGGAGATTGGTTCTCTTGGAGATTGAGCTGACGTGATTCGGGGGTGGCGGGTCGACA -GTCCTTCCACGGGCGTGATTCATCAATTGTTTGGCTTCGTGAATGTCGATATCCGCCAGT -ATTAGAGGCTGAGGATGGTGTATTAGGGACTTGCTGTAGTGGTCGTAGAGATCGGCGAAT -ATTATCGTATGCTGTGCCAGCATTTGTCCAATTTCCTCGTTCCATTGCGTCTGTAACAGT -CGTTGCTGAGCTCATGCTGCTTGCCTCACTCTTCCCTCGAGAGTGACCCAATTTGTTCGA -TATCGACGCATTAGAAGAGACAGTGCTAGTCCGACTAGGAGCAAATGTACTTGACGCGAC -GGAATACGCAGATGAGTTTGTGGATGCTGTTGACCCGTGTCGTAATGGGCTGGTGGACTG -GGAGGGAAGGTCTGTCTGGGCCCGCTTGAGGGATGGAAATTGGGTCGACATCATTTAATC -TCAGGGCGGTATGTAGAAATGGATTGAAAAATGTCGCGCTCCTTTGGCCGCATCCCTGCT -CTGGTGGTCGAAGCGGAATGCGCAGGTTAAACGAGGTCTTGAAGACCTTTTCCGGAGACA -CCTGTGTTGGATAGCAGTCAAAATCAACTCTCACGATTTAGCGAAGAAACCAATCGACAG -GTGCGCGAATTTGTGTCGATAAGCTCTGGGCTCGTGAATGCCCGATGTGGAATTGTTGAC -GGTGGGGTGCAAGTGCGACCGAGATTCCCCAAAGTGGGCGCAGGGGGCACAACAAATCTC -CAGATCGGGTATGGCGCAATCAGGTGATTGTGGCTTGTCGATCTCGAATCACGTGGGACA -GAGAACTAAGAGATTGTGTTACTCGACAAGTCACACCTTGGGCATGGTTCTGTGACAGTA -CTTTTCATTTGGAATTAGTCGAAATCCCAAAGGTCGCCCTCTGTATTCCGTAAATTGCCC -GATATACTCCGTACTGGGCATTCCTTTCAATCCCAATGCCTGACCTACCAGGATACCTGC -AGTCTTGGCAAAGGTGGCTTGTGCATTTAGGCAGGTCTAAGTGGCCCGGGGCGCTCGTAG -TCCCGTACGACCAACACATCTTTAAGTCACCCAGTGAGTTGTAGACCGCCAATTGATCTG -GAGAAGTATGTACTCCATGCAAGCTGGTAACCTTGGTCTTTGGTTTTCTCTGACAGGGAC -TTCCCTCTATCTTACATTTTTGGGATTAATATCGCGCTCGATATGACATGAACAGTCCTC -GTTATCTTGTGTTAAAATATGTTGCAAGACATTCATAGATTTACACCCATGAAATATTCA -GCACAATAACTTGTGAATTGAAAGATTCTACTGAAAGCTGATATAAGATATTTCAGATTG 
-TCAACCAATTTTGGTGTCTTGTATATATACTCCAGGGCTGGAAGTACAAAAAATGGCGCA -ATAATCCGCCTTCAAAAGGCAGGTTGCCGGTCAAACCGACCCCGCCAATAAAACCACCTG -ACAACCTTAACTTTGTTTCTCTTTTAATCGCCAAGTGAGTCGGTGAATTAATTTGCTTCT -TTTCACCACTTTCATCATGGCATCGCTGCTTTCTCAACCTCGTTACATCTATAAGATTGT -GCCTTCCACCGCACCAGTCCGTGAGCCACTCCCGGAGCGGCTCGCAGTAAGCGATTTAGA -CAAGGACTCTGATTTCATCCACCTCTCGATGGCACATCAGGTGGGAGGCACACTAAAAGC -ATTTTTCACGGAGGAACCACTTGTCTATGTATTGCGGATCGAGTATCATCGCGTGATCCA -GGATATTCGTTGGGAGTCCCCAGACGGAGCAGTGTCTGGTCCTCGACCCGGTGATGGACT -TTTCCCAGTAAGTTGAACCTGCCACTTTAAAAATGACATTTGCTAAGTAAAGCCTAGCAT -CTCTACAATGGACTCAAACTGGGCAGAGAGGAGGTTGAGAGCGTCGCCATCTGGCAGAAT -GACGAGGGATGGGACAAAGCTCTCACTGCAGCGAAGCCGTGGCTCCTGTACTGATTCTAT -ATCCCGTTGAATCTCAGGTTGATACCAATGAAAAGCACATGCCTAGAAAAATGTTCTGGC -AACACCCCCAGCTAGCAGATACCACTCCCTGTAAAACCACCTAGTCACCACGCGGGTAGC -AGTGCATCACTTGCCCATCTCTACATGCAGTTACGATACGTTAGCCAAAATAGGAAAAAA -GTTGAGGTTTTGAAGGTGTGAGATATTGACCACTCACAGTATAGAGAAGCCTCCGCACTA -GCCAAGAAGACGAAGCTTGTCGCAACCTCACTAGGCTCACCTGGCCGTCCAAGACCAGCC -TTGTGACCCCATCCTTCCATCTGGTTGGCATCTCGGGTGTCGACTTGAATTGGGGTATAG -ATCGCACCTGGACTAAAATTGGGAGCATGTTAGTATGAATTTCCGGAGAGGAATGAATGC -AGGATACTCACGCGACTGCATTGACCCGAATGCCCTTGGGATCAAGGGTGTTGCTCAAGC -CCTAGTAAAACTAATGATAGCTCCCTTTGTTGCAGCATAATCAATCATAGTCCCAGAGCC -ACGAAAAGCCACAACAGAAGCATTATCCATGATCCTGCAGCAAGTGCGTGAGTGTATGAT -TATATAGATTAAAGAAAGCACCTATGATTTACCTTGTGTCATATGAAGAAGAGCAAACTT -CGCCATTGCAATCATCTGGATAATATTCTTTCTGAAGATGTTTTCAGCCTTATCCGGATC -GACATCCTCAAATTGGGTGGAAAGGTATTGTTTCGACGCATTATTGACAAGTATGTTGAC -CGCGCCGTATCTGTAGACTCTCGGGGCCAGTCATTTCCTAAGAATGAAAAGGGCACTCTA -GGCTGACTTCTTCACATCTTCATCCACTGCATGACGACATGCGTGGCGATCTCTCAAATC -ACATTCGATAAGAAGGCAAGTCCGCTTCTCTTGCTCGACAAACTTTTTGGTTGCCTCTGC -GTGCGGCTGTTTCGAAAGCAAATAAGCGATAGTGACATCCGCACCCTCTCTGGCCACTAG -AAGAGCGACTGATCGTCCAATTCCAGAACTGAGAAATTATTAGCCCGAGATTGTTCCTGT -GAACTCGCTTGGCATACTCTCCGCCAGTGACAAGAGCATTTTTGCCCTTCAGCTTTACGG -AACCCGAATATTCAGAAAAGCCGTGAGAGTCCTCTAGTTTGGTTGCTTCACTGGTCTCCG -CCATTATGCTCTCCAATCTAGCACATCATTTTAGCAGCAGTATTTCGCTCGGGTGAAACA 
-TTACTTACCCCGGTTTGTGAGGTGCCATGTTCACCCGTAGATGACTCCATATCGATAGTG -AACGTGAGTTCTGGAAGAATATGTTGAAGCTTAGCAATTTTTGAGAGGCTATATATACCA -TATTTGTTCCTCATTCTGTCGTCATGTTGCTATGGACGTACGGGACTTGAGGCCGCTATC -TGGCGAGATTGCTGTCTCGTTGGGCGGTTAACCATGCCAATCAACCATGAATTTTGGTTC -AAAGCATGGCTTTCTACATATATAGGAACAAAGAACTGCATCTTGACCATTATAACCAAC -GAGAATTGGATTTTCACTGCCATGTCGAAAGCTGTATTGTACGAGATTAGATATTTATAT -TGGCCTCATTGGTAGAATATATTTAAAAAAAAAAAGTAAATATCGGAATCAAACCTAAAA -AGGATGTTATATTCTACTGAAGATCTTAGCACCATCTGCATATGTAGTCTAGTAGTCTAG -TACTGATGTACATGAAGCTAAGCATAAACAAGCTCCCGAAGTCACCGCCTTGAGGAATCT -CGTTTACGAGCCAATCAGCAGCGACAGAGATGAAAAAACCAGTACGCTATACCTGAAAGA -GGCAAGCTCCATTCTGGCTTGGCCGTGCACGGAACAGAAAAGGAAACAATTTTTCGCCCG -ACCTTTTTCTTCTTCTTTCTCTTCCTTGGGAAAGCTCGACTTTCTCCATCAGCAGAACGT -TTCTTTCTGTTGATTAAGATTTTCCATCGGGTCGTGTCTCACTCGTTATTATAAACTCTC -CTTCTTTCAATCATGCCTATCACTGCTGGTGACGCAGCACCTGGTGCCGCCATGAAGGCT -GAGATCACCGACTACTCCAAGGCCATGGAGGTTTTGGACACCTATACCACCCTTGATGGT -CTTGATGCGGATACCCTGCTTGACTCTGATAAGCACGGTGCACTGACCTACAATGATTTC -CTGATCCTGCCTGGTTACATCGGTAAGCCCGAAGTCTGCAGTCCAAAGATTTTTCATTTT -TTTCTAACCCCGCCAGGTTTCCCCGCTTCCGATGTCTGTTTGGAAACCCCTGTTACCAAG -CGTATCTCCCTGAAGGCCCCCCTTCTGTCCTCCCCTATGGACACTGTCACCGAGCACAAC -ATGGCTATCCACATGGCCCTGCTCGGTGGTCTGGGTGTCATTCACCACAACTGCTCCCCC -GAGGATCAGGCAGAGATGGTCCGCAAGGTCAAGAGATATGAGAATGGATTCATTTTGGAC -CCCGTTGTCCTCTCTCCCAAGGCCACCGTTGGTGAAGCCAAGGAGCTGAAGGCCAAATGG -GGCTTCGGTGGCTTCCCTGTCACTGGTAAGATACCCACATCTACTCGTTTTCCAAGATTC -TACCCACCCACTTTGCATGATGATCATCCTCTTTTAATTATAACTTTCGTTCCACTGCAG -CGACGAAAGTCGCACTGTAGAGAACCAGATTTTATCTGACCAATCTGTCTATTTTGGATA -TTGTCCACACATCAGCTTTTCCCAGCAAATGCTAATGTTTGAACATAGAGAACGGAACGC -TCAAGTCCAAGCTCGTTGGAATGGTTACCTCGCGTGACATTCAATTCCACACCAATGACG -ACGAGCCCGTGACTGCCATCATGGCCACGGATCTTGTCACTGCCCCCGCCGGTACCACCC -TGGCCGAGGCTAACGAGGTTCTCCGTCAGTCCAAGAAGGGAAAGCTCCCTATTGTTGATG -CCAATGGCAACATCGTCTCGCTGCTTTCTCGATCTGATCTGATGAAGAACCTCCACTACC -CCCTTGCCTCTAAGCTCCCCGATTCCAAGCAGCTGATTTCTGCCGCTGCTATTGGTACTC -GTGAGGAGGACAAGACGCGCCTCAAGCTCCTTGTCGAGGCTGGGTTGGATATCGTTATTC 
-TGGACTCCAGCCAGGGTAACTCTATGTACCAGATCGAGATGATCAAGTACATCAAGAAGA -ACATGCCCGAGATTGACGTTATCGGTGGTAACGTTGTCACTCGTGAGCAGGCTGCTGCTT -TGATTGCCGCCGGTGTCGATGGCCTGAGAATTGGCATGGGCAGCGGTAGTGCCTGTATCA -CCCAGGAGGTTATGGCTGTCGGTCGTCCCCAGGCCGCCTCCGTGCGCAGCGTTGCATCCT -TTGCCGCTCGCTTCGGTGTCCCCTGTATCGCTGATGGTGGTGTCCAGAATGTTGGTCACA -TCGTTAAGGGTCTGGCCATGGGTGCCAGCACCGTCATGATGGGTGGTCTCCTTGCCGGTA -CCACCGAGTCTCCCGGTGAGTACTTCGTTAGCAACGAGGGCCAGCTCGTCAAGGCCTACC -GTGGCATGGGCAGTATCGCCGCCATGGAGGACAAGAAGGCCGGTGGTGACGGAAAGGACA -GCAAGGCCAGCAACGCTGGTACCGCTCGCTACTTCTCCGAGAAGGACCGCGTCCTTGTTG -CCCAGGGTGTTGCTGGATCCGTCCTCGACCGCGGTTCCGTCACCAAGTTCATTCCTTACC -TCTTTGCCGGTATCCAGCACTCCCTGCAGGACATTGGTGTCAAGAGCCTGACCGCTTTGC -ACGAGGGTGTCAACAAGGGCACCGTCCGTTTCGAGATGCGCAGTGCAAGCGCCATGACCG -AGGGTAACGTCCACGGTCTCCACAGCTACGACAAGAAGCTTTACTCGTAAGATCCAAGCT -TTGTAAGAGTTCACTGGAAAAATTTGTGTATACATCATCGGGGTGAACCCCGATGGATGA -TGGGGCCTTGAAAATTAAAATAAGGGTACAGGATAGGAGTTTACGATTTGAAGATTTACG -CCATTTCTAATGAGCAAACAAGTTTATTTTTTCTCAATAAATTGCAAATGTAGAAAGTCA -CGTAGGAGTTTTGTAGAGGAGGTGCTGGGGTTTGTCCGAGCACATGCGCGCTGCATTCGG -CTAATCTCGATGGGGTTAAGCTGTACGCGGGGTAACGATGAACTGCCCGAGCAACTGCCT -AGGCTCTCGGGTCTTTGCTCTTTAGTATAAACATTGAGAAGCCATTTTTCGGCTTCGAAA -TCTCCTACGATGTCCTTTGCCTATCAAGAATACCTTTGTCGTCACTTAGCACACCGTTTG -AGCAATCGTTTTTCTCTTAACCTTCGAACATATCGTGTACCTCACATTGCCTCGATTCAG -CCCTTCCAACGGCTAAACAGTGAATCTGCTCCAAGACAATACACGACTACAGTTCCCCCA -ATTCCTCAGATGTCTCGCTCAAAACGTGATAGTGAGCCTACGGTCCCCAGGCCTAGTTCT -AGGTAAGCCAGCAGCAAAGGACCACATGACAGAGGTCTACAGAGCCATGTCATCCGAAGG -GAGGTATTCCCCCAAAAAATGAGAAACAAAACTAATTATTTCATAGTGTGCTCCTTGTCT -CTTCAAAGAACGAGATCCTTCTTCTTCATCGCGTGAAGACCTCGACTTCTTTTGCATCTG -CCCATGTATTCCCCGGTGGCAACCTATCCGTCCAAGATGGGCAATGCCCACCACCTGAAG -ACCTAGCACGGCATGATGATTCTCCTAGTTACCGACGTGCAGCCATTAGGGAACTCTTTG -AAGAGAGTGGAATTCTTCTAGCTAAAGACCGGAACACCGGGAAGATGCTTGCTGTCGACG -AGCCTACCCGAGAGGCAGGTAGACGACTCATTCACCAGAACAAAACCACTTTTGATGATT -GGCTGAAGCAGCAAAACGCCGATGCCCAGCCTGACATAGGTAAATAGATAGTTTGCGTGA -CTAAGACCTGAAACCAGAACGCTGACTCCACAGGCCAATTAATTCCGTTTACTCGCTGGA 
-TTACGCCAACCAATGTCCCCAAACGATACACCACACAGATGTACCTTTATTTCTTGCCTT -TTCCCGCCAAGGTGGATAAAAAGCTCCTCGACCAACTCCCAGTAGAGGGGGGACGCGAGG -AACATGAGATCCCAACAAGTGATGGTGGAATCGAGGTTACCGAGGCACAGTTTCTTCCAG -CCTCGGCGTGGGTTAGTCGTGCTCAAAAAGCAGAGATAATTCTGTTCCCACCACAGTTCC -TTCTTCTGCATTTGGTGTCGGGATTTCTGGACAGGGAGCCTCGTACAGGAGCGTCTATGG -AAGAGATGGAGAACCGTCGTCAAGCGCTTATCGATTTCGTCCATTCAGGCTCTCCCCCAT -GGACAGAAAAATGCATATCCCCGAAAATGATTCACATGACCGGTGATGGGCGAACTGTGT -TGGGCCTAAATGATCCTGGACCTGAGCTGAAAGGGTCTTCCAAGCGTGGTGAGCCCGACA -GAGTGGTTATTGTGCGGTTCAAAAAAGGAAGTGCACGAGAAGTGGCTGTGGGATGGAAGA -AGGATGTGTTGCAGGAGGAGAGGGAAAGAAGTAATCTGTAAAATACTTGCATTTACCCAT -ACATGTACTACAGTCATCCATATATCCGAATCAGCAATCAATGTCCATAGATGAACCGAT -GTTGAAATTGCCACGGCCTCCTGTATGGTTCCCGATTAAAGCCACGCTTGCCCCACTTTG -GCCACGCCAGCCCTCGACAAAGGGTCACATGACCCACACAAATGATTTGAGGTCTTCAAT -TTAATGTGCGGCAGAAAAACCCTCCACCTCCAACCTCACCGTCGCCCGTCAAAACCTCAA -CACCAAACCCGCCAAAATGGAGAACGACAAGGGAGAGATCGTCGATCTGTACGTGTCATG -GATTTCACACCCGTTCCCACAGTCCCAATATACCCTATGCCCTGTGTCTGCGTGGTCGCA -TACGATCAGCAATCCCGCCTTGATCCCAAGCCTAAGAAATGGACCCGGTGTGATCGGAGA -GTGCGAGGATTGAAGAAAAAAGCTCTCAGCGCTGCGTGATTGCGATTGTTGGCGAGCCAC -CAGAAAACCAGGGCATAACATGGTGTTGGAGGGGAACACATTCGCTCGCAAAAAAACACA -GACAACTAACATGACCTTCCTCTCAACAGCTACGTGCCCCGCAAGTGCAGCGCCACCAAC -CGCATCATCAAGGCCAACGACCACGCCTCTGTCCAGCTCTCCGTCGGCAAGGTTGACGAG -AACGGCCGCTACACCGGCGAGAACCAGACCTACGCTCTTTGCGGTTTCATCCGTGCCCGT -GGCGAGAGCGACGACTCCTTCAACCGCCTTGCCCAGCGTGACGGTTACCTCAAGAACGTC -TGGACCGCCGCCAGCCAGCGTTAAATTATCCTCAGCAATGTGGACTTTTTTACCGGGTGG -TGTTGACGGGTTGATTTGATGGTACGTTGTGGTCATCTACATGTATTCTGCATGTTCATG -TTCAATTGAGCTAATTTGACGTATAGGAACCGTGGCTTGATGGTCCGTGTTCGGGGAGTT -GGTTTCAAGACCAAAAAAGGACGTTAAGGGATATAGATATGTTCCTTGGTTCATTTTTTC -TTTTTCTCCTCTGAGAAACTCCTGTTTTCCTACCTATCTCTCGAAATGAATGGAATATCG -ACGACTTAATGGGTCAGGTTTTTTTTCTCTACGAATCCCGCCTGTTCTCTTTCTCTGATA -GACTCCCACCCAACCTCATGAAGCACAAGCCAAAAAAAGCAATGCCAAAAAATCGAAACG -ATATCCCTTTCATATATTTCTTAGTCTCTGCTTGCTTCCTTTCCATTTCTTATTTATCTT -TCTGCTCGGCATATAAAGACCTCCTCGTTCTTTGTAGACTCGAAAACCAGACTGGTTCGC 
-TGGTCTTGGTCTTCAGTGTCTCCATCTCCATGAGTGAGGAGGTAGAAATATTTTCCATTG -CCGAAGATCATGCTTCCGAGCTCTCGCCTGCTGAAGAGCCACTGTTGCAGAGATCCATGC -GCTGGGGCACAGATACGGGCGGGACATCCATTGTAGATACGTGCTAGATGAATCTCATTT -ATGGATATGTTTCATACTCTGTTTTCTTAATTGTATCTCTGACTACATATATACGCTGGG -GCCTTCTTTTATCAAGCATTTTATTCACATCCTGTCCATTGTTGATATGTAATACGTTTC -CTTCCACATAAACTCGTGATTACAAGCAAGAAGAGCCCACAAGCAAGACACAACTCAAAA -GCACATTCTTTTTTCTCTATGAGTATTTCGCTGTTTAATCCTTATCGAGACGGACGGCCG -GATTGGTCTCAATGAAGTTCAAATCGACTTTATGACCGAGGAGTTCACGGATGTTGCTGA -CGGCGTACCTAACGAGCTTATTAGTAAATGAAAATGAATGGTGGCTGATGCAATTTACTT -GATCATAGTGGGTCTCTCCAGACTCAATCCCCATCCGTAGACTCTCAGGTCCTTAGGCAG -ACCCATAGCCTCCAGCATCTCTGGTCGGAACATGCCACTGTTACCGATCTCAACCCACTT -GCCGAGACCATCGTGGTAGCCGAAGATTTCCATACTGGGCTCGGTGTAGGGGTTGTAGGC -GGGCTTGAAGCGCAGCTTGTGGATGCCCATCTTGGCGAAGAAGACCTCCATGAATCCAAT -CAGACCACCGAGTGTCAGACCGAAGTCAGCAATGACACCCTCGACCTGATGGAATTCTGC -CAGATGGGTAGCATCTACGGACTCATTTCGGAAGACACGGTCAATACTGAAGAAGCGAGC -CGGCCGAGGGTTGGCAGCCAGCTTGTGCAGCATGTAGGTCGAGATGGAAGTGGTGTGAGT -GCGAAGCACCAGACGGAGCGACTCATCCGCGCTCCAGGGGTAGCGGTAGCCAATGGAGCC -GAACTTTCCGTGCTCGTGGACCTCACGGACGTTGTCCCAGTATTTTTGAAAGTCGAGGGG -CTTCTCGGACTTGTTTTGAGCTCGGACGTGGGGGTCATTGGGGGGATCCTCACGAGGAGG -GTCTGCCACAGCAGGGTCGGAGATGTAGAAGGTATCCTGGAGATCACGGGCGGGGTGTTG -CTGGGGCACGAACAGGGCGTCGAAGTTCCAGAAGCCGGTCTCGACGAAGCGGTTGGTCGG -CATCTCCTCGAAGCCCATGTCGAAGAAGATGTTGCGGAACTCTTGGCGAACCTTGTTCAG -CGGGTGAAGAGCACCAGCTGGAGTCGGGGCACCCTTAGCATTGAAGTTGTAGGGCTTGAG -ATGCGCCGTCTTCCACGAGCCATTGGCGATCATCTCCGGCGTGAGGTCGGTCTGCTCCTT -GACGAATTCTCGGGCATACTTGGGACCCTTACTGATCTTGAAAGAAACTTCCTTTGCCAG -ACCAACCAACTTCCTCCGCTTGAGGTCAAGCATGACCTTTTTATCATCCAGAGACTTTGT -TTTTTGAACCTCAAGCAGCTGCTGTTGCGTCTCGTCGACAATGGAGTCTGTGGATGCCCG -CAGCAAATCCTTATCCTTCTTAATCCAGCCACGCTTGAAAGCGTTTCCTTGGCCAACTTT -GGCGTTCTCCTTACCCACAATGCCTGGCAAGTCGGAGATCTTCAATCCATCCATGGCAGC -AAGCACCACGGCGAAGACCTTGGCCTCGTGACTGCCATTGGCGGCAATCTCCTGGCCCTC -GGCAGTGAGGGTAACAATTTCCTTCTCTAGGGTCTCATATTGGACCATCTGTCGCGAATC -AAGGCGGTCGAGCGCACTCTTAACGTTCAATGAGGGGGTGGAGGGGAAGGCTTCGCTGGA 
-GAGGATGGGCGCATCGGACGCGGACAGGGCGCCCAAAATCTCGTTTGTCAAGTCGGACGC -CATGGCTGCAGCCTTCTCCTGCAGAGACAATTTTTGACGGGTGTTTTGAAGGGCGTTCTT -TTGATCGGACAATATTTCGCAGAATTATGACTCTGCGGGATTCCCAAAAGATAATACGGC -GATGCACACCGACCACTTCCGCTATTTACCCCCGCCATTTGATTGTCGACCGTTGACCAG -TTGACTTGTAGGCGACGTAAATTTAGGATATAAATTACACGTAAAAGGCTGTGTATTATT -GTATGTATCTTCCCTAAACTTCCTCTCTGCAAATACGTCGtttccattctctctttctct -ttctctttcttattatttttttctttaattatttTTGGACAAATACCCTGTAGGGTCAAT -CATAGTAAACCAGGTACCATCTCAAGTCAAACATCTTCATGTTTTCGCACGAGGTCCTTA -TGTGACCAATAGACGTTACGAAATACCCAGATCAACGAAAGACGCTAAGCTCGACGGCTC -AGCCACCAAAAAAAGACGCAAATTGCGATGAAAATCAAACTATGGATCTGAGGCCCGTGG -CAGAGACCCTAACGAAGAGACATCGTTGATTTCTTATAGGCTCGCACTAACTGCAACTGG -ATCCCGCTCCCAGCCAATTGTCTAAAACTCTGTCTCGCCTATCTTTCCCTCTAATAAGCG -CTGATGCTATACCAAAGGATATGGATTTAGACTATTGAGAAGTCAAAACGAACCGGAGCC -ACCTTGTGATCGACACTAGCCCCAGAGTCGATCACTTCATATGAAACGTATTCCGTACTC -CGTATGTATACCTTCCAAGATACTTGGGTGCATACAACATTGCAGATACATTGTTAGTAG -TCTGGATAAAGTATACTTTGTACCTAGGGTACCTAGGCATATATTGCCTATGATAGATCA -AGTCCGTGCGCCTTATGCAGTCACCTGGGCATTCTGTAAATGTGCAGAAAGATACCCAAT -TTTTTCCCCCAATTTTTACCCATTTTATTCTCTCTTATTTTTTTTGTCTGTTCAGAGGCG -ATTTATAAGTTCTACCTAAAAGGTAAGACATAGGGAAAAAAAAAANNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTTTTTAAGAA -AAAACCTTCGATTATCCTGTTTTCCCCCGGGGTTCTGAAGTTCGTCTTGTCCTTCGATCC -CCGCAGAGCCCATAGTGGTGGATTACATAACAGGGCGCGTTCTCCAGGAACCTATTTTCC -CCCTTCCCGACTCTCCATATTTCCTTTCATATTTATTCCCGGTTGATTCGGGTTATTCTT -GTCTTTTTATTTGGATTTCCCCATGTCGCCTTAATTAGACTATACCTCTGTCATCCTGTC -AGGATCCAGACTTGCCAGTGAGTGATACTATCTCCGAATTCTACACCCTATCTCATATTC -TCACCCGGGGAGTTGCTGGTACTATATGTATTTTATGTTAGGTGAAACTGACTCATGTAG -GCCACAATGGCTAGCGCGGACGAGACTCTCGCGGCCGCGACTGCTTTGCTCCAGAATTTG -GCAAGAGACGAATCTGTCTCCGGTTCCTCTTCGCCCCCATTTGATTTTCAACTATCCAAC -GGCACCAAGTCGGCCAAACTTCCTGGAGAAAACAGTCCCACCAAGGCAGCCTTCGAAGCC -GAGCTTGAGGCCCTGGTGCGCCGTGTGCACCATCTGGAATTCCAAGCTGTCAGTCACCAC -AATTTCCAACAGCAGACGGATCCCCAACAGACCACCCCGTCTGCTGAAGAAACCAGCGAG 
-AGGAATTTCTTGAGGACATTTGGGCTCTCTCGCTTGTCGTCCGGTCAAGATTCTGACTCT -CTGTCACAGCAGCAAAAAGCGATACCAGTTCCCGCGGCCGACGAACCACCCCTGGGAGAA -ATTGACGACGAATCCGATGAAGAAGATGATAATGGCACCCGTGTCGTGCGGGAGGAGGAT -ATTAGCTTTCTCCGCAATCATGTGCAAAAACAGGCCGAGGAGATCAGCAGCCAAAAGGAT -ATTATCGCTCAGGTTCGCGACGAGTTGCAAAAACAGGAAGAGCACACTCGCCGTGCTCTT -ACCAAGGTTGAAAATGAGGATGTCGTTCTTCTCGAGCGCGAGTTGCGCAAGCATCAACAG -GCCAACGAGGCCTTCCAAAAAGCTTTGCGTGAAATCGGTGGCATTATCACTCAAGTCGCC -AACGGTGACCTGTCCATGAAAGTCCAAATTCACCCGCTGGAAATGGACCCCGAAATCGCC -ACTTTCAAACGAACAATTAACACTATGATGGATCAATTGCAAGTCTTCGGTAGCGAAGTC -TCTCGTGTCGCACGCGAAGTCGGCACTGAAGGAATACTCGGTGGTCAAGCCCAGATCACT -GGCGTCCATGGTATCTGGAAAGAGCTTACGGAGAATGTCAACATTATGGCCAAAAACTTG -ACGGATCAAGTGCGAGAAATTGCCGCCGTGACTACCGCTGTCGCTCATGGTGATCTTAGT -CAGAAGATTGAGAGCCGAGCCCAAGGTGAAATTTTGGAGTTGCAACAGACTATCAACACC -ATGGTAGACCAACTCCGAACGTTTGCAACAGAAGTTACTCGAGTCGCGCGTGATGTCGGT -ACGGAAGGTGTACTGGGAGGACAAGCTCAGATCGAGGGTGTCCAGGGCATGTGGAACGAA -CTCACGGTTAATGTCAACGCTATGGCCAACAACCTTACCACACAAGTGCGAGATATTGCT -ACCGTCACGAAGGCCGTTGCCAAGGGCGATCTCACGCAGAAGGTCCAGGCCAACTGTAAG -GGAGAAATCGCAGAGTTGAAGAACATTATCAATTCCATGGTGGACCAACTACGACAATTC -GCACAAGAAGTCACCAAAATTGCCAAGGAAGTCGGTACTGATGGTGTACTTGGAGGCCAA -GCCACCGTGAATGATGTCGAAGGAACGTGGAAGGATTTGACGGAGAACGTCAACCGCATG -GCCAACAACCTGACCACGCAGGTGCGTGAGATTGCGGACGTCACCACCGCAGTCGCCAAG -GGTGATCTCACCAAAAAGGTTACTGCCAACGTACAAGGAGAAATTCTCGACCTCAAGAGT -ACCATCAACGGCATGGTGGATCGTCTCAACACTTTTGCTTTTGAAGTCAGTAAAGTGGCT -CGTGAGGTCGGCACGGATGGTACTCTTGGCGGTCAAGCCAAGGTGGACAACGTAGAAGGA -AAATGGAAGGACCTGACGGACAATGTGAACACCATGGCACAAAACCTGACGTCTCAGGTC -CGAAGTATATCCGATGTCACACAAGCTATTGCCAAGGGAGACTTGAGCAAAAAGATCGAA -GTGCATGCTCAGGGAGAGATTCTTACGCTCAAGGTCACAATCAACCACATGGTGGGTCGA -CTGGCTAAATTCGCTACTGAGCTAAAGAAGGTCGCCCGAGATGTTGGTGTCGACGGCAAG -ATGGGTGGTCAAGCCAACGTGGAGGGTATTGCGGGTACTTGGAAGGAAATTACCGAGGAC -GTGAACACTATGGCCGAGAATCTGACATCACAAGTTCGCGCTTTCGGCGAAATTACCGAT -GCCGCCACTGACGGTGACTTCACAAAGCTGATCACAGTCAATGCGTCTGGTGAAATGGAC -GAGCTGAAGCGGAAGATCAACAAGATGGTTTCCAATCTACGGGACAGTATTCAGCGAAAC 
-ACCGCGGCTAGGGAGGCTGCCGAACTCGCCAATCGTACCAAGTCAGAGTTCTTGGCCAAT -ATGTCGCATGAGATCAGAACGCCGATGAATGGTATCATTGGAATGACCCAGTTGACATTG -GATACGGATGATCTCAAGCCATACACGCGAGAAATGTTGAACGTCGTACATAACTTGGCG -AACAGTCTGTTGACCATCATCGACGACATTCTGGATATCTCCAAGATCGAAGCCAACCGC -ATGGTTATTGAGAGCATTCCATTCACCGTCAGAGGTACTGTCTTCAATGCCCTCAAGACT -TTGGCCGTCAAGGCCAATGAGAAGTTCCTGAGTTTGACCTACCAAGTTGACAACACCGTT -CCTGATTATGTCATTGGTGATCCATTCCGCCTTCGCCAGATCATTCTCAATCTTGTTGGA -AACGCTATCAAGTTCACCGAGCATGGCGAGGTCAAGCTCACCATCCGAAAGTCGGACAGA -GAGCAATGCACGACCAATGAATATGCCTTTGAATTCTCCGTTTCTGATACGGGCATTGGA -ATCGAAGAGGATAAGCTTGATCTCATTTTCGACACCTTCCAACAAGCTGATGGCTCAACC -ACCCGCAGATTTGGTGGCACTGGGCTGGGATTGTCGATTTCCAAGCGCCTTGTAAATTTG -ATGGGCGGTGATGTTTGGGTCACTTCCGAATATGGACATGGAAGTACTTTCCACTTCACC -TGCGTAGTGAAACTCGCGGACCAGTCTTTGAGCGTTATTGCCAATCAGCTCTTGCCGTAC -AAGAACCACCGGGTGTTGTTCATCGATAAGGGTCAAAACGGTGCTCAAGCTACCAACGTG -ATGAAGATGTTGAAGAAGATTGACCTAGAGCCGTTGGTTGTGCGGAACGAGGAACACGTT -CCTCCGCCCGAGATCCAGGATCCTTCTGGCAAGGAATCAGGCCATGCTTACGATGTTATT -ATCGTTGACTCGGTAGACACGGCCCGTGTCTTGCGGACGTTTGACGAGTTCAAGTATATT -CCGATTGTTTTGGTCTGTCCTTTGGTCTGCGTGAGCTTGAAGTCGGCCTTGGACCTTGGT -ATCAGTTCATATATGACTACCCCGTGTCAGCCGATTGATCTTGGCAATGGCATGTTGCCG -GCCTTGGAAGGCCGGTCAACGCCAATCACCACCGACAATTCACGTTCATTCGATATTCTT -CTAGCGGAGGATAATGACGTGAACCAGCAACTTGCGATGAAGATTCTCCAAAAGCATAAC -CACAACGTGTTTGTTGTTGGTAATGGCTTGGAAGCTGTTGAGGCTGTCAAGAAACGTCGT -TACGATGTTATTTTAATGGACGTTCAGATGCCAGTCATGGGTGGCTTTGAAGCCACCGGC -AAGATTCGTGAGTACGAACGCGAGAGTGGGCTTCAACGGACCCCAATTATCGCGCTCACT -GCCCATGCCATGTTGGGTGACCGTGAGAAGTGTATCCAGGCCCAGATGGACGAGTACTTG -TCCAAGCCGCTCAAGCAAAATCAGATGATGCAAACTATCCTCAAATGTGCTACTCTTGGT -GGGTCACTATTGGAGAAGAGCAAGGAGTCTAGGATTTCGAGCAGCGGGGAAATGCACCCG -AACTACTCAATGTCTGAGAACCAACAGCCGCCACCACCACAACAGCGATCAAAGCAGCAG -CCCTTGCCATCACCTCGCCCGGTTGTAGATTCCCGGTCTATCACAAGCTCTGGTCCCATC -ACCCGAGGGAGTGTGGTCAGTCCGTTGGAAGAGAAAGAAGAGATGATCGACGAGCGGGTA -CGTTAGCTGTATGTTCGGAACCGGACTATCAAAACAGCCTGGCTAATTACTTTTCAGGCA -TTGCTACGATCCAACAGTACATGAGTTTAGCATATTTATTTCCATGCAGATGATTTCGAT 
-GCGACTATACCCTGTCTATATGAGTCTCATCTACTCTACTTTTGTCTACTTCATACCCAC -CTGCGTCTTTCCTCCGTCTATTATGATTCTCCGTCGTCAACCTCCACCATTGCATTCGCT -TTTTTTCCCCCTCATTTTCATTTCTCCTCTTGGAGATGGTGTTTTGGCGCAGTCTATGAC -AGGACTTGGTTTGATCTATATGGAAGGATGACCGGGGAGTTTTCGCTTACCAGAGAGGCG -CTTTGTTCTTTTTACCTCTTGCTCTACCTTTTTCTCTCTTTCCCATCTAGCCGCTCGCTT -TTTTATCTAACCCCCTCAGCATCCTCTTGCCGTGAATTCTTGTATTGTATCTAGCTCTGT -TACTGATCATATCAAGTGCGGCACGAGAACACCGCTCAATTCCTTCAAGTTCTTCCCTAT -TTTCATTGTTCATGCGTATTTACCCATCTCCTATCTAGCTCTGAGACCATCTAACTTCCA -ACCGGCTCTTAGTCTATTATTTCTGGGCTATCTATTTTAGCCTTGAAACCCCCCAAACGC -TGCTTCGCGTGCCCGACTCATCTATGTTATCCGTAATGAGCTCTTTTATGAGGATACGCT -AAGATGTGACATCCCCCTTGCTGTTGCCTGGCTGCGGCTGCCCTTCTCTTGCTTTACATC -TCAGTGTAATCCCTCGAAGGCCTCTTATATTGGGCATTAACCCCACGTGTTGGAACAGGC -CAGGAAGCGCATATTATTCGAGTGACAACAGTAGCGACAAAAGTAGGGCACGACCTCACG -CCACCTGACTAGGATCTACAGATCTGTGTGCATCGAAGGTTTGAAACTAGTTTGCGGTGT -ATAGCAGAGGGGTTGGTAGGCTGACTAAAAGCACATTGTTGATAGCCCTGAAGATGATGT -ACAAGAGGTTACAGTAGGCCTAGATAGGATATTTGTTGGGGTCGTTCTCCAAAGATCTCA -TTATGGTCGTTGACCACTCTGGTCACTGCACTACTGACTAGCAACCTATGTAACATTGTG -GGTCATTGTAAACACATTTTTATTAGCCTCCCAAGCAGCAAGAGGCTAACAGCCCACTGG -ATATGAGATAGGATGAAAAACATTGATTGCTTTTAAAACACCAAAGCATGCCCAACTCCG -AAAATCTGCGTGTTGCAAGAACCGTGTCGTATCTGAAGAAGCATGAAAACAAGATTGAAA -AGAAAAAAATAGAGACCCAGGAGAAAATTTAGTCGATATCCATCTGCTCCTCGACTCCGC -CATCCACCAACAATTTGCTCTTCTGCTTCTGGCGGATTCTCTCCTCGCCGTGGACTCTGT -TGGACACCTTGCTCTGCTCGGCCACCATAGGCAGACCCGCCTCGCGCTCCCGAAGTTCCT -TCCGGATGAGGTGCTCGCCTGTGTTGACGACCTTGCGGTCCGCCTCGCGCTGGCGCTCGC -GGGCGAGTTTGCCGGCCAAGCGGCGTTTTGTGAACACACGCTCGCGGCGGGTTCTGATCT -CCTCGACTCTCTCCATGGCGGAGATGGTGGCAGCCATAAGGTTGCGGTCGTACTTGACGG -GGACGTTTCTCTTCTTGGCGAACTGGGACAGGACAAGCGACGAGTCCACAATCATCTCCT -TTCCGCGGGAGGCACGGTGAGTCTTGGTCCACTTCAGCTTTCGGGGCTGACGCTTCATCT -TGAAGTTCTTGTGGCACTTGGACCGACAGAAGCGAAATTGTCGGGCATCGTTGCGGACGA -ATGTGATGCCCTTACTCGGGAAGGCCGGGCGGGAACAGAAGTGACAAGTTTCAACACTGC -CGAACAGAAGAATATTAGGATTCCGTAGAGCGGGTACAGACAAAAATTAACAGTAAAACT -TACCGCATGGTTGTAGGGTGGTGTTGGTTATGTCTTCGTTGAGTCGCAAGAAACTTTTTC 
-TGTAGCTTGGGCCTGAATGGTGGAGCAGACCACGCTAAACCCTTTTGGGGGCACCTGCGC -TGACTTTTCTATCCCGACGCCTCTTCGTCTATTCCCCGGTCCAACCGACTTGATCTCATT -TCGCGACATCTCTATATTCCTCGGTCTACAAAATGACTTCTGTTCTTGCCATTGGATTCG -GCATTGCCACAACTGCCTTTTTGGTAAGTTAAAAAGCCACTATTCAGCCACATGGAACTA -ACTGATTGTCTACAGGGCCGCGCTGGTCTGGTCGCCTACCGCCGCTCCAAGGGAGGATTA -AACGCTGCCGGAAAGGCATTCTACAAGGGGTTCGTGGGAATTATTCACCTTATCGCTATA -TGGGATTTTGACTGACGTCGTGTTACAGAGGATTCGAACCGCGCATGAACCGCCGGGAAG -CTTCCCTAATCCTACAACTTGCGTATGTGCGAGCGACATATCTTTGACATGCCAGAACTG -ACAATACTTCGCAGTGAACGAACTTTGACCAAGGACAAGATTCGCAAGAACCACCGCCAG -CTCATGCTGCTCAACCACCCCGATCGCGGCGGCAGTCCATACCTGGCCACCAAGATTAAC -GAGGCCAAGGAATTCCTCGACAAACATGCTTAAGACTTTATCAATGCGAATTTGACCTCT -CTCTCTTCTCCGGCAGTGACATTTGGGCAACGATGGAAAAGCGGATTTAGGCCTTAACAT -TGACGAACGAGTCATCATATTTTGGGCATATAGCGTGGCGTTGGTTTGGCATTTACAAGA -TCGGAGGAACAGGACTGCTCTTTACGAACTCTTGTTGTATTATATGTACTGTTTTCACAT -ATCCCTGCGAGATCGCCGCTCTTCGTTGAATGTACTTTTATTACTATCCATCTATCCGAC -ATACAACAATCCAGCACCCCAGAACTGGCAACCCCTGGTAGCAGAGCAACTCATGACACT -GCCAATAGTCGCTCTCTGCGATTTTTAATCATGCACAGGCAAAGTAACAAGGTCGATCCC -GGGACAATAAGAAGCACGTAGGCCCAAGGCTCAAGGTTACAAAAGCCCGTGATGGGTATG -GGATCTGTCCTACCGCATAGAGAACGCCACACACCTTCTGCACAAGATTTATTATGCTAC -TCCGCGTACTCCGTACAAATAATGCAAAATGCATTTGATCGGCGCAAAGAGAGAGTGGAT -TGGAAGAGATGGAACGGAAGTCGATGCCCCTGTTATCCGGATCGGACGAAATCGACCGTC -AAGTTCAAATCTGCTCGGCGGTCGGGTTTCGGGTGTTCCGTCATGCCACTGAATAATCGT -CAAGGTTCAAGGCAACTGTATAGCGTTCCTCTTACAAGATTGAAATCAGATCTGGCCTTG -CTCTCACGTGTGGTTGAGTCATTTCTTACCTAAAGTCCCAAACTCACCTGGGCAGTGCTC -AAGCCTGTGATTTCATTGTCAACGTCCGAAAAGGTTCTATCCTTCCATGCGTCATTCACT -CCAATAATCCATAGCGCACTCTTTTTTTTTTCTCAGGGGGCGGAAAGAAAAACCTCTGCG -CTGAGACGCAGCACCCCGCAGTATCCTTCATCACTTATTATCGTCATTACCTCTACTCCG -CGGAGTTTCCGTTGCCCCCTCAGACGCAAAATGGATGGCTTGGGGGACGGCGATATGGGC -TTCGACCCCCCTAATATGATGAGCCAGACACCACAGCTCTTCGGGTACGATAATTCTCAG -ATGCAGGCTGGTTCTATGTACGATGATTCGTCTTTGGGCGCGGGAGACGAGACAAACGAT -GCGAAGAGACGGAGGATCGCGAGAGTGAGGATCATTACCCGTTACAGTTATCCAGAATAC -GTCTAACGAGCGAACAGGCGTGCGACATGTGTCGGAAGAAGAAGATTAAATGCGATGGAA 
-AAATGCCAAAATGCTCACACTGCATTAACTACAAAACGGAATGCATCTTCACACAAGTGG -AGAAGAAGCGGAACCCTCCTAAAGGGTATGTATACTTGTATACTTGTATACTTCATTGCG -TATAAGGCTATGTATCTGACTGGGTTGTTTCTTGCATAGAGCGAAATACATCGAAGGCCT -TGAAAACCGACTGGGACGGATGGAGTCGTTGTTGCGTTTGTCGGGTCTTTTGTCCGAAGA -AGATGGCGGCAAGACAGATCTTGGAACATTGGAGAAGCGGCTAGCGGATCGAACGAATGC -GCTGAATGCAGCGAAAAGCTCCAACAAGTTCATGGCTCAAGCAACTACAGCCCAACAAGC -CCCGAGCTCCCACCAAACCACACCTCGTATGGACTCCATCTCCAGCCCTCAGACAGCCGC -TACATCACCGGAGTCGCAAAAGTCGGAGACTGAAGTCGAGGCGCTGTCGGACATGATGTG -CTCTTTGGTGACAAACAACTGCGGGGAGACTCGGTATATTGGTAGGCTTTGTTCCGCAAC -TGTTTAGAGCAGGTTTGCTAACGTTTTCCCAGGATCCTCATCTGGATTTTCTATATTCTC -TCCAAAGGGCATTCAATGGGTTAATGAGAAGACCGGCGATACATCCTTCCAGGACATGAT -TTCGTCAGCATATGTCGATGACAATAAATGGATGTATTGGAAACCCGAGATCTTCAGCGA -TATTTTCGCAAGGCGCGTTTTCAAGCCTCTCCCGCCGAAGGAAGAAGCTCTTTCTCTATT -CAAGGACTTTTTCGAAAATTTCAATTGTATGTTCCCGCTGTTCCACGAAGCGACATTTAT -GCACTTGGTGGAGCGGCAATACTCGCGCGATCCCTACGAGGGCTCAGGCTGGTGGGCAAG -TATAAACGTTGTCTTGGCGATTTCTCACAGGCTGCGGATTATGAGCAATCTGGTGCCACA -CGAAGAAGATAAAAGAGCTTGGCTTTATTTGAAGAACGCCATGGGAGTTCTGACCGAGCT -CACAATGCGTAACACTGATCTACTGAGTGTGCAAGCGCTATTGGGCATGGTAATGTGGTC -TACTCGGCTTTCGGTGGCTTTGTCTAACCTACTTTTCTAGTCACTATTTCTTCAAGGAAC -CCCCAACCCACAACCTGCTTTCTTCCTAGTCGCCGCGGCGATTCGACTTTCCCACAGCAT -TGGTCTACACAAACGCGGTTCTGGCTTTGGCCTCAACGTGGTCGAAGTTGAGCAGCGGAA -GAGAGTATTCTGGATAGCCTATCTTCTCGACAAAGAGTATGATCCCCCTTTTCGAGACAC -CCGAATACCTCTGTTTCAAGTACTGACCCATTTTCAGCATCTGTCTACGCTCTGGTCGAC -CACCAGTACAAGATGACGATGATATGAACGTCGAGCTACCAAGCGAAGATCCACCAGACA -ATGTTGGAAATGTGCCATTATCTGATGGTAGGAGCAAGTTCAACCTATTCCGCTCATTGT -GTGAATTTGCAACTATTGAAAGCAGAGTATACAAGCGACTTTATTCTGCCAAAGCATCGA -AGCAATCAGACGGTGAACTGCTCAATACAATCGGTGAGCTTGACAAGGAGTTGGAAGACT -GGAAAGATAGCATTCCTATCGACTTCCGCCCTGAACATGAAATCCAAGCTACCCACACTC -CCCTCATTATCCACATGGTCGTTCTACATTTTGCCTACTACAATTGCTTGACAACCATAC -ATCGAATGTCAGTTCATCACGGGTATTGGACAAGCCGCCTGTCCAATTATGCCATACAGG -GGTTGAATGCACGACCCTTGAACCCAAGGGTATTCTTGTCGGCGGTTTTGTGCGTCACTG -CTGCCAGAGCATCGATCAATTTGATCAAGTATATTCCGCAGGGAGACTTTGCTTGTGTTT 
-GGTACGTCCCAGGGTATTGTGGAGACAAACAGCTGCTAATTTGACTTCAGGCTAATTCTC -TACTACCCGGTGTCTGCCCTTGTAACACTTTTCGCCAACATCCTTCAGAACCCCAACGAT -GTTCGTGCTCGGTCCGATGTTAAATTAATGGGCGTGGTAGTCAGTTTCTTGTCCACCCTA -GTCTCCGACGAATCCAATGGCAGCATCAAGCGTATGCTAGGTCTTTGCGGTGAATTCGAG -AGAATTGCCAAGGTGGTCTTGGATAAAACCGAAAAAGAGTCTCAGTCGCGAAAGAAACGC -AAGAATGCCGAAGCAGAAGACTCTGCAGATGCAACCGAAGGACATCCGGCAGCTGCACCC -TCCGCCAAACGTACACAAGCACCACCCACCTCCACACCTTTCTCTCCCTCCATGTTTACC -AATCCTACGGCTACAGCCCCCTCTAGCACAGAAGGGCCATTCCCAGGATCCATAATACCA -CCGACAAGTGGCATTCCTGCTGACCTTCCAAGTAATATCCATGCCATGCCTGGCATTGGG -CAGGAGTTCTCAGACATGCTGAGTCCTGATCAAATGGGTAATGTTGGATTCTCCGACCAA -CACTCATTCAGTGCCAGTCCCGGCATGGCACCATTCCAGCAGCCATTTGTCCCACAAGAT -CTCTGGCAGATGCCAATGACAATTGAGTGGGATTGGGCGGATATGTCCACCAACTTCCCT -CCATTTGACGGAGCTGGTCCGGGCGGGTCGCTGTCTGGGCAGTGATCATGACTGCATGAT -GAACTAAAGTCATCTTCATCCACCTTGTTATGTCGATTCTTTCGGTGGAATTATTATTTT -CAGTGTTATGTCTCTACACCACACCATTTCCACACCTTTTGTACATTGATGCGCTCCTCT -TCTTATTTCGAAACCATGCAGAGGTCTATGGAGGGACCGGGAGAATTGTGGGTTTCCATT -ACGCAGCGAATGGCGCAAAGGTGGGCCATATTTATAGGGATAAATATGTACAGACAGGTC -TCCAATCAAATTCAAAACAATATTAGACAATCTTACTCTAGATCGGAAGTAATAAAGAAC -GGTTTTGAAGGAGCTCACGGCTTTAAGCTAATTCCATCCGGCACCTGGACATGCGCTTCC -GATGACTCAGCCGGATCGGCGAAAATTTAGAACCGACTGGAACCACACGGCGTCACCTCG -GGACTTGGATGTCCAAGAGCCTCTCAACTTCCAGCTCAATTTCCCCTCCTCGTGTCTAAT -CTTGATGGACTGTGATATCATCTATTACATCTGAAGTCGATTGATCACCTTTTACCCCCG -CCCTCGAAGCCCTCGGGGCTCCCATTCCTCGACCATGGCTACCTTGCTCCGACGGCCAGG -CAATCTCGCTCGCTACTCGAGGACAATCGTGGAATCGACCCGTCGCACGGGGCTGGCTTC -CGTGCGTGTGCGACCGTCACGAATCTCATCTCTGATTCTATCGAGCCGCGCTAGAACCTA -CGCCACGCAAGGTCCGACACCCCCCGACGACAATAGCCCGAACAATGATCACCAAACAAA -CAAACCAAAGTCAGCTCTGACTGAAGCTGAACTGGAACGTGTGGATGAATGGCTGGAAGG -TATGGATCCGGTGGGCCGTAAGCAGATCCGAAACCACCTCATGAGCAATGGAATTCCACC -AGAAGTCAGGAAGTACCTCGATCGGAATCAGCCAACGACGTTGATGGATCGCATCAAGAT -GATGCGGTTCCTGTGGCAAGCCACAACTTCCGACGTTGCATTGGAAGAGTTTTTAAAGAA -ATACGAGCCTGATGCCAAAGAGTTATCAAGAAAGCGCGGTATCGCAGATTTGCACAACCC -CGCTGAAAAACGCAGTGATATGCCTAAACGCGACGAAGATTTCAGCCTCCGTTTCAATAA 
-GGATTCTGAGCAGGAGCAGCAAGGGCAGCAGCCCTCAAAGCAGGGCGAACAGCAATCCAA -CTCACAACCGCCAAAGGATCCCAAGCAAGACAAGGATCCTAAGGATGAAAAGGATAACAA -GCAAACACAAGATCCCAAGAAGGACCAACAACAACAGAAGAAGCAACAGGGCGACAAGAA -GGGCAAAGCTCCCCCGAACGTGGGCAAGGTCCTCGAGTTCCGCTTCGATCCCGTTTCGTT -CTTCGTTACCGCTCTCGTCACATACTACGCCTACCGCAGTTTCTTCCCCGGTGATGCTGG -GGGCAGAGAAATCACTTGGCAAGAATTCCGGGCGAACTACCTTGAGAAGGGGCTCGTGGA -GAGATTGACCGTCTTGAACCGCACCAAGGTTCGGGTTGAACTCAACGGCGAAGCTTCGAC -CCAGCCTGATTCAAATGGTCAGCCTGCCTCCTATGTCTATTTCAGTATTGGTTCTGTCGA -CAGCTTTGAGATGAAGATTGAAGCAGCTCAGAACGAACTGGGTATTCCATCGCATGAACG -TATCCCTGTTGCCTATCATGACGAGACCCCATGGGGCGGCGTCTTGATGTCATTGGCTCC -TACTCTTCTCTTCCTTGGTGGTGTCTTCTGGATGTCGCGACGTGCCGGTGGCGGCGCCGG -CGGTCAAAGTGGAATCTTTGGCATCGGCAAGAGCCGTGCCAAGCGCTTCAATCATGAGAC -CGATATCAAGACCAAGTTCGCCGACGTTGCGGGCATGGACGAAGCCAAGGTTGAAATCAT -GGAATTTGTTAGTTTCCTCCAGCAGCCCGAGCGGTTCGAGAAGCTAGGTGCAAAGATTCC -CCGTGGTGCCATTCTTTCAGGTCCCCCTGGTACTGGTAAGACATTGCTTGCTAAGGCTAC -CGCCGGTGAATCTGGCGTGCCTTTCTACAGTGTCAGTGGTTCAGAGTTCGTCGAAATGTT -TGTCGGTGTTGGTCCCTCTCGTGTCCGCGATCTCTTCGCCAATGCCCGGAAGAACACCCC -CTGCATTATCTTCATTGATGAAATCGATGCAATTGGAAAGTCCCGAGCAAAGTCGAACGT -TGGTGGTGGAAACGATGAGCGTGAAAGCACTCTCAACCAGATTCTCACTGAGATGGATGG -TTTCAACACCTCGGAGCAAGTCGTTGTCCTGGCCGGTACCAACCGACCGGATGTTCTTGA -CAAGGCCCTCATGCGTCCCGGTCGATTCGACCGACATATCTCCATTGACCGACCTACTAT -GGACGGTCGCAAGCAGATTTTCCGTGTTTACTTGAAGAAGATCGTCACCGACGAGAACTT -GGAGTACATGGAGGGCAGACTTGCTGCTCTGACTCCTGGTTTTGCTGGTGCTGATATCGC -CAACTGCGTGAATGAGGCTGCTCTTGTTGGTAAGTTTTTATGCGTACTCTCTTCGTTGTG -TTGTATCAAGCTAACAATTCCTCCCTGTAGCTGCCCGCGGAAACGCAGACAAGGTTAGCA -TGCATCACTTCGAGCAGGCCATCGAACGAGTCGTTGGTGGATTGGAGAAGAAGTCCCTTG -TCCTCTCTCCCGAAGAGAAGCGGACTGTCGCCTACCACGAGGCTGGCCACGCTATCTGCG -GTTGGTACTTCAAGTGGGCTGACCCGCTATTGAAGGTCTCGATCATTCCCCGCGGACAAG -GTGCACTTGGATATGCCCAATACCTCCCGGCTGGCGGCGACACATACCTGATGAACGTCA -ACCAGATGATGGACCGCATGGCCATGACCCTGGGTGGTCGTGTCAGCGAAGAGATACACT -TCGACAGCGTGACCAGCGGTGCTAGTGATGACTTCAACAAGGTCACTCGCATGGCCACTG -CCATGGTGACTAAGTTCGGCATGTCTTCCAAGCTTGGTTACATTTACTACGAAGATGACG 
-GACAGCAGCAACTCCACAAGCCTTTCTCTGAGGACACCGCCCGCTCCATTGATATGGAGG -TGCGCCGTATCATTGACGAGGCCCACAAGCAGTGCCGCGACCTCCTGACCGAGAAGAAGA -AGGAACTCGGTATTGTTGCCGAGGAGCTTCTGTCTAAAGAGGTCCTTGGACGCGACGACC -TCATTCGTCTCCTTGGCCCACGCCCTTACCCCGAGTCTGGCGAGTTCGCCAAGTACTTCG -ATGGCACGGGCGGCAAGACCATCGCTCCACCGGACTTCCAGAGCCCCGAACAAACCACTG -GTAAGGATGGCCGGGATGAGACACCTATCCCTCCTTCATAGATTAGGAGACTGTGTTTCT -TTGCATCCTTGCGCTTTGGTTTGGTTTTGGTTCAATTTCGGGAGGCATGATATCACTGTA -GCCTCTCTTTTTGTATTTATCGATTGCATCTCCCGTTTCTTCTCCCCTCTCATCTTCCCT -TTTCGTCTACTCACATCAACACCCACAACACCTTTCCCTCTTTCCTCCTCTCGTGCTCAA -TAAGAAGATCGTTTCCCTTTTTTTATTTTCTCTCATTTGTCCCCCTGTCCATCTCCGTGT -CCAATTGGGATCTGTACAACTTGTACCGAGTGTATAATAGGGAGTTTTATTGAAACATGT -CTTAGATTGGAAGTTCACGCCTTTTCTGCGTAAATAGGATTGAATCGCCGTACCGCACGG -AAGAACATTGTATCAGACTCATTGGCCAAAAATATTTGAGTCCCCTTTGGTAGCGGTGAC -ACGTGCGAATGTCTTGCGCTTTATATCCTCAATTACTTCGTTTTGAGGTATGTCTCGAAC -TCCTCTAAGAAGTCGACCCATTTCAAGATTTCTTTATTATGAAGCTGCATACAAACCCAC -AAAGAAAACCAGGGCGAAGAGGACAGACGATCCAATTATGAAGATGGCGTCTATGTATAC -AAAAGAGCCGTAGACTGATCATGATCATATCTACGGCACTCAATCTCTTAGCAACATTCT -TCGACTCCCCAAGCCCTATGGCGAATTTCATATCAGAGATGCTGCTCATCCCAGCTCCCT -GGAATGGCCAGGATCTACTCAGTGATGGCACAAGTTCCCACAAACACAGCCCTGTATTAC -ACGCGGAGTGGACAAGTGATGTTCTCCGGGCGTTTGATCGTGCTTGGCATCACCCAGTGA -AGAGAAAGCAACCTAGCTATTTCCAACCCGGCAGTCCTCCACTTTTTAGTGGCGCACCAA -GCAAGGCTATTGTTTCTCAAAGCAGACAGACCCTCGCCCCAGGCAGCTATGCTACTCCAT -ACAAGACGGTAACACCGGAGCAGTCTCATCAATTTTGGAATCTGACGAGACAACCCTATC -AGCTTTCGCGTGAGAAGATGCCCCGATTCTACAGCCCGGGCCATTTGAAAGAAAACCTTC -TCGGCACTGAGAACTACAAAAAATGGGCGGACCAGATGAAGAAGAGACTTGAGCAATGTG -GCGGATTGTGGATCACTGATACTGATACAAAGATGCCGGCATGTAGCAGCTTAAGCACAC -AATGCATCCGGCCTAACCTAAATCTTTGGATGATGATTTTCAACAACGTGTCACAGCCCA -TACGACGAGAACTGTGTGTCCTAGGGGTATTGAATGTACATGAGGCACGGCAGTTTCTGG -AAAAGACAAATGGAAGAGACGTTCCCATGAAGATGCGTTCAGTCCAAGGGCTACGTGATA -TCATGGGTATTAGGTATAAGAAGTGCGCGTCAATGAAAGAATACATAAAGATGGTGTTGT -GCAGCCGAGCGATTCAATGTAACCGCGGAGAGAAAGACCGTGACAATGGAAGGACAGACA -ATGGCCACGGAAAGAATTGCAATTGCTGGAGAGGAAGGGACAGAACTAACGAATGGTTAT 
-GGTGCCTGTCCATCTTGGTGAATCTTGGTCCGGAGTGGGAGTCTTGGGTGTCTGAGCTGT -TGGGGAAATTCGAAGATAAAGAGAGAATGAATGCTGCCATATGCACTATAGGAGGACTCT -TTCCAATCATCGAAGCGGAACAGGCACGTCGCATACAATCTTCTCGTTACATGCCAAATT -GCAGCGCGTGATCATGTCTTAAGAAGGCATGTCCACAGATATCTCATGTTTTCACTTCTT -TGCAATGAGCGATTAGTATATGGTAAAAATAAAAATTGTGTGCATTCTTCAGAATTGATA -GGTTCGAGACAAGGTTCTAGAAAAGCATATAGAATCACAGCAGAAACACATTCACTCTGG -TTTAGATTTCTTCGTTTGCTAATTTATCGATCAACAAATTAATTGCTCTTCTTGCTAGCC -TTCTTGGTCTGCTTCGCCTTGACAGACAGGCGAGGGATAAACCTGCCAACGCGAGCCTGG -TAGTCGCTGTACTCGGGGTACTTGCCAGCGGTGATAGCCTCAGTTAAGCGTACGCTGCCC -TGGAAGATAGCAAGGTATCCCAAAGCTCCCAGACCGGCCCAGTTGAAGTAAGTCTCGGTA -CGGTAGCAGGACCAAGCATACATGGTCAGCCAAAAGGCCTGCTCAGCAACGAAGTTGGGG -TGTCGCGACCAAGACCAGAGGCCACTGACAACAAAGCCACGCTCAAGATCTTCGGCGGAG -AACTGGTCCTTGTAGGGGTCGGGAATACGCGCGTTGGTCTGGTACTCCTTCTTAGCAGAT -TGGAACTTCCACTGCTGGCCGTCGGCGAAGAATTCGATAATGATGAAGAAAAAGGCAGCA -CGAGAGAAGGCCAAGTCGGGAAGGCCAAAGACCTCGGGTCCCTGGTTGTACGCGGCGACA -ACAAAGATGTAGGTTGGAGCTGTGATGAGAAGGAGCAATACCGACTGGGCGATTGCAATG -AATCCGAGGTTGAATATCATGAACAAAAAGCTATTGTTCACCTTAGAACGCACGATCTGC -CAGCGGTAATCTTCCGAGCCGATCGAATATCCTCCTTTGCGCCAATAGTTGAAAGTCAAG -CGAACCTATCCCAAGATTCGAGTCAGCACAAGTAGGGACTCGGAATTCAAGTCAGCTTAC -ACTCCAAAGAACAGTGATGACAGCGATGGTATCCAAAGTCTGGGTTTGAATGCCGGACAT -ACGAGCCCAGAGGGCGAAATGCACATTATACAGCGCAGGGAGAATGCTCCAAAACCGGTC -TACTTGCGAATAGTTTCGGTTGATCTCGGCTGCAATGAAAAGCAGACCAGCCAGGACTCC -ACCGAAAGCAATCGCAGAGACGAAGGGGTTGGTAGCTAGGTAGACAGCTCGCAAGCTATA -CACATCGCTACCAGCTTCGCGCAAGCTGTCGGGCAGCCATTGCAGCTGGGACAAATAGGG -GGAAACGGTGAGCTCGAATGAGGTATAATCCAAAAGGGACTCCACTCGGGGAAGTGGTAG -AGATGTCATTGTTTTGCTCCGTATTATATATCGCCAAAAGGCCACAAGAGAAAAGACAAA -CAAAAGCTTGGGAAGTGTTTGCTGTGGAGTTGACGGGTAGATTTACTCATTGGGAATAAT -TTGCCTAAAGAGGATAAGCGTATCCGCGTGTAAATGCCTCAGGCATCACATCGCGTCTCA -GGCATCCAATCTCTTCGCGTCGAGCCATTCCAGAACAAGATGTGATCGACATCAGGACAT -TCAAGGCCACCACCACTATGAACCCTATATGATCTAGATTGGGGCTCATCATGGCTCGCA -AGATTCTATGTGTGGCCGAGAAGCCTGCCATTGCTCGCGCAGTGGCCACTCACTTATCGG -GTGGTGCTTTTCAAACAGTGGGTTCCTTTCTAAAAGATTTTGGTGATTCAGCTAACGGAG 
-AGCATAGCGTGCAATTCGTGGGAATCAATATGTGAAAAACTATGAATTTGACTTCGATTT -TGGTGGTGCTTGGGGAAATTGCTCGGTCACCATGACCAGCGTAGTCGGCCATTTAACCGG -TCTGGACTTTGACCGCCAGTACAAAGGTTGGATGTCCTGTCCTCCGGGGGCACTATTTGA -GGCACCTGTGCAAGAAACTGTTGATAAGGTGGGAAAATATCCAGCGGCTGTATTGACTAC -ATGTTGACGTCTTCACTAGGACAAGCTACCAATTGCGGATAACATCCGGGATCAAGCCAA -GTACTCTAAAGCCCTGTTTATTTGGACCGATTGTGATCGCGAAGGTGAGCATATTGGTTC -TGAGGTCCGTAATCAGGCGAAAGCAGGAAATGCTCGGATCGAGGTCAAGCGAGCCAAGTT -CAGCAACACAGAAAGCGCGTAAGTAATGTTGACTGTTTTGTGTCTACTCAGACTAATCCG -AGGATATAGACATGTCCGTCGTGCTGCTCTGGAGCCAGTCAATCTCGACGAATATCAGGC -TAGTGCTGTAGCTGCCAGAATCGAGCTGGATCTCCGAATCGGCGCTGCATTTACCCGCCT -ACAAACTCTACAACTACAAACTGTGGTGGCAGCTCTAAAGGAAAAAATAATTAGCTATGG -TAAGCTATCAATTATTATAGGCACAATTAAATACATTAACAAGTTAGGTTCCTGCCAATT -TCCTACTTTAGGATTCGTTGTCGACCGATACCTGAGAGTGAAAAATTTCAAACCAGAAAC -ATTTTGGGGCGTCAAGATCATACTCAACCGTGAAGGCAAGAAGGTCAACTTTCTCTGGAA -GCGGGTACATTTATTTGACCGAGCAGTTGTGACGATGATGTTAGAAAGGTGTTTGATTGC -AAAACAAGCCAAAGTCACGAAAGTGAACCAGAAGCCTACAAGCAAATGGCGGCCGTTGCC -CTTGACTACAGTTGACCTACAAATGATGGGCAGTCGATTTCTTCGTTTGGACAGTCAGAC -TATTATGAAGGTAATAGAATCACTCTGTTGGATGATGGATGATATCTGACAAGCTCAAAG -GTGGCAGAAGGACTTTATACCAAGGGATTCATCAGCTATCCTAGAACCGAGACAGATCAG -TTTGACAAAGCAATCGACCTGAAGAAATTGGTCGAGAAGCAATACCCCGACAGTTCATGG -GGCCAATATGCTCGAGAGTAAGCTTACCGGACTTGCTTTTCATATACAAAACTAATTTCT -ATCTAGACTCATTGATGGGAAGTTCAGGACCCCTCGCTCTGGCCGCCACAACGACAAGGC -CCATCCACCTATTCATCCTGTCAGTTGGGTCTCGCCGAGTCAATTGAATGCCAATGAAAA -GAAGGTCTATGAATTTGTTGCCCGTCGATTCCTGGCCTGCTGTTCCGATGATGCGAAGGG -CCAGAGCACAGAGGTTGAGATTCAATATGGTGACGAATCCTTCCACACCAATGGTCTTAT -TGTGTTGGAGAAAAACTATTTGGATGTATACGTCTACGACAAGTGGGAAAGCAGTCAGCA -ACTTCCTAAATTTGAAATGGGGGAGCTCTTTGAGCCCACCGAGGCCAATATTTTCGAAGG -CAAAACAACTGCCCCGAACTATCTGACTGAGCCCGAACTCATTGGCCTGATGGATGCCAA -CGGAATTGGAACAGATGCCACGATGGCGGAACATATTGCTAAAATCAAAGAACGGGAATA -CGTCGCCATAAACCAACGGGGCAGTGGCCGTAGCTCAGTGCAGGAATTTATTCCAACCCG -CTTAGGGGTTGCCCTTGTGGAAGGGTATGATAATGTTGTTGAGGGTCTCCCTAACAGTGT -GTCTCTCAGTAAGCCGTTCCTTCGCAAGGAAATGGAATTGAGCATGCTTCAGATCTGCTC 
-AGGCACCAAGACTCGACAGGAGGTCGTCCAGCAGAGTCTGGACATGTATCGAGAAGTCTT -TATACACACACAAAGACGCATCAACATGCTGAAATCTGCCTGTCGCAAGTATCTTCTTGA -AGAAAACGGCTCATGATTGTTAGTCAATTACAAAAGGCCTTTATCTAAAATTAATGAAAT -AATCAATTGATTGTCTGCAACCCTCTTGACCTTGATTAAAACTGCAACCATTCAAAGAAT -AATAAGAGAGGCAAGTGGTCGAATCCCCTATGATACAGATACAAAATGATTATTTACCGA -TGTCCTAGTCCAGCCCCTGGGCATTGCGCCGGACTGAGCCCTCTTCCCCATTCCCAAGAC -CCACAGCTCGGTTTGAGGTGAAAACTCCGATGACATTCTCCGCTGTTTTGCATCTGGAAG -CTGGGTTGATCTGGCTGGTGGTGAGGCTTTCGTTTAGATCAATTATGGGCTGTTCCGCGC -GTAATTCATCAGGTACGTCGTTGTACATGGTTTTTGACATGAGTATGTTAAGCCAGGCCA -ACTGCCTTGGAATCTGTGGAAAAGTTTGAGTATTTTCCAGGAGACTCTGGTGTTGTTGAC -TGACCTTCTCCAGTTTAGTGTCTAGGTCTATCATGCCAGACGTGTATTTGCCTTCCATCG -TACCAACAAGATGCAGAAAACAACGTTGAAGGTCTTCCGAGGCGTTTCGGATTTCCTCCG -CCAGGTGAACAAAAACAGGTCTTACTTGTTCATTTTCTTTTCGAAAATATTGGATTTGTA -TCATCAATGCACCAAATTGTTCTAGCACGCCCTTCATTAGGCCGTAACAATTGATTTTCA -GGTCTTCGGTGTCGCTCCGGATTCTGGCTAACTTCATGAAGGCCATAAGTTGTAATAGGA -GAGTAATCAGAAAAAGAGTAGCTATAACATAGAAAGACAGCCCGTTAGTCTGGACTGGGT -TAGGCTGCTGTAGCTCCTCGTAGTGAAGCTCATTTGCCGAGGTCTCAGTCAAATGAGCCG -AATTTGGAATGGCTATGGCATTCGGGTCGAATGTGGACGCTGTGACTTCTGGAATGTTGA -ATTGCAGTGATGCTGCTTCAGCTGATTGTTCCAGGGCCAGCCTGGCATTTTCGTACTGTT -GGGCATCGAGTGAAAGATTCAGAAAAAGGTCGTATTCCGAGATTGCGTCGTCATGGGGGA -AGATAAAAGGGTGAGCTTTACGCAGCTCATGGATGGAATAGGCGATTTCTAAGGAGCTGA -GACTACGCCAAAAAAAAGAGTTAGAAGGATTGATCCCTGTGTACGGAATAGAATGCCAGA -TACCAATGCTGCTGAGGCCGGCAAAAATGACCCCCGACACCAGTTTTTCGAATATCATCG -TGGCATTTATATTATATCAGTGTGCTCAGTGTAACCGGTGAGAAAATAGTGAGTGAACAG -TGAGAGATCAATGAGATTCGAGGGAGTCAAGGAGATCTCCGTCTTTCTCTGTATTTTGAG -GGCTTAAATATCAGGCGGAATTAATACAATGAATAGGCAACCCTTTGTTAAACAGTTTGC -ATGTAAACTTGAATAAGCATTCATGGATTTTGGCTGATTGTTTGAGGGCTATATAGTATG -TGGATTCATTTTACCTAGTCCTTTGAGCTTTTATTAGCCCTTTAGATATCCCTTTTATCC -CTTATCCTTACCATACCTATAAGATTCATCTAGAAGTTAAGTATATTAGGAGATCTGATA -GTCAATAACCCTAAGACCTCGTGGTATCTAAATCAAATGGTCTAAGCACACATGACAATG -AGTACAAAGAAAATCACTCACACTGGCACAAAGAGACCTAAAATATATGATAAGTTCAAC -CCCAGTTTCGTTGAATATTATAGTGGCTATACTAAAAATACTATCCCTTTGATCGTAGTG 
-AAAGGAGTCAGATCTTATAAAGATACGGAAAATGTTCAGAGCAAAAAAAAAGCATTATTA -TCACACTCGTCACTATTTATACATCACTGTGAATCTTCCAGCCTACATGTGAGTTACTTT -GATGCTTTATTATCAGCGTAAGCTAATATTGACACTGTAGATTATTGCTATTGATGAACC -TGTAATATGTTAGGTCAAGCTGACAAAGGCATCGAGGAATCTTTTAATGACTCTCATTAT -GCAGTCACCCATATTCATCATGCGCAAGTTGAAAATTGAGATGAAATCTGGTAGAAAATG -ACTTATGAGCTCAGGGAATATGAGAATCTTCGATAGAGATGTGGAAAGAGGCTATCATCG -ACCACAAAAGCTTAGTATGTCGCTGTATTCTCATCATGTTCCTCATTCTGAACGCCCCCA -ATATCCTCGCCATTGTCCTTATTGTCGTCCTTTTTGGTCTCCAGGAGAGAGTTGTACTAC -TGACTATCGGCATCATTCTGAGCCTCTGACAATCGGCCGGAGAAATTCATCGCAACTTAA -ACGCTATGCCTTATTATGTGTTGGCGAAGCTTGGCGGTAGCCGAGATAGGGACCTGACTT -CAATAAACAACCAGCTCGGAGAGCCTGAGAACAGCGCTGACATGTTTGTGTGAGCAACTT -GGGACACGACAGAGGACCTCGCCATGATACACAGTAACTTGACCAGTGGAGTTCTGCTCG -CTTAGTATGAGCTTCTGTTGACTATTCTTCATATCGACTTACGGTCGTGCAAGGCGGGAG -ATCGAGCTCCGGGTCTGGGATCTGATAAGGTACTCCCTGTACGCATCGCAGTTTCGTAAA -AAAATGGGGCAGTTATAATTATCAAGTAAGGGATCAGTGGGTGTCATCTTAGGTTCTGTT -TTGGCCTCAGGGGTTGGGGTTGTTTTTTGATAATAGAGAGGTGATGTGATGGAAGAGAGA -GAGAAGAACAGTTGTTCAAATGCTCTGGGTGGCTCCTTCGTTGCCAAATGTGATCTTTTT -CCTCGATGTCATCCTTTTAATTTTCATCCGTACACTCTTTTTCTTTTGTTCTTCTTTAAG -ATTTCGACCATCTTGCCTCAACTCACACTTCGCCCCTCAAGCCTCACAATATGGGCTTCT -TGTTCGCAAGTCAGAGCGATGTGACTCTTATTATCATTGAGGGGCCGACCGGAAATGACG -AGAACAAGACTAAAACAGCGTTATTTCATGTTGATCGGGCAAAATTGATTGAGGGCAGTG -CATACTCTCGAACAATGTTCTCATCTCGGTGGGAAAGACCCGGAAAATACAAATGGAAGT -GATGTTGGGTTCAATTCATGAATTCAAACCCGAGCCAGAGTCTGTCTCTGTTGCAGATAT -CTGGTACACAATGAGGGCCTGTAACAAGTATCTGTTGGATCCTAAGAAGCTGATGGGCTA -GTTCCCACGTTGGATCCAATGGATTGACCAAGAAGATACTACAACATGGGAAGATTTGGA -TTTCAACCGCCAGCTGCTATTTCCTTGCTATTTCCTTGATCATGCAAATGCCTTCCAACA -CATTTCGAAGAGACTTGTCTATGGCACCCCGGGCTATATTACAGAAATGGCACCGAGAGA -CTCACCATTATTCGAGCCGATGCATATGCCCGCTATTGCTATGCGTAAGTCGATCTCACA -CGTCTTTGAGACCGAAAGATTCTAACTTACCAAAAGAACAACTCGGCGCGGCAAGAGGGC -GACTCCGCACGACTCTCCAAAGATCGCTTTTTGAAGATGTAAATGTGACTATTGATTGTG -CATATTGTACTTGTGCAGCGCATAACACATTTTTCTACTTGAGAGAGCTACATAGAATTG -GGGCCAGGCCGCTGGATGGTGACATCCACAAGAATGGCGTCCACGATATCCTCGGTCGAC 
-TGGAGAGCTTCAATGAGGAAAAGATGATAAACGGGCAGCCATCAGTGCAGGGATGCACCG -CATGCTCTCATTCCTGGAAACTCGTGGTAGAGCACGCACGAAAACAAGTTGAAAGTTACT -TCGATTGTTTATGCTTAGAATGCATGCACAACCATTCTGATAAGAATTCTGAGTACTGGT -CCCTCAACGCCATAAGATACGCGTATGACAAAGATTGCCGCATCAGCAATGGCCAGCCGA -CCTGGTAATTTAGTTTCATGGGTCATCGTGACAGAAACCCATCCCGCATGCAATCCTAAG -TGAACAAGAAGGATAAGTCGCGACTCGATTATTGGATTGATCTCCTACCCTGAATCTTCG -TCAAGAAATCTTCTGTCCTTCCTGTCAATCTGATTTCCATGGTATCCTGGGTGTCACAAC -CATGTCTATGACAAGCTCGCTGATTTTTCAGTCGGTGGCAGATACTGCAATGGTATCTGA -CCTTGGTGAACATATGTATGAATCATATCACTTTCTAAGCAAGGTAAAATTCGAGTAATA -GTTTGTTTTTATTTGCAAAACCGTTGCCACCAATGGCCATACATGGTGATCTATATCTAG -AGCACTGTGCTGCGAGATGCAACCTCGGAAGAGTGATCAATCAACCATCACCAAGTCCCT -AATGCAAGGGCCCAAGAACTCCTCCAATAACATCACAGGTATCGAAATATCAGAATACAA -ATTACATGTTTATGCGGATACCCCGCAAATCAAATCATGATACTTATATGAGATTTATTG -TATATTTCAAGATACGATAGAACAGCGCGCATCCAAAAGAATATGAATGAGTTCCTTTCA -TGTAAAATACTGACTTACAGTCGCGTAATCAAAACATGTCCAATGTCGGACGGGGGCTGA -GTTGCCAATATTTATGTTGAAGCAAGGTGACATTGCCCGGGTGCGAATGGGCATAGCCGC -CACTAAAGAAAATCAGACATCCGGCCCCTGGCCGCTATGCCCCTTTGCACCCGGGTGTTG -TTGCAAGACAGATCTTGGGTGATGATTGGGAAATAAAAAGAGCTCAAAAGGGCAAACGCT -CCGTCTCCCTCAAACAAGTCAGGCGATCAGTTTTGACAGAATTATACAACAGGCAGAAGC -GAGAAAAAAATTACAGGGGTGTAAGCACCCGAAAACATGACTTTCATAAACGGCCTCCGG -GGCAGTGCGATACATTATGGTGAAGGAGAATTCATTCAAGCTCCGTGCACGAATCAAGTA -GACTCCTATCGTCTGAGGGGTTTGCTGTCTACGTCTCTACGCCTCGCCGCGATAGTGCCG -AAATTTCAAATGATGAGGCTGATTTGCCGCCTAGAGAGGCTCGGCCCTAGCTTCGTTGAT -GAGAGGTCAACAGGTCACCACCAGCATGTGCCGTGCCTTGAACCCGGGCAAATCTATCCT -ATGAGATCAGCTTTGCCTATATATGTTCATCTGTGCCTGTCTGTGATTACCCATCCGTCC -ATCTATCCATCATGATTCCGGCTTGTCGGAAAACTTCACAAAAGATGAGCTCCAATAATC -CACAGGGTCCTCTTCGATCCAAAAGCAGTTCCACCACCGCTTTTTCCTTGCCAGAAAGCA -AACCAGTACCATACATACCCAAATGTAAGTTTTCGGCGTCAGATGTCGCGATAAGGAGCC -TAACCAAGTATTAGGCACGACTGATGAATACATCAAAGCTATCGCATACCATTCTCAGCA -ACCTAGCGCTAAGACGTTATATCCCGGAGAGATTTATGATACGCTCAAACAATTTCTTGA -GCAGAAATCAAATACAATACCAAACACTGCAAGCCAGGGCCCAAAAGATTTCGCATGGCT -CATCAGTCCGAGCCAGGTGGGCCAGCAAGCTTCGTCGGATCAGCATTGGCTGTTGGACAG 
-CAATAGTAATAGGAACCGGGAACGGTTTGAAGGATTTGCTGATCCGCAGTCATGTATAGA -GGCGTTGAGGGCTAGAATCGAATTATCACATCCACAAGTGCTATTTCTACGGGGCCATCC -TTCGCCAAGTTGGCTTTCAAGCATTGGCGCCTTTTGCTGCGCCGACCCCGAACTATTTCG -CTGGTTTCTTAGGTACCGTGCCGGACCGGGGAGTGATTATTACTTTGACTCCGCTCCTTC -TATAATGAGTGACATATTTCGCTTCAAGTTCTTTACCATCGGATCCAAGAACTATCGACG -TCGATCATCTCAAAGCGAGGTAGACGCCTTACGCGGAAGGGCATTTAACGATCTGCACAG -GTATCAGGCTGAGTTAAGAGGAAACTGGGCGCTAAAGCCAGGGGACTCGATTGTGCGGAA -TTTCCATGTACTAGATGAACGTCATTGCGTCATTGAACAGGAGATTGTGATTTCCATATT -CGATGTTGGCAAAACTTGGATGGGTATGATCCCTCGCACGACATTCATCGATATAATTGC -TGATTATTATAGCCATTGCCTGTTCGGATGCAGGAAATGACCTCTCTCAGGGACCAAGAT -TCCCCTGGCTCGAGACTGGCGCGGACTCATTACCAATCACCCTTCTACCAATCGTTCAAT -ATAGACCGAGGTGTGCGTTAAAATACCGCGGTATAAACGAGGGTACTAGGTCGCCAAAAG -GAAAGAATACGCAAAGTCTCGCCATCTTCCCAGAAGGCTACGGGCGTGGCTTGGATTGGT -CGCTTGCAAAATCAGATCGGCTTTATATCTTGGCGGATGTGTTCAGACTATCTGCCTTCT -CTCAAAAACAACTTCTTAATGTCATGAAAGAGAAAATAAGAACCGAAACAAACCGCCTGT -CGCTCAGTAATGAGAATCCAACATTGGCCAACCTGCTATACTTTCGGGATATTCTACAGG -ACCAGCTAAGCAACACTTCCTATATGCTGCAGTTGACAAACAAGAATAATATACTTCAGA -ATGGTCGTCGCTCCACCACCGCTATATCTATCGATCAAAGAAGCGCGGCCAATGACGCCA -TGGCTGAAGTTTGCAGTATATTCCAAGACTTGCACTTGGAAGCACAGTCACTTCATGAGA -AGTGCACTCAAGGAATGACAGTCATTAGTAATAACAGCATGCTCGCAGAGTCACAACGCG -CCATCCAGCAGGCGAAGTTGGTCACCAAACTCACAATTGTCGCCTTTGTGTACCTTCCGT -TCACTTTCACCGCTGGTTTCTTTGGCATGAACTTCAAGGAACTGGGAAATGATATGCCCT -TATGGATATTTTTTGCTGCCTCATTTCCTCTGATGTTTATCACTATGGCGGTATTTGCAT -TGGATATGCAAATGATTAAGAGAGTCTTGCGAGCATTGCATATTGCATATTGGGACAAAA -CTTATTTGGTTTAACAACACATTCAGATTAGCAACTTATGGACATATATCATTCTACGGA -GTAATCTCTCTCTCTCTTTTTCTAAATCTGATCTCGTGTCATGTGGTCATTGGATGAGGC -TGACTTACCAAATCTGCTCAGGTGCAGGGAAGCTCGAGCGGGGGGTTTCTGGTCACCTGA -GCAATATTGTGAGGATTATCAAACTTATCGAAATTCTGGTTCTATAGTTTTTGCCCTTCT -GAGATTCATGTGTTTCTATGGCCATGACGGAATGATCAAGTGCTAGAGTCACCAGGAGTG -GAGGGTTCCAACAAATTCGTGTCGGCCCAAGAGTCGGAACGCTGTATGTCGCAATCTTTC -ACACTTCATCACTTCGCTAGTGGGTAATGAAAGGTGCTGATCCGTGGCAGTCTACTCGCT -GTAGGTGGAGTTGGAACCACACCTCCAGAGCATTGGACAAACTCGAAAGGCGACCTATGG 
-TTGGGTACACTGGATGCACCTGACATCGGCATTCTGTCCTTCCCAAACAGACTTGCAACA -AACGGTATACTATCTGGGAGGAATATCGAAGAAGAAGGTGCTTCTCTTCTCGCTGAGTTG -TACCGTCTCGCTGAAGATGAGACTGTACGTCTTTCCTCCAGGTGACAGAGCATCGCTGAT -CACGTTAGGTGATACAATGCCCGATTGTCCTGGTTGCTCATAGCCTGGGCGGATTGATTG -TGAAACGAGTATGTCTCTCGTCATAAGCAGTTTGCAATATTCTGGCTGAATCATGCAATT -ATCTAGGCCATCTGTCAGGCAAAGCAAAGTCCAAACACTTATCGGCATTTGATACGCAAG -ATTGCTGGATTTGTTTTTATGGGATGTCCGCATTCCTCGTCCCAAGAACTAAAAGACTGG -GCCGGGGTCGAAAGCATTCTACAGAAATTTACTAAAGGAAGGTATAGTCCCCTGGATTTA -CAGTGTGTTGAAAGTTTGGCTGGAGATTGTGAAAATTTCCGGGTCATCTTAGCCGAAGAC -GAGAAGCAACTGTTGACCGTGAGCGAAAAGAAAGGAATGAGCAAACAAATGATTGGAAAG -AAAAATTTAGTAAGCCTGCTTTTTTACGACGGGTCATGCCTCTCTTTCGGGGCAAGACTG -ATGACAGACAGTTTGTTGACAGATACTCGGCAACCATTGGCCATGACCAAGAGACTGTGA -TCGAGCACGACTCCAATCACCGAGACCTCTGCATTGTGTCGGCTGGAAGCAGGACGCTTG -ACGGGATCTTTGATTTTCTCCATCTAGCTCTGGATGACACAAGAAAAGCCCTCGCTAAAG -ACTCTCCACGGTGTGAGTAAAAGTTCACGGATGGCGCCTGATACCAGGGCTTGGCTCACA -GAATATTAGACACCGGACCCTCCGTCTTTGCTGAATCCACTGTCAGAGATCAAAACAGCG -ATGATGAGAATACTGATGGTGTTACAATTGAATCCATGTCTGGTTGGTAAAAATGCCATA -TGTGACACTATATGTTTTACTGATTAGCCCTTGAAGGCCAGAATGTGCTCCGTGGTCCCG -ATGCGTCCATTCTAGCAACCTCGGAACCAGCTTCCTCAACTAATCTCGGATCATGCTCCA -AAGGCGCTACCGCAGGCAGTGATAAGATCAACCCCGATCTCATCTCCGCCTTGAAGGATT -TCACCATCGAGTACCGCGACCCTATTTTGCCGTGCAAGTCCATTCTACTGCCCAGAGATC -CCGTTTTTTCTGGGCGCGAGGACACACTATCAAGCATTCGGCAGACCCTTGTAGCCTCTA -TACAAGAGCCTGAAAATGGATCAAATAGTTCTCTCGGCGAGACAATCCCGTCGTTGAACG -TTTACTCGCTCTGTGGTCCAGGTGGAATGGGGAAAACTTCCATTGCCAACGAGTTTGTTC -ACAGGTATGAAAAGGACTTTGACGCTGTTTTTTGGGTTGCAGCGGACGAGGAAAGCAAGC -TCTTTAACGGCTTTCGAGACATCGCTGTGAAGCTCGGCATCATTTCAGACACCGATGGCA -AGGATCTGCCAGCAATCCGAGAGATGCTCTTAGCCTGGCTTGCCAACCCTCTCAGGTCAT -ATGAACATATGGACCACGTGAAACCAGAAAGAGCATCTTGGCTCATCGTTTTCGATAACG -TGGATCGTGCTGATACACTTGAAGACTTTTGGCCAAAAGACGCTGCTGGTTCGGTTCTGG -TGACCTGTCGAGATCCATTGATCAAAAGCTCCATCTATTTGAGGAATACTGGAAGCATTG -TTCCTGAGCTATTGGAGGAAGAGGGAATAACTCTATTGCTTCGTCTCACTAATCGTGAGA -GCGATGAAGATGACGTAAAGCAGGCACCTAGGGTTGTTCGAGCCTTGGGCAGATATCCCC 
-TTGCCATTGCGCAAATGTCTGGAGTCATACTAGCTCGAGACCTCGGTTTTAACGAATTTC -TGGAATTATACTCCGAGGAGACAGAGCGTCGAGAGATTCTTGGAATCTCAGAAGGACAAT -CTGCTTCTCTTCGCAGATACAACCAGACGCTTGGTACTGTCTGGAACCTGGATGACTTGA -AAGAGGGCAGAGCCTTATTAGAAGTCATATCATTTCTCGATCCAGACAGTATCCCAGAAT -CACTTCTAGAAAAGAACCCGGCATGTATGGATTGGGATCGTTATCCCAAAACATCATTGG -AATATTCAAAGGCTCGGGCCGAGCTACTTTCTAGGTCACTGATTTACAGAAATCGGGGCA -AGAAGACCTTACGTGTCCACCGGTTAATTCAAGATACTGTGAGAACTCAAATGAATGATG -CAACATTCAATGAGGTATACTCAAGGGTGCTAGACATGCTTGGTGCTCGGTGGCCGAGGG -TTTTCAAAGGCTTTGGCAATGTACAAACGGACTGGCAACAGAATTCTGAGCTCTGGGCCC -ACGTTCTCTCGCTGTTCAAATATCGAGACCGTTTTAGCCCTGGAGCTGCTCACTTTGCTA -CTGCTATCAAGCGGATGCACTTTGCCCTCGATGTCTTGATGTACGTATATGAGAGCTCAC -GTGTGATGAACAAACTGACACATAGACCAGATTTAGCGTTTCGGATTCCCGGTTTAGTGC -TTCACCAACAATCCTACTCTTTTTCAGCTTTTTGAGAGATGCAGTGACCGGAGAGCCAAG -CCATGAGCTACAAATGATTGATGCTAATTTCCACTACTGCCGAGGAGAACTCGGGTTGCA -TATCAACCACCGTGAACATCCTTTGCCCGATTTCAGCCAATGCGTCAAACAGCTGGATAT -ATTACTAGATGATGATGCCAAGAAAGCTGATGCATTATTTGGGGTCGCAGTGAACGAGCT -CGGTTGTGCCTATTTGATGAATTGGAAGAACGATGAGGCTCTGATTGAATTTGAGAGAGC -AGTCACTATCCTCCAAAACCTACATAACCCTTCAGCACAGGAGATTACTATGGGTCAAAT -CAATGTAGGATTTGTCTATGGTTACCTTGGACGGTATGATGAAGCTTTGCAGATCTTCGA -GGTTGCATTGAAGGAGCGTCATATAAAGCTTGGTCGGGATGATTACTCCTCATTTGTGTA -AGTGGAGACCCTCTTATTATTTGCTCCAATCCTGATGATACAGAAATGGAAAATTGTACA -TGGGTTGGGGAAATGTACTGGCTGCAAAGGGCCGGCTTGACGACAGCTTCAAATTACACG -TCAAGTGCCTGGAGCATTACAAGCGCTCGGTTGGTAATTCTCACCATAGAACAGGAGACG -GATGCGTGAAGGCTTCGGATCATTTTGCTCGCACGGGAGACGGTCCTACGGCCTTGTAAG -TTCCACAATCTGAATTTCGCCACGGCCGCTAATTTCTAAAGAGCGCTGCTTGACCAAGCC -CTCAAAATCTTCAACCTTGACACATACCACAGACCAGAAGCTGCAAGAGCTCACTACAAG -AAGGGAAGTGTCTTGAAGCAGATGGACCAAGAAGAGGAAGCCAACAAGGAAATGGACACA -GCTCTCGAGATCTTCAACAGCTTTGTTTCCCCTGGAGACCGCGCTGGAACCATCGATGAA -CTGGACGACGAGGACTTTGATCATTGGATCATGTTCTGGTCTCGGTGACCAGGAAATTCT -CGCCCAGTGCGATGAGAAGGCGATAAATCAATTCATGGAAGATGCTTAGCAAGTCAGTTC -TAAAACTAAAACTAAATTGTTTTCGCTCCACTTTAGTCGTAGACTGTATGGCTTGTCCAG -TAGACTGAAGACCACAAATCAAGACCCAAAAAGATACCAAAAACCAAACGAAAAAGCATC 
-CCTTAAAAATGCATTCATGGGGGATGTCCATACTACATACGTACCTTACGTACCTAGGGA -CCTTGCAAACAATGTAAACGCGGAAGTGGCGTTGAGGCGTTCGGTCCCTGTTGATCACAG -CCCAAATAGGAGCGATCGAGAAAGATATAGGATGAGTAACTGTGAGTGAGTGCCATGATC -AAACTTAAAATATGTATGTACAACATAGTATAAGGTTAGATATGGTGTTTAATATAGAAG -TAGCCAATGAAGGAGGCTTGTGGCAACTATGAAGTGCGGGTAGATGCTGCGGGCCAATGA -CGCTCGCTGAACCATCATGAGTCATGGTGATTAACTTGCACTAACCTATTTAGACTTATT -TGAACCCCCAAAATGGGTTAGTGTGGGCCGGGCCGGGGCGAAGACATCACAAACATAGGT -ACATACTATACCCAGCCCTGAGTCTACTAGTATACAATGTTCTCGTCTCCATTTGTTCGT -ATCTCCCAACTCAACTATCTCACACCTTCGACTGCCACAACAAATGGCTGAGAAACATCC -CATCGAGGATCTACGCGTTCCCGAGGCAAATAGCCTGTCGGCTGGCGACAGCGACTCCCA -AGCAGACGATGCAAGCGACTACGAGCTGACTGGCATTCCACTAGTTCTGGTAATGACTGG -ACTTGGTCTTTCTATATTCCTCATGTCACTGGACTCGTCCATCATTGCCACAGCCATTCC -TCGGATCACGTCACAGTTCAACAGCACCGGTGATATTGGCTGGTACGGCAGCGCATACTC -CTTTGCGATGTGCGCTTTGCAGCCTGTCGCAGGCAAGCTGTTTGCAAGTTTTGTGATGAA -GGTAAGACTATCCAAAACTTTCAGGCGCAGATGAGATCTTCATTAACAATATCAGGGCAT -GTTCCTAGGATGTCTTGCCGTCTTCGAGCTTGGCTCATTGCTCTGTGCCCTTGCTGTCAA -CAGCCCTATGCTTATTGTCGGGCGTGCTATAGCTGGGATTGGGGCAGCAGGTTGTTTTAC -AGGAGCCTTCTGTATTGTGGCAGTTTCTATCCCGCTGGTCAAGCGACCCTTCTATATTGG -AATCCTTCAGTCAACTTTCGGCATTGCCACCATAATCGGACCCGTATTAGGTGGAGCATT -CACAGAGCATGCGACTTGGCGGTGGTGTTTTTGGATTAATATCCCCATCGGTGCTATCAC -CATCATCTCATTGGTCTTCTTCTTCAAACCGCCTGCGCGCGACTCGACGAAAGCCCCGTC -GGTTCTTCGCAGACTGCAGAACCTGGACTTGCTGGGCGCTTCTCTTTTTGCTCCGGCCAT -CATCATGATATTTTTGGCTCTTCAATGGGGAGGGACTGAGCATGCTTGGAAGAGTGCTAC -CATAATTGGTCTCTTCATTGGAGGTGCGGGCCTTGGTCTTGTCTTTGCTCTCTGGCAGAT -ACGAAGGGGTGATGATGCCATGATTCCACCACGTCTTATCACTGAGCGTACGATGTTCTT -CTCTTGTTTCAGCGAGTTCTTCGCCATGGGTACAGTCTATATTTCGATCTACTACCTTCC -GGAATGGTTTCAGGTCATCAAAAATGCCTCGCCAACGGAATCTGGAATAATGTACCTTCC -TTTAGCGCTGTCAGATGTCCTCTCTGCAACCCTAACAGGCGCCTCCCTCAAGTATCTTGG -ATACCCGAACCCCTATATGCTCCTCGGAACGGGCTTGATGAGTATAGCAACAGGACTCTT -CTCTACCTTCTCCCTAAGCACCCCCCACCAGCAGTGGATTCCGTTCCAGGTTCTCCAGGG -TCTTGGTGTGGGTATGACCCTCTCAATGCCCTATGTTGCAACACAAACAGTCCTCAGGCC -TGAAGATATCCCTGTCGGTACCTCATTACTGCAGTGCTTCCAATTCTTCGGGGCTTCTGT 
-GAACCTTGCAATCGCGGAGGCTATTTTTGAGAACAAACTCGTCTCACATTTGAAGAATTG -GGGATTTGAGGACCATGAAATAGATAATATTCTCAGCGCAGGCTCTGCCGAAGCAAGGAG -CGTGGTTTCTGCGGCGCAGCTTCCAGGCGTGTTAGATGCATACAATCACGCGATCACCAG -GACTTTCTACGTTGCAACCAGTGTTGCTGCAGTTGCATTCCTGCTTTCACTCGGCATCCA -CTGGAGAAGTGTCAAGCCAAAACCCCAGGCTGCTGTCTCTGAAGAGGCTAGGTCGTCAAG -TCAGTAGGCTTCAATGGAACCGGTTAGTCCAAGCAAGACATAGAAGACGGAATCTTGCAA -CGTGAGCCCTAAGGGTATCACAGATAATTACATCTTGCAGCTCAAATTATATCTAGAGAT -TTTTCAAATAATTATGCCTAGCTAGCGATATTTTAACACAAAACATGAAACTATTCTTCA -TCACGCATTCTGGATCTCTTTCTCTGGCCATTTCCATGTTCAAATTTATTCCTTACTCCA -GTTATCTCAGTATAAAATGGTGTAATTCCAACGATCAAGTTGTGGAACGGCCCTACATAG -GTACTTATGTCTCCAGTACTTACCAGTTATCTCACAGCTTGGTCTTAATATGGGGGATAC -GCCGAAATGTCCTCGTCGGCCATATCATTCACCCTTTGTACTTCCTCAACTTCAAAGTAG -CAACTTTTTTTAAGAAAAAAAAGAACATTGATAATCCAAATATACTAGATTCAATTCTGG -AAGCTAGGCAATCTATGGCCAAAATGCCAACAAGATTTGAAGAATCAAGCCATCCCCCAT -TGTTCAAGCCAAAGGCTTACCCACCTATCACTAACTGCTCCGACTTGGCTCAAACGAGGG -AATTGCTCCTCAATAATTGTTGTATCCAGTGTCACAGATCTCTCGGATTTCATCAATTCC -TGGAAGAATCCAAGAAGACGAAGACCAGGATTGGGGCGAGAACCCGATTCGTTGATGCCC -TGTGACAGACGATGCACCCACTCCCGCAGGGAAACAACCTTCAGGGACTCCTTTGACCCA -AGGCGTTGTGCCAACAAGGAGACAATATCTGAAAATTGAATTTTATGTGGATTGACCAAA -TTGTAGTATCTTGGGTGGGCAGAACTGCCTCCTTCATATACTCTCGGCATACAGACTAGG -TCAGAGAGTATTTCAGCGACGGTATCGACTGGAAGCCAGTCAATCTGGCTTGCACTTCCG -AGCGTTTCTGGAAGATAGCCAAGCTGGATGGAGGCCTCTAGGAGGGTTGGGAACCAGTCA -CGGCGCGGCCACGACATATCTTGGCCTTCTGTCGACTCCTTGACAGGCCCGGCGACCTGA -CCTAAGCGAAATATACAGGACTCGACACCAGCTAAGTTGGCGGCTTTCTCCAAGAGTAGA -CTGGCGATGAGCTTGCTTTCCCCATAACCTGTCTCGGTAGTCTCGGGGGCGCTCAGCGTT -TCTTCTGGAACAGGTCTGTCTTCTGTCCAATTCTTCACTGTAGCGATAGAAGACAAGAAT -ATGACTTGAACATTGTGTTTGGACGCGTAGGCAAATTGAATTAGTCCAACAACACCCTTG -ATATTTGGGTCGAAGTACTTTAATGGCCGGTTAAAATCAACTGCCCACTGGCAATGCAGA -ATATGCGTTATATCGTCCTGCAGACCATTATAAACCTCTTCGGAGAGTCCGAGGAATGGC -TCTGACAGATCTGCAATCCTAAAATTAAGTTGGACGGAGTGATTCTTACTTTCGTTATCC -AGGAAGCGTTGAAGACAAATTTTCGACTTGTTTGTTCCACTCCGATTGAGACAGGTTATT -TGTCTGATTTCCTGGCGGCGAAGAAGAGATCTTAATATATGTGATCCAACAAACCCTGTG 
-CTACCAGTCACAAGAACATGATGGCCACGATTGGTGGAAGCAATCGGCTCGCAATTTCTA -GGAGTATCGGGTATTGCTTTGGAGTATCTAGTGACCCCTTTAGCATACACAAAAAGTATC -ACATTAAGCATCGAATAGGGGCTGACCTTTGCAAAAACTCTTGGAATTCGCTTGTTTCTT -GCGAATTATGATCAAGTGGGCTTTGAGACTCTATCAAAAATTTCGCAAGCTTTTGTGGTG -TTTGGTTACGATAAACAGCATCAACTTTGATATTGCGCTCAGGTTGTCCACTCTGCCGCG -CAAGTGCCTTTCCAACAACAGCAGCTAGTATTTGCGCTTGTCCAGAATCAAGCCCTGCAG -TAAAGACGTCTGTCTGTTCGTCCAACTTGCTGTCTTTTGCCAGAATTCTTTCGACCAGCC -GAGTCAGATCGGATGCAAGCGACTCTTGCGAGGTCAGGTCTAAAGTAAGGTCATTCAAGC -CCTTCACAATTGAGTGATCCATAGAACGGTACAGCTCATCGATCTCAGTTTTGTAGAGCT -CAAGTGTTTTGTTACGATGGAGTGTCCCTTTTGGTGTTCGAATAAATTCTCTGTCTGGGG -CGGCAATGAGAATCAGTTTCTCGTCGATCTGCGCATCTCGAGAGCTTTGATCATTCTCAA -TACTTAAAGCCTCCTTAAAGGGCCCTTTAAGCTCAAATGGGAGACCACTGTTGTAGTTCA -GACTCTGGTCAGGGTCGTCGAGCTCGATTAGCATTCCTGGGCGCGGTTGCTTGTTGCCGA -TCACCAAAGCGGCTTTGATTCCAGGAATGGCACTGACTCGGCTTTCGACTGGCATTGGGT -TCATCTTCTCACCAGTGGAGAAGACGACCAAGTCATCAATTCTGTATCGATACTCCCAAT -AATCTGGGATCGATGGATGCTTTCGGAACACGTCTTTCGTAGACCATTCGTCGAGATCAG -GAAAGTTCTCAAAGACGCCTTGATATGGTTTGCAAGAATTTTGCCGATGGAGAACCAGCT -CAAAGTCAGTCGATTGCTCCCCTGGGCTTTGCAAGACTGGACGCCATTGTATGCCATTCC -AGGTGTGGAAGTGGTAATAGTTCCAGTGTGGTGGATCGGTTGGGTACTGAAGAAACGATC -CACATTCGGTGCTCCCCAAACCCACAAACAGTCTCGTTTTGGTGACAATTATGTCCCCTG -CGGTTTGAGCAACTTGGCCTATATACAATTTTGAGTCAGTTGCCACCTAATGTCAACACG -GCTTCGGGACTGTTTAATACCTCCACCAGAGAAGATACAGCGTAGCTGGGATATATCTTC -CAGCACCTCAGGATCTCGCGCTATATCTTCAACGATAGAGGGGGGGATAAAAGCCGCTTC -CAGAGATCTAACTCTGAGTGCGTCGCGTAAGACGGAAATATTCGGGGGTTGACGAGGATA -TCCAAATAGAAGAACACTGCCCGAAATCAATATATAACAGCTTAACATAAAGCCCGCAAC -ATGGAAAAGGGGGAAAGCCATGTAGACAACTTTGTATCCGGACACAACTTCAAGCTGACT -CTCGATAGCTTTGGACTCCTTCTGCGATAGAAGACGATGTGCATCAATCGCGGCCAATGC -AGAGTGGGTGAACGGCACCAGCTTGGGCATGCCTGTAGATCCGCTTGTATGCAATATCTT -TTCGCTTATTAGGAGTTGGGGTGCGAGGGGGAGAAAAAGACAGGACTTACCACTATTGGA -AAAGATGATAGTGTATTCCAACTTCCTTCGTATGGGAAGTGTTCTACTGGGTCCAAATCG -CCGAGCAATTCCTCCAGGCCACTAACCTGAAGTATATCGACTTTTCGTTTCTGGAAGATC -TTATTCGCCGTTCCCGCAAACTCCGAATCGTATAGCAAAAGCGAACACTGGCACTCTTGC 
-GTGAGATTTTCATAAATCGAGATACTTGCTCTTGGTGAAAGTAAAAGAATCTGGTTTATT -TAGTAGATATAGCAATCTTGTTCTGCACTATTTTGTACATACCCTAGCTTGTAACTTGGC -TACCGCCAATGTCAATAAGATATGCCTGAAATCACTCGGGCCCAAATATCTACTCGCACG -TTACATGAGCGATCATATGCAAAATGCGGAGTGAGTGAGGCGTACGCAACGGATGCTCGG -GATGTAAGACCACTTCGATGTAATTTCCAGGATAGTTTGTTGATCAAAATCACCAACTGC -TGGTAATTCAAAGTGACAATTTTTGGCGCATGGGAATCACCATTTTCCGGGATAAGAAGC -TGATGAACAGCCAACTTGCGGGGGTTCTGAGAGGCTGTAGCTTCAACGTAGTTGAGTAAT -AGCCGTTCTCCAGCCTTTCCATAATCGGGAAGTTCAGTAGTCATGGCGAAAATGGTTTGA -ATGCTTGTCAATTTTGAATGTTGGGATGACTTCTCGCATTCGTTCAGGATCCTTCAGGTC -ATATATAGGCACGAAGCTCCCTCGGCAAAACGTCGTACTACGTGTGTTTATCTACTAGTA -AGAAAACTGAAAATATCACCAGACTGATGACACTTGGCCTTTGCGAAAAAAGAAACTTGA -AATCTTTCTACGCCGTATGTAAGCAAATGACTGCATATATATATTGTACAACAGGCAATA -GTAGGCGAAGAGGCTATACGACTGGACATGAGTTTTTGCACCCCCACTTAGCTTGCCACT -AATACTACCTTGTAAAACGCATTTTCTGAATGGAACCGGCCTGAAATACCACTAGAAGCA -AATTTTTTCGGGGAAAGGCTGTATTCTCTATTTCTAGACTCTTAAACCAGTACCTTGGAC -TGGGCACCTGATATATGTTGGATTAGTAATAAAGATAATTGTGCATGAAGTTTATCTTGC -CTGCAGAATATTATATAATCTACGGTGTGGGGGACTCATCTATGTCTACTGATTGTAATT -AGAAAAGGTCACCTTTTTTAGATTGTTTGTATTGTGTGGTGCTTTGGGGCCCTACTGGAC -TATGGAGGACCCAGTGGGCTTTCACCCTGGGCCTATATAGAGCCCACCCCATTTGTGGAC -ATATCCAAAAATACGTATATCCCCCCTCCCCCCAAATCTCCTACTTATTCTCAACTTGGA -TACAGCGGGAGACTTCTAGGCCATCCTGTTTATGTTAAGCAGGGTCTCTCAAGGCGTTCG -CAGGGAGAACTCACAACTTCACCGGTTGGCGCCAAACTTTCAATAAGAGTAGTGTCGTAA -GTCAAGTATGGATTCCCCAGGGACAATTCGTCATTTGGCTCTGCCAAGGAAGCCATCTGT -GCCTTTTGCTCCTGTTTCACGCGGAGAATCTTGTCATCAGTCGACTCTCTCACGTAGGCC -CGGTACTTAGGATTTCCCGGCAAAATCACATTATCGGCGATCACAGTTGTCCCAGGTCTC -ACCAGACTAAGCTCTTCACAGAGCTTCAAGTCATTGACATAATGAGGCTTTGCGTGATCG -ATAAAAAGCACATCAACTGCGCCAGTTGGGTAAGTTTGTCGCAGTTTGCGGAGAGAGTCG -CGGCAGGGCCCGATAATGATTTCCACTACGTCGTTCAATCCAGCAAGTTCCACCAAAGCC -GCAGCTATGGCGGCATACTTAGGGCTTGCCTCGAGACTCAGATATTTTCGGCCACGAGCG -CGCTTTAGCGCACTACCGAACAACAAAGCGGAGTATCCCACATAGCCTCCCAACTCAATC -ATGAAATCGGGTTGTAGCTCGGGAATCAGTTCCTCCGCCACGATGTTGCCTTTTTCCCAT -CCAACATTCATGAGAAAGTCTTCTTTGTGTCCGAACTCATCGATTGCAGCCAGGATCACC 
-TCTGGTTTTCCTTGCATTTCTTTATACTTCGGATGGGATTGAATAAACTCTAGGAGTCGC -TCTTCCCGGCCGTCATTATTCTATTCAAAAGATCAGCGTCTCAAAACGACCGAAGACAAG -CACAGACTCAACACACGCAAACAGCCTCCTCAGGCTTATATACTGCTCTCTGGCCTTCTT -TCATATTGAGTGTCTTTCAAAAAAAGATGCGTCGAAATAGAAGGGGGTTCTAGGTTGAAA -TTCTGGGTTTTTCTATAATTCTTGGGGAGTGAACCCTCAAGACCTCAATTTATTATATAG -CTTTTGCCAAAAATTACAGCGAATTCACCTGCTGCTTAAAGCAAATGTCACCAAAATTCC -TTGGAGTCTTGTTTTCGAAGGGAACCTCCGTCCCGATCTGTAGTGTTTATTCTTGATAGG -CTACAATCTCAATGATGATCGCATTATCGTGAACGCGCCAATGAAGAATAACACGGAGAC -AGTCCAAGTAATTCAATGTATGCAGTTGCAGGGTCTCGTGGTTCACAGTTCTCGATCTCG -TGCAGGACATGCAGATCATTTACTGTCGATCTTACCCCTTCAAAGTACCTGCCGCTGAAG -AAATATCGATGTATAGTCTGCCGTGCTATGCAATAGTTCAGAAAAACAGCCAGGCTCTGT -ACCATGTACACCCTTAACTCATGCCGTGCTATGCAATACGCAAAATGCCTGATTTACTTT -TGCTAATGAGTATACAGATGGTCAGCATCAAGAAGCGCAGACAGTCTTCAGGAAACTTTG -TGAAAAAGCAAAGATTGGTTCTGAGGCTTACGGGGTACAGTCTCTGGCCCCGGAAGATTA -TATGGTATATGAGTTGTAAAATTTCAATTCCCTAGATTCTGATCTAACTCTTCAAATGGG -CGATTGTTATTCGCTCGGATATCCCTAAAGTTCGGTGTCTCTGCTTGTAACTAAAGCATC -ACTTCGGTTGCCCTCTGCAGTGGTACCATCGAGTTTCCATCGCGAATGGCAGTGTATATC -ACTGAAACTCTTGTGCCAATAGTTTTATTGTTCTTTGCATCGTCACTATCTCCGGGATGG -TCCACAAGGCCTTGATGCAGAGGGACATGCAGGGTTTCACGCACCCAGAGATAAATTTCT -TTCGTGGATGTTGCCAAATAGTCAGGAGTCGATTGCCTAATGAGGAATTGGTGGCGAGTT -TCTTCATGCACCTTCTTCAAAAGAGATGCAACTCCAACCCTCCATTTCGGAAACAGTTTC -AACGAGTTGAAAGGCAAAGATGACTCGGAAGTTGTTATCTGATCAACAGCACAATGGGCT -GAATCCGCTGCAACTTTCTCACATCGAGTTCCAATCTCGTCTGAGTCATTATGCGCATCC -CATGACTCGGTGATAGCGACCCATAGCGTATCAAAATCTGGGATCTCCCCAGCGCACCAT -ATTTCTTGGTAAAGATCGTGGAGAGCTACCTTTACAGTGGCAAAGAGGTCAAGAGTCATA -GCTCGTAGATCGAGTGCTTGGCAGAGAACGTAAAGATGAGCTGCGCACATCATGGTCACA -ATGTCAACACCTTCCTTGACGTATCGGCATGCAATCAGAGCCAGTGAGTTGACTGATTGG -TTGTTCATATCAGCCACATGCACATGACTGACAACGGAATTGCCCAAAAATGCGAGTTCC -GAGAAGTAAGCCGACATATTGATGTCAATGCCCTTGCATGTAAATGAGATGCTCGGGTCA -TCGAAGCAGAGATTAGGTGGCAATCCTTTGTTCAGATTTGGGTTGATGACCTCCTGGCAC -TGTGATACAAGAAGCCTGCCGAGCATCACTAGTGCTGTTTTAGCTTTTTCCATGGCGCTC -GTAATCGATGCAGCTTGGAAATTCCCACCATGGTGGAAACGATCTTGTGCTACATCAATA 
-AGTGGATTATCGGTCGTGGAATTCAACTCGGTGGTAACCTGCCGGTTGGCCAGTAGGAGA -TCTTCGATCACAGGCCCAAGCCACTGGGGCGCTGTACGCAAAGCGTATCGATCCTGAGCA -AGGCCTTGCCTGTGAATGTTCGTCCTCTCCGCCAACATAGTTCCAGCCAAGAGCCCGCTG -ATATTCCGGGCCACTTCTATCTGCCCCGGATGTGGCCGGCATTGTGAGATAAAGTCATCG -TAATTGTCGACTGATCCGAGAAGTGCCTCCGTTGCCATTGCGGTGAGTACCTGGGTCAAA -ACAGTGAGATTGTTGACATCATGCAATGTGAGCGCCGCAGCCGCGGCACTCGGTGCAGTT -CCATTCATGATGCCAAGGCCTTCCTTGGCCCTCAGGGTCAGAGGCGTGATGTTGGCTCGA -CGCAGCGCATTTGCAGCTGACATTACCTCAACCTGTTTTACTGTTTTTCCATTGCTGGGA -AATCCCTGTCCCACCTGCACGAAGATGTCGGGACTTCCCTCAATCAATCCACCAAGATAA -GCAAGAGGTGACAGGTCCCCAGATGCCGAAATACTACCTCGAAGAGGAATGAGTGGAGTA -ATTCCAAGATTGAGGGCCTGAGCCAAGAGCTCAATGGCCTCCATTCTGACAGCTGAATGC -CCCCGAAGCAGAGAGTTGCATCGCAGGAGCATTGTACCTCGCACAATCTCTCTTGGCAGG -GCGTGGTGACTAGCATCATGAGGCATAGTATACGAATTATTCCCACGTTCAGGCAGTGAG -ATAAGAATACCTGATTGATGATGCTGTAAGAAGGCTTTTTGGAGCAAATTCAGCTTTCTC -GTCCGTGTGTTTGCATTGCCACCGCAGCCAGTATTAACCCCTGGATGGATCAAATTGATT -AGCAGCTTCCCAATAAGCAGAGTCGAATTACGTTTCATGTACCGTAGATAACTTCTCCAT -TATTTAAATGCAAGTCCAACGTAGCAATGCTTTCCTGAACACGCGTCACAATGGGATCGG -ACTGCAAAATAGAAGCATCCACTGAGAGCCTAAGGAGTTGCCATGGGGGTCAAAATCAGG -GCGTCTTTAAGTTTGCTGGGTAATTTACCTTGCCACAGCAACAATATGGGCCACTTGCAA -TGATTCGGGCTCTCCATCCAATGTCACCACGCTAGCGCTGCTACTATTTTCACTGAGAAT -TTTCCAAGCCTTGCGAACTTCATCAAGATGAGTGGGCATGATTGACTTTTCTTGCAAAAA -GGAGACAGGTTTTCCAAAGATGACTGTGAGAGAAATACAGGTAGTTCAACTGTAACAAGA -TATTTTTGCGATCAGTGGTTGATAGTGAAGTTTGAAGACATATATAATGGAAATCAACCA -AGATATCGATGTTTATACTTTCTCTGCACTCATATCCGCGTAACAACGCATATATGCATT -TCCATATTTGGATAATTCTGTCTCAGAACAAGTTTTTGCAGGTGCATATTATCAGGTACT -ATACTCCTCGGCCAGATGATCGACTGTATCACCAGATTACGCCAAAGTCCTGGAAAGCAA -AAGTGGGACCCCTATAGATGCATTCGAAGAACTGATGTAACCCCCCCGAAAAGGTGTAAG -GTGTGAGGTCAATCTAGATATTAAGTAGAGACGGACAAACAGACCAGACCTGATCTAAGC -CTTTCAATTACAGGTCCTAATCTCCTTTCTGTTCCAAAATGACGCTAGATGAATTCAATG -TGGAAGTTCTTATTGTTGGCGCGGGCCCTGCAGGGTTTTTGTCCCATGTGGAATTAGTGG -TATGATATCTTGCTGACAGCTTTTTTGTAGACTCATGGCCGCGACCTGGATGGCACAAAC -AGGAATCAAGACCATGTTGGTTGACCAAAAAGGACTGAGAACCCAATGTGGGCATGCAGA 
-CGGGATTGAAAGTCGCACATTGGAGATATTGGACAGTTTCAGGCTGGGAGAATCTATCTA -CAGAGGTGCCAATCTCACAATTGACTTGTGCTTGTGGGTAAGTGATGATGTATCTCTTCA -AACTCGAAGAATCCAAAGATAGCCTTCTTTTCTCTGTCTTTTGCTCTGACCTTTGAGTAC -TATGCAGAATCAGTTGGAGAATGGCGATATACAACGTCAGACTGTGCTCAATAATGCGAG -TCCGGGGCTTTCCCGATATCACGAAGCCACCTATGGGCAGGGAAAGATTGAGGAAAATTT -CTTGAAATTTCTCCAGTCCAGCAACTCAGTCGAGGTTCGGTGGAACACGGTGCCTGTCAA -ATTAGACATTTTGCAAAGCTGCACAGACTATCCCGTACGAGTGGTGATACAGACAAAACA -ATCGAATGCAGAGGTGTGTACCTTTTGTTTCTCGCTCAACCGTGAGCCAGGTCTAACCAT -TTTCATAGTGTACATTAGCCACGGTCAATGCCAAATACGTGGTTGGTTGCGACGGAGCCC -GAAGCTGGGTCCGAAAACGACTCGGTCTTAGACTTGAAGGAAATCATATGGACGAGAATT -GGGGAGTTATTGACGTGATACCAGCGACCAATTTTCGTGAGCTTGACTTGTTAGCTCCCT -TCCTCTGACTGATTACTAATGGAATTCGAAGCCGACATCCGTAAAAGATGCATTATAAAA -TCTGAGGCTGGTCACTTGATGATTATACCCCGAGAGGAAAGACTAGTTCGATTTTATGTT -CAGCTTTCTCCCGGAGCTGCCGCGCTGTTCAGGGCTGAATATCATCCGAAAGCCCTGGTC -GCGATTGTGAAAGATATCCTGCATCCATATAGCTTCGAAGCGCCTTATATTGAATGGTCA -ACCATATACACAGTTTGTTCAAGCCCATCTCTCATAGTGCAATAACAGGAATCTGACATG -ACCTTATACAGGTTGGTCAAAGGCTGTGTCCCGCTCTATCCAAGCATAACCGAGTGTTCC -TGGCCGGTGATGCGGTACATACCCATTCCCCTAAGGCTGGAAAGGGCATGAACGTCAGCA -TCCAAGACACATACAATCTAGGCTGGAAATTAGCTTCTGTCATCAAAGGAAAGGCCATAC -CGAGTATTTTACATACATACCAGGCCGAGCGTGGTGCAGTCGCGAGAAGACTAATAGCCT -TTGACAAACGGATGGTCATGGGAATCGGCGAGAAGGAACCGAGCTCAGATTCCACAGAGC -AAAAAGAGGGTACTCTTCGTGCAATTCTCAAGGAAGAAAATACCTCTGAGTCTGGGTCGC -GAACCTACTATCAACCTAGTCTGCTTGTAACGCGAACATGGGACGACAGACCAACCGGGA -AGCGCCACATGCTAAAGCCGATGCTCCCACACAGCAAAACCAACCTTGCCATAAATATTG -TTCTGGGGGCAAGACTACCTAATATAGAAGTCTTGTGTCAGAGTGACTCTCGTCCTTATC -ATATCCAGCGACTGCTTCGGAGTACCGGAGAGTGGTACTTGCTAATCTTGGGCGGCGATA -TATTACAGAAGTCTCAAATGGTTCGCCTCGAAAGTCTAGGGCTACAACTTGGGCGAACCG -ACTCTCTGATTTATAGAATCAACCAGCAAAATGACTCAGTCTGTAGGATTTCGACTTTTC -TAGTTCACTGTGCTCCCCGAAACAAAGTCGAGCTGATGGAGTTGCCGCAAGTCTTCATAC -CATTTGACAAGAAGCTTGGATATGATTATTGGAAGGTTTTCGCTGATAATGAGCCGTGCA -GCGAGGAATGTGGATCTGCTCATGAGTTTTATGGGGTCTCGGCGGAAGGATGTATGGTTC -TCGTGCGTCCTGACCAACATGTTGCATTTATTGGGTCTCTGGAAGACTTGCCTGAGGTCG 
-AAAGTTTTCTCGGAAACTTTATGATTGCCAGTATAGAAGGGACTGAATGTCAAGAGTGAG -GGTGAAATGGATGGAAGACCCGAGATTTTAGATTTAGATACTTGATACTATATCGAGATT -TGGACAATGACGAATGGACTGATATTGGATTGAGCAGCGCTTCGCATAATATTTTAGCTT -CCAGGAGTCTCTCTTATAGTGCCAAGTACAGCAGTAGCCTATCTTTGCGTCGAGTCTGAC -AATACATTACATGCTGTGAGTGACCAAACCCCGAACAGCATGTCTTATCAAACCAGGCCA -CCGTGAAAATAAGGTATGAATATCACGATCCCTTAGTTCTACATAAAAGCTTGGACTGCT -GACCAAAATAGGTACTCATCGAATGGGGGTGGTGGTGGACTCAGGTGTAGGTACGCGGTT -CTCAGGTGCACCTGGCGAGGGCAACTGCACGAACTCGGAATTTCAGATTAGAGATGTAGA -GGCTGACGGGCCGGCCGGCACCACCAATCGATAAGTTTGGCGAGTCCCAGTGCAGTACGG -TTGCGCCGTATACTAACATCACCGTTGTTGATACTACCTTGTAGGTCGACAATGGCACTC -ACAAGTGGGTACTTCCAAGGTTGTGTGGGAATGTTGCAACGCTCACAAGGTTCAACTGCA -CTATGTACAGTGCCCTGTGTGAGTGAGAGCGCCAATTGGGAGTGCTCGGAAATTATTGGC -AGTAGGTCCATGCATTTACCATGATACTGAGAGGGATGGAGTACTAAGTCTCTGCGATGA -ACTGTGGGCACAGCATGAGTGCTCAGACACATTACAAGGTTAGCAATCTACGATCATGTA -TTAACTACTATTGAAAATACTATAACAATCCGATGTAGTGTAGGGGTTATCACTGTGCTC -TTTCACAGCACTAACGGGGGTTCGAATCCCCCCATCGGAGAATTTCTTTTTTTGCTTGGT -CCTTTCTTACACCATCTTTTTGACATCCATTTATTCATCTATGTGCGCATATCATGAAAT -TAATAACAAGTATTTCGTGGAATGAACACTTAAATTGCGCGATTATCCTTACAGCCAAAA -ATGGGCCATTATATTTACCTCGATACCACATAACTCGGGGGAATCTATGCCAGGAAACTA -TGGCTACCCATGATATCGCCCTAAGAAATTATTAGCTATAGGCCGATGCGTATCCCTGGA -TAGCAGTCATTAAACACCGGGGGCATGCAAGAGCTCTTCCCACTCGACAGTGTGATATTG -TCCGTGAGTCGAATATTCCTAGAGCTAGTCTATACCTAGTGCCGATGCTGGTACACATGG -ACGATCATATGTGGTTATCTTGGCGTGTTCTTTTGGGATATGAACTCTAGAGAACCGTGT -TATATCGCGTGATAGGGAGCATAGCCAAGACTTATACTCCCCTGCCGTAGTTTTTGTTGA -TAAAAAAAGTAGAGAGGGAGTGatatatatatatatatatataCCCATGTACGCGGGAAC -CAAAGCGCCGAGAGCCTCTGCATTTACATATGCATAGGAGCTACTAGTAATGTACCTATC -TCCCTAATATATGGAGATGGTATTCTGAATTGGATGGAGTCCCGAGTACTAAACGGAGAG -GACTAGTCCTCGGGTGCCTTCTGTGGTTCCTGTCGTATCCAACTGTTCTTACTAACGCTT -AATGGTTGAGCACCAGTCGCTCAGAGTTTGAATTTACAACATCCACACTTCCAAATTCTA -TTGTCCTTATTTAAATTGTGATATTCTAGCAATACCTAGTAATTTTGGGAACCTCAGCTA -CACCAAAATAAGCAGGCAAATTGAGCAAAAAAGCATAGTTATTGCTGGAAGAAGTAGAAT -TATATAGTAATCCGATGTAGTGTAGGGGTTATCACTGTGCTCTTTCACAGCACTAACGGG 
-GGTTCGAATCCCCCCATCGGAGTTTCTTTTTTTCTTTTCTTTTTGCCTATTCAGATTGAT -TTAAGTTATGCTATTGTTTCGCTTTTATTTTTGCGTTCAGATATTCCAAACCCTTGGAAT -GTCTCCGTCCCTTCCATTTGAATCGAAAATGGGGGTCGTTTCCTTCAACAGCCTCTACCC -GCTGGCTCAACGTGAGATACTGGGAATGACTAAGTGATCAAATTGCTGAGGTCTCGTCAA -TATTACAGCGATTTCTCCAATCGGTGTCGCCGGCCATATCTCCGCATCGATCTAAGCACA -GATGCTACCCACCTAGGCAGTCTTGAAGGTATTCAGCCTAAAGCAGTCCAAGAAGTGACT -TAGCCCTGAAGATACATGTCACAACAGGAAAAGATCTACATATTAGTCGCCCTTGAAGAT -TCGTGCATTATTCTGCGCATTAGCCTGCACGACGCCTTGTTTCCCCATGTTCCATTACTG -GCAAATTGCCAGCCGCACAACTACATCCGGGGAAATTTGTCCACTCATGGAGGGAACCCT -CCACCTGGGCGCCATTAGTGCACATGTAATCGCTTGGCTCATATATTGACGGTAGATTAC -CGTGTCGGATTTGGCCATTTGCACTTGGTATCTGCAGTCCAGGCAGCGCATATGGACAGA -TGTAACCTTTATATACTCCAGATTGGAATTTTCCACACACAACCCCTGATTAACGAGGGC -AGTTGGAGTGAGAAAGCACCGGCAAATGTATGCACAATTTGAAAGCCACTGGATTTCAGG -GTTATGACTATGTACTGCACGCAACTGCCAGAGGATGCGGGGATCCTGCACGTTTGCCGG -TGAAATTATATCCTAGTAGTATCTAACGCAGGTCACATTATCCTTTAGCGTTCTAAATTC -CAAATTTGCACAATAGGCCGCGACGCATCGGGTAGCATAATCTTGAAACACCGGCTTTGC -CGTGCTGCCAGGGCCCTGTCCATTTGGAGTCTCGTCTTTAAACTAGACCCCGCAATGCAA -CAGCGACTTAGCATGTGCAGGTGTCCTTCATTTTCTTTGACCTCAACATAATACTTTAGA -ACAATATTGGTGCCATGTCTCCACAAATCAGACCATCGCCCTTCGATCTACTATGCCTAT -TCCGGGCTATGCGCAACCATTAGTGATTTATGCTTAGTTTATTGGTAGTCCGTACGAGAC -CAGCTCGTATGAACGGAACCTTATTTCAAATTGTGTTCTTAGCAGAGAACTGTTCTAAAC -CCTGTGTCAAAATAGGAAATATATTCTATGATACAGCATCACTAGACAGGTGGTACTACA -AGCGCATTGTTCCGAAAATACCCAGGTTTCGGACTGAAACGTTTTATGCCCGAAGGGTCC -GTAGAACAATCCTCCGTGTTTGGGCTTAACGCCTTGCGGATTACCCGTCACACAAAACGC -CTTTTGGCGGTTTGGTGTCGGTGGAGGAGCGCCATGGCGCCCATTCCACCCTGCGGCGCT -CTTGGCTGGCTCCAACCCCTTCAACCCCACAAAGTGTCTCATGATCCTTCAGTCCTATGA -AGGTTTAATCTTATAGCCAAAATATTTGCCTGGACAGTACCGGCCCTGATACCCAAATGA -AGGATCGACAGGCAACATACCTTAAAGCGTCTTAATTTGTAGTCATGGCTCCCTGCACTA -ACAATTCCCCGAATCTTGTACAGCAATGGACAGAACTGGACCGGGGGATCCTCCGTTTTC -ACGGATGTTGAAACGGCTTAACCATGTTTATCCACTCCAATATCAGTTAGGATCTTGATG -AGATTTCTCACAATGGTCATTCAACGATGATCATTCCCTTCCTGATTCAGGCTCCGGTTC -CACTCTCAAATCAATGCACGACAACGGAGGTCATTCAGTATGAGAGTTGATTGTGGATTC 
-ATGATTGTGGACTCTCCATCCCGAATTGGTGACGGCCTCACGGTCCGTCACACCGGTCAT -GCTATGCAAAGAGGGCTTTGGTTAAGCCTCGTATGGTAATGACTTGGAGGAATTCTGTGA -GGAACAAAGACATATAAAGATCCATTCCCACCTCGAGATTATTTCTGTTCATCAACTGCA -GGCCAGACAACAAGATAACAAGACAACATTGCAAATCAATCATTTCGCTTCACCAGCAAT -GTATCAACGCGCTCTCCTTTTCTCCGCCCTGGCGGCGGCAGCTCGTGCTCAGCAAGCTGG -CACATTGGAAGATGAGACTCATCCCTCTTTGACCTGGCAAAAGTGCACTGCCGATGGCAC -TTGCGCTGACCAGAAGGGCTCTGTTGTCATTGACGCCAACTGGCGCTGGCTCCACTCTGT -CGACGGTTCCACCAACTGCTACACTGGAAACGAGGTAAGAAACCCCGAACTGAAAATATG -AGTGTCCAGCTTGATTGACCTTTTCGATAACAGTGGGATGCAACTCTCTGCCCCGATGAC -AAGACTTGTGCCACCAACTGCGCCCTCGAAGGTGCCGACTACGCCGGAACCTACGGTGCG -ACCACCGATGGCAACGCCCTCTCTTTGAGCTTTGTCACTGGCGCCAACATTGGCTCTCGT -CTGTTCCTGATGGAGGACGAAAGTACCTACCAGATGTTCAAGCTGAAGAACCAGGAGTTC -ACAGTTGATGTCGACACTTCCGAGCTTCCCTGCGGACTCAACGGAGCTCTGTACTTTGTA -TCAATGGATGCCGATGGTGGCCTGGAAAGGTTCGAAGGCAACAAGGCAGGTGCCAAGTAC -GGAACTGGCTACTGTGATTCTCAGTGCCCCCGTGATCTGAAGTTCATCAACGGTCAGGTG -AGTTTTAATATCGGTCAATTCGAACAACTTGCGAGGAAATGACTAATGCGCAATACAGGC -CAACGTCGACGGCTGGGAGCCTTCTGACAACGACAAGAACGCCGGTGTTGGCGGCCACGG -AGCCTGCTGCCCCGAGATGGACATCTGGGAGGCTAACAGCATTTCCACTGCTTACACCCC -CCACCCTTGCGACAGCCCCGAGCAGGTCATGTGCGAGGGCGACGACTGTGGTGGAACTTA -CTCCTCCACCCGCTACGCCGGAACCTGCGATCCCGATGGATGCGACTTCAACTCCTTCCG -CATGGGTAACGAGACCTTCTTCGGCCCCGGCATGACAGTCGACACCAAGTCCAAGATGAC -CGTGGTCACTCAGTTCATCACCGCCGATGGCACCGACTCCGGCGCCCTCACGGAAATTAA -GCGCATCTACGTCCAGGACGGAAAGGTTATCGCCAACTCTGCCTCTGAAGTCGCCGGTGT -CGAGGGAAACTCTATCACCGAGGAGTTCTGCGCAGCCCAGAAGAAGGCCTTCGGCGATGA -GGGGAGCTTCACTGCTCACGGTGGTCTGGCTGGTATGGGCGAGGGCTTGGACCAGGGAAT -GGTTCTCGTCATGAGCTTGTGGGATGACCACCATGCTAACATGCTCTGGTTGGACGGCGA -GGCCTACCCTACTGATGCCTCTCCCTCCGACCCTGGTGTTGCCCGCGGCACATGCGACAT -CACCTCCGGCGACCCAGAGACCGTTGAGAGAGACTTCGCCTCCGCCAAGGTGACCTACTC -CAACATCAAGGTTGGCCCCATTGGCTCAACCTTCTCTTCCTAAAGAGTGAAAATGACAGG -CCCTTGCTGAAACTCAACTACACCTGGAATGTGGGGTCTCCGTGCTGTATAAATCCCTGC -GCCAGTGGCAGCAAATGCCCAATTTGGAGGAAAATGGTGAATGATATATAGGGCTTCTTT -CGTTGATAGTTTTCTGTATTCTTGATTTCATTTTGTCTTTCTTTAGAATCAGAGTTGTTT 
-ATCAGAATTGAATTGATTTCATTCGCCAAGTTGAACAAGTAAAAAAGGCGGTAGATACAT -CCTCGGCCCGGCAGAAAAAAAAAACGGCCAGCTCTAAAGCTCCAGCTTTCGCAATTGATT -TTCTTGGTTTGTTTACACGAGTTCAGATCGTTGACTAGGCTCACTAGAAGCCCTCATATT -TTTTTGTTTATTACTGTCACCATCCGCAACCATGTTTGACACGGTCTGCACGCTTCCGCT -TTCCTCGGACGTGTTCGCTCAGGCTATCCACCCCGAGGAGCCCGTAGTTTCTGTCGGTCT -GGCCTCGGGTCACGTCCAGACCTTTCGACTGCCTTCCTTAGAAGCGGAGGAGGGTGATGA -TGATGCCGCCTCGAACTCGTCTGCCCGGACCGGAAAGGGCCACATTGACACAATGTGGAG -AACTCGGCGACACAAGGGTAGTTGCCGCACCCTAGGGTTTGGCATTGATGGCAAAACGCT -TTACTCGGCTGGTACAGACGGTATAGTGAAGGCTGCAAGCAGTGAGACTGGTCAGGTGCA -AAACAAGATTGTGATTCCTCTTGAGAAGAACGGGTAAAACTTCCCCGAATATAAACGTTG -CAATTTAGACCTTTGAATTTGGCTGACCTGTGTCTCCTCTGATGTCATATAGCTCTGTCG -ATGCGCCAACCATGCTTCACGCCCTCTCTCCTCAAACTCTTCTCCTCGGCACTGACTCAA -GTGCTCTGCACATCTACGATCTCCGCATCCCCTACTCCAAGGTGTCTGCGCGACCTGAGC -AGTCACACCACCCCCACGACGACTACATCTCCTCTCTCACCCCTCTGCCAGCTTCCGAAA -CTAGCACGTCGGGCTTTAGCAAGCAGTGGGTTACAACCGGTGGAACAACCCTGGCCGTGA -CAGATCTCCGGAGGGGCGTTCTGGTGCGTAGCGAGAACCAGGAAGAGGAATTGGTCAGTT -CCGCCTTTATGCGCGGTCTACCTTCGAGCGGAACCAGCCGGGGTGAAAAGCTCATTGTTG -GTGGAGCCAGCGGTATCATGACTCTCTGGGAAAAGGGCGCATGGGATGATCAGGATGAGC -GGATTTATGTTGACCGCACCGATGGTGGAGATTCCATTGAGACAATGACCGTTACGCCCG -ACTACTTGGGCAAGGTGGTGGCCGCAGGTCTTTCAAACGGAAAAGTGAAATTTGTGCGCA -TCGGACCCAACCAAGTCATCTCTGAGGTTGTGCACGATGAGATCGATGGTGTTGTGGGCT -TGGGCTTTGACGTGGAGGGCCGTATGGTCTCTGCTGGAGGTCAAACAGTTAAGGTGTGGC -ACGAAGCTGAGGACAATGCCGGTGGATCTGGCGACGAAGACATGATGGATGATAGTGACG -ACGATAAAGATTCGGATGACAGCGATGCGGAACGTCGCGAGGACCCGAAGGACCGAAAGA -AGCGCAAGAAGTCCAAGGGCAAGGACCTCAGTGGTGGACAGCATGTCATGGCCTTCCACG -ACCTGGACTAGTGTCTTAATACCCCATATCTGATACCCACATTTGCAGATCAATTGTTCA -GCCCCAAACAAGAAAAAGTGTATACATATAGCGTCGGTGTTAGGGTTTCCAAGAGCCAAT -TGAAGGGAGAGTATCTATTTTTAGGTGGCGAGAAATGATATACTTCGCACCTATGTGATG -TTCCCTGTTTATCACCGGTAGACCTCCGCAAAGGCCATAAAGCCCACAACCCAGCCCAAC -CCGAAAATGGGCTGGGCTTTGAGTCCTACTTTCGGGCCCACCGGTGGGCTATCGGCCCGG -GATGAAAGCCCCCTAGGCCCTTAATGTGGGCCCAATAAGGCCCAACAGGCACCAAGCAAT -GCAAATATGCCAATTAATCGCAGTTGTATAAGTGATGTAGAGCCTGCCAGTGATTCCGGT 
-AGATTGATCACTCCTTAAGCCACAAATTATAGGTGATGCGATGAGTCAGAAAAAGAAACA -GATGCTGACCCCTAGAGATGGTGGCAATGGAGAGATCTAAGGTGGGTAAGATCGCCAAGA -TCACGGCATATACTAAAGCCTTAGGCCCCGGAGATCCCTAGCATATATACATACACTCCG -TACTCCGTAGGCTGGAACATACTGGACGTGAAATAGAATTATTCATCAGCTATTTGATGT -ATGCCAAGACCGTATTCATGCGTAATAATGGGCATTGATGGGGGCCTGAGGGGCCCCCAT -TGATGGGCTCTGGGTTAAGCTGGGCCTTCAAGCTTGGGCCCATTGGTGGGTTTTGCGGAG -ATCCAGTCTTGAGATGCATTACATGTTTTTTTTTTCTTTCATGGGGAAGACACGTAGAAG -ACCAGTAGCGAAAAATTGAATTGTGCAACCAGGCTCTATAGAAAGCGCACAGGATTTACC -AATAGAAGGGAAGAGAGGAGAGCCCTGGAGGGAGTGGAGTAGAGATCTACTAGTCCGATC -TCTGCGTTGCTAGTCAATACCACTTGATTATGGTGCTTCAGGCATTGAGCAGTCGACATG -ACTTGTTACCTGCTCAATGCGTCTGTTTTGGCCTACCTGGGAATCTATAACATTGTAACA -CGCACAAGAGAGATGTACTCTAGTGCCAGGGGGCTTGCAATATACATTAAATTTAGCTAA -TTGTAAAACCCTAACTTTACCAAGCCGGGGGAGCATTTTTTGCAGGAGATGCACTGGGTT -GATCAGGTCGAGTCTCATGGGTGATATCAATCTCATTGGAGACCATGATAGCACCCGTAG -AGGGTTTGCCGGACAGAGGGCCCGGATATGTCGTTATGTTCTGCATTTTGACATCTTCCA -TCATCCGTTCCTCACTCTCAGTGAATGACATGCCGATGGTCATAGGATCCGTACTCGCAT -CCCCGCGACTCTTTCGACTGGCGTAGTCACCTCCGCCACCAATGCTGCGGAAACCATTTG -AGGGCGATTTATGTGTCTTCTGAGACAACTGAAACTGGCTGCCGAGAATTCGGCTGAGCC -AGATTCTGAATAGGTGAAAGATCATGGGGAGATTGGTTGTGACGACTGCGACGAAAGTTT -CGCGAGTGCCCCATTCGTCAGCGAGTTTTGCACCATTTTCGGGCTCCTATGGAGAACGAA -CGAAATTAGACAAATCCTTCAAGTGGCAATACGAATATGTAAAACTTACCACCAGGAGGA -AGACACTCTTAATAGTGGCGCAAACAAGAATAAAGACACCCGCGCTGAGTACGATAGTTG -CCGCGATTTTTTTCAAAGGCTTCAAACTTGATTTCCAAAGCATGGGGATTGGGATGAATA -TGAGGTATGGATCGGTAAGAAGGTTTGCGGCGAAGGTTACTGCGATGACCGGCTTGGCAA -TGGCTGGCTGGCATACGTCTAGAGATATAGTTAGCATTCACTATCTGCCAATTATGCCAA -TAAGGGAGTCTGACTGCCGGGGTCCGGATTGATCTGCCAGTTTTTGTGGAATGGTCGACA -TGCGCCAAAGATTGTAATGACACTGGCGAGAAAGCTTCCCAGGACAAAGGCAACTCCGAC -ATAGATGGGGATTCGATAACGCCGGCCGAGGCCATCCTGTTTTGTATCAGTCACTTGTTG -CAAACCATTGCCAGAAACCGAGGGTTATTACCATCAGTCGAACGTAGAATGCCAGCATGG -ACAATTTGAGTGCATTAATCAATGCCACATAAGTAGTCCAGCCTGCAATTTGAATCTTGG -AGCCAATTACTCTAACCAAATGGTCAGTGATCGAGTTCAAATTCGCTCTTCGGTCGATAA -TGGGGGGACAAGTACCTGGCGCGGTACATTGGGTCGTCGGGCAATAGTGCCTCTCGCTGG 
-GCGGCTGTCATGCCATTGTTGGCGAGGCCATGAGACACATTGCCTACACTATGTGCAAGC -GACGCCTGTGCAGTGTAGAGGATCTGTATGGATTGGAAATTAGTAAGTAGTAAGATGGTA -TAGAATCAATTGGAGTCCATTACAATTGCAACCCATACAAGGTAATCGTCGAGTCGTAAG -TTCCTCAATCCTCCGGTTTTGGACCGCGCATATGTTCTAAGAATGGTCACGGCAACACCA -ATTGAATATAGCGTCCATAACTCGACAGTGGCCTTGTAGGCATCTGACGCGGCATCTCGT -GCCTCTAAGCCAAATGCCATGAAGCCTGATATCTGTGAATAATAGCGGGGGTCGGGGTTT -AGCGGAAGATTTACAACGCATGGAAGAGGTAAGAAGACGTGAATATCTTCTCTGTACTAG -CGCGAATGCCTTGATTGCTAAATCGCAGTTTCGCCCCTAATTTGGACACCTTCATTACAA -AACGAGGTACGATTGGAGAAAGTCCAAGCGTCTATGCTCCATACTCGATACAATTTCCAA -GGTTGAATCATACCAGAAGGGTCTGGTTCGCTCCGGATAACGAGGAACCTCATTGGTACC -ATGGTTGTGGGAAGCCACAGAATCCAGAATTGTGATAAAGTTTGACCTAAATTGGGATGG -ATATACGTCGATCCTTCAGACCTTCACAAGCCCTCCAGGGCTGGAGGTGCAGAAGGCTTA -CCTTTTTAGCTACTAGGCAAATTCGGGAATATGACAGGGGTCTCGGATGGCCCCTGAAGA -ATTTCTTTTTTAATCTAGAGCTAGGATTTCATAAATTAAGTGGCATTCCTTCTAAATTAA -ATCCCTAAAATCTAAGACATACACTTGGACAATGTCCACTAGCGAGATGCGAGTGGATCT -TGTGTGAAGCGCGTTGGATGCACTACCCCCACAGGTTTGGGGCCAAACCGGATGCCATAT -CCAACCACGAGGCAATGCATGTTCCGGCCGAGCGCCAGAGTGGAACTTAAATTGTCCCGA -TGGTTTTGCGCTTATTAAGGCTGCGATTCCGGTCACATGTGTACGGAGTAGGTAATCCCT -GCGTTAGCGTACGGCAGTTATTCGACTGGGATGAATATCAAGGCTGAACCGGGGAATGGA -TTTCCAAAGGGTGTGGTACGGAGTACTCCGTACTCCGTACTGGGTTCCATACGGAAGGCA -GATTCACAACCAGAACATATGTGGAAACCGTGTAAAAGTGGAGTCTTATAGACTTCTATG -TCTTATCTTTGTCTTTTTAACTATGCCATTGAGGAATTTTCACGGAGTACGGAGTGTACT -GCGTTATTAAGGGGAGCCAATCTAACCGAGCTAACTCTAGGGCAGTTTGACAATTGCCGT -GAAATAATATGCAATAAATAGAGCACCGCAGAACTGACAGGGATTATTTCTCCAGAGCCT -ATCCACTTTTTTTTTCGGACATCCTCTGACGATCAAACGCAAGGGGCATTTGGTTTTTAC -TTTTTCCATTGTCTTTGACAATTCAGTACATTCTATTTTGAGCAATGCATTTCAAGACAG -AGGTGTTCACGGCCGTTGCGGCTTGGGCACTTTCTGCCGCCTCTCTGAGTTTCTATAGCG -ACTGCGATTATATCGAGGGAGGTCTTTGCCCGCGAGGCGATGCTGATTTGCAGGCACTTG -GAAAGCAGCTTTCAAGCTCTGCCAAGCTGTATTTCCCCGGATCGAGCGGGTTCAAGGACC -TCACAACCCGGTGGTCTACTCTAGCAGAGCCCAAGGTGAACGTCGTCGTCGTGCCCGGCA -CTGAGAACGATGTTGTGCAAACGGTGGGTAGttttttttctttttctccttctctctctt -ttttttttttttctcttttcttttGGATCCCTATGAAGTCAATTGAGCAAAAACAGATAC 
-TAATTGTAACGATGGTTTAGGTGAAATTCGCCAACAAGAAGGACCTGCCTTTCCTCACGT -TCAACGGTGTCCACGGAGCCCTTGTTTCGTTGGGTAAGATGGACCATGGCGTTGGAATCT -CTTTGAGCCAGCTGAACAGCGTCAAAATTGCTCAGAATGGAAAGACCGTCACGCTGGGTG -GAGGAACCATGTCCAAGAAGGTCACCGACACGCTTTGGGCCGCAGGAAAGCAGACAGTCA -CTGGAACCTGTGAATGTGTCAGTGTTCTGGGACCGGCGCTTGGCGGTGGACATGGATGGC -TCCAGGGCCACCACGGACTCGTCTCTGACCAATTCGTCTCGATGAATGTCGTTCTCGCCG -ACGGTACCTTGAAGACGATTGACCACAACTCCGACCTCTTCTGGGCGATGAAGGGTGCTG -GCCACAACTTCGGCATTGTGACTTCTCTCACCAGCAAGATCTATGCAATTGAGCACAGCG -ACTGGGCCATCGAGACAATCACTTTCAGTGGTGATAAGGTCGAGGCTGTCTACCAAGCTA -TTAACGACTATCTGCTGAAGGATGGTGCTCAGCCGGCCGAAGTCATCAATTGGTCGTACT -GGATGAACGTCCCTGATCTCGACGCTAATAACCCCGTCATCGTACTCTATCTCATCCAGG -AGGGAGTTAAGGCCGTTGACTCAATTTACACCAAGCCTTTCCACGATATCGGAGCACTCG -CCATTGTTCCTCAGGCAGGCACCTACAAGGACCTTGCTGCATGGACCGGCATTGCCCTTG -ACTCCCCGCCATGCCAGAACATGGGCATGGCCAATCCCCGCTTCCCCATCTATCTTGAAA -AATACGATGTTGCGGCTCAGAAGATGGCATGGGACATTTACGAACCTGCCGTCCGCGGAA -GCTCTCCCTTCAACTTTTCAATCTTCATGTTCGAGGGTTATTCCTTTGGGGGCGTCCACG -CTATCCCTAGCAGCGCGAGCGCTTTTGCATTCCGCAGCGAGAATATCCTTGCCGCGCCGC -TCATCAACTATGTGCCTGTCGATCAAGCACTTGAGAACAAGGCCGCTGCTTTGGGTAAAC -AGCTTCGCAATATCCTTCACGAAGCCACTAACCGCACGGATATTCGCGCATATGTCAACT -ATGCCCACGGAGACGAGTCGACGCACCAGTTGTATGGCGATAACCAACAGCGCCTGCTCG -CCTTGAAGAAGAAGTACGACCCGACTGGCAAGTTTAGCTTTTACGCACCGATCGAATAGA -TGGATTAAGCAGGGGACTTGAAGATTTGTACATATAACAACTTGTTGCAAGCATGTACAA -CAAGCACGTGGAGATAGGATCATTTGAATCATCTATAAACAATCTGATACAATTTAATAC -TGTCTTGACACCTTCAGGGCTGTATATGCCTGAACATCAGGTATACCTAAACCAGGATTC -ATTCAACAGACAAACTCTGCGTAAATCAAAATCACCTGAGTTTGAGCCTGGACTATAATT -CAAGCCTGATATAGAAAAGCCCCACCCGGCAGTTTGGGGAAATTCATACCCGTCCACCTA -GACAATCCACGGATGGCTTTTTCTGTCGATCTTTCACTGTAACCGGAATCTCTACGGCCA -CACAAGGGGAGCTGGGAGCATTTGATATCGCACACGAAGCATATGTTAACACGCCAAATT -GGCCATTATATGCACAGCCCGCCAATTGAATTTGCAATGGTTCGGAGAATGATCATAGTA -TAACTGCCGATCAAACCCCACGGCCCCACGGAAGGAAATGACGTTCATCGAATGCGGTGG -GGTGTGGGGAACGGACCATGTATAAGATGACAAGTCTCACCCTCAAATGGTATTAAGCAC -AAAATGGATACCTTCGAGTACAATCCCAACCCTGGTCGTGTCGTCTTCGGCAGTGGCACG 
-CTACAAAAACTTCCCGATGAGATCTCTCGCCTGCAGTTGAAGATTCCGCTAGTGCTCTCG -ACACCCCGACAGGTTAGCCATGCTGAGAAAGTCAAGGAGGTGCTTAAAGGTCAGGTTGCT -GGGATCTTTACCGAAGCGACTATGCATACTCCCACGGATGTTACAGATAAGGCTCTTGAA -TATGCGAAGGCCCAGGGAGCCGACTCGGTGATTTCTATTGGAGGCGGAAGCACTATCGGT -CTTGGGAAGGCGATCAGCGTTCGCACTGGACTGCCACATATCTGTATTCCGACTACATAT -GCTGGTAGTGAAATGACCCCGATCCTCGGTGAGACCGCCGATGGGTTGAAGAAGACTCGC -TCCGACCCCAAAATCTTGCCCGGCACAGTCATCTACGATGTGGACCTCACAATGACCTTG -CCGGCGGCTATGAGCGCCACTAGCGGTGTCAACGCCATTGCGCATGCCGGTAAATATATC -TGATTAATCTGCATCAACGTCCTGCGCACCACTAATGCGTTACTGAATAGTCGAGGCTCT -CTACGCGCGCAACACTAACCCAATCATCAATATGATGGCGCTCGAGGGTATACGGGCACT -TGCATCAGCCCTTCCCGAGATCGTTGAGAACCCATCTTCTCAATCCGCGCGATCCAAGGC -CTTATATGGTGCCTGGCTCTGTGGAACATGTTTGGGCAGCGTGGGAATGTCTATCCACCA -CAAACTGTGCCACACACTTGGTGGTAGCTTCAATCTTCCCCACGCCGAAACTCATACGGC -TGTCCTTCCGCATGCCATCTCATATAATGCACCTAATATCCCTGAGGTTATGAAGCAGTT -GGCCGATGCCCTGCCGGATAGCAATGGTGATGCGATCCACGGTTTGAACGTTCTCTTGAC -TAAGCTCAAGGTCAAGCGCGGTGTGAAGGATTTCGGAATGAAGGAAGAAGACATCGATAA -AGCGGCTGATATTGCACTATCCAATGCTTACTGGAATCCTCGCCCGGTTGAACACGCGCC -GATCCGCGAATTGCTTCGCAGGGTCTGGGCGGGAGAGCTCGCACAGGCGAACCTGTAGAT -ATCCTTTGCTTTTTATGTTTGACACGATAAAGCGCATAGATAGCCAAAGGGCCACATATA -TCTTCGGAATACCGTCAGTATATCCTTTTGCCACCATGCAATTATCAGAAAGTTGGTCTG -CTCTGCCAATAGGCTCCGTTGTATTTTCCCGTTATGGCGGGGTGAAGTGGGGGGATCTTT -TTTTTTCTCCAAAAATTCCTAGCAATCTCATTCTTCACTCTCAACACCACACGTTTTCGC -TCTATCTTACAAAATGGGGTCTGAAATGAAGTCAGCCCCTGTGCTGTCGACTGCCGATGA -TCGTATACTCGAAGAAACCAACCCAGTAGCCTTGCGAGACCCCGCGGCTCCAGTGTCAGA -TGATGAACTCTCCATTACCTACGATATTGAGCGCACATTGAAGGAAATCCGGCAAGCACG -ATACAAGCGCATCGCTCTCCAATTTCCAGATGATATGCTCCCTGATGCCCCGCGAGTATT -TCAATTGCTCAGCCGGGGCCTGGAACGCGAGGAAGTCGGCACCAATGGCACGCAGTCCTC -GAACCAGACTGATGCCGACCTTGCTCACTCGATAACTGAGCTTGAAGTGAAGGATAAAAC -AGAAAAACACTCACCTCGGCTTACAATCCTGGCCGATACATCTTACGGGACCTGCTGCGT -TGATGAAGTGGCCGCGGAACATGTGGACGCAGATGTGGTTGTGCATTACGGGAGGTCCTG -TCTGTCGCCAACAGCTCGGCTACCCGTGATCTATGTTTTTACGCACAAGAATTTGCCCCT -TGAGCCAGTGGTTCGAGCGTTCAAGGCAACTTATCCTGACCCCACGACCAAGGTTGTCAT 
-CGCAGCCGACGTTACCTATGCAGACCACGTTCCTACGGTATACAATCGCCTAGTGGAAGA -GGGATATACCAACCTCTATGCAACGGAGGTTGTTCATAAGCCTTCCTCACCTATTCCCAA -CCGTACTGTCCCCGAGTCAGTCCAAGAGGCCCCCGAGACGCTGGGCGACTGGCAACTGTT -TCACATCTCGGATCCGCCAACAGCTCTCATGATGACCCTTGCGTCTCGCGTTGCCGCGAT -CCATATTTATCCCACCAATGACCTTTCGAATGAGAATATCAAACCATTGCCTGCGTCGAC -TGCTGCGGTTTTGCGGCGCCGCTATGGCACTCTTGCCTCTCTGACGACCGTGCCCATCTG -GGGCATTCTCATCAACACTTTAAGTGTCAAGAATTACTTGCATATTGTGGAACATGTCAA -GGAACGAATTGCCAAAGCAGGCAAGAAGAGCTATATGTTCGTGGTTGGTAAATTGAACGC -TGCCAAGGTTGCCAATTTCAGTGAAATTGGTGGTTGGGTGGTTATTGGCTGCTGGGAGAG -CTCTCTGGTGGATAGCACAGATTTCTGGAAGCCAGTTATCACGCCCTTTGAATTAGAGCT -GGCCTTGAAAGACGATTCAGACCGGGTATGGACAGGAGCATGGCAGAGTGACTTCCAGGC -CGTCCTCGATGCGCCAGCCCAGGAGGCCAACGGAAATGGAGATGAGCAGGGGACACCCAA -TGGTGCAACCATGTCCGACGAGGATGACATGTCAGAACCCGAGTCGGCACCGCCCGAATT -TGACCTGCGCACCGGTCGATACGTCTCAAATTCCCGTCCAATGCGTGAACCCGCGCCTCG -CGCTTCGCAGGTTGATGGATCATCCACCACTGGGCCATCCGCTGCTAGGGCGCTGGCACG -ACGAGCTAAGGGTGACTTGGCTATGATTGGCGGTGCCGTTTCACCAGGAGCAGAATACTT -GCGATCACAACGGACATGGACGGGTCTGGGTAGTGACTTCAATATTCGATATGATGAGGA -AGACTCGGAGGATAGTACCCTGGTCAGGGAAGGGCGCAGGGGTATTGCGAGGGGCTATAC -TGTAGGCGAAGCAACCGACAAACATTAGTCCACATGGCCTAGCATAGCATAGAAACCAGC -ACTAGACTCAGCGATCAATGCAATTCATTTTGAAGAGCCAAGTTCATCAACTACCTGGGC -AGATCATGAAAATCCGGGGGTTTGATGGCCATCACATGACGGCTCCTAACGGGCGCATCC -GGATATCACcttgtctcgctcagcttttgctcttctctttttttctctctcccatccctt -ttcttTAGGACTATAGGGTGGGCAATAAGACTACTTCTTTCTTTGCCTTGCCCTTCTCTT -GGGGAACTTCGTCTTTTCCCCTCATTCTTATACCCTTTGCTATACCCCTCTCTTTTCTCC -CTTTCCCTTCAACCACCCCCAATTCCTCAGCCCTTGCGACTTCGTCTTCTCAATTATCGA -TCTTTTACCTTTCATGGCCTTGCATGGTGGCTTACCTCAGGGTGTGCTTACCCCCTCGAT -TGTATCGTCAAGTTGTCTTATCATGTGCCGCGTCTCTACCCGCCGAGCTCGCGACCTCGC -GCTATCTCGATTTTCGACCTAAGAGACGGTATCTTTCGTTCGAGGACCTATGAAGAATCA -CGGTCTTGAAAGCATGGCTAGGGGGGATGTTGCTGGTGAGGACCCAGCGTCGCACCCGAC -GGTGCTGGAAAATGCCTTGCAACACTTTGTTATCCCAGAAGAACGAACCCTGTCCACTTT -TGAGCGAGTGATAACAACAAATGCACCCATATTCGAGTCTCTGTTACTCCAGATCCCGAC -CGATACTATCATAAAGCTGTATCACACTTCAAATTACCTCCGTACATTCCTTCGATCATA 
-TCCGACCGCATGGAAGTCGCTCTCGTTTCGCCTACAATTTCCTTCAAGCACCCTACCCAA -TCCACAACTCAATGACCCAAGCATCACCGAGACAGTCCCTGCGCGTCAATCGCGACCTTA -TGCCCTAGATATGTTCCTGATGAATGTGGTGATCCCATTCAGTAGGTGTTTGAAAAGCCT -AGAATTGGATAACACGGCCATTTCGGGAGAGAACTTGACCTCGACTGTACTGCATTCACG -GCGGGAGACACTGGTACATCTTTCTGTTCGAGGTTGCAAGAATGTCTCTCTGAAATATCA -CATCGTTCCTTTCTTGACCATGTTCGCGCTGCAATGTGAGATTGACATGGAGAGTCATAC -CGGTACTTCTCCCGGAATGAAACGACTTGCTCTCAAGAGTATCTACGCCTACCGATGTCG -TCATCACAGAAGACGTCCTTATCTCTCATCTTCACTGATGCGGAAAGACGCTGACGCGGA -ATCGACTCATGAACTTGTCAATATATGCCACAAGCTAGGCATCTGGACTGACACCGCTTG -GTGCACCACACCTGCTGGCAGATGTGCTCGCAGAGCCGGTTACGTCAAATCGAGGTTCCC -GCATGGAGCAGGGTCGCCGGAAGTTTGGGTAGTCTTTGATCGACTCTGGAGATCTAAAAA -TTGGATAGGCCCAACTGAGTCTGAGACGCCCCAGCCACACGCATACGATGGAAAGCTGTG -GGAGAATCGTGATACTGGATTATATGGCGAGGCATTAGGAACGGGTGAAGGTCTCGACTG -GGGTGAAGGCAAAATGACGCCTGCCCATCTGCGCCGAAGTCATACCCAATTCGTTGACAA -AATCCGCTGTGACAACTGCCTTGAGGATATCTCAGAGCGATGTGAACATTGCAGCGTCTT -AATGCACTGTGTCGGATGCCGTAAAACACTCTGTGCGAGCTGTGCCCACGAACGGCCATA -TCTTCACAGAAGCACCCCATCTTCTGAAAAAACCCAGAATGCATCGGATTCCTTCTGGTG -GGCACCTGGGGCCACACACTCTCCGGGCTCAATGCAGGACCCCGTGGATATTGTTGCAAA -TCCCAACCAGGCCCATGGTACAGGGATGGACCCACCGCCTATCCTCAATTTCCATTGGTG -CTGCACCAAGCCTGACCTCTCCGGGGGAGGCGGGATTAGCATTGGGCCACGCAATGGTGA -GGTGGATCAGGTACGGGCTGTGCCTTTGCCCCGTGGCCAAGGTTGGGAGGACCTAGAGTA -CACTGTCAGTGAATGGAGCAAGACATTCCCCAAGTATGCATATGGGGATCCGAGCAAGCC -GGACTATTCGCTTGAGACGGGCCACCTGGCGATGATGAAGTGGCTACTAGGACCGCCTAA -TCGGGAAGTCTCACCGTGCCCGCGTAACCTATGCCAAGGATGTTACGATTCACCACAGTG -GAAGGTGCATTGCAAGAGTTGTTCCAAACCTCTCTGCATTGAGCATGACCTCCGCGGGCT -TCGATTAAGGATCTGCGGCTACCGGGACCTTGAGACGGAAAAGCAAACCATTCAAAATAG -ATTCGCTGCAAATCTTTCGGGGTCTCTCCTTTCAAGCCTTCCCGAATATGACGACATACT -ATTCAGAAGTCAACAGCTTCTTGACTCGACAAATAAAAGCTTCATTGACGATTCACGTCA -AGATAGCATGGACGAGGAAAATACGCCCGAATCACATCCCGGTGACCAGTCGGCTCCGAT -TTCCTTCTTTTCCAACCAACCATCTCGCTCATTTTCGGCCCCTGCATCTGACCATTCTTC -CCCTCCGTCATCCTCATCTTCCACCTTCTTTGATTCGCCGACATTCGAACCTCCAAGATG -GCAAGGTTGCCAATCTTTCTTCTGCCCCCCGAGTGATCCTATCACCGGAACGAGGGATAT 
-CAGAGGCGGTATCAGGGATCCACATCCCAGATGCCCCAGCTATCTGCGAGAATGCAAAGG -CTGCAAGGTGTATGTATGCGCGGACTGCATCTATACCCACCCACCCTGCAAATGCTCGTA -TTGCGAGGAAAACTACTTGTGTCCTAATTGCATGACCGATCGCCCTCGTGACAGCTTGTG -CCGTCGCCGCCATGAGGAAAGAGCGCAGCGTGACCGCAAATGGAAAAAGGAAATGCAGCT -GCTCGAAGCCATCTTGGAGCTTAAAGTGGCTAACGAGCAGGCCGAGTTTGCAGGCGAGTT -CTTTGACCTCGTTGAGAGGCGCAATAGTCTGCCCTCGGGGACGCTGGTCCCCTTTGATAA -CGGGATCGCCGATTTTTCCGAAGGGGACATCGCAGCAGCGGAAATATCCGCCTCGCACTT -TGAGCCCGATGACCTAGAGTGGTTCCTCCAAGAGTCTCGTTAGTCCAAGGTACTGCTCAC -CGTCTCTAACTCGGACTTTTGCCCTTTTTCGTCATTGTCTTTTGTTTCTTTTTTTGTTTT -TTCCAGTTTCTTCAGCGATCTCATATCCTGGCGTCATAGGGGGATCTACTCGTATGAAAT -CTTACCGCAATCTACCACAGCGGGATCAATATCACGGTGTTCTAACTCGGCGTTGTACCA -AATGAAGGATCGGCGCATTGTCCCACCTTTTTTTTTTTTTGCTTCTGCCACTCGACCCCT -CTTTCAAACCGGACTGAGGCTGCAGTTAATGACATGGGTTGCACCATTTACCCAACTGTT -CCTCGGGAAGGGCTTACCGCGTTGACATAAATGGTCAATAGATCCAGGCACCAGCGCCAG -AACTGACGATTTGTTTGTTCTTTTTACCTTCGTCCTTTGATTTACGGGACTGGGTGGCCT -GGTAGGTGACTGTGGATGAATGTGCGCTTCTATGGACCATGTATTGAGGGGCATATTCAA -AGAATCAGGTCTCATCTTACCACTTGCTCTTCTACTTGGCTCATTATCCTTGTAGTCTTA -CACCAATGACGGTAGGTTCCTTGGACTAGTTCAACCCTTGGACCCTTGGCGAGTCCAAGG -TTGTATAAAGAATGGCGCCACACCCCCGGACAACCCATGGCAACCAGTAATAATGCACCA -CATGCGTTGGAACAATACTTACTCCGTAAAGATAACAGACTAGTCTAATCTAAGCACCTC -AAAGCATCGTCATCCGTCAGCTGGGGGAAGAAGCAAAGACCCCCCCGATCAAATAAGAAA -ACCCCACGATCGGATACCCGAACCTAGCGCCTGAAGAACAGCCCAATTACGATTTCATCA -TCTGACGTGACATCGACCAAAGCACCAAAAGTACCATAGCGCCCCCAGAACGTGTCGGGT -CGTAGATGGGTAGAGGGATGCGGTCAAAGACAACGGCCATGACACAGCGCTAGCGTATGC -AAACCACGTGGATTCTTGAAATGCAACGTCTACGGCTCTGCACGTCCCGGGGCTCTTATC -GGCGGTGAATACCCTGCTTAGACCAATAAGATTTGAACTAACCTGGGTGGTGATTGGGCT -CTGCAGGGCCTCCAACTAGAGTCGGGCGCCTAGATGTATATATGTTTCCCCCTTCCACGG -AATTTGTTTATATTGCTACGTGGTTCTAGAAATACGTAGAGATAATACTAATCATAGAGT -CTTCCGGAGATAGATCTTCCGTTGATCCTAGAATATAGATCAAAGCATCAGGGTGGATCG -GACGATCACTTGGGAACCTTGGAACAATCCAGTTCTACCCGCTTCCCCGGTGCCCCTTTT -TGTTATATTTTATATCTTTACTCCGTACTCCGTGCTCCGTGACTATACAATGTATGTATT -GTATATTTCATACAACAGTAGACGGTCGTATAGATGGCCTGATGATATCGTAAGGGCTCC 
-TAGGTAATCATTCCAGCGATCTCTCTGTGTCGTAGCGGTCTTTTGAGTCAGCCTCTCAGA -TATCCTCAGATACCTATAGGATAATTTTTCCCCTTTCCTATTTTATACTTGGGCAGAAAA -ATGCGGGAAGGCATACTTTATGCAAGAGTATATttattttttatctccatattttactct -tattcttattcttCCATATGATACGTACTATGTATCCATTCCACCCCTTAAAAGCATCCT -AAGATATCCGATCGGCTGGCGCGGATGTAGCATCCTAAACCCGATTGGGTACGGGACAAC -TTCGGTATTCTGGAAGTCATCCTATTAGTAGAGACAAGACATATTTGGAATGTTAGTGGC -GAGTACAGAGTACAGACTGTATGTATGTGTATGTAGGGTTTAAGTACCGTACGCTAGGTC -CCGTTTCTATCTGACATCATAAGCCAAGTAGACATATGTAGCTAGTTTCTCAAGGGGTAA -AAACAAAAAAAAAAAGGCGCTAATTTCAGCTTGAACCGTCACAATCGGCCTCACCAATAA -GATACGTTAGATATTTGCTCCTTTTTTTTATATTTTTTTCTGAGCGCAAGGCTAAGTGGT -CCGATCTCCCGTTAATTTAGGCTCCAGAGAAGATGTCGGTGAATGCGTTCTTGACCTTTA -GACGCGTGACCGGTCTATTTCCGATGCAAACATCATTTGACAGGGGTTTTGATATTATTG -GGAAAGTTTTTATTTTGGATTTTTCTAAAAGAGAAATAAGAACAGGGTTTCTAGAAATTT -AATTTAATTTTCTTAGTTGCGTTGTTTATATAAAGCCTTTGCTGTCGCACGTCTATTTGA -CTTTAATCAACTCCAGACTTTCAAACATCTTCCCCTGAGATCTATACCATTTAAGACTTC -CTACTTCATATTCATCATGGATAAAGTCAAGACACCCAAGGAGGAACCTCAAGAGAAGAC -CGAGGATCAGCCCATGACTGTGGAAGAATACCAGAGGAAATGGGACCAGATGCTTCAAGA -CACCGCAAAGCAGATCGACAAGATTCGAGGTGAGTTGTAATGTGACATATGCAGAGGAGG -GGACTGACAGCTATCATAGAAAACGAGCTAGCAAGAAGGTCAAATAATAACAATCAAACT -GAGCAGCGTTGATCAACGCCACTTTATTATTGTGACCGTCTATGCAAGTCGTGGGGTTTA -TTGGAAACCGAGCTTCTGATGCTTGCtttttttttcttttcttttttttCTCCCCTCCAT -GGATGATGAGAGCATTGGGGATGGGATCAGATGGTGTTTTTTTTTTGGGAGTACATTATA -CCTTGAAGCTTTGGCATTTTGTTCCAAGGCATGCATGAGAATACAATTTTCCTTACATCT -TAGAGATACTCTATGGGATGTAAACAAAAAGCAAAGAGAGAGCCTCAAGTGGGGTGGTAC -ATGGGGTTGCAAGGTTTCCTTGATATCTGCACTGTCCAATCTCCTTTTTAGCTGCACGTC -ATGGATGCCGCTAATGACCAGGTCGCTCAACCATCCTTGTAGGTTGCCCCAAAATGGTAG -GAGACATACCCTAACACCCCTAGGCCACTTGACGTCGTGGCTCACAATGCCACCGTGATA -CTGAGCGGTGTACTTCTGGGGGCACTGCTTGCGTTCGTTGCTTCTCACCTGGGCTGGCTG -CGCCACCATGTCCACCTGAATGACCGGTTGGTCAATGCAATATTTCCTCTTGGGCCGCAA -TTCTTCTTCTTTACATTGCCTCTGTGGTTTATTGTGGCGGTACCTCCTGCTTTGCGATTT -TCCGGTATGAatatatatatatatatatatagatTGACGTGAAAGAAACTAGTCTGACTT -ATCCAGCATTCATTAGGGAATACAGTACTGCAATTTATCTGTCATATGGACACCCGGAGT 
-ACAATACCATTGAGATCACCCCAGCTCAAATGATCGAGACACCTTGAGATGTTGACGGAC -AAAGTTGAACTTTGCACTGTCATTGTGTTTCAGATTATGCAATGAAATAGATATCGAAGA -TTCATACAAGCTCATTGTACGGAGTAAAGAGATCGGTGTAGAGGCGATTAGGGCTGGCGG -CAAAAGCGCCAACGGTAGAAAAAGCCCCGTCTCCACATCTTTCCGTTTTGGGTTTTGCTT -TTGGCTCCCTCAATTATCAACCGTATACCTCCACCCTTACGCTGTCTGCCCAACCCCCTT -ACTCCATTTTCCTTTCTGACCTGCCATCATGTCCGCCGATCTCGACCGAGACCCGTCCCC -CGCCGACCTAGCCGAACGCGAACGCGAGAACAAGGAGCGTAAAGCCCGCGAGGATGCCGA -ACAGGCTCAGCTTCCCTACAAGTGGACACAAACTATCCGTGATCTGGATGTGACAGCACC -AATTCCTAGTAACATCAAGGGTCGCGACTTGGATGTGCTGTTGACCAAGAATAAAATCCG -CGTCGCGATCAAGGGACAGGAACCCCTCATCGAGGTATGTTGTCCCCACTCACGGAGATT -ACACCGTACACTCTAAATCAACACATCAACTCCAACAGCCCAGCTAACCACCCCATTTTC -CTATTCAGGGCGACCTTCCCCACGCCATCCTTGTCGACGAATCCTCTTGGACACTCGAGA -CTACCCCCACCCCGCCCGGCAAGGAAATCAACATCCACCTCGACAAGGCCAACAAGGTCG -AATGGTGGCCTCATGTCGTGACCACCGCTCCTAAGATCGATGTCACCAAGATTACGCCCG -AGAACTCTAGCCTAAGCGATCTGGACGGCCAGACTCGCGCCATGGTCGAGAAGATGATGT -ATGATCAGCGACAGAAGGAGATGGGTGGTCCCAGTAGCGACGAGCAAAAGAAGATGGAAT -TGCTGAAGAAGTTCCAGGCTGAGCATCCCGGTAAGTGCAATATTGTGGTTGTCCTCTTCC -ATCTCTCGCTAGGCACTTTGCCAATTCCCAGGAACAAGTGCATTCTGTGGTTCTTATATG -TTGCTAACCATGATCTCGCAGAGATGGATTTCTCGAATGCAAAGATGGGCTAAATCACTA -TTAGTATTCAACGAAACCTAACTCTTCGTTCTCTATGCAAAAAAAAAGTATCAATATATG -AAGTATGAGTTAAACGCATGGAGAGCTTCGATGAATGATGAATTGCATGGCGTTTCTCGA -AATCAATTTCCAAAACTTGGTATGGGTATCCATTTTATATATTTCTGAAAGTAGATGTCT -CTTATTCAGTGCCGTGATCAAATTATTGCTTCTGGTCATTCTGCCAAGCGACAAATACGA -GAATTACCTAATCGGGGATAGTGATCGGGCGTTCGAGTCGATCTTCCCTCTCGCCTCTCT -CGTCCGTTACCACATGGCAGTTCCTCACTTAATCTGGACGCATGAAGTGATAGGGCTGGT -TGAAACAAGGCGCAGTTCCACATACCTGAGCCTCCGCCCATTGCTCTGTGCCAAGCCTGA -ATCTCAGCCGCAGAACTATATCGCATTCCAATATCTTGACACCGTGAGACACCCGAGCGT -TTTTCGTACAATCTTCAGCGCAAATCTGATTCAACAGACCATCGGGAAATCGCCTCGGGT -GTCAAATTCATACCCCAAAGAACAATGGACGATATGCAGCCTAAAGCCCAATCCCGGCCA -CACTTGTCTAGGCACGTGCCGGGACAGAATATGGCCTTTCGTATCCCGAACTACAAGTCC -AGAAATTCCCTAGGATGTGGGTCCGAACAGTCCGACCAGGTGTATCATTCCCGCCGGTCT -CATCGGAAATCCCGCGCCGGCTGTGTGAATTGTAAGCAACGAAGAATCAAGGTACTCCGG 
-ACGCCCCCATTATCATTCATCGGCTGATAATGATTGCTGAAGTGCGATGAAGCAAAACCC -CATTGTCTTCGATGCAAAAAACATGGTACCGAGTGTGACTACTCGAGCCAGCCAGCCCGA -CCTCACAAGGCCAATAACATCATTTCCAAATTCACCAAAACCAACCCTGAGCTATTATCG -ATCGATTCCCAAGCTTCTTCAATGTCTCTGATGATGGTCGCGGAAAAGCTGAAAGAATTA -CTACAGCCACCCCCTGCTACCGGTACCAACCTGCCGAGATTACCCACAGATGCCACAGCA -TCAGCCCGCACTATCGAAGCACTGCATCATTTCCACAAAGGCCCAGCCTTTGCAACCGAA -AGCCAAACGCCTCTTCGAATAGTAATGGGTAAAGTTGTTGAACTTGCCTTCGAAGTAAGC -CCATTCATAGTTTATCTAGGCCTCCTCCCTAAATCTGCCTCACTCATATCTCAATAAAAC -ACACTCCTTCGACCTACCTATTACAACCTCAGACTAACTCCCTACTAACAACAGACCCCC -TTTCTCATGCACGCCATAATAGCAGCTGCGACAAGCCACCTCTGTACTCTCCTCCCCGAC -AGCAGGGACTACCGCCTAGCAGAAGCCTACCACTGGCAACAAACAATCAACCAATACTCC -ACCGAAGTCTCCAAAACGATAAACTGCAAGAACATGGATAAGCTTTACTCCGCCTGTATG -ATGATCAGCATGCACTCCTTCCTGCAAAAAACCTTCAACCCGCGCACCTCCTTCGTCTTC -ACCACAGACCCAACAGCCCTAACCTGGCTCCGCCTCCAAGCCGGCCTGCGCTACCTACTT -GAGCGCACGCTGCCCTGGCTCCCGCAGAGTATGTGGTGGACGGTATTCATGGAATCGCGG -GATCCATCTCTGAACTTTGAAGATAAGCGTCCTGGTCGTGTGGGTCTTGATCCGGATTTG -GCGGATCTGTGTGGAATCAGCGATGAGACGACTATTGAGTCGAATCCGTGTTTGTGGCCT -TTGCGGATGCTCATGGGATTGTTGCCGTTTGAGAGGACGACCGGTAGCTTTCAAGTTTAT -AATACTTGGATGGGGAGGCTGGAGAATCCCTTTTATGAGTGTTTGCTTAGGAAGGAGCCG -CGTGCGCTAGTTCTGCTAGCTTGGTGGCTGGGGCTTATGTGTTATGTGGAGGAATGGTGG -GTTGAGATGAGGGTTCGCTCGGAGTGTACGGCTATCTGTATGTTTCTTGAGGATAGTTGT -GATCCGCTGGTTTTGAAGTTGTTGGAGTTTCCGGCGTCGTGCTGTGGGTATTTGCTCAAG -CATGAGCAAGCGAGGGTCCTTGACTTGGATTGATGTTGGTTTGGGCCGAGTTGGTTTCAA -TCTTGGATTTGTTGCATTGATACCCTTTTTTTGTTGGCTGATTGCCAACCTAGGTACATA -GCCAGGTGGTTTTGTCGAGAATTAGTAGCGATGAAGCTAGGTGTGGAGTAATTGATTCAG -TGAAATGTATCAATGCATTCGTTTATCATAATTGTTACATTATGTATATCTGCCAGGATG -CGAGCGAGCACTCAGGTAATCGATGATCTCGATATGGTGTCCTCCTGCATCAATGACAGT -CGAGTATGTGCAAGTCAATATTAAGTATCCAAAGCTATCCCAGAAGAGACCCCTAGAAGA -AATCCCCTTCTATACCAAGTCCCAGTCATGTCATAAACATTCGATCAGTTTCCGTAAATG -ATCTCGCCGCGTAATGTGCaataaaataagagaaagcacaaaaaaaaaaaagaagaaaaa -aGCACAGCAATGCAGTAATGGGGCAAAAAGAAACGTAAATCCACAATAGACGATTCCAGA -TCGCCCAGACCAAAAGATCCATGGTGGCAAAAAATTAAACAGGTAGCTTATAAGCAAAGG 
-GCAGCAATAATTCTTTCCGCAATGATTAAACGAGGCTGTATCACCGGCTCTCCCAAGAGG -CCCGACGGTGACGACGAATGGACCGGTCATCTTCCTCAGAGTATGTAGGTCCAGTTCGCG -AGCCGTTCATGCGGGCAGCGGGAGGCGGATTAATGATGACCGCCATAGCGGAAGATATAG -ACATGGACCGCGAGCGACACAACGCTGCTGAGTGACGGGGGGAGTACGGCGGCGGTGGAG -TATCCCTCGTTGCGGTTGAGGTATTGCCATAACTGCTTGGGAAGGAAATGGCTGATGAAC -TATCGGAGGTTACATCTTCGTCGTAATGATTGGCGTTCTTTTCATCATGGGAATAGGAAT -TCTGAGACTCGGAAGAAATAGGTGGATCACGCATATGGGCTTCTAGAGTTTTCTCGTGGA -GGCTCATTGGACGGGCTGTGTACTGCGGGAGAGGGACCACGCTGGAATAGCCCCCATCCT -CCGAGTCAGAGGGCGGAGGTCCGTGCCCATGGCCCCCGGTTGGAGATGCCGAGCGAAAGA -ATTCTCCTTGCGCAGAAAGCGTATAGCTTCCTGTCTGGATATTGTTGGGGTTGGGTCTTT -GGGTCATCATTTCTGGGGATGAATGGGACTGAGAGGGCTTCTCGTCTTGGGCGAAACACC -CGCAAAGTGCCGAGAGGAAAGACATGGCTCAATGAGGTGTCTGCTTTTTTAATTGACAGC -AGAAAAGCCAATAGTGGTTTGACTAAAGCGAGAGAATTTGGCCCGTGGGCAGTGCAAGAA -AAAGCTCTGGACAAGAACTATCGTATGAGCACCTCGGGTGTTGAGCCTCAAACGCTATAG -TGGGCTGAACCCCACTTGCGTCGATAAACCCCACAACCGATAAACAGGGAGAATTGGAAT -CAAGTATGAATCTATACGGAGTTGAAATATATGTTATAGAGAAAGAAAAGAAGAAAATGA -GGGGTGAAAGAAAGGCCAGATGAGAGCTGACCCGGGCAAGGCACCCGCAGGCATCCAGAT -TTCCCAAAAGTCTAATTGGCCCGGATAAGATACTGTAAATGCCTGAGGCACACACCCACT -CATCGTTGTCAATGAATGTAAGTACTAGCTCATATGCTAGAAATGGTTCGTGGATATCCC -CAAGAGCCTGAATCATGAGATTTTAGAAGATAGAAAATGTCCTCGAGTGCCATAGAGCAA -TCATTGAACAGTACGCTTTTACACCCAGAGCATCCTCGGCATCACAATTTGCGCCAGTGG -CAATCTACCTTTTTGATCATCCCAAGCACCAAAGGTATAGGATCAAACAGGCTCAATCAA -AGAGGGCTGGGTTTTGCCGGGCTAGCATTGTGGTAAACGAGGTGATACGTATACTCCAAA -TATTTCCGAGACACCATTGAGCGTACACAAGCTCCGAATCAATTTTGCCCAAGAAGCTCA -TCAGAATCAATGAGATTCATTGTATGGAGTAGTTCATCAAAAGTTGAAAGACAAATATAT -TGGATCAGGATATTTGCTTGATTAAAGTTCGATGAGCCTCATACTAGAGTCCCACATGAT -TTTCTGCATTTTTCCCTCATACGCCCTAAGGCAAGCTGAGTAATTGGCTAGAATCCATAA -TGAGAGTTAGAATGATCTCTCATGAAAATGGCCTTCCTATTGATCCCTCAGTAGAGCATC -TTGTGCCCACATGGATAGAAAAACCGATAAGTGATAGTCGATAAGATCGGAAAAGATAAA -TTAGGCGGTGTATAAAACATCATCCCTAATTGGTTGAAGGGTCAAGTGGGCCGAGGGGCA -AGAAAGTATATGGGCAGAATACAGTCATGTGACTGAAACACCCCACCAATCGCGTCGCCG -CCTGTATTTTTTTTTGTTCTCCCCGCCTTCTCCTCGACGATCACTCACTCGAGCGATAAT 
-AATTTCGTTTCATACCTAATTCCAAATCTCGAGTCTTTCAGAACTACCTCATCATGACCG -TCGAGCCAACTATCGAGGAAGTGACACAGAATGTCGAGCCTCCTCAGGTCGCCCCTCATC -GTTCCCACGATCCTGCAAACAACATGAAACGGACCGATCCCTTTCAATTCGGCTCACGAT -ACCTTGAAAAGGGTGATGATGTTTTTGAGTTCAATGCCTGGGACCACGTCGAACCAGATG -ACGAATTTAAGGCCTTCGCCGAGACCCAATATGCGAGACAGCGCGAAACACGCGTGTCGG -ACTTTGATCGCAACCGATTCAACAACGACCCCGCCAAGTGGTGGGATCTCTTCTATAAGA -ACAACACATCCAACTTCTTCAAGGACCGCAAGTGGCTGCGCCAAGAATTCCCAATCCTCG -CTGAAGTGACACAGAAGGATGCCGGTCCCCAGGTCGTCTTGGAGGTTGGCGCAGGAGCCG -GAAACACCGCCTTCCCATTGCTGGCTAACAATGAAAATGAGCACCTGAAGGTCCATGCCT -GTGATTTTTCCAAGTATGCCGTGAAGGTCATGCGCGAGAGCGAACTCTACAACGAGAAAT -ACATGTCCGCTGATGTTTGGGACGCATCTGCTGTGCCTAACGAGAACGGCGACTCTCTTC -CGCCTGGCCTGACGGAAGGCTCTGTCGACGTTGTGATTCTAATCTTCATCTTCTCTGCCT -TGGCCCCTAATCAATGGGACCATGCAATCCGCAATATCTATCGTCTCTTGAAGCCCGGTG -GCCGTGTGTTGTTCCGCGATTACGGCCGTGGCGACCTCGCTCAAGTGCGATTCAAGAAGG -GTCGATACATGGCCGAGAATTTCTATATTCGAGGTGACGGCACACGCGTTTACTTCTTCG -AGAAGGACCAGCTCGTCGACATGTGGAGTACCTGGAGCGCGGAAAACGGCTTGCAGATTC -CCATTGGTGATGAGAATTCCACCGAAGAGGCGTCCGAGAAAAAGACAGATGAGGCCCAGC -CCACAGAGGAGCCGTCTGCAGAGGCCAAACAACTGGCAAAGGATAATGGAGCATTCGAGG -TCCTTAAAATGGGAGCCGACCGGCGGTTGATTGTCAACCGTGGGACCAAACAAAAGATGT -ACCGCTGCTGGATGCAGGGCAACTTCCGGAAACGCGGTGGACCTGAGACTGAGGTTGAGG -TTGCCACCGAGAACACCCAATAAAGTCCTGAGCCACCATTTCTACAATGGTTATTTCGGA -TGGCCATAAAGCTTCACCAAAGTCTCAGCCATAAAACGGACAATTGATTGTTCATAACAG -TTATGGTTGTGATAATCTTTTCCCATCGTCCAACCTTGACATCGCGATCTTCAATATGCA -TCGACTCAAATCGTGATTTTGGGTGCATCACAAAGGCGCATTACAACGAGATACCATGCT -GTTTGGGCATCTCCAAAGGGGAAAAACTAGAGTGGCTAGCACCTTTCGAGCGATGAGGGA -ATCGACTTGGATTGCTGTTTATATAGGGCAATGCCTTCATGATGGCGTTACGAATGTACG -ACACACATTAGATCATGTCATTAATCTGCACATACAGTCAAGGAACAATACCTACCATGT -AGAAAACCATCGCAGGATACTGATGTAGAAGCTCAGCTCTTCTGTTCACCTAAACCATAT -ACCACAAGGATGCCATATCCATGGATAATTCTTTCAAATTATTCATAGAAATCGGGAGCA -AAATCAAGCACATTGATGTGCAACTGATGGTATGAGGCCCGGGGAACTTGGATAATTAAT -GAAAGCGAAATTATAGAATAATTGTGGAAACCGAAGTCTTCATTTCATCCAAATATTGAC -AAGGTTGGTCTGCAAGAAATAGATTCCTGAAGTGCAGCAAACGTGTCAAAAGTAAATACC 
-GAATAACCGAATTTTTTGTCCACACCCTACGCTGGATCTAGACAGATAGACTTGCGCTAA -TAAGGGGAACAGAGAGACACTGGAGTTATGTACAAGCCAATGCTCTAAGAATCGAAAAAG -AAACAGAAATCAGGACGAACTCCATCCATGCAGGCAAAATGGGCATCAACGGAGACAAGG -GTCCCGTAGAAAGAGATAAAAAATTTCGTTGCAAGGGACCATAGCAAGTCGTGAGCTGCA -GCAAGCTGCAGGCCTAATCGTCTAATCGCTGTATGTTGAAAAGCGAAACTATGTACCACA -TGAGATTTGTTCGTGAGCAAACGCAAAGAGCAAACAACAATGCAGAGAGATTTATGGATG -TATGTTGGAAGGTATTTTCACAGTGTCAGATTCGGACACTCGCCTTGGGCGCGTTGGAAG -GCTCCTGCGAACAAGTCCTGCATGGCGTGGTAGTGGAAAAAGATACCGCCCAAGACAGCG -ACGTGCCAGATGTTGTGGCTGCCACCAACGTAATCAAAAAGACCTGGCTTCCAGCGCTCA -GGAACCTTGGAGGCGTAGACACAGGCACCGACAAAATAGACCAAGACGCTCTTCACGACC -GGTGCATAGAAATACATGGTCCATGAGAGGCCACGTGTGTAAGAAAGCTGAGCGATGGGC -GAGAAACCAGTGAGCGCAAGGGTCACATAGAAGCCCACACGGGCCCAAGCCATATCGTGG -CGGTTGAAGGTTGGGTGCCACGGTAGAATGATGCCACCGATGCCAAATGACATGGTAAGC -AGGATATATGTCCAGCGGGAGAAGGGCTCGCAATAGAAAGCAGTGTACTCAGTCGTCACA -ATCGAGGCAGCAACCAGCATGGAGATTCCAGTGTAGTCCACACAAGCGAACCGTTCCATC -AATGACTGCGAAGCAATACTGTTCATAGTATGCCAAATACAGCTGCAGACTAGACATTTG -CATGCAGCAAAGAAGAAAACGGCAGCGATTGTGATGTCGGACTTGGAGCTCAGATGGAAA -TTGGTGTGAAGCGGGTAGAAATAGAAGGCGATGGCTAGGACGATGACGAGGCCAATCACA -TGAGACCAGATGTTGAACATTTCGTTCGAATAAGTGAAGACCGACGAGAGACATTCCACC -TTGGAGGTTGTAAATCGGTAGCCATTGATGATGTGTGGGTTAACGCGCCATGGATGAGGA -AGTTCATCGTAGTTGATTAACCGCTTCTCCTGGGCTAGCCGGATAGCCTCTTCGACGGTG -TCTCGGAGTGTATCACGAGTCCGGCGGGCACGCTCCATACCCTCGTCGACAATTTCCCGG -GCATGGACCAAGCCCGAATCCATCGCTTTCCACCCGTCATCGATAGTTCCGTAGATACCG -CGATCGCGAACCGCATGTACTTTGTTCTCGAGTTCGACCAAGAATGACTCCATTAACCGC -ACACCGGCTTGCGCCTTTTGCTCCAGAGTCTCCTTTGTCACTAGAGCTTCATTGTATCCA -TCCTCGAGGGTTTCAACCAGTATCTTGGCTCGCTTTTTGCCACTTCCCATCAACTCCCCC -GAGGCGAGGGAACATTGATCTCTCACGGCCTCCAGCGTAGCATAGCCTCGTTTCAGACTA -GCATCGACCTGCATCATATGGGATTTCCGATATTGCTCGATCCACTGCATCCGACGTTCC -AGTTCGGCCAGGAAGAGTTCAACCTAGAACTTAATTAGACACACCATTACCCGGAGATGT -TTTCCACCAATAGGGCATGTACATACCCTGATAAAAACCGCATCAGCATCTTGATAGTCG -CACGAGAGTTTTCTAGAAGAATGGAACGAATGTCTGCGCCGATCACGAAGAATACCCGCA -TCTTCGTCCGGGGACGCAATAGCAGACGTGAATGCCTCGGGATAAGTATCCACCGCCATT 
-GTAGCAGCGCAAGCCATGGGGTCTATTTGAGAGTAAACCCCTTAATGAAATGCCATATAG -TGTGAAGGTGTCAAAAGTAGAACAACTTGAACAGTCTTGGAATGGAAATTAAAGGAATGA -GGGGCACATTTAAGGAGAGGGAATGGACGGGTGAAATTCGGGCTGGGGGAAAAAAGGGCG -CCCCAACCACACAAAATCCCCAACTTTTGAGCCAATCCATTCCCGCCTCAACCGTCTCGT -TTCCGTTTGATCTTTTGCCACCCTTAATCATCGGATTTAGATCTTTTGAGATCTTTGAGA -TCTATTGGATCATTTACATTATGGCCATCAGTGATATTGTCGCCGACGAGTCGCTTTTAC -CAGTGCTTCAGACTAGCGCTGAGACTCTGGTCCAGTGTCAGCAACTCCTCACCATATTAA -ATCCCGATACCCTACCCAACGATGGCGCCAAGCTCCGAGAATTATCCCTAGCGGCATCCA -AGCAACAGAAAATACTATTTGCACTATTGGCCCAACTTCGTGGTCAGAATCGGGACGCCA -TATTTCGCGTGCGTGACACCAAGCAATCGACTGCAGAGGCTCGACAAGAAATCGACCGGT -TACATCTTCAGCTTCAGAACTTGTACTATGAACAGAAGCACCTGACAGGTGAAATTGCGG -CATGTGAGGCCTATGAGTATGTGACTCTGATTTCCTACCCATGACGCTCTCATTTTCTCT -TGGCAAAGAACTGACATATCCACTGTAACAGTCACAAGTACCTATCACTCCCTTTGATTC -CGGTTGAAGAATTCTTGGAGCTTCACCCCGAACATCGCGAGTCCAGCGAACATGACCTCA -TGATCGCCCGCATCGAACATGAGCATGCTGAGCGAGAGAAGCTGGAACAAGCGCGGCAAG -AGCTTTTGAAGCGCAAGCAAGGCTTAATCGCAGAGAACAAGAAAAGGAAGAACGACCTTG -CGAATCTCGACCAAGATCTTGAGAAGTTCATTGATGTATGTTTTTCCGCTTGTTCAGGTC -TACTATTGACGTGGTTGCTAATCAGTTTCATGATGGATAGGCTGCTAAGCCGATTCAAAG -GATCTTCGAGAAGGAATACTAGACATTGCATGTGGAGGTACAGCGGCGAGGCCTGGCCTT -TTGACTGCTACATCGGATCGGCGTTACTGGTCAATATACCCTTGTTTTTTTATGTGATTT -CAAAGTATGGGGTACTACTAGTCGATAATGACATACTAGGCCTCTTATGGTCTTTCTTTC -CAAAAAAAGCCGAATCGAATGCAAAGTCCGGAGCTGTAGGGTACGAATAAAGCATTATCA -AAACAATTAAATAAGAAAATATGAAATTACAAGGTCAGATAAAAAATACGTGTCTATTCC -AATGCTGCGTATAGTTGAACTTGTGTCGCTCCAATGCGGCCTAGGTGGTCCCTGGCTTCT -CCGCCCGGCTATCAACTTCCCCAAAGTCAACAATCAACTTCATAAACACCCCCTTCTTTG -ATGCTCTGATTTAGAGCACAAAGCATGTCCGGACGATGACGATCAAATCAGGAGACCAGG -AGTCTCCTCAGGCTTCGGCCTCTCAAGTCGACCAACCCAGCGTCCACCGCGCTCCACCCC -CAGAGACCCCGGCGGCCCTCCAAACGCGATTTCGAGTGATAGCAGCCTTTTGGGCGGTAA -TTGTATTCCTTGGATTCCCGATATGGTGGAAAACAACATCAATCTACCGGGCGTCTCTCC -CCATCGAGGAGATGGTGGATTGGGCTGATGGCAAGGTACGGCTTGTGGAATCGCTTTTGG -CTTGGGGGACTTGACAGATGCTAACAATGGTCTTGACAGGCGTGCCGACCTGTTTTCCCC -TTGGAAATCCATCTTGCGACACCTTCAATGCAATATTTCGAGGCCCAGCATCTCCTCCGC 
-ACGACGCAACATACTCTCGATGATCTCAACGAGTTCGCAGCCCATCACCTCCGACTTAAG -TTGACCAACAGTAGTACCACGGCAGTTGATAATAGCGAAGAAACCCCCGAAAATGTCCAG -TACCCTGCAGAAGAGAGAGCGGATACTGCATTAACCGTCCGATTACTGCCCCGGGATGGC -CTAGCAGGCCCTTCGTCAGAGCTCCATGCAGCTGCGACGCAGCTCGATATCTTCTACCCG -CCGAGCCAGGTCCCCGTACCATCCTCCTCCAACTCCCCCCTCTCTGCTTTCATTGCGGCG -GAACTGCAGCGATTGTATGGCGAGGAGAAGGCTACTATAGCGCACATATTGTCGGGAAAT -ACCGCCGGGCTTGACTCGACCTCACCACAACTTGCCGAAGATATCGACCGAAGACTGCGC -CGATCGATGAAATATGCTGAAACCTACCACCTCTCTTTTTCACTTTTCACCCCCAAATTT -GAACCTTCCTCGTGGGACATCGAGGCCGCCGTGAAGGAATATGTCTCGCCCCTCCTGCAG -GCATTATCTCCTATCAGCAATTTTACTGTCGACACGCAAGTACAGCTCTATGCAAACTTT -GCGCCCACTGCGCCGAAGCCTGAGTATGATGAAACCGAGGCGGCATGGACTCTGAAGAAA -GAAGATCTAAGTGCCTTCGTTAATGCTGCCGAGTGGCCGCTGAACCCGAGCATTGGCAAT -GGCCCGACGATCAATTTTATTCTCTATGTCCCAGACCCCTCGCAGTCTCCACTCATTGTC -AAGGAAAACCGTGCTTCTAGCTGGATGGTTCCCCAATGGGGCGGGGTTTTCCTCCTTAAC -CCGCCGCTTTCCAATACCGAGCAAGGTGGCCTGTCCAATCCTTCCCATCTCTCCCAGGAC -TCTTTAGCTCCTGCATTCATGACATTCTCTCACCAACTCCTCACACTTCTCGGCACTCCA -AGCACTCCTGCCTCTCTTCCACTCCGACTTCAGACATCTATCCGCATTCGCGCGGCCACT -TTGCTGCTCTCCGCCTCATCAACAATGGGCTCGCTTGCCCGTCTTACTGAGTCTTTACCT -TCCATCCCTATCCCGGCAACGGTGGCTACTTCGGTCTCTACCACACTCTCTCATTTGGCT -GCCTCCTGTGCCCACTTGCGAGAAGGGCGTTTCGGCGCTGCGCTTGCCAGCGCCCGAGTC -GCGGAGACAGAAGCTGAGCGCAGTTTCTTTGAAAAGAGTATGGTGGGCCAGGTCTATTTC -CCAGATGAGCATAAGGTGGCGGTTTATCTGCCTCTTCTGGGCCCAATTGGTGTCCCTCTG -GTCGTTGGGTTGCTCAAAGAGCTCAAGAGGATTGCCGCTCGACGCAAGGCGAAGGGGACA -TCATCGTAGAGTAATATAAACTCTCGACATTAATGTATTCTAGGAGTCATATACACCCAT -TGAAACTATTCTTCCTTTTCATGATCTATTTATGGGGGTCTCGAAGATAGGACTCTATCT -ACAGGCATATATATCCAGTAAATTGATCTTGTGGATAAAACAGCACACAATATTTTAATT -AAGTGGAGAATTAATCAGGTATCTAGGTAGGTGAGACTAAATTCTCAAAACAAAGTGAAA -AACAATGCAAATATGGATATCATGGTACACGATCAATATGCCAGAGCAACCAAATGCCTA -AACCAAGGCGACTGAACGCACAACTCAACCCAACCTGGCGCAACCCTCACATCCCTCAAC -TATCAACGCCCAGGATCCCGGAACGGATTTTCCCGAGAGTCATCATGGGTCGGATAAGGT -GCATTCCCCTTACCCTCATGAGGATTATCAACAACCCGCATCAGACCGCTAAAGGTAGTA -TCCGGCCTAGCAGCAGTACCAGGGTACGGCTCAGGCCCCAAAAAAGTACGAGACTGCGAA 
-AACGACGACTCAGGCGGTGAATAAATTTTAATACTTTCCGTGCTTGGCTGACGCTGCCGC -GGCGGAGTAAATGGAGCCGCTGGCATAGGAATTGGCACAGGGATAGGTGGCACATCCTTG -TCTAGTCGGGGGTTTCCTCTTGGATAACCGGAGAATAGACTCCGCACTCTGCTTCCAGTA -CGGTGGATCATCGTGCGGGTGGAGCGTCCTCCTGTAGAAGAGCGCTTGACTCGCGGTGGG -CTTCGGAGGAAGTCTGTGCGGTATGAGGTATCGTCTTGGGGTATGGGGTTCGATATGCAT -AATATTGCCCCGGAAGAAGAGCGTTGGGACCAATTTGGCCCCGTTTTGGCTTCTTGTATT -TCAGATTCGTCTTTTCGGGTACGTCGGAGACAGAGAGATATTAAGAGGGCGGCGACAGCA -CCGAGGACGGCACCTGGGAAGAAGCCTGCTGCGATAGCTCCAGGTGGGAAGGAGGGACAT -GAATTTGATAGTGCTGCGCATGTTGTGGAGGTTGCGGACTTTTCGGATGAGGGAGACGGC -ACTGGTGTGATAGTTGCCTGGGGAGCTATTGAGCTTGTAGCCGTTGTCGATGACGATGCT -GGGTTGGTCGAGGAGCTGGATAGAGATGATGTGGTGATTGTTGCGGATAGTTCTTTGTTT -GTAGAGGAGGCGGATATGATCGCTGTTGCCGTTGTGGAGGTTTTTTTGTTCAAAGAGCAA -GTGTTGCCTCCTTCGCAGGTGTAGCCAAACGGACAGCACGCATCGCCGCATTTCGGAAGG -CTCTCGCCCAGACGGGTTGTCTTTATGACGCTCCCAGGGTAATGGGTGACATTCTGCTGT -TGTATATCACATGTAATTGGCGAAATGTAGTTACAGTCTTTACCGGCGGGACAGCAGATA -GAGGAGCTGCCCTGGTCTAGACTGATGCAGGTGGAGTCTGAAGGACAGCAGAAAGTATTT -GAGGCATTCGTACTGCCGCAGCGAGTGTATGAGGTGGGACACGCATCCGAGCTTCTTGAG -ATGAGATCAAAAGAGTTAGATGCTTGTCCTGGTCGGATGCTGGCCAGAAGAGTCAAGAGC -AGCAGAACGGATGGCCGTCCCACTGTTGACCTCGTCATGCTGGTCGTGTGTGTAGGCCGA -GAGACAGGGAGAAAAGTCTCTGGCTATCAGATCCGGAAGAGCTTCGACGAAGGATATAGA -CTGATAGGTCCCTTGACAGTACTCTCACGTATAACGTGCAACTCTAGTCGTAAGTAGAAA -GACCAACACTGGCGGGAAACTTGACAGAGGTAAATTGAGAATGTCTAACAAATAGCTGAC -AGACAAGGGCATTAATTGATGCCTGAAACAAAAAATCAATCAAAGAACCAGGATAAGTCA -TTTCTTGAAAGGAACTTGGGCACTGCCATCCTCTAGGTCTTGTAGACAAAGAGTACTTGC -ACGCGCCGGGGCCCTAAAATAGCCATTGGGCTGTTTCAGAGTGTGCAACATGATAAAAAT -GGCAACACGTTGGGCGCTAAATTATAAATATTTGTTACAGAACCACATCAGACCAGGGGA -TTGCAGAACGAGGATTGGATGAAGGCCAAAGGGTGCAGAAGAGTACAAGGCGCTTAGAGA -CGAAGTAGATTTGGACGATTTTATTGGCTGCTGGGCCAGGGGTCTTACATAACCACACTA -GTCCCGATCGGGCAGAACGTCGGGAGGGAAATTCTTCGACGTCGAAGCTCTGCACGATTA -ACCCGACCATTCACAGAGCCGCTCGTCATGTTCAGCCGATTCGCCAGGCCGCAAAGCGCT -TTGCGCGCAGCCTCCTCTTTCTCCCCCGTGAGAATCACTTCCTCCGTCGCCAATCCGTCT -TGACTGACCGCGTTTTGTCTAGAAACCTACCTCCGCCTTCTCACGCTTCCAGACCCGGGC 
-TATCCATCGCATTCCCCAATTGCAGCACGACGCCTACTATAAGGAGAATGGTGTCCCTGA -GTTCATGTCGCCCGAGGCATTCGACTTCGCTTGGACCCAATACCAGAGTCTCCTCGTTAA -CAAACTCAATCTGTTGACACAAGGTGAGCCCGGCACTCCGACCCTGTGCAACAGTACATT -GGTTCACAATGCGGTCCCAATCAATTACTTGAGCGGCATGGCACATTAGAAATCGTGTCT -GACATTCAAATGAAACAGATACCGTCGACGCAGATGCCAAACCTGGAGAATTGCTGGTCA -AATACTCTAAGCGTCCGGAGATGGCCTCGGTGTTCAACTATGCCTCAATGGCCCACAACA -ACCACTTTTTCTTCAACTGCTTGGTTAGCAACGACACCGATCCCTTTGTATACGATATTC -AATCCTAACCAGCCTGCTCAATATAGTCCCCCGCAGCCACCACTATTCCCGAAAAGTTCG -CCAAGGACATCGTCGACACCTGCTCCTCCGTCGAGTCCCTGAAGATGGATTTCCTGGCCA -CAGCCAGCTCAATGTTCGGCCCCGGATTCGTGTGGCTTGCTAAGAACCTCGAGCGCGAAG -GCATGATGCACATTTTCTGCACCTATAGCGCTGGCTCTCCCTACCCTGCCGCCCACTCCC -GGCGGCAACCCGTCGACATGTCCACCCACACCCCGGAGACCCAGCTGGGCAACCAGTTCG -CCGGCTCGATGGGCGCTCACGCCCAGAACCAGAAGAGCCTCGCTCCTGGCGCCGTCGATA -TCCAGCCCATTCTCTGTGTTAACACCTGGGAACACGCGTGGATGATGGACTACGGTATTG -CCGGCAAGGACGAGTACCTGGAGCGCTGGTGGGATCGGATTAACTGGGATGTGGTTTTCG -ACAACTACAATGCTGTCGGCTCGATGAAGAGCTCGCGCAACTCCCTCAACCGTCATCGGA -GTATGAGCATGCTTTAAGTTGAAAATCATTTCTGTGTCTGGATAATCTGTATTATATACT -ATCATGTTTTATTTTTTTTTGGTCTCCATTGCTCATTCTCACAGGCTTCTGATTGAGCAT -GTACAGCGATCCATCCAATAGAATCAAATTGGCGATGCTCTATATTTCCATTCTGCTGAC -AAGAGTTGAAGAGAAGCAAGTCCAAGAAGTAGTCTTATGCAACGTCAGGAATGCAATCTA -TGCCCAAATTCCAAGTACTCCACTAATATACAATCCAACCTACAATTGACACCATCACTG -CCCTTTACCCTCAACCTCCCGTATCCGCTTCTCAAACGCCTCAATCATAACCCCAGCCAT -CTGCCCCTCAACAGCACTCATAAGAGTAGCATGCATCTGCGAGCGAAACTCAAACCGCAC -CTCCAAATTAACCTTAGTCAACGGCCTACCCGTGGTCCCGGACGGAAGCGGCACCAGCTC -CCACTTCGTAGTCAGGTACTCGAAAAGGCCCTCATTCGCACCGGGGAAGAACCCGCTTAG -CCCGGACGAGGAGCTCTTACTAGCCTGTCCATCCTGTCCCTCTTTACCATATTTGGCCCC -GCTATTAGCCTCCACGGTCAGATTCTCCACATCGCAGATCACGCGCGACGTGAACGTCTC -ACTGAGCGGGCCGTAGCCGACTGTCAGGAACGCTTGTGTCGGGTACCCTGTTTCTGGATC -GCGTGCTGTCACGGTAGACGCAGTGAGGAAGGGGAGGAATTGGCTGTAGGCTTCGACCGA -GGAGATCACTTTGTATAGCAGGGTTGGGTGGTAGGGGAGGTTGCGGGTGGCCGTTAGGAC -GCGGTGCGGGGAgttgttgttattgttgttgttgGGGAGGAAGGAGGATAGGTCAGGCAG -GCCGAAGGCTCGTGTGGTTGTTTTGGTGACATTGTTTGGAAGCAGAGGTCGGCACAGACT 
-TGGTCGTTGTGTGGTGTTTATTGTCCTCAGGTTTGTCTTGTGGTGGGAGACTGGGCACAC -ATTCTGGCGCTGGGAGGGTAGAGTTGCGCGACTTGGAAGTCGCAAGGTGCGGAAGGGTCG -CATTGAGAAGGATATAAGAAAGATATACACAAATTGGTTCTTTGAGTGTAGTGGTTCCTG -GTAGACAAGAAAATCGGTCTGGTTCCGCGGAGCGGGATCTGCACCTCGGCAGGATTATTG -ACTTAGTGCCCGGTGCATACTCCATACTGTGGATTTGATTGATGGGTTTTGTACTCCGTT -TTTTTTTAAGATCAGAATAAACCAATAGATCTATGAAACGTGTGCAATGACAACATTTCA -ATGACTACCAAGGACTTGTACGACGGCATCTAGCCAAGGTTGGGTATTGCGGATTATGAG -TCGTCCATGCGTGTTGGATTTAGTATTGTTGACCAAAGCCTGTTGGAGCTTTGATTCCAT -CAGAAATGGCCCTCATAGACTTCAATACCCACAGTTTTATATCTGGGTCACTTATATTTG -AAGTTCATATTCATTGAATCTCAGCCTTCATGGCGACTACTTCTCCGGGCCCAATAATCT -GCACCTTGGTGGTGTCGCCCTCCTCAAGCACAATCCGATACTCTTCCCCGGCATATAATG -GGCTCGTTGCCCGATAGGTGATGCGCTCAGGAATATCCAAGGTAGAATCCATGGAATTCT -GGCGAGTGTCACGCCAAAGATCCAGAATAGAGATCAAATTCAACGGACCATGTACTACAA -TATCCCTGTGTCCCTCTACATCACGCGCCCAGGGTAGTGAGTAGTGAATCTTATGAGGGT -TGAAAGTCAGAGCCGAGAAGCGGAATAGGGTCACGGGCGTTTGCATCAGTGTTCGAGTAT -GGGTATTTCCCTCGGTAAAAATGACGGACGATGTAGGATTCGGGGTCGCAATATGGGATG -TCGAAGACGAGGGAGGGGCGCTGGATGGCGTTGATGCAGAGGGAACAGGTAGAGCCTTAC -GGAATACCCAATTGCTATATGAATGTTGTTAGTACTTTCGGTTTGTGATAATCTCGAGCA -AGGAAAGCCGGACCGTCGGTCGATAATTGACACGCCGTGCTCGTTGGCGAATTCTTTCTC -AATTCCAACAACAATCATATCCTCGCCGGTCTTTTTCACAACCTTGGGCTGCGCACTAAG -TACTCGCGTTGTCTCTAGCACCTCCTGTCCTACTCGTATTGGATTGGGGTTCCCATCCAA -ACGGGGCCACAATACCTCTCCGCCCGCCCACATGCGCCGTGTGAATGGTGCGTCTGGATT -GTACGATGCATCGGTTCCATCCGGCCCAAGTTCGGTTTCCATGAACGCAGGCGTGAAATA -CGCAAGGTGATAGCCTGGCGGCACGGGTACTGCGGATTGCGGGCCGGGAGCACCATTGGA -TAGGGAGGGGCCACCGGGGAAGAGGGTAGGGCGATTGAATGTGAGGGTGAGAAGATGGAG -CTGGTTTGCATCTATTGTTTGCGTACGGGTTTGTGGCCCCATGGATTGGAAGCGCGACAG -GAAGGCCTCGGCTACTGTTGAAGCATCTGAGTGGAGATGCTGAGAGATGGTGAAACTGCG -AATCGTGCTCGGTCGAACGTGCGCCCGTATACGCGGGCGAAGGTGTGTGAGTAAGGACAA -AGAAGACATGATAAATTTGGGAGCGTTGTCAAAACAGAACAaaagagaaagtagaaagca -gaaagggtaaaaggaaaagCAGTGGGGATCTCCAAATACATGCGGGGAAGCCGATTTGCC -TCCAAGTCGGCTTTATACTCCTGGACCTTATCGACCTAGAATGCCATTGGTCTCTCATAG -TGCTATAGAGTGAACAGTATTTGGAGTATAGGCATCCTGATCTTCAATGTACATTTGACA 
-GTCACGAATCGGACCAGTAGATGAAAAGCCCTAAGTGGTCCTCAACAGCCCCCGGAGCAA -AGCTCTAATGCGGTGTGATAGAGGCGCAGGTGCCCCTGCTTAATAATTAGCCGACATCTA -TCTTCGGCCTAGCATCACGATGTGTCCTCCGTACCTGTGCAGGGATGTGTAAGGATCTTC -TTGTTTCCAAGATTCCATGATTATAAACATGATTAGCAAGCTCAGATAAGGTTAAAGAAT -ATAAAGTAGACACCCGACTTATTCTTAGACGTTCATTTTTActaccaacacacatagcac -cgaccatctacaatcaccatttaccatGTCTGACATTGAAATCATCTGGGGCGGGGCCTC -GCTCATGGACGAATTCGCATATCCGAATCTTGAATCCATCAATGAGGTCTTCGACATTCT -ACAGGCCAATGGCATCAAACACATCGACACGGCCAAATTCTATAACAACTCCGAGGTGTT -ATTGGGGAAACTTCACGCCCATTCCCGTTTTACCATTGATTCAAAATATCCCGGCGGATT -TGGATCTGAGCCTTCCACCCCGGAGTCCTTCACTGAAACCTTAAACCAGAGTTTGGCCCG -TCTCCAAACCGATGAGGTGAGCTTCTCATGTTTCCAACCCATACCCCAATCTCTAATACC -GATTTCCAGCTCGATATCTACTACATGCATGCCCCCGAGCGCAGAAGCTCAATGGAAGAC -CTAATGGTTGGTATCGACGCTGCACACAAGGCGGGAAAGTTCAAGCGATTCGGTCTGTCC -AACTATCTGGCCGAGGAGGTCGAAGAGGTAGTCCGCATCTGTCGTGAGAAGAATTACGTT -ATGCCAAGTGTCTACCAGGGCAACTACTCTGCCGTTGCGCGCCGACCAGAGAAAGAGATT -CTTCCCACGCTGCGGAAGCACAATATCGCCTTCTACGCCTATTCACCAATCGCCGGAGGG -TTCCTGACAAAGGATGTCGCGACGCTGGTTGCCGGCGGTGAGGGTCGTTGGGACCCTAAG -ACACCTCTTGGTGGAGTCTACAATGCGCTTTTCAATAAGCCCCATATGCTGGAGGGCTTG -GGAGAATGGGAGAAGATCTCCAAAGGGTCTGGTATTCCCAAGGCTGAGCTGGCATATCGG -TGGGTGATGCACAGCTCGGCCCTCAAGGCAGAACTTGGAGACGCTTTGATTATTGGCTCC -CGCAATGTCGAGCAACTCAATCAGACGCTTGCTGTATTGAACAAGGGACCTCTCAGCGCG -GAAATTACAGCGCAGATTGATCAGGTTTGGGATATTGTTGAGGTCGATTCGGTTTTGGAT -ACCTTCAACGGTTTCTCCAAGTATGACAAGGTCTGAGGCCTAAAGAGTACCGGAAGGAAT -GCATTTCCAATGATTTCGCATTTACAAAAACCAGAGCTCATAGAAATAGAACAGTGCAAA -GCTTGAAAGAAGTCAGGTTATCGGAAGTGGGGGTCATGGTGGAGATGAGTCAAAATGTAT -ATCAACAATATCCCAAAGGGGAAATCATACTTTGGATTGTCTATACCTAAGTATGTGTGT -AATCTCCAGTGAAAATATGTAAATTATGAAATTGAGGGCCATTGAGGGTATCGTAAGGTA -AACGAGGGAAGGGGCTATGGTAGAGAACTGAAAAGAAAGTAAAGATATAATAGGGTAAGT -CAAACACAATAATCCGAGATGAGTCCAAGAAAAGCAGATCAAAGTCGGACCTGACCACCA -GTCAACCGTTCCCAGTCCTTCTGTCCAAAGGGGACATAGGTATTACCCTGGAGCCAGTAA -ATTCGGTCCTTCTTCGCCACAAGAGCCGGATCCACCCCCTCATTGCGAAGAACATGCTTC -TGTTGCTTGTTGTTACCGGTCGACTGCGTCACAGGCGTAACTCGCAAAAACATCGGTGCC 
-GCAAACCGGGGCAGATTCTGGAGAATGTGTGCAGCCAGAGTATCAAGCACCTCGCGCGAG -GGCTCTAGTGCGGAATCAGAGGTATTTCCGTTTGCCATCTGCTGGTTGAAAAGGATGGCT -GCACAGCCGGCACGACCATCGTGATTGGGAAGAGCAACGCCGTACACATTGGCTTCGTGG -ACGTCAGGATGCGAGCCCAGAACCTCGGAAACCTCGCTGGTGGATACGTTCTCGCTCTTC -CATCGGAAAGTATCACCCAGTCGATCGGAGAAGAACCAGCGGCCGTCTTTGTCCCATCGA -ACCATGTCACCAGTCCGGAAATAGGCATCGCCCTTCTTGATTACATCGCGGATAATCTTG -CTCTCTGTCGCTTTCTTGTTTCCAAAGTAGCCCTGGAAATTGGCAGTGGGATCAGCTGCG -TCGATGGCGAATAACAGCTCACCGGGATCACCCCGAGGGACAGATTTGCAGAGACCAGTC -TTGGGGTCACGCCACGGTTCCTCGGTGTCGTGATCAACATCCACAATGGCAGTGCTACGA -CCAAGGATAATATTTCCTAAAGCACCGTTTCGGCCGATGGCTCCCGCGGAGAAGTCGTTC -GACGAAATATTCCATGCGCCCGCAGTACCTTCGGTGGCGGCGTAGAACTCGGCGACTGTG -GGGATGCTGAAGCGCTCTTTGAAACGATTCCAGATGTCAGGACGCAATCCATTCCCGAAG -GCCAAACGGATATTGTGCTTCTTGTCGAGATCCTCGCCGGTGGCTGGGTCAATTTCAGGT -GTTACACCCAATAGATAACGTAGCGTCTCGCCCACATATTGGATAATTGTCGCATCGTTC -TCACGAGCCTCCTTGATGAAGTTGCGTGCAGAGAACCGGCGGCCAATGATCAAGGCGGAT -CCGCCCATTAAGCAAGTTATGAACCCAAGCACTGCAGCGGAACTATGGTAGAGAGGCATG -CACTAGTCAGGAGGTTAGTTTGGACAGTGACATTTGCAGATTTAGTAGGCCTTACCGTGA -AGAATCTGTCAGAGGAAGTAATACTTAACCAGTCTTTGACAAATAGACTACCAGACCAGC -ACTTCTTCCAACTGACAATAGCTGGTTTGGGAAGCCCAGTGGTGCCACTTGTGTAAATCA -AGATGGCCATATCACGAGGGATTAAGCCACTGCGAACCTTGTCATCCTCTCGCGTAGGCT -CCATTGCGAGAATCTGCGCTTCGACCTCCGGGGTATGAAATACAATTTCAACCGGTCCCT -TGCCATCACGGAAGCTCGGAGATGTCAAGATCTTCTCCTGCTCCGCCGGGAAGCAGTGAC -GAACTTCTTCATCAACCATCAAGAGCCTCGCACTAGACGTTCGAATAGAGTGCGTGAGCG -GCTTGCCAGACAAGTTGTAGTTGATAAACGCAGGGACTGCGCCAATACTCCACAGTCCCA -AAATCATGAAGATGAAGGCGGAAGAGTTCATGAAGTCGATCGCGACAATTTCCTTGCGCT -TGATACCGTGAACGTTCTTGAACCATGCTCCGTAGCGCAGAACCGTGATATAGCCTTCGT -GGAATGTCGTGGTTTGTCCATTATACACAATAAAATCGTTATCTGCAGTTCTAGGATCAA -GTGCATAGGCTTCGAGTGTGTAGAAGAGATTCAAGCGGTCACCACGTTCGGCGATGCGGG -ACTTCACGGACATCTTCAATAGCCCCTTTATAAGGTTCAAATCGTAGGAGACCGACCATC -TTGCGTTGAGATATGCAAGTGTGGTTGCCGCTGCAGGGAGTGCAAGACTCAAGGGAATAT -CTGCGCAAAGAGACAGCTTGTTAGTCATTCATCCAGTCCTGACGTACACCTTAACATACC -GGGCAAAGATGACAGCGGCATTATGACCAAAAAGGTTCAAGGATCAAGCCCAAATGAGTA 
-GGATCAAAAGAGTATGAAATTTGGGTAACAGATGAGATCGCCCCTTCTATCGCAGCTTTG -AATGTGGATTTGGGACAAAGAACGAAGAAGGTGTGATAGGGACTCCTTGGCTGATACCTC -GGTGCTAATAATCACTCAAAAGCAATGAAGGTCCCAGCGAGACCCGTGGGAGTGAAATGG -CAAAAGAGCGAAAGTGCCTCGGCACGGGTGTGCCGATCCCCTGACGACATGCAACATTGG -AATGTGTGCGACCCAAAACAAGAAATTGACGTACAGTTTCTCTGCCCAATGACTTGATAC -GTTTAAATGTTACATACATTCAAATCTAACAATAAACGATCTATTGTACACCTTCACATG -GGCTTATTTAGAGGGATCGTCCCCCCGCACTTTTTATAACCTCACTTGGTCTTAGCGTCT -GGATCTACATACCTATTTTGGCTATTTTCAGACGGGTTCGAGCCAAAATCAAACTTAACA -TATATCAATAAAATGGCATCTCACTTTTTCAAGGGCCTCTTCACTGGGACATCCTCATAC -TCATCTTCCATGAATTTGCCCAATGCATCCAAGCCCTGCTTCAGGACCTCGGTTTCATTG -ACATACCCAATGCGAACATAACCCTTGAAGTCTTCTCCAAGTCCAAAACAAAGGCTACCT -GGAACGAGCATCACACCAGTCTGTTCCTGAAGCATCTCGCAGAAAGCGACATCGTCTACG -GGTTTTCCCATCTTGTTAAAGCGCACAAATGCAGTGGTACCGGCTTGCGGTTTAATCCAC -TCACACGCCCAGCGGTGAGTATCGATGAACTTCTCCAGAATAGAGAGGTTAGTCCGAGCG -AGATCAAGATTTCGCTTAAGAAGATTGTGGATGCAATCCGGTGCTAGTGCAAAACTGGCA -ATTGCCTCGTCAATTTGCCCGACAGAGATAGTTGTATAATCGCGCGCATTAGCAAGAGAT -TCAATGATAGAGCTATCCCGCGACGCGACCCAGCCGACCCGCAAGCCTGCCAGGCTATAT -GCTTTGGAAACAGATCCTGTCACCACGGTGCGTTCGTAACCCATGGAAAGAAGTGTGGGT -GGGAATCTCGGGTCCGCGGGACTGATATTGTGGAAAAGCGGGCGATAGACCTCGTCGGAG -TGGATGATAATGGATTTCTCACGCGCAATATCGACCAATTCTTGGAGTGTCTGGCGGGGG -ATGATGGCTCCAGTTGGGTTTTGCGGGTTGCTATGAGCATGATGGTTAGAACAACACGTT -CGTTGTATGATTATGACAAGATTCGAACATACTTTATGATAATCAGCTTCGTGTTGGGAC -GAATAAGGCCTTTCAGCTCTTCAATGTCCAACTGCCATCCATCGGCCTCTTTGGACTTCC -ACAAGCTGACTTCGGCCCCCACTGATGCAGGGACAGAGAACAGCTGCTGATATGTCGGAT -AATGACAGATAACATGGTCTCCTGGGCCAAGCAAGGTATATAAGAGCAAGAAGTTCGCTT -GGATTCCGCCAGCGGTGATCAGGATATTGTCCTTCGGTAACTGCGTTGGTGCCCTCACCG -AATACAAGTTCGCCAATGTATTTCGCAATTTATCGGAGCCACGAATCGCCCCATATGTAA -GTTTAGTGGATTGGAGCTCGGTCAAAGGGTTGGACTCCTTGTCATCGGAAAGCGCGCGGA -GATCGTCGACGGAGATGGATGCGCAGCAGGTTTCGGCGGTATTATACTTGGCAGTGGTCT -CATACTTGTCCATCCACTGCAAAGTACTTAGTAATGGGTATTTGTAACAACACAAGGAAC -AGATAAATACCACCGAGTCATGGGGTAATGCGGGGAGGTACCTGTTCAACCGCGAAGGCC -TCAATCTTCACCATTGCTGAGAGCAGTAAGCAAAAGCGAGCAAAGGAGCTATATACAATC 
-AGATGTCTTTGTCCAAAGACGTGCCCAGGTTCAGCTGATGGGTAATGACTCAAGCTTCTC -GTTACGCTACCGCCGCCTTTACCTGTGCATGTTCCTCTGTGCTCCTGGGCGGTTTGCAGG -GTTTGCATACCGCTTGGTGCCTAAGGCCCCAGACTCAGTATACCTCAGGACCGTAGGACT -TCGGCACCTACCGAAGATAGGTTCGAGTTGTTGAAAAGGTTGTGTGCAAACGATTTCCCA -TTCGCATACCTTTATAGTAACTTTGGAGCTTTTTTTTTCCTTGGTAAATAATGGAGAAAA -TCCGCAAAGAGAAAGATCTTTATCTCCGTACCCTTGTTATAGAGATTGAATTGAATGCAG -TGTCTAGATTGTAATATCAGAGCATAATTATTCACAAAGCTATTGAACTAAATGCAAACT -GACTTTTTCCTAATATCGGTGACATCCAAAAGGACATCCCCACGACGGCCTCCTCTTTCT -TTTACAGTCACAGTCCAACCTATTGGATGACCCGCTGTCCTTTCCTATCATTTTGTCAAC -CCAATCTTCCATTTCTAGATTCCATTAGTAACCACTGGGACTGAATAGGGAGGGACTACT -AACTTTTGTCTTCGGTGCTGTGAAATAGAATATTTGTACCCGAATTGTCGGCAATTGGCC -TCAAATGGGAGATCCATTCTTTCAATGAAAGATTAGTTGTGCGAATGTCAAAATCCACTC -GGCCGCTTTTTTCGTTTCTTTTGCTGACTCTTTGAAAATAGTAATAGTTGCCGTCACAGA -TGATATCTTCCAATGAGAGTGCGTCTAGAGACAGCTCGTTCTGTAAAGTGTCCAAAAGGC -TGGACACACAAGTCACCTTTTCTTGGTTGCGTTTCCATGCCATAGAGGTTTCCAAGGTTG -AACAGACAAAGCCTCGAGTACGCTCGGGTGGAGGATATGGCTGATAAGTAGATAACCACT -TTGCGCTGAGATATTCTCCCGTCGTCGACTCAGAGGATTCGGAAGTCGAGTAATGCTGTG -GGAATTGAGGTGCATTTTCTGCCCATGGTGAGAATGATGGCCGCGTTGGTGCATATGACG -GCGATGTTGGTGGATATAACGGCGATGTTGGTACAAATGGTGGCCATGTTGGCACATGCC -CCGGCAATGTTGGTGCATATAACGGCAATGCTGGTGTATAAAACGGCGATGTTGGTCCAT -GAAATGGCGAGATTGAATGAACCCCTGGTAAATTGAGCTGAGACCTGAGATTTTCTGTAG -CTCTCGACTCCTCGCTTGTTATTGTCACCCGTTTAAGTGTAAAAGTGACCAGAGTTTTCT -TTACACTAAGCAATAGTGATTTCAGAGTGTTGAAATAGATATAAAGATCGTGGATATGAA -GCTCTCGGAGTCGAGTGAAGCTAGTGTCATGAAACATGTTTCCAACGTAGCTTTTTGCCA -GATGGGCAGGCATATAATCTTGTGACAAAATCAGACCTTCCAGTTGAGGTGCAAGCTTCG -TGAAGAATTTCAGCCAAGACACCTTATTGAGATCAATACATAAATAAAGATTCTCCAGGC -CTTGGAAAGTAGACAGGAGAGTCTCAAATTGTTCTTCAGAGAGGGCGATTCTGGGGCCGA -TATCACCACAGAAATTTGAATCGCAGGTATACAGCTTTCGGGGTCTGCAGCTTGAGCCAC -ATAACGCAAGAAAAAGCTGCGATAATGCCAAGGAGTTCAGTTTCCTTATCCGGCTATCCT -GATATATATAGTTCAACGCTCGCAGGCTGTAAACATCGAATCGAAAATCAATCCGGCTCA -TCATCTATCGCCTTCCCAGGAATGGAACTGTCTGCTGCCGTGGATCAAGCAAGAAATCCG -TATTGTATATACGAGTCCTATTGTTTGGAGGTTCTTAAATCGATCTATACATTTCCGAAG 
-TCTAACAGTGAACGCTTCTGATTCCAGCAGGTTAGAATTATCCGCAAGTATGGCTTTATG -GACGGCATACCGACTTTCCATCTCGTCGCCATTAACTTTTCGGCAGCTTTTTGATGATAT -AGATACCATTCCGAGGGGCTTTTCAGCACCATCGAACACAATTGGTATCATCCATAGCTC -TTGGACGTACTCGCAGAGACCATCTGACTTGGATAGTTCTTCCAGTTCATGGAGACTTTC -GCTCGTCACAATGAAACGGACTTTTCGGTAAAAAGTCTTCGCGAAATTCTCCAGTGAGTT -TTTGTACAATGCGCGACATGATAACCTCAGAGCGACTAATTGCTGGAACTCCAACTGGTT -ACATATCTTTGAGATCAATTCCACGGGCAGAGCTGTAATAGACATTTCGGCTGCGCGTGT -CGCGCTATACAAGGTTGAGAACAATGATGATGAGAGGCTCGGGTTGTTATCCGCAGAAAT -GGGTAGCAATCAGCCTCACTCTCTATGGCTCCGGGCTCTCTTTGACGAGAAAGCGAACTC -AAGGTGAAATTTCTGCGGAGACAACGAAAAGACTAGTGCTATCTGTTAGCGGCTACAAGC -ACCTGAACGAACGATGACTAGGTTAGTACCGATTGTACTGTATTATTGATACACTGTATC -TTTTGCATAACATACCTAAGGTATGTCTGCCCACGTTGTTTATAGTCCTACCTAGTACCT -ATGGGAGTCCCTCCCAAGTCCCATGGTACCGAACTCCGGACTATGTAATTGGGCTTTCGC -TTCCGCCCCTAACCCCTTTGGGTTGTTGGGTCGCATCACTGGGCCCGAGTACCTCTTCAT -TGCCATCTCGTGGCACTCCTATGGGACCAAATGGTGGGAGTTGGCAAGGCCGATGCCGGT -ATATAGAAAGAATCGGCTTTGGGACCACGTACGCGGTTATACTAGGGTATAGTGGGTGCA -GAGAAACCGGGTATAGACTCAATAGATACCTAACTATATAGATGATTTTAGTGTATTTAA -TTTGATAGCGTTTTGCTTGAGATGAATACATTAAGATCAGTAGGCTGGATAGGATGGACC -ACTTGGTGGTGAAAACGCGTGAGTTAGGCGAAAAGGCCATAGAGATAACCGAGAGACAAT -CGAAACGTCCGATTGTCACTGCAGCGGTATGCATTTGTTTTTCCAGTATCTATAATTATA -ACAGATGTCTCCACCATATCTTTTCGTCCCTCTCCGCCATCAAAGGCTCAATCTCAATTC -CTAACCAGCTTGCTGAGCCATCAGGGCCAATCCGAAACTCCACCCGAACAGCACTGGGAT -GGCCCCATCTCACTGCAATTTGAAGCCAATAGGTACCGGAGACATGGAAAAAGCCCATTT -CTAAATTTTTCGAGTAGTCACTGTATCTTGCAAGCGAGGCGCACAGGTCAGGGGTTGTTC -GGTTTAGCCCCTGGTAAGCGTGATCACTCCTGCAATTGCTGGATATCCTTAGCTCTGGGT -AAGCGGGGTGCGTATAGGATCCTTCATATGCCGAGAAGTTGACAGGGCTTGCCAACGGAG -GATTGGGGATTCTTGGATAGAGCTCTGATAGGATGTTTTCAGAAAGATATCCGTCGTTGA -TGATTGCATCGCCTCTTTTTAATGATTAGAGAGATATTTTGAAAACGATAACAAGGCCAA -ACCATCGAGTAGCCAACTGAATGTTTTTACATACCTTGCAACCCAATCAAATCGACTCTC -AAGGGGAATCTCCAATTCTTCGTCAATCTCCAATTCTTCGTCAATCAGATGATAAGCAAG -TGCGTTCGCCGCCGCGTTCACCCCATCCATATTATTGCCTAGCAACACAAGCCCGAAGTC -TCGCTGTGGTAGGAGTATAACGGATGCACCATACCCAAACTGAGCCCCTTCGTGGAATAG 
-AATCATCTCGCCCTTATATGCTTGTGACATCCATCCTAGTCCGTAAAGTGTAGGGGTTTG -AAACGGCTCAAGAGGATTAGGCGACACTATAGAGTGGCCACCAAACAAATTTTCGTAACC -TGTCTGAGAGAATGGTAGGCCACGTTTCAGCATGGTTGAAATCCACTTGGCGTAATCTGA -TACCGACGACAGTATGTTGCCTGCTCCACGGATGTTATCCGAAAATACTTGGTCGGTGGA -GGCAATTTTACCGTCCAAGCCAACAAAGTAGCCTTGTGCAATATCTCGTTGTGCGTTCCG -AGCCTTGGAGAGGGAAAGATAAGTCTCTGTCATGTTCATAGGCAGCCAGAGATTTTCGGT -AAAGAACTCCTGCAAGGATTGGTTCGTCACGGTTTCGATAAGGTGGGATGCCACAAGATA -CATGAGATTTGAATATTGCCATTCTGTCCGCGGAGAAGCCGTAAATGGAAGATGTCTCAT -TGATCGCACTGCTTCTTGAAGCGTTACGTCGGCAAGCCAAACCCAGTCGTGGCGTGGTAG -ACCAGAGCGATGAGACAGAATGTCAATGATTGTCATTTGCGATGTAGACCAAGTATCGTT -GAGTACAAAATCTGCTGGAATAATAGTGTGGACCGGTGTATCCCAATGGATGGCTGGAAA -ATTGATGTTATCGTCAACAAGGAGAGAGATGGCCGCAGAAGTGAAGGCCTTGGTCGTGCT -TCCTGTGAAAAATAGGGTATGTTCTGTCACTGGTTGAGAACTGATAGTATTCGAGACGCC -ATAGCCCTTTGTGGTTTTCTGTCAATTTAATCATCGATGACTATATAAACAACGGGGTCA -CACACTTTAACAAATGTGTCTCCATGAACGACTGCAACAGCCAGACCGGGAATGCTGAAA -TGTTCAAGGGCCCACTCGACCCTTTGGTCAAACTGCGCATCAAAAGGCGAACTGGACTGG -TTTTCGTACACTTGAAGGGGCTGTTGAAAGGTCTCAAGGCCGGCGAGTACAAGGCAAGGA -AAGATCAAGGCCAGTGGCGAAGGCATTTCGGGCAGAAAAACATCTACAAGCAAAACCTGG -AATGGGATCAACAAATGAAGTAGCCCGTGGAGGGTCAGTTGGAGGGATATTGATCCAAGT -GAAAAATTAGACATGTTAGGCAACTCTCAATGTTTATTGGCAGTGCTAGCGAGTATTATG -CTGCTGAGACTATGACTGGGCACTATTACTAGTGTCTCACAAAGTGACAGCGATACTATA -GTATTCTTGTAATAGCGCACTGGTTACTGTAAAATGACCCTTTTTAGTATTTCAACGGTA -GTACAACGGTAATCTGATTTCTAGTTTCTCCCAAATGGTATAAATAAGCTTTCGTATCAT -CTTTAAAATTGTGGAGCCAGCGGACTCGATCAGCTCGACCAGCTCGACCAGCTGGGCGAT -TTGGTCGGAGTCGCCAAGGGCCCCGCGATGAATTATTACCCACAAGTTCCTCTTTATTAG -CTACAAACCTGGAGATGATATACGACGTAAGCTAGAACCAAGAAGCGGGTGAACATAATA -GCCCCAGAATTCGGATGTGGTACTGATGCGGTATTTTACCCCCGGTAGAGGTACATGAAT -GTGATTCCTTTAACCTGGGAGATTCATACATTGTGCACGCGTTGTATAGGGGGCCCTAAG -TCATCGTACTAGTATACGTGCTCTGGGTTTGACCTAGCAAGCGTATAGGAACTGATCCCC -GCAGAGAGCCAATATACTTAAACAAATTTTGGCTGGCGTCTAACAGGCCGCAATATTGGA -ATTCGATATCAAAGGTTGATACTACCCAACGGATAGAGCCGAAACGAGATGATGGCGCTG -ATGGAGTCCTTGGGGCAGTTGCCCACACTTGACACATAGCTCAACGACCTCAGGTTCCCG 
-TGGATAGTAAGAAATCAAAGCTACCCAGTGGCTGTAGCGAAGAAGGACTATGATTTAAAC -CAGTCTTTCTTTCTGCCCTTCGTCGCTAAATTGATCGACGATGTATCCCCTGGTGGCATC -ACGTGTTTCCCCGCATCGAGGCGCCACCTGGGCAGCTATTACAGAAGGATGAACCATTGT -TCTCCGTAAGGAAAATTTGGAGTTCGTGGAGTATCTACATTATAGCTTTATAGTCGTAGA -ATGAGAGTACAAAGAAAATCATCAAAGTCAACCTCAGCAACTGTCCGGGTCACATGCGGG -TTCCTGACGCCCATGCAGCAGCCCCAGCCTCTCGCTTCTCCGCCCGCGTCATCGCCTGTG -GGCTAATCTCACCCTCTCTCCGCTTCCTGTTTCTCAACCTCCAAACTTACCTCCCTGTCT -CTATCTTTCACTTCAACTATATGCTCCACATCGATTCTCTATTTTGATCAATTCGGCGGT -CCGCCCCGCATTTGTTCCTTGACTGTCAGTATCGCAGCGTATCAGATCACGCTGCTGCTA -TCCTGCACAAGATGCGCTTGACTGGCGCAATAACTCTTTCGCTTCTCACATTAAATCCTT -TACTGGGATCTGCTGCAGCATGGAGCGAATTTTCCGACACAAACCTCGCTGCACAATATT -CCATCTTCGACGATCGGCCACCCGGATGTCCCGAGTGTCCGGCTTGCTTTAACTGCCAAC -TCGACAACTATAACTGCACTCATTTTGCGGATTGCAACAACTTCAACGGCAAATGCTCAT -GCCCCCCCGGATTTGGAGGCGACGACTGCTCCATTCCCCTATGCGGTTCATTGGCCGACA -ACTTGAACCGACCGCAGCGTGATACCAATACCTGCCAATGCAAGGATGGCTGGAAAGGCA -TCAACTGCAATGTCTGTGACTCGGATAATGCCTGCAACGCCATGATGCCAGAAGATCAAG -GGGGCGTGTGTCACAAGCAAGGTGTCACGGTCAAGGAGAACTTTCAAATGTGCGATGTGA -CCAACCGTGCGATCTTGAAACAGCTCGACGGCCGCAAGCCCCAAGTGACTTTCTCTTGCA -AAGCCGAGGATGAAACATGTAGCTTCCAATTTTGGGTCGACCAGAAAGAGTCATTCTATT -GCGGCCTAGACACATGCAAATGGAACATGGAAACAGACTACGCTTCCAATACAACTCACT -ACAAGTGTGAGCACCTTCAGTGCAAATGTATTCCGGGACGTTTCCTCTGTGGCGAGAATG -GCTCTATTGATTTGAGCGATTTTCTTGATCAAATGATCAAGGGTCCGGCTACTTTCGATA -CTAAATCAACCTTGGAGGGTACTAAGAGCGTTTTCTCCGAACCTCAAATGAATGGACTGA -TTGCGAGCGTCTTCGGTGATCCCAGTATCTTCTTGTCATGTGATTCAGGAGAGTGCATGT -ACAAGACTGACGAACCAGGATACCAGCGCCCCGTGAAGAAAATCAACACTCCGCTCATTG -CCGGCGTGATAGCGGGAAGTTCACTATTTGTGGTTGCTGTCATCTTGCTCGTTTGGTATC -TGTCCCGTCGTGCTGCGCGCCGCAGCTATCTTCGCCTGTCTCTCTCGGATGATTCGGACG -ATGAATCTGCCAAACTCATGGCAGACTATCGCCCTATGGCTTTGTACTGGGATAATGTAA -CATACACACTAAACGGCAAAGAAATTCTCTCTGGTATCCAAGGCGCCTCCATACCAGGTC -AAATCACAGCCATTATGGGAGCATCCGGTGCTGGAAAAACATCATTCCTTGATATCCTGG -CACGAAAGAACAAGCGTGGCTCAGTGAATGGAGATTTCTATGTCAACGGTGAGAAGATTG -ATGATAATGACTTCCGATCCATGGTTGGCTTTGTCGACCAAGAAGACACAATGCTTGCCA 
-CCTTGACAGTCCATGAGACCATCTTGACAAGCGCCCTACTCAGACTTCCCCGGGATATGA -GCCGGGCAGCTAAAGAGCAAAGAGTCGTCGAGGTCGAAAAGCAACTTGGAATCTACCACA -TCAAGGATCAATTGATTGGCTCGGAAGAGGGAAATGGCCGTGGCATTTCCGGTGGAGAGA -AACGCCGAGTCGGGATTGCGTGTGAGCTGGTCACCAGCCCTAGTATCCTGTTCCTGGATG -AACCCACCAGTGGATTGGATGCCTACAATGCGTTCAATGTGGTTGAATGTCTAGTGACAC -TGGCCAAGACATACAACCGCACTGTAATCTTCACCATCCACCAACCTCGCTCGAATATTG -TTGCCCTATTTGACCGACTCATTCTTCTCGCTGAAGGACGCACTGTCTATTCTGGCCCCT -TCTCATCCTGCCAGCAGTATTTTGACCGTATGGGCTACTCCTGTCCCCCGGGATTCAATA -TTGCGGATTACCTTGTCGACCTTACAATGCACGCCAGTGGCGCACATTCATCATATACAG -ATGATGTAACATTGTCTGTGGATGGGCGCACTGGTCCGCCCAAGACTGCATCAAGCAGTC -TCAGAGCTGTCAAGTCCATTAACAGCGCGTCTATTCAGAGCACTGAAGACAACTCAAGCG -GCGCCGAGGCGACCCGGAGACCCAAAAGCAAACGCCAGGTATCTCTGAAACAGCAACAAG -ATCGCCAGCTTTACTCCCGAAAGCACCATACTGAGCGGCCAATCACGCCAAAGACAGACG -ACGAGAGTGCTACATTGGATGTAGCTGAGAATCATCAACAGTGGCTGCGTCTTTCTCGTC -AACAAGGCAATGTCCCCCCGCAAATTCTTGATGATCCCGATCAGTTGCCTCCTCCGGCAC -CTGGGCAAAGTGATCTCGACCTTCTAGTCGCCAGCTACGCAGATTCTGATGTTTGCCATT -TTGTGCATGATGAGATTTTGACCGCCGTTCAGAATGCCCAGGCTTCGAACGGTTCACCAA -ATTCACCCATGTTGTCGGATACTGTGATACAGTCTAAGGGCTATGCCCGAGTTGGTCTGG -TGCGACAGTTCATCATCCTGTCTCAGCGGACATGGCGTAATCTTTATCGAAACCCCATGC -TGATGCTCACCCATTACGCAATTGCCATCCTTCTCGCCGTCTTATCAGGATTTCTCTTCT -ATGGCCTCACTGATGATATCAAAGGCTTCCAGAATCGTCTTGGTCTCTTCTTCTTTATCT -TGGCTCTGTTTGGTTTCAGTACTTTGACTAGTCTCAACGTGTTCTCCACGGAGCGGCTTT -TGTTCGTACGTGAGCGTGCCAATGGTTACTATCATCCGGTGACATACTTCGCATCCAAGG -TTGTGTTTGATATCGTTCCTCTGAGGCTGATTCCTCCTATCATCATGGGAATTATCGTAT -ACCCTATGACTGGTTTGATTCCGGCATGGCCGGAGTTCTTCCGCTTTTTGCTGGTTTTGG -TTCTTTTCAATCTTGCAGCTGCCAACATTTGCCTCTTCATCGGCATCATTTTGCGTGATG -GCGGAGTCGCCAATCTCATCGGAAGTTTGGTAATGCTTTTCAGTTTGCTTTTCGCCGGTC -TTCTGCTGAATCACGATGCGATCCCCAAGTCAGCATTGTGGCTGCAAACTGTAAGTTCTC -CTATAACTTAACCGATTTTTGTCCCAGACTAACAATAGTATCAGCTATCCATTTTCCACT -ATGGCTTCGAGGCCTTGATTGTCAACGAGGTGACTTTCCTCACCTTGATTGATCACAAGT -ACGGTTTGGATATTGAAGTCCCCGGTGCCTCTATTCTGAGCGCATTCGGCTTTGACACCC -AGGCCTATTGGAGTGATGTTATTGGGCTGGGTGTCATTTCTGGGGCCTTTATCATCATTG 
-CATACAGTGCAATGCATTTCCTACTTGTCGAGAAGCGATGAGCAACTCCTTTTCTTTGTT -ACTATACACCGGGTTGGTACCATCGGCTTACAGCTCCTTGTTTTTATCCGTTGTGTGGCG -CTCACTGCCAAGAGGAAAGAATCTCTAAAGGCGTCTGGAAGTATGCTACGTTTTACATAG -TTATGTATATAGACAGTAATCCTCACGAGTTTTTGAACCAGTCTAGAGCTCACGTGTCAA -AACGTCATCCTTCTACAAAGCTTGTTCCAAGGTTCAGGTCAGGAATGGCCTTATCCACCT -ACATACTTGTGAACTCCGTACCTGAGTGCCTGCTATTAGCAATAATTTTCTTTCGACATC -TTCGCATCTTAAAACTGTTCCTTATCTAAACTGTGCATGTACGTTACCAGCCCCCGTTTT -CGGGTTGCGCTAATGATGACCAGCCCTCCCGACACCGCCGACCCCTTCAGGTGCAATTCG -CTAATCTCGAAACTTCCTCTTTCGATCCCACCCCAACTCTGCTTTTGTTCTTTTCGCCTC -CATCAGCTTGAGTCCATTTTCCTATTGGTGAGTGAGCTTTGACCCCTCTTCGGACATGAC -TCTGTGTGATTCCTCTGTCTTCCTTGGGCCGCAGAACCCCACAAACATCTCCAATTCCGG -CCTCTTCAATCCACCTTGACCTCAAACCACATTTGTCGCAGACAGTTGGCTCATATTCTC -TAGCAATTGAATTACTGTCAATTATACTTGACGTCACGTCACTCTCGCAATGATGTCTGG -CGAAGCATGGCTTTACCTGTTGTCGGTGTTGATCAACGCCGTCAACCTCTTCCTCCAGGT -GTTCTTCACGATTATGTACAGTGATCTCGAATGGTAAGTCTTTCCAGTTGTCTATCCAAA -GCAACTACCCTTGGTCCCCGAATACTCTCGAATGCAGGCTGACACTTTTGCTCCTTTTTT -TTCCAGCGATTACATCAACCCTATCGATCTCTGCAACCGCCTTAACGCCTACATCATCCC -CGAAGCCGCTGTCCACGCGTTCCTGACCTTCCTCTTCCTGATCAACGGGTACTGGCTCGC -TCTCGTGCTGAACCTGCCTCTCGTATTGTTCAACGCCAAGAAGTACGTTCGAAGCTCTCC -GAGTATTGCCAACCCCTCCCAAGCACAAGGCTAACCGAGTCATTTCTGGAAATATAGGAT -CTACGAGAACCAGCATCTTTTGGATGCGACCGAGATTTTCCGCAAGTTGAACGTGCACAA -GAAGGTTAGTTCAATTTCTTGGGTCGCAATCTGCAGCGCAATGACTGACTCGAGATCGTG -TTTAGGAATCCTTCATCAAGCTTGGCTTCCACCTCCTCATGTTCTTCTTCTACCTCTATA -GCATGATTGTTGCCCTGATCCGCGATGAGGGCCACTGACTGCGTGAAAAGACCGAACATG -CGATGCGTCATGGTCGTCCCGGCTAGATTGAGGAACCATCCATTGATGAGCTGTTGTGAG -TTCCTGGGTCAGTGTTCTCTTGACGTTCTTCTCGGACTTTCACGCAACTCTCGAATATGA -TGACGGCCTCAAGACGACTGCTATGCCATATAAATCGGTATATCGAAGTGGTTTCTTATC -TGTTGGCCGGCCATGACTATGGATGAGCAACGAATTCTTTCAGTCTTGAAATATGTGCTG -GTACGAGTACAGCAATGAGAAGTAATAGATATTTCCTTTGAGGTGTCTCGAGCCGGGGAT -GTGAGCTGGATTGATTCGAGACACTGTATGATTGACTCGCGGTGGTGCATGTGCTTTTTT -GCTATCTTCTGCTTTGTTTTTAACTTTTCTTCTGTGTTAATCTTGCTGTGTCTTTCGGCG -TGTTGAGTTGTATATACCTCCTATGCGGTTGGCAATGAAGAGCGTACGATACCCAAGGTT 
-TCTAACGTTTAACTGGTTCCTTGTTGAGAGATACCATGACCATCAAATCGGTGTAGCAAT -TGTAGAAAAATGGAGCCATAATTGAGATGCGCACCTAGCTAAGATCGAAGCTGCACTGGA -GGCTAATAGAGCTTGGTTGCGGTGGCTGTTCCGCGGGCCTTCGTATCGGGGGTCTCCCGA -CGCCCGTAAAAATCccatcctccaccacctaccattcaccagccacattcaaccCCAGCT -TGTTACAAAGGGGAATTGAGGACTGCGCATTCTCAGTGTCTTTAACATAGCAGCCATGTC -CGCCTTATTAGGTCTTGGTTATGACTCCAGCGACGATGACGCCACAGCTGTGCCTAACTC -TACTGTGACGACCGCGACTAAGGTCATTGCTGCACCAGAGGTCAATACAGAGGTTTGGTC -ACCCCATAATCACCCGCTTTCCCAAGTCTTTTCTTACTCTTTGCGCCTCAGCCATGACGG -ACAGATAAGACTAACAATCCTACAGGACCAAGCACACATGCAGTTGGCCCTCGCCAACGC -CTCATCCCAAGCCCTCACCTACAATGCCACATACGATGATCTTACACAGCCTGCTCAGGG -ACCCGTGAACCCGTTCAAGCCAGCAGGTCCAGGAAACGGTATTAAAAGAAAGAATGTTCC -GACAGGATTTGCCGAAGAAGCAGCTATCAGCGAGTCCACATTTGCTGCGCAACATCGCAC -GTTCCAAAGTCTGGGATACACGCGCAATCCCACACAACCGGACCAATTTATTGGAAATCT -CGAAAATGCGGCGCAATATGGGGGTCGGGACGTTGTGCAAATGAAGCCAACTAAGAGCGC -GTCCGCAAACTGGCGGACAAAGCGGCAAAAGAAGGGCGACTCAAGCATTGTGGAAGGCGA -TGGCGCATATCTGGGGCCCTGGGCGAAATACCAAAATGACCAGCAATATGACTACGTGGA -GGTTGCTGAGGGAGATGAGCGCGCGCTTGGTAGCGACGAGGAATATGTCGAAGAGGAAGA -CGAGACGCTGACTACAACGGCCCCTATGCCGGCCATGAGCAAGGATGCCACCGACTACGA -AGGCGACTTCTCCAAGCAAGAAACCACTGAGTTCCACGGATCAGAGCAGTACGACTACCA -GGGTCGGACATACATGCATGTGCCCCAGGACCTGGATATCGATTTGCGCAAAGAGCCCGG -CACCACCAAGAACTACGTACCGAAAAAGCTTGTGCAAACTTGGAAATCGCACACCAAGCC -GATCACCTCTCTTCGATTCATCCCCAAGTCTGGTCACTTGCTGCTTTCGTCAGCAGCCGA -TGGAAAGGCCAAGCTCTGGGATGTGTATCACTCTCGGGAACTCTTGCGCACTTTCTCCGG -CCATACTAAGTCAATCTCAGACACGGACTTCCACCCATCTGGAAAGACGTTCCTTACTGG -TTCATACGACCGACAGATCAAGCTCTGGGATACCGAATACGGCAAATGCCTTGGCCGGTT -CTCCACGGGCAAGACACCGCACGTCGTCCGCTTCAACCCCAGCGCAGAACACTCACACGA -GTTCTTGGCTGGTATGTCCGACAAGAAGATCGTGCAATTCGACACCAGGTCCGGCGAGCT -GGTTCAAGAGTATGACCACCATTTGGCAGCTGTCAACACCCTCACTTTTGTCGATGACAA -TCGCCGCTTCATCTCCACCTCAGACGACAAGTCGCTGCGCGCTTGGGAATACGGCATCCC -TGTCCCGATCAAGTTCATTGCAGAGCCATACATGTTCGCGCTCACTCGTGCGGCGCCTCA -TCCGAACGGCAAGTATGTCGCCTTCCAGTCAGGCGACAACCAGATTGTCGTCTATAGCGC -GACGGACAAATTCCGACAAAACCGAAAGAAGCGCTTTGTTGGGCATAATACTTCCGGATA 
-CGCTGTTGACCTCAAGATCTCGCCTGATGGCCAGTTCCTTGCTTCTGGTGACAGCGGTGG -CTATGTCTGCTTCTGGGATTGGAAGACTGGCAAGATGTACCATAAGATCCAGGCTGGTGG -AAAGGAAGGCGGTGCTGTTACCTGCGTGGACTGGCATCCTCATGAATCCAGTAAGGTTGT -TACTGGTGGATTGGACGGTGCTATCAGGTATTGGGATTAAAGGCATTTATAGTTAATCAA -GTGTTATTGGTTTGATCAAGAATTCAGACTTGGATCTTGGAATATCTTCCTTGTATACTC -TCGTTACTTTCTACCATAGTGTGCTGATTCTACCATGTTCCTTCCTCATGTAATATATTA -GATGCACTAAAAGTTCCCCAAAAATTCGCATATTGTCACATATTCAACCAAAATTCCACT -CATCCGGCAAAACAACTCTACATATGAATGAAGTTCCGAACGCTCTTCAATCCGACGCAA -GCCAGATGCCGCGAGCACCGAATCAACCCCATCGGCGAGAGATATTGCGGGTACTTCCAC -GCCAAGATGGAAAAAGCACCAACACTAGCAACTAAAGCCCCGCTCAAACCAAGACCAAGC -ACCTGGCTCTGATGCTGAACAGCTTCAATTACCTCAGAGTAACGCTGGTTACTGAAGCTG -ACACGGGCGTACTGCGTTTTCCAGCCAAAGATGGGGAAATGGCGGTCAATAGTCTCTTCG -ATCGTCTTGCGCAGCTTGTAGAGCGGCGATTTGACGCCCGAGCGCATTTCAAGATAATTC -TCCTTTGACAAATCGTTTATGGCATGCGTGTCGGCGCAGCGCTGATCTGTGTAGGCTTGG -AATCCTGCTTGGCGCTTGAGGCCGCGTGTGTAGACGCTAGAGTGGGGATCGTAGACTCCG -TGTTTGTCCAGTTGCTCGAAGAGGACCCGGACATCCTCGAGACCCGCATTTAGTCCCTGT -CCATAGAAAGGCAGGACCGCGTGCGCTGCGTCGCCGACGATGACGACGCTGGAGTTAAAG -TGGTGGGGGTTGCACTTGATGCTGATCAGTGGGAGATGCGGGTTCGCGACGAACTGTTCG -CTCAATGCCGCGGGCTCGATTAGCTCTGGGCAGACGCCTGGGAAGTGGGAGTCGAAGAAT -CCATGCAGATCCTCTGGTGAGGTCTCGAGGAAGTTGTAATGCGATGATGGTGCGAATAAG -GTACATGTGAACGACTTATCAGCTGATGGTAGCGCTATGAACATGAATTCGCGCCCAGGC -CAGATATGGAGGTGGTTTGGCGAGATACGGAAGTTCCCATCTTCCGACGGTGGAATGCGG -AACTCGCACCAGAGAGTGTCGATGTACTCTTGTTGATAATCGATACGAGCGAACTTCATT -AGGTGGTAGCGCGCGGCGGAGTGTGCTCCATCGGCGCCGATGAGGTAGTCGAAGGAAACC -TCGATCTCCGGTACCCGACCAGCTACACCGGCATCATCGGCCACTGGTGTATCCCCGGGA -ATTCTGCGCTCAAACCAGGCTTTGTTGGTGCGAAAGTCTGCACCCGTGAGCTTGTGGTTG -AAAAATAGCTTGACGTTGGGCGTCTTCTCTAGCTCATCAAGGAGGGCATTGTTGAGAGTC -CCTCGGTCTATAGCATTAATGGCCTTGTAGTAGTGTTAGCTATATGATCTGCAGTGTAAC -TAGCAATAGCAAATGCCAAGCGGTTGATCGGTGCATACCCGGCCGTGAACGTCATAGGCT -TGTGCAGCTTCCCACAGATCTCCTCGATCTCGCCCGTGGATCATCCGCCCGTGCATAGGG -ATCGCTTCACGGAGGACATTTTCGATCAAATCATCCCGACCACAATGTCTCATGGATGTA -ATGCCACGTTCGGAGAGTGCCAAGTTGATAGACTTGGTAAAGTTCAATGGCACTGTGGTA 
-GGGTCTCGGAGATCTACGGATGCGCAGGGATTATTGGCGTCAGTTCGGAAGAATTCGCAA -TGAATCACAACTGAGCCAGAACAAGTGCAAGGGGTACCCAAGATAGTAGCAAATGGTGAT -AGAGATTAATCGGGGTGGCTTTCTCTGAATTGAAGAGTTGAGATAGGGACCTTGCGAGAC -ATGGAGCCTTGATAAAAGCCAGCCATTTCCAGCTGAAAACATGTATCCCCGTGACAAGCG -AATTGTTCCCAATGGCGATCACCCAAAGGTAAAAAGAAAGGTCACACAAAACAGAAGAAC -ATACCTCCACGGAGCTCATATACTTCCACCTCGTCGCCTCTTGACGCAGCATATAGGGCT -GCTAAGGCTCCCACTGGGCCAGCACCTACAACAACCACTTTCTGCCTCGGAATAGCCATC -ATGTACCTAGTTAAAGTCCAAGATACCAACGGGTACGTGCTGAAACGGAAGAGTAAGTCG -TTATCACGTGCTGCGAGATCGGCCCCTTCCAAGATTCCTAAATGAGATGATCGACATTAG -TCATTCTATTTAGTGCCTTGATGCCTGGAAATATCAGACAATTGGATACTAGTCCAAATG -AGGCTTAGTTCCATATCGCCTTGCGGCCGGCGGGGATTTTCTTGAAGTTTTGGCCGCCAT -TTAATCCACTCTCGACTACATATTCTCTCAATTTTATTTTTATTTCTTTTCGCTTCTGCT -ATTTCAATATGGCTGAAGCTGGGAATCGCAAATGCAAGGGTGTGGACTGCCCTAACGACG -CAGGCACCCTCCAATGTCCGACATGTTTAAAGAGCGGCACGGACAGTTTCTTCTGTTCGC -AGGACTGTTTCAAGAGAAGCTGGGTAGGCACTCCGATGTGACTGGAAGATGCGCAACAAA -TGCGCCCTTCTATGTGGGAAACCAAGGGCTGACCATCTATTGTAGAGCGAGCATAAGACC -ATTCACAAAAAGAGTAATATCCTCACCAACCTTTTTCCTCCGAAGGTAGTTTCTGAACCA -GATCCAGCTACCGGAACCTTCAATCCGTACCCATCCTTCCCTTACAGCGGTTCTTTGAGA -CCCGTCTATCCTCTGTCCGCCAAACGAACGATCCCCAAGGCGATCCCGCACCCGGACTAT -GCCCGCGATGGTATTCCCCGCTCCGAGCAGAAGATCATCGGCCGTCATAACATCACCATC -CTGAACAAGGAAGAGCAAGAGGCCATGCGCAAGGTTTGCCGATTGGCTCGTGAGGTTTTG -GATGCTGCTGCACGTGAATTGAAGCCTGGTGTCACCACGGACTACATCGACGAGGTTGTC -CACAAGGCCTGTATAGAGCGTGATGTATGAGGAGGTACTCCCTGGTCGCATAATACCATA -CTAACATATATATAGTCTTACCCCTCACCGTTGAACTACATGAACTTCCCCAAGTCCGTA -TGTACCTCGGTTAACGAGACCATCTGCCACGGTATTCCCGATCAACGGCCCTTGAAGGAT -GGAGACATTGTCAACATCGATGTCACTCTGTACCACAAGGGATTCCACGGCGATATCAAC -GAGACCTACTATGTTGGAGACAAGGCACTTGCAGACCCCGACGCCGTCCGAGTGGTGGAA -ACCGCGCGTGAATGTTTGGATAAGTCAATTGACATCGTCAAGCCTGGCATGCTGTTCCGC -GAACCTGGCAATGTCATTGAGAAGCACGCCAAGGGCCGTAACTGCAGTGTTGTCAAAAGC -TACTGCGGCCACGGTATCAACCAGTTGTTCCACTGTGCGCCCAATATTCCTCATTATGCT -AAGAACAAAGCCGTGGGAACTGCCAAACCCGGTATGTGCTTTACAATTGAGCCTATGATC -AACATTGGCACCCACCGTGACCGCACTTGGCCCGATGACTGGACCAGCACTACTCAAGAC 
-GGATCTCTGTCAGCCCAGTTCGAGCACACTCTTCTCGTAACTGAGGATGGTGTTGAGGTT -CTGACTGCCCGTCTGGCTGACTCACCTGGTGGGCCAGTTCCTATGCCTTCGACAGCATGA -ACTTGGGGAATTGGGAGTTAGGTAATTGGAATTATTTGTATATTCAGATACCACCTATTT -AACAGCCAAATTTACGACCGTTCAACGTCTCTATAGTTTCGTCATTGAGGTAATGTCATA -AACTGAGCAAGATAGGGCTATAGGCCAAGAGTTTGCCGATGTTGCAGATCAAAGGTCTGG -GCGGCATCAGTCCGAAGAAAGAGTACATTTGATTGGGCGCAACGGGTAAGTCGGGGGGGC -GGGGGGTACCTAGGTACCCAGGTATAGTGAGGGGCGTGCTATCGGTGGAGAATTTTGGGG -TTAGGTGCTCTTTCGTCTTCCTCTTTGTTTTTCCCCCGTACAACAAGACGTTCAGTCAAG -TCCTCTCGATTTGGAGACAATCTGGAGCAATTTAGCGACACAAGTCTCAACAATGGCTCC -AGAGAATTCTCCTACTGAGATTTCCCCGTTGCTGGGATCTGCGTCTACTAATAGCGCTGT -ATCCAACGGCACGATCCCTGATCACCATGTGGAATCAGGGGATCCAGGTCAGACGGAGAG -GCAAGGCAAAGAGCCTGTTCTTGATGCGCAGAAGCAGCTCAAATATATCGTTCCTGCCAT -CTCGCTAGGTGTATGGATTCTTTATCATCATTCCGTTTTCTGAATCCAAAGTAACTGACA -GGATTTTTAGATCTTCTTGTCGGCGGCAGACCAGACAATTATCATGGCCAGTTATGGTCA -GATTGGTAGCGACCTCAAAGCTCTCAATCTGACCAGCTGGATTGCCACTTCTTATTTTCT -TACTTTAACTTCTTTCCAGCCTCTATATGGCAAACTGAGCGATATCTTTGGTCGCAAGCC -GTGTCTACTATTTGCATATGCGGTTTTTGGCATCGGATGCTTGGGCTGTGGTATGGCTCG -AAATATTAATGAGTTGATTGCTGCTCGTGTATGTTCATATACGATCTCAGACCTGAATCA -TGTAATGTAATGCTGAGTATTTTCGAACAGATTTTCCAAGGCATTGGTGGTGGTGGTATG -ACCACTGTCGTGAGCATTTTGATGAGTGATATTGTTCCTCTTCGCGATCGAGGCTTGTGG -CAGGGTATCATCAACATCATCTATGCCACAGGTTCCGGTACCGGTGCACCCTTGGGTGGA -ATTCTCTCTGACTACATTGGCTGGCGCTGGTATGAATGATGCAAGGCCCACATATCCTTA -CGCCATCTGACGTTTCTTCCTAGGGCGTTTTTAGCGCAAGTCCCTCTCTGCGTTATTGCA -TTCATTGCCGTATCGTTCCTGCTCAAGCTGCCCCCGCGCGAGAGCGCCCATTGGAAGACC -AAGCTGCGTCGCATCGACTTCCTTGGTGCTGTAGTCCTTATTGGCGCCATTCTTGGCTTC -CTGGTTGGTCTTGATCGTGGTAGTAATGTGTCATGGAGATTACCCCTGACAATTGCCTCT -CTTGGTGTCTCCGCTGTCTTGTTCGTGATCTTTATTCTAGTTGAGATCTACCTAGCAGCC -GAACCCTTCGCGCCTGGTCACATCATCTTCAACCGAACATTCTTTGCTTGCTACTGCTGC -AACTTCTTCAGCTTTGGGGGATGGATTGCCGCTCTGTTCTATATTCCTTTATACTTCCAA -GCGGTAGATGGTGTCTCTGCTACAACTGCAGGGCTGCGTTTATTGCCAAGCATTCTATCT -GGTGTCTCCGGATCCATGTTTGCAGGGTTGTTGATGAAGTGGTATGGCAAGTACTACTGG -CTGACCATTGCTGCATATACTTTCCTGACCACTGGTGTGCTCTGCATATATCTGTTCTCC 
-GGTGGTGTGGTGGCTAGCACAATCCCCATGATCTTGGGAATGGTAATTTCGGCACTTGGC -AATGGCATTGGTGTCACCACAACCTTGATCGGTTTGAGTATGTTCTCATCTGATCGTGCC -CTACAAACTTTGGATGTATACTAACCACTCTCCCAGTATCAAATGCTACTTATGAAGATC -AGGCAGTTGTCACCGCTTGCTCGTATCTGTTCCGCTCATTGGGATCCGTGATTGGTCTCT -CTTTATCGTCAACTGTCGTTCAGCAAGCTTTACGAACTCGATTGAATTCAGGTCTACGCG -ATAGTAAGAATATTGATCAGATTGTTGAGGGCGTGCGAGAAAGTTTGGACTTTATCAGAA -CCCTTGACCCGGCAACTGCCAGGATTGTCCGGGACTCTTATGGCTGGTCCACAAACAAGG -GATTCGCCTTCATGGTTTGTGTGGTCTTCTTTGCTTTGCTGAGCAGCTGTTTCATCCGTG -AAAGCAAACTCAACCGATGATCTATACATTTCCAGGCTGAAAGTCCCAGTAACGACGATC -TTTTTTCCATACATAGTCGTACTCGTTAGGATTTATCGGCGCAGACTGGTTTAAGAAACC -CTTGAAACCAATAAGTTACCATTCTATTCACAATCAGCAGACGACATCGGAGGTTGCGCA -GACGGGTTTCACAACTGAATCTCTACACACCAAAAAGATACAGATTTGCATTTTTTTTAA -AAGCGACAAACTTGCATGGCAAATTACTACGGTCTATCTAGCATTAGGGTTGAACTTGAA -GTTGTTTATTTTCATTCCATGAATATATCAAATAGTCCATCAAATATATGCTGGGTATAA -AGGAAGTTTTGTATCTATCCCAAAATCCAATCAAGAAAAGAATGTACACGTAGGTTTCCC -AGACACGGATCACTTAAAGTTGTATGCATAAAATGCCACCCACTGGTTTACCTACAAGGA -TTGAAGGATATCACAGCTCTTTTGTCTGTATCTCAAAAATTCCGTCGCCTCCTATAGCTT -CGTACATAGTAGGTATGCTATGGGGCTCTGCCAAGCTGGTAATCTCCAGGGGCTGGCCCG -GGCAGGCTCAAGTATAGCTTACTAAGCCAGCAAATGTGCCCAGCAGGTGCCCGGGTCTTG -TCAGCAACGTTTCAACCTTAGAATTGTTATCCAGTTGACTCACTGTTCATCCATTAATTG -AGTGTCTTGATTGACACTTTGAGGGCTTGACTTGATCTGCTTAACCAACTGTGATCAGAA -TGAGCTACGAACTTATGTTCCTAACATTTTTGTGCAGCTTCCGCTAGCTCAACTTCCGCC -AAACCTCCGTCATGTCCGACTCGCGCCCAGGGCCAGTTTCACTAGCACGATATCGACCTA -TTCTCTATCTCTTTACGGGTGTCGCTGCCGCGTACGCGATTCTCTATGTCCACAATAATG -TCCTTTCGCAGTCATCATCGACTTCGCTCCGGCGTCGAAATGCTGTGCGCCGCACAAGAG -CAACCGAAGCTGAAGAGCTTGGGATCGCTGATACCCCGTCTTATCGAGCAATCACCCACC -TTGAACAACTAGAGCGTCAGAATGGTGTATATGGGACATTTCGCATAGAGACAGAGGATG -GGCGTCGCGTGGAGAGCGGCCTTCTTCCATCACTGCTCGCAACACGCGACCAGCTGATGG -AAGAAGTCGGGGTCCCGCCTGCTCATGCAGAGCGCATGCGCGAGATGATGGAGGATACAT -TTCTTGAATCCTTCCTTGCCCTCGATTTCCCATCTGCACATATTCTCCCAGAGGACACCC -CGGAAAGAACTTACTTGACAGAGCAACTTCAACGCCGAGGTATCTCCAGAGCTGGTATTG -AAAGAGCTTTGATCCGCTTCAACGGAGATGTAAATTATGGAGAGGAGCTGCGCCGGCGAC 
-GACAGAATGGTGAACGTGTCACCCTCTCAACCTCTACATTTCCAGACGTGTCTGCGCCTC -AACAAAATCCTGACGGCGGTGAGACGGTAGTGGATGACCAGAGTGTCTTCTCGTGGCGCG -ATGGAAACAACGATCCTTCCAATAGGGAAGGCCAGAATTTGCTCAACCTACTTTACCACA -TCGCTGAGGATCAGGCGCGGAGAGACGGCTATATCCATCGTGGAGTTACTTGCAACAGCT -GCGGTTCCATGCCAATCCAGGGTATCAGATACCGATGTGCCAATTGCATCGACTATGACT -TGTGTGAGACGTGCGAGGCAATGCAAGTCCACATCAAGACACATCTATTCTATAAAGTGC -GGATTCCAGCCCCGTTCTTAGGAAACCCTCGTCAATCGCAACCTGTATGGTATCCTGGGA -AGCCTTCAATGCTCCCGCGCAGCCTTCCACGACCTCTTGCCAAACGTCTTATGAAAGAGT -GCAATTTCGAAAACACAGAGCTTGATGCACTCTGGGATCAGTTCCGCTGTTTGGCAAATC -ATGAGTGGGTAAATGACCCGAACAAGTTGCACATGGCCATCGATCGCAAAACATTCGACC -GATGCTTTGTACCTAATACTTCTATCCGGCCACCGCCTCCCAGTCTCATTTACGACCGAA -TGTTCGCATTCTATGACACCAATGGCGACAATCTTATCGGGTTTGAAGAATTTTTGAAGG -GTTTAGCTAGTCTCAACAATACAAGCAATGACGAAAGACTGCGACGTGTGTTCCGTGGCT -ACGATATCGACGGGGATGGTTTCGTCGAACGCAAGGATTTCCTTCGGGTTTTCAGGGCTT -ACTATGCGCTTAGCCGGGAGCTTACAAGGGATATTGTTGCGGGCATGGAAGATGACTTCT -TCGAGGGTGGAGCTCGTGATGTGGTTCTTGGTAGCCAGCCTATCAGCTCTGCATTCCCTG -GCAATATCCCTGCAGGTGAACGTTCTCGCACAGGCGAAGGCAAACGCATGAATTCTGAGG -GGGACATGGAGGTCGTCGACAATGGAGGAATCTTGCGACAGGACGGAGATGACACTGGTG -ATCGACATTTGGTCGTGGGCGATGCTGCTGTCAGAACACAGTTTGGTCGGATGAGGCCAT -TGTTCCCATCCACGCTCCGCCTAGCATCTACTGAGACTCCTCCTACTCTTCGTCCATCGG -ATATCATAGAGGGTTATCGAAATGACGACAACGAAGAACATGAAGATGGCGCTAGCGAGA -CCAGTGGATCGGACCAATCTAGTTCGTCGGTGGCCAGTGGTACCTGGCCCCCAACTGAAC -ACATTCTCGAAGAAGATATCATCGAAGCCCTTGGGACCTATATTCCCGCTCGAGAAATCA -CGGACCCAATTGATCGGGCTCGCGTTGCGGATGCGGTTTATCAGAGAATGCGCGAAGATG -ATCAGAGACGCGTGTTTGACGCTCGTCAGCGTGGAATTGACGAACGTTGGAGGCGCCGAG -CGTTTTATACAGACGAAGAAGATGGCGCCACTGTTCCTGAGGGGTACGAAAGGGATCCAG -CCATCGATGGCTCCAGCGACGAAGACCTAGATGAGTCCGAATCGCGCCCACCCTCTTTCC -GTTCGCGGTCCTCGTCCAAGGTGCGATTCCAGGATGATCTTAATGACGACGAGTACGACA -TTCGATCTAACCCATCAACTTCGTCTCGGAGTATTCCTGTTGGCGAGCGATGGGGTGGCT -TCGAAATTCCAGAAGTTGAAAAGGATGTCGGCAAAGAAATCTTGTATCAAGTCACACAAC -AGGGATTCAATGAGCTTCTAGATATTCTGTTCAAGCCAAAGGAGGACCTTCTGATGGAAG -TTTATCGCACACGAGCTGAGCGCAAGATCTGGGCCCGCGAGATTGAACTCGCGGGGAAGG 
-AGGATCCTTCAAAACGTACCCCGCCTGGGAATGCTCCAGTTGAGGAAGAGCTTGAGAAAA -CTATCAATAACCGGCCACTTGATGATCTATTACAGCGCGCTGGCTACGCGATTCAAAGCC -CAGTCCTGGCGCCTGCCAACGCCGACCCAGTGCCTGAGCCCACCGTTTCagatgatgata -gtaatgaagacgaatatgaagatgacgatgaagacgaTGTGCGGCCAACTCATCTTGCCG -ACCCCGAAGGTGATGCTGAATTTGGGCAGACACAGTCCTTCGAGGATTCCGACACGGCAT -CCGCCGCCTCATCTGATTATGACGACGACGTCATTGCCTCTCAAGAAACCGACGTGGGAC -CATTACGTTCCACTGCTGAAAACGACACACTTTACTACGATCCAACACTCCCACAACATC -GCCCCCATGCAGAGATGACACCAGTACTCATCGATGAATCAGACTCCGACCCCGAAGAAT -CACCAATCACAAGCACAACCCCTGGTCTCCCTGCTCAGCTCCGACTCCAGCCCGGGATCA -CCTCTCTTCCCGCGCCATCTTCTCCCCATTCATCTCCAGAAGCTGAAGCCACAGCCCCTA -AGCTTCCTCCTTCTCCTATGCAACCACTCCCACCCCCTCACCGGGTCAATGACCAGCCTC -GTCAAGCCTCTTCCCCACGCCGACCTTCGCCCCGGACTCTGGCCCGGTGGGTCTATTTAG -ACCAGGTAGAACGTGAAAATAAGGAACGTGGGAACGGTGCGAAACTCAACTTTGAGGAGT -TCTCGCGCCGTATGGCTGCGGATCGGGGCCGCCGACTGGCATTTGTGGCCAGTTGGATTG -AGATGGCTAGCTTTTGATTCTGAGAAGGTTCCTAGATGAACCTAGGTGTACATTATCATT -GTAAATTAGCGGGATTTCTGAATTGATCCAGACGCGTCAACACAGTGACAATTTGGTCAA -TTACGAAAGGAAATAAGGGTCTTTATCCTAGTTTTGTAGGGAATAGACCGGTTCTAGTCC -ATTGTGACTTGTAGTGGCTCGGAATGCATATAACATCCATTGATTGGCTATTCAAGGGGA -CTTGCACCTCATTTCCCTGGCATACTAATATTCAGAAAATGTCTCGCAAACAAGTCTTTT -ACCCCGGGGCCGTGCCCAGCCATGAGGGCGTGCCCCCCTCATATTACGTGCTTTTGGGGA -CCAATGGCTCACTTTGCACCATTGTTATCCTCGGGATGTTGAAGATTACCGTCCAGGATC -CCCCTTATCGATTTGACTATAACTCGAAAGTTCTTCGACTGTCTCCAGTATACAAGCTAG -CACCAGTTATGCCCTCAGCTGCCGTGGGGCTCTACGGCCGAGAGATAAAATCAAGATATC -GCGATTATGATGGATCCTGACGTGCTGTCTTCTCTTGCTTCAAAAAAAAATAGGCGGCCT -GAGGACCCATCGCCATGCAGTACTAATAGAGACTCCGTCCGCTGCTGTAATCCATATAGA -AACCTCCCACATCATGGTTTTTGATCGCCTGGCCCGGTTTTACCGGTCATGTCTACTCCA -AATTATCATTGTTGGACTAGTCGCATTCTGTGAACCAGGCATATGGACAGCTCTTAATAA -TCTTGGAGCTGGAGGCAATGCCAAGGCAAGTCATATAAATGCTGGCAATCGGCGCGACTA -CCCCTAACATCCTTGCAGCCTTACCTGAACAATGCCGCAAATGCCTTGACATACGGACTT -ATGTCAGTAGGCTGCTTCCTTGCCGGTGGCGTCACCAATAAGATCACTGCGAAATGGACC -TTGTTCATCGGAGCTGCATTCTACACTCCTTATGCGGCTGGTCTATACTGCAACAATCGG -TATGGCAACGAGTGGTTCCTCCTGCTCGGCGCTGCTCTTTGTGGAATTGGTGCATCCCTC 
-TTATGGGCTAGTGAAGCTGCCATTGCAGTTGGTTATCCGGAGGAGGAAAAAAGAGGACGG -TAGGTTGAAACTCTGAATCATCTAGTCATTTGACTGACCGTTGTGTTTAGTTACGTTGCA -ATTTGGATGAGTATTCGGCAGATGGGGCCACTTGTTGGTGGTGCCATATCTCTCGCTCTC -AATGTCAACACTGCGCACGTCGGCAAGGTCACCTACACAACATACTTAGGGCTTGTTGCC -ATCTCATCCCTCGGGGCTCCATTTGCGTTGCTGTTGTCCCAGCCACAGGATGTCATCAGG -AGTAATGGCACCAAGATCCCTTATATGAAGAAAACGAGTCTTGCTATTGAGGCCCGCGCT -ATCTGGAGGCAACTTTCGAATAAGTACATGCTGCTACTCATCCCGGTCTTCCTCGCAGGA -CAGTTTGGTGTGACTTATCAGGGGAACTATTTGACAAGTATGGAATCTCCAATCCGTTTT -TATCGATCAGGGGTGGCGTAGCTAACCGGAGAATTAGCTTATTTCACCGTCCGCTCTAGG -GCCCTTGCGTCATTCCTAACTGCCGTGGTCGGCGCCACCGCCAACGTGGTGACGGGCTTG -TTTCTCGATCTGAAAATTTTGTCTCGAGGAACGAGATCTAAAGTTGTGTACATCTTTGTC -CTTGTATTTATCACAGCTGCTTGGACATGGAACGCCATCACCGAAACGAAACTCTCCCGC -ATGGCCGAACCACCCGCCTTTGACTTGGGCGATGGTCCATTCTTCAACTCGGCTTTCACT -GTCTATATATTCTTCCGGTTCTTCTATGAGGTGCTGCAGACGTATATCTATTGGCTGATG -GCCGAGATCAAGGGAGCGCAGGCAGACGGCGATATCGCGAGAACAACCGGAATCCTGAGA -TCGTGGGAGTCCATCGGCAGTACGATTGCCTACGCGGTCGGTGCAACTCACTGGGCCAAC -CTGAACCAAATGATACTGGGCTTTGCACTATGGGGCTTTACCATTCCATTCACACTTCTC -GCGGTGTTTGGCAATTGGAATGTTTCAGACACGCTCGAGGTTGAGGAAGAGACGGACAGC -AGTAGCTTGGAGGCACAGAGGGTGGTCGTCAATTCTGATGAGAAGAACTAGAGTGGATTC -TGGTTTTGTCATATAAAACTATATAGAGTGGTAGATAGCAAATGAATGATCCAAGGGATT -GTTCGAATATAATAGAATTTGTACCCTTGCCCTAAATATATAGATGAGATGAACAGCAAT -TTTAGTATTACGATATATACCTCCAACTCTTCCTATGCATCAACTTGTCGACTACTAATA -CTATATTTAACCTACCAATGCCTAGCTATTTATCAGTTTTGGGATAAAATGCTAAATCAA -GCTCTCGACATAAGTACATAAGTGGTATGGTACAACTTCGATGATTTACTTGATAATGTC -AAAATGAGACACCTATTCCATCAATTGGTCCAAGTGGCCGAACAGTTTCCTGGTCTAGCG -GTTATGATTTTCCGTTAACAAATGCGACTCGCAAATGTTCCGGAAAGGTTCTGGGTTCGA -TTCCCAGGGAAACTATTTTTTGCCCTTTGCCGTTTTTTTCCATGTCTTGCTTGATCTTAT -AGTCTTTTTTCTTTTGGCCTTGTTGAAATAGTTCAAATTATGTGAACCGACCTGAAGTAG -AGCCACAAGACTATACTGGACATCCGAGGAAATGCAGCCCAAAATCTGGAGTGTATGGAA -TTAGCAGAGGAAAAAATCAGAAGTGGACCACCCAAGCAAGTCAAATTAGAGACATAACAT -AAATCACAGACATAATCTCGTGCTGCATACAACAATGAACCGAAAGCATGATATGGTCGT -GTAGCACAAGTAAAACGTAAGAAGCACGGTAAACATATCAAGACAAACCATCCCGAACGC 
-CAGAATAAGAATTGAAATCAAACCGAGAAAACCAGCAAAACCACCGCCCTATTATTGATT -CCTTTTGTACCGCTAGCTGGCACATAGACCCGGAGGAGATGTACGAGAGCCGGACGATTA -TTTTCCTGCCACCCTGACTGGGGGTTGGTTTCAATCATCCCTTCACCTGGCCTGGGTACG -TAGGTGGGTGGGCTTTTGTCCTGGAAATCACTGAGGCTCCAGGGTACCTTTCCTCCAAGG -GGTCTGAACTTGACAGTGGGGTATTTCAAAGGGAGTTGTAAAAAAGAAGAAAAACCAGAG -GAACAAGAAAGCAGGAAAAAAAAAGACCGGACAATCCAAGGAGCTTCCATGAGAAGACAA -GCGCAAACATCGCAGCGGCATCCATGCATGGGCTTTAAAAACGGCCATTCTCCATCCAAT -GGATTCAAGGGTTTGCGAATCTTACACGACGGCTACGTCGCTCCACGATCAACTTCTCAG -TGTCAACATAAAGACAATCGAGACACTGCATGGCGCTAGAATCCCTAAGGATCTTCGCGA -CCTGAGTACCCGCAGAGATGAGCTGCAGCACGTTGCGGTCATCCTTGACTGGCCTAGACT -GGTGCTTGGCAGCATGGCGCATCTCCAGGGAGTTCTCCAGGACAGTGATGAGCTGTTTAA -TTTGGCGGTTGTACAAGAGATGAGCTGGGATGCGGCGTGTGGTAGCCAGCCGTTTTAAGA -GGTGCACCCATTCAGTCCACTCGCGGTCCGAGGGGACTTTGACGGGTCTCTTGTCCTGTT -CGATAGGAACACGGAACTGGTATTTAGCGATGAAGTCATAGGCGCACTGGGACAGTCGAT -CGTTGACCTTGTGGAGGATGCTGTCTTGCTCGCCGGCATCGAGGTTAGTAGGCAGCTGGG -GAAGCTCTTTTTCCTGGGCTGCCATAGCTTGTGGAGAAGGTGGCTCCAAAAACTGATGCG -GGCCATCAAATTCATCCCGGTTGGATGTCCGTCTCTGGGCATGATAGTGTGGGCTTCCTC -TGGATCTCCCGGCAGATCGCTGGCTGTTTCCTTCTATGACATATTCTGGGCTCTCCGGGG -GGTTCATTGTTCCAGGATCCCGCCCCAGGGATTCTCGAGGCAGGTGGTGCGGCTGGTTCC -ATTCATAACTCCCAGGTGGTGGATGAGCCTGTCCGGCTGGAGGGTAGGATGAACGGCCCG -AGGGTGGTGTGGGTGGGCCCATCATGTGGCTGTGGCTGTCCAGAGGGACTGGAGCTCCGC -GGTAGTGGGTCTGATGGTTAGAATCAATTGGACCGAAAGGCCCTTGACGATCGTCTCTGT -AGCCATACCCGTGGGAAGTAGAATGTGACCTTTTACCGTTTAGTAAAAAGAACAAAAGCG -AGTGTTGTCGATGATGGCTTGGAGGGAGTTTGCAGTGAAGTGAACGATGACCAGACAATA -CCTTTGATAGACCACTGGAGAGATTGCTGGAGCTGCATACATAGGGTGCTGGGCCATGGT -GCTGCTACGGAGCATGAGTTTTGGATTCCAAGATTTTTGACACGGAAACAGAGGAGGGAA -TGGCGATGCCGATTTTGCTGACCTATGGCGAGACCGAGTATCAACTTTGGAGTTGATAGA -GCTTGTACAGGAACTGAGACTGGCTGGCAAAGAGAACGGATAGACTGATCCAGCCGCCAG -TGGATGAGGAAGAATAGGAAGCATGAAGAAAGTTAAACGGGAAGTAGTATGCCTTATATA -AGAATTGGAAAGAAGATTTAGAGCGACGAGGGAAATAATATATCTATCAAGTGCGTTATT -GAATGAATTGTGAAAATCCGAGCAAAGGGCAGGTATTATAGTCCTTTCGGGAAAGTCACT -CTACCCGGGTTGGACGAGGTGAAGTGTGCCTGACCAAGTGGATCGGAGCCCTAGGTTTCT 
-GCAACGGCAAAGTAACTGGATTGAGAGCCGCGCTAGAGGACTTTACGCCAGTGGCGTTGG -TGGGATGAAATCTATGGAGACGAGGGCAGTGTTGCCTGAACATTGTTGATTTCAAGAAAA -CCAATTGCGTAACAGGACATGGACCGTCCCTGCCGTTTACGTTGAAGATCTCGATCGATC -GAGCATAGAACCGGTGTGCAAAGCTCGTTGGCAGCAGAGGCGCCGTGAGACGCCTCTGGA -AGTGCATAGCATGACTCCGATGGCATGGCAAACATTCTGTCCAAATCCTCGATCTTCACA -ACTTAGGGGCAAATCACCAGCACACTGGGTGCCGCATTATCTTCATTCTGGTTTGATACT -TGTATTGGAGGAGAGCCTCGGGCAGCAAAGCTGGTGTTGGGTACAAAATGCAAGGACCGG -GGGGGGGGGTCTGACCTTGGACATCATTGACCTATGGATACCCAAATGTGCAGATAGAAG -ATGCGATCTGCGCCTCCGAAAGCCTTAGAAGAAATGCAAAGATGAGGTGAGGTAGGTGTT -ACATGCAACTTCCGTGACGCGCGTAGTTAATCCGTGTTGGGCACTTAGGCTTCTTGGGGC -CCAATGCAATAAGCCGCATGGGCCATGTCTCTCTTCTGGGGATCCTTCGGAGGTTCGGGT -GAAACAAAGAGGGAATTCAATATCAAAGTTTTTGAATGTGGAGAGTTCAACTGTTTTGAT -GGCATAAGATCTACCCTTGCAACCCAACATGAAGCCCAATGTCTATGCTTTGAAGGTCAA -GGTTGCCTAGCAGGATTGTCATCGACCATACTCTGAATGGCAATAGGAGGTATGATAACA -TCATTGAAACTAGAGAGGGCCTCGCAAACCCCAGAAAACCATAGACAGTTGTATTTCCGG -GGTATCATTAACCCTGCGCATTTGCCAGGAAGCCGGGGATTCTATGTGGTTCTCCATCAA -GGTGACATTTTATTGTTTTTGCCGGTTTCCGCTATTGAAAACGGGCAGTTTTATGCCAAG -ACGAGAGGCATAATCCTATATTGCGTTGGTAAGTACAGCAATAGAGAGAGTTACATTTGT -GACATCTGCTCACCTCGCAACTCAAACGGATGGCTTCTTCGATACCAATACGCGCCTCGT -ACCCCAGAATATCCTTAGCCTTTTTGCCACTAGCATATCTAATAGCACATGCATCTTGTA -CACTTCCACGGCTCAAGGTTGTGGTTGTTCCCATCAACCAGGTGACAGTTTCACACGCCA -GCCCGGCGAAGTAAGCTAGACTCTTCGGGATATGCAACTCAAAAGGTGGGACGTGCCCGA -AATGCGCCCAGGTTGCCAGACAAAAATCACGGAAGACAATAGGCTGGTTATTTTGAATGA -AAAAAGCCTGGCCTGCAGCTGTGCGGGAGGATAACAGATTCTCTATGGCCAGGACGTGAG -CATCGGCTGCATTGTCGACGTGGGTGATATCCCACAAGTTAAAGCCATTCCCTATCAAAA -AAGGAGTTTCATATTTGGCGATGCACGCATGAATAGCTGGTAATAGCTGGTAGTCTCCTG -GTCCGCAAAGCACCGATGGCCGCAGGGCACATGTCACCATTTTGCTGCTCGAGGTCTTGA -GCACAAGTGTTTCTGCAGCAGCCTAGGTACCATACCGAAGCGTGTTAGTAGCTCATTTGC -TATCGACGTTAATAATATTGCTGATGGGGATCAGTGGTTCCAACCTTGGACTCTCCATAA -ATGGTTGACCTAAATGCCGAGGGCCATTCTTCATTGATGTTAGGGTGAGCGGTACCCGTA -TCATCGATCACCACGCAGCAACTGCTCGTATATACGAACACCTCAACCCCACTAAGCTTG -GATGCATCCAACATATTCCGTGTGCCTTCAAAGTTGATCTTCCATACCTCCTGTTCAAGT 
-CGGCGACCGAACCGTTCGGCCAGATTTGGGACGACTCCAGCAGTATGGACAACAACATCT -GGCTTCACAGCCTCGAATAATTCGCTCAATGTTTTAGTCGAAGTAATATTCACCTGCATG -TATGAAGTCCTCTCTTGTAGGATATACTCAGGTCGTGGGGGGCTTTGGTCGATGACGAAG -ATGACAAAGTTTGGGTGTTTTTCTGCAAGAGCCCGCACTATGGCCGAGCCTACAAATCCC -GTGCCACCAGATATAAGGACTTTCTTCGCTATATCGGTCATTGTGGAGGTTGATTGAGAG -TGAGGATATGAGAAGACTTTGATGTGGGGGATAGTCGGACTGGATAGACACAACTGTGCG -ACACCACAACTACATAACGACATCAAAGTTACCGCAACAACCCGAGAAGGACCTCGAATG -ATATTGAAAATGATACTGGAGAGAGGCCATTCGAATGGTCAGACCTTTTGCTCACTCATT -CAGATTCGAAGTTGGCTTTTGTTCTTAATGCCTAGACCACGGCCTAGTACAATCTCAAAG -ACGACCTCTGATTGTTGTGCTTGCTGAACATCGTCCAAAATCCCGCATCAGATTGCTCTT -CATGTCTGCCCCCATCTTGCTCCTCTTGTGCCTCACCTTCACAGTGAGCAGGCTCCAATA -GATCCAACACTTCTTCCAGTCCAGGATGAATTCAATAATCCTCGGCCAGGTCTTACAATT -CATTTAACTTTGACCACAACGATTTCGGACTCGACTCATCCTTTCTGATCGCTCGAACTT -CATCGTTCGTCAAAGAATCGATAATTTCTCGCATCCCATCGACAAGGGCAGTATCAGATT -GGAACTGCTTTTGCACCTCATGGCCCAATGTATCGTTGGAAGAAACCGTAATTATCTTGC -CAATCTTGCGTAGAACAGAGAGCCCATTGAAACGTGCCCGAGGGTTCGTGAAGGGACCAC -ACTGTTTCGTGATCAGTTGGATAGTATCAACGACATCCTGGGCCACCTCGAATGAGACAT -CATACTGCGCTCCACCGCGCATGGAACAACAGTATGTGACGTTAAGTAACTTCCATATTG -AACGCGAGTAACGGTCAAATTTAATGACGCAATTTCGCTCTTTTTTCGCTCTTTCTCCCG -AATGACGCGAATGGCGTCGTCCACCATGTTCATCACGTCGGGATGGATCTGCGCTGCTCG -CGTGAGAATTTCATCGACAGTCTCTGGTATGAGAGACTAGATACGCTGGGAAATTTTGTC -TGGTGACTGGACTGTCGTGTTGGTGTGGAAATCGCCAATGGTGTTAGCTCGCGCGCGTTT -CACGCTTTGAGGTTCAGCCATGATTGGTAGTTGTATGCCCGACGATCACTGAATGGATCT -TGGGAACAAAGATTTCACGAGATGGCAAGGTGATCTTGTAGGGTAAATAGAACAATGCAA -GAAGAAGGGAGTTAGAGAGCTTAAAACATGGTCCCAAAGATAGAGGGGGCCCAAACAGGA -AGATACGGCGCGGCTACGGGGTGGATCATGGGAATTTTATGTTTGTAGATGTCTTCCAGT -TACATATATCAGATTATGAGATTACACCGTCCAGAACGTATACCGTATAACCGATTGACT -ATTTTAACTTGCCAACTATGATCTGGCCATGGCCATTCGCAATGCTATTCACCTACATCC -CATCCATCCCCTAGCACTTAGGCCGAGTAGAATCAAGATATCCATACAAATACAAACTTA -GGCTATCAAGAGGGGGCCTTACTACTAGCTTGTAGTAAGCACCTCTATAGAAGAGAAGTA -CGGACATATATAAGTCATAATCTATACACTGTGTCACATTGAGGACAGCTGCAAAATCAC -CAGCAGAGCACACGGAATGCGAGATACGCAGACATCTAATATACAGATCCTTGACCCAGG 
-TATTATCCCAACAGTCACAAAAAGACTAATCTCCAAAAGAGAAAGCGCTGCGACTAACCA -ACAAACTCCAAAGTAAATGCAAGAAGAGAAACAAATTTCCCCGGTAAGTAGCTCCCCCAA -ATCAAGGACAAGCCGTTAGGACAATCGCAGACTTATAGCACACGGCACAAGACCAGCCCA -CCGAGCTTTTTCTCAACACACTCGCGAGCTTCCAGAATCCCTCGGTCTGTAGAGAGGTAC -AGACTCTCACCCGGCGAGCGAAGTCCGGCCACAAACCCGGATGCCTCGCCACGAACAACT -CGGCGCAGTCCCTCCACATCAATGTTGACCCGTCGGGTGGGCTTGCTGACCATCTTGATT -TTGCCCAGGACCGACTCGTTCTGCCAGTACTTCAGGCCCAGCCACAGCCGCCTTGATGCG -ACATTATCGCGCGTTATTGGCTCAATGCCTTCGACTTCATCATTGGCTGCAGGGTAACCG -AGGATGCTATGGGCTGGAGGAGGAGTCGGACCTCCGCGGACCACGGAGGAGATGTAGCCT -GAATTTTGGAGGGCGAGGGAGAGCTTCAGGTGAAGGTTGCTGTTAGGGATGGAGGTAAGA -GCGAGGCGGGCCTTGGTGGCATTGTTCAAGTGGGAGCAGACGGCAGAAAGGTTGACGAGA -GACATCGTGATTTCGCGACGGCAGCAACTAAGCTCTACCGGCGGCCGTTCGGAGAAGAAA -AAAAAGTGGTGGGATCGATTGATTGCGGAAGAGGAAGAATGACAGGAGATCCCAGATACA -CGAAAGGGAAGACCGAGAGAGCCACAATGGGGTGCAATTGTCTGGGGAAGTAAAGGCACG -GCAGGTGTTGTAGCGCCTGAAGATGCGATGAGGATGGAAATTTTCGGATAAATAGTATTT -CCGGGTCACGATCAGGTCCAGGTACCCGGTACGGTCTCACCCATAGCGGATATGTGTGAT -GACAATGTACTAGTACCGTCCTATCACATCACATCCATACATCTTCTAAGCGCCCAGCTT -GCATGGCGGTTGATGCAGGCGTTTCACAGGTTGAATACCCCTCAGTCTAAAATATAAATA -TTAAACGTACGTTGTTCTTTTTAATTTTCCCCTCCACTTACTGCCATCGTTGTATACTTC -CATTCTGTACTTCGAATTGTCAGGCAATAATGCCACCATCAGCGGGTGAAGAGCGACTTG -TGACTGTGTGAGTTGAGCTGACTTCCCCGCAGGTCGATAGGAAGCAGTTTTTGAATTGCG -TCTAACTGCCAGAATAGTTTTGCAGATATCCACTACTACTTCACAGAGCCAACCCGAAGA -CCTTTACCATATCACCACCGTTTTGATAAAGGCTCCTATGTTTATGTTTACCATGACGCC -TTTCAGAACAAGGCCCGCATAGAGATTGCCAATAATCCTGGATCTCCAGAGCAGGATGCA -TTCTGTGGAGGCTTGAACAATGTCCATATACGGCACTCTGCTCAGTTCCCGACACTCTGT -ACATTGACAGTTGATGCACACACGACCTCGCCCCAGCAGCAACATGCACATGATGCCCAT -CAGCATGAGTGGCGTCTGCCTAGTGGCGACCCACGAGATGACCCAAAGCAGTTGCGCGAT -TTCGCTCGCCTGCATACATTGGATATTTACTTCTGGACTCAAGAAGATGCAAACGACTTT -CTGGACACGGTCGTCCGGCTTCTGTCCAATGCCCAGGTTGAGACAGACCGAGAACCAGCT -CCACAATCACAGGAAACCATGAGCTCTGTAGTCCAGCAACTAGAGACTGTTGCAGTCTCC -GACCCGGCCTACCAGAACGGTCAAACACGCAACTCACGATCCGAGCCCACAACCACAGCA -GCACCAGTACAACAGGCACCTCCACCGACCAGCAGCTTCCCCCCACCTCCTCCAAGCGGA 
-CCTCCAGCTGATCAACGATCAAGCGCTGGGTCTACCCCTGCCGAGGAAAAGAAAGATCAA -GCTAGCTTTGCCCCGCTGCCATACAATCCGGCTGCTCCGGCCGCCCCAGAGCCAATCCAA -TATCGTGAGAAGACTCCACCGCCAGAGGACGGGATGAAcggcaccggccttgcagccgca -gtggcagcagacagcggcaTTCCATATACGCCACCGAACCAAATGATGGGGGCTGTACCA -GGCGTGGGTGGATTTACATCCCCGCCCCCCTCAACCCCAGGGTTGCAATACGCAGGCCCT -CCGGTCTCAGCACCACCAGCCTTTGCCTCCCCTCCGCCTAGTGTGGGACTACAGCATTCC -AACACTTTCCCCACCGCTCGATCCTCGCTTCACAGCCCCGGAGCGTCTGTCCCTTCATAC -CAGCAATCCTTCGTCGGCGGACAGGGAGCTCAACAGTACAACAGCGCAAGCCGGCAAGGT -TCAATGTCCTTCGCGCCGCCGCCGCAGGATCCGAATGCCCATCTATACGACCAGCAGGTT -TATGGGCCTGCCCAGGCCCAGTCGCCGCCTCAGTATCAGTCTGGAGCGGCGGTGCCGGTA -GGCGGATTCTCGAACTACTCCTTTGATAAGACTCAGGCCcaacagcagccgcagcagcaa -caccagcGTTCCGGCTCTGAATACGATATCCATAGCCAGTTGTACAGACCTACTGAAGCG -GAGGCTGGCTCCCACTATCAGAAATATGCACAGAAGGCCATGAAGAATCCTGGACAGCGG -CCCAGGAAATTGGAGGATAGAGCAGAGCGGTTGGAAGGCGGTATGAATAGGTTCCTGAAG -AAGCTTGAGAGGAAACTTTGAGCCTCCTTTTCGGGTGGGAGTTTGAGATTTTGCATCTTT -CTTTTTTAGAGTTCTCTGCTGGTTATGGCCAGCTCGAGTGCTTGTTATAACTAGGCCGGC -TGCTGGGCAGTACTGTGAGCATGGAAATCTGACAAGAACCTTGGAATATATTTAGTGTCT -CAATTTGATGAGAAAATGAAGTGCTTACTAGATAAAAACTAGAAAAATGGTAGTATGAAT -GCTCTGAGCCCCGCTATATTGATGAATCAAGATGCTAAATTCGATAATGCCCCAAAGCGC -TAAGAATAATATGCTCAAGAAATTGGTACAAAGTATGCAAGAGATAAAACAAAGAATAAT -CAGACATCCCAAAACACTGAAGGATTTAAAATGCTAGCATAGAGTAAGAAAAGAGACACG -GAAGAATGGATGGTAAAAAAGTCGGGGGCAAGCCTCGATAAATGGAAAGGGTCTCACGTT -TGGTGGGATAGAAGAAAATGGGAAGACTATGATCAAAGAAGATTAAAAGATTATTCCAAA -ATTTACCAGAAGCGGAGATGGGCCAGCGAAATCCATTTCAGATAAACATGGTGTAGTTCT -GATATAGAGCGCCTAGTGCAGTGACAAAGTGGCAATGTCGATGGCGACGTTGCGTGACTC -AGCCTCAGTCTCAGTTGCGGGCTGCGCATCGGGAGCAGGCTCAGTGGACTTGGCCTCAGT -GGAATTTTCCTCTGCGGCCTCTGCCGAAATCTCAGGGACAACAGCCTCCTTCACAGGCAC -ATCGTCTTCCTTCTCAGCCTCGGCCTGGACCTCTGCTGATTCCGGAGTCTCGAGCACCTC -TGGTTCGGACATGGACGTCTCTGGCTTCTCATCGACATTCACTAGATCGCCAGCCGTTGG -GATTGCCACGGGCTCTGCGGCAGTCTCCTCAGTTGTGGTGGTGGGTTCAGCAGCGTTCTC -CTCAACATCCTCAGCGATGGGCTCAATGAGGGCTTCAACAGGTGCATCCACCTCAGCTTC -AGCTGAAGCCTCGGGCTCGTTGGAATCAACGGTCTTAACCGGCACCTCCGTGACATCTTC 
-GATTGGGACGAGGTGAGAGGAGCTTTCAGCCACTGTCTCCTTGATGACCGGCTCAGGCGC -TTCAATCGACTTCTCGATTGGCTCAACAGATGCAATGGCAACTTCTGGAGTAGGCTCTAC -AACCTGGGCAGGGGAAGCGATCTCCTTGACTGGCTGAAGAGCAGCTTCGGGCTCTTTAAC -AACCGCAACCGGAGTGGCTTCCACTGTTTCAGGCTGTTTTGGGCTGGATGTGATGGGCTT -CGCGATATTCTCCTTCTCACTTTCCTGTTTCTTTTGGGAGGGCTTGTGTTCGTCTTTCTG -TGGCACAGGTTCCTTGTTTAGACCCTGAGATTTCTGAGTCTGAGGTCGTAGAGCGGTACT -CTTAGGCTTCGCATGGGGGACGCCACGGTCTGGTACCCTTCCAGTTGATGGCCTGGGGGC -CTTGGAAACAGATGTTGTCTTGGTAACAGGCTTTGCATCAGTCTTTTCCTGAGGGTGGGA -CTTATTCGCAGAGCTAGCAGTAGGCCGCATCATCCTCGCGAGGAAACCCTCATTGACAGG -CTTAGTGGTTGTATCATGAGTTGCTTGAGAAGGCAACGAAGGGCGAGAGGCTTGTCTGCG -AATGCTCGTACTAGGAGTAGTGGCACGCGATTGACCTCCAGTTGCGCTTTTGAGGGATGA -AGGTTTTCTTGTGAGGGCTGATGTACCGGTAGTAGTGCGAGTGCTCGGGGCCCCTGCGCT -TCCTAGTTTAACAGTAGATGCTTGGGTTGAGGTTTGCATCGAACCAGGTACACGGGCAGA -CTTCGGTGGGGACTTGGTTCCTGGCTTGTTGGACGCACCTGCGGTAGGTGGCTTCGATGT -ATCTTGTGTCGGTGCTGAGCCACGGGTTGGCCGTGCTGTTGCCTTGGGATTCAGTGAGAC -ACGACTCGGTTTGTGTGCCAGAGAGCTAACACGCTCTTTAGCCACCTCGCGGGCTGTGGG -CTTGGATGCTGTCTTATGAGCAGCAGGTTTTGCACCAGTCGAAGCTGCAGTAGTTGACAC -AGATTTGCTCGCTGCACTGCGGGCAGACCCAACGGAAGAAGGCTTCTTCTTGATCGCTCC -TGATGAGCTTGACTTCTGCGAGGGAGTGTCGGAGCTTGTAGACTTGGAATCCGCGGAGAT -GGGCTCCTCTGTCTGCTTTGGTGGGATGTTCCGCAACTGTGATTCAGTCTCAGTGTGGGC -TGCAGGTTCCTCCTTGGCGGTTTGATTCCGTGCACTCTTGGTATCGATGGCTTCCTCCGG -TCGACCCTCGGCCATTATCCGTCCTATAGTCGAGTTCTGCGTGTGGTCTAGGCGACTGAG -CGGGGTGGGGGAGATCATAGTCTGCCGGCCAGACTCCGCATCCCCCGAATTGATTTTTGG -TCCAAACATTGCAGAACTATCGCTGCGCCCACTTGTTTTCCTCAACGGCGAAGCAGGGTT -CGACTGTACGACCCCGTCCACAGTCACGAAGCTCGCCCGGACTTTGGACAGTTGCCGGCC -TGTCCCTGACGAGTCTGATGCAGCTGGAGATCGGCCACGAGGCGGCGGAGAGGCGACGGG -GCTTTGGCCTTCAAACTTGGCCAACAAGCTGCGCACTCCAGGGTGATGGGCGTTGTTATC -GGACATTATCGCTCACAGAGGGTACGTCAGTCGAACAAGGGCGGGTATAGAATCGAGGGG -AATCGAAAAACGCGTCTGCCGAGCACAGGGGTGCTCCTGAAATCAGGTCAAAACGGGATT -GTTGTGTCTAATCGGATACAGGCAGGTCCCACTGCATGTTCCCAGGAGGGGTTGAGTTCA -ACGCGTCAAAGTTGGGTAGAGAATTGGTATGAGACAAGAGGACACAGGGTGTGAAGAAGG -GCCGATCCAGAAACTATTACATACATATACGCTAGTTAAGCCTAGCGTATTCAATAGGCC 
-AATGCCTTACGTCGTACCCCGTATGTATATTGTACAGAACTCCCCATCTCAGGGGCTTTC -ATTTCTATGTGACAGGTGATTTCATCCTTGAGGGGTTTAATTAATTTTACTGTACGGAGT -ACACGGCGTACTGAAACCCCTTGTGAGGTCTGAAACATTCTCTCTGGACACTATTGCAAG -TGGAGCTTGAGTAAGGGGGTGAGGATGCCTGAGTCATCCAAAATCTCTCCGAGCCTGGAT -TTCCTCCAATTTGAGTCTTGCTTGTGAGATCTAGAGTAGTTCCAGTATCTATGTATACCT -ATATGAGCTAGGTATATAGCTGATAGTCAAGACATGTATCGTAATGTCGGGTATACCGAG -TTGACAGGACATCATTTGAAAGGGATAATAATGGATTTTACTCCGTACAAATCTCACAAT -TAACGTTGGAAGATATACTGGAAGATGTGGTTTTTTTTTTCATCATACCCCGTGTATGAC -ATGCAGATCACATTGGTAGTATAAGGCAAAAGGCCAATTACCTACCGTATCTATCGACTT -TATACAACCCTGGAAGCCCATGCAAGCTGTACCTTTGAGCCAATGAGAGAATATCGGGAC -ATGTCCGAGGCACGTGATGATACCAGTTCTTCCTATACGAAGCTTCTCTGACTCCTGCAG -CTCCTTTAAATTAGTATCATATATGGGGTTATGATTCATAATGGTTGGCGTGCCTCGGAG -CAATGGCTGCTCGCTCTGTGTTAAACGACGAGTTAAAGTGAGTGGATTATCTAACAAGTC -TAGTCAAAAAGTCCACGCCATATTGATAGCGCCACAGTGTGATCAAGGGCTTCCTGGTTG -TGCCAAATGCGATAAGTATGGCCAACCCTGCCCTGGGTATGATCGGGGCTTCAAGTTTGT -TACTGGGAAGCCGTATCGTGATCGGCGTCAACCCAATCCAAGGAATGAGAAAAAGGACGG -AACACGTCTTCAGAATGGAATCTCGAATTTTGAAATTGGTCCCGAGTCAATCTGCCAGAA -TATGGCCCAGAGAAATAAGCCATCCTCACTTATCTCTGTGGATCTGGATGTTGTGCAACA -TCTCTGTGTTCTCATTGACGATTTCTCTCAACCATACACTCCCAGTCCAACACATGTTGT -GATTAGATGGTTTGGGTTTCTTCCCTCTATTTATGGCCAAAATCGGGTATTGGATGCCAC -CATAAGGTCTTTTACCGCACATCACTTTGGGCGCACTAGCCAAAATATCCAAATGGTGTC -ATATGCCCGGTCAGCTTATGGGGAAGCCTTGTGCGGACTTCGCAAATCGCTGGAGGCACC -TGCACAAAGTTTATCGTCTCATATATTTTGTGCGGTAGTATTGCTATGCATGTATGAGGT -AAAGCATGCTCGAAGTAAGTCGTCTGTCAGTGACTGATCATATTTAGCTTTTCACAGATA -CCGAAAATCCAGAGTCATGGATGAAGCATGCCAAAGGGCTCAGCCAGCTGGTACGGATTC -GAGGACCTGATAGGTACAATACCGAGCTCGATGTAACGTTACTAAAAGCTGCTCGAGGAT -TGATTGTAAGTTTGACTCTTTGCCTTTAATATCCTTAGGAAGCATTATAAAATCAGTTGA -AACTTGGAACAAACGCTCATATCTGCACAGGTGATGCACTCAATGTTCTCCGGTGAAGAG -TGCTTTCTAGCCTCCGATGCGTGGCATGATAAGATGCGACAGCAATATACATCAGACTTG -CCCCCAGAAGTGCACAACCGCATCGAACAATTCTTTGCTTATTTCACATATGCCCCGAGT -CTTGTGCATAAGCTCTATAGTTTGAAGCATGTCGACGCCACCAGTGCTGAAGCTTTACAA -ACAATCTCAGAAGTCTTGTCCGAGGCCTTGGAGATGCAGACGAATCTGGCTATATGGTAT 
-GAGCAATTTTCTCAAATTGTACCTCCGCCAACCGAAACCACATCCCAAACCGGGGATCAG -CTATATCCGATTATTCTCACATATACAGACGTGAGCTATGCAACGATCTACTGCGGTTAC -TATTCTTATATGGTCATCATTCATGAAGTTTTGAACTTTTGTGGCTACCCGGGTCAGCAT -GAAGCCATGGTTGCCTATTACAGAGATCAAATTTGCAAATCAGTGGAATACAACAGCGTG -GGAGTAATGGGTCCATATCGAATGGGGTTTCCTCTCCGCGTGGCATTTGAAGTTGCAGAT -CCAGTTACGAGCTCTTGGATCTTAAAAAGGCTTGGACAATTTTCAAAGATCTATGCAGCC -GCCCAGCCAGAGAACTATCATACTGTCCTATGATCCCAAACCAAGTGATTAAACTATCAC -AAGTCAAAGCCAATTCCCAAAGCCATTCTCCAAGCAGTATCTGGTATTCACCACCAGTCT -CCGAAATTTTCAAAGTGAACTAGCTCAGTCCCACATCGATTCCAAATCATCACATATGGC -GCGCTTAGAAATGATCGACAACCCAACCGGTTCTGCTACAGCGCGTGCCGCCAAAAGCAA -GGCAACAATGATGACCTCCATCAGAAGCCCCATTCCAATCAAGATGCCGACATCACCAGT -GGCTGAGTTGAATTTTGGGGGTCCATACTCCAACAATAAGAAATAACTGATTCGAACCAG -CAAAAAGGGAAGAGTCAAAAATGTCAAAAGCACAGCGTGGTAACCCAACTTGGAAAGAAT -CGACCGGGATTTAACCGCAAGCCACCCAACAAGACCGGACACATAGGCGAATGTGACAAC -AAGAACAATGGATCCAGCTTCTCTCAATGCGTGCATGCTGATACAACCACCCACCACTGC -TAGCATGATGGCGATGAAAACCGGAAATCGAAAGAAATGCAGAACAAACCGGGTCTGTTT -GGTCCATTTCGAATTACCAGGACCAGTTTCTCCGGACTGCCCACTTTTGAAGAATGATCA -TCAGCAAGACTCTCAGGGCCTTGATATCTTCAACTTACCATCGCAATAAGACCAAGCTGA -CTTCGAATAAGAGAGGTGAAAGGCCCACGCTTTGTAAAATGGATTCAGCAATGAGCATTG -GCTTCTCTGGATGTGGACTGGCAAGCACCAGCGCCGAGCCTGCTATGCGAACTATGAAGA -ATTTCAATAGCCGTATTTTTGAAATTGTCGGAGTGTCTTCCACTCACTCTGTGACAGAAG -ACCGACCATACTCCACCAAGAATAGAACACACATCGTCCTATACTCTTGTTCTTACGGTG -GTGCCAGTAATGCCATTCTTGCATGGATCTTGTGCTGAACTGTGCAATGTGGATGAGGGA -ATATATCACAATCTCTGCGATGGCAGCATTCCGCTTCCTAGTTGATATTGGCGAATCCAA -CGAACGGGAGAGAAGACTGATTGCCATAGTGAGCTCTGAATTTGAAGAAAGATTTGATAA -TATGGTTTTGGGTGATAGTGAAGTATGCAGTCAAATACACCCACGAAAAAATGATTTGGA -CTACTTTAAATATATACTTTGATAATTTGGTTCAATCTCTGTCGGAGACTTTGTATACTC -AAAGCTGGTGGATCTACATCGCGGTAAGTGACCCATTTGGGTTTAGCGCTTATAGCACTT -GGCGTGGATGGCTCCTCGAAAGACTTGAATGGCTGTAAAGACTGAGCGATACGATGAAAT -GGATATTTTAAAGTTTAGCGAAGCTGTGAGAGGAATTTGCAAACACTCGATGTAGAGAAG -TGCAAAAGTGCTGTCCATGCTCAATTTCGCGAGAAGTGGTCCGTTTAGGTAGTCCGAACA -TGCATACTATACATACTCAGGCGCAACCTGGGAATGGGTTCCCGGCTGACTGCATCATTT 
-GACACTGTAACGGATATTCAAGCAGTATACTTATATGTTATATAATCTGTATAGATTCCA -TATCTGTGTGCTAGAAAGGTCAACTGGTAAATCTATGTACTCCGTACTCCGTACAGTGGG -TCTAAAGAAAGGGCGCTTCTCCGGACCCCGGAGTTAGTACTGGAGCAATAATAATCCGAT -GCCGTATACAAAACGAAAAGCTCCGATGACGGCGAATATCGATTTTAGGAAAGCAACTCT -GCAAAAAAGTCTATCGAAATGGCTGCTGCCCGGACACTTCGCATTGGTACAATCGCCCTT -GTTTTGCGATCTCGCGGCCCCGCAATTGACTGACATGGCTGGAATATCATAGGTCTCATC -CCCGGAGACGGTATTGGACGGGAGGTCATCCCGGTGGGTAGCTCTCTCAAGATTTGATAT -GCGAGTCAATTCACTGACTCAATGGTTCTTGATGAAATATAGGCCGGCCGTCGGATCCTT -GAGTCCCTCCCCGCGTCCCTGAACCTCAAGTTCAGCTTCGTCGACCTCGAGGCCGGCTAT -GACACCTTCCAGAAGACTGGAACCGCTCTGCCCGACAAGACTGTCGATATCCTGAAGAAG -GAATGTGACGGTGCTCTCTTTGGTGCTGTCAGGTAAGTCAGAAGATTCTTCTTTCAGCAC -GCGCGAGTTCAGCTGACAATTGCTCCCAACCCGAAGCTCCCCTAGCACCAAGGTCGCTGG -CTACTCGTCACCCATTGTCGCTCTCCGCAAGAAGCTCGACCTGTACGCCAACGTCCGCCC -TGTCAAGACCACCGCTGGCAACAGCAATGGCAAGCCCATTGACCTCGTCATCGTTCGCGA -GAACACCGAGGACCTGTACGTGAAGGAAGAGCGCACCATCGATACCCCTAACGGCAAGGT -TGCCGAGGCCATCAAGCGTATCTCCGAGAACGCCTCATACCGCATCTCCAAGATCGCCGG -TGAGATTGCGCTCAGCCGTCAGAGCATCCGTGACGCTGCCCCCGGCACTGCTACCCGTAG -CCAGCCCATGGTCACCATTACCCACAAGTCCAACGTGCTCTCGCAGACTGACGGTCTCTT -CCGTGAGACCGCGCGCCGCGCTCTTGCCGCCGAGAAGTTCTCTTCCGTCTTTGTCGAGGA -GCAGATCGTCGACTCCATGGTCTACAAGCTCTTCCGCCAGCCCGAGTTCTACGATGTCAT -TGTCGCCCCTAACCTCTACGGTGACATCCTCTCTGACGGCGCTGCTGCCCTTGTTGGCAG -CTTGGGTCTCGTCCCCAGTGCCAACGTCGGTGATAACTTCGCTATCGGCGAGCCCTGCCA -CGGCAGTGCCCCTGACATTGAGGGCATGGGTATCTCCAACCCCATTGCTACTATCCGCAG -TGTTGCTCTGATGCTCGAGTTCCTGGGCGAGATGAACGCAGCTGCCAAGATCTACGCTGC -CGTTGATGGCAACCTGGATGCTGCTCAGTTCCTTACCCCCGATATGGGTGGAAAGGCCAC -CACTCAGCAGGTGTTGGAGGATGTCCTGAAGAGACTGTAAAAGTAAACTTGTACATAAAT -GAAAGTTTGCAAAAGATATCTAGAGATGAATTCACTGTTTTTGCATTGACAGCCATTCAA -CTGTAAAGAGACACAAGCCCGGTGATTGTCAAAATCCATTTCTATTGCAATCGGTGCAGA -GGTGTTTGAGAACTACAACAGGCTCTGTTAAACCAAGAAATGTATCACTCAACATCGCCA -GACGATGGCTCCGCCTCCCAATCAAACTTCCCAAGACTCTCGTCGCCTTGCTCGCTCTTG -GTGTCGTCCTTCTTGTCTTCCGTTTCGCACAGGACCACCGAAAACACTACAGCAAGATAC -ACCCGTCCGGTTGAGACGTCCTTGAGCGTATATCGCAACTCATGATTATTCTCATCCACA 
-TACTTCATAATGGGCAAAGTAAATCCAGGGAGGCGGACAGAGAAATCTATCATGACCAGT -GAGCCGACGATCCCAATCAAAAGAGCAAATCCGTGCTTCACTCACCATTAAATGCCAAAT -AGGGATTGCCAAAATCAACAGAATACATCCTCCCAGGCTCAAAGTTGAACTCCTTCCTAT -TTGCCTCATCCTGGAAATGCTTTCTGCGCCCCTCGACTGTATCAGGGATATTAAATTGCC -GACGCACATCGGCAGCGTCTTCACTATAAGAGCCTTCTTCAATAATCTCATCATGGAGCG -TTGGCACGTCGTCTGTTTTCCGCATCTTATCTCCAATACGGAACTGGTCCCACGAGGCCA -ATGCCGGGCTGTACAGATACGGCTCATCAGAGTAGACATCGCCGTCTAGGCTAGGGTCGA -GCGTCCACTTGACTATCCGTAGTGCTGCGTTGAATCCTGGGGGTAGGATGTCACGAATTG -GATGATCAAAGTCATTGCCAAAGAAGAGATTGTTGCCGTTCACCGGCTCTTTGAAAATGA -CTGAGAAGCATATGGAATATAGGTCTCTCTGATGCAGAGGATGGTCGAAATATGGACATG -TGGATGGTGAATTTTCGGGATAGCCTATTTGGAGGATAAGCTCCGTTCAATGGCCAAATA -GATGAAAGGAGAGATATTACCTGTATAATCTTTGATCCGTACGCAGAGACTCACAATAGC -GTGTTCGTTTTCAATTCGAACTGTCTCGCCATTTACAGGTACCACTTGGTGATGAGTTTT -GGGGTCGTAGTCTGCACCTGCCGTTACCTTTAAGCGGTGCTTATTTGCGTTGGCATCTTG -GGTCACCATGCTTGGGGTGGGTAGTAGAGCGACTTGGCGAAACTTCGTGACAAGGCTTGA -TATTTTATAGCAGGCTGTTGTTGGAAGCTCATTTGGAGCGTGGTTAACGAATCTAATTTT -ATAGGCTTGACGCCCAACAACGTCATCGTTGTTGACCTCGGGATGACAATCAAGTATTGA -TAAGAGTGACAATTCCATGTCGGAGTTCTCACCACACCAAAGCTCAAGTAATAATAGATA -ACGTGTTTGATATTGAATAGCTCTGGACAATTTGTACCAGAGTTATCAGTGTCGTGATCT -ACTGCGTACTAATTGCAATGTCTTTCAAGTGCGTTGATCTAGGTCCATAGATAGGTATGC -ATTTTCCACAAATGAGAAGAATTCCGTAAATTTGCCAACACATCGCTGTATACTTATTGT -TATCATCGTGAGGACTTCTATTTGTTGCTCTATCTATATTGTAGTCGAAGCAAAGACTCA -GCCTTGTTCAGCCCCAGGGCTGAATCTAAAGGAATGACATCACGAAAACCTTAACCTGAA -ACTCAAATCTGATTGGTTGGAGCGGTACGTGACACCTAATGCGGTTAGTGCAATGAGGAC -GATCAACCATAAATGGGCGATGCCAAGACCTGCGCCTCGGGGCAACAAGATGTTTTCATA -TCTGCTTGAGCTATATAAGATGTCTAAATTTTCCCTCCAAGGTACTAATTGACTTACGGA -CGACTTACTTCTCTCAGCTGCTGCTAAATAGAAACGATGGCTTCAACGGATATCCCTCAA -CCCATCCATTCTCCTACTTTTAAGAATCCATTTTTGACGGTCGACTCAAACAATATGACT -TCGTCCAATACTCCCCAGTACATCCAGACTTCCTCCAAAACTCCCACTTTCAAAGTCGAC -TTCACATGGAAGAAATGGAAGGCTCTGGTCTCCGACGCCAACAATCCTGATTCAGGCCCC -TTATACACTGTCCATTTCCCATTCAGCTTTGGAACCACCATGGTTTTCAAAAAAGCTCCT -GATGAGGAAGTCATCGGGAATAGCAAGTTGAACTTGGTATCCATCAATGCCGAGTACGAA 
-CTGCGTGGACAGAAGGCCCATCTTCTAGCTCAGAAACGCTGGCGGACGGTGTACACCCAC -CGCTCGCTCAATTTCTCAGACACCGAATCCCCAGTCACAATGACATGGACTAGCGATGCA -GGTTTCAAGAACTGGGACTTTGTCTGTGTCGATGAGCAACAAATGCCTGTAGCAAAATTT -ACTGCTAATTGCTGGGGTGTCACTAAAATTGGCAAAATCGAATTCAGTGGGCCCAAGGCA -GATAATAGAGCCGCCCAGGAAGAGATTATGGTCACTGGAATTACTTTGTTTTATTGCATG -ATGCTTCGCTGTAACAATGTGTTCAATCTCTTTGGGGCTATTTTTGCTCGGCCGGGGCAC -AAACAGCATATTGAGCCTCAGCCTAAAGCTATGACTAGCGAGTAGGGTTTGCAAACTGCT -TGACCGAGTCTTTTGTTGTTTGAAGTTTATTTTTTGATTTGAGGACAAAGAGATACACCA -TTTGTTTTATAACGTTTATGATACATATTCGAGAATATTACTCCATAATACATTATTTCG -ACTATATAATTGTTTCAATTATACACAAATTCGATGCATCTTTGTCCTTGATTCACTATT -TTATTCTGGTCCACCTAGGTACCTGTCACAAGATACTATATCTCAATCAACGATCAAGGG -AAAGGAACATCCTGACCAAGCGAGCGGGCAACCCAATAGCCTACCGCCTCGCGGACAATG -TTATTACTCGCCCAGTTGCGGCTTGGATACACACGATGGGCCAGCATGACCATAAAGGTA -TCTGAGGCCCGGTCAACAACAAGTGTAGTACCAGTAAAGCCCGTGTGACCTCCAGCCAGC -ATATTGGCCATGGGACCAGAGAAATAGTACTGGTTAAGCTCGAAACCGATACTGTGCTCA -TCACCAGGGAATGCGGTATTGAAATTTGTGAACATCAAGTCCACAGTCTCTGGCTTCAGT -ATCTGCTTACCGCCATATGTGCCGTTATTTAGGATCATCTGACAGAAGATCGCCACGTCC -CCGGCGGTCGAAAAAAGACCAGCATGTCCGCTCACACCATCAAGAGCCCAGGCATTCTCG -TCGTGCACGGTTCCTCGCACTGGCTGGGGACGTTTGGGTTCGGCATCTCCCAGAACTTCG -CTCTGATATTCCGTCGGCGCCATGCGTGCGTAGTGACGGAATCTAGGGGATTGGTTGTTT -CCCTTGTTGAAAAAGGTGCTGCTCATACCGAGGGCATGGGTGTATGAATAGACATACTCC -TCGAATGGTGTTTTGGTGACCTTTTCCAGCAAGAAGCGCAGGTTCATGAAGTTCAGGTCT -GAGTATGTGTAGGTAGATCCAGGAGTGTTTTCGAGCTCATGCTCGATGACAGCTTTGACG -CGCTGCTGCATAGTCTTGTATGGTGCTTCATACAGACCAGGCAGAGGGTCGGGAGGGAAG -CCACTTATATGGGTCAACAGCATCTGGACAGTGATGCTGCTCTTCCCATTGGCTGCAAAT -TCAGGAAGGTATGATGCAACGGTCTTGTCCAAGCTGAGCTTGCCCTCGTCAATGGCACGC -AGCACGGCTACCGCAGTGAATACTTTAGTCAGACTGGCCATATCGTAGATTGTGTCCATT -TTGGCCTGGGTCCGTTCTGAGGGTGGGAGCAATGTGCCGTTGGAGTCTGCATACAAATTT -GTATTTCCGAAAGCGAAGGAGCTCACAATAGTGGCCTTGTGGCCGATGACAACAGAGCCT -CCGGGCTGAATGGGGTGAATCACGTTGTGCGAAGGTGCTCCATAGTTGGCAGGAATAGTG -TACTGGCTCAAGTTCTTTGGTAGCTGGTGAAGAGGTTCGGCGAGAAGTCCCACTGATTCA -GGGGAGCCATGTTTGAGGACTCGGTCTGCGGCGCTACAAAGCGAGCCGCAGCTTGCAATG 
-AGGCAAATGGCCGAAGACTTGAGTCTCATTGTAGAGTGTGAATGGTACGGGGAGGCATGA -GAATTTCGGGTTCCAGCCTGCAAATATATATGAGTTCGCGTCCTGTTATCTGGAGATTGT -TTATAGATTAAAATGCGACTCAGCGTGGTAACCGTTTCATCATTTCTGGATAAGATCAGC -TCCGGGGATGGGGTAATGCTCCACATATAACCCGTTTAACAATCGTACGGAGTGCCTATC -TCCGGGTTCTCCCCGACAAGGGAATAATAGACGACCCCCGCAAACTTGGAAGATTTAACC -CATATGATATCTGGAGATGGGTTGAATGCCGCAGAAAATGATATAGAAATTGTGAGGATA -TAAAAGCCAAGACAAAGTATATAAGAGCCAAGACCGAGTATATCAAAGCCAAGACCGAGT -TAAGAAATAGATAAATAAATGACAAGACCATTGTTTACAAAAGCTAAGCCAAATCTGGCG -CGGGAAGCTGAATGTTGATATGGGATCCGATACCGCTGCCGACATCCGAAATGTCATGGT -CCAGTTCTTCGGTATCCATACGCCGACGGTTCTTGCCACGGCCGATTTCGATATCAACAA -ACCGGGCACTAACAGGTGTAATTGGTTCACTTCCAGTGCCGATTGTAGTGCCATCTGACA -TAAGGTCGTCCGATTTTAATTCCCAATTGGCCGCGCGAACCTTAGCTCTGCGACGTTCCG -AGTTGACAGTCTCCTTCATGTGGGCGAACGATCGCACTTGATCTGCGGCCCACTTGTCCA -GAAGCTCATCTCGAGCCTCGACTTCTAGCCGCAGCTGTTCTTCAAGGCCAGTGTTAAGAG -TTTGAAGACGCTGCACTTCAGATTTGGCGCCATGTAGCGATGTGGTGAGGTCTGAGATGC -GAACATTGAGAGTACCGGTCTCTGCTGCGTGCGTTAGAAGCTCACTAGTGTTCTTCTCTA -GACCCTTGACGGCTCTTTCATATTCTTTTTCCAAAAGGTCCACCTGGGCCATCAAATCGC -AAGCTCGAATTCGATTGTGTTCGACTGTTTCCTCCAACTCCTGGATGCGAGACTCAGACT -CGGCAGCAGTAGCCTCGGCGGAACAGCGTTTTGAGCGTTCATAAGCCACTTGTTTCTTGA -GCTTAGATATCTGCCGAGTGTAGTTCTCCTTTGTAGCCGTGTTCTCCTTCTCAAGGTTAT -CGATAAGACTTTCGAGACCCTTCACTTCATGATGATACTTGTCGAGGGCTGTATTTAATC -TATCAATCCCAACGGCCTGTTCTTGGCCTTCACGCTCGAGGTCTTGCATTCGCACCCGCG -TGTGAAGCATATCACCCGCAGAAGATGCGATTGTATCCTCGAGAGTCTCAATCTTTTTGG -CAGCTGTCTCATATCGGCATAGTAAAGCATCGAATTGCCCTCGAAGAGCCTTTTCTCGGC -CGAGAGAGCCGTGGTGATAATGGCGTTCTGTCCCCAGATCTTTTGCAAGTGACTCCACCC -GTTCCACCAGCGCACCTAGGGTTGCTTTGCCGTCCTTAAGACTGATATTGGCAGTTTCAC -CTGGTAGCGCACGCTCAAGCCCGAGGCGAGCTGAGCGGAAGTGGGTCTGCATGTCAGATA -TGACATCGTCCACGCCGTCCCCGCAGAATCCCAAGCCGGAGCATTCTTGGGTGACTCCTT -CAAGAGCCTCAGCAGCATCGGAAGCACGAGACAGAGCATTTTCTAAGATCTTCATAACAT -TCTCGTAGAAGTTCGACGGCGGAGAAGACTGGCTGAAAAGCCCACTGATGGTAGGGTCAT -TGAACCCTGAGATTCGTGATCGACAGGCATTGAACAAGTCGCTCTTTTCTTGTCTGGCAG -CTTCCAAATCCAGGCTTAGGGAGCGGATGTCAGATGCCTCTGTGTTGGCTAAAGTAAGCA 
-CTTGTGTGGCACGAATGGGCGTTTGGGGTCTATCGGATAGCATCATGCTATCAGTAACGG -GTGGATCATGACTCCGAGCTACACGGAAATCAGGAGAGCTGGACATAATGATTGCACTGT -CATTGAACATGGATGCCTCTGCATCATAGGTTAAAAATGGAGCAGCATCATGCTGACTGT -GATTAGAAACCACTGACACTTCCTCGCAAGATCTGGGTGTGTCGGGCGTCATCCAATAAG -ACTCAGCCACCGAGGCTGGGTCGTCAAAGCCCATTCCACCTCTTTTCATGGCCCTGAGCT -CATCTTTCAGGGAATCTCGCTCTTGAAGCAGAGCTTGCAATGTCTTTTCCATCTTGCGCG -TGTCACGCTTTTCGCGCTCAATGTGATTGATTTCATCACTCAACCCGATTCGCCTGATTC -TTCTCTGGGTCCGTGTATCCAAGATTTGGCGCAGCGGAGTAAATTGAATCCTTCGCTCGG -TGCATGTTTCATCGAACGGCTCCTCTGTGTCATATGATCGCTGGAAGCGTGGTGTTGGGG -CAGACCTGCGTCGGGCTTTGCGGCTAGGTGTGCGATCCCCAGATCCTACCCCGAAACATG -TCCGACGCATTGCTGGAGTCAAACCAGTCGACCCAATATCAGCATCATCTAATAACCGAG -GACCTGGGTCGGAAAACCGAACTCTCTTTCTTGATTTCTTGGGTGTAATAGATACTGATT -CTTGCTGTCGATCGAGGCCGACATCTTGTGGCACCGCTGCCAAGCTCTGTCTAGTCCGAC -GTGAGACTGGAGAGTGGCTTGTTGAATCCGAAAGAGAGCTCTTCCGTTTTCTTCCTGACC -TAGTGATGTCCATGTTGGAGATGGGAGTATATTGAAAGGATTGAAGTAAAGACACAGGAA -CATCCAATAAGCAAGAGTGTACTGCGTAGGGATGGGCTTATAAAAAGAATTCTATAGATC -ACAAATTAGGATGTAAACAAACAAAGTGGGGCTCTATAGGCAACAGGCGCCCAGACGCGT -GCCTGGATTCATATTTCCTATTCCGGCAAGCGTAAACAAGATGCTTCTTTAGGCAATCAG -ATGATACAAGGGCAGATGACCGCTTACGCGCCCTTATGATGTCTTTTTAACAGGGCTGCT -CAGCGGTGTTTTCACCAGGTTTCTCAGTAGGCTTCTCAGTTATTCCTCTCCAATATCTTT -AAGATAGTTTCCCCCCTTTACAGCCTGTAGACGGCAGAAAAAGGCGGAGAAAACACAATT -TATGCGAGAGACTTTCAACTTCTGATGATACACGCGTGATGTTTACATATTCAACACACG -TGACGTATGTTGTGTCTCTTCTCAAGATATATCCAAAAGTATAGTTATGAATCCAATCCA -ACGATATCTTTATTAGCTACCTGGAGGATATTGGGAAGTGTATATGTATTGGCCACGGTT -ATGTGTACGGTATAGATCAGAACTACTCCGCACTGCCACACCACCTTATCGATAGCTTCC -AAGTTTCGCTCCAAATACGAAACTGGGAAACAATGCAGGTTTCTCCAAAACCCTGCAATA -TTGACAATATTGGCAATATTAGGCTCGTCCTCTTTCCTGCTAGGCTCGCCTCCCTTCCGT -GTGACCCTCATGCATCTCCCCTCTCCTGAGTCTCCAAGTGATGTCATTCACAATCTGAAG -AGGCTCGACCATCTCACTTATCTCCTGCAAATCTCGATGCATCAGAGAAAAGATGGCGAT -TAGTAGCCTTTTCAGCCAGGTATGTCTAAGTATTACTCTTGATGAAAATTATTGTAGACG -GCCCTGTACTCACATTTCGATCTTACAGGAACAAAGCAACGATGGTCCCTACGACGAGCC -AGTTAATGATGGGCAGATATGTCCACCCCCAGGTCAATTGTCATTCCTTGACAAATTACC 
-CGCTGAGATCATTCAAGATATCGCGTCTTATCTCTCGGCTCCAGATCTCGCCTGCCTCGG -TGCCACTTGCCGAGCCCTCGTCGATCATGCATCGAATGACTTCCTATGGGCTAACCTGGT -GAACAAGAGACTACCTGCTCCTATCCAGAACCCAGGCCCCTTCGAGTCCTTCCGTCGTCT -TTATCTTGCATATCATCCCTGCTGGTTCATTCCTCAGCATAAAATTTGGTTCGCGGACAC -TGAACATACGGGCATGCTGATCATAGCTCGATATGACAGCCACCGGGGTGTGATTGAAGC -ACACCAAGTTGTTGCAGAACGCGGTACTGCTGGATTCCAAATATGGGCGTCAAACCCAGA -AGTTATTATCCAGTCTTTTGAACCCAGTGTTCATCTAGCGTTGGATAATCCTGTCTTGTC -TCTCAGTGACCCTGATCCATCCTCCCGAAAAGCCCCAATCCAATCCCTTCACTATATGCC -AGAGGAACGACGCATGATCCTGGCTTCGGACGCTCGAAATATCTACACATCACTGTCGTT -TTGCTCCCCATTCAGCCCCCGGGGACCTTGTTTCAAACCCAATTTACTTTGGCCGCCACG -AACCATCCCGAGCAGGGCACGCACAGTTCGTGATCTGAAGGACTCAGCACCACCAATAGT -GAGACATCCGTCGGAATTATCAGAAACGTTCTTCCGTGTCAGGAAGTGGGGAAACCCGTG -GGCAAACCCAGAGCTCCAATTGTCGCCGGTGTCAAAGAAAGCAAGCCTAACCTACTCAAC -TCTCGATCCCGTTCTTTACACACCCACTGCAGAGAAACCGTACCAAGGAATTTGGGTTGG -AGACTATGAAGCCCATGGGTGTGAGTTTCTGCTTCTTCTCCAGAAAGAGATAAGGGACCT -AGCACACGATGATGAAAAAGAGGCGAAGGTCATCGATGCCGAGGACGAGGTCGAACATGA -CAGCCAGGAGGATATTGGACAACGAGGTACATTACACGCCGTCAAGTTGACGGGTGACTC -CAATGTCCCTCGTGGGCAATCATCGTTTATTGCCGAAGACATTGGCCCCAGAGGTCTGGT -CAGTGTTGCAATGGATGAGCCTTTTGTTGGCGCCCGAATTGTGCGTTGCCGCGGCCATGT -AGCTGGACTAGGGTTTCTTGACGGTAAGCCTTGCAGATAAACTAAATCGCTCTTTTCTAT -CTTTTCTAACATGTTATTAGACACCTACATTGACTCCCAATTGATCCTTATTTCTCATGA -CCACATGGCTCTTTACTGGAAGCAGATAGGCCATGTCTCCTATTACCACCGCGTGGACAT -TGATGCATTAATGCATAGTTGAGTGGCTTGTGTGATTTTTTTCATTTATCTGTTACATAC -CCCAATGTTTTCATGTATTGAGTATGGAGTAAACGAAGGATGAGTTTCCTTTTTGGGTTA -GTTTGGTCTTTTTAGGCTTGGCGAATGCTACGCCGTCACATAAGGATCTCCCCTTCATTG -TTCTTCCTATAAGTCCTCTATTACAATGTACGGTGTACTTCGCCCCCTGCAATTGTATCC -AATCGGACAGCCCGCTTTCCCACCCCGCCTACTCCTTGCTTTCAGTCTTTCAGTCGCAAA -ATGCGGCTGAGTTGTCGCCTCACATCTTACCTCAAGTTCTTGGGATCTGTATAATCTGGC -TTGACTACGATTTGAAACGTGGGAACAATTTCGTTACAGTTCCTATAATACATATTATTC -AGTGGTGCAATATATATTATCCTTCCACCAGATGCTCACTTATTCCACAAAGCTCAGATC -ATCTTGACTGTCCCGTTTTCCTCGAAGGATTTACCTCCCCGATCACACGAGTCATTTCGT -TGCTTGTGCACGTCCTGCTGCCTCGTCCGTGTGACCCACTTCCTTCGGTTTACGCTGGTC 
-TACTCCCTACTATAGGCGGGGAAATCTGTGGGGGAAACCGGAGTCGGAATCATCAGTGCG -AACCAACCGGGGCCTAGTTGCTCTCTTTCTCCTCTGTTCTCTGGGCCGAGTCGTATAAAC -CTCAGACTTCCCTGATACCATCAAAATGTGCCGTTGCCCGACTATCGGTAATACACCAAG -ACAAGCCAAGACTTGAGGAACAGGTGCCAAGAGAGCTTGGATCCCGAGTCCAAATGATCA -CCTTCTTTTTCATTTGACGACGTCGCCGCTGTTTCGAGGGTATCTTTTCAATGGCGCACA -ATGCTCCTAACACATGGCAGCAAGTATTTCCATAAGTAATTTGAAAGGTGACCTGCATCC -TTAAGGCTATTGCACTGCAGGCTCGCTGCAACTCAACATTTTTCCGCAAGGTCATCAGAT -GTCTTCCCACACAATGTCCAGTGCCAAGATTTTCGGTCTCTTCGGGGTCCAACTTTATTG -AGGTTTCGGAAACCATCCGAAGGGGCGGGCAAATGAGGATATCTTCAATTGAAGTATGCG -TCATATATGCCCAGTTTGCAGGGATAATAGAGTGGCCTGGGCACGAAATAAAGCGCCATT -GATGTGAGTTTTGGAGGTTATTGGCCCGCAAACCACCACCCGTGCTTGGCATGGGGTAAG -AGCTGGGGTACGGGCTGCACTCTTTGATACATGGCCGATCGAACTCTATCCCTCGTGGAC -AGCTAGAGTTATGGGCGTATTCTTGTCCGAGCATCGGGGAATTGAGGAATGCGGAGTGAT -CAAGAATAAAGCTCGATGCAGGGCTGTATCAGCCCTCGTGGCGCAATATTGAAATTTAAG -AGATCGTGCATGCCCGTTTCCCGCATACCTTTTTTTTCCCTTACTGCAGGATCTTTCTCA -CACTTTTCTCTCTTCTATTGCCAGCCTCGAAGACCCTTGCTGAGCTCATCGGCTTCATCC -GAGGTGTGCTCTCTTGAGCTTTACAGCGCTGCCACATTGTCTCTTATACAGGCTCATCAC -TTTGATCCACAATTGACCTGTTCATTTTTTCTTCACGACCAGCCTAGAAACTACAAATAG -CATCCTTTATCATGGCAGGTGGTGCTGTTGTCAATGTCTTCAAGTTCAACACAGGCAGTT -TACCTAAGGAGACACTCAATATAAAGCTATGGTTCGCCGTCTTTGCGTTTGGTCTCATGG -GTGCTGCCCGTGGTGTTGACGAGGGTCTCATCACGGGTGTCTTCAACTCCCACTCGTTCA -AGCAGTCTGTCGGTATTGATGACCTCAATAAGGGTGACCTAGCTAGTATCAAGGGTACTA -TTTCCTCCATGGTTCAACTTGGCAGTATTGCCGGTGCTCTGTTGTAAGTCTTGCTACCAA -AACCTATTACCGTCGGTTGTATACCTGAGAGTCTTTGCTAATCCCAATTGAATATCAGTG -CTTTCGTTGTTTGTGATCGCATCGGTCGTGTTTGGGCTACTCGCCAGCTCTGCTGTCTCT -GGATTCTCGGAATCGCCATCTTTATTGGAAACAACGGCAACATGGATGCCGTCTACGCTG -GTCGCTTCGTTGCTGGTCTTGGTATTGGGCAGACTTGTGTCGTCGGTCCCATCTACCTTT -CTGAAATTTCCCCCGCTCCCATCCGTGGTCTTTGCACTTGTATGTTTACTGGTGCTGTGT -ACCTGTGAGTTCAGCCCTGTTGACTCGTTTTCAATTACTAATGCTAAGTACAGTGGTATC -ATGATTGCCTACTTTGCCAACTGGGGTGCTCAGATTCATATGGCAGACTCCTTTAACCGC -TGGGTAAGTGCTGGCCACTACAATCATTGTTGTGCTAAGTAGGCTAACGCGTCTTCCTCT -ATAGGCTGTCCCCACTTCACTCCACCTCATGTTTGCCGGAATTATCCTTCTTCTCACCTT 
-CTTCCAGCTGGAGTCCCCCCGGTTTTATATCAAGCAGGGCAAGCGCGAGAAGGCCCTCGA -GGTCCTCTGCAAGCTCCGTGGCCTACCTGCTGACCACCCGTACGTTCTCAACGAGATCTC -CGAAATGGACATTGCCTTCCAGGAAGAGATGGAGGCCACCCTCGGCATGGGCTGGAAAGG -CCTGTTCAAGGAGATCCTTGGCATCAAACGCAACTCGTATCGTCTGTTCCTCACCAATCT -CGCTCAGAACATGGCTTGCTGGTCCGGTGGCTCTGCTATTACTGTGTACGCCCCGGATTT -GTTCACGCTCGTCGGCATCACTGGCCAAGAGCAATCCCTGTTCTCGACCGTTGTATTCGG -CGTCGTCAAATTCGTCTCTGCCATTATCTGTGCTCTCTTTCTCGTCGATATGGCCGGCCG -TAAGCGCTCTCTCATTATTGGCATTATTCTCCAGTCCATTGCCATGTTCTACATTGCTAT -CTTCCTCAATCTCGTCCCCATTGCCGAAAACCCCGACTTTGTCCCCTCCGAGACGCAGAG -CCGTGCATCGACCGCAGCCATTGCCTTCATCTACATCTCTGGTGTCGGTTGGGCCTTAGG -ATGGAACAGTGGGCAGTACCTGCTGTCCTCGGAGCTGTTCCCTCTCCGTATTCGTGGTAT -CTGCTCGTCCATTACCATGGCCATGCACTTTATCTGCCAGTACGCTGTCAACCGCTCTCT -GCCCGAGATGCTGCTCGAGCATGGAGGTCTTGGCCCTCACGGAACCTTTTACTTCTTTGG -AGTCATTTCCGTTCTTGGTGGCTTCTGGGTTTGGCTATTTGTGCCCGAGGCCGCAGGTCG -CAGCCTCGAGACTATCGACAAGATGTTCGACCTTCCCTGGTACAAAATCGGTCTTCATGG -AAGGAAGTTTGCCGAAGAGTACGACCGTGAGCAGGAACGGATTTACCACGATGAGAAGAA -GGATGGAGGAGTTGTTGTTTCCCATAAGGAGACCGCGTAGTCTTTCTCAGCTAAATATAA -TTATGTGTCCCTTGGAGGGACAGTTTTACTACATGCCAAATCTGAATATGTCTAAAAAGA -TAAACCGAATCGAAAAGGTAATGAGCCCTGTATCATGTTACCCCGCACCCATATGATATA -TTTCAGTCTTGGACTTTTCAAATTCCATTCCTATCCAGGTACGTAGGCATATAAAACAGG -CTCCTTGAACTTGATTTAGTACCCTCTCTTCATACACCCAATTGAACTTAGGGAGAAAAT -CGAGTTTATACCCACTAATGGCGCTCCCAATTTACATCACACTTTGTATTCACACCCCCG -TCGACCGACATCACCTCCCATCGTCTGATCAACCGCTTAGAATTCAAATTGAAGAACCAC -TGGTCGCCATACAAAAGCTCCTGCCACATATCTCGTGACATCTGGAACCTCGAGACCGAA -AATTTCCATAGCTAGCAGGCTCAGACCTAGCACGATTCACTTATCAAAGTATGTATGGAA -GGGATTATTGTCCAGAAGTCCCGGGCGATCTAGTAGTACGGGACGAGTACTTGGGCTGGA -TGCCAGAGAAGCGTCCGGAGGAGTGAGTTCACATCAATATCTCACGGTGGGAGCAGATAG -CTCGAAAATACTATGTATGGCTAACAGCCCCTCTCCTTTGAACCTGACTAGGCTGGTTGA -CTATTATGGTGTGACGTTTGACCATATCGTGACAGCTGATGATCACAACCCTGAGGTGTT -ACAGAGTAATATCATTGAGATTGAGGATGACAATGGTGTTTCTGTAAATGAATGGCTCTT -ATTTGCAGTAGATCCGACGGAGTTCATTGGGAAGAAAGTATTAGCAATGCCGAGATGTTG -CCAGAAGAGGAAGGGCACACAGGATCACTGGCGAATGAATGCACTTGTGGGCCAGAGGCT 
-TCATGGGCGGGAAAAATTGAAGGGAGCTAAACAATAGGAAAAAAGTACCTAAAAGGCCAG -TTCAGTTTTTACAAAAATGACGAATTTGCTGTCGAAGTTGTGCATTCTTCAGTTATGTTT -GAGTCCAGCTATTCACCTCTATTTCCACACCGCAAAAGATTCCCCCCTCCTTCCCGCCCT -GTTGATAGTACGCCTGTCGCGTCGCTATCTCGGCCGCGCTCCGACATTTCGAGTCTGTGG -GCACCAGGACCGAGGGATGAACGATTGGGACTATCAGGTAAGGTCTGTCAAGTCTAAATA -TTGACTGAGCAAGAGCTTACTCGAGGTATTCAGCCGGCGCTCTGGTCAATCCTCAGGATT -CACATCAGAAATCTCCGTGGTTCGGGAATTTAGGATATTCCATGAGCGGCTGCGTGAACT -CATTACATCAATAGGCACCTTTGTCCCGCCGTAGTCATATGCCAGTCTCCCATCACCACT -GCGGGTGAAACAAGGTCTTTGCATATACTTCATGTCATGAGAACCTTTAGGCCTAAGGGA -GACGAGGAGAAATGTGAGCTTCTTGGAAGGAAGACTTTGTATCGCTGTTGTGTATGCTTC -CTCACTCTCGGGCACAGTCTCTAGGATGACCTTCTTGACTTCTTGAAAGGTATCCCGAGG -TCCTTATTGAATGACACATGAAACCTCACTTGTTTCGCATTCTGTATTTCTCGAGTCTCT -GGGTGGAGCAATCTCTTACTGGTGGGTCATGTTGCTCCCGATAAGTCCTGTGGCATAGCC -AATTTGATGTGCTGTTAGTTTCTTCGTCCTCTACCAAATGGCAGCATTGTCCTCAGTTTC -TCGCATTCTTGAACTAGTTGAGTGTAACCATCCAACTAATCATTGAGTCGACCATGGCGT -CAAGTCATAAAAACCATGCCGCGGGAATTTCAAGCTCTTCTGGCGACTTCATTATCCAAC -TCTGCATTTAATTGGTCGTGGTGTAGTGTCAAAAGATTATGCCGTAAGAACATCAGATGG -CGAGgaagaagaagaagaagaagaagaagaagaagaagaagaagaagagaagTGGCCTGA -TATTTAGAGAAGCGAGGAAGGCAAACAAATCAATGTGATTGTCCTTCATCTCAGAAGCGT -GGAAGAGAAGAGAGTTGTCTAAGACTCAAGAAAAGTTACTACTTTAGGCATTCCATCCAT -CACTTTTCCCTGTCAGTCTATCCTGTTATCTCCCCCTGTTCCTCTAGACTTTGGTCAAAA -GCAACACCCCTTTCCATATCCTATCAAATGTCATTTCATCCAAGATCATACATTAATTTC -CCATCAGCCATGGAGACTGAAGAGAACAAAGAGCTTGCAGCCCAGCCTCTTGCCTTGCCC -ATCGTCGAGGGAATGCTAAATTTCTCGGGGAATATCGAAAGACTCTCAGTCCCTCTTCCC -GCCTCGCCTGTCATTGAGCCAATACCAACTTTTCCTGAGGGGACCAAGCGACCCTCAGTT -CCCCCTCCTCCTGCCTCAACTACCATCCAGCCAATTTTGCTGTCTCAACCTAGCCGAGGC -CTATTGGAGGAACTCAGTGATCATAGCGAGACACTAGACACACAGCTTACCAATGTGTCG -CCTCGTCAGCAGTCTCACTACCCCGTGATGAAAGAAGCTTTGAAAGTACTTATACACAAC -ACCATCTCGCTTGTGGAGTACGGGAACCAGGTGCTATACAGATGTGGATATGCAGAGGCT -CTGATTATGAGTGGTGAAAGCCTCCTCCTTGGTCTCTTGGATGAACATTTGATTAAAAAC -ATTTTGATAGCTTGTAGAGTGGGTTGTGCGCGACGAGCAGCTGTTGCTTGCTTCACAGCT -TTTGCTCGAAAACAACTGGCCACGCCTTCTTTTCCATGAAAAAGATTGGAATGGATGCCC 
-TAGAGCCCCTGAATACTGGGAAGCACAATACCTGATGCACGATCTTGATGGCCAGGGCTG -GATGCGTGTCCATCTTATCCCTTTGTCATTAGTTGGTTTCACTCTTGAGGATACAGTCGA -GGTTCCCTCGAGCTCCGCCTCCTCACACCCAAGCCTCCTCATTACATGTCATCCTTGATA -AACTATCTCTTGCAGCTCCCGATTGGCCATAACAGCAGAGTTCGAATTGAGAAGGACCTG -ATGGGTTTCATTTCTGCTTACATACTCAAAGATGGGCCTGCTGATACAACTACTTGGACC -CATCTCAATAACCTGGAGAGCGACGAGGACTACCAAAAAAGAATTGAAGAAGGCATCCAG -TTTATGAAGACATGGGACTGGGGTAATATCGAGGAAAGGCACCTAGCCATAGCCGAACGT -GCGGTCCAGGATTGCCGCTACATCGATACGCTGACGGACGTTCATGATGGGAGCCGCATG -TTCTGAATTGACCTTGCGAGGCTGTAGGAGCTTTTGATGCTCCTCGTTAAGAGACGCAAT -TTTCTGCATGGGTGCCTTTTCCATCCAACTGATCTTTTGATCTGAACATTCTTTTACGTT -TGAAGTCTCTTCAGTGGTTATCTCCTCCATAGATTGAGATATCCATTGTATGCAGTCATG -GATAATAGCTTCACGTATTCTTCTATCATTTAATTAATTTGCATAATATAATTCTACATC -ACCAGCGGAAGTATCTGGGTGCGATATCAGACCATGTTGCAAAAGCCGGGTATTTTCGAG -AATTATGAATTGCTTGACACGCTATATAAGTCACTGTACGTGACATATTTCCACTGCCGG -GTGGGATTTCCTATCTTAGCTGCCGCTTTTACGAGGGCACCTATACCGCTGCGTGCCTCT -ACAATACCCTGAAACACTGTGATCTTATCTTGTAGTCTTCTTTGAATACTTGAAGACTTA -GTTTGGCAGTCCACCATAATTTGATCGTTGTGAGACAAAAGAGAGATAGTATATTGTGCC -AGTGATATTTTCATTCCACCGAAAGGGCTTTGGGGAAATTATACTAATCAATCTATGTCA -ACAGGCTAATATTGAAAAAAAAGACTGCCCACAAAAACCAGAAGAAAAAAATTCTACTAC -ATATCATTACACACAGGTAAGTAAAAAGGATGCGAAACCGTCTAGAGCAACTAAGCCACC -CCAGACACAAACCAGGGGCACCTCAAACACAGATCTAGATATGCCACacaatgtacaaag -accttgacaagagcaagacaattacaCAGCTGCACCGATAGAGACATTCTTAGAGCCTTC -TTTTGCTGCATGGGCGTCAATTTTCTTAAAGTTGTTGAAGGGGCTGAAACATAAAGCGAT -TCCCGCCGCAGCGATCGAAATGGCGTAAGCGACTCTGAGACCAGTCATGTAAGCAGGTAA -AATAGCAGTCATCTGCTCAACTGTAAAAACGTCCCGCAGCTCGCTCGCACCAGTCGAGAC -CACTAAGGCTGAGTTGATGCTTGGTGCGGTTGCGGCAAGTTTGATGATAATCTGGTTGAC -GAAGGCAGCTTGTGCTGCAGCCAGGAAAACCGCACCCCCGACTGTCTGAAAGACTGAGAG -ATGAGGTGTGTTAGCGGGTATATATCAGATAATCAAGAAGCAGTTAGGTAGGTAAACCAC -TCACAGACGATAATACCTGTCACGGATGCAATATCACTGGGAACCACTGCACCATGCACC -GCATTGATGGGAACCTGGTAGGCTAATCCCCATCCAACTCCGCCGACAATTTGGTATCCA -ATCCATTTGCCTGTGCTTGTATCGACGTCCAGCGAGTATAACAGCCCCGAACCGATGGTT -GCAATGACCCCACCAACAACCATAAGAGGAGCAGTATGACCAGTTTTGGTGATGCTTCCA 
-CCAGAGACAATGATAGCTAACGACATCGAGACAATCAAGGGAAGGTTTCGTACACCAGAC -ATAGTAGGACTGACGTTTTGAATGCTCTGGAAGTAGTAAGGAAGGTAATAGACTAAGATA -AAGTACGAACCTGCGAAAAGAAACGCGTAGATTGAATTCACAAGGACGAGGCGGTTTGAC -ATCAAGCGCGGCGGAAAGGCAGCCCGTTCTCCTTGGAACCATTCCCACGCCACGAAGGCA -ATAATCATGAGAGGGAAGCCGACAAGAAGACCGATCACAGTACTGGAGTTCCATGCCTTC -ATCTGTCCACCAAACTGCAACGGCAAGATATATGAAATGATGGCACAGATCAGAAGAATC -GTCCCAATAGGATCTATTTGCAGGAGCCTTTCCTTGAATGTAGCCTTGGTTGTCGAAGCA -CTATTGGGTGTATGGAAGAAGAAAACAATGCAAGTGGCTGCGACGCCTCCAACGGGCAAA -TTGATGTAAAAGCCTAGATGGAAAAAGAAATAATAAGTCAGCGTAGTCCAGCGTCCACTG -AAGAGCAACTACTTTATTCAATCAATTAAACAGCAGAAATACTCACACCACCTCCAAGTT -ACTTTATCAGTGAATGCACCACCAACAAGGGGGCCAACAACCGACGCTATACCGTAAGAC -ATGCCAATAATACCGGTGAAGAGTGGCCGCTTTTTTGGACTGGCAATGAAAGCGATGATA -ATGAAGACTCCACCCCCAATGCCAGCTGCCCCAATTCCAGCAATTGCACGACCGACAATC -AGTGCCACGGAATTTGGGGCCACACCACAAATCAAGCTTCCAATCTCGAAGATGAAAACC -GCAACCAGGAACGAGATCTTCAGCGGGAAAAATTTGTATACCTTTCCCCAGGTTGACTGG -AAGCCTCCAGTAGTCATAAAAAAGGCAGAGCCATACCAACTGGCATCATCGAGGCTGTGG -AACTGATCGGTAATTTTTGGAATAGCTGTCGCTACAATCGTCTATCGTTGAACATTAGCA -TTAGATGGTGATGGAACTTTGAAGAGAGGCTCCTCCTCACTGAGAAATTGTGAGACATAC -CATGTCCAGTGCAACCAAAAAGATGCTCATGCCAAGAGCCGCGACAACAACGAGTAGCTT -GAGGCCATGAGGATACTGGTCTTCGTCGTCCAGGTCCGATTCTTCAGTATCTTCCCTTTC -CGGTAGGTAAGAAGTCTCTTGGTCATTTGGAGCATTGTGGCTCTTGTTCTCTAAATCGGT -CACAGAAAGAGTATTGGTTATTGATGACTGGACGCTCGATTCTGCGTTATGTCGCTCTTT -TACCACATCGAACGTTGAAATCATTTCCGTTAATAGTAATGGACTGTTACCAGTCGATAA -TTGCAGAGGAATAATGCCCACGACACGACAGGACAAATGGAAATAAATAACAGTAATTTC -GCACGTAGAAGAGGCCCCTTAGAATTTACTTAATGGATTTGAAAAGCATGTATTACAAAT -GACCGCCGAATTTGAGCTTGTCCCCGGCGGAGCTTCTTCTAGACGTTTGCTTGTCCGTCC -GACTCCGGACGGAGGAACGAGGAAGCAAACAGTCACTAGCCTGCCGCAGCACGTTTCGAC -CGACGTACAGTGGGACTCGGGATACGGAGGCCCCCTGGCGGCAAGTAATCCTCCGTATTC -GGGAAATGTCTTATGGGATATAGGACATAGATCCGTCTAGTCTTTAAAGTGTCTGATTTC -TGATTCTATATTATATTATTACCAACCACCTAGGTAGCTACTAGTAGTAGTCAACATTGA -CACCTCCTGCCGTATTACTCTTTCCTGATCATGGTGTATCAGAACTCTCCCCCGGAACAA -AATCATTCACCTGCGCCGACTTGAACAAAGGGTCACGTCATCTTTGAACGAGATCGTCAC 
-AATATGGTACGAGGCCAATTCTGCTAGTGAATACATTTTGTCGGGCAAGAGTAGTACAGC -TAGATGACTACATTAGCTTCGTGCTAAACGGCTATTTGTAGTTTAACTGTTGGAGATATA -TCCTGAGATCATCCTCCTTGGCCCCGAATTGTCTGAACCGCCAATGTTATTTCTGATTTG -CCAGTCATTCTGCGTACCTTGTCCAAATGGGTATCCTATTCCATTTCACATTCCACGCta -aatgtaagatgtaatataaatataaatataGAGGGGCTTTTGTCCGCCGCATGTTGCATT -TCGCGGCGGTCGAATGACATCCGCTGAGTCAGCTGTCCCCCGCCGACGGTGCAATCGCCC -TGTTCACCTTCCATCATCTTTTACGGCCTCGACTCTATGCATGGATACGGATATGGACTA -CGAGATCATTGCCGAATCGGAGGCGAAGCGGAGGAAGATCCGCAAAGGGACCAAGAGTTG -CTGGGAGTGTAAAAAACGCAAGATGAAGTGTGTGTATGCGGACCCTAGCTCTCCAGCCGA -TGCCGAAGTCATCTGCATTGGCTGCCAGCGGCGCCATTGTAAATGTGTGAGCCAGGAGTT -TGAGTTTGTTGATGGAAGAGAGATCGCAGGGCCTCTCGAGAGGAAGGGACGGCACCACAC -GAGCGGTAAAGGTAATGATCGTGTTGCGAGGGTTGAGGCATTGGTTGAGCAACTGATCAG -GAAGGTGGACCGTCATGGGGGAGTAGGAGTACCTGCAACTAGCGAAATTCCAACCCCCAG -TTATGGAATCCCTACTCCAGATGTTGCATCAATAGACCAAGAATCTTCACGTTTCTTGTC -GTTATGCAAGTCTTCGGATGTACGTATATATTTTTCGAAATCCAGTGTCTGATGAACTAT -CACTAATTCTATCAGAATCATGGCCCGGCTGAAGCTGAGGGGAAATATGAAAGACTATCT -CAAAGATTGCACGAGTCCTTGCCCTCACGACAAGTTATCGAGGTGATATACAATGCCCGC -GGTAGAAAAGCCATCCTTTTTTATGAGATGTTAAACGCTCCATATACTGCTCTCAAGCAG -AACGGCCTCAAGCCGCCAGACACTTTACTCGAAATACCTAGTCCGAGTTCGCATCCAGTG -CTGATCGCAAGACATATGCTTTACATTGCTACCTTCCTGCAGCATCTATACCCTAATTTT -CTTGGGGAGATGAAGTGTTTGCCTGAATCAATCCCCGCCCTGATGGAGCGATTAGCAGAA -ACTGTTATTAATTTGGTCACGACCAACGACGAGTTTTTTGGCAGCATTGAAGGATTGGAA -TGTGTTATGATGGAAAGCATGTACTACCATAATGGTGGAAATCTACGACGAAGCTGGATA -GCGAACCGGAGGGCTATGGCTATTGCGCAAATGATGAACCTCCACCAAAGTCAAAGCCGA -GTAAAGTACAAGGTTCTTGACCACAAAACGAAAGCTTATCCACAATTCATGTGGTTTCGA -ATTGTCTCCCTCGACCGTAATCTGTGTCTCATGCTGGGTCTTACACAAGGCTCTTTTGAT -CAGAGTATGACCACTGGCACAGCATTCAGAGATGACACTCCCATGGGCCGCCTCGAGCGG -ATGCACTGTATTCTCGCGTCTCGCATATTGGAGCGCAACGACTCTGACCCGTGTGCTAAT -GACTTTGCCCTGACACAAACCCTCGACTTGGAGCTACAGAAAGCGGGTAGAAGCATGCCT -AGCAAATGGTGGCTTATGCCAAACCTAGACAGTGTCACAGATGATCCACAGGCTCTCTTT -TTAGATATGGGACGGCTGTTCAACCAGTTATATCATTATAACCTGCTCATTCAGCTCCAT -CTACCTTATATGCTTCGTTCAACAACTGAACAGAAGTACGAATATTCCAGATTGACCTAC 
-GTGAATGCCTCAAGGGAGGTTCTTTCACGATTTATCATGTTTCGTGGCTATAACAAAAAC -AATTTCTGCTGTCGGACAGTTGACTTTTTTGCACTCATGGCCGGCATTACACTACTTCTA -GCGCATCTCGACAGCCACCGGTTTTCACAGACCAGCAACCTCTTAGCGCACCAATATCTT -AGCGATCGTGCCATGATAGAGCAGGCACAGGTGAACATGGAACAGATTAGTCGCGTTAAT -GGAGATACTTTAAGTGCCCAAAGTGCGGACTTATTGCATAAGTTGCTGGCCATTGATTCC -GAAACAGCAGATGGCGTCAGTACGGAGAGCGTCAGCGTTCAGACACCAGAAAGTGCAGTG -TTGCAGTCAAAAGACAACGACAGCGGCATTGTTCTGGTGCAAATACCTTACTTCGGTACC -GTCAGGATTGCCCGCCAGGGCGTTGCTTCCAGGGAAATGGCTAGCGCACAACCTTCTTTA -AGTAATGACCAACCCTACGCTCCGGTGATTAGTATGTATAACACCGAGAGCTCGGAAGCT -ACAGTGCGTGATCCTAGCTCTGGTCGCTTCGATGGGAATGCAGTCCCAAGGTCAATGACT -GAAGCGGATTACAACTCTGTCGATAGTGTTGATGGTCTGCCTGCGCAGCTCACATCGCAA -TATCAGGACACCGTTTCTAATAGCTTCCTGCAGAACTATCAGGATCCTGGGCTGACTGCC -AGGGTGGACGATTGGGCATTTCAGGGTGTCGATATGGCTTTCTTTGACAGCATAATGAGA -GGCCTCGATGACGGTAATGATTCAGAAACCTGGGATAATGGCCTTTAATCTTAAAGATAT -AGTAGTATGCTATTGATACCAAGTGGTTTGCGTCCCAACCGGTCAATTTATTGAATTGAC -AGTTTGTTCAACAATTCACCTGCAAGTTCAATCATGGTGCATGGAACGATGGTGCATGGA -ATCCTTAACAAGTATCTTGAGATAAAAATTCGGCGTCTAGCACTCCCTAGTCCAAGAGAA -GACGCCATCCAGGGTGGCTATAAGTGAGCCTAAAATTCGGCGGCCAGTAATCAAACGGCC -TCGCGCTGATCAATTTTGCATCTATGATATCTTAAACCAATTCTCTGGTACAACATATCA -CGTCACTGCCTATATCAAAGAATATCAATCTTCACACAAAGTAACTTTGGGCCATACAAA -CGAGCAACTAAGAGATATAGACGTGGTTAAAGTGATATAATACTCCGTACATGAGTAGTG -AACAACGATGTAGCCCACAGCGAAACTCAACCGAAGCTCGCCATCTACGGAGATTTTGGA -TGGAAAGTCAAGAGATCGCCATCTCCAGCTACAAGCATCCAATGTCGACCCAAGTTCAGA -TATGCATGAGCAAGATAGTAGGTCATTGGCTTGCATTTTCTTGATGATAAACATCTTGAC -ATGCCTGTGATTTGTGTTCATATCTACAGACAAAAGCTTGATAAAATCGTCTGTTAACCT -CTGACTGAATTTGAGTGGTGATATAGAACAGGCAGAAAGACAAAAGGTATCAAAAAGAGT -CAAGACGCCATGACCTGGAAAGAATTGGTCGAAAGTGGAGATTGAAGCGCAGATAATAAT -TGGTCATCACTCATCAATCCAGCTTTGAATCATGTACTTGTGGTGTCGCGGTGATTTGAA -AAGACACCACACCGTCCACAACACAGACTCAATCTGTAAATACTGATATTGTATCATATT -GCCATAGGGCTAAACGTCGAGGTCAAGGCCCAGATTCATGCCCCCAGCTCAGTCCTGCGG -CGATTTATGCACCCCTATGAGTTGCCAAGTGTCCCGCTTCAGACCCCGCAATCATACATC -AAATGGCCGAAAGTCGAGTCACGAGGAAAAAATCGCCGATTGCCTCCCCGAGTTGAGTAT 
-GTCGGAACCAATCACCGGCGTCGTTCGAACCAGGCAGACGAGCCCATCACAGCTACTGAC -TGGTCCATCATCCACTCGCCAACACCAAGGTAGCTGTCAACCCATTTCTTAACATCTTTG -TTATCTGTGATCTTGTCTGAATTAAGGCTGGGGCTAGGAAGGTCCCACGCACACACGACC -CAACGGGTGTGCGCGAGGGTAAACAGAGTTAGCCAACGATCCTCCTATGCATATCGAGAG -TGCTCAAGATGAGTCACGAGCGGAATGAATGAAAGATTGGAAGAGGATCGTGAGGGGTAG -GGTGTATTTACGTGAGGCCCGTGGAGCGCTTGACACCTGCTAGGAAGGCGGAGACTAAGG -TTGAAGACACTAGTTAGTTTGTATTCCAAGTTCAAATAGAGACATGGGACTCACAGAGGA -CCGCATCAAATGCGTAGTGGGTAAGACGGCCGAACTAGATTGCACCGAACATCCAAGTCA -GTCATGGCATGCTCTATGTTTGGGGGACCTCGGTTGTCCAGGATCTACCAGCAAAGGGCT -CTTACCATAATGCCAATTAGAAGTTGTCAATACTGTGGATATTGAGTAACCGGTAGTCAC -CAAAAAGCTTGAGGGACAAAAGTTAAGAGGAGATCCAAGCTGTCATGGAGACCGCCCAAA -TAGCGGTCACGTGTAGATTCACATGACGTCTAATCCATAAAGAGGCTGGAGATGAGGCTG -GTTAGAAAGTGAGAACCTGACATAATCCACCATGTGGCTGGGGCCGGATCCGGCCATATC -CACCTCAACACTAGTCCAGATTCGGTCTATCGATAAGCGCTCTCATCCCACCGAAATCCT -TTTTCGCCTCTGGGGTCTTTGGGCTGGGGCCTCGATCAATTGGGTGTCTTTATTTGTCAC -GTTTACATAGTATTCGATAAAGGGACTCCAATGTACAGCGCGTAGGCCAGCACGGGGCAA -TCCTCCGCTTTTTTGCGGTCAGCCCAGTCTGGAGTTGATGTATGATTTTTTTTTCTGCCT -GGAGTATTTATGTCGCCTGTGCCGACACCTCCGGCTTCCAGTAAATATATACGAAGTATA -GCTAGACAAAGAGAAAGAAACAAGATGAATCAAGAATACATATTCCACCGAATTTCCAGG -TTAATTTAGTATAGATATGGCTTAGTGTCAATAGTACGGACAAAAAAAATGTCTTGGGAT -GCCGCCGATACTCACACCGGCGGCCAACATTTTCCCTCCCGCCCCTCACCCATCCGTTTT -CTCTTTCCCCTCCATCTCTCACCCTTTCTCTTCTACCATCCTCTCTGACAATTGATCAGA -ATTCCCTTACCTTTTCTACATTAATTCAACAATTGTTGATCATATCAAAATGGCGGCCAC -CAGCTCCCTCGATCACCTGAGCAACCGCATGAAGCTGGAGTGGCACTCCAAGCTCAACAC -CGAGATGGTGCCTGCGAAGAACTTCCGCCGCACCTCCATCATTTGCACCATTGGTATGTC -GCACCATTCATGGAAAATTCTTTCAGATCGAGTCTGTCAAAGGAGATGGATTGAGAAAGG -GTTCGATCACTAACTGCAATCGATCAGGTCCCAAGACGAACTCTTCTGAGAAGATCAATG -CTCTCCGCACAGGTATGGCAACAAAAAACTTCCGACGGAAATTGGCAGCTTTGTTGGGTA -GTTAGACTGATATGTATGGTCATAGTTGGTCTTAACGTTGTTCGCATGAACTTCTCCCAC -GGATCATATGAGGTTAGTATCTTCAATCAACATCCGATGAAGACCGTAGCTGACTTGTCT -GCAGTACCACCAATCTGTCATTGACAACGCTCGCGAGGCTGCCCGTATTCAGACTGGTCG -CCCTCTCGCCATTGCCCTCGACACCAAGGGCCCCGAGATCCGTACCGGTAACACTGTCGG 
-TGACAAGGACTACCCAATTGCGCAGGGCACCGTGCTTAACATCACCACCGATGAGGCATA -CGCCACTGCTTCCGATGATAAGAACATGTAAGATAATCCCCATTTCCAAAGGGTGAATTG -TTCCAGATTGGACAGTCTCTCAATGCATCCGCGTAGCTGGACGAGCTTGACTTGTGACGT -GGACTGACAAGTGTGTGACGGCAGGTACCTGGACTACAAGAACATCACCAACGTGATCGC -TCCTGGCAAGCTCATCTATGTTGATGATGGCATTCTGTCCTTCGAGGTGATTGAGATTGT -TGATAACCAGACCCTTAAGGTCAAGTGCTTGAACAACGGCAACATCTCGTCCCGCAAGGG -TGTCAACTTGCCCGGCACTGACGTAGACCTCCCTGCTCTCTCGGAGAAGGATATTGCCGA -CTTGCAATTCGGTGTCAAGAACAAGGTCGATATGGTCTTTGCCTCATTTATCCGTCGCGG -CAGCGACATCAAGCACATCCGTTCCATCCTCGGCGAGGAAGGTAAGGAGATCCAGATCAT -CGCCAAGATTGAGAACCAGCAGGGTGTCAACAACTTCGATGAGATTCTTGAGGAGACCGA -CGGTGTTATGGTCGCCCGTGGTGACCTCGGTATCGAGATCCCCGCCCCCAAGGTTTTCCT -TGCCCAAAAGATGATGATCGCCAAGTGCAACATCAAGGGCAAGCCTGTTATCTGTGCCAC -CCAGATGCTGGAGTCCATGACATACAACCCCCGCCCCACTCGTGCTGAGGTCTCCGACGT -TGCCAACGCCGTCCTCGACGGTGCTGACTGTGTCATGTTGTCTGGTGAGACTGCCAAGGG -TAACTACCCTTGCGAGGCCGTTAAGATGATGTCTGAGACCTGCTTGCTCGCCGAGGTTGC -CATTCCTCACTTCAACGTCTTTGACGAGCTGCGCAACCTCGCCCCTCGCCCCACTGACAC -CTCTGAATCCATTGCCATGGCTGCTGTCAGCGCCAGTCTGGAGCTGAACGCAGGTGCCAT -CATCGTCCTGACCACCAGGTATGTACCGCCACAGAGGTATTCTCGTTTGTGTGATGATCT -TGCTAACAAGTTCACAGTGGCAAAACCGCCCGCCTTGTGTCGAAGTACCGTCCCGTCTGC -CCCATCTTGATGGTCACTCGCAACGAGACTGCTGCCCGGGTAAGTCTGATGCGATCGTTT -GAATGTCGTTCTTTGCTGACAACAGTTTCCAGTACTCCCACCTGTACCGCGGTGTGTGGC -CATTCTACTTCCCCGAGTCCAAGCCTGATTTCAACGTCAAGATCTGGCAGGAGGATGTCG -ATCGTCGCCTCATGTGGGGTATCAACCACGGCCTGAAGCTCGGCATCATCAACAAGGGCG -ACCCTATTGTCTGTGTCCAGGGATGGCGCGGCGGTATGGGCCACACCAACACCGTTCGTG -TGGTGCCCGCCGAGGAGAACCTCGGTCTGGCCATGGAGTAGACCTTCTGTAAACTGATCA -GATGGTAAGCTTTTGGTGATCGATGATCCATTGTGCTTTGTTTACCGCCGGTGTGCGGAT -GGGATACATACTTTTTTTTTCTTCGTTCTGGCTTTTTTTTTTTCTCTTTCGAAGCCAATT -TTGCTCTGCGCTGTGTGGTCTGGACCATACTCTGGAAGGTTAATGCATTGCTCATGTAAA -TGAAACGAATTTTATGCTTTCCGGTCAAGTTAGTGGGTTAGTAAAGACTAGACAATTCAA -ATTGAATAGACATGACACTGGTATTTTGTGCTTGAATCCTTGTAGACTTGATCGCGCGCC -GAGAGGGTTCGCAATTGTCGCTCTCTCGCATGTGCATATGCAAGCTAAAATAGAGCAATG -CACTGTAACTGCACTGTAACTGCACTGAATTACATGCATTGAAACTGCAATCGTCCTTTT 
-AAAAGAGGGCTTCAGTCAAAAGAAGAAGAGAAAAAAAAGCGAAAGTCGAATGAAAATCAG -ATGCGGGGGACGCAGGGACCCGACACAGGCTTTGACCCGCGAGACCGAGGCCCATCCGTC -CGAGGTGACAATATCAAGCATATGCCGTGGACGCGTTCTGTCCGCATGTCCTCTCATGTC -CTCTTTCCTGTCACAGAAGACAATTTCTGAAATCTTCTCGACGTCATATGAAAAAATGGA -AAAGAAAAAGAGAAAAAAGCCAGTAAATTTCAACAAAATTTCTGGCAGCCCTTGCCGTCC -CGATCCCGGTTTCTGCCGATGGTTCCAATGTTGAGATTGGATATAGATCTGTGTCATTTC -CAGTTGAAATCATCGATCCCTAGTGGGGACGTTGATTATTGTTCATATTCTCATCCGATG -CAAATAAAGCTATAATAGCATTGAATTGAGCACACGTTATTCTCTTGTCAATCTTCATGT -CGTTTGCTCCTAAATATTCCTAGGCTAGTATATAGGTAGAAGGATATCCACTAGCACCTA -GTGGGAGAAAGAATTAGTCGAGTAGCTGCAGGAAGGTGATTATCTATCCAGATGTCACTG -ATTTCAATCAGCCACATTACCAATACTTTGGGCCTAAATGAACCGTCGACTACCTTCTTC -GTAGTCAAGGACTGTGGACTCTTGCAAGAAAAGACCCAGGACAAAGTCATGCAAGCATGA -GTATACCGAGAACTTTTCTATAGTAAGAAAGTTTTCACGTACTAGGGGTTATGCCAGAGA -CTGTTCCATATTAGACAGCGTCTTTTCTTCCAATACGCTTCACGAAGCGAGATCATATAA -TGGTGATGGGGGCATTGTCTCTTCCTAATGCCATTCCACCTACTACGGCAAAGGACGCTG -GTGGTTTCTAGTTGACAACCCCGTGAAGCTTTTCATTCTTGTCGCTGACTTCCTCAAGGT -AGATTTTACAATGCTGGAACTGCCAACCATTTATGAAGACAACATTATAAGCCGGTCAAG -CCTTGCCATACTAGAGATTCGTATAAGCGAAGGTCAAACATTGTATATTTTTCATCGCTA -TAATTTTGAAGAATAGCGCACCATTTGCTTACTTTCTGAGTGACGTCAAAAGTCAGGATT -TGTTCAGCCAAAGACCTGGCGAGAACCATAACCAGCCAGTGAATTGAGAGCTTCTCGGAG -CAAAATCATATATCGCTTCGCCCGTTCTGGGCTTGCTTTCATCATGTACTATATGATAAA -GCGGAGGAAAGCCCGCTGGAGAGATGCTCGAAATTACTGTGAGGCATCGTTCTCGATGTG -TCCCTTATCAAACGACAACAACCGAGGACTTTGGAAAGCAAAATAGCCAAGGACTGCATT -TTTTTCCCAGCGATGGCAACGGTGACAATATGACTGTTTTGAGCAGTCTTTACTTCTTCT -TTGGTGGTGCAACATTGAACTGGAAATGGATGATGTCGCCATCTTGCACGACATAAGACT -TGCCCTCCTGTCTGTACTTACCAGCTGACTTGATTGGCCCCATCGACTTCTCGCCGTTGC -AGAGGTCATGGAAATCCTGGTATGCCACAACTTCGGCCTTGATGAAGCCGCGCTCGAAGT -CGCCGTGAATGACACCGGCTGCTTGTGGAGCTAGTGTTCCCTGCTTGATCGGCCAACAGC -GGATCTCTTTCTCACCGGCAGTGAAGTAGTACTGCAGTCCCAGCTTAGTGAAGCCTGTTA -TGACTGTCAGTATAGAGTACGTAAAGAGAAGATCAAAATTTCCACATACCTTCAGTAGTA -ATCTTTCCCAAGCGGCTCTGGACCTTAATGTCGGCCATGAAGGCATCTAGCGCTGGCTTG -TCATCCCTCAGTGACCACAGCTTCTCTTCAAACTCGACAGAGAAGGGGATAATGTCTGAG 
-CCTTGGCCGCCATGGGCCTTAACCCACTCCGCGATCTTGGGCAGATATTTACACTTCTGG -CGGAGGTAGTCCTTCATTGTAAGATTAATGAGGTAGATCACCGGCTTGGTAGTGATAAGG -CGCAACTTGATAATGTCAGTATTGTCTCTGGGGCAACATAGAGTCCTAGACAAACCTTCT -CGTTGATCAACTCGATTTCCGGAGTTGTCCACTCTCCATCTCTAACAGGCTTATCCTGTT -CCAAGAGCGCCTTGATCTTGGCAGTCGTCTCGTCGAACAGCGGCAGCATCTTGAACTTTC -CTCCGGCCTTCCGCACGATAGCCTCTTCGGCAATCTTAGCTTTGGTAAGAATATCCAAAT -CCTTCTTACACAACTCGCTTTGAATTGTATCTGCATCGCAGCATCAGCATTTATTTCTCC -TGCGTTAGGGATAAACTGCTCACCTAAGTCGCGGACAGGATCAACGGAGTCATCCACATG -AAGAACCTCATCGTTATCAAACGCCCTATGAAATAATCAGTAGCTGCTTTCATTTCTCGA -GTTTCCGCAACGAACGCACCGGACAACGTGGTACATGCCATCCACAGCCTGAATATGACT -CAAGAAAGCGTTTCCCAAGCCAGCTCCCTCACTCGCTCCCTTGATCAATCCTGCGATGTC -TGTAACCTGCAAATACGCCGGGTATTTGCTTGGTGGGTTCCAGACATTGCACAAGAAATC -CTACTTGAAGATCAGCTAAAACCTCCCAGGAAGGGTAAGATCCATCACGTGAGGATACCC -GCACAGAATTCAAAACAGGGAAAAAGCATACATATCTTGGATCAGGAACACTGCAGCGCG -CCTCATTCGGCTCGATGGTGCAGAATGGGTAATTCTCTGCGGAAGCGCTCTGCTCTGTCA -TGAGATTGAACAGAGAGCTCTTGCCCACATTGGGCAGGCCAACACAACCCATCTTGAGGT -TGGACTTGACGCGCCCGAAGGCGACGACATTGTTCGTCGGCGCCTCTACTGCTTTCTTTG -GCGCCATATTGGAATGTAGTTATCGTTGGATTTCGATGATAAGTTGATTCTCAAAGGCCG -TGTGTGCCAATTCAAATTTTTTTACAGCAATTAAAGACTTTGTTGCCATTGGTCCATTGT -TACATACTCCTTTCTCCCGAGGTCCACCTTGGGATAACGGCGATTCTACGAAGTACTTTT -GTCGAAGAAGGAGTTGGGCTGGTGGTAACTCTGGAGGAATCCTGGACATCTTGTAAGACT -GTTCTGATCGAAAGGTTTGGGCCTTCTGGAAGGGAAATGGTCAACTACGCAGCGAAAGAA -AAACAACTTAGATTGCGAGCAGATGTTTCGACTGAGCAGATTTTGACCCAAGACTCCAGA -AACTACACTGTCGCTGTATCATCTTTATACGCCCTGATCTAAATAGAACTTATCGGATAT -ACATTGTGAATGAACAAAAAATGTGTAACAAGCAGTTATACTTGAACACCATTTCGTAGT -CTATTTGTTAGTATATAATGCACACGTGATGCTGAGATTACGAAGACTTTGCGAAGGGAG -CATAAACCAGCGTCAAAAGGTAGCCATTTCCAGGGGCCAATGCACCGTCGATTGCTGTTA -ATGTTGAGCTCTATACATCAAATATAATAAAGAAATGCCTAATCGAAATATAAAACTGAA -ATCAAGTAGGACAGCCCACAATATTCCAAAACAGTCAAGTATAGACCCTTCTTGGTTCAG -TATCTTTCTCTTTCTTAAATTTGACACTTCGAATTGATGTTATCATATAGGTAGGTCTCC -ATACTTTGTTTGACAGGGGATTATCCGCCATTGAGCCCATAGGTATATTAATTTAATCTA -AGCACTATATGTACATTGTATGTTGTATGATTCTGAGGAATCACCAGACCTCATCTATGC 
-CAATTCACACTCGATAAGACTCTCGACACCTTTCTGGGCACTCCAAATCTTTGTAATCTT -CAACCCAGCAGATCCCACCAGTTGGTGCCAGTGAGCTTCAGTCCGCTCGGTTGAGGCGAA -ATCGGCCATCATGATGATATCAAGACTAGTAGTCTCCCAGTACGCGTTGGTATCAGGAAT -AACATTCTCGTTAATCAACACCTTGCTGTGTCCACGCTTCATGGCATTCACAAGGCCCTT -CAAGATCTTACGGCAGTTATCATCTGTCCAGTCGTGGAGGACAGAGTGCATATAGTAAGC -GCGAGCGCCTATAGGAGAAGATATTAGCTTTGACAGGATCAGACAACGTTAATGAAGGGG -TAACTATACCCTTGATAGGCTGCTCAGTGAAAAAGTCATGCTCCATTGTTTCAATCGAGT -GCAGCTGCCCTGTCTTGGCTTGAGCAATCACCTCAGGAAGATCCTGGAGAACCAGGCGTC -CCGGAGCATCAGACCACTTGCGCCTGAACTCTGAAAGATCATGGCCCATGCTACCACCGA -CATCGACAAGGAGAACATCGTCCTGGGTGACATTGGAGAACAGACTGGGCACGTCGTAGA -AGCCAACATCCATCCAACTAGGACGTCCTTGGTGGTAGACAGACATGTGATTGTTGAACT -GACTAGCAGCCTCCGGGTTCTCCTTCATAAACTCGAAAAAGTGAAGGTCAGTTCGGAAGC -CGCGCTGGAAAGCACATTCCTTGCCATTGGATGGGTTAGAATAGCCGTCCTTCTTAAGCT -GGGCAGGCAGTGCAAGCACACCAGAGGTGATGCAGTTAGTCCTAATCAATGAATTAGTGA -TAAACTCTTTGATTTTAGGTCAAAAGGCGCAAACTTACATGCAAGGGTATGCATCATTGT -ATCGTTGCAAGCTCAGCGAAATGGAAATTCCGGTGCGGCGGTATTCATTAGGACCAGTCT -CAAGGATGACTCCCATTGCGCACAAGTGCTTCAGGATACGAGCTGATCAAATATATAATT -AGTTAAGTGTATCAACAAGGCGTATGGTGAGATGGTGAGTTACATACCAAGGAGAACTGG -GTCAGCACCTGTAGCCTTGGCCAGCTCGGCAGCAGACTTGGGCTTTTCATCCTTGGACAG -AATAGCAAAAACGTCCAGATCAACGCCTGTCTCAAGAGCAGCATAGAGAGTAGCCTATGA -AGTCAACATTAATCGCCGCTAACAACACAAACCAAGTTTGAATCCTTAGAAGCATGGGAG -CTTACCTGGGACCAGCAGTATCTGATCATAGCCTCACGTGGGGTTTCAAGTGCATAGACC -AGAGATCGGGCAGCCTCTAAAAGCGTTGTTCGATCGGTGTGGTCTTGAAGACTGGCAACT -TTACCAAGGGAAGCAACTTGATCGACTAGGCCAGGCACCAGCTCGGGTGTATTAGCGGAA -GTTGCTACGCGATCAACAAAGGCATTCTCGAGGCCTCCGTTGGTCTGGAGGTCTCCATTT -GCATGATTTGCGCCGTTCATGTGCGAGTTCCCGTTAGCGTGGGTGGTTTCACTTAATGTC -AGGGTTGCAAGATCGGCCATTGTGGATTGTAATATGAATAAGCTTGTGTAGAAGAGCGTG -AGGAGACGTGCAAGTCAAACGTACTCTAGGGGTCTTTAAATATACTACGAAGTACATTGG -CTGTCCCTTTACTTGTAATAGGTTGCATTCACTGTCTCCTTTTCTTTAAATAAAGCTACA -CCCCCTGAACAACGGTTGGACTAGGTGCAATTGTGCGGGTCTGGTGTCATGAAGATCAAC -CAGGATTTCATTTGCTTGGACACTGCGGGCTCTTTCTATTATTGCCGAAGTTGGGGGGCG -AAGGAAGTGGAGACGCGGTAGTTCAACGTCGTTCCTATTTTGGTAATAGACTTGTAGATT 
-ATCTAACGGCTCGTTCAACGCCAAGAAGTTTACTCCATTAATTAAAATTAGGGGCGAACA -GATGAGTCTTTTCAGGTCCCCGGATGGATCCCTCACGAGTTGACATGGCTGGAAGAGGTG -AAGTCATTACTTTTATAGGTAGTGGGTGGACCAAACAGATCTGGGCATTGGGCAAGCTTG -GAATTCCCCTTCATTCTATTTGTGCAAAAGCCATGTGTGTATTTTGGATCCATAAACTGC -ATAAATGTCTATAGCGCATGTGCATGGCAGAGACAGAGCATGTTTATCTGTGGGACTAAG -ATTCTGACAGGCCCGGCTAGGTGGAAAGAAATACGTTGATATCCACACCAAATCCCGCAA -AACGGGTACCGGGACAGTGGCGCATTTTTGGCGCATTTACTACGTTCTTCTACCCCTCGA -ACGCTCTGTACTTTTGTCTTATTCAACTCCGGCTTTGGGGAAGATCCAATCGAACTGGTC -TTCGTATAAGGACTAGTCTGGTCGGACTATTAAATCTCAACTGGATTATTACTAGTGTGT -TTTGAAACCTACAACATATGACCAAAAAAATTAGCAAGGGTCTAGACTGGATGATCCAAT -CGTTTCTTCTTGGATTTTTTGGATCATATTGCAAAACCAATCCGGTTATTCGATATCAAA -GCAGTATGAGCTTTAGGGCTATTTACGATGTAGGTCCGGGTAATGAGCTTACCTAAGGTA -CCATCCAGGACTAGATGCTGTCAGTTCTTTAGCGACATTTGACTAAATGGCTACTCCGTA -GTGCGACAGGGGTTTCATGCCCCTATCTGACTAAAACTGGGCTAACAAGTTTGTGGTCAA -GCTTAGTCTCGGAGATTACTCGCCGAGTAATGACATTGTGTTGACAAAAGATGAACACCG -CGGCCTTCCTAAATAGGGTGATCGTCGCAGCTAAAAATCCAAAGGATTTTTTGCAATGGA -CTTCCAGGAATAGCTGGTCGAATAACAGCACACTTAAACCAGTCTTCTGCCTACATCTAT -GTATTTCATGTAAATCCTTAGTTGCTAGTGTTAATTTTAGCTGGCCCTTCCCGTATGGAT -TTATATAAACTAATGGACCGCACTCCCCCGATTTTTCTCGCTTTTTCTGCATCAACATAA -TGGCGGTCACCGCTGAAAATCTTCATGATGACAAGGGTCCCAAGATCCTAGCTGTACTAT -GGACCTTGACTGGACTAACAGCCATCATGGTTGTAGCCAGGGTCTACATCCGACTCAAGG -TGCTGCGGAACTTCGGTATGGATGACTATTTGATTGTACTTTCCATGGTATGGAACTTCG -AGCTTTATTTTCTCGACGTGAATCTAATTTGTGCCATAGATAATGGGGCTTGCATATTGC -GCTATCTCTACAGCAGGCGTGAGTGTGGGCTTTGGAAAACATGCCATGTACTTGACTATG -GATAATCTGGAGATGGCGATTCTGCTCAATACAGTCAGCTTTCTGTTTGGAATCCTATCC -TTTACTATTCCCAAGATTGCGGTGACCTCTATGTTGACCCGAATCTTAAATCCCAAGCCA -ATGCATAAGGCTATTCTCTGGACGATGGTCGGAACTGCAGCTATCGTGTCCATTATCTGC -ATTGTGATCTTGTTCACAATGTGCGATCCGGCAGAGGCATTATGGACGCCTAGACTTATG -AAAACAGGCGCCACCTGCAAGAGTACAACGATGCTAATTGACTACGCAATCTTCACTGGA -GGTGGGTGTCTTCCTCTTCAAACAGCGACATCGCTCGGAATTAACCTTGTATCTCTAGCT -GTCTCTGCAGTTGTTGATCTGACTCTGGCCATTTACCCAACCACCGTCCTGCTGAAGCTC -CAAATGTCTCTACGGAAGAGACTTGCGCTATGTGCTGCTTTAGGGCTTGGTGCAATGTAA 
-GTCGATACCTACCTAACTTATGCCATACTTCTCGGAAGCTAACCGGAGTGGAAATGAAGT -GCTTCTGCGATGGCGATTGTCAAGTGTACCCAGCTTCATGGACTCGCCGACAAAGAGGAT -TACACATGTAAGTATTTCAATCATCCACCTAGATATAATCGAGGACTGACCAAAACAAAA -GACGGTACTGCCGACCTTGTTATGTGGACCAAGTATGAATTACCCATCTGATGCTCTTTA -GTTCTATATAAACTAACCTTTTTCTCTCCGTTTAGTATTGAAGCCGACGTCGTCGTTATA -GCATCATGCATTCCCACCTTACAACCATTACTCGAAATGATTCTCGGTAAACGAGCGATG -GGCTCCTACAGTCAAGGAAAGAGCGATCAGTTCAAAGGCAGCAGCAATTTCCCATCCTCG -TACAATCGCTCTAAGCCGTCAAAAACCCACGACGATCTTGGCTTCACCAACATCGACCGA -GAGAGTCAAGAAAGCATTCTTCCCACAGATGACAACAACAATAAAAAGAGCCACCCTATG -GGACACATATACCGCAAAGATGATGTGACCGTGGAATACGAGTCATCAGCACCAAAATCA -GGCCCTGGGAATACTTCTTGGTGAAGACGCATGGTGAGGCTGAGCTCCCGGCTCCCTCTG -AAACGACCATCAGACCATGTTAGCCCATCAAATAATACATCCAACAGAGAACATTGAAGA -GAATTTTGCTGATGGACATTGCACCTTCTGCACAAATTAGTCTTTACTGGTTGCACACAC -ACATATTTCACAGATTTCTTTGCCTTTTTGAATGTATATATTATGCACTACTTACGTTGA -ACGCTTGGATCAAAAATTGGGCAACACCCATATTCCTCTGCCTTTACGGATAGAGGCTCT -AGATATAAAAATATATAGCGTTAAACCTGCTATCATAAATTTGACTTCTCACAAGCTCAT -CTACTTGGTCGTCTAGATTTCCAAAGTTGGCCCGGTAGTTAAAATCGTTCTCTTGTCTAA -ATTAAAAAGCAAAATATGAATACGATGTGAATTACAGAGTGGCGTATTTAAAGTCAAAAT -GATCTCATTGAGTAAACGAAGATGTCCCGGGACTGGCTTTGAAAATAATATGAAATGTCT -GTCTTAAACGAGGCAATGTAGAGTCATGATAATGAGAAATTTGCAACAAATGAAAGCACA -ACGCTGCGCTGCACCTCTTCATGCAAATTGATCAGGTCGTAAATCATTAAAAGTTTTTTT -CATGGCCGGGGGTCAGAATCAAGGCCGGCACCAGCGAAGCTATCAGGAGCCAGCGCCTTC -CGCATACCCGCTCTCATCAACGGTCTCAGCTTCAGTGATTTGGTTCTCATAGCGGTACAT -CGCAGTTGGTTGGAATTGATGCCAACAGCTAATCATGGGTTAGTATTTAGTCGCACAAGA -AAGAACATATATCAACTAACCTTGGAGCTTGAAGCACATATCTGGCCATTGCCTCGCGGA -AACGGCCTTTGTATTCCCTGGGACTTGTCACCGTTGGTCGATTGCGGCCACCACCACCAA -ATCCTTTGTCCTTGATCCAGGACTCGAGCTTTTTGTCCCAGGTGTAGGTGCGAATACAGT -CAATGATTCCTACTACCAGTTCGTCGCGGTTTTCATCAATGGCGATCATGAGCGAGTAGT -CCATGACATCTTGGCGACCCAAGAAAAGTGTGTCATTCCACACACTTTGGCTAAGAAGCT -TCTTTGAGTGCTCACGGGCAAAGAGAGGCGTCTCATAGATAAAATCCACCATGTTTTCAT -CTAAGAGAACCTCGTCTCGTTCACCGGTGCTCTCTACTTTGCGGTTTCGCATTGAGCCTT -TCAAGTCGAAGATGCGCGTAGGGACACGATCGTAGAACAAGTTCTCCATTAGCAATAAGA 
-ACCAATTGAACTCTGTTCCTGTAACTGGGTTCTTGATAATGACTTGGTAAAAGCCAAACA -TCTTGGCGATTGCGGAAGGGAGTTCATGAAACAGGGCTTCGGCCATTATTTGGAAATAGG -CCGGAGCAAATTTGAGGAAGGCCTGCGTCTCAATCGTTGAGAGCGATTTGAGAATGAAAC -GGTCGTCAAGGGTCTTCAGGAATAAAGATTTCGTCTTGCCACCCTTTGAGTCCCACTTTG -CGCAACGAGACAACGATTCAACGATTCGATCTGCCACACCACACTTTTTGCGTAGAGCAT -CAAACTGCTCAGCGTAAAATACTTTGCACAGCATCTTGGCCTGGCCTTCTTGGAATTGAT -ACTTGAGGTGTGTGCCCGTAGGTCGCAAGAGAGCACGTTCGATTCGCACTTCCTTCTTGG -CATCAATTTCATCCCCGGCATCTGGTTTTTGTTCTTCCACTCCATCGAACTGCTGTTGAA -TGCTCGAAAGTTTTTCTTTGTAGTCAAAAGAGTCCAAAGCGAACGCCACCAAAGAGCTTG -GTTCATCCTCACGAACAATAATATTGCAATCAGCAAAGACATGATCCATCACACTGAGTG -GATACTCGAGAGACGCCCACCCACTGGCAGAACGTTCGGACCAGAAGTTTGTCAGCATCT -TCAAAAGCGTAGTACGTTCGTGCTTGGGCAGTTCTTTATAGTCCATTGCATAGTCCTCTG -CTGACTGTTTCGTCTCTTCGTGGGAGTCTACCGGATGGAGCTCTTCAATCATGTTTTGTT -CCTCATCACTTTGGTGGTCATCTGCTTCACCACCAGACAAGGGTTGGTTGTCAGGGCAGA -ACGCACTGCCTACTTCCGTAGGCTCTGGGGGAGAGTCATTTGGAAGAGATGGTGCATCAT -GGGTTGTATTATCATGCGTTAGTTCATCGCTCTTGCGACTCATGTTGTCCATTGGGATCC -GTGGAGCAGAGGGTAAAGATTCATCGCTATCAACTGGTGGTTCACGTTCTTCAACAGCCT -CTCGTACATTTCGATAGACCTCCACGATTGGCTTTGAGGACGCCAGTGGGAATGCTCGGG -AATGAGTGCCCTTAGCAGCGCGTTGACGTCTTTCACGCTGTCTCTCTTTTTCGAACTCAC -GACTCAATTGTTCAAAATGTCTGGCAAGGTGAGAAACCTGTGATGTTCCCCTTTTACTCG -GAGCAGACCGTGGTATAAGCGAATGTCCTGAGGCAAATCGGGCGCTCTTGAGGGAACTGA -ATCCCAAACGATCGCTCAGTGTCCTATCTGATGACTTTGTTGGTGATTGGCCTGAAGACT -TTTCTGGATCGATTGGGCCAGTGAAAGCTGTTTTTAACCCCGGAAGTGGAATGATGTGAG -ATGCCCGTGGTATATTCGCTGGCTGAGATTGAGTGCGCAAAAGTGGAGGAGATCGTGTCT -TACTGGGACGTCGAAAGGCTGGTTCAGTGGGTCTCGGTATCGCTGAAGTTCGACGTACTG -GGGTACTTGGTATGTCAGAGGCCTTGTTGTCTTCAGTCCCAATTTTGATTCCTTGATCGG -AGATCTCTGAGTTTTCCGGCGGTGATAATTCAGATAATCTGTCGCCGGAGATCATCACAG -GGTGGTCAGGTTTGATTGAAGATTGATTGGAAATGTCAGCGTGACCAACGGAGCTGCGTT -CCGATTGTCCAGTAGGAACAGCCAAGTCAAGATGACGGACTTCTTCTTGTGAGATAACCT -CATTCTGGGGAGAGGACCTCGTTCCGTCAACTGCATCGACTGACAAAGTTGCCTGGTCAG -CCAACGATGGAGGCGACTGCGCCGCGACTTCTTGGAGTTCCTCGCGGTGCTTTTTGCCAG -CATGCTCCTCAACAACGGAAACTAGTACATCCTGTGCCTTTTCCGGCGAAAGCGTCATGC 
-GTCGCATCATCCGTGGTCTTTGTACTGCCTGGTTGTTCCCATCTTCAACGTCCGCAGGAG -TTTGAAGGGCATCATCTGTTGAAACTGGGGTTGCATCTTTGTCGAGGAAGATCTTCCTCA -GTTGCAGGGTTGCCATGTGCTTGATATCTTTTTCGGATGGGAAAAAGTTCATTTCGAAAT -TGGCAAATGTCGTGTCCCACTCGACAACCTTTTCTTGAACTGCACGGAGGACCTCATTCA -GCGGTATGATCTCCCAGTAACGAGAACCTGTGTATTTGTCTTGTAAGTGCCGGACCAAAG -AGCCATGGTCTTCATGACCCTTCTTGATCAAATTTTCAATTTCAGTTTTGCAGCTCTCCG -TCAGTTCGGGCACAACACTCTCGACATTCATGCCTTTCAGACGGTCTTGAACAGACGCCA -TGAATTTGCTGATCCGTTGCTCGGTACGCAAATATACTTCGTTCCTCATCTTCAGATCGT -TGTCCACCTTCCAAGTTACACGAGGCCGAGGAACGATAATCTCGAGCAGGCGAATGGGGT -CATATTGGACTCGCAAAGCAAGGTCCTTGTAGCCGAAAAAATGGAGGTAATCCCGCCGCA -AATCATGGGAGCAGCCTCCAGTGCGAGCAAGCAAGTGTCTTCCCCAGAAAGACAATTCCA -AGTATTTCCCGAATGAATATTTCCATGTGCTCTTCGACATGGGCATGACTTGAGTCTCAT -TGCCGCACACTTTGCAACAGCTCCACATCAGGACAGTATTATGGAGCCCGAGAAGTTTAG -ATGGGTAAGGTTGGACCAGGACGCTGATTTGAGCCTCGCCATGGACATACTGGCGGTGGT -GTTCGAACATGCGTTTTTCACAGCCGTTGACGGTGCATATCGAATTTGCTGTCTCACAAA -GCTCCTCGACATACTGCCCAATTGTGAGATCTGACTCAAAGACCGTGTCGTCTCCATGCT -CATTGTAGAACTCCAGGGCAAAAGTATCCGGCCCTGAGCAGGGGACTGATGTTGTTGTGC -ACACAACACTGTAGAGAATGACTATATTCTGATGTGCATAAGGGTCGAACATGTTGGGGT -TCCCCGCTACATAGGCTTCCCACTGGCGTTTTTGGGTTTGATGGTGATAAAGTGCCCGAT -CGTACTCGGCTTCATGTGCAGCACGCAATACTTCCTTGACTTTTGCCGGTGCATTTTGCG -GTGATTGATGAACCATCTCAGGAGTGATGAGCACGAACTTCTGCGGTTTGGCCTTTTCAT -CAATGATTTGATCAGAGGTGCCGCCTTGATCACGAAGAGATTTCAAATACGAAAGTCTGC -GTTCCATTTCGCGCGCTCGCGTTAGGAGATACGGAGCCTCGAATTTTACGAAAGGCGATG -CCGACAAAATTTTTGTCTCGTAATCCTGAACCATATCGTTATAATATGTCGGCATTGGAA -TGTTGTCCGGGACTTTTGCACCATCGGCATCAACGAGAACTTGGCTATCAGAGATGTCAC -CAACTTTCTCGCTTTTTTCCTGTTGAGCGGTACATGGGTCAACAAGAGATGTATCGACTC -CGTGAGCGACAATCTGCAGTTTATTCTTGTCTTCGTCGGACATAGTTGGTGATTCTTCTG -CCGTGGAGGCCGATATCCTCCCGAATTCATCTCGCATAACATTGGTTTCAAGACGTAGAT -TGTACACGACGTAAACCATAAACTCGGTGATCCTTTTCACCTTCGCTAGGAGGTGATTAT -CTCCTCCACGCAGGATAATGGTACACCCGAGCTCCTTCGGACAGCCGGAGATGTATATAT -ATGTTTTCTTTCGACCATTATGGACATAGGTTTTCACGTCAAAACTGCTGCATTCACTAT -GCAATGTGGTTGTCAAGAGTTTATCCATAGAAGTCATGATTCTAGTCTGTGTGCATCTTG 
-ACACAGCCTCAAGAACAGATGGCTTCACGTTATATGCAGTGGCGATTCCAGCTTGCTCAA -GGAGGCCAAGAGCCAGCCCAGAAACAGTTTTCTCGACCAATAGCAGGTTTGGGCGCAATG -CCGAGATTCGACTGACTAGGTTCTCTAAGAACTCACGTTCTTGTCGAATGACAGGCTCGA -GACTCATAAAATGCTGTTGTTGACGAGCATATTCAAGAGGAAAGGTGATTATCAAGATGT -TTGGACGTAAGATGTTTCGTGACATGCTTTTCAGAGCCAAATTTTTGGTGAAGACAAGGC -CCGACACATAGGAAGTGTCACTGGGCCGGCCGCCCGGGATCTTCTTCAATTTGACGTAAT -GTCGAATGTCCATGTCGTCCCCATTTTGAACATCTGGATCGACCTCGTCTGCAGCTTTCA -GAAGTATTGGCATCAATGCATTTTCCCAGCTGTCGCCATTTGGCACAGCAGATTCTAGAA -GCAGTTGATGGAGAAGCTTTCGTACGTGCTCTAGACTGGCTTTGTTCAGTTCCACTGGTG -GCGCATCATTGCCCCTCATTGATGAACTTCGAGTCATCTTGAACCCGACTTGCTTGGAAC -CTCCAGGTAAAGATATGGCTGGTACTGAGGTGTCATGAGAAAGCATATTGTCTTTTGATG -TCCTGGGTGAATGACGCGGTTGAAAATTGCTAGTAACACTCATGTTCCGACGACGAGAGA -AACGAGGTAGATTCACGGCCCTTGAACTGGTGATACTGGCATCATCTGCATCTCGATTGA -TACTAGGAGCATTTCGGTCCCCAAAAGCAGATCTACCCTTCTTTACGGCTGCCAGGAGCC -CAACAAAGGATGCCCTCTCGCTTTCACCCCCACTCTTGGACAGAGTGCCTTCAGAGACTG -CAGACAAGAGACTGGGCTGCTCGTCCCCACTAGATGCGTCATCCGAGAGATATTGAGCAA -GGTCAGGGTCAATGATATTGTCTTTATGAAAAGCGGGTAGTCGAGATTCAACGTCTACCT -CATCTGCATGTCGGCGTTGGAAGGGAGCTCGATCTTCATGATTTGGCTTGAAGCCCCGAA -TGTATTGCTGACGAGAGTGATGGCGTTTGTGGCCGATGGAATGCGGTCTTCCGCCCAGCG -AGGACCGCAGAGAGCGCGAAGAGGTAGGCCGTGCCAGGTGGCGATCAGAATCAATTTCGA -GAATTGCCGAACGGCGATTATGTCCCTCTCCAGCACGTCGTGTGGCCGGTATGGCCATTG -TCGGGGTTCTCAGAACGTGGTCTATTGATTGTGATACAACCGAAGATGAATCATCGTCGT -CCGCAGATGTCCGTCCGGTATTGGTCCAGCTGAGTTCGGCAACCCGAGGGTTGCCCACTA -TGGGGCTCTGCTCGCTATCAGAGAACTCAGATGAATCATCGTCATGCGCATTAATCATCT -TCTCACAGGGTTTGCAAACCCGAATACTCCCAGGTTGTCCGAATCGTGTTCCTGAGATAA -GCAGAGTGCATTTTGAATCAAAAATTTGACCACATGTTCGACAGTGATGTTTTCTGCGAA -AAGTGGTGAATGGTTCTCCGCAATGAAAGCAGTCTTTAGCATTCTCATCCCGCATCCAAA -AATCCCTGCTAAGCAATTTACTCCTTAACTGCGACAAAACACCGTTGTTCTCGGCAGGAG -CTTGGCGTCTGGCTTGTACGGATTGACCAGTAGCCCGATCGACTGTGGTATTTAAAGGGG -ATAAGGAAGAGGCTGTTGTGGTGGTTTCGGCCGTGGAAGCCCGTGAAATCACGTACCCAG -GCAGTCGCGCTGCTCGGATGGAGGACGTTGACTCTTCATTAGATCTGCCGGGAGCTGTAA -CCGTTGGAGGCTCATTGGCGTTGGCGCTGTCACGAGGAGTGTTTGTCTCCACATTGGTTG 
-GGGAACTTTCATCATTGTATGGCACGTCACCCTGAGAGGGAAATCTTGATAGACTATTTG -ATGCAGATACTGAAGCTTTAGCTTCTTCATAAGGGAAAGATGTTCGTGATTTTGATTTTT -CTGCCGGATGCTGAGATGTATGATCCTGAAATGGCTCAGCTTGCTTTTGATTTCGCACGT -TAGTTGATTGAGTGGGGATTTCGGGTAGCTTTTGTTGGTCTGGAGTAGAGATGGACTTCC -GAGACTGGTGGTCTGGAGTATAGGTTGTCGAGAAGAATGAGTCTTTGCGGTTGTTTTCCT -CATGATGACTTTCATGCATCTTTGCAGAAAGAATCGGCGAACTAGAGAGGTTTTGACTCA -ATGAAGAGGATGTCTGGCTCAGGGAAAGATCCGTGGGGCCGTCCTTGCTGCCGATACTTG -TGGATTCTGCATTATGGATTCCCTGGAAGGGCACATTGACGGAACTACTTCTTGAGCGGT -CGGTATTTCCGATGACGTTCACATGAAGCTGAGTAACAGCAGGGTCTGCCACAATGGCGC -TACCGGTGGCTTTCGTCAATGGTGCGATACTAGGCTTGATTCCAGGAGATGGTGACACGG -ATGCCTTGGAAGATATGCTGCCACTTTTGGAGGATATTCTTGATCTCTTTCCTACATTCG -ATTGTTGCACAGAATCAGGATGAGTGTGAAAGTCTGGTACGACGGGCGAGTGGAGCCTTG -ACCCCTGTGACGAGTTGGGCTGTGATGGCGGATAATCAACGGAGCTTGTAGCCAGGTCCG -AGACTGATTTTGCCATTGAACGTTCGGAAACAGGGCTCTTGACTGCAGTAATTTCTGTAG -CATTTTTGTCTGTAGACTTTCCGCCACCGCTAACAATATCTCTCACTCCCCCGACAGAAG -TCCTAAAGCGACTATACAGACCGCTTAGTCCACCTTGTAATTCATCAGATAGTCCTTTCT -CATCCGATGTAGCAGCTGGAGCCGGTGGACTAGTGAAATCGTTAAAGAGAGTGAGCGATT -CGGACTGATATGCTGCATTATGGTAGTGATCTAGAGCTGCATTCATCGATTCTCGTTCAG -TCTGAGTCGTTGCCATAGAACCGCGTGATCCCCGTCGCCCAAGAGGAAGCGAGGTGGAAG -ACCCAGATGGACTGCATGAGTCCTGGGATTGCTGAAAATCCATGGCCAACTCAGGCAGAA -AGGTCTCAGATCATCCCCCCACAGGATAAAGACCCCGTTAGTAACCGATCATTCGGGACA -GACCTGCGGTAAGGTCTCAAATTTGACGAGGTTGAAATAAGGAAGAGGAACGAGGGTCCT -TTAAAAGCGCCAGGGTGATCAAAGAACTGAATGTTCCTTTGACGAAAAGAATAGGTCCTT -GGCCCTCAGGTTGAATGACGTGGTGGTGCTGAGTAAGCTAGGCGCGTCCCCAGAGCTGGG -CAGCTCCGATAAGACGCGGTTTGGTGGGTTTTTGATACGGTGCGACCTTCTCTTTTTTTC -TACACATATTCATGGATGTAAGTAAACTACGCCTCACAAACATCCCACCGGGCCATACCT -CCACTGTTCAATCCTACTAGCTATGACCCATATGCTGATTGCATTGATCAAAAGGCTGGA -CCCGCACGATGAATCCAGTAACTCCCTGGATCGCGGCTTTGGCCGACCAATGTCTGTCGT -TCTATCTCGATCAGAATGATCAAGATGATATCCAGGTGGAGGATTTTGAGGGATGCCTGA -TTTTTATTATCCGCAGGCCCACACTAAACACAGCCGTTGTTGTATCAGTAAGATATCTCA -AGCGGGTGTGAAAACAAAAGATATTGTCGCTAACTGTATGTTTCCTGTCTCTAAGTGGGG -AGAAGGAGAGGACAGTCATCGAGCCACATTCACAGACTCAAAAAATCAGATCGATGCGAT 
-TATTCTACGCGGCTCGCCGGACGCACAGGAAGAAACCTCACCCTGCCCACCATCGGTCAA -AGGGGGCCCTAGACACCTTGTTGAACTCTCCGATACCAAGCTTATATTTACTTACTCGGC -CTCTGTCCCGGATGTGTACCTTCACGTCAATCGCTTTACCATCCACCCAAATGCAGTTCC -GAAGGGGGATGTACCAAAACCAAAGCTCAAGAAGACACCAACCCTCAGAACCCTAATGAC -TATGGCATGCGAGAAAATAAAAAAAGCCCAAGCGCAAACTGGCTCCAATAGCCAATCTGA -TGTTGGTTTGACAGTTTCATTTGTCTCTCAGCGTGATCAGCCCATGCCACCCCATTATTC -TCAAAACAACAAAGCATCACAGTTGCTCTTCTCCCAACCCCCGTCTAATATGTGCCATGT -CGCACCGGAGAGCAATTTAACGACCGATCGTGTTTTCATCACCGAGTCATCGGATCTTCT -AGGGTTGCTCGCACCTTCTCGTCCGATTCAAAAGGGCGGCCCAGCCACCCGAGAACTTCC -AGAGGCCGACGGACTATCTCTAGGTGAACGTAAAAGCAGGCGATCTCCTTTGCCAAATGA -GAGGAAGTCAGTGGCACCACAAGAGTTTGACACATCTCCAACCGGAACACAACCCTCATT -CCGAAGTCTTTCTGGCAACACTGCTGATGCGAGGCGTGCTAGTAACTCTGTTACCAATGT -GTCTTTCGCCAATCTCAGCGAAGATGTGGATAACCTAATAGCAGAGTGTCGAAGTCAGGC -TGCGCAATCTTCAACTGAGCCAGATGATCATGGACCGGCTCATACTGTTGCAACAGGGGA -TAACCAGCATTTTTCGAAAAAAAGACACCGTGGCAGTACAGATGCGCCACTACATGAAGC -TCGGGATGATAACCTGGACTTGCAAGGCCTGCAGAACCCACACACATCACCCTCCAGAAA -GCGACAACGTGTTGATACCGGAAAGGCGATGCCGACAGACACAACTGAAGCCGAGCAAGT -GCAAATTAAGGCCATGCCGTCACTTCCTAGAGTGCAGATCGTAAGCACCAAAACAGACAG -CGTGTGTCCACCCACAACTAATCCATGGGAAGGAATGATACAAATACCTTTGAGCGAAGT -TGAAATCCCGAAAGATCAAGCAGAACTTTTGGAAGGACTCAAATGGATCCCTCAAGACCC -CGGTGTATCTGCACCTCTATGCCACGTACCACCTCATCTTTTGAGGCAATGGAACAGCAT -TGCACGAAAAAGACAACATCTGAATAaagaagaagaagaagaagaagaagaagaagaaga -agaagaagaagaagaagaagaagaagaagaagaagaagaaggaggagaagaagaGCAAGT -ATTCGACCGTGCATTTACTCCGACCCCGCAAGATATCCAAACATCCCCCATACCATGGTC -TCCGTCACCGAATAGGACCCCTGCTCGAGATCTTCTCCCACGAGACACCGCTTCTCCCCC -AAGATACACGAGACCTATCAGAAAAAATCCTACGCCAAGCGGTACTCAATGTAGCGTTTA -TCAGTCTGCAGATCATGCTGATATGGACATGTTGAATGGATCTAGCAGTGCTTCTTCCCA -GCCAGGCCAGGGGGATATCGCCGCGTCCAAAGACAATACCAAAACAGCAGGGCAGCAATA -TACCCAATTCCCCGAGAAAGTAACGACAAAACAGATAAATCCAGTGTCTTTGGACCATGA -TCCCGGCGATAGCTCATCCCAAGAACACAACACTCCAACATCTCTCGATGTGATCGATAA -TCAACCCCTAGGCGTAATACAAGAAGATCCTCATGATGAACAACCTACCCGAAACCCCCC -ACTTACATTTGAACTGCGCGATGAATCAAGTGGTGATGAGAGTGATGAGCCGGAGATGGA 
-AACATATGTTCCTTTCGCCCTAGGGGGAAGCCTTCCATTAAGCAGTCAACCTGAGAAGGA -ATTTACTAGCTCGGGGCTATCGCTTCCAAGATTTGCAGGAAACAATATCCAAGTGGTGGA -GACTCCTGCAGTCCACACGACACGTCTGAATTCTGGGAACCTAAACAAACAGAGGGTTGG -ATTAGAACTCCAGAGCTCTCAGCATCCATCCTCACAGGCCGCTAAGACATCATCCTCGTC -TCGGATTCTCAATACCTACCACTCGCAAGATAGCCACGGGCGTAGTATTCCATCTCAAGA -GGCACCGAATCCATCTTTACCGATATTGGAAGGCGGGTCACTGCGAGTAGATGTGCTAGG -CACACAGACTCAAACGAGCAGCATCCATGCACTACCAAAGGCGACAGTCCAGTCGTCTTC -AGACGTTGTATTTGATTCTTCCAGACCTGCTCAGCGCCAGCGTGGCTCCTCTATCTTCGG -TTTAGACATCTTGGACGACCCATCGTCTTTCCCTTATGCCAGGTGCCACTCGTTACCCAT -GTCTCAACTTCATGAACCAAGCCAAGATTCTCTCAGAGAATATTTTTCTCTCGACGGAGC -CTCGCAGTTGCCAGACTTGTCACCTATGGAATTCACCACCTGGGTTGCTGCCAACGGGGA -ATCACCATCTAGGTATTCACGCCCTCTACGTCGCGAGAGTGCTGATACTGGTCGAAAGGT -ATCCCCCAGTCCAAATGCCGAGCTGGTAGCACGGCGGCGGGGCTTTATCGGCAAGTCTGA -CAAGTATGCTGAAGCACAGACAATCTATGAAAAATTCTGCAATGATTATTCACCATATTC -TGGTGATTTCGCTCACTTCGTTGAGATGTGTTCAAAACTGCAGACAATGCGACAAAGGGG -CCAGCTGCAACGGTCATTCCTATGGGATGACTTTGTTATCCAGCATTTGGAGGAGTACCC -ACGGTATTTTGCAGAGCGCACATCGCAGGATTCAAAGCCACTGGGTTACGAAGATTTCTT -TTGCTTGAAATTCCTACGACCCCAGCACAAAAAGAGAAGCCTGACGGCCCACGGAGTTGA -CATAGTTGCTTCCCAGTTTGCTGCGCCTACCGTCGAAGGCGAAGTGACTCAGGATAAAGC -GGTGCAAAGCGAAACGGCCCACACTTCTTTTACAGCAAGTCTTGTGGGAGAGTCTTCAAC -CCGCCATACTCGCTCATTTGGTGATGTTCCTATTCCGAATGTCTACATGCCAACTGCAAC -GCAGTCATCGTCATCCTCTACTGACACCGACTCAGATTCGGTGGAGGTCAAATTTGAAGA -CGATTCTTCCGATGGATCGACTGACTGCCAAACTCCCACACGCAGCAATGATCAGCAACC -CGTGCTCTTCGGTCTTGACGAAGAGATAGTCAATGCAACGCAATGCGACGCAAAAAACAC -GCTCATTGCGCCTGATTCGAACAACGTCGGCGTAGCTGAAACCAGCATGGACGAAGTTGA -TGATACAGAGGAAGGAGATTCTACTCACCATGAGACTGCCAGCATAGAACTTGGCGATGA -TACTGACGATCGCCACATCTCTGCCTTTCTCGCCCCGCCTGACCCGCCCGCGGCCCTGGG -TTCTCTTAATGGTGCCGCGATGGAGCCTCCGAGGCAACGAAAACCATGGTTCCGATCTCT -TCGGAACATATACCCCACAGGTCCTGTATGGTCCGATGACCCTGACACGCCGTTCAAGCG -CTGGGCTCGCCACGATCAAAACGTTCTCCAGGAACTCAATCGTCGCGGTGGAGTTAGGGT -TCTACTTGATGAGAAGGGTGTCATTCGTCGGCCTATCTACAACCACGGGAACAACCCTGG -TTTCTAGCCGTTGTATTTGTTTAGACGCCTCCCATTCAAGAATTGTATTTATATTTTCCA 
-CGGGCCAGCGGCTGGGTTGGCACAATGGCAAGGAGTACGCATAACGGCAGATACCATTCT -ATTTCATATTTACTCCAGCAAAGGGAAACAGAATATAGCACACTATCTTATTTTCGAATT -CAGATTCATTCATTCTTTGTACATATCTTCTCACAGCCCAAAGCCACTAAGGACGTTGAG -GCAGAAGACCCAAGCCTTGAAACCCGATTTCTCTCAGTAGGGACAAAACCAAGCCCGATA -ATCCACAAAACTCCAATAGTACAGAGTACTTGTACTCCACATGCAATCATCCACTGGAGA -GCAAAAAGAAAATTCCCAAATACAATATTTTATTCCGGGATTCCCTGTGGATTCCCAATG -CCTCTAACCACACCAAGCATCCGTGCAGGGATAGCGAAGCATCGAATAGAGATCAACAGC -AATCACCTAGGCTCGTATCTAGTCTCGTGTACAAGATACTCCATAGACCATGCCGTTGAC -AGCCCAACGCGCTCGAGCCTCGTTGCAGATCCTAGGCTAGGCCCTTAGCCCTCGCCACCG -CGTCGCAAGAATCAATTTTCTCTCTCTGCGATATTCAGACTCACAGAGGACGAACGTACC -TTATGGGCCAACAATATTCCAACATACTGGGTCGGCAGGAGTGAGATTATGGACTGGTGA -GTTCAAGGTTTTGGGATTTAGGGTGTTAGTAGTTTGTGATGGCATGCTTAATTGGACAAA -TGGACAAATGGACAAATTAGGTATACTATGAAATTGAAGGGAGGCTGAAGAGCCCGAATG -TTATAAGTAAATGGATCTTGTATTCGAATCAATTTGCAAGCAGATAGATGCTAATGATTC -CTCCGTGGAGGATCAGAGAAATAAACAGAAATGCAAGTGTTCCCTCCCATAGAGACCATA -GTCTCTTCGCAAAGTGGTATGATCGACAAAAGTTCAGGAAACGGATATGGTCATTAACGG -CATGCAAAAAAAACCCCCCCACATGAGAAACGCCTTGGTATCGCCAATCTAAAATCGTTT -TGGAATGGATATACAGTCATAAAAGCCCGGAATCCTTTTAGGAATGACGGTAGGGCTCGT -AGGCGCTTGAGTGCTGGTTGTTGTGGGTCATGGGGATATCATTGTTGTAATTGGGGGCAG -GAGGGTAGGAAGTGGTGTAGCCGTGCTGAGCATTGGGCTGCGGGGAACCGTAAGGAGGGT -GGGCGGCACCGATGGGCTCATAAGTGTAAGGAGCCTTTTCTGCGGTCACGCCACCGCGGG -TGTTGTGGCGGTCTTTGTGGCCGTAGGGCGAGCGGCCGGAACAGCAGCAGGTGCTGAACA -TCCAGAAGATGGTAGCTCCCAGAGAGAAAGCGACTGCTAGCCAAGTGGCGGCGAAGATGT -TCTTACCCAGGCCTACATCGACACCATAGACCTTGAGGGCTGTTCCGAGAGAACCCTTTG -CGATGGAGAACATGATGGTCGAGGTAAGCGATGCGCCGAGAGTGAAGAGGAATGAAACAA -TCGAGACGATGGTGGTGACACAGCTGCCCCAGCGGCTGAAGATGGCAAAGATACCAAGCA -GGACCTGGGCGCAAGTTGCAATGAAGGCAACTAGGTAGGCAATGAACATCCACTTGGACA -CGGCCTTGTAAACGTTCAGGGTCTTCTTGAATCCACTACCAAGCTCCTTCTCCAGTTGAG -ACTCGCTTATTCCCAATACTGAAGACGGGTTGAAGTAGAACTGAGCCTTGGGATCAGTGC -AGGAGGAGATGGTGTTTTTGCCCTTGGTGGAATTTCCCTCACAGTAACCCCAGAGACCAA -TGTCGTAGAAATCAGCGATGTCGGCATCATTCTGGAAGGTCTTGATGGTCTCTTCAATCT -CCGATGCAGTGACGGAAGTGATATGGGCATCATTGAGGCGATCTGTGATTTCTGATGTAA 
-TTGCTGATCCTGTGCTGATGTTTGAGAGGTCGAGCTATAGATTATTGACATTAGCGGTTG -CAGTATTCAGGGCGAATTTTGATCACTTACACGAATGAAGTATAAATTGTTCAACGTATC -AGAACTGGCTTTTGTGCAACCTAATCCCACCATAATGATGCAAATTAATGATGCGATGGT -CAAGACATAGGGCGTGAAGATACACGCAACACGACCGGCTTTTCCCATGATGAATAGGAA -TGATAGGTAATGTAGAGAAAGATGTGTATGACAGAGAGGTGGAGAGAGGTGAAATGGGGG -GGGGGAAGACTTTAATATATAAAAGACCCCAGTAAGTAAACAATTGTACAGCCTCACACC -AACACGAACACCAGCCTCCTATGGGAGGAGGCGCAGGATCAGCGGACCAATCAGTGGCTG -AGCGGTCACGTCGCCAGGGCCACGTTTGTTTCTATTTGTGGCTGCGCTAGTCCTATTTGG -GCTGGCATGGTCCTTAATCTGCTCCTTCTATACTCCATATACACAGTCAGCTCAGAAGAA -CTCTGGTATTCGTACTCTGTATATCATATTCCTCAAAGCATTATAAGCACGCATTGTCTC -CAAGATCAAAATCACACATACATTGTAAATGTACATAGTGTACCTGCACGACTCAACGTC -ATATCCTTCTGACCGCCCCAGGATCATCATTCTAGTGGCCAAAACAGGTGATTCTCCAAT -CCCGAGCTTCGTCATCCTCTCTACTCTGAACAGCCTTGTGGATCATTTTAAATTTTTTCC -CTCTCCGCCTCAATGCGTAAGCCCGGGCGTGGAGTACAGATGCCAGGCTCCCCCATTTTT -CTCTTGGCATTTATCTCCCGCCCTTACAGATAGCATTCCACTCGGCTCCCCGCTGTCTCT -GCAGGCAAGATTTACTGTGATGCCTTGTTCTATATTTATTCTATACGATTAACTTTTTGA -ATCTAGGATAATCCCGCGATGCACCTTTCGATTGCGCAACTCATCCTGTACAACATTCTG -TGATCTTTTCTAGGAAAAACCATGCGCTGATCGACACTTGTCATATCGCCACTGATACAT -GAGCTATGGTCATTGGAAATTCCAGTCGTTTCATTTACTATCTTTTAGGCAAATGATACG -TACACAAAAAACAGGATTTTCCAAAATTGCAGCAACTGAAGCTGATAACTCACATGCGAC -TATGTATATTCCCATTCACCTCAATTGACAGGATTTACTGCAAATATGGCTCAAGCGTCG -ACAACATACCAGTATAGGCCGGTCCAGTGGGAGGCTCGATACTGAACATGTAGTCAGCCC -ACCAAGGTCCCGCAGCCCACCACAATGCGCCGAGCCAGACGTCTGAGTTTTCACCCAGGA -AATCAAGCATCCCGGTAACTGCCGCCTTGCAATCATCGTTCACACCACCGGCAAACTCGC -CAATCACGCCAACCTTCTTGTTATCCTTCAGCCACTGTGTCGCATCCGTCACGCGCTCTT -TTCCGATCGTCGTCGACACACATGCCTCGTTCGTGCCTGATCCATCCGAATCAAGGTACT -GGTGCATCTCGTAGACGATCTTGTCTTCGGGGTCGGTCAGATCCTTCAGGTTATCGTTCA -CATCGACCCAGGTCCACGCGCCGGTCCAGGAGTTACCCTCGACGAAGATGTATTGGCTCT -TCGCCCCCGATGCACGAATACCGTCAATGGCTGCTTGGTTGAGCTTCAGGACGAGGTCTT -GGTCCATGTCGTGGTATTCGTTGTCTACATGATTTGGTTAACAGGCGGACTGTGATGCGG -CTGAAGCTCGAGGGTACTGACTTGTGTCGAAAATGACTAGCTCGTTCTCAACGAATTCAC -CTGCGACATTCTTCCAGAATGTCTGGAAGTCGGCGGGGGTGGAAATAATCTCATCGTTGC 
-TGTGGATACTCGTAAGCTTCAATTCATTTGTTTACTCATTTGATAGATCACCTACTATCT -TCCATAGTTATGAGGATCAAGCACGGCGTGGGATCCTCCATCAGTGATGGCTTTGACCGT -CTGACACTGAATCAGTATCTGCTTGGTCTCATTCGCGCAAGTACGGAACAACATACCGCG -GTCAGATTGCTCAAATAAGCTGCATCAAACCCTCCTGTCATGGTTTCCGGTACCAGCCGT -TCCATCTTGAAGGCCACTCGGAAGATGTTCATGCCTTTCTCCGCCAATGTTTGGATTGCA -CTGGCATCCGGGAAGTAATAGTCCTTTCCCTGTTACGATCTCTGTCAGTCCAGATTCTCT -GAAGACATGTATGTTCAGCCCACTCACGTAGACACCAGGAATGTTCTTCTCGCCGAATTC -AGCTACCGATTCGCTTGTTCCGAACCCTATACAAGAAATTCGGTCAATGGATGATCAGTA -AATCTGTTGAAATCGTGGCCTCGAACATTTGAAGGGTGAAGCTCTCTTTGCATTCTTAGC -AGGAGCTGCCGCTGCGGAACCAGCCACAGCAGCGAGGAGAGCAATGTGATCGAGTCTCAT -ATTGACAAGTCAAACTCTTTGTTATATGAGATCGATGGTTATTTTCACAAAAGAAAATAG -AATAAAATGAGTGATGTGATAGCAATGCAGAATGATACCAACAACGATGCGAACTTGAGC -TTTTATAGTTCTCTTGTCAGTAAATCAAGGTCCACATATCGTTGAAGAACTCGGGAAACA -TTCCAAATGCGTACAGCTCCACGGATGTTAGGTCTTGGTGACAGAAAGGACACGGGGTTT -CATAAGTCTTCAGTCCTTGGAAATATTGCAAGTCTCTGACGACAGTGAGGCACGATGTGC -AGCAACTTCGGATATTGCAGTGCCTCAGAACGATCCCCCACCAATAAGGATCCATAGAAT -CATAGAACATGGTGAAAGACACTTTTCCTTGGCTCTCTCGTGCGAAACGGGGAAGCCAAG -AAGGAGATGTTCCCGAATATTCCGACTAGGGGGCACCTGATCCGTGTCCAAGATGAATTG -TTCACTGTACTTCATACATACCCCGCTTCAATGAAGCTTTCTTCAGGCGCAAGTGGCTTG -AATATGCGTAGCTTTGTTTCGATGACTTTGAGGTTTTGCAAGCCCGTGATGCTTCAAGTA -TTGATCGGCGTTTATAGTTTCTGACATGCAGGGGTAGGGGTAGTAAGACCACGCTACAAT -ACAGGGGGCTGCCGAGTGTAACTGACATCTATGAATAGTCTAATGTAGGCAATATCTTTA -CCAAGTCAAGGAAAAGTTTACTGTCAACATACTACCCTGTAATAGTTGATGGGAGGATAA -AACAATTGCGTGGGCCAAGCAAGCGTGGCAGGTTGATTGCAAGCTGAAACAGATACATAA -AACCTTGAAATGCGCTCGGGAAAGGGTAACTGCCTTTATAAAAAAAAAAAGGGCAGACTA -TACACAACTATTTGAAACTTGAACCTTACTACTCTTAGTCCTGCAGATGGTTCTCCACTG -GACTATCCAAATCAGTCTACCAAGCACCGAGAGAGTGCATGTGCTCTTTCATATCAGCTG -CCTGCAGCTTGTGGTAAGCATGACCTGGGTGGTAAGAATTCCACCAGACACATGAAGTAC -CGTCATCATTGACACAAGTAGCATCGGTGAATCCATATGTTGCAGGAGCGTCAAGGACTT -TGTCCATGAAGGCGAATGTATCGTAGAGAACCGTTTTGGTCTGCAGCTAGATTAGTTTTG -CAAGATCACACATATGATCAGAGTCCACAACGTACGCCACTGTTGTCTGCAATGAATTCA -TTGACCATGGTTTTGAGAGCAGCATTGTAAGCCGCGAGCCACTTAGCGTGGTTGGTTGCA 
-ACGGCTGTCCCTTGATTGGCGATCTGGGGAGATCGGCTCACAGGTGGGATATTGAGGAAG -ATGAACTTCCGTCCACCATTTGCGTAGATCTTTTCAGCTTGAGCTTTATACTGGGCCATG -ATCTTTGGAACCAGGACACTGGCATCATTGCTTGAGGCAGCCCAACCAATGCTGCACATC -GAAGGATTAGTCGAGTCGTTCATAGGCGCCCAAGGGAACATACTCATTAATTCCAATCCA -GAACCCAAACACTGCATTGTCTGAGGTCCATGGTGCAGTAGCAGGCTTCGTACTGTATGC -TGTCTCAAAGCTGGCGACTTGCGTTACCATATCTTCCGCGGTGTTGGTTTTAACAAGGGA -ATTGTCCAGGGACGCTCCTCCAACAGCGAAGTTGTAGCTGAGGACCAGTGAAGCATTGTT -CACAGTCGTCAGGTCTCCCACCCAGTTGATGCCGCCACCAGTTGTTCCGGTGCCTGTTGT -TTTCTCATCAGCCAATCCTGCTTGCCAGGGGGCGAAAAATACTCACCCAGTGTTGGATTC -CCCATTGGGTTACCAGGAGTTGGCTGCGTGCCACTGACATCGAAACTGGTCATAGAGTAG -GAATCACCACTACGGATACCTTTTAGCTGTTTTCCCAATGAATTATATGTAAGGAACAAT -CATCACTCACAATGTAAAGAAAAAGGTATTAGTAGCGCGAGGTTGCAAGGGAGCAGCAGC -AGCCAGCCCAGCACCGGCGATCATACTGATGATATACTTCATGGTGAGGAGAATGTGCAA -TGTCTGTTGGAAGAATAATCACACACTGCATGGTTCGTTCCTTCCCTTATATACATACAC -TCATCCCTTGTTTCGCCCGGCTCGTCTCATACTCCGAGTAAACAAGCCCCAGAGCCGAGA -CTAATAGATTAAAAAGAAAAGGCGAGACTATGTCATTTAATTAACCGCTTTTGTCAATCA -TTTCCTCAATGGGCTCGATCCGACTCATCGATATTAAGCTGGCTGTACACGGTGAACACC -CCGAAACTGCGGGGTACATATCGGAGTTCAAATCATCAAGCTAATTCGGAAAAGCTGAGT -AGTTCGTTGGACTTGACGAATACCCCCAGCATATTATGGGGTATATTGTCCACTGGGATG -GGTTTCTGAGAATGGATCCGCTATGCAGCTAGAACACATGCCTTGATTTGCTCTTATGTT -CCTCTTACCCGAATAGCTCACTACTGTCTTCCCCTTGCCCGGTGAGTTTACGATCGTCAC -CCTCTAAGCCAAATCTGTAGTCTCGCGTGGTGTGGTCTTGCATGATGTAGTCTCGCATGG -TATCATCTATGTTCATATTGTTGCATTGAAGTCTTATCTACATCCGACATCGAATCGGCA -TCATGCAGATAAGGCTTGATCCGATTTGGCAGCAAACATCTCCATGCCCACACCGCATAC -AACCAATCGCGACTGGTTCTTTTCCGATTCAGACACGTAGCTCTCTGCGGGGAGGCAATT -TTCAGGTCCACGGACAAGCCCCAGTGATGATATATCAGCTATAGAGCTGTATGTATCCAG -TAGGAATCTCACAATAATCCCCTGCGTGCCAGCTTGTGTAGGGTCCACAGCGATTGACCA -GATTTTAACTCGGTGGTCATTGCCAGAGGATGCCACAGTGATTTGGATGGATTCTGTATT -TGATGTGACATCACGAGAGATTTGCTGATTGAGCACTTTAACCGTGGTGACTGCTGCAGC -GTGAGCATCGGGGATAGATACTGTAGCTACTTGTGCGTTTGTGCCGGTGCCGGATGGATG -TGTTTGTAGGAGGGACACGGAGAGAGAGTTATCATCACCTCCAGCAACAACCACAGTAGC -CGTGTCAGAGATAGGAACCAGTTCCATCCCCTTGATGCTGTTTGAATGAATCTGATATCG 
-GCTTTCGCATGTGATGTTTTCTGGTGAAACCGAAGAGCTGCCGAAGGGGGCCTTGGCCTT -CAATGTGGATTGGGTGATTGTGTAGAATGATTCAAGTGTGCTTGTTAGATCCCAGAGTGT -GAAATAGCCATCGGTTGCTGCAGTTATGAGGCTCACTGAAGAGCTTGTGACCAAGAAGTG -GGCTTGGGTTAAGCAATTGGTCATGTATTTTCCACGAGCCAAGAGAGTAAATTGTCCATT -GGATGGTGAAAAGTGGAAGATCTGCAACGACGTTAGTATTGGCCAAGGTGTTCGAGGGAA -AGAACGACTCTCACCTTAAAAACAGAGTTGGAAAGAGCAAGGCATAGGAGGAAACCTTGC -TCCCCGTCAACTTCCTTCACTTCTAGCAAATCGAAACTAGGTATCCGGAGATCTGAATTC -TCGTCATCCTTCGGACTGGCTGCCACCAATTTAGTAGCGACGCCGAATACAGGAATAGTG -CGGATTCTCCATACGAAAAACTCTTCATATGCAGCACTGGTGAATAGGTATCTTCCATCC -TTGGACCAGGTCACCTGCTGAATCCCAGACTTGTGTGTATCCAAGACGCGCAGACACTCA -AAGCTGCCCCACGGACTAGCAGCCAATGTTGAAGTCGGCGCGAAAAGACGAACAGTGGTG -TCCTCGGCACCAGTCGCAAAAAGAGTGCGACCTCCACCGGTAGGGTTGAAAACATCCACC -GCTTTGATTTCTCGTCCATGACCTCCAGCTCGAAGCACAGTACTAGCACCAGCTCGGATG -CGAGTAGTCATTAAAGAAAATCCGTCCTTCCACGTGAAGAAAGCATTGTGATCTTTGTCT -CCTGGGATGAAGTCCCAGCCTCGGCGAAACCCGCCTGAAGCAATGGACACGATATCCGTC -AGCTTCGATTCATTCCTGAGAACGAAATCTTGGGATCTGAATCCGTACAACATGAAATCG -CCAGTCGCTTTATCGAAATAAGCGCCTTCAATGTTCCCACCAGTTATGGATGCTGTACGG -TGCACTGTCTGCATCGAAACAGCGTCTGTTCCGTTCCCACAAAGCTGCAACTCATGGAGG -CAATAAGTTCCATCTCGCCCACAGGTCAATACAAAATCGGATTTTGTGGTGTTACTCATA -GTAACAGATGTCACGACGCTAACATGATTGACAAAATCTCGGCCATGAACACGTCGGCTG -ACCAAGATGGGCTCAGTCTCGGAGGTGTTGTATATGGCAATGCCGCCTAATTTTGACCCT -ATAATGAAATATTGGCCCTCAAAGAGGAAAGACGCACTGCAGACCACAAATGTGGATGGT -AGGCTGAATGTGGTTATTTTGGCGTTTGGATGATCGCTGTCCCAATTGGACACAGTGACG -AGTGTTATTTCTTCTCCGGTAGGATAACAGACAAGGAAGGTTATGTTGCTAGGAGGATTC -AGGGTATCCATCTTGGAAGAGTGGGTCTGAAGGAAAAATGCTTGAAGGGGCCTGTTGCCC -ACATCTGCAAGGTTGGTAACAGATTTCGTAGCGTGATTATAGAGTCGAAAGAACCCTTGA -AAATTACCAATCAATGCTAGACCCATCTGCGGCAAAGCAGTCAGGACGGCAAAAGAGGCA -AGCTCGGAGAATTCACATATTGTCGCCCAAGTAATGACAGTCTCCCCTGCCTGATTGAGA -TATCCAGGCTGGATCTGACCTTGTGTAGAGACGCGAAGAAGGCAGTCCTGGGCAATAAAT -GCAAAGACTCTCGCTCCATCTGCTGTGGGATGCTTCTTCTTCGCCTTTGCCTGCAAGCCA -GATGATTGCTGCAAAGAAGCATCCAGGCCACAGCTCAGATGGTCATCGTCATTTTCTTTG -ATGGGAAAACTTTTCAGTGCACCATCAGCGCCTCCAGTATAAACCACAGTCTCAGAGCCA 
-ACTCTGCAAAGATCCAAAGACCAAATGTGCTTTCCAATATGTGTTTGTATTGAAGAAGTT -TGTTTGAGCTGATAATCAGTCGTCCCTGCGGATGATGAGTGCCAGCTTAGGTCCCACAAA -GCACATGTGCAATCCTCTCCACGAGAAACCAATCCAATGTGAGTTTGGTTATTATTTTTG -ATGGCCCTGAAATAGACACTCCAGATACGAGACGTGTGAGCAAATGCCTGGACCATACAT -GACTCGGATCCGACGCTCTCTTCGCCTGCCTCAACAGGACCAAAGCCTGTGCTTCGTAAT -TCAAACCCGTCCGTGGAGTATGCCGAATGATCCTGGGCTGTTTTGTGCTCACAATCTGAT -ATATCCCAGATTCTCACAGTTCTATCGTCACTGCAGCTTGCCAGTATCCTGCCAGACTGG -CCTCCATTTAAGGATGGGATCTGGGGAGAGATGCGCACACCGAAGATTGATCCATCATGT -CCCGTGAAGAAGTGGTGGATGGAACCGACTGCGTTAGACTTGTGTGATCCACTTTCATTC -AAGAAACATGACCACACGATGATTTCTCCGAACACGGTACCAGCGGCAATGAGGACATGG -GAGGAAGACAGCGCAATCACATCGGCGGAGTATAGGATAGACTTTACACTTGTCGCCAAC -TGATATATGGATATGGTTGTATTTCTCTCGGGCATCTCACTGTTAAGCACTTCGAGACTC -AGAAGGGCGTTGTTCGCGGTGATAAAGAATGCCCCATGATGACCACCTGCAGCTGCTGCA -CACCCACTCATGATCCAATCTGGAGCAGTGAACTCGGCACTAGAGGCCGACAGCGTGACT -TCATTGTCAGAAACCAGAACGAAATTGACCGCTCGCACTGAGCGACCACCCCAAACAATT -ACTTGGACATGGCTCAAGTCATGATCTTGTGGTGGTTGATCCAAAATAATGAAACCGTGA -ATATTGTTTCTCTTGAATATCTGTATTTGAGTGGCGACAACACCCGTAAATTCATCAACC -ACCCGGAAGTATGTGCCTTGACTTTGAAAGACGAAACGCCTATCACCCAATACAAACGTC -TTCAACGCAGTTATGGGGAGAAAAGCATCAATATGCTGTAATTTGAGTTAGTGGGACAGC -ATTTTTACTACAGATACAGCAACACTAACCTCAAGTGAGAGAGACATTGTCAACTCCTAC -GCATATTAAGCAGAGAGGCGAATCACACGGAATGTCTCAATTGCCTTACATCCAAGTGTC -CATCCCACCAACCCCCAAAAAAAGCCCACTACTTTTTCGATGATTTCCCCGATGTGGTTA -CATAATACACACTAGTCCATATATGGTATAATTACTCACGTGATCTCGCGATCGAGGCGC -GGAGAGCGGAATGGGTCTCCACGTCACTGCCTAGTTCCTGCTTCTCTAATTAGCCATCTG -CGAGCTCTTATTTTGCTACATGTGGATCTTTCTTAAAATGCTAGACACTGATACAGACTC -ATGATCTGATTTCCTTTTGATTGGCCATGGGTGTCACGGGCCTCTGGACGGTCGTTCAGC -CTTGCGCTCGCCCCATCAAACTCGAAACGCTAAACAAGAAACGACTAGCTGTCGACGCGT -CAATCTGGATATATCAATTTCTGAAAGTGGTGCGAGATAAGGAAGGTAATGCACTCCGAA -ATTCTCACATCGTTGGATTCTTTCGCCGCATATGCAAGCTGCTATACTTCGGGATCCGTC -CCGTCTTCGTCTTCGACGGGGGGGCTCCTGTGATGAAAAGGCAGACAATCGCTGGCCGGA -AAAAGAAACGTGAAGGTCACAGGGAAGATGCTGCGAGGACGGCGGGAAAGTTGCTTGCCG -TACAAATGCAACGCAGTGCGGAGGAGGAGGACACTCGTCGACGCAACAAGACCCAGATAC 
-AAGAAGAGGAAGACGTGCCGGATGCTCCAGTCTACGCAGAGGAGGCTTTTATGACAGAGA -CAGAAAAGCAACAGAGCCGCAAGTTCAAGAAGAAAGACGCGTACCACCTACCTAGTTTAG -ACGTCTCGCTTGAAGATATGGGGGCACCAAACGATCCACGCATCATGTCCCAAGAGGAGC -TAGAGGAGTATGCGAGACACTTCCACCAGGGTCAAGATATCAACCTCTACGATTTTTCCA -AAATCGACTTCGACAGCATGTTTTTCCTTAGTCTGCCCGCCACTGATCGCTACAATATCT -TGAATGCAGCGAGGTTAAGAAGCCGACTCCGAATGGGATATTCAAAAGAGCAGCTAGATA -CCATGTTTCCCGACCGCATGGCATTCTCGAGGTTTCAAATTGACCGTGTTGCAGAACGTA -ATGACCTCACGCAACGATTAATGAATATCAACGGCATGAACGGAGAAGAAGCATTCTTCA -ACTCTGGAAAGCGGATCGCGGGAGAACGTGGCAGAGAATACGTGCTTGTCAAAGATCGCG -AACATGAAGGTGGCTGGGTCCTAGGTGTCGTGGGAAACAGGGAAGGCCATGAAGAGAAGC -CCATCGATATAGATCGACCTGAGATACTATCTGACGAGGATGAAGTGTCGGAAGAAGATG -AGTTTGAGGACGTTCCAATTGAAGGTCTTAACCGACTGCCCAAACTTCCTTTCCTTCGAG -AGGGTGTATTCGACCAGTCGCTTCAACTACAAACGGACGAAAATACAGACATGAGAAGAG -CTACACAAGAATCACATCAGGTTGCACAGCGACAGTCAGTGAATCAACATCGTACACAGG -AGGTCGAAAATGACTCGCTTTTTGTTGAAGCCGAAGGAAATATTGGTGCTGAGCAGCAGA -ATAAGGACACTGACGAGTTTTTTGATGGCGACGACGATGACCTCGAACGAGCTATTGCGC -TTTCATTACAACCAGAAGAAGTCGACGACGAAGATACACTTGACATGTCTGATATGCCCA -TTCATCGACCGGTAAAATCCGTCCCGTCTTACGAGACGGTTCCAGATCTTGATTCCGAAA -GCGATGATGGCATGGACTTTACTGCTGCCATAGCAAGAACAAAAGTCTCGAAGAAAGCCT -CACATGCACCAAATCCATTTGGGGGTCCTCTCCCTTTTGAGTCCATCAAACTCACTAAAG -TCGCGAAGAATGATGACAAAGCTGGTGCGGTTGACGAAAGTGCAGGAGGTTTCGTGAAGG -AGCCTACAAAGAAAACGAAAAAGATAGACCCACTTCCTCCGTGGTTTGTTGGTGAGCCCT -CGGATTCGGGATTCATTGTTGATCCCATGGAAGATCCTGAAAAGGATGAGGACCAAATTA -CGGCGCCAGATCATATGTTCTTGTCCAATCGTCGTTCACCGGACATTATCGATGTTGACG -AAATTTCCGCCAAGAAGGAAGTCGTTGACCTAgaggcggaagaggaggatggggaggaga -agTCCGAACATCAAGACAACAAACCCGCACACGTCTTCCAATTGGAGGACATCGAAAAGG -TTTATAATGAACTCTCTACCACAAATCTTGATGAGAAGAAGCCTTGGAGCGAACCAGCTC -CAGCTCCAATCAATATTGATGAGACAGTACCTGGGCGTCCTGAAACACAAGTGCCCGCGC -TTGATGACCGGTCTCCACGCAGCGAACATTCTCCTTCCCCGGAATTTGAAGATGTTGTCC -CTCAGCAGCCCACTCAAGGCCCTGAGATCACTGTTGTATCCCATCAGAAGGCCCAGCCCG -AGCCTCAGCTCTTCGAAAACGTCGAGGAAGCTTTCGTGCAGGATCAAGCCGATTACAGTG -ACCCAGAGGATGAAGATTTGTTCAAACAGCTTGCAGCTGAGGGCGAGGAGCATGTTCGGT 
-TTGCGAATACTCTCAATTATGCTGCTCCCAACCAGGAAGCCTTCGACTACGAGCAAGAAC -TGAAGCAACTCCGATCTCAGCAGAGAAACGAGCGTCGCGATGCAGACGAGGTCACTACTA -TTATGATTAATGAGTGTCAACAGCTCTTAGCGCTTTTCGGGTTGCCCTACATCACCGCGC -CTATGGAAGCTGAAGCGCAGTGCGCCAAACTTGTATCGCTAGGTCTCGTAGACGGGATCG -TCACGGATGACAGTGACATCTTCCTCTTTGGTGGAACACGAGTCTACAAAAACATGTTTA -ATCAAAGTAAATTCGTCGAGTGCTACTTGGCGTCCGATTTGGAGAAAGAGTATGCCCTCC -ATCGGCAGAAGCTTATTAGCTTTGCCCATCTCTTGGGCAGCGATTATACAGAAGGCGTCC -CCGGGATTGGTCCTGTCACAGCTCTAGAGATACTCACCGAGTTTTCCAGCCTGGAAGAGT -TCCGCAATTGGTGGGCCGACTTGCAAATGGGTACGCATAATACAGAAGATCCCCACCTTG -CCTTCCGGAAGAAATTTCGAAAGAAGGCATCCAAGATTTTCTTGCCGCCTTCATTCCCGG -ATCCCAAAGTTGACGAGGCATACCTGGAACCTGTGGTTGATAACGACCCTTCCCAATTCC -AATGGGGTGTTCCTGATTTGAATGGGTTACGATCCTTCCTCATGACGACAATCGGCTGGA -GCCAAGAGCGGACGGATGAAGTCCTGGTACCGGTCATTCGCGACATGAACCGGCGAGAGC -AGGAAGGTACACAGTCGAACATCACACAATTCATGCAAGGTCCACAAGGCGCGGGTGCAT -TTGCGCCTCGTGTTCGCACAGGTGGACCCAGCCGCATGGAAAAGGCATTTAGTCGACTGC -GTCAGGAAGCTCAGACAGGCGGAACCTCCCTGGATAACGAACCAGCTATCGGGGACGAAG -AGACAGAGGAAACCTCTGTTTCACAAAACAAAGGCAAAAGAGGTACGCCAATTGGGAAGA -AGGCTGGCACAAATAAGAAGCGAAAGACCCGTCGCGCCTCTCTTGAGCCATAGCCGAGGA -TTGGTGAATTGAGGTTGTCGCACCGGCGGCATATCAGATCACACCTTGTACCTAGCCCTG -GAAGTAGGATATGGCAGTTTTCTGCCAGCAATCCAATCATTTTATTGTCGCAATTGGTCG -TTGGACCCCCATAAGAGGAGCGACCCGCTCCTTCCATTCTTCATCTGCTCCCACGAGATG -GCTCCCATAGTACTTTGCATGCTCATACCTAGGATCCCTGGAAAGTCCAGGGATGTGGCA -TAAGTCATCCATATTAATGCCTGGGCTCTTATTAAACTAGCGTCATTTAAGACCCCTACA -TGTTTGAGCGCACCCATTGCCTCATCCCTTTCTCCGGCCTTTTTCTTCTGCCATAGAACA -CTTTCAACATGTCCACATCGGTGCAAACTCCTTGGGTTGTTCTTACCGGGATCACCAGCA -GCGCACTAGAATGGAGCGCGGTGTGACGACATCTCGAGAATGAGATATCCGTGTTGCTGT -ATCGGCGGATTGACTACGGTCAGAACGAATCATGCCCTAGCACCGAGCCGGACTCACTAG -CAACTGTGGACGAACTGGCTGCGCTACTCCCAACCGCGGTCCTGCCACCCCGGTGCCTAG -TGGTCGGTCAGTCCTTTGGGGGGATATTAATCTTGGCCCGCAACCTATAGCATTCATTTT -TGAACCGAGCAACGGTGCTATGTTGACAGGGAGATATCCCGTGAGAATTGGAAGTATCTC -CATCCCCTGGAAGGATGAAATTAACGCATGAGCGGTTTTTGTTCATAAACACCCTAGAGG -GCTCTATAGGGCCGAATGTTGCTGCGTCGCATTCTGGTTTGCATGATTACATCGTCCAAG 
-TTAGACCACGTTTATTTTTTAAAGAAAACAATTGTACAATATCTGTCTGTCTAATAAGCT -GATGCACTTAGTTTGTCTCTTGAGCCATAATAAAATCCTGCAAGTGGGTAAGGCCACCTC -TCATTTACCAGAATCCTTTCAGTTTCGATCGTTTTGTCCATATAAATCAATCGGTGGGAG -AGTAAGGCCACATTTATAGAAGAGAACTGGTCATTCGTCCGCAGTATTTACGGTACCCTC -AATCCATGTAAAGCGATCTAGCTTCGGGGTCGCCTACCTACTTAGTTGTGTATATTCCTA -GATAGTTAGGAAGGATGGTGATCACTTATTCCGATACAGCCTGAAGTTTCAATTGCAAAC -AAAAGCCATAGTTTCAGTCAATAACCAGCCCCATTTACTGAGGATTGCTCAAATAGATGA -ACAAGAAAATATAAGGGAAGCAATTCTGTCTGAAACAGAAATTTATGATGTCTTAACTAC -TTAGGCATCATTTCCCATGCCCCATCTACCATCTCTTTTGCTTTCAACGGTGACCCCCCA -TTTTCTTCATCCATCTCCTATTTACTTATCCCCACTGCGTCCCTTTCGCTGCTCTATCAG -GGCAGATTCTACTTAATGCCCCAGATTCCATATTTTGTGCTAAACGTGTGGGAAAGTATA -TCACGCTAGAATGCCACTTACAGCGCGTCACGACCGCCCATCCAACCAGTTGGACACAAG -TCGCCCGCAAAGTGAGGACTCAAACCGGTTACCTGGGCATCCAGACTACTCTTCAGTGGC -TTGTGAAAAAAAAAAATCCAAAATCAATTTCTCTGGCTTGCCCGATATACCCCGAGGGGG -GGTGATGCGGACCATAGGTAGGCTGTACAGGGCCGAGCAGCGAGCATGGCCATAGGTCTT -GCTAATCGTCGCCAGAGATGGGAGATGTTTTGACCAAAGACGAGACACATGGAGGTGGCG -TGGTCAGTTGCCCCAGAGAGCGACTAATTCCGTGTTAGTGACATTACCGGACACAATCCT -ACCACATGCCCTCTCCAAAGGAAAATCGTGTCCCACGGGCTCCCAAACGTGGGTCGAATC -TTTTTCCAACTCAACCATCCCGCCCGGAAATGTACGTACGGAGCGCTCCTTCCAACCATC -CAAGCAGCTGTCATGAGAGTCCGAGTAGAATTGATTCGCATGCGGGGGGTGCGTTATCTT -CGTTTCTGTTACCTTCGTTTCTGAAATAAATTCGCTACCACTATGATGAAGAACAACCGA -GCCAAGCGAAGAGCCGGTTGTTATGGTATCGAGAAGAAAGAGGAATCCTTCAAAACGTGT -GACCTGGCTCCACTCTGGCTCCAGTATGCAAAAAAGATGCCCTATCAAACAGCTTCTGAA -GGACATGGTCCTAGCAAGACCCACGGTTCAAACTTGCTGAGAGGACTGCATCAACCACGT -CTAGCTGCAGATCCTGCTGATGATCTCTCCCACTAGATGTCTCGCTAGATGGCGACTTCA -GTGTGGAGCGAGAGGTAGACATGATGGGATTTGGGGAAAGAATTCAAGTTCTCATCATGA -AGCCTCGAACGCTTTTTGGCCTACTTTGTGTTGTTTGGTCTATACTCCCGAAGATTCACC -TTGAAAAGCTCAACAAATGGCTCTTCATTGAAGATTATCTATTTGGAGAGATACCTTCAA -TGATTTGGTCCTTCCTCTCCTCTTCCCCACATCACTATATCAACTTTGTCTTCCTATTTC -ACGATCTTTTCATTGTTTTCAAGGAAATTCAAGGTCGGAATCATGATATCAGTCTACGAT -CAGTCTGTGCGTCGTCGGGGTGCTGAACGGCGTGGGTAGGAAAACCGCAGATTCTGCTGT -CCCACTCTGGACGAGACCAAAGTGGTTGTATCGAACCTGACACGGGTAGTCACCTCAACC 
-TAGGCGCTATGCTTCAATGGTCCATTGCTCCCTGGATGAATAGGGTGGAATCTGTAGCCG -AGGCCACATGGTTCCCCACGAACAGGAGTTCTCCGCAATCTCAGCGATCAGTGGAACTGG -ACCAATGGTTTCATACAGTATGTGCACTGGTTCCTTATAAGGGGAAATCCAGGCACTTGA -TGGTTACTATAAGATGTAGTATATATAATCGTGTATATCATGAGATTCATAGCATATAAT -GGTACATAATATATCAATGCAACCTTAGTCATGACATCAGGAAATCGATGACGTGAAGAT -TCCAATGCAAGATCGACAGACTATTGGTTTAAAAGGAGCTAAGGCCGATTCGATGCAGTT -CTGGAGTGCCTGTCCTATTAGCCATCGTGTTTTTTTTCCAGGTCCGCCCAATCCCGATCT -GATCAAGTCGGTTTTCTAGTTTCCTGAAAATAGTCGACGTCACTTCCCGAGAGCTTCCAG -GGGAACTTCACTTTTTGACTGAGATCTCATCCGCTTGCAGTCTCTTTTTTTTTCCCCGTT -ACATCTCAGACTCAAGTACGTACTCCAAGGAGAAGAAAAAAAAGAGAGACCCTCTTTAAA -CTCACGTCATTACTGCGCAAGGCCCTGAAAGTCTTTTTTCTCTAATTTACCTTTCTTGCA -GGAACAAGAATCTCTCTTGACCATCTCCTTTCTTCCACTTTACATTGCGAACCCTTTTTC -ATTGCAGATCGCCATGCATCGCACCTATTCTATGCGCCAATCGCGCGCCCCGACCGCTTC -GCAGGTCGAGAGCCCACCGCCACCGCTGTCCACAACTAAGACCGGGAGATGGCTAGGGAA -AGGCGGAATCGGTGAGTTCGAGATATTTCTTGTGATTGGGTTCGACTAGAAATCTCCTGA -GGATGTTTGTCGGGTCGGAGGCTGCCTAGTCGGGCCGCCATCGACTCCCCTGTCCCCACC -TCCGAGAGACGCAACAACCGAATTGAAGACCAGTCGTCGAAGCGATCACCCATGGTAGAA -AGGGAGAAATTGAACTTGCTAACATTCTGTGATTAGGCCATGCTTTCCGCAAGAACGCCG -CCGGTGCCTTCGGACCCGAACTCTCCAAGAAGCTGTCGCAGCTCGTCAAGATGGAGAAGA -ACGTCATGCGCAGCATGGAGCTCGTCGCTCGGGAGCGCATGGAAGTCGCTGTATGTGGAT -GGCCCTGGATTGCGAATAACCGTGGAACAACTTAGAAGCTGACTCGGTCGCCATAGCAAC -AACTCTCCATCTGGGGTGAAGCATGCGATGAGGATGTGTCGGATGTGACGGATAAGCTCG -GCGTGCTGATCTACGAGATCGGTGAGCTGGAGGATATGTTCGTTGACCGCTACGACCAGT -ACCGTGTCACCATTAAGAGCATCCGTAACATCGAGGCTTCCGTACAGCCTAGTCGAGACC -GTAAGAATTCCCCCGGCTTGAACTTCTCAGCTTGGACTAACCAAATTGAATCTCCAGGCA -AGCAGAAGATCACCGATGAGATCGCCAAGTTGAAGTATAAGGACCCCAACTCCCCTCGCA -TTGTCGTCCTGGAGCAGGAGCTCGTCCGCGCCGAAGCCGAGTCCCTAGTTGCTGAGGCCC -AGCTGTCCAACATCACCCGCGAGAAGGTCAAGGCTGCCTTCCAGTACCAGTTCGATGCCC -TCCGCGAGCACTGCGAGAAGGTTGCCATCGTCGCCGGATACGGAAAGCACCTGCTCGACC -TGATCGACGATACTCCCGTCACTCCCGGCGAGACCCGTCACGCCTACGACGGCTACGATG -CCTCCAAGGCCATCATCCAGGATTGCGAGGATGCTCTCACTAACTGGGTCTCCTCCAAGG -CAGTCGTCAAGTCTAGCCTCTCCCAGCGCTCCCGCACTCTCTCCCAGCGCCACCGTACTA 
-ACCTCAACAAGAACCGTGAGGGCGTCGATTTGTCCTCACAGGACCAGCCCTTGGCTGGGG -ATCGTGACTCTTGGCTTCCAGCTGACCAGCACCCTAACTACGAGGACGGTGACGATGGCG -TCAGCACCATTGAAGGCGAGCTGCGTGGCCGCGAAGAGGAGCGTGACCCCATTGCCGTAT -AAGGGTTGGGCGAGAATGATTACCTCTTTTCACGAATCTGACGGTTTGAAATTCGGGCAG -ATGCATGGGATCTATTCATCTGTCCAATAGATTTCATCTCCACGTTCCTTCTGTCGAAAA -TTCCCCTTCTGGTCTTCCATATACTTTGATTTTTCCGGGTTGAGATTCCGTGCTATGCTC -AAAGAAGACGCACATCAATCACGCCATTCAACTTTGGTGTTTAACGCGCCTTTTTTCCCC -CACATGAACGCCTCTCAATGTCGTAATCCAACGACTGTGAATCGTGCTTGGGGAATGTCC -ACTTCTCCACGCTTAGATCGGGGGACGTAGGTCTCATTTCCTGCTCGTTTCTCCACGGAG -TTGGTGATGTCTGAGATCGGTGCGATTGGGCATTGTCTGGGTTTTTTTCGATGTCTTTTT -TTTGTTCCCATTCCCTGTTTTTACTTAATTGATGTAATACCATTCTACGTTTGTTTTTGC -TGTTTTGAGGATTAGGTCCCAGGTAATTAAGGTGTTTTAATGGATTGTTTGCAGAAAGTT -ATATACATTGTGTAGTATCTGCTAGAGAACAGTGGCGTAAGGCGACGGCTAAACAGTGTG -GAGACAGATCCTTACCCCAGAAAAGTGAAAGACACATCAGATCCAATGGCATTCAATCTC -AGTGAGCAAATCACTCTGCTATAGCACGCCAGGTAGAGCACGTCACTCACCGCTGCTTTT -ACGTCTTTGTCATTACGTATCTCAGGGATCCTATAGAGTAGGACATGTAGGAGAGCAAGA -TAATTCGTATAGCAATCAACCCCAGAGTTTATATTGCACGCACTCCAGTCCATAGATAGA -CAGCAAAAGGACGGAGGAAAGACAGAAAAAGCGAGTCCCACAAAGATATCAAATATAAAA -CCAATACTATAAAGCAAAAAGACATAATTCAACTTCCCTGGACCACAACACCGCCAACTC -ATCTCCTTTCCCGCAAACCAGGGGAAAGGGAGAAAAATGAAATAGGCCTTCGCAGCACAC -GATCTGGAGGATAAGAAGGGTGGAGAGGATGTAGATGGGTAGAGAGCCGGGGCAGTGAAA -GATAAAGCCCCGGTAGAGAGTAGAGATAGAGATTGAAACAGCAAGCGAAATTGCCAGATC -TGAAGTGTAGAAGCAAAGACCATCCCTAGAGGTCGATAAAGCCACGGTCGGGAGAGCCGG -GCAGACGAGAAGAAGAGAAGGGTCGTGACATCGTCATTTGTCTGCGTTCATCTAGCCCCA -TTTCCCATTCGGTCGTTTTGAACATGACAGGTAGAGATGAGGAAGATGAGCGCAGACACC -ATAGCAAAGCAATGGGGGAAAAACAGAATGCTGATTCGATACGGACGCTTTATAGAGACC -GTAGACTGGGGAGATGATACAAATGAAAATAACGGCAAACAAAAACGCAAAAAAAAAAAA -AACACACATTCTTCCATTTTGGCTCACTCCTCGGTGCCAACGCCCTTCTTTTTGGGCTGA -ATATGTTTTTGTCTGGTATTGCCGCCAGGACCTTTTAACTCGACCGAGCTATCCTTAGCC -GGAGCAGGCAAACGAGAACCAGCATCGTGTTTCTCAGTGAGGAGATTCTTCAGCCAGCGG -TTCTCCATCTCGAGCTGTGCGACGCGAGCCTCGAGGGAAGCGTTCTTCTCGGTAGTCTCA -CGAACCGTCCGTTCGAGGACCTGCTCGCGCTGCTTTTTCTTTTGACGGAAGCGGGCGCTG 
-GCGGCTGTATTTCGGCGACGACGATCTTCCTCTGCGGCACCACGCACGCCCTCTTCTTGA -TCGAGAGCAGCGGCAATACGGTCGCCAGAGATCGAGGTACTGTACTTGGTGTTTCCCATT -TGGCCGTTGGGCGGGGAGATGGATGAGGACACATCGGATGGTGTGAGAGCGCCCTGTGCG -TGGCCGAGCTCGTCGGGCGCATAGGGGACTGGTGGGTAGATTGAGTGAGCTGAGTAGGGC -ATCATGTGCTCTGGAAGATTGTAGTTGATGGGCTTTTCGCCAGGAGGGCCTATGGATAGA -GTAAGCGATGGTCGATATAACCGCCAGGGAATGAGCCCCAAGGGTCGGGAAGTGCAAATC -ATATCCGAAGGATATAAGCTCAAGAAGGGTCAAGGGGATGTGCAGTTTAGCGTCAGCAGG -CAACCACTGGCGTTTGTCATTTCATTGCGGGATTGCGTGCTCGTTATTGCAGGAGCACCG -ACGGTGATCTCATCCATTGAGAATGGGAGATGCATGCCAGGGTTCATATCCTGGGCCTTT -GCAGCCTCGAGCTTTTAAAGAACATTTGCACATTGTGTAGGGGAAAGAAGAGGTTCCGCA -CCGGAGCTATCCGGTGCCTTTTCTTTTTTTTTTAAGAAAAAAAAACAACAGGGAAGAAAG -GCCAAAAAAAAAAATGGAGAGATCGCAACTTCAATTTTCAGACATGGGAATTTCAAGCCC -GGAACTAAAGCTCAGACGCAGAGGCGCGATGGCGTAACAAATGCAACATTGGGGCACAAG -GTCTCAAAAGCTTGATACATACCATGATAGGGATAGCCTCGGAGATCGAGAGAGCTGCGG -TCATCATCTATACTCTCATATGAACTGACAGAGAGACCGCCCATGCCTTCGGTCTGGTCG -TAATCCATACTCACGTTGCCGAACGGGACCAGCCCGGTATCAGCGTCAAATGAGTACTGT -CGAGTCTTGGCTGCGGGATCGTGGCGCGACATCAAGAAATTGAAGTTCTCAAGTTTGTCG -AAGACTGGACCTGGAGCTGGTAAAGGTTGTTCTCCCATGGTGTTGAAGAGAGAAAGAAAA -GAAGAGAAGTTCAGCCCCTTGACCTTTAGGGTTATGAGTGTAAGAGAGAAGAGGGGCTTA -GGATGTTGGCAACCCCCAGATCAGGACGAGATTTTCTATGAAGCAAATCAGATCACCAGT -TAAAAGTGTTTTAATATAAAGGCGAAAAAGGTAAAAAAAAATAAATAAATCCAAGACAAG -GTAGAGAGGAAAAAAGACAGCGTTCTCGGCCACGATCACCTCGAAAAAGGTCACTTCATC -GTCGCATGCCGCGGGAGTTCCACCAGAACGAATCACAAGGGGATTAATGGCGTTGAATTA -AAAAAAAGGGGGGGCAAAGAGGACGAGGAGGAAAAAGACGGCCCTAGTCGATGATCTGAT -AGAATGGAGAAATGTCGTGATCGCGTCTTCTGGTTCGTCAGTTGAGACATTCACCGCTTT -TATAGAACATATAGTAGTCGAGGGATATCACTATAGATCAAGTGTATAAGTTTATAGATC -TCTAGGGATATATATGTATAGAAATAAACAAGCTTCCCTGGAGAGCTTCTAAGCTCTTTC -GATGAAAAACAGGGGCCATGACATATATTATCCACCCTCGGAACCAGTACCTGGCAAAGG -TACATAATGATATGGAGTATGCAGACTGGATACAAACCCATAAGCATTATACAATCTATA -CTCCGTACTAAACTGTAATTTATATGTTGTATAATTTGATGACATCGAATTGTTCAGCGC -AGATGACGCGGACTGGCCCCGATATTCAACCCAAAAGCTTGGCGGCAGGAAAATCCCCCC -CCCAATCTAGAAACATTCGATCGTCCATATGACATATGCTTTATACTAATGTTATTCACG 
-GCTTACTGTGGACGCCTTTCAGCAATGATCTGAGCGGCATCTTCAACCGTCATACCTCGC -ACTCGACGCTCCACTCTCTCCTCACCTAAAACGGTATCTTCCGCAATGGTCCGATAGATC -TCAGCAAATCCTCCATAAACATCGGCTCCGACCCCATTCCACCCACCCTCGGCATCAAAC -GTCTCCCCAATCCCACGCATCTCATCCTCCCATCGGTATGCCTTGGGACTCATACCAATC -ACCCCATTCGTCGCGAGCGCAGCAGTACGTGGGGAGTACTCCTCCAGCAATGCCAACAAC -TCTGGCAACATCGACTCCTTCTGCGCAGTGGAGAAAGACAGAATAGACAGCGCGGTGAGA -CCTTTGGTCAGTGCAGCGAAGGATAGTTTGAGCGTGGATGCGGCGCCAATCCGAGGCGAG -ACAAGCTTGATATTCAGGACCTCCGCCAGCGTCGCAAACGCAGGGGCCAGATCCACCACC -CCCGACAACACCACGCTAGGTCTTTTCCAGGTCGTGCGCCCGTCTTGCGTGCTTTGCCAT -GGCGGCGCCCCAATGATTCCGCCATCGAGGAAGTGGCACCTTAGGCCCGAATCCGTTGTT -ATGGTTGGTTCGTCGAATTGCGCCGCCATCTCCGACGCCAGTCGGGCGGGTACCGCGTTG -AGTTCGAGGTAGTATAGCTTGGACCGCTTGGGTTTGCTTTCTACATCTTCCAACGCCTCT -CGGAGACTGCCGGTCGACGGCTGTGCGTATGCTTCTGCTATTCGTCCGGCTGTCGCCACT -GCGTTCCGCGGAGGTACGATCGAAAGTACGTAGTCTGCTTGGATTACTAACGCTTGGTCC -GAAGGAAGAATTTCGATTCCCGCGGAATGGATTCGCGCTAGTGTGTGTTCACTGGCGGCC -GTTAGTGCGCGGCCGTGGTTTCCCTCTCTGGATGCGTACCTTCTTCCTTCTGCGACCGTG -ACAACTCGGTAGTCGTGGGCTTTTAAGAGCCTGGCCATGCCTAGGCCCATGTCTCCGATC -GACACAATTCCAACTGTAACGGGTCTCATGGTGAAGGGATTGTCGGTCTTGGAATGAAGA -GTATTTTGAAGTAAATCAGTAAAATACCGAGTACTTGTATCAACTTCTACTGCGGGGGTA -CCTAGGTAGGTACATTTTCTCCATTTGTCAGCGAAGTCTGAAGTTCTTGGCTCCTGGGCT -CCAGGGCTTTCAAGTTGGTTTATAAGTGTGTCCGGCGTTATCGTGTCCCGATCCCCTGAA -GAGTGACTGGGCCCCGAGGGTAATTAAGCTTATCTTCACCTTGCCCGTGTCCTTCTAGAA -AAGTCTTTGAATCATGTGTCTATTAACATGAGGTGATATTTCTCATTGTAGGAACGGTTC -AAGATACCACATACTTGCATTTTCGGAACTGGCCAAATCCCTGAAGATACTTGGTATCAG -GTACCTAACCACCCACAAATGCATACAAGCGCTTACAGCTGGGTTCACGTCATTAATATA -TAGCTAGCCCATCATCCTATCGGAACTGGGTGAGAATTACTACATCCGACCCTACTATAG -AGATGGATGCACAACCCCCAAATAAGCAGCACGATGTTCTCCAACATGTTTATCAGCTAG -TCAAGGACCGGGAATTCTGCTTTTTCTCTGTCGAAATCAAGCTCTTGGGGTCCCGGATTT -TCCAGTAGCTATTAAAGATGGCGTTCTACAATTTCACCCATGTCTCCCCCTGCCTTACCG -GAGAGATTCCCCCAAAATGTTCACTTGGATTAGGACCGATGGTCCTCCAAAGAACGATGA -TGACCCAGAATTTGATTCTATTTTGAGAAATCTCATCATACGGCCTCCCACAAATGCAAG -GGATGCTTAGGATGAACTCATTATGCGGGGTCGGGTGACTGGGATCACCATGCCCTCCAA 
-ACAGAGCACCACATCCTACCAGAATAATGCTCCTCGGAAGGAACGAGCGGTAGCCTTTCT -TATTAACCTTATAGCTCTTAGAGCGTGCTGTTAAGTCAATAGTCAGGATTTTCAGGGGTT -TGAATATATTTGCTGAATATCAACCAAGACGTTAGAAAAGACAACCGACCCCTTGATATT -GCAGGCCACGGTGGCAGATAAAAGAGCCAGTGGAAGACAGCCACGAAAGTTCTCCTGTAC -CACATGCAATTATGCAATTATAACAACCGTTGCACACGTCCGCGCAAATCATGCGGACCA -GAATCTCACTTCTCACAAGAGCTTTCCTCCTGTATGGGTGAGCAGCACGGGATAATCATT -CGATTTTTATCCAATGTTTAAACCTTGGTTCTGCGATGTTTGCTCCAATATGAAAGTTAG -CCAGTTTGGCTAGGGAGCTTGGACGCTCGTGTTCGATCACTTGCGGTTTCAGTTACACCG -ACCTTGGAATGAACTCAACATAAATATCTAATACTAATAGCCTAGCTCAATTCCTTTAGC -CCATTTATAGGGTATGTCGACCACAAAACGATGGGAATAAAACAAGATATCGAATCTGCT -CGTCAAAAGCACAAAAGACACGAAGGGCTGACGAACGTACCTACTTCGGTATCCTGAATG -GGCCCAGGCCCGAAGGATGGCACCCTTTCATTGGACAGTGGGGCGGCCCGCTAGGGCCCG -AGGAGTGACTTTATCCTATTGGATGGTGCTTAGGTCTTTCACGAGTCCGGCCCCGATGTG -AAATACGGCTCAGTGATTGTCTAGACGATGGGTGCAGTGAGCTGTTTCAGAAGTATACAC -TTACTGTGATGGATAATTGGTAGTATGATATCAAGAAACTGCGCTCTGTTTATGAATATA -TCTGACACTTGTCCTAAGATGAGCGATACAAGGCCAAGCACCTCCGACGAAGGCTGCTAT -GAGACAAAAGACCTCGAAGCGTTCAAGCCCACAAGCATTGTTGAACCCTCTCTCGATGCA -GATGCCAACAAAGACCCGGATTTCATCACATGGAGCGGACCTATGGATCCAGAGAACCCA -AATAACTGGCCTCAAAGATTAGAGTGGAAGAGCACCTGGGTTATCTCCCTTTTCGTCTTC -ATCTCGCCAGTCTCATCATTCATGATTGCCCCCGCCATGCAGGATCTTGGCAAGGCCCTG -GGAATGTACTCCGATATTGTAATATATCTGGTCATGGCTGTCTTCATCCTCGCCTACTCC -GTTGGCCCCAGCTTCTTCGGCCCAGCCTCGAGGCTCTTGGCCGCGTGCGTTTGCTCCAGG -TTAGCAATATGTGGTATTTAACCTGGAATTCGACACAGCTGATCTCAGAAGCCCCAATTC -GAAGGGTCAACCCCACCATTTAAATCCAGGTTTATCCTATCAATGTTGCCAAGGCTTTCA -TCAAACCCACATACACCGTCCGCGGGCATGGTGGCTTCTTTTCTTCCAACCATGCTCTCC -TGTTGCCTGCTCACCTTCAGTCCATGTTGGGCACGCAGCGTGGTTACGAAATTTGGCTCC -GGGGACGACTGAATGGGGGGATTTACAAGCCATGCCTTGAGCGTCATTTTACTAATTGTG -ACATGCAGGGTTCCGATCACTACTCGCTTACCACATTTCGCCAAAGCACTCTCATCCTAA -CACCAGCGGTCCTCGCCCTAATTATTCCAACTGGTAAGGGGTAGTCAGCTTGAGGTGTTC -GATATTACGACAGACACGTCCCCTCAGCTAGCTGGTCCACAAGTCCGACTATCGCGCCTG -AGTATAGGTTCCTATTGCGTCACCCAGAAGCAGAACACCAGACGAGCAAGTTGTGAGATC -TCTGATAGACCTCTCCAAGATTTTAACAATCAATGTTCAGCACTGCCTATTGATGGGCTT 
-TCGGTCGCAGACCACTCAATGCTGGGTCTCCTGGTGATTGAATGACTGCCTGCACCGACA -GATATCCCGAAGGGCACCATATCAAGTCTTCGCCATCGTCGAGGTGAAGGCGGCGCACTG -GGGTCGGAAACATCGGTCCCAAGTGCTCTGGTAAAACGCTGCCCAGACTTTTGCATGGAT -GCATGATATGAACTCCTGGTAGTGTCATGCTTCATTTTGACCAATAGGTTGGATAGTGAT -ATGACGTACAGGGTAACCCTGCTAACAAGCACTTATAGATGTCCCATGATCTCACGGTAT -CAGGATAAGAGAATCCTCGGGAATACCTCAGATAAGAAGTTTGGCAAAATCCTGCTGTAA -TGCTGTGGTTATGAGATTCTGCCACCCGTGGACGAATTCAACCTGAATTGGTCATACGAT -ATCACATGCCTGGACCCACGACCCACGGGTTCCGTCAGAGGCGAAAATTTCAACTGGAGC -TCTTTTGCTCTTGATCTTGATATATCCTGAATTGAGCTCATCATGAATGCCTTGTCTATA -GTCGGGCGATCGAGCTAGAAATAAGAATGATAGAGTCAATATAAGGTATCAAGTACAGAT -TGTCTGTTCTACAATATCTACAATATACACTGCTCCCGAGGGCGGGAGATCCGAGCTGAC -TAAATGACTTCTTCATTTCCCGATTGGCACAGTTTCACCGGGGCAGGCTTAGGGATAGCC -TTCCGAGGTCTCTTTGACTCTAATTTATTCTAATCTATCGGCCTATGTGGGTCAAGGTGA -TATCAGGAGGCGCTATTAGTATATACGATACCAGCCTTTTGAGGTTTTCGATGTTTTCAA -TGTCTCGTTCTAAGAATAATCACACCTAATATAAGCTGGGGTTTCCCAGATAGATCACGT -ATAGTCCTTCGTACTCCGTGTAAATGTGCAAGGATAAGCCTGGGCACACCTGACCCCGCC -CAAAAGTACCCCGGAGATCGAACGGGATCCGTGAATCCCGATATCCGAGTGTGCTTTGAT -TATTCATCTTTTTTTCTCCTTGGAAGCTTTTCTTCCCGTGCAGATTATGAACATCACAGC -AGAGATCGAAATAAGAATCAGTGATTTTCCGCTAGTCTTGGCCATTTCCGCTGGGGTGAC -TTTTTCACAAAGTCTTGGCACATTCATGACCCCGAGGCAACCCACAACCTTCGGTCAAGG -CAATGAACCAGTCATTTTCGCAAAGCACCTTGTCAATGGACACTAGTCCATGGAGAGCTC -AACATTTGAGGGTGGGTTTCACTCGTCATTCGGGATTCCGCCATCCCGGTGGCCTCCGGT -TTAGGGGGATCCAGCGTTGCTAACGGTGATATGACGCCTATCTAGGCTTGCGAGAGGTGT -CATCGAAAAAAGATCAAGTGTGAGATCGAAGGGTCTAAGTTAACTTGTGTGCAATGCATC -CGCCGTAATACACAATGTGTCTTTCCGGTCCAGCATGAGAAGCGGGATACTCAGCAAAGG -TATAGCGATATCCATTGCCATTCCCAATCAACTTTTTATCATTCTCGATCTCCTGTTGAT -TCGCCGTGGCTAATTATATCACAGTAGTGACGAGTATATCAAGTCTTTGAAAGACCGGCT -CGTCAGGGTTGAGTCGCTATTGAGAACGGCAGGCATCTTGCAAGAGAGCGACATGAGCCA -TGATGAGTTTTCCGATGAAGATGATGACGGACCAGCTAGTCCAGACATCTCATCTCTTTC -CAGTCCAAAGTCAAATTTTGGGGTATGTCTTCGCTCAGATCGTGGGGACATCGAGGGTAC -GCCTATCTTCCGGGCGGACGAAAGAGATGATTCTCGGTATTTCGGTAAGATAAAGGGGAA -TGCCCAATGTGATATAGAATCGCCGAGGCTAATTCAGCGGACAGGAAAATCCTGCTCATT 
-ATCAATTCTTTCACGAACAGGTATAGAATGGATCAAGAGCAAAACAGGCGATATCAGTTT -CTTGCGAATTGTATCTCCTGAATCCATCCATGAAAACCCCTGGAATCAATGGCGTCCAGA -TGTATTCCAGGATCTTTTTGCCTCGCATGTTTTTAAGCCTTTGCCCTCAAGGTCGGAGGT -GTTCTCTCTTATGAAGGATTTCTTCCGGACGGCCAACCGTCTGTTTCCTATCTATCTTGA -GTCGTCCTTTATGAAAATGGTCGAATGGCAATATACGCAGCAAACATGTGACGACGCTGC -TCGTTGGGCCAATATCAATATGGTAATCTGTCTGGCATACGAATATCGATTCTCGAATGG -CTCCAAGTCAGAGAAGGACAAAGAGAAGTCCCAGTTATACTTCAAAAACGCCATGTCTGT -CTTCACCGAATTAGCCTTAAAACGCACCGACCTACTGAGTATACAAGCTTTGCTCAGCAT -GGTACGTCTCCTACCCCTTTGGCTCCCATAGACGCGTCTAACAATTTGGCAAGTCGTTCT -TCCTTCGAGGGAATTCTGGAACTCAATCAGCTTTACCTCTTATCACTGCGGCTATGCGGT -CTGGTCACCGAATGGGACTTCATCGAGATATAGCAAGACCAGAACTCAGTCCGGCCGAAC -AAGAGGAGAGACGGCGAGTCTTTTGGGTTGCGTTTGTCATTGATCAAAGGTGAGAAAGGT -CCATTCAGATATCGATCTCTCATACTAAAGCAGTGGTCCAGTACATGCCTAAGAATTGGG -AACGCCCCATCACAGCATCAAGATGATTTCGATGTCCCTCTTCCGGAAGAACTGGAGAGC -GATAAGCATGGTGAAACTGCAAGCAACATTCCATTCTTCCGCCAGCTCTGTCAGATGGCG -CTCATCAAGAGCCATATTTACAGTCGATTGTACTCTGCTACGGCGTTAACGAAACCTCCC -GTTGAAATCTACAAGACGGTCAAAGAACTGGACGCAGAGCTTGAAGAATGGAAAAGGGAA -AATCCAAGTCTCACTGAAACGGGGATGAAACATACCGAACGCGACTTTCTGTTTGGTTTT -GCCTCCATTGGTCTCCATTTCGTCTATCAGAATGCCCTGATCATGATTCACCGGGTCCCT -ATCTTTCTTAACTACATGATAACAGCCAGCAAGGAATCAGAAAAGGTCGTGGCGATCAGC -AAAGCACATGCCTCAAAATCAACAGCCATTAGTTCACAGGCTGCCAGAGATACATTGAAG -GTCGTCAATAACATGCCATGGGGAGATATCGCATGGACTTGGTGAGTGCAACAGTCTTCT -CTTGATCTATATGGGCATAAATTCTCACACTCAAACCTCAGGTCATTACTATACTATGTC -TTCCTGGCAGCATCAACATTGTTCTCAGGTATCCTGCGGAATACTCGACATTCAAAAGCC -CGAGCCGACCTTCAATCCCTCAACATGGCCTCGAAATTCTTTGCAACTCTTGCCCCAGGC -GACGGATCAAATAATTATGCCGGTTTCATGACCCGCATGAGCGCAACCCTGGAACGCATT -GCCAGAGCCGTCGTCGAAAAAGATGAGAAAAGACCCCGCGCCCCAGACGAAGAAGACCAA -GAATATAAACCACCAGGAGCCAAGAGGCGCACCTCTCGGgcacagccagcacagccagca -caccacCAAAGGCAGCACCGCCGACCCACAAACCTCCGCACAACAATGACCCCCGCCACA -GCTCCGGGATATCCAACATCAAGCACCGCCAACCCCAACAGGACGGACTTCAGTATCCCC -GATACCCTAGAAGGCCTCCCTCCAGTAAATTCATTGGGCTACGTCGTTCCCATGAGTCCA -GTGCCAGGCCCAGGCGACAATATTCAAACCGAGTCCCCATATCTCGCCAACCTACCCGGC 
-ACATACCCACCTCCAAACACAAACATAGATGGCACATATCTCCACAATACAGGTGACAGC -ACCTTCAGCACACCCCAAATCCCATCCTGGCAATTATCCAAAGACTATTCTACAACTCCG -GCCCAGGCCCAGGCCCCAGGACCGTCAGTCCTGACCCCCAACCCCATTACTTCGATCAAC -TCCCCAGACTCCTTCAGCAGCACCGCAAACTCAATCCCCGATTTCTTCCAGAACCCAATG -AGTGGCGACTGGGGCCACGGCGGGAATATTTTCGCCGGACTCTTTCCAACTGAATACAAT -TTCCCGCCCCCAGCTCCAGCGGGCACTCAGTCTGCCGATGCATATCCCTCGATCCCGATT -CTATCGGCTGAGTCGTATATCCATGGTGCGCCTGGTATTGATAGTCATACCGCTCAAGCT -GGTGGGTTCGATCCTCAGGGTCTGGGGTATGGCTATGCGCCACAGGGTCAAGAAGATCCG -AATCAGGGGGCTGATCCGGTCTGGCCTAATGGGTTTTTAGGGTTATTTTAGGAAGAAATG -ATCGGAGTGTCCATTTCGGGTTTTGTTGGTCTTGTTAGCGTTCGTTGTCTTTACCCATGG -TTTTTCCGAAATACGCCTTCTCTGCTCTCTTCCTTGTATATGTTTTAGACTGGACTTACG -AGTGTTAATGTAATCACATACTGGTGTATAATAGAAATAAATCAATCAGTGAATGTCAAA -ATAAAGACTAGGTATATACAAAAAAGCGCCCAGCCAGATCCCAAGCTTGGAATATCCCAT -TGTCCAGCGCCATTGAACCCGAAGGCATGCAAAAGATTCTCTCTGCGCCTTCCTTCCCGA -ATCCAGACCTAACTTATCCCCGTCCAACAAAAAAATCATGAAAGAGCAAATATTCGAAAA -CTAGAACACAGATCCCGTGAGCGTGCGCACTGCCTTGCACCACATGCCATTCCCTAGCTC -AACATCGTCTTCCTTTCCCCGGCCAAGGTCTTTGCCAAGGATGCTTTGCAACTCCTTATC -TGGAAGACCGGACTTGTTACCTGTACCGAAACCATAGACCTTCGTCGCCAAGACGTCAAT -ATCAAAGTCTAAGAAACCGACAGTATTCTCTGCCCTGGTTTTATGAATACGGGTACGGAC -AGTGAAGTCAACATCGATTCTCTTGGCAATGGTGTTCTTCGGATCAGCGCCCTTGTGGCC -AGGTGTAGAGGCAGTTCCATTGCTAAGGAGCGTGCAGCGTGTGCGTAGAGAAAGAGCGCC -CTTGTCAGGCTTAGATAGGGTGAGGCGGGTAACACCACAGAATTCCAGCATGCGCGCTTC -GTTCTCCAGCCCGACTGTGCGGTCCCATGCACTAGAGATGAAGTGCAAGAGCTGCTTAGG -TGCGACAGTGGATTGCTGGATGGTGGTCAGGTGGTGCTGTAGCGAGTGGAGGACTAAAGA -TGCAATCGGAGAGATGTTGGCGTTCTTTTGAGAAGTTAGTTCCATCGGGTAGTTCTTCCC -TTCGCTTTCACCACTGTTTTTGGAGGCAAAAGCTCCAGGATAGAACTCTAGTTGGAGTTG -GTCTCGGTAAGCCATCTTCAATAAAGGCCCTTTCGCGGCATCCGAAGGAGTCGAGGCTGA -AACGATCGACCAGCCGGTTTCTTGCTGAATTCGGTGGACGGAGGCTATTTTTCCTGGTCA -GTATTCACATCCACGAAAGACTCCCTAAGGGAGGAACAAATATCTCACCCTTCAATTCAT -TGATCTCTCGGGCACTCCACCCACGGCACTCTTCCTTCACACGCTCTGCCTCCTGGATTT -GAGCCAGAAATTCATCCCGCATTTCGGCTCCCGCCTCGATAAAGGCCGTTTTCTCATGAA -CTTCTTTCTGCAACTGCTCAAGTTCCTGTCTTTTGGCTGCAATTTCGGAGTCCACCTCAA 
-ACAGCTTGCCGCGCGTACTGCGTAGCTCATCTTGATCACAATTCTCCATCTCATCCATCA -ACTGTTGTAGGTTAGCTGCCTGTTCCTCGAGAGAGGCGTGCTTCTCCACGAGTAGCGGCA -CGCTTCTGTTGAGCAACTCTTCTCGTTCAGATAGCAGATCGTCGTCTGCTTTCATTTCAT -CCACGTGACGAATGAGCCCTTCTTTCAGACCCTCCAACAACTTCATCCGCCATTCGTACC -ATGTAGCCTTACTCAACAGGCGGGCATGGGTCTTAACGTTTCGGAATTGATTATCCATGA -TCACTCGGATATCTGGCGGCGCGGTGGCATACTCCTTGAAGAGAGCTGGATTCTCGGCAT -ATGTTTCTGCCTCGATAGAGCGAATCACTTGCCGACCTTCGGAAATATAAGATTTCAACT -CCCGGCATGACTAGAGACAGTGTGAGCGGCAATTAAGGTATGCGGGTATAGTAAAACCTA -CATGCTGATAGAGTTCAAGCATTGGCACTGTACAAAAGCCAGCTGCCACGCAATCATCAA -ATGTTATGGAGCCCGGTTTAGGCATGTTCTCACCAGATGGTCGTGTCGGCTTCTTGGCGG -CACTGCCAGGGACAGTTGTGTGTCGTCTCTTTGTCGTAGTTAGTTCCATGAAATGAATAT -TGGTCATTTTCAGGAAGTCCTGCAACTGAATAGGCTCCATTTCCATGGATGGTTCTTCGG -CGGGGGTAttctcttcttctacttctttttcttcttGTGGTTGCTGAGGATCAGAAGCAA -TATGAAGAGCATCAATCCCATGCTTTAGAGGAGTTGTCGAGGTATGAGAAGCTCTCGATG -GTTTCTGGGAAGGCGTGGTGCTACCCATAGGAGGCGATGAGCTAGCAACTCTACGGACGG -GCGAAAGCATTGACCGCCTGGCATTGCCCTCTTTGAACATCGGTGCCGGCAACTTCACAC -CTTTGACTGGGCTGACATTGTGACCGCGCAGTCGCTTGGGTGAATTCTCGACTTCATCTT -CATCAAGATCCAGCTCGACTGGGCGTTTGCCTAGAACCCCACGGGCAGCCCCAACATGAA -GACTCTTGCGACTGCCAATCCTACTCTTCTTAGGGCTTAGATTTGAGATCATGTCCTTCA -AGTTTGAGGTTACATCCCGATCTGACTGCGATCCCGCGATATGGCCATCTTCTCGAAGTT -CCTCTTGTTCGCGGTCACGTTCTTCCTCGGCCTGCATTTGCAGGGGATCCTCAAAACGTA -CACCAGATTGGGGCTGTTCTTGTGGAACGAACTGGGGCGTACTATCTCCAAGCGACTGGC -GCGTGTCAAGTAAGGCGGCCACTCGAGGAGATCCAAGACCTTCTTTGTCGATCCCTAGAC -CAGAAGAGCGACGTCTCTCGCGTGGTTGTAATTTGAAGAGCGGGGTTGACTCTCCGGTTG -CTATGTTATGTTCGAACAGGCTTTGGCGTAGAGACTTCCGCTTATCTGATGACTGCTGGA -GTTCAGGCTGGAAGAGCTTCTTCGGCGAAGCACTCCTGAAAGTAACATTGCTAGATGGGG -GTGTCTTGCTTGGGGTGGTAGGTTTCGCTTTGGGTGTGGACTGTTTGGGCGGAGTCAATG -GCTTGGTGTTTGGTGATCGTTGCCGACGGGCCGACTTTGGCATTTCCAACGATCCTAGAG -CGGAGCTACGCCGGGAAGGCCTAGCCCGCACGGCTGCTAAACTGGGGCTCCCATTTTCGG -ATGCAACAGCAGCAATTTGATAAGCCAAAGGTGATTTAGCAGGGGACTTGTTGGGAGACA -GCCGAACATTGTCTTGGAAAGGCGAGCTAGCAAGCTGACCTGAGTCGGTTTCAAGCTCCA -TCATCATTTTCGCGTGCAATTTGTCATTGGCTTCGATCCCTGACTGTAAGATACCACCAA 
-TAGCACCAGTTATCTCCATGTCCATACCATCGTCAGACCCATTATCTTGGCCGGAATTCA -TGTGGTCAGCGTTGGAGGCATAGGTAGGTTGGTTACCCAGGATTTTGCCAACAGCCGAAG -TGAATTCCATTGTCATTCCTTCGTCCTCGTTGCCAATTTCAGAAGACTTTGATGGCGAAT -CATTCTGCGCGATGCCACCGACGACAGTGGTCATCTCCATGGTTTGATCTCCGTAGTTAG -ATTCTTCTGCCATGGACTGGCGACGGCCGGGAGACTGGCAAAGAATGCGTCCTACGGTAC -TCGTCACGTCCATGCTGAGGTCTTCGTCCTCGTCAATATCATCTGATTTTGGCAACCCTT -GCGTAGTGGCATTCTTGGATGGGTCGATGTTCTCCTCATCATGTCGTGTGCTAATATCTC -CCCAGTCAAAGCTTTGTCTTTGGCCCTTTTTGATCCAGGGCTGGAAGGCACCAGTTATTT -CTTGATCCGCAATTTCCATTGACATCTCTCCGTCCTCCTCAATTTCAGTTCCTGCTTCCT -TTGCGGCCTGGCGCAGTGCTTCGTTGAGTCTAGCACTAGAGCTTGAGCTAGAAGTTATAT -CTGATTGGACACTGACGCCTGAGCGCGCGGTCATATCATCCATGCTCATTGCAGTGCTCT -CAGCATCAAATCCATCGTCGGACCCTGAGGAATCGCCATCGTCTTCGTCTTCTCCAGCCA -TGCTTTGGGCACCGGTGTCTTCGCTTCCTACCGAGCTTCCGCTGAACGGACTTGAAGATA -AATCCCGAAGGCCATCATCGCCACCGGTTGATGATGAGCGACCTCTTATCTGGTCCAGGT -CCTGTGTGCGGACAGGGGAGAATCCGAATTCGGACTCCGCTGGCGAAGATGGGACATCTT -TGGTGGGTGAGGGAGGGGCAGGTTGGTTCTGGGAGTCTGCAACAGCAGATGCGCGTCTCG -TTGAGTTCGCAGCAGAGGAAGAGGTCGAGTCGTCCGGGATCTCCACCACGTTCCAGGTGT -GTAGGGTTGCCTCGGGGGCAAATGATACTCGGCGATTTGCTGTATAGATGTGTCAATACT -CGTTTTTATGGTCGAGTTGAGCACACGAACCCATTGATTTCCTCCGCGCCGCTCGTTTCT -CTAGGATATTCTGTCGTTCTCGATCTTTGCGCTCCTGGTCATCTCGCTCCTTGGTAGCAG -CCATCTCCTCGCGAATGGCAGAAGTCGCATTGAATGTGTCAAACGGGTTCGCCGTGTCGT -TGTTGTCATGATTTGCCATGGGAGGTCCTGGTGTTGCTTTAAAGTCGATCAGTGGACCCT -CTTTGGCTTGATTCATCTGGCTCTGTGCATCCATCGTGCCGTCATGAGTCGGTTGTTCTC -TACGAGCGGGTGTACGTCGGCGCGTTTCTTCGAATGTCGGGATGGTTCGCACTGGAGAGA -CAGGAACTGTGGGTTTGAGAATAGACTTTAATGGGAAAGAGGCCGTAGACTATAAATATT -GAAGTCAGAATTGTTTGCATATGCGCACGTGGTATTCGATTGGTGGCTACCTGGCGACGA -TTGCCGTTAGAATTCTGGAGCGCATCAAGTCCACCGGGCCCCAAACTCTTGCTCCGAGAC -TTCTTGTCTCTCATGGCAGGCTTTGTGTTATTAGAAAATGGCTGAATTGCGCCAATCTCA -GTTGTCGCATTCTCCTTGTCCAAACCAGACGTCATCCTAGACCGCGGGACATGAGCGATA -GACCGACGAGTTCGTGGCGGTCGCAAAGCCGCTGAGTCGGGTCGAGTGGACATTGTCGGC -GATTGAGTCGGCGCATTTACAGATCGATTCAAGATTGCATGTTTCTCATATCCGAACGTT -GAGATATCTAAATCAAACAGGGCTCAGACCTGTAATCTGCTCCTATCGCACCTACCCAAA 
-GCGCGTTGCCTCCGCAATCACGTGATGTAAACAAAAGCGCAGAGCCCTAACCCCAATTAG -CCTAGTCCCGATCGGGACTAGTGTCGCACACTACGAATTACCAAAGATTCTGACGGACAA -CGAATCTTCCTCATCTTCGACAGCCGCCAACCCGACATTTCTTCGACACCGCTTCCTCGT -CGAACAACACCGCAAAAATGGCCACCGAGTTGACCGTCCAGTCGGAGCGCGCTTTCCAGA -AGCAGCCTCACATCTTCCTTAACACCAAGGCCAAGGCTTCATCCAAGAAGACTGCCCAGG -GCCGCCGCTGGTACAAGGAGGTCGGTCTCGGTTTCCGTACCCCCAAGACCGCCATTGAGG -GAACCTACATCGGTATGCACGACCTTCGTTGAGATTTTATTCGAAATATGGTGGCGGAAT -TTATGGGTGGGTGGTCTTTTCACGGATACAAACAGCGAATGGAGGATCGCATGGAGCAAC -TGAGGAGGGGCCTGGGATCCCCGTCGTTTTCCTCTCGCCAATATTCGCATTCAACACTCA -AACTTTTGGAACCTCGGAAAGACCACTCATGCCGAAGAAATATTCGCATTCACCAACTGT -GTCAGATCAGATTACTTATTTTTGAACCTTTACAGACAAGAAGTGCCCCTTCACTTCCCA -GGTCTCCATCCGCGGCCGTGTCCTCGTCGGCAAAGTCGGTGAGTTTTATCAGACACTCTT -TGGATGACAAGCAGATTTACTAACGAATTAACAGTCTCCACCAAGATGCACCGTACCCTC -GTCATCCGCCGCGAATACCTGCACTACGTCCCCAAGTACAACCGTTACGAGCGTCGCCAC -AAGAACCTGTCTGCCCACGTCTCCCCCGCTTTCCGTGTTGAGGAGGGTGACATGGTTACC -GTCGGCCAGTGCCGTCCTCTCTCCAAGACTGTACGTTCATTTGTGATCCTTGTCAATGTT -CGCATGGTAGTCTGCAAAAGCACTCACTATTGCTCTACTACCACATCGAATTTGACATTC -TATATTGTCTTGCTATGAACTTGATAACTGACTGGAAATTTTACTACAGGTTCGCTTCAA -CGTGTTGAAGGTTCACGCCAAGACCTCCCGCGCTGTCAAGTCTTTCTCCAAGTTCTAGGT -TAATCAACCGGGTGTTTTGAAGGCGAAGGAAGCGGTATGGACTGGATATACGGGAAGTTC -TTGGATGGGGGTCGGGATAGTTTATGAGTTTCCATGTCCCAGATACCTCGCTGATGAATG -TGTATCCACGCAAAAATCAAAATGATTTGTTCGGCCCTCCTGTGATGGAAACTGTACAAC -AACTCTGGTACAAGGCAAGATTTGACCATGAATGGCAATCTGCACAGAGCAGCCTCTCAA -TGCGCATCTCGAGTTTATCTCCTGCTCCTCCTTAAGTCTTACATTGATACAGTAGGACCC -CAGTATGTGTTCTCTTTGGTAGTGCTAAATGACATATCCAAGGGTTGGGTTGCGCTGTCT -GTAGGCGTGGTTTTATATTGATTCGAGTGTTTACTGGCTATTTTGGGTACTATCTATTTA -GTTGGATTGCATAAAACCTACTTGTCTATGTAGATAGTCTATGTAGATAGGTCAAATTTT -ACCTTTATTTGCTTTATTCGAGGATGCGCATAATCCCCAGGTGTCTAGATGTGTAGATGA -AATGCTTAGAGTCATGACATACATTGCCAGGAAGTCTTCATTCAAATAATATTGCAAAGG -CATATTGGCATCTGGGCCGGGGTTTCTAGTCTTCACACGGCTGCGACAAATAACGTTGTT -CAACGTTCTCTTCCACACCAAGCATGGAATATCCCTCCATGACCTTTCACATATATCATT -CCCAATTCCCGCACTACGACCAGTGTCGGAACTTTTCAAAGAAGTCCATATCCGTCTGGC 
-CATTGAGTACAAGATGGCGGGTGGATCTCTTCTCATGAGAAGATTGCGACGGATGGGTCT -TCGTAGGTGTGTCAGGATACATGTATGGATCGTCATCCGATGGCCAGTCCCGCTGCAATG -CACACTTTGAACTAGTCTCTTCGTTGTCAGAGGTGAGATCAATCACCTCTTGACCTTGTA -CCTGCTTTGGGAGGTCTTCAACGCCATAATCCTCGGGGAAAATCTCACTCGACAACGTCT -CATGCGTCGAGAACTTTTCCTCATCGTCAGTCTCTGGATCATCACACATCGACATAGATC -GAGACTTGGTCATGCCTTCTGACTCAGAGGTACTTTCTGGGTCTTCGTCAGTCTCAGGCG -GCTCATAGTCAATATCGGAGTCCCTATTAGACGAGAAATGCATCTTTTGGGGGGTCTTTT -GGGGTGTGGGCCATGGAGCCTCGCTTTCAGAATCCGACAATTCCTCGGGAAGATAGGGGG -GTGGACTTCGAATTCCATTGAGTAAAAGAGTGATCTCTTTCTTATGTCTCCAACCCCATG -CCTTGGCAAACTCATATGCTAGGTTCTCCCAGTGGATCACTGGGACTTGGAGTAGAGTCA -GTAATTTCCCAAGCGTTCGACCAGATGGAAAGGGCAACTTTCTTCTAAACGCCAGCATCT -CACGTAGCGTAGCCTTACAGCGTGGGACGCTGCGGATCAGTGGAAGCTGTATCAGACGAT -TGATGTCGCGGTGGGACTGTGTGATTTGTTGGAGACTAGCAATCTCCACGGTGTGAGCAA -TTGCCTCTGTTGGGATACTACCCCATATGAGAAACTCCCCAAACCCCTTCCACCCGTTGA -TCACTAAAGTCCTCATCCGAACTGCCAATGGATAAGCGTAGAATACTTTGGTCTCCAGCT -TTGACGTGTCTATAACGGTCAATATCGCGCCCTTTTGGCCAGCAATTGCGCGGTGTAAAG -GCGACAGGGGAGACTGAGAGGTGGAGATAAATGGTGTATTGACCTCCTGCTTGGTTACAT -GGCTCTCAAAGAGGCTTTCAAATCGACTCTGAGGAAGATCTTCTGGATTGAACCATCCAG -AATCGCAGAAAATTCCGGCCAGGAAGTGTGTTTTCGAGTTGACCCCTTGGGAGTCACAGT -TTGACCAGCGAAACAGTAATGGTGGCAACTTATCTCGGGGAACTCCCTGCATGGCCATCT -CGTGGTCATGCTGGTGATCTTGATGGTGATGCTGCTGGCCCCAATCTTGATACCCTGCGT -GCTGGTACTGAGGCTGTTGGCTTTGGTCAGGGTTGTTGTTTTCCTTGGGATTATTGCTCA -TGCTGGTCTGGTCATCGATACCATCCACCTCGTCAATTGTCACACCATGGCTACACCACA -GGCAAGTCTTTCCATGACTTGTAACCACCGACACGTCGTTTTCCACGAAGTCGATATCCT -CCAGAGAGTCGGTCCGTTCTTGGCTGTGGCCGACAGTCATTGTGATACTGCCACTGCTTT -GGTCATCACTTCCGGCAAAAGAATGACTCGTTTCCTCTTCTCTTTGCTGGCGCAATGGAG -ATGGCATGCGATAGGGTGCCTCGTGGGGGTACTCAGTTCTCTGTCCCACGACCACTGTCT -CTCGCGTCCGCCATTGAGTTGTGTCAATGTCGTCTTCTATCTTGTCAAAAAGAGGAAGGA -GTAAAGTGATTGCTGTAGACTTAATCTGCTGAATTATGCTCTTCCATTCTCCATCCATGT -TGAACTCTGTATCTCGATGGACATGAGACCAAACAAGGTCACCAGTGTTTCTCATCCAGA -TCCACTGGGCGTGGAGTGTCCTATCTGGAATAAATCGATTAATCCCACGTTTTTTCAGAT -TGTCTCGAAACATTGAGAAGAAGATCTCCTCTTTCGCTTTCTTGCCACATCGGAAGAATC 
-GGTAAATACAACAGAGAGCTTGGCGCATTGGGATGTCCCATCGAAGCTTCAGCTTTTGGG -GAGGATTTTGGTTTTTCATTTTCGATTAGATGAGAAGGGCAGAGAGACAAGTTCTCAAAG -GAGGAGGCCGTAAGAGGTCAAATATTGGAGAAATGAAAGCTGGTTCTGATGGCCGGGGAC -TTGATCATGTAGACGGTGAGGGTTGGGGGTGATTGAAGAAGGATGAAAGTCACAAGAGTC -GGAAGAAGAAAAGTAAGAAAGATTTGAGGCAAACAAAAAAAGAGTTATATTATTTCTGAG -GCTCGGACTTAGACGAGAAAGGGGAGAGATCTTTAACTCATCCCTCGAATACTGTTGGAG -TATATTCGGTCCCATTGTATGCGACTATACTTTTCTTCCAAGGAGCTAGGAAAGAGCGAA -GCTGATTTACTATCTTTAACATAGAGATGGGTCTGGTCATATTGCCTCTTTCTTTTCAAG -GCTCTCTCTTGTATTGAGGTAAATTGGAAGTTGAATTTCACTATGTAAGTTCGCATTTTG -CCTGCCTCTGGCGGATGAGGCAGTACCAGATGCTCGCTTTTCGGCTAAAGGGTAATAGTT -ACATAGGTCTATAAGCTACCGATATTTATCACATGTCTGAGAATTAGAATCGTGTAGACC -TTTTTTGTAACTTTCAGGCATGACTTGATTTTAGCACAATCACCTAATGAGGATATATAT -ACACTTGCATCTATCTACCTACACTTTGAATCCCTAGTCTTCCACTTGCCCAGTTGACCT -GAAGAACGAAACTAATGTCTTCATATTTCCCTTTTCATCCAGTTAAGTGCCTTGAAAAAC -CGAAAACATGTTACTTTGTAGTGTTTCTCTTGCAAATGCAATGCAGGTTCCCTCTTAATG -ACTACTTCCCAGCTTACTTCTGGCCACTGGATATCTACATTGTATTTTGTTAGGAAAGGT -ATTCTAAATACTCAATACATAGGTAGGGCCACATTCTTCAAAGTACAAATATTATGATTC -CATGAAAACAGAGCAAGTCTTGCTGCTATACCTAGATGTATAGAAATAAGGTTACAAGAT -TCAGTTACCATTGCCAATAACTGATATATTTGAGACATTGTCATCTGACTTTAGTTTATG -TATCTCATAATCCTCCATATATAATCCAATATATCCAAGTCCGATGGTATATAATCCTAC -GTATGTATACTACTCCGTGCACACGCGGAACTGCCAACATACGCTAGTCCCAATTTGGGT -TGCCTTTAGGAACACTCCGCCCCAGACTCCCTTTACCCTCTTGAGACTATTGCCTCTTAC -TATCCTTATCCCCTCCATTGCTTCTCAGCTCACATTAACTCTGTGATTTGCATTTTTGCC -AATCCCTAACCCGGCAATTCCGATCGGACTTTGAACCCCTCCCCTAGAAACTGCCCACTT -TCCCCAGGCTTACAGCATGGCTCAATGAGCCACGTCATGAGGGATACGGATGATTGAAGA -TTTTCTCAGACACTACAAGGGCATTGATTCTTGGAGCAGGCTCTCCAGTCATTCCCCACT -GTCTTTTGACAGACCTACATTCTCCTGAGCCATCGAGAAGTTGGGCACCGATTCGCGACT -TTTAGAACCATGCCTGGAGGATTTGAGAAGTCAGTAAAAGGGGCCACAAAGCTCAAGGTC -AGTCTGCAATACTAAGGTCCTTTCTCTCTGAATGTGACAATTATTAACTATCTGGCATGC -ACAGCTTGCGGCCCCTAAATCCAAATACATAGAGAATATTCTTGTGGCAACTCACACCGG -CGAGGCAGGAGTCGCAGAGGTATTCCGAACCCTCCAGATACGACTCCGAGACTCGGCATG -GACCATTGTCTTCAAGGCGCTCATCGTCCTGCATCTCATGATCCGAGAGGGTCAGCTGGA 
-CGCGGCGTTGGGATACTTGTCCGATAATCCAAAGAAAATTGCGCCCAGCAATTTCTCTGA -AGGTTAGACCGATATTTTCCCTATATGGTTATGAGCGCAATTTACTAATTGTTTTTGCAG -CCCAATCGCAGGGTCATAATATCCGACGGTACGCCGAATACCTTATCACGCGCGCCAAGG -CGTTCGAAGCCTGCAAGACGGATCATGTACGAAGCGGACCGGGACGGCTGAAGAGAATAG -GCGTGGACAAAGGTCTATTGAGGGAGACTGAAATTGTACAGAAGCAAATTCGTGCGTTAC -TGCGGTGTGATGTGGGTGAATTCCAAGATTTGGGTCTTGTTCTTTGTGACTAACGTCGTA -TAGCTATTGACGGATGAGCCCGAAAACGAGATCAGTTTGACCGCATTCCGTCTCCTCACA -CTTGATCTATTGACTTTATATTCTGTCATGAATGAGGGCACAATCAATGTTTTGGGTAAG -TCAGCAGCCACTCCACAGATCGACATTAGTTTAACTTTATACCTCAATAGAACACTACTT -TGAGATGTCACGGCCGGATAGTGTACGCGCACTGGCAATCTACAAGACGTTCACAAAACA -AACGGAAGAAGTGGTTCAGTTCTTGGGAGTGGCAAGACATTTCCAGTCTGCCACACGCCT -GGAAATTCCAAAGCTCAAGCATGCATCCACAGACTTGGCGCGCCTTCTTGAGGATGATCT -GAACGATCCTGATTTCGATCTTCGCCGACGGGAGTACTTGGCCAAGAAAGGTGTTCGCGC -TCCACCGAGCATGGAGGCCAGTGTAACGTCAGATGCATCAAAGCCAATGCCAACCCCACC -CATGTCAAACCCGGCATCAAACCCACCCAAGCAGCCTGAACAATCGAAGCCCCCCCCTGT -GGATCTAATCGACTTCTTCGATTCAATTGAGCAGAATCAGCAGCCAATGGGGCAACAGAA -CACAATGCAGTACCAGCAAACAGGGTTCCAGCAGCAATCCCAGCAGCCTTTCTATCCCCA -GCAGACTGACTTCCAGCAACAACCTCAGACGATTCAGCAGCAGCCTTTGGCAACTGGATA -TGGCCAGCCGACCCAATATGGAGCGCCCTATCAGGCACAGGGCCCAAACAATCCATTTGG -GcagcatcagcagcaacaacaaccacaacaaccgccgccgcagcCACTTCAGGCTATGCC -AACTGGCGCTGGATTCGGAGGGTACTCTCCACAGCCGCAACAGTATGGTTTCCAGTCGCA -ACTGGCTCCTATTCCTCAGCAAGGCATCGCCTCATTCCCTCAACCACAACAGCAGCAGCA -AGCACCTCAGACCTTGCAGCCCTTGCAGCCCCAGCACACCAATCCGTTCCGCCAGTCAAT -GATGCTCAATTCTTCCACCGGCACTCAGCCACCCGCCGCCCCGCTATCGCGACAAAACAC -GAATCCATTCGCGAGACGACTTTCCACCGCAACCCCGCAATATAATCCCCCAAATGATCA -ATTCCAGCCTGCGCAATCCCCACAAGTGCAGCAGCTTCCTCAACCACAAGCGCAGCCGAT -CCAGCCTCAGCGGACGGGAACTAATCCTTTTGCACGGGCCTCCCCTGCCCCGCAGCAAGC -TCTTCCACCCCCAGCAGCCACTCCCTTGCGGCCTAACCCAACTGGCAGCACCAACCCTTT -CCGACAGAGCCAGTTTATCAATCAGCAGACTGGACAGGGTTGGCAAAACAGTGGGCAGAG -TGGTACACTGGGGGGATTAGAACAGCTGGAAACGGTGCCGATCTTCCCGCGCCCTGGCAT -GACCTAAAGTCCAACACTATTTGTATGGCCTCGGAATGAAACCGACGTCCGTTAGATTAT -CTTCCCTTCACTATCTCATTTCTATCTCACTTGTTCCCCACACCCTCCCCATTGAGGGGT 
-TGTTTCCATGTAAAAGTCTCTGATATTACCTTGGTTACATCGTAGGTCGTATATTGTATC -TTAAATCATGTAAAAAAAATTGCCATGGCAATTTGGGATCTAATATCGAAGGCACTTGTC -CAATTAAGAATGAAGGTCAAGGACTTACAAAGTCTCGGCATGCGATAGATGAGAATCTTC -AGTTGCGCCAGTCTCGGGGCTATATGTCTCTTAACACATTCAAGGCTAGTGCTGACAGGG -GCGGAAAATCTTGACTTGACGACTAGGGCTGTTAGGAAATGTTCAACGATCTTATAGGGG -CCAACATGTCGATCATTAAGTATATCGGGCTATCTCTGGATACTCCGTACATTGTGGAGT -GTTTCAGGTTTTTTACGATGATCATACGCCAAGTCCAGATGTCTAGTATACACACGTCTT -TTTGTTCCAACACATGACAGATAGAATAAAAATATCGGTGAATAATCCCTTTATGTGTAC -GGAGTATCCCTATAGATGATTTTTTATCCAAGGTCCAGTGGAGGTAAATGGTTGACTCAT -ACGGTTTAGAAAGGGGGAGGATTGACCaaaaggaaaaaataaaaaaataaaaaaGAGTGA -AACTCCACAAATCCTCTTGACACTTCTTATCCTTGAAAAGCCCCTTAAATCCTCACCAGT -TCCACTAGAGCTCTCCTAGCGGGGCAATGTGAAGAAGTCCAAATATTTCTACCCCGCCGG -TCCCGAAAGGAAGTTTGTCCTTTTCTAAACCTCCTACCATATAAGCTCGATGAACACCAA -CTTGCCTTACGAGCAGACCTTGCCCGTGCTGTCTCGGCCTACTTGGGACCCGACCTCTCC -CTGTGCTGCGTTCGATCCTATTGTACCTGTGACCTCACAAGCACCTCTAGCTCCTCCAGC -CCCTCCTAGTCGCAAAACAACGAGGGTACGGAAACCCCGTAAAAATGGCAATGTATCTAC -AGGCAAATCAACGTTATTTTGGGTCAACAGTGATCAACAAACTGCAGCGGCAGGCACGAC -GGACGAGACCCTAAAGCGGATTCGGTCCCATGTGATGTCCGAGCATAATCGCAAGAAGCG -GATGGAGACCACGGAACAGTACAATAAGAGCAAATGGAAGCACGCACCTTATCAGCCACC -GACAAGTGCTGGGGCACCCGTAACCGCTGAACCTGTCCGTCCGTCTATTAGCTCGTCCGC -CAGCTTGTCCGGTAGTCAAATCGCAGACGAACAAGACTTAGAACAGGTCGAGGGGCTTGT -TACCTCAACAACTGTTGGATACCCTGCCACGCAGGCGGCAATATGGGATGACGGTGGCTC -TCGTGGGACGATGGCCTACCCTGCTAGCCCGTCTGCATGGTCTTACGTGGGACAGGGAGC -TAATGATCCTTTCCATACTGGACATACACAGCTCACAGATAGAATGATGCGACACCTGCG -AGTCTGTGAGTTTCAAGCCCGATCCTGCTATGGTAGAAAGCTTGTATACTGACACAGGTA -TAGTCCTCTGGGATCTAACACAAGAAGCACACCCATTACAAACGCGATACAAGCCCAAAC -TGCAGGCGCATTGGGCGTCCTTGATTCAGCGTGATCCAGCCATTCTGCACGCAACTATTT -GTATGGCTAGTTCCAATGATGCTATGCGTGCAGGCGAGCTACCCATTCGAGATCCAAATC -AAAAGCGGAGCCAGTTAGTGATCGACACGTTCCACCATCGTGGTGAGACTATTCGATTGG -TTAATGAGGGCCTCTCGGACCCGGTCAAGGCTTCCAGTGATGTTTTGATTGCTGCGGTTT -CTACGTTATTGACAATCGAAGTGAGTACATCCTGCGAATCAGGATTGTGATTCCCAAAGC -TAAGACACGATTGTAGATTGCATCGGGAAATCCCGATTATCTCAAAATTCATCTGGCAGG 
-CTTACGACAGATGATCGCACTGCGAAAGAACTTTAACGATGTTCCTTCAGACGTCCGGTT -CCAAATCTCATGGTCGGTATACCCCTTCACAATTAGAGACAAAGCTTGCTAACTTCGCGA -TTAGGACTGATATCCGAGTTGCCTGTATGGCCCACGCCAGACCTATATTCCCTTTTGTCC -GCTACACCCGCCCAGCGCGATTCTCCCTCATCCCACCCAATGACGACGTAGCATTACTCT -CCACCCGCCTCTTCCCTCTACTCAAAATCCCCGGAATTTTCGGCGAAGCCATGCCACAAA -TCGTATATGATCTCCTCGAGCTATCCTGGTATGCTGAATGGATCAAAGGCAATACTGGAT -ACAAAGAGTTCAACGAGGAGACCGAGGATTACTTCAACACGGAAGTGCTGCACGTAGAAT -ACCAACTGCACACTGACCGATATACAGCAACAGGTCAAGTAAAGGGCGACAACTCCATCG -AGGGCTGCACCCGTCTCGCTTTACTGCTATTCCACAACAGCGCCATCTGGAATTTCTATC -CAATGATCGGCCAGTTACTGCCTAAGCCGATCCAGGCTCTGCGCATTGCCCTTGAGGCGA -CCATCCCTTCGGGGTTATTCGCCCTCTGTCGCGACCTGCTCCTCTGGCAGCTTTTCATGG -GTGCGGCTTGCAGTCTAACCCTGCCGTCAGAGCGGGCCTTCTTTGTATCTGAACTGGCCA -ATGCTGCGCGGCTGCAGGGGATCCGGTCGTGGCAGGAGGCCCGTGCTATCTTGCTAGGCT -TTTTCTATGTCGATCGCATTCACCTGCCTATGCTTCGTCAGATTTGGGATGAGGGTCATT -TACAGGTAGAACCCCCGGCAGTATGAACTTGGCTACTTGGTTCTGCCGTAATTCGTCCAT -TTTGGTGAAAGCTGGTTATATACAGTCCTTTCCTCTCGGTAAATTCTCAAGGTTCGGACC -AGCCATCTTGCTGTCTCGCCGAGTGAAGATACAATGTACACATAGTGTTATATCCAATGA -TTGACGACTACTCTCCGAAATAGCTACATCCTATCTATCTAATTGGATACTTGAAAACAT -ATACACTTATCACACCGTATTACTTACTACCCAGTCATTTAGACAAGAAGTATAACGCAC -TATCTCGACCTCAAACAGCCAAGATACCAAGCCCCCCTGACAAGAATACTTCAAGCACAC -CATTCCGACCAAACCCGATACATGCTTCGTCCATTCTCGGCATTGGGCGACTATCATTTC -ATAGAGATAAGCACGGCATCCCGATATCCGAACAATCTCCGTTCATCCCTGCACACAACG -AAGCTGATACGTCGCAAAAGCCCCCGCCGAGCGCGCACCGAGTTTCCACCGAAAAATGGA -CGGCATTCACTCGGCGATGCGTGGCGATTTTGGATAGAAGCTGCAGGTGCTTCCTAGCTT -GCTTGGCTTATTCGGTTGCCTGGTTTGATTTTTCGACCTCATCTCAGTTCTCTTCGTTTT -TCTCATCGTTGGACTGGAGATACACATTCTAATTGATAGGAAATTATATCTAAAAGTGTC -ATGAACATGTACAACATTGAGTTTGAGAATCAAATCCTTGCCTTTTCCTCCTATATTGTA -TGCTAAACCTGCAGACCGAGAAAAGACCGGAATGAGATCAGACACCCAACTCCACTCTCG -GGTGGCCTTGGTCCCGGACTCGGAGGGCTACGTAGCCAATGATCTAGGTATAATAGGCCC -TGCTGTTCTAAGAAAAAGGCGCATTTCAGGATATATACATACTTCCATATCCCCCTTCTC -CCTTCTTGCTATGAATCACACCATTTTTGCTAATACAATACATTCAAACTTTGTCTTCAA -TGCTTTTCTCTTATTTGTGTCTCACTTCCTAAATCACATACCAGGCTATCCATGTCTTGT 
-CGGCCTTCAGGCGTCCAAGATCCAAAGGCTTGACTGCATAGACATGGGTATGCTTGGATG -ATTGATGACAGGTACTGGAAGTGCATCTGGACAGAGATATTGCTCACTATGTGATGGATG -CATTGCGTCTTTTGACTCGAATGACTGTTCCCAGCTCGCAACTTGATTGACCTGCACGGT -TACACAAGACTGGTATACAATATTCCAGAGCGACAACACTCCTGCAACACCCCGAATGAC -AGCAAGCACATGGCGAGGATTTGACCTGAGCACCAGAATCCGACAGCCTACAACATGAAG -CAAAAGAAGGTTGAAAGGACACATTGTGAAGAATGTTGGATGAATTAATTATCCGAGTTA -CAGCAGAAACTCGCAGGTAAAGAGACTAAATACAACAAGGAAAAACCAAAGCACAAGGAA -TCACCCGAGTGCTCACCGAAAAATTACAACCAGCAAACAGGGTGATGGCTGGCAGATATT -TTGCAACTGCGCCGAACATGTTCATCCATGCACAAAGCAGTCACAGGCGAACTTCCAAAC -CACAACACAATCGAACACAATCGACGACGAGCAAACGTGTCGATGAGCACAAAGTGGCGA -ACAGGAACCCCGTCTATCTGAAGCCTATACATACAACACTGGGTATCGAAAACATTACAA -GCACCAAGCGGATACTCCGCATCCCGGTCTCTTTTCTAATAAGAAAAAGGAGAGAACAAA -TTGCTTATGCAAGCTCACGTTCGGTCTTGCCACGGCGAACACGGCCAGTGGGGGACCAGG -GGAAGTGAGCGACGGGAATGATGAAGAAGCTGTTGCTGGTGTCGGCGATACCAGCGAGGG -CATTGTACCAGGCAGAGAAAGCGGCCAGGAAACCAAAGAATCCACCGGCTTTCTGGATGG -GGACGTTGGGCTTGCCGTCATCACCGCGGTGCAGGTAGCAGATACCAAGGAGGAGGAAGG -TCAGGTCCAGTGTGAAGAACAGCAGGAAGAAAGCGACGGTAGACTTGAGGGTGCAGAACA -GCATGATGGTGGTGAAGATGAACCAACCCTGTGAAGACATTGTCAGCGGGGGGTGCTCTC -GCAATTTTGTGGAGGCAGGAGGCAAGAACTCACCATCAGGAAAAGACCCATGGAGTCATA -GAATTTCGGGCCTGCTCCGCCGAGCGCAGCCTCGATCTCGAACCCACCGGGGGTCAAAAC -CATAGCGAACGCGATCCAGAAACCACCGTAGGAGGAGAGAGCAGTAGCACCGAAAGTGTT -ACCAACAGCCATTTCCCTTAAAAAAAGCGCGTATTAGCCGCGGAACCGAACTGTGAAAGC -CCAAGGGCTCGGAAGTTACAGTTACATACCACATGCCGGAAAGCAGCTGGACGAGACCAC -CGTAGCCAAAGGCGAGGCCGACAACAATGTTGGGGCTGCCGATATCCAGGGTACCCATGT -TGACGGCGCTCAGGACGAAAGTGGTGAGGGCGAAAGCGCACAGGCCGAGAGGAGCCGGAT -TGGCGAACTTGCGGCCCTCGACGGACTTGTACAGACCGGGCTGGAACTCACCACCGAAGG -GAGGTAGGGATGCCTCATTCTGGTTGACGTGGGCGAGAGGTCCGTAGCCGAAGCGCGCGG -CAGCGCGTGCTACCTGGTGCTCTTCGGCAGACATCTGGGGCTGCATTGGCACGCCGCTGT -TCCCCAGAGCATTCTGGGGGGAGTGAGAGGGGCCCACGTCCTTCTCCAGCTCTTGCTGGT -TGGTTGCTCCAGTCGCAGACATGATGCAAGATTTCGTATCTGATGGTCGTTAAAAAAAAA -AAAGAAGTGTAAAACAGAGAGAAAAGAAGTATAATAAAGCCGTATAACACAGTTATAGGT -GTAGGAAAGGATGCAAACCAACACGGCTCTAGAGAGGAAGGGGAAGGAAAGTATTATACT 
-CGTGAGTCCCCAAGGCCACAAAGGCGGCTTTGCTTGGAAATTCGGCGAGAAAAAAAGTGT -GGAACCAAACTTACGTATTTGCTTTTCGGTTATATTACATATAAAAAATGATGATGTGCA -GCCGCCAGGACCCGTTCCCACTATGGAGAAGTCAATAAATAATATAAAGTGGAGAGAAAA -TGAAGGGAAAAAAAACAAACGGGAATGCTCCTAATTATACAAAAAAAAAACGAAGACCCT -TGAAGCTCCAAATTGGTTTTTTAATTGAATTGTTTCACAAGTCCATCTAGAACAATGTCA -AATTAGCCATGCAGGGACGGCACGGAGCAATGGGGATTAGTTACCGTTCGCTGGAGGTAA -TTTTGACAGGCAAGGATGAAAGGAAGGGACATGGGGAAAGTGGAGAGACCGAAAGGATCC -GGGGAATGATCTGATTGGTCGAGATTCATTCTCGATCCTCAACTTCGGTGCATAATTCTA -TGAAGTTTGGCCACGTAAAGGGCTTCTAATGCACGTGTACGGAGTACTCCGTACAAAATA -CCTACGATGGAACTTTGCCATTATATAAGCTTACAGTAACCCGGGGTTTCGGCATCTATG -GATCTTGCTTCCATGGGTAGCTCGTGAAATTTGGGGGAGCTTAGGGCTTAAGATTTCCAT -GATCTTCGAGAGTCCACTTTCTCTGTACGGACATGTACAATACTTTAGTGCAATGGGGAG -CTGAAAGTAGAAGATACTTGGTAAGATCCGCTTGTTCGGTGGTAAATTCCGGTACCGAGT -ACTCAGCCGAGACTGCGTCACCTCGGTCCCATTGTGGAAGGGATTTTGCCTCTCTGTCTT -AACATACCAATTGTTACCCATGTAACGGACCATGAATCTGAGTCTACTCCGTACAGACTG -TCAGAGAAGAAAAAAAAAAGTACTGTACATATGGTTGTCGACTTTTTAGGCGGATATGTC -CGGAATTACGCACCAATCAATCCGGCGTGCAATCAATATTCGATATGTGCATATTTCTAC -GGAGTACAGAGTCCGAGTTGATTAATTGATGGTGTAATCTATCTTGTATTCATTGTATTT -CCTTTTTTCTCGCCCATATCACCCCCCACATAGCGCGTCCACCAGTTGCCCCTTTTTTCA -GACACAAAGCACGGATGCACCACTAACAAAGCACCTAACACAATCCAGACCCGCTAGTCA -CGGGTGGTGCACCAGAAACCGCATAGAGGCCGTTCAAGTCAAAATTACGTTTCAAATCTC -GGGGAAACGGGACGAGATACCAAAGGTGAAAACGGTGCACAGCGATTCAATTCGACGATA -TATGTCTGTGTTCTCTCGAATGGGTCATCAGTCCGGGGGGGAGAACCTAGATCGGCGCAC -GAGGCTCAGACCCAttgctttaacttttccttttcctttttgtttattttttgttttttc -ttCTCTCAGCCCGTATGATCTGGAAGCTACATGCAAGCCTGTCAAGCTTCATCTCCACAT -GTAGCTCTACTATAGGATTTACGGCTAAAATGTTGAAAGACATGGTGGATCCGTGGATGT -CTGTGGGCGTCTCAGTACCAGTCATATTTGTCTCTCTATACGGAGCTCCGTGGTTGACTT -GAGATTCGGTCCAGATCAGAGATTTCCCCAGTCCAGGTCGGACCATACACAATCTAGCCA -GGGCTCTATGTTCTCCTTTTCTGAGGTGCTTTTGTATGTTGATGTCGTGGCAACAGGGTA -GCGCTGCAGGTAGAAACAGTGACAAGGTTCATTACACGGGATAATATGCAAGGTTTCTCT -GAAAATGGTCCGTATACTCCGTACGAAGTACAATTGTACAATAACAGGTCTCATACAGTG -CGATCATCCTCCCGCGATTGCTAAGCACTGTGGTTTTGATGAATTGCAATTGGCCCAACG 
-CTATTAGCCCTTAAAAACTGGAGCTGAAAGTGATTTCTCGGTGTAAGCATCCACCAAAAA -TACCTGGGCAATAGCTTGGATTTGTGGCGCACGCCAAGTAGCAATAATAAACACACGCCA -CAGATTTTAACTATTCGGGTGTTACACATGTGAATCTTCCGTGGCCCTTTCACTTCCCCG -TCAGGAGTTTATCTGTATTTTGCAGGTCAATTGCGATAGACAATCTAACAGAAACCTCCC -TGGACATGTTTGAAGATTGTGCGATCCAGACCTGATCCGAGCGCAAACTCTGCAAAGGTG -GCACCATACCTAAACCCGTGACATTTCATATGGGGCCAATCGTAATCTAACCCTAACCTT -GTCCCCAGAGCCACATGGGACCAATTGCCGCAGGCTCGACTCTTGCCGCGAGATGTCATA -CTCCGTTCTAATAATATATAGCTGGAAATCTTCTACATCCTACTTGGCGTGGCTCCCTAC -TTAAGAGACATGCGGGGATAGCAAACTTTCACATAGCGCGGGATCCCCGGAAGTCTTGGA -GTATGTATGCTGTATGGTGATTACACTTGGCGAATTTGGTCCTCTCCACATTTTGCTCAT -CTGATCTTTGATCCCCGCAAAACTACTCCAATACGGACTCTCTCTTATCCTTGGTTCTAC -GATCTCATCCTCGGTTGAGTAGATTTTCGGCTCGGCGGAGCGGAACTTTCTCTCTTCGGC -TGCCAAGTCGGACGAACCGTATTTAGACCAGAATCGTGGGGAAATTCCAGATTTGGCTTT -CTGCTTACAATTCTTCGGCAGTTGAACTCGCTGTTGGGACTTGATAGGCTCTTATTCCCT -GTGGGTTGAAGAGGACTTACATCATTCATGATCTCTCCAGGGATGGGCAGGAACCTCCTC -AAACCCCCCATTATTCCATTAGGAATCAAGGACGGTATACTTGACTCCTGTCAAAAAGAA -AGCACCCTCTACATGAAACTGCTAAATGAAGGCAATGAGCGCCAGACTCCGAGATTGCAG -CAATTGCCTATAACGTTCAAGGGTTGTCTCCACATAGCGATTATATATATAGGGAGCATC -CTCTTTTTTTGGTGGCCGCCATCTCCCTAGGTATCCTGCTGACATTCCACATATCCACGG -ATTCACAGATACATATTATTAAACCAACGCATGCCAAGTCCAAAGCATAGGGATTTAGTA -TCGACGTCGGCACAGCTTCAAGTGAATGTATGAAGACCATGTTTAAGCAGCAAAGTATAT -ATTATATGCTAGGTTATATGATACCTCTAATTTTATATACATTCAATGATATATACAAAC -ACCGCTATGCAATCACGACATCAAAGCCAATCCCAGATCAAGCCGCAGCTTCAATAGTCT -TCTGGAACTCAGGACTGAACTCATAGTCCGTGTATCCCTTGGGGTCCTTAGGAACATAAA -AAGTGTCCCGATCATAAGGCACGAATGGAATGCCCTTCTGAATACGGAACGGCAAGTCAG -GGTTCGAAATAAACGGACGCCCAATACCAATGACAGTATCATAGTCCGCATACTTGACAT -CAGCCGCGTGAGCCGCCGAATCAGCCTTATACCCACCAGCAATGATAACAGGCGAAGCCT -TCCCATAAACCCGCAGGAAGAAATCCAACTGATCGGTCGACTCAACATCAGCATTACCCG -CAATGCGCGACTCAACCAAGTGCGCATACGCCAGTCCGAGCTTTGCTGTCTGCGTCGCCA -GATACTCAAACTGCGGCTTAGGATCCTCCATCCGCATTCCCTGGAACGTACTCCAAGGAC -TGAATCGAATACCAGTGCGGTCTGCACCAACAGCATCAATAACAGCGCGTGAAACCTCAA -GCGCAAACTTCGCCCGGCCCTCCACACTACCTCCCCACGCATCAGTACGCTTATTGCACG 
-TATCCTGCGTAAACTGATCAATCAGGTACCCATTCGCAGCATGGATCTCAACACCATCGA -AGCCAGCTGCAACGGCATTGCGAGCCGCCTGCGCATAGTCTGCGATCCATTCGTGGATCT -CAGTCTCGGACAGCGCGCGCGGCGTAGGCGCGTCGGCGCTCGCCGGCGTCGCACTGCTCG -ACACGACATCGAACCCGCCCTCGGATTTAAGGAGGTCCGGGCTTGCAACTCGGCCTAGCG -CCCACAGCTGCACGTAGATATAGCTACCCTTCGCGTGTACCGCGTCGGTCACTTCTTTCC -ATGCGGCGATCTGGGCGTCGTTGTAGATGCCGGGGACATTGGGGTAGCCACCGGCACGGG -CGGAGATAAAGGTTGCTTCGGTTATAAGGAGGGTGCCTGGTACTGAGGCTCGCTGGGCGT -AGTAGTCTTTCACGAAGGGGAGGGGGATGTGGTTGTCATCGGCGCGGAAGCGGGTCATGG -GAGCCATGGCGATGCGGTTGGAGAGCTCAACGCGGCCAACGTGGATGGGGGTGAAGAGTT -TGGTCATTTTGCTTTTATTTGAGATATATTATTTTGTAGATGTTTTGAATAGGATTCAAA -ATGTATTTGGGGTGAATGACTATGGCTTTATATATATTCGATGGCATCCATTTGCTTCGT -GAAACTTCCCAGGATTGTTGGATTAACAAGCAATGTGGAGCCAGGCACTTTAGATGAGCC -TAACATTTAGGATCATTAGATCCTTTCAGGTTTTAAGTCTCCAAAGGATCAAGAGGTGAT -TCTGATTTGCCACTGCTGTTCCACTTTAGCCTTGGAATTCGGGATCGGGAGGATTAATAA -TACATAAAAGGTCCCGAGGGTCATGTCATGGCCCACCGTTGGATCCCGAGCGTTATTATT -GAATACTTATGGGCATAATATAGAGAAGCACTCAACTGGAGATTCTTTCTTTGACATTCA -ATGGTAGCTGAAAGTAACATGTAGAAGCTACTTATGTACTCGAGCACATCAGTACTTGTA -CAGGTGTAAACATCGCCAAACGCCATCCTCTGGCCCAGCCATGGACTATAGTCACCGCGC -AAACACTGGTTCCTGGTATCATAAAGGACTGTAAAAAGAAAATGCAGAACAAAAAAAGAA -CATAGAAATCATCGCTCCAGGCGACCAACCGTAGCCTCAAGCTGTGCATTGAACGCCCGT -ATTTGGTTTAAGAGACCACCTTGTGCTCCAGGGGCTCGGGAACTCACGTCAGCAACGGTG -CGAAGCATGTACTCCAGATCCGTGGAAGAATCTGCAACAGAAACAGAACGTTGCTTATTA -CGGTCGAGGGCCAATTCAAGGCTGTGGAGAGAATTGCTGATCGTGGATCGTGACTAGGGA -CTATTGTTAGTGATGTTCTTGCCAGTGTTTGCATGAAAAGATCTCACCGATTTGGCCTTC -TCCTCTTCAATGTGTCTGCTGCGCACTGAATCCATCTGCAAAGCTATATTCTCGCGTTCG -CGGCGGACCCGGTATAGATGATTCCGTAGATCCATCATTTCCCGCTTGGTCTTTTTCAAT -TGCACACCAAGGACAAAGTTGCTATCTAGTATCCCGCTTAGGTCCATGAGGCGACTTTCA -AGCTCTGAGCCAAATACCTCGACGGCTTTTCTCTTGCGAGACCACTCTGCACGTCGCGTA -GCGTTCGCCTCGTTGGAAATTCCATCCTTGAGTGTATTGAGTGTCTTCTCCAGAGTCTCG -CGGCAGATCTGACTCAATACATCAGCCGGGTTGACACCACCGCGATTGGGTATTTTGGCC -TTCTGGTGCGTGGTGAGTTCATCGGCCGATTCTTCGTCTTCGGATCCGTCGGCCGTAGAT -GTTATACCACCTAAAGAATTGACATTGGCGAGACGGTAAACAGTGACTGGGACTGTTTCT 
-CCACGAGGCTCTCGAGTCTTGCGACGAGGCTGTTCTTGTAcaggctcagcctcgggttgg -tcttctgtctcaggttcaggctcgggctcCACTGCGCGCTTAACCTTCTTCCCAGGTCTG -CCGCGCTGTGACTTGGGCGCTGCAGATACTGCTCCGTGTTCCAACTGATCTTCTGTGACA -TTCTCGGGCTCGGGCTCGGGCTCCACCGCGCGACTGGCCTTTTTTCCGCGCATACCACGC -TGGGACTTGGATACTGCAGTTTCTACTTCAGGTTCCGGCTGAGCCTCCGTCTCATGTTCG -GCTGCTTCATTGGTCTTTTTCCCGGGTCTGCCGCGAGACTTCGACTTTGTAGATCCCGGG -TGTGGCTGGTCTGCTGTCACAGGTTCCGGCTCGGGTTCGAGGTCAGTCTCTGCTGCGCGT -TTGGCTTTCTTCCCAGGCCTCCCTCGTTTTGGCTTAGGCAGAGCAGATTCCATCTCGGGC -TCTGCCTGGTCCGCTGTCACAGGCTCAGCTTCGGATTCAAGCTCCACTGCGTGGTCTGCT -TTCTTCACAGGTCTACCCCGGCGTGGCTTAGGTACTGAAGATTCCACCACAGTTTCCGGC -TGGTCATCCTTTTGCGATGTTGGCTTACGACCCCGGCGTGGCCCCGAAGAAGACTCTGCA -ACATCTGGGATAGGTTTGGGTGAAGTATTCTCAGTCTCGATAGCTTCAATGGTCTTCTTG -TGAAGCGAGGGTCTTCCACGAGGCCGTTTGGTCTTTGGAACTCCCTCAACTTGGGCTGCC -ATGGGCTTATCTTGCACTTGAATTGTGGGTTCTTGTATTGCATCTTGCACCTGCAGTTCT -GGAGTGTTTTCCGCGGTGGATTGATGGCGTGGGCGACCCTGGGGCGTTTTCGACTTTGTA -GAAGGCCCTTGGGATGTTTGGTTGGCTTGTTCTGGTTGCTCCTCATTGGATGATAGAACA -TCCTCAGTCATCTCGACATCATTATCAGCAGCAGTCAGCACATGGCTAACTTTAACAACC -TCCGCAGAGCTACCACGTGGTGATAAAGATTTTGTGACAGAATTATCTGCCTGCACAACT -GAAGAAGTGATTTCCGAGGTAGCCAGTTCGGAGCCGTTATTTCCTTGGATGCGCTCCTCG -GACCGTGATGGTGACCGAGTGTTTTGTCGTTTCTCGTTTAGTTGTTCTATGTTCTTAGGC -CGTTCGACCCCGGCCTGAGCCTCATGTGATGAAAGAGGCACTTGTAGTTCTTCAGCTACT -TCGGGCGCCGTAAAATCAGACACGGGAGATGTGTCTTGGGTTATTCTCTCTCCAGTGGCT -GATGCTGTTGGTCCAGCGTCATTGGTCCCCTCTTCTATCGGATGCTGATCTTGGACCGAG -GTCTCCACAATGGGTGAGATGTGAGGCTGTGTGGATTGATTGTTCGTTATTTCTGACGGA -GGATGTGCATATGTATTAGGAGGTTCCTGGGCGTGACTTGTTTGAGACTCTGTGCTACCT -GCACTGCTATCAACAGGGTGTGAATCGCCATTCTCAAATCGCCCAATATTACGACGTGAA -GGAGTATCCTCTGTTCCATTAGTAGAGCCTGGGGAGGAATTAGCACACATTTCTAGCATA -GCACACCAAGCTGGTTCATAACATACTGATCTTTCTCCTCTTATTACTTCGTGTTTGTTC -AGATCTATCGTCGGAGGAGGGTATTTCGTATGTCGAGGGTCTTTCTGGGAGCTTGTTACG -CGCACTCCCTGGTGTGCGCTCAATCCCCCGGACTTGCGATCCTGGTTGCGATACCTTCAA -CGGCTGTGGTGTTGTGTTGAGCGTGGTCTGTGTTCGCGGTTGACTCTGAGCTTCAGGGAC -AGTCGCAATTGGAGCGGGCGCTGGGAACCCAAGGCCTGGGCTACCAAATGAGAACCCGAA 
-ATTGACTTCTCTTGCTTTGCGCGTCCTATACCAGCAAATTAGTCTATATGTTTTGATTTC -GCGGGGTATGGGTAAGGCGCGTACGCAGCTCCACGCTGCCGCATTTGTTGCCGCTCCTCT -CGCGCTGCCAATTGTTAGCATTTCCACGCATAGATAGACATAGCCCATGACACATACAGG -CTTCCATTTTAAAGGAACAGCGTCAAAAAAGACGCGTCGTGTTGATGGTTGAGGAGTAAA -GACAAGCGGTCGGAGCCCAGGCAAATCGGAGCCGATCCATTTTTGTGCCGATTTCTCCGC -GCTCTCAATCTTCAACAATCCAGCTGTTGCACATTCTTGTTTATTCTTGACTATAATTTC -CAAATGGGTTGGTTCTGGGGTGACAAAAATGATACTGTCCAAAAGCTCGACCCGGGGCTT -CGTGACTATCTCGAACAGGAGAAGCCCGACAAATATGTGCCTGGTCCCAATGTGAAGCCC -ACCCCGACGCCCGCGCAATCGGCCGAACCGTCTGATTCCTCGAAACCGAAAGTCCCTGCA -GCATCACTTTACCAAGATGGCCGCTATGCTCATCTATGGAAGACCTACAAGCCTCCGATG -GAGTTTGAGGAACAATCCGAAGTCCGCGGGGCGTCCAGAGTGATTGAAAAGTACAAGGAG -CGCGGTGACACGGTGCAGCGAGCAGCAATGGAGAATTGTGCCCTGGAGCATGAAGCCTTG -ACATACTGTTTCCAGACCGGAAATTGGCGCAAGCAAATTGAAGCCCGACTGACTATGTGC -TCGGCGGAGAATGCCACCTTTTCACGATGCTTTATGACACAGACTGTGGGTTGGCGCCAC -AACTTATGGTTTTCTCGATATCGATTCTAACATGTTTTTGGTGAATAGAAATTCCTCCAA -GCTCTCGGCTACGCAGCTAGCTTCGAATATGACGCCGAAAAAGAGGAGCGCATTCAGATG -CATGCCGACAAACTTTACCACCAAATGTTAGACTACGAAAAGCGAGTTGAGGATGCAAAG -GCCGCTGGAATTGAGCCACCGCCTCTTACATCGCTTTTCAACCCAGACAAACCATCGCAA -GCCCCGGAGGTCGGCCTTGAAATTCCTGGAGGGGAGACGATCCCGGAAGAACTCAAACCG -TCGAAGCCCTTACAGAAGCTCACGCCACACGAGCGAGAGTTAGAGATTCGGGCACATAAC -GCGCAGCGCGAGCAGCACAAGATGTACGCGCAGGAGGCTGCGCCCTTCATGAAGACTCAA -GATGATGCTCGGCAGAAGCGACAGGAGAAAGCTGTAAGCTGGTTTGGAGAAACTCTCGGC -AAGTGGATGTCGTAGAGGGACCTGAATTCTTGTATTTACTATATTGCTTTGAGAAATGGC -GATTGGAGAGGCGTTGGGGTTTGAAGTTCAATTGTTACATTCTTATAGCGCTGCTCCATA -CTCCGTACTCTGTAGAGAGTGATAGTTACACTTGAGTGTACTCTTTGAAGTCATCCAAAT -TGCTTTCAAGGTCCTTCCCTGTATCTCTGTTATTTATTCCCCCCAAATCTGCTTCTCTAT -CTACTCTAGGGCTTGTTATATCCACACTAGTAGGTAACGGGTAGTATCCGAGGACACCAC -CCGATGAATTTGAATCTTTACATGAAAATTTCCAGTATAAACAAGCTAGTGATATATAAT -AGAGATGTCATTTTAGTtaagatataaaatataacatataacattcatatataatataGG -AGAATGTCTTGGCCCTATCTAATCGCCGGTCGCGTTCTCCCGCGCAAACAACTCGAAGCA -TCATCGTAGAATCACGACTCTCTTCCTTAAACTCTCAGCTTACTCCTTGTCGCATCCTTT -TATCTTCTATTCCCTAATCACACTTGCTCCCCATCTCCTTAGTTGATCGGAATACTTGAA 
-TCCTCTCCAATTCAACAACTACCCTAGATATCACCTGCCGACAAGATACCAGGTTGATCA -TTAGACTCGGAAATTCTCTCTTCTCTCTTCAGACCATATTTTCCTACTCGCAACCAGTGG -TGCTCAATAAGCTGAGCTGCGGCGCTCGCCATGAAGGCCACCCCGTTGCTCATCTCATGG -CATAATGACAATGCCCCTATCTACTCGGTGCATTTTGATCCCAATGGCAAGGGTCGGCTA -GCTACTGCTGGAAAGTAGGTTCTACCTGTCCTCTCAAAGTGTTGAAGCCATTTTACTGAC -TGCTTTCTCTTTAGTGACAATAACGTCCGAGTATGAACCCGCCCTCCGTCGCCCTCCGCA -CACCTTGATCTGACATGTTCTGCAGCTCTGGAAAGTCGAATCTACCGGGGAGGAAAGAAG -AGTCTCCTATCTGAGCACCCTGATTAAGCATACGCAGGCCGTCAATGTGGTTCGCTTCAG -CCCAAAAGGTATGTTTTCCCCAGGATAGCATATGACTACATGTCTGACAGAGGCAGGTGA -GATGCTGGCATCTGCCGGAGATGATGGAAATGTCTTACTCTGGGTACCGTCGGAGCTACA -AACACAAGCCGGGCTGGGCGAAGATCTTTCAGACGACAAAGAGACCTGGCGAGTGAAGCA -TATGTGCCGCTCGTCCGGCGCCGAAATCTACGACCTGGCTTGGTCTCCCGATGGTGTTTT -TATCATTACAGGCAGCATGGACAACATCGCACGGATTTACAATGCCCAAACTGGTTAGTT -AGCCTCGAGCAATCCCGAAACAGCTCGATAAGCTAACTATCGATCTCTACAGGGCAAATG -GTACGACAGATTGCCGAGCACTCTCACTATGTTCAAGGTGTAGCATGGGATCCGCTCAAC -GAATTTGTCGCTACTCAATCGTCCGATCGATCCGTTCATATTTATACGTTGAAGACCAAA -GATGGACAATTCACGTTGACACCACACGGGAAGGTTCTCAAGATGGATCTCCCTGCCAAG -CGTATCGCATCCAGCAGTCCTGCACCTCCAGATGCGCCTAGCCGGTCTCAACAAAGCACT -GGAAATTCTATTGCTATTGCCTCGCCTGTTCCCTCGACCCCGGGGACCCCAATGGCTTCT -AACCTACCCATGGATCCTCCTCCAGTCTCACACAGTCGGCGTTCTTCGTTCGGCTCGTCT -CCTTCGATACGGCGATCTGCGTCTCCTGCTCCATCTCTCCCGTTGCCAGCCGTCAAACCG -TTGGAGGTCTCCTCTCCGAGTTTTGGTGCCTTAGGGGTCAAGAATGCCAGCATCTACGCC -AACGAGACATTTACTTCTTTCTTCAGGCGACTGACTTTTGCCCCTGACGGAAGTCTGCTG -TTCACCCCAGCTGGTCAATTTAAGACCAGTCATGCATCAGCCATCGATTCTACAAAGACG -ACAGATGAGGTCATCAACACAGTCTACGTCTACACACGGGCTGGTTTCAACAAGCCCCCA -ATCTCTCATCTTCCAGGGCACAAGAAGCCATCTGTAGCCGTCAAATGCTCACCCGTATTT -TATACATTGAAGCAAGGCACTCAGCCTGCGAAAAACATTATCCTTGATACGTCTTCGGGT -GAGGAGGTGTTCCCATCTCTCCCAGACCCTGTAGTATCCGGAACTTCATCTTTCACCAGC -CAAACCCACATGGTCCCTCCATCCTCAACGATGGCCGAGCCATCGAAGCTTCAGCCATCA -CCGAAGGCTGGGGAGGACGGCAGCCCCGAAGCAAGCCAGAGCTCAGCGCCCGTCTTTGCA -CTTCCTTATCGTATTGTATATGCTGTTGCCACTCAGGATGCTGTTTTGGTATATGATACG -CAACAACAAACCCCGCTATGCATTGTGAGCAATCTGCATTTTGCAACTTTCACGGATTTG 
-ACGTGGTAAGTACTGTTTCTATTCATGATACCCACGTATTCCTTTCTAACATACAATAGG -TCCGCTGATGGGTTAACCTTGATCATGAGTTCCTCTGACGGCTTCTGCTCTACGCTCGCG -TTCGCACCTGGAGAGTTGGGCCAAACATACACAGGATCAAGCTCTGTGGCTCACAACAAT -CCAACCCCAACCACCCCTGCGACAAATCTCACACCTCTGCTGACTCCAACGCATGCGGCG -TCTCCACATGTGCCATCTCCAATCAAGACGAACCATGCATCCTCAAATAGCACAGGTCCC -GCACCGCCAGCTAGCCCCGCACGGTCCAACTCCACTTGCTCAGTCGCGACACAGTCTTCC -TCTCAGCCAACCCCTGCGGGAGTCGTGAATAATCCTACTCCCACACTCGGCACAGTTCCG -TCGGTGACTGCAACGCACTCTGCGCAGCCACCGACACTTCCACTGACAACCCCACCCCAA -ACGCCTGTCTCAGGCGCTCCCCAAAATGGAAACACTACGACTGGTGGTAGTGTCCTGGGC -AAGCGCGACATAAGGCCGGCAAGCGAGTCTGAAAGAGAAGAAGGCAAGGAGGACTCAAAA -AATGCATTACAGCAGCCGCCGAAGAGGCGCCGTGTGGCGCCAACTCTGATCTCGGCAGGT -ACTGATGGGGCATCGTCCTCGAAGGACAACAAATCCTAAGTTGAGATTGTGTGTAGCCCA -TTCTCAGCCTCGGTATGCAAGGCATGAAATGACCAATTTGCTTTGCTATCTGCCGCTTCT -TATGATTCATGTTTTTTTTTTCCCTTACTCGTTGCTCATGCTATATCCTCCTAGTCTTAT -ACGTTATGTACTTGCAATATCTTTGTTTCTGATACTTTTGACAATCGGGCCCATGGTGCG -TGGAGTTGTGAGAGCCATCTATCTTCTCGCTCATGTTTACTTACCCTTTGATCTATGCAT -GAAGTGTGGCATGGGAATAAAGAATGAAAATGTATGATCTTAATTTCCTACGTAGACCAT -TAAGGAGATCCTACTGAACACCGCCCTTGGAATACAACAGCCATTCAAAGTAGGGTAATA -CCAAAGATTAAACTGTTTAGAATGTGTCGTCTCACCATCGAAGCTATCTATAATGTATGT -CTATTGTGTCCCTCGCTCCCTTGAATACTCAAAAAACTATGCCCTAAGTCATATCCCTCC -GTGGTCCATCTTCTAACTTATATGAATGACAACAACCCGCATGATAACAATATTGACAGC -CAATCGCAACAAACCGGTGGCGCGAAAGGGTCATTTAGTCTCCGTTCCAGTAGACGTGAA -ACTCGCCAGTTCATCCAGAACATCCAAAAGCAAGCAGCCAGGCGGCAAGACCGACCTTGC -CTCAGTAGCACGTGTGCTGGTTGTTATGGTGGTGACATGGAGCATTTTCTCAAACTCAGA -CTTATCAAACCGAGAATCAATGTGAGCAGCAGCCAACTCATTCGCCCAGCGAAATAGCGC -TTCACCACTACGAGAACCGCCAGCTGCAAATATAAAATCAGGATATTGGATATTGGGAAG -ATGGTCTAGAACATAGGTGGCAGCTCTCCCATTTCCAAGAGAAGCATGCGGTGGTTCAAC -CTTCACAGATGTTGCCTCCCGGACGACATGAAAATCTGCACGTCCACGGGTACCGTCGAT -TTGATCAGCTAGGTCCGAGGCCTGGCGAGCGGCCACTTCGTGGTCGAAAGCATCGCCGTA -ATGAAATGTTAAGGAGCAGCGCCGTTCCTCAATTTGGCTGCCTTCTGTTCGCTCAAGGAA -ATACTCCATCATCTTTCTAATGCCGGGACGCCATTTCATGGCGCTTTCGGGCACAAATGG -AATCCATATATCTGATCCGGGTTCTTGTAAGAAGCAGCCGTTCTCAGTGATGAAACCGAG 
-CTCGCAGGGGAGTTTTTCGAGTACTTCTCGAAGCTGCTCAGTGCTCCGGTTGCTGATTAT -GTACAAGATGTTCTTGCAGTCGTTTATGAGTGCTTGTAGAGTTGTACAGGCTTTGTTCGA -GGGAAGAGACTTCTCTATATCAAAAGCTGGACCAAAGATTGCGCCTTCTTCCAAGAAGAA -GATACGTGCCGTTGCGGCGGCGTATGACTTTTTTAAGCCTTCAAATGAAAGTGGAGATAG -GTGGGCTGTCTGACGGATTTGTTGAATTTCGTGGGCTTTTGTGAGAGCATCTTGTAGTCC -CATGTGCCATTTGAGAGCAGTATAGTGAGATTTGCAATCGAGCAGGAAATTCCAGTTGAG -TTTCCGCTCCACTGGTGACATTTCAAGTGCGGATTTGATCGCACCCGCACATTGTTTGTA -GTCCCAGGGATTGACAAGAAGTTTATGACCACGAAAGATGGAGGCACTACCCACGAACTC -GCTGAGAATTAGCGATCCATGTTGATGGAAGCTAAGCTGTCCATCTTGGCAATGGATAAA -ATCATGGCTACTCAAGTTCATTCCCTCACGCAGGTTAGTGGCCATGAAGATATCTGCAAC -GCTCAAGAGGGCGATGAATTGCAAGTGGCTGATGTCTTGCCGGACTAACACCAAGGGCTG -GTGGGTGATGGTTGAAAAACTTGCATTAATCCTCATTGCGATTTTGAAGATATGTGCCTC -CAGCTCTGGGATCTCAGATGCAGATGACATTACTTGAACCAGAACCACCTACATACGACA -GATCAGTAGCGGTAACACGCTTTAAAGACTTGTTAATCGACTTACTTGCTCTCTCCATGC -TGGATATGTCTTGAGAAAAAGCTCGTATGCAAGAAGCTTCTGCTTAATTCCTCCAGGTGC -ATCCAATCGATCCCGCGCTACAATAAGATATTGCCCTTGGTATTTATGGCCGATGCTGTG -TATCCAACCTTTGACCTCGGTGGATTGGCGTAATTTAAAAAGTGACGAAGGGTCAATCCC -CATTGGAGAATTCACAACGCTAACATGCCGATGATTGAGGTGAACTTTATTGGCAGAGGC -CTCCAGCCTCCGCAGTCGGCTGCAGGAGTGCAGAAAGTGACTGCAGTATTCCTCTGTCTG -AAATCCAATCAAATCGGCGCCCAGAAGTCCATCCAAAAGTTCTTCGCGTGGAGTCAGACA -GCGGAAGATTTCGGAGGAAGGAAAGGGAGTATGCAAGAAAAATCCAATCTCGGCCTGGGG -TAGTCTTTCTCTCAAAAGGCCAGGAAGGACCAGAAGGTGATAGTCATGAACCCAGATTTT -ATCACCTGGCTTCCAGTGCTTGATGATTTTTTCTGCAAATGCCATGTTGAGCTTGAGGTA -TTGGACCCAAGAATAATCATCGTACTCAGTGTGCCGAGGGCTTTCTTGTGTCTGATAATG -GAGAGCTGGCCAAAGTACCGCACGACAGAAATGAGTGTAATGACCCTCAAACTCGTTATC -ACTCACAAACACAGTCAATGAATCATACACATCCTGGAGAGTCTCACCAATTTCCGTCCT -GGTAGGATCTGTCAATGAGTCGGTAGGCATGCCTAGTGTCCCCACCCACATCTTATCATT -GAGCAAGCTCAACTTTTCAGCAGCGTGGATGGCATTTTGCAGACCTCCATGGCCTTGTTC -GGCGGCCTTGACGACCCAGTCAGCATTGGATAGCGAGACTCGTGTGCTACTGTGGTATTC -CATGCCCCATCGATCTTGCGATCCATCCAAAGACAAGTCCCTTCTCGATCCCTGTTTTGG -GCACTGACTACTCAAAATGGAAGGTGTTGGGGGATCGATCGCCTTGGATTTGGGCTGATT -AAATCTCCGGCTGTGTCCCCAAGGAATAGCACGGGGCTCGCTGGGGCCATGCCACAATCG 
-ATCATTGGCAATCGGAATCTCATCTGTCAATCGCCAAGTGTATCCTTTGAAGACTTTCTC -ATCTTCTGTCGTGGCTCCTGGTGTCAGTGGGAGACATCCCAAGAGACGATTTTGATAGCA -GGTATCCGGCAGATGTCCAATAATAGACCCAGGACCATCGACATTCGCGTGGCTAGACAC -GAACCCGTTCTGTCCATGCCTGGGCTCTGTCACTTGAAAATCGATGGTATATGGTAGGAA -CCTGGTATATCATCAGATCTGTGGAGTTTTACAGGCGTGGAAGGAACTCACAAAGACGCG -ATGAAGACCGTCATTGGACTGGATTGTAACCCAAAGCAGTGCTTAGGTACATATGTTGCA -GATGGCGAATAGGCTGAATTCAAGGTACAGGTCAAGCAACCATAAGTCGAGCACCGAGGG -TAGAGAGACTCGGGTTTTTGAAAAGGAAGGGGCATTCAGCTCATCGCTACTGCCATCCAT -GTCTCGCTCTCTTGTCTGTACTGGGCCTGGGAACTTTGTAGTGGGATCAAGTCATACAAG -TGTTGTACATGATCTTGTAGTCGGCCAATGAAGTTCATATTTTAGTATTCATTCAAATAT -TTACAATGCTCTAGTTGTGTATCAAGAATAGGGCTGTCACCATCCCTACAGAGCATCGAG -CCTCCTCGCAAATTCATGACCATGAACTCCCACGAGCCAATCAAGCAAATATAAAAGGTG -TAGCTAGTGTAAAACCCAGCCATACCTCTTATAGATGAGGAGTTCGCCTAAGACACACAA -AAAAAAAAAAACGAACACAGTGATTGTTCGTGGTTTCTGAAAGCATGTGACCCCATCTGT -ACATAGTTTCGTTCCTCTTCGCGCATCTACAGGCTTTCAGAGTGCGTTTTTGCTAGCCAT -TTCTCTAGGTCTCCCTTATGTGAATTCAAATGTCCATGGCAAGAGGCCGTATATTAAGCA -GGTCACTCGAGGTTAATTTCTGAACAGGAAATTTGACGATGTGAGCCATCATATTCAACG -GATCAGCTGACAAGATGCAGAAGCATTCGCAAGACCCAAGAGCTTGCGACATCAAGGCTA -GAGTGGATCAGCGCATGACGCTTTGGCCCACCCAAGCCAACGTTATTATCCCACATATCC -GTCGGTGGTCACTTCCGAGTTAAATAACCACCGCATGGAGTAAAACGTCGATCCATGTCA -ACGTAAGGCCATGTTCAATAGGTCAGCTGCACGGCGAGCCACCCATGCCGCGTGTGTGCC -CGCTGGGGAAGGAACGCCCACCGCGTCAGTTTGACGAAACAGTGGTGTCACAGGATTGGC -AGATCTTCGTAAGATGTGGAGGAACGCGTAGGTATTGAATAATGGGCGTCGACGCGTGGA -AACTAGATGCTTGGGCTGGAAATGGGCTCATGCACTCACCGGCATAGAGGATAGTGCAAG -TAGTCTTGGCACATTGTAAACGGCAGAGCCAAGCTCAGAGGATACACCAGCAGTCTCGCA -GGATAAGCTGTGTCTTAGTTTAATATATCATTCAAGCAATAGACTCAAGAGACTCAGGAG -AGAAATGAGGTCTTGGCACTTGGCGTCCGGGGATCATTGGCTCGCTCGGCTCGTCGAGCC -TGAATTGCTTTGGCTTCTCTGCACTTCTCCCTTTGCTGGTGGCACTGGCGTACAAATTCC -ATGACAATAATCGGTGCCAGTGTGAGTGCCTTGCGTGTTGCGTACGAATCGTCTTTTGCC -TGCCTGGGTAGCACTACCCATTGTTATCATTCCCGTCAATACCGCCAATGAGATAAAAAA -AACACAAGAACCCCCCTTCCCCAAATCCAAAATCAAATCTGAGCGATCACCGCAAAAATA -CCAATTAGAACACCTCGTAAATGACCCAGTTTTTGCCAAATTTGTGGCACCATCGGAGCT 
-AAGCATCGAGGGACCATTTCTCGGCCTCGGGACAATGAAGTATATGGAACATTGTTGTAT -GTTGTAGGGAAAAAAAACGTCTTTAAAACGTTTGTTAGCTGTGATGGACTGTTCAGATAC -TTACGCGGTCGAACCAGAACCAATTCGATCGCACAGCCACCTAGCATAGCCTTGGAATAT -TACTAGGCTCAAACGCGGCTCAGAAGATGAAACGACGGCTCATCGGTTTCCCCAGATACT -AGACCGGCTGGACAGTACTGAGTAATTCCCCGAGTCATGCGATGTAATAGATTTCAGGGC -TGAATCTCAGAGTTTGTGAGTCTAAAAAAATAAGATGGATATGATTATTGTCTGCCATAA -CAACGTAACTCTAACTATATCCTAATGTTGTACTCCATGTGTTGTATAGTTACAGTGGAC -CCGAACATGGTTCTTTTTTAACTCAATTGCCCCTCCAGGGCTGCACAACTGTGCATATTT -GGAGAGCCTACTTAAGGGTGTATAGAAACTGTTCCAACCCCTGATTCATATGCAGGTCAG -CTCGTTGGGTACGAGGTGTACAATATACAGACAGGTTTTGGAGACGAGTTGTACAATGTT -GATCGACATGACCCAATCCTTTGCCTAACCGAAAAGGGAAAAAGACATTTCCGCAATTGG -ACCCTCACTGTTTTCCAGGGTTTTCACGTGGGTGATTCAAACGGGTTATTGTGATTGGCG -ATAGAGGGCAAGTTCCTGCGGGTTGTGGTGTTTGTACGGATTATGCGTACAGTGATCTAT -CAACAGCCTGGTCAATTTAACAAATAATTTGTTATCAGATGAGATCGATGTATGAATCTC -AAATCATAATTCTTTGGAAGATGGAAGCAAGAATTAGGATCTCACGTTGCAGACATTCAT -ACCAAAGAAGAATATGTCGAAGCCCCTTATAGACGCTAAACCTCATCTGGCGGTCAGCAC -CGCCTCAAGCGGCGTAGGCGGAGGCTCTAGCCCCCGATCGTGCGAAATGCCAAGACGGAC -GCTTGCGCGTCTCCAGCCTTTCAGTGGAAAAGCGGACAGAAAAGTTGAGCTTTCGTACCT -TGGCCGCGCGTTGGCCTGATGCttcttattattttcttttcttttcttttttctttactt -tagcttGATGTTGATTTGATACGGAGGGTAAGACGTACATTGATGTATATTATATATACT -CTTTCTTCGGAGTATACCAACCGAAGAGACTGGGCACGAGCTTTGATCATCCTTATACCA -TCAAGGGTATTGATCTTCGCTGTTGTACCCAACCCTGACTATTTAAGAACGAGCTTACTA -TATACAACATAGAAAGGCTTTGCTTACAGGTGCCACGTTAACATCCTAACTCATGTGTAC -TCCGTACATGAATATGAATATATCTACACGTCAAGATTTGATCAAGTTAAGTGTCAAGAC -ATAAAAAGACTATAGAAGCCTTCAAGCCATCAAAGTATAGACACTGCCAAGGAACTTAAG -AACGATCCATGAAAATCTGGATGTGGACGTTACGTGTTCACGGGAGCCAAAATTACACAA -ATTGTGAAAATTACCAGAATTATGGAAATTACGGGGAAACAGGGTCTTTTCCTTGGCTTT -TTTTTTCGTTTTCGCACACGCGGCTTGGCCGCGGGGCCCGTGATTGGGGCCAGATGCTGA -CGACGCTCGTAATACTGCGTACACCTCCGTACTTTGTAATCTCCCTCTCTTTCTCTAAGG -GCCCATCTTTCTTCTCCCTTCTTGACTCTCCAATAAGAGAGTGATTCGAAAGTGCTACGT -CAACTCGAGCGAGGTAGTCCCTTCGTTTACTACTCGTCTCTGACATCCCTTTCTTATCCA -TCTTCCTCTCAGCTTGGGTCAAATTGACCTCCTCCGCCCAAACTCGAGATCGTTCGCCCA 
-TAGTCATCCTGGATCTCAACACTCATAGTCTTTGAAGTCTGCCGTGACTTCCTCATCGAC -TATGGCGGACGGCAGACACCGGGCTACCCAGCCTGCCCCAAATGGCTACGACGCCTCCAC -TCTAACTCACGAATTCGAGCAACTGATGCGCACCAAACGGCTCAACCGACTTCATGAACC -TTCCCGCTCGCGTACTCACTCTACATCACCATCTCCAATGTCCAGTTCATCAATTCCTCC -ACCACCCTCACGAGCTCCGCCGCCACCTCCCTCCCTAGGCACTCCTGCCGTCGCTCCTTC -CCCAAAGCCGCCCACCTCATCAGCACTACGCGGTCTTCCTATCATGCCCTCGCCACCCCA -GGATGCAGCTTCTCTCAAATTTTTTAATTTATTGAAGGGTTTGTCAGTCACACCTACTAA -GTACGAAAATCCGGGTCTGTTGGATGAGGCCTTGTGCGTCATTCCACTCGATCGCCTCTA -TTCCGAGGCCGAAGAGGAATCCCAAATTATGCAGGCCCAGGCTGCCAGCGTGGCAGGGAA -GCCGGAATGGGGCTACCAAGATTGTGTGATCCGAGCATTGTTAAGGTATGCTGGAATTCC -TTGATGGTCTAGACTCAATGCGCCCTTGCTAACATATCGTCATTCAGGTGGTTCAAGGGG -TCGTTTTTCCAATTTGTTAATAACCCTCCATGCTCAAAATGTCACATGCCCACGATCGCC -CAGGGCATGACCCCTCCCACCCCCGATGAGACTGCGCGTGGCGCATCCCGGGTCGAGCTG -TACCGTTGTTCCGATACTAGTTGTGCCGCCCACGAGCGATTTCCGCGGTATTCCGATGTT -TGGCAATTGCTACAATCTCGACGAGGTAGAGTTGGCGAATGGGCCAATTGCTTTAGCATG -TTCTGCCGCGCAGTCGGGGCACGAGTTCGTTGGGTTTGGAACTCCGAGGATTATGTCTGG -ACCGAAGTGTACTCGGAACACCAGCGGCGGTGGGTCCACGTTGATTCCTGCGAGGGCGCC -TGGGATCAGCCACGCCTGTACACCGAAGGATGGCAGCGCAAGATCTCTTACTGTGTTGCT -TTCTCCATCGACGGTGCGACCGATGTTACCCGCCGCTACGTTCGCAACTTCTCCCGACAT -GGAAGCCCTCGCAACCGCGCCCCCGAAGAAGTTGTCCTGTGGACGATTCACGAAATTCGC -CGCAAGCGTCGCGAAAACATGTCGAAGACCGACCAACGTCGCCTGATCAAGGAGGATGAA -CGTGAAGAAAAGGAACTCCGGTGCTACATGGCATCTGCTCTGGCAGCAGAGATTAACAAC -ATGCTCCCACAAGGCCTTACCGGTCGCCCCGAGGACCAGAAACACCCAGGTGCTCGCCAG -GAGGCCTCGACCGAGTGGCTCGCCGCACGAGGCCACGGGAACTCAGGACCCGATCGTTCC -CCCGAAGGACGGTAAAATGTACTGCTGGTAAATGGCATCACATCAACCCTTTGGCTTCTG -AAATTCATATCAGCCACACCACTCATCGAACGATTGTGCGACCTGGATACTCAATCCACC -ACAGCCTCCTTTTCTCTCATCGGAGGCTGGCCGGGCGCAGCGACGTTCGAGGAATCCTTG -GGTGACATTGGTTCCGACTCTTGACCAAAGCCACTGCGGTCGCGCTGGGAAAATTCGCGT -CGCTGGCACCCACCTGGGCGTCAGCAGCAAAAGAACCCTCCTGCTATCTTGACAAAGATT -CAAGCATATGCGACCGCAGAATTAGACGAGCAGGCTCGCGGGCCTACCGGTCTACAATGG -CTGGCGATTGTTGACCACCTGTTCCCTTGATCATCGATCATCCAATTTTTGTCCTACGTC -TAAATCACGACATATCTCCACACGATATTTTCTATTTTCTTACTCTCTGAGATACCACAT 
-CCGGACCCTGATTTCTCCCAACCCTCTTGCACTTAATATGGGTTACAATCGGGGGTTTCG -CTTAGGCTTTGTACGGCGCCCACGGTGACGATTCTATCTTTCTCCTTTCTCAATCTTCAT -TTCTAATATCTTTGGGAACTGCCCGATGGCACATGTTGTACGAGTCACGGATGCAGCTAT -TTCTTTTCTATGATGTTCACTTTTTGCATGTCCTGGCAGGATCGCCACGAACTTTTTTCT -TATATACCTGACCATCTTTTAAGGGTCGATTTTGTTTTTAGTTTCTACCAGTGGGCTTGC -ACCACCCGTTCAATACCATTCCCTTTTCTCATAAAATACTCAATACACCAGTTTAATTAT -GATGTGCAATCTTCCCCGCGCTCTCCCTAGCGTACTCGGTGGAGCCGAGTTCAGGTTTTT -TTTTTGCCCCTGGCGACATCACACATACATAATCCTCTTAACCCTAGATGTGTACATTGT -ACCTCGTACTCCGTACAATGTCATACTTTGTATCTGGAATGGAGGAATCAAGACTTGGTA -TCTCGTTTCTACGTATAAGACACGGCACGTGTGTTATTTATAGGTTGACTCCTCTCAGTG -GGTCGGTCTCACTTGAATACCCAGAGTATAGGGGGGAGATAAGCAAAAGCTAAGAATTTC -AAGAGCATATTTGGAACAGAAGTAAATGCCCGATAACAGGGCAATACTCCGATCCTAATA -TGTAGATCTGCAACATCTTGCATACGAAGACCGAAAAACAAGAACGTGACGACCCTTGCA -CTCCGTACAACATACAAGATCAGACGATCATTGCAACATCATGCATTAGAACAGATCATA -CCATTGCGGATCTTGAAGCATACCTCACACCGCTGAAATCCAGCAGTCTATGCCCCTCCG -TCGAAGATCCACCCCTATGCGGTCCAATTCATGCAAATAATAAGACTGGCCCTGAAGCCT -CAGTTCCCAGCTACCCCGAGACCCCGAAACCGTAGTTGAATCCCCAGAAGAAAATGAAGC -AGAATCAGACTTCGAACTTTTACAAGAGTCCTCGTCGGAACTAAATCCATCACCGAATCC -AAGATCCCTCACCCGAAACCGATCCCCACAATTCCGATCATCAAGCTCCACATCTTTCCT -CCCAAAATCCCAATCCCAACCCCGTCTTGAAACAGGCCGAACAACCAATCCCCTCAATGG -CTCCAAAAATCCGGCCAATTTCTCAACAGGCTCGCTGAACCAATTGCTCTCTAGAACGAG -CACAAATGACCGCAGTGATTCTAGCTGCGATACAGCGCGACACGTCTCTTTCCACTGCGT -CCGTCCGGCGGACACGTACACGGCGCGTACGGAGTCTTTGGTTGGGAGCCAGTGGCCTGG -GAAAGACCAACGGAGCTCGATGGTACGTATGCGGTCCCGGTGTTCTGGTGGGATTGTTGG -GAGGAGGTATGGGAGAGTCCAGGGGTTACTGAATTCGAGGGTTGGGAGGGTGTAGAGGAG -AGGGATTGATTCGTGGTATCTGAGGGGTTTGGTTAGTATTGGGGGGGAAAGGGGAGTGTA -AATGTATGCCACTCTGGGGGAGATGTAGATGTAGGTTATAGGGTGTATACACAGTACAAT -TGTATATTGTACAGAAGGGGTCGGGATAAGGTGAGGGTAGGTGAAAGGGACATGAGATAT -GGATAAAAGTTCAACCATACATCTGTCGACAAGTCAACGCCAGACCCAGTAGCTTAACCT -TATTCCAGCGCACGAGATCAGCCTCTTTTGCCGGCTGGGAGATGCCGGTGCCATGACAAA -TCTCACAAAATTGGCTGCTTTGACGTGTGTGGCGCTTTGGAAGTGAAGACCTCTCACATG -GACATACCACGTGCCCGAGTCGTTGCTTGCATTGAATGATGTGGATTCGTTGGTCTAGGA 
-CAAATCCCCAAATCATTAGCCGAAGCTCCGATGATAGCCGGAGGAGACGACTTTGTTGGG -GTAAGGAGAAGGCGATTGTGTGAACGCGGGGAAGTGGAGGAGGTTTGTTGCCTGATGTTT -GAGGTCAGTACATTGGTCTCCGTAGATGTATACTTGAAGTAAAAGGAAGTGAAGTAAGTA -GCTTACTCTGATATCGCTGAACTAGCACGTCCTCCCATGAAAAGACTCCCTGTTTCATAC -GGGCCATTGTGGATAGCTCTATTCCCCGTAGTCAGAAATCAAAGAGACATTGCCAGCGAG -GTCGCTTCATAAATAGTGTCAGACCGTCACCTGCTGACCCCTATCGGGCACAACAGCGTT -CCCGTCAATCCGGACGCCAAGTAGCCCGGTTAAGGAGTTGGGCAAAGAGTCGGTGAAAAG -ATAAAGGTGTATAGGAATGAGGTATTTATGTCGGTGCATATATATACCCACGGCTATTTA -CATAGCTAATCTGAGTCGAGAAAAGATAAGGAAACGCGGAAGAAAATAATTCGGGCTATT -TGTAGATCCTTTTGGCTTGGCCCTACCTTGATCTATGGCTAGCGTTACTCTACAAGTGCT -CTCTATACTGGGCAGGACATCTCGAAGCTAGATCCATTGAGCGCTGGTAACTGGATTATC -AACATTCATGGATACAGTCCTTATTCAAGCTAACGTTTTGCTTAGATACCTTGATGGAGT -AAATATTGGGCTTCCCCTGCCAGCTAAGGCTGCCATGTTAGAGCTGCTGCCTTTGAGATA -GCGCTCAAGGTTAATCTACGAATTTATTATGAAAGTGTGGGAGAAACCTACAAAGTGCAC -TCTGTACTTTATGCGAGCAGCAGCAAGACGGACGGAGAGATCTGAGGTCGTGCTCAACTG -GTCTGGTCTGCAACTATAATCAGCGTCGTCAGATTTTGATTCGGAGATTTCTTCATCAAT -AACCCTGATATCAGACTGACAGGGCCCGACGTAGGTGTTAACTCCTGCGATGGGGCTCCA -TCTGATTGTCGACGTGAACCTTTTTCGTAGGAGTCGGACTTGGGGACTTGAAAGGCATAG -CAGGAGGTGGTAGCGCGACAATTTTGGTCCCCAAGCAATCTTGCTCCTCATACGATAGCT -TCTCGACTTTAGCTAATTGAACCCACTTGTTGAGAGTCTTAAACAGCGTTGCTATGTTGA -CTTGGAGCCACTCCTTATGACTCTTATCGCATCGATTGCATTTTTCTTCGAGCCGGTAAT -GTTTCAAGGAGAACTGCGCGATGATAAGTTGCCCAATCGACGGAAATTGGGTATGAAATC -ACTCTTCACTTTGCTGAAATCTTCATAGCACCGTCGGTGGACGCTGAATCGTCGTTCACG -GTTTGTGGTATATCCAACCTAGGACATGGTTGGTTGTTGAGGGTGAGAGATGAGGTAGAT -GTACCCAGACACTCAATCAATATTCCCAAGAGGCTCGAGCAATTTATCCCTCACCTTGGT -TGCCCGCTGGCTTTATCGTCTGGTGATGCTTTGGATTTACAGAATTCGAACTCTGCTGCT -GACGGTGGAATAGCAATAAAAATGATCGATTCTTCCTAAGCCCCGTATTTTCAGCCCATC -TAGTCAGGCTAGCTCAGATTCTGGGCCTATCTCCCATACTGCATCAGATAATGCGCAAAT -TCGCGTCAACCACATTGACCTGTTGTTAGTGACCTTCCTATTTTGGTCATTTGTTAACAG -GGGCTATTCGGTCATGTCTGAGAAAGTCTTGTACCAACTGAGATGATTGGAGAGTTTCAC -GCGGGGTTTGGAGCTTGTGGGATTAACAGGAAAGTGTCGGAGAACCTGGTGCATTAGTGT -AGTGCGTTGACAAGGATCATTAGCCGTACTATCGAAGCTCCGTTTCCGTGGTCAAGTTTT 
-AGGCTGTCCTAAGACATACCCTGGTGGTGCGGTGACTTCAAAGGTATGTACAGTATGATG -TATGTAGTTCTCAAGGATATAAGAAAGCTGGTACCTGACAAGGCGGATGAAAGGTAGACA -TTGCGTGCGCTAGGCACCAACGTTAATTGAAAACAATTTTGGGGGCTAAAACCCAAGTCA -CGCTTTTGAGATAAGATGAGCCTTGGAAAATTGAGAAAGAGAGTGCAAATCACGCCTAAG -AGACTTCCTAATTCATATAGTGATGTGATTGTCGCTGTCTAGCAGCAATGTCCATTTTCA -AACATTCCAGGGTTGCTCCGTACTTCACTGAGCTATATCCGGTATCTTGGCCTAAATTAT -ATGATAGTAATCTAGAGAGAGGGCACATGGCTTTTGCTAGCTTGTTGTATCTTAATGGAC -AACTCTCACCCTTCTACCAAGTGGGCAAACCAGGCCGCCCACTTTACTTAAGATACTCTG -TACATAGGTCCCGGACCTCGAACTATAGATACACCAATCCCAGTGCCAGTCATACTAGCA -CCCGTAATATCTCTTACGTAGGAGAATTAACCCAGAGTGTTGATCGGGGGCGTCACCTGC -AACCCCTGGGACAGCATAGTCAAACCATCCGGCTGCAGTTGATCAACACTGGACTCTGCC -GGCAGAGTGCCCACATAAGCATTGAAGTTGTAGATGCCGGAGTCACATCTGTCGACAACA -TCCCTCAAGCTCGTCCCCGACAAAGCCTGGATAGTCCGGAAGGCGCGAGAGACTTTGAAA -CTCTGCGCGCCCACACCCATCGGCACCGAGTGTGACCATATCCCTGGAGGCGCGATGAAT -GTCTCAGTCTTATCGCCAGACTTCACCCGCACAGTCGCAGGCAACTTCAGCATCGTGACG -ATGAAAATCTCATCTTTCATTGAGTCGACGCCGTCAGGCCGGCCACAGGTGAAGTTGCCG -CCGACGTCGTTGGGGTTAACTTGCATTGTTGTGTCGGTTGCGTCGCAGTCGACGTCCTTC -GGGGTTGGCCGGTACCAATAAACCAGCCTTTCTTCCTCTATGAATCGGATGGGGAGTCTT -GACTCTGCTTTGTAGGCAGCGATAAAAGGCTTTGCCATGTCAAGCCATCCATTATGGGGC -CTGTTTCCTTGTCAGTTGATGCGTCCGTTATTTTCCCGGCTGGGATACCGCTACTTACAT -GTCCATCACCCATTTCGAAGCACCATCGTCTGTGTGTGGTGACGATAGCGGGCCGATATA -GTGCGACTCACCATAGTCATTCCAAGTAAGGATCTCCACAAAGCGAGGACTGAGACGTAG -GATCTCTCTCCACCGGTCATACCAGAGGAGGTCAGAAGGGAACACCCAGTTCTTGGGATA -AGAGACCTCGCTGCCGAAATGGGTCGAGAACCAAGGCGAGATGGCTATTTATCGATCAGC -AACCGTACGCATAATAACCGACTTACTCAAGTACTCACGCGCAATGTATGCTTTCCCCTG -GAGAGCATCCTCATAAGCCTGATCACCATCTGAGACAGATACCTCCTGTCCGTCACTCGG -AGCCCTGTTATTTCCATTACTGTCCCAGCCCATCCAGTTCAGAGCGCCATCGATATTTTC -GAAATCACCGTGACCGGGGTGGAAGTTCGGCGCGAAGAAGATCTCGCTGCCAGCAGCCGA -TCTGATCGCACCGACATCGACACCATCGCCCACAAATGACGATACAAAGACTTTTCCATC -AACCAGGAGCTGTCCGGGATGACCTGCATACTGCGCAATCTTGGCGCCGACTTCGCTGCT -CTGGCTGACCTGCCACCAGTGGAAGTCGAAGGATATAAACACTTTCATGCTATTGTTGGC -AGCAGATTCATAGGCATAGTCCAGCTGGACATCGTTGTAAGAGTCGGTGCCAATGTTCAA 
-TGCAAAGGCATCGATACCCGCGTCTCTGGCACGTCTCATATCATCGTCATAGTCACCAGC -GCTTTGACGATTGCTGACGATGCCCATCTAGATAATGTTAGTATTTTTAAGGGAAGGATT -TTGAGGATACAATAGATGTGTCACTTGCCATAAAATGACAGAAAACTAATTTATTCCCCA -TGTTTTGATCATTGGCTGCAGAATCCATGATGTTGGTAGTGTGGTCGATTACAGTCGGGA -AAGCGAAGGTGAAGGGAAATAGGCCAACCAGGGCCACGAATAGGTAGAACATTTCTTTCT -AAGCGGTGAAGAAATACCAAGGACCAGCCAAGGAATATCTCCACTATGTGAAGCTCTAAA -TGTTGAAGATAAaaggaatgaaggaatggaagaatgaaagaTGTATCTAGCTTAATGAGC -GACTAGAGTGAGAAATGCAAGTAAGAAAAGCAAGGACTTGGGGCAAGAAGTCTTATATGC -ATTTGGCGCTTTGTTTTACAATGCCAAATTGTCTACATTTAGATCCACTCTTTGTATGCG -TTGCTGCACAATAGATCAATCAGACAATGAAGCCCTAAAGCGGAGGCTGCCTGGAAAGAA -AACAATCAAAACCTGTTTGAAGACCTCTCTTGTCACAGGGCTTGCTAGAGGGCCCTCAGC -GGTTAGATTCCGACAGGACGATGAAGCCGAACGATCTATTTCTGCGATCTGGTTAGTTGA -AATGAAATAGAAGGGGTAGAGGCTGACAATTACAGATACTCCAGGGCTGCCGGATTGGCC -GAAGAACGCCTCCGACAGCCCAGCGGCTAAATTAAGGGACTGAGGCGTTCCACGTCGGAC -CTTTCTCGATGGCAGTGAATCAATACTCCGTACCTTGATTAGAATTCTGAGGTATTTCCA -GTAGATCTCATTCTGAAACATCTTACATCTAAGATACGGAAGATCAATTCACAGGGCACA -AGGTACACGGTGTACCAAGTGCAACCATCGACTGTGGGGTGACTCGGCGCTAACATGTTT -CACGATCTAGATTTGGAATTTTGTGATGAGTCTGTCGGGTCATCGAGTGTTAACCTAAGA -CCTACTTGTTTTCTACTGGTGGATGTAAAGACGATAGTCTCCGAGTTCCCTGCTATAGAT -ACCTCACAGATATTTGCCCAACGTCAGCTGATGGCAATCAGGGGAAGCTGGGGATATGAA -CCCGATGAAGGTATCTGCTATATACCCTGTCATGTGTCTAGAGTGGTGATTTAGGTGAGT -GCTATAGTGCAGCATGCGAAAGGAATACGCGTGGCTCCAGATGAACACGCGATCTTGAAC -TCTCTATTGAATAAGTCAAGCCTAGCTAGTGTCTGATTCTGATACTCCTGACCCATTAAT -TCCAGCCTTATTTTGGCGATCTTCTCTAGAGCTAACGCCCATCATCCGAGCTAAAAGCAA -GGATAAAAGCTGTGCCATCAATGGCATATGATAGACCGTCTTAGAGCACGGCAAATGCAG -ATACATAGCTCACCGAGCTGTCATCAATCTCTTCTACATCCGAGCTACGCTCGTATCGCA -GAAAGTAGAAGCGTCCCTGCTGCTCGGCTGCTTGATTTGCCGTGACGATTTCCAGGGCGG -CCATAATTACCCCGATCGGATGGCGCCCACATACTGTGTTACCCGTGCGTTCAATAATGT -CGAGAAAAGACTCTGAAGACCCCGTTGCAATAGCAGCCATGGTGGCCATGTCAAATGCTG -AGATACTCTCGTGAATGGCAGGTCCCCTGCTTGAGATGCGATCGCGCTGCCGCAAGGAGT -GGCCGGTCGAGGCCATCTCGATGGCCTGATCAACGCTGCTTGTATCCATCCCAGGTTGGG -GAAGTATATCACCAGACAGTGGGAGTTGTGGTCCTAGCTTGGGGGCCTGAGGAACATAGT 
-ATGTGTAGCGGAATCGGAGGCCCCAATGGCAGAAATCTGAACTTATCACAAAGGCGTTTG -CCGGGTCCGCCAGGTAGGGGGCTAAAAGGGCGCCGAATGCTTGCTCTGTCGCGGCCGAGG -TATTTCCGACCATGATTGGGACTAGAGGGGGATATTCAGAGGCCGGAGAATCTGGATATT -GGAGCTGGAGCAAACGGTGGATGTAGGGCAGGTGCATCTCGATGCTATGCTCGTCTTCAT -CGACCGAACGTGACATGGTTGTAAAGCTAATCTTTGCCCCATTTTGTTTGGTTGCCTGGG -TTTCGAGAAGCTGCGTAATCAGCTCGGTGTCTAGTGGGAGGGGTTCATCGGATAGAGGGG -TACGGTAGGAAGTTAGCGTTGGAAGAGCCAGTGTTGCGAGCGAGACATGGTGTGATGGGC -CCAGAATAAAAATCCGCTTTCTATTTGTGAGGTTAGATGAGTTTCACAAGAGAAGCGGGA -GGTAAGCTCACGCTTTGGAAAGATCCAGCGCCTTGTATGCATAGGCTGCGCATGGCCCCG -AGTACGAATAGCCCGCATGTCTGGTTGATTAGTATGTTTGGCTATTGACAGCATTATATG -CATGGCAAAGGTCTCAGACATACGGTGCGATGATCACTCTAGCTCCTGGGGTAGGCAGCG -AGCCAACCTTCTCCATCGCGTCTGGCACCTGCgccagccagccatccagctggcgggcca -gGGTTCGGGCACTATCTGAGTACCATGAGCCAGAGTGAGAGGCTTGTCTAGATGACATCG -TAGGGTCTAAAGTTGGTCAACGTCAAGGTTGAATGTGAGAGGTCGGGAAGTGGTTGGCAG -CTACAAGCTGAAGCTCTTTAAGAAGTGGGGGTTCATGGCGGGGATATCCCGCTCCACGTC -AATTGGAAGATATTCGATGGGCATTGAAATTGTGATTTTTGAAAGTATTCAAGGTATATT -GAATGTAAAAAATCTAATACGATTGCATTATCTACGAGAAACATTACCTTTTACAGGTTG -AAATAGGAAAGTTGGGGAATTCCGATATGTACGAAAAGTCTAAGGAAATCAGGCCAAACA -TATTCCGTCAACTAGGTAGACAACCATGGCTCTTCCCTAATAAACCTAAGAGATGAACGG -GAAAGGAAAGGCTCAACAATGTTGAGCCCTCGTGTCAGAGTGTAGTCTGAAGATATGCTA -GGTACTTATATACGCAGTGTACATGATCCTTACAACCAGAACCAAGTGCTTACATTCTAT -GGCAGGACAGACCTATATCTATGATGTATGTACATGATCCTTAGATCTGATGCACATATT -CGTTGACTGCTCCGGATATATTGAAGGGTGTCATTCGAGAAATTTGGAAGCTTCGTCTAG -CTCATTTCTTTTTCCAAATCTATATTGATACAGAAATGGACAGAAAGTCGTCGGGAGTAT -ATGAACGACCATGAATCTATTCAGCTCATTCTTTCCCCAGTCCAGTAGCCAGCCAACGAA -TTTCGATATGATCATGCAGAAAAACAAGAGAAACTATCTAATTAACAGGATATGGGTACC -AGTAAGCCACCAATTGGCTGACTTAACCCATAGCAGAAGAACAATTCGACTGGTGTTGCT -TCTATGTCCGCAAATCCGCAGTTGCGAGGTTTGTAAGAGTAGCGCCTTTCTATTATACAA -AGCTTGAATAATTCTCGGCTTATCGCCCATATAAGAGAGAACCATCATCCATGCACGCGT -TAGATATCATTCTATTACAAGATGTGGAAAATATAAATGGATGAAAATGGTATGTATTGA -ATGGCTTATGATTCACCCGAGGAAGCATCGCACATATGACGGCGCGCAGTTCATGTGTAG -TACTTCACTGATCTGCGGCGATGGGACGAAAATGAACTAAGCAGGTTATGACCGAATTGC 
-GAGGGTAATCGGTATATTAACAATGCCTGACCAGCTCCCAAATTCCTAGCCTGCTTCTTT -TAGAAAGGAAGAGCTCCTAGAGTTTGTAGAAAGTTGTGATGAGCATTCAAGCTACCTATA -TTACCCGTTTTGGCTTAGCCTGACGGAATGCAACACGAATCCCGGAGATGACGCGCCCAG -ATTGTGTTTGCATCGGTCCTCGTCTTAATTCCTCATACATCTCTTCTTCTGTCATTACTA -CCTCTGCGAGACTTTTCAAGGTTTTTACACGCGATTACCTCTATATACCCTCCGTCGTTC -CACTTTACTATACACCAAGCTCCTAAAATGTCCCTCAAGCGCAAAGCTTCGTTCACAGCT -CTCCCAACCAATCCATCAGTTGCTACCCCCAATGAATGGGGCGTCGGGGCTGAAAGCCCA -CATCTCCACAGTCGGACGCGAAAACGCTTCAGGGACGGTCGTCCGAGTGACCAAATGATT -TACCGTAAGTGCCTCGCTTCGCTCTTTGAGCACTCTCCCTTGATCGCGTCATTGTTCAAC -CTCGCTGACAACTAGCAGAGAATACCCTACGATGGATCTTCTCTGCCCAAAAGCAGCAGG -ATTCTGCACAGGCCACCGATATGGATACAATGGACTCGGAACCGACTCTCGAAACACCAG -AAACCGTTGACCCCCGACAGCAAACATTACACCGTTTCTTCCAACAAAAACCTCAGCAAC -CACCGTCTTTCCGACCATCCCGCCAAGCACTTGCACCTCGTGCCAATGAGACTGCGTTCA -CTCAGGAGGATCTCCTGCGTCGACAGGCATTCAATCAGATGAGCTCCGGAGACAGCTCGA -GCGAGAGCAACAGTCCCGGATCCAATCAGATGGGTGCTGATGTGGACATGGATATGGATA -TGGATAGCAAAAACGATTGTTATGGATTGGTTCAGGCTCCCAAAAACTGGGTAGGAGGGA -TGGCCTGGATGTAATCTGGCTGCGCGGGTCCGTTTTTTTTATCATCGAGCATTGTTGTTT -ATATCAGAAGAGACTATTGTCTCCTTTCTATCTACTACGTGATACCTATTGGGATATTTG -GGCAAAGCGCTACTGGCTAATTATATTGCCTTACTTTTCATTCTTTCTCAAAAGGAAACG -GGGGTTCTTGTCGGTCTTTTGCGTTTTTGGAAGCCTGGGAATACGTGAACCCCCGAGGTT -TTGACTTTGAATGAATTCACCTACATCTCCAGATATACAGCTTAAACTAATGCTTCAGTG -ACTTGCGACTTCATCGCAGGCTGGGCGAGGGGGTGTCATTATTGATCGGGCCCTATGTAG -TGGAAGGGGGCCTTCTGATCACTTGCCTAAAATGGTAACCAGCTTGATATAATCACTAGG -AATACCTGAATGAATTGAGTAAGGCTCGGAGCTGGTAATTAGTCAAACACACTGACGCTT -CATTATACATACACTCTATAACTCTTTCTACATTATGCACACATTTTGGTGTGATGGCTG -TTAAACTATGGACCCGCTTGATTAGCCTAATTTGGCCTCAGGCAACGAGGTTGCTTGCCC -ACCCGACGTCGTTCCTCCCTCAATTGAACGACAAGACCAGGGTAATCTCAGACCGCAATA -CAAATTACTTAATCCTGCCAACCCCCCGAGTAATCTGACCCTTTCCTTTCTACCTCGGAA -TCATTTCAGCTCGGCTACCCTCAGAGTTCAACTCCCCCGCACATCGGAGGTCCTTTGCCT -GCTGACACGAACAACCCACCAACCACAATGGATCACGATCAGTCTGTTCAGTACTTGGAA -AGGCTGTTAGGTCGTACACTGCGGATCCATACTACAGATACTCGGATGTTTGTCGGACTT -TTCAAGTGTACCGATGCGGTAAGCATTCACCTATGAACTACGCATTGCGCGGTACTAATC 
-AAAAACTATCAATAGGACCGGAACGTCATTCTCGCCAACTCTTTTGAATATCGTATGCCC -ACCACATCTGCTGTACAAGCTGCCGCGGACAAGAAGGAGTCGTGGGGAGAAGGGTCGGAA -GCAAAGAGCACCCTCAAAGTGAACATGACACACCGATTGATAGGCCTTATCGTGATCCCA -GGACGACACATTACGAAAATAGAGCTGGAGTAAGCCTTGATCGGTTTCCAACTCGGCTCG -TGTCCACGAGACCTACATCTTTCCACCTGGGGATCAGTCTCTATCCTATTCTCGAATGAT -AAGCCTGTCTCTACTGCCCCGCAATACCAAGCAGTGGCAACCACGTATTGTACCCACCCA -CGCGATATGCACCAGGAGAGGCCCAGTTTCTACCCTGCGGCCACAATGACCTGGTCCCTG -CTTTTCCGCCAGGGCTAGAGTTGGGTCTGGGGCTGCCAAGGCGAGGCTTTCTTGCAGCGG -CGCTCATCCTCCTTCAACAACGTGGCACCGGGATCACAGCAGCTGCATCGCAGCATTGCA -GCATCCAAAGTGACTTGATCGACAGGCCTGCTGTCAAGCTGATGGGGATATTGTTTGTGG -CAAGCGTTCCTAGCTAGCAAGGGGGTGGATGGAGGGGAAAAAAGCGGTGTCTCCGCCACA -CCGATCTCTTGGCATGCAGTGTGGTTGCTTTCGTGCAGAACCAAGACTATCTGTGGCTCC -TGTCCACTTCCGCTATGATCAGATCATGGGGCCAGGGTGTCTCTTGTATCTGAATGGCCT -CCTATTTTGTAACCGCCTTCCCCTGTGGGAAGTATTTGCAGGGGGCTTACGCTCTATCAC -GCAACAACAGATAACTTTGTGATGTATCATTTTGCTTTCGAACGTGTTGCCGAATCCTTG -AGTTAAGTGGGTGCTGCACTTTTATCTCTATCACTTCTGATAGGAGACGTGCCCCAAATG -ACGGGGTCCAAGCTCTAGGTAGATTCTCATGTCTCTTGCGAACGGTCGGGTAGGGTCGGG -AAAAGATTCTTGGTGTTCGACTGGCTCTACAATTTCATTTGTGATGGCCGCTTCAGGTGG -TCCAGATTCAAATGTAAatgcatgtatgttgtagggatgcatgtatgttgtaGGGCGACA -ACCATCGCTTATCGAGCGCTAATGGTGGGGGTTCAAAATTGATTAGGATTTATCTAGATC -ATGAAGGTAAAGGATACAAAAGAGAAGTTCAAGCTCTATAGGATTAAAGTGAAAAGGCAA -AGAATATATTTCCAAAAAAAATATTGCGGTGTGAAGCTATAGAGAAAGGATATACTCCAT -ATGTGTGTACAGCTTCTCAATGGAGGTGCCTGTCTTTAGATGTGTAATAGGTTACTCTCG -aacgaaaaaaagaaatcaaatagaaaaagaaaaaTCACATTTGGGTTAAGTTGATATTCA -CGAGTATCAATCCTATAAATATGAGAGTCCAGACTCTCTAAATCAGGCAACTGTCTCAAA -ATCTTTGTATAGCTCGGACCGAGAGATAATCCGGCTGACCTCATCCACCCGCCCCGATCG -AGACCGGCCCAAGAAACAGAAGCAGCCACTTTGTACCCGCGAAGTACCTTCTCGCTATAT -TAGCGGTGGCGGAGTACTTTTGCATACCTTTCTGACATCTTGACTTCCAACTTTCCTCTT -CCCCTCTCTATTTTCTTGTTTTCATCCTTGCAGCCTTCCTGATATCCCCCATTGGATTCT -TTTCGAGGTAAATTGTTCCTCCCCCGAGGTCTCCGCCTGCGGCCTATTATGTGGGAATCC -CACCCCGTTTAAACACCTCGTACATAATGACGCATTCCTCTACCCGCAAACCCACCATTC -CCAGTCCTTTTCCAAGGACCTCCCTTGCTTCCTTTTTTCTTCTCTTTCTCACCTCCACAT 
-GCCACCTGTGTTTCTGCGGAGTCAGCCCTGCAATCCCGCACCTGCATTGTCCCGTGCAAG -AGGCTAACTTGCTGTAGAGCTTCAGCCTCTCACTTAGGAGCTTCCCGCCTTTTCATCATT -AGCTTCTCGCTATCCAAAATCTTTCCTACGTCAACCACAAAACCCCTGGCTTTCCGGAAT -TTCAATTTGACCACGGCTCGCCCATTTACCTCGTAACTGTGCTCACGCCTCGCACTTGCC -TCCGCCTGTCTGTTGCCCAAGTTCTGGACCAGGTGGATTAGTGATAATCAGTACTCACGC -AAACCGCTGCGCATTCTTTGCCCAGACTCTTCAAGCTTTTTGTAATATCGCAGAAGCTCG -GGCCAACACATTCCAGCAACACAACAGGGGTTTCTCGGCATAACACTTTGGATTTTCATC -GTTCGTGCGATCATCTCAACTCAAGCTTGGCACTTACCCGTAGTGATGGGTCAGTCGCAT -TCCAAGAGCAACGCCTCGGGCGACACTCTGCAGTCCTACCCTTCGTTTTCCAAAACCGAC -ACCAAGGAGTCTCTCCGCTCGTTCCGCGGCTCAATTCGATCGAAAATCCCTGGTGCTCGC -AGCTCCGATAGCCCTCGTGGCTCCACCACTGCTCTCTCTCGGACCGAGAGCCAGACCGAC -AAATCCGATGCTGGGTCCCTCAAATCAGTCGGAAGCCGCCCTGGCTCCAATGCCGGCCTC -CCCCAGTCGCCTGCTTCCGAGACCACTTCGCGACCCGGGTCCCCCCAACCCCCGCCATCT -CCCTCGCTTTCCACCAGCTTGCAACGAGGTCACAAGGATGTGGATGCGATGAAAGAGAGC -GGCGAAGTTGACCATGTGTCAGATCACCCTCCCTCTGGTGGTCCTCCAACTGGAGCCTCC -GCAGTCGTTGGTGAATCAATTCTGATGAAGCGAGAGAACCAACTGAATCCCATCCTGGAC -TTTATCCTGAACGCCCCCCTAGAGACTTCAGGATCTCCCGGGATGGGCATGGGCGCTTTG -AAATCCATTGATTTGGATGACATGATTTCGCGACTCCTAGACGCTGGTTATTCTAGCAAA -GTTACCAAAACAGTTTGCCTGAAGAACGCTGAAATTACCGCCATCTGCACCGCCGCTCGT -GAGCTCTTCCTGTCGCAACCTGCTCTGTTGGAGTTGTCAGCTCCCGTGAAGATTGTCGGC -GATGTTCACGGCCAATATACTGACCTCATTCGACTCTTCGAGATGTGTGGATTCCCTCCT -GCCTCCAACTACCTTTTCCTGGGTGACTATGTGGACCGAGGCAAGCAGAGTTTAGAGACA -ATCCTTCTGCTGATGTGCTATAAACTCAAATACCCTGAGAACTTTTTCCTGCTTCGAGGA -AACCATGAGTGTGCCAACGTTACCCGTGTTTATGGGTTCTATGATGAGTGCAAGCGTCGT -TGCAACATCAAGGTCTGGAAGACCTTCATCGATACCTTCAATACCCTCCCAATTGCTGCG -ATTGTGGCCGGTAAAATCTTCTGTGTCCACGGTGGTCTTTCTCCCAGTCTTTCCCATATG -GACGACATTCGCGGCATCGCTCGCCCTACTGACGTACCGGACTATGGCCTGTTGAACGAC -CTCCTGTGGAGTGACCCGGCAGATATGGAGGAAGATTGGGAACCTAATGAGCGTGGTGTC -AGCTACTGCTTCGGAAAGAAGGTGATAATGGACTTTTTGCAGCGTCACGACTTTGACCTG -GTTTGCCGCGCCCACATGGTGGTTGAGGATGGATATGAGTTCTACCAGGATCGCATTTTG -GTGACTGTGTTTTCTGCACCTAATGTAAGATGTCTTACAATTTTTTTCACTAAGACTCCA -ACTGACCTTTCACAGTATTGCGGCGAATTCGATAATTGGGGCGCGATCATGTCAGTCTCC 
-GGCGAACTTCTTTGCAGCTTTGAACTTCTCAAACCCTTGGACTCGACCGCCCTGAAGAAC -CACATCAAGAAGGGCCGAAACAAGCGCAATAGCATGCTGAACAGTCCCGTAAGGAACCCC -CCTTTGTTATTGGATTTCCTCCCGTGTGATAGTTGTTGTATCGTGCAACACTGCCGAGCA -TTATTCTTCCAAATTACGCAGATAGTGCATTCTGTATCTCCCTGAACTTCGCTAACCAAT -CCCAGCCTGCGGCGGTATCGGCACAAAGTTATTAGTCTCCACTGATTGCCTTACATTCGA -AATTGATATCATCTCCTGCGATTGTCATACGCTGTCATGATTTCGATTACTCTGACAGCC -ATTGACCGACAATGAACACGACTTTCACCAAACTAGCAGCGTGGTAGCCTATCCCTTGGT -TCATAATGGCTGATCACTGAAACCGTTCTGCCGCCACGGCTTTCGCGCAAACCAACTTCC -GGACCGAACACCCCCGGTATGGATGCCCAAAATGAAGCTTGGCGCAGATTTGAACCGGAT -TCATCTGACCGGTCTGCGCATCGGAGCCCTCTAGGCACCATAAAATGCGCAGACCCGCCG -ATTCCAAATTTGCAGACTCAATGGAAAGACATGCCCGGGCAAAGGCACGTGGAATCTCTA -CAGGCAACTTTAGGGTCGCCTAATTAGGTTCCAGCTCATAATGCAGACGCGTCCAGACCC -ACGGCAGTTTTCTTCGGCCTCTTCAACAATCATAGCTATGTATCTTTACATAACTAGGTT -ACTAGGCCTCTCGCTTCTGGTGTGGCATTCTCTTGCATGTACTTACATTATCTCTACTCT -TTCATCCCAAACAAATGGTATTTCTTGGGCAATCATTTGGCCTTCTTCCTTTTTTCCACA -TATCTAGGATCATAGACCCGCCGTGTTTATTGAAAATGCGTAACATCTTTAGTTTCTGTG -ATTTGCACAAGATGTCGTCCACTGATCGTCTAGCGAAGTTGTGTATTTGGTTAGACCTTC -GTGTGGATTAACAAAACCCACACCGCTACTTGATGCTTTCGTTACTTGACCAGAAACCAC -CCAAATAGACAATGAAGCAGCTTGTTGATTCTCAGTCGCAGGTTTTGTACAACCGACAGT -TGTATGTCTAGGCCTCCCCCGGGACCACCTTCCGACAAGGTTCGGCAGCTCACCCGCCCT -GGTTGAACATTGAACGCCTTGATGTGGAAACCATTCGCAACAATTGGACATGGAAAGTCT -TACTATCTAGTAACTCACGTAGGTGGACTGCTTTGTATGTCTACTAATATGGATGCCGAG -CTCTAGAACTTCTTCATAAACAGACCAGTTTGTGACAGACCATTGATAAAGGCCAAATAA -ACAACAACAGATATGTGTACCTCAATCAAATTTCCTTACTACTTTCCTGCAGCTGGCGCA -ACCCCAGCCGAACAAGCAACAGATTGAACCTGAACTCTAACTCCCGCCGCAAAAGGGTTC -GGATCCGTATTCTGATCCAAATTCCCCAAAGCATGATTGACGTCGCGATGTTTCGCCTCA -TCAGCACGTACATATAACAACAGTGAATGTATACAGCGTTGGCCCTCCGGCATCTGCCAG -TACTTGATTGCCATCTCTGGAGCCTGCAAGCTCTCCCATCCAGGCAGATGACCATTCTCG -AGGTCCTGGATTGCCTTTGAGTATGTGATCACGGCTTCTTCTTCTAGATATCCCACGAAA -CGATGACATATTTGAGGCGAAATGAGATATGCGAGGGAAAATCCGCTGAAGAACACCCAC -TGCGCGCCGAGGACCATGAACCGCATTGTTGAGCTTGGCTCGGCGAGCTTGAGGAAGGTT -AGGAGATGCATTCGTTCGTTGTAGGCTTCTTCGAGGAGAGTCTCTATCCTATATTTTGTG 
-AGACGATGATCCTCTTTTGTGTATATGAAGATGGTTGGTATCAACTCACCACCCATTGTC -TCTCCTCATTCGGCGGATGCTTTTGAGATGGCGGAGCATCCCGGCCACCATCCCTGGAAC -GCCGGCTACGCTTTCCAGAAATATGAATCTCGTGATCCATTTCTCCTCGGACATGAGGTA -GGGGTTGGAAAGTGTGCTTGTGTTTGTTGGTCGAGCGTGGTATCCGGAGACAAAATCCAT -ACCCCAGCGCAATAGACGTACGGTTCCAAGAGCTGCTCTATCTTGCCAGGTATATGCTTT -GTGATGAGCGGCTCGGACAGCGAGGATCTGTTCTTTTGTGTATCGAACAGGAATGTGGTA -TATGTCAGATACAAGTCTTCCTGGTGTGAGGGACAATCAAGGAAAGGTTCCCTCCATCGA -GGCAACTCGATAGAACATACATAGGATGAGTCCATGACGAGTTTTGGCCAACCTGTCTGC -TTGTTGTATGATTTTGCTTGATCAGTACCGAGGTAGAGAGGCTGCTATATCTGCTTGGGC -GAGCATATAGTCTAGTACCTGTCCTAAACATGGCGCTGTGCTGAAGACGCGTGGACATTG -GGATAAGACGAGTTGTACTTATTTGAACATTCATGAACAGCGACGAAAAAGGCAGAAGAA -TGAGAAAACTATATGAACTGGGATGGTTTGTTCTGGAATTCAGTAGTCAAACTCCAACAT -GCCTTGCCTTTTATGCCATGGATGTTCTGTTTCCTACAATGCAGAACGCAGACTATGACT -CCAATGTTGTGAAGGATCATGTATTCTACAATATACAGTGTATACTAGATAATTGCATTA -CCAGCTCCACGAGATTATACTAAAGATGGCTACTGCTGGGAGAAGATCATGTTTTGTTGA -ACCTTGATCCTCCCGATTTCATCCAACAGAGAGAAGACCTCGGGTGAAGAAGTTCTAGAT -GAATTTTGCGTGACTCATTGAAGATTTAATTTTACCCAAGAGTGTATTTCATTGAAAAGA -GATAACAATAAAATATGAGTTGTGGACTAGGAGTCTTACGTTTGCCTAGTTTACCATGCG -TAGACTGTCATCTCATGATGTTAGACTCCACATGAAAGTTGGAAGAGAGTGATGAAGAAC -GAATCAATAACTACATGATTATATGTGTTGAAAAAATGAATAATATCCTCCTTCTGACAA -GTGAATGATTGAATAGGACTCGGGGGTGGTTGGACACTCCTTTGTCCAAATAGAATTTCC -CAAATAAGAGGATATGGTTTATATCAGAGAGCTATTCCCTATGCACAAAAGAGATGACTA -CTATTATTATATCCTTTGCGAACCTCGGCGGTGTCTTAACATGTCTGCTATTGAACAGCA -GACTTTCGAAAAGCTCGAAAACTGCTAGCGATCTGTAAGTAAATTGCGGTTGTTAAAGCG -AGAAAATATGTGCTTTAGTCATATGCTATATCCTAATTCTATTCTACAGCCATTTGCCCT -GTATATCTCGTGAGTAGCAATTGTCTATCCTTCCATGAGATATGGCATATGATCTCAGAT -GGTACATAATCCTTTCTGTCATGCCGAAAAGATATCGACTACCCACTCAGTGATATGAGA -TAGTATGCAAAAATCATATTCTCCAGTCTATCTTCGTGTCACTTGATGAAACCCAAAGAG -GCGCTCGGTAGGCATACCGTTGATAACCTGGGCAAGCAACCCCGCCACCGGCGAGTCTCC -CGTACAGACACCACAAGAACAGATTATCTGGATTGTCAATAGGACCAATCCTCAAATAGG -CTCTTCGGGTTCCATTACTCTTTATGGAGAGATTGAACAGCAGCGATAACCACCCCGCAT -GGTGGGTTTCATCAGTTATCCAGGGCACCGGGCGCCTGTCCGAGATGCCATCTGGCTCTG 
-TACGAGGGGAACAGCATTGGAAGATAAAGAAGCTGGTCCCTTCGCCTAAGTTGGCATTAA -CAGAAGTTCAAATTCCTAACATCCTTCCAAAGCAATGGGTACCTTACTTCCTGGAAAGCC -TTTAACCACTCTGGCGCGATGTACTCGACCAGCGGGCAATATGAGTGCCCCAACGGCACT -GCGATTTAATTCAACGTTGCAAGCACACGCACAAGTCAAGGCTTCTGCGAGCCCCTCACT -ACCACCTCTCTCTGTTTTACCGGCACGTGTCTTGCTCAGGTCTTTGCTAGTCTCAACGAT -TTCGTCGAAGAGATTCCTTCTCATTCCCGCACTCTCGTTGTTGTCCTTCCTCTCCAAGCC -TAACAGGATCTGGCTATTTGATGTGGATCGCAATCCGGTTCTACATGGGATATTAAAGAA -GACATTCTACAATCAATTCTGTGCTGGAGAGAATGGGGCAGAATGTAGATCCACCATCCG -GGAAATGAAAGATATGGGGTTCCGCGGTATGATCTTGACATACGCTGCTGAAACTGTGTT -TGATCATGGTACTCAGGCTCAGCATGGACAGGGAGTTGCTGCTTTGGAAAGTGAGCACGG -GGATGTGTCCATCGATGCCCCAGTCTACCAATGTCCTAGCATTGAAGCTTGGCGGGAGGG -GACAGTTGAGACCATCTATATGACGGAGGCAGAAGATTACCTCGCAGTCAAGTAAGTCAC -CCTGAGCCCACTCTTGCATAAAATCATGACTGACCATTCGTAAGACTGACAGGTGCTGGC -GTCAAAGTCACCGAGGCTTTTGCCGCTGGAGAGTTGCCTCCGCAGCAAATGATGGATGCA -CTGCATGAGGTCTGCACAAAGGCCAAGGACAGAAAGGTTCGCATCCTTGTCGATGCCGAG -TCTCAGCATTTCCAAAAGGGCATTGCGCGGGTTGCTGTCGAGCTTATGCGCGAATACAAT -CGCGATGGTTATGCAACCATCTACAATACCTACCAGGCTTACCTCAAAAACACACCAGTG -ACACTGGCCGACCATCTCGCTGTGGCAAAAGAAGATGGATTCACTCTCGGTCTCAAGCTC -GTGAGAGGCGCATACATGGCCACAGATGAGCGGTCCCTAATCCATGACACCAAGGAAGAT -ACCGACAACGCATATAATATGATTGCCCAGGGAGCATTGAGAAAGAACATCGGAGAATTT -GGAGATCAGGGCAGCCGTGGCTTCCCGTCGGTGAATCTCTTCTTGGCAAGTCATAACAAG -GAGAGCGTAGTGGCTGCCCATCAGCTCCATAAGCATCGTGTCATGTCTGGTTTGCCGACT -GTGCCTGTGCGCTTTGCCCAACTCCATGGTATGTCGGATGAGGTGAGCTTTTCGCTGCTC -CAAATGAATGATGGCGATGGAACACCAGAAGTTTACAAATGCTCCACTTGGGGAGGTCTG -GGGGAATGTCTAGCCTATCTTCTTCGCCGGGCAATTGAGAATCGTGATGCTGTTCTGCGG -ACGGATAGTGAGTACCGGGCGTTGAAGACGGAGCTTTTCCGGAGAGTGAAGTCAGTGTTT -TCTCTTTCGCCATCTCCATAGCGTACGAAATGATATATGACTTTATGACCATGCATGACC -TGGTTGTGTATGTCTTCCGGAGTAATGACAGCATTAATGTCGGTTTAGGTAAAGATCTGC -ATCTGGTCGGTGCTCTCGGGGACAATTATCTTCCTAGGCATCCAAGTTCGGAGACTTTCT -GGATACAGGAAGCAGTGAGAATCTAAGGAGTTTGCCTGATGATCATTGCTTTTTCTTCTT -ATTTTAGACAACATATCGCCTTGAAGCTGACCGCAACGATGACGTGATGGTGTTCAAGAG -GTCAAGAATTGTCCGTATGTTTCACAGAGCCCTTATCAAGAGTGTTAGAAAGCTGAGGCA 
-GGCAGCTTGCGTCATATAGATTGTTGACAACAGCCAGAGGTGGGAAAAGTGATGCAATAG -ATAAGAGATACTCTGTGAGTCGGAAGGGGGTATACTCTTCACCGCCATACTCGCAGGTTC -ACAGCCCACAGCCCTCTCTCATCCCAACATAAACGTACCGTAAGGGCAAAGAGGGGAAGT -GGATCGACTTTCATCGTGATCAGATCAAGATATACGTCCAATAGATGGATCGGAAGGTAA -TCACCGAAGTTATTTCGAGGTAGCCTATTTGCCGGATGCCGACAGCCCCGGGACTAGATA -AAGCTATCACGCCTAGCGCCGGAGTGTACCAGACTTGTAGGATGTTTAAGCTCCACTGGT -CATCGAATCCGGGGGCCTGGTTGGGCCGTTCGGTGAACCTCCGAAGCTTGGTAAGCTTGT -CTTTGTGTACCTAACCTTCCAAGGTTAGCCCCATAGGTACATACGGTACATACGGAGAAC -CTTCAGTCCGGGGCCCCTCGCTTTAGGATTTGGTAAGTGGGGGAAGGAGGTCTTACATTG -GGGTAACTTGGGAAGTTTTAAATTGCAAGATTCTTTCCCTCATGTTCTTGTGCGAGAAGT -GATAAAAGCTGATCCGAAATCCTTTCAATAACTATACTAGAATCCTTCAGAGTGAACCCC -ATGGCCCCCCTCAAAGGCAAGTCATTGAGCCTCGTCATCGGTCTCGTTGGCGCTGTCGGG -TTTGTACTACAGGGTTACGATCAGGCAGTTGCCAATGGGTTGTTGACATTGGGCTCTTTC -ATCGCCGTCTTTCCTCAAATCGATACCGTAAATACCAGCGGTAGTGAGAGGGCCCACAAC -TCTAAAATCCAAGGTATTTACTCTAATGAGTTGGTTCTTACAGGTGAACACGAACAATGC -TGACCATTACAAGGCACAACGGTCGCTATCTATGAGGTCGGATGTGCCCTAGGTGCATCT -TCTTGTGCCTTTCTAGGTGACAGACTTGGTCGTCGCAAGACCATATTTCTTGCTGGTTGC -ATAGCCTTAGTTGGCATAGCAATTCAAGCAAGTCCCTTCTCGCTCGCCCAACTTATCGCT -GGCAGAGTTGTCACTGGTAAGCTCCCAACATAATGTTAAACAAATACAATAAACTAACTA -GTGACTGAATAGGCCTCGGCGTCGGTGGCTTCACTGCTACAATCCCAATGTACGTCTCCG -AGTCATCTGGAGCAGAGGCCCGTGGACGAATGGTGCTGCTGGAGGGATGGTTTGCAATCG -GCGGCATTGTTCTCGCAACTTGGTTAGAATTCGGACTCTACTATGTCAGCGACAACTCTG -TGAGCTGGCGATTTCCTATTGCATTTCAAGGCCTCTTTGGTCTCGTTGTTGTGGTATGTA -TCATGCTACTACCCGAATCTCCCCGTTGGCTCGCACGCGTCGGACGTTTCGATGAGGCAG -CTGAAGTGCTGGCCCGCATGGAAAATGTGCCAGTCGACTCAGAGCATGTCCTACAGGAGC -TGGAAATTATTCGGCAGTCACTTGCAATTGACGAGAACACTGAATCGGCGGGTTCCTCTT -CGCCATTTGCGCCCACGAAGAACCGCCACCTGCATCGCACAGTAATTGCTGTTGGTGTGA -ACATTCTGGCGCAGATGACCGGTGTCAACATAATAACTTTCTACTCCGATACTATTTTCG -AGTCTGACCTGGGTTACTCAGGCACTCTCTCACGAATAATTACCGGATGTCTGCAAATAT -GGCAGTTTTTGGCTGCCGGCCTGGCTGTTCTACTCATTGATCGTCTCGGACGTCGTCCAC -TGCTTATTACTGCCACTGCCGGTATGACCATTGCCCAGGCCTGTCTTGCCGGGTTATCAA -GCGACCTTGAAAACAGGTCCGCAGCTGGCGCATCACTCTTGTTCTATTTCGTAGCGCTTT 
-TCTGCTTCCCCATTGGACTTTTCCTTGTTCCATTTATGTACGCCGCAGAGATTGCACCGC -TACGTACTCGTGCTAAGGTGACAGCCATGTCAGCTGCAGCAAATTGGCTCTTCAACTTTG -TGTTGGCAGAGGTGAGCCCTGTCGGATTCGCTACTATCCACTGGCGGTATTACATTGTCT -ACACGTGCATTAGCGCCTTTGCTTGTGTATCCTTCTACTTCTTCTGCCCTGAGACTAGGG -GCCGTACACTAGAGGAGATTGATGATATTTTTGTTCAGTCGAAGTCAGTATTCGATACTG -TTCGCATCGCACGGGAGATGCCTTACCAGACGGAGATTCTGGCGCACATCAACGATACCG -GGAAAGGTGGAATGGAGTCTACGCAGGTGGAAAATGCTTAAAAAGAAAAGGAAGAAGCTT -GTGATGATGTGATACTAATCGGGGTTGTCTTGATGGAAGCTTATGATAGATTCTAGATTT -ACGTCTGTATATAGTTGCGTTGAGTCTATTGATTCTAGCGATTTGTACAGGGTATGAATA -TATAGTTAGACAAGAAATCTATGTGTTCAACCTGAGATTTTGGTATGAAAGGCAGATCGC -AACTCCATGTTCGTCAAACACCCAATCCCAAACGCAGGAATGATACGAACACAGCGCAAC -AAAGTCGACGAGGGAAGATATTGCTGTGAATGCTTCACCCTTTCCAGGAATGGCATAGTA -TATTGCCGTGTATACATATCCAGTGCCAACAAGTGCAGCGGAGTAGCAACCAGCTGTGTC -AGGACGGGTACGGTCAACTGGGTGATGACAGGCTTTGCGTGGGGATGAGTGGCCAAGTTG -TCGGGGATGGCATCCGACAACCACGGGGCCAACGTAAATGACCCAAAGATAGTCACACCG -TCTCGTACGAGGAAGATAGCAGCTGCAGATCGTGCTAATGACTTGTTCTGGAGGGGTACA -GAGATAATGCTTGCTTTCGATGTCTTGGCATCGGGACCGTTACCGGTGCCGTATTCCTGC -GCAAATCGGATGTCCTTCCAAAGCGCTAGCGGCACGTTGGCCATCATAGTAGTCGCAAAT -GTAATTGTCCCCGCAGCGGCTATCTCAAGCTGGCGACTGACTGTGTCGGTCACATTTGCG -ACGGAATAGGTTGCCGCGTAGAGGGCCCAGACGATACCGAGGGGTCGTTGGAAGACAAAC -AGCCCCGGCTGTCGGAGGGCCCCAAGTGCATGCCGACGAAGTCCGCGGAGAATGGGCTGC -TTGAAAGCAGCTTGCTCGACAAGCGCTCTTAGAGCTTGTTAGCGTGGAATCAGCGCGCTT -GGAATCGAGACGAACCTATCAATAATTGCCACCGCGGGTGTCACCAGAGACGCCGAGGCT -GCTGCTGCGCCGAGATCGCCTGCCAGGTGACCTGGTTGAAAATTGCTCCAGCGAGTCTCA -TTTTTTTGAGCTTCTTGTTGCATATTGATAGCCTGAAGTACATAGTTCCGAGTCTCAAGC -TTATCTACTTTGAAAATATAGGATTACACTACTCAGCCCGGCCTGATAGGAACATCGGAT -GGCTTCCCGATGCTGCAGATCTTGAACAGCAGCTGTGCACTTAGTTTAATCTTGTCCGCT -GGAGCTCAAGCTCATAGCAAAGCATCCTGTGGGGTAAACGGGGAAGCCTCCGATACTTCT -AGGAGGGGTCGTTGACGTTAAAGTCGATCTATTCGTCGTCTCAAACTGATTCCTCAAGTT -TACTTTATCATTGACTCGACTCAAGATTACTATCTAAGTATTCATCATGTCTACTACACT -CATGAGACTTCGCTCAGCACGGCCTACTGGCTCTCGCCTGCGCTGTGTTCCTGCGGGACT -CTCTATTCAAACTCGTACTAAAACTACCATGCCATTCCGCCTCCCCGATGCTCGAAACGA 
-ACCAAACGTGAGTATGAAGCCATTATTTAACCCACAAGGCTAACTTTAGTATAGCCTCTA -TATAAAAAAGGCTCCCCGGAGCGTGCCAAGCTCGAAGAAGCATTGACCAAGCTCCGCTCC -CAGCTGCCCGTGCGGAGCGAGCTCTTCTATGATGGAAAATTCCAAGCGAGCTCCAAGTCA -TGGGATCAACCGTTGCCTGCCGAGCATGCCATCACCTTCACAAATTACCCACTTGCTACC -AACGAGCAGACCCGATCAGCTATTGAGTTAGCCCTGGAGGCAAAGCAAAGCTGGCAAGAT -ACCCCATTTGTTGACCGAGCTTCGATCTTCCTCAAGGCTGCCGAGCTGCTATGCACCAAG -TACCGATATGAGATAATCGCAGCTACAATGCTGGGACAAGGGAAGAATATCTGGCAGGGA -GAAATTGATGCTGCGGCTGAATTGGCGGATTTCTTCCGCCTGAACTGCAACTGGGCGGCT -GAGATCTTGGAGAAGCAGCCAACTCGTGGGAGTGATGGTATGTGGAGGTTAGTATACATT -GGGATGCTAAGATTCTACGGAAAAACTCATTAACAAATTGCAGTCGTATCGACTACCGTC -CTCTCGAAGGCTTCGTTTACGCTGTCTCCCCATTCAACTTCACAGCTATTGGTGGAAACC -TAATCTGCGGACCTGCGCTGATGGGCAACGTGGTACTGTGGAAGCCTTCGGCATCTAATG -TCTACGCGAGCTCTTTGCTCTACAAGATTCTCTTGGAGGCTGGCCTCCCTCCCAATGTGA -TCCAGTTTGTGACCGGTGATCCTGAAGCCATTACTGAGACTGTTCTCTCACACCGTGACT -TTGCCGGTCTGAACTTCATTGGCTCCTCCGATGTCTTCCGTTCTCTGTACGGGAAGATTG -GCCAGGGTGTTGCCGCGAAGACATACCGCGAGTTCCCTCGTTTCGTTGCTGAGACCAGCG -GAAAGAATTTCCACCTCGTTCACCCCTCAGCCGATATTACTAGTGCAGTCAACCACACCA -TTCGTGGCTCATTCGAGTACCAGGGTCAGAAATGCTCAGCCACATCTCGTCTCTACTTGC -CCGAGTCTCGTGCAGAGGAGTTCCTTACTCAGTTGAAGGCTGGAATCAAGGAGATCACCA -TCGGAAACCCGGACAAGGACTTGGAGGCTTTCATGGGCCCCGTGATCCACCGTGGATCGT -TCGAGAAGATCAAGTCCATCATTGATGCTAGCAAAGAGGACCCCTCGCTGAAGCTGATTG -CGGGTGGTACCTATGATGAATCGGTCGGGTACTACATTCACCCTACTGTGTATCAAGCCA -ATTCACCAGACCACCGTCTCTTCAACGAGGAGATCTTCGGCCCCGTGCTTGCCGTGCACA -TCTACAAGGATGCCGATTGGGCTTCTACTTTGAAGTCTGTTGATCAGAATGGAGGTGGAT -TTGCCTTGACTGGTGCTGTCTTTGCCAAGGATCGCGTTGCTATCCGCCAGGCTGAAGATG -CACTCCGGTACTCTGCTGGCAACTTTTATATTAGTAAGTTTCCCAACACTCAAGTGAAGT -ACTGCCATATCTAACCGTTTGATACAGATTGCAAGACAACCGCTGCCCTTATCGGTCAGC -AATCCTTTGGTGGTGCGCGTGCTAGTGGAACAAACGATAAGGCTGGTAGCTCTGACCCTC -TTCGTCGTTTCGTTAGCCCGCGCTTGATTAAGGAGGAATTCTTTGGTCAGGACGAGGGAT -TCACCTACCCCAGCAACCACTAATCTTGGTATATAAAAGGAATAGTCTATAGAATTTGAA -CGCAATATGTACTTCCAATCCAACCTTACTGTTTTTTGTTTCTGTTAAGATGCAGGATCC -CGTTTTTATGCCCAATGCACAAGAGATTACAGCGCCTATACTCTCAATATCTCATTGTCA 
-ACTGGCCTCGATATGCGGGTTATACCCGAATCATTGCTGACCGCCATGACCCCATAGCCT -CCCGGATTTACCTGTGTGTTGCACCAAGATTGTCCTATATGATTCTTGTGTCTTCCAAAA -TTCTGGATCTAAAGATCTGATCTTGTACCCTATCCTCCTTCTAAATTACTTCCCCAAATT -GTATCTGTCCGTTTCGATTACTTCAGTCTTGACCTAATTGCCTGTAGGATTCCACGGTGA -TTCAATACATCTCAGAAGTCTTCAATATCATCAACGTGGTTTACCTTAATTGGACTGTTG -CGCAACTAAAACCTATTTCTTTATGTTTTGCTTGTACTAATCAAGTAACTCGATTGAAGT -AATATAGCGCTAGCGCTGAATAGCACAGATCAAGGTATCTGTTCATAGCCCCGGTGGACT -ATATACTGGTCTGACATGACTCTCAAATGTAACTAGTTTACAAACGACTTAATGCTTTGA -ACCGTATCATATCTTATCTTATTTCAGTCATATTACCGCAAATCCATGCTATCAAAAATC -TTTCTAGGTACATCGGGAGGCCTCCCGATGTCCCACTACTCCGCAGCCCCCGCACTAGAT -AATCGGCATTGACGCCGTTGAGTCCGATGGATTGAGCTGAGAATCGACTATAGATATTAA -ACTATCGCTCCTAGTAGAAGCTTCCCATTGCATTCCACCATGGGCATCTCCAAGTCTGAT -CCTATCATCATTGTTGGAGGCGGAGCCTTCGGGCTCTCTAGCGCCCTTCATTTGACTCGC -TCTGGTTTTACAAATATTTCTGTGTTCGAAAGAGACGAGCATATTCCTCCGCGTTATTCG -GCCGCCAATGACTTGAACAAGATCGTGCGCGCAGAATACGAGGATCCGTTCTATACCGAT -TTGACAATTGTGAGTCACTATCTTCGATGCAACTATAGTAACCGGACTAAGTCTAATGCC -TCGTTTTATCTATTGATTAGAAAGCCATCGCCGCATGGAAAACTCCTCTCTTCGCACCTC -ATTTCCATCAAACTGGATTTCTCCACTGCGTGTCTGGCGAAGCACCCCAGAAAGCCATTG -ACACCCTCCATAGATTCCGGGCTTCGGCAGAAGGGAACAACCAAATCAAGCCACATGTGG -TACCCCTTAATGGCGAAGAAGATATTCGTCAAGCCTGCTGGCAGCTCGACGGTCCATTGA -CCGGATGGAATGGATACCTCAACCGCTTCGACGGGTATGCACACTCCGGAAATGCCTTGG -CAGGAGTATACCGCGCCGCACAGGCAGCCGGCGTGCGCTTCTACCTTGGCGAACGCGGCG -CAGTTGATGAAATTGTCTACGTCAGCACACTGCAAGGCAGGAAGAGCTCTGGAATCCGAA -CGAAGGATGGGAAATTCCACCCATCTTCGTTGGTGATTGTTGCAGCGGGAGGTGCAGTCG -GCCGTCTGGTCCCGGAGATTGGAAAGAAGGTCGTGGCCAAGTCGTGGTCTGTTGCGCATG -TTCATCTTACGGATGACGAGACGTCTGCGCTGCGTGGTATCCCTGTCACCTATGCTCGCG -ACCTGGGATTCCTCTTTGAGCCGGATCCGCAGACCAACCTGTTGAAGATTTGTCCTATGG -GCGGAGGTTATATCAATACGGACCCTGAGACTGGGGTTTCTCATGCGCCTGGGACTTTGG -GGGAAAGTGCGTTTGTGCCGGAGCATGATGAGAGACAAATGAGGAAGCTACTGGCGCAGA -CTCTCCCGGCGCTTGCAAATCGCCCGTTGGTGAAGAAATCGCTTTGCTGGTTTGCTGATA -CTGATGACTCGGACTTTATCATTGATTTTGTTCCGGAGACGTCGTCGTCGGTTGTTTTGT -TGTCTGGTGATTCTGGGCACGGGTTCAAGATGTTCCCTATCTTTGGATCGTGGGTTTCGG 
-ATTTCCTGCAGACTGGTCGGCAGCGTGAAGCGCGGTGGCAATGGAAATATACCGATCCCA -ATGAGGGCAGGGAGAATTGGGGTGGTGATGTAAGCTGGCGTCTTGGAGAGTCGAAGGAGC -TCTCTGAGATCCGGCCTGCTGTATTGTCTAAGCTTTAGATCTATGAGGCATAAGAATCGT -ACATATATCGATTTTTGTGACACTTCCAAGTTGAAGCGAACGTATAATTAGCCATTCGAC -CGATAGATCAACTTAATAAATTTAAGCACACCATATTCTCACTGCAAATATCAGTAGATA -CCAACTTAGAACTCATCAATTCATCAGACATTCAATTTACCATATACTATGTTCTATCAT -CGCATTTGATACCCACTCAGTATCATAGCTCTCAATCGAGTCTGCAACAGCCATAATTTC -AGCCGAAGTCAGCATCGTACTAAAATCACACTCCACATCAAAATCAGGATCCGGCCTGAG -AAGAGCATCATCATATCGAACAGACTGAGGTATAGCCCCAGCAACCGGTTTTGTAGATGG -GGAAGCTGGGTCCGGAAGAATACCAGCCTGCTGAAGAAAATCAGGGACCGCTAAGTGCCG -TGGCTGATCACCAGAGATACATCCCAATAATGTCTCGTGTAGCTGCTGGAGCTCGGATCG -CCGGAAGCTAGCAACTTGGTTTCCGTCTCTGATCATTTCGTCGAATATGGCGTACGCTTT -CTCCAGCCAGTTGGGGTGGCTTTCTAACAAGTGCGGGTCGATTGATGGGCCCATCAGGAG -GATAATTGTGGACACAAAGACCGATTCTAGATCAAAGGGAAGGAAGGTTTCTAGCAGGTG -GATGTTAGTGGAGAGGGCCCATCCATCACAAAGGTTGCTATACCTAAGAGGCCTTGGCTC -TGTAAATTGGAGAGAATGCTGATTATGTGCTGGGCGGATTCAAGACACATCTGCATCAGG -TTTCGGACATTTCGTGATGCATTGAGTGATTCTAAACAGCTTTCCGGGGACTCAAAACGA -ATTTTGAGAAAGCAGAATAGGAGAGGTCGGGTGGCCAGGATGATGCACTGAATGTTGAGT -GAGTAGTCAGCCAACGCCTCTCTTCCCACGAATTATAAGACTCGCACCTGATGATATTGA -AGATGTAAGTAGGCTGATGTTCGAGAAACGCCCTTGACCGGGTCTAAGTTTAGCGGAAAT -GACTCGCGAAGCTCATCGGCAAGTCCTGCGATGTTCGCCAATGCTGATTTAGTGCTAAGT -AGAAAAGTCCGGTTGATACGTCCATTGGCAACGTATATTGCTGCTCATCTTGTTAGAGTG -GGTATACAATGGAAGCTCGGGGGATCTTACTGCTGTTCACCTCGGCGATGATCTGGGACA -ACTTGATGTGCATGCCTAATGACATTGTTTCGGATGGGTCTGGAAAGGTGGGGAGCTGGG -TTTGCACATATCGGTCATTGATAGACTGAGGTAGACCCATTAGCGATGTCATTTCTCGGT -CCAGAATATAGATGGTCCACCAGATTTTGCCAGATCTTTGGACCATGTCATGTCCTAAGT -CTTCGATTGGCATATTAGTGTGCATTCCTTGTGCCATGGCTATCCGCATGGCTTGGCCGA -TCTGGATTGTTGATTAGCCAAGGATTTCATATGAACCCTTTATGATATCTTACGTAGTTA -TGTGCCGACGTTCGAAAGTCAACACACTGATAGTATAAGGCGATGCAACATAGGATTTCT -GTTCCCAGCATAGGATCCCGGTACAGAGCAGTCGGATCTGGCAGAAGCTGCAAAGCTTTG -ACAAAATAGTGCACCCCTGGTGGTTTTCTACCTTGAGCCTTAGGCTGGACGAATCCCTTC -CCAAAAGCCAAGATGAGGAGGAAATGGATGTACCACAAGCTGTTTGTCATGGATTGCCCA 
-TCTCCCGAGTAAAACCGCTGTAAGGAGCTCATGAACTCCTCTTCTTCGAAAAGATGATAC -AATTGCCCGCATCGGAACTGCACGGCGTTTATCAAATACATGGTGTGGTCGCGAGTCGGG -ACAACTGGGACATCTAAGCTCGGGGTTGTTCGCAGCCCATCCCATCCCAGTTCATAGGTC -GTTTCATCAAATAGCAAAGTTTCTGTGGGTAGTGCATCCTGATAGAGGTGCTGATGTGCC -AGGCTGAGAACTCGCCGAGTGAATGACCAGTTGGATGATGTTCCAAGATAGACTGTAGAT -GTTAGAGTTGCGCATTCTGCGAAACCAGTGTGACCTACATGTGCGACCGTTTGCTGCAGA -CATAAACGCAGGGGGACCAGACGATAACGGGTTGGCTAGACCCGAGTCTAGATCCTCTAG -GTCCTGGTGTTCATCATGATCCTCTGGAGTCCTAGTCCTTTCCAAAGTGGGTAGATGTTC -GGGATCTTTGGGATCTGCATTAGATTAGCAGGAGTAGGTAACAATCTAAGCTGTAATAAG -GACATACCAATGGGCTGAGGCTCGAAATTCGAGCTGTAAGGGCTCGCTTGGCCCTTCTCG -TTCTGCTCTATACGCGCAATCTTCTGCTGCAGTTCCAGAATGTAGCTGCGATAAATGTCA -GCTATCCATACTATTTCTTAAGACTCCATCTGACCTACCCTCTCGTCACCAGGATCTTTT -GATCGCGGTCGTTAAAAATGCAAGCAAGCTTTCTAACGAGGTTAGTCGGGTATCTCAATG -GCGGCATAGCATAGAATAAAGCCCTAACGCACCGTTTACTACAACCATCGCATGGCTGCG -AACCGGAACATTTTATTTTTTGTCTCCGGCATCGTACACAGGCATTTGATGATCGTTTTG -TGACTCTTCCATCACCCATTGTTGAGAGATGTTCGAAATCAAGATAGAAATGTGGGGGGC -GAAAAAGTTCCCCCAAAGGTCGTGAGGGGGAAATATCAGAAAAACGAATCAGACAGCTTT -TGATGAGAGATGTGGATACAACATCTTATCTTAGGATTTACGGAGAGATTGAAAGATACG -GCTTCCCTTCCGAGGTTGAACTTATATGTTTTTCATTCTGCTTTCAACGCTGCCCAAGCA -ACTTTCGATTCGTCTCCCAAAATTGTCTACATGCGGTGTGACCCCAGTGCTCAGTCCACC -ACTCTCCAAAGCTATCTGGAACCCTCTGCTCGTCAACCCGTAAAACCCGCTCATCTCTTT -TAAGCGAACAGAGTGCACATGGATGAGAAGCTTTCAACGCACCCTGGTCCGCCCGCGCCG -TAATAACTTTGAGCGCACTAGGATCTGAGACCGTAGGGATTGGCCGCAGGCTTGCGGCTT -GTAGAACAGCATTAAAGCTTGCCGCCCGTTCACGCCAAGTAGTAAGCACGTCAACGGCAT -CTTGTTCTTCTTTGATATTTTCTTGTTTGTTCACGCTGTTAAGTTTCATACCTCCAGCTT -TGCCGGAACGGGCAGGGCCGATTCGCATACTTTGACTAAGGATTGTATCGCGTTTCCACC -GGAGACTTCGCCCTGCGATTACACGGGCGGCCGATTGAAGGACATTGTGAATATGTAGGG -CTGTTGTTGAAATGGTGTTTGGGGATGAGGACGGAGCCGAGGATCGCAGCTGGGATTTTG -CGAGTGCAGCCTCTTTCTGAAGGTTCTCGAACACATCTAAGAATAACTCGAGAAGGACTG -ATGGCGGTGGAATATTTGTTGGGCGGACAGTGGCCGTGGTTCCAGAAACAGAGCTACTAA -GGGCTGGGCTAGGAGCTGTTGGTGGCGAGGTGGGTTGGGATTTGAGCGCTGGCTTCACAC -TGGGTCCATCTGAGAAATCACCCCAGTCGTCAAAGACTTCGTCGTCCCAGGCTGATGCGG 
-GCTGCGAATTGGATTTGACAGGCCTTATGGGTGTTTTTGCTTTCTTCTTCGAGACCTTGG -CCGACGGCTGGCTAGCAGGCACATCGGCGAAATCGCCCCAGTCACCAAATGAATCATCCT -CCCATGTCGAGTGGTCTTGATTATTAGGTTCAAGGTGTTGGTTCTTGGCAATCTTAGATG -GATGTTTCACAGTTGATGCGGAGTCTGGCATTGAGAGTGAATCTAGCAAGTCAAATGTTC -CAGAGACCTGTGGATTTTCTCGCCGAGGCTCTGGTTTTAGAGATGTAACTGTGTCTGATT -GCCGGAGAGCTATCGGTTGCGCATGTAAAGTGTTTGAATTAGGGCCCTCAAATTCACCCC -AGTCGTCGTCTGCATCACTTGCTGGTGTATCAAATTCTGCATCGAATAATACATCACTAT -GTGGATTCAAGGGAAGGTCAAGCTCCTGATGATCAGGTCTCTGCCACGAAGATGTCGGAG -TTGTAAATTGATTTTGTGCTTCTTGTGTCGGATTGGGACGCTTGTTGTGCTGCACACTTC -CCGGTGACCCGAAGAAACCGATGCTATCGTCGGCATCGAAGAATGAGTGCTTCTGGTGCT -GCAATGGCTGCTGTTTGGATCTATTGTCTGGAGCAGACGCCTGACCAAACTCTGCGAGCA -GATCTGCGGACATTTTGAATCTCCACGGGTGTCATGAAGCTGTGAGAGGTGACATAAAGT -GAAGCGCTGCAGGAATGCAAAGATTTTCTGCAATGGCGGCGACACAAGATTGAATAAATC -GACGTCAACCGTAATCTATATCACTTGTGATTTGCGAACCTCTCAATCCGAGATCAACCG -GAGTATATAACGGTGGAATGTCTGAATGGCCGAGTCACAGTAGCGCGAGATAAGGCACGG -GCGGTCTACTATCGCCCGCATTCCCCCTCATCTTCCAAGGTTCACCTCGTCATCAACAAT -GAGAACCATTGAGGAGACCAGGTATTTCCACTCCTATCAATTATCTGCATATATTCATTG -ATATTGACCCTCACAGAAAGAGATGGAACGTACTGTTCTCCGACAACGACACACCTTCCG -ATCTCCGAGCGGCGTTGCAATCTGAGCAAGGAGGAACCCTGTGCAATGACGGACTGAGGT -CGGTTTGCTGGAAGGTTCTGTAGCCTTGGAAATGTGACGTAGAATACATACTCATACTTC -ATGCAGGCATTTCTACTCTTCGACGGGTTGGAAAAAACGGAATGGACGTCTAAGCTCGAT -GAATCCCGGGATGCCTATCGCGCGTTGCGAGATCATTTCTTGAAATACATCGAACACCCA -GACGATTTAGAGTCGACTGTTGATCCGCTGGCGGATGATGAGCAGGTATGCAACCCTCTC -GCTGTGCTCTGCCCGACTTGAAGAATTCCCCTATCTAACTAAATCTTGTTGGAACAGTCC -CCGTGGCAAACACTCCGACACGATGAGACGCTACGTACCGAGATTCTGCAAGATGTCGAT -CGATGCTTGCAGGAAAACTTCTTTTTTCAGGAGCCCGACACGAAATCCAAATTGACTGAT -ATCCTGTTTGTCTACTCGAAGCTCAATCCAGATGTCGGCTATAGACAAGGAATGCACGAA -TTACTCGCGCCTATCCTGTGGGCAGTAGATCGGGACTCGGTGAAGCCACACCCTGGAGGA -TTTGATGCGAAGAAGGACAAAGGTGAAGGCTTGATGCTAAAGCTTCTTGATGCACAGTTT -GTGGAACATGATTCATTTACGCTGTTTCTCTCTGTCATGCAGACAGCCCGCACATACTAT -GAACACGGCGAGACACGGTCAGCAAATGGCCAGATGGATGTCATTCCGATAGTCGACCGC -TGTCACTATCTCCATAAGGAAGCTTTGGCGATTATCGATCATGAGCTCGCTGAGCACCTG 
-GAAGCTGTGGATATATTACCGCAAATTTTCCTAACGTAAGCTGCTTCCAGATGGATACCC -ATTCATTTCCTTCCTTGCTGACTTCCCAAAAAGTCGCTGGATGCGCCTTCTGTTTGGGCG -AGAATTTCCATTTGACGATGTCCTGATGATGTGGGATCTGCTGTTTGCTCATGGACTACG -GTCTGATCTTGTTGATTTTACGTGCATTGCAATGCTGCTAAGAATTCGTTGGCAATGTTG -GTCGACCTGCTCCGTCCCCCATTGTATCTCATTTTGGCTGACATGCATCACTCAGTGCTC -ACGGCCGACTATACAGCTGCCTTAACATTATTACTTCGCTACCCTTCGCCCGAACCACAC -ACACCTCAAAGTTTCGTGCATGATGCACTCTATTTGGAACAGAATCCCACAGCTGATCGA -GGAAACTTCATTATTGCCAAATATTCAGGCAGACCGCCGGACTCCAAAATTCGCAACCAC -TCTGGTACACAGCCCGCGAGGAAGGCCTTCCTTTGGGAAGACTTCAAGAACCGCAGCGAA -AGAAGCCAACCTCCTGGATCACCAGCCCGAAATAGCCCCAAAAGCCTTGAGTCTCTTTTT -CAAGACGTCTCTCAGGGAATTCAACGCCGGACAGAGGCCTGGGGCGTAGCCAAAGCTGTT -CGAGGCGCCGTGACAGAAGCCAGGAAAAATATGCAGACAATGCATTATGAATCAAATATG -CGTCCTGGATCCTCGAGGCCTGTTTCCGCTGCATCAGCCTCAGCTGGACTGGAAACGAAG -ATCAATCTACTCGAGGAAAGGAACCAAGCTTTGGCAACGATTTTACGCGAGGCACTGGAC -GATCTTGGCTCTCAGCTAGCAAAGATCAAGGACCTCGATTCTGATACGAACAGTACAGTG -AAGCAGGCCCTAGTCAAAGCTGAGAGTGTCCGGGCCTGCCTCGGAGACTCTTCCATCCCA -GTAGATCCCCCTCTTGACTCCCGTGTAGACGGTGAATTGAGCCAACTGGACGAATTACAA -AATACGCGCACCTCGACAATGGACAAGACAGAAGGCAGCAAGGTAGCAAACCAAGGTCAA -TCACGAACAATAAGTCCCAAAGCGGATATAAGCGGGCCGAAAGCCGATGGTCGACCCGAT -TCTGTGCTAGCGACGGCAAGGCGAACGAACAGTGGCCGGAAACCCGAAGGGTCACGGGCA -ATCCCCTCGCGATCAACTATACGACCTTCTATGACGGATTCGGGATTCTCATGGATGCTT -GGCGGTGGGCGGAACTTGTCGGGTTTTGTCAGCTCGACATCGCCGCCGCCGGAGCAGACT -CGACATCTAGATCAAAATCGCGGGAAGCCCAATGCTTTCGGGTCGAGTGGAGATGACATC -TCAGGGACGGATTCTGGGCATGGTGAATTGGCGCTGCACAGCCTCCGGGGGTCTCGGGAC -CCTCTGTCAGGGGCCGGCCCATGATATCTCCCAATTTTGCCCATGTCACTGCTTGCGTAT -GCACTGGATATACACTTACGAGTCAAAGCTAGTCCCACTTATCTCCCAGGCATTCTCATT -TCCCAGAAAAGTATATCTACCAGAGGCATTTGACTAGTATTTCCATCTGCCTCAATACCT -GTTTGCCAACTTCATATAGAGTACATGTAAGTAACCAAAAGGGGTTAAGATCCCAGACGC -TCGGATCTATCGAGAAAGCCACTTTGCATCATTGCAAGACTTCTCGGGAAATAACCTGGC -CATAACATCCTGCCTTTGGAGCGGGGTCAGAAATTGCCGAGCAACGTATATGGAGTATTA -AAGGAAGTGGTTCTGTTTAAGATACTGTCTTGTATAGATCTCTAGCCCAACGCCTTCGGT -AAATTTCTCCGCCAGATGGGATAGAAACCTGTCGATCAGCCGATCTTAAGGTAAATCAGC 
-TATTCAAAGGGAAATTGTTCTACGTTCTTCTGAGGGGCAGATGAATATAAAAAAAGATAT -AGAAGCCATAGAATATAGAGTAATTGGGCTATTACATCACGAAGGGGGGACCCATTTCCC -TTTTTCTCGCAGGGCATTTTCCTCTCCCCACTCTCCCCCGCTGTCCTTCTCTTTTTCTTC -CCCCTTGAACCTTCTACCTCGAATTCACCTGCTTGTCTCTTCACTTTCTGTGTGTGCTCA -GTCGCGGCGCCACATATGCCGCCTGAGACCCTACGAGGCAGCTGCCCGCCCTACCCCCGT -CACAGTTGCGAATGATCGTTCCGCCCATACATGTCGCATACCCCGGATACATTCACGAGT -GCTCGCCTCGATTGAGCCTCGTCCTTTTCTTTGTTTCGCTGATCCTCTGAATTCCCaaaa -aaaaagaaaaaagataataaaaactgaaaagagaGGAGACCTCGGGGATGGGGTCGGCTT -ACAACCATGATGGTCAGTCATGTGCACATGCTTTTTTTATGTTGATCATCCAGTCTTTGT -TTCACATTCCAAGGGCCTTAGAAAACCAGTGGAGATGAGGAGGGGAAAACAGGTTATTGT -GCTATACCTTCCGATTGAGCTCTCGACTCATCGAGCACCCGAAAAGAAACGGGTGGGAGC -AACTAGGGCGCGACTTGACTTTCTTCCCAGAAATCACGGTCACGCGCTAGCACATGCTCG -GAGATGCATCTTCTGACCGCGGAGACGAACCGTCGGATCATTCGATAGCATCACATCTTC -CTGAAAGGCCCGTGGAATGGAAACCCCGAGACGCGCCGGTTGAGTTGATTATGAGCTAGG -TGAATCGCGCTGACCAAACTTAGGCGAGCCGAGTCCTCTTTCGTCGCCGTTAGGACCTCT -CTCTGAGTCCCCTGAATCCGAGTCCGGACTGGACACACTTTCTTTACAGACAGATCACCT -CAGAAGCTCGGCGGGCAGTGTTTCTTCGGGGTATGACCCTCTGTCCAGGTCAGCGACAAT -CTTCATACAATTCAACACAACGTTAACACTTCCGGTATTCCAACAGATCTCCATTATTCG -AGAATGACCATGGACTTGGTCCAACTGGGCTGGATCGTATACGCCAAAACCAGCCCTCGC -GGGTATTGACCCTTCCGAATATGTCAACCTCATCGCTATCGCTAGCTCCCATGGCTAATC -AGCGTCCCGCCCCCTCTCCTCGATCTCGCTCCTCATCTCGAGCTCACACCGGTCACTTGT -CGAGCCGAAGCTTCACAGATCTAGAGGACCTACATCGGTTTCCACTGGAGTCGCTACATT -CCTTTTCCTTTGCCCAACAGTCCGAGGAATTTCTCCACACCCGTCAAAACATTCTCAAGA -GGTCTATTGATTTTATGCGCGACCGGATGGGATGGGCCGCAAGTAATCCCGGAATCGCCA -ACGCACAAGCAAACGCCAGTGGGGACACGGAGATGCAAGGTATGATGGAGCTTTTGACCC -GAGCAAATGTCCTCGAAACCCAGGAAAACCAAATCCATGGTCGTGGACCCATCACAGGAC -CCGCAGAGATGAATCGCGACAACATCTTTGAAAAGGCCTTCTCGGATGGTAGTTCCTCCC -CAGTCAGCACCCGAGATGGAGCCCTGGATTCCACCCTCTCGGGGTCACACCCGTCTGACA -GCTCCCAACTGTTGAGCCCGATGCCCAGTGATCGTATCTCCAACCACAGGAGGGACCTGG -TCTCTGCACCTACGTCCAGGCGCGTAAGCTTAAAACGTACATACACGGACCTGTGCTCTG -TCTCTCTCAGGAGCAAGCTGATGGAAACTCTAGCACAACCATACACCTCTCCCACGGACC -CTTTCACCTCGCTTGGCTCCTCGACTGCTGGATTGGGATTCCAGACCCCCGTCCTCCATG 
-CGCATAGCAACAAATGGACCCCGGCTTCCCAGGCCGTGTTCAGGACCGAAGCCCAGGCAC -CATGGACCATTTTAGCTGCTAATGATCTTTCATGCCTCATCTTTGGTGTTCTCCAGTCCG -AAGTTCGCCGTCTCAGCATCCTGGAAGTTGTACAGAAGGAGCACCGGGAATGGCTCGAGG -CCAAACTCCGAGATCCAAGCACAGATGCCGCCGCCCGTATGCCACTTCAGCCCGAAAGGG -CGAACATATCCGCTGTCAATCCCAAGTTCCGAGGTCTTGGAAATGGTGTGACAGCACAAC -TTCTTAACAAACCATCTTCACGAGAAAAGTCCCGGAGAGCGCAGACTGATGACGGATATG -GCTCCAGCACGAGAAATTCCCGAAACAACAACCACCCAGCCCATAAGTCACGCGGAGTCT -TGCTGTGCGGTGATGTTGTCCCGATTCAGAAGCGCAACGGATCACGCGGATCTGCTAGCG -TCTGGGTTATGGAGAAACGTGGTGGCCTGATCTGGGTCCTTGAGGAGATCACAGAGAATA -TTGCCTCCATCCAGTGCGATGATTCATGGAACGTCGCGAGTGCCAAAGGTGAAGTTGAAA -AGATCTGGGGCCCGTCTGTCGTTCAAACTGGCCAGCCTATCACCGACCTTCTACCCTGTT -TGCCATCGGAATCTCTTGAGGCACCCCCCGGGAAGGGATTAGCTAAAATCGTGGAGCTGA -AGCACTTTGCCGCCCGTACTGCTGCTGGCGTTTGTATTCCTGTCGCTGTTGGTAAGGGTG -AGGGAGATCGCACTCTTCAAGTCTCCAGCTTTCCGCATGTCGCAGGCATGATGGTGCTCT -CGTCCTCGTCATTGAATGTGATCAGCTCCAACTCTGTGTTCTCTTCGGTCCTATTTGGTC -AAGAAAGACCAGAGGGCCTCCACTTTACTGAACTTGTACCCGATTTCGATGAGATACTAG -ACGTTTTGACTGAAGAGGATAATGTGCCACTGGTTGATGGTATTGTCATACCCGAGCACA -GCTTCCGACGGGCACGGACACTTTCCATACTCCGCGATGGAAAAGCAAACGCCGCCTCTG -TTTTTACAGAGCCAAGTGGCTTGCTTGCCAAGCATCGAGATGGCTCGACCATTGTTGTGG -ATATTCAACTGCGTGTGGTTAAGAGCGGTACCTTCTTCTCAAAGGAGAAGGCAGAGAGAA -GCAGCAGTCGCAACAGCGACTCTGATGACAGTGACGATACGATTGCAGTTACAGAGTTGG -TCTATGCGTTGTGGATAACCTACTCGCGACAGCTCCACGCTGCTGGACCCGCTGCTGGTC -TTTCGCCGTCATCTGTACCAAGTTCTAAGCCTACCTCGCCCACACATGACCCTGAATCGG -ACCGCCCGTTAAGTGCCGGCGCCGACACTCAGAGTGCAGAAAATCACCAAAATTCTGTGG -AGATTCAAAGCCCGACATCAACACTTAGTCAACAACTCAGTGAGGCCGCTTCTGAGCCTC -TTACCACCCGGCCCGTACAGCCCGTTCCTCAAATTTCGAGTGCCAATACGAAGAACGACC -CTCCAGCCAAACGGACGATCAATGACTACGTGATTCTCGAGGAGATGGGACAAGGAGCTT -ACGGCCAAGTCAAACTGGCACGTTTAAAGAAGCAACCCAGTAAGAAGATGGTCCTAAAAT -TTGTCACAAAGAAGCGAATCCTAGTGGATACGTGGACTCGTGATCGGCGACTGGGTACTG -TACCACTAGAGATCCATGTTCTGGACTTCCTGCGACGAGATGGGCTCAAGCACCCGAACA -TCGTGGAGATGGAAGGCTTCTTTGAAGATGATATCAATTATTACATTGAGATGACTCCTC -ACGGGCTCCCCGGAATGGACCTGTTCGATTATATCGAACTCAAAGCCAACATGGATGAGT 
-CGGAGTGCCGAAACATCTTCAAACAGGTTGCGAGCGCCGTTAATCATTTACATACCAAAG -CACTGGTGGTACATCGTGATATCAAGGATGAGAACGTGGTCCTTGATGGAGAGGGACGGA -TCAAGTTGATCGACTTTGGGAGTGCGGCCTACATCAAAAACGGGCCGTTTGATGTCTTTG -TGGGGACGATTGGTAAGTTTCCTTTGTCTGACCGAGGGGCACGATGCTGACTGTGTGCGT -TAGATTATGCCGCGCCCGAGGTCTTGCAGGGCAGGTCGTATCGAGGCAAGGAACAGGACA -TCTGGGCTTTGGGCATTCTCCTTTATACCATCGTCTACAAAGAGAACCCATTTTATAACG -TCGATGAGATCCTTGACCACCCCCTCCGAATTCCCTTCCTTCCTTTCTCGGAAGATTGCA -TTGACCTCATCCGCACGATGCTCGATCGCGACGTCGACAACCGACTTACGATTACTGAAG -TACTTGAGCACCCCTGGATGGTGGGGGCTTGAAAGCTTGCTCATGCCTTCTCCCTTCGCC -TTTTATTGAAACAGAAAAAAACAAAAAACAACGCGATACCCCACAAAGTTACGTTTTGAT -GATGGATGCAGAAATTTCCGGGGAAACTGGGTATCATTTCGGGGTGCGTGTTCTTCTAAC -TTCCCTCAGGGCCACATGATGATTATGAACATCAATGCATTTGTTATCTTTTTTTTTTAT -CTGGGGTTTTATGTCTTCTTTCCCCCTCCCTGGCATTTTTTGCGATACCTTTGCATATCT -GTGACTTAGAGAATCCAAGAGAACAGACTTATACACAACATACGACTTTGAACTGCGGGG -GGATCTCTAGCGGATCTTTGTAGTGCTTGGCAAAACCCAGTCGAGGCTTTGCCCGGGGTA -TCATAGGGCGCATCTCCCCACTTCACATCTCGAGAAAGGTTGAACATGTCCAATCTCAAA -GAGAGACTTCCCATATTGATCGGTCCGCAACTGCCTTGGTGCCTTGGACTACTTGAAACA -TTTTGAAAAATTCCAAAACTGATAGGTCATCCATGGATGGTATATCTCCATCCATGGTTA -TTTTGCCATCATACCATGGGCTAACTCTCTTCCTCCCCGCACATGCCCCACCAGCAAATA -CCCAAAGAGGCGAAAAAGAAATCAACCGTTTTTCTCAGCAATTGGAACTCGCCTCGTCCC -TGGATGGCCTGGGCTGAGCGTGGATTTTGTTCTCGAAGGCGTCGTCAGCCGAGGGCTTTG -GTCTCACATCAGACGGTCTGTGCCAGGATACTGTATATACACAATCTCTTGTGTTGTGTC -TATCTATGGAGGTTTATTTAAATTTACCCAATAGATACCCAATAGAACATAGGACATCTT -CACATGGATACATTTTTGGAATGTACCTAGGGGAGCGATAAGCAGATAACGTAGAGGGGG -GGGGGGACCAGGTAACTAGGTACCTACCACCGTACTATGTGATTGTACCATGAGATGTAT -CGCAATCCCCCTCTGCATTCCGCCTTTGCATTCCGCCATTTCTACTGTCTCTTTGCATAT -ATCTCCCGTTTGTTAAAATTTCCCATCTTCTACTCGGCTCACTCGGTTCGCTTCTACGTC -AGTCGGCACAGTGCCGTCAAAGAGGCCTGTTCCACGGTTCTTTGGCCGTGGGCCCCGAAC -AGATACAGATGTGATTTTCCCCACCCACCTTTGGATTTCCTCTTCTCTCTCCATTTCTCA -AACTGATATCTAAAAGACCGGTCGTCATATAGCACTATGTTTGTCTAAGAGCTTTTGGTG -ACTGCGCACCCGTCGCTCTATTCGTTtccgctgtcctccgtcctccgtccttatcctccg -tcGTGAACCCTGTGATACTCCGCACCATACCGAACATTCCCTGGAAGACGTGACAAGAGA 
-AAAGCGGCCTAGAGAATAGGTGACACCTACTGTACCAAGACATGAACATGGCCTCTCAAC -TATCCGAGGAGATCTCACCCTCTCCGGGGAGTAACTCTCCGAACAGCCCGACCAATTCAG -CAGGCCAGATCCCATTAGACCAACCGCCAAAACTCAAGGGTCGTCAGAAACTATTGCAGA -GTCTCCAACGAATGTCATCCAGTCCGACGCTGACCAGACGCGGACGATCCTCATCAACAA -CTAGCTACCGCCGAGACAATAAGGCCTCTCTCTCTTGTGTTTCGTTATCGTCGACTTCAT -ACTCGCCCTGCCTCGGCAACGGAAGCTCCTCCCAGCTCTATGGCGGCCTCAATCCGCGGC -CTGCTACCCCCGGATGTTCTGGTTCTACCGGCGAGCCCTACGGTACAAACGCTCGCATCC -GTTTGATGGACATCGATGGCTCTAATTTCACTACACCTCGCACCGTGCCTTTGCCTTTTG -ATATGCGGCCAGCATCGCACGGATCACCCCGCACCGTGACACCGGTGGGATTCAAGGTGG -AGGAGGAGGTCATTGCTCTAAAATCCCAGCCGACCAAATCTTTTGACTTTTGGGGTGACA -TGCCACGAGAGATTAAGATGGGGATCCTCCAGTACCTCACCCCGCAAGAAATCGTCCGCT -GCTCTGCCGTGTCCAAAGCGTGGAATGAGATGTGCTACGACGGACAACTATGGTCCAAGA -TTGATGCGACCCAGTACTACTCTAAGATCCCGAGCGATGTTTTGGTCAAGCTTATCACCT -CTGGTGGTCCGTTTATCCGGGACCTCAATCTCAGAGGCTGCATTCAGATGCGTGATAAAT -GGTCCACGGATGGAGAACGCATCTCGGATCTGTGTCGGAATGTCGTCAACTTTTCGCTCG -AGGGTTGTCGTATCGACAGGGCATCCATCTACTCTTTCCTCCTTCGTAATCCCCGTTTGG -AATATATCAATCTCTCGGGTCTGTCCAGTGTCACGAACTCGGCGATGAAGGTCATTGCAC -GATCTTGTTCGCAACTTGAGACATTGAATGTTTCTTGGTGCAATAATGTTGATACCACCG -GTCTTCTTCGGATTGTCCAGTCCTGTGAGCGGCTGAAGGATCTTCGGGCCAGCGAAATCC -GCGGGTTCAAGGATGAGAAATTCACTTTGGCCTTGTTCGAGCGTAACACTTTAGATCGCT -TGATCATGAGTCGTACCGACCTGACCGATCACAGCCTGAAGATGCTGATCCACGGCGAGA -ACCCCGATATGGATATTCTCACCGATCGGCCTATTGTTCCACCTCGAAGATTCCGTCATT -TGGATTTGCACCATTGCCCTGAAGTGAGCGACGATGGATTCAAAAGCCTGGCTCATAACG -TGCCAGATTTGGAGGGCCTGCAAGTTTCTCAGTGCTCGGACCTGACCGATCAATCCGTCA -TGGACGTCATTCGGACGACCCCCAAATTGTCGCATTTGGAACTGGAAGATCTAGAGCACT -TGACAAACAGCACCCTGGTGGAGCTGGCTAGATCCCCATGTGCTCAACACTTGGAACACT -TGAACATCAGCTTCTGCGAGAGTCTCAGCGACACAGGAATGCTTCAAGTGATGAAGAGCT -GTCCGAAACTCCAATCCGTCGAAATGGACAACACCAGAGTCTCAGATCTCACCCTGATGG -AAGCCAGCTTCCGGATTCGCCGCCGCGGGTACAGCGACGATCTCCCTCAAGTCGGGCTGC -GACTCATGATATTCGACTGCGCCAACATCACCTGGGCAGGGGTCAGGGAAGTTCTGTCCA -GCAATGCATACATCCCACGCGCCTCTCGCAAACCCATTTCAACTGTTGTGACCGTAACAC -AAACATCGGACAACGGTTCCTCTCCGGCAACAAGTACCTTCGTCACTCCAGCCTCCTCAC 
-CTTCGCCATCGCCAGCGCCAACCTATCCCAATGAAATTATCCAGCTCAAATGTTTCTATG -GCTGGCAGATGACCGTCGACGAACACACGAAGCGTGTCCTTCGCGGCGATCTTGCTTCTG -CATCCCGGCTGGACCGGAAATGGGCCGACTACATGGTCGCCACCGAGGAGGCTGGCGCCG -CTGGAGCTGGTGCTCGCCGACGTCGACGTCGTGCTCGTGAGGCTGAGCGTCTATACAATG -CAGACGAAGATGACAACGATTCTTATGGCGTTGGTGCTATCTCTGCTCTCGGTGGTAGAC -GCAGGCGAGCCCAGAGCGGTAGCAGCTGTACTGTCATGTAAGATCAATGCCTTTTGAATT -GTATGGCGTTTCCTTTCATTCCGTGTGTTCTGGGAACACCCGAAGAGCTGAACGCAAGAC -CCTGCTCAACTCTGCTGGCGCAGGATCGCTAGTATCTCGATGGACTTCCATCTGAAATTG -AAGATCATATTCAATGATCGGTTCCAACTTGCTATGACTTACGCACGATATGATTTTCTT -TTCCGGGACGATTCACCAATACAACACAGTTTCTTTCGCGTTTGACTTGTTTTTGGCCCT -CTTTATCTCTCTCTATACGTCTTTCTACTCCGTATCGTCACTTCTACCCCGTCCACTTCC -TCAGTTTTAGCTCTGGCGCCAAAGTCAAGATTGGGTTTTCCTTTCTGCATGTTCTAGTTA -TGAGCTTTTCATATGTTCACATCTTACGTAATGGAGATGTTAGCTCTAGCATACGTATCT -AGAAGGGTTACTGGCGAGATGGCGCTCTTAGTTTTCACTCTCTCTTCGATCTTCCCTGGG -CTTTTCGATTTGAAATCCATGCCACCTTTTGGGCTGCTTTTATGACACTTGACCTCGAAT -TGAGATCCCCAAGACAAGTATATAGGTGCAGTTCGATGACCTTCACCATCACTCAAAGCT -TCTGTTGGTCGTTGCTCCTGTTATGATTAGCATCAACAGGACGGGGGCTTCTGACTTACT -TTTCAAGAAAGTGTTTTCGGGCCATGGATATGTGCGTCCAAGAGATCTCTTTAAACCGAT -TATCTTGCATCTTGTAACAGCCTATACGAGAAGAATTTCGTAGAAAACAGCAGTCTTCCT -TTCATGGCAAACAGGGCCAGTATTCAGAATATTGTAAGAACAAAGCGCAGTGAACCGCAT -GTCACTGGCAGGAAAGAAAACGAGAGCACATCACGAGAGGAAACACAAATAACAAAGGCA -CAAGCCCCCAATGCCATAAACACTGAGTTTCCATGAAGTCGAAAGCAGTCTCTAGGATAT -ATCACCTCCTGATATCCCATAAAGATAATCTCGTTTCTGATCCATTTTCACCTTTTTTGG -GCTTTTCGATTTCAAACTCTACCCCATCTCACGCAGAAACTTCCTCTGGTACTCGATTCT -CGTAAATATCAGTCTGCTTTCTGATATCGGCTTCTGCAGTAATTGATCAAGATGCAGGGG -CCTCGGCAATCGGCGCCTCAGACAAGCCACCAGTTGCTGCAGTGCTTAACTCAGTACTGA -GCTGTTCGGGAACTATTTGCTCCACCTTAGGCGGCTCACTGATATGACTACTCTCAGCCG -GTGCACCAGTCGCTGGTTGGTCAATGTCAGGCTGCCTAGTCATCGGCTTCACTTGAGTAG -TTTGAACAGCTGCAAGTGCCTGGACGGTGGTCTCGACAAAGGGGGTAGCAGGAGTGCTGG -TCTGCTCGGGGTCGGCGTTTTCAATGGACTTTTGCATCTGATCTTCAATAGCCATGCCCA -AATCGAGACCAGTGCCCACAATGCTGCTTGTTATAACAGGTTGTTTCTCCTCGATGTTCG -AGGCGGCAGTTTCAGGCTCTTTGGTACTCTTCTCGGTATCCTGTTCCATGAGATCTTCTG 
-TAACGCTTTCCGTGACGCTTTCCGTGGCCTCTTTCAGGGTCTCTTTTGGGGTCTCTTCTG -TGGCCCCTTCCACATCTTTCGCCGGAGCTTGCTCTTGGGCCTGCGCTGGAGCCTTCGCCG -GGGAGTCAATGTTCATATCCACGCCCAGTTTAGACTCAGACTTAGTCACGATGGGCTGGT -TGCTACTGCTAGCAGCTTCGCCGGTCGTTCCAGTGGGATCTTCAACCTCAATCGCACTAT -CTGGGTCTGTTTTGCCATCTGACTCTTCATCCTCGTCTATCATCTGGTTGGCGTATGGTC -TATCAAGATAGGAATAGTTGAAACCTTTGATACCATGTCTGGCAAAGAACTCCCTTGGCA -TGACAGTTGGTTTCACGCCATCCTGCATGGGTCCACAACGGACGTTGTAGCCCCAGGCTT -TCCACAGCGGGCAAATCTCAATCTCATTAGTGTCTGGATTTGGAACAACTGCGCAGAGAT -GGTGCTCATCAGCCACGAAATCGCGAGCCCAGATCTTGCCCCTATACTTCGGATCTTTCT -CAGTACCATGTGGCTTGTGCAGTATCAGTTGACGATTGATAGTATGAGGAAGATTCACGG -CAGAGTCAGGTGCAATGACCGCCCCAATAGACCAGATTTGGTTCGCTTGCATGATATCTC -TGGTTTTGTCACAGAGCAACCAGCCAATGCACTCGACGTTTGCGCGTCCACAAAGAATGT -AATAACCTTTGTATACCGGGAAGAAGGGTAGGCCATGAGCCCAGACGGTTGGAGCGGGCC -CCGAGGACACGCGGTGAGGATTGATGTTACCCATGGTATCGAAGTCATATGGATATCGTC -GTCCCCGTTCGCGGGAGTCAACAAACTTGGTCACTAGATAAACTAGCTCATCTGATAGTC -GGGTATTTGACGGGGGGAAGGTTGGCTGGCCGGTTTCTTCATCGAATTGAATATCCAAGA -CTAGTTCGAGCTGAGCCTTTTCTTCCTCCTTGTTGGAGGGATTTGCTTGGCCACTTGCGG -GTGTCCAAACAACATCAGGGTTTGTCTTCAGGTGATCTGCGAGGAGGCTGGTGCAGCGAT -CTCGAAGAAGTTCCTGCTGCTTGATGAAGCTTGCTCCTTCTGAAGGGCTGCAAGCCATTC -TGGCATCTTTGGCTTGAGGTCCAGGCATCTTTGGTCTAGGCATCTTCCTGCACCTGGTTT -GAGATATTGTAGGGTTGATTTGAGAATCTATTCTCGGTTAGTTGTGGATTCAAACGACTG -GATGTATATTTGGTTAATCGATATGATATTGAGAGAGAAGGTGACTCATGGTTTTCCAAT -TGAGTAAATGCCTGACTGAACACCAGTGATAAGGGCAAGAATATACTTGCTCTTGAAGAA -GAGAAGGTAGTTGCGGAAGTTTGAGCTGTTGTAGAAGTTCGACGTGTGAGGAAATCTAGG -TCATAGTATAGGAGTCAACAAAGAATAACGTAATTTGCAGTAAGATAGAGTAAACGAAGA -GAGTCTGAGCATAGCATCAGAATAGAGGTTGAAGTAAACTGGTGATCGACGATTCAGCAA -GATACCTTACCTGTTCACCGAATTGCGAAAAAAAAAGAGGTACAAACTAAAGCAGATATG -TAGATGATAGATATTTGTTGAAATGACGCAGGACACCGGATGTGTGAGGTGATACAGAAC -AGAGATGAAATGAAGTCATATATGAGCAGCGAAGAAGTTGTAGGTGCTGATCAGTTATTC -ACAGCTTGTAGAATTAGGTGGCAAATGTGGAATATAGTATCTGAAGCATTGACTCTAAAG -CTAGATCAATACGATGAGAGTTATGGTAGGGAAGAGTGGAAGTTGCAATATAGACGTTCG -GAAGTAGGGTTGGTGACAGGGGAAAGGGTCAATGGGGTAGATTGTTACATATAGAGTGGA 
-GTGAAAGTGAACAATATGTAGCGAAGTCATACCACTTGACACACTACCATGCTCACCGAA -TAAGAGATACTTTTTAGGGAGATTGACGTTCCATGGAGTCATTATAACTTCATTGAACAG -CTGTTCATGTATGCGACAGGGTCTTCCCTATATAGCGAAATTGCTTTGTTCAACCCAACA -ACCTGGGTAAGCTATGACATCAGAGCATAGGCATACATTATATGAATTAAGTCATGTAGG -GAGAACCCGGTCATTATCTGGTGAAGCTCTTTGGATAATTGGGTGTCAGTTGGCGGTATT -GCAGTGGTATGTGTAGAATGCTAATGTGGATATCAAGACAAAATAAGACATTAACCATCA -AACTAGTGGATTGATTACTCAAAGAAACCACCATCGCTATCATCTTGCTCCTCCGCATTG -GCTTGTTGTGGTGCCTCGTCTTTCTTGGAGGCACGACCACTGCGTGCAGGCTTCTCCTTC -TTGGTGCTATGCTTGTCATCTTTGGAAACAAGCTTGTCCTTTTGGCTAACAGCCTTGGTC -TCTTTGTGCTCTTTCTTCTTGCTCTTTTCCTTCTGGGTATCTGTAGCTGTAATTGTGCCA -GATGACGAATTTCGAATCTTCTCTCCATTGGCGTTGATGTGCAACTTTCCATTACGAAGA -AGGTCCAATGTCTTTTCTTCCATACACTTCTCAACAACAGGCCACAACGGGGGCTTGGAG -TCGTTAGACTTGGTTTCCTCTGTTTGCAGTTCTGACTGTGGATCCGTTGCATCAGGTTTC -CCATTGGGGTAGATTGAAATATATTTCTCGGTGAGAGGATAGTAGATCGCATAGTTGAGG -TTCACCTGGCAGGTGTGGATTTTTGCTGCAAGACGAGCCTTTTGTTTCGAATCAAGATCT -TCTTCCTTTTCACGGCGAGTGAGTCGATTGAGTTCCTTGGTAGCCGTTTTGCGGTCTGTT -AAAAGATAAGGCGCATTAGCGAGAACCTAGGTGCATTCAGACTTACGGAACACTTACCAA -GGAACCGGACAAAGTGATATTTCTTGATCAATGATGACCGCTCTCTGCGTGTAGTTTCAT -CTGCGAGATCTTGCTCATATCCAGCGAGAGCCCGCTCCTGCAAGATGCGAGCATCGGCAG -ACAGGTCCATTTTGTTGAGTAGCCGTTTTACATCGCGAATTCGCTTTTTGAGATCGTTGA -TCGATGGGTATTCCTTTTCAGGCTTGGGGGCAAATTTGTCCTTGAATTTGCGGGTCACTT -TAGGCTTCGATGCATCGCCTTTGCTGTCGAAGTCCACGAATTGACCTTGGTCATATGCTT -TGTCTGCCTTGCGGTGCACCTTTTTGCGGGCCGGGGGAGCGTCCTCTGATCCATCCCGGG -TCTTCCTCTTTGAGGGAGCTGCCCGATCGCGGTCACTTCGTGTGTCTGTAGACATGATCG -TTGGTTTTACAAAAAAAAAAAAAGGAAAGGAAAAAAGAGGCCGAAGCTACAAAAACGATA -AGAAAAAAAAGATATCGGCGGATTTATCGGCCCGTCCCCAGATTGATGCCAACCAATGAG -CGTCTTCCTTGACCACCATCACGACGGCCCTCTCAGGTCGCATTGATTTGATTTACTCCA -ATTCCCCACTTGAATTTGTGTATCTGGCAATCTGGATCTACTCATGTCATTGATTCAATT -CTTGAACTGAGCTTAGTAAAAGGGCCAGCATGGAGTCCGCCATTCGCTGGTCCCCGACCT -CGTCGGCCACTGAACAACGCTTCCTGTCCGTCGACGTCACGGGGAAATCGTTTCGTCTGT -GTAGAGTGACTGGGTTTGACGGCAAGAATCTACGCCATGAAGTTCTCTCAACCTTGACCA -ATGTGCCTGGGTTTCGTGCATTCGACTGGTCTACTTCAAATGAGAACCTGATCGCCGTGG 
-GTCAGTCATCTGGGGAGGCCACCATCCTACGCCTCGATGGGGATGCCAAGGAGTCTCTTT -CATTCCCAGTCCGCAACCAGCGGTACTGCAATGCAGTCGCGTTTAGCGCACACGGTCTTG -TGGCATCGGGTCTCGATCGCGTTCGGAATGACTTTTGCTTAAACATTTGGGATGTCAACC -AACGACTCAGCCCTGCTGGTGGAAGTAAAGGATTTACCGAGCCCTTGCGAAAGCTCGCCA -GTTCAGAGCCGATTACTAGTATCAAGTTCTTCCGAGACCAACCTGATACGATCGTGACCG -GTGTCAAAGGCCAATTTGTTCGGATCTATGATCTAAGAGGTTTGTCTTTCCTGGCATTGG -ATCCACAAAAGCAGAGCTGATCTCACCTTAGAGGGACCTGGCAATCCGTCAATACAATTT -CCCACTCGGTGTGTCCATAATCTGGCTATTGATTGGCTTGATGAGAACTATGTCGCGTCT -TGCCTTCCATCAAATGAAAGCACCATCTGCGTGTGGGACCGTCGCGTTGGGTCTCGACTG -ACATCTCCATCAGTGGCCCCTGCTACCGGAGCTGCAGATTCAGGCCAAGCGACGCCTGCT -TTGGAACTCAAGAATGTATTCCATCCCAAGTCCTCCATCTGGAGCTTGCGCTTCTCAAGG -ACAAACCGAGGCAGTCTCGCTGCGCTGTCCAGCAGCGGTCATTTCAAGAATTATGATATT -GCCAAGGACTACATGCCTGAGGAGTATCGGTCTTCGATAGATGAGACTCTTGGCCAGGGT -TCTTCCAGGAATTACCCAGAATCGGTGTACACCAAGCACGTGCGGGATGTATGCGCGCCA -TTTGATAATCCGACTCGTGGCTACAAAGAGAAAGACCGTGTTGTCTCATTCGATTCCCTG -AATCTCAGCTTGTCTCCTCAGCCAAGTGCTATCACACTCGACGGCAATGGACAGCCCCAG -ATTATCACCGCGCGAGCCCCGTGCGCACCAATTGATCTATCTTCACGAAGTGTATTGGCC -TGTGGTATCTCGTCGAGTGACTCAGATGTACGATCGATTCACCCACTGTCTGAGCATATT -TCGCCCATCTCAGACCTGATCGGGAACATTAGGTCACGCGTCTGCTCAACTTCACAAGAG -CATGGGGTAAACTCAAATGGTCAGCTGTTTGTATCAAAGAATGTTGATTACGAACCTGTC -CCAAGCCACGAGAACCGGGAACGTGCCATGGCTCTCGGTGTGATGGGTGTTCCTCTTACT -GCTAAAGAAGCGCTCACCCTATGCACGATCAACCAGTCCCGGTGCAAAGAGGGCTATTTG -TTTGATGAGGTTCAAAACAGAAAGGTGGCCTCTGATGATCAAGCATTACAGGATTTCTGG -AGCTGGATTGAACGTGGGTTTTTCATTTTTTTTTCCCTTGAGGTCGCAAATCACGTCTAA -TGAGTTTTCAGGTGCTCGAACTGAGTCTTCTGGGACGTCAATGGTTGTCAATGGCTTGGA -CCTGAATTATCTAGGTGTGAGCAATATCTGGAATGACGACCTAGGTTTGTTTGCTCCCTC -ATTTTCAAATAACTCTCAAAACTGATTTGAAACAGGCAATGGTTTCGAACAACGTTGTAT -CAACGCTAAGAGGACCGATGCTAAATCAGCTGCGGATGCAGTAGCAACTTTGGCGAATCA -GTTGAACCTCCCCGAGACAAAGGGTTGCGACACGGCTTATCCCGATCATCGTCGACTTTG -TCTTCGTCTCTGTGGTGCAACGCAATCTTACCGTGAGCTGGAGGAGTCAGTGAAAACTTT -ATCTAGTGAAGGACAGCATACTAAAGCTGCTGCACTGGCTGTCTTCCAAGATGAGCCTAA -GCTGGCTTATCTTGGGCTCCGAAGCAACAATCCCACGCAAGCTCATAAATTACTGGCTAT 
-GGCAATTGCTGGCGCCTCCAAGGGAGACAACGACACAGAGTGGGAGGAAACCTGTGCGGA -AATTGCGAAAGAGCTCACCGATCCATACGCTCGGGCGATTCTAGCCCTTGTGAGCAAAGG -CGACTGGAATGCAGTCATCAAGGAAACCACGCTACCGCTGAAGTATCGCGTAGAGGTTGC -GCTACGCTGGCTCCCAGATGACGAACTAACTGAGTATCTGAAGGATGCTACGAGCCAAGC -GATTCGTCAAGGCAATATTGAAGGAGTCGTGCTCACAGGCTTGGGTCATTTGGCCATGGG -TCTGTTCCAATCGTATATTGAGAAATTCAATGATGTCCAAACTCCCGTCCTAGCAATGAG -CCACACTGTGCCGCGGTTTATCAACACCCAAAAACACATTGTCCAGTTTGAGGCCTGGCG -CGAGACCTACCGGCGGCAAATGAACTCATGGAAACTTCAACTCGAGCGAGCCAGATTCGA -TGTCAGCTCTCGTCGTTTCGCTGTGACTGCGGATGGTCGCAAACTTGTGCCGCCACCACC -GCAGCAAGTCAGCCTCACCTGCAACTACTGCACAAATCCGCTCACTCAGCACGATGCTTC -CTCCCAGCTCTCTCCATCTACCAAGAGTAATGAGACTGTCCATCCCACCACCGGGAACCC -ACTGGCGGCCCCCATCATGTCTGGGACTTTGTGTCCGCGTTGTGGTCGACATATGCCGCG -CTGCGGCGTCTGCACATTGTGGTTAGGCTCACCAGACCCGATGTCACGCGCCAGTATTGC -TGCCGATGCCGAGGCAGGTTCTCGCAAGCCCACTGAGACCGAGGTGATGCGGCGATTTGT -GGTGTTTTGTATCCATTGCAATCATGGATTCCATGCAAATCATGCTAGCGACTGGTTCAA -ACGGCATAAGATCTGTCCTGTAGCAGAGTGTAACTGCATCTGCGATCGGTAATAGTATTG -TTTTGCGTTACCCTTTCCGAGCCGTGGACTAGATTCATAGCCTGTCTTTCTACGATATTC -TATACCCAATTGAGTCAATTTATGTGAGAATGAACAAAGAAAATTTGAATCCTTTGATCT -TGCGCAGTTTCATGCTAATTTTGCTGCTCTTATATGTAGCTGTGAGTCATTGTTCATAAA -TTTGTATGTTGTAGAGCCGCATCAGCTCTAAGCTTATAATTGCCCCACCAAAATATAATT -TCCACCACAACTCCTATCAGCTTCACATTTTGTTCCAGCTCAATACGTTACTATGCCTCA -CCCAaaagacaaggagaagcggggacgtggacatggtcaaggacaaAGTCGTGATGTCCA -AGTCTCCAAAGCCTTAAGCCTGCTTCTGCGACATGCCGCGGAGAAAGAAGGGCTCAAGAT -GAACGCGCAGGGTTATGCAAGCGTGCCAGACGTGGTATGTCGGTTTATTGTTCTCAATAG -CCTACAGTATACTGACTATGTGTAGTTAAATTGGAGGAAACTCAAATCGCTAAAAGTCAC -GTTCCCCGAGATTCTACACGCCGTCGACTCGTCCGATAAAAAGCGCTTCGCTCTCCTTCA -CATTCCATCCGCACAACCCACCAAATCCACAACGCAAGCAACAACAATCCCAGTGTCAAC -CCCCGACGCAGAGCATGATGCCAGCACCATCGTAGGTGAAGAACAACCCGCGACATCGGT -CCTGGAGTCCGCGCCCGCCACGAGCGAAGCCCAACAAACAGCCACGGACCAAGCACTATC -CGTGGAAGACACCGACCCGTCAAACTTTTTAATTCGCGCGACACAAGGCCACAGTATCAA -GACTGTAGATGCTGCCTCCTTCCTCGAGCCGCTTTCTTTGTCCGACGAATCGAAATTACC -CGATACCGTCGTCCACGGCACATTCCACGCGACCTGGCCTGCGATTCTACAATCCGGTGG 
-ACTCCGCTGTATGGGTCGAAATCACATTCACTTTGCTACTGGCCCATCATTAGAATCAGT -TCTTGCTATTCACACTGATGATGCTGCGCGGGATAAATCGAAGCAAGACGACCCCGGTGT -TATCTCGGGCATGCGGCGCGATGCGCAGGTTTTGATTTATATTGATCTACGCAAAGCGCT -GAAAGTAGGGGTGCCGTTCTGGCGTAGTGAGAACGGGGTTATTCTGAGTGAGGGCATACC -TGTTGCGAAGAGTGGACATGCTGAGAAGGGCGAAGAGCAGAAGTTCGTCTCGCTGGAGTT -TTTCGATGTGGTAGTGGAGCGCAAGGCTGGGCTGGGCAAACTCTGGGAGCATGGGGAGGT -TCTACAAGAACTTCCAGAGAATTTCACGAAGAAAGGGAACCCTAAGGCCCGGCGATGAGT -GGTAAGATTGGTGAATGGAGATGTGGACTCCAACTCTAGGGATTATCACATCTAGCCGGG -CTTTCTGCTGTTCAATTCGACGCAGACAGAAAATCCAACTCGCCTTCTAATGATCTACAT -GTTCAAGTATGGAATCATTTCAACTTGAGTTGCCCCCACTTCGGACTTAACCCTAGGTAA -CCCCGGAGCCTCCAGTGCAGGGCTATAGGGCTGCGGTAATACAGTCCCACCGAGCCGGAG -TGCCCTGGGATATATCCAACCTGCTATGTTACTGTAGCACTAATCCCGACTGTTCAAGTT -AGAATGTAATAAGGCGTTGACACATCGGATGCTGTTTAGACTATGAGTCTCTACAACTAA -TCTAATGATTTATCTAGTCCACGGGTATGGGAACGTGCCTATGCTAGATTCTCACTAGGC -TCAGGAATCATTTTTTTTTCAAAATCCAAGTACTCCGTAAGCAATTCTACAGTATTGGAT -GTACATATGTATATAGGTATATATATGTTGCATATCAAAGCTGCCAAGCTCTCCTACCCG -ACTGCGGGGAAATCGGAAGCGCAACGTCTTCCGATCTGTCACTTCTACAACATCCGACGC -CAAATACAATGTCATTGGCGTGGGGTTGCGGGGTCCTTGAATAGCCGCTTTGAATAAGGA -TCAAGGACTGTAAATCCGTTCGGATCTATATAATAATCATGCTTTTAAAATAAGCTTCTG -ATCTTTTCTGAAACCGCTCTCAACTCTTTGTATGTTTTGCCTCGCGCTCGTACATATCGC -ATAAGTGATTACGCAAAATGATGGCGACGCACCGTTTTAATCCCACTTTCACGGACAATG -TCATCGACGCGATGGGTCCTAAAACCACCCCGCGCATGCGGAAGCTCATGGCTGGCCTGA -TCAGGCATATGCACGACTATGCGCGTGAGGAGGAGCTGACTGTCGACGAATGGATGGCTG -GTGTGCAGATGCTCAATTGGGCGGGTCGGATGAGTGACGATAAGCGGAATGAGGGACAAC -TGGTGTGCGATGTACTGGGACTGGAATCGTAAGCCGAGCTCTTGTTCCCAGATTGGTATT -CCGCAGCTAACACACTGCCGTTGCACGACAGTCTCGTCGACGAGATCACTTTCACTCTCG -CTGGAGAAGCACAGGATGCGCCTACAGCCACAGCCATCCTGGGCCCGTTCTTCCGCGCTG -ATACCCCGTATCGTGAGAACGGCGAGGACATCGTCAAGACAAAACCAGCTGATGCCGAGA -TGACCTTCATGCACGGTCGTGTCATGGATTTCCAGACCAAGCAGCCTCTTGTCGGTGCGA -CGGTCGAGGTGTGGCAGGCTTCCACGAACGGGTTGTACGAGCAGCAGGATCCCGAGCAGG -CTGAGTTCAACCTGCGCGGCAAGTTTAAGACGGATGAGGAGGGCAGATACTCGTTTTACT -GCCTGCGTCCGACGCCTTACCCCGTTCCTAACGACGGCCCTGCTGGCAAGCTTCTTGAGC 
-TGATGGATCGCCACCCTTTCCGCCCTGCTCACATTCACATTATTGTAGGTTTTAGCTTGG -TATTCTAAATGTCGGGTTGAGGAAGCTAATATCTAACTTAGGCTACCTATGATAACTACC -GACCTTTGACTACGCAGATCTTTGACCGCAAGGATAAGTACCTTGACAACGACTCGGTGT -TTGCTGTCAAGGACTCGCTTGTCGTGGACTTCGTGCCACGTGAGAATGACCCTCAGGCTG -CCATTCAGCTCAATTATGATGTCAAGTTGGTGAAGTTCGAGGCTGGTACCAATGGCGCAT -GAGCGAGCCTGGACACTGTAGTAGCATTTTTGTGATTTGATTTTCTTTGATGTATTTAAC -GATAGCGGCATTGCACATATCAAGCATAACCAAATAACTTTACTGGCATTATCATCATTT -TCTTCTAGATAGAGAGGTTCCTTCAAGCATTCGCAACATTCGCGAACTTAGCAATTAACG -CACCGACCTCATCAGCGGCCTCAATACAATGCCAATGCCCGACCCCCTTCAATACCTCGA -AAGACTTCTCCTTGCTCGAGACATGATTAAAGATATGCTGACATCCCTCCATAGAGGCAG -ACTTGTCTTCCTCACCCGCAATAAGCAGGAAAGGCGCTTTGACAGCAGCATAATCCGCCG -GTTTCGCGGTCGCAATGGCACGGCACAGCGCTGCATAACCCTTAGGATCCTGGCCCAGGA -TTAGCTCCCGAATGAAAGCCTTTTGAAGCGGGGTACTGCAAGAGCCGACAGCTCCAAAGG -GGATGGTATCTGCCATTGGTTCGATCCCAGCTGCCAACAAATTAGCTCATTCAAATGAAG -ATCCGTAAGAACAGATAGACTGGAATGAGAGAAACTCACATTCGAGGACAGTCTCCGATC -TCTTATTCATGACCTTCACCAAGGTATCTGACGGGTGAGTCGGCCCAATGGCCACAACGC -CCTGTATGCGATTGGGATACCGCGCGCCCAGCTCTGTCACCACGAGCCCGCCCATTGAAT -GGCCGACTACAACGGCCTTGTGGACTCGCAGCGTATCGAGCATGCCGATGACATCCTCTG -CGATGCCGGGGATTGTGATGGTGTCATTGGTGTACGGGGAGCGTGCGGCACCGTATGTGT -CAATTGTAATGCAACGGTGGAGTTGTGTGAGGTGCGGGATTACTGGATAGTAGTAATTTT -GCGAAGAGCCCAGGCCGTGGATGAAAATGAAGGTTAGGCCGGTAGGGGTCGGCGGGCCGG -CGGGGTGGGAGTCAGCATAGTAGAGACGGTGGCTATTTGCTTGAACGAACGGCATTTTTG -CCTGAGGTTGGATTCAAGACGACGAGGTACAAGAAATGACGAGTAATGAGGAACGGAGAA -GCGGGCAGCCCTGGAGATCTATACAAATGATAAGTCTAACGTCATTCCCGATAATATCCT -ACAATAATACTAATAATGTACACAATGTTCTGCAGGATCACTTCTGTTCATTCCTTCGGA -GCAATCCAACTCTAAGCTTCTACTCCAAAAAAAAGATATTTGAGTCTCCCGAACCGAAAG -GGCTTCCTGCATCCGAACTGGGATATCCGACTGTCGGTTCGGACCCGATCCATCGCCAAG -ACCTCCAATTGGACCAGGACAATGTACGATGTACATCATACTTCAACGCAAGACCTGCGA -TAGAAATGATGCTAAGGCGATTGAACAAGTTCAGATAAAACTATTGAGAGGGTATTTTAT -TTAGTTCGTCAAAAGGGTCGCCGTTTCCGTGTGCAAATGCTTCTTCGATCCAATCCGCAG -TCTCGTCTCGAAAAGAATCTCCAAAAGAAGACGCCAATGTGGGTCTCCTGCACGGTCGAT -CGAGCAAAGGAGGTTGTAAAAGGGAGGTAGCCTTGGGTGGAGACTGGTCTCTCGCTGGAT 
-GACTACTCGTTTGTGTGATGATAGCCGCTTCGGCTTCAAGGAAATCAGCGGGGTCGTACA -GGAAAGATTCCCCGAAAGACGATTCTAGCGTGGGCACAGCCCGGGGAGGTTGGGGAGGTT -GTAGGTAGGATAATTCCTTGGGTATAGGGGGGTTCTTCGTTTCTTCGTCAGTTGCTCGCG -TGGTAGCAGCCATTTCGGCTTTTTTCATAACCTCTTCGTCGTCGAATGCCGAGCAGTCCG -AAAACTCATCCTCATCCTTGAATGAAGCTCGAGTAGCAATGTCAATATCTTGACCTTCAG -GCTCTTTCTGAACGCCTTCGTCTTGAAGCTCAAATAGCTTGTTTTCCAATGCCTCATTGA -TGTCGTCAAAGGCATCCGATGCAGCTTCCGTATCTCCACCAACGCTCCCCGAATCGGTAT -CATGCGACTCGATCTCTGTAGCTGTAGGCGCAGGCTCTGGTGTAGCGGGTGACTTTTGCT -TCCCAGCTCCTAGGAACATTGACAGCAACGGCTGACTCGACGGGACGCGGGCTGCATGGG -GATCCGGGATGCCGCGACGCTTCCGCTCCTCACGGGCCTGCGCCTCCTGCTCGGCTTTCC -GTTTCTTGTTGGCAATGCGTTTCTTTTCCTTCTCGCGTAGCTTCTTCGCTTTCTTTTCTC -GCTCCTCCTCGCGATCTAGCCGCGCAATCTGGGATGCAGTGAACTGAAATCGTTGGTTGC -TGCGCTGGTACCGGCGTTTAACGGTCTTGGATTTCTCCAAGATGATGCGACGCGGCTCAT -CCGGCATAGTGGGGGCGACGATGGCAGCGACTGCCTGCCTTTTGTGCGATCATGAGGATG -GAGAGCTGTGGCGAGTTGTACTAGAAAATTTTGGAAGTGTCGAAGTTGTTTTTGGTTGGC -GGTGATACGTTGCGCCGACCGCGGAAGGTCAACAAATTACCAAATTGGGGTGACCTTTCC -AACAGTCCATTCCAATGTTATTGGTCTCTATTCAGTGTATATTTGGATGCTTGTAATTTA -ATTCCCTATTTAAATTCAAACGCTAAGTGTTTACTGGTTTAATGGCGTCATTTAGATCTT -GCAGTCACGGACGGGATCCCAACACACGGGCTCGTACGTGTTGAGCTTCAGGCTGGCCAT -GTCCTCATCCGACAGTGTGAAGTCGAAGACCTGGGAGTTCTCTAGAATTCGAGACTCAGT -CACAGACTTAGGCAGGGGCACATATCCCTAATAGAACCGAGGTTAATCATCCACTGTCGG -GCACACGAAGAGAGTCATGTCCACGTACCTTCTGCAAACTCCAACGGATCAAAATCTGAG -CCTCGGTCTTGCCATGCTTCTTAGCCAGGTTCTGCAGAACCGGCTCCTTCATGCGAGTTG -CTTGCACGAGGGGAGAGTATGCTTCAACAACGATGTTGCGCTTCTGCAGCCACTCTGTAA -TATCGTCGCGCGGGCACCACGGGTGGATTTCGTACTGGCCGACAGTGATTTGTCCACCTG -CTCCGCTCTTGATGTACTCCTCCAGCTCCTCGAGGTGCTGGATAGAAAAGTTGGACACAC -CTAACGAGCGGACATGGCCTGCCTTTTGGGCCTCCACAAGTGCGCGCCATGTACCCAGGC -GGTCCTCCTTACCGCCGTAGGGTGCGTGAATCAACATGCTAATAGCCGCAGTGTTAGTTT -GCTCAGCTATTTTATTTTCCCGAGACCAAGGAGGTACATACAGGTCAATGTAGCCAAGGT -TGGCTGCGGCGATGCTTTCCTCAATCGCCTTCTTTGCCTTCTCGTATCCCATGCATCTTG -AAGGCACCTTGCTAGTGTAGAAGATCTTAGAACGATCAAGTCCAGACCCGCGGATAGCAG -CGGCACTTTCCAATTCATTTCCGTACACTTTGGCGCTATCGACCTGTAATAGGATGGTCA 
-AACCTGGGAACAATTTCTGCCTAGGTTGTATAGCTACTTACATGGCGGTATCCCATCTCA -ATGGCTATGCGGGTCACTTTCTCGGTGACATCGGAGGGACTGGTCAAGACTAAGTCAACA -TAATCTGTCACGGGGGATTGGGATGATGCTCATACGTTTGGTAAACCTAATTCCCTTAAG -TGTCAGTCATACATGTCACAACTCCAAGGGCCAATATACTCACTCCGAATCCAACGACGG -GAATCTCGTAGCCCGAGACCAACTTGTAGGTAGACTGCAGCGAAAGGGGGGCCATGGTGA -ATGTTTTGATGGTTGAAAGAAGAATGGGGGTGGGCTACCTTAATAGACAAAAGTTTCGAC -ATCCACGGATCCCCTCGGTTCTGTTGTTTCCACCCGCTTCTTGGGGCTTGACGCCATCTA -AGTGGGGATAGACGGTATCGGGGACTAGATGTAGCTTATCAATGGCCGATCAACGCAACG -TGGGGCATCTACCTATGAGTAACTTGACCGGGTTCGGAACTCTCAGAATCAGATTGAATA -TTCCCTCGGATCATTGCCCCAATCTGATCTGATTAGGGCTGAGTGTTATGAGACATGGCG -TATATGGCACCCCTGGTCAAGTGGAGCGGGTGACGTTCATCCTCGATATTTATCGGTTTT -GGAAGTTTAGAATTCATTAACCTCATATTTTTTAATAACCTATCGAAGACTAAAATGATC -CTGCTAGAATAACGGCATCAGTACAGAGTGAAGCGCGTGACATCCTTGCAAGATGTCAGG -CGGTAATGAATGGCAGATTCTGTACTCCGTGGTGGGGTTGAGTTTGGCAAGTTCCAACAC -ACTCGATATGCTCTCATACTAGATTGATTTGCTCGAATCACCCTTTGGAAAGGATTGTTC -AAAAGCTTTCCAGGGCTTGATCCATTTTTCTACCCCCTTTCATTTCCGCCAATTTGCGTA -TCTTTGTGTCGACATATGACGACATCGCAATAACAATATCCAACCCGCCTTCAGTCGCAT -TTTACCTTCCCAACCCCTCTACCCATAAACATACAATATCACCGCAGGAATGCGTTGCAT -CAAGAACAAAATTGAGCACGATGGCTCTGGGGCGGTGACATTATGTCCTGTCGAGCCAGA -AGACATGGTATTTAAACCCTCACTTTCCGTCTCAGCGCCTGGCAGTCGTCTAATATAGTA -TGCGACGGCTTTAGTGGCACGCATACAACCTGATCCGACCAGGCGATCTCCTTCGAGCCA -GTGCAATTCGTCGAGTTACCACAGTTCAGGATACTGGGTCCACGAGCTCCGCGCGCGTCC -ATCTTAACCTCCTAATCCGCGTGAAGAATCTCGACTTTGATCCGCAGAGCTCTCAGCTCC -ATGTATCAGGACAAATTACCAACGAAACACCACACACCAAAATCGGACAGTTCCATACCC -TCGATCTAGAGTTGAATCGCAATTTTACTTTAGAGAAAGAGGTCGGGGCAGATGGTGAAG -GTGTCGGATGGGACAGTATTGCAATCGAGTCACTCAAGGACGCTGTGGACGAAGGCGGTA -AACGCCGTGCAGAGGCTGTCGCAGTAGTGATGCAAGAAGGACTGGCTCATATCTGTTTTA -TTGGCCAATTCCAGACAATATTGAAGCAAAAGATCGAGATGTCTGTGCCGCGGAAACGGC -AGGGAGGAGGAGATCATGATAAAGTGAGTATATGCTGTTGTTTTGCTGTGTCTGTTGACA -GGGCTTCAAACTTACGCTGCACACTCCCAGGGAATGAATAAGTTTTTCAAAGTTACACTC -GAAACACTCCTTCGACAAATGGAATTCAATACCAGTCTTACTTCAGGCGCCAACAACGAA -GCTGTACGGCCCGTGCTTCTCGCCTCACCTGGGTTTGTCGCCTCTGGTTTCCAAAAATAC 
-ATCCAATCAGAGGCATCAACAACGACACCCGGACTCAAGCGACTTCTTCCCAGTCTCGTG -GTTGTGCACTCCGCATCAGGGTACACAAACTCACTATCAGAAGTCCTGCAGTCGCCCGCA -GTCAAGACAATCCTGGCTGATACCAAGTATGCCCGTGAGACCAAATTGATGGACCACTTC -CAAGAGCAATTACGCAAGGAGACCAACAAAGCAACATATGGTCCTCGTGAGGTTGAGTAT -GCTGTCGAACAAGGTGCTGTTGGCCGAGGTGGCGGCGTACTTATTGTTTCGAATCGCTTG -TTTCGATCACAGGATGTCGCTGAGCGTAAACGCTGGGTGGGGCTTGTGGACCGCGTCCGC -GACGTTGAAGGTGGTGAAGTGCGAATTCTCAGCTCCGATCATGAAAGTGGAAAGCGTCTC -GAGGGGCTGGGTGGCATCGCTGCGCTTTTGACATTCCCAGTCATTGAGCCGGACTTTGAC -TCTGATGTAGAATAAACCGGCAATTAAACTGCCATCAAAAATAGACAACTATCTATAACA -CATAGCATAATCAGAGTATGCAAAAATTCAACCTTGTACTCGGCAAGTCGTTTTTACCTC -CTATAATTTATATTATGCAATTAAGTTTTCTTACCCTTTCAGCTTGAGCGCTCATAAGGC -AATAAATATATACAGAAAGACACCGATCCTTGAATATATCTTCGAAGACGGTACAGACAT -GTTATTCATTGCGCCGTTCAGCCCACGTTCCGTGCATTTAGACAATACATCAAAAGAACT -GATTTAGTGTCTTCGAAATATCCATAGGCCCCCTCTCTTTCATGACTGCTCGCTTGGTAG -GACGATCTCCCATTTCAGGTGGTTCATTGCGCTTTTTCTCTCTTCCACGCTTTTGCTTTT -CGATCTCGAGCTTCTGTTTCACAGGATCTCGAGTAAATAATACAATTGACTTGGAATTAT -AGCGAGAACGAATAACGACCCTGTCTCTTCTGCTGTGGTCGATAGTTGGTAGCTTGAACT -CCAAAATTGGGGTATTCGTTGGTAGGGATGATGGTCTTCCTAAATGCTCCTCTCTGGGTG -TCTGGAGACTAGACTCGTAAACATGAAATAAGGCCGAGAGATTATCGTCGATTGTCTTTG -ACGTTCCATGGGTTTCCCAAAACCAATCGAGTCTGCAGGCCGACCATCCCTTTATGGGAA -CGATGTTATTCGCCGGCCTATCGTATTTTTTAACCACTTCTGTGATCCTCTTACACTCAT -AAGCCACAACGAGTTGAAGCTCTGGCAGTTTCCTCAAAGCTTCCAGTGTCGCAACGGTTA -TATCGTGTTGGTGGTAGAACCTGAGAATGCGGAGGTGCTGTAGTGCTTGGGATTCGATCC -AACCACGCACAAATCCATCTTGCAAAGGAAGACCATCATCACTAGCACCCACAGGATCCA -ACGGGGCGATATTTGACAAATATGTCTCACTATAAGCGTCCATGGCAACCAAATTCTTGA -TATTTGGTATGGTAGTCAGGTCTGTGGCAGTACAGTATGTGGTTGCGATTGTCAAGATTG -CACGCCAGCGAAGATCATCGCTGTTGAGGAGACCCATGTAGTCTCTCAGTGGTCTTTTGG -GACATCCAGCATTAAGGCAGTAGTAGGGACTGACCTCATGGAACTGTACAGGATAAACGC -TGGCCATTATTTTCCACATGTGCATTGTCTGCTTATTACTGGGTAGGCGGTTAGGCTTGC -GCAAACAGGCTTGGGAATTTATCGAAAACGCTTTGAAAAAGCCTTGGATTGGAGCTTACC -ATTTTTTGAGGGAATCCCAAAGGTATTTTGCCAAATACCAGTGGACATATTCGAACAGCT -CAGGCTTCAGCGCCGACTGATCCCGCAGGAGCGCTTTCATTGCTGCATGCTTGAGTGATC 
-CAGATCCTTCATCATTGGTTGCCTCCCATCCATGTGGTAAGAAGGGAATCTCATGATACC -ACGTGGGGCCGAGACGTAAGAACCCGGGTGGTTGGTTGTCTTCAAGGATGATGGACATTG -CGAAATGAAATCAACGACAAGAACAAGCAGGATTGTAGGGGAGGCTGATGTCTGCTCGAG -TTTTAACAACAAAATTTGAGGATACGCGCGTGTCTGGAGTCAAAACGGGACTGATTTGAT -GGATTCACGTGACCCTGTGTATACAGGCAATGAGAAAACCACAGGTCACTAGCAGCCTCG -GGGTAGGAGTTTGACTTACCCGTCGAATTATCACAACTTTCAAAGGAAATTGTCTTTCGG -CTGACTATCCCGGTCTTAGTACACTTTGGTCATGACTTGTTCTACCTTTCTAGCTAAACA -TCCCGCTTATCTCCGTCATGTTCGTGAAGTTGCTTCTGGGGAACTTGTAATGCCGGAATA -CGAACGTGTGAGTGACTACATTCAGGTTTCCTTTGATGAATCGAAGTTATGTCGACGGAG -CGTACAAGGGACTGACCAACATCATATACAATTAAGGATGAGAAAAACCGTTTGATTCTC -CAGGTTGGCGAACAATACTGTCGCTATCCAGGGTGCACCGTCAGCGAAGTGAGAGGACCA -GACCCGGAACATATGTTCAATGAGCATCGATCTATTATAAGTCGATAGAAGGATTGTTCG -GCTTACTGTTCTTAGCGTCCTTACCGGACTGGAAATTTGCGAAAGCATGTAGCCCGTCAC -AGTGAGGATTTGCAAGTTTCCGACGGTGTCATCGGTCGCATCAGCTACATTGAGCAAGAA -AAAGTTCGAGGTAAGTTGGATGATCTGATTCCCCAAAGCATACCCAATGAAATGTTTAGC -TTGGTACAATTCTCTATTTGCTTCCTAGCCGGCCCCGTTGCTGACCTCTTCTTTGGTCGC -ATTTAGAAGCGCAAGATCAATTGCAATACCGGTCGAAAGGCCAATACCTGAAGAGTAACT -ACGGAGTATACGATTTATAAAGATGGGGTGGGGCTTAGGCGGGCGACTGGGTGACTAATC -AATGATACATTATTAATATATTATCCTATTCGTCCGCTCTCAACAATGTTCCACGTTCCC -TTGAGCTTAAATTGCTTAAATTAAAGTCGGTTTTGTCTTTCAGGGCATCAAATCCTATAG -GCTACATATAAGCGAAGAAATAAAAGAATAACTATCAATCCATAGATGTGCAGACTGAAT -TTGTCAATAATATATTATTAACACATTATTAACACATTATACGTACTAGGTAGTTGTTTA -TTAGCACTCATCCAGTATTACCTAGTTTCTTTTTGTTAATGCATATAATGAAACCGGTCC -TCGGTCTTCTTCGCCATCCATTGCTCTGTCGCTTCCAAGATTGCCATTGAATCAATTGAT -CACCTCTGGTGTCCATTCCATGCTGGGAACACCGGTTTACCACAGTCCCTTCCCATGTCT -GTCTGGTCATAGGAAGAGCGATGCCATCTTTGGCTAGAAAGTGGCCATCAGAGCAAACTT -TGCCTTTTGGACAGTCTACAGTAATATCATAAACTGCTGTTCAGCATGATTATGGGATAA -CACCAAAAGCAATGTGATGGACTCACGGGAATTTCAAGTTGATTGGTAGCTGAAAGGATA -ACTTGTGCAATCTAGGGGACAATACCGAGATCAAGACTTATAACCAGGAGTTTATTCGCT -AACAAGTGTTTTCGATTTACGGCACGGAGGAAATCTATCCATACGATCAAATGGATGTGG -ATTTTTGTAGCATGGGGTTCAGCATTGAGAACCTTGGGAACATATAAAGGCCGACATATG -AGAAATAGCTAGGATAGTGACAATCATTACTAGATGTTGTCAGTTTGGGCTACCCCTAGG 
-GCGTTCATGCACATATTACCTTAGCCCTGCTCATTCCTGCGTGCATACCGACTCCAGGAA -CGTACTTTATTCCCCTTCCAGTAGAGCAAGTAAGGGACAGGCAAAAACAACAACGAAAGG -CAACCCAACAGAGTCAATGTCCACGCAACGCCAAGGTTGTTATACATGATTTCTGCAATG -ACAGCCATAATGCCGGCAGCAACAAGTCGCGTCATCTGAAGACAGGAAAGCGCGCTTGCA -GCATGTTGCTCAAACACATCGGCGACGTACTGGTAGGCCGAGACGAAAACACATAAGGTC -CCAAAGCCGTAGAATACCGACGCGATGAGCGGGCTCCAGAACGAGATTGAGAGTCTGGCC -GTCCAGGCCATCCAGAAGAGCGAGATGGGGATAAAAGGCGCACCGAACATAGCCATGTAT -AAACTGATCTCAGGGTCTGGGCGCTGCTTGCCCCTCGCACGACTGAGGTAGATCTCCCTA -CGGAGGAGCCACATTGAAATTGGAAGTGTCAGTGCCGAAAGAATGACTCCGACCTCAATG -GCCAGGAATGCCAGGGCCGTCGAGGTCGCACTGAAGTTGTTTGCCTTCTGATAGATGGAG -ACGTAGCCCGCGATGAAGGTGTACAGGATCAAGAAAAGGAGGCACAGAGAGAGGGAGTGG -ATCATGATGATTGGTTCTGTCACAAAAAGCACAAGTGGTCGGTATAGCGAGGTACGCATG -CGGCGGGCGAATGTGGCTTTTCTGAACTCCAGCGGAGACCGGTACCGGTCGTCACCTGTC -AGGCGGCGGAGCTCTTTTGCTTTCCAGTACAGTAAAATCGGACTGTATGTCTCGGGCAGG -AACAGCACAACCGGGATCAGGATAAGTCCTGCGAGAATGATGGTCATCCAGTCCACCCAG -CGCCAGCCTGCCGATTTAGCATGAACGGCGAATGATCCTGGTACTGGGCCCACCAATGGA -CCCGTGAATATAATTATCGAGAAGATTGTGAAGATGTAGACTCGTTCAACGCGTGACCAG -ACGTCCACAACGGATGCTGCTGCTAGAATTGCCGGGGCAGATCCAAAGAGTCCAGCTAGA -CCGCGGCAAACAATTCGCTGTACCACTGTTTGGGAAAGCCCTGCGCCCATGTCAAACAAC -ATGAACCCGATCATGGTGGGTATGTACACGGGAGTGCGTCCAAAGACTTCTGATATAGGT -CCCGTTATCAATGCGCCTACTCCAGCGCCAATCAAGAACACAGCTAGAGAATCTATGTCA -GTCTGGTTAAAGTCATTCCAAGATTTATCCACGTACCAGTCGGCAAAGTCTCGACCTCAA -AGTTAGTATGGTACAATTTCTTGGTCGATGTGAGCGCAGTGGAATCAATCGTGGATGACC -AGAAGATTACACATCCAGTCATTGAAGTGAGAATAGTCGTCCACACTCGGCGTTTAAGTG -GCCAGTTGTGAGGATCCATCGTGTCATAGTCGCCTTCAAATGTGACGATGATGAGTTTGT -TCTTCTTCACTGACTCAACATCAGTTTCGCTATCCGTCGAATAGCCCGGCCGGGCTCTTT -CGACGCCAGTCAACATAATATCAGCCGTGTTTCCCAGCGTCTCTTGTGAGTTGATGGTAT -GCGGATCGGCTGTGATTTCCGGATCGTACTCCGCACCCTCGACATCTCCTTCCCGCTCTA -TATGCGATCGAACCGTATGACGAGGTTGAAGTGATGGCCCAGTGATGAAAGTACGATGGC -CGTGTTCTCTCCTTCGTGCGGCACTAGGGTCGTCACTTGGCTCGCGAGCTTCCGTGACGA -TTTCTCCTTCGCGGTAAAAGTAGCGGCCCTCGTGGGTCCAGACATTCTCGGGCTTCTCAT -GCTTTGTAAGAAATTGTTGCTCGATCTTTCTACGGATCTTGCGGTGCTGGCGAATTGACT 
-CCATTATGCTTGATTCGGGGTCTTGCGTTCCAAGGTCGAATTCGCCCTTGTTACATTGAA -AATTCAAATTAGTTGGGGAGGTTCAATATCTTCGGAGGCAAACAATGACATATGACGGTC -CTCTCCCTCTAAGGAGATTAAATCCGTACAGTGCCATATCCATATCACTCGGTCAGGTCT -TCCGCCAATGACAAATCCCATCACATCAAGTTCATAACTTCATATGTTTGGCTGATGGAG -AGCTTGTAGAATGCTGACCTTGATCAATAGCGATAGCTGTGTTTCCCCATAATGAAGCAT -CGTAACGCCAAAAGGACATGTGATGTGTCGGTGGTGCAGATACCCAGGTAGGCAATTGTC -TACGATGATGTTAGAAAGACTGATCCTTATCGTCACTGGGCTAGGTAACATCCATGGATG -CAGCTCATATTCAATGGTTAAACAGTAGCAGGTCACAGGTTCATAAGGTTTAATCACAAA -TAGTGCACCACCCACAGGCCAAATATGCAATGGAACACCGAGGGTGGAATTACATACATG -GCAATTCATTAAACACAAAAGCATGCTTCATTGAAAGCCAGGCAAGCATCATTCCCTGTC -TTCGTTTCCAGATGCAATTACATCACATAAAGGATATCGGTATCTGTTGGAGTCGTTAGA -ATTTGATTGCGACCAAGTAGCCCGAGAATCTTACCTAGGGGATGGCTGCAGCTCAGCTGT -GGCTTCTGGGAATATTTGGACTCGAGCTCATCACGAATGCTGTTCAAAAGCTGAACATAA -CTTTGCTGAGGGTTCTTCCGAAGCGCAGCGATAAAAGCCCAGGACATGGCACCGGTGGCC -TGGCCGGCGATCTGGGCATCCTGACTGGTCTGGTCATCCTTGCTGCCGGACCACATGATG -ACATCCGCGGGACTAGTCTTAGTCTGCTTGTTGCGTTCATACACTTCGTCACCCTTGGTG -GCCTTCTTGATGAAGCCCATGGCAGTTGACATCATGCTACCCATATCACCACGCGCGTAT -GCTGACACCACACCAAGCAGGCCCTGTCCAGCCTCCTTGGCCAAGTTGGGTTCCTTGAGT -ACACCGGAGGTGGAGTAGACGTAGGGGAGATCCAGAGCGGAGCCAGAGTGGCAGGAGTCG -AAGATCGCTGTGAGCCGGACTCCCGGTTGTAGAGTCTGCACCATGATACGGTGCATCTCA -TCATCAACGATGTGGCCAGCAACCCGGAAGTCCACCGGGTAGATAACCTCATCATACCCA -TCCTCTTCGTCGCCGTCTAAGTCGGGTGTTTGACCGCCGTGACCTGGATTCTTGTTAGTT -CGCCACCATAGACACTACTGAAGCACGGCTACTCACCAGAGTAGTGGAAGAACAATGAGT -CATTGGGACGCGCATCCTTGACCAGCCAGTGCATAGCCCGAAGAATATTTGCCTTGGTTG -GCTGGCTCATCGGATTTTGCTGGTCATCAGTCAAGAGGACCATGTCCTCGCGAGCATAGC -CGAAGTTCTGGTTCAAGTACGCCGACATATTCTTCACGTCATTGATACACCCGCGCAGCT -GACCTTTCTGACCGAAGTAATTGATGCCAATCAACAACGCCTTTCTCTTGCCCGTACAGC -GGGAGTATTGGAAGTTATAGCCTTGGGGAGCGCCGTGGCCGAAGGCGACGGGGTTGCTAG -GAGGGGGAGGTGGACCGCCGCTTTGGCCATAGGCTAAATTTGTGCATAATTAGCTTTCCT -GTTGAAAATGCGATTCTGATTTGGGAAAAGTTAGACGGACCTGGCCTTCCTTGCATTGGT -GGCCCAGAGGGTGGGTGCTGGTTGTACCCGGACGGTGGCGCCTGAAAAGAATTGATACGT -GTCAATCTTGTGTATCTTCGATACCAAACTCGTCAGAACATACATTGTAATGCTGAGGGG 
-GCGGCGCGCCGTAGGGCTGGTGAGATGGCTGGGACGAATGGCTGTAACCGTATGGGTTGG -ATTGCGGCGGCGGCTGGTGCGACTGCTGGTGGTAGGACTGGCCAGGATAGCCGCCATACG -ATGGCTGTTGATGATTCATTTGTCTGCAATCAAATCTTTGTCAGACTCTCTTTTCAATAG -AGCAACAGGATAAACAAGCGTACGGGTACTGCTGTTGCGGGGGATATGACGGTTGCTGCG -GGGGGTACCCCGGAGCGCCCGAGTAAGGTGGATAGTAAGACATCAAAGGAGAGTCCTTAA -GTCCAGGGAATCAAGCGAAATGCAAAGATGTAAAGAACAGAACGCGTTGAATGTGGAGAA -GGGGGAGATTGAAGGCAACACAGATCGGAAGTACGGCAGCCGCCAAGCCTGGAACAGTAA -CCTTGAAAGAATATCCAAAGAATAAGATCAAAGTCACGGGGAGATAAAAAAAGAAAGATC -TGGAGATCGTCAGATTAGGAACTCGTCATATACATACTCCGAGGTTCGTCCACTATCTCC -TTATAAGGCTGGATACAATAAAGGAATCAAAAATCCCCCAaaaaaagaggggggaaaaca -gaaaatagaaagaagaaaaaCAAACTGCACATCTGGACCTTTATATCTGACATGCATTCA -TCGGCAGCCCCGATCAACCCCTGCCCACTCCTAGAAGCCCAAAACCTCATTGGGCTCCAC -TAAATCCTTGAAGATTTCGACTTTGATACAGTGCAGGGGAAATATACGGAGTAATAATGG -ATCACAAAAGATTATACATATCCCAGCGCTGTTCTGCACGTCATACAGCTCAAGGCGATT -CAATAGCACCGTCTTGGCGTGGGAACTTAGCCAATGAGATGTAATATGGGTTAAAAAGGT -CTCCAGGCAATAAGGAGTAGGTAGATTCCAAATAAAAGAAATAGTACGGTGTCGAATATG -CTCGGATCGGCTCGGCCTTGGAAATGCATCTGATATATCATCTGCATGCAATTAAGTTCA -AGGAATACTTTTATGAGGACTATGTTTATATTGTATGTACTCGGAAAGACCGTGTCAAGT -GGGGTGATCTCTAACAACTCGTCTTCTATTCTCCCTGTACTTCATATTCAATATTCATCT -CACAAACACCGGCTTGAAAGCCACGAAGGTCAGAGCAGAGTTCACTTCAACTGCTTTATG -GCCTCATCAATCATGCAGGTGTCCTTCTTTTTTGGGGGGGAATATCATCGAGCATTATCC -AGTTCTGGTCAAACTCGTTTATCCTCGTCGGGGCATGCCGCGGCCACGACCACGGAGGGC -GCGCTGGGCAGCACCTGGCGGCATACCGCGTTGTTGACCAGGTTGAGCCGACTCCTCATT -CTCTGTTGGGGAGGAGATACGGGTTAGTTATGATATAGAGGAAGGTTGAAAGATGAAGGG -AAAAAGTTAGCAGGTCACTTACTTATCGTCTCGTCCACACTCAGGATCAAGCAAGAGGCT -TCCACTGCGGCCTGGATGGCATTGACCTTGACGAGGCTGGGCTCCCAAACAAAGGCGAGC -ATGTTGTCACGAACACCCTCGTTGTCGAAATCGACACCAGCCCAGGTGTTACCCTTGCGG -TGCTCAACACGCAAGCGGTTCAGGATATCTGTAGCGTCGAAGCCAGCGTTATCACACAGC -TGGCGGGGAACAACCTCCAGGGCCTTGGCGAATGCCTTGACAACGGCCTGCTGCTTGTGT -GGGACGTTACGGTCGGCATACCCGTGCATATAGCTCGACAACTCCATTTCAGTAGCACCA -CCGCCAGCAACGATGGTTGTGTTACGCAGAGCACGCTTGACAATCATGATAGCATCGTGC -AGACTACGCTCCACCTCCGCAATGAATTGTTCAGCTCCACCACGCAGGACAAGAGTGCAA 
-GTCTTAGCACGAGGGCAGTCAGAGAACAAGTTGAAACGCTCTCCACCAATCTGACGCTCC -TCGAAGGCGCCGCAGGTACCCAGGTGGCGCTCCTGGATGTCAGTGCATGTCGATTGGGTT -GCGGCTCCGGTAGCCTGGCAAACACGGTCCATATCGTCGGAGGCCACACGGCCAGCACAG -AAGATGTCTCGGTCCGCAAAGTACCTGCAAATCAAATAATTAGTGTCTAGACCCATACCC -TGTAGCAAATAACAAGCACGTACTGTGTGGCAAGATCGCCAATGGGCAGCTTGCTAAGCA -CGACCTTCGCTCCTGTCTTGTAGATAGCCTCAAGCTTGTTGAAGATAATCTGCCACTCAG -CATCGACGATGGCCTGGTATTCCGAGACCTGCTCGACACGGACTTCGGCATTGTCCTTCT -CGCTCTTAAGCTCCAGCTCGACGTTCAAGCACACAATACTAGGGTTCTTGAAATGTTTGG -GCTGCTGCTCGAAACCGGCGTAGGAGAAAGTCTTCTTAAAGGCGACACCGTCGACGAAAA -GCGAATCCTGGAGACCACCACCAGTAACCTTCTTGACACCAATCAACCTCTCGTTGAGGT -CATCCTGGTCCAGCGAAAGTACTGCATCCACAACCACTATGGTACCATTTCGTCAGCCTC -GGGGAATTGCAACCACTAGGTAGGGGATCCTCACTCTTTGTGAAAAAGTCCGAATTCCGC -TTGATAAGCTTGCTGTTCATGGCTGTTGCGGCTAACCGTCTTAGTGTCTGAACCTTCTTC -TCCTGGCTGTCTGAGGCATTCATCGTGTCCACTGCAATCTCCTTAACCTTGTTGACTGCC -ATGGCGCTCGCCCTCCGCAGACCCTTGATAATGGTCTGCGAGCTGACGCCCTGCTCAACG -AGGCCTCGCACCTCCTTCAAAATCTCACCAGCCAGGACAACCACAGAGGTTGTTCCGTCA -CCGACTTCGGCATCTTGGGACCGAGCGATATCTGTGAGAATGCGGGCGGCGGGGTGCACA -ATATCCAAAAGCTACCAAATACGCGAAAGTCAGAAATCACAAACTCGCTCCGGTCAAAGA -TTTTCATCGGTGGCTAGACGCACCTTCATCACTGTGGCTCCATCGTTTGTAATGGTTTGT -TTGCCATTGGCATCCACAAGCAGGAGATCACCACCGTAGGGACCGAGAGTACTCTTGACT -GTTCCTTGCACTGCGACACAGGCGTTGATGTTGGAGATAATCTGACCCTTTCCTTGAGAG -GCATCGGTGCCTGTAGATTGCAAAGCAGCAGTAGTTAGCATAAGTCATAGAATCCCACCA -ACGAACTGCCCGCACTGGAGACAACACATACCCTCCTTCAACACGACAATCGTGGGGGTT -TGGCCGTTGAAGGCCATGATTAGATCGGTATTCGGACACGAAACCGTTCAGTAAAAAAAT -AAAAGAAATAAGAAAAGGGGTATGGTGAGTCGAAAGGTCGGAGGGGTTGATCTGCCAAAA -TTTCCCAGAGCTCCTGTTCCCCAAAAACCCCCAGTCGCGCAAGGCGGTGACCAATCAAGG -CTCCTCAACCCTACGCATTTGAAGAGATGTACACAATCTATATTGAATATAGAGTTTCTA -TAGTATATATTGAGATATCTCATTCGATTAATATACCTAAAAAGGAGTACAGTGTATGTA -GCATAAAGCGTTATATCACTGTCCACGGCAAAATAGTGGTGGAGTATCAGCGGTATCTCA -GACCGATGCTTCAACGCCAATAGaacaaaagaaagaaaagtaagaaaggtaaggttaaga -caaaatagtttcgaaaagaaatagaCCCCCCGAGATAAGAAACCGAGATAAAAAGTTTGA -AGCATGTTCACGTAAAGTCATAATTCCCATCTGAGGGACTTCCATACTCGAACTTCGAGA 
-GTAATTCAGCATCATGCTGTAACTCATGTGAAGGTGGGTTCACAAGTCCAAGCACGTCTT -GTCGAAACGGGTCTGAGCCATCATCTACCATCATCTGGTAGGCATTGCTGGCTGTATGAT -AGCGCATTGCATCGTCAAAACTTGACATGCCGAGATTGTGTGGGCCCGGTGCGAACTGGC -CCGTCATGGGGTTCTGTTGCTGGTAGCGTCGTAGCTGTTGGTGTATTTGCATTATCTGGG -GGTCAAGGGGCATGGGGGCGAACCCAGTCATGGGGTGGACTGTGCCAGGCATGGCTGTAG -GAATTGGCTGGTGCATAAGAGGGTTGTAGCTGGGAGGCTCATCGGGTGCTTCGTCCATCG -CCTGCGGGACATTGGCTGGCTCCTTCGTAGGCTGTTCCATTGCGCTATCCATATCCATGC -CAGACATTGTTTCCGATGGTGATTGATCATCATCTTTACTCCCAGGAATGGGGGGCGCCT -TGAGAGTCTCTTTGCATTGTGGGCAGTCTCGAATCACGCGATTGACAGTCTCTTTGATTC -GTACCCAGTGGTATTTGACCGCAATAGCTGCCGTAGTTTTGTTGATTCCGGCATGTTGTT -TAAGATGAACTTCTTGGGCGATATCGTACTGCTGCTGCGGGTCTGACACCACCTCTTTAT -CCTTGAGCATCAGCCTCTCAGGCTCCCCATCGTCCCCACTAAGAAGTTTATAGTGGGTCG -CCGCACTGCGAAGTCGACTCTTCTCTGCCCGATCAGCACCACGAGGGTACTTTCCGTGCT -TCAAATAATAACGGATTTTCTCAAAGCGATCCTGGGACACAGAGTCCTCGCCATCCAAAC -TGAGGCCTCTGCCGTAGATAATTGCGTCGACAAACTCCCCGGGTTGAGATTTGATACTGC -CTCCACCAGGGACTCGGCCAATCCTTTTGAAGCCAAGGGAATCCCAAAGGCGACAAGACG -CAACGTTGGACTCATACACCAAGTTGAAGACTGCATATGTATATCCCTGGCGAACATTAA -CTCAGAGCCCGTTTGAAGCGACTGCGGTAACGAAACTCACTAATCGGGGGGCCCACTCCA -GATAGGATTCGCCCATCAATCTACCCACGCCCTTGTTTCGAGCAGCATCAGTGACAATGA -AGGTGCCATTGCACACATGACTACTACGGCCAGGGTAATTAGGCCGAATGTGAAAGCCAC -CTAGACAGATCTTGGTCCAATTAGCTCCAGCACGGTCCATCGTATGTGTCTGCTCTGCAC -TCTCGATGTCGCCAGTAAGCATGACCACACCAAAGTTCGAGAACCAATATCGTCCGAATT -GGGCCAATGGAATGGGCTCTGTCATAGCATATGTATCTCCCTTTTCAATCTCTTTGTTAA -ATTGATCAGAGAGATACTTCATTAGAGCCCGGGGTACTTCCTCTGCTGAGGCAAAGGGGA -CCAGGGTCGCAACCGTAACACGATCTCGCAAAGTTACTTGTCGCGGGGCCAGAGCAAGTG -GATAATCTAGCTCAGGCCCCGATGGAACAAGCTCTGAATCTCCATGAACTTCAGACATGG -TAAGGTGGCCCTCTGGGGGTTGTCAATCACACTGTTTGACATGTTAGTTGAATGCATTCT -ATTCCCATATCTTTGCGACACAAAACCTCATAGATGTATACAAAAACAAGAAAACAACTA -CAAGGAACTGAATTCAACAGGGGGTCTGTTCACTGACATTCTGGACTGCTTCTAGAAGCC -CTCGATCGCACTTCGAGGACCGCGTGAGTATTCTCGAGTCCGGAATGGCCGCAACGCAAG -TCGAGAAGTCGTGATCCTTGCTGACTCATTTCATTAGATAAGCCTATCTATTTGTTGCCT -GATTCCTCAGGTTTGATATACAATGAATTTAACTAAATTACTAAAAATTAACGACGATCA 
-AGTGCTGTACCCATGAGTGTGATAAGTGAAGTGGCGAGTCTGATTATACTTGAATGGCTG -GTTATGAATGTTGAGCGACAGGATGTAAAACCTGAGAACTAGGGCAAAGATAAAAGAGTG -GGATTTTAGTAGTATCTAAATTTGAATGAATATAATAGTTCTAGACATTCCGCGCGATGA -TGTCTTCCTTCTTTAATGTTTTTATTATTTTAAAAGTATTATGTCGTAAAAAATAGCGTT -TCGGTGTATGGCGTAGCCCACGATGCTTAGGCCAAAGCCATGGGCGCATGTATATATCCC -TCAGGTCTCATCGGTATCATCCGACTTGATCCATGCTCGATATACGCCATGATCACTGTC -TGCATACATCGTGTCAAATCGATTCTAACAACATCAACATATATATTCCAAGTGGAGTGT -GCATTGACATTATTATTGTGGCAAGATTGCGATGTACTTGAGTTTCTGTGGATCTAGTAG -GATAATATCTGATTGAGACTTGGAGAGCATACATGCCTAGCTTTAATAGTGGGGAAAACG -CAAGGATAGGCTTTTGAGGCCAGCTGAGCCGTCCAGCGTTACGTTATGTGAGAAAGTGAT -AGGTCCCCGTCAGAAAGGTAAAGATTCTCGGACCCGTAGAGCTGGGCTTCTTTTCTGCAT -AACACGCCATATCTTAGCAAATTCGCAGTATATAACTGCATGTAAGTCCGCCATGGCAGA -GCGTCCGCAATACTTCAAGAACACGTTAAATCCGCTCCCAATCAATGCGACCACGCTTTT -TGAGAGCCTCCAAGAGCTTTGTACTGCTGTTCGCAATGGTGCTGAGTTGATACAGCAGAA -CACTCTCTTGCCTGAAACCTGGGGCCCAAATGGCATGTTTAAAGCTTTTCCCGGTATTTC -AAGCTCAAGCCCCACTCAAAACTACCATCTAACCGCTCAATAAAGGCATTGCTCTTGCAT -TCCTACGCCTAGATTATCAGTCATCGGTGCTACGGGGGCACAAAGCCGCTGCTCCTGACT -ATCGTCGCTACGCACTACAACGAATCCCATCAAGCCTTCCAGACACTCCTCTGCTAGCCT -CTCGATTGTCTCCAATTGGTTCATCTTCGCCTGTGACTGCTGTCACTCTGCGCATCTTGA -GTACTTTTGCAAAGAATAATTGGCAAGATGGCGCGCATCTCAGCATCACAAGGGAGGATG -TCACTTGCCTCCATGATGCCATGCAGTGCGCATTGAAGAACGAGCCTTTTGTTACTCATG -ATGGCCGAAAAATGGGTGGAGATGAAATGCTGTTTGGTCGAGCAGGACTTCTTTGGGCTC -TTTTAAACGTTCGTGCTCATAGGTTTGATCAAGAGACCCAGGATATCCTCTCACCTATAC -TGGGGGCAATTCCAGAGTTGGTCCGAGTGATTATCGATGCCGGGCAACAGGGGTCCAAGG -AATACACCGAGAAGAATGGAGCCCAAAACGCACATCCATTGATGTATGCATGGATGGAGG -GCCATTACTGCTTTGGGGCGTAGGTGCATCTCCAGAGTTCTTAATCGTATGCGTCAGCTA -ACATAAAATGCATAGGGTCCATGGAATAAGTAAGTCTACAGGTCATTATTTGGGCTTTGC -AAAATCCAATGCTAATGTGATTTAGCCGGTATTCTGACCATCCTGCTTTCATGTAAGCCC -GAAGAGCTTACGGACTATCTTCCTGTGATTGGTGGTAGCATCACTGCGCTCTGCAAGCTA -TCTGTAGCTACCAATGGCCACCTCCCAATGACGCTACCTCCATATAGCTTCGGGCAACGC -TCAGAACTAGTCCAATTGTGTCACGGGAGCCCAGGACTATTGATTCTTTTGGGAGCTGCT -TTGAAAAACACATATTTAACCCGCGCCCACTGGGATCCAACTTGGGACCAAGCCATATAC 
-TTGGGCACTGAGCGTGTGTGGGAAGAAGGGCTGCTTTCGAAGGGAGGCAGTTTGTGTCAC -GGCGTGGCTGGCAATGCATGGGCGTGGCTCCTCCTCCATGATTGCTTTGAATATCATTCT -GAAATTCTGAAGGATGGCCGGTCAGCTTATCTCAAGCGCAATCAACTATCAGCACTTCCA -AATGCTAAGGTGAATCGGGAACTCACCAGTGACTTCTTCCTCTCGAGGGCACTTGCATTT -ATGCTGCATGCGCGTGAGACTAAGCCATATAACAGCTCGCCCACATCTTCCGACAAAAAT -TATCGCATGCCTGATGAGCCTTACGGTTTGTTTGAAGGTCTTGCAGGCAATGTTTGTGCG -TGGGCTGATACGTGCGCGGTAATTCAAGCCAGGTTGCGCAAGATTGAACTTGCTAAGCAG -AGTATATGTGTTACATCTGGCTTATCACGAGATCCAATTTTCCGGGATATGTTCTCTAGA -CAATTGGGCTTTCCAGCCCTGGGTGGAAATGGCGCAACGGGTATTTTCTAAATAGCCCTT -GGGCTATTCCCCACATGTTCGTACAGTTGTTTTATCGAGTGACGTCACATGCCAGTAAGA -TACATGGCCATAACTTTCTCCGGAAATACGACAACAACATATAAAAACAAGGGTCTTGTC -TTTGAATCCTACCAATGATTCTTCCTTGAGGGTTACCGTAAAATTGTGGCTGAGGCGGGC -AACTAATGGGGCCAAAACTCTTTCCACCTTCGAAATCTGTATCCTTGGTTGAGAATTTGA -TTATAGTAAACATGGATCTCATTCCTCCCTCTTATGAATCCGCAACGGACAGGGATGCTT -GGACAATCATTGCTCGCTACATTCCTTCCAGCGACCTATGCGCTGCATCTCTAGTCTGCC -ATAGATGGCATGGTCTATTTATGCCTTTCCTCTGGGGTGATCCAGCATCTCACTTTGGCA -CAGATAATGATGCAGTCTACGGTAAGTGGCCTTAAACATGAGTGTTCAAAAATCCTAACT -AGATCATACAGTCGCCCTCACTCGATTTCGAAGGACACTGAAATATGCCCGACAAGAGGT -GCGGGCGTTGACCCATACCCTTCATCTTCCACCCGCTCTCTCGGAGATATATGGTGGTCC -GCGTGCAGGCTGGTTGAGGGACATCCTCGAATGCCTGCCATGCCTGCAGTGTCTAATGGT -CTCGAGATTGCCTTTCTTTGACCATCATGCCATGACAGCATTGAAAACCGATGAGAGATC -GAGCCAGTATAATATACGGCTCTTGTTGGCTGAACGTGAGCCGAATACTACTTCCACGAG -CCTCGCAGAGACTCTCTTCTTGTTTCCGGCATTGACCTACCTGGATCTTTCATACACCAC -TCCAGCACGAGACCGCAATGTACTATCTCAACTAGCCCAAATGCCCTTTCTGAGGGTACT -AAAATTGCGTGACATAGGGCTGAAGGATAATGACGCCGAATTTCTAGCTAATGCAATTGG -ACGTCGAGTCCGTTGTCTAGATTTGAGAGACAACATGCTCACTGACATGGCAGTCCGCTC -TCTGTTACAGGCATCATTTCTCCCGCCAAATGAGACGGAAAGGCAACAAAGTCCAAGACA -GGCCACCGCTTCGCCAGACCCCTTCTCCCACTTTACAATCAACGGGTGTAATTCAGGTTT -CTTCAAGCGGCCAGACTTAGATGAACAGTTTGTGAAGCTTCTCGCTCAGCCGGTTTTATG -CCATCCGTGGGTTGAAGACTTACCGCATACGGGAATCACTCATCTTTATATCGCTGGTAA -TCAAATATCCGTGGAGGCTGTAGCGAGTCTTTTGCTCTCGTCCCGGCTATATGCTCTGGA -TTTCGGGACCATAAAATCTCCAGATAAGGCACACCAGACGCACCATTATGGGAAGGAAAA 
-GTATCCCGGAGCTGAGAAACTAGTACCAATCTTGGGTAAAGTCGCAGGGGACAACTTGAT -ATATTTCCGGGCGCATCACGCTGTTCTCACTGGCCAACCACCAGCTAAAGACGTTGCATC -CAGTGTTGAACTTCTTCCAGAGCTTTCAGCAGAAGAGGGCCCCCGTGAAGTCGAACAATT -CGAGTTGGATGGTACGCAGCAATTTTATGAATTGCCTGGAGAGACAAGGCCCGTCTTTGA -GCTTGCGGATACATCAACAAACAACTCAATTAGCCCATCTTTGATGACAAAAAGGCTTCA -GACTTCCCAGCCATTGACAGCCTACGAGGATGAACCTGCACCACTGGTCAGGCGTGGTTC -GGCTTTTGCTCCAGAGGTTGTACAGCCCATATATGTGCATAGCAGTACTAACCAAGGCAC -GCCCTTTAGTCCTTCTGGGACAGACACGTGGGATGTAGGGCAGCCCGAGGGAAGCACCTT -GCCTTCTATTTCAATGTCTTCGGACGGCGACACTTTTGCGACCGACTTCTCTCTGAGTGG -ACGATTGCAATGCAGTTCCCCAATCTCATCAAATGATTCCCTTGCACGAATAGTACAGGA -ATTATTGGCGAAGCGACCGCATAACCAATCACTTCCGCTTCGGGGAGGAAAAGAAAGCTG -GTACCCTTACCTACACCCATCACATATTCCTCACATGGAATCTTTGGTACTTACAGACGT -ACCCTCGCATGTGACGCCTAACTCATCGATATTCTCTGCGCTCACTCGGTTCATCACTGC -GTGCTCAAACGAAGCATTGCTAGCGACTCTACAAGCAGGGTCAGACTATTCTCTCCCGCC -AGGGCAAGCGCGCATGCAAGCCGAGCAGCAGCGGTCACGATCTCTCTTTGGCTTGCGGCG -TCTAGTTCTCGAAATTACACCTGTTGATACCTCTAGATTGACATCATGGAAGCCCGGGAA -CCAATATAATGCCACAGGCCATTCAAGCACGGGTGACCGCGATTCTGAGAACCTTTGGTC -TGCAGCTACCAATGATTTCAGCTTCTTTGGCGAAGAAGAGTGCGGAGTCCCTGAAAATGA -TCCTGGCAAATACTTTCCAATGGCTGTGCTTAATGACAAGGTCTCGCTTATGCCGGAGGA -TGACTCGTTGCATTCACCCGATTCTCCTCGACTCGGGCGTCACCTTCCACAAAGCGCCAA -GATCCCTAATCAGTCAAGGACTGCTAGAGATGGAAAGCAGCAATGTGAGGCCCCAATGAT -TGACCTTGTAGCGGAGCTTGCGGCCTTTCGTCGCGGCAAGAAGACCGAGTATGAGCAAGT -GGTTCGTCGTGAAAGGATACGGCGAAGCACGATTAGCACGGGTGCTTCTGGCCTACTTAT -GCCATCATCGCCCCATTTTTCTCTCACACATTACGTCGAAGGTCATTGGAGGGGAGAAGT -GAAGGTCGTGCGGAACCCGATGCCAAAGGTACGGACCGGGATGGTGGACATGTATGGAAA -CTATTTCGAGAAAGGGTACTTGTACCCATGAGTTTTGTCTTTAGCGAGATACCCAGCAAC -TCTACCCAAGATACTTCGAGTCTACGTCAGGCAATCATGTTTTGTAGGAATTTTTGATAT -TTTCTAAGTCCTCATTTGGACCTGAACCATCCTTCCTCCCAGTCATAACAAGCCGTCGTC -AACGGGCTCATAAACATCACAATGCTAAAGAGACATCCAAACTGCCTTCATCAGACAATT -GGTGCCATCTTTTTCATCCTCAGTGTAGAAGCATGGCCGGCAGTTTCGGGACCGAGCATC -ACCCCTGCGATCCGGTCAAGAAGAACACCAATGCGAAGAGTGAAGACAAAATAAAAGAAT -TCATTTCAAACTAGTTCAATTTTATTTTGTTCATCATGCGACATTCATCTCCTTACAAAG 
-CCTTGGCGGCAGTAGGGGGCTGGAGCTTCTTGCCGTCCTTGTTGAAGCGAATACGGGGAG -GAGGAGCACGGTTGCGGCGGCCTTCGCGGGAGCGAACACTATTTGCGAAATATTAGCATA -CAAAAGGCGATCGAGTATCGAGAGTAATCGGTATAAACGTACCGGACAATCTTGCCGTGA -ATGGCGCAGGACACGCAGTACTGGAGCTTCAAGTACATCTTGGGGACAGAGTACTCAGCG -AAGACGGAAGCCTCGGAGATATCACCTGTAAAACGGAAATCGTTAGTTGCACTGGTTCGT -CTCGGAATTACTCGAAAGAGTCGTAGCCAAAGATCTGGATCTTGTCGTAGATTCTCGAGG -TATCGGGAGAGTTGTCACGTACGGATGGCAGCAGACTCGACCATGTTGCGGATGGTGAAG -CGCTTGATGGCCTTGTCCTTGGGGGTGCAGCGAGCGCAGTTGGAGCAACGCACAGGCTTG -ACGTGGCCGCGGCCATTCTTGTTGCGGCCGCTATCAAATCAATTAGTTACCGTCCCCTGA -AGTTGCGAATCATTCGAAGTCCAGTTTCCCATTCGCCTTGTCCAGCAATGTGCGTAGGTC -GCGATTTCGATGTAATTGGAGATGAATGTCGTGTGCGCTTCCGTCGGCGTCATTGCGGAG -ATGTCGAATGGGATACTGCTTCGCCGAGATAGGGGGATTTGTTGGTCTTACTTGTTCGCC -CTCTTCTTGACCATTTTGACGGTTTGTTCTGTTGATGGTTAGGTGTTGGTCTGTGGGTGA -AGGGGGGGAGATAGCGTACGGTTGACTTGTCGGGAGTGGGTTGTCGAAGCTCGCTCACAC -AAAGTCTCTTTCGGGATGGATTGGATTCCGTTGTGCGGGTAGCCCTAAGCCCTTAAGCGA -GACGGGTCACGGGTCTGATGCCCCAAGGTTAAGGCAGAAATTTCATGAGAGTCACGTGGA -TCACATGGACAGAGTCACGAGCAGTTACCCACCTAATTGGGACTAGCGCCCGGATCGTAC -CGTGTCATTTAGTGTTTACGGCTCCTCATCCCAGACTAACGAAGAAACACCCCTGTTGGG -ATTGACTATTGAACTGCCCATTTTTCTTTCCCTTCCCAGGAATCCTCCTTGCTTTCTCTC -TCCCATACCAGTCTCACAACTTGACATCCTTGTTTTGTTTGTGTCGTCTCCCTTCCTTTC -TCTCCATTTTTATTGAGACTTTCTCCTTTCGGCAGTATTATGTCGGCCACAGGTACTTTT -CGGGGAAAGCCCAATCAGACCATCGGGAGAGGAAGGCTGCCCGACTTCGGAAATGATCCC -AGTGCTTCCCACATCCCCCGACCCCGGCCAGAATCTTCAGTCACAAACAATACACCCTCC -AGCGACGTCGGAAGTGGCACAATGAGCGCCGCAAGTAGCAGACAGCGACAAAATCAGTCG -AAACGTGATGAGGTACGTCATGGCAGCCCACGGGAGTGATCTCCTGATGCTCACTATGAG -CTTTGACTTTGATCTAACCTCGATACAAGGCTATCCGCCGCAAAATGGAGGCCGACCTCA -GCAAAAAGAGGAACGCTCCAGCAAAATCGAGGAGCTCCCGCAAAGCCCCTCCCGGCACCG -TGCTCGCCCTAAAGCCCAGCCAGGCCCTCCAGATCAAGCCCGGCATGAGCATCGCGGAGG -CTGCTCAGCTTATGGCCGCCAAGAGAGAGGACTGTGTGTTGGTCACCGACGATAATGAGC -GCATCGCAGGTATTTTTACAGCTAAGGATTTGGCGTTCCGTGTCGTCGGAATGGGCTTGA -AGGCGCGGGATGTCTCCGTCGCTGAGATCATGACCAAAAATCCGCTGTGTGCGAGAACGG -ACACCAGCGCGACCGACGCGCTCGATCTTATGGTGCGCAAGGGATTCAGACATCTGCCAG 
-TCATGGATGAGAATCAGGATATCTCTGGTGTTTTGGATATTACCAAGTGCTTCTACGATG -CTATGGAAAAGTTGGAACGTGCATACAGCTCGTCTCGCAAGCTGTACGATGCACTGGAAG -GTGTGCAGACTGAGCTTGGCTCTAGCCAGCCCCAGCAAGTCATCCAATACGTAGAGGCAT -TGCGGTCGAAGATGTCTGGTCCAACCCTTGAGACCGTCTTGGACGGCTTGCCACCGGTCA -CTGTCTCTGTCCGCACCACTGTCAAGGACGCTGCGGCTCTGATGAAGGAAAACCACACCA -CAGCGCTGTTGGTACAGGATCAAGGCTCGATCACCGGTATATTCACTAGCAAAGATATTG -TTCTTCGCGTCATTGCTCCTGGCCTCGACCCCTCAACCTGCAGTGTTGTCCGTGTCATGA -CGCCTCATCCCGATTTCGCCCCATCGGATATGAGCATTCAAGCCGCGCTTCGCAAGATGC -ATGGTCAGTCATTTCAATCAACAATTAAAACCAAAGCATGCTAACTCTGCAACAGATGGC -CACTATCTCAACCTGCCTGTGATGAACGAAGGTGGTGAAATCGTCGGCATGGTGGATGTA -TTAAAGCTCACCTATGCCACACTTGAACAGGTATGTAGATAGCATCAGATCTGACATGTA -GGCGCTTTTGCTGACGTATTCAGATCAACACTATGTCAACTCAAGATGACGAGGGTCCCG -CGTGGAACAAATTTTGGTTATCGATGGATCACGAGTCAGACTCTATGGTCTCTGGCGGCC -ACAGCCAGCAACTGAATACCCCACACCGCTCAGTTGTTAGCCCCGACCTCACTCGCTCAG -GCTGCGACAACAGTCTTCTCCCCAACGACTCAGCATCGCACCACGGTGATGAACATTCCG -AAGTTGTTTCTCAACACCACCCAGCAGAACCAGCTGCTCCTACGCCATTCCCCTTCAAGT -TCAAGGCTCCTGGTGGCCGAGTGCATCGTGTGAACGTTCTCCCCACCAGCGGCGTTACCG -AGCTTGTGTCGCAAGTCACAGCGAAACTTGGCAAGGAAGTCGAGGCTGTTGGTGGCGAGG -CTACCGTTGAGGAGGGTCGGCTGAGCAACACAGGATATGCGCTCAGCTACATGGACAACG -AGGGTGACACCGTCTCTATCACCACGGATCATGATATGACTGACGCCATCGACCTGGCCC -GCCGTACTCACCGCGACAAGGTCAATTTGTTTGTTCACGACCCCGCCCAGCCGCCTATTC -CTGTTGAACCCCATCATGTTAAGATTGTGACTCCCGTCGAAGAACAAAGCGTTGCTGAGG -AACCTGCGTCACAAGAGTCCCCCGTGAGCAAAGCCCTGCCCCAGCAGCAAGCTGTGCCCT -CCCACGCCCCGGATGAGCAATTGATTGCCGGCGTCCCCAACGACTTGCTCCTTCCTGGTG -CTATTGTCACACTGGCTGCAGTCATTGCAGGTGTGTTCATCCTCAGCCGCCCCAGTGGTC -GGTAAAAAAATATCTGGCCCTTGATCTATTTCCGAACCCCTCCTTTTCTCTCCGAATACT -TCACTTTTTCTGATTCGGCTTGGCTATTATGCATTTTAAGAAAGCGTAGTAGATTCTTTT -CCTGTCTGGAGTGCTGCATACCCGTTTTTTTTCTCTAATATCGATATTCCGTGTCGCGTT -CAGATACCGCTGTTCGTGTGAGCAGTTGCGGCCTTAGTTTAGCCTTCAATGCAAATAATT -GTTCGAGATGAATCTTGATCAAATGACTCAGTAATATACATTATCGTTGTAAGCTACCAA -ATAAGTAGATGTTTGACTGGATTAGCAATGTTTCAATTCTTTCATACTTATTTATGCCTG -TTCGAATTTAGTTAGCCTTGTGGCCTGGAATTACCGCATTTGGCTTAGCGTCCAGATGCG 
-ATTATCGTCACTATGGGCTCTTTATGTTGTCCTTGGATTATTCGTACAGTTAATTGCAAT -TTTTGTCAAGAGAGAAACTGACACAAAGACCTCGACACCCCCTGTTGAATCTACTTTCCT -CTTGTGGTGTGATTAGCCTGGCACTTTCAATCTTCTTCACCCCGAAGGGCCTATCCGTCT -ATCGCTCTCCTAATCAGAAATTCTTCTTTTTTCCCTCTCCCCGCCCCACCAAAAACAAAA -CTCCTCGCCTTCGTTCCACCCTATCCCCACCGAATCACTTCTACGAGGGTTGAAAGAACT -TGGGTTGCTTGGATCGTCTGGAACTTGGGAACAGACACGCCCAACATGTCCCTCCTACCT -CCCGAAATCCACACCGCGCTTTCTCAGCTGCTCCGCGCCTTGGGCACACCCGACAATACC -GTCAGATCCCAGGCGGAAGACCAGCTCAACAATGACTGGGTTCAGAACCGACCAGATGTT -CTATTGATGGGATTGGCGGAACAGCTTGGCGGTGCAGAGGACACAATAGTAAGTTGCACC -AGACTCCAGGGGTGCAGTTGATTGAAAGAGGGGCGTTGCAAACGACCGACAACCGAACCA -ACATCCGAATTTCCCTATGATTTGGGATCAAAGCTAATGAGAATCTCTTGCTGATAGACC -CGTGCTTTCGCCGCTGTGCTTTTCCGAAGAATCGCAACAAAAACCCGGAAGGATCCCGCC -TCAGGTGACAACAAGGAGGTTTTCTCGTGTCTCCCCAACGAGCAACGAATTGCCATTCGT -GAAAAATTGGTGACTTGCCTGACGACTGAAACTGTCACCGACGTACGCAAGAAGATTGGA -GACGCATTGGCTGAGGTGGCTCGGCAATACACCGATAATGGTATGCGATTCGAACCCGCC -TTTGACGAAATGAGGACTGTTTGTTCTGACGATTTCACTTGTCATTTTTAGGCGAACAGT -GGCCTGAACTCCTCGGTGTTCTCTTTCAAGCGAGTCAGAGCCCTGACTCGGGCCTTCGGG -AAACCGCATATCGCGTCTTCACGACCACCCCCGGGATCATTGAGAGGCAGCACGAAGATG -CTGTTGTGGAGGTTTTTACCAAGGGTTTCAAGGACGATAATATTTCCGTATGTCGACATG -TGATTATGCCCTTCCAGCTCAATTTTGACCATTTTCTAGGTGCGGATTTCCGCTATGGAA -GCTTTCGCTTCGTTGTTCCGGTCGATCTCCAAAAAGTCCCAGCCTAAGTTCTTCGGCCTC -ATGCCAGACCTTCTCAACATCCTTCCTCCTCTGAAGGAAGCCTCTGAAAGCGAGGAGCTG -TCCTCGGCACTGCTCGCCTTGATCGAATTGGCTGAGATCAGCCCGAAGATGTTCAAGGTC -ATGTTCAACAACTTGGTTAAATTCAGCATTAGTGTCATTGGCGACAAGGAGCTTAGCGAC -CAGGTGCGCCAGAACGCACTCGAGTTGATGGCTACCTTTGCCGATTATGCCCCAAATATG -TGTAAGAAGGAGCCGGAGTTCGCACAGGAAATGGTCACACAGTGTCTGAGTCTGATGACT -GATGTTGGTGCGGATGATGACGATGCCGAGGAATGGAACGCATCCGAAGACGTAAGTGTT -TGCGTTTATGTCATTATAATTGACATCCCGCTGACTAGTAACAGCTTGAGCCTGAAGAGA -ACGACCTCAACCACATCGCTGGTGAACAATGCATGGATCGTCTCGCCAACAAACTCGGTG -GAGGGGCCATCCTTCAACCCGCATTCTCTTGGATTCCTCGCATGATGTCGTCTACCAACT -GGCGCGATCGCCATGCTGCTTTGATGGCCATCTCCGCCATCTCCGAGGGTTGTCGCGACT -TGATGGTCGGTGAGCTGGACCAAGTTCTAGCACTTGTTGTTCCTGCCCTGCAAGACGCCC 
-ACCCCCGAGTCCGCTATGCAGGCTGCAATGCTCTAGGTCAGATGAGTACCGACTTTGCCG -GTACCATGCAGGAGAAGTACCATGAGATTGTCCTCACTAACATTATTCCCGTTCTCGCCT -CTACCGAGCCGCGTGTGCAGTCTCACGCTGCCGCCGCTCTCGTCAACTTCTGCGAGGAGG -CGGAGCGCAGTACCTTGGAGCCATACCTCGGAAACCTGCTCAGCCATCTCCTGGATCTTC -TGCGCAGCCCTAAACGTTATTTGCAGGAGCAGGCACTGTCCACCATCGCTACCATTGCCG -ATTCCGCCGAGGCCGCTTTCGATCAGTATTACACTACTCTGATGCCTCTCTTACTCAATG -TCCTCAAAGAGGAACAAGGTAAGGAGTACCGCCTTCTTCGTGCCAAGGCCATGGAGTGTG -CTACTCTGATCGCACTGGCCGTTGGCAAGGAGAAGATGGGTCAAGATGCTTTGAACCTTG -TGCAGATTCTCGGCAACATTCAGCAAAACATCGTCGATGCCGACGACCCACAGTCACAGT -ACCTTCTTCACTGTTGGGGCCGCATGTGCCGCGTTCTAGGCCAAGATTTCGTGCCTTATC -TCCCCGGTGTTATGCCTCCTCTTCTCACCGTAGCTGCTGCCAAGGCCGATATTCAGCTAC -TGGATGACGAGGATCAAATCGAGCAGGTTGAGCAGGATGATGGCTGGGAGCTTGTGCCGC -TCAAGGGCAAGATCATCGGTATCAAGACCAGTGCTCTTGAAGACAAGAATACCGCTATTG -AATTGATCACGATCTACGCCCAAATTCTGGAGGAAAATTTCGAACCTTACGTCCTGGAAA -CCATGGAGAAGATCGCGGTCCCTGGTCTTGCCTTCTTCTTCCACGACCCCGTTCGAGTTT -CGGCTGCCAAGCTGATCCCTCAGCTGCTGAACTCCTTCAAGAAGGCACACGGTGGCCAGT -CCCCGGGCTTCGCTGAAATGTGGAACAAGGTCGCAGAGAAGATCATCGAGGTTCTGAGCG -CTGAGCCGACAGTTGACACGCTGGCTGAGATGTACCAATGCTTCTACGAGTCGGTAGAGG -TAGTTGGCAGAAACTCCCTCACCCCCCAGCATTTGCAGGCTTTCATTGAATCGGCGAAGT -CGACCCTTGAGGACTACCAGATGCGCGTGAAGCAACGTTTGGAGGAGCAGGCCGAGCTCG -AAGATGGTGACGAGGAGAACCTGGACTTCGAGTACGCCGTGGAGGACGACCAAAACCTAT -TGAGCGATATGAACAAGGCCTTCCACACCATCTTTAAGAACCAGGGCAACTCTTTCCTGC -CAACCTGGCAGCAACTGATTCCATTCTACGACGCTTTCATCACAAGCCAAGACCCGACAC -AACGGCAGTGGGCTCTTTGCATAATGGATGACGTGCTTGAATTCTGTGGCGAAGAGTCGT -GGGCCTTCAAGGACCATATCATGCAGCCGTTGGCCTCTGGTCTGCGAGATGAAAACGCCG -CCAACCGTCAGGCAGCGGCTTACGGAGTGGGTGTGGCCGCGCAGAAGGGCGGTGCCGCCT -GGAGCGACTTTGTGGCAGCCAGCCTCCCCAGCTTGTTCCAGGTTACACAGCATGCCCAAT -CTCGCACAGAGGAGAATGTGTTCGCGACCGAGAACGCATCTGCCAGTATTGCCAAGATCC -TGCACTACAACCCATCTAAGGTTCAGGCTCCCCAGGATGTCGTGACCAACTGGATTGAGA -CTTTACCCATTACCTACGACGAGGAGGCTGCTCCCTATGCCTATTCTTTTATTGCACAGT -TGATTGACCAGTATGTTCTCTTGATCAACCCTTCATACTTATACTAATATTGTACTACTA -GGCAAAACCCGGCTGTCTTTGCCAAGGCTGACCGTGTCTTCGGTTTCATTGTGCAGGCAC 
-TCGAGGCGGCTACCCTGCAGGGTCAGACCGCTGCCCGGGTGGCCACATCCGCCAAGCAGC -TGGTGGCAGTGACCGGTGCCAATGCAGACCAGATTCTAGCTTCTGTGGACCCAGCCAACC -AGGATAGAGTGCGCAAGTTCTTCCAATAAAGCTCAGACATTTTGTTTTATATCCGGCAGA -GCTTTATCCCCCTTTTTTATTATTTCCCAAATCATTCCCCTCGGTCATGTGAAATCTGAA -GTTACCTGAACCCCCGACACGTTTCTTTAATTCTTAGCCCCTACGTACGAATGCCATGAG -CTTTAGACGTTGGATTCCACCGTGTGCGCGATGTCCATGTAGATGAAATGCCAGCCTTTG -CCATTGATATGGCTGACTATCTAAAAGAAATGAAACCATGCTATCTACAACTTCGATACC -CTTTGAATTTCATTCGCAACCACCTCCTCCAGTCCCATTACCTTGGGCTCCCCAATCAAC -CCGTTGGCCCGCTCCGCGTACAGTGTCCCGCCTTCGCCCCAAATCCGACGTAATGTGAGC -ACAGTGGCCGCCGGGTTCCCAGCTTCCCACGCTGTAGCCTCCGCCTCGCGCAACTTCGCC -AACGATGTGCGAGATACATCCCACGGCTGACCTCCAAGCTGGCGCTCAAATTCCGCCTGG -ATTTGGTCGGGGCTCGTGGTGAATGAATTAACACACAAGGCAGCGTTTTGCGTCTTAGCC -GTAGCATGAAGCAGAGTGTTCAGGACGAGAATACCAACACTAGCGGAATAGTCAGCCAAA -AGTCCATCCTAACGAGAAAATTCCCAACAAGGGAAAGGAAAAGTAATTTTACTCTTTCAT -CGTCGTCAGACTGACATCTCCCTTCCCCTTCTCGCCCAACAAAACAGCCCTCCGTACTTT -GACATCCCACCCACCAGCCGCCTCCATCCCAGGTATGAGATGAAGATACATCTCCGCAAA -GGGACCAGTGACTACATAGGAATAGGCGAGGTCATCGCGTGAGATCTCATTCTCAAGATA -TGCACGCACCATTAGCTTCTGCTGGTGCGGCTTCTCATTCGCCGAAGCCGGCCCGTACTT -GATATCAGTGCCGTACTCGGAGGGCAGGAACCATTTCACAGTAGAGCTGGCAGCTGCGAG -TCGGATGAGGAGAATCTGTTGTGCCAGGGTGTCGCGGCCCAGTGCTGAAATCACAGTGTC -GATTCCTATTTGGCTCAATGGTGAGCTTTCATTTCATGGTTTGGGCGGTCCAAGATTTGA -GGATCATGTCATGCATATATATGGTTTCCTACCTTCGTATGCTGCCTTCACGGCATTCTC -ATCTTCGACATTACCCACAATCACCTCCACGCCTTGGTCTTTTAGTTTGTTGAGATCTGA -GGTCTTGGTTTCGGCTGTGCGCGGGGAAGTAAAGATGGCGATACGGCCGAATTGGGAGCG -CTCAGGCAGGATGGCGTCCAGGATGAATGAGCCGATATTGCCCGTTGCGCCAAATAGGAG -GATATTCTTAGCAAGAGACATGACTATATAGCTCTGTGATATGTGGAGGAATCGATAGTG -TAGCTCTAGAAGACGGTGCAAGCTGATATACAACGTGAAACGTCCACTGGTGGGGTGTGG -ACCAAGTGGTGGGGTGTGCACCAAGTGGTGGGGGTTTTGCTTATCAATCAATTGGGAGAA -GCTCCCAGGATGAATACATGATTTCAATTTTACTTGGAGATTCTAGCGTAAAGGACATGT -ACATTGCATTGGGGTTGCAACGTGCTTGGGTGGTTGGTATATCTGCATGTCTGATGAAGA -CATATCGAGCGTTATAACCACCAATACTAGGAATTAGGAATTGAAAACTTGAATAAAATC -TGGAACATATGCCTTTTGGATGCTTGAGACCCCTATTGTTTTCTTTAGTGGGCCTAACTT 
-GGCATTGGAACGACGCGAAACTTACATTGAACTGATAAGTGGGTTCTTCGAAAGCTGGGG -AATTGATTGGTTTTCAGATTGATTTCAGAATCAAATATGTGCATTTTATACCTCTGAACG -CTTAAACTACCATCTCCAAGATCTCCAATACCTGAGACTTCTCCTTCAGTGGCACTGGGT -TTGTCTTGCACCAACGGTCATGTAGGCTATTCTCCGCCAGTCCATTCAACTGGTCCCGCC -CTACTTTGACATCGCTCAAGAATCGAGGCATTCCCAATTCGCGGATAATCGCATCCAAGA -TATCGCCTAAGTCCGATGCTTCTATGTTGACCGAGCGGGCATGCAACACCTCCAGCACGA -CCGGATCTTGAATCAGGAATTGGCGCACGCTGGCTTGTTTTTCGCGGTTGGCGTTGTACT -TAGCATTGAACTTACACACAGCTGGCAGCAAGATACAACTGGTTTCACCGTGGCCGACGC -CCAGCGGACCAAGCTATACAACTGTTAGTCGGTCTCATCGACGTGCCTGTGGATGTCTTT -GAATGCACAGAGAGAAAAGTAGTCGCATACCTGATGTCCAATCCCATGACTGGCACCCAA -TTCAATTAACCCGCTAGTACAGGCCGCCATAGCATCCACTGATCCAAGTTGGCACTGTAG -GTGTGCTTCCCGGTCTGATCGGTCCTTCTTACAGCGCAATAGACCAGGGACCAGCAATCC -CAGTGCACGCTGAGCGAGTTTGTCTGATGTCGTGGTCGTACCTGACATGGAACACAGATT -CTCGACACAGTGATCAATTGCGCGCACACCTGTACTAAGCCAGATTGAATCGGGTGTCGT -TTTCGCCAGCTCAGGGTCGAGAACAACCAGCTGTGGTCCACGAATGGGATGTTGGAAGCT -GTATTTGCGCTTTGAGCGGTCTTCCGTGCCGCCAGCAAAGTTAGAGTATTCGCCGGCAGA -GAGAGATGTTGGCACGGAAATGATCGGGATAGTTGGGGCTTTGATCTCTAATCGCTTTTG -GGGGCCTTCTACAAGAGTTTCAAGCTCTTCAGGTGTGCTCACTTGGTTTTCAAGGACCTG -AGGGGGATGTTGAGCCAAGAATATACACACACAGGGATATATATATGTATGATCTTGCGG -GGGAAATTGAGAAGAGGCAAGAATATAGATAGAAATGAAAAATGAACGGTCTGAAATAGA -AAGGTGTACCAACCAATGCAATTATTTTCGCACCATCAGTCAAGCTGCCAGCCCCCACAG -TCAAAAGCAGATCAGCATGCACAGTCCTAACCTCTTCGGCAATCTGCAACACCTCTGACC -ACAAAGTGTGACTCCGCATACCAATCCAATGACCAACCAGCTTATCCGACCCCAAAGTAG -TTTCTAGCTGATCCATCACATCGGTATTTCGGGCTAGTGATCCTGAGCAGATCACATATA -CACGCGAAGCCTGAAATGTGCTATCGAGATGGCTTACTATGGCTGCGGGAAAGGCGATGC -CATAGGAAAGAAGCGGGCGCTCACGGCCTTCGAAGGCAGGTCTGACTGTTTCTGACATTA -TCACTTAGACAAGGTGAAAAGGTAAGATGTGAAGAGATGTGAGGTCAACCTGGACATGTT -TGGGAATGTACAGGAAAGGTGGGGCTGAACCCGATTTATGTGCATAAAATTGTGTTGACG -ATCCATATAGCTAAATGATAGGAAGGTGAGGAACATCATCTTTCTAGTGGCCATCAGCGT -TGGATTCGACTACTCAGGCATTTAAGCTGCGTGTTTACATATTTACATCAGTTGACTTGA -CGTGGTAAGCTGCGCCGCAATTTGGAGATCTGGAATTTGCCGAGGCTGCAACTGTTGAAG -TTAGCGGTGGCTATCACAAAATTGTCAAATAAATATTCACCACCGGACAAATTTAATGGG 
-CTATCTAGATACTATAGTAGTTTCAAGAAAATCACACACACAATCTTGTTCCATCAAAAT -CATCTGTTGATACATGGCGGTATACTGTAGATCATATCATATCACGTGCATCCCCACACT -CCGTCTCCGTTACCCGCCAATCCTGCCCTTGACATTCCTCATCCGACGAACCAAGCTCAA -AATGGCATCCACTCTCCGTCAGAACCCATGGCGGGCCTACCAAAGCCTCACCCGGCAGCA -ACTTCGCACAACACGCCTCTCCTCCCGGCATCTCTCCACCAATCCCCTCCGCTCCAATGC -TACCAATAACCCCCTCCGCTCCCGAGCATCCGCAGCGGCGGATCTCCAGCACGCCTCCCA -AGCTCAACGCAAAATGATTCTTTCCGCAGCTGGCATTATTACCTGCGCAGTCGGCCTTTA -CGGTGTCATCAAGCTAGACTTATTTGGACTTGACGAACTAGACTCCAAAAAAGAAGACAA -AAGTGCAACCACTTCTGCAACCCCGAGGAATGGCGCAATGCGCATGGACGGCCCAGTCGG -TTTCACCAATGGCGGTCCATCATTAATAACAATTCAAGGCCAAGATAAACTCGAGCAAGT -GCCAACAGGAACAAGCACAATTCCCACCTTCCCAAGCACAATCCGACTCCCCTCATCCGA -AGCAACAGAAGGCAAGCAAACAGGCGACGATCTAGCCCCATCCACGGGTGAAGAATACCA -ACTCCTAGGCCTTGGTATCCGCACCGTTTCTTTCCTCTCAGTTCAAGTCTACGTGGTTGG -CTTGTACATCGCGAAATCCGATATAACCGAGCTGCAACGGCGCCTCCTGCGCACAGCAGT -CCACCCACCCAATACCGACGCCCACTCCGCCATTTCTGGCGCCGGCGCCGACGCTGCTAC -TTCCCTCGTCTCGCCGGAGCGCCAACAGCTCAAGGAGCTTCTCCTTGATGCGGAGCACGG -CGACGCCGTCTGGTCTGCTATTCTCAAGGATAACGGCATTCGCACTGCATTCCGTATCGT -GCCTACCCGTAACACAGACTTTATGCATCTGCGCGACGGCTGGGTGCGTGGTATCACTGC -GCGCGCGCAGGCGAAGAAGGCTGCGGGGCCAAGCGAGTTCCAGGATGAGAGCTTTGGAAG -CTCGATGAATGATTTCAAGGCTGTGTTTGGTGCGGGCAAGGGGAAGGCAGTACCCAAAGG -ACAGACGCTTGTTTTGATGCGTGATGCGCACGGTGCTTTGGATGCGCTTTTCCAGCCTGG -GGCTAACGAGCCGGTCAAGTGGATGGGTCGCGTCGCTGATGAGCGGATCAGTCGGCTTGT -TTGGTTGAATTATCTCGCTGGTAAAACGGTTTCAAGTGAAGGGGCTCGAAAGAGTATTGT -TGACGGATTGATGTCCATTGTTGAGAGGCCCCTTGGGACTGTTGTGCAGAAGGTTATCTG -ATTTTTCTGGGGCCACGGATTCAGTTTCGCTTGCGTGTATTAACATACCTGTACGTTAGT -CGCCGTTCGATTGTTCTACCTTGTGTCAGTTCTACAAAGGATTCCAGAGCTATATGGCAA -ATTTCCACTCAATACGCATCTACCGCTGGAATGTGGTAGGGCTCTCTGGATCCCTCCAAT -CCCAGGCCATAGTAATATTTCCCCGCGTTTTACAAATATCCTATAGGGAAAGGAAAGTTC -GAGAGACTACATGGTACATAAGAGGTAAATGGTTGTACAATAAGAACCCACTTCCTTAGA -TTATATTCAGAACTCGGGCCTCGGGAGATTCCCGCTTGTATGAAATCACATCACCGCTTA -GATTGCACCAGCGGCCTTGAGCAGCTCCTTCAGGTCCTTCTTGGCCTGGAGGTCAGAGTT -TCCGCCGATGTGCTGCTTGTTGATGAAAATGTTGGGCACGGTGCGCTGGTTTGAGATCTC 
-GGCGAGAGCGCTCTGGATGGCAGCTCCTTCCTCTGTGGTAGAATTAAATTAGCGATTGGG -TTAAAAAAGAGAGATAAACATGTATCAGAACTATAGATGCAAAGACTAATATTCTTGCGG -GGGGAATATAAGAACACAAAGAGCAAAAGAGTAAACTTACCCTCCAAATCCAGCTCGAGA -GTAGTGTACTTCGCATCCAAGCTGTCAAGCAACTTCTTGGAAGAGGTACAGTAAGGGCAG -TATGACTTGGAGAAAACGACGACAGCGTTGTCGTTAATAAGGGACTGGGCACGGGTCTTT -GCGGCGGACATGGCGGCGGGATTTGGACCGGCGAAAGAGCCGAAAAGACGGGTGAAGATG -GACGAAATGGTGGGCGGTTTGTGACGAAAAGAGAAGAGGCAGAGTCACCTTTTATAGGAC -TGAGCGGGGTACAGGCACAAAGATGAGAGGATTGGTTGGCTATTCCGTCAAGATTGCGGG -ATTGGATGTGGCGATGATGTCAGGTGATATTTGTCAGCTAGGGAGCTCCCCGGGGCCCAG -ACTCAGGGCTTGTTTCTTCTGGGTAATGCATGTATATGGCTATGCGTAGTCAGAGAGCTA -ACATAGTATAATTTGAGACACCTCAAACTAGGCATGTGGTTACCGCAGTATCTACACATT -TACAGGTTCAAGTAACAATTGTGTGCAGGTTTGTGGTATGTATTCATCCATTCGATCTGA -GATATGAGTAGCTCCATAAGCAATTTTGGCATGAAAATACGAGCAAATTCAGGTACCTGA -CTGCTCGAGCCAATGAATAGATCGCACTAGATAAAGAGGAGAAATTTGTTGAGTCTTGGG -AATATGGAAAATCAGGCTTCGGAAATGTTTGGAAGAGACAAAATATTGGAAAAAGGAAAC -CACGGGGTCAAGAAAGTCAGAAGCAGTTTTAAGATGAGAAGGGAGAGGCGCACACCTCAA -TTTATGCTTCTTGTGAGGTGCGGATCCCTCGCTCCCATCCCATCCCGTCGCGATCTGGAG -CGATTCTTTTGAGTTCCCTCCTACCACTTCAATAGGAATGGCAATAATCACAGGAGAATA -GAAGGGAAGTATCAAGGAATGGATGCATGgaagaagaagaagaagaagaagaagaagaag -aagaagaaTAGTCATGCTCTGGTGGAGAAGGAAGCCAGATTAAAAATCAGTCTGAAGAAA -ATTGAACTCAAGGCGAAGAAATTCCCTAGATCGCAGGATATAAAGCCACAGAACAGAACC -GACAGAACACCATCCCGCCGGAGACGCTGGAAATAAAAGAGAATAATAACAGAGAAGGGA -ATCAAGCAGATGTGCAAATATCAGAGGTATGTTGGGGAAATCGATGAGAGATTGGATAGA -TGGCTGTGGAGTGGGAGTCCATCACATCACAATGCAGCATCCGCAGCCCTTGTCTTTCTT -TGGGGGCTGCTGCGCAGTGACAACTGGAGGGTTCCAGTTGGGGTTGGGGTTAGGTGTCAG -TCCCATAGGAGGTGTGTCCTGCACCTTTGGCATGGTACTGTAGCCGGTACTCGTATTCGG -CCGGGGTGGTTCAGGGCTCTTTGATGGCGGAGGCGGCAACTCTTCGAGCTGTAGAGACTC -GACAGCTTGCTGTTCTGGCTCGACCTTTTGGGCTACAGGAGGCGGCGCGTCGAATGGGTG -CACTGCGAGGGGGGCGAAGTTGCGGTCATATGTGTTCGGTGTCCGGCCTTCCTTCTCCCA -CTGTTCCTGCAGAATCTTCTTGGCGACAGTCGGGATCATTTGTTGGTCTGGTGGAAGGCG -AGGATCGGGCTTGTACATAGTTGCCAACCACGGCGGGTCTCCTTCAGGCCGATTAGCGGC -TGTTGTAGGACGATTTTCACCTGACCTGGCGGTAGGGCGCTGTGTATTGTTACCAGATAC 
-AGCACGAGATCGAGGAGTGCTCGCTTTCCTGCTCGTGGGTGGAGCAGAGGTCTTTCGATT -GGTGGAGCCGGCGTTCTTCATTATCTTCTTGCCCGGTGTGTTTTGCGTACTGCTGAATCC -GCGACTGCCCGGTCGGCTTCCTGGAGAAGGAGTCGAAGCATCTTGCTCAATATTGATAGG -TTCACTGGCTCTGCGGCTCCGCGAAGACCGGTCGTCTTCATCTGTTGCAGCACGGCTCCG -TGAACTGCGAGAATCGCGGCCCATGGCGCCTGGTATGGGAACTGGACTAACCGTAGAGAC -AGGGATTCGGGATGCGCTTTTTCCAAGGCTTGGGTGGCTGTAGATGCTGGCGGCTGCATC -GTCTGCAGCGATCTCCTCTGGTCGGCGGAGATCCCAATGTTCCCGCTCATCCTCGTCTAC -ATTGCCATTCACATCCCCCTTGCCAGGTTCTTCCTTCATTTTCGGCCAGGGTTCGGAATA -TTCTGCAGCAGGTGAGGTCGCCGTGCCAGTCCCATTGGCAGGGGTTTGCTGTGAATCATG -GCTTCTCCCACGTCCCCCACTCGATTTGCTTTCTCCTCTCATCCGTCGCTGGAAAAGAAT -GGCTGCTTGGTGGAGCTCCTCACTCTCGATCTTTGCCAGCTTGTCTCGGTGGATCCATTT -TTCCTGGTTCGTTTTCCCATCAAGGTACAATTCAAGACCATCATTGGCCTCGTCGGGCTC -TTTACGACCGGCTGTCTCGTGACCAAACGCATCTCGCATACTGCCTACTTCACTTCCATG -TCGGTTGAGACTGGGTTCCCTTCGTCCTCGAGTTGAGGATAAATAGGGCGATTCATGTGT -GTGGACAGTGCGGGAGACGGAGGAACTGCGTGAGCCGTGCGAGTTACGCTGTCGAGAAAA -TTTTGCGCGGGTGGAATTCGAGTCATGCGAGTTGCGCTGTTTCGATAAGTGGTGGTTCTC -CTGCGGGGAAAAGGGGCTCAGCACTTTTTGTCTTTTTTCCACATTCTTAGGCTACCAGAT -CGGTGATTCAAATACCACACGGAAATAATCTTACCTCGTTCGCAAGTGGCACCGCAAAAT -CCCATTGCGAGCCATCCACGGGCGATACTAAGGCATCTGCAGAATATGCCGCTGTAGCGA -CGCCCGCCATTCCAGAGGTTGTTGTTCACAAACGACGAAAAGAAAAGGCCAGTCCAGCTG -CAACCCTGGTGAATAGTAGAGATTGCCGTAGAGATTGGGTTGTCGGATCAAGCCATTCCA -GGTAGAGTCGGAAAGTGGAGTGTAAGGGGGTACACCGAAAGCAGCAAATTATCAACACAC -CGAGTCAACCACAACTGCAGGGGGTGTTCCACGGGGATTGTTCAATTTGACCACTTTAGT -TACAAGTAAAAGCCCAGGTGCTGGAGGGGCTGGTCCCGTGGAAGGCCCCGAGGTCTTGCG -GGGTCCGGGAGACTTGGAACAAAAAATGCGATTGCCGTGGCAAGGTAAAaatgagagttg -agttcgagaatagagaagagaatagagattagaAGAGAATGTCGAAAGTAAAAAAAAAAA -AGAGGGAGAGTGAAAGGAGAAAGTTGTAGGTAGCCAGACTAGGAATAGTCCTGGAAATAG -CCTGGACCAAGTCCGAGAGAGACTTCTCATTATAGACCTCGGGACTATAAACGGAGTACG -GAGTACATTCTAGGGTATTTGTTAATACATATTCCCATACATGCATCATATGTCCATCTT -ACATATTAATCAATAATCGTTCTAAATCTTCAATAGACTATTGAATAAAATTTATGAGGT -TTCTCGCAACTTAAGTCCTGGGCCGTGCGTTGCCCCTGAACCATCTTGATCAACCTCGTG -AATTGATCTAGACGGCGAAAGTATTTTCTAAGACTGCTCTCCACTTAATGTATACTTACA 
-TGAGAGAAAAAGAAGAGGGAGAAAAAAAATTGCGTATACGGAGTACCTGTCGGCAAATCA -TTCTGACAGGTTTAATGTTTGGGGAAATCGCAAAAGTTGATATATCACAAGTACGCCGAT -GCCCTCCATGCAAAGAAACATCCCGAGGGACAGATATGTCCCAGCAATTAGAAATTGACC -GCTAGGACAGTTCCCCGGTATGAGATAGATACATGTTGTACATGACTATGTGCGAGGTAC -TTTGTACTCCGTACGTATGTCTGTAACCCAATTTCAAATCCCTCTTGGTATGCACAGCGG -GGGAGCTGAGTAGGCAACTGGTTAGCTCTGCTAGAGCTTTGCAGCTGCATCCAGCCTATT -GAGAGTCGCCTGGGTATCCTTGATGAGAGCCTTTCGTGCACTGCGGGCCACCTCATCGCC -ATCTGGCTCGATTCCATCTGCTTTGATCTGGATTTGTGTAAGAATCGTTTCACCCAGCTT -TTTATGTTCATAGTCCCGCTTCTTTGGATCACTAGGCGGGTTCACAATGTATTCATCACA -GATCGGCTTCATCTCCTGCTGGAACCATTTGGTCAAAGCAGTGACCTGTTCCAGCGGCGT -GCGCATCAACTTAATGTTGGGCGGGGGTGCGGGGACACCTGAGTGGCCTGACGACGTAGG -CCGTGGCGGCCGAGACGGACCGGGAGATTCTGGGCGGACGGGCGACTTCTTGCCATTGCG -TTTCTTGTTTCTCGACCTCTTGCTTTTTCCACTTGGGGGTGTGGGAGGGGACGACGCTTC -TCCTGCACTAGTAGGGCGAGGCAGCTGCGGCGGTGTTGGCGCATGTACAGTTACAGGGAT -GGTTTGTCCTTCGTCGGAGCCATCACTAGGAGTACCCGCCCCTGCTTCAGACACCACGCA -TAACACCTCGGAGTGCTGCTTGATCCCCTCAGCTTTGCATGTCCGCGCGTCGTTCTTGAG -TAGCTTGCCCTTGTATAGGAGTCTCACACAATCTGGGTGGACGGCTCCTGTACTCTCAGC -GGCGGCCTCTCGTAGGTCTCCGATGGTCAAGACGCCGTCGTCGATCGCGTACGCACGGAA -GTGCAATGGGTAGATGGTGCCGCGGTGGCGGAGGGCAATAATATCCGGTTCGGCGTCGTC -TACTGGCGTGTCTGCGACGTCTGCGGGTCTGATGTAGCTGAAGTCCTCATCGCTGACCTG -GGGATTGGATGCGAGAGCGTAGCTTGGAGAGCGCCGTAGCATTTTTAAGGGGTTGCGCCA -ACTCATCACAATAACGGTGAACAGGCAGACACCAACAGCTGCAAAGAGGGCGAGGGGTTG -GTCGTCGGGGGAAACTGGTAGCTTTTTCGCGAGAGGGCCGATGGGCGTGTTGGACAGATA -AGTGAGAATAGGATTCAGTTGACGGCGGAGGGAATGTAAGTGAACGGGGAGGATTTCGGT -GATCTGGTCTATATAGACTGCGCACTTCTCGCCCAAGGTGGACTGAGCGAGGGTCGGGGG -CAGTAAAGTCATGGATCTCTGTGGAGGGGAAGCAAGCACAGGTGGTGGTGAAGTCATGCC -CCCAAATAGCCCGGAATAGGCAGATAAGGCGGAGAGTGCTTTTTTTGCCGTCACATTGAG -CAATTGATGTACTCCCAACGAGGGCGTGTCTGCCTAGTGGACCTTCTGATTTCAAGTCGG -TTATTTTGGTGTTGGGATTTTGGCGACATTTGGAATGGTTGGGATAGGTCGACGGATGCC -TTGTGTACCTTGGGGCTGCCTTGAAGTGTGGCGATACTTGAAGCCTGAGGCAGATAGGCT -ACAGCGCTGTAGGTCACACCATTGTTCTATCTAGGGTGACACATCTGCGAAGATTCAGAG -TATTCAAGTATTTTTTTATATCTCAGCAAGACAGCTCCTCATTCGTTGTAATATGGAGGC 
-TCTTTCGCGACGTTGAGTAACCCAATTTGACCTCGATCGGCGTGTATGTGCAAAAACGCG -GGGTCTTACAGCTCCCCACCTTTTGTAGTACGTGTCAAGATATGGACTCCATGAGAAATT -AACATATTGGCTCGTCAAATATCTTACTATGAAGAAATTTGAAAGTAGACAGTCAAATAT -CAAATCGTGTCAATGGCTTGGAGAAGATAAAGTCTTGGGTCATATAGTTATATTGACTAA -GCTTTTACATCACAAGTCCAAGTGGCTCAGTGGTTGCCTTCGTGGTCTAGTTGGTTATGA -CGTCGGTCTCATATCCCTTCAAACCTGTTAAGGTTTCATATGACATTCTTGGTAACAAGA -TTGACATCCGAAGGTCTCCAGTTCGAACCTGGACGAGGGCATTTCTTTTTTTTTAAATCA -ATCTGGCACCATTTTCAACATGTGTCCATTTCCCTGTATGGAGATAACTTCTGCTACAAC -CCTTCGCGTGTGCCTTAGTATTCCGCTGATCGCTATTGACCGTCCGAGGTCCATAGTACT -GAAGTCGCCAACTTTTTATTGCGCTCCACTATACTCCGTACCTAGGTTATATTTATATAT -TTACCATCTTAAACTTACTTGGTCCTTCTATACACGCTATCTAATTTTTCTATAGAGCTA -CCGATCATGtacctacatacatacatgcatcatacaAATCCCAAGTAAATCTAGCAAATA -CATAAACAAGCCCTAAGCAGGAGCTTCCGTATTCAGCATATAAAGGAACATCAAAATGAC -TATTCACATGTTTTCGGTGTCTTTATAACTACGAAGAGGCCTAGGCGAGTCGGACACATT -TAAGTGTATCTATACATTCGAACGGTGCCATAGTAAGCCTGGATGATAAGCGTCTCACGA -AGAACGGCAGTGGAACTCAGAATGATGCCAGGTAATATAAACTAATAAAAGAGGTTGTAC -ATATAACCGCGCTATGTTGGGAGTTGTAGAGATACGCATACGTTAGCTTGTTATTCTATA -TCTCGGGCCTTGGTAGTGAGGCCTTGCCAGGGCTAATACTTCCAGGGTGTTGGATAACAG -TACGCGGTGGACTTTGGACTGCCCAGGGTTCAATGTACCTGCGTACGTATGTATCTGCAC -GGCTGGATTGATCCAAACGTCAGCCCTGCTCACTCAGTATAGCCGATCGCGGTGCCGTGT -GTAACCATAGCGGTACATATTCCGGCCAGATTAGAATAACCACGTGTAGCCTGTTCGGTC -CGCGTTACTGTGCATGGTTTAGGTTTAACTTTCTTTCTGAGGTTTGCGCGGATAGACCTA -ACCTTGCTGAAACATCCATATCCATACGATCTATATAAGTTTACTCTATTAGCAAGCTCT -ACCTTACTTTGAGGCAATTGAGAGGACATCAAGTCGGTTTGTTATATCGCTCCATGCCGT -CTCCTTACTGGATTGTTGGTCACTTGCTGTGTATCTTGTGGACCATGCCCATCTAAGTGT -TTCGATAGGTGTATCATGATATGTTAGACTTCCTTTCTGTCAATGTTGTGTGGATATAGT -CTAGGctctctctctctctctctctctcCTGATCCACACATTTGTATATATCTATTTATC -TCTCTCCCCCACCGCACAGGTATCGAACGAATCCCACCCAGGCATATAAACAGTATATTT -AATGTGTTAAGATATTATATAATAGAAGAGGGGAGTGTTACCTTCTTAATTATATTCCAG -ATTACTTACTCTTTATCCGTTGAGATCATCTATGGGCCGAAGTCATCTAGCCGACAGATA -GTTACGGGGGAGAGGAAAGAAATGAAAAATATGTGCAAAATCCCCCTTCTTGATCCTACA -TGCAATGAATGGATGGGTCCATGTAGCAAAGGATACATCCCCGTTGTAGATACTGATATC 
-AACCAACCCAGTCCCCTTATACATGATGGCCTACGAGACCTAATTTACCCATATGGGTGA -TCCATACTCGGAGGGCTATACCGGGTTATATACATAGCCTACACTCGGCACTTTGTGGTG -ATAACATGTACCTATGGTATTCCTTAATTCACACCACTTTCCAGTGGACTGCCTGGGCTC -CATGTCACATATAACAAAGGATATTTAGTTGCACCATATTGATCGGTGGTTGGTGGAACC -TTGGTGAATACAACCAATCAATTGTATGTGTATTGTGGGCGAGAATCGTCGGTCCTTCAT -GGGTGACCGTCAACCACACAATGCAGTGATATGTTTCTTCCCGAGTCTGCATGATACCAA -CTTAAGTGTTGGATTAGAACATCATCACCAAGTGGACCTAGCTTTTACATTTTAAACGAA -TGTCTACCACCGAGGATGGCGACCACAGGCAAGAGATACAAGAGTGGTAAATGTAAGACC -TGACTCTGAGAGAGACCTGTGTCTTGCTGCGCTCCTTGTGATCTCAACTACCATGTCTCG -TTGTAACTGCCTTTCTCAATTAAAGCAATGGTTCTGCTGTGTGCGATTGGTCATACCTAT -AAGCATGTATATATTAATTGGTTCTCGTTGGGCATGTATCGGTTGGAGTATTGGGACCAG -ATTTATGGGGACATTATGGGAAATGCGTCCCATCTGCAGGTAGAGCTAGCATAGAGAGCC -ACTCTGTAAGCTACTAGGGTCCCATGGGGTGTTCTACCGGTTACTATCGTCCAAAACATA -TGACTTGCTAGCTTGAGGTGGTCGTCTTGGCGGTATACATGCGCCATGGGAGTAGGAGCT -TTGTGGGATAGTGTATCACCGGAACACATCCTTTTCTCGTCAGAGCTGGCATGTCTACCT -CTACTCGTCATAGGGAACCAAACACTGAAATATTACAACATGTGATCTTGCCAACACCAC -ACAATATATAATATCGGATGTCGGGACACACCACTCAAACAGTCAATAAATCGTTCAACC -ATAACAGACCTCATCGAAAAGTGTGCCTCATGTAAAAGATTCGACAGGCGAATAAGTATC -TAATAGAAACCCGCTAAAATTGAGGACAGTCTCGATCATTAGAGGCTGGTTTGCAAATTT -GGTCCATAGGACCAATAATTCAAAGGCAACCTCATAATCGCTCTGAGTGGTCTACCTGAT -AACTCGGTGGTTGCCTTCGTGGTCTAGTTGGTCATGACGTCGGTCTCATATCCCTTCAAA -CCTGCTAAGGTTTCATATGACATTCTTGGTAACAAGATTGACATCCGAAGGTCTCCAGTT -CGAACCTGGACGAGGGCATTTCTTTTTTTGCCGTTCCTACCCACAGCGTAACAAGCCCAC -TGCACATGTAATTACGTTAGGGCTAATCACTACCAATGATTTTGTACGTCGTACATCCAA -AAAGTATGCAAGAATCATACAAGATATATCAAATGCTGATTGGCTTTCTTTGTGAGGTAG -ACTACTCTATATTTTAAAACTGGCTAGCATCACAGTGTTGTATCGATACTACATAGGTAC -AAGCGAAAAGATAGGGATGGAAACCTACCTAGGTAAGATCTGACAACCTAGGTAGGTAAG -GAGAAATACGAGGGATCTTGGTGGCCGAAGGCCACCAGATAGTTGTTGCGGAGCGAAGTG -ACTTGTCTTAACACTATGTATATGTATATAGTCCTACAATTATTATTGTTAGCGTGTGAA -TATATTTTATAGGGTATATATAGGTTACATACTAAGCTTAGTATAATATCCTAGTCTGTA -TTGACCTTTAATGGCCTATAGCCGGCTTTTTTGGTTTGTGGCTAACCTTTCAGATCAAAA -TTGAAATGGCTTGCAGCCGCTGGGCGGCTACGCTGCGCATTTGGTCAATATACACAAAAT 
-TTTACTGATACTTCATCACAAGCCTCAGTGGCTCAGTGGTTGCCTTCGTGGTCTAGTTGG -TTATGACGTCGGTCTCATATCCCTTCAAACCTGTTAAGGTTTCATATGACATTCTTGGTA -ACAAGATTGACATCCGAAGGTCTCCAGTTCGAACCTGGACGAGGGCATTTCTTTTTCGCC -AACTTTTTTAGTGGTTTATACCTTTTCCTTTTTCATTTCTCATTTTTCTTATTACGCAGA -AGTGGGCGTTGACTTGATCTTAATTTTGTGCCTGGGGTGAGGGTAGAGAGGGAAAGGTGG -AGTGCTTCGAAGATTTCTTGTAGAACTTCAATTGGAACTGGTGTATTTAAACAATCGATA -CAAAGGAAGTGAGTTATGTACAAGCAAAAACACGTATATTACACTGTCCAGCAGCTAAAG -AGACCCGAAACACCGCAATGTATGGAGGTGTTCAACCTAGGCGCAGACGACCAGGCCCTG -CCCGATCTCCAAATAGCGCCGAGTTTTCTTCATTGGCATTGTTATCGCCCGACATCCAGC -GTGGAAGTCGGAATGGCTCACCCTGAAACCACTGAGGAGTTTTGGTAGTATCAAATGTGT -TTTTGACGCGATGCGCAAAGGACTGGAAGAAGTTCTCGTCTTCCTCATCCTCGCCTTCTA -CAATAGTAGGCTCAGAAACTGCCCTGTGAACAGGCTGAGAGCTCTCTGCAACCGGGATCC -GCAGTCGGGACGAGCCAGGAGAACCTTCATCTTCACGATCCGACGCCCAAGTCGAGCGAC -TACCACGGGATGACACTGCTAGAGAACCGAAGCCGTTCAATTCCCGTTCAATGAGATCGC -GACCTTCACGCCAGCCACCCATCCCATACCCTGCAGGGTTGTTTTCAACATCGTGGACAT -CATCATCCTCCGCAAGGCCGCACCCGAAGATGTTCTTGATACGCCCATACAGGTTGAACA -ATGTTGCACAGATAGGCAGAAGAATGAGAATCGGGAAGATAAAGTCAAACCCTTTCCCGA -GAGGCGTCAGATTGATGAGTTTACCAAGAAATTCGTAGAAAGTGGTACTTCGCCGAAACT -CTTTGGGTAAGAAGGTCAAAAAGTTGTAGGCAATTGGTACCGTCAGCCGAGCCACAAGAC -CAGCGTACCAACAGGCACTCTCGCCATAGGTGTTACGACGGACAAGTGCACGGTTTCCCC -ACACCTTGACATCACTGACACCAACAAGAGCCGCCGAACACATGTACAATAGCCAGGCAG -AGGCTGTGAGCTGACCACCGAAGCCAACTGGGTTGTTTTTCCAGTTGGGGACAACGGTCA -GACTCACAGCGGAGAGATGGGGCGCCAAAGACTTGATTAATTCTGACCAAACAATGCAAG -CTGACGCCACTGCGAAGAATGCGCCAAATGCCAACCTGACCGAGGGAATGACATTGGAAT -ACAGCTGATACCGCATATACGGGTTGAGGAACCTCACACGAGGGATCCATGATGACCTAC -GTGGGGCCGGCGCGAATTCGAGCTTCTTCGAAGCTGCAGAATTTATGATGGCTTGGAGAT -CGGCTGATAACTGGACTAGACGATCCCATTCATCCACAAATCGGGCCTTCATGTGCCGAG -CGCGCTGAAGCCGCCGTGTAAGATCAGCCAGATAGCGTTCTGTGATCACAGAAGGTATTG -TGGCAGAGGTATCCGGGGACTCGAGGATTGGGGCCCGCACGTCAGGTTGGCCGTGCCCTT -CCCGGAGTTCTTCAATCCATTCCTGAAAGTCACGAGCAGTTCCTGTCTTGCGGCGCTGTA -GTTGAGAGACCTGGCCATTGAGATTCGCCAACTCATTGACAGAGTCCATCAAGCGATCGT -GCACCTTGGGTGCATGTGCCTGGAGTCTGCGTAGCCGACCACTCACATTAGAGTTTCGAA 
-GCAGGTTTCGCGGGATCGAAACGAGACCGTGACCCATGAGATAGATGGCAAGGACGAGAC -CCCACACATATGCCAATGCCATGACGAGACCTTTGATGGATCTGAATTCGAAACCGTTTG -AGATCGAAACGTAGATCAACCCCACCAGAGCGCATCCCAAGACAATTAACTGGTATTTGG -CGTTCGATCGAAGAGAGTACATAAGTCGGCCTTTTGGCTCACGGTAGCCCGAGTCGACGT -ATTCACCGAGTAATGGAAGGATAACCCTGCAAGAGATAAGAATGAGTGAGAGCTCTTGGG -CAAGTCAAAGAACGCAGGACACTTACCAGGTGAGAACGAAGATTAGCCAGTAGGCAATGC -GCCAGCACACCAGAACCATTCGCTCCGGTAGCCAGATTGCTTTGGGGCCAGATCCATCTC -GCGAACTTGATGCCAAATCGATGGGGACAAGGAGGACAACACTGGCAGGGAGAGCCAAAG -CTAGGAATATGGGGACACTCAAATACGCGGGAGTGGCGCGGAGGGTGAGAAACCGTCGCA -GGAGAAGTAACACAAGTGCGGAGATGAGGAACAACGCAAATGCGAAAAAGACGTTCGAGC -CCACCGAGATGGGCGACACAGATACGGTGAACATGCCGAAGAAATAGCGAAAGCCAAGGA -AGAGAAAGAGAGAAGAAGATTCAGCCGCTGTTCATGGCGAGATATCTAGAGCGCAAATGA -AAGAAGCGACCGGACGTTGATAGTTGAAAGGTAGAAAGAAGTGTATATATAGAACAGGCC -TGGTGGTTTGACGTTTTGGGACCCCAGGCCAGGAGAGTGGCTGACTTAAGCTCTGACTAA -CTGTGTCAATTGGAGTACTGTACGGAGTAAGTACTGGGGACTTGACTTTGGTCGTGTGCT -CAGTTGTTTAGTGGAGTACCTGGTACCCGTTACAGCCTGGTAATTTGACCAATTGGTTCT -TCCGGATCAGGTCAGGGCCAGAACGCCTTGTCGATCTTGATAGGAGCTGAAtcctttcct -ttcctttattttttgcttttgctttTGGGAGAAAGATTTTTTATACGAGCAAAAAAAAAA -ACGATACAATTCTCGTACAAATGAATGATACGTAGTGTACAAAGTACATACTTCGATATG -TATATCAACAACAGTATGGGTACATGAATATCAGGCATTATTTAAGATCCACACCATGAC -GACGTGTTTTGAGCTTCTTAAATATAAAATATTGACGTTGAGGGATCGTCAGGATTTCTA -GTGTATTTTTAGTACATGTAAAAAAACAAGACAGTACCACCTGATTACCTTACCAAGTAG -TGGAAACCCACGCAAAACTGTAAGTACCTCGATACTTAGCCTTGTGACCTTCCCCCCAAA -TAACATCACAATACCGAATATATACCTATTGATACGCGCCTGCTATTTTATTCTTAATCA -AACACCCTCAATGGGATGAATAAAGCCTTTTGATTTCATTTTTGTCACTTCTCATTCATT -TATTCAATACATAATAAATACTCACTCGCCACTGCTTTCTTTGCTTCAATTACGAGATCT -GTGCACTCTTTTTGTCTTTATTCTGTCACTCGGTAAGTACCTTGGTGTTCTTTTCTCTCC -TACCGCTTGTAATTGTGCAGAGAGGACAGCAACTGAGGCCCGGTACAACTTGAGAGCTGC -ATGATCCCTGTCACGGATCCCATCCACCTTACAATTGCTCTCATTCGCTCCAAGCCACAA -TCAGGTGCTGGCCAGCGAATTGAATCCATCTGACGCTGGTGCCCGAACGCCTTGCTTTCC -TCGTTGTGTTGGTCGGTGTTGGTTCCCTTCAGTCCCTCCTGTCCTCCGTTTCGTTCATTT -CCCACCCATCCCCACCCTCCCTGATCGACTCGAATCACGGTTCCACACCAGCATCTGCTT 
-ACCTTTTTTTTTTCCCTTCCGCTGATCACCCGTGTCGAACTTCCTTGCGTCTTCATCTCT -GACCTTGCGACATCTCACAGTTACACGTCCAGCGAATGCTAATATTCAAGAAGCCTCTTT -GTGGATTTCCTATTAATCCCCCATTGATGGGGTGTCCGCTATGAATCCCGGAGAGTTCCC -CAACCCGGGCGCGGGGCCTGGCCGGCCAGCCCCGAACACTGCGATGAGGATGAACGCAAA -CATGCAGGTCCCAAAGAATGACAGTGTGCAAGCTATGATGACCTATGTTGCGCAAATGTT -GCAAAACCAGGGACCTTATGGAGGTTGGAAGTCCGAAGTTCTAATCAAGACTCGAGCGAC -GAATGTCTATCAAATGTAAGTTGTATTTTCTGCTTGCGAAACTTGGTTTGTGCTTTTACT -CCTATCTAATCTAACCTCGTGAGTTAGGATCACCTCCCTCCGTTTGATCCAACCTCGCAT -CGACCTCCATCAAGCGGCCCAGGCGGCCATGAGCTTCGAACTAAAAGCATTCACCAAAGC -CAACGAGAAGAGCGAATATGAGAAAGAATGTACCGAGAAACTTCTTCATATTAGAAACAC -CCGAGAAAGACAGGCTGCAGTCGCTTATCAAAGTGGAATGATGCCCCAGACGGGTGGACA -AAATCAGATCCCGGGCACTTTTCCACAGCACATCAACCAGAGCATGCAGGCGTCCCCGGT -TTCTGGCCAACAGCAGATGGCAATGGGAATGAATGGgcagaaccagcaagcagcaatcca -gcagcaacagcgacagcaacagcagtcgcaggcgatgctccaacagcagcagcagcaaca -gcagcaacaacaacagcggccccaacagcGCCCCGGTAATGGTATTCCGATGGTCGACGA -CCTCAGCACACTTTCACCCCAGGAACTTGACCACGTCTCTCGACTAGCAAACGAGATGTT -GAACAAAACAACCCCGGAAGATATGGAGAAGATCAAATTGAATCTGTCAAACATGACACC -CGAGCAGCGGCAATACCTGGCACGGAAAAAGCTTGAGCCTATGACCTATTTTTTCCGCTC -TCAAGCCCTCAACCAGATCAGACGCCATCGTCGCGCTCACCTGGACATGGCGGGACGTGC -TCCAAATGCCGGGGTGGATGCGAATGGTAACATGATGGGCGACCCCATGATGAATTCTCA -GCACCAGCGGCAGATGCTTCAAAATATGCTAAACCTTCAGCGCAATTCCGCATTTCCCGG -AAATCCCGGTCAGACGATGGAACCTCCAAACTTCATTGGCAATGTGGAAAACATCCAAGG -CCAGCAAGCAGATGGCCTGCGCTCACAGGAGGCTGGCCAGCTAGTCGTTCCTGCAAGCTC -TTCCCAAATGAACCAAGCCCCATTTCCGAATAATAACAATAATATGTTCCCACAGCAGAT -GGGTCAGAATGGCCAGGCGAATTTGAACGCCAATAACCCAAACGCCCAGGCCCAGTTCCT -TGCCCAGCAGCATCTGCAGGGTGGGTCTAACGCTCCCCAGGATCGAATGCAGTTCCAGGC -ACAACAGTCGCAGGCTCAGGCGCGTGCACAAGCGGCCCAGAAGGCCCAAATGGCGATGTC -GGGCCATGGTGGCCAAGTGACCCCCCAATCACAACCGCAACTGAACGGACAGAGCCCCGT -TATGCCAATGCTGAATCAACCCATGGCTCCAGGTCAGATGTCTCCGGTCCAGGTGCCCGC -TCAAGCCCGACCACCGTCTCGACCTGCGAACATGGGCCAGCATCCGGCCGGCGTTGCAGG -ACAGGCTGGGATGCAAGGTCCGCCCCAAATTCCTTCCAATATCCCGCCCCACATCCAGGA -ACAACTCGCTCGCATGCCTCCTGAACAGGCTCGGGCCTTTATCATGCAGCAACGTCGTGC 
-CGCTCTCAACAACATGGCTCGAGCCAACCCCGGTCAGCAGCCACAACCCCAACCAGGCCA -GGCCCAATCAATGATGAATAACCAGATGGGAAACGCCATGATGAGAGGCGCAATGAATGC -CCCGCAGGATTTGAACTCCGGCGGAATACCACCAAACCAGCAAATGACTATGCAGCAGCG -CCAACAACGCCAGAATGAGGTTTATAAGCTCCAATTGCTGCGGCAGCAAAACAACGGTGT -GGAGATGACTCCTGAGCAGGCCAAGCAAATGGACCGCGTGTCTTTCCCACCGTCTATTCT -GAACATGAATGGCACCTCGATGCAAGTTCCTAACAACGTCAAATCATGGGGACAACTCAA -ACAATGGGCAAATTCGAACCCTCAAGTTGCAAGCCCCAACGACCTCCCGCGCCTCATGAT -GCTTCAGAAGCTTCATCTTGGTCAACTGATCTCTGCTTCCACGAATCAAGTCAGAGACCA -AAATGGCCAGGGTCCAGCGGCAACTCCATTCCAAAACACCCAGGTTCCGTTTACAAATGC -GCCAGGCTTCCCTCCTGGTCAACAGCCAAATGCTATCAACATGGCCGCGATGCGACCAAT -CTCCGCCCAGGACATCCAAATGGCTCGTCAAAAGCTTGGACCCCAGGCGTCAAGCTTGAC -CGACGAGCAGATTCAAAAGCTTCTGTACCAAAACCGTCAAAAGCAGATGATGCAGGCTGC -CCAGAACCGAGCAATGCAACTTGAAGGAAACACACAGCCTGGCCAGCTAAGTCAACCGGC -GGTGCAACCGCCGGTTCCTGCAGCACAAGCTGTCCCGCAAATCAAACAACAGCATCCTCA -ACTCCCGCAATCGACCTCCCACGCGGCGAACGTGAAGACCCAAACTGGAGCCACGGCTAA -AGGTGCCAAGGGTGCTGCCGGGAAGCAGCCCTCGAAGAAGAGACCTAGCACTGATGATAC -GATAGATGCCCGAGTCACAGCTACACCCCAGATGAGCCAGCCTGTGGCTGTTCCAGCTCC -CCCAGGGACGGCCCCCCAACGGCCAGGTCTCCCTTTCACGCAGGAACAGCTTGCACAAAT -GAATCCTCAACAGCGAGCTCAGGTTGAGGCACACATGCGAAGGCAACAGCCTCAAAGTCG -CGGCCAGGTACTCAGCAGAGCTGCAGCCGACGAGGCATGGAACAGGAATCTACCCCCACA -GGTTATGGAGGTCTATAACGACATCGCCAAGAACGCCCCACCCGCAAAGCCCATGCCTGT -CTCGCCGGAGCAAAAGGCTACCATGACTAAACAACTGCGTGAAGCCTTGGATGTTCTGGG -CCGCTTGGACGCCCTTGTACAATGGTTTGCAAAGATGCAAGGCCAGGAGAAGAATGTGAA -AAATCTCCTCGCCATGGTAAGCTCTCCTTCAATAATACCTAAGCGTCTCTTTGCTAATCC -GATCTTACTTAGCGTATCCAATTGATGAGACAATTCAAGCCCTCGCAGGATTGGGTTGTA -AATGAGCATTTCACTGTTACTCCCGATTATCTGAATGGTGCCATCCTCTTTATGCGGAAG -TTGTTCGCAGTAATGATTTCTCGCATGCAACAAGGCCAGCGTCCTAACGCCCCCCAACCC -TCTGGCTCCAACACCCAGGCCATGCAAGGCAACATGCCAGCACTGAATGCGACCAACTTG -CAGCAGTTGCAGGCACAGGAAGAGGCCTTACAACGAGCCCGGCGCGCCTCTAGCCAGTCT -GTTGCAAATGCCCCAGCGGCACCTTTCGCGGCACCATCTCCCAGGGGCGTCCCTCAGTAT -GCCCCAGGCGGACTTGCCCCCGAGAATCTCAAGTTGCCTCCTCCAAAAAAGCGAAAGCAG -TCTCATGGTGTGGCATCATCCCCGGTTCAAGCGACTGCAGCTCCTGGCGTAGCAGCAAAA 
-TACAACAAAGCCGTGGCGGACGCCACTTCCAATGCGGCAGCCATGGCGGGTGCTTTCAAA -TGCCGCGTGGTTGATTGCCAACACCACTACCAAGGATTCCCTACGCAAGCCGCCTTGGAC -AAGCACATTGAAGAAAGCCATCAACCCGAGGAGGAAGAGACGATCGAGGATTATCTCAAA -TATTACCATGAAAGTATCTCAATGGGTCTTGGACTGAACCCCAATGAGAGTCTGGAAGAC -CAACAGATGGCCACACTTGGATCAGTGCCACCCTCCACCAAGCTTAGTGCCGCAGCTTCT -CCCGTGAAACAAAATATCGCTACCCCTCTCATTGCCAACATCACGCCCATGGCCAGGGTC -ACTAGCCAATTCGGAGTTAAGACCGCGTCTCCTGCTGCATCAACCCAACTGCTCACACCG -CAGTTGTCGTCTTCCAAGGGAACGAAGCCTACCGGTAAGGATGGAAAGAAGGATGTTATC -AAAATGGAGGAGAGCGACACAAAAGATCCCTGGGCGGAGTGTCCTACCTCGCTTGACACA -ATCCATGATACTTTTTCAAACCTTGCGAGCAAGGATCTGCCTCACCTGGGCTATGATTCG -CTCGAGGATTTCGATATCAATGAAGCCACTCCGGTGGATGATGACTGGGCTGCTCTTGCT -AGTCTGACTCCTCCGGACGAGGCCGAGGAGGCTGCTTTCTTGGAGAAGTTTTATGAACCT -TGGGACGACGAGTCCATTGCATTGGCTGCGGCAAGGATCCGCATCCCGCCTGAGATTCAG -GTCAAGGGCAAAGGACCTATGGGACAGCTTGAGGTTGACTGGGATGCTGTTGCGCGCTAC -GAAAGGGAAGGTCTCCCTATTCGTATGACTTGAAAGTGAAACTCCTGGTTTGCATTGAGA -GCATGTACACATTGCTAGTCCTACATGAAGAGGGACTGAGATTATGAAACATTTTATTTC -GGCGTCTTCGTTTCCAGCGTATATATGGCTCAGGAGCGATGCGTTTGTACACTACGCGGC -ACTCCTGGTTAATTTCTCGTGATTTGAGATCAACCTTTTGGGGGGATTATTTTTGAACTT -GTGTCTATTGGCCTCAGCGATGGCGTTTGGGGAGAGAAAAATCTAGGAGTAGGTAACTGG -ATTATTTATTCATATATAATATGCATACTTGGAAAGCTTGGAGTTTACACATATCATTTG -AGGGTAGATGTATTAAGAACGTAGACTGGAACATTGTAGGGTACCTTGTGAGGTGCCCCA -GGCCCGAGGTACCGTGACATCTGTTATCTTTCGATAAGCTCTCCGCTTTGTCTCCTCACT -TGACCTTGGATCACTTCACCTTTAGATCTGAACACTTGGGATTTTGATATCAGGACAAGA -TGGCAAACAATGCCCTCATTCCATTCCTGGTCACGATGATGCTCGTGACTGGGGTGTGCA -ATACTATTCTCAACAAGTATCAGGTGCGTCCTCGAGTCTCGAGTCTGAACTACAAACATG -AACTAATCACCCCATTCTAATTAGGATATGCAATGCGTGCGAAACTGCGATTCCCAGGAT -CCTGGTCAGCGGAAGCTGTTTGAGCAACCAGTCATTCAAACGTCAGTTTCATCTGTTGAT -GATAGAGAGCTTTGCAACACTCGTGATTAATGCTGACAACAGATGGTAACACAGAGCCGT -AATGTTTATGGGTGAGATGGGCTGCTGGCTAGTTATCGGCTTAACCTTCCTCTACCGACG -ATACATCGCACCCCGCCTATCCAGTGACCCCTCACCCCTCCTAACTGGCGGATATCACCC -GATAAACGGCGACGACGAGGGCATCGATGAAGATGATCACACAGTTGATGAACTCGACGG -TGACAGCAGACACCCTAAGCCTTTTCCTGAAGATAATAGCCGCATTCCACTTCGGGGATG 
-GAAGATCCTACTTCTTGCAGCGCCGTCGAGCTGCGATATCGCTGGCACAACCCTCATGAA -CGTGGGTCTTCTCTTTGTGGCAGCTAGTATCTACCAGATGACCCGAGGTGCCCTTGTACT -TTTCGTAGGCCTTTTCAGTGTCCTCTTCCTCCGCCGCAAGCTTCAGTTATACCAGTGGAG -CGCACTCTTCATTGTCGTCCTAGGTGTGGCTATTGTGGGTCTTTCTGGAGCTCTATTCAG -TGGTGAGTCGGGCCATGATATCAATCAAAGTTGGAGTGCTACCGATGCGGCCTCGCATGC -TCTTGTGCAGGCGCGTGATGTCGCTCGGACCCCCGAGGCTGTCCAGGCTATCATTGGTGT -GCTGCTTATTGCTGCAGCGCAGATCTTCACAGCCACACAGTTTGTGCTGGAGGAGTGGAT -TCTGGAGAATTATGCCATGGATCCAATCCAGGTCGTGGGCTGGGAAGGCGTATTTGGCTT -CTCGGTTACTACCGTCGGCATCGTGATCATGTATCTTATTGTTGGACGGACCGATGCCGG -CCGATATGGCTACTTTGATATCAAGGAGGGATTGCACGAGGTTTTCGATAACCGTGCTGT -TGCCATATCCAGCTTGTTTATCATGATCAGTATTGGGTATGTTTTACACCTTGCTTCTCA -ACTCTCTGTTTGCCCTCCTCCCCTCTAACGTTTGTTCCAGTGGCTTCAACTTCTTCGGTC -TGTCTGTCACCCGCACTGTATCCGCCACCTCTCGTAGCACAATCGACACTTGCCGCACGC -TCTTCATCTGGCTTGTTTCGCTTGGACTCGGCTGGGAGACCTTCAAATGGCTCCAAGTTG -CCGGCTTCGCGCTTCTTGTTTATGGAACTTTCTTGTTTAATGACATCATCCGCCCTCCGC -TCAAGGCTTGTTTGCCCCGTGACAATAGAGAGGGAGAGGTGCTACTCCCAGAAGGCCCCA -TTGAGCATATTTGATTAGCTTGGCTCCTTGTTTCGAACTTTTTCTTCATGTCATATGTCA -TTCTGGTGTCTTGGTATACTACAAGCCTTAGGTTGGGCTCATGCTATGGGTTTTGGATAA -ATTATTCGACCTTGCATCCTTATGATTCCTGCGATGGAGTTATTTCATTTTGATTTTAGG -GATTATATACTGAATTTTGATACTTTCTTATGGAGGCTGCACTTGATCTGAATTCTGATC -TGAAGTTGTAGATCTTTAAATTTTGTTGCCAATTGCATTTCGGTCTTTACTTTTGGTTGG -GGTCCGCGTTATACACTACTGGTAGTATCTAATGAGTACACGCGAGAAAATACTACTATC -ATAACTCTCATATGTGCTATGAACTACATGGACTTGGTGTAATACACCCTGCACTTCATC -ATACGCTAAAACTTTCAACTGAAGCATGTACAATAAGGACAACCCCTCCCCCCAACTGAT -GAAACGAAGTACAAGAGAAAAGGAAATGAACGGCAAGAAAAAGTCTAGATAGATAGTAAA -AGCGAAAAAGGTGCAGAAAACTAGGGAGTATAGAACTGGATTTCTGTTCATGGAAAAAAT -TAAACATTCGTTCGGATGAGATTTTAGCATCACTGCCATCAACTTGATTTACGGCTCGGA -CCAGTCCACTTGGTCGGTGACGGGATCTCCACCGTCGCGGGCGGAAGTGACACGAAGACG -GAGGCGAAGTTTGGCAGGAAGACTCTGGAAGTAACTGGTTAGTTTGAGATTCGAGAGAGA -AAAGTGTAAGATAGGAAGGAGAGACTGGACATACCCCATTCAGAGCGGTCACGCGCATTG -ATTGTGTGCCCTCGTCACCAGCCTCTAGGTCAGCCTTGCTGATTGCGCTCAGCTGCAGCC -GCTGACTCTTTGGCACGGCAGCTTGCAAACCGACGCTAGTGAACCGGTCGAAGTTGGAGT 
-CGTTGCGGAAGCGCGCTAGTATCTGGGCGTTGTTGCCACTTCGTTGAACCTGCAAGGTAA -GGATGAGGCCGTCCTTGTTGAGCGCGGTGTGTGCAGGGGTTGAAGCAGTAACAGGTGCGG -GGGAAATGGAAGGCGAAGGCGCAGAGGCACCTATGCCTCCTGCACCTAGCAGATCCATCG -AGGCAGACGAGGCTTGTGTGGGCTGGGGGGATGGCGAGGGGCCGTTGGAGCCAAACAGGT -CCATGATGGCGCTGTTCGGGTTCTTTTGGCCAGCAGGCCCTGGGGAGGGAGATGACATGC -CCGAGTCTCCGCCAAGAATGTCTGCCAGCAGATCGGCGGTGTTCGATCCGTTCGAGGTAG -GACTCGCAACAGGGGCGTCGGACCCGCCAACCAGGTCAAGGAGCAAGTCATGCTCTGCAG -GTTTAGCGACCTTTGTCGTCGACTTGCCACGGAGCACCTTACTCTGGCGCTTTTTAGCAG -CGGACGTAGCACCTAGTACCCGTTGTTCCTCACGGATCTCAGGCGCGGGCATCCGCTCGA -GCACGCCACGGCGGATTTGATCATAGCCAAAGAGGTTGGTGTATTCAACGGCACGCTGTT -GAATTTCTACACTCAGGTCCGCGGTTCGGCTGGAAAGCAAACGGCGTAGGCGCTCAATTT -GAGCTGGATCCGATATGCGCACAGTAAGCTTCATGGATGCAGTGATGATATACTCGACGG -CAATCTGGGAAGCGTAAGTGCTGTTGAGAATGTTGGTGAAGAGATCAACAACATCACTCT -CCCGAACTTCCTTGACCAGTTCCTCTTCCTCGTACTGACCACCCTGAAGTAGACTATCGG -CATATTCACCAATGGTCCACGTAGCAGCCAGTGTGAGACCCTCTTGTGAGATGTCTTCCT -TCAATGACGAGTAGAGCTTTTGTACTGCATAGGTCTGCAGATCTGGGGTAGTCGCAATCA -GGCGGACGAATGATGACAGAATCTGTTCTTTGACGTAGTTCCCAGCCAGCTTGAGAACTC -GCAAAATGGTGTCCATGTGCCAGCGCTTGTTAGGTGCGAAACGATCGGCCGCGATGCCTA -TCTGTGTCGTCATCACAGACTTGAATTCGTTGTCGGCCACCTCCAGGAAAGCCAGCAATT -CCCGCACCAGCACCCGCACATTATCTTCGTTGATCAACATGAAGCTGAGATCGAGCGCTC -GCCGGCGAATGCTAATATCAGGATCCCGCAGGCATTCCAAGATTGTGTTGCGATGCCTCT -GCACTGCATTTGGTTCAATTGCAACAACCTTATTCAGGGTGTTCAACGCAACGTAACGGA -TATTGTTGTCCTTGTTAGCCAGGAATTTGCCGAGAATATTGACGCCCAGCACCCTCAGAC -CAGAGTCGGCTTCAATGTCCAGAATCGTAAGAACAGCCTCATATAGAATAGAGTTGCCGA -CGTTCTTTGAGGAATCTGTGTTGGTGGCCACCTGGGCCAGGATATCGTTGATTAGTTCGC -TGGTGGCCACATCTCCTCGACCCAATACACGCAAAAACCGAAGGATCTTGACTTGGAGGA -AAGGATCTGTGATGCCGGACACATCATGCTCGGGGGCATACCCGGAGGTTGTCAGCCCCT -TCAAAGCGCGAACTAATCCTCCAGCCAACGGGCGGAACATTTCGATCACGCCCACTGGtc -ctccttcctcttcgtcctcttcttccgcttcACATAGATCAATTGCCAGTGTCAGCCCAC -AGAGAAGAACGCCATGGTTCCGGTCCGACAACAGGTTCTTCGCTTTCTCGAGGAAGTGCT -CGTACAAGTCCGGGACCTTGCGACATATCCGCATGGCACAGATCGCTGCTTTCCGTCGAA -TATAGGGGTTCGCAGTCGACATAAGGGATTCGACTTCGGGGAACAGATCACGAGACATTT 
-CAACCGAAGCAATGTTTCCGAGGGTACAGAGAGCGAGACCGACGATATATTGATTGGAGT -GGTTAAGGTCACTGCATTTTGTTTAGCCCAGAGTCAGCCAGTCAATTACAAGGGGACTTA -CTTCTTCAGAGAATTGGTCACAAGTGTCAGCACTTCTTGGTTCTCATCTAGCAACAGCAT -GGTCCCCAGATACCCCAACCGCTTATCAGCGAACCGGTGTGAGGCAAGTAGTTTCAGACA -TTCAATTTGGCCGAAGTGAGTCCGTTCCCCGAGAGTGAACAGATACAAGAGCTTGGCGAC -ATTATTTCTCCTATCAATCGTTAGCGGCTGATGGAACAATCGAATGAGGTAAATGTTGAA -GCTAACCGGATGCCGGAGTCATGACTCTCTTCTCGAAACGAGGCGCGAATAGCGGCGCTT -TCTTTTTGGATAACTGCACGTTCATCGGCGATGGTCTTTGCCGACCGCACGTTACGGATG -AATTGCTTGACTGTAGACAAATTGGATTGTGTGGTCAGAGTACATCTCAGGGGAGAGGAG -GGGCGGCGGGCAGCACTTACGGGAAGACATGGCGGCCAGAGGGACCCCCGACGGGGCGAG -GAGGAGATATTTTCAATAAAGAAGGAGGGGCGAAGTCAGGTGCAAGAACATACACCAAGC -ACCGCGCTCATAGAACCAATGAATTGAACAAAGTGTCAAGGCAGTTGGTCTGGAGGTCTG -GACTGTTGTATTATTATTCCTCATGGCAGTCGACCTGGCATCTTGGCATGGGGCACTAGT -ATGGAAGAGGATTAGCTTGGTTACCCTTAATCACCTGACTCTTTGATATCCATCTGACTA -CACCGCCTTTGAATAGACACCTGACCTCGGCAGTAAGACGTAAGTGTCAGAGTTGGAAAT -TACAAGTTCCGAGGAACAATACAGGGGTATAACACAGGCATATAACATAGGATCTATAAT -TAATGCCTACTTATGGAAAAGTAGAGTATGCAAGCACTATAGATATGAAAATTGGTCTAG -GTTTATCAACAATTCAACAGTCTGGGCCTCTGAATACATCCGCCTAGTTTACACATATAA -ACAGTACCTTGATATTGTACATACGTAGTACCAGAAGAGGTAGTCTGACGTAGTCCGTGA -ATTTGGTTTCGAGTTTCGAGTCCAAACAAAACCAAACAGACCTCGGGAGGAGCTGGCCAC -ATATAATCTTGGGTGTTTCCAAGTTTTCATCTCAGTGTCTTCAATGTGCAAACCCCCTGA -TTCTGCAATTTTGTCCATATAGGATGTCTAGCGCCGTTTCCCAGGATGAATCCGCAACTC -TGCGGCCGCGCGCCATGCCAATACAGTCTGATGCAAAGTCCGGCGTCGCTCTCCCCCTTC -CAGTGGACAAGCTAGCCCATGATACAAGGTATTTCACTCTTAATTTTCTTGACCGGACTG -GATTTAGCGCAAGAGCTAATCTATTTGTCGCATTCACAGTGGTCGCTCAACTCCTATTGC -TGAGGATGCTCCCCCATCAGCACATTCAATCTCATCCGCCAGGAAGCAAGTCCGCGCTCG -CAATCGCCTATTTTATACTATAGACTATGTGCCTCGTGTGTCGCACTTCGATCCCGATAG -TGACTATCACAATTTCCGAGGGTTCTTCACTCTCTTTTGGATTGGCTTGGCCATCATGGT -TGGGACCAGCGTATTGCGCAATATTAAGGATACCGGATTTCCTTTACAGGTGCGCGTTTG -GACTCTCTTGACAGCCAATGTGTGGGGAATGGGGCTCAGTGATGCGGCGATGGTGATGAG -CTCAGGACTCGTCCTGCCTCTGCACCGGCTGTGGCGGGATGGTCCGCAGTGGCTGCGATG -GTCACGCGGGGGTATGATCTTGCAGAGCTTGGGTGAGGCATTATGGTTGGTTCTATGGAT 
-TAAGTGAGCTTGTGGCTTTTCTGAGAATTGGAATTTGGAAGGTGGCTAATATAGATCAAC -AGCTGGCCCTTTATGATGCAATGGACCTGGACTGCGCAGGTGTTTTTCACGCTCCACACT -CTGACCATTTTAATGAAGCTGCATTCGTACGCTTTCTATAATGGACATCTCAGCGAGACA -GAGCGCCACCTTGCCTCCCTCGACAAGCCAGGCTCAAAGTCACCTGAGCCCCCGGTTGCC -ATCCATTACCCGGAGGTACACCGACGTCGGCCGTCAATGAAGCAGCATGATGAAGACCAG -TCCGCGGAGCCATTGGAGAGGCTGCGCGAAGATTTGGCTACAGAGTTGACCTCCCCGCTA -GGCAACATCTCATACCCGCAGAACCTGAATGTTGGAAACTATGTTGACTTCCTCTTTTGC -CCGACTCTTTGTTATGAGCTAGAGTATCCCCGACGGCTGGAGCGACGCTGGTCAGAAATT -GGCTGGAAGGCAGCTGCTGTATTTGGTTGCATCTTCCTGCTCACCCTGACAAGCGAGGAG -TTCATCGTCCCCGTGTTGGCAGAGGCCAGTGCCCAACTCCGTCTCGTCTCCAACGTCACA -GACAAGGCTCTTGTCCTTGCCGAAACCATCAGCATGCTCTTATTTCCATTTATGATCATC -TTCTTACTAGTGTTCCTCGTCATCTTCGAATATGTGTTGGGTGCGTTTGCAGAGCTGACT -CGGTTTGCCGATCGTCACTTCTATGCAGACTGGTGGAATTCATGTGACTGGTAAGCTGGA -CATGGCTATCGTATGTATTGGGGTAACTTGATTAACCAATCTCTACGTAGGATGGAATTT -TCCCGCGAGTGGAACGTTCCTGTCCACCACTTCCTCTTCCGCCACGTCTACTGGCCCGCC -CGATCTAACTTCTCTCAGCCTGTGGCCATGGTCATCACATTCCTGGTCAGTTCCATATTC -CATGAGCTAGTCATGAGCTGCATCACCAAGAAGCTTCGGGGTTACGGTTTCCTAGCTATG -ATGCTTCAGCTGCCTATTGTTGCGATTCAAAAGTCTCGCTACTTCCGTGGCCGAACAATC -CTAAATGTAAGCTTTTATTTTTTTGCTTTGGTATTTGCGGTTCTCACTGACTTTACTCCT -AGAATGCCTTCTTCTGGTTCTCGATGATTCTGGGCCTGTCTATGGTAAGCTCTATATTCT -CATTTTGAGATATGCAACATCACAGTATACTTGACTCTAACCATCAATTCTCCTTAGATG -TGTGCATTCTATGTTCTAGTTTAAAATCAGCACTTGGCGTGTAAGCGTTGAGACACGGCG -GGCTAGCAAGGAATAAATTGGGCCAAGGAAGGTATGGGACCTTGCCGGCTTGCCATTAAT -ACATGATACCTTACAGGGTGTCTTCCGCTCTTGTATATATATTCATTTGTTCTTATATCC -GCAGTGTACTTTGGTATTGATATTACCTAAATGAAGTATACCACCTAGGCACTTGTCTGT -ACCGCTGTAAAAAGAAGATCCGGCCGTTCATGTGGCCTCTGGCCTTTAAAAAATAGGACT -TGGATAACCTTTACATTCGTAAGACTACTGAAAAGACTATGAAAGAGACCCATGAGATAT -TTCAAGGCCGTATAAGCCGCCGTCTACTCTTGTTTAGTTATAACATTGAAATATATCGAA -TTGAAGTGAGGAGTATATCGTCAGAGGCTACGTGGCCGAGCTTTGAGAGCTGGGTCTTCA -CAAAGCCTATAGGAACAAAAGACATCCTCTTTACTGAAAGAGAGAGAGTCAGCGGAAGAT -CAGAGCCTTCATTTGTGTGGTACAAGATGGGATGGAGATGGATAGATGTCGGAGGTTTTC -CTTCTGCAGGGAACAAAGCTTAACCTTATTATAGCTGAATTTCAGAGGGCCTTGTCAAAC 
-GAGACGGACTCAGACTAGTGTCGTAAGGTTGTTGAGTATGCGCATCAAATATGCAGATGA -TCTATAGCCTGTGTGTGTTCCACCGGAACTGGATCTCTCGCTGCCTCGATCTATATTCGA -GGACATATGTACCGGTTTCGTCTCAAGCCTCGGGCTTCCAGGAAGATAACCCAGTCGTCG -AGGTGACAGCTTACGCACGGACATACGCCACGAAACATAATGCGCCACTGTGGATTCGAC -GGGTATCTGCAGATCCCCCGCAATCTCGGATGTACTAACGAGTACCGTCGATGCGACTCT -CCATCTCGTAATGAAGTACTGGTCGCAGAAGATGTTTCTCTGCATCAAATACAGGCGGAC -ACTTGATGTGGCTGCATATGTTTGTCAGATATTGGGGGCAGAGAAATTTCCTAGGTGACG -GTCTCACGAGAGCCAAAAATCCTACCAGTGAACACAGCGTAGGGTGAAAAGGGGCTCGGC -GCTTTTTGGGCCTATCCTTCTAGTGTCGAAATACGTCTTTTCACAATGGGTCTTATCCTG -CACATAAGCCGCCCGCGAAGCACAGTAGGAGCATATGAAAGGAAAGGGGAGCTGAGAGCT -AAGACGAGGGTAGAAGGAAAGAGGCAAAGACTTATGTGAGATCCAATGGCAATTTCAACC -TCTCTTTTTTTTCTAGGCCTAAACAAATTTCATTATAATGGTGCGATCAGTCATGGATCT -TATATCTTACAAAGATAAGATGACCTCTCTTTTTGAAGCTGGCAAATCCTTCAATTATGT -ATAGGTTGATTGACTATACTTCTATTTGATATGTAAGAAAATATATCCCTAGGGCATTGA -ATGCTCTCGAGTACTTTATTGGTGCGTAGGGAGAGAAAGGAAACTATCTTTACTATAGGA -CTAGATACTTTGCTGGGAGCAAACAGCAGAGTTGCATTTCTCACTATTGTAGGTACAGAG -CCAGAGGACCCATCCGTGAGTTACCAACCTACGTATTTCAAGAATAGCCAGAGCGCCAAT -TCGCAAGAAAAAGTCTCGGTCTAGTCAGTCATTAAATGTGCTTCAGTAAGCTCGAATTCT -CTAGACCTCTTCTCCCGCGAATACAAATCCGACAACATGAGTAGCAGAAGATATCATTTT -AGCTATAGTAAGCAGAGGCTTGAATGACTCCCCTCACTGATTTCCCCCCCCCCCTATATA -TCAGCTCTAGGATTTAGAGCTTGCTTGCTGTGTCAATATGATTCTGACTTTCCAGTGAGC -TTCTGAACCCCAGCAGTCTCTAGCATTATTCTGCTGAAGAGGAACGAGCGGAAGGTAAGA -TTCCCTTATAGAATGCTTCTACGGACAATAGTGTGCATATTAATAGACCCAATAAATGTT -TATCAATAGTCAAAGTCGTCTCGGCGCCATGAGAAATACGGCGCTCTCTTCACGGTAAGT -CTGAGGCGCACTAACTCCAAAGATTTCGACAACACAGCAGCATGAGATGAAACTGAAACA -GTATCAATACCATAGTATCCCGAGCCACACTTAGCAAATCTAAAAGCTAATCTATCTCCA -TAAATCCCATTAGCATGGGAATATTGATTATGTTTCTCGCTGTTTTGCACGAGGAGCAAA -AGTGACAGAACTTTTACCACAAATACTGCAGAGTCAGAAGTTTTACACAGAAAAGTCTCC -TGCACAAAGTATGCCTTTCTCGCTTTCTTCTGCCGTCTGCCTACAGACTGGAAAGGGTGA -AAACTATCTTCAAGATATCTAAGAAGAATAACTGGGAAGCCTACTAAGAAATCTATTGAA -AAGACCGCTGAAAAGACCATTTAAGAGCCCTGTAAGAAAGACATTATAAGGCCGCATAAG -CGGCCGTCTACCCTTGTTGTACATCCTGTAATTCCAAACGAGGAAGTTGGGTTACCTGTA 
-AAAATCCTTATCTGAACATGGAGCCAAATATCAAAGCACGATCATCAAGCTAGTTGAGCT -ACAACTCGATTGGGGCATATTACCAGGCCCTAACTGTATGACTTGTCGTGACTCAATGAA -AAAGCCTCACGAGAGTAGGCAGAGAAAACTCTATCGGTACTGATATTACTCAGAACGCTC -GGCAGAGTACTCTCATCAGACTTGGGTTCTCTACGAGATTCTGCTACATGCAAATTCAGC -ATTGGCCAGTATCTAGTGGCTTGACACCACGTATCAATTCAGTATTCCGAGAAGCTATAT -ATCCAATACCAGTGGTATGAAGGCAATTGTATATGCAATATGAGAGTGGGTATCTGTGCT -TGGGACTGGGTTCGATATTATGCCCAAGTGCCTTCCCCAAGGCTTCTATCGCTGTTACCT -GATTAAAATCCATACATATGAAACACCCAATTTCGCAACACAACAGCTCAGTCATTTCGA -GTGGTCTTGACCTTAGGAGTATGTTAATTTATTATTCTGCAAAATTTCTCACTCCAAATC -GATCTATGCCGCGGCGACGGTAGTCTGGCAGGAATATCAATACGCCTCATATTTTCTAGA -AATTGAGAGTCGCAGCAAGCATCTCGTCTAAACAGCCAATCGTATCAAATCGCATGGATC -ATATACCTGGGCGAGATGAGATATCTAGAAAGCCTTGTGATTATCACACTTTGCCGTTAT -CCCTTGCTGGGTACAATAGATATCGCCCATCTGCCTGATGTTAGTCATACTTCTCGACAG -ATTTAAGCCCGGACCGGGGCATATCTATATTGAATAACAGGTTGCATTTGGTGTGGGTGC -TAACCATCCTCCATCCAAGCCAATCCTAGAACTAGGTCGCTCTAAGTAAAAGATCTCCTC -AACAAGTATCAGCCAATTGATTCTTTCATGGCTACCGATGATGAAAATCAAAACGTCTTG -CATTGAGTTGGGTCATATTTTCTAAGGCGGGGAACGCCAATCAAATTCAAAGAGCCTCGA -CCCTTGACGGTCCAACAAGGCTGAGCAGCCCCCGATAGTATCACAGGTCAGTAATTGCAG -TCTAAACATTCCCAAGGCTGTACTAATCAAACATTCCTCAATGCATCAACGGTCTATTAT -TAGAAGATCCAATCATCGTTCACAAACTTATGAGTCTTTCCCCGCCACTTTGGTGGCCCC -GTTTCCAGcccaacccaacccaacccaacccaacccaaccAACATATCCATCTTACAACG -AGAGAAAACCGAAGAAACAAAGAAACCAACAAAGGGCCAAGGTTATGGCATATCATATCA -TATGAAAAAGCGTAATTCGTTAATTGGATTGACCAATAAGGTAGTCCACAGGCTTAGCCC -GGGAGCACAGCGTATAAAAAAAGAGAAGAAAATGAAAGCAAAAAGAAATAAAAGAGCAGA -AGGCGGATGTGGTCAGCAATAAGCAGAGCCAGAACCCGACTAGACAATTTCATACGCCAC -CAAGTGCAGGATTTTGAGGCAAGACAGATGCAACACACCATCCACACCCATGCCCGCCTT -AATGCGCCAACGGCAATGACAAAGATACACCCCGGTCCTGGCCTGCTTATTTCCGCACAG -CTGGGCCAAGAGACCAGGTGACATGAATGAAAATCGAAGTCCTCAAATGCCAAAACGTCC -CGATGTTTTGTGGCACAGGCAACGTAAATGCGAAGAGCGAAGCATTGGGAACGCCCCGAC -TCCTTGTTAAGATCGAAAAAAAAAATAGATAAAAAGTTTATAATAAGATTTGAAGCACAT -TGCAGGCGTGATTTGTCGTGTGTGATCATGTTTCATGGATGTCATGGGTGAGTTAATTAA -TAAGCAGACTGGCGTCTCCCCACAGCAGGAGGACGACCGTCTTCCTCGTAGTAAGAATAA 
-TAGGGGCGTCTCGGCGAGTAAGTGACATCATCTCGCTTGTATGCCCTGGCGTATTTGATA -TCATCCTCGCTCGGGCGGGCAATGTATTCGACGTCCGGATGGGGACGAGAAGGAGGGCGC -GCAGACCGAGTTTCAACACGGGGCAAGGCTTCCGTGGTGTAGGTACTGCTTCGCTTAGGT -GTCTGTCGTGTAGAGGGTATGCGCTCCCGATCGGGAGACCTCGCCGAGCGGTACTTTGAT -GACTTCGGCTCTACAACGATATGGTCTGGTTCCTTGACAACCTTGTAACGGAATGAAGTC -TTGGGGGAACTGCCCCTTGGGAGCGCATCAGGGGTACTGGGACTGGAATAGCCCGAGTCT -TCGCGCTCGCGCATTCTGGAAGGTCGCGATGGCACTCGCTCACGGGCCATTTCTTGTAGT -GAGTCTCGAGTCTGCTTTGGGCGGACGAAGCCAGCGGTGGTTGCGGAGCGGGTACTGAGG -AACGAAGGACGGAAAGATCCCTTAATACCTGGAGCGGTTGCGGAGGGTTGTAATTTGGGT -GGCTTGGCATCATAGGTTCGGGGCGACTCCAGGTGCTCATAAGAATCATCGCGCGACGAA -GAACGACGGGTACGGGTGGACCGTCCCGCGGATCGCGACCCGGACAACTCTGGTTCCGCC -GAGTCGTGACCACGGTGCCGCCGTGGGGACTTGGAACCCTGGGCTTTGGATGCGATATAG -TCCTCTGCATTGAAGTACTGACTATCGAGCTTGGACCAGCGATCGTCCGCATCGCCATCC -TCATCGTAACGGTGCTCACGGCGGCGTGAGGAGTCCGATGGTCGGGGTCGTGTGTCACGC -TCTCGGCGGGGCGTTGTTGAAGCACGTTTGGTATGGTAGCGGGGAACACTAGAATCAGAT -GTATCCTCATCGGATTCTACATGGCATCCAAAAGTATCGCGCTTTGCCTCGGCCTGTCGT -CGTCGCTCCTGATCGCGTGTCTTTTCACGGTCGGAATGTGTCGCCTTGGCCGTCTTTCGC -TCTTCCTTCGCTTGGTGATATGTGCTCATCGGTGTTCGAGTCTTCTTCTTATCTTCTGTC -GGTTTCGTCTTTGGGCGCAGGCCGAACTCTTCGCATTTACGAGCCGTCGTGGAGCGTGGT -TCCTCTGTGAATCGTAATTCTTCCTCAAAGAAGACTTCTATCGGGACCCTCTCCTCCATA -TAGTGGCCATTGCGGAATTCGCGAGTGCTGGCTCCACTCCCCCGCGGAGAGTTGTAGGCA -GAGTCTGTCCGTCTGCGTTCCAGCAGTTCACGCTTCAACTCGGCTATCTTGACTTTTTGG -TCATATTTGGTCCGTCGGACATCGTCTGAAAGCAGTTCATAGGCCTGTTGGACTTTCTGG -AATTCATCTTGGGCAGCAATCCGCTGCGACTCATCTTGAATCTTATCTGGATGGCACTTG -AGAACCCGCTTGCGGTGGGCGGCCCTGATTTCAGGGGTGGTGGCATCCTTGGCGACACCA -AGCACTGCATATGGGTCTATATCTGGTGGTGTGGACATTGCGGACAATGTTTGCTTTCAA -TTGAATGGGATGAAAGAAGATGAAAATGAGAGGTGGGAAAGAAGCGGTGGAGAAGATATG -GAATGTCAACAACCCGACGTTGACGTAGCAAAGATTCCACACCGATCCAGAATGAACTGG -AGATGATTTGCAATGGAAGAAAATCAAGGGAATTGTCCACAATATGGCCGATTAACCGTG -CAAACCCAAAGAAAACCCCCCAAACCCAAAAAGGACAAATGGGACAATGAATCAGCTGTC -AGAGCGGCCTCTCAGAGAGCCGAAGGGGGCGAATGACTGCGCCACAAAGAGAGGAATTTA -GTATGTACGGAGTACCTAATATGGAGACATAGGGAGTCACATAGACAAAATCTTATCGAG 
-ACGTTCGAAGAAGCCAGATGCCCAACCTAATAACCTTCCCCTCTGTGTTAGAATCCTTCT -CTTTTCACCGGTTATTGGTCTGAGCACTTCATCCGATTTGTTGGAAACTCTTGCCAATCA -AAAACCTCGAGCATTTTGACCATAGTGGGGCGATCAATCAACACGAGTGCCATTCAGAGG -ATCGGGGCCCCCCCCCCCCACCACCTCTCTCTTTTCCCAAATCGTCATGACGCCAATCTT -TTCTTCTTTTCTCGATCCAGTCTCTCCTGCATACCCCCAACTTTCCTCTAAGTAGTGCGG -GTTTTTGGCCCTTTTAGGTTCGGTCGCCTCActgattttggtttatttttctgatttttc -tgatttccgatttTCCCATTTTTTCGCCTTCAAGGCACACCCAGTCAGATCCAACTCACT -TGTTTAACCTGAAACCTTTAATTGGTTGATTGGGTATTATATTCTATACCGTACCGTAAA -GAAAATCAGTGCCTGACTCGTCCCATGGTAATCTCCGTCTTCCCGAAAGGTTCCGTGGGC -CTTAAATTTCTTTCTCTGAAGAATGTATATGTTTCTATATATAGTCAGATCATGTTCCAA -TATTTCACTTTGCAATAGGCAATTCGTTCATATTATTACTTGAGATGTCTACACATGTTG -TAGTAGATACGGAACACGGAGTACCTTGGACCGTTCGGGCATTGATTCGCACCCAATCTC -CCAAGCTAAAACCGCTGGCACTTGTCCCACACCGGCTCCATTATACGTGTCTTTGCTAAG -TACCCCCACCCGAGACTACTTATCTTCCAACCATTAATGTGCACTCCGTAGACTACAGTA -TGAGACAATTGTACACTCTCTCGATGAAACTACATTGTACTCCGTACTCCGTATGTCTCG -GACCATCCGGCCCATCCACTGGTTTCCAGTCATCTGGACCCGGTACTGCGGTCCGCGCGG -CGCAGGAATTATCTAGACTGCTGACTGCTGCTGAACAGAACTGCGTGTGGCAGCCCTGAT -CATGACAAGTTTACTGGCAAATGCCGAGCGAATGAACTCCGTGTCAGCATTACCATATTA -CCTATGTTGTAGGGACCATCCTCTTTATTATTGCGACTGGCTAAATGGCTTTGGTTTGGC -TGAGAATGCCATGTATCTGTCTATTTACGACATAGTATCAAGATGTCCCAGGTCTCAAGG -CGGGAGAAAGGCCCACGATCCATTCAAATCTTCGCCTCGAATCCTCGACTTCTACAGCCC -TCTCACTATTTTCTCTGGCGGTATCATTTGCCATTTTCGTGTGTGATAGTGATGGTAGTA -TACATGACTTTAGTCTTGGAGTGGTTGATTTATTTCCCAGGCAAGAATGGCACTAGACCA -CGCTCACTCTTTCTAACTCAACTGTACCAGCCGAATCCATAATCCTCCGATCCCATCAGC -AGTGGTCGTGAGGTCACACAGTGTCTCCTTCTGAAACACAGGGTTGTAACCTTATACGCT -TAAACTGTGATCTACAGATACCCCTCTTCCCGGACAGTCACGCAACCTGACTGGGACTGG -GGTCGGTGAACTCGGGATTGGACGTGACGAGAAGGGAAGAAAGGGGTTTTGTATAGTTAG -AATCGTAGGCCCACTCAAAAGCCGAAGTGGATTATTTAGGCAGGTAAGAAAGGCAGCGAT -GGCGATGTATGCAGACTACGTATTGAAATGCATTATATATGCTGTGGCTGGCTTGAAGTT -GTAGGTTTGCTACCCGGGGAACAGGAATGCAGATGCAATGCAGGCTGCAATAATGATGTT -TAGTATTACCCTCTCATGCAACAGTACTCTCTACTATTACATAGTACTTATGTTGTACGA -GTTACGGAGTAATGTGGCCAGACCTCTTCGCTCGGCCACTAAACGCTTTCTAGAAAGGGG 
-CGTGCCAAACTAAGCAGCAACCCTTGCGAGAAACAAGTGTTTCCCACGAGTGTGTGTAAG -AGTATTCGAACCGAGCACAAATGCGGGCTTCTAACCTATGATTAATACATGATCGGCCGC -ACATTTGTGTTGTCACATCGGGTATCAAGAAGAGGGTAGGAGGAGGAGGCCATGCAGTCA -AGATGTGATCTTGGCCCAAAGTTTACCCACTAATAAGTCATTTAGAAGCGCTATAGATTC -CATCAGTTTTGAGTTTGGAAGAGATCCCCGTTGCAGATGACTTGTGTAAATCACAAGAAA -CTTGCAACATACGATCAACTCAAGTTGTGATATCACAAGCGCGTGTGGCTTGATGTGATT -GCGATCGGTCAGATGGGTTTGTACATTACATGCGGGGTATACATGAGGGTCTGGTGCTTG -TGTATTTTGAAAGAGCGAATGGTAAATTGTTTCTTTTTCCTACCCTTCTATTACATTCCT -ACAGTATGGAACATGGAGCTTATGCTCAAGTTGCATTGGCAGTCTGACGGGCTCATCACC -TAGGCAAGGGCCAATGCACAGTTTAAGAAATGATGTAAGCGTGGGCAGTCTGACAGCACT -AGGATCGTGCATCAGGTCACTAGCCGACCAATGTTAGAAACAATAAAGCACCAAGGCGTC -CAACCACCAGCATCAACAGGGCCAAAGCCCTGGTCACTGTACGGAGCATGTCCCAGGGAA -ATGGTAAAATCTGGCGTCAAGCCACCATGCATGGTCAAAGCAAAGAGAAGATTGCCGTTG -CGATCGATGGGGCAGCCATAATAAAATGCAATTTCCCAAAGCTGCTGAATCCCACAAGTT -CATCCATTCGCAACCGCGTGGTCATCCTGCCACACAGGAAGTTCAAGATCGACCACAATC -GGCGAATTGAACCGTTTGCCCCCCTGGTGCGGCCAACTAGTGGATACAACAGCGGCGTTG -ACAATGCAGGTCTCCCTTCTTAGTGTTCGCAAGGCGTAATCTAAATCCCGGCTACTGGAG -CCAGGCCTACCCGCACCCAGGGACAGATGAGCGCTCGGCGAGTTTCGTGGACTTGCCTTC -CCAAAAAATCCAACAGATGACTCTGACCCTGGTGAGGCTGATGACATCGGTTGTGTCTCC -GCTAGATCATGCTGTTTTTGCCCAGAGTATGAAAATCCCTCGTTGTCAAGTCGTTTATCC -CGTTTGCCAGTCAAATCAACAAGGCTCTGTTTCTTACTTCCCATGGCTGGCAGAACGCCC -CGAGTGACATGGTCAACTAAATCAGCTTCTGGTTGTGTTTCTGTTTCTGTCTTGATAAGC -TTGCCTAGCCAGTAAACTCGTTCGTACCCTCGTCCTTCATGCACATGGCCACAAACAGCT -AAATGCGGCCTGATGAGGCTCAATGCACTACGAAGTGCAGCGCACCCCACGAGCATTCCA -TTCGGTTTCTGGTCGCAGTGAGACTGTGGTGGCGTATGCGTGACGAGGATGTCCGTGTCG -ATTGGTATCTGACTCCACAGCGTAGCAGCATCATTTGATTCGTATCCAAATGCCCAGTCT -CCCTGTGATGGCGAGTACGGGGACCCAAAGATTTTGAATACTGTGTTGGGTCCGTCTGGT -CGCGTCAAGCGAATCAAGGCGGGCTCATGGCGAAGTAGGACGACCGAAGGCGACTCCGTG -ATCAATTTTAAACAGTGATGTGAATCTTCGAGAAACCGGCCGTGGAAGCTTTGTCCATGC -TCTCCGTAAAATATTGGGTCTAAGGTGACATCGTGGTTGCCTGTTCAAAATGTTAGGCGG -GTTGGGTTGAGCTTTTCCTGCTAACGAAGTGGTGAATTTTTGCAAGCAGTGAAGATATAA -CTTCTATTAATGCCGAGGATAAGTCAAGTGTATATTGGGCGTGTGAGCAACACATTGGAG 
-AACTGACCACCGACTATGATCTTGATTTCAAAGTCGGCCTTGCAGATCCAATCCATTGTC -TTGCGGAGCTCTTTTAGGCTTCCATTGTTTGTCAGGTCGCCGGCATGAATCAGGACGTCC -CCTGCGGGTAATTTGAAGCCGGCTTCGGAGGGTGTATATCCGTGTGTATCAGAGACGCAG -ACGAAACGCGTCTTTTTTCGCATGACTCGGGCACATAGAGGACCGCTGCCTCAAGTTGTG -TTATTGAATGAAAATCAGGTTGAATTGTTGGTGTTTGATGGGATCGCAAGTCAATAGCAA -TAGTAGAACATGAGGCTCAGTCAGAATCTCCTTGGCCTTGGTTATCTCTACGAGAAAGTT -CCTTTTGGGGGGGAAAAAAACAATGGCCTCGACCCGCGCGGGCGAGGTTGGATATATTAG -TCATCGGACCCAATGTTTCCACCTGAAGTTGCCTAGATCATAGATCTGGCAATCGAACAA -GACGATAAGACAGCATTCAATGTTGATAACAAGAAGCAGGTTAACATGGAAGTGCAAAGG -GAGTTGTAGCTATCTACTGTTATGTTTTAGTATGGTATGAGGTTTTTGTAACGAATATTG -GAATTATATCAATTCTCCGTTCCTTGCAACTTTCAAGATAACCAGTCGACCTTGCAATAA -CCTTCTAGAACAGGGTACATATGTCAGCCCTAAGCCAATGCCCTAACGGCACAAAGAAAA -GAGAATCCTCATGGCGCACAGTGAGAAATAAAAGTATTACATAATACGATATCGAGCGGA -GTCAGGAGAGGAAAGGCACGTGACTTAGATAGCCTATTATTGGTCAGGCTTAGGGCTGCG -ACCCTCGCCATTAACACAAAGTGAGCGGACCCTCGCTTAGCCCATTCTCATCGAGCCATC -CGACTTCGTTGTGTGCGTCGACTTGACACCCACGACAACAACCGCCGACAACATCCATCC -CACGGGACATTAGATCACCAGGATGTCCTCCTTCGAATCAGTGGTGCGTTTTTACCCAAC -GACTTGAGTGCCGTTACTCTCGCCTTCAAAGTGCCCATATCGAATCTGTCGCTGACTTGA -ATTGTTTGGAAATAGGTCGTCATCGACGGCAAAGGACACCTCCTCGGTCGTCTGGCCAGC -ACTGTCGCCAAGCAGCTGCTCAACGGTCAGAAGATCGTCGTTGTGAGATGTGAGGCCCTC -AACATCTCCGGCGAGTTCTTCCGCGCGAAGCGTACGTTCAACCCCCTCTCGAATATTTCC -TATGCGTTATGTGTGATATTGGGAACTATGTGAATCGGTGGAGGAAAGAAACGAACGACA -ACATTCGCAACTCGAAAATCACTGGCAGTCTGGAGCAATTTGAACCAATTTTTGGAGGAA -ATGCACTACAAATTGGATATCAAAGCTGACTCGCAATGCCGCCATACAGTCAAGTACCAC -GCCTACCTTCGCAAGATGACTCGTTTCAACCCCACCCGTGGTGGTCCCTTCCACTTCCGC -GCTCCTTCCCGTATCCTCTACAAGGCCATCCGCGGTATGATGCCCCACAAGACTGCCCGT -GGTGCCGCCGCTCTGGAGCGCCTTAAGGTCTTCGAGGGTGTCCCTCCTCCCTACGACAAG -AAGAAGCGTGTCGTTGTTCCCCAGGCTCTCCGTGTCCTGCGTCTCCGTCCCGGTCGCAAG -TACTGCACCGTTGGCCGTCTCAGCCACGAGGTTGGCTGGAAGTACCAGGATGTTGTTTCC -AGGTATGTACAATCTCTAATTTATCACCCGAGTGTGGCTTGACTTAATCGGCAAATTCTA -ACCCAATCACAGACTCGAGGAGCGCCGGAAGGTCAAGAGCAAGGCTTACTACGAGCGCAA -GAAGGCCGCTCGCCGTGTTCTTGCCAAGGCCGAGCAGGGAGCGAACGTGGACAGCAAGAC 
-CAAGACCCAGCTTGCTCAGTATGGCTACTAGATACCTTTAGCGCGGCTGGTTGTTGTGTC -GAGCGATGGTTCTGGGATGGAATCGCGGGCGGATTTGAACGGCAAGCCGACCGACCTTTT -TCTGCGGCCCGATTCCCTAACTCATGAATACCACTGTCTGTGACGCACATGACGGCACGG -GGATTGGGGGTCTTCGAAATTTTCAAAAGTCGAAAAACGGAGTCGATTGGAATTAAAAGG -TTCAAAATAGCTGGCCGTTGACTCGCGCCGTGGCGGTTTAATCCAAAGAAACAATTTTCA -AGAGTTCAACATGATTCTTATGATCAAAAAATACTTTGACCTATACTTTTTTTTTTCTAA -ATTATCTTGGTAATCCCCTTGATTTCCCCTGTAACATATTGAACATGTATCTTGCCTATG -TGTATCCCCGCATCGCCCTGAGTACACATGTGTACACATGTATTAGAGCCATAGAGCCAG -AGTTATGAAACTCTGACGCCAGTGAAATTTCAAGGTGAATCCGTGTATCCACTGTCCCCA -CACAACTTAACCAATCCATGATAATCCATTGAAGCGACTATAGCCATATGTAGATCAAGC -CATTCCCGAAATTGAGGCCATATTCAATCCCTTAGTCCGGTTATACGCCCCCAGAACTGG -TACCCGAACCCCCCCCGCCTCCCCCTTAACCGCGGCCTTAGTCCCATCCCCGCATTTTCT -CCAACTTGGAGATTTCATTTATCTGTCCAGTACTAGTATCTCACTTCACATTGACTTGCG -TCTTTTGTTTCAGCTTACGTTGTTCTTATTGAAGAATTCATCAATTCAAACTCCCGGTAT -AGTATGTCTGGTTCGAAGTCTCGGTTGTCTGGCCTGCTTGGCCATTTCACAGGTTCAACA -CCGCCGTCCGAACCTCGTGTTAACACCCATACCCTATCCCCGACTTTCTTCCTTCCTCGT -GCTGCTGCTGTTGAACCTAATGTAAGTCCATTACAAATGGTACCGAAGTATTTCAATGGT -CTCACAAAGAACTATACAGGCCGAGGCGATCTACCATGTAACCGCCAACAACAAAATCCT -TCGCAGGAGCTATGGCGAGACTGCAGACCGAGCGAGGGGTATGGCTTACTACCTCAAAAA -ACATGGGTTTAACAGAGTAGGGATACTCTGTCCGAATACCCCGGCATTTCTCGAGTCGAT -TTTTGGAATCGCCGCAGCTGGCGCTGTCAACATCGGTAAATCAATTGTCTTGAGAGGAGA -GACATCGTTTACCGATTAAACTGACATGATTCTAGCCGTTAATTATCGTCTAAAACACGA -AGACATTGCTTATATATTTGATCATGGTGATGCGGAAGTGATCATCGTCGACGAGGAATA -TGTTCCACTCTTGGAGAGCTACCGATCTCAACACCCCCACATCCCTATTATCGTAGATAC -CGATACCGATGCAACGGAGGGTGAGCTGACGGGCCCCTTCGATGAGGCTGTTCTAGAAGG -CCTCAGATATGACATCGACACAGGTTCTCAGGGTTGGGAAGGACTCGAGAGTCAAGCTGC -CGATGAAGAGTCCACAATTGCGTTGGCATATACTAGCGGGACTACAGCTCGTCCAAAGGG -CGTCGAGTTCACTCACCGAGGCTGCTATTTGGCAACATTAGGAAACATCATTGAAACTGG -TCTGAACTACCACCGTGGACGTGCGCGTTATCTATGGACGCTACCTATGTTTCACGCTAT -GGGTTCGTGAATTTTGTCCCCTTGAAGTGATCTGCAAATTATCCGCATAACTGACCTATG -TAATTACACATAGGCTGGACTTTTCCATGGGCAGTGACAGCCGTTCGCGGAACTCATTAT -TGCCTGCGCAAGATAGACTACCCAGAGATCTGGAGGCTGCTGAAATCGGAGCATATCACC 
-CATTTCAATGCTGCACCGACTGTAAACACCTTGCTTTGCAACGCGAAAGAAGCAGAGAAA -CTTCCCGAGCCCGTCCGCGTCACGGTGGCAGCAAGCCCGCCCACGCCACTCCTCTTCGAG -CAAATGACCGACCTAAACCTGCATCCTGTACATACTTACGGAATGACGGAGACATACGGC -CCAATAACCAAGGCCTATCACTTGCCAGAATGGGACGTGCTTCCTCTCAAGGAGAAATAC -CAGAGGATGGCCCGCCAAGGCCACGGCTTCATCACAAGTCTTCCAGGTCGGGTCATCAAG -ACCGAAGTGCCCGAAGGAACTATCGTGGATGTTCGCAAGGATGGCCAGGAGATTGGCGAG -ATTTGTTTTGTTGGAAATATATGTGCCCGAGGATACTATAAGGACCCTGAGGCAACTCGG -AAACTATTCGCAGGAGGTGTTTTGCACTCCGGAGACTTGGCTGTCTGGCATCCCGATGGT -GCGATCCAGATTCTTGATCGTGCGAAAGATATCATCATCAGTGGTATGTGTAGCCAGTGG -CTCAACCGCCAATAGACCAGACTGCTAACAATAAATAGGCGGTGAAAATATCTCCTCCGT -TGCACTTGAGTCTATGTTGGTCATGCATCCTGACATTTTGGAGGCTGGTGTCGTTTCTGT -TCCTGATTCTCATTGGGGCGAACGACCCAAAGCCTTTGTTACAGTGAAGCAAGGCAAGAA -CCTACAAGGCTCAGACTTGATTGATTGGGCTCGTAATGTTAGCGGAATCAGCAAGTTTAT -GATTCCTCGAGAAGTGGAAGTGGTTACTGAACTACCTAAGACCAGCACTGGCAAAATTCA -GAAGAATGTCTTGCGTGACTGGGTAAGAGGTCCGAGAAAAGCTTAGACAAAGATTATTGA -GATGGAGTTATTTCTAGATAGATTGTTATATCTCTTTGAATACATGTGGGAGATATTCTG -GAAGAAAGCAGGGCTTCCCTAGGGGTACATTCCTATGAGGTCAGATGTCCGGTACTCCCG -CATTGAGTAGACTGATTGTAGATCAAGCACGAGGTCACATCAAGTGACCTTGAATATCGT -CTTTCATAGACTGGCTTAACAAGGCAGACTGATCCTCGTTTTCTCTGTTAGAAAGGGTTT -ACAATTTAAATTTACCCGAAAGGGTTGTACAATCTACAGTCCATCCATCTTATCAAACGA -TAAGCAGTGCTTGGCACTGACTCCGCAGCACTGTGAGATCCGGGCCCCATTGCCTCATCT -CCCCCTCCATTACCGCTTTTATCCTTCACATTTACTTGCATTGATTTTCCTGGTCACGCA -TTTGCATCTGACACCAAGGGGTGCTTGGGGTCATGATCTAGATCGGTTCGACTTCATCTT -GAACCTGCCCTTGTCAAGCTCTCCCTTGCCATCCAAATACGCGCCGCGAAACCACGCTTA -CATCTCTCGTGCACCCACCTTGCTGCAGATAGTTCCCGAAAATTACATCCATCTACTAGT -CTTCAGACAAACAGCAAGGTCTTAGGACTATTGACTGATCAAAGACTGGCTGCATCTGTG -GACCAGACTTGCCTGTTCCAGTCTCACATGCACCGCCCATGCAGAGTCCCTTGAAGTTAT -GAGATTTCAACCATACCGACACCCGAAAAATATATTGAAGCGAACCTCGAAAAGAGAATC -TACACCCGCTAGAATGCATCGGCCACGAGAAGCCGGTCGATGCTCAAGTGCAGCCCCTTC -CGAAACCTCTGGATCGACGTCACCCGAACGTGCCGCCGATGATGACACCGATTTCTTCAT -GGCCCAGGCCAACGACTCGCAATCTTCCATCGGTGTGGCCAATTTCCGCGATTCTCGCTT -GTCTAGTGAACCATCCGAGCCACTGCCACCGATCGGCCGACTCCCACCCGAGATCCTAAT 
-CGCGATCTTCTCGAAATTAGTTGCGCCTCTAGATATGCTCAATTGCATGCTAGTGTGTCG -TGGATGGGCTGCCAACGCTGTAGGGATACTGTGGCATCGTCCGACTTGTAACACCTGGGC -CAACGTGAGAAGCGTGACTACGTCGTTAGGGAAACCGGACAGTCTTTTCAACTACGCAGA -CCTTATCAAGAGACTGAATCTATCTGCTCTCTCAGATGATGTCAGCGATGGAACAATCCT -CTCTTTCAATCAGTGCAAGCGGATTGAGCGGCTGACATTGACAAGCTGCAAGAACTTGAC -GGACAAGGGCGTGTCTGACTTGGTGGAGGGGAACCGGCATCTGCAGGCGCTGGATGTTTC -AGAATTGCGACACCTTACAGACCACACGCTCGCCACGGTGTCCAGGGACTGTCCCCGTCT -GCAAGGCCTGAATATCACGGGATGTTCAAAGATCACTGACGACGCCTTGCTCATTGTATC -ACAAAAGTGTCGTCAAATCAAAAGGGTATGTGGATTATGTGCGGGATCTTGGCTTAATAT -TAGTCACTGATTGTATAATAGTTGAAACTTAACGGAGTTTCTAATGTCTCAAACCGGGCC -ATTCAGTCTTTTGCCGAGAACTGCCCGTCTATACTGGAAATTGACTTACATGACTGCAAA -CTTGTTACGAGCATATCAGTTACTCCACTGCTCACAACATTGCGACATCTCCGAGAATTG -CGGCTCGCGCACTGCACTGAAATTGACGACAGTGCGTTTATTTCTTTACCTCAAATGACC -TTTGACAGTCTTCGCATCCTTGATCTCACCGCATGCGAGAACGTCAGGGATGACTCGGTG -GAGAAAATCGTGCGTGCAGCTCCTCGCTTGCGGAATCTGGTACTAGCAAAATGCCGATTC -ATCACAGATCGATCTGTGATGGCCATTTGCAGGCTAGGAAAAAATCTTCACTATGTGCAC -CTGGGCCATTGTTCGAACATCACAGACTCGGCCGTGATGAACCTGGTCAAGTCCTGCAAC -CGCATTCGATATATTGACCTGGCCTGCTGTAACCTCCTGACGGACCGTAGTGTACAGCAA -TTGGCTACCTTGCCGAAGCTCCGAAGAATTGGGCTAGTCAAATGTCAAGCTATCACCGAT -CAGAGTATATTAGCCTTGGCACGGCCCAAGATCGGCCATCATGCTTCCGTTAGCAGTCTA -GAGCGTGTTCACCTGAGCTATTGCGTTCAATTGAGAATGAAGGTGAGTCTCGATTAAAGG -TCTTGATGTTCACTGCATGTAACACTGACATTACGGCTTAGGGTATTCACGCTTTGCTGA -ACAGCTGCCCACGTCTGACTCATTTGAGTTTGACTGGCGTCCAAGAGTTTCTGCAGGAAA -ACCTAACTGCGTTTTGCCGAGAAGCTCCCCCAGAGTTCACACAACAACAACGGGACGTCT -TCTGTGTGTTCAGCGGCGATGGTGTCAATCGACTCCGAGACCACTTGAACCGAAATGAGC -CATCATTCCAAGAAGAAGTCGAAGCCACGATGTACGATGACGATGAAGAACTGGACGAAG -ATGAGGGCCAGATGACTGGGCTGATGAATGCAACTGTCATCAACGACGGAGACGATGATT -ATATCGACGTTGGGCCTTTGAATACTTGATACGATACCCCCTTTCTTTGCGGTCGAAAAT -AACCCCAGGCCCTCGCATATCTTCCTTCTATTCAATTCCCTGATCTCTTTTATTCGCGAT -CCCTAGCTGGATCAAACAAGCAGACATATTACTACCCATATGAGCAAAGATCACTTCACC -CTTCATTCTGTGTTATTGTGGGAAATGCATTAGTACGCTAGCGATTACGGACTTGGCTTA -TTGTTCTGTTTTCATTGTGTAATCGCATAGAATTTAAGCAAGTTCTCATTTCGTTCACTT 
-TGTTCCCAGTTCAATTATTCCATTTGTGTTCGAGTAGTAGGATTGTGCAGCTTATCTGCA -TATATGTACATAAATTTATGTATGTAACCGTAATTACAGTACCACAAGCTACCCCCGCAT -TTGCCTTAAATTTGACTCCAACTTCAACCTCCACTCCATCCATTCCCACTCAGTTCGAAC -AGCATAGGGTATGGCTACCTACAAGACTAGCAACCCTCTGGCATTTACGCCATGGCCGGT -AACAATAATAACCACGGCCGTGTACCTTGCCTTGATCATCCCATTGTTGGTAATCCATCA -CAACGTCCCACCTGCGCCTCGGACGAGCCCCAACGGTCTCAACCTCACAGAGGCCTGGCA -AGATCTCCAAAGCCTGACAAAAGGCTTTCACCCCTACAATTCGCATCAGAATGACGAAGT -TCGCTCGTGGCTGCTCGAGCGCATCGACGCGATCAAGCAGTCCGCCCCATCCATCGAGGA -ATACCGTGACGCGAAAGAGGAGAAGCCCGATGTCTTTGTCTTCGACGACTTGGTGTCTAA -CCTGACTTTTATCGATAAAAGTGTCGGCGTTTACTTTGAAGGCAACAATATCCTTGTTTA -CATTCGCGGCTCGGAAGATAAAAAAGAAAAATGGTGGGAGACTCCCGGCCGAATGCCCGT -TGGCAAGGGAGGAGTACTTGTCAACGCACACTACGACAGCGTTTCGACCGGGTACGGGGC -AACAGACGATGGAGTTGGAGTTGTGACGTGTCTGCAGTTGGTGAAATACTTCCTGACACC -GGGTCATGCGCCGCGCCGCGGGTTGGTGGTTCTCCTCAACAATGGTGAGGAGGACTATCT -GAACGGCGCACGCGCGTACAGCCAGCATCCGATGGCGCACTTTGCACACACATTCTTGAA -TTTGGAGGGTGCGGGTGCTGGTGGCCGTGCTACATTATTCCGCAGCTCGGATACGGAGGT -CACCCAGGCCTATGCGAAATCACAGCACCCATTCGGCTCGGTATTAAGTGCCAACGGCTT -TGAAAAGGGTCTTGTCAGCAGCCAGACGGATTATGTTGTTTTGGACGGTATCTTGGGCCT -GCGCGGTCTGGATGTCGCTTTCTTTGAACCTAGAGCTCGCTATCATACAGATCAGGATGA -TGCTCGACACACCAGTATGGACTCACTTTGGCATATGCTCTCTACTGCTGTTGCTACGAC -GGAGGCGCTTGTTTCTGACACCACTGATCGGTTTGACGGACACCCCCGCGACGATGGAGC -TGTGCCTAGCGGCTCAGGTACCCGGGCTGTTTGGTTCGACCTGTTTGGGAGCGCTTTTGC -CGTATTCCGTCTTCACACGCTATTTGCGTTGTCTGTAACTCTGCTGATCGTGGCACCTTT -GACATTGCTTATCACCAGTGTTATTCTTTCAAGGGCTGACAAGATGTATCTCTTCCGTTC -ATCGGTGTATTGCGAGATCAACGACGAGAACATTCCGTTGCGTGGCTTACGTGGCTTCTT -CCGGTTTCCATTCCTGATCTCAATCCCTACGGCGGTGACTGTCGGGTTGGCTTACATGGT -GACGAAAGTCAATCCTTTGATTGCCCACAGCAGTTCATACGCTGTGTGGAGTATGATGAT -CTCAGCTTGGATCTTCTTGGCTTGGTTTGTGTCACGCGTCGCAGACTTTGCACGCCCATC -AGCCTTTCACCGAGTCTATACCTGGACTTGGATGTTTGTTTTTACCTGGTCTCTCATGGT -AGTGGGCACTGTTTATGAACACGAGGAAGGGCTTGCCGGTGGCTATTTCATGCTATTTTA -CTTTGCCGGTACTTTCTTGGCCACATGGATCTCCTATTTGGAGTTGTTCTCTCTGCCAAC -GAAGTCAGAGTACGTCAGTGCATCAACTGAATCACGACGGCCTAGTACCCAAGGAAGCCG 
-ACTTGCTGCATCGGGCGATGAGCACCAGGACGATGATGCTGAAGAGGATCCGACTGAGTC -AACTTCGCTCTTGCATGGCCGACACCGCACCACATTTGCCAACTACGTCCGCGTTGGGGT -AGACCGCGCATCCgacgaactagacgaagaggaagaagaaaaagaCCCGAACGTGTATGC -GCACGAGCAGGGTTGGAGTGGCGTGCTACCACGATGGACGTGGCTGTTGCAACTTCTTAT -CACAACCCCCACAATTCTCATGCTGATCGTGCCGCTGGCTCTTCTGATTACCAGCGCCCT -CAGCCAGACCGGTCAGGATGGTAACCCCCAGCTCGTCGTCTATCTCTTCATTTCAATTCT -TACGGCACTTCTCTTTGCGCCTATGTTGCCATTTATCCACCGTTACACGTACCACCTGCC -CATCTTCCTGCTCTTTGTTTTCATCGGAACCATGATATACAACCTTGTCGCCTTCCCATT -CGCAGACTCCAACCGCCTGAAGCTGTTCTTCCTGCAAGAGGTCGATCTCGACAATGGGAT -ATCCACTGCTTCATTGACTGGCATGCCGCCCTTTGTAAAGGATGTTACCTACGGGCTCCC -CAGCGCAGCAGGACAAAATGAAACCTGTGATTGGATTTTCCGAGGCAAGGGCCGAGTTCA -ACGATGCTCCTGGAATGCGCCAGTACCACATGTTGTCCCTAGTGCTGACTCACTCTCTGT -TTTCAACGAGGATGCCGACTCTCTGCTTGAGGCTTCTGAGTTGTCGCCGGACTGGATTTC -CTTTAGCATTTCCCACCCTCACCCGGATAGTTCCTCTGTCCGCTTCGAAATATCAGGCCA -AAACACACGTAATTGCCGCATTAACATGGACGGCAACTACATCACGAACTTCAGTGTCCT -TGGCTCCTCCGCTCCGGACCACCGCTTCCTTAACCCGTCTCCAGATGGTCTGAACCGGAT -CCAACTGTGGAGTCGTACATGGGACAATCAATGGACAGTTGATGTTGATTTCTCAAAGCA -TGACTCATCTGGGACCCATGAGGTTGACGAGTCCTCTATTAGGGGCCGCATCACCTGCCT -ATGGAGCGACAATAATCGAATCGGCCTCATCCCCGCTCTTGACGAGGTGAGACAGTTCAG -TCCGGCGTGGGTGGCTGTGACTAAGTTCTCTGATGGGTTGATTGAGGGATCTCGGTCATT -CGAGATTAAGCGTTCGAGCCTCGGCGCTTTGGGGTCTTGAGTCCTAGTAATAGAAGGAAC -CTGATCTTTAACACCTTTTTCTTTGTACGATTTATGAGCTGTTTTGTTTTTCCGTCCATC -TTTTTTATTGCGCTGTGCTGTGTATCATCCTATCAACGGGGTATTTGTTGATTTTTATTT -TGGTCGAGACATAACATACCCTTGACTACATCTTCTAAGAGATAGGGAGCTTAATGTCAG -AACTCTAGGAGGGGATACAGAGAAATTCGACTATATTTCGCTACCGAGTGGACCGAGGAA -AAGAAGTAGAGGATGGACGAGGAGCAACTAACCACTCACCGGTCATCCACGGGCCTCAAT -ATCCACTGCTCGGGCACTGAAGGCATAGACTCGACAGTAATTCCACGTTCGCGAGCACTT -TCATAGAGGGACCCGCATGCCTTCCGTGCCTGTTCAAATATCTCTACTGGGGCCTTGGTT -TCCACCCCCCAACAGGCTTTCTTTAAATACTCGAAGGGCATATCCAATACCAAGTCCACA -CCTTTCAGCTTTAGGTTCTCCACGGTAGGGAAGTGGGCACCGAGTAGAGCCCATTGTGGG -CCGGATGGACTCTCACCTACGATAATGAGCTCTGCTGTGGTTTGGGGGATGGTGTGTAGT -TGCGATTGCAACCGAGATTCGTGGCCTCGCTGCGATGTCCCCCGTGGGCTCTTGGACCAT 
-GATAGATGTGTATGTCATGGTGAAGACCGGATATTAGTTTAGCTCCGCAGAATCGAAAGA -GACGGGAGAAGAGGAAGCGAATGTTGGAATTGGGAACGCAGACATTATTCAACTAAAAAA -CGCAAACTATGTCCATTCTTCTTGTCTTATTCAATCCACAGTGTATGAACGTTTGTACCA -GGTGTTGGGGGCAAATCCCTAGCAACACATCGCCCTAACTACCTTGATCTTGGACGGTAT -GTGACAAAAATAAGACCCATAATTGCATGTGATCAAGAGAGTGGTTAAGGTTGGGAAGAA -GGCTATCAAGCTGCATAAAATCTAGAGGGCCAGGGATGTTCAATTAAGAAGGTCCAACCT -TCCCAGGATGGAATGTCGTAGCAATTTCAACAATTTAGGCTTGAGCCGCAAGCATCTGAA -CCATGGAAATGTCCAGAGTAGTTCACCTTGGAAGCTTATAGCTAAAGTATGTTTTTTGCT -ATCCATGGAACTTCATCCGGGAGAGCATCCATTGATCCTTTAGAAATTCCAAGCATTGAA -GTCATATACACCCGTGGACTACATATAGGTACATTGAACTATTTTGTACTCGGAAAGCCT -GGCACATTGGCTTCAAACCCTGTCCCGGTTCAAAATATTGCCTTGCTTGCATCAAGGCCT -ACAATATTGAAGTCACACGTCAAGTGAAGACAATAGCGACAGAGATCAAAAAAAATTTTA -AGACCATAATTTGCTAGAAATCATCACACAGAATTCAAGTCGGCCTGTATGTTTTCAAGA -CAGAGGTATATGCAAACGAGATAATTGGGACACCTTTAATAAATACAGCATCTCAGGAGA -TAACTGCTGAAAATTGTTCTATATACAAACACAAGTGGAGATGCATATTCAATGCACATC -GCGAACAAACCGTGTCAAATTTGATATTTATGGATTGCCCATTAAGTTACTGGCTCGGCG -GCACAACAACTCTGAGTGTACTCCGAATTGAATTGTGGTGACTCAGGGAGCTGGGCGCGA -AGCAGGAGGTTTGACTGCGAGAAGTTGAAGTTCTTTTGGATTGTCGGGGTGGAAAACAAT -TCGGAGCCTGGTTGATGTGTGATATTCACGGAGAGCGAAGTGGGATATTTGGAGTCTCAA -TGATGATCATATATCATAAACCGTGATTCATAAAGAAAAAGGAAGTCCGGGGCATACTGC -AGCCCCAGGATTTATTCTCATCCCGGTTTCTCGGGCATATTCCGCCGACTTTGTTGACGT -GGGTAATGGCAGAATGGATTGGTAAAGTCGATTGGGAGGTGATCCACATGATAAGCGCGA -GCCTCATTGCTCCACGAGTTGGAGAGATCGGAGTTGGTTCGTATGCGGTCGGGCGGTAAA -TGCTCAAATATGGCGATGGAAACGTCCTCGGCATCCATCCGGGAAGGTCTACGTTGGATG -AGACTAGGGGCAGCATGGGTCACATCAGAGGTCACCCGGGTTGCGTCGACAAAATCGGGG -AGGTCGCGTGGCGCCGAGAAAGACTCAGGTTTCCAGACGTGATTGTGTACCAAGTGAATG -AGTTTACCAAGGACGGCGAATTCGAGCTTCAGCTTGATGCTGTATACAACGCCCTTGAGC -ATGGTTTCCATAATGTAGAAGTTCATGTACTCGGCGACGAGAAGACCCAAGTCCAGGATG -ATAATGATAAAGTTGATGACCAGCAGCTGGTACTGGATCTTACGCTTGGTACGGTCCGGG -TCTAAACGTAGCATGCGTACTGTTTCCCAAATGTATAGCCCCGAGATGATGAATTCCTGC -AGACAGAAGCCTGTCATTTGGATTTTTTCCATTATGTTGTAGCCTTTTTTCCAGGGTTCC -AGGGAGGCCTTGTTGTAAACCATGTTGGTACCGTAGGTGAGGGCGGTGGTGGGGAGATGG 
-AGGATAACTGCGTTAATCATGATCATAATCAACACGCGACGCAGGACTCGATGGTTTGAT -AGGACCAAATGTAGGCGCGAGTACAGGACAATGGACTGGCCGGTGACCATGCACCACCAT -CCAATGGTGAGCAAAGTGACAGGGAGCCATGGAATGTTGTCGGTGACATTGAAGAACTTG -AGTAAGAAGCCTAGCGAGTAGGGAAGTAAACCGGCGGAGGAGGAAACTAATAAACTCCAA -AAGTAGAGTCCTCGATATTCCGCAAAGGTGACGAAGAGGAGGATCAATAGCTCGAAGGCA -TTGTACCAAGTTACACCAGCAAGAGTGGCGATCGTAATCTTCATTTCCAGGGAAGTTCCA -TTGTAGTGACCGCTGATGCCATCTCCGGCGGCACTGAAATTGCTCATGGCGTTGACTCAG -TGTCAGGGGAATAGCGGCATGGCGTGTGGAGTCCGGAGTAGGTGTGTGGGTATGAGATAT -AGGAAAATCTGGGGACTCGGGGGGACGCCCTGGAGAGAGACCTAATGCAACAAAGAGAAA -AGGTAAAAAAATAGAAAAGGACAAAGAGGCAAAGGCGAAGCTGGGGCGCACTGGCGCACC -CCGTATATTGCATGCGGGGGCCGGCGATCAGGCTACCCCAGACTGAGTAGATAATAGAAG -ATGCAAGTTTATAATAGAACGTAGTATCGTATTATAGTAGTATCATAGTGATAATATAAT -AATCAATCAAAAATGGGGAACTGTTGAAAATATGGTGAGAGCCAAAAAAAAGCAAAATCA -AAGGATTCGCTCCCCAGTCAACACACGACTTCATATGATTGGAGCTGTCTTCTCTACTCG -TATATTGTGTTACCTGTCCATTCAGAGTCTCCATGCTGCCACAATGTAATTGTGAACTTA -TCTTAACGTCTCTGAAAACCTCAAGTCTAGGGGGTAATGATATATTGAAAATACTCAAAA -AGCCACACGGCTAAAAGGGGGATCTTACGGGGTATTGAATATTGCAACCTGTTTGCTAGC -CAAGACTTTTTTTTTTTTTGGCATATCCAGCCTCACTGCCAGGATCATAGGCCGCTTCAT -TGCGCCACATAAAAAACCTCGTTTACGAGCGGATAGGTGCCTTCCCATAATTTGGTACTC -GGTGAAATTCTTTGTTCGCTTAAGGGAAATTGCCTCTTAAGATTCAAAACATCAATTCAA -CAAATGTAACAAAGACAATTTCGAGGGTATCAATGTAATGTGGAGAGAGCTGAATTGGCC -AAGGCGTTGGCGTTGGAAACTCCACACATTCGACAAAGCGATTTGCTGATGTCATCGGGA -ACTGGCGCGTGACATGCTTAATAACACAGCCCTAGCCAGGGGCCAGGGCCTTTAATCCCT -ATGTTGCAAGGATTTGTACGACTATGAATGAGAAACCCTGTTTAATATGAACCAGATTTG -ATTTCTCCAGACTCATTCATAATATTTCATCCAATGATGCTCTTCCCGAAACCCAAGCAG -CTCCTTAATCTTCCGGTTACTGATGGGTGCCTCTCTGGCCCCCAACTCTCGCGTAATCGG -GACATCGGGACAAACCCGCGCTAGGAACGCAGCAGTGTTCTCGTAATTCGTCATTTCGTC -ATTGACAGCGTTCAAGACCTGAAATCCAAGGCCATCTTTCCGAACCGCGAGATCACACAT -CTGTCCGAGATCGCGAGCATCGATGTACGACCAGCCATGCGGCGCCCATTTCTCGGGAGT -TTCAACATATGAACGGAACATGGGCTGCTTGTACTCCTCTGGGGCAACAATGCGCCCGAT -ACGTAGTATATAGATGTCGCTCCCATAACGGCGCGCGAAGCCACGTGCAATGCTCTCGCC -GCACACCTTTGAGATGGCATATGTGTCCATCGGGTTTGCGTCGACGCTTTCCTCGACTGG 
-GAAGGAGGGATAATCTACGTCTCCCTCTGCGAAGCTCACTCCGTAGACTGTGATAGAGCT -GGCAACGATGATTTTTCTGATGCCGAGGCGCGAGGCGCCCTCGATTATGTTGTACGCTGC -CGAGGTATTGCCGCGGAAGGTTTCTTCGTCTGGAACGATCATGTTGCGTGAATACCCGGC -CATGTGAATGATTGCATCGGGGACTAGCCCGAGTGGCTCACGAAACGGCTCTGTCAAGTG -GAAATGAGAGCCCAGTGCGCTGTATACCTGACCGGTGTTTGTCAGATCGACACGCATCGT -GTGGACTCGCTCTTCCAGCTCAGCGGGAAGAGCAACGAGGTCGAGGTTCAGGATATCATG -ACCTTGTTGGAGGAGATATCTGATGATGTGCTGTCCGGCTTTGCCGGAACCTCCAGTGAT -CACAATGCGTTTCCCCATGTTGTGGTTTTGTCTGTAGGAGGGGCGGTTGGCCAATGCGGG -GTATAGATGATCACGATATCTCTATATGGATGAATTGATGTAGACACCGTAGAGGACGAG -TGAATGCGTGCTTAATCAAGATACCCTATTATGAAATAGAAACATCTCAGATGTGTTTGA -AGAGAACATCTACGCTTTTGTTGTACGCGAAGCGTGAAGCTGGAAGATAACATCCCGATG -TGTTCGGGCCGACAAAGATGGAATGAAAACTTCCGGAGTAAAAGGACCATTGGTAGTTCA -CAAGAATCCTCAGACTATGCAACTTGTATAGTTGATATAGGGCTTGGATGCTCGATAGCA -GAGACTGTATATGTTGCATAATAATAAAGCTGAACATGTTGTCTTTTTTTCACCAAAACG -GTCAACCCACATAATTCGCAGCATAATTTAGGGCATATACTTTTGACCAAAAAATGAATA -TTTAAAATTATTTCGGACATCTAAGATAATATGGGGCGATGCTCTTTCATTGATGATGTG -GACTTGTTGGCGCTTCTCCATACTCCGTGCTAGTTGCTTACTTGCCTGGATCGCCCGCTG -CTCAAGGCAAACCTCGTCCTATTGGTTATGTTTTCGTCGGATTTTGTTCGTTATACGGGC -AACAAGTGCCAAGTATGCATAGCCCTCACCGACAATCGAGAAGTATGCACAGTGTACGTT -TGCACTCTGCCTAATTGCAAGCAAATAGCCGATGCAACAATTGTATCTGGCGTGCACGCT -TCCTAACTCCTTTCTCTTTCTAAGGAGATTTCTGAGAAGAAAGATGGGTAGCAAGGACGA -ATGGAGTAAGAGCTAGTGATTCGCTATGGGGAGGGGGGGGGCTGAGAACCATGTACTCCC -TAAGCCCTTGGCGTTCCCGTTTCACCCTACCCCAGATCACCCAGGCAAAGGAGTAACGGA -CTATACTCCGATTGGTGCATCCCAAGGTCTGTTGTCCTTGAAAGTGCCTACGTAATAATA -CCCCGAGCGACATGATCTTGATGCATGATTTGGATTATCTTCATATTATGTAGATCTTAT -CTAGAATTGGGGTTATACCTTTGTAGGAGGATTGTGGTGAATCCTTGAGCCCCAAACAGC -CAGCATAAAAATCGTCCACTTGGAACACTAGTCTCAGTCAATATAGTAAGTAGCCCGGCA -TTCTATGTTCCAAACATGTTAGGGTACACGAATCAAACTCCCTTGTTTTGACAGGTTAAG -TGCGGTCTGAGCATGATGTCGTAGGGTGATCACGATGATAATGATATTTCATACCTGCTC -TAGCCTGCTCGACAATCTTAATCATCCAGTAAGCACACACAGAGCGTATTGTTATGAGTA -TATTCACAACAATGCTTTGGACAATGTGCTCCTGCGGATGGTCCTAACTAGAACATCCGC -TACGGCCTTCATCCCATTCAGGTTGGGATGGAGAAGCGGCCCCGATGTAAAGCTTCCAAC 
-CCAGGGTTCTTTGCTGCCAAGTGCATGTTCCTGGCTGAGTTGGTGAATTGGGACTGTCTC -ACACCAATCTGTTCGACCTTTTACAGCGACAGAATATGCATGTTGTAGGTCAGACGCAAC -ACCGCGGTAGTGCTGAATTCTCTCCTGGGTCAGTGGAATGTCCCGACCAGCTTGGGTGCC -AGGTCCCAGAACAGCAAGATATTCGACAAGGAAGACTCGAGCTCCCGGGGCCCGTTTGTG -TATCTTGTCGAGGACTCCCCCTAGGCGTTCGGCCAATTCTCTGGGTGATAGTGGTGCGGA -TGATTGTACCTCGGCATGGGAAGCATCGGCGATCATGCCGCCAATGTAGTTTATGTCGTT -ACCCCCAGCAGTCACGGTGATTATATCGGCATCTTTGGGGAGGTTGGAGATTTGCGGCGG -GAAGGTCGTGTGTGCGGTGGACTGTGGATCAACTGTGATGTTGAGCAATGTTGCACCAGA -AACGGACAGATCAGTGAGATCGGCGTTCAAGTGCTGCGCTAGAAGATGTGGGTAATTCTG -CCCTGATCGCATCGCAGCTTTTGGTTCGATTTGTGGGGGAATCTCAGGACCAGCAGCAAA -GGAGCTACCGAGACTTGCAATCTTGAGTTTCTTGGAAGATTCCATAGTCCagaaaataga -agagtagcggagcagaagagcagagaagTGTGTGACTTGAAGTTGGAGAGTAGCTTTGGC -AGAATCCACAGAAAATGGACGCCTAAAATATTAGGCCGAAACTGCTGAGCTGAAAGTTGC -CCTGCAGGTGTTCTCGGGCACCCGGCCTGGGGCGCAGCACCCGGATTCCCTAGCCCTCGT -ATTTCCAGGGATCTGGACATCAGTGGAATCGTTTTTGTTTTACACCTTTGGAGAATTCCT -AAATAAATGATGGGTTACATACAAACATGACTTGGATCCGCCTGGGTTTCGACGGTGTGT -CAGTTGGGCCGCCTGAATATCCTCTGTAGCCATAGCCACAAAGGACAGCTCGATATCAGC -CTCTTGCTCCAACATGTATCCAGAAATAATCCTCTTGGCGAGTAGATAGGCGGCCTAACT -TGAGCCCCACAGGACCCGATTGGTTAGGTTGTGAACGCGTCTTAGAATGTACTCAATGTT -TACCTTGGGAGTTATGTACCCTGCATAGAAATAATGTCTAGAGGTCTTACAGACTCTTAC -TTTGCCACGTCTGCGACTTGATAGTCAAATTGTCCAGCAATCAATGGAGGTATCTTCTAT -GTGTCCCACTTGTACTCCGTACTGTTACATCAACTAACATAGCATAAGTATCTCGGAAGT -CTATTTTGAGAGATCATGGTGGAGTGACCGTGAAGTCTCAAAGTATGGGAGACCTCACCA -TAGCCTATGCATTCGTAAATAGGATATCAACGGCTGAACATTGAGAAAACACTTAAATAG -CTTCATTTCTCCTGTTGATGCTTGCCTTTCTATTCTCATCCGCAAGACTCTCTCATCTAA -ACAGCCTTTCACAGTTCAAACATCGAACATTTCTATTCCAGACACCTCTTCTTCCAACAA -CCCGAAGAACTATCAAACCGCAAGCATGCAGATCTCTCAGCTTCTTCTTGTCGCTGGCCT -CTTTGCCCCCTCTATCTTTGCTGCCCCTATTGCCAGCTCCGATTGGACCATACGAGAGAT -GAAGCGCGTCTGCAACGACGAAAACACCTCATGCACCTGGACATTTGGCATCGACTCTGG -TCCAGAGATTACCGATTGTACCTACGTTGTTGAAGCCGACGATGCCTCTCACGCCAACGG -AGGCCCTAGTAAATGTGAAGAATACTCCATCAGCTCGGGCTGGAGCGGCCAGTTTGATCC -TACCAACGGCTTCACTACTTTGTCTGTTGTCAATGACGAGCTCCGTCAGCAGATCTGGGC 
-TGGCTACACCGACTCGCAACTTGTCGATGGAGAGGTCGTCAAGCCAGACCAGAGCTACCC -TCCTACTGCTCTTCCTTGATTGAGCGAACAGTCCGAGAAGCACGTGGAAGAACAGCTGTT -ATGACCACTTGAAGACGACAGCACTACGTGTCTATCAAGCCCACAGGCTCTTTTGGGATA -GTCGATTTGTGATCTCTTTTCTTATATACAGTGTTGGAAGAATGAAACGCGCACTCGGAA -TGATGTATATATCGATCCCTCGTGCATTTAATAACAGTTGAGTTTTGGCTTTTGAACATA -TAATGCCTATCTTGTATTTTAGTATAACCTTGGGCTGGGAGTATAATAGTTGTAAGGATG -CTGAAGACTTCTTGGAAGCATCAGCTCAGTAATAAATAGCGCTTGTTCTTCCCCTACCCT -TGAATCGATGAACTCGGTGCCATTCGCCACTGGTCACCCACTCTAAATCCCTCAGGAAAG -GGGTCACTCGAATCAAGAACTATTTGCTTGAAACTAGTTATCCAAGCCCTCCCAGCAACA -GTTGGCAGAACAGCCTGATATCTGCCGACTTGTGTAATTCCACGAATATGGCTGGTGAAC -TCGGTGCCAATAATACTGCGATGTCCGAAAACCTCACCCTCTGCTAGCTGACCTCTCGCA -TGTAGAACGGCAAGCCGAGCACATGTGCCAGTACCACAAGGGCTGCGATCAAAACGCCCT -GGTGACACAACAACAGTATTGGTAGCAGCTTTACAGCCGCCCTCTTGTGTGTGAGGTTCA -GTAAACTCCAGAATACTAACTCCCCTAATTCTGGGATTCTCAGGGTGAACAGGCGTGTAG -GAAGCTTGGACCGCGCGTTTGATACGCTCACCAATCTCAATCAGCTGTGTGCTGTATTGA -GACTCAATTTTCAGTCCCACAGAAGCTGCGTCCACGAGCACGTACATCATTCCTCCCCAG -GCAATGTCAACGGAGACTTCTCCAAAGCCAGGGACCAGGACTTGATAGTCGAGTTTGAAT -ACAAAAGCGGGTACATTGTCAAATGCCACATTCTTGCATTTGCCCGCTTCACATTCTGCT -TTCACGGTGACTAGACCGGCGGCAGTGTCCAAGGTCAGTTCGGTGATAGGCTCTCTCATC -GGAACCATACCGGTCTCCAGTAGCACAGTGACGGTGCAGATTGTATTGGATCCCGACATG -GGTGCGTATTCTTCGCTTTCCATGATCAAAAACCCTGCATCGGCTCGTGGGTCACAAGGA -GGTAAAATTAGATTCGTGTTCATTGAGCACCGGCCCCGAGGCTCATTGAGTAAAAGCTGA -CGTATGGCGTCTTTTTCATTCGAAAAGTGAACCAGCTTGTCATACATTGTAGTGCCTGGG -ACGTCAAGTACTCCACCAATAATCACATCGCCGACTTCACCTTCGGCGTGGCATCCAACA -ACTGACAATGTGCGCGAAAATGGCATGGTATCGGGTTGGCCTGTAGATTTGCTTTGACAA -GAGGCGGAATTCCAAAAAGACCTCTTCAATTAGGATTCTGTATTGATCTGAAAGCACGCT -TTGCAATTCACAAGATCCCCGCATGATTTCTGGGCATAGATACTTATATAAGACTAGGAC -TCAGGCGCCGAAACTCGAGCAAAATGTCGAAAGGCTATTTAACATAATTTAAGTGGTCAA -CTCTAGCCAAGGACATAGAATTGCTTTTGGCGTACTTTTGAAACGGGCTGTCTACTTTGT -GTATTGACGCTGGAAGCGCTGCAGGCTCCCATTATTATTCCCCGGAGCTAGCGAGTCGTG -ATTATGGGGACGCAAATTATAGTTCTGACATTATGATAGGACAAGGGCTGTGGTCATGAT -TGATTAAGAGATTTATCCACACATGCATGAGACACGGGCCTCAGGCCCTGATCGAACATC 
-AGGTGAAAGAACCGATATCTGCAACATCGTGGACTAATCACCTTAACGTGCATATGTTTC -TTTGCATTCAGGTCGCTTCTCGCCATTTTACCGGGTAATCCTGCTCTCAGGTGACCCTGC -TATTCTAAGGCTATCTCGATGTTTGCTTTATCTTAGGCGCAAAAAGAGGTGTGATGAATC -GAGTCCGTCTGCACCGCGTGCAAAGTAGGAATGAAATTGTCCAGGCTGTGCTTGACTACC -CCATATACAGATTCTGGGGAAAGACCATCACCCGAGAATAATGGGGATTTTGTTTTCAAA -AACCTAGGGATGTCCTTGACTTGCTACATACAGACGTTATATTGGTACTTGAATATGCCT -CTTCAATTACACTGAAAAGTTGTCTTAATCATGATTTGATTTAAAGAAAAGTAATCGTTA -AATGTGTTCCTCTATATTATTCCGATCTAGAGTCTGACGAGGATCTAAAGTTTATTCATT -TCTGGATAACAATATGCGATGGATTTGAGGCCAATAAGTCCATACCAGCGTTGTAAGAAT -AGTAGTTGGTATAGCGAAGACCCAATAAACCCAGAACTTGTTTGACATGGTCCAACCGGA -GTCACCACCAAAATCAAAAAATGACATGCTGAAGATAGCGCAGATGAAAGTAGGAGGGAG -GAACGTAAGGGTAACAAAGGCGATCGTTTTCATCGTGGCACTGTCCAATTGGGTTGCCCG -ACCAATTTCAAGGGTAACACTGGCATCATGCTGAGCCACAGTATTGAAAGCCAATTGGAT -TTCATTTTGCAGCCTCTTTTCATTTGAAATGGACCGGGATCGTAAGTTGGCAATGAAAGA -CTGCCAGGATCGGAGACGGGAGTGAATGTCTTCGGACGACGCAGAGTCGTTATCTGGGTT -CGAGGCTGTATAGCTCTCGTGTTGTATCAAGATGTGCTCCACATTTTGCAAGGCAATGTC -TAGTGTCTCGTTCACATGAATAGCATGTCGAGCGATGTCGTGGAGTCGTCTGTAGTCGGG -CTGCGGTCTCCCTTCCGGCTTTGCTTCTGTCTCGGTCAAACGGACATGATCTCGGATGGC -CCATACAGAAGGCTCCTCAATCAACGCAATCAGCTCGGTTAAATGTGGATAAATCCAGAA -TGGATCGTTAAGCCATCTTTCATCCGGTTCCATCACTGCTCTAAGGAAACGTTCTTTGAC -GGGCGGGTCGAGATCGAAGGCGAGGATCACAGTTTGATTAGTTGAGGGCAGCCATCGGGT -AAAGATATCTGTATGATACCATTTGAAACTGGTATCAGGGTCTACCTTTTTCACAGCGAA -TTTAGACCAGGTATCTAAAGGAATTAGTTTTGCTTGGCCATTCATATTTAGTATACTTAC -TGAAACCAGTGATGATGTCGAAATCTCGCGTCTTTTCACATCCAAAGCTGCCGTTTCCTT -TACGAAAATAATATGACCACCAAAAGTGAGGCATACAAAATGCATCAGTGAAGGCTTGTC -GACATTGTGAACATCTCTCAATATCGGTAGAATCGGCCACACTTATAAAATTAGGGAGTT -TCTCTTAGTCGGGGTCATTTCACATACATTATACATATCTTAGACCAAGCAGGTATAGTA -TCAGAAAGGTCTTTTGGATCTTCAAGGTGAAAGTTCGCCCGCCGAGTTTGTCGCCAATCG -CTTGCATCCTTTTCGTTTCGCTGCAATATCAGCAGATTAGCACAGTGTCGGCTCTTCTGC -TCCTGCATTTTGGTTTCACCCTATGTGACGAGCACTCTGGGCCAGAAGATGTCGGGTTTG -CTTTGAAGAAATCACACGGGTGAACAAATCTCACTGAGTGGGGGAGATGAAAGTCGCTGA -GAGGGACCGTGCTCCCTCAGCCTCAATTGTGCAGCGAAACAGATTCAATTCGAGTTTGAA 
-ATTTATGATATGAAAGCCTTTATCCCATCCCAGATTCTTCATTTTACCAATCTGATCAGC -GAAACTCATCGTTCAGTGTTGGTGGCCCAACGTATTTCTCATACCCCCAAGCCCAGTACC -ATCCAGAGGCAATTATTATAAACGCAACAAACACCACCGGTGCATAATTGACCATATCTG -CAGTTACAGGGATTGAAGAAGGCATACAAAATAATGGAATGACAAGAAGACTCCATCCTA -AAAACGAATATGCGATCAGCGATGATGATAACTCGTCCTAGGTAGCTCATACTTACCCAG -TGCAACAATATTGCAAGTCAGTCCGAGTGTGCCAAGATCAAATTGACCATTCTTTATGTG -TCGTCGACCTCCAGCAAGTGACACGACGATTGGACAGGCATAGCTGACAGTCAAGGTAAT -AACACCGACGCCAGTGAAAGCATTGAATGCAGTATTTGAACCGAAGTAGATCAATCCGAG -CAATAATTGAACAACCATACCAAGCATCATAGCATTAAAAGGTACGCCATCCCTATTTAC -CTGCCTCCACCATATTGAGCCTGGTATACCCCCGTCGCGAGCAAACGCCCATATGCTTCG -GGAGACAGCCGTAGTGCAGCCTATGACACACAACATAGCGAGTACTACCAATGGCAAGAG -AAGTAGGAAAGATCCAACTGGACTTCCAATAGCGGACTTGATAATACTCGGAACAGGCTG -ACCAGATTCAAGAGTGGCTAAAAGTTTTGTGTCGGGGAGCACAAAAACCAATGGCACAAG -GAAGAGGAATCCAGCCAGGGTGTTTATAATGACCGTTCCAACCATTGCTTTTGGGACTTG -TGTTGATGGTTGCTGGACTTCTTCGCACATACTTTGAAGGGTCAGTTAGTGTTTCCATCG -TGGAGAATGAGAGAGGAGATGTTCTTTCAGGGTGGAAACCAGATACGTCACCTACCAAAT -TACCATGCCAGTGGAGGAAGTGCTATAAGCTGCCTGCAGGAGGCCGACGCAGAAGGACCA -GCCTGCTGGCCAACCAGACTCGGGCTCAAAACTAGTAAAGACATATTCGGCGTCATGTCG -GCCCTCTTTCGCAATTGCAAGGACACATATTACAATAGTCAATAATCCTGCAAGAGTCCA -GAATATTGCGAATGTCTAGATACTGTATTAGCACTGTGGGGTTGTCACAAGGATTTTTTT -AAACATACATCAAGATAAGGTAGCCATCTGTTCCCAAAAGCAGATACCACATTAGCAAAA -AGTGTGACTGCGAGAAATATGAGAAAAACTTGATATGTTTCCGCTTGGAAGATTCCCACT -CCTGGAGCGGACTCGAAAACGTTGACACATGAGATGAAGAAAAGAGTGCTCCCAAGGTTA -ACTGCAAGTGTGATGGTGATATTTCCAACGACATATGACCATCCGCAAATCCACGAGGCG -ATTCTTCGATATGATGGTGGAGACAGCATAAAAGCCTGGTAGTAGACCCCGCCAGCAGTT -GGATAGACAGAAGTTATTTCGCCAAGTGAGGCAGCCACGCATAGAGTAATCAAAGAGACG -GCAAGCCATCCCCAAATTATGTTGACGGGGCCGCCCCCAATAAGAGGGTAGGTGAATGTC -GTAGCCAGTCCATATGGGATTGCTGCCAGTACAAAGGACATGAAAGCTACATGAAGCGCG -GAGCGGTTTCGAACGAGTTCAGGCTCGTATCCCAGGGCTTCCAGTACACCGTTTGCTGAA -GATTGCTTGCCTTCTTCAATGCTGGGAAATTCCGCGGCTGAATCGGGATCGGGAGCTTTG -TTTTTCGAAACATGATGCTGGTGGGCAGTGACTCTGTCTGCCATGATAGATGTTTGACAA -GATTGGGATGAGTAACGAAAGAGTAGAAAGTTCCTTCCAATTTATAAACAGCTTTGATCA 
-ACGCTGGCTCAAATGGCTAAGTTGCAAGGAGTCAAATCCTAGGTATACACCGATGTAGTC -AAACAACGACACCTACAAAGGAAGTTTTAGATGCAAGAATTTGGAGGGAATTCGAGTGGC -TTATCACATATTTCAGCGAGGGGAGCTTATGCGCAGGTTAAGCTTCTATTGGCCTCATAA -AAGCTTGAGCACATTGACCACGACTCACTCAATCTGTTATCCTATTTCTTGCCCAAGGCA -TTTTTGCAAGGTCTCTACCGAGTGGAGAACGGATTCTCGTAAAGATAACGTTTGACAGGA -AAGTTTGATAACCTAGGTAACCACATATGTGAAGATAAGACTAGCAAAGAGATTGGACCG -ACAACATGCCTGAGCTGAAACATAGACGATTCCTCTCATTTGAAGTTTTGACAGTTTTAG -GTCCCCAAACCAGTCCAAAGTGGGTTAGAATGATAACGCTAATGCATGACGGCTACGCTA -TACCAGATATGGCTGACCTACATGTGGGTCCTGGTGCGGAGTTCACTCTCGGTCTCTCCA -GGCTCTGGAAATCGCATTTTGAAGGAATATATGGGCATAGATTAGTATTATTTGACCTGA -AGGCCCATATAGATAATTATTCTTGATAGCACAACTCAGCAGATGAAATAGGTAAAAGGA -TGCTGCCAAACATTGAAATCGCAATAGAGTCTAGACCATGATCATAACATCAATGAGCTT -GATCAGCTGGATATCCTTGCAGATGCCTCTTATAATACGGAGAAAGCTTAAATGAGAAGG -ACTTGAATTTTCTCCGCATCTACGACGTCATAATAAGAATTGGTGCGGAGAGCAATCGGT -TCAGAAACGATCTGCCATTGAATGGGAACTAGAACATAGGGATCACGTTTTTCCACCAAG -AGCGAAGTGACATGCTCGAATTGTGGCGGGATCTCCCAAACAATTGTGTTGGAGTATGCT -TAGTTATGAAATGTTTTTCAATATCAACTCTTACATATAGCCCAAACCATGTCCTCAAAA -AGCACTTGTACTCGTTATCTCTCTCGGGTTAATGCTATGGAATACATATGCCCAGGTCCA -AGCTTGGGCATGCTGACAGCCTGAAGGCCAATAATTCTTAAATGCGAATTTTCTGGGTAA -CGCTCGACATGCTGGGTCACGCGTATGCTACTACATGGGAAAAGACACTAGTACGCATTA -AGAAAACGAACCCCCGAGGCGTGTTGTTGCTCCTCTTTTGGGTTAGCGTGAAAGGTGCGG -GGAGAGTTACCAACCTCACGATCTCAGAGATGAGGCGTTGCTTATCAGCAGGTGATCGGA -TTCCGGTTCATACACCGACCTTCGGGTCCTTGATCGGGTTCTCATCCGATGCCCCGCCAT -CTCAGATGTTTTTCCCTCAGCGAAAACAGCATCCTCAATATCTTTGGTTTAGGTAGGATG -GAAAGCACAGACAAATTAGTTTCTCCACAGAAGAAAAGGCAGAATCCCAACCGGTGCCGA -CCAAAATCAAGAGGGGGCTGCCAGAGGTGCAAGCAGCGGCGAGTAAGTTGAGGTGAAATA -TGCACAGTTTCTTCTTTGTCGAAGGCCCTCTTTGAATTCCAATTGGTTAATACTTATCCA -GCGCAGATGTGACGAGGCAAAACCGAGTTGTCATGAATGTACCAAGCGAGGACACACATG -TCCCGGGTACCAAAAGCAGAGCTTGAAATGGCGATATGTTTTCGAAGGCAGCGAAGAGCC -AAATCAGCCAACACCACCAACCAAGTCTCCTGACACGGCTGCGACCGAATTTTCCGAGTT -ACCCGTGAAACATTCAACACTGCAGGAAAAAGACAATATCCAGAATGAGACTCAAAGCAT -CTGGGATACGGCTTGCCCAAACCTCTTTGACCAGCTGCCGAATCTTGATGAGCCAACAGA 
-TGTTTGGCTCTACACCGAGTCGTGTTCCAGTGGTACGGTTTTAGCGAGACCTGAAGACGA -GCTGGATGGGTTGTCGATTTTGCGTCAGCGACTGGCCAACACAACCGTCCCCTCCTTTCT -CATCCAACTGCCAGTGATGTTGGTGCAATATTACTTTCATACAGTTTGTAACCAATGGTC -ATCTTTTGACTGTCCTCTCAATCCTTTCCGAATTATTGTCAGCCGACTTTGGAGTCGAAA -TGCGGCGATTTACTTTGCTATCCAGAGCATGTCAGCTGCCTCGCTGGCCAACGATTTTCC -AACCATGCGAGCCATCGGAATACAGACTCAGCAGCAGGCGATTGCGTGTCTGAATAATGG -TGTCCAAAAGGGCTCCACTCAGACAAAGGATGATGAGTATTTTCTGGCACTGCTTATGAT -TGGATCCACCACTGGGTGGCACGATGCTTCCGATTTGGGGCTTCCATATCTGAGGGCGGC -CCAGGACCATCTTCTAAGACAAGAACGCCAGTGCAAAAATGCAAACTCTGCCCTCGCAAA -ACAACACCCACTCTTCAAGCAGTGTCTTCTTTGGTGGAACTTACTGGCTGCCTTTGTGGC -AGAGGAGTCGCCCATTCTTGACTTAGAAAACTCTGTAGAAACCAGCGACACGAATCTTTC -AGTCTATCTTGTCGATGGAGAGGTCCTACCGCATCCCTGGACAGGGTCCCTCAGATACTC -GTTGGGTCTCTTCTATCGAACTGCAAGAGTGATCCGAGCCGCAAGAACCTCCCACCGCCG -AAGACCACAGGCTTTGGATCCCACCTTAATTGACTTGAGCTCCATGGCCGAGGAACTTGA -TCTCAGACAAGAAGCCGAACATCTCGAGGACAGAATTCTGTTTACTGGCTTCTCATTCTA -TTGTGGGCCTGTTGACATCGGTGATACAAACACGCCACCGTCGCATTTTCTTACGCTGGC -CGAGGCATATCGCTGCACGGCACTGTTGCAAATCTACCACGTTTTCCCGGACATCCTCGA -AGAAAGACTACGAAGTAACCAAGCTGCGGACTCCGAACGGCCTATTCCGGCCCTCTTTTT -GTTGTTATTTCCAATTGCCACCGGCTGGTCATCATGCTCAGTGGAAGACGCACGCCACAC -CCTAGCACTGCACATTGTATCTCTCTTGGAGCAGCTTCCTTCCACGTCTGGAACGAAGGT -TATGCAGCCCATAGTTTTGGCTTGTGTCTCTAGCGATCTTGTCTTCTCGTCTGGTTTTGT -GTTGGGCGCTGCACAACATACGATCCCCAGTCTCAGTACGCTGGATGTTGATATTGCCCG -GGCCAGACGAAAGGTCAAAATGCGGTTGTCTGAACTTACCTTAATCCTACCTAAGCTGCC -AATGCAACGCATTTATAACGTTGTGCATGAGACCTGGGACCGTGCGGACTCTGGGTTAGA -AGAATTCTGGCTGGATGTCATGCTGGATTTGAATTTGGAAACGATTATGGGATAGGCTGA -GTGTTGTATCCAGGATACTTCCCCTGGCCAATATCACTGCCTGTTCATTGGCCCCTTCAC -CTTGTATCCACTGAATGAAATTTTTCCTCGGCAATTGCTCTAACTTCATAGATGACTCTC -CCATAAATGACATTATAAGCGCCTGTTACCTTGGTATAAGTATCTTTTTCGCTATTATTA -TGCCACTAAGCAGTCATTGGAATGAAAATGTTAGATTCTCTTTGTCATATCAATGATATG -ACCTCGGGCTATAACTGATGCCTAAAGCCCCTTGAGTCATTCCACACAGATTCATAGATG -GCTCTTTCCTGTCCTGACCACTTCTTCATAGTCCACCCCCCCCCATCTGGGCTATCTCTT -CTTTTTCATCTACATAGCTGAATCCTGATTCTTAAACTCGCACTTGGTCCATTATGCCTC 
-GAAGCGCTAAACGATATTGCAATTCCTCGACCCAACAAGACCGACCTTTTCTACAAAAAA -TTGGCTTACTGGTCATTAAAGGTCACGAGTGGGATGTGAATGAATGTTGATAACAGCATT -AATTTTCAGCTTTACAAATTTGGCCCATTGCCCATGTACGCGGATAATCTCTACCGCCAG -ATAACTTCAATTTTGCTGAACAATCCCCTCCTCAAAGTCTCGAAGTGGGAGATGATCAAA -TGATCGGAGTTCAGCGCCGTGAGGGACTACATAATTGCTTACCAAAATCAATCCCAAGTG -CTCAAAGACTTTGGGATTGGCGTTCATCCCTTGCCCTTTTTGTGTAATGTGCTCGACCAA -CTTGAGCCCGAGCTCCTCGCAGTGCAACGTATACGGGATCAGTTCAGAAAGCTCAACACC -AGAGGCATAAGCCACACTGTACTTTACCGATACTGCAGAGATCTCATTATCTCCCCCGGG -CAAATAGAGCGCAATACAGCCACTACCAACGATGCTCAGCCTAGATCGACGCCACAGTAG -GCACTGTGGCAGAAGCTGTAATTGAGATGAAGGTCGTCATCCGTACCACTACAACTTAGG -CAATCATTGACTTGAGAACATTTCCGAAGTCTAGTCCTAATGATGACTTAATTTGATATG -AATGAACAATTAAGAATAAATTACTTGCACACCAACTCTACTCATGCTTCCTAGTTATTC -GAGTTACATCGTCTTGGTTTCTCGTATAATAGCCACGCCTCCCTGGTGCTTGCCGGTCTT -AAGCTCAGGATTAGGTCTTGCACGACAACGCCACAAGCCTGTCTTTACAAGGCAGAAAAG -ACCACGTCCAGTTCAGAATACGTGACATGGCGATTTTTTTTCTACAACCACCATAGAATC -AAATACATATTAGTTTGTTAGGGTCAGTCTCTCGCAATCGGACAGATTTGAGTGCCTAGC -CCATGACTTATGCTTATCCGCAGCTCGTCTCCAGGCTGGAGGAATCGTGGTGGGCTTTGT -GAGATACCAATGCCACACGGTGTTCCAGTGATGATGACGGTCCCGGGCAGTAATGTGTGA -CCCTAATCTTTAGGATTAGTTATGTTTGCTTGAAATGCACAGGGTGGTTCTGTACCTGAG -AAAGATAGGATACAATTTCTGGTATGCTGAAGATCATGTCATCAGCGGATCCGTTCTGCA -TGATCTTTCCATTGAGACGGGTCTTTAAGTTCAAAACTGCAGCATCTGGGATTTGTTCTG -TAGACACGATACATGGACCGAGTGGGCAAAAGCCATCCTGATATAAGTCAGTCGTGACGT -TCGTCTTAGACATGGAGCATATTGACATACCATTCCCTTTGCATATGACCACTGTGATGT -CTTCTCCTGATGTTTGCGGGCTGTCACGTCATTGGCGACTGCATAACCCAGAACGTAGTT -GGCTGCTTCAGCTACTGAAACATTCTTGCACTGCCTACCGATGACTATGGCCAACTCAAC -TTCATAGTCTGCTTCATCGTAGTCCACGTTGGAGGGAAGTATAATCGGTTCGCATGCAGA -TGCAATGCATGTGTTGGCCTTCAGGAAAACACTGCATATCATTAGCAAGCATTAGCAGTA -GAAGTAGATATACCAACGTTGGCGTGTTTGGAAGAGTCAACTTCATCTCTGCTGCGTGTT -CCTTGTAATTTAGGCCGACGCATCGAACAGTCCCAGCTTCGATTTGCGAGACTGGGGATA -GCAATTCATCGATGGTTTTTATTTCACCCGTGAATGAAGCATTATAATCAAATGCCGAGT -TACCAGCCACGACCTTGACTGCCACTTTGACATTCTTTTCTATGGCTTGACCCACTATAA -GTCTATCAGTATGGGGTTGACTTTGGTTAACTTCGATGAAGCTGGGTATGCTATACCATC 
-AATATCCGGATTCACCGGCTCACCACCATATATTTCACCATCTTCCGCTCTGAAGCGAAC -CCATCGCTTCCAAACCTGTAGTACATTAGATTACCCCTAACACATTCCCGGCGAAGCACC -TACTGGATGTTTAGCATCTTCAAACTCGATTTCATTCGCGCTTGAGATTCCCATTTTGAA -AGATAGTGACATAAAAGATTTCCGGATAAAAAGTGCGAGTGTGTTATGTCTACTTATCAA -TGGGCTTCTCCAAAATATCGGGAAATTCATCGGGAAATTGACAGATAAGATAAGTTAAAG -TTCGTATTTGCCGATGCCAAGACAATAAAATGCGGGGAAATCATTTAGGTAAAAGATAGC -GAGAAGCTGATCTGATATATATAGATGTGAGAAAGCATACAATCATGCATATCAGGTACT -TCCAAGATTAGCAATAGTAATAATTTGGGAGTGATCAATCTAAGCTACTATGAGTTTAAG -CTTAAAATGCCCTCAAGCAGACTATGGACGAGCTAATGCAGGAAAAACTTTCACATCCAC -GGCTTATCCTCCAGCTTATCTCTTGGTGACACAAGATTTAATTGGTCACCTAAAGCCTTC -GGGAAATCCTTCACACAATAGCCAAGTTCTTGAATAAACCGAAGAATTTCCCGATGATAA -CGCCTGTAATAATAGAACCTGGATAGATATATAACCCATGTGGTTGGATCCCAAGTGTAA -CAATAAATCTACATATTCAGGATGAAGGAGACAGATATCAAACAGACCGACTTGTCATCG -GATCACGATGTTCAAATGGGTAACTTCGTAGTTGACCCCGTCCGAGAGAAAAAGCTATTA -TGGAAACTCGATCTCTACATTTGTCCGTTGGTGATGCTCATTTTCTTGGTTGCTTATCTT -GATCGGAGCAACTTGGGGTAAGATTTCCATCCACATAGTAGAATTCCCACCACTGACTCC -TGGCTAGAAATGCAGCTGTTGCTGGAATGCCGGTAAGGTCGAGTCCACATTTTGTGAAAC -ACTGGGTTACTAAGCTAGGTCTTACAGGAAGATATCGGTCTAGTTGGAAACGAACTAGGA -AGTGAGATCAGTCTCGTCTAATATTCGAAGCTATAATTAACAAAACCCAACCAGATGCAG -TATCTTTGTTCTATGCCACCTACGTCTTTTTTGAGATCCCGTTCTCTATGCTCCTCAAGA -AATTACGCCCCAACAGACTCATTTCTGCATTGATCCTTGGCTTCAGTGTATCAATTTTGG -CTGCTGGATTTATCAAGGATGTTGCCGGGCTCTATACTACCAGACTGTTTCTCGGTGTCT -TTGAAAGTGGCCTATTTCCGTGAGACTATCGGCTATCTTGATTACCTAGATAGAGGCTAA -TTATTGCGCAGATGCTTGACTGTTCTCTTGACCACCTTTTACAAGCGCGAGGAACAAGCA -CAAAGAATTTCTTACTTGTTTGTGTCCGCTGCTCTGAGTGGAGGCTTTGGTGGATTGTAT -GCACAACTCTAGCTATTCTGTCACGAACACTGATAGCTAACATGAACCTTAGACTCGCCT -TTGGACTGATTCGCCTGGATGGTGCTGCCGGCCTTGAAGGTTGGAGGTGGCTTTTCATTG -TGGAGGGCATCATGAGCGCGGTCATCGGTATTGCCACGTTCTTCCTTCTTCCAAACGACT -TCGAATCCGCATACTTCTTGAACGAAGAAGATAAAGAGCTGATGCGCATGCGCATGCAAC -AGTCAGCACGGTACGCTGATACTGAGAAATTTGATGTGAAGGAAGTGTGGAAAACGCTAC -GAGACCCCAAGAGCTGGCTCACTTCCTTCAACCAAATTTGTGTCAACACTTGTTCCTTCG -GATTCAGTACATTCCTGCCCACGATTATCCGAGGGTTTGGGTTTGATTCGGTCAAGACCC 
-AACTGCTCACTGTTCCCGTATACATCTGGGCCTCCGCGTTTTACTTAGTCATTGCCAATC -TGTCTGATCGAGTGCGCATGCGAGCCGCCTTCATGGTCCCACTGTGTCTGGTGACAGCGG -TTGGCTATGCCATGCTGCTCGGGTTGGATGTCCACGCGCGAGGTCCATTATATTTCGCGA -CATATGTGTGTGTTACTGGGTATGTACCTTCCGATGGAGTGTCTGTTGAGCATTTGTTAA -CATTTCGGTAAAAAGCATTTATGCTGTAGTTGGTCTCGGAGTTAGTTGGAACGCCAATAG -CCATTCTGGCTATTACAAGCGTGCGATGGGAGTGGGCCTGCAGCAGACTATCGGAAATTG -CGCCGGTTTGATCGTGAGTGGAATATCACAATAACGTGCCCATTTTGTGACACCTCAAAC -TAACATGGCTCAAGGCTGGTCAAATCTATAAGTCGCCCACCAATGGGCGATATGTGATCG -GTCATTCTGTTTCACTTGCTACTATCTGTCTTGCATTCTTTGGTAATGCTGGTATGTGGG -CTTTGCTCCGATACCACAATAGAAAGCGAGACCAGCTATCACCGGAGGAGAGAGAGCGCA -TTGTCAGCTCAGAATCGTACGAAAAGAAAGGCGGAGATTTCCATCCGGACTTCCGCTATA -TCCTATGAGTGTTACCATATATACAAAATCTTCAAATCAAATTATACATTAGGCAGGAGA -CACAATCTAAAGCTATCGCCTATACTACAAGATTCGTTTGCTAAGTCATTTTGGTAGTAT -TTCCAGGAGTAGTTTCGAATGTATTTACTTTAGCCATTCTTCCCTTCACCGGTGCTATAG -CAAGAAGGATAAAAGAAGTCCACTCCAGTAAATGGGAGCGAGGCAGTTGATAGTCGATGA -GATGGTGGGTTGTTCAATTTGGACAGTCGTCAAAAGGTAGTCGGATAAACAGATACTTTT -ACAACCAACGAGAATGACCACCAGGAATTCCATAGATTAAGTGTGATATGCGATTTCGGA -AATCATCTGCTCCCAGGTTTCCAGATCAGCAGCCTGCGGAAATGCAAAATTTTGATCTTC -CCATACTGCGGGCCACATCATGTCAGGCGATGCACCCGCTGCAATAGACTCGATGTTCAT -GACTGTTGACATAATTGGATTCCTTTCACGCTCCTGTGTCATAAATTCTGGCAATATCTC -AGGTGAACTTACTGGGGCCTGGTCACCTGGCTGGTCGGTAGCTCGAGCAAGCCCAGTAGT -ACGGATTATACACAACTCTACCACTTTGACCTCCGCAAGACCAAACTCGTTTAGCCAAAC -ATCCTTTCGTAACCTTTCACAATATTCTTCAAGATCGTTCAAGATTGCGGCAGTTTGAGG -GGCAATGGGTCGCCGTACGGCGATAATACCCAGGATGAAAGCGCTGTTGAACATATTGTG -GGTGCCTAGGGTCCATCTCATATTTTCTGATGTGCCATAATTCAAAGACTGCAATCTAAT -TTTCAGATCGGAAGACGCCGAGGAGATGCATGCCTCGTGACTAAGGCTGAACCGATTGGT -CATGGACTCGCGCAGAAGATAAGGTCGGTGCAGAGTGATTCTGGCAAAGTGGAAAGCAGA -TTGCAAGTAGAGACGATGCCAAGGTAAAAATGTATGCGACTCATCCAAAGAAAAGTCGGG -GTCTTCCACAGCAAAGTAGGATGGGAGTTGCTCTTTCCATGCAACAAGCTTGTTATCCAT -CTCGATTACCATATCGTACGATGCAGGATATAAACCAAAGCACCGGTCTTGGATTTCACC -TGCAATTTTACCTAGACCACTGCCGAGAAGCGTGGCTGTGCTCATTGTGGGATGGCTCAA -TGGATGAGCTGGGGCAAGAGAGGAATCTTCATCAGTCAATCCATCGAGCCATACATTTTC 
-AGGTGCTTTGATAAGACACATATGATCTGAGATCGCGTATGGTCTGCCAAGAGCGAGGGC -AATAGTCTTGTCAAGCAAGTACAGGTTGCTCCAAAGGCGTCTTCTGGTTTCGGTGGCTTT -TCTCGACAACCCCCATTTTTCTCCATCTATATGCATGCCCTGTGCTTGAGCCATGCGCAT -GGCAAAGCTGATGGTGAGCCAGCTCTCTGCAGCTTGTCTAGTGTATAGCAGATATCGTGT -AACCATCAAACCTGCTCTGACAAGATCTAAGCTCTCGCCATGAAAAGAGGGTGAGATAAC -AATCGCTCTCCTGGCAGCCCAGTAGAAGCGAAGAGAAGCTTCTTCGCTTTCTTTCCGCTT -TTCGGGCGAAGAGGTTGTAGGACTATCCAGGAGGACGCTAAAAGCGAGAATAATGAACAA -AAGCGCAAGCCAGCGAAGATCTACTCCAGCAATTTCGTCAAATCCAAACTTCCGGCCCCA -GAATTCTGCAAACTGAGATCTGAAAGTGTCGGGGTGAACCGTATCAATCGCCCAGTTGAT -CTCGGTCAGGAATCTTTCAATTAGCCGGTCAACGGTCACTTTATTCCGTGGTAAGAGACC -AAGTAAATCCCAAACCTGCGAAAATGGCCCAGAGTCATTTTGAAATGATTGCAAAGAATC -TTTGCTAGACAGCAAGCTCATAGGGGGTAGATCGGGAGCTGACTCCTGAATCATGCGTGC -CGCCACCTGGTGACCAAAGTATGAATTACCGAAAAAGCTAGGTGCTCCCCGGGCGATATA -CAACTGCCCCAGCTCTGTACCCTGGGTATCGACTATTAGGCCTCATCGTTAGCCATATCA -TCCACATTTGGACAGCAGGGATATCCGGTACCTGTTGTGCTCACTCTTTCCGGTGGCATC -GCATGACCCCTCTCTTTTTCTGGCCATTGACATAGTTGAGGGACTCCACGCTGGATACAA -CGAGAACAAGGCAGTTCTCTGTTACATTTCTGCTTCCTGACGTGACATTCGGAACAGGAA -GTTGCTTTTCTCCCTCGTCTCACTTTGGGAGAGTCGAGCGCCACATTGCTTCTCTTTCTT -CTATTGGTATTCATTGCTTCATGCAGTCCTAAGCAGCGAGTAGTCAGGAGTTGTTGAATT -GCAGAAGCAAAAAGGGAGTGACCCGAGGTTAAAGTTAGCCTGAAGTATTGGCCTTAGGGA -TCAGGCCTAAGGTGCTTGCTACTATGGACTTTATTTCTTTTTATTCTTCAATGGAGCAAC -ACTTGAATATATACTTTGGTCTGAGCTTGATTGATATAACATAGGGAAATAGTAGGACAG -GTGATTATAAAGGAACATTTGGAGTATGAAGGCCCTGTCTGGGCAGGTGGAGCGTTTGTG -CTGAGTCATCTGACGTCAAGCGCGTTGTTTACATCCTAGGCTCGCTTCGACTTCGATGTC -CCCCCACACCCTATCTCAACCCATTTACCCCTACTTCTCCCAGCTATTGATTCTTACTGT -TATCCATTGATTCCTTGCATTTGCTTCTTCAATTATGGCGATCCGCCGTTCCGCGCGCAT -TCGTCGCGCCCAGAATTCCCCAGATGTGAGTGGCATAGTAAAATCGCCCATTAGGGGAAG -TCCTGACAACCGTTGAAGTCAGGGTAAACTGACCATTGTTTTGCTTAGGTTGAAACTGCT -CAGATTGAAGAAATGACCCCTCAGCCAGTCGCAGCTTCTAGACTTCCCTCTGTTGCCGAG -CGCGAGGAAACTGACAGCTCCGAACTCCCGCACATCCAAACCCCCCTCGCGAAAACTCCT -CGACAAAAAACCCCTCGCGCGTCTACTCAAAAAAAGCGCGATAGTATGAGGACCCCCACT -AGTGTGACTGCGGCCCGTCCGTCCCGCGAAGAAATGCACCCCAGCAAGGCGCAACAGAGC 
-ACAACCAAGCACGCAGACTCAGGCCTTCTCCTTGGATTCAATCCAATCAAGAAAGATGCC -AATGGCAATGTGATCAAGCACAGCATCAGTGACAATACCCCGACCAAATCCAAGGCATCT -CCCGCGACAGACCAGTTTGGGACCCCAGGATTCGAATACAAGTTTCACTCCCAAGAAACG -GAACTCAGCGACGAGGCCAAACTGCTCATGGAAAGCGTTCGCGCCGATGCGGCTCGGATC -AAGGCGCAAATGGCCCAGGAGGGCTCAAAGCAAGAACATGAAGATCACGAGGCCGAACCA -ATTCAGGGAGACCGGAAGATCGCCATGCCAAAGGGCAAGGCCAACCGCTTCAGCGACATC -CACATGGCCGAATTCAGGAAGATGGACTCCATCGCTGGTCACGCTTCGTCTTTCCGCGCT -ACTCCCGGTCGCTTCCAGCCTGTTACCAAGTCTTTGAAGCGATCCAAGTCCAAGGCGCAG -CTTGACGAACCCGAGAGCCAGAACTCATCACCTTCTCGCTCTGCCACCAAGACCTCAAAC -CTGTCCGCCCCCACAGCCGCTACAACAGCTAAACGTGTCAAGCACAATCAAAGCGAAGAT -ACATCTACTCGTCATGAATCACAGACGGATCAAACGCAGAAGGATCAGACACACAAGGTC -GAAACTcctcgccgacccgcaggccctcgtcctcgccctcgcGCAAGCGTTCGCAGTTCC -CTCATGACTCCCACACACGCTTCGCTTGCTCGTACAACCTCTACCAGCATCAAGGCGCCG -AGGACCTCAATGATCCCATCGTTAAAATTCTCTCCAGCTGCCAAAACCGCGGCCTCACCT -CGGACTCCTCGCACTGAGTTCAACCCCCGCCTCAAGAGCAAATTGCCCACCCTTGGTAGT -CTTAGATCTATTCTTCGTCGACGCCAACCACTCTTCTCCCGCGACCCTGCCAAAATTGCG -TCTGGAACCCACGTTGCGGCGCCTGATTTCAATCCCAATTCACTCTTTGGAGGAGCTGGT -AATATAAGTGACTCGGCTCCAACCCCCTCGCCGAAGAAACACGTTGATTTCACTCCAAGT -GTCAAGTCTCGTCACGAACTTGCTGTCGTGTCACCCTCTCCTTCCAAGGCACCTTCCGCT -CAGCGTCGTTCCATGGCCGGAGATGTGGTCTATCCCACTCTACCGACTCTCACACCCGAG -AAGAACTCAGCCGTTTTACCTTCCACTACTCCTGGTTCTGCCTTCAAGTCCATTCGTCAC -GTTCGCAAATCGGATGCCGCTGAACAACCCGCTCCATACCAGGAGCTTCCTGTGGTGGCT -CACGGCATTGCTCACGGCATTGCCAATAAAAAGAGACACCGCGATCAGCTCGACGACGAG -GATACTCCTGGTAATTTACATGGTGAGAATCTGCCGCCTAGCCATACGCAATCCAATGAG -CGCAGCACCAAGAGATTCAAGTCCAATCCGCCTACTCCAAGCCCGGCGAAAAAACGTCTT -ACCAAGACTCCTGTTCGTCCTTCTGGCAAAATTAGCACACCCGCTAGCAAGCAAAAGAGT -CGTGGTGTTCTCAGCATGAGCCGCCTCAACATGCTTTCGAAGCCGAAGGGACAGACTTAG -ACACGCAATGACCTGATGGGCACACGCCTGGCGCGTTAGGCCGCGTCTCTGACGAATTGT -TGTTGTTTTTCTTTTCAGCCTTATGATCAACCTAGTACCCGGCTTTTGTTGCTACCTTTT -TTTTATTTTCTGGACTGTTGCAATCCAGCCCACACTGCAGTAATGCTCTTCTGCAGTGTC -AACGAACGAACATTTTCATTCTTGATTTGTATCTACACTACCATTTTTGAGGGTATCCAA -CCCTTCCCAACTTACGGGATATCTTCTTTTCGTCTCTGGTGTCGTTCCTCTTTCTTCCTT 
-TTCTGCCTGTTTTTCGGGTTACCCCTTTGTGGGAAATATGGCGTTCGCGATCATGCTTTC -GGGCTATTGCCATTGAAAAGGTGCCACGGTGAGTTTTTCTGTGTATGTATTGTCTGGGCA -AAAAAATGAAATCAATTATGTGATAAAAGAAAGCGTAGGCAGTAAAGGATAGTAACTTGT -ATGCTTTCTAGGCTGTTGATGTTCCCAACGACCAAAATAACCAAGCCGCGATTTTACGTC -ATATTTGGTTTACAGGGGAAAAGATATATCCAAGAGGTGAATCAAAGGATTTGCTATGTA -CAACATACAGGAATTAGAATCCCCATCCATCGCGGGGAGAGATCAAGAAAGCTAGCAATA -CATGGTAAAATCCCGAAGAAGAATAAAGAATAACGAGCAGAAATCAGAGGGCCTTCCCAG -CGGTAGAACAGATACTAAATAAGCGATTAAGCCCCTGTCTGACGTGCAGACAAGAGCCTC -CATGGTATAGACGGAGAATGGTACAGTGGCATTAAGCATGACAATCATTTGGGTATGTGA -GGATATGGGTGGTTTTCGGGGAGGGTTGGGTCGAAGAGGTGGGAAGATGTGAATAAAACG -CAGGGAAACGGAAAATGGTAGACATGCATCTGAGATGCAAGGAAAGTAAGGTAGACATCG -GGCCTGGACTCGAGTAAGCCTCGAATCAGGGATGGGCGGCGCAGTAAATATCAGTTTTTC -ATGGTTTCCAGGAACGAATCCAGGTTGTATTCCTCCTGATACTGGCGGTCATCCCACAGC -TCGCCCAGATCGTCTAGCCAGCCCTTCTTGCCCTTCTCCTTGACCTCGCCGTCAATATCG -ACCATGTCGACTTCGTTACCGGCTGTGTCGCTTGGCTTCTCAGCTGTGTCTGCCGTTTCG -CCGAGATTGAACAGGTCAAGCAGTTGGTCTGTGTCCATCGTTCCAAGCCCGGCGTTTTGT -TGATTGACCACGGTGGAGGCAACGTCGATCTTGAATCGTTGCAAGCTGTGGCGGGGTTCA -TATTAGTTTTCGTGCATCTGCGGAGTTCATGTAAAATGATAATAACTTACTTCAAGATCT -TTTCCTCTAAGGTGCCTCGCGTGATCAAGCGGTACACGTTGACAACCTTCTTTTGGCCGA -TACGATGGGCACGGTCCATAGCCTGGATATCTTTCTGCGGATTCCAGTCATGCTCCACGA -AGATGACGGTGTCCGCACCCGTAAGGTTGAGACCCAATCCACCGACACTGGTGGTCAGGA -GTAAGACATCATAACTGGGATCGGTGTTGAAACGGTTAACGATGTCCTGGCGCTTGGTTG -CTTCCACGCCTCCATCCAGGCGAAGGTATTGAACGGAGGGGAGAAGTTTCTTGAGGACAT -CGTTTTGCACAATATCCAGCATCTCTTTCATCTGACAGAACACAAGGGCTCGGTGGGGGC -TGACATAGCTTGCGCCTGTGTCCAGTTCACCCTCTGCTGTGTGGTCAAGGCCAATGCCGC -AATCCACCAGTAAATCTTTCAGAGCGTTGAGCTTGGGCGCGTGGGCAAGATCTCTGATAT -TCGAACGCTTCGCGGTTAAGAATGATTGCACCTCGTTGTACTGCTTATGTCCTTCCTTGA -CAACCAACGCGGGAGAGTTACATAGACGTCGCATATATTGAAGCGCCTGGAAGATATGCT -CTTTGTCGCCTCGGTCTGCGCTACCCACCTTGTCTGCCAATTCCTTCTGTTCCTTCTTGG -AGAAGTCCTCGAACAGTTTCTTCTGCAGCTCACTCGGATCGCAGTAGTAGTTCTGGATAA -TCTTGGGAGGGAGGTCATTCAACACTTCCTCCTTCAGGCGACGGAGTAGGAATGGCAGGA -CCTGCTTGTGCAGTGCCTCGATTGCTAGGGCGCCTGCCTCTTGTTCCTTGGAAGAGGATT 
-TGCTGAAACGGCTGGCTGCAATGGGCTTAGCAAACCGATCCAGGAAGACCTTTTCTGTTC -CCAAGAAACCGGGCATCAAGAAATCAAACAGCGACCAAAGTTCAAGGACATTGTTCTGGA -TAGGAGTACCAGACAAAATCAGACGGTGATTGCTGGCTAACTTTTTAACTGAAGACGTGA -TCTTGGCCTTCGGGTTCTTGATAAGATGGCCCTCATCGAGCACGCAGTAGTTGAAGTTGA -TAGGGCAAAGAATATCGTTGTCATTGCGGCAAACATCGTAGGAAGTTACAATGACATCAG -TGTTGGCGAGCAGGGGTTGGAGTCTCGACCGTTCCGCGGGAGGACCGACATATGCAATGC -AACTGAGGAATGGAGCATATTGTTTGACTTCTTGCTGCCAATGTCCAGATAGAGAAGGCG -GACAGACAATCAAAGAAGGCAGCTTTCGAGAGTCTGTTGATTGACTCTTGGCGAACTCCT -CCGCTCGCATATGGTGGTCACTTGCCACGATACAAATGGTCTGCAGGGTCTTGCCGAGGC -CCATGTCGTCGCAAAGAATACCATGGAGGTTATAGCGATTGAGGAAAGCAAGCCAATTGA -CACCATCTTGCTGGTAAGGTCGAAGCTCGGCCTTAATCGCGACTGGGATTTCGAATGGCT -CGACCTTACGGACGTCCAGCATCTGAGACATGAATTTCCTTTCTCGGTCACGGCCCTCGA -GCAACTCTGCTGAAAAGCCAGGGGGGTCAGGGATGCCGGCCTCCAGGGGAACCAGTTTAA -CAAGGGTGGCGAATGATGTTGTTGCTAGAAGACGCACATCATTGTCGGAGTCGCTCATCC -GGCCAAGGACGGGAACCACCAGGAAGATGACATAGGGAAGAATTCCGTCTTCCATAACAT -GGATCAGATGATATATGCACTCAATCGCACCTTGGCGGTGATTGACATCCAGCCCATTGT -TGATGATTGGCAAGACCTTTTCAACAAGCATCGTCATTCCCTTGACGGTAATGACACTAC -ATATTGTAGCGAAACATTTGGCTGCTGCGTAGCGAATGACAGAAAGTCGGCACTGCAATC -CCTTGGCGATAATGGGCATGAGGTCGATAACCCATTCGTGAAGACCTGGATCGAACTTGG -GCAGCAGAGCCCGCAGTGTAGACAGGCCATCCACAATTTCTTGGCCAAGTTCATTCTCTG -GGTCAGTGATGTCCTCGGGGAGTTCATCCCCTGCCAAAGCATCTCTCAGTGGCCGTTCAA -CCAATTTCGCGAGGTTCGGGACCTTCTCCAAAAGCTCGGCGCCGAACTTGACTGCCAGTT -GCTCCAGCGCATCCTTCGCACCGCGACGCATAATCCGTGCATCCCTTGCCTCTTTTTCGA -ATTTGGCCGCATCTGGATGGTCGCGTCGATCTTCTTCTTTGCGTAGCGACAGAATTGATT -TCTCCAATTGCACATTGTGCAGGAATTCAGGCGTCTCGGAAGTGTCCACACAGCAGTATT -TGACCAAGTTCCCAATGATTTTGTCCACTGGTCCACGCTTCGTAGCAGCTGTGTAATGTT -CGATAAGGCCTGCCACTGCTGTTGCAGAGCGCTGCTGAAGTTCTACATTCTCCTCTTTCT -TGACACTGTCCATCATGCCCTTGATGATGTGACCAGGCTTCTTGGGGATATCCCGCAAAG -CGACCAGTGCACCAGCAGTGGCAGCTAGGACACGCATGTCCCGCTGTTCCCTTATCAGTC -GTGCCTCCTCGACAGCGGATTGTGCAGTGGTCCGAGTATCGTTCAAGACTTGAGTGGCGG -TGATTCGTTGAGCCGGTGTGAGGTTCTTTTTCAGTCGATCAAAGTCGGGACCAACAATTT -TCTCAGCATCCGCCAGGGAAAATGCATTAGGTCCGGCTTCTGAGTCGCCCTGGACGACCA 
-CAGCCAAGGTTGGCAGCCGGGAAGGTGCCACATGCGCATGATCTCGGAAAGTATTCAATA -GTGAATGGCACTGGGAGCGTGCGACATGAAGGTAGCACGCAATGTCCGCGTACCAAGGTG -GGCGCTCGCCTTCGAGAATAGGTCGAAGCCGGTCACATAGAGACGCGCAGTATTTGCTCG -CTGGTCCGGATTGGCGAGCATACTCTTCTATGACCATGGCCGAAGCAAGCTGCGTTGTGG -AAGCAGAAACATCAAGGCCATCCAGAATTGCCGGCCAAAGGCTTGGTAGGTCATTCTTAT -CCCAGAGATATACCAGTTCACCCAGCGCCTTCGCAGCGTAAATCTTGGATCTCAGCATAG -TGTCTGCACCAACGAGGTCAATATCACCAGAAAGCATGTGTCCGTCCACGTTGTGAGCCG -AGGGCGGAGGTGGTTCTTTCTCTTTCTTCTCTGATTTACGCCTCCGTCCCAACTTGTTCT -CAGTTCCGCCTGGAATTGAGGCAGGAGGAGAGCTTTTGGAAGGAGTAGGGGCTGGGACAG -CGGTGGAGAATGGCAGACCAGACGGTTTGATAAAGAGCGAGGCGTTCATGGGGATGGGAT -ACCGGGGAACGCCGAAAGCGCCCAGAGTCAGAGTGACAAGCGGCTGGATGGAATCTGATA -AAACCTCTTCGGATTTGAGTGAACCGCGCAACTCAACAACGTTGAGAAGTTCAGACCACA -CTTGACCAGACAGTTTCAAAACACTCTCGTTGCGTTCCACGAGGAGATTTTGGAAAATCA -ACCGCAACGCCTTGCCATCTACCCAATCTGTGGTGCCCTCGCCTTCCAGCTTCAAGAATG -TCATGAGAGCCCGAAGGACAGCAGAGCGAACGCTAGTGATGGTATGACGGAGGAATGGGT -AAAGGCGGGGGACAAGTTTGCCAAAGGACGCTTCCGGGTCATCTGCGGCATTGGCCTTCA -TGGCATCCAGAACTTGAGAGAATGTGCACAGTTTGGCCAAAAGGTCCATCACAGAGCCTG -TACTGGCACTAAGATCGTCCTGCAGATTGGAGAGACAGTCCCAGACGATGTTCATCAGAG -GACCTAGAGTGCCAGTCCGAGAGGATACAAACTCCTCTGCAATTGGAACAAGGGTAGCGG -CGCTTACAGCCCGGACATCATCATCATAGTCAGCGAGGCCTTTCATGACAGCTTCCAGGA -CGCCATCCATCATGCTGGAATCCTTGACAAGCAAGTCCTTTCGAACTGCGACAAGGTACC -GCAGCCCGATCATACCACCATGACAAACCTCCCATACCGGGCGGTCTAGTCCCAGGTCGG -TCTGCATTATGATCCGGTATAGGCATCGATAGACCGCCCGGACTGAGTTCGGGTGTAGGT -GTGAGAGAAGAGCACCAAGAGTCTGGCCCACGGTTTCGCGAATTGGGGCGACCACATTGT -CTGAAATATAATCACCAAATCGATCCAGCATCAAAACGCAGAGTAGTCGACATGAGAGGT -CATCCAGCCACCGGCGGTTGAGTGCATCGTTTTCCGAGCGACTTTTTCCATCCAGCCGGC -CAGCTGCCACACCCTGGACCCTGATGACTTCCCGTAGAGCCATTGCGGCGCCATGTCGGA -CTTCCCAGTTTGAGTCAAAGATGTCCATGGTCAAGAATTCACACATGTGTTCCAGAGGCC -ATCCTCCGCCCTGCTCCACAATATCTGGCTGGATATGAGATCGCTCTGGAATGGCAGCTC -CTTTGAATTCAGTAACAAGCTTAGAGTCGTCATCTTCTTCAGTGCGCTCGAGCGAAAAGT -AATCAGGTTTCGCCTCGCCATTCTGGTCATCGCCGTTCTCTGCTTTGACGGGGTGAGGAG -TTGTGGCCGCAGAAGTTGTGAGAATGTCGGACTGTCGACGGGAAGACAGATCTACGACAC 
-GGACCTTGTTGGCCCCCATCTTAGCATTCTGTTTGTTCTTCCGTTTGAGTTGATTGAGCT -GTCGCTTGCTCAGCCCCCCGTCCTCCTTTGTCGCACTTTCAAGAGGCGAGGAGGGCTGGC -GGGAGGGAGCTTCATGCAGACTGCTGTGGCGAGAGACTGGTGGAATATTCGGTTCATGCT -TTGGAGCAGGAGTTACCACTTTAGGAGCCATTTCGATGTCTTCGATCAGGTCTTCTTCAA -GGTATTCGCCTTCGAGACCCAGCCGCGAAGTCAAAGTCTTCTTTTGGTGCTGCAGACGTG -AAGCTGGTTCCATGGATGCGAGGGAGTACTCATACTCTTTGCCCGCACCGCCGAGCAGCC -GCTTGCCGTACTTTAATACGGAAGGGATGTCCAAGGTGTCGAGTCTGAGGAAATCATCAG -CCAGGATTGGCTCTTCAGATTTGACTTCGGACTTGATCTCCACATCATCTTCGTCTGCTG -CTGACTTAATTTGGAGTCCATCATCCTCGTTTGGATCATACACCTCTGCGTTAGAGACAA -TCAGTCCAATAGCTTTAGCAGAAGCTGTCCTTGTGTCCCAGGACTTGGATCTGAGGTAAG -GCAGGATACGCCCGAGTAGATTAAACAGTTCATCTGGATGCTGCTTTTGCACATCTGCAA -GCTGCTGAGCAGCAGTATTACGAATCAACTGAGTACTGCCCGTCTCGAGAAGCCTGTATT -GTCATTGGTCAGTGGTCAGTTGGTTTCACGTGACTGCACGCATTAGGTTTCAATACTCAC -GTGACGAGACGGTCGAGTCTAAACTGAGTTAGCATTCACGTGACAGGCGCGTGGTGATTG -GCTGAAGGGGCTTACCGGGAAGCCATCGCGGGTCTTCACTTTTCTCCCGACTATGCGGCA -CAATCTGAACAAGAGAAAAAAAAGAAGGTAACGCAAAGAGAAAAGCAATTGCGCGCGACA -AAGCGCGCAATTAGGGTGGAAGGACCTCGATATGAAAAACAAAGACCAAACGCCCAACAC -CTCAAGCCGCGATGCTGAGTCAGCGGGGGGATTTAGATTTAGTACACAAGAGCGAGGGTC -GAGGTCACGCAGCCGGGTTTATATCATTGGGGGTGCTTCTGTCCGGCGCGCTGGTCCATG -TTTGGGTCACTCCGCCTAGGCCGTTTGTGGTTTAGCGCGGGCGGTGGGCTTCCACTTACA -GCGCGCTGATTTTTTTTTCTGCTGCTGAAAGGATCACGTGAGTTCAAGGAACGCGTTAGA -TGACATAGAGCATTTAGATTGATCAAAAATACCTAATTCAATGTATTGAAGTTGTTGTGA -TTGAGAGGGTTTCCGTGCATAATTCAAGTAAAGGGTCATTCAAGAATCCGTTCAAGGCCT -CTGAAGGAAGGGAGCAATGCCAAGACATGCCAGATGGCCTCTTCAAAGTTCAAGTATCAT -GAAGGATCAGCAGTGCATTTACATCAATGTAGCTGGTAGGTTTATGGTATTGGAAAGATA -GATTATCAGATATTGATTTCAGAACCGATGGTCTTCTTCGATTATATAGATGTATAGTGC -GATCCAAGTTTGTTGCACATCGATCGACCTGCCGAACTATATTATATATATCAGTTGTAT -AGATCCTTATATCCAACAAAAAAGGGTTCCTCCTTACATTAATCTTTCACCTTCTTCCTC -GATCAGGTATATGAGCGATGCATACAAGGTCATACATCTGTCATAAAGACAGTCATGGAT -TCACGGTGTTATAAAATCTTCTAAGTATGATTCCTTGAACTATATAAAGAGAGAACCAGC -GATACAAGGAGTCCCGGCATAAGCGGCTAGACATGTGGGACTGGCAATTTGACCTATTCC -CCCATGCCCAATCAAAATCCATGCTTATCTCGTGTCAATTAGACTTCTGAAATCGGGTAT 
-ATCTCGTCCTACATGGCAGCGTCTGCCATTTCGGAAATCCATTATGGATCAAGCAGATCG -AGCTTAAGGATAAAGCACTGTTTGTCGTTGAATTGGAGTATCATTTCGCTGACAGACCTG -AAAGTCAACCAGTCGAGCGTTTCTATACAGTCACAGGCTCCGGGCAAAAGTCCCGGGGCG -GAAACACGGAATTACACGCATTGAAAAATAACAAGCACATCCTTTCCATTCACCGTTGGC -TCAGCACATTAGCTGCCTCCTCTTCCTTCCCAAGTAGACCCAAGCTCCGAATTTGCTCAG -CCTTGGGGGCGTTCTCTCGCGGGTGTGCGGCCTCAGCCTTGCGATAGGCAGCTTGCATCT -TCATGGTGCGGAGAGCCTGTTCCAATTTCCCTTCATTAGATTCGAGTTCATCATTTCAAT -TGTGGAGGTATCTCAAATGAAAGGGGGCTGCTCTTACCTCTTTCCGGTGGTGAAGGCACT -CATAGTAGTCATCCAGGGCGGGCACACACTTCTTCTTGCCGCTCGCACCATCGCCCGAGT -TTACCACGTAGCAACCAAGAACCTCTTGCCAGAAGGGGTAGCAGCGGCCGGGGCCTAAAG -CACAGCCATAGTGCGTTAGCCGGTTTTATTCGCGGGGGAGGGAACTGCGTCCGCAAGTTG -TGATCCGAGGGCATGTCGGTGCGGACCCGAGCAGGTTCCCCGGGGATGGGGGAAATGAGC -ACATACCACCATTGTTACCGTAACCGGAAGCCATTGTTATGTGTGAAAGGAGGTAATTGA -GTAAGTGGTAGGCACAGAGACCGCGAAAAAGGTAGTGATGAAGTGCCGGCGGATGCGACT -CTTCCCTTTCGTTTCCGACTTGGTCTGTGCGCCCCGCCGTATGCAACATGACCATCATCA -ACTATGAGGCCACCTCACTCGACCTCCTCGTCTCTTTAGCTTGATCATGGCTCTCGAAGA -GCTGTCCGATGCGCATGCTATTCTACTAGCTACTCAACTCTGTGCAGATGGTAATGTGGC -GGATTTGCCTATTCTACAGGCCCGGTTTCCCCATTGTTTTCCGTTGGAACGCCTCCTGCG -CATCATCTTGACCTTTCTCCCAGAGAGCACAGAGCCCTCTCGGTACATCTCAGTGTTACA -GGAGTTGGAAAATAGCTCCAGTCTTTCACCCGATCGACCGATCGATACATCTGCCGTCCA -GGACTTGTCCGAATCGGTCGCAAGAAAGCGCGTACGGAAGCTTCATTTACGGCCGCTGCA -ACGTCCTGACGAGGAAGACGAGATAGCTTCAGCAGATCCTTTGACCAAATTTCTTATCCA -CCGGGCACATCTGATTGACTCAGAAACAGCATTGCAGCCCCTTGTCCTTGAACTCATCCT -CCCGTTTTACGAGAACTCGCCAATCATTCGAACATGGCTGATATCGAGCTTATTACCACT -ACTTCGACTAAATTACGAGTTGTATGCCCACCGCGACGAAACTATATCTCTTGAGATCTT -GGAGTCTATGGACGATCAAACCGCGGTCAACATTCTTCTGTCTTTGATGAGCCCCGAAGA -AAGTAATATGGATCTGGTCAACAACTTGAGGGGTCTGATTGGACCTTGGCTCTATGGTGG -TAATAGGTCAAAACGGCGAAGGCTTAATGAAGCAGCTCAACAGAGTTCAGTTTCTTTCAT -TCAAGGCTCCGAGAAACCCCAACCAACCGAATTGGCTGGGTGGGAACACGTGAACGAATG -GCTACTGTCGCGGAGTCTTGTCGACCGTGACAGCGTTGTGAGCGCATTCAATCACTGGGA -TGGACCATCAGATGTGGACTTGGGGGGATATGACAACGCTGGCACGCAGTTCTCCGAAGA -GCAGGCGAAAGATTTACGGGTTCGATACGGCCAGTCCGGTCTTGCTGTGGTGTATGCCCA 
-TGCAGACTCATCGATCACTGTGTTGGAGGGCTCATTCCAGGTCTTAACGAGAGTCGCGAA -GCTCCTCGATCTTGAAGACTGCTTGTACCTTGCTCCTGATTCAGAACTGCCATCTGTGCA -CTATGATACTGAGTCAATCTCCTCGACATCCCGCGCATCTCTATTGCAGAACTCCCTTTT -GAGACCGACGAACCCTCTTACAAAACCATCGCCCCCATCGATCTCGTTTCTTAGCGCTTT -ACTTCTCTCCCTTCGTATCTTGACTGAGCTTGGTCATCTGGTTCCCTGCCGGGTTGCGGC -AAATATGTGCCTTCACAGCACCGAGGAAATGCAATTGGCCGAACTCAAAAGTGTGGTAGA -TTCTGCTGTGAAGCAATCGAACGCCAACCAAGACTGGTCTATGATTCGGCAACGATTGCT -TTGGCTTCAAGATTGGCAGGCGGAACACTCAGACAACGCCTGGGATGAATCTTCACCGTA -CCATGGGCTATTCTGGCGGATCCCGCACGACACGGTTGAAACAGAGATTCTGAAAGCGCT -GCTGGCAGCTAAGGGTAATATCACCAAAATTGATTCTATGTTCTCAATACTGATAATGTT -TCTAGAGTATCAATTGGCTATCGATATTTACATCAACTTTAAACCAGCACCACTTAGCCC -TGCCCAGGTGGAAGCTGCAGTCCAAGAAGCCATATTCACATCGTATGATAATGCAAGCAA -CGGAAACAGAACCCGGGGAGGAATGAAGAGAGCTTATGACATGTAAGTGGTGCAATAACA -AGAACATACGTGGACGTGGTACTAACAGTAATTTAGTTTACAATCATTCGAATCGCACTT -CCCAGAGTCTGTTGTATTCAAACAAATCCACGCCTTAATCGCCGCAACACACGCACTCTC -ATACTACTCGCTGACCCTGCAACACGGTGTTCCATTCCAGCCTGTCAGCATTCGTGTCCA -TCACGATCCCATCCTCTTGATTGAGAAAGTACTCGAGCAAAACCCAAAGAGCTACACCAA -ACTAGACGACCTTCTGTCCATTGGACGAAACCTAGTAGCTGCAGGCGTTCCCACGCAGCG -GGCATCACCTGAAGCCGAAGACGAGCCAGCTCACCGTAAACCGCCGAAGGAGCACGCTAT -TCTCACCGCAGAACGCCGCATCACCTCCCTAGCAATTGCCTCCGCTCTATCCTCCAATGA -CTTCGGCACAGCCTACTCCTACATCCTAACCCGACTAACGCCCCCGTCCCTCCTCTCAAG -CTCCTCACCCCTCCTAAACACCACCTCTGTCCCCGATGACATCACCTGGCGCGCCGTGTA -TAACGCAGGTCGCTACCGCGCCACAAcaccaactcacccaccccccaccctccaatccca -AATCTCGCACCTCTCTCAGCGAATGGAACTCCTCTCCCTCGCCCTTGTTCTCGTCCCGTC -CCCAGACCCGCTCCCCGAAATCCTCGGCGCCTGGCGACGCTGTGACGAAGAATTGACCTC -GCTGCGCGCGCAAGAGCAGCAAGAAGAAGACCTCTGGGACAGAAAAGGAGATACACTAGC -CTCTGTCCCGGGCGGATTCGGACCCTCAGACTCAGAGCGTGATGCCTTTGACACCGAGCA -GCAACGCGCGGCACGACGTGCGCGCGCTGCGATGCCAAACTCCCATCGGCACGAGGCGCC -CATGGGCCTATTTGAGGTTGCTCGTGGCGCGGCTTTAGCATTGCATAAGAATGCTTTCCC -GTTAAGGAACGCTGCTGATGATTCGGCGTCGGTTGCCCCTCGGGACGATGATCGGCCGCT -GTCGCCGGATAGTGAGGGCCGTATTAGGAAGAGGGATATGGTTAGCAATATGATGACCGG -GGGATTGGTCAGTGGGATTGGCTGGGTTCTTGGCGCCGACCCGGTGAGCAAGAAGTAAGG 
-GAGGTGGGGGCGATATGGGGCTGGAATTTCTTATAGTTGCTTCTGTGCTCGACGAGGATG -CATGAGACTCAAGCTACCTGTAGCTTTTATGATAATAACGAGACATATTATTTTTAGATG -CATAATGACTAGTCGTCAAAAAAAATAGTAGCTGTAAAACACAAGACACATAGCATAGTC -AGTTGTATTCATAAATGTAGGAAGCTGCCAAACTCTTTTGAGTTGATACACATTTTCTAC -AAGAATGTGGACGGTCCTAAATAGACAAGTCCATACATGTATGTATACTATTCATATATC -CGTATATATATGACCAACTCATAGACAAGATGCAATGGTATTAAGTGCCTAAATATGTCC -CAAAGGAAGTCTCCAAAGCATTGCAACACTCCCGCATCCAGTGCAATGCGAGAAATGGAC -CCAAAATGTAACCCTGCAGCAGGATAAGAAGGCGAGCTGTACACAAAAAACGAAAATCGA -TGTACGAAGATAGAGGATATGTCCAGGCTCAAACTATGAGAGAAGGCCCAAGCAAAAGGA -AAGAGCGCAGGTCATCGCAAGGGGATGAAGAAACAGCGAGAACCGAAGCAGAGGGCGAAA -ATGACCGAAGGGTATGCGTTTCATGGAGAAAATCAGGCCATCAAGGATCGTAGGAGGTGA -AGCGTAGCCGATGAGATTCGTGTCGGGAAAAAAAGGGGGAAAAAAGCATTTGCAGATGAT -AAGGGGGTATGAGAGTGATGTCGAGCCCCGCGAGTTCCGGCCCGGGGAGCCAGCGGAAAG -CACAAAACGCAGATGGATACGAAGTAGAGTCATGAGAAATTGAGTTGTTGACAGCCAGGT -CAATCCTTGGGGTCCAGCCACTCATAATCAAAAGAAATGCAGCAAAGCAGTAAAAGCAAC -CCTCAAAAGGGGTCCTTCGGGATTTGTTTACTCTTTCCAATCCCAAACCTGCCTTATAAA -GTTCGTCATGTTTACCTTCTTGTTTCTCATATCCTGCACCCATGGATGTTCCAGCATCCG -CCAGGGTGTCGCACGTCGAGGGGGTTCTTTTTCCAAGCTGGTGATCGAGTTAGTGTGAAA -AGACATGAACCACACAAAGAAATATTACGTACCAGCATTCGATAAAGTATTTGAAGTTGT -CCGACCAGCGAATGCCATTTTGTGGCTCATCCTTCAATTTAGGAATTGGCTGACGGACGA -TATAAGTTAAGAGATCAATTAGCCCTGCACGGGGCTGCATCTCAGTCCCATCGGCAGGGA -AAGGGAATCGGTGCTGCGCGACTTCCAGCAGAGTCACCCCGAGAGACCAAACGTCGGATG -TAATGGTGTATGATTGACCAGTGATCCGCTCTGGGGCCATATAGTAAGAGGTGCCAATGA -AGGTGTTGGCGTCGCCCTTGGTGCCGAATTCACCGCTGACACCAAAGTCGCAGAGTTTGA -CCTTGCCATCACGGCACAAGAGAATGTTCGACGGTTTGATGTCTAGAAAACATTAGCCTC -TCGTGGTCAACGTATATAAGAGGGATCATGCTTCGTACCTCGATGAATGATCTTCCGGCT -GTGAAGATAGGTCAAGCCATTCAAAACACCTTCTGCCACCTTCCCAAGGACCTTCTCTCC -AGTGCGGCCTCCAAGCTTTTTGACCTCTTTGTAGATACTGTCCAGACTTCCACCTTCACA -GAACTCCATGGCGATAGATATGGTGCCGGTGGATTTGTCCATGAAAGCACCATAGTAACG -ACAGATGTGGTGCGAGGCGCAGTCTTTGTTGAAGTTAAGTTCACGAACAATTTGTTTCTT -GACATCAGGGTTGGGATCCGTAGTAATGATCTAGAATGGGATGTTAGCCTCTAGAGCCGA -GCGCGAGTACATAAGTTGATCAGCCGAAGCGACGCACCTTCAATGCAAAAACGGTCTTTC 
-CCTCCTTGAGTCGGCATCGGGTGACGGCTCCGCCAGCACCCTCTCCCAGACTCCCTAGTT -CGACAATCATATTCTGTTCACTGGCCGCATGCCAGCCTTCATCATCGAGATCATCCACAT -CGAGGGGACGGCCTTTCTCCAGGCTCAATTTGTCCAGATCGGGAAGGAGACCGTTTACAT -TGTTTTCACGGTCTCCGTTCTCGCGATCTGAGTACACTGAACTGATGGCCGAGGACGGAT -CAGACGATCCCTCTGGCTGTCGGAGGCCCATAGCGAAGTTGATCGTCGAATATGAAGGAT -TTGAAGAGGCTGGTCCATTGGTCTGGAGTTGTAGACTGGGCGCGGTTTTACCACCACCTC -CTAGGCCATTTGTACTCAGTGGGGGCGGGGCTGGTCGACCGTTCGGCATGAGAGTTGGCT -GGGATACATTCTGCTGACTACTCATAGGAGTCGCGAGTTGCAGTCGGGGTGGTGCTGGTC -GACCCGATGGGCGAGGCTGAGGTTGGGGTTGGGTTTGAATTTGAGGTACTTGGGCGGGGG -CGGCATTGCTGTTGACAGGTTTGGCGTTAGGTGAGGGGGGGATGCCTAACGTGAGCTTCG -GGGCTCGTGGACTGCCGTTATTGTTGCGAGCGCCAGGAACTGGTGGACGAAGAAGAGGAA -CAGGGGACGACATGGCGAGATTGTTTTGCCTTTCCCCAGGCAAGAACAACTCGTTCGGAT -CTGCAAGAGTAGCCAGAGTGTCGACCTTGGACCAGTCCGCCTTGGAAAGTCCTGAAGGGG -TGGGGGGAAGACGGATTTTGAAGTGCACAGAAAAAACGCAGTTGGATGTAGagcagaagc -acaagcaaaggcaggcaaagacagagaggcagaagcagaagcagaagagaTAAGGTAGAG -AACAAACGAGTGGAGTTTGGAAGTTACCGAGAGATGCGGCAGGTGACCTGTTACTCCACC -ATATAAGAAATAAAGGATTCGTACTTTGTAGTGTATGTTAAGGAATTATCTAATGCGTAC -TCCGTAGTTTAAAGGTGCGAAAACTCTTGATCTCTAGAGAATATTAATAGAGCAACACAG -CCGACAACAGAGATCGGGTGGTTGTGGGGCCCTTATGTTTCAGAAAGCGCCTGAGCTGAA -ACAATTCTACAACATACATAGTAACAATTATATCTTGATTTTTGGTGTTGAGATTTGGTC -GGTTTCCTTTGCTCTGTGTTGGGAATATCAACAGAGAATTCTCCGGAGTACCATGGTTCA -TATTTTTCTCTCTTTGGTTTCtatatacatatgtgaggtatatatatctacactcaccta -tatttgtatatacgtatattcatacatatatatatatatgcaattatataGAAAAGTACA -GAATCTCGATCTACAACATACTCTTCAACGTAAACTTACGTTGTTGCCTTGGAGCCGCAC -CTACTGGGCGCACCTACAGACAAACTGTGGCGTCGACAGTGCCAACACATAGATCCCCCA -CTAAAAGACCTATCACAGGCCGTGTCTTACCCTGTACAGCCTTACCTACTTTTTTTGGTC -GAACCTATTATATGATGTAATCTCCAGTTATATCCATATCCAATTTCAGAAAACCAGATT -AACTACTACATATGTCATTGAGGGGCTAATTTCAAGCTAATTCCAAGGCTAAGATTATAT -GCGGAAAGATGTGGCTTAGCCCTTTGTGGCCACGCAATACCCAATGATGACATGGGCCCG -ACAGCCCTAGTCGATCCGCATGTCACTGTTTTTAACCCTCCGAAATTGGGCATGAACAGG -ATTGGCTACGGAGAAATATACTGACCAAGCGTTAAATTAGAGAAGTTGTGATCTAAATTA -GACCTGATTCTTGCCGGAGAACCGGTGCCAAGTCCCACAACGGAGGTTAATCTGGATACA 
-AATTATCTTCATTATTCAAGACAAGCGCCTGATGAATCTCGCGATGATTTTGGTGCCGAT -GAGATCCAAAATATCCGAGAACGCAGATGCAGCAGTCTTTATATATACTACATACGTAAA -ACCTAGACGTTCTCGGCACACAGCGGAATAGTTGATCTTGATTGCTGCCTGACAGCCAAT -ATGGAAAGAAGATCACGAAAATATCCAACAAAATCATGTCCTTGTCTATAAAAGCTTGCT -TATACAAATCATCCCGACAGTGTAGTCCTAGTGATCACTCAACAAATTGAGGTGCCCAAG -AACCTCGCTATATAGTAAAATGGAAGACCCAGGCCAACTGCGGTCTCATCGACTCTTCAA -GGATGACAAGGCTGACAATAAAACCATTCGAGGACCATGGAAAGCATCGATCCATGATCA -TTGAGATATATGTAACGCTGCGTTTGAAAGGCAACGCGTAAAGCGGCCAATTCTCCCGAT -CATTTCGAAAGAGGCCATGTTGTATAAACATCCACGGTGTTGTCTTATATTACACGGTAC -CACCGAGCCCATTATAGTACGAAAGAAAAATTAATTGTAATTTTTTGCTCTGCGTATGGT -GCGTATCTGGATCCTGTATGGAGTGGATTGATCCAGAGGAATAAGCCCAACAAGTCTATG -TTGTACATATTATTCTCGATGCCGCTCGCTTGTGTCATGGGCGCTAAAAAAAGTACGTCT -CGATGGTGATCGGTCCGGTCGGATGACAGAGATGTGATTGTGAATGGTGATCCCGTAAAA -TGGGATTCTGCAAAGTCGGGTTATCGATGTGATGGGGTGGCAGATGAGGACAGTCTCAGA -TGTGGAATTGATATACTGGTGGGTGGGCGCCACGGACAGAGAATGTACAATAAGATCTGT -CAGTGCTTTGTGAGTGGGAACAAGCCCTATGGAATTTCTCTTCTCAAGTTGTAGCCATGT -GTAGTAGGAATTTCGATTGTATATCTATTACACATGTCTTTTGACTTGACTGATATTCCT -TGCTTTGGTTATGGCGTAATGATCAGGACATGTCAAGAACTGTGAAAAAGGTGTGTCCAT -CGAGTCCCAACGTGCGGTATAAGCGATGGGCCAAGGCGAACGCCCCTTCTCCTGAACGTC -AAACTTACCCATATTGTAAGGTAGGGGCATCTCATGCGATAGGCAAGGATAAATTCCAGT -AAAAGTCCAGGTCCACTCAAATCATTATCCCACGAACACCCTATCATTGACGTAGAAGGT -TATGCTATTTATAAAACACACTCCACCAATAATCATATGTATCCCACAAAGTTAATTGTA -GCTAAAGATAGCACCAAAAGAAGTATGGGGTTGATAAGAGCACGACCATATAAAGTACGT -TGAAATGAAATAGTAGAGGCCCGATCTATTGAGAAAGGGGTCACGAAGCGTAAGAATTCA -ATAGCGGCATGTGGAGTCTGGGGGCGCCACGAGGCCTACGGAAATCGCTTTGTGTGCGCA -ACCCCCTATAATGGAGTCGGCCCTATCATAGAGTCGGGACAACGAATTGGGCCAGAGTCA -AAGGCATAAATACGGGCGAAACGTTCATTGGCTATGCAGCATCGTCCACGGGTACGGAGT -ACCCTGATTTTCGATTTTCCGTGGCGGTCGAACCGCTTCGAGTCTGATCCAGGGTCTCGG -CTCAGGAACAGGATGAAGATCAACATGCTCTGGTGCTGCAGGTTCCAACCACACTGTACA -CCATACTCCGTGCGATCGACCTATTCTGGGTACTCCGTATAAGCAGAAGTAAGTATTTAT -AATCGATTGTACGGAGTATTCCGTACGGCTAATCGGTGTAACAAATTCAGAAATTCTGCA -TATGCGCCATGCCACGGACAACCCAATCACACGGCCAGACCCCGATATTTTCTCAAATTG 
-CGCGGCATTGACGCTCACATTGTCTAGAACAGGGGCACGACATGCAAGCATCGTCAAGCA -CTGTACCCGCAACTAAAATAATGCGGTCTAGCCGTTATGGCGTGAAGCGAGACTTTTCCG -TTGCGGGGGCAATTTGCAGTAACTTGAGGAGTGGCGTACTCCGGACTTATCGACTGGTTT -TGCTAACTAATATAGACCGAGTTTATGTAATTACAGTAGCTACGCGATGAGTCTACGAGC -AAAAGGCATTTCTCCTTCCCCGAGTATGCTAGGATATATGGTCATCTTAAGCAAGTCTCG -GACGATGATATACTGCTCTCGACACAATACATAAATTGGATGTAATAAATAACCCAAAGG -TAATGTAAAGATATTACGGACTACTCTGTAGTAAGTAATAATAAAAAGGATGTCTCGGCG -CTTCGCTCCGACAGCGAGCAAATAGTGTTTGGGTTTCCATGTTAAGATGAACGTCCGGAG -TAAGAACCAATCCCATTTCACAAGTAAAGTGCACATCATCGTGGATTCGCCCTTGCACAA -TAAGTTCTCTTGGCGAAATAAATGGCGGGCTGTTCAGGGGTAAGTGGAGATGAAAGAGAC -CCACGGGGGGGACATGTTACAATTGGGACGCCAGGAACGGGTAGTGGATCAGCGATGCGG -TCTGAAACATCAATAGAAAAAAAAGGGTACAAAAGATATTTGGGAGAAAGCGAGAGAGAA -AGAGAAAAGAGGTGATCAAGTGGTATTGCCTATAACTTTTACCTATGTACTTCAATGTAG -TTTGTAGCCATTTTATACTCCGGACGTACAAATATCAATGTCTTAACGCGATCGTCAAAT -AACGCTAGAAACATGAGCGGGTCAAAATTACAAGCTAACTTGGTAATGAGAAACAACCCC -TGGCAATCGAACCAATCATATAATTGAATATATGAAGGCTGTGCGAGGGTCAGGTTCCAA -GTACATGACAGGTTGCGGTGTGTGATAGTGGTGGATGAAGTACCTTGTACCAACAGTCTT -TGTTTCTTTTCCCCTTTTTTAGGGTCCCAAAATGGGCCAAAACAACGAGAAGCAAGTGGC -TGCTGATCTTGACCCTAATCTCCCCGTTCATACAAAGGCCCCCTGCCTCCCCCGTGCTGT -TTCTGTTTCTTTCTTCTCTCCAAGTACATTCTTCGCATATTCACTCTCTATCGACTTCGG -TCTTCCACTCTTTAAAAACCAACACTCTCTTTCAAAAGCTCCGCTTTTTATATATACCTT -TTTGAACATCACTTTCTACGTTTTTCGTACATAACATTCAAAATGCGTTTCACTGCTGTC -ACTGTCGCCTTCTTCGCCGGTCTGGCCATCGCTGCCCCCGGTGCTGACAAGACCGTCTAC -GAGACCGATGAGGTGACCATCACCTCTTGCGCTCCCACCGTCACCGACTGCCCCGGCAAC -GGTGCCGGTGTTGAGCccactgccagcaccacccccaccagcgcccccgtcgctatcacc -accTCTGCTGGTCAGGCCTCCGAGACCCCCGTCCAGTCTTCCGAGACCCCTGCCTGGTCC -TCCGTCCCCACTTGGGCTCCTTCCAGCGTCTCCAGCGCCCCCGCTCCCCCTGCTGAGACC -GCCCCTGCTCCCCCTGCCGAGACCGCCCCTGTCTCCTCCACCGTGGTTGCTATCACCACC -TGCGTGCCCACCGTCATCTACTCGACCGTCCCCGTTGCCCCCAGCTCCACCGCCGTCGGT -AGCAGCAGCGTCGTCGTCCCCCACGGTCCCACCGGTGGTGTCCCCCACGTGCCCTCCGGC -AGCAAGGGTGTTCCCACTGGCACTGCCTCTACCACCCCCGCCACCTCCTCCCCTGCCTTC -AACGGTGCTGGTGCTCTCAGCGGTTCCCTCGGCTTTGCCGGTGCTGCCGCCGCTGCCGCC 
-TTCTTCCTGGCATAAATGACTTCTTCCGGGTTGATGATGCACCTATATACCTAGCTGGAT -CCCATGGTTCTAGCCCTGGCGTAGTTGTGTCAAACTGGAGAGGTTGATTAGAGTTGGTGC -GAGATGAGGATTGTGTTATTTAGGGTTTCTGGTGATTTACTCTTAACAATTGTTGCTTTT -TGACTGGATATGTCTATCCATTTCTTGCCTTTTTGATATACATATTTCTACTTCTTCTCG -GGCTCTCCTGGATAGCTTCGGAGTTGTGCGAAGCTCGCTTGGTGGCTTGAGCACGACCTT -AATGTAAACCATGACAGCTTTTCACATTCTCCCTCCAGCGCCCGTAGCCTATTTAGAATA -TCTCCTTTTTGCACTTTAATTATATAGTTTGCTCTATATAGCCTTGTCAACGTGTTGATA -TTCACAGTTCCTTGTGGGATATAGATTGATATCATTGACCATTGTAGGTGACCCTTTGGA -GCATTCCAAGCTTTTCCGCCCCTCACTCCACGTGCCACTCAAATTACTTTACTTCATAGG -GCTGAATATGCCACTTAGAGTCGTCAACCCACCCAATTAGGTGGTAGTATTCCAAATTCA -AACCCAACTACTGTTGGTGCCGAACGCAAGTTGACACTTGACAGCTTTGAGCCTCACAAT -CACAATACCCATGATTTAAATCAGCTTCACTGAGGACATACATATATATGATTGTCTTAT -GTGCACACTGAGTTTATGTTTGCAAATGGTTCATATAGATATGCCACTCAAGGTGCTTAA -TACATGTTGCTCTTGAATGGCGACTATTGTGTCTTACTGATTCTTAGTCCCCTCAATCTC -CCTGCGTGCTTTGTTGTTTACTATCATTGAGAAGTTCATAACAGTCCTTCCTTCTTAAGA -CTTGTGAGCTTGGTGTTATTCGTGCATAGAGTGTTGAGTACAATGTAGACAAAGACAGTA -TAACCTATCATATATGGAAAAGTCGATATTTTTCGCCACCCATATCTTGTATCTTGCGGC -TTATTTTTCTATACCAGTCCTTCACTACAGGTTTGATACAAGGCGAGGGTATGGTTTTCT -TTATTTTACGAAACCTACATATGAAAAGACTACTATAATAGACACATGGCAATTTTGTAT -AGGCAGACTATTCTTGTATGATATACTCCGTGCAACAGACGAAGTACGGAGTTCAGGCAC -ACATGAGCATAAAAGCTGTTTGAAGTCAATTATTGGCAGCCAAAAATAGAGACCCCCACT -CGCGAGTTTAGTGGCTGAATCGCAAGTCGTATAGTTTATTGAGGATCGAGCGGCCTCTCG -CCCTCCAATTAAGTGCCATTAACGGACACTCTCACTCAAATGAACGAAAGAGAAGAATAA -TCAAGGCACTGTATAATACCCGCGATGGCTCTCATACTTCGCACATCTTATTCGACTCCT -GAATATGTGGATCTTGAGCCCCTTCTCGATGCGACATACAGACCTTGGCAGAGTTGGCCT -TCGTACAGCCCCCGTTGCTATTTTCGCAAGACACATTGCAAAGCCCGAACGTAAATAGTA -AGTTGAGGTCATTGCAGTGATAATAGATGCATAGCCCCTGTGTGGATAAATTTGGACCCC -AAATTGTATCCCGAGCAACCAACCAATTTTAGGATTATGGTGGATTTATCTTGATATCCT -TTGTTTTATCCTTGTGGGACTCCATGACCATTGTCCTGAAAAGGTCCTGTTTGATTTCGC -TTCTGGGTAATTGATTTTCATTGATTATTTCGGATGTCCGAGGGACAGCCAAGAACGATG -AAAGATCATCAGTGTTTTTTTAGCCCTAAGAGAATCACCACTGAACTGCATGTGAATCCT -TTGATTTGCCCCCTCTTTGTGTATACCCTGCACAACATGTGTTATAATCCCTCTCTGGGG 
-AAGAGGGACTGCCCTTTTAGAGCTATCTGCCCCAGGAAGGAGGGCTTTCATGCCAAGTTC -TGTTATATTCTACCTTTGGATACGAGCCTTCCGTGACGGCCCACTATCGAACCCTTCCAG -AGCATAGTTCAAGTCTAAAGATAGTATACCAAGCCATACGATATCTGATCTACTTGTCAA -AATCCATATAGAGTAATCCACCGCACAATGGTTTTCTGTCCATTTCAGCGACGAGCTTCC -GAGCCTCCCAAACCACCCTACGATTTCAACTTTCCTCTCATGGATCTCTCATCCCTCGAG -GGCGCGCCGATGGTTAATCCAAGATGCATTGTTCCGCCAGATGCCACGACTCCCCATATC -CATACAAACTCGCGTTCTATTCAATATTTGCTCTCAAAACCTTTGCCAGCGAGGCCTGCT -TCAATAGACGGTGTCTCGCCAACACAGCATCGTAATGAACTACATGATTGGATACCCAAT -CGTTCTAAGCTCGGTGGCAATCTAAGGCGGGCTTCGGCCTGTAGACGTGCGCGCGATCCA -TCGGACCAGCTGTCTCCCAACCATGATTCACAATATGAATGCAAGATGCGCCCACGTATA -CAACATACTCCACATCCTCTTGCACGACCAAGAAGCTGTGTACCCCTCACTTGGCTCGAA -GATGAAAAGAAATGGATTGTTGGGGATATATATATACCACCCACACATGAGTCTCGGACC -CAAGATGATATGGCGTCTACTCGATCGGCAGTCTCACCAATTTCGCCCATTTCGCCAAAT -ACACCATGGCAGGACGTATTTCAGCGCCTTGATGAGCACATCGAATTCAATGATCGCCTT -TGTCAGGAGAATCTGCTGAACCAGCCCCAGTTCTATGATCAACACAGCTTTGGTCGGACA -CATGACACAGATATAAATGATCGAGTTTCCAGCTGGGTTGCAACTACTCAAAGGATGCAT -GGAGATCGACGGAATTGGATGTGATTCAGCGTGGTAGTGGGTTGCTTGTTTAGCGTTATA -CCTTTTCAGATATATCATGGGAACATATTGGTTTGAAGCTTGGCTCACACATCTATGGAT -AATGCCAGAGTATAGTGTTGAGCAAAGCAGTTTTGATTTCCATTTGGGACAGACTTTTGT -AGAAGAAGCTGGGAATTGTTTATGGAGAAAGGGGTTATAATCTCAGACTGTCTAACTCTT -GGGTTAGCTAATTTCTCCCAATGGCTCAGGGGCTGCCATGATTCCAAATTATGAAGCTGT -ATCCCAATATGTAGGGTGAGGAGTCCATAAGATTTCTTTCTGCGTGTTTCTAGAGTATAA -AGGCCCCCTTCAGAAAAGAAAAACGCTAACAGGCCTGCCGTTACAGCGGGGGTTCGTTTT -AGACAAAGGCATCCAACCCATTTTGACTCGAAATCCCGAGTAATCCTTCGATGCTGTGTC -ACAAGATGACAAAATTGAAGGGCAGTGATAGCCATGTCTTTGACAGGTGTCGGATCTCCA -GCCCGTTATCCAATACGTCGAGCGTTGGGGGCTTGCCCTCCGCCCAATACTATATGCATG -TCGAGGGGACCTTCGGTGGATACAGAGGGAGAAAAAGGAGCGGGATTCTAATCAAACATA -GAAACCAGGTATACAAAGACTCGATCAAAGACAGGACAAGGCTATCATGGTAATAAAGCG -ACTTTGTTTTCATCCCAAATCGGGATTCCATTGTGTCTGGATGTTTGCTATATAAGGAGA -TCTTCTCTCGGGCTTGTTCAATAGAAAAGCACCTATACACAAGTTCCATTCAATTCAATT -ACTTATCTTAACAAACATGGGACAGGCCAGTGTATTCATCATACCCTCTGAGTTTTTGAG -AGGGTTGGAGGGCTTTGAGGATGAGGACTGGGTCGATGCAGGCCCCCAAGTCTTTAAGGA 
-TCCAGATAACGACGAAGCTCACTGGATTACTATCTTTATTCCTCAGGAAATGAAACACAC -TCACAGCGATGAAGAGATCATGGAGTACATGCTTCATAATATTCGAGCCCGGCGAGCTAT -CGTGAGTACATCGGCACAGCATAACCTAGGAATCAGTTGTATCTCGATCCCGGTCCCGAT -TAGATATGATAGCACGGCAACAGTCTTGTCCCGCGCCGAGGCCGATGTGAGCAAGTGGCT -CGAACTAGTTTGTGGTGGGACCGTTACTCTTGACCGGGAGGTGATTTTTGGCCGGTAAAC -CGTGGGCTATCTGGCCGTAGCATTTTAGAGATATTGATTTGCGATATTGTCGACACTAAG -AATATGGTCAAATAAGAGATTGCTCAGGTGGGATCCGAGAAATGTGTCAAATATCAAATC -TTTTTGAAGCCGGGTCCAACCGTTAACAAGCTCAACTGTTATTGGTCCCTCGTAATATTG -GCTTTTGGATGGAAATGCGCTATCGGCACAACGGGCAGTTATGGCGATCTCCACTAGGCC -ATATGAAATTCAATAAAAGAACTCTCAATATTGCTCTGCTAGTACGTAACTCTTAAAGCA -GTAAGGACAAAATGATTAACCGTGGGAATACCGTGAACTGAAGCCATGATAACGCATTTT -AGCTCTGGGGATTTATATATCTGGACGGAATGACCCTGTACTTATCGAAGGAATTCCCGT -CACTCTGTTGTTATAACTTATACTTCTCTCATTCCCTGCCCATTTACCAGGCCTTATATT -ACTTATTTTCAATTTGCCCAGCTCTATCCCTAAGTTTTCAACTTCTTATTGGTAAATCAG -ACCTATGACACTGTAAGACAGGTGTTCACTTATGTTTCGCTCCCAAGAGATGATTTTATA -CCAGAGTCCTACTGGAGCCAAGACTTTGATATGTGGAAATATACCCTCGCACGGGAGAGC -CGAGATGTAGCTAAGGCCCTGACCAAATGTATATGAGTACATAATAGTGTGAGTAATATA -GGTTCCCATGCAGGGGCTCCCCTCTCTGAGACGCCAGTCACGGCACTGCTGTATGCCCGT -CACCCGTCAGCATAGAAAACTAGCAAGCTGCATGTTGGATCCACCGCTCCACCTCGAGTT -ATCGACAGCCTCGCTAACCACCCCTGCAGATTTCGTTGAGGGGGTGATGGGAGTGCAGGA -TGATCATAAGAGGATGGAAGATTCTCGAATCACTTCGACATGTTTTTTCTTATTTTATCT -CGCGGCTGGGTATCATCCCAGTCGAGGAATGAGTGACTCCATTACTCCAATTCTTTTGCG -CGCTAACACAGACCAGCAGCTCTCCAGCCTCTGGATTATTTACCAGCCAGTAGCAAAGAT -GGGACGTATGATAGAATATAAAAAAAATTATATATCTTTATTATATATTATAAGAGGACG -TGCCCGACGCATTTCTTCTAGTTTTCATCCATCTGACACCAAGGCCATACCCACTCAACA -TCCCTTTTTTTTCTGTGACATTATGACAAGGAACAAAGAGTGAAACCAGACATGGCTCAA -ACTTGAATTGAAATTCTTCCCAGTTGAGGCAGCAAAAGTTCATTGATATTCCCAAATTCC -CCCCAAATCCGTGGTTCACATACCTGAGGAATCTCCCCGATTCTACCGCCTTCGATTAGA -AAAAAAAAAAAAAATCCTCCCCACCCCAAATGGTTCAACTCGCCCACATCTTCAAAAAGG -ATAAGGATAAGGAGAAGACTGAGAAGAACCCTGATATACGTTCCCGTTCCTTCTCCGCTC -GCACATCTCCTACCTCCAAATCCCCATCCAAATCCCCGTCTAAATCCTCTTCCCCCACCA -AATCTACCAAACCCTCCAAATCTTCCAAATCCTATTTTTCCCACAGTCGGTCATCCTCCA 
-ACTCCGGACCGACCGCCAATTTCAAGTCACGGTCTTCGCGTGACCAGGACGTACACCCAC -TTAATCTCCCACCTGACGAGTTACGTCGTCGATTGTCCGCTATGGCAGCATCCAATGAAG -AGCAGCGGAGCTCAATGGATATCGATTCTCAGGAACCTCAGAATAATGGCGTGAAAGGTA -CTGAGGAACGGAGTCCGACCCCACCACCGCATCAGCCGAATTCAACTTCGGCCGACGAGG -CCGACTCCTTCAAGCTGGCCGGAAACAAGTTTTTCAAGGATGGCAACTATCGACGAGCTA -TCGAAGAATACACCAAGGGTACGCATTTTCTTATCTCACAGGGACGCAATGTCTCATGAA -AATTGACCTTGAATGAGCGGGGTCGCTGATCATTTTTTTATCTACTGCAGCCATCGAGAT -CAATCCTAACTCATCGGCCTATCTCTCTAACCGAGCGGCGGCCTACATGTCCGCTAAACA -ATTTGTCAATGCGCTTGAGGATGTTCAGCGCTCCAACGAGCTCGATCCTAACAACCCAAA -GATCATGCACCGATGGGCTAAGATCCTGACGAGCTTGGGTCGACCCGCAGAGGCCCTAGA -GGTGCTATCACGAATCCAGCCACCAGTGGCCGCCACTGACCGGGCCGCCGCAGAGAAGAT -GCTGCGCTTCGTGAAACAAGCCGAAGAGACATTAGCAGAAGACCGTGGTGTGTCAATGGT -AATCTATTGTCTAGACCAGGCTCGACAAGGGCTCGGACAAGGTGTCAAGGAGCCTCGCAA -GTGGACTTTGTTGGCCGCCGAGGCTCATCTGAAGTTGAACAACGTCAACTCGCTGGGCAA -GGCCCAGGACATTGCAATTTCCTTGCTCCGCGAAAATAGCCAGGACCCGGATGCCATGAT -GATCCGTGCTCGCGCCTTCTATGCCCTCGGTGAGACCGAGCAGGCGCAGAAGCTTCTGAA -GCTTTGCCTCGGGCTGGACCCAGACATGAAGCAAGCCATCAAGCTGCTGCGCGTTGTGCA -AAAGCTGGCTCGCACAAAGGAGGAAGGAAACACTGCTTTCAAGGCCAAGGATTATCACCG -TGCTATCGAGCTTTGGGCCCAGGCTCTCGAGGTTGACCCTTCTAACAAGGACATGAATGC -CAAGATTCTGGGCAACCGTGCTCAGGCGTATATCAATTTGAAGGAGTATGATTCGGCCAT -CCAGGATTGTACCGAAGCTCTCCGCCTTGACCCCGGCTATGTCAAGGCTATGAAGTGCCG -TGCCAAGGCCAACGGCAAGGCCGGTAACTGGGAAGAGGCTATTCGTGACTATAAGAGCGT -TGCTGAGAACAACCCCGGCGAGAGCGGCATTCAGGAGGAGATCCGCGAAGCTGAATTCGA -GCTGAAGAAGTCCCAACGCAAGGACTATTATAAGATTTTGGGAGTCGACAAGGATGCCAG -CGATCAGGAGATCAAGAAGGCCTACCGCAAGCTGGCTATTTTATACCACCCAGATAAGAA -CCGCGACGGTGCAGCGGGCGATGAGAAGTTCAAGGAAATTGGTGAAGCCTATGAGAACTT -GATTGATCCTCAGTAAGTCTGCCACAACTCTTAACACCATTCGACACAGCTAATTTTATT -TTCAGGAAACGTGCTGCCTTTGATAACGGAGACGATTTGAGGGACCCAGCTGATATGTTT -ggcggcggcggcttccacagtgggggcttccacagcgcaggcggcttccccggcggtggt -ggtttccctggcggcggctttggcggcggcttcggtggcggcggcttcggtggcggcgTT -CAAATCGATCCCGAAATGCTCAAAAACATGATGAACGGCGGTGGCGGTTTCGGTGGTTTC -TAGATGAAGCCACCCATTCCGTTCTATTCACTCGATACCACCGATTCTTTTTTTCTCTCG 
-TCCCAAGATGACGAACCCCAAAAATAGGAGTTCGCATGTCTTCCACCGGAGTTGGAGTTG -AACCTCGATATCACATTGATCTGTTGGATATTGGGGATCACTTTTTGACAACTACCACAT -CACGACTTTTTTTCATCATGTTTCCGCGGAGACGCCATATGATTCCCTATATGATAGATT -TGATGTCCTTTTTTCTTCGGTCTAATCTTTACTTCCGCGTTTGCCTTTTTCCCCATGGTG -GCCCCTTTCAGAGAATTTGTATACAAAGTGCTGCAATCTGGAATGGATATACGAGAACGG -TGCATTTATTGGACAATGGGGTTTGAAATCCATATGCACAGAAGACCTACAAGATCTACA -TCCGATTGATCATTTTCGGCGTTTGTCTATTGTCTCGTTCTTCTCGTGGGCTTCGTTCGC -TCTTGACTTGAAGATTTGATTTGCATCCATAGATGTTTTGGCCCTCGATGCAATTTGTAA -GGTTGCAACTCTTGGTTCCGTATAGCCAGGTAAAACCATCTGCGGCATATACGCCTGCAT -GACACCCGTGAAGCAAAGATTTCTTTCAGATATGATTCTTTCTCTTATACTCCGTAGCTT -CGTATTGACTGTTCCTTCTGATGCTTCTATCGATTCAATTCAAGACCTATACCACTCGGA -GTAACGTAATCATGTAGGTGACTTGATCGGTCCATGCTTCCAACGTATCTAAGTACATTG -AATTTGAAAAACAATCAAGACTATCGTAAAAAGATCGTATCATCTGCGCCTCATCAGAAA -TATAGCAGTCGATGAGACGGCATTCGATCAAAGGTGAGCGTTTAGGGTTGTCATCTTTGG -CCAACCTGAGCACCAATTGGCAGTAGGTTTCCATTGCAAGATATCAATACTGGTCTATAG -GAAACTGGGAAATCTTGCTTGGTTCTGGACTCGTGTATACGTTTCGAGGGGCAAATCCCA -ATTCCACACCGGCGTGAGAGGAAGAGGACAGCTGATCTAGGTCGATCCTCCGGCTAGGAC -TCGGCGAGGGATTGATAGGCGAATATGCTGTTCATGTTAGTCTATGATGAATGAGATGGG -CTAAGCAAAGTACATATAATTCGAGAGTCAACTTTGAATCCAAGAAAAACTAGGGAAAGC -TTTGGTTAGGTATCTTTCTTAGGATGGAATGCGTTGATCTCCGAATATATCATCTCTGCT -CCATTCGTCCACCGCTATGCAGGGCAGTAAAAAACGTCACTCATTTTGTGTCTGTCTGAT -AAATGTGTCTGTCTGATAAAGGGTAGGTCATGCTATGTATTCATAATAAACAATATAAGC -AGTCGTAATGTTCTGTGATATGTACTTCTCAATATGCTGAACAACTGTACTAGTAAATAT -AAAACAAATAAATCAGGGGAATCTCAAAAAAAGGGCGGGGGGGATCAAGACAAGAGTAGC -GAATGAAGAAACCGGACAGAAGAGCATAATGCAAACTCCGCCGCTCATGAGGGATGACAT -TCAAGTCGAACCGAATACAAGAAGGGGCATGTAGAGGAAAAAATTGTAAATATAAAGAAG -ATGCAAGAAAAAAAGGGAGGATAGTGGCATAACATGTCTTTAATCAGACATAACCGAGAG -CACTGAGCATAGTTTTGCATTCAGGGACCGGAAGAGCGGAGACACCCAGCGCGCGGACCA -GAGCACCAGCCCGGAGCCGCTGGAGACCGATGGCGACCTTCCAGGCCTCAGTGTCCTGCA -ACACGAAGACATACTTCTTGCCTTCTTGGTCACGAACACTGGCACGGAGAACGTGCAAAC -CACGTGGCTCCGCCTGGGGCGGGTGAACAGGACCACCATTATGGCTGGCAGTCGTGCTGC -GCGCAAAACACGCACTGGGCGAAGCCAGACGGCAAGTATTCTTGCATACTTTGAGCAAGT 
-TGCTAAGAAAGGCCTCTGACTCTTCGTTGGTGGGAACGTTGTTCCCACGACGGAAAGAGA -AAGGGTTGCTGGACGCAGTTTTAGCGGATTGAGTGGCATTTACCTGTGGACACCACACCT -CCGCTAGCTTGAGGGTCCATGCCTTGACGTCCCACTCACGTTGGCCAACACCGCGTAGGT -CATCGTCGATGAAGAGACCTTCGGTTGTGATGCGCAGCACCAGCCCGTCGGTGACCATGC -CCTGACCTGATACAAGTTGCGCCACTGGGGTCTTCATAAAGGCATTGTTGAAGGCGTATA -CACCTTGTATCACAATCTTATCTGGATCCTTTGTGGAGTATGTACTGCGATGTGAAGGCG -GGGGCGTTGTGCGCGCCGTCGAGTGAGAAGGTGTATATGAAGACTCAGAGTACACCGAGG -TTCCAGCCGCATACGGCCGCCGTGGTCGGTCGTAGGGGCGAAAGGAGTACGCGTCGTCTT -CATTAGCGGTTTTGGCGGTTGCGGAGGGCGGTGGTTTCTTAGGAGAAGGTGTGCGAGGCA -CATTGCCAGTACTGGCTGGCTTGGGCGGTGGTGCTTTAGTAGCGGCCTCTTTCTTCGCGT -CTTGCGCCTTCTTCTCGGCGTATTTCTTGCTTGCCGCTTCCTTGAGCGCCGCAAATTTGG -CCATTGCTTCTTTCTGGGCGGCGGCCTTTGCGGCAGCTTCTTTCTCAGCAGCTTCTTTCG -CGGCCTTCTCTTTGgcttccgcttccagcctttccgcctttgccttcgcctcagcttcgg -ccttcgcttGTCTCTCGCGGGCCTCTGCCGCTTCCTTCTCCAGTCGATCCCGTGCCTCTT -TCTCTGCAGCGGCCCTTGCAGCGGCCATCTCCTTTTCGAGCTGCTCACGGCGTTCATTCA -ACTCCTTCTCCATTGCTTCGCGCTCGCGCTTTTTCTCTTTCTCTTTGGCCTCTGCCTCAC -GCTTGCGCTTGAACTCCTCCATCTTTCGGCGTAGCTCCTCTTTGCGCTTCGTTTCTTCTC -TTGCTCGCTCCCGTTCAGCTTCCTCTTTGGCCCGTTCCCTAGCCCGCTCTTCTCTAGCAC -GCTCCTCCTTGACGCGTTCTTCTCTGGCACGTTCCTCCTTGACGCGTTCCTCCCTAGCTC -GCTCTTGCTTAGCTCGCTCTTCTCTAGCACGTTCTTCCCTAGCTCGTTCTTCTTTAATGC -GCTCTTCCCTAGCACGCTCCTCCCTAGCACGCTGTTCTCTAGCACGCTGCTCCCTAGCGC -GCTCCTCCTTCGCTCGCTCCTCCTTAGCGCGCTCCTCTCTAGCTCGCTCCTCCTTAGCGC -GCTCCTCTCTAGCTCGCTCCTCCTTAGCTCGTTCTTCTTTGGCGCGCTTTTCAGCCTCCT -CCTTTTCACGTTTGTTATTATCATAGTTTGGAGGAGGACCAGAGCCATGTTGATTACCGG -AATATTGGTTGCCGTGGTTTTGTCCCCCATTGAAGTTTCCCCCTTGTTGTCTTTGCTGAC -CTGCCCCGTTGTTGCTATAGTTGGCACCGGACGACTGTTGGTTGGTGTTTGGACCGGCGT -TCCTAGGGCCACCCGGGCCACCAGGACCGCCTGGGCCCCAAGAACCACCACCGAAGCCAC -CAAATCCAAACCGCCCACCTCGAGCACTGCTATCGCTTTTATTGTCTCCCGAGCTGCCGG -ACTGAGCAGAGCCAAAAAGCCATCCAGCGACACCGATTCCTACTGCAAGTACCGCCATAC -CCGTGCCACTGCCACTAGCGACCGTGGCTAGATACCATGCCTTCACAATGGGTGATATAT -ACGACCACACGAAACATGCGAGGAGTGTGCTTGCGACGCCCGCTAGGAAACTGTTCCGGT -TGCACTGCAGTGACGGAAGATAGAGGAAGCCCGAGCCAGCGGAGTGAGGTGGGACGATAA 
-AGATGTCGTAATTGCCATCCGGCAGGCCCGAGGATTTGCGATATTCGCCTTCAGTCGTAT -GGGGATTTTCACCATGAGGTGGTGGCGGCGGCGGCGGACCATCCATGTTGGATCAGCCCG -GAGTGACAATAAGGTTAATATATATGTTGACGGAGTGCAGTGATGCGCAGCGTCAGTTGA -AAGAATAGCTACAACACTGGGTTGATCTGCCGGATCAATCCTAGATGGACAAGTTCTCGC -GGTTTAGAGTCAACTGCCAGGTCATATTAGTCTGACAAGTGTTGCACTGAGGCCTCAGGC -GCTCAGTGCTTTGGAGGGTACTCAGATGCCAAACCAAGAAGAAAACACACAGCAAGTATC -AACAATCCAAGGGAAATTCAAATCACAACGCAAAAAGTTGAAAGTGCGGGAGGACCTAAG -TCTGACTCGGGGGGGTTTTGTTTGGAAGAGAAGTGTGGGAGCTAAGGATCGGCGCCAACG -CTTCCTGCATCACTCGGCGTGGCCATCACATCAAAAACGAAATTTGGCAATGCGAACGAG -CAAACAAACGAGAGAGATACCCAGAATTCTCGATGCGGCTCCCCATCTATTATCCGAACT -ATGTTGAATTCCTCGGAAGCCACGCGAATCTACGTAGGGCTATGTGACCGAGCCTAGAAA -CAGCCTGAAGGGATGGAATTACAAGAACAATTCCAACGAGATATTTTGCCTATGGCAATG -GAATTGTTCAGAGTTATGACTACACAAGTGTTTTTTCTTGAGCATATGTCTTTTAATAGC -TTTTTAAACCTGCAAAAGCATCAATGCTCAGAGTCCTGTCAACATGGCAAAAGTACGGCT -GTTTTGTTTTGATTTTACCCTTTTGGGAAATGCCTTGACTATGCGGAATATCATTCTTCC -AAGTTCTGACTGGCGCTTTGATTTCTTGGCTTATACTGAGAGATGAAGCAGACAGAGGCA -AATAGTCTAGGTTCAGAGATATATGTTATCTCTTATGCTTCTACATCAATAGATTATGCA -ACAATTTGAGGTTATTTCCCTACTGTGTAGTTGAGAAAGATTCGTATTCGCATTCGGCCA -ACTTGAGCCTCCCAGCACATTTTGAAAAGACTCGGGACGAAGTACCACAGTATTCATACC -GTTTTGAGAAGAGTACAACCATGCGACCTTCCTGTAATTCTTATAAGCGATTGCCACTGA -TTTGTATTTTTATATCCCATTATATCTCAAGCTGCGCACCGGCGCAAAGAACACTTGGCC -CAAGTACTCCATATGTCTCTCAAATGTTTTGGCCCCAACGACACTTAGTGCTGGGGGCAG -AGCGCTAGAACCCTTGTTGATCTTGCGGTCAAGCGGTTGGGGGAGATAGTTGAACGTAGT -GGGTCCAAGGTATTCGAGTTGGAGGACGTACCACCGAGGCTTGTTCACTGGATTAGAGCT -GATAGTGCGGGGAGTGTGGTAAAAATCGACTGCGGTCATAGTTTAGGTTAAAAAAAAGTG -TCAACCAGCTATAAGTCAAGGTCACCGTCCACGGGTATGGTGGAAGCGCCGTGATATATG -TCCCATGATCGGGGCATGAGGTGCGGGACCATGACCAGGTAGTAGAGGATGCCGGGCCTT -ATGAGAGGCACTTGAGGTCATCCTGGCCCACTTAGGCCATGCTGAACACTGGATATTATT -TTGGCAGTGGGACAATGCAGTGAAAGTAATTTAGGAGAAGTGAATGCGAGAGACAACTCG -ATGGTGAGGAACAACCTTGACTCGGCAGAACGTGATCGGCAACCGATGATTGGGTGGCCT -TCCACTTGAGATTCCAACACCCTCTCTCTCTCCATCGGGGTGTTGATCGTCTTCTTTCTA -TTTTACCTCAGGTTCAAATCTTCTTGTATCCCCCCTGTGGTAGTCTGGTAGTCAGGTACT 
-ACCCCGTGTCTCCGCCTCCCCGCCCAGCTGTCCTTTTGGTCCCTCACCCAGGCACCTATC -TCCGCCAACCTTGAAACCCCCCAATCTCCGTCTTACTCTCTATGCCGTGTCGCTAGCCAC -CGAGAACCCATTTACGACCTGCCCTTGAAGGGCATCCCGAGATCATGTCCTCACAATCCC -ACCACGGTCATGCCGCGCCGTCACGTCAATTCCCCGTCTACAACCCCGTCGCCGCAGTGA -CAGCACCTGCGGGGACCTTGCTTCCGGGCACCAAAATTCAAGTCGGCAGCCATCGCGTGG -TAGTTGAGAGGTACCTATCCGAAGGTGGGTTTGCTCATGTCTACGTGGTGCGATTGCCTC -AGCCAGTCAATGGGTCCGAAACCGCAGTGCTGAAGCGGGTGGCTGTGCCTGACAAGGCGG -CCCTTGCCAACATGCGCACTGAGGTGGAAACGATGAAGAAGCTCAAGGGCCACCGGCACA -TTGTGAAGTATATTGATTCGCATGCCTCGCAGCTGCGCGGAGGGGGATATGAGGTGTTCC -TGGTCATGGAGTACTGCGCAGGAGGTGGTCTTATCGACTTCATGAACACTCGGCTTCAGC -ACCGGTTGACAGAGCCGGAGATCATCAAAATCTTCTCCGATGTTGCTGAAGGCGTTGCAT -GCATGCATTATCTTAAACCACCTCTCCTCCATCGTGATCTGAAAGTTGAGAACGTTTTGA -TCTCAGGTAAGGGAAGTTCCGCCACTTACAAACTGTGTGATTTTGGATCATCGGCACCTC -CTCGTCCCGCTGCTACTTCCGCCGCCGAGGGCCGCTTGATTGAAGATGATGTGCAGCGGC -ATACAACGCTGCAGTACCGAAGCCCAGAGATGATTGACGTCTATCGGAAGCAACCGATTG -ACGAAAAGAGCGATATCTGGGCACTTGGTGTGTTTTTGTACAAATTATGCTATTATACAA -CCCCATTCGAAGAGGTTGGTCAGATGGCCATTCTTAACGCGACCTTCAAATACCCTTCCT -ACCCTGCATTTTCGAGTCGGCTCAAGCTATTTATTGGTAAGTTTGGAACTTCGATTTGGC -ATATAATCCACCATTGATGGCTGACACTCCGCACAGGCTCCATGTTAAAGGAAGATCCCC -GCAATCGTCCCAACATATATGAAGTCGTGCGTGAGGTGTGTAAGATGCAAGGAAAGGAAG -TTCCTATCAAAGACGTATGTTGCCACACCCCTCATAAGTGCAAACTTGACACTCACTGTC -ATTTAGATCTACACCAACCGGTCTGTTTCAGAGGCGCGCAAGTATCAAGAGCTACCCCCC -ACGTCGACGGAGGCTCCTGCGGTGGGCGCAGTATTTTCGCCTCCGATGCAGGAGACCGAG -ATAATCCCTGAGATTGCGCCCATGCGGCGTGGTCGACCTGGAAAGTCCCCGTCGTCCCAG -CCTACCTCTGAGAGACCGAGTCCTTCGCCCTATCGAGCCGCCCCTGAAGGCTCGTCAAAT -GATCCGTTTGCAGCCCTGGATGGGAGCGCAGCCCGAAAGAAGACGGCAGAGGAAATGTCA -AAACGATTCCCCTCCTTGGACCAGTTTGACATTTTGCATGAGAAAGGGGACAAGTTTGAA -TTCGAGCCAACAGTAGAATCCAAGGCCGAGGATGAGGATCTTTCCCGAAGACTCACCAAC -GCCCTAGCCGACGATGCTTTCGCCAGACGTGCCTCGCCTGAACGTGCGCCGAAGCCAGCC -TATAGGCAACCATCACAGGCGTCTCCTATCCCAGCAGCTAATTTGCGTGAAATTCCTGCG -CCGCAGGCTATCCCCTTGCACCAACCAACTCCTCAGCGGCCCGTGATGGTGTCGACGGGC -ACCATGACTTCACCCATCCAAACACCTCGTTTGCCGGAGCCAAAGCTGTCCAATCGCCCT 
-ATCTATCGATTCCCCTCGTCTGATAATGAACCCCGATCTTTTAACCAGCCGTTTACAACC -GAGGAAGAACAAAGAGTGACACGACCGCACAAGGCGCCATCTCCTCCGACGTCGAGCTTG -AATATGGAAAGAAGCTCGCGGCTGCCATCTGACCGTATATCCAGCCAGCCTAATTCTGCG -CGCCCCTCGATGGAGACCTTGAGACGGCCTTCGACATTGGAAGTAAATGATCCCGTGGGA -CGTTCTAAATCCGCCACTGGAAAGGTTCGGCCAATGTCAGTCCAGTCGGGAGCGAGGTAT -GACCTGCCGCGTGATTCTGAGAGCCCACGGTCTTCGCTGGATATGTCCCGCCTACAATAC -GAGGGTGGTGCGCCATTGCGGTCAGTGCGTACCGATGCAGATCGAGATTCTGATCGGACT -ATTTCTTCTGATGTCGATTACCTACGCGCGATGGAAGAAGAAGAAAGCAATCGAAAGCGC -GAGAAACGCTCTAGCGGTAGTCACAAACACAACAAGCGTGGTAGCCTGTCTACCTTGTCG -CTTTCGGGAGGCAAGAACCTATTTGCTAGTCGGTTCGGCGACGCATTCCGCCGATTCGAG -GCAGGTAACCAAGAAAAATCATCCTCACCGTCAGCAGAGGATGTGCCTCGGTATGGTCTG -ATTGGGGTGTCGGATTCAGCCGATGAGGGTACCTCACCAAATGACGATATTACCTTGGAA -GATGTGGATCGGGACGACATCTCACCAGAGATGCGCCGGGAACTGGAACGTCGCCGACTC -TCCCAAGAGGAGAAGCGAGTGACCAATGCCGCAGCGGAGTACCGGCGTCGGGTTGCCGAA -ACAGGTGACGGGGGTGGAAGAGTGATAGGTGATGGTGCTCGCTCTCGTACAATTCAGAGT -AAGGTGCAGTCACTGCTCGGAGAGTCCGAAAAACCGATCATTCCAAAGACTGCTACTGGG -TATGGACGGTTCACGGAGACTTCCTCTTCTTTGCAGGCAAAGCAGAGTGAGGTTAAACCC -ACTACCTCCAGCCAGCCGCCAGCTTCCCACACATCTGGCATGACTTACACTCCTCGTAAC -ACTTCGACAACACTCCCGGATCGCCGAGACGGCCCCAATATCTCTGCAGGTCTCCCGCAA -ACTGTCGCCCCTACCAGTTATCCAGCCACGCAACGTCCTCTAACTCGACCAGCTGCACCA -CCAAAGCCGAAGAATTTACGTGCTAGTGCAACCACTTCGCGACCAGGTACTGGTCATGGC -CATGGTACCTCCCAAGAGGCCCCGGCAAGTCCTGGAGAGGACTGGGAGGCTAAATTCAGT -CAGCGTTTCCCAAGCTTGTCCGGACTTGAAATGGAGACAGAGATCAAAATCCCGAAACTT -TCCAGCTTGAGGACGCGGGAAGTGTAGAGTACAGGATACAATCTTGTTTAATTCACCTGA -TACGGTATCAGCGTGTGTTCGTGTGTTCGCTGTGCGGGCGCAACTCGGGGAATGACATTG -GTTCCCACGAGGTATGCATATATAGACAGTGACATATTGGATATTACATAAATTCTCTTT -TAAACCCCGTGGACATGACTTCGAAAACACTGCCCCCATCTTTCATCGTAGCTAAGAGCG -CGTTTGACATCAACTACAAACGTTGTATACTTCAACCGTCCCCACACCGCCTTTGTTTCT -CCCCGCAAGAGTAAGAGAAACCTCACTTTCAGCTCGTGAGCCAAAACCTCTTTTCAATCT -TTATTAGATCATGGCGCCTGTAACTTTGAAAACAGTCGATGGTAAGTTACAAGAAGCAGA -TATCTCTGAGGGTACACAAGCTAATTCCTCACGTAGACGACCTCAAAGATGTGATCCAGC -ACCTATTCGAAATTCAATCTGCCGTCCACGGCTATCTGGGACCAGAGACACAGCAGGAGC 
-TTGTGCGAAAAATGTACGTCAGCTGCAATAGTATCACATCTCCCCTCGATTCCGCTATGC -CCCCAAGTGTTCCCCAAAAAAAGCACAAGTTTCCTCCGACAAGCAACGTTGCTGCACCCC -AGCCCTACACACCACCCAAAACTAATACAATCCAGCAAAAACCTAACTGTCGCCCTCTCG -ACCCTTTCAACACACACCCAACTCCCACCAACACAAGAAAATCAGTCAGACACCAATACC -GACCCCTCCAACCCCTCCCTCGCCAGCATCCAGCTCCCACCAGAGATAATCGACTACGTC -GACTCGGCGCGCAACCCGGATATTTACACGCGCGAGTTCGTCGAGCTCGTCCAGCGTGGT -AATCAGGACCTGCGCGGGAAGCGCGAAGCTTTTGCGAGCTTCCGCGACGTGCTGGCTCGC -GAGATGCGCAGCGCTATGCCTGAGTGTCGCGGAGAGGTTGATCGGGTGGTTGCTACGACg -ggtggggctgttgatgggcctgatgctggggctACATCTACACTTGGGGGTGGGAATTGA -GGCATTTTTATAGGTGCAAGTATGGAATTTTCTAATTGGTTGACTTGCCTCTTGTTGGCA -GGGGTCAATCTTGTGGGCTGAGATCTAGTCCAACCTTAACATTCAGTCCTTGCCTTCAGC -CATTCTGTTTGAAGTGCATGGAGCTGGTGCATTCATTGGAGTTTTTATGGTTCGCATCGA -GCGCGGCGCTTGCAAAGCTTTCTTTTTTTTTTTCCCTTTCGGGTAGCGCTTGCAGTAGGG -AGTTGTGATATCCGAGAAGTTTTTATGATGGCCTTTCAATTGTTGATTGATTTAAGCACC -AAGCCTAGCAGCTTTACCCATATGTAGGACAACGTAGTCAGCTAACTATCACTAGTTAGT -TAGTTATTTACCCATGGATACTCCATGCCTAATAAATAAGGCTTTTGAGATCATGCTATG -GCTGACGTCATCGGATGGACATGCCCAGGTGAAGCAAATCTCGGAAAGAGACTTTCCGGA -CCTCAAGCACAATGCCAACCCGAAGGAGCCGATCTGCAGCAGCATGTGTCCATTGTCGGC -AGAAAAAGGTGCGTATTCTGGGCCTCTATAGTTACGGAACTTGAACTCATCGGATTAAAT -AAGTAGTTGAAATGTGATGCCCAGAGGCCCCAGTGTGGGAATTGTCTAGCTAAAAGTATC -GATTGTCGGTTAGGCGTGGTCCAGAACAAGCCAAGGTACAAATTAGATACCTACGATCAA -ATTTCAGTTTACTGATGCTTTGCTTTTGGGCCGACCAACCACCGCATTGCGGAATTAGAA -AAGGAGAATGAAGTCCTTCGACAAAAATGTGCCCAGCTCCGGGGCAGCAACACCATTTCT -GCTGCAAGGGTCCAATCCGAGCTGGAAAATCCTGTTGCACCAGCAAGCAGTCCATGTACT -TCATTGTCATCCTCAAGAGTAGCCGGTATTGGCCTCTCAAGGAATGACTTTCCCTCCACT -TCAACTAGAGATGCATCGCACCCGGAGGTACCAGAGAACAACCGCAGAAACACTTCATTG -TATCACGGCCCAACAAGTACGGTATATGATGATACGAGTCCAGATCACAACGAGCAGAAC -AGACATGGTCAATCCAACGAAGAGGGAATACGGCATTTTCTCTTTTCCCAGACAGCGCGA -CAGAGTAAGTCAGGTCGCATGTAACACTTGATAATATTATGGTACTAATTGAGCAACGAC -AGGACAGCTAGAGCCACTCAATCTTGCCGCCGGAAAACTCGACTTTGATGGTACGAATCC -GGAAATTGGCATGCATCTACTCTCTATATACTGGAGTCGTCAACTCTACACGGCACAGAT -AATCTATCGACCGGCTTTCATGCGGGACATGGCCTGCGAGGGACCATACTTTTCCAAATT 
-GCTCTTGAACGCGATATTTTTCGTGGTCTCGAAGCACTGCGACCGACCAGAGCTTCGGTC -CGACCCAAATGACATTACGACCGCAGGATGGAAATTCCGGCAACGCTTCACGCAGCTTTT -GCGAGATTGCTTCGATAAAAGTGAGATCACTACTCTCCAGGCATTGTTGATTATGTCCAA -TGCACTTTTCTCCAGATGTGATGAACGGAGTCTGTCGTGGCTATACGCTGGAAATGCATT -CAATATGTTTATTGATCTTGGTCTTCATGTGCTGCCTGATGTTGATAGTATCCCTGCCGA -GGAACTTGAAATTCGAAAACGAGTTCTCTGGGGAGCATACCGTGAGTGATCACTTCCCCG -TACCAGGGTATCTCCCACGGATTACCTCTAACTTCTCTGGCAGTAATTGATAAGATCCAG -TGTCTTTTTCAAGGGAGACCTCCACTTCTCAACCGTGTCAACTTGAGAGCGTCACTAAAT -TTTTTGGATGACTATGATGAACTTGAACCTTTCCAAGCTATAACATACACAACAATCAAG -CCTCGAGCTGTAGTTCCTTCACTGAATGTTTCTTTGCTCACCAATTTGTGCGGGCTGACA -ACGATTGTGGAACGTATTCTTCGTGAGATCTATTCGGAATCTCGAGAGTCAAATCTAGCC -CACAGAGCAAATATCTCGGAGGAAATCAAGTCACAACTGAGAATATGGCGCCAAAACTTG -CCACTTAGACTTGATTATCTCTCATTCCCAGAGCAGGCGGTTCTCCTGCCTCAATCCGCC -TGTCTTTTGTATGTTTCTTCGCACCAACCCATACTAATTACTAGTAACTAACTGTTCCAA -AGAGCCTTATTCAACGTGCTTATCATTTTGGTACATCGTCCTCTAATAATCGGCCACGAC -GGGGTCATCAATTCAACAACTGCCCACGAGTCAGTGAACGCATGTACATCAGCGGCGAAT -CAAATTGTTCAAATACTCCACGACTATTCGCAGCACTTTTCTCTAAGCAGTGCACCGTAC -ATGCTTTCTTATGCAACCTACATCAGTGCAACAATCCACGCACGGATTGTTGCCCAGAAA -GGGAGTAACTCAACCGTGTTCCAGTCACTTGTTCTTTGTCGGAATATTCTTTCTGAACAT -ACACGCCTTTATTCTGCAGCAGAAAAAGCCAAGGAAAATTTGGATAAGTTGATCTCTCAC -TTAGGGATCAGTACTACGGACGACAATCGGCGCATTGGAAGCCCTGGGGGCCCTGGGAGC -ACTGGGAACACCGTTCCCAGTGAGCACATGGTCATAGATGAGTCTATTAACGTTGCCAGG -GATGGCGTCGCAGACTGTGCACTTGAATTTGGATCCTCTATAATGAATCTGGAATTATCG -GACCTGGACCTGGACGCAATTGCTCAAGGCTTTCAGGTGGATGTCGAGTCGCATTCGTTC -TGGAATTCTCTGGTATGAATGGCCATATCCTCCCAACATGCCTACATAGATAGCCCTTCA -TGGTTCGTGCAATGAACAATAGCGAGACAAACAAAATTTGCTTCAATGGCATATATGATG -CACTAGCTACTGTAAGGTCTAGAGGTAGAGTTCTGCGAATTTTGAGCGACAATCTTCATA -TTACTCACCAACGAGCTAGTTAGGGCACCAAAATGAGTTATTGGAAAATGATATTCATGG -CAAACCAATTTAGTACAGACCCGATGTCATATTTCAGAGATAAATTGATATCCGGATAAT -CACATCCCGAGGGGGTAAAATCTCGGCTCATCTCCGTGACCACGCGGGGTTTTCTAACAA -ATTTTACGGAGTACGATTTGAATGATTCGACGCCGAATGAGCTCGGCAAGTGTCTCCCGA -CTACACTCCGCAATTTTAGGAGACAAATGAGGAGCCGTATATAATCCATTACCCCTCGAA 
-CAGGATGTCTATTCCACACTATCCCAATACATAGCGCTTACAGAACACCATGGCGTGTTC -TGACAAGAAGTCCTGTGAGGGCTGTTCGTGCAGTAATGAAGCCCAACCCCAACAAATCAA -CATCGAGGACTGTCTTAGTGAGTTGCAGGCCCTACGACGACGCAATCAGGAACTAGAAAC -GCGCATGAGAGTCACAAGCGAGGCAGATAATGCCCAAATCCCCGTCAAACAACCAGGACG -AACACTTCGTTCATCGGCTTGGTTCGACTGCCGCAGCAACCCCGGCATGACAGCTATCTA -CATGGAGCGGTATTTCAACTATGGTATCACCAAAGAAGAGCTCATGTCTGGTAAGCCGAT -GATTGGAATTGCGCAGACGGGCTCGGATATCGCGCCTTGTAATCGTCATCACATTGAACT -CTCTAAGCGTGTTCGTGAGGGTATCCGGACTGCTGGCGGGATTCCATTTGAGTTCCCCAC -ACATCCTATTCAGGAGACCTCGAGACGACCTACTGCTACCCTTGATCGTAACCTGGCGTA -TCTTGGCTTGGTAGAGATCTTGACGGGGTATTTCCTTGATGGTGTTGTGTTGCTTACTGG -GTGTGATAAGACAACCCCGGCTTGTTTGATGGCTGCGGCTACAATGGTTGGTATTAGACC -TGAAAGATCCCTTGACCCATGCTGATGCTTCTAGAACATCCCTGCTATATGCATGAACGT -GGGACCTATGCTCAACGGTTACTCCAAGGGAGCTTTGACCGGAGCGGGCACTGTACTGTG -GCATGGGCGAGAGCTCTACGCCACGGGTGAAATTGACGAAAATGAATTTATGGATTACGT -TGCCCGAGGGTAAGTCATTTTCAGACCCCCGATTAACGGTCACGGACTCTAAGACGACCT -CCAGAACCCCGTCAGTTGGCCACTGCAATACCATGGGAACTGCCTCGACGATGAATGCCC -TTGCAGAGTCCTTAGGAATGGCACTTCCCGGGTCAGCAGCCATTCCCGCTGCCTACCGTC -ACAGAGCCCAATGCGCGTATGAAACAGGCAAACAGATCGTGGAAATGGTAGAGGCAGACC -GGAAACCATGTGATCTCATGACACGTGAGGCCTTTGAGAATGCTATTGTAGCGAACGCTG -CAATCGGCGGCAGCACGAATGCTCCAATTCACATCAACGCTATCGCGCAGCACGCGGGCG -TCGAGGTGTCTATGGATGATTGGGATACTTTTGGGTCAACCATCCCGCTGCTGTTGAATA -TGCAACCCTCGGGGGAATATCTTGGTGAAGAATATTACCGTGCTGGAGGATTGCCAGCTA -TTATGGCTGAGCTATTGGACCAGGGCAAGCTGAACGGCGATGTGCTAACTTGCAACGGCA -AGACTCTTGCCGAGAATGTCCGCGGGAAGCATGCATGGGATCGGCGAGTCATTAGGCCAT -ACCACGATCCATTGATGAAAGACGCTGGATTCGCCCATTTGAAGGGTAATCTATTCGACT -CTGCAATCATGAAGACCTGTGTCATCTCACCGGCATTCCGGGAGCGATACCTCTCCAACC -CCGATGACCCGGAGGCATTCGAAGGATCAGTTGTGGTCTTTGACGGACCCGAAGACTACG -AGCATCGCCTAGAGACCACACCACACATTAACGAAAAGACCATTCTTGTTATGCGTGGTG -TTGGTCCAATAGGGTATCCTGGAGCAGCAGAGGTCGTTAACATGCATCCGCCAGGCCGAC -TCTTGAAAGAGGGTATTGATGGACTCTTCTGCATCGGAGATGGCAGACAGTCTGGAACCT -CTGGATCACCTTCTATCTTGAATGCTAGCCCCGAGGCCGCAGCAGGAGGCAACCTTGCTA -TTTTGAAAGATGGAGATAGACTACGCATTGACTTGCGCAAACGACGGGTTGATATTCTTA 
-TTACCGATGATGAGATCGAGCAACGAAGGAAGGCACTGGGCACAGAGGGATATCCCATTA -CCCCGAGTGGTACCCCGTGGCAGGAGATCTTCCGTCAGGAAACTGATCAGCTGAGCAATG -GCATGGTGTTACGGAAGGCAGTCAAATATCAGCGTCTAGCTCAAGAAGGCCAGCCACTGC -GCCATAATCATTGAAAAAAAAAAACATCGACAATAAGAAACGAAGATACTCAATGAGATC -CGCTTGTATTTGATCAACATAAACAGTATATCACTACAGCCCAAATGGATAAGTTTCTCC -CTTCCCAAGCGGTGTATCGGGCTGGATGACTTCCAAGGGTTGAACAGCCTTGGTCTCATC -AAACCAGACATAACCATCGAACTGCTTGTGAAGGAAAGCTTGCGAGTAGTGAGAGACACG -CTCTGTGTCAGGTCGGTAAATGACACCAATAAAACGCTGTAGGCGTTGCTCAGCAGCGAT -AGCCGAGCGCAGAGTGGGATCTATACGATCCTGACGCAGGTCGAGCACGAAGCGTGGGAC -CCCAGTATCATGAGCCACAATCTCCCAACTATCGTCCCGGGATGGACGGACAGACATGAC -CTCCATATCATCGTCCCACTCATGAGCAGCTGCAACTGTGCCAGTGTGAGTTCCGCACCC -AAGAATGGTCACATTCTCGCGTCCCAGTCGCTCACGACATAGTTGCCCAATGTTGAGCTC -CTTACGACGAAATCCCATGCTTGTATAGCGCGCATCCCCGACATGGGAGTTGTGCGCCCA -CACAATGGCTTTCTCTCCCTCGTTGTGCTTTAAGAGGCGTTCCAAAGTTTCAAACATGTG -CATGTCACGCAAGTTCCATGAACTCGCTGAGCTGTAGTACATTGCTTTGTAATACTTCTC -CGCGTCACGAACCAGGTGAGCATTCTGCTCGCCGCTGTGGTATTCATCGCCATCCCGCGA -ACCGATGTATTCAAGCCGTCGCTCCAAAAGATCTCCAAGCATCTTCATCACTGGGCCTTC -ACAATCTTCCATGCCTCGAAGGCATGCCAGACCATATGAAGTGGGATCGTCAACCCAAGG -CTCCAGACACCCATACCGCTGACGTGCTTGCTTGCTTGCCTGTGGGTCTACTCGGTCCAA -GTAGTTGACTACAGCACCGATAGAGGCACCCATGCTGTACAGGTCCAGCCCGTAGAATCC -AGCTCTTTGCTCTGGTGGGAGGGCCTGATTTCGATCACGCATCCATTCAACTAGCTGTTG -CATCTCATAATTTCTCCACATCCAGGTAGGAAATCGATTAAATGGTTCATATCCTTTTGC -ACTTCCTCCAATTTGCGCCTTGGGACCTGGGCGTAGGCGCACGTATCGGTCAATGGCTTC -CGCATCTGGCCAGTCTGCTTCGACTGCGACCATAGTATAGCCGTGCCGTTCGATGAGTCG -CTTTGTGATCTCTGCACGTGCAGAATAGAACTCAGATGTTCCATGACTGCCATCGCCTAG -AAGAATCACTGGATTACCCGCAAATGTATCGAAGTGAGATGCGAAATTTTTGTCTTCGAT -TGCAGGCAAGGGCTGCACTGCTGAATTAAAAAGCTCCTGTAGCTGAGGCATCTTTTTCGA -AGTCCGGAATATAGATGCCAATCGCATGAACGGCTGAGCTGTGATTGGGAATATCTCCAC -ATGAAATGTCTCACGTAGATGAGTATGTCGAAACTGAAAGGTCCGACATTGCCCATAACA -TTTCTATTCTTTTTTCATCGATACAACATATGTTTTTTGGTTCTCGGGACCTTTCCCAGC -GAATTATTCACATCCCGCCAGGTAAACAGTCATCATGACTTCATCGTTGAAGGCAAACCA -CAGAGCTTCAATTCCGATGACAACAGAATAAATGACATTTGGAGATTCAAATCGATAAAA 
-AACGCAGTGACCCAGGTTGCTCTTATCCAAACATATCCTCCTATTCAAAAGGTACACTGT -TGGACAAGTAAAAGTTCACCCCGGCACCTACGCATATGCCCGCATTGTTTCGAAGGCACA -AACCTCGACAATGGCTCTCCCCATCTGCAGTCCATCACCCCGCCTAGACCGCTTCGGACA -GGCCATGGAGTCTCTATATGGGAGTTTTAGCAGCATTGAAGATCCAACAGAGTGGACTCC -ACCACCAAGATCAGGTGGTCACCGGGGACGATACCTATGGACAGATGCCTTTGGGGTTGT -CAATTTTCTCACGATGCATAGCGAGTATAGGCGAAATGGCAATGACACCATTGGTGACGA -CCGTTATTTGATATTGGCAGCCCGTCTAATTGAGACTGTCCATGAGGTTCTGGGTAAGAC -GCGAGACGGTCGCTCAAGACTGCGAGGAGCGACAGACACAACCCCACTAAGCGGTGGGTT -GAGAATCGGCAAGACAGATGCTCATGGTCCGGACGGTGATGGGCAATATCATCATTACCT -CACGGTTTGGATGTTTGCCTTGAACCGAATGACCAAAGCATCGGGGGATGTGAAGTACAA -CAGGCAAGCCGTAGAGCTCGCAAAAGGGATACACCCAAGGTTCTTTGTGGATCACGCGGC -AGAACGGCCGAGAATGATATGGAAAATGAGCATGGATCTGTCAGAGCCAATGGTCAAGTC -AGAGGGTAATCTTGATCCGATTGATGGCTTTGTTATTTTTCGACTCCTGCAGGCTACAGC -CATGGAGGCAGGTGATGGCGCGGTCTTGACCGAGGAGATTGAGGACTATAGACGCACAAT -GGAGCGCAAAGGAGAGCACTCTGTTTCAAGTGATCCGCTGGATTTGGGTATGACTTTATG -GACGGCACATTGGTTTTCTGAGCGAGAGACATGGGCTGCTGACCTGGCGGGAAAATGCTT -CGAGCAAATATGTCAGTCGCCTTTGGGTCAATACCGAAAAAATATATGATGAGAACCAAG -CTGACTGGGGGTGATTCCAGACAACTTGTTCGAGATCAATCAGTATTTGGAACGCAATAT -CCGATTCCGGTTGGCCTTTCGCGAATTTGGTACCTGCATGGGCATCCAATGCCAGTCCGA -GGTGAATACGGAAAAAGAACGCTCTGTGGATTTGAAATGCTACTCCGATGCTATCATTGC -TGCTTGGGATCCGTACATGCAGCTATCCATATCCGATGATCTGACACCAGTGGACTTGAG -GCCTATCACGCGGGTGATGTATGCGTCTGCATTGATCCCAGGGGGTAAGTGAAACCATCC -CATATGATGAGTTTGGTATTTTGACGTATTTTGACACTTTCCAGCATTTCAAGCAGGATT -CCTGGGACAGGAACCAACACCAACACTCTGGAAATGAACAAAATCCAAAGAGATGAACAG -CCTCGTTCCGAACTCCCAAGTATTTTGGGAATAGATAGGCTGTTCTACAAGAATCAAGGA -GTAGGGCAACGACCCTCAGAGGGATGCGATGTAGGAGATCAGAAGTTATACATAGAAAAG -AGTCCTGCACAGATTATGCTTTTCCCGCCTTTTCCCGCCGTCTACCTGTAAGACTGGAAA -GGGGATTTTATCTTAAAGATGCCTGAGAAGACCACTTAGGAGCCCTGTAAAAAAGACATC -ATAGGGCCGAATAATCGGCCGGCTACCCTTGTTCCTCATAAAGACATAGGAAAGTTTGAC -TTTCTGACTTGACGAACTTCTGTGGAGATATTAGGGCCCATGGGTTGTCAACACAGAATC -GTCAAATTCACCCTACCACACTCTGATAATTTCAATTTTCCATTATCGTCCTCCAGGTAG -GCAGATTATCCAAAAAGTTCTGTTGATATCAGGATTAGGATCGAATGCAACATGTTTTGC 
-GTGCTCAGTTATAGGGGAGTCATCCTCTCGGCGGAGTCATCCTCTAGTTCCTCTTGTTTT -ATCACAGCGGCTCGTTATCATGTGGCCGTTGACAATGATAGTGCTACCGCCAGTGCCGCC -AATGCTGCGCTTCCAATCATGGTGGTGATGCATGGGAATAATATGATGAAGTAGACATGT -TGGGCTTATCACTCTACGATGCACAACATAGGCATTAAAGCATAATATGTACAGGCAAAC -TTGCCACGTATAACCATTTCAACATCAAGATGAAGACGGCTTGAGCAGTTCTCCATGCCC -AAAACAATGCAATCTATCATTACCATTCGGTATATTCACAGCTGCTAAGAATCAGGCCTC -GTAGGCAAGACGGATATCTATACTCCCGGAAGTCCAATCACGTCCCTCAAGGGTGCTATC -CGTCTCGGGAGTATATAGATAGCATACACAGCACTCAAAGCAATGAACAAACCAGACCAC -TAGAGAAACCGTCAGCCAATAGAATTGCATGTCGTGGCAGAAGTTCGCCAAAGATTCACA -TACCTTTTCCAACCAAGTATGGAACCAGATTGCGGTCATGAAGAGCATCCACCAGCCCAG -GCCAACAACACCCCAGACAAGACGGAGTGACCACGTTTCTGCGAACCCGCCAGTCTCGGT -TGAATCGGCCACTGGAGTATTATCCGCATCACTGGACTTGCGCTCACGTCCCGCATCTCC -AGTAGAACTGAAACGTGACAAACAGGCTGGTGCGCTAGCCCCGACAGCCTCAAAAGCTAG -GAATCCCGTGGCCAAAACTAGCATCAATACATGTCCACTGATATCATGGCCACCCCTCCA -TGCGCCACCGGCAGTTTTACACGTCGCTGCCGTGAATAGTGTTTCCAAGTTGGTGGAGCT -GCTCTGGCCAGCGGCCATTTTCCCGGCCTCATCGAGGGCTCGTTCACATTTGCCGCCCGT -AGCCACGAATCCGCGATCAATGATTGCTGGCCCGAAGAACCATTGAGTTGCAAGATACCA -TACTGTTGTAATGAGTGCGTAACGTAGAATGGCTTGGATGGTGCGTCGGAAACGTGCTTC -TTGTTGTGCGGAGGATTCCCGGCGAGATGAAGAGTATAGTGGTTGGAATATGAGGAGCGA -GACAAAGGCAGCTGTGGTCCATAGCCATCCAATCTTCACAAAGTAAACATTGAAGATATT -ATTTTTCCGGGCGAAGTAGTTGACGGGGCCTTCGGATAGGTACAGGTCTGCAGCCAGGGA -TGGGGCCAGAGGGCTTGCGGGCTGTGAGGAAGAGCTGAGCTGGTCTCTGGAATGACGTGC -TGTTGGGGATATTGTTGAGAACAAAGAACCCAGTAATAGTGTCGCAGGGTATACGAGGAG -AACAGCCGATAGGAGAGCGGGAGGTCGCGCGGTGGATGTGGCCATGGTCGTAAGCATAAG -GCGAGATTCGCGCAGCAGGTGGTCTATTGTAAAATACACAAGTAAAATTCACCGGGACAT -TCAAATGTGACCACAGAGCATAGTTGTTGCAATTGGGAAGGTCGAGGCAGGATAAGTTGA -CGATGACCTGGACAGATGACATCAGGGTTTGTCTGCCGAGGCTGCTGCATGAATGCCTTA -GAATGCCTCAGGCGTTCAGCCAAACATCAACCTCCCAACATCAGAGCTAATTCTCATGTA -TACAAAAGCTTTGAGAAGAACATCGAACCTCTCAATTATATATCTATCTAACTGCTTACT -TGCTTCCGATTTATCCGAGATACCCAAATACCAGTATACATTTGATTCGTGATTACAGTC -GACCTCAATGATGATGCTAGATCGATTTTATGTCAATCACCGAGTGAAGAATCCTGCCCA -CAACTAAAGCTAGTGCCATTATCCCCAGTATTCTCGATCCATTCCCATTGACCCAGGCGT 
-CTTAAAATACGCAACGACTTCGTCTGGGCTAAATGCGTGTTCTCATTCCTTTTGCTCTTG -GGCAGTCAACCTGGCACTGGTCGGATTTACTTGCAGATCTTGAAGGTTACTAGGAACTGT -CTTACATGAGACACAACAGCCAAGATCGGAGGGCCCTGCGGCTATATGTCCCGTTCGGGG -GTTGTTTGATGCACAAGCAGATATCCAGTACCCTGAAGCAGAATAGTTTGTTTTTGAAGT -AACTTTCCCGGTGTGCTGGCTTGTTTCCTACTGCTTGTTAGGGTTTCTTTGGGAACTTGT -CATTGGCCGTATGGAAATCTTAACTCTAATTTGGAAACACTGTATACGTATATCTAACCT -AACATTTGCTCAGTGCCAGAAGTGGATATATATCTCTATAAGCTTTGAAATGCCGTTCAC -CTTCGTAGCAGACCTCCATTCAACGGTGGGGTATTTAACGAGCCCTGGATATCAAAGAAT -TCAAGTGAAACCGAACAAAAACCAAATGCGTCAATTTTCATCATAAAATGTGCCCTCGCT -GGACCATTAACCGAGTGCCTGCATGGCCAACTCGACACTGAGGAGGTGGGTTAGCTGCAA -TTCTAGAAACACAAAGATTATGAACGGGCAAAGTAAAGGGCCAACATACGTCAGGAATGT -TCCGGCAGAAACAGGCATAGCTCTCAGCAGAGTAGGGCCAATACCCTTCCAGAAACCACC -GAGACCCTCAACAGCATAGGTCTTCTTGAAGCAGTCGCGCATGGTGCTGTACTTCTGCTG -CGCACCGAATCCATCGGTCTGCATCTTACTCTTGATAACATCAAGCGGGTAGCTGGAAAG -CCAGAGGGCCTCACCAGCCAGACCACCATAGGTGGCAATCTTTGCCTTGGAGATATCCTC -ACGCTTGACATTGTTACGCTTGGCATCCTGGTTCATCATGAATTCAAAGGCCATGAACCA -AGCACCGTACGCCTGGGCCTCACGGAGGAGAGTAACTGCCTGGCCGCGGTAAATACCGCG -GAGGGCGCCGCCCTGTGCAGCTAGCTTGCTCACACAGTCGATGGGGCCGGAGTACAGACG -GCCAGCACCGTGAGGCTGTGCCTGCAGACGAATACGCACATGCTCAATTGGACCTGACAG -CACGGAATTGGTAAGGCCGGCGAATGAACCGGCGATGTAGTACTGGGTGTAAGAAAGGGC -CGAGTTGGCGTATTTCTTTTTGTTCAGCTCTTCCAAACGGCGACGCGCCTCATGGTAGGC -GCCGAATTGGACGGACACCTGAATCGGTTAGAGGAATTGTCTATAATGGACTCTGGCTGC -AAGGGCATCTCACACAAGCACCGATACCGATAAGCGGGGTCAGTGTGCCCTTATAGAAGG -CCGAGGGACCTTCCTTCTTGAAGATCTGGGTAGCGCACTCTACTGCACTCTTGTACTGAG -TGGTAGTTTGTAGGCGCACCTTGACGATGTCTGCCAAATTGTTAACCAGTGCCTCCTGTG -CTTTGAGCAGATCACATACCAAAGGGTTGGCCTAGTTTCTTGTTAGCAATTGCTTCGATT -TTGCATGTTTGAAATATTTTCCCAAAAAAAACTACCAGTTATGACATATTCTCATATCCT -GATGTCCATGCCTTTCCCTGGGAATTCAAAGAGAGTAGGTTGCACACATACCGAGCATAA -CTTGGGCCATTCCACCCACTGCGCCCGCAGCCAAATCCTTGACGGTGCGGAGGGCACCGC -CACCCTTGTCCATCATGTCAATCACCTCAGTCTCCATGTTTTTAAAGTGATAGATAATTG -GACCAAAAGTATCGAGGGACACAATCTGGAGACTCCAGACTTTATATCCAGCCCGCTTAT -CGCTATTGCGCCATCTGCCCTCTGATTCGATGAAAATGCCGAGCAACGTGACTTTTCCGA 
-CGGGGATCTTACATAAGCGGACTGACCTTACCTAGGTAGGTTAGATCCTGGGCAGGAACG -GGCGGGACCCACAGCTGACCTAAGCATTGACATAAGCCCTCTCGTCCTCTTGAACTTCTG -CAAAGGGAAAAAAAAGTGTTGACGGATATAGATGTGGAGAGACTAGTGCATGATGCATCG -CCGGCCAGTATGCAGGGTAAGCGTACAGTCGCTAATCACCGAGACATTATCTGACTCAAA -CTACAGCTATCGCGTCGGGCTGAATGGTCGAATTCAGTCTACCGAACACCCTCATTAACA -GCTCTCACAGTTCGCCGGGCCTTAAGGTCGTCCACGGCGCAATGGAACTCGTCAGACCAA -TCAAATTGGAATGATGATGGTGCCGCTTCAAATCCAGCGCAGGCGAAGCCCAAGTTCGGC -AGCCGATGGGGTCCTAAGCAAACTACGCCATCGGCGGGCTTGAGTTTCGCAGAACAAGCG -ATGCGCCAAACCCTTGTCGCCAATAGTCAACCGTCGCCGCCACCTGCACCTGCACCATCA -CAACGGAACCGAGAGCAGCCAAATCAGCAGAGGAAACCGTTCATGCGGCGCGTGGGCCCC -CGAGACAACTCACAGAATCAGGAAAGGAAACCTTCTACCCAAGAGAAGAATCCGCTCGAA -GCAAGGGTTCCAGGTAGCCACATGAGGTGGTCAAGAGAATCCCCACGGAATCGGGAGAGG -AATTCGTCCGCGCAAGAGAGGAATCCGTTTGAAGAAAGGAATCCAGGTGGCCACATGAAG -TGGTCAAGAGAAAACCCGCGGAATCAGGAGAGGGCGGCCTTCCAAGCGAAGAATCCGCCT -GAAGAAGAGAATTCCAGTAACTACATGAGGTGGCCGAGAGGAACCTCACGGAATCAGGAA -AGAAACTCGTCCACTCAAGAGAGAAACCCATTTGAAAAAGAGAATTCAAGTGGATATGTG -AAGCGATCAAGAGAGGGCCCACCTTATCAGGAAAGAAATACGTCTGTTCGCGAAACAAAC -CCGGCGTTTGAAGGAGATAACCCCTTCGTCACTGGAAAGACACCAAACAGATTCCCGAGC -CAGTATCAGAGCAGAGCGACTAGGGACCATCCAATGGCCCGCGATGACTGGAAATGCCCT -CAGTGCATGGCACAGGTTTTTGCCACAAAGAAAGTATGCCCATTCTGTAAAACCTCTCGA -CCCGACGACGCCCAGCCAAAGTTCCGCATGACCAGAGACTACAACGATTCTCCCGGGCAT -ACACACCTTAAACAATCTCGATCCCCCTCAAAGTTCCAGCGATTAGGGGATTCGGTGTTA -TCCGAGCTGGAACAAGATAAGTCGAAAATAACCGAAACCACAGGCCACGATTCAGGGGAA -TCTGATCGATCCCAGATAAAGGAGGGGGCAGAAGATTTCAAGAAGAACCAATGGTCTTGG -GACATGTCTGCTCTGGAGCATCTAGAAAACCTAGAAGCTCAAGAACAGCAGCTGCCGAAG -CCTATGAAGCGCCGTGATGGACGCAAGGGACGCCAGAGCGAGTCTTCCGATGGAGCTGAT -TTTGACTCGGAAGATCGCGAGCGACTCCGCGTAGAGCGGAGACGCCAGAAGAAAGAAAAA -GATGCCCAAAGGGCTGCAAAGAAGGCCGCCGCGCTAGCAGCACCAGCCCCTCTCTATCTT -CCCGAATTTATCAGTGTTAGCAACTTGGCCGATGTTATCGGCGTGCGACCGGCGCAGTTT -GTGCAGCGAATGGAGGAGATGGGGTTCGAGGAAGTTACTTATAAGGATGTTCTCGATGCA -GAAACTGCCGGATTGGTGGCAGCCGAGTTTAACTACGAGGCCATCTTCGACAGCGGCAAG -GCAGATTTACACGCTGCTCCTGAACTAGAGGACACATCTGATCTGCCATCTCGGCCACCT 
-GTGGTCACTATCATGGGTCACGTTGATCACGGCAAAACCACTATTTTGGATTGGCTGCGC -AAATCATCTGTCGCAGCGTCTGAACATGGCGGTATCACCCAGCATATTGGTGCATTCTCC -GTAATGATGCCTTCCGGAAAAGCCATTACCTTCCTTGATACCCCCGGCCATTCTGCATTC -TTGGAAATGCGTCGCCGAGGTGCCGATGTGACCGATATTGTGGTGCTTGTGGTTGCTGCG -GACGACAGTGTTAAGCCGCAAACAATCGAGGCCATCAAGCACGCCAGCCAAGCTAAGGTT -CCCGTCATTGTCGCCATCAGCAAAATCGACAAGGAGGGAACCAACCCTGATCGAGTCAAG -GGTGACCTTTCTGTTCATGGTATCCATGTGGAGGACTATGGAGGTGACGTGCAGGCCATC -GGTGTCAGCGGCAAAACCGGACAAGGAATGGTTGAGCTGGAGGAAGCCATTGTTGCGCTC -TCAGAACTGCTCGACCACCGGGCAGCCACCACCTGTAACGTCGAAGGCTGGGTCATTGAG -GCCTCGACAAAGAGTTATGGTCGAGTAGCATCCGCCCTCATTCGACGTGGAACCCTGCGG -CCAGGCGATATTATTGTTGCTGGTACCGCATGGGCTCGTGTTCGCACCCTTCGTAATGAA -GCCGGTGTGTCCATCAGTGAAGCCACGCCTGGAATGCCCGTCGAAATCGATGGTTGGAGA -GAGCAGCCAGGTGCTGGTACCGAAATCCTGCAGGCACCGAATGAGCAGAAAGCCAAGGAC -GTTGTGGACTACCGCTTGGAGCGGTCTGATACCCAGAAAATGGGTATCGATATGGTTGCC -ATCAATGAGGCTCGGCGCGAGCTTCTCGACAAGCGCAAGCGCGAGAATGAAGAGGAAGAA -ACAATAGATGGAGTTGAAGCAACTGGGCCTAAGTCGGTCAATTTCGTCCTGAAGGGCGAC -GTAGACGGCTCGGTGGAGGCAGTCTTGAACTCTGTTGCGGCAGTCGGGAACAACGAAGTC -TTTGCCAATATCATCCGTTCAGGTGTTGGCCCAGTCAGCGAATTCGATATCGAACATGCT -GGCAGTGCCAAGGGCAAGATTATCTCATTTAACCAGGCTATTGAGCCTAATATTATGCGT -ATTGCAGAGACGCAGGGCGTCGAGATTCTGGACCACAACATCATTTACAAACTGATTGAT -GATATCAAGTCCATTCTCAGTGAAAAGCTACCTCCAACAATCACTACGCGTGTGACTGGA -GAAGCCGAAATCCAGCAGGTGTTCGAGATCACTGTCAAGGGACGTGAAAAGACAGCCATT -GCTGGAAGTCGTGTGCGCAATGGTCTGATCAACAAAACTCGCAAGGTCAGGGTGCTTCGC -GGGGACGAGATAGTCTACGATGGTAAGCATATCACAAGCAGCTTAAAATGAACAACTTGG -ACCAATCTAACTTTCACAACAGGCACAATGGTCTCCCTCAAGAACGTCAAGAAAGACGTC -ATTGAAATGCGCAAAGACACAGAATGTGGCATCGCTTTCGAGAATTGGACAGATTTCGCA -CCTGGTGACCATGTCCAGTGCTACGAAGAGATCTCTGAGAAGCGCTATCTGTGAGATACT -CGGTGATCTTAAAATCTGTGCGGTCGTGATCGCTTGTATTATTTGTTTCATCTAGCAAGC -CCATTCCCTCATGATCTTCATTCAAATTTTGTATTATATTTTCCTTTTTCTCTCGATATG -GTTGACTTGCATTCAGGGTCTGGGCGTTGTTGCATCTCTATGTACTAGGGACCGGAAGAA -AGGCAAAAAATCACTGTCATTGAGGAGGATGCTGCTGTCGCAGCGCATGTGTAAATACTC -AAATATCATGGCCGAATGGCCACATTATAATTCACTTCTTCTATGCAAAACAACAACAGT 
-CCGCCGTTCGTGTGTGGCGTTTGCTTTTGACAAAGCAGGAGCAGGACTTTGGCATGCTAA -GAGTATAGCCGCCGCTTTGTTGCACTCATGCGAAAATGCCGAAGTCGATTTCTAAGTATC -CAGTCCAGCTAGCTTAAGGTGACGAGGGCCGTGATGTTGAGAGCATCAAGCATGAGTTTG -TTCGTGTATTCATATTTCAATTATGAAGGGTATAACATTCATTCGTGGTCCAACACGCCC -CAACGCGGCCATGGCCGCTGGCCTTTCTATCCCAAAGTAATACACACAAATATTGAAGGG -GGAATAAAAATACGAAGAAGATGCTGAAAACGCTACAACTGATACAAAACACCACCGGCT -TCCGCAAACTTCTGCTGATACTCTTTCAAGCTCGCCCACTGACTCAATTCTTCTTGATCT -CGCTCATAATCACTGAGATAGCTCATAGCGCGGCCGTTCATCTGCGGGCTGACGCCGTCA -GCAGGACTGGCTGCATCACTCATGGGTGAATGGCATTCAAGCCGAACAAGTGTCTCAATG -AATGCCTGCAGGTAGGACTCCGCCTCTGTGGCAGTGAGGATGTGGCCTGGCGGGGCGGCC -GTTGGTGATGGTGATGGTACGGCAGCGGTGGTGAGATTTGGGATTTCGGTTGCGGACTGA -TTCGGCCGTTGGGAGAGGCTGAGGAGAGTGGATGCGACTTGGCGGATTTTATTGGTCTAT -TGGAGGGTCATTTAGTTAGTTCAAACACGAGTAGTGGTAAATAGTGGTCTAGAGCGTACA -ATGCTTGCGCCGTTCGGCTCCATGTTCACCTCATTCACTGTTCGTAATAGCACGAAGAGA -TCTTTGATGACCAGCTTGCGCTCTTCGGCCATCATGCAGCCAATATAGTCAGTGTGTGGA -GGGGGCTGAGCTTGGGTGTTGGTTTCTGACGGCGCTGCGCCGGTAACAGTCACACTTGGT -GCCGAATTGGGCCCTCCTTCGATTTTGATATGTGGAGTAGTGCTATCCGGAGTAGTGAAT -GTAGGTGAATTTCCGTACTTTGAAAACGTTTCGAAAAGGCTCCAGTACTTCTCAACAAGA -TAGGACCTGGTGGATAGTTGACTGGCATAAATATTGGCTTTCTGAATCTCATACTGTATA -GCACGTCGTTCTTGGTGGATGTGATTGTTCGAGTAACTGAAGATTGTGTCATCTCCGCCA -AGACCTGATAAACCCGCTGGAGATATGTGTTGGGAATGGCAGAGAGATAACTCGGCAGGA -ACATTGATTAGTGCAGATTTTGCTTGCTTGAGACAGTTCCAAATGCTGTTGCGCTGGTGT -TCCCAGTCAAATATTTGACCGCTACCAAATGCTATTTCCCAAGCTAGCAGGGCAGTGTAT -GAGCTGTAGACGCGGACGTTTGCGTTGAAACCGACTAGCTGTGAAACTCGCTCTGATGGT -TGCGGCTCAACATGTGTTGTGAATAAGAATTCATCGTCAACTTCCAGCGGGAGTGGTGGA -TAACGGTCCGTTGGTGTCTCTGGAGGAACATGAACCTTGATATCTGAAGAGCCCATCTGG -TGTAATGTCTGATATCCGACTAAGGTGACATAGAACAGGCGTCGTCCAAGCTCTTGAGTG -ATGAGGTCGACGCCTTGGTCTTCCGTGAATTTATGCGGTAATTGATCATGCGCCCTTATT -GACGCCGATGACATTGGGCTAACGGGCCCTAGTGTCGATGACGGGCGGTTGGTGGTCGTC -GGAGACCGGCATAAATCATACACTTGGAGCATGGCTCGACATTCCGATAAGTACACACGG -CACCGGCGCATGTTGTAAATATACCCGGAACATACAGCGAGAAAATAGCTGATCGCCGCG -TCATATACTGTGGCGTTGCGATCAAGATATCCAGTCCCGCGGGCTTGAATAGCAACGTCA 
-AGGCACCGCTTTACTAGAGCTATAGAATTGGGGAAAGCGGTGCGCTCAGCTTCAGTGTTG -AGCCGAAGTTTAGGGCGACGAGGAAAGGAAACCACTAGGAGTCCAATCATTGAGGCCAAT -AGTGCCAAAAACGTGTGGTTGGTCACATCTTCCCGACGCTCAAAGGCTGCCCGAAAAGAA -GGCTCATGTGGAATTGGGACCAAAGGATGAATGTAAGTAAAAAAGTCATCAAGCAGCATT -TGCATCGTGTGCAAAGAACAAATCGATTCCACCGAAAGGGGAAACGATGTAGATTGAGGG -TTGATGAATGGCGTTGCCAGGGGAGTCGTCGCCGACGGATAGTCTGTCGGGCTCAGCGAC -GCTGCATCTTCAGGCTCCCGTTTTTGTTTCTTAAATGCTTCTGCATGGCGGTTGGGAGGA -CCGCGGCGTCTGCTCGGTCTTTCATAAGTGCAGGGAATATCTAAGCTAGTGCACGACCGA -CAAGGTGGCCCACTGGTATCACACTTGCATAAAAATCAGTAATTTCCTGCAGACCATGAC -AAAAAAAACCATACCTTCACTTTTCGAATACTGCAGGCATCACAGGCCTTGCGCAGCCTC -GTTTTCAGGTCCATTAGCAAATTGTCGGGATGACCACACCGCGAACCGTCAGGGCATGTT -CATCGGCCCAAAGCCAAGTCAGGGCATCTAGGCTGCAGGCTCAACTCTGAACACAATGAA -TGCGGAAATCCTAGTCCCAAAAGTCGGAAATAAAGCCAGCAACTTTAAAAGCCGAAGCTC -TGGAAACCTCGGGAAGAGATCCAGATCTCTGATTCGAAGTCCGGGGGTCACACTTCCGGA -TTGCTAAGCGGGTCAAACCTCTAGGAAAATCTGGGGTACTTGAACTGGGCAAAGGACAAC -TATCAGGCATTTCTAATGCTTTGCTAGGAATGAAAATTGCAATATTATCGTGTGAATACA -TCCGCCGAAAAAAAGAAAAAAGAAAAAGCACGTCGCGGAGGCACAGTCACGTGTCACTCG -TAATCTAACAAGCTGTAGACACTTATTTTTCTACGAAATTAACCAGAGAGAATTACAATC -GCGTTTACCGAGAGATATTGGAATGTTCTCGGTTTTCCCTTCTTGGCATAGACCAAGGGT -GTCGCGTAAATCGCCTATATTGGGAACTAGAAAAGGAAAATTAGGTGCATTTTGCAAAAA -TCTCGGTCAACCCGAAAGAGATGTATCATTCACAGATCATGGCATAATATCGTCATGGTA -TTGCTCACCAAGTCGACTAGACTGCTGCTGGATAGCTCGTCGCTTGCGTTCAATGTCAGT -CGCCCAAAGTGCCGAACCAGCAGCCGTAGACATCAAAAATAACATAGAGAAAGGAGGAAT -TAGTTGGAGGATCAGAGCCACCGTTCCAAAGCTAGACTGTGTTAGATTTCATATCGATAG -AGGGAATTGATGTGACTTACGTCGTGTATTGCAGCTGACGCCGACGAATACTTTCCTTTC -TCTGCTGCTTGGACATGTCCAATAGGTGGAAGTAACGCCAGTGATGGAAGGGACCGGCAC -GATAGCCAGTCAAAACTAGGAACATAGGAGTGCCGGCTACCGGAATAAAGATAAGCGGAA -GCAGAACTATGAACTCGGCGATCTGCCGGAACGAAAATGGAGAGTACACGGCGCTGGTGG -TCGGCGTACCCAGACGCTTGGCCGGATCCCCGGGCTCAGGATAAATAACCCGCGAGGTGG -TAACAAGCTCCTCCTGGCCCTCATTTACCAGAACTGCATCAAAGATATCCACTAGAGTCT -CATCGACAAAAAATGCTTCGAAGAGTGCGGCAACGATCGCTGCTCCTTCACCAAGCACCA -GGAATGCGCCGCTTACCCATGCCGAACTGCCCTGGAAGATAGCAAGGAAGGCAACTTGGG 
-GCAGGTAAGTAAAAAAGAAGAGAATAAAGTATATGAAGGACGAAAGAAGGACGATGGGGA -GAAGGCGGGCCTTGAAGAGAGGCCATAGGAACCGATTTGACGCAAAATAGTAGATTCCCT -ATGTGGATTAGTTTTGAAGCTATAGAATCCCCTATATTAACATACCCAAAGAGGATACAG -CCAAGCAGCTCGGCTAGGATCCAGGAATCGCGACATGGCTTGCAAGTTCCCAACTTCAAA -GAATCCCTGCCAACGGCGATGGACAAGTTCGAAAGCAGGCAGCACCTTATCGATAAGGTT -TCACGTGATACGTGGACGTCAGTGATGCTGACTAAGCTGGTACTATTTTACCCAGAAATT -TTTCGCCGAGCGCCCTTTTCCTTTTCTTCTTCACATAAACTGAATTCCTCCCTATTCACT -CTGAACTTTTGTTACAATCACTGTTCTGGAGGGGAAATGGGAGGAAAGACCAAATCCGCT -GGCTTACCCCAGACTCTTCTACTCAAGCGTGGAGCGTTTGGAATATATTCAGCTGTACAT -TCCTCTTTTCAAGCCTGTACTGGTGGTGCTTGGAGCTTTTCTTAGAAGTTCCCATGTTGG -ACGCCCTGGCTTAAAGCCGACGTCGATGAATCTCGTTTACAATGTGCTCCAAACTGTGCA -CAATGGCCGTTGGCCGTAGATCTGGAGCCATGGCATCCCTCCATCCCGTACCAACACGGT -CAACCCATATAGCTTGCATGCCTGTAGCGCGCGCCCCGACAATGTCGAAAGGGTTGCCGC -TAATCAGCCAAAGTTCGCCCATCTGCGATGGATCTTGAACTGTCTGCTTCGCTAGGTGCT -TGTAACTTGCCTGTGATGGCTTATATTGCTTGACTTCGTCTGACGTGACGATGTCTTGGA -AGACACTCGCGTGCGGCGAAAGGTCCCTAGACCGTAACATCGAGTTGGATACCATATCTC -TTGTCCCGTTGGAGAATATCACGGCTTGGATGGTTGGATCTGCAGCTATGTGGGTCAAAG -CTGGATTCACGTCAGAAAACGTAGGGAGACTGTCATAAGCTTGCATCAAGTGTTCGATAT -TGTCATCGCTTAGCTGCTCGTTGTTGTCGGCCAGCGCGTGCATCAGTGAGTTCCGTGTGA -TGGCTGAGAACTCTTCATAGCGGCCTGTTGATTGTCAATATCTTGGAGCAATAAGTTTAA -AAGGATGGCTTGCCCATGCTGTTTAATCTCCAGGTGTACTCCAGTTGATACCGCCTCCAG -AGCGAGGAGATCGACTGCGCCTTGGCATTGTCACAGTGTTTCTCAAGTTGTTCAGCGATC -GATTCAGTTGACAACAAAGTGCCATATAAATCAAAAGCGACAACTGTGTTGGTTGCCATG -ATGTTTCATAATAACCTTGGGACAGACAAGCGGGTAAGCCAAGGCCGTTAAGGCTGAAGA -ATGCAAGTTGATAAATGATGTCAATCTTCTGTGGGGCAAAACAGACTCTCTGCCCAACAT -CCTCTCCGAGATCAAGGGGCAATATTCAGGACTCTCTCCCAAAATTCCAATGACTTCCAA -TGACTTCCAATGAGAACTATGCTGTATTTCGCGAATGTCTGTCTAATGCCATCGTGGCCC -GCTCTGAGGAAAAGCCGAAGCCAACGCGACGGAAGCCCAAGGGCAAACGCACCGAGCGCA -AAGACGTAACAACTTTGACAACAGCGTCAACAGTTCCAACAGGGAGAGCAGACCCCGAAG -AACTCGCCGAATTTGTTGATGTACATACACCCAACCAGACCAACTGAGCGCAAAGCGACT -AGCCCTCAGGCCAATAAAATCTACACACTACAGCCAATAACTAACAAGATCCAAAGCTAA -CAAACCCCAGTTCCTAGCCTCAGAAACCTTCACCACCTTCCCAAATTCCCTTCAAACGCT 
-AAGCTACGCAGCCATTCAACATGACCCAGCTTTATCAACCCTCTACATTCCCCCAGACGC -CGACACACCCCTCCCACGCGCCACACTTGAATCGCTCTTCTCCCCAATTCCAGTCAGCGT -GACCGACTCGCTACTCGTTTACGGCATAATCCCCGACGCAGCAGACCTACTGGACTTCCT -GGCACCTGTACTGACGGAGTACACATCGAGCGTGACGACTGGCCCACCGGCTTGGGCGAG -CACGCGCGCGGATGCATGTGAGATCTGCGAACGTGACTGGATCCCGCTCTCGTATCACCA -TTTGATTCCCCGCGGGGTGCATGCGAAGGTTCTTAAGAAAGGGTGGCATGATGAGTGGAT -GTTGAATAGTGTGGCGTGGCTTTGTCGCGCTTGTCATAGTTTTGTGCATCGCATGGCGAC -TAATGAGGAGCTAGCTAGAGAGTGGTTTACTGTTGAGCGAATTTGTGAGAGAGAGGATGT -GCGGGATTGGGCTATGTGGGTTGGGAGGGTCAGGTGGAAGGCGAGGTAGAGGTAGTTGGT -GTTTATTTGTGGTTATAAGATTCGACTGTGCGTGTTTACTTGAGACGCGAGGGGATGCAC -CATGGAAATGATAGCACATAGCCCTGTATGGCTTTCAATTGCCATTGACCTTTACAGACC -AGTATTGCGTATGCAATAACACCGAAAGCTAAGGTATGTACGGACCTTACATAGGTGGTC -TTTTATGAGTCCTTCCAGGCCAAAATGTCATCAATACCAGTTGATCTTCTCATGAAAAAT -CGGCCAGGAAGATTCGATAGCCGAATTTCACAGGACCGGTTTGGCGGCACTAATACTGTG -TCCATCGTTATTTTTATACTCCTCTATGCTCTCTACTCCTCTTCCACTCCAGCATTTCAT -TACGATGATAACTAGCTTCAAACAATGGAGCGGATCTACAATACCTTTCGCGAAAAAAGG -AGGCTTCCTATGACAGAGGAGCAGCTTTTGGACCTCAGAATCGATGTTCATCAGTTTGAT -CTCCGCACTGATATAATTGTTGGATAACCACCATCTTGTGCTGCGATGATAACTAACCGC -CTATGGGGAGGTGTACCATAGCACGACTGGAAAGACTCAGTGGTCCTATAGGGCTTACAG -CCTCTGAGCTGTGGATTTTTGTCACTGCCTTGTCGCCACAAACCATCCTCTATGCATCGC -AGCATAACTCCTGGTCAGTACCGAATCTCTCACAGTGAATGTGAAAGTAGCTTTCTGATT -AATCATATTCTCCTCCATCTTACGCGCAGTTTTTATATGCCGAGTATACCGCATCTACTT -TGGAGACCGCTAGAACGAACGGTCACGTTGACTGCGCCAGCATTTTTTCAATGGATGATC -TAGATAGGAAGGGCTGTACGTGATAAAATTAGGGTCACATATTGGTAATACAGCCTACAA -AAGTAGACGGCCGAGAGCTTGTGCGGTCTTATGGTGTTTTACAGGGCTGTTAAGTAGTCT -TTTCGGCGGTCATTTCAGCGGGTTTCTCAGGCATCCTTCTTCGATAACTATAAAATAAGT -TCCCTCCTTGAAACAGTGGAAATCGTCTGCGAGGTACGAAAGAAAAAAACCCTCTCACGA -AAGAAAGGCTTGATAAATCGAAAAAATATAATATAGACAGCTTGCTCCCAATCTTACAGT -CATTCCCAAGTAACAGTAGCAGTGACAGTCTCAGTTATGCGATCTGTTACCgtctgagtc -tttgtctcggtctcggtcacagtctcagtGATTGTGATAAAGCCAGACGGCGTAACCGGA -TTCACAGTCAGTATGCCTTTGCCTGATTTGGAGGCCGTGGTAACGATGTTGGCATCCACG -ATAGATGCGGGATCAAATGTATGTGCAGTTTCCACTGGTCGTGTAGCAACCGACCCTGAA 
-AAGGGGGTCACGAGAAGGGTCTTTGTGGAAGTAACATGGGTCGTCACTTCAGGAGCAGTC -TCATGCGGAATAGTCTCACTCAAAGACGGCAACAATGGAGGGGACGACGATGTTGTCGAA -TAAACGGTTACTGGAACAACACTTGTGGAAGTGACGTGAGCTGTCACCACAGGAGCAACC -TCACTGGACAGCAACAACGGAGGAGACGACGATGTTGTTGAATAAACGGTTACTGGAACA -ACACTTGTGGAAGTGACACGGACTGTCACTGCAGAATCAATCTCACTGGACGGCAACAAC -GGAGGAGATGACGATGTTGTCGAATAAACAGTTACCGGAACAACACTTGTGGAAGTGACA -CGTACTGTCACCGTAGGAGCATATTCACTGGACGGTAACAAAGGCAATGGCGATGTTGTC -GAGTAGGCTGTCACTGGAGCAAAACTTGTGGAAGTCACATGGACTGTCGTTGTAGAATCA -AGCTCGCTTGTGAATGGTAGCCACAAAGACACAGAAGAGGATAACGGTGTAGTTGAATAA -ATTGTCAGAGGAACAACACTTGTGGAAGTGACATGAGCTGTCACCACAGGAGCAATCTCT -TCTGAGGACGACAACAAAAAGGACGAGGGAGATGGTGTTGTCGACTGGACAGTTAGGGGC -ACAGGTGACGTTGCATTCGCACGGCCATTGTCTATAGGCGCTGCATAGACACTCGGAGCA -TTTCCAGTTGGGGCCAGCAAGGCTGAAGAAGCAGGTGGTGATGTTGGGTAAACAGTCACA -TGAACAACCGACGTGGCTGTGGAATGAATGGTCACAAATTTCAAAGACGTAGTTGTTGGT -TCAATAGACGGTCTTACCGATAGTTTTGCCGACGACACCGATGAGTTGTTGACACTGGTT -GCGTGGTGAACAACGCTTGTTGAAAATGGTTTTGGTGCTCCTTTTGTCACATAGACAATG -ACAGGCTCGGTTACAGTCACCAATGGCTTGGATCCCGATGTGACAGGTTGTACTGGTGTA -GACTTTGCCTGCGTGGAATTTGCTAGCtggataggattggaggttggaattatgggtgtg -gatgtggatgtgAAAGAAGTGCTACTGCTTGCTCCTCCTAAGGATGGAATGGCTCCCACA -TCCGGGCTGCCAGTTGTGGAAACTTTAGGTGTTGCGGGCACTGTGGGTACTGGTGCTATC -GATGGCGGTGTGGTTGTGGCGGCTTTGGTGCTGTTTGATTCATCTGAACTTCCAGCCGAT -GGAGATTGGAATGCACTCGGTATTACCGAGGAATCAAAGGAGGGTACAGTGCTTGAAATA -TAGAGGGGAACTGTATAGACAGGTGAGAAATTACTTGATGAACGCGGCAGTTCCGGACTT -GAGGGCAAGGGTATGTGTCGCCGAGTCGTAGTGGACGCATGAGAGGTGGTGGTTGGCTTC -TGCGAAGTCTGTGCATACTGAGAGGCTTCTGGATTCAGCGAGGTCGTTGGCTCTAACGAC -GTCGTTGGCTCCAGAGAGGTCGTTGACTCCAGAGAGGTCGTTGGCTCCAGAGAGGTCGTT -GGCTCCAGAGAGGTCGTTGACTCCAGAGAGGCCGTTGGCTCTAACGATGTCGTTGGCCCC -AGAGAGGTCGTTGACTCCAGAGAGGCCGTTGGCTCTAACGATGTCGTTGGCTCCAGAGAG -GTCGTGGGGTTCTGAATCGTCGTTGACGCATGCGATGTAATTGGCCTCTACATGACGGTT -AGCTTAACTGCTTATTCACTTAGAACAGGGCAAAAGTACTATTGACTCGTCGTACGTAAA -TGGTATCTGAGTAGGCGCCGAAATAGGCCCCGCAACAGGCGTGATCTCGGTGATACCGCC -AGCTTCACGCCGTTTGATATCTGGCAACAGGGGAGAAGAGGTTGACTCAGGTGCCTCCGA 
-TGACTGAGATAGCAATCGAGATATCGGACCAGCCAAAGCCATCCATGGCAAAAGAAGGAA -CAACAAGAACTGGAAGTGATGCATCCTGATCTAAAAAGGAGAGCGTTGGAAGTGATCGCC -AGGACACAACTTCGACGGAGAAAGTAGGACGTTAAAGAAAATCGGATGCTGAGATAGAGA -TACAATAAATCGCCTAATTTGGATGGAAACAATCACGCATATAAAGAAGCATGGCCAAAG -AAAGTATTATGAAATTAATGTGCATCCAAGGCCTGGGAGGCGTCTTGAGGGCTTGCAAAT -GAAAGAATGGCTGGATCACTAGGGACCACAGAAGCGAAAGGAAACAAAAGAACGAGGCAG -ATGGAGCAGCCTGAGACTTCAAAAGGAAAGATATTTTTCTTTCAAACCCCTCGCCATCGA -GGATCCTAGCTGGTATATCTGCATATTGGGGTTTCATAGGCCGAGTTGGGCTTAGGGCTT -CCTGTCAGCCACCCATTGTTCCCAACCTTGAAAGTTTTCCCATAAAATTCACAGTGGCCC -AATACCATCAGTTTCAGAGGGAACCGCCATTGCAAGGGGCTCTTTGTGGGACAGAATCTC -TAGAAATAGATCGCGCGACAAAGAGACGTTGCAGTAGGCGCCCAGCCCTGGTCGATGACG -CGTGATGATTGGCCAGCTGCAGGCTTAGGGCCACTTTATTATTTGATTGAAGATTACATG -TTGACATATGGACTGAGTAGATTTTTAGACGGAGATGGGTCTGGCCGATTCATTTGCTAC -GTCTACGGGCGCGTAAGTTGAGATATAATTATTCTGACTTGGTTACTCCGGGAATCATAC -ACTTTGGCCAGATTATTCGACACCTTTCATTCAACTTGTCACTCCGCTATATTATAAATA -AGTCGGACTACGAAGTTTTAACAGTGAAAGTCCCACCAATGCCCTCGAATGGCGAGATTT -GGCCAAGAGCATTCTTCGAGTCTCCGTAATAATGCATCCGATAGGTCCCAGCCTGCAGAG -GGGACGCAGTGTCCACACTATAATACTCATCCTCAATCTCCCATTGAATTGTCACCTCAC -TTTGGCCAAGAATATCATTGGTCCGCTTCCAGGTATAGACCAAATTCCAGTCGCGATCGT -TGCGCACGACCTTCCATGCACCAGAATCGGTTTTTCTTTCGACTGCAGCGAAGGTCGATT -CGAGATGCAGATTGTTGCGAGGGTTGGCACCCACAAAAGTCGTCTTGACAATATCTCCGG -GCCCGTAGGGTCCACTCCCCGGGTTTGAAATCACGTTTCCAAAGCGTTTGCCAATAGGTG -CTCCGTCATACACAACCCCCGCAATGAAGCTCAATGAACGGTTGGTGTTGATTGGTGGGG -TAGGGCCGGGTTGGATGGCTGGCAGTTTGGCAACTTGCGAGGAGCTTCCCAGATATGGCA -GATAGGTGAGAGTCAGATTGACGTAGGCCGCCAGTGTGTTTGGGCCGTAGAGTGTTGAAG -CGCCCTCATAGCGCTGAGCGCCGTACTCTTCCTCGGTTGTCACGTAGTGAGCATAGCTGT -TGGACGGGGCGCCCAGTACCACTAGAGGGTCGGAAACAGAAAGGTGACTCTTTGCCCCTT -TCGCAATAGCCTCTTTCCAGCGTCGTCCAGACATCGTGGTAGCTTCGCTGGTGGATATAA -CCAGGAACAGCTGACCCACTCGCAAGACCTGGATGTCTACAATGTCCGCGGCCCATGCAT -AAGGCTCAGTCAGAGTGCCGATATCCAATAGCACGTTCTTGGGGCTTTGACACTGCTGCT -GCTTTTTGCTTGGCTGATGAATAACTGCACGCGCAACTTGCCATAGGGGATTTCGTCCAG -CTGGTCCAGTAGCATTCTGGGTAAAGTCAAAGGCACCCGGGCCATCCGTAGTTCCAGCGG 
-CAAACGAGTACCCCATAGCCGCAGGACAAGTCGTCAGTATACTGGAGTTGAACGGCGAGG -GGAAAGTGTATCCATTCATGTCTTGGTACACATGGAAAGCCTTCACTGCCGAGCTTCCGG -TGATTTTGATAGAGGTGGTGTCCATCTCTTCGTAAAGCTTCTTTGCTGCGGAGTACTGGC -GTCGCCCAATCTCGAAGCAGCTCTTCGCTCCGTTATCCTTTTCACGGAAGAAAGGACCTC -TACCATGGCAATCCTCGTTTTTGCCACCGCAAGTGCTATCTTCGTAACGACACTTCTCTC -CAGAGCCGTCTTCGCACCAAGCCCCCAGGATATTGGGGGATGTGTCCCCGACATTCGCTT -GAGAGAATCCAATAACGGCGTCGTCAGTGAACCTCGCATCGCCCTCGACGCTGCGCTCGA -AGAGGTAAGCTGCGACACCTTTATTATCGCCGCTGACCAGACTATTGTTGTTGTACAAAG -AAGTGCCATGAACAGGAAACCATGTCAAGACAGCCGTTGTTTTGTGATCAGATTCTCGAT -CAAATCGAATGAGCGAGAGCGATTTTTCAACATTGGCAGAGTACCGTGCTCGTTCTTCCT -TGGGGTTATGATCGTATGAGAAAGGACTGCGATTGATATTGGTGTCTTGCAAGTCAATGG -ATCCAAAAGAAAGTCGACCCGGCTGCAAGTTCTCATGAGCACGAATTATTGATAGAATAG -TACCGTCGACAATGGCTTGATAGCTTTGCTTGTTGAAACCTAATGCCGGGATCTGAGGCA -GCAAGTAGTTCATCCACGCTCCAGGCCCGGAGTGCGAATGTGTCCCCGTCATTGCGACGT -TGTGTTCACCGTACCTTGAATACTTTCCACCGAGCGCATTGAGACCCTCCAGGACACCAT -GTCGCACGGCCGTGTCACCCGTAAGGGTATCGAGAACTAGGTACACGAAAGTATCGTCGG -GCTTGTTTGGGTCTGCGAAGATGAAGGCGCGAGAATACAGCCTCTGCTTTAGACCAGTGC -CTAGTTGGTCCAAACTAGCATAGCCAGCGAGGGCAAGCTCCACAACGGGTCTAGTAAATA -TATTAGCCAATTGAATACATCATATGTCCAAAATAAAGCATAAATATACCCAGTCACATC -TGCCTTGCCTGCTCCTACCAAAAAGATATCATCAGCTGTCGCCGCGTTTGCTCTGCTTTT -CCACTGGTTGAAGTTTGAACTAAAGCTAGGTAAAGGCGCTTGCGACTGGATATCGAGCTT -GAATACAGCAATCAGCTGCAAAAACACCAACGCTCCAAGAGCTACCCCAGCAAGAGTAAC -GCTCAGGCGAAAGTTAGCCATTGCTAGAACGGTTTGCCAatatatatgtatagatatata -gagttatgatatCAGCAGGAAGAACGCGCCGAGTGACTCTTTCGAAAGCCTTGGGGAGCT -GAAATCCCCTCGGGCGATATGGGTTCTTATACAAGTGCAGTGGGGAATACAGTGGGAAGA -GGAGCTACCTGCAAGTACATAAGATCATTGCCTAAGTACCCAATATTGGGAACGAAGCTG -AGCATTAGTGTGAAAAGCATATGAAGTACTTAGGTGACATTCGTCTAAGCGATGTGCAGG -GGCTTGGGATCGTCATCCATTGAATTCCGACACGCCCAAAAAGGGAATAGACTTTCACTG -CCCCCCCCCGCACGGGAGGAAGGTGCTGGTGTAACTGCACAGGTGAAGACTCAATTGCCT -TGATGTATAGCCAGTCAGAGAGCTGGGAACTTGCATTAGTCTTCAGGGACCCCGGGTAAA -AACAGTTCGAACCGGATAGGATCATCGTTCAAGTTGGGAGCATCCTAGAGAGCAATACCA -TTCGTCCAAGTATATCCCCGGACTCCCAGACCCTGGTTTCTGCTGCAGCCACATACACAA 
-CGTATCCCATGACTCGATCACAGTTTGAGCGCAATCGCATTAGATAGTATATTGATTACA -TGACAAATTAGGTATACTGTTCGCTGGTTTGTATTTGCCTCGGGGACTATAAAAGAATCA -CGTAGTCTATTTGAGCTGAGTCAACCGTGCTCGTGTCACCGCCTTTGTCACCGCCTTCAT -ATAACAATGATTCTCAGATTAATCTACTATTGGTCTTGACATTCATACAAGACCTTCACT -CCAAAATGCCTCTAATCAACGAATCACACGACTCGCTGCCATGTATGTACTCAAAGTGGA -AAAAATCCATGTATTAGAACCAAACTCACTAAATTTCTAAGATATCGACGCGGCGCCCAC -TGCGTCCgcgcgagcgcaagcgcagcagctcatcaacgcagagcTATCCCCGGAGCACGC -CTCCACAATGCACCCCTGGATCCCAGAAGCGCCAGAACCCAAATTCTCTCAATTCATGCA -ACAAGAACTTTCGCGCAAAGCGCAAGGAGCACCTCTCACAGGCGGAATCGATCTATCCCG -TTACGAAGCACCAGAGGCCCCGACTCGCACACCAGACACCGAGACGCCCGATCTAGTCAA -ATCGAGCCGGAGTTTACAGCAGGCATATGTGTCATGCTCATACTTGTCCGAGCGCAACAA -GAGCCTGGGTCTGCTGGAAGAGTATGGCAAGAACGCTTGGCTGGTTAGTAATTCGCAGCT -TGAGGAAATTCTCGGTTCTCTTGAGAAGGAGTTGGCAGAAACGAAGGAGGCTTCTGAGGA -GGTGAATAAGCAGAGAAAAATTGCACAGGAGGCGAGCCAAGGTGAGCTAGTCAGCCTGGA -AGAGACGTGGAAGCGTGGCCCTGGGGCGATTCTTAATGTTGAGTTGGCATCTGAGGGTCT -GCGCAGACAGAATCTTGACTATAGGCGCCAAATGGCTCAGCAGCAGGCTCGCTAACGGTG -AAATCTCAATCTTGTTTGTGTACTATCTTTCTGAGAAATGAAAGCTCGTCATTCTAGCGA -CCTTCTGCTAGCTGAGGGATTCATGTTCTTGGTCGCACAGTCTGTTTGGTGGAACGCTTG -CATGGTGGGGTGCCATATGTGCGACTTGAGGGTGCCCTTGTTCCTGCATTCGTGAACATT -TTGGGAAACGGCCTATGTGTGTTGCTCTTACTTTTGCGCCGAGGACTTATTCTCGCCTAC -ACGGTTTTTGCGAACCGTGGGCTCTTCTGCCATTTCCCAAAACTACTGGCTTTGAGGTTG -CAACTTCCTATGGGGATCGATCCTCATACTAAGCTCCAACAAAATACAGCCTTTGCGCCC -GAATGAAGATTGGGAAGGAATGAGACCCTGATATGGCCGGGTTCTGCTGGACGGCTCCGG -CTATCCCAAGTTAGAAGACTGAATGAATGATATCTTCTACCAGATCCCTTCTCTCAAAAT -ATAATTCCATTTCACGCCGTCACGTGTTTCTATTTAGAAACCCCCCAGAGCAAGTAAAGA -CTAAGTTGCTTGCCAAATTTTGCTCATGAACTCCCCCACCCCCGGCGACTGCTCAAATCA -AGTAGACCGGTCAAATACAAACACTCACAACACCCTCGGCGCAACCACACCAACACCATC -TGAAACTTTCACGGGACCCAAAGTCATCTTCTTCCCCTCAGCCTCTGTATTCCAATCTAC -AAGCCGCCGACACAGGCCATCAAGAGTGTTCAGAGGCGGGTTATACCCTAACCCACCCAT -CTCAGGCGCAAGCTTTGCACGCGAATCATCAGCAAAGACATGAACATTGACAATAGAGAA -CACTGAGGGCTGAAGCTGCGCAAGATCCCCCGACAAAACCGGTAACCAAGTGAGATATTT -GTACTGCAAAAAAGCATACATCTCAAGGAAATACGCCAATAACAACAATGGCGCGGGTGG 
-CACCTCCGGAAACAAAACTGGCGTTTTGGACAACGTCGTGAGCAGTGTGTACAAATCACC -AAAAGCGATCGCCGGGTTGGGGTCGCTGACGATGAATGCCTGGCCACCTGTATTCGGAAG -CTGCGAGCCAGGTTTGCTCTGCTGGATCAAACGCTGTTCGTACAAGAGGTGTGCTAGCGA -GACGTTCTCGGCGTGGACGAAACTCTGGAGGATCGGGCGCGCCCAGGTCGGACTGCCTCC -GTTGCGGAGGTACAGGCCCGTGATGACATTGTTGGCTTGGCCGCCTGTGCCATATACGCC -GTTTGTAGGGCGGATGCAGCCGGTGCGGAAGTTTGTTTCTGGGTCATCTGCTGCGCGGAC -GAGGCGCTCCGCTTCGGTTTTGGTTACGGCATAGTTGCCAAAAAATTGGTAGTGGGATTC -CGGTATTTTGGTGTCGTCGCTGATCACTTGGACTACGCGCTTGGGTTGAGTCTCCCACGG -CGCGATCCAGAAAGTTGGCTGGTGCAGGGCGATGGAGCCTGAAGATGTCCAGATGAAGGC -TGTGGCGCCGGCGGCTCTTGCGGCACTCAGGACGTTCCGTGTGCCGTTGATGTTAACTTT -GCTGCTGAGTGGCAGGAAACTCTTGAATCGGTCTTGGGGTCGGATCATGGCTGCAGTGTG -GTAGACTGTTAGGGATAAGCTGGTGACACTGGCAGCCCATGGCTTTTCGAAAGCTGTTGT -TACTGCGAGCTTGTCGGTGATGTTTGTTTTTACCCAGTCCACGCCTTTGTTGAGGATGTC -CTGCTCTGGGGAGATTAGGTCTAGAATGCGAATGGCTTTGGGGTCTTCGCCCCGTGCGAG -GAGATGCGAGACTATCCAAGCGCCAACCAGACCTATAGTTTGGCGAGTTAGTAGATGTAT -ATGAAGAGCGACTTGTATCTATACGTACCAGTTCCGCCTACGACCACATATCGTCGGGAT -TGTTTTGGAGGCAGGCTCTTGGTCACGTCGATGGGGGTCTCCAAGCTCTTTTCATAAGCT -GCTTTGATTTCTTCGATTGTCCAGCGATGAGGTGATAGTTTCTGTGCCTCTTCGGGGACT -ACTTGCATAGCAGAGTTAACACGCCACATGTAGAGAGCGAACAATCCAGCCAGGGCGGCT -ACTATAAGCAACACCATAATAATTGGATAGTTACAGGATATTTCTTCCAAGGTTGTGATT -TCAGGGCTTTAAAGGTTACAAAGAAGTACAAAAATTCATGCTGTTTGAAAGTCTAGATTT -CAAGCAGCTTGTTTCAGCCAGGATGAAACATATCTGATCAGATCTCGGCAGATCGCTTAG -ACTTGAGTTCGCTTCGGCTAAGCGAGCCTGAGCTCTGTGGCCCCTGAGCTTTGTGGCCAG -TGAGAGCATGTCAGCAAACAGTCAAATTGACAAACAAAATTTGTTTGCCAGGAGAAATAT -GAAGAAATTCTAGGTTAGAAGTACAAAATAGGGCTGGGTTTCTTAAACCTGAGGATGCCT -TGTGTGCATGTATGAGGTGTCACATTGATCGGGGATTTCTTCGTCGAATACCTACGCAAC -AGGATTGGAACACTCAGCTGTTGCCTAGTGGAATATTTGGAGTAAGGAATAAAGAATCGA -AGAAATTTATCATTTTCAACTTTCCCTGGAATGTTGTAAAGCCCTTTACATAAACCCTTC -AGATAAACCCACATCAAATGTACATGAACTCAGATATAAACATGCCAAGCACCTAACACC -AAATACAATACCACTCACAGCTTGGGCCTAGAGTATTCGGCAAACCGGCACACTAGCCAG -GCTGAATTCTTCCACATGTTTTCGGGCCTAAGCCGTGTGGAGAGTTGGGTTGGAAAACAC -AACTTCTGTTTTATGGACCTCTTGCCCATGGGGAAACCATGCCAAGGGTTTGTAGGGAAT 
-ACATATTTGATACTCTGTATACGTAGCTTCACACAAGTGGTGTCCTTTCTTTTTGCTCCG -GCAGGTCTGGTTagccattcaagccattcaagacctccaagcccccgaagcccttcaagc -ccttcaggcccttcaggccctGTAAGCCTAGCTCTCGTCGAAGTATGCTTATCACAGGCT -TTCGGCCCCACCTTGAGAAATTCCGAGTTTCGTGATTTCGATGCCTTCGATTTGTAATTT -ATATCAGTTTGCAGGACTTGTTGATACTTAAACCTATCGGGTTCGGGATTTGGGCATCCA -TTTGCAGAAGATATCTCTACAAAATCATAGAGAACGACACACTTTATTACATTACTACAG -TCTAATCTTGAAGCTGGAACTATATTCTCTCTTACTGCCAGTTCCCCTATTTTCTTCATT -ACAGCCCCTCACACTGGAAGAGCTTGCTCACTTGATCTCCGGTACACACCCAAATCAGCC -TCCCCGAGCAGTGGTGCACTCTTCCCAAGGCAGTTCAAGCTTGTGGGCATGGCAACTCAA -TGCCCTAATTCGGGGGGCCAAGATAAATGGCCCGAAAACACTGAGAGGGTGTCCCAAGAC -ACAAACTCCATCGAATCCATTTCCACGGTAGACACACAGACCGATCGCGATGCACTAGGA -AGGACATTGACAGCTCGCGCCTCGCGAACCTCGCACATGTCATTGTCGGAGCGAGTAACG -ACAATCGCAACCAATGCCACTGCCGACCCCGATTATGAGGTTGATTGGAATGGTGACGAT -GACCCCGAGAACCCCAAAAACTGGACCCTCAAGTATAAAGCTATGAGTATCTTGTTTCTG -TCATGGAACACTCTTATCGTGTAAGTTTATATAATCAAGTCATAAAACACGAATCTCTCT -GGCTAACACACTCGGAAGCGTGCTATATTCAACATCTTATACATCAGGACTTACGTTGAT -CGCCGAAGAATTTGGCGAGTCCGAGACTATTGTCACCCTCGGCCTGACCTTCTATCTGTT -TGGTCTTGCCATCGGATCCATGTTCATGGCCCCGTTGAGCGAGGTCTACGGACGGAAACC -TGTCTGTGTAGTTTGTCTAGCTGTGTTTACAGTCCTGATCATCCCCTGCGCGCTCGCAAA -GTCCGTTACGGCACTGATAGTCATTCGCTTTATTGCTGCTTTCTTTGGCAGTGTCATGAT -CTCTACCGCACCCGGCATGGTGGCTGACTTGGTGAACGATGAGCACCGTGCTTTGGCGAT -CTCTGTCTGGAGTATTGGACCACTCAATGGACCAGGTTTGTGCTTGCTTTCCTTTTCCTT -CTGATATTCCATTTCTAACGGGATAAAGTCATCGGCCCTGTTATCGGAGGCTTCGTGACT -CAATATCTTGGCTGGCGCTGGATGTGCTGGATCGCCCTGATGCTCTCCGCTGTTGCTTTG -GTCTTTGCTGTCATTCTCAAAGAAACCTACGCACCAACCCTTCTCCAAAAGAAAGCAGCC -AAGCTCCGCAAGGAGAGCGGCGAGTCCCGCTGGTGGAGTCGCTATGACCAAAAGGCAAGC -CTTCCCGAGGTCCTCAAGCTGAACCTCAGCCGGCCATTTGTAATGGCAGTCACGGAGCCA -ATCTGGTAGGTTCCTTCCACCTATACATTTCACATCCACAAGACATGGAACTGACACTCT -CACCAGCATCTTCTGGAACATCTACATCGCAATAATCTACGGAATCCTGTACCTATGCTT -CGTAGCCTACCCAATCGTCTTTCGAGGCATTCGCGGATGGCAACTGGGCGTGTCCGGACT -CGCCTTCCTAGGCATTGGGATCGGCGTCCTCATCACAATAGCCTGTGAGCCCTTAGTCCG -ACGGCTAATCAACAGCCACGCCAAGGACCCCGAGACCGGTAAACCACACCCAGAAGCAAT 
-GGTTTCGTTCGTCTGCATCTGTGCAGCCATGATCCCAGTCGGCGAGCTCTGGTTCGCATG -GACCTGCTCTCCAGCCTCAATCCCCTGGATCGTGCCCCTTCTTGCGGGTATTCCGTTCGG -TGCAGGAAATACAGGTGTCTTCATCTATGCATCTAACTATCTGACTCATAGCTACGGCAT -GTATGCTGCTTCTGCTTTGGCGGGAAACTCGGTGATTCGGAGTATCCTTGGTGGTGTGCT -GCCACTTGCTGGCTCTTCTATGTATGACAGTTTGGGGCCAAACTGGGCAGGCACCCTTCT -GGGGGTATTGGAGGTGGTTATTGTGCCCATTCCGTTCGTATTTTATAGGTATGGACATAA -GATCCGCATGAAAAGTCCCCTCATTCTGCGCATGCAGGAAGAGAAAATGAAGCTGGAAGG -AAAGCGGGCTAGGAGGCAACTCCAGCTTCAGCAGGCAAATCAGTCGGATGAAAAGGCAGA -AGAGGCGGTTTAGGCTTGTTTTTTAATGATTTTTTTGTCCGATGATTTGATTGGGATACC -CCTCGCCTTGTAGTTGAGGTATTAAAAAATCAGTTTAGCAAGTATCTAATGCAGAAGCAG -TCCATTAATGCGTGGGTGGCTTTGAATTCCATGAAGTCGTTTGAGCCATTACATATGTTT -TCTCAGTGGGTAAACTGTATCATCTGCTCGACACGCTGAGAATTGCAAAGAATACGCTCA -TACTCCCCGATGGCAATGGCTCCCGCCGGGAAGACCATGACTGCCTCGACATACACCGGG -CGAGGGGGCACCGTAGAAGACCTCTGACTTGGCCGATGCGCGTCGTCGGGAGTCGAATCG -AATGAAGATGGTTGTGGCACCGGCTCCGAGTCTCTGACATAGCCCAAGATATTCAGCCAT -GTCCCTACGCGCAGTTCTTCAGCGGTCAGGTCTTCAAGGAGGGCATTGATATCGACAGAT -ACAGAGCTTGGTGCTATCTTGCTGCGTGGATAGTTGTGTTCCAGTATCAAATGGCCGGTG -GAGATGGTATAACTTGTTACACTGTGTATACACAGGAACGTAGTTCATCGTCAGTATTGG -AATGCCTCGACTTTATACTATTTTCTTTCCTGCTCTGAGCCTGACGCACCAGCCAAGAAA -GCGGATCTTCGAGTCCGCCTGTAGAGATGGGAGATCAGAGAGGAACACGCGAGTTGAGGG -GCGCGGCCCATTCATCCCTGCGCTAGTCGAAGAAGATGCTCCGCATTGTAATTATGCGAG -TATCTTTTCTAAAAAAGAAGGCAGACCAAAATGTCACAAACAGAACGGTCTTGCTGGCGC -ACGTGACTGACCTTACCGTGTATGTCATAGGCTCAGCGAGGGGAAAGTCGGGCTCCCTCC -CCTCCACTTTACATTCATCCCGACATTATCGTTGGAAGCGCTCTCAACGAGTCGTGTGTT -CAAGACTGGCCATTCAGAGCTTATTTTCGCGCACTTCCCGTACTTCGCCTGTCAGCTCAC -TGACATTGCCTGGAGCCTATCGCTCCACCTGACTTGCAGCAAAGAAAACAACCAATTTAC -TCTCCACTCCTCACTCACTCCGGATTTTCAGCTGCTGCCTGGTCTCCTGAGGGCATATTA -CCTGATCAAAGGAGCTTATTCTGCTTATTTACTGTGTCCGGTTCCCAGTTACACAGACAC -CGACATCACCATGTGGTCTTGGTTTGGCGGGGCCGCTGCTCAGAAGCGGAAGGATGCGCC -CAAGGACGCTATTCTACAGCTGCGCGAGCAGCTGGGTTTGCTACAAAAGCGCGAGAAACA -TCTCGAGACCCAGATCTCAGAGCAGGATGCTATGGCTCGGAAGCACGTTAATACAGATAA -GAATGGTACGATTGCGAATCTACAAGATTATTGCTGAGCATCTATTTGAGAGTTGTGAGT 
-GGTAGCTAACTATGTGTTCCTCTACTATTGTATTGAACAGCCGCCAAAAATGCCCTCCGA -CGGAAAAAGGTCCACGAAAAGAATCTCGAACAGACAACAGCTCAGATTATGCAACTCGAG -CAGCAGGTATATTCCATCGAAGCCGCCAATATCAACCACGAAACCTTGGCTGCTATGGTG -CGGGCCGGCGACGCTATGAAGCATATCCACGGTAAAATGAAGCTCGAGGATGTCGACAAA -ACGATGTACGTTGCTGCGGGGACCGAGTCTATCTCTACTATTACCTTTATCGTGGTCCTT -GTTATTTATGTGACGGCCATATACTGATAATTCGCAGGGAGGAACTTCAAGATCAGCATG -CGCTCAGCACCGAGATCGGAAACGCCATTACCGGCTTCCCTATCGGCGAACAGCCCGACG -AGGACGAGCTCGAAAGTGAATTGGAGGGCCTGGAGCAGGAAGCCATGGATGCAAAGATGC -TCCACACTGGGACCGTACCGGTGGGCAGTCAACTGGACCGGTTACCAGCTGCGGGGAATA -CAGATCGTAAGAACAGATCTTACATCACATATCGTTCGCTAACTTGTTGATAGTCAAACA -CCCTGCCAAGGCAGAGGAAGACGACGACGAGGCCGAGCTGGCAAAGTTGCGCGCGGAGAT -GGCAATGTGACGGCCTCTTCCTTTCAGTCACGCGACCAATGCAGCATTTTTGTTGAAATC -TTACCCTTTTTCTCTTCCTTGTTCCTCTACTTACCTTTGCATGCATGACCCGGCCGTCTG -ATCAAGGCATTTACTTCCATTCGCTCTCGCTGACGCCCCTCTCACCCTTCTTTATTACCT -CTATCTCTTCTGAGGTTGTTTCTACCATCCCCTCCATATGATCCGACGCCACCCAGGTAT -TTTCGAAGTCTTACCTTGCCCCGATCTGTCTTGTTTTCTGTTACTTTTTCCCCCTATCTG -GCTGTTCCACTCCTCAACACCCTTCACAAGTCGACTCCATCGATCTATTGGGTTCATGTT -TATTTTTGCTTTTTTTTTTGAAATGGAGTTTTAGATGACTTCAGTTGTTCTTTTATTATT -TCGTGTAAGCCAAGCTATTAATTTTGAAATTTCAGCTTGGGATATTCCCGTTGATACATA -GAAAAAGTCCTCGGTCCGTGCCTTCGTATGTTGTACATCTTCGAGTCTACTCTGTGCTAT -TATTTCCATACCTACTCATTGACCCACCTTGAAAATGATGCTTCTAACTAAAAATACGGA -AAAGTTCATCTATAAAATACATCTGAAAGACTAGCATCTTATATATAACTTCTTGAATAT -CATCTTGAGGCAGATGAATCACATTATTCAGTCTTACGTCGCCAGAGATATAGGCCAAGA -AGAGTACAAGTTCTGGTAGACAAGAAAAGGCGCTTCATTATTATCTGAAACTCCATACTA -AAAGCCGTCATATGGAGACCCAGCTTCGAAGAGCGAAAAAGAATAAAAACCCAGTTGATA -CATCATGGAGATAGGCAAAATCTATCCGAGGAATATGTCGAGAAAGTGAAAGTAGACCCA -GCAGAAATACAAGGGACAAAAAAAACTCCAGCGGTTTTCGCCTGCCATATCCAGCCACCC -AGCCCATGCTAACCCATAAATTTCGCCCGACGCGCAAGAATCTCAATAAAAAAAATCCCC -CAGTTTCAGTATTTATGGGTACCAGGCATAGCTCTTGGTTGGCTGAGTACGCTCGCCGTC -CTTGGTCTCTGTGGTCCATTTAGACTTCCATTCAACCTCCTGTGCCGCAGCATCAATGAG -AGCTTCCTTGCACTCGGGCGGGAGCAACTCGAGGATCTCTGGAGACATGAACTCGGGATG -CTCGGGGTTGGCGACGCTGACTAGGCCGTTGGGGCCAATATCATGCACATCACCGTCAGG 
-GCCGGGGATGCCCATGTTTGAATATGGCGGGGATTCATAGACGATGTCCTGGATGACATA -GCGGTCGTGGAGATAAGCGGGAACAGTGGAAAAGGTAGTGGTCGTTTTTTCGGTGGAGGG -TTCGCCAGTGGCAGCGTCGTTCGCTTCTTGGGTTTTGTGGGTCTGATTCTCATGATCGGA -AGGAGAACCCCCATTAGAGAGGTTCAATGAGTTCATATTACTCGTCAACTCGGCTGGGGC -GGGCGCCGGAGCTGGAGAAACGCGCTCCCAGCGGGCGTGGGTTGGCTGCATGATCTTCGG -GTGCTGCATAATATTTGTGTGTACATCGTACACACCACCCAGATTCGCACGGCGCATTTC -GGCAAGCTTCGAATTAAAGTTGCTATAATGTATTAGTCGATAGTTCCAATTGAGTTTCTC -ACTACCAGATAGAGACTCACCTTGCAGCACGCGCATGCTCAAGCATCCAGTTGTCATCGG -TGACTGGTACACGTCGTTTCTTGGTGTCAACTGGCTTTCCGCCAGCAAGCGGGACCGCGG -GCGCGTTTGTGTGATACACACTGCTTGCGCCATGCCAGGCGACGTACTGGTTTCTGTTGT -AAGTTTCTCCAGGTCCAGGCAATCGGTCTTCCGGAACGGCCAGCTCGCCCTCCACGTCGC -CACGCTCGCGAGCAGCAGCCTCGTCATAATCGTCGATGATCTTCTTGCCGCCAACGATAA -TCTTTGCACCGAACTCACGGAACACTGATCGTGCGGTTACAACACCAATGGCTCGTCCCT -TGTAAGAGTGAGGGATAAGATCGCGTTCGATTAAATCACGCTTGGCGTCATCGTCAATGA -TAATCTTGTACAGCATCTTGTGCTTCTGAAAGAATAAGTACGAGTCACGGAACCCAATAC -AACGGGCCGGTTCGGTCGACAACATGTAGAGGCGGTCTCCGCGACCAAGGATCTTGAAGG -TACGGACTCGATACTCGCGTCCGCCTTGCAGATGACCATTCTCGTCGACCTTAGTAGCGC -CGATTGGGTCGGGCGGAATCGACACTTCGTCATTGATTACATCCATCATGTTTCCTTCCT -TGTCGACGGGCACCTGGGTGAGATGGGAGGGGCCACGATTCCGTGCCCAACGTCCACCGC -TAGCAGCAGGCCGACCTCGGCGTCTTTTGACTGGAGTGGTGACTCGGAGTGGTGGGTCGC -CGTCGGCCGGATCCCATTCAGGCCGAGGATTCTTGGGTGGGCGACCAATGCGACGCTTGC -CACGAGGCGGAAGTGATGGGCGGCCAGGTCCGTAGGAAGGTGTGTTGTCTGTGTCGGGGA -GAACTGGATCTTCTGCGGGCGTTGCAATACTCAATTTTTCCTCTGGTTTGGGTGTGGCAG -ATATTTCAGGAGCCTGAGCTTCTGCGCTttcagctgcaggttcggcttcaggttcaactt -caacttcaggttcaggttcacgctcaagttcaggttcagcCGCGAGTTCGACGTTGGGAT -CTTTCTCTGGGGAGTCTGCTGATTCAGCTATGGGCGAAGAGGGGTGAGATGAAGGAGCGT -CGACCATCTCCTCATCAGACCCATCTGGAATCGAAGGTACATTTTCTAAAGCTGGTTAAT -CGCAAACGCGTCGTAAGGGGAGGGTGAAGTTCAATGCATGGTAGAAGATACGGGTGCTTG -TGGCAGGTGCGGATCAATGCGGGGTGGAGATCGACTCACTTATCGACTTTGCAGCCGCCT -GGGCGGCCGCCCTGAGCTTTCGTGGCATTTTGTCGATGGTTGGTCGATATGCTGGGGAAG -CGCAAATGGGGAGGAATAGCTGTTTTGCGAAGGGCCGAGGTTTTTCCGAGTGCTCCACAT -GCTGCAAACGCGGCAAACAGCCAAATGGTGCAAACAAGGCAAACTGGTGGATATGATGAC 
-GTCATACGATTTCTCTTACTCCAACGGAGCATTTCGGGCTGCACACCTGCTTGCTAGGCC -GTTGATCGGCGAACATTCTGCTTCTGACTGTTTTTCGATATTCGTTCCTTTCCAATTCTA -GACCCTCAACCGCCATTGCCGTTTCAAAATACCTATATCAAATTGATATCATAGGTTATC -TGCCTAGGACAGCATTCTGGGCGTGTTACCGCCTAGTATGTCAGTATGAGAGTGCGGGGA -TCCGATGCTCAATATGCGTTCATCTATGGGATCTACACCTTAACAAACCCCGTAAATACT -CACCGGTCATTCCATGCACCGTTCTTTCCCGAATCAGATTATCATCATGGAGTGTCAATT -GCATCGAACATAGGGACGACTACTCCGTACATCGAAACGCATAGGTTAATTTTATGTGTA -ACCAATGACCTCATTTTCTGCCCCTATGTGTAGTCACAATCCCCAATATCACATGTGAGA -GATAGCTCCTGACTCCAACCCACCAGTGGCTTGAGCAGACCATATCCCGGGTCAGTAGCA -TAGTCACTTAGGACCAAAGGGCATGGTTCAATCATGGTAGCAAAACACCGCTTGGACCAG -GTTATCATGAACATCAAGATTGGCTTTACTAGTAGCGTCACCCCCGCAAGAGTTCTACTG -TAAGACGTTACAAGCCATATCTCCTCTTTCGGTCCATCAGCCGCGGAAACTTTGAATAGT -CAATATACAGAACGCCTCCGGAAATATACTCGTTCTGATCAATTATACTCTCAGCCCACA -GCAGGGAGCTTGAGAAACATGCTGGGTAGAAATCGGCTTACTTTGTTGTACGGAGTACAA -GCCCGCAGATCATATCCCAGTGCAGATGACCTACAAATTCAATCCAAACTCGGACTCGGT -CTTGTGCATAAACGGAATATGCACATTGAAATCATGCAGGTTTTGACTGCACATATGTGT -ACAGTATAGGTTGTATACCTCCAAAGATGGTCGAGTTGCCTAAAATTCGAGCATCACGCA -TGAAATCATGGCAAATACGAATTAATACGAACTTTCAATGGTGCAAATCCAAGGTATGAG -GAGATAAGATATTAACCTTGAACCAATGTCCTGGATCTTTATTTCCATAGTATGTTATTC -GTCAAATATGAACCCCCAAGGGAGGTTGAGGAAGGTGCCGATCACCTCTTTATTTGCCCT -CCCCTCCAACCCCAAGACCTATTTGAGGGCGTGAATACTCCAAGCTGAGGGGCCAAACCG -GAGATCGGCAATAGCTCTTAACGTCACTTCTCCTACCAATATATATCGCAGTAAGCTCAG -TTCTGTGGCACGTACATCATTCACGCGGGTCTTAAATCCGCCGAGCCACATGTTAGCGCA -GAGGGACCTCACGGCCCCGATGACGCATGTGGAGGGCAGCCTTGAAGCACCCACTGCGAT -CACAGCATATCTCACATTTCACCTGGTATGCAGATGCAAGGGGAGCTGAGAGCTAGACCT -GGGCCATATTGGAAGCTGAAAGGCTGTCACGATTTCGCGATCGAGACTCGATCTATGGGT -CTCACAGTTGTCATTATTTTAGGCGAAAGGTGCCCATGTCTTGATTACATCGAAGATTTA -TTACCACTTCAAGTCATATATCTACTAGGACACGGAAACCATCCGAATATAAGAAACATT -GCATACCTAGGTAATCCAATGTATAAAATGAGCTGAGAAGTGGAGATAAAGATTTGGAGA -TAAGAAAAAATCGCCAGAGAAAAAAAGACGCACATCTCTGAAAAGGTTCACATACTGGAG -CCTAATTAGTGACCGTGACCTGGAAATCTAATAATCTTTATATAGAAATACCAGAAGGGC -TGATATCTAGGGCTATTGAACATTATCACCTCATTTTCTCCACATGACGTTGGGTCTTTT 
-TTCTTTTTTTTTTTATCTTGGGACTTTTTGATCTGATCTCTGATTTCTAATAATTCATGA -CTTTTGATGTATTAGGAAAGTAAGTACGTACAATATAACTAATTAGATGACTTGATTACC -TTACAAGAATGCATCAATGCCCTCTTCGTTTTTCTCTCCCAGTCTTTTTCCCAACCTCAG -CTTCAGTAATATCTGATGTCTTATATATGATATAATCTTAGGCTACCTACCTAAGGTATA -TGTCCAGACTTAATAATACCCGAGAAATATGGACTATCAGTGAACCTTTTTTTTTTATCT -GGACCTTTTTTTTTACCGAAGAAAATTCCACTTCTAAATCCCAAAAGAAATCTGTCGACC -TTTTTTTCTTTAAAAAAAACCCGAATTTCTTGGGGAATATCCAAACGGGTTCAAATCCAT -CCCGCAAACTCCCTTAGCTCATCAAATCCTTTTTTTTCCCAGGGAATTTCAACCCCCCCC -ACTCTCCGTTCCTCTCTCTCCTCATTTTCCGACTTTTATTGAATGGAAACACAATGGCTG -CGCAGAGTGGACCCGTGTTGCACGGCGACGACATGAAGACAATGAAGTGAGTCAACTTCT -TTTCTTTACAGCGGGGTTGCAGAGGGAAAAAGAGGGTCCACGATCCCCTCACACCGCATT -GCATTACACTTGTTCTTGGGGTTGAACCTGCATGAAATCTGTTTCGAGGATGTGTTTCTC -TCTGCCTTTGATATCGGAGCTTGTCGGAATTTGCCTACAACCTTTGAATGGAAGATTTTT -ATCATTTGACTAATCTTTCATCTCAATAGCTCCGGATTGGAAGACGGTACTCCAGACCAG -ATCGGGTCAAATGGTTCTACCCCCACGCCGACTGGAATCGCAACACCTGAGCCTGATCCG -GCCGACAAGCGTCTGCCTTCCATTATGCACAACTACTTCCAGGTTGGTACTTTCTCCGGT -GATAAAGCGAGTCTACCGCGGTTATGGTCGTGTCTGTCGAAGCCCTCTGAGAACCCTCAA -TCAAATGCGCATCCCCAACCTTCCTCCGATTCCGAGTCTTTTGTCATGATGGAGCAGGAA -GATGGGTCTGATAAGATGGTTGAACACCCCAATCTCCCCACCCCTCCACACTCCTTGTTG -CAGCACGAGTCGGATGAGATGGAACTCGGGACGAGTCCAGGAGTGTCCTCTATCTTTACA -ACGCTGAAGAATTACCTCATTTCACCCACACAAACTCCTCCAGAAGACCCACCTTCCCGC -CGTCAAACATCCCTCCCAGTATCCAGTGTCTCTGATGACCCTGTTCTTGCCATACATTTC -TCCAATCCATCCCTCCCTCCCGTTTCAGATGCCTTTTCCCTGACTGAAGCTCCCCTCCTC -GACCATGAGAAACCGCATATCTCAGTATCTTCCGAGAACCTGGCCAAGCTAACTGAACAC -GCGCCCAATGGGTCGCGTCCTAAGAATACCCCACCTCTTACTCCCCGGGCCATGTCCAAC -GAGATCCCAGCCACCCAGGAAAAGACCGCCACCTCGGCCCCGCAAGAAAGCTCGCAAGAG -GCTCCACAGTCTAAGTCTAAGACGGACATCGAAAGCTCCACGGACGAGATCACCATGAAA -CTCGATGAAGCATTCCCATCCCAGTCCCAGGCGGAGATTGCGCCTACACCTGCAGCTGCA -ACATCAACATCCCCGGCTAATGGCGCTCCCACAGGCCCGATCAATGGCAAGTTGTTTGTG -AAGATCACGGATGGTCGAGGATTGCGGCCAAGTTTTGATCCCTATGTGGTGTGTGTATTT -GAATGGAATGAGTACATCTCCAAAGGTGCTCGTGACGGTGAGGAGGAAAAGAAACGGCGT -CAGCTGGAGTCAGATGCCGAGGAGGCTGCTGGTCGGCCTATGGCTATTCCCTTGAAGTCC 
-CGATCCAGCTCTCACAACAGTGCTGTCGAGGGTGATCACAAGGGGCGGACACCCGTGACC -GACCCGCACTGGAATCACGAGGCCACCTTGTATGTTACTCCAACCCAGCCTCCTTAACTT -GAATCACACGCTAACACGTCCTTTTAAGTGATGTGATGGGAGATCAGTCGGAACTTGATG -TCACAGTTTATGACCGCAACAACCAGGAAGCTTTCTTGGGTCATGTACGCCTCTGCATCA -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNGATGCCAGCCTCCACACCCTTGTCCCCCGGAATGCAAGCCAATTTCAAGG -GCTTCACCTTTGTGAATGAGAGCTCTATCGACCACCACCTTATGGACGAGAGCGACCATA -TGGAAGAAGACTCCATGCACGACGATTACTGGCAACGAGGCCATCGGTCAGGCAACTCTG -GTGACCAGCGCATGGCGGGCGTTCAGAAATCGCACGACGGCACGGAACCCGGGATTTTCA -ACGTTGATGATACCTTTGACATGTGATTACGTTTTTTTTTCTCCTACTGCATGATTATGG -AGTCTCGATCTGGCATACATTTTATTTCTTGTCTTGTCTTGTACATGTCTTCGGATTGCA -TCACTTCCAACCCGCAAATATGGTTATCCCTTTTCGTTTCCATTCCCATTTCTCTTCGCC -TTATTTTCTGATTCCTGGTTTTTTTTTCACTCATGCTGGTCTTTCTGCAGATTCTGCCCC -CACTGCCTTTTCTTGATCCTTATTAGTCTGTTTCCTGTTCTGTCGAATGCCGAGTTGAGA -TGATGATCATCCTTTGATCGTGGTCCTGGTACCAATCTTGAGCCTCGCTACTCGTCATGG 
-TCACAGCCCTCTCGCTCGTTTCGTCAGCCAGTTCCCAGTCACATACCCGTCCCTCCGGGT -TTCGGTCAGTTTCTTCACGTTCCGTTTCTGAGGAAACCCTGGGGGATATCTCGCGTGTTT -GGTGGAGTTGGGTTTGGGTGATGTTCATTGTGCACCCTTATTGTCGGTCATCATGTTCTG -GGGCGATTATCTAGTCAAAGCGAGCTGTCTTTGTCTTTGTCTTTTATCCTGTCTCCGGCG -CACTGTCGATAACTCTATTCTCTATCCTCTGGGATTGATGTCCTGAATTTTGATACCTGC -TCTTTTCTCTTGGATATTTTCTCCGGTCTTCTCTTCTGCCTCTATCCTTGCTTTCTCTCT -TCGCGAAGTTTCCCTTTCCAACACGGTACGGTGGTAATTTGAGGGATCTTTTCTCTCGGA -CttttcttttcttttcttttttttttCTCTTGCATATCCAGGCTTGATTGGAGCTACTCA -GATTCGGGGGACAACCGAGTCGTAACAGAGTATTTCATTCCAGTTCCATTTTCATGGCGT -TAAGACTGGCATCGGCATATTATGGGGCTCGATTTACATTGCATTGGGACAGGCGAAAAC -GCTGCTCTAGTTGAATATTAGACTCTTCCATCCAATCCATCCATTATTGCGGTGGAAATT -TTATACCCAATGCTGCTTCCCGAATTCCCTAGCATATCATGTATTGTCCTGTATAGGCTC -TTTTGATTCGATCCTTTGTACCCGAACACGGATTATGTCTACAGGGGGGTTGTCAACCAA -TAATGACACTAAATTCAGATGTTCTGACTCCTCTCCGATCCCGTCATCTGTTCAATCAGT -GCATGCTTAGAGTCTAGATCATCCATGGATGTTCGGTGGGACCGCATTACTCTTTTAAGA -AAATATTTCCTCGACACGCTCCTTCTACGTCCGTTGCTTCCTTCCGTTTTAGACCTACAC -ACGTAAATAAGAAGGCACAACCCAATTCAAGAAATATCCCATAAGACCCCCCTCAATTTT -GTTTTTAGTCGCACTCCAACGTTGAAGTACACATCCAATCCAAAATGGCCCATCCAAAAC -CAACAATCCTCCCCTCGATCTACACACAAAGAACACCACCAGAAACATTTTCAACCTCAA -CCCACGGCGATATATATTGGCACACCTTGATTTCATCCCCCAAAACAAACTCCACAGACC -TGTGCGCAGGAATAGCAACATGTCCGCCAGCGTCAGGTCACTTGTGTGCGCACCGCCACA -TGCAGTCTGAGATATATCATATCCTCGAAGGCGATGGCGTGGTGACGATTGATGGGGTCT -CAAGTAGGGTGCAGGCTGGGAGTACAGTTTTTATTCCTTCGGATGCTGAGCATGGGATTG -TGAACACTGGGACTGTGGACTTGAGGTGGTTTTATGTTTTTCCTACGGGGAGTTTTGAGG -ATATTGTTTATCGGTTTACGGGACAagaggaagaagggaagggaatggagaaggcgaagC -TGTGAGGGTATGAGGTTGTTTTATGCACGATGTTAGATCCATATGTGACCCCTATGTACA -TACACTACCAAGCAGGTGCACATTCAAGGAATACTATCTGAATCCCATCCATTTCCATCA -TCTCCTTGTCAGGGTGAAGTTGGTTCTCGTCCAGGATGTCCTCTCACGTGCTCTTGAATG -GCAGCTCATGGTGAGTCGTGACCAGAAATGATACCCAGTTCTCCTCACATGTCTTGACCA -CAATCCTTAGATCCTGCCGTTTCTTTTTAGATCAGGCACGGTGGTTTGTCTGGTCAGTCG -GCTAGCGCCACACTCGACGTGATTGACCTTCAATGCTACCGGGATAGCGCATAAGCGCTG -TGATTAGACACGATAGGATAGAGCTTGGTGTTCCACGGGCGATGCCGTTCTGAATAACTG 
-GATTCGTGTCTCCCGCGGGCGTGGTGATACTTTCAGGGCCAAATTCTTCCTTATTCGCAA -AATTGTAGTACAGTTCATTCAAATCGCTATCTTACATAACAATAAGCTAGAGCCGATTGC -TTGGCTTGCCCTGTCATCCTTCTGTCCGCCCTACGTCTGGATCCAGAAAACCCTCGATTG -ATTATTCAAGGCCCGGTGCTCAAGAAAACGACGCTCTTTCTTTAAAAGGGAAAATTGGTA -TCCCAGGTCGCCTTTTAATGATCGAATCTAGTGGTCTCTTTTACATTTGCGCTAAAAGCA -GCCACATCCACCTGGGTGGTTCCGCTTCACTCAGAAGGCATATGTGCCTCATATCTAGGT -CATTAGAGTGAGATCCTTGAAGCCTCAACGTGTAGATGAATTTATTTCATAGGAATGGAA -TGGTTGAGTTGTATGTCCTGCAGTGCTGCGCTGGGTTCAGACAGATTTTGTGCAGTCTAT -TTTATCTTATTTTTTGGCAATCAAACTCGACAGGCCTGTCTTGGTGATCAGGAAGGATGT -TTCCAGGAACTCCCTCTTTTAGTATAGTGATGGTCATGCTGGTTCCCGTCTGGATATTGA -GTGCAGGTACGTGGACAGGTGGACCTCCCCGATGGAATTTGAGACTCGAGGAACGCCCCC -AAAGCGGCACTTTCAAGCCCTAGTCGTCAGCATAACAAGATCCAATGGCGCAATTGGCCG -CAGTACTGCAAATGGACCCTCAGTGCGTCCCCACCCGATCGGGGTTTTAGCTGTGATGCT -CTTCCACGTTCCGATCTCTTGGAGTGGATTGAGATATACCCGTGGGTTCCGATGGTGGCT -GCCCTTACTCTGTATATCTCGCAATGTTAAGGGGGATGATGTGGTCGGACTACTCCGTAG -TACGCGGTGAGTCTGTCGTATGGAGTAGTCTATTCGCCGATCTTGGTATCTTTAATCTGG -GAACGTGTTGCTAGGGACTTGCGTCCACTCGAGTCATGTTCTGTAAAGTTTATTATGCAA -TAGGTACACTTCCGACTGAATGATACTATAGGCTGTGGTGTTCAGAACGGAGAAGGGTGA -TATTGTCAATAATAGTAATATAGATGTCGATCTTTTGATGCCATGCAGCGACGCGCAGAC -CACTGTACTTTGTCATCATTTCCAAATCAAATGTAAGAGAACCGTTCCCCAGGGCTGAGC -CGTTCAATTGCCTAATTCATTGGCGTGCAGGATGATATGCGCATGGGCCGTTAGGATATA -AAAGGGGACCTCTAGCGAGTCTATAGTATAGACTACTCCGTGTAGACTACTCTAATATGA -AACACCTATCAATCTAAATCACCTTCAAAAAGAGAGATAATAATATATATAAGACTTAGT -ATAAAAAAAAGCAAAGAGAAAAGACCAAAAAAGAAAAGGGGTAAAACACCCACCAGGTTA -AACCTAAATGGGCAATAGCGTCGTCAGGACGAACCCTTGACACTCGGATGCTTTTTGATA -TTCATCTTCCTTGGATGAATTGGTAGGGATGTTGGAGGGTATCTTGATCAACAATGGAAA -TAGCTACAAGTCCTTTACGCAGACCATATACAGAAGGATACGATTCTCTCCAGAGATACG -TTATAGAAAGAAAATTCTAGGCAACCCCTGAACCGCCCCTTCCCGTCTCCGCTCTTCAAA -GCATGCACGGCTTACAGTAAAAAGGTCCACGACCTTCAAATCATTCCTGATGTGGGGGAG -ATCGACTGCCATTGTCTTTCCTTTTTTTCCCTTTTCTCCCTTTTCCAACGTGCTAGTTCT -AGTATCATCTTCTTAGATATCCTTCCTATTCCTGCGCTTTGCTTTCTTCTCACCTTTTCC -TATCGTGAAGGTCTCACAGGGTGACTACTACTACTTGATACAGTGGAATCTGCCGATCTC 
-CATATACTACGCCATTTTCCGAATATCCTACATTCCCGAACCCCTCCCTATTGGGAATTG -ACTTTGCTTTTTCTTGCCTCGCGGTTGCTCTGATGCCAGAAGCAACTTCACAATGAGTGT -TGTCGCCACTAGAACAGAGACCCCTGCGGTATCTCAGAATTCAATCCCTGAAAATCATGT -CGACTTTTCTGTCCATTCAAATGCCTCCCGAACCTCCTCGATATCATCTTACAGAACCAA -TTATAGCGCCTCAACTGCGCCCACCACATATTCTCCATCTTCCCCATCCTCATCTTACCG -ACAATCGGACTCGGTCAATTCTTTCACCAAATTATCCGAACCCATTCAACGCCGAGTACC -CCACGAGGTCTACGAAGTGATAATCCGCCATCTAGAGGAGTTACACAAAGGGCAGCATCA -AACAGGATGTACCACGTGTTTTCAGCGCGACTTACACGCCTTATCCTTGACCTGTCGGTC -GTGGGAAAAGGCGGTTAGAGGTTCTCTGTAAGTATTGAGTCCAGCCTCCCCTTGATGAAT -TCGAGGGTAGCAGGAAAATCATGGTTTGCTGATAAAACATGCCATACTAGATACCACAGT -ATACACATTGTCGGAAATGATTCCCCGGCCCAACTCAAGAAATATCGTCTGAAGAGAGGG -AGTCGCCTGAAGCTCCTGAGACGAACCCTCCGTGAACGAAAGTTGCTGGCTAACCTTGTA -TACGAACTACGGGTGCCCCAATTAGATTTGCTCTTTACCACGACGAAACAAGGTACGCAA -TGGCAGGAGTATCGAGACCTGGTAGCGTCTGTGGTTATGGTGTGCCCAAATTTGGAGCGC -CTGTTAGGCTTGTCTATCCCATACCACCATGAATTCGATCGTCTGACCCATGCTTTGTCG -ACCCGCAAAAAGCTCAAAGAGCACACGTGGGTTTTGGGCGAGGCTGCACAAGCATCGGAG -CTGTCACCACGCGACGACTCCTGTCCTGGGAGCCTTGGTCCCTCCCAGATGTTCGAGTTT -CTGAATTACCACACCTCGTGGACAAGTCTGGAGACACTAATGCTTTACGGGTTGAACGGA -AATGGCGCATTGGAGCCTAGCATCTCCCTGCGCATATTCAATCTCCTTCCCTCGCTGCGC -AACCTTTGCATAACCTCCTTCAACAAAGACTCCTTCGATAATAGCACATTGATGTGTCTG -CCACCATTGGAGTCGCTAAGGTTGGAGAACCTGCCCGGGATTACAGATGCTGGTCTCTCG -CAGTATATGACACGACCTGAATCACTCTCCCTCAAGACACTCGTCTTGGTCGAGCAGAAC -ATCGAGTCACTTCTAGTCATTTCTAAAATTCTCGCGTCACTGCGACAACTTGAAAGGTTC -AAGGTAGTCCAGACCGAAAAGTGCCCAACTTTGCCGGATGAACATATGATCTTCCAGCCT -ATCCTAGCTTCATCGACCCTGAAATATCTTCACTGGGATGTGGCATGCCCAGACCCAGAC -ACAGCCCTCACCCAGCTTGACACACTCCCCTTCCATAAATCCACAAAATTCTCCAATACC -CCCAACTCTCATCTTGCTCAAAGCATTATCTCCGGGGGCTTTCCTCAATTGGAAGCTCTC -CGCGCACCCTCCGATGTGGAGCCCCCAGGAGCGCTCCAGGCTGTCTGCCGGCCTATATTG -AAGGGTCAAGCCTTACTCCGACCAGACCGGTACAGCTTACCCCGCAGCTCTCACGGTTCT -GTGAATACTCGACCGCTCGCTCTGCCGGCGGGGAATAATCTGACGTCAGCTCGAATCCGT -GCTCAGACGTTCATCGACATGGCCGCCAAGGACACCGAGGCCGGCATGCAGGTTCTGATC -CAGGATTACTCGGATGCGTTTGTGCCGGACAACGCTCAAAATTCGCAATTCGAGGATGAG 
-TCCGACCAGGAAATGGACGATTACGGGATTTGGGCTGCTTATGAGCGGTTCAGACATGAG -GACAATGACCACAGCGGCCCCAAAACGATCTACGAGTTCCGCATGCCAACCTTGATGGGC -CGTAGTGGCTTCCGGGATCCGGCAACTGGTGCATCGATTCCTCACTTCGTCCTCCGTCCT -GATATCACCGGCCAGGAGTTAGACGGTGGCTTGATTGGGTGGAAGCAATTGCTAGCTTCG -AACCAGTCACTGTCATATGCCGCCGGAGTAGGTGTCAACTGCTTTGGCAGCGGAAACCAT -TCCATCCCACCTCCAGAAGAGCCCATGTCGCCTGCATCCACCATTTCCAGATTTGGCTGG -GGAAGCCTGAGTGGGCGATCTGTAATGGGTACATCTCCAACGACACCCACGACACCAATC -ACGCCTCTAAGCAGTGTAATGGGCTCGGCTTTGCCTTGGGACAAGGATACCTGTACTGGG -ACCTGGAATCACAAGATGGGCCGGGACTGGTGGTTCCATATGGAGCGTGATCGTCCGTCC -AACTCTGAGCTCGTCGACGTGAAACAACTTTTCTAACTAAATTTTTCTACTCCTCGATCC -TCTTTTTCTCTCCTTCTATTTCTTGTCTACATACTCTGCCATATACCCCACACTTGTTAT -ATTCCCTTTTTCTTCGTCTTTACCTTGTTCACCTCTTCATCTCGCTTCGATCATTCGATT -CACTGGTTCTTATCTCATTGTACTAGGTTTTTTTTTGGTACACACGCTGCTTTGTCAGAT -AGACGGAGATAGCATTGCATAACGATTCGATGTGCTTTGGCTCTTGGCTATTTGAACTGA -CTTCATTGTTCTATCATTATTGGCGCGTTGGTGGAAGGTTAATGTGCAGTGTTCTTTCTG -TCCTATTTGAGTCTGGATACTCTCTGTATTATACTTGTACAGCATTCCCCACATAGTATT -AATTGAAGAGCATGTTAAATGATAGATCATCAAAGGTTGTACATGGCAAAGTCGTCTTTG -CACACTTCAAGAGCGGATGATTCGTCTTATCCTTCTATAAGTACGCCTAAGGCGATATTG -AAGAACACTTTTATAGCGACTACCAACAGTCTATCAAAGAGACACCACAACGCCGCATCA -ACGGCCAACTATTCTTGTCCGTTATATAGCCCTATAGCCCTACATTGAAACCGCACACTT -TGACACCTGGAGAATGTGGCATCCATTTTGCAATTCCGAAGCGGTAGGTCTCGACAGTAT -AGAAGATGACTCATCCCGGGCTAAATCGGACCGATAAGCGAATGTCAACTGATAAGCATT -GAATACAAAGAGCCCCTCGTTGTGAAAAAAATATCCCCCACCTTCGCTTTCACAATCGCT -ATTCGTCGCAATGGACCCCGCAAAGCCCGTTGACTTGCCTGAGAGGCCCAAGGAGGCCAC -CGAGCCTACGGCTCCACCCAAGAAGTCTAAGAGTCAAGAGAAGAAAGAGGCCGCAGCGGC -CAAGAAGGCGGCGGCCAAGCTAGCAAGACAGGACCAGCCCAAACCAAAGGGAGAGCCAAA -GGCTCCCAAGGCCAAGGAGGCCCCTGCGCCCCGGGACCCTAATGCTATGTTCAAGCTCGG -CTTCCTGCACGACGTGTACCAGGAAAAGCCCCTCTCCGAGACCGTCCCCAAGATTCGCAC -GCGATTCCCCCCCGAGCCCAACGGATACCTTCACATTGGCCACAGCAAGGCCATCGCAAT -CAACTTTGGATTTGCCAAGTACCACGGAGGCGAGTGCATTCTGCGCTTTGACGACACCAA -CCCAGAAGGTGAGGAGGAGCGATACTACAATGCCATCCGGGAGATTGTGACGTGGCTCGG -TTTCACACCTGTGCGTGAGACCAATGCCAGTGATAACTTTGATCGCCTCTACGAGCTTGC 
-AGAGGATTTGATCAATCGTGATGGCGCATACGTCTGCCATTGCACCAAGGCCGAGATCAA -GGCCCAGCGTGGTGAGGTCGAGGGCGGCCAGCGCGGCGGAGCACGCTTCTCGTGCAGCCA -CCGCACACGACCCATTGAGGAGTCCATGGCTGAGTTCCGCGCCATGCGTGACGGAAAGTA -CAAGGCTGGCGAGGCAGCGCTGCGTATGAAGCAGGATATCACAGACCCGAACCCGCAGAT -GTGGGATCTGTTCGCCTGGCGCATCATGGGCACCGAGGATAAGCAGCACCACCTCACTGG -CGGCAAGTGGAAGATTTACCCAACCTACGATTTCGCGCACTGCTTGTGCGATAGTATCGA -GGAAATCTCGCACTCCCTGTGCACAACCGAGTTCGAGCTCTCTCGTGTCTCCTACGAATG -GCTCTGCAACAAGCTGGAGGTCTACTGCCCCAACCAGCGTGAGTACGGCCGTCTTAACAT -CACCGGCACAGTCCTGTCCAAGCGCAAGATCATCCAGCTCGTCAAGGAAGGCCATGTCCG -TGACTGGGATGACCCACGTCTGTACACGCTCATCGCGCTGCGCCGTCGGGGTGTGCCTCC -CGGCGCCATTCTCGCCTTCGTTAACGACCTCGGTGTAACAAAGAACACCACCAACGTCCA -AGTCACCAAATTCGAGCAGATCGTCCGCCAGTACCTTGAAACCACCGTCCCACGCCTGAT -GGTTGTCCTCGAGCCGCTGAAGGTCATCATCGACGACCTTCCGGAAGACCATCTCGAGCT -CCTCGACTGCCCCTTCTCCAAGGACCCAGCCTTTGGAACCCACAACATCCCCTTCACGAA -GACGGTTTACATCGAGCGCTCTGACTTCCGCGCCGTCGACTCTCCAGACTACTTCCGCCT -CGCCCCAGGAAAAACAGTCGGCCTCCTCAAAGCCCCCTACCCCATCACGGCTACTTCTTT -CGAAACGGACCCTACAACCGGTGAGGTCACCTGCGTGCACGCCAAATACGAGAAGCCCGC -CGAAGGTGAAGCCACCAAGAAACCCAAGTCCTTCATCCACTGGATCGCCGAGTCGCCCGC -GCACAATAGCCCCATCCGTGCTGAGGTTCGCGCTTTCAACTCGCTCTTCAAGTCGGAGGA -TCCGTCTGCCCACCCGGCTGGTTTCCTCGCCGATATCAACCCGGACTCCGAGGAGATCTT -CAAGGACGCTATGGTCGAGACTGGGTTCCGAGACATCTCCCGCTCAGCGCCATGGCCTAA -GGAGGAAGGTGCCACGGAGCCTGGTGAGAATAAGCACTCTATTCGCTTCCAGGGTATGCG -TGTTGCGTACTTTGCTGTTGATCGCGAGTCTACTGATGAGAAGTTGGTGCTTAATCGCAT -TGTTACTCTCAAGGATACCCAGGGTAAGAATTAAGGCTTAGAACTGAGGGTTTTCTAATC -AGAATGCCTTTTCTTCTTCGTCCATGACTCCTAAGTTTCTCTATTTATTTCCCTTCCCTG -TTCCTTTTTTTCTATATAAAACACATGGTGGAAGTTGTGTGCGTGCGTGTGATGAGCCAC -GGTTGTTGGTATGGATTTATGACCATTGCATCTTTGCGTGTCTTTTTATCTTTTTTTCAG -CTCTTGGAGATTCTCTGGGCTGTTGTGATTTTCACTATGCATAACTGATTCTAGTGGCAG -TTATTGATATACGCTACAATGCACAGTGATCAAGTAGATCTCTTGACTCTTGAATATCTA -GGTATGACTATAAGTCAAACTCCGCCATATAGGCAGTACATTCGTCACATGACAAATGAT -CAAACACAATTTACCCGGGAGATGTATAAAGAAAATCGTAATAAGAGGTCTACGCCATTC -ATGCATAACGAATTACCGGCTTTCATTCAGGCCGAGGGAGCATACATACACGGGATGACT 
-CAAGACGTCAAAGAGGCAAAAAAAGCAGAGGAAAACGTGTATGGATCGGGGCATAAGAAG -CAAGATGGTCTACTCTAAACATAGATGCCCATGTTGGTATCCTTCTTCTTAGCCTGGACC -TCGGCGATAGCATCGACGTAGTTCTCGTGGCCGATCTTGCTGAGACCCTTCCGGAGTGCA -ATCATACCAGCCTCAACACAAACAGCCTTGAGCTGGGCACCACCGAACTCGTCCGTGCTG -CGAGCCAACTCGGCCCAGTTGACGCTATCCTCAACGGACATCTTGCGCGAGTGAATCTGC -AGAATATTGGCGCGGGCTTCCTCGTTCGGCAGGGGGAACTCGATCTTTCGGTCCAGACGG -CCAGAACGCAGGAGGGCAGGATCCAAGACATCGACACGGTTAGTTGCCGCTAGAACCTTG -ATGCGATCATCCGAGGCAAAGCCATCCAGTTGGTTCAGGAGTTCCAACATGGTTCGCTGC -ACTTCGCGATCTCCGGATTTCTCTGAGTCGAAACGCTTGGTACCCACAGCATCCAGCTCG -TCGATAAAGATGATAGAGGGCGCCTTCTCCTTGGCCAGAGCAAAGCAGTCACGCACAAGC -TTGGCGCCGTCTCCGATGAACATCTGCACCAGTTGGGGTCCGGCGAGCTTGAGGAATGTG -GCGTTCGTCTCGGCGGCACAGGCACGGGCCAGAAGTGTCTTACCGGTACCTGGGGGTCCG -TACATCAAAGCACCTGCGGAAGAGAAATGAATAGGTTAGCTTTGGAGTACAGGGAAATTG -TAGAATAACTCACCCTTAGGGGCCTTGATGCCGATTTTCTTAAAGCGCTCTGCCTCCTTC -ATAGGCCATACGATGGCTTCCACGATCTCTTCGATCTGCTTATCCAGACCACCAATATCT -GTGTACTTCTCCGTAGGCTTCTCATCGACCTCCATCGCCTTAACACGGTTGTCATACTCG -GCTGGTAGTGTGTCCAGAATGAGATATGAATCCTTGTTGACACCGATCAAGTCACCGGGC -TTCAGTGTTTCGTGGTCGACAAGGCCAATGAGTGGAAGGAAGATCGTCTGGCGGGTGGAG -GTCTTGATGACAGCGGATTTGCCTACGCGAGTAGCATCCAGGTCGATATTAGCACCTTCC -TCTGCTGCCTCTGCCTCCACATCAAGATCCAATAGCTCGACGACGTTTCCAACAAGGTAT -GGTAGTTGTCTAGATAGTCGCAAATAAAAATGTCAGTAGACTGTTGAAACTCAAGTGTCT -TTTCGCCGATCCGCAAGCCGGGCACCAAGCTCCGCGGGCTTGCAGAACAACGAATACAGG -GTCCGGTGGAAGCACACCTGTTGTTCTCAATCTTCTCTTGATTATCCTTCACCTTCTCCC -TCATTGTGCTCTGCTCATGTGTCAAACGTTGGAACTCGCTTTTCATGATTCGCATCTCGT -TCTCCAGCATCCGCCGTCGCGTAACGATGTCTGTCGTGCTTGAGTTCAAGATATCCTCGT -CCAACATCTCTTCTTCTTCATCTTTTTTGTCGGCGTCTTGCATATCAGcatcgccgtcgc -cttcgggcttcttgccatcgccatcgTTCTGGTCTTGCTTCTTCTCTCTCTGCTCCTGCT -CAAGGTCGTCGAGGTCCTCAAGAGTCGACATGGCGTAATCGAGAAATTGGGAATTATAGA -ATTAGATCAAAAGAAATCTCTGAAATGAAAAGCCGGCAGGACGATAGATTGAAAAAGGAT -GGGGGAGGATGAGACAAGGTCGCTAGGAGGAAGAGCTGTTGGTGGCCGAGCTCCAACAAC -GTCAGCCATAGAGCGTCACGTGATGTGATCTACGTTTATAATCAAATGCGATCCGATTCT -ATCTACTTTGAATCTGTTTGTATTTTGCACAATATAAGCCTTTTCTGTCGATTTATATTG 
-CGTCAAGTACCATGCATTCTAGATAAACCAATAAGGATTATTGGGCTGATTTTGGACGTT -GCCAAGGTTGTCCAGCACCGCTTTCCGGCCGAACTTGAGCAGACGAAGTCGAGAAATGTA -GTCATCGTTGTTGACTGGTACTTCATTAGCAGAAAATCATGAAAGAAGGGTTATCTGATA -GTAGATACCGTAGCCACCGCACATTCGCCTCTTTCCAGTGAACTCGGAGCGGCCATGTAA -CATTCTCCAGTTATCGAAAACTAAGACCAACGTGTAAGCCGAAGTGGTCTTCAAAAGGGC -ACGAAAGACTTACTGAGCGCCGATCCTGGCTTTAGCTTCGTCCACATTTCTCTGCTCCGT -AAAATCTCGTTATAGTGGCGCGCAGCGCCGTACCACATATCCTGTTCCGATACCGACCAA -TTGCTCTTTGGTGCACGATCATAATTGTTCCAACGGATCTGGTACATTCTATTCGAGTCC -GGATGCATTGAGAATACCGGTGCCATGGCGGATGGTTGGATGCATGTGTCGTCGTTCCCG -CTTGCGTGCCACGGCTGCCTTTGATTTACCAATGAGGCATAATGCTCGGGATTCTCGCGG -CGGAGTTGAGCTGCGGCTGCGAATCCATCAACTAAGAGGTTCTCTCCCCCAGATCCTTCG -GTATGAGATAGTAGATGGAAGAGCTGAAGTCTTGCAGGATCTGTGAAGTAGGTATTGTCT -GTATGAGCACCCAGGAATTCGTTGGTGTATGCCGTGTCTTTAAAAGTCAAATCGGCCGTG -AAGTCCCAAAAGCCACCTGAGGCTATGTTAGCGAAATGTTGCCTTATTGAACTTTGTGAC -TACGGTACATACCATAATGGGTGTTCCTGATGAACGCAATCCGCTCGATGAGAATTTGAG -TTGCTTCTGGGTTGATAGGAACGTCATCTACAAAACAGAACCCATGGTTCCACTTCACAA -GCATTAGCAAGTTCAGTCAAGAAGTTGAGAGAGGCGGCTTACGATATAGTTTAGCCAAAT -GTGAAGCGATTCATCGTTTGACATAACATCATCATATTTGACTCTGGGACTTTGGTCTGG -ATCCACGACTGGGGTGAAGTACCTTTTTTTACATCAACTTCATTCCGCGACCGATTCGTC -GGCTTTGGGCGACTTACCTGGGGCCTTCCGTTTCTGATTGAGGATCAGACACCGACTTGG -TGTCTATAAACTCAGGCGTGGGCTGCGAGTGTGCATGAAGCCAAGCATGCGGATAAAACC -CCGTGTGATTATCAGACCCTGTGGCGGTGTTAGTTCTGATAGTGAAGGTCTCAAAGTTAT -GTCTCACACTTGATCGTGACCATATTGTCACCGTGCTCGATTGACTCTACTTTAACATTC -TGTGGGATCTGTAGTGATAGCATTAGAGGCTGTGCAGTCACAGAAATGAAGTGAGATCTC -CTACCGAGAAAGTGTCGCTTATCCTCTGCATCGTATCCGGATGAATACATTTTGGACATT -GGCAGTTTTCTCTAGTCGCAGGCATCAAGTTGTCAATGAAAGTCCAGGCTTCTAGTTTTG -TGTGTCTTGGTCACTTACCTGAGGAAAAAACGGCCGCTGTAAAATACAAGTTAGTTATTA -TTCGCCTTCTTTGTGGAGTTGCGATAAACTTACAAATTATACCAGGATTTGCTATCCTTT -TCGTCTTTGGGTGGAAGTTTTACTATATCGGACTTGAGACAAGCAACCCGAGGACCGTTT -ACGCTCATCGTAGTGTGCAGCAATCTCGATGAGGTAGGCGTGGCATGTCGGTAGCCAGTC -TTCTATGAGCAACGGGACAAGGAGATCGAGTTAGTTTTTTGAATGATTAACTTGACTGTA -TATCATGCACTACGGAGTATATCCATACCAAACGGCCAGCGGGGAAATAGCGGGGCCTCA 
-GGCTCTTTGGTCTAATTCGAGAGGGTTTTAGCGTTGAATGTTCAACATTGAAATCAATGG -AGATCTTACCCGGCGCGAGTCCACAGCTCACTCGACCTGAGTGTTGCGCGCCAGCTGAAG -ATCATGATATGGATAACCGACTGATTTTaggacaaagataaggacaagaacaaggaaagg -aAGACACCAGTGGACAGATCCGGGGTAGCAATTTGTAAGAGCTTCAAGTTTGGGATTCCG -ACAACCGAGGTTACAGTCACGTGTGGGTATACAACGAAACTATTTTGGCCTAAATACTTT -GATCCTTTACAAAAGTAGCATTCGGTGGTAAAAGGGTTTTACCGATTATTCATTCTGAAA -CGCAGAATATACACAGTCTAACTGAAACTCGGTGTATATAATTGTATCACAGCTTGCTTT -TCACGCCTGTCACCTTCAACAGACAGCGCCCATTGCTCAACACAACCCGACCCTTGTCAT -TCTTAGTAGCGAATCGGATCTCCTCATAGCCATCTTGAATACTGCCCGTTCTCCAAATCT -CAGTGGTAAGCTTATCGCCGGGTCGGACAGGCGAGGCAAAACGAGCCTGGAACTCCTTCA -TATTGGCAGGGTTGCTACCACCCAATTCGCGCAGAATACCATGCGCAGCAGAATTCCAGC -TGAAAAGACCGTGAATGATAGCACCACCGAAGCCCATCTTCTGGCCAGGCTCCGGTGTGG -CGTGGAGAGGGTTGTAATCGCCGTTGAGACTAGTCAAGGAGTTAGCATGAAGTGTGACGA -GTATCGACTTTTGCTCAATGACATAGAGCTGCCCTATGTATGGGATATGCACTGAGTACT -TACCGGTAAAGGTGGGCAGTCTCCAAGTTGGTCTGAACAACATGGGTAGCGTCAGGGACC -TTGCCTTCCGGTGGGGGATAATTGACAGTGCTAGGTCCTGCTCAAATGCGTCAGTTGATA -CTTCAAAGTGGGAAGTTGAACTATAGTCTTACCCTTAGGACCACCCCAGCCACCCTGACC -GACCAGGAAACCACTACTCACGACCTTGGTGTACACCTCGCCGCTCTCCTTATCCACGAT -AGACTGCTCGGTTTCGATTACAGTGCCAGGCTTGCCTTTGTCATACACTCCAATGACCTT -GTTGCGCAACTCAAACTTGCGCCCTGCACTAGTTGGAGGAAGCGGCTTCAGGATTGTGAT -CTTACGCTGGCCATCGACAGCCCGACGATGGTCCAGCTCGGGGACGCCTGGGATATGAAC -TGCCTTCTGACGGGCATAGAAATCAGTCACTTCTTGATCAGTGAGCTTGAATGCTATAGA -TTTAGGGTTAGTGCAATTGCCCAGAAGTCCCCGGGCTCTTCACATACGGAGGATGAGAGA -ATAGGTTGGGAAGACCGAGAAGTTCGGGTGAAGTTCCTGGATACTAGTTAGATTATGATA -TGAAAACTGGTATCTATGTAGTCTGTGGCTTACATATAGGAAGTGCAGCTCGTCAGCCTT -GACACCAATGCTATTGGCGAACAATAGCACATCGCGCTTCTGCCAAGAGACCTCTTGGGC -GGGAAATTCGTGACCAGTTCCAGGGGCCGACATTGGAATGGTAGTATGGAGTATTGAATG -TATGGTCAATTATTCTTATAGGGAAAGGGGCCAAAAAGTAAGGGGAATAGAGAAAACAAG -GGATCGACGGCAGACTCAAGCACCTAAGTAGCTTTGGTAATTGCGGGGGGCCATCAAGGA -GCCGAGGATCTGATAACTTTTGGATTTGTTGCCGTGGTCTTGCCGAAGGGACATCGCCTT -GGCACAATTTCCCCTTGCAGATTATAATTTTTTGCGAAGTCGAAGGTCTACGAAAGAAAC -ATGTCCAGAAGAAGAGCGTTGGAGTTTGCTAGACTAAGCATCGACTCGGGTATCTACCTG 
-GAAATTCGAGGAGCTTCAGAATGATATCCCTCCGTTCGCGCCACAAAATGCCTTTTTTCC -TGCCTCCCAAGAACTAAACGCCGAAATAAACAAAGGCGAAACTAGATGTCCAATCCGCCT -CGGGCAATTTCATACATGCCCATGCCAGGATGCATTGCGTGACGAAAATATATTTCACTC -AGATCGTGCAAACGTGAAGTTGTACATCCCAATGACATCAAATTAGAAAAGTGGAACAAA -ATTAACACCTATGATGCATTAGTTTCATGACTTAAAAAAAGGAGTCAAAATGCTAGAACT -TACATGCGGTAGAAGCAGCCACCCTCGGCGGCGCGAGTAGCATCGGGGGTCTCAAACTTG -TACGAGATGCTGATATCCAAGTCACGCTTGTTCTTATCGTTGGGGCGGTTCTCGATGCAA -CCAGTAACCTTCTCATCCTCCTCAACAGTGAGGACATCACGAAGGTAGAACACGGTCTGC -TTCCAGTGGGTGTACTTGGCGTGAGGACCAGTGGAGAAGTGAATGGGCTTGTGGCAGGCA -CTGAACTCGATATCGAACCAGGCGATCAAGGCGTGGATGAAATCGGGGCGCTTGGCAGTG -AGGGCATAAGGCACCTTGAAGGCAAGATCGGCCGTGGTGACGGTGTTCAGATCAAAGGTG -ATGATGGGGCAGGGATCGGTCACAAGAGCCTTCATCTCGACGGTATCGACGAGGGGCTCG -TTAAGAGCAATCTCCTTCATGGGGCTGTAGTCGAATCCGTAGACATTGTCCCAGACTGAA -TTTGGTCAGTACATGCAAAAATAAAGTTCGAAGTAAGATGATTGCTTACATCCAATCTTG -TCATCCTTGTAGTCACCGTCCTCAATACCAGCAACGTACATAGTGGCCTTATCGGGGAAG -ATCAGACCACCGGGGTTGAGGTAGGTGTCACGGGCGTAGAGCACGGTGTCCAGCATGGAC -TCGTACAGAAGGAAGTAACCCATCCACTCGGAGATAATAATATCAACCTTAGGGAAAGGA -AGGTGAACCTCCTCCATCTTGCCCTGGAGAAGAGTAATCTTATCGGACAGTCCATTGACG -TGGACAATCTGACGGGCCTTCTCGATGATGGAGGACATATCGACACCAATGACGTGCTTT -GCGCCAGCACGGGCAGCAAACCTGTTAAACAAGACTGTTAAAAGGAGATCTAACAACACA -AATTAATAACCGGATACATACATGCTGAGAATACCGGTTCCACAGCCGACATCGAGGACG -ACCTTGTCCTTAAAGATGTGGCGGTTCTGGTAGATGGAGTCGCGGTAGGAGCGTGTGCGC -ACATCATCTTTCTAGAAAGCAGAGAATACATATTAGAGCACCGTAGATAGCCAATTAGCC -GGTGGGTTTGTACTTACAAGCATCTCCTCGTGGATACCTGTGGCGGCCCAAGAAAAAGAT -CGTTAGTCAAAACCACACTGAAAATTTGCTATCGTTGATCAAGATTATTTACCATGGTGG -TCGTAGCTATCAATTTATTAGTATAATTGACCGAAAGACTCAATCAATGGAGAAATAACG -TACCTAGTGAAGTAGCGGACTTCAGAGTGCTCCATGCCGACCATGCGATCAGCACTGCTG -GTAATTCCCTGAGGATTATCGGAATCGGTGTCCATCTTGACTGTGCGGAAAGAAGAACGA -GTCGAAAAGGAAGAGTAAAAAAGAGTAAAAAAATCAAATGGGAGGGATGTTAAAGATAAA -AGTCCAACAGAGATCGATCCTTAACTTTTTTTTTTCTGGATTATTTGCCCGCCCGCCAAG -CTTAACTCCAATCCCCGCCCGCGCTAGCCGTGTTAGAGAATCACGTGGACAGGCACGTGC -TATTGTTTGCCCTCATTTATATAGGGCTATCTGCGTGGTATAGTTGTACAATTGGAGGGT 
-TCTGGGGAGAATCGTGTGTACCTGCATCATATGAGAATGGTATCTGTGTGGGGCAATGAC -ACTTACACAAGGATAACAAGCGCCAATTGATGGTATTTGATGTTTAATATAGGTCAATTG -CACTATTAAATATATTTCCCTTTGGAGTGAGCTGGCAATATCTCTGAGAAATAATGCCAC -ATGTTATTCATGATAAATAAGGTCCATTCCACCTCAATTGTTGAAGTTGTAAAGTACAGT -TCAATGGCCCTGTGGGGTAAATAAAGGATTCGGAACTTGCCAAGACCACCGAGGGAGGCT -CTTTGATGCCGCCGGGAGCTCCGCTCCAACCTGTCATTCTTCCCCAAGCTTACGCGTTGA -ACGCCTAAGACCTCGGAACTAAACTTTCGAGCCACAATGAATCTAGGTGGAGCCAGAGTG -AATCTCTACACTCTTTTCGCGTTAGTTTTTCTAGCCTCGCTTATTGGCAGGTCCGACGCG -TCATTGGGAGACCACCTCCCCGACTTCAAAGAATGTGTCCAGGTACGACTCGGCGAACAA -CGGACCCTTGCAAAAAAAAAACTAACCAGAATAGGTCTGTAAAACGGAAAATTGTCAGAA -CGGCAATTCAGTGCTACGTACGTGAAATCATTATTCCAACAGTTCTCCGCGTTTCCCCTA -CTGACCTCTTTAGCTCTGCACCTCCGCTTACTACTATGGACCTGCCCTGCTGAATGTGAC -TACACTTGTCAACATGTTATTACCGACCGTCGAGTGTCGCGTGATCCTCCAATGATCAGT -CCGATTGTCCAATTCCACGGAAAATGGCCGTTTCGTAGGCTTCTAGGGATGCAGGAGCCC -TTCTCGGTGCTCTTCTCCTTCTTCAACTTCGCGGCGCATTGGCACGGCCTGTCCCGTATC -CAGGAATCGGTCCCCGCCTGGCACTCGCTCCGCCCGTACTACATGATGTTTGGATATATT -GGACTTGCAAGCTGGACTTTCAGCATGATATTCCACATGCGCGATTTCCCCCTGACCGAG -AAGCTGGACTACTGGGGGGCTGGCGCCAACGTGCTGTATGGTCTCTACCTTGCTGTTGTT -CGAATCTTCCGCTTGGATCTTGAAGATACCCCATACCGGCCGACCCTACGTCGGTTTTGG -ACCGCTATTTGTCTTCTGCTATATACGTTACACGTGTCTTATCTTACCTTCTGGTCCTGG -GACTACACCTACAACATGGTGGCGAACGTGGTGGTTGGTATTATTCAAAACCTCTTATGG -ACCGGTTTCAGTATCTTCCGATACCAGAAGCTTTCGAAGACTTGGACCGCCTGGCCTGGT -ATGATTGTGGCGTGGATAATCCTAGCTATGAGTTTGGAGTTGCTCGATTTCTCTCCTTGG -CATGGGTTGATTGACGCGCACAGCCTCTGGCATCTGGGTACAGTGGTCCCTGCTGTATGG -TGGTACTCGTAAGTCCTCCCCGATCGTACGACTTTGACCATTAATATGTTCTCATGACTA -ATGCATCGGTTGCACCCGACAGGTTCCTGATCAGGGACACACAGGACGATATTGCGGCGC -ACAGGCTAAAGGCATGAAGAGGATAAGAATTTGGGCAAATTTTGGCGATTCTCATACCTA -TATATACTGAAGATAGCATACGCATGTTAAAATGTCATGCTTGGCTGTATCTTAAAGGTG -GAGGTAGCAGTAATGAAGATTGGGGTGCTCCCCACCAGAGTGTTTACCGTTGCAGTCAAT -TTCCACGGCACTTAATGCGGATCAATATGTTATTGCCACGTCCCACAACGTACCCTAGTC -GGTGTTTCCTGCCATTCTCTCACCGCTAAGGCCGTCGCAATAAGCCATGTCTCAACCCCA -AACCCCTCAGCCGCTTTTTTTTTTTGTTTCAAGTACTCCCCTCCTTTTCGACCTCGACTT 
-TACCTGAAGCTTCAGAGCACCTCTCCTCTTGAACCCCACTAAGGTACTTTACAAGTAATC -GCATATTTTCGCCATGGCCTCTCCGGCTGTCAAAAAAGCGATCACTGAGGCGGCATTGCA -ATACACAAAGCCCGAGGGCAAGGTATTTGAGTATGGCACAGCAGGGGTATGTTTGATAGA -CACTCAAATTCAGGAGTAGGTCGACTAACTTCTATTCTCTCCATTACAGTTTCGCATGAA -AGCGTAAGTTTCTTCTATGGCTGATCCACCCCGCCTCCGCATCCTCTGGAACCCCGCGAT -TGTCCTTTTGTTGACGCCGACTTCTGACCTATCTGGCTGCGCAGCGACCTTCTCAATACT -GTTGTTTTTGCAGTGGGCTTGCTTGCAAGCCTGCGGTCAAAGAAGCTTAGCGGACAATGG -ATTGGTGTTATGGTGACTGCTAGCCACAATCCCGCGGAGGACAATGGTGTCAAGTTGGTT -GACCCCATGGTAGGTTCGAGCCATTGATCCCTGAGTACAAATCCATTAGGACAGGTCTAA -CCCTTTTTTTTTCCCTTTAGGGTGAGATGCTTGAAGTAAGATTGACTTGCGTTTACCCAA -CCGAATTTGGAATAACGTGCTCACTGTCATAACTAGGCCGAGTGGGAGGCATATGCGACA -AAACTGGCAAATGCCCCATTGGATAAGATTGCCGATGTCTACGACGAACTTGTCAAGGAA -ATCGACGTGAAGATGACCAACCCTGCTCGGGTGGTCTTCGCTCGTGACACACGTGCTTCT -GGCTCCCGTCTAGTAGGCGTTCTGAATGCCGCCCTTACTGCGACGGAGGTCGAGTTTGTG -GACTTTAAATACATGACCACCCCACAGCTTCACTACGTTGTTCGCTGCAAGAACACCCTT -GGAACACAATACGAGTATGGTGAGCCGACCGAGCAAGGATACTATGAGAAGCTTGCAAAC -TCCTTCAAGAAAGTTATGCGCGGAGTCAAGGTCCAGGGTTCCTTGACTGTGGACTGCGCC -AACGGTGTTGGAGGTCCCAAATTGAGGGAACTGATGAAGTACCTTAGTGGAATCGATATT -AAAATTGTCAACGATGATGTGATCAACCCTGACGCCCTGAACTTTGATGTATGTTTCCAC -CAGCCCAACACACAACTGTTGATCACCCTAACCCTTCATACAGTGCGGTGCCGACTATGT -TAAGACCAAGCAACGTGCTCCTCCTTCTTCCAAGGCTGCCGTGCTTGATCGATGCGCTTC -GCTTGATGGTGACGCGGATCGTCTTGTTTACTACTTCCAGGATGAGAGCAACGTCTTCCG -TCTTCTGGACGGTGACCGCATTGCCACTCTTGCGGCGTCTTTCATTGGCGATCTTGCCCG -CAATGCTGGAATTGCTTCGAAGCTGAAGATTGGTGTTGTCCAGACCGCCTATGCTAACGG -TGCAAGCACAGATTACATTGAGAAGGTCCTCAAGCTTCCTATTATCTGCACCAACACCGG -CGTGAAGCACCTGCACCACGCCGCGCTGCGGTTCGATGTCGGTGTCTACTTCGAGGCCAA -CGGACACGGTACCGTCACTTTCTCCGAGAATGCCCTTAAGGTTATCAAGAACACTGAGCC -TCAATCGCCGGCGCAGCAGCACGCCCTGGAGAGTCTCCAGGCCCTCACCGACTTGATCAA -CCAGGCTGTCGGTGATGCCTTGTCCGACGCACTTCTTGTAGAGGCAATCCTCGCCCACAA -GGGCTGGTCACCCAAGGAGTGGCTTGGAACCTACACCGATCTTCCTTCGCGCCTCGTGCG -TGTCGAGGTCAATGACCGTTCCATCTTCAAGGCATACGACGCCGAGCGTAAGCTCGAGTC -TCCTCCCGGCCTACAGGGCACTATCGAGTCTCTTCAGTCTCGGTACAACAAGGGTCGCAG 
-CTTCGCCCGTGCCAGTGGTACTGAGGATGCCGTTCGTGTGTACGCTGAGGCTGCCAGCCG -CTCTGAGGCCGACGATCTCGCCACTCGCGTCGCCAACGCTGTGAGCGAGGCAGGCAGCGC -CTAATAGATTCCTTGGTCCAGGTTTGATCCTCGGCCTTGACAAATGCCGCTTGGGATCAC -TTTGTCTCTGGGCTATATTGCAATTGTTACACTGGCCTTGATGGCTTTTCAGCACTATGG -GCCTCCGGAAATATGGTCCAGGAAAATATTCGATATCATTTTGACGCAGTAATGAAATCT -ATGGATTCTTTTCATATTACATGCTTAAACGCCCCAAATTCGTAAATTAACCCATCTATT -CATGTACAAAACGAAAGCAAACGAGCCAGAATGAACTTCTAGCCCTCTTCCTGGTCCATG -GCGTCGCGACTTCGCTTGCCCTTGAACGAGAACTTTCCTTTGCCCTTGGCTCCCTTCTTC -TCGTCGTAGTGCTTCATTGCCTGAATCGCATCACGCTGCGAGGCACTCACTTGCTCGGCA -AAGACCATGGCCTCATCTTTCGGGGCGGGGTACTCCTCCAACTCTTTGCCCATCGCAGCC -TCTATACGCTGCCAGATTTCCACATCATATTGTGTGACAAAGGAGATGGCGGTTCCAGAC -TTTCCAGCACGTGCTGTACGGCCTACTCGATGAATATAGGTCTTGCTGTCGGTTGGCAAG -TCAAAGTTCAAAACGCAGTCCACGGAGGGAATATCCAAACCACGAGCAGCAACATCAGTG -GCAACTAGGATCTCGCGACTTTTCGATCGGAATTTGCCCAGTGCTCCCAGACGTGCAGAT -TGAGACATCTGCCCATGGAGAGGGATTGCACCGAAGCCCAGACCACGCAACAGGAAAGCA -ACACGCTGCGTCTCATGAACAGTACGCATGAAGATGATAACGGACTGGCCGATGAATTCG -TGGAGGAGATAGACGAGGTAGATGTCCTTGTGTTTGAAAGGACGAAGAATATAAGTTTGC -TGGAGGGTTTCCACAGTAGCGTACTTGCTGGAAGAGATCGATACACGGGCCGGATTTGAC -AGTGAGGCTCGCTGGAGAGATTCAACCTTGGAGCTCAGAGTGGCGGAGAATAGGAAGGTT -CTGCGCTCGCGCGGCAGGACCTTGAGAATCTTATCAAGAATGGGGCCGAAATCCATATCA -AGAAGTCTGTCTGCTTCGTCCATGACTAGGAATTTGAGATTGCGGAGAGAGAATCCTTTT -GTGTTCTCGAGGTGATCGAGAAGTCTGCCGGGTGTGGCAACGATAATGTGAGGCTTCTTT -CCTAAAGCGATTGACTGAGGAACCATATCCATGCCGCCCACCAACGTGGTAGATCTGACG -TTGATCGTTGCGCCGAGGGACTCGCATGCGAGCGAGATCTGATATGCCAATTCACGGGTC -GGGGCCATTACGAGTCCGAAGAACGGTTGGGGTTTTTCCATGAGCGCTAGAGGACTTGTT -AGTATTGTATGTGAGGGTAGATGTCGCCAAAGATGCACAAACCTTGAAGAATGGGTAAAA -CGAAAGAGGCAGTTTTTCCAGATCCAGTTTCAGCCAAACCGATAATATCGCGACCCTGAA -GCGCCAGCGGAATTGATTCGGCTTGGATTGGTGTAGGGGCCTTGTAGCCCATTTTGTCAC -AGGCCTCACAGAGAGAGTCAATCAGACCCAGCTCTTTGAATGTTTTGGGAGCACTGGGCG -CGCTGGGCTCAGTGTCGGAGGAAGCATCTTCGACCTCGGGATCCTCAGCGGCGGGTGGGG -GGTTTGAAGCGCTGGCATTTGATGCAACGTCGCTGTCTTCGACGGGCGTAGCAGGTGCTG -CATGTGCAACCTTGCGCTTCTTGGTAATCGGCATGTTTGACTTGTGTTTCAACAAAAGAT 
-TTGTTGTCGATGTTCAACTATCTCAAAAGAAATAATTTAATGTGGGGCCGCAAGATTGTT -GATCCGGCGCTGTTTCTTAATTTTTGGAGAAAAAAAAAACCTCGAGCTGTTCGATTAGTA -CTCATTGGTTTTCCTCTCCAATCTTATTTTTTTCATTTCTAACATCGAAATGGCTAAGCA -ACGTGTTAAGAAGCGCACTCATCAGAAGCCCCAAAATGCTTCGGCTGTGAAGGGCAGTGC -TGCCTCAATGTCCAAGACCCCAAAGTCTATGGTCATTCGAGTAGGCGCTTCTCAAGTCGG -TAGCAGTGTCACTCAGTTGGTTAAGGATGTCCGTCGTATGATGGAGCCCGACACCGCTGT -GCGATTGAAGGTATGATTCACAGCAAATGGTCCGCTCGCTCCCTCTAACAAACAATATAG -GAACGCAAATCCAACCGACTGCGAGATTATACCGTTATGACCGGTCCCCTCGGTGTAACT -CATCTCATGCTGTTTTCTAAGTCGGCAACTGGCAACACAAATATGCGACTCGCTGTTACT -CCCCGTGGACCCACCCTTCACTTCAAAGTAGAGAACTATTCGCTTTGCAAGGATGTTGAG -AGATCCATGAAGCGCCCCAAGAGTGGCGGACAGGACCACAAGACACCACCTCTTTTGGTC -ATGAACAATTTCACTACACCAGACGCAACTGAAGATTCCAAAGTTCCGAAGCGGCTCGAG -ACCCTCACAACTACCATCTTCCAGTCCCTATTCCCTCCTATCAACCCCCAAAGCCAGCCT -CTTTCTTCTATCCGCCGTGTCATGCTTCTCAACCGAGAGCCTGCCGAAAAAGATAATGAT -TCATATATCTTGACTCTCCGTCACTACGCTATTGCTACGAAGAAGACCGGTGTATCGAAA -CGAATTCGTCGCCTGGACCCCAAAGAGATTCGGAATCGAGACAAGAAGAAGACCGCCGTC -CCCAATCTTGGAAAATTAGAAGATGCCGCCGATTATTTGCTTGACCCCTCTGCTGCAGGC -TACACATCAGCAAGTGAGACCGAGCTAGACACGGATGCCGAAGTCGAAGTTGCAGAGACC -ACAACCCGCCGAGTCCTGAACAAGCGGGAAATGCAGCGCCAAAAGGCTGCAGAGAAGGGC -CAAGATAAGCCAGCCCATACCCCCGGTGTTGAGAAACGGGCTGTGAAGCTCGTAGAGCTG -GGCCCTCGCTTGCGACTTCGCTTGATGAAGGTTGAAGATGGAGTCTGTGATGGCAAGGTT -ATGTGGCACGACTTCATTACGAAATCCGAGAAGGAGATGCGGAAAATGGACCAGAGCTGG -GATGTTCGGAGGAAGGAGAAGGAGCAGAGAAAGAAGCTGCAGAAGGCGAACATTGAGCGG -AAGAAGCAGGAGAAAGCCAAGGCTAGGGGTAATGGCCAACAAGTGGCCgatgaggaagag -gatgaggatatggatgatgaagattggctcagcgatgatgactttgacaaggatgctgag -ggtgagggtgaagcggtggatgatgaCTCTGACGCCGATTCGGATGAGTCTATGGAGGAG -TAAGTGGCTGTTTGAAGTCATACTACACCTTTTCGAAAGCTTCTACTTCTTCGGCAGATG -GAGAGCCGGCTCGGCCTTTCGAGAATTCAACTCGTGCTGACATGTCTATCATACTGGATT -GTATCAAAAATGACTCCCGTCTTCATGTCTTATATCCATGTCATATGGACCAAAATTTCT -GTCACTGTCACATACCCGAGAGGCATCTAGGCGTTACTTCAGGTTTTGTCTTCATATAAT -GCAGTCTTGCAAATTTATCCTTCATCCTTCATTCTTAGATCGAACTTAGTCTACGAGCAT -CCCCAAATATATACCTCTTAAATGCTAGGCTTCATGGACTTGGACGTACATCCAGTTGTT 
-TTTGATACAGTTCAGTTCATTATACCAACCCATCAATTTCATGAAGCCCTCGAATTCCAT -CACATCAACACACCAATCAATATACATACAATGTGCCATCGCATGTTGCAACACAATCAC -ACCACACCGGCCAAAGATATCAGTTTTAGATGAAGTTCCCAACTTTCAACAGTTTACTGA -AACGAATGAGAAGGCCATCGAGCATTCCAAGTTACAGTCGGTAAGCACTGCAGTTCAGGA -GAAATGCAGTCCCCGCCCATTCTGCTTGGAGACAATCGCACTTCTAACGGTATATCTTTT -GTTCGCTATAATCAAAGAGGAAATTCCTTTTCTGGAATGTACCCATTAGTCCATCCTAGA -TCATAGGAAACCGTACATGATACATTTTGCCTCGCGCCAATGTAACAAGGGCGTTAAGTG -GGGGAGATTCGTTATTCATAAATCTGGTCTATTTCTTAAGCCTTTCATCGACGATTCTGT -TCACCATCTTCTCCAAGATGGCGAAATCTCCACCGGAGGTACCAGCGAAGTATCCCTGCA -CGCTCTTCTCGACATACTTGCCCACGATATCGATCTCAACGTTGACAAAGTCACCGGGCT -TCTTCGCCGCAGTGACCACCTTCCCCTGCGTGTAGGCAATGAGCATAACCTCCCAGTATC -CATCCTGTCCATCGACGACCTTAGTAACAGTCAGGCTAGCACCATCTAGGGTCACATAGC -CCTTTTCCACAACATAGCGCAAAATATCTCTATCGCGCGGCTGCAACCGGAACACCAACG -AATTTCCATCGGGCGTCACGGACAGAATCTCCGCAACGGTATCGACATGGCCCTGCACAA -AGTGTCCTCCCATCCGGGTATCGGCCGAAACAGCACGCTCGAGGTTCACGTGGGAATTCG -CCTGCAAGGAGCCCAGGTTTGTGCGTCGCAGCGTCTCCGGTGCACAGCCAACCTTGAACC -AGGTCTTATCGAATGCAGTGACCGTAAGACATGTTCCTTCAGCCCACTTTTAGACAGTAT -CCCATGCATTATAGACAAGTAAAGATCAAAATACCATTCACTGCGATACTATCGCCTAGA -TGCGCGTCGGTAAGGATCTCCTCACAGTCGGAGATGGTGAGCGACGTTCCCCCGCCCCCA -CTCGACGAGGTATCGATGGGCTCCAAGGCCGTGACAGCTAGTAAAACAACATTAGCGATT -GAGTCTCTTCACAAAAGAAAGATATACACACTCCCAATCGTCTCGACAAGACCTGTGAAC -ATAGCTGCGGTTGATGATCTACAAGGAAAATGCCCCCCGCGATTGTGGCCTCGGCTTTGG -TCCCTCCGAGCGGTGTCGGCGTCGGCGTTTTGACGGAGATTCGCCGGTTTCCCCAGATCC -CATTTAACGTGGGTTTCTTGCTAGTTGAGCTTGTGTTTGAGCTTCCATTCTTTTTTTTTG -TTTCGCTGCGCTCGTATATACTATCATGACAGTTGCGCTCAGGTTCCAGGTTGTTGCCCG -TCGATCGGGCTTGGCCTGGGGTAGCAATGCCTTGCGGCGCACGATACCAGGACGGAGCTA -CTCTAGTGCAGCTTCAGTTGATGGGTCGCTACCTTTGGCTGGTATTCGGGTGCTCGATAT -GACACGCGTTTTGGCCGGTGTGAGTACATTCTCCGAGGCGCTCCTGTGTGTCTATAATCC -AAGCTAACTTGCTTTAGCCTTATTGCACTCAAATTCTTGGTGATTTAGGGTATGTGCTAA -GCTTCGAAGCCGTGAAACAGCTCCAGTGAGTAGTTCACACGATGTTAACAAGCCGAAACA -CAGAGCGGATGTTATCAAAGTCGAGCATCCTGTGCGCGGCGACGATACTCGTGCTTGGGG -TCCGCCGTATGCGAAGTACGAAGATGCTTCCAAAAAGGGTCCTGGTGAGAGCGCTTACTA 
-CCTAGCTGTATGAACCTTCTCTCGTGGGATTATGAAACAACTTATCATAATACTAATAAT -GGATATAGGTCAACCGGAATAAGAAATCCCTCGGTCTATCCTTCCAACACAAATCAGGTG -TTGAGATTCTACACAAGCTCGCCAAAGAATGCGATGTCTTAGTCGAGAACTATCTCCCCG -GAGGTCTCAAAAAATATGGCATGGACTATGAAACGCTGCGGGAAATAAACCCCAAGTTGA -TCTATGCCAGTATCACCGGATATGGGCAGACAGGCCCTTATAGCAACCGTGCAGGCTACG -ACGTCATGGTGGAAGCTGAGATGGGATTGATGCACATCACTGGCGCTCGAGATGGTGCGC -CAGTAAAGGTTGGTGTTGCTGTCACCGATCTAACCACTGGACTCTACACATCAAATGCCA -TCATGGCGGCTTTGATTGCTCGGGGTCGGACTGGTAGAGGCCAACATATCGATGCCTGCT -TGAGTGATTGCCAAGTTGCTACACTGGCGAATCTGGCTAGCTCCGCGCTTATCAGTGGGA -AAAAAGATTCCGGCCGTTGGGGCACAGCACACCGTAAGCAGCAAACAGTATCTTGTGTCG -AAGTTGATGGCAAGCAAACTGACACATCCCAGCGTCCATTGTACCATACCGAAGCTACAA -GACAAGCGACGGAGATATTCTCTTCGGCGGTGGTAATGACAGGCTTTTTGGCGTGTTGTG -TGATCGACTAGGCTTCTCTGAGTGGAAGACAGACGCCCGCTTCATCACTAACAGTGACCG -AGTGCAACACCGGGCGGAGATCGACGGTATGATTGAAAACACCACTGTGCAGAAGACCAC -ACAAGAGTGGTTGGATATCTTTGAGGGCAGCGGTATGCCCTATGCGGCCGTCAACGATAT -TCAGGGTACCCTAAATCATAAGCATGGTAAGCTTGGCTATTTAATTCACATGTGGTAAGA -ATGGTACTAATACTGATCCTACCAACAGTTCTGGCACGAGGCATGGTGACAGAGGTTGAC -CACCCTGCATGTGGTCCCATCAAGATGGTCAACACGCCTATCAAGTACTCCGAAGCAACA -CCGGGTGTGCGCACGCCTCCTCCTACTTTGGGCCAGCATACAGATGAGATCTTGGGCAGT -GTTCTCGATTATGGGGAGGCAGATATCGCTCGCTTGAAGGACGAGGGAGTCATATCCTAA -ACCTTGTATTATTTATGAGACACATGAAAGACATCGCCCGAGGTCTGTATATAAGCAAAA -TTAAAATTCAAGCCAATTCGATTATCCCTTCGGTTCTTTTGTCATCTTTTGGTCCCCTTT -AACATTTTCACTGCATCCCATAAGCCCCTTGAATTTCCCTTTGCATATCATCAAATGTGT -GCAATACATATCTGGGAGCTCCGTATTGTGAGAACTGCCCCTCGCGCGAGACATTCAAAT -TAAAAACATGATAGGTTTCACAAAATGTATTGTTGTTGCTAGATATTACCTTCATCAGAT -GTTATACATAAAAAAGTCGCTTGCACAAAGTGTGCCTTTCCCGCCTTTTTCTGCAGTCCA -CCTAATTACACTGGAAAGGCGGGTTTGAAGATATCTAAGAAGCTCGCTGAAAAAGAACTC -TGAAACGACCTGTAGGGCATTATAAGGCCGCATAAGCGGCCGTCTTCTCTTGTAGATATA -AATAGAAACATGCCAGTACTAGTGCTAAGAGGCGATCCAATCGAGGACAAGACGAATGTT -CACTACATTCATATGAGAAATAAACAACAAAATCTTCATATTGAATAAAATTATCAAAAC -CGAACATTCCAGGGTTGTAAAGAGCCTCAAATTCGGCGAAACCTGTCAGTTTATGAATGA -AGAAAGCCATTTAAGGTTACATATGTATATCACGTGACTGATATACAAAGGTCTTCAAAG 
-GACGACTTCACTTCAACCCTACTTCTAGTTGAAGAATTGAGAGAAGCCCAACTATGCTTG -GCTGCCCGCTCACCAAACTGCCCAGTGTCTATTGTGGCTTGTATCAGGCTGGAAGACTCG -CACTACCCTAGAGGAACACAGTGTGTGTTTTTCCCTCGCCATCATCTGTTGCTCTGTTCT -TTACTTTCACACTAGAGATCAACTAGTTTCCCGGAGACATCTTGCCAAGTCTCATTTTTT -GAAAACATTCATTTGCAAGAAAACTCATCATGAGAGTCTCAACGGCATTCATGGACCTCC -CCGCCCCCGATATGAACTCGGGGCGTTCCCAGAACGTACATGAGGGCAATATATTTGACC -ACGAGGGTGTCTCCCAAGGGATCGAGCCACTACCCTCTCATCTGGATCTTCTGACTGATA -ACATACCCTGGGAAAGTTGGGGATCACCAGCACTGCCTCAGGAAGAGGCACTTTGGACAA -CAGCTGGGGTCACGGAAACGGGTCAACAAAATGCCAGTGCCACCCTGGCATCAAATAGAA -CTCGATCTGGCCGTCTTCTTACTCAGCCAGCTGTTCGAGGACCCGAGTCTGTGACAACGA -AACGGGAACCTGTCAAAGCGACTGCAAAAAAGCGGAAGCGTCTGAACCGCCAGGTGAATA -TAGTTTGCACTGGCTGCTACCGAGGACACAGCCCCTCGAATAATTTGATTGTTCTATGTG -ACTCATGTGATGCTCCGTGGCATCAGAAATGCCACAATCCGAACATTGACAGTGAGGTCA -TCGAAATCCCGGAGATGGATTGGTTTTGCATCAAGTGTAAACCTGAGCAGCGTCAGACAG -CCCAGACCAAATCTCAAAAGAAGGCTGCAACAAATGCGAAAAAGGTTGGACGCCCAAAGA -AGCAGCCTGTGTCTGAACCTCAAATTGGTGGAAATTGTTATTCAGAGGAAGAACGACGGG -CGTATCTGTCATCTCTTTCGCATGGTTCGCTAGTCCAGCTTATAGTGAAAGTCTCAAATG -AGTGGCCGTCGGTTCCTATCTTTCCCCACGACATGCGACCCGTGGCCGATTTCACACCAT -CACCTTCTGCGACTCCACACAATAATCGGGCTTCCGACGCTACAACGAAGAAAAACCCAC -GTTCTGCATTACTTGATTGGAGAAGCTTGGAACACGCATCTGAGAACGCCAACGCTATGG -ATTTAAATCAACGGATCGGTAGGATTTCTGATACCATAGCGCCCGAAGCTACTGCCGTGG -ACACCGATGAACTCTTTGCAGCAATGGCCTCCAATGCTGCTCCCAACCCTGCTCCCCCGG -CGTCTCTTGGTGGAATCATTACAGACCAGATCTTCAGCGGAATTTCTCTCGCACATGCTC -CCACAACTGCTCCGCAGCTTTCTACCACCACGACTTCGCAGGCTCCTTCCAGAGCCGCAC -GCAGATTTACGTCTCAAGCTGCGCCTGCAGCTACGCGTGCTCGCAAGGCCTCATTTGGTT -CCTCAAATTCCGACTTGCTCACCGACGAGGAGTCATTATACTCCCAATCTCGACTACAGT -CTCCTACTCCCTCTGCGTCTCAGGCTTCACACGGTGGATCCCAGCATGAATCGGATTATG -ACCCTGAAGATTATCGTGCTTACCCTGAAGCTGGTCGGGGCTTTCAGGTGCCATCGACTC -CTAGCGATCTCAACATCATGGCTGAGAATAAGGAGTACCCTACCTTCAGTCATTCTATCC -GCGGTCCGGCTCAGAGGGCTTGGAGCAAGAAACCTCAATCGCCACGGGGTCGGCACAAGC -AGTGTTGATTGGTCCGCGATGTTTCTCTTGTTTAAACCAGTGAGGAATGCGCTGTTTGAC -ACAATCATGAGGGAGTTCATGAATTTACGCTGGTCTTTGCTCTTCAGCAATGGGAGGGAT 
-GTCTGATATGATCAAATGGGGTTCGTGGATCTGACTTTGTCTTTCAATATTTTATGTTTT -TTTATGCTTCTGGCTACAATTATTTCGTCGTTATGGTCCGGAGTTCGACAGGCTTTGTAT -AGTCTTTTTGCTTCATACTTACACGTCCAAATGAGATAGCTACCTGACGATAATCAATCG -ACCGGGGTTTTTCTACGGGGTTCGATTCGAATCTCGAATCTCCTTCTTACTAGAGCCATT -GTCCTGTACTGCCTTAGAGTGAATGGTCAGGGAACAGCAGTATCGGCGCCCTCTTTCAGC -TAAGAGACCAGAGTACCAGTCACACGCCAATTGAGGGAAGTGCCTTATTGCAAGTGGACG -TATCTGTTAAACCAGGTGCCTTCCCTTTTAGTCGGAACTCGTCACCTACCCTTCCCTCAA -CCCCATCACTTCAGCCATTGGGTATGGATATCTGCCCCCTCCCActtgttctctcttctg -tcttctttcttctttcttctttTAACCACTCCGGGTCCAAATTGCACATCGTGATTCCTG -TTGAGTATTCTTTGGCCGGTCGACTCAGCTTCAGGGCATATTCAACATATGTCTGCGGTT -GTGTAGTGATCGGTGTTATCCTGAAGGCTCTGGCAGGTCTTTGAAGGCGGCCCGGGCTAG -GCTGTGAGTGGCACGAAGCAAGTATATGCGGCTACCGACTGGCGATCTTCGGCTACCTGA -GGCGCTTTTGCTATGACCTCTGCGGATACAGACTGTATGTGTCCGGGTGCTCCTATTGTC -CGGCAGGAGCTGAACCAATTTTAAAAGGGGCTCTGCTCTCATCTCTTTCGACCCAAATCT -TGGACCCTTCATTACAGAGATAGCTCTTGGGTAGAATATATATGTATATACTTCAAGGGT -CATTTCCTCCTTTTTCTTACTATAGGAAAGACACCAAGTGTCGATTCAATCAGCAGTGTG -AAGTCGTAGAACAAGTGTCAACAATGCCCGAGTTGCTGCGGTGATCCTATAGATCGTCTT -ATCAGCCCCGCAAAACTAAACGCGCCGTGAATCTCCCGAGATTACAGCACTTCTCACAGC -ACATCTCTGACATCCAATCTCCTGGAGCCAGAGCCAAAGCCAAAGCAGACATTAAAATGT -CATCCTCAACCCCCCAGTTCCTAAAAACAACAGCTAAACCACCTCACCCAACCTCCGACT -TAGCCAATCCACGCACCAACCTCGTGACAATCGACACCCTAATAAGCATCCTCTCCAAAT -CCCTCCTCCACCCCTTCATAGCCTGGATTCTCGTCCTCTGCCTGCGCGCCCAGGTAACAC -CATCAACAGACCCAGCCTGGATCCTCACCGTGAGCTATGCAACGGTCTTGACCGTCCTCT -CCATCGCGCGCATTATCAACCAGCGCGTCGCCCACGGCGCCCCGCGCACAGTCAATTTCG -TGAATGAGGTCGTGCTGATTACCGGTGGGGCTAGTGGCCTGGGTCTGTTGATTGCGCAGA -TGTATATCATGCGCGGGGCGAGTGTCGCTGTGCTGGATATTAAGGATATTCCAGAAAATA -GTCGCGATGAGGTCTTTGGAGAGGGCGTTTTTTATATTACGTGTGATGTCGCAGAGAGGA -GCGCGTTGGAGGTTGCCAAGGGGAAGATCTTGCAAGAGGTAGGTTTGTTTGCTTCTGCTT -TGGTCACGTTGTGTGAGCTTCGAATCCATCTCGGCCTACTAGTACTACGGTTTTAGGGAA -TGAAATGATTGGACAAGTGATTTTCATAGCGCGCTATGGCTTGCTTCAATTAAGGTCTGA -TTCGCTAAATATATTATAGTTAGGAACACCGACGATCGTTATCAACTGTGCCGCGGCCAG -GATCAACGGCCTCTCGTTCCTTGATGTGCCAGCGGACGCATTCGAAAAAACCATCCGCAC 
-TAATCTCCTGGCAGCTTTCCATCTGTACCAGGTCTTCTTGCCTGGGATCATCGCAGCAGA -GAACGGAGGAACGCTTGTTACAGTCAGCTCTGTGCTAGGCCAGCTGAGTGCCGCAGGACT -GTCAGACTACGCAGCAAGCAAAGCTGGACTTAGCGCACTGCATCGGACGATAGAAGCAGA -ATTCCGAGGTAACCCGTTGATCAAAACGCTGCTGGTTGAAGTTGGGCAGATGTCGACGCC -GTTGTTCGACTGGGTGCGCGCGCCAAACCATTTCTTTGCGCCAGTCTTGGAACCGGTCGA -AGTTGCTCGGGAGATGGTGGCAGCGATTGATAGTGGTCGAGGTGGTGTGATCAGATTGCC -TTTCTATGCAACGCTCGTGAATTGGTATGCCGTGCTACCCGCCACGGTGCAGCGGGTTGC -GAGATATCTGAGTGGAATAGATGATGCTGTGACTCAAAGCCGGCAGACATCTGGGAAGAC -AGATTGATAGGTGTTCAAATGGAGCTGACCGGTGTATATCGGCGTGAATCATGAGAAAAA -GGGCGCTCGTACAATCCCAACGAGATGTATTATAGAACGGAGCGATCTGCTTCATTGAAC -ATATTCCCTATTCGACCATAGTTTCGATTGCAAGTCTCGCGCCCTAGCCTGGTCGATGAA -ATGTGATAAATCAAGTGGACCGATTTACTGCTCGCTGGCTAGAATCATCTTCTCACGCTC -ATGGGTTCTCGGCGCCAGGTTGGTCTTCTTAGTATGCTTGCGCACATTCTCCTCACGCCT -CTTAATAGCATTGAGCTCCTCGCGCTTGATTTCGCCAGCCTTCTTGATAGGCTTGGGAAC -ATGACGCTGGCGCTTGATGCGACGAATTTGGGGCATGTGCGAGTATCGCTTGATCAGAGC -TTGATCGTACTCCAGCTTGGAACGTTGACGGGCAGTCTTGACTCCACTGCGGTCCGATGC -ATTGGCACGCCAGAGACGAACATTGCCATCGTCCGATCCTGTCAAGACGTACTTGTTGTC -AGGCGTGAATGTTGCTGAGAAGACACGTTGCATTCGCTTTGTGTGGTAGATGTCGCGCGA -GTGACCAGTTGATCTGTTCCAAAGGCGGATTGTCCGATCATATGAGGCAGTCACCAGCTC -TTCACCTGTGGGGCTGAAATCAACATCCATAACAGCAGCCACATGATCCTTCAAAACATT -GAGAGCACGATCCATCTTTCTCATATCAAAGATGTAGGCGTTGTGATCCTCGTTTGCCAC -GGCAAAGTTGAACGCTTCCATTGGGTTCCAAGAAATGGCGTTAGATGCCAGTCTCAAAGT -CGTCTTGTGCACCGGCTGGGAAGTACGCAGGTCGTACATGATGACTGAGCGGTCCATGGC -GGTCGAAGCTAGGATAGAAGTCTCAGTCTGGTTGAAGGCAACAGATGTGATGGTATCAAC -GCTGGTCGGCCAGTGCAGAGTCTGAGATGGTGTGGAGGAGGGCCGGGAAAGGTCGTAGAT -AGAAATGACACCAGAAGCAGCAGCAAAAGAAGGGAGGGTTCGGTGATGTGAGACACTGGT -GAATGCACTATTCCCGAGATAGGTTGCCATTGGTGGGGCTTCAGATGATGAGTTATAAGG -GTCCCACAATTTGATCGTCTTGTCACTCGCGCATGACAGAAGTTTTCGCTCCGGAGTCCA -GCAAAGACCCTTAACCATGTTTTCGTGGCCTTGAGTATTCCAAATCTCGTCCTGTGTTGT -GAGATCCCACACCTTCACTACACCGTCGCCACTGCCACTAGCAAAGCGTTCAAGCGAGCC -GGGATCTTTTGCCATAGAGTAAACACCGTCAACGTGTCCGTTGCCCATTTGGGCCAGGAA -AGGAGCAGCGAACATGCGCTCCATCTTGATCGCATTCAGAGCTCGAGTGTATTCACGGGC 
-CCGCTCAAAAGGATGCTGAGTCGGGTCGAGATTTCGCGACTGGCGAACGACAGCGGACCC -GGGGGCCTGCTGCGATGCCGTAGAGCGGGAAATTGCTTTGATTTTCTATAATTCATTAGC -ATGGCTATCCAGTAAGCAAAATTTCAAGAAAAAAAACTCACCATTGTGCTGTTGTGAAAA -CAATGTAGAGAGGAAAAGAAAAACGCCAAGATGGAACTTTTTTTCTGTTTGATTTGCCAA -GCGGGCGGTGAGAGGACGATGAGCTGCCGCCAGCTTCAAGCTCCCCATCGACCTGGTCAT -CAACCTCTCTTCAAACCACCATCTTCTCTCGATCTCCTACTCTTTAACTACCCACCTCGC -TTGCTCGTAATTTACTGTCATTTATCATGTCATCCAAGGTCTCACCACGGATCCAAGAAG -TCCGAGATCTGGTGAAGTCGGATCCCTCGAAAGCAGCGACCAGTTTCCAGAACATCCTCT -CAGAGGGCCCCGGATCTACAGAGGCCTCCTCACGAGACTACGAACATGCCTTGATTGGGT -TGGGCGAGTTGTATCGGGATGCGAAGAAGCCACAGGAGATCGCGGACCTCATCAAGACCA -GCCGTGATACCTTCTCGTCATTTGCCAAAGCGAAGACAGCCAAGCTGGGTACGTCGTGCG -CTTCACTACGATTATAGAAACCGACTAACCGTGATTGCTTAGTCCGTCAACTTCTGGACC -TGATAAGCGAAATTCCCAACACACTCGAACTCCAAGGCAGCGTCATCCAATCATGCATAG -AATGGGCTATTGCCGAGCGACGATCGTTCCTTCGCCAGGCCCTCCAAGCCCGTCTCGTTG -CCATCTATATGCAGAAGCAATCCTATTACGATGCGCTCACTCTTATCAACTCACTTCTCC -GTGAGCTGAAGCGCCTGGATGATAAGCTTATGCTAGTGGAGGTACAACTGCTTGAGTCGC -GTGTATACCACGCTCTGGGCAACCAGGCTAAGGCACGCGCTGCTCTGACAGCAGCACGAA -CGTCGGCCGCCTCCGTTTACACCCCACCGAACCTGCAAGCCGGCCTCGATATGCAGAGCG -GCATGCTACACGCTGAAGACAAGGACTTCACCACTTCATACTCCTATTTCATTGAGGCAC -TTGAAGGGTACAGCTCGCTCGATGAGGGTGATCTGGCCACAGCCACATTGCAGTACATGC -TTCTGTGCAAGATCATGCTGAACCTGGTGGATGATGTCACACAGCTGCAAGGCTCGAAAC -AAGCGCAAAAGTATGCCAGTCCACGCCTGGAGGCAATGAAGGCTGTGGCACGGGCCCACG -CCAACCGGTCTCTGGAGGAGTACGAGAAAGCCCTTTCGGACTACAGATACGAACTTGGCA -GTGATGTGTTCATCCGAAACCACCTCCGTCGTCTTTACGATGCCATGCTGGAACAAAACT -TGATCAAGGTCATTGAGCCCTTCAGCCGGGTTGAGCTGGACCACATTGCGAAGATGGTGG -GCTTAGACACACAGCAGGTGGAGAGGAAGTTGTCTCAAATGATTTTGGACAAGGTCATCA -TCGGTGTATTGGACCAGGGCTCGGGATGTCTGATTGTGTACGATGAGACAGAACGGGACC -AGGCTTACGATGCCGCTCTGGAGACAATTGAAAAGCTCGGAAATGTGGTTGAAGGTCTGT -ATACTAATCAGGCGTCTCTCCTGGAATAGAAAAGATGGGATATGTCCAATGCCGTTGAGA -CGATGACTTGATAGAAATTAATCGAGACAATGCACGATCACGCTCAATACCCATTGAATC -CGCATGTAATTAATAACCAGAACATGTAGACTAGCTCTTGATATTGAGCTTTAGCTATTC -ATGCAACGTCAACTGCAAAATCCAAGCAAAACAATAAGTTTTGTCTTTCTGGTGTTGTTC 
-GCGATCGTCCGAAAAGGGTAAAGCGCGACGGGCAAAAGAATCGCGACCTCGCATCTCCAA -CATCTCGCATCTTTTCCCCCCATCCGTCATCTTCCCACCACCATTTCGGTGCTTGATTTC -TCCACAATGCAAGCTCCAATGCAGCAGCCAATGCAGCAGCCAATGCAACCACATCATTAT -GCTCGGGCCATGCCCCCAAATGCACAGCGATCTCCAGCCGCGCCCCGTCGCCCCCCGGGG -CCAGGTATGTCCCCTTTCATCGGTCCCCACCCACTCGTCTCGGCCCTAATCTTACATCTT -CATAGGTGGCGCAATGCCTGTTCCGATGCCACAGCACGCTGTGGCCCAACAGTACATGGC -TGCACGCCCCATCCCCCACCCCAACGATGCTGCCCTTCGGCGCAGCCGCAAGCCAACCGA -CAAGAACCTCCCCGACGGCATTGAAGACGTGATCATCGGCGAGGGCGTTCAGCAATACAA -GAACCTTCGTGACTTAGAAAAGCGACTGGATGCCGCTGTTGTGCGGAAGAGGCTGGATAT -CCAAGACTCCATCAACAAGACCGTGAAGAAGTATCGGACTATGCGCATATGGATCTCCAA -TACTGTCGAAAACCAACCATGGCAAGGCCCGGGCAACAATCCAGGCTCAGGTCGCTATAA -AGTGCGCATCGAAGGACGGCTGCTCGACGATGAAAGCGATCCTACGGTCCCCGACGAGGA -CGAGAAAGATGAAGATGCGATGGATCACGACGGTGCTGGGGAGGATAAGGCGAAGAAAAC -CGAATTGAAGAGCCAGTCTCAGCGTTTCTCCAATTTCTTCAAGACTATCACCATCGACTT -TGACAAGCCCGCCGCAGCAATTCCCGAGGAGGTTAAGCCAGTGAACTGGTCTAAAACAAA -CACACACTCCAACACACCAGCGCCCCCAGTTGCCGAATTCGATAGTCTACAGTTTTCCCG -AGCTTCGCAGGACAATTTAAATGTCACCATCAGCCTTGTCCGGGATGAAGTCCCAGAGCG -ATACAAGCTGAGCAAGGAGCTAGCCGAGGTTCTAGATGTCGAAGAGGAAACCAGGAGTGG -AATTGTTCTCGGAATCTGGGATTACATCCGAGCCATGGAATTGCAGGAGGACGAGGAAAA -GCGACAAGTGCGCTGTGATCACCGTCTACGCTCAGTATGTAATTTGCAGATTTAGTTTTT -CTGAGCCCCGCTAACAAGCTTTACAGATCTTTGGCCGGGAGCAAATGTTCTTCCCCCAGA -TTCCTGAAAGCGTCGGTCCTCACACCAGCCCAATGGAACCCATCAAGCTTCCCTACACAA -TTCGCGTTGACGAAGAGTTCCACAGCGATCCAACGCCGACCATCTACGATATCCAGGTCG -CAGTTGAGGACCCGTTGCGCACCAAAATGATGGCTCTTACTCAGAACCCCGCGTACACAG -CTGGCCTTCGCCAGATTTCCTCGCTTGATGACCAAGTTGCTTTGATTGTGCAGGCCCTTA -CACACTCTCGTGCCCGCCACTCATTCTATACTGCGTTGAGCAAGGACCCGGCCAATTTTG -TTCGTCGCTGGATCAGCTCTCAGCGCCGCGACCTTGAGACTATCATGGGCGAAGCCACGC -GGGGAGGCGGCGAGGATGGTAGCGGACCCGAGTTCCGTTATGGCGGCGTCGACGGCCCCT -GGGACTCTGAGGTTGCCAAGGAAGCTGTGCGGTACATGCTGGCTAAACCCGAAGCTGCGG -CTGCTCGGTAAATTTTCTAGCTTTGTCTTTTCCTTTGAACTCACATCTCCACTTTGCCGA -CCATTATGCAAGGGGATGGGTTGATGTTTTCGTTTTCTTTTTTTTTTCCTATCTGATATC -GGGCGCTTGTGTGGTGTTCGGACTGGTGTAGCCGGGGTATGAAAGGCAGGATTGAATTGT 
-TTCACAATCCGTATTTGAAGAATCTGATACGATGCAATATGATTCCGGATTTTATATATT -GTGTCCCTTCTAGAATCCGGTATATTTGGTGATATATTATCGACACGACCCCAAGACCTC -TATGATTTCTAAATATGGACTTTGGCCCAAATGACAAATTATTGTAAAATTAGAGACACT -TCGGATTATGGATGCCTCACGCCAGGTGTTGTATCTCTGCATCCAATAGAAAAGAGGTAG -GTCCAAGAGAGCAAAAACAATGTGCACTGAAGCTAATTGACATAACTCCAAGGGAAAGCA -ACATGGAGAGGGTTTGCAAACATCCCATGAGACACAAAGGAGGGGAGAGAACTTAACAGC -CCTCATAAGATTATAATCATAATCTAAACTCACACCCAGACCTCAAGCACCAGGCAAAGG -AACCTTTGCAACAAGGTCTGCCCTCCGTTTCCGCAACTCCTCCCCAAGACCTGAGTCCAC -GCCCGAGGAGACATGTTCCCGCAACGAGGAAAGATCGCCCTGCAGAACGGCGGTCTCCTC -CTGCTCCAGCTTGGAAGCTGCCTCGAGAACAACATCACGCAGAATATCCTCCTTAAGCTG -CACAGCCTTCTCGGCACTGATGGGCTCCTTCTTTCCCTTCTTCTCGCCAGCAGCAGCAGA -CTCAGCTGGAGTGCGGAAACGCTTGAGAACAGTCTCGACCATCTTGGAGAACCAGAAAAA -GTTCAAGAAGTTTAAGATCAAGTTCGACCCGACGTAGGTCACAACCAGCCACGTCGGTAC -ACCTCCCGCAGTGTACCCCTTGAACTTGGCGATCTCGGCGTTGGCGCGCGCGCATGCTTC -GTTAATACATAAGCTGCCGTCACGGGCTGGGTAAAAGACATTTGCATTAACACTTGCGGG -TTCAGAGAGCGGAACCGCAGCGGCAGACCAGGTTTGCTGAAGCGCGTTCCACATGTCCCC -GTACACCACAACACTTTGCCAGGTGCCCCAGACGAGACGGCAGGCGAAGAAGCTGAAGAG -AAGGGCCATACCGTTGTACCATTGGGCACGGCTGCCGGTCATGTTGACTTTGTCGAGGAA -CCAGTGGATGTTGAGGAATGGGCTGGAGAGCTCATAGAGGATGAAGGTCGGAGCGAAGTA -GTTTAGGAATGGTCTCTGTAAGAATATGTTAGATATGTGTTGAATCAGGAGAGAAAAGGC -ATAAGCGTTGATGGTCTTCTTGATACTGAGTGTATCTATATCAGGCAGATTGAAGAGGCA -ACTCACAAATCCTAGACTAAACACCCATAAGGCGCTGACAGCATGGAAAAGCATGCCAAT -GCCAAACATGCGCACGTGAACAATGCTGACAATCAAGTCATATACAAAGTATCCCACGGC -CAAGGCTTGAATGAGACCACAGGCACCTGTATATCCGAAGACCCGCTCGCCAGAGGTCAT -GGAACTGCGCTCCTCGTCGAAGTACATAACCCACAGGGCGATAGCATTAATGAAAATGCT -CTGGACAAAAGAGACAACATGTATATCCCAGTTCAGCTTCGTCCGAGGAGTAAGCTTTTT -GTACGACTTGAAGAGAATTGGCGATAGGCAAGGTGAGAGAACTGCATGGATGAACAGGTA -GAAGGCGAACGCTGCGATAGCCTCGTGAATATGGTCGGCAAGCGTCGGACTGTTGAACCG -GATAGCATAGGGCTCTGCAAAGCTTCGTAGCCATTCTGGAGGCGGTGGGAAGGGGTCCAA -CATCGTGGGAGTTCCCCTCAACTAGCTGTGATCACCCGATGTGGGGGAAAGCTCTTGGAG -GTTCGGAGATCGGGGGAATGTAAATTTTTATGCAGTTAAGAGAGCACCCTGTCCTATCTA -ACATGAAGAAGAGATAGAGGAAAGAGGGAAAGGAAACAAAGAAAACCCAAAGAGCTAGGA 
-AGATGGGAGAAATACGTACTCCATGGGGAATGGAGTAGAAGGTGACCAGGTGAAGCGCCT -CTGTATCGCCTCAGGCAGCAATGGATTTGTTGAGGGTCACCTGATCACTTGCCCATCAAG -GCTTGTAGAATTCCAAAAGGTGATGGAAAAAGAGAACATGGGGTCTACAAAGATTCCAAG -CTAAAAATATGATCGTGATTCCAAAAGATACTCATGAATAGGCAGGTGACCTTGACCACA -TGGATTTATACAATCTAACTACATCACATGAGTTCTGGGAGAGTAAAAAGAAAATCATCA -TTAAGCAATACACATTTAGGGTCACAAACGAAAGTCATTCAAGTCATCCAATCAGAAAAA -CAAAACAAGAGGTGGTATATGAAAAAGAAAGAATCAATGGTGCAAGAACCCCAACTCCAT -CCAAAAAAAAACAATGCAATCACACCGTAAGATTAGTGCCAAGTCCACTGTCGCTGGGGG -GAGCCAACAGCTTGAGAACTTCGGCTTCTGCAGCCTCATTGGCCTCTCTAATCTGACGGG -ACATTTCGTGATCCTCATTCTGGTCATCGTTCCCAGAATCCTCTGCGTCCTCTCGTGCTG -TCCGCGCGCCGTCTTCTGACTCGGATCCATCAATAGACATATCATCATGGCGGTCCTCGG -GGTCATGCTCGGCGATTGAGCTCTCTGCCACACTGTACGAGTTTGCTGAGCTCACTCCAC -TCACACGTCGATGATCCGTGGCGTCGGTGACCACACTGTCGACCACACTGCCAGAGTAGC -TTATCTTGCGCTCCGTGCCGGCATTGCTTGCTGCATAGCTGGTTCTCCGGTCAGTGCCAG -CGTTGCTCGGGGCATAGTTGCGTTCAGTTTCATTGCCAAGGTCAGATTCTGCGTAGCTTG -TCTTACGGTCGGTAGGTGCGTTGTTTTCCGCATGACTGGTCTTCCGTTGCGGACCAACAT -CTTCCGCTTCGCTGGCAAGCTCGTCTTCTTCGTGGAACGGCTCGTTCTCTTTGTTGGCAA -CAGAAGTTGGGCCAGCCCAAGAAGTCTTCCGAAGGGAATTTGATTTGACAGAGTGGCTGC -GCTTCAAATTCGTGATGCCAGATTTAGCACTCACCTGCCGGTCCACACCACGGTTGCCTG -TGACGTGACTCAAGCTAAGAATCCGACGTTCTTTCCCAATGTTGGGCTGTACAGCGACCG -TGCCAGCTTGGAGCGAGGTCGGCATTCTTTTCAAAGCCATCCCAACGCCATTTCCGAGCA -GACTACGGCTGTTGTTGGATTCTTTGGCAGTGCTGGTAGTACTCGCAACGCTAGGGCTGC -GCGCGCCTGCGCGAGAAGAACGGGTACTTCGGGTGCTGCGGTTCCTCTTCAGGTTCATGG -CCTGCGAGCTACTGGATCGCCCAAGGAGAACAGCACGAGCATGGTCGAGTACACCCTCCA -GAATGCGTTTTTCAAGGCCATCCGCACGTTCCTCCATGTGTCGCATCTCTTCGTGGCGTA -TCTCCAGAGCTGTCTCAAGGCCGGACAGGTCCTTGCTGAGCTTGTTGTTTTGCTTGATCA -TATCCTCCTTCTCTGCTCTCAGACGATGGATCATATTCAGCATAGAATCCTTTTCTTCAT -GCAAACCAACAAGCTCCGCTTCCACTTGCTCACGCTGCGAAACTCTTCGTTCTAGGGCGA -CGGCAGCCTCCTCGGCTTCGTGCCGCTTTCGCTCGGTATCTTCAGCTACCAGCGCTGACT -GAGCGGCGAACATCTCATTCATTCTGCGGGAGTTCTCCATGATATCCCTGTGCATTTCGT -CGAGCTTCGCCATCTGGGCAACAGAATGATCAGATTCCGGCGGATTTGGAGGACGGCTGA -GCAATGCGTCCAGCTTCTCGTGAACAGGACCACTCCCATTGGTGGTGTGGTCTAGCAACT 
-CGTTGAGCTTTTCGTATATTTGATCATTCGAACCTGAACCTTGATCCAGAGCGCGATCAA -GCTTCTCGTGGAGCTGCTCATTCTTTGAGTGATCAAGCAGAGTGCTCAAAACCTCATGGA -CCTTGTTGTTCTTGGCATGCTCCAAAAGATCGTCAATTTTGCTGTGTAACGGAGTATCCT -CTATAGCGAGAGCACGTTCCGATTCTGGTGGAGGTAGTGCGGGTAGCATTGCTGGGATAT -TTGAAGGTAATGCTGATAATTCGGACTTCAAGTCCTCTGCTGACTTCTGGGAGTGATCAT -AGTGCTGGCTGACGAGTGTCAATATCTCCTGCACCGATTCCAGAATACGGGGGTGGAATT -CAAGGAGCTTGCTCTCCACCCGATCAGTGGCAGCTGCGGCCCTTTCGTTCTCGGCATGAG -ACTGCTCCTGGTGCGATCTCACATCATTGTGCATGTCCTCCATTTTCTGGCGGGACTGAT -CGACCTGTTCCATGAGAGCTTTTGTGTCCACACTCAACCGCTCACATGTCTCATTAACAG -TATCACCCATAGCACCGATGACAAGCTTTATATCCTCCGCGAGATTTTTTGTACTTGTCA -AAGATTCGGTGTGTGCCAGGTCTCTCTCTTCGGTTTCCGAGAACTTCGAGTGGAGAGATG -TGTGCGCATCTTCATACTTGGCTAGGACTTCGGAGATTTTCTCGTCAATCTTTGTGCCAA -GTTCCACAATAATGGCGGCCCGCGTCTCGTCGTGTTTGACCATTGTAGCAGCATCCCGCT -CCTCATTCTCGACCTTGGCTGCATCTTCCTCGCCTCGAGAACGGTCAAACTCTCGGTTGA -TGAGATCTGTGAGCTCCTTCATGCTGTCGACGGTAGTGAAGCTCTCTGCTGTTACTGCCA -GTCCTTCAAGAAGCTGTTTAAGCTCAGCAAGACCTTCGGTACTGCCATCAAGCCTGCTCT -TGAGTTCGTCTCCAAGCTCGCCAACGACAAACTTGGCTTCGTCAATCTTTTCAGCAAGAC -CACCGTGCTCAACCTTGCGGGCATCGAACCCTTGGGCGGTCAATTCGTTACTACTCTCCA -TCTTCTCTCGGAAATCGCATACAAGTGCCGATAGGTCCTGGATCTCCGATTTGGTGGGGA -GAGTATCCACATCAGGAAGCTTAAGCTCGTCGACTTGAGTCTTCACCTCGAAGATCATGG -CTTCCACTGTTTGGAGATCGGATTTGAGTAGTTTCTCAGAAGcttcttcttcctcgccat -ccttagtcttgctcttcatctcatcGAGAACAACCCAGACCTCTTTGATGAGACCCTCCA -AAGTGCCGATCTCAGACTTTGTGGGAGCCTCGGCTTCAACGCGACTGGAAAACTCAGCGA -TCGAATCCCTTGCCTCTCGTATAATGGTCTCTAGGGCTCCAATATGCTCAGGTGTGGCGA -CCTGATCCGGCGAGGGGAAGACAAGCTCATCGATCTGGGTCTTGGTGCTCAATAGTAACT -GCTCGAGAACCTCGGTATCTTCCTTTCGGACGCGGTCGTCTTTCTCAGTAGTCTGTTGGG -TACTCATTTCGCCCATGGCGAGTTGAACTTCATGAAGCGCAAGAAGAATTTCATCAAGGT -CGGAGCGCTTGAGGGCATCCTCGGCAGCCTCTGCGGGATGAGGAGCAGTTTCAATGGCCT -CAACCTTCTCTTGAAGTTGAGTGATGAGCACTTTCAGACCATCAATGTCAGGTCCAATAC -TAGGCTCATTTGCCAGCATAAGTTCGCCTCCGCGGGAGGTACTGGCATCATCCAGTTCTT -TTTTAGATTCACGGAGGGAATCTATCTCAAGCTTCAGATCCGCGAGGCCATCTCTGATGG -TATGGAGAATCTCAGAACTGTCGGACTCAGTTGGCTTCTCCAGCAGTTTCTTCAGGTCAT 
-CACGGATGGAGTCAACACCACCAGTAAACACGTCGAGTAACTCGCTTGTCGCGGTTAAGG -TGCTTTCGCTACCATCTCTACGCGAGGTCTCTTCACGCAGGATATCGAAATGCTCGCGGA -GTGCCGAAATGATTTCATCCCTCTCGGCACCAGGCTCCGATTGAACCACGCTCATTTCCA -TACGCTGACGAAGGCTCTCGAATTCCTGCTTAATGATGCCAACCACATCATTTTCGGTGC -CATCTTTGAGCGCTGTCTCCTCACGTAGGTTGTCGAAATGTTCACGAAGTGCCGATATGA -TCTCGTCTTTCTCAGTGCCAGGCTCAGCACGAACGACACTCATCTCCATGCGCTCTCTAA -GACTCTCGAGCTCCTGTTTGATGATATTGACAACTTCATCGGCAGACTCATTCTTGTGGC -GATCGGCAGTGTCTCGAATAGCATCCAGGATCTCATCTTTGTCTTCGGTGGCATTATTCC -GTAGAAGAAGACTAGTGATCGTTCCCCGTAGGTGCTCGAACTCTTTTTCCATGGCATCCA -GAAGCTCAGGTGTGTCCGGATTGCCCCGCGGCGCAGCCAAGAGGGATGTATCCGTGATGG -TCTTCTCCATGTCGGCCTGAAGGAGTCGGAACCCTTCTTTGACTGTATGGATTACTTCAT -CCTTGCCGGAAAGATCGGCAGCGGTGACCACATAAGTCTCAATCTCCCTGTGAACGGTAT -CGAGGCCGTCCTTGATTGCGTCAAGAACTTGCTCAGTGTCTCTTCCACCTGCGGCCGAGT -ACTGCTTGAATTCCTCCTTCATACCTTCGAATAAAGTCTGGAGACGTTCCGTCATCTCCC -CACCAACATTAGTATTCGCGGTTCTGATGGCCTCGTTGAGACCAGAGGTGATTGCTTCCA -TGAGATCGTCCTGGTTCAGCTCAATCTCCCGGGTGATGGAGTTGCCATTTTGAATGCCTT -CTGCAATAGCGCCCAGAATCTCCTCCCGCGAGATAGACGGCTCTTGGACATGAGTGCTGA -GTGCTGACTGTTGGCTGTTTGTCGACGCCTCGATACCCTCGGCGATAGCACTCATGATAT -CCTCCTTAGAAACGCCAGGATCCCTATTAATGCCCAGCGAGGCATGAGAGTGTTCAATGC -CCTCGACTATTGCAGTAAAAATCTCCTCGCGGGAGGGAGTGGACTGATGATCCTCGCCCT -CATGCCCAGTCGATGTGAGAGCACGCGACGAAGAGTTACTTGAGTTCTGAATCCCGTCCG -CAATGGCGTTCAAGATTTCGTCTCTGGAGATATTTGATTCTTGGCGGTTTGACGAGTGCT -CCATGCCCTCCAAAATGGCACCGAGGATCTCATCTCGAGAAATACTAGGGTCCCCGTTAT -TGCGAAGGGAAGACTGCGAATGCTCGATGGCATCAACGATGGCGCTGAAGATCTCTTCAC -GAGAGACAGTCTGCTGAACGGTGGTACTGAGGGCTGATTGCTGAGCATTCATCGAGCTTT -CGATGCCCTCGGCAATGGCATTCAAGATCTCATCACGAGAGATGTTCGTGTGAGCGATAG -TAAGAGCTGATTGCTGAGCATTAAAGGCTTCATTGACAGCATTCATAACATCATGGCGGG -AAATTCGTGGCTCATCCATCTCTTTAGCTGCGGTGAAATGGGCTTCAAGGCCGTCGGACA -CAGCACTGACGATCTCATCACGGCCAAGGCCAGAGTCGTATGAATCTTTGCTCAGCGTGC -TTTGTGTCATGGATCCGGCTAATGCGCCAAGGATCTCGTCCCGGATAGAATTTAATTGAC -CGACATTGTCGTCATCCAAAGACCTGGAGGTGCTGTGGTTTGCCATCGTTCCAGTAACAG -CTTCAATAATCTCATCGCGGATAGAGTATAGGTGTTGGACATCATCTTCATCCAATCCTT 
-TTGGCCCCGCGTTGTTCGCAACCATTGCTCCAGCAACTGCTTGAAGGATTTCATCGCGCA -GAGCAGTTAGATGATTGGTGTGATCTTCATCCGCGGATTTGGAACCAGCGGCATGGCTTG -TCATTGCACCAGTGACAGCTTCAATGATCTCGTCGCGAACAGAGTGGAGGTGACTGACAT -GATCCTCGTCCAAAGACTTGGGCCGCATGTTGTTAGCTGCCATTGTCTCAGTGACCGCAT -CGAGAATTTCATCGCGGATGGAGTAGAGATGGTTGACATGTTCCTCGTCTAGAGACCTTG -AACCTTCGTTGTGACTTGCCATGGCTTTTGAAACAGCATCGAGGATCTCGTCACGCATCG -AGTGGAGATGACCGACATGTTCCTCGTCAAGGGATCTGGGGGCAGGCTCGGGCTGCTTCT -CGAGGCATTCGCGAATGGTCAAGATTATCTCGTCTCTGGATATACCCGGTGGGTGCTCGA -TTTGTGGAGGGATGAACTTCTGCATGCCGTCCTCCACGGCTGCGAGCACCTGGTCGTAAG -TGACCGCCTGTTCGTGCTGGGGCTGGTAAGACTTGAGCCCTTCTTCAAGACATGTGAGGA -TTTCATCTCGTTCCAAACCAAAGTTGTTAAACTCGATTTCGGGCTTGTAGGTTTCCCAAG -CTTCACGAACAGTCTCAAGAATGTCTTCTCTGTCCATTTCAGCACCTCGCGACTCGGCTT -GAGGTTGAGGAAGTTCAAGCTCATCGAGGGCTTTTTTCACAACAGAGTAGATTGCGGCGC -TATTCATGGTAGATCGGAATTCGGACAAGTCAGACGATTGCTGCCGACTCTCGTTGATAG -TAGCAGCCAGCTGCTCTTTGAGGTCGTCCAGGCTGCGGTTGACAATAGCGGAAATCTCTT -CTGGGCCGACAGCCCTAGGCTGCTCCTCAGGCTCGTCAATGGCACGGGAGGTCTCGATCC -CGTCTAGTCGCCGGCCGAGTTCTCGGCCCATGCCCAGAACCTCACCTCGTAGTTCGCGTA -CCAAGGACTTCACATTGTTGGTCATTCCACCTGCCTCGAGAACGCTATTCTTGACAGTCC -TAAGAATTGACAAAACTTCATCGGCATTCGGCGAAGAACCATCTCCTTTGTCAGATTTGG -AAAGGTCAAGTGCACGTACAATTTCTGGATCCACCGACCCCATCGAACCAGATCGTGTCG -CCTCGGGAGAAGCATCCCCGGATTCGATATTATCAGGATTGGCAGCCGTGCCGTAAGCTC -GGACGAGGGCTTTGCTCGATGGATCCTCTTTGACGAGGTTGAGGATCTCTTCAAGCATGA -CAGTGATCTTGTCATCGGCATCTTTGTCCGAAGCGGTAGCTGCGGGTCGAGATCTGATTT -CTTCAACGAGCTCCAATAGCCTTTCCTCACCTTCTTTAGTTGCCATCTCGCGCAATACCT -TTTCGAGCAAGGCGAGGGCTGCGGAGCCGTCGGAAATCGGTGCAGGCGAGACGCAAGAGT -CCCGATCAAACGCGAAGGGGTTGGGGCTCGGCATGCGAGGCACTGAGTAGCTCCTCTTTG -AAACGGGACTATCCGGATCCTTGGGGTTAGGCCCATCACCACCCTTAAAGAGATTGCTGA -AATCAAACCTTGTAGCATCAGCATCAAGATACTTGGTAGTAAGATGAGAAGATAGGTCCT -TCAGACTCTTTCGAATGTCTTCGTCGCGTGCATTTGTTGCCGCCTTGAGGCTGACCAAGT -CGGTGGCGTTATCTTTGCTGCGTCGGCTCAAATTCATCATCTCACGCTGTAAACCACTTG -CAATGTCCGTGAGACTGTGGATTTGATTTGAGATCGTATCCATGCTTGTAGTCTTAGTGG -GATGGGGTTGCGGCTCAAAGGGCGCAGTGATCATTTTCGGTGGTTTGGAAGGTGATAGAG 
-GGTCGGGTTCTAAAGGTAGCTGCATGTTTTGCAGGGCCAACATCGTAGCTGATGGCGGAG -TACTTTCTCCCAATATTGGCTTTGATAAGTACCGACTCGATAGTGAGTAGGTCATCTCCC -GGTCCGCGTCTTGACGAAGCGCTGGTGAGCTGACAGACTTGGTAGGATCGCGTGGCTTGG -CCGCCTCAATGATCGTCGCACGAAGAGGGCGCTCTGGGGATCTGTTCTCGAGGTAATTCA -TAAAGTCACTGCGCGCATCTTGTCGGGTATCTTCAGGCTGCAGTGACTGCATGCGCTTGT -GGTGCTGGCCTTTGACCAAAGACCTCTCTGGAGAGGAAGACTTGAGTGGCATTGGATTTG -TTGTGTAAGAGCTCGGCGCATGCTCGCTGGGTCTCTCTGTAAATTTTGCAGCCACTGCCT -GAATGTTGGGAGAAACATTGGACCTCGTCGCAGGGCTCATTGGGCTCATAGAACGAGGGT -TCATTGGTAGCGGCGGAGGTGCACGCTCGGGATCCGAACTGTCCCACCTGCGTAGGCCGC -GTTTTAGCATTTGTTTGAAATCACCCATGGGATTCGTCAAAACATGAATCAGGAGGCGTT -AGTTGCACAACCTACATTGGAACTTTAGACTCGGAATATGTCATCGAATGCCGGACCAAT -TCCCGGTCCCGTGGGATGTCCTTGTCTTGGGACGGCATGATGTCTTCGTGAATGGATTGT -CAAGGTGTCTCAGAGAGGTATTGACAATATGGATGGAATGTATGCAGGGTATGTGTAGAA -TGCGTTCAAAGTAAAAACAGAAGCAAGGTCATCAAGGCATATCAACACAAGGGAGGAAAC -GCTGGAAGAACAAAGCAAAAGCTAACAATCTCAGAACTCGGTGGAGGAGAGAAAAGCACC -GGGAGATCAGCTTGATAagaagagagcaagtggagatgagagttatatggagagagagag -agagagGAACTACGCTTGTACACATCACTCACGGCCATAGCAACCGGGGCAACTACGATC -GTCATGTAAACAAAACCTGACTCAGCGCAAACAAGGCGGTGAGCAACGCTGGGCACCAAC -TTCCCAAGGTCGGAAGGGGCTGGGCCGATAATCGTGGCCAAGCCCATCAACACATCCCCT -ATGTTGTACTCCTCCTACGGAGTAGTGTCGGTGGCCCCACAGCAAATTGTCACTGGATTG -TAGTAAATTCGAACAAAAAGTACCTACATACAGAGAGAATATCCGAGTATCGATTTTTGG -CCAAATACAGAGTAGCTTTCACAGGGGCCAAAAATCAAAAATGAAGACCTAATATAGGTC -AATTTCGATTGACGTCCTATCACAACTAATACATAAACATAATCTAGAGGTGAAGAGTGT -CTGCCCTTCACCATACTTAACATTTGATACTTGACAATTTTTTCTAAGTTTCATAATGGG -TAAGTAAGATTTGATCTACACTCTATTTACCTTCCTATTTTATCCCTAGCTAGCCTGAGG -GCTAAAAACCCGTAGCTATGATAATATCGTGTGAAGGTACCCTTTATATGGTGTCACTAC -TCTGTACATATATAAAGGATACTTTCTTTAGAATGCAGTACGGAGTACGGAGCACGGAGT -ATCTGGTGACAATTTGCTATCCGGTGGGGCCCACCACTACATGGTGGGCTTTGCCATCCA -TCCGTGACCATATCGATTATGTCATCACTTGATCCGGACCCGATGGGGAAACTATCCGAA -AGTATACTAGTCGCCACCTGTCGGTAAGTCGACCAATCAATCCACAGCACACAAATCGCT -ACTGTGCCCTGAAACCCTCTACCTCACATCCGCAAAAACGAACACTTCGATCTTGACCGT -TCCGTATCTCGACAACCACCCATCCTCGCCGCCCCCGGGACCTTCAAAATGAAGTGTACG 
-TCTTCACTATTCGACTCCCCTTGCGATATATATGCTGACGGATTTGTTACAGTCATGAAA -GTGGGCCGTGTGGCCATCATCACCCGTGGCCGCTACGCCGGTAAGAAGGTACGACAAGAT -CCTGATCCTCCCGAATTGAGCACGAGAAGAATGGAAGGATGCGAACGAATTGGAAGCGAA -AGACTCGAGATTTGAAAGAGCACAACGCAACATGGAGGACTGCGCAACTATTCAAAAAAA -ATTCGGAGTGCTAATCAGGCATTTTTCTTCGACTTTATAGGTCGTCATTGTCCAGCCCGT -TGATGCTGGCTCCAAGTCGCACCCCTTCTCCTACGCCATTGTCGCCGGTATCGAGCGTTA -CCCCCTCAAGGTTACCCGTGGTATGGGCAAGAAGCTCGTCGACCGCCGCAGCCGCATCAA -GCCCTTCATCAAGGTCGTCAACTACAACCACTTGATGCCCACTCGTTACACTCTCGAGCT -CGAGGGTCTTAAGGGTACCGTCACCAACGACACCTTCAAGGAGGTCTCCCAGCGCGAGGA -CGCCAAGAAGACCATCAAGAAGGCTCTTGAGGACCGTTACACCAGCGGAAAGAACCGTTG -GTTCTTCACTGCCCTGCGTACGTTTTGCTCAGGATATAAATCTAAATTTATTTTGAAACT -CGCCACTAATACTCCAAAACAGGTTTCTAAACGGTTTACTTGGGCGTTTTGTCGTTTTGA -GCGCACGGGTCTAGGTGAAGGCGAGGTCGAAAAACCATCGAGCACATTCGGTCCCTGCGA -GCATGGGATGGGAAAACATACACCATGGCTGGAAACAGATTCGTGAATGTATCGATGAAC -TCCAGTTTTAACGGGGACGCCACTTCCATACCTGGTGTCCGTCTGAATGAAATGGAAAAT -GAACAATTATCTCAAAATCAATATTGTCTATCACCTGAGCATTGGTCGACATATTCCCTC -TTTCCCATAGGCCGGGAACAAATCTGGTCTGTCATCTCAGCACCGATCGTTTCCGCATCG -GTGTATGGCCTTGTAGCTATGTGGTTTGTCCCAGGTCCTTCAATTTCACGGTGTCATTGC -TAAGATTGATGTATCCATGCCCATTCATAATGCGCAACCGTAGTTCTCACAAGTAGTTGC -TTTAATCCTGGATGGCAGTGCTAGATGCCCTCTCACTTGAGTAATCCTTATCGTTTTGAC -TGAGGACTCAGCCTTCTCGAACTTCATGTTGAGAAAGTCCCCAAGATCGGTCTAGGTCCC -CACATCCGTGCTGTAAGATGCAATTATAGGTTCTCTTCCTCCAAAGTCCAGTCATGTACC -ACATGTGAAGCCCGCCCCCGCCAAGATATATCACACCGCAGCCATATAGACATCATCGTT -TGCAAGGTATTCAACGCTAGCATGTCTAAAACTTGAACGCACTTCTAAAATATGCAGCAT -CTGTGCTTGTGCTGCTATTCCGCCGACACTGCAGCCTTGAAGGTTCTGGATTTCTATAAT -GTTCTATCTACCTGCGATCCAGACAAAGAGCTCCTTTGATTCAGTGGATAATTAGCACAC -CCTGATCCTACTCTCGCTAGTAAACAAGGGGACAATGGTCTTTGAATACTATTCTCGATA -TAATTCTCGAGATAAGAGCAATACATTCTTTATATACATTGTCTTTTTTCTATCACGGCC -TCAAACATTCATTCATAGTTGATGCAATCCATACATATTAAGTATATGTATCGATCTGGA -TGGCTCTCCATCTTCTGCCCATATCATCTGATCACCATTGAGCTATCATTTCAAGCAACT -ACCCAAACAGGAACAAGGCACATACCAAACTACGAGTCAATAAATCGTCAATCATCTAGT -TTCTATAATCATGCCCTTTCTTCTAGAAACTATCCACGAGGACGCAAGCGAGAAATGGGC 
-GGACAATGAGGTTCATCGCTACTACAACATGCCCATGGTAGCCGTGCGAGCCAGACCTCG -TGTCCTCATGCGCTGCAACGTGTGCCGCGACACTGCTTCGCTTTTAAATGTGAAGGTTGG -TATAGATAAGTGGATTAATGCTGAATATTTCATCTGGGAGATGTCCGCGGGACTAGCTGA -CAGCGGCGCAAGCCCGCGGTGGACAGAGCTCTTCGAAGGTCAATATAAGGAAAAGTGGGT -GCAGGCCCAGAACGAGTTCGAGAGCATAGCCGATCTGCCCGAGAGATTCCTTCACGGGGT -TCCAGTCCCGCCTTTCCAGGTAGCAGAGGTGAGGATCAAGACTAACGAATTCCTGTACTA -TTGGCGGCTCCGAGCCAACACAGGCCGACACATGATTGACACGAGCTTTTGGCATCTTTG -GAAGTTGATTCTCCAGCAGGACTCGCCCGCTGTTGGCCCTTTTGATGTTATTCCCAAAGT -GCCCTTACATTCTCCTGCCGGGGAGGATCCTATCAATCCTTGGAACATGTCTCTCCCTAA -CGACAGACGATCGGCCTGGTGTTATCAGTCATGAGGATGTTTCCATTCTTGGTCCTGAGC -ACGCATGCAATAATCTCGCTCCATTGGAGTTCTGCACGTCTCTGAAGGTACCTGTCAAGA -TAATATCCCTCCAGCTAGCCGTAACCGATTGCGCTCCTCAGCTAAGATGGACTTGCCGCA -GCGCATACGAGATCGTATGTTTCTGAAATAGTGGGGCATGTCCACAAATGCGTGTAAATA -TGCAAGAATGAGCAGTACGACAAGTCCACTGCGATGTCTCAACACTCCAAGACCCCTTTA -GTTCTTGGGAGAGGTGAACAATAACTATCTTGAATGGGGGATATCATCTTACGAGCCACC -TGTAGTATATGTTCTGCTCTCTTGTTTCTCTGGTCTAGTGTATGTTTCCTCCTCCCCCAA -GCTTATATTCACGTTTAATGCAGTATATTGTTGAACTGATCTTTCTGAAAATACGGTATA -GTAAGTACGGAGTGAGAGTTTACATACCAATAGACTGTAAAGAGTAGGACCGATATCTAT -AGTACCACGGGTGTTGGATTGCATCCACATGAGATACGAAGATGAAGACTGCCAAAGCCT -CATTGAAACTATCATGTTTCCATGAATATAAAGCGCGAGCCCGCACACTAAGAGCCATGA -GTCCTGCTATACTGTTATGTTTGGAATGCATAGCTGTTCAATAGTCCAGTGACTAAGATG -CATCATATGATAGTATCAAGATGTGAAGCTCGGGGTTTCTCTCAACTTGTCAAGTGGCGC -GATCGAGACAACAAAGCGACAATAAAATACACGAAATAATGCGTGGCACACTTCCGACAG -CTAATTCCCTTGGCACCCTTGACGTGGTATTTTCCTTCAGAGTCGATTAGGCTAACAACT -GCCAAGTCAAGGGCAAAGCGCTTCGAGGGCGCTAGGTTCCGGCACGGTTGATCACAATTG -AATGTCTATGTATCATTGGACCAAACCCTGTAGACACATAAACAAAACAAAAAAGCATTT -CATGTGATCATCACCCGAGTCAACCAATCCCAACCTCCAAGGCAACATAATTGAACCCTT -GATGTCTAGAGCTCCAACGCTAATTGACAGCTCGGTTTGCATGTGAAATGGATACATACC -TTCGCACTCTACGGAGAACATGCCAACAGCGGGTCACGGGCCTCCACGACGCTAGATGTT -CGTAAGGCTAACCGAGACGCCACCTTCCCACGTAGAGAGGTGTTCCAAGTCCGCCACCCG -GAGTCTTCTGAGATGCAACATTCTCACAAGTTCACTAGGCTCGATTGAACGTCAAAAGTA -AAAATGGAGTGGCGTGCTTGCCACAGTCTGAATTGATTAGGAATATTCTCACTGCAAAAC 
-TTTATCATCCATATCCTAATAGCTTCGGGAGCAAAGATACATAATTTATGGGTTGGGATC -CTTCGAAGGCACAGAGAGAAGAAGTGTTGTACTTTGTAGATGTTGAGACAACCGTGCTGG -TCAAGACCCACGATATACAGGGATCTGCATAGTAGCTTAATCGAGTGGGTTATCAGTCGA -TCGCCGCAATCCAGTTCGCCTGAGTGAAGGAGTACAAAAAAGCCAGGAGAAAACAAAAAG -TTCGCCGCCCCGGCTTCACATTCTAATGCTCATGGCATGTCTAGCTCGGGTGGACAAATT -TGATTTTCCATATGCCGACCATAAGTTTAAGAAGTCTGGCCACCCGGCTTACCCTGATAG -TCGAACTGAATATCGATTCCGCATTTTCTTAGCCGGTTATATTGATCTAAGGAACACAAC -TTGGACATATGCGCAGTGCCAAGGTACCTCCTAGCCCTCACATAGACTGGCCGTACTAGT -AAACATAGTATGACCGTAGTACTACACTCGTTTCATAGTCACATCAAGCGCCGAGATTCG -GATGAATCAACGACAGCTCCTTTGGCCCGTCGCACACCCGGGGGCCGGGGGATCGATATT -TAAACCGACTAGGACCTTGCGTAATGTTTTACCCCTCCTACATTTGCACCGGAAGCCGTT -ACGTATTCCCGGCGAGAGTACAAGGGGCCGACGCTCAGACCAATGGGTGGGTACATCACT -AGAGTCGCTAATGCATGCGCAGACGGCCTAGACTCGCAGAAGCTAGAAGCCGAAGCATCA -GTCGTATTCTAACATGTCCCGATCAGCCATCTGCGGCCACTGCCTGGGCAGTTTTGTCAT -AGGGTGCAGATAAGCAGGTTTGTCCAACGGGGAACGGCGAAACGGCCCCGGGGGTGAAAA -CAGGGTTTATCTTGCGAAGTCATCCCTCATTTGAATTCGAATTCTTTGGAGAAGCTTGGA -AGGTGATACGAAATGAAACCCTTGGAAGCTCTATAAGAGACAGAATAGATGTGATAGAAG -ATTATACAAGTGGAACCACACGGCGTTTTCGCATCAGACCCGCAATCACATGCGAAAGCA -GCTTATCACAGATTCACCTATGATTGCATGTGGTATTTTTCCAAATTTTTTGCAATTCAA -TGTATATTCGGTATAGAATAGAATCCATTTAATACCAACACATGTGGCTATAATTAGCCA -TAGGAGTTCAATTGTTCTGACGTTTGCAGTGCATAATGCCCAAAGTCAATGGCGCATTGG -CGTGGGGGCATCACTTGGAACTTGGGGCTGTTGAGCCTTCTTTTCTTCCACAAGAAAAGA -CTGAAAATAGGAGAAAATTATGTAAAGCAGAGGGCTAAAATGAGCAATCAAAATAACCAG -CCGAAAATACTATGTACGATGTATTGCAATGATGCGATTGCGGTAGGACCCATTGCATAA -TGACAGCGCAATCACATGTGAAAATCCTATTTTTCTATACAACCCAACTTATTCctctct -ctctctcttttttctcttctgaatcactactatacttcatactcatcttactatctctat -GCCCCACGCAGAAAACAAAATGGGTGAACCGGTTGTCAACGGCGAGAAGCACTACTCCCA -GTTCCTTGATGTATGTACACCCCCACTACAGCCCAACACCAAAACGCAGAACTACAGCTA -GGATTAGCCCATACTGACTACATACTCCCAAATTTAGCATCTCACTTCCTACCCTATTGT -CTCCGACTCCATCTCCTATTACAAGGGCAACCCCTACGGAGCCAAGTCCCTCGAATACGC -CGACCAAGGCTACACCCGTCTCGCCAAACCCGTTCTACCCTTTTTCTCTACTCCCTACAG -CTATGTCGCTCCCTACCTAGCCCGTGCCGACTCTCTCGGGGACAAGGGTCTGACCCAGAT 
-CGATAACCGCTTCCCTATCATCAAGGAGGATACCCAGAAGCTGCGCGGTTCCATCTACAA -CAGTGCCGCCCTGCCTGTCCGCGTCGCCGGTGATGTCAAGCACCATGTCTTCGATATCTA -TGGCTCTGAATACAAAAAGTGTGGTGGCGATGGCATCTTCGCTTCGGGCAAGGCTGTTGT -GACTACCAGCTTGGTTCTGTCACAGGAATCGCTGGCTTGGGCTAGCGCTTTCCTCCAGAC -CAAGAAGGAGGAAGCGAAGGAAGTTGTCAATGAGAAAAACAGCAACTGATTTTAGATCTC -TTTGAGCTTTTGGGGCGTTTCTTTTTTTGCTTTTTCTATTCTTTAGAGAAAATGATACCT -TATCGCTGCTCTGTCTGTGTTGCCTGTTGTATTTAGTTCGAGATCTATCGATGGATGTTG -TAACCTAACGCTAATTTACTGGATTTAATCAATATACAAACCCTCCATTCATATAGGTAC -TGCAAGTTATCTTCTTTATCTAATCACTCACAAAGACAGTCCTCGAAAGAAGCCCTAAAA -TAAAATAAGAAGACTGACCTCTAAACGCATTTGCTGGAACAAAGAACCATTGATAAGGTC -CATGCACAGACCACCTGGGCAGCCCCGATGGGGTAAAATAGTTCTATCGCGTCGCGTCTT -CCCAAAGCCATCTGCAACTCATCACCATTCCCACATAATAAAAATGTCGTCAAATCGCAA -AAGACGGGTATGTAACCTCAAATCCACTCGAGCTGTCCACTAGCTAACCTCCATCCCCAG -ACCAACGCATCCGAGACCGAGTCAACCCCAAGCGCGCGTCGGCGCAGAAGACAAAGCGAA -GAATCCGATTCATCTGCACCAGACAGCGACGATGACGGGCCAAGCGCACCAACCAGCACC -GATGCAATGGTCAAGAAAATGGTGCGGCTCGCCATAGCAAGCGAATACTCGCGGCTTCCG -ATCCGACGAAACGACATCAGCGCCAAAGTGCTGGGTGAGCAGGGCTCACGGCAGTTCCAA -AGTGTTTTTCAGCAGGCGCAGCAGCAACTGAAAGAGCGTTTTGGTATGCAAATGACGGAG -TTACCGGCGCGAGAAAAGATCACCGTCACCCAACGAAGAGGTGAGTCGAGTTCCGTCTTG -CTTTTCTTTGTTGCAGACTCACGCTATGCCTAAGTAATCACTACTGACGTCCTGTACAAC -AGCCGCGCAAAAAACTGAAAAACCTTCCTCGGCTAATAAGTCCTGGATTGTTACGACCAC -CTTGCCGCTCCCATACCGTTCCCCGGATATCCTCATTCCAACGAAGGCACCCTCCCTCTA -CACCGAAAGCACATATACAGGCCTGTACAGCTTCATAATAGCGTTGATTGTGCTGAACGG -CGGTACCCTGGCTGAGCAGAAACTTGACCGGTACTTAACGCGCACAAACGCCGAGGTTGC -TACACCGGTCGACCGCACGGATAAACTGCTGCAGCGGCTATGTAAGGAAGGGTACCTTAT -CAAGACGCGGGAGATGGATGGTGGTGAAGAGGTCATTGAGTATGTTCTTGGGCCGCGGGG -TAAAATTGAGGTTGGGGCTGGTGGTGTGGCAGGTCTTGTGCGAACTGTTTATGGGAAAAA -TCAGGATGACCATGAGAGCTTGACTCAACTCCAGAGGGAAGACCTTGAGGACTTTGAAGC -GAGGCTTGGAAGGAGTCTTGGGATTGAGCCGGCGACGGTACGAGCTAGTGGAGTGGATGG -TGCTAGCGATACGGATGGAGATGCTCGGGTTAATGGGAATGGGGAGCCAAATGAGGTGCC -GCGGAGCTCGGGGCCTAGACGGTCTTCGAGGCCTGACGCTAATCAAGAAGAGGAGGAAGA -TTCTGGCTCTGAAGAGTATGAGGAAGAGTGATTGATTTACGTTTGATTGGCACGCTGTGA 
-TAACATAGGGATACACACTACATGTAGATATCTGCAACTTGATACCCAGGATATGATAGG -TCATTCCAGTACATTTGACCAATGTTCGATTGCTTGATCATCGATATATAGTTTGAAATT -GAGGACCATAACGCACTGTGGACTCTTGAACGTGTTCATGTTTGTGAGTGCTGATGCTAT -TAATTGAGGTCAATTATTATGGCAGTATTCTGCGCTCTATATTGTCGCTTGATAACAGTT -CAACGTAGTCTTACAATAAAGCCACATTTGGCCTATTGCACGAATTAGCTATCCAGTAAA -TTATTTCCATCTTGATCACCAAGCCATAACTGCAGGTGTTATGCAAGAAACCGACATTGT -CCTGTTCAGCCCTGGAGATACATGGATATCAATTCGGAAAGATCTATGATTGCAAATAAA -GTTCCAAGCGGGCACGCTTGCAATGGACTTCTATAGACCTGCTGTGGCATGTTAATGACT -TCGCATGAATGTTGATGATTTGGCTCACGGGCCACCGACCTCTTTAGCTTTAAAAGTGGC -CAGGAATATCCAAAGTACAATGTAGGTAGGCTACTCGGTTCCGAATAGTCAAGAGCAGAG -CTCATCATGTTCAATGGTGGATTCATCCTTGCATCGAGCGCATTCTTCAGCGCCAAATAT -GATACTTGAAAGCATCATATATCCGGCCTTTGGAAGGAAAAAAACTTGCCCACAGTGAAA -TATGTCTGGTCGATACTCGCATATGCAAATGGTATCCCAAGCTCCTTCTATTCGTTGCTT -GTGGCAGCTAGAGAGGCTACACCTATGTAGGTAAGTAGTACTGATCACGGAGCGCGACGG -ATGGCCGAAGCTATATTATTGTCAACCTCAAGAAATGAAGCCCTGGAGGGCGTAGAGGGC -TGGATGAGTTTTAAAAAATCTTCTCCTACTGTCTCTTCTACTGTGAATCAGTCCACCATG -ACAAGACTATCTGCCCTCACCTTCCTTGGTCTACTTGTCACCTGAACCTCTCTGTGGCGG -CATTAACATTCAAGTATGTAGTGGATACCTCGACGACTGGGTAAATATACTGCGAACACG -CAATATCCCTCAATCTGATCGTGTCAAGGGATATATTTGCGCTGTAGGCCAGTTAGCCTC -ACGCCGACAGATTCATTCAACCACGGCCAAAATGACTGGATTCAAAAAATACACCCCTAC -TGTATCGACTCCAACTTAGACACCGAACCGAGCCACTTGCCTACCTCGTGTAATTTCAAC -TTCCACTTCGCTGATCACGAGCTCTTCTAAAACTCGCACCACTAAATCCATGACAATTAC -CTACTCGACTACTCTTTCTAGCGCCACCATCGACTACGATCTCCTAACTACGGCAACCAC -AATCAGGACGAATTCCCGAGTTGTCATAACCACAGCCAGCTCGACAGGATTTGAAGCGTA -GGTTCTTATTTTTGAATTGTGTCTGATTTTGGGATGGACCTAATTCAAGGTTGAGATGGA -AATCGAGACTGGCTTGGACATATATGCTTAGAGTACTCCGTATAACGCCCTGGGTTCTGA -ATATTAGTCACTTCAATAATCATGGTCTCTATCGGTTTTACTCATTGAGCCTGGCTTCCT -TTCACCGCACATGTATATGTATTCGATATTCGAGCCGTTCCATGTACTCATATGACGGAA -TACGTGTTTCTGGGTGCTCAGAAGCCTTTTTGCCAATCGATAAGGGCCAAATATATATAT -CCAACCTTCGGAACAGGCTTACATTTAGTCAGGATGAAACTGATTCATAGTTAATTTCTA -GGTTGGAGATAGTTCTAGTTCTTGATATCGCTTGATAATGGGATTACAAAATGGGAATAG -GGAATTCAAGTCGAACGACTAAACATGCATGGGTAGTAGAAAGTTGGGACGAGATAACCT 
-GGACCGAATATTAGATCTTACGAGGGTGCCGGATACATCGTGCGTGGAACATGGGTGTAG -AGTGGGCTTTTCTGTCACACAGTCACTAAGCTGAGAGAGATCCAAGGTCACCCACGATGA -GTGAACACTCTCACAGAGTTGATCAAAATTTCTACATATCATTGGCTACGCTGTTACAAG -TTTACGAAGAAACAAAGAATTGGGAACAACTTGAGTCATAAAAATGTGGGTAGTCATTTT -AAGCAACTACACAGCAAATCTACCTTCCATCCAGTAATTGTTTTTGAAAAGAAAAAATCA -TCACGGCATAATGAGCATCATCCGAGAATCATTTATTTCTTACCAGCCTTGGCAGCGGCC -TTGGTGACCTTACCAGTGCCACCAGCGTTCTTCTCAACGGACTTGATGACACCGACGGCG -ACGGTCTGACGCATATCACGGACGGCGAAACGACCGAGAGGGGGGTAGTCGGTGAAGGCC -TCAACACACATGGGCTTGGAGGGAACCATCTTGACGATAGCGGCATCACCGGACTTGATG -AACTTGGGGGAAGTCTCAGTGGCCTTACCGGTACGGCGATCGATCTTCTCGAGGAGCTCA -GCGAACTTGCAAGCAATGTGGGCAGTGTGGCAATCGAGGACGGGAGCGTATCCAGCGCCG -ACCTGACCAGGGTGGTTGAGGACGATGACCTGGGCGTTGAAGGAGTCACAGCCGGCAGCG -GGGTCGTTCTTGGAGTCGGAAGCGACGTTACCGCGGCGAACCTCCTTGACGGAAACGTTC -TTGACGTTGAAGCCGACGTTGTCACCGGGGTTACCGGCCTTGAGCTGCTGGTGGTGCATC -TCGACGGACTTAACTTCAGTGGTGACGTTGGCGGGGGCGAAGGTGACGATCATGCCGGGA -GAGATGATACCAGTCTCGACACGACCGACGGGAACTGTGCCAATACCGGAGATCTTGTAC -ACATCCTGGAGGGGAAGACGGAGGGGCTTGTTGGAGGGACGGACGGGGGTCTCGATGGCG -TCAATGGCCTCGAGAAGGGTCTTTCCGGTGGACTTGCCAGCCTTGGTCTCCTTCTCCCAA -CCCTTGTACCAGGGGCAGTTGGGGGAGGGCTCAAGCATGTTGTCGCCGTTGAAGCCGGAG -ATGGGGACGAAGGGAACGGCCTTGGGGTTGTAGCCGACCTTCTTGATGAAGGAGGAGGTC -TCCTTGACAATCTCGTTGTAACGATCCTCGGACCACTTGCAGGTATCCATCTTGTTGAGG -GCAACGATGAGCTGCTTAACACCGAGGGTGAAGGCAAGCAGAGCGTGCTCACGGGTCTGG -CCATCCTTGGAGATACCAGCCTCGAACTCACCAGTACCGGAGGCAATGATGAGGATAGCG -CAATCGGCCTGGGAGGTACCAGTGATCATGTTCTTGATGAAATCACGGTGACCGGGGGCA -TCTGTGAGGAAAAATTAGTAACGTGTATGGCGATAGTGAATAGGAAACTCTCTTACCGAT -GACGGTGACCTCGTACTTAGCGGTCTGGAACTTCCAGAGGGCGATATCGATGGTGATACC -ACGCTCACGCTCAGCCTTGAGTTTGTCAAGAACCCAGGCGTACTTGAAAGAACCCTTTCC -GAGCTCAGCAGCCTCCTTCTCGAACTTCTCGATGGTACGCTGGTCAATACCACCACACTT -GTAGATCATGTGACCAGTGGTGGTGGACTTGCCGGAATCGACATGGCCGATAACGACGAT -GTTGATGTGGGAACGCTCTTCCTTACTGTTTCAAGTCAGTCTCAATTTCTTGCCTGAGGA -GGAGAGTAGTCACATACCCCATGATGACGGTTTAACTTTTTGTAGTTCTTTGTAAGATAC -TAGAAAAAATCTGTGGAAAAATTGCTCAGAGACTACGGCAGAACGCGTAGAGGTACTGAG 
-CAAATGCAATCAGTAAAGAGTCGAAGAGGATAAAGCTTGAAAGCAGTAGGGGGTCAGGAA -ATACTCACTGTAGCAGACGAGAGAGAGAAAGAAAAGACTGTGGGAGGGGGGAGGAGGGGA -AATCCTTATATGGAGGGGCAAGCGAGCGAGGCGAGTCAGCAGGTGGGGCAAAGCCCTCTT -TGGGTATGGCCTCTGATTGGTTCGAGTACTTAGCCGTGTACGCAGATTTTTCAAAGTGGG -TGGGTGGGATTAAGTACATGTACTTACTACAGCATGTATCTACACATACTTTATAAAGTA -CTGTCTACCTCTTTGCTTGAAACACCCCTCCGTTTCCCTGGTTTTGTCCTGCCACGGCAC -TCCACTCGGTGCGAATTTTTTTTTTTTCATCTCCGTCCCAGCCCACTCTGTGGCTCTAAA -TGCTCGAACTGGGCAGCAACGCAGAGTGGTTGGGTGAGAATCCTTTGGGTTTGCATTGTT -TACTTCACGCATTTGGTGTGTTTTTTTTCTCTTCTGGCCTTGTACTGTGGCGATTGGGAC -ACTCTCCATCCATTTTTTTTTCTTTTTTTTCCCTTCTCTTTTGCCTCAGCCCCGCGGATG -AGGGGCAACATTCTCACCGCCAAATCATCCCACTAACAATGTCATGCATGTAGGATTTTT -TTTTTTTCTTCTTTTCGCCCACAATGATTCACATGTAGGCAGATATCAATGTCGGGCTCT -ACATCTGTACTAGGCCAATTGTATTTATAGATAGAAGTCATGCCAACCACCACAATAAAT -ATGAGCTAAAATAAAGAGCACGAAATGCAAACAAAAGAGCTAGATCCATGTCCGAAAAAA -GTTTCCGCTGATCGCAAAATAATCTGTGCACTGGGCTGTGACGTCGATCAATTTGGGTAT -CGTCTCAACTTGATAGTTTTGAGCTACAACCAACGCCTCTCCCAAAAGGCCTAGTAAATG -CGCCATTTTCGCCTCGCTGTAATCAATATGAAAGCCAAAAGAAATGAAAGTACTTGTTCC -CAGCCTCGTTGCCCAGGTCTAATTTTCTCTCCACCGAGCACCACACGAAGTACACTAAAG -CTGTCAGACACGGGTACCTCACGTTGAACGTAAGGAGAAGACATACCTTCAAGAATGTAG -TCATCGGTTCATCTGCACTACGAATCTGCAGCTGGAAGAAGTACGCACGCTCCCCATTGC -AGCTTTCGGCGGGGCATTGAGCTGCAAGTGTAAAAAAGAGAGCATGTCAGCCTTGTACGT -CTCAACGGACATGTCATTTCTGACAGATGACAGGAAAAACTTACTGCCAACACTGTCGGC -ATTGGCAAACTCCTCCTTGCCACCAAAGACATCGTCCACTTCCTTTTGCTTCATGCTTTT -TTTCTCGAAATAGTTCTTGTCTATCGGGGCTTCATAGGGACAGACGCGGCATTCGAAGCG -ATTCACACCCGCAGGAAATCTTGTGCTCGAGTCCGCTTTGGAGATGGTGAGGGCATTGCT -GCAATTGGGGCAGACTGGAGAGGAGAATCAGCTTCTGTTCTGGACTGTCACTGTACATTG -TTAACTTGGAGATAGCACATACAGACTAGCATCTTGATGATCTGACTGGAGGACGCAGCA -GGTAAATTTAAGAAAGTGTCAGACTGCTATACCAGGAGAAAGTTAGATCCTCAGCAGCTG -TATAACGTAGTCCCTGTCAGGTGCAGAACATCACTGTCGATGATAAGAGATCTAGGCGCG -CGGCTAAACCACGGAGTATCATTTGCCCGGGCGGCCCGCGTGTGGCATGTCCCTGGGGAA -CAACTCCATAACCTGTAGTGTTTGCCAGGAACTTACTTGAAAGCCTGTTTGAACAGACAG -ACCAGCTTGTACATACATATTCTGGATCATTCACAGATTCGTCTTGATCACCAAAATACC 
-ATACAAAACGCCTGGCTCTTGTCACAACTAGCTCCTCGCGTCCTATTACAGCTGAGCACC -AATTCCTGTACTACCGACTTCACTTTCGGGTATTCCGGACTTTGTGAACCCATATACAGA -GATGGCTGATGAGGTGAATACTGTTTACCAGACTCATGCTAAAACCCAACGCTAACACTA -CTATGGTTTAGGGCGCATCACCCCGCGAGCTGGTCGTAGAGGCCTGCCGCCGGGACCAAC -CCCACCTAATTGAGCAAGTGTTGAACGAAATGGAAGAGAAATCAAACGAGCAAGTGGCAC -AATTCTTCAATGAGGTGACAGATTCTATGGGAAATCACGCATTGCACATCTGTGCCCAGT -ACGGCAGCTGTACGTGTTTACTTTGCCACTTCCTCAATGCCCATCTCGTACAGAAGCTAA -CTCTTTCTTTTAATGGTCAGATGACACCATGGATGCGTTGTTTGACATCCAATACTTTGA -ATGCGACCCGTTGACCCGGCTGGACAAGGACACACCGTTGCACATCGCAGTGCGGTACGC -CAATGATAAGGACGCCGAACTGGGTGAGGCCATGATCAAGATGATGTGTGAGGCTGGCTG -TGATCCTCGAGTGCGGAATAAGCATGGTCAGAAGCCCGCTGAGCTGGTTTTCAACAATAA -TGAGATTAAGAAGACCCTTCAGCAAACCGAGTATATCATGGCTGAAGGTATTCAAAATAC -CGGCGATGGAGCCGAGTCAGATGGTGGGGAGGCCAGTGATAGCGATTAAAAGGGAACCTT -TTGGGTTTTGCATTGTCATCCATTGTATTGCCAAATTCTCTGTTTCTGATTAGAAGGCTC -GAGATGCTCTAGTTGAGATGTCTGTTTTTATGCACGAGTCTTAATACTTGGCCACGTCTT -CTATTACTATCTTACGATTACTACGAATCAAGTACAGTCTCTGAGAGCTGTTATCATAGA -ATTGACCTCTTTCGGCAAACTACTAAACTAGATGGAAATGCCGTTTGTTGATTTCCAAGG -GGGGATATCACAAAGTTAGATTCTTGACGCCTTAACAAGCTTCTGTACTTGATCTAGCCA -CTTGCGGGAGAACTCTTCTGCGGTGAACCGTTGAGCTGAGCGACGAGCCCGCTGCCGCAT -GGCGAGTTTCTCTGCCTCCGGAAGAGCAAGGGCAGCTTCGAAAGAAGCAGCAAACTGCTC -TTCTGTTTCAGCTCGGAATCCGGTTGCACCATCACCTAAATCAATGACAATATCTTCACG -TGGACCGCCTGAGTCATGTGTGACACATATGAGACCAGCGGCTTGGTACTCCACGACACA -GATACCAAAGTGCTCGTTCCACATAGCATTGACGCCAACGGAGGCGGTGCTGAGATGGGA -CAGCATGGCAGGCCAGCTGGCGTCGCACAAGAAGGTGGTATAATCGCGGATGCGAAGTTC -ATGTGCCAGGAGGCGCAGATTGTAGATGTGGGTTTCATCAGGACTGGCATGGCGCACGGA -TCCAATGAGCACAAGACGCGGAGGGGGCTGGCCTTTATAGGCTGGGTTGTTTGTTCGTTC -TTGTAAGAAGCGCGCAAAGGAGCGCAAGACTAGCGGGTGATTCTTCTCTGGTCGGAATTG -AGCAATGTAAAGTATAACGGGCTGGCGTGTCTTCTCGCTCTCCGTGTCAACAGCAATATC -TGATTGCAGTTCTGTGACTGCTGTTGGGGGGAACACCACCGCCGGCGATGATGCGGTCCC -GTCGCCGCTGTTGTTGCTGCATTCCTTTCCGGTGCCCCATAGCGCGCGGATATGTCCTGC -CGTCCATGACGAGTTGCACATAACTACATCGACCTGTCGACCGACCCAGCCATACAACTT -GGCAAATAGCTCCCAGTATCGACGTTTGATTTTCCCTTTCCACCCCTTCCCTGCGCCGGA 
-ATTGATCCCTTGCACCCCACTCTTATCATCAAGAGAGGCAAGCATGTCCGTTGATATGGT -CGGGTAATGAACATAGGCGCCAACAGGCACTGTCGGGAAAAGCCACTTGCTCAGAGCAAG -CGTGAAAGCATATCCCATGGTGTCAACAAAAACATCCGGCACAAGCAGCGTAAAAGCGTC -ATAGGCAACAACCAGTGATCCCAGCGACTGTCCCAGCAGCGTCATGTAAGGGTATGAGCT -ACTAACAACATACTTGCGGGTCGTCAAGTATAGGAGCACCACGGTCGGTGCATGTAAATG -GATATTGAACCGGTTCTCCACTTTCTCCAGCATCGTGGCTTTATTGACTTCATGATCGCC -TGTGTAGATAGCGCAGATTGCCTTCGGCCAGCGCTTCTGTGTTGCACATACTGCTTCCCA -TAAGACTCGCTCCCCGCCTCCGCCGGCATTGCTGTAGCACCTAGTCAGCGTTGTGCGTTT -GAATATGTATAGAGAATGCCATACCAGAATGGGTGGAAGAACCCAATAATCCCATCCCAG -TCCTCCGGACTCCGGTTGTTTTCCTGACCAGTACTCCCACTGATGTCACTGCCAGTTGAT -GTCTGTACCCCGCCTGTGCTTGAACTATCCACCTTCTCCCAATCCTCATCCTCGGCACCG -TTCCGCGATGGTGAAGTTGTTGAACCCTTCGACTGTGAGGCGCGTAGCTCATCCTCTTCA -GATCGCGCCCGCGCGATCACATATTCCCGGCGAGATCTTGTTCGTCGCCATATCAGCCAA -CCCACTCCCCGAAACACTAGACCGAGGAGAACGCCTGTTAGCTTCGGAAGTAGGACAGCC -GTGGCGGTGAGAACTAGAATGGCCAGAATGGCATTTGATAAGAGGGCCATGGTGAAAAGG -CAATGATTAAAGAGACCTCATCAGAAATCACACATAGTAAAATAACTTGAAGTTAAACTG -TTTGATAAGTCAGAAAATGCTCCCCTCTCTTGTTGTCAACAAGTTGATCAATCGGGCCAA -TGAAGTAACTCTCATACGTGTCACATGATGATTTCCAAACATCCGATTTCCGTACCGCAA -CTCCTATAGCATCGATTTTTCTAGCTACATTTTATCTTCTATCTTGTGTGTCGCCCCGCC -AGCAGTGATGTATGCACTCCGCTCGGCATATGCTCGACATGTAACGCGAGCCGCACCATT -GCCACGCTCGCAATTTCGACAATTTCACCCAACAAGACCAGCCTCATCCACCATCATTAA -CCTTGCACTGGACGGCTCTACCGCATTTATCCACAGCGTCCACAATGTCTCGTGTCTACC -ATGGGTTGCCTCAATTCCTCTGACAGCCGTACTTGTTCGTACATTTGTCGGGCTGCCCAT -CCTGTTATATACTCGACTCCATCGCCAACGCGAAAAAAAAATAACGCCAATTCTAAGCGC -GTGGAGCAAGAGATATACAGAAAAACATATGGCTGATTACAGAAGTCTTACCACCAAGAA -TATAACGATTTCCGAAGTAAAGAAAAAAGCTTTGCTTGATCTAAGAAAACGAACAGCTGA -GCTTCATAAGCGTTGGGGAGTCTCTGGCAAGCACAAAGCAATTGGTTTACTGCAGATCCC -TGTTTTTATTGCGCTCATGGAGAGTCTGCGCGGGATGAGTGGCAATAATAGCGGCATCAT -TGCCTGGCTGTCGTCGTTCATCGAGTCGCAGGATCCTGCCTCTGCAGCTCAATCTCTGCA -TTTAACCATGGAGCCCACCCTTGCGAATGAGGGCGCCCTCTGGTTTCCAGACTTGCTGGC -CGGTGACCCGACAGGCGTGCTGCCGCTTTGTCTAACTGCGTCGATCTTGGGGAATGTTCT -GACCGGATGGAAAGTCAAGCCCTGGAAGGAGATCCAGCACCTTCCAAAGACGGAGATGTA 
-TAAGCAGGTCACTTTCACCGGCTTGCGGACCTTTATCGTGGTTTTGACATGTTATATTGG -CTTCGCCAGCTGGACTCAAGGGATGCCAACTGCGCTTATGCTCTACTGGATCACTAGTAC -CAATGTTGCTACGCTGCAGACTTGGGTGCTGGACAATAAGGTGTTTTCACCAGTTACATA -CCAGCAGTTCATTGAAAAATCTATCGCTTTTGAAAAACCGGGTGACAAGGACCCTTTCCA -GCTGAAAAACTTGCGTTGATTCCATCCGCGGGAAACTTTGCATTTGGGATGACATGGTTT -CTTTCCCATGTAATGAATCGAATCGACGGTGGCTTGCCCAGCCATACTCCACTACAGAGT -TGCGAACCATCCACGCGCTGGCTCTAAGTGATTTTTGGAGCTCCATCAGGATGAAGCCCA -CAATCACACCGAAATTTGACAGCAAGCCCACATCAGACATGATTAACCCCAAGGCCCCTG -GGTGACTGATACGGACTTTATGAACCGGCCTGAATAGAAGCAGTTACCACAGGGTCGCCT -CTGGGGTTGTACGTCAACGTTCACAATTTGACCCAACTCACTGGCGAGGTGAGGGTCCAT -TTGAAACCCATTTAGTTCATTGACTGGTTTCGAGACTAATTGTGTAGCATCGTTTTCTTT -GCCCACTTGCATTCATGTACAATATCCTTCACTTTTCACAAGAATAGTCGGCCGCGTATG -CGGTATTTAGCGTGAGCATATCTGCCAGCTTCTATGTCTTCGGCTTGAGCTGTCCTACCT -TTCATATTCTTCTAGCAAGTGATAGTAGATTGTAGATTCGTCTTGTAGATAGAATGGTCT -TTCAGCCTAAAATTTTAGACTTTGCTTGTTAAGTTATTGAAAAGGCTGAATACTTGTGTC -CTTGATGAGATAGGACTCTTTCTAATAATCTGTATCATCTCCCCAGGTGCATCAAGCTGG -TATTAGGGATGTCAGACTATGTAAGAACTCAAATGTCCCTTCCCTATATCTCTTTTATCC -ATGGTGTTACAGGCAGTAAATAACATTGAGCCGTAGAAGTACAGCTGGTAATGGGATAAT -ATTGCTCGCGGAGTAAAAACACCGGCATTATCACTCTACTATGTACAATATACGAAGATG -GCAGAGCATACAACATCCAGAGGGCTTTTTATAAGTGTGACAAGGAATATTGCAAATGAG -CTTTACGCATCAGACAAACAATCATAATTTGTAAGATTGGAGAAACTATCCCTGGGATCT -TGTCTTGGCCTTGCCATAAATCCAGTAACAACAATCCATTGGAGGAAGTAGATTGAAAAA -CGTGTTAAAAGATCCCCGCCACCCTTTAAAATGCTTTGGAAATGCCACAAGCGATAGAAA -AAAAAGAAAAAGCGAACGCCTAAAGCTTTGACTTGGGAGGCTGTTCTGAGGCACTCGCTT -CCTGATTGACTTCTACAGTCCGCGAATCAGGGCCCTGACCAAAGAAGATGACCTCAACGG -TAACATCGTCACTAAGAAAGATTAGCCAACCCGTTTGAAACGAAAGAAATGTTATGCAAG -GGATTTTCATACCGGTATCGGCGAGAGTAAGGACTGGGTAGCGTCAACAACGCACACACC -ATATCCTTGTCCTTTCCGCCCATTGCGTTCCGCACAAGATGTGTTGCTGCATTCTTGTCC -TCAACAACAAATCGAGTAGCCGCGCCAGAGATATCATATTGTTGCTGGCGGATGGGACGC -CGTTGACCTTCCGTCGACCCGTCGGTTGCGCTTTCCACAGGCAGTTTCTTTTGACTATCG -AACCCAAACCAGCTCTGTAACCAAGCCTTGCTACCCGTGCTAGCAGCAGCAGATTGTTGT -TGCTCAACCCACTGACCAACCAGACCGACAACTTCTTCATTGCTCAGCATCTCCCATAGA 
-CCATCGGTAGCCATGACGACAAAGTCGCCCTTGGTGGGATCAATCTTGGTGGTAGTAATG -ATAGGCTCGGCGGTGACATAAGGGGGTGTCTTCAGCATAGGATGGGGGGTGCGCCCAAAG -AACTTCGCTTTGATTGTATCCTGGACATCCCGGCTCCACTTGTAGAAAGCGTCACCGAAC -GCACGACTGGGTTCAAGTTGGCCCAGAATTCGACCATTCTTGGTGACATATGGCTCGCCA -GGATGTTCCTCGCGCAAACGTTTCATTTCAGATGGGGTACCACCGGTCTGGTCCTCAGAA -AGAGCAGTAGCAGTCCATTTCCCGCTAGGGCCGCGACGGCCCAGCACTGCGCGAGAATCT -CCGGTAACAGCAACCTTGAGGTCCTTGGACTGGGAGTCATAGAAGCTAAGAAGAGCACAT -GAGCCTGAGAGAGCAGGAGCAAGGAGCTCGGCGGCAACCCGTCGTGATTTAGACTTCATG -ACTTCTTTGAAACTCTCGTGTACGATGTCATTGTCGAGGCGGACGAAACCCTGCTTGATG -GCGGCATCGATAGCCTCAGATGTTGGTACGAGGAGAGATGGATCAGCAGAGGCAGCTTTG -TAAGTCGTATTCAATTCACGGGCCACGTATGAGATCAAAACATTGCGTAACTTGGCGGAT -GTGGTCCAGCCCCTAATTGTGGTTAGATTTATGCATGATAAGTATTCAACCTTGACACAA -CGTACGAGTGACCGTCGAAAACTCCCCAGAACATCCAGTCACTGCTCGGCTCACCATTGC -TGACAGCAGCCACAGACGCAGGAATCTCCACTATCTTCTCCGCATGGTCATCCTCAATGG -GGGAATTGCTGGCAACCTGAACCACATCATAACGAACCACACCCTTGCCCCGGTTGACCA -GGTATGACTCCTCATTCTTGCGAAGCTTCTGGGTTGCTTGTTCTGGGGTCAACATCTCTA -GGACTCGCCGATCTGAATCGTCTGTAGTCTTGGCAATAGGCTGGTCTCCGGAGAGTGTCG -CGGTGTAAAACTGATCGCCGCTGACCAACAATGCACGTCTCGTGGACTCGGTGGGGCTTT -CTGCGTAGGCGGAGCTGAAAGCGCTTGATGCGAAACCTCGGACCTGGGTTGCTACCGATT -TAACGTTATCACCCTGGTAAGCATACCATGCCCCCGAGGCAACAACGGTCGAGATAACGG -CGAAGGAGAGTTTGCGGCGATGCATAGATGTATGAGATGTTACCGGGGTGTGGGATGAGG -GTTTGCCGCCAGTATATGGCCGAATACCTCGCAGCTGGTTGAATGTCAAAGAGCACGGCC -GTTTAGCGGCAATGTTCCATACTGGTGCACGCCGAGCAGTACGGCAGGCCTGAATAGTCA -CCCGACGCATTGGACCAGCAGTAAATATGACAGCGGATAAGCTTCTGTGCCTTCTGCAAG -GCAGATCACAGCTCTGAGATAGTATGTAGATGAGGTTTCAAGCTTGTCAAGGAGGTCCAG -CTTGTCGAGACAAGGTCACGTGCACAATTTATCTAAAATAGCGCTAGTCCGAATTAGTCT -CTGTCGTCATTCCCCGCGATTTTGTTTCGGCGCGTTCTTTTTTGTTTCCACTTGTGCAAA -AGAGCCTCTATCCTATCGTTAAAGGCTTTGCCTCATTGTATAGGATGTCATCACGAGCAA -TCCGAAAACTGCAAAAGCTGCGTGAACAAGAAAAGCAGCGAGAATCTGAGCAGCAAGATG -AGTCAAGCGAAGACGAAGCCCCGCGCCCCGCGAAACCAAAGTTCAATGCCTTCGACTTGC -TGAACGCTgaaaatgaagacgaggaagaggaggaagaggaagcagaggaggaaCCTCGCA -TGCAGACACCAGAGCCTGAACCAACCCCCATAAAGGCCaaaaagagcaacaagaaaaaga 
-agaagaagaagGCTACCAAAACCTCCGATCCAGCCAGTACCCCGCCTTCGAAATCAAATG -AAGCAGACCTGGATGAAATTGATCTGGCGTTGAAAGAGTTATCAACAGGTGGACACTCGT -CAGTTGACACCGGTCTATCTACCAGATACACACCCGAGGCCTCTTTCCCGAAAACTACGA -GCGAATTGCTCGGCATCGAACCCAAATTCCTCAATGCGACGAATGAGATGCGCAAACTGT -TTGGAAATGTGGTTCTGGAAAATTTTGATCAACCAGCGGATAGTGGAACAGGCCGCCGTC -GGGATCGTAATCGACAAATGGTCGATCTTGCTCAAGCCTTGACAGGAAGATATAGTCCAG -CTAGCCGAGGTCAAAGCCTGGCTGGAGTTACATTGCGACGCAATGTTCTTATGCAGGGCA -AAGATGAGTGGCCTCGTGCTCCAAGTGGAGGGCTAGGAATGGAGACTGTGGAGAAGCTTC -CTTCCGGGGAGACTCTCTACCGACTGGTGCACAATACCGCCTACCAAGATGTACAGCGGC -AGTTTGACCTCTGTGTTGAATCAATGGACCCACAGAGACTCATCGGACACCTCCAATACA -ACCCATACCATATTTCTACGCTTCTTCAAGTATCGGAGATTGCCAAACATCAAGGCGATC -ATGCTGTGTCTGCCGATCTGATGGAAAGAGCTCTTTTTAATATTGGACGCTCAGTACATT -CTTCATTTGCTAACCGCCTCAAGGAAGGCGAGGCCAGACTTGATTTCATACATGCAGAGA -ACCGAGAGCTCTGGCTTGTTGGATGGAGGTACATCACCAACCTAGCAATGAAGGGCACAT -GGAGAACCGCTTATGAATGGGCCAAGCTCTTGCTCAGTCTGGATAGCAGCGATCCTTACT -GCATGAGGCTTCTCATCGACCACCTTGCGCTCCGAGGTAGAGAATATGCACAATTTGTGC -AGTTATGCACCCAGACACGCTTTGTTCGCGACTGGGCCAACCTTCCCAATATCCAATGCT -CGCTGGTAATGGCATATCTCCGATTGAACAAGCCTCAAGAATGTCGTCGACAGCTGCACC -TCGCCATGTCCCGCTACCCGTGGATCTTCAGCAGGCTAGCACAGGAACTTGATGTCCAGC -ACGTTCCAAAGCAAATATGGGGTAAGATGCCTCCTACCGGCGCTCATGAGTTATTTACCG -AACTCTACATCGCCCGTGCAAAGGATCTTTGGAATACGCCAGAGGCTATCTCTCTCATTA -TGGAAGTCGCAGATTCTATCTCTGGCGAGGAGCCCAATGAGCCGCCAGAGATCACCTTGG -ACATCGCTCGTCATGTGGTCTTGTCCGATATAGCCAAAGTGACAACCCATCTTCCAAACC -ACTTTGTGGCTGGTCGGTTGTCCTCCTCCGACCCTCTTCCTCCCTATGAATCTGAAGCCT -ACCAGCAACAATCTCAGCCTACTCCCATCTATGCAGCCCCTGCTGGAGGTGGCGAAAATT -GGCTGCAGGACCTCCTTGGTCAGATCAACCACAATGTCCGCCCTCCGAATGTCCCATCGG -ACGACGAAGAATCAATTGGGTTTGAGGATGAAGACGAAAATACCGAACATTTCGATATCC -CCCACTCACGCCCACCACTCAGAGATCCAGCGGCCCTTGAAGGGTGGCTCACGCGTGAGG -GGTTGCAAACCCTTCGTATCTTCCTTTCACAATACGGAGTCGATAGAGGCAACTGGAGCG -AAGTGGTTGTAGACTATGCACCTTTGACCAATTATTTGGAGGCCATGGCGGCAATCTCAG -ATGCAGCTCGCCAACGGTTGTTAGACGGCACTATCAAGGACTCTCTTGGCGATTTTGCAG -TCAGCATGCTCGAGGATGAGCTCGCCATTATGCGCGGAGAATAATTTAGCTTCATAAAGA 
-TGAATATTGTATTAGTCTGTGTACAGTAGAAGACAACTGAATTATTTTATGCACTATTTC -ATAGTAGGGTCCTGGTCGATTATGTTAAGAACTCCAGTCAACTAAATCAACTAGGTCAAC -TAAGAAGTCCATGAGGAGGGCAAATAGGTATGCTTCTGGCAAAGTCAACATATAAATTAT -TGCTTAAAAAAATTGTGTGATTGTCCTGAAAATTTCCGATGTTTCTGGGCGATATCTATT -GTCCCAGGACAGATTTATACAACCGAAGTATAGAAGTATATATTTCTCCAACTCTGCGAT -CCTTCAGAGAACCTTAACAGAGAACTACAGTTTTATAGGATATTAGCATGCCTCGATCAT -AGTAGCACACAGTAAATGTGTAGCACTTGCATTTTAGACTGTATGACCATAATGAAGTGT -ATCATTGGCTCTGTGGAAACCCTCTAATGCTTGGCTGATGCACATCATCCCGCACTGGCA -GCTTCAACATGTTTTTTTTTTTTAATTATTTCATTCCTTTATTTCAAAGAGATGGTCTGT -ACATTCAATAATCTGTTTTATTATAGACATACATTGTACAATATACTTAATTGGCATAGA -ATATTTCAGAGATTTTCCTATGGACCCATGAAAGCTTGGACATAGCGGGAGTACCCCGAG -TAGATTGTGTCATAGCCTTATCTCCCGCGCAATTTTCTTTCACTCAAGCACACATTTGTT -ACAGAGCATGGAAATCATGGATCACGGCCCTATACCTTTAACAGCTCGTGAGGCTGCATC -GGGTCTCTTTGGCTCAGTCTCTATGACCTGCTGGTTTTTCCTTTTGGTATGTTTGACACC -AGACTCGCACCTTGATAACCGATCCAATGTATCTAACAATACTGTCATAGGTACCCCAGC -TAATTGAGAACTACCGCAATGGCAACGCAGAAGCCATTTCCCTCCTCTTCATCTCTGTCT -GGCTTATTGGCGATATCGCAAACCTCGTTGGCGGCCTACTCGCTGGACTCGTGCCTGTGA -TCGTTGCGATCGCGGTATACTTCTGCATTGCAGACGGCGTGCTGATCGCGCAATGCGTGT -ACTATAAAGTGCGCAGCTCGCGCCCGGAGCCTTTCCATCGCCGACGCTCGTCCACAGAGA -CTCCGGACCCAACTACCCCGTTACTGGGGCGGCGATTCAGCGACTCTCTCGCGTCTTCTG -AGTCACGACGTCGGTCGTCGGGCTCACTGCGCCGCTACCAAGCTGGTGGGCGGCGTGAAA -GCGAGGTAGAGGACACCCTGGCCAAGATCGTCGAGGAGAGTGACTATGGGCGTAAGGCGT -GGGTTAAGAATTTTACCAGTGTTGCTGGGATTTTCCTCATTGGTATTGTGGGTTGGACCA -TGGCTTGGCAGACGGGAATGTGGGCGCCGGCGCCGTTGGACAATAAGAATGGTTCAGAGG -AGGCCATCGGCGGACTGGTGCTGGGATATTTCAGTGCTGTGTGTTATCTGGGGTGAGTGA -TCCTCTCTTTTGGGGCTGATGGATTGATATTGACTTCGACTGCAGAGCGAGATTGCCTCA -AATCTACAAAAACTACAGTGAGAAGTCATGCGAAGGTATGTCTCTATTACCCTTGGATGA -TGCATATGTGAGGCTGACGCTGACCTCTGTCTAGGGTTGTCGCTCTTGTTCTTCATTCTC -TCTCTTCTCGGAAACTTGACTTACGGTGCAGGGGTGAGTGATACAGTAAATTGTACTATG -AACTAATCATAGGCTGACGGTGTGGATATAGATTCTTTGCCATTCTACAGAGAAAGAATA -CTTCCTGACCAATCTTCCTTGGTTGATTGGATCACTAGGAACTATGGTTGAGGATGTGAT -TATCTTCATCCAATTCCGAATCTACGCTGTTCAGGACCCTCGTCTCTCAGCAATCTCCTA 
-AATATTGCAATTTGCTCTTTTATTTAGAGCAATGACATCTTGCCCATAATTGTCGAATAT -GTACTTCGGTTTCAAGCTCAACAATTGTTCGGATGAGAGATGATTTGCATGCGTTTGTTG -GTGATGCTGGACATCAGAAAATTTGGATAGCATAGCGTTTTGATAGTAAACTAGCAACTA -AATCAAATACGGTTGTTAGCGTTGATGTACAGAGTTGCTCGCGATAGCACGACCTCGAAA -TTGCAAGTAGTGCACAATGATATTATCGATACAACCGTAAGTCCAAAGATGATATCGTCC -CAAAGTCTGTGCATGTTGTACTTTGGACGGTTGACGAAGACTCCCGCTGAACGTTAACCA -CAAAGGCGTGAGATCAACTTCAGATTGAAAAGACATCCGATACCATTCAGCCCTACAGCA -CTACGTCACATGGCCTACGAACAAGTACCCCACGTAAGAATCAGACGACATTCGTTGTTG -GTTCATGGTCGCATGACAGACGTGACATGAAGGCCGAGCAAACTAAGCTAGCTGATCCAT -GTGGAGGGCGAGAAAACGAATCATACATCTACTCGCCAGATCAAAGCAGAACTTTGATCA -TGGACAAATCAAGAAGTGTGCCAAACGTCTTCAATTCCCCTAAAATCCAGGAAGCAACAA -TAGTTTCGGAATCGCAGCGGTATCCGATCGAGTCTACCTCGTCCACATTGTGAGGCGAAG -GCACACTGAGCACCCAGTCTTCATGAATGTCCGTGTCCGTTTGAATTCTTTTGCTAATTT -TCTTTTTTTTCTTCCCCGGGTTCTAGGTCGTAGTGGGGGTCGTGGAGTATCGAGTACAAT -TGAGAATCGTCGAGGGAGCCGCCATGCGGCAGTGGTAAGAGAGGTTGAGGAATAGAGAAA -AAAGGAAACGAGTTTTAAAATTCGATATGGAGATAAGACGCTTAGTCGAAGAGACCGAAG -CCCATGTCCTCATCGGACTCCTCCTTGTCTATAGTGGGGTTAGTGTCATGACACAAAGCA -GAGATACATATCAAAACTTACCCTCCTCCTTCTCCTCAGCCTTGGCCTcggcgggggcgg -cggcagcagcggcaccaccagcgggggcagcggcagcggggccagcggAACCGACGTTGG -TCAGGAGATCCTTGATGTCCTTACCCTCGAGGGCCTGTGTTGGAAATCGAATTAGTTATC -GAGTCTTTAAATTGTCGCGCGCGGCCATGTTCGGGTTGTTTCGATATTGGCGCGAGTTGT -GGAATGGAGTCAGGTATTTACCCGAGCGAAGATGGAGGCCCAGATGGGCTCAACCTCCTG -GACGTTGGCGGCAGAGATGAGAGTCTGGATCTTGTCGGCCTGGATGAAGATTAAAGGTCA -GTCACAGTTTTCGCATAATCGCTATTTGATCCCAGTGGCTTTGCAGCAGCTTGTTGCCTT -TTTTCTCAAAGCTCAATTTCACACAGTAAATAAAGCTTCAAGTGCTATCTTCAAGCCTCC -ACTATTCCACCAACATCAAGAAATTTTGGATACGTACGGAGATCTCAATACCGTCATCGG -CCAGGATGAGGGCGGCGTAAGAGCAGGCGAGCTCGGCGGTAGACATGTCGACGGGTGGTG -TTGACCGGACTGGGTGTCTTGAAGAGAGTTGGGGATGGGAAAGAAGAAGAGAGGAGAACG -CCAATGATAGACACCCGAACAGGGCAAACGCGCCGTGTGTGGGAAAAAGCCCTCGCTTAG -GGCTCGGACTAATCGGGCTTAGTGTCTTCCCCTCCTCGTGACTTGGCACTAAGCCATAAA -CGGAGTTGGGCCTAGATGACATAATGCGTCAGATGACATAATCCCCCAGTAAACCCTGCG -CGGGGCCCGTGCGATCTGGTCGTATCTGCATGATCTCCCTATCAATCGCTCCATTCATTT 
-AATTGACATCACATCCCGGTGGGGTTCAAACGTACGACCGATTCTTCTTGTCTTTACTTT -CAAAGCAACCTATCCCTTATATTCAAGTCGTCCTCAGCCTTACTTTTCCCAACAAGCATA -CTTATCGGAATCTCCAACCATGGAGTCCCCCGCTATCCCGGTCCCGCTGGATCCGCGGGA -GCAGCCCATCCTTGAAAGCCTGCTCCGAACCCGCGACGCGCTTCTCCTAATCAAACAGGA -CAAATCCTCATACATAAAGTCCCGCGATGTCCTACCACTTTATGAGGAAGTGATCGCAGA -AGTTGAGAAGCTCAATTCGATTCGTAAAGAGCAAGATCGCCGTTTGGTGCACAACCGCTG -TGAGTTCAGTAGATCACGCGCTCGACTTGGAGGTGACTGACACTTAATAATAGTGGACTA -TGTCTTGGATGATTGTTTCCAGTTGATTTCCCTACTGTTTTTGACAGTTGGAAGGAACAA -CGAAGCTCCCGCAGTGTGAGTGACAATGTCCTTACCCCGCCATCTTATGACCTTGACTGA -CACTTTACAGATACTCCTTGGCGACTACTATCCAGGTATTTATCCCTAGGATTGCTGTCG -GATTCTTTTGTTAGCTTTGTGCTCCGAACACTGACTGTTTTGCAGCGCTTACTCGACCAC -CTAGAGGAGGCTGGATTCTATAGCTCGAAGGACTTGAACTCGATCACTAAAACACTACAG -TCTACACGGGAAACCCTTGATCGTGGCCGAAATACATACTCACCAGCTTTGCTCACACTC -TTGGAAAACCGCCTGGAACACTGTGAACAGTCACTGGCGAAGTTGCAGAAGGGTCTTGCT -GTATTGGCTCCTCCTTTGGCGCAGACCCATGAGACACTGGTCTCAATTCTGCGGTCAACA -TCAGCTGTGAACACTCGGTCAAAAGTATGCTATCAAATATTTGTTTAATCTATCTAGTCG -GCACTAACGGATGCTTGATTTTTTAGTTCTCGGCCTCGGAAGTAAATGCTCTTCGAGAAC -AACTGAAAAAAATTGAGAAGACAGCAAAGGATGGCAACTTTGTGGATGACGAAGGAAATG -TTCTTGCAGGACAGGATGACCTCAAGTCTTTGATTCACAGGTGTTGGCGGTGGACTGAAA -TTGTGCTCGAGCGGTATGTTCTAGGTTGCAAACGGGATGATCGACCATCGACTGATTGAA -TTCCAGAGAGGGTAAAATTGACGAGCGGTTCCAAGCTCAATACGAGCGATTGTTAGACAT -CCGCAACCAACTGGATCGTTTGTCTGTAACACAAGCTTGGTCGCTGCGCGAAACCGATCT -CTTTGGATACCAGCGAAAGCTCGACCGAATTGATGAGGCCCGTGTTAATGGAAATTTCGT -TGATGCTGAAGGTCAGCCAGCGGACCTGCATGCGCAGAGAGTAAGAGTTTCTTCTCGGAC -ATTCCGCAATGGGCTTGGCTAACATGTACTCAGACACTCTTATACCTCATCCGTCGCAGC -TATGCATACATATATGCGCTTTTGATTTCATCAGAGCCTGTATCTGAAGCTCTACTTCCA -GTTTATAACCAACTCCAAACTTTGCGCCGCTGCTTGATAGAAGTCAAGGAGTCGGGAGGC -GTTGCAAATTCGCGCGAGCTGTATCCATACAGCATGAAGGTATGCCATCGTCCACACTAC -TTGGGGATAAGAGCTGACTGAAATAGCTTAACTCGATTGACAACATGCGAGTTGATGGGA -AATTCTATGTGGGCCCGGATATCCCTGAGGGCCAGGGCAGTGTCAACAACCTGCTAGCCG -AATGCTACGATTTAGTTTGGGAACTTCGTGCTGCAGTGGTCGACGAGGAAGAGGAGTCTT -GAGACAAAACAGGTTGTCTAGACCAATGACTTTTTTTTTTCTCTGCCTATGTGTTTTATT 
-TCCACCAATGTTTGTGAATTTGCGCCGATGTTGACGAAGAATTGGCCATGGGATGTCTTT -GGCATACAGTGGTCTTTGATTGTATGATCAGAGTCTTACCTTGTTTTCACTTCTGTTTCT -TTATATGACGATAGGCGTTCGAGATGGGACCTGTGACGTTGGCACTTGTATTGGGAATGA -GGGCACTTTGGTTCCTTCTGGTGGGTTTACTTCGAGTTTAATGAAACTTCCTGTTGGCGA -AAGTTCTTTTGTCCAATCTTTCTTCTTTCAAATGATAGACCTAATTTTGGGCAGCTGTGT -GATTCTCGCTACACTTCCATCTGCAAAAGACAAAGCAATGAATACCCAGGGAACACCACA -CATATCTCAAGCAACTCGCCGACTCGGTGCTTGAGAAATCGATAAGCAAGATTATTCCAT -CCAGCACCCGTTGATGCAGCTCAAGTTGACCAAACAATATTCCCGGTATAAGCAAAACCT -TGATCTGGTTCGAAACTCATTTCATCCTGCTGAAATAAAAGACCCCTCCCTTGAGCAGTC -GCAGAGTGTGGGAATCACGAGCCGTGAAGCAAATTTCATTCACGTCTTTCATCACCAGAT -TTTCCCTGTCAATTCCGTCTCAAGGCAGCAACTGCATGCCGTGGTACAGTGGCCTCGATC -ACGGACGCCATTATCGATACCAGTCATTCGTGGCCTCTCGCAACATCCACATTTGCGTCC -ACAACAGCTTCGTTCTGCACAGTCTTCACGAAGCATACCTAAGAGATACCAATCCGCGTG -CTTTCTCTGCCTGTTGATTGCTCGTCGCGCCGAATCGCCCCATAAATTATCTGACTTGAT -CTATCCCCTCCGACTATCCCAGTATCTAAAATTGGGGGATTCGATTTGCTTTTCATCGAC -CTCGAGCCGAGACGAGATATACTTTCCTTGAGTTGTGCATGGTTTGTACTTTTTCTTGAA -TGCAGGCCTTTCGTGGAGCAATCCAACGTCCCTCGGTATCAAAAAGACGCATGATAGTAA -CTATTTGTTCTATTTGAATATTTGAAGAAAGATGCAATAATGACATCCTCCTAACCATCT -CATACGGCTACGCAGTATCCTGGATTTATTTAGGCCAATGAAGAAACCCCCAGGATTGAA -ATAAGATATTATTAGAAAGAAAGGAATCTGTAGGCCCGACATTATGGAAATATGTTGAGT -AGACTGACCTAGACGGATGGATAGAAGAGATATATCGTAACAAGCCTGCCTATACTGTAT -GCTGTAGGGTATACTCCGTACATACTGTAGGGATTTCCCCACATAATAGAGGCTGGGATT -CCGAAATCGGCGCAATCACCAACCAATCAGAGCGAATACAAATAACGTATAATCGGAGGA -AAAAAAAGGAACATCTTGTCCTCCCCCAGTTTCTCCCCCTCAACTTGATAGTAAAGATGG -TGAGCCAACTTGTTTGGCAGCGGGTTTCTGCCTCTCGTGGTGTAAGTAAACATTGAACAC -AGCTCAATTGGACAAACTAACATCTTATAGATGGCCACACGGCTCTCCCCACAACGGCTC -TTCGCCCGTGGCCTAGCCACCGAGGCCTCCTCCTCCCGCATGCCCCCATACCCCAAGCTT -GTCCGCAACCTCGAACAGGTCCGCCGAGTCCTTGCCTCCGACCGCGCCCTCACCCTCGCC -GAGAAGATCCTCTACGCGCATCTGGACAATGCCGAGGAGTCATTGCTCACCGGGACCAAC -AATGGCCGGGACATTCGAGGCAAGGCCAACCTGAAGCTGAAGCCCGACCGCGTCGCTATG -CAGGATGCTTCGGCCCAGATGGCTCTCCTCCAATTCATGTCTTGTGGTCTTCCATCCACC -GCCGTCCCAGCCAGTATCCACTGCGACCACATGATCGTCGGTGAGCGCGGTGCCGACACC 
-GACTTGCCAGCTTCGATCAAGGGTAATAGCGAGGTGTTTGACTTCCTGGAGAGTGCCGCT -AAGCGCTACGGTATTGAGTTCTGGCCCCCTGGCGCCGGTATCATCCACCAGAGCGTGCTG -GAGAACTACGCCGCCCCTGGATTGATGATGCTCGGTACCGACAGCCACACTCCCAATGGA -GGTGGTCTtggtgctattgccatcggtgttggtggtgccgatgctgttgatgcgctcgtt -gACGCTCCTTGGGAGTTGAAGGCTCCTAAGATCCTCGGTGTTCGCCTCGAGGGTAAGTTG -AGCGGATGGGCTTCGCCTAAGGATATCATCCTGTCCCTGGCTGGCAAGCTGACTGTCCGC -GGTGGCACTGGCTCCATCATTGAGTATCACGGCCCTGGTGTCGAGACTCTGAGCTGCACG -GGTATGGCCACCTGCTGTAACATGGGTGCCGAGGTGGGTGCTACCACATCCATCTTCCCC -TTCTCGCCTAGTATGGTTCCCTACCTGCAGTCGACCCACCGTGGTCACGTCGCAGAGGCC -GCTGCTCAGGTTGCCGCCGCTGGCCCCTCGAGCCTGCTGCGCGCCGACACTAAGGCCGAG -TACGACCAGCTGATCACTATCAACCTGTCCGAGCTCGAGCCACACATCAACGGACCCTTC -ACCCCCGACTTCTCAGTGCCCCTGTCCAAGTTCGCCGACACCGTCCGCGAGAAGAAGTGG -CCCGAGACCTTCGGTGCTGGCCTCATCGGTAGCTGCACCAACTCCTCCTACGAAGACATG -ACTCGCGCCGAGAATCTCGTCAAGCAGGCCACCGCAGCAGGCCTCAAGCCCAAAGCCGAC -TTCTTTATCACCCCCGGCAGTGAACAGATCCGCGCCACTCTGGACCGCGATCAAACGCTA -AACACCTTCTCCGAAGCCGGCGGCATCGTCCTCGCCAACGCCTGCGGCCCCTGCATCGGC -CAATGGAAGCGCACCGACGGCGTCCCCAAGGGCGAAGACAACGCCATCTTCACCTCCTAC -AACCGCAACTTCCCCGGCCGCAACGACGGCAACCGCCAAACAATGAACTTCCTCGCCTCA -CCGGAACTCGTCACCGCGCTCGCCTACTCCGGCAGCACAACCTTCAACCCCATGACCGAC -TCCCTAACCGCACCAGACGGCTCAGAATTCAAGTTCCAGGCCCCCAGCGGTTTCGACCTC -CCCAGCGCCGGTTTCGAAGAAGGTAACCCCAACTTCCTGCCCACAGCCGCCGTTCCAGAC -GCTACTTCCGAAGTCATCGTCTCACCAACCTCCGACCGTCTCGCCCTCCTCGAGCCCTTT -GCTCCCTTCCCCAAGGGCGACCTCTCAGGCCTCCAGGTCCTGTACAAGGTCAAGGGCCAG -TGCACAACCGATACCATCTCGGCCGCAGGCCCATGGCTTAAGTACAAGGGTCACCTGCCC -AACATTTCCGCAAACACTTTGATTGGCGCCGTCAACGCCGCCACTGGCGAGACCAACGTC -GCCTACGACGAAGCTGGCAAGCAGTACGGAATCCCGGAGCTAGCGGAGGCATGGAAGGAG -CGCGGAATCTCATGGCTCGTTGTCGCGGAGGATAATTACGGCGAGGGCTCTGCACGCGAG -CACGCTGCGCTGCAGCCTCGCTACCTCGGTGGCCGGGTTATCGTCTCCAAGAGCTTTGCC -CGTATCCACGAGACCAACCTTAAGAAGCAGGGTGTTGTGCCTCTGACATTTGCCAACCGT -GAAGACTATGATCGCATTGATGCCTGTGATAAGGTTGATACTGTCGGTCTTTATGATGTG -CTTGAGTCGGGTGGTCAGGGTGAGATCCAGCTGCGTGTTACTAAGCAGAAGACTGGGGAG -GTGTTTACTATTCCTGTTCAGCACACTTTCAGCAAGGACCAGTGTGCATTTATCCTTGCT 
-GGTAGTGCTTTGAACTTGTTGGCTAAGAAGGCTCACGCTTAAAAAGGGGAGTAGGTGGTG -GATTTCAGACTGTTTTGACACCATAAAAATGGGTTGGGTTTGATCTGGAGTTGATGATTT -GGATTGCATTATATCAGTCCCTCGCCTTGTATATGGTGAATTTTGGTTTTAGATTTTTTT -GCCGACTAGATGATATCCAATGTGATCTGTATTCTACTGTTTCTTGATGCACTGGTGTGT -AAGTGGTGTGGGATGAAGGTGGTATTGTAAATCAGTAATAGGCTGAGAGGTCCAGATTAT -TTGATTTAGTGCAATCCATTCATTGTATGTGTCATGGTATATATGGTTTTGAAGCTGTGA -CATCAAAACGAGACAAAATTCGTGATAGAAGGGAAAGAAGAGAAAAAAGCAGATGGGACA -TGAGATAAGCATGGGAATATCATAAAAAAAACGCATCATGATGCCTCGTTCTTTCATAAG -TAATCAAATGGGCAGAACAGTAACTTCCTCGAAGGGCAGGCCGAGAGACCATGAGAGAAT -GAGAATGAGAATCAAGCACGGCATAATAGACACCGATGTCTAGAAATACCAGAAAAAGAA -AACAACACCAAGACAATCATATGTCCTGATTGCCGAGCTTCGGAAATCTTCAGACATCCC -CGAGAGCATCCTGTATCTGAACGCCAACAAGCCTTAGCCTTTCTATACAGGCAGAATGGA -AAAACAAAACCGGCTTTATGCAGATGAGATGCATCATGCAATATCCTTTTCGAACCCTGA -AACAGagaggaagagagaagagaagagaagagaaagagaagagaaagagaagaagaagaa -gaagaagaagaagaagaagaagaagaaaaagaagaagaaagaaAGAGGTAGGGAGCAAAG -CCAAACATAGCAGTGTCATCCAGTCCACTTCTGATCAGAGAACCCAGGATCTCCAGGATT -GACCCCCAACATCTGGCCAGAGTTATTGACGCCGTATATACTCGCCTGGTTACTAGCATA -ACCACCACCCATCCCTGAATCATCCTCATCAAAGCCAAACTCCCCGCCAGAGCTAGCATC -AAAGCTACTCCGCCCACCAATCTCACCCGGATCATCACCCACAGCCGAGATCTGGTCCCA -CTGCAGATTCTGCAAAGCCGGGTATTGCGCCAGCAGAGCAGCAGGCAGGACGAAATTACC -CGTCGATTGTCCGTCCATACCGGGCATGTTAGGCATAACGAGCGGCTGGGGGATTGGCTG -AAGATGGCCAGATGGCGATTGTTGTTGCTGTTGCTGCTGGTGCTCTAGCAAGACGCGATC -ACGCTCTTGCGACTCCTCATCAAGAAGAGGCTTGATACATACGCGGCCCGCCTCTGAGCG -GAAGTGGCGGCCCAGAGCGTCTGCACGGGCGAAGCGGCGACCGCAGCCCCATTGTCCGCT -TTGCGAGAGCTCGCCGCGACAGACGAATTTCTTCTCGCCGGAGTGGAGGCCTTCATGGCG -TTTGCGGTCGTGCTGTCGTGCAAATGCTTTGCCACACACTGTACACACGAATGGTCGTTC -GTCGGTGTGAGTGCGGAGGTGGGATCGCAGATTGTATGCGCGGGTAAACCGTTTCGGACA -TAGGTTGCACTGGAAGGTAGCTGGGTGCTTCTGGATCCGTTTTGGCTCCCCAGGTGCTGC -GCCGGGTCTTTGTGGATCGGCGAGATCTAGGATGTAGTTCCGATTCTCAATTGATGAGGT -GGAGAGGCGACGGTTTGATCGTGAGGGTGAGGGATCTCGCGAACCAGGGTTACTGTGCGA -GTGAGAGCCAATCGATCCAACAGGTGACAAAGACCGTGCTGCGGCAGGCGCATGGGGCTC -CAGACTTGTCATTGTTGTAGCTGCAGGAGAGCGTGGACGGGAAGCAGGGTGATAAGGATC 
-AGATTTGCTACGAACACGAGATCCTAAGTTCTGGTTAGTGTCATATCAACCAAACCGGCC -GCAAACCAGCACTTACGCATAGCGGGAGGGCTCAATGAGTCAATATCTGCCCTTGACTTT -GATGGATCCAAGTTTGGATTTCGCAATGGTGGAGCAAACTCAACATTGATCGATGGAGGA -GCCATTTGTGAAGCCTGACCAATGTCGCCCGGCGTATTATTCAATTTGCCCATGTTCATC -ATTTCCTCTTGGGGCATCCCGTACATATCTGTTGGCGCCGAGGGGAACTGAGAAAGATAC -GAATTATTTGGCACCATTTCACCTCCCTGCTGTGGCATTAAGCGCGGAGAGATATATGGG -CTGTGAAGGGGGCTGATCCCCTGCTGATGCTGCTCTGACAAGGTGAATGTTTCCATGCCT -AAGCCGTTGTCGTATAGACTTGGGTCACTCTGGGGAACCAAAGAAGGCGATGCATTGTTC -TCAATGCCATCGAATTCGTGCTGAGACAAGTAGGGGGAGTGGTTCGCTGATGATACCTCG -GACACCTCAGACGGTGCACGTCGGTGGCCCTGGAAGGAAACATTACTCATAACCGACTGC -CAGTCTGGACCTGATTGGGCTGTCAAATATGCTGCAGTAGCCGGGTCGAGCGAAGTGTGT -CGTGAATGCTGAGGGGTATAGTATGTACCCGGTGTGGATCCTGGAGGGGACATGGGTCCA -GGTGAGGAATGCTGCTGATCTTGCGATGACGGATGTGGGGATGCTGAAGGGTTGGAGATG -GGGCTGACCATATCAACAGGGCCGATGGCTTGGTTGATGGACTGGCGCTGGTGCATTTGC -GGGTCCAGTAACAACGAAGAATCGTACTCGGGGGTGGAAACGCTGTTGGGGCTGTGATTC -TGGTAGAGAGCAAAGTCCAATTCTGGGTGATTCGAGTTGAGCATATTGGAGAAGTTTTCG -TCGCCCTGCATCGCTCTAAGTCCCGTCGAAGAGTGCTCGAGCTGATTGACCAGGCTTGTC -TCGAAAGATTGCGATATCGGAATATTGTTGTTATTGATGGTTGAAGCATAGCTCTGGTCC -GTTGGCGATAATGTCTGTGTCGAGGGCGCGCCAGCAAGATAGCTTGGGTATCCATATTCA -GCACCACCGATGCTACCGTCGGGATTGTGATACGAGGGATTTGTCGAATAAGCAGCCATG -GAGGGGTCAACTCCTAATCCACCGGAGTTGAGGTCGTGGTACTGTTGGCCGTGCGGCGAA -GGGCTCACGCTCCGATTCTTCATATCTCCTGAGCAATAGGCCGTGCCGTGGTTCTGGTCA -TGCGAGGCCATTTTTGCGAATGCGCGCGGGTGAGGGCTCTTGAACGAGCGCCAAGCCGGG -GCTGTAATCAGGAACGAGGGACGTAGTGGGGATCGGATATAGCGCAAAGACTCGGCGACT -AAGGTAGATTAGACGAGCAAATCTGCTAGGGTCGTGGACGCCTGGTGATGTAGGGAGAAT -GGTGTCTGATGATGTGGTGGGATGTCATTCGCAAGGGTCCAGGATCGGGTCCGGTACGCA -ACGGAGCTTCTGAACCGGTTGTCAGCGGTCGCCAGATTGCCTCATCGTCCCTGAGAGCAA -TTATACCTTGAGTTTGCCTAGGGGTCCGTAATGAGAGAGAGTAAGGGAGAGAGGAAGCTG -AAGATTAGGTTTCACGTGCGCAAGGCGGTCTAGGTCCAACACGTCTACTTCGTACTCCAT -ATGCGTGTTAGGGATTGATTTTGTTTAATATATAATATTCCCTTCCGCTGTGGGTGCCCT -AAGCGTAAGGAGAAAGAGTTTCCACCTTCTTTACTGAAGTTCCTTCGAACCCCAAATGGG -TCTCCCCACATTGGCATTATCAAGTGTCCACCCTGATAACAGATAGGCGAGCACGAACTA 
-TTTTTATACACACATTCTCAGTGATTCAATTGGAAAAGAGCGCTGCCTGTTGAACAAACA -ACATACATGCCGTGAAGCACTTCCTCTCCCAAAGTGGAGGGGTTCAACGCTAAAGAAAAC -TTGGCTTAGGTTTTGCACAGCCACCCAGCTGAGGCCCATGGGAACTAGTCCTATTTAGGT -ATAACCGATATAAGATATTGGGCAAGGCACACTTTCCAACAGTCTGCCTTGTCTATACTT -CGTTTGGATATTTCCTTGCTGCCTAACTTCATATCCCTGATACGAGAAAATCGGCGCTAA -ATCCTGTACACTTCCTCACTTGGAGCTATATACTCTCACAGACCACCCAATGTATCCAGA -CTAGCCCTAGCCTGATCATTCGACCAAGCAAAGAGCAAGTTATCATCGCAACTACTCCCA -AGTCCCGCATTATCAACCCAAGCAGGTTCCCAGTAATAAATGCCCAGTCCACCTGCGTCC -TCCACAGCATCAGCCAGCCTCTGAAGGAACGTTTGTTGTCCAGCAACTGAAAACGGAAGA -TCCTTCAGATCCGACGGGAACGCATATTCAGGATTGGGGCAAGCAAATGGCCAGTTCGTC -TCGACAACGAGCGTCTCCTTCCCGTACGTAGAGTGTAAGTTTGCAAGACTCGTCTTCAGC -GCCGCGATCGTCGCACCTGCGTCATAGAAGGGATAGAAAGAGACGCCGATATAGTCAAAG -TCCGCGGAGAGGAGTGGAGATCCAGATGCCAGGACTTCATTGTAAAAGTACGACTGATCA -GACCAGTTCCAACCGTTGTCGAGATGGATCATGATCTTGGGAGTGCTTGCTAGCTTGGAG -TCTTTCACGCCCCACGCGCCAGAGTGCAGGATCCGTGCGATGTTGTAGTAGCTGCTTGTT -GCTCCCAGTGGCCAGAGTAGGCCGTTGCGGATTTCGTTACCGATGGAGACGATGTCTACA -TCGAGGTTGTTGGCCGCAAACGTGTTGCATACGTTCAATGTGTAGTCGTAGACTTGTCCG -GCTAGAACTTCAATGTCTGTTGTGGACCAGCCCGAAGGCGTGGTCTAAAGAagtaggtca -gtcattggaccaagtcagtcattggaccaagtcagtcattggaccaagtcagtcattgga -ccaagtcagtcattggaccaagtcagtcattggaccaaatagttagtcTCATAGGGAAGT -ATTAGATTCTTACCTGTGTTTTCGGATCTGCCCAGGTATCACTGTAATGCAAGTCGAGAT -ACGTACCCATACCCTGTGCTTGTACCCTCTGCGCGAGCTTAACGTTATAGTCTAGATCGT -ACGATCCATCACTGGGATTCACCCAGATACGCTGGCGAACAGAGTTGACACCACTAGCCG -AGAGGATGGCCTCTAGCTTTTCCGTTGTGCCAGCTATGTTCTGATAAGAGATGCCTGCAT -CTTCTTCGACGAGGAGGGAAGAGATATCCGCGCCGTTGTAGGTCAATGTGGCAGATGCTG -AGGCGAAGAGCTGTCCGGCAGCTAGAAGGAGGTAGGGAAGCATGGTGGAAAACCACTTGC -TGGAGCCGAATGGAGTCAGTTCAGATCTATGGCTTTGTGAATTAGAGGGAAGAACTCATG -TATACTTGGTTCTTATGCTATCTACTTTCCTATGTACTTGGCATCTTTCTAGTGTATGGC -GTTGAGTTTCAGATTTCTCCGAAAATCCCCCCCGGGTACGTCGTGCACAAGACTCAAAGG -AGAATTATCCCGCCTGTTTCCCGTCTACAGAGTAGAACAATGACCGGACAAATGCCTTTC -CTGGCCTAGTTCCTGTGACTTAGTAGCTGAGACACGGGAAGACTAAATTAGTGATCGTTA -TGCATACTTGTTGACGGTGGAGTACATTTCACCCTGCAGACTCAAAACATTGTGGAGATA 
-GCAGTCTAACCCTCTGATATCATCATTGGTGGAAGTACACAAACATCCTCGGAATATTCC -AAAGTACCTTGCTGAGATTGCCAAGTGGGAACAAGGGGCATACAAGCCCACCACGGAACT -TCGCTGAAATAAGAGAACCAAGCTATTATGACGATGTGGTATGCGTTTTGTACAAGGTAT -GGAAAGAAGGTTCATCTCAGGGGGATAAAAATCAAAAGTCCCAAACCCCCAAATATTTCA -TTTCTTCATGATAAGCTAGACCGTCTCGCCTTCCAGCATCATATATCAGTCATGTCGCAA -TTCCTCAATCTCTGCAGAGGATCAAAGAATCCAAAATATTGCATCAACCCAACCGCCCGG -TTTTGACAAAATTCAACATTACATACAGTTAAAAAAAAAAAAGCGATCTGGCCCCGTATC -CATGCATAAGACCAAGAAAGAAAAACGTAAAGAAAAAATAACCAGAAAACCCGGGAGTGC -GCTGCGGGATTCTCTGCTCTCCATATTCATAGTCAAAGACACTGAACTCAACAAAATTGA -ATAGAACAGATATGCAAACGCGGTCGGAGGGCAGCCTGAATCTCATCGCAAGGGGGCATC -GATGGATGCTCACGCGAATTCGTATATGTACAGACCATGTAGGATGTATGGAACAAATTC -TAGGAACACTTTCCAAATATCAGCCTGCCGGGAACCGGTGTGGAAAGAGAAGCAAAACGA -AATTCGGTGATGTACAACAAATTCGCTAGGTATAACTGGGAAGTGTCGAGTGTGGAAGAA -AGTGGTGGACGTGAGTATTGCAAAGATGGTGTGACAAGACGAGGTCAAATGTAGAGGTGG -ACTGGACTGATCGGGACCGGCCACATTTCAGTTCGCTGACTACGACGGAATATCCGATGC -AGCCTTGGGAGGCGGCTTGCGCTTTCCAATCACAAGGCCTTCATCTTCCGACTCATCTTC -GTCCTCGTCATCGCAGTAGTATCTCGACTCGTCCTCTTCAGACGGTCGAGGCGAGATCGG -ACGAAGGGGTTTGACTGTCTCTCCGGTGCGGAGCATGGAGGGTACTTCGTCCAGTTCATT -GCCCTTGGCCGAGTAACCATCGCCGGAAAGAGAAGATGCGTTTGAGACGACCGACGGAAT -ACTGGGGTGGGAGGCACTTTGGGACATGCCGCTGGTGAAGTCGTCGGCAGAGGAGGAAGA -GATAGTGACCGAAGTGGAGGCGGCCAGCCCATTGTTGGTGTTCTCGGCGGGACATCCTTG -GTTGGCAGGTGGATTTATGGGCGGCAGGAGGAGCGATTCAGTAGAGTACATCGCGTAATC -AACTCTCAAAGTACCGCCTTCATTCGCCTCGATGTCGCTCCGTCTGCGGCGCAAAAGTGC -ATCTCTGGTGAGGTTCAACGATTCCGCAGGTGATGAGCCCTCAAGCTGAGAAGATCGTTT -GCGCCGGCTTACATGGCCAGGAGTCAGTTGCTCAGCAGTTGGGTGTTCCTCCTCATGCCA -CAGGCCGGGAATGCGCACCTGACCAAGGGCAGATGCATTGCTGATAGCGAGGCTAGGCTC -AGCAGAGCGACGTTCTCCATCCCCATGCAGGGTTCCTGACTCGCCAGATGGAGAAGCGCG -CCGACTGCGACTGCCCCGTGATGCTCGTCGAGTAGCTCCACTGAATAAGCTACCGAAGTT -GGTCTGTCCGAGGGCTTCAATGATGGAAGATGGCCGAACAGTCGATGGAATATCAAAACT -CGACGGCTTGATCGTCTTTGCAGATTCTGCTGTAGACATTGAGGACTTTGCGCGACGAGG -AGGACCAGGTCGGGGTGTCACGATAGGAGGTGGACTGGCAAGCTTGGCTGCAGCAGCAAC -TGCAGCTGGTTGCACGGCATCGAAATAAGACTGCCAATCGCCTTCATCCACCGGACTTGC 
-AGTAACACTCTGGGCGAGCGGGTGTCCTTGACCGCCAAATTTGAGCGGCCGAAAGGTTTC -TTCGTCCCCTCGCAAACTGGACCGGCGACTTTCGCGGGAGGTTTTCCCGAATGTGCTGTT -GCTTAAGGCAGACGACAAAGTCTCGGTAGATGGTGTTGTGCTTGAGGTGCGCTTGCGGGA -TTCCTTGTCCTTCGATCGACCGCTGCTGAACCATTTTGCGACATTAGACCGAACGCGTTG -AATAAAAGGAACTTTGCTGACAGCGCTTGTAACCTCTTCATTCGAGACCTCAATTCGCTT -GCCCTTGCTTTGGTAGCCAGGGTCGGTCTCCTCGATCCAAATTCGAGGATTGGGAAGACC -ATGAAGTGTCCAAGGGTGGTGCTTGATCTCTTTCAAGGTGATTCTCTTGATTGGATCTTT -CGTCAACAAACGCCTGAGTAAGTCATATAGCTCATCATCGATATCTTCATAGACGAGCTC -ATCATCATTCCTCAGGTTCCACTGATTGGAGTCGGATGCCTCATCCATCACGGGCACAAG -GCGCTTCCGTGGAATGAAGACTTCCTTTCTCACAATGGTGTGGAACATACTGTACTCGTC -ATCTGAAACAAATGGTAGGCGACCGAAAACCATTCCATACAATGTGACACCGAGCGACCA -GACGTCGACTGCACCAGTGATCTTGGGGATCGACCCAATGGTTTCCACAAAGTCATCGCC -GGTATAACATAACTCGGGAGCATAGAAAGCTGGAGTGCCAACAGTCTTGGACAATTCACG -GGCATCGTCCAGCTCTGTGGCATCGGTCTCCCCTACCTGCTCTTCTTCCTCGTCCCGCAT -TGGACGCCCCAGGTAGGAAACACCGAAATCAGAGATCTTTACCCGATGATTGCTGGTGAC -GAGGAGATTAGCTGGTTTGATATCCCGGTGGATGATACCTTGGTAGTGGAGGTATTCTAA -GCCAAGTACCGAATCCCGGAATGCGCTGCGGGTTTCTCCGATAGTCAAACAAGGGACGTA -TGCCATGTCATCGCCCTCTGCAGACCACTCGGGTTCAGAGGGGGCATATCCAAAGCTACT -AGAGGCTGTGCTAGAACGTCGTTCGAACGGGTTATCTGAATATGCGCCATACATGCTGCC -CTCGATCGCATTTAGCGACGGATCTTGAGCGGAGGATGGGCTTGAGTCATCTGTCACTGC -AATGGATCGGCTGGATGATCTTCTGATGGGAAGGCCGGACCCGTCATCTTCCTCATCTGA -AGATTCCGCACCGTGCTCCAGACTCCAAGCAGGGATTGGCCCTTTCTGGGCATCGGCAGC -CTGTGCTTCAATTCGCTCGCGAGCCCTTTCCCGTTGCTCCCGCAGATGCTGAGCGGTTCG -AATGAATTGGTGGCTCTCTTCCAAAAATGCAGGAGTATCTGGAATTCCATTCTTCTCCCG -CTCCAGACGTAGCTTGTCAACGTGGACAATTTCTCGTAGACCCTTCTTGCGCCAGACAAT -CTCGCCATTTTCAACATACTCCAGCACAATGTATACCTTTTGTCGATTCGGATCGTCGAT -GACTTCGAGCAAGCTGACCACGTTGGGATGCCGTGCCTTTTTCAGAATAGCCACCTCCTT -CTTCACTTTGTCTTCTGGATTGCCCAACTTGCCCAACCGACGGCGAGTGGAGTATCGCTG -GACAATCTTGATAGCGACCTTCTGCTTAGTCTCAACATGTTGGCCTAGTTTCACTTTGCC -ATGTTCACCACGTCCAAGCTCTTCGAGGATCTCATATTGATTGATGGTTTTGTTTCCCGT -GACCATGTCGCGATCAACTTCGGCGGTGTGTGTTTCTTTGGGTGGTTGAAGATGTGACGG -ATGGAGAAGCGAGCTTTTCATCGAGATGTCATCGTCAGACACGGATGACGTTCCAGTACG 
-AGGGTGTCTCATGGAGAAGAGGCCCGGGCTCGAGAGTGGAGTGTTGCCCGCAGTGCGAAT -ACCACGGGCATAGGAGTGCCTAGGAAGTGAATCTGGCCCTGTGAGGTGGGGTGGTTGCCA -GGTGGGATGCACCTGAGACTGGAGGGAGGCATAGGACTGATCGGGGTAAACTGGTAATTG -ATGATCCGAGTGCCGCGAGGCTCGCCGATTGCCTCTTCCAATGCTTCCACTCTCCGATCG -GACATCGACCACGATGCCTTCGTTGAGGGCTTGTAGTGAACTCGATGACGAGCGATTCGA -GGGGGTTCGGGCACCGTAAAGGCCGGGGAAGGCCTGACGGAAGGTTGCGGTGGCTTCTGA -TACATCGGTGATGATGGAGGGTTGTTCCGCAGATGATTTAGATGGGGGGCGAGACTCGGG -TTGGTCGTTCGATGCAGATGATAATCGCGATGACTCCTCGGACCGGCTATGATCCCGCTT -GTGATTTTTTGAGACATGGTCATTCCGAGGCAGCTGATCCGGATCCTCCAGACCAGCTTG -CGAGGACAGGATTTCTGCCATGATCCCTTGTTCCTTTTCTAGTGATGATAATTCAAAGGG -AGGCCGAGGCAGCGGTGCCGACAGTGCAGAGCATCCTAGTTGCACATCCTCCAATTTGAC -AGTTTGCTATTTTTGTTTTGGTTTCCCTCGTAATAATTGGTTCAAGGTGAGCTGGTGAAA -AGCTCGGTTGGCACGCACGAACAGGTCGGCATTACTTTCGTGAAGTCAATAGGGATGGCG -ACAACTATGCAACGGGAACCGTGTACCGGACAGCGGGTTGTCGCCTTAGGGGGGAAGGAC -ACGAGGTAGACCTGCTACTGCACGAGGACTTGGACGATCCCACATCCACACGTGGGGATT -CTGAGTCAAAAAAAAGGAGTGCCGTTGCAAACAGATGCTATAAATAATTCGGGGTCATGC -TCATTCGAATGGCCAATTCTCGGGAAGGAGGGGGAGAGCGAGACCGGAATATGAAGCCCG -AAAGTAATGTGAAAAGGAAAGTCAAAACAGCCAAAGAGCTGTGATAATTAAAAAGAAGAA -AAAGATGTGTAGAGCGAGAACAAGAGGGGAGTTGAAGAGTCGAAGGGGGGAGGTTGGGTG -GGTAATAGGGCAGAGCGATGATCGGGACGCCAAGAATCAAAATAATTTACAAAACGCAAG -TTGATCGTCAAGCCCAAATGGCAAGTGGAAAATAAAAACCACAGCTATTCAGTGAAATAA -GGGAAATTGAGTATATCTAATTTCGAGTATTACCATATTAGGAGGTATGTCAGCTCTAAA -ATGACAGGTGTAATATGATCTTTTGATTCCAAGGTCTAAGTACTTGGAGTGCATGTAGAA -CAATATGATTATATCTATTTCCTTTCCAAGGTTGGATTGTACTATGTGCTTTGTGAATTT -GCCACTATCTATGGATCTTTACGTCGATGACTTGAGTCTATAGCCTCCAAGAGGAAATAT -AGAGAATTGACACTTTCCATGGATGATAGAAGGATATAGGCCAAGGCTAGAAGACAAGTA -AGAAATGGTCCATGGTTGCCCAATCTTGTTTTTCCTATTTGCCTGGACTGGATTTCCTCG -GCTCCGAATACACATTGAACTGGGAGGCTATATACCGTTTTGCCTCCAATCGTATTTCAC -ACTTCTGCCCAAACTACTGTCCAATACCGAAAACTGTATCTATCCTACATAAGCGATAGG -CGACGGGAGTAAAAAAAATTTTTTTTTTCACCTTTCATTCTTTTCTTGTCCGTGCCAGTT -GAGGTAAGAGGCAAAAGCATGTCCAACTCTTTGACTATACTCCCGAAGTCCGTATACTCT -CCGGTAACTTCCATGATATTTGTATCCGtacaaatatagtacaaatatataatataacat 
-actgtatgtaatactataaaaCCCTCGGGAGGCGTCCTGCATGAAGCTCTTTAGAGAACC -TTGGAGCATATTCGTGTGTCCATGCGCTATAATCGACGTCATATAGGTCTGGCCCCCTAT -GCTGTAAGCCTACCCTTTTCAAACGGAATAGTCTTGGCACTTTATGCCCCTACCACTATG -GTGAAATAGATCCTATTGTGTCTATGAAATTTATACATTATCTCTGAGAATTAGAGTCCG -AGGAATTATTCCGTCACTCACACAGCCTAACCTACGGTGCGGGGGATCCCTCGGAAAAGG -GGCTAAACAGACAAGGGTCGGATATGAGCATACTCCGTACTTGGAAGTTCACCTACTGCA -GGCACTACTTGGACTAGTATACTAGTCTACTAGGCGGTACGTGTATCCCATCCTCAATTC -ACGACGTAGCTGAGTGGATTCCAGACCTCGCTGTCGCATGTGGAGTGGGGAGGTGATCGA -TGAATGTTTCTATGCATAAGCAGTAATAATAGTCATCTTGTGGCGTAGGAGGTATAGTGT -AAAGCGTACAACACCAATCACTTGTATAAGACTTGTACAGGTTGCCTCCCCGCAGTGGAA -TTTCGGCCTGAGCATCCGAGACAGCACGCTCTGCAGCCATTTCCGGCGCCGATGCACGCG -ATTTCCAAGCAAGCTAACCCTATAAAAGAACTAGAGGGAGCATGGACAATTCTCCGGGCT -ACACTTTGACCGTTTTAAAATACTATTACTTTGCATGTTCCTTTATTTCACACTATTCCA -CATCCAGGTTTAAATCATCTCGCATGCACCTACGGTCACCGAGCGCCTGCATCAGAAAAG -CGATCAAAGCACACGAGGACGAGCATGGCCTCTGCTCACTTGGCACAATCCCTATTCCCA -GTCCCTATTCCCAATTCCACCTCCCCCAGGCGGGATCTGCACTGATACTGAGCGAGGGAA -ATTGCTCGTGTTAACCCTGCTGTGTGACGAAAGAGGCAAGAGGCCTTTTCACTCTTCCGT -CTGATTGAGTCCCAGTCAACCGGGTGTGTGGCACGCCCTGACCCCTTATCACTCTCATTA -CTCTGTGTTAACGTCGAGCTGGATTTACAATCTGGCTGTGCTGAAGTCCGGACATTAGAC -ACCCCTAGGCGCTATTGTGCATGTTTGTGCGCAGGCACGGCGCAATACGGTCTGATCTAA -ATTATCCTTCTCAGTTGATATCTACTGGTCTTGAGTTTGTAGGAAGGTCTGTGGTATTAT -AATATCAACAAACCAGGAGAGTTCTATACAGGATAGTCCTATACATCTTAAATTCCTGAA -CTCCACATGTGATTGTATAGTTCAGTGGCGACTATCTTGTCTTGGAACCTCGTTCATAGT -CCATATGAAGGCAAAGTACACGACCGTAAAACATTGTTGTGTAAAGATCGACTGGCTGAG -CTCGACTAACTTTTCCACTTTTTGCTTTTTCCTTCGCCTCTCGCCTCGCATCTCTCGTCC -TCAGACTCGGCACCCCCGGAAGCCGATGACTGACGATGGTAGCCGCTAGGCCTCCATGCA -CAGGTCCCGTTTTAGGTGCCTTCAATAAGCGGGATAATGTGCTTACATTGTCATCTGCAT -GGGGACTGTAGAACATAGTGCTTAGAGTGGACCTCCCACAGTCAGGCATGTCTTTACTCC -ATGTTCACACTTGACTTATAGAATTATAGCTCTCTCAATAGAATTAGGCAAACAGCCCAT -TCTCACATATGCGAACTGGCTACACAAAGCCAAATGTTAGCTCGGTCCTGCCAACATCAG -GGAGTAAGCCAAGGGATCCGTGGTGCCATGGTGTCGCGTATAGAGCTGAGATTTTGCTCT -TTTTTGATATGAAAACACTACATGTAcatcatcatcatcatcatcaGGAGTTATACATAA 
-AAAAGTCGCTTGCACATAGTGTGCTTTTTCCGCCTTTTCCTGCCGTCTTCCTTTTAGGAC -TGAAGAGGGGATCTTGGAGATCTTCAAGAAGGGTAAGTAAGAAGAGCGCTGAAAACCCCT -CTAGAAGGACCTTTGAAAAGACCTGGAAGACATCATAAGGCCACATAAGAGACCGCCTAC -TCTTGCGAGAGATACCTTGTAAAGAAAAACAACATCTAGGGGGCCGGTTGTCGATTTATA -TACTCCGTATGCCACTTTAGGTCAAAGTGCTGAGACACAATCTGATACCCGCCTTAATCA -GGCCCACATGGAGAATCTTGATAACGTCAGGAAAATGATTCCGAGATGAAAGGCGTTAAT -CAAGTTAATCTAGAAAATCAAGGAAATCAGGTTTTATACTACTGCCTGATGCACATGGCT -TGAAATGTACTATGCGTTGCATAGGGAATAACAATTAGTGTACCCTAAAAACATGAAAAC -ACGCAATCACCCACCGTTGCCGGGCAACGCAGCCGTACTAACGTTGATCTGGCTCTCGGC -AATCTATTAAGTTCCCTCTTTCTCTGTTTAGCGCGAGCAAATAAGGTCGGCTCAATGTAG -GTAACTCTTGGTGATTGGCCTGGTAATATAGAGAAGCGGCGAGACCCAGGTTTAAATCGT -CACTTGTTTACTAAAGCTTTACAATGCTACCCTTTAATATCTCTTTACCTTTTTACATTT -TCGTTACTGACGTGAACACCTGTCAAATTTAATATCTTGCACTGTCCGAAAATAGACCTT -CGCGTGGCACACCATGCTCGCCGATCCTGGAATGTATCCCTGCTTCTGATGTCTCAGATC -GCAAGCATGAAGCATCAAAGGGCAAAGGCACTCTTACTTGCCCTGATTGCGCTCGTCGAT -CTCTCTAGCGCAATCCCATACACTCCGTCTTCCGTCTTCCTGTCCCCGCAACACAATGAC -TCTCTCGCATATATCCTTCGACCCAGCTCCAACGGCGAGACCGAGTTCGTCTCCCTCAAC -CTGTCCAGCAGTATCGATGCCGAGAATTTATCATATGAAACGCTGCTGAAGAGCACACCG -TTCTACAGGTCTGATCAGGATTCGAGCTTCGTCTCCGCAATCAATGATCAAGGACTCATC -ACAGTCTACTCGGGCAACTGCCACAACGCATCCGATAATCCAGTACTATGGCAGTTCCAC -CCAAATAACAAAAGCTCAGTCGGCAATGGGACTTGGGATAGAGTGCCAATTATCACGAGC -GACGCAAAAACCGCACCGAACTATCTGGCAGCGGGGTTCACATACTCGTCGTCCCGCCAC -GAGAATGAGAGCTCCATGTACGCTTTTGGTGGCATGTGTCCATTCGCGAACAGCACAGAC -GAAACATGGGTCTCGGCAGCCAACTACTCCCGGTCAACGGTCGTATTAGGGCCGAGTCCC -TATTACAACACCAAGTACACCGCTGCCACTACTGGGAAGCGTGCGCCGCCAATCGCTGAA -GCTGGAATGGCCGTCGTCCCGCTTCAGCCTACATATTCTGCCGGTTTTACGGGGAAACAA -CAGGACTTTTTGTTTATTGGTGGTCATACTAGGCAGGCGTTCCTTAACATGTCGCAGCTT -GCTATTTTCTCGCTACCGCAGCAAAGCTGGAGCTTTGTTTCTGCGATCTCGGATTCGACA -CCACAGACTGAGCTGACTGTTCGGGATACGGTTTCTGTTGAGCCTCGCTCTGGTCACACC -GCGGTTCTCTCGGAAGATGGAAATAAAGTGTTTGTATTTGGAGGTTGGGTGGGTGATATG -ACCGTTCCCGCTGAGCCGCAGTTTGCAGTTCTGGATCTTGCAGAAGGGTTTGGTGGAGCC -GCCGAATGGATTTGGACAGCTCCCTCTTTCGAAGGACTAGGGATCGCCAAGGGTGCTGGC 
-ATCTTTGGACACGGAGCAGCAATGCTCCCTGGGGGCGTGATGATGATTTCTGGGGGCTAC -AATATCCCGAAACCATCCTCGAAACGGGCCTCAGCGAATGCACAGCCCAATTCGCGGGTC -TATCTCTACAATGTGACTTCGAGCAGTTGGGTTACTTCTTACAGCAATCCTGCAGCCAGT -TCGTCGGATGCTACCTCAAAGAGTTCCAGCAAACTTTCATCATCCCAAAAGGCCGGCTTA -GGTGTTGGACTGGGCATTGGATGCCCTGCTGCTATGGCCATCGTTTTTTGCGGATGGAAC -TACCACCGAAAGCGTCGCGTAAAGGGCAAACGAGACTCGCAGCTACGGGACCTTGCTTTG -GGGGCAGAACGAGCTCACTTTTGGGGTCGAGATTGTCCAACCCAAGCAAGCAGTATTCGG -AGTTCTGGAATGAGCGAAAGAAGAGATGGCCCCGTATATTCTTGGTCCGCAAGTCGCAGC -CAGACGACACGACCCGACTGGAAAGACCAGGGCGAAGGCACGGCGGAGAGAACTGGGCTT -TTGATGGACCCTATTAGTCCACCCAAGAATAACCGAATGCCTGTGATACCTCCAGTGAAC -AATAGGCCATTCTCATATCGAAACAGTGAATATCGACGCAGTGATGCTACTGGTGACATC -CATCCTATTGATGAACGCGAGGAAGACGAAGCAATGTTCCGAGAAAACGTCATGGCTACC -ATCCCACCTGGAACAAGACCCACGGCCAAGACGACGGAACCCGAAGACCCCTTCTCGGAT -ACTCCCTACGCAACTCCTCGGAGCACTATCTTCGGTGTTGGGCTAGGTCCTTTCTACACC -CGACGAAAGGATATTGGATCTGTGGATGCTGAATCTCCGACAAAAAGCGAGCAAACAGCC -ACCAATCTGTCAAGCAACTCGGCCTTCTCTCTTACGACCTCCCAGCCTACGGGCAAGGTC -AACCAAGCACGAGCCATCCTCGTCGATCGACCTACGAGCTGGGGCAGTGGGCGCCAGTCA -CTGGAATACTTCGCCGCTGGCTCAACACACAGCGACCCAGACGGCGTACCCCCCTCAGAG -AACTCAGTATCGGCGGATTCCTACTCTACAGCCCAAACAAATCTGTCCCACCGCCAGTCA -GAAAACGATTCCCTCCTCTTTGACGCCCTTGAATCCACCACAGCCCCCTCATCCCCCTCA -AAGCTGGCCCGAGAATCCAAACCAAAAGCCTCCGACTGGGTAATGAACACCATGCGCCGA -GCCCTAACAATAAGCCGCCGCAGCCCAGACAGCCATCTCTACTCCAGTGCTAGTACTACC -AGCACCGCGCACCGCGCCTCCGGCATTGATCGCAGCAGCACCCTGGTCGGATCCGGCCAA -CAATCTACAGCAGTCCCCAGCACGCCGCGCCGAGCAGTCAGCGCTTCAGCAGAGCTATTC -CGCCGCAAGCAGGGTGCCAAGGACTGGAACGCGCGTAAGCGAGTCTCAGACGATGTATTC -AACACTGCCCGGTCTACACGCGATGATCTTTTCATCGGCGCGCCGGGATACCTCGGTAAC -GAAGCTGGCTTCGGCGATGATGAGGAAGATGTCCATGATTGGGACCTTGAGGGTGCTGCT -GAGGGCCGACGAGTCCAGACGACTTTTACTGTCCCTCGAGAGAAACTCCGTGTCGTTAAT -GCTACCGCTGGCGATATTGATAGCATGTCTGAGCGCTCTGTTAGTTGGGGCACTGGGAAC -CGGAGGGTTAGTACCTAGATCTGAGATTTGGGGGtcttttctcttcttcgattgtcttgt -tcgctttttcttcttttatcttcttgtacttttgcatgtgccttttttcttttcatttct -ttttccttttttGTTGGGTGAGACCGGTTTGGTGCCTGCTTTTATGAGAGACACCTAGAG 
-GTGTGTTGTGTCCCATTGCATTGCGTCGTTTGTACACAACTTCGGGGGTTCCGTTTTCAA -GCTCGGAGTCTAGTACTCTTTATTTTTTGTCACGTTGGTTTTTGTGGGTGGCTGCTGGTT -TTTGTCGTTTTGATATCCACGGCGATGTTTTGATGCCGTCCCGAGATGAGATGATTGTTC -TGATGAATCGAATGAATCGATGCCTCCCCCTTTTTTTATTCCACACTGTGGCGTTTGTCA -GATTAGGTAGCGAGTTCAAAATTGATGTCTTGCTATGAGATCTATTGAGCCTTTGTAAGT -ACCTGCAACAAGTATGGTTATTCGATTATGCGACCTTGCGATGTCTTTTTTCAAAGAATT -CATAGACAATTCATACACTATATCATAGAAGACGAGAATCAGTTTCTCCCGAAATTGGTA -AATGCATGAACATGGCATCTATAAAACTTTTCTAGCTCCATGATACCTCAAAGATATATA -TAGACACAGACATGATCCTTCTTAATGACGCTTGCGTATAACATTTTTTTTAATATAAAT -TGTCAGGTTTCCTCCCGCCTACGACGAAGCCGCAGTAGTAGGCTTAGTAGGCACAGTCTG -AGGCAGCTGGGGGAGCAAGTTCTTGGCACCCTCACGGGCATACTGAGCCATGAGAGCGTC -CATGATAACGAGCGACGACATAGCCTCAACGATAGGGACAGCACGGGGTGTAACACAAGG -ATCATGACGACCCTTGGCCTCCAGAACACCCTCCTCACCGTCGTAAGTAGTAGTAGTCTG -CGCCTGGCCGATAGTAGCAGCGGGCTTGAACGCGACGCGGAAGTAGATGGACGCGCCGTT -GGAGATACCACCCTGAATACCGCCAGAGTTATTAGTCTTGGTGGTGAGGCGGGGGCCGCC -GGTCTCGCTCTGATTGACAATGAAGGGGTCGTTGTGGATTGAGCCGGGGACCTCGCAGCC -GCCGAAACCGGAGCCGATCTCGAAGCCCTTGGTTGCGGGGATGCTGAGCATCGCGTGGCC -GAGCTTAGCTTCAAGCTTGTCGAAGCAGGGCTCGCCAAGTCCCACTGGCACGTTGCGGAT -CACGCATGTGACGCTTCCACCGATGCTGTCGTGGTTGTCGCGGTACTTCTCGATGACTTT -GGTCATGCGCGCAGCTGCGGCGGTTTCCGGGCAGCGGATGGGCGCGAAGGAGTCGACGGT -TTCGCGGCTGATCGACTCAATCAGGCTAAGGAACTCTGGGTTCGTGGAGGCGGAGGGGTG -TTCGGGAGTTGGGGGGAAGAGGTGCTCGTTGCCGACGGAGGAGACGAAGGCGACAATTTC -GATGTTATGCGCCAGGCGGAGGAATTTCTCGGCGATAGCGCCAGCGGCGACACGGCCTGT -GGAGATTGGGTTAAAAAGAGTAGATGTTGAGGGTAACACATAGCTTACCAATAGTCTCAC -GAGCGCTGCTGCGACCACCGCCGCTGCTGGCCTTGACGCCGTACTTCTCAAGGTAGGTGA -AGTCGGCATGGCTAGGGCGGGGGTACAGGTCCATTGTGCTGCCACCGTAGTCCTTGGGAC -GCTGGTCCTCATTGCGGACGACCATGGCAATTGGTGTGCCCAGAGTAATACCAAACTCGG -TGCCTGACTGGATCTCGACTCGGTCCTTCTCGTCCCGCGGGGTAGTGAGGGCGCTCTGGC -CGGGACGGCGGCGGGTCATCTGGGGTTGGATGTCGGTCTCGGTGAGTTGCATGCCCGGAG -GGCAGCCGTCAACGATGCAGCCGACGGAGCGGCAGTGGGACTCGCCATAGCTAAATCGGG -GTGTTAACAGCAATATGTGTGGTGGGGAGTAGGGAACTTACGTGGTGACACGGAAATATT -GACCCCACGTCGACATGATGGACGTTGGAAGATGATCAAAAGGTATCAAAGGAAAGTTAT 
-AAGTTCAAAGACCCCGAGATCAATGAAAAGAGTAAAATAGGAAGAGAGAACAAAGGTGTT -GGCTGATCGCAAGTAGAAAAAAAGCAATTAGAAATTCGACTCATCGCTGGATTCCTATCT -GGGACTAGCGCAACCGACTATCGGCCTGATAACTTTCGTGATGCGATCAATAACTTCGCC -CACCCTTGCTCCAATGCTCGCCCTTCGAGGATGTTCCTCGTCTTCGGCACTTCGGTCTAA -CCTATTGAAGTATGGATGGATTCGCCATAATTCAAACTATCCCGCTCACCAATTCGCCGA -GCTCGCCAGCCGTCCCTCCTCTTTACATCAGATCTACCAATCGCTATCAACAGATCCCTA -CGTGAACCTTTCGATCGAGCACTTCCTGCTAGAAAATGCCCCTGCAGAGTCCAGCGTCTT -ATTTCTTTACGCCAATCGACCATGTGTAGTTATTGGCCGCAACCAGAATCCTTGGCTTGA -GACAGATCTCCGCGCACTTCACAATGACCGAAGGAACGGCACAACTGGAGAAGACGAAGC -CGCGGTATTCGTCCGTCGGCGATCAGGTGGCGGAGCTGTCTTTCACGATGAAGGAAATTT -GAATTACAGCGTCATATGTCCGCGTACCTCATTTACGCGCGACAAGCATGCCGAGATGGT -GGTGCGCGCACTTCATCAGGTCGGAGCTACCAACACTAGTGTCAATGAGCGGCACGATAT -TGTTATGGCGCGAGCAGAAACAATGCAAAATGATCCCAATGAGCCTCTGACACGCAAGGT -CTCTGGCTCCGCATTCAAGCTCACTCGACACCGAGCCCTCCATCACGGCACTTGCTTGCT -TGATTCCCCGAATATTCATGATCTTGGCCGGTACCTGCGATCATCCGCCCGCCCGTATAT -ACAAGCCAAGGGTGTTGAAAGCGTTCGTTCTCCAGTTGGCAATGTCTCAGCTGCGTTAGC -CGACTCGTTCTTTTCCATGCAGAGCGTGGTTCAGAGTGTGATTGAGCAGTTTGCTCGGCT -CTACGAGGTACACCCCGATGCTGTTCTCAGGGCCCAGCGTGCTCATGCCAATGAGCCTGA -AGTCTTTGCTGGAGATTCTTGGGTGGTTGGTACGGTAGGAGAAGTGCAGGGCGAGCAAGT -ACCCGCCATTGGGAAGGGGATTGCCGAGTTGCGCTCATTAGAGTGGAAATACACACAAAC -ACCGCGGTTCACTTTCTCAACACACCCTATCGAGGAAGACCCTCGTGAAAGGCCGCCATT -GCCCTCCTCGCTTCCCTCAAGTACTCGTGCATTCCTCCGTTTACAGCATGGCGCTATCGT -CGAAAGCCACATCTCTGTCTCACCTGACGAGCCCACCGCATCTGACCAAGCCGGTCTTGT -CCATTCAGCACTGAACGGGCAGAAGCTGCACGAGATCACCTTGGCCCGATGGACCGAGAT -CCTAAAGCAGGGTCTTGGTGAGACCAAGGGAGTTGATGATGCTCTTGTGAAAGAGCTGGC -CCGATTCTTGAGTGGCCTTTTAGGGGGGTAATCGTCCAATTGCATTGATGCCATGCGATT -CGTGGCGTGTATAGATATTGAATGCTCTTGGGCTCTATGTACAACATACAATAATATCAT -GATATCCTATATGATCAGCGCCACACTATTGGCTGCTTTGTCTGTATCCTTCATTGGTGG -ACGATAATTTTCCCACTTATCAGATGCTCACAGCCTATCTCGGACAGACTAGTTTCTCGT -CACTGTTGATCTCCCCACGTTACGTCGACCGGCTAAATGCAAACCCCACGCCATCCCCAG -GGTTCCCCCATTATTTCCCCCAGAACTACGCACGGATCCAACTGGGCACATCGGCATTCT -CAGCGAGTCCTGATTCGCACTAAAATGGCCACAGATTTTACTGTATGTCAGCATAGCTAC 
-ACAAACCCCGATGTGAACTGCCTATTAAGCAATTCGTACTCGACAATTAACCGATGTGAA -TATTTATTAAACACACAACTATCCTACCAATAACCAAAAGCTATCACAACAACGCAGAAT -ACCTCATAGCTTCCAAGCCTACAGACTTACAATGTCTATTCACGCTAACGGCCGAACCCC -CTCGCAGGCCTTCAGCAAATCCCCCTTCTGCACCAGATCGGACCTCCAAGAAGCATGCCG -CGCCATTCTCGACCCGCTAGTGCCCCTCTTTACAGCAGGCGGCACACGCGTTAAAGTAGG -CACGTCGACAACACGGTTCGACGAAGGTGGTGCGCAGATTGAGGGATACGCCCGCCCACT -ATGGGGTCTGGCCTCGCTTCTGGGCGGCGGATATGAATACGCCGAGGCCGAGCGCTGGCG -CCAGGGAATCATCAATGGGACTGACCCCGAACACCCGGAGTTCTGGGGCCATATCGAAGA -TATGGATCAGCGCATGGTGGAGATGTGTCCGATTGGGTTTGCGCTTGCCGTTGCACCTCA -TGTCTTTTGGGATCCATTGACGGAGAAGCAGAGGGGGAATGTTGGTAGCTGGTTGGCGAG -TATTAACGCGCGCGAGATGCCGAATACGAATTGGTATGTCATGGCTTGATTTGCCCAGGA -TGTTCTGTTTTAGGTATGTCTTGAATGGTTCGGCTAATCGTGATTGAGACGATTGTAGGC -TGTGGTTCCGCGTCTTTGCAAACTTGGGCCTCAGGAAGAATGGCGCAGAGTACTCTTTGT -CTCGTATCGAGGCTGATATGGATCATTTGGATACCTTCCACGTTGGCGGGGGATGGAGCA -ATGATGGACCTAAGAGTCATCATCAGATGGATTACTACTCGGGCTCATTCGCTATTCAAT -TCTTGCAGCTTCTTTATTCGAAGCTGGCGGGCGACTTTGACCCTGAGCGTGCTGAGCGCT -ATCGTCAACGGGCACGGGAGTATGCGAAGGATTTTGTGCACTACTTCGATCCTAACGGTA -TGTTTCCGGCGAGTATTTCTTTCAGAATATGACTTGGTCCTGACAAGAACTATTATTACA -TAGGCAGTGCCATCCCATTTGGACGGTCTATGACCTATCGATTTGCCATGGTTGGATTCT -GGGGTGCATTAGGCTTCGCAGATGTCGAACTGCCAGAACCACTGACATGGGGTGTTGTGA -GGGGAATTCTCCTTCGCCATTTCCGGTGGTGGTCTACGCAGGATGACATGTTTAACAACG -ATGGTACCTTGAGTCTAGGCTACTCGTACGCTAATATGTACCTAACGGAGAATTATAATT -CTCCGGGGTCGCCGTACTGGTGCTGTCTTTCGTTCACACCATTGGCACTACCAGAGACAC -ACCCCTTCTGGTCTGCACAGGAGGAGCCCTACCCAAGCTCCTCCCTGCCTGAAGTTGTGG -CCCTAAAATACCCCAAGCATATCGCCATCCGTCGTGGTGGACACTCGTTCCTGCTGTCAT -CTGGCCAAGCATGCCACTATCCCCTCAAAGCGACACAGGCCAAGTATGGCAAATTTGCAT -ACAGTTCCGCCTTCGGATACTCAGTTCCAACTGGTGGCTACCAGCTGGAGCAGCATGCCC -CAGACAGCATGCTTGCGTTGTCAGACGACGGCGGAGATATCTGGCAGACTAGACGACTGG -CAATCGATGCACGTTTCGAAGAACCCGAGCCCAATGGTCAACCTATTCTTGTGTCTGGTT -GGAAACCGTGGCCAGATGTTGATATAGAGACCATTCTCATCCCGCCAACGGAGAAAAATG -AGAATTGGCATCTGCGGGCTCACCATGTACGTACTGGTCGCGCTTTGCAGACCTCTGAGG -GTGCCTTTGCTATTTATGGATGTCGCAGCGATAATGGTCGTACTTTGGGGCCTATGACTG 
-AGGAAGATTCAGAGGGAACAATGCATGATAGCCGGAGTGGGCTGGTGGTTTCATCGGCTG -GGGCTGTCGGCATTGTCGAGCTTCATTCGGCTATTGAACGGGCTGGTCGAATCGTCTTGG -CTGACCCTAATTCCAATATTATTCACGGACGTACGCTTTTGCCTTCGCTTTCGGTGGACT -TGGCAGCTGGGCAGGAGTCATGGTTTGTTTCGGCAGTTTGTGCTGTGCCAGACCATCAAC -GAGATTGGAAGGCAGCTTGGGAGAAGAAGCCAGTTATTCCGGAATGGCTACAGAAGCGAA -TAGATACAGACTAAAGTAGAAATCATATAAGACAATGAATTTGGTTAAGGGACTATGGAC -TTGTCCTATTTTGACAGGGGTTACTCAATTCGGATTAGCCCTAAGTTTTGAGTTTCCCCG -CAAACCGGGGTAGGATCAACCCGATTTCTCAGGCACCTTACCAGGTTCTCCTCACACATA -ctctctctctctatctctttccctatcGGCTAAAGCTACATGTTTTCTATATTTACCCTG -CACGCTTTCAATCGGGATATCAGCCCAATACTGACTAGCGTGTGGCTTTACCCCACGTCT -ATTTGAGAGCTCGTGAATCAGCCGTGTCTTGTACTCTTGGACCCTTGCACCAAGCCCCCA -AAACATCCTAGACATGTGAGCAACATTAGATTCTTGCAGCCCGTGTTGTCCTTGCTTCGG -ATTCGTGGATGTTTTCCCCCATCTCTGCTCGCCTTGTTTAGCATCCCTGCATTTTTCCCA -TAGGCCGCTTGGTGGCCAGCCAAGAGAACTGGGCTGGAGATTAAACCTCTCATAGGCTTG -AGATTGCAGTATTTTTTCCTTCCGTTATACATATACCAATAGCCGACGGCATCTTTAGCT -TAGCTTCGTTGTGAGCTGGTCTCAGAGAATTACTTTTAAATTGATCACTCCCCTCCTTGA -CTGGAGTCACTGCGATCATCTTGCTAAACTCCTTTGACTACTACCTAAGATGTCGTGGCT -TCTGAAAAAGATCGTGCATAATGAGGCGATGAAAGAAGATCCAAAGGAGATCTATGGATG -GAGGGTCTTTATGCTTGCTTGTTCTGTTAGTTTCTCCTTGGCGATGGCAATGGCAATAAT -GCTAGAACTACGTGACTAATGCTTCCCAGGCCTGTTTTGGCGGAATGCTGTTTGGCATGG -ATTCAGGTATCATTGGTGGAGTTCTAACCATGCCAGGATTTAAGAAGTGGGTTTCATTTC -TTGCTGTTCCCTGCTGTCTAGCAATTCTGACCAGTTTCATTGATGTAGGACCTACGGCCT -GGAGGACATATCCAAAGTTGCAGCGGCAAATCTATCGGCCAATATCGTCTCCACCCTCCA -AGCAGGATGCTTCTTTGGAGCTCTTGTTGCCTCCCCAATAGCCGAGAAATGGGGTAGACG -AATGGCCCTAATCGGCGCGGCAGTGGTAGCTATACTAGGAATTGTCATGCAAACTGCAGC -CAGCGGCCACATCGAAGTTATGTATATTGGACGGTATGCCATATCTTCCCATCCAGTCCT -TTGCCCACAAGTTCAAGTTAACACCCACAACTTTAGACTGATTTGTGGTTTCGGAGTCGG -TGCCGCATCCATGATCAACCCCCTCTACGTCGCTGAGAACGCGCCGCGCGCAATTCGCGG -CGGTCTAACAGGTCTATACCAACTCTTTATCACAATGGGCATCATGCTCGCATTCTGGAT -CAACTACGGCTCGCTGCTCAATATCGAGGGTCCTGCAATGTACATCGTGCCACTGGCAAT -GCAAGGTCTCCCCGCAGTGCTCTTGTTCTTTGGCATGTTAATGTGCAACGAATCTCCCCG -CTGGTTGGCCAAACAAGATCGATGGGAAGAAGCCCGCGCGACACTCTCCACAGTGCGCAA 
-TCTACCTTCTGACCACCCATATGTGGAAGAAGAGTTCGCAGCCATTGCCACGCAGCTGGA -ACAGGAGCGCGCACTTGTCGCTGGGTCTGGATTCTGGGACTTGATGAAAGAGATGTGGCT -CATTCCGGGTAATCGCAAGCGCGCAATTATCTCTATCGTCTTGATGGTTTGCCAGCAGAT -GACTGGAACTAATGCGATTAACTATTATGCGCCCCAAATCTGTATGTTGCTCCCACTCCC -AATATACTGTTGAGAAATCCCCCAGCTAACATGTCATAGTCCAAAACGTGAGCGCGCACC -TCTATAAGCCCCCAAACCGAAACACATTCAGATGCTAACTACACCCTATCTCCAGCTCGG -CGTAACCGGCAACGCAACCAACCTCTTTGCAACTGGCGTCTACGGAATCGTAAAGATGGT -CAGCTGTGGCGTGTTCCTGATCTTCGTAGCGGACTCCCTAGGCCGACGCCGCTCCCTCCT -ATGGACTTCGATTGCGCAGGGCTTGGCAATGATGTACATCGGACTGTACGTGCGCATTGC -GCCGCCAGTTGAGGGCGCACCCGTCATCCCCGCCGGATATGTTGCGCTTGTGTGCATTTT -CCTGTTCGCCGCATTCTTCCAATTCGGATGGGGGCCCGTCTGCTGGATTTACGTCTCGGA -GATTCCAACTGCCAGGCTCCGCTCACTGAATGTTTCTTTTGGAGCTGCTACGCAGTGGCT -GTTTAACTTTGTTGTTGCCAGAGCCGTGCCGAATATGTTGGCTACTGTTGGTGCTAATGG -TTATGGGTGAGTTTGGCTGGATAGGTTGGTGCATGGAACTGACGGTTTGCTGACTTTGGA -TTGTGCAGCACGTATATTATCTTCTCAAGCTTCTGTTTCTCGATGTGTGTCTTCGTGTGG -TTCTTTATTCCGGAGACAAAGGGTGAGTCGAGTCCTCTGATATTATGGTGTCTTTTTTTG -CTGATCATTGTGTACATATAGGTTTGTCTCTCGAGAAGATGGATGAGCTCTTTGGCGTCA -CGCAGCTGCTTGAGCATAAGAATGCCGACGCAGAGCGAGGTTCTGTTGGCGATAGAGGGG -ATAAGGCATCTCAGGTGCATGTTGAGAGGGTGAATGAGTAGAATTTCACAAGTTTAGGTC -TATGTGGAATGTCAATTGTTGAAAGTGATCAGATTTATATCACTATTTTAGTTTCAAAGA -GTATTCACATCTGTCATCGTAATTTCATGGACATCATCAAAATACCTGGGTAGTCCAACC -CTCTCGAAAGATATGTACAGCTCCCAACACCTTGGGGGCCGCCAAACCTGTCAATATAGA -ACAAAGAAATATTCGCCTGCGCTATGAACCCACGACATTCGCCTCAATTAAACTAGGAGA -CTGAAAGCAAAGAAAAAGGAAAGCAAAAAAACCGACTGGGTGGATCTATCAACGCCGTAG -AGAAGCCATCCAGTCTGAAAGTCACGATAGGGCTCAGTTTCTCAGTAAACTGACAGAGAG -CCACCTGATCAGCAACAGATAGGGGGGATAAAATGCCGAGTGGTGTACAAAAAAAAAACA -GAATGGGAAATTTTGAAAGGGAGGGAAATTTGAACGCGAATGAAACACAAAATCATCGGA -CGTAGTTGGTTGGACGCCGACCAAGTAGCCCACGCTGGGTCTGGCTACGAGTCATGCGGC -GAGGAGTAGGATCATCATCTAAACTCAAGCCACCGATGCCTGATCCAAGACCAGTCCGTC -GGGGCGAGTAGAAGGGGTTGGAACCCAGACCGGCATGATTCCCATACCGAATGTTGCGGT -TTGTGAAATTTGTGGCGGCGGTTGTAGTTGTAGCTATAGTCTCGCTGTCTGAATCTCGCT -CATAATCAGACAGGGGGTCGATTTGCTTTAAAGATTCCATGGTAAAACTATGTGGGTTTC 
-TGTTTTGGTTGAAACCGCTCTGACTGGCAACGGGGAATGGCTGGGTAGATCGAGATGAGG -TAAGCCCATACTGTGGCACGGAAGGTAATGCCGAAGAGAAACTAGAGGCTCCATCGGTAT -CGCCAAACGAAAGTGAAGGAGCGGGTGGTTGTGACTCAAATGATCGGTGTACAGGGGGAC -TGCTGACTCGAGGTTCAGGTGTAGACTTGGATGAAGATTGACCTGCATGCCAGTCAAGAG -CACCCGATTGGCTGTGATGCGGGGATTGTGGCTGCGACTGTGGCGATCCAGATCCTATGT -GCTGGCGGCTTTCTGACCTGGCCTTATTGTAGAAGGCGAATACATTCATAGTTTCCTGCG -CCGCCATGAAAACCAGGAGTAGTTTGCCGTAGCGATCGAAGTAGTCCCGGTTAAGCGAAA -CCGTGGGCAAATGCGCTCCCATGTGAACAGCGACGCCAAGTTCAGTGATAGAAATCAAGA -TCTCCATGCCATTCCATTGCACAAGAGGACGTTTCAACATAGTGATCAATGCGAATCCCG -CAACAAGGCTTGCCGCACCTAGGGCAGCGATCTCCACATAATTTCCTGGAATCAAGACCT -GGTGATTCTGAGAGATAGTCCATGCGCTGATGAAGCTGATTAGTAGAACCAGTCGTAGGC -ACCCATAAAAGAAATGACTTTGAGCCTCATCGAGACGCGTAGAGCCCTGGTTCAGTGGCT -GCTGAGACCAGCCAGGTCCCTGGGAAGCATCGGGCTGAAAATTAAAGGCACGGTCAAACA -GTGTTTCCAGCCCAGTAGATGTATCGTGATCACGGGGAGGAAAGAATTTTGGATCTTTGA -ACACTGGTTCAGCACCGGCATGCTTTTGCTGCCATTGACCTTGCGGTTGTGTAGGGCTGC -GGTGAAACGGATTAGGCTGAAACACTTGTTCAATGGGCTTTGTCGAAGTCTGAGTGCGAA -GCTTCCACGCAGGAGGGATCGGGGCTGCTGGGATCTGCCCCTGAAATGGATGAGGGCCGT -CCAGTACAGACGGCTGATCTCGCTGTGTCACAGTCGGTCGGAGGTTATGGGTGACAGAAG -GAGACCAGTCCATGCCATCATTATCCGTGAGATGAGAGGTAAGCGTAAGCGACTGCTCAA -TGGGTGACTGTAGTTGTGGCTGCGCCAATCTGCCAATCGGAAAGCGCTGCTGTAATCCAC -CAATGTTTGCCTTCGGTGTTCCGCCAGATCTTGTGATGCGAGTCGACACCGGTGACTCAG -GAAGTGCAGAAGAAGACGCCTTGGAAGATTCAAATTGAGTTTCCCAGCTGTTATCAGACC -AATTGACGAGCGGACGACTAGAGTAGCTGACAATATATCGAGAGAACAAAACCGACTACC -ATGGTCAGCGCGTGTAAAGTTCGGGGGGGGGGGGCGGAATGCCTACCACAATAGTAATAA -GAATCATCACAGCATGGACGGCTGGAATCAATTTGGGTTCTAACCCACTGGCAGACGGAT -CTTTGAGCACTGCCCAGAAAACACATCGTACCACCATGATGATAAGTTGGGCTTCGTAAT -ACTCTACAAGACCGCGGAATTTGCCGGGCATTCCGTCTATCTTCATGCGCAGCTTTGGAT -TCCACCAGAGCGAAACTGCACCTGCAACCAATGCCATACCTGCAGTTGGTGCTAGATCAG -GCGAGCATTCGCTGGGAATGCGCCTGAAACGGAGAGACTGGTTGACGCACGAGATCAGGG -AGTTGGGTGCCATTGGAGGTCGTCCGGTGTCGGGTGAATGGTCGAAGCTTTGCGAGACCT -GAGAGAGAGTGGCCACACTGATCAAGTTCCATGCTAGCTGACCAGCAATACTAGCCCAAT -ATGCAGATGCACCCACAAAGAGCAGCAAGCTTCTCCAGCTCCGGTTCCTCGCCTGTCGCG 
-ATTCTCGATTTGCTCGACTTTGATCCATCATGCGCCTGAGGTGATCCGATTTCGCTTCGT -AGCCAGCCTTTCTGATGCGGGATTTCACGATGGGTTCACAAGACTCGCAAACCTGGGGGT -ACCGGTCCTCCAGGCTTTTGCGGAACTGCTCATACCCGCGTTCGTATTCGCTATCGGTAG -GATCGTCTGAGGGAGGGAAGTAGGAAGCCAGTGAGCTTGTAAATAAATGCTGATTCCGAG -CACATTTGGCACAAAATGACTGGGATCCACCGAAGTCAGTCGATTCAAAGGGCGAGTCTG -ATGCGCCAGGCGCGTAGACCTCAGCGTTGGTGACCACAGCTGGTGGGTCTGTGATTTCTC -CTTTCTTTTCAGAAAACATTTAGTGCAACTCGAGTGGATTCCAGGGAAGAATAAGCCTAC -CTCATCGAAGTAGTTATCAGCCTCACAATGTTCACAGTGGAAATTGCGAACAGGCCCGCG -ATGAGAGCGAGTGGACCGCTTCCCACAATAAAAGCATATCAAACGCTTGGAAAATAGAGA -GGCCATGACATAGTAGCATGGAAAAAAAAACACACAAGTGAAAGCGTTGGTGTAAACAAG -GGAGATGGTTCCGGATGCCAAAGGGAAAATAAAACTAGTCGAGCTTGCACGTGCGGGGGA -CTTGTATGGAGTACCGCGACTCCAATTTTGTTGATCTGCAACTAAGACTTCACTTTACTG -CCATACTACCATTGTTATTGGCAGTTTTTTCTCATTTAACTCATTTCCTTTTTGTTATTC -TTCACTCTCTATGATAATGATACACCAGAATCCTCGGGCTGTTTCACAAAACTCTTGAGA -CCGTACAACAGCGGTCCAAGATGCCCTTCTCGCTTGCTTCTTTATCCTTCTGGGACCTGC -TCCTGTGGTCCTTTGTCCTGGCAATTACACTTGGGGCCAGCATTCTTCTTGTGGGAACCC -TTTACCCATATCATTGAATCAATCTAACTGACTTGCCCATCTCCCCTAGGTTCTACTCAT -CTCCCTTACGATCTCCCAAAATACCAATCCCACCAAAACTCGCCCAAGGGGATCTCCCAC -CCACCTATTGGTAGTTCTTGGAAGTGGTGGTCATACAGCTGAGATGCTTTACATGCTCGA -GCGCATGAAATTCGACCCTCAAATCTACACGTATCGGACATACCTCGTGAGCTCCGGAGA -TAACTTCAGCGCAGACAAGGCCAAGGATTTTGAGGCTCAGCGTGTACAGAATTCGCAGGG -CCATACCCATAACAAAAATTACACCATTGTCACAGTACCACGTGCCCGACGGGTCCATCA -ATCGTACCTCACCGCGCCATTCTCGACGCTTCAATGCTTTTGGGCTTGTCTCAATGTTCT -TCGTGGGCTTCACCCAGATCAAAAGCTCCCAAAGGAATACACCTCGCCTTACCCTGATTT -AATCCTCACAAACGGCCCCGCTACAGCAGTCTGCGTTGTCGCAGGCGCAAAACTCATTCG -TTTCTTTCAATGCTTTGCCAAATACACTACCTTGTGTCTTGGTCTACAAACAATCTCCCC -GTTTGCATCGGCACCAAAGCTGCGCACTATTTACATTGAATCATGGGCTCGGGTTAGTTC -GCTTAGTACTTCTGGTGTCTTGTTGTTGCCTCTGGTCGACCGTTTTCTCGTTCAGTGGCC -TGCCCAGGCTGGACGACGGGCCTGGTGGGGGATGAAGAAAACCCAATATGCAGGATGGCT -TGTGATATAAGCGCTGTGCATCTAAATCGGCTTAATCTGTCTTACCAGTACCTTCGATCA -GATTGCTGCACATATTACAAGAACCAACGTCCTCACCATGTAGGTATTTAGGTAAGAAAT -TAATCTATTGACGAGCAACGTGAAATCTAGTTCTGCACTGCATGCCTCTACACACATACA 
-CACACTGGATGTAGGGATTTAGATGGTGCGCTGCAATGTAAAACTCTATAATTTATAGTC -ACGACTTATGGGAACTTGGATTATTGCATACAAATTTAGAAATTCCAATAATAGACTGAT -TTCCAGGTACGGTACGTATAATTGTAGAGCACCTGGGTTACGCTACGTAGTTCGTCCGCA -GAATCAGGCATACTAAGTAAATCTCAACAAGGTCTTCAAACGCAATGAGATTTTGATTCA -AGACATCGATCATTAAGCACTCAGTCCACGCTTAATATTGCCATACATACATGTTATACC -TAAAGAAGAACCTAGTCCCATCTCTTTTAACAGCTTTGATCATATCTCGATTTCTGAAAG -TGCTCTGTCCTCTGATTCCCCCATTAGAAGTCTACTATTTGCTTGCATCTTGGGGCTGGA -TAGAAGCCATATAATCTTAGTGAATACATTGCTATATGCTGGACCATGTCGACCATAAAT -GCGTAGAATACGTAGAATGAGGGTGATCCTCACATGATAAGGTAGAGACCTTGCACATGA -CTTCAACGCTATATACATCGTAAAATACAAACTGTTCAACTCAGCTAGGTCAATCATACC -AACGCCAATCCAATGCTAACCACCAAAGAGATCACCGCCAAGTTCCAGGAAGTCCCAATG -ACAGTCCCTGAACCTTCAGGCGGAGCCTGAAAGCTCACAAGGGATTCATTTGTGCTCGCC -GCCCCAGTAACAAGAAGCCGCGTAGAATTGGCAGCATCATAAGCGCCAGTGAACACAATA -TTATAAGTAACATTTGTCGTTGCATGTTCCTCGTCATGATCCCTGAACCACGCAACCACA -AACCCTGAAACATTCAAGTGAATAGTCTCGTTTGTCAAAGACCCGTTAATAATCGCCCCA -GTCGGATATCCTAGGTCAAGTTTCTCTCTCCCACTTCCACCAGAGCAAGTGCCCTCCGGT -AGGATTAACCTCTGCTTCCGAAGTTGCCCATGCTGAAAGAGAGTCGGATCATTGAAGTCC -TTCGGTCTATCCTTCCAAGCCGAGATATCATAAACACCATCAAAATCAACCATATCATCA -TTCCCAGCCGTAGGCTGAATATCCAATTTCACGGAATCTATCGCCTCGTAGGAAGTATGG -CCTGTTCTATTGGTCGTCGACCAAGCAACCGTCGTACTGAACCCCAAAATATCAACCTTT -GAGCAGCTCTGGTACGAATCCGCGACATTAGGTAAGGCCTGGAATGAAATGGTAATGGGA -TTCTTTGGTCCATTGGGTGTGGGTGCCTTTGGCGCAACCCATGCAACGGCTCGCATCCGC -AAGGACTGCCAGCCAGCTGGACATGTCGCTCTAGGGCTGTAGACAAAGTTGGTAGTTGTT -GGTACGGGTGGCACGTCGAGCTTGTATTTGAAGATTGCTTCGCCTGTATAAAACACCCCG -TCGTAGTCGCTGGGTTCAAAGATATCTTGGTATCTGTAGTAGTTACCTGGTCCAATGAGG -TCGTAATCTATCTTGGGCGGTGATGGGCAAGAATATCCGCTTCCTGAGGATGTGGAGGTG -GTTGTATCgtcgctgtcgctggtggagtcgctgttgctggagctggaggtgccactttcg -ctgtcgctgtcATGGCCTCGGCGTGATAGTACCCCTTTTGGGATGGCGAGGAGAAGAAGC -GCAGCGAGGAAGGTTCGTTGGAAGAACATTTGTCTGTTATTCAAGTTCGTGAATCATTCA -GTGTATGGGTATCTTCTTCAGATACTTGTCAGGCCATAATATTCGTCCTGGGTGGCGTGT -GTAGGAGAAACTGTATAGAGGAGCATAATGTGGAAATGACAGAAAGTTATAAAGTTGTGC -ATAGACTTTTTTAAGCAACTGATCATCTCCTTTGCGGCCTATTCTAGTGGAATAATAAGG 
-GATCTATACGGCAAAAGCCCCAATGCCAAGGTGCCAAGGGCTTAAACCGAGATGGAGCTC -GTATTCACCATCAAATAGCTGATCACCAATGATAGGCCCGGTTCCGACTTTCTCTTTCAT -GTACTCTGTAGATGGTTTGTGTCTGTGTAAACTATGAGGGCACTGGAAGTTGTTCAGCGA -GCCCGCTTGAAGACCCCAGTTATCACTGCGATAGGAACAGACTTCCGCACGACCTGGTAA -AGCCTAGACCCAGCCTAGTTTATATGTTAGGCTTTGAGCAAAGCCACTACACAACGAGGA -AGCGAGACCGAGCCGCTCTTAGGAGATGTGGCGCAGGAATGGACATCAAGCCCTCATTAG -TTCTTTCAATGCGCAAATGTATTTGTGTCTATTGATTTCTCGACAAGCCTCTCATTCAAG -TAGTTCAAAGTCAACCCATTTACTTGCTGGCACCACAAGTTTAGGCCGGCGCTCATTATT -TTGCGACTTGGCCCTGCCATTAACCCACACCACAGTATCTTGTCCCCAATCCCACCAGTC -AATGAAAGCTCCCTTGAACCCGTACCGGAATTGATCACCGGGTGCTGCATTTTGCGGGAA -GTCACTCACCTCCCGCGTAAATGACCATGACTCACCAGACATCAGGGCCACGAATTTACC -ATTGGGGTCACGGCCAACGTTTACCGGGGTGTCAGTAGAAAATCTGTAAGCGTGGTGGGT -AAAGAGACCCCCAAGCTCATGCGGACTCCAGTCATCCTTCTCCTTTTTGTCGAAAGTAGA -GTCGGAATCCGTCATCGCGGCGATCGCAACCTTCGAATTTAAACGTATGGAATGTAACCG -GCTTTCCGTCATAGGCGTCCGGAGCTGTGTCATACGAGAGTGTGGCTGTGACAGAATGAC -GCAGTCTACCTTCTTGAGTGCGATCTTTCATCGATAGCGTAGTAGATTGTGATATTGAAA -GAATACTACGCCTCAACTTCGATATCTAGATCCGATGTCTCTGGGAGAGTAAGGGTGAGG -CGGGGAGATGCAGATCTCTTGGGCTTAGTATTTGGTCTTCTTGTTGGATGTCAAAGAGCA -TCGTACGGTCTGTCCGCGGCAGAATATCTGACGAAGCGTGAAAACTGACCCATTCCGGAG -TCATATGAAGCTTTGATAGGATACTAGGAATCATAAGCGCAGGCAAAGTACACCGGGGAG -TGATCTTAGGATGATCTAAATCTCAGTATATCAGTTGTCACTTGTTAGACAGATGGTCGA -TCCGCGTCGATCCTACCCCGCATGAGACACTTCCCTCGGCATTTCTCAAATGAATTTTAC -TATCTTTCTCTCTTTCTTGAACCATCATTCTCAATACAGTCTAATTCTACTCACTATGCC -CTACTCTTTGTCGGGTCGCAATGTTTTGATTACCGGGGGCTCTCGGTAGGATCCTTGGGA -GAATGGTTCTACTCAATAAGCTAACCTTGTGTGACAGAGGGCTAGGTGCTCTTTCAGCTG -AGAAGTTCGCCTCTGAGGGAAGCAATGTTGCTATCAACTATATGTCCAATAAGGAGGCTG -CAGATAACCTTGCCTCAGACATTGCAGGCAAATATGGTGTAAAGACTGTGGTTATCCAAG -GCGTAAGGCACTAGAATCCAATCTAAGGTCCATCCATTGACATCGTCTCGCATCTGGAAA -TAGGACGCCGGAGTCAAAAATGACTGTGTCAATGCCGTCAAAACCACGATTGAGAAGCTG -GGTGGCCTAGATATCATTATCTCGAATGCCGTGAGTTCTACGGTCGATCCCAAGAGAGAT -ACAAAATGTTGATATATGAGAAGGGATGGACAAAAATGACCAATTTCGCCGATCTTGATG -CCATGGACGACGATGACTGGGATAAGGTACTAAATTCCCCTCTATGTGATGAAATTTTTG 
-GTTCCACTGACCGTCGATACAGTGCTGGTCGGTTAATGTCAAGAGTAGCTTCCACCTGTT -CAAGGCAGCTTTGCCCACGTTCAATGCGAACCCTGAAGGTGGTGTTTTCCTCATCACAGC -CTCGACTGCGGTAAGCTACTAAAACATGTCACATGGCCTTGAACTATCCACTGCCATGGA -ACACTTCAAATACTGATTCCAGGGTCTTTTCTAGGGCGTCACGGCAGGTGGCAGCAGCTT -GCCTTATGCTGTCTCGAAAGCAGCATGTATGCTATCATTCCTACAAATTCCGCTAGTTGG -TTACTGACTTGTATCACTCAGCTATCCATCTCATGAAGTGCCTTGCTAGTAGCCAAGGCG -CCAAAGTTCGCGTCAATGCTATTCTTCCCGGTCTGTTGCTCACTGAATGGGTAAGTAGCA -CAGGCTCAACGCTGGTATTGATAAATACTGAGAATATCCCAGGGTCAACGATTCCCGGCC -GAGAAAATTGAGAGCTGGACAAATGCGACCACTCTTAAGCGAGCGGTATGCAATACTCCT -CTTCCCCAATCTTAGAAGGGGTCATGTATGCTAATCAATCCCGCTACAGCCCGAGGTCGA -AGACTGTGCGGACATGTTTGTTACATTGGCCAAGAATGCTTCTATAACCGGCCAGACAGT -TCAAATTGGTATGAGTATCAGCCCCAGCCAATTAAGTTGAAACTGACAGCTTTTTAGATT -CTGGATTCGCGATCTGATGAGACAAGGAAAGTGAATTACATATGGTCAGAACAGAGATTA -TAGTACACAGAGCCCCGCTTTACTTATACAAGAGAGTTGCGGAAGGGAGTCAGTTCCCGA -TCCTCATATTGAAATGCTCGATCGCCGAACATGCGAGCTTTGATAGCTGCTCGTAGACCC -TCGGCGTTTTGTTCGCCTTCCACTTTCATTCGGTTGATGCGCAGAACTTTGCCATCGGCA -GTAAACACATATAGCTGCCGTGACTTCTTTGGCTCGACAGGGTGAGAGGCATTTGTAAAT -GGCAGACCTGGAACGATCGAGAATGGATCACATCCCTTAGGCGGGTCAACCTCCTCGATA -TCTGTAAGAATGACTCGTTTCCGTTGGAATGACAGAGGTCCATCTACAAGATTGCTTGCA -CAGAGGGCCTTGCTCCAAAGAGGTCCAATCACTCGGTTACGTCTGTTGATATCTGCCAAG -GTCCAATTTGCCCACTTGATGTGCCGGTCCTCTGGTGTTATTTTGGGAGCATGTTTCAAC -TCCCCTGTCTGCTCAGCTCCAGCATTTACAATGGGCGGGACGAATAGTCCTTTTTCAATA -CTTTCCACGAGCATTTGTGCTCCTTTAGGTGCGACATAATCCACCAACTCGGGGACAGTG -CAGGATTCTGGATTGGGGATCTCGAACCCGGGGGCGGGCGTTTGTGATAATATCGCCCCG -TGATCAAAGTGTTTCATATGCAAGGTCTGAACCGTCACGCCAGTAGTGGTCTGACCCGCG -AGAAGAGTATGATGAAGCGGTGCAGGACCACGGAAGCTAATCAACCAAGTCAATCGTCAT -GATCTTCGATCCTGAAGATATAAGATCACATACTTTGGTAGTAATGAAGGATGCACATTC -AAGCCTCCATACTTGGCACCATTCAATATGCGCGGCGGAACAAACAGTCCGAATGACACA -GCAATCACCAAATTTATGTGACCGTTCGGAGAGGTCGGGGGCTGGCACAGTTAGCGAAAA -TACCAATTGGGTTGAGCTTTGGCATTACCGTCCACCCGGTGAAGGTGTCAATCTCGTGGA -TTGGAAGAGATAGAGCAGATGCGGCAGCTTTGAGTGGAACTGAGGGAAATTGAATTAGAA -AACACAGCCACTTAAGGAAGCGTAAAGTTAAGCCAACCTTCCCGAATTTGCTTCAGTCCC 
-CTTCCAACTCTCTTTCCGGGTCTACATACGACATCAATAGAGGAAATACGCTCCGGCTGT -TTCAAATGATAAGCATGGAGAGCTTTCAAGTGGGCAATACTGAATTCATCAGAGCCGCAA -AAGAGGATGCGGAGCGGATCGGAGGTCTTGGTGGCATATGACCTAAAGACAAGCCCTCGA -AGAACGAACGGCTTCTTGCAGAGACGTAAACTCCCTGAACCTGGAATGGTCCACATGATT -CTGGTGGATAATCATCAAAAGTTGATTGACAAGGAGAAAGTTGAAGCTTCCATAGTAGCT -TATCGATAAACCAGCCACGTGATGTCAATATCCACCCCGGATCCCACTTCGGATGCATCT -GTCTTTCAACCATTAGTGCTGTTGCCTGTCTGGTACAAATTGCAGGTGCCCCTCAGCGAT -GCAATGCCTGAAAGCGATCATGATTCGGTGATCCCAGCCCAACTTTCATACCTCGCGATC -TACAACCCGACACTCGGACCTACCGACGAGACGATAGCGGACCAGATCGTGTTCTATACT -TCAAGAACGAGCTATGCACGGCGCATCGATGGATCCACGGTAGAAGGTGAAGCAAACAGC -TGCCCTGAAGATGAAGAGAATGACAGATTGCGCCAGATAGGTCTGGCGCAAGGCATGGTC -AACTTTGCCAGGTAGGAGTCCCCGTAACCCCGCAAAGAAACCAGTCTAATCACGTTTGAT -CTGCAGTAACTTCTCTGGAAAGACACTGGAGTATGTTGAAACAGAAAAATCGCGGGTTGT -TCTACTTGAGTTAGAGAAGGACTGGTGGATTGTTGCCGTAAGTGGTTGAGATAACGGCAC -AGATCACACAGCTTATGTGAAGCCAGTCTATCGACCTGACTCGAATTCCTGCCGAACCTG -CTCAGAGATACTCTGGAGCATCTGAGTCTCCGGCATTTCATTATTCATCACGAGAGATGG -CACCGCCGCCTCTGCTGATCCAGCAATTACGTCGCGCCCATTCGGAATTCCTCCTACACC -ATGACTTCAAGTTAGACGACATCCTCCATCAAGTCGGCAGGCACACATTTTGCCTATTTC -TTGAACGTTTCTGGGAGAATTTTGCGTGGAATTGGGAACTACTGTTGACCGGAAACCCCA -TTGTCGATATCTATAATGGCATTAAACTCTCTGCGGGAGGAGAGTTGGGGATCGGCGTCG -GGGAGGAAGAGTGGGGCAGTGGGGAAAGAGAGGTACTTGAAGACTTCGTGGCAAGGACCG -ACGGGTTGGTGGACTTGGTTGTCTCCAGGTTTGGTGATCCGTCCTCACAGTCCGGGGGGT -CTCCAGCTTTGGCAGGGTCAAATGACCAAAGCCGATGGCTTGGGGCAGACAATGATCCTC -GACCATCCGATGGTGTGGTTTTTACTGGAGTTGGTGCTCTTTCTCGGCGATCCTTGGCCC -AAATATCACATTGGATGCAATGCATTTATCGGTTTGGGGATGCTGCGTATGGAGTTGGGC -GGGACCCAACCTCTCTGCGTCGCCGCAAACCACGAAAGCAGCGCGGACGGCAAATTTCCG -AAGGTGCTCCTCAGCCTTCCACGCCAGATCGCGATTTCAGGCCTGGTATACCTCGTCCGC -TCATAATGGCAGCACCGCAGCCAGTCTCAGAAGTCATTGAGGAAAATCGAGCAGGAACAA -GTGAGGCATCACCACTTCCTACAGAGCAAAAGAGCGAGCTTCCAGGATTTCCAACAGAGG -CGGTCATGAAGTACCTCACTCTTGGATATGGCTCCTCTTGGAGCTTTTCTCCAAAATCAA -CGTCTACGCCGTCTGAAATTTCTACTCCAATACGAGATGAATCCAATCCAAATATCTCAA -ACCCGAGGAATCAGCCCACTAGTGCTGGACAAGATGATGTAAAACATCGAAACAGCACGA 
-AAAATAGCCCCTCCGGTCATTTTCTTATTGGGCCTCGTGATGATCTAGAGAAGTTGGACG -ATTTAGAAGAGGGGAGTCCAGAACCCATGTCCGAGAACGGCAAACCCAAGACTCGGATGG -TGCATAGAACTTTACATGTCCACCTGGCAGATGGGCCAGACACTACTCCGACAAAGCTCC -AAGTCGTGATCTATGTGGTATGGGTCTTTCTTATATTCCACGTAAATCGATTTACTAATA -CACCATCCCAGCATCAACCATTTATGTTCACATTCCTCTTTGATCCTCAAACACCAGCAC -TATCATCCCCATCACTATATTCAAGCATGCATCACCAACTTGGTCCTCTTCAGAAACCCC -TTCTTTCATCGACATCCCCAACAATGGCAGCCTCCCGAATCGCCATGTCTGAGACCTCCC -CAGATCCCAATAAGCGGTTCTCCACTCGAACCCAGCCAGTTTACAATCTAGTCTACGACC -CATCCAACCTAACCATCCGCTCCTCCATCCCCAATATTCCCAGCATGAGCACCACTTCCC -CCTCCACCCAACCAACCAAGTCGCCAACCCGCTCCCCATCCCCCTCTTCCTTCCCCTCCC -CATCATCAACCCAACCACCCTGGTCCCGCGTTGAAAGCCTAGCAATCCACCACCGCCTCT -TATCAACACACACAGACACGCGTTCCCGTCCACAAGAGCTTGAACGCACAAGCAAAACCC -AACGCGGATGGTGGGTCGTCTGGGTCCGCATCCCACACAGTACAACCCAAACTCCAACTG -CCCTGTCCACCACTTCTTCATCCGCCGCCCTCTCATCCATAACAAACAACGATGACTCCT -TGCCCGCAAACCCGTCCCAAGTGCAGGCTACTCCCGCTCCCCTGCCTCAGGAGGCTTTCC -TCGTCCGCAAAGCGAGTGACTACGTCTCCCCAGCAAGCCATGGCCGTGTCAGTAGCGGTG -CTCGTTTCTTCCGCGATCTAGGCGGAGCGTCTTCTTCCGCTGGGTTGACAGGGTCTTCCC -GTGCTACGGATATGGCGCCGTCGAAGTTGGTTGAGGGGCTTGGTATGGATGCTAGGCGTT -ATATTGAGGGTCTGCTGAGTTTGAACCGATAGTTGAGATGGGATTTGTAGAGTGGAGTGG -GCTTGGGGGGTTTTTTTTCTATATCTACAGAAACAGATATATAATGTTGCCAGTATGGAC -CACGATTGGACATGAATATATGACAATATTGGAATACTGTAGTGGTTGGAAGCGCTGGAA -TATCCACAGCTTTACGGACACTTACGTAGACTGCAGGATTACCCGTGACCAGGTCCACAT -ATTAGATGAAAATAGACAGTACCATGCATAAGCTGTAAGATAAGAACAAATGTAATAAAG -GAGGGAGAGAAAACAATAAAAGCCGAATCAAGCTCTTCGCGTATGGGGAGATAACCGCAG -GGGACAAGAATCTATGACACAGGGGGAACTCGACCAGGGAAGGCGAGTCACGGCGTTGAC -CAAATTGCACAACCGGTGCTACACTGAATGGCTTCTCCTCTTCGTCTCTTGGTTGTTCCA -ATTCTGACTTCTTTTTTTTCGATGCTTTTCGGTTGGTTTATAATTGCCGCCTCAATGCGG -TTTGGCATTTGAAAATTCAGGGCGTCGGAACGAGAGGAAAAAGAAAAAGAAAAGCCCACA -GGGTTATGATGAAGACAACGCCTCCCTAGACAAATTCTGCAAATAAAGGCAAAAGAAAAA -CACAAGGTCGACCAGCTGAATTGTGGGGGGGAAACCCTCCACGACTCGGGTGCCCGGATT -CGATATTATCCACAATTTCGCATGCAGGCCATGAAGCCCTCCTCCTTTCCGAGAATAACG -TCATGAATTGCTCTTCGTGGAGGCTTTCATACATATGATCTCTGCTCGAGCCAGACCACC 
-TCAACACTTGGTCCGTTTAGAGAATCAAGCAAGAGCGGCCGCTACTCTTGCGGCTTCCCT -TGCCCTTGACAGCCTTCAGCAGAGCGGCACGAGTGGCAGCCTCGAAGACTTCACGCACAC -CCTCGTTGGTGCGGGCCGAGCACTCGAGGTACTTGTAGGCACCGATCTTCTTGCGGACTT -CCTCACCCTACAGTCATATATTAGTTTTTGACGAATGCACTGTGCAGTCGAAAAAGACGT -ACCTGCTCCTGGGAGACGGGGCGCTGGGAGGTCTTGGCAAGTTCATCAATAGTCTTGCGG -TCGTCGCGAAGATCCTTCTTGCAACCGACGAGGATAATAGGGAGACCCTGGCAGAAGTGC -AGGACCTCGGAGATCCACTATTGCAGTACGTCAGCATACTCGGTCCAGTTCGCACCCAAG -CATTTCTTTGCTTCGGGATGTGCGGTCAATTATAGAGACCCACCTTCTCCTGGACGTTGT -CGAGCGAATCGGGTGAGTCCACGGCGAAACAGATTAGGATCACATGTGAGTCGGGGTATG -AGAGAGGACGGAGACGGTCGTAATCTTCCTGGCCAGCAGTATCCCAAAGAGCAAGCTCGA -CGTGCTTGTTATCAACCTCGACATCGGCGACGTAGTTCTCGAAGACTGTGGGGACGTAGA -CCTGACGGGAACAAAGTCAGATTATGCTTCATTATTCTTCTCGCGGCGTGTGATTCTGGG -CAAAGTAATGTCGTTTCGGGAACCGTGGTTTCGAGGATGGATAGCAGCTTACCTCGGGGA -AGGTACCCTTGGAGAAAACACTGTGAACTTCAGTCAGACCGCCAGACCAAAAGATAAAGT -AGAGTTTGGAGGAAGCGCGCTCGATAAGGGAGCTACTTTCGGTACTCACATGAGCAGACA -GGTCTTACCGCAGGCACCATCGCCGACGATGACGAGCTTGCGACGGATCTCGGCCATCTT -GATAAGGAACACGTCAGTATAGGGGCAGATGAGCAGAAGATGCCAGGGGGTCCGGCAAGC -CCTAAACTTTCAAAAATGATGGATTCTACAGGCAGGAGGGGGTAACATAGTCGATCGAGA -AGAAAGGGCCCAAGTCAGGCGCTCCCGGGGAGGGGGGAAAAGGAAAAGGGTGATCCCATA -CAGTGTTCAAAGGCGCAACGGTGTAGTAGATTTAATGTAGTGGGTCAGCTAGTGCAAGCA -GCAATAGCCCTAGAGAAATTAGAAAGATGAAGTAGGTATTTTCGATGGGAAGCGATGAAA -AAAAGACGATGGAATGATGATTTGGAGGTGGAGGAGAGAAAGATAGAAGACCAGGCCAGA -CAACAGAGGGCGGTGGCAGTATACGGGTATTTGTTGTTGCCAGTGTACTCCGTATGGAGT -ACCGGTATTGTGGCTTCAAAGTTTCCCCCTGGACGGTGAAGAGTTATCAACGTATGTGGC -CCTAAAGGTACACCCACTTACAGTACATTGTACAGAGCACAGCTTTTGGTATTGGTATAT -ACATAAGTACGTTACATCTTTACTTCTACAGCACACTCTACCCCCTGAGTTTCAGGCTGC -CTTATTTTACATACTGTGGCTTATACCTCTCTCCCCCTTTGAGACTTTCAGGGAGGAAAT -GAGAACATAATGGTACACAGCCCTAGATATGTGTGTGGCTGAGGCCTGAAAAAAAAATAT -CATCCACATTTCCTGGCCCACTTTGTCCACCCAGCACAGCCACCCGTGGCTATGAAGTTT -GTTATATCCGAGATGAGCACCTAGATAATATGTATGACATGAGTGAGTCCCTATGGTTTT -ATATGCCCTGGCTCCTACTATCGGCTCTTATACTTGGCTCATTCTCTTTGGGGGCCTGAA -AAGTAAAGTCCGTTTAATCCACCTGAAACCTACATTTATGGGGAAACCCGAAGTTAGCCA 
-GCAGCAACATTTACCGTATGAAATAAAGGGCTATAACAATTTCTTAGCTCCACGTCAAAT -TTCACATACAATGTACCACCCTACCCAGATGTATGAGCTTCTATGTCATACGGGCAGTGC -TCATACGTGGTTACCTCCAAGGCTACATTGTTAACCTCTACCCAAAATGCTTGACTCGGA -CAATGAAAATCGCTCCGCTGGTTCTCTGGATCTCAGATTTATCTAGTGTTCACCGAGAGC -TCTCGGTTTGTACTGACTACTGTATACGTGAAGGGCTCCAATCTGATGCTTTGTGCTGGC -AAACCCGGTCTCGTTTGCTACCGTATCCATAGAGGGCCCGGCTAACTTGAATATATTGTC -AATCCAGGAAATCCAGTCGTGGAAGTTTTGATTCATTGATTCAACCAAATTATCAAAACC -TCACCCCCTTTGCTTCCGAATTGACATCACATTCTTGAGGCTGTTCCAAAGTGGAGCTGA -TGTATTTCCAGCTCCAATGATCGTATTGTACCGGCTAAAAATAGTAAACAGGGGCCGTTA -GGTCGTTTCTTATAAACATAAAATAGGAGAGCATTAACCTTTCAGATTTAATGTGCATGC -TTTACGGAGTAGTATTTCATTGGTTCCGCACCAGCAGAAAATACACACCATATATCGGTG -TTCATTCATCTACCGCTCAATATGGATCAATGTCACAATTCAAATCAACATGTCGGTGGT -CTTGATAATCAACCTAACATACAAACGCAAGTCCGTGATGGATTTCTCTCCTCATGCATG -TCAAGCCAGGTTGAGCGATACACGCCCAGATGTTGAGAACAAGCGAATCAAGAGGGGAAA -AAAGGAATGAGCAAATGCTGCTCACAAATAAGACATTTCAATCCAATCCTAGGTACATTT -TGATCGTTGATCTCGAGGAAAACACCACTTACAATACGAACCTTCGTAGATGAGTGTGCG -CTGCCATTTTCAACCATTCGTCATTCTGAGATACCCATGGATCTTAATTGCACATATGTA -TGCTTGACACAATGTACCTATCTGTATGACAGTACTGCAGTGTCTGTTTTTTATCCCAAC -ATCTCTTGCGCCCAACAAGACTCAACAGTGAATCTCTTTATTCTTTATGCTCGCCTTCCG -TATATCACTGGCCTCCTATGGTCATCCTTTACCATTTGTGCTATGGCGCTCCTAAAGCTA -CGACGCCGTGCTGCTTCCTATCTTCTGTGGTCTCCCAGGGTTCCATAATAAAGCAGCGTC -CAGCAATGATCTCATACCACGTGGCATTGGGGTAAGTGCACATCTTACAACTCATACCTG -CGCTTCTTGATCCTCCGAGGATCGTCGTAAACATGTAAATTCATTTGATCGGCATGCGGA -ACCAAATGAGTTCTGCTGGGAATTGACCGCCTTGCATATATTTTCCATAGGCATTCATAC -GGGAAACAAGACTAGACGGCGGCTTACGAGGCCTTGTGATGTCTTACAGGTCTTTCAGGT -GTTCTTCTTAGATCTCTTTTTTTAAGATAGTTTGTCCTCTTTCCAGTCTTACGAGTAGAC -GGCGAAAAAAAGCCGGAAAGGTATAGTTTGTGCAAGAGGCTTTTCTATGAAAAACCTCTG -ACTGGAAGGCGCTGAACAAAGATGGCTCTACGCATTATCGCGTGGCACTTTCTAACAGCC -GTTGAGGGTTTCCAAACCGTCGGATATATACCTTTGATCATGGTGTTGCCCGAATATCTT -TTTGATATCCGCCTGGCTTCCATGTAGGTTATAGCACTGGAAGTAAGTATCCAGTGGGAT -GTTAAAGATAGCTACTTATCCAAGAATATATCCGTAAGCTTCGGCTTATATTGAGATTGG -GTTTTTTTTGTATTTGTTGGCGTTGTTGGGGCCTTTCGACAGTTCCCGGCCTTCATTGTC 
-ATTATTCCGTTCGGGATACCTTCGTCCTGCAAATAAAGAGGGTCAACCAAAACTATGTAA -GCTCCAAGGAGAATATTTGAAAGTGCCGTTTGAATCGCCAAGATGTAGTTATAATGCAGG -ATGAGTACTGGTCTCGGCATGCTTGAATGTTTGTACAGAGGGCACCCGGCCGGCGACTGT -TGCTGACATAATGTATATTGACGGATCACGTGCCACCATCGGACACCCTCAATTTTCGAT -CCGCGCATCGGGGATTCTGAGGCAGAAACAACGTTCGGCCATTGCTGGACATAACCCCCA -ATATGCAGAGAGTTATCCAGCGGACAGCTTCAGCCCGGAAGCAGGCTCTCAAAAAAACCA -CCAAAGCTCATGAACGACAAGAATTGTTGGACCGTGTTGGGATTCGGCGGGCGCGGAAGG -ACTTCGGCGGCGCACTGTCTACTCAATTCCAAGCAGCACGAAAGAACCGCTGGGAAGATT -GGGAGAAGGGGCCTCTTGCCCCCATGCGAGACTCTGGCCTGAGTCGCACTACATATGGTG -GTCAGGATGCTTCCGTTTTGCACCCGCCGCGTCTCCCTAAGCACGAACAACGGAGGCATG -TCCTCTTCGCTGAGGGTGACCGGGTTTGTGTTATCCGAGGACGGGATCAGGGAAAGATCA -ATGTGATCCAGCAGATCAACCGGGACAGTGAGACTGTCCTCATCAAGGGTATTAATATGG -TATGCTTCTGGCGGATCAGTCACCCAGGTCAAACGACCTTTTAGTTACTAAAACAAATCG -CTCACTGGTTTTCCTCAATTAGGCCGACGTGATCATTCCTGAATGGGCCAAAGACAGATT -GGGACACACAGGCGATACCCAACCACAATCCTTCCCCGTGTCCTTCGACGACATCCGACA -CGTCATCCCGCTCGAAGACACAAAGACTGGAAAATTGCAAAATGTCATTGTGCAGCATGC -CTATGCCGCTGGGCCGTACATTGTCCGACCTGAACACAGCAAGCTCCCAAGATTCACTCG -CTATGTCGCCGGCTTGGACGTCGAAATCCCCTGGCCAATGGAGGAAGAGCCTGCCATCAC -CGAGGGCGATATGGACACAACCCGAATGGCAGTTGAAACCAGCACATTCACTCCCACACT -GGAGCATCCACCTATGCCCTCATCTGTCATTGATGAGCTGCGCAACAAATATTCCAGGTT -CCGCACTCGACACGATCCGGAATACTTGAGAGAGAAGATGAAGGAGGACCTTCGCAATGA -ATACAGAAAGACCGTCTCCATGATGACACCGAAGACAGATGCCAAAAATTTGAGAATTGC -TCAAATTGCGGAAGCGAGAAAGGCGAAGTTGGATGCCAACGGGAATGGGATATTGACACC -TGGTGCTATCAACTTCATTAACGAGCACATCCGAAACGAACGGCGTCTTCAGAAGCCAAA -GAAGTCCACAAAGCCTAAGAGTTCAAAGTCCAAGCAGGCTGCTTGATTGGGCGAAGAAGA -CGGAAGTCAGAGAGGAGGGGTTTTTTTCCTGTAGCATTGTGTACACACACCACTGTATTC -TAGTATGGAGCTTTGCATTCACTCATATATCTATTTCTCGATCAAGTTATTTCACAGATT -TGTATCTTTTGCATTCATAGAGTAGATATGCGTACCTCGTATGGAAAACATGATCCCAAC -TTAGGGTTCCAGTCAATGCGGATACTTCGGGTCCACCCTTACGTTCATCCCCCAATTCTC -CGACTAACCTGCATCTCTCTGCACTGCTGTTCTCTTGAACCAAAGCAGTCGCAACAAACA -TGGCGTTCGCCTCAATCAATTCCACGGGCGCGCCTATTGCCGCCATCGAGGAGGCATTTG -CGACGGAACCCTTGCTGAAGAAGCGAGTCTACGACGCGATCGGTGCGTAGCCAACAACGC 
-ACCCAAGCCCCCATTCGATGACCTCGTCGACCTCTTGCTGACCGTCGCCGATCGATTTCA -CCGTTTAGGTACAACACCGCAGCATAAACCTCTGCTCGAAGATCTTGCGAAATATACCTC -CAGCCTGCTGGCCAGGACTGCCAATACGACCCTCCCTTCGCGGCCCCCAGCCGCTGATGG -GCCTGCCGCTAAAAAGCGCAAGCTTCAAAATGGAGACACGGGGGAGACTGCGCAGGCGTC -AGTCAACCTGAAAGACGGCAATGCGCCAGTTCAGTTTTATGTTCAAGATGTTTCTTTCTC -CACGCCACAGAGGAAGAAGCTTACGCTCGAGATCACTGCTGGGCACCAGTATCTTCGGGC -AAGGAATCAAGCTACGAGAGAGATTGAATTCGGTGTTTCTATGGATAAAATTCGTGAGTT -TGAACTGAAAAAAAATTATCTAAAAAAAAAAAAGAGAAGAGTTGCTAATACGACCTCTGA -CTAAATCAGGACATGCCCTCTGTCTTCCTGTGCCGGAGAAGACACAAAAACAATTCAATT -TTTGCATCATTCCCGAGTACGCCGATGGCATCACACCCCCGCCCGAGGGAACAGCAGCAT -CCGAAGCTATGGTGTTTACGATAGCTGACGGACCAGCCAAGGCTGCCTTTTCTGGAAACG -GGGAGCAGCTTGGCCATGTTCCTGGTGAATCGGCCGAGGCATTGATTCGCAAAATCCTGA -ATGAGAACATGCCTCACACCAATGTTGTACGCCCTGATGATAAACAGTTTGTCAGTGCTA -TGCCTGAGGCGCACCGAAAGGGCGAGAAAGCATATCATGTCAAGGCATTCCGCGGTAGCA -AAGAAGGTATGATACGTCGCTTCCACCTCTCCACCAGAAGTCATCAAAGTTTTGGTTTTA -GTCTTGATCATTCTAACATTGTGTGACTTGCGCCAGGCTATCTTTTCCTTCTCTCAACGG -GTATCTTATTTGCGTTCAAGAAACCGCTTCTGTTCTTTTCATTTGCTACTATTGACTCGG -TCTCGTATACATCCGTCCTCCAGCGGACATTCAATCTCAATGTCATGGCTCGCCCGACTA -ATGGGTCCGAAGAAGACATTCAGGAGTTTGAATTTTCGATGATTGACCAGGATAATTTCT -CCGGGATCGATGCTTACATCAAAAGACATGGTCTCCAAGATGCTAGTCTCGCAGAAGCGC -GGCGCGCCAAGGTCTACAATGTCAATAAAGCTGGGGGAGAGGACGCAGCGGCCCCCGCCG -CCGAGGCAGCCGGCGAAGATGAAAGCGAGCTCCAGAAAGCGCAACGGGAGCTTGAAGATC -AAGAGGATGAAGAGGAAGAAGACTACAATCCAGACAGTGACGATAGTGAAGGCAGCGGAT -CCAGCAGTGAAGAGGATGACGATGAGGACGATGAAGAAGACGAGGATGCCGGCCACGACA -GTGACGAAGACATGGGTCTGGTTGAAAATGAACTTGGGAGCGAAGCTGAGGAAATTCCCA -AAGACCATTAGGATGCAACACGGAGCCGTCACAATACCCGATGAATGTGATTTGTATAGT -CGATCTATCTAGACATGTCTATGGAGTAGGGCCTATAATTGATAGAGACATTTATCACAA -AAAGCTACGATATACTTTATAGAGTCAATCGATGACCAGGTTCCTGCCAAGACGGTAATA -TTACCTTAACCGCATTCCAGACCGATTCCAGACGGGATCATCCCCTGAAAGGGAAAAAAA -AAATGAATCGGCTCTATGTTCGGGCTCTACAGTCGGGCTCCCTGATTCTGATTTCTTTCT -TTCTCCAAATTAAGAACACTTGCTCCTACCCGTGGACATAGCAGATATTTCCCCCCTGAT -ATTTCCGGTACCTGATCAATGCAGCCCTGGAGATTGAGCCGCCTCGGAAGCGCCCAGGTC 
-TGAGCTAGATACAAAGTAGCTAGATAGGAATATTCAACCAATTTGGTGGCTTTCTTTGGT -CTCCAGATTCATTGAATCAGGGCACGCCTTTAATATTTTTAATATGAGCGAGAGTAGTTG -AACAATGGAGACGATAGCGTCTCATAGGTCTCAAAGATTTCCCCCCAACCCCAAAGTTAC -CATACAGTCACTGTACACTCGTGACCGCCAGTCCCTACGCTCACACTCCAAGGCGTCTTT -TACCAATACACACCCAATACCCTCCGCTCCCTGTCTCTCCCTGTTGCTTCCCTTTCTCTT -CTCCATCTCCCCCTTCTTCATCTGGTCTATCTTTGAACCACTGCTCAGTGCGGCTCGTAC -TCGCAGTAGGTCGACCCTATCTGTTTTGTTTTCTCCCTTTTTTCACCCTTTTCATTGGCC -TGTCTAGAACCTCCCCGGCTTCCAACCCCAGCTCGTCACTCCCGACCGTCGAAAAGGGAC -CCACGAACGTCTCAATCCTTGAGCACCGCCAATTCCACCTGCATTTGGTTGTTTCGGCTT -GATACCTCTCTACGTCCCTTTGTTCCTGTCCCGATTGTGCATTCCCGTCTAGCCTGTCTC -CTATCATCGTCACCGGTACATCGGCAATTGAGCAAATAGATCCCATTGAGCCTGTTATAG -CTGCTCCGTTCGGAAAAAATAGAGGTCATTGTGATGTATGTGACTGGACTCTTTGATGTC -GCTTTCATCCTTGCCTGACTTCGTGTGTGCTGACTGCGCTCAGTGATAGTGTCAATCACG -TGTCTACGTCCCTGAAGCCATTCCCCCGTGAGCCAAGGTTTCCTTTCCCCGGGTTGCAAT -CTTCACTCCCGCATCCATGGCCTCCTCATCGTCAAACGTAGTGGGTGTCCACTACCGTGT -GGGTAAGAAGATCGGCGAAGGATCCTTTGGTGTCATCTTTGAGGGCACCAACCTCCTGAA -CAATCAGCAGGTCGCCATCAAATTCGTACGTTCCCCTTCTCCTTCACGACGAGCCCCATG -GCTGACCTGAAATCAGGAACCCCGGAAAAGCGATGCTCCCCAGCTCCGCGATGAGTATCG -GACATACAAGATTCTGGTTGGATGCCGTAAGTATTCCAGATCTAGGGAGAAATGATGTGC -GCTCTTCTAACTTCATTCTCAGCCGGCATCCCCAATGTCTACTATTTCGGACAGGAAGGT -CTACACAACATCCTCGTGATCGATCTCCTTGGTCCTTCCCTGGAGGATCTCTTTGATCAC -TGCAACCGCCGCTTCTCCACGAAGACTGTCGTAATGGTAGCCAAACAGATGGTACGTTTT -CCCCGCTCGAACTTGCAAGAACCATGGCTAACTGTGACCAGCTTTCGCGTGTTCAAACGA -TTCACGAGAAGAACCTCATCTATCGCGATATCAAGCCAGATAACTTCCTTATTGGCCGGC -CATCAACCAAGGCTGCCAACGTTATCCACGTGGTTGATTTTGGTATGGCCAAGCAATACC -GTGACCCTAAAACCAAGCAACACATCCCTTACCGCGAGCGTAAATCCTTGTCCGGTACTG -CTCGTTACATGAGTATCAACACACATTTGGGCCGTGAGCAATCTCGGCGTGATGATTTGG -AAGCTCTCGGTCATGTCTTCATGTACTTTTTGAGAGGTGGCCTTCCCTGGCAGGGCCTGA -AGGCTGCTACCAACAAGCAAAAATACGAGAAAATTGGCGAGAAGAAACAAACGACTGCAA -TCAAGGATCTCTGCGATAGCTATCCAGGTACGTTATCTCACTCTGATCCATGCCCATATT -TCATCTGCTAATCGCTCCGTCAGAAGAATTCAACAAGTACCTGAGCTACGTGCGCAACCT -TGGTTTCGAGGACACTCCTGACTACGATTATCTCCGTGATATCTTGACACAAGCTCTCAA 
-GAACGCCGGCGAGGTAGAGGATGGCGAGTACGACTGGATGAAGCTCAACAACGGACGCGG -CTGGGACTACAAGTCATACTCATCTCAGGCACATCTCCACAACCAGCACCAGGGCTCGTC -CGGTCGTGACCTGCACGCAAGAGAGCTCCGCAACAGCCAACGTCCCGGAGTGACAGCTGA -CCGTTTAAACGCTGCGCAGCCCCCGCCTCCTTCGCCAGCAGCCAAAGTTGGAGCTGGGAA -GAATCGCGAGCGCCAAAACGTCGGCGGTGTGGCCGCCAAGCGCCAGAGCGGGGGGTTGGA -AGCATCGACTCCGGCTGCGTCAACCCAAGCCCAGTTTCAAAACTCCAATGCCCACCTCCC -CGGTCGTAACGGAAGTCCAGGGAACCCGGCGTTGAGTAACCAACAGCCCCTTGGTGCCCC -AGGTACCCCTGAATCGCCGCCCACTTTCACGCAGAAGTTGATGAAGGCTCTGTGCTGCGG -TAGGTGATTCGGCTGTACTGTATCATTTGAGCGCCCAGCTCATCTCAGGATCCAGGTTGA -ATTTCCAGTCAAATGAAGCGATGGCGCATTTAGACCTACCGCGCCATGCTTTCACATCTG -CTTCCTGCCCTTCATGTCAGAAAATGGCCTCAGGAGGTCTTTATGGGATTTCTTTCTTTT -TATTCTTGTTTACTTTCACCTCTCGCTCTTTTCTTGCCTTGTGTTCAAGTGGCTAGCGCC -GCTTCGCCGAGCAGGAATCGTGCTTCTCGGCCTATATCACCTTCTGGCCTTACTCTTTTT -TTTTTGTCCCATTCGATCCTTCAGCCTATACAGTTGACTGTGGGTAGGAACAACCTTTCT -CTCATTGATGTTACGTCTCATCACTCTTTCATGGTGCTACGGCTCGGAACGACGAACTTG -TGAGATCTTTTGTCTTGTGTTTTCGACTTCGCTTCGTCTCGTCACTATATTCGGCGTGCA -CCGTTTAATATCAACCCTTGGTCTTTCAAATGGGCAGGTACTCATCGGTTGAACCTCCGA -TGCCATTGACTCGTGGCGCGCCGCCCTGGGACCTTGTTTCTTGCGCTGGCTCGGGAGACA -AACAACTTGCATCAATGGCTTTTACCCACGCGAGAATTGGCAAGACTCAATGACCCAAAT -ATGCCTTGATGAACCGAGCTGTGAGAGATGGCAATCAGAATCGATCTATTGGTGTTAGGC -CATGCTTGTCCATCTCTCTAGTACACAGCTCTCGGTCTCATTTCGTCCTGTTCATCAACT -GGACTAGCATACCAGACAACTCTCGTTTCTATTTGGGTGCTCTATCTCCTCTGGGCCGTT -TATCATTGCCCTGCCCTTCTTGCTATTTTTTCCCATCCTGTGTGCATCCGGGGGGGAGGG -AAGGCCTTTCCACTGCGCGATCGTCATTATTGCACGCAGAACCTAGACGAAGAGCATATT -GCATATATTCCCGTTCCCTTTGCCGTTTTCCCTTTCCATCAACCGCCTGCTCTGCCCCTT -TGATTGTTCCAAAGATCTATTGTCCATGTCTATAAttgtcttttgtttgttcttgtcttt -tcttgctctgttTCAGAAGCGAGTTTTTGTTACTGGTAGTCGTGGCATAAGATAGCATTC -TTCTTTTTTTTTTCATTTGGTTTGTTTTTACAAGTCTTCTGTGGTTATTCGTATGGCGTA -TATAAATAATTCCTGTAATAAGAGCTATTGCATTTTGATATCCTCCCTTGTCTAGCCTAG -CAATCTAGATGTATATCTAGTTCCAGGGTTACCCTTGCCATTCGAGATTGTCTGAGGTGC -TGCCACGGTTTGCAAATCAGATCAATTCATATGTGCATTTCCACTGCCATCTACTCTACG -GATATATGCTACACTAGGAAACGGTCTTTGGTGGGCTTCGATCATAACCCGCATATACAG 
-ATGGTCATATCACGTTGACACTGCGGATTTGAGGCATATGTTTCTCGGTAGCAAACCAGT -AGTGGCAAATGTTTGGAGTCATTGTCAATTGCCTTGCATCTAGTAAAGATGTATGTCCAG -AGTACCATTGAAGCAACAGCACAGCCTCTCCACCATTTCAACTCCTTCGAGCTACACACA -TGTATATCCAACCGTTGCCACCCGAGTTGTATTTACCTTTCAATACTATTGGTCCCTTGT -CGACCTCTTCAATTCGCCACTTCTCTGCTGGTCCTGGCAGTCACAAGCTTTCGACGGACA -CGACAAGATTGCGCAGCCACAACCCCGTAGCCATTTCTGCAGCTCTACATGAGACGCTAA -TGCAAGGATCACCAGTCCGTGGCATGTCATAGCATGACGTAGGTTACCCAACAGAGTCAC -CTTGGACGGGGTAGTCCAGCAGAGACAAGAGATCCAGTATCTGCTCCCAACGAGCATGTC -TCGACAACCCTTGAAAGATCAGTCCCCTATTCGGTGCAGAATAGGTGCATGGGAAATGTA -AAGCAACGTAACCCCAATCCCCGGCATGTCCATCTTGAGCCTAGCAAAGAATGTGTCAAT -ACCCATCATATGATTCTCAGGACTACAACATACAACTTACAACTTACGTTTGTCAACAGT -GTCGATCATCGATCATCCCCGGACTCTGCAGAGATACACATGAACTCTAAATGTCCAAAG -TGTCAAATCTATGTCCCTTGTTCTAGAAAGAGTATGGGGGTGAATATGGAGTATGGAGCA -CTGTTGTATATTGGACTGCATCCAGACATCCACCCCTAATAGGAGTACACACGCCCTTTT -CCAAGTTCACGGATATGTATCTCCCCTCTCGTACTTTACCTCGAACCAATAACAATCAAT -AATCCCGAAAGTCAGATGGCGCAGTCATTAGCCAATGATATGGAATGCCACCTGCACCGC -AGTGAATACGGATATGCACCAGCCACCCCAGCTTTACAGCCAAGACGATCCGCAGGATCC -AGGATCCTACCCCTAGAAGAGATAGGACCTCTCGTTCAGAACTGCTTTCAGTCAGGTAGA -GTCTGGGTAGGACCAGCTAGCGGGAGTTCTTATAGGCTATGTATTTATATATATATTTCT -ATACACACCCATAAATAGAGACATGAAGGTCAATGTCCAAATGTGCAGATTCCCCCGCAT -TTCTCTCCCCGCCTGGAACCTTCCAGGCAGCCCAAtttctttttttttttttttcttttt -ttgctttttGGTGCAGGGAGCTTCCTGCGGGGGCTTAGGTGCTCGAggatatgtgattgg -taggattgtggattgttattggagcttttggatGTTGCTGTTTGGGGGGTCTTTCCTATA -TGGTATACTGAGGTCCCATGTCTGTATAAGGTCCCAGGTCTACAAACCAGGGTACATTAC -TATCTTTGCACGTTATTGTATGGAATGTATTGAGCTATTGACTCCCTGCATATGATGCGG -AAGATCTTGATATATATGAGGGATGAAAATGCCGATGTTCGGGATTCAGACCGTATGTAG -ATATATACCTCCCAATCTAACAACATATACCACAAGTCAAACGCAATCCTCCAGGCGCGT -AACATGAAACGGCTCACACCATTCCGAGGTCCGCCGGAGACCGGAGCAAAGAGGTCACCT -TGTGCTTATTGCTTATCTGCGGCTGAGAATCCCCCAATGGTCGCCTAAAGCCTACGGTAG -GTAGCCTAGACCAGATTAGTATTCCCCTAACCGTAAGATGTTTCACGTAGCGCTGGCCAA -GTCCAAGAATTACCTGCACGGCCTTGAGCTCCTAAAATAAGTCTCAACTTGATTCTCTGC -CAATTCAGTCTAGTTGAATTTTTTTCTTTCCATAATCAAGATTTCAGTTGAAACAGTGGA 
-GAATGGAGAATGGAGCAACAAATGAGGTCATTCGCGGCTTCTGCCGTCTGATCGACTACA -CCGTGCTGTTTTGGGTGGTCCGGAGGGCTTTTCCCTGTAATTTTACTTCTTTTAGTCCTG -TTAGTCCGGTTTAGCTCCAAATACTTTCGTGTCAAGTCAAGGCTAGAAGGGGGCTCGTGT -GGAGTGCTACTGGTCTGGAACGTTGGGTATATGATATTCTCTTCCAAATCAGAGATCATC -CCGTGCGGATCGGCAATCTTTGTCCTTTCCCTTACGATATGGTACTGACTCGTTGCTTCG -CGGCAATGGCTAAAGTAACCTGTTTCACTCTGCACATTAGAGAAGCTAAATTAAAGATTG -AAGATTGAAGATCGAAGGTTTCAGGGGTAGATCCATGTTATATTTTCTCCTTGCTTATAG -CTTGACTATCACTTTGCTTTTACCATGCCGTACAACACAGCCCTATTACATCGAATACGT -AATATGATCATGGAGTACGGAGTATACAGTACGGAGGATATCATACACAGCTATCGGCTG -GAAAGACCGAGAAATAGCACCCTGTGCACGGGGTATAGCAAGCTACAAGGGTATATAGTA -TTCTCGTCAAGGATCGTGCAGGTACCAATGTCAACATGCGACCACGGACCCCTCATTCTG -GCGATGGAAACATCCCCCTGAAATCTGTAAATACACATCTGAAGAAACCGGAGTGCGCCG -AATACCCCATACGAAATAAAGAAATAAAAGGCGCAAGGGAATGGGAAAATTAAAAGCCTC -ACCCCCTTCCCACTAGCAAAGAATTCCCCGCCAGGACTAGGACCTACAGTAGCGCTAATG -GAAATGTACCCCATACATACCCATTGAACAAAAAGAGAAGAGTCACATAGATCCACTTCT -CGCTAAACCGTTCTAGACGCCTTGTTCCATGCGAAAGGGGAAAAAAAGCCAACGCGGTCA -CGGTAAGGGCGCACCTCGATCCCGTGCGCACCGATGTGGGGGTACGTTGCTCGGACATTG -GGGAACGGGGGGTGGATCTCTTTTCCTTTTTTTCCCATCGAATATTGGGAGTGTTAGGCG -CTTGGTGCTTTCAGGCATTGGGATATGTTAGGCACTAGTATCCCTGTCCCGGGACACGGG -CTTATCTTTGATACGCAGGCTCCGAGGTTGGGTCTAACTTGGTCTTTGCGAGTCCTGAGT -ACAACATAGACGCGTGGTTTACAATTGCTGGATAGCTGCGGTAGTGTTCTCGTGTCGAGG -TGTTATTGGGGTTGGCATGCATTTTGCTTTTTACTGCATTGCATGTTATCACGTATGGTT -GATGCCGGTCGGTTTACCTATCTAGCTGCATGTACGGAGTAGTACGGATAATGTTCTCTT -GTTCGATTGCACAGCATATTCTGATGCCAATTGGCAGTTCTAACGGTCTGTGCCTCTTGT -AGAAAGCAACAATGACTTTTGCTATCTGACCGTCGATGTTGTGATAATATACTCCGTGAT -GTTATGCGACGACTGTGAGATTGAGATGGCGAATCGCAGGATCCATAGTCCAATTAGCAT -TGAAATAACATACGTGTTCTGGCATACAATTGTGTATATATTGTACATTCTTCCAAGGTT -GTCAAATCCTTTAAGCGAACCAAGCTTCCCAGAGCCATGTTCACCAGTAACCTTATCCCG -GCCTAGAGCTTTGTAGACAAAAGTCTATGACATCTATGTCTGAATCATGTCATGTTGGAC -GGAGTAATCCGTATGATAACACTTAATCTTCTTATCAAGTGGATCGATATTGTACACCGG -AGATGCCTGAAGCCCTCCGAACAGATCTCTTCGGTCTTGGCTCTTGAGTCGGCAGTATCA -TCCACTTTTATAAGTGAGCCCATCATTTCGGCCGAAGATTAACTCTCTAGAAAGGTATGT 
-TGGGCATCTTCATATCAAGTGTGAAAGTGTTTTCTTGGCATAGCATGGCGCTCTCCTCAG -AGAACCTGAATCATAGAGTGGCGACACCCGGATCGACGAAAAAAACTCTGATTCCACGGG -TGACTGGGTGTGGAACTTCCAAGGTTGTGTCCGTTCAGGGGTGGTCAGATCATTTTAATT -TATTTTCTGCTCTCTACGGATGTGGCGATCGAGTGGATTGTGCTTCGGGGGTTTCTAGAA -TTCTAGAAGGTCTAGGCAGTTCCGCATGCAGTCTGAATTGAATCCTTTCGGTATGTTGTC -GAACTATGAAAATTCTGTCTATTTACTCGGAGTACTCCGACTATATTTTTCTTTCATACG -ATTCGCCTCCACCAAAGCAAGGCTATGTTTTCTTGGAATCCCTGTGACGCTCAGTCCGAG -GAAAGCTTGTGTGACATTTCAGTGTACAGATGTGTACGGAGTACGGATGTTGAAGAACGG -CATGATCGACAGGGCCAGATTGGCATTGAATACAAGCCCTCACTAGTGTGAGATTGGATA -TTTGATACGGTTGACTTTCCCAGATATAACCTTGGACAGGGGAGAGAGGTTCCAAGGTTA -GAATGACGTGCAAGACAATGTAAGGGAGGGAATGTCGTAACACGGCCTTATCGTGTAATT -TATTACACATAGAACCGTGGGTCGAGGATTGACATTCCGGTCTCGGATGGCGTGCTAGCG -GCCGGGTGATAAGAGGCTATCGTGGATTAGGAAGTAGATACGGATAGAATTGATATCGGA -AAGATGAAGTCGAAGATGAATGTATATTGTAGAGAATATCAAAAGTGCTCGAGTTCCTCG -TGTTCGGTGGAGCTTCAGGGGAATGAAAATTTGAAGTCGCCACGTTGGGCGCATTGGTCG -TTTTTCCAGAACCCATGGAAGCTTACACGTATGACTTCTGGAAGGATTGTGGAGTGATCA -AAACAAGGGCCCCGTCAAGTTACATAATACTAAATCGTTCAAGGCATACGACAGACGGGA -GTACAATGGTAGAGAGTCTCATTCTAACCTCAACACTCTAGGCTACATTTTCCAACACAT -CCCTTGCAGATCGTAGGCGCTTGTTCAAACGTGAGAGTTCAGCATGTTTCCATACTAAGA -CAAAAGGGAACACCAATCAGGAAATTCCATGAAGAAAAGAATTCAAAAAGCCGAATCGAG -AAATAGAAAAGAGAAAAAGAGAAGAAAAATGGAGAAAAATAGAGAAAAAGAAAAAGAAAA -AAAGAAAAAGGAAAAACAATTATGACATCTTATCGAAAGAAGACTCCACTTGGGCGCAGA -AACTAGTTTAGCGTCACCACGCAGATCCACTTCGATCCACTGGAGTCTACTGAGGTGCCC -CCCCCCGTAACGATTACCCAAGTATCGTCATGAATCATTCCAGAATCTTTACCCTTTTTT -CATTCTTAGTGTTAACATTTCCCGCCTTTTCACTCACTATCCGTTCTATATCGCTCTCCT -CTGTCCTTAACCTCTGACATACCCCTGTTACCCCAATCCTATATGCCTGTATCCCGATAA -CCCGCCGTTCTTTTAACAAGCTGGAGCCTCGTCCCACACGCAGGGGACGACCGGTGAGAA -AAAAGCCGATTAAGGGTCCCCGACGCACACTATACGCTATGCTTAAAAAGAAGACGTTGG -CACCGTCCCTCTTCTTCCGGAACCACACTCCTACCGCAACCCCCGAATTGTCCCCCACTT -CCTCAGACAGCGACTCCGAAGAGGACATGGAATCATCTGGCTCCCGGCCGGCCAGCCTGG -CTATCTCGTCGGGGGCCTTCTCTATGCGTCCGACGCTCGATGAGGTGCTGGCCAACATTG -CTCCACCCCCATACACCTTGAGCGCCTTCATGGCCTACCTCTCCCAAAATCATTGTCTCG 
-AAACCTTGGAGTTCACTATGGAGGCCAACCGATACCGGGATTCTTACTACGCACTGTCCG -AACGACTCGGCCACGCCGCTTTAGAATCCGAATGCTCGGATACTCAGCACCTGCAGATGC -TTTGGAAACGCCTTTTGACCGCCTACATCTTTCCGGGGTCGCCTCGCGAGATCAACCTGT -CGAGTGAGGTCCGAGACGCACTTTTGCAGTACAAGAACGTGTCCGCACCCCCCGTGCCTG -AGACGTTGGATTCCGCCGTGAAGCGTATTCACGATTTGATGGAGGAATCTATCTTCCTCC -CTTTCGTCAACAGCCACACCACCTCCCCCTCGATGGGACCCTCCGAACCTTTCATGGGAC -CCGACGACTTCATGAGTCTATCATCCACGAGCCTGGACGAGCACCCGATGAAGAGGGTGC -GGTCGCGCGTGTCGAAGCGCATCTCCCCGCAGTCCTCCACCAAGGATCTTCCCGCAACGG -GTCACAGCCGCTCAACCATGTCTCTTGGCGCCGTGCATGCCATCGGCAAACGCACTTCCG -GAGCCCTTCTCAGCACCAGCGGGGAGTCATCCTACCTCTCTCTGACCGACGACTCTTCCT -CCCCGCAGTCCAGTCCCACCGCCGAGGAGCCCATGACGCCGCCCACCACTCCACCTGCTA -GCGAGCCGCATCTGCCTATTCAAAGCCCCAGACTTCGTACCGAGAACCCTTGGAAGAAGA -TGGGTATGAAGCTTGGCTTTAAGAAGCGTTCCACTGGAGGCAGCAGTGGCGGTCCCCGCG -ATCCCAAGGTTCTTGGTCTTGACGACTAAATACCATTTCCTTTTCAACCACTTTTGTCGA -CATACAAAATTTACTCCTGACTCACGACAACCACGACCCTCTCAACGATACTTCTCATCT -CAACAAACCTTGCATAAAATACTTGCATCCCACGACTTCATTCTAGCCTTTGTCGAGCGT -TTATACTGGCGGTGTATGAGAGCATGATGCATTGAAAGCATACGAAACTGGCGTTTTCCT -TGTTATTTTTATTTTCTTATTGGATGTTCCGGCGGAGCATCTTCCATCTTCACTTGTCAT -TTTTTACCTTTTGTTTTGCAGCGGATGGCATCACGGGAAAAAAATCCCTTATATATCATA -TCATGTTCTCATCTATAGATTGGATAGAGGACCTGTTGGCTTTACAGGTCCTCTACACGC -ATGGCGTTTTTGTCGAGACATTGTCTTGGTCTCTCATCTCCCATTGGGATTTCATTTTAT -TCGGAATCAATAGACTTGATACCCCCTCTTCACATCATTTTAcaatagaccaatagacca -atagaccaatagaccaatagagaccaatagaccaatagagaccaatagaccGTAAATGCA -AACCCGTTGGTGTCATTAACTTTGAATCACATACGCAGACCTGCATACATCATGCATTCG -ACCGTTGGCCATTCCAAACTCAACAAATGAAAATAAGACCTTTCTAGCATACGTTCTCAA -CGTGTCTCAATTCATGAAAGATCACACGTCATCAGAACAGCTATCCGAGTATCCTTTCCC -AAACCCGAGACAAAGTGTCTAGACCCTTGTTGATCCTGGTCACACAGGTCGGACCACATT -CAAGACATGTCGGAGAGTCCGGGGAGACATACATATAGACGTCCAAGGGTTATACAGATG -CACACGTAACCCCCGTCCACAAACCATTCGATTATGCAATGTCCGACTCGGATAGATTAT -CCATGTGACTTTCCATTTGCTATAGTTATAAGATAAATTCAGAACGATTCAGATGTAAGA -TGAAGGCAATAATCAACGATAGAAGTGTAAATGAAGTATAAGTGTAAGTATAGATCAATA -TCAAATATCAATCCTCCTCCCACCCCTTGTCTCCAATGCCGCAATATCAATATCCCATCC 
-AACACCCCTTAACCGAGTCAAGTACCTATCAAACCGAGAATCATTGTTCAGCAAAGCAAG -TGTCCCCTCAACAACAGCAAGAACAACATCGACAGAAGACTCAATCGTCATTCTACCCAA -AACCCTTGCAGCCAATAAAGCATGAACCCACCCCTTCAACTGCTGCCCAACACCGCAGCC -CCGTTTCAGGAGGACAGATGCATCCCATTTGACATCGCCCGCGGTCTCCGTCGGTGCAAG -AAACAGGATGTACATCTCATCTGCGAAGACAGTCACAAGGCGCTGGAGCGGGAGGGTATT -TTTGAATGATCTAGACACGGAACCGCCTAGGAAAGTAGACATCGAGACGCCTATCTGTGC -AGATCCTAGATGATGGGTTCTTATATCCATGTTCGAGTACTGAGTCCACTGTAACGCGCC -GTCTCGGTGGAAGATACGCTCGTATCTCGATACCTGTGCTGGTGTGAGGATTGTCCATTC -GTTTTTGGATAGAGATGTAGGAGGACCATCGATATCGAGTGCTAGTGCTAAACTCTTGTC -CGAGTCTAGGAGGGCCGAGAAGACGATGTTTGCGCGTTGTCGGTTCAGACTTGTCATTTG -GACGGCGCGGACGGCGGCGTAGTTCATGCTTAGATGGAGGGCTAGGAGGAGTAGGAGGAT -GAGCCAGGTTGTAGTGAAGCTGGTTATGTGGGAAATTACTACGGATCCGACCTGTTGGAG -TTGCGCTTAGGGTTGGTTCGTAATGGAGAGGGTTAGAGATGGGCTTACCAGCATGCCGAA -GAGAGAGATAACAGTCTCCTGAGATGAGTCTTTCTATACAATGGAATATAGTATGTTAGC -TCTGCCTACCAACTTCCAAAGACGGTGCATAATGGGATTTGAGATTGGGACATACGGCAT -TAAGCTCTGCGAGATTCCCCCATTTAGCAAAATGAGCACTCAAGCTCGCCTTTGAGCTAC -CTCCGGCTACACCGCAGAGAGCCCTGAGGACACCCGCTGTGCTGAGGACAATCACCCTCG -GCGGTCCGGCGGGAACACCCGGCGAGAGACAGTCGAGGATCATGGCGATATCGTTAAAGA -CGTCAGCTGCTAGTCGGTAGGTTTTGCATTCTGGCTCGAGGGCTGTGCCGATGCGGTGGG -CGAAGCATATCGTTGCAATGCGGCCGGATATATCCTGTAGGATGTGTAGGAGCAGTGCGG -ATGTTGGAGAGGCGTTGGCATTGCCTACGCCGACTCCTGTATGTTGGGTTGAGTGTTAGT -TGGTTGGCCTGTGGTTGAAGTCTGGTCTATAGGGTGGGATATGGACCTTGGAGGACGGCT -CTGGAGGAGAGGAGGCCTGCTATTGAGCTGCAGAAGGCTTGGAGAGAGTCCTATTTGCGG -TGGATTGGTTGTGTTAGATCTGTGTTCCAGATGTATCTTTGATATTGCTTTTGGTATGGC -AGAGCAAGGTAGTTTGAGCCACGAGGTTTCTGTCTCTGGCAACTTGGAATGGATGCTCAC -AAAGATTTGATAACTGGATTTGTTATTAACATTTTTCATGATGAGGAATGCTTCATGCAT -ATCAACAGGCTGCAGTCTGTACTCACGGAAGATAGTCATCGCTGACACTTTGGGGGTACC -CAGAAGGCAGAAAAACATCACTCAGCGAGCTAACTAGGAGCTTGCCTGACCATGAGGTAG -ATGAAGAGCCCTGTTATACTAGGTCAGTTAAGATCGAGGACTTTGAGGTATTATTGTAGC -ATTGCAGGTCTTTCCGAATCATGTCATGGCATCTATATTTGAACAGGCATATGATAAATC -GAGATTGAATGAGAACAATTCATATACTTCATGCAAAAGAGAGTATGTGTGTTCTAGGAG -CAGTTGGTGAAGCTATTGACCCGTTTCCTAGGAGTGACCGGGATATTGGCCTATTCGGCA 
-TCAAGGACCACACGTGGGAACTTCAGTTGTGTGCCATTTGCTATTTCAACCCGTGCTTGA -AAGATACAGGGAAGTAAGGAATGACCGACAACTTACGATTAGATCTACGCGTCCAGCCGT -TGAGGAAGTGACCGAGGTAGCGACATCTCCCGAGTAGATGTAAGTTGCAGTCGGATATCC -AGCCTCATCCGTCTCCTGGAACGTCAGCCTGTTCGTCTCACTGTGAGAGGTCATGTTCAG -GCCTCGGGATGTGAAAGTAAACAAGGTTGCTCGGATCGAGGTAGAGTCGGGAAGGCACGT -GCTTGAGACCACGTACCTTTCTATTGATATCAATGCTCCGCCTATCAAAATAGCATGTAG -CTCTATCTACATACCTCAGTAGGGATATTCTGGAGTATCATTAAGAAAGCTGAGTCGAGA -AATTCATTTCTTCGAATTGTGCTCACAGAGCCTATGTAACAGGAACCAATTCCTGACCAA -AATCTATGTATAGAACGAACACTCCGCATTTCGACATCAAGACGATACAGCTTCATGTTG -CGCATGGAAATAAAACACCGAGAAAAATCAACCACCAGCGCCGGGTACAAGAAAACACCA -AGAGGGAAAAAAAAAATGGCGGCAAAGAATCACCGCTAGAGAAAGACGTCCTTGTCAATA -GCCTCTGGCCATCACAAGGAACAAGGTATAAGGTGAGAGTTTCTGGTCTCGGAAACTCTC -GGAACGAGTGAGCAAGCATCAGGTCTGTCTCCGACCGCGAGGGCATTGGCAATGGAACGG -TCGGCCAAAGTCAAGGTCATGACCGCGAGTCACCAGCCGAGACGCAGTCGAGAGGACCCA -AATCTGCTCATCATGCAACTTCCACAGGAAGTATCATGCCCCGTTCATACGTTTTGGAAA -GAAAAAAAAAAGAAAATTTTCCCGAGGGAAGGCTTATGCGAACATGCTGGCGCCACCGGT -CTCGATGGCCTCCATCTCAGCCTGAGTCCAGGGGAGACGACCGAAGCGGGAGGGGAGCTC -AGATCGGTCAAAGAACTCGCCGGCCTGGGGCTTGACAGGGCCTAGCGAGGCGCCGGGCTG -GCCGCCGATGGTGCCGTTGAAGGAAGAACGGCCGAGAGGGCCGTGCTGCTGAGCCTTAGC -ACGGTAGGTGGCGAAGGAATCAGGGAGAGAGCCGGTGGGAGAAGCGGGGTGGACGCGAGG -AGAGTGGTCAATGGACTCTGAAATTTGAGGTTAGCTTATGGAAATTTGGAGCTTGACGAT -GGAAATTTCACATACTGGGGACAGAGCGCTTTCCAACGAAGCGAATGAGAGGAGTGCGCG -TTGCGCTGCTGAGGACGGAGGTGACACGCATGGTGGTAGTTATATGGTGGTTATGTGTAA -AGGTGGGAGGTTGGCAAGATCAATTTGACGCGGGAATGTGCGAAATTGAACGGGTCCGGT -AGTATCCGAATCCGACCGAGAGGAGTCACAACAAGAGAGTGAGGTATGGTTTTTGAAGGT -AGTGATAATGCTAGAGTAaagagaaagtgtagagaattaaagaattagagaattgagtga -agaagaggaggggaaaggtgggaaaagcagagaagataagaAGGGAGAGTTACCGCATTG -GGAGCCGATTCGGAACCGGGTTATGTACAGTGTACACGGTATAATCGAGTACATCTACAA -TATTTATTACTACGTGCATCATATTAAAATATGTACACGGTATAACCAAGTACATTGCTT -AATAAAACACAAGTCACGCCATAATCTCAGCGTTTTCTACTTGTACTTTGCATCGGAAAC -GCATCCCCGCGCAAGCCGCCTCTACCTTCCAAGGCCGGCAATCAAGTGAAGCCATATATG -CCCCTCTGATCCTTCCCCCATTTGTGACTCCGGTGAGTTCATTAGGGAGCTCCTCCCCCA 
-TTTCGGTGCCTTTTCGGTGCCTGACGCCTACTTTTCGCGTTCTTTGAACCTCCAATCCCC -CAATCATCATCCCTGTCTGTGGGCAATCTGCTGTCCTTCCCCATAGACCAATTCAAGCTT -ATTTGTCCACACAGACCTCCCCCCGCAATCGGAGGCAAAAGTCCATTCCAGAATTGTGAC -TGGATAACATCTTGACATCTTGAAGTTATCTGTGCACCGGATTTGATATTCCAGGGTTTC -ACAAGCATCGGAAAAGTCCAAGGTTGGACACTTTGTTGGACACATTGAATACATCGCCAA -GAAAGAGAACCAAACAGACCAAACAGACCAAACAGGATCATTAGACGCTATTAGCAATAC -CAGCGACACATAAAAATAATCTGATCTCTGATAACATTGCGCCACTCAAGCTTGGACCCT -GACTAACGCATCCCGGATTCACCCGCTCCCGTTACCTTACACATATCAATATGCACGAGC -AGTCGCTTTTCGCCTCGGTTCCAGTCCACCAACACCATGAGCTTCTGCAGCAGCTTGCCG -GATTGACGGCCATGCAGCCCCGCCATTGCCTAGAGCGGCGGTTGATCTTCAAAGCCTACC -GTAAACCAGGGTTGGCGGGGCGCACGGGAGCCAGTCAGGACCTTCAACAGGCTGATTTGC -AGCGGCTGAACAAGATGCTCAATGGGAACATGTACTACACACAGGTGGTTGGTCCGGTTT -CCGAAACGGATTTCGGCTCCACTGCCCCCGTCGATCAAGATACTCAAATGGCCGGCATGG -ACGACTCGAAGCCTTCTTCATCTTACGACTATGAACGTCAGCCATGGAAGTTAGAGTTCC -GTGATATCCCGGAAGCTGGCACCCGCTCAGCGGTTACCTCGCGTCTGATGTCCAGCGCTA -CGCTGCCGCGGGGAGATGTCGTGCAAGCTATGAATGCGTGGGGCTATCAGTAAGTGGCGC -CATCGAGTCGATCTTATGATCGCGGGAACTAACCCCGCCTATCTCTAGTTTCACTACTGA -GTATGTGGTTGAAGGTGATATGTTCATCCACAATGACATTGCTATCTTCTTGCATCGCGT -GCTGCACTACCCGGGTGCGGATGAGTTGAATGTGCCCCGGCGGCAGCTCCCTGCTTTGAA -GGACATGACTCTCTTTGAGAAGAGCGGAAGCTATGTGCTGCAGGCCTTTATCATTGTCCA -AGACGGTAGCAACCAGGAGACGATGAAAACTGCCTCGCAGCATCTCTTCGGGTTGCGCGA -ACAGTTAAAATCGGCGGTTCTCCTGGAGCAGGCAGATCGACTGTCTCTTGATACACGGGC -GAAGTAGATGTCCACTCCTACTTTGATTCGTTTCAGCATGCTGGAGACTACGAAATACCC -ATCTCCAGCGATGATATGGCGTTGATTAAAAAAAAGTATACCCACCATAGCAATCCAGTC -CACTGCACATTTCAATTCCTCAGACCCAGAGTGAGAGTAGAGATAGCATGAGCCAAAACT -TACTATAGTTTCTATAAGTAAAACACCAATGCCTATCCTGATGTCCGGGTAGGACATAGC -TGAATAGGGAAAATATCAGCTGCGTCCTACCTAGACGAACACCACGAAAAGGGGAAAGAA -AGTGAAAACTAGAAAATCCACATGCTGGGTTGGAGTTATTCATCCGAGCTCGCCGTGCAT -TTTGGGCCGAAATCATCCATACTTTGTGTCAAGTCGGGTCAAACAGATATGTCTTCATGT -CTGTCTCAAGAGGAGGCCGTCCTTGCTTTTTTCTCACATCATTTTTTTGATCTTATATCT -TATATTTGAATAGCGCACAACTGGCTGTATACCATTCACTTTAAATGTTCAGAACATGTG -ACTTCGTGATTGCTGCCGCGTCCCGGCGCGCGTCCAACACCTCAGAGTCAACTGCAACTC 
-GCTGCAAAGACCGCTCGAATCAACAACCACATCAAATCTTTCCAAAAACCCTTCACGACG -TTTTCACTCACCCGAACAGTTAGAATGGCCGAGGGGGCTGCCAAGAGAAGCGCCTCCCCC -ACCCAACCCATCTCTCCACCACCATTGCGCCGCAAGGTTGACTCGACGGTAACCAGTGAG -TCTTACTATTCATTAGCATAATGTACCTACTGATATCAATCAATGTAAAGAGAAAACAGC -AGCAAACTTTTTCACTCCAGCCTCGCAAAAGAAGCCCGACCCCATCACATGGCGCGTCAT -TGGCACAAGTGTCATCATCGGAAAGCAGATAACAGGCAAAGCAGGCCCTCCCACTGAAAA -ACAGCGCCGCATTGCTGGATTTGATTTGGTGAGTAGTTAACCCAACACTTGAACAGATAT -CGTCGAGGGAAAGGCCCAACTGACATTAAGATATAGGACTCGACCTTGATCAAAACCAAA -TCGGGCAATGTCTTTCCCAAATCTGCCACTGATTGGCAATGGTGGAACGCCAAAGTTCCG -GGGCGGCTGAAGGAGCTTAATGCTGAAGGGTATGTCTAAATTCGGGGTATAGGATTACTT -GCGTTCACTCGAGCTTACACTTTGACAGCTTCCAAGTTGTGATCTTCTCCAACCAAAAGA -AGATCAGCGTCCAGAAGGATATCAAGGGCGGACGCAGTGACTCTAAAAGTCTGTCAAATT -TCAAAGAAAAGATGACCGCTGTCATGACGGAACTGGATTTCCCCGTCAGCGTTTACGCGG -CCACAACGGACCCTGAATATAGGAAACCCCGTCTCGGGATGTGGCGCGAGTTCCTGGACG -ACTATGATCTTGACGTTGCGGGTGTTGACTTGCCCGCGTCTGTGTTCGTCGGTGATGCGG -CTGGCAGGCCGGGTGATTATTCAGCTGTGGATCGGTAAGATCATCTTCACTCTTAGTAGT -ATGTCGTGACAAAATCTGATTTTGGCTATAGGGGACTTGCTACCAACATCGGTATGCCAT -TCAAGACTCCCGAGGAATTTTTCCTCGGGCAGATCACGGAGCCGGCGACGATGGGCTTTG -ACCCTATGTCCTTTGTAAAGTCGGATCTCGAAGAGCCGGGTATGTGCTGTCTTGTTGCCT -CCAATGAACTTGATATTCCCCGCAAGAAACCCAATTCCGGCAAATTGTTGACAGGTCTTC -ATCGCAGCCAAACCCTTCACTCGCAAGCACCCACTTGAACTTGTTATTTTTTGTGGCAGC -CCAGGTGCCGGGAAGTCCACTTTCTACTGGAATCACCTGGAGCCCCTCGGCTATGAACGA -GTCAACCAAGATCTTCTCAAGACAGTATGTGTTACAACTAACCCGAAATCAAACTGAAAT -CAAATTCTGACGGACAACAGCGTCCGAAATGTCTCAAGGTCGCTCGCGAGCACTTGCAAG -CCAAGAAGTCTGTGGCAGTCGGTACGTCATGGCATTTATTTCCATCTTAGCTTCAGCTTC -ATTTCCAAGCGATCTAACCCGATTACTCAGATAATACGAATGCCGACCCAGAAACACGAT -CTCACTGGACATCTCTCGCAAAAGAGCTGAACATCCCTATCCGTTGCGTACAATTCATAT -CGACGCCAGACCTATGCAAGCACAACAACGCCGTCCGAGCGTCCAACAAAGAACTGGTAT -GTCCTGGCAATATCCCCACCTCCATGTCTGTCTCATCCTTATTCTCATCCCAACCAGAAC -CCCGAATCCCGAACCTCCCTCCCCGGCATCGCCTTTGGCGATTTCGGTCGCCGCTTCCGC -GCACCAACGCTGGACGAGGGCTTCGACGACATCATCCCTGTCGAGTTCCAGTTCCACGGC -AGCGAAGAAGCAAAGGGTCTTTGGGGTCAGTACTGGGTTTGAAACCAGTGACCAAGACTA 
-GTATATAAGTCTATTACTGATTTTCCGTGGCGTATGCTTACGTCTTCATTGTTCTCATGC -TCATGTTGATCTCTTTAGCTGTATATGTTGAATCTGAAACACGGCTCGTGTCTATAGACC -TATTACCCGTTGATTGGGTTTCACCTGACCTGAAATCGCGAATGTTGATCTCGACTGCGT -CAATGTATGTTTCGGTTCTGTGGTAATCTTAGATCAAAGCATTCTCAGCTCTGAGGTGGA -CCCTGATGGAGGGGTATGAAATGCAAATTTGGGATATATTGGATGTGGTATAACAAAACT -TTCAGCCATGACCTACATACTTCCAGATTTAGCACATAACTATATCATCTACAGAATACA -TACAGATAGGTGTATGTTAGGGGTAGTAAGCGCTCAGACATCGCACTTTAGTACACATAC -ACACATAAAAAACACAAGTATACTCTATACTCCATAATTACCTGAGATACCCAGACCCAG -CAAACGCAATCTGAATCCGAAATAACGACTCTGACTCACAATATCAACGACGGAAGAAAA -AACCGAGCCTATTTGCCAAGCCTCGGCACAGACAGGTACAGGCACCACGACACTCGCACT -AAGCTCAATTGTGCCACTACTGACATGTCTCAAGTGGATACATTGTAAAGCCCACAGGTG -TTCATGTCGGTATACTATACACATTATGTTCAAATGGCGGTAGCTTCACAACGTGTCCGA -CCCACACTACCAAGTCACTCTCCACTGTGTTGTTGTACTGTTGTACACCTACATGTGACT -CGTCCACACGCACTTCCCGCTTTCACAGTCGTCTCCCACGTAGGGATTCTATTGCCGTGT -CATCTCTTCCAATGACATGTACAGTATGGAGTCGACAATGACAAGCACAGTGGGATTATG -TTCAAAGAGAATTGTAATATGGATGTAATTATAGACATGTCCATATGATGATCATATGTG -TAATTGTGCAAGATATGTACATATTGGAAACAAGGATATGGTACTTTTTTCAGATGCTTT -TTTAGCTTTTTGTACTCTGTGGAACAGAGTGGATGGATAGGGTTGAAGTCTATGGGGTAC -AACGGCTGGTAGGCTTGGTGTATGGGGGGTATAGTTATGTGTAATGAAGACATGAATGAG -AGGATGGGTGCATACACCCAGTCAGTAAGGGGGGATAACTCTTCTTTTTCTTGTCAACTG -TTGAAGGGAGTGTGATGTCTTGGGAAAAGTAGACATCAGGATCAAGAAATGGAAAAGATA -ATCCCGGGATAGACAAGACTCGGGATATAGACGAAGGAAAGACATGATTCGGGGCGAGAA -CGGTAGTCGCTGATCCGGATCGTGAAGTCGTAAGAGTCATTATAGATAGTTTCAAGAAGC -TCAAAGAAAGAAATAACCAAAGGAGAGACATTAATCGTAACAGTGCGGGCGATATTGAAG -TCATCAGAAGGCGCGCGGGGAATAATCTATCGTCATTATCGTTGCGGATCGCTTGAAAAA -ATGAGAAATTAATAAGAGAAGAACCTGAGCCCAACGAAATGACTCAAGTCTCAAGGCCGG -GAGACTAAATCCTGGAAAAATCGGAAGAAATTCCTCTTTTTCTCAGAACGGCAGGGACCT -GCAATCTCCGTAGCCGGTGCCACAGTCATTGGGCCCACAATATCCTCATCCCCCATATCG -GGGAACTTGTCGTTGTCATGTAGACGCGGGATACGGAGTTGTTGGCACGGGTGGGTATTA -GACTGAGTGAGCGTGAGAGCAACACCATCAATAGCGCTGAGCTGGTCGAGCATGGTCTCG -TGCTGCGTTGTCCATGCGCGGTTGATCGACTCCCATTTCTCCACGGTGAGCTTGTAAATG -TTCGAAGTGACACCGTAGTGCTCGCCAGTGCGGACGAGATGCTTGGCAAAGTTGCAACGC 
-GCAATGTCCCAGTCGGCACGTAGCTGCTGGAAATGGCCGAAGTCGCAAACCACCGGGTCG -TAAGGCTCGGGGGTAATGGTGAAATTGGCGTGGCCGAGTTTGTTGGCCCATTCGTCGTTA -GGGTCGGAGAGGACGAGGGGTGTCACGTCGCCTGAGACTGTGCGCACGGTGCTGGCTCGT -GGTCGGTTGAATGAGGCCACGGAGGGCTTGGACGAGAGTGGTGGCGAGGGGAGGATCCCC -ATATACCGTGGGTCAGAGGTCACATTGTAATTATGGTGTGGTTCGTTCATGTCTGCCACA -GAGGGAGACTGTAATGGTGACTGGACGCGTGATGCCGCGACGCTTGTTGGGTCGTCTCCC -ATAAACGGTGGTAGGAGAGGAGTTGTCGCTTTTTGTATATCATTGTCATCCTGGCTGAGA -CTGTTTGCTCGGGCGATATCGATTGGGCGTACGCCAGACTCGCTCGCGCGTTCTTCGAAG -ATCGTCCGCGCAGGAGAGGGGGGAATCGAGTCGACCCGGCTCATGGGCATCGAACCAACG -ACAGGACTTGCCAAGGCGGAGGAGGCCGCCGAGTTTGCGCGTGATCGTAAATCATCCGGG -AAAATATCGCCTTCTGCACCACCGGAGGATCGTTTGGTCTTCCACCGGGCTGACATGGTC -GAAATGTGTGAGTTAATGCGCGAGGCACCCTTTAATGAGACACTGTCCGAGCGTCTTTTT -TTCGGTGATGGAGCAGTCTCCATGCTCGCAACGCTCAGATCGTAGCTGTCTGAGTTATCG -GCGGGAGTAAAGTATGACTCTTCTGCGGATATGCCGTAATCAAAATGTTCTATAGAGAGT -CCGATGTTAGCAACTGTGGTCGCAGGGATTTTTCTTTTGCAGTGGCGATTAACACATACG -GAAATAGTGATCTTCCAACGAGTCACGCGTGCAGTCTTCTTCTTTAACTCCGGTCAAATG -ACTCCGACTGGGACTGTCCATTGAATTTCCCAGACCCGATGCAGAGACGAGGGAAGAGAC -CGAGCTGGAGACCCGGTTGTGGCCTGTCGCAGAGAACAAAGAGATGGCAGGTGAGGCCGG -CGCGGAGGTATTAATAGTCAATGGGGAGTGCGGCTGGCGTGAGTCCGCGTCGAGACTTTC -TCGTGTCGCGTCGATTATTTGAGACAGTTTATACATTTTCAGGTGAGAGCCGCTGAACGG -GGTGGCTGGTATAAGGGGATAGGTTGCAGTTCAGCATTGGATGTGAGGGATGACCACAAG -ATAAAGAGACGATTCAGTTTGAACAAGTCATTCCCTCAGGCTTTGGGCGGAAACCGGTAG -CGGTGTGATGGGTATAATAGTCCGGGAAAGGCGGGAACTGAAGTCTACCAGTTAGCATCC -GATGAGGAAATGCTGATTATCGTCCTGGTGGAAAAACAGTCCAAATGTAGGAAATATAAT -AAAGAAAAACAAAAAAACGAAAAATCAATCTCGAGTGACGTGGACCGACTTGTTTGGTCT -TGTCAACTTGGGCAGGAAAGTGCTTTTTGCTTACAATCGGCAAACGGACTGGCGAACAGG -GAGATCCTGGGTAGTACAGGTTGACAGAAGAGATCGAAAGTGTAGAATTCGAGTTGGACA -AATTCCATCCGATTCTCCAAAAGTTGACTAACACAACAACGGACTAAACTCGCGGGTTTA -AAAATATTTCCCTGGGTTACTCCGCTGAGTTCCGGTGGTTGACACTTTCTTGAACCATGG -CCGGAGAGAGTCTGGAACGCTTCTTTTTTTCTTTTTTATTCTTTTTATTCTTTTACTCTT -TTTCATCTATTTATTTACATCCAACAGCCATGTATATGGACGCATTCCAAAAGAGAGATA -TAAAGCTATCCTAATTGTTCTTCAAGCAGCTACACAGATACGCTGTGTAGCCTCTACAAA 
-AACCTGTGTGTGCGATCGGCAACCCAAACAATCTAGACTAAAGTGGATTCAAGGAACCAT -TATGATGCCATTTCGACCCAGTCACCCGGTGTTGATTCGCTCTATAGATCATCTTGGATA -CCCTTATCCTTATTTAATCATCTGTTGACTCGGTTATCATTCGGCCCAATGAGGCATACC -CATCCTCTTCGACGGCCTGGTGCATCCACACCCGTTTGGAGACGCTGAGCAACTGCAGAC -AAGGTCCCGTCCCGTAGAAGTCAGAACTTTGTCCGAGTATGACGAGAAGGGTTGCACAGG -GGTTTGGAGGGGTATCGGTTCGAAATCCAAGTGCTAGGTATAGAGTAATATACTTGTGCG -AACTCCGAACGAGACCTGGACTGTGACAGGATGGAGACGGGAGATCCAATCATAGGAATC -ATCGGGATGAAGGGGAAAACGGCAAATCCTTTCACTTTCACTTTCTGGCACACGTCAGGT -CCACATGGCTTGAACCGTACTCGGGGAATTCGGGGAACATTCCATTTTTCCAGAGGCAAA -GAGGAAAGGGTAAGCCCGTACTTCAGGGCAGTTTGGTGTGCCCAGGCAGTTTGGGGGCGT -AGGGCGAGTCTTGGCGCAGCCAAGATTCTCGGCTTTATCAACGGGACTTGGACGAGTCAA -CTTTGTGGTTCCCTTATGTGGTAGACTTCGGGTGACTTTGAGTCTTGTGATATATCAGAT -TCTGCGTCTTCTTTTTCCCCCTGGAGATCTCTCCGATCTTATTGCTCGCTAGAAGTATCC -CTGCAGGTTACTAGGACAGAACAGGCCTGCAGTCCAAAATGACATACTCCGTAGAACTGG -ATCTCTTAGCGGTGGCCAATCACCGCCACTCGGCAAAGGAAAAAAAAAGAACCTAATCGG -GTGGATTGCTGAGGTAATCACCTAATTAGGAAGGCACACTTCCGATCTCGGACTGTTATG -TCATATTCAAACCCCTCCAGGAACACATCCCACATGGCGATATAGATAGTCTTGGACATT -GAGATTCATCTACCCCGTACTCCATATATATTGTATCGTCGAGATATCTAAACTACTCAA -AATTGCCATTGCTGTATCCAATTTCGCATGAGCGAAGGAGAGACCGGGCCGAGGGCTCTC -ATGCTGACACTAGCCTCTCTCGCCCGTCGGACATCCAGCGCCACAGACAATGCACTGTTC -TCGCCTTGGATCCGACGGTAGAAATCCAGAAGCTTGAGTGCGGGCCTCGAGGCAATATCC -GCACTACTAGGGCTAGTGATACTTTCCAGCTCGGCAACCCATGCGGAAAACGGAACCGGC -TCGACTGCATATTGGCTCTGGATAGCAGGGACTAAGGACGACCATGCTGCTCGTTCAGGA -TTCGTGAGATGAAACACGGCCCAGCGCTCTTCGGCCGAGCTCATATGACGTGTGTTGACA -ATCTCCACTATAATATTGGATAGAGTGTCCTAAATAAAGCAAATTAACAAAGTGATCAGC -GGTAGATCTTGGCCTCTTTTTTTTTTTTTGTTCCTCCGGCCCAGTTTTTGAACCTGGTCC -GATATATAGTGCTCGGGATTTCGGCTTACCACTGGCACCCAGTCGACCGCGCTCGAGCCG -AGTCGGTTAGGGATTTTCCCCATCGCTTTCGACGTCGCGATGATCGTCGGTAGCCATTCC -TGCCTATTCCATTGTCCGCGCGTGGTGGTCGGTCCGGCGACCTGTCCGACACGATAAACT -GATGTCGGGATTTGGGATCGTTGCGAGGCTTCGAGACAGATGCGCTCGGCAACATACTTG -GATTCGCCGTAGCCTTGTGGGAGAACCACTGCAATGTCCTCCATCGGTACTTCTGGGACC -AAAGCTCCCATTTCTGGGGTCCAAGCACCTACCGTGCCGATTGAGGAGATGAAGTGAACA 
-TGAGCGTTGCTACGGCTGCTCAAGCTGAAATCGATAAAGCGTCGCACACCCTGGATGTGG -GTGTCCTCGAAAGAATCGACCGAATGGTTGAAGTCGACTTTCCAGGCGTTGTGGATGATG -GTGTCAACTGAGTCGAGAAGTTCTTCGTACTTGGACTCGTTCAGGCCGAATCGTGGCTCT -CCGAATGAGGCTTGTAGGAACTCGACCTTGTCCTTCCAGTCGTTTGTGTCGAGGTGCAGT -CCTTTCTCTTCGAAGCCTTTCTTCTGCCGTAATTCGGCATCCGAACGATTGAGGCAGTAG -ACTTTAGTGATGGAATCGCTGTTCAGCAGAGAATGCAGTAGATAGGTTCCCAGTGAGCCA -GTGGAACCAGTCAGGATCACAGTCGAAGGCGAATGCAATCCATTATGGGAGTAGAATTCC -CGCGCTGGAAGATCGGACGTATACTTTTCCACCAGTCTCTGGATCTTTTCTATTCGCGGG -GTCTCAGCTTGGGATTCTCCATCTAAGATAGCCATCAGGACTCTAGACAAGCGATCGCCT -GTTGGGTTGCTGTAGATAGTTTGTGCATTGATAACTCCTGCCTTCACATCCGGCCGGCGT -GACTGGATGCTCTTTTGCAGCACCTGAGAGACATGAATGGTCATCAGGGAATCAAGACCG -GCAGAGTAAAAGTCCTGAGTTTCTGCAATCTTGGGTTTCTCCAGGACATGGGTAATAATC -TGGCGAATATACTCTGTAATACTCTCTCGGTCCAAGGTCTCTGGCAAACACTGGTCCAAG -TCATCATCAAGGTCTTCGGCATACAATGCATCAATCTCATTCTCGTAGTCTTGAATCACA -GCCCGGCGCCGCACGCTCCCTTTAGGAGTCGTTTTAAATGGCTTCGTCTTCACTGCGAAC -CTGATTTTGTTCTTCATCACCCGGCCATGACTCGCAACGCCCAGGTTAGCCACTTGCACT -GTCGGCCAGATCTTATCAACCATCACCCTCTCATCCATATCCTGTCCTCTATGACCGGGC -TCGATGAGAAGCCCAGCCTGAAATCTAGACTGGCCGACTACCACCGCGTTTCCAACCAGC -GGGTGTCCTTCAATGATAGTCTCCATCCCAACAGGGTTGAACTTCTCCCCGTTGCTTAGG -ACAATCACATCATCCTTACGCCCATGGAATTTCCACAGTCGCGGATGTGTCGGGTGCCGT -GTATAAAGATCGTTGGTTGGATAAGAATCGATGTCGGGGAATGTGTGGAAAATACTCTTG -AGGTCGCGAGCAATTCCCTGCCGAGGAATCACCATCTCGAATACACCTTCGCCGGCTTCT -TGCATTTCCACGCCGTGAGCTTCGTTCCATTCGATGTAGTTCCAGTCGGCTGGATCCTCC -GGCACCAACGCTGGAATTATGCCGGCCTCACTACTTCCCATCATCATTTTTATCTGCGTA -TACTTGCGTAGTCGCTCTCCGGCCTCGAAAGAAATAGGCGCGCCACCAAAGTACATTGTT -TTGAGTTCCTTCAAACAAGCCAGTGCTTTGTCGGAGTGGCTCATGTCTTCCAAGATCGAG -GGTGGACAGATTGCGGTTGTGGGATGGGTAGTGCGCACCAGCTCTATCATATGTTCGACT -GAGAGTGGCTTGTCTGGCCTAATAAGGGCGGGAAATTCGAACCACACCGAGAGCAAAAGT -GGTACCAGTCCCATTAGATGGAAGATAGGCGTTGTAGCCAGCACCAGGTCGCTTTGATCC -AGATCATAGTACATACTTGGCTGACGGCCTACCGGCCTTGGGAGATAACGATGAAATTCG -ACGGTCATGAAGAAGCCGTGAGTGAGATATACCGGCTTTGGCATGGCTGTAAAAGAAATC -AATGCAGGTTCACAAGACCATGTTGATGGGGTTTACCTGTCGTCCCAGAGCTATGGATGA 
-TGCACGCTGTGTTATTCTCTGCCTCCGCATAAGACCTGGTATTCGGATAGTGGCGCAGAC -CACTTTCGTCACTGAGTATATTCTCCAGAGTCGGGACTTCAAAAATATCCAATGTCGAAA -CAAGCCCCTGGATTTCCAAGGCTCGTGCCTGGCGCTCCTCGCTGAAGAAGAACTTGGTGC -AGTTTGTCACTTTCAGTAGATGCACATGGGCTTCGTCCGAATTCCTGGTGGAAGGCAAGA -AAGCCTGGAGAGATCAGTCCGACTGTTCTATCTTTAGCTTTAGTTGCTTTATTGTCTTAC -TTGGTATCCAGTCTTTTGGCATGCAATCATAAATACGAAATAGCGCACATCATTTCTCCC -CATATAGGCCAATGTCTCTGGATATTGTGCCGGTCCAATGTTGCTTTCAATCCACCAACA -CGTGAAGTTCACGGCCCGTGCCAGGTCTCTGAATGAAAAGTCTTGGAATCCATCACTTAT -GTCCGATCCCCGTGGAATAACTGCAAATAGTTGGTTGGTACTGGTTTCAGCACGCTCATC -GACCGTTGTCGCAAGCAGTCTTTTCCGATGAGACTCCATGCTCTTGTCTGGATCTCAAAT -TTGTGTTCCGGGAAACAAGTCAATTTGATGCAATAAGATTGCTAGGGGATATCGAATAGA -ACAATGCGATGTGCTGGGAACTATCCATTTCAAGGGGACGTAGAATCCATATTTATACCC -GATATGCATGCATACTTCCAGGAACCACCATTTGCCTTGACTAGATGCTCTTGGCTCAGG -GTGTCACCGAAGTGGCAAGTCAATATTCATTATTGACGTTGTGTACCATATTTTCAGTAT -AGCATATATAGCTCATACATTAGGATAGCAAGTAGCATGAAAGTGTAGAACATTGCTCAT -ATGACACTAGTTCCACTCCCATTTAGACTTTATTCCGCGTGCCGAGCATTACTATGCTTC -GTTCTGGCTTCATAACAATCGATTTCTTCCATCCTCCGGGATCATGATGCCGGACTATGT -TGGAAATATCCAACATAAGCAAGGGGTGGCTATATCTCAAGGCAGGTGGGTCCGACACAG -AATAGGGTTTACCAAAAGATCACTATGTAAATCTGGGATAATTTATGCCGGTAATGATTG -TTCTATGTGTATGCTGTGACCAATTGATCACCTGTGTATGTAGCCATGCATCATGTCAGG -TTCCCAATTCATGTCCCTCATATAGATTCGCGTTACAAGAGATCTTGGTAAAGCGATGAT -CTACAAATGACAATGCATGTCTGTACGATGTAGATATCTGTGGGTATGTTCATGATCCGA -TCTGTGCTTTACTATCAACTGTAACGCTTAACTAAAAATTGTACACAAGCTCTTCTTTTT -TTTTTTAAAGAGTAAGACTATCACCTACTTGCTAGCGGATCTTTTGCATGTAATTACTTA -GATGTAAATCTTGGTAGATAGTATGGAGGTAGTCCACACTTTTTGACCCAAATAATAAAA -TTAACTCTGATGTTCTGGGTCTTTTCCAAAGCCTGCTCGTTCCTGCTTGTTTGTAGATAT -ATATCGCAGATGAAACGTTGGTTTGAGCCTGAGATATACCGACATCCATATATCCACACC -GCTCCCAGTCCTTGTGCAACTCTCTTCAACAATGAGCCGAACACCGATCTATACTTCTTC -ATATCTGAACAAAAATATAGATCTTGGGATGACAAAATACCTGAAAACGATCGGCCTGGC -ATGAACCAGGGCAATTCAAGACAACCAAACAATTAAATAAGACAGGGCTAATATGCAATG -GGTTTGAACTCATTCGATTTTTTATTCTGATCTTAGTCCTCGACTGCTGTTTTGATATTG -ATCCACGTGACAACGGATTTGTTTCCGATCTCGGACATCGGACCTATTCAAGCCTCGCAA 
-AATAAACTCTCTGGGTGCTGGAGCCATTCAGTTGTTTTGGATACACAACACATAATCATT -GGAACATATACATCATCCAATCAATGAAGTTCTCTTCGTATCTCGCGGCTGCGGCCATCA -TATCCGGTGCTCAGGCTTCTCCGCACCCCGCACCTCAACAGCAACAGCTCCTGGGCGTGC -CATCCACACCGTCCCAGCCCAGCTACAGCCAAGCAGGCCTAGATGATATAATCAGCGATT -CACCTTTACTATCTCTTCACAGAGATTTAGTCAAGATCGAGTCAATCTCTGGCAACGAGC -ACGATGTCGGTCTCTTCGTCGCGCAGTACCTAGAGGCCCGGGACTTCAATGTGGTAAAGC -AGGAGGTGCCCCCAGTGAAAGGACAAGAGGATACCAAGACCCGCTACAACATCTATGCTG -TTCCAAAATCCTACACACAACCTCCCTCAATCCTTCTCACAAGTCACATTGACACCGTCC -CACCCTTCATCCCATACTCCGTGCACCAGCCCACCACCGACTCAACCGTCGACCTAGAGG -ATATCATCCTAGGAGGCCGTGGCTCCGTCGATGCAAAGGGCAGCGTCGCAGCCCAGATCT -TCGCAACCCTCGAAACTCTAGACTCAACCCCAGATGCCAAGCTCGGCCTACTCTTCGTCG -TCGGCGAAGAAATCGGCGGCGACGGCATGAAAGTCTTCTCAAACTCAACCCTCAACGCCA -AAAGCGCTTTCAAAACTATCATTTTCGGCGAACCAACCGAGGCTGCCCTTGTCGCCGGCC -ACAAAGGCATGCTCGGCTTCAAGGTCCTCGCTCACGGCAGCGCCGCACACTCGGGATACC -CCTGGCTGGGCAAGAGCGCCGTCTCTGCGATTCTGCCTGCGCTCTCGCGCGTCGATGTGC -TTGGTGACATTCCCGCCGAAGAGGGCGGTCTACCTGCATCCCCGAAGTACGGACCCACGA -CGCTGAATATTGGCGTCATTCGCGCTGGTGTCGCGACGAATGTTGTTCCTTCTGAGGCGT -GGGCAGATGTTGCTGTTAGACTTGCAGCTGGTACGCCTGCTGAGGCGCGTGATATTATTC -AGCGCGCTGTTGAGAAGGCCGTGAAGGATGTTGAGGCGGAGGTTGTTTTGGACTTTGTTT -CGCATGGTGAATCTTATCCGCCGCAGGATTTGGATGTTGATGTTGATGGGTTCAATGTCA -CTACTGTTAACTATGGTACTGATGTGCCAAATCTGGCTGTTGGTCCTGATGTCAAGCGCT -ACTTGTATGGGCCTGGAAGTATCTTTGTTGCTCATGGTGATAATGAGGCTTTGAGCATCC -GCCAGCTTAAGGATGCTGTTGTTGGTTATAAAACGCTGATTGAGGCTGCGCTGGAGAGAG -AGGCGAAGAGCAAGGTGGCTGTATGATTGATCTATTGTCACATAGTATTTATCTTGATAC -CGGTGTGATATCTAGTGCTGTGTATATATAAGTTCGGCTACTAGGTCTTGCCTCGACGTT -ATCTAAAGTCACTCACTGTTAAAGGTGGTGACCTGGCTATACCATATTATATGTTATCTC -TGTGTCACCAAACACCTATGCAAGGAGTACCGTTTACAGGAGAGTATACTTGGTCATGTA -CAGAAGGTTGGTATACAGATCTAGGCTCCCTCCAAAGGCGGAAGTAAAATCACAGGAAAA -AAGCGCTGTACAATAGACATTAGAGACATCAAAACCATAAAAAACTAAGACTAGAACCAG -CATATCTACTGTCTAGGCGCCCTCTCCCACCCCGTCTTGAGATCCCCGGTAGGCTTGAAC -CGCGGGAATGGATGGGCCTCAGTATTGAAAACGTGATCCTCCAGCGAGATGAAATCACGA -TCCGAGAACAACAAGTAGAAATACTTGAGCGTCTCAGCCATCCAACAGCCCTCCATATTA 
-TCGCGCTTGTATGAAGGGACTGCATCCGCGTTTCCGAGCGATGTAAATCCCTTAATACGG -AGAACTGGTGCTGTGTTGGTTGACCTAGAAATACGGGAGGTACTCTTCTTCTCGACCACG -GCTGTGTGTTTGATGAAGGACTTGAAGATCTCCCAGCCCCATTGACGGTAGATTTCATCG -CCGGTGATACGGTAGAGATAGAAGAGGGACTCGACCGTTTCCGGGCGTTGTACATTGTGG -CGGTCATTGCGGTGAATATCGATGTCCCCGCGCCAGGACTCGGACGCGTCGCTTTGCGCC -TCTAGAGGCTTAGAGATACCGCGTAGTGCCTCGGGCTTGGATGCGCGGTTAGGGTTCGTC -ACTGGATCGGGGTACTTGTCTGCCATCATCATGGGTGGCGAGTCTAGGACGAAGTGGGAG -ATCTCGGCTGCGAGGCCGGTTGCAGTTGCCTTATGCATTGCCCAGCAGGTCTTCATTAAC -TCCTTAGACATGAGAATCTCGTCCTCCCGCTGTTGCGTCCAATCCGCCGATTTTCGGGCT -TGGGACAATGGCTGACCACCGGTCGCACCAAGCGCGATTGTTCCGGGCATGAAGCAGACC -AGGTGGTCCATCCTAGGTGAGAGGTCCTGGTCTAGGCCCTGGGGCCGCTCGCCGATAATC -ATCAGCTGGGAGTTCTTGGTGAATGAGACCAGGTGCTTGCGGATGCCTTTCAATGCCTCG -TCCCACATCTCTTTGTAGATAGGCTCCTGTCCTGATGTCTGCAGGTATTGCTTGATGAGA -TATTCTAGAGCGAGTCAGCTTGAGGGAACCACGGAGGACCGAGTGTCACCAACATACCAT -AGTAAGAATCTGCCTTATTACCCAGACTAATGCTATCTCCTTTGAATGCACCACTGTCAG -GATGAATGGCAGTCCGGACAAGGCCATCCTGCGGCTTTTGCAGGTCAACCAGCTTCATCG -TTCTCTCAACCAACTTCCAATATTCTGCCTCACCGGTCAGCTTGGCCAAGTACTTGAATT -CAAGCTGCAGCGAGCCAGCCTCGGAAATAGACGTAGCCCCCTCATCAGAATGCACGACAA -CGCCGGCTGAAGAGTTGAGATTGACATTCGACCATGGGATACCACTTGGAGACTCAAAAG -CCCCGAGTAATCGCTCAGCGAGGTCAGCTGCTTTCTCAATGTAAAGATCTTCACCGTCAG -CACCAACATCATCGTCGCTGAGGGGTGCCAAATTGGGGTATTGGGTTGAGAGATAGTGTG -CAGACAGCAAACCACCCAGCATGCGGATAGTTGTCTCAAACGTATTGACATCACGGTCCT -GATCGTAATTCAACGATGTTGAAATCCATTGCCGCGCGCGGTGGACCTTTGAGGTAAGAT -TCATGATCATCAATGTGTCCAGCGAATCAACTATCATCCAACCTAATCCGCCACTTGTGT -CTTCTTGGTTAACGTTTGATACTGGACGATACTGGTCATATCCTGGATAATGGTGTCAGC -ACTCTCATATTACACAAGTCTACTGTATGACCTACCCCATGCGTTCTTTTCATATCCTTC -CCAACTGACGATGAAGGCGTCCCGCACTTTCTCTCGTCTCGCAGTCCAGTCAGACAAGTC -CCCAGATGTTTTTCCTTCTTCCAATTTCTGGGCCCATTTCCATAATTCTACCCCTTTATC -ATTCGACTGTAGCCCTGACCACGTGGGGAGGTAGAATTTATACCACAGCAAAAATAATAC -TGTACATACCCCCAGCACGTTCAGAAACGGCCTCCATCTCCTTCGAGGACCTGTGCGCCG -CGGCGCAAAGAATGGCTTATCTTTATATAAAGGTAGTGTGCGGTCCTTTTCGAAGTAGCC -GTTCACTTTGTTGGATAGGGGGATGGGCTGGTTCGACAACCCATATCCGCGGAGCCCGCT 
-ACGAGGGTTTCCGTAGAAGCCGTCTTGTTGCGTAGGCGCAAAGGGGCTGCGCGATGGCTG -GTAAGCCATTCTATTATTCGATATTGCGTTGTGCTCCGTGAGGGAATCTCTGCCAGTCTT -GCGATTACCGAAGGCGCCACATAGGAGTGAGGAGTAGATTGAGGTGCTGTAGGAGGATTC -GCTTCTGGTTGGCGTTGATCAAAATTTTGATTAGAGGTTCATTCAGGCGTATCTGATATT -CAAAATACAACAGTTAGTCAGGTTTGATTCTAGGACGAGGAGCTAACCCCTCGGACCAGC -TCTGGAGGACAGCCGCACACTTACCGAAGTATAACCCCTTGCTATGGTGTTTATGATATA -CAGATCTTGATCGCAGCGACAACCTGGTCTTCGGCTCTAAACAAGAAGAAGAAACAGCCA -ACATGCACCGATACAGTCGTGTTTGTTGAGTCGATGATCCGTCAAAAATTTCTCTGCGGA -GATGCGGAGCGCGGAGTTTATCCCGCGTGTCAACAATCTTAAGGCAGATAAAAGCCAAGT -ATCGCAACAAGCATTCGATATGATTCGAGACGAAATTTGCACGAAAACCGAGAACGTGGC -GAGGGCAACTTGAAGGAAGCTGGGGAAGTGGAGAGGCAAGACTAGGGTGTCCGCAAATCA -TATGCCTTTGTACGCGTAACTAGCCTCATTACTCCGTGTAGTCTACAGATATCTACCAAA -CCCAATGGTTGTCAATTCCTTGGGGCTACATTCTTTAACTGTTATTTAAGTAGCTTATGG -TTCCAATATGATATACAAACTCAAAAAATGGAAGAATAGGTCGAATGCTTAGCATTAGCG -CTTCAAGATAAAGATCTAAACCGTGCACCAGATGACGTGGGCCACCAGCCCCCACAAGGT -AGATACAATTACAGCTTCTGATACTCAAGCTTGCCCTCCAGCTTCTTCGCCTCTGCCACT -TTCCGTACAGCCTTGTTGAAATCATCCTGGTTGATAGCGTCACGGTAATCCTTGATCGCG -AACAGACCACTAATAGAGAGTTAGCCAACGAGTATACAACATAGTATGATGTAGTGCACT -CACGCTTCAGTAACCACGTTTCGGAGATCAGCACCGTTAAGACCATCGCTCATCTTAACC -ACACTCTCAAAGTCGATATCGCCTTCCTGTTGGACTGTGCTTGTGTGGATCTTCAAGATC -TCCAGACGACCAACCTCATTGGGAAGAGGAATCTCAATCTTGCGGTCCAGACGACCGGCA -CGCAACAGAGCAGGGTCCAGTGTGTCAGGTCGGTTGGTGGCCATGATGATCTTTGTCTTT -CCGAGGTAGTCGAAACCATCCAGCTGGTTCAGCAACTCCATGAGCGTCCGCTGAATTTCT -CGATCTGCAGATGTGCCCTCTGAGAATCGTCGTCCACCAATGGCATCGATCTCGTCCATG -AAAATAATGCAGGGCTCGTGTTCCTTGGCGTATCCGAACATTTCTCGAATCAGTCGAGCG -GATTCACCAATATATTTGTCCACGATGGCCGATGATACAACTAATGAGAGTAGACAGGTC -AGAATATTGCGGGTGCGAGTGTTCATGTTAAGAAGTTCAACATACCCTTCAAGAAGTTGG -TCTCCATCGAGCTGGCCACTGCTCGTGCAAGCAGCGTTTTACCAGTACCGGGAGGGCCGT -AGAGCAGAACACCCTTCGGAGGCTTAATTCCAACTCTGAGGAAGAGTTCCGGATTCTTCA -AGGGCAATTCGATGACCTCACGGAGCTCTCTGATCTGGTCGTTAAGACCACCAATACCCG -CGAAGTTGACGGACCCTGGATCTTCCAGAGACATGTTGTACACCATAGGATCAACTTCAC -GAGGCAGCATTCGCATGATGGTGAGTGTTGTCATATCCAAGGCCACACGTGTGCCTTGCT 
-TCATCTTGGATTTATCCACCTTTGAACGACAACCGACCACATATCGAGGACCCGATGACG -CCTTGACGATGACTGCAAAGCGGATTAGCTGCTTGATGCGATCGTTGAAGCGAAGAGCTT -GAGTTTCTCTGATACTTACATCGCTCTTCGTCGAGCTGCTTTAGCACCTCGCCGATGATC -TGACCAACACTTTGTAGTGCCTTGATGTTATCCTCGGAGACATCAAACTCTCTTTGAAGA -TCCTTGATACCCAAACGAAGGGTTTTCAACTTGGCTTCCCATTCTCGTAGCTCGAGCAAG -CTCTTTTTATAGTCCTCGAGCGCCTGGCTGCGCTCCGGGTCGCTCATGATGAAGGACGGA -TAGATTCACCGAGTGTGGACAAGTGCGGATGGAAGCCAAAGAGGAACTGTAATTCAGATG -TGAATTCAGGGTTTCGTGTCAGGTCGTAGATGCGAAATGGATGGATATCGTGATTGAAAA -GGGCGACAGCACCTTGCCTGGGGAGCTTCCAGCTTCCAGCGCGGCGTCCATGAGAGACGG -TCACATGACCTAAAGCTGCCTTATGTCATTATCTTGGTCATCAGCGGACACCTGAGGCAG -AATATTAGGAACAGTATACAACACAGGGCCATGAGATGAATAGAATGGTTCCATGCTACC -TTTGTATAAGACAAAAGACTACACAATTGCTGAAATTATTGAAACACGGTTCACTTTCCA -AGTTCAGTCTATATTTGAGCTCTTCCCTTAGTTGCTTTGCGCCTTTCTTTCTCCATTTGC -TCTCTATTTATACCCTACTTACAGCGTTTCATTTTATTTATAACCATGGACCTCTTGAGA -GTAGTTGAGATGCTCAAGTCAAAGTCAGTGAGTTAGTTAAGTATACGGGACATATACACA -GTGTAAATTTAGGCTAAGGCCGTTGATGCAAGTTCCCATGTTCTGTCAACGACGGGGTTT -ATAGCATTAGGAGAGAAAGTATAGTCTTGCCCAGTTGGGACAGTGGTCATGGTGCATTGC -CATTATTTACTCCCAAGATCATGATCAGAGAATGGGCTTTTCTTTCTGTTTCTTTCTGCT -TCAATGAAACAGTCCGGAGAGGAAGATTATCCGAGCACGTTCCGAGAATGGTTGGCCTCA -ACAACAAGGCACTAACAGCTGTCTTCGTCCGAATTAGAACGGAAGAATGCTACGTAAGAC -GAGCACTGTTTGATTGCTTTAAATAATTTCTCTCTGGTAACATTGGCCAATATAGTAAGG -GGTTCTACAGCTCAAATGAAATGCGTATGTTGTAATGGAGGGGTAGAATGCCGTACGATG -TGCACAGTCCTGTAACAGGGGCATACGTACATTCGGAGCAGGGTATGCTATCATAGTCTA -ATATCTTCCGAGGAACGGAAGATCTCGTAGCTAGAGAGGCCGCATATGGTCGGATCAACC -AGCAAGCGCTGGAGAATGACCGTGGAGCCCCACTGTTGCATTGGAAAGAATCGGAGCAGC -AATGAGCTTTCCCGGGGTTCTCTGGGGAGAAAGTATTTAAGTAGTCGGGTGTCTCCGTAA -AGTTTTGTTTCTAACAAGCCCTTTCTTATTATACCAGGTAAGTTGAATATAGCATCATGA -CATAACCCCCAATCCCCCCCACCAACACCTACCCCTCCATACATATACATCCCTACTACA -CACACACCAGTCTTCTGGAAAGGAATCGAAACCAGAGACCTACCGGTGATCGTAGTAGGA -AGTCCCCCGTTAAGCTCCTCCAAGCAAATCATATCCTTACCCTTTCCCGCCCACTCTTTT -TGGGCCATCGGTGAACCTCCCCCCGCCGTTAGTCACCTGCAGCTTTTGCCATATATCAGC -CCAGATCCTCTTTTTCTTTCTTTCAACTGACTGAAACTATAATCAGCCTGTCCAATTTCA 
-ATATGCCTATTTCTAAGATCCACGCTCGCTCCGTGTACGACTCGCGCGGCAACCCCACCG -TTGAGGTGGATGTTGTCACCGAGACCGGCCTTCACCGCGCCATTGTGCCCTCCGGTGCTT -CCACTGGTTTGTCAACCATCAGACCATTGAAAACAGGACTAGATTACTAATATCAACTCT -TAGGCCAGCACGAGGCTGTTGAGCTCCGTGATGGTGACAAGACCAAGTGGGGTGGCAAGG -GTAAGTGAATGAGACCAATGAATCCGCTTGACACCCCAGTAATCAGAAGCTAATATTTTG -ATCTCAAACAGGTGTCCTCAATGCCGTCAAGAACGTCAACGATGTCATTGGCCCCGCCCT -GATCAAGGAGAACATCGATGTTAAGGACCAGGCCAAGGTTGATGAGTTCCTGAACAAGCT -TGATGGAACCGCCAACAAGGGCAAGCTCGGTGCCAACGCCATCCTCGGCGTCTCTCTGGC -CATTGCCAAGGCTGCCGCCGCTGAGAAGGGTGTTCCCCTCTACGCTCACATCTCCGACCT -TGCCGGAACCAAGAAGCCCTTCGTCCTCCCTGTTCCCTTCCAGAACGTCCTGAACGGTGG -TTCCCACGCTGGTGGCCGTCTCGCCTTCCAGGAGTTCATGATTGTGCCTGAGTAGGTCTC -CTAGCCTCGAATTTCTTGCAACTTTCAAACTGACGAAACACAGCACTGCCCCCACTTTCT -CCGAGGCCCTCCGCCAGGGCTCTGAGGTCTACCAGAAGCTTAAGTCTCTCGCCAAGCAGA -GATACGGTCAGTCCGCCGGTAACGTCGGTGACGAGGGTGGTGTCGCTCCTGATATTCAGA -CCGCTGAGGAGGCTCTTGACCTCATCACCGATGCCATTGAGCAGGCTGGTTACACTGGCC -AGATCAAGATTGCCATGGATGTTGCCTCCAGCGAGTTCTACAAGGAGGATGCCAAGAAGT -ACGATCTTGACTTCAAGAACCCCGAGAGCGACCCCACCAAGTGGCTCACCTACGAGCAGC -TTGCCGACCTCTACAAGTCCCTCGCTGCCAAGTACCCCATTGTCTCCATCGAGGATCCCT -TCGCCGAGGACGACTGGGAGGCCTGGAGCTACTTCTACAAGACCTCTGACTTCCAGATTG -TCGGTGATGACTTGACTGTCACCAACCCTCTGCGCATCAAGAAGGCCATTGAGCTCAAGT -CCTGCAACTCTCTTCTCCTGAAGGTCAACCAGATCGGTACCTTGACCGAGTCTATCCAGG -CTGCCAAGGACTCCTACGCCGATGGCTGGGGTGTCATGGTCTCCCACCGCTCCGGTGAGA -CCGAGGATGTCACCATTGCTGACATCGCCGTTGGCCTCCGCGCCGGTCAGATCAAGACCG -GTGCCCCTGCCCGTTCCGAGCGTCTTGCCAAGCTTAACCAGATCCTCCGTATTGAGGAGG -AGCTTGGTGCCAACGCTATCTACGCTGGCGAGAAGTTCCGCACTGCTGTCACCCTGTAAA -TGCAGCCTTTCGGCAGCATAGATCGGGCCTGATGAAGGGCCCATCAAAAACGATGGTTTG -ATGATCTACGGAGCCATCGCCCATGATGATTATGAATTTAATAGGATCTTTTTCAATGAA -ACAAACTTCCACCACTTGATAAGTTAGAATGTTCGTAAATCAGTAAATCAGCAAATCAGT -CAATAATGAAATGTAGAAACAAAGATCACGCTCATCGGGCATCCAATGCCCGAGATCACG -TGTACATCCCACCTGGGGACGATAAGTGTGTCCCACATAAAATTTGCCCTCGCCCCTCCA -ATCCTTTGCTGGGTCTATTGTTGGCTTGTTCCTTTTGGTCTTGCGTATAGCGCATTACAG -TCTTTCATTTCTTTTTATCAAAGCTTTTAGATTTCCAAAAAAATGCCGCCTTCCAAGTGG 
-GGTATGTTATTATCCTTGTATTATCGCCTTCTATAGTGATTGCCACCATTACTTAGCTCG -TGCTCTTATTTCTATCGCTGTTGCTGATTCTTTGATTTCCTACAGACGAAGAAGAAGAGG -AGAGCCCCGTGCCTCCTGTTGTCGCGCCCCGTCGCCGTTTCGATGATGAAGAGGACGAAG -ATGTAAGGCAATGCAACCTCAATTCTGTCTAGCACGACTCTCGCTCGCGCCAAGTCCAAC -AAACTAACAACACCCGGAATGATAGGTTCTTGACTCATGGGATGCCGCCGAAGACTCCGA -GGTAGAGCGCGAGAAGGTCAAGAAGGCCGCCGAAGCCGAGGCCAAGGCTAAGGCCGCCGC -TGCCGCCAACAAGAAGACCAAAGCCCAGCGGATAGAGGAACTCCGACAGCAGCGCCTCCG -TGAGGAAGCCGGCGGTGAATCCGAGAGCGAGGACGAGGCAGACCGCCGTGCTCGTCTGCG -CAAGACCGAGCAGGAAGCTGATCTTGCCCACGCCGAGGATCTTTTCGGCTCTGTTGATGT -CAAGCCCAACAAGGGTGCCCCCAAGGCCATCGTTGTCAGCGACTCGAATGACCCTACTAA -GGCTATCGATCTGTCCGCCATGCCCCTGTTCAAGCCCGTCACCAAGGACCAATTCACCCG -TCTGACTGAGACCCTTGTGCCGCTCCTCACCCCGCACTCCAAGAAGGCCCACTACTCTCT -CTGGGCGCAGGACTTCACTCGTAAGCTTGTGAAGGACCTTCCTAGTGCTGAGATTAAGAA -GATTGCTAGTGCTCTCACTACTGCTTCTAACGAGAAGATGCGCGAGGAGCGTGCGAATGA -CAAGGGCAACAAGAAGACCAAGGCTGCAAAGACCAAGATCTCGCTGAACGCCAGCCGCGA -TATCCAGCGCGATAACACCAACTACGACGGTGGTGATGATGATCTTGGAGATGATGACTT -TATGTGAATGTGAATACGGACGACTAGTCTCGATTTGAATTACCCAGGTGTTTCGCTGTT -GCTTTCCCCTGGATCTGATCCTTGAAAATAAGCACCCTAAAAGAAGGCCTCCATCTTCTA -TTCCTGCCTCACCTATATGCAGGTTTCTGCTCCAATGTTACCCCCTATCATTCTGTTTTC -CAAACCTCGCCCTGTACACATCCCCTTCCATCTCAAATGCTTGGGCATAAGTAACCCATA -GAAAGACCCAATTCATTCCGATCATCTGATTCTTATATGACCTCACATACATTAACCTTC -AATTCTAACTTTCACGGCTAATAATCACCACATGGCAATTAAAAATCAATCACCAAACCC -ACTACTCCTGACCGCAGACCTAGTCCCCCGGCTTCTATTAAGCACCCCAAGCTGCCTAGC -CTCCTGCTCGATATCCACAATACTCCGGACAAGACTAATATAACCCTCATCATCCAAAGC -AAAAAGCGCCTCCTGGACCCACACGCGGCAACAAAGACCCTCTCGGAATCGAGCCGAGTA -CAACGCCAGCGGGACAAGTGCCAGACGTGTCCCTAGTGCGGCATGTAGGACGGGTCCGAT -TACGCCAATCTTCAGCGCTAACACTACTCGCATGTCGTTTGCTATGCTATAAACGACCTC -GGTTTTGTATTGCCAGGCAGTTGGGCCTGTCTCGTTGGTTATGTAGAAGGTTCGGCCCGT -TGTGGGTGTGGAAGTGAGGCATAGTTTCCATTCCAAGGGACAGCTACCGCCTAGGTCAAT -GAGGAGAACGTAGAGGGCATTTGGTTCGAGGGAGAGGGGTGGGATTGACATTATTTGAGA -CTTGGGTTATTTGCATTAGATTGGATTGCGCTTTCGTTCTTTTCGCGGATAAAAGACTGA -TCAAAATGGAGATTTGGTGGCCTCTATGACCATTTAGATTACTTACCTTCAAAGAAGTAT 
-AATTGACTATACGTATTGCTTTCCCCCCTATATTCTCTTTCGGAATTTCACTGATAGCAT -GGAGTTGGGAATTGCTTGAGGCAATCGAGAAGAGCTTACCAGACGAGACTTTAATAATGC -ACCTGCCTGATCTCGACTTGTAGGATACGGTTCTGTATAAAGAAGGAGATGTAGCCATAC -ATACTACATATGTATAGCACTAACTATGATTCGCTGGCTTGGTCATGTGGCAGTGATTGA -GATGCATTTATTACATCTAGGAACTGTCCAGGAAAACCTGGGATATGTACATCGGCATAC -ATACTTCGCTTCATTTACATGCCAAAAATGAGTACATATGGTCCTTTCAAGAGATATATG -CTTCTAACCATGTTGAATATGATTGAAACTATTTGCCGTCGTTCTGAGATTCAGACGTCT -CATCGTTTGAGAATAGGCACTAGGAGAGATCGACAACATGGACTCACATTCATCCCAGCT -GTTGCTATTGATCAAGCAACCAAATAGGACTTTTAAAAGTCCTAATATGAGGCACTGGGA -CCAACACAAGTCAAGATCACTGTCCGGCGAAATTCAAATGTCATCAACTGACCCTACAAC -TATCGAATTGCTTGACACGCGGCAATCGCAGCCATAGATTCCTTCATTGATTCGACAACG -GATGCAGACATCATGCATCCCGGCTTACTTCTTCCCACCACCACGTCTCGTATCAATATA -TTTGTTCTCTATCCGTCATGATCTCTAAAAATTATAGTCGACAGCTTGCTGAACGAGTTG -GAATAGAGGGGAGTAGATCACAGCGCATGAGCAACATGAGTTCGACACTATCAAATGAAA -CTTCAGGGGTCGATACGATATCATATCAAACAAATTTTCTGCAAGGAATGATCGCAGAAC -GAAGTGTCTCGGTAAACGTGTATAACCTACACTAAGACTAAATACAATCAGACTTTTAAC -ACTCACCTGACTCACCCTACACACAAAACCTACTGCAATACGTTGTCAGAGTACATATAC -TCAAGGCCTATCACCGCAGAGAGAGATATACACACATACAGGAATCATCTAACAATCACC -AATCGCAGGACGACAGACCACCTCATCAACCCTCCCCCAGCCCACTACCGCTTCGCTTTC -GTGGCGTCCCCCGCCATCCGATCACCAACCCAAGATTGCATCATTCATTGCGTGCATCAT -CTGCTCAGTTGTCATATTATCGCAAGTGCCCGGACCCCCTCCGTGGCGCTGGAAATGCGT -TGTTTCGATTGGCTGCTCTTCATGGTGGACGTGTGTTTCGGTTGAGTTTGCTCCAGTTCG -CTTCAAGGATGGAAAATAGCCTGTTGGTGATGTTAGAGCCTTGCTGAATTCGCTCATCTT -ATTGTCCGGCATTCGGCTCCGACAAATCGCTAGTGGAATGTTAGTCCCCGGTTCTGTATC -TTCAGCGCTTGATAAGATGGTGTCTTTGGACGTCCATCTTGTCCTCGTATTTTCCTAGCA -AGGAAACCTAGATGTGTCCTACCCGATGGTGGCAGCTGGCTTTAGGTAGGCACAGCATCA -CTGAGGGAAGGCAGTGGGATTTTTGTCAACTTTGAGGATTTAGGTGCATATGGATGTTTG -AATATATTTTATGTTATTCGGCTAGATGGGTCAAGACATTTGGAATAATAAACAAGCGTT -TCCGTGCAGAAGACTAACTACATACTCTTGTATCGATTCGTACGGAGTACGTACTTCAGT -CGTGAATACTGGTAAAAGGGGAATCATTGGCTACGCTAGCAACCCAACACAGGGTGCATC -TCGAGTAAAGAAGCTCGGAATGGAAATGGAAAATAAACACATTGAAACGTGCTGGAGTGG -GAATAGAGACAATAAAAGAGAGAGAAATTTCCACTCAGCCCCTTTTCTGCTCGCGTCAAC 
-CGAATATCAAACTCCGCCGGCCGACCCACACGGTACCGAAGACGTGCAAGATATCTGCCG -GTCCAACCCAGCGCTGCAGTGAAAACAAAGAAGCGACTTGCATAACCACGTCCAGGAATA -AATGCGAACGCCCACCCTACCAAACGGCATACGGCTCGTCGCGACCCCTAATGTTGCTTC -TTTAAGCTTCAGTCTCGGCGGGCTTGGCCTCATCGTGCTTCTCCTCGGCGGGagcagcag -cctcggcagcagcggcggcaggagcagcagTCTCTTCCTCGGACTTCTCCTCCTTGGCGA -CACTCTCGCCCTCGTTGCCCTCAGATGAAGTCCAGAGGGTCAGGTTATCGCGCAGAAGCT -GCATGATCAAGGTGCTGTCACGGTAGCTTTCCTCGGAGAGCGAGTCGAGCTCTGCAATAG -CATCATCGAATGCCTGCTTGGCAAGGTGGCAAGCACGATCGGGAGAGTTGAGGATCTCGT -AGTAGAAAACGGAGAAGTTCAGGGCAAGACCCAGGCGAATAGGGTGAGTGGGGGTAAGCT -CGGTCTGGGCAACGTCGGTGGCGTTCTGTGACACGATCAACAGCTGTTTATTGCAAATAG -AGGGAAGGCTTATGATCATAAGCCGAAAGAAGGGTTATTTACCTTGTAGGCCTCGTGGGC -AGCGGTGGCAGCGCCCTTGCGCTTGGGGCCGGAAGCGAACTCGGCGAGGTAGCGGTGGTA -ATCACCCTTCCTGTTTACAAAATCAGCATAAAGCCACGAATCGAGGAGTCTCCAAAGTGA -AGTGGCTGGAGTGTAACCCACATCTTGTAGTAGAAGACCTTGGACTCTCCAGTCTCGGCC -TTAGGGATGAGAGCCTCATCAAGAACATCAAGCACATCCTGGCAGACCTGCTCCAGCTCG -GTTTCGATCTTCTGGCGGTAGTCACGGATGATTCCAACGTGCTCATCGGAGCCCTTATTT -TCCTCCTTCTGCTCGATGGAGGAAATGATGCGCCAGGAGGCACGACGGGTGCCAACGACG -TTCTTGTAGGCGACGGAAAGCAGATTACGCTCATCGACAGTGAGCTCGCCACCAATCTAG -ATATGTGTCAATCGCGAGTACCCAGAAGGTGATGGGAAAATGATGGTGTCATACGTTGGC -GACTTCCTGTTCGGCGATTAGCATGGATACCCGGTAAACAACACGAGGGGGTATGACTAA -CCTTCATGTAGGTGACCATTTCTAGAAAACAGAAAATGAAATGTCAACGTCTATGTCCAA -CCAAATTATCGCAAGTCAACTTACCATCGTAGCGCTCGGCCTGCTCGCAGAGCCGCGCGA -GGAACGTCTTGCTGGTGAGGGGGTCAGTCACGCCGGGTTATGGGGTTTAAGGGATAGGGG -CGACAATGGGCATACTTCTCACGCTGCATGGTTACGATATTTGTTAGCCCAGTTCTTATC -GACAAAAATTCCAGATGGGGGTGAGCTCAGATGGAGGATGGAAATGTGGGATTGCGATGG -CGGTGACAGCTCAAGAGAGCGCTTGACGGAGGGGGACAGCGACATACCTCAGAACCCATG -ATGAAATTGAATCGAGATGGGTGGTTGAAACTTGAGATTTAAAAATTAAAGGGATCGGAA -GCACAAGATGCAGAAGAGGAAGATGGGTAGAGACAACCAAGTGGGGGAGGGAGATGGAGT -GAGGTGAGTTGGGGGGATTTGAATCATAACAATAATACACTGTTGTCATGTACCTAATAC -TTAGCTCCCCCTATTGTGGCACCGCTCTCTGTAAGTACTCCGGTACTTGGGCCGCAGCCC -AGTATTCATTTTACCGTCGATCCCCCTGATAGGTAATTAATTTGGGCGGGGCGAGGTATA -TCTGACCGTATGGGTTACCAAGTCCAGATGTCGCTCTCCTTTGAGGGCAGGATCAAGTAC 
-CAAAGGGCATTTGGGCTTGGTTTCTATATATTGTACATAGATCTCCCCTCTGTATATCTA -TGGGGTCATAGTATGTTGTTCTAGAGATCCCGACCGGTACCGTCACCTCTGAGCTGAAAA -CAGCCATATTTTCTGAAAGTAATTCTTTGGACGCATTTTTAGCACATAAGCCTCTTTATT -GCGTCCCTATTCTAGAGAAAGCACAGAAAAATTATATAAAAGTCTGGGgatgatgatgat -gatgatgatgTAAATTTCCTTGGAATCTTCTAGACAAATAGGCCGGTTATGATGTAGCTA -ATCAGCCGCACAATCAAATTAGGTCTCCAAGTATCACAGGCTTGAAGTTTTTGACCAAGT -TCTTCCAACTAATAATAGGCTACTCTGCTTCGCCGAGATTTTCCACATACTACATAAGCA -GTAATACGATCGAGCTGATAACAAAGGATCTAAAGGGGCCAAGGGCAGAATTTATTATCG -GACTTAATAACGACAGATGTGATATTCTCTAGACATTTTGACTGTCCTCTATCGGTGGTA -TGAAAACTCAATTTAAGCTCGGTGCAATTCATCTTAGGAAGTAGACATAATATATATATG -ACATACAAGAGGAACTGTGCCAATCATTGGGCATTTCTAAAGTATTGATAGACGGTTTCG -CGATTGTGTATTTAATGCTCGGTTTCAGATTCTGAGGTCGCTTTTGGAGCACTTCAAAGT -TGCACATAGTAAAGTTGTGATGCGACCATTTTGACTCCTTTTCTGTCTTCTGAATGATAA -GACCATTTACTACATAGTGACAAAACATTCATATGATTGAAACCGGTTGTTTTCGTTCAA -CGTGCTCTTCCTCGATGTATAGGGGAATATCGCGATTGTTGAAGCGATCATCTTCTGGAT -TCCGCATGACTAGTGTTCTCATCTTGCCATCCCCGGGCAGCTAATGTAGAGGAATATGGC -TTGGGCCAATGCCCGCCGGACCTTGTTAAATAGCGCAACTTGGTGAGAGGCAACATTCGA -AAAGGCGACAGTTCGACTCAGCAGGACGCTAGTGTGAGGTCTGTGGGCGATCGAAGGGAC -TCACGAGACGATGAAAACACGCAAGATTATAAGATTTAGTCTAGAACTACCTGGGTGCAT -CCTTGGTCTCTCCTAGATGCGTGATAAATGATGACACAGGTAGAGACTGTGTAGGGGTAG -ACTGGAATAGACTCGCTCGAGCTTTTGTAGTGGCCTCACTAAAGACTATTCATCGCAGAC -GGAGTGCTGCAAGATTGCCAAGCTGGCGTTGTCCGAAGGATGAACGGCATCGGAGTAATC -TGTAACAATGGGTTCCCAGAGCAAGGTCGATCGGCAGGAACATAATCTAACATACCTTCT -TCCGGTATATCATAGCATTCGGTCTCATTTGTATAACGAAAGCACTTCCTGATAGAGGTT -AGTCCTGCCATTGACGTGGATGACTCGTGTTCGCAACCGCATATGTGCCTGAGCGGTCTT -GTTCAGTTCCAGGCCTTGTGTAGCATACCGAGGTCTACTGTTTCCCGCGAAGTTACACGC -AAACTAAGCCGCAAGTTTGTTGCTTGCGGGCACTAGAGTGGAAGGTAGAATGCTGGATCA -CGAAGCAGTAATATATGTCCTCAACCTCCTTTTCTAGTGCGATCAACTGCTCGTTGAGGT -ACCTCGAAGGCACTTCAAGGGAGAGCACAATGTATCTTATATCTTCTCCACGGTAATGCC -CAGGCGAGGGCCAACGAAGAAATGCGCGATGGTGACGTGAGAACATGCTGTCGCGCGATC -TTCACCAATATCATGGCGGTCACCGAGTCCCCTCCACGGCAGAAGAAATTGACCTCCGCG -CTCAGCTTTCTCCTATGAGAGACAAAGGAATTCGATCCCTTAGCTTTTGTTGGATGGTCG 
-CTTTTCGAGCATCACTCGCCTATTGCTCATTTAGCCATGCGCTGGTGATCCGTCGCAGCT -GGTCAGTCAACTTTGCCACTTCATGATGGAGAGGGAGGAGCGAGTGAAAGTTGGACAGTG -GGCTCCATGTTACTTCGAAGGGTTCTAACCACACCGACTCCCATTAGGGGTGTAGCAAAT -TGAGGCCGAGGCTGTCACCCAGAGGTGAAGGAAGGGCGAGAATCTAGCTACCTTTCCTAG -GTTGCCTGGTGGGGAAATGCAGCCGAGGTCTTTGACTTGCTTTCACTCAGCTCGAATGGC -TGGCCGTGTAGTTCGGCTTTGGTCGCCTTGAGTCAAATTTGTAGCAGAAAGCGGCCTAGG -GCATACTAGACAAGGGTTCTCTGTTCTGGAGAGAGCCTTTTAGTTGCCACCGACATACGC -TTTTGGATACTGCTATCTGTTAAGGACAATCAAGCCTACCAGTGGATCCGCAGGTTGGAA -AAGGTTAGATTAGCCGAGGGTTACAATTGTCCTTATCACGCTTTAAGGGAGGCAAACATG -GCATATGAGAGGAATGATGGTCGCTCAAAGAGCTGTTTAAGAAGGTCAAGTACGCGAGGT -AGCACGGTAGTATCACAATTTGGTAGTCGTATGCTATGACGGTAGTAACCCGTCCTTTAA -ACCACGTTGGTCAATTTGACATTTTTAGAGGAATCGTGTTTACTTTATGAACGTGCCAAT -CGAATCGCTTTAAAATACTCGGGCCCGTCCCGAGTTTAGATTTGGTGGATACAGCGGTCA -GCGAAGCGAAGGGAGCCGCCACTACTCCGTACTCCCTAGGCGATATAATAATAAGTGACA -ATCCATGAGACCATTTACTAACTATGTCCCTGGTTGCGCAGGTTACTTTGGGTCCTCTTT -GTTGCGTCCTATGTCGTGTTTTCTTACACGGAATACAGAATATTCCATTCTAGGACTCAA -GGTGAGCAAGCCCGAATTACGAAGGTTGGTCCCAGGTGTTTGGGCGTTTGCTCAGAATCA -AAAGCGAATTTGTAGAGCTCAACCTAGTGCCCGAATTCAGGCCTACACTTGTGCTAAGTA -TGTATCTATGCTTCCCCCAAAATTCCCAAATCCCGAAAATTGGAATTGTGCGAGGTTGAA -TGCAAAAATAGGTTAATACAAAATGTACCTTTTATATGGAATACATATACAAAAGGCACT -TCAatatttatatatatatttacgtatacatatCTCGCCCATATCGTTATCTTTCTTAAC -AGACATACCATAAAACCTGTTTATTGAATTATTATAGAAATCAATTGAAAATAAAATTGT -GGTTCTAAAGAAAAGCTAGTAGAAGGAGTTGTATATATTACACTTCTGATAGCGGTACTT -CATTTTTCTACGTTCAAATCACAACACAAGGATACATAGATCAAAGGTATATTTGTAAAG -TCAAGTGAATAAGTAGGTTTTTAATGTCAGTTGATCTTCCTGTAATTAAATGAGATAGTC -GAGAGAGAGAGAGACACATATATATTTATATATATTAAAATACCTTTCGTATGCCCATCA -CGTAGAAAAGGTGCATTTGCACTAAACTGGCTCTTGCATTCAACTCGTGCAGCCTACCTC -CTCTGGATTTGGGCCGTGCGTAGAGCCGAGTTCAACCTATATGTTATGATCGACATGGTG -ATGAGCTCCATGGCTGCATGTCTTTAGGTCATCCTGTGACCGTGCTTCAGTTTGGGAATC -CTCTATGCTAAGGTATCTAAAAGGAGACCATTCTGAAAAGACAGAAAACAACCTCCATTG -AATCAGGAGCTGTTGTGGTGGATAGGCGTTAGACCGTCTGAATGGGTGCCTCGGAAATAA -CTATGCGTACGTTGGACCAGCAGCTTCCCATTTAAACCACATATCAGTTTGCCATTCTAG 
-TAGATCATTTGATTGACGTCCTTTCGAAAAATGACGATCAGCTTTTTCTTTTCCTGGTCA -CAACCTGAAGTCAAATCGCAGGTTGGGTAAGAATCGAAAGGGGTTTGATGGGGAGTGAGG -CTTATTACATTTCTTTGCCGCAATGCTAGTTCTTTTCAACCATCAACCAAAGCAAATAAA -AATCCCCTCTAACTATAATTGGTAGTTGTCAGTTTTTCTGAAGCAACTTATTATCAGGTC -AGACTTGATATTGACCTCTAATAGATATATATTCTTCATTCTATAGACTACTCCGTAGTG -GGATCTAGTTAGTGCTAGAGGTTATGAATTCTGGGAAAAAGTGAAACAAGAGATCCGAAG -TAAGCATGGAGTAGTATGTATCACACGAATTATCATTGATCTGAGATATGTATCGGTAGG -TATTGCCCATTCTAGTCAGTGGAGTTGAACTCTTAACAGATTGAAAGCTGGGTGGGGGCT -GCTCAACTCTTTGTTTGCTATTTATGAGGACCTTTTTTGTGGAGTCGGTTTCCTATATTA -TTCCTGAGACCATAAATCCCTCGATACTTCCCAATCTTGAAGGGTATAGCACTCCTTCAT -CATGCTTAAACAGCATACAATGTACAATGAGGGTTCTCCGGCTAAGCCGGCCATGAATCC -GCGCCGAATCATGAATTTAATCCTTGGTTTTGCATCCTTGGCCTTTTTGTATAGAATCTT -TGTGTCCTGTGATGAAACAACGCAGCTGAGCTCATACGGTAAGTCTCCTGGGTGGTGAGA -ATCTGGATTTGGCTGACCAGCTTGTTTGGTAGAACCCGAAAATACAGAAACCAACCCATC -ATCACCATTGCATGAAGTAGCAACACACAGTAAGCCCTTCATCCTATACCACCACAGACG -AAACTAATAAGGTGAGTTGAACAGAGCTAATGGAGCCAACAAAGCCAACAAAGCCAACAA -AGTTGGTTTCACATATGGCCTTGCACGAAGCTAGCAAGGACGTCGCTCTCGTCGTCGCAA -AAGCACGATATGAAGATGTTACATGGGTAACGAGTTTCTGCCAGGAATTGTAAACATCCT -ACTTCCATTCCCCCCCCCCATCCCCCAACGTGGTAGTAAAGCGCATCTAACAACCACTAG -TAAATGCACGCCATACATCTATACGATGGACGATGAGCCTGAAGAAGGCTTTCTTATTCC -CCATACAAATGTAGGCCACGAAGCAGGTGCCTACCTCTCCTACATCGTCGACCACTACTA -TGACCTTCATCCATTCACAATCTTCCTCCACGGCAGAGAAGAGCACTGGCACAATGATAT -AGCTGGTTCAAAGACCCTGGATGGACTTCCAAATTTACGGTTCGAAGCCGTGAGTCGCAA -GGGTTATGCGAACCTACGCTGTTTATCTATACCTGGCTGCCCAGATGGTCTGTACCCATC -GATTATTCAACAATCTGATATTGATAATCAGAATCTGGTCGACAATTTCCCAGAAATATG -GGGTGAGATGTTTGGGCCGGACACCCCCGTGCCGCTTCAGATCGGATTCCGTTGTTGCGC -GCAGTTTGCAGTAACCAAACCAAGAATTCAAGAAAGAAGATGGAGCGATTACAACCGATT -CCTCCAGTGGTTACCCAGGGGGCTCGAGTACTCTGACACCTACGGGGTAGGTTGGCTCAT -GGAGAAGTTATGGCATATTATTTTTGGGATGCCCGCTGTCGAGTATGTTGCTCTCTTATT -CTCCCCTGTCCAACACGCTGGGATTATCAAGGCTAATTCCTTTTTCTTTCCCCTAGCTGC -GCTGATCCTGTCCAGTGTCGCTGCGACCTCTATGGGTGGTGTGGCCCGCACCCAGGAACC -GATGGCAAAATTTTGACACCTATCACATAGGCTAAGACCCTGGGATTTTTGTCCAATATT 
-ACTTATCATATCATTAATTTTAGCGTTTGCAAACCATTGTATCAGAAGGGAGCGATACTA -TCATAAATCCACAAGTGCATAAATCTCATCGTACCTATAATCTCAGCGAGCGTTTTATGA -TCAACGATCTTGCATGTTAGTTTGGCCTCATTTTTACCAGATTAATTGCTCATGCTAGCC -ACTCAAATCATTACGTGGAGATTTCTCCCGGGGATGAAGCCAGTAAATGAGGCATTTCTT -TCAAGTCTTGAAAGACACTACCTGTCCAGTGGAGCGGAAGGGTCCAATAAGATCACACCC -TAATTTAGTTTATATTTAAATTCATCTGAGAGGTTAAATTAGCCAATACCATCTGCCGTG -TATGAACAGCCATAGTCACTAAGTCAAAATTTGACATGGACTCCGTAGTCTTGGATGTCA -AACAGTTTTCAATGCTATCTAATGTCTTGGCAGTGCCATATCACAACTTATGAGGGCGTA -CCAACTTACCTCTGGTAACGGCTAATTGTTTTGAACAGATATAGGGTAAACATTGCAAGA -CTGCTACGTTTGGCGAATATTGGGACATTGTGTAAAGCTCATTGCATCCTTAGATATTGA -AGAAGCGGGTTTACTGATCCTACAACCTTCTATCCCGCGACTCATTGAGAGGCACGATCA -GTGAAAAGCCCAGCAAAGAGTTATTGTGGGCCGACGGAAATTGGCCATAGAGAATATATA -AGTAGTTCTATATGCTTGTTAGTCGACACAAGACATTATAGTAGATTGACGTTGTAACCA -TTTGTTAATTGAAGTGAACTTAATCATCATTATCTATAGACCGCCGGATGAATCTCGCAG -TAGGCTACCGAGGTGACATGGATATGATAGCGCCTTAGATCCGATAGGAACTAGGGATGT -TTAGTAGCAAAGCTATCAAGTTGTAAAAAGAGGCAGAGTTTGATACAATGCCTCCTGTGT -CCGATCTTTTCCTAAACTTCATTTTTGTTTTCTAAGTACTAATCGCCACGACGACTATTT -TTTAGAATCTCTACTTATATGGTGGGATTTTTACAGTCATTTTAGGATACTTTGTGTTGA -ATAGCTTTTGGATCACTTGGAAGGTTGGTCAGGATAGATAAATTGTGATATGCCTTTGGA -GGGAGATAGGAAGCTGAGGGAATGGCTCGGATTATTCCAATTAGGCGCAGAATTTCTGTA -CCTTACTGTGTGTGTCTTACTGGAAAAGACACCTATGATAAAAGTGAGCGACAGTGTACA -TCATGCTCCCGGAAAAAGCTCACAGCTATAGTGTGCCAATTGGGGGGTCTGAGAATACAG -GTGCAGTGACACAAGAACACACTGGAAACTTAGCGGTGTCACGCTAATTGAGGTTGATTT -AGGGGGCGGATAACACCTAGGGTTGTATGAGTTTGGAACGATTGTTACATTTCTCTCCAA -TGGACACCCGACTATAACAGATGTAAAACTTAGTACCCTTTCTCCATCTAAGAGCCTTCA -ATGATCAAGTGGTAAGTTGCGGTCAAGCTATCTCTTGCTGAAGCAAGAACATGAATCAAG -AGTCGAATATGCAAAGTCCCGCTCTACGGCTGAAGCATTGCTTATTCTGGTGGATGAATC -ATATCGCTCAGCAATTGGACTCTAGGCTTGATCGGCGAGCGGCAATTCCTAAGAGAACTT -ACCCTCTAATCTTCACCAAAAATGGCTTGGAATGAGCATATCGCCAACTGAAGGCCAGGG -TAAGCAATTGAAGTGACATATGTATCAAGCGATTGGATATTGAAATTTGGTTACGCACTA -CGGAAGTACTGAGGATCAATTAAATAATGAGTACGGAGTAGGTAAGCGATGCAAATGAAC -TTTTGTTCTACCGTCAGCCCTATGGGCAAGGATAGGTTAGCGTACTTGTGGATATCCCAC 
-GCTTGATGCATTCTACAGGGACTGTGCTTTAGCCTTGAAAGACCTCAACCAACCACAAGT -AAGCTGCCACAAGTAAGCTGCCACAAATAAGCTGAGTGGTATCGAGTAGTGTAATCTGGA -GGTAACCTAAAACATAGCTTAAACGTAGTAAATGAATACGTCAAGGGCTGTACCATGACA -TAAAGTCCAGAAGACAATCGTATCGAAGGAGTCAGGTTGATTTAAAAGGTTGGATACCCA -CTTTAGGCTAGTGTATGCAGAGGATCAAGTTGCCAAGACTATCTGATAGGCAAAAAAACG -ATGCAGCCTAGAGATGCCCCATAAGTGATTCCAAGGCAATGAATTGGCGTTGACGTTGCA -GCCCTACGAGAGGCTCTTTGGAGGTCTCTAGCTCACTTCGAGAAGACAAGCTGATATAAC -AAGAGGCTCAGGAGGCTATAGATGGCCAAATCAAGTATTTGAACCAGTAGATGCCTaaaa -gaagacaaaagaaagaaaaaaaagaaagaaaTTATAGCGGAGAATTGCAGACCCGCCTTC -AGCCCTGTAGACTCCCCAGTTCCACTCCAGGCCCGTGCTTACTCAGCCCCTGGAATCAAA -TGGATGAGAACACGTTGACTTCCCCCACTGGAGGTCCCTCAATTTGGGCTTTGATCTTCA -TTTGCTCTGGATCTTCTTTTTTAATATCACCTGGTCCCTCGCTTTGGGCTTTGCTTTTCT -TCCTTTGTGAAAGGGCATGTATCTTTATTAACAACAATCTCTTCATTGTTTACCCCAACT -ATCATTCCTCACCTCTGCGGTTATTGCAATTGATCCTGCCTATTGCCGTTGAGGGTGTCC -GTGATCATGGAGAACGCCTTTCTCCGTACTCCAGCCGAGGCGCTGGAGCACTTCAAGGTG -ACCGAGCAGACCGGTTTGTCGCAAAGCGCGGTTGTGAAATCCAGACAACAATATGGACCG -AACGGTTAGTACACATTCCGCAGAATGACGACCCCAGAACTAATATCTGCTCTTTATTTA -GCTCTTGCGGAAGAACCTCCGACTCCCATGTGGGAGCTGATTTTGGAACAATTCAAGGAC -CAATTGGTTCTTATCTTACTTGGCTCGGCCGCCGTGTCCTTTGTGCTAGCTCTCTTTGAA -GAGGGTGACGATTGGACTGCATTTGTTGACCCCGCTGTTGTAAGTGCCTGGATATGATTC -GTTTCATCATGTGCTGAATTTTGCTGACAAAGGCGTGCTTTGTGCATAGATCCTTACTAT -CCTTATCCTCAATGCCGTGGTCGGAGTGACCCAAGAAAGCAGCGCAGAAAAGGCTATCTC -AGCTCTCCAGGAATATTCGGCCAACGAGGCCAAGGTTGTCCGTGATGGAATGACGCGAAA -GGTCAAGGCTGAAGATTTGGTTCCCGGAGACGTGATTCAGATTGCTGTTGGTGACCGTGT -TCCCGCCGACTGTCGACTGATCGCCATACAGAGCAATAGCTTCCGTGTCGACCAGGCTAT -TCTGACCGGTGAGAGTGAAAGTGTTGCAAAGGAGATCCGGGCTATCAAGGATGAACAGGC -CGTCAAGCAGGATCAGCTCAACATGATCTTCTCTGGTACCACTGTTGTCAACGGTCACGC -TACAGCTCTCGTCGTGTTGACCGGTGGCTCGACTGCTATTGGAGATATCCACGAGAGTAT -CACTTCCCAGATCTCCCAGCCGACTCCCTTGAAGCAGAAGCTGAACGACTTCGGTGACAC -GCTCGCCAAAGTTATTACCGTCATCTGTGTCCTGGTTTGGGTTATCAATTATGAGAACTT -CAACGACCCCGCGTTCGGTGGCTGGACTAAGGGTGCTATCTACTATCTGAAAATTGCTGT -CTCTCTTGGCGTGGCGGCTATTCCGGAGGGTCTGGCAGTAGTCATTACCTCCTGCCTGGC 
-CTTGGGTACCCGCAAGATGGCTCAGAAGAACGCAGTTGTCCGGTCTCTTCCCTCCGTCGA -AACCTTGGGCAGCTGCAGTGTGATTTGCTCTGACAAGACTGGAACCTTGACCACCAACCA -GATGAGTGTTGAGAAGATTGTTTACCTGGCAAGCTCTGGCACTGGCCTCGAGGAAATCGA -CGTCGAAGGCACTACTTTCACTCCTGAGGGCAAGCTCACCCAGAATGGCAAGGTTGTCGA -GAACTTGGCCGTTTCTTCATCGACTATCGCACAGTTGGCCGAGGTCACTGCCCTTTGCAA -CGCCGCTACTCTTTCCCACGATCCCAAGAGCGGAGTCTTTTCTAGCATCGGTGAGCCCAC -CGAGGGAGCTTTGCGTACTCTGGTTGAGAAAATTGGAACCACCGACAGCGCCTTCAACCA -GAAGCTCTATCGTCTCTCTGCATCCGAAAGATTGCACACTGCAAGTGCTCATTACGAGTC -CCGTCTTCCTCTCAAGGCCACCTATGAGTTCTCCCGTGACCGCAAAAGTATGTCTGTTCT -CGTTGGCGAGGGCAAGGAACAAAAGTTGCTCGTCAAGGGTGCCCCTGAAAGCATCCTGGA -CCGTTGCTCGTACGTTATTCAAGGCGCCAATGGATCTCGCGTTCCTGTCACCAAGAACCA -CTTGAAGCTTCTCTCCGAGGAAGTTGTTGAGTATGGTAACCGTGGTCTCCGTGTCATGGC -CATCGCAAGTGTTGATGACGTCTCTGGGAATCCCCTCTTGAAGAACGCTACCTCCACCGA -GGACTACACCAGGTTGGAACAGAACATGACTTTCATTGGTCTTGTCGCAATGTTGGACCC -CCCTCGCCCCGAGGTTTCCAATGCTATCAAGAAATGCCACGCCGCCGGAATCCGTGTCAT -TGTCATCACCGGTGATAATCGCAATACTGCCGAGTCTATCTGTCGCTCAATTGGTGTGTT -CGGTGCTGACGAGGATCTTGCCGGCAAGAGTTACACGGGCCGTGAATTCGATGCTCTTAG -TGAAAGTGGACAAGTGAAGGCTGTTCAGACCGCTTCCCTTTTCTCGCGCACTGAGCCTAG -CCACAAGTCCAAGCTTGTTGATCTCCTCCAGTCTTTGAACCACGTCGTCGCTATGACCGG -TGATGGTGTCAATGACGCTCCCGCGCTCAAGAAGTCTGACATTGGTGTAGCCATGGGCAC -CGGAACCGACGTGGCCAAGATGGCCGCCGATATGGTCTTGGCTGATGACAACTTCGCTAC -CATTGCCGTCGCCGTCGAGGAAGGTCGTTCCATCTACAGTAACACTCAACAGTTCATCCG -CTACCTGATCTCTTCCAACATTGGAGAGGTCGTCTCTATCTTCCTGACTGCTGCGCTTGG -CATGCCCGAGGCTCTGATTCCCGTTCAGCTATTATGGGTCAACCTTGTGACTGACGGTCT -GCCTGCCACTGCTCTGTCCTTCAACCCTCCCGACCACGATGTGATGAAGCGGCCTCCCCG -CCGTCGCGACGAGGCCCTTGTCGGCGGTTGGTTATTCTTCCGCTACATGGTCGTCGGTAT -CTACGTTGGTGTTGCCACCGTCTTTGGTTACGTCTGGTGGTTTGTCTACAACCCAGATGG -ACCTGGCATTTCTTTCTGGCAATTGGTAAGTTTGTTTGAGATGTCTCCCAAGTGTTCATT -GTTTGCTAACATGCCCCATTCAGTCCCACTACCACAAGTGTGCCTCTCAGTTCCCCGAGA -TTGGCTGCGAAATGTTCAGCAACGATATGAGCAAGTCCGCCTCCACCGTGTCTCTTTCTA -TTCTCGTGGTAATTGAGATGTTCAACGCCATGAACGCGCTGTCCTCCAGCGAATCCCTCC -TGACTTTCTTCCTGGGCAACAACCCCATGCTTATCTACGCCATCACTCTATCCATGCTCC 
-TTCACTTTGCCATCCTCTACATTCCCTTCCTCCAGAATCTGTTCTCGATTCTGCCTATGG -ACTGGAATGAATGGCAGGCAGTATTGGTGATCAGTGCGCCAGTGATGTAAGTGCTTTGAT -TCTGTTTCATTATGAGAAACATCTTACTAATGCATACTCTCAGTCTGATTGACGAGGTTC -TCAAGTTTGCCGAGCGTCTTCTCTACAACACGAAAGCCGTCAACCCGGTCGCCCAGCAGG -GTGTCGGCAAGCCCAAGCGGGCTTAACGGGTAGGACTATTTTGGCGAGAGCCTAAATACG -ACCTTCAGCTTCCCGGATCTCTGCCCGAATAAGAGCTAGCCGGTGTGTAATAGAGATAGA -TTGTACTTGATTGGCTTGACATTTTGTCGCGCATTGAATACGAGATAGCGTAGACTGACT -ATGCTATCATGGATCCCTATAGATCCCTGAATGAAAGATTTCTTTGCACTTGACAGTCGA -GCAATTATCCGTACAGCCGAGAGACTAGTTTTGAGTCGGTCTTGGCGTGTCCTGACGTGT -TAGCTTGGAAGGCCGTGATTCTAGGAAGAATGAATGATGTGATCTAGTTGTCATTAGTGT -ACCTGTAATGGATCCGACTGTTCAAGTAGCAATAGCTTTCAGGAACCAGATATAGTTGAG -GTAGTATGGGCTTCGGGGTAACATTCCACCAGACACGGGAGAGAATAGCCCGCTGAAACC -ACCACCGGAATATCATCCCGCAACTAGAGTGTTCACTCATGCAACCCTGACCACTTGAAA -CCACCTTGCCAGGGGTCTAAAGCGGGGGAAATCAGACCAGATCGTTACCTTCAGACCTCG -GTGCAACTGTCATGCGCTATAATTGGATGTCGATCGTCTTTCCCCAGGGATCATCTAGAG -CTTTTCCCCGGACTGGAAAAGTGTGGAGTTTGCGGAGAAAAGGGTGTCTTGTTTCTTTGT -TTAGGTTAAATACCCCCTAGAGTCCCTAGAATCCCTCTTTCTTCATGTTCTCTATAATTC -AATATGGGAGTCAAGCAATTCCTGAAAAGCCGCTCAGGCTTGCGCGTTGATAATCGTACG -ACTACGTCGGCCGCTACTCTCACTCTACGACAGAGTCTGTGGCCATTGACTCTAGTTACG -ATATTATTCTTCTTATGGGTATCTTATACTTTGTCCGTAAACAATCCATCTACTAATAAC -TCGATTAGGGGTTCGCGTATGGGTTGCTGGATACCCTTAACTCGCACTTCCAGAAAACCC -TACATATCGATCGAGGTCGATCAGCTGGCCTTCAAGCTGCCTACTTTGGGTATGAAAATC -TCCATCCAAAACCATACAGAAAATTCCTAACAAACCCCCAGCGCATACCCCCTAGCCTCT -CTCGGCTATGCAAACTGGATTCTCCGCAACTACGGCTACAAAACCGTCTTCATCTTTGGT -CTATGTCTATACGGCATCGGAGCTCTATGCATGTGGCCCGCAGGACTCAATCAATCCTTC -GGCGGCTTCTGCGGTGCTACATTCGTCATCGGATCGGGACTGGGATCTCTCGAGACAGCA -GCTAATCCTTATCTGACCGGTAAGAAAATCAACATTCCACAACGAATCCCGCATGTATAT -TGATCTCTCCTTACATCTATCCAGTCTGCGGTCCACCCCGCTACTCAGAAATCCGAATCA -ACTTCGCCCAGGCCTTCAATGCAATCGGCACAGTCGTCGGCCCGGTCTTGGGTTCTTATG -TGTTCTTCACAGACACAAAAGATGATGTAACAGCCCTGCAGCGCGTCCAATGGGTCTACC -TCGCCATCGGCATCTTTGTCTTCCTCCTTGCTTGTGTGTTCTTCGTATCTAATATCCCAG -AAGTCACGGATGAGGACATGGCATTCCAGGTTGCGCAAACGCATGTCGATGAGCAGGATC 
-AACCTCTCTGGAAACAGTACAAACTGTTCCATGCGACTCTGGCACAGTTCACGTATACAG -GCGCACAAGGTAGATTCCCATCCCCACTTCTCATTCTATACCCCCATGATACCAAGCTAA -CCCCCCCCCCAGTCGCTATCGCCGGTTACTTCATTAACTACGTCAAAGATACATGGCCGG -GTACAACGAGCGCCACAGCAGCGAAGTATCTCGCCGGCGCGCAAGGCGCATTCGCAGTCG -GTCGATTCCTCGGTGCGGGAATCATGAAGTTCGCGCGTGCGCGCTGGGTATTCCTGGTTT -ATTTATCGTGCACGGTTGCGTTCCTTGCTGCATCAGTGACGCAGACGAAGCAGGTTGGTG -TGGCCATGTTATTCCTGACGCTTTTCTTTGAGTCGGTTTGTTTCCCGACTATCATGGCGC -TGGGTATCCGTGGTCTTGGACGTCACTATAAGCGCGGCTCGGGGTTCATCGTTGGTGGTG -TTTGTGGTGGTGCTGTTGTGCCGCCGATCCTTGGTCATGTTGCTGATATGCGTGATAATA -CTGGATTCGCGTTTATTGTTCCGGCTATGGTATGTTTTATCCGCTCTTGGAGTTGAATCT -TTTTGGGTCAATTGCTGATGATTACAGTTCATGGTTGTCGCTTGGACGTATGCCGTTGCT -GTGAATTTTGTTCCGGCTTATACTAGTACTGTGGATAAAATCGGTGAGAGTGACCTTGGT -CTTCAAAGTCCGACTAAAGATGAGGAGACTGTTATGGGCTCGCTTGGTCTTGATGAAACT -ATTGATGAGAAGCATGAGACTGTTCATGTGGAACGTCAGATTGTTGTTGTTTGATCTGCT -TTCAGTATCTCTAGGTAATGGTTAATGGTTTTATGGGTGTTGGTATTTCTTTTTATGCTT -TGCTCAGGGTTCTTCTCTTAATAGAGTGATGGGTGATGATCAAATGAATGCAATGATGCT -TGATTGCCAGAATCACCGATGCTTTTTGAATCACAAGTTAGAATTCGTATTCAACAGAAT -GCTAAACAGTCATATCGAGCAGCTTTGTAGCGCAGTTTAGATCGGGAAGCTTAATACTCG -ACACCGTTACGGCTCAAGTATAACGTGAATATATTCCACCAAATTAAGAGCTTTAAAGTC -ATGGCTTATTCCAGATATATATAGTCAAAAGCAATGATCCCCTGATATTTTACTGATCTA -TCTCTTTCTCAACTGTTTATCCAGTACGAACAGTCATTTATCTACTCAGGTACGCTTCTA -CTCGAAACCGAATAAATATATAATCACTTGAATATAGGCTTGGGTTATATTTGAGAATTA -CGATAATCGGGGTGCAAAGACGTGGAGGAACTCGGTTAGATCAGACAACCTTGTTGCGGG -TTGTTGGTCAAAGACAGTGCTGATCACCTCTATTGTGTCTGTGACACGGGGCATGAGAAT -GAACCACAACCCTCTGCTAGGTCCACACCCGATATAAAGCAATGGTATTAGTTGCCCCAC -AGCCTTGCCTCTCAGATATAAAGTACGAAAGATAATTCTTGAGTGGGCTCAGACACCGAG -TTCGTCATATAGACATTTGGAAAGTTCAGTACAGAAAGAAGGGAATACACAAACGCGGGT -CCCTGATAAGTATTCGTGGCCACTGTTTCGAGATGCTCTGTAGTTTCTCCGTGGAAACAT -GGACGAGAGAGCAATCGCATCTTCGATGGTGTCTCCGCGCCACGGAGTCCCACTGGAAGT -GCTGCATGAATCCAAGAAGCTTCGTGAGGACCATATCAAAGAACATATGCCGGGATCATT -GCTGGAACGTATCATTTCTCTATTCTGGGATTCGATGATATCCCAAGAATGGCTGTTTGT -GCGTCGATCAGATGTATGCTTGCAATTGAGTATCACAGGACCACTGAGCTAGGGTGGACG 
-AAGGGCACTCCTCAAATGTGATATAGTCAACTCGTCATCGTGCAACGGGAAGACCCAAGC -TCCATCGAGGTTCCAGATGGAGGAAGAAGCTTGGGAAAGGGAATAGTaggaagtatggag -taggaaggaagtaaagaaaggtaagatatcgaaggaatggaggaaagcaaaggaaaTGTG -ACAGTTAGGAAGTGTGGTGGGTAGGTAGTAAAGGAGTCCAGAACTGTGACTTGGAAGGCC -CTGGCGTTTCAGGAATCTGTGCAGTCGGTGAGCCTTTATCCCACTATTCGCCGAATTACC -CACAGCTATTGGTTCAATGGTCTCTTAGAAGTGACCAAGTAAGAAGTGACAATTGAGGCG -GTTTAGTCTCTCTTATGGCAAGTAAATGTTTGTCAAGTAAGAATAAGAAGAATGAAGGCT -AGTGTATCCACGTACTGCAAATCGAAGAGTGCTGTTTATTTCCTATCATATCGAGGTTCG -AGTTATCTAAAACAAGAAGTGATGCAGAGTTCCAATATATTCATCTACTGGGTTGCGTAT -CTTTTGCATCGGTATGACGATTTTTGCGTTGGAGAACGTGTTGGGTCTGGGGAGCCATAA -ATAATTTGATTTTGCTGGCCAGTTCGAAAGAGAAAATTGTAGAGCAACAAATGAAATGGG -CGATGTCATGGTTTTGGGTTGGGGAATATCGAAGTGGGTATTGGCCTATGCAATTTCATT -ATGCAAGTATTGTGTTGCTGATCCAGCGAGATGATATGGCACAGAGTTATATACGAGACA -GGGGAATCCCGGGCAAAAGCCACTAAAATCAAGCGAAGGTCAAATCATCATGGGGAATAA -AGGAAACATAGGCGGGAAAAGAGCCTCAAGAAGTTAGGACAGTGTGAAGGGAAAGGGTGG -AAGAGAGGGAAAGTGAAAGGCATAGGTCCAAGAATGGATGGAATCATGCCGAAAAAGAAA -CAGTTCCAAGGAAACAAAAACGAGAACGCGCTGTGTAAGTGGGGGTATAATAGATCTATC -GTCTAACCAAACATAGGAATATGGGATCGGGGGATGGACAAAGAAACGCGCGAATATAGT -GCACATATGGGAAATTGCAAGGAAGCTGTCGGGTTGATGGTGGTATCATTCAGCGATGCC -GACTTTGTCTGTGGCATCGGACGGCGAGCGCAAAGAGGAGGAGCGAGGAGGCGGGAGGGG -ACTGCTTTCAAGGTTTGAAATAGGAGTAAATGGCGTTCTGCTGCTTGTAGGAGTTTCAGG -CTCGTCTCTTAGACGATCATCGGTGATGACAACATCACCTGGTGTACCAGGTTCGTCGAA -CTCATCGACTTCTTCAATTTCTTCCTCGATCCCTTCGACATCTTCCTCTTCTTCGATAAT -CTGGCTCTTGCGTGCAGCAGGCCGCTCTGCCACAGCATCAAGACCGCTGGCATTCAGTGG -CTGACCGGGAACAGGGGAGTTAGCTACCCCACCAGACATTATCAAGTTAGTGGTATGGAT -CATAGGACCCTTGAAAAGAGATCTCTTTCGCCTAGCAAACACCAGGCTTGAAGAAGGCGA -GTAGAGTAATGGTCCCGGTACCGCCTCATTGAATGATGATGACATCTGACCTGAGTGGTC -CGCAATCTCTCGAGGTGCTCCGCCTGTCCCTCTTACCAATCGAATATTGCTCGGAAAGCG -GGATGAGGCACCTGGAGTAGTGGGGAGCACTTTGATAGATGAGTGGGGCGATCTGGAGGC -TCTGGCAGTGGGTGTCGAGATAGTGTCTACACTGTTGGAGGAAATCGAAGAAGCCTTAGA -GTGAGCACGGGCGTGATGGATGCTGCGGATCGATGATACCGAGGAGGACGTGGATTTGCG -TCCTCGTAGTCCGGCTGACGAACGTCGATAGGATCCACGTGGTGTGATAGGTGAGGGTGA 
-GAGTGAGTTTCGATGACTATTATGACCAGTGTTCAATGGCCGCGGCTGGCGAGGGTAAGA -GTGGCGATTTTGGCGGTATGAGGCACTGCTGGGGGCGAGGCTACTAGCATTAGAGATTCG -TTTAATTTGGGTGGGCGTCACGACATCCTCGCCTCCAATCTCGCTTGCCGAGGATGGCAA -GGTCGCAGTTTTCTTTCCAGTATCTTGACTCTTCGCCGGGCTCAGTTTATCGGAAAGAAA -TTTGCGCGAGCCTGTCGATTTGGCATGGGGAGACCGCGCTCTCTTCGCCGACTCTACAAA -CCAGTGCTTTAGAGACGCTAGCTGGCTGAGTATCGGATACTGGTGACGACGACTAGTGCG -ACGCCGGAAAATACCCTGATGCTGTGTCAACTCCGGCTGCGATGCGCTAACATGCAGCAC -GCTGGGCCGTGATCGCCGTCTGTCCTTTGGTGGGGGAATTGGGCGACGTAGTGTCGAGTT -GTGAGACCGAATAGAATCTTTGTCCAGCGGCAACGGTGGCGTGGTGACCTCAATTGGGAA -TACTGGAAGTGGTTCAGGTAGCGCAGGAAGATCCGGCACTGCCAACTGAGAGGGCAAGCT -TCTTCTATCCCGTCTCCCACGATCTCGCAACCCCGAGCCTTCTTCCTCCACCTCCATTAA -AGCGGATGATCGCTTCTCTAGACGACTGCTCGCAGCGCTTAGACGACGGAGGGTCTTTGC -CTCAACGTCCCGCTCGCGACGCTTGCGCTCTCTCTTTATTTCCTTCCGTTGCTCTTTTTC -AATCAGCAGGGCCCACCAGCCAGCTAGTGGGTCGCACCGCTGAGCCAGGACATTCTCAAT -GACCTCGTCAATATTGACCCCAGCGCTTTTCATCCGCTGCAACGTAGTCTTCTCTAGTGG -CGTAGTAAATGGCGAAGGTCTTGGAATTTTCAAGATTGCAAGCTGTTCAGGAGCATTTTC -CGCAAGGAAAGGATGGGCTAGAATGTCATCTAGGCTGGGCCGAATCAAAGGCCGCTTAGA -CAACAGAAGATTGATTAGAGCCTTAGCATCATCCGTGAACTTTTCGTTGAAGGTCGGCCC -GTCTGACAATATCCGCTTTTTGGTGATTTGATCGTCATCGTCGTCGTATGGCAGCTCGCC -TGCCAAAAGCGCATAGAGAATGATTCCTAAGCTCCATACATCCACTTTTTCACCCGCGTA -TTTCTCTCCTTTTAGCATCTCGGGCGCACTGTAGCATATTGTCCCACAGAATGTCTGAAG -GTAGCTAGCTTTTCCTTCATATTCACGCGTAAACCCAAAATCGCATAGCTTGACGTTTTC -CTGTTTATCCAGCAATATGTTTTCCAGTTTCAAGTCGCGATGCACACATGATTTACTGTG -GACGTAGGCGACCGCTCCCACCAATTGTGTAAATATCCGCTTCACCTTCTCTACCGGGAG -TGGGCCGTGTCGGAGGAGGTAGTTGTAGAGTTCATCGCCTGTGACAAACTTTGGTCAGTA -GGTTTCTCCAAGGGTCTTCTGCCATGCGCAGCCCGACATACCGGGACAGTACTCCAACAC -AAGCCAAACAAGACTCTCGGTCACCACCACTTCGTATAGCCGCGCAATATGCGGATGTAG -GAATTGGCGATGGTGATGGATCTCGCGCACGAGATTAGTGTCCTCCTTGCTGGAAGATTT -AAGCACCACCTGCCGACTAGTCAGCATTGCCCGGACAACTTTGTTCTTCGTACGACATGA -CCTACCTTGGAGCCATTGGTGAGCTTATGCGAGGCGAGATAGACCTTGCCGAAGGAACCT -TTGCCAATCAACCTTCCCAATGTATAATTCCCTACCGTCCGAAGGTCCTTGGAGGAAAAC -TCCCTGCGGACAAGCACCACGTTAGCGCAACTGGCTATCTAGACCGGAGCCGAAGAGGTC 
-GCTTACTCGAGAAGTTCATTGTAGGAGTTGGCAAGCTGTCGAGAATTTTCGAGTTAGCCA -TATTTCCACGAAGTCTGCGCAAGGCTCGAATAAGGGAAATCGGGGCTCCCTATGTTCCCT -GATGACACACCTTCTCCTTTTGCTTCTGGCTTTGAACCCCCGCTCTTGTTCGGGGCGGCA -TATGTTATGATTGCCAAGTTTCGCGATCGAGAGAGTCTCTGCGCGTCAAGGGGTGGAAAT -GGTTGTGCCGGGGTGCCATTCAACGGGATCGAGATACGCGGAGAAGCGGTATCCGGAAAG -ATCGAGAAATTTCAGAAAGGGGTAAGGCTCGGAAAGATGGCCGGATAAATTGCAAAAGAT -GGAAGGAGGGAAATTGCATACACAAAGGCCTTGAGAGATTCCCAGACGTCAACGGGAACG -TGGACCGTGTGGAGAACTCGGGACTTGAACGGCTCTCCTTTTTTTGGGGGGCTTCTGCTA -TTATTATCTCTATGTACAACATACAATCTCCCGTTATATATTTCAGGTAAATTATGGATT -CCGGACTCCGTTCCTGTCTAGGACTCTGGGCATGACGCTGTTTATAGTCAGCAATTCCTC -AGACCTCAATTGGCTACAATGCCACAGGCTTCGACTGTGAGCCCTGAAACAATGGCCAAA -CTGATCATGATCGTCATAATCTTAGGAGAATATGCACTTGTTTACTGCCTTACTCCCTGA -ATTCAACATGATCCCTTACTTGTGTATTGTACATGTACAACGTACGACACCGGCGCCCGA -TTAATTTAGTCGACTGCACCTATTGGCTGGGACATGCTGACATGAGCCAATTGGAGAAAT -GGCTGAACAACCATCGTCATATTATATGCACTGGTATTATATGCTTCATGATCGCCACCA -AGGCAGCGATTCAAGATGTCGGACTGTCTGTTCACTGGCACCCATCGTGGGATCGACGGG -GCTAAGTCCCGGCTCACCCTGGAAAGCCAGAGACACCATGCCAAAGTATGAGTGAAGAAG -GTCTATAGAGGTTGATCAGCAACACATTCTTGATATCAGCACGCTATCAGCTTACCAGGA -GGATCCCCGGGCCCCTTCCCAAACCCGCCGACAAGATGTTGGGTCTTTTCGAGCAAGTAT -TGACGATTGCGAACTTCATCAACCACGGAATACTGATCCAGCATCTGATATCCGCGTTAG -TAGACAATCACTGCCAACCAAGGAGTCGCTTACCATTAGTGTCGCACCATTCCAGAATGA -ATAACACGTGTCAGCGTATTTATTGCATCGACCGTTGAACCCGGCGCATGATATTGTGTC -AGCCCCGATGGGTTGAACATTGGGTAACCCGCGGACTCTATCGTCGACAGATCCCGTCTC -TGCCACCTCGCTCTGTTCATCATCTTCATCATCTTGAGGTTCCTCTATCTGGGCTGTCTG -CCTTGAAGCCAGCCATGTAATCAAAGCCTCGAATTGATTGGAACCTGCCGATAGTAGGGG -CACTGATTTTAGATCATTAGAGGTCCGGCGCAAAAAGTCCATAGAGCTAATCGCGCAATA -CGTGTGTCCACCTATAAGTTGTCAGATGAATTCAGGTGGCAGAAAGGGAGACACAAACTA -TGCGATTCACAGAAAGGAGACTCAGCCATGCCACCGTCGTATGTCTGTTTCGTCAGACTG -TCAGTATTTCAATCGCAGACGGATCATCAAGGTCTCCACATACCTGGCACGCCTGGATAA -AAGAAACAAAACTTAACACATCAATATCCGGTACACCCTCAAGCCCTGTCTCATTCCGCC -CGCGCAGGATATACCGTATACCGGCAGCACAGCAGCAGTATCGCAAATCACGAGCTCCCT -CTACGTGCCCCTCAGGACCGACAAGTTCACCGAAGCTCCCGTCAGCACGTTGGACTTTGG 
-GAAGCCACTCAAGGCACTCCCGTCGCTTGACACGCGATAAGTCATCCCCGAGGATGAGTA -AGTTGACTAGCGCAAAGAACGTTGCGGGGACATTGGCTGGATCCCAAGCTTCATTATCCT -TATTTCGCTTTTCTGGTCCAAAATCCGTGCCTGGGAAGCCACGGAATCCACCTGAGGGCA -CCTGGCAGTGGTAGAGCCAGTTGACGTATCCTTGTCTTTCTTCCGGCGTTGTTTTGCTCT -GGAGAACGCCGAGCACGTCTAATCCTGAGAGGGTCAGGTAGGCGAGGAGCATGCGATTAG -AGTCATTGGAAGTGTAGGCGGATGGGAGGAAGGTCTTCAGGCATCGGAGGTAGTATTTGA -CATGGCGCTCTGTGTTGAAGATTGGGGTTGGGTCGTTCATTGTAGAGGATGGGCGGGGTT -AGACCACTATGTAGTAGGACACGAACTGGTCAATATTCCAGTAGCACTTATTTCATTCAA -TCCAACCAGCGTGCGTCAAGTCGTAAATCAAATCCAATCAATCAGCGCAATGTTGTGTTA -GTGGGGGAATCTCGACCGGAGCTGCTTTTTCCCCACATGTGTCGCGGCCAAAAAAACACT -CGGACCAACTGTCTCTGTTATTTCCGTGCCTGGTTGGAACTTCTCTTGTCAACAAAATCG -TGCCATTGTGACTCGCTCATCATGGTTTTTATTTCATCCAAGACTCTTATCCAGGCACAT -GCCTTGTTTCTGGTTGTCATCGCGGGTTACTTAATCAAGAGCCCGGAGGTGATCACTGAC -TGCGATCTGGTGTTTATGATGGGAGAAGCCTTGAAGATTGTACGACACCATCATCCCCGA -TCCACAAATTCATCCCAGTCATATTGGCAATGAATATGCCTATATTGAATTGACTGCTAA -CACATACGACACAGGACTTTCCATCCCTGTCAAGCCCACAACAGTCCCCCTACACCTTCT -GTGCCATACTCATTTTCGTTGAGGCCCTTTCTGATATTGTCCTGCTGTCCAACATCCCCT -TCCACGAAGCTCTCGATGAAGCACTTCCCTATATTAGGCCCCTGCGAAATGGCAACCTCC -CTGCCGAGGATCTGCAGGTTCTACAGCGTTTGCCCGAATATATCACCAAGTCCTTGACCG -TCTACTGGAGCGTCTGGATCGCAGTTGCTGCATCTCGCTTTGCTGTATATGCTGGAATTG -CGTTCTTTATCTACCAGGGCAGAGGCGATCACCTTGTCTCGTCTTACACAAGTGCTGCGG -GTATTGGAGGCCTGGATCGCCTGAAGAATCGCGTGGTCTTCAGCTTTGCTTTCTTCGAAA -TGATGTTTTGGTTCTGGGTAAGTGGAATGTGTCTTCACCAGGCAGGCTTGGTCCTGTCGA -CTTTTTTTCATGCTAATGTGATACTCTACAGAATTTCGCAACTCTCCGCGAGGAACGCCA -GGAACGCTTGACCAAGCTTCTGGAGGATACCCGTGAAATGTGAACATTTCCGTATCTTCC -TTCTACTTCTTTCTATAAGTGTATTATTACTCTGAAAAATATCTCATCGCTGTTGAAAAG -CGATGGCACTTACACACCGTGTACTATATCAATTAAAACTGCATCGTGAATAAAATAAGA -CACAGGTGCATCTTCAGCCAGTCCTCGTGTTTCATTTATTCCATCGTCATGACGACATTT -TAATGCTGCTAAACATACTATCTCCCCTGCAATCAACCAGCGCGCTTCATTCTAGATCAG -AATGGCGAAagcagccgcagcaccagccgcagcaccagccgcagccacaagcGAAGAATC -CTTGTGCGTCAGTGCAGCTACGCCAGTGCTAGTCGCTGCGGCAGAGGCAGACGAGGCAGT -GGAAGCACTGTTTGAGGTCTTGGTAGAAGTGTGGGTGCCCGTTAAACTAGTCGAATCCTC 
-GGTCTTAGTCGCAGCCGAGGCCTCGGTGCTAGACTTGGTTCCAGTCTCTGTGGAGTCGGA -ACTCGTGCTGGTGCTGGTGCTGGACTTGCTGCTTGAGCAGACAACCTCGGTGGGTATATT -GGTCACACCACCCCGGGCACAGATTGACTTGGCGACCTTGAGGGTTTCTATGATGAAGAA -TAACATATCAGCCTATTGTACATGCTCCACTATCCATTGTGCACAGCCATTTGGAATAAT -CTTGCACTTGCAGACTCAATTGGAGGTAGAAAGGAGGGAGTGGAACTTACTATCCTGGTC -CTCCTGGGAACATTTATCAGCGACGCAGCAGGCGACATCGCCCAGGAAGGTCTTGGACTC -GCAGATGCACTTGAAATTAATGCCGCAGCTCTCGGGAATTGAACCTGTGGCACATGTTTT -CTATAGAATGGGTTGTTTAGTAGGCGGCACATTTGGCACATTTGGTAGTTGCATGTTGAC -TGTAAACACATACCGAGCAGTCGGGTAGATCACCCAAGCCTTGAGCAGCGGCTGTGGTCA -GCAGAGCTGCCAGGGCGAAAGTAATGGTGAACTTCATCTTTGCAGATATGGGGATGTTGG -ATGAAGTGAATTATGGGTAGAGGAGAAGAAGTGAGAGGTTTTGTTCTCCAACTCAGGTCC -CCAGGTCGTGGTAATACACCAATAGATAGGTCAAAAATACACTAATACGCAGATCAATCA -TGGGTACAGTATGTATTTCCCCGCACAACCTTCCAGGGTTGGGTTTCTCCACAAAATTCA -GTCTGTATCGAGGTATTTTTGAATGATTGGACATTAAATATCAGAACTCCTGGGTATTTT -ACATAGGTATTTAACTGCTCCACCGCACGTCTATCTCTCCCTCTTCTATGTCAGGTAGTC -GAGATACCTACCGGTGCAGATGAACATTATAGAAAGCAAAGATGTCCCCACTTCTCAGAC -TTTGAAATCAATAGCGCCGGTGATCTCCAGGACATGCAGCCGATATTGAAACTAGCTGCA -AAGTCCCTCACCGACCTCGTTCACATCATGCTAGTATGACTCGCCGCTTGCACCGTTGTG -AGGCTGGCCAACTATGGGCGGATAACCGTAGACAGAGGCGTCTTGCTCAGACGGGTCAAG -ATTGAGATTCGACATTGCGTTTGCACCTTCCGCATTGGTGAATCCTGCTGGACGAGCTAG -TCCAGCTCCACTTCCGCCCTGACCAACAACCGGTCCTCCAGCTCCATATCCAGCGCTTGG -AGGTCTCTGATTGTCGCCGCGGGCCGACTGGGTTTCACGGTACTTGGATAGATAGATCTT -GAGTGCTTCAGCGTAATTCTCGAAGCCAAGCGAGGTCATGGCAAATAGGATGTCTTCGCC -GTTGACGGTTTTGCGCTTCTCCTGTTGGCATTTCTCCGACGCTTCACTCGTGATAAATGA -GATGTATTCGCTCACACATTCCTGCATGCATTCCTTGGCTTCTTTTGCGATCTTTGCATT -GTCTGGAAGGGCAAGCTTCATGATACGAGCCACATTGGCAATCGGGAGCCACCGATCCTG -TTCCTTCACCTCAAACTCGCCTTGGTGTCCATGCGCGTCGTGATGATCCTTGTCTATGTG -GTCTTGCTCTTCTCCCGATTGCGCCTCCGGGTCAACATCGGGTTCCTTTGAAGGGGAGGT -TGATGACATGATCAAAACCGGGCTGGATAGATGATGTGGGTCTTTGGTTTTGTGTCAACA -CTTATGCTTCGCGTGCGCGCTGCGCAGTAGTCTCAAGCGAGAAGACTATCAGCTCAGTGG -TGGGGATTTGACAATAATTGCAGCTGTAGCTCTTCAGGGATGAGGGTTGACTTCAATGTT -TAATGCGATCACGATGTAGCTGCTGTGGAAACCGTGCGCCTGGTAGCTCGGAGTCTAGCT 
-CTGGAGGCCATTGATGGTGATGTGACACCGATGGTCACATGATACATCACATGACCGTGG -ACGGTCACGTGCCGTCAGAGATGGGGGCACGAAGTACGGAGTAAAACCCGGTTGCTTCTC -TGGCCCTTTTTTGTATCGATGGATAGTATAACGGCGATGCCTACTCTTAAGAGCGAGCCT -ACAGTCTTGGGGCCTCGACCAGGGGCCGTTGTAGACCCGATTGATACAGGTTCAAAACCT -CGGACCATCTTGTATTGGTATTTCCTATAATTACCTAGCGGGCCCCGGCGTAGCTGTGGA -TATTGCTATTTGGTACTGTGGCATTTATCTGCCAACTATATTCCCAGTACAGACCTCCCT -TCACCTTGATTCCAACACTTTTCCACTTCCGCTTCGTCTGTTTTCTATATTATGACGGTC -ATTCGGGTTGTACGGCTTACACTCTATACAATCACAAGGTTCGTATGGGAAATCTGCAAG -CCTCCGGTCAGCATTATCGATTCTACCCCCTCCACAGTGGGCCAGTCCCTGATCCCAACA -CCAAACTTGTTTGAGCCTGGGTATGAAAGAAAACCGATCCGAGCTATTTCAATCTTGCAG -AGATCTCCCTGCACCGTGGTCGAAAAGTTAATGCTTATGTTGTAATGAATGTGATGCCTC -GGTCTAAGTGAAGCGCCGATCCTAGCTTTTCCCCCAAATCGACTTCGTCAGTGGCTCAGC -TTTCCCCGCGTTCTGGGCGTATCGTATATACTGCACACCTACTAAATTATAAAGGCTTTG -TGGTTGGTTAAGATGTTGTCTTGAATGGAGTCTTGAATGGAGCCTTGGATGAGGTATATT -GTACAAGAGTTTGAAAATTGCCTCTATCACATTCAATACAATGACACTGCATGCAGGTTC -ACACTCGGGTCTTAACATATCGACCGAGCTGCTTATCTTTTCCAAGATTAGTTATTTTTT -AGGACTATAATTTAGTCCTAAAATTTGAATACATCGTTATTTATATCAACTATTCATTCC -TATTGGCGATCATTTCCACGTAGTTTCCTTCAGATGGTCGAGACTTGATCTGAGTTGGCA -TAGTCCGTACCCCTCCTGACTTTGTTTTACTTTTCTATTTAGACCGCTGCATACAAGCAC -ATCACCCCTAAGCGTCGCGTCGTTAGCTTCCTGCAATCATGGGTCTTGGAGTACTGGAAG -ATACAAAGCTCGCGCATGTCCCAGGTAAGATTCCCTAGCCTTTTTCTCCTAGACGCAATC -AAATCTAGCGTGGCTCATCGCATCCCCAAAATTTATAGGCACCTCCGATATCTACGAACA -AAACAGCGATGCGAGCGAACATATCCCAGGCGCCCCAGGCCTCAAATGTGACTGGTCAGG -CAAACAGCCGATTATCCTCGTCCCGCAACCGAGCGATGACCCAAACGACCCATTGGTATG -TCCCAGAGAGAATTGCGCCCCAAGGCTTAGTTCCACAATCACACAGAAGCTGACCCAACT -GATCCTATTAGAACTGGCCATTATGGCGACGCGATATGATCCTTGCCATCCTCTCCTTCG -TAACCATCCTCTGCACAACCCTAAGCTCCATCCTAGCGGCCAATACCGTCACCATCGCAG -ATTATGAGGAAATCACGTTCACGGCAGCGGCGCTGTTGACTGGATATCACCTATGTGGTG -TCGGTGTGGCGGGAATACTTATTGTGCCAACGGCTCGAGTCTGGGGAAAACGTCATTTGT -TCCTAATTGGGCATGTCTTGATGATTGTCAGCTGTATCTGGGCTGGAGTTAGTGGAAAGA -ACGAGAATAGTCTCATATGGGCACGGGTATTCCAGGGTGTTGCATTGGCGCCGTTTGAGG -CGCTGGTCAATGCCTGTGTTGGAGATCTTTATTTCGTCCATGTGAGTTGGGTCCTACTTT 
-CGTAAACATTTGGGCAATGAGGAAATTAATGGAAGTAACACAGGAACGAGGAAAGCGAAT -GGCTGTATCCAATGTCGCACTCTTTGGCGCGGCCTTTCTGACACCCGTTGTAGTGGGAAA -GATCACTAAGTCCATGGGATGGCAATGGTCATTTTACTTTGTGGCAATCTTCCTCGGCGT -CTCATTACCACTGATGTTCTTTTTCGTGCCTGAAACGGCTTACAGGCGTCCTGATTACTT -GAACACCGACTTCAAGCGATCCGGTGGACAGGGTGAGTCCATGGAGTCTCATGCTTCGCT -CAGCGAAGTCTCCAATGATGAGACCAAAGAGCTTAGACCGGACTCTGTCGGTATGAACGG -AATTTCAAGGACTGAGGCCGCGACAACAATCCCCGAGAAAGATTCGTTCACTAGATCGCT -CAGGCTTTTCAATGGACGAAAAACAGATGAGAGTTTCTTCAAGCTGCTCCTTCGTCCATT -TCCGTTGTTCTTCCATCCAGGCATCTTCTGGGTAGGCAATACACTTTGCATTTAATCTCA -CATACTGATGAAAGTCTAGGCTTGTCTGATCCAAGGCGTTGTTATCGGATGGGCGGTCTT -CGTCGGCGTAGTCCTAGCAATCGTGTTTCTCGGACCTCCAATGTGGTTCGAAGAAGACAA -GACTGGATATCTTTACACAGGTGCTTTTATCGGTTCCGTTGTGGGCCTCGTCCTGTCTGG -TCTGCTTTCTGATTCCATCAACAAGTTGATGATCCGGCTCAACCACGGCAAATACGAACC -TGAATTCCGGATCCTTTTGGTGATCCCCCAGCTCGTTTTCTGTGGTATCGGACTCTATGG -GTTTGGATGGACCGCAAACAATGTGATGCACTATGGCTGGCTACTCCCAGATGTCTTCTT -CGCATTTTTGATCGTCGGCATGGTAATGGGGGCCGTTGCTGCTTCACTCTATATCGTCGA -TGCCCATCGTATGTGCACTATGAGAGCTTGCATGAGAATAATTGCTGACAGTCTTAGGCG -AGATGGCCATTGAGGCATTTACCTGCATGCTGGTTTTCAAGAATATGTTCAGCTTCGTGC -TGACCTTTTTCGCCTACAAATGGTTTGCCCATGGAGGCATCAAGCATACCATGATTATCA -TCGGCAGTATCCAAGTGGGCATCTGCCTGTTAAGTATTCCCATGTGTAAGTTTTGCTCAA -TATGCGGTATTGGTATGTTTTCTACACCCGACTAATCATTGCCCGGTATAGATGTCTTTG -GGAAACGGAATCGGTCCTTCTTTGCGCGTCATGACATCCTCGAGATACTACATCTCCGGT -AACATATGCTGGCAATACCGGAAGTCTTGAACGTGCAGGGAGCAGGTATCTATGAGTTTA -CCCGGTATGAGCAATAGACCAAGCATATGCTATGAGACTTTGCATGTGTTTACACAATTC -AATATCACAATTTTCTGAATCACAAATATCATATTCGCACTCGAATTCACTAGGTGATGA -TTAATTAGCTGACAAAAATGGACACTTTCACAATAAGGATGATTATTGACACCATGGGAT -TCCTTGTTCTCTTCGGCATTCTATGATATGGTATAATGTTGAGTGTTTTCGCACGAGAAA -TATCAAGATGCAGCTAGTTGTCCAAGTCCTATGTAGGTCGTAAAGTAGCACGTCTAGAGA -TTCCATAATTTCTTCTAGACCTTGGGTCACGCGAAATCGAACTCTTGAGACTGGACTTTT -CGACATCTGTATGTGGTTCTTTTTCGGCAAGGATAGTATCGCATATTAAATCTTTCGTGT -CCCGGAGAAAAATGCTCAAGTTGAATTCTAACATGAAAGAGAAGATGCCAAATATGGTGA -TTGGGACTGTGCAAACGAAGAAAATCCAGATCTTCGGATGGGTCTTCCAATGTAACTGTT 
-GCGTTTTCTCGTCACTGTCGAGATTGAAGAAATTCATTCCGAAGAATCCCTAGAATGATC -AGTTTTGCTCAAAAGTGCAGGTTGGTTTGGCCTACCGAGATGAAAGTGCCCGGTAAAAAT -AGTAATGTTAGGGCCGACACGGTGTCGGACAATATTCTTCCTTCGTTTCGAGTTTGTTTG -AGAATGCCTTTCGAGATGTTACTATCTCCGACGGCTACAAAGTTGGAGGCCTATTGACTT -TAGCATGAATCTCACTCTTTTTTGGGGCAGACATCACTCACGCATTTGATCTCATTATCC -ATCCGAGCTACAAATGCTTTGGCTCGGAAATTTAAATTGGAAATTAGGTTTTGACGAAGC -AGGAGGGCTTTCGAGAGTCGCTTTGTTGCTGCAGAAGGACTGATTGACTGACTCCACATA -TCATGGTCTCGCACGATTGCGCCAAGGTTGTTCGAAGCCGCGTCCATCGTCTCGGAGATG -TGGATTACATGTCGTGATAATTCGTGAAGCTGGTTATATCGTTCCATGAGGACCCTAAAT -GCTTCATCGTCGTCGTCGGAGCCAATTTCTTTGACTGGTTCTTGGCGGCGCCGCTATGCG -TTGTTAGTACCCGCAATCTATATCATTAATTAGGGCAAATACCTCTTCTATATCCCGCAC -TGGCTTCTGGAAGCTCCACAGATCTTCCTCGCACTGGTCTACCACAATACTTAGGGCAAA -GTCTAGCATCAGAAATGGGTCTTCGTTCCATGTTTTGAGGTCTGCGCTATGCAACAAGCG -TCTTAGCCGAAGGACAACCTCGTTATCGAGATCAAAACAAAGGAGCAGAGTGGTGTCGTT -AGATCCGTCCGTTTTGACTCGGCCCGTCTCTGTCTTTGGTCCGAGACAAGACCGCCACAA -GGTACAAAAGGCCATATAATGCCAAGTATAAGTATGATCTGGGATAGAGGTTGAGCGTAG -ATCTGTATTCTGCACACCCGGTAATCTTGCATGATCTTCCTGGTTTTTTGGCTTCACTTT -CTTAGAAAGAAAACGTGAGAATGTTCCTTTACACTTCAGGTTAGTGAATGATTGAAATTC -TTGCCATTGCCTCGTACCATAGCTTGCTTCCTCGTCACTTTCTTCCATATTTTCAGTGCT -GCCAAACATTCCATTTGCGTTCCAGCCCAGGTCTCGAAAGAAGAATTCTGGAAGGAAGAA -TTTCTTGGCTAATTTTCTGCACAACTCTTCACTCCCCTGGATTGCCAACATATCTTTCAC -TTTTGGCATGATACTGATATTAGCATTAGCAAGTTTCAAAAATGTATCGACTGGTTTCAT -TCACATAACATGAACACCAGGCGTTCTATAGAACTATATTGCATCTATCAGCACGTGCGC -GACAAGAAAAGCTTCGATTCAGTTGCCCAACATTTTTGGCCAACAATTCATCTTGGCTTA -ATTCCGTGGGCTCTGGCAGATCTCCATTATCTGGCATCGATTGTTGGGGGCTAAGTGCAG -GTGCGGGCAATAATCGAGTCTGGCTTAAATCCTCGGGCTCGGTCACATTTTCAGCCTCCG -GCATCGATGTTTGAGGGCTCAGTGCAGATGCGGTATACCGCCAGTAATTAATGCTCACTG -AGCGATTCGCCGTCATTGTCGAGGGACCGTAGGAAGTGCTGCTGGAAGTTGCCTAAGGAA -ATGAGAGAAAAAAGCGTTCCCCAGAGCCAATTCAGGTCAGCCTGACATTTTATGTTCAGG -GGAAGGCAGATAATCTTTCCCTCGAAAGGCAATACAAGGCACATGCGCCATACATATTGC -ATGAGGAAATCTCCGATCTGGTTGGCTGTGAGGCTGAGCTGCACTCTTTAGTGTGTATTT -TGCTTTTTCAAATGGCCCCCAAAACGCGTGTCCATAGCGGGGGCTGAGAAAAGGTCAAAA 
-TTTTTTGCAACATTATATTTTATATGGAAGCTCAAAGTTTCGAAGTCATCTTTAACTACA -TATGGAGATGTTGATCAAGGGTACTGGGTGGCAGAGTAGGGTCAAAGGGACAACCGGGTT -CCCACAAACAAACGCTTTCATTCGTCCAAGTTGCTTCTAGCGTCAAGTCGGTAAGATGGC -TAGAAGTTAAGTCTTCAACCGGCAAATAATATTTAATGCGACATCCCTTTGTCACCTGCA -CTGTCCCCCCCTGTCCCAACCAGACAAAATGGCTGTAAGAATTGACCATGACTCTCTTCA -GGTGGACAAACACCTGATGCACGAAACTCTTGAGGCATACGTCGATGGAAAAATCCATCC -CCTCTTAAGGAAGCGTGTCTGGCCGCGCATTGTGGTCCGTGGTCACTATGCCCGAAGTGC -TCCCAATGTAAATATTTCATCTACTGAAAAACGGGAAAAGCGATTTAATTGGCACCGACC -AACAGCGTTCATCGTCAACCACTTGATACTGGAGATCAGGTGTTTCCCTGGACGAGACTA -CGTGCAACATTATGCCCTCCTCATCGCATATTACCTTAGACTGAATTCGATCAAGGCGAC -CGCATCAGTGGTAGAATATACTCTCCCTTCTTCAAAAGCTTGTATGGATATTCTCTTGCA -ATCAAATTTGCAATTCATGGGCGCTGTCGACACCATCATCCTTGGGTATGTGGATCACCT -AGCCAATGAGTATCATACCTGGGAGACAGGGACAGATCTCCCAAATCAGTTATTTGCATG -GAAATACATTCAATTGGCCGATGGAAGCGTTGTTGCCTTTGTGGGTAGCATGATGAGCCT -TTGGGGCGATATCATTGGAGACGTCGTTCGCACAATACGAACCAAACACAAGATATCCAA -CGTTCTTTACCTCGGCAAGGCAGGCTCATTGCGATCTCAAGATGAGCCAAATAAGGTGCT -AGTCACTGGCGAGTCCAGTCGCGTCGATGGCAAGCTTCTTTCTTGGGAAAGCCCACTCAC -CAAGACACTCTCGACAATGGAAATCCCAGACCTCTACCAGGGGACTCATGTCACTGTGCC -AAGCCCATTGGTCGAAACTGAGGATTGGCTCACCAGAAACCAAAAGAGAGCGAACTGGGT -GGACTGCGAGGTCGGTTACCTAGCAGAAGCCTGCCAGGAAAACAATATCCAATTTGGGTA -TTTACACTTCATCTCAGATAATGTGGCACAGAAACACATGTATAGTCTTGCAACAGAGCG -CCAGGATGCTGTTATAAGTGCGCGGATGGATATTTGGGACAAAGTCCGGCAAGTACTATC -CACACATCTAGGTATAGAGGACTTGGGAGCAAGCATACAAAGGGATCTTTGTAAAATATA -GATAGACAAGCATTCAAGTTCCGTTAGAATATAGTATATCAATATCAAGGGTAAAAGCTT -CTAAGCTAGGGTAAACGTACTTTCTGATTTAATTATAGCAAGGCCCATCTGGACAGGACA -CCAATTGCCAATAGCAATGGGGAAACTCGGTGCCCACTCCGCCAATTCTCGAGGTCATAC -TTTCGATGAATATGGCAATACCTAGAGGAGGCATTGAAAGACATCTATTAGTTTGGACTG -ACTCCCTTGGATGTATAGTGTAAGAGGACTGCACATTTGCATACGGAGTATACTTTGCTA -CAAGGTAGTATTGTGGAATCTCATCAAGGTGCAGATAGTTCTATCTCCTGGCTACAAATG -CTGGGATTACCAGCTATCCCGTTCTCATATAACTACAACCTGAGATAAGACCGATTATAT -CCGCAAACACCTGGAGCTAGCATGTTCTAGAATTAAAAAGCTTACATTGTAATTGATCGG -CAATTAGAGTGCACCAATCGAGTTGAAGCCTTGCGGGTTCCGAGAGGGTCCTGATAATTT 
-CGGCCGACAATCATCGGCTTTGTTTATACTGACATTTACCAGCACTTTAAAACCCATCGG -CGTTAATATCATGCAGTTTATTAACTCCTATCCCCGTCGAATGTCATATATTTCTTTTGT -TCTACTGCGAATGCTGCTGCATTCTTCGTTGTGATCAGCTATAACTATAACCAGGTCACT -CAGCCCAGCACCACCTAATAGCTCGGCACGCTATCAGCTTTATACAATGACCACGACATT -GCATATTCCTGCCTCAGACAGTGCCAAAGATATTGGTCAGGACGCATCATATCCATCAAC -CACCGAACATCAAAATGTGAGCCAAGCAAGCGAAGACTACCTCCATCAACCCAACGAAAA -GGATCACTCAAGTGATCCGGGCGGCAGCGACGACGATGTACGAAAAATAACGGGCTATCG -ATGGTTTCTAATTTGTATTGCTCTTTACCTCTCAGCATTGATGTATGGACTGGATACAAC -GATCGCTGCGGACGTACAAGGGGCGGTCATTCAGAGGTTTGGCCAAGTCGACCAACTCGC -CTGGATTGGGGCCGGGTTTCCTCTTGGATCGGTCGCTGTCATTCTCCCATACGGATTTCT -TTTCACCACTTTCAACATGAAATGGCTCTATATTGCCGGGATTGTTTTTTTCCAGGGCGG -GTCTGCTCTCTGTGGTGGCGCTCCAAATATGGGTGCCCTGATTGTTGGACGTGTGATTGC -CGGTGCCGGAGGGACTGGTATCTATCTGGGAGGACTGAATCACTTCTCAGCCATGACGAC -TCGAAATGAGCGTGGGGCGTATCTTGCTGGTACTGGGTTTGTCTGGGGTTTGGGGGCCAT -TTTGGGCCCTGTTGATGGTGGCGGGTTTTCTGATTCATCTGCCACTTGGCGATGGGGATT -CTACATCAATCTTATCATCGGTGCTATCACTGCTCCTGTCTACTTGTTCTACCTTCCAGC -CATTCATCCTGCCCCTGGAAGACCACTAAAAGATCGATTTCTAAGTTTGGACTATGTTGG -ATTTGTGCTTAGCGCGGCGATGTGGGCGACCTTTGCGATGGGATTCGTCTTTGCCGGTGG -TATCTGGCAGTGGAAAGATGGTCGCACTATTTCAATGCTTGTGGCATGGGGAATTCTTCT -TGCCCTTTACGCACTGCAGCAGACCTTTTGCATCTTTACCACCCCTGAACATAGATCATT -TCCCGGACATCTGCTCAAGTCTCGAACCCAAATTTTGTTCTACATCACCACCACCTGTGC -CAACACCTGCATGTTCTTTACCACATTCTACATCCCGATATACTTCCAATTTACACGAAA -CGACTCATCTCTCATGGCGGCAGTCCGACTTCTTCCATACCTCCTGTTCACAATCGCGTT -CAACCTTGCCTCCGGCTCGGCTCTTCCAACAATAAAGTATTACATGCCCATGTGCCTGGT -ATCCGGTGTCTTCATGACCCTAGCAGGTGCGTTGTTTGTGGCATATCTTTCCCCTTCGAC -ACCAGCAGGCCAAATCTACGGTTTCAGTATCTTGATGGGAGTCGGGACCGGAATCACTAT -GCAGCTGGGATACTCTGTGGGCAGCCTGAAAGTCAAACCAACCGATGCCCTCAGTGCTAT -CAACCTGCAGAATGTTGCACAGATTGGCGCCACAGTCATCTGTCTTGTCATTGCCAGCCA -AGCGTTCCAATCAAACGCCATTAGCAACTTGAGTCATGTTTTGGCAGGCCAAGGATTCAG -TCAATCGGAAATTCAAAGTGCCGTTGCTGGATCCCAGAGCATGCTGTTTGAGAGGCTTAG -CGGTGACTTACGGACTGCAGCTATTCAAGCAATCACTGCGGGGTTGGCAAAGGCCTTTAT -CATTCCACTTGTGGCTGGCGCATTAGGGCTTGTTTGCTCTCTGCTGATGAGTCGAGAGAG 
-ACTATTTGGATAAATTGTGGCTCAAAGTTCAAACTGTACAAGATCGCATGAATTTGTTGA -AGTATGATGGTTCTCCTAGTAATATGTTGGAGATAAGCAACTTCTTGCTTCGAGGAGAGG -TAAATCGTGTGTACATTGTTCAGTTACGAGATTACATCTTAGACTATCTGCGTAGTTAGT -ATTGTTCTTTCGTTCCATATTGGTATCATCTGGATTCATGTTGCGATATAGTCGATTCCA -CTGGTGGCTGAGTGGCGTGTGGTTATACTCATTTAATATTTCGAGTAGACCGCCGCGAAA -CCCATAAATGGGCCTCGATCCCGGGCCCAAAGCCCAACCAGCAGTAGGCTATGGGGCCAA -CCTGAAAGACCTCGACCCGAACCGAGAGCCCATCAAAGGGGCCTATAAGTTCCCCATCGA -AGGATTCACATAGAAAAGTCGCTTGCACAGAGCATGCTTTTTCCGCCTTTTTCTGCAGTC -TACCCAATAAGACTGGAAAGGCGGGTTTGAAGATATCTAAGAAGTTTCTGGAAAAGACCA -CCGAAAAGACCACTGAAAAGACCTGCAAGACATCATAAGGCTGCATAAGCGGTCGTCTAC -CCTTGTTACACATAAATATGGTCTTATTATACAGAAAATAGCTGATTGATATAGGGTTTT -TCACGTATATTCCAGCCTATACATGCCGTGATCGTGGCGATCTTACCCACATGAGATGCC -TCTATTGCTGCAGTCCCTACCTATGGACCGGCATCTGTTTCCCTTTCTGGATCCTCACAT -AACCTACAATTTAAGGCTTGAGTAGCATTCAGAATATCTAAGATTACTGGCAAATTGTGC -ATTACTTGAGAGCTGTGACTTGGCATATTTGCATTGTTTGGCGCCTATTGGGCCTTATCA -GACCCCTATCAAGGGCCCCGTGGGCTTTTGCGCCGGGCTTATAGCCCAGACTATTTGCTT -GTTGAATTGGGCTGGCTGTGCACGGGCGACTACGTTGTGGCCTTGTGGGCTTTGCGGAGG -TAGGACTGTAGGAGGATGACACCGCCTTTGACCATCTGCTGCTTCTTCTCTTTATCTTCA -TCCTTTTCCTTCCCATAATTGGTATTCCTGTTTTATGAATTGGCTCATACAAAAGGGGTC -AAGAAGATGACTTTTCGCCAAAATTCTGTTCACAACCTCCAGCTGGGGGCGACTATTTGC -CCGGAGAGTACTAAGGGCTAGAGCGGCGAGCCGCCCACGCCCGAATCGCGGGCGCTAGAT -CATCTCCCGCCTACCTCTAGTAGCTCTAGGGCTCTAGACTCCCACGGCTCGCGTCGGTCG -ATTCCCACTCTTTCCTACGGCTCACACTTTCGCCATGCAAGGCCGAAAACGCCGCGTCAG -AACGTGTTATCCTTGCTATACGCGTAAGCAGAAAGCGAGTGCTATATGGCTCTGATCATG -ATGCAACGCTGATTTTGGTTTGGTTGTAGTGCGACCGCCAATATCCATGCAACCACTGTA -CCCGCCGTCGCCGACCTGAAGAGTGTGTCTACAGTTCTCCGCCGGTGAATGCGGCCCCCG -GTCCGTCAGTTCCTACAGATTATCTGGAGACCCAACCACCTCGACCGGTAGAAAGCGCAC -GGTCCATGCAAGGAACAGCCGTAGATGGCCCCGAGACTCACTGGCCCAGAGCGCATTCTG -CTTTAGCTCGATCCTTTGGTTATTTCGAGGATAGCAACTCCAATACAATGGCTTTGCTCC -GAAATGTGAGTCTCGCAGTGTTGCTGTCCTTATGGTATTATTTAGACTCATTCAACCTCA -CAGATGGAATTGCCTGATCAGTTTGATACAGAGCTTAAGGATTCCTCTTCCTCTACTTGG -GGCGCCATTCACCGTGAGCTTGATCTCATGCCTGAGCGCCAAGTCCTCGACTTTCTTGTG 
-CAGTATTTCGTCTTTGAGTTGAATTGGTAGGCCTATCTATCAGTTTGGCTTGGCTTGGCT -TAGTTCTCTTGTTCTAACCTCGAGCTCAGGATAAAGCAAGTCATTCACGCCCCCAGCTTC -TTGGCAACATACCAGCTGTGGTGGGCAAAGGATAAAATAGCGGAGGTTGCGGACGTCGAG -TTTGCAGCTCTAATTGCTCGTATTTGTTCTTATGCGACACAATTCCTGCCCTCACCGTCT -CATGCCGTTGATCAAATTCGCGGTCGGTCTTTAGCCGATATTCGTGACACATGTACTATT -ATTGGAAACAATCTTGCTACAGCTTGCGAGAATCTCGACTGGAAAGGCACCTTGGTTCGT -GTTCAGCACATCATTTTTGCTGCATTGAAAGTCTCATGTGAAGGAAGGACTAGTCAATTT -TGGGAGGGTATAGCTTCTGCATGTAGAGCTGCTCAAAAAGCTGGCATCCACCCTCATACT -ACCAGTTTGATGGAATCTCAGCTTGCCAAAGATAGTGCGCAGGAATTAGAAAGAGATATT -CAGCGGAGAACGTTTTGCAGCCTTTATGTTTTGGACAGGTACGCTTAGTATCATTCTGAT -ACTGAAATCTTATTGACGCATTTGGATGACAGTCATTTGTCCAGACAATTAGATCGGATC -GCGTTCTTACCCAATCACCTGGTCAAGGAAGCACTGCCCCGGCTACGCTTGGTCCCAAAC -ATTGGCGATATATCGATAGAAACTGCCGCCAGCGCACCCGACATATTCACCGAACGTCTC -ATGCAAGTTCAGCTTGGCTTATTTTGGCAAGGCTTTGGGTTACAACGTAACTCCGAGTTT -GACCCGATCGAGAGTGAGCAGAGATATGAGAAGCTCAACTCCGAGTATCTCAGCAACTTA -CACCCGGCCTTCGCTATAACCTACCCAGACACGACTTTTGATATGACTCTCTCAAAACTA -CCCATGCAAAGACAACTTCTATATATCGCGAGATTCGACTCGATTTGTTGGAACTTCCGA -CCTCTTCTGCTACTCAAACCAGATCAAGTGGCAAGATTAGCCCCTTATAAGAGAGTCCTT -CTCCGGTCGCAGAAACGGAGACTTGGTATGGCTGCATTAAAAGTGCTCGAAGCTGTTGCA -ACCCTTCATAAAATGTTTGGGGGCTCTTATACGCGCTTTTCTGCAATTATCTTCAACTCT -TTTGAGCCCGCCATTCTTCTGCTCAATCTTTGCGGCCATGCAGATTTCCCGTTTGACCAA -GAAGAACAAAGCACAACACTTATGGGAATCAAGATTAGAATGACATATCAAACAACAATG -CAGGCGGTAGAGCAGGCTATCCATCGTCTTCAAATGCTAGCCGAGCTCAGTGACATGGCT -GCGTCCGGAGCCCGCGTGGCTACTCAGCTTTTTGCCAGAACTGTGCAGGCCAAGCAGTCG -TCTAGTCCACTCGCACCCACATCTTCTGTTTCTGGCTCTCTAGGACCACTCCAATGCCTG -ACTTCGATAAGGATGTGTGGTGAACAGGAAAGCTGGGAGTCTTTGGAAGCGGAGGATCCT -TATTCAATGCCGGATACCTTTTTTCCGATGACACAAGAAGAGTCTTTTCCACACTCCCAG -CTTTCCTCGTTAGATTTTCCTATGCTCAGGGGGGAGTCATACTTCTAGCGTCTTCTTCGA -TGAGCCATCGATATTTGCTGTTGGTCATGAATCATTGGCGTTGTGGAATCTACTGTACTC -ATTGGACTGATGGGGATCAAACTATACATACATAAAGAATCTACACAGTGTAGTCGCGTT -GATCACAGTTCTTTTTCCATGCCTGCGGGGACAGTATTCAGGATCGAACTTATTTTAGTA -CTAGTGGGATAAGCAAATAGAAACGAAAACGCCCAATCGAGAACCAACATGTCGGTCTGG 
-CTAGTCCAAGATCCCAGTTTGAAAGAGACCTTACGAAATGTCGCGCTCGACGGACTGCAC -TAGTTGTTTCAGTAGGCGCCTTAGTGATTAAGGTATGGATACAATTATTCGCATTTCGCA -CATGTCAACAACTTTGTTCTTGGGCCCACAGTAAGACGCTGTTGTAGACCCAATTTGGTC -CCGCCCATGTCGCCGAGATATTAGAACGGAAACAAACGTCTGAGCGGAAAGATGATCCGA -TTTTCCTAACCGAAGTCCCAGACTCAGACACTTAGTACTATGGCGCTAGAATTGCAGAGG -CCGCGAGGCTATTTTCCGGTAGGAAGAGTCTTCTGAGCTGCCACATAACACCTATCAATC -CTTATCTACCACTTTCCAACTCATGTCGCTCGGACTAATCTAAGTCTCGCGAATCTTTTT -CGTGAAACGCTGGAGGTCCTCGGTGGAGGCCAAAATGGTGCGTTGACATGTCATTTTGTG -CTGGAAATCCACAGTCTTTCCAGAAGCGACGCGCCGGGCGGGCCCCAGATGCCTAATCCG -GCCTGTTTGGGGTAGCCCCGCGATTCGTATCTGTTGGGATGCCATTCGCATGAGCCATCT -TGATGAGCTGGAGCTTGCTGACACTCCTCAACTTCTTATCGACAGTGGAGACGATTCAAA -TCGAAACAAGCTGCCCAGACTTCCTGACAGGATCACCTGGATGAGCGGGTCAACTCGACA -TCTGAGAGTAAGAATTATGCCTCACCGATATCGACTTCCTGGGGGTCAGCACTCCATATT -CGTCGATGGAGCCCGGTTGTAGGCGAGCTGTGCAAGGCGGACGCTCCGTCGATCACACCT -AGGCGGACATCGCAACTTAGTGAGACGAAGAATCAAATAGTTTCATTGACCTCTCCAGAG -CATCAACGGCGCCGCCCGCAAGATTGCCGAGATGACCCAATAGGCCTCACAGTTCTCCAC -GCCCCAGAGTCAAAACACCGGACCGTGGACATCCTGTTCATACACGGACTCGGAGGCACA -AGTCTACGAACATGGTGCAAGAACCGGGACTTGGAGTTTCTCTGGCCCAAAATGTGGCTG -CCAGAGGAGCCAGATCTATCCACGGCACGCATTTTGACATTTGGCTACAACGCAAACTTT -GCCGCAAAGAAGGAGCAGGTATCCTTGTCGATTGGCGACTTCGCCAATGACCTACTGTTT -CACATGAAATATGACAACGATGGCGACGACAAAATGGGGGAAGTGCCAATTATCATTGTC -GCACACTCGATGGGGGGGGGGTCTTGTGTTCAAAAAGGCCTGTATGTGATCCAATTAGAG -GAGCTTCAGCCGAATAATTATGGGTTGAACCTACTAACTTCAGTGGTAGTCATCCATGGT -CATCTTGAAGAGCAGTACCACAAGATTACATCTAGCATCAAAGCTGTTCTCTTCATGGCT -ATTCCGCACCGGGGGACTGATCTCGCAGAGTCCCTAAACCGGATCCTCACCAGCTCCATT -TTTGGCCAATATCGAAGGACTATGTCAGAGAACTCACGAAAGGCAGCACTACTATCGACG -AGCTGAATGATACATTTCGCCATCATGCGACAAAGTTGCAAATATTTTCGTTCTACGAGA -CTCAGTACAGCGGTCGGACCGATGAATTTGATGATTCTAGAAAGATATTCGTCCCTATTG -GGTTATCATAATGAGACTCCAAAGCCTTTGATGGCAAACCATCATGATGTGGTGAACTTC -TCCAGTCGGAATGATCCGAACTACAAGTCATTTCGAGGTGCCTTACGCAGAATCGTCAAT -GGGTTCCGATTTTCGAAAACACTCGAGAATGGTACGGAGAAAGACCTGGAAATGATCCAA -AAGTGGCTGGGAGTGACGGGACCGCCGGAAGAGGACTTGGTTTCGCATCGATCGGTGCGG 
-AAGGCAGGAACATGTGAGCACCTACTGCAAAAGCCCGAATTCGAAGGTTGGCTCGATTCA -GGTATTCCCAGTATCCTCTGGGCACATGCTCCCCCGGCGAGTGGAAAATCGATTTCAGTG -TTCCTTTGTAATAGAATCCCTTCAACTGCGCCAAGAACGCTGTGCGTACTGGTTTTTCAA -GGATGACGATGTTCAAAAGCGTTCCCTAAGCAATATGCTTCTCTCAGTGGCCTACCAGAT -CGCGGTTGAAGATGGGTCGTTTCGGCGAGTTCTTATGGACGTCGTCAAGTCCGGCATGCG -TGTCGACAAAGCGGATGCGTGGACAATTTGGCGAAACATCTTCGCTTCTAGACTTTCAGC -CATCAGCACGGTGCTGTATTGGGTCATAGATGGGTTGGATGAGTCGGAATCGAGCAGAGC -ATTTATCGATCTCATCTCAAATATCAGCACGTCCCAGAGCAATATTCGAGTCCTTTTCTT -CAGTCGTCCACTGTCAACCATCGACCGATCCATCCAGATAGCAAGAAGGCGAGTCAATAT -CGCAGATGTTGCCTTGACAGACAACTTGAAAGACATCCGCCTCGTGGCTGCAGATGAGAT -GGAATACTTTCTCCCGAGCCATGAGTTCCAGGATTTCAAGCACGAAATCATTGAGGAAAT -TACCTGCCGCTCGCAAGGTAACTTCCTGTGGGCAAGTCTCATCCTCAAAAAGGTCGTCAA -TTGTCATCGTCAAGATGAGGTCAAACAGGTCCTACGTGTCACGCCGGATGGCATGGATAA -GCTCTATGATGGAATGGCTGAGGCCATTGCAAAAGTAGTCGGTGAAGCAAACACAACTCT -TTGCAAGATACTACTCTCTTGGGCCATATACTCGATCCGACCAATTGGGATTGAAGAACT -CATGGAGCCCTATGCCACAGAGCTCCACACAGTCATAGATCTTAAACACACAATAAGTGA -AATTTGTGGTCAGTTTGTGGCCATCAACGCCAACAATCAGGTCGTACTGATCCACCAGAC -GGCACGACAATACCTCAGAGCCTGCACCAGGTTCCCATTCTCCCTCGATGCGTACGAGGT -GAACGAAGACCTTCTCTTCCAATGTCTAACATCTTTTTGCGACATGTCTCTAAGAACTAA -AATCCGACTGCGCAAGACTCCCATGTTCGTCACGTATGCGTCCGTTTCCTGGGCTCTTCA -CCTGAATCGCTCGTCTGTTAAATCTGACAGGGTACTCAAGATACTTGTTCAATTCTTAAG -TGGTAACTTTCCATTGCCTTGGTTTCAATTTCTAGCTATCAATGGGCATCTTTCGCACTT -GGTGGCAGTATCTTCCCATCTCATGAGCTTCGTCCGGCAACACAGGAAGGTTGACGCGAC -CAAGCCGCGATCGCCTTTTCGCTCACCGGAACTGGCGTTGCTGGAGGCATGGGCATTGGA -TCTCTTGAAGATGACAGCAAAATTTGGGAGTCATATGAGTGATGATCCCGACGCGATATA -TAAATATATTCCCGCCCTCAGCCCAGAGAACTCGGTCCTTTACCAGAAGTACGCAAGCAA -ATCATCATCTACGATCTCCATCTCCGGTATAAAAAATACAGACTGGGATGATTGTCTTGC -TCGGGTATCAAATGGTTCTGATTCTGCGTTGTATATTGCAGTGTCGGCGGAATATCTTGC -TCTTGCGAACGGCCAACCGAATGGCAGAATTCGGCTGTGGAGTAATACCATCTTCCAAGA -GCACTCTGGATTCAACCCTGGCGAGCCGATATGTTCCGTCTCTTTTAGTGACTCTGGCTC -GCTCCTAGCCTGCTATTGCCTGGATAATACTTATGTGTGGAGATTGAGTGACCGGAGTGT -TGTCATCAAGATCCAAAGCCCTACCAGGAACGACCGAAGTCTCTGAAGTTTGCACAAGAC 
-GAATCGTTTCTCATTGTTGCTACTGACTTTCGTCGTGTCTATCAACTGAATCTTGACAAC -GGCACCCCGACTTGGCTTACCTACGGTCCTTCTCTTCTACTGGAAACTTCAGTGCCGGAG -GGGGCCTTTGTCAATACCCCATCGTCGGTTGACTTCAACCCAGATTGCACACAGCTTGCT -ATCGCCTATCGAAATTTTCCTTTGGCCGTTTGGAATATTGACCCACCGGAAGTGATTGCC -AGGTGCCGGCAAAAAGAGACACAGGGACAGACAACGAATGCAGCACCCTGGACAGGTGTC -AACTGTGTGGTCTGGCATCCCTTCGGTGGCCAAGTTATCGGAATCAACCGTGACGGCAAT -ATCTTCAAGTGGGATCCGATGGACGACAGCTACGAAGAGATGAAACAGGAGTCAGATAAT -ACGCCCTCGAATATTAAGTGTTGTCCCAATGGGCTAGTCTTCGCCACCAGCGACGTTAGA -GGCTCGGTTAGAATTTACGATTACTCGCAGATGGTTCAGGTGTATAAGCTCACTTCAGCT -TTGATGGTCACGAATATTACATTTAGCCCCGACAGTCGCAGACTCTATGATCTGCGAGGG -TCCTGGTGCAATGTATGGGAGCCAAATTGTTTGATCCGGATCAATGATACCAGCATAGAC -TCATCCGAGGAGAGCGAGAGCGTGGCAAGTTACGAACAGAAAAGAGAAATCGGCTGGGGC -TGGAAACTAGACGACAAAAGCGACTCCATAATCTCGTCTCCAACGTCAGAAGTATATGCT -GATACCAAGCCGCCGATCACGGCAGTAGCAAACTGCGCGAAGAACAGGAATATTTTCGCG -CATGCCGCAGAGGATGGGACGATTGAGATTTGTGACATAGAAAGCAAACGCAAACTCCTC -ATTACCGAGTTTACTTTCGGCATGGATATTGTCCACTTCGCTCTAGCCCAGAGTGGTAAT -TACGTCGCGTACTGCTTGCTTAATGGGCATGTCACAGTGAAGAGCATCAACTTATCCTCA -GAACTGGAGGAAATTTCCACGCAAACAGTCTTTGCAGAAACAAGATCTATCAGCCGCGGC -CAGATCAGACAGATTCTTTTCGATAGTAAATCTGAGCGACTCTTGGTCTGTGGCACAGAG -AGACTTCAGGTCCTCTGTGTCACAGACAGCAGCATAGCTGCTGAGATGGAAATAGACCCG -GTAGATTTGCCTGCACAATGGGAAAATCATCCAACAGATCCGAACATCTTATTGGGCTTC -ACAGCCGGCGGAATTAACGCATTATCTTGGGATGCCCTCGAATTCAGATATACGCTCTCG -TACAACCTCTCTATCGACAGCTCCGATGACCATCCGGCCCCATCTTCAAAATTGGAAGCT -TTGTCGCAGTCGTACCATCCGAAAATGCATCTTACAACCACTTCAGAAAACACTGCAAGT -GGCAGGCGGTTTCGCTTTCTGCTGCTGGACACCTCTGTGCTATATGAAGATCAAACGAAC -TCTCCCTCTTCTGATTCGATTAGTGCTGTTCATATCCCGCTTCCTATTGCCAAATGTATA -GAGCAGCCAGTTGGAATTCTTGCAGATGGCCGCCTCGTCTTCCTTGACAAGGCCCTATGG -GTGTGTACGGCGCAGTTATGGTTACCATCCGCTCGCGAGTCGGGAACTACAGTGACTAGA -CACTTTTTTATCCCTCGCGACTGGTTGAACAGTGCGGGTTTAGCATTATGTAAGGTGCAG -GCGGATGGGAAATTTTTGTATCCCAGCAAGGGTGAGATGGCGGTGATTAGGAATAGCATG -AGGACGGACTGGTGAAGCACGACGCTTTGGGTAGAGTCATGATACGTTGGACTCGGTGTC -CAAGCACTTGAATGAATAAATGAATATATACATGCAAATACTACATGACAATCGAATTTG 
-AAGTCATTCCAACAAAATACAACTATGTAATGAAATGCTTGGCAGTTGGTGCCTTATTCT -TATGGCTTCAAGGGCCGAATTGAGCCATCACTCCCCGATTACTACCTGTGGCCCTGAAGT -GTTATCGTCATTCAAATGGATCGTGGGAGGTCGGGATATATAGTGTATATCTATGTAGAT -CCATGTAGATCAGGGTTCATCGAATCATTTTGAGTCACGTCACGACCTGAAAATGAGAGT -CGGCTTCGTTCAAATGCGGAATCTTCGCTCCACTTAGACCATTCACTAGCGTAGAGATCT -CCGTACCTCCGGGGCAGATCGATCTTCACCCCAAAACGATGATGCAATGGCTTTTCGACC -GACGTCACGGGGATGAAAGAAGCTTTCCGGACTTCCATTCCACATATAGTTTTGTCTCTG -CTATCTTCTTCTTTTTTGACTCTAAATTTGTGCAATCATGGCGACTGCGCAAGTGACTCG -CCAGCCCCGGTTAGCGGACGATGGTAAGATCTACATTCATGATCTTCAGCTGAAAGCTAA -TCTCACATGAAATAGCCGCAACTGCTGCCCTATATGTGACGCATCCTGAACGCAGGGCAT -CTCTCCGTGCCCCGACCGCAACCAAAACTCAGAACATCAGTTAGTAGTATCCCTGGATAG -AGGCAAATACATTCAAGCTAATAATGCGCCTTGTGGCAGATTCACGCACGGTCGGTGGTG -CCCGAAAGCTCGACCATGCTTCTGCTGCCTCGGCGCTCGCTCACGCCAATCGCAAACCTG -TCGAGATATGGCAGCCCCCAGCTAGACAGCCTGCCGCCGAGAAGGCTGCACTCTGCGTGA -AGGACTACGCTCCGTCCCAGCAATCCAAACCCCCCACGGCGCACAGCGCGGACGGACTGG -GAGCTGCAATGCTCGCTGTTCGTGAGCAGAGAGCCTCGGTGAGCCAGCCATCTGCATCCG -CTGTCAGGCGTGGCAGCTCTACCGACCGTTACCACTACACAGCCGTTGGCGGCAACCCGA -AAGACAAGGCACTGAAGGCTGCATCGGGTGCCTACACGCTCTCGCGTAAGAGAGCCGACT -CAGCACCTGGTGACGCGGGAAACACCTCCGAACTCCAATATGACCCTTTAACCGGCGCAG -CACGACATTCTCGTGTGGAAGAGGAGGATCCCCTTGACCACTTGGATCCTGCTATGGAAG -CAAGCCGGGTCCGGCATATGTCTAACGCTAACCCGAAGCTGTACACTTCGTCACCACCTG -TCCAACCCGAGACAGAGGAGAAAATCCACAAGCACTCGCAGCGCGCTGCTGCGATTTCAA -TGGCAAAGGAGATGTACAAAGTCACCGATTCCAAGCCACAGACCGGCGAATCTCCCGCCG -TCCGAGCAGCCCAAAAGGGGCAAAGTCAACTAGGCTACCGCAAAACCGTATCCACGGCAG -ATGGGGCTGCCCTCCGGCGTGCGATTGCCCTACAAGAGGCAGCGCAAAAGCGAGCCGCCG -AGAAGCTAGCACTCATGCGGAATGAGCATGCGGACTACCAGCTATATTATGGTACCGCAC -CTCAGCCACAACGGTCCCGCTTAATCACCCGCCGCAAGAGGTCTTCAAGCGATGCGGAAG -GCTCTCAGACCGACGCAGAGCAGTCGCGGCAAATCCGGAGCCAGATGAGCAGCCTGCGGG -CCAAGATGGGCCAGGCAGGTGATCGCCAGACACAGGATCGTGAACTACTTATGCAGGCTG -CTCGCCGCAATGTGGACCGAACCCTTCAGGACATGGAGGAGCGAGTTTGCGCGGATACTG -GTCGCGCTCCACCTTCACTGCAGAGGGAGTGGGACGAGGCTGCCCAGGCACGCGTGCGAC -AGGAGGCAGAGGAGTTCGAAGGCGTTCCCACCCAAGAGGATCGAGTCAACATTGGCCGTC 
-AAACGTATATGGACATGGCCGACGTCGATGCTGTCGCCCGGTCTCGCCTCCAGCCTGCTC -TTGATGAGATTGCCGATAATGCCGAACAGCGGAGAGCCAATGAGATCGAAGCCCGCCTAG -ACGCTGAAGAGGAGGAGAGACATGCTGCTGTCGAGCGTGAACGCGACGCTGATACAAGAG -AGATAGAGAAACAACTCAGAGGTAAGCTGACCTGTTTTACCCTCTTTCAACACATCCTCT -GACCAATGTTCTAGGTACAAACAAGCGCGAAAAATCCGAGAGCAAGATTCCCAAATTCTT -CTTATGGAGGAAGAAGGGCAAGCGAGCTCGGGTTGAAGAACCCGAGACCGAAGAGGCTCA -AGCCGCCTCGCCTGTTACTGAAGGAGCAGGAGCTGTATTGGAGGAGCCTCCGACTACAGG -CCCAGATCCCACTGCGTCAACGGACACCATTCCCGAGCAAGCCTCTACTGAGCCAGAGCC -GAGGACCACCATCCCCGATGAAGCTTCTGTTGAAACAGAGATAGCCGCTCCCACAGGCGT -TGCGGCCGAAGGCTCGGTCGCAGAGACCGAGCGTTCAAAGCCCGATGAAAACGATGTGCC -CGTGGCGATCCCACGGCGGCCAACACGAAGCCAGATCGAGCCCCTCGAGCTCGGACAGCG -GGAGGTAGCCGTCTCAGACCCAACAGTTGTGCACTACTTCACGCCACCAGTCACAAGTCC -ACGCGCCGATACAAAGCTCAAGAACTGGTTCCGCGACCGTCTCGCCCGCCGCTCCAGTGG -TCCAGTTCCCATATATCCCCCCCAGCCAGCTCCAGAGTCTAACACAGAGAATGAACCCGC -ATTTCAAGGTGGCGCCACTCTGACCGGCCGTGATGAGTCTCGTGGCACAGCACTGGGCTC -ACATCCTCTCACCGGAACTATCCCAACCCACAATAGATCTTCGAGCTACTACAGCAATGA -CTTTGATGTCAGCAAGATCAATAGTGCAGAGTCTGCCCCAGATTCTACCAAGCAAAATGG -CAATGGAAAGAAGAGGAATCGTTTGAGCAGGTCGTTCCTTAAGACCGTCTCAGGTAACCA -TGACGAGTCTCTTTCCAACGATGGCGACTCCCGTCGCGACTCGGGGGCTCAATCGATGTC -TAAGAATGGGGACGGCGATGGCGATAGCGCTGATGCTCAGAGTCTGAAGGACAGTGCTAT -TGACCAGAGTCTTCCTGTCCCACCTACTATCAGTGAGACGGTGAACGGCAGACGCGAATC -AAGATTCTCCGAgattctgtgattctgtgattctgtgattctgtgattctgtgattctgt -gattctgtgattctgtgattGATATTGAATGATGATGTGTATGTCTATTTGAAACGTTAA -CTTGATGTACGATTTGTTGTTTTGTATTTTACGGGTGATTTACTGGCGCTTTTCGGAATT -TGAATTCGGTGATCTGGATGATATTGGAGTGGGATTATTTGGTTTACTTTTACTATAATT -CTCGGTGTTTCGTATATAATCAATCTACTTCACAATGTCTCCCTAGCAACATCATTGAAC -ATGTCTTTCAAGATGAGCAATAACTCGTCTCAGCGGATCAGTTACGGCTTTCTCTCCCAA -TGTGTCACCCCCGTATAAATCAATAACCTCCTTCACCTCAAAGCCATGTCCCTGTCCCTC -CGCACGCTCAAACGAAACATCCACCCCAAGCTCCTCCAGAGCCTCAGCCACAGCAACACT -CTGCTCAAATCCAACCACTTCATCCGCATCACCATGCACAAGAATCGTAGGCGGGAAACC -CTCCCTCAACCCAAACCTAACAGGGAAAAGTACCCGATCCTCCACCGAGACCGCTCCAAG -CCCCTCAGCAGCAATCCTCGCCGCCAACCCAGGCTTCCTGGTAAGTACATCGGGAAACTG 
-CGCCATCTGGTGCAAAACCATAATCCACCGAAACCGACCATCAGCAGGCGGGTCCGTCGG -AAACGCGTACCCATCAATAGCATCAGCCCGCATAGCATCCTCGACCTCCTTCAAAGCCTG -CGCTTCATCCTCGACAGGGGCAACAAGCGGCTGCCCAGGATGGATATACCGCTCGCTGGA -AGAATCCATCATTCCATAAACGGAGAGCAAAGCGAGCGGCCGTGGTGTAGCGGGATGTGC -GGTAGCAGCGAGCGCTAGATACCCGCCTGCACTGGAGCCGGCGATGATGACCCGCTTACT -GATGTTCTCATAAACCCAGTTCACGGCGTCTATTGCGTCTTGTAGGATTTCAAGACCAGG -GGTTTCGGGGAGTAAGCGGTAGGAGGCTGTTGCGTATGTCCAACCTCTTTTTTGGCAGGC -ATTTATTAGCCAGTGGGGTGTGAAACTTGTTTTTTCGCCTAGAAACTGCGAGGTGGGGTG -TGAGCTGGCTTATTGGTATTTTGGGGTTTTAGGTCATGTTTGGGACTTACTAGGAAGCCT -CCGTGGAAGTGTAGGACGACGGGGCTGTTTTCATCTTTCGTTGGTGGGGAGACGACATCG -ATCTCTAGGGAGAGGCCGTCTACGACTTTGAAGATATGTGTGACTGACATGATTGCGTGA -ATTTGATTTTGTTAGAATAGAAGATGACCTTATTTAGCTATTTATGTCTTTTCGTAAGCA -GAGGACTACCCCCACTGGCACCCCTGCCAGCGGGATATCTCCACACTATATAGTGCCGAG -AGGGGGAGAGGACAGATAGAACAGATTCTCTTTACGCTAAGCCCAACTGGCTTACAGTAT -AGTATCTTGTGTATATTGTATACATTCTGAGTATGCTGAACATAATGCGCGGTGCAATAT -ACAATAAATGTGCACAAGTATAGAAGACTGTGGGAAATCCCGCGGCTTCAAACTGTGGGG -AATCTCCAGGATAGAAGACACGGCAGTTATAGTTGCTGTAATTGTACGTTCAAAATCCAC -ATATATATTGTACTTGTAGCTCGTGTACGCTTCGATCATTTTGCTATTCGTCCTCATTAT -TTCTTGAAAATGCACGAAGAGTCGCTTGCAGCATTCCCGGCATTGGGGCTGGTTCAAACA -ACCAGCCTGGCAATCAGTCTTGTGCTCCTGGGATATACATCATACATGTCATATTTCCAC -CCCCTTTCCAAATACCCAGGCCCAAAGGTAGCTTCTTGGACGAACGTGTGGAAGGCATAT -TACGTCTATAAACTTGTTCTCCATGAAAAACTAGTCGAATTACACCAGCAGTATGGCCCT -GTCGTTCGGATCGGTCCGAACCATCTCCATTTTTGGGATGGCGAAGCCATTACTCCAATC -TACAAGGGGGGCAGGAAGATGGGAAAGACAGAGTTCTATGATGCCTTTACTGCATTCAAT -CCCAATCTTTTCGGAGGCCGGGACGAAGATGTAAGACACAATGTCTGTATTTATCAGACG -ACTCGCTAACGGGAACAGATCCATGCTCTTCGTCGACGACAACTATCCCACGGATTCTCA -CAAGCCTCAGTAGAAAACTTTGAACCTCTCATCAATGGTCACATCAAGATTTTGGTCGAT -AAACTCAACGAGTTGGCAAAGACTGAGGAGGTCTTCGACCTCAAATCTATCATCTCATAC -TTTGTTCTTGATATCCTAGGCGAGGTCGCTTTCAGTCGACCTTTCAATGCCCAACTCAGA -GGAGAAGCAGATGAAATACACGCAATAAACGATCACATCCTCCTATCATGCGTAATTGGT -GAACTTCCCCTCCAAGCACTATCAAAGTTCCTAGCGCGGTGGTCGCCCGTTCCGTGGATG -CGAAGACTATTGAAGAGCAGAAACAACCTCAAAGCAACGTGTTCCGAATGTGTCAAAAAC 
-AAAATCGACAATGCATCTGATCGTCGGGACCTACTACAAAGCCTCGTGACCGCGAAAGAC -GTTGAAACCGGCGCCAGTCTCACTGAGCAAGAAATTAATTCAGAGGCATTTGCTATGCTG -TGAGTATAACCCTAGCGCCTCAAAGGGCACCTTGAAGTTTTCGACTCAGTATTAACTTGT -ACCAGCGTAGCAGGCTCTCACTCGACCTCCGGTACATTGACACTACTCTTTTGGCACCTC -ATTCATAACCCAGAGATATGGGCTACCGTCGCCGCCGAGGTAGCTAGTACTCTTCAACCG -CTACAAGATGGAGACATTTCCTATCCCATCAGGGGACTGGAGGCTTCTTTGCCGTACACC -ATGGCATGCGTAAGGGAAAACTTCCGCCTGAACCCCGTATTTACCATGCCCCTGTGGCGA -TGTGTCATGTCACCAGGTGGAGTGAGAATTGGAGAATTCGACGTACCATACGGCGTAAGT -TGTGGCTGCTCTTCTAACCATCTATGCTTGGTCTTCAATCAAAAATACTGACCATTGAAT -TTCAAAAAATAGACCAGTGTTTGCATCTCGAACTATGTCCTCCATCACAATCCAGAGATT -TGGGGTGCAGATCATGCAGTTTTTCGACCGGATAGATGGCTGGGAAAAGATGAGCCTAGC -CGGAGTCGGTTTCTGATTCCATTCAGTATTGGACACCGCATGTGCATCGGTCGGAACCTT -GCCATGACCAATATATTGAAGACTCTCACTACTTTGATCGGTCAGTTCGAGTTTCATCCG -ATCTCTCGGGATCAGCATGTCCGGGTCCGCAGTTCGGGCATTGGGGAGATGGAGGGATCT -TTTCGGTGTACAGTTTCAGTGAAACACTAAGCAATGGTTCATGCACGGGATTGGAGAATG -AGAGAGTCCATGTAGAGACGAAATTCTGTATGTAAAGAGACATTGAATACATTGGGGCTA -TAGTATGCTGTAATAATACAGCCTTATGTATGCATAACTCGTATTTATGCACTCCACTAC -CAACTTCTAAGCATGCCCAGGTGTTTGGGATGGTAATAATACACGAGCATGTGACGTGTT -GATCAACATCAATGCCCAAGCCTTTTCAAGGTTCTCTCCACTTGCACTACCCCAAATCAT -GAGAGATACATGGACTCCCGTAACTTCATACACACAGAATCAATGAAAGTCCCGGAAAAT -GGATGGTCAATCTGCGTCAAATGGCCACTTGAACTCTGTGCCTTTTCGGTGTTCTGTTTG -CTCCCAAACTTTCGGAAAGATCCAACACCTCAAGAGACACAGATCAATTCGTACGATCCA -AATCCCTTTTGATATTTCTTGTTCAAAACCACTAAATTCCATTTTGGTAGATATTGACAA -GAAACCACATGTCTGCGAATTTTGTGGTTCTGAATTTACACGCAGGTAAAGTCAGATCAA -CTTGCAGTTGATAGCTCGGCCATTTGACGAAATTAAATCCTCACAGTGATGCCCTTTGCC -GTCACTGGAAGACTTGCGCAACTCGCTTAGCATCTGGAAATCCAGTGCCCCGGCGGACTC -GATCTGGAAAGAGAAAGCATGCGTGCGATCGTTGCACAGAGCGCAAACGCGCCTGCAACC -AAGGAAACCCTTGTGCTGAGTGTGCCCTTCAAGACTCTGAATGCACATACATTCAGAAAA -AGCAAAGATACTCACAAGTACATGTGAGTGTGGGAGATTCCCATGATGACGATGGTGGAA -ACGAAGTTTCTGGTGCTTCGAAAGCCCAGCAAAACCCAAGTATAGGCCAACGGTTGATAA -ATAAGATTGGCTTGGCTGATCGGTGCCGGTTCGATTTTCTCTTGAAATTTACTCGGGCCG -CTGGAATCAATGAAGGGTATAATTGCAATCGCTTGTTCAAGGCAGATGATCCATACGACC 
-ATGGTGCCTCTGATGACCTTTTTCAGACGAGCCTAGATTCAGGCCTCTCGGGTGAGGATC -TCACATGGTCAGATGGAGACACTGCGTCATTCGCAAGGTTCGACAGTCCTACCAGAGGAA -ACTTCGTGAATGATGCCCTAAGGCTCCAGTCCAGTCGGATTAAGGACATGCTTGTCGATT -TCTGTGACGAGAAAACAGAAGGGAATGCAACTATGGCCGACGTTATATTATTCTTTTCAC -CAGAGAATATCCTTCATTTTGTTGAAATATTTTGGGATCGCTGGTATCCCCACTGTTTAA -TCTTCCATAGGCCAACATTCAAGATCGACTCTTGTTTGCCTCTCCTCCTTATGAATATGG -TTCTCATGGGAGGCTGTACATCCCCATACAAGGCTGATCGGCACATTGCAAGATTACTAC -TAGATATTGCAGAAGGGATTGTGTTCTCGCAGCCTATGTTCTCGACTACAGGAATCAGGG -TGGAATGTACGGATCAAAGCCATTTGACTCAAATCAGCACTTTGCAAGCTACCTATTTGA -TTTGTATCATGCAAAAGTGGGAAGGGAGCAATGAATCAAAAATTCGCATCCAGCGTGACC -AATTCACGAAATTTGTGTCAGTACGTATTGCTCACCAAACTACCTACCATTTCCGAGGTC -TTCAACATTGACCCCTAACGATTCACAGGCAACTCGTGCAATGGGGTTATCAAGGGCTAG -ACATAATCATCGACCTATTACCTCAAAATTTGACAATTCGGAATGGCGAGCATGGATTAA -AAGAGAGGAAATCAATCGGTGTGTTCAATGAAATTAAGCTCTTCCCTCGTATACTAATCT -CAGCATAGAATGTGCAACTATGTGTTCCTACTTGACTCCGCCTTTGTCATTTTCCACAAT -TCGTTCCCACGAATGGTTCTTCAGGAAATGCAAATTGATTTGACATCTCCAGAGCCATGG -TTCCAAGCATCTTCTCCTACCGAATTCCTATATGTTGTCCAGTCAAACCCAGGACTGCCG -GAGAAAGATCTCTCTCTTGTCGATTCCGTTCGCAGGTTGTGCAGTGATGACCCGAACCAA -AGCACGAACTTTTTAAACGGTGCAAGCAAGTTGAATTTGTTCACAATCGCTACAGGTGCG -CTTCCCCTTCATGCTTTTCGCAGGTCACTGAGTTAACCATCGGGGTCCAGCGATTCATGG -CCTTATATTCCATCAGAAATCCTCACTATACACCCTTCCCTTGTCCGATAACCCGCTACG -CAAGGCTCTCGACAGACTAGACGTGGCTTGCAAATCGAACCAGCAACTATATGTACGGGC -ATCGACGGATACATTTGATAGAGCCGATGAACAAGATGGGTTTATGCAGTACGCTGAAGA -GTTTGCTCTTTTGGCACGTATCTCTCTCGAACTTTCATATTTCTCGCCAACGGAATGGTC -CGAAATTATTGAAGGGCTGTCTGACAGCTCCTCTCGCGGTGAATTTGCTACTTTTGATCA -AGCCGGCATGGGTCCTGTTGGGAATTTGATGCTGGCTGTCGAAAACCTGAGCTTGAACAG -GTAGATACCCTTCAGTAAAGGATCGGTGGATTGGTGTCATTGGAAATATTCTTGATAGTA -TAGAAAGGCAAAGGTCTAGCCGGAAGCGTTTAGTCGTGTAGGAAGTACGTCCAAAAGACA -CCTAGAAAGGCCATTAAAAAGGAATAAACAATTTTATTTACACCAACTATTCTCATGGTT -ATCTATATCCAGGTCATGCTTATTACATAGATCGTAGATCCAAGGATGCTATCTCCGTTG -CAGTAATTTAGTATTGGCCTAACACTGAAAGGCTACTCCCCAGCACTTACTGCTTCTCTC -TTTCATCCCATCAGACAAAGTGTGCCCAATCATTGGAGCTATGAGAGCAGCTAGAGCAGT 
-CGAGGGTGTGGTCGACTCTAGAAATGAGTCAAATGAGTTTCTGGGCCGCGCCGAAGTATT -CAATGGAACTGCGTTATTTAGAGATCAATAAGATATTTGATTTGTTAATTTTAGAATGCC -TCTTAGTGATCAATGCACTGGCACATGGTATTCATTAGCATTGATCCATTCTAAAACCGG -TCCCCTTGATAGTGTTTCTCAACCTCCACCGTAACATCCACCGTCCTTAAAACGCCCCCC -TTACCTTGGAGATCCTGCGTCAAATGGCGCGTACTCTCATTTCCCTCGCTCGAACTCGAC -CGTCTCCCCTCATTATTCTGATTATCCCCCGATGACACCTCAGTCACAACCCCAGTCTGA -CCTCCCCATCCATCAAACATTGACCGACCATTCTTCCTAGGGCGTCCACGCGACTCTGAC -CGCGAATGTGACTGGCTTGCTGCCGAGGAGGATTGGTTGATATACCCGCTAGACTTGGTC -TGGGGGAGCTCGAGCCCCTCGGGAATCTCAATTGTGGATAGGCCAGAGTGCAGGCCAGCG -AAGATGTGGTACAGGGTTGGAACGCATGCGATTGTGACGGTTGTGAGCATCATTGCCTGT -CCAACTACTTCCCAAATAGAAATGTCCCCTATATACTTGTCAGATTGGTGTACTCAGGTT -CGAAAATTGGAAGCGGAAGGTTGTCACGGACAGGAAACGTCGGTCGAATGGGTGAATGAA -GGTGTGAGGGCTAGGTGCGCGATTCCGAGGCCGACGATACTATGGCAGGTTAGTTATTAA -ACTTTCTAAGGACAATTCCTGGAGTGGCTCAAATACCTCAGGCGCAAAGAGAAGGAGCAG -AGGATCTTGGCCCGCTTGTTCTGCGCCATCTGGACATGTCGCATCATAATAAACGGCAGG -ACAATGATGATGATCTCTGTGAGAATGTTGAACACTGCAATCGGGTAGAATAGGGCTCCC -TGTTTGGGATCAGAGGTGTTCAAAAAGCAGTGATGGCCCGGGTTGCTGGTGTATACTGAC -CTGTCCAGCACATCGCCTGGGGGAGTATAGCCACACATGGGGTAGTTCACATTGGAACGC -TATACTCAACACCGCGAATACCGTCCACCCGACAACTATACCACCGGTCACAATGCAGCT -GCGGCGAAGGCTTTTATTTGGCGTCAACTTCCAAACCAGCAGGATAGTCGACATTTTCGA -AAGCGACAAGACAAGGATGAGCAGTAAGTGAGCAGCGAAGGCGAATTTGCTCCAGGCTTG -AATGTCGCCGCCATTAAGCTTGTCTTGGTGCTTTCCCAGTCCATGGCCAATAGCCTTCTG -TAAAATAACGACTTGAATAATTGCAAGTATCAAAGCGAGCCAGATGGGAGTATTGACTGT -TGTCCGTCGCTGCTTGAAATAGATCACCGAGCCGAACTTAGCAAGCACAAAGATCGTGGC -CACAATCAGAAGGGACACCGAGGTCAAAGTGATGAGAGGCCCACGATCATCACTTGATAG -CGCCAGGAAGGCGGGCTTTTGTTCGGCCATCACACTTTTCAGAACTTGGGAAGATGGGTG -CCATATAATCACCTTTCTCACATCTTGTAATCCTATATTTCTTTTGAAAAATTTACGATC -CTTGTTCTTCGAGTCCGACCCCAACTGCAGGGGCGGAAGGGTGGCTTGCTAGTTTGGCGT -ATTACCATTGCATCTCACCTGAATGCAGGGATACGAATCCTGGGAGTGTCTATTAAATCG -GACATACCAATACAACATTGTACACTTATTCTGGGACCCCTCTAGTGGATCCGACAAATT -CCCTTCGATGGTGATATTAATGAACTATGATCAACCCGAAATCGAAGGGGCCGAGTTAGC -GAAGGATGCCGCTCGGACTCACATAGGAAAGACCGGTGCTTTTACTAGGGCAAATGCTAG 
-TAACCAACAAGCGTGGAAGTTGGGCCAGTCAATTTACAACAACCCAATGGTCTGCCAAGA -CCCCCGGTCTAACACTAAATACGGAGTAGCACCGGTTTTATTATATTGCAATTGACAAGG -GTTTGCGCCGAAGCGAGGACACCGTTTCAACGCGGCACAACTTCTCTCTCGCTTCTAGAT -TGCATGTTTCGCATTCATTCCGCAATATCCAAACCCCTGTCTAGTCTCTATGCATGGAAT -TAAGGAGCCAAAAATGGCTACTTCTTGCCGTCTTTCGACCGACCGGTCCATTCTCAGTCG -ACTTTTTTTTCTCTGCCAATTTCCTCTCCTCAATCGTGCCTCAATATGCAGGAAGATTTG -GAAATGAAGAGCGAAAGCGCTGTGCAATCGGTGCCAGTAGAGAGTGGGAATGGCGGCTAT -GAAAGCGCTACATATCGCCAGAAACAACACCTCGAGGTAATTGCCCCACGTGCAGTTGAC -TTGCGCAAAATCTAATCTTCCAACAGAGATATCTGAATTTCTTCTCATCAGTCGCCTTTT -CCGCTTGTCTGCTTGCCTCATGGGAAACTGCCGGCGGTAGCTTGCTCTCGGGGCTGTACA -ACGGCGGTCCTGCAGCGATTGTGTATGGCATTATCTTGACCACCCTGGGCAATCTGACCA -TTGCATGCTCCTTGGCCGAGCTTGCCTCCATGTACGTGAAAAGATGACTGCGATCAGAAT -TCCACCGCGCCACCCTTCTTTCGGCCTACCCAGTGGACTGGACTGCCTCGCATCTCATCT -GGCTGCTGCAATCAGCGGCATTCTCGCACCCTACCTTCGTCCATTGACCCGCTAACCTTG -CGACAGACACCCCACTGCAGGAGCTCAATATCACTGGAGTTATTTCCTCGCCCCTCGTGG -CCGTCGATTCATCAGTTTCTTTCAGGGTGAGCGCACGCCTTGTAATTTCTTTTTTTTTTT -GGGCATATGCTAAACATTTCAGGCTGGGTTACGGTGTTTAGCTGGAGTGCATTGGTCTGT -ATAGCACCTTTCTTCATCGGAAACCAGATCCAGGGCATGGTTGTGCTGGCCTACCCGGAC -TATGAAATGGCGAGGTGGCGTGGTACTCTGCTCATGTGGGCAGTTGCCATCATCCCGATT -CTCATGAATGTGTTTGCGCGTCGCGTTCTGGGAGTCGTCGAGGTTGCTGCTGGTATCATG -CATGTTGTCTTCCTACCTATCGTCATCGCAGTCGTCGTTATCCTCGCCCCTCGAAACCCC -GACGCGTTCGTCTGGGACACCTTTGTCAGCGGCTATAGTGGTTGGAAGGATCCTGGAGTT -GTCTTCTCCATTGGTCTTCTCGGTGTTCTTACTCCCCTGAGTGGTAGGTCCCTCCACTAC -CGGCAAATTGAACAATCTAACCTAAACCAACAGGCGTCGACGGTATCATCCACATGGCCG -AAGAAGTCAAGAATGCCAAATTCGTCGTGCCCCGCTCCATGATCTACGGCACCCTAATCA -ACGGAATCCTCGCCTTCTGCTATTTGATCGCCATTCTCTACTCCATGGGCGACTACATGG -AAGCAGTCACCAGTCCCACAGGCTACCCCATCATTACAATCACCTACCAAGCAACAAAGT -CCAAGGCAGCCACATTCATCCTCATGACAATGGGCATCCTGCCCGGCTGGATTGGCTTCT -TCAACGGCCTCGCCTCTGTCACGCGTCTCACTTGGGCTTTCGCTCGAGACAACGGACTCC -CCTTCTCCGTCTTCTTTACCCGCGTCGACCCAACCTACAAGATCCCCATCCGTGCTTTAT -TCCTAGTTGCATCGTGTATCTTCGCACTATCATTCATCCAGATTGGATCGACGGCTGCAT -TCAGCGCCATTCTCTCAATCAGCACCCTCGGTCTTTACATTTCCTACATCATCCCTCTCA 
-TCCTCCTTGTCTTCAAGCGCTTCATGGCGCCCCAGGATATCCCACAGGGATCATTTACTC -TAGGGAAACTCGGTCTCCCCATGAATCTCCTTGCCATTCTCTTCGCCACCTATTTCGCTA -TCTTCCTTCCATTCCCGTCGACCCTCCCGGTTACCGCGGAGAACATGAACTATGCCGGGC -CTGTGCTTGGATTTGTCATGCTGTTTGCCTGTGGAGATTGGATTGTTCGCGGACGGCATA -TTTGGCAAGGCCCAACAGTCAAGGCGTGGGCTCAAACTGAGTGAAGCTTGGAAAAAATTG -GATCGAACGTAGAGGGGGAAGGGGAAGCAATTGAGTTGCCAAAATTTGTTAATAGCTTTT -TATCACCACGGTGACTAGACTGTACGTATACTTTATTTACTATCCATCCAGAATGTACAT -ATTTTCTACAAATCATTTATCACAATAAAGCCTTGTCTTTGATTCAGACCAGGTTCTGAC -ACGGTCTCTCCAAAATTTTCAACCTCGGGCTTGGCTTTTCGCCCACGTACCGTTTATCTG -GCGTTTTTAGAAAAGCTGTCAAAGCGGCTGCGGTGGAGCTTCCGAAGCTTGGTATTTTGG -AATTCCCTCGAACGTCATTCCGGGAGACGATTACTTTGGTCAAGCTTTGCAACTTTCAAT -TCATTCCTTGGTTCAGTTCTCGGGTCTAGGGCCAATACCTATGTATACAGCACCGAAGCG -CTATTCGGCTGTAGAGGGTGGTTGGCAGATTTGGGAGTAGCGCTCCGCAAAGCCCACAAA -GCCCACAACCCTACCCGCAAATGGGCCGCGCTTTGGGCCCGGGTCGGCCCACTAAGCCCA -TAATGGGGGCCGCTCAGGCCCAATTAGCACCAAACAGTGCAAACACTCTAAAAAATAAAT -TACTCTAGGTTTAAGCCCCGTAATAAGCCCTTATATTGTGGTATTATTTTATTATTCTAT -ATGAAAGTCATCAATACTAAGTTAAACTTATATATTACGTTTAGATTTACGCGAGAACAT -GTTCTTAAGATAATTTTTAAATTATAGAGGTTTACCTAGAAAGGGGTAGTACTCCCACAC -AATTCTTAAATATATATCTACGATCAAAACATTAATATTTAGCATTTTTCGCGATCTCGC -AACCTCTACTAGTTAAAACTTATTTCAAATTTATTTCATTCAATATCCAGTCACACATCT -GTATAACTATTGTGGACTCATCTTTTGAATTTTAAACTCTATAGATTCACTAGAATACGG -CATATTGACGGATTTTTCCTTCTAAGCATACATCTTGCTCAACGAAGGCCTGACTGTCTC -CAAAGATGTAAAGCTACAATGCCCCTTGTCATCCTCGATTCCACCAAAATCACCCGCCCA -GGATGGCGACGCCACTGTTTCTCCCCACATCACCGACCCTCTAGCGGCATGATCCAAGTC -CTCGTCCTTGGGCAAATGGCATGCAGCACTGATCGCAGCACTGCAAGCCCCCGCTAAGGG -GATCTCGGACTTGAACTTTCTTACCGATAGTCCAAGCAACAAACAGACCATGAAACCACC -CACCAAGATAGAAATCAAGATAGCCAAAGGCCTATGGCCCAGGGAATTAATCGAGTATTC -GTCAATCTGTTGGCCAGCCCAGTCATACATGGTGACCCGAGCATAAAAGATACTCTGGGA -AATCAACCAGTGGAGAATCATGTAAGCCACCATCAGAGGGACTCCATACTGATATGGCAT -GGACAACCAATACGTTGACTGCTGCGTGCTGTCTTTTATAGGCCATGTCACTCGAAGAGG -CTTGGAGCTTGCTCCGTAGGAGCTGTATTCGGCTGTCGCGAGCATGCTAGTCAACACGCT -GTTGTAGAAGAAATAACTGACTGTGATGACTAGCTGTGGTATATTGGCTATCACCACAGA 
-GCTCAAAGTTGACAATGTCCTTTTAAGGTCAACGAGGTCGCTTTGGTCCCTGGCACCAGA -GAACCCCTTTTCCCATATTTCATGGAGAATTCTGGGCGTCAACCATCGGTCTCCTAATGG -AGCGATGGCTTTCCCGAAGAGAATTGCTCCGACGGCAATACATGCTGAACATCTGAGTTA -TTTGTGGTTAACATAGAGTCAAAGAAAATGTTTGAATAGGCTCTTACATGGCGAGGGTTG -TTACCCAGCGTATAGGACTTGGGGCTTGCAGCCAGTGTTTGCGTTGCGATAATCGCCTGT -ATTCCATCGGCACTCGCTGACCGGCTCTTTCCAAAGGCTCCTCGGCAGGGTCATCTGACT -TTTGGTACACTTTCCAGGCTCCGCGACGGACATCAGCGTTGGATAACCAACACATACCCT -CCGTGGTGGAATCTGGCTTCTCCATAAATGACGCCACCGCATCGCCCACGGTGAGCAAAG -GTGGCGATCTATATCGACCTATTCTGGCCGCTAAGAAGATTGCCACGACTTTTAGGAACG -TCGCGAGAGCAATGATGATGCAAATTGGCGGACTATAGAGCAGCTGGCATCGCTCTTCGG -ATTTGATCACAAGACATCCGTCGATGCTATAAGTCCTTGATTCCGTGTTTGATAACCCAT -TGCAAGCAATGACATAACCACTTGCCGTGATGTTAGATGTGGTGTTAGCCTGTATGTATC -GATTTACTCTCTCAAGTGTCGGTTGGTCCACATCATAATTGGCCCATTCCAAGAGAAGAT -CGGCATCGACACAGGAAGCTTTATCCAGAGTGTCAAGGCACGCTTTAGTCGTGAAATTTC -CATTTGTGTAGTAATGAGCTCGACCAGAGTCAAGGATATTGATCATCAAGGTCTTGCCAG -CGTATTCAGACCCTTCCACTTCAGTACTGGGCCGACTACCTGATGACATGCTATTCATGA -GGCTCGTTGAGAGTATCGTAGCATTTCCACCTTGGCTCACAGATAACTCATTCGTGAGCG -CGACGATCACCTTTGCCCCCCTGCTGTTATAAATGTCAACATCGATACATTGTTGAGTGT -CCAGACGGTCATAATCCCCCCGGGCAATAACAGAGGTGAAGTCCTGCCAGTTCAATTCAC -CCTCGGCACCCCCAATCTCTGAGGATGTCCATGGTGATACGGTTTCAGTGTCAGAATAGA -GACCATCATCTACATCATCATCCCCACCCCACGAGTACGGGGTAATGCTGAAACACTGAT -CAAGGGCAGGGGTCTTCAAGCTCGCGATGTTGGAAGAATTCAAGTCCCCAGGTCCCACAA -CAACCTGCAATTCGTTTATGGCCATGGCCTCAAAGACCATCGAGTTGTACCTGTCAGATT -AGGTCAATGCCACATTTCTGGTGTCCCAACCCTTTTGCGATAACATACAACAAATGAAAC -GGTGTTCCCGTGACAAGCAGTAGCAGCCATAGAGCGAGCCGAGATCGTCCAATGACAAAG -AGATTTCTCACACTCGCGCATCCGATATCAAGCCATCGACCTTGTGCGTGATATTTGTCG -ACCTCTTCCCGACTCGGCGCGACCAGTGACTGCATGCAGTAGTTGCTCGCACCCAGGATT -CCAGTACTGAGTACGTTGATTATCAAGTGCAGGGCGATGTTCCATCGTTTGGACAAGACA -CAGCTGCCCTGGTACATTACATGGACGCTGGAGAATTCACTGTTCTCTGAGTTTCGGCTG -GCGAGTCCAGCAGCGACTGCAATGAAGATGAGATTCAGGAGAAGCAAAACAGTAGTTGCC -TTGGCACAGAGCAAGACGCCATTTATCCACTGTGGCTTTTGGGGGTGGGATGGGTCGTGT -TCAGTAACCTCTGCTTCGGCACTCGCTTCGATATTTCCCCGTCTTTCTATTCGATTGGAC 
-CATTGAAGTATTGCTTTCTCGGGTTGATCGCTGGGAACACCATGATAGCGTTGACTAGAG -AACACATATTTGGCACGATGCCAAACACTTCCACTCTCCATACTTGCGGTGGGAATGTAC -TGCGTGGTGTATAGAAGGGATACACTCCAGGCTTGGGATAGAGACAAGTGCACAGAAAAT -GATAGGCATTCCGGGTAGAAGTAATTCTCCGAACCCTCCCCCCTGCTCGAGAAAATGAGG -CTGAGTTCGTATCCATGTGGCCGTGGATGTGGAAATGATTTTCACGGGTTGTTTTAGCTC -CATTGGCACCGACCCCATTGGGGCTAATGCCTGTATGGAACTCCTAGTACCCTTGGCATA -TGTACCGCGCCTGCAATATACGCATATTTATATTAACGGCAGCTCAAACCCTCTGTTTAG -TGGTAAAAATGGCATTCAGGCATCAATCACCCCTGAAAAGGGGTGAACCCTGAGTCCTAT -AAGCGTACGTATTACGATTTTGGCCCGATCTTTGTTTTTGTTATGTGTGAACTGGGTCAA -GGCCAAAGAGTTATGTCAATGTGTTGTAGCCTCCGGATTGCCGGACCGGGCAACGGATGT -TTAGAAGCGAAGTTGGAGGCTAGAGCATACGACACCTGCCAGTCTATCAGAACAAATCCA -CAAGTACATACAGGCATTTACATTGGTTTCCAATAAGCGTGATAATGTGCGTTGAAGCTG -CGTCGAAAAGATTAGATGTAGGGGGCATATCCAGGTGCTTGCGTATGAAAACACAGTAGA -GAACTGGCCATGGTACCTGACTCATATGACGTCTCTTGCCATAGACGTACTAGTACCTAT -ATCTGTAGATGTAAAAGACGGCGACCTTCACTTTCTTCTTTTTCTCTCTTTTCGATCACT -CTTCTTTCACACTTTCCTCTGTTATTCCTTTCTTCACTTCTTTCTTTTCAGTCTCTTTTT -CTCAATTCAATATCTTTCTTCTGATTTCAAAACTTCCTGTTTCTCTTTCCTTCTTTTCAA -AATTGCAAACGTTTCGGTGCTTTCCTTGCCAAATGAGCTGATATGTGAGATTTGCTCTCA -CATGTGTGTTGCCGACGTCCTATCATTCGTTCAACGTTACCCCGAAATTTCTCAACCTCT -TCTTTGGCACTGCCAGCACCGGCGAGAGCGCTAGGAGCTGCACGAGGCACTGATGATATG -TATCCAAGGGGGTGATGTGAATTCAACACGGACGTTGAAACTGTTGAGGACGACACCAAT -GACGAAGGAGAACGATCACCGGGCCATGATGCATTAGTATGTTATTGGAATACCTCAATC -ATGGGCTGGGCTATTCGAGATGGCCATAATGACATAGTGGAGACTCTTACTTAACACCTT -GCTCAATTGAAGCTGGAGATCCCAGCTCTCAAAGTGAAGTGTATGAGAGATGCATTAGAA -TGGGCGGCTCAAGGGGGCTTTGTTAGCATGGTTAAATTGCTCCTTGAAATTGACCTAAGC -ATTACAAGGTCGCCTGAGTATTACGACAATGCATTATATTTTGCATTAATGAAGCCATAT -GGACATATTTACAGAGACCACTGCACGTGCTTGAAGCAGAATTTCTGCTCTGAACGGCGC -AAGAAATCAGGACCGGACTATTATGCTACCGTGAAGCTTCTGCCCGATGTCGGTGCTAAC -CCCAACTGGCCTGGGCATTCCCCGGGGTCTTTTATATTTTCACATCCACCGCTAGAGATT -GCAATGCTCACATGTAACTTAATCTCCGATAGCACTGTGAAGCTGCTTGTTCAACGCGGT -GCAAATGTGAGGGACAAAGACTTTCTTCATGCGTTCACAAATACCTCGAATCGATGTACG -GTCTCCCATGGAGAAACGGCAAAGCACCTTCTCAACCATGGTGCAAATATCAGTGTCCGG 
-GACAGCCTCGGTCGGTCTATGCTAAGCAAAGTCCGCAAAAGAGACCTTATTCAACTATAC -GTTGCTCGTGGACTCAGTCCAAACGACGTGGATGAGTCCGGCAAGACACTACTTGGCGCC -TTAGCAACCGAAGAGGCATCTGAAAATAGGGCAGAAATGATGAAGGTGCTGTTGGAGCTT -GGCGCCGATGTGAATTCACGATCTCCCTGTGGACTCACTCCATTACATTCGGTCTTCTCA -AATAGACCAAAGCTATTTATATATGACTCAGAGGGACATGAGTTTGACGGATTCGACCGA -TACGGACGCCCAATAGCCTAGCCTGAGTCAGAGAACCCGGATTATGAAACCAACCTTTCA -TTCGTTCACAAAACCGCAAAGCTTCTCTTGACATACGGGGCAGATGTGAGCGCCCGAGAT -GATGAAAAGCAGACTGTTATCAATAAAACTGATGACAAGTCTCTGGTAAAGCTCCCCCTC -GCCCACGGAGCGGAGGTAAATCTGGTGGATGTCTATGGCCAAACGCCTCTTCATACGATG -ATATACGGGGCGTCCGTCGCCAGCGATATTTGAAACGATCAAATTGTTACTGACCCATGG -GGCAGACCTACATGCAGAAAATGGTGACAGAAACACCCCTCTCCATTTGGCCTCACTGAC -ATCACCATGGGAATTGGTGAAGTTGCTGCTTCATCATGGAGCGGATCGAAACCACCGCAA -TGTCAATGGCGAGACAGCATTGGACCTTCTGGCGCGTCGCCGGCGTCATCGAGAGCAGTT -TCCATTCGACAGGGCTAAAAATGGGAATCGTTTGATCACTTACAGAAACACATTCTACTT -TGCCATTCGGTTCTAAAGAACTCGGGACTTGCAGGGGGTCACTAACAATAGATCGGAAAG -TGTCAGGGACTTGGTGTCTCTTGGCTCTTCCCTTTATTCGCTTTCATGTTCTATTCAATC -CGCATCATATCCAAATATCCAATTCGAATGTAAAAATGAAACTTGCCCCGGTCAAAAGTC -GGTCATTCCCCCGCATCCTCCTGGAAAATTGCCTAATGTGATCTGTCAAGGATCCTCGAA -ACATCCACCAATCCCAGCGGAGTACGGGGTATCTCTATAGCGAGGACCGAAGCATGCACA -CATTATAACAGGCCATCTTCCTATGTAAGAGGCATCTGAACAACACTCAGCAATGCGTCT -TTTCGCGCTTCTTCCTGTTCTACTTGGACTCATAGGCCACTTCGTCTCTGCGACAGATAA -TGGCAAAACTACTGATGTAACATGGGATAATCACAGTCTCTCAGTCAAAGGAGAGAGGGT -ATATATTTTCTCCGGCGAATTTCATTACCAGCGACTCCCTGTGCCGGAGTTATGGCTCGA -CGTGTTTCAGAAGCTACGCGCAAACGGGTTTAATGCAATCTCTAGTGAGTAACCCGACAG -AAACATTCAATTACACTGGCCTAATCTCAATTAGTCTACTTCTTCTGGAGCTTCCACTCT -GCCTCTGAAGATACTTTTGACTTCGAGAACGGCGCCCATGACGTTCAGCGCGTTTTTGAC -TACGCCAAGCAGGCAGGTCTGTATGTGATCGCGCGTGCGGGACCTTACTGCAATGCCGAG -ACCTCTGCTGGTGGGTTTGCATTGTGGGCATCCAATGGTCAGATGGGCAGCACGCGCACC -AGTGCAAGTTCCTACTACGATCGATGGTATCCGTGGATTCAGAAGATCGGAAAGATCATC -GCCGCAAACCAAATCACGAATGGCGGTCCGGTCATTCTCAATCAACATGAGAATGAGTTG -CAGGAGACAACTTACAGTGCCGATAACACTGTTGTTAAGTACATGGAACAAATCAAAGCC -GCCTTTGCTGAGGCTGGCATCGTTGTCCCGAGCACGCATAACGAGAAGGGAATGCGTTCT 
-ATGAGCTGGTCAACGGACTATAAGGATGTGGGCGGTGCTGTCAACGTCTATGGTCTGGAC -TCTTATCCTGGTGGTTTGTCCTGTACCAACCCAAATACTGGGTTCAATCTCGTTCGCACT -TATTATCAGTGGTTCCAGAATTACTCGAGCTCTCAGCCTGAGTACCTGCCAGAGTTTGAG -GGCGGTTGGTTCTCCGCTTGGGGAGGGACTTTCTACGATCAATGTTCTACCGAGCTCTCG -CCTGAGTTCCCCGACGTTTACTATAAGAATAACATCGGCCAGAGAGTCACATTACAGAAT -CTCTATATGGTCATGGGCGCAACGTCTTGGGGGCAAAGTGCGGCCCCGGTTGTCTATACT -TCCTATGACTACTCTGCGCCCATGCGAGAGACCCGTGAGATTCGTGACAAGCTCAAGCAG -ACCAAACTGATCGGCCTGTTTACTCGTGTGTCGTCTGGACTTCTAAATACGCAAATGGAA -GGTAATGGAACGGGGTACACCAGTGATGCTAGCATATATACCTGGGCTCTGCGAAATACA -GAAAACCACGCTGGGTTCTATGTTCTCGCGCACTCCACTAGCTCGTCTCGTGCCGTGACT -ACTACTTCGCTTGATGTCAACACCTCGGCAGGTATGTACTCCGAATTTCGATAGGAACGA -TTGCGCTGACTTGGATACTACACGATAGGACCCCTCACAATCCCAGATATTGAATTGGCA -GGCCGTCAAAGCAAGATCATAGTCACTGACTATGAGATCGGCGATGGCTCGAGCTTGCTT -TATTCATCTGCCGAGGTTTTGACCTACGCTACTCTTGATGTAGATGTTATTGTCTTCTAT -CTGAATATTGGGCAGAGAGGAGAGTTTGTCTTCAAAGACAAACCGACCCATCCAACTTTC -AAAACGTACGGAAACTCCAAGGTGAAATCAGTCGCATCGGATCATGGCACCAAATACACA -TACACTCAAGGGGATGGCACCACTGTGTTGAAATTCTCGCACGGCGTACTGGTCTATCTG -CTCAATAAAGAGACGGCATGGAACTTCTTTGCCGTACCGACTACCTCCAATCCGCTTGTG -ACTCCCAGTGAGCAAATCATCGCCCTCGGGCCCTACCTCGTTCGCACAGCAACTGTCAGC -GGAAACACTGTCAGTCTTGTTGGCGATAATGCAAACACGACATCTCTTGAGTAAGTGCGA -GATTGACTCTCAAAGAAGTTGTTAGCTAATATAAAATTCTAGGGTCTACACGGGCAACGC -CAAGGTTACCAAAATAAAATGGAATGGCAAGGAGGTCCCAACCAAGAAAACAGCATACGG -CAGCTTGATCGGATCAGTTCCAGGAGCCGAACACACCAAGATATCGCTGCCAACCCTGAA -ATCTTGGAAAGCACAGGATACACTTCCAGAAATCAAGCCCGACTATGACGATTCTCGCTG -GACTGTCTGCAACAAGACGAAATCGGTCAACTCCGTAGCACCGCTTACACTTCCCGTGCT -TTACTCTGGTGACTACGGTTACCATGTCGGGACGAAGATCTACCGCGGGCGGTTCGATGG -CACAACAGCGACGGGCGCAAATCTTACTGTCCAGAACGGCGTGGCCGCTGGCTGGGCCGC -CTGGCTCAATGGAGTCTACGCTGGCGGAGACATTGGCGATCCCGCCCTAGCAGCGACCTC -AGCCGAGCTACTCTTCAACAGCTCTACGCTCCGCAAGAAAGACAATGTTCTGACCGTTGT -CATGGATTACACAGGCCACGACGAGGAAAATGTGAAACCGAACGGGGCCCAAAACCCGCG -CGGTATACTGGGCGCTACGCTCCTTGGCGGAGATTTTACGTCCTGGCGCATCCAAGGCAA -TGCCGGCGGCGAGACCAATATTGATCCTGTCCGGGGACCCATGAATGAAGGAGGTCTCTA 
-TGGAGAGCGTCTTGGTTGGCATCTTCCAGGATACAAAGCGTCGAAGAGTGCAACATCAGA -CAGCCCTTTGGAAGGTGTCTCTGGTGCCGCAGGCCGATTCTACACGACGACGTTCAAACT -GGATCTAGACTCAGACCTGGATGTTCCGATTGGCCTGGAGCTCGGTGCATCTGATTCCCC -CGCAGTAGTGCAGATCTTCATGAATGGGTACCAGTTCGGTCATTACCTGCCACACATCGG -ACCCCAGACAAGGTTTCCATTTCCCCCGGGGGTGATCAATAACCGCGGGAAGAACACGTT -GGCGATTAGTCTGTGGGCGCTGACGGAGCAAGGAGCGAAACTGAGTCAAGTGGATCTGGT -TGCATATGGGGCTTATCGCACTGGTTTCAACTTCAATCACGATTGGTCCTATTTGCAGCC -CCAGTGGAGGAACAACCGGGGACAATATGTTTAGAATGTCTGATGACACATTTGGAGATG -AAATGTTCCCGGGGTCTAGATTCTTTAGCTCAGGTACCTGTTTCATCATCGCTCTATATC -CCTGTTTCCCTGCTTCCCTGttttttgtttttttgtttttttgttCGTTACATCAGAGTT -GAGTACTTTACTGTATGTTGTAGTCCACGACAGACAGGCATACCCCCGGTCAAAAGTCCT -AGAAATGTACTCCGTGTAGTCCTTACAGACACTCAACAGACCTAAATATAGTCAACTCTC -AACTCTTTCAACACACAAATAACAAAGTAATAGAATTAAATAAGTTCTACCAAAAACAAA -ATGCAAGTTCATTGTCCAGGCGACTATCGGCCACAAAACATGTCTGCCCAACGTCCGTGT -CCACCGCCATTTGTACTTCTCCACACGTTTAAACACGTTTAAACACGTTCTTCCTTTTCA -CTTCTAAACATATTCTCTGTTTATATTTCTTATAATATCTAATCATGTCGCGCCATAACA -GCGTGGATCTAGACTCCCCCGATCGGCCTTTTGACAACATCATCAATTTCCGCGATGTCG -GCCGGTCGGTTAATCAATTTTGTCGCAAAGAGTGAGTTAAGACGCGTCGTGGCCCCAATC -CGGGGATATGTTCCAATATTCCCAAATACAAAACTAACAATTCATATAGAATATTGAAAG -AAGGCGTGTTCTTTCGCAGCGCAAGGGTAATTCCAACCCCGCTCAAATTATGACTCTGAT -GCGTTGTCAGAGCGAACAGCCAACGAGAAAAGACGAAAAACTAATTTGAATCTAGCTCGA -TGATGCTTCAGAACGAGATAAACGTCGTCTAGAGGAAGAACTCAACATTCACACTGTCAT -CGATCTCCGTTCCCAGTATGTCACAAATATCACCAACACCGAAAATCCAGAAACTAATAC -ATAAAAAACAGAACAGAACACCAAATGGGCACAAAAAAGCGTCGAGCTCAAAATGCCAAA -TCAGACCAATCAGAAAAATCACCAGATCCAATCCCAACAAACCCAGACGAACACCTCCTC -CAAATCCCCGGCTCACAGAGAGCCCTAATAAGCCTAACAGGCAAAGGCTTCGAGCGCGCC -TTATTATCCAGACTGGACTGGTTCACCTATCTGTATGTGCCCCATCCCTACATATGCAAT -CTGCAATCCATGGCATTATAGTCATATGCATTCAAACAAATAACTAATCCAAATCCAGCA -AAATCATCGCCCTCGTAACAACAGGCTACCGCAGCGATGCCGTGCGTCTAGTATGCGGCA -CCGCCATGCAACCGCGTGGTCTAACAGGTCTAGCACAAGACACACTGGATTCAAGCATGA -ACGAGATGCGCTCCGTATTTGAGATCCTAGCTCGCGAGGAGTCATATCCCACATTAGTGC -ATTGCACCCAAGGCAAAGACCGAACAGGTCTGGTGATATTACTTGTATTACTTCTTGTTG 
-GGGGTGTGCCGGTTGACGCGATTGTTGATGATTACAGTCGGTCTGAGCTGGAGCTTGTGT -CTGAGTTTGAAGAGAGAATGGAGGAAATCAGGGCTATTGGGCTCGGAGAAGATTATACGC -GCTGTCCGCCTGGTTTTGTCGCTGAGACGACGAAATACCTGGAGACTAGGTATGGGGGTG -TGAGGGGGTATTTGGAACGGGTTGGGATTGGGTTTGACATGCAAGAGAGGATTAGGGGCA -AGTTTTTGGTTTAGGGGGTTTCTTTGCTTCTTTGGGACGGATTTGGATATATGGGTGGCT -TTGGTGGCTTTGACTGTATTTGATATGGGATCTGTAGATACGATAATGACCTTGGCATAT -TTGGGATGGGTCTAGCATTTCTTATTGTGGATTTAGCAGATAATAAATGGTGGAGGATTG -GGTGTTTTCATTTCGTGTTTCATAATATCAAATCAGGAAGCGTGTGGAAATGCCGGCCTA -TTGAATTGCTACCTAGGTAAACAGGAAAAGCCATACAGATGTATGTACGGTGTACGAAAA -ACCCAACTTCCCAAGCACTAATTTCTCCTACACTGCTCGAGTTCGTTCTCGACAATAGCG -AAGAACTCTAAAGCCAGGTCCTGTGGCAAAGCTGCCTTCGAAGTCCGAGCGTGCATGGCA -AGTGTAACGATTTTTGGCTGGTTACAGTGCTCGAGTACCCAAGTGCCACTGTCCATTTTG -GACCTCCAAGTCAAGGTTTTGGCATAACTCATTTGACGACACGCATTAAGAACGCCATCA -TAAGTCACGGGGCCATTCTCACGGTACCATGCCAATGAGTCCATCATACAATCGAATATC -CACGTCACTTTCGGAGCAGCAAACAATCTGTCAGGCGGTGGACCGAGTAGTGACTTTCCC -AAGTCACGACCGATAGCAATATCAAGCAAGAACCAGAACCGAGGCTCGGTTTGAGAATTC -AGATTCACATAGTCATCCATATCCTTTCCTGTATTGAAATTCATTTCGAATGCAGGATTG -GTCGTTTGGCTGGCGGCATTGGCTTTGGTAAAGAGGACAAATTCCAGTTTGCGTGCTGGA -CAGGGTATAGTGGTATGGCTTATGATGCGAGAAAGCCGCTGATAGTCCAGTATTGGCTCA -TGGATAATCGCATACACGTCAACGTCACTGATACCAGGCTCGTATGCGCCGTACCCGGCA -GAACCAAAGAGATAGATAGCCAACAGATTGTCTTTGAGCTCCCCTTGTAGACTGTGGACC -AGATGGCGTAAATAATTGCCCAACTGAGTATCCTCCACTTCCGTTGCCAGATGAACTGCT -CGGTCTGAAAATGTTAGAGAATTGGTCATCTTGACAGTCCACAGGGAAATCTCTCCTGCT -TTGGTTATCAAATCCTGATAGATTCTTAGGCCTCGTTGACCAATGATACACAAAAGCCTG -ACAATAGAGGAGTTCAAGAGAGTGATGTAGTCACTCTGTATTTGCAAGTGCACTTGAGCG -GCCTGCAGCAGTCATAGAGACAAGTGACATGCCATTGATGTTGTTCTTACTCCGTACCCC -GCCTGGTATCCCGCCTGGGGAATACTATTGGTACCTAGGTTGTAGGCCCCGGGGTACGCT -CAGTATTACATTAACCCGTGTTCGACGTGCCCCTGAGCATTGATTGGATCTCACTTTCAA -TTTGACTAGTCTGAACCCACTGAAATGTACCAAGTGAGCATGACTCTAAACCTCATAAAA -GAGGCCTAAGATAGAAAACTCCACCCGGCATAGAAAATCAAGTATGTATAGCTACCTAGG -TATGTAAAGTGAAGATTCCAAGGCCAACGTAGTGGCTCTAATAGTGTATGCCCATCTTTC -CGAAGAACACCATATAAATGACAGGTATAAAAAAAAACATTAACAAAAGCAGAGTATCAG 
-ATGACATCGTGCACATCATATAAGTAAATACAGGGAGAGAGAATTAGACCCGTTGGCCAC -GGTAATAGCCGGTATTCGGACCCATCGAGTCATGGGTCAGCGGCTCTTCGATGGAAACAT -TCTCTGCCTGGGCTCCACGGAATCCGTTCTCGCTCATCAGGTACTTCTCGCCCTGAGGCA -CGTGGTTGCGGGTGCGCACGGAAGGCAATAGATCAACAACGAAGGAGAGAACATAGCCCG -TGAAGACGAAAGCGATACACCACTCGAGAATCGCACCGGGATTTTGCTTTGACGCCTTCT -TGCtgccgatgcagacaccgaagccgatggcgagtgcGAGCTCGACGATGACGAAAACAA -GCTTGATGAAGAAACTGACAAGCAAGATACGGTGCTCGCGACGGTAGAAGATGCCGATAC -GGAGGTACTCAATGCAAACCAAGATGGCACTGATCACGTATCCAGCACTGCAGAGAAATT -GATTGGTCAGTTGTGATTCGCGCACAGTTTGGAATGTAGATATGGCTCACATGAACAGAG -CGATGAAACCATCGTGCATTTTTGGGTGGCGGAGTGTATCAAAGACAGAGAGCAGAATCA -GGCCAGCTGCACCGGCAATAGCGAAAAAAATTGAAAGTACAGCGCACGTCTTGTCCAATC -TGCCCTTGTTCTTGGCTAACTGGTTGGAGTGTCGAAGCCACCGCTCGGAAAGGAATGCTA -GATCGAGAAAGACGACGGTGACCGCGCTGCCTGCAACGAACAGCGGCTTTAGGCCTTGCG -CTCCGACATCTGAAATGTACCTATAGCAGTCCTGTTAGCGCATGGATAAACTAATAGCAG -AGATAAAACATACGCGATAGTCTGTCCGTCGTCCATACTGGCGTAGTGCGGTCTCCCGTC -TTGAACCCAGATGACCAACATGGCAATCAACATTGCTAAAAACATGTTAGCTGCGGCCCA -AAGCAAAATATCCAGTAAAGAGATGCCTTACAAACCCACATGCAGGCTGACACCAGTGGG -AACATCCAAAATGATATAATCCACATGTTTGCGACTTTTGTGTTTCGAGACTGATTCAGA -TGTAAAGGAGCTAAAAAGAGAGTATTGTGTGCCCGCTAGAAGTCGAATTCAAAAGACAGT -GGATAAAGAGAGTGGATTGAAACACAATGCGAAACGAGGCTATAAGGATACTGTCAGTCG -CAGTTTCTCCACGTGATGTCCATGTCGGATGGAAGGATGGAAGGAGTAGCTTCCAGGGAT -AATCCCCAATCAATAGTGCAACTCACTTATATCCCTGTATTTATGTACAATGTTTCACGC -AGAACTTCAGGTTCAAACAGTCTAAAAGGGGCGACAGTGGCGCTTAGATGACAAAGGGAT -AGCAAAAGTTTGGAACCGGGAGAACGATGATGCGTAAGGGGAATATATGACAATTGTATA -CCCAGTGGGTTACTACAACAGTACCTGGATAGGAGAATAAAAGGAGGTTCACAGGTGCAT -GAGAGCATGCCATCGATAATTAAATGTTTAGGCACACCCCTGCCCTATAAAGACGTCATT -TCCGGGGCTCTGCCAAGGCAGAAAAATTGCGTGCCCCCACAGCCTAGTCCGAATGATCTA -CCTCAAAATTTACTTCAATGGTATGTTCCGCTGATAGTTATCATCGTAGCCATGGTATTA -ATATGAGGCGTAAACATTAGAAAAAATTATGAACAAGGGATCTGCGGGTGGCTGAATACT -TGGCGTACCAAGAAATGAGACCTATATTTACAACGCTTTCAGCAACTTTGCATTTCCCCC -GTACGGAGGACAAGGTACAAGATCAGTTCCACTTCTGTATACCATTAAACTTCATATTCT -AAGCCGTACGAAGATCAAAGTGGAAATAATATGTGGATGCTTGTAGCCCAAGACCGATCT 
-ACCTAAAACGCCATCGCGTTACGCAAAGACCACAATTGTGGAATCGATAAAAAAAAAACA -TTAAGCACAAACACCCCACACCGAGATTATTTTTTGAGACGGATATCAAAGCAGCCTTGG -GACACTTCAAGTTCATCGGCTCGTCCGTTTACGGGTAGTTCCAGCAACCGGCCGCCTCTG -CGCAACAGCAACTGGAGCGTTGCGCTCCGCCACCTTGGCCACCTTGATGATCTGCACATC -ATCCTCGCTGGTCTGCGATTCATCTTCAGTGGATTGCTCCGACCCTAAATTGATTGAATC -TGGAGCTATGTCCCCTCGGACCTGCGATGTGCCTTGCGATGAGCGCTTGGCCGAAGATTG -AGAAACTGGGTATGGAGGCCCTGAAGGGTAACTCATTCGATTCTGCTGACCAGCCCAGAG -ATCGGAGGAAACCTGCGGTAAATATGGCGCAGATTCCCCGTATGGACTCATTTGCATTGT -ATTGTAGTAGGACGGGGCCCATTGAGGATCGTCTGGCAGTGGTCGTGGGAACACAGGCGG -TTGTTGCATTGATTGTGCTCGGGCATTGGGGGCAAGGCGCTCGGAGGTGTGTGTTGGGTA -GGTAGGTCGAAGAGGTGAGTTGGACTGCTTGGGGCGAAGAGATTGTGTCGTTGGAGTCCG -GGGCCGAGCAGAAGATGAGCTGGACGGCCGCGAGCCGGCCGTCCCACGAGCCCAAGTGCT -GTGTGCGTCAGGCGCAAAGGCCGGGTACGCGCCGGACAGCTCTGGGGCAACAACGCTCCG -TCGGGATGATGGGCGCGACGCTGCTTTGATAGAATTGAGGGACGGGCGCCGGCTTCCGCT -TGAACCATCCGTCTGCTGGCGCTTGCGCTCCAAGGCATCCAGTTCCTGTTTCGACAGCCG -AACATTTTTCCTTCCCAGAAGTTGGGCGCGACGAATTCGTTCGAGCGCTGTGTCCACCAA -TTGATCCTCCTTGTCCCGCATGGCCAACTGAATCGCATAGGCCACCGTGGTATCCTCATG -GTCGTATAGATCGAACTCTCCCCCATCAAACGTTTCAGCTTCATCCGTCAGATCCTCGAA -ATCCATAGGTCGCCTCCGCGGCCCTCGCCCTCCATACTCCTGCTTATATCGACCGCTTCG -CACCAACTCCCGCCCGTATACATCCTCCTCACTCTCCACCTCCTCCAGCCGTGTCTCCTC -CCACGCACGTCCCCTAGTATTTCTCCCCGTTTGTGCCCCTGGTGGTCTTCCCCATGAGGG -CATGTCAAAAGGAGCTCGCGGCCGACCACCTAGACCGCCTCCTCGGAAAAAGCATTCTCG -ATCGGTTTATCCAAAAAGGCAGCGTGGCCGAAAACGGTCACACAGGTTGCGCCAACCAAC -CACAGGAGAGTGGTGAGGGGGGAGGCGCAGATGCCGACGGGGACACCGATGATGAGCAGA -CCGGTGTAGAGCTGCGAAGTAGTGAAACGAGAGAAGCCGAGATCCAGGTCCCGACCTTGC -AGCATGTTGATTCCATAGAGACCACAGGTCATAAAGATGATATCGAGCAACAGGATCTTG -TTAGTCAGCAAGCTGTAGATGCTGAGCATCACAAACACGACGGCATAGTTGCTGGAGAAG -TAGGACAGGTTGTAATTGACTCGGCTCTGGGCTTCACCGAAGTTGGCAGGCTTGGATATA -CGCTTGATGTCAAGGAACTCGGAAAGCGGACGCAGGTTGGCGAAGCGCGTACTCAACGAG -GTGCTGCGGAGGTTGTTGAAGCGATCGCCAAAGCGCGAAGTGAGCGCATCAATTGGAATG -GAAACCATTTTGAATGAAGAGAGGTTGGAAGGGGAAAAATAGATTGAAAAGATTGAAAAA -GTTGAAGAGAGTGGAAATGGGACAGGTAATATGACAGAGGAAAGCCGGAAGCAACAAGAG 
-GGTCACTTTCCTGGGCCTTTTTCCCCTTTCGATTTTAGTCAGAGAGCACGGTCGTTTCAC -CACTCGAGTACTTAGACGAAACGTTGTCCGGAGTAGTCCTGAGGAGACATTTAGGTAGAA -ACAATATGTCACTAATAATAGTAACGGGTTCCTGAAGTCCCAGTTCCTGGCTAGGCAACT -CCCATAGGAAGTCCTCCGTACGGAGAAAAGCAAACTACGCATTAAATCTAGATCCCTCAC -TATATATACCATCTACGCTCAGATGCAATTATTCATGCAGAGCCCAACTATAATTTCCCC -TCTCGAAAATCCAGGGAAAAAAATGTAAATAAAATCAAGTAAAAGCAGTAGTCACTGTAA -AGGACAACTCAATTGGGGCCACGTTTGGCTCTTCACAGTGTGGATAAACGCCCGACCGCT -TTTTCCCTAACATGTCCTCCACTAACACACAGCCTATTCGTTCATACTTCTCTCGTTTCT -GGCTTCTGCGGCATTGACCGCTCTATCATCATGAAGCTGTTTGCCTTTTTATCGGTCTTC -GTCTTCTTCGGCCTTGTCGCTGCCTGGTCCAAAGAAGGTGGGTATTGTTTACCCCGACCT -GAACCCCATACTGACTTTCAAACAGATTATGAGATCTTTGGTCTACAAAATGATGTGGCC -ACAAATGAGGGTGCCAATGTGACCTTCTATGGTGCGTGATATCTCCCAGGCGTTACAAAT -TTCTTTTTCTCTTTCTTTGCATCTTCGGGGATACATATCTGACCTACATCCCTACTGCAG -ACCTCCTCGAGATCCGTCCCAATGCGAATCTAGAACAAATTACCAAGGCCTACCGCAAGA -AGTCGAGAATCATTCACCCAGATAAGGTCAAGCGCGCCTTCATCGCCAACTACGCCAAGG -ACAAGAGCAAGGCCAAATCCAACCAGGGGGTCAATGTCAACCAGGGTCCTACTAAGCGCG -AGATTGACGCCGCAGTCAAGAACGCCGATGCCCGCTCTGCCCGTCTGAATCTCGTCGCCA -ACGTTCTGCGCGGTCCAAACCGAGAGCGTTACGACCACTTCCTCAAGAATGGCTTCCCCC -TATGGAAGGGTACCGGATACTACTATTCTCGCTTCCGTCCTGGCCTGGGCTCCGTGCTGA -TGGGGCTGTTCCTTGTCTTCGGTGGTGGTGGTCACTACGCTGGGCTGATCCTTGGCTGGA -AGCGCCAGCGCGACTTTGTGGATCGTTACATTCGCCAGGCCCGCAGAGCCGCCTGGGGTG -ATGAAATGGGGGTTCGTGGTATCCCTGGAATCGACTCGGTCACAGCCGCGCCTCCCCCTT -CTGCTTCCGAGTCCGGTGACGCCGGTGCCGTGGCTGTCAACCGTCGCCAGAAGCGCATGA -TGGATAAGGAGAACAAGAAGGACAAGAAGGGAGGAGCCCGTGTGAGCGCCCGTCCTTCTT -CTGGTACCTCCACCCCCACCGAGCAGGTTGCTAGCACTGGCGAGCGCAAGCGTGTTGTTG -CTGAGAATGGAAAGACGCTGATTGTTGATTCTATTGGCAATGTGTTCTTGGAGGAGCAGA -ACGAGGCTGGCGAGCGCCAGGAGTATCTGCTCGACATTGATCAGATCGAACGCCCTACCA -TCCGTCAGACACTGGTGTGCAAGCTTCCCATCTGGTGTTTCAACATGACTGTCGGTCGGG -TGTTGGGAACTGGAACCACCGAGGAGGAAGTGGATTCTGACGAATCCGAGGAGCCCGTGG -TCGAGGAAGCTGAGACCACTGCCACCTCAAACTCCCGCTCTCGGAAGCGTAACAAGCGAT -CCCAGCGATCATAGACTTGAACAGCTATATTGCTGCACCTCGGGCATTAGTTGTTGTTTT -CTCTCCCTGCGGGATGATTTCAAAAGTTCCCCAGGTTGCTTTACTATAGAATACCTGCTC 
-GGATGTTGGCAATACCCAAGCAATACCCAAGTAGAAACGAAAATGTACACATACTTTTTT -TTCCTCAAAGCCAAAACTTCAGGGCAAAATGATGTGAGCTACGTATCATTTACTGGCCAT -CGATATGCTGTGCACGGTCTATAGTTATAAAGCCATTGGCGACCCAGTCATGTACCATAT -ACTCCGTCCTTGCAGATCCATCTCATATGCGCGGTACTATGGAGTGTCCCCGCAAAGATT -TGTCGGCCACAGTGATATCTGTATACCAAACCTATGTTCATTGGGTTTATAGGCGGGTCT -ACTGTACAAGAGGGACAGTGCACAGCTGCAGACACAACTAGTGCGGTGTACAGTTGTCCG -ATTGTCAGATCCACCGACTTGGGCACCAACACTTGACTATTTGAGTATAAATAGGAGACT -TCAGTATATCCCTGGACTAACTTCGAATTTTCGAATATTCCGTTGACGGTACAAAACAGC -AGCCTTTCCAAAGAAACCAATTCTATCATATTCAAACGGGCCTCCCATTTCATGACAACA -TTGAAAACACAAAGTTTAACGAGAAAAACAAAGTTGGGAGTATAAAAGCAAAAAAAAGAG -CAAAGGACAGTAGGTGTATCATCGTGATCGTCATGTGTCGAATTTCAGCAAAGCATTGCA -AATCGTGGAAAAGAACCCCAGGCTCGAACAGCTCAGCCTGTCCAGGGGTATCGATGTCGT -CTACGCAAACATCTTGGCAATGTTCATGAGACTGCTGATATTCTCCGACATGCCCCTACC -ACCGCCGCCCATCATACCACCCATACCACCCATACCGCCACTAGGCTTCGACGCACCACC -AAGGGCACTGGTGATAGCATGCGGCAAGCGAATGCCAATCATAGCCAGCAGCTTTGGCAG -AACGCCGCTAGTCAGAAGAGGCACGATGACCATCATAAAGACCTTCATAGGGTTGCGACG -GGCCCAGCGATAGATATCGCGGAAAAGGCGCTTGATGTCGCGGAGCATGCGCTGAACGAA -TCCTTCGCGGGGACGCGCCCGTCGCGACGATGAGAATGACGAGAATACAGATGGGCTTGA -TCGACCTCCTGTTCGGCTTCCGCCTAATCCGCCTAGTCCTCCGAAAAAGGATGGTGCTGA -GTGTCGCGAGTGGTGAGTGGATGAATAGCCGGACTTGCTGCGTCTGCTGTGTGTGCTGTG -GCCGCTAGGCGTGGGTGAGGACCGTCGCCGCACGTATCCTGTCGACGAAACTCCGCTGGA -TCTTCCGTCGAACCAACCCATTGTGGCAGTGTGGTAATGCTGATGTTGAAATTGCTTACG -GATGCGACTTTGGGCACTTGGAGGCCACAGGTGTCTAGTCTGAGGGACAGGTGGGGTGCT -AATGGAGAGATTGTGGTGGGTAGGTACACAGAAAAAGGCGTGTTTGAGCTGCTGCGTAAC -AAAGCCCCTGATGATCCCTTTGAAACTTTTTTTCGAGCTTTTGTGTGGCAATTGAAAGAA -TGAAAAGAAACAGATGCAAGTCAGCCACTAGCTGCTGGAGATGCGCAATCTGAGGTCTAG -GCCGATGTGTGCATCCGCTTTGCGGGTTTTCTATGCAGGAAATTTTTTTGACAGCTGGCA -CAATTGGGATTCTAAAAGGGGTTTGTAAATAACACTGAATGATAATTTCAGGCGAATCAA -ACGGGAGCTTCAGATTGGATTGATCAGTCACCCCCTGGCGGAACAAAAGTGGCTGGGTTG -TGTGGCCGGCTAAACTTAGACTTTGACTGCATTTTGCGGTGGAGAAAGTACTCCAGATAG -TCAAGCAATCCACTCTCCAATGATAAATATCAGAAATATCTTTTGAAAATGTCCTGTTGA -TCCTGTGTCGATGTAGTTGAGAAATCTAATTTACTTAAATATTTGAACCATGGAACCTGA 
-TGGGCATGTCTGCCTCAGGCAACATCTACGTCACTGCATGTGATGGCTTTCCATGGAAGA -CTCTCTCCAGATTCTTCTTCGAGTCTATCACGTCTGGTATAAACATTTTCTTGCTGAAAT -TTAGAACCAGGTGTAACCATGCCAAGTATGTTGGTATCATCCCCGTCTTCTTGCTGCAAT -ATAACTAATTTTTGACCAGGCCCCGATTACTGTAAGACCTCCATTCAAGGCCTCACTGAG -ACCGTTCCAACTAACACATACCAGACAAGATTCTAGGCATCGCCCAAGATGCCACCCAAC -AACAGATTCGCACTGCATATAAGAGGTAAGATCACTACAACTAGCCTCGAAATGACCCCC -TAACCAAGTCTAGGGAATCACTGAAATCCCACCCCGATCGAGTCCCTGTCGACTCCCCAG -AGCGTCCATCACGCACACGAAAGTTCCAGGAAATCAACGATGCCTACTATACCCTTTCAG -ATCCCGCGCGCCGCCGGGAATATGACGCTAGCCGCGCATACCAAGCAGCTGAGGAGGAAG -CAGAAGCAGAAGTCCCACCAACTAGCGGCGGTTTCCCTTGGTCCAGCTTTGGATTCGGCG -GGAATCGCGATGAGCGCGACAACGATCAGTTCGGTTCCGTTTTTGAAGAGATGCTGCGCG -AAGAGGGTCTCGCCGAAGAGAATGAGGGAGAAGGCGGGAGGCGGACTCGCCCAACCGGGC -GATTCTGGGCACTGGTTGGCGGTGTCAGCGGTGGTGCAATGGGATTCATTGTTGGCAATG -TACCTGGTGCGTTGGCTGGCGCTGTTGCCGGTAATCGTATCGGCGCAATCCGTGATGCTA -AGGGCAAAAGTGTCTATGAGGTTTTCTTGGACCTTCCAACGCAGGATCGGACGAGGCTCC -TAAGTGAGCTTGCTGCTAAGGTCTTTCAGACCACTATGGGGCGCTGAGTCTGTCTACTCA -TCATTTCTGGGTGTTGAGGATTTGGGAGCTGGCTCTTACAGGCCTCGGCGGAAATTCGGT -TGGGTCGGTCCCCTCAACTGTGTGTCTACGTGGCTATGATCACTTAACGTAGGAGGAAAT -ACATCTCTCCAGTATATAGCAAGCTAGGTATTTAAGATAATTCTCGAGTGCATTTGACTC -GTTCTAATTTGATGAAAGGTTGGCCCCCCGGGGCCGTGTACCCCTCCGTTTCTTATCGAT -AACAGCCATGCACAACGGGAGCTTCCAGCCAAGGTTACCTACCTAGGTAGGTAGGTAGGT -GTCGATGCTACACAATATAGCATAGTCCAAGATGTTATGCAACGGGCATGTAAATCAATT -CCCCGGTAATTCAAATGAGCCATTAAATCTAATAATTTATCTTTCTAAACAAGTGAGACA -GACCAGGTGACCTTCTTTCCGCTTCTGGGCCCAATCGCCTCACGCTAAGCTCCCGCTCTC -ACCGCTTGCGGTCAAAATAAGCCCCTCTCTCTATATTACCCTCGAGTTGCCCGCAATTTT -ATCTTTGTCAAATTTCTTATTGAGACTCAATTGACCAAAGCCTTCAAGATGACTGAAGCT -GCCAGTATCTACCCCCCGTTGGAGAGCCGTCCCATGAAGGACACGGTCGTTCTGTTCGAT -GTTGACGAGACTCTGACCCCCGCAAGACGGGTAGGTAGCAATGAGACTCACTGCGGTTCC -ATTATACTAATATATTTCTACTACAGCATGCCTCCCACGAGATGCTCGAGTTACTCTCTC -GGTTGCGCCACAAGTGCGCTATTGGATTCGTTCGTCTCCTCCCAATCAACACATCCACCC -CGTTCCTAACATGATTCAGGTTGGCGGTTCGAACCTTGTCAAGCAGCAAGAGCAACTTGG -CTCACCCACCATCGATGTTACCACTATGTTTGATTTCTGCTTCTCCGAAAACGGTCTGAC 
-TGCATTCCGCCTCGGCAAGCCACTTGCCAGCAACAACTTCATTCAGTGGCTCGGGGAGGA -CAAGTACCAAGCGTTGGTGGATTTCGTTCTTAAGTTCATTGCCAACACAAAGTTACCCCG -CAAGCGTGGTACTTTCATTGAGTTCCGGAATGGAATGGTCAATATTAGCCCCGTTGGGCG -TAACGCAAGCGTTGAAGAACGCGATGAGTTTGAAGCGTACGTGGAGATCTTCTGTCCACT -ATGTCCAGGTGGTTTTGGATACGAAAATAGGCTGGCTGACTGACTTGCGTGATAGTTTCG -ACAAGATCCACAACATCAGAAAGACACTGGTGGAGCTCTTGAAGAAGGAGTTCCCCGACT -ATGGTCTCACGTAAGTCTTTGTCTAGCGATGTTTGCTTTTGATATGCTGCTAATCAATCC -AAAAAAGCTACTCTATCGGTGGTCAGATCTCTTTCGATGTCTTCCCCACCGGCTGGGACA -AGACCTACTGCCTACAGCACATTGAAGCTGAAAAGGAAATCTCGGGTATCGATTACAAGA -CCATCCACTTCTTCGGCGACAAGACTTTCGTCGGAGGAAATGATTATGAGATCTACGAAG -ACCCTCGTACCATTGGGCACTCCGTTGATGGTCCTCAAGACACCATCAAGCAGCTGAAGA -AGCTGTTTGACCTATAGATACGAGGTTGGACAAGTAGCCGAAAGGGCATGGATTTGTGAT -AGCGTTTACATGTGAAGTCTAATTAAGATATTTTCTGAAAAAGCCTTAGTTAATGATCGA -TCAAAATAGTCCTCTAATTGCATCAAATCTCCCAGATTATACATAGAAAGGTCCATTTCA -CAAGGTATGCTTTTCCCCCGCTTGTTTCTACCATCCATCTTAAAAACTGGAAGCATATCT -GAGAGGCCCGTTGAAAAGTCTACTCCAGGGCTAAAGAGTCATGGAGACATCCCAAGGCCG -CATAAGTGGCTGTTTACTCTTATGAGAAGTATAATTACATACTCCGGACAAGATAATCAA -TGCGGGTAATGCGGGCCGGGAAACTCCAGGGCTGGAAGCGTCGATCTTTCCGATGCCCTT -AGTCATCCTGATCATTTCAGGCCCCGGAATAAGGACTTCCAAGCTTTCCGTTTCTGCGTC -ACTCTTGATCGATCTGAAGTGCACTTAAAATCAAACATCCGCTTACCTAGTGTCTTTGAC -ATGGCGCCTCAAAAGCATGTTGCGGATGTTCCGCGCAATATCCTTCCCCGCCTGAGTTGG -AACAGCTCCTCTTCACGTCCATCCGTTACCATCACCCACCATTCCGCACTTGCAACAAAT -CAAAGACGGCCACTTTCTCAAGGCCGGAGTCTTTCGAAACAACAATTTCATTCCCTCGGC -GCACCTTCTCAGCCTTCCCGGCTCATCTCCACATCGACCCGGGATTCTATCTCCGGATAT -CTCGTTTCCCGAGGAACCTCTATAACTACGCCGGCGACATCCACAACCCAGCGCGGCAAT -CCGATCCGACACAATGGTGTCTATGTGGCTACCTTTCATCCCGCTCGCCGCGCCTTTCAT -GCGACGCCAGTTCGGCCACGCGACCATCACTTTGATACCCTGAAATTCGTGAAGCGTTTA -CAGACGGAGGGCTTCAGCGAGGAGCAGTCCGTAGCTATGATGCGGGTATTGAACGATATT -ATTCAGGAGTCGATTCAGAATTTGACCAGAACAATGGTCCTCCGAGAAGGTATGTGAATG -CGTCTGTCGCGCATGATGACTTGGTTATGTTTGTCGGAGATCTAACGGGAAACTCTTGGC -TAGACTCGGAGAGATCAACTTATGAACAAAAAGTGGATTTTGCGAAATTGCGCTCCGAGT -TACTCAACACAGACTCGACTGAGGCACAGTTGACCCGTTCGTCACACGAGAAGATCGCGG 
-CAGACCTCGCCAAACTCAACTCACGACTCCGGGATGAGATTGGACGAACACAAGCCTCGG -TCCGGTTGGACCTGAATCTTGAGAAGGGTCGGATTCGTGAGGAAGCAAATAGCCAAGAAA -TGCGTATCAAGGAGACTGAAACGCGAATTGAGCAAGAGGTTGCAGGATTAAGGGAACGAG -TGGAGGCTGTCAAGTTCTCGACTCTGCAGTGGCTTATGTATGATCCCCGAATTACCTCGA -AGCGCGACCTTATACTGACTTTCTTCTCTCTCCAGGGGTGTCTGCACAGGTACCGCCGCA -CTTATTCTTGGTGCCTGGCGCTTGTTTATGTAATTGGGACAAAAGTCTGCATTGGATATG -GAGTGATTGTTTTTGGCTTTGCTTTACATGAGGCATTCGATGTCTTACAACTTCTCTTTT -TGTATACTAGTCTCTCTACTCGTCTAGCCATTCTATGTTGTCATGTATGTAGGTTTAGAC -AATACCAAGCATTTGGTAATATAACTATCAATGAAAACCATTAGATAATTCAAAATTCAG -CCTGGGTATATGTAACTATGGAGTAGCGGACCACACTAGCCTGAGGTTACATGGGGGGAG -ATGAGTACGGACATTTGGGAGTCTCGCAGGTTCCTTCTCTATTCGTGAACATTTTCCCCT -CTTCATGCACCTCTATCTTCAGGGATAAATCTACCTATCCAAGCCATTTTCAACGTTACA -TACATACCCTTTGACCAGGGGTAGAACCTAACATTCTAATTGGTTCACGGCAGAACTAAT -ATGCTAGTGCCTGTAAAGCACTTGTCGCAACGGCTGATGGATCCTCAATCTCCAATGGAG -TATTGTTTCATCCGCCTAGCTGCATGCTTGTTATCTATTCTTCTGCTCTGGCTTGGGGTT -ACATCTTACAATGTTTTTAAAGCCCATTGTAGGTCTTGGAATCGATCTTCGGTGTCGATG -CCCAGTCAAGAACCCCTAAAGCCTCAAAAGACTAGCAAAGCCGCGAACACTGTGCCGTCT -ATTGATCATTGTGAATTGAAATCGAGTGTGAACGATAGCCACGGCAAACTGAGAAGAGTT -CAACCAAGCGTCGGACTACTGACCATTATGGCTCTAATCGCCATATGTGGATTGTTGCTT -TGGGGACACCTCCCATCTGCCTTGTCGCTCCAGAGCGGATTGCAGTATATTGCTGGCTTT -TCATCCCAGGCACCTAGCTCGACTGATTTGCAAGAATTTTTCCAGCTCTATCAGCCAATA -TTGCTTCATTCTAGTGGAGATGGCACCTGCAATGTCGAGATGTTGCTCATGGATCATGTT -TTTGGAGTTAGTTATGGAGCCCCATTTGTTGGTAAGAGCTCTCGAACAGTTACGCCTACT -CGTGGTCTGATCTTGCAATTCATCTAGGCGACTATGAACCCCCAAACTGCGAGTTCGATA -CTGTGCGAATCAATCTCACTGTCACCTCTCGGGGAAAGCAGTATGACCGCCTAGCACTCA -TGTACCTTGGGGACAATGAAGTGTTCCGGACTTCAACGGCAGAGCCGACAGTCAATGGGA -TTGTCTGGACCTATATCAAAGAGATGTCGCACTACAACTCGCTATGGAAGAATCCCCAGA -AGCTAATATTTGACTTGGGAAATATCATTAACGATGTTTACACCGGCTCGTTCAATGCCA -CACTAACGGCGCACTTCTCTATGGGGCAGAATGTCAAGACTGCGGATATGGTTCTTCCAA -TCTCTGCCAAAAAGTCGGCATCAAACTCTTCCAGCGCTTTCCAACTTCCCACTGACAACA -CAACGGTCATGTACGAGATCCCTGCTGCGGCATCGCGTGCTGTTGTCTCAATATCCGCAT -GTGGACAATCAGAAGAAGAGTTCTGGTGGTCCAATGTCTTCTCCGAGGATACCCAAGACT 
-TTGAAAGCACTGTCGGTGGATTATATGGATACACTCCCTTTCGTGAAGTTCAGCTCTATA -TCGATGGGATCCTTGCCGGGCTTGTCTGGCCATTTCCTATTATCTTCACCGGAGGTGTAG -CTCCGGGATTCTGGCGTCCTGTCGTCGGAACTGACGCGTTTGATCTTCGACAGCCGGAGA -TTGATATTTCCCCATTTCTCCCCATGATCCAAGACGGGAAAGAACATTCATTTGAGATTC -GGGTGACTGGTCTTGATATCTCAGCGAATGGCAGTGCCACATTTTCCAACACTGTGGGCT -CGTATTGGGTTGTGACAGGAAATATTTTCATATACATCGATAATGACAGCTTAGCCTCGA -AAGCCACTATCACTCGTGACAAAAAGGCACCCACAGTGGATGCTCCCTTGCCAGTCTTTT -CTGTGACTCGGAACCTTGTTCACAATGAGACTGGGGGAAACGATTCCTTGTCGTACTCTG -TGGTTGTCGAGCGAGTTTTCAGTGCAACATCTTCTCTGTATTCATGGTCTCAGACACTAT -CCTTCTCCAATCACGGTCTTCTCAACCAGCAAGGATACAGTCAAGTCAACAGACAACTCA -CAAGTGGTAATAACACTATCACCGAGCTCGGAGATATACCTGTTTCCAACAGCGTTGCAT -ATCAATATCCTTTGGTTGCCAATGCAACTTACGGTATCACCTCAAATGGGATGACGATTC -ACTCTTGGATGAAGAGAGGTCTCGATTTTGAAGCAACAGGAGGGCTCGGTATTTCCACTT -ACACCTTGACCTCGGGGCCATCATACCTACACACGAGTCAGTCTGGCATGGCGAAGTATA -AATCCATCACTGGTGGAAAGTCAAGCTCCTGGGGCGATACCGTTAATATGTTCGCCAGTC -AGATGAATGGGCAATCATACCACCGATATGTTCATGCAGTCAATGGTACCGTTGTGCATG -ACACCGACCTCAAATACAAGGCCTCGTCTTCCTCTTCACAAAATCACGAGGATACCGGTA -GAGGCAGCGTGAGGGCTATCATTGGAAAGGGACCTGGAACTCTTGTAAATTAAGAAAGGT -GACATACACGAAGAATTTTGGACGTCATGGTTGCAAGGTAGATGATATCTCGTTTGAGAA -ACGGAGGTTTGATGCCACGCGTCATGGTAGAGTAGTGCCCGTAGTGAGTTATTATATTAT -CGTAAGAAATTTAACATTTGATTTTTGCGCAATAAGACTAAGTTCGAGATGAGACAAGGT -CCTGAATAGTCGGTTACCTTTGTTTGGGGATTTCAGTCTCATTTGGCACTGAGCACAAAA -GGCATCCAAACTGGACATCGTTGTTACCCCAGCAAGTTTGAATCCTTATTACACTTCTTC -ACATATTTTACTGGCCTTGCCTCGTCCCCTCCGCGCCAACGTTCCGTTCAGCGCGCCCTC -GCAATGGAATTACCTTCTTACAGCATCGAGGGTATTGATCGCTTTTTCCACCAGCTAGGG -GTGTAAGTTCTCCGGTTTACACAACGTATAAACAATAGAACTAACAAGAGACAGATTGTA -CCCCACAGGCATGTTCTCCGACCTCACAATAAACACCGCGGACCAAACCTTAAAGGTGCA -CAGAGTCGTCGTCTGTAGCCGATCTGAGGCTTTCTGTAGGCTCTTCTCTAATGTACCCAA -TGCAAGTATCTCACAATCTCAAGCACTTTATCACCCACAGGCTGGCTGACTTTAATGGTT -GCAGGAAAGCAAACAGGCCCATATGTTCGAAATCAAGGACGAACACCCACGAGTCGTAGA -AGCCATGATACGCTCATTCTACGGACTTCATTACGATATCAATCAATCCGAACCTCAAAT -GTGCCCTATGTTATTCAATGTCAAAGTATACTCTCTTGCCGACAAATTCGAGGTGGAATA 
-TCTCAAAATCCAAGCAAAATTAACCTTCGTCGCCCTCACCCAAGAGAACTGGGACTCGGA -TGAATTCTTGACAGCTGCTTTTGAAGCATACAAGACAACACCGAAATCTGATCGAGGTCT -GAGAGATGTGGTCGTTGCGGTCTGTCAGAAGTACAGGAAGGGCCTGCGCGAGAAGAAGAG -GTTCGAGAAATTGCTTGAAGAAACTCCCGGGCTTGCTACGGATGTTGTGCTGCTTTCGCA -TAGATGGCTGCCCCAGCCTGCATCTGCGAGGGTTCGTATCGTTCAGTCTTTCTCTTGTCT -GTCTTGTTTTGCAAAATGGCAGATTGAGGTTGGGTTGGCGGAGTATTTTACTGAGTGTCC -CTTTTGCCGGGATGATAAAGTTGGAGCTTTTTGAGGTCAGACTTGCAGTGGATTATCTAT -TGCTTGGAGATAACATGTTTCTTTTGTAATATCAGTAGGAGTATTGAGCACAGAACAGAG -AGATATGCTGATTACCTTCCTGTTAAGAATCGACGAGATTTTGGAGAATACGCCCACTGA -ATACTACATGAGCCTAGACAAATTATCTATGTGTGTTTGTGTGATTGAAATTAGACGGGA -CTCTTGGGCGGTAATCAAATTGTCAATATTACGCATCGGAACGAAGTATATACCCACGTG -GAGGGATTATATATGTTTGGACATTTTCAAACATATTATCCGAGAAGGAGATATCCAAAA -GGACAGCTGATATTAGCTTAGGTAATTTATCTAAGGTTCACTTGATATCTTGAATTACAG -ACATTTGGACGCAGTATAGAGATCTGAGCCATTGTTGTCTTGATATACTAAGTTAGGGTC -GCCCTGAATTGCATTTAGCCGACAATTAGCCCCTGCTCTTATCGCCCCGTGATTTTTGCG -ATAAGACGATCCATGGATCTGTCGATCGCCGCATCTTCTTATCGCATTATCTCTCCTCAA -TCCTTCATTGTGGCGACAGCGTAGGCTGTGGTTTTCAAAATGGGTGTGGTCGTAGAAGAT -CTCGAAATACCAGGTATATCTCTTGTCTCTTGATTTTCTTGACGAAATTGAAGTACTGAT -GTTCATTGTTCTTCTAGACGCGACGCTCGAGTCTCTATACCCCATCTTCCAGCAACATGA -CGACCATGCTACCAAGCGGCGTAAGACTATTAAAGGGGACGGCAAGGCTACCAAACGAGG -CGCAAGGAGCCTGACACAAGAAAATGGGTTATCAATTACAGGAATACCCCTGGGCTACAT -TCCATTGACACGGTTGACAATGCGCATAGTAGGTTGCGACTAGTTTCCCGCTCTAAAGCA -AAATGACTGATTCCTCACGCTACACAGAGACCAAACAACACGAGTACGCGCAAATACAAG -AGATCGTACGACCAAACCCTGTCCGACTCCCGGGTTCCCATACTCATAGATGTTCGAAGC -GTTCATTTTATCGATGATGATTTGAACAACACGCCACAACCAGAATCGGCCAGTGGCGAC -GCCAACCGTATGGAGCTGGAACTCTCTAGTCTCAATGAAGAAGAATTGCTGATATATCCA -TGCGAGGACTTGCAGCTGTTTGATATCCTTGGGCAACTCCAGACCGTCAGCAAACTTACT -CACGTGGACCTGTTCTCGAATAATCTGCCCACAGCATGCTATCAAGCGCACCTATGTACC -TCGCCAGATGGAGCAACGTTTACTCTCGAGACAGTCGTTCTTTGGAGGGATTCTCTTGAG -GCCCCGGAACCAAAGCGCTTGACGGAAGCAGAGCTGGAAGTCTTTACAAAATATGTGCTG -CAGGAAAAATGTGTCCGCCCGTCAGGATTGACCGACTCACGCGAATATTGCAATAAAATG -CTAGGAATTGCCAAAGAATGGTCTCCCCGTGATTTCTACAAGAATGTTCACGTTCCCAAC 
-TCCGCAGAAAGTCCGCCGAACCTCACATGCTCTGAATTAAATTGCGAGTTATACCCGTTC -CAGAAAAGAGCTGTTCGCTGGCTTTTGCAGAGAGAAGGGAGAGAAGTCAGGCCAAATGGC -GAAATTGTCCCTATGGGAGAGCTTCCTAAAAGTGACATCCCAGCGTCTTTCAATTCTATG -AAAGATGCCGATGGACAGACCTATTACTTCAGTCATCTTTTTATGATTCTGACAACAGAT -CTTTCTGGGTGGCACGATGCCGCAGACAACCTGAAAGGTGGAATACTTGCAGAGGAAATG -GGGCTGGGCAAGACAGTCGAAGTGATTGCTTTGATAAGTCTGAACAAACGAGAGCCGCAG -TTCAAGGCTGATCCCGACGGGCTTCAACCTACTGGCGCCACCTTAATCATCACACCACCA -GCGATTCTTGAACAATGGAGACAAGAGTTGAAAGAGCATGCCCCCACTCTGCGTGTTCAT -CATTACAATGGCATTAAACGTGGGAGAGAAAAAACCGACGACATGATTGTTGACGAGCTT -GCTGAGTTTGATGTTGTTCTAACTACGTACAATGTCATTGCTAAAGAAATTCACTATGCG -GGAACTGCTCCACAGCGAGCTCTACGACACGAAAAGCGATTTGTGCAGAGGAAGACCCCA -CTCGTTCGTCTGTCATGGTGGCGTGTTTGCCTGGATGAGGCTCAGATGATTGAGAGTGGA -GTCAGCAATGCTGCGAAAGTAGCTCGCTTGATCCCTCGAGAGAATGCTTGGGCTGTGACT -GGTACCCCACTGCGCAGAAACATAGACGACCTGTTTGGCCTTCTTCTTTTTCTCCATTAC -GAGCCGTTCTGTTCTTCAGCCCCTTTGTGGAAAAGATTGTGTCAGTGTTTTGGGACAGTC -CTGGCAAAAATTATACACACAATTGCTCTTCGGCACAGGAAAGGTCAATTATTGGATGAG -CTTCGCTTACCACCTCAAAAGCGTATTGTGATCACTACTCCCTTCACAGCTGTAGAAGAA -CAGCAATACGGTCAGCTCTTTGAGCAAATGTGTGAAGAATGCGGTTTGAGCACCTTGGGA -GCCCCCCTTCGAGGCGACTGGGACCCCGAAGATCCTGTAATTCTCGACAAGATGCGTAAC -TGGTTAGCGAGACTGCGCCAAACCTGTCTCCATCCCCAAGTCGGATATCGCCGAACCTTG -GGTCCAGGGTCCGGTCCTTTGCGGACTGTGGATCAGGTCCTGGACGTCATGACTGAGACC -AACGAGTCGGCCATACGTACGGAGGAACGATCGCTCATTTTATCGCAACTTCGACGAGGC -CAGCTTCTGGAAAATGCAAAACGCCGCCAAGAAGCCCTCGCTCTTTGGCAGAAAGCATTG -GATCATGCCACTCAGCTGGTTGAAGACAGCAGAGAGCAATTGCGCCTAGTGAAGTTCAAA -GGCGCTACCGGCAATAACGGGGCTAATCTAGGAGACACCAATTTGAATGGCgaggatgat -gaggaggacgaagaagaggaAGCAGACAAGAACAGTCGTCTCGGCCAATGTCGACAGAAG -CTTCGAGCTGCACTTGAGGTCCAGCATATTGCAGTGTTCTTCACTGCGAACGCATATTAC -CAAATCAAAAGCGATCCAAATCTGACTCAGCTAGATTCGGATGAATTCAAATCGCTTGAG -AAACGGGAAGAAGATGCATATGAGGCTGCAAAAGTCATCCGCAAAGAGATGTTGACTGAT -ATTTCTCGGAAAGTCGAGCGCTACATGAGAGAAATCAAAATTAAAGCAAAAGACAAAGCA -TTCGTGCACATCCCGAAAATGAAACCCCAGCTCTACAGCAAAGGAGTCGAGTCATATAAT -CTGCTTAGCAAATTCGAAGATTTTTGCGACGCTCTTAACAAGCATGCTAACCAATACAAG 
-GAATGGCGAGATGCCATGGTTAGACTTGTCTCTCAGTCGCTCATTGACCAGGAAGAAGAG -GCAAAGCTGGAGGGCGATGAGTATGAGCGGTCAACAAAACATCAAGACGAGATGTATGTC -TACATGGAGGCTTTGAGGTCAATGTACAGCGACCGATATGATGCTCTCACTGGTCAAAAG -AATACGCTCATTTCCCACGAAGCCAAAGCCGGAATTATCCAGGCCCAAAAAGGGGAGGGG -CCTTCTCCGGAATTATTTCTCAAGATTATGGAGACTCGCAGTCAGATCATGCCTGATCCT -AGCCTTGGATCTCTCCGCAACATTGTCAGTGAGCTTCGCAAACTTGTGACATCGTTGGAG -TGGCAAGCCAGCTCGGGCAATTCGCGTTCTCGTGCTGAACACGAGATCGTCGAAATGGTC -CTGAAAAATGCCGGTCAAATGATCGCTGAGCAACTGAAGGTTACGAACAAACTGAGCAGA -GAAGTTGAAATGTTCCGCGACACCATGAACAATCGATTGGAGTATTACCGTCATCTACAG -CAGATTTCGGATACGGTGGCCCCATACGACGAGGGAAATGCAGGCAAACCGCTAGATGAA -TCCGCTTTCTATCTGAGACTCAAGCAAGAGGAGTCCATGGAAGAAAAGATCGCTTCTCTC -AAGTCCAAAGCAAGATACCTCATTCACCTACGAGATGATGGTGGCTCCGAAAGTAATCCT -CGAGAGTGTATAATCTGCCAATCGACCTTCGAAGTCGGTGAGTATCGCTTTTTCTGGATG -TATGTTTACGCGAGGACTGACATTTCATGCAGGTGTATTGACTGTGTGTGGTCACAAGTA -CTGCAAGGATTGCTTGCGAATGTGGTGGGCTACACACCAAAATTGTCCAATGTGCAAAAA -GAAGTTGAAACGTAATGATTTCCATCAGATCACATACAAGCCACAGGCACTAGTTGCACA -GGAGGAAAAGTCACCCGTCAAACTGGACCATGAAGGTCATTCTCAGAATGCGATTTATAG -TGACATCAGCTCCGGTCATCTCAACGAGATTAAGAACATTGACTTGGAGGGTTCTTATGG -GACAAAAATTGACACTTTGGCCCGGCACATCCTATGGCTGCGGGAACATGACCCTGGCGC -AAAGTCAATCATTTTCTCTCAATACGGAAGCTTTCTGTCGGTATTACAGACCGCATTCAG -CTCCTTTGGGATAGTCACGACCAGTATTGACTCCCCCAATGGCATTGAGAATTTCAAAAC -AGACCCGGCTGTAAGCGGCCCAGACTTTTGCCTCATTGGAAGGAACACGCTGACAATTTA -TAATCTAGATTGAGTGCTTTTGTTTGCATGGTAAAGCGCAGTCATCCGGGCTGAATTTAA -TCGTTGCAACCCATGTCTTCCTATGCGAGCCACTAATCAACACGGCAATTGAGCTCCAGA -TCATCGCCCGAGTGCATCGTATTGGTCAATATCGACCAACAACAGTATGGATGTACCTTG -TCTCCGGCACGGTGGAAGAATCCATATATGAAATCTCAGTAACCCGACGCCTAGCTCACA -TCACGGAGAAAGAGAAGCAACAAAAGGCAGCACTATTGACATCTCCTGCGGATGAAGATG -GTGTTACTGAAGCGGCCATTGAATCTGCTAATTCGATAGAACTACAAGATGCCACTCTCA -CAACTATGATGCAACGCGGTTCTCTAGGTGGAGAGATGGTGAAGGAGGACGACCTTTGGC -AGTGTCTCTTTGGAAATTCCAAGAAGAAAGATGGCACCGATCTTTCTGCCGAGGCAGAAA -GAGAGGTTGGTCGTTTCCTGAGAGGCGAAGCTGCTGAGCAAAGAATGGAAGGTTGAGATA -CGAATACATACTGTAATTTAGAATGTTATATGTCTTGTTATGATGTAAAACCGATTCTCA 
-CTTGCTTATCATCCCCTTCCTTTACATGGACATCCATGGATATATATTCTAGACTCCTGT -AGGCTTGGTTTGGGATCGCACCTGCCAGATGTAGATGCCCCGGGTGACTGCCAAGGAAAT -GACTAAGCACAGTATGACTAATCAACAACCTGGGCAGGTGCCCCATAACTACTGTTCACC -TGAACGGCGATGACGTGAGTGCAGTTGCCTATATCATCTATTCTTTTATAAATCTGGTTG -TCTTGTTGTTATTTTACTTGTCAAGTTGATGCTTTGTTGAAATCACCTGCAAAGCACTTT -CATCATGGCGGACTCACAGAGTCCTGAGGCTCGCGAGTTGAGTCTGATAGGAAAGGTCGA -ATTTAGAATTGCTATGGCAGACACAGATGAAAAACTGCAGTCGCTCCTTGGAACATATCT -GCCCCCCTTGTTATTGAAGCTTGGGTCGGACAGTGTGGAGGTGCGCAACAAAATCATCTC -GGTCTGCCAGCATGTCAACACCCGAATCAAAGCCCCCTCTATTCAGCTGCCCGTGGCAGC -ATTGTTGAAGCAATTCAAAGAGCAAAAATCTCAATTGATCAGGCATTTTGATCTTATTTA -TGCTCAGCAAGGCATTGATCGCCTCGGCTCCGAAGCCAGAGTCGAAATCCTACTGCCCTT -GCTCCAGGGTATCTCAAAGATCGGAACCTCGCCCACTCAGGGTGCTATCGTATTTAATCT -GGTTTTGCGTCTATTGCCACTTCTAAAGCTGCCAGCAAGAGGAAGTGAAGAAGACCAGGC -CCTGAAAGCCCGACTCGGTCTATCAGATGAAGATTCCCAATTCTTGTCCTTCTGGCTGGC -GAAATTGTTACTTCTTACCCCTGGTCCTAAAGAAACTTCAACATGCCCCGGCCTATCTCC -AGATGAGTATAACTTTTTGAACAAGGGTGTCCCCGCAAACGAGACATGGAACCCCTCTAC -CGATGGTGGCTTGAATCTGACCGAGACAAAAGTCATCGCCCTTCGATTCATCGGAAGTGG -TGCCTTTGATGATTCACAGCGATTCCTGCCTTCATTGATCGCTTCTGCTGATGCCAACTC -ACGCCTTGCGGATCTGGGAGATGAGACCCTGAAACGATTTACTGCGGACATTGAAAATCC -TGATGTTGTTCAGGAGTTGTATGATCTGTATTTCGGCACCATTTCGCCGAATGCGCCCAA -TGGTGCTCCTCCTGCGCGCCCTCCACTCCAAGTGAAGATTCTTGTCTTCCTGGGGAAGTC -CATTCAAGCTACTGCAGACCCTCAGCGAATATTGAGACTCATCGAAGATGGGCTCTTGTC -AGACTCTGCTAGATCCTCTCAGGGGTTGCAAGCGTCGAAACTAAGAACTCAAATTTTCAC -GTTCACGACCTGGGTGGTCCGTATGGGCGCACCCTCTGACCTGAAGCAAATCGCGCCCAT -ACTTATTGCGGGTCTGAGGGATTTCATTCAGTCTCAAGGATGGCCCAGTCCGGCAGCTAG -TGGGCAGAAGCTACCTACGACAGATCTCAGCCTGCGAGGTCTCGCCTACGAGAGTATCGG -TATCATGGTTCCAAAAGTCGATTTTATGTCGCAAGCCGATGCCGTAGAGAACTTTGAATT -TGATCTGGTCCGATGGCTCTTTGCTTCCTTGAGTGCAGATGACTCGAGTCCTCAAATATT -CGTCAGCATTGATCAGGCACTAGGAAGTATTCTCAATTCTTCACTAGACAGCCATGATAA -GACATTCCAGGATCAGCTGCGGCCATTCCTGGTTCGTCAGATGAGCATGCAGCCTGGCGA -TATGGATCCCGATACTGGTTATCGTGCTGTGCGGGGTGTTCAGTATGCCGCAGTGCGGTT -TGCGAACCGATTCCTGCCATTCAGCGATGTGGTCGCTCGTTGGATCGATTTGATGGCCAT 
-TGCAAGAGGCCCAGAAAGACAACAAGAAATTGTCGAGGAAGGAAAGAAAGGCCTCCACCC -TTACTGGTACCGACTCCTCAATCCTGCCAAGGATGGAAAATGGTCAGCTTCCTCAGCAGC -TCCAGTTCACGACGAATCTTGGTTCGACTTCCCAAAGTTCGATGAAGCTACTCGTTTCTT -ACTTGGCAAGGGAGGGCAAGAGGAACACTTGAATGTTCAGTCTTTGTCCGCATCGCAACT -CCTGTCTGGTCCCTACAAGGGTTCATTTACGCCTGCCATCACATTCCTCCGAAATACATT -GATTTGGGAGGCTTTCTCAGCCTCTGGAGTTGCTGCAGATTTAGAACAAGATTGGGACTA -TAAGCTAGAAGTCCTCCTCTCTACTAGCGTGGAAGCCCGCTCAGCCGTGAGAAAGTACAT -TCGAGAGTCTGCGAAGGAGTCTGTTTTGGCATTCTTGTCTGGCACGTTAGAAGGTCTTGT -AACCGGCGAACGTGAAGGCCTACGACAATGTGGTGTTCATTTCGTGGAAATCTGCTCCCT -CGCGCCTAATGACGTTGTTCAACACTTGATCTCGCATGCGACATCCTTGATGGAGCCTGT -TTGCAGCAATAATCAAGATAACCAAGGTATTACTGCACGGGCATTTGGAATTTTGGTTTC -GCACCCTGCTTTCTCAGAGGAGCAATTGAAGAGTTTCATCACTCAACTGACTGCTACCAT -CCAATCATGGAATACTTCTGTGGGTCAAGAGGTCATGCGAGTCCGTGGGGCGATTTTAGC -GCTCTCATATTTGCTCAGTCGACTTGCGTACCGCGGTCTGGTGAACCGAGCCCCAGAAGC -ACAAACCAAGCAGTTCATTGAGACTCTCTTTGATATTCTAGATGTTTCCCGGGATACACT -GCTCCAAAGGACTGCACAAGAGGCCATTGGGCAGCTCAGTCTATCTGGTGTGCTGTCTCT -TGATATTATCTCAGAAGATGGCTGGAAAAAGATACTGGAAAAGCTATCGCGCGACGCCAA -AACTGAGAACCAGATTCCTATCAAATCGCTGGGGTTGTTGACTCTGACTTTCTCCCGATC -AGGCTCAGACGGTACCATGTTCACTGATTTCTTGGGTTCACTCTACAGCCTACATGAAAT -CCGCAGTCCCGAAGTTCAGTTCACTGTAGGCGAATCCTTGAGCAATGTCGCTGTTGGGTG -GGAATCTCGGGCTCTTATCCAAGATTTCGACATTGATGCAGAATTCCCTTGCTCAGATGT -CCCGCGCTCCGTGTTTACAATCATCTGTGACAAGGTCATTGCCGATTGCATTGCGCCCAA -GCCATCTTTGCGGAAGGCATCTGCAATCTGGCTGTTGTCGCTGGTCAAAAACTGCGGTCA -CATGCAGGAGATGCAGGAGCGGCTGCGCAAGTGCCAAGCTACCTTCAGCAGCCTTCTTGG -AAATAGGGACGAGGTAGTGCAGGAGACCGGAGCGCACGGACTCAGCCTTGTATATGAGAT -CGGTGACCAATCACTGAGAGACGACCTGGTGCAAGATCTGGTTGAATCATTCACCTCGAC -TGGTCCCAATCTCGGGGGTGGCAAAATTGAGGCCGATACCCAGCTATTTGAGCCTGGAGC -TCTTCCCACCGGTGAAGGGTCGTCCGTCAACACCTACAAAGATATCATGAACCTGGCCGC -CGAGGCGGGTGACCCCACTCTGGTTTACCGGTTCATGTCATTGGCTTCAAACAATGCGAT -TTGGACAAATCGTGCGGCATTTGGCCGCTTCGGAATCAGCAGCATCTTTTCTGATTCTAG -TGTCGATGGCTACCTGGCAAAGAACCCTAAGATTTATCCCAAGCTTTTCCGTTACCGGTT -CGATCCTAACCCGAATGTGCAAAGATCTATGAATACCATCTGGCAAGCGCTAGTTAAGGA 
-TCCCACCGTTGTCATCGATACCCACTTTGATGATATCATGCAGGATCTGCTGAAGAGCAT -TCTAGCGGGTCGAGAATGGCGTGTTCGACAAGCAAGTTGTACCGCCGTGGCAGACTTAAT -TCAAGGTCGCCGTCCGGAGAAGTACGCCCAGTATCTCGATGAAATCTACAGTAAGGCTTT -CAAATTACTGGACGACATTAAGGAATCTGTCCGCACCGCCGCGCTGAAGCTCTGTCAAAC -GATTACCAACTCAGTCATCCGCACACTCGAGACGAGCGGCAGCGAGAAGCGCGCGGGAAC -TCTACTCAAAAGTGCTATCCCCTTCCTTTTGAGCGATAAAGGCTTGGATTCTAGTGTGGA -AGATGTGCAAGGGTATGCCATTGGTGCGCTCATCTCGATGATCAAGAAGAGCCCTGGCAG -CTTGCTGCGCCCATATGTTCCCAACATGTTGGAGAAGTTCCTGAGTGCTCTGAGCTCTCT -GGAACCACAGGCTGTCAATTATGTTCACCTCAATGCCGACAAATATGGACTGACCGGTCA -AGAGATTGACAAAATGCGTTTGTCCAGCATTCGCACATCCCCTATGATGGAGGTGATTGA -GCGGTATCTGATTGACAACTTGGATGACAGCAACCTCGATGAACTGGCACGGAGACTTGA -GGATGTGCTTCGGTCGGCCGTGGGTCTGCCATCGAAGGTGGGTTGCAGCCGCGTTCTGGT -GCTGCTTAGCATGAAACCTTTGCTCTTCCGCTCATACGCAGACCGGTTCATCCAAATTCT -CACCAAGTACGTGGTGGACCGAAATGACACGGTCAGCGCCTCCTACTGCTCATCCATGGG -CTATCTGCTGCGGCTTGCGTCGGGCGATCGGATTCTCAAAACTTTTGACTTCGCGAAAAA -CCTCTATCTGACCGCCGAGGATGCCACACCACGAGTGATTTCCGGCGAAATCCTGTATTC -CGCGTCCAAATTATCCAACGATCGGTTCATGGCATTTGCGGCGAGCGCATTGCCGTTTGT -CTTCGTGTGTAAGCACGATGGCGATGAACATGTGAAAGAGCAATTTGAAAAGACATGGCA -AGATAACGTTGGTGGATCGCGTGCGGTGTCACTTTACATTCGCGAGATTGTGGGCCTAGT -TTCCGACAATTTGGACTCCCCCCGCTGGGCTATCAAGCACACGGCCGCACGGGCGATTGC -GCAAGCGGTGTTATCACTGGATGCGGAGATTGACCTCCCGACAGCTCAATTGGTTTGGCC -CGTATTGGAGCGTGCTCTGGCGGGGAAGACGTGGGAAGGTAAAGAAGTGGTGCTTAAGGC -ACTAGTGAAATTTAGCGGACAAGCCCAAAAGTTGTGGCAAGCCAACGAGGAGTTGAGTAA -GCTAAAGGTATGGGCTGACATGTCGCCACTTCCAAGATTTCCACCATTATAAAAATAAGG -GAACGAGTTCGAACTATGGAAATGCTAACGCATGTCAGACTATTACGGTTCGCGAAGCGA -AGCGAACTAATGTGCTCTACCGGCCGCATGGACTGCGAGCAATGGGCGAGATTGCACAAG -CACGCAAGGACCTCAATTTCATGCCGGATGTATTAAAGGTTATTCCGGGAGTGATTGAAG -AATTGAACCAGGACCAAGACCAAATGGAGATAGACTCCAGGAATGGGCGCGGCTCGACCC -AAGATGACACCCTTGCGGCGTGTGTACAATGTCTCCTGCAATGTTTCAACCCCACCGCTT -CGGGTGAAGGTCAGTCACCGAAATCCCCGACTTCGCACCCGAAATCCTAAAATTCACCCA -AAAAGAAAACCCTGCGAGAAACCCCTAGCGTCATTCCTTGCTAACACTAACCATAGCGCT -GGGAAAGTACGTGGAACAGGTGACCAGTCCGCTGGCCCAAGTTCTCCAACTTGGTGGGCG 
-ACAGGTCGTCAGCACGGGCTATGATGAGCTCCGAGCTTTCTTTACGCGAGTGGATCAGTG -GATCGTTTCACCGGCATCCGGTGAACAAATCGCCGTGCCCTTGACGGTTCTTGCGGACAA -GTTACTCACGGGCGAAGTGGAAGGGTCTGTGGAGGCCATCCGCAAGGGTCGTGCAGAGGC -CATTCTAGCGTACCTGAAACTGAGTCCGCATGCCCAAGCGGCTGTGCCTGACACCCTTAT -CAAGCGTGTTCGTGAATGGCGAGCCAGTGAACGATCCGGATCGGTTCAGCGTCTACTGGA -CGAGGCTTTGGCCTTGCCGAATTAAGAATCCCACGTGGTGCATTTTATTTACGGGTCGAT -CAATATATGTTTCTTATTCGAAAGAGGATGCATATGAAGTTAATCGTACCAATAGGAGGT -CATTCCAAGGGGGAAATAGTATACTTAAATTCCTGTGAAATGCCCTCAAAAAGGCTACGC -CACTCGCTTAACACTCGGCCAAGTCCGTTTGGAATGCGGAGAGTCCAAAAGAGATCGAAA -GTGTGATCTCTTCGATTTTCACGATCCAGACCGGGGGTCCCGGCCCATCGAGACTGATAT -TTTTCCCCAGATAAGTCCAAGTGAATTGAGCTCATTCTTCGGCTCCGGAAGAGGAGAGAG -GGTTCTGCTCAGCCAGCCCCTGAAGTCCCGAGGTCTCTCTCTTCACTTTGCGTATCTTTG -CCCTAATTCCCTTCCCCTTAACCCGAATCTTTGGGCTCTCTCTCTCTCTGTGTAATAATA -GTAACTTCTCATCCACATGGTTGGCATCTGCATGCCATTTGAttttttttttctctagtg -attttttttccctgttttttttttCCCTTCTATTCTTTTCGATTATTTTCCAATTTTTCT -CCCCTTTTTCTCCTTAGCTCTCTTCAAATAATTTCGCAGGTTGAAAATAGTATAATTTTT -ATTTTTCCTTAATTCCCGCCACCCGTTGGAAAAAGACTGCGCCCATGAACTGCATTTCTG -AACATATAGGCCATTTTCCCTCCCTTTTTTCCCCACCCTATCTCTTAGAATATCTTCTTT -TATCTTTAATATTTCCCCCATACTTCGTCTAGTTTTCCTCTTTTTTTTTCGTTCTTAATA -TATACTAGTATGTGTTTGTGTTCCAATTCGGCCCGTACAACACTGATCCATACAGCCTTA -TGACGGTAACCAGTCTGATCATGCAAAACAGACGGAGCAAAGCCTCTGGAAAACCGCCTC -TTCGGTTACCCATCATTGCCCCCAAGGAAGATCTTTTTGACGCCTTCGCCAAGTCTGAAA -AGCCCTTGATTGCTCCACCGCGCATCCGCTTGCCTCCGCGATCTCGTACCGGGTGTTGGT -AAGTGATCGTCTGTGATCGTCTCTATATCATTTCTGAACCTCAAGCTTATTCGCTATAGG -ACATGCCGAGTGAGTTTCTTTTTTTCCCCCCCCCATCTTTTTACCCTGGGAAAAAAAAGG -AACAGTTGATTAATTGGGGGTTTTCTTCTTTCTTTTTTTCCAGAGTCGAAAGGGTAAGTG -AGTTGACAAATTGTCTCTAATTCCGTGGCTGACGCACATTCTCTAGTCAAATGTGATGAA -GGACACCCGCAATGTAATGTATGCATTCTTCACGCTTCAAATTTCCACTTTTACTTATTT -CTTTCCAGCAATGTACACGCCTAGGCCATACTTGCGATTATCGTCCACGGCTGGCGTTTC -GCGATGATACCCCTCGTATCATGGGCCGCATGGCCGATGTCAAGACGAACGGCCATGTCG -TTTGGGATCGTAAGCACCTTCTTTTCAGGGGTTCCTTCTTTTCTTTCTATAGTACTAACT -AGAGTAGTCACCTCACCGGGTTCCAGTGATGAGAGAACTGATTATTTCTCTCTCGGGGAT 
-TCACTTCCCCCCTTTTCCTTGCTCACTTCGGACGAAGACCGGGAAAAGAAAGCAGAAGCG -TTTAGTCCAGGCACGTATCATGTCGTGATGACACCAGACAGTTTCTCCTCTCTGCCAGAA -TATGCAGACGATGTCGAGAAACCTAAATCAAATCCCGAGTGTAGGAGATCTGCTACCAAT -AGTCCTACAATCACGGCGAAGCCAGACAATCCGAGAGAAGATCCCAATGTGGTGATTTTG -AAGACTTTCGAAGATGTTACTCGTCGCTCGCCGTCTAGCGGGAAGATCAGAATCTCTCCA -ACCTCGGAGATCTCAGATCCTTTCTGCAATTTATCTCTCTCGCCAAATTTCACCTCCCTT -CCGTCGCCTGATGTGAAGGTAGAGGAGAGTCAGATGACTTTCCTTGACCCACGTCTTGAC -CAGCAAAGTCAGGATTCGACTATTTTCTCCCATTTCCGTCATGTCGTATGGAGGCAGTTA -TTCCCTCATGACCACAGGCTGGATGATTCTTGTGGATCGGATAGTCATAGTCACTTTATG -ACTCTTAGCATGGACTTCCTAGAGCAGGAAGCCACTCGTTTCCCTCCTGTGGGTATTCCG -TATTCCATTCATTAGTTCGTATGCTCGTGCTAATAATTACAGCTTTCCCACGCTATGATG -GCTGTATCAGCCCTTAGTCTCTCCCACAGCGGCACAGGTCACAATGTCGATGCGCTCCAA -TATTACCAGCAAGCTTTTCCCTCGCTCCAGGTCAGCCTACGGAATAACGATGACCTCGTT -TCCGATGGACTGTTCCTTACCCACTTCCTGCTCCTGATCTACGAGGTGAGACTTGACTGT -TGCAGTTTCTCCCCCAGTCTCTATACTTACAATCCTCCAGATCGCAGCCGCTGAGCCACA -CGGCTCGAACCTCTGGTCTCACCATATCTCTCGCCTCCTCCACATCGCCTTCCTTCGACG -AGCCAAGTATGGCCAAGAACCTCATCCATTCATCCTCTGGTGGGTGTGCCACATTGACTT -GTACGCTCTGTTCAGCGGAGCAGGGACCGGAGAGTTTGTACGAGCCGTTATCGACCACCA -GATGCTCCCTGGCTCTGAATGTCTTCTCTACCCTTCTGCCCCAGAAGGATACAGTGTGAT -TTACTCCGATGAACATGAGAGCTTGCCGGTGATCATGCGTCTGTACCACGACACATTCCG -ACTGGCGGCTCAACTAGGCTTCTTGGCTGCTCGACTTCGTCATGATAAACAGAATCTGCC -CTTTGCAGAGTTTGATCAGAGATCGCAGGAAGTACGCGATCTGCGCCAGGCTTTTGGGAG -ACTATGGGAATCTCCCGATGTTGCCTTCCTACATCAACACCAGGAGAATCTCCCGCGGCG -GTCTCGGGAAATTATGCAACAGGTTAGTCATTCCGCTTTCACTTTTTTCGTATAAAAACA -AAGCTAACCGACTCCTCCTAGTCCGCTACACTGTACCATGTTTGTCAACTCTTCTCTTAT -AGCAGCATGTGGCCCGGACAACGAGTCGAATCCGATTTCTGCCCAGACGGAGAAATCGAC -CACCACGCAACAGAGATCTTACGCTTAGCCGAGCGGACTACACACACTCGTCGAGCTGAT -CGACACTTTATAGTTTTCCCCTTGTTCCTAGCAGGTGCTACGGCATCAGCTAGTGGATTG -AAGATGATGGCAATGGAATTGATGACTAGTATGGAAGACGAGGAAGATGGCATGGGTCGC -AATGCAGCAACGACTCGTGCTATCCTTCAGATTGTGTATGAGCGACAATTGGAGCGCCTC -ATGCATGTGGGTCATACCTTTGATGTGGATTGGTCTGATCTAATGGCTCAGGAGGGTCTT -CAAATGGTCAATTTCGGATTTTGATTGGATTTGTGTTTTTCAGAGCATTGGCGCATCCGG 
-GCATATTTTTATGGGGAACTATGTATGTTTTGACTATGTTATGATTGATGGGATATGGGG -GTTGTATGTATAGAAATGAATATAATGGAAACTGTTCGGGCCGACCCATTATAGGAATAT -CCAGCCCCTTGGCAAATATTGGCATGTCTATAGTACAGGGTAGTATATGAAGTACTGTCT -AGTCCTATCTAGCACTATATAAGTACATATACTATCTAAGCCAAGGCAGCAACTTGTGAT -TCTCGTCACCTGCGGGGTCGGCGTCTCGCCCAACTTAGCGCCTACAAGAAAATCTACAtc -ctttgctttttctttcctttttccttttgctCTCGGACAATTTCTATGAATAGATGAGAT -ATTTCTCATTGCCCCGCACCTCCGACTTCGCGCCTATCTCCCTAATTGAGAGTTCTCTTT -CTGCTGTGGCGGAATTTTCGTTCTGCAGCTTCGAGCCTATTTCTTAAGTATGAGTGCTGG -CAGCAGCCGTGCTCTGCACCCTGAGGAGCTTTTCCTCAACGGTCCAGGCAGACAAGAATC -TCCCACCATCGGCCCTCTACGTATCAGTAAACGGGACTCATCGTCCCCAGCATCGGCCGC -CGGCTCCAGTCCCCCACCAAACGCCCCGCTCCCCTACCCGGATGACCGTCAACGTCCGCA -AATGCGCACCTCAAGTTCAAACGGATATCATGACAACATTAGATATGGGTCTCCGCCAGC -TGGTCCTGGCTCTGGCTCTGCAAGTCCAAATGAATACCCAACAGCATTGCGGCCGCGGGA -CGGTCGCGAACCACGAGTCGCAACATTGGCCGAACGAAGAGGTGCAGCCCCTAAGCCGCT -TCCCGAATCGCCTGGCCATGATGCGCCAGATCGCGAAGCCTTAGCGGCAAGACAATACCC -TCGACCCCCAGCAAGTCTACCACCCGGCCCACCAGAGCCACTGGCAAATACAAGTCAATA -TGATTACCGTCAACAATACTACCCGCCTCCGCAACGCCAAAGCTCAACATCTGCAGCCCG -TCCCCCGTCGAGTCTACAGGCCCCACAAGGCCCGCTCAATCGCATCAGTTCCACCTCGAC -AACCCGCGCCCAGCGTGGCTCGCCCCCGCCACCAGAGACACCTATCGTTGGCCCAGGCCA -GCATCCTGCCTCTGACATTGAGGCCCGCTATGCGGCCGCTGGCATCGCAGGCACAGGGAC -TCTTCAGGGAATCCAGTCGCATAATGTGGCCGCGCAGAGACGAGCAGAGCAGTACTCTGG -ACAGCAGCCAGTTTTCcagcagcagcagcagcagcagcaacagcGCCCGTGGACTCCGAC -TGAGCAGCCTGGTTCCCAGCCTCATGGGCCTCCGACAGTTTATCAGGGTGATGAAGTTGT -CACCGATAACAATCAGCCTGCTGCGCATCCGGGTCAATATAGTAGCCCGCCACCTCAGAT -GCCCGGGTCCTATGGCAGCCCCACGCCCCAGGTTCACCCCGGTCAATACAACAGCCCGCA -GCCTCAGGCTCACCCCGGTCAATACAGCAGTCCCTCGCCCCAGGCTCATCCGGGCCAATA -TAATACCCCGCCGCCTCAGGCTCATCCAGGTCAATATAGTAGCCCGGCGCCTCAAGCTCA -TCCTGGACAGTACAGTGGATCCCCATCCCAAATGCACCCTGGGTCTGGGCTacagcagca -gcaacaacaacagcaacagcaacagccgcaacagcaacaacaacaAGAGCAGCTAGGAAG -GGTTGCCCCTAATGCCCTGGAACAAGACATGGACCGGATGCGCCTCAGTTCCTCTCCTCC -TCCTGCCTACTCAAGTGTATCTGGCCCTTCTGCTTCGAATGGGTATCCCAATGAAAAACA -TAACGGCgctgcggcggcagcagcagcaacagccgctgcagcTGGCGCTGGAGCGGGATT 
-GATGCATCCGGCCTTGGTAAATCAACCTAATCCTGCCGTCAAAGCACATTCACCTGCGCC -GACAGCCTCCGCACAGGACCATCCCGCTTTCCGGAATGAACAGAGGCAGCAACAGCAGGC -AGGCCAGTCCCCACAGGCGTCTGTTTCTAATGACTTGTCCGGTTTTCATCATCAGACAAT -CCAGGTGACTTCTCCGGGGCCCTCTAGCGCAGGTCCAGCATCCCCGCCACCTCTTCCGGA -GGGGTGGATTGCCCACATGGATCCAAGCTCAGGACAGTATTATTACATCCACTTGCCCAC -CCAGTCAACTCAGTGGGAGTTTCCCAAGGGGCCTACGCCACTTAACCTCAACGACACGCC -CTTGTCCCCAGTGGGAAGTGTGTATAGCGCCCATCCGCTGGCTTCCCCGGGTCTGTCGGC -TTTTGGAAAGCCTCTGGCGTCACCGGGTGTGCCCTTAACCCCTGGCTTTGAAAGCCTACA -GTCTCCCGTTGTGTCTGGATTTTCTGGGCCTCCCCCCAGTAGTGGCATGGATTTGTACAA -AAATGTTCCAACCAACGGTGTCTACTTCGGTCCTTATTTGCGTTACACCAACATGGATAT -TGAGCGTGGAATCTGGATGGGATCTATCCTGCTTGTGACAGATGCCGGACAGCCACCCAC -GATTCATATGCATCAGAGTCTGGACCTTTCTCCGAACCCGAGACAATTGAAGGCAGTTAA -CATTGCCGCCCATCAACGCTGGACATTCTACAAGTATGAGATTGACCTCCAGATGGACGA -TGCGGGTCCTGCAAAATGGACCTACGCCATCACCTCACACCTTGGCTGCACTCGTTATGA -GTTCCTCGTCGCTGGTCGACACGAGACTAACTGGCGGTTCATTGCTACTTCTGGAAATGA -CTTCTCGCTTAATGTCAATGCTAGTGAGCGGGCTCGTGTGGGCGGCATCGGTTACATGTG -GAAGGATATCATGCAAAAGCACAATGAGATCGGTGGCTTCCACACCCAATTGTGTCTGGG -TGGTCAAATCTATGCGGATCGCATGTGGAAAGAGATCCCGTCTCTCAAGCAATGGTTGAC -TATCAGCGGAAAAGAGGCTAGAAAAAACGCTCCTTGGACAGCGGCTAACCAGCAGGATGT -TTCTTACGCATACTTCCATTACTATACCAGCCATTTCGATCAACCTCACATAAGGGAGTC -ATTTGCGCAGATTCCTTATGTCTGCCAGATAGATGATCATGACATGTGAGTCGGCCATAC -ATCAAGAATTAACTAGCAGCACACTGACATCCACAGCTTTGATGGATTTGGCTCGTATCC -AGAGCATATGCAGTTCTCAAACATGTTCAAGAACATAGGCCGCATCGGTATCGAGATGTA -CCTCCTCTTCCAGCATCACACAACTCTCGACATTCTCCGAAACGTCAGCAATGACACGGA -CCTCTTCACTATCACCGGCACAGGCTGGCACTTTGTTAAATACCTAGGTCCCGGCGTCGT -CGTCGTCGGTCCAGATTGTCGCTCAGAACGAAACCCTCACCAAGTCATGGCTGGTCCCAC -GTATCAGGGTCTCTTCCCAAAGATCGCAATGCTACCGCCCAGTGTCCAACACTGTCTCTG -GATGGTCGCCGTCCCACTCATCTACCCTCGTCTAGAGACGGCAGAACACATCGTCCAGAC -TGTCGCAACTGGAAAGCGAGCCGTGACTGGCGCGTACAATGTTCTAGGCAAGGTAACCAG -CTCCGTCGCCGGAGTGGTTGGCGCCAAAGACTTCGTCGGCTCGGGCTTTGACTCAGTGAA -GCGAGCCGTAGGTAAATCCGGGCTGATGGGTGGTATACTGAGTCCATTTGGCGAATTCAA -CTCGATGGACGAGCTACGCGACCAATGGACACACGAGTCGAAGGTCTGTCCCCTCTCCCG 
-CCTATCACAATTCCCATTCAATTATACTAACCTAAACTAGGATCTCGAACGCACATATCT -CATCCGCACACTGCAAGGCATCGCCCATCAAAAATCAATCCGAATGACCTTCCTCTCCGG -CGCCGTCAACGTCTGCGGCGCAGGTCTCGTCCACGACCCCTCAAACCCTTCAGATCACAA -GACAATGTATCAACTCATCGCCTCCTCTGTCGTCAATACCCCACCCCCATCATACATCAT -CAAGCTCCTCCACAGCCACAGCAAGCCCCTCTACGTCCCTGCCAACGGCCACAAATCCTC -TGCTCAGGTCAGCGATACAAAGGAAGACATGATGGAGATCTTCCAGACGGACGTCAACGG -CCAGGCTCGTGAGTATCGTAAGCTCATGGCAAGAAGGAACTACGTCGCTATCGTCGCCTA -TGACCCTGAGGCAATTAATGCCTCTTATGGTATGTCGGCTGGTGTTAGTAAATTGAGTCT -TGCAGCGGACTTCATGGTTCAGGGGGATGGGGCGCTCGGGAATGTGGTCAAGTTTGGACC -GGTTATTGTGCCGTGTCTCGATCAAGGGAGGTGAGAGGTGAGAGGTGAGGAGACTTGAGG -CGAATGTTTCTCTTTTTATACTGTATTTCGGGATCTTGAGATGTTTTTTTGCACAGCATT -CATTCCGTTCTTTCATTCGTTTTGAATCGGTCTTTTTGGCGTGTTTGCATATGTTACTGT -TCATCTTTCTCTATTATCTAGCTTTGTCTGTCCTGTTTGGGACAATTATACCGCTGTTCT -GACTGAATCTTGTACTTGCTTGAAGTAACGATTTTAGAATGATAGATGTCAGGCCGAAGA -AGATCATTTCTCGTAAAACAAGGTCTGATAATGTACAGAATAGGGTATTCCAATAGTAAA -CAAAGCAACCAACACCAGCATCTAACCCGAAAAGATATACTCTATCCAGTTGGATCACCC -CCCGGAATACAAAGCCCAATCCTCCAGAAATAATCCCGATGGGCATAAAATAGTTCCCAA -AGTCGATACCTCGCCGGTATAAGAGACCAATCCCACCGAAGAGGCGATAATCGCTCCTCA -CAACCAGCAAGATCGGAACTTACTTGTGAACGTTCCAAGACAGCCTGCTCCCCTGTCACA -GCGCCGCTTCCATCATTATCACCATCAGTCGACCGACACCCCTCTCCACCAAAACTAGAA -CCAGCCAACCCAACAGGAATATTCAAATCCCGATACAATTCAACCAAAATCCCCTCACTC -CCTCCACTGGCAATACAAGTAGGATCATTCGCACCAACACAATGAACCACATCCTCCACC -CGCGGCGGCAATCCCTCCTTCGAAATAAAAACCTCCCCAGCATGAGGACTATACCCCCAC -TCAACAAGCGGTAACATAGGAACCGGATCCCCAGCATGCGTTACACGACGAAACCGCTGA -CCTCCTTCAAGCGGTGGAACCGTGATCCCATCAAGCCCGAACTGCTCATCGAGGAATCTC -GCAAACGCCGCATTCCCAATCATAGGCTCCCCAAACGTCGTCACAGTCGCATCCCAGCCT -TTCAGACGCATCTCCAGTCCCGCTAGTGCTGCGACTGCTCCGCCAAGTGAATGACCTACC -AGCGTTATGGGATAATCTGGGTATTTCTCGCGAAGAACAGAGATCTGTGGGAGGATTGTT -GTTCTGGTGTGCTGCCATGAGCGCATGAAGCCTGCGTGGACGGTGCAGTTCTCGCAGGGG -GTGTTTTCTTCTTGGTAGGGGATGTAGGCTTGGGGGTAGGCGGAGAGGTCGATTATTGTG -TTTGTTATTGAGTATGTGCCGCGGAAGGCGAGGAGGATTTGTCTGGAGGTTGGGTTGTGG -GAGAGGGCTATGTAGCCGCATGAATCGGAGAGGAGGAGGCCTGTGTTCCAGGTCTATCGG 
-TTGGGTTAGTGGGttgttttttttttatttgttgtttttgttgtggatgttAGTAGATGG -GCTCACGGTTACTAGTTCCAGATCTGGAAAATCGGTGCAGCGGCTGAGACACTGGAATGG -TTTGTGTACTTCTGATGTTCCGACGCAGTATGAGATGTCCACTATTCGGGATAACTCCTC -GAGGGAGTTGAATAGGTCGGTTGAAACATGGTTAAACTCCTGGGCTGTGGCGGGGCTCAG -GAAGTAGATGTACAGTAGTAAAGGGAGGAGTAAAATGCTAGCCATGGCGTAACATGCTGA -CGTGGGTAGCTGGAGGGAAGATATAAATTCATTTAGGAGAAGGAACAAGACCGTGTGATG -TGGAGATGTGACTGGAATCCGGGGTAACGTGGAATCACATTATGTCATCGGCTTCCTTAT -TGGCTGGTCCATGTCGATCAAGATACCGGGGGTGTACATGGGTCTACAAGTGAAGGCATG -TAATCTAAATGGTCACGAGAGGCTGGAATGTACTCGAATTCTATCCAATACTATATGCAT -GCTGATCATGGACTAAGGCCGGCATTAAAAGTTCAAATCAGGCGTTCCAAATGACAACAC -GACCGTCGACACCGCTCGCTACAACAATATGTTAGTCTCTGGTAGGTCTCGTGTTTGTGG -AGGGTGATCTTACTGCTGAAGGAGCTCACAACTCCGTTGGCTTCATCATGGGCCCGGATG -GTGTGGACAGTGTTCTGGTGGGTAGACTTGAGCTTGGTATCTGCCTGAGCCTGGCCCTTG -AGATCCATCTGCCGGAACATGTTCAAAGCAGACTCCTCGCGGGCACCCCCGGCTGGTCCA -GCCTTGTTCTCCAGAGAGCCGGCCAGTTGCCATCCATTCTCATCGCCGCGGAAACGGAAG -GGCTCACAATCCTAGATCTGGTTAGCACAAGCCGTTGTATTACTCAAAAGGAAACAACTT -ACATGACCAGCTGCAATAATCTCGTTCTCGCCGTTCCAGATAAGACTGTTGAGCGGCAGC -AAACGGGTGGCGATATTCAACATTGCCCGCGGGGGCTGCTCTGGTGCGCTGGGGTACACG -ACAGTGACGCTACTATCGTGTCCAGTGAAGGCAAGCACATTACCACTAGGAGAGAATGCA -ACACCCTGGATCCAACCAGCAGTATCATTCAGGTACTCGCCACAGATGGTGTTAAAAGGA -AGGCGCTCTCCCCACGCGCTGGGCTCCGGGCGGGTATCGATACCCTTGATGAAGCTCGAG -AAGACTCGGGCGTGAGAATCGGTCGATCCGGCTGCCAGCAGGACGGAGTTTGGGTGCCAC -GCCAGGGTTGTAATGGTGCTGCGAATAGGCTTCTTGATGTGCTTTGAGATCCACCAGTCG -TTCTCCTCCTCGAAGTAGCAGACTGCAATCACTCGGGCGCCGGAGCCGACGGCGAACTTT -TGCTCCGAGGGAGACCAGCGCACAAAGGTTGCGGCACGGTTGATTCGGAGAAGTACTAGA -GTGGGTTTCCATCCGGTGGGAGTCTTCTCCCAAACATAGGCATTGCGATCTATAGAGCGA -ACACCTGTTAGGTATTTGAGGCGTGGTAGAAGGAACCTTACGACAACATACCCTGCGAGC -AAGTGACGATTCGACCACTGTTGGGGGCAATATCAACACTGGTGACGATCTTCTCGTGTC -CCTTAAGCTCATCGCTCAGCGCAAACTTGTTGCCTGATTGTTGATACAGCTCCACATTGT -TCTCCCGAGCGACAGCAAGTGTCTTCTTATCGGATGAGAACGAGTGGTCTGCAATTGGTG -CATGGAAAAGATGGTGAACCTGAGGAGTGGCCATATTGGAGATAATAACGACAGGAGTTG -TCTCTCTACCGTCGTGCTTGTATACTTGGTTGTGTAGTCGCGGGACCAGAAGGGGAGCTG 
-GGTGATGACGGCGGTGCTTAGTCAGGACATGTATCACATGACCTGAAGCTACTTTTATGT -CGAGGTTTTGAGTCGCCATTGACACGGCACGCGATTGAGCTCCATTTTCCCATACGCTAT -CTCATTTACCCTTTCAATCAGACGACAGCCTTTCTGAAATTATAAATCATCCAAAAAATG -GCCTCAAAATCATTGCCCGCCCGTGGAGGCCCCGATACTAAGGCCGTCGCTACCGGCCCT -CCCAACCAGACGTGAGCTTGTCCTTCCAAGCACCGGAGAATGTCGCGCATTGGCGCTAAC -AACTTTTAGGCTCTACTGCACACACTTGCCCGATAAGAGAATCCCGAAACATGATCTGCG -AACGGCACTCTATGCCTTATTCTCCACATACGGCACCGTCCTCGACATTGTGGTCATGAA -GACCAGCAAAATGCGAGGTCAGGCCCATATTGTGTTCAAGGATGTCCAAGCCAGTACACA -AGCACTGCGGGCTTTACAAGGATTCGAATTCTTTGGGCAACAGATGGTACGTTTTCATTA -CTCTTCTACCATCGCAAACCCCGCTCAATCAATGGATGTCTAGAAAATTGTCTACGCCAA -AGGATCATCAAATGTCATTTCAAAACTTCGCGGCACATATGTTGCTCCAGTCGAGGCCGC -GCCAGCTCCAGTCACGACCGACCTCCAAAAGTCTATCTTCGGAGCTCCTCCAGGCGCCCT -CCCTGCACGACCCACAACCGGTGCCAATGGAGAGGGTCAAGGCTTGAAGCGACCGCGTGA -AGAGGAAAGTGACAACGAAGAGGCCCCGATGGACGAAGATAGCGATGTACCAATGGAAGC -TTCCTCAGATGAGGAGTAATCCAATGAACGGAGCCCCTGCTGGGGACTGGGAGAGACTTG -AAGTCCTCACCTTGCATGTGATTTTTTTACTTGATTCTCCAGCGTTTCCAGCATATGCAA -GAAGTATTGGCTGGTACTGTGCTAAACAAAGAGCTCGTGTGGGACGTGCAGGTTCACGCA -ATGACCTACAAGGCCGAGCCAGATGAGGAGATATGACCCAGGAGTGACGCGCTGTGCGTC -TGCAATATTCGCCATGGCATCCGCGGTGTTGCATGGGTGGACAAAAAAGGCCGGGGCGCC -AGAGTCAGGATGGTACTACAAGCAAAAAAAAAACAAAAGACATGAGCTCTTGTCACGAGG -TTCCGAGAATGCACCTACACCCATACTAATCCCACCCATTACACCCACACTCTTCAATTC -TTGCCTGTACCGTTCCGGCACGACATACTGGTAGACTTCGTCTAGTCCGAGTGGGCCGTT -GTGCCACCTGAGGCCGAAGTACAAGACGGGGACCTGATATGTGGGGGACAATAAGATGTC -ATAGTCAATCTGTAAGCTGGGGAGTTTGGCGCGAACGAGGGCTTCCTGCGGTGAAGAGTC -AGAAAGTGGGCACGTCCCTTGGGAGGGCTAGAACTCTGCAGACCGGATCCTTATCTTCCT -GTGATTCTTCCTCTAGCGCCGGGGCTTCTGGGCTGGGGAGATTGACATATTTTGTTATTC -TGAGGCCTGTTCCATTTGGCTAGTTTCCAAAACATATTTTCCCATTAGTTCTTCTGGATT -CAGTCTCATAAAGAAATAGATAAGATTTCAATGGTTCATGCATGACAAAAAAGGGATACC -TTTGTGAGCAGTCGAATGGCGGGCCAGTCAATATTTTGGGCTGAAGTGCATCGATCAGCG -AGATCTTTACATGCACTCTCAAACTCCGACTGCACCAGGAAGGGAAATGCAGAGCACGAT -TCAGCCATGCGGCTGGAGCCTGGGGAGGTGGCTGTCACTTCGTATCAGTGTACGAGTTGT -CCGGATGCAGATTTCAGCTGTGAATGCGGCGTGGCCAGGGTACGGAGGTTTCGCAAGGAT 
-AAGAGCACTTGACCTTGTCACTATGTCAAATGAGAAACATTGGAAGATGAATCCACGGAT -ACAAAGTGGTTAAAAAAAGCAAAGATTGATTGCAGTGGTCCGGATATAGCAAAGGAAGAA -AACAATGGCGTTTGTCAGGAATACGACGTGTGCTCAGAAGTAGCCAGTACTTATGTTCTG -TGCGGTACGTGTATATTGAATACAAGTAGTGAGTACTTCCGTTTGCGGTATGTACCTTTT -TTTCTTTTTTCCCTCGCGTGGAGATGGTTCCTTGTTCTTCTGCATGCAGCTGTTGATGTC -CAGAGTAAATGGAATCTGAAACTTCGTGGCAAAATGCTTGCGTCTTTTTTTTTCTCCTAC -TGTTTAGTTGCATATACACGTTTGTCGTGATTCTCTCCACTTATGTATCGTATGTTCCCC -CTTCCCTTATCCCCCTCAATTCTCTACTCCGTACGAACTGTTTAAttttctttttgcttt -tttctttttaatttattttttgtattttgcATGTCTATAATCTTAATGTTGCGTATCTAC -CGATTTTGATCTACCCCTGGTCAACCGAGACCGAGAGATCGTGAAATCGCGAAATCGTGA -AATCCTCAAAATCTTGCTTGAAGGGGATTCCCGGCTTTCAAGATTGGTTATCTGCTTGAC -GTTACCTCAATTCCCCGTCATCCGTCTTCCATCTTTCCAGTCCAACCAATTCTTACATTG -CACCCAATCTGAAACACGGGCCATCATCTCCGGGCATTGGGAAACCCTGAACCGCCAATC -GTCTCAATTCCAAACCTGTTGTACGAGCTTGGGGCTCGGATTGCGTCCTCTGTGGACCAG -GGCGTAGCTTGACGTCAGAAGTCCGGGGTGCGATCTAATCTGAAGAGCTCTCTCTATCGG -GGTAAGCTTGGAAACTCATCACTGCGGGGGAACAGTGCTCCAAGCCCTTATTCTTGTATA -TCATTCTGCTCCTGCACATGCCCCGTTGAATCTGGAGCAAATTCAAATCTTGCATCTCTG -AAGTCATGGGCAACTCTCAATCGAGTCAAGCCAAAGACGAAATCCGCCGCTCCAACCGCC -TTTCAAAGCCCCTGACGAAGAAGTTCGGTCAAAGCTCCCAACTACCCGAAACAGACCCTG -TAACGTTCAGTTCAGGGCTGATAGGGTGGCAAAACCCCTGGGTTGGCAAGAATATCGCCA -GTGCCCCTATTGTCCCTATAGAAAAACGGGGCAGTTACCCTAAGAAAACAGAGATCCCAC -CGACTCTTTTTGAAACAGACTCGTCCGAAGAGACTCCGACAGAGGAGCGAGCCTTTGTGT -ATGATCAGAGTCCCACGCAGCGTCCAATTCGTCCAAGCCTCTTGACTGCAAGCTCTTCGA -GAAGAACATCTTACCAGTCGGAAACTTGGGAATCGGGTTCCCAGCCCTCGCCTCTTTATG -AACAGCCACCGAAGCGGGCCAGTTCTACTCGAATTCCTCTACGGAGGCATAATAGCGCTG -TCTATGAAAACCACATCGGGGACTCGACATCTTCTAACACCCACTTCTTAGTTGGGAACC -AACGATTCTCATTAACACGTCGACGATCGCTCCTAACGCGACCTGGGGTTGCCACAAGAC -GAAATGGTACAGTTCGACGCGTCCCGTCACCCATTGGGGAGCCTGAATGCCCGATTGAGG -ATCCGACTGAGTCAACCGTTTTGCAATGGCCATTGCCTTCGACCCAGCGACCGCTGCGTC -TGCCATCCCCGGTGCGGCCGAGAAGTCCAATGGAGACCCAATATACTCAGCTCGGGGCAC -TTAAGTTAGGGTCTTTGCGAGTTGTAAATGGCTCGGCCTCTCCATGTCCGAGCGAACGCA -TTCCTTTCAGTGGGCCATGTGCTTCAGGTCCCGGGCTTGGTCTTGAGAACATAGAAGTCA 
-CAGGGCCTAGCAGGGGGTCGACGCTGGACATTCCATCCTTGCCCGAGAAAAACAAATCGG -ACGATGTTCCAGGCAGTCCATTCTCCTTCGAAAAGTCGCCCACTATCACAGTCCAGCCTC -GGGCAAAGTCTTTGTTTCCTGGGGATCCAGAAGATGAAGGAATCGTGCTCTGTGATGATA -CAAGAATCCAGTTGGAGAAAGGTGCTTTGGACACTAGTCTTGCCCGAAGCACGTCCCAAT -CCCTCAACAAGTCCGACAGTGGCTATAGCTCTGCCACCTCGATTCACTCCATGCAGCGGA -GCCGGACTCAGAGCTCTGCTACTTCACAGACGTCGAGCTCATGTGGTACAGATAGCTCAA -AGAACATCTGTATCCTGAACAGCTCGGCTTCAAGTCGACTAGGCGATAAATTGCAGCGTC -CCATGAGCCTGCAGACAAACCCGGAGAACTACTCAATGCTGTATCCAGTTGCTGCTCGCT -GGTACGATTCGAGTGACCCAACTCCTCTCGCTCCATCGCGATCGCGTCGGTCAACGTTGT -GTGCACCCCGATACACGGAATATTCATGGCCGCGTGAATCTCTCCTTGAAGTCAAGTCCA -CTGCCCTGGCCTCTGTTCCACACCAAAACCAACAAGGTTTTACCAGAGGACCTTTCTATG -GAGATCGATTGTCCTTGGGGCCGTTTGACGTTGCGTCCACTATGGGGTCTGCCACTACCG -GAGAATCACAACTCAGTCACCAGCAGCCAATCACTGAAACCAAGGAGCGACTCCGCAGGT -CGACCTCTGAGTATCAACTCCATGAAGAGTACAATACACAACGGCAGGTCTCGCGATCTA -GAAGCCGACTCGGAAGTCGCATCTGGAGCATGAGACCAGGCGCTGACGCTCCTCCGCTGC -CCACTATCATGTCCCCAGGCTACCTGCAGGATGACCTTGACCTTGATGCTGAATTAGCTT -CCTCTGAGCCGACGAGAGGCCGGTCTCGAAGCCGAAGCAATGACTATCACCGTCGTCGGT -TGACGAAACCTCGTCCACAAACGGACCTGTGCATATAACGACCCCTCCCTTTTTTTGGTT -CTTGATTTTTCTGGTAATGATTTTGTTGACCATTTATTGCCTTGGATCTTGCTTTAATCT -CGTCTCGTTTGCCTATGGCGTCTACAAAAGTTTATTATGGAGGTGGTCCTTGTTTTTAAT -TTTTTTGGGAGGTTTTTTTTGGACGTGATGCGGGAGTTTACGACCCATTTTGGATATGGA -GTCTTATTAGGTATACCCTTTTGGAATTGGAAGTGGAATTGGCGGGAATCATATCTGGTG -TTATACACAAATCCATGACTTCTATATACCATGTGCTATTCTACAATTTTAATATTTTTA -TTGGGCGTTTAGTCTAGTCCCCGGAATGCATGTATGTCCCCGATGGGATAAGATCTCACC -TTTCGCCTATCTTGCCTTCTTATGCATACGTACATAATTCCTCACCGCGTGGTTGAGGGC -TTCCCCGGCCATTCCTGTACACGAAGTAAGGTTATCGAGTTAAGCAAGGGGCTTCTACTA -GGCGCAGGGTCCATCAAGTTAGTTTCTCTACCATTTACCGCATATTACCAGTCTAGACAC -GGGATAGCAATATAATAAACATCAATTTAGATTAATATAACAAAAGTCAGATCGTAGAAG -TACATAATCATCAAATGCAAGGAAGAAAAGAAAACGCCACCCGGAGATCCCAATGCAAAT -GCAATCAATTATATCAAGTCAATGGATTTCAACCCATTAACACTCATAGATTGATCAGCT -CACATAATCCGAATCTTCTTCGGCGAAGGAGTCGCCTCTTTCGGAACAATAACAGACAAA -ACACCGTTCTTCAGACTCGCGCGAACGGAGTCCTGATCAACACGCGCAGTGAAGCTGAAA 
-GTTCGCTGGAAGTCACCGACAGACCTCTCTTTGGCCCAGAAGTGAGGTTGGTCTTTCTCG -ACAGGATGGGAAGCAGCGGTGTCGACAGTGTCGGCCGAGCTAGTAGTAGATGCTGCATCC -TCGTCTTCCACTGTGGGCTTGTGCCAGCCTGCAGGAGATGAGCTGCGGCTTGAGCTGGCA -TCGTTGTCATCATTGATGTAATCACGCTCTGTGTGGCCCTTGATTACGAGAGTGTGGGGA -TCGGTGAACTCGATCTCGATATTGCTTTGGTCCACACCTGGGAGTTCACCGTCGAGGTGG -TAGGCGTTTGTCAGCTCGCGCACATCAAAGGCTGGTGCGAAGGAGGAGCTGCGTGCTTGC -TTGGAGCATTGGTAGGACTTGGGACATTGGCGAGACTTGTATTGATGGGGAGTCTCGTAG -TCATTGAGGAGTTGGAAGAGTGGCGAAAGGCCGGATTCAGTGTAGAAGGCCATTGCTGGA -GATTTTTGGATTGCAAAAAAAGTTGTAAGAGTGGTAGATTGTCTGAGAGACTATGATGAt -tgtgttctgtttgatctgttctgttTAATTGTCGTGGAATATCTGCCATTTTATATCTGA -TGTTCATAGTACCTGAGGCTCATTGCTTATACTCCGGCGGAAGCATCCAGAAACTAGTGG -CGCAGTCCACAAGCCGCCAGCAACTTCAAGACACAACCACTCCATTCCATAAGCTGAATA -ATTCTATAATTCTATAAAATTGACTCCCCGGACTGAATGAATAATTCATGATTGGTAATT -ACTCTATATCTCGAAACTTCCGGGACTTTACAATGAGTCAATAGTGGCGATATGGTAGGA -TGGGTAGAACACTGCATGACAAAGCCCTGATGACTCAACAGTCTATGATATACTATTATA -CAAAATGCCTCCAAAACGAGGCCCCAACCACATCATGCTTGGCCATGTCTTTTGCGTAGC -ATGTTCAACATGTTCAACATGTTCGCAGGGATCCGTCCTGGGTAGAACCGGCAACACTAT -TAGAGAATTTCGGTTCTATTTCAATGAGTGAACTGGTGGAGACATATGCCTGAAAGGCGC -GAGACATACAAGGCTAGCCCTGTACTCCGTGGTCACCCCCGGAGAACTAAGATTCTGAGA -TCTGGGCCCGCTCATCGGATATTGCGGATTCCCGGGGTAGCATACATCTCGAGCTGTCGA -TTGCCTTTGCGCAAGAGATACTCAATTGTGCTAAAGTCTTTCTTATTCACTGAAAGATGT -TTCTGGAACTCTGCCCTGTGTACCGTCTTAGCCGGATTGAACCGATATCTAGGGGGAATG -CTGTTGGGAAAGGAAAAACGAGCGGAAAGGGAAGAGGGagataggcagataggcagatag -gaagataggcaATGGATAGAGCTTTACCGGGCATATCTTTTGAAGTTGATCCTAGAGTCC -TACAATAGAAGGTATTAGTTGAGGTTGACGTCGGATGAAGTGACATAAACCCACCTCAGG -CTTATTTCGGATCTCCCGTAGACATTTTCGGTATAGAGACAGAACGTCCCTCTGGAGACC -GGAGAGCCTAACCATTGCGAATTAATAGAAAGGAAAAAGCCTGTGAATCATTGGGTGGAG -CTCATGTTTCCCGCCTCTGGGGAAAAAGAGCTTCCCGGCCGAGTGCTCAGATGTCCCCAA -TCTCCAAATCCGTCGAACTTCTAACTTCCCCATATTCCCCAATGGCTGCTTTTACTGTGA -GTGTGAAATTTGATTATTCCAAGAGAAATCAAGAACAATATATACATGAAAAAAATGATC -TAATGAATAGTATTTTATTGTATCAACATATACTAATTTCCTAAATAGCACCTCGTCCGC -TTCCTGGCCAAGGATGGCCAAATCTATTATGGCGATGCTATTCTCCCATCCGGTGTGAGC 
-GATATCGCCAAAGCCACCAAGGCCAGAGTCATCCAGGGTGACATCTTTGGCCAGCACCGG -GTTACCGACCAGATTGTCGATGTGAGAATGCTCCTCGCACCTTTGGCCCGTAAGGATATT -GGCACAGTTCGATGCTTGGGTCTGAACTATGAGCAACACGCCAAGGAGTCGAACATGCCC -ATTCCAACATATCCTATCCTATTCTACAAGCCCATTACTTCAATTTCTGGCCCCACAGAT -GATATCCCCGTGGCTCAACTTGCCCAGGAAGGCGAGGGTCTGGATTACGAATGTGAACTC -GTCATCGTTATCGGAAAAGAGGCCACTGATGTCCCCGAGAGCAAGGCTCTGGACTATGTC -CTGGGCTATGCTGTCGGCAACGATGTTTCCCACCGCGATTGGCAGATCAAGCGTGGTGGA -GGTCAGTGGTCACTGGGTAAGGGATTCAATGGATGGGCACCGTTTGGTCCTGGTATTGTT -TCTTCCAGTGTGATTGCCGATCCGAATAACTTGGCTATCTCAACCAAGTTGAATGGACAG -ACTGTCCAGTCTTCGTCGACTAAGGATATGATCTTCGGCGTGGCTAAGACGATAGCTTTC -CTGTCTCAGGGAACTACACTCATGCCTGGAGATGTGATCTTCACTGGAACGTAAGTACAC -CATGATGAGCAAGCACATTGGCGTTTGACTTTGATAGACTAACTGTTTGACAGACCCCAA -GGCGTGGGCATGGGTCGCAAGCCTCCTGTGTGGCTGAAGGATGGCGATGAAGTCGAGGTT -TCGCTGGAAGGCGTCGGGTCCTGTGTCAACCGCGTAGTCTTTGACAAGCCCAGCGCCAAG -CTCTGATCTATTTATCTTGAACTTACCGTAAGACCAATGAAATGGTTGGCACTGGTACAA -GTTGTATATAACTACCGGCACATTTCAGTCTTTGCAATTGATCATAGTGCTTATTTGGAC -CCAAAAAATTATCTTTGTATCCGTAATCCCGTGTGCATTACATGTAAGAGAGAAACATAA -ATAAGGATCTAAGCAGTTTGGGCGCCGTATCCCCTGATTCTCTCGGCGAAGGCCCAGCTT -TGACCCTGAATCTGCTCATCCTCCTTCAGATCAATACCATCCTTCGTGATCACGGCTATC -GAGTACTTGTCGCTACTTCGTGCATCGCGGTAGAACAAAACCTTCAAGCACGCCTTCACG -GCCTTGATAGCATCTTCCTGGCTGACTTGTTCAAGAGGTGTACCCTCAGGGAACTCGCGG -CGCAGAATCGGAACCGCAAGATGAGCACCAAAGCCTGTAGCTAGATGAGGAGCAGAGAAT -GTCGTTCCGAGCAAATCAGCCGAGCTAAGGAAGGGCTCTCCTTCCGCATCGAAGCCTGCG -ACAAGGACGTGGTTCCACAGTGGGTTGAACTCGGAGCGTCGTTTGTAGAAAACCTTTGCA -AGGTAAGTGTGCAGGTTCTTGGCGTTGAGCATGTTTCCGTGAGGGGAGTAGTTCTCCTTG -ATATCCATGGAGTCGAGGAGACGGTCGAGATATTGCATGTCGGAAACGTCACCGCCAAAA -CCAACCATTGCTTTATCGCCGAAATTACGCAGACGCTTCGTGTCGGAGAAACGAGCCAGG -GATCCATAGGAAGCTATATATTCGCAAATAGTATGGCGTTAGATGGCAATTCTGCAAAGG -AAACGGAACGTGACAATGGAACGGCATACCCAGGTTGTCGGTAGCTATAACCACTCCGCC -ATTAAACTTGACACCCACCACTGACGTTCCGGTCACGGCAGGCGATGAGGTATGGGCCTT -GGGGCCTGTGGTCTGCAAATATGAGTGATCGTAGGCACCGTAGACATCGTTCCTGGGCTG -GATGGATAGTTAGCAACAGAGCAAGGACTGGGAAGGTTGTGCCACCTACGCGGCCCCAAG 
-CTTCAGGGAAGTGGTTCATGATGAATGAAAATAAATGATAGTTTCAACGGGGTTCAAAAA -AGTGAATTAAATTGCAGGCAATGAGGGAAGACAACAGATGGATAGCACAAAGTGATGACG -ATAGATGGGGAGCTATCTTAGGCAGTCGAGCTGACGGAGAGCTCGGCTGACTAAGATTCG -CGGTCACGGCGGAGCCCTGGATGTACAATATGGAGCATATATAGAGCCTACTACGTATGT -TGAATACCCTCTTAAATGTAAATAATTAGAATTTAAAAGTAGTTGATTGGCCATACTGCA -CTTAGCTAGTCATCTTAGAGGCCAATACTTTTGTTCAACCTAATTTGATGCTCGTATTCG -CTTTCAAAACTTCTATACACACTACATCGATGGATGTGTCCCCGGCACTTTCTCTATGAA -TATCAAGACTCCTTTTCCTAAAAAATTCAATGTCTGCTCAGTGCTCAAGTCGGGCTGGTT -CAGCGGCATCGGCCCATTCCGTTGACGACATTGGCTCCTCACGCGTGTAAAATATCACGA -ACCGGATCTTCCACCGGCATTATGATATCAACTAAGTGGGTGACCAGCCTCTGTTCACGG -CAAAATATCTATGTTCACCTCCAATAAGCCCGAACTTACTCTCCATGCAGCTACCAGCAT -CCGGGCCCCAATTGTGGCCATCTCCAAATTGTTGAAGTCATCCGGTGACTACAAAATAAG -CATTGGGAATCCCGACGATGCGAGCGCCGTGCAGTGGGAGAATATGACGAAGGAGCTCAT -TCACAAACCAAAATACCAGCTCGAGATGATAGATCAATATGAATATGATCAAATTCAGAG -CGAGAGGCGGTCACTCCTATGGAAACGAACACGAACCGTTGGCGTTGAGAATTCAAGAAT -TCAGTGCCGTCTCAATGGACTGGCCGAAACTATAGGCTTGTGATGGGCACATCGAGTAGA -TTCTTGCCGTCTTTTCAGGTGCAAGAAGACTGGGCAAAGGTGGAAAACTTCAAATCAGAG -TGGAATACGGAGAGGACTTTGATCGAATGGTCTTAATTTCTTGTTTAAGTCTGTGTGAGA -AGGCGAGGCGGCGCAGACAGAGTGCATCCGGTGGCGGCGGAGGCTGAGTCGAGTGTTATT -GTGCCAACGCAATGTACACGAGTCCAAGCGAGCGCTATGTCAATCGAGAGGGTATCGTTG -ATCTTCTTTCAGGTCGCTGTCTAGAGTCATTGGATATACCACCAGAATGTTTCTCCAAGA -TTGCCCTCAAACTAAACTTTGATCAAGGATAGAAGACCCcttcttcttcttcttcttctt -cttcttcttcttcttGAATGATAGGGTGCCAGAACGAAAGTTCCCTTGTAAGAAAGGCAA -ACATTGCGTCATTGAGTGATCGAAGCATTCAAATTATGCGCCAGAGTTACTTTGTCAGGG -CACAATTTCTAATGCCCGCAGTTCCCGAGAGGATTGATATACCCAAGGATATTTGGCATT -GCCATTCGAAATATGGCATTAGGGTTGGTCTTCATGGTCCGGTTGCCGCTGAATGAATAC -TCAGGCAGACCCAGAGACAAAATCAACCGAGCTTCGCTGGCCGGATGCCTTTTTCTCCTC -CGATTGTTTACGAAAATCTTGCTGAGTTCTCAAGAGCTCTTCCTTGAAACCCCATTTGGG -TAGATTTGATGTATTCCACAAGTCGGATGGCAACGATGTGTCATATTGCGATTGTTCATT -GATTCCAGCGTGCTCTATCATTTTCTTCAACAAGCTTGGATTCTTAAGCGATGATGAGCT -GGCCAGTTTTGAGTTGAAGTGTACGCCTTGCTTTTTGAGCGAGAGGAAATGTTCGAATTT -CGCGTTCGCGGCTGGGTTGGGAGACCCAGGAGGTGATGGTGGGATGTCTAGATTGGGGAC 
-CGGAGGAAGAGTGAGGTCTTGAACAATTCCACGTGTAGTTGAGAAGGGAGATGAGCGTCC -ACTTGTCGAATGCTTGTCCTCAGAGAGAGGCGCCATGTCCATAGATGCGGGGCCAAGGAC -AGGTCCTTCTGGGACTGAATCGGTGGCAACGTGAACAGGTGTCTTTTTTGGTTGTTGGTG -GAACGATTTATGATCTAGATAAATTGTCAGCGCCCAAGTTTAGAGAATACTGGCGGAGAG -TTCAGAAGCGTGTTACCTTCGTCCTGGCCAGCTTGTGAGGCTGTAGGGGGAGAATCTTTA -CGATGCTGTTGCTTATGAGTCAGCGGCTTTTCAGCATTGCTCCTCCAGGGCTGGAGATAC -CTGAGAATTGGGAGAAGGACCCTTCTGCTCTGCCTCATCCTCGCTGCTGCTCTCATAATC -ACCGAGACCAAGCATTGCGTCGCAGTTTGTGTCTAGGTATAAGGGTCGGGATTGGAAGGG -GGGAAAAGGGAATTGGAATAGGACCCAGATATATAGACAATACTATCGCTTGGACAAGTG -CAAATCGAACTTAACGTCCTAGGACAAACAAGACGCGGAGCGTAACTATCGAGACAGCCC -TCCAGGGCTCTCGGGGAGCTTGGATATGTCTTACCTAATCAGAAGGCAGCTACCGCATCG -CGTCAGACACCCAGCTTCTTTGCTCTCCATCTTCCAGAACTCCCCAATCCCGAGAATCCT -CTACATTCAACACCTTAATCTTCTATACCAAATTGTTATTGTGATACACTTTCCTGGTCA -AGCTCCATCTCGCCGAAAATGACGTCGCTCATGGGTAACCACTCTGGGACTATTGGATAC -TCCTTCTCTGAGCCAACATCCGCTAATTTCAAACAACATTCCTTCTACCCGTAAGAAACA -ACCTGATCAAACTGAGCAGGAACATTGGCTGACGGTCACACTAGTTACACCGATAATGGC -GGCTCCACCCTCGGCATCACGGGTGCTGATTTCGCTATCCTCGCCGGTGATACTCGTTCC -ACATCAGGCTACAATATCAACTCTCGCATGGTCCCGAAAGTCTTCAAGATCGGCGGTGAG -GATGAGACTGGTGAGGGAGCTACAATCATTCTTTCCGTTGTTGGTTTCGCAGCAGATGGA -AACGCTCTCAAGGAAAAGCTGGACACTGTGGTGAAGATGTACAAGTATCAACATGGGAAA -TCTATGTCAGTGGGAGCATGTGCACAGCGATTGTCGACCATCCTCTACGAGAAGCGATTC -TTCCCGTACTACGTACATGCTATTCTGGCTGGTCTGGATGAGGAAGGTGTGGGAGCTTTG -TACAGCTACGACCCCGTGGGCTCTTACGAGCGGGAACAGTGCCGCGCTGCCGGTGCGGCA -TCCAGCTTGATCATGCCTTTCCTGGACAACCAGGTCAACTCCAAGAACCAATACATCCCA -GGAAGCGGAGAAGGACACGCACTTGTGCCCAAGAAGGCTGAGCCTCTTGACAAGGAAACA -GCCAAGAAGCTTGTACAGGATGCCTTTACAAGTGCAGTGGAGAGACATATTGAGGTTGGT -GATGGTTTGCAAATGGTGATTGTTACTCGGGAAGGAATCGAGGAAGTGTTCCACGCCCTC -AAGCAGGATTAGAATATAGGACATGGTAGATCATGGCCTTCTTATCGGATCGCGGAGCTC -CTTTTTGACCAGTTGATCATAAACATAATCGAGATGACTGAAATTTCCATGAATTGATAT -CTTTACGTTATACGTCGTCAGGCCTCTAAGATCAAGAAAAAGAAGACAGCGTAAAACCCG -ATGCACCTGAACGCCCACACCATGCCGGATGAGATAATGAGTTGTGCAATCAGAAGCGTT -CCTTCATTCGTGAGGCGACGGTCCGAAGATTGCCATAAACCTTGCCCTGTTTTGTGTTAG 
-AAAAATTCCTCTCAAGGATTTAGATGACCAAGACTTACCGGAGGACTACCCAAAATCAAG -CTGACGATTGCCTCTCGAGCAATGTGAATGTTCTTAAACCCACCCAGAATGTGAATCTTC -TGGTCTGCAAGTACCACTCGTGTCCGACTGGCATTTTCGATCGCATACTTGGTCTTTCCG -TCTTTGCCTGCAATACGTCCAATGGCACGACCAAGATGTTCTCCGTTCAGAGTCTTCACA -TCCTTGATCTCAAAGGTTTGAATGTACAGATCATCGAGTCGGAGAAGGGCAATCGCATCG -TCGAGGTCGAAGCCAAGAGTGAAGGCCTTGACGAAATCGGCACCTTTTTGCAGAGCTCCC -ACATCGGTGGTAAACTTGGAGGTTCGCAATTCCACTGATCGGCTTTTGATGTTCATTCGC -ACTTGCAACTTCAGGTGCTCGACGATAGGAGGGCAGATCTTGGCCCAGCTCGCTTTAAGC -GGGGTCATACGGTGCGGTGGTACAGGCACCTTTCGTGTTTCGATCCGATATGCTGCGGCG -ACGTCTTTGGCTGGGGTGAAGACAGGACGACCTTCCTCGTCGATGCGCATGTCGCTGTCC -TCCGTGGACTCTGCAACGGGGTTAATCGGGGCGCCTTCGTTGATGGCTTGTACATCGATC -AAAACTTCATCATCCTGATCTATTGATTGTCAGCTAGAGGTTTTTTTGGGGGGACAGATC -AGATGGAGTACTAACCATGAGAGACGGTCTCAGGCTGAAGCCCAAGGGCCTGCTCTGGCT -GACGAATAGCTGTAGGTGCCGACATTGTATTGTTTCAAGAATGAAGTTTTTTTTTTCATA -AAATCTGTTATCGCAAAATTTAATCAGTTCCCCCCTCACGGAAAAATGAAGTGTGGGCGG -AAGGGAATTTTTTGATGCAAACAAGACAAACTCCGATGATTTCATTTGGGTTCTTCTAAC -CCGCAAAGCTCGGAAGCTTCCTTGTCAACAAGTCAACTGCTGTATATGACACAGTTTGTC -CTACTTTCTGATCAACTGTCTCGTTTATGTCCAGGATGTAAAGACAACGATGTCTGCTAA -CCTGGGACGCCGGCTGTACGCCCATATTTGGGAAACAGCAAATCCATTTCTTGGTCGCAT -GCGAATAAATCGCGTATCGCCTGATGCTTATAGCTCAAGATTATTCCTCGATTCGTCCAT -AACTCACTCACGAAGAATCTCTACACTTCCAAGGAAACCCACTCCTCTCACCAGAAGATT -CGCTTCTGGGGGCTTTTTCGTCCTCGGCGTTTCGCCTACGTCAGCTGTTGCGGATACTAC -CACAGCATGCATACTCTCCTCTCCCAAAATTGCCGCCCAACATGTTTGGAAGCGAACTTT -ACATGCATCCAACGACCATCGGCGTGGGAAAGAAGCTTCTCAAGAGACCCATGAACGCAA -TGAAAATGACCATTCCCAGAAACTTGCCCAATCCGGAAAAGATACATCCAACACACATTC -ACAACCCAAACCATCACAAGACACTCAACAGCCAACAACGAACAATCGCCACTTGATGGA -TCGCTTACCACATATGCCTCATATACATCGGCCAACCAAGGAGGAGCTCCTGGCCGCAGC -AACCGGGTTCTGGTCACGCCTGAAAGTACGATTCAAATGGTTTTCTATTAGGAGTGTTCG -TCCCTACAACCTTGATGAGATGACTGCCTTCTTTTCGTGGGTTTTGCTGGGCCACGTCGT -CTGGGTCGTCGTGGGAACTACAACTTTCTTTTCGCTTCTGATTCTTGCGATCAATACAGT -CTTTGCTCAAGGTACGTCCATTTTTGGTTGCAATAAGGTCTGGATACTAATGTACTTGAA -CAGAAACATTGGCAGGGTGGGTGGGCAACTATCTCACTAAGTCTTCTGGAGTCAAAGTTG 
-TTTTTGAATCTGCAATCGTGCCCAAATGGAAAAATGGCGTCATCACCTTCAAGAACGTAT -TTGTGTCAAAACGACCAGGCCAAGGTGCAGGACATGTTAGCAAGGGCTCATCAAAGTCTG -CGGCAGCCGAGGCCGCGGCTCGAGGGGACAGTGGATTCGAAGATTCCCAAATAGTATCTG -ATGAAGAGGAAGATACTAACTATACACAATTCGATTTATCGATCGAGACGGTTAATGTGA -CACTATCATTTACCAAATGGCTTAATGGAAAGGGTCCACTTCATGATGTCGAGGTGAAAG -GCATAAGGGGTGTCGTCGACCGCCGCCATGTTTATTGGCCGGAGGAAGATCTGGACCCCA -AATCCTACCGGCATGAGCATAGACCCGGTGACTTCGAGATCGACTCTTTCAAGATGCATG -ATTTGCTTGTCACTGTTTACCAACCGGACAACTTCCGTCCCTTCTCCGTGAGCATCTTCT -CGTGTGACCTCCCTAAGTTGCGGAAACAATGGCTGTTCTACGATTTCCTCTCTGCCAATA -TGATGTCTGGATCTTATGACGACTCTTTGTTCACAATCCACGCTCGTCAAACCCACGGAT -TCACAGGTATCCGGCAGGACAACGGAATAGAGGATGATGACAAGCCAAATCCGTGGAAGA -AACATAACAGGATTAGAGTTGACGGACTGAATGTCGACCATCTCAACCGTGGTGTTCAGG -GTCCTTTCTCTTGGATTCACGAAGGGACTGTGGATATTGTGGCCGACATCATGTTACCTG -CGGAGAACGATGAGAGCTTGGCCAAAGTGATGGTCGATTTCTATGATCGCTTAGAAGCCA -CTGTCACCTCCAATCGTCACCCGGAGCCTGTATCTTCAGACTCCAATCCAGAATCAGAGA -CTGAAGATCGACGTTTCCTCGTCATGGATTTGCGCGTGCACCTCAACAACGTTCGAGCTG -TTGTGCCCATCTTCACCCGGGACCTTTCTTACATCAATAACGCACTCATCCGTCCCATTG -TTGCCTATATCAACTCCAAGCGTACTTTCATTCCCATCAATTGCCGCTTAGTCAAGCGTG -TTGGAGATTTCGATGGAAGCTGGACTATCTTTGACAGTGGCCTGATGGATGACCTGTCGG -CTGCAGTGAGTACCCTCTCGTCTTTTATATAATATCCAGTCATTGCTAATACTTTTTCCA -GACCTACGACGCATTTGCCCGGGATGTTGTGGATGATCAAGCCCGGAAACGGCGGTTTAA -GAAAGTCGGATTTTGGTCCCTTCAACTAGCCGCCCAGGCTATTTTCATGGGCATGGCTGG -CAACATCGCCTAATTGTACTATCATGTACCTCATTTGACTGGCGCTTTTTACTTTTTTTG -GTCTGTATCATAGTCATTGCGGTCGCATTTACAGGGAGTCTCGGTCATTTATTTCAAGAT -AGAGTCTATCTCAAGTCAAGTAGGAGACAGAACTCAAACTAGTCCCAACTATGATACGAT -TTCCTAGGTATTGCCCTGGACAATCGAAACATTCCACTCACCAACTCTCCAATAGTTCTA -GATCACTTTCATATCGTTCTGGTTCCACTGCAGTTGTGCCGAATGCTGGGACGGCTTTGT -ACTACAAACAATGTTTGGTATATCTATTTCCCCTTGTATGGTGTTGGTGCAAAGCCATTA -TACGCAGTACATTCTCTGATATAGAAAAAGAACTGTAAGAAAAAGACCGGTGGAAGAAAT -AAAATCAGTATACTCGAGACATACTCGAAGCATACTCTCTTAGGTAACTCCGAGTAGCAG -CCGGGAGATTGAAAGGGATCAGTTCTGCATACCAGCACATCCCCAAGCCTCCTCTTCTCT -TTTCTTCCTTAATACAGAGTGATCTGAGATCGCCATTGCTTTTATATCTGACTTCTTATT 
-ACCGTTTGTTATTATTCTTGTTAGCTTGAAACCCCCTTTTTGCCCCCACTTCACTTTCTT -AAGCTCTGGGCTCTCTCACCCTCGCAACTCTTATTCTCTCCAGGGTCCCACCAAGGAGTT -TATACCCTTTCTTTCTACAATATAATCGGTCGACCGGTTGACACCCCTTTTTTTCCTCCC -TGGCGCTTAAGCTTCAGCTTCCGGGGTTCCGTGCTCTCGCCCCCTGCCCTTCGGCTCGTG -TCGCCCCCGAACGTGACCCCTCTGATGAGTCTCGATCCCTACACCAGTCATGTCGCAAGT -TCCGCGAGTGAATGATGAAAGCTCGCCGACTTCGTCTAAGAATCAGACCGGCTATTCTGT -CAATAACGGCCTTATCCCGCGGGATTATTTAGCTCGATCTGATCAACGCCCCGGACCTGC -TCCTGGTTCCCGACATATCACACCTTCCCCGTTACATACAAGTTCCCTGCCTCAGTCGTA -CCGTTTCGAAGGATCTGCGTCACAGAGCCCCCTTTCTCCGAAATCCAACCCTTCCTCCCA -GAGTCGGTCGCCAGTCACTCGCGCGGCTAATGCGCAATACCCTGCGCCACCCTTCAGCCC -TGGTCAGAGCATGTCGCAATCACCTGAAATGGTAGGAACAGAGTCGGCGAATGTCAGCGG -CCGGCCTGGCGAAAGCAGTATGCCGAAATCATCCCGTGCGCCAGAGCCCGATCGATCTGC -TTCTTCCTCCGTCAGCTCTATACACTCGATGCCAGGAGATCGCGCACCCCATCGTGTCAT -CCCTCGCACCTCATCGATCGACTCTGCTATTTCATCCCTGTCCTCTAACTCTCAGAAATC -TACATTTGATCCGAATGCTCTGAGTTCAGCCGATATTAACAATCTCATCAACGCCGCTGG -GTCTGCCGAGGCTGTAATCGTGCACCTATTGAAAGAGAAACACCAAGCGGCTTCGCAGAA -CGCGCAACTTTGGAAATTAGTTGATAAACAAAGAACTTTGATCCTCGGGCTTAACAAGGA -TCTAGAGCGTGCTTTTCAAGAGAAGGATAAATATCGGAAGAAGCTCAAAGATGTACAAGA -CGCGCCGCCATTGCCCGTTACTGAGCTCTCGGCCTCTACGGCCGCGCCAAGTAGTGACAA -GGAGAATGTGTCGCGCAAACCTAACCAACCATCGATTGTACCACCATCTGAAATCTCTCG -ACGGGACCTCGAAAACACCACAAGCCCGGTCTCCCCCACAGATATCCCATCTCCAATAGG -AGACGGAAAGAGCAGGTTTCCCCACCCAAACCGCAGGCCACCGCCTGCGCCTCTGAATCT -ACGACAAGGAGACAATAAAAGAGCATCttcagcttctgattctgatcctgattctgattc -cgattatgGCGACGCCCAAGATATAAATGGAATTCCGAGTGCGGGCCGTGGGCGGCGAAA -GACGAGAGAGCAGGACGACCAAGACCGCGAGGCTACGTTACAGAAGGATATGCTTGCCTC -TACCGGTTCTGTGGGATCATCCCAAGCAGCATCCTCTAACAGCTCCCGAGACACAGCAAC -GTGTAGCCCGGAAACGGTACCCAGGGATCTTTCTACGAGATCACCACCGAATGTGGGTGA -TTCTAGCTCGCTGGGGTCGCTTTTGGGCTCTCGACCGCCTCCCGCTTCGTCCTTTGGCGG -GCGCTCCATCGTCGCCATGCCAATGAGCCCCGGGCTGCCCTTGAGTCCGAGACCAGAAGA -TCGACCTATCAATTCCCCCATGCCTCGCATGCCACGAGATGCATCCACTTCCGTTGCAGC -CGGGTATCAAGCCCCAGGGTTGCCGCTCTCGCCGAGAATGGCCAACCACCCTATGGGCTT -TGCTCCCATCCCTTCAGGACGTCCCAGTGGCCCTATTCCATCAATTGATCCCACTGTCAT 
-GATTGACAGTCCAAGATCCGCAACTTTCCCCGACAATCAAATTTACCGAGGATTGATGTC -TGATGAATATCAAGGCCTACTTTTGCCTCCTAATGCCCTTCCTTTAGTTCAAGTCAGAGT -ATCTTCGTCGCGCCTTCGTCCCTCTCGAAACAGCTATATGGCATCCAAGCCTCTGGACGA -GGAGCCTGTTTTCACTCTTAGTGTGATCCTTCGATCAGAGATGTCAGAGCTATGGCGAGT -TGAAAAGGTCATCGGTGCCTTGCCGCAGCTGGATCAGAGAGTTCGACAATCTTGTCCGCT -TCCCGCAAAACTACCAGATCGAAATATTTTCAGCGGACATTCCCCTGCAAAGGTTGACTT -GAGACGTGCCGCGCTTAATGCATATTTTGACAGACTTCTGGACACGCCAGTGGACGAGAA -TGCCGCTCTAGCTATCTGCCAATTTCTGACAAGTGATGCCATCGAGCCTCGAGACGACGA -GACAAGCATACTGAAAGGACTCACCCAAACAAGGCCCGATATGCCACGCGGACCCGACGG -AAAGCCTCAAAAGGAAGGTTATTTGACTAAACGAGGCAAGAACTTCGGTGGCTGGAAAGC -GAGATATTTTGTTCTCGACGGCCCAGAGCTGAGATACTACGAATCTCCGGGTGGTCCGCA -CATGGGTACTATCAAACTTCATCATGCCCAAATCGGCAAGCAGTCACCAAAACCCACCGA -GACTGCTCCACCTACCGGGGGCGAGGAAGACTCTGACAACCAGTATCGTCATGCATTTTT -GGTTCTGGAACCGAAGAAAAAAGATTCCTCGGCACTTGTTCGACATGTTCTTTGCGCTGA -AAGCGACGAGGAGCGCGATACATGGGTCGATGCTCTCATGGAGTATGTTGAGAGTGCTTC -TTCCGAGAATGAGGGACGCGGCAGCATATCATCCAAGAGTCAACCTCAGTCTCAAGACGA -ATACCAGATACAGCAAAAGCAACTTCCCTCGGGAACCGAGACCAAGTCTAAAATTTTCAA -TGGAGGCAAAAGATCCGGCCGCGGGGTTGACAGTCCCGACCAAGATCTAGGAGGTGTGCA -GGGATTCAGCTTTGAGGATGCGGTGCAAGCTGATCCTCCCACTATTGGTTCCACTCTGGA -GCAAGCTCCCCGATCCCCACGAATCCCGGCTTTGGCGACTGAATACAGGGACATGTCTTC -TCCCGATCAAGCAATACAGTCTCCCAGACTGATTTCTAGACCCACAAACGGCACAGTCAT -CCAGGACGTGGAAGCCTGGGGCAACAAGGCAAAAACGTCGACAAAAGAGAAGAAACGCAG -TATCTGGGGTTTCCGCACTAGATCCTCCTTTGATCTTGCCACTCAGGCCAGCAGCGATAC -ATTAGTAGCAAGCAACAATGTTGAGAGGACAGGTCCTGTCAGACCTGTCTTTGGTATACC -CCTGGCCGAGGCTGTACACGATTGTGGCCCGCCAGGGATTAATGTGGAGTTGCCTGCAGT -GGTTTATCGTTGCATTGAATATCTCCATGCCAAGGAAGCAGCCTTGGAGGAAGGAATTTT -CCGTCTGAGCGGTTCCAATGTAGTGATCAAAGCTTTGAAAGAACGTTTCAACACTGAAGG -TGATGTTGATTTCGTGTCTGGAGATCAGTACTACGACATCCATGCCGTGGCTTCTCTATT -CAAGCAATACCTTCGAGAGCTTCCGACGACCGTCTTGACCCGGGAACTCCATTTGGACTT -CCTTCGCGTTCTCGGTATGTACACATTCGATGCTTGTATTCTATTATCCTGCTAACCTTA -CTAAACAGAACTTGACGATCGTCAAAAGAAGGTCGCTGCCTTCAACTCCTTGGTGCATAG -ACTGCCCAGTCCCAATCTAGCACTATTACGGGCTCTATCGCAGTTCTTGATCGAGATTGT 
-TAACAATTCTGATGTCAACAAGATGACGGTCAGGAACGTGGGCATTGTCTTTGCCCCAAC -TCTCAATATTCCGGCGCCGGTATTCTCGATGTTCCTCACGGATTACGACAGTATCTTTGG -CGATAATGATTCGAGGGTTGCCAAGCCTATGGAACTGACAGTTGAAAACAATCTGTCGCC -CGATGACATTCGTTCCCCGCGTCATCAGATGTTCTCTGATCTCCCTACACCTTCGTACCA -GCAGACTTCGTTCCGAACCGCTGGTGATGGGAACGGTCCTTCTGATAGTGCCAGAACACA -CCATGACACCGGTTTCACCCCAATGCAACCTAGCTATGACCAACCGACGTCTAGACATGA -GCAGTACAACCAACCTCCGGGTGCCGGCGCTCCTTATAATTCTTTGAATGGCATGTTGGT -GCCTAGCTCGGAGGACACTCGCTCGACAAAGACGAAGCGACGGGAGAGTTCAATGCTCTT -TATGGATGGTAAGACCCAACCAAAAGATTTTCCTTAGCCTTTACTAATCTATTCCACGAA -GACCCTTCCTTCTACCAGGGCAATTGAATTATGATACATTTCTCACTAGTCTTTTCTTTT -TTCACTTCTGATTCTGGCCCAGTGGTCATGCCGCTCTTGCCCGGAAAAATTACGACTTGC -ACGACCGCAGAATGATGACTTTAACCCGCTTGGCAATCTCAGCTATCAAAGTCTCGAGGG -AATGTTTAATGATAATGGTAACCAAGATGCGGTTGCACACCAAATTTGATGGGTTGTCCA -TCTTGGAGTTGAGAGTCCACCATGCTATGGTCAGCAACTTTGACCGTCGTTATACCCCCT -TCTTTTACTTCTTTCCCTTCATTTTCTCATTCTTCGCCTTTATTTCCTCTCCTTCATATT -TCAGCGCTTCCACTTGGGCTTGATGTCCATATGTTATTACCTGCTCTCATTCATCTGGAA -ATTTGGTGGTCCACGGCTACGCGATATTGGAGGCTGGGTGGCATAATTTGCCGATTTGTT -TTTTTACAATTTACACGACTTGTGGTGGCGGAATACAAATTTCATTTCATCTATATATGT -AACATCATTTCCCGTGGCCTTGCTCAATCAGAATCGTCGTGTACTTTCTCCTTTCCAGAG -ACCCATAAGTTCACCGGTCTCTCTTGTCCAATCTCGGACACTATCCACCAAGGCTTTGGC -CATTCCCTTGTTCTTGGATCTGCCATCAGAGGGACCTAGGCCAGTAGGTTTGCCACGATC -CCCGGTGACATACTCGGAGCCCATGCTTGGCAGCACATTTTCTCCCGCGCTCAAACTAAC -AAACAGAGCGAATGGAACGAGCCAAACACACAGCCCAAAGTATGAAGCCACTTCAGTGAA -AGAGGGCATATCCTCGTAATTGACTCCATACGGTTGACGCCAGTTGTTCGACGGAGGCAA -AGGCTTTGAGAAATGGCGGAACCATAGCCAGTGGTTGAGACCGACTAGTAGGCAGGAGAG -GAGGAAAAGAGGATCAGATAGTTTCACGATGGGGAATCGGCGTAGATTGCTGGCGTAGAC -CATGTGCGAAGCAATGCTCAACACAGATAGTGACACAGGGAATCGGTCGATTATCATAAG -CAGAACTTGGACCGCGATGATGCCGTAGATCAGACGTGTCAACACTCGGCGGGCAAGAAC -CGTGTGTTCTTCCACAAGCTCGGATAGATAGTAGAGACCCGAAGCTGTAGGTTCAACAAA -GTCAGCTTTCTATTGTTGTTAGAATTCCATGTCTCGCTCCGTACCTATGGCCAAGGTGAG -AAAGGCAAACCCGACAACCACCCCGAAATATCCGACTAGCGGTAGAATCCACATGATGCC -CAGTAAATACAGAGCTGTCAAATGGTGGCGTTGAAAGTAATGCAGAAGGCTAATTCAGCC 
-TCAAGAGAGATTCGAGATGCAATCGGAGAAACGTGGACGTTTTGGGGTATGGATGCCTCA -GGCAGCACCACCAAAAATAATTTAATGATTTTCCAAACTTACAACTTGCAACGCAACTTG -CACATTCTATACCACACACCATGTCGCGCCCTGAAGATATCCTGTATGACAGCCTGACTC -TACTCCACACATATTGAAGAGTTTAAAAAAAACTAATCCCACATGTGGAAAACTCCAGCC -CACCGGACCTGTTCTACAATGACAATGAATCTCGAAAGTATACGAAGTCTTCGCGAATTC -GCAACATTCAAGCTAGCATGACCAGTCGCGCACTTGAACTCCTCGACCTTAAATCCCCAT -CTTTCATCCTCGATCTCGGCTGCGGCTCCGGTCTCTCCGGCGAAATGCTATCGGAAGTTG -CGCCCGAGGACGGTGGCCCGCACACATGGATCGGAATGGATATTTCACCCAGCATGCTGG -ACGTTGCTTTACAACGCGACGTAGAGGGCGATCTATTCCTTGCAGATATTGGACAGGGTG -TCCCGTTCCGACCTGGCACATTCGACGCCGCAATCAGCATCAGTGCAATCCAGTGGCTGT -GCAATGCGGAAACCAGCGACGTTAGTCCTGAGGGTCGTCTGCGACGCTTCTTCGAAGGCC -TTTATGCCAGTCTTCGCCGGGGTGGACGTGCTGTGTGTCAGTTCTACCCGAAGAACGATG -TCCAGCGGAGTATGATCAGTGGAGCTGCAATCAAAGCTGGCTTTGGTGCGGGTATCCTTG -AAGATGATCCTGGCACTAAGAGCAGCAAGTTGTATCTTGTCCTGACTGTTGGTGGTGGTG -GTTTGACTGGCGATATCACTGGTGTTGTGAATGGCATGGATGATGTCAATGTCTTGGATG -CGAGACGGAAGGCTGCGGAGATCAATCATGCTCGCGGACCCCCGAGGAAGGGAGACAAGG -CTTGGATTTTGAACAAAAAGGAGCAGATGACGAAGAAGGGCAAGGTTGTCAAGGCTACCT -CGAAATACACTGGCCGCAAACGACGAATCGCTTTCTGATTAAACTGCTTTGGTTTACGGA -TGCCATTTGTCTTTGATGGATTCGATGAAAAAAAGAAGGAAAAAGGGAGAAAAAGCCTGG -ATGAAAAGAGCATTCAAATCACCTGCATTCCCCTTCAAGTACATAACTTTCACCCATTTT -GTCTGTGTATGCGCATTCCCTTGGGCGTTCTCTACATGTCTGAATCGGAATTTTATAACA -GGCTTGTCGAGAGGTGCTCATGGACGTCACCTACAAGTTCGACTTGGTCAAATCCGTCGA -TTATGGGGTGCCTACCAGAGGCCTCGTCAAGGGTTGGAGGTTTGGAGGTTGAGCTCTCTA -TCTTCTGGGCACCGTGATATGAAGTTGGCCCATTGGAGAATCATGGCCTTCTTTGATGTT -TATACCCAACAATTAATGCAATGAATGTCCACGTTATGATCACAAAAGAAAAAGTGTCTA -TTGCTATAATTTGCCGGGGAACGTTTCGAGCTAAATTGAAAGGAAAGATAATACATCAAA -TGAGGACCACACCGACCAATTCAGGTGTGCAGAGCACATTCCAAAAATCAGCAGGCAGCA -TCCTCAATCGTAGCGGTAGGTATATTGGAGCGCGGGTGATTCCAAAATTTACAGAGACGC -CTCCATCTTGTCCAAAACAGCGCGAGTGAACTCGTGAGTGGTAGCCTGACCACCCATATC -ACGGGTCATAGTGCGGCTAAGAAAAGAAGTTAGCTTGCTTGTTTTTTTCCATAAAAAAAA -AAAGTGTTGGTCCACTCACCCTTCACCGATAACATCGTAGACAGCCTTGGAAATGCGGTT -GGCGTGATCATCGAGACCAAGGTGGCGCAGAAGCATGGAGCCAGACAGAATCATAGCACT 
-GGGGTTGGCCTGATCCTTGCCCTTGATGTCAAGACCGACGTGACGGCAGCCAGGCTCGAA -GACAGCAACGTCACGGCCCATGTTGCAACCGGGGACAACACCGGGTCCACCAACGAGGGC -AGCGCCGACGTTGGACAGAATGCCACCGTAAAGGTTAGGCATGACCATAACGTCAAACTG -CTGGGGACGGGACACAGCCTGCATGGAAGCGTTGTCGACAATCATGTCCTCCACCGCCAG -GGTGGGGTACTCCTCAGCGACCTTGTGGAATGTGCTGCGGAAGAGACCATCGGCCAACTT -CATGATGTTGGCCTTGTGAATGCAGGTAACCTTCTTGCGGTTGTTGGCAAGGGCGAAGCT -GAAAGCGAATCTGGCGATACGCTCGGACTTGGCGCGGGTGATGATCTTGAGGGACTCGAC -AACACCGTTGACGGACTGGTGCTCGAGGCCGGAGTACTCTCCCTCGGTGTTCTCACGGAT -GATGCAGAGGTCAACATTCTTGTGGCGAGTGTCGTAACCGGGGATGTTCTTGATCAGAGA -GATGGAGGCGTAGATGTCCAGCTCCTGACGCAGGGCAACGTTGAAGGACTGGTGGCCTGA -GCGCTCAATGGGAGTGTGGAGGATACCCTTGAGACCGAGCTTGTTGCGCTTGAGGGAAGA -CAGGGACTCGCGGAAAAGCTCCTCGGAGTGCTTGTTACCGGCATCGACACCGCTAACGTC -GACCTGCTCCCACTCAATGGGGACGTTGTCGGCCTTGAAGATGGTCTTGACCGACTCGGC -GACCTCAGCACCGATACCGTCACCTTGTCGAGTGTTAGTCCAGAGCATGCGCAATTCAAG -CATATAATGCGCAAGTTTATGTGAGGCATACCAGGAATGAGGGTCACGGTGTACTTGCCA -CCATACTTGGTGGGCTTGAAGATGTCCGACTGAATGCTGGCGAAGGCTGCGCAGATTGGT -TAGTGAAATTTCGAGACGGTGGAAACATCAATGGCGTACTTCTGGCGGGAATAGAGCGAC -CGAGAGAAGAAGCTCCGGAAGCGCTGCGGAGCAGAGTCTATAGAGAGCAAGAGATTAGCG -AAAGCCAGCTGGCGTGAAATGGACAATGGCGGAAACACGGAACGGGAAAAAGCAAATATG -GGAAAACTTACCCGAGCAGGCTGTGCCGTTCTGGAGAACATTTTCAAGGAGTTAAACAAA -GAAATTGGGACAGAATCACGAATCACCAAATTCGAGGGGGTTGGAAAAGGGAACAGTGAA -AACGGGAAAAAAAGCCTCCGGATGGGCTCGGCATCGATGTGGTTAGCGCCGGCCCGATCT -TATCTGAATGGATACTCTCCTACAACTTCAAGCAAACAAAGAAATAGATCAACTCGACAT -TCTTATTCATTTAATTCTCCGGCAACTTCTTTCGCCAATTCTTATTCATATCCTGTTCCG -GTCCTCGAATATCACAACCCCAAGCTCAGTGGTCAGCTCAAACAAAACATACCTACGACG -TTGACTAATATATACTCTAGCTCAATTAACTTGGCTTATTAGCCTATTTGTCTGTCAAAG -CATCGAATGTGATGGCTTGACCTAGACCCCCTCGACACGAGACCCCAGTTCATTATTGGA -GTGGCAAGTGGACATCGGTCAATAAGGACCCTATTCAGAACGGGGAACCACGAACAGCAC -CTTTATTTGGGCTGATGTCGGTCGACAAAGAGCTCTATTCAGGATCATATCTCTATTCAG -AATAAGCGGGTTCCTCGAACTCATGTCCAAGATACTCTTCTCAATAACGAGACCACCATT -GAACCAGCGATGATACCTGCAGTCTTCATGGATATATATAGCTCATTGCAGTTCGCCTTC -TCCGGTTCTTTTCTCCAGACATCTCCATCACCAAGCTCTCTCAGTATCTGTCTCTCGCTC 
-TTCAAAACAGTTTTCCAATTCCAACTCCCAGGTTTCTCCTCAAGAACCAAGATGAAGGTC -ACCTCCGTTCTCGTCGCTATTCTCGCTGCTGGCACTGTCCAGGCCGCCGCTCTTGCACGT -GGTCCCAGCGAGACTCTTCCCAAGTGGTGTGGCCACATCGGCCAGGGCTGCAAGCGAACT -GCCGATGCCTCCGTCGATGTCAAGCGCTCTGCTGATGCCCTCGCCGAGGCCATGGCCAGA -AACTTGCCTATGGTTCTTCAAAAGTGGTGTGGTCATATTGGCCAGGGCTGCTACAAGGCC -AAGCGTGCCGCCGATGCCGTCGATGAGGTCAAGCGTACCAGCGATGCTCTCGCCCATGCC -ATGGCTGCTCTCGAGGAAGAGGATGATGAGTAAAGGTACGTTACCAGTCTCGTCTGTTCT -ATGCGTGGAATCATTTGACTAAATTTTTCTAGTGTCTCCTTTACTTTGATATCCGTGGTG -CTGCATGTATCAACAGAGATTGCTCAATAACTCTCTTTTCGATTACCGATATGCACACCA -TCGACTACTTTCGCTATGAAATATCTCAGGGAAGCCCGCTTGGCATGCGATGGATGCGTT -ATGAAAATACCAATGGCGAACACTCTTTTATTATCACCATTGGTCATGGCATTGATTTCT -GGTGAATCATGATTTGAATTGTGGGGGTCTCTGGATCGTTACTCGAAGAAATATGTATCT -AGAATCAGAGAAATCCATGCCCTTTTGACACAACACTTCTATCCAGAATCGTCAAGGCAA -GCTATTCCGCTCAGGTCGCTAAGTTTGATATATATTCCTCGAGCGGGGAGGGAGCTTCTA -TAGTTTAGTTGGTATGTTTACTTTATACTAACCGGGGCATGTAGAGGCAAAGAAAAGGGT -AACAAGAACCACAATTATAGGGATTTATATACACAAGGAATTCGTTTCAATAGAAGTAAT -TCCGACATAATTAATGGTGGTATACACCACAGCTAGGGTAATACCGTCCCATCTCTTGAA -ATGCGCCATACTGTGATTCCATCGGAGCAGCTCTTGTCATCTCCCACCATTGCTGACCTT -GATACACTGGGTCTTTTGACCCAGCATACGTTTCACCAGTAGTGGGCTCTTTCGGGAGGT -GAGAGGCTTGATTGGCAACGTTTTCGTAACCAGATACCATGTACGGTTCCGCATCAGCAC -AGACATTTCCATCTGCACTGTCGTCCATGGCAGAATCATTCATGGCAAAGTCCCCAGCAC -TGGCTTCATGTCCAAACTGCGAGTAGAGCTGCCATGTTTGAATCAACCGCTCTCTCTGCT -CTGCATCGGCATTTTCAAAAAGGGCAAGCTGGTCTTGAGTCAGGGTGTTTGGGTCAAGGC -CGTGATGCCTCAGAACATCAATGCTTGAGGGCTCTGTGTGTTGGGCGACTCTGTGGGCCG -AGTGATGGTAGTGTTGGGTAATGCTGTATGCAATTTCTGGCGCTTGAGATGGTACAATGG -TCTCTCTAGGGGCGGGATGCAGCTGTGTCATACTCATTTGTCTATCAAACAGAGCTGAAA -GTTCGTCTTGCACTTTCGAGTCGCTTTCCGAAGCAGACATACCGCACTATATACAACACC -TGGTCAGGTTTCTTGGCGTAGTTAGATGACAATGAGCACGATCTACCTGTTTACAGTGGG -GGTTCTCGATAAAAGCTGCAAGTGCATTTGAGATATCGGAGACGTGCCCGCAGTGCGGGC -ACTGATGTAGTATGGAGGAAGACATGGCGGGGGTGGGAATTCTGTCTAGCTCAACAATAT -CCGAACGAATAGAAAATGTCAAAACTCGATTCAGATGTCTGTTCCATGAAAAACTGACTA -GAAGAAAGAATTTAAAGCGTTGTGCTTTTTTTAATTAGGTCGAGGGGCAAAGATGCCCAA 
-GTGCTTGTTAGGTTGGGCTGCAGTCCGAGGGCCACTAGAAGAGGATCATATGTTATACTG -CATCTCATGTCTATAATATACCCATAATGAGCCCATAACATCTCCCTTTTGTGTCCATAA -TATACACAATACTCTAAGGTACCCAAGCAGTACCCTCAAAACTTGTAGAGAAGACATACA -AACCAAAGTACCGATTAGGTATCTCAAGGTCATGTGACTCGAAGCTCTGCCAGGCAACCC -CGTGACGCGACAGCGTCACTGCTTAGTGCCTAGGAACCTCCGTTATCATCCCCAGAGCTT -CCAACCTCACCCTACCTATCCATAGTCACCCTCCAACCCGCTCATTCTCCTCCCTTATCT -GTCTAATATACAGATCAACTCTTTCGACCTACCCGCAGTGACGTCTGTTGTCCTTGAACC -TCACCGCATCGCATCCTCACCAAAATCATGGTCAGTCAGCTTCGGATCTACACAATATGC -CATATTGTATCCCAATCAAGCTAACAGAGTGGGTATTTAGACGTCAATCGGCACAGGCTA -CGATCTCTCCAACTCGGTGTTCTCGCCAGACGGCCGGAATTTCCAGGTACACCAAACACA -ACCCAGCTCGTGAACGAGCCACGAAACCCAGCTTCACATTGCTGACCAGTCCATCTAGGT -CGAATATGCCGTCAAAGCAGTCGAGAACGGTGGTACCGCAGTCGGAATCAGATGCAAGGA -TGGTGTAGTGTTGGCAGTTGAGAAGATCATCACGAGCAAGCTCCTAAAGCCAGGCGCGAA -CAAAAGAATCGCAACAGTCGATCGCCATGTCGGAATTGTGAGATGCCATTTCCATTTCAT -TCAGATCGCCTCTCCCCAATGCAAGATGCTTCCATTGAAACCATGCTACCAGGGAAAGGC -TTCAAAGTGCCCGTTGCTAATCTGCCTTCTTTCTATCGATCCAGGTATCCGCTGGACTTG -TCCCTGACGGTCGTCATTTCGTTTCCAGAGCAAGAGATGAGGCATCCTCCTGGAGAGGTA -CATACAAAGGTCCCATTCCGACCTCTGCGCTTGCCAATCGCTTGGGTGGTTATGTGCAGG -CCTACACCCTCTACTCCAGTGTACGGCCATTCGGTGTGACTGCTATTGTCGGCGGGTGGG -ATACTGAGGCGGAGCTTCCAGTTGATGGACAGGTCGGCGCTGGACCCAAGTCTGGCTCTG -GTGGAAAGGTTGAGGGTGCCAAGGCCGGAGGTCCTGGCCTATACATGATTGAGCCTAGTG -GACTGTACTGGGTATGTTGCGACCTGTGTATGTGTGTATTTGGTTGGATATATGCTGACA -AAAATGGGTAGGGATACTACGGTGCTGCTACTGGCAAGGGACGACAGGCTGCCAAAGCTG -AACTCGAGAAACTAGACCTCACTTCGGGTACTCTGTCTTTGGCAGATGGTGTGAAGGAAG -CTGCCCGTATTATCTATGTTGCCCACGAGGACAGCAAGGATAAGGACTTTGAACTCGAGA -TGACGTGGATCAGCTCGGTGGATGGACCGACCAAGGGCCGTCATGAGGAAGTGCCCAAGG -AGCTCCTTGAGGAAGCAGAGAAAGCAGCGAAGCGCTCTCTGGAAGGGGACGATGAAGAAG -AGGAGGACAAGGGTCAAGGTGAACAGATGGAAGAGTGAACATGTAATAGATTATGCTCCC -AAAGAATAATGGTTCTGTGAAGGTGTTTCCTTTCATATTCCAATGAAAAACAATCTACCA -CGAAGGCCCAAGTTTAAGTTCCAAAAAGCCGCACCAAATATGTTAGAGGGTTCCAACGTA -GCCCTCAACCCTCAAAAGATATAATATCCAATCTATCGCAAGAATTACAAAATCTTGCCC -TCAGACCACTCCTTTCTCTTCAGACCCAACTCCACAGCAGCACTACCCCAGTCCTCACCA 
-TGGTTGTGCATACCACTGTTTCCAAGGCCAGCCCGTTCTAATCCCTGCTGCTCCGTCAGA -ACAGTGAGCAATCCAAAGATGACCGGAACACCAGAATCAAGCTGCACACGCATAAGCCCG -TGGCTGACAGCGTCAGCGATATATTCAAAGTGCATCGTAGCGCCCTTAATGAGGACACCG -ATGGCAATAATAGCATCGAAAGGCTTCTTGGGGCTTGTGGATTCCGCTGAGCCGGACACT -TCGGTGGGCGATGCCAGGAGGTCGGTGGCGCTGATGCCCTCGCCGGAGGAGTTGGCCTGG -AGCTGGGAAGCGGCATAGAGGCTGTAAAGAAAGTGTTAGTTAAAGCTATGCGGGAGCTAT -CGTAGCTATAGGATATGTGATCATGTCGTCCACTCACCGTTGCACTGCGAGAGGCAGCTC -GTAACTGCCTGGAACTGTCTGGACTGTGATGTTTTCCGGCTTCACGCCGGCTGCGAGGAG -GTTCTTCTTCGCTCCTGATACCAGTTGTTCGATGATGACTGTGTTCCATCTGGCGTGGAC -GATTGCAACGCGGAGCTCAGAGCCTGTTGTAAAATCGGTATTAGCTAGGGAGTTTCCCCG -AGGACTGTTGTTTGCTTTTACCATCGTGGGTCTGGGCAGCGCCGGGGCCTTTGATAGAAG -TCATTTTGTTGAATTGGATGGTTAGGATAAAGAAAGAGAGAAGATGTTTGGGGATTGGGA -GAATTGAGCTTGACAAATGTTTATGAATCAGAAGAGCTTGAGGGGCGAAATCCCTCGGTC -CCGCATTTGGCGGGGGAAGGCGGAATGTCAACACCGAACACGGCAGTTGAGCGCAATTGG -CTTAGAATGCGTCTATAACATACGTTCAATTTGGTTTATACAAGGGCATGCATTCTTATG -ATAAGCTATAGGCAATGAGCCCAGAAAAGGGGGATAAAAAAAACAAAACGAGATTCCATT -GTCCCGAAGCTCAAAACACCTAATGATAAAAAGATGACCGGATCATGAGAATCATCTAAC -CTCGCGCTAAAGAAACATTTATTCAACAGTGACCGACTTGGCCAGGTTACGAGGGAAATC -GACGTTCAGTCCCTCGCCCACGGCAAGCCAGTATGAGATGAGCTGCAATGGGATGACATT -GAGCAGACCCTGAAGGCAATCCACAGTCTTGGGCACGTTGATCTTTTCGGTCAGGGCGGA -CGAGAACTCGGGATCATCGTCGTTGCAAATAACAATGGGGCGACCACCACGAGCAATAAC -TTGCTGGTAGGCGTTCAAGGACTTGGTGAACAAGTTGTCACGGGTGAGGATCATGATAAT -GGGGAGGTTCTCGTCAACGAGAGCCAAAACACCGTGCTTCAATTCACCAGACATGACAGC -TTCGCAGTGGAGGTATGAGATTTCCTTGATCTTCAGGGCACCCTCGAGGGCAGTGGGGAA -TTGAGCACCACGGCCCAACAGCAGCAAGCTCTTCTGGTTCTTGAAGGACTGGGCGCATAG -CTCCTTGATCGGATCGTTGAGCTTCAAGATCTGCTTGAACTGCTCAGAGATATGTCCAAG -GCCCTCCATGATCTCCTCACGTCTCTCCTGCTTAGAGGCACGATCTTCACTCAGCGACAA -AGCGAACATGACCATGGCCACAAACTGAGAGGTGTATGCCTTGGTCGAGGCCACACCGAT -CTCGGGACCAGCGTTGATGTGCACACCACAGTGAGTGAGGAGAGAGATGGAAGATCCGAC -AACGTTGACACAACCAACGGTCAGGGCGCCACGTTCCAGACAGTAGCGAAGAGCCATCAG -GGAATCAGCAGTCTCTCCAGACTGAGAGACAAACACACAGGTATCGTCACGGAACACTGG -AGCCTGTCGGTCAAGGAAGTCGGAGGCGAGCTCGACTGCGATAGGAATCTCAGTGAGCTC 
-TTCGAAGATACCACGCACAGCCATGCATGAGTGGTAGCTGGTGCCGCAGGCAATGAAGAT -GATTCGTCGGCAGCGACGAATAGTCGAGATGTATTGGCGGAGACCACCGAGGGTGACCTT -CTTGTTGGTCACATCCAGACGACCTCTCATAGTGTTGACAACAGACTCGGGCTGCTCGAA -GATTTCCTTCTGCATGAAGTGATCGAACTTGCCCTTCATGATCTCTTGCAGCTCGAGCTC -AATGGTCTGAATGGCACGGACGTTGGAGGTGCCATCATCCTTGGTCAGACGGTGGATGTT -GAGCTGTCCCTCGTGAATGTGGGCGATATCGTCATCCTCAAGGTAAAGAACCTTCTTGGT -GTGCTCCACAATGGCAGATGGGTCAGAGGACAGGAAGAACTCGACAGGCTGGGGGATGCC -ATCGTCGGAGAGGAATGCCCGAGACTGGGAGCGGTGCAACAGCGACTTATCAGGAGGGGC -AAGGAGGCTTGTGGCAGACTTCTTGGCAGCCGCATTCTGGGAGGCCTGCTCCGCGGAAAG -GACACCATCCTCTGAATATTCAACATCCACGAAATCCACCTTCATCTTCTTGGAAGTCTT -CACACCGATGACCAGGGGCGAACCCTTACGGGCAGCAATGACCTCATGGGGGTAGTGGAC -GGACTTGAGCAGCAAACCAAAGGCACCCTCAAGCTCTTTGATGACTGCCTTGGCCAGGAC -TGTGAATTCGATATCGGGGTGTTGGTCGTAGAGGTATTTGGCAAGTTTGGCAATGCACTC -AGTGTCGGTCTCAGTCTCGAAACGGAAGCCCTTGCTCTCGAGCAGAGCCTTCAATTCCTT -ATAGTTGGTGATGATACCGTTATGGACCACGGAGAACTCCCAGTTGGGATCAGACCTATC -GACAATTCAATTAGCTTCACAGTCACATCAATAATCATGGACTTCAGGGAACTTACCGGT -GGGGGTGGCAGTTCTGGCGGGAGGGAGTGCCGTGGGTGGCCCAACGGGTGTGAGAGATAC -CGGCGTGGGACTCGAAGGTCTTGGTCATGTCCTCTGAGCTCTGCTCAATGAGTTCCTTCA -ACTTGGCCACCTTGCCAACTTCCTTGAATGCACAGACTTCATTCTTCTTGTCGCCATCTA -CGGCCATACCAGCCGAGTCGTAGCCACGGTACTCCAATCTCGAGAGGCCTGGGGAAAAAT -TGTGGTGAGCGGAAAGTTGCAAAGGCTGTCAGGGGAGAACTTTGTATCACTCACCATTTA -GCAGTGTGTCAATGATGAATTTGCGGTCCTTCTCGACCAGGTAGTTGATGTATCCGAAAA -TTCCACTAAACAGACTGTGAGTATATCACCCCATTGAGGAAAGTAGAAAACGTACCACAT -GATGGGCGAAAAAAAAGAAGAGGGACTAAAAAAAAAACAATGGGTATTTGAGGATGGATT -GCAGAGGGACGGATTAAAAGAAAGTAGGAAAGTGATTGAAAACAAGCTGGGTACAGGTGT -TTCGTATTTATAGAGGGGGGGGCAAAGAGTAGGGGGAGGGGCGGATTATTATTAGCTCTT -TCTGCAAGCTTTGATCCAATGAGGGAGGTGCTCGGACCATTTTGGTTCCCAGGGGAATTG -TCTCACTTGGCACACAAATACTTTATATCAAATAGCATGACGATGCCACTACTTATATTC -CGTGATGACAACGGTCTCTAGGTAGGAGTATACGGAGAAGTAAACCTTGGTCTTGTCAAT -TTTCAACTTGTACCTGGTTGCCCTCGATACATGCTCAACTTTATATTGCCATATGATGCC -GATTGAGTTGCCATATTATTCACGCTAAATCGGACCAGAAACAAAAACGAGGCGAAGCTC -TTGGAAGCCCAAAAGAAAAACATGATATACGGGGGAAAATCTATAATTATACCTAGTGGC 
-CCGGCACATTCCTTAGTGCACGGAGGATCCCCTGAAAAGGAACCTTGGAAAGGTTCGACA -GACGTTCCACGCTTCTGCAGTTCTTTTTTTTAACCTATTGTGTCTGATGCCCCTGGACTC -GATAAAGGGCCTTGTGCATTATTGGTTATTTGTCCGTATATCTTGCCTATCTTTGGTACT -CTAGTGAAACCTACCCTTATTGTACGCCTCAGTGGATAAGCTGAGGTCTAGGGTCAATCT -CTCTACTCCGTACTTCGTGTATATATATCAGCCTCATTTCAGCCTTCGATCAGAGAAGAG -GGCTATGCTCCCTTATATACGGAGCAAGCATGACTGACTAATTATATAGTAAGCTGGAAG -ATACTGATTGTTCCCTTCGCAGGTACTGATCGGACTTACACGAACAACTATTCCACATAT -ATCCTATTCTGTGCATCTTCAAATGTTGTATGTTGTACGGAGTATATCGGACCACACCAA -ATGCATGGATTTCAGGCTTTGTGCTTTTGTATAACCTGATAGAATCCAATGACGAGATCG -CTCTTAAGGCCATCTACCAACAATCACTCCTTAACGATAAATTAATACAAGAGCAAATTT -ATTTCCAATGAGTGCCCAAAGATACTTGTCAGGTACCTCAATTGTAGGTGTTCTTGTATT -GAACGATTAAGTCGCGGTGAGGTCACTCAAGTGCTTTTGAGaaaaaaaaaaaaaaaaagg -aaaagaaaaGGCCTTGCTGAATCATTCAATGTTACTGAACCCGGGGAACTTCCAAGGTTG -AATTTGAACTTCAACATTACCTTAATTGTCTAATTCTTCGCTGTACGCACTTCCCTTTTC -TTTCTTTATAGGAGTTATCCCTTGAAAGCGTGAAGTTCAAACTCCGTCCCCACCATTCCA -CTTCCTTCCGCTCCATCGCGTCTTAGCCTCCTGCAAACTCTTGATTTGCGCGTAACGACA -CCTCCAGCTCCTGGATATCGCTTTTCAGCCCTTTTCTTCCGATCTCTTTTGTTTTCTCAT -CTCTTTGACACGCTGAGCCCCTACATTTAAATTATTTCAATACTGCCGTGCCGGGACCTT -CTCGGTCGCATGTGTCCGCAATAGCCGAATTACTGCTGTCAAGGATGCTGCCTTCTGATC -GCGCCACGGTCTCTGGCGTGTAGCCAACCTCTCACCATGTCACGCGCCTCGGCTGGCTTT -GCAGACTTTTTCCCCACCGCCCCATCAGTCCTTCAACAGAAGCGCTACAAAACAGCGCAG -GAACGACGGAAATTACATGTCGAGGGTGAACAAATTTCTGAAAGGATCTCGGATTGTCCA -GTGTCTCTTACCCCGAGCGCGCCCACAACCATTGAGACGCCTTTCAACCCCAGTACAGAC -GAAGGGGAAGCTCGCACCCCGTCGGCCTCCCACGAGCTTGAATTGACCATTTCCGCCAAG -GCGAAAAGCGTGGAGGATAGGACTGGGCTACCTCCCCCCACGGCAAATCCCATCCCATCA -CAAATTGATACACTGACACCGCTCACCACTGCGGAATCATCGCCCCCTTGCAAAGTGACT -AGTCCATCACAGCAAAAGATAGCAGGCGGAGGTTCTCCGGGTACTTCTTCCAGGCCGAAT -CATGAAAACACCAAATCTACCATTACACCAATCCACACCCCTCCTACCCCTCAGTCGCAA -ACCCAACACCTCGAGGTCCGCGTGAGAGGGTGCAAGGTTGTGTATGATCCCGATCTCGAC -AAACGGACCTCGTCGAAGGAAAAACGGAAAAAGCTTGAGTATGTGGACATCTTGAATGAT -GGCCAAACAGATGCGCCACCCGATCCGCGTCGTGCGATCGCCAATTATACTCGTGGATCC -GGCTGCAAACAAAAGACAAAATACCGGCCCGCGCCCTACAATTTGAAGCCATGGTCATAC 
-GATGTGGCAACGAGCATTGGTCCCGGGCCTCCGATCCAAGTTGTGGTAACGGGGTTCGAT -CCTTTGACTCCCATCGCCCCGATCAGCGCGCTTTTCTCGAGCTTTGGAGATGTGGCCGAA -ATCAAGAACCGCACAGATCCCATCACCGGCCGATTTCTGGGGATCTGCTCCATCAAATAC -AAAGATAGCACCTCAGCCCGGGGGACGGGCCCAGCATCAGCATCTAGCGCGGCGCGACGT -GCATACTTCGAATGCAAAAAAGAACAACGCATTGGCACCCGCAGGATTCGGGTCGAGCTG -GACCGCGACGGGATTGTGTCCGACCGAATGGCCGCGAAAGCAGTGGAGTCCCAGCGACTG -GCTTCCAAGAGTACCCTTCCACTTCCGCCTCCAAGTGAACCGAGACCCGATGTCCAGATC -AAGAAAAGCGAACCACCACCGACTGCGCCCAAGGGTCCCTCGGGCAGGTCAGCTTTACGG -CCGGGGGTCGTTATCCCAGAGGGCCCTAGGGCCGGTGTCCGATCCCCTGTTGTACAGTCA -TTGGTCGAAGAGACTGCTATCTTGAGTCAGATCAAACGCGATCCATACATTTTCATTGCG -CACTGTTACGTCCCGGTCCTCAGTACAACGGTTCCTCACTTGAGGAAGAGACTCAGACTC -TTTGGGTACAAACATATTCGTTGTGACAAAACCGGCTACTATATCATCTTCGAGAATTCG -CGCCGAGGTGAGGAAGAAACTGAACGCTGCTACCGACTATGCCATATGCAACCCTTGTTC -ACCTATATCATGAACATGGAGAGCCAGCCTTACGGTAACCCGAACTACGAGCGAAGCCCC -AGCCCCGAACGCCTCCGCGCCGAACGGCGAGACAAGGCCGCAAAGGATCGGACAAGGAAA -GAGGCCGACTTGGATATTGAGGAAGAGAAAAAGCAGCGGGCGCTTGATTTGGATCCATGT -CGGGAGGTATTGTCTATCATCATTCGCGATCTGAGGGACAAGCTTCTCGAGGACGTAAAG -TCGCGCATAGCTGCCCCCGCTCTTTACGACTATTTGGAACCGAGTAAACATGCGGCCAAA -AGGGAAAAGCTGGGAATTTCGGACCCCGAAGGTACAAAACGGTCTGTTTTCCAAATCGAC -AGCACCCCTGGCTCTCGAGGTGAAATGTCTGCAGGACGAGATCCTCTCAGATCCTCAAGT -CTGAATGTTCTTGCTCTTCCTCGTATCCGGAAAGCCCATGGCCTTGACCATGCGGGTGCT -GCCTTCCTCGACGAACGGCGCAGGCAACCACTACGAAAGAAAGAAGTCCGCCCCTTGTAT -CATCGCTTACAGCAGTTGCACGATGATGAAGATTCGGATGACGAACAGCGCACTCCTGCT -GCTAGAGATGTCGATGAACGAGAGAGCCGACCCCCCAGTCGCGCAAGTTCAACGTCATCG -GAGGACGGAGGATACCAGTCAGACGATCTAGAATCATCTCGAGGAAGCGAAGAGCGTGGT -GCTGGAGCTGTGGACGATTTGGATGCCACAGTTGAAACCGGCGATCTCTCTCATGAACTT -GTGCTGTCTCCCACGTCGGGCAAGAGGAAGCGATTGACCGAAGATGACGAAGCACGAAAG -CGACAGCGACAGGAGGATGAGCTCTTTGGCATTGACTCGGAAGCAGATGGACACGAGGGT -GCAACTAAGGACCTGGAAACGCCTGCGATTATCGGCGATGCTCTTGCAGATGTCGGCGAC -AAATTACCAACGCAAGACCATTCCGCAGACCAATCCACTGAGCTTGTCTCTCAAACACAT -CTCGGGGGTGATGACCTCTCGGTCAGCGAAACGTGCAAGGCCGAACCGATTGATGAGCCG -AAGACAGAGATTGAATGGGGCTTTTCCAAGGATGAGCCCCGCGCAACGGTGGAAGATGAT 
-GAAAACATTGTGCTCGATCTGGATGGTTGGCAACACCTGGTCAAGGACGACGAAGACCTG -CAGTTTTTGCGGGAAGTCCTAATGGATCAGTCGGAATCAATCATTCGAAATCTTGCTGCA -TGGGGTTGGAAACAGAAAGAGATCAAAGCTCTCAATGGCCTGGAAACTGGCACAACCCAG -GACGCTATCAGTATCCCTGGTTATTATGTGCAAAACTCCACAGGAGCTGCGCGGACAGAA -GGTCGAAAAAGGATTATGGAAGCAGAAAAATCCAAATACCTCCCCCATCGGATTAAGGTT -CAGAAAGCTCGTGAGGAGCGCGAGGCGAATGCTAAGGCAGACCCTCAGGCAGCAGCAGCA -GAAGCTGCCAGAATTGCAGCCGCCAAAACAATATCAAAATCGACATCCCGGTCAACACGA -GTCAACAACCGCCGGCTTATTGCAGATATCAATGCTCAAAAGCAGGCCCTTCCTACTGCG -AGTGGCGAAGGAGACGTTCTTCGATTCAACCAGCTTAAGAAACGCAAGAAGCCTGTGCGC -TTCGCTCGCTCTGCCATTCACAACTGGGGTCTATATGCCGAGGAAAACATCACTGCCAAT -GACATGATCATTGAGTATGTCGGAGAGAAAGTGCGTCAGCAGGTGGCCGATATGAGAGAA -AGACGATACCTGAAGAGCGGAATCGGTAGCAGTTATCTTTTCCGAATCGACGAGAACACC -GTCATCGATGCCACAAAGAGAGGAGGAATTGCGCGATTCATCAATCACAGCTGTACACCT -AACTGCACCGCCAAGATCATCAAGGTTGATGGTAGTAAACGCATTGTGATTTATGCGCTT -AGGGATATCGAAAGAGGTTTGTTAAATGCGCTCGCTTCAGGAGAAAACAAACGTGCTAAC -TAAATCTATTAGATGAGGAGCTGACGTACGACTACAAATTCGAACGAGAGTGGGACAGCG -ATGACCGTATTCCCTGCCTCTGTGGCTCGACGGGCTGCAAGGGATTCCTCAATTGAGGTC -ACCCACCAAGTGAAGTCCTCGGGAGACATATTGACTCTGATCTACTGGGCGGCGTTGTAT -CGGGCTGGCGTTTTCAGGGTGGGAGCATTGACGCATTTATCAATGGCAATCGTGATTTCA -TAAACTACAAACTCTCTTGTACAATATACCAATTCCAAAAGAATTTCGCCTATTCTCTGT -TTCAAGTCAAGGCACTGAAGCAAAAGTCCTCTTAGAAAGCAATGTGAGAGCGTAGATGTG -TCAAATCCCAGAAGAACTCGACTCGGATATCTGATAACCTTGGTGCCACTGCATAGGGTC -CTCTTTTGAGAAAAGGAAAGGAAATGGAGGAGAGAGCCTGTTTCTCAAAGACCAGTTCCT -AGCCCTGGGGTATCGATTTGCGAAGCAATTTGGGTTTCATGTGGGGATTGGGTTGTTCAG -ACTGGGAGAGATATTAGCAAAACGATTATGGAAGAAAAAAAGAGACGACCATATTACTCA -CCAAGGTGACTTGGAAGAAGGAAGTTCGCAATTCCTTCCTTAATCATATATGAATGACCG -CAGTTCCCGCAGGTCAATTTTCCTTCTGTGACGTGCGTCTCAAGTAGCAACCGGTGCAGG -TCCTTCAATGTCTGCTCGTTGTTGAGAGCCTCTTCCTCGGGCTTCGAATCCGGAAGACTG -GGGAATCCAAGCTAGGATCGCACAAGAGTCAGCTTATTTGTGATTGGGATTCTGATTGTC -GGCCGCGTTTGGGTGAACTCGGGGTTGGATGTTTATTACCTCGTTGGAAATGGTGCGCAG -GGCTTGCCAGTCAATTCGGGGGAGGATGTTGCGGATGAATTCGGGCTGGAAATCAAGTTC -CTGTTGCTCTAGCTCGGCATCCTGAAAATGTAATGGGAATGAGGCCGGTGATGTCTTGCA 
-GTCCTTCACGGCGCATGTCACGAAGTTGGCGGTGATCACTTTCATTGTGGCGGGTTTCGG -TGGGGTTATTGCTGATCGATCGTCGATGCGAGGATTTTGGGAACTTTGTTCGGATATGTA -GAGATCGATTGGGTCTAGATAGGAGATCAATTTAGAGGAGAATGAACGTCGGTGGCCCCT -CCGATGGGAGAAAAAAATAAGAGCCCCACGGCTTATCAGATATCATGTGATCGATATGGA -TACCCGGACTATGACCCCGACATTGTGTACTTGGAAATCTTGTATACGTTATTTTACAGA -AATAGCCTAGGTTCCAATTGGAAGTCTTGATTAGTCTTGATTGGCCTTGATTAGCTATTA -TTCATTTCGGATCCTTCAAGTTGGATTATTTGATCTGGACATCCAGCAGATCTAGATCGT -CAATGTCATAGTCGCCTAGACATGAGTCGGGGTCAAGTTTGTCCCCATCAAACAGGAGGA -AGATATCTTGGTCCGCAGAAATCTTCTGCTTATCCCGAAAAGCAGATATAAGTCGCGAAA -TAACGGTCTTTTGTCTTGCTTTAAGTCTGAAATCGTCGTATCCAAGACTCCTCAGAACAA -GTTTCATTGGCTCACCCTGGTCATCTTCTGGGGGTTGCGATGCTGTAGGGGGCTTTTCAT -CATCAGGTGGTGTAGCTGGCTGGTTGACCGGAAGAGGTAAATCGGAAACTGCTTCCATAT -GTATGCGTAGCTCTTGACCTGCAGAGTCATCATCATCAATGCCAAAGATTCTCCCAGAGT -TCTTCCCGGTATTTATGACTAGACTCCTGCAGGTTGTCACGTCGAATAGACGCCGACCCT -TCCAGGTTAGGTAGATCGATGATTGTATATGCGGGTCGAAATTTTGGCGATTGCACCATG -CCAGCCGCACATCTCTTAGCCGCTGTGACATTTTTCGATGGACAAGTAAAGGTTTGGTGT -TCGCAATTTCAGACGAGATCATGATTTGTACGACCGGGTCATACGCTGGAGCTTTCGGCG -GTTGAATAGGCTGAGGAGGTTCGAGAGGTGGAGGGCTTCGAGTTTCCGAGTCATCTGCCA -GGACGATCACCGGTTGAACCGGTGGAGCACCTGGTGGTTCTGAACCGCTGCCTTTCTGCG -GTGAATGAGACACGCGGTTTTCTGGGCTGCGCATTTGGGTTGTCTCGCGTTCTTTAGCTG -ATTTGGCCGACTCAGGTGAGACATTATGCGAGGAACCCGAAAGCTCTTGTGGCTCTGGTA -GCGCAATTCTAACGCTATCAATCTCACGTGTCACATTCCGCACTGGACTGCTATCAGCGC -GCCCTTTACCCTTCTCTTCAGAACGTCGACGACGTTTCTTTCGTTTTGTGTCTTCAGTAT -CTTCATTGTCTCCACTATCTTCTGGGAGTTCTGGTGCTTTCGGCTGCTTCTGGTGTGCTT -CACGGTTCGCAGCAATGATATCAGAGTACGTCTGTTCTGAGCGTCGATAGAAATCATTGC -CAGTTTCCCCCGTATTATTGAGTGCCCACGTGGGCTTCTTGAAAAAGCTCATGACTCTCA -GGGCCGAAATTGGTCAAGTCAAAAAATGCAGTATGATAAGATGACAGAATGTACCTTAGA -AATGGCTCAAGGGGTACAGATTATAATCAGCGATATGTTAGTGGAAGAGTAAAAATGTTA -ACATGGCCTAGGCATGATCCAAACAATTGGCAGTCAAGGTGAAGGGAATATGCAACATAT -CTTTCCAGACTTGGGATGGGTATAGTGGAACCTCGTGAGACACCGTCAGGTTGACCGCCT -ACTAAGAACGAAACCAGGGTACTTTCTGACCAGTAGACATTTGACAATATATGAGACCTC -TACTCTTGATTCCTAGAAAATGACCTTCGATACTATAGCTTTATATCTTTCAAATTTTAA 
-CCAATCCAATACATTGAGTGTTCAACAAAAGTATACTATTTTATGACTGGCAGTATCGGC -AGAACACAATAGCGTCCACGTCTGACCATAGGAAAACTTATCAGCAGATCCCGCCCCACT -TGCGATAAGTTCGATCACCCTCACCGCCTTCCTGGTCTCTCACTTTCGAATGCTTCGTCC -TTCCGCCTACTTCAAAACATAACTCAACCTGGTATAGCCCCCCTTGTCCTCCCTTGGACT -TTGCCCTGGCTAATATTTGATCAATTTTAGACCACAGTAGCAATATGGAGAACGCCCCAA -CCTCCGAAGGGAAAGACCCTTCTGTCTTTTTGAGTGAGATCATCGGTGCGCCTGTGATCG -TGAAGCTGAACTCGGGAGTGGTATACAAGGGTATTTGATCTTTTATCAATTCCTGTCCAA -TTCAATTCTAATGCACATATAGGCGAATTGCAATCAGTTGATGGCTACATGAACATTGCC -CTCGAAAAGACCGAGGAATACGTGAACGGAAAACTACGACGAAACTACGGGGATGCTTTC -GTGCGCGGAAATAACGGTTTGTCGATCCTCTCTTGTCTTCCCGTCCAACGACTGGCGCGA -CAGCTTTCTGACCAAACCTACAGTACTCTACATCTCTTCCAGTTGATCCGAAAATACACT -GTACGAAATCAGCGATTTGGCCCATCTATGACACCATGCATGTGGATAAGAAAGACACGA -TTTGAGTACTATCGCAATGATTTGGAATCTTCTAGCTAGGAGGACAGATTACAAGCCTAG -GTGGTGAACATGTCTTCTGATGACGCAGTTGACCGGACCTTCGAGCCTGGGGCCATCGAG -GCTGTGTACCTGAGGGGTTTCTTTTTTTTTCCCTGCTTTGTATTGTGCTCAAAATTGGTG -TCCATGACCATGGGCTCATCATTTGTTGCCCAGGCAGCTTGGTAATGTTTTCTGGTGATG -CAGCGATGTGCCATAGAGCTCTGTTTTAATTGGTGCAAAGTATATTCGCACAGTAATACA -TGAAATCTGACCCCGATATGACTTGTTACCTTGCCAGTATACGTTGGAGGTACGTTCCTG -GGAAGGCTGTATATCCAATGTACTCCTGGGGCCCTGGGCCTTGATTAAGTAAGCTTTCCC -AGGTGAGAGCCCCCGCCGAGGGGTCCCCGCGGCACTTTTTTAAGGTTCTTCTTTCCTTGC -GCCTCATCTAAGTTTTTAGAAATTTCGATCGTTCTTTACTAGGATGTTGCAGGCAATCAA -GTTCAGCAAGGGCAACCTTGAGATCCTGGATCAGTTACAGCTTCCATTTGTCGAAAAATA -CATTCCCGTTCGGACTACAGAAGATGGATGGCACGCTATCAAAGATATGAAAGTCCGAGG -GGCACCTGCTATTGCCATTGTAGCAATGCTTGCCCTCGCCGCGGAACTAACCACTGCGGT -GGATAGCGACAAGCTTTCACAATCACCAGAAGAGGTGTGCCAATATATCACAGAGAAGCT -TGCTTATCTGGTGACGAGCAGGCCAACTGCCGTCAATCTCGCCGATGCTGCGCGCAAGCT -GGAGGCTTTAGTGATCAACCGCACGAAAGCATCGGGCTCTACTGGCCGCGACGTCGCAAC -CGCGTTTATTCAGGGAGCAGAGGATATGATGGGAAAGGATCTCGAGGATAACAAGAGAAT -TGGGCACAATGGCGCAGAATGGATCGCCACTCACGCGGCCAAGCCAGGAAGTTCAGAAGT -TGCCGTGTTGACTCACTGCAACACTGGGTAGGGCCGATCCACTGCGCTATCCGTTATTCG -TTGTATGTTCTCACTAATACTGAATCTCTAGCTCCCTCGCCACTTCTGGATATGGCACAG -CATTGGGCGTGGTTCGTTCGCTGGCTTCGAAGAATATCCTACGTCACGCTTATTGCACGG 
-AAACTAGGCCCTACAACCAGGGATCTCGCCTTACAGCCTTCGAGTTGGTTCACGATAAAA -TTCCAGCTACACTCATCACGGATTCTATGGCAGCCGCTTTGCTCGCAGATACCAAAGTCG -GAGTGAATGCAATTGTGGTGGGAGCGGACAGGGTAGCAGCCAATGGTGACACAGCGAACA -AAATCGGCACATATGCCCTGGCTGTCTTGGCAAAGTACCATGGTGTCAAGTTCCTCGTTG -CAGCACCACTCACAACCATCGATATGGCCACAAAGTCAGGCGAGGATATTGTCATCGAGC -AACGTGCCGCATCCGAGGTGACCAGCATCAAAGGTCCCTCTGGAGGGTCTGAGACCACCA -ACCAGATTACGATCGAGACCGTACAGATTGCCGCGCCAGGGATCAATGTCTGGAACCCAG -CATTTGACATCACTCCTGCGAGTCTGATAGATGGGATTATCACAGAGGTCGGAGTTGTAG -AGAAAGGATCCGATGGCGAATATCACTTGGGTGGACTATTTGACGGTTCTGCCTTTTGAT -CCCTGATCTGACCTGTGCCCGTTTGGCCCAAGTTTGTCTCATCGAGCGATGTAGCCTCCG -CTGCAGCAGTACCCTGCTTTTCGAAAGATGCCAAGAGTTCATTAACTTGACTTTCAATTG -TGTCGAGGTGATTTTCCAGGGCCGAGGCAGTCATTTCACCCCTAAATATATGTCAATACC -ATGAAATCATTTGTGTAAGATGAATGCCGGCTCGCATACTTTGCAAGATCTGCGAGTATC -TATTTCAAGAGTTAGCTGCCCGGCCTTGGATCGGTTCGTAGAGGGAATTGCCTTAGCAAC -ACAGTCTAGGGAACTGGATGCTGCTTGCGAACGCCGAAATAGGGTGACTTACCTTCGCCA -TATCGAAGTCTGCATCAGAGTTGGACATCATGGGCGTTTGACGGTATGTTGCCGAGTACA -AGAGTGTTTTGAAGACAAGAGGTTCCTTCAAGTTTGTCTACGGGGCCCTGTAGGTGACAA -GCGGGGCCATCGACATGATTAGTCATCCCGAACCCGAGATAGATTTCCCCGATAAGAAGA -GAGAAAAAAAAACCTCTTCCTCACTGTTATCACCATCCACATATTATCACCATGGGAGTT -CAAAAAAAGACTCGCAAGTATGCGCTGGCCAAGAGAGCCATCTCTATGCGCGACAATCGC -CTGTTCGTCCCAATCACGAAATGAAGTTGTGTTGCGACTTGCTAACAGTCAGTTATAGGA -AGCAAAACCAAGACAAGACAGAGAAAGGCAAAGAGGCGGCAAAGAAGGATGACCTCGTCA -AAGAGGCGTAAGTGCCCAATATAATACTATCGGTAGAACACCATTGACAAGTACGGTTCT -AGCCCCCAAGCTCCTTCATCGATGTTCTTCCAGTTCAACACGGCCCTTGCCCCCCCATAC -TCTGTGCTGGTCGATACCAATTTTATCTCTCACACCATTCAGCACAAACTCGAGGTTATT -CCAACCATGCTCGATTGTCTGTACGCCAAATGTATTCCTATCGTCACGGATTGTGTACTT -GCCGAGTTGGAGAAGCTTGGCCCGAAATACCGACTTGCCTTGCGTGTAGCCAAGGACCCC -CGATTCGAGCGAGTCAAGTGTGATCACAAGGGAACCTATGCCGATGACTGCCTGGTCGAT -CGTGTCATCAAGCACCGCGTCTACATTATCGCCACAAACGATCGTGATCTGAAGAGACGC -ATTCGGTATGTTGCTAACATCTCGATGCCATTTTCGCTTTACCAGGCAAATTCTAACCTT -CATAATAGCAAAATTCCTGGAGCACCCATTATGAGTGTCGCTCGTGGTAAATATGTGATT -GAACGACTTCCCGATGCCCCCGAAAAATGAGCGACTTGAACGATCTATCTTTGCATTCGT 
-CTGTCGCCATTACGACCATCCACATGATACCCACAAAAATCGCCGTTTCTTTCTTTACTA -GAAAATTGTAAAATAGCATAGCAAACAATTTTCGCCTTCAGACACCAGTCAAGCAACTAC -AATACGCAAGACCACAGGACCAAATTATGCTGTCAACCGACCGTATCCGATCAAACAAAA -CGACTGCACCGAGAACCAGCAGCATCGCTCCTCCCAGGGCTGGTTCTGATACTTGAAACC -TATGTGCCAATGGAATAAATCATTTGAAGGACCCGGGACTTCTCAGGCAAGGTTATATAG -CGGCACATTTATGCCTACTCCTTTATAGCGACTGCCAATGCAAATGTGCTTGTAATCTGC -AAAGACGAATGTATCTCAAATCTTTAGCTAATCATAGCCATCTTCTTGGAGGCTCTGGAG -CCATGTGGTCTTCAACATATAGAGCACATAGTGCGTTGGCTTCCAATGTGTATTCGTATC -TATTAACCATATGTTTCTATCATTTGACGAATCGCATGCCTCACAAGACGGGTGTTGGCA -AAGCTCTGGATCATTTTCTGTCGCGACATCATTATGTGCAGTGGAGGTAAATCCGCGAGT -AGTGAGCTGGATTCAATCAAGTCATCACCGGGTGCTGCGATATCGAGAAGCTGCTTCAGT -ATCCGTAAAGAGTACAGTGCAGCTTGAATTTGAGCAAGAAGGTGAATGTCCGTCCATTTG -GTACTTTGCGCCATATACCCCTTTGTAAGGAAGCTTTCCAATTCCTTCCCACTTGGAATC -GTGGTTGTGCCGGCTCCATCGCGATAAATATCGGAAAGAGCAAACAAAAACCAGAAGCTC -TCCGCTGACAGACCTTCGTCAAAGGCAGCATGTGCTGTGGCAAGACGTCTTCGGACGAGT -TCCATGTCGGAGGCCACAGTCTTTGTTCCGCTTAAAGTGATTTCCTCGGCCACAATTCTT -CCACCTCTGCGGACAAATTCATGTACAGCCGCGAAGCGATTGGCAGCAGGACGGGAAAGA -TTGAAAAGCGAGTAGCCAAGAGACCTGTATGTTCGACCTTGTTCCCAGGCACATCGCCTG -GAAGAGTCTTCGTTCAGAATGCCGAGATAGACGTGCGGCTGTTCACCCAAACAGTAGATA -CTAGGAAGCTCATACTGCCAAAATAACTCCGACACCCTCGGGTCTAAAGGTAGTAAGGAC -TGCCTTGCTCCACGTATCACCTCATTATCTGGGGTCTCGTATTGATACTCTCGAAGAAAC -TCCAGATATTCAGAGGAGAGCTCAGTCGCCTGACTGCTCTCTTTGGATATCCGGGTAAGT -TCAGCAAAGCTCATGTGAACATTCCTTTGCAATTCGTAAGCAAACCACTGAACGCTTGAG -ATGCCCAATCGACCGGAGAGTGAATGGGGACAGATTCTCAGTCCGCGAATATCAGGCTCG -GCTGGATCCCATACGCCTGAGGATGTTTGCACCGAGTCTAGAAACAAGACCGCGCCATGC -TGGCCAAGATCGTGAAGCAGGAGATCCGAATCATCTGTCAATACTGCACAGCCAGTACGT -TTTGCCACGCTAGCGCACTCGACATCTGCTTCTCCCGCTACCATGACGGTGATTTCCGCC -CAGGGGTAATCAGTATCGGCGAAGCAGCTGATATCATCGTCGACCTCTTTCCGGATCTGT -TCCTTGTTCCATCTGTTCCGCAAGTCTTCAAAGACGGCGGATACCATGAAAGGATTCTCT -GGAAGGCCTTTTCGTCGCTGAGGCAAGCCTCGACTGCACCAAACCTGCCCTCGTTCAGTC -GGCATCACATCGCGACATTTGGGCGTTATAGGCAAAGACAGGTTTCGGCGAGCCAACTCC -AGCCGATGTCGTAACTTTTCAATTCTGGAAAAACGGATTTCCCGCTTGGAGACGGGCAAG 
-GCACCATCGAAGCAAATTTTATGTCTGAAGAGAGAGAAGAAGAAGGAAAATTGATAATCA -GAGGATAGTTGTGTTGAGAACTAGTGTGAGGGCTTACATTTTGACCCCCAGTCTGGTTAG -TTGGAGAAGAAAACTGCTCACTCCACGACTAATTTCATCGCAGGTTGGCTGGTAGTCAAG -GACATCACTGCTTGGATCCATCCAACTCAAGAGTCGACGGTAGATATGATACACTAGACT -AGGCCCGTCAATCACAACGGCTTCGACTCGCGGAAGTTCTGAATCGATCTCCCTTCCATC -TAACAAGACATTCTCAGCATATGGAAGCAGATGCCTGGTTAGATGAGGGATCCCCATAGC -CAATGTCGTGTAAGAACCCTTCTGGTTGTGGGCTGTTTTTTTTTGTTAGGCCCAACCTTG -GAAATTTTTTGTATTAGTCAGGGCTGAGGCGTCGGGGCTTTAGATATTGATCTCATCCCT -CTTTAGACTCTAAATGAGATATGTATAGACTTAAAAGGTGACGAGAAAGAAAATAGACAC -AAAAGTAAAATGTTCAGAACCTTTTCAAGTTTCCTACATCGTTGGATTTGTAGAACAAAT -GCATATATAGAATCAACGTAACAAGCATGATAGATAACAAAAGAAAACAGAAGGCACTCT -ATGCTCCGAGAAATATTTATCCAAGTATAGATCTGTCCATTTTAAATTACCGGGCAGATC -GCAAGAATCCAACCACTCCGCGACAGTAGCCCACCAGCTCAGCCAGGCGCTTTCCTTCCC -GCTGCTGCTCAGCAGACCAAGTTGATAAAGCATCTTGCAATACACGATACTGAGGCTCAT -CCTGAGTAGAGCTGAGACTCACAGTGTCGGCGTGAATTTTCAAAAAGACTGCCATCCAAG -CATTGACAAGCTCGAAGTCTCTCTTGGACTTCAGGCGAGCAGTCAGGGCAACAACGAAGT -CGGACAACTCAGAGTGTCCGTCACTCACCCGAGGATCCAATGAACGGATCTCGAGGTCTG -CTTTAGCTGGAGATAAGGTCTTGAGATGGTCGATAAATGAGCTAAAGTCACCAGAGTCAT -GGCCAGATGCCAATGACCGAGTGAAAATAGATTGAGGGGCATTGGGGATTCCACCATTTT -GCAACTTGGCAATCCGCATCCTTTCGGCTTCAGCGGTTTTGGCCAGCAATGCAGCATCAT -CCTCGGTGCCGGGGATTTCGCTGGCGGCCTCGGGCCCACTGGAGCCGAGGAGAGAAGGAA -GGAAGAACGGGGCCTTCTCGGGAGCCTTGGGAGCTTCCTTGGGGCGATTACGTTCCTGTC -GAAGTGTTAGCTAGAGTATTGCACATTTGCAGGAAGAATAACATACCCGAATAGTGTCTA -GGTGAATCAATGACTGCCATCTACTTTTGGGGACAAGACTGAGAGTCATCATGTCGTGTT -GTAGTTGTTCCGTCGCCAGTACTGGGCCCTCTGCCTCATCTTGCTCGGTAGTATCCGCAA -ATGCGGCTTCAATAAGGCCAGCACCGCCTTCTCCTGATGTCGCTGGGAGTCCAACGTTCT -CAATAATGTTCTCGTCCAGATTCTTAGTCGACACAGGCATAAAGAGGCTGCGATTGCTCC -AAAGACTGATGCCAATGCTACCGGCGTGTGCGGTAGCAAGGAATTCACCCGTCGAAGACA -TGGCCAGAGCGACACATGTGCTTGAGACCCGGAAAATGTCAATTAGATGCCCAGTGGGCA -TATCCCATACGCGTACCACTGAGTCCATGGAGGCAGCAACAATCCATCGTCCATCATTTG -AGAAGATGAAATCGTTGACTTGACCCACACATCCCCAGAATTCACGAACGAGCTTTCTAG -TCTCAAGGTCAATCACACGGATCGAGAGGTCATCACAGCTGAAGGCGATCAATTCGCTCG 
-CCTTGTTGTAGCGAAGTCCAGTAATGGCAGCCATGGGGTGCCAGTCTAGTTCATCAAGGA -ATTTCCCTGAGAGGAGATCCCAAAACTATATTACAAAAAAGGTCAACTTGCGAGTCACTA -TAAGAAAGAGGGGATACTGGAATGTACCTTGACTTTGCCATCCAGGCCACAGCTGACAAC -GGTACGGTTCAAGCTGTCAATCATCAAGCCAGTCACAGCTTTGGTGTGTTTGTAAGCCGC -AGCGGCCGAGGCCTGAGCATTGGATTTGGTGCCACGGAAAGCTGGGGGGCGAGCTGGGAA -GCTTTGTCGGTGCTGACCTGACTGCATGTTGAACATATCGATAGAACCTCCGGCTGATCC -AATAACAGCGAAAGTTCCACATTGCGAGACTGTCACACTCTGTAAATAATATTAGCAAAG -AAAAGAACATTGCTTCATCACGTTTAAGAACCTGCGCACCTTGACTTCTGTTCCATCACT -GGTTGCAAATGCCCATCGACCTGCTCTCTTCTTGCCCCAAAACCAAGTACGTGCGAACTT -ATCACCCCGGTGACCAGTGACAACACTTTCCCAGCCGGTTGCATTGGAGGCGTCCGCGTT -TGTAACCTTGGGGTTCGACCAGATCGGGCCGGAAGTAGTGACTCCCATCCCACCATCACG -GTTGAGGCTGCAGGCAATGCATGTAACCTCGGGAGCTTTGAGATCCTCTGTGTTTGAGAC -TTTAGATGGCCCCGCCTTCTTAGCCTTGCGCTCCAAGCTACCCTGCGAGATCTCGGTATG -CTGGCTATCTTTGCGCAGACTGAGGCCCCATAGACTGCAGTCCTTGCTTGCACTGAGAAG -CCACTTGCCACCGGATTCGGACCAATCCGAAGATGTAGGCAGGAAATCGAGAGCGCTGAT -GGCAGCAGAGTGTCCGTTTCTTCTATGAAGAGGTCTGGGGATGGGCGAGAATGGCGTTTC -GTCAAAGATCCAGGTCTTCAAAGAATTGTCATCACCCGATGACACGAGCACGGGCTGTCC -ATCCAAGAACTCAACACGGTTAGCGCCCATAGGCTTTTCGTCGCTTGACACTCGATGAGC -TCCACGAAGAATGCCTGCAATGCGACCTCCATTATTCAAGTCCCAAAGTGTGATATCGCC -GCTACCACTCGACGCCGTGGCCATAACACCTGAGCTGCGACCATCGTGACCAGCACCCAG -TCCATCGTTCCGGAAAGAGATGGATGTGACTGGGAGACCCCGGGATGAGGCGTTCTTGAG -AGACAAAACCAGCTGACCGGAGCTCACGTTCTGAATAGCCAACGCACCGCCCTTATAAGC -AATGGCCACAAGTGATAGAGCAGTTGTGGGCTGAATCGCGGTCACGGGTCCAGCATCTGA -CGACAAAGCGGGCAACGTGTGTAGAAGTTTTCCACTGCGGAGGTTCCAGATATCGACGGC -TCCGTCCGATCGGCCCACAAATATCTTGTTCAGATAAGTCGGCATGTTGCACATTACTCC -GGTGTAGATCTTCTCCCCAGAGGAATCCCGAGCAGACTGTGGCCGCAGACTGGTATAGTG -CTCATATGTACCCGTTTTCCAAACCTCGAGGCCACCGGTCCAGCAACCAACTACCCATGA -ACCAAACACCACCAAGCGCTCAATGGGCCCTTTCAGGTCTGCGGGACCCTCAAGCTCTGC -AACCTTTTTGCCACGTTTGAAAACCCAGACTCCACCGGGGGACTGCGATCGCAGATAACC -CCAAGCTGCGAATACCTTATCCTGCCACGCACATGTTGCCGTAATGATTTCCGGTGTTTG -GGGTCGAGTCAGGAAGACGAGGTTCAAACCCCGGCGCAAATCATAAGTCTGCAGCGAGTG -TCCAACGGAGGTTGTGATTTGGAAGGTCGATTTACCTAGGCGGACCGATGTAAAAGGAAC 
-GGGTGTTGGTGACACCAATCCTAGTGTCTTTAGGGCGAGAAGTCAGTTTAGAGAAACGAA -GCGATTTGATTGTATTCCTCAACTTACTCGGAAAGGAGCAAAGATTTTGGAACCACGTGT -TCCAGCATTAACTTGCGATACTCGTTTCTGCCTCTTCGCAAGAGGCAGATCGAAATTATC -AGCCGAAGAAGGCATCCTGATATCGAGGAACGCAATGACAACAATGGATACGTTGGAGGA -TTCAAAGTAGTCAGAATAGACCCCTTAATGTTTCAAGTCAGAATTCCGTTCTGACTTGTT -CACCAAGTCAATCACGAAGGGTATCTAAACAGCCCAAGTCACTCTCACTAGCTTTGTCTT -TTTTTTCCCGCTGTTTTTTTCTGCCCCGCCCAATCCTGCAAGCACTACACCGCCTTCAAA -ACGGGTGATGTACCTTTTCCCCGGGTACAAGGTACCTTCACATGATGAGAATACCTAGCA -TCTCTCTTGGATTTGGTACGGTATAGTATGGTCATGTAGTTTCTTTTGCTGTTGAATATT -TTCAATTTGCCTTATTTTTCATGGCAAACTCATCAGAGAGCTTATTGCACACTGCTATAC -ATAATCAAAATGTTATCAAATAGCATACTCATGTATGGAAGACTTCCGCTGTAGGTACTT -GGGTGGGCATACGTACTTTTAGATCTACTAATGGCGTCCTCTGTGGTATACGAGAAAGCC -CGATGAGCCAAATACACAATTAACATATCTTTCTCAATCCATAAGATCTGATCATGAGCG -AAAGTTCAAACTTGAAGCCAACAGGCCGTTCCCTTCTCCGCCTATGTCAAAATGCGGGAC -CCCATCATATTTGGTAATACAATCCAAAGAGTCTGGAGCTTCCAAGAATCCCTCTCCGAA -TTGCCCCTTGTGCTTCCTCAACACCCCTCTCCCATTGATTAAAATTAGTTCAATTGGTTT -GGACTGATTCGGAAACACAATATACTGCGCATTGATCCCCCATATGGATAATGCCGCTGA -GCACAGCAACGGCTCCCCTGCCGATGCTGCACCCTTGAGATCCGCAGAACCCAACTACTC -GTCGCCTTCACTTGATATGGACTCCGATAATAATGATTTGACACCTACAGGACCCATCCT -CGAGCCCACATTGGTGTCACCTAAACCTCCATCACCCGCAAAGTCGATTCAAAGCCAATC -AGGAAGAGCCGATTCGGGATTTGACCCTCGCTCCGCCGGTCGAATGGGACATTCTGCACG -CCCCCTAAGCCACGGACCCAGAAGGTTCAGTGGAAGCACTGCTGCCAGTACCGCTGCAAG -CTCAATCAGTGAAGTGGAATCCGGCTCCCTTAAGCCATGGAGGATCGGAGTCTGTGCCTT -GGATGTGAAAGCACGCAGTAAACCGAGCCAGAATATCTTGACTCGACTCCAATCGAAAGG -CGAATTCGAGGTGATAGTTTTTGGAGACAAGGTCATTCTCGACGAAGCGGTCGAGAATTG -GCCTGTGTGTGACTTCTTGCTCGCTTTCTTTTCCGATGGATTCCCCTTGGACAAAGCCAT -TGCGTATGCTAAACTTCGCAAGCCGCTCTGCGTTAATGATCTGCCTATGCAAAAGGTCTT -GTGGGACAGGCGGCTCTGTCTGAGAATCCTGGATCACATGGGGGTCCCCACCCCGAAGCG -ACTGGAGGTCAACCGTGATGGCGGTCCGGTTCTAGAATCGGTGGAACTAGCCCAGCATAT -ATATCGTCTTACTGGTGTGAAATTGGAAGGCCCAGAGAGTGGCATTGGTGGCGGCGCTCC -CCAAACCCAGAATGTCTCAATGTCCGAGGATGGCGAGGCCTTGATTGTTGACGGCAAAGT -GTTCAAAAAGCCCTTTGTCGAAAAACCTGTCAATGGCGAGGACCACAACATTCACATTTA 
-CTTCCCGAACGACCAGCAGTACGGTGGCGGTGGAAGAAGGCTATTCCGAAAGGTCGGCAA -CAAAAGCTCAGAATACGATCCCAATCTCACTGTTCCACGATCCGTCACCGAGAAGGATAC -AAGTTATTTGTACGAACAGTTCCTCAGGGTTGACAATGCGGAAGATGTCAAAGCATACAC -CGTGGGGCCAGACTTTTGTCACGCAGAGACGCGCAAATCTCCTGTGGTCGATGGCCTCGT -TCGGCGCAATACCCACGGGAAGGAGGTACGATACATCACAAAACTTGGCAAGGAGGAGGC -AATAATCGCATCTAAGATCTCAAACGGCTTCGGTCAAAGAATTTGTGGCTTTGACATGCT -TCGCGTGGGAGATAAAAGCTATGTCATTGACGTCAACGGCTGGAGCTTCGTCAAAGACAA -CAATGATTATTATGATAAATGTGCCAATATTCTCAGAGACATTTTCTTGGAAGAACGACG -CAAATGCGAGGGGATATCAGAATCGTCTGAGCCGCCTTCGCCGGACATAGGTCCCCTATC -TAGAAGGAGCACGGCAGGATCTCATCGCCAAGCCCTCAAGACACTGCTGAAGTCTCCTAG -TACTTCTAGACTCTATGGTCAAACCCAGAAAACTACGGAGGCCTCAGAGTCATGTACTCC -ATCTCTGGCCTCATCTGGGGTAGAGAGTGCTGATGTTGGCGCTGCTCTCAGCAAGTTGAA -TTCTCGGGACGCCCATTCCACTACTCGTGGCTATAGTCCCTCGAACACGAAATCTCCGGC -TCTTTCGGTACAGCACGCCAATGACGAAGCCCTTCCGCCCCTTCCTGCTTCCAAGCACTC -TTGGAAGCTGAAAGGCATGGTGGCTGTGATTCGACACGCAGATCGGACCCCGAAGCAAAA -ATTCAAGTTCACTTTCCATAGCCAACCTTTCGTGAATTTGCTGAAAGGGCACCAGGAGGA -GATAGTGATCAAGGGGGAAGCTGCATTGTCCAGTGTTTCTGATGCCGTTAAGCAGGCTAT -GGAGAAGGGCCTGGAAGACATGGAGAAGCTCAAGCTGCTCCGTACATCTTTAGAGAAAAA -GGGCGGTTGGCCTGGTACCAAAGTACAGATAAAGCCGATGTTCCGGAAACGAAAACCAGA -AGAAATGGGAGAGCAGGGCCCACTAGAAGGTTCAACTCCGTTGTCCGAGGCCAAGCCCTG -TGAAGAGCCCACTACGCCTGTGGCCAATGATGGGGAGGGACTGACGCCAGAAAACGAGGA -CTTGCGTAGATCCCAGACACGAAGTGATTCTATCTCAGGTGCGACCTTCTCTCGGTTCTC -CGCCGTTGAGAATGACTTGATTCTGGACAAACTACAGCTTGTGATCAAGTGGGGTGGCGA -GCCCACACATGCTGCGCGTTACCAATCGCAGGATCTTGGGCTGAACATGCGGGATGACCT -CAAGCTGTTGAACAAGGAAGCGTTGAATAACGTTCGCATATTTACTAGCTCAGAGCGTAG -AGTGAGCACCAGTGGTGAGTTATCTATCTTGCTAGTATCTGCATCAAGCTAATGACACTT -CTACAGCACAAATATGGGCATGCTCATTTTTGGATCAGAAGGATATCCCTGAAGACTTGA -TACAAGTCCGGAAAGATTTGCTTGACGATTCAAATGCCGCCAAGGATGTTATGGATAAAG -TTAAGAAGAAGCTGAAATTGCTTTTGCGAGAAGGGTCTGCGCCTTCACAGTTTGCTTGGC -CTAAGGACAAGAACATCCCAGAGCCTTCGGTTGTCCTTGCTAGGGTAGTCGAACTGATGA -AGTTCCACCGCAGTGTCATGCGATATAACTTTGAGAGACTTGACGGTTCACCGCACATCT -CCTCGACTCTATCCGAGGACCCTGAGACGTCGAGCCAATCCTCTGATATATCTGGCATAC 
-AGGGACGTTGGTGCGCGGGTGAAGATCCTCGACTCTTTAAAGAGCGGTGGGAAAAGCTCT -TTGCAGAGTTCTGTGACACCGAGAAGGTCGACCCGAGCAAGCTTTCTGAGCTGTATGACA -GTATGAAATTCGATGCACTCCACAACAGACAATTTTTGGAGTGGGTCTTCATGCCACAGG -ACGATTCCCGTGACTCTGATGAAGATCAGAAAAGCAAGAGCGCAGCAAAGACAGACACGA -GCACGGAGGGAAAACTTGGACCGGAAGCTGGGAATGACAAATCTGAAGAGCGTACGGAAA -GCCAAACGTTTGTTCATCGCTTTGGCTTGAAGAAACGCATCCTCCCTCTTGAATCACTGC -CACACCTTCGGGCACTAGATGATTCGTACGATCACTACTTCAAGCTTTTCCCTGGCTCTC -AATCTTCCAAATGCAAGGTGGATGAGCGCCTTTCAAAGCTCCGAGAACTCTACAAGCTAG -CGAAGGTTTTGTTCGACTATGTGACGCCTCAGGAATACGGAATCACCGACAGCGAGAAAC -TCGAGATTGGCCTCCTAACATCTCTTCCCCTTCTCCAAGAGATCGTTCGAGATCTAGAAG -AGGTGCAGGCTTCGCAGGATGCCAAGTCTTTCTTCTACTTCACCAAGGAATCTCATATCT -ACACCCTCTTGAATTGCATCTTGGAAGGTGGAATTCAAACCAAGATTGCCAGGAGTGCTA -TCCCAGAACTTGATTATCTCTCTCAGATTTGCTTTGAGCTATACGAAGCTCGGGACAGTG -AATCGGACTCCTACTCTTACTCCATCCGCATCTCAGTCAGCCCGGGTTGCCACGCCTTCG -ACCCGCTTGACGTGCAACTTGATTCCAGGCACTCCATCGGCTGCACCCCACGCCGAAGCC -TGACAGCCCATCAAGATTGGAAGGAGGTAATTGAGACGCTGAAGGCGAAATTCAACACGT -AAGTTGTTTGCTACCTCAACCTCCTTGGACTTTTACTAATCACTGCATGTAGTGTGGAAC -TACCCAAGACCTTCATTGCAGTAAACCTCAGTGACAAGCATAGCTCTCATGGTGAGACTT -CACCAACGCCCTCACGTGGTATTTCCCCAACTCAAGACTGAGGTCGATATTCCGGGGGCA -TTGTACTTGTGATCGCCTTTTTTTTTTTGCTTTACCTTGTCACTGACAGATTTGCATGGG -ACTGCTTCTTATGTACACCTGCATTGCCGAGGCTGTATTATGTACGATGTAGTAAAACAT -TGTAGCAATAAAATTATTGGCAACCTCAAATTGAACATGCTTTGATACCTATGTTTCTAT -ATTCATGCTTTGCATACCAAGCTTTTTTTACCAGTCCTTGCATATAGAGGTACTGCTAGT -ATAAAAACCTAAACCTAACTCATCGTAGTTCCTCTCTACAGGGTCTTTCTATTATGCATT -GACAATAAATTTGACTCAGTACTATTTTCATTTTTGATAGAGCCACAGTAAAGTAAAAGT -GATTAAAGAGTGATATTTCTGCTAGTATGCTAAGTAAATAGCACGACATGGGTGGTTAAA -AGTCCGGCTGAGCCGTAAACCCCGCCCTCTCAATTGTTGATTTTTCTCTCACTACGCTAC -CTACTCAAAGATGTCAGAACCTTTTGCAGTTGATATAGCTGGAACCACCCTGACATACAT -ATCTCACCCAGGTGTTGAGCGTTCGATTGATTTTCACGACATTCTCTGCGTGTTGTCCAA -TCATGCACCTACATATAGTGTGCTGTACTTCCAGAAAATCGAACTGGATGGCTCTGAGAA -CTTTGCTTTGAAAAAGATTGATGTAAAATCCCTTCCCAAAGCGCTGTTGCCATTCGTGGT -CGAGATTCCCAGCCATCTCCGACATGAAGATGAACCACCTATCATCCAGGTTGTGGTATC 
-TACTGGATCAGGAACGGGGAAAGCCAAGACAGTCTTTCAAGATGTTGTACAACCTCTTCT -TACATATATTGAGCTAGAGAATTACGAGTTCTACGAAACGCAATCGGCGCAAACTATCGT -GGAGCTCGCACAATCTAAATTCCTGGAGCGGGCTCATAATGGCGTCCCTCAGACCATCAT -CCTTCTTTCTGGTGACGGGGGCCTCATCGACATTCTCGAGACATTCTACAAATCCAAAAA -GACAATTGATGTGTCTCCCAATATCGTGTTGATTCCTTGTGGGACTGGGAATGCCCTGGC -CAGTTCCATTGGATTACGATCTGGTCCGGTGTCCGGTCTTTCAACACTGCTTCGAGGCAG -CCCATCCTCCATACCAGTCTTTGCTGCCAAATTCTCACCTGGCAGTCAGTTGGTACTCGA -TGAAGGTCGTGAACGAGCTGATATTGACACCGATGCTCATCACACATTATACGGGGCTGT -TGTTGCGAGCTGGGGGCTCCATGCTGCGTTGGTAGCAGACAGCGATACAGTCGAATATCG -CAAGTTTGGAAGCGATCGTTTCAAGATGGCTGCAAATGAGCTACTTTATCCATCGGATGG -CAGTCCGCCTCATCAATTCAAAGGCAAAATAACATTAACTACCTCGAATGGCCCGAATGA -GGCACGTTGCCAAGAAGCAGTGGGTGGGCTTGAGCATATGTATGCGCTTGCTACGCTCGT -TCCACGACTTGAGAAGGAGTTCTTGATTTCTCCAGACTCAGTGTCCCTGAGTGGAAATAT -GAAGTTTATCCGGTTCGGTCCTATGTCGGCGGAAGATGCGATGCATTTGATGACTCTGGC -CTACCAGGGCGGTCGACATGTGATAGAAGATACAGTCACTTATGTCGACACTGAACAAGT -CCGTATCGATTTTCAAGAAGATGAAGAGAAATGGCGGCGCATTTGTATCGATGGAAAGAT -CGTCGCTGTCGAGAGAGATGGTTGGGTGGAAATTTATAAAGAATCTAGTCGTCTGTTGAA -CTTAATCAATTAGATAGAGATCCATTAAATTGCGATCATTATGATATATATCAGAAATAG -CCCCAGCATTTCATAGTACAACACATATAAGCGATGCGGGCTAGCAGACCTACTCTGACA -GATAAACAAGTGTCATTGTACTCCGTACACCTCACAGGGCCATCACTTACGGGGCCGTGC -TCAACATTTACTGAGCATAAAGCGGCAAGGAGCACTAAAACGGAAATGGACGCCAAGATT -TGCTTATACGGTCGCATACGGTCGAACAGGTCTATCAAGGACTAGTTCAACTGTCGATCC -ATAAAGGGAATTTAGTCATGTGTTTTCCGCCATCTCCGGAGATTGGACCCCATAAACCAT -CCGCATCCACCCCCAGCATATGGGATATGCAAATGATACCTCTCTTATCTAGCACCCTCG -ACTTGACTATAATGGAACTGCCCATTAGATTCGATGCTGATCACTGCGCGCACCTACCAA -GCCTCCCAGACTCCAGACCTAGGTGCAGCACAACTCTGCAATAAAAGATCGCGATTCTAT -ACCCTCTCCGGCTGGCAGACAAGCCCGGATACAAAGGAAGCGACCGACATGTTGTTAGTT -CATCAAGTGGGTAGTGTGCGCGTTGGGGAAGTTGTTAGGTGAGTCTGGTCCGGATTGCCC -CTCGAGTCTAAAGTCCTTTTGGCTAACAGTGAAATCGTCCGTCAGATACACAATTACATA -CACCCCGGCGGCCGATCCGATCCTGCCCATTCCTTCAAATCTCTACGTGAGGGTGAAGAA -CACATCTGCTATCCCATTACGCGCCGCATACCTCCATGGCCCCTACACGCTATACGCCGC -TTGCTATCCGTCGCAATTCGACCCCAATACCAAATATGAGCGACAGGATCTAGAAGGTAA 
-ACCGCAGTTCGAGCCATACCTCAAGGCCGGAGGTGGTTGGGACGCCGCCATCAAGGTGCC -CGATAACCTCCTGGAAGCTCACGACTTTGGCTCTCCGGGTCAAGGCGGACCGGCAAGTGG -CCAAAGCGTCAGCTGGATCGTTGAAATTCAATCGCAAGTGATATTCTCAAGTAGCGCAGC -AGTGCATTTCGAGCTGCTAGTGGGCCGAGATGAAAAGTCGCTGGCATATTTCTCAGGCGG -TGCATGGGGTGGGGGAAATGGCTCGACGGGGCCTCCGGCAAAGTTACAAGACCATTGGCT -GCCTGAAACTAGAGGTACTCGGGTGCTTGCCTCGAAAGGTGTGTACTCAAAAGCGATTGC -GCTTCATGTCGACGATACAGTTAGTCTATGGAGCAATCCGCCTTTCCCCTCGGCGGAGCC -AGCTTCCAAAAGCGCTGATCGGAAGAGTCAGAGCTCTCAACCTCCATGCCACAATTCAGC -GCCGGAGGACTCCCCTGAAGCTCCGATGAAAAGTGTCAAGAAGAGGCCTGTTCATCTTGT -GTTACTGACCCACGGATTGCACAGCAACCTAGGTGCAGACATGCTCTATTTGAAGGAGAG -TATTGACGCAGCCATGAGAAAGAAAAAGAACGACAGTCAAGCCAAAACTCCCCACGTTAG -CTCTGTTGGACTGGATGATGGAGAAAATCCGACAAGGTATGAAATCTTTGAGCGCTCTGG -CATCTAACAATCCTAATCCATCGTATTCTTAGCGGAAAAACCAGTCATTCGGAAGAGCAG -AACAGCCAATCTGGTAACAACCCCAACGATGACGACGAAGAGCAAGTAATCGTGAGAGGG -TTTTCTGGCAATGCTGTTCGGACTGAGCGTGGGATTCAATACCTTGGCAAACGGCTGGCC -AAATATGTCCTGCTTCTTACATATCCCGATCAGCCCTACTTTCCTCTGAAGGGCTCAAAG -TCGAACCCATTTTCTCGACCTTTTAGTGCCCACAAGGAGCGAGCTCAGCCATTTGCTCAT -TCGGCCGAATCAACTCCGCAGGAGAATGCTAAAGAATTTGGAAATGAAGACCATGCATAT -CAAATTACGAGCATTAGTTTTGTCGGCCATTCCCTCGGTGGGCTGATTCAGACTTATGCA -ATCGCATATATACAAAAACACTCGCCTCAGTTCTTTGAGAAAATCAGACCTGTCAACTTC -ATTGCTCTTGCAACCCCCTTTCTTGGTCTCAGCAATGAGAATCCAATATATGTTCGATTT -GCATTGGATTTAGGTCTAGTTGGTCGTACTGGCCAAGATCTCGGACTGAGCTGGACAGCA -CCAAAAGTTCGAAGTGGCTGGGGGGCTATTATTGCTGGAAGAGGAGAGTCTGTCAAAGAG -CCTGGCCATTCGGACCCTGGCTCCAAGCCGCTCCTACGAATTCTTCCTTGTGGTCCTGCC -CATGAGGTTCTTAAGAAATTCCAGCATCGTACGGTGTACTCGAATGTGGTAAATGATGGG -ATAGTTCCTTTGCGGACGTCGTCCCTTTTGTTTCTGGACTGGAAAGGTCTGGATCGGGTT -GAAAAGGCGAGGAGGGACAATGGGCTTGTTGGAACTATGGCCGAATGGGGGTGGGCAGAG -TTGACAGGCGCCAACTCGAAATCCCCGCGACTTCCTCGCCTCGACGAAGAGCCGCCTTTG -AGTCTTCCGGCAATGGGGACCGGTCAGCAAAATGCACATTCCGCTACCCAGGTTCCCACA -AGGCCCAAAGACAACCTTTCGGACGATCCGATCTCTCCAAGGCCGGAACAATTTCTAGCA -CAACCCAGTCGTGTGTCAAAGCAAAGAATTCCAGAGGAGGCCGTGGTAAAGGAGAACCTT -GCGGGAAATACGTCCTCCAGTCCCCTGGAAAACTTTTTCTCGTTGTTTCGCCTGAATCAG 
-GGCAAAAATCCCCCTAATAGCAAGAGTGCCAGGATTTACAAACGTAGTCAGACCCTGAGC -ACTTTGGAGACTGGTGACGGTGACGGCACAATACCTGATGTTCAGGGACATTCCTCGCAC -GAGCACGAAGGGGTGCATACGCCTCCAAAGACGACATTTTTCGAATCCGCGGGAGACTTG -TTGATGCCGCCTTTGCCGCCCGTGGATTTCATATTGGACCCTGCCTCGCGACCCCGGACA -ATCTTCCACGACCGCATCTACCACCCAGAGGATATACCACACCCTTTACCGGTAAAGCGA -CGCACATTAGCATTTGGCTCATCACCGGGCAAGCAATCGAAGTCGGCGCCCACTGACCAC -CCACCTGCTAGCATGGCAGACAACCAAAGTGGCCTCAAGGTCGAAGAGAAAATTGCGAGA -GCCTATCACCGTGATCTGACTTGGCGTAAGGTCCTCGTTCGACTTGAACCAGATGCCCAC -AATAACATTATTGTGCGGCGCATGTTCACCAATGCCTATGGCTGGCCTGTTGTGAAACAT -TTGGTTGATACGCATTTCGGCGACACGCCCACTGCGGAAACTGACGATTCCCTGAAGCAA -AGTGCCGAAATGGCCAAATCCCTAAATGTTGGTCCGACTAGTTCGGGTGATGAGGTTGAA -GGGCAGTCCGATTCTGTCGATCCAGATTCGGTAAGAAAAACAGACGATATGCCAATTGTC -TCCGATCTCCGTGTATCGGAAGACCTATCTTCTGTTCCAAAGGATACCTTACCAGCCGAA -GATGAATCACACATGAGCGCCAGTGAGAAAAACTATGTATCGAGGCAGGATTCAGCACGA -TGGACTGATCGTGAAGTTGTGGAGGATGACAGCGAGTCCGGCTTCGAAGTGGAAACTAGC -GCTGGTTTCCGTCACAGACAATAGCACCTGGCTTCTATTGTACATAAGTTTCCTTAGATA -TATACCCTGTAATTATGCGAATGTGATATTTCAAGTGATTATTTTTCTATCTCGTATTCA -ACTAGTCCGAGGAAATTACGGAGCAGGTAATCAGTACTTCAATTCGCCTTGAACATGGCT -CCAAAACTCCTCCACTATCTCCTTTTGTCCAGTCCCATTCGGAGAACCTCGGACGGCAAG -ATACTTTCGTCCGTCAAGACGAGCCTCCCAAACGCCCGGGTTACCATCCGCCATGATACC -CCCAGCTTCGGAGAGGATAACCCATCCCGCGCAGACATCCCAGGCCCAGCATCCACCTTC -CCAATACAGGTCCAGCGTGCCATCAGCCACAGCACACAAATTCAACGCGGCAGAGCCTAA -ACTCCGCATTGCGCGCACCATAGCACCCCCGTTTTCCTTTGACTTGCCCAGCTTCTCATA -TGTTCGAAGCTTAGTCTCCCAGTTGTTCCCGCTGCGGTCCGCACCCCATTCAATAGCAAT -GAGAGCATTGGCTAGTCCCGACAGGGGCTCGACATTGTCGCCTTTGAGCGGCAGACGAGT -CTTGCGATTGAGGAAAGCGCCTTGGCCTCGAATAGCGGAATAAAGGGTTTTGGTGGAGGG -GTTATACACCACTCCCACCGTCGGAATACGTTCAAGAGCGAACCCGAGTGAAACACAGGC -AAATGGAAAGCTGTGCACGAAGTTAATTGTTCCATCGATGGGGTCCACGATGAAGGTGGG -GGCATCGGTCAGTGGGCGTGCTGGGTCATATGTTTCCTCGCCGTGGAATCTAGTACATAT -CATAAGCATATATGTGGCGGCACGAAAGAGGAAGCACATACTCGTAATGTGGGTACTTCT -CCCTCAGAGAGGTTGAGATCATGTTTTCGACGGCACGGTCGTATTCCGTGACGAGGTCAG -TACCTACGGAGATCAGTCGTATTCTTTTTTTTTTTGCTGTCTGTATAGATACGCACTGTT 
-CATCTTGGACCCTGTGTCGTTAGTTCCTGGGAGTGCATTGTTGATGATATCCCCGGCCTT -GACGGCCAATGAGACTAGAAAGTCATGAATCTCGTTGAGGTTTGGGCTCTCCATTGTACA -ATTGAGATGTTGTGGTGGACTGAAGTTTTATAGCTGTATAGCTGTACATATGTAGTTTAA -TGGTCAAAGCCGGAGGAGGTCCGGAAACGCTAGAGGCGGGGCAATGACAACCTCTTTTGA -ATATAGTAACATCAACTtatatattataaatatatagatgtataCTAATTGAATATACAG -CTATGTCACTTCTAGTCTCTAATATATTAGACCTCTCTATGTATTACAAATCGCAAAGAT -CTCAGGGCTAAGTAGATATCTCCGCGTTTCCAGTGACGTCGAAAACCCCCACTTCCGCGG -GAAAAAAAATCTTTGAGATTCTGCGATTTTGAAAATTGACACACAGCCCTTCTTCCTGGC -TTATTATACTCCCAAGATGCCGACGGAACTTGAAGAGGTATGTGCCTTTGGCCTACGATA -TCGATTAGCTCACTCACATCTCTTTTAATAGCTGGTGGAATTCCTGCACCATGGCAACAC -CCAAATCAGACAAATCGGTACGACCACTCCTCTTGCTGCGCGAATATATGATCTAACAGC -TCATAGCTTGTGAAAATCTTGTCGGCTTCTCCACGGCTCAGCCTGCTTTGTTCAAGCGCC -ATCAACTCCTCCCTGTCCGCGATTTGAAGCTTCTGGTCCGCGATTACACTGTGCGTTATC -TTTGATCATAGGGCGTGACTGTCGACTGATATATTTGTACAGCCAATTGCGAAGAATGCA -TTGACCATCTTGATCAACTTGTCTAGCGATGAAGAGGTTTTGAAACTTCTCGCGGAGGAT -GAGTTCATCGAGGACTTGCTTGCGAAATTGACCGTGAGTTTGTCTCAGATAACATATCTG -TTAACAGTAATGGAGCTAATTAGTTTGATTTCAGAATGTCGAGGAGCCCAACGCCGATGA -AGTGGCCATGCTTTTGGCCAATCTCGTCAAAAGCGAAAACCTACAGAAGCTGCTCACTCT -CAAGCGAAAAGCCCCCGAATCCGTATCTACCTCCCAGAACGCGATCGATCAACTGATGGA -CTGCTTCGTAAAGGGCGCTGAGGGTGCGCTTAACAAGCGTGCCAATTATGATTACCTATC -ATACGTGTTTGCCGACCTGTCTCAGACAGAAAAGGGCAGAGCCTATTTCACCCAACGTCA -AGAGTACGATGACGTGGTCCCTATCACCAAGCTGACGGTGTTCACGGAGCACAAGAGCGA -TATTCGGAGGAAGGGCGTTGCATCCACAATCAAGAACGTGGCTTTTGAAGTCGATGCGCA -CCCAATGCTCTTTGACGAGGACGGGGCCAACCTTCTGCCTTACCTGTTACTCCCGCTTGC -AGGTCCAGAGGAGCTGTCGGATGAAGATACTGCGGACATGCTGCCAGATCTTCAGCTCCT -GCCGCCCGATAAGCAACGAGACAGCGACCCCACCATTATCACGACCCACCTCGAAACATT -GCTTCTCTTGACCACAACTCGGGATGGCCGGGATAAGATGCGAGCAGTCAAGGTCTACCC -TTTGATCCGTGAGACTCATATGCATGTTGAAGACGAGAATGTGACGGAGGCATGCGACCG -GTTAGTGCAGGTTCTGATGCGAGATGAGGAAGGTGAAGGCGAAGGAGAGCCCGAACAACC -TAAAGTTGAGGCGCCTAAAAACGAGGATGAAGACGTTGTAGAACTGTTTTAATGATTAGA -TACCAAATCAATTGTACATATGGACAAATTATACAACACGGTATATTGTATGGAGTACAC -CGTAGTTGAACTTGTATGTACGTATCATTGTCATTGGAAGGGGAAGATCAATCAGCCGAT 
-GTCGGGTGGGGCAGCCCCGGAGAGGGACCGGTCGTTTTTCACCACACTACGACCTTAGTC -CTCCACTAGCTCTGTGATTCTCTCTTCCGATTCATCAATGCCAAGATTGATCCTGAGCTA -CTGCATGCCCTAACCCGAAATACTCAGAGTATTTCATCATCCTCCCAACATCACGTCGAG -CAATATTCCACTGCGGACGCGGAAGCTGTCTTGTCCCTCGGCCTCGAGGCGCAAGTGGCT -TCCCGCTGAATCGGATCAAACTGATTCCAATTTCGATGATTGTGCGAGCGACAATGGAAA -TTACCCCGAAATCGACAATCTACCACCCGACGTGAACCCCGACACTGGGATTGTTCAGCG -CTTTCGGGCCTTCATGTCGCGCGCCCCTCTTCTTCCTTCGACGCCCGCCCTGGGTGCTTC -CTCTTACGGAGCATTGCAAAGTCCAGATGATTCCGAAGATGAAATTTCCGTTTCGCGTCG -TCAGAACAATCGTAGGGGTTTCCAACGCGCGAGCTTCGAAAATGGCGTTGGAAGCTCTTC -CAATATACCGGGGTCTACCCCAAGACGATCACAGTCCTCGGTCAGACGACGAAACAGCGT -GTACGCAGAGATAGCTGATCGTCGGCCTTCATCCGCCACGTCCGATGTGGGATTGGGTCC -TGATTCGAAATACTCTTTTGCCACAGGATTGGCTGTACTGGGCAACCCGGTGATGCAGGA -GACACCGGCACCTTCGCCATATATGACAAGTGATGATGAAAATTCCCTGGATATtgatga -tgatgatgatgatTCAAAGTCCTCGGGTGAGGATCCCCCAGACAATTCGCCGTATGCCCA -AGTACGAGCTTCGGTTCCGGCGACGGATGATATCACTCTTTCTATTAACAGTAAGCTTTC -TTTCTGAATATGTATTTCTCAATACCTAATGGCTTCTAGCACCACGGATGTGGTTTCTCT -CTTTGTTATTCTCTTTGACGGGGTCCGCCGCCAATCTTTTCTTTTCCTTGCGCTATCCCA -GTGTTTCCATTACACCAATCATTGCTCTTGTTTTAGTTCACCCACTGGGCAAGTTTTGGG -ATGTGCTTTTGAAACAAACAGGAGACCCTCTCGAGATCTTTGAGAACGGAACCCTTCACC -ACCGGGAATCGCTCTCTGGTGAGATCGAGGCCCCTCCTTTTTCATTGGCGTCCCGGGTCA -GACTTTGGCTTGCACAGGGCCGGTGGAATGAGAAGGAGCATGCTTGTGTCTATATTAGTA -GCAATGTTTCCTTCGGGTTTGCTTTCGCAACTGATGTTCGTACACCGCCAAAATGTTTGT -GACGAAGCTAACGTTGAAAGGTTATTGTCGAGCAACACAAATTTTATCATCAGGATGTCC -CAATTATATATCAGCTACTACTCATCATATCCACTCAGGTTTTGGGGTACGCATTCGCGG -GTCTCACTCGACGATTCCTCGTGCGGCCATCGGCTATGATCTGGCCGGGAACCCTGATGT -CCACTGCAATGTTCTCAACAATGCACAAGTCTGTCAATAAGAAGGCAAACGGATGGAATA -TCTCCCGATATAAATTCTTCGTCGTTGTGTGGGCAGGGGCTTTCCTCTGGTATTTTGTTC -CAGGATTGTTGATGCCTGCTCTAAGTTATTTCAATGTGATCACTTGGCTGGCACCCAAAA -ACGTTGTGATCTCGAACTTGGTAAGTTACTCGGGTCGTTTCCTGCTGCTTTGACTAAATT -TCCTAGTTTGGCGTTGCCTCTGGTCTCGGGATGTTCCCCTTGACCTTTGATTGGGCTCAA -ATTGCCTATATTGGGTCCCCGTTACTCACGCCTTGGTGGGCTGCAGCTAATATTGTGACG -GGACTGGTGGTCGTCATCTGGGTAGTCGCCCCAATACTTTGTAAGATTTTCCCATCTGAT 
-CTCATTTGACTCTGCTGATCAAACCAAGATTATAAAAATGTTCTTTTCTCTGCATATATG -CCCATAGTTTCAACAGCGGTGTTCGACAATACCGGGCGGCCATACGACGTTAGCCGAATC -TTGACCAAGGACTTTTTGTTCGATGAAAAGGCCTATCAAGATTACTCCTCGGTCTATCTC -CCAATCACCTATGTGTTGTCCTACGGTGTTCAGTTTGCTGCTTTGACATCCCTTGTCACT -CACACCATTTGCTGGTATGGGAAAGATATCTGGCATCAGACCAGAAAAGCCTTTGAGGAA -AGACGAGAAGTGCCAGACGCGGAAACGTACCAGCCCCTTCGTGGAAGTAATGATACGGTT -CGACAGAGTCATGATATTCCAAGGACCAGCTCTCATGAACCGAGCCAGGAGATTCCAGTG -GGGGGGGATGATGTTCATTGCCGCTTGATGAGGCGCTACAAGGACGCACCCCTCACGTGG -TATCTTGTCGTGTTCATTTCCATGCTCGCCACAGCCATCTTCACGGTGGAATAGTAAGTT -ACCCCCTCTTTTTGATCTTTGATAGTAGCTGACTTTAGCCCCAAAGCTATCCAACACACC -TACCTTGGTATGGGCTACTCTTGGCTCTTGGTATTACTTGTGTGTTTTTCATCCCAGTGG -GCATCATCATGGCCGTCACCAATCAGCATAGCAGTCTTTATCTGATCTGCCAACTTCTTT -GCGGTATTGTATTCCCAGGAAGGCCTGTAGCGAACATGATCTTCGTGACCTATTCATATA -TCAGCTCTGCGCAAGGGATCAAATTTTCGTCGGACCTCAAACTCGGCCATTACATGAAGA -TCCCACCACGGATTCTGTTTAGCGTCCAGATGATGGCAACTTTGGTCTCGAGTCTGACCC -AAATTGGTGTGCTGAATTGGATGTTCACTTTTGTCCCGGGACTTTGCACACCGGAGGCCA -TCAACGGTTTCAATTGCCCAATTGCTCGAGTCCATTTCAACGGCAGCATACTCTGGGGAG -TCGTTGGGCCCCAGCGCTTCTTTGGCCCAGGGGGACTCTATCGTCCGCTTGTCTGGGCCT -TTTTGATGGGAGCCGTGGCGCCCCTTGGAGCATGGCTTCTAGGGAGACATAGCAAGAAAA -GCTTTTGGCGTATGGTCAACTTCCCTATCTTGTTTGGCAGTCTGAGCTGGATTCCTCCTG -CAACCGGGCTCAATTTTTCTATCTGGGCCCTTGTATGCTTTGTGTTCAATTATGTGATCC -GGCGAAGGAGAACTGCATGGTGGGAAAAATATGCAATGACGTTATCAGCTGCATTGGATT -CTGGCTTAGCATTTGCAGTGGTCGTTGTCTTCTTCGCCTTCATATATCCTGGCTGGGTCG -ATGGTTTCAAATGGTGGGGGACCGAGATTTATAAACAAGTAAGTCCTTACAACTATCTAG -TAGTTTGATATCCGCTAACTGTCTCAAGGGCTGTGATTGGATTGCCTGTCCATACAAACC -ACTGGAACCAGGTCAACGATTCGGTCAGTAAGTTTTGTCTCGATTGAAGCAGAAATTTCA -GCGAAAAAAATACAAACCCTATCAGTAAATATACTACAAGTAGTACCTAGCGAAATAGAA -AGTCAAGAACGGCTGGCCTTTCTTCTTTGAAAACTCTGGACTTGGATAATCTGACTTAAG -TAGTTGGCTCGAGAGGATTTTGAGAAGCGTTACAATTTGAATTCTATTTTCCTCCAAATC -GACCGAGAGCCATAATAGTCGTGAAAGCTCACCCATTTGTATTCTATATACTCGTTTATC -GCGGCCGAAAAAATCTTACCAGGGCTGAACCATATACCATCCATTAGTTTAACCTGAAGT -CTGACTAACTGATAGGACATGCCCTGAGACTACTGAAAGTAACCTAAACGCCCTATCCTC 
-ATCTCGGGTCTTTAGAGCCAACCCCCACTATACCAAATACCTCTCTAGACTGCTCGCACT -CTCAGCTTTCCACCGTAAATCAAGATTCACTTTTTGTATTTGCCTTAGACTGGAGATGTG -GGGTGAGCTAGGCCGAGGTTTTGGTAAAGGTCCAGAGCGGCGGTTACAATCCCAAGTCCC -AGAGTCGCTACCTCTTCACAAGTCACGCGTTAGGGGATGCTGTATTGAAAGCCCCCTTCA -GCCACAATGTATTTGAAAAAGGCTTCTCTTTTCAGGATGAACGGTGTTGGGCCCATGAAC -AAAGCCACAAACTCGGCCACCTTTCTTGAAGGTGTTCGTGATGTCTTTGCCGGCATATTC -CACGGTGCCAGCGAAAATGCATCCGACTAGCACAACCAGTGGCCCAAGGAAGTTAGTATG -CTTCCAGTTGTTAAACCCACGCTGAAATTATTGATCAGAATGCAGTCATCACGCAGTGTG -GGAATCCCTCGGTTGCTCACCGGTTGCGTGTGTTTGGGTGAATAACGATGATTGCGTTCT -ATAATGTCATACTGGTGACGCGGGTATGAAAGGATATCTAACTGAAGGAGCTTTGGAATA -TGTTATTTTATGATGGACAGACTGCTCAATCAGTTGTTCCCGGGTATGACGGGACCAGAT -CTATGAATTCCAAAATAACCCACTTCCAAGGTCAGGGTTCACGGTTGTCCAATGTAAAAC -AACAATTATGCATAGACTGTTTTGCATGCTCTATATCTTTGGAGGTCAGTTGACCCAAGC -CCCAAATTTTTGGCAGGTTCTCAGGGTAGGTACAGGGTACGAATGTATGTACATACTGTA -GGGTGGTCTTTAAAAGAGCCTCCTGACGCACAGACCACACAAACCGGAGATCTCCCCGAA -GCACGTGATCATGTCACCGGGACGGGTAAACATTTGCTCCGAGGTCAACCGTCATACCTG -CGGGTCGCTACCTCCAGCTCTTCCAAAGTAACTTTCCCATCTCTCACTCTGCTTCAAACC -TGCCCTTTCTTGTCGCAACACAAGATAGGCAAAAGGTTAGCTTGTTTCCATATTTTCACC -CCTGGCGCGATCCGCCGCGTCGCCTTCTACCACTCGTACCAACTGTGCTTTGGACATCGG -CAACATGTCGACACCAACAAATCCCTCAAATGCTCGCACTCCAACCGGGCCTAATGGCGC -ACCTTTGATGAGAATGCGTCGTCCAAAAGCAGCGGATCCTTTGGTCCGACCAAAAAGAAG -GCCTGTCCCCAAACCAGCTGGCGCCACTCCCGCCGGCAATGGAACAGCCACTAAGACTCT -TCCCTCACGGCCACAAACGTCCAACCCCCAGACTATCTTCCAGTCTGACCGACCTATGCT -TGAGCTTTCAAACAATAACATGTCTGCGAATGGGTTTAGCGGTCCTTTGCTCAGCGATAA -ATACTTCGATTATCCGGTAGTGACGACAAAGCGAGCTCTGATGGAGGGACTAAAGCATCA -CATCGCGCGATTTGCTTCGAAGAAGAGCGTCGATCCACGTGATGAATCACAATTCACACG -ACCAGTCCGACTTCACCGCCGCGATCCGCGTTCACGAACTCATGATCCAAATTCGGGACG -GTCCGAGATCGATGGCCAGCCTATGGATGAAGCTGAACGCGAAGCTTTTGATGCCCGAAA -GGCAGCACGTGAAAAAGAGCGAGCAGAAAACCTGTCCCAAATCGCCCCCGCCCTTGGCTC -AACCTCGAAACGACCAAATGCCCCGAAGCAGAAGACGCAGCAGGTTTTCAAGTCGGATAT -GACACCTGAAGAGATTGCAAGAGCACGTATCAAGTACGAGGAGGCCTTGCCGTGGCACCT -GGAAGATTTTGATAACCAGCATACCTGGGTTGGAAACTACGAAGCAGCCCTGTCGGAAAC 
-ACATGCAGTATTTATTCTCGACAACGGGAAAATGAGAATGATCCCGGTGGAAAAGTGGTA -CAAGTTCAACGCTAAGAGTAACTTCAAAGCTTTGACTATTGACGAGGCAGAGAAATTCAT -GGCCAAGAGAGTGAAAGACCCTCGATGGTTCATGGAAAAGCAACAACAGTTGGAGCAAGA -AAAAGAATTGGAAACATATGCTAAGCAGCGCAAAGTCTATGCTGGAAAACAGGGCACAGC -TGCCAAGGGCGCCGGTTTGGAGGCTGGTGAAATGGATTTCGAAGAGGATCGATTTGCCGA -CGACGAAGAGCACGATGACCTCTTCAATGAGGATGAAGATGCGAAGGATGCCGAAAAGAG -AATCAAAGAGGACCAGCTCAAGGCGAATGTGTTCGATCTCAAGGACGAGAAGGAGTATGA -TGCTGAAGAAATGCGAGAGAAACGTGAGCGAGAGGCCCGGCGTGTCTTGGGCAAGGGTGT -GCGTAAAGCTCTGAAGAAGAGAGAACGGAACTTTGATTACAGCAGCGGTTCGGACGTCAA -TCCGTACACGGACGAGGAGGTAAATTTTCATCCATCTTTGGCAGCAAACGCCAAACTAAT -GTATACAGAGCTCCGATGACAGCGAAACCGAGCGGGCCAAGGAGGAGGAACGGAAGAAGG -CAGAAGAAGAAAAGGCCCAGAAAGAAGCGAACTCCTCCAAGGGAACAAATACTCCCTCCG -GTCGACCCAAGCATACAGATGCCCTTAAGAAACCGTCTCGGAAAAGGCTTGGATCACCAA -ACGCCTCCGATGCGAGCGGCACCGATACATCGCGAAAGAAGGCCAAAGGCATGCATCTCT -CTACGTCTCAACCTCCGTCGCGTCCGATCTCTCCTTCAGCTTTGCAGGTATGTATAATTA -TAACCCTTTTGTCAATACATACTGATTGACTCAGACTGGCAAGAAACGCGTGCGAAACAT -CCCTGGTGGCGCTGGATCAGGCAGTGATGTCGACACTGTCGGATCTGGAGCTGAAATGAC -CGAGGGTGGCAAGATCAAGAAACTGAAACTCAACCCTGTGGCTTCTCGTGGTGGCACCCC -ACAAGGGTCCCGCGCCGCAAGTCCTCTGCCTCGACTCCCCGGAAGCCGTGCAAACAGCCC -TGACGTCCCCAGAGGTAAGTTTCGGTCCAAATCTATTCTTGGTATGAGACATTAACTAAC -CATCTGCAGGTCAACGTGTGTCAACCCCAATTTCTGGAAACCAGACGTTCCCCACGGCCG -GCGAAATTCACGCAGCCATTCCAGCATCAGGCATCCTCAGCAGCGATCTCCTGAAGGTTT -TCCGTCCCCGTATTGGCGAGTCTAAGGAGAACCATCGGAGATTCATTGCCATTGTCAAGG -ATGTCAGTGTTTACGGAAAAGAGGATCGTATGCTGCGACCAGGTCCCTGGAAGGGGAATT -AAGACTTGTGTTAAGTGATTCCAAGCTAAAATCAAATGTCGAACACTTTCCATATTTTTA -CTTTTATTTTCTATCTGCCAATGTCCAGCAGGCACTTTGTGACTGGGAATGCTTCAACGA -ATATTTTCCGCGGGTATTTCATGCTGGCTGTTCATAGTTCATGGGAAGATCACCTATCTA -ATATCCAGTGCAAAGCAACTTTTGCATAAAATTTGAACATTACAATCCATATTTGGTATC -TTCCGAGGGTTAAAATGAGATAAGCGTTTTTTAGGGTCGTACAGGTAGAACCTTATACGG -ACGTCGCACTTTCCTGTAGACTTCTAGGTACTTCCACTTTCTTTTGATTTGAACTCTTAG -AGAGGGCGTAAGCCTGGAGGCTGTGAATTACACTATCATTCTCCCAGTCTCCCCACTGCT -GGAGCCTATGCTCGGTCTTGGCAATGGCACGATGCATCTACCTCCGAAATGTTATCTAAT 
-AACATTTTGTGTGAAAAAAAACCCCCCTGTAGCATGACTCCATCTAACGTGCAACTCGGT -TGAACCCTTCTGGTTCTCATGTATATAAAATGCTCGATTTCTCTATGTGCCAAAACGTGA -TTGAAAGATATATTACATTGTACGGAGCATCTGAGATCAACGGCCTTCCGATGTCAGGAG -TCGTACGGGTCTCCAATACTCCGTGATATAATAGAGATCCATTGCCTCAGGCTCGGTGAT -TATGCGGATGGCACTGGCAACATGCCATCAACTTGAGCAGCCCCCTCAATTGGACATACC -CACTTCACTCGTCTCTCGGCATAGATCTCGACCTGAGGCTTCTGTTCGTTTAAAATCTCA -AGGTCATCAAATATTCCTGCCCGGACAACTGTGACCTCGTTGGGTTCTCCATTGGACTCT -ATCTTCCGTCCAAAAAGCGGCGTGCCTATCACTTTCGTCAGATGCTTCCTCTCGAATTCA -GTGGAATACATACCACAGTCAGGACAGAAATAGTTCTTAATACTGTTTCCACTATCCGAT -GTTTTTGATAGTTCTTTCGGACTTCCGGAGATCTGCAATTCGGCGGTTTTGACGAGAATA -TTGCACGAATAGAGTGATCCGGTAATCTTGCGACAATCGAGACAGTGACAGATTGCCTAG -GGAGTAAGATTAGATTTGTTAAGACTTGAATACTGTCAGTTGTACTCACCGACGTTATAG -GTTGGCCGTTATGTTCGACTCGGATCTTGCCACAGAAACAACTGCCAACTGCCATTTTTA -CAATATCTGAAGAGTGAATCACGATGTTGAGTAAACACAGAAATCGGAAGCTAGGTAGAT -CCTGACTTGTTAATAAGCACTAGGCTCAAAAAAAACATTGGGGAGATCTGGTGAGATGGG -GGCCAGGACCACAGTACCAACTATGTGCGATAGAAGGGCACAGGGAAAATGTTTGTTGAG -GATGGATTATACAGACCTGCCACTACGTATTTACCCACCATTTCCCACCAAAACCGACCA -ACTTTGAATTCTCTCAATATATTTATATATTAATCATATTTAACAAAAGTTTCAATGGGA -GATTAATATAAGATTAATCAAATTCTTCTTTTCGGATGATTAAATAGGTGTGGCTGTCAA -ATAGTCACTGATTGAAATGTTAATGCTCGTTGGAACGTCCATTATATATATATATACATG -GGTGTCCATATTAAACTCGCGATCTATATCTTTTGTCAATGGTTGCAAATATACAAGTCC -CTATCTAAAATACTTACCAAGATAGTTTGCATACATTTACTCTTTGTGCGCAATTTCGAA -TGACTTCAAAATAATTAAAAACCATGGTACAATATGAATTTACAAAAACAAGTCGGTTCA -AAGTTGGTTAGTTTTGGTGGGAAAATGGCGAGTGGGTAAATACTTAACAGTATTCCACCG -TACAATGGATATACGTAGCTCTTTGTTTTAGCCCTTTGTTTTATGAAGATAATTCGAAAG -AAAAAAGAACTAATAAATGTCATCACTCCCATTATAAACACAACGAACCAGGAGACGAAG -CTGGTTCGAAATCTAGAAAAAAATATATCACAATCATTACGTCTCAACCCAAGAGAACCT -TTAGCCTCTAGGATCTATATGCGAACAGAAACTGAAAATGCACCTCTAAAACTGCGAGAC -CGCAACAGCAACAGCCTGTGAGTCACTGTGGCTGATGCTAACGCTGACCTGCTTGACACC -CGCCTGCTTAGCAGCGGCAGCGGCGGCACCATGAAGCTAGAGATACGTTAGCGTTTTGTC -ACTAGGAATCATGGAATAATCAGACACTTACGTTCACCACTGGAGCGCCGTTGGCATCAA -CTCCGATCTCAATATCCTTCAGCGCAGCACCGGCACCCTTGCCGCTGACACCCAATGACT 
-TGAAGACAGCTTCCTTGGCACTCCATCGGCCTGCGAACGAGGACTGTGGGGAGGCAGCCT -TGCGGCAGTAGTCTTGTTCACTCTGTGTGAAGTTGCGCTCAACGAAAGTCTCGTTCTCAA -TGTTGATGGAATCAATGTGCTCGACGTCAACACCGACCTTGGAGTTCTCGGCGGCGTTGG -CCTTGGCAAGGGACTCGAGGGTCTTGCGAGTGCTCTCGGACTGCTTGGCTTCAGGCGCAG -TGTTTGAGTACTTGAGCTCAGAGGTCTTCTTGTCGACAGACACACGGTAGTCGGGGTTTA -GGAAGATCTTGCTCTGCTGATCATCCTCATAAGGAGCCTTGTTCTTGGCAACGAAGATGC -TGTTGTTGATCAGACCGTTGTGGAAGTAGCGGTAGGCCTTCTTCTGGCGACCCTCGACCT -TGGTCTTGTAGGCTTGGTACTGAGCCTGGTCAAGAGTGGCATACAGGTACTTGGGGTGGA -TACCAATTACCTGAGCACCCTTCTGACCGAAACCGAAGGAGGTGACGGAGAACGCCTTGA -CACCGTCGGTCTGGATGCTGCGACTGGGGTAGACAATGTAGTCGAACTTCTCCATGACCT -TGTCAACATTGTCAGCGTTGCGGTTGCCTGGCACGAGACCACTCTCCAGGACCTGCAAGC -AGCCGTTGAACATCCAGGCACCGGCGGCACCCTTGGGGTGACCAGTGAGGTACTTCTGGA -AGATACCCATCACTGCATTACCCTTGGAGCGTCCAAGGTGCTTCATCTGTTGGCAAATGA -CATCGGACTCGTTCTTGTCGTTTGCGACAGTAGAGGTACCGTGGAAGGATGCAACATCAA -TGTCGTCAACAGTGAGACCCCAAGTGGCCATTGCACCACGGAGGGGAGCAATGCGCGAAT -CCTGCTTCCAGAAGTTGTTGCCCAGGCTGTATTGGGCATCCTTCTCCTGGCGAATGGCTT -CGCGCTCAATGTGCTGAGCACGCTCCTGCATGTACTCAGCCTCGTTAAAGGTCTCGTCAC -TCTGAGCCTTCATGGCATCAGCCTCCTCCTGGAGGTACAGAAGCTCAGCTTCTTGCCACT -CGTTGATCTGCTTCTTGCGGAGATCCAATTGACGGCGACGGTACTTGATGTCCAACAGAG -GGGACGGGAACTTGCCAGGGTTCTCGCGGGCGGTGGTCAGGACACCCTGACCAGGAGCAG -GAACGGACCGTCCAATCTTGTCAGTGGCAGTGGTGGTCAGAGCAATGATACCGTGGATGG -GCACTCCCATGTCGAGAGCAAGTTGTGCAGTCATGATGAGCTGCATACCGCAACCCTGGG -ACTCCATGAAGCCAGCGCGAGTGGTGGTAGTGGGTCGTGACATCTCTTGCGGGGTGCGAC -CGTGGGCGAATTCATCTTCAGCGTTGCTAGTAGCCTTCATGTTGGCGAACTCATAAGAGC -CCTCCTCCTGGAAGTCATCGAAACCACCAACGAAGCAAACACGGGCCTTGCCCTCGACAA -TGGTCTCGTAACCGATATCCACAGATTCAACAGCCGTGGCGCAAGCACCGACGGGAGTCT -TGATAGGACCGGTAGAGGACAACAGCAGCATGTTGACCCAGGCGGACATAGTGTTGATGA -AGGACTCCTGCAAGATATCCTTTTGCAGCGGCTTGTCGAGGAAGCGATCCTTGTACATAC -CACGCAAAGCGTGGGTACCACCGATACCCGAGCCAATACAGTTGCCAACTTCAGACAGGT -GAACATACTTGTAGAACTCGTAAGGGTCGGTAATACCGGCTGAAAGCATAGACTCAGCAG -TGCAGACCAGCACAAAGAGTGTGACAGGATCAACCTGGGAGATAATATCGTCGGGGATAC -CATAGCGGCTGGCGTCCCAGCCTGTGGGCACCTGACCGGCGACAAGACGATCGAACTGGA 
-GGGCCTTGGGGATGAGCATAGTGGCGCCCTTGCAAAGACGGACGGTGTACTCTCCAGACT -CAGGGATCTCGAAGATCTCAACCTTGTCTCCATGCTCACGCTTGAATTCTTCAGCGGTCT -CCTTGGAAGCCTCGAAAGGTTCGAGGTCCTCCTGGATGACAATTTCTTGAAGAAGCTGCT -TCTTCTTAGGATCGTATCCCTTGAACAACTCGGGCTCGATCAGACGGATACCAGTGTGCT -CCAAGATGTGCTTCTCGTACTTGGGCTTGACATCCTTATCATCAACGGGCTCGCCGGTCT -TGGCATCAACCCAGCCAGAGTAGGCCTGGCCCTTGAGGGGACCGTTGTGGTGCTTAATCA -GACCCATGATCCATGCCATCTCAACACAGCCCTCCAATGAGAACTTGCCCTTAGATTCCA -TCTCCCATCGGGTACGGGAGTTACCCCAAGGGCCGACTTCAGCGAAACCAGTGACAACGA -CCACCTTGTCCAGGTTGACCATGCCCTTGAGATTCTCGTTGAGCGGCTTGACTTCCTTCT -CCCAGTCGGGCAGGTTAGGGAATTCAAACTTGATGTTGGCACGGGGCTCAGCAACGACCT -TCTTATAAAGAACTCCACTGTCTTCGCCGTTAACGATCTTGTGCTCGATGGCGGTTTCCT -TGATGACGGCCTGGCGGACATCGCTGGTCTCCATGATGTCGGTGCGAAGCTTGGTCATCA -GGCCCTTCAGGTCTGGAATGAATTGCAGACCACCGTTAAGATCAGCAAAGACTGGATCGA -GCTGGCAAAGGTTGACGATGGCCGGAGACATCAGTCCGAGGAGGTTGAAAGCCATCTCCT -GCTGAGAGAAAGTACGGACGCCCAGCTTCTCGACGCCTTCGGCGACCATATTGTTGCCGC -TCATCAGACCAGTACCACGGGTCCAGCCAATGACCGCACCACAGATGGTAAGGTAGTGGC -CCCAGTTCTCCGAGTACCAACGGTTGAACAGAGTCTCCAAAGCAAGCTTAGACTCAGAGT -ACAAGCCATCATTACCAAATGTGCCGTGGTTTGGTGACAGAGGCAAGATAACTTGAGCGG -GGCGAGTTTCGAAGCCATGAGCTTGTTTTTGGGCCTTGACCGAGCCCAAAAGACGAAGCA -GGTTGGTCAACATAATACGGTGAGCCAGCTCGGACTTGGAATCGATAGAATCAATCTCGC -GGCCGTTCTCAGGGATGGCAGCAAAGGGAACGACAAAGTCCAAGTCCCAGCCAAGACCCT -TCTTGCTGTCGTAGATGTACTCGACCAGAGCCTCAACATCCTGCTTGCTGCCTTGGTTGA -ACGGCACGACCACAAGCTGGGAGCCACGAGCACCGTAGCGGGCGTACATGGCCTGGTAGT -ACTCGGTAACCTCGCGCGAGAAGCGACTAGTAGTGACAATAACCTGAGCACCACCGCTGA -TCAGACCTTGGAGGACCTCGGCACCAATAGAACCAGCACCAGCACCGGTCATCAGTACGT -TCTTGCCCTGGAAGGTGAGGCCATCCTTGGCAGCCGACTCCAACACATTCAGATAGACGC -CAGTCAGTTTCTTATTGTAGTCCCAGCCGTGTTCAGTCTTCTTCTTCAGGTGCAGGAATG -GAATGGTCTCCACCTTGCCAGGGCGAGTGGGAGTAGTGCGTTTGGCAGCACTACGGCCGC -CCTTCTTGGAGTGGCCGTTCTCCTGGGGGATAATTTGGCTCTCGTTCATGCCGAGAGCGT -GAACAACATCCTTGTACAGTGCATCAAACTGGAGCTGGGACGACTTGGAAAGACGATGCT -GCTTGCGAATGAGCTTGTAAACACTCTTGAGGTCGTTTTGAACCTTGGTGCGGTTACTGT -ACTCGGAGATAGGGCCTCCCTCAGCCATGTGCTTAACGTAGTGCTCAAGCTTGCGAGCAC 
-TGGTGCGAGGAGCTTCCTTGTAGCCAATGCTGCCACGGGCATCAATGGTCGTTTGGGGGC -CTGTGGGCACAGCGACATCCTTGTAAACTGGAGCTACCTCCAACACCTCGCGACAGTTTT -CAATGAGCTGCTGACCAAGTTCCTTGGCCAGCTGGTAGGTCTCTCCGCGCTCAGTCGGGC -AGTTGTCGATGTGATACTGCATAAAGTCAAGGAGGAGGGGGTTGGAGCGGTTCATAATGC -GAATACACTGGCTGACAATTTCGCGGTCAACCACCTGCAGGCGACCAAAGATAATATCGT -AATACATGCTGAGGGCATCTTGACGAGCCCAGTTCCACGAGGAGTCGTAGATACGGGCCT -TCAGCTGATCGAATGATGGCTCAATGCCCGAGGCGTAGAAGTCACCGTGCTCTGCTTGCC -ACAAGTCCAGCTGAGCCTGCAACGCTTTCTGAGTCTCTTGGGATGTCACATAGGCCTTCT -CGCCACCACGGAGGTCCATTTTGAGGTAGCGGGCAATGATCTCAAGCTGCTGCTTGAACA -GAGCTCGTTGATCCTTGGTCAGGGCATCAATGGCTGCGGGGTCCATGACCATgccaccgc -cgccaccgccgccgtcaccaccgGCAACCGGAGCTGAAAGGTTTACGCCGGCACTGGCGG -CATATTTGTTGGCTACATCGTCGAGATAGACCTTAGCATCCGCTTCAGATCCAAGACGAG -CAGCGGGCTCCATGGTGAGAGCAAGCAGGAGCACACCATCCTGGCGGCCGGATCCCAAGC -CCCATCGAGTCTCCAGGTATTTTCTGACAGAAGTAATGTTGAAACCACCGGGCATTTTGG -AGGAGACCATTCTGGCGATGAGTGAGGAAGACTGCTTGCCAAGTTGGCCGTTGAATGTAG -CTTGCATCGAGGCACCTAACTCATCGAGAGGGACGTCCTCGGGCTTTTCGGGAGTGGAAC -CGAATTCCTTACCCAGGTCACCTAGGATTTCATTCTGTAATGTGGACTTGCCTATTCGCG -TGTTAGACCAGACCATTTGTTTCTTGAACTCGGCTGTACTTACCTCCGACAAGATCCTTA -ATGGCTTTTGACAATGGCACATCCGCGAGGCTCTTCTTTAGCTTCTGAGCAACTAGTGTC -CGCAGGATATCGACTGCTGTGACGGGAACATCTTCAACTGAGGCAGCAGGGCCTGCACTG -GGGGGTGGCGGAGGagctccagctgcaggagcagcagctggtgccgcaggagcagctgaa -gTGGGCTCTATAGCCTCGGGTTCTTCCTCCACGGGGTCGACATCGTAGTAGATCTCCTTG -GCATCCTTATTATAGCAGAGAATCTGGCGTTGCACGGAAGTGGCAGCATCGTAGGCCTCG -TACTTAGAGGCTAGTGTCCGCCGGGCCATTCCTCCAAGAGTATCTGCGGGACCAATCTCG -ACGATGCGCTCGGTGCGCTGTTCGGCGAGGATCACATCTTGGGTCTCGATCCATCGGACT -GGGGAGGCGAATTGGTAGGCAAGCAGCTCCACCAACAACGTGTGGGCGAGCTCTTGTTCA -ACTTCCGGACGCATGATGGAAGTTAAGGGTATCAATTGATTAAAATGAGATTAGGACAAA -TCTCCGGGTTTGCGGGCGCGACAAGAGTCGTCGGAGAGAGCCCAACCGAAGGAAAGAGGG -AGGAGAGGAGGATGATGTGAAGAAAAGTAATAGGGGAGAATTAGAATAGGAGTCAATCCT -ATGAAAATGTATCAGTGAAAGTGCCCCAAGACAAAAAAAAGATGGTCCCAGAGAGAAGAA -GGGAAAAAAAAGGTGGGAAAGGAAATGGAGTACGGAGATAGATGTGGTATCAATTAATAC -CGGATGGTAATTTTCCCAGTACATTAATTAAATAAAATATGATAAAGCAACCACTCACCC 
-TAAAGACACCGATGGAATTGGAAAGACGAGATTGATGAATGAGAAGAATAATAATTCTGA -TGATCCCACAGAAATTCTTCGGTCATTAACTGCGAGATTCGGAGGAATGGGGAGGGACAA -GAACAAAATTGGTGAGGGCGTTGGGGTTCCAGAGCTTCGGCCGGAGGATCTGACACTAAG -CAACGCTATGCGGCATTGCGCGGATCACCAGCTAGCTGGGTTTCAGGGGGTGTATAAACT -AGCTTGTAGAAATCCCAACGACTAACATGCGGCTGAGATGGATTATAGATATGGATGGTC -TACATTTATATACATTGATACTTCAATCATAACCACTACCACATTATGTACCACCAAAGT -ACAAAGCAAGATAATTTCCAAGGGTCAAATTACAATACCTGGGTAACACTGTGTAATTCT -CAACGGGTTCAACCCAGTCAACAACTCCATTGGGGCGGGTCTAGTATCGCGCTAAACCTC -TTTCAGCTGTCTGCAGTTCAACCAGCGGACCTTTTTTTTCTTCCCTATTGCTTTCCCCTT -CAGGCTCAATTGTGCTCTGGACCTCTTTCCCTATTTTATATGTATATCTGTTGAAATTGT -GCCCCTTTACTTCTCTTTATTCCTTTGATAATACCCCCTTAACTTCACGGCCTAAAGGTC -TTCCAGTGAGTCGGCTTCTCCAGACGTTACATGTCGCTTTCTAACAGAGACAGGAAACCC -CTCCCTCTTCAAATTTTAGGGTCCCTCTGGTCTTCACTCTCTTTATCTCCTTCTATTACT -TTTCACAGAGAGAGAGAGCCACATCATGTACGGCACATTGACCGGTCCCCAAACCGGTAT -TAACACCCCTCGATCCTCTCAATCCCTCCGACCACTCATCCTCTCGCATGGATCTCTCGA -GTTCTCTTTCCTCGTTCCTACCTCGCTTCACTTCCATGCCTCGCAGTTGAAAGACAGCTT -CACCGCTTCTTTACCCGAGCCCACAGACGAATTGGCCCAAGATGATGAGCCATCATCAGT -GACCGAATTGGTTGCCCGCTACATTGGCCATGTGGCCCACGAGCTCGATGAGGAAGACGA -TGCACAGGGTAACTTCCTGGACGTCCTGAAGCTCGTTCTGAATGAATTCGAACGTGCCTT -CATGCGTGGTAATGACGTTCACGCCGTGGCAGCTGCGCTCCCCGGTATTGTTGCCAAAAA -GAATCAAGTCGTTGAGGCCTATTATGCCGGGCGAGCTGCGGCGGGCCGGCCCACCAAGCC -CTACGACTCGGCTTTGTTCCGTGCGGCGTCCGATAAAGCCGCAGGTATCTATTCAGTATT -CGGCGGTCAAGGCAACATTGAGGAGTACTTTGATGAATTGCGCAGCATCTACACCACGTA -CCCTTCTTTCGTCGAAGAGCTCATCACTTCCTCCGCCGAGTTGCTTCAGTCGCTCTCCCA -AGAGCCCGAGGCTAGCAAGCTCTACCCTAAGGGCTTGGATATCATGCAGTGGCTGCAAGA -TCGGGACGCACAGCCCGATATCGATTATCTGGTCTCTGCTCCCGTGAGTCTACCATTGAT -CGGTTTGGTCCAATTGGCCCACTATACGGTTACCTGCAAAGTTCTGGGCCGCCAGCCTGG -TGATATCTTGGAACGCATTCTCGGTACCACAGGTCACTCCCAGGGAGTCGTCACGGCTGC -CGCCATTGCAACCGCTACCAGCTGGGATTCCTTCGCCACAGCGGCCCGCAGCGCCCTGAC -CATGCTATTCTGGATTGGTTTGCGCAGTCAGCAAGCCTACCCCCGCACCTCTATTGCCCC -ATCCGTTCTCCAAGACTCGATTGAGAATGGTGAGGGAACCCCTACTCCCATGCTGTCCAT -CCGCGATCTCTCGCTCGCCGCCGTTCAGGAGCACATCGATGCCACCAATCAACACTTGCC 
-CGAGGACCGCCACATCTCCATTTCTCTAGTGAACAGTGCTCGCAACTTTGTAGTGACTGG -TCCTCCCATCTCGCTTTACGGTCTTAACGTTCGCCTACGCAAGGTCAAGGCGGCCACCGG -ACTTGACCAGAACCGTGTGCCATTCACCCAGCGCAAGGTTCGCTTCGTCAACCGTTTCCT -GCCTATCACCGCTCCTTTCCACAGCCAGTACCTTGTTTCCGCCTACGACCGCATTCTTGA -GGATCTCGAGGATGTTGTTGACATCCCCGCCAAGTCATTGGCGATCCCCGTCTTCCACAC -GAAGACTGGAGAGGATCTGCGACAGCTAGGTGACAAGAGTATTGTGCCCGCGTTGGTTCG -CATGATTACCCACGATGCGGTCAACTGGGAGCAAGCCACTGTCTTCCCCGGTGCAACTCA -CATCCTTGACTTTGGCCCCGGTGGCATCTCTGGTCTGGGCGTTCTGACCAACCGCAACAA -GGACGGTACTGGTGTTCGTGTCATTCTCGCTGGTGAAATGGACGGAACCAATGCGGAGGT -TGGCTACAAGCCAGAACTGTTCGACCGTGATGAGCATGCTGTTAAGTTTGCTGTCGACTG -GATCAAGGAGCACGGTCCTCGCCTGACACAGACATCTACCGGTCAAACCTACGTGGACAC -TAAGATGAGCCGTCTGTTGGGAATCCCCCCTGTTATGGTGGCTGGTATGACCCCCACCAC -CGTCGCTTGGGACTTTGTGGCTGCTACTATGAACGCCGGTTATCACATCGAGCTGGCTGG -TGGTGGCTACTACAACGCCAAGACTATGACTGAGGCCATCACCAAGATTGAGAAGGTTAT -TCCTCCTGGTCGTGGCATCACTATTAACTTGATCTATGTCAACCCCCGTGCGATGGCTTG -GCAGATCCCGTTGATCGGTCGGTTGCGCGCGGAAGGCGTGCCCATTGAGGGTCTCACCAT -TGGTGCTGGCGTGCCTTCCATTGAAGTTGCGAACGAATACATTGAAACTCTTGGTATTAA -GCACATTGCTTTCAAGCCTGGCTCCGTCGATGCTATTCAGCAGGTCATCAATATTGCTAA -GGCCAACCCCAAGTTCCCCATCATCCTTCAGTGGACTGGTGGCCGTGGCGGTGGTCACCA -CTCCTTCGAGGACTTCCACCAGCCCATGCTCCAGATGTACAGCCGTATCCGCAAGCAAGA -TAACATTGTCCTTGTTGCCGGCAGTGGCTTCGGTGGCTCGGAAGACACTTATCCTTACCT -CTCGGGTACCTGGTCCGCTAAGTTTGGCTACCCTCCTATGCCTTTCGACGGCTGTCTCTT -CGGGTCTCGCATGATGATCGCCAAAGAGGCACACACCTCTTACAATGCCAAGAAGGCCAT -TGCCGATGCCCCTGGTGTGGACGATGAGGAATGGGAGAAGACCTACCAGAAATCAACAGG -TGGTGTGATAACCGTTCTCTCTGAGATGGGTGAGCCTATACACAAGCTGGCCACCCGTGG -CGTGCTATTCTGGCAGGAGATGGACCAGAAGATCTTCAAGCTCGACAAGGCTAAGCGTGT -CCCTGAACTGAAGAAGCAGCGGAACTACATCATCAAGAAACTGAATGATGACTTCCATAA -GGTCTGGTTCGGTCGTAACGCTGCAGGCGAGACTGTGGATCTCGAAGATATGACCTACGC -TGAGGTCGTTCACCGCATGGTAGACCTCATGTATATCAAGCACGAGTCGCGATGGATCGA -CCCCTCGCTCAAGCGCTTGACAGGTGACTTCATCCGTCGTGTCGAGGAGCGTTTCACTAC -TGCCGAGGGCCAGGCATCGCTGCTCCAAAGTTATTCCGATATTGATACCCCTTACCCTAC -TGTTGACAATATCTTGGCTGCTTATCCCGCGGCTGCCGATCAACTGATCAACGCGCAGGA 
-TGTGCAGCACTTCCTGCTTCTCTGCCAGAAACGTGGACAGAAGCCCGTGCCTTTCGTTCC -CGTCCTGGATGAGAACTTTGAGTTCTTCTTCAAGAAGGATTCTCTGTGGCAGAGTGAGGA -TCTTGAGGCTGTTGTCGATCAAGATGTCGGCCGAACCTGTATTCTTCAGGGTCCTATGGC -TGCCCGCTTTTCCAACATCATTGATGAGCCTGTCAAGGACATTCTTGATGGTGTCCATCA -GGGCCACATCGCTGGCCTCCTGCGTGATGTCTACGGTGACGATAGCACCAAGATTCCCGT -CATTGAGTACTTCGGTGGACAGCTCATGAACACCACTGAATCAGAATTTGACGGGTTGGT -TGTTTCGGAAGAGCCCAACAAGACCAGCTTCCGCTTGTCTTCCACTGCACCCTTGCCAGA -CTTGGACCGCTGGCTCAGCCTGCTTGCCGGCAAGGCCTACTCCTGGAGACATGCTCTGTT -CTTGGCCGATGTTTTCGTTCAGGGCCACCGCTTCCAGTCTAACCCCATGAAGCGCATCGT -TGCCCCCACTGCAGGCATGTACGTGGAAGTACTTAACCCCAATGATGCCGCAAAGACCGT -TATCAGCGTGCGAGAGCCGTATCAGTCTGGCAAGTTGGTCAAGACTGTCGAGGCCAAGAT -CAACGAGAAGGGCCAGGTCAGTCTGACCCTGTTTGAGGGCCGTACCGCCGAGAATGGCGT -TGTTCCCCTGAACTTCTTGTTCACTTATCACCCTGAGACTGGTTACGCACCTCTTCGTGA -GGTTATGGGCGATCGCAACGATCGCATTAAGGAGTTCTACTACCGAATCTGGTTCGGCAA -CAAGGACGTGCCCTTCGACACCCCTACCACTGCCACCTTCAGTGGCGGCCGGAAGACAAT -CACTGCTCAAGATGTTGCCGACTTTGTTCACGCTGTTGGAAACACCGGTGAGGCTTTCGT -TGATCGTCCTGGCAAGGAAGTCTTTGCCCCTATGGACTTTGCCATTGTTGCAGGCTGGCA -GGCAATCACCAAGCCCATCTTCCCTCGCACCATTGACGGAGATCTGCTGAAGCTTGTCCA -TTTGTCCAATGGTTTCAAGATGGTACCCGGCGCCCAGCCCCTCAAGGTTGGCGATGTTCT -GGACACAACCGCCCAGATTAACTCTGTGATCAATCAGGATTCTGGTAAGATGGTCGAGGT -CTGCGGCACCATCAAGCGAGACAACAAAGCCATCATGCATGTCACTAGTCAGTTCCTGTA -CCGGGGTGCTTACACCGACTATGAGAACACCTTCCAGCGCAAGGATGAGGTCCCCATCCA -GGTTCACCTCGCAACCAGCCGGGACGTCGCCATTCTCCGCTCGAAGGAATGGTTCCGCAT -GGATGACTCTGACGCTGAGCTTCTGGGCCAGACCCTGACATTCCGCTTGCAGAGCTTGAT -CCGTTTCAAGAACACTACCGTCTTCAGCAACGTCCAGACCGTTGGACAGGTCCTTCTCGA -ACTTCCCACGAAAGAGGTCATCCAGGTTGCCTCTGTCGAATACGAGGCAGGTGACTCACA -TGGCAACCCCGTGATTGACTACCTCCAGCGTAACGGAACTTCCATTGAGCAGCCGGTCTA -CTTCGAAAACCCCATCCCGCTCAGTGGCAAGACCGCTCTGGAACTGCGTGCTCCCGCCTC -TAACGAGACCTATGCTCGCGTGTCTGGTGACTACAACCCCATCCACGTGTCTCGCGTCTT -CTCCAGCTACGCCAACTTGCCCGGAACCATCACCCATGGCATGTACAGCAGTGCCGCTGT -TCGCAGCCTTGTCGAGACCTGGGCCGCCGAGAATAACATTGGCCGTGTCCGTGGCTTCCA -GGTCTCCCTGGTCGGCATGGTGCTGCCCAACGACATGATTACCGTTAAGCTGCAGCATGT 
-TGGCATGATTGCTGGTCGCAAGATCATCAAGGTTGAGGCCAGCAACAAGGAGACAGAAGA -GAAGGTCCTGCAGGGTGAAGCCGAGGTTGAGCAGCCAGTCACTTCCTACGTCTTCACTGG -TCAGGGTTCTCAGGAGCAGGGCATGGGTATGGAGCTGTATGGCTCTAGCCCAGTGGCCCA -GGAGGTCTGGGACCGCGCTGATCGCCACTTCATGGAGAACTACGGTCTCTCCATTATTGA -CATTGTCAAGAACAACCCCAAGGAGCTGACGGTCTACTTCGGTGGTCCTCGGGGTAAGGC -CATCCGGGAGAACTACATGGCCATGACCTTCGAGTCTGTGAACGCAGACGGATCCATCAA -GTCCGAGAAGATCTTCAAGGAGGTCGATGAGACCACCGCCTCGTACACCTACCGGTCTCC -CACCGGTCTTCTCTCTGCCACCCAATTCACCCAGCCAGCCTTGACCCTGATGGAGAAGGC -TAGCTTTGAGGACATGCGCGCCAAGGGCTTGGTGCAGCGCGACAGCAGCTTCGCCGGCCA -CTCACTTGGTGAATACTCGGCTTTGGCCGCTCTGGCTGATGTCATGCCTATTGAGAGCTT -GGTGTCTGTCGTGTTCTACCGCGGTCTGACCATGCAAGTCGCCGTTGAGCGTGATGCACA -GGGTCGTTCCAACTACTCCATGTGCGCTGTCAACCCCAGCCGTATCTCCAAGACTTTCAA -CGAGCAGGCACTGCAGTACGTTGTTGAGAATATCTCCGAGACCACTGGCTGGCTGCTTGA -GATCGTCAACTACAACGTTGCCAACATGCAATACGTCGCCGCTGGTGACCTCCGTGCTCT -TGATTGCCTCACCAACCTGCTGAACTTCTTGAAGGCGCAGAACATTGACATCCCCGCTCT -GATGGAGAGCATGTCTCTGGAGGATGTCAAGGAGCACCTGGTCAGCATCATCCAGGAGTG -TGTCAAGCAAACCGAGGCCAAGCCTCGTCCGATCTCCCTGGAGCGTGGTTTCGCCACCAT -TCCTCTCAAGGGTATTGATGTGCCCTTCCACAGCACCTTCTTGCGCTCTGGTGTGAAGCC -CTTCCGTTCCTTCTTGTTGAAGAAGATCAACAAGACCACCATCGATCCCAGCAAGTTGAT -CGGCAAGTACATCCCCAACGTCACTGCCCGTCCCTTCGAGATCACCAAGGAATACTTTGA -GGACGTCTACCGACTCACCAACTCGCCCCGTATCGCCTCTATCTTGGCGAACTGGGACAA -GTACGAAGAGGGCAACGAAAACGTTGCAAAATAATCGATATATTCCTGTTGATATCCTCT -ATAATGTTGTGTTTTTGATAGACCGTTTTTCGTTTCATGGGTCATTATGTGATATGTCTC -AAAGCCCTGGAGTTTTTAATGTATATGAGTTTCAGTTTCGAATTTTAATATCAGTTCATT -CTCTTTCACTTGGTCTTCCCTGTCCGCTTCAGTCACTCTCATAGAGTGCCCAGTAAATTT -AGTTTCCCTAGGCAGTGTTCAACTCTTTTGTTTCTATACCAAGCTGTGAGATTCCAATGT -TTGGGATATTGCTGCAATGTGCTCAGAAGGCTGATCTCTCTATGACATTTCTATTTGATC -AAAAGATGTCCAAAGATAGTGCCGCTCATCACTGCGTCATCCTTTGTCTTTGAAATCTCC -GGGGTGCCACCTATACTGTTCCTTTTCTCCTTGCAATATTTGATCCCAAAATGTCACCTT -CAAAATCCGATGAGCCTCGTGACTATCCAAAAACATGTCTTCCCTGTGGTATACGTATCT -TGATGTTGAACACGGGAGTACTGGGCAAAGATCGCATAGATGTGCTCAAGGAGTCACAAT -GGGAAATCCATCATCAATCCCCGGTTCCTGAGCATATTCTGTTGCCATCGACCGAAGATG 
-TCAATAAGACTTTCTCCCAATTTCCTTGGTGATCATCGTGCTATCATCGAGGGGGTAAGT -TAACATTCTCATAGCAAGACTTCCAAGCAACTTGAAGTTAAAAAGAAATCCAAGCTGTCA -TTCAAAATATGATCTCACGGGAATTGGCTCAGTGAGATCTCTCGCTGGTCACCCATGTCA -ACTTCCACAAAAACACCGCACGAATTTTTGGAGAGACCACACCACAGTATGATGAGACAA -TATTCACACATTTTCATCCGGAAGATCACTGGATGGATCCAAGTCGTTACAGACACAAAA -ACATGGGATACATCTTTCACTATGATTGTTGGGAGATCCTTGGCCATCTGACAGGGAAAG -AATTGATCGAAAAGTCACTTGGGCAACTTATTCAGGCCGCGAGGCTGTATTGGCACACCC -ATGAAGACAGGTGTTTTACTGACGTAGATACCATATTTCAGGCCCAGCTCGCCGAGGTTT -CATACGTGAAATGTGAGAGATACGTTCGCGGTTGTGATTTCTATGTCAATCCGTGAATTA -TACCTGAGCTTCAGGATATCATCCAACTCGCCCAGAAGATGGTTGGAAAAATCAAAAGTA -TCTATCTCAATAGATTTCCACTCGAAATCTCCCTTCTGGTGGCAGAGATCATTTGCCCTA -TCATCTACATTCAATCCGATGTGCAGGATATGCGTAATATGACATCTGTGTTTGCTTTGA -ATTTACCAGATGCCTTTTGGAGAAAGCGATTACAGTCCAAACCTGATTTGTTCTTCGAGG -TGGATTTGCAAGAGGCTGATTCAGTGGACTGGCAGCTCTTGTGGTTAGGGTTGATGGACC -TTCTTTCCCATAAATGGTATAAATCCAGTGGATTGGCACACCGTGAGCGATTACTTGAAT -CTATTAGGCGATCATCGCAAATCTTCAACAAACTCGACAGAGACAGTTTGCTGGGAGATA -GTATCGCAGGTTATCTTGTCCTTGCTTTATGACAGATCGAGCTTTGGCACTTTCCCTTCT -AATGAATCGTAACCAATATGTTGCATACAGTGACATAGTAAGGGCACTATATGTAGGGTA -ACACCACGATGTCTTTTTTGGATGATCTTTCTAGATGTCTTTCTAGATGTTCCGTTTTAG -ATATGTATCTTTCGTGGTCTTGTGGGTAGGAAAAAGCAAATAGAAAGAAGTACAGCCAGG -AATGGGAATGGTAGCAGAGAGTAAACATGCCAGGCTTATCATTTTACTATGTACAATATA -CGAAGATGTGACAAGTATGAAACAAAAAAAATCAACGTGTAGAGGACTTTTTCTATGTAT -AAACAGGGATATTTGAACATTATGATCGTCCCGCACGGTTGAGAGGACCCCAGTTGGCAC -GAGGGTTGAGCTGGTACCGTACTAGTCGTTTCTTGGTATGCGCAGCAGACACTTGGCGGC -CCAGATCCTCATCAAAGACCACCTCAAGGGTACTAGGATGAGCGCGCTTGGAGGGGTTTG -CAGGGGAATTCTCCGGTCTAGGCTGGGGGTCGGCAGCGGCATCAGCATTGGGGCGACTCT -CTGCAGAAGCAGGAGCATTATGTCCAGGGTTTCGCGACCCGGACCGGTAGAGAGCAGCTT -CAACCTTAGTAACCAAGTCGCTGACATCTTTGCCAGTCTCAACGGCAGACATCAGATCCT -GTTTGAGAAGTGATAACCATTGATCTTCATCGTCCATTTTACGCAAGTTTTCACGGCTCT -TGCTCATGATCGTAGCACCGGCGGTTCTAGGCTTCAAGGAAGTTTCCTTGATAGGTGATC -TCTGACGGGCCCACTCGCGCATGATCTCCCAAACCACCTCCCAGGGCGCATCGGTGCGGA -TTGAGTTTGGCCTGGCGTGGCTGCGAGTGGAGCGATATCCGAGATGGCCAAGAGCACCGC 
-GGAACTCGTCGATCGGAGGGGTTGTTGTGTGGATAACCTTGGCGAGAGCACTGATTGAAA -AGAAGAAGGGGTACTTTTCCTGAGCGGCAACGTCCATTCGAGGAATAATGGCAGGGTATG -CAGGATCATGCTGGTTCTTTGTGCTTGGTGCGGATTCTTTGAGGGTAGATGGCTCCGAGC -TAGGCGGTGATAAGTCCAGATCCTCTTCAAGGGCAGTAGTCAACATTCCCTCAATCCGAG -CACAGGTCTGATATGTTTCGGGATCGGCTCCAGGGAGCATGTCCAAGATTTTCTGGATAA -ATTGAGGATTGTGTAGCGGACCGCCCCACATGGGTCCTGCAAGATGTGTTTTGGTACCGC -AATGCTCGCAGTGCGGTCCGGCTGTTGGCCCTTTAGTCAGGCCGTGGTGGTATATAGGGT -TTCCCTTGCGGTCCATTTTGGATTTCGTAGAACCAAGGGGCTGCGTTGTCCATGCGCCAC -ATCCGGAATCGCAATTGTAGACGAGCATCGTTTTGCCCGCCGTAAATTTGACCTGTGCAG -GAGATTGATAGACTCGAATAAAGACGCGAGCGTAAAAGTCGATAGACAGAGACAATAACG -GCTCGATGGCCACTCCATACTTACCTGCGGACATGGCAAGGGAATTCAAGATCAATCGCA -GACCACCCTCATGGGAGTGGAGGCCTTTCATGGGGATACCGCCGTACAGAGAGAATGATT -TTTCTGAATAGCCAGTTGAGGCCCAGACACCGGCATCAGTGCAAGTAACACACAAAAGGC -CACCGTCTTTGGCTGCCTGCACTGCAGCATCTATGAACGGAGCTGCCGTACCATATGGAT -CCAAATCAATGACGTCGAACTTGCTCGCGTTGTCAAGTCCATACATGTAAACTCGAGCGT -CGGCCAAATTGGGTTGAATCGTTTTGAGATTGTTGTATTCGATATTTTTCTTCATAGATT -TGATTGCCGAGTTTGACAGGTCATTGGCCACAACCTTTGTGACAAATGGGAGCTCCGAGG -CATAGCGAAGAGCGCGCAGTCCGGTTGCAGACAGAGCATCAAGAACAGTGAAAGAACCCG -CGGATTGCTCTGGTCGGGCTTCTGACTTAGGTTTTTGATCAGCCGGGGGAGCGGGGACTG -GCTGGTCGTTTGTCTCTGTGTCTTGCTCGTGTTCTGCGTTTGGGGCGGGCCTCGGGTTCT -CAGTCTGGTCGTCTTCACGCTTCCGTTTCTTTTCCCCGGTGCCTGCTTGCTTCTTCAGCT -TCTGAGCCGCTTTTTGTTTCTTTAGGTCGATGATATGTTCTCCATAAGCGCGAATGGCCA -GCACACTAAGGTCGCGATTAAATTGTTGAATTGGGTTGTAGAAGACGGATTGCACTTGGT -CATCGCTTTTGAGATCCTTTTTTGTTCCCTTGGAAGCGGCCTGTTGAGCAGGCGGGTTGA -GAATCTTCGCCAGACCTTCCTTGACAACATGGTAATTGTCTCCCTGGTATTCTACCAGCT -GGATCTGAGGATCCTCCATGGCTCTATGAGTGCTAAGGATTCGGACACGAGATCGGACCG -AAGTGGGGGTCGAATAGGTGGATTGGGCTAGAAGAAGGGGAGGACTGAGTTTTCGCTTAG -CGCATCTCGTTGCTGATCCAAAAATCAACGAATGGTTCATGAGCGAAGCCTTTCACAGCA -GATTACAAGAGATGTGAATGCGACGGAGAAAAAGATTCTACGCACGGACCACGTTACAGT -ACATAGCAATACTTGTTTTGTAATTTATATCAACTATAAAAGTACATGCATATATGGATG -ACTATAGAAGAGCAAACTTTCACAAGGAGATAAGAACCTTTACTTCGCCAAGTATAGCAT -AGGCAATATTGATGGATTGGGAGGACCCAAGTCTCAATGCTCGTACAGCACATTAGAATG 
-GATATTTGTCCACGATATCTTCAGTGTCGTTCTCTACATTAGGATTAAGTCTAGATAATA -TACACGATCACACAAACCTGTCATAGCCATTTCCCAATAATTGGGACCTGACTATACATC -TCATTCCACCCTTTAGCCTCCCAGCCTGAACCTTCGCCGGCACGTCCAACAACACCGAGG -GCACCTGCAAGCCAGAGAGCAGCACCAGCGGCCGCAATTCCATTGACAACCCACCAGACC -TTGCGCTGCCTCTTGGACTTTGTTTCTGTAAATCCAAGAAGACCATCCAACGAGCCCTTT -TGGCCACGTTCCTTGCGCGCGCTGGTCACTCTCCAGCCCATCCAGGTCGCCCAGCCACCA -ACAAAGTGCCACACACTAGCAGTCACAAAGAAGAAGTAGAAGGCATTCCAGAAAACAGGG -CTCCGAGCAAAGCCATGGGCAATATACCCAAGCCCTACGCCAGAGCTTCCTCCTTCGACG -ATCAATGGCGTGACACGGTTGACCCATGCGTGCAATGCAATCAGTGGCACGACAAAGTAT -CCAGTGCGCGATTGTAAAGACATCCGTGGCCAGAAATACAACGCGTTTCGCTGTGCGCGA -GTCTCGGCTCCATACAGACGTGCTCTACGCGCAGCGCGGATGTTGCGGAGAGCAACTCCC -GAGGCAATATGCGCAATTATAGGAATTGTCAGGACAAGATGCTCTAGCCCGGGAGCCTGG -TATACTGGTCGTGTGAGCAAGAGATAAGATTCGCTCGAGGGCACAGAGCGAGTCACAAGC -GGGATAAGAGAGGTGTTGGCGAGATGTAGAATGAAGAAAGCTGTAGGTGGGTAGGTGGAG -TATTTTTGAATACCACTCACTAAGCGATCATTGTTGTTAGAACATATAAACCAGCTTTCT -GTCGCTTGACGTACGCCACGAATCCCAGCGATGGCCGCTGAGCCCTAGCTTGGGTATTGA -AATTCCAACTCCATGTTGCATGGGAGAGGCGGCCCTAGGGAAGTATTCGCCGCTTTCAAC -ATCGGGGGCGTCGTGGACAGGCGATGGATCTAATTCTTGCAAGGAGATGATAGATCTGGT -GTCGGTGTCGGTGTCCTCCGGTTTGGAGAGCACGTGAGACGACATTGTATAGTTTTCGGG -ATTTGGGGGGTTTATCGAGCTGGAGGGAAAGGGCAAAAATAAAATTCTTCAGGAAGAGAC -TCGTCAATCATACATCCATGCTGTATAGGTGAACTAGGAAAAGCAGGGAAATGTTGGGGA -TCGCTGAGTCCCAAAAATACGGCAGCTGTGATAGGCTAGTTCCTCGACGAGTAATGACGA -TTGCCCACTTGGCAGGGCAGGGTCAGGTGACTCAATATAGTTACATACATCGAATGTTTC -TGTACTTACTTGATTTACTATAAAGGGGTGGATATTGCGCATAGGCTTAAATATTAATCA -TTTGCATGAGACAATACATATAAACTAACTATTTATTCAAGAAAATGTGAGCAGGAAAGC -CTCTATAAACAGACAAGTATATAGTTAGAGATGTTACTTGGACTCAATGATGCCTAGTTT -CACAGGCAGCGATGCGTTAGACCCGCTATTTATTGATGCGTAGATGGTCGCAATGCGCCG -CCGTACCAAACGCTCTCTCTTTAGACTCAAGCTGTACGCTTGTTCAAGAACAATGGCACC -GGTATGCTTTTTTTTGCTCCAATGTGACACATGAGCTAACAGATATAGCGTGGTTCAACA -AATCCAGCTCCTAGGGTAATCAATCATCCACATCTGCGACGTCTGTCACGGCCAAGCTAA -CCTTTCCTTACAGACTGAATCAGCGCGGTCTGCGCTCAGCTCCTTCACCTGCACCCTCTG -CAACAAATCTTACTCGCGTCATCCAGAATATGAAGCGCATATCGGCTCATATGACCACCA 
-ACATAGAAAGCGCTTGCAAGATCTAAAACAGTTGACACGTGATCCAAATGCTGTCGAGAA -GAACCGCCGGGCAGAACGTAAAGCCGATGCACAGGCCGGTCTGAAAGTCATCGAGACCCC -CGGACACAATGTCGCATCAAGTGGTGGAGGCGCGGGTTTCAAGAAGGGAGGGTTCAAAAG -CTCATTCGCTGCTGTCAAGGGAACGGTGGTCCCTTCGGCCCCAGTCAAGAAGAACGTTCT -CGGAGATGACGAGGAAGACAATAGCCAGAAACCTCAAAATGATGATCCATCGGATGTAGG -AAGGCCCGAAAAGAGTGGATCTGGTGATATGGAAAGTGATACCGACGAGGAATATGGCCC -AGAAGGACAATATGATCCTCGCCGGCCAACGGACTGCTTCGCTGGGTGTGCTGCCCAAAA -GGTCTGACGGGTATCGTGATGCCTGAAATGACCTGCGAATTGAAACCAATATCAAGCAAC -AGATATGTACATGATGGCGTTGAATGGTATGGACCATCCTTTTCGAATTACTATGCAGCA -ATCAAATTTTACCTAATCCGATTAAGCACATTTATGTTGGTACTGATCTGGATGTAATGG -TACATGCTTTCCAAGTGACTGTCTAAAACGGTTGGATAACAAAAGCAGAATAAAACAACG -ACATAAGATGAAACAGATGAAAGGGTACTGGGCACATTGCTATATTATTGCGAGTTCCTT -ACTCGCCCTCATTTATTGGCTTACAAGCGCAGAAGCTTGGCAGCCTGGCTGACAATGGTG -TCCTCCTGGGGGAAGGCCATGGCCTCAAGGCCAGCGGCGTAAGGAGTGGGAACCTCAGCA -CCGGTGACACGCACGGCAGGGGCGGTCAGGTAATCGAAGCCGTACTCCATGGACAGAGCC -AGGATCTCAGACGAAAGACCGAACATGGGGTAGCCAGACTCAACGACCATAATGCGGCCG -GTCTTCTTGAGGGAGGCAATGATGGTCTCAACATCGAGGGGCTTAATAGAGCGCAGGTTG -ATAACCTCAGCATCGACACCGTACTTCTGCTTGAGCTCGGCGGCGGCGGTCAGGGACTGA -CCGACGCAGCGAGACACAGAGACAATGGTAAGGTCCTTTCCGGGGCGCTCAATCTTGGCC -TTACCGATGGGCAGGACGAAGTCGTTCTTCTGGGCAGCCTCGCTCATAGGGAAAGCCTGG -CCATACATGAGCCTAGCATTGGTTAGTTTACGCAGAACCCAGCAAAATGAGTGGATGGGA -TCTTACTCATTCTCCAGAACAACAACGGGGTTGGGGTCACGGATAGCAGCCTTCAGCAGA -CCCTTGGCATCCTCGGAGCTCCAGGGGGAGACAACCTTCAAACCGGGAATGCTGCCGTAC -CAGGCAGAGTAATCCTGGGAGTGCTGGGCGGCGACACCAGAAGCGAAACCGTTGGGACCG -CGGAAAGTGATGTTGCAAGGCTGAATACCACCGGACATGTAGTGAGTCTTGGCAGCGGAG -TTGATAATTTGATCGATGGCCTGCATGGCGAAGTTGAAGGTCATGAACTCGGCCTAGATC -ATGGAACCGTTAGCATGCGATCCTGGATCAACCGTCATTTCTGGAATTACTTACGATAGG -GTGCAGACCAGCGAGCGCAGCACCAACGGCGATACCACAGAAGCCGGCCTCTGTGATGGG -AGTATCGATAACACGCTTGGGACCGAAGCGGTCGAGCAGACCCTTCGTCACCTTGTATCT -GTAGAGCCGGAAGTTCAGCCTCAGTTCCAGGTAAGCGGATACTAATAACTCACGCTCCGT -TGTACTGTGCAACTTCCTCACCGAGAATGAAAGTCTTCTGGTTGAGTTCCAACTCCTCGG -CCAAGGCCTCGTTCAAGGCATCGCGAACGGTGACCTGCTTGGTGCCGTCCTCGGTGGCAT 
-ATCCGCGGCGCAAGATTGAAGGAGCACAGGCAGACTGGCGGAGGTTTGAGCGCAGAGCAG -TGGAAGCAAGGCGCGAGGAAAACAGACGGGCTGTTGGGCGGAAAAGTCGGGGAGCTGCCA -TTGCGGAAATGTGATTGAATTAAAACGAGGCAAGAAAAAAAGATTCAATTAACGATCGCA -GACCAAATTGGACCGCAAAATCAATTGAATAGAAGAACTGGAAAGTAGATAAGAGTGAAT -AGCCAGAGAAAACTTATCGCCGTCCGAATAATCTTCGAGATGCAAATCCGAAGATGTTGG -GCAACACGTGATGTCAGAGCTCACCGGTACTGTAACCGTAACCATACAGAATTGGAGAGA -GCTCACCTGAGGTATCAACTTATGGTTGTTCAACATATCTTTCAATGAGGTATCAACTTT -TCTACTGTTAGCTATAAGTACGAGTTTCCTTCCACATATTGCAAGACCTTTCAAGGGGTT -TCTTACCGTAAATCCATGCTTTTATACACTCTTCCAAGGGTGTATTATCGGTACTGATGT -TGTCCTTGCTCTGGCTACTTCCGTCGGCACTCGCCGCTCAACCCTCCACGCCCGATCCTA -TCTCCGCTCCCCTGCGAGCCCTTGACTTCGGGCAACTGAATTTCCTCCATACTACCGATA -CCCATGGCTGGCTGGCTGGTCATCTGCAGGAGTAGGCTTTAGACCCTGCCTAGCTTTTGA -GTTGTACTAATAATTACAGACCGTCCTACTCCGCAGACTGGGGAGACTATGTATCATTTG -CGACACGCATGCGGGAGAAGGCAGAAGCACAAGGACAAGACCTGCTCGTTATAGATACTG -GCGACCGAGTCGAAGGCAACGGTTTATATGACTCATCCGAGCCCAAGGGTATCTACCTTT -CAGACATCTTCCGGCACCAAGACATCGACCTGTTGACGTCGGGAAACCATGAACTATACA -AGCAGAACACATCCGAGACCGAGCTTCTCATCACCGTTCCTAATTTCAGGGGCAAATATC -TAGCTTCAAATATTGATATCATTCATCCAACCACGAAAGAGCTTGTGCCTCTGGCTCCGC -GGTTCAAGAAGTTCACCACCAAGAAGCAAGGGATCCGTATAGTAGCGTTTGGCTTCCTCT -TTGATTTCAACGGAAATTACAACAACACCGTGGTCCAGAGAGTATCTTCCACCATCAAAG -AGGATTGGTTCCAAGAAGCCATTCGGGACAAAGAAGTGGACCTGTTTTTGGTGATCGGCC -ATGTGCCTGTTCATTCACCGGAATACCGGGCGATCTTCAAGGAGATCCGGGGAATCCGCT -GGGACACCCCAATCCAATTCTTTGGCGGGCACCAGCATGTGCGTGACTATGCGCAATATG -ATAGCAAGGCCTTTGGGCTAGCGAGTGGGCGATTCATGGAAACTATTGGCTTCATGTCAA -TCGATGGTCTCGCCAGCAAAGCCAGCCATCAGTCTGCCGTTGGGCCCGTATTCAAGCGCA -GATACATTGACAGCAATCTGTTGTCTTATTATCATCACACCGGTCTTGATCAAAAGACCT -TTCCAACTGAGAAAGGCCTCAATGTCTCCCGATTGATCACCAAAGCACGGACCGAACTCC -ATCTGGACCAGGTCCATGGATGCGCGCCACATGATTTATGGATGTCACGAGCCAAGTATC -CGGACCCCAACAGCATCTACACCTGGCTTGAGACTCAGGTTTTACGCGATGTGAGGGATG -AGTCTCGTGGAGACACACCCAGGCTGGTCATTGTTAACACGGGCGCCATGCGGTTCGATA -TTTTCAAGGGGCCTTTTACACATGACTCGACTTTTATTATATCACCATTTACCAGTACCT -TCCGCTATGTGAAAGATGTCCCATATGACAAGGCACAACTAATTGTCGAGGTTTTGAACG 
-AGCAGCCGCAAATTTTATCTACTGATGAATCTTCGTTGACAACCTTGGGCCCTGTCGAGC -AACTTGCATATCCGGAGGACGTGATTGCTCAAAACTGGCCTTCACCTGGCGACCAAATTC -CCATGTCCGGTCCGGCCCTTGTGCCTGGTTACACGACAAAAGATGATGCGGGCACAGATG -GTGATGACACCATCCATTCACCCATTTCCTTTTACCGCGTGCCGAACTGCATTCGAACCC -TCATATCTACCAATGCATCGGAGACACCGAAGACGGTGGATATGGTATATCTTGATTTCA -TAGAGAGCAAGGTTATACTGGCCGCCAAGTTTGCTGGTTTCGATGTCGACATTGCAAAGG -AGAGTGATGTGTACATGCCGCCCATGTCTCTAACAAACCTCATTCTTGACTGGGTGAAGA -AGAACTGGAAGTGTTGATAGTTATACTTTCTGGCAGTGCTTTTTTTTCTACAATTCATTC -CTAGTTTCTGGACCAACATTGTAAATCGGAAATAAAACCGCCTAGCTTTATAGTACATTC -ATATTGGCTGTTCACTTAATCACCACCGTTGAATGTGAACACACCCTTCCCACTACGGTA -GCCGCCCAAAGTGCCCTGCACGCTCTCTGCCAGTTCCGGGCCCTCGGATTCCCAGTTCCA -TTTGACCTCATCCACGGGAATATCCCGGAACTTGCCAGAGCGAGTCAATTGGAGCACATC -CTTGATGGTGCTCTCCTTCAGCTCAGGGTTCTTGTCACCCCACCTGCTAACCCAGAACCC -GTCAAACGCGAGGTTCTTGAAGATCAACAAACCCGAGGGCAATGCAACAGGCTGCTTCGA -CATGGCTCCGTAGGTCACCATGTGTGAATTAGGAGCCAAAGTCTTTGCCAGAGCGGTGGC -ATTCTTCCCGCCTACGCAGTTCAACGCCAGGCGAATGGGTTCGCGGCCCTGGCGGGTGAA -CTCGTGGACCATATCCCGGAAATTGCCAGTAAGCATCTCCTCCTCAGTGATAACAACAGT -GGCTCCTAGGTCCCGGAGCTCCTTCTTGAGAGCCTCGGTCTCTTCGGGTGTCTTGCGCTG -TCGAACGACATTGAGCGTCTTGATTCCCCATTCGCGACCCAGTTGGATAGCTGCTCGCCC -GACACCACTGTTGGCGCCGTTCTGGATCACCCACTCCTCGCCGGCACGCATCCAATCCCA -TTCACAGAAATCCCGGATCATACGGTACGCAGTGACGGGGTTGACGCTAACAGTGCCAAC -CTGCAGCGGGGTCAAGCCCTCTTTGTTCTCTATCTTGATCAACTGAGATTCATCCAGCTG -CGCGTGGGTACGCCAGGTACCCAGACCTGTGCGCTTCATAATGACCCAGTCGCCTTTGCT -GAGGTTCTTGACGCCTGCACCGGTTGAGAGCACCTCGAATGCGCCCTCGTTACCGCCGAC -AGCGGCAGGCTCCGCGTTACCAAGGTCTGTTTGGAAGGGGGGTTTGCTGGGATAGACACC -CTGGATCTGGTTAACGTCGGCAGGGTTCATCGGCGCGGCCAACAGGCGCAGATTCACTTG -GGTTGCATGCGGGGCGGAGATTGAGTGCTTGTGTAGGCTGAATTGATCAGTTCTGGTTTG -CACAATTGCAGCTAGGAGATCTTACCGCAAGACATCTTTGGGCTCGCCATATTTGGAGTA -GATGAGAGCCTTTGCCTGTGTGTAGCCATAGACCGAGATGTATCGCCGACCGAGTGGAGC -TTTACCGAGCACCTGTGGTACAGCTCGAGCTGAGGCCGGGCGAGCACTTCGGGTGGCCGT -CTGTAGTACACTTCGCGAGAACATAGCTGGTAGGTGAGTGAATGGAGGAGGAAAGAAAGT -CGAGCAAATCCGGATGACGTCTTGGTCGATTTCGCTCGGCCCGAAGGAACTATTAACATT 
-GGAGTGCTCCGAGGTGGACTAACACAGGATTTATTGAATTTGAGCTTGGTATTCAAGCTC -ATGGCTATTTAATTCTAAGGCTAGACGTTATATAATACATACCTATATATCAGGCTCATC -CATAAATATCTTCAGAGGTGACAGACTTGAACAGATACCGTAGAAAAGCAGCACGGACCC -AGGAAACTCGCGTTCCGCTCTCTTTTTGTGTTATACTCCTCACACACTCGTCTAAATGTC -CTTATTGCGCGAAGCTGAGAGATGCGTCGCTAATCAATATGCACATAATTTCAATGCCTT -TATAACCCCGTTGTCGCGCGCAGGACCATGGCTGGACCGCGTAAAGGAAGCAGATGTGCG -CAAGGAACAAGGTAGCAATATCAGACTTTCACTCTCTATTTCAGAGTTAACCAATTTGAT -CAGGGAAACCCAAGTCCACAGTGGATGGCCGCTTGATCTCCATCAAAGACAATATCTGCA -CGCGCGACCTTCCAACTACCTGCGCCTCGGGAATCTTGGATCAATTTACCAGCCCGTTCA -ATGCCACCGTTGTCAATCAGCTAGAGGCCGCCGGTGCCATTATCGCCGGAAAGACAAATT -TGGACGAGTTTGGGATGGGATCAAATTCGATCAATTCACATTTCGGTCCTGTCAAGAACC -CTCGTCGGGATTCCAGCGGGGAAGACCTCTCCGCGGGTGGTAGCTCTGGTGGGAGTGCTG -CCTCAGTCGCTGCTGATCAATGCTATGCGTGAGTAGCCAATTTTTAGAAAGGAGAATAAT -TGCTGACTCTTACAGATCACTAGGAACGGACACCGGAGGCTCGGTCAGACTTCCAGCTGC -TTACACCGGGACAGTGGGATTCAAACCGTCATATGGACTGATCTCACGATGGGGCGTGGT -GGCATACGCCAATTCTTTAGATACCGTGGGTATAATGGGAAAAACCACCGCCAATGTTCG -AGACATCTTCGGTAGGTCTTGACCAGTGTGGCTAGATAACGACCATGCTAATAGTATGTC -TAGACATCCTCAACCGACATGACCCGCGCGATCCTACCAGCCTCTCAATATCCTCCCGCT -CTCGAATCCAAGCATTTCTCCAAACATCACATTCGGCCTCGCGGCTGACTTCAGCCCCTC -TCCGAATTGGCGTGCCAATGGAGTATAACGTTTCTGAGCTGACTCCCTCGGTCCGGCGAG -CTTGGGCTCTTTCTCTGGCACACCTGAAGCAACAAGGACACACCGTGCATTCCGTCTCTC -TGCCCGACACAAAACACGCATTATCTACATATTACGTCGTGGGACCTGCCGAGGCATCTT -CAAACCTGGCCAAATACGATGGAGTGCGATACGGGACTCGCGCCGAAGGCCCAGATGGCG -ACGGAAAGCCCGATGGATATCTATTCTCAAATACCCGAGGACAAGGGTTCGGGCAAGAAG -TGAAGAGACGAATTGTTCTAGGCACGTTCACTTTGAGTGCCGATGCCATTGACAACTATT -TCATCAAAGCACAACGCGTGCGTCGTCTAGTCCAGCAAGACTTTGATGCTGTCTTTACAG -CCAAGCACCCGCTTTTGTCAGAGCAGGGCGCAGACAAAATTGCAAAAGAAAACGACGTAG -ATGTCATCATATGTCCTACTGCTCCCTCATCCCCGCCACAACTGTCAGACTTGATGAACA -AATCAACCAAAAAATCACCCCTGGATGCATATGTCGCTGATGTATTCACTGTTCCCGCCA -GTCTAGCTGGGCTACCTGCAATTTCAGTCCCTGTGACAGTGTCTGGGGAGAAGGATGCTG -AACTGGCAGGAATTCAGGTCATCGGTCAATATGGAGACGATCAATTAGTCATGAAAGTGG -GCGAGATGTTGGAGGGAAGACATTTGGATTAGATCCCATACCTGTACGACTACTATATGA 
-AATGTATATTATGTGAAAAGATTTTGAAATTAGAATTGGTATCAATGCTATTGCGAAAGC -CAAACGCCTATTTCATTTCCCGCTTTAAGTCATCTCAACATCGCCGTCACCCTGCACTTC -AACGTTGGGGGTGCCAGCGGTCCCGACGCTCATCGCATCTTCAAGGCCGTGGTTCCCAAC -ATCGCCTGCCGCTGGCGTCTCACGATTGCTGTTAGCAGCGGAGTCAGATGCCCCAGCAGT -TCCAGGAGTATGAGCAAGGGTAGAGTCAAGCGCTGCACTGACACCACCCTTCAACACCTC -CGCCCCTCCCACGACTCCCGTTCCATCGAGCTCGGTGTTGTTGTTGATGACATCGCCATC -AATGCCGCCCCATGTAAACATGGCTTCTACCTCGTCTACGATATCGGCCAAGACATCTTC -GGCGATCTGATCGTTGAATCGTTCACCAATGGCCCGGACTGGCTCTCCGGTTTCAGTCTT -CTCGGGTGGCCCAACCCACCATGGCCCCTCAAATCGGCATTCGACGGCACCACCGCCGTA -GCAGCTCACCGCCGGATAATAGCCGATCAAGCCGTCGTCGGCGTTTTCACGCTCGCCGAT -ACCTAAATTGTTTGTACCATTGGCGAGACGGCTCGCAGGCGGGAGGAAGGCGTAAAGTTC -TTTGAAGGGGGTGCCCATCTTTACACCGTTCTTGAAAATGGTAATGCTAGACCCCGGGAG -TGTGCGAAGCGTGGCGTCTTCGCTGTTGAAAGGTGACGGGGGACCAAACGTCGGTGTTTC -CTTCAGGTTGAAAGCGTAGTCACGGAGGTGTTTTGTAGAGAAGACATTGGATTGTTGCCA -GCAGAAATCCGATTTATAGTGGAAGGGAATCCGATCTCGGATGATGTTGGTTGCTACCGG -TTGCGATGGACTATTATCAGCGGAATCGTAAGTGGCCTCGACAATTTTCTTGTGAAGAGA -AAGAGGCGGAAGAGTGATAAGCATGCCAATAACATCGCCCTCGTTTATAGCTTCCCCCTT -CGGAAAGAAAAACTCGCATCGCATGCGATTGACAACTTCGCCGTTCACATCTCGAATACC -GTACCCGTAGCAATCCACTCCAACATTTACGTCGAGGTCGGCCTCACGACGGGCGAATCC -GATGCGGACATGGCCACGAGGTGACGTTGTCGGTCTGCCGTTCTGAGGTGCCTGGGGATC -ATTGACTATGCCACTAATCACACGCGCTTCGTAATAATATGTGCCCTCTCGAGCACATAC -ATTGGCACGGGCGGTATGCCACGCATCACTGGTAGTTACCGCAGTCGCGCTCTCTGAAAA -TGATATGGCAGCTGGTGAATCCTCGAAACTGAAGCGGGTATGGTACGGGCGAACATCGGT -CTGTCGATATTTGATGTGAGGGAATCCGGGGTCGGCAATGGCATAGCTGTAACGAAACCC -ATTCTTATTGGTGAGTTTATCGACCACCTCGTAGAAATCCCAGCTCTGGCCCAAAACGTC -CGACGTCTTTCGGCTCACGCGATTCAATTGGCGAGGTTGTGGAGGGTACCGATCCGAGGC -GTGCGGCTCGGCGAGCAACAATGGACGAAGAATGCTCAGTTTTGAGCTGGGCAGGGCGTG -GCTCGGTGCAGCTGTAGCTCGCTTTTTCGGCGGTGGTGCGCTCTCGGTGTCCAGTCCTTT -GGCTTCACGCTTCTTGCGACTATCTCGCTTGTTACGCTTGACTCGACCATCGCCTTGGGC -GGACGGGGCGGGTGAAGATCTTGGATTCAAGTCTGTGAATTGTGCATTGAGAAGAGGTCC -ACCGTTAGGAGGTAGGATCGGGGAGTTGATGTTGGACGACGGAGCGGAAGATCCGGGGGC -TTGGCCGGATGCCATGATGGAGGCCCTTTTGACCGCTGGCGTTGTGATCTACCGACTAGC 
-CTGTCGATCTAGATAAATTCAGCAATTTGGGCAAGATATAAGCCAATGAGTATGGAACTA -CAGCATTGTGCTTCTGCGGAGTTAGAAGTCCTTACACAATTTGAAGCTTGTCTGTATGCT -GTCAAGCTGTTCCGGCGTGTATATACACTGGAGAACCGAAGAGTTGGCAACCTGCAGCTT -CCCCAATCAAATTATTATAGTATGCCAATGGCTCCTATATTCCTTAACAGGCAGGAAACT -TCTCTTTAAAAGTCAATCACACGCGCGCCGAATGAGAATCTCCTTGCGAAACGGAGGCGG -TCAGTGTTTAATTGCAGAAAATGCGTGAAGGTCAGGGCGGTATGTGGTACAGGCGGTGAG -GGGATTGGGGGGCACCGCCTGGCAGAAAAAAAAATATGACCACACAATTTGATCGTCGTG -TGTACTTCGATAGGCCAGCCCAAACTACTCATTTCAGTTTCTTCGGTTTACCTGAAGATT -TACTTGCCATGCCAAAGGTATCAAGCTCTCGCGCCGCTGCGGCAGCACGCAGACACAACC -CTCTCGCTGAAGATATTACTTCCGCGGGGCACCTTCGAACACAGAGCAGCAAGAAAGGCA -AAGGCAAAGCCGACGAAAACGATGAGGATGGCGAAAATGGACAGCGTTACGTCGATGCGA -AGATGTCGCGTAAGATTCTACAGATTGGACAGGAATTGGCCGACGAGGATGCCGCCGAGG -AACTCAAGAACTCAGCACGGCCCCAGACGAATGCATTTGAATTCGATACCCGATTCGAGG -ATGAGGAGGCTTTCTCCGACGATGAGGGGAAGTTTGAGGCCGATGATTGGGTTGATGATG -ACGTCGAGCAAGTTGTAAGACTCCTGAATTTCTGAACAATGAAATTGCTTCTAACATTTT -ACAAAGGAGGTCGACCCCAATGATCTAGATATGTTCAACAAGTTCATGCCTCACGAGGAG -GACCCTATATTCCACCCCAAGGATCCTAGCTCCGCTGGCCCCACCAACCTTGCCGATTTG -ATTCTCGAGAAGATCGCCGAGCACGAGGCAAAGCAGGCTGGCGAGGGTCCCTATGGTCCA -TACATCCAAGGTGGTGGTATCCCCGAGGATGCTGTCCAGATTCCTGCCAAGGCCGTAGAA -GTATATGAGAAGTATGTTTTGCGACAATTCCTTGTGAGGCCAAGCTAACTTGTCCTAGGG -TCGGCATGATTCTTTCACGCTACAAGTCTGGCCCCCTCCCCAAGCCTATCAAGATCCTTC -CCGCTGTACCGAACTGGCAGACTATTCTGGATATCACTCGCCCTGAGTCATGGACCGGCA -ACGCCGTCTACGCCGTAACCCGTATTTTCATTTCTTCCAAGCCTGCGGTTGCCCAGGAAT -TCATCAATATTGTGCTTTTGGACCGAGTGCGCAACGAAATCCACGAAACCAAGAAGTTGA -ATATTCACACCTACAATGCGTTGCGCAAGGCCCTGTACAAGCCCGCATGCTTCTTCAAGG -GACTTTTGTTCCCGCTCGTGTCCACCGGCACCTGCACCCTGCGTGAAGCCCACATTGTCT -CTTCTGTGGTTGCCCGTGTCTCCGTTCCCGTGCTGCACTCTGCAGCCGCCCTGCTTCGCA -TGTGTGATCTGGCCGCGGAACAGTCATTGACCTCCATTGAAAGCACTGGTGCTGTCAACA -TGTTCATCCGTGTCTTCCTAGAGAAGAAGTATGCTCTGCCGTACAAGGTCATTGACGCCT -TGGTATTCCACTTCATGCGCTTCCGCGCTGTGGACCCTTCGGAGGATGCTATGAACGACG -GGCCTTCAGGGCTCGGTTCCAAGGCGTACAAGCTGCCAGTTCTCTGGCACCAGTCCCTTC -TGGTCTTTGCCCAGCGTTATCGTAACGATATCACTGAGGACCAGCGTGAAGCGCTTCTCG 
-ATTTGCTTCTCGTCCGTGGACACAAGGATATTGGACCTGAAGTACGACGCGAACTGCTTG -CTGGCCGTGGACGTGGTGTGGTGGTCCCTGACCCGGAGGCCCAGAATGCCATCGATGCTG -GCGACGACACGATGGATGTCACCATGTAGATGAGATCAATTGATCATAGCCCCGCACATG -TTCGCTTACGATCTTGTCATGAGCACGTTTCTATGGGATGGCGTTTTTTTCTTTGTTTGA -GGGACTGCATATAATTCCCCTGGTGTCGAAGGTTTCCAAAATGGGATGTCTAAACCGATC -ACTAAATTTAGTGTATTGAATTTGAGGTAGACATATATCGTTATCTTTTCACAAGAAACC -TCCATAGTCCATATGCAGCAAATGGTAGGAGTCACCCCCTGTAAACCGAGGTCAACCCTA -ACGACGCCCCGTTTCAACCCCTCGACCTAGACTGCATGATTCCGTCGTGTTCGAAGTGGT -TCTGTAACATCCCCAGAATGATCTCCGCCAATCTAAATGGCACACTGAACCCCTCCATGC -GAGTATGGAGTACGGAGGACGGAGTACATAATCACACAGTGACATTCTCAGACTTAGGGG -CCACGGAGAATTAGGGGGCGGGGCAGCCCTTGGCTGATTGGCGTTCGAACGGAGCATTCT -CCGGGCAATGCTCCGTCTTAATCGCCTCATAATCAGGAGAAAAGGTATCAGGGCCAGGTC -CACCCTAGAAAGTAGATTGTTCGAGACTCTGGTATTAAATCCGCATCCACACTATGACTT -GGATGGATCGGAATGACTGGATCTGTCATAAGTACTCCGTACTACCCGAGATATCAGAAG -CTGAAGTAATACTCCAAGTGTACTCCGTAGAGCCGATATCCCAATTATAGAAATCCAGGT -ATTGTAAGGCAATCAAGTGGGCAAATCCCCCGGTGAGGGATGTTTCAAGCAACGGATCAT -AATTGGTTGCCGAGATGGGGCCGACGGAATGCAGGGGGAAATTAACGTAATTTACAGGTC -TCGTTACAGGAGACTCAGCCCCTTTTGAGGTACTCGGTGGCACATGGTAAATTGTATGGT -GCAGCATGTACCTGACGTACGTGTGCGCCCAGGTTCCAAGGTTGCCCCCCCTTTGTTCCA -TTCCCCTAGCCCATCCCTTTCAGACCACCGTGGCCTCTACTTAAACCTCATCTCTCCCCG -TCCATTTGCAGTTGAACAGCTGTCCTTTCCATTTCTCCCTCTTTTTTCTTTGCTGACTCA -CCCTCTGACTCAATCTTCCCCGGAGTCTCTGGTCTCCGGTGCCGATCGTGATGATAACGT -GGCCTCTGTGATTTGAACCCTCTTCACCTCCAGTCATTTACGTCTATATACAATTGTTAA -ATCACGTCCGGCACCGGTTATCCATTTGACCCTGTCACCATGGCGGATACTATCAACCCG -ATGGATACTCTCCCCGCATCACCATCTCCAGAGATGTACACAGTTTCCCCCGCGGATACA -TCATTAGACTCTCCGGAGCCTGAGGATGAAATCAaagaagaagatgatgagaagaagcca -acaaagaagagaaAGTCTTGGGGACAAGAACTCCCAACCCCAAAAACAAACCTCCCTCCT -AGGTATGTTATTAATCCTGGCTACAAACGTGGAACCCCCGCTGACTCTTCTAGAAAACGC -GCCAAGACAGATGATGAAAAGGAGCAGCGTCGGATCGAACGGGTCCTTCGAAACCGTGCT -GCTGCTCAAACATCACGCGAGCGTAAGCGCCTCGAAATGGAGAAACTCGAGACCGAAAAG -ATCCGAATGGAGCAACAAAACCAGTTCCTGATCCAGCGTCTTTCTCAGATGGAGACAGAG -AACAACCGCTTGAGTCAACAAGTGGCCAAGCTCTCTGCCGAGGTCCGTGGATCTCGCAGC 
-GTGACTCCCAAAGCCAGCTCTCCCGCCATTGAATCTCCTACCCTCACGCCTACGCTCTTC -AAACAAGAAGGCGACGAACTCCCTATGGAACGGATTCCTTTCCCCACTCCCTCCGTTACC -GACTACTCTCCTACCCTCAAGCCTTCCACTCTGGCTGAGGCCTCCGACGTGACACAACAT -CCTGCAGCGGTGTTGTGTGAATGTGACCTGCAGTGTCCGTCGCCGGCCTCGAAGGATCTG -GAAGTGCCCTACCCCTCTTCGACCTCTCCGATGAACCTCAGACTGCAAATGACGTTGCAG -CTCCTCTTTCTGACGATGACTTCCGCCGCCTGTTCCACGGTGATTCACCCGCTGAGCCAA -ATTCTTCTTTCCCTGAAGACGGGTTTGCCTTTGACGTTCTCGACGGAGGAGATCTATCAG -CATTTCCCTTTGATTCTATGGTTGATTTCGACCCCGAGTCTGTCGTCCTCGACGGCGTCC -AATCGTCCGGTCTTTCGGATGAGACTTCTCACCAGACTACTAGCTTGCAGCCCAGCCTTG -GCGCGTCCACTTCGAGATGCGACGGGCAGAGCATTGCAGCTAGCGGTTAGTGAGACTTTC -TCCCCGCAGACCTGGTCGGCTGGCGCCTCGGAGGCTCGACCGAGTTGGGAGTCGTTATTA -ACCATGGCTTGGGCAATCGATAGTCTCAGCCTGTCAAAACGACACCAGCGGGGAAACACT -ATGAAATCCGGCCGGCAACGCAGTTATGGAAAGGTGCATCGAGGTAAACGGAGTTTTCGG -AGTTCCAGTGCGAGCAAAGCTCTTTCAGCTTTGCTCATGGGCAAACAGCGCTGAGGGTTC -CATTTTTTGGCCTGTATACTATATAGTTCTTATAATGCAAAATCGACTGGTTCATCTTGC -AATCACTCATCGCCTGAATAGTACATGTAGTAACTCCTCTAATTCGCCACCGTAATCCTC -GGCTCAGTCAATTTAGAGACGTTGGGCAAGGTCAAATCTAGAGTTGTCCGTGATGCTAAA -ATAACACCGACGCCAAGAATGACATGAATGCGGTCAGTGACGTTGCCACGCGCCCCCAAA -TCCATCTGCCTTCCACAACACCTGTAATCCTCATCCTCCATAACATCTCTAACCGACATC -ATGGTTCGCAAAGACCCTATCTTCGAAGCCCGCACAAATGTCAAAGTAAATCACCTCCCC -GCACTCTGTTTCCCCTCCTCACAGCGTGCATCACCACTGCATGAGACCAACGCTAATCTT -ATCTCCCAGTTACACTCCAACAGACTCAAGAAAGAAGCCGTCCGAGCAGAGGCGACTTTT -AAATCCGAAAAGGCCAAAGCAGATAAAGCGATGAAAAACCACGAGTTCCAGATCGCCCGC -ATTCACGCCGCCTCTGCTGTCCGGGAGAAGCGACGCCAAGTAACGCTCAAATCCGAAGCG -GCACGCGCCGACGTCATAATCAACGAACTCAAGGCTGCCCAGAGCACCCGCGACACGTCC -CGGACGCTCGCCATGGCATCGCGGGGTCTAGACGCTGCCTCACGGAGCGTCAACCTCGAA -CACCTCGTCTCTCACGCTAACAACTTCCTCGCCCGGTCGGAGGATTTCAAGATTGCTAGC -AGTGCGATCGAGGATGTCGCACAGGGTATATCGATGCAAGAATATGGTGCGGAGGGCGAG -GCTGATGTTGATCGGCTTATGGAACAACTGGCGGATGATGCGGGTGTTGATATGCGCATG -AATCTCGAAGCGGATGCTGCGCCCAAAGAGGATGTCAAGGAACCCAAGCATGTAGATGCC -GAGGTTGAGGATGGCTTGGGTGCAAGGCTACGAGCTTTACGGGCTGCTAATTGAGCAAGT -GATTCATGCTGTCTCTCCTCTCTATGCTCCCTCTCCTACTGAGCACTCCGGATTTACGGG 
-AGTGTCAAATTCCCCCATATAGATTCTTCGCAATCTTTTGACCTTGCAACACTGACGATC -CACGAAGTTACGCTTGTATTTTCAATTGTTTCTATTGTGCGCTGTTCTATACGTGACTAG -GCGTATTCGAGGAAGTTTTTTTTTTTTCAACACATTGAAGCTAGTCCAAATTTTGAGTAA -TTGCCGTGGAAAAATCTAAGAAAAGACCGGTTCGGTAAAAACATGAGGCGAAAGTACTAG -GGTTCGCAACTAGATCGTAAATAGACTAGACACTCCACTTAACCCAGGCAACATACGAGA -CATCACCAATCAAACGAGAGCATCTGCCTACTTTAATGCATTCATTTCCACCTTAGTAGG -AATCGGTATAGCTAAGGCGATAAACAGCCCCCAAATGGCCAAAAAAAAAAAGGGTCTGTC -AAGAGGCGTGAGTCTTTTCATTTCTATAAAGCCATGGATGCCTCCCTGACAGTTTTCTCT -CCAAACTTCCAGGACCATTCAAGATGGCACCTTCACACTCGCTTCTTCTTTTCTTATCAT -TCGTTCTAAGCCCTTTCGTTCAGGCTGCTCCTTGGATTGTGACAGATGTCTACGAGCAAA -GGGTTACTACAGATTACTACGGCGAGCTGACCACCGAGGTCCAAAAAATCTCTCCAACTG -CAACCTTGCCATCCGAGGCCTTGTCCACCATTACTTCCACGAATACCGATTATGACTACA -CCATTGTTCAAAAGTTGTATCCCACTGGCTATGGCAAGCAGCGAGACTTATACGATGGCT -ATCGTAACGATGTGGATAGTGATGGAAACTATTTCGGCACCCTGTACAAGGTCAATTTGA -CTTATTCGGCCCCAACAGCCTGCTCTACTCAATGGACCACTACGACCGGTGTCGATTTCT -ACCCTCCCGATGAGATCGCCACCTTGCTGCCCAGAGATAATATCGAAACCACAACAATCG -TCGACGACAGTCAGGCATTCCAGCCCACCACCTACATCTATGAAATAGTGTTTGTGGAGC -CCACCCAACTTCCCACGAGCTCTCTAAACTCTCTGAGCTACTACAACGCCCCTACCTCTC -TCTATTCAGGTGTTGGATGCGAGTCCACAACCCGCACCTCTCATTACGGATACTCCGACT -ATTACAACACTGGCGAAACCGCCTCTGGGTCTGACTACACCGATTCAGATGACAATTCGG -ACGACAATAACTACAATAACTATGGTTACTATGACTACTACAACGACGACAATTGGTTCA -CCAGCGCACGGTGGTCCGGCATCGGCATCTCCTACCTCGCAGTTACATTGATCTGCGTGC -TGGGTTGGATTGGCCTAATCTTCATCCTGGGCATGATCGAGGCCTGGGTGCGATTCCGTC -GCCTCATGACGGGCTGGCAGACCCGCCGTGGTCTTCCTCTTTTCTGGGCTTTCATTTTGC -TCCCTCTTTCTCTTTTCTTCCTGTTTTGTTTCCGCAAGGGCTACCGCGCCCGCAGCAAAG -AGGACGCCGAGATTCTCAAGCAGAGATGGGACGCCATGGGTTTCTGGACCAAGATTCGCC -TGTTTTTCGCCTGGGGTTTCCGCTTCAAGTACCCAACTGTATTGGGCCCCGCGCCCGCGC -TTGTCAAGGCAAGCAAGAGGCCTACCGAGTCTGGTCCGCGCTTGCTAGATCCCTCGGCTG -CTGAGATGTCTCATGCTCATATACCTCAGGCTCAGCCGTATCCTGCTCCACAGCAATCCG -GTGTACTGCCATCAGGCCATGGCGAGGAGACTGGTCCCGTTCGCTATGCTTGATTGATGG -TTCATATGTGATCTTTTGCTATGTTGAATTCCGATCTATATTCCCTGTCACTTAATAAGT -CACGAAAAGTTCTACGGCCTTGTTGTTTTGAAACTTGTATCTAGCCAGCTACTGATCTTT 
-TATTAATATATGCCACTTACTCGTTTCTCAATATGCATACTACATAGCCATGCCACAAAT -GATATTCAACATCTCGAACACGTCTCTCGTGTTATGTCGAGCTAAATCGAAAATATCAGC -ATTGGAAGCCCACAATAATCACTTGGCGACTACTTAATATCTTGTCTTACTCTCGACAAG -GCCAAGATCGACATGCAACCTTGGAAGCGTTATTCCTTTTCAATCTCTTCTTTTCAATCT -CTCTATACTTCAACCGCGTCACGCCCATTTCGTCAAACAGGGGACAATTTCCTTCAGCCT -GAACTACAACAAGGGGCGAGTTTTCATGACGGCGTTTGAAGTCAGATAACCCCAGAGAGG -CGATATCACGGTAGGGCTTCTCGAGCTATGTGGTGATGATCTCTGGTCATTTCTCAATAT -GATCAGAATCCCTCTGAACCCAGGAATTTCGAACACGCATTCTCTCCATCTGTCGGCTCA -ATGTTTGATCATAACCTCCCGTGTCCTTTCCACTGAATAATTGAGCTGAGGTACTGATAA -ATGTTTCTGTGGAGGGACTGCCCGATAAAATGTTTATGATCTAGGGCCGGTAGGAGGTTG -AGTGCGAACTCACAATTTTGTCCCAGAGCTGTCATCCAGTTCTACATCCTAATGGTCCGG -ACTGCAGTATTGTTGAATACAAAGGGGTAATGAATATGGAAATTGAAGTGTCTTGTCATG -GTGCAAGACATATACATAGCCACATGAAACGAGACAACCTGATCGATATATATATGCAAC -TACAAAATACACCAACGTTTTGTCTGTCATTATCGATAAAGTAGCTATTTATATTGTTTC -ACTACGAGTTGTAAAAAAAGAATTTATATGCGTTTCTCTGATTAAACTAGGACTAACGAT -AAAAAAGAGAGCAAAAAAGAAATCTCAGGTTTGGTTTTAGGAATAGCCGAACGCGGGGGT -CGAACCCGCAGCCTTAAGATTAAGAGTCTTACGCTCTACCGATTGAGCTAGCCCGGCCGG -GCTGTTGAAGAGAGGGGCCATAGAATGCAAAATAAGGCAAAAACAATTTGGCTGATTGAG -ATACCGCATCTCCACATCAGTGCATGGCCAAGATCAAACCACTTTACCCATGGAGGTCGA -GTCGGCAGGTTCAGCCTCCAGTCCACATGGAAAACAAGCTGCATGCCTAAACTGTCGGCG -GAGCAAGATTCGCTGCAACCGCCTGACAGGCGAGTCCAGCTGCGAGAAATGTAAGCAGAC -CAATGCAGAGTGTATTGTGCCCAACCATCATCTAGGCCGACAGAAGGGTGTTAAAAAGTG -AGTCAGCCCACGTCATGATGGAATAAAGATCTAAATGAAGTCTAGTAAACGCAAGGGTCT -GGAGAAAGCGATACATCAGATCGAGCAGGCAATCAAGCGGCCCAAAGTTGACTCTAGTGA -CGGTGATGCGCAAAGGGCCATTTCAGTCTTACAGGAACTCCTTGGTCAAGTCCAGGGCCA -GTTGACACAGAATGAACATGAGAATACGAATGACCTCTCAGAGGCACCGGATTATCCGCG -CATGACCCCTCCACGCGATACTCATGCGGAGGAGAGTCTTGCTCTTGTCGATGCAGAAAA -CCCCCTCCAGCTGCTTGCCAGAGCATCAGATCTTCAGCTTTCGCCAACGGGGGTGCGGCG -TGCGCCAAGACGTCCAATGCAATTGTCAGAGGGTTCATCTTCCCTTCAAAGCGCCCCAAT -AGGAGAACCGAGTGGCAAGTCATTTTTTGTTCCCACAAAAGCAAATCTGGATGTTGGTTC -TGAACTTGACCCTGTTGAGCTGGGCTTGGTTACATTCGATGAGTCGGAGTCTTTATTTTC -ATTGTAAGTGGCCTGCTTTTTTTCAAACAAGAATATCTGGCCGCTTATGCGGACTTGTGA 
-TGTCTTTTCAGCGGTCTTTTCAGATTTTTCTGTTTGGATGTCTTCTTTTAAGCTACTTTC -TAGAGGCAGAATTTGTGCAAATGGGATATTTATGACTGACCGTTTCAGCTTCTACCAAAA -CCTCGCACACACTCGATGGGGCCTCGATCCTTTGATACACACCCCGTCCTTTGTGCGGTC -CCAGTCGGCTTTCCTTTTTACTTCAATTATGGCAGGAGCTGCCCTATTTTTACCATCTGC -TGCTGCTCTATCCAAAAGACTATCTAGGCATTGCAAGTGGCTCGCCAAGCGTGTGTTCAC -ACACCGCCATAGGTCTGTTGAGATTGTTCTGGCTTTCATGGTGAACGTTCCATGGATGAA -TCCTGGTGATAGACTGGGGGACGATGATACTTGCTCCTATATTGCCATGGCTCTCACTGT -TGCTCTAGACTTATCCTTGAACAAGATTGTCTCGCCATCTTCAAGCTTTCAACAAGAACT -CATGAGTCGTCTGGCACGAGCGGAGTGTATTGATGCGAAGAGGGCTTTACACATGGATGG -ATTTGATGATGTTGACCCATCATCAGAGTGGGGCCTTAGATTGTTGCGAAGACGCGAAAG -AGCTTGGATTGCGTTGTATGTTGTTGAGAGGGGGTAGGTTTGGTTCTGCTAGTAGTAAAT -CTGGATCTGACACTAATTCTTTCAGTGTTTGCCTTGCACGTGGACGAAGCTATACGGTTC -CGCCAACAACGCTCATAGAAAACTGTGATCGTTGGCACCTCTCCAATATTGCAGACCCGC -GTGATGGTTCTATGAATTCTATGGCGGTTCTTCGAAGGGACCTTGTAGGTGCCATCCCAA -GTTCCATAACAAATACCAATCCACGCACCCAATACCATACTGTTCCGCTCCATTCTAACA -TATCCAGGATGGACTTTTTCAGAAAGTCAAATCAAGCTGTGATACGTATCGCGTCGTCGA -TACCGGCTCAGAAGCTGCCCAGCTGTATGACCCCACGCTTTCGATGAATATCATTAATGT -ACTGACTATATCACAGAATTAAGGAAACCATTCAAACCTTCTACGACCGGTGGTATGCAA -CCTGGGCCTTGTCAATTGGGGAAGAAGACAGTAAGCGTAACACTTCAAATTGATTGACTT -GGAAATTAACCATTTACAGCATGCTCTTTACCCCCATATGTCGAGATCCTTGTCACGCAT -ACTCAGTTGTCCACCTACGGTGGTGTCATTAATCACCCAACAGCTCCAATTGAGGTGAAA -CGTTTCTTCCGAGCCGCTGGTCTATCGTCTGCATTGAACGTCCTACGAGCAGCCATACAG -GGTGAATCTCGACTGAAATCAATGCCAAACAATACCGTCATCATGATCTCCTTTGCCGCG -TGCTCTGCTCTCAGTCTCAGCGTGACACCTGGCGATAGCAGATCCAGCCTGGCACCGAGC -GTCCGAAATCTCATCGAAGAGACTGCCGGCGTCCTAGAGAGAATAGGATCAACACCAAGT -CATCGAAGCGGCGCCTCTGTGCTTTACGGAAGATTCTTGCGTGAGTTGATCCGACGTGCA -CCGGCCTTGCCTTTACAATCCCGAGGAAATCCATCTAAAGTCGATGCACCCGAGCCGACA -TTTCCGTCTGCATACCTGGATCATAGTGCGGTTCCCGTAACACTACCGCCCGAGGGTCTC -TGGTCTGAGCCATTGCAGTTCTCTGCTATGTCTGATAATCAGATCGTCGATGCAGTCAAT -CGGGCAGGTACCGCGTTTGGTGCGTCGATCCCAGATGTGCCTCTTGATGACATGCTCAAT -TGGGATTGGCTTGATTTTGCCAACCCGGATTTCAATTTTTAGCAGGACCGGAAGGCGGAT -ATTTGTATACCCCAATGTTTATCTTCCTCGTATTTATTTGATTGAAACTCGTTACATATT 
-CCTATTACCAGCTTGCATGTATCCCCCGGGATTTTCTTCATACCTTTTTGTCGAGTATTC -TAAATGCAAGGCACCGATTGCCCGAGTTTCAGGCATATCCCTACGCCACAGGCCCCGATG -TATCCCCGTAGTAATGATACAAGCATAGCCATGACCCTTGACTGGAGTGCCCTGGCTGCT -GGCCAGGCTGGAAGTGCGGTGCTAGGCAAAACTGCTGCCCGCTGCGGACGCGAGCATATA -TATCGCGTGTATAGCAGCCAGAATAGAATAATAACAGGGTAATTATACGTGGGAAGTTAC -ATTGTCGACTACAATTGTCAACTATCAAAAGAGTTTGAGGCTCAATCGGAGACGTCTAAC -GAAGGCGACGATATGCCGACGGTTGATACGGAGTTGTGAAGTTTTTTTTTGAATTTTTTT -TTCCAAAAACCTCTTCCCAAAAGAACAAGATTGAAGTACTCCAGTTTTTTACGTGAAGCT -GTGGAGTTTCATTTGGAATTTATAGAAGAGTATGTAAGTAAAGATCAGAAGAGAAGATGT -TGAGCTCGGATGGGTTCCGAGTGTGGCCGCTCGGCATCCAGAAGTAGCCCCGACTAACCG -ATGCCGTGAATGTTTTAACTGCGCGATGATTTCATTTCATTTTGTGTTTTGTTTTTTTTT -TCCCCTGGTCTAAACCTTAATGAAACATGGCAGCTCTAAGAATTTCCACTACTTTGTCGC -GCAAAGCGCTCACTGCCTGTCGACCTCGACTCTTCCTCGGAGTGCGCGCCAGCGTGCGCC -GCTCGCTAGAATTcagcaccagtaccagcaccagatactccgtaaccgcgccattacAAC -CTCTCGCTCCTAAGATCGAGCGCGGTGGCTCAAAGGTATACAAAGATGCCGATACGGCCG -TCGCCGACATCAAGAGTGGATCGACTATCCTTAGCTCTGGGTTTGGTCTTTGCGGTGTTG -CAGGTATGTGGATAGGTGACCAGTGTGGGAAAAGTGAAGCTCATATAGAATATAGAAACC -CTAATCAATGCTATGCACCGCCGAGGCGCTGATCAATTGCACTCTCTGACGGCTATTTCG -AACAATGCCGGTTCCGCGGGCAAGGGAGGTCTTTCAACCCTCTCGCAGAATGGCCAGATT -AAGCGTCTCATCCTTTCATATCTCGGAAACAATAAAGCACTGGAGAAGAAATATCTGACG -GGACATATTGCTATTGAGCTTTGTCCCCAAGGTACTTTGGCGGAACGTCTCCGCGCTGGG -GGTGCTGGAATTCCAGCTTTCTTTACTCCAACGGGAGTCCGTATGTATAGAACCTTCTCA -GGCTATGTTTATCGCTGATAACCGAAGATACCTTTATCCAGGAGGGAAAGATCCCTGTTC -GCATGGATGCGTCTGGTAAAGTTCTTGAGACTGGAAAGCCGCGCGAGACCCGAGAATTCA -ACGGCAAGACATACCTGATGGAAGAAGCTTTGACCGGGGATGTAGCTATCCTCAGAGCCT -GGAAAGTGGATGAAGCTGGCAACTGTGTGTTTAGGTGAGTATATGCGTCTACCAGTATGC -CCCAGAATCTGGATGTTGATATTCTTCAGATATACTACTAAAGCATTTGGACCTATCATG -GCCAAGGCGGCAACCCTGACCATTGTGGAGGCGGAGAACATTGTCCCAGTTGGTTCCATT -GATCCCAATGACGTGGACCTGCCAGGTATCTTCGTGGATCGGATCGTTCCTGCCACAGAT -GACAAGCACATTGAAAATAGAAAGTTGCGTTCGAGCCAGGCTACTGTGACCGGGCCTTCT -AAAGATGCGGCTCAGATCCAAAGAGAACTGATAGGTCGCCGAGCGGCTAAGGAGCTTAAA -CCGGGATTTTATGTGAATTTGGGCGTTGGTATTCCTACACTGGCACCGTCATTCCTGCCG 
-AAAAATGTCAAGGTCTGGATTCAGTCGGAGAATGGAATCTTGGGAATGGTAAGTTTCTAA -CGTTTCCCCGCGGGCCGTTCTAACAAGATATAGGGTGATTACCCCACGGAGCAAGAATTG -GATCCGTAAGTCGTTCTCAAATTTGTTGATATGAGTTCTCGCTAATTCAAAGCAGCGATA -TCATCAATGCCGGAAAAGAAACCGTGACCTTGGTTCCTGGAGCTGCTACATTCGACAGCC -CCGAATCCTTTGGCATGATCCGCGGAGGTCATGTAGATGTGTCTATTCTCGGCGCTCTGC -AAGTAAGTGCCAACGGCGATCTGGCCAACTATATGATCCCTGGGAAGGTGTTCAAGGGTA -TGGGAGGAGCAATGGATTTGATCTCGAACCCGGATAAGACCAAGATCGTGGTGGCCACGA -GTCATGTCGCCAAGGACGGATCTCCCAAGATCGTGCAAAAGTGCAGCCTGCCCTTGACTG -GCGCCAATGTTGTCAGCACTATTATCACAGATCTGGTAAGTGCGAAAAGAGCCTTGATTG -AAGCCAAACTGACCCGTACAGTGTGTGTTCCAGGTGGATCGGGCTACCGGAGAGCTCACA -CTGACGGAGCTTGCTCCGGGCGTTGAAGTGGAAGAAGTACAAACGAAGACGGATGCCAAG -TTTAAAATTGCCGATACCCTTGAGATTATGGAATAAAGAGGACCGACGTCCCCCGAATAT -GCCAAGAGTTAAAACAAAGGGACCTCGCTATAAATAACCAGAAAATGACTAGAAACTAAC -CAGAAAATCACTTTTTACTTATCTACCCTCTGTACTTCCGGAAAACCACCCCATAATTAG -GCAGAAGTTTTTCTTTTCTTTTTTTCCCCGTAGGCTCAGGATTTTTTCGGCCCCCCCAAA -AACAAGCTCACCCGATAAATTAAGCTTACCTTGCCCAGCCCTTGGAACTTCCCATCTTTT -TCTTCATTTCACCATGGCACTTCATCCATCTACTCTGGAGTCAATCCCAGAGAATGAAGC -AATCATCTTAGTCTCTGACTACGAAGTAGCATTCAACCTCCAGTCCAAATCATCTTTCTC -CATTTGGAAATGCTCTCACCCGGACTGCATTGCATACCCATATGCCAAGAGAGCCATTTA -CTGCCGAATAGTGCAGCGACCTTTCTGGGCACTGATTTTGATGATGTGTGTTACTGTTTA -CATTTCAGTCTGATTTCTGGTTTTTTGTGTCCACACTTGACATGTGATGAACTAACTTTG -AGACAATTTCTCACTAGTGGCATCCTCAATCTTCTCACTATTGCGTTGCAAGGCCTTGGT -TATATCATTCGACACGATCGCGGGTTGGGTATCTTCCCCGGTGAACGTCCTAGGTTTTCA -ACGTGGTTTACCCACGGTGTCGACCCCATACCATGTCACTCGCACAATGACTATTGGCGC -CGTGTTCCACTACGATCGGCACTGCGTGCAGGATGTACCAGCGTCGAGGTTGACGTCTGG -CCATGGGGAAATGACATTCTAGTTGGTCATTCGCGTGTCACAGTCCTTCGGGGAACATTG -CAGAGCTTGTATTTTGACCCCCTGCTGGAGATGCTCGACACACACAATGCACCTTCTCGT -GATTGGCCGAAAGTCATGGATCAGAAGATAGTTGGGGTATTTTCCAACGATCCAGAACAG -ACTCTGACTTTGATGATTGATTTCAAAACCGACGCCGAGCAGCTCTGGCCTCTACTTGTC -GAACAGCTGAGCCCGTTTCGTGAAAAGGGCTACCTGACTCACTTCAATGGCTCGGATGTT -GTATACGGCCCAATTACTATTGTTGCCAGTGGAGACGCGCCTTTTCACCTGATACTGGAG -GATGCGACCTACCGCGACATCTTCTTTGATGCCCCTCTTGACAGATTGACATTTCCGACC 
-GAAATGGCGGCAGATGACAGCAACCCAATGGATTGCACCTATAATCCATCCAACAGTTAC -TATGCATCAGCAGACTTCCGCACGGCTATCGGGTCTCTTTCTCTTAACCGTCTATCGGAT -GTTCAGCTCGCTACCCTTCGGAGTCAAATACATGCAGCCCACGAATTAGGATTGAAGGTA -CGATACTGGGGAACTCCCACCTGGCCGGCGGGATTACGGAACCATGTCTGGAGTGTACTT -GTGCGTGAGGGTGTGGACGTGATAAATACAGATGATCTGCGTGGTGCAACTAAGCAGGAC -TGGAAGTCTCACCACTGGTGGAGTTGGTAGCTATAATAAACTTTCAGCAGAATCATATCT -CGAATCTCAAGTGACGCGTAGGCTTGCCATATCCTCAAAAAACCGAAATTAATATTTGTA -AAGCAGGTTAATTATGTTTACTAGATTTCTTGGTCATCACTCATAGGACCATCATGAACT -CCGGGTTACGGTCATCCCTATACCCTACGTATTGATCGTACTTGCCCTTCTTTTTGTCTC -GATTGATCTTTGCGCAGTAGAGCACATTAAGCAAAATCCTATGCAAATCAGCGATAGCTC -TTCCACCAATACTGATAATCAATATACTCACATAAACCATGCAAATACTAACAGTCCAAG -GATAATGGTATGTCCACGGTGGAAGAGTGGCGCATCTCGAGATGGATAGTTGAAAGCTAT -AGTACAGACATTAAAGGTCTGTATAAAGATAAAAAGCTCATTATATGGGCAACTTACTTG -AAACAAATCCTCCTGCATTTGCAATTGCTAGCTGCAATGCTGATGTAGTAGCTCTTTTAT -AATGCCCCGCAGAGTTGTTCGACAACCACACTAGCATGCACGGTACACTGGCATATTGAC -CGGTCGCCATGAGGAAGGTCATACCATATTGAATACGAGGGGAGTCAATGTTGGCGATAG -CTGCATATCCAATGATAGCAATTGGGAGAGTAAAGAGCATTACCACGCCTCGGAGGCGAA -GACGATCGGAAAGGACCGCAACAATAACTAGAAATGGGTGAGCATTTGATGAAATTGACC -AACGAGTATACCAACCTGTAACCACGGCTGCGACAGCATACGGTATCACAGACCATAACT -GAACCTCATTGGCATCTTTGGCAAAATTGAGGTTTTTGATGATTGTTGGCAGCTATAAAT -CGTTAGTGACTGTGGAACGAATATAGGAATAGGGGACCCATACAAACAAGCCAAAAGAGT -ATAGTCCAGCGAGAATGGCAAAATACGCAGAAGCAGAGAGCCACACTTGTAGATCCAACA -CACCTCGGCGTACTTCAGACCATTTAAACGATTCTGCCTCAGGCGACCTGTTTTGCGTTA -GCCAAGCTTTTATCCTGTGGGTCGGAAAAGCTTACCCTTCAGGGGATCCGGGACTATCAA -GCCTCAGCCGCTTCCCGCCGAAGTCCTTTTCTTCTGCACTTAAAAAAGATGCAGATTCCA -AGCTATTTGGAAGAAAGAAGAAACTAAACACCCCACAGACAAATGTCTGATTAAAAGTCA -GACAAGATATTTGTTTTAATATCACTCTCACATACCAAAAGTCCCTCGATAATCAAAATC -CAGCGCCAACCTTCTATGCCTCCACGAGGCCCAATTTCGGCAAGCCCGCGAGCAAGAAGT -CCTAATTTGTTAGCAACTGCTGGAAAAAGAAACCGATCCATACACACCTCCAAATGCGCC -CGACAAGGAAGCAGCTGTGTAGAATAAGCCAATCCGCAGGGCCAAATCGCCTCGCCTATA -GAAAGAAGATAGATATAAAACCTTTTTGATTTGTTAGTTCGGGCCAGTGTCAATTCCCTT -GCGAACATACCATGCCAGGGAGAAGACCCCCCTCCGCAATACCCAACAAGGCCCGGACGG 
-CCACAAAACCAGCAAAGTTCTCCACAAATCCTAGACACATGGTAATGATGCCCCAAATAG -CAGTTAAGAATGGCAGCCAAATCTTCGGTGATGCTTTCTTGAGCACCAGATTACTCGGCA -GCTCGCTTATATATCAATTAGCCTTTGTATCTTGGTTCGATGATACAGGACGTACCTGCA -AATGTAGGTGAGATAGAACACAGCTAGGCCGATATCATACTGGTGGTCTGTGATAGAAAG -GTCCGCTTCTAAACCGAGGATCTTCGCGTTGCCGACATTGGTCCTGTCCAAAAATGACAT -GAGGAAAAGCAGAGCTAAAATGGGAAGTATACTGGAGAGTATCAGCAACCGCGCGATCAT -TATGGTGTGATGACTGAACCGACTGTATATCTAGCTTCCAAAGCACCTTCCGTGTTAATG -TTTGCTGGTGTGCGAGGTCGCCATATCGATCAAGGGGAGAGGACTTTGCTTCTTCGAGCT -CTGATGCTCTTGACTTGGGAGGGTCCATGATGGCTTGTGTAGAGTGTATCTTAGTGACGG -TGGAATTGATTCCTATCTGTCGCAGCATAGGGGAAAAGAGGACAATAATGAGAACAAATG -GGCCATAAATCAGCAGGGGGAGCCCCCTCGGCTGGCATTTCTGGGTCCGAGCTCCTCTCC -AGCCTCAGATTTAGACCAATGGCAGAGCTTCCCTAAGCTCGCGTCCGGGTGCTCTATTTC -CCCGCGATCCCCGGATTTTCACGTGTACCCCACTGGTAATATTACGAGAATAATTTGCCC -ATATTACTGTCAGATTACTGTCCCCTCCCGGTAATTTCGGTGTTCACTTCTGAGCCCCGG -TTAGTCCCAAATCTCCCGAGACCGGGAAAGAGAATTGGTCAACAATCTTTAATGGCAAGA -AATCCACACCCAACGCATAAGGGCTCTTCATCCGACATGTCTCGACTTTCACCCAGCCTC -AAGGCATTAATCAATGCTCCTGCCGCCCGGCCATCCACCGTACCTGCGCCAGCGAACATA -ACCTCGGTCTACCAAAAGATCCAACAGACCGCGCAATCCAAACAAATCTCCCAGCCATCA -TGGGTCGCACTTTCAGTGAGTCAATTCCTTTACTTAAGATTATAATTGATCTAACATCAA -TTCCAGACCGCAGCAACAATGACCATGAATTCTCCGGAGTCCCTCGCCGCCCTCTACGAA -CTAGCCAGCACCAACTCCGACAAGCAAGTCGACACCGCCGAGCTCATGCGCGAGGTTGGA -CTAAAATGCATCAGCTTCAACGGTATTCCTCGCACTATAAATTGCCTCAACACCTTCAAA -GCCAGCCTCCCGGACTCAGTCAGCGATCGGCTATCCCGCACCCCCACCCGCGCACCAACC -CCGGAGAATATTGCCGCGATCAGTGAGCGCGGCCACGCGCTTTGGGATTCTATCTATCGA -CCATTCGAGAAGAAGCTTTACAATAAACTGGCTGACTCACACCCGGACCTGCCGGTCCAC -ATCCTGCACAGCAACTACGGTGCCCTGTTATCTGATCCGGAACGTACCACTGGCGCGTCT -GCCGGGCGTGTTCTGACGTCTATCGTTGCTGTCGCATGCTTGCGCGCTCAGTCTGGCGTT -GGGCCCCAGGTTATCTCGCATGTGTTTGGGTTGCGGAAGGCGCTTGAGGATGGCTCGTGG -GCGAATGATGTGGAGGGGGAAGAGGCTGCACGATGGTTGGCTAGTGATGAGGGGAATACG -TGGATTTTGAATAGTGTCGATGCCATCGTTGAGGCTATTAGCCAAGGGACTGGATCGAAT -TTCGCTCCTGCTCGGGCGAAGCTGTAAATATATAAGGGATATATACTACATATCTTCAAT -AGTACATATGAGTGAGGTTGAAAACCCGAGGCCTCTCTTGACGGAGCTACTCCATACATA 
-GGAACAGCAATGTGCTAGTTGGTACTTTGGCTCTTATAGCTTGTGCTGGAACACCTGAAT -CATAGTTAGATCCATAATACTATTACGCACCCTCATTTTGGATGTCGTCTCTAGTAGAGC -CCAAATTATGGGTGAATGTGTATCGTTAAAAATTCTCACTACCGCCTAAATCGATGAGAT -TCGGTCTCAGAACCTGACAAAACCTTCAATTGGGCTTCCACTTGAACAGCCGGCTCGTCG -TCCGGCCATACACACCCATACGCCCTATCTTTGAAAGAGAAGGTAGCACATTCAACCTTG -TCGTCAAAACAGATGCCCCGGATGAATCCCAGCCGGGAAGAACGATAAGCAGTATCCTTG -AATGCCTGCATTTTGGCGAGGAGATTAACGTTAAAGGAACCGTCGGCGAAATCAAGTATA -TCGGCCAAGTCAAGTTCAAGATTGACGACAAAGGAAATTCTTCAGAGGTATTGCTTTGGG -TCCGAAGAGGGGAGTGTTGCACTGCTTTGTGGGCCACTGACTATGATACAGTGGGCTGTG -CTTCTGACGCTTCAGGGCATGCGGTACGAGGTTAAAAACCTGTTTAGGTTTTAGTTTTTG -ACATGACACCGGATCAAAGACTCTATCGCGTTGCATCAGTAGCGCCGATGAAGGTTATAA -TTGCCACATCCATGGAATCACCACAGTGGCCGAGCTTTTGCTAGCCGCTCAGGCAGTAGA -TGCAGTGGTTTTCCAGTGGCTCTCCATCGAAGTCGTCCATCATGTCTCAATAAGCATTAC -ACACGTTCGGAACTCAATCTAGACTCATCTTGTTTTGTTTCAAGTCTTGATGGCCTACTT -GCACGGATCACATCACATAAGAGAGATGCGCTTTCTCCCAGCCCACCATGTATTGTCCAA -CCGTGCGTTGATTGGCATAGCATATGTGCCCGTAAGGTCACTCCTATGGAAAGATTGATC -GACGGTATGGGGAAAATGTCCAAGCAGACCGCATATTTCAAGATCGATGAGATCTCCAGC -GACTTTGGATAACAGACCGCTTATCTGGTCGAAGAGTATTGTGTAGGGGCCAAGGTAGAC -TGCAGCTTTGTCTTGCTCCGTGACCACCTTGGAAGATCCCATTGAGGTGCTTAATAATTT -CGCGCACATGGTGTAGCCATCGCGAGAAAAAAATAAGTGATTACAGTACTTTTCTCTCTT -TCAATGGAAAGGTTCAATGGAACAAGTGGACAATTAAATTATTGGGGACATTATCCAATG -AGAGAGAGTTGTGGCATACTTCCAGGGCGAGGCGACCATACACCGGCGCTCATTCTTCTT -GGGCAGCAATTGATTCGAGATAAAGCTGCATGTCAGTCTAATGAACATCCGAGGGTAATT -GACACCTTGGTCAAAAGTCAGGCAACATAAGACCCCCCCTGTTAGTATTTGCGGGAAAAG -AACCCACACTGGCGTTCTTCATTTCGTGATCAGAATCCCCGCACCAGGATCCAGTGAGCT -GGCCTCACATCTAATAGCGTGCAGACCTTTCCACCAATCGCCTTGATACTCCTGTGACTG -TGCAGTGTAAGCTGCATAATATAGTCAGAAGATACCATTGGCACCACTTCGATTGCTTTA -TCACAAAAAGTTGAACACCCGTTTGATCCGCTCGTGCATACCCTACGTGTTAGTGGCGTG -AAATAAGTGAAGCTGCCATGGACACTTAAGCTAGTGCTCATCATGTCAGTTTGATTTTTG -TCTATGAGAAATGTCTGCGGGCAATGGCACCTGCGATCCGGGCGGTCACCCAGACGCTTA -TCGAAGACACCACTTTTGTCAAGGTCGTTACTCTTGAGAACACCAAGCCATGGCCAACGT -TGAACTTGGTTACATACACAGTGACGAAGCCACACACGACTGTCATTCGGGTCATCGATA 
-CAGGTCCAACTCCTACGCCTGAGAGTCACCATCCAATTAAAAGCAGAGAAATATCAGACG -GAAAGAAAGGTGCAATAGCAGGGTCTATTCTCGGTGTTGTGGTCTTGCTACTGCTCCTTT -ACTATCTATATATATGTCGCTTGCAATGGGGGACATCTATTACAAGATCGGCCAAAATAT -CAGGCGGCTCGAAAGATCCTGAAGCGTCAGACCCGACACCTCCAGACCCTGCTCCTAGTG -ATAAACCACCCAGGCAAAAAAAGAGTGTAACCATATCGTCTGATCTTCCTCGATATTTCT -CTCAATATTCAGGTCCAAAGTACGCCAAGCCGATCATTACACCTCTTGTGAGAATAACAA -CGGAAATGAGCTTGCGGACTGGGCTTCAGACAAGAGAAGGAAGGTTGGGAAAACCAGTTC -GCTTCATGATTCGAGAACGGGAGAAACCGCCGAGGGTGAAGAAGCGTCGGAGGCATAGGC -GTCGAAGCAAACATAAACGGCGCGAAGATTGATGATACGCATTGATGAGGTTATCTCGAA -TCCAAATTGGAAATTTGGAATATTTGACAGGGAAAGATTGTATCTATCCATAAGGCTGTC -CGTCCCCCCATCTGCCCATACATGGCCTATATGTACATTAATCCACATCATTATACGGAG -TACAAGTCCCTCTAAATCTACGATTGCTGTTTACTTCCAGGTAGATATGATCCAACCAAA -TGGCCTCGTCAAATAAGCACTCCATGATTATCCGATAACGCCTCAAGAAGAGGATCTCAG -AACCAATTAGAAACCTGCCGACCAATTGTATATTCGAAATCTTCAACTAGTGTTAGTATG -ATCCAATGTGTCCACGACTTTTTGTTCCTTATTCCGAATGCTTTGTATAAAGACTACTAA -GCAAGTCAAAGCGATGTAAAAGGCTTAATAGGATATAGGGCGAGCATGCCTAGGACAACT -TTCCCAGCCACTCGACGAGGTGCTGCTAGCAGGTACAAATTTGGTACATCATACTCGCAA -CATTTGATAAATTATCCGATAACTTTTGAACGTACAATTCTTCTCTAACGTATAATCCAA -ATGGCCAAGAAGCCAAAAATGATAAAATTGAGACACCAAGGGTACCGACGCGCTTGTCAT -TTCAGTCTATATCGATTCGCGACGCCGGCTTTATACCTAAGAATACCTAATGGAGCTCTT -GGATTTCACGGTGACAGCTAAATCATGTCGCAAACGATGTATAAACACTATTTTGGAAAG -AATATCTAATCGAGAGAAAAGATATCAGTGAACGATGAAGTGGAAAAAGGACTCGCATGA -CCGTGGTAAGAGATACAGAGCAGAGGCTTGCTTAGGTATGTTATTGATGAACCGAACGGC -TACTCATTTTCTACCAGTCCACAGTATTGCCCTACGGCGCACACAAAATTACGCCCTGGC -TGATCATGGCTGACTAGTCCTTTGATGGGCAAGGACAAATATTTATCTTGGGGAGGTATA -TGATCCTACCTAAAGGGGATGTCGTAAATAAATGTTACACAAAGTCGTGACGAGCAATGA -CATGTGATAGGATGACCAAGGAAGGTGCATTGGAGGTCCAATCTGGGAAGGTGCATAAGG -GTATAAGCCCCCCCCTCGCTCGTCTTGATCCGTCTTCTTTTCTTTCCACTATCTCGCAGT -AACCCTCCCACCATCTCTCTGATGAGCAGTTGGCAATTTTGGTACAGAAAATTGATGATG -AACATCGATTTAATGATCGATCTCTCTGCCACATAATCGGCCAGAGTCCGAAAACAGCAA -GGCTTGATACCTAGGTAGTATCAAGTTTCATGAACTCATTTTGGTCTTGGAGACGTCCTG -GTTGAAAGACTCAGAGGTTGCCTCAGCCACCTATGGCCTAAAGTCTGATGATACGAATTG 
-ACAGGATTGTCTAAGCTCTAGATGAGAATATGGAATGAGCCGAAGGACAGTAAAGTGGAA -GATGTCTAGATCCTACAACTATAGAAATGCAGAGGGAATCATAAATAACAATAAGCAGAA -CATGGTTCAGGGGAAATTTGCATTGCAGTTAGCTAGTTCAATAACTATTTGATGGTAGCC -TGCATATTCAGCCTAGTTTACAACGGCCTGTTGGCGCTGTCATGTATTGTGCTTAATTTG -GGACTGCTTCTCAACTCTATGTATTTTCAAAGGTTGTTGGTTTGCCTTGAGCCAGACGTG -GTCTTTGTGTACTCCGTACATTAATCTATATCTTTATGCAAAGCCCCCTGAATATACAGT -TTTTTGCGTGGATAAGCTCCAACCAGTAAAAGTCTCGGGATCATCAATTCGGGGAGTGGA -GTCTCTCACAGTTCCCGATTTTTTCATTTCGAGTCGGACGCGAGAACGAATTCGGTACCA -GGGCCGATCCCATCAATTCAACAAAAACAAACTTTCAATTCAAACCTCGGCTATTACTAT -ATTAGGTAACTATCTAGAAACAAAAGACCTGGTCAGTCATCTTCGCGACCACTCCTGGCT -TGGCAAAACCTAGCGCAACCAAATTCTTCTTAGTGGCATTGACCATTCCAGGCGGCCCAC -ATAGCATCACCTTAGTGTCTGGCGATGGCGCAGGGAGCTGCTCCGCCAAAACCGTCTGAT -CCACATACCCAGATCCATAGGTCCAGCCTTCCGGCGCAGAGTCCAGCATATACCACAGCT -TGAAATTCTTCGGGTACTGGCGCGCGAACCGTTCAAGCTCCTCGCGCAGGAGAATGTCAG -ACTCGCTACGGTTCGCGTACACCAGACTAACCTCTGTCATATCGGTGTCGTTCTCACAGA -TTGCTCGGATAAGCTGATACATTGGTGTGATGCCTGTGCCGCCAGCCACCATACCGATCT -TACGACAGAGACCTTTTGTGTAACGCATAGAGCCCTTAGGCCCGCGGAACTCGACTTCAT -CGCCCACGGACAAGCCGGCCATATATTTCCCGCTTAATAGACCATCGGGATAGCATTTGA -TCACGAGTTCAATACGGCCCCGGTCGATGTTGTTCGAGGTAGGAGTGTACGAGCGAGTGA -CGGTGGTGCCATCGACAATGGCTCTGATGGCAACGTGTTGGCCAATGGGAAGACCTAGCA -CACCCGTGGCAGTGGGAAGGGCAAAGACGAAGCGGTAGACATTCGTTGCTAGCTCGGTCT -TTTCAACGAGTGGCAGGCGCTGATATTGTTGCGGCTCGAGGAAACCGCGTGTCAGATGCG -GATCGACACGAATCACTTTCTTCGACTTCATATGAGGTGGGAAGCGCATAAACCCAGATT -CGATCTTGGTGAATCTGCCCGCTTGCCGCGCTACCATTATTCCCACGGCCGTGGAAATAG -CAGAAGCAGCCAGAAAGCCATTGACGAAGCCTCCGTGAGGTAGTCGTAGGTTCTTCAGTG -GGTGAGGGATCAGTTGGGAGGTGGCCCTCAGGCTGTTCGTAAGATTGCTGCTTCGGCTGA -AGATGTAGAAGACTGGGATCAGTCCACCCAGAGCGCAAGCGATCGTTTTGATGGTCGCTG -ATGAGGAACAGGATTCTTCAGGTGCTTTTTGAGAGACCACGCGAACTGCCTTCGGCCGGA -CGTACTGCTGGGCATCCTTCAAAGTGCCCACCAGGAATGGCTGCATGATCTCGCGGGCGT -CTTCAGAATGCCCAACATCCTCATAGGCTGCTGTGGCATCTGTGCCGGCTACTTCAATGA -GAGCATCTGCGCCACCTGGGTGATCTTGCAGGTATTCTGTGAGATCAAAGACTGTTGCAA -AGTCAGCATCATTTTTATGATTCTTTAAAAATGTTGTTATATACCTTGTCCGTGGATGAC 
-CATCCAGATATCACCCTTGGTGTTATGTGCTGTCACATCTTTCAATGTATACTCTGGCAG -CTCGACATCTGCCTGCCACTGGGCCTTCAATTCTGTCGTTTGTGGAACAGACATCGTGAA -TGAATAATTTTGCAAAACCCAAAAGGACGGAACCTCAATTTAATATTGCGAAAAGTACTC -CGTACTGCAATATGAAAATGTAGCTATTATCGCGATAACCCACTCAGCACTTCTCACGTA -TGCATAAACTAGCCTATTAGCGAGCAGGAATTCAAAATGCCTCGGTATCTTGAAGAAAAA -AAACCCTATATCTTCTCGGTGGGGCGTTTCTGAGACGGGAAATTTGCGCCATGGCGTCAG -CCACGGAGAGGCCGAAACAACGTCCACTCTACTCTTGCTGCAATCTGAGGCTGTGGATCT -TTTCTGGATCCTAATCAAGAAAAATGGTTTGAGCCGAGGCTCTCAGAGATATGCATCTTC -CTTTATGAAATGAAAAAGAATCGGGACGCGCTTCGATGAATAGTGGAGTCGTATACAAAT -GGCTCATATAATTAGATCCTTATAAATGCGAGCGGGCCCTTCAAAGAGGAAGACTAGATT -ATAATTTTCATTCTCCTCTTACGATGGCTGTCAAAAATTTCTACCTGTTGGGCGAGGCTA -TCACCTCAGCTCGCCCAATCGAGGTAGAGACAACGGTCGACTACCAAGGTCTTCAGCTCC -TCATCGCTGGTCAATTCGCCATCGTTGAGCCAAATGGTAAGATTTTATCTCATTTCGACT -GGTATCAGCCTAATTCTCAACTTAGGAATTGGGTTCCAGTCAGAGGACTCGATGTTGTCC -ACTCCCACAGAGATCCTTGCCAATGAAGAGCCCATCGCCATTTCAATAGATGGCAAGGCA -GTACGGGAGATTCCTGGTCCCAAGGGACTGCCATTCGTCGGAAATTTCTTTGAGATCTAT -CCTGATCACTTAGGAAACCACCAGCGTCTGTTTGACCAGTATGGACCCATCATTCAAACC -ACCAATATGGGCCGGACAGTCTATCACACCAATGACCCCGAGTTATCGGCCATTGTTTTT -GCCGAATCTGATTTCTTTACCAAGAGAATCAACGAGGCACACCCTTTACACCCGATCAAG -AATCAACAGGCAGGTGTGTTTTTGGGTGACACCGATACACCTGAATGGCGCGCGGCGCAT -AAGTTCCTCCCACCAGCACTGGGACCGAAAGCAGTGCGCCACTACGCTCCCACTATGCAG -GAAACTGTCGAGGATGCATTTACTGTGTTCGATGAGTTAGATGAGCGCGGTGAGGCTTGG -AACGTCTACCCATATATGCTCAAGCTTGGTGCGCAGGCTGTGGGTAAACTGGTTCTCGGC -ATGGACTTTAAGCATTTCTCGGCCGTGGATGCGCCACCTCATGAACTTGTATACCGAATC -GCTGAGTCGTTGGAGCTGAATAAGAAGGTGACCGCTCGTGGTGATTGGTACGCTAAGTTG -CCTTTCGGTGACCCGCAACGTCTACGGAACGCTCGTTACCGTATCACTGAGATGGTCAAC -GAGTCAATTCAAAATGCTTCTCGCAATGGCGTCGAGGATCTCCCGTTGCAGGATGCTGCA -TTGAAGGCCTCCAACATGATTGGTATGTGCAGTAACCGGAAGCTGGAAAGCTTGAGCTAA -TATTTCGCTAGATTATATTATTCGCGCTACGGATAATAAGGGCGAGAAACTACCCAAGAC -AAGCCTGATGGAGGCACTCGTCGTAGCCACTGGCGCTGGGTTTACGACAACTAGCTCGTT -GTTATCGTGGCTGCTTTATTCGATAGTCAACTACCCCGGCATGCAGGAAAGACTCCTACA -AGAATTGATTGACAACGATATCGACGCCGATACCCCAATGACCGCTGACCTGACGGATCG 
-TCTGACATTCTTGGACAAATTCATCAAGGAAACCCAGCGCCGCCATAACCCCTCATATCA -GCCTGCGCGCACGGCCAAAGTTGACATGATTCTTCCAGGCGGATACAAGCTCCCTGAGGA -TTCTATCATCATCCCTGCCCTTCACCACATACACAACAATCCAGCGGTCTGGTCTAACCC -AGCACGCTTTGACCCGGATCGTTGGGATACGGCGGAAGTCAAGAACCGCCACAAGACTGC -GTATATTCCGTTCGCTGCTGGACCGCGTATGTGCATTGGGTTCAATTTTGCCTTGCAGGA -AGTCAAGGTGTTCTTGCCGAAGTTGTTGTACCGGTACAAGTTCACGAGAGAGAACGATGC -GTCGGTCGAGTATGATCCTATGTTCCAGTTGATCCGTCCTACTAATCTCTATGTGCGGGC -GGAACGACGGGTGAAATGGCCCCCTAAGACTGAGTGACAATTGTAATTGCGAAGATTTGT -ACACAATAATTTTGTTTTTATATCCAAAAATAGAGATCTCCCAAAAAAGTTTAGCTCCGG -CCCGAACACCGAGTATTTACCCCGAGGTCGGAGTTATGAGGTGATCGGTGATTGGAGTTT -CAACTTTCTTCCTTCAACCATCTGATAATCTGATAATGAACAATCAATCGCGACCAACGC -GTTACATGGCTCATGTCCTCGAAGAGCTGGGCCTAACCACCATGTGGCGGTCGTCGCTCG -ACGTGAAACTTCTCTGCGCGCAACGCTTCGTGCGCCTTTTCGCGTACGGTGGCTCAACTT -TGATTTTGGCATCGTATCTGTCGGCTATGGGGATTTCAGATGATCGCATTGGCTTGTTTA -TGACCCTGACGCTGGTTGGGGATGTGGTAATTAGCTTCTTCCTCACCCTATATGCCGATC -GTATGGGTCGCAAGGCGGTGTTGTCGCTCGGATCGATCCTCATGGCTGGCAGCGGAGTCA -TTTTCGCGCTATTTGGAAATTTTTGGATTTTACTGGCAGCTGCAGTCTTTGGTGTCATCA -GCCCGAGGTTCGTTTTACTTTTCCTTTCTATTTTCTGCTTTGATCTGACCTAATCTCCTA -GTGGAAATGAAATTGGTCCGTTCCGCGCCGTTGAGGAATCGACTTTAGCTCATCTCACGC -CGCATGAGCTTTTGAGTGATGTTTTTGCCTGGTATTCCTTAATCGGAACTGCAGGCTCAG -CCACAGGCATGCTTGTCTGTGGTTGGATTATCAATTCTCTAGAGTCAATTCATGGATGGG -CCTTTATTCCCGCTTGTCGCATCATTTTCTTCGTCTATGCCGGCGTTGGAGTCGTGAAGT -TAATCCTCACTTTGGGTCTGAGTGGTGAAGTCGAGGTCCAGAAGAAAGAACCACAGGAGC -AGAGCTCCGAAACCCAACCGCTGTTGACAGAAGCAGTTGAGCGTGATGTCGAGCCGACCC -CAAAGAAGAAGGGGTTGTTTCCGTCTATTGAAAAGGACCTGTGGTCGCTGGTTATTCGCC -TCTTCATCCTATTTGGAGTGGACTCGTTTGCGTCGGGGTTGGCTTCACTGTAAGTGCGAT -CCAAACTCTAGTCGGCTGCAACGCTAATTTGTCAGGTCCTGGATGACCTACTTCTTCAAG -GGTAAATTCAACCTGCCTGAAGGTGAACTCGGTACTATCTTTTTCACCACAAACATCATC -GCCGCAGCATCCATGCTGGTTGCCTCGTCCCTCGCCAAGCGAATTGGTAATGTTAAGGTA -AGATAATCTTACTCGGGGCAGTTGAGCGCCTGTTAACCTGTCGATAGACTATGGTGTTCA -CACATTTACCGTCGGCGATCTGCCTGGCTCTTATCTCTGTCCCATCTAGCCTGCCTTTAG -CACTGACATTTTTGGTTCTGCGAGCCTGTTCACAAAACATGGATGTGGCGCCTCGCTCCG 
-CATTCCTTGCTGCGGCTCTACCGGCAGACAAGCGCACTGCTATCATGGGCGCGGTGAATG -TTGTCAAGACTACTACGCAGAGCATGGGCCCCTTGTTAACCGGTATCTTGTCGCGCAATG -GCCACTTTGGTGTATCCTTCATCATTGCAGGATGCTTGAAAGTGATTTATGACCTTGGCA -TGCTGTTCAGCTTTGCTGGGAAGGAAGCTGCCCGTCGGAAGCAGGCTGCCCAAGATGCCG -ATGAGGAGACAAGTTAGAGTATTTGGAAAGCATCGGAGTAGGTTGCTGATTCATGTAGGT -CGTGAGTCGTTTTACTGCTTGGGAGTCACACCTAAACTCTCTGAGCCTACAATTGTGTTA -GTGTAAATGGTTGTATCGAGGCACAAGAAAAAGTAAACTTTTGGTTGATATAGTATGCAC -AAATGATTCCAATTGATCAAACATAAACCAAACGCACAGAAAATATACAAGTCGAAAATA -AATCTAAACAAACCGAGCAGCTCGGTTTCGCAGAACAGACAAACGCCGACGTGACATCAC -AGTGTGATTGCCGAGGGCAGTGCCATTTTGATGCAGCGTAACATTCGAATGGATGGGATG -ATGCTTGCCAGTGACAATGTGTGAAGTTTGATTCTTCAACAACTCCATAACCTGATGAAC -GATCCGGTCGCTTTCCGCATGACCGGCATCATTCACACGGCTTTGCTGCACCGGCTCAAG -CTGGTTATGTGTAATCTGAGATCCACGCCACGACTCCAAGTTTGACCCAGGCGTCTGGAT -ATTATTGATATCCTCAATTGTAGTATGCTTCTCAGCACGCATAGGCGCATCGGGCTGGGC -AGTAGGCATGGACTGCTTAGTCTCTGTTTGCTCGCTTTGTTTTGTTCCCTGCGTAGATGC -GTTCTCGTCGTAGAGAGGACTATCGCTGTCGCAAACAAGTTTCACCCAGACCTGGGTGTA -GGCAAGTTGGCGTGGCTCGCACTGGCTGACTTCGACGCCATCTCGAATCTCTTTGGTGAT -AAGGGGCGGGTCGATACATCCTGGGACCTCATGTACGGTAATGTTGCAGCCGCTGCCGCC -TTCTAGGAAGGGGACATCATGGTTATGGGAGAGGAGCTCTGCATCGACGGAGATGGCGCT -GACAAGGTTGCGCTCGTAGGAGGGGACTGCGAAGCTTGTGCAGCTGTCCACGGTGATATC -CTGGGATGGAGCGGATTCTTGATGATTTTCGTATGTGTCGAGGTTGCTAAATGGCTCGCC -AGGGCAGCTTGGATAGAAGTGTGCCCAGACACGAGCAATGCTTTGGGCGTGGACCGTGCG -TGATAGGAGGCATAGGGAAATAAGTGATGTCACGAGGGAATGCATGTTCTCTGAGATTTT -GATAATTGGATTGAATTGAATTTGAGTGCAAAGTATAGACAGCAAGAGCCTCAATACCTA -TTATATCCTATTGTATACATAATATGCCATCTGGCCGTTCTTATGATTCCCGCACCGCCG -TAATGGGATGGCAGGGAAGAATGGCAAGAATGACTATGATGCTGGACTTGAAACGAGTTT -AAAATTCAAAGGACCTCGGCGTTATGCTCAGGGGCTGGCAAGTGTTATTAAATCTTATCT -TCCTTCAATCTGTTGGTTGATTGCACATTCCAGTCACAAGGGGTCTGTATCATGTCGATC -TAGGGTCGTGCGCGCTATGTGGTTACTCCCTATACATTCCGATGGCTGTGCGCCTCTCAA -AAGACTTGGAAATGCTTGGTTACCAGGCAATCCTGCCCATACTATTCGTGTGTACTACCT -CGAACGCGCTAAAAAAATGATTGACGACATTAATGTATGAGGTTGGCCAGTGATCATTCC -ACGAGCATTCGATTGGCTCTGACAACTTCGCTTGAAACCCTGCCCAGGTGTTGATGCTAG 
-TACTGGATTAATTTGCTTGCGATTCAAATCGCGACTACAGTCAAGGTCTCGTGCTCATGA -AAGATGAAGCGGTAATCTATAGACATTATACACACAGTAAAGTAACATATGCTGCAAGGT -CCACAAAGACACCAGTATTTGAGTGTAATCACAGACCCAGCTTAAAGCCACCCCAGCTAA -GGGTAGAGAAGACACCAGCAGGATCATAAGAAGACCTAATCTTTCTCATACGAGCGACGT -TCTCAACGGGAAAGCCCGCATAGGGATCTTGCCACTCACCAGCATCACCCATGTAGATGA -ACTCGGAAGCAAGCCCCAGCTCTTTGTTGATACCATGGAGATGCTCGACCAGTTGGCGAG -ACCAAGCCTCGATGCGAATATCGTCCTGAGCTAGGTCCCAACCGGTACTGAATTGCCAGA -CTATACAATAAGGATTAGTCTAGGAAACAAAGCCGAAGCAACTTTGGAAGGACTGACTAA -TGAGAGGCTCTTCTTCAAGACCCCAAACGTTTCCAACGCCATTGGTCTTGGCCACGCGGA -TTGAATTCGGCGAAATGACGTTCGTAACAAAAGTTGGGTAAAGCCCCTGGACATCTGCAA -TATTGTCGACTGCAGCTTTCCAGGCCCTGTAAATCGAATAAAGAACCTTTGCATCGGGTT -TCATCGTCTTATGAGAGAATTGTACACTAGATAATTAGTGAACTGAAAGCTCCAATACAC -TCGAGATTACATACCGCATCATCTGCTTAGGGGAGTCGAGGGCCTCGGCCCACGGCTTCA -TAGTAGTGACATTATGAATTCTTTCAGTTGCTGGAATCTTGGTAAAGTTCGCGAATTGAG -ATGGGGGCTTGCTGATACCCTCCTGGACCCCGAGGACTGAGCCCTGTGCCACCTTGGTGG -TCGCATTGTAGGTAATAGTAGAGATCATGCCCGCAGCAATGGGGTGTTCATCGTCCAGCT -TAGCTGCTTCGCATACAGCAGCAAGATATCGGGGAATGCTTGTCTCATTGAAGACCTGGA -TAGTGGTGCTGACCTCTGGAACAGCATAAGTCCTCAGTTCAAATCTGGTGACGATTCCAA -AGTTGTTCGCGCCGCCCTTCAAGGCCCAGAAGAGATCGCCGTTTGAGGTCTTGTTGACGG -TAATCTGGGTGCCGTTTCCAAGAACGACATCATATGCGACCACATTGTCCATGGCGTATC -CGTACTTGTTGTTGAAATAGTGGAAACCGCCGATAAGACTGAGTCCGGCAACACCAATTG -TCTTCATACGGCCACCGATGCAGACACGACCATGAGGCTCAAGTGCGGAGTATACCTGAT -ACCAGTTGAGGCCAGGGCCAACTTCCACGGTGTTGCTTTTCACATTGATAGAGTTCATAT -TTGACAGAGCAATCAGGACTCCTGCATCAATGTTGTTCGATCCAGGATACTGTTTTAGAG -TTAGATAGCTATAACTTCATTATCTTTGATACCAGCTTGGAAGAAATCTTACGTTCATGT -GACCTCCACCACGAACAGCAAACTGCGCATTGCAGGCCCCGATGATCTTGACTGCTTGCG -ACACCTCAGCAGCAGTATTGGGCACGAAAACACACGCAGGAGAAAGATCCGCCCGAATAT -CCCAGTAGTTATCAACAGTCTGAGTTGTGTAGTTTGCCAAACCGGGACCGATCACGCTCT -TTGGCATAGACCGGAAAAGCTTCGCGCAAGCACATGAAGCTCCCTTAGGAACCCGAAGAG -TCCCGGAAGAAGTGGAGAGGAAACCCGTCACAGTTGCTGAGGCGCTTGCACTGCTCAAAT -TAGCAATGCCAAAAAGGCACAGTCCGATTAAGTGAAGGAGAAACATCATTTTGGTTGCAG -CAGAATCAGATGTCTGGTTAATGCTTCTCAAATAAGCCTAGGGGACTCTCCCGTCTTATA 
-AGCCTGCGATTCTTCGGTTTCTGTCTTCATGCAATCTGTGCCAATGAGAGTTCCAGTCTG -TTCAACCTTTGAACCAACTATTTTGGGTGTTTTCCATGACGATCACGGCAGGGGCTCGTT -TTCGGATCAACAATCTGGATACTCCCAATGTGCTCCACAGGAGGTGAAGAGGGCTGAGTC -TTGTTGAGGCAGAACAATTTCTTGTTGAGGCAGAACAATTATTGACATGGCAAACCATGG -CTTTAGGCATCTGGATAATTCTAGGTTTATCATCTTCAAATAAGACCTGTAGGTACATCG -ATGAAGGAGATCGGGCAGGCTGGATAATCCGAACCCGAATACGAAATATAAGGTTACGAG -TCCGAGACACGCAGACCAATCAGAAACTGACGTTCTATTCTCTCCGGACTTCGGGTTCGG -ATCTTTGATCTTCAAGCATCCATGTTTTCACTATGTAAAACAGCGAGTTCACTAATTTGC -CTTCAACACGGACAAAATCCGAGTTTCATCGGCAGCCTAAATTATAAGGTTATTTGCCAA -GGAACGTCTAGAATCCCCACCCAAAATAAGCTTCTAGGGACAAGGCTGTCTAAATCTTCT -CGGATTCCAACCCGAGTACGTGAGTTCAAAATAGATTTTCAGACCTCAGCAGCGTCTATT -GCAAAAAAATCGTAGGAATTCAATTGACCTACAGATATAAAAGCATCTCTCAAGCTCTTT -ATGGTCTTTGGATTCACAGTCAATAGTTTCCAGTCTCGATACTATTCTTACCATTCAATA -TCTTACTCAAACTAGACCACCGAAGCCATGGCCAGCGATTCCACATACACTCTCTACAAC -TACGACCCATCGGAAGTTGCGGCGATCATATTCGTCGTTCTCTTTGGATTAACCACCTTG -GTGCACATTTTCCAAATGATACGGACTCGATCATGGTTCTTTATTCCTTTCACAATAGGA -GGATGCTGTAAGCAAAAATTCCATCTAGAAACTAATGCCTAAAGCTAATTGACTTGAAGT -CGAAGCTGTCGGCTATGTCGGTCGATATCTTAACTCGAACGAGACCCCCAATTGGACAAC -AGGCCCATTTATCATGCAATCACTCTTGCTCCTTGTCGCACCAGCATTCTTCGCTGCATC -CATTTACATGATCCTGGGCCGCTCAATTGCATCGACCGGCCATGACAACCTTTCTGTCAT -CCCAGTCAAGTGGCTCACGAAAATATTTGTCTGCGGAGATGTGGTCTCATTTTTAGCCCA -ATGCGCTGGCGGTGGTTTTCTTTCCTCTGCGAAGACTCAGTCAAAGATTACCCTCGGACA -AAACATTATCATCGCCGGCCTTTTCATCCAAATTGCCTTCTTTGGTTTCTTCGTTGTGAC -TGCTGGCATCTTCAATTATCGGCTCTGGAAGTGCCGTGATTGTATTTCGATGTCTTCTAT -CAGGGTGCCGTGGCAAAAATGTTTCTTCGTCCTATATACGGCTAGTCTGTTCATTATGAT -CCGGTCTATCTTCCGTGCCATTGAGTATATTACTGGTACGAATGGCCCGCTTATGTCAAC -AGAGGTGTATCTGTATATCTTTGATGCAGCTCTCATGTTCTTGACTATGCTCACTTTCAA -CATCTTCTCCCCGAAAACCTTGGTGACTCCACAGTCGGCTACACATGATGTCGAGTCTCA -GGAGAGCAGCAAAGAAATGCTTGACATCAGAAGAGATGCCTGAGAATTGAATCTGAGGCT -AAAATGGAAAGAAAGTACCATGGATTATTTTTAGCTTAAACGCTTCTTTTGTGGGTCGAA -GGGACGCTAGTATGGCGCTGGACATGGATTTGTGGTTTGGGAGTACGAAAACTCTTTCTC -CGAATATTTGGAATAGTTATATTTTAGAACTATGATACACCTTCGCCCCctctctctctc 
-tttatctctctATTTAACACATCTACTTCGACCTGTACAGGTTCAGTCTCGCCTAAGGGG -CTTACCATGGGGCCTCGACGGTCGCACAAGAAGTCCAGGAACGGCTGCCAATGGTGTAAA -GCCCGCAAGGTAAAAGTGAGTGACTCAAAAGAATATAGTATATCTCCTTTTTTATTTAAC -ACATTAAAAAAGTGCGATGAGACCAGGCCACACTGTAACAACTGCTTCAAACACGGCGTA -CAATGTCTCTTCAGTGAGAGCGCCCCCGCAGGCCCCGCCAGGAAATCAGGGTCTCCACCT -CTCGGCACTTTTGACGCCCCAAGTCCCGTATCGGGGCAGAATACCCCGTCAATGGGTATG -GCCGAGATGGCTCTACTTCATCACTTCTCTACATCAACCTGCTATACGATTGCACGAAAC -CCTATTTTGCAAACAGTATGGCAAATCAGGATCCCCCAAGTCAGCTTCTCGTCGCCATTC -GTATTCCGCGCCATTATCGCTCTGTCTGCCCTACATCTGGCTCATGTAAAGCCCGAATTT -CACGACCATTATGTTTCTCAAGCCGAGCTTCATCATAACGCTGCCCTGCAGATGGTGACT -GCCATTCTACCAGATGTGAATAAGGAGAACTGCCAAAGCATATACCTCTTTTCGATTCTG -ACCTGTATCATTTCCTGTGCCAAGCCGCGCATCAGAGATGATTTCTGGGCAAAAAGTGAT -CGAGATATCGAGTGGTTGACTCTATTTCGTGGTACAACACACATAATCGCGTCGGCAGAC -GGCTCTCTCAAGACGGGCCTACTAGCACCGATGTTTGCTATGGGCCACCGCAGGAAGCTG -GCTCGTGATGCGAGGTCTGCCGCGGCCACGCCGCCGTTTCTTCTCGTCTTAAAACAGCTC -TTGCAGGATACAGTTCAGGATCCTAGTGAATTGCAGTGCTACCACGACTCAATTGAGGAT -ATGGCGATGTCATTCGCCACAATCGACGAAATTGGGTCACATAACTGCGAGACTGCTGAT -ATATTTATTTGGCTGCTGGCAGTTTCGGATCAGTATTTTGGATACTTCCAGCGGCGCACA -CCTGAAGCGATGGTAATTTTTGCATATTTTTGTGTTGTTATGAAAGAGATGGAATGGGCT -TGGTGGATGCAGGGGTTTAGTGTTCATGTTATAAGTGGTATATATTACCATTTGGACGAG -GAGCACCGTTGCTGGTTGCAGTGGCCTATGCAACAGGTAGGATGGGTTCCATGAATCGGT -GAATATCTCATACATAGTCGGCGAACATTGCTTTGGGTTGCTTTGTGAGCACTCCTTTCA -GATGCCAGTAAGATTGACATAATCATAAACGCATAGAACGATTTTCTGAGGTAAACTTAC -CAAGGGACCTTGGAGCGAATCTCTTAGCACTGCTTTCTATAGGCCCATTCTGAGCCAAGT -CTGCAAGGAAGCTCGCGTCTGGCTTTTAATGAAGAGATGAAAATTTGACTTTCTGTATAT -CCTGTCTAGAGATATAGAAATCGCCGATAGGAATGGTGTATCTATAATTTTACTTGTAAT -CCTCATGCATGCTTTGACCCAAGCCATCATTATTTTTTTTCGACAGGATGTTTCTGCAAA -CTCTCGTACCACTGGATCATAGATCCTATCATGAAAGCCACCGAGCCCCAGAATGTAGCG -AGGGAAGATTGAAAATTCGCGCCAGAGTTGTCACTCGCTGGCCCCAGTGCGCCACAAAGA -GTAAAGCCAACACTGCCTATCAGATTCCAGAGCCCAATGTGCCAGCCTAGAATATGCCAG -GCAGGAATATACCACCTTGACTGCGTTTCAAGCATATACAGGTATGAGCTCACCGTGAAC -CCCAGCCCACCAAGAGTAGTGGTACCCCAGTAGAGGCCATCAAGGACTCCCTTGCTCAAG 
-TGGTTAAAAATACCAGGCACACCAACAATTGCACCAATCCAGAAGATCGTTGCGCTCACG -AAGAGGATGAAAGAAGCTAGAAACCCAATTTCATGAAAAAAATGCGTGCGCAATTCAGAC -ATGGAGGGGATCCACCGGAATGTCCTGCCATCTGTGGACGAAGTAACGTAGCCTGTGCCA -TCCCGATGAAGATGAGCACTGTCACTAGCATGATGATGGACACTGCCGCCACCATGATGA -AAGTGTTTTTCTCGGAGAAATGATTTGTGATTGGCCTGATGATGCTCACACCTCGATAAC -TTCGGGGTGAGCTCCTTGATTGCGTGGTGAGGAACCCCCTCCTCAACAGATTTCTCGACG -ACAGTCTGAAGCGCCCAACCAAAACATCCAGTATGGTTGGTATTGAAAGCTTCCAACATC -AGCAAGACACCTCCGATTTCAAAGATGGTCGCGCCGACGAACCCTGTCCAACCACCAGCA -ACGTCAATCTCACCGTGGAATTCGGTCTTCCTGTCTGCGAGAGGTAACCACACAAAGAAG -CCGTTGATGATGAAGACCGCGGACCCGATAGTGAAACTCATGGCGACTAGATACGACACG -TCCCAATAAGGGACGTAGGTCGCCATCTTGAGTATGATCTTGGCTACCGCTGAGAATGAC -CGCGTAAGTGGGGGTACTTTGATTCCGGTTTCATGAGGCAGGGCCTCCGTATCGACGCGG -ATGGCATGTCGGCCTTTTCTGTTGTCGCGAGATCGCCAGAGGCGCTCGATTTGTGGAGGA -GGACGAGTCTCTTCTGAAGGCTGGATTGGTTTGTAATGTGCTGCGGTGGGGTTGAGGAAA -GATATTGGGCCTGTAACACGGGTCTCGTCTAGTCGAAAGCCAGGATAACCTCGAGGTCTA -CGACCGGTCTGGAAATGTGAACGCCGCGTCATATTGGCCTGGGCCGAGGTAAAGTTGAGT -CTTTGAGCCCCTAAAAACACATTTCGGGGATGATTCGACGCGTATGAGTTTTGATTTTTT -CGACGATCCCCCTTCCGAAGAACCGCGGAAAGTGCGATGTTCAATCCGTGTGTGTTCCGT -ACATGAGCACGTCGACGTCAGCGCTATGTAGATATGCGGTACATTTAAAGCGGCGTTTCT -CGACCCCAGAAATAGCACCCACACCTGGGTAGATCTGTTATCTCACACAATCATGTTGAA -GGGGGGGAATTGACTGAGTACTATGTACGTGTCTAAATGTGGTAGTAGCAATGTAAGTGT -CATTGAGTGTATCCATGCTACAGTCGTTCTTGAAATCGCCTTGACCAATACACACTCACA -CTCTCTCTTCTGGCCCCACCGCCAAGGCCTTGGCGCAAATCAGTTATTGAGCGGGCCGCT -TTAAATTGAAGGGAGCAGTAAAATCGACTTGGTAATTTACAGTACCAAACCACGTAATAT -TACATCCTATATACGAAAACCTCATGGACTTCCATAGAGCCCGAAAGAATGGCACCTCGT -GCCTTGTTTTCAGATCTCCTATATGAATATGCTCTCCATTTTAACCGCACAATAGGTACA -CTTTTGCTAGTCACACAATTGCGATCACTATTCAGCAACACCAGAGCTTCGCTAACCTCT -TCTAGACTATCCTTTCAATAAAGACTAGAAAGCACAGAAATAAATTACTGAGCATGCTTT -GCGCATAATCAGACATAGTTAGACGTGAAATTGTTTTTCGCCTTACCACCCACGGCTATT -GTAGGAATAAACCGGAGGAAAGTATACAATCTCTATGGGGAGGTTACAGTGATTCTCGAA -ATTGCCTGACCAGTACTGTACACACTGAGATTCTTTCTCGGCCACACTACCAAGGTTTTA -GCGCAGATTATTTGTTTGACTGGCCCGCCTTAATTTTAGAGTTATCGTAAAATACGCTTT 
-GTATGATCCATACATAGCGACGTAATAACAGTGTGGGTGAGAACACCGTGGAATTCCATG -AAACCCCAGAGTGGAGAGTGGACCTCTGTGATCTGTCAGATAACTAGGTCTCTATGAACT -ACTCATATGTAAGATAGCCGCTTGATATATTTATCAGGCAATAAATAAGAAGGAACAAGA -AATATATATCTATATAACAAACTTGATAAAGGAAATCCGCCCTGATGTGTGTCCTAGGGC -AAGTGTATTACCATTGACCTTGAAATATGTGGGTCGAGATTCAGTAGGGAGCTAAAGAAC -CTTTTCGCCTGACAATCTTATCCAATGGTCATTCCCGATCGAGATTCCTAGATTCGCATA -AGGTGGATAGAACGAAGAAACGTCACACTCAGATTCAATGTCAACCCAGCCGAAATTAGT -ACGGAGACGTGAACCTCCGCAAGGAAATTCTAGCGTAGTGACTACTTCTTCGACATTCCA -AGTTCGTGCCAGGGCGCCTGTCGCCACATCCCAGAGCCGCATTGTGTTGTCTTCAGATCC -AGATGCCATTAGCTGGCTGTCCGGCGAAAAGGCTACCGAGGCAACTGATTCCAAGTGGCC -TTCAAGAGTCCGGGTAAGTGTGCCCGTCGCCACGTCCCAGACACGCACTGTGTTGTCAGT -CTGAGTGCACACGTCTGTCGCCATGTCATATAGCTGAACACCAGAGCTTGATGCCAACAG -CCGGCCGTCGGTTGAGAAAGCAACTAACGAAATCGCACTTAAGTGACCTTTAAACTGTTG -AGAAAGTGTACCCGTTATAAGGTCCCAAAGCTGCACTTTACCATCACTCGTGCCGGCCGC -AAATAACCGTTCATCGGGGAGAAGGCACCCGACAAAGCGTAACTCAGGGCGCCTTCAAGT -CTATTGTTAAGTGCACCTGTCATCGGGTCCCAGATACCTATTATATTATCACCAGAGCTT -GACGCAAGCAGCTGACCACTAGGTGAGAAGACCACCCACTCAATTAACTTCGAATGACCA -TTCATTGGTTCTTTTTGCGCGCCTGCTACCAGGTCCTAGAGCCTTACTGTATTATCACCA -GAGCCGGACGCTAATAGCCGGCCGTCAGGGGAGAAGGCTACTGAATGAACGCAATATGAA -TGACCGGTGAGCGTCTGGGTGAGTGCGCCTATCGCCGGTTCCCAGACTTGAACTGTGTTA -TCGTGGGCGCCTGACGCCAATAGCCGACCGTCGCGCGAAAAGACCACGGATTGAACCCGA -CTCGAATGGCCTTCCAGAATCTATAGTTCTGGGCCCTAACTTTCCTCGACTCGCGGTAAC -TGGCATAGCCAAGTTGGAATCTCTTGTTTGAACTGTACACGGATACGTGCTGTCTGGGGT -GTAAATACAAGTCCTACACAATAAACTTGAAGGGGCGCTTGATCAACAATCCGGTAGTTT -TTCAAAATAAATCGTTTTGCATCGTAAAGAAGGTCCGAACTTAAGGAAGACCCATTGCCC -TAAGGCTGTTAGTGCGAGTCACGTCGCCTCTCTGTGCAACTTACCGCTATGGCCACTTGG -AGCCGATCAAGAATGCCTAGTGTTTCCGACATAAGACCGATTAGGCTCATTGCTTCCACC -CAATGGAGGAAATGCCTCTGGAGAAACAACAGAGCACCATATATCATACTATCTGAGTTC -GCGCATCTCACTAGACGGTCTGCCCAGTATCGACAGGCATACTGCAACTCCGGAGGAATG -TAAGTCTCGACCGTATGGCGAGCAATCTCCGTACGTTCGATTCCGTCGCTTGGTAGGCCA -CATATATTTTTCCGCAGATTTTGACACATCAGGAGGCATTGTTTAGCCAACATACAGTGC -ATGTCCGTTTCGTTTATCCCAAAAGGAGTTCTGTTTCGGATCACTGCAATACTACCAGGC 
-AATATGATATAAAGAGAGAGTTATGCACCGGATGGCATATTTAGTCGCCCGTATTTGGAT -TCGAATTTGGCATCTTTCTCGTGACGGGCCGCGGCACGACTTTTGCCTTGCTAACGGGGA -GCTTGCGGCAGTGGGAGCTCGAGAAACGGGCCCGCCCGCGGTTGAACCAAGATAGTTGGT -TAAATTTAAACTAAGGTTGTATGCCTACTACGTGGTAACGTGGTAACGGCGATCGGCACG -CTATGGTTCTTATCGGCCGTGATAGCACCTGCGATTTGGCGAAGATGGCGATAGCGAAAT -CAGAATCGCTCAACGTCATGGTTAAAGAGGAAAGGCTGGCTTAAGCTTGGGTTGTGCTAC -GAGGTGCTGCGGCACTAAGTGCTACAACACTATAGGTCTTTTCGGGGTTGCGGGTCTGCG -GGGTTTAGCGCGGTCTAGAGATAGTAATAGTGGTAGTTCATTGGTACGTAGATAGCAGTA -CTGGGGGTCACCCATAAATCGCATTAGTTTATAAGTTGATGTTTTGACTCTAGTAGACGG -ACTAAATCAATATCGCTTTTCACAGTAACCACATACGTACACATACGGCTTCGAGCCTCT -GGAGCTAAGTCCATATACGCTTCCACGTATTTTAATCTTCCCCTGTACATACATGCGCTT -CTGAAATTGAATCCATCTTCCCGTAATGGCGCTCGTTCCACTACCTACTGAGATCCTGTG -GACTATCGCATGGTTCGCATCGTGCTTTTGCTATAAACGGGATATTTACGCCTTCGTCCG -GAGTAATTGTCATCTATACGAAACACTTATCAAGTTTCTGTATTACGGTCGCAGCCAGTA -TAGGCAGGGAGCCGCGCTCTCTTTTCTCGTTGGGCGGAGCCTTCTTTTACAAAGTTCAGA -TTGAACGAGCTCGAGCAGTGTATCAACAGCTAGTTCAGGACGTTCAAATCATGCCACCAT -TTTGGTGACATGAGTTTCAGAATTATAGGGGGNNNGGACATCAATGCTAGATTATTAATA -CTCATTCTGTCGAGGAAGGACTAAAGATCAGTGCCACGAGAAGGAGTTTCTCGACTATGG -TATTGAAGGCTCGGTTCTAAGATCGGGGTTGCAAGAAACTTTGGAACCCTTTAATATTCA -CAAACACATGTCTAGACACAAACACATGTCTAGACGGACGCACCGCGGATCCTACAAGCG -TTCAGCCTCAGTAAGTTTCTATCGTTTTTACAGTGATGGCCCAAGATGTACCCTTCTACT -CGAAACCACGATTCCAAGCCCTCAAATGATCAGCGGCTGTCAAGAATTGCAAATTTTAAA -AGAGGAAGATATTCCCCACCCCCGCAGCCGACCGGAGAGCTGACTGGGGCAAAATGAGTG -ATGTACTCCATAGACCTGGATGATATCAACTTCAGATCGCAAGACGTGTCATCTTCGGAT -CCTGGATCTTTACTACTTCCCGTAATCGAGGGGCAAAGCACTTTCAAATTGTACCCCGTC -CCTTGGCATATGCGCACTTTGTTGAGAACGTCAATGCTGCTACCAGAGGTCGGCAAGTGT -AACTGCCGGAATATGGTGGAGCATCCAGCTCCAGCATCTAAAAGTCTGTTTTTCCTGTAT -CATATATTTTGGGACTTTTAGACATTCCAATTGTACACATACACCGTTGAATCATTCTTT -CAATCTTGATTTCTGGTTACATGTAGTATCAATTCACCTTGTAATCCTAACACACACACT -GCCAATATACGGCGGCTTCATAGTAGTATAAACCGCGGCTTGCTACTCCGCATTTTCGCT -AGAAGGGATCGACAGGAGAAAGGAGTATCAACAATACTACTCCGTATAATTACCGCTAAT -ACTCTCCCAAGTTGACGGAGTCTGGGGATAGATCTGGCCAATGGAGGGGGGAGTAGTCTA 
-GATCAATGGGCTTTCACCACTCGGAATTTACCCTATTTGGATCATCTGGGAACTTTAAGT -TTCCCTCATTTGGGTATAAATAAATGCCCCATGGTGAGCTCGGCTTTTCTATCTCAGGGC -TAAGATTCTACTTCACTTCTCCAAATCACTTCTCCCATACGATAAGCGCTTGTCTGGCAC -CTTCACTATGCCTAGTGCCTCCCCAGTCGCGTCCTTAGCGGATGAAAAGGGCGCGGTGAA -TCATGAAGAAAATGTGATGGATATCGCTGCTGGCAGAGGCATTCCCGCCACAGATGATCA -TGGCCAGGCCCTTGTGGAATTCGACAAGGCTGCTGAGTCGAGGTTGCGTCTCAAGATTGA -CCTATATATCATTCCCACGGTGGCATTAATGTATCTGTTTTGCTTTATCGATCGAGCAAA -CATAGGTGCGTAGATTGCCTATCACTATTACCTTGCGACCCTGACTAATCATTATTGTTG -AAATAGGAAATGCCAAGCTAGCTGGCTTCGAGGATGACCTTGGACTTGAAGGCTTTGATT -ATAATATTGTACTGTCGGTCTTCTTCGTCTCATATATCGTCTTTGAAATCCCAAGCAATC -TTATGTGCAAGTGGATCGGCCCTGGGTGGTGGCTGCCAGGCATCGAAGTCGCGTTTGGTA -TTTGCTCCGTCGCGACAGCATTTGTCAATAATATCCATGAGGCTTCTGGTGTTCGGTTTC -TTTTGGGGTATGTAGATAATGGGCTCATGATTTCCACATGGACACCTCGCGGACACCGCT -GACTCTTTACTAGTCTGTTCGAGGCAGGATTGATGCCTGGAATCGCATATTATCTTTCGC -GGTGGTATCGACGGAGTGAACTTGCCTTCCGTCTATCACTATATATCGTTATGGCTCCCC -TAGCCGGCGCATTTGGCGGCCTTCTAGCTTCTGGCATTCTGAAGCTAGACCGCTTCGGCT -CTCTCTCGAGCTGGCGAATGATCTTTGCGATCGAAGGCCTTATTACTATCTGCATTGGGG -TTATTTCATTCTTCACCCTGACCGATAGGCCAGAGACGGCGATATGGCTCTCACAAGAAG -AGAAGGATCTAGCCGTTGCCCGTGTTAAGTCTGAGCGCGTGGCGACAACCGAGCTTCTCG -ATAAATTCGATAAGACAAAAATGATGCGTGGCATTTTCAATCCAACCACGGTTATCATGA -CATTCGTCTATATGTTGAATAATATCACAGTTCAAGGACTCGGAATGTTTGCACCTACTA -TCATCAAAACTATCTACCCCGATGACGGGGTGATATCCCAGCAACTACATACGGTTCCCC -CATATATTGTTGGGGCATTCTTTACTTTATTGTTCCCGTTCCTTAGCTGGCGCTTCGACA -ACCGACTGTTCTTCTGTGTTATATCTCCACCGCTAATGATCATTGGTAAATATACCCCCC -ATTATATTCTGATCAAAGGCTGACTACAGTGGGTAGGCTATGTTATGTTCCTTGCTTCCA -AAGATCCAATGGTACGATATGGAGGGACCTTCGTCATAGCCAGTGGGTCATTCTCGTTAG -GGGCCTTCTGCGCGGCGCAAGTGTCTGCCAATGTCGTATCAGATACCGCACGGTCATCGG -CAATTGGCGCAATGGTCATGTTTGGCAATATTGGTGGGCTTATTAGCACCTGGTCCTTCC -TACCCTTCGATGCCCCTGACTACCACATCGGAAATGGGCTTAATTTGGCAACTTCTACTA -TTTCCCTCATTCTGGCCGCTAGCCTGTGGGCCTATATGGCCTGGGACAACAAGCGGCGTG -ACCGAGTGGATATTGCTGCTGCCTTGAGAGGACTTTCATTGCTGCAGATTCAGGACCTCG -ACTGGCGTCATCCTGGCTTCCGTTGGCGACCTTAGGTGCAAGTTTTGAGTTGTATTCTTC 
-GTCTTAGATGAGAGAAGTCTGCGAATCTACCCTACTTTTGATCGTGTAGCCGATAGTAAG -AGAAAAAAGGCTTCAGATAGAACTACAACAATCTACCTGGAACCTGAATAATCTGTATAG -AGGTAGAAGGGGAGTAGGTTTTGAGTAGAGTTTCTGCTCTGATTAATCCAAGCGTGGTGT -AACTATACTTGGAATATTCCAAATAGGGTTATTTTTGAGGAAAAGATGCCGGCCAACCAT -GTTAAATGATCCAGTTGCGCTTCGTCTCCAAAACTCCGATTTCAACATGCGATATTCTGT -CAACCACGCAGGGAAAAAAATTCGGATAGTGTAGAGGTGAGGGACTAGGGCCGCCGCCTA -TGAAGTTCTCATagtcggagtcggggtccgagtgtagtggaAAATCTGGTACCAAGACTA -GCTCCCACGATAGTGATGACGACGATGACACAATGGACTGTGCCTGGGAGGAAACCGAGT -GGTTCGAGACTTATGTGTTGATAGATGAGCTGTTGGTGGCATGTAGGGAGTTTCGTGGAT -GCGTGCATGGGCATGTGGGGTCACTGGTCGGGGTGAGATAGCGTGAGCCTGAACCGAGGA -CTTGGCTTTTCAGACTCTACCTAGGGTGCGATAGTCTCCACACTTCTAGACGCTAATGAA -ACCCGTACCTTGCGGTAGGATAGAGCACTAGAGATACGGCTGACAGCGTCTGCGGCTGTC -ACTTAGGCAGCGGGTTTATTGCCAATGTTGACCTGCCAGGACGGGCTGAGGAGTTGGAAG -CCTATGAGTGCGGCGGTGGACTTGGTCAAAGTGAAGCAGTCCAAACTCACCTTCGCCATC -GTTTCTCTGAGGGTATCATAGACGTCTTTAGCCAGCCGTTCGACAAGAAGAAGACGTAGG -TCGTTGCTGTCAGGTGTCAGACCCCAGATGAAGACAGGCTTCCGGGTCTTTGACTCCGGA -TCGGTGATGGTAGCTCCTTTCTCATGGCGGCCCCAGCGAATCTCGGCCGCGGGCTCGTTC -CGGCGAGGGTCTTGATCGCGGCCCATGGTGTACTGGCGTCGATATTTAATTTGGATCTCC -GAGGTCGTGACATGACCTACAGAAAAGTATTGCTGGTGGCCTGGGAGCTCAAAACGCAGC -CCCTGCAGACTGTTGTTCTTGAAAGGCTGATCGCTGAAGAAACCGAGCATGGTATCCTTG -TCAATGATGTCGTGACGCAGCAGCATGGGTCCGGAATCGGCTTGCTGGTCAGAAGGATAG -GCGGAAACTTTCAGGCCAGGGAACTGGGCGACGACGGCGGAGCGCAGGCAGAATCCGAAG -CGCGGAATGGCTGGTCGGTGCTTGCCACAAAGCGGCTCGTTGAAGAAGCGGTTGAAAGCT -TTTTTGATGGTTCGACGTGCAGGGTCATCGGCCTGGGTTTGTTTGTTGTTATTGTTATTG -TCTCCCAGATTCTCCTGGTCTGCCTTGCCGTGATTTCCCAGGCTGAGGGCACCGTCGACG -AGAGCGTCAATCCAGTTGTGGTCGATGAAGAAGAAGCGGAGGCTCTCCCCTGGCAAAGAG -CTCTGGTCTGGAATTAGGTAATGGGCGGGAATGTTGACTAGGTAGTACAAGTCCAGAATG -AACCGGAGGACAATCATCCAGTCGGGCGAAGCGGGCTCATTATACTCGTCGTGCGGCGGC -GGAGCGGCGAGGGTCCAGCCCTCCGGTTTCGGATTGAGGCTCGAGGCAACCTCGCATGGG -GCTGGATCCATAGCCGGGTGGAGAAAAGGTGCCACGGCATCATAGGAGAGGTCCAGATGG -GGTTGGAATTTCTCCGGCCGTTTCCACCGGCGGCTGTCCGTTGTGCGCGACACCGATACT -GTGCCATCCACCAGGCTCTCGAGGGCCCGAACAAGGTCGAGCCGGGAGTCATGCGAGCGA 
-GCGGTGGCCCGGAGCATTTGCTGACGCACCTCAGCGGTGGCCGTCTTGAGAATCTGATGG -CGCACCCGAGTGAGCGAGATGGTGTACGCCCGGTCTGCCAGTGCGAGGTTCCGGCCGAGA -GACCAGGCGGCGGCAAAGGTCAGATCCATCATACCCAGTGCGGGATCGAGAATCTGCAGA -TCTCTGCCGGTGTTGGAGAGCAGATTCCAGGACAGGGCCGGCACTACGTTGGGGGTGAGC -GGTCCGCGCATCAGGGCCGACGTGATCTCGCCTGTTGCGATTCGCGACCGGACTATAGTA -TAGCCATCCTGAAGCCGGTGCAACATGCGCCGGCGCTGCTCTACCGAATCTGGGTGGAAT -GTGTCTGGAGGTAGATGGGGCTTGAGCATGCCGCTATTGTCGCCTAGGACTCGGAAGGCA -TCCTCCATGCTGAGCACCCCTGGTGGTAGACACGTGAATGACCAGCTATATAAGGATGCC -AGAGCGACACGTCGAGGAAGCGGAGACAACTCAGAATTCGTCGTTGTCGCCAAGGGAAAA -TTCTTCAGCGTCTCCAGTCCTTCAAGCGAGACGACGTGGGCAATCATCGGGCTTGCCTGG -TCGCCCAGGAGAGGCCCGACGCGGTTTGCCACGAGCACACTGAAATTTCGAGTGCTCTCC -TCTTCTGGTCGGTCGGCGTGGGCCATTCCCCCGGTGCTCATGCGCATTATATGGGCCAGA -AAGCGGTGGCGGGTTATATCAGGGATATCCGGTTCGTCTGACGGCGAGGGTTTTATCTCC -CCGTCCTCGGTATAGGCCGTAAAGAGCCCGTTGAACAAGGCAGCAGGCGGAAAGATGACG -GAGATATCAGGCTGTGTGGCCTGCCGAATGTCCTTGAATGGGACATGCGCCGTACCATTT -GCTAGCTTGAGGAGGTCCTTCATGGGCACGCGCACGGCCAACGACTCCATCTGAGGTGCC -GGGGTGCGGATGCTTGTGGGCGCAAATATGCCGGCCATTTCAGGCGGTGCGATCGTCAGC -TCTTCGGGGCTGAAGACGAGACACGCCAGCCAGGGTACCCGGTTGCGATCCGCGTCGTCG -AGCGGCGCCCGCTCGGTCTTCCAGCTGGCAATAGCCTCCCAGGGCAGCATCGGATCGTTG -AAGACAATGTGCGGGAGGGAGTCATGGGTAGCCGTGTGGCCCTGCTCGGGATACACCGAG -TACACGGCATCGGCGGGCAACTCAAATCGTGGGCCTAGCACCTCCATGGCCTGCGGGACT -TCCACGTAATATTCCGACGGAGCTGCTTCGGGATCTTGTTGAGCTCGTGGCTCCGTGGAG -ATCCTATGCTTCACACGGATGGCGTGTGTGCCACGGACGAGACCCGGCCGGTGGAAGAAC -AAGGGAAACTTCTCACCCTCCGCAAGCAGCTGATCCAGCTTCTCCTTTTCGCTGGAGTTG -CGCGCCTCGACCTGGTTTTGTCGATGGCGTAGAGTCAGGTATCGCCTGCTAGTGAGCGGC -TTTGCAAACAGCCGTCCTGGAGGGGCCACCCGAGGTTTTCAACATAATACGAGGGTCAAC -ACGAGGGTACGTCGAAAAGGAATCCTGGGGACGGGGATCAACAGAAGTCAACATAAATCC -CCCCGTCCCCTTCTTCCGCACTAAATGTACCTCCAGGGATCCGATGATTCGGGGTTTGCC -TCAGCGGAGAGGAACAATGAGGCTCTCGCCTAATTTGGACTGTGGCGTTAAGAGGTGGAG -AGGAGGAATTGTGTGGCGTTGTCTAGACACGATCCATGCACATGCAAGTGGTGACTCGTA -GCGATGGCGCCTGGGTCGGATTAGGTCTCGGTATTTATACTCGTCGTATCTTTATACAAT -AGCAGTCTTTCATGTCTACGGCGGGCTCGGAAAGATCTCTCGGGGATGGATTTGGCCACT 
-GCCAATTGCtttcattttacttttgatttacttatgatttgcttttgatttgttttcgat -ttCTTCAAGCTCTCGTGAGTGCAAAGAAAGAACACAATGGTGCCATGCAACCTCCTCTTA -CCCATGAAGCTCGATGCCTTCGTCTTCAGCCGCGAGGTATGCAGTGGTGGCGTCGGACAG -TCCAAGATTGCGCCCATCACCCAACCCAACTATACTTTTCTCCAGTTTTAAGATAGCCTG -ATCCAGAACGATATCCTCAACCATGTTGACTTGCACAATTCGTCGCCCGCGACACGAAAC -CCACGCCTGAATGACCTGGGGAAGGCCCGCGAGCGCCGCAACCGTGTCGGCGTCTTCGTG -CATTGGAGTCTGCCTCGCCCCCTCCGTTCAGGAATTATCACCTCCCAGCCGTCACAGAGC -GAGAAGCAGGAGAATTCAGGGCAAGGGATTGACTCCGATCCCACAGCACTGCGGTACCAT -GCCATTCCAAACCGTTGGCTTGTCATTCGGACCTTTAACCTGGCCGCATAGACGACGGTA -ACCCGGAATCCGCCAAACATGAACGAGGTGACGGCATGGGTAATCGAAAGTTATCGGACG -CGGAGCATCGATAAGCTCGAGGAATCGCCGGATCTGGAGGCTGATGTGGCCCTTTTTATT -ACCTCGTTCATCCCGAGCAATCAGTCTGCTACGGACATAAGCGTAGAGGGGGGGCCGAGA -TCTTCATTGGGTCCTGCACCGAAGCCTCGAAATGGGACCCCCGCGAGAGTACCCGGATGG -GCCAAAGTCGTGTCGGACTCAGTGCTGGTAGCAGCTCGAATCAGCTATCTATGGAATACC -AGTACCACCGCGGCAAAATCTTCTCTATGCTCGACAATTTGCGTACGCGGATGTGAATGA -CACGACGAAGTATCTCCAGTCCGTTATTGCGGATTACTACGTGGTCGGCTGGCACAGCGA -CAGAGAAAAGGACTTGACGATGCCTACCCCGGTTAATGGCCGGCCGCAGAATCGGGGCAC -CTGGCTGGATGGCCTGCGCGTGGAGATTCCTCGAGGGCTTCCAGAGAAGGATGGCGCACC -CATTGACGACTGACTCAAGGATATCACCCCAGCCCACACACTCTGCCACGGGGCCATGTA -TGAGGTCGTCTGGACGGGGGATCGGGCACCTAACAAACGGCCCGTGGACGCATTGGCTGC -GACGATGGGGGACATGTCCTTTGCGGTGGGCAAAACCCCGATTGACACGGTGCTAGCATA -CATGGCACGATGCACACAGCAAGATGACCTGGAGGAGGGAATTTGGGCGCTTCGAAATCT -GCTACGTGCGGAAGAGGACGGAGTAGACGCGCAAATTGCCACCGAGGACGAGATCCAGGC -GAACAGCTACGCCCATTTTGACGATGGCAATCACTTCTTCCTACCGGCCCAGGAGGGGGT -CGCCACCCAACTGGAACTAGACCGGCGCGCTATCCAGTTAGAAAGCAGCGCCCAGCACTT -GGTGGATGCCATCCACCGCCGTGTGCAACATCTGCGGTGGGAGATATTTGCCTGGTGGTG -GTGCCAGGTCTCCGAGGGCAAGAAGACCACGCACATACAGAGCGGTTATCACGAGTGAGA -TATTAAACAGGTCAGTGCCAACATCGACGCGCCCTCGAAGGCTGCGACCCGGCTTCAATG -CCAAGTCATCCAATCCTCGCAGGCGTCGGTCAAGACCGGCGTCATAGAAGCTTTCCATCA -GCGCAATAACCCGACACTGGTGATCGGCGGCGTCAAAATCGTGTGACCAGCCGACTTCAA -TGATGTGTTAAAGGGCCGGTTGACCAACCAGATGATGGCCTCAGAAGAGCTCCCGCCCTG -CGGCGCTCTCTCGCCCATTCCCCGAAATCTGTTCCCGGGCGATCTACTGGACGCCGGAAA 
-TCGCCTCGTGCGAGAGTTTGTCACGCTGCGAGACGTCACCGTGACCCCTGACCAGCAGAT -ACCACCGCCATACCACGACAAGATGACCGGCCGCTCTAGCGCCTGGCGCAATTCCTAGGG -CGAGACGCAGCCCTGGTTCCCACTGTATATGGAGTGGGAGACGAAATAGGCTCACATCCC -GTTCGGGGAATGGAAGCTACAGCAGCGGCCGGTGCGAACAGGGCTGGCACCCAAGACCAC -GTATGTGCTGACGGAGGATGTCCGCTTGGCCGGACCGGAGAAGGGCTATGGAGATCTAGC -TCTACGACGTCGACGCAGTGCCAGACCATCCCACCTGCGTGAACGTCGTCAACCCACGCG -CTTCGATCCTGGGTCATTCGGAGTATATCCAATTCCATTTCTCGATCAATCAGCCGGCGC -GGCTCAGCGCGCATTTTGTAGTGCGCGAGGATCCCTGTGTCGCCAACTCCCCCTGGCGGC -CTGCGACGGAGTTTGATGAGCCCATATGGGGCTGGGTGGTCACTAATTACGTTTACAACG -GTCTACACATTTTCCTCCCCAACGGCACCTTCTACCGCGAGGTGCGCACCGCCGCCAGCG -ACGTGGATGGCCGCCACCGTGGCCTGACTGAGCGCCTATAGCCAATTTCTCAACGACCTC -GTTGGGCGACCCCTGATAATGGCTAACATGGACTGGGCGTTGGAGCTGTCGGCGAAACTG -AGACAGAACGGGTTCACGCTGCCAGCGCAAACATGGCCTCCTTCCGGGCAATGCCCAGGC -ACAGTATTAATTCCCATCAAGTTTGCAGACGCGATCTGCAACCAGGACGGACTGATCGAT -TACTTCAAGATACCTATAACTGACTCGCCCTCACCCCCATCTAATTCCACTTCAGAATCA -GAAAATAGGAACGATCTTGACTTAGAGACGATTTACAGCTACTTTCACAGTGAGAAGAAG -CCCCTGGACGGCCATCCGCTCAGGTCCATCGGCGCATCCACAAACCCGCGCCTGCCAGCC -TTTTGGCTCGACCCGAAAGACTACGCATCCTCTCACCCCGACAGCGACTGGAAGGTAATC -AGTAGCCGCTACGAAAGAGATCACAACAGCCAACTAGATGTCTTTGGCACGCTCTTCAAG -CACTTTGCTCCGATCACGGGCTTCAGCAGCATCCCCTCAGTGCGAGGTCTCAGCCTGTCC -CCATGGGCATGGGAGACCGCACTCAAGTGCATGACAACTCTCTTCCACGCGGGGCCACTC -ATCGTGAGAGACAACGTGCTAGAATTCCAGGACGACTACCGACTCCAAGACAGTACGGGC -GCGTACGAGGTGAACAAAACGGCCATAGGGTTTGGCCTGGGTGTCGTCGCCGTCAAAACA -GCCGACTGGGCCTGGCTGCAGCCGTACAACCTCGACCGCGCCAAGGATTCCCAGGAGTAT -ATGAGTCTGGACGTGGGCGGAGGCGGCGCTGGGAAAGCCGATGTGGAGCGACCAAAGTGG -ACGAGCGGGCCCCAGCCAGCTGTCACACGTTCCATGTAGCTCAAAGCCCCTATTAGGAAG -AGGGATAGGCAAAAGGATAGTGCTGCGTGGCGGCGGGCGGGGAGGGCGGAGGGGGAAAGT -TATGAACTACTACGAGTTAGGTTACGTATATTGGGATAGCCGAACTATGTTGACGTTACT -ACAGCACTTCATCCTTTGTCCTACCTTGATATACTTAGATTAATATACTTAAATTAATAT -GAAATTCTGCCCGACTAAAAACCTTCAAGCTGATTTGTCCTACCATATTGTAATGATAAA -GTGTCCACAACTTCCCAGATCTAATTCATCACGTCTAGTCGACGTGGGTATTGTCTCAGA -GCGATATAATGATGAAATATTTAGTCTATTTTTGGAATGTATCCAATTTCTTGTGAGAAC 
-AGAGGGATGGTAGAAATAACCGATACTATATCCCTGTATCTGGCATGCATGTCCGGATAT -CTCACACTCGACGGTGGTTGCGAGCCGCAACTTAGGGCTGATTTTGCGGAATAACCTTCC -GCCGACCCGTCGATGGCGCGGTGTCAAGTCCAGCTCCCATTGGCAGCGACCCCCACGACG -GCGAGAGCACACAAGAACCGTAAGATGAAGCGACCCTCTTCACCACCACGAAGAGAGAAA -CATCCACGGGATAACCTACAAAGTTTAATCACTTAGCATAGCCAGCCTAGACTATTGCCT -CTGCTCCCCTCAAGGGGATACGTGCCGGGTTGATGAAGGTCGGGTGATTTCCTCTTGGCC -GTAAAAACGGCAAAGGAAGCCGAGATCCTTCAGCTAGTACCGTGATAAGTGGGTTCGATC -GCTTGGAAAGAAGGCCGTCGTTCTATGTATCCCCTTTCGCTTGCTAGGAGGGGATAGCCC -GCAGGCTGAAGAACTGCGCGGTGTGCCAGACAGGGCGATTGGCTTAGTTTCTCGCAAAAT -ATTGCCCTGGGCTAAGGTGAGGACGCCCCGCGTGTTTACGCGCGTGGAAAAGTCGGGGGC -GAAGCTTAAATCTGTCGGTGGCGTGCGGGCGGACCCCTGGCCGGAGGGCCCGAGGGCTTG -CCCGAACCACCGGAAGGAAAAGGGGCCACCCCTGAAAGGATCAACCAGGATGTTGCTGCC -GACAGAGGCATCTTGATTACCTCAAGATTGGTATTGGATTGTTGCGCTGTAACTAACTAA -AAGCTTTCTGCTCATTTTACTCCACTGGAAAGCGTGGCTGTCGGGAATGTTCTTTTTTCT -TTGTGAGCCCAAGTATCATCGGGGCAAACCACCCCCTCATTGGTAACGTCGCTAATCGCA -ATCATGGCAGCGCGAGAACACTTGCTTCATTAGCCGAACCCCACAATCAGAGTCACGCGA -GGAGATCTAAGTTTCACGGCAGAAGGACGAATCCTCAAATCTGAAATGCTATCGACACAA -CAGATTCTGACTATCGGGTCTAGATGGAGAGAGCTCATCGTATTTACTATCTATCTTCAT -GCCTCATCTCGCCACAGCCGACAATCTGGCTACCGGAAGCGAGTACGACATTTCCGATAG -CTGAACGACCCAACCAAAAAAAAAGACAATACAGCCGACAATAGAGGAGACTGGAGACTA -CGAGATCTAGATCCCACAAACACTCCCTCCTTACCAAACCCCACAACCATAACCAGCGTC -GCAGGACCACGAACGAACGGAGAAGAGTTTGCCAGAGTCCCTAGTCTAGGGCATCATCAA -TGTGCTTGGCGTACTCCCCGCCGGTTTCCTCAATGTAATACACGTAGTTCTTCTTGGCGC -CGCCAGTATTGGTGTAGGTCATGGCCAGTTAGCTTTGCTTGTGGAGCTTGCTGCTCTGGA -ACTTGGCCATGGCCGGGGGCGAATTGATCCAGCTGCCGTCCTTGTAGGTGCAGCGCTGGA -TTTCACCGTCGTTCGCCATAGAGTAGAAGTAGATTTCACTCGCCTCGGTCTGGACCGCCG -CGAGAGGCGTGAGCTTGAGAGCCGTTCCTGTGCTGGGGATTGCCACGGCTGTTGTCACCC -AATCACGTTAGTGACTCGAATTTCCCTCCATGGGCTAGCTACTGAAAGTACTTATCGTTA -GCTAGTCTCTTTCTTCTAACCGCTCTTTCGCGCCTCCTACCTACGTTTTTGTCGATATAT -CTGTCAGAATATAGGTCCGCTAGATTGTGTGTCGGGTAAATATGTAAATGAGCTTGCGGG -ATCTCACCTTCTCGGTAACCCCCGTGCCTCTATCCCTAGAACTACAGACTTGCGCCGCTG -ACCTAGGGATTGTCACATATAATCAGATTTTCGAATTGCCTATCTAGGCTAGTTAATGAC 
-ACTAAATAGTCAGTCACACTTGCACTGATCCCGTATACGATAATTTAACTTCGGGGGAAA -TGGGTACTAAAACTTATCGACTGTCACTGTGCCAATCTTTGCGCGCAGAGCCATAAACTA -TCAAGGAAGGGGCTTTTTCATGGTTTTCATCTGATTTTCTCATATAACGACTAGCTTACG -GCGCAAAGGGTATGTTGAGATACCCACCTGATCTCAGGTAATCCCGCTCCGAAGCCACAA -CTCACTCTCACCCAGTATCTCCTCTCCGATTCGCACGGCTCAACAAATCTTCCATCCGGG -ACCAGCTCTTCCAGTGTTTGCTCTCCTAATCATGCCCAATTCCATCTTCAAACCATCCTA -CGCCTTGCCGGCATAGAATTGGAAAAGACGTCTGGGCCCTGCGCTATCGCGACTTAGTTG -AATGACATCTGCTGATTGAAGCTTCGAAACGATGACTTTCTGATCTCCAACGTCCTTGAG -CTCGTCGTGAAATACCCAAAAAACACTCTCTTCCTTATACCCATAATCCGACGCCCGCAA -GCCGTCGAGGCAAATGAATGATGAGTCTTTCTTTCCAAAAGATACAACTGAACTTCCCCA -GACATCACCAAAGTCAAATGCCGTGAATTCGAACAGCTTCTGTGCCCCACGTTCGAGGGT -TGGGGCCGCAACTAGAGTAAAGGATTAATCAACAGTTGCGGGATTTTTAGTGACTGAGAA -TATTGCTTACTATCAGTAGGGCTACCGTTTTCGCGGGGTCTTGATTGGGCAATCATCGTC -ACTGCCTTAATGCCTTTCTTTTCATCTTTTGAGTATTGAAAGTTACCTGGGAACACCGGT -TTTGGACTTACGACAATAGAAGCCTGATACGCCCATGGCTGATCATACTGCATTGTTTTT -TCCACCTCACTTGGATCCTTGCTGATTGCTTTGACCTATTCTTATCTTAATAAAAGGCGT -TGATACGCGTGTGGTGTGTGCTTACCTTTTCGTAGATTGCCTCAGTTATATCAAGTCCTT -TTCGTACAAGGGTTTGCTGTTCAGTTTTTCCATTTTTCCATTTATTGGACGGCACTGAAT -GAGGCGCAAGCGATGGTTAGCTAAGAAATAACCTGATCTGCGTTCAATAATGACAATACC -TTTCTTAGCACCTTCAATGAAAACACCGTATTGAGTGTAATCGTCGAAGAGATCCCAACT -CTGGTGAGGAAGATCAGCAGCCGTTAGAAGGAGTCAGACAACTCGGGGCCCCACCTTCCT -ACTCGCAATGGAGTAGTCGAACGGTTTGAAGGTGTTATCGAAATTTGAGACATTGGTGTA -CGGCCATAGCACCGCACTAACTTTGATATCAGCTCTTGGACTGCAAAGAAATGAATAGAT -ACGCACAAACGTCTGTAGCGATGCTTTCCATCCTTGAGCTCTTTGTAGAACTCATCGGCC -TCCTTCTTGATTGCAAGGAGCTGGGAGCCACTGTCGGGATCCTCTGTGGCAAAAGCCTCG -ACTACATCACTGCTACCTCCGCCCCCAGAGATATGAGTCCAGACTTTGACACTAGCATTC -TTCCGGAGTGATTCGACAGCCTGCTTGACTTCTTCCGTCACTTTCCCGGTGGTACCGTAC -ATTTTGAAAGCAATTTCGGCCCTAATCATTGTCAGAGGGAATTGCCAAGCTTTGAAATTC -TGAAAACTTACGACTGCTTGATCTCCTGCTTGGAAGATTGGGTTGACGTTGAAATGGAAA -CACGGGCAAGGTATTTACCACCTTTGATGAAATCTATAGAATAAATGAGCATCCTCTGGG -CTAACCTGCCGGACATGGCGTCCTTGCGATATACATACCTGCAATGAAGCGGTCTCCATA -GAGGTCATTCAGATTATCGCCGGTATACTTGTTGAAGGAATACTCATTTCCGATTGAACC 
-TTGCTGACGTGACGAAACCTCGACTTGGTAGGTAATGGTACTACTTTCATACTAGAAGCT -CGAGTCAGCAAGACAAACTAAATCGTATCATGTCAATCGGTTGCTTACCTCGGATCGGTT -GAGGTAGCTTGAATCAACCTTGCCCTCTTGGCCCCAGCCGGAAATAGCTGCTCCAGCGGT -AATCTCCAGAGACTGGGCGAGCTTCCTGCCGGTAATCAGAATCTCATCATGCTTTTAGAC -AGGTTTATGGTTGAATTATTCGACTGACTTGTATTCCGTAACCTCTTCGCTTTGGTAGGT -GATATCATAGCTTGCAGGAAGTGGTTTCTCGGCCGTGATAGTCACAGCCTTTGCAACACG -AGGGCTCTGAAGGTACGTGTTGTACCTAGTTAGAGAAGCCAGGTTATTAGTATATCAAGA -GCTCGAATTACCACCTCTTAGTAGTTGCTTGCTAAAGATATAGAACATACCCCTGGCCTT -GTTGCATTCCATCAAAGTAGGGAACAATTTCACGTTCCATTTTGAATCAACCAATTGTGT -AAATAGTCTCTTTGTCAGAAAAGTTTCGATGTTGGCAGAAGTGAAGATTTCGAATATGTG -GACAAAATTTAAGTAGAAGTAGCGAGCGAAGTCGCCGAAGTTAACCCTCGATCAGATTCC -AAAGCCAAGCATGCTCAATACGGCCCACTAGTCCCCTCATTTACCCATGAGTAAGAGAAT -CTCGGGAGGACCGTGACGAAGATGTATTTTGCTGCATTGATTGGCTTGGCGAGAGGGGAT -GATGAGCGCGTACGATAGCGAGGCAATTTGACATAGATCAGACAAACCCTCGATCCCTAC -CTAAAGAAGATAATGTACGCACTCTGTGATTCGAATTCAGTCACGAAAATGGATCATGAA -TAGATGGAGCTAGATGATCTTCCGGAAACTTTCCAAGGTAGAGTAATAAAATGAGCAATT -TGGGGCGCCCAACGGGGTCCCCGATGTTCAAAGACGCATAGCGAAATTCCACCTGCTAAC -GGCCTATGCAGCCTGCCGCGGCCCAGGTCATGCTGTAGCACCCGACATGTACAGCTAGCC -ACAATATTCCTATCGTTACGCAGCACCCGGATCTTCAGCCACGTGTccagccagccagcc -agccagccagccagccaATCTATGCGGTGAAGAGAATAAATGAGTTTAACATACATGCTA -GTCGTGAAAAACGAACCAAACTACCGACAGTATTTTATAGTGCCTAGCGTGACTGAACAG -GTGAAAAAGTTGAATAGGATATAGAAAAACAAAATTTGAAAGGCATTGAGCCCTGAGGAA -TTGAATTTTAGGATAAATAAGCTACGTACTCTTCAAATATGGAGGACACAGACACCGTAG -GTACCACTATTAGGTGATGACCCCGACTCTGAGTTTACTTTATCATACTACTCATCCACA -TCTACAGCGCAGGTCTACGCGGCACGGGGATATCCGGACCGATTCCTCGCTCAGCTTCGA -AAGTCTGTACATACTTCCGGACTGCCTATACTAGCATCGATCAGCTGGACTACGGATTCC -GTACCTGAGCGCCTGGCTTCTGTCTCATGAAACACGAATCGCATGTGCTGTGTAATATGC -GCTATTTTTCCGACATTCCACACAAAGCATCTAGTTACTGGCTTCAGGCCATGGTACTTT -TGTTCGATGATTTCATGGACAATAATAGCAAACACAGCCCATAGATGAGTAAGGGAGTCT -CGACTATAAAGCACATCTACATTCCCTAGAGAAAGGGACCATCAATTCCATTCATCATCC -GAACCGCCGGATCCCTCAGCATACCTTGACGACACAATATATCCAACTTCACGGAAACTT -CTATCACCAACTCTGAAATCAAGATGGGTTACGCACAATGGATCGTGATCAATGTCATCA 
-ATAGCATGTCTTCGAGCACGCTGAGTATCGCCAATGCCCTCCCCGACGTTGACAAAGGCG -AAGGCTGGTATAGTACACCCCATGCCCAAATGCGAATATCATCTTCTAACAGAACACCAG -GGGCAAGTTCTACCGGGACGGGAACAAAGATGAAGAAATCAAAGCTCCTGAGATAAACAA -AATCACGGTGCCTCCAGGCGGTTCGAAAAGTATCAGCTCCTGTGGTCGCTCCGATTCGGC -CTCTGGAACGACCGGACATATTGACCTGTATGATGGCAACACCAAGATTTGCAGAGTCTA -TTGGGACTGCCCCTGGGGTAGCAAGGGCAATGACTTTGGTATTTCAGAGAAGAGTAATCA -TGGTTACTGGATCCAGCAAGGATCCTGGAACAAAGACTCTGGAGCTATTGGCTCTGTCGA -TGTTGAGGTTGGAAAGAAGGGCTAAAGCAGTCCGATACCTTGGTTTTGCAGAAAGGGAAA -TGCTCAAGCCCCTTTTTTATTGCAACAGTGGGATTATTTCGCTGGATTTTCACTCGTTCT -GATGTATATATGGACTAAGATCCGTAGTGCACACCATCAATCCACTTCGAATTTCTAAAC -GGGTATAAGGGTATTTTTTTTTCCGTATAATATGCGTAGGACGGAAAATGAGAGAACCGA -AAGCGGTTCAATCTTGCAGTTTTTTTTCAAAGAAAAAATGTTTCAAACTATTAGATGCGC -GAAGGCTCTTCAGCGAGAGTCCTTCGGGGTACTAGTTGGGTGTGGCACGATAGAACAACA -TCAAGTCTAGTCTCGGGGCATTGCTTCCGACTAGAGCCCATATCAAAGTCGCAGCTGAGG -ATAGCGTGCCACAACAACTAGAACCATGCAGCTTTTGGGTGACGCATGATCAGCTGGGAC -CGGCCTTGCAATGTGGATGTATGCACACGTCATTCAAGATTTTTTACTTAAATAGGTCTT -TGTTTCCAACAAACCATCAATAAGCGGTCTATTCCTTCCGAGCTTTCTTCTTGTCCTCGA -TCTCTCAAGCACTCCCCCTCTGTCTCTTTCCTAGAACCGTGCTATCTTTTCTTGGCAATC -ATGTACCAGTTTGATGGCATCATCAAAATCAGAAATGATACTGGTGTAAGCATCTTTACT -GCACGGCCAAATTTGCAATTGTATGTCTCATGATCAGTTATTCCGAAGTTGAAGTTGACC -TTTTGGCAAATTTCTAGCGGCAAATTCTACAATGCGCCAAATAAGGACAATGAGATTCTC -CCTCGTGATATGGAAAGTCTAGAAATCAAAGACAACGAACATCTTCTTATTCACACTTGC -GGTCGTGATCAAGAGCTTGCCGGCACAACAGGAACGATTGATCTCGACGATATTGACGAC -CAATACATCGCCACCCTCTACTGGAATGTCCCCTTCGGTTCGAAAACAAATACCTTCTCG -GTGTCCGGGCCCGGCAGGCGCTCTCAGTATATTGTTGCTGCCGATGGATATAATAGAAGT -GGTACTGGACTTGGTACTGCGGATGTCGAAGTCACCAAGAAGCCAACCACTCGTGGTTGA -CATCTCTGATGGACTATTGTTTAAAGCTTTTCAATATGAAATGACTTGTATAATCACCCA -ATGAGGGCCACCCCATTTTTACAGCCTATACCTATAGACCTCTAAGCATATGCATTGACC -CTAGTGACAAAAATGAGAGACGAATTTGACTAGGCAATATCAATACTTCCAGGCAGTGCG -TAAGAGGTTTGCAAGGTGGCAAGCGAGATCTCCTCTCAAAAGTTGATCGGTAGTGGTACG -GCGAGGAAGCGCACGGCTAGAGAGACATAACTGCTTTGATGGCTAGCTGAAAGACCAGTG -ACTAGCTGAAACAATAGTGGCTACCTGAAAGAATAGTGGTTCGGTGGAAGAACGGTGGCT 
-GGCTGAAGTCCTGACCGAAAACAGAAACTATAGCATTTATCGTGAAGCATTCTGCATAGT -GATATCCCTACATACTATAAGGGGTGACTTTATGTAACGCGGATCTTATAAAGTACCACC -GGGTCGTTTTGTAATCTCTGGCTTTATTTTTTATCTATTCTACGACGAGAACACGCTGAA -ACAAAAACAGGGGAATCGAGCAGGGTAAGTAATAATTTCTGTTCGAAGCATTCAAATATA -TCTTTATCAGCTCTTATTGTCTGTCTTCATTGCTACCAACATTTGCTGATAGTCAATTCG -CAGTGTTCATCGTTCATCATGCGTATAATGCCAAATCTTGTCCAGGCATTCGTGTCGACC -TCGTTGCTTTCCAGTCTCTGCTGGGGCTTTGAAAACCCGGCGCAGCAGGTTGTAGGCATC -CAAATCAAGTCTCAACGTCCAATCTATCTCATCGCCCATAGAGTATTGCACACAACGGCA -ATCCCAATCGCTCTCCGAGATGGCGCAAATGCATTTGAAATGGATGTGGCACCCTATGGC -GGGAAGTTATATGCATACCATGATGATATCGGGCCTGGAGGTAAGCCAGGCCATACGGTA -GAAGAAATGTTGGATACTTTTGCACAGCATGCCAACGAGACCAACTTTGTCTGGTTTGAC -ATTAAGAAGCCAAATACCTGGGCGAGCAAGGGCACCTCCATTGAAAAGCTTCAAAGGATG -GTACAAGAGAAATTGGAGCCAGTCGGAATCCGAGCTCTTTATGGCTTCAGCATGAAAGAC -ACGGGTCTTGAGGAATTCGATAAACTAGCCAAATCCTTGAGTGGGAACGAGGCAATTAGC -ATTGGTGGCCGGAATGGGCCCGTTGGGCGAACCTTTGACAGCGTCAGCTCTGATAAATTG -TCCGTGAAACAGAAGGTGATGGACTACGGGATTTCGTTTCTCAATTTACCGTATGCGATG -GACTGTGATTGGCCAAGGGATAAGGAGGACGTGATCTATCCAACTGGTGTCTGCTACGAA -TTATATCAAGGCGCGAAGTTGCTACGCGATCGACAGCCCAGAAAGCCCAAGGTCGGAATG -ACCTTTGGCTGGACCATCAGGGCCAACGATGAAGGAACCAATCGGGTGGACAAACTCTTG -GGATTTGCAAAGGCCGATGGCTTGATCTATGGCGAGGCGACCCATGATTATCGAGATTCT -CCAGACATTCGCGTTGCAATTGGACTGATTCAAAGTTGGTTGGATCAGCACTCCGAGACG -CACCGCCGTGCTACAAAATCCGATGTTCCTTGGTGATTGATGGTACTTGGTGTTATGAGC -TAGTTCTTCAAATGAAACAGTCCATTTCAACTGGAATACCAAGTATAGGAGAGGAAAAGA -GAAGAGAAACAGGGGATTCGATGAAGATAGAGATTTGTATGGGTCGTATACACCTACCCA -GGTAGGTATCTAGGTAGGTCATTTATAAAATCTGTAAACAAACAATCAGTTCGTAATTAG -GGTACGAGGCGGTCATCGCAGAATCCGATTTCTATGTCCTAAATCGTCACAGGAGCTTTT -CTATATCGCCGACAAATCGCTATCCTTGCCAATTCAATCCTTCAAGATGATCACAGGTGA -TGGTCGCACATGAGGGCCGGTTATCGAGAAGGTTTTTCTATCGTCTGGGTCGAGATATCG -CGAAAAATGCttgatattgacattttgatattggatatagtatattTTAGATCAGGGTGG -GAGTAGGTACCACCACTTCTTAATAAACCTTTGTAATTTGTACTAATCTTGACCAACGAG -AGGGTCACGCCCCTGGTGGTATATCTAGTCGCCCATAGAAGTGTTAAACTGGAAATTGTT -GCAATATTTCTCCGAACATATATATCACATTAATCATCGTATCCGCAATAGTGCCAACCA 
-TTAATGCTAAACAGAAAAATTCCATAGGTACGACATCACTCATGATCAAAAAACAGTCGC -ATAAAATATCGAAAGAACACTCCCAACTTAATCCGTACATCGTCGACTTGATTTCTTCTA -GGGGCAGAACAAAATCCACGGCGATGTCTCCGTGGTCATCCTGGCAAAGAGAAGAAGTGA -TAAGTCACGTTTGCTTAGACCCGTTCGCACCACTATTACTTTCCTCGTCTTCGACGTAGG -CAGCCTGATCGCCAGCCTTAGATTCTGTCTCCGTCTCATCGCCGTCATCATCCTCGTGCT -CAGCCGCTTCTGTGTTCGTCCGCTGCAGACTCGCAGGTTTCGTTTTCTGGACTGTCTTGC -CCTTGTTCCGACTCCTGTCAGGCAACGACTGCTCCGCCTCACTCTTCAGGTCACCATAAT -CATCGCCGGCCTGCGACGCGCGGGCGCTCTTATTCATGCTCCTCCTTGCGCGCGCACGCG -CACGACGGATCATCTTGTTATGGTAGTCTTCCTGTTCAAGCTCCGGATCTTCATGGAGAA -TGCGACGAGCGGCTTCACGCCCAACAATGGGCAGTCGGTCAACACCCTGGACTCCCTCCG -GCAGCAGCTTGGGGTCCAGAAGACCAACTTGCACGAGGACGCTGGCTTGGTCCCAGTATA -CGTGCTCGGAGTAAAGACGCCCGGCGCGCATACTGACAATACTCACAAGGATGATCTCGA -CTCGCTTGTTTGTCGGTGGAACACCCGGTAGCATCCATGGAACCTCCTGTGTGTGCTCGA -AGCTTACGTACAGCTCGTCCACGATGCGGTCGGACCCTGTGGTGCGAGAAAGCAGTCGGA -GACGCATGGATGGAGGCAGTTTTCCGATGAAGTAATGCTCATAGAAATGGCGCAAGGCTT -TTTTTCCGACTCCACCGCTGATTGTTGGTGTGTAAGTGACACTGGGGGTTTTGTGCTGAA -CATAACCATCCATGGTCTTTCGGACGTTGGACGAGAAAAACTTGCCTAACAAAGCATTAG -CCAGATGTCGCTCGATCCTTTTGCGATTGCAACTTACCATCCACATGCTGCTCATACCGC -TGCTCGTAATCCGGATCTCTGGAATATCCCTTCCGCAGAACCTTCAAAGTCCGTGTCCAA -GCTAGATTCGCTGAAAGATGATCGTATTCCTCCACATCGCTCTCCGCAAACCCGGGCTGG -GCATTATCATATGTAAAAGCCGGGTATGCCAGTTCCAAACGCTCTCCGGTCCCGATACCA -GAGTTGATTGGGCGAGTTTGGCGGCGTTTCTTGCCTTGCAAACCAAGCGCGACGGGCTGA -ACAAGTACGTCCACAGTCTCGCCTGCCAGGTGTACGAGGACCTGGAGCGATAGGGGGAAT -CGCGATCGTGTATCGGGGATCAGGCTGGGGTAGTATGCGACCAGGGCGCAGAGCCGACTG -GCGTTGACGGAATTCAGATAGTAGTCGAGACAGAAGTCTGCGGCGTCGCCAAAGGCTGTG -GATAGAGCCAATTAGTTAATCACTCTTTAAGATGTTCAGAGAAAGTGCTATGCATACCGA -CCACACCGTAGTTCTCGCCTACACCGAGACCGTCCTTTACAGACTGCAGCCGACGGGCAT -ACTCTTTTCCTCCACCATTGTAAGGAAGGTATACGACGTCGAATCCTTCTTCTTGCCAGT -TTTTGATAATTTCCATATCAAAATCATCGGACTCAGCTGTGATGCATAGCCGCGCATTAT -TGTTGCCATTGCTATAAAGGAATCCACCGCTGCTCAGGCCAGGGGCACTAATTGAAATGT -TGGCCATCGTGGTGGTTGTGCTGGTTGTGCTGGTTTTCCAACGTATGTACAGTGGTTGCT -ATCACTGCAACTAGGTAAGATTTTATTTCAATGTCAATCAGGAACACACTATACAAAATT 
-TCGACGCAGAGCAGGTTCAAATAAAAAGTGAGCCCATGGTACCTTAAGAGGTCCTACGTC -ATTGGCGTCTGGGTACGTCATAATATCCGTGGCATTTTTATATTAAAAAGTCTTGGCATC -AGAACAAATTGATCGGGTGCATTTCTGTTTTCTTCCATTCTGACCTTTCGGCTTTCGGAT -GTTTTTTGTGGGCTTTAAAGCTGTATATGAAGGTAACACTTTTGCTCTTCCAATGATCAA -AAATGATTAAAGCCTGTGTTGCTGTTGTGTATGCCTTGTTCCTCTTATGATCATGATCGA -ACCATGAATCTCAATTTGCCTCTCTGAATTTTGAAACCGGGGTTTGTCGTTTGGTAGTGG -CTCGATAAATATGTACCAATTTATTCTTGTAGTAAATCATAGCGAGAGACCTCTGATACC -TGGGACCAAGTATCGAGTGGGGGGACCTTCGCACTCTCTGTGGAACGGCTCTTGACAAAC -CAGAGGCGGCCATAGTTACTATTTTTATCCGCTTCCCAGCAAGTCAAGTAAGTACCATGA -GCAAGAATACATATTTGTACAACCTGAACCCTACAATCCAAGATGTACGATTTATATACA -TCAGAGTCTCCGGTACTCCGTACGTGCAACTGGGTTCCAACAAAAGGTCGATTGAGCCTC -AAATAAACCATACAGCTCCCAAAAATGACATCCTTTTGTTATAAAAAAAGCTCACATTAT -TTCTATCTGAGGCCGGCATCTTTAATATGACCCAGATATTTTATATACCGCTTTGCACAT -ATAAAAAGGGATTTGGCCCTCGATCGGGGTTGGTGTCTGCACCTGTTTAGGTTCGATCAT -GGATCTGACTTTGGTTATTCCTAAATGACAACCAGAGAATGGAAAGCTCTGAATGAGCAG -AGGTGAGACTTGAGTGAAAAGAGCAGAAGTCAGCTCTTCACAAGTACTTCGTCCTGTCTT -CGCATTCTACATGACTGCCTGCCAATGATGCATGGCTGTTGTTTGGAAAAATCATATACC -CTAGAGTGGGAGAACAGCAAGTCATAGTCCTCGATGACTGAATACGCTTGATCCTTCGAT -GTTTATCAAAGACTGTAAGGATTTATGGATGGGGTTCATACACTCTCTCAGATCTCCGTG -TAGTATTGACGAGCAGAGTATTCCAAGACAGGTCTTTCCATGGTCTCAAACTTCGCCGGT -GGAAGAATGAGAAGTCTAAATATTTGTATGATATTCTGTATTTTGTCCTCATTGTCCCTA -ACTCAACACGGATAGGGTTCTGACTTGCTTATGTTTGCTTCAGTAGAAGACGTTTTTCAA -CCGACGACTAAAGTCTTCATGAGACTGGGTTGTTTGACGATCTCATCATCGAAACAGCCC -ATAAGCAAGAGCCCATAAGCAAGAGCCCATAAGCAAGAGCCCATAAGCAAGAAAAAGACA -CTGATATGTCAGCCACGGGAAAATTGATCTGTACTGACAACTACGGGCTGCGAATGCTAA -ACATGCTGGTCAATTCTGTTCAATACCCTTGGCATTTGTGTAATACCATTCACTCTTTTC -AAAAGTGTTGGGGCTTCTTTGAATTTTATTTTAAAAGTTCCATACTTCGAGATACTGCAT -TGAATAAATGAATATTGGAACTAATTCATTTTGCGGCAACTCACAAATCGTTGCGCGGTG -TGTATCCTTTAATTGGCCCTATCATGAACCGATTCTGGATACAGCGTCAATCTTCAACAC -CAAGTCTCATCAACTGTCCTGCCTCGCCTCTGAGAAGGTTCACTACCCGGTACATAGATG -TCTTCAAACAAAACATCAAGACTGGCCTCAGGAGCGGGCATCTGCTTGGCATTTTCGACT -TCCTCATTGACAAACCTTCTCACTTCCTTTTCGATAGCCTTGGCTTCGTCCTCGGTAATG 
-ATGCCCCAATCTACCAACTTCATCTTCAAGTTCGTAAGTGGGTCGTTTTCACGGGTTTTC -TGCAACTCCGCTCGACTGCGGTAGGCAATTCCAGGGTCGGACATGGAGTGACCGGCAAAT -CGATAAGTGACATACTCATATACGAGGGGACCATTGCCCCTTTTAACGAAATCTCGCCCA -TGCTTGACTGCAGTCAGTACAGCTAGAACGTCCATCCCATCGATCCGAAGACCGGGAATG -TATTGACCACGTTTATAATATTCCGTCATGGCAGATGCCCTTTTATCAGACGTCCCCATC -CCATACTTGTTGTCTGTTTGCCTTGCTGTCAGAGGAAGAAAATCAACTAGCAAAATAGTA -GAGACTTACTCTCGCAGCCAAAAATCACGGGCAGATTCCACAATTTCGCCATGTTGAAGG -CTTCGTGCACCTGTCCTTGGTTCGCGGCTCCGTCACCATACAAATTGATTGTGACGTTAC -CTGTCTCGTCATACTGCTGGGCAAGCGCTATGCCAGCCCTCAGTGGCACATTCGCACCCA -CAATACCATTTCCGCCAAAATAGCTTTTGGAGTACATGTGCATCGATCCGCCTTTCCCAT -GTGCTATCCCATCCCGGCAACCCAGGAGCTCACCGATGATTGATCTGATTGTACCGCCCC -GCATATATGTGAACCCATGAGAGCGATACGCGGTGATCAGTTTGTCTTCTTTGCTTATTG -CGTGCTCAATCCCAACTGCGACTGCCTCTTGTCCAGTTGATAGGTGGCAGAAGCCGCGGA -TTTTTCTCTCTTTGTACAACTGGTCGGCACCTAGCTCCATGCGTCTTGAGACATATCAGC -GTGCGTGAATTGTTGAGCTTCACACAAACGATGCAAACCTTATTACTATCATGTCGCGAT -ACAGCTGCTTCAGTTGGTTTTTCGTTGTCTCGACAAAGTATGGCGGCGGATCGAGATTGT -ATGTTTGAAAGCTCTCCTCCGCAAGGGGTATTGTGAAAGGCCTATCATTTTCCTACCTCA -ATTCAGATTAGTCTCGCTATCCCTGATATGCATAGTTTGAATTCTGACTTCAGGTATCGT -CTCAGGGCCTGTTACTTGAGCAAATCCACGCTGTTCAGCACGCAACCGGGTAACGGTAGA -TCTGCGGGCTCCCAAAGGTAGGAAAAACTTTGCTTTGGTTGAGATTAGCATGATGATGGT -GGAAATACTAATGAGCCTACGAAACATTTCTTTCATTTCGATGCTAGGATTTTTGTAGCA -TTCTGTGTGATGCAAGTCTGAATCATGATCAAGACGGACATCACATCCGACACAGATGAT -CGATTTGGCTCCACCATATCAGTAATTGAATGTTCAATATTGAAATTGTACCACATTTTC -TGAGATGCTCACTTAGATTTGGGCTTCGTCAACATGCAGGAGGGCGGGGGTACCCATCGA -ACATGAGTTACCCGGGCGTTGTCCCTTTGGTGTCTGATTCAAATGCCCAAGATAAGCCAG -CACAGAGCCGAAGGCTGGAACAACCCATAACTCAAAATGACGAAGCTGGCATGTTTCGGA -CGTTTAGGAGATATGAGCTGCTGAATTTCCGTCAAAATGACGTTAATGAATGCTCCAGCC -CTGGAGGCTCAGGTATGACTAGGCTCCTCAAAAGGAATCAACAATATTGATCAGAGTTGT -CCACCAGACAACGCGATTGCGAGAGACATAAGCCAAAATGTAACCTACCACATCGTTGTC -ACTTGGCCGGTACTGACGAGTCATAGGCATTGGACTTTCTTGCTCTTGGCCAAGATAAGC -GCAGAAGTCGCTGAGTGTACTTCTACCAAATTTCAATGGACAAGGAAAATGGTGCTGTAC -TCCTCAGTCTCAAGATGCACTACGGACATGCATCCGGCCGTTCTTTTGTCAAAGAAGATC 
-ACGGAATCATAGCATGCAGTCATGAAGCTGGCGTGCCGTGAATGATACCGAACCTATACT -ACGCCCCGGGCTAGCTGCTAAGAATCCGTGGCCCAGCCGATATTCAAACGTAGCCTGTCG -ATTATGGCATTTGAAAAGGGAGCCTGATCGATAACCTCAGCACTCCAGCGTCATAGCCAC -CCTCCGAAACCATGCAGCTTTATTTATCCTTTCGAATCACCCTGTGACAACGTAGGATGT -ATTTTACTTCTTTCTGATTGCGACCAGAGAAAATGGTTGGAGTCTTGGTCGTTACGGCGA -CGGGGCTCGACTATGCGACCTCTTTGTCGGGTTATCGAAGGTGCAGCCGAAGAAACGACG -TGGTCGGTGGCAGGGGAACTAAAACTCATCAAAATTCTGGAGTTCATCTAAGACTTCAAA -ATTGAATATCATTTCGCTTCGGATCTATGACATCTCTGTCTGTTTCGCATACATAAGGAG -CCCTTCCGAAGCTCTTTCGGCGCCGAACTTGATTTCGCTCTGTGCTGTTCGGCGCCGAAT -TAACTCTCACCTCCGAAATCCTCGACAAGCGTGAATCATTTATAAACCTGCCATTACCCC -TATGTGTCAATTGAGTCAGCTTCAAATATTCACTTCTGTTATATTCGTTACTTCGTTTAT -TATACCATGACTGAATCGATAGAAGAAGGGGTGCACAAACATCCAGACCAGGTCTCAATT -TCTACCGAGAAGCCTTCTGATGTCAAAGATGATGAGACTCACTTAGCAACGGGGACCCGG -CTGCTAGCTATCATTGTTTCTATTTTACTTGCTATGTTTTTGGTTGCCTTGGTATGAAAA -TTGTGTTTCCTATTGCTTACATGGATCTATACTAACAAGCCTTCGAACACTATAGGATCG -CACCATCATTGCCACAGCGGTCCCCCGAATCACAACCCAATTCAATGCTCTGGAAGATAT -CAGTTGGTATGCGAGTGCGTACCTGCTGACGAGTTGCGCCACCCAGCTATCCTGGGGCAA -GGTGTACACCTTCTACTCGACCAAGACCATCTTCCTGATCGCGGTCCTTATTTTTGAAGT -AGGATCCGCGGTCTGCGGCGGCGCACCAAACTCCAAGGCTTTCATCGTGGGGCGTGCGAT -AGCAGGGATTGGATCTGCAGGCATATTCTCCGGCGCGTCGGTGATCATTGCACAGATTGT -GCCCCTGGAAAAACGGCCCATGTACGTTGGGCTGATGGGGTCGACCTTTGGGATTTCATC -AATCGTCGGTCCTCTTATGGGAGGTGCATTCACAGATAACATAACATGGCGTTGGTGCTT -CTACATCAACTTGCCCATCGGCGGTTTCACCATGGTCATCTTATTCTTTTTCCTGACTGT -CCCCCACAAATCCCAACCATACACCTGGAAGCAACAAATTCTCCGTCTCGATCCCCTAGG -TAGCGTGTTATTCTTGCCCTCTGTCATCTGCTTCCTGCTCGCCTTACAATGGGGCGGCGC -AAACTACCCCTGGAGCAACGGACGCGTTATTGCACTGTTCGTCATTTCAGGGATCCTGAT -GATCGCCTTTGTCGGGGTACAGATCCGGCTCAAGAAAGATGCGACGGTGCCGCCTCATAT -CTTCAGCCAACGCAGCATCATCAGCGGCGTCGTCTTCTCTCTGTGTGTGGGCGGAGGGCT -AATCTCTATGATGTACACGCTCCCACTCTGGTTCCAGGGCGTGCGAGGCACATCAGCTGT -GAAATCCGGTATCGACACGATCCCCATGGTGCTGGCGCTTGTTGTTGGAACCATAATCTC -TGGGGGAATTATCACCGCAACGGGATACTATGTTCCATGGATGTTTGTTGCGGCAATTTT -CATGTCGACGGGCGCAGGGCTGATGACAACCTTCAAAGTGGACACCAATCATGCCGCCTG 
-GATTGGGTACCAAGTGTTGTTTGGTATCGGTATCGGCACGGGAATGCAGCAGCCCTCCAT -GGCGGCGCAGACTATCCTTTCGATGGACGAGGTCGCCATCGGTATCTCGCTCATGTTCTT -CTCCCAATCTCTTGGTGGAGCAATCTTTATCGCGGTCGCTCAGTCGCTCTTCCAGAATTA -CCTCGGGGCAAACCTTCCCCGTGTGCAGGGAATTGATGTAGCCAAAATCATGAAAACTGG -AGCAACGGGTCTGTCAGATGCTGTTCCAGCTAACAAGTTGGCCGAGGTTCTGGTGTTATA -CAATGATGGCCTCAGGCGGTCTTTCATTGTTTCCGTGGCAGTGTCCTGTCTGATGATTGC -CCCCGCGCTGACAATGGAATGGAAGTCAATCAAGAAGGAGAAATCTGCGGCTGCGGCTGC -TCAAGAACAACTATAGCTTGTGATAAGGGGCGTGCTGGGATGCATATCACGTCGCCAGTA -TACAACTGTCTAGGGGTTTCCACTCAGAATATGGGGCGTTAGGTACACAGTCATATTTAT -AGATATTTGTGGATGTTTAAACAAGGTAGGAACAAATATATATCTCTCAATCATTACCTG -TCTCCGACATGATCCATCAGCTTTCTTTTCGGCTATACTGTGGTTAAATGTTAAGTAGAA -ATTTAGAACAGCCTTCAAGGGATGTGGCAATTTAAGCGAACCAGAAAAATCGGAGGAAAA -TAGCTATAGCTTCCTATCTACACAGCATAGTGATACTGAGACTATTAATATACCCCTTTC -CACTGTTGATGCACCTACCTATGAATATTTCTTCTCATATACCTTGAATTTATGGCGAAA -TATTAGGCGTATCAGGTATCATACATAGCTTTTCCAGCTCTAAGCTCCAAGGATAAAAAA -AGACAGTGGCTTTCCACATTATTAAAAAGTATGTCAATAATTTTAGCATGAGTGACCGAA -ACGAGCATTCCAGGGACATATTCAATACATAAAAGCTGCCTTGCATAAAACCGGGAACTC -AAGTTTTTGCACGATAAGATCAACTTCCCCTCCACAACTCTGTTCAACACAGAATGTCCT -TGGTGCAGCATGTCTCTGCCATTGACGGCGTTGATCGGGAACCCGAAATAGACCTAATTG -ACGATTGCGACTTCGTTCCCATTCGCCGCAAAATAAGCTATGATGTCCTCCAACCCCCGA -GCGAGGTTATCCCGGTCGCAGAATTGCTTCCTGGAACACCAGCGTACAAACATTCCCCTG -CAAAGCGAATAGGTATGTAACGTTTGATATAGGGTAAGACAAACCGTTGACACCATCTAG -CTCAGGTCGTCGTCACTGTTCTCGCGTGCTGGAGCGCGTCGGGGATCGTGTTTGGCTTCG -CGGCGCTCAAACCTGTTCTCGTCGAAGAAGGTGTTTATCACGAGCGTTGCACGCCTGCGG -AAATAGACGAAGGGCTTGAGCTATGCGCACAGCAGGATCTTAGGTATGTTTGTTGCACAG -ATGACTTCGAGTTGTGTTTGTCGCTAACCACAGCTAGACTCAACCTGTTTTTCACCATTG -CTTCAATAACAGCCAATGTCTCGGCTCTTCCCGTAGGGACGATTTTGGACCGATGTGGAT -CACGCGTCTGCTGGTTTATTGCTTGCCTGTTGCTTGCCATCGGCGGTGTTATCATGGCAT -TTGCATTCCACGAACCTGGCTTCGATGGCTATATCGCGGGCAATTTCTTCCTCGCTCTAG -CTGGCACATTTTTATTTGTACCCTCGTTCCAAATCGCGAATGCGTTTCCTAAATATGCAG -GGTCGATCGTCGCGCTGGTGACGGGTGCATTCGATGCCTCTGCTGCTGTATTCCTCTTCT -ACCGACTCATATATGAGGCGTCGGGCAGAGCTTTCACCCCGGATAAGTTCTTCCTGGGCT 
-ACCTGATAGTCCCCGTGTGTATTTTAATCGCACTAGTGACCATCATGCCAGCGCGCGACT -ATGTATCGACCTTGCAATTGGAAAATAGGATAGAAAAGGCCGAAGATGCCACACGGGATG -TCCACGACTCCGACGATGAAATTGAGAGCACGTCGGAGCTCAACCGGGTGCGGAAGAAGC -GAGCAGAGCAGCGAAGGAAAAAAATCCGCCAAATTGACGCGGTGCTCGGTGATAAAGATG -AGCGGCAGATGCGTGCGGAGCGAGAAGAGGATCGCAAGCAGACGAGTTCGGTCTGGGGTG -TAATGCACGGCCTGCCGGCGCATCGGCAGATGGCTACGCCCTGGTTTATCCTAATCACGC -TGATGACTGTTTTGCAGATGATTCGCATGAACTACTTTATCGCGACGATTCGCTCGCAGT -ATGAATATATGCTCGGATCGATCGAGGAGGCAGATAAAATAGGCGCCTTTTTCGACGTCG -CGCTACCGCTCGGGGGTGTTCTCTTTACACCTGCGATCGGGTACCTTCTCGATCGCCTCA -GCGTGCCAACCATGCTTGGGCTAATTGTGCTTTTCACCACGGTCATTGGTGTGCTCAACT -CCATTCCGGCAGTTTGGGCTGGTTATCTGACCGTTCTGCTCTTTGTCTTACTCCGGCCTC -TCTATTACTCGGCCATGTCGTATGTTGTCTTCCACTCACTGGTACATTCCACGTGCTAAT -CTTGGCTCTAGTGATTATACCACCAAAGTCTTCGGCTTTGCGACGTTTGGGCGCGTATAC -GGTGCCATCATTTGCCTCTCGGGAATTGCCAACTTTTCTCAGTATGGGCTTGACGCATTG -ACTCATCATACGTTTGATGGCAACCCAATTCCAATCAATGCCGCCTTGGCAATCGCGGGA -TTTCTTGTTGGAACGGCACTCGTGATCTTTGTTTTTGTTTCTGTGAGCCAGATGAGAGAG -CAGAATCGGGTAGATGAGGAAGAACGCGAGAGGCTTCTCTTGgaagaggatgaagaagag -gacgaatacgaggatgatgaaAGTTACCGTCGACGGGATTAACATTCTAAGGCTGAACCG -AGGTTCAAAAAATTGAAATTCAATTTTCAATATCAACACTTTGCGTCCATCTATTTTGGT -CCTTATAGCGATTCTGATATGAGGATACTCAAATGCCTCACCTGGTGTCATTTGAGTGGT -CCCAAGAATATTTGGCCACTTATGGATTTCAGTGGTCTTTTCAAATATTCTACATATACG -TCTTGTTTTAAGACATTGTCTCTGTTTCATAATTCAACGAGAAGATAGAATGCGCTAGGA -GGCTTGTTTGTGGAAGCGATCTTTAACCTTTCGACCAACCGCTTGCAATAGAAGATATAT -TGAGTAGCCTATGGCATTTATGTAAGGATGTGACCATTTACTGAGGATTTGAGAGGCTTT -TCTCTACGAATTATACTTTCGGCATCAGCGACTATTTGGGCGATGGCTCAAGTTCCCCTA -TAAACTCCTTTCTGACTGGCCAAGTCCATGCCCACTGAACCACCTGACCAGTAGATAAGG -TCTCTCCTTGCCCAAGGAATTCCAGATGTAAGGGGAGTTTGTGGGTTTTTCGTGAATCAA -CAGTAAACACTAAACAAGCTTGGCTAGAACAGATCGTCCAAAATGACGTCTCTAGCCCTA -AATATCGGCCTGAAACCCGCAGAGAGGGGGACTAAAGTGTATCAAATTTGCCATTTGCCA -AGTTGCCAAGTACATTCCCCCGCATTTACTCCCTCCCCGGATTGTCCCCAGAAGTAGCCA -CCTAACTAGTCCAACTATATACAAGAGATGACTCGTAGAGAGTGCCAAGTGGAGTGACCT -CCCCCTCATTTGTTTCGGACATTATTACTTGCCGTACGGCAGATGTGCTTGTCCCGAGGC 
-ACACTACTACCACTTCCCGGCTATTGCGTTCTCGATGCTCCAGTTTCTTGGAGACTTTCC -TGGGCAGTTTTGGAGCAGGCTGCAATAGGCGAGCGTCGATCTTGAATGGCTGATTTAGGA -ATGGATTGCATTCTTTCGGGGCTTATTCCTCCTTGGAATACTAGATGAATATATACTTTC -TGATCCTTGCTGGTCCGGTCGTTTGCTAGATAAGGCTGCAGCTCTGTCTACTAATTCTAG -TCCTACACCATTGAATTTCTTTCTTCTTCTCATATCCCATTTCCGTCCTTTTCACTTTCC -TCTACATCTTATACATCCATTTTACCTTCCAATTAAGTTCGGATATATCAGCATGGGAAC -TATGGCTCCCATCGATGAGAAACCGCCTGCTGACCACCTCATCGAGGACATTGGAAGGAA -AAGTCCTCTCAGCGAGGAGGAAGAGGAGGAGTTTACACCACAAGAGCAGAAAAAGATTAT -TCGACGGATTGATCTGCGTCTGGTGACCATGACCGGATTGGCTTATTGCATTTCGCTCAT -GGACCGGACGAATCTGAGCATGGCTGCCGTGGCAGGGTAAGACTACAATCATGTAGGACA -ATCGTATGCGCCAAAGCCATTTACTGACTGTCATGATAGGCTGAAACAAGAATTACAGCT -GGAAGTTGGTCAGCGCTATGTACGTTCAAATATAAAGGGCACATTTCTGGAAGGTGATTA -GCTAACTTTTGAATAGTCCATTGTCGTTTTGATGTTCTTCGTGCCCTATATTATCTTCCA -GCCGCCGATGACGGTTATCATTCGCAAACTTGGCCCGACATTCTTCCTGTCCTCAATTAT -TGTATGCTGGGCCGGAATTATGATTGTAAGGGCTTTCTGCGTTCTCTCAACATCAAAATT -TCAATTTATTGACTGGCATAGGGAATGGGATTCGTCAAAGACTGGGGCGCGTTGGTTGGC -ACGCGTGTGCTTTTGGGTGTGCTGGAAGCTGGTTACTTCCCTGGTTGTGTTTACTTGCTA -TCTTGCTGGTATACCCGATGTAGGCAAAAACACCCCTCCAAGTGATTGCACAGTGCTGAC -CCTTCAAAGATGATGTTCAAAAGCGTTTCTCTATCTTCTATCTCATTGGATGTGTTGCTT -CTGCATTAGCGGGCATCCTTGCATTTGGTTTGATGCAATTGAACGGCAGGGAAGGGTTGA -CCGGTTGGAGATGGATTTTCATCCTTGAGGGAGTGGTATGCAAAGTTGATTTAGCTGAAA -CAAACCGAGCTAACAATAAGCATAGATTACTGGCGTTATTGGCATCCTATGCTTCTTCTT -CCTGGTCGACTTCCCTGACCGTGCACACAAGTCCTGGCGCTTCCTGAGTGAGAGAGAATG -CGCATTCATTGTCCGTCGCATTAACAGAGACCGAAATGATGGCGATTTGGAAGCCTTCTC -TCTAAAGAAATTCCTGAAACCTGCATTGGATCTTAAGATCTGGGCGTTTGCCATGATCTT -CTTGTGCGTTGGCTCTCCAAATAGACCCGAGTCCTGCTAACCGCATCTTAGCTCGATCAC -TACCGTCACTTACGCTATCGCCTACTTCCTTCCCATTATCCTCAACTTAGGCATGGGCTA -CGGCGTCGGCGAAGCCCAATGCCTTGTCGCGCCACCCTACGGATTCGCTGGAATCGTCAT -GTACGGCACGGCTTGGGTTGGCGACAAGTACCGCGTCCGCGCACCGATCATTATTTTCAA -CTCCCTGCTCGCCATCATTGGCCTGCCCATGATGGGATTCGCTAAGAGCGATGCCGTCCG -CTATGTGGGCGTGTTCTTCACTGTTGCAGGTGCCAATGCTAATATCCCATCCTGCATGGC -ATACCAGGCGAACAACGTGCGCGGCCAATGGACGCGTGCTTTCTCAAGTGCCACACTTGT 
-AGGCCTTGGAGGTCTCGGTGGTATCGTGAGTAGTCTAGTGTTCCGGGAACAAGATGCACC -TGGTTACCGACCGGGAATGTATGCTGCTATCGCGTATGTGGTCTTTCCTTCGGTTTCTGA -TCGTGTTCATTTACTGACAGTGACTCACCAGGTGTAACATTCTTATCATTGTGTTGGCCC -TTGCTTTAAGCGTGTGGTTCCGGTTCTGTAATAAGCAGGCGGACCGGGGAAAGCGGGTCA -TTGAGGGCGATGCAGCTTTCCGGTACACGATTTGAGATCGGGGGAATGTTTTTATATTCA -CATCCCGTATGAATTAAGATAGTAATGCGAAAGCAGGCCTTTTTCACTGCCAATATTATT -CTTTAAATGATGAATCGAAGCTCAAGGAAAGCTATGATAACATCGGGAGCTTCCCCCGTC -CCCCTCGGTGGGGAGGATTTGTGGAGTAGACAGGCAACTTTTCCGACGGGAAACCGGCCA -TTTACTCCGACTGGAAGGCGGTCAGTAGTTGAACTGGGGGCCCAAGGCAAACCGTAGCAG -CTGTGATTGGTTAGATAAGACGATCTCTTATCGCCGCTGTTAGTTTTCCGAAGTGTTCTC -CGTCATCGGATGATCGGACAGGGAAAAGGCAAGAGGCGTGGGCAATAAATATTGATTTCT -ACAGTTGTGGCACTTTTTGTCCTCTTCATACTCTTGTTTTATTGATTTACTGATTTTCTT -GTGGATGTGTTGAGTGCTTTTCTCTAAAGCCCTCATTACCCGGTCTCAGTATGGCGGCAT -CACTGTTTGCCCTTCGCGGCGGCCGACAGCTTGCTCTGCGGTCGCGCGTTCGCGTGCCCT -CAATTGCTAGAGCTTCATTGTCACCGTTGAATGCTCGGAGACCATTGCATACTTCGCAAC -CCGCCCCTCGTCGTGGTGTCTATACTAGCTCCCTCTCCGACCATGGAGATCCCCAACCCC -AGGATATCTTCCAACCTCTCGATACCTTCCCTCGACGCCACATCGGTCCCTCCCCGGATG -CCGCGGCAGAGATGCTCGCCGTGCTCGACCCGCCGGTGGCATCCTTGGACGACTTCGTGA -AGCAGGTCTTGCCCGCGGACATTCTCTCCAAGAAGGACCTCAAGGTTACCGACCCCCAAG -CTGATATCAGCTTGTACCGCTCCAGTGTGCAAGGTGGATTGGGTGAAACCGATATGCTCA -AGCTGCTGGACACCTACCGGAAGCAGATTGATATCTCCGGCAAGACATATATTGGAACCG -GCTACTACCCCACCATTGTGCCCCCAGTCATCCAGCGCAATGTTCTCGAGAACCCTGCCT -GGTATACCAGCTACACACCCTACCAGCCTGAGATCAGCCAGGGCCGTCTAGAGTCCCTGC -TCAACTTCCAGACCCTCACAGCTGATCTCACTGGTCTGCCCTTCGCCAACGCTTCAGTTT -TGGACGAGGCCACCGCCGCTGCCGAGGCCATGACCATGTCTTTCGCCACAATGCCCGCCT -CCAAGCAAAAGAAGGTTGACAAGTCCTTCGTTGTGTCTCACCTCTGCCACCCCCAGACTA -TTGCAGTGATGAAGTCCCGTGCCGAGGGATTCGGGATCAACCTCGTGATCGGAGATATCT -TGGCCGATGACTTCAAGATCGTCAAGGAACAGAAGGACCACCTGATCGGTGTTCTTGCTC -AGTACCCCGATACTGAGGGAGGTATCTACGATTTCCAGGCTCTGGGTGACTCCATCCACG -GCCAGGGTGGTACTTTCAGTGTAGCCACTGACCTGCTGGCTTTGACCGTCCTCAAGGCCC -CCGGCGAATTTGGTGCCGATATTGCCTTCGGCAGTGCGCAGCGTCTGGGTGTTCCCATGG -GCTTCGGTGGCCCCCACGCCGCCTTCTTCGCTTGTGCCGATAAGTACAAGCGCAAGGTTC 
-CTGGTCGTGTCGTTGGTGTGTCCAAGGACCGTCTCGGTAATCGGGCTCTGCGCCTGGCTC -TGCAGACTCGCGAGCAGCACATTCGCCGCGAGAAGGCTACCAGCAACATTTGCACTGCTC -AAGCTTTGCTTGCTAACATGACTGCCATGTATGCTATCTACCATGGCCCCGTCGGACTCA -AGTCTATTGCCCAGCGTATCATGTCCATGACGTCACTGCTGCGTGAGAAGCTCGTCGGCC -TAGGCTACGAAGTCCCACTCCGCTCCAACGGCGCTGACGGCGGCGCTGTGTTTGACACTC -TGGCCATTGAGCTGCCCAGTGTTGCCGAGGCAGATGCCATCATGGCCGAGGCTCGCGCTG -CCTCTGTCTTCCTCCGCCGGCTGGGCGGAAACAAGGTCGGTCTCTCTCTTGATGAGACTG -TTGGCCGGGATGAGGTAAAGGGAATCTTGGATGTTTTTGCTGCCCACAAGTCTGCCTCTC -CCGTCGAGGTTGATGGCACCCTTGGTCTCACCACTGTCCCCGCCAGCCTTGAGCGTACCT -CCTCTTACCTGACGCACCCAGTTTTCAACACCTACCACTCCGAGACTGAGATGCTCCGGT -ACATTCACCACCTTGAATCAAAGGATCTTTCTCTGGCTCACTCCATGATTCCCCTCGGAT -CTTGCACCATGAAGCTTAACGCTACCACGGAAATGATCCCAGTCTCATGGCCCGAATTCT -CCCAGATCCACCCCTTCATGCCTGCCGAACAGGCCAAGGGCTACACAAAGATGATCGACG -ATCTGGAGCAGCAGCTGGCCGACATCACTGGCATGGCCGAGGTCACTGTGCAGCCCAACT -CGGGTGCCCAGGGTGAGTTCGCTGGTCTTCGTGTTATCAAGAAGTACTTTGAGGCCAAGG -GAGATGCCAAGCGCAACCTCTGCTTGATCCCTGTCTCTGCTCACGGTACCAACCCCGCCA -GTGCCGCCATGGCTGGTATGCGCGTTGTGACCATCAAGTGTGACACCAAGACCGGTAACC -TCGATCTGGAGGACCTCAAGGCTAAGTGTGAGAAGCACAAGGACGAGTTGGCTGCGTTCA -TGATCACCTACCCCAGTACTTTCGGTGTGTTCGAGCCCGGTGCCAAAGAGGCCTGCCGCC -TGGTGCACGAGCACGGTGGCCAGGTCTACATGGACGGCGCCAACATGAACGCCCAGATCG -GTCTCTGCTCTCCTGGTGAGATTGGTGCCGATGTGTGCCACTTGAACCTTCACAAGACCT -TCTGTATTCCCCACGGTGGCGGTGGCCCTGGTGTCGGACCCATTGGTGTTGGCGAACACC -TGCGTCCCTTCCTGCCCTCGCACCCCACTAGCGAGTACTTGCAGTCGAAGCGTGGCGACG -TCTCCTCCCCTCCCATCTCCGCTGCTCCCTGGGGCAGTGCCAGCATCTTGCCTATCACTT -TCAACTACATCAACATGATGGGCGACCGTGGTCTGACCCACGCCACCAAGATCACCCTGC -TCAACGCAAACTACATCCTCTCCCGCCTCAAGCCCCACTACCCGATCCTGTACACCAACG -ACCACGGCCGCTGTGCACACGAGTTCATCCTCGATGTGCGCGCCTTCAAGGAGACCTGCG -GTGTCGAGGCCATTGACATCGCCAAACGTCTCCAGGACTACGGCTTCCACGCCCCTACCA -TGTCCTGGCCCGTTGCCAACACACTCATGATCGAGCCCACCGAGTCCGAGAACAAGGCTG -AGCTGGACCGTTTCTGTGATGCCTTGATCTCTATCCGCCAGGAGATCAGCGAGGTCGAGT -CCGGTGCCCAGCCTCGTGAGGGTAACGTGCTGAAGATGTCCCCGCACACTCAGCGCGACC -TGCTCACCTCTGAGTGGAACCGCCCTTACACTCGTGAACAGGCTGCTTACCCATTGCCTC 
-TGCTTTTGGAGAAGAAGTTCTGGCCTACAGTGACTCGTGTCGATGATGCCTTTGGTGACC -AGAACTTGTTCTGCACCTGCGGGCCTGTCGAGGACACTGAATAGAGACAGATCCAGAGAG -ATCTATGATACCCACGTTGCGCTGCTTGAATTTTTTTTTTTATAAGGGGGCTCGTGTTTC -CGATGCTCACCTCTTCTCTGTGTATACATTCAACATTTCTTATATTTGATGAGGGTAGAA -AAAGACTACCTACGGACTACATCCTTACGAGGGCTGGATTGATTCAACATAATCATCCAA -TATGCCAAGAATAATACAATTCAAAGAATCAAAAGTATTTAAGACATTCAAATCGATTCC -AATTCAAACCGATTTTTCAAGGATAAAACTAGCAATAAGTAGTATTATCAGATCTTGACT -GGCTTTTATAAACTTCCCAAGGTTGACGCTTTTCAGGGCCTTATCGGTGCTTTTATGACC -ATACGGAACCATCTCCTAGTAAATTACACCACCAAAAAACAACTCATCCGCTCCAAATAC -CGTATTATCTCTGTATCCCCATCGGAGACGGCCGACCCCCGCTCTCCCCCCAATAGCCGC -CGTGGGCCCTAATCACCAACAAACTCGCTACAACCCACTACGACCGACCTCATGCGTCGG -AGGAATATCGAGAACATTCGGATTAAGCAGGAACAACCTTGAAATTCTCCACAGCGCGGG -GTCTACGCTTTCCGGGTAATATCACTTATCGCTTGGCAAAAGGTTGAAGAAACCAGACAG -ACATATAAACGTGTGTCTGACCCTCTTTTCTCGCCTTGATTTTGTCTCGATCCTTCGATC -TTTTTGATTCCACTGTGGAGATTGGTCTACATTTGCATCTATAATATATATCGAGACTTC -AGATTCACTTTATTTGTGGAGAGCACTATACAATACTGCACACATTCATCCTCAACTTGA -TCACCGCGACCAAGAGTTGGCTGTCTAAATTATATCATCGGAAATATGGGCTCAACAACG -GACTGTCTCTTTGACTACGCATTTACTGTCTATCGCGGCACATTCATTGAATCGATTCTG -CCAGGCTCGGGGTCTAAGCCTATATTGGCTCGCAACCGAGGCGCATTATGGGTATCTGCT -GCTGATGGGCGCATTCAAGGCTGGGACTGGCAAGCCAGTGATGACAATGCGTTCGCTGAA -CTGATGACTCGGAATTATTGGGTTGATATCGATGCCGTTGCGGCCCAGGGCCAAGTCAAT -GGTCATGATTCAAAGATCAAAGTGAAGATTGTCACCACTAACGAGGAACAAAATGAATTC -TTCTTTCCCGGCTTTATTGGTATGCACTAAATCTATCTCCCTAAACCCCAACCAAGTCCA -AACCCCAAGCCTTGATACTAACTACATGTCCCCCCAGACACCCACATCCATGCCCCACAA -TACCCCAACGCGGGCCTCTTCGGCTCATCAACACTGCTAGACTGGCTAGAGACCTACACA -TTTCCAGTAGAGTCCCGCTTCGGGTCACCACCTGACTCAAAAACAGGACACCAAACCTCA -ACCGATCCCAGAGATGCACCCATCCTAGCCCAGCAAATTTACGACCAAGTAATCTCCCGC -ACTCTATCCCACGGCACAACGTGCGCCTCTTACTACGCAACCAACCATGTTCCCGCCACA -AACGCTCTAGCAACACTCTGCTACATCCGCGGCCAGCGCGCCTTCATCGGTCGCGTATGC -ATGGATAACCCCGACTTCTGCGTAGACTACTATCGCGATTTCAGCGCCAACGACTCACTC -GTCGCAACCCGCCAGACCATCGAGTACATCCATGCCCTGGACCCAGAAGGCAATCTAGTT -AAGCCGATCGTCACACCTCGCTTTGCGCCCACATGCACAAGACCTGCATTGCAGAGCCTT 
-GGCGACTTGGCAGCAAGTTACTCGCCTCCCCTGCATATCCAGACGCACATCTCGGAGAAT -ATAAACGAGTTAGCGCTTGTCAAAGAGCTCTTCCCTGAGGCGGACAGTTACGCCGCCGTC -TATGATAAATACAACCTCCTCACGCCGCGAACGATCCTCGCGCATGCGGTGCATCTTACG -GCGGATGAGAGGGCTCTGATCCGAGCGCGCGATTCAAAGATCTCGCACTGTCCGGCGTCG -AACTCCGTGCTGGGGTCTGGCATTTGTCCTGTTAGGACGCTGCTTGACGAAGGGATTACG -GTTGGACTTGGGACGGATGTGAGTGGTGGGTATAGTCCTAGTATTCTCGAGGCGGCGCGG -CAGGCTTGTCTTTGTTCACGACTGCTCGCTCAGTCGGCTCCGTGGCAAAGGGACAATCCC -AAGGATGCAGATGGGAATGGCTCTGATGGACGGGAGAAATTGTCGGTGTCTGAGAGTCTT -TACCTTGCCACGCGTGGTGGAGCGGCGGTTATTGATATGGCGAGAGATGTTGGTGGGTTT -GAGATGGGGATGTTGTGGGATGTTCAATTGATTCGCCTGGGTGGGGTCAAGTTGGCGAAG -AAGAGTCCGCTGGATTGTGTTGTAGGTGATGGTAGTGCGGATCTTGTGAAGGCTGGTCCT -GTGGGAAATGTTGATCTGTTCGGAACGGAGAGTTGGGAGGAGAAAATTCAGAAATGGGTT -TGGAGTGGCGATGATCGGAATGTAAAGGCTGTCTGGGTTGGGGGAAGGTTGGTCCATTCA -AGGGTTTGAGTGGTGATGTATCACTTGGGCTTGTTGTTTATGTATTATTGGTTTTGAGAT -TTCCTGTACATATTTATGATATCTCACTCAGATTCTTGTTTATATTTGAGATACTTTCCA -ATAATAAGGGCATCAACGCCATAGTTAAGTGAAATTAGACAAGCGATCACCATTCGCATT -CTTACTATTCCGAAAACTTCTGTTCTGCTGCTCTGCTGCTTCCTGAATCTCAGCCATTTG -CCCCAAATGTTTACTGGAAGAGCTCCACGCAATCGAAAATCTATCGGCATAAGTAAGCTT -CCAGCTAAAAGAGGTGAAAACCAGGAAGATACTTACAAAGTTGGGACTCAAGTAAGTCTC -TCCAGAGCTACCAGAAGCAACACTGATGGTAATCTAAATCAAGTTAGCATTATATCATCT -GACTATGCTCAAAAGTTCTTACGGTGTTAGTTCCAGCAACGATAGTTCCAGCCGGAATTA -AAACATCGTAGACCTCGCCAAAACCTCTATACGCGCCGCGTGTCACGCCACGGGAGTTGA -GATTAGTCGGGGCAGCCGGGATGGAACCGGAATAACTGTTGACCTATGTAAACAGTCAGA -TGCCTGCCAATACTTAAAATAGGGGGAATAGGGGTTCCACATACCACAACCTGGGGTCGG -CCAGACGCAAAGGAAAGAGTTGTGCCAATTCTCAGCGTCGCAGCTCCAGTCTGCGAAGAA -GATGCCGTGAATTTGATTGTCAAAGGGGAGTTCACCGATTTGAAGAGAGCCATGGGGAAA -GAAGAGACCGTGGAAGTGCCGACTGTATAGGTAGATGGAGACCAGGAAGACATGCGGGTG -TCCGAGGGATGCATGCGGAGCTGTTTATCCGCATTGAGGAAGCCGTTTGGTCTATACAGC -AGTCAATATCAGAGCTGCACCGGTTATGTATACCAAACCACTGCACTTACGTCCCATCCC -AGTCTCCAATCTTGAAGACTGTTGTTCCGGTTTTCACAGACCCGGAAATACTCTTCGAAG -TGGTTGATCCAGCGGTAACAGTCACCGAGGAGCTAGCAACGACATATTCGCCCTGATAAT -ATACCATGGTGTAGGTTCCGGGCTTCATGGCGGGGGAGGTGAACGCCCCGGAAGACGAAG 
-CGTAGGTCCAGTACTGTGCGGCAGAATTATACCTGTCTTGCCTTTAGCTACTTACCAACA -TGAATACGATCTGCATTGGACATACCAGTGAACCACCCATTTGAAAGCAGAGTCAGCCCC -AGATGCAGTTCCCTTAACATAGCCTCTTCCAGACGTGGGGACGTATCCTTTGATATCGAG -GTCGGCAAAGAACGATGTGTCAATGTTGGAGCTGGGGGTACCGCTCCGGCTGAAGGTCAT -CGAGTATGGACCGTGCAGGCCCTGGCGATAGGTCTCGGTTTGAACATGTCCGGAGTTCTA -TATAATGTAAGAAGCCATATTGATTTGAGAGACAGTGGTCACATACCATGTACCAATATA -GGCCGGTGTAAGAGCCAACATTGTTGGTATTGATATCACTGGTATCATTAGCCTGAATAT -AGTAACAACAAATGGCTGAGGTACTTACCGGTGGAAAGGTCCGCCAGCGGATGTCTCGTA -TTGGTTTAAGATCATACACACGCGGTGTGCGCTTCCAGAGACACCTATGCGTGTTATTTT -TGAGTCCACGAAACAGGGGGCAAGCGTTTTCTTACAGTGAATTTTGTCATCAATAAACCG -TTCGCTGGAATAGAACTTGCTTCGCGTTTGGCCACCGACCAAGAACTGACATGTGTTAGC -TGTATGTTTCACTCGAAGACAAAAGGTCAGCGGAAATTCACCACATCCTCTCCCTCAACA -ACACTACCCCCTCCAATATTGGAGACAACACCGAAGGGCTCTTCGTTGGGGAGTAGGTCT -GCATTCAGGCGCGCGATGTAACGAAGTTCTCCAATACTCGGCTCTAAAGAAAATAATGAG -TATCGACACTACAATTAAGCACAACAGACACGAACCTGCAGTTGTGTATGTTGCCATATG -GATAGTTGAATCTCCATTATGGACAACATAGTAATGTGTCAAGGTGGATGTTTCACATGT -CACTTTAATGTAATCGCCTATTTTGATCAGCGCAGTTTCCAGAAGACATATTCTTTCTCA -ACACACCACTCTGTGTCGCAGTCACAGTTGCGCTACCAAGACCCGATCCGATATGAGATG -CAGTCGACTTGTATTGAAGCTCAGATCCATAGTAATTGATCGAAGTAATGTCACAGTTTG -TCCGGCTGACAGTAAACTTGAGGGGGTTTGAAGAGCCGGCATCAATTACGTAGGAAGCAG -TGCTAGTGGTAATTCCGAAGGCTGCGGATGCCTGCTTAGCCAAAAGGGCTAGCGAAACAA -CGGCCAAGGACTTGAAAAACATCATGTCCAGCGTGAGCTGTAGAGGAATGTACTGGTGCC -ACGAAATACATGGAGGGCAGACAACCAGTATAATATATACGAAAGAAATTCTGTTTCTTC -ATGCTGAAACTCTGCATGCTGAAAGGTGGAGATTTGGCAGGTAGCTGGACCTCCGTTCCT -GAAGAGTTCTCCGTGAATTTGGCCATACTTCCACCAATGCGTGACGTTTCTGGTTCCTTG -GCAGTTGGATGATTTCCATTTCAGGAATGGACAGTCTGCAGACTTGACGCAAAGGCCGCT -GGTTGAGAATAACCACAAGATCAGCTCCTGTGAGGGGCGCAAATTCCCCTGAACGAATGC -AGCTCGTAGTCTTATTCAAACCCCCTGCAACTCCGTGTGGGACCCGTGTGGGGGATGGTT -GTTTCAATGTCACGGGGCGCCTGTAGTAGTCGAGAAATTCGATTCAGCTTGAGACATCTT -GAAGCTTGATGGATATCACGATCTAAACAGGTAAATTCCCCGATCACTTTGATTCGACCA -AATGGTAAAGTATCATTCCAGTGTCCCGAGTGCGGAACTCGTTTTCCCGAAAGGCCGAGG -GAGCTATTAGAATTTTGGATGTCCTCTAGAGGTCCTTAATATGTGTATTAATGTTTTTAA 
-GGATAAAATGAGCGACCTAACTGAATATGCAGTCGAACTATCTTGAAAGCGCCCAGAATT -AGCCGCCTTGGAATTCAGGCCAAATAAAGCTTAGTTAACTATCTCCCATAAATGAGGTTT -TTGTAGCTGTTTTGGTTGGGAAAGCTTTTTCGATTTTACAAAAAGGGAATACCCAGGCGA -CCAAGAAGATGCATAATTATGATTGGAAAAAATGATCACAAACACAATACTACGTGTAAT -CATCTGCTCAAGGACTGGTATTGAATGGATTTCAGCCGTCGCATATAGCAAACAAAGAAA -GGACTGTGAGTTCCTGAGAAACCGGGTAATACATCAATGAATGAAACAGAAGTGCCATAA -TATTGGAGGCCTCAAATAGCAAAAGGTCACAATCCAGCATTTCAAATCCAAATATAGACA -AATGTGTCCCAGATGGGAATTCACCCACCGTGGCGTAGGGGTCGATCAGACAAGTTGACC -AAACACGCTCAAAGGCACGGCTGAGAATTGAGTAAGCTTATGCTTTAGAATCGGAGTGTC -AAGAAAGTACTTACATGCCTTCTCACAAGCTTTTCTGCTCTGATGGAGCGACTGGCCTTA -AAGGCCACACTCGGGGCCCAGGGCGATTGGCTTCCTACCACCTCCCCCTTTCTGTATACA -TTGAGGGTCAGCCGAGGCGTCTGCAATTGCAGTAGCTAGAAGCATACTGCAGAGGGTGAG -GATGTTGCCGAGTTGGAAGTACTTCATCGTGTATAGAAGAAAAGTAGAATGTGGCGTGGC -CGTTCCAAGACAACAGCACTTCGTCGCTTTATTTATATTCCTAATGAGGGCAAATTTGGC -CGCTTATCATAGATAAGCATTGTAATATTTCTTTCGGAGGCAGCTTATTCAGACGAGCAG -GATTATTTATTGATGTACATTTACAGGTCGAGATCAGCGTTGTGTGAGGCCCGAACAAAC -CTCCGATCTCCCCGAGAGCATGGGGTTGATTGGATAGCATTCTTCAAATATGTGGTACGT -TGTGGTAACAAAGAACAATATCACGTTAATGAGCGAAAATCAACGGGGATTATTACGTGT -CTCATCGCTTTTCCTACCTAAAATGGGGCTCAATAGACGACATCAAATGTGTCTATCAAC -AGCATTAACCTTGCTTCGGTATTTTCTAGGTTTCTTTTTGAGTAAGGATCGCTCCAGTGT -CTTTTTATAATTGGAAATCAAATGCCCATTTCTTTTTGATTGGCAGAGCACAGGGATTTC -CTCACATTTGATAGATACTCAACAGAAAATTTCAAACCGATGTATGCTTGTTTCAAATAT -GGACGCCGTAATAGAAGGTCCGGTCGCTTGTGTGGCTTTGTAAGTTCTTTCCGGCAGACT -TTTGAATGCCTCCTTGGAATCTGAAGGGTGAGAGCGGGTATATTGGGTGCTGGTGCATCT -TTCCATCATGGATGAAAACAGGGTATGCGCCTGATGAGAAAGCCAGAAGAAACACACAGG -CATTAATATGACGATGGGACATGTATTTTCGACAAGCACACAAGGTGAATGCCGGTATGC -AAATCCAAATGCTTGCTTATACTTCTTGATCAGCGCCATTTGCGACCATATCGGTGAAAG -TAAAATTCTTGGGTTCATTGACTTGGGTCATTGAGGACGGATAAGAACGGAGAATAGAAG -TGTACCTTATTAAGTATTGCTGTATATACATATCGAGCGAGTCCTGTCATAATAGTCCCC -TAGATGCTCAGAGCCCCAATATTCTTCGCATACCAACGTTCTCAACGACGTAACCTCCTC -CCAATTTGAACACCCCTGGACCTTGCAATCTTCCCTCATGGCGTCCCATAGCCATTTCGG -TCAAGAATGACCCAAGGTGCTCCGTTGGAGAATGCATCGACTTCATGAGATACCGTACCC 
-CTGGTGCAAGCAAAGCCAATCCAACTCTTGAGACAACTCCAGTCACTCCACTAGACGGGA -TATACGATTTAATGGCATCGTGCGTTGCTGGATCAACGAATGCAGGCCTCACACTGTCGA -CACGAAAGGTCGACTTTTGCTCTGATAGCTCCCCGAGAAGCGTTTCGGTGTCCCCTTTTA -CTCTCGCAAAGATCGGTGAGAATCGCCCAGGAGTCTGTGTGGCTCCTTCGCCAGATACAT -GGATAAATCGGAAGGGATGGTTTGATGGTTTGATTGTGGAAAACGCGTTGGCAGCTGCCA -GGGTATAGTCCTTAGTTATCTTAACGTATTCCCTATCAACTATTAGTTCTGATATCCATT -TCTTTCAGGCGACGGCTCGTACTCTTTAGTGACGTTGGTCTGGCTTGTTCCCAAGGCCCA -AACACATCCATCCGCATCTTTCAATTGATCAAGCAGCTCTGGCTTGTATGATTCAAAGTC -TTTGTGGGCAATGACATGAACACGTGGATCCTTTGCGTCTTCTGCCATTCGCACGGGGCT -TCGGCTCAAGATCGAGATCTTGGACACTGCTGCATTTTTGAGCATTGCGTCGAGTACCGA -AGACCCGACTAGCCCAGTTGCCCCTGTGAGGATAAGGTGCATTTTGTAGATTATTAGAAA -TGTGAGGATTGATGTGAGGAGCTGGTATCAAAAGATGCATAAGGAGGGAACGTCTATTAT -ATACCTACAAATTTTCTCCTCCATTCCAAATTCCTAAGTCCGCGAGAGAGCAATATTTTT -GCCCAGGACTATGCGGCTGATGCTGACAGTGCACTAGCCAGATACGGTTAGTGGATGTAG -TACTAATGAGTATCCCAGGCAGTGTTGATTGGTTCCAACCTCGGATGACTCGCACTAACC -GTATCTGGATAAACAGAGATAGGCAGAGCCTCAGAGGGCCTTTTGTTCTATCTATCAGCT -CAGCAGTGATGGATACCTAGGTAGTTCCCAGTCAAGCTAGAAATATCATCACTATGGGAT -GATCTTCTTCAGTCCACAAGATTTGTCCTCAATCACCAAACCAGGGGGGGGGAGGGGGTT -GATCTTTAAAGATAAGCTGCTCGTTAAGCTTAAATATATGGTCCTCTCAAGTGCCTCACT -TAAATTGTTCTCATAGGTCCAAAAGCTGTGAAGTTTGCCATAAAGCTCTTTACCTTCCCT -TGAAATTTATTCGCTGTAGCAGGAGAAACGGGTTCCATCGTACCTTTGTTAACATAGCAG -CATATTACAAAGTAGGAAAGTATAGAACGATAAATTATCAAAAATTGCGTTTGCTGGGAA -GATATATATCCCTTCTCCTCCATGTTTCTTTCTTCTTATCCGCTTCTGTTTCTTTGGGAA -TCTCTTGCGCCTGCCGGCCCCCAGCTGTGTGAGAATAGGTGATGTCAAGGTCTCCTTCCA -ACAGCCTAGCCGAGTTCGGGAATTAAGAGGTAGAAAAACAAAGATGGGTTGTACATCAAT -TTGCGGTGTAGACCACCAGGTCATAGTATTTGCTTTTTCTCAGGGAAATGTTGAATGTTA -CCCACTGACGATATACCTTAAATATATCACCAGGCCCGAAATCTCACGAATCTGACAATT -TATTGATTCAGTCATGTTATCTACATGCACGAATAATCCCCGGCGAAGTTATCACGCTTG -AAGCCTCTTCCGCGCTAACAGATTCGCCATACACAGCCCTTCGCTTCCGCCTAAACCCCT -TTTTACTATGCGTGCTAGGTTAGGTCCGAACTCCGAACTCCGACTTTTTCAATGTGACGC -TGACCAACCACCAATATATAAACAACTTTGCCATTGTTGATGTTGCCTCTGGAACAAATC -TAAATCATCACTAAAATGTGGAAAGTTTGTCTTGCTACTTTAATCAGTGTGGGCTATGCC 
-TCACAAACCGTGCTCCGACCAGAGCCCTCGGCGTCGGACTTCACCACGTTCCGAAGTACA -AATTCACCCCATTCGATACGCATCCGACAGCAAAATGAGTCTATCTGTGCTGCGGGCTCG -GCGCAGTATACAGGATGGCTAGATATTGGCCCCAAGCACCTGTTTTTCTGGTACTTCGAA -AGCCAGAATGACCCTGTCGCCGACCCGCTCACATTATGGATGAATGGTGGGCCCGGGAGC -TCCAGCATGTTGGGTCTGTTCGAAGAGAACGGCCCATGTCTGATAAACAAGTACGGAAAT -GGGACTGTTCACAATGAATGGGGGTGGTCTCGAAACTCCTCATTACTTTTCGTTGACCAG -CCAGCCGATGTAGGCCTCTCCTATATTGACGAGGGGTATGAGGTCGCGCGCGATTCTCAA -GAAGCTGCGGTCGATATGCGCCGTTTTCTCCAAATCTTTGTATCAGAGGTCTTCCCCAAC -AAACGCGACTCGCCTTTCCATATCTCCGGTGAATCTTATGCAGTAAGATTAGCCCCCTGT -TACTTTGTACGTACCACTGACACCTTCAGGGCAAATACATCCCCTACCTTGGGGCTGAGA -TTGTGACTCACAACCTGCAATATCCCTCTGAGCCCCAGATCAACCTCAAGTCATGCCTTA -TAGGCAATGGCTTCATGTCATCCAAAGACATTACTTTCGGGTACTGGGAGACGCTTTGCA -CGACCAATCCCGGCGTTTCTGAGCCCGTGTTCAACCAGACCCGGTGCGATATAATAGCTG -CGAACATGCCGCGATGCATGGATCTCTACGATACTTGCAACACCAATCCTGACGTTGTAA -TCTGTAATGCTGCATATTCTGTTTGCTATAAAGGGATCGTCGGGTTGTACGAGGATGAGA -GCAAGAAGGGAGGCCGTAACCGCTTCGACAGTAAGATCTTCCAATCTACGACCAATTAGC -CAATTCAGCTGACACAAGGAAATTAACAGTCACTGCCCCGTGCTACATTGACGAGATATG -CTACGAAGAAGCAGGCTTTATCGAACAGTATCTGAACTCCACCGTGGTCCGGGAAGCCAT -TGCGCTACCGGAAGGGGTCAAGGGGTACGCATTAGAGTCCACCGCGGTGGTTGACGCCTT -TGCAACAACACCGGAGGCGATGACATCTTCATCCGATCATATCAATTTCCTGCTTTCCCA -TGGAATACATTTCCTGGCCTACCAAGGAAATCTTGACCTGGCTTGCAACACAGCAGGAAA -CCTCCGGTGGGCTAATTCGCTTGCCTGGAAGGGACAGACTGAGTTTACATCGAAACCGAT -GCTTCCTTGGACCTCAATCGTCGCCGGAAGGAATGAGACGGTTGGCACCGCGAAAGAGGT -TCGTGTGCAGTTGGAGGGCCAGACAGAGGCAACGCGGTTTGCATTTGTCACAGTTAACGG -GGCTGGTCATATGGTGAGTTCATCGATCCAGACTTGTTAATTTGAACAGGGGCTGATGAT -TATGATAGGTTCCCCAGAATCGTGGTGATGTGGCGTTGGACATTTTGACCAGATGGATTG -CGGGTGATTCTTTTACTTAGTTTTGAAACTATCAGGGATTTGTAGTTCTCGAAAGATGGA -GTCGCCGTTGTACTATCTTAAAAAAAGGAGCCCGTGTGCATCTGGCGTTATTCTGGGCTC -TTGTTTCTCATAGACAATAGACACCGTAAAATTCATATACTTGCGCCGTATCATATTCTC -CAAATTGGAACAATGCATTCGTGTACCTAGTCTTGATTCTTGGGACATTGCTCTTCCTTG -TGCTTAATGTTATCTCTCTGGGTAGAACTGGCGATTATATTTGTCTAGATATGTTGTGCC -AACACTGAAAGGAACATCCTATTCGATGTAATCAGTGACGCTTGCCCAGTTGCTGTGGAT 
-GCTGTTTACTTCATCATTATCTGACAATTACGAAGACCCTCCCTCTTTCTCTCAATTGAA -TTGGCAGGCTCGACTCAGGAGATAGGTATGAGCCTTGGAAGTAATCCGAGACTGGGTTTT -CGTTTGGCTGATTGCTTCTCAAACACTAGGATCCTCGGTCCTTGGCTCGAGATCATCATG -AGTTTCCCGTTCGAATGTGATATCTCACGTGCGACTAGGCGCTATTGCTCTAGCATAATG -GGGCGTAGTGTTGTTCTAACCCATACGGGGAAGCCTTTTGTCACTGAGAAGATCGAATAA -GCTACACACCCAATGAGCAGTAAAGCTGAGCCTTTGGGTGCAAAGGCGGCGGTGGCTGCG -CCTCCAAAATATGTTGCCTTATCTTTGTATTTGCATTTATTGTACTGTTATTTAATTTTG -ATGTTAATTGGTTACGATATCAAACTTGAGGGCATGATTTAAAGATCTTCAATATTTCTG -CATTGGACTGGAATGACAGCTTGAACGATGATACACCGTCAATCAACTGACAATGAGCTA -CTTAACGAACAGTATATCCACCATCAACAGGAAGACAAACTCCAGTCAGCCAGTTGGCGT -CATCACTTCCGAACACAACAGCTAAACGAGCAATATCCTCCGGCAGTCCGGGCCCTTTCA -GCGGATGGCGCCTTTTAAGATCCTCCAATGGAACCGAGTTTGTGACTGTTTCTTTGAACA -TAGCCGTTTGAGTGTCTGAACTTTTGTCAGTGGGTGGGTGCTTTTGTCGAGTTCAGAACC -AATCGACTTACAGCCAGGACAGATGGCATTGATATGAATATTATGAGGGCCATAATCTAG -AGCAATCTGCCTCGTGAGGCTGCTGACAGCCCCCTTTGACGCACAGTACGATGCTATCTC -AACTGTCAGCAACACCCAGACCAACATATCAAGAAGTTATTAAACCCACGAAGCTCCGGT -CCACCGACCATACTCATAATAGAAGAGATGTTAACGATCCAGCCCCGATCTCCGGAGGAA -TGAGGTTCTTGCGCCAGCATTTGAGTAAGAGCATACTTGCAACCCAGGAACACAGATTTA -GCGTTGATCCGCATCGTCGTTTCCCAGATGCTTTCATCAGTCAGATGTAGTAGGGCAGGC -GTCCGCGACTCCAGGCTGATCCCAGCATTGTTGACCAGACTAGGACAAATCGTCAGAGGG -GTTATTCAAGTTCTTCATTATTGGATTTTGATACATGTCCAGTCGTCCATGTTGCCTCGC -CGCAGATTGAATCAAGTTCTCCATCTGCACTGCATCCCCAACATCTGTCTGCACAAAGCT -CGCCTGCCCACCGTCTCTGGTGATCAGATCGTGCGTCGTGATCTCTGCCTCCTCTTGGGA -TCGCGCAGTTGGCGTTAGATCGGCACATATCACCTTTGCCCCCTCTTGGGCATAGCGAAT -GGCGATTGCTCGTCCCATGCCGGATGAGGACCCAGTGACGATGGCAACTTTGTCTTGTAG -GCGAGATGATAGCGACATGTTTGCTTAGATAGTTCACGAGAAGTAGTAGTTCAAGGTGAG -ATGATACTGAGGAAAGAGGGGTATGGTCCGTGTCAGATGCCCTGGCTTTACACCCTGGCA -GTTTATTGGGTTGGAAACTGAGCCGAAGTGATCGTGGAATTCATTTATCCTTCATGAAAC -ATAGATTACATTTTTAATTGTTGCAGGTTGCCAGGACCTATAGATTATATTCGTAAATTG -GGTTCGGCCCGATTTTTATAGCCGAGAAGTATGGGGATACGGGGACGTAAATAGAAGGCC -TGCCGAAAAGTCTCGCTTCGCCTTCCTTTTTTGGGGCACAAAGTGGTCTACCTTTCTTTA -CCTCACTACAATGTACTTCAAATATAATTAATTACAGATCATTATTCGAATAATAATGAC 
-AAATTAAAATATGCGTACGGAGTACAATGTCCAGCCTTGGAATGTGTGTGTTTCCCCTGA -TGGCGTATATTGCCGGGTTGGATGTCTACAGGACAGCAAGGGGGACACTGTTGTGCAACC -TTCCACGAAGTTCATAGAAGCATGTGTTAGGCTATCATAACAAATTGTTCTTCGAATATC -CAAGTTTACTAGATACAAGGTGGAGCTTAGGTTAACCAGCCTGAGGTTGTGTCTAGATCT -CAAGCTACATGTAAGTTAAACCCCAACCCTCATACGGAGTACAATCTACCATCTTCACAC -AGCCAGTAGGACAACATTGCGTCTACATTCATTTTCCCTCCTACTTTGGTATATTTGACC -GGACGTGTGTCACATTATAAATTGAAGGGGgaaaagaagagaagagaagagaagagGGGG -CTAGTTGACCGGCCCGGAACTCCCGAGATAGAGAAGTGCGTGGAATAATATTTCCATGTC -AATCAATCTCAGCCACAGTACTCCGTACGGAGAACAATATATTTTATAGTGCGGGCTTTT -TATTCAACCTTGTCTTAACATTTCGGACATTCCACTATTCCTGGTTCCAGACATGATTCT -GACAAGCTCATGAGACCATGATAATGTACGGAGCATAGAGTAATTAGAGGCAGAGATAGG -AGGCTAATTAAACAACTAAAAAACAACAATACTAAGCTCCGGCTATTGTTCGGAGTCCGG -TTGACCATTGCTTCTCTCAAGGACCAATTATAGACTCTTATTCCACGCCCGGTTTCGGGA -TCCACCAGGCTTGGAGAAGACATCGCGTGGTGCCTTTATTTAGGTAAGACTTGATGCGGA -AATGGTTGCTCAGCCTTGGAGAAACTCAATATATCGCATTCGATGACCACCATCTCCGGA -TTTCAGGAGGTCATCGCTGAATCTGTCGATCGCATTCCTGCCGAATTGGGAAGTGATAGT -CAATATGAGCTAGCAGCGTCACTAACGCAATGCACATGCACGATTATACGATAGACCGGA -GGAGAGGGCTTGTTGGCAATAGATACGAGAGATCAATATGGTAATTCTCTAATTAATACG -AGACTTACGTCTTAGTTCTGTTAGGTAGGTTACATCCAACTCAAACCCCCCAGAAATTAG -ACCCTGCGACCTCTCGTCCCAGGTCATACAGACACAGCTACTGGCCGCACGCAACGGTCA -TGTCTCGGCTGTGCCTGCGCAGTTATGCTCACCCGACTTTGATCCTATCGGGTGCCCCGA -ATCCACAATCCGTCTAACGTTCTGGATGGCTATAGGCTTGCCATAAACGCAACCATATTG -TTCTTATCGTATTGTTCTCGGGGTAATATCCTAATGTACTCTGATTTGCGTTATCGGCTC -CTTAACAGTAGCTCATCTCCGCAGTGCATATATATATTGCAGGACGGACCCAGGAGTAGA -GATCATCGAAAGCCTGCTCTGTGTCTGTGTATCACTGACTGGGTTTTTGGGAGGCTTCTA -GGCCACTGTTAATATGCATTTTTCCACTGCGGTTCTGGCTGCGCTGTCCGGTACAGCTGC -AGGAATTGTAACATCGCCATTCAAGCGCGATCTGAAGCTTTCCGCTGAGCTAGGCATTCA -CCCGGACATTTTGCTCAATCACAAGACTTCGGTACTTGCTCTTGCTGCCAGTGCCCAGGT -TGATACGACCATCGAGACGGAGTGGGCTTCGGTATGTGTCTAGGGTTTTTCAAGTAGCTT -GAAGGCGGATGTAAATGCTGACAGTTCATAGCTCCCAATTGATCACAGCGACTCTTCGGT -TGGTACTTACAAAAACCGCTACTGGGTGTCCGAACAGCATTACAAAAAAGGAGGACCAGT -CTTCGTCTACGATGTCGGTGAGGCAACGGCTGAATATTCAGCTCAAACTTATCTCGGTAA 
-TTCGACTACATTTTTTTATAAGCTAGTCGAGGAGTTTGGTGGAATCGGCATCGTTTGGGA -GCACCGGTTTGTGTCCTCTCCCATGTAAAATTTCTAACTCAATTAACCCCAACTAGATAC -TATGGGGAATCTCTTCCCTACAACGTCAGCATAGAAACAGAGCCTGAGCATTTCCAGTAC -CTCAACAACAAACAGGCATTGGCAGACCTACCATTCTTCGCGGACAAGTTCACTCGTGCA -AATTACAGTGACGTGGACCTGACCCCAGGCGCAACTCCATGGGTTATGGTCGGAGGTTCG -TACTCTGGTATGCGATCTGCGTTCACTCGTCACTTGTATCCTGACACGATTTACGCCGCC -TACGCTTCCTCTGCCCCCGTGGAAGCACGGATTGACATGAGCGTCTACTTCGACCAGGTG -TATGACGGAATGGCAGCAAATGGACGTTTGAACTGCACCAGGGACATTAAGGCTGCACTA -GAGTACATTGATAGGCAACTCTCGAAGAGCAAATCGTCCGCGGCGGCCATCAAGCGTAAG -TTCCTCGGTGAAGGTGCCGAGAAAAACAGCAACGGCGATTTCTCCTCCGCGCTTCAGACA -ATCTACTGGTATTTCCAGTCTTATGGCTTCGGTGGCGGCACAGGCAGTCTGGAATCATTC -TGTGAGCACATGGAGACCGATCCGGAGACTAGCGAAGCGGCACCCCCACATGGATTCGCC -CCAACCCGGGGTAACAAGTATGCCGCGGATCGATATGCATCATGGCCGGCTTTCACGCAA -ATGACCAATAAATATTTCGAGACAAACTGCCGAAAGCTTGAAGCCAGCGAGCCGCTTGTG -TGCGATCTATCTCAGCCTTCTTCCGAACCGGACACCATCAGCTGGACATGGCAGTACTGC -ACTGAATGGGGATACTTCCAGACCAACAACTTTGGACCTCACTCACTGCTGTCAAAATTC -CAGACTCTTGATTACGCGCAGGAGTACTGTAACCGTTACTTCCCTGAGGCTATCAAGAAA -GGTCTGTTCCCCAAACACCCGCAGGTTGAAGCGACCAACGCGGAGACGGGCGGATGGACT -ATTCGTCCGTCCAACGTTTACTGGAGCGGCGGACAATTTGATCCTTGGCGTACTTTGTCT -CCCCTTGCGGCTGGTACCAGGTTAGCCCCGCAAGGAGTGGCCCTCACAACGGAGATCCCC -AAATGTAACGTGGAGACGAATCAGAGCACGGTCTTTGGCTACATCATGCAGAATGCGGAG -CATTGCTTTGATTTCCGCACGACATTTGCACCGGGTGCGATTTCCCGAGGTTATTTTACC -ACTGCCTTGAAAGAATGGCTTGAATGTTTCTGAGATCGACATGGTCTGATCCTATTTGGA -TTTTATTAGTTTGACACTATTCCCCTTTTGTATCAGCTTGCGATCCCACTTTCTCTCACG -TGGAGTTCTACCGAGGCCATGAACATATATCAACCTTGCTGTTCCTATATTTGATGACTT -ATTGTTTCTGTCCATGGAGATATCATTTTAAACAAAAAATATGCCTGTCAGCCCTATAGT -TGCCGTGGTTGCGTTGTCTGCGGCCTTACTGGTCGTCCGCTACTTCTTACAACTGCACCA -CCACCGTAAACAATCCCAAGCTCTACATTGCAAACCTGCCCGTTTGGGCTCCTCGACTTT -CTTCGGGATCCCAGGTTTTATTCGTTTGACCAAGGCTGCCCGTGAAAAGCGTTGGATCGA -TCATCTCTACGAGGAATATGCTGTCTATGGAAATACCTACCAACAGAAATTCCTGTCTCG -AAATATTTTGACCACTATCGAACCTGAAAATATCAAAGCACTTCTCGCTACCCAATTTAA -AGATTTTTCTCTTGGCACAAGACATCAACAATTCTACCCACTTCTGGGCGATGGCATCTT 
-CACCCTCGATGGTGCCGGGTGGTCACACGCGCGGGGACTATTGCGCCCGCAATTTACTAG -GGACCAGGTAGACTTGATTGAATGTTTAGGGTTATGCACACATGGCTGACAATGATAGGT -CGCCGATCTCACCATGCTGGATGATCACATCTCCAACCTGATTGACCTAATCCCCAAAGA -CAGAAGCAATTTCGACATTCAGCGCTTGTTTTTCCTGCTCACCATGGACTCAGCAACGCA -CTTCCTATTCGGGGAATCGGTCAGCTGTATGCTTCCGCCTTCTGAAAAGGCAGGTGTACT -AGAAAGCTGCGCTGTAGGCAGCGCACAAGGCTTCGCCAATGCCTTCGGAACCGCTCAGGA -TTATCTCGCTGCACGGAGTCGCGCTCAGGGCTTGTATTGGGTTGTCAACCCCAAAGAGTT -TCGCGAGGCAAACCGAAAAGTGCACGAAGTTGTCGATCATTATGTGAATCTCGCTCTCGA -GTCAAAACGTAACCCCGACAAGAAAAACGCCGACGGTCGGTATATCTTCCTTGAAGCCCT -GGCGGCCGACACCGATGACCCCAAGATTCTACGCGATAATATGCTCAACATTCTGTTGGC -GGGTCGTGACACAACAGCTAGTCTGTTGAGTTCAACGTTCTTCTACCTTTCCCGTTACCC -CAATGTGTGGAATCGGCTGCGTCGTGAGATTGTGGATATTTTCGGAGACGCAAAGAACCC -GCGGTCGGAAATGACACAGACCAGACTGAAGGATATCGCTTATCTGCGGTATGTGTTGAA -TGAAGGTGAGTTGATATTGCACTGAATATCACAGTGGATACTGTATGCTCCGTCGACAAA -TATACGACTAACTCCAACACAGTCCTTCGTCTCCAACCCCCTGTGCCTATCAACTTCCGA -GTTGCAACCAAAGACACCTCCTTACCGGTCGGTGGAGGTGCAGACTGTCAATCCCCAGTC -TATGTCCCCAAGGGCACCATGGTTGCCTACAATGTCTTCGCTATGCACCGCCGGACGGAT -CTATGGGGTAAGGATGCGACTACTTTCCGGCCCGAGCGCTGGGAGGAAAATGCTAAGCAC -GGCTGGGAATATCTGCCTTTCAATGGTGGGCCGCGCATTTGCCTTGGTCGTAAGTTGTGC -ATATCCTTGTCTTTCTCTGGAGAGACGATAATGAAAACCCTTTGGAATGACTCACTGACA -ATTCAACACTTAGAACAATACGCTCTCACCGAAGCAAGTTACACCCTTGTTCGTCTAATG -CAGCACTTTGATACTCTCGAAAACGCGGACCCCCATCCTCGACAGGAGCCTGTTAAACTA -TCCAACCTTACCATGTCGCATGATCTCGGCGTGCCCGTTCGTTTGTACTCTTCCGATAGG -ATTTAGGTTTCTTTTTTTCTCTTATGATTGAATGCGTATATGGATATTCTAGGGGAAGGA -TCTCTGTTTCTGATGTTTTGTGGTTATGGCTGTAAGGATATTTTGGTACTCATCTGAGTC -ATCTTTTGTACGTTGTGCCATGAGTTAATTTCAAATTATTGCTAGATCATATGAACAAAT -CACCCTCGCCGGATATCGAAGTCCTTTGGTACATTTAGTTTTTCCTTTTGTTGGGTGGGA -GCCCTAGGGTTTAGACTGCCACCTAGATGCTACTTGGTCCCAGACCTCGGGAGATGGTTT -CCTCCTAGCCCGATACATGTATACACATATCACTAGTATTCTTTTCCTTATCCCATTAAT -ATTGCACGAATAGATCGATTTAAACATATACTCGATTCCTGATCCAGACGATTTAGCAGT -CCGATATGCCGTATCACATATCGGGTCCAAGTCAGAATTCATCTATGGATGTTGTCGTAT -TCCCTCGGCTTGGCAAAGGGGGATATGCGGAGTAGAGCCCCAACGGTTGTACGGTTGTAA 
-GAGCCTCATTGTTTGCCTTGGCTGGGCTGTGCAATGCGCTAGCAATGCGCCAGCGATGCA -CTAGCCCCTAGCAAAACGGGGGCCACAGGTCCCCAGAGCTCCGTATGATCAACCTCTGAG -GTCCTCCTCGCTTTTCTTTTTTTAAACACATTACAAGTGGACAAAGTATTGTGTCTTACT -CAGACAACCCCGAAACAAACATCATGTCAAATTCGCCAGATCACGAAGAAAATGATGCCA -TCCTGGTTGGCCCAGACGATGTTCGTGACTTCAACGTGGACAACGTCCTTCCTCTCCCTG -CCAGCGACATCGTTGAGATCAGAAAATGGCTTCAGCCAACCGCATATGACCTCGAACGAA -GTGAATACTCCAGGCATCGTCTTTCACATCTCGCTGGAACAGGGAAATGGCTCACCTCCA -CAACAACCTATCAAAGGTGGCACCAAGGCGACGAAAATGGAATCCTGTGGGTCAAAGGTA -TTCCAGGCTCCGGCAAGTCTGTCCTGGCTGCTTCGATTATCAACCAACTACGAAAGGATG -GCATTCCAGTGATCTTCTTCTTTTTCCGACAGATCATCGACGCCAACCATCAGCCCGTGG -CGGCATTGCGGGACTGGCTATGTCAAGTCCTAGGCTACAGCCCCCCACTCCAGGTCAAAC -TGAAGGAATATATGGATGACAGGCGTGATTTAGCCAGCTTGTCTCCGAGCGATCTCTGGA -GAGATCTTAAAGTGGCACTGGTCGCGTGCCCCAGGGTCTATTGTGTCACAGACGCCTTGG -ATGAGATGGACCTGGGGAATGATGAGTTTCTCCACAGCTTGGTCGAATTGGGACATTGGC -GCCCGTCGAATGTCAAAGTTTTAATCACTTCACGCCCAGTATCGATAGTGGAGAATTCGC -TCAGATCATTCTCCATTCCCCAGATTCGATTGGAAGAGACCCTTGTCGATCTCGACATCG -CCTCCTACGTACAATACAAACTTCGTCACTCATCGATCGACCCTGAATCATGGAGTGTCA -TAGAGGAGGCTATCCCTGGTCGGGCGAATGGGCTCTTCTTGTATGCCAAGCTTTCAATGG -ATGCATTTCTTGAGCCAGGTGTCGACGCACATGAAGTTCTCAAGGCTTTACCGGCTGATT -TGAATGTGATGTACAACGACCTTCTGCATGAACATGCACGACGCTCAAATGTTCCAGAGG -ACTTTCAACTACTTGTCCTGCAGTTTGTGACCCATGCCACACGCCCACTCCGTATTCTCG -AAATAGCTGAAATGGCGAAAACAGTACAAGCTTCTACAAGTCGCAGTCTGAAAGAGACGA -AAGACTTGGTCCGGGCCGCTTGTGGTCCGCTGCTCGAGATCCTTCCTGATGAAACAGTCT -CGGTCGTTCACCACTCATTCACCGAGTTCTTGAAAGGGTTCACCCGTTCAAGTGAGTTGG -AGGATGAGACATATCCAATCCTTCAAACGGGCTTAACAAACAAGTGCCTGGCTATTGCGT -GCCTGACCTATCTCCGGTCTGGGTGTCTGGTCCATCAAGAATTGGAGGAAGCATCCAATT -CATATGAATATGGAAATCCTAAGGCTAAAGAGCACCTAGAACTCAAACTCCGGTTTCCGT -TCTTGATGTATGCAGCTAATAATTGGTACACTCACATCCGCCGCGCAGTCTTTGCGGGGG -TAGATATATCAAGTTTCTACGCGATGCTTGATACTTTCTTTGCGGACAAGCAGATGTTTA -CCGCCTGGCGAGTTTTGTCATGGCCGACACATAAAACCCGGGGCATCACCGCGCTTCACG -TCGCTGCTTGGGCTGGCTTAGATGAATATGTCGCATACTTGCTTGAGAGGGGGAATAACA -TCGAAGTGCGAGATGATTGCCATAATACACCATTATACTGGGCTGCCACTTCCGGTTACC 
-CCGGCGTGGTACAGTTGTTGCTTGCCAATGGAGCTGATCCCAACGCGGAACAAAACGGAG -GGTACAGTTCACTACATGTGGCAGCTGAAAGGAATCATGCAGCAGTGGCCAAACTTTTAC -TTGCTGCTGGGGTCAATCCACTCACTCCCAAAATGCAGGATTCACCTCGTGGTATTGGGT -GTGGGAATTCACCCACATCACTTGGACATACGCCTCTGATGTATGCATGTCATCATGGCC -ATGTCAATACAGTGGCCGAATTCCTACCTTTTCTGAAAGACAGCCAAGCGTATCACCGAG -CCCTTTACTGGGCGGCTCATCATTGGCAGTGTGCGGTAATTGAGTTGATCATCCAATATC -CAGGTGTGAATCTAAATGGTAAATACCGAGGAGACACTGCTCTTTTTATAGCCTGCTTGA -AAGGAGATGAAAAGACTGTTGATGTTTTGCTGCGAGCGGGAGCGGACCCAAACATATATT -GTGAACAGGCACCGGATGAATTTTACCGTGGAGTATATCACACGAGGTATTCAGAAAGTG -ATGAGGATCAAGAATGTAAAAGGGGGTATACTGCTTTACATGCCCTTTGCAAATCCGGCA -GAGATATGCGCACGCCAGAACTTCTACCAGCATGCCTATATTCTCTGCTTCAAGCAGGGG -CTGATATTGATGCGCGAACTCCTAGTGGTAAGACGGCTCTGCACATTGCCTGTCAGCACC -AAAAGGCAGATGTGGTGACGATATTACTTGAGGCTGGTGCCAATGCTGCGGCAGAGGACT -GTTCTGGGGCCACGCCATTACATACAAATGGAGGAATGGATGCAGAGCTGCTTCCTGTGC -TTTTGAGTACTGGGGTGGTCGATATCAACAAGGTCACTGCCAAAACTGGGAAAACTCCGT -TACACGTTCGGCTTCAAGGGCTCTGGGTGAAAGATGTACTTCCTTTTTTGAAGTATAAGC -CTGATGTGAATATTTCGGACGACAAGGGCGATAGTCCATTGCATTACGCTTTCGCGGCGC -ATGGTCAGATTCTTTCTCGTGTCGTTGATACCTTGATATCTCTGGGCGCCAATCCAAATG -CGATGAATGACGAAGGCAATAGCCCGTTGCACACGATGACGGAACATACTAGCCCACTCA -TTTCTAAGCTCTTAGATGCTGGTGCAGACCTTGAAGCTCAGAATCATGACGGCCAGACAG -CTCTTTTCAAACATGCGAATAGAGATAAAGAGCCTTTTGACACGTTAATTGCTCTTGGTG -CTCGCCTGGATACGCGTGATTCTGGTGGCCGAACCCTCTTGCATCGTTGCTACAACGACA -CCAATCGTTTGGACTATCTGATAGGTCTTGGCCTTGACCCTTTGGCTACCGATCATCAAG -GCAATTCGCTTCTAATGGAGGTTGCAGCGACTGAGGACGACTACACGCATCCTGCTATCA -TGGAGCATCTAATAACCCTGGGTTTGGACATAGATGTGCCAAATCATCGTGGCAGAACCA -CTTTGCACGTGCTGTGTGCTAGGGTGGGACTTGTCACTAACTCATTCCCACCAGAATATT -CACTAGATTACGTCCTTGGAGTCTGCAAGAACCCGAATCCCAGTGATTGTGACGGTGTTC -AGCCTTTGCACCTAGCCGCTACAATATCAGAGTCCTATGTTATCAAACTTCTCAATGCCG -GTGCAAACATGTTGAAGGTAACACACGAAGGGATGTCAGTGCTCCATACCGCAGCTCGAG -CCCGTCAGCCGAATATTGTAGCTCTTTGCTGCTCTAGGCTATCAGCGCTTGGAGATCAAG -ACAAGACTGCATTTTTGAACAAGCAAACCAAGGATGGAGAAACAGCCCTTCACTATGCTT -GCCGTTCGGGTCGCCCTGAAACCGTTCGCACATTGCTCGAAGCTGGCGCAGACCCCACCA 
-TATTGGACAACGGGCGCCGTTCGCCTTTTAGAGCATGTGCGCAATTCGAAACAGAAGAAA -AACTGTGGAGCGGTCCTAAGAGAATACCGCAGAGAGAATTCCTCCATGCAGCCGGTGTTC -TGGTCAACGATCATAAAAGGCCGTTCCTTGTCCCTCCTCGCAGGGATGAGAGCGAGGATT -GCAACCAACAAAATGCCGAGCATGATACGGTGCGGCTCGATGAGATCCTGGATTTGCTTG -TCTGCCATGGAGCCCTCTCACCCGGAAAGAATTGCTTTCATGGTGCGTTTGACGAAGCTA -TCTCAAATGGGCTCGAATACACAGTGGACTGTCTTTCACGACTAGAATCTCGGCTTCCCG -ACTCAAACCCCTGCCCCATCGAGGGGCTTAGTTATGCCATTTGCAAAGGCCGCTTAGATG -CGACAAGAGAAGCGTTCAGAAAGTACGGCGGATTAGACGATGATCACGATGATAGCTCTG -AGTCTTCTAGACTCGATTACGCCTACATGCGAAACTTGGTGTTCGCCAGGCAGTACGAGT -TGTTTGAAGAGGGAGTAAAGAGGCTTAATTCATCCAGGCTGAACTACCCTACAAGTTTTT -TATTACATTTTCTCGTCGCTTGGGGCTACAGAGACCTGCTAGATCGTGTATGCACTAAGG -ATGCTGCATTGAGGTTTGATGACCACAATTGGTGTCAAGAAGTATCATTTCAACCAGGCG -GGTGTTACGTGGATCCTCTACTGGTCACTGCATGCAATCGTGCTTTGCCCAACATGGACG -TGGTGGAATTTCTTGTCGAGAAGGTCGGCGTCAGTCTAGACGCGCATTTCATGGAAAGCT -CCTTGTTTAAACATTCTAAAGTGGCTGGAGGGGCACTTCATAAGCTCGCTCTGGGTCACA -ACTGGTGGCATGTTTATAAAGCCTTGCCATATTTGATCAAAAAGGGCGCGAACCTTGGTC -TACGTGATGAAAACGGAGTGACCCCGTTACACAAAGCCTTGCATACTCGCCACGAAAGAG -GTCCCTTCCTCAGAGATGCGGCAAAACTTCTCATTGAGAGCGGAGCAGATGTCAATGCCG -TGGATGCCAACGGAGATACATGTTTGTCAAAAGCCGGAGGTGATCTAGAGATGACCAGAC -TGCTCATAGCACACGGTGCCCATATAAACGCAGCTGCCATATTCTCTGTGATTGAATACG -GCGAGGTTGAAGTACTGGAAACATTGCTATCACACGGTGATTTCGCAAATGTCAGGCGAT -CTGAGCCAGTGATTCCTCGACAAAAGAGCGATTATGACGCTAATATTCTTGATTCGGAGG -TCTCGCCTCTTCTCTTTTCCAGAGTTCCTAATCTGCGATACACTAAGAACTACTTTACAA -ACCGACGTAGCCAGCTGGATGATTCGGCCAGCAGTTTACTCGATAACCGCATCATGGAAG -TGTTGCTCAATCATGGCGCAGACCCATTTGCTACATACGTCGGGGAAATCTTCCCTAGAA -GCCCTAGAGGAAGTCTAGAGTGGCGATTATTTTCACACCTAGAGGACCCCGATATCTCAA -ACCCCGAGATTGAACCCCCAAGAAAGACTGTTATCCATGAAATTTTGAAATCAAGGCACT -CGTCCTGGCCATTCTTTCAACTACCGTCCTTGGAGCTTGAACGACGTGATTCAAGTGGTT -GTACATTACTTCTTGCAGCTTCTCAGAGTGCACATGCACTGTATGAGAAGATGGATTTTG -TTGAAGGCGGTACAGCTATTGCAAAAACGATTTTCCAAGAGCTCATAGACCGCGGTGCCG -ATGTCATGGCACAAGACAACAATGGCAATACTATCCTCCACCATATGTGTTCTCCGAGAG -GTTCCTTCCTGTATAAAATTCCTTTGGGGGGGTATTCCGATTTGTTGGAAACCCTGCATG 
-AAGTTGTCATAAAAAATCCGGGCCTTGTCCATCTCCGCGATCGGAAAGGCGATTCAATAT -TTCACCGCACCCTGGCGCTAGAAAACTTCGACTTTATTGATCCTCTTCTCCAAGTGGGGG -TTGACCCTCTCCAGTCGGACTCCAATGGGGACACCGCGCTTCATCATTTGGCTAAGCATC -TCACGGAGGATGAGCCACTGGCGCACTTCAAGCGATTCCTGGAATTGGGCGTAGATATCA -ACTCCCGGAATCACCATGGCGACACACCTCTCTTCAGATATATCCAGAACGGCGTAGTTA -TTCCTAAATCCAGAAGGACTGAACGATTTGAGAAGGAAGATGACCTCACCGAGACTGTCT -TCGACTTTTTTGAAGAGGCTGGCGCTGACTTCTTGGCACAAAACGATGCGGGCTCATCAT -TGCTCCATATATTGGCATCTAAGAAGAATTATGATGATGGTTATAGTGACGAATACCCGT -ACAATGTCGTCCGACGATTTAAAATTCTGATGGGTCGTGGACTTGACCCTATGTTGGAGG -ATGTACACCAGCGAACAAGCTTGGATGTTGCTGCTGTTCATGGCAGTGAGCATATTATGA -AATTATTTGCAAGAAAGCCGATGGATTAGGGTGAACTTGATGAGAGGAATAGTCAGGGTG -ATCGCCGTGATCTGAATGGAATCTTGATGGCCGTAAACTCCCCCTTCTTTGGAATCTTGC -GCTCCTAGTCACTTCAAAATATCCCAAACGAAATCCAAAGCATGATCCCCGATCAAATTC -GCAAGAAGATTGAAAAGACTAGCTCATATATCTCATTGTCTATACTCGCAAGTAGTAACA -TATCGCACCCTTTGATCCTTGTCAATTTAACAATGCCCAAGCCAACGTCGTCGTGTACTA -TTCAGGGCTTTGAGAGTATCTTCCAACCCTTTTACTATCAGATCTTCGAAATACAGATGG -ATCATAGTATTCTGCATCGGGGAAATTGCCAGCCGGGGTTTCCCCGCGGTTTTGCCGTTC -GCTCATGAAAACTGCCGAATGCGCTCAATGAGGCAATGAACATCGCAACGGTCTCCGAGT -TGAACAAGATGCTATTTTTATTGGCTCCTGTTTTCAGCTAATCTCGGACAAATTGATTCT -GAGCATATCAGACATATAAAAGTATGTATATCCCGGGGAAATGACAAGGATTATCTAGGG -ATCTACACTATGAAACTGTCAAGTGCAATCACCGGGGTTTTGGCAAATCGGGCTTTTGCT -GTTGTCCATAAATCTCAAATTGACTATAACTCAGCTCCTCCGAACCTTTCTACCTTGACA -AATGCATCCATTTTCGAAACCTGGAGACCCAGAGCCCATATCCTCCCTCCATCTGGGAAG -ATAGGTGACCCGTGCGGGCAATACACCGATCCCAAGACTGGTCTCTTCCATGTTGGTTGG -CTCCATGAGGGAATTGCAGGGGCTACAACCGATGATCTCGCTACTTATAGAGACCTCAAT -CCTGATGGAGCCCCGTCGATTATTGCGGGAGGAAAGAATGACCCCCTTGCTGTTTTCGAT -GGCTCTGTCATTCCAAGCGGCATAGACGGCTTGCCAACCCTGCTATACACCTCTGTGTCA -TACCTTCCAATTCATTGGTCCATCCCCTACACCCGCGGAAGTGAGACACAGTCATTGGCC -GTTTCCTATGATGGTGGGCGCAATTTCACCAAGCTTGACCAGGGCCCGATGATCCCCTCG -CCTCCTTTTGCTGTCAATGTCACCGCTTTCCGTGACCCCTACGCTTTTCAGAGCCCAGCC -CTTGACATAGCTGTCAACAGTACTCGAGGAGCCTGGTATACTGCCATCTCAGGTGGTGTC -CACGATGTCGGACCCTGTCAGTTTCTCTACCGGCAGCATGATGCGGACTTCCAATACTGG 
-GAATATCTCGGGCAATGGTGGAATGAGCCTGTCAATACGACTTGGGGAAATGGCGACTGG -GCGGGGGGATGGGGCTTCAACTTTGAGGTTGGCAATGTCTTCAGCCTGAATGCGGACGGC -TACAGCGAGGACGGCGAAGTATTCATGACCCTCGGTACCGAGAGTTCTGAAGTTCCTATC -GTTCCCAAGGTATCTTCAATTCATGATATGCTGTGGGTAGCCGGAAATATTACAAACGAT -GGCTCCGTCACCTTCAACCCAACCATGGCAGGTGTTCTTGACTGGGGCTTGTCGGCATAT -GCTGCTGCGGGCAAGATCTTGCCAGCGAACTCCCAGGCATCTAAAAAGAGCGGTGCTCCC -GATCGTTTTATTTCCTACACCTGGCTCACTGGTGATTTATATGAGCTATCGAAAGGATTC -CCTTCCGCTCAACAAAACTGGACAGGTACTCTCGTGCTGCCGCGGGAGCTGAATGTTCAC -ACTATTCCTAACGTGGTGGATAACAAACTTTCGCGTGAGTCTTTGGCATCGTGGCGTGTG -TCCCGCGAGAGACACGGTCAAATTGACCTCGAAACAATGGGAATATCAATTTCCAGGGAG -ACTTACAGTGCTCTCACATCCGGCTCACCTTCTGTTGAGACCGATCAAACAGTATCGGAG -GCTGAGTTAAAGGCATTCGGCATCTCACCATCAACCAAGTTCTTTGTTCTGACAGCGAAT -ATCTCTTTTCCAACCTCTGCTCGTGACTCGGGTATTCAATCTGGCTTCCAGATATTGTCC -TCCGATCTTGAATTTACGACTATCTACTACCAATTTTCAAACGAGTCCATCATCGTGGAC -CGTAGTAACACAAGTGCCGCGTCGAAAACGACTGATGGAATTAGCAGTGCCAATGAGGCG -GGCCGTCTGCGGCTATTTGACGTCTTGCAAAATGGAAAAAAGCAGATTGAGACATTGGAA -CTCACCGTGGTTGTGGATAATGGGGTTCTTGAAATATATGCCAATGGACGGTTTGCTCTA -AGCACATGGGCTCGGTACGTTTATCTTATTTCTAAAACATGGGACAATATCTAACTACAT -GAAGTTCTTGGTACGCCAACTCGACGGGAATTAACTTCTTCCATAACGGCGTAGGAGAAG -CGAGCTTTGGAGATGTAAAAGTCTTCGAGGGATTGTATGACGCCTGGCCACACAGGAGCT -GAGACCTATCGGTCAGAAAGCTAGTTGTGTCGTTGAATTCGACTTTGACGACAACGACCC -TCCTCCTGTTATCTCGGGCTAAGTAGTCTTTTTACAATCACCTAAGATCTCGGACGCGTG -CAACTATTGGAACTTACTCTTGTTTTCTGTATATATCAGTGGAATTTCACCACCGTAACG -AAAAGACAAATATTTGGAACACTGGCAGATTGCCCTGAGTACATGTTGAGAATAGATGAG -AATCCATTCTAGCCCATCTCCAAATTACTCTATTAAGTCAAGAAAATGATAGTGAATATG -TAATAAATTTGAATCAAAATTCTATAAAAAGTCCATATCCACATAAGAGATATTCAAGCC -AGAGTTAACTCAGAACTATTTTGCCACCTCTCCCGAGCCTGGTACTGCTAATGGGCGAAC -CTCTATTAATTCCGTTTTTGTGACAATATATTGGACTGCCCAGAAGAGCAGCCACTCCTA -ATGACCAAATACTAAACGATGGCTCGAAGCTGTCTGTTCTGCATCAGACGTGCAACAATC -AAAAGAAGACCGCCAAGTAACAAGGATGCGCCTGAGAAGATTGATAGGGAGAGATACCCA -TATCTTTCAACTAGAGCACCACTGATGGGCACATTTGCCATAGCACTGCAGTTTGTGTTA -GCATCTCATATTATCTTTTCAAGAAATTGTGGTCATACGATAAAGCCGTGGAGGCTGACA 
-CAGTTCCGATTGCTAGACCAAGAGTGGTAGAAGTAGAAACCTGCGCGGCACAAGCTTGCT -GGAGGGATAAAATGCCCTACAGCTCTTTTTAGTATCCAAATAGTATTATTATTCGAGAAA -ACTCACACCAGATGCAAAGCCGTATGCAGCAGACCAAACCACCAGACCGGCCACAGATGT -CACCTTGGTCCAGGTTAATGCAATAACCCCTGCAGATATAGTGGCTATAATGCAACAATT -GAATTTTCCGTACTTATCCGCAATGATGCCGGGGAGGACTCGGCCGAAAAATGAAGCCCC -GTTCACAATAGAAACAGTATAGAATGCAAGAGATGTCGAGAATCCTTTCTCGACCGCATA -AGATGTCGTATAGAAAAATGGCGAGAACATGCCGAAGTATATGACGAACATGGCGAGGCA -CAGTAGGTGCAGCGATGGCTTTCTTAGAAGAGTCAGGGCCTCTTCCTTGTGACTCTGCTG -TTTGTCCTTTTCTTGGGTAGTATCTTCAATCCCTTCGTGCTCTGAATCACAACTTTCCAC -GTTGGTTGTGGGTGGTATTGGAGATTTGGCAACTGCGCAAGTGAAGATCAGGCAAGGAAT -CATGATAAACCCGACAATGCGCATGGTCCAGGGAAACCCGATGCTGGGATTATTGATCAA -TCTGTCAATTGCAATGGGCCAAATAACGCCGCCAAGCGAAGAACCCCCAAGAACAATCCC -CATCGCTGCAGCACGCTTCACCTTGATATATTGTCCGATCAACGCGAGCATAGGCCATGT -CACCAGGCCCATTGAGATACCAAGCAACACACCCTGCGCGAGTAAAAATTGATAGAATTT -CTCGCATAGCGAAGTCATCATGAGTGCAAAAACACACCCGAATGAGCCGACATATAGCAT -CAACTATTAAAAGGAAACATTAGTTTCCTCTCCAATGATCGGGAGTAATGATCTTACTGC -TGGTCCAAAAATATCAACCATCGCGCCCGCACAGACAGATGTAGCGAACAAGAAAAAGAT -TGCTATAGCCCCGAGCCATGCAATGGTAGATGTGGAGCTTGCAGATAGTTGATTCTTTGT -GTAGTACTCTTGAAAGATACCGAACGAGTTCATAAAACCCACAGTACAAAAGGCACACGT -GAAGGCACCAAATATGACCAGCCATGCTCGGGCTCTAGCAGGGTCAGTGGTCAATTATAT -CGTAACCCGTAGCGTATCTCACGTACGTGTATTCGGTTGGAGGACTATCCTTCGAAACCA -TTTTGCCGGTCGATGGGAATCACAAGAAATGGAAAACTGCTTTGGCTTGAGGCAATAACA -CCGCGGGAGGCCTTGGTCTTTATAAGACATCAGATGAATGCATCTGCAAGGTCAATAATC -GCCCTCGGAGTCTCCGAGCTACTGTCTAGATGCATGGGGGCACCATCGCCTAATATGGTT -TAGCGTCTCGGAGAATCCGCGGAATCTCCAGACGAACATCTACAAGCTGTCTGCTGCGCA -AAACAAACAATCACCCCACAACCTTCTGCGGTTCCATCTACCTTTGAAATGTTTTCGAGG -CAATCTACACCATGAAACGAAGACCAGATGCTGGGGGCGCCACCTCAGGCTCGAAGAGAC -CCTCTCGACAAGACCCCGTTTCCTGCGAGTCTTGTCGAAAGAAAAAGCTGAAGTGCGATA -GAGGGCTCCCGTGCAGTAGCTGCACTGCACGGAAACTGGAGTGCAGCTATGGCGGCTACA -GCTCAGGCACAACGCCTATACGACCTATGAGCCAAACAGATCAAAGGCCCATATTGGACG -ATCGCGTGTCTGACCCACCTCCAACCTTGCAGTGGACTCCACGACAGGACACCGAGACAC -AAAACAGCAGAAATGAGTCCCTTTTGACTGCCGATTGGCTTGAGACCATTGTCATGGGAC 
-ACCGCGTACCAAGTGCAGTCCCCGCCACGTTGAGGGCGGAGCTTTCCCAGCACCGGGATC -CCGAGCAATCACCTACCCAGCAAGATACCACGCTTAGCGATCGATTTCCTGCAATATGGG -ATCGGCTCGCTTCTCGTGAGAATCCGGCTACTATCCATCTGCCGTCCTATCTACCGCCGC -TGGCAGAAGCCTTGAGTCTATTTCGGTATTATTGTAAATACCTTGACTTCCAGTATCATG -TTATCATACCAAGTCGTGTGGAACAACAAATTCAGACAATCTACGATTGTGTTACGCGAA -ATGAGACCATCAATTTGGCCCATACTGCGCTGCTGTTCGGGATCACTGCTGCCGCCTTGT -ATTACCAGCTCCTGGTAGAGTCACCCGAGCACGCAGAACCATTCAGTCAAGAAAGTACAT -TCTTGGCAGGTGCCGCATTGATTCAGAGCAACTATATACCTTATCCCAATCTTGAAGGAC -TGCAAGCGACTATGATCATTGGTCATCATCTGTCCAATATGGACTTGCCCTCCTCCGTCA -GTCCTTTATTTGTGCATAGGTCATTTGTGAGCCAAGCCACGAGTATGAGGCTTCATCTTG -TGGACTCTCCCCGGATCGTAGCCGAACGGTCAGCAAATGGGTTTGATAAAACCAATGTTG -AACTCAAGCGACGGCTGTGGTGGGACTTGGCAACATATGACTGGTACGTTGTTTGCAAGA -GACCCTGCCAGTTCTATGACTGATAATACCTGAAGGCTTCTAGGATTTTTGAGCGGACCC -CAAGAGTGGACGTACTCGATCCAACCTCAGCATATGGTTGTACAAGAGCCTCTTAATGTC -GAGGATGAAGAAATCGACCATAGCGAGAATGGTGTTCCTCTATCTACTCCAACCGCAATG -TCATTCAGTCTATGTCGATTGAAGTTGGCGGTTGTGTGTCGCCAAATTGTGGACGAAATG -TCATACTATCATTTTCATGGACAAGAGGTACCCTATGAGAAGATACTCGAACTCGATCAA -AAGCTTCACAAAGTATCCCGTGAGATTCCCAGTTATTTTCGCTTTGACCAAGCTTCTCGG -CGTAGATACTCAGTGCTTTATCGTGAGCGACCAGCCCTTGCGTGGCAGCGAGCTGTGGTG -CAGCAGGGCTTTCATTCACGACTATGCCGCTTGCATCGCCACTACCTCGTGCGCGGAGCA -AAAGACCCCAAATATTCCTATTCCCATGTGATATCTCTTCAATCTGCACGGAGGGTACTT -GAAGTAAAACGCATTATGGATGAGGAAGAGCCTGCCTTCACCCCGCATAGCTCGGTCATA -TGGGCAGTCATGCACCACGTGTTCATGGCAGCCGCCATCCTCCTGATAGATGTCTGTTTC -AACTGGGATGACATTTTGGCCGAGAAGCGAAAAGAGGAGGTCCTCGAAGCTTGTCGAATG -CTCAGCCGGGCACAACAATCCTCGCCAACCGCACGTGAGGGAATCAATGCCATGATGGGT -ATCTTGCGAAGGCATTGGAAACACGAAAAAAGTGCAAGTTCTCGTGACTCACAGGAGCAC -TCTGCATCTTCAAACTCGGACGCTACAACAGAAAGTACCCCACGACCGGATATCCTCACG -CCTGTTTCATTGGGTTCGAAGAGGCCCGTCCCATTTCCTCCACATAATCCCATTGTTCAA -TCAACGGTGTGCTCTTCAGCAGATGCACCGCCTAGTCCAGTGCCACTTGAAGACATATGG -GCGGAAATGTTAGAGAGCAGCGCTAATATTGAGCTGAATACACCTGACTGGACAGACTTG -CTTACTGAGTTGACAAATGTTACTTTGCCAATCGATTAATAGAAGATGATTCTCCTATTT -AATGGATAGACGATGAGTACATGACTGCGAACGACTCTAGACTCATATAAAATATGCGTG 
-GGCGATTGCCTTTTGCTCAACCTTGGAACCTGAGTATGCTTAATCCTTCTTGGGAATTCC -ATTGGTTTAAGCCATGATGCCACCATGATGTGACGGCATTTCCTCAGGGCTGCATTTGCA -ACTTCTCACAAAATTACAATTTCTGATCACTAGCCCGATACCACTTTTCTCTTCATCAAA -TTCCACCTCCGTCAGCTCATTATGCCTTACCATAAGAATGACCTCCCCTATATCCGCCGC -TACATCACCACTCACAATACCGAGGGCGAGTCAGTATTTCTCTCCCATGCGCAAGTTCCT -GACTATCTCCCATCGACACCAAGCGGAGAGGATGGCGAAATTGCCCTGCTTTACGCTACA -ACCACTATGCCAGCCTCGGTTGACGCTGAGGCGGATGTTGCAATGTACGATGAATTCCTG -CATCAACCTCCTGGGATCACAGTTGATAGCGGAACCGTTTTCCGCCTGATTGATCTCCGT -CCGGGAAGAGCGACTCCAATGCATCGGACAGTAAGCGTCGACTATGGGGTTATAATTGAG -GGTGATGTTGATCTCTTATTGGACTCGGGTGCTAGTCGGCGTATGCATCGTGGCGACGTG -AGTGTGCAAAGGGGTACTGCGCATTCCTTCCGGAATGTGAGTGACACAAAATGGTGCCGC -ATGCTGTTTGTGTATCTACCAATGGACAAGTTAAATATCCAAGGGAAGAATCTAGAAGCT -GAGGTATATGATGAGGGATTTGACAAACCAGAGGAGGAGGGTCCTGAATGCGGGGGTATG -TGAAGAAGGACTCGGAGGAATCTAAATGAGGCTAGGTAAACAACAGCGGGACTCTTCTCT -CCGAGCACTCATTCTTGATCATAATCAGGTCAGCGCTAGATTGACGGTATTTTGTCAGCG -GCTGAAATAGCCCCAAAGCATTGTCCTCGCAGCGCGTATGGGGTTTCAGCCCCCATCTTA -CCAATCTCATGATTATGAATCAAAACCAGCGTGCCTCCTCCATCCATAGAGGATTGAATT -GCGTACCTACCTCAATTTTTGGATATAAAAGGGGCAAGCTCCCAACCTGTCCTTTTCTAT -GCTGCCATATTCTTCACTCCTCACTTCTAAAGACTCCCTTAACTCCCTTTGTCCATTAGT -CATTTTAAACAATTGATCATCTTACTATGGACACTCCAATTCGTCGTGTCATTACAGGCC -ACGATGCTCAAGGGAAGGCTTATTTTGCCTCTGACGAAATTCTAACACCATACGATCCTA -CCTCTGCTCCGGCATTCTCAACCCCGGGGCCTGACTCTGGATTCGGGGTTATCCAAATTC -ACCGGTCACGCGGCTTTCCTGTGGACAATATACGCCAGCTCCCCGATCCGCATAAGACCT -TGGTCCCGCTGGCTGACACTAAGGGACCTTCTTGCCGCATCCTTGATCTACCCCCGGCAG -ATGCAGGATGGTTCCATCGGACGCTAAGCTTGGACTACGCTGTGGTACTGTCTGGTACCG -TTGGGTTCATAACAGATGGAGGTGAAGAGAAGATCTTAAATCAGCACGATGTCATTGTCT -GCCGCGGCGCAAACCACGAATGGGTAAATCGAGGCAAGGGTGTTGCTCGGGTCTTTGCGG -TGGTTATCCCTAGTAAGGAGATTGTAACGGAGGAAGGAAAGAGATTGGAGAAGACACCTG -CAGGCGATATTTATGATCCCAAAGAAGAGGAAGATTGATCAAGGCGATGTATATTTGGGA -TTTAGAGGGAGGTCCTGTTCTATACTTCTCGGAAAGGCGGAAAATTCCGAGCTCTCGGAA -CACGGAATAGGCCTTCCGACGGAATTAATTCCAGTCCTCCCCGCGCCTTTTGACATTTCT -ACTTTTCCCACATTTACCTTGAAAGTTTCGCTGTGGCCTCAATATCAACTATTTTCCTTG 
-TATTCTTATTCTACATCATTTCTTCTATACTGTTTTGTGCTGTGCCACAAATTGCCGCAC -TTACGCTTCCACAGTACCCCGCGTCGCAGATCTCTAATTGACGCGCAGCATGGACCGCGA -CCAGTTCAAGACTGCGGCTCACTCTGCCATTGATGATAGTACGTGCTATTTTTCTTTTGT -ATAGTCTTCGAGTTCGATAATATTCAATCTGACCAAGCCCCCGTGCAGTCGTCAAGTACT -TCGACAGCGTGCCCGAGCGCCGAGTCCTACCCGCCGTCGAGCCCGGATATCTTCGCCCTC -TGATCCCAGAGAACCCACCTGATGAGCCAGAGGAGTGGGCGCAAATCCAGGAAGATGTTG -ACAACAAAATCAAACCCGGCCTGACCCATTGGCAGTCTCCGAATTTCATGGCATACTACC -CTGCCTGCGTGACATACCCCAGTATCCTGGGTGAGATGTACTCCGCCACATTCACCGCTC -CTGCTTTCAACTGGTTGTGCTCGCCAGCATGTACCGAGATGGAAACCATTGTTATGGACT -GGGTCGCGAAAGCTTTGGCATTGCCCGAGTGTTTCCGTAGTACATCAGAGACCCACGGTG -GCGGTGTGATCCAGAACAGTGCGAGTGATGCTATTGCAACTATTATCGTCGCTGCGCGGG -AGAGGCGCGTGCGGGAGCTACTGCTTGCCGAAGGCCTGAAGGAGGGTACACCGGAGTATG -AGGACCGCAAATTCGATGTCCAAGCCAAGCTGGTCGCTATTGCCAGTGATCAAACCCACA -GTAGCGCTGCTAAGGGCGCTTTGGTTGCGGGTACTCGCTTCCGCGGTGTTCCCACGCGGC -TGGAGGATAACATGGAAATGACCGGACCTCGTCTCCGTGAAGTTCTTGAGAAATGCGATA -AGGACGGCCTCACGCCCTACCAACTCACTATGACATTTGGCACAACAAATACCTGCAGCG -TGGACCGCTTTGCAGAGCTCAAGGCAGTTCTGCAAGAAAAGCCGGCGTGGCAGCGTATCT -GGGTGCATGTCGATGCCGCCTATGCTGGTGCATCGCTCGTGGCGGATGAATGGCAGTATA -TTGCAAAGGACTTTGCGGATGGTGTTGATAGTTTCAACATGAACATGCACAAATGGCTGC -TAGTCAACTTTGATGCCAGGTAAGCTTTGCATTTGAATGGTCTGTGAGTAAGTTTGCACC -TTTTGCTAACATTCTTAAGCGTCCTCTTTGTCCGGAATCGCTTAGATCTGACAAGCGCAT -TGGATATCACACCTACATATCTGCGCAACCCATATTCCGATATGGGAACAGTAATCGACT -ACCGGAACTGGTCCATTTCTCTCGGTCGCAGATTCCGCGCACTGAAGATTTGGTTTGTGA -TCCGGAGCTACGGCCTCAATGGCATGAAAGCGCACATCCGCAAGACAATCGGGTTTGGTG -ACCTCTTTGCTGGTTTGGTGCGCGATCGATCCGATATATTTGAGATCGTCACAAAGCCAG -GCTTTGGCTTGACTGTCTTCCGTGTCAAGAGCCCGCAAGCTGTATCTAACGGGGCATCTG -ATCGCGTGGCCAAAGATGATGCTGCTAACGATTTAACCAAGAAGATCTCCGAGTTGGTAA -ACGCGCGGGGCGAGATCTTCATTACCTCGACAGTCGTTGATGGCGTCTGCGTTATTCGGG -TGGTTAGTGCCAACACTATGGCCGAAGAGAAGTATATCCGCAATGCCTTTGATGTTATTG -TCCGTACAACGGAGGAGGTGCTGCAGGAGAAGTGAGTTAGATGTGATGATGTAGTGTATA -TAGAAATCCCCTACACATACATTTCTAGACGATGTAAGCAATCAGTTAGATCAATGTGAT -CCATAGATATTCTCCTCTACTCAACCTATCTTTTCGGCTTTATCTTTCTCCCCCACACTA 
-ACAGCAATTGTGGATATACATCTCGCGTCTCGCACCAACCCCAAGCCAGGGTCCCGATCT -CCCCCCACGCGGGGTGGGCACTACTATAATAACTTCATTCCTGGTTGTCACCTGTCGTCA -TTCATCCTGCCCCTCCAACTAGACACCTATACCTAACAAAAATGTCTTCTCAGAAGCCTT -CGTAAGTCGGTATATTCTCAACTACGCGATCAGTGCTAATAATCGCAGGTATGTCCTCGT -CACTGGCGCCACAGGATTCATCGGCGCCCATGTTGTCGACAACCTCCTGGCTCGCGGATT -CTCAGTCCGAGGCTCAACAAGATCAAAGCAGAAGGGAGAGCAGATGAAAGCTGCACGTCC -GCAGTACGCCTCAAAGCTTGATTTCGTCGTTGTGGAAGACTTCACCCAGATTGGTGTTTT -CGATTCTGTAATGGATGGCATTGATGCAGTCATCCATGTAGCGAGCGTAGGCAACTCATA -GTTCTTTACTTATGACAGACTGAACTTGCTAACGTACCTCACAGCCCTTTTTCTATGACA -CCACTAATAACGAGCAGGAATTGATTTTACCCGCCATCAACGGAGTCAAGTCAATTCTTT -CCGCCTCCGCCCAATCTGGTTCAAAAGTCCAGCGCCTTGTCATGACATCCTCTTTCGCCT -CAGTGGTAAATCCAAGCAGCACTCCGGAACCAGGATTCACCTATACTGCCGCGAACTGGA -ACCCACTGACATACGAGGAGGCAATTGATCCGAAATCCGACTCTGTCACCGCATACCGCG -GCTCGAAGAAGTTTGCCGAGATCGCAGCATGGGACTTTGTCAAGGAGCATAAGACTAAAT -TCGACCTGGTGACTCTTTGTCCTCCGATGGTTTTTGGTCCCGTGGTACACCCCGTACCGA -CGGTACAGCAGCTCAACGAGTCAAATATGGTCCTCTGGAGTGTGGCCTCCGGTGCAGATC -CACTCCCTGTAACGCGGGTCCCGGGTTGGATTGATGCCCGCGATCTCGCTGAGGTGCATG -TTCAGGCCTTGCTTACACCCGAAGCTGGTGGAAAAAGATTTGTGGCGGCTTCGCCGGAGC -CATTTTCGTACGAATACGCTGCGGATATCATTCGTGATGAGTTCGACTGGGCGAAGGAGA -CTGTAACCACGAATTATACAATGGGTCAGAAACCGGTTGGCTCTTATGGAATCGACGGCG -ATACTGCAGCTCGGGAGTTGGGAATCAAATATCGACCCTTCAAGGAGACTGTGATCGATC -TTGTTGGGCAGATTCGGAAACTGGCAGAGTAATGTATATATATTTGCCTGGTGGTATTGT -ATGTACAATCAGGTGGTTAATGATGATAATCCTCGGGTGGGAAATCACTTTTCTTGGTCA -AGTCAGGTAATTTTCCGGGTAAATGACTCAATATCAACTCGATAAGATTTTTGTTGTTGA -TAGTTGAATATCGTATTCGCTGACGATACATTTCGAAAGATGGAAGCTTGAATCCCCTAA -CCGTCTCAAAGTTTGCCTCTATAAAGGCTTCAATAATGCCGCAAAACAGTTACCTGTAAA -TTGATAAATTAGATTATTTTTCCCTATAGCTGCTATCAGACAAGGGCGCGGACGCCAATG -CTGGAATTGCTCTCTATCATGCCTCGAGGGGCCGCAAGGAGATAGTGAAGCTGCTATTAG -ATAAAGGCGCTGATACCAACTTATACGGCGGTTTCTACGGAACTGCACTACAGGCGGCTT -CAATGGAGGGCCACAATGAGATTGTGAAGATGCTCCTAGAAAATGGCGCCAATGCCGACA -CGCAGACCAAAGTGAGCCAACAAACCGCACTCCACGTGGCTTCAGCTGTAGGCAACAATG -AGATTGTGCGGTCGCTATTAGACAATGGCGCAGATACCAAGGTTCAGGACAACTACGAGG 
-ACACTGCACTCCAGGAAGCTTCAGAAAACGGCCACATAGAGTTTGTCCAGATGCTTCTAG -AAAACGGCGTGGAAATGAAGGCCCAAGGCCCCGAGGATCGAGCTGCGCTCTAGGTGGCGG -CCTCATTGGCCATCCACAAGGAGATGCGCAGTTGCTACTGTTGGGATGATCAGGGGGTTT -TGTCCTGGGAGCCAGTGGAGACCTCGTTGAGCCATTGCCTCCTCGCTAGAGCCGCTATCT -ATAATGGAGAGAGTGTCGCAATATTTCGGCCAGCAAGTGCTTGTCTGAGCGTTTGGCAAG -AAGAATCGAGTCATCCCCATGTCTAGCTAAATATGAAATATTTCATCTTTATTGGAATAT -TCATGACATACTTCCCTAGACGACTTGAATCTAGCTGTCTAGCTAGCATCATGTAACTAC -TTCACATCAATTAAATCTCGTGACAATGCTACATCTTGCTTGACTCTCGCTTCTTGATCA -GATTTATGCGCACCTGCTATATTCCTTTGTGTCATTTTTTGGGTCTTTTCGAGCCTCATC -GGAGACATAGCGGCCTTCTATCCACTAATATGGATGCAATGGAACCCATTTGATACCAGC -TATCCCGAGACTTTATAGACGCTATTGATTGCCTCTTTTCTGCATCTCTTCGGTGTATTC -TTCTGAGTTGGACTCAGGCACTTAGATATCTGAAGGAGAGCAAAGCCAAGAGTGCTAGGA -GCAAAAGATTTCTAATGCTACCAGCCCCTTTTTTGTGAACTGAGCCAGGGGTATACCAGT -TTCAAAAAGCCTAGATTGCAACTGATGTCAAAAACCCCCGACTAGTTTGCCAATCAAAAC -AAAGGGAGAAGGGAAAATCATCCTGTGCAGTTCCATTCAAAGGTAATTACAGCAAAAGCC -TTTCCTCAGAGTGTAAGCTTTTGTTCATGTTGCTTTCTCACTCTCCAATCATTGAAACCC -CAGTCTCGGAACGCCTCCACATGACGCGGTAACACAGAGGACAGAATCATTGTGCTTCCT -TACATACATTCAAACAACCAACTTATCAATAGCAGAACTATTGATGTAATAGCGATGTGG -ACGGACTATTCTACTGCTCAACCTATCGAAATCACAGCGCATTCTTCGAATCCAAGTGCC -CGGGCATTGCAAGCACCAAGAAAGTGAAATTTCACCGATATTGCCGATCTGAGAACAATC -CATGTGGACTGAACACATTTTTATTGACTTGAGAGATAAGAACAGCGGAGAGCAAACAAA -TATGAAGACGCTGACTTGTCCTCCTCGACTAGGTGGCGGATGCAGCCTGCCGTCGCCAGA -TCCTGCCCATCTTTTCAGCAGATTTCAGCGGATAATGATAAACCAAAATGGTACTGGCAT -TAAGTAAGCAATCTCATATTATGTTAGTTAGTCAACTATGCCCTTGAACTCGTATATAAT -CTTGTATTTCAAAATCATGTTCAAATTCCACAGCCCGAAGAAAATCCAAGACCAACTTTA -GAAGGTACATAGCTCGCAGTAATCTTAAGCCTCCGGCATGGTTTGGCTCAACACAACCAG -ATCAACCCCGAAAATAGCAATCAAGTCATTCCTCAACCCTTGATGAGTTATTCTGGAATT -CCTTACAAAGATGTGTGAGCGGCGTGAAGCTGGACAGTGATAGTGGTCGTTTCCGGCCTC -AAAGGCGGTCATTTTCGGCGTCAATTCCAATTATCATCATACTTCAGTTGTAAACTGAGA -GCATGTAGCCCGCATAGAATTTGATACATATTGCAAATGTCTTATCCACTGTGGTGACCT -CAAGTGTGATTTGGATCGATTGCGATTTGTCTCCAGTTGATACCGACCCTAGATCCGATG -GGCTGAGTCTCAAGTCATATGAATTAGAAACATGAGTGACCGGAGAGTCAACCTCGGACG 
-ACCATATGGGGTTGATTCCCTTCGTTGATGTAGTGGGCAATTCAGCTCGGCTTATGCAAG -ACTTTGGAGCGGCTGGGCAAGGACTTAACGCAGCTGCGAAACTTGAGTCTCGGGGACCTG -CAAGCCTTGTTATCCTTATTCTCGTATCTTTCCTAGGTTATACATGGAAAAGTCATTTGC -ACAAATGAAGCCTCCTCCCGCTTTCTCTCTGCTGTATGCGTACCTATACAGCCAAGAAGT -AAGAAGAAAATAGCTCGATAACAGTTAAGCAGAATATCTAAAAAGACCACTTTGAAGATA -ACACAAGGCCGCCTAAGCAGCCATCTATTCTTATACTACATAATTTTGTTCTCGGGATCA -GTAGGACGCAAACATACACTAGTAGTAACCGGATTGAAAGACCTCCAGAGTCTATTTCCC -CCGGGCTTGTTGAGACCGTCCACGGCGCTAACTTCAAGAGACATATTAGTAGACTCCTGG -GTGTATGGTGTTGTCTGCTGCCGACGAAAGTAAGCATGAGATACCTTATGAGCGATTAAA -TGAACCATTCCAAAGTAGGTCTTGGCGACGCCTTCAAGGAACTTGGGACCAATTCTATGT -TTTATGTCATCAATGGCCCCGGGGGCCTATAGGCCTCCAGGGCTGAGCCCCTTTTCCACG -CGTAATCGTATAATTCATTGTCAGCCCGCCAATCTCTGCGGGGATTATTGCATCCAATAC -CCACAAGAGACAGATCAACTGGCTCCCAGCCCTGTTTTCTCCACATCAATTCCGTCATTC -TTCCCTTCGTTGAGATCCATCTCTTAGGCTATGATTGGATTCCCTAGTAGTGGGGGTAGG -AGATGGCTGAACTTGGGTATCCCGTTAGAACTTGGGATTGGCGTTGAAAAATAATTTTGA -TATCCGCACACCCCTTTTTATTTGAGGCACCACCCTAAGCCATTTTGGGTTTAGCCCTAT -CAGCCCCCACTCCCACGTGACTGCACATCGAACATCGAAGTGGGGCGCTGACAGGGATCA -GGCACCGCCCTTGGATTCTGCCGGCACTCCCGGTACTGCCGGTCCTAAAATAGGCATTTT -ATCAAATTGCTTCTACGGAGTACAAAGTATCAATGTCACAAATTTGAAAATAGAAAAAGC -TTGGATACAAGTTATTTTCAATTCTTCGCACATCCTTGACTGACAGATATTGAGATAGGT -CTTTTTTCTTCGCTATTTCGCTGATCGGAAAGCTAGTCTACGGAGTACGGTAGGCGGTAA -ATGGGCGTATGGTCCCCACATAGCCCACTGTGCGCTGTATAAGGGGAAAATTATGTCGAA -TTAAATAGGGCAGCCAAGAGGTGCTAACTCGAAACTGGTTAAAACAGGTGTGCCACTAAG -GAAAAGGGGGTGTGCCGCGATCAACTCGCGTGTCGACAATCAATTCCCGCAACCCCAATG -CAACACGCTCATTTTCAAACCTCTCGTTTTCAGATTCAGATGGCCAGTCTGCCCTCAATT -TTGGCATTTATATAGTGGTTTCGATCTTTTCTTCTGAGGGCCTTTTTTTTTCATGATGAC -CACAGTGCATGGTTCCAATAATAACCTGCATGACATCAATCCTTTCGCTGAAAGCTCAAC -TGACGAGCATGCTGCCCTGAATACTTCCTTCGAATCCGACTCCAGGTCAAGTGGTTCCTA -TCCTTGTCAATATGAAATGAGCAGTGAGTCTTGTACCATCCAATTCGTCTGGCGAAAGCT -GATATCAAATTCTTATCGTTATCAAGGACAAGCCGAGCTGGCCTCTGCAAACTCAGGCTC -AGACAACAACGATATCTTCCATAAAGTGAAAAACTTTCTTCGAAAAGACAAGGCACCAAT -TTCCGATGATGATGGTTTGTCGGGTTGGCCCCACCACGGCATACCCTGCAACCCGATGTC 
-CATGTTCAGACAACAGGCACGACAAATTCTCAGTCCCAGGATCATCTTCTCCGGTGCATT -CGCCGCATCGGCAATCATTATCTCTGAAACCAAAAGCCCGGCGCTTGTAATATTCATCTG -GAACCTCCTTGCTATTGTGCCGCTGTCGATCACGCTCACAGAAGCCACTGAGAGGATTTC -CAAGGATTTGGGTGAGACCGCTGGCGCTCTCCTCAACATCACCATAGGGAATCTAGCGGA -GCTTATCATTTTGTGAGTACCAATCTTGAGAGTTTCGTATCTTCCAATCAACATATACTA -ACTAGCATATAGCGTGTAAGTTTGCCCATAACACCAAGTTGACTGCCTTGGGGCTGAGTC -GAGACTAATCTTCGATAGCACTGCTTTGTTGAAAAACAACATAAAAGTAGTCCAATCCTC -ACTTCTCGGCTCCATCCTAGTGAACCTCCTGCTTGTTCTTGGGTCTGCAATTATAGCAGG -CTGTATCTCTAAGAGTGATGTGACATATAACACTGACCTCACACACTCATTTGTGGGACT -TCTCAATCTCACAATATCCTGCCTTATGATTCCAGTAAGGCGATTTTTCAAATTTCTCGA -TTACATTGGATGCTAATCTTGAACATAGAGCGCGTTCTATGGTAGTGTGAAGAGCGTTTC -TTCTGCTGATCATATGTCATTATCCTTCAGCAGGGGGGTGTCTATCATCCTCCTCGGCAC -ATACTTCTTATACCTTTTCTTCCAGTTCAAATCACACGCTCACTTGTTCCTCTCCACGCG -GCTTCAGACACCACTCCCAACCGATGATCACGAATATACAGACCTCGAATCGCATCCCAC -CGATCTCGCAAGCGCAACAAGCAGGGCTCTTCCTCATCTAGAAATGCAAGAGGTCCGATC -ACGGTCATGTTCAGTCGACGCACAGACGGCATCAGCAATGCCACTGTTCCATGACGGGCC -ATCCGAAGGAAGAACAAGCCTCAACAGACAACCGGAGCCTATCGGAGACTCTCTGGGATG -TTCCTGTGACCGCTATATCAAAACGTCCAACCGCATGATATCTGTAACCCTCCTCGCCTT -ATCAACAATCCTCATCGCTATCTGCGCAGAATTCTTCGCCTCGAGCTTCTCAATCCTCAA -TGAACAGGGCGTCCTTGGCGAATCATTTGTCGGCTTGATCGTCATCCCCATCGCCGGAAA -TGTTGCCGAAAATGTGACTGCGGTTGTTGTGGCCTCGAAAAACCAAATGGATCTTGCCAT -CAGTGTTGCTCTTGGATCAGCCATTCAAATTGGTCTCTTGGTGTCACCTGCAATGGTTCT -TATTGGCTGGGCTCTCGACAAGCCCATGACTTTGCACTTTGACAAGTTCGAGATGGTCAC -ACTGATTGGGGCTGTTCTTATGGTCGATTTCATCGTTCTCAAAGGAAAGACCAATTATAT -GGAGGGGGCTATCTTATGTGCCTGCTTTGCAGCTATCTCGTATGTGCACTCTTCCTATTC -TTTGAACATTCAATTGCTAATCTTTTCGCTATAGTGTTGGCGCATACTTGTTGCCGCTCG -TTTGAGGATCGCAAGTCTCGAGGGTGCCGGTTCATGAACAGCAGAACAATCTGTTAAAAT -TGGGCTTGCCGATTCTTGCCGTCTATCCACCAATCTTTTTGTTATTCTTCAATGTAAATA -GGAATTGTAGATTTAGCCATGAAACTGAAATTTAAATTCATTTCATCTCACCTTTGCTGG -ATTGGTCGCCTTGGTGTCCCGAAAATCCAAGCCGAAGCCACCCGTCGCCTAAGAACGCCA -TTCGGGAAGTGGATATCAGTTGACGAAAGATTTGCTTGATACTATCGAAACGATAATCGC -TCTAGGTCTTTCAAGTCTAGGTCGTTTTGAATTGTTGAAACTGTCGCACGACTGCCGCCA 
-AGCTATTTCGAAACAGGACTGCAATATAGAAGTGTCCACGCAAGAAACTTACAAAGTCAC -CCCGTGTTTTCATATGATTTCAGTTAAATTGTCATTGTCTCGAGTCGGCGCCAGACTCAT -CCGGTTGGTGACCTGCAATAAGACTGCCAGGTCATTCCATTAGCAAGCCAGATGGGGACA -AGGTGTGACTTCGGCAAAGATTATCTCCGCGGTGGATATCCACCAAGCTTAATCAACCAA -TACAGTATACATCGTATGATGTAGATGATACCAGATATGCTGCTTGCATATCTCTTCGAT -ATACGCTAGGGCATGTCGGTCATCCATAATTGCCGAGCGCCTCCGACATAGGGGGTTGCT -GAATTTCGAAGACTCCGATTGCAAACTGTCTGCAGTGGAGAAAGAGAGAAGAACTGAGGG -CTCAAGCAAGTTTCTGGATACAGAAAGTACTTCAATTTCAGACTCACGTGTCTGATTTGA -AATCGAAATGTTGGTGTCTTCGCTGATTCTGCTTGCTTCATTCAGCAGGCTCGTCCACTC -CGCTACCCTGGAGGAGCTCTACCTCCAAAATGCACCCTCTTCCCCACGTCCATATGTCAT -TCCGCATTACGCAAACTCGCATGCCGTCACCGTTGGGGATCAGCTGTACCGCTTCACAGT -GACTGGCCCTTCCTCGGATAATGCGTTCACTCTGATGAGCACCAACGCACCCTCTAGCGG -AGCACTAGGTGTGTTACCGCACATGCACCAGAAGCATTCTGAGAATTTCTTCAATCTCAA -GGGCAGGTTCCAGCTCTGGGCGCAGAAGGGTACAGCGGAGCAACAAGCTCGCCTGTTGAC -AGCGGGTGATTATGGGTCTGTCCCCAGAAACACGACCCACACCTTCCAGATTTTGGACCC -AGATACCGAGATGGTTGGCGTTATTGTTCCTGGTGGCTTTGAGTAAATTTGCATATACCG -AACTTGTATGAACTTGACTAACCTGAGAAGGGAACTCTTTTATGCCATCGGAACCAACTA -TACCTCGTCCACCAATACCCCCTTCGTTCCCGCTGTTTCCAACAGCTCTTCGGCGCCCGA -CCCCAGCATGATGTCTGCGTTGCAGAGGTTCGACGTCTACGCCCAGCTTGACTTCGAGCC -CCGTCGTGATCTGGTTAATGGCACCGCCCCTGGTGACTCGTGGCACACCGGCGATAATTC -TCTACATGGCTCTGGTAAGCCATACTTTGTTGCGAATGGATACGGTCCCAAATACCTCAG -CTCCAAGTACGGGTACCAGGTCATTCAGCCTCTGGTCACTAAGCAGTCCCAGGATGCCGA -CTACACTCTTTCGACCATCTCGCTCAGCCGCCAGACCAAGAACAATGCGCCCACATATTT -ATTGTCTGGAGCTGCTGCTTTTGAGGTTGTGGAGGGTGTGCTTAGGATTCAGATCGGGGA -TTATCCCGTGGCAACTTTATACACTGGCGATGTAGCTTTCGTTCCGGTTGGTGTGGCTTT -CACTTATTATAGCGAGGTGGCATTTACCAAGGTGCTGTATGTCAGTAGCGGTAGCGATGG -TGTGGATTCGCAGCTGATCAGGGGCGGAAAGCGCTGGAAATTTGCTACCTTTCCGAGATA -CTGAGTGATGCTACAGGGGTAGGTAGCGCCGATTTTATTTGTTTTGCTTATGTTTTAAGT -TTGATAAGAAAAAATACAGACCTGCATGTAAGATGCCAAGAGAAAATTCTCTTCTGATCT -ATACCTTTCTTTACATTGCGTCTGACTCACGCTTCAAATACGGAGTATATCATAAGGTCG -GCCTTGACCTAGTGGTGTCCCTGCGACCCCACTGTATACCCCCCAAAGTTACCCCCTGAC -AACAACCAACTCAACACACAGGCCAGAATGTCTACCCCAAGCCCTCTAGTCTTAGAACTT 
-GCAACTCTCAATGACCTTCCGGTCATTACAGAGCTATGGTTCACCGTGTTCAGCGATCCC -GGCATGCGCAAGCTATTCCCAGACACGCCAGGGACGCGCGAATGGTTCACTGAAGCCAAC -CGCATCGACATGCTCACAAAGCCGTATCAAAAATACATCAAAATCATCGATCCCAATACC -AAAGATGCGCAAGGGCAGGCACGGATAGTCGCGTACGCAAAATGGGACCTGGCCATGCCC -GACGAGCGCGGGGCTAGGTTCCCGCCTTGGCACGGGGACATGCCAGGCCAAGATTGTGAT -GCTTTCTTTGGGGGACTGGAGAAAGAGCGCAAGCGTGTGATGGGTGATCGGAAGCATTAT -TGTAAGTCTGATTTCGACTAGCCGCGACCTACTGTGTGTGACTGTGTCTTTGCTGACGCC -CTGGACAGACCTTGATATGCTTGGTACGCATCCCGACTACCGCTGTCGCGGGGCCGGCTC -TATGCTTGTTCGCTGGGGGTGCGAGATTGTGGACCGTGAGGGCGTTGGAGCATACATCGA -TGCTAGTAAGGCTGGTGCCCCGCTGTATGCGAAACATGGGTTTGTGGATCGCAGTGACGT -CGATGGACCAAGTGAGGTGGTTCCGATGGCTCGGGGATAGATTGCTATCTCATCGGTCGA -GTATACTCAAAGAGGAAGAAATGCGGCTGTTCGAATTATAGACAGATAGAGTAGTCAAAA -TGAAACATTGAGGGCGTTTCCAGTCATGGCTCTGTGTATTGTACGTTCTTACTTTCTTTT -CGCGGTGGATCCAAGAATACGCTGTTCTTCCATATTACTCCGTCCAGCTGACGATAACTC -TGGATTCGAAAAAGCGATCCAAGAAAAAGATATCTTGACACCTTACTATTACGGGCATAC -TCCGTACGGAATATGCCGTACTCACCCGCGAAAAGCTCTATCACACACATATCACATTAA -AAATAGCAATCTCAATAACGATTACGCAAAGTTCACAGCTGCCAAGATGACCAAGGACGT -CTTCTAGAGTGATTTCCCTCTCGCCAAGCATCTCGGTCTTTGTCACATTCCCGTAAATCC -AGCAATAAGCGACCACGAATGGCACATGGGTAACATATCTCCGGGTGGATAAGACTGCTA -ATATCTAAAATTATCTTCCACGTCCGGATGCGAGATCTTGGCTATCACCAACTCAAACGA -CTTATCACGGATGACCCCAAATTGCGGGTGAGATCTAAAAATGGTAGAGATGTGCGTCGA -CAGGGATGAGGTGCAGAAATTGGCCTTGGGAGCCTGCACCGCGGCCTGATCCCTGTTGAT -CTTCCAGGTCCCAATTTCAAGGGACTACAATGCACAAGGTAACTATGGAGTACGGAGTAC -TCCCTACCTCGACACAGTATTTGGGAGATCAATTCAAAGACTTAGTTTCTTGACGCTAGG -GTGAAGATCCCCTACATCAGCCAGTTTCCAAAAGTTCAAACAGACGCTAAGTGGCTTTTG -CACGGATGTAGATTCGCCAATTACCAAGCAGCGTGGCCTCGACCAATCTCGCCTGTGACG -CTTAATCAACGCCAAGACATCCCCCCCTGCACTACCCCTTCTATGTTGTACTTTTTGTTT -ACTTGACGTTCATCCGACTATTCCGATTCAAATCCGATTCACCGGGATTGTCAGTGGTCA -GCTTACATCTTGAGACCTTTTTTTCTCTGTCTAGCGGTATGATAGCCCGGTGCTGACGGA -CTGTCTACGGGCTAGTACACCAGTATTCCCATTGAGGCGAGCTAACAGTCACATCGGACC -AAAGCATGCATAGTGTCTCTGTTCTTCTCCTTGGCTCGTGGAGACGGTAAACAACCGCCC -CACTATGTCCGACAGATGATTCGATCTCGTCACACTGTTGGTCTCGGCCGTTGCGCTGAA 
-ATAATCAACTGGTTCCTGGTTGGAGAATCGCCACCCCATGCCTGATAAATAGTACCTGGT -TTATGCGTCATTTCATTCCCATAGACCTTCCCATTGGCTCACTGCATAGAGTCGCACTCG -GCGGTTTTGATTCCGTATTCGGTGTTCGGATTTGCACCGGATTATAAAGTGAGCCCTGGA -TCTCCTATCTTTTCTTTGAAGCTTGAACCTTAGGCAACTCAATTGAGGTTGAGCTTTCTA -TCAACAGACCTATTATTCAATTGTTGCAATGCACGCGCTTGCCCTCGTCACGGGCCTTGT -TGGCATTGCCAATGCTGCCTGTCCTTATATGACTGGTGAGGCCGGCGACAATCCTCACAT -CGAGCGCCGAGGTGATGGAGATGCTGCTGCCAACACGGAAGAATTCCTCTCTCAGTTCTA -CCTCAAGGACCAAGACGTCTATCTAACCTCCGACGTTGGAGGCCCGATTGAGGATCAAAA -CAGTCTCAGCGCAGGAGAGCGTGGAGCGACCCTGCTCGAGGATTTCATTTTCCGCCAGAA -GATCCAACGCTTCGATCATGAACGGGTATGTTTGCCGTATTTCAATTGAAGTTCTCAAGC -TCATGTGTTTTTCTCAAGGTACCGGAACGCGCCGTTCACGCTCGAGGTGCCGGCGCCCAT -GGAACATTCATCTCCTACGGCGACTGGTCCAACATAACTGCCGCGTCTTTCCTTTCCGCC -AAAGGCAAACAAACCCCTGTATTTACGCGATTCTCCACCGTTGCTGGTAGTCGAGGAAGC -GCTGATACCGCGCGAGACGTCCACGGCTTTGCGACTCGGTTCTATACCGATGAAGGAAAC -TTTGGTACGGCGGACCCCTTTCGACAACAAAGAACATAACTGATTTCGATATAGATATTG -TCGGAAACAATATCCCTGTTTTCTTCATCCAGGATGCTATCCTATTCCCGGATCTCATTC -ACGCCGTCAAGCCTCGCGGCGACAATGAGATCCCTCAGGCTGCTACCGCGCACGACTCGG -CTTGGGACTTTTTCAGTCAGCAGCCTAGTACACTTCACACCCTTCTGTGGGCAATGGCTG -GTCACGGTATTCCTCGGTCGTTCCGTCATGTTGATGGATTTGGTGTGCACACCTTCCGTC -TGGTTACTGACGATGGATCGACCAAGCTTGTGAAGTTCCACTGGAAGGGATTGCAGGGCA -AGGCTAGTTTTGTTTGGGAGGAAGCGCAGCAGACCGCAGGTAAAAATGCGGACTTTATGC -GTCAAGATCTCTTCGAATCTATTGAAGCTGAACGTTACCCCGAGTGGGAGGTATGTAATA -ACCGCATCTTCGTCGCAATCCTTGCTAAATATTCCTTAGCTGGGTGTCCAAATCATGGAA -GAAGAGGATCAGCTTCGATTTGGATTCGATCTTTTGGACCCTACCAAGATTGTCCCAGAA -GAACTCGTTCCTGTCACCAAGCTGGGCAAGATGCAACTGAACCGCAACCCCTTGAACTAC -TTCGCTGAAACAGAGCAAGTTATGGTACGTCTCGGTTCATCAATACCAGCGAACAATACT -GATTTCTTAGTTTCAACCTGGTCACATCGTTCGCGGCGTCGACTTCACCGAAGACCCCCT -CCTCCAGGGCCGTCTCTTCTCCTACCTAGACACCCAACTCAACCGCCACGGCGGTCCCAA -CTTTGAGCAGCTTCCCATTAACCGACCTCGCACACCAATCCACAACAACAACCGCGACGG -AGCAGGTATGTACCTACCACAATTCCAATCCCGCTCTGAAACCCATTTCTAACAACCCCA -GGCCAAATGTTCATCCCCCTAAACCCCAACGCCTACTCCCCTAACACGGAAAACAAAGGC -TCCCCAAAACAAGCCAACCAGACCGTCGGCAAGGGCTTCTTCACAGCCCCAGGTCGCACA 
-GCAAGCGGCAAACTCCAACGCACCGTCAGCTCAACCTTCGAAGACGTCTGGTCTCAGCCA -CGACTATTCTGGAACTCGCTCATAGCAGCCGAGAAACAATTCGTCGTCGACGCAATGCGC -TTCGAGACATCTAATGTCAAGAGCAGCGTCGTCCGCAACAATGTAGTCATCCAACTAAAC -CGCATCAGCAATGATCTCGCCACGCGCGTCGCAAAGGCCATTGGTGTTGATGCGCCGAAG -CCAGACGACTCATTTTACCACGACAATACCACCGCGCACATCGGCGCCTTCGGCCAGAAA -CTCGCTAAATTGGATGGGTTGAAGGTTGGACTTCTTGCGTCTGTGGATAATCCTGATTCC -ATTGCGCAGGGCGCGGAATTGCAGTCTGCTTTGTCTTCTGCGGGCGTTGATGTTGTTGTT -GTGGCGGAGAGATTTGCGGATGATGTTGATCAGACTTATTCTGCTTCTGATGCGGTGCAG -TTCGATGCTGTTGTTGTTGCAGATGGGGCAGAGGGTTTGTTTGGATCGAAGTCGTTTACG -AATCTGCCTAACAAGGCTTCCGGTTCGTCGTCTTTGTATCCCGCTGGTCGGCCTTTGGAT -ATTCTGCTTGATGCTTTCCGATTCGGGAAGACTGTTGGTGCGCTTGGTAAGGGTTCTGAG -GCTCTAAAGTCTGGGCTCATTTCTGAGGACCGCGATGGTGTGTATACTGCTAGCAGCGCT -GGGGATGCCTTTGCCAAGGATGTCAAGGAGGGACTTCGGACCTTCAAGTTCTTGGATCGG -TTTGCGATTGATGAGTAAAGGATGTCGGTTATTTTATGACTGTATGTATGGATATATCAA -GTTTAAATAGGTGTGGCTTCGTTCAAATTTTCACAATCTACAACTCGAGATTTATGCTTT -GGAGGACACGCGATCTGTAGGCTGCGCTCAATAAATTTGTATTGGGGAAGTGTCATTTAA -GTATCAAATAGAAACGAAGGTCATATTTACAAGAGTCTCTTCGTTGGGTTCCTGAGAAAG -TTCAGGGGAATAACAGGGCAAGTCAATCAACCTCTAAAGCTAGAGAAGCACTAGCCACGC -CGACGCATAAAGGCATCTCTATCTGATAGTGCTGTTGCTGCACGCAACTTCATCCTCTTA -TTGGCAGGTTTCGTGCCCGAAGGGGCGCGCCTCTTTTCCTGACCGAGAATAGCCCACTCG -GCAACAGTCGCACCTGTTCCATTTTTGGCCTCCAAATCGACAGAATCAAGCGCCTGATTT -CCCACAATAAGCAGAGACGCATAGCGCGCTGCTGGCACGGGAGACGACAAAGTCACATTT -GTCGTGTTTCCAGTGGGCATGGCTATGGTATCAAGGTTTGTGGTGTCAGGGTCGTATGGG -TTCGATTGAATGATGTTTGTCAGGCTGTGAACAACTTTGTAGTCCGACGTTGGGGCGAAA -AGCGCCATCGTAGGATCTTCCAGTGTCTTGTTGTGGAAGATCACCGTAGCGTTGACTGGG -GGTGCATGCGCCCAATCGAAGTAGAAACCTGATACCATAGCGCCGGCCTCGTCTTCGAGG -GAGACTGTCACAGAGTTTACCTCAGCTGCCAGTGAGGGCTGCCATCTTGTCGAAGCTGCG -CCGTCTACTGCGGAAATGGGGAATTGGCCAGGTTCGAATGCATCCGAAGAGGAAACAGCC -TGGCACTGTACAAGATTTCCTGCCACAGTGTTTTTCGATCCGCTCTGTCGGTTAGGAAGA -ACAACAGAGCCCTTGAGAGGAAGATGATAAATGGTGACATCCTCCTCTGGGCCGGAGTGA -ATGGTGATTGTTTTGTTGACGAATCGCTGGTCGGCGGTCTCCAGTGGGGCAGTACCTATG -GCATGGCTGATAGTCGTATGTGTGTAGTTTGACCACGCCGAGATGGGCCACCCCCGCCAG 
-TAGAAGGTACGATATCTCAGGTATGGAATCTGTGGAGGCAAATTTGGATCCACATGAAGG -ATATCATCTGGCACTAGTCGCAATCCAAGATAGCCATAGATGGCAACCTGATTGGAACCG -CCGTGGCCAGTCAAAAATGGGTACGCTGGATGCGTGCCTCCATTAATGGTAGCGTCGTCT -ACCATCTGCTCCGACAGCTGAAAGAAAGGCGCACGGGTATAAGGCTTGTATGAGTACTGC -GCATATGTGTATGCTGAGCAGCCAGAAGGCGCCATTTCGTTTGCGACAACAGAAAAGATA -GCCCAGGTCATGGCGGGGCCATCAGGAGACTGTTTGCCTGCGTACTATATCCAGTTAGTT -CTGGCGTCCTTTTTTGCCGAAGGTCAACACTCACATAGTCCAGGTCATTCAGCGAATTTT -GGCTGGTGTAGTTGGCACTGTAACCCAGGGGATAGGTGACCATAACCACATCAGCTTGCT -TGACCACGGCGCTTCCATTCATGGTAGTGAACTCAAGAGTTACCTCGTTCTCACGAAGCA -CCAGAACATTGTCAGCCATTTCATCCCAGGTGGCATTTTCTTCGATTCCAAATTGCTGAC -GGAAAGCGTTGGCATGTCGTAGGGTCTCAGCCATGAGCGGCATTGTGTAGCCACCCGCAT -CAACGTGGTTGGCATACTCATCCTACTTCATGTTAGTTTGGATACTGAGAGAAATGGAGC -GTCAAACTTACAGGGTCAGTCATGTTGGTCAACGTCCAATAAGACCCATTCCGAACAAGC -AAGTCTGCATAGAGCGTGGCTGCGGAATCATAGATGGGGAAATGCTCTTCCTCGAAAAGC -TTTGTGTTGCCACTTGTCACCCATTGATTAACCAATGAAATTCCAATGTCTCCGTTCAGG -TGATACTGGTAGTCCCAGCAAGCGCCAGTTGCCGTGCAGTTGCCAAACCGGCCACTGGTC -CAAGGGAAGATTGCAGCTGATGGACTAAAGCGTGTCTGATTCTGAGAGCCGGAGAGAGCA -GTATTGATGTTCTCTTGTGCTTGGGCATACTTGGCCACGCGGAAATTGGTGAAGCGCTCG -GACGCCTCTGGGTGAGAGGCAACAAGACCAGGTTGCATCCATACATCCGAATCCCAGAAG -ACTTGACCACCATAAGAGTCAGAGGTCAAGCCACCAACTGAGATACTGTCCACATTTACT -GGCGCTCCAGCAGCTTCCTTGACGCCATTCTTGCCAACAGTATTCTGCAACAAGTAGTAA -GTATTGGCCACGGAGATTACTGCAGAATCAATGATATGGCTATCTGTAGGTAAGGTCCCG -TTAGGGAATGCGTAGTGGTCAACCGAAGCTTCGGGCATAACATTTTCCCACTCGACGATG -TGGGTTCGGAGAGACTTGGTGTAGCCATTGCTCATCGCGGCGGAGGCAGCCTTCTTCGCC -GTTTGTTTCGGATCGCCAAAGGCATCCGTAGAGGCAGCTCCGACATATTTGATGACGCGA -ACTACTTGACCAGGTTTGAACTTCACATCCACGGCTTGCGCAATGGACGATTTGTTGGAG -TGAACATATGGCTTGTCGGACACGATCTTGCGCGAAGAGAGATCCACATTTGGTGACCCA -GTGAGGTTTGCATAGACATAAGCTGTGACATTGGCAATCCCATTCGGTCGTACTGCTGAG -AAGATTGCACCATTGTCTTCGCCAGACTCCACGAAGTCGGTGCGCACTGCACAAGCTCCA -TCGATGACATTAACGATGGTCGCATCCGACCCCGTAGTCGGAATAATCTCCATTTCGACG -ATAGCCTGGTTGACGTACAGCTTGTGGGCGAGAAGACGATAGGTGATCTGGTACGATCCT -TTCTTGCCCCGAGGCGTCCATTTGTAAGACCAGACCAAGATTCCTGCTTTGAAGTCGTAT 
-GTGGTTCGGAAATTGGAGATGGTCTTATTGTCAACCTTTGCATCCAGATAGGTACCATCA -CCAAGATCCAAGATGATTCCCGCCCAGTGAGGAACACCACTGATCACACTATCATATCCA -TACTGGGATAACCAGGGAAAGTTGCTGCCATTTGTGGTAGGCTGTGAGTCAAAGAACCCA -CTGATCGTTGCAAAGGTCTGGCGTCTGGAGAAAAGTGGCCAGCCGTTGGTGTCATCAGGA -CTATCAATTTCAAAGAAAGGACCAGCACTTGCAACATTGATACCCAAGTATCCGTTTGCT -ACGGATCCTCGAGACTGATAGCGGCCCTGTTCTAAGTTGGTTGTGCTCAGGAGCCACTTG -TCTTCGTCCCAGGTCACGCCATCGAAGCCGGTCTGGTACACATTCTCGCTGAAGTTTGTT -GTTGAGTTAGAGCCCGTATGTTCTCTGAGGCATCTTGTGATACGACTTTCGGGAGAGCCG -CTGGCAAGATTCAGAAGTAGTGGCAATATTGCGCACGTAAAGGACTAAGGAGAGTGTTAA -GATGATTTGGGAAGGCATACGAGTACCAAAGTACCAAAGTACGAACCTTGAACTGCATGT -CGCAGTTACACCACTTGGCCAACCAGATCAGATTCTTGAAGAAATCACTCGAAAGCAAAT -TGAAAGAGAAGAAGCGATCGGCACTATACATCGGAAGAGGAGGGAGGAAGGCAAGTTTTA -AGGAAAGATGGAGATTTAAGGGCGCTTTCCTAACTCAGCAACAAATGAGATGATCCAGGC -ACAGTAATAATCTCAAAATCGCTCAAAGTAGCAATGAACGATGAAATGTGGGGAAATAAA -GTCTGTTCTCAGTTGATCATACGTCTTCTAGTCATAAGTGGAGGTCTGCCATCAGCCAAG -GGGGAGGAATGGCGTACGAGGCTGGCTTACACTATGTAAAACGGGTGTGTCCGGTTGCAG -GACCTTCGGAGAACCGGGAAATGCGGGGGGTAACCCACAAACATGACAGTATATATCGAC -TCACATTGGTTCATATTGGCTATGAAGGCCGACAGCTTAGTTTCCATCGGCATCCTTCAC -GGCCAAGACCTGCCCGGGTTAGAACCGGCTCGTCCCAGATCGTCATCGCGTACAGAGAAG -GTTGGATGCGAGGGGATTTTGTGGGGAAAATGGGGCTTTCATTCGTAAATTATCCGAGAA -AATCCCGTTTGAGGCTATGTAATTATGAGGGAATCAGAGTATTAAAGTGATCAAGATTCT -GGCTTCAATCTTAGAAGAGATGATGGTGTAAAGGATGGAAGGTTGAAGTCCCACGGTTGA -GAGAGATTGGTGCCTAAGGTGTCTGTCTATACAACACGCAGATTTTCACCCATCACATGA -TGGCTTGGAAATACTAAGATGCAATCTGATGTGCTTAAAGTTGTTTGGTTTCATTTCATA -GGTCATCTCAGATCATGTTGGCACCTGGTCCCCGGTGGATCGACCCTTCTCTGCACATGA -TCTCCAGTATTTACATATTGAAACAAAATAGCAGGTTGAGCCTTGTCAAGAAGCCCAGGA -ATGGGTTCGAGGCGCGAGATGGGAGAACATCTTGAGTGTCACTTCAGGCAACCTGACAGA -AATCCTTGCTCGATATTTTCCGCTTTCGTTCATTCATTCGTTGCTATTTGCATGGGGTGT -ATCCACGGTGCGGATACGCGTCGTAACTAACTTTTGGTTAAACCGATTTGGCAAGACTCG -AGGCAAGTCTTATTTGCCACGCCTGCGATGTTCAAGTATGTTCTTCGCCGTTATTTTCGT -CGTCGGAAATTTCATCAAGGGCCATAGGCGCAGCAGGCGAGGACTCCCTCTGTTGTGGAA -CGGTGTTGCTTGACCTCGAGTGCAAAGTGAGCGCTCTCTCACCCACAAATCTATCACCGG 
-GCCAATCCTCTTTACACACCGGGCATTTTTCTGCTTGTTGCATGCGGAAGAAATTGCGCA -TGCAATGGTCATGGAGCCGACCATTGCATCCATGGTTTTCGCAGCGTTGGCCCTAAAGTA -TCTAGATCAGGTCTGTCTCATAGTAACTTGGTCTCAAACGTACTTACCACGGTGATGATG -TCTTTGCAAGCAGCACAAAACTTGATGCGCTCTATGGGTCGGCCATCGAAATTCTCGTCG -TTATACGTTAGGACTAACCAGCTTCGTAATTCCATCAGTGCTCGAGGTGTGAGGCTGAGG -TAGCCCTTCGGGCTTTTTTGCAACCAGCCCTCTTCAATCAAGTGGAGGATCATAGTTTCC -GCTTGGGTCATACGCAATGACTGTACCGCTGCGGTTTGGGTCTGTGTTGAATCGTTACTC -GTGGATTGCCGTTCGCCAGACGGAGCCTTATGGAGTTGAACAGCTTGCATCTGTGAAGCT -ACCATTCCTTCACACACTCGTGTATTATTTGTGGCGAACATGAAGTCAAGCAAGCGCTTA -AGAAATGCTATCTCGTCTGGAGAATAAGTGGTCGCAAGCTGGGTCAAAGGATCACTCGTA -GTATTTACAAGCGCATATACTCGCTCTAGTGGTGTTGTCGAGACATCTTCCTGGGATGGT -TGAAGGATTTGTGGCAGTGAGCTTCTAATCTCGAGATCGAAAGGTGACACAGCGGTGTTG -GCGGCCGAGATAAAGTCGGAGAATTGGTCTTCTCCGACCTCGTCAGGATCAACCGTCCGT -CCTTCTGCTCGAAAATTCTAACGTTAGCGACTTCTGGGATTCAGACATGGTGCTTAAACA -AGATTTCATTACCAGATACTGTTAATATCGCAGCTAGTATGGGTTGCGCATCTTCAAAAG -TCATAGACGAGCGTGCCATGAACGCCTGGAGAAAGGCACGATTGCTGTCGTTGTATTCGG -GCATATTTTCGAAGATGCCTTCTGGTCTAAGGATCCCAGAGTTCGCAATATCAGTCAGTT -CAAGGTCCTTTGTGCCAACAGAGCAATGGCTTTGACGCGATTTGACGCGCTCGCGTGGCT -GTAGATGAAAGAAAGATAAGAACGCCTGATAAGGAAGCTTAGTAAATTTGTTTGTTCCTG -AGGCACCATATTGCTACTAAACTTGAAATAACTGAAGAGAGAATTGTGTAGCAAACACGG -GCCCAACACTTGTCGAAATGGCTAATAAATCCAGGGGAAACGATGTCTGAACTCAGCTAG -GATGGCCGGAGCAAACAGGTACCAGGCATCTCTGTAAGTATATGATGCCGCGGGCATCCG -CCTTTGAACAGCCCTGGCTCTATGTAGCACTCGTAGACGGTTCAAGCATATACTATGATG -ATGGGTAATCCTTCCATTTACATAGAATCAAAAGGATGAGTTTACAAATCTACATACCAA -ATTAGCTGAAACTGAAGCAGCCGAATCTATGTGTTCCTACATCTATGACTCTAAGGCTTG -AGAGGAATTGAATGTCCATGCTCTCGAACATAAGGCTCTCCTTTGATCGAATATGGAGTA -TCCATGATTTGAGGCTCTAGAAATTTTAACGCTATAGCCCTAAATATGCCGTCAAGAATC -ACACAACTAGCATCGTGGGGAACGCCCTAGTGAGATCAAGATGCAGGGTGACAGTGCCCA -GGTGAGCCGTAGATCATTTGACAAGGTAGTAAGCACATGGGAGTATCCTCAGTCAGATTG -GTCAAAATTGAGGGTTGGGCATTGATTCACACATGTAATTTGTCGTCTGCAATCTTGCGT -ATAAAAGATTGAGCGGTTGGGGTTCTGTGGGAAATAAACATCTACATTGTGACCTTCACG -GTTTGGGAATTATCCCGTGGGTCTTTTATTCCGGATATTAAGTCTCAAATTTCGGCTCAT 
-TTACAATCTTGCGAATATAATAGAGAAAAATGAATGTCAAGTACATTATATATTCTTTCG -ATACTTCATTCCGCATGTGCAACCGCTTTGTCAACTATTCTACCAACCTGAACACACTCT -TCGCCTTTTTTTGCACAAATATCACTGCAGTAACGACAGTAGCGAGCGTGATGTATACAT -AGTCAGGTTCTATTTCACCGACTATATGTGCGATATCACAGGTTTTGTGTCAAACGACCC -GAGAGAGCCCGAAGATGTGGACAATAACTCTAGCAATTCAACATATAACCAAAATAGATG -ACGAGGCCACGATCATGCACTCTTCATGCGACGCATCAACCTCGGTATCAAAGCCAAGCC -ATCCAGCGAATTCCATGAGATTGATGGCTGCAGCCATGACCACTGGAAAGCCACCCGACT -CCAGCTTCGGCGATTCTGGCTCGTCGACTTTCGCTAGTACGATTCGGTGTTATTGGGAGC -CTTGTAGACGCTGGTGCTACTGGAGCCGGACTTATCTTCTTACTCTGGAAAGAGAAGTCC -GGTACCACTCTGTTCCAAAAAAAACCCCCCAGGTCATTGTTTCTGGAGTATGCAGCAAGC -TGCAGAGGTTGAGCAAGCGACCCGGTCCATAAGATTGCAGAAAATCCAGGAAGCATCCAT -CATAATGCATCGCAGGAAATCTGTTCCAATGTCTTCGTTGCCGATTTACTCTTATCAGGT -GAGAAAAAGCACTATTGCATGCATTAAAGTGATGTACCAAAGACAGACGTGTCGAGTATA -TACAATCCATGGAACTACTATTCGGTGCTTGCGAGGCACATGTGCATGATTCGTGTGATG -TTGCGGAAATATCAGTTATTATTAATAAGGGAATTGCATTTGTAGACAATGACTTATGTG -AATTCAGTGTTACAAAGCGAAGTTTCAATTCTGTGCCATCGAGTCAGTTTCTTTGACCGA -TATTACTTAGTGGGTGCTAATCACGTACCAAGTTTGGATTCACCTAGAGTACAGGAAGGG -AATTACTGGGCAAGATTCGTACATGCAGTCTAGCTGTGAGTTTCATTGATCGCGCAAGTC -TGATATCACAACTGCAAATGCCAATTCAAGAAAGGTATAGACCGAGACCCCATGCCCTAG -ATGCGCCGCTGCGCAAAGGAAAGTGGAATCGAACCACCGACTGGTATCGCCGAACGCCAA -AGGATCAAATGTAGTGACAACAATATGAGAGAGGGAAAGTCAAGAGCGGTGGCAGACCAC -TAGTGTCTACGCCGGGCGGGCTCAGGGCTAGGGCTTCGGCTGACACTGCGGGGATCTCTG -CCACGCTCCTTGAGCCACTTCTTGGTGCTGAGTCGGAGTTCGCGGTCAAGCTCGGGTGAA -GGGTCTTTTCGGTGAATGAAGTTGCAGAAGCCCCCTCGTACACACCCTTCACCGCTATTA -AGACGACAACAGGCTTCTCGGAAATCCGTGACGGGTGATAACTCGCAGTATATGGGTCGC -GCTGCGTACCAACGGGAGTTCAAGGCATCGCATGCTGCCTGCGCATCTTCTTCGTATTTG -AATCGGCCGTAAACGTTACCAATGAGATCTATTGTCGCGAGACCAGATGTTAGTAAAGAG -TCGGACATTAAAAAGAGAACATCACTTACGGTCGTTGTTGTTGTCGCAGATGACCAACTC -TTCAATCTCGCCGTACTTGCACAACTCACACCATACGTCCTCGTAGAAGGCGTCAAAGTG -ATTTTGCATTTGACTAGCGTTCATCTTGTTCTTGGGGTCGTAGGCCGGGTTCTGGTACAT -GTTCGGCATGAGAATGGTTTGCGAGTACGAAGGCTTGACATGCTTGCGAGAGCAGCGATC -GCCGTGTCGGCATGCGCCAATCTTGTAGTAGAAGGAACAGTTGACCTTGTCCTGTTCGGT 
-ACCGAAGATAGATGCAAGATAGTTGGCCATGATTACTTGCTGTCAAGCTCTGTTCTGGGC -TGGGAAAAATTTAATGAGTGGTGATACGAGATGCTCAGTTCGCAAGAGAATCAATGCAAA -TGAAGGGAGGGCGAAGTTGCTTTGAAAGCTGGATGATATCGGCTGTCCGAAGTTTCTGGA -GAGCTCAAGTGGCGCGAATAATTGATGACTAAACCTGGGGAGCTGCTAGGCCGTATTATA -TATTTTCCGGGCGGTGGAACCTCCGAAGTATTCGGCAACAATCGTATGTTGTATCCGCAT -ATTAGGCGGTGTGTGAGGACACGAGTCCAATACTTATGAGGTCTCACCACATAAGCCTAC -ATGTATGTGTGTCGTAAAACTATCCAAAGAAATGCACCTTTTCCACCACATATATTCTAC -ATTTCACAGCTTACCATCGTCATTCTATCAGCTATAGCACTAGTATGTGGGGAAAAAACT -TGAAAAATAATATCGAAAAGACTTAAAAAAAGAACTTTACAATGTTGAAGATGTATTCTA -CTCGAGCAGAAGCTATGTACCTCCAGAGCACTCTGTAGGAATCTGTCTGTAAGTTTTTAA -CCTGAGGTCACACTGACTGAAGGTCATTTTTTCTTGGCTTGACAGGATTCTATATGCATC -TTGTCTAAACTACATGTATTAATACTCTTGGGATAGCTTATTCTTTCCCTCGGGCTCGAT -AATGCTCTGCCTAGACTTGCTAGCCGTGGGTGTAGAATAAGGCCGATATCAATTGTGAGA -GCCAGCAATTAAAGGGTTTGGACTTGAGGTTAGGCTTATTCGTTCCTAGGGCAGTTTGTC -ACCATGGCTTGAATCAAATCATGAGCAGTAAGTCTCAATTGAAGAGTTACCTAACAAAAC -CATGAGCTGATTTCAAGAATATGGATTTGAATGCGagatcaaggccaaaacaaagacaaa -gagaaagTGAATAGTGATGAAAAGAAACACAGATATCGGAAGGAGAGTGCGGTAACTTGG -CCATGAGTATGTTCTGGGTCTTTCCGAGACAGTTGCATACTGCTATAAATGGACTGGGTT -GGTCCCCCGAGGGCGTTGGATGCGTAAGGGTTGGTTTGTATCAACATTGCCAGACATTGT -GTGTGTGGCTGATATTATCATCGGGCCGACCGACTCGAATGAGGGGTCCAGGCCCTGTCA -GGTGGTTTCAAGATGTGGTGTGTTTGATAATGTTTGCGTTTAGGGTTTTCTGGTGCTGGA -GGTAGAGCCGGGGCTAAATTGATTTGGGATGGAGGTAAGATGGAGGTAAATTAGGCGGAT -TTGAGTTTGGTCATTGTGAGCTAGATATCTGATATTAGGACTTAGATGAAGGCTGATAAT -ATTAGATCGTAGGCTTCCAAGGTCACCTTTTTCAAAGAACAAAGGAATGAGGTAAGAGAC -AGGAGAGACCATGAACCCAGCCAGTGAAAGATAGCCTGGTCTGCCAATTTGATTACATGG -ATTAGTGATATTGACGAAATAGACTGAGACATCTGTAAGAAAGGGGAGGAATGTGGCTGA -GCCTGAGTAGTCGCAGTACCAGGAGAAGATGCTAGGGTATTTGCCAGTGTGGTTGATGAG -GAGGTCATTGAAGCTGAAGTGGTAAAGCTCTGGGGCTTGTCTATGAAACCATTTCAAAGG -TGATGACTCGAACGCTCCGAGATATTTCAAGATACAGTCAAACTCAACCGTCTTCTGTAG -ATATCCACGCGTGTTCGTACGCGGAGGCATTCTCCATCTTGGCTAAAGATCTCTGTGGCG -TCGAGGGCCAGAAAAATGGGACTGCTTCGATGTCAAAGTTGCGGAAATCGAGATCGAGGA -ACTCGGTATCAATCTCGAAAAGTATGGAATTGATGAGATCAAAGGATAAATATATACATG 
-ACTGCCATATTATTTTGCTGATCTTGAGGAAAGTATGTCTATCTCTTTTCCTATTCTAAA -ATGCTTATAGCTTGGGACTAGGAATTTTGTCTTGAACGGCCAAGCCGCTCACATCCACTG -AAGGCTTTTGCTTTTTGATCATCTCAGAAACAACCTTTCCAGTTCCCAGACTCAAAGAGA -TTCCCCACGCCCCATGCCCAGCGGCTATGAAGACGCCTCCGTTGGGGTTTGCCTTCACTC -CTCCCAAAGAGTAATCTCCAACTCTTCCAATGGTGGGAACACCAAGATTTCCAGTATTTC -CCACTGGGCGGAAGCACAGCCCTTCACGGATAATCTCTAAGTCATCTGTGTTTGCCACCT -CGTCGGTAGCCTCATTTGCATCATTGGGAAGGCCTCCCATTAAACGGACTGCAACATCTT -TGAGTTTTTGCATCTCGGAAGGATCAAAAAGCTGCTTAGTATCCGCGGAACAAGATGGAA -GCTTCATCGCGGGGTTGTTGAGACCAGCAATATAAATTTCTCCGCCTTCCCTGAGGAAGA -TCTCGGGGCTAAACCCGCAGGACGGAGGGTGTGTGGTGAAGACAGCATGGCTGCTTCCAC -GGTAAACGTTCGTCTCGTGCTCCATTGTATACCGAGGGGATCGAACAACCAGTGAATAGC -CAGCCAAAGGCGAAATATCAAATGATACCTGAGAGGAGGGAAACAGATCTCTGAGAACCT -GTGGTGTCCACGGTCCCATGCTCAGAACAAGGTTTGTACACGGAATGATTGACTCTTTCT -TGGAATCCAATGACACGATTTTGACGCCGGTAATGGTGCCATTTCCATCCTTGACTATTT -CGGTGGCCTGGGCTGGGTTATGAAGCTTAGCACCTCGAGATATAGCACCGTCAATGAGGA -ATTGGCTCAACCGTAATGGGTCTCTGAACATAATCAGCAAATCATCTGCGGAATACTTAG -ATACCATACTCACGCTTGAGCCACGGTTTCGCCCTGGCTGATCTTCTCCAGGATTCCCTC -TTTCTGCATGGTCAACCATTCCGGAAATTCAACCGGAGTGGTATCCTCGGATCCAGCTGC -GGTTTCTGCTCTGCTAGTGCCCGTTCGTAACCAATCGTCACCGCGGGGGCCACCTCGTTT -GCGTTGAGCAACGGTATCCAAATTGAATGCTGTTCCTTTCATAAATCCCCACTTTTTGTC -CCCGCCTTGCTCTGCAGCGAGCTCATGGTGCAAGTCGAAGGACATCGCACCAAGAGGCGC -AACAGATGGCTCGAACCAGTCTCTGGCTAAGAAACCTGCGGCATATCCCGAGGCTGCGGT -GAATAGCTCTGGGGATTGCTCAACGATGTGGATCTCGCCTTCTGGTCGGTTGGCAGATAA -GTAATATCCAATTGAGGCTCCGATGATGCCTCCTCCAAGAATGACGGTCGACATGCTGGC -TGTGCAGAATTGAGAACGCTTGTCAAAGAAGATTGCCGGAATGAAGTGTTTGTAGCAATC -GATGTTTGCAGGTGGCACCGTTCGAGCTTAAGTTCCGCGGCAATCTATGATTTTCCTTTC -CGGGTTCCTTAACACCGGGCCGGGAATGCATACTAGTGGTGGGGTGGGATAGCGGGGAGA -TATTTCCTGGCTTGGCACACATTTTGGGAAGAGCTGGTCGCAAAAGTTGACTCTTTTAGG -CGATCGTTCAAGTATTTCCTATCAAAAGTGGATCTCTCGGACCGCTTCTCTGAAATGAGC -AACGGCGTTATCTTCCATTTGCTATGCTCCATATGTCCAATCTCAAAATATATATCAACG -GATTTTCCATCTGATAACCAGTGTCTCCAAATATCGATATATGATTTAAGGTTAAAATAA -TTTCGAGATCACAACCCGAAGACTATTCCTTGAGGCAACCACACTGTGGATGATACCTGC 
-GAATCCGATGTCTTATCTACATATCACACGATGACTTCAAATGTTGTACTCCAGCTCAAA -TGGATTTTCAAAGGTCCTTTCTTTCCATCACATATCACTCCGTATAATAAGAGGCGTACA -TGACGTCTATATCTGGGCCGGGCTGGCGGGGTCGGGAAAGTCTCCGAGATTACCCTACCA -TGTCCGGCTGGGAAAAGACGCTAATGCACATAGCCCCCAGTGTTACATACGGTATTATTT -CAAACGCATACAATGAGTTAATTAGAGAAAACAGTTGATCAATGTGGACTTTGGTATTGG -TATTACTGGGGCAATAATTTCTTGAAGTGAGCGTTTCTTCTCGGTCGAGCTGCCGTGGTA -AGGTACAGATAAGCGCCGCACTGAACCTTTCGCGCTCTGCCTCTGTAATATCCTCAATCA -AGGTTGTCATCATTGATATTTCGCAACCAAAGCGTCTGATCAATTTGAACGTAAGTAATA -CCTGCTGGCTATCCAGAGCACCCCAAGTCTCGAGTCACTGGCTTTACAGCCATGTTTGTT -CGTCCATATCCCAAGACCTTCAATTCCTCAATCCAAATGAACCGTCTTTTGACATACATG -GCCATGTTGAAATTTGCTACCAAGCATGCTCCTTCATGCTATAAGCTCGAATTCAACATC -CTGTCTAAATTTTATTCAACGCGGTTTTTTTTTTTCTACTGGAAGCTTCATATCACTAAC -ACCACTACAGCATTCATCAAAATGGCAGCCAAGACCATCATTGTCACAGGAGCCTCAAGA -GGTATCAACCCTCGGGTTTTGTGAGTTGTCATATACTGAAATTAGTCCCCTAGGCATCGG -TCTTGCTGTGGCCAAGTTTCTGCTCACGGCACCCCAGTCCCATAATGTGGTGGTGGTAGC -CCGCAGCGTTGAACCCCTCCAAAAGCTCAAGGAGCAATACGGCAAACAGGTGGCAGTCCT -GAATGGCGATCTGGCAGACTTTTCTCTTGCCCAACAGGCTGTCGACATAGCCATCAAGAG -CTTCGGTCAGCTCGATGGAATGGTCCTCAACCACGGTGTGCTAGGCCAGATCGGCAAAAT -CGCCGACGCTGATGTGCAACAGTGGAAGGAAGGCTTTGATATCAATTTCTTGAGCTTTGT -TGCTTTTGTACGCGAAAAAAATCCCACCAAATCCACCAAGCATTACTAACAGAACTTAGG -CTAAGGCGGCTCTCCCGGCTCTTCGTGAGTCCAAGGGTAAAGTGATTTTCACTTCTTCCG -GTGCAGCAGCCGCCGGCACCGGTTACCGCGGTTGGGGTCTCTATGCTGCCACCAAAGCCG -CCATGAACAACTTCGCCATGAGCCTGGGTGCCGAAGAACCCGACGTCACCACCGTTTCTA -TCAGGCCTGGTATGGTCGACACCGAGATGCAGCGCGCTCTACGGGAGGACCATGCTACGG -GACTGGACGCCGAGATGCACTCCAAGTTCACTGGAGTCCACGAGACTGGCAAACTCCTCA -AACCCGAGCAGCCTGGCCATGTCATGGCCAAGTTGGTGCTTGATGCGCCGAAGGAACTCA -GTGGTGGTTTCTATTCGTATGTGCTATCCCACTCAAATTACGCGAAAAATACTGATAATT -GGCAGATGGAATTCCAAGGACCTCGAGGGTTTCCAGAGATGATATCATTGATAGCTAGTA -TATATTGTGTATATTTTGATCATAGTTGTTCATATCTGTTTTCTTTATAGCAAGTCAGTA -ACGATCGCCATAGAACAATGAGTGCAAGAAATGAAGCAACGATTGTTTACAAGTCAGTCG -AAAGATGAAAAGCAATACATTTTATAAGCATTAGGTGCAGCACACCTTCATGTTTAGCAT -GGAATGTAAGGGTATACATCATGTAATTGGAACTGAGAATATAGCAAAAATTTCTGCGCG 
-TACCTTTAGATACAGATGTAGGTATTCCCATCCTCCCAATCTTGTTCCGAGAATATAATA -TATATGCAAGCAGAGACGAGTTCCAGGCTACAATAACTGAGTTTCATGTCCTGTAGTAAT -CTTCAATAAAGTTAGCAACAGCTTCAAAAAAAAGATGAAGTGAGATGCAGATAGGCAGAA -TTGATATAGATCAGATAGTCCGTTTGGCTAGGACAGGTTTGGTCACCATGGAATGTATGA -TAGGGGCGAACACAATAAATTCATCATGATAGGAATTGGAAAAAGGAATTCAGGCAAGAG -CTTACCATTAGTGTTTAGCCTTGGAAAGAGTCAATCTCATTTGAAAGACTCTTTCCAAGG -TAGTTGGAGTGGAGTGAAGACTGATGAAGAAGAAGTGAAAACGAGAAAAAAATTCAGCGC -ATTTTATGTAGTTAAAAGCGGTATAATTCTGCCGGCAAGAAATTGTACTTAACTCCGTCG -CACGCCTGCGATGGGCCTTGAAACTTTGAGCCCCACGCAAAAGATCAAAGAAATTGAAGC -ATCCCTTTTTGCGCTTCTCCCTTAAGATCTGGTACTTACCCTCTATCCGCCTGTCCCTAC -ATCACCTGATTCGCTATGTCTGACCGAGAATTCAACTGTAAGTGATCTCCAGATCTTGGC -CGCCTACGCGCAACATGCTGACCTGTACGTGAATAGCAAACGATGACCTATCGCTGCCTA -AAGGTATGGTACAATGGCCCTTATGGATGTGTATAACTATTCGTGAGGACCTTTATCGCG -TTACTGCATATGCACTTGTAGCTTCACACACGCGTCTGCTAGTATTTGCTAACGTAACTC -TGTGATAGCGACGGTCCAGAAGATTATCACCGAGATTCTACCACCTAGCTCCGGCCAGAC -CTTTTCTAAAGATGCGCGCGACTTGCTCATGGAATGCTGTGTCGAATTTATCACCTTGAT -CTCCTCCGAAGCAAACGATATCAGCGAGAAGGAGGCCAAGAAAACCATCGCCTGTGAGCA -TGTAGAGAAGGCATTGCGGGATCTCGGGTTCAGTGATTACATCGCGGATGTACTTGCTGT -TGCTGAAGAACACAAGCAGCAGTTGAAGGTATGTTCGGGTTTTATTTGGAGCTGTGATAC -CTTAAAGCGCATGCTGACCTCTGTTAAATATAGTCACGAGAGAAGAAGCAGAGCAAGATG -GAGCAAAGTGGTCTGACGGAAGAAGAGCTTCTTAGGCAACAGCAGGAGCTCTTCCGGTCA -GCGACTGATAAATACCATGCTGTCCCAGAATAGAATCACCTAAGACTCGTGCTGGTCATA -GAGTGAAAAGGATGATGGCGTTTGTGTGTTCCACTGGATATGTGGGTCTGGAGTTGTTTT -CCTGAATGATTTTTGAATTTTTCCTTTCTCAACTTTGAGAGAATGGGTTGTGGGCAGGAT -ATACTATCTAATGAAGCGAAAGCGCACAATGCATTGGTTAATTGCATATGATTTGAATGT -TGGATCGACATGGTCCATCTCTCTCTAGAAGGCCCAACGACTTACAGCGTATGTGCAATC -AAAGAGTGTATCGACCATCATGACATATGAAGAATGTCCAAACTGTTCTGGCTGGGCCCA -TGGACGAATCCTACATACTTAGGTGACCATGTCGCAGTGTACAGTATAATATCTTATACT -GTATAGTTATACCATGACCTACCACATAAATTTGATCTAGTTCCAATATTTTATTATTTT -TTTGGTCCTGAAAATGGCATCATGATAGATGAGTTAGATGTTTGTTCCTGAATGAGTAAT -CGATGTTTGCTGTTACCTATGTACCCATCACCTGAGACCTTGAGCTTTTAAATGTTCGGA -AATATTATTTCCCGTCCAACTCCAACGCTACCTGCACGTGACCTTCAATCCGCCCAAACC 
-ACGCTAAACCACAATGGGAAGAAAGCGCAACCAACACAAATCGAGCCATCGCGGAGCTCC -TATGCGCAGTCATCATTCGTCGCAACGCCCTTTACTGCCAGTGATAACAATGCCTCCCTT -CAAGGATGAGCATATCTTGGTACGCTTATTTGCTCATATTCTCTCATCCAATTTTAACAA -ATTCATCTAGCTAATTGCGCCGGGTTCACAAATGACCTTGGCGCAACTCGGCCTGCCTGA -AACATTCACGCCCGCGCGATTTCGGTTTCCAACCCGCATGTTCCCTGCTGAGAAGAAAGG -AGAATACGAGCCTTACCGCGTTTACGAGAGACGCCAAGTAGTCAAGGTCAACAATGGCAC -GGATGCCCCAAAGGCAGATGTGGAAATGAAGGACGCTGAGCCCGTCGCGGCCGATGGTAC -CTCGGCCACAGAAACCACTGTCAAGACCGACGAAGATACACAGAATGATGGCGAGTCAAC -TAAGACTATCGAAGAGGTATTCTACGAGGAGGACCCAACATCCGACGAAGGGGCAATCTA -TCCGATCGAGAACGGGCGCATCGTCGACTGGCCCTGTTTCTTCGCTCTGCTCACTCACAT -TTACAACACCTTGAGTCCACCTTTCCATACGCCTGTTATGCTTGTTGGCCAGCCTGTCTG -GTCCGCACGCGATCGCGAAGCAATTACTCAATTCGTCTTTGAAAAGTTCAAGGCACCCGC -GTTTAGTCTTATGGACTCTGCTCTTGCGGCTTGCTACGGCTATGGTACAGCTACAGCCAC -TGTTGTCGATGTTGGCAAGGGCAAAGTTGATGTGACTGCTGTGACCGATTTCGCAGTCAA -CGAACACGGCAGGGGAATCGCACTGGAGGGATGTGGTGGAGACGCTATGACCGATCGTCT -CGTTGAGCTGTTGGGCTCGAAGGGATTTACCAGAGAGATGTGCGAGCAGCTCAAGCGAAG -CAATATTACTGAGATTCTACCCCCAGGAACCCCACTGCCTGGGTCTTCTGCTACTGCACG -CCAGGGCGCAAACCCTGCTGCCGCTGCTTCGACGGGCGGACCAGATGGAAATGAATCCGC -TCCGCGTGGCCCAGGAGACGGCACTCAGACAGGAATTGAGGGAGATAACGGGGAAGAAGA -CGAGGGTGTTCTTGATGTCGCTGCGATTGTCAGCGGCAACCCCACTGAATATCTTGCTAA -TGTTGAGAAGGAAAAGACAACCACCAAGAAAGGTGCTGTTGACCCTAAGGGAGCTCGTCT -ACCTAACTCGAAGAAAGAGAAGGCCTCCTTCCAGTTCGAGGAATTTGTTCAGTTGGAAGG -AGAGAAGGCAGCAGCCCACGGTTCTCGGCAGTACGTTCGTCAAACTCGAGATATCGAAGT -CAGTGTTGAGCGTTTCCTTTTGACTTCGCCACGTTCGAAGGTCGGAGACCGCCTGAGTAG -CGGTATTCTCGAGGATATTGCGACCCAGATTCACCACACCATTCTTGCCGTCCCGGATGC -TACAAAGCGAAGCGAGCTCTGGGACTCTTTAATCATCGTTGGGTGTGGAAGCAAAGTCAA -GGGTATGTATTATCATCACTACTCACAACTTAGGTATTTTCTCTAACATTGAAATAGGCT -TCACGCAGGCTTTGCTAGCTGTCATAACCCAAAAGTTCATACTTTCTCCATCCGCCACCA -TCTTTACGTCTGAAATTCCTTCAAACTTCACCACCCCCGTTCCAACCGGCGCCAACACAC -CTGCTCCTATGGGCCAACCTGGCCCGATGTACCACCCCGCAGCACATGGAGTAAACCCTC -TCCTTGTGGCCGCCACTCACAACAATCCCGGTATGCCCGGCACGCCTTCTATGGATCCCA -GCATGGTATCACACTACCGCTCAACTGGCCACTCCCAAACTCCAACCTCAGTCAAGACTC 
-TGAAGCTTCCTGAGTATTTCCCAGACTTCAAGGACCAGGGCAACAGTAACGCTCCCGGTG -CCAGCGGTGGCAATGCTGGGGGTAACCCGGGCGCGCCCGGCACATCTCAAGGTGGCCACG -GAATGGAAGAGGCCGTTTTCCTTGGCACTCAAATGGCTGCCAAGGTTTTCTTTGTCATTG -ACCAGGGTCTCAGTAAGGGCTACATGAGCCGGGTTGAGTACAACGAGAGCGGGCCGTCTG -CAATCCATGAATACATCCTGTAACTGCAGACACGGTGCTTTAGTACTCATAATGTTCTAT -CGCACAATTCTTCGTCCGCTGTTGAAGGCCTTCGCCAAGGCGAAGTTTGCTGGTTTGCCT -CCGAAACATCTCTTTCTTGTTTTATGCACGCTATGATCCTAAGTAGAAAAGCGATCTCAG -GATGCAGGCGTAGGAATTGGGGGCGTTTTTCTTTTCTAATTTTTGTCAGAATACTCAATG -AACCTGGGGCCTCAGCTTCTATTCCATACCTACAAGTATTCAGTCATACTATACCTAAAC -ATTAAATTAAGAATCAACTAACCAATCAAACTTCAGAGTGGCTAGGCCTCGTTCGGCTGT -GTCATGTGATGCATTATATAATAAAGCGGCTTATCGCGATCTACGCCACCTCCAAGCTTC -TCTCGCTACTTACTTCGAATACATCTCTCCTACCTCAGCTTTTTGATATCTTGACCCTGC -GAAGCTTTTCCCCTCTTACCTGTTTTTTTTTAACGACTCTCTCGACCACATTTACCAACA -TAAAGATGGGCCGCGCGGAGGCTGGCACCACAAAAGCCATCGGCAACAAGATCAAAAGCA -AAGGACTGGGTCGCCTTCGATGGTTTTGCCAGGCATGCGAGAAACAATGTCGCGATGAAA -ACGGTTTTAAATGTCATACTATGACTGAGGGTCACGGTATGAAAGTCCCATCCTTAATAT -TCTGGCCACCAGCTAACTTTGTATTAGTCCGACAGATGATGATCATTGGGGAAGATCCGC -GCAAGCACATCAGAGAATTTAGTCGCGAATTTGAGCGAACTTTCCTCGATACCCTCAGAA -CCACGCACGGAACTAAACCCATCAACGTCAACCACTTCTATAATCAGATTGTGGCCGATA -AGCAGGTATGTCCCTGATATTTCACTCTTTGCGGGTTCTAGCTAACTTTCATAGCACATT -CACATGAACAGTACAGGTATGTCCCAACGGAACGAAGGAGAGCGGAGAAGCGAGGACGAG -CATGAATGCTAACATTGAACGCAGAATGGAAAAGTTTATCACAGTTTGCAGCCTATTTAG -GTCGGGAGGGAAAATGCCGGGTCGAGGAAACCGAAAAAGGCTTGGTTATAGCGTGGATTG -ATAACAGCCCAGACACACTTCGCCGGCGGGAGGCTATTCTAAAGAAAGAACGCCAGGAAA -AAGGAGACGAAGAACGAGAACAACGCTTGATCCAGGACCAAATCAAGCGAGCCCAACAAG -CCGCGATGGCTAACGCTACGACCAGTGCAGATGCAGAGCCTGAGGCGAGATTGTTACAAA -GGAAGGAAGGAGAAAAGATGACATTGAATCTGGGACTTGGCTCGAAGAAGGCAGATACGA -AGCCTGCCTCGCCTCCTGCAACAGAAAATACAGCAGTGTCACTGGAAGATGCAGATGCAC -CAACAGGAAACACCGACTCTCCCGCTCCGCCTGCGCCTGCCGCACCAGTTAAGATTTCGA -TGTCAATGGGCGCACAGAAGCCGAAGAACGTCTTTGCTATGGCTAAGAAGAACCCTTTGG -CCGGGAAGAAGGGATCAATTTTCGCGGCTCCCAAGAAGATGACTGAGCAGGAGAGAATTA -TGAGGGCCGAAATGGAGGCAATGGAGCGAAAGCGATCTCGTCCAGATTCAGGCTTCACAA 
-ATAAGCGACCAAAAATCACTTAGTTAAAAGGTGGCATATCTTACATACAAAAATACATAC -CCAGCTACCCGGCCTACGGTTAAGCCTACCACCATCCAAAACACTTCATGGGTGGGCCAA -CTTTGGGCCGGGTACATGCATTGTTTCTTCAAATTTGCTTCATCTTGCAACTGTAATTAG -GGCTGGCTAACTTCGGGTTTTACAGAGTGCTGGTTCCTAATACAATTCAAATAGTTCTAA -CGTACATAAATTGTATGTGGGCTCTAGCCATCCAAATATATATTGATAACCTATTCAGCT -AGCTAAATTTTTCCAAATGACGTCGAGGTGTGTTTACTGCGAAATATTGATTGGAAATGA -CTTAGATATGGATGCACCACATATATCAGCTACCAATTGAATAAATACAAGGAACTTGTA -GAGGATTACCCAATAACTCAGTTGATCCTGCCCCTTGACAAGTCAGATTGTGGAGATGTA -TAGAATATGTTGCTCTCATAGCGTCGTAGGTAGTGGTGTGATTCTCACATTTGGCACAAG -TATCATTGCTTGATCCCGCGTTCAATCGCGGCGTTGCTATTTTTTTTTTTTTCCTGTGTG -TGGACCGAACGGGCTTTTTTGCTCAATTAGTCTATCATAAGAAAGAACACTAGAAGGATA -CGCCACCTACACGGGTCTGTCAAGCAACTTGGTGGTCTTTTTGGTCTGTTCAATAGTCTC -AAATATTTCCTCGAATACAACTTCTTATGAAAGCCTCTCCCTTACAATGTGCAACACGAC -TCCTCCGTGTATATAACGTGCTATCGAGGTATCTAAAAGCAAGGGCTGAAGGATTCGCTG -GTCAGCCATAGAGCACTAGAGGTGTGATGAGTTGGTGCCCAGGGGTATACCAACCTTGGA -AGGTCCATTTGTCCCTACACATTATTCCACCTTGAGCTCCGATATCATTCAAGCGATCTC -CATGATTGAATGAACTACATCATAAGCGAGTATGACGTTAGGTTCCTTTACAAGCGATGC -CATATCTGATTTTGAAACGGCCATGATCTAATTGGGTAAGACTCAAAATGAGCATGTTAC -TCTATCCTATAAGGCTCAACTATGTAGCAGTTACAAGGCGAGTACAATGTCGGAAGTCGG -CAATAAAGCCTCTCTCGCATGCGTCCAAAAAATTTTGCTTGCTTTGCTCTAACACGTCTC -TACCTATCGGCTCATTTGCTTCCCTTCCGTTCTGTTACTGTGCTACGTAGGACTGCAGAT -ATGATTTTGGAACACTCTCTGACCCCATTGTCCCACTAAGGAATTGATAATATAAATCAA -GAGCCCAGTAATGGTTGCTTGATTCTATATCCGCAACCGCCCCGAGGTACATAATGGAGC -CTGGAATTTCATTTACCTCTTTTCTGATTAGCCACTGGCTAACCTTCATTCTCTGTGCTG -CTCTTCTAGTAGGTTAAACAAAAAGCCCCTCATATAGGACTATTACTTTTTTTACTGAAA -TGCGACAAGTAGAGCACTATCTACCGCCGATTCACAAACCCTCTGTCTCATATACCCGGA -CCCGAGATATCGAAATGGACAGGAGCCATTTGGACATACAATTGGTTAAATGGGAAGATA -CCAATATACGTTCACCAGCTTCATGAAAAATATGGTAGGTGTTGCTGGCAGTTAGTGTTA -TCAAAAGAAAATATACCTGACGTTGAGATAGGCCCTATTGTACGAATCAGCCCAGATCAA -ATCGATGTATGCGACATAAGTGCCGTCAAGGAGATTCACAAAACCAATGGCCGATTTAAT -AAAACTATCTTCTATCGCAGACTAGTCACGGGCAATGTGCAAAGCATATTTTCAACCAAC -GATAGACAGTTTCATGCCCTCCACCGGCGTCTCTTGGCATCACCTATCTCCGACTCTTCA 
-CTAACTCGACTGGAGCCACTGATAGCCAATAATGTTCGTATAGCTGTCAACAAAATCAAA -CTGGATTTGAGGGATAGTGGTGCTACTGATGTGTTCAAATGGTGGCTGTTTATGGCCACA -GACATTATCGGTGAACTCACATTTGGAGACTCATTTCGAATGCTTGAGACCGGCAAGGTG -CGTTGAGGGCATGGGTTCGATCAGTACTATTCATATATGCTGATCAAGAATATAGAAAAA -TCAATACATTCTTGACATGGAAAGGCTGATATCCCTACAAGCCATTCGAATAACGTTCCC -GTTTTTAGTGAAGATTGCAAGATATGCCCCAATAACGATTATGAAGAACGCAGCGGAAGC -ACAGATTGGACTCAGAACGTATGCCATGCAGTCTATTGATCGCTACAAAAAAATCGTCTC -CCAAAACCCATCCGACCCAAAGCCAACTCTGTTCACAAAACTCTTTGATGAAAACAGTGG -CCTGTCGGACTCAGATATTATACAAGAGGCCCAGGGATACATTGTTGCAGGAAGTGACAC -TACAGCTGTTACAATGACCTATCTTACCTACTCCGTTTGTCGAGACAGCCGAGTGCGAGA -GAAGCTTGTTGCCGAGTTGGCTACTCTTCCTAAGCCTGTCGCAGATAAAGCCCTCAGAGA -TCTAACATACCTCAATCAGATCATCAACGAGACTTTGCGGTTGTATACGGCTGTTCCGTT -TGGTTTACCTCGACTAGTACCACCAGAAGGTGCTCAGTTCAATGGATACGATGTGCCGGG -AGGAATCACGGTCTCAACACAGGCCTATAGTCTCCATCGAGACCCTGCTATCTTTCCAGA -CCCGGATAAGTGAGTTTAAATATCAATAGTGCCAACTCTGGGAGTGAGCTAACAGAATAG -TTTCATACCCGAGAGATGGGAAAACCCCACGAAAGAGATGAGAGATGCTTCTTTACCATT -TGGTGGTGGATCAAGGAGTAAGTCCTTTGGTATTGTTCCCAGGTTGGGTCACCAATAGAG -CTAATTTTATAGTCTGTTTGGGTATGCACCTAGCTCGAATGGAGCTCCGCCTAGCCACGG -CGCTTTTCTTCCGAGCCTTGCCCAATGCTCGACCTTCCACGAAGGAAGGTATGACAGCCG -ACGACATGGAAATGCAATCATTCTTTTTAATGGCGCCACAAGGACATCGTTGTATTATTG -AAACATGAGTCTTCAGCATGATCTTGGAATCCGGAATACTTTTCATGGAGCCGACCCAGA -TAACTCTGTTGTTTCATGTGTGATTCGTACTAGTACGTCTGTCAAGTCTGGTAAATTTGA -TCATGGTAAGAGTTTGTTCTTTGGCAATGCTGCCTTCTAGGAACCACCTAAAAAAAAAAG -GGTTCCAATAACCATACTAGCATTTAATGGAGAATATGGAGATCGTTTCGAGGTCAAATA -AGCACCAACTTCTCTCGGAGATTATAGAATACAGGATATGCACCATGACGCCCTGACCTA -CATAGTGCATACGTAGGCAAGTTTTAGGTTCAGCCCTGGAGTGTCAGCCAGCTTATATAA -CTTGACGGCTTTGACCACACCAACGGCCTCGCTGAAACCTATCTGGTAACCTGACATGGG -AAAATGAGAGGGAAAGCAAAGAAAAGAGGAGCGATGTAGTATGGGAATTATTATACCATA -CAAAATTCGACCTGATATAATCGGAGCCTCGATTTCCAAACATACTAGGCAATGGAACAA -TACTCAACTGCTGAATATAGCGGAGATGGAGTGGCGAGGATAACCGCATGTCGAAGTGGC -CATATCACCATAAGAGAGACCGGGTAGTTTGGGTTCTCATCTCTGCCACGATGTACAATA -TACAGTTATTCTGAGAATTTTGCCATAATTATTTGTCAACATGTGGGCTCGGCTGTGCTC 
-GCGTGTTAGCGGGAACCTTGTACACACGAGAGAGGGTGTTGTCTGTCAATCATATCCGAT -TAAAGCATGATGTCTCCACTGCCTGCAGTTTTGTAAATACCTTACTTGCCTAGACGGCTG -GTTCTTTGCCAGATTATTATCAGATTCCACTCATATAACAGCTAAAAAGTGTAAATGTAA -AAAATACGTTATTATTCCCCCACGATCTTCCAACTCAGGTATCTAATATTACAGCTGAAT -TGAAAATGGCCCGACTCTCTAACCCTTTGGTATACGCTAGTAAGCGGTATGGCCCCCTTC -ATTTGACGTCTATTTACATTGAAAAGTTAGAAACCAAACACAGACCGGTACAGTCTTCGA -AGGCTCCAAGAAACGGTCTAATATAAGCATCAGCCCTCACCTGGTTCAGCCGCTGTTTCC -ATACTTTCAAAAAGCTTACTTTGACACTCTCCCACTTTCTATACCTTCCAAGTTGTTTGC -TCTCACCTTTTTGTCTGTGTTCAGATGTCGGATACCTTGAGTAGCCCATATGGGCTCTCT -GTCCGCAGAAATGGCTCTTGTCTCACCACCGAAGAAGACTGCGGCCAAACCTGGGAACCC -TTTCACGCCTGTTGCCCCGGTGGCACAAAATGTCCAGCCGGTCAAGATGACGTGAAATGC -TGCCCATCAGACGCAGACTGCTCCGAGTTGGTCGATAACACACATTGTGCTAACAGCACT -GCAAATGTCTACAAGGCCAATAATTGGTTTTGTTGTGCAGAAGGCACATCGGCTTTCCAG -AAAAAGAATGGATATGTCGGCTGCACAGACAACATATCCACCTTGGACAATAGTCTGAGT -CTCTTGAAAATCCGGTATCATGGTATGTAATATCCCGATGACATAAACCTCCAATGCAAA -GTCATAGACTCGTCGAGGTGGCTTTCTAATCCCTCACCTACAGGCACTACCTCTACACCC -ACGCCATCGACAACAATAGCCAGTACAACACCCGGCATCACGACCACTACAAGCACTACA -AAATCCTCACATTCTGCTACCAACTCCGCAAGTTCTTCTTCGTCCTCGTCTAATACCGGA -GCCATTGCTGGCGGCGTTGTTGGCGGAGTAGCTGGGCTGGTCATCCTGGCTGGCCTACTC -TGGTTCGTACTCCGTCGCCGAAACAGAGTAAAGAAGAGCATAAGGACACCTACGGATCCA -AGCCCTTTAATGTCCAGTGCTCCTGGATCGAGTGTTCCCGGGTCGAGTGTTCCCGGATCG -AATATCGTTGAGTACTACAATAAAGGCCCCGAACGACCACAAGAGCTTGCAGGCCGAAAT -GAAAACAAGCCTCCCCAAGAACTTGGTGGTCGAGATGAAAATATGGTCCATGAACTGCCA -TCGCAGACGATATATCGGTAATGGCGTGCACGGTGTTTAGGTGGAGTAAATATCCGAGAG -AAACCCCATACCCCCTCACTTTCTTTATGATATAAACATGTAGTGCGCCGAACTATTTCG -ACTCTCCGGCGCATGTCAAGATGACAGGCCACTTCTGCGAAATCACACAACATTGGTGTT -TTGTCAAGGGCAAAACCCGGAATATAGAAAATCAAAACGGATCCGCTGGTCCTTACATCT -CACTGTCGACGTAACTCCCAGCTGCGGCTAACGTAGCGTAGTAAAGGTCTGTCTCAGCGC -AGATGACCATTCTTCCCTTTCCAGCGAAATTGAAATTCGCCGTACCAGATCCAAGATAGA -TCTTGCCGATGAGTTTGCCAGATGGATTCCAGACATGGACACCATCGCCACAGCCGGCAT -ATACGTTACCCCATGAATCACAGTGGATGCCTTTTGAAGAAATTAGCCTCAACGGCATAT -GAAGTGTAACTTGCTTGACTTACCATCAGGTGCACCAGAGTGAACAAAGGCAAAGGTCTT 
-GCGATTTTCGAAGGTGCCGTCGGCTTTCACATCAAATCGGTAGCTGTGACACAAATTTCA -TAATCAGCTTTAGGCCCAAACGAGGGAATATTAGAAACGACAGTACATACATAGAAGCAG -GGCGGGTAAAGTCGAAACCGAAAAAGCCACTATCGATTCCGGTATCGGCGACATATGCGT -AATTGCCATTTGGAGAGAAAGTGAGGCCTGGTACAATGTTAGTTGCATTTTCCCTTTCCA -GCCTGAACACTTTCTTGATTTCCTACCATTGGGATGGTCGAATCCATCCGCAACAACTGT -CACAGCACCTGTATCGGGGTTGAACCGGTAGACTTGATCCTGCAGACCAGGCGCAGGGCG -AAAGTCCTGTACATAGCCGTAGATCGTGTCTGTGAAGTAGATATCTTTGTTTTTAGGGTG -AATGGCCACATCGTTCAGAGAGTTGAATTGGCGGCCGAAGTAGTTGTTAAGGATAACTGT -AGACCAGTCTGTCAGCCCCCGAAATTCAGAGAGCTAGTGTGCACTAACCTGTGGTGTTAA -AAGGGCTTTTTGGGTTCATAACCCACAAAGCAGGCGGTTCATTATCGCCTTGACCCTCAC -CGGCGAAAATGATCTGTCCCCGGTAATTGGTACCCCCTGGAGCGCTGTTAGTTGCTTTGG -ACTCGTAAGAGGAGTTGAATGCCAACCGTTTGGATTGAGAACCATTGGCGATGAAGGAAC -TGTGGTTACATCAACAAGGCCGACAGCATCTGATTTCTGGGATACAGCAGCAGCGTCTGC -AAGAGAGATCCTCTCAATGATTGCCGACTTTTCCAGGCCGGTGCTGGCGTTCTTAGCGCC -TGCATTTTGGACGAAAAACACCTCGTCCGTTTCCTTGGACCTGGAAGCTCAAGTCAGATA -CTATCTAACCTTGGATTGTACTAAATTTGCGTCTTGAATACATACCATACAACCGCCTCA -TGGAATAGGGGGTCCTTCGGGGAGTGTGCGATCCGGGTTAAAGTTGGAGCTTCCCCAATG -ATTTTCAGAAATTCTTTGTCGTATACATGGAAAGGCTGCTGAGCTAGACTTTCTCCTGTC -GAGCCTGGAGGTACAAACGGCTATGTATGGGTTAGTACGATGGGCTACTGGAGACTGCAT -GAGAAATCGCATACACTCTTCGCATTGAATTCGCTCGGGGGCTGAGTCGTATCTAGGACA -TTGAAGGCTCTCTGGTCAATCAACTGGGCTTGAGCAGGAATATCACTTGAGCTGACTGGT -GCTAGGAAGGATCCTAGCAGTAGCAGCGTAGCTAGCGAGGAAAGCTGAGATGCCATCTTT -GAGACAGTGCCAGACGCGGAGTACGTAGAACAATGATCGAGGGAGGGGGAGACCGGGGCA -TTTATTCAAGGGGCTGGGAAAGTGGGGATAGACCCGTTTGCCCTCTGAGATACACTACAG -TTCTCCGGAATTGCAGTCAATGCTGTGGCGTGTGTTAGTATTCTCGGAGTTGGCCCGATA -ATCGTCACTGCGCTTGGCACTGGATCGGCCCGATTCGCGACGTCAGCGCCGGAGCACGTG -TTGCCTTTTTTCTACTGAACATTCCAAGAGTTACATATCGCACAGCAGCCGGTGTCTGTA -GTATGGGGCTTCACTGCATGAGACGACTAGTCATCGCAAGTCGTTCATATTTCTAGATTT -AAAGTGTTCGCTCGCAGTAATTTGTCTTCGTTAGGGCTGGGGGTTTAGGTCTTCGTTCTG -GTATACACCGTCTGAAGTCAGTTCATTATGACTCGCTTCGTCATTGGAAAGTTGTGAGCA -CTGCGATCACTATGTGACACAAAGCTGTGACTGATGAGACATACGCCCAGAAAGATAAAA -ACCGAGAATGCCATTCAATAGAAGGAATCACTAAATATTGCAGCAAGAAATTAATATTTC 
-CAAGGTTTTTCGACATCCGAAGAATAATTTATGTCCTTACTGATTCAGTAGTCGATTGGT -CTACATCCTGAAGTTTATGCAGTTCATTGAACGAGAGGCAATGATCAAATCACCTCCAGC -AATGTATTCTTAACAAGAGGCACAATTAAGCATTTGCACCCCCATCCACATTCCACGCAG -CCCCGGTCACGTAACTGCTCTGATCACCAAGTAAGAATGTCGTGACACTTGCGACTTCCT -CTGGTTGACCAGGCCTGGGTATCGGTGTATTCGATGCGGAGGAAGATAGACCAGCGTTGT -CTATGACTTCATGAAACATAGGGGTCTCAATAGCTCCTCTATATAAGCTGTCAGTTTTCT -TGCTCACAAATTAGGGTAAGACGGTAGGAGAACACGCGGTAAAACACAATTCACTCTGAT -ACCTCTTGATCCGACCTCGAGAGCTGCACTTTTGACCATCCCCAATGCAGCATGCTTGCT -AGCAGCGAAGACAGCCCCGTTCTGGTAGCCTCGGGTACTAAACATGCTTGAAATGTGCAC -AACACTTTTCAGATCACCAAGGAACCCAGGGGTCAAAGCTTCACCTAATATGTTGAAAAG -ACCCTTGATATTCAGGCTGGTAATAAAATCAAACTCCTCAGGGCTTGTTTCCCAAATTGG -TTCCACTCCCAATTGACGACCGCCCGTTCCCGCGAAATTCGCAATCCCATTGATGGCCCC -GATGGAATTCTGTGCCTCTATCAAGACGGCCTTGACAGCAGAGCGATCCGACACGTCAAC -GGTTTGAGCCAAGATCTTCCCTGAATATTGGGTTTTGAAGGACTTTGCAATTTCTTCAAG -CTTGCTCCTGTTGATATCCCATAGAGCAAGATGGACTCCACTACTAGCAAGACTAGTAGC -TGTTGCGAGTCCCATCCCCGAAGCACTTCCAGTCACAACGATCACTTTTGTCGCCATTGT -TGTCAGCGTTAGACGAAGCAAGGTGGTCTCGAAAGATTTGCGAAGAGAGATGTTACTGAA -ATGTTCTATAATTTAGCCGTCATGTACTCCGTACTAAGGTAACAACTCTTGCGTCAGCGA -CCGGAGTTAATAGATGTAACCTAAGCGCGGTCATCAAGACCCACTCGGACTAGATATAGG -CGGTTTGAGACGCATTTCATTGGAGCATTAAGCACCCAGGCCGCTCACCCCTGCACTTAG -CAGCACTCAGTGAGCATCAGGGGTCCTCAGATGATGATCAGGTGATTCTGAGCAGATCAC -TGTGATTATGTAATACATGAACGGGCGCCCGGGAGGCCCCGGGGGTCCAACGGGCCAGGG -GCTCAGGGGCAGTTTCGCAGTGATCAAAAAGCGGCAAGAGGTGAGCAAAACGGGAATGAA -ATGGTCTCAAAGTGCCTGACATTATCCAATACATTGTTGTGTGTAGTGCTCTGGTTTTCC -ATCGAACCGTCAGGTATGAATCTTGTTATTAGACCAATATAAGAGATCGTGGATTAAATG -TCGGTGGACTGGCAGCCTAAAGCCAGCCTGTTTACCGGTCTGGAAATCCAAGCGATGAAC -TTAGTAGTATTTGTGGACAACATACATACTTCTGCTCGGCTGTGAGACTCGGAGTTAGAC -TTGAATGCATTCAAATTTCCAGTATAAAGATCCCTTGTTCTTATGTAACTTGTGCTTCGT -CGAAATTTATATAACGTACACATCCCTGTAGTTGCTGTGTTCGTAAGTTCGACTGGCCAA -ATCCTACATTATGGAGGGACGGCCGGAACACTTGGATGTTGTTTGATTGGCTCTATGATC -AAGTGGAATACAGTGGTGTCTACCCGTGATGTCATCGTACGCTGAAGGCCTGAATATACA -GACATAAAAGCACCGAACTTGACTGAATATAACTGACTCTGTGTGACAAATTTTCTATTT 
-TTGGGCTGAGGAATAGAATGGATGTTATAATTATTCTAGCAAGGGTGTCGTCGGTGGATA -AAACGCAGCACCATATTGGTATGTACTTCGTAGTCAACTCCTTGTGGCTATTAATGCTAA -AGTCACCAGGAACAAGGCCGTAGATGCTGCATTTTTGAAATTAGGGAAAGCGTCTCCGGA -AATTTAAGTTTTTAATGAAAATTGCGAAGCCTAGCCTACTTCTTTCAGATCGTTCTAGCC -CATTTGCATGTCTAGACTTGTCTTTTGTCTTAAGAAATCCACAGTTTTACCCAAGTTTCA -TAATGCTAAGCACTTTACGGGGCCTTTGCTCTGCGTAGCGTCTAACTTTATTTTCGACCG -TTTCTAGCGCTAGAAAAGATCAATCTTTGTTCGAATTTCCGAAACAGGGAACCTGATACA -ACATGCCATCCCCTGTTGCCGCATTTGGGGCGATTGCAACATTATTACCGAGCAAGGCCG -AGTCTCCTCCATGATTTACTTTTCAAACAATGATGTCTTTATTCCGCCTCCGTGTCCTTT -TGCTTTAGGTCTGGGCAGTGGCGCTTCAGTGGAAGACTGCGGAAGACAGAAAGAATCAAA -CGATCCACTAGGGGAAATCTAAGCCGCCGGACGATGGTGGCGTGCTAAGAGATTTACACG -ATGGCTTCCATGTGAAGCAGGAGCCAAGTCTGAATTGTGGACTTTTCTAAGAGAAAGCGC -ATACGTCTTGAAGAGAATTCAAGGCACGAAGTAGTAAACCATGGATTGTCTCTAGACATA -GAGCAGGGGTAAACAATGATTGGCCACAGTACAACCCATTCCCTGGCAAGACCAAATACT -TACTACGATTGGGTTGAGCTACTCCACACTTCAGACCAATCGTACTACATCTTCTCCTGG -ACCACTGTATCGGTGTCCACGGAGACACAAATTCTACGAAATTCTATCAACAATTGACTT -CGGAGACCTCTCAGGGAGCAGGTGATAGGATTTCCCTAGTACTGGAGTAGTGTGGCATGT -GTTCAACTGCTATCAAGCTCGACAGTACATCTTGACAATACAGCGGCCACAGTTCTAATC -ATCCCGGATCTGAGTTCCTGATAGTTCCGTTGCCTTTAGCCCCGGACCAAAACGATCAGA -ACCCTGTCGGTCCCACATTAGGGCATCCTCCTGGAGTTGGACCCAAAGGATGCAACTTGC -AACTTCCAACGCCTCCCTGCCTAAATCGTGGCTTCCACCCCGACTTTAGTCCGGCATTCA -TCCAAGGGCAATGGATGATCGGACCCACAGTGTCTACTCCACCAATGAGGGCTGTTCCAA -AGAATCGAAAAATCTAGATTGACTGGTGGCTTGAATCTATAATGTGGACCCAATCAAACT -ATATCTTCTGTTATTTGAACCATGGAATTTAAACCAACTTCATTCCATCTCCAATGGCAC -TGCTTACATTTAGCCGAATATCCACTGACCCAAAGGTACCGCATATGAAGCCCACGGCCC -TGGGCCTCGACCTTTGTGGAATCTTCCATGGTGTACGAACCCTTTCAGGCGGTTTGACGT -ATGTGCGCCCCCCAGTGGTTCGCTGGTATGGAGCCAGGTACCCGGCACTTTACGGTTCAA -CCTTTGGTGTCGACTCACGAGGCAAGCGTGGCGGAAGAAGATTTTGTCGCTTTTGAAAGG -TATATATAGCTGTCATTGACGTTCCTTTCCGTTTTCTTGCTTCAAACTATCACATTCCTT -TCTGATACCACCGTTCATTCATTCACTACTGTCTTTGCAATTGAACCACATATCACATTC -ATTTATATCTTCATTGATATTCACCTTCTTTTGTGACCCAACACAACACAACCTTCAAGA -TGCAATTCACCAAGTCTATCATGCTGCTGGCAGCTGCCCTCACTAGCAGCACTTTCGCCA 
-GACAGACAACCTTCGAGCGCCGCCAGGCCATTGCTACCGGTACCTCCGGTGGTGACTGGA -CCTCCACTCCCTCCAGCGGCTCATACTCGACCGACGGTTTCGGCAGCTCCACCAGCTCAT -CTGGCTCAGGCGAAACCTACGCTGGCAACGTCGGCAACCCATACGGCAGCAACATCATCG -TCGTTGACGCCGCCTCTGCCAGCAACTACAAGTACGTTGCGCAGTTCACAGGCCACAACA -CCGATGACTGGACCGTCGCCATCTTCAACAAATATGGCCCCGATGGCAAGATGACCGGCT -GGTTCGGCAACGCCTGTAACACTTTCACCCTTGCCCCCGGCGAGACCAAGTACGTTGCCT -TTGATGAGGATACCAATGGTGGCTTCGCCGCCGCCTCAGGCTCTACCATCCCCACTGATG -CTATGGGTGGCTACGCTTCCACCTGGGGCGAGTTCGACTTCGGTTCCACCACCAACAGCG -GCTGGTCTGGATTCGATGTCTCTGTTATCTCTGCGCAGAATGCTGGCATGTCCATCTCCG -GTATGAAGATCTGCGATGCTCTTGGCTCTGTTTGCTCTTCTGTCACCCAGGATGCCGGTG -TTGTCGACAACGCTTACACCTCCGCTGAGACTGACATCGGTGGCATTGGTGCCAACTTGG -GCTCTGGTCCTGTCCGTCTGGCTGTTACCATCGATTACAGCGCATAAATTTGTTATTTAG -ATTTGGTTTTTATATTATTTTGAAGATAGGACTCTAGAGTCATTCTGTTTCGGAGGATGA -TCTATACCCAGCTGGAACTGTTTCTGCTTCTATGTCAATATAGAGACTTATAGGCTGGTC -CTTTTTTTGGATATTTTTTTCGCTATACAATACAACACATTCATTCAAATACTTTTTTTC -CATTATAGAATTTTGAATCACATTCAATTATAGAGATTCAGGTCTTACTCATTAATGGGC -AGTCTCAAATGTCCGCGTGGCGTTAACGTGGGTGAGCGTTGTGTACGCAATCACCCACTG -TTTAGCCGCCACCCACTATGAGATCATCTTCAGGAACAGTGCGCCGATTCAGAAAGATAG -AATTAAGAGAAAATAGAGTTAAGCTCGTCCCATTCTGAGATCTGTTCATGATAAAATTGG -AGTATATTGACAAATTCATTACTGACTTCATATCCGCTGAAACTCAAACAAAATATCAAT -TCCAAAATTACTTCCGTGCGCTTACAAGACCGTAGAACTCAAGCGGCTTTATACTGAAAG -CGCCGTTCGACTCAAGTTTGTACTTGGGATCCGCCAGCTCAAATGTGGTCTCCTTAATAA -TGCTGGCGAGAGTGAAGAACGACCACCGCCAAGCCCAGAGCTTGCCAATGCACTCACGCT -TGCCGTTACCGAAACCCTTCTTCGCGGCAGCAGGGAGCTGATCCCACTTCTCGCCTAAGA -CACGCTCGGGCTTGAAAGCCTCGGGATCCTCGAAGACAGCCGGATCGCGGTTGACCTGAC -TCAAGACCGCGATCATGGGCTGGTTATGAGCGATTTGGTACTCGCCACCGCCGAGAAGGA -CAGGGCTCTTATCGTTGGAGGGAATAGGCTCGATGTTGAAGCCTGGCGCGGTAGCAGAAA -GACGGAGCGACTCGCGTAGGATGGCCTCAACATAATTGAGGCTCTGCAGGTGCTCTAGGT -CGATATTGCCGTCGCCGACCACACGGTCGATCTCCTCTTGCGCCTTGGTTAGCTCGCTCG -GGTTCTCCATCAGGTAGTATAGGGCGTAAGAGACGAGATTGGGTGCTGTGGCGGCACCGA -TGAAGATGGAGATGATCTCATCGATAACCTGCGACTCGGTGAGCTTCTCGCCGCTCTCCG -GGTCGGCACCGTTTAGCATCGCATCTAGGATGTCCTTGCGGGCGGTCTCTGGCTGCTCTT 
-TTCGAGTCTTGATCACCTGCGCAGCGAACCCGCGCATCGTATTCGTGTCTTTTTCAAATT -TGCGCTGATACAGCCAGTTTAGGAACTTAGGGCGGGTGGGTCGCTTCATCGCCTCCATGG -TGGCGCCGTCCATGGCGTTGATCATGGGCGGTTCAACGCCTTCAAGCACGTGGACACGCT -GGTTGAAGAAAGATTGCATGCAGGATGCAAGCAGGAGACGGTCCAGGTCTTTGGTGAGCA -GAGTGCGAGCCTTATTTCCTGCGCTCCACTTTTTGGTCAGGTCCGGGATAACCTCTGCCA -TGTCGCCAAAAAGGTTGTCGGTCGCAGATTGCGTAAGATGAGGCATCATGATCCGGTGGG -CGATACCCCAGCTCTTCTCGTTGTCGTATGCTGTGAACAGACTGTCGTGCACGGCATATC -GGATTTCGACTACTGGTCCGGTAACACACTTGCGGAATCGGGTCTGGTCGCAAATTTCTT -CCAACAGAGCGACACTGCCGATGAATACAATTTCATGGCCGAGAGCATTAATCTTGAAAA -TGGAGCCTGAAAAATTAGTAATGACTCGTTAGGACTGGGTGTTGCTGAGGTTCGTAGACG -GGTTGTTAGCATACCAAATTGCTTGGCCAACTTGTTGAGCGAGTTCCACGTCTCATTGGG -ATTTACGTCAAAGACATTACCCAGGAGGGGAATTCCGGGTGGTCCTGGGATTTTGGTAGG -CATTGTCGCTGGGGAAAAAGGATGGGGGGTATCACTAGTATACAAAAAGAAGTATCCAAA -CTCAGAGGAAGAAAATGTCCTAAACTCCCAGTGGGCGGGGGAGCCTTATAGCACCAGCTC -TCAATCTTACTCTGCATGAAAGTCTGCATAGTTTTCCGATCGGCAGATCAAGGCGACCTG -GGTCATTGGCGCAAGAGGGATCTCGCGCTCGGCCTAAGTCTCCATCGTTCACCAGAAGTT -TGGCGAGCTGAAGGATCGGCGCGATCGCGATGTAATAGCGCAGATTAGTGCTGCCGATCG -CACCTGGATAGGCAAACCGTCAGGCTATCTGGGGCCCATCTGCAGCCATCTGGGGCCACT -TGAGTAGCTGAAGCGCGCTACGGGCTGTAGTGCAACCAGTGAGCTCAATAGGGTTCAGCA -CAATTGTCAGCCAAAATCTTCTGCAGATTTTTCCACTGCATCTTCCTGGATTCGTCTCCA -CGCATTTTGACCGCTTCTGTGTCGCATTCTCTTGCCACCAGACATCGTTGATCATTGTTG -GAGGGGTACGATAAGCATCACACAGACGTTCGTTAGACTCCGAATAGGGATCGAATAATG -ATCAAATACGATCTGTTTTGTTAGACTGATTTTTAGATATTGACTAACATCATGCCAGCC -TCCCTATTCCCCATAATCTGCAACCTCCTGCAAGGATCAAGGCCCATGCTGTGTGGACAG -CATCCACTTACCCGAGTTCATCTTTGATCAAGCTCCCCCTTAGCTTCTCAAAGTCAATAC -CAGAATGAGGATGTTGGATGACGTTAGTGTACAATGAATGTTGTACAATCCAACTCCTAG -ACACCTGGGATTCCTTGTAATCCCAGTAAGGAACCAGGCCGCTACCCAGGTAGGTACCTA -ACTAACTAGACTGGAGGTTAGTTAGTTATAATTCTGGTGTATTACATAAGAGCACTTTGC -CGCCAAACCAGAAGATTATAGGGACAATCGGAACACCCGATTGTCACTGCGGCATATACG -GAGTCTACTAGGCCCTAATAACCCCAGAGGACCACATCCTACCGTTGACACAGTCATGTG -AACCGTTGAAGTGATAAAAATACATGATTTCCCTCACAGGATTGCTATTAACTGCTTAGG -CATGAGAGAGGTTACTCTGAATGTGTGCTAGGTTCTAATTTGATAGTAGCTGGATGGATG 
-TATATCTAGGAAATAGTACAGCGTCAACACAAATGAGTCTCTAACCTAGAACCCAAAGCT -GTCCAGCAACGAATTGGCATCCGGCTCATGAAACATCGTAGGTGGCTTAGGGTCCGGCTT -CTGTCCTTCTTTCTTCGGTTTTTCCCCAGGTGTAATTTCAGCGACGGCCGGTCCCTGTAA -GACATCTGCTCGGTGCACCGGAATATCAGCAACTACGAGGCTGCTCCGCCGACGAAGTAG -TTTCCGGTAATCGACTCCGTTCTCATCCATCTCGAGTGAGGTATTAGGCTTCAATGCGTC -CTGGGTAGGCAGTCCGCCGAGGAACTCCCAGTGGCGTACCAGGTCGAGAGCGAGGAAATC -GCAGCCCATACGGTCGTAGAGTCGGGCATTGCGAATAACAAATTCCCACTCTTCTCTAGG -TCGCACTTTGGAGGCGCCTTTGAGGGTTTGAAGTGTCTTTGCGCGGAGTTGGTGGTACAA -TACTACCAAGGCAGGGTCGTTGGAGAGGTATGACTTGGATTGAAGGGTTACTCGTCCCGG -GCTTCCCGGGGTGACTGGGGTTGCTGGTGATAGTAGGGTCTCGACCGGCGACTAAATCTA -GTTAATTGGGGTCTGCTACCGAGAGAAGAAAGGTACTCACAATCAGTGCCCGCACTGCCA -TATCTCGTCGTCCAAGCATCCAGAAAGCCCAAGAGGCCATCCAGCGGTTGCCCTCGGAAG -CGGCTTCGGGCAATACTCTTTCTTCTAGAATCTCCCGAAGAACTGGTCCGTTGTCCCCTT -CTGAGGCTCGAGTGATGGCGATCGCGAGTTGGAGATCACCAATCTGGTTCATGATGACGT -TTGCAGCGTCACGCAGATGATCAGCCAGAAGGAAGAACGATGCAGCATACTCTATTGAAT -GTTAGCTGGCGGTTCTTATTCGTAGACAATATGAGTTTCGAATTACCAAATCGTCGTTTT -CCAAGCAAGGCATACGCATTCTTGAGCGCAGATGTCCTCCATCGCGGTTCCTTGAAATCG -TTTGCCAACAAACGTTGGGTTGCTGCTTGTTCGCGGTTCCAATGTGCAATACGCCAGAGA -CCCTGAAGGACGTTCTTCTTTCCGAGAGCGAGATAATACAGTGTGCAGTCGATGGGGTTT -TTCTCTTCCGTTTTGGTGTATTCACATCGAGCGATGATTTCGAGTTGCGCTTTCTAATTA -ACCGGTTAGTTTGTGGTAAAGTAAATGAGATGGGAGACGCACCACAGCTACAGAGTCCGA -CAGCCACATGAAGATACCGCTTTCACGGGCGGCCTTCCAAGTCAATTTTGCATTGAATTG -CCGGGAGACAAGATCTGTGAGGATATCTTGGCTACTGCTATGGTAAGCCCAAACAATCTC -TCGCCATGAAACAGTATCTTTGTTGGCGACTCCTTGGGTCCGGCGTAGCATATGCTGTCG -GAAGAATAAGAGATAGCGTGTTGCATTATCGTCCATTGAGCGCCGGTGTTTCTCAACCAT -AGCCACGCATTCAATGGTATCTGCCAAGCGGAATTGCTCGTGGCTTGACAATTGCGGCAA -GGCGATCCGCGCTAGACTCTCATTGAGCGCGGCTGCGGCATTTTCATCTACGACAGATGA -CAATTCCTCATCGGTGGCATCAACGTAGGAAGAGCGCATTTCCTTGGAGAAACCTTGTTG -GGAATCCTGGCTTGTGTTAGTGTTAGGAAGAGAAACGGTACGATGCGCACTTACATTTTG -TTCGTAGAGATATTCTACAGGCATCTCCAAGAATCCATCCAACTCATCGCCCTCGGTGTA -GAATTTCAGCTTGCGATGTAGGTTCAAGAGAATCGAATGTACCAATCTAGTCTTCCCAGC -CAGAATACACTGTGCCAAGTACTGTGGATGAAATACAGGCAATGGGCCATTCAGCCGGCT 
-GACAACGTCGAAGAGATCGACAGACGCCAACCCTCGAGACGGCATCCGAAGCTCAGACAC -TAGGCGGTTTCCAACATCAATGTCCTTGTCATAAACGAACAGTTGATTGCCAGCTCCAAT -GACCAAATGGCCGTTATTCAGCCAGCAAGAGTCTCCTATAGGGTGAGGTGTGAGATCTCG -GATCCAGATCTCCCGAACTTGTGTCCACGAGGGTCGGGAGTCTAGATAATCATAGCGGAG -CTGAGACAGGAGAACAACCTTGTGTGGGAATCCCACGGCTAAGATGGACTGCTTGTCTGG -AGTTGATGTCCAGTCTAAATCCCGGATAATGTCATGTGAAGCAAAATGTTCTTCGAACTC -CAGCTGAGCATTATTGGTGTCCCAGATTGTGAGGTGAGTCCGGTGCTCATCAACCAGAGC -TGCCTTACGAATAGAGCTCCCACTAGCCATAGATGGGTTGATGATGCCCGTATCGACAGT -TGATTGAAGTAGCCAATCGACCGTCATATTCGTTGGATCAACCGTGGCTGTCCATGTCCG -CACAACTCCGCTATGAGTGTAGGAGAGAGCAATGTCGGCAGAAAAAGAATCGAAGAAGCC -AGATGCTTCTGATTTAGGACCTGCTGGATCCACGGGGTGGATATATGCAATATCCTCCTT -CAAACCCATGTCAAACGTGCATAATTCTCGTAGTTGATAGTCATGGCCATTAACAAAGCT -CCCTTTATGACCTTCCTGACTCGGGCCTCGTATCTCCCACGCAATTCCGTGCATGTCTGC -CCATATCGTTGCTACATAGAAAACACCTAAAGTCGCTTGGGAGCTGGGGATTGGCAGCAC -GCATAGTGGCTTGCCGGGCAGTGTAAATTCCAAAGCAGCCAGTTTCTTGGCATGATTTGA -GCTGAAATCCCATAGGGAAACACCATTGTGATGGAGATTGACGAGAAGATTACTGTCCTC -CAGCACACATGTCCGATGAATATGCTCATCTGAGAGGAGGATACTCTTGTGTGCTAAGAT -CGATCTTCCATCACGACGCTGCTGTGTCCATACAGTGGCTTTATTTTCATTCGTACGCGA -CACCAGTGTATCTCCAATCGCATTTCGGACAATCTTTTTCACAGGGGCAGTATGGCCAGA -CCAAGAAGCCGTTTGGGTGATTCTTTTCGTGCGTGGTGCAGGGTCAAACAAAACATCCAC -ATGAGAATCATACCACTCGATCCGACCGTCGAAGTGATGGACTAAAACACTGAGCTTATC -GTCCGATGTTGTATTTTGGAATGCACGAATTTGAGCATAATCTTCTTGCGGTGACTGATC -ATATGAAAATCCCAAGTCCAAGCCTTCAACATGGAGGACATTGAAGACGCTCAAGTTAGT -CTTTGCTTTAGAACCAATATCTTCTAAGGCCCAAGCAGACATATGGCCCTGGCCATCAAT -CACAACACAGATTTCTGGAGCCTTGCTCGCAACCTCGATGATGTGTTCTAGCGCATGGTT -CTCCTTGTTTCCTGTAGTCCTCTGAACAGCACGCTCGGTCGCAGCACAGAAGTCACGGCC -ATCCAAAATAAACCCATATCGCCGTTGCGGTTCTTGGTCTTCATCCGACGCATGTCTGGG -TTGAACCGACTCACCCATGTCAATCTCTGCCCAGAACTGTAAGGCAGAGAGCGCATGAAC -GTCCGTCACTGCCCACACTCGAATCTTGTTATCAGCGCAAAATGTATACAGGACATTATC -CATAGATTGCTCGGGGTGTCGAGGTAGCCGCCAGTGTATCCCCGTGACAACATTCGGATG -AGGCAAATACGAGTTTTCGAACCGGACCTCGTCTGCGCCGAAGGCAAGTCGTCTCCATAC -CTTGACCAAGCGGTCATATTGTCCTACTGTGACAATCAAGGACGAATCCGGAGAAAATTG 
-CGCAAATTTCACGGGGGTCGCAAGCTTTCTTTTCCAGACAAGGCGCGGTTCGTCATTCAT -AAACCAAAGGGCAAGACGGGAGTTGCCCACCAGGAGCTCGTTCGAGGAACCCCACGACAG -TGTATAGATCGATTCGTCGTCATCGGGAGCGCGGAATGTGTAAAGAAGCGACCACTAGAT -CAAGCCGTGCGTCAACATCAATTGTGATTGCCATTATGAGACTGTGTTGCTGGTCGACCA -AGCTCCGAGTAGTGGGGTTGTATGCGCACCTTGAGGGTTTCGCCCTGGATCCCATACGGC -TGGTACACAAATACATCAGGACCACCACAAACAGCAATCTTGCCAGATGTTTCGTCAAAC -GCTATGGCCTCGAGGGTATCTGTATCGTCGACATATATTGTTTGGAGAAGTGTTTGCGGA -CCGCCGAGGATAACCAGCGCGTGGCCGGAGATGTATGCCTATTGGAGAACGCGCGGGGCC -AAGGGTCAGCGACAGGGGGCTTTCAAAGAGCCGCGGAGCTGGAATGAAAATCTGGGAAGA -CTTGAGAGTACTTACAACAACTCGAAGACCATCCCACAACGCCGTGCTGAACGACTGGAG -CTTCGCTGGGGGTCTCCCCGGCAGTACCGCTCGCATAGAGGCGGTTGGTTCGGTGGATGG -ACACTTTTTGCGCTTGAAAACACGTTATAACATTGCCAGGTCAAAGATTATTCTGCCTGG -GAGCAAATAGAGTCACAAGGGAAGCGAAACGTAGACGGGTGTTTGAGTGTTGTCTGGATA -CACGCAGGGGACAAGGCGGCAGAGACCTGATGATGTCGCTTCCGCTCCTCCATTCGGTGT -GACTGATGACCTTCGATTTCAACAACACAAAGAAGCTAGGTAGGGATAGACACATGATGT -GAGTTCAACAATTGTAATTCTTGAGTGTGGGAATTTATATTAAAAGCGCTCTAAGTGATT -TGGGGGATAGCTATAGAGCTCCCCGATAAGCATTTACCTATATCTGACGTCGGCGATCGA -CGAGGCGTTTAAGTAGACAAAACATATCTCATTTTATTTTACAAGTTTATAGGATTTTTT -TTCGGAGAGGTCCCTTTCATATTTAGAGTATCACATTCAATCATGGCGAAAGTACAAAAC -AATATACCAAATACCCAGAAACCCTTGAACTCGCTATATGCTGCGCCGGATGATGCATTG -AATGAAACATGATCGCACCTATCGACAGGGGATGAGAGAGGAGAAAGTGAAATATAACCA -TCGCTGATGTACATGAGGCTGAAATAGCTGCAAAGTGGTCATATACCGAATATGAACTTT -AAGTCCGTCGATTCGCTGATCTGCGACCATCTGTTGGAGACTCGGGAGGTTCTTCGTCCA -CAAAACGGACGCTATTTCCCACTGAACTTCGTCGCCCAGTCCGTGGACGTTTGGAAGAGG -TGGATAAAGCAGTCGTAGTCCCGGCTCGAAGTAGTCGACTTGCGAAGGGTGTGCGTAGGG -GTGTGGCGGGGGTGGTGGACATTTGGGCGAGGGCTCGCGCCCGGGCAGCTCGTTTGGAGG -GAGAGGTCGTGAAATTGGGGGTTGCCAGGGGACTTGCAGAGATCGGATCGTGGGATGTTG -CCTGCGATGTTGCCTGAGATGTTGCTGTGAAGTCGCTTATTTTGAAAGGTGTTTCCGCCT -GCTCCTGATACCAGGGGTCGAACATCGCTGACCCGGCGCCATCGCCATTTCCAAAGGCTG -AGATTCGCGCCGGTGTGAAAGATGGACCGGCCGAATCTCGAGCTTCTAGCTTCATTCGGG -TGTTGTCCACCCAATCGTGAAACATGTTTCTTGAGTGCTTCCTCAGATTTCTTTCCCGCA -TAGCTTCAGCGGTCTCAACGGTACTGCCTATCTGGAAGATACGAAGGCTGAGCTTACGAA 
-GAGAAGCTGCGGCTAGTCGTAGGACATGTAATCGGTATACATTGTCCGCTTGGTCTTCCA -TCTCACGTCGAATGATGAAGGTTTCAGACATTTGCATCAGCTGGTCAAAAGCGACCTGTC -GGAAATAAGAATGGTCGGCATCCTGGCAATCCCGAATTCTCTTGGTTGTTTTGTCATGCC -ATTGAACGAATATCACAGATGTGTCGACGAGTAATTTTTCACGATCAATATCACCAGCGT -GTTGCTCCATCTCCGTTATGACTCGAGTTTTTGATAACCAGGTGGCCAGCGCCCTAGATG -CGAGGTTGACCTTGATTTTCCGCCGGACTTTGGCATATGATTCTTGTCGCCGTCGTTTGC -CAGAGTCCACTTTTGCTTGGTGCCATTGCTTCATGTACTTCTTCATCAAGAAATAGAATC -GAGCATCTTGAGCCCAACCTTCCATCTTGACGACTTGATTATGTTTGGATCGCCAGATCG -TAAGAGACTCAGCTGCGAGTCGTGGAGCATAGAACTCAGCTGCTACATACTCCCGTTCGC -GCTGAAGTTGCAATTGATCACGCCAAATAGCAAACTTGGATCGAGCCAGATTTTCGGTCT -GATGATCTTCATGGGCCTCAGCATGAAACAGTAAACGGGTGTAAGTATCCCGAGTGTTTG -TCACAAATCGAGAAAATACTTCTCGCTTGATGCGTTGCTCGCGTATTCTTCGCATCAGCT -CAAATCTTTCCATCAGGACCCATTTGTACATCGCCTCCATTTTCATCCGCTCGTCAATTC -GAGCCCGGAGAGCCAAGCAACGAAGCGTATCATTCCAGGAGTTCCACGCGTTGTGTTGGA -TTCGATGACGATCTACCTCTTCTGCCTGACTGATCATTTGGGCCTTCCCAATCCATTGGG -AAAGAGCATTTCGTAGAATCCTGGTATCAACCATCTCAGAAACCCGAGAAGCAGCCGGGG -CCAGCCGGGACTGAATTCTCCATTCATCAAAGGTTTCTCTGAGGTCATGTTGGCGCCGAG -AGGCCTGCACTTCCTGGTCTGCAAGTGCAATGGTCTTCGAGTGCTGAGACCATATCTGAA -GTATGGATTTCAAAGATGACCGTTTGTTATGTAGGTCTATCTCATGGTCGCGCTCTCGAT -TAGTTCGCAAGTTCCGAAGCCAGGAGAGAAGTGATCGCCTTTTGAGTCGATAATCAGAAA -CTTCAAGAACACGCCGGTCGCAGAAGCACCACCACCAGTCCCAAAACGATGCTTTCTGAA -TCTTCGTATTCCGTACAATAACTGCTTGGGTCTCTGCTGCCTTCAAATCTTGGATCTTCT -TGCGCCATGCATTGAAAGGCCTCCGTAGTGCAAAACGCTGGGCCTTCAACTCATTGACGG -CAGTGATCTCGCGCCAGGCATTGAAATATTTGACACCAAGAACATGTCTCCGGGCAGCAG -AAGTCCGAGCTACTTCTTCCGACGTAAGTTGGGCCCAATGACTGAACGCCTTTGTCATTA -GATATAAATCCCGTGCGCGGCTGGCACGCGTTTCAAGATGCTTGAAGAATCGATCTGTTC -GAGCCGAGTGGCGTTTTTCTTGAATTACTCCTCGCCATGGATCAAAGGCTTGTCGGATGA -GAAGGCCGCGATCATAATTGGCAGCAATTGTTTCCCTAGTCTGGCGTGTCTGTCGCGCTT -GGAAGGCTTTTTTCAGCCATTGTGTCAAAATTCGACGACAGATCGATCGCTGTCGATACA -TATTGAATGTTGATGCATCCCGAAGTAAATCTGAAAGTTGTGGGCGATAAATGAACTCCG -GTGGCACCTCTTCTTTGTCAAAAGAATCACGTTCTGGGCTGTGTGACATGATTGATTGCA -CCTCTTCGGACTCGGAGCTGTTTAAAGAATCTAACGAGGGTGAACGACGGGGACGTGAGG 
-CTTCGGCCATTCTCTTCGAACGATCACGAGCAACGGCCGACCTTGCGACCACTCCATTAG -TCAAGGGAGTTTCTTCCGTGGCTTCGTGCTTATTTTCCGTCCCGTCAAGTCTGCTCAACA -AGCGCCGACCGACGTCCAAAAACTGGTTGATCAATTGTGTTCGATCTGGTGATTTGGTAG -GCCCGGCATCATGCCTGGAGGTAGGTGAGAGCTTTGGTTTGGCAAATTCAGGTTTGCCAT -TTTCAAGTCGCGACATTGACGATCGAGAGCTGGGGCGGTTGATAATACTTCTATGTGTAG -CATCTTCGCCGATGTCGTACGTAGTGTTGAACGATGCGCGCCGTTTGGGTGTCGGTGGCA -CTCCATTCATGATTGGAGAATGGTCATGGTCAACGGTCTGGCTGTGTGTAACATCCACAG -TATACTGATCATATTCATGTGTCTCATTGAGGTTCGTAGAGCCCTCCTCGCCGATTTCGA -TCACGATTCCCATCTGTTGGAGCACATTCTCGAATTTCTCGAATAGAGTCTCGCCGCGCA -CACCTTTACTTCCCATTTTGAACAGGAAGCGCAGGCATACATGACCTGGGTCCGCGTTGA -CGCCATGTTCACTGATGACTTCATCGTATGCTTCGAAAAGCGCCCGGAATGGAAGTCGGT -CCGCTTCGGGTTTTGTCTCGGCGCCAGCGATAACCTGGTAGAGAAAACCGACATCTATAG -AGGTCAGGCTTGGTTTGAGTTAAGTGATTGATGACACCCTACCATCATCTGAAAGAGATG -AGATCTCTTCGGTCACGGCTCGACGCTGAGAGGATATCGGAGGCATCGAAAATTCGCGAT -CTTAAGGCGCGCTTGTCGCATCTGGGTCACAGAGCTGCAGGACCAAATAAATCATCGCGA -AAGCGGGGAGCGCCGAGAGTGAAATACATGCGCAGACAGAAGCACTCTGCAATCAGTAAT -TCCAAAAGAACACAATAATAAATTCAACCCATGCGCCATCAATCGAAGTAACAATGCGGG -TTGTTTTGGTTGTCGCGACAGAAACATGCACGGTCACGTGGTCTTATGTAACATGCTTGG -ATTTCCAAGGTTCTATTGGCTTCACATCGGAACATTAAGGCTACTTTGCATTTCAAGGCT -GCATCCTAATCTTATTCTTACAAAATGCCGGGAAGAACTCGTCCAGTCGAGAAACTTGCC -AAAGCTTCAGCCCAATGCTCCGTAGAGGTGTGCACCTTCTGCGTCAATTTCAGCATTGCC -CCAACTAACTACCTTGATAGGTTGCGGCATACGGCAAGTGTGTTGTTGCGGATTACAACT -CGGTACATAAAGATATGTGTGCCAAAGAATTCATGCGACTGAAGAATTGCTATCTGGTAG -GTGACAAGTGAGTGCTGCGTAGTCCAACATGCTAACCAACCCCAACAGGCGGCATCCAAA -AAGGGCTAATTGCTCCTTGTGATTATAACATCAGAAGCATCAGAATTACTCAAAGATTTG -GAGCTGTTGGGATATGGCGGCGCAAGCAGGAGCATTTGTAAATTATTTCAAAAAATTTCG -CTAGGCATTGGATGTGAACTATTGATGGAAGGCTGAATTCGTTCAGAATATCAAAAAAAA -ATAGTTGAAAAGAAAAGTAGACTTGATATCTATGTATGTTCTTTTGGTTGGCTTTCTAAA -TCTTGAGAAGACGCAACAAGAAAGTGGCATTTTGCACCTCCTGTTCTTCCTGCGAGATAG -TGCCCTCTTCCTCCCCAGTAGGAAGCTTATCGCCACCGAATGCACTCTGTTCAACGGGAC -GGCGCAGGACCTTCTCAAAGGCTTGAGCCTGAGTCTTGAAGGCCTCGTCAAGGCCAATGT -TGTCCGGGTCCGAAATCACAGTGTACTCCATTTTCACATCATCGGTGGCAACAAGACCAG 
-CCTTCTTGCGAAGGCGTTGGACACGGTTAATGATCTCACGACCCAGACCCTGATGAGCCA -GCTCTGGGTACAAGTTCACATCAAGGATAGTCAACACGTCAGCATCCGTGTTGGTTTCCA -TGCCCTTGGAGTTGTCGGTCTCCTTGATTCCTCGCTTCACAACGAGATCACCCTCAATGA -GTTCAATGCCATCAACCAAGATCTTCTTATCTGCAAGATATCCCTTCACGTCATCGCTAG -TGAGAGAAGGTAAGGCCTTCTTGACCGTTTGAACCGCCTTCTTCAGCTTCTTACCTAGCA -CAGGCCAGTCAGCGGTCACCGAGTATTCGACTTGATATTTGGCTTCGTCAGAAGAGAGAA -TAAGCTCCAAAACGTTAAGCTCTTCGAGAATGTATGACTCGAGAGACTTGACATCTTCCA -AATATTGCGGGTCGTTATGAATCACGACAAGTGACTTCAAGGGGGTCTTGAGGCCGATAG -CCCGACGTTCGCGCGAGATACGGCCGAGTTCAATGATATTCTGCATCCGAGAAACTCTGC -GTTCAACAACTTCGTCAAAGAGCTCTTGGCGCACTTCAGGGAAGGGAAGGAAGTGCACGC -AACGGCTGTCTTCAGCACGAAGCGACTCGGGAATGTGGGGAAGAAGGCGCTGGTAGATAT -TGTCGGTGATGAAAGGAGTGAAGGGAGCGAGGCCTCTGACAAGAGTGTAGAGGACCTCAA -AGAGAGTGTTCAAGGCGTGCAATGTGTCATCAACACCATTTTCACCCTTGAGTCTCTTGC -GATTAAAGCGGATGTACCAGTTGGTCGTGTTATCAATCAACTCTAGAAGGCGTGGTACAA -CGGTGTACAGACGGTAAGCAGCCATCTCCTGGTTAATGAACCGAAGCAGTGATTGACAGC -TCGCGAGAATCCAGCGATCCATCACGTTTGTGTTGGTTTTCTCCGCTGCAGGGTCAAAGA -CAAAGTCGATGTCGGAAGCCTTCTTCAGGAGGGTCACCTGGCCCTCAAAGAACTTGTAGC -TGTTCCACAACGGAAGAAGCACCTTGCTGACAATTTCCTTGACGCCAGTTTCTTTGAATC -TGAGAGGTTCAGCACGAACGACAGGGCTGTTGATCATGTACAATCGAAGGGCGTCCGATC -TATTGAGTGAAAGTTAATGAGAATTCGTATGAGAAAACTCCCGAAGGCGAAGGACAGGAA -AAAGTATGGACAACTCGAGACTAAAAGAATTGCGTGACTCACCCGTATGAATTCATGATT -AGCGATGGGTCGGGGTAATTTTTCAATCTGTGAAATCAGTAGATTAGAAAGAGTTCCGGC -CATAGTCATATTGAGGTCATGGTTACACACCTTTTGGACATCTTAAGACCGTTCTCTGCG -AGGACAATGCCATTCACAACGACATTCTTGTATGGCAGTCTTTGACGCAAGTGGACTCCG -ATCACGCTGAGTACATAGAACCATCCTCGTGTTTGACTGAAAAACATACGTTAGCGCGTG -AATCAGGAAGTAAATGTGGAACTTACTCAAGGCCCTCGGCAATAAAGTCTGCTGGGAAAC -GGTCCTCGAACTCATCTGCGCCCTCGAAGGGATAGTGGACACTTGCAAAAGGCATACTAC -CTGATTCGAACCAGCAATCAAAGACCTCGGAAACACGTCTCAAGACGCCGTTTCCTTTGG -AAGAAGGAATCGTGATGTGGTCGACCTTATCCCGATGGATATCGGTGAGCTCGCCCTCAT -AGCCACTCAGTTCTCTAAGTTGCTCGATGCTGCCGATAGCCACAACTTCTGAGAAGTCTT -CACTAACCCACAATGGCAACGGTGTTCCCCAGAAACGGTTTCGAGAAATGTTCCAATCAT -GAGCGTTCTTAATCCAGCTGGCAAACCTCCGTTCCTTAACATTGGATGGGACCCAGTGAG 
-ATTTATCAATACCATCAAGGATATCGGGGATGATAGGCTTGACCTTCACGAACCAGGATG -GCACAGCACGATAGATCAGAGGGGTATCGGACCGCCAGCAGAAGGGGTAGCTGTGAGTAA -TCTGGCTATCAACGAGAACACGACCGTTGCCCTTGAGGAGTTTGATGATGCCCTTGTCAG -CAGCCTTGACATTCTGGCCCACAAAGTCACGGACCTCAGCAGTGAAACAGCCTTGGGCAT -CAACGGGATTGGGGGGAAGACGAGTCTCAGAAATGACGCCATGGTTCATGGCTACTTGGT -AATCCTCCTCACCAAAGGCAGGGGCCTGGTGAACAATACCAACACCATCCTCGGCAGTGA -CATATTCGTCATTCAGCACTTGGAAGCCACAGTCCTTGAATTCTTCATAGAAGTAATCAA -AGAGGGGTGTGTACTTCCATCCGAGCATGTCAGAACCCTTGATACGGTCGACGATCTTGA -ACTTAGCTTTCTTCGGGTCCTTGTAGATGGTCCGAAGAAGGGCCTCCAGGAGAATGTAGT -TTTTCTTTGTAGCTTCATCGTAGATCTTGACGTACTCGAAGTTGGGGTTGGCACACAGAC -CGGTGTGCGATGGAAGTGTCCACGGGGTGGTGGTCCATGCAAGTAAGCATGTTTCAGGGT -CATCTAGCAGGGGGAATGTCACAACAACGGCGGGGTCCTGCACATCTTTGTAGTTCTGCT -GCGCTTCGAAGTTGCTGAGCGGCGTGTTGAGAGCGGTGGAGTAAGGCATGACACGGTAAC -CGCGGTAGACAAGGCCTTTATCGAAGAGCTCCTTGAAAACCCACCAGACGGATTCCATGA -ATGATGGGTTCATAGTCTAAATGACAAAAAGAAAGCGTTAACACCAGGGCACAGAAGGCC -AGGCGTATTCGAGCATACCTTGTAATCGTTATCAAAGTCAATCCAGCGACCCAATCGCTC -GATGGTTTCGCGCCATTCTGTCGCGTATCTCATAACAATCGATCGACACTCCTCGTTGTA -TTTCGCGATTCCAATCTCTTGAACCGCTTGCAGACCAGACATGCCCAATTTTTTGTCAAT -CTCATATCTAGATTTTGTGTCAGCAAAAGCCAAGGTACACAGTCTTGAGAAAGTCTTTAT -CTTGGATTGAATTGAACTCACTCAATGGGCACACCATGTGTATCCCAGCCAAAACGACGC -TCCACATGGAATCCCTTCATAGACCAATAACGGGGAATCACATCCTTGATAGTCTAGTCA -AAGTATTAGAATTACCCGCAGTCCATTCACAAATTGTTCACTATGATCTTACCGAAGCAA -GCAAGTGTCCGTAATGAGGAGTTCCGGTAGCGAACGGGGGGCCGTCTAAGAATCCATAGC -GTGGCTTTCCAGCCGAAAGTTCAACCTGGCGTTTAAAGGCATCAATCTCCCTCCACCGAG -CGAGCACAAGCTCTTCTTCCGCAGGAAAATCCATGATTGATGACTCCAGTGATTTACGAA -AGTGGAGAGATGATTGACTCCGACTAACTACCATTCAAGGGCTGGCAAGTTTACGGGTGA -GTCACCCCGCCTCGGGTTTTTTTTGCCAGTGTGCCTTGTTGCCTTGACCCCTCCATTTAC -ACCGCCTTCTTCAACTATCATATTAGGGTTGCGATGAAAGGACTCCTACTAGTATGTTAT -TAGAATAGACCAAAAAGCATAATCCAAAGGCCAATTGCCAATGAAATGACCAATTTATCA -ACGGGTAACAGGTATACTATGTGGTTAGTACAGAGTATTTTTTAATGTAATAACGTAGGG -TAGGATATATCATTGAGCAATTTAGATAATATTGAGGTACCGGCTAGAGGCCAAACCATG -CATGCGCTACAAGGGCAAACAAAGAGTCCATTGGAACATCGGAATTCATGGTACATTTTC 
-CCTAACTCCTTTTCTTATTTTCCCAGACCCCCCTCATGCCCAATTCCCAAACGTGTCAAC -AGATACCAATAGTGTCATaagtagcaagaagaaatagtcagagaaggcgaaagagaaagC -TGGACCTTCATTTACTTTCTAGTACCGATTATATCCACCTCCACCTTGATTGTGGGGGTA -GTGATCTCGATGCTGCCCACCACGGCCGCGGCCGCCACGGTTACCTCCTCTGCCGCCTCG -GTGATCTTGAGAGCCATAACCCGATTGATGTGCGTTATTGTACTGGCCACCCTGGTTGTA -CTGGTTGTTCTGGTGGGCATAATAGTCCCCAGGTTGTTGGTAGCTGTGTTGGGGTGGGTA -CTGGTGTGACATCCCGCCACGTGGAGGAGGAGGGGGACCTCTGGCGTAACCTCCGTACCC -TTGGCCAGGGGGAACCCATCCAGGAGGTACCATGGGGGCACCACCGGCACCACCTTGTGG -GCCGGTGTTGAATTTCGGGTCAAGGTGCGCAGCGAACGGGTTCGGGCGATCACTTGAGTA -GTTCATCCGGCCGCCACGTCCTCTGCCCTGGAAAGGTGCGCCACCGAATGATCGGCCTGA -ATTTTGCGATCGGTTCTGGGTAGCTCTGATATCGTTTGAGTCTAGCATTGGCGTCGGTAG -CTTCACCCCGCGGAGAAGCATAGACTTGTGGACATGAGAAGACTTTGGGATTTTATACAA -TACACTTTTATCAGTTAGTATATGAACTGCAGAGAAAGGGGGGTTAAACTCACGTGAGAG -AACGATCGTCATCAACTCCCGGCATCCCATGCGCCTCCAATGGCGATACCAACGAGCTGT -GGGGAATATAGGTGTCGCTGCGCTCGACAATACCGGCCAGGCCGTCACTCACATCCTCTT -TGAGAGTATACTGTGCCGGACCAGGCTTCTTGGAGTAGAAGTTCGCAACAAGGTCTTGGT -ACAAGGGGTTTCGGTCGGAAAGTAGAAGAACGTCTTGACCATGAGAGTTCCGAATCTTTT -CCTCCTCGGTAAGAAGAGGATATACCTTGGCCATGGCGTTTAGTAGACGACTTTCGTCGA -TGAAAGGGAGCAGGATCACTCCTTGCCAGGCAAATTTCTTGCCGTTCAAGTCCAGGGGGA -AGTCCTCCGGGTAGAAATCGATGATCTCGCTATCGGGGTTTTCCATGAGAGGATGGAAAA -CCTTCGGCAGCGCGTGGTTAGACGCGGCAGGCAGGACTCCCATTAACTGTTCAAATGGTT -TGAATATCGTGCCCTTCTCGAACTGGACGTCCATATCGGCGATATCAACGAAATCAGCGG -CTGTACGTTATATGTTAGATTCTTCCTGGAAAAGGTAAAGGAAAGGACGGGGAAGGAAAA -CTCACCAAATGGCGCATAGTGTCTAGGGTAATACCAAGTCTGTGATCAAGTTAGCAGATC -AATCATATAATTAACTAGGGATGGACAACATACCCAAGAGGGACAGCCCTGGAAATAGTA -TCGCAAAACCCAACAAAGACCAACCGCATATTCGCGAGCGACCTGGTGGCGGAATTCTAA -ATCTTTAGGATCCACGCCGAACTTTTGCTCATAGTATCGATCGGCGTATCCTGGCTCCCA -CAACTTGACGGTGTCTTCTTGAACCTCATCTTGTTTGGGCTTTTCAGCAACAATGGGAAC -AGGCGGAGGGGAGTCTCTGCCCGGTGTTCCTGCCTCGGTAACATCCGGTTCGGCTACATC -CGCCTTTCGTTTTCCGAGAATCCCAGGAGAAGCCTGTTCTGGGATTTCGCTTGATATTTC -CTGGGCATCTGTGACTGAAGTTTCGCTGTTTCCAGCTTCATCCTGGCTGCCCTTCATGAG -TCTACTCTTCAGCACAGCGGCGGCGCTCTTATTCTCCATGTTAGCACGGTACACGGCCCC 
-GCGGTTGATTACCATGCTGTGTGTCAGCTCTCTGGTCTCCTTGTTTAGGTGGCCCCGGCC -AGGGGTGATGAGTTCCATGGGAGGGGCGGGCTCGCCGCGAGAGCGATCGCCAGCTGTCAA -GTCACGCTGGGGAGAACTCCGTCTGCGCTTTTGAGCACGCTCTTCATTACGAGCCGCATC -TTGCTCCTTCCGTCTCTTCTCGTTTGCTTGCTTTCTTTCGTCCGCTTGCCTACGACGTGC -GAAGATAGCATCCTCTTGTTTCGCCAGTCCCTGCAAGATAATTTGAGCGTTTTTTAACTC -AACGCTTCCATCTTTGGTAAGGTAGCTTCCCATAGCGGGGAGATTGTCACGCCAGATCGC -AATCAGGGTATCAATACCGTTCTCACGAATATCCAAAGAGGGCAGGTGAGGAAGGAAATC -GTTTCCCACGAAAAAGCACATGAAGACCCAGTCATCCAGTGCACGCTCCAAGTCAAATGG -GAACGGTTGCTGAGGAACGCGAAGCTCCACGGCAAGATATTCTCTTAGCACGGCGACATG -AAGCCAAATGAATGGCTTCAGAGGGGCAGCATTTTGCTTCTCGTCGAATTCACCATTCTT -CTCTTTCGGACGGCCTAGACATTCCTCAGCCTTATGGCCATTCTGTCCGCAAAGTCGACA -CCCCCGAGGCCTTGAGTCTTGGGCAAACACATCTTCGCGCAGTACTCGGAAATAAGGCTC -ATGGGTACCAAGACCCAGCATAATCAGATCAGCATCCTGTTAGATTTGAAATTAGTGACG -TGAGAAGATTGACAGGGAACACCAAAACAAACTTACCAGACCATAGATGACATGACGAGT -GTTAGGATCGTGATTAGGCGATGCTCGCTGCGATCGCACAAAATTCATGATCTTGTGCTC -TCCTTCTCCTGGGACTGTGGCATCCGAAATGATGATTTTCAACTGTTCACGTTAGTATGT -GAATTGGTGCGAAGGAAATAGATCAAAACGTACATTCTCCCAACCAGGATCAGTGTTAAG -CTTGTAGGCAATCCAATACCGCAACGAGGCAGCAAGGATATCCATGAAAGGCGTTCCTGG -GGTGATGACATTGCTGTCCCACGTTTTCTGAACAACCTCCTCCCGAATCAACTGTTCGCC -GGTCTTGGTAACATGCTGCTGTGCAAGCATCTTGTGAAGTTCCTCCTTCTTCTGATCATT -CTCCTTGGCCTCTTGCGCCGAGCGGAATCGACGTGCACGCTGCTGGTTCATCTTCGCTCT -TGGAGCGACGCCATCTATACAAAAGATATCGAGGTTAGTCGGGATCAATTTGGAGATGCG -GGAAGCGCAAGCTCTCTTGGGATATACACATACCGATAGCGATCATGAGAAGCTTTCTGG -GGCGCACCATGTTGACTACTCGATCGGTATACTTGTAGATCTCCATCATCATTTCCTGCT -CGTCCGCTGGGGGAGGTTTCCCCTCGGGATGAGTGCAGGGATGAACAATACCATTCATGT -CCAGATAGAGGTTGTCCTGCTCCTCGCCATTAGGGTTTGGACGAGTGATGTCGATTGGAA -TTTCTTCTCCGTCGACCACCTGGGCCTGCTCTTCAATAACCGGAGAGACAATCTTCGGAT -ACTTGTTGGAGAGCCATCGGAACAGCGCTGGGACACCCATGATGGTAATGCGACAGTGGA -GAGATCAATACTTGGAGGGGTTTCGGTACTGTTGTGTCGCAATCAGTTGATGGCGTTCGT -GCAGGCGCAGGCGCGGTCTCTAATTTATCGAGCTAGATGCAGTGTAAGCCAGTTTAGATG -AAATACAGACTTCTCTAAAGCATAAAGTAGAGATAAAAAGAGGTGCAACCGCAGGTATGA -TCGCGGCGCAGGTTGGGAGTAGCGAAAGAGAGCTTGCCGTTAACGAAAAGATAAATTCCG 
-GGCGGTCGGCGATTCTGCGGGTTGAGCCAAGATCGGCGTTTAGCCCCCTTTGGCAGTACA -ATAGGACAGCTTTCTACTGCAATCAGCGAACCAACCTCTTCACTTTTTTCCCCTTGCTCT -ATCATAGATGACTACCAATTTTTACTTCCCCGGCCGGCAATGACCATGATATTGCGAATC -AACTACGATGTTCCGAATCCTCGAGTCGCAAGCTCCGGCCAAACAAACGGCTACGGACAC -AATCGACGTATTGAGCAGTAGACTGCAGAGTGCTACATTGTTGGAAGATCGTCGTGCAGC -AATTCAAGGATTAAGAAGTTTTGCAAAACTTTACCCTGCATCAGTCGCTTCTGGTGGTCT -GCGGTCATTGATCAGCAGTCTACGCAATGACAGTGAGGATGTGGACACAATCAAAGTGGT -CCTCGAGACCCTTTTGATGCTCTTCACACCCGATGAATCTAGTGTGGGTATTTTGTACTC -TAAAGAAATTTACCCGTGTTACTCATATATATGCTTAGCCTGAAGCGTCGGACGAAATAG -CTCTATGGCTGGCAGATGAATTCACTCAGGTACGCAACTTGACCTCGGTCCCGAGATGAT -GGACTGACATCTCTTAAGCGACAAGATAACATAACTACCCTACTCAACCTTTTGGAAACC -CGCGAATTCTACTCACGCCTTTATGCACTCCAATTGATTTCGCACATTTGTAGCGCACGA -CCGGAACGAACACAAGAATGCATTTTTACAGCCCCGCTAGGTATCTCCAAACTAGTCGGT -GTGCTGACAGATGCGAGGGAACCAGTACGAAATGGTATGTCTTGTTGAACACTTGCGTCC -ATTTGATCCATGAAAGGTCGGGGCTGACAAACTATTTTCTTCGTTCCTTAGAGGCTCTCG -TCTTGCTGATTGCCCTGACCCCCGCCTCGGAAGAGCTGCAGAAGCTTGTGGCATTCGAGA -ATGCATTTGAAATACTTTTCTCTCTTATTGAAGCCGAGGGAGCATTGACTCATGGGACCG -AGGTGGTGGAAGACTGCCTCTCCTTGTTGGCTCACCTGCTGAGGTTCAATGTTTCCAACC -AATCGTTTTTTCGCGAGACAGGCTGTATGAAAAGGGTAACACAGTTGCTTCATGAATGTC -AGCAGGAGCCAGAAGACAATGACCCAGCGCCAGAGTGGACTCTAGTCCATCGAGATAAGA -ATGTCTGGGGCCTGTTGGCCATTATTCAGTTGTTCTTGATTCGCGGTGGCATGAGTACGC -CCATTAACCAGACGGCATTCTGGCAGAATGGCGTAACAGAACAAGTTCTTAGCATTGCAT -TCAGCCAAAGGTTCAGTGTCAGTGTGACCTCAAAGGTGAGATGTCGATATTACCCTTTTT -TTCTCCGTTTTCTATGACTGACAAGTCCAGGCCTTGTCAACGTGTGCCGATCTGATTCGC -GGCAATTCACCGTTACAGGAAAGATTCGGTGATATTGAAATCCTGTGGGGCTCTTATCCC -CGCGGCGACAAGGCTGCCAATGGTGATACGAATGAACCTTTACGCATCAATGTCATTGAA -GCATTCTTAAAGCTCAGCCTCGAACCTAGTCCCAACAACCTTTTGGACGCACGACTTGCT -GCCTGCGAATGTATGAAAGCGTTTTTCGCCCATCATTCTGGAATTCGCATGCACGTCCTG -AGGCGAGCGATTGAAGGGCATACCAGCGGACAGGATCAGATTCCCAATATTTTATCTGTA -CTGCTCACTACACCCGAAGCGCGCGGCAATGCCGATCCGTATCAAGTGTGGATAGCATGT -GTTTTGATGTTCCATTTGCTTTTCGATGATGCAGAGGCCAAGGCAACAGCTATGGGCGTC -ACTGAGGGCGATGCTGAGAGCGGCGAAGAAGTGGTCACCAGTGTTCAAACTGTCGTTGGA 
-AACCTGATTACTGGTCTCCAGCGCGGTGACGATGAAAGAATCACTATCGGTTATCTGATG -TTGCTCTGTGGTTGGCTTTTTGAGGACCCAGACGTCGTGAACGATCTTCTTGGAGAGGGT -AGCTGCATCCAAACCTTGTTGCAGGAAATCAAACACCAGCGCGCCCCTAGCAAATTGGTA -CCCGGCCTTTGCACCGTGTTGTTGGGTGTCATATATGAGTTTTCTACCAAAGATTCCCCG -ATTCCTCGCGTCACGCTCCACAAGCTACTCATCGAGCAGCTTGGCAGGGAACAATATATT -GACAAGATCACAAGACTTCGAGAATGCCCATTGGTTCGCGATTATGAGGTCCTTCCCCAA -ACCACTGGTGGTCAGCTCGAGGGTGGACTGCCGGAGGTGTTCTTTGACCGTTCATTTGTT -GAGTTCCTCAAAGATAACTTTAGCCGATTGCTTCGTGCCATTGACCGCGAACCCGGATTT -GAGATCTCTATTGTTGCCAATGGTGTCGAAAAGGGAATTTCGCGTGAGCTTGTTGATTCG -TTGCGTGCTGAAATTGAAGATCGCACCCAAACCCTCCAAAAATTGGAGTCCGATCTCGTC -AGCATCCAACGAAAGCTCGATCAAGAACAGCTAAACCACCGAAAGACAGAGGAATCCAAT -GCAATCGAGTTGTCAAAGCTTCAACAAACCATCCAATCTCTACGGCAAAGCCATGCACAG -GAGTTATCGAAACAGCTATCGAAGCTCGAAAACGAACATAAGCAATCCAAAAACGAGTTA -CTCAAGCAGCATAATGATCAGCTCCGTACCATCGATCATCAACTCAAGCAGACCTCGGCC -GAATCTGAAAGCAAGAGCAGCAAAGCAAAGGAAATGAGAGATCACCATGAGCGTGAGGTT -GCTGCTCTACAAAAAACCATTCGTGGCTTAGAATCAGAGCTTTCTCGCGTCCAGGAACAA -CATACAGGAGAAGTTGTCAATTCCAATCAGAAAATCCAGGAATTGGAAACTATCATCAAC -CAGTCCAAGGAGGCTCACAGTGCACGGGTGACTGAGCTTCAAAGGAAGATTCAGGATTTG -GAAAGCAACAATAAATCTCACAATGATCAAGTTGATAGTTCCAACAAGAAGATCCAGGAG -TTACAGAACACAATCAGCCGGTCCGAAGAGGCCCACAGTGAACAAGTCGCCGGTCTTGGA -AAGCAGATTCAGGATCTAGAAAAAACTAAGAAAGCCACTGAGGGACAAGTGGCCGACCTC -AACAAGAGAATCAAGGTCTACGAAAGCATCAAGGGAACCCATGATGGAGAAGTTGCCGAT -CTTACTCAAAGGCTGCAAGGCGTGGAATCTACGTTGGCGGCAACCAAGCAAAAGCACGAA -GCGGAAGTTAGCGACTACAAGAGCAAGCTTGACACCCTGGAGTCTGAGTTCTCTGCAGCC -AAGCAACAATATGAGACTGAATCCACTGGCCACAATAACACAGCCGGGACTCTCAAGTCG -AATCTTGCCGCAGCTAAAAAAGAGCATGAAGCTGAGATTTCAAACCTAAAGCAACACATC -GCAGCTCGGGAAGCAGACCTTGCCAAGGCCCAGAAGTCCAGCAACGATCTTGAGACGGCA -CAAGAAGGCGCCGCATCCCAGGCCTCTGCGATGGAAGCACGAGCTAAAGAGGCTGAGGAC -AAGGCCAGGGAGGCCGAGTCCCATGCTCACAATGCGGCCGAAGCCCTCAAAATCATGCAG -GCCCAGCTCGACAAGGCTAAAGTGGAAGCCAAAGAAAAGGAGGAGGCTCGCCAGTCAGCC -CAGTCTGAACTGGAAGATTTGCTGATTGTCTTTGCAGATCTTGAGGCTAAGCGAACAGAA -GACAAGGTATGCTACTAGCCCAATGAATTCTATGCTGCGTGTGTCTCTGGAGAATCTGAC 
-TAACAACCTTTAGAAACGGCTCAAGGATCTCGGGGAGGAAGTATCGgaggctgaggatga -cgacgatgacgaggacgaggaagatgaGTGATTGACACGTGCCATTTGTAAATAGCCAAG -CGAAAAAAGAAGGTTCTTCCAAGGTTATACGTGAAGAATTCGCCCTCACGAATTAATCCT -CTCAACACTTTCTTTCTGCTGTCTACATAGTCATGAAACTGAGAAGGGGTACAACCATTT -TCAAGGTATACAGAAGGCATATCTCGGACTACTAATAAGTCTACACAAGGCCTCATATGC -GGCCGTGTACTCTCGTTTCCGTTCTAGGTGTGATGTCACGAAATTCCACAGCGTGAGTAG -GTAGCTGCTGTGTAGTGTTCTAAGGCTGGTTCATTTCTTCTCCATATATTCGTTCTCCTT -GGCTTTTTGTATCTACATAGGGGAGTGTGAGGGGGGGCTTCTATGTGATCGTGGCGTCCA -ACACAAACTACTACCATTTTGACATATTCCCTGCCTCGCTCGTGGACATTATAACACTAT -GTGGGCAGAGGATCCTCTAATGGTTACGATCTGAATATAAAACAGAATCCGGCCGCTTAT -GTGGCCTTATGATGTCTTATAAGGCTCTAAAGTAGTCTTTTCATTGGTCTTCTCGAATGT -GCTTTTTAGATACGTATCCCTAATATATTTCCGCCTTTCCACTCTACATTCAGGCGGCGG -AAAAAAGACTGGAAAAGCTTACTTTGTGCAAGAGACTTTACAGTGTATCACGTTGGATGC -TGTGTATACAAATAACACTACTAGCAGTCTATCATTATGGCTACCTATGGAGTTCCTCTA -TACTTGACAAGAGAATCTGTAAGTACATGCAAGTGCAACCTTGCGACATTCCCAGGTAGA -TCAGGTCTGCGATCTATCATCATACGTTGAGGTCCTCATGCGAAGTGCATTTTTGAAACC -TCACGGCTTCTAACACCCAAGAAGGTCCTTTCAAAGCGCTCAGTATTTAGCCTTGTAATT -TACAACATGGAAGGAATGTGTACGTCACAAAATCTTGAAAGATCAGAAGTTTAGAACAGC -TTCTTACTGTCAGGCTCGTTTTCCCAGGGCAAACAAGCCATAGTGTCTATTTAATCCTTG -CCCACAGCGTCCTAGACTTCTCTTCCACCCTTCTACCCATGCTCATCGAAGACAATTCAA -GCTTCTTTCATCGATCCTCACAAGCCTATAATGTCTGCATCTCCTCGTCAGTACTCTTGT -GAACCCGAGATCTTGTATGACTACGTTTGGGCCCATCAGATGCAACGTTCGGCAATAAGA -ATCGCTAATGCCCTGAACTTTCCAACAATAAAGGAGGTCGACGCATTATGGTACAATGAC -GCGGAGACTCAAAGAATTATGACCATATGGTCGGACTTCGCTGGAAGACAGCGGGGATCT -TCGGCATTTGTTTTCCTCTGGTGACACACAACGATATTTGTTCATGTCATGACAGCTCCT -GTGCCCGCCAAAAATTGGGGTCGGGGGTGTAACTTGCGAGGTCTTTCCTGTAATGCTCTC -GAACTGGTGTACAACACAGCTTATGCGTGTGGTCCCGATTCTGTTTTCCCGCAGCAATGT -CGACACGACTTCCCCAAAGCTTGATGCATACGACTTTTTTTTATGCGCTATGCATTCTTC -GTTGCGAGGCCAAAGCGTGGAGACTAAGACTGGGACAGCGAAGCAGACGACCGCGGTCCG -CGTTCCCAGTGAATCGGCACCGCGAACCTAGTACTGACAGACTTCATACTCTATCTCCCG -TATCTCCCATTTCAATTGAAGAGCCAGTAGCAGCATTGCCCTCCACAGCCGCCCTTGAAC -CCAAGCATGCGTTTACGATTGCCTCGGGGGGCGGCAAGATCCATTCACCTAGAGAAGGAG 
-CCATCAGTGCTTACACCAGCACTTGTGAGATGCTGTATCAACTTGCTGGCAAACTATGCG -AGCAATTCAGCGGACTCAGCTCTCAGTCGAGTATGATGAAGCCGAGGAAGGAGGAAATAG -CAGCACCAAGTTCGCTGGCATCGATGGGCTGGTCGAACAATTTGCTCCGACAATTGGACA -GAGATGCATTGGGGGCGTCACTGCCAACAATCAGCCTCTTGGGCAGATTGTCGGATACTG -TGCTCGAATGGGAAACCAGAGCGCTGGGGAGACCGGCCGTGGAGAAGAGATTGACCCCGT -TGGAGACATTAATCCCGGAGTAAAGCCCTCCAGCAAACGTTCCCAGGTCACTCCAAAGAC -GCAAATCAGCCGGCATGATAATGGAGTTTCCAAAAGCAGGACTCGACCCTCGAGAGTCCA -CAACACAGGATCGAACGAACGTATCCAGGCGGCATTGAATTCGAAATTGGCGGGTACTTC -TAAATCGCCAGACCAGTTGGATGCAGTTGACTTGAGCACGGGATTTGAGTTGGACTCGGT -GATCAAAGTTGATTTGGACAAAGACTGAATCTCCAGGATATTTTGGGCCATTTGTAGGGT -TCACGTTCAAAAGAGGCGATGTCGGCCAAGTTGAAAATTGGGGTTAAGATGTGGAATCTG -GCAATCTTTCTGGAGGACATATTGCTTTCATAGCATAAATTGAAATTCATTCAAACTAGT -TTCCGGAAAATTTGCAAACATAAGTACATATCTGTATAATCTGGATCGTAGAGGATCCGA -GTCTTCAAGTGGAAATGTTCAAAGGTGTGTGCTCTGTATAGATCGACAGATTGAGTCACC -GGGCGGTCAAGAAACTGCCCGCGACACCGGAGGGGAAACAATACTCTTTTTCCTTTGTCT -TGTTGGCTCTTGACTTGGTGATTATTTTCTTTTGATACAATATTGGCTCTAGGATCAGCT -CTGTCCAGGTCGGTCGATCTCCCATAATTCCTCACTTCCCTCCAAATGACTGGAGACCAT -ATATCCCTAGATTCATTGTGGGGGAGAAACAGATAAAGTACAGTTCCATGAGTCAGGGTA -CTAACAAGTCGCAGAATGAAATGAGCAGTCAGTGAACTTTGCCATTACCGAGTCCTCATC -CCTATCCTCCTTTACAATGTCTGAACCCACCAAGATCAGTATCTTGGGCGAGGAGAGCAT -TGTCGCTGATTTTGGCCTTTGGCGCAATTTTGTCGCAAAGGACCTGATCAATGGCTTGCC -CTCAACGACCTACGTTCTCATAACAGACACCAACCTCGGATCTATTTATACCCCCACTTT -CCAAAAAACATTCGAGGCAGCTGCCGCTTCCATTAGCCCATCCCCGCGGCTCCTCATCCA -CAATGCAGCTCCCGGAGAAAGCTCCAAGTCACGGCAGACAAAAGCGGATATCGAAGATTG -GATGCTGAGCCAGAACCCACCATGCGGTAGAGACACAGTGATCATTGCGTTGGGTGGTGG -TGTAATTGGAGATTTGACTGGATTTGTTGCAGCAACCTACATGAGAGGTGTGCGCTTTGT -GCAGGTCCCAACCACGCTCCTTGCCATGGTGGACTCCTCCATCGGAGGAAAGACGGCTAT -TGATACCCCGCTTGGTAAGAACCTCATTGGTTCTATCTGGCAGCCATCGAGAATTTACAT -TGATCTTGAGTTCCTCGAAACTCTCCCTGTCCGAGAGTTCATCAATGGCATGGCCGAGGT -CATTAAGACTGCCGCCATCTCAAGTGAAGAAGAGTTCACTGCCTTGGAAGATAACGCAGA -TTCAATCTTGGCTGCTGTGCGCAGCGAGCCGAAGGCTGGCCAAGGTCGTTTTGACGGCAT -CCGGGATATCCTGAAGGCTAGAATCCTAGCCTCGGCACGTCATAAGGCCTTTGTTGTCTC 
-TGCAGATGAGCGTGAGGGTGGCCTCCGCAACTTGCTTAACTGGGGACATTCTATTGGACA -TGCCATTGAGGCAATTCTTACCCCTCAAATCCTCCACGGTGAATGTGTCGCAATCGGCAT -GGTGAAGGAGGCAGAGCTTGCGAGACATTTGGGCATCCTGAAGGGTGTCGCTGTCGCTCG -TGTGGTAAAGTGCATCTCGGCATACGGCTTGCCCACTTCCATGAAAGATTCTCGGGTTCG -TAAGTTGACTGCTGGTAAGCACTGCTCTGTAGACCAATTGCTGTTCAACATGGCTTTGGA -CAAGAAAAATGATGGCCCTAAGAAAAAGGTTGTGCTCTTGTCGGCAATTGGCCGGACTTA -CGAGCCAAAGGCCAGTGTCGTCTCTAATGAGGACATTGCTGTCGTCCTCGCGCCAAGTGT -CGAGGTTCACCCCGGAGTTCCCAAGTCGCTCAATGTCACTTGCGCACCCCCTGGATCAAA -GAGTATTTCCAACCGTGCCCTTGTGCTTGCTGCACTCGGCTCTGGAACCTGCCGCATCAA -GAATCTTTTGCACTCCGATGATACCGAGGTCATGTTAAATGCCTTAGAGCGACTAGGTGC -GGCAACTTTCTCTTGGGAAGAGGAAGGCGAAGTCCTTGTTGTGAACGGCAAGGGCGGAAA -AATCATTGCCAGTCCTTCTCCTCTTTACTTGGGTAATGCCGGTACTGCGTCGCGATTCCT -CACTACCGTGGCCACACTCGCTACTCCCAGCAGTGTCGACTCGAGTGTCTTGACCGGCAA -CAACCGCATGAAGCAAAGGCCTATTGGCGATCTTGTTGACGCTCTTACTGTCAATGGTGC -TGGTGTCGAGTATATGGAAAGCAAGGGATGTCTTCCTCTCAAGATTGCCGCTTCTGGAGG -CTTTGCAGGAGGGAAGATCAACCTGGCCGCCAAGGTTTCCTCCCAGTACGTCTCTTCGCT -GCTTATGTGCGCTCCGTACGCCAAGGAGCCAGTCACTCTGAAGCTGGTTGGTGGTAAGCC -CATTTCACAGCCCTATATCGATATGACCACTGCGATGATGAGATCTTTCGGCATCGATGT -GAAGAAGTCGACTACTGAGGAGCACACCTACCATATCCCTCAGGCTCACTACGTGAACCC -TGCCGAATATGTTGTGGAGAGCGATGCAAGCTCTGCGACCTATCCTTTGGCAATTGCTGC -TGTCACTGGCACCACCTGCACCGTCCCTAACATTGGTTCCAAGTCCCTTCAGGGCGATGC -TCGCTTCGCAGTGGATGTCCTAAGGCCCATGGGCTGCTCTGTTGTTCAGACGGATTCTTC -CACCACTGTTACCGGTCCTACTGATGGTGTTCTGCAGCCCCTTTCTAATGTAGACATGGA -GCCTATGACAGATGCCTTCCTTACTGCCTCTGTCCTCGCGGCTGTTGCCCGGGGCAAGGG -CTCAAACCACACAACTCGTATATACGGAATCGCAAACCAACGTGTCAAGGAATGCAACCG -AATCAAGGCTATGAAAGACGAATTGGCTAAATTCGGGGTGGTGTGTCGTGAGCATGACGA -CGGTCTTGAGATCGACGGCATCGAGCGTTCCGCCCTTCGCCAGCCCACCGGCGGTGTTTT -CTGCTACGATGATCACCGGGTCGCGTTCAGTTTCAGTGTGCTTTCTCTTATCGCTCCCCA -GCCTACCCTAATTCTTGAGAAAGAATGTGTTGGCAAGACCTGGCCCGGCTGGTGGGACGC -CTTGAAGCAAATGTTCAGCGTGAACCTCAATGGAAAAGAATTGAAGGAAGCTGAGCATGC -TGCCTCGAGCGATGAAAAGCGTAGCAGCGCCTCTGTCTTCATTATCGGTATGCGTGGCGC -TGGAAAGACCACCGCTGGAAACTGGGTTGCCAAGGCCCTTGATCGCCGGTTCATCGACCT 
-TGACACAGAACTTGAGACTTCTGAGGGTATCACTATCCCTGATATTATCAAGGCACGCGG -TTGGGAGGGATTCAGGGATGCCGAACTTGCTGTCCTTCGACGCGTCATTAATGATCATCC -CACCGGCTATGTCTTCGCCTGCGGCGGAGGTGTCGTCGAAATGCCGGAGGCACGGAAGCT -GCTGACCGATTACCACAAGAGCAAAGGCAACGTGCTCCTCATCATGCGCGATATCAAGCT -TGTGATGGACTTCCTCCAGATTGACAAGACTCGCCCTGCTTACGTGGAGGACATGATGGG -CGTGTGGCTACGACGTAAGCCTTGGTTCCAAGAGTGCAGTAACATCCAGTACTTCAGCCA -GCACTCTACTTCAACGGATCTGGCCCTTGCCTCAGAGGACTTTACTCGCTTCATGCGGGT -GGTCACCGGCCAGGTAGACAGCCTTAGTATCATCAAGAAGAAGAAGCACAGCTTCTTTGT -CTCATTAACTCTGCCTGATCTCCAGTCATCGGGCGATATTATCACTGAAGCTTGTGTCGG -ATCTGACGCTGTCGAACTGCGGGTGGATCTATTGAAAGACCCTTCAGTTGACGGCGATAT -TCCCTCAGTTGATTACGTCAACGAGCAAATGTCTCTCCTTCGTCGTCGTACCGCCCTGCC -GCTCATCTTCACCATCCGCACCAAGAGCCAAGGTGGCCGTTTCCCCGATGATGCTCATGA -TGCGGCAATGCAGTTGTACCGACTAGCGTTCCGTTCTGGCTGCGAATTCGTGGATCTCGA -GATCGCCTTCCCCGATGCAATGCTCCGTGCCGTTACTGAGATGAAGGGTTACTCTAAGAT -CATCGCCTCGCACCATGATCCTAAGGGCACTCTGTCGTGGTCTAACATGTCTTGGATGCA -ATTCTACAACCGTGCTCTGGAGTACGGTGATGTGATCAAGCTCGTCGGTGTTGCCAATAC -CCTTGATGATAACACTGCTCTTCGGAAGTTCAAGACATGGGCTGAAGAAGCTCACGATGT -GCCGTTGATTGCGATTAACATGGGTGACAGTGGTCAGCTCAGCCGTATCCTGAACGGCTT -CATGACCCCCGTGTCTCACCCCAGCCTTCCTTTCAAGGCGGCACCTGGTCAGCTGTCCGC -TGCAGAGATTCGCCGTGGGCTCTCCCTGATGGGTGAGATCAAGGCTAAGAAGTTTGCCAT -CTTTGGTTCCCCTGTCTCAGGCTCGCGGTCTCCCGCTCTTCACAACACCCTTTTCAGTAC -AATGGGTCTTCCCCACAAGTACTCTCGCCTGGAGACTACCAACGTCGAAGATGTCAAAGA -CTTCATTCGCGCCCCCGACTTTGGCGGTGCCTCGGTGACAATTCCCCTCAAGTTGGACAT -CATGCCCCTGCTCGACGAAGTCGCCCAGGAAGCTGAAATCATCGGCGCCGTCAACACTGT -CGTGCCCGTCCCCAACGGCGACAAGCCCCCTCGTCTGATTGGTTACAACACTGACTGGCA -AGGCATGGTCCAATGCATGCGCAACGCAGGCGTCTACGGTTCCACCGGCAACGAAAGTGC -TGTGGTCATTGGTGGTGGCGGTACTGCCCGTGCTGCTATCTTCGCCCTTCACGACATGGG -CTTCTCCCCAATCTACGTGGTGGGCCGCACCGCCAGCAAGCTCGAGGACATGGCCTCGAC -CTTCCCAACCAACTACAACATCCGTGTTGTAGACAACCAGGCCCAGCTTGATACCGTTCC -CCAGGTTGCCATCGGTACCATCCCTGCTGACCGTCCCATCGACCCTGCCATGCGCGAGAC -TCTCTCCAACATGTTCAAGCGCGCTCAGGAGATCGACGGTACTTCCGAGTCTAGCAAGCC -TCGTGTCCTCCTTGAGATGGCCTACAAGCCTACCGTCACCGCCTTGATGCAGCTTGCATC 
-TGATGCGGGTTGGAGCACCATCCCTGGACTGGAGGTCCTTGTTGGACAAGGTGTGCACCA -ATTTGTTCACTGGACTGGTATCACTCCCTTGTACCATGAGGCAAGGGTAAGCAAATATCG -TGATTATATCCCCCCTCTCTAGCTCTATAACTAACATCTCTTGTCTGTGTTTAGGAAGCC -GTCATGGGTACTGCTGAATAAATTATCATGAATGCATCATGAATATCTCGGGCTCTTTTC -TCTATCTTCCATATTTGACGCCATATCATGTTTCATGCTCCATGTACTAGTGGGTGAGCG -GGTTCCTAAAAAGTATTCAACAAAACAAGTCCTGACTTGCATCTTGCCTGTCGTGTTCAT -AAGATAGTTCCAACATAAAGTTCTATTTCGGTCAATTCTATATTCTACTACCTCCCGACC -TTGTCTTATTGATACATCTCGCTGTTTTGTAGACATATTGTTGTCATTCGTGGCATCTTA -GGGATAAGAGATAAAACCGTCCTCGCATTGGCTTGAAGTACCTGCTAAATTGACCATCCA -CCATTCCAAGTGCCGTTCTGCCTACAGATCTGATAGTTTTTCTTTCGTGCTCTCCGTCGT -AAGTATCCATGAGCGAGGCGGTTTTAAGTAAAGCTCTCACGTCTCGCCTCAAAAGGAACA -GATATTTTGACCTATGACTTAAGAATGTGAATGACTGCCTCCCTCCCCACAACCTCCACC -GTCTCCTCGATGCGACCATCAAACGATCACTCAGAGATACCCCCAACGCAAAAAGATGGA -TGCAGCCATCGCAGCAAGCCCGATCTTGTCTTGCCTACACCGCGATGAGCTCACCCGGGC -AGCACCTTTCCATCCATGAAAGAGCCCAGTCTCTGTCATTCAAGAGGATTGGCGCATCAA -ACCGCCGGGTCCTTCGAGGCTTCAGTCAAATTGACTAAGGATGCAGGCTGAATAGTGACA -CTTAACAAGCCCCGTTGCTAATTTTGAGCTAGAATTTCGGGTTTACCATTTGAGACACGG -ATATCAGAGGGGATCATGAAGAATGGAAATGGCCATATTATACGCAGAACTTTGAAAGGA -AACTAGGGAGAAATAGTCACGTTATTAACAGAGAATGGACATCATTGAGCTGAATCCTTT -GGAAATTGGAATGCGATTCTTTTCTACAAGGCCTATTATCTTCAAGTTCAATGTCCGTAC -TCTGTCCAAAGCTCACTTTTCACTCGGCCTTCACGTCTGGTCGGGGAGAGATGGTCAACC -ACAAGTCTCTCCAGTACCCAATACCACCTTTAATCATAAATTTGGCCTCAGGCTTATCCT -TTACAGTCTCCGGCTTATATAAACGGAAGAACTAAACAACACATTAGCACCGTACCCCAG -AATGCTGATCTCACAGTACCAAAGAGGGAACTCACCTGCAAAGGACCCTTGAACAACTCC -ATCATAGCAATATCCCTCCCCAAACAAACACGCGACCCATACCCAAACGAGAAGTTATAC -TTATTATACAACTTCGCCTTCTCAGCATCCAACCACCTCTCAGGCTTGAATTCCTCAGCA -TCATCCCCATAAAGCTTTGGATCCCGATGCACAAGGTACGGGTTACAGCTAACCTCGGTA -CCAGCCGGCGCGAAGCGACCATAGAGCTCAACCCCAGGCTCAGCGACATAACGGGGGAAG -ATGTTCGGTGCGGACGGACACAGACGCATCGTCTCCTTCACACAGCCCACATAGAAAGGC -AAGTGCTCCATGACCTCGTTGTACTGCGGCATATCGGAGATGAGGCCCTTGCGGGTTGCG -TTATCTATTTCCGCCATCATACGCTCATACGCATCCGGATGCGTAATGAGGAAGTGCATC -ATGCCCTGGAAGGCGGTCCCGGTTGTGTCTGCGCCAGCAAGCAGGACGAGTAGGACCTCT 
-GCACGGATGTACTCCAGATCTAGGGGCTTGCCTTCTTCTGTGCGAGCTTCGAGGAAGGTC -TGGAGCAGGTCTGTCTGGTCGATTTTCTTGCCACTTTGGATGTCCTTGATGCGCTGGTCG -ATGAGTCGGTCGCGGAAGCGCATTAGAACGCCGATGCCGGAGTCATCGGCTGGTGAGGCC -ACAAGGTACTTTTTCAGGGCGGTGGTTTTGATCCATGAAGTGAAGGGGTGGAGGCGGGCT -AGGAGGCCGAAGGCTGGGAGGCCGTCGTGGAAGCCTTGAATTAGGCCCCCAATGTCTTCG -CCTTTTTCGACGAAGCCGAAGGGCTCGCCAAATCCGATTTCGCTGATGATGTCGTAGGCC -ATGTAGCTGATCGCCTTGTTAGTCGGGGTCTGATTTTATCTGGCTCGTGTTATTACTTAC -ACTGCCCACCAGGAGAAGTCGAAGGCTTCGCCGGTCTTCACGAACTTTTCCGTGAGCTTG -TTGGTCCATGCGGTGATGCGCAGGTCAATCAATGGCTCCATCTTTTTGATGTTGCTGAAG -CTGTACTATATTGAGTGTCAGTCTGATGTCTGCAACCGAAATCGTAGATCGTCATACCGG -TCCAGCAACGTGCTTCCTGAATACGGCATGAGTCTTGTGAGATCTCATGTTGAACAGCGA -CTCGGTCTCGCCGAAGCTTCCAGTGATATAGTGTCCGGTTTTGTCTGCATTGCGGTGGTA -GATTTCCGGGAGCTTTTCGTGATCGTTGACCAGCAACAATGTCGGGGTGATGCGAACAAC -AGGGCCTGGGGAAATTGAGTTAGAAACATTCCTTTTGAAGATAAGATCAAGACTCACCAT -ATCGCTTAGCAAGCGCATAAACAGTGGGCACCTCAGTATCCAGTAGATTATGCCATGCGA -TCCAAAGACGAGTTACAGAACCCCAGAAAGGACCGGGGAATTTCGCGAGCGGACTAAAGA -AGCGATAGTAAATAACTTGCCAGGTAAACTTCAGCACCATATAAACAACAATTCCCAGGA -TAGCATTGGTCACCGTCACCAGTGACAGGACATTTTCTGCAAGACCCATGGCGAAGTTAG -AGAAACATCGAAGATGAAAGTCCCCTCTTTCTGACCAGCGAAGGGAAAGAGTTCCGGAAA -TAAAGCACCAATGGCCATTAGCATTAAAAATCACATTGTAGTAAGGGGCCAAACGGGCAT -TGGTCAAAGAGCTACTCCGTACCTCGAGCTTAGAAGACCCTCACGACAATTCTGCAAACC -TAGACCCAAGTAGGTTTAAACCGTTGAAGCTATGCTCAGCACGGGGCATTTCGTGCATCG -ATCCCTCCCCATTTGCGCTAGTCCTTGCGCTAGAGTGGTAATAAGTGCATCGTAGACAGT -TTTGTGGCTGGCCCATCGCTGTGCACATTCCTAGTAAAGATCGATCCCGTATAGCTTCCT -AAAGCAGTAAAGATGTTCAGGGTATGGAGAAAGACGTTGTCTGTCAGAGGGGGCCGTGCG -GGTAGGTGGGTGTGCCGTTGCGCCGTTGCCCACGAACATTTGAGACAATGATCAGCATAG -GAGATTACTCCGTATATGCTCGGGGGTTCCTTGCCAAGTACTCCGTACCAGACCAGGTTT -TTGTTCCTACAATAAGACATTTGCACGGGGCTAGATGGGAAGAACAAAGTACTCGGTGGA -GCTGCTGTCGTGGCATTCTCCACCATGGAATGGCCATTTCTCACTGGGCAGTTTCTGAAG -ACTAAATGAAGCACACGATGACGGGCCGGGTCCTCGTGCAAAGTTGTTAAGCCGAATGCA -TACAATATATGCCGAGCATATGCTCTTGAAACGAATGTGAAGAGGTTGGAATCGGGGCTA -ATATCATTAGTCTAGTTCACTACAACATACGATGGATCGTAAGTAGGCTGGAGCGTTAGG 
-ACTCTGAGGCTTGAAACAAGTTAAGACACTAACTCCAAACAGTGATGGCCATTTGTGGTC -TGGTTCTCAAGAAAGAGTTAAAGAGAAAGGCAGGTCAAAGTACGAAGGGCTTCGTAGATA -CATTATGTACAACATATGGAGTAAACCTGGGCATGGTTCAACACATAGGGGTTTGACACC -ACGCTGCCTGGATTCAAATACCGCCTTCCATTTTTAGTCGGACTCGGAATTCTGCTACTT -TATTTTTGTTCTGCCACATATTCAGGGTGGAGTGACTTAGTCTTGCTAGCAGTTTCAATA -TAAAGCGCAGGTACCTAGTTGACAAAGGCGGTCAATAGAGCGGTAGATGGAATCAGGTGT -CAAAGCACAAGAGCACCTCAAACTCCAGGCGTGCAAAGAGCCAAAGGAGCTAGTTAACCA -GTTATCTTCCAATATGAGACATACCAACTGTTACTACTCCGTATATTCGATGTATATGCT -GTGTTAATCGGTCGGTTTAGAAGCCATGATAAACTTCTCTGAGAAACTGATTGACTGCTC -GTTATGTGAAAAAAACACTCTTATCTGCAGTACGGAGTACGGAGGACAACATACTCATAA -CTCATATCACATATGAAACTTCCAAATCATTCCCAGTCTAGCCAAATTCTCTTTCGCTTA -ACAAGCGTCTCCACTAAAGACGGCTCGGAAGGCAAATGGAGTCGTGCAGATCGGAAGTGG -GCTGCATAAAGATGGCTTCGAATTTCTGCGTTAGGAGGTCCATTACTCTGTCATGGAGAG -CATGCAGAACTATTTCGGGTCTTAAAACGTGCAGATCGTTCAGCTCCCTTCCCCTCGATA -TTTTTATTTAATATTGAACATTCGTAAAATCTTCGAAATAATTTTGTGTTTTCTGGTCGC -TCTAATTACTTTTTTTTGTGTCAAGAAGAAGTGTAGGGTTATGGTATACAATATGTGTAC -GTTTGAAAAAATACTCCATATCCAAGTTGGAGCATATTAAGATCGCCATCGGGCATCTCT -ATCAGCACGGAAATAAATATAAATCAGATCTCTCCATATCATCTCCCGGGGAATCCTCCG -AGACCCACGAACTCTTTCTTTCGTTCTATCAAGTCATATTTAACTTTTTTTCCTGCTTCT -CGTTGATTATAAGTAATAGTCCAAACTTTCAAGGTCGAGTACCTTCCAATAGTAAGGAAC -TCATAATGATCGTCTCCGAGACGTCAAATATGGCGGCCCACGACTACCTAGCCATCCCTG -TTCGAGCCAATCCCACCCCGGTCTTCAGGTGGATATCTTCGGCGAGCTAAAAACGAAGAT -TTCTTGTCCTCAGAACAAAGGACCCAATGAACAGAGTCTGAGGCTTGGTCTAACAGGAAT -TGGATATATCCATTTACACCTTGGCAACTGGAGACAGCGTCTGAGCGGCAAATAATAGCG -CTAGAAAGCCCGCCCCAACGCTCTTCATGTGGTGAATGTTTTTTTTTTCTTTAGACATCG -CGAGACTGGCGGCCTTCCTTTAAACGGCGGGCGTATTCTCGCACGAGTAGATCAGCAAAG -GTCTAGAGTGGTGGAGATGCTCGGTTATGACAGTTCCGTCTGTCTCGTGTTAAGTCTATT -ATTATTAGTCTACGGAGTAAAAGGACAAAACCCTGCCAATCCTTACCATGTTTTCTTTTT -TTTGTCAACGGCATTTCAGGCGGTATAGGTACAGTGGATCTCGGAGACTACTAGAGCCAT -GCACGATGCATCTCTGATGTGCACAGTCATTTCATTCCCACTAGTTCCAGGGGCATCTGG -GAGAAAACTCGTTATAAAGTGTAGACCGGGGTACCATCAGCCATTCAGTTTGATGTTCTT -GATATTTCAAGCGAGCATTTTATCATCCGTTCTCCATGAGATCGACATGGAAACGACTGT 
-CTATGCCATTCATTAAGACCACTTCCTTCCTTTAAATCAATTCACGGTCTTTCTCTCTGT -TCATCGTTTCTGTCTTATCTCTCTATCGCCATCGTTATGGCTGGTAGCAATGGCTCTCAG -GGCCCTATGGTCACAGGAATCTGTATAGCATTTGCCGCCTTGACCTTCGTTGTTCTGGCT -CTTCGTCTCTTTTCACGCATTTTTGTCCTGGGTCAAATGGGTGCTGACGACTGTAAGTGA -ATTTTACTTGATCTACTTTGCCGACACGATACTGACTTCTTCCAGACCTCATCATTGGCG -CTTGTGTAAGAAAATTTCATTCGAGCTCAACAAGAGTTACCACTAATGGTTATAGGCGTT -ATCGTGGTCTTTTATCGCTGTTACGCTTGTTGGTATGTTCCAGCTTCGCTTCTTAGAAAT -CTTTGCAGAAGCTAATGTTATTTAGCGATCAAACATGGACTTGGAAAACACATTGAAGAT -GTCGACCAAGCCGGGATGGTCGACTATGCTTTTGTAAGAATGCAGGTCCCAGTCTCGTGC -AAACGATGTTGTCTAACCTCTTCCTAGGCCGTCTGGCTGAGCTCAATGTTCTATCTCGCA -ACACTTGGATTCATCAAAACTTCCGTCCTATGGTTCTATACTCGCCTCGGGGATCGCTAT -CTCACTCGCCTATCCTGGGTTATGATGGGAATTATCATAGCACAAGCAACCTCGTTTGTT -CTCGTGGCTGCATTCCAATGTCAACCCATCAGCATGGCCTGGACTGGTACTGGACCTGGA -AAATGTGTCACCATCAACATTTTCTACCTGTGCAACGCCGCACTGAACATTGTCACCGAT -CTCTTGACTTATACCTTGCCCATCAAGGTGATATTCAGTCTTCAAATGCCTCAGAAACAG -AAGTTTATCCTGGCTTTTATCCTCTGTCTTGGTCTTTTGTAAGTGTGCCCTCGTAGTTCC -ATCCGGGCAATTGCTAAAAGATAACCTTCAGCGCTTGTGTCTCCTCGATAATCCGAATCA -CCTACATCCCTGCCATGCTTACCTCAAAAGACTCCACCTACGCCATTAGCGGTGCCATGT -ACTGGTCAGTCATCGAAACCAACGTCGGTATCTTTGCTGCCTCAATTCCCTCATTCAAGG -CGATTGCTTCCCGCTTCGTGCCCCGCTTCATCGGCGAATACAGCAGTGGGAAGAAATATG -GCCCCTGGTCAAGCAACACCACTGGCCCGCGGTATCCATCGGGGTTCGCCAAGGTCACGG -ATCCCAACAGCATCACCATGGATACAATCCACTACAATGAGGATAGGACCATGGGTACCA -AAATTGGCGCGGCTAGCAACTCCAGTGAGGAGCGCATTATCCCCCAGGGCAAGATCTTCA -CTCACACGGAGATCGAAACTACCTTCGAAAGAATGGATCGTACAAGCTGTGGCTCATCAT -CGCTCGAGCGGGCTCGTCAATGATATCGGATTTATGTTAATGACTTGGCGGCTGTGTGTG -AATTTTTACATTTAATATTCGGGGCAATATATTTCGGTTTCTTATCTTGTATAACATATT -CCAAGGGGCGTGGAAGCCATAACTAGATCTAGTGCTTATGTAAATGACTGTATAGATCGA -TCTAACTACTGGTTTTCTCATGGTGTCTCTGTTCGAATCACAACCTGGCCCTGGAAGAAA -CCAACATCATCCAAGGCATGCCAGTTGCTGGAATTTTGAAGTTTGACGACACCCCAAGGC -TTCCAAAAGTTCCCCCGAATCCACGCCAACTTCCGCAACCAAAGAAACCACATGATCACC -TCTCATCCTTCAATCACACTATACAGCCATCTCATCCCGTCTCACCTCCACATGCGAAAA -GCAGCCACCCCAACACCAATTCCGGCAAAGGTACAAGCCCAGCGCATAAGGCCCCTCCGG 
-AGATGATGACTCACAGACCTCTCAATAGCCTCTGAATCATTGATACGAGGAAACGGCACA -CCAACAGGAGCTTCTACGCTCAGAGCCTCACACAGCGGGCCCCACCCATCCTTAACATCG -AAGAAAATCAATCTATCCTCAGGCACAACCTCTTTTAACCAAGCAACATGGTTCGCATAA -GTCTCCCTCCCATGCTGACCAGCAACACCACCATACAGCTTCTCCCAAAGCGCAGCCAGC -AAATACCCATACGGAATAAAATGTCTCAACCCGTCCAGCGGGAGCAGAACGACACGCGCA -AACCAAACACCAGTCAAACTCTGCACGTGGAGCATACTCTTCTCCCACAGGATAGGGTCA -CGGGTGGTGCAGACAACTTTGGCTTCTGGATACAGCTCCATGAGTTCGGGGACGAGCTCG -GAGCCAGGGGAGTCGGTGATTGCGACGTAGCCGGCCAATCGGTGACGCAGAAGAGTGAGC -ATCTTTTCCCGACTTATGCTATCGTCTTTTTTGAGCCAGTTTTGTAGTATTGGCATCCAG -CTTTTTATTTCTTTTGGTGGCCCTTTCGATATTTGTGTGCCGCAGTGGTACACCGGTCCG -TTCAGGAGAATTTCGAGCGCTTGGGTGAATGATGTGGTTCCTGTGCGTGGTAACCCGGCG -CCGATGACTTTTATCTGTGCACCTGGCTGTGGAACGGAGGCTTTTTGACCCATTTTGTCT -CTATATGGTGGTTTgttgggttgggtcggggttggttggggggtggtttgaggctggCTG -AACCTCTTTTTTCTCCTCTTTCTTCTTCCTCGCAGTGCTGGCTATTTTTGCGGTGATATA -TTTGGATTCATAGGCCTATGAGCATATATTCATGTTCGTGAAATAAATTAGGGGGTGAGT -GGCTTTACGTTTCTTTGATGGATATAATTTCCAGATAGGAGACTCCGGCTGTTTGGATAT -TTCCGCCCCCACTTTGGAGACCCCTCCCCCAGTATGATCAGCGACAAATTGACGATGTAT -TGGCATTTTTGGATCTCCAGGTCGAATGGTTTTGGGGAGAGATCGAATCAAGGGTACGTG -GAAAACGTAATACTATATGTATTGATGGTAATAAAGCATGTAGATCAAGTACTTCCATTG -TGTCTAAGCGTTGCTGGTAGAGCTATGACTAGTCGATGGCCTGAGGCAGCAGATATTTAT -ACCACAGCATGAGCAGGGTTTTATTCGAACGGTACGTTAGTAAATACAGGGAGTAGTGAG -ATCGAGCCATGACTCCATGCACCTGAGCCATGGGGGCCTAGCTGGGCAGAGCAACTTGGC -ATTGGCTTCGAATATGTAGGGCTATTGTCGACCAAGAATACTGAACCTCTTTATTCCAGT -CATTGCAGGGGATCACGGTCCTCTCTGTGGACTTGAACCATTGGGTTATCATAGCAATTC -ACCATTGCGCACCACAAAAACATATAAATGTATTGGCAGTACGTGATATATATTCAGCGC -ATGCTTGATATAAAAATCGAATCGTCTGGCAGAGCCATGTTTGCGGCCTGAAAGCTAGAG -AAAAGAAGAGAAAAACAAGTAGGAATCACCATGTAGAATCTATAATACCAAAGGTGAAAC -TTCCTAGAATTCAGATTCATGAAGGCCAAAAAATATGTGAGAAGTaaaaatataaaaaaa -ataaaaaaaGATAGATATAGCATGTCCTGGTTTCGATCCAGGGTCCTCCGGGTTATGAGC -CCGGCGCTCTTCCCCTGAGCTAACATGCTGATTGGTGGAAGAATTCCCCTTTTAACGCAA -TATAAACTGCTGAAAAGAACATCCAAAAGCGATAGGGATAGCAGCAGTAAAATTTTCGGG -GGCTTTCTTCAATATTGATACACAGTAAACATATCAGTACTACTTTCTGATAGTAGAATA 
-AGATTGGATAGAAGAAGAGTATATCTAGAACTAAGAAATATAAAGTTTGGAAACGTCAAG -CGCATAAAGAAGAAGAGTTATCGAAAGTTGTATATAGAAAAGTCGCTTGCACAGAGAATG -CTTTTTCCGTCTTTTCCTGCCGTCAACTTCAATAGAAACGACTGGAAATGGGATCTTGCA -CATAATTAAAAAAGCACATGATTAAAAAAAAAATTCCATGAAATGCGCTGGAAACCCCTC -TGGAGGGACCTCTGAAAAGACCTAGAAGGCATCCCGAGGCCGCATAAGTGGCCGTCTACC -CTTATATGATATGGGCTCCTTAAAGCAAATGATGAAGATCATATGATCTTCCAATAGGAT -GACTAGACACTTTTTCACCGCTGCCCTACTTGTGTAGACTAGCATGCTGAGATGATTGCT -TCTTCTTATCAACTTCGTTCCTTCCTAGAAAACTAATATTGAATTTGAAAAAAAGAAATG -TAGTATATAGATGTTGTATTGGATATACCGTCAACATCCCAGCATGTTAGCTCAGGGGAA -GAGCGCCGGGCTCATAACCCGGAGGACCCTGGATCGAAACCAGGACATGCTATGTCTATC -TTTTTTTATTCGTACTATCAGAAAGTAGTATGTACTGATATGTTTACGTGTATCGATTTT -GAAAAAGCGCCCTAACCGCCCTTGCATTTTTTTTTTTTTTTACAATCATTACAGACCGTG -AGGAGGGTCCGCCACCCATGTGAAAGGAATCCTATATGAAAATGTACCTTTCACTAGGCA -GACCATACAAAAGGTACTAAAAGCTATACTTCATAACTTTCTAATGCCGTTCAAGTTTAT -AATTACATATACTTCCTGGACACCCCTTTTTCTTTTTTTGGGAATAAGAATAAGTCTTGG -GGTTATCACACTAAATATCCGGATATTTTCAATTAATTCTAGGTAAGTTGGATGAAGTGG -TTGGTGGTTGAAGAGAGGACCTCAAGTAGAATTGATGAGTAATCTTTCATATGGGTGATC -CCTCTTATATGGGTGGCGGACCATCCTCATGGTCTGTAAGTTTGTAAAAAAATGTAAGGG -CCGTTTACTATTTTACACTCCGTACTGCTACCCCTAGCAGATATGTCCAGTTTAGCAGGT -TATATTGAACCAGTAGGAGAACTGTTCCACCAATCAGCATGTTAGCTCAGGGGAAGAGCG -CCGGGCTCATAACCCGGAGGACCCTGGATCGAAACCAGGACATGCTATGCCTATCTTTTT -TTTTAAAATATTTTTTGCTTTGTTTTATTAGTGTGAACATTGCAAACCCAAAATTCGTTT -TGACCCAATAATAAATTCCACTCAATTGCAGGTCGTCCGCAAAACCTTCGAAGTTTACTC -TGACAAGCTCTGGCATCGTTTGTCACAGTCAAGTCATTGTATGTCACGTTATCTGGTCTA -ACTGACCTGAATCCCTCGGTATAAGATCTCACGAGGCAACAGGGATGGCATAAAGGATAT -ACTACACTGATTTGGTACAAGAAATATCAACCTTGGGCCTGACTAAGAGTTCGTTGGCTT -TTACAAACCAACTCTGAATTATGAAGCTTCCAGTATTGTAACATCAGGTGTACTTATTTC -TTCCTGGAAGACCACCGTTGAGCTTTGTCAGAGATTTGGTAACAAACCAGCATGTCTTGA -ACCCCCGACATTCTTCTGCCGAAAAGCCTTCTAAAATATCCACGAGGGAGTTCGTTTTTC -GTATATTGCACTCTCCAGCCGCTTTCAACCAGCAGTGAAACCCCCCATAAACATCTTTAC -CGCGGATGGAAACCCCTCAGCGGCCAGCAGGATCTGTTTTCAAAGCAGAAACAGCATAAG -CGTGTGGGTGTGGAGTCGAGGTCAGATGGTATTGGAGAAAAACTTCGTCGCCATGGTGGA 
-GATCAAGCCCAAATATCTTTGAAACAGTGAAACTGAAACTACCAGTTCTTACTTCAGTTT -GGTGGTCTCCATGATTCCCAGAATACAAATTCACGAGAGCTTTGGTCCCTTCAAGGTGCT -GCTTAGCCATGCTGGCTTTCTTTTTGCCCCAGGTCTCCTGTGCTTTAATCATACGCTCTT -TGTGAAGATCCCACCAGTAATCTCTGATCCCTGGGTCAGGCGAGTTTCGTATATCGTTGA -ACGCATCTCTGGTTTGCTTTGCCCATGCTCGAATGTCTAGATCTTCAGACGCAAGGTATC -GCTGAAATGTAGACCTTCCGCCAATTGCGTCGGCGATAGCTTTGTTCTCTTGGGTATCTG -TGAACCCTTGCCATAATGGTAAGGCAACGTTGAGGTGATTGCCCGACCAAATAGCATTTG -TGTTTTTCGGCAACCAGGCGTCTAGGTGAATAGAGGTCAGCGTTTGGAATACCAGACACA -TCCACATCTCTGCCAGGTTGAGAATCAGCTGCGTTTGAGTGGAAGATGGGGCATGAAGGA -TAGCCAGAGTGACAAAGGCTGAGTTCATATCCACGGCAGAGCCCCACACGGCGTAGTGTG -ACCCCATATTCTTCCAACGACGTTTAGGGTCTTTGTGTTTCTGTATTCTCAGACACAGCA -CACATGCCTGTCCGACATAAAGCCAGAATCTTTCTGTGGCTTCGTCTTCTTTCCATAGAA -TGTGAAGATACACACCTGCTTCCTTCTTCGTTTCTGGGTCTTGTCCAATTTCCAAGAAAC -TAGAAGGAATCATTAACAAGTTATCTTCAAATCGTCAAGCTTTAACATGCCTGATCAGAT -CTAATTTACCTTGTTCACAAATTTCTTTGACGGCTGGGCTCAGACTGTCTTCGAGTATTC -TGGCAATATCATACTCCTTGAGCACTGAAGAGTACCTCTCGCTCATTTTGAGGCGCGCTT -TTGAAATTAGATACCCCAAGATGGAAGTCCGATCAACCAGAACAAGATTTAGGGTCTCAA -TTTGATCACTGGACACCTCACTCCAGTTTCGTCACTTGGGGGTTGCGATGCCGCCGCTGG -CTGATCTCTGTGGCGAGTAGTCATTCTTCGATTAATTTACACTTGCGCCGTGACCATTGG -TGCTTTGGATGAATCATGAGACATGGCCTCTTCAGTCAGCACAAGCAGGAAGAAAGAGAT -AAGAAAGATATGACACATACTTCGAGCTGATTGTCAATGTAATCCAGGTCATTGATCGGC -ATAAATTTTGTAGGGTCTGAACCTCGTAGGTACCCTCCACTTCGTCCCTCATGATAATTT -CCTCTGCAGTGAATGGACGTATGACCAAACCTCCCGCATCGATTGCAAAATCTACAGAGA -CATTCATCGTATCCCTTCATCAGACATGTGCAGTCTCTGTATTCCGCAGACATTTCAAGG -TCTCAAAGATTGTAGAGAAATAGATAAATCAGACCACTAGAATGAGCTTGTGCAGGCCAA -GCAAATACAGGTAAAGCAAAGAGAGATCTGGGGCGTCGACAAAGGGAAGAATAGAGGACG -ATATGAGTCTGTAGGATCATACAAGATCTAGTCATCTTTGAGGATGACCTATCAGTATCG -AAATGATTAGAACGGCAGTCAAAGCTTTGGAATACTCGCAGAGGTTAGACCGTGGCGGAT -ATCAAGAACGTCAACTTGGCCATAAAAACGTGATCGATACACACAACAAGATCAATGTGG -ATTAAATGGCCGGGAAGAGCCAGGCATTCAGGGAAACATAGTGACAATACCAGATGCTCA -GATGACAAAGAAGATTATGGCGATATTATCAAAACAGTTACTCAAATAGGCAGATATTCA -GTTTCAAGTCTAAATGAGGTTCTGCAGAGTCAGAGTTGCGGTGATGCCGTTGGAAGATAA 
-AAAAAGACCACACTGAAATGTTTGACGCATGAGCGAGCAAGATTCCTCTGAGACTATAGA -ATATACTACTGGTGAGCAAAAGGCCCATGACCTGTGTAGGGGACTTGACCAGATAGCAGA -TATCAGAAGTTATACATAGAAAATTCTCTTGCACAAAGTATGCCTTTTCCGCCTTTTTCT -GCTGTCTACCTATAGGATTGGAAGGAAGGGGGGGAACCAAATGATCTCAATGATGTCCAA -CAAGAATATCTGAGAAGCTCACTACCGAGACCGCTGAAAAGACTACTTAAAGCCCCTATA -AGTCGTCGTAAGGCCACATAAGCGACTTACTCTTATAATGTTGATCTGATAAAGATAGTA -CGTATCTATGGATATCCCCGCGACCAAATATTATATTTGCTTCTATAAACATAATTTGTA -GGATACCAAATCTCATGCAAAGCTTGTAAAAGCTTACTGAAAAAGAGTCTCTTACACAAA -GTTCAATATATGCTTTTCCCGCTTTTTTCTGTGGCCTACCAACAGTACTGGAAAGGGAGA -AACTATCTTGAAATTGTTTGAGAAGAACATCCGAGAAGCCAACAGAAAAGACTACTTAAG -AGCCTGATAAGTCACCACAAGACCACATAAGCGGCCGTCTGCTCCTGGCGTAGTAGGTTG -ATGTTAACGTGCAAGCCCTAATGCGAAATGATATATGAGGGAAAATGTCCAAAGTACTAC -CTGTAAGTGTAAATTGCTTTTCAAAGTCTGGCATATACCTGACACTCGCATTTTGCACGC -ATGGCTATTGCAACGCAGATACCTCGTCTCTCGAGCATACATGTTTTCCGCCCACATACA -AGATACAGAAGATCGACGGGCTCTTTCTTCATATCGATCTCATGTGAGGCTACACTACAT -GAGGTTGCATAGTCTCATAGTTGTCTGTAGCCGGCCAGAACATGTGTATGTGTACAATGA -TATGGGCGTCTGTCTTCGATCGGCATCTGCAGACGTTGAGTACACTTGCATAAGATGGAA -TGCGGGGCGTGCACTCGAATTCAGGGAGGATTGGCAGGTTGAGAGATGAACTTGGCATTA -GGAGTATTCTGTGGCTGGTGTGCCATGGTTAATTATTTTTCCAATATTGCTTGTACGGAG -CCTTGAGTGTCCACTCCGTACTGGTATGGACCATACCGCATCACGTGCTATATATCATAT -GACCTCTATGCGACCACGCAAGGGATATCATCACAACTCGAAATAGTGAATCGAACAGAA -GCCACTTGTCACACCCGTACGGTACCAGGACGATCATAAGTTAGACAAAAGAGAAGTTTC -CACATCCACAATTGTCTCCCCACCTTCCGACTTCCCCCCCACTCGGGTATTAAGGATCCA -CAGTATTTAAGCCACCGACCCACCAATGTATATCAGCGTCTCTATCATTGATTGTACAAC -GTATGTCAGCAATATGTAGATATACGGTGGCTTATTGGATATGGTATATGATAAGCATAA -GATAAACCCCAGCTGCAGACAAGGATGAATTTTGAAAATATTTGGCTTGAGCAGGATCTG -ATGACGACAATCAAAGACACGAGATACGTACCCGCTCAAGGATAGGAAACTGGGACTGTC -CTGTACAAATCATATACCCAAATAAGTAATTCCATTACTTGGACATGGACATCGGCATGG -ATATGATCAAAACCCTGAAATCCAAGTTAGGACTCTGCTACATTGTATATTACGTCGACA -CCCCAAGACCAACCCAGATTCCAAGTACATTCCATCCAAACAATGCCGGTACCGGGTACC -CGGTACCAGAATACTGGTAGCTAAATTGCCTATCAGGCCAGAATTTGGGTGAGGTCGGGT -TTGTTCCAAGGCTTTCTCGGACGGGACTACGCAATCGTGGTATCATACCTATATTGGAAC 
-ACTTGTGTACAAGATATTCATAGGTAGATGCCACTGTTGGGGCTACGGAGTACAATGTAC -GATCAGCGCGAATCGGGATTCGTCCGTGGGATGTCAATTAATTTATTGCATTCTGGAATA -TACAATAACCTTGCGCGTGGTGCATACTTGGGTTATTTTTGTCAACTTTAATTAGTCTAC -AACAATATACAAGATACAATGGTACAAGATGTAATTCCGTGATTGCTATACATAGTGACT -CTATCTAGAGATCTGCGTTGAGGAGCTAACATGGCTGACCTATGTGGGTAAACATCCATG -CGACTGTGGATGCGAGGGATTCGACCGCATCCCTCGACCCTTGATTGTTTCAAGAGACAG -GAGAGGGCATGTTTCACCGTTTCCAGAGATATAGTAAAGTGTGGAGAGAATGATTATTTC -CAAGTAGATTCATGTCGTATCCAAAGCTAATATACATTTTTTTTTGGAGGAATGTACTAT -AGTACAAGAGGACACACTGTGCGGAGTACTGGAGTCAAAAATGCAGCAACACCATGTACA -ACAGAAATGTACAACACCAAAACTGTTTGTTTCCCAGAGCCAAAGCCATAGACAGAAAAA -CAATAAAAAAATTATAGAGGAAAACACACCGTCCAATGCTAAAAGGGGGTTCGCATGTAA -ATCAAATCATCAACGTAAAATCGTACACATAATAAAATCGGAGTCCGAATAGTGATACAG -AGAGTTCATGAGGTTTAAGGAGCTAGATCGTCATGATGACGAAGCCCCGGATTTTTGGTG -GAGCCCAAGTGGATCCACCTATGACGTTCGGCATGCCGATCCGGTGGGCTAATGACGTAA -CTGGCGCTCTCAGTCTCTCCTCCTGCGCTGCTTGCCTGGCCAGTCGATTTCGGGATGTAC -CACTCGCATGTCTGTAATTATTGTAATTAGCGTATGATCAACTTAAGGGGTAAAAGGGGG -AAAAAGATACTTACGTCTTGTCAGAGCGTCGTTTCGGCTGAAGGTTTTGTCCTCGTTACA -GATATGACAGCGCACCTTCTGTTTCCCGGCGGTGTGGATAGTATGCTCGTGTCGCGTAAG -ATCGTAAGGCCGGGAGAAAATCGAATTGCATGCTTTGCCCGTAGATGGATTGATGCGCTC -GCACTTGTGCGGGCCAGCCTGGGAGTTGCGGAGCGAAGTGTCCCGACTGACAAGGTGGCC -GCCCGGTGTTGTTTGCCGGTGAGCCTCGCGCTTGTGTCGCTGAAGTCTGGAGGGGGTGTC -GAAACGGTACGAGCAGTTGTGATAGGTACAAGTGTAAGTACCCGCATCCGAAGATGTGTC -GGCGGGACGAGATATACCCTCTGTGATTGGCATGTTCATCTGGCTTGGAGACGAGCTCTC -GGTAGATTCCACTGCTGGAAGTGAAGGGAATTCGGTACTCTGCTGCAGGAGATTGCTTTG -AGGGCGATGCATCTGAGGTTCAGAAAATGCGAACGGTGGCTGCGCCATATTGGCTGCCGT -GTATTGAGGGAATGCTTCCATCGGTCGCATTGAGCCACTGTCGCGTCGCAAGCCTAGTGC -ATCACCCAAATTGAAATCACCCTGACCATGAGGGAACATAGGCATGGTATCATCGCCATC -TTGGAAATCCAGGACTGCGTCCTTAGGTGACATCGTCTTAATATTTCCAGTGTTGCTCTG -GTCCATATTCATTCCGTTCATACCGCTCATACCGTTCTGTCCCATGGGAATGCTGGTCGC -CATCTGGGGCTGCTGAATAGTATTGTTGTTTATATCACCGGCGAATGGGGAAGAGGTACG -GAAGGGCGAGCGATCCCGTCTGCCGGAAGGCGATTGCGAACGAGCAGACAGGTGTCCCTG -GTTGGCAGCTTGGAGGCGATCGGCAATAAAATTGCGTGGGTTGAGATTGTTCTGTCCCAT 
-CGGCTTGGACACTTGGGGTGCTGCCATGATGGCAGGGTTATATAATTCGTCTTGGTATAT -ATCGGAAATGGTTCTGTTCAGCGGAGGGACTCCAATTGACATGGCATTATTGGCACCGTT -GTAGTCTGGAGGTGCATCATAGCGTAAGTATTGGTCCATTGCCCAATCAATATCGGGAAT -TCCATTCTCACCATTGCTCATCGACTTGGATGCGTCGTCAAATTCGTCAAGGGTAGTCTG -CGGGGTCACCGGCGAGTTGTGGCTCATTGTTGACACCGATGGGGCCAGTGAATAGTCGTT -CTGCATTTGCTGCGGAGGCGACTGCTTCTGCTGGTCCATAATCGCTTTTCTCATAGCAGA -CTCAGCCTCGGCGCTGTGACCATCTTGGGACGAGGGATCGAAATGTTGAAAGGGAGTCGC -AAGGAATGAGTTCTGGAGAGGGGTTTGTACCGGACTGGGGAATGGCTTATTGGCGCTGTT -GGAACGGCGGTGGTAGTTGTTGGCTGGCACCCCTGAAGGGAGTGAGCGTGAGGATGCCGA -AGGCGAAGATCTCTTTTGCACACCTGGGTATGGGATGCGCGCAGGGCTCAACAACTGCAG -GTTGCCGTCCCAACCCTGATTACCGGCGAGAGGCTGCTGTACCAACATGGGCGAATAAAA -TTGCTGAGGATATTGAGGCAATTGGTGAGAAGGCGAATTAGAACCCTGGATGTCGTACGA -CGGGTTATATGAAGACGCTGGGGTCATCGAAGGAGCCCCGAAAGATTGTGAGTCAGACGT -CGGGTAACAAGTCGAGAAGGTTGGCGAAGAGAAAGTAGGCGAGGAAGGTGTGGAAGATGG -CGGGGAGAGCTGTACACTGGGGTGACACTGAAATGGTGATTGCAAGCGAGATTGAGCGTA -GTGAAAGGTGTTCTCGGGATAAAAATCAGGTGGATACTGGTCGTCTTGTATGATGATGTC -CTCTGAGTTGTATGGATCGGTGATGAGACCCTTACGTGGGTACATAAGCATTATGTTATC -ACGGTTTGGGACGTCAGAGAGAAAGTGGGTAGTGTTCGCCACCGAGGAAAGCGGGCTTTT -ATAGAACCCGACGGCACAAGATCCGAGTAAGATTTCCTGGTCAATTGTGGCCGTGCTCTT -GGGGAAAAGTGAGATTGACACTTTGGTTCAAAGCTTCAGGTAGCGAAGAGTCTGGAAACG -TCGGGAGGCTGATCAAATGGAAGTATCAGAGGGCTGGAACTTTAATAGAGTTGACTATGG -TTGATGTGAGTTTGGTTCTGGAGAATGACTCGACCGGAGGAAAGCAAATCTTACTCAATA -TTGATGATCTCAAGAGTCAGAGTTGTTGTGGGTATGAGTAATGAAGTTGAAAGGAGGGAG -AAGTGAGTTAATAAAGAAGAGTCCTGGAGTTTTATGTACTTGGATGACCTCACTTAGAGG -TCGGTCTTCCCCGAACAAGGATCTTACCGTATTATCCATATTTAGCGGTATTTATCAATT -TAATTATTTTTAATTATTGTCGATTCCAAGGCTAGTCCAAATCGGGATTAAATTCGTATC -ACAAAAATACGCATTTGTTTTTACCGAGTCCCACTGATCTGATCTGGGGATCTTCCTCGA -CTCTTTCCTTTGTGAAGTTGTGGCTTACAGGACACAAAAGCCACAAGGATCCACCATACC -GATATTCTATGTGGGCTTTTGGGCGTCAATGGTAATTGTTGATCGAAATAAATAGGTGTT -TTTTTTTTGGCAAGGGAAATTCGAGACAAATAGTTCAGCCAAGGTTCACAGTACATCGCT -TAGAGAGAAAAGGAAAGAAAAAATTTCTGTAGTCTAGGACTAGCTCCGATTTGCACTCGG -GTGACCAGGGCCAAAATTAGGAAAATGGGGGTGGAGATACCGAGGAGAAATCCTTAGGGC 
-TGAGCCACACCTTAGTTTGTTTTACCTATTTTGGTGAGTAAATTTTGGTTGGGAATTGAC -CCTTCGGCCCAATCTACTATTTCCGGTGAGTCAGAACTCATTAGTATACCCAAGGATATG -GAATTGGAATCCGGAGAGTTGTTCAGCCGAACATTATCATTCGTCTCCAACATACGTAAC -AGACTTCCGAGGTTTCTTTATTGGCACTAGCCACATCGAGAGCCATGTGGCTGTGATGGC -CAATGGTTAGGATCCGCCGTCCTAACGGTGTAGCATTCCTGCACAACACCTGGGGAAACC -TTAGCACATTCGTCGTTCTTTGTACTCGGACCCAATTATTTCACTTGGATACCCAAGATT -GTGGCTGTGAAGCACTTGTCAATCAACTGTCTCCCGATTAGCCTAAGATGTCTCCCCACT -AAGCGCCAATCGGCGGGTTCCGCTCTCTCTCGACAGGCGCGATAGTCAGCTGACCATACA -CACAAATGGCATGATAAGCTGAAGTTGGACACGGGTCCATGGATCTTGTGTGTATATGGG -AACTTGATGACCAGCTGACCCCGGATCTTGAGTCACCAGTGATTTTTTCTTCCATCTTAA -GCGATGTGCTATCGTGTATACTTTCCATGTCTTACACTTGTTCCCCATGTGTAAGCGCTT -GTCTCTACGGGACTTAGCTCTGCCGTAGGCTCGGCGGGGAATGCCCACTCTGGGTTGATC -ATTGTTGGGCTACACAAGAATTCCATGCGCATCTAAGATCATGGTCACATGGTCTTTTGT -ACTGTGGGCTGATTGCCTTAATTTTCTGGTATGCATTGTTCGATGATTCGATGACTCGGT -CACATGTGAATCAGATGTTTCTCCCTTTTGACTCAGAAAGTGAGAATAAAATTCTGTATT -AAATTACAATATGGCTTCTGATTTTAGTATTTTCAATTTTTCATTTCCTGGCGCTCGGAT -CGCTATAGATTCATTTGTTAGCGGCTTTGTAACACGCGGAAGACCTTCGGTAGCGTGGTA -AAAAAAAATGCCATGTACTGCGGGTTACATTGAAATTTTCTGCTTTAGCGTTTGTCATCC -TTCAACCGTGCATATGCATATGGCTTAGACACTAAATGCCAGAGTATCAGATCGCAGATC -AAGGGCCTCCTAACCGAGCATCGGCAAACCCCGACTTTTTCAGCCAAGTTATTGGGCATG -AAAACAGGAGGCAGCAACAATACGATAGTCGAAGTGCCATGTGACCTCCCAAGGGGTCTC -CCTTTCAAATCCACTAGCGCAAATGTAAATCCGACCGGCATGTTCTCGGGGTTAGGTGAT -TTTCTCGGTCTGGACACGCCGAAACGGGTCCGGGGTCTGCCGTGTCTACCGAACAGTCTG -GCCCTTCACCTGATCTGCCTTCTTCCGTGGACAGACCCGGGAATGATCTTGGCGAATTCC -TCGAGGCTATACCAAAACGTGGCCTACAGAGTCCGGGGAAGTGGCAAACCAGCCTCAGCT -GATCCAAATATCTAGATCCAATGGCCCACTCAGATTATAGTGGGAATGTGTAGATCTAAG -GTTTTTTTCAGTCGCTATCAGAGGACGGATTCGCGTACCGAAGGCATCCTTTTTTAGAAT -ACACAGAACCTCCTTCTCAATTGGGCAGGCATCGAATCCTTTCATTCATTACCAAGAGAG -GGCCCCTGCATGGTGACTGAGAAGATCACGGAGAAAAAATTTTCACCGGCATGCTATCCC -GACCAAAAAAAAATATCTGTCAGCACTCTAAACGGGTTTCTCGGCGACAGGTCAAGATTG -TCATGCAGGTTCATGATTAGTAGCAGTCTAATGCAGCAGTGAAAAGTTTTTCAATGTAAA -TGATTGGCCATAGGAGCTAGGGGTGCACAGCCACATCTCGGTACTGCCGTTACGCTGTCC 
-AAGTTGCCGGTTTAACCTGTTACCATACGGCACTTGCTGCAGCCACGGTCTGTAGGTAAT -CGTATTCGGTATTTCTGGTATGTTCCAGAAGGAATTTTGGTGGCTTGTGCATGCCACTAA -TGCTATATTCAGTAAAGCGTGGTTGTTTTCGTTCTTCTGGAAGCTTTCTATTTTCGTCCC -GGAATGTCGGCGTCCGGGTATCGCCGACATTGGAGGACAATGCTCCTGTTTTCGCATTCG -TATTCAAGTCTCTCTTCGCTTTTTTTTCGCTTTTTTTCGCTATTTTTTCGCATTGCATAT -CTGATTGTGGCTGATTATTGATCATCCCGATATCATTCAGCGTCACCACAAATGACCAAT -TCAATACTAACTAACTATTCTGACTCACGCCTGGGCAACTTCAAATGTGCTGTCACGTGT -GTGCTGTAGATCATTTGGGCTAGCCGCATATAGGTACTATACGGAGCATGCTTCGTCATC -TCAAGCCAGAAATTTTCAGAAAAGAGGCTTCAAGGAGTTTTGTTTGGAAAAAGGTTCGTG -GCTATGCAACTTATGACAACAATGGCTGGCAGCTGCCTTACTCGGTTGCCTATTTGGATG -TAGCTATACCACTTCATAAGCCGCCCACGATCTATCTCTTCATTCCGGAACGCCACATGT -CTCTGATAGTACTATAGTAGTTATACCTGCGAATATGCCTATTGGTATGATACCTGTACT -GTACAAAATCGGAAGCAGGGGCCCATAGATGACCAAGCACACTGTGACTCAGCCTCCAGT -TCAGTCACTTGTGGTGGTTCTTTTAGCGGTCCCTTAGGGTATCCTAGCTAGTTGGTCGTT -GGGATAGACGGGAACTCAACCGCTACCTACGGAGTATTTGACGTAATATGGCGGTACGCA -CCCCGTTTAGGTGATAAGAGTCGCTCTCGCACCGTATTTATCGGTGACACAAGATAATAT -AATTACATTGAGATTATCTTAGCACAGACAATAGAATTATGCAGCAGTGTCAATCTAGAC -CCCATAAATAGCAATAGACCCGTGAGCTTGGTCTGCAGGACGCTGTCCCAAGAACAAGGC -AGCAAGCTGCAAGGCATCGGTATTTCTAAACAGGGTAGAGAAGCTAGATACAGCCTACAG -GACATTGTGGCGTCTACTCGCAACCAAGTTTGATCCCTAGAGCCGATAGCTTGGGATATA -GTATCCCGATAGCCAAGTTTCTGGGGCTACTATGTCGGTATACCGCATACGTTGTTATAT -TTCGAGATAATAAAATTGACTAAAAGGTGTCTTTCTGTCTATCTTTAAGTTAAGCATATT -GCCCGTCAAGTGTGCTGTAGAACTTATCATATCCAATCAAATCAAATGTATATTTCATAA -TACCTTACTAAATTCCCACATCATGTACCCAATTATTTAATTAAAGGCCCTACTATCGTG -AACTGCGACTCTGGTGTACAACTTGATGAGGGTGTAGCTGCATAGTATGCTTTGGACCTA -AATGATTTTGGTCTATATAGTCAAAGAGGATCCTCGTCTTCAACATCCCAGCATGTTAGC -TCAGGGGAAGAGCGCCGGGCTCATAACCCGGAGGACCCTGGATCGAAACCAGGACATGCT -ATGtctatcttttttttcttcttttctcctcatgattttcttcttctCAATCTTGCACGC -ATTCAGTATTGGCGCGTGCTGTGTTTATTTTTGGTGCAATGGGTTTGCATAGCGTAATTT -TTAGGCTACATCAGCAAAGGGCACTGCGACGCCAGGAACTTCGTTTGGCACCACTGTTTA -CGACTGTGGCTGTTCCCTGGTGACGTGGTTCGGGACTCGTCATGGCCTATCAACATTCGT -GATTAACAACGTGGCTTGCATGTCCGTATTGAGGACTTGTGGATTCGATCCACCTGGGGG 
-TAAAGCGAGAGCACGTAGACATGATCATATAGCCCATCGATGTCTCTTCTGTCATATAAA -TAGAACATTTGGCTGGAGATGTGTCAGAGGAGACCTCAAAACGCATGATGCAATATGAGT -AAGGTATTGAACAAGCCGGGACGATAGAATTTGACAACTTACTATATTTGCGTAGCCAGT -TGTGTTGAGATGAAATGTTGTCAGATATATTAGGCATTCATATGCTATATACAGGATACG -GATCAAGTATGGCAAACAATAGGACATAGGACCACGTTTTGGCAGGATAGATAGGATCCC -AGTGTCCTATTCTTTCTTTCTTTCTTTCTCAGTCTAGTTTTCATAGAGATATCTGAATCA -ATATGATTTATTGACTCATCATCTTTATCATCAAATGTTGTGGACTTGAATTCTGTCAAG -TCCAGTCCGATATCTGACATCAGCTGTGGTGAAGTGACCAGCTTCTGTAGGTTCACATAG -ATAGTTCTCTTGATTGAATCCACTGCCGTCACTTGTCGAGTTTATCGCTTCCCGTGTAGA -CTAGGACTCCCCTAGGATCAGCTTGAAAGATATGAGCCAGGCCACCCTTGCTTGGCACTG -ACTGTGTTTTCCAACGACAAGGAGACAGGCTCATGGCTTGCTGGGCGTTGGGGTCAATGA -CGGTGAGGCCATTCAGGTATTTCAAAAAATCATATAAGTCTCGATCCCGTCCAATATCAC -ATCATCTGTAAGCCCTTTCACATGAAAGGCAAGGATGCGTAGTCCATTTCGACAGAAAAC -GTGTGTCATTTTTCACCACGGGCCCAGCTGTTGGTTGTGTTTGTGGCTTTCTCGGGCCTA -CAAACTTCAGGACCCAGCTTTGTTGCAGAACTTGTTGACTGGGTCAGCGCCGGATAGTCG -AGATACAGGCTGGTGTCCACGGGCAGGCATTATGTCGAATCAATCGTATACACTGCAGCA -CGGCATGCTCCTGTCTTGCACGCTTGAGGGCACTGCTGATCCCATCGTAATAATTCTCGG -TATTAAGCAACCTCGTTCGGGTGGTTGTCCAGTCACTTGCCCTGTTGACCACGAAGGCCA -TGTCCAAATGTGTTCCCGGACGTTTCTCTAGCATCTTGATCCAACGTCCATTAACCATTC -ATCGTGTCGCTTTCGGTCGGGTTTCTTACCAAACCGCGTATTCGTACGCAACAATCAGGG -CTCGTTTCTGGGGGGGCATGAGATTGTCAGTGTAGAGTTTTGGCAATAACCTGCCAGATA -AGGTGAAAACTATCTCGTGCGTTGGGGGCCGGCGGGAGGGGGGGAATGAATCCTGTCATT -TCTGGTAGGGAGGGGCGATTCGGATGTTAGTAGGGACCGTAGGGTTATCTCAGGTTGGTG -GGGGCGATATTGAGGTCTGTCTGGATTCGGATCTGGGAGCCTTCAGTGATCATTGCGCTG -CGTATTGATCAATGTAAGCAAGGTTGGCCTTCAATGATCTCACTGGTCGAGTTCGTGTTG -CCGAAGTGATACCGCCAGTGCTCTATCCCAGTCTCGCTCTTGATACTATCCTTTTCTCTT -TGTTGTCTGCTTGGCGTCTTCGATGTTATTGGTGGTGTGGACAGGTGGATTTGATTAGAA -gtcgtggtggtggtggtggtgatggAGCATCCTGGAGTATCAAGACTGACAGATTAGCTC -CCACATAGTAGGTGCTCTGTTCGTCCGATATCCTCTTGTAGATTTCGGTGTGTGAGATTG -ACGATTCATCATCTGGTGCGTACTCTGGGTGTCTGTGTCTACTGGTTGCATCATGTCTCT -TGCTTTATAATTAGGGGGTGAAAAGGCCAATATCTTGTTTCATCTCTAGCTAGTGTCATT -TCAACAAACCGGTGTCTTTCTTCGAATCTAGAAAAAGAGCTTCGCCAATTTTTATTACAT 
-GTTCCATGCACCATAGCAATTCCAAAGGGATACAGTCTTCTCCCTCATTTTCACGGCCAA -ATCAGAGGTAAATTGCGGGAATGATTTCACCAAGCTGTTTGGGAATGCCGATGATAACTT -GTCTTGTCCAGGCTCTGCTTTGGGCATTCTCAACTCCTTGGTTATTTTAATGGCAGTGTC -TCGCAGAACCTGGTCACTTTCTGGTGTTGAGCTCTTTATCTGAGCGATGGCATGAGGAAA -TATCTTCGAGTTGAGTCGCTTGCCTAGCTCTTGCTCGGCTTTATTCTTTGAGGGAACCTT -CAGTCCTTCGTTGAACATTCGGTCTGCGAGGGAGTACATCAATATGTGTACCGATATGGG -AACTACTGAGGTCATCAGGCCTCCAGCTGATTTACTAAGAGCTTCTGTGCTGTCCGTTCT -GGTGTCCGTGTCCGTGTCCAACGTCCAATTGTCTAGGCCGATAACGACGTCTGTTTCTGG -ATAGTGAATTCGTACCTCGTAGTCCAGCTGATAGAGATACTCGACCATGTCTTTGACTAA -AATCAGTTCTTCCTGTAGTGCAACTCTGTTTGTGGATGACCCCTTGGTTAGATCGGTCAA -ATTGCCACACTGGTGTATTGTTCTGTCCCGTCAATGTCTCACCTGCAACCCACTATCACA -AGCTTTGGCAAAGAAGGCTAACCGTGAACAAACTATGCACTTGTGCACAGGGTACTCATC -GTCTCCGCATGTGATGGTCATGTCGCTGTGTCTCGGATGCAAGTACATACTATTGTATCG -TTGGTGGGTTTTCCAGTCTCTACGGATGTGCCTGGTACCTTGCTAGGGCGCGAAGGACTT -CATTAGCAGGCTCCTTATCCTGCCCTTTCTCCTCCTTCCCTTTGACTGTAGATGTCACCT -GTTGAATTCTGACTGTGGATAGCTTCTTCTTCGTTTTCATATCGACAGGATGGTGGTGAC -CCGAGCGGGTTCCGACTCTCTTGTGGGCTTTGGTTGTTGATAAGCTAGAGTGCTAGAGTT -GGGGGTGGTGAGGAGAGCAAAAACTGGAGTGTGTGTTTTCATGCACCCTTTCATGGCAGC -AATATTTGCTTTGGCTGCTTGGCCTGCACTTCTGCCCTGCATGTCACGCCTTCCTTCCAA -TCTAGCAAGCCCAGTTGATTACCGTTCTGCTTTTATTCGTCGCTCTCTGTGCCCTTGTGT -GTTCTTATATGGGGCTCAGACGGGGAGCTGACGATTTGATACTTCTCGGTGCCCCAGCTT -CCCGGGTATAGTTGGACCCTCATTTCGGGGTGTTCGATGCTGTCTTCAGATGATGATATG -GGCTTCTACTGGGCTTAGGGAACTTATCGGTGGGCTTTGAAATTAACATTGATCATTGAA -CCTACTTCCAGCGTGTTCTAAAATTATAAATTCGCAACTTGATTGTGGGTATCGGGATCA -ATCACGCTCTCTAGTGCCAAGCAAGGTTGAACTGCTGTTTTGGGGGGTTCAACCTTATTG -CCAAGCAAAATTGAACTGCCGTTTTAGGGGGTTCAACTTCAACTTGTATCTCTTTTGCTT -ACCACACCGCAATGCTTGTCTACGATGTTTCGATATGGGCATTTCGTGAGCTGCCATGGT -TCAGTTCTCTTTATGTGCAGCTTTCATGGGGGTCACGCCTTTGGTGTGTTAATTGATAGC -GAGTGCAGTTCAATGTTGGGATTAATATATATCTGGACTATAGATCACTACAACTGCATC -TACAATCTCGAGGAGTCTTCAACCCCAAATTACACCAAGCCATTATGGTATGCAAGTATT -TATTAAGACTAACGGAGTATACAGATTGAATGTCAACCACAAGAAATAAATGAATAGAGA -CGGTAGAGACATGTGTACATCGTATCCCGAGGTTCCATGCACACCATCAAATTACCAAAT 
-CCCAGCGATAGCATGACATGGGGTAAGATAGAGAATTATAGATGGAAAAAAGAGACATGG -TGGGCCAGGAAGACAGCGATATAATCAAACTATACAATACATGGCGGTTAAGGTTGAAAT -TGGAAATTGGACAAGAGGCCTCAGGCAGTCAACACAACGCGTGGCAAATCCAAATCGCGA -AGACCTCAACCAGACACACCGAGCGCCAACCAGAGAACAAGATGTCAAATACAAGGCGAA -CAACAAAGCAAAACACACCGAGGGTATACTACGCAAGCCCGAGAGTTAAATTATTTCAAG -TTTTGATTGTATTCGTATCGGATCGCACTTGTGTGCTTGATCCCACCATGTCCATGGTGC -CAGCCGTTAAGTGAAGAGGCTGCTATGATACAGAAAAGCTCAGAAGCGAACACTATGGAC -GCGCAATAGTGGTAATCGAGTGTTATGAAAAGCAAAACCCAGAGACCCATGATGATTGTA -GTAAAATAAATCAGAGAACACCCCAACGTCCCATCGTACCGTGAACCAGCCCGGAAAAGA -AAAGCATGAATAATTTAGCCACCGAAACCGTAGAGGGTACCTGAAAAAGAAGGTTAGAAA -CGCGTTTTGGGATGGAAAGATACATGGAAACTTACGGCCCTGGCGCTTGAGAGCGTAGAC -AACGTCAAGAGAAGTGACAGTCTTGCGCTTGGCGTGCTCAGTGTAGGTAACAGCGTCACG -GATGACACCCTCGAGGAAGGTCTTGAGGACACCACGGGTCTCCTCGTAGATCATAGCGGA -GATACGCTTGACACCACCACGGCGTGCGAGACGACGGATAGCGGGCTTGGTAATTCCCTG -AATGTTGTCACGCAAAATCTTGCGGTGACGCTTGGCACCACCTTTGCCGAGACCCTTTCC -ACCCTTGCCGCCTAAATTGAGACGCGTTAGAACGCGTTTCTGAAGAAACAGAAGACATGT -GGGGTAAACTTACGGCCAGACATATTGAATATGGATTAGAAGAAAAAAGGGGAGGATGTA -GATGTGGGATGAAGAGTTGGTGGTTGAGAAGTAGTCGATGGAATTGATGGAAAAAGAGGG -AATGGAAACAGAAGTGGACAGAGGATATTTGTATGATACGGGAATCAGCGAGGATTGGGA -AATCACGCGCTAGGGATAGTGTCCACCAATGAGAGGACAGGATCACAGACCAACCTTGAT -CTACCGGGAGACCCCAGAGTCGGAACGCCAATTTTGGCGTGGTCTATCTTTGATCCTCCT -ATAATCCACCACCAACAATGTAAATCTATAATAAAAATGGTAGAATTATACATTCCAATG -TGTGGTGAATAGATGAGAATGCATCAATTGTCCATTCAAGGGAGGTTGGTTGCAAGATCA -AGAATTCAAGGGTTTGACATTCTATACTCCCTGATCCTGGGCTCTCATTGGTCCACAGCT -CGCTTAGTTGGCAGTACCAGCACTTTGATCTGACTCAACCAATCAGAGCTCACCGCAAGC -CAGTGGGCTACACGCTACGCAGAGTCCCCAGTCTTACGCACTGGATTTCCTGACTGATCC -TGATCGGCTCACTCCCCCCCTGATTCGTGGCTCCCCTTCCAGATCAATTCCCCCCTTCCC -GTCTGCTACAAATACTCTTCCCCCACCCGCCTCTCTCTTCCCTTTTACATTCATCCTCCG -CCAACCAACTCTTTCATCAACAAAACCACTTTTTACATCAAGTCTATCATCAAAGATGGC -TCGTACCAAGCAGACTGCCCGTAAGTATTTCTCATATTATATTTAATTCACCTTTTCAAC -CTTGTTGTCGATGCCTGATTAGTCAGACATCGACATCCAAGAGAGAATATGGTGACGCGT -TCAAACGCGTTTGAGAAACACCTAACTAACTGTCTCAATAGGTAAGTCCACTGGTGGCAA 
-GGCTCCCCGTAAGCAGCTCGCTTCCAAGGCCGCTCGCAAGGCCGCTCCCTCTACCGGAGG -TGTCAAGAAGCCCCACCGCTACAAGCCCGGTAAGTTCCTCATGTCGGACAAGACATCCAT -CACACATTGCAACCCATTCTAACATTCTATCTCCAGGTACCGTCGCTCTCCGTGAGATCC -GTCGCTACCAGAAGTCCACTGAGCTTCTGATCCGCAAGCTCCCCTTCCAGCGTCTGGTCC -GTGAGATCGCCCAGGACTTCAAGTCCGATCTCCGCTTCCAGTCCTCTGCCATCGGTGCTC -TCCAGGAGTCCGTTGAGGCCTACCTCGTCTCTCTCTTCGAGGACACCAACCTGTGCGCCA -TCCACGCCAAGCGTGTCACCATCCAGTCCAAGGACATCCAGTTGGCCCGCCGTCTCCGTG -GCGAGCGCTCGTAATTTTTCATTTTCAACGAGCTGGGTTTCTGAAAGGCGATCATGGGGT -TTGTTTTTATCTTTTTATCATGACTACTGGCGATACATGATGGGTTTGCTTTCAAATATC -AATGGGTTTCGGCACCGGGTTGCTACGCAATTTTGCTTTTTTATCGATGCTACTGCTCGC -CATCGATTGATGGATCAGTCATGGGCATGTACATTATGGGCTTCCCTCTAACATGATGCC -TTAGCGAGCATCATCTCATGACAGGACGCTTTAATGCTGTCTCTTATGGTTCACCAATTT -TCTTCCTTGTAAACAGTAAAGTAATCCTTTATGGTGCGTATTCGTAGCTACTGGTCCGTT -CGGCGTTAGGTGGGGATTCATTCTTCTGTCTTTGTTTCCTCTCTTTGAGTATATCGTCTC -GCGTCAGACTAAACTCGGTCGCAAGGTGCCCTGTGACCTCATACAAATTGAAGACACCCC -AGACCTGATACCAATGCCAAACGACACCATTTGAAATGAGACAAAAGACTATTTGACTAG -CAATGACGAATCGCTTCATCTCTAAATAATATTTTTTTTCCAAAGTAACCAAAACTAAAT -GAAGCTATCCTATCCGAATCTTGAAAATTTCCTCTCATTGCGCAACAAGCCATCGTAAGT -GCGAGACTTCCTATAATTGACGTTGGACTTTTGAGATTGGAAAAGAATTTGACCACTTGA -AGAGACAAAACAAAACAAACCTAACTAGAGAGCTACCTTTCGGCACCGAAACTTCAGGAC -TTGACTGTTTTCCAAAGCACCCAGAGTAGTGACTTGGCGGGGACTCGATTATGACTTCAC -ATTGACTCACTGGTGATCCACCAATCAGTAGGACTTGCAAGTGTCCAGTTAACTCCTGGA -GTCTAAAGACACAATGAATATGCTCAAAGGATCACGGGATCAAACAGCACGCAGCCTGAA -TTCAAGTTTCCGACTAGTTCATACTGATCAGGCTACATCATACAACTTAGGTACCCCGAT -CAGACCCTCAATCTAATCGCCACACGAGCACCTCATTCCAAATCAAACCCCTGTATCAAA -ATTAGGATTGAGTCACAATGAGCTAAACACATCACAAAGGCTGCAGCTGCAACTGGGCAG -TACGCATGTACCTTTACAAGGTAGTTCTGTACTCTAAAATATCAGAATGACTCATTTTTC -TGATCACGAGGTATAATACCGAATAAATCAAGATTATCCGTGTAGGTTAATACCAGACGA -CCATGCCTCCAACCCAGACCAGGAAGTAGACTAGCTGACTACATTGTCTCTCCTATCAAG -CCCAGTCAAATGAGAGACAAGTGGAGCTTGCTTGCGACCATGCAGATTTATGGTCTGATA -CCACCAGCGGCCAGCTATCCACAGATACCTGGATGCCTGAATAGGTGATATATTTGGTGC -TATGTACTTGCAGGGCTGAACCAGATGTTGGCGGTTAGGCGTCTATTCTAGCTGTCATAC 
-TGATTTTAAGTGGGGACAGAGTGACTTGGACGCGTGTTTTGTTTCTTTGATCTTACTGTT -GAAAATGGTAGAAAGGCTTTGAGATCAGAGAACTAGGCAGCTCAAGCTGAGTGGCATCTA -CAGTCCCCAGACGACACGTATAACTATCTTGGGTACTAAGATCAAGGCAGAATAATGTTG -TGGCAAATGTATACTTTCTCAGTGGATCCATGTGCAGTCTCTGGCCTGAGGCAAAAGAAA -ATGCCAAGGCCGTCGGAAAATTCTAGCTTGCGCCCGTAGAATTTAGTTCGATTTCCTTGG -GTTCTCTTTTTTTTCTCCTGCAAGCCACCCGTGGTTTTACCAAGCAAATGGCGGACAAAT -CTCGGAGATCTCACCTTTTATACATCTACTTCGTCTAGATTTTTTTTTTTTCAGGATACA -TCCATGGATAACTTGTGAACTACAGAAGCTAGAACCTCCCATCCAGGCTCGAGGTAGCTT -TGTTCATATTATTCGAGTCCGTCCGTGCCCAATGTGATAGCCCTGGTTATACCAGTCCGT -GGTCTTACATGGCCCCATGTGGCCTGAGCCATACAACATGCATCTTTCCTCAAAGATATG -GATCTAGATATGGCTAATATAGAGCCACCTTCTCGGCGGGTACTCCCTGGTGGCCTGACT -AGCATTTTGATGACATATCTGCAAAACACGACGCTGGTGCCTAGCTCGGGGATAATTTCA -AAGGTATTCCCGAGACAAGTACGAAGTCAAATTATGCGGCAGTTGCGTCCTGATCTTATC -CTGGGCATCTAACGGGGGCGCAATCAGAAAAGGTTGCAAAGTTTGCATTGCATATAGTAG -CTCCACTATGCATATGTGCACAAGCCCCTGATGAGGATTTGTAATAATAATGCTTTCCAT -ATAGGAATGGAGTATGTACGAAGTATGTGCACAACATATGGAGTTTAGGCGGCCATAAGA -GTCATAAACCCCATTTATGAAGGTGCACCGGAGATTATCTCAGATATGCCGATCCATAGA -ACTTTAGATCTGTTGGGAAGCTTAACGTACCCAAGGCGGAATGTCCGAATGTGGGCAGGT -CTGAGGGTTGTGCATGGCCCGATACCCATGGAGATTTATTTGGACGTGTACATTATACAA -GAGTAGACGGCCGTTTGTGCGGCCTCATGATGTCTTTCTTGTAGGGCTCTCAAGTGGGCT -TTTTGGCGGCCTTAATGGGCTCAGTCATTTTTCTTAGGCATCCTATAGATAGCTTTTCCC -CTTTCCAATCTGTATATTCCGGCAGAAAAAGGCCGGAAAAGCATACTTTGTGCAAGCGAC -TTTTCCATGTATAACTTCTGAAACTCCTGAAACTTCTGATTAGAAAATGACTAGAATAGG -AAGTGGTACATACCCGTGGACATTTGAAGGGTTCTGAACCTTGGTCAGATGAACCAATCA -GATCGATGCCGCATACCCGAGTGGCTGGACATAAATCCTTCATAAATCACGTGGCGTAAA -ACGTGGCTGCAAACAGATTTCGGTTGCTTCCAGAGCATCAGCCGTGCATCTCTAGGGCTC -AATCGGTTCCATTTGTACCTATAGCTGTTACTTTACTTTGGGAAATATACTAATAATGAA -TAAGTAATGTAGATATGAAGTCACTATGAATTCAATGGGAGAAGATGTGCACAAAACTAG -ACTTAAGGTCTAGCCAACCTTGGAACTTTTCGTCGGGTCAGATACCTTGTCCGGGTCCGG -GGCCCACCTATCCTACCAGAGTCCACTTTGCACTTTATCTGCATATAGCGGGCACATTAT -TGACCTTATATATGGCTGAGAGGCTCCCGAGGTGCCTAGTGCCCATTTTCCCTCGGCATT -CCTTGGACCACCCACAAAAGCTTCAACTTCACCTCTTTTTTATATACCTCGGAGGATACT 
-ATCAACTACATATTGCAAATCCTAATCTAATACCTGCCCCACCATGGCAGACTCGGAGCC -CCATGCCTCAGACAAGATCGAGCCCTCTGCGTCAACGAAAATCCCATTCTGGCGCTCGAT -CTATGACCAAGGTGTCGTCACGCAAGAGATTATCGATTTCCCATACCCCGGTTCGGGAAC -AGAAGACGACCCTTATGCAGTATCATGGATTCCCAACGACCCCCGCAATCCTATGAATTA -CAGTGAGTTGAAGAAATGGACTCTCACCATGTTGGTGGCCGTTGCAACATTGGCGGTTTC -ACTGGTTTCGTCGGCCTACACCGGTGGTCTTCGAGAGATCATAATGCAATTCAATATCGG -CCAAGAGGTTGCCACTCTTGGCGTTTCGCTTTTCGTTCTCGGTTTCGCGGTAAGACAAAC -TATCTAGATGCTGGCTCTTAAGCTTGCAAACCCGCTACGAATGCATACTAACGTTGTTTG -CTTTGATATAGATCGGACCATTGCTCTGGGCCCCACTGAGTGAGCTGTTCGGCCGTCAAG -TCCTGTTCATCGGCACCTACGCCGCCCTTACTGCTTTCAATGCTGGTGTTGCAGGTGCTC -AAAATTCCTGGACCATTATCATTCTTCGATTCTTCGCTGGTTCCTTTGGATCATCGCCTT -TGACCAATGCCGGTGGTGTCATTGCGGACATGTTCCCCGCGCAGCAGCGCGGTACCGCCA -TGAGTATGTTCGCAGCTGCACCTTTCCTGGGCCCTATCATTGGCCCCATCGCGGGTGGAT -TCATTGGTATGAGTGACGGTGGTTGGCGATGGGTCATGGGTTTCCTTGCTGCCTTTTCTG -GTACTGTCTGGATCCTTGGTGGTCTGTTGATCCCCGAGACCTATGCACCCGTGCTTCTCC -GCCGCCGTGCGGCGAGGCTCTCGAAGTTGTCGGGCCAGGTCTACCAGAGTAAGCTTGATA -TCGACCAAGGCAGACCCTCTCTCAAGGAGTCCTTCAAGGTCGCACTTTCACGTCCGTGGA -TCCTTTTGTTCCGGGAGCCTATCGTGTTCCTGCTATCTCTCTACATGGCTATCATTTACG -GCACCTTGTATATGATGTTCGCCGCCTTCCCTATTGTCTACCAGCAGTCCCGTGGCTGGA -ACCAGGGTGTTGGTGGTCTCGCATTCCTCGGTATCATGATTGGCATGTTGTTTGCCGTCG -CCTACTCTGTCTGGGATAACAAGCGCTATATCGATGTATCCGTCAAACACCACGGGTTCG -CACCCCCAGAGGCACGTCTCCCGCCTTGTCTCTTCGCCTCCGTCGCCATTCCTATCGGCT -TGTTTTGGTTCGCATGGACCAACTACCCCTCCATCCACTTCATGGCCAGTATTGCAGCTG -GCGTTCCCTTCGGCTTCGGAATGGTCCTAGTCTTCCTCAGCATTATGAACTACCTTATTG -ACTCCTACACAATCTTCGCCGCCTCCGTGCTCGCCGCCAACTCCGTCATCCGTTCCCTCT -TTGGTGCCGCGTTCCCTCTCTTCACCACCTACATGTACCAGGATCTGGGTATCCACTGGG -CATCATCGATTCCAGCTTTCCTGGCTCTCGCATGCGTGCCCTTCCCATTCTTGTTCTACA -AGTACGGACCCGCGATCCGCACTCGTTGCAAGTTCGCCGCCCAATCCGATGCTTTCATGC -GCAAGATTCAGGAGCAGGTTACAGCGCCGCCTGCAGAGGATAAAGTGGAATACGACCGTA -CCGAGGCCCCCGAGCCCGAAGACTCACTCTCCAGCGAGTCCCAAGACGGTGTTGAGGAAC -TTCCCGATACGCAACGCACTCGCAGCCGTGCGCAATCTGTCGCTTCCAACCGCACTACTG -CCTCGTTGGCTCGCTCGGTTACCTACGATGGAAACCCTTACGATATTGATCGTGTCCACA 
-CTCGTGACTCATTCAAATAAGCGGCTGCTGAGCAGCTTGTTTATATCCTCTCATCTTACC -ATCTCTTGTCCGGGCTTGTCATACCTATACTCGCCTGTGTCACTATAATCATAGTTTCTA -TTGACCCCCTTTCTTCCCTGTTACAAAAAGTACCGTTGCACTTTATGCGTTTGTTTTTAC -TTTGATACCCGTTCATATCATACTGAGGGCAACCTCATCCGACTAATGTTCTTATGGGAT -GCATTATATAGAGCTGGAGTTTTATTTTACAGAAATATATGTTTGTCAGTTCGCAAATAA -CTTCAATTTTCTCGACCATAAGTCTTTTCAATAAAAGAGTAAATCCAGTCGGAACCGGTC -TCCGGCACTGATTTCACCCGCTTTTCGCCATTCGACCCTTATCGTAACCCCGAGGTCGGA -TCTCAAAGCGCATCGCTTTCTGTGTTGTACATAATTTCTAACGATCTTTTTTTTTCTCCC -TTCACTTTCAACCATTCACAGTTTGTAAATAACCAGTCGCGGTCTTGGACATGAGCCTCC -TCAGAAATGGCCTATCCAGGGCCTCGACCTCACCTTCAACCTCCACCCTCCGCCGACCGA -TAACCCCCAGTTCCCAACCCCAGCTATTGCGCCAACCACGCCCACAACGCCGACACAACT -CCGACTCCTCTGCAGGAAAGACCCCGGAGAGAAATGAATTACACGCACACTCACCCAAAC -TTCCAGACAATATCCCCGCCTCTGCTGCTGCAACTGGGGCTATAGTCACACCTGCACGTC -GTGGATTCCGTGAGGTGATAAAGGCTGGTCCAGTGGGCAAGTTTGGACGATGGTATGCGC -GCGCGCAGGAGCGCAAGCCGTACACAACGCAGTTTTGGAGCTCGATCGTGATCTACCTAT -GTGGTGATCTGAGTGCGCAGATGTTGTTCCCTACGGAAGTTCCTGCACCTGCGAGATTGG -ACTCTGAAGATGGGAATGTGGTTGCGCGGGGGGATGGGGAAACTGTCAGTGCTGGGTACG -ATCCGCTAAGGACGTTGCGGCACTTGTGTGTTGGGGGTTTGTCGAGCATTCCGTCTTACA -AGTGGTATGTTTTGTCAGGACTCTGTTATGAATGGTCTTCTTATCGGATTGTTACCACTG -CCAAATGTTGGAATTACAGATATGTGCATGCACTGGTTAACTAACACAACGGCGCTAGGT -TCTTGTTCCTCGGAAACCATTTCAACTACCCCAGCAAGATCCTCTCGATCCTGACGAAGG -TCGTCGTACAGCAGACGTGCTTCACGCCCGTCTTCAATACATACTTCTTTAGCATGCACT -CCCTGCTTGCTGGTGCGACGCTCGAGGAGACATACGAGCGGGTGAAGAAGGCTCTCCCCG -TTAGTATCCAGAACAGCGTTAAGCTCTGGCCGGCTGTCACTGCATTCATGTTCTTGTATG -TGCCGCCGCAGTTCCGGAACATATTCTCTGGGGGTATTGCGGTTGGTTGGCAGACTTATC -TGAGCTGGTTGAATCAGAAAGCTGCTAAGGAGGTTGCGGCTGCTGAGGCGCTTGCTGCGG -CGGAGGGCATGAGTGGTGCGTTGCATGCTGTTTCTGGGGGGAACATGATCTCTGCTGGGA -CTGCATGAATGCGTGGAAAGCAGATTCAAAAAGTACACTTGAGGTTGAGTGGATGTATAG -ATTTATAGATCCCTGCCGAAGATCGGCGTTGGTGATGTTGTCTTTTTTTTTGTTTTGAAG -CGATGCATGGGATACCAAGAGGAATTACTTAGATCAATAGACTTCAGCCTCTTTTGCTAA -TTTGCATTCTATGTGATCTCTATTATTTTCTACGATAATCGAGCATGTAAAGATCGGCCA -GTAAGAACACCGGGATTCTTAAGGCTCCCTCACGCAATGACGCACCAGAACATCAACTCC 
-ATCTACAGAATATGATCCCCTCCAGTCGCAGGCATGTCCTGCCAGGAGAGAATGGATGAT -GACTCTGATTATGGATCTGACTTCACTCCAGATGAAGAAGTTCTGCTAGACAATTTGCTG -AACAAAGCCGTCGCGGAACATGCGACCGCCGCCGACGCCAACGCCACCTCGATCTCGACA -ACCTGGATCTCGACTCCAACGCCCATCGAACAGCTCACCACGCCAAAATCCCCTGAACTC -GCCGACCTCGAGTCATTGCAACCGACAGCTCTAGAGGCACTCGTTGCAGATATCGAGGAT -GGCATTGAGCCTAGCGTCCGGCTGCCTAAGGTGCTGGGCCGGGAAACGCCTCGCTCGACA -TGGCAGTCGCAGCCTCGACCAGGTCAGGCGGTTAGATGGGGTGCGAATGCGAGTGCAATG -GGCAAGTCGAATCCGCGCGCTCACAACAGCAACAGGACCAGCCCGTTTGGTATGCTTTTC -ATTAGCTTTCTCCATCCCCTTGCCCGTTGATCGTCCACACTTTATTTCTTTTTTTATGGG -TCGGCCCCATTACAACCAAATTGAATTGAGCTGACAACATCTTCTCCGAAGCGTTTAGTC -GAGAACCCTGGTTCAACCGAGGGCCGAGAAAGAGAACGCGAGCGCAACGCAGTGCGTGAG -CAGGAATGGATACAACAGGATCCCACCGTCGCAGCGCTTGACACTCGAACACCCGTCGAA -CGGTATCGACAAGCACCCAACAAAGCGTTCTCGGTAACTGATCTCGTTTCACCAGCTTGG -TGCGAACTGCAGTACTGGCTCACGTTGACCAAACATGGGCGGAAGAAGCCGACTGCTGCG -ATGAAGAAGGGCAGCTCCATGCACAAGACGCTCGAAGATGAGATATACACTACCGTCCCT -GTGGAAATCACGACCAAAGAGGACGCGTGGGGTCTCAGGATATGGAATGTCATTCAGGGA -CTGCGCATGCTGCGCGAGTATGGCATCACGCGTGAGCTGGAGGTTTGGGGCGTTGTCGAT -GGGGAATTTGTCAATGGCGTTATTGATGAGTTGTCTTACGAGTGTCCCAACTCGGAGCTT -GAGGCTACTGCTGCTGGGTACTATGCGGATGTTGTAGCGTCGCGCGCTGCGCTACCAGAG -TATCAGATGTCTTTGTCTGACTATTTGCTTTCTTCGTCGCAAGGCGGAATGAAGCTTTCA -GACCTAGGGCAGAATGAAGTCGAGGAGACGGGGCATTCTGAGCCAGAACTACCTGCAGAG -GTGTATAATCTGCGCCGGATTTATTTGACAGACGTGAAGACGAAGGGTAACCGCTCGCTA -CCGACTGTCAAGAGCACGGGCTTCCGGCCTACTCTTCTCCAGTTGCAACTTTACTATCAT -ATGCTTAACCGGATGATCACGAGCGACGATGTCACTATTGACCTCCTCGCTACTCGTTAT -GATTTTGACGCTCAGAAGCCGTTCACGAACGCCTTTGTCTCGGAGGTTGGGGGTCTGAAC -GACCAATACTTCGACGCTCTCTCCTCCCAAGAATCAGAACAGAACGAGGGCCCTTCTAAT -GCTAGCAAGGATTCAACCAGCCTCCTCTTATCACACAACAATCTCTCCCGTCTTTGGAGT -CTCATGATACAACAACTCCGCCTCACATTCCTACCCGAAAATTCCCCCGACACACAATCA -ATCGCACCATCCATTCCATCACTTTCCCAGCCAGAGCTCTTGGAACCATACCCAACCCTC -CTTTCCCCGGTGCTCACAGCTAGATATCTCTCATCTGTCGCCAACGAAGACCGGGAGAGA -CAGCTTATCGGAAGTCGTTCTTTCCTCTTCGATCCGACATCCCTCACCTCCTACCTCACC -GACCAGATGACCTGGTGGCGCGGTGAGCGAGATCCTCGCGGCGTGGAGATCATGGATGCG 
-TGGAAATGTCGGATCTGTGACTTCCGCGATGAGTGTTCTTGGCGTGAGGAGCGTGAGATG -GCTTATGCGAGACGGGGTGGTGGTCGGAGAGGGTCTGTTGCGGATATATAATTTGTCTTG -AGTCTGAGTTGCATTAAACGCGTTCAGCGATTGTAAGAATATGAAGTTGGTTACACCTGA -GCATGCTCCTTTACTACCCCGTTGTTAGAAATACCATTGAACAAACAAAATACATCAAAG -ATTATGAATGGAGAAGTCTACTTCACGGAGTATGATTGTTGCCTTTTTCTGACGTTTACC -AATAAACTGGCAAGGGGAAGAATATCTGAGAAGACCGCATTACTTAAGAGTCCTAGAAAC -AGACATCACAAGGCCGCATAAGCGGCCATCTACTCTTGTCGTTTATCGCAATTTCAAAGA -GGATAGCTCCTAACCTCATCCTCTCTCTCAACCTCCAACCCTCCCTCAACCTGCACTTTA -TGCTTCCCATCCAAAAACTCCAACAAAACATCCTGATACTTCTCCTTCCCCTTATACCGG -CCAACATCATCCCCCCAACTCCTAATCGCATCAGCCAAGCGACCAGGCACATAAGGCTCC -TCGACAATCGCAGTAATAGGCTCAAAATAGGTCCGAATCGTAAAAACCACCGCCCCAGAT -CTGGGCAGTCTCCTCAGCGACTGCCTCTCTGAGCGAAAGAAATGATTATCAATAGCACGA -TTCTTCTCCGCCGTATTCCACGATACTGTCTCCGCATCCTCAGATCCAATGCTGTCGGAC -CACGCCAGGTTATCATCCACCTGGATGAAATAGTTGTTCCGTAGAACGGGCTCTTCAGGC -CGTAGTCGTCGGAAGAAATTCATCATTCCTTTTTCGAGCTTGTTCTTGAACTGTGGTACG -TCGCCGGATGTGTGGATTTCTGAAAGTCGCATCCCGAATTTATCGCTCAGTCGCCAGAAC -CCGGCGAGCAGGATTGCGCCTGCGAGTAGGTAATATTCGCCGTCTGCTCGCTCGATCATG -AGGGCTAGGTCGTCTTGGACTAGTCGTGCTGCTGTTGCCATTGGGTCTTCTGGGAGTGGG -CGTTGGGTTATGTTGAAGGTCTCGTTGGTTACTTTATTGGTAATTCCTGTTGTGGTCTTT -GTGAACATGCCTGGGTATCTCTCGGGGAGGTAGGCGCATCTAGATAGTGGGAGGAGTGTT -AAGGTGGGGTTCTTGGATATGTGGTTGTATGTCCGTGGTTTGCCCTTACAGTTCTTCGAG -TAATTCGATTGCTGCATCCATTGCCTCTGGTGCTGTATGGCAGCATTTGCTTCCACGCTC -TTCGATACGGCGTGCCTTGTCGGCATGGTATCTTAGATAGTGATTGTCGAGCTCTGTATT -CGGGATATATATGGTCAATGGGCTGGTGGAAGTTGGCGATCAATGAACGACTCACCGATC -CATTCGTCCCATTTCATAGTTCTTAGACCCAGCGTGATGTAGTACTTTCTGCCAGAAACA -ATCAAGTCAACAAATAGTCATATAAGTGAAGTTATTCAACATACGGGCCATATCTAAAGG -GTCGGTAGGGAATTGGCTTTGTTAAGTGGACATCCCATCCTGAGTATGGAGCTGCTGTTG -GCCGTTTGAAGTCCGATGGAGTCCATTCTATGAAACAGGAATTAGTGGCTCTTTCTCATT -ACGGACTAAACTCCACATTCACTTACCACCCGGTGTCCTGGTGGTCTTTCCTCTCACACC -CGAGGGTTTCTCTATATCACTTCTCGAAGCTCTACCCGTGCTTGAAACCCATTTGCGTGA -GACCAAGAGAACAAGCCATACGCAGGCATTCAGAATGAGCAAGACATTCACAACCTTGTT -TTTTAGGATTGATAAGATAGTTTCCCAGAGGGAGTCAAGTACCGGCATGGTGAGACACTT 
-CTAGAAAACAACTTGTCCGAGGACACATTGAGAATAGAAGGCGAGGAACCCCAAGTAGTT -TTTAGTATATAGTGAACGCAAAGCGGATCGCCCTTATCAAGTTCGGAGTGCGGGGAAAGA -TATCACTGCCTAAACGTCTCTTTCTTGCGAGAGGCCGTGTCAATCCGATACGAGATAGGC -CATCGTGTGTCAATATGTCAATCATGCTGGAAATATAGACTATCTTGTCTGCGGAAGAGG -GAACATCGCCTTTTTCGAGCTGTTATATCCGGGACTGCATGCCTTTGTCTGTTTTTTGTC -TGTTGTTCGCCTCTAATGACGCGGAGTGAGGGTTATCTGCCGGGAAAGGCTAAAGGGTAG -GGATATTTTGTCCGAATTGGGAAAGGTGACGCTACACTTGTACTTGTCAACCTCAGGTGT -CACATTTCCAAGGTTACTTGCATTTGACTGTATGTTCTGTGATGCTTTTACATAATTATT -CTACCGGTTTTATTCAGATGGGTTTCCAGGGGTATCCTGGATCTATAGATGACCTGCTGC -TGTTCTTTGGAGAAGACAGTATTGGCCCCATTTGCGACGGAAGCGAGCTGGAATGACTGG -AAGATCAGGGATCCAGATCAGTCTACCAAGCAGACCTGTTTGGGATGCATCAAGGAAGCT -TGCGCAGGAAAGCGTCTCCCACTGCGCCTTGTCCAGGAAGTAATGGGGCTCTAGCTCAGT -GGCGTCATGACCGGGGAAGTAAGAGTCAACCAGGAAAGAACATTGAGAGATATCAACCTA -TTGGAAAGTCAGCATAGGGATAGGAACAATCGTTCCAGCGCCACGTTGAGACACTTACGT -ATTTACTAGGGTCCTCAATGTTGAGATCATTCATGCCAGATGGAATCTGTGAAGCACCGT -CTAGACGCCCAAGATAACCCGGGGAGTCTGGAAACTCACCCGGAAGTAGTCCTCGGAATT -CACTTCGGATGAACTTGGCGCGCATATCGTTGGGAAGGAAATATGATGACGGGAAGCGAT -ACCACTCCTTGCCAAAACACACCGAGTCTCCCGCATGCGCGCTATTCACACGCTGAAGGG -GCTCCAAGACCTTCAATGGAGCATTATAAGCCGTAACCATCCCTAGAGTGCGCAGCATGC -CGGCGTTCAGAGCAACTAGAACAACCGACAAGGCTGCAGCAACCTTCAGTTTAGTTGGTA -CGCGGCCGATCAGTTCCTTCCGGTCAGTAGAGCCAAGGTAGGTCAGAATAATATGGAAGC -TCAAAGCCGCGCTGAGTGCTAAGAACGGGTATGCTGGGTACATAAACCGCTCCTCCTTGT -GCGGCTGGACCGAGAAGATCGCGAACCACATATAGAAGGGAGCGACCAAGGTCATGGACC -GCAATGAGGTTTGCAAGGAGGTGGTGTGCGATCGAAAGACGATCTGGAAAAGTAGGAGGG -GTGCGGATAGCATGGCGAGGACAAACCAGACATTGAAGTTGAGCAAGAGGTTTCGAATAT -AGAAAGTCCACGGCTCGGTTCCAAAAATATCCGGCCCTTTACCTTCTCCCCCCAGGATGT -TGTATGCAACAATGTTCCAAGGCACGAGAACGAGTCTTCGGAAGAAAGCATAGTCAACTG -CAACCTCAAGAGCCTGTGATACATATTAGTAAAGATATTGAAATTCAGCATGATGTCACT -TACCAGAATCGACAAACAACGAATGATGCCGTCGAATACATTGTACAAGAGTGAGCCAAG -AGACCCAGACATTATACTAAAGATAGCTTCCTCGGCCAAAAAAGGGACAATGAGAGCACC -GGCAAATGGCCAGCCAACGATTGTCCCCAGCCCAAACCACATGATGCCTTCTGCTGTTTT -CCGGCCATTTTTCCAATCCAGGAATGAAGCCAGACCAAGCATAGACATATACATGGTGAA 
-AGTAGAAGGCAAAAAGGCAGTCGAAGCATGAAACATGCCAGGGCTAAGTGTGACGATCAT -CACAAACAAAAGACCGATGCGGGGGCTGAGTGTACGGCATATCGCAGAGTATAGTCGAGT -CTGACAGGCAGTGCAGAAAAATGCAAGAGATAGGCGAATGGTGTAGAATTCAGCTGCCTT -GCTGTGGACTACCAAAGACCCGATTTTGCCTACAACAGCATGGAGCGACACGTAGAGCCA -GCTTCGGATAGAATAGACCGGTGAATATTCCCAGGTTTGAAGACCATATCCATGTTGAAG -ATAATGCGCAGGTTCCCAGAAATTGAACACCTCATCGCAATCTTGAATTGGAGAGAATAT -TGCGGCGAGAGTGTTGGCAGCTAGGAATAGGTAGAGAGTGATGTTGAGGGGAATGTAGAA -TGGAGGTGGTGGGGGCCTATACATAGTTTAGCGACATGTTTAATCTTGCATAGATTCAGA -TACATACTGTTTCTTTGATCGAGGCTTATGGGCATCCCCTGAGGCAATTTCGTTCCCTGC -CATCTTGCAAGTTGGACTAAGTTGTAAGGCATGGATCAATTGGATAGAGCTACGATAACC -GATAAGACTCTGTTGACCGCCCTGCAGGGTCTGGCAAAATAAAAAAAGTTTCCCGTGCGA -CTTTTGATCTTTCAAGTTAAACTGATTTTTGTTTAATAAATATTTAAGTTGAAGATTCAA -TATGGAGGGACACAATAACCCTTCCAATGACATGACTGGTGCTGGTGAAGACTCTGCCTC -GAAAAAGCCCAGTCTTATCGAGACGCCTATCCCAGAGACCCCAGACAACGGCAGCGATTT -CTATAACACTCCCTTGGCTACAGGCACACCGGACATGGGGATAACCAGCCTAAATGCTGG -AAACAGCGCATTCGAGCCCTCTACTTCCTTGAAGCCAAGCAATATCATCCCCGGATTGAA -CCTTATTCACCACCCATCCGGACAAGAAACTGGCTCGGCCTTGACCGAACACACGATCAA -GCAAGAGCAAGATCAACCAGTGGATCACCAAGAAGCGAAGACAGAAATCAAGCAAGAGGA -AACACAGCACATCTCCCAGCCACAGTCCACTGAGGTAGTGATGAAGGATGAGGCGGAGGC -GCAAACTAAAAGCGAAATTACTGCTCCCCTCGACGAAGCCATGGATGTCGAGCAAGACAC -CCGCCAAGGCAATATGATCGAAGCCACTGGTCAGGCTGGCGACGCTACAGAAGGGGACCA -CCCCGAGTGGGAGGAAGACTCTTCCCCGTACGAGTCATCTTCAGACAGTTCTGATTCATC -GGACTCCGATGATAGTGACGACGAAGACTATCCCATTCTCAGCGCCGAGGAGCAAGCACA -GATTCTCATGCGAGCGGAAGGTGGTTCCGACGACGAGGGAGATGGCAAGGGGAAGCAAGG -TGGACAATTGCGGACCACCAACGAGATCGAAGACCAGGTCTTGCCTATCCCTGACGTTAA -GATCACCCCCGAGATGAAGATTGTGTTCCTTGGCAAAGTTCATGCCGCTATCGATAATAA -TGTTCTTATCGAGGCTAATACCTCTGGAGAATACCAGGTACTTGAGTCGGGCTCTTTGCT -CTGCTCTGATGACCGCCAGACTATTGGTGTAGTAGCAGAGACCCTGGGCAGAGTGGAGAA -TCCTTTGTATACTGTCACATATGCCACAGCTGCCGAAGTTCAGGACAAGGGCTTAGTCAA -GGGCAAGGATATCTTTTATGTTGAGGAGCACTCCACCTTTGTGTTCACTCAGCCGCTCAA -GGGTATGAAGGGAAGTGATGCTTCCAACTTCCATGACGAGGAAGTTGCCGCTGAAGAGAT -GGAATTCTCCGATGAtgaagcggaagctgaatacaagcggaaactgaagcagaagcGACA 
-AGAGCGAAAGGAAGCCAGAGAAGGCCCCAGAGGCAAGAAGCCAGCCCCAGGACCTTCGAA -ATTGAACCAGAGTGAGCTGAACTACGACGATGCCGGAGGTGAGGAGGGCTACACCCCTCT -TGCGCGCCCTACCAATCTCCACGAGATGATGACCCGCGAGCCACCAGTTGAAGGGAATGA -GCGTGGCGGGTTCCGTGgtggcagaggccgtggccggggtggtgaccggggtgaccgtgg -acgcggAGGACGAGGCAGAGGTGGACGCGGAGGAAGGGAGTGGGATCAGGACCGTCGTCC -CCAGCAGGCCAGCGGctcagcccacgagcctcagcctcagcctcaacctcaagctcaacc -CGCTGCTCCCTACGGCCAGCCGATGTACCAGCAGCCCCAGCAGCCATATGGGATGCCCCA -GTCCTTCGCCCCATATGCGCAATACACACAGCAGCAAGCCCAGAACCCGCAGCAGCCGCA -ATTCGGCCAAGGTGGAGCGCAACCACAAATGCCCTTCCAATTCTCTCCATATGCGGCCTT -CCAACAGCAACCCAATAACTTCCAGAACATGCAACAGGGTGCTCCTCAATACAATCCTCA -ATCCATGGCTATCTTAcaacaacagcaacaattcctgcagcttctgcaacaacagcagca -gtaccagcaatctcagcagcagtaccaacagccccagccccagcCCCAGCCTTCCCAGGT -CCAAAGCCCGATTGTAAACTTTGATCAAGTCAAAGCTCAGCTGGATTTGCTGCGAAATCT -GCACGGCGGGAACCAGGGTCCTCCTTCCTGATAACTTTGTTTTGCATACATGTTCTTCTG -TGATCTGTGATATTCTAGCGGAATATTTTGCTTTGCATATGGAGCGGGGTTTATGTGGGT -AGGAAATTTTGTTTACTCTAAAAGTTTTTCATAGCTCTCATTGAGTGTTGTTCTCATACG -ATTTACGGTGTCTATGGGATATATGGGATGTAAACAGTAATCATCTGAAGCATCATGTGT -ATGAATGGTATGGCTATGAAGCAAGTTCGCTATCTGTAAAAGTACAATAGACCTAGACCT -TGATGATCAGTCGTAGGGATCTTTCTCTCTCTTAATACTCCGCTGGTTTGGCACTCTTCG -CCAGTGTCCAGAAGTTCTCCATCTTGGCTCGGGACAGGGTTTCTTTGGCAGCAGAATCTG -TTTCACAGATACGAGCACCAAGCCATCTCTTGAAGGCCCGGCCTTGAGTTTGACCTTCCC -AGAGGAGATTACAAGTATTCGCGCTAAAGTCCCGGGGCTCTCCCTTTTCATCTTCAGTGG -ATAACCAGGAAACCTTAGCTTCAGGGCTGTCTGCAGGCACTCGGTCGGACCCAGGGTTTT -CAGTCCAATCAATGCGGTTCAACATCAATTTCCGGTAATTGTTGATGGAGTGGTGGCCCC -CCTCGACAATCACCAGATTGAATCGAGGATGCATGACACATACGCCCGTCAGATTATTTT -GCTGTGCGTTTTTGCCGATCTTGAAGCGGTTCCGGCCGCTTGCCAGACTGTCAATTCGAT -ACACAGATTGAAAGATGCCCAAAGAGGCGTCCTTCTCCTGCTGAGCTGCCAATTTCTCAC -GCCGTTGCTCCTTGGTCAATTTGCGCTCTTCATTCGCCATCTCGTGCGCGTCCTTCCGAT -CTGCGATCTCTTTGGTCACACGAGCTTCCACAGCTGTAGGATCCTTCACAGCTTGTTCTC -CCAAAACCCGCATGAGATTGGATTTCTTGACCTTTGGAGGCGGTGCTTCCTCGAGTCCCA -GTCTGATCTTGGCTTGCTGCTCCTTCAGATCTGCCATGCGTCGCTGTCGACGAATCTTAG -CTTGTTCTTTCGGAGTAAGGAACATGGGTTTTTGTGCAGGCATATGCTTTTCTTGCGGAG 
-GTTGAAGCTCAATAGGATGCTGGACATAGCTATTGATGGTATTAAGTTTGATGTTTTGGG -GGTCATCAATTGCGTCATAGTCGTTGCCATTAATCAATTGTGCATCCCACCATTCTACGT -CAGGTGGAGCAGGAACAAGGAATGCCTTCTGTACATCCATATCCTCGTCAAGTCCAGCCT -GCCGCGAGTGCTGTTCAATTCTTTTCTTCATAGCTTCCATCTTCGCCTGTGTGCGGAGAG -CCGCTGCTTGTTGAATATATTTGCCCTTTTCATTGAAGTGAAGTTCTCGGGCTTGTCGAG -GCTTCGCAATCGTTGCTTTGGGGTTGATAGACGGGTCAAAATAAGGGTTATTCTTGATTT -CTTCAACAGATGGACCAGACAGATCAAGCTGGGGTCCACCAGCACGTTTTGCCTTAGCCT -GTCGTCCATTTGCGCCCCGATATTCCACTGCAGCATCAGAGAGAAGCGCGGGATGAAGGC -CAACATCCAGACCACCGCGAGCCTTAGACATGCCCTCGTCATCACGGACTGGTGGTTGAT -AGGATGGCGCGGGTGCACTCGGTCGCGGCTGAGAGGATGCGCTGGCTCGTTGCGTAGCTG -CAGCAACTCGAGCTTTCATCTGTTCTAGTTTCGACATCGCGGGGGTGGGTGCTGGGGGAG -GAGGGCTTGGAGAAACCGGTGCGGACGGTGCTGGCGGTGTTGGCGTCGCTGTCAGACTAC -CGCGCGCTTGTAGACGAGCGCGCACAGCTTCTGCCTTTGCCTTTGCTTCTGCCATTATTT -TCGCAATATCTGGCTTGCCCGGGGCAGCACCTCCTTGACCTGGTACTGGAGACCCATGGT -TGGAGCGGGTCCTTTTCTGGGAATTGTCGTGTTGATCCTCAGGGTGAGGGCGCTTCAGCG -CGTTACCTGATGCATCGGCCATGGTGTAGACGGCTTATGACCAACTCAATCTTCTCGCGA -CAGGAGAACACGAAAAAGTTGAATAAATGAGGAATAGACAAGCAACAGGAGTGTATTAGA -GTGATCCACGCAGAAACCAAAAAGAGTAAGGCGGTGGAGCTGGGCAGAAAAGAAGTTGGC -TCCTCCGGAAAAAAAAAAGTTCGCTCCAGAACTAAAAGCACTTGGAGATCTGAATCCGAG -CACTTTGATTTCGTGCGCCAGCGTTTTCTGTTGCTCAAGGAGCTGCAGGCTACAATGGCA -ACTTTTGCGCGCCCGGTCGCCTCGACCATTAATGGGGTTGACTTCAATGTCTACTCCGAG -GAAGAGATTAAAGCTCTCTCGGTGAAAAGAATTCACAATACTCCGACTTTGGACTCATTC -AACAATCCTGTCCCTGGTGGTCTTCAAGACCCTGCTATGGGCGCTTGGGGAGATCATGTG -TAAGTTTTGACTTGATGAGTATTCCGTTCAATCTTTAACATGAGAGATTTTCTTATTTTA -CAGATGTACTACTTGTCGCCAGAACTCTTTCACTTGTACCGGACATCCTGGGCACATTGA -GCTGCCAGTACCATTCTATAATGTCACGTTCTTTGATCACATCTTCCGCCTTCTCCGAGC -ACAATGTGTGTACTGTCTTCGCCTCCAGATGGGTCGCAACCAGGTCAACATGTACACCTG -CAAATTGCGCCTTCTGCAATATGGGCTGGTCGACGAAGTCGCTATTGTCGATGCTATGGG -AGCCGTAAAAGGTGCTAAATCAAAGCCCGCAAAGGGCAACGCGgacgattcggacgactc -agacgatgccgatgatgatgactatATGCAAAAGAGGACCGATTATGTCACTCGGCGCAT -TCGCGAAGCCAAGAAGGAGGGCAAGTTGGACGGATTGATGCCTGGTGGACAGAACCCTAT -TGCCGCAGAAGCCAGACGAGAGTTGCTCAAGGCCTTCTTCAAGGATATCAATGCAGTCAA 
-GAAATGTGTCAGCTGCAGCGGGTACGTTATACACTGTTCCTCGATCTCGACTTTGAATTT -CGGGCTAACATTATAGCAGAGTGTCCCCCGCCTATAAAAAGGACCGCTACAACAAAATCT -TCCGAAAGCAATTACCCGAGAAGCAGAAGCTTGCGATGCTGCAAGGAGGATTCCAGGCAC -CGAACACCATGGTTCTTCTCGAGCAAGAACGGAACTCCAAGTCCAAGGACGGTGTTTCTG -AGATCCACGGAGCCGAGGAGGAAATTCGCCGTGGTGGCGCTGTTGTGCAGGAGGCACAGG -GATCGGCTGGCCAGGAATTTATTTCTTCTTCTGAAGTCTACGCAGCTCTCCATCTTCTTT -TCCAAAAGGAAAGTGAGATCCTAGAGCTTGTCTACAACTCTCGGCCTGTGCCCAAGGGAA -TGAAAGTTGTGTCAGCGGACATGTTTTTCATCAAGAGTCTTATGGTGCCGCCAAACAAGT -ACCGTCCCGCAGTCCAGCAGGGTGCCGGAACAGTCATGGAGGCGCAGCAGAATACTTCGT -TCAATGCGATTTTGAAGGGCTGTGACACTATCAGCCAAATCAGCAAGGAGATCCAAAACG -AGGAAGAGAAGACCATGTCTCGGGCACGGAACTACAGTGATCTTCTTCAAGCTATCGTTC -AGCTCCAAGATGTGGTCAATGGTTTGATTGACCGCGATCGTACCTCAGTCACTGGATCAG -CTGCCTTATCACAGCCTAATGGTATCAAGCAGATTCTTGAGAAGAAGGAGGGTTTGTTCC -GAAAGAATATGATGGGCAAGCGTGTCAACTTTGCTGCTCGCAGTGTTATCTCGCCCGATC -CCAACATTGAACCCAATGAGATTGGTGTACCCCTTGTCTTCGCCAAGAAATTGACTTTCC -CTGAACCTGTGACAAACCACAACTTTTGGGAGCTGAAGGAGGCTGTTATCAACGGTCCTG -ACAAATACCCCGGCGCTTCTTCCATCGAGAATGAAAATGGACAAGTCATCAACCTCAAGT -TCAAGAACCTGGAAGAGCGGACAGCGCTTGCCAATCAATTGCTTGCACCGTCAAACTGGC -GCCAGAAAGGTTCTCGCAACAAGAAGGTCTACCGTCATCTCACCACTGGTGATTACGTCC -TGATGAACCGTCAGCCAACTCTGCACAAACCCTCCATCATGGGTCACAAGGCTCGCGTCC -TTCCCAACGAGCGTGTTATTCGCATGCACTACGCCAATTGTAACACCTACAACGCTGATT -TCGATGGTGACGAGATGAACATGCATTTCCCTCAAAATGAGCTTGCTCAAGCGGAAGCCA -GAATGTTGGCAGATGCCGACCACCAATATCTCGTTGCAACATCAGGCAAACCACTGAGAG -GTCTGATTCAAGATCATATTTCAATGGGAACGTGGGTTACGTGCCGTGACAGCTTCTATG -ATGAGGAGGATTATCACCAGCTGCTGTACAGTTGTTTGAGACCAGAACATTCGCATATCG -TCACTGACCGGATTCAACTGGTCGAGCCTGCAATGCGCAAGCCCAAGTGCTTATGGACCG -GAAAGCAGATCATCACAACAATTCTCAAGAACATCCAGCCCCCAGAACGAGGAGGCCTGA -CCTTGAAGAGTAAGTCTTCAACCCCTGGCGATCGATGGGGTGAGGGCAACGAAGAAGGGG -AGGTTATCTTCCAAGATGGAGAGTACCTTTGTGGTATCCTTGATAAGAAACAGCTCGGTC -CCACCGCGGGCGGTCTCATCGATGCCATCCATGAGGTTTATGGATACAGCATTGCTGGTA -AGCTCATCGGTATTCTTGGAAGACTTCTTACTCGTGTCCTGAACCTGCGGGGCTTCAGTT -GTGGTATCGACGATCTTCGATTGACTAAGGAGGGCGACCGCATCCGCAAGGAGAAGTTGG 
-CCACTGCTCCTCAAATGGGTCGCGAAGTTGCTCTGAAGTATGTGACCTTGGACCAGGCTC -CTGGTGACCAGAGTGCCGAGTTGAACCGACGACTGGAGGAGGTTCTTCGCGATGACGACA -AGCAAAGCGGCCTGGATAGTGTTTCTAACGCCCGGTCTGCCAAGCTCTCATCTGAAATCA -CCAGCGCCTGCCTCCCATACGGCCTTGCTAAGCCCTTCCCCTGGAACCAAATGCAGTCTA -TGACGATATCTGGAGCTAAGGGATCTGGCGTTAACGCCAACCTGATTTCTTGTAATCTCG -GCCAACAGGTTCTTGAAGGTCGCCGTGTACCACTCATGATCAGTGGAAAGACTCTGCCAT -CGTTCAAGGCCTTCGAGACTCACCCAATGGCTGGTGGCTACGTCTCTGGTCGTTTCTTGA -CCGGAATCAAGCCCCAAGAATACTACTTCCACGCCATGGCCGGTCGTGAGGGTCTTATTG -ATACCGCCGTCAAGACTTCTAGATCTGGTTATTTGCAACGTTGTTTGATCAAGGGTATGG -AAGGCCTCAAGGCTGAGTACGACTCCTCTGTCCGTGAAGCATCAGATGGCTCTATTGTCC -AGTTCTTGTACGGAGAGGACGGTCTCGACATCACCAAGCAGGTGCACTTGAACGACTTTG -ATTTCCTTGCTCAGAACCACATCTCCATTTCCAAGCAAGTCCAAGCCGATGATTACCACA -AATTGGCGAAGGATAACGTTACGGATTGGCACAAGGATGCCATGAAGGCTGTCCGCAAGA -CCGGAAAGATTGATGCCGTGGACCCCGTGTTGTCTCGCTTCCCCCCTGGAGGAAACCTCG -GAAGTACTTCCGAAGCGTTCGCCCAGGAGCTTAAGAAGGTATGGACTCACTATCTCTTCC -CTATAATTCATTGCAATGCTAACAGCCTATGAAAATAGTATGCGGATGTCAACAAGAACA -AACTCATTAAAGACAAGAAGAAGAACATTGAGGGCAAGGTTACCAAGAAGACCTTTGAGG -CTTTGATGAACATGAAGTACATGAAGTCGATCATTGACCCTGGTGAGGCGGTCGGAATTA -TGGCCGGTCAATCCGTTGGAGAGCCGTCCACACAGATGACTCTCAACACCTTCCATTTGG -CTGGTCACTCGGCAAAGAACGTCACACTGGGTATCCCCCGTCTACGTGAAATTGTTATGA -CAGCTAGTGCCAAGATTATGACGCCTACGATGACCCTTCTTCTGAATGAGGAGATTTCCA -AGGACGACTCGGAGAAATTCGCCAAGGCTATTAGCAAACTCAGCATTGCTGAACTTGTTG -ATAAGGTGCAAGTTAAGGAACGCATTGGCTCAGGCATTGGCCACGCCAAGGCTAAGATCT -ACGACATCGAGATCGACTTCTTCCCATCTGAAGAATACACAAAGGAATACGCCATTGAGA -CCAAGGATGTTCTGATTGCTCTGCAATCCAAGCTCATCCCTCGATTGGTGAGACTCACCA -AGGCTGAGCTCAAGAAGCGCAATGAAGAGAAGAAGGGCGTCAAGGGCACAACTTCTCAGC -CTGAAATTGGTGTTTCAGTCGGTAAAATCGCAGAGGCACCCAGAGGCGCTGACTCTGAGG -CCCAGCCAGCCgacgacgacaatgaagatgacgaggacgatgCTAAGCGAGCTGCCGGAG -CTCAGAACAGGGGCCACCAGGTCTCTTACGAAGGACCGGACGACGCCGAGCAGGATATCA -TCAGAGGCCAAGACTCCGACGAGGAAGACGAAGATGAAGAAGATCAAAGCACTAAGCCTG -CCAAGCGCGCTGGTGACGTTGAAATGAAGGACGGCTCTGACTCCGAGGATGACTCTGATG -ATGAGACCGCCCAAGACAGCAAGGCGCGCGAGGAAGACGTCATGGGCAAGTTCGAGGAGA 
-TCACGAAGTTCAAGCTCGACCCGAAGAAGGGCAACACTTGCTCCATCCAACTGCAGTACG -ACGTTGAGACTCCCAAGCTTCTTCTCCTTCCCCTGGTTGAGAAGGTTCTCCACCTCGCCG -TCATTCAGTGTATCCCCGGCATGGGCAACTGTGTTTTCGTTGAGGCTGACGAGGCGAAGG -GTGATCCCGCCAACATCCTCACTGAGGGTGTCAACCTTCTTGCCATGCGCGACTACCAGG -ATATCATCAAGCCCCACTCGATCTACACCAACTCCATTCACGACATGCTCAATTTGTACG -GTGTTGAAGCTGCTCGCGCCACCATCGTTCGCGAAATGGATGCCGTTTTCAAGGGTCACA -GTATCTCCGTTGACAACCGTCACTTGAATTTGATCGGTGATGTGATGACCCAGTCCGGTG -GCTTCAAGGCCTTCAGTCGTAACGGCTTGGTTAAGGAGGCCTCGTCTGCTTTCGCGCGTG -CCAGTTTCGAGACTACTGTCGGCGCCCTCAAGGACGTTGTGCTCGAGAGGGGTGTTGATA -ACCTCAAGAGTCCTAGTGCTAGAATTGTTGTTGGCCGTGTCGGTACTGTCGGCACTGGTT -CCTTTGACATTCTTGCTCCTGTAGCATAGTCTCTGACTGGATGGAGTTTTTGAAATGTGT -TTTTCTCATGTGTATATTGTGTTTATGCAAAGCTATTTATGTTTTTCGCTTCATCTTTGA -TATGTCCTGGAGTTTGTCAGCGGAACCAAAAAAGTCTTGTTCATATTAGCATCTTGACTA -GATAAATAGATCTTTTTCTAATGGTGTTGCAACATGATTTTATATTTTATGTCCCTGGCC -AGTTAGTATACAACATTTCACTATACAATTTACCCCGCAAAATACCTATCAAAACAGCTC -TGGTACATCCTTCATTAGAAGGACGAAGCCCATTGTTGCCCCCAGGACACTTTAAATCTT -AATATCAATGTGTACATGTCATTCCTTTCCAATTCAATTATGTACAAATGCCCTTTAAAC -TCAATATTCTACTGCGCCTGCAAAAGTCCGAGATCTGCCGAGGTCCCTCGGGGATGTCGC -CGGACTACGCATATCCCTGCGACTTAAAATGTGACTTGTGCCACCTGCAGCATCATCGAA -CCTCTTTGCTTCCTCTACAACTACCGTTGGAAAGATCATATTCGCCAGAATGCAAGCCGT -CGTCTTCAAGGGACCGTTCAAGGTCGCCATCGAGCAAAAGCCTATCCCCCAGATTCAAGA -TCCAACGGATGCCATACTAAAGGTGCGATATACTGCGTTATGTGGCAGGTAAGAGATCAT -GGGCACCTTCAGCAATACACAGCCACTCATATTTACAAAGTGAACTCCATGTCTTTCGTG -GCCACCAGAAGTCCGGTACAGACTTTATCATGGGCCACGAGTTTACAGGTGAGGTCGTCG -AGGTTGGATCGGACATCAAGAACCTCAAAAAGGGAGACCGAGTAGTCTCACCTTTTACTA -CGAGCTGCGGAGAATGTTTCTACTGTGAGCGCGGCTTCTCCTCTCGGTGCGCTCGAAGCC -AGCTATTCGGTACTACTGTACTTGACGGTGGACAGGCCGAATATGTACGCGTTCCACTAG -CCGATTCGACTCTGGCTCAAATTCCGGCTTCAATCGATGAGAAGAAGCTGGTTCTTATGG -CCGATATATTCCCAACCGGGTACTTTGCTTCCATGAATGCTTTCTCTAACTCAACCCCGG -AGGAGGTTCAAAACAGCACCGTACTCCTCTTTGGATGTGGTCCTGTTGGTATCTGTGCGC -TTGTTAGTGCGCTCGATTACAAGCCGAAGAATCTGATTGCAATTGATAGCGTGCCTTCCA -GACTAGCACTTGCACAGAGTCTAGGAGCTGAGCCGTGGAATTTCAAGGAGGACGAAGAAG 
-GTCTACGCCAGAGGGTCAAGGATCTTACTGATGGTCGTGGAGCGGATATTGTTATTGAAG -TCGTTGGACATAGTAGCGCTCTCAGAATGGGCTTCGATTTGTTACGGCCCTGGGGCCGTA -TCTCGAGTGTCGGTGTCCACAATCAGGATATCCCCTGGACTGGAAACGAAGCCTATGCAA -AGAACCTTCGTTTACAGATGGGCCGATGCCCTGTTCGGAGTATGTTTGATTTCGAATCTC -ATTTTGACTGTTTTACTAAAGTGTGCTAGGTATCTTCGGCCAGGCCATGGAACTTTTCGA -AAAGAAGCAGGACACCCTCAAGTGAGTTTGGCTAACCCGATATCCTCTGTTTTGCGAGTT -GCTAATGCCTTCCAGTTTTATGTCTCAAGATATCCGTCCGCTCTCGCAGGCCGCGCAGGC -GTAAGTTCTTCCTCAGACTTTGACAGAATGTAACTAACCCTTTGAAAGATATGACGATTT -CGATAACATGCGGGCTCACAAGGTGATCTTTGAGGCGGATAAGTAATTAGCCTTATAAAA -ATTTACAAAGTACTTGTAATTTGAGGATATTAATATCGATCTGCCCATACTCCACGTCTA -GTTCATTCCCAAGGGTTTTGTAAAATCACCCTGTAGGCACAGTAGAAGAGGCTGAAAAGG -CTATGAATAGTCCCTTCTATCCCCATATCACAAAAGACGGGCCAAACGCTTAGCTTTGTG -ATGTCCTTTTGTATCATCTCGTCCCATCAGAAGCTGTACATAGAAAAGTCTCTTGCACAA -AGTATGCTTTTTccgctatatcccgctctaccccgctctatcccactctatcccgctctg -tcccgctctatcccgctctatcccgctctgtatcgctctcttccgctctatcccgcCTGA -TACCGCCCCTTTCCCCCACCTACCTGCAGACCGGGAAGAGGAAAACTATATTAAAGATAT -CTTAGAAGAATAAACCGGTAGCTCACTGAGAATCCTGTTGAGAAGACCGCTGGAAAGACC -ACTTCACAGCCCTGTAAGAAAGACACCATAAGGCCGTATAAGCGGCCGTTTAGTTTTGTA -GATAATGAAACAAAAAGCCGAGATGGTAAGTGCATCATGCACATCATGACGGTTGCGTTT -TAAGATGAGGGTGTTCCGGAAGAGACTGCAGATGCTGCCCTAAGCCCTATGATATACCCA -GTCGAAGATTTGACGCTGGTCATTGCACTGTGGAGTACTTCTTTGTGAGAAAGCCTGGGA -CTTTTGAGCTCAATCCTGAACAAAAGTCAGGGATGCATGAGCAATTCAAACCCGACGCTC -ATGGTCGACGCCTGTGGCACTCTAAGCGAATCAGAGGCGCTTCTATACTTCGATAACGAC -CCCTAAAGCCCTTTAGATCTTGCCAATGACCTTCCACTAAGATATACGGAACATTTAAGA -AATTCAAGCCATTGAGGATGAATGCTAAATCATTTACTTTGTCCTATTATCTGCTTTTAC -ATTTGTTTAGATCCCAAGAATAATGAATTCACAGATTTGTCTTTAATTGCAAAGTAAATA -AGCGCCGTCTATGCTAAATGGCATCATGCCCAATTTCAAACGCCCTGTCCATGCCAGTAG -ATGCGTCATTATGTAAAGAACAAACAATATTATTTTATGGCGCATCGAGCATCATGAGGT -CCGCATCAATGTCCTCGTCGTATTCGTCATCGACGACCCGGGATGCGGATGGGATCATTC -CACCCATTGCGTGCCCGTTATTCACCTCCTCTCTAATCATCCGCTCTTCCTCGGTTTTCC -TCTTTGCCTCATGGCGCTCGATATCAGCCAACCCCCAGGTCGAAATGCCTTCTTGGTCTT -TCGCCCTAAAAGGCTGGGCCATGGTCCGCAGGAAGTTCTTGGCATTGGATACAGCCATGT 
-CAGTGCTGAGATTGGTTTCGCTGTCCAGCATTGCCTGGTTGATCCACTTGGGGAGTTGGG -GCCGCTTTCGCTCGAACCGACGATCCGCCAACACCATGACTCCGTAGTCGTCCTTACCAC -GCAGAACACGGCCCAGACACTGAGCTGCATGTCGAATCGCATCAAAAGATAAGAAGTCGT -TCTCGCGAATACGATAGTTCTCACGTAAAAACTCGAGACGAGCTTTCAAGATTCGAGATT -CAGTGTATTGGAAGGGTACACCAATACAAATGACTGCACGACCGTACTGGTGATCGAAAT -CAATACCCTCAGAAACTTTGCCTCGAGCGACACAGAACAAAATCGCGCCACGTCCATTGC -AACACGCCGTTCGATAGGTCTCCAGCGCGAGTGACGATTCTTGTGCATCAGGGGTTTCAA -CAAGAATTAATTTGTAGTTCCAGATTTGGTCCAAAATACCCATACCTTGCCACATGCTGA -TAATTGATTCCATGTAGAGGTAAGAAGGGAAGAAAACGACAACACCATCAGGTGTGATGC -GAGAAAACTCGGTCACCAGGCTTCCGTAGTTTCGCACAACGCCAGGATCATTGCGAATTC -CAAACGAAGATGAGACCTGTGCTTGGTCGGACCCTCGAGTGACAATCATGGGCAAGAAAG -ATCGTCGAGCGAGGGTCATACTGTAAGATTCTTGAAGAACAGCATTGAACCCCAGCATCT -TCGGATACATTTCCAAGGGCGACAAAGTTCCCGAGGTAATGATCACAGACCTGAAGCGAT -CGAATACCGGCTTGATAGCAATAGCGGCATCCAAGCACGTAAAATGAAGGATGGGGTTTG -GTACAGTGGCCGCTTCCGACTCGAAGGGTTCAAGGATAAGAACAAATCCCTTGTCATAAG -TTGCGACCAGTGTTGCAAAGGTGGCCACCTCTTGAAGAGGTTGATAATCTTCGATGTTGA -CAAGTTCCAGCGTTCGTACAAGCGACGTTAAACGCTCGGCGCAGAATCTCAAAGGCTTTC -GTTCAATATAGGTAAGGTCTTTTAGATGAGTAAGGAAAGAAGGAGGTGTTTCCGATATGG -TATGGGTGACTTTCATCCGAGTCTTCAAATACTCAATAAATCTCTTCAGGAAAGAGATGA -AATGCTCGGCCCGGCGGATGTTACCAGGGACGGCTTCCTTGAGTAAATCATCCGGCAATA -CCGGATTCGAGATGAATTGATCTTCTTCCCGGGCCTTCTCAGCTTCCTGCAATCCCTCCA -CTAATTTGGTGTATTCGTTTTTGAGTTTTTCTTCGTCCGTGCTCTTCATTTCTTCGATCC -TGCGTTCTAGGTTGTTTGCACCCCGTGTCGCTTTTCGTAGGGAATCCTCGCTGATGTCTA -GACTGAGCGATTCAATACAGACATTGTCAATGTTGTGCGCTTCGTCAAAGACCACAATGC -AATCTTTGGAGAGCTCTCGGGATACTCGTTCCGCAATTTTCGGATCGAGGAGATAGTGAT -AGGAATAGATAATAATATTACACCATGGCATCTAGTGAAGCTTGTTAAAAATGGCACACT -TTGTAAGCTACTCGCTCGATATCTTACCATGCGGCGAGCAGAGAAGTAAGGACATTGCTT -GTGCTCTTCTCCGTATCTTACCAGCCCATCCAGTGTGAAAACGCCTGGGGGAACGAGGTT -GTGGGGCTCAAGTAAGTCAAGGTTCTAGTCATGTTAGCTTGTCTAATGCCATTTCGCAAA -TAAAAAGGTCCTACCTCATGATATACGCAGAGTTCAACATCTTCGCCGCGCTCCTTTTTC -TCCTTGACAAATCCAGCTGTCAAGCTTCTGCACCTGGCATCAACAATAGTGCCACTCTTC -TCTTTCTTTACCGAAGGATGAAGACACAGGTTCTTCCGACTGGTCAGGCCAAGCGCACGA 
-AACTCCTCTTCATGACCCAGCTCCTTAGCGCGATGTTCCATCAAAGCCTTCAGCTCTGCC -AAAGCTTTCTCGATCTCCGACATGGTTCGGGAGCAGTAGATGAGCTTACGGTGCTCTGGG -TAGTGCTGTTGGTAGGCGACGATCAACGACAATAACGAAACTGTCTTGCCAGTCCCTGAG -GGCATTTCCAGCACACAGTGACCCCCGACATCAAGGGTCTTCTTGAGATCACACATGTAG -GCATATTGTTCGGGGTAGATGCGAGGATATGGAAACAGCACCGGAAGATCACTGTGCGAG -ACAGATATGGTCAGTTTGAATACTAGATAGGACCATACACAAGGGACATACTCAATGAAG -AACTTCATCTTGGCTGGGCTCCGATGCAAAACGCAAGAGGGCAAAGAATCATAAGTGAAG -ATAGCTTCGCTCCCATCTTTCGCGGCTCCGCCTCTGCGCTTGTCCTATTTGGGACAGCGA -TGACACAAACTCACGTGATGAGTATGTTGCTCCCGCGGATCTTTTGGAGATCTGATATTG -AATGCGCTTGCAGCTTCACACACGCGATGAGGGAGTGAATACAACATCCAAATCACACTA -CAATCTAGAATTCATAGAGCCCCTTGCTCTCTCCGAAGCCTTTCCAAAATCCTCAATTTT -GAGGTCCTCCCCCACTACTTCCTTATCCCGCTGACCTCATACCCCACATGAGCAAAAAGT -TCAAATCCCAGGCCTCCAGCAGTCGCGCTGCTGCCAGTGCCTTTGGATCATTTGGGGGCT -TCTCTGGTGGTCTCTCCAGCGAAGGAAAGGATGCGTCCGCTCTCACATACATTGCCGCAC -CACCAGATCTTTCTCGCATTGCGGAGCAGCAACTAGTTATCGCATTCAAGAATTTACTGA -AGAAAGATGACATTACACGATTGAAGGCTCTAGAGGAGCTACGCGATCATATATCAACCG -TGGAAAAGAACAAGAGAACTCTAGATGACGGGTTTCTAGATGCATGGGTATGTATATATT -GCATCTACATACAAACACCAAGTAATAATAATGTTTTCCCACTAGGTCAGAATTTATCCC -CGACTCTCGATTGATTTGTCTCGCCGAGTACGTCAAACTGCCCATCCGATACAGGGTACA -ATTTCTGGTCTCGTTGGGAAGCGGATCGTACCTACTCTACCCAAAGTGATCGGAGCATGG -ATCGCAGGAATCTATGACAATGATCGACTCGTCCATCGTGCTGCTCTTGAATCGTTCACA -AAAGTCTTTACAACCGAAGAAAAACGAAACAACGTGTGGAGGATCTACCAAAGCTCCATT -TTGGACTTTGTAGATGATGTTATTCTTCATCAAACATCCTTGACATTGAGCGATGAAAGA -ACAGTGAAAAGAGATGATGCCGAGGGGAAATATGCCCGTGTTGCAGGAGCCGCAATCTTG -CTTTTCAATCGCGTACTAGGTATGCTGACCGTTTCTACTTGCAAGACTCTATCTAACCAT -CCATTTTTCCTAGGCAATTCGTCCGATGAAGATTTGCAGAAGAACCTGTCAGAAATCGAG -AATTTGCTAGCAAGCAAGAGTTTGTGGGCTCTCTGTCACCACGACGATCCATATGTTCGC -CGATCTATATACATCCTGCTCCGATCTGCGGTTTCCCGAGAGCCCGGATGGATTGACTGG -AAGACACTCAGTTCAGCCGTTATCGGGAAATCTTTGTCTCTCCAGCAAATCGGATCCGCA -ACCGAATTGTCAGAGTCGCTCCTTTTACTCACCTCGCTGAGGCCTCAAATATGGACTGAT -GACTACACTGGGAAATCATCTGCCTCGAAGAGATTGCGCCAATACTTGCAAAAAGGGTCT -CAAGGTGGACATTCTAACTTTTGGTCAAATCTAGACCAATTACTTCGCATTATCCCCCAA 
-GAAGTACTGGCAGGTGCCGATAAAGCCACTGCTGATCATGGAGTTACTTGTACCAGCGCA -ATTGCGTTGACCGAGGCTTTGCAAGAGGGCCTGACCTCGAGAGAAGAGCCGCGTTCAAAC -CTTGCCATCGGGTGGAAATCATATATCCAAATAGGAACATGGCTTGCAGCACTGATCCCC -CAGGAGCAGAAGTTTGAATTCATCGCGAAGAGAATGTCTCCCTTGGTATTGCATTATGTG -CGAGCTGACCCAGAGCTAACACAATGGTCACTTCCGGAGCAGTCAGGCGAAGGAATTTGC -GTGGATTACGTTTCTACTCTAGCTTCCGTTGAGCAATCCCAGGAACTGCAGTCATTGTGT -ACATCTCTTTCCGATAGCCTCCTGGAAGCTGCGAAGCTATCGTCTCCAGAGCAATCAAAG -GACTTCCGCGAATCTCAAGACTCAATATGCGCCCAATCCAGACGCCTTCTGACTTTGAAG -TCTGCTGTTCTTGCACGCGTCGCAGATACTGAGGTCGAGCCTCAAGTTTCGGGGGTCGTC -GAAAGAACGAGCACTTCCCTTCTTGAAGGTTGTCTAGAAGTCCTGCGCACCCGTAATGGG -AAGCCATATGGAGCTGCTGCCACGGTGGTGGAGTGTGTTTACAACCTGCCGTCTGTCGCC -AAAAAGTCCCGAGACCTCAAAAATTTCGTGCAAAATGATGCACCGGAGCTTTTGCTTTCA -CCGTCTGCTGATCGATTGATTTCAATCATCTTGGAATGTCGGGATTGGGATGGATTTGCA -TCCAGTTTTGAGAACGTTGTTGAGCGAGCCTTGGCCTTGGATCTAGAGCAGTCTAATGTG -CATGTGCTCCAAAGCTTACTTTCTTCTCTCGATTTCAATGATGCCGAACACAAGGAGAAG -CTCAACTCGCTTGTTGTGCGGGCTCTTGGCAAGGCCTGCCAAGGAAGTCACGCCCATTGG -CCTATTATAACTGCAGTCCTTCAAAACAAAACATCACACGGCGAATTGATGGATCAGATA -TTCTTATCTTTGATCGAGGCGTTATCTTCAGATGATAAAGTCTTTGATGCTTTGCACGGG -CTTTCTCACATCGGAAAGTCTGCCCCGTCTTCAGTCAGGGAGTTCCAAACTGGAGCCTTT -GGATCGAAGTTAACAGGGAAGCTACTTTTTCTCACAGAATCACCATCTGAAGAGGTGTCA -AGCCTGGCATCAGCGCTGCTGAAATCATTGAAAGAATCTGGTGTTGGCGACACGAGTGCT -AAGTCGAGTATCGAAATTCTCCAACATGGATTCAGTCATGTGGATGAAGAGTCATTGTCG -TGAGTGCTCCATCGCCATCACATACTGCAGAGAACTGTGCTGACTTTGTTTCAGGATTGA -ATCACTTCTTGCTATCGCAGATGAATTGCTGCCAGCTTTTACGGCCGAAGGTGCAGCTGG -TACCGTGAAGGATATTCTACCCTCTCGTAGTTCATGGGAGGAATCTCTGACTCCCTTCCT -CCAACTTCCACCTCGTCCTTCCACTGCTATCACAAGTCCTCTAGGAGGCGCTGTTCACTT -GATCCAACGTGAGCTATCTGACTCCTTCAAGGCTCTATGGCCGACTATTCCTCGTGATTC -AGAGCATCGCTCTTCTGCATTCCGCCTGGCATCATTCACCGTCAGCATTCTTTCTACATC -GGAGTTATTGAAAAATCTGGACCAAGAGGACTTAGAGACATTGTTCCATTTCTTGCCCCT -GGCAATTCAACTTATCGATGATGATTTGAGCATTGAGAATTGCAACGGCATCTCAGGCCT -CGAGCTGGCTGACCAACGAGAAGAGTACATGGAAATTGTATTCGCGGGTCGAAAGGTGAT -CAGTAACTGGATCCGCGACAATGCACCAGTCAGCTTCGCACCCGAGAAGACCGTTTCCTT 
-CTCGTTCGTCGAGTTCTGGGAGACCAGACTAGAGGAACTTAAGGGCACATCACCCTTGGA -TTATAGGGTTGGTGAAGCGTTCGCCAAAATAATGGCTGTTGCGGATTCATTGCAAAAGTC -CAAATCATCGGAGGACGTCGCAAAGATCTGTAGAGAGGCCCGGACAGCAAATCTGATCCG -TTCAGCATCTTGGTTTGCAGTGTTGCGAAGCTCCATACTTTCAAATCCCATCGGCAACAG -AATTTGTAATGAGCTAGTCGCCGATTCCACTGGATTGAAGCCCCAGGATTCTTCCCAAGT -TGGTAAGAATCTGATACCAAATTAGCGAGATAGACCAGTACTAACTTGTGCGCAGGATTG -CGGAAGCTGGCTCTGTTAAACATCCTATTATCTGGGGAGGAAAATGTTGTCTCCACCATT -CCAACACAGAGATTGGTCTTCCTCACCAAGAACTTGATTGAATGTCTCCATTCCGGCTCA -ATGTCTCTTGGTTTGCAATCTGAAGTCATCCAAACCTTGTCTCTTGTCCTTCCAGCTCTC -GGGGAAATATATGGGTCTCATTGGGAAGAAAGCATGGCTATTCTTAACTCTGTATTGCAG -GGAACTAATGGAGGCGAGGAGGCTCTACCATTACTAGTGTCATCTTTCAGACTCTTCGCA -CGACTGAAGTCAATCTCAGAAAGTGACAGCAATGACGACGTCCAAGATGCTTGGTCAGAT -CGAAAGGCCGGTCTTTTCAATGCCCTAGCCTCCACCATTGACACATTTGGTAAGCACTTT -GGGTTGCTTTTATCACTCAACTGTCTAACGAGATATAGATTCCTCAACCACATTCCACCA -ACCTCGGGATGTTGCCGTTGATTTATTACGACGGCTGATTAACACCATCCCCGTTGACAA -CTTGGAGGATGTCAGCGAAACGTTCCATCTTCTGACTGCGCACAGTCGAGCAGTTCAGCG -AGCTGCTTACACAATTCTCCATCACTATATCCCCCATGCCCAAGAGAAGGTGTCTTTTGA -GGTTGCCTTATCAAAGACTGCAGTCAGTCTTCCTGATGAGCTCATCTCTCTATTGCTTGA -GCCCCCCACAATGCAAATGGTCTCTGCTGCCTACGGAGATGATAAGATGTGGACCAGTAT -GCGGTCATATCTTCTCAGCTGGAAGGTGGTGTTCGACCATTTCTCAAATGCTGTAAGTTT -TGACTCTTCCTCAATACTTGTTAGATAGACCTTCTTACAATTTGCTCAGTCGCTTCCTGT -TCAAGAGTACTACACGGCGAGCATCAGAGAGAACAATATCCTCATCCCCTTGCTGGAGTT -CACGTTTGATTTCCTCCAAAAATCGCACGGAAAGATGATCGATGCATCCAAACTTGATAT -TCGATCATTTGAGCCAGATGAGTCCGAGAGTCCCGAAAAGGAGACCCAGTGGCTTCTGGT -ACATTTGTACTACCTGTGCCTGAGATACTCTGCCAACATGACCAAGAATTGGTGGATTGA -CACAAAGAAACGCATCAAGGGCCCGGTGGAAACCTGGACAGAAAGATACGTGAGTAATCA -ATATTCGGTGGTCGGCCCTTTAAAACGAGACTCATAACTAATACTGCCCCCAGATCTCCC -AGCTCGTCGTTGAAGATGCACTGAAAAGCGTGACCGACTGGATTGCCACCCAGGATGCAA -ACGAAGAGCGCGCACTAGAGGTCAAAATTTCCCCGAAGACAGGCGAAATCATTGCCAGCA -TTCCTGTCGACGAAGAGTCGCCTCCAGTTGCCATTTCTATCACTCTCCCACCCGCATATC -CGCTTCAGCCTGCGTTGGTTGTTGGTCGCAGTCGTGTGCTTGTGGATGAGAAGAAATGGA -AGAGCTGGCTGCTGACGATCCAGGGTGTTATCATGTTTGCCAATGGCAACCTGGTCGACG 
-GATTGTTGGCCTTCCGACGAAACGTTCAAGGTGCTCTCAAGGGCCAGAGCGAGTGTGCTA -TCTGTTACTCTGTCATCTCAACAGACATGCAGACACCAAACAAGCGGTGTGCCACTTGCA -AGAACACCTTCCACTCAGTGTGTCTGTTTCGCTGGTTCAAGAGCAGCAACCAGAGCACTT -GTCCTCTTTGCCGAAACAACTTTGTTTATGTTTAGATGCTTTGTTAAGTTCACTTGTACA -TAAAAAACAGACATGACATGACAAACGTAGGATAGGGCCCTTTGGTCCAGTCCGATATCC -GTGGAAGAAGCATTTTAGGATCTTATCTTCAGTATTGCTCATTCCATCGGTAATATGACC -AGATCACCACCTTTACCACCTCGTATATGTACTGTGTCCTATATCTGAGACCAATAAAAC -GGGGAGGTACTCAGCACGTTTCGAGCCATACCCGATTGGGAAGTTGATTCTCTTTGATGT -CCATACTGAGACTATGGGTAATCACAAGATCACAATGAGTTCTTTATATTTGCCTATGAC -CCTGGGAATGGATATAGCATTTCTTTTGCTCAAATTTACCGAAGGAAATAGCTCGCCTAA -ACTAACTTAAATGGAGTACTTTGTTGCATACCCCGGAAAGATCTGTTGATTTTATTGCCT -ATAATAATGCGGAGAACTGGCTCTTATATGCAGGTGTGCCATACTTTTCAATCCCATACC -CAATGGTATGTCATATATGAGCCAGTTTAGTCTTCATCAAGCCTATACACAGTTGATTCA -GCCTAATTGTGTAGTTCTGGTTCCCTTTCTTCACGGCTGGACCTGGCGATCCTGCCACAC -ATGAGGTTTCATACTCCCACGATCATTAGGCAAAATGCCTCGGTCTAAAGGAACTGACTT -GGTTTCTCGACAATTTACTGTGCGGCAAAATACAATGATCTTCTAACTCTAATTGACAAG -GGGAAACCCTACATCTACTTTTTGTGTATAAAGCAACCGTGGTCTCCGCAGTCTGAAACA -TCGCAAAGAATTGCACACAAGAGCATATTCAACGCAAGAACCCTTTCGTTTTACTTTGAT -ATATATTTCGATACCATGATTTCCAATATCTTGACATTATCCCTTCTTGCCGGCAGCGTC -CTCGCTGTCCCAGTTGAGGTTGACATCGAGAAACGTATTACCTGCCCAGGCGTCCACATC -TTTGGAGCGCGCGAGACCACCGCATCCCCCGGATATGGCTCATCCAACACAGTCGTTAAT -GGCCTGCTCAGCGCCTACCCAGGCTCTACCGCCGAGGCCATCGTTTACCCTGCCTGCGGT -GGACAATCATCCTGCGGCGGCGCGAGCTACTCCAGCTCAGTTGCCCAGGGCATCGCAGCT -GTCGCCTCAGCTGTAAACTCCTATAACACACAGTGCCCATCGACCAAGCTTGTACTGGTC -GGATATTCTCAGGTGAGTGAAATCTCCTCGACTGCTGAAGACGAAAGACCGACTTACGAT -GCTCCAGGGTGGTGAAATCATGGACGCAGCCTTGTGCGGTGGCGGTATCCCCAACCAAGG -CTACACCGACACTAGGGTCCCGCTGTCTACCTCTGCTGTGAATATGGTAAAGGCAGCCAT -CTTCATGGGTGATCCACTGTATGTGGCAGGATTGCCGTACAATGTTGGAACTTGTGCTGC -TGGAGGCGTATGGACTTCCCTACGTTCCCCTTGAACTTGGAAGCTTTGAATCTAACGATA -GAAATATAGTTCGACGCACGTCCTTCTGGCTTCTCTTGTCCGTCGGCCAGTAAGATCAAG -TCATACTGCGACGCCGCGGACCCGTATTGCTGCAACGGCAGTAACGGTGCGACCCACCAG -GGCTACGGGGCTGAGTATGGCGCACAGGCTATTTCCTTTGTCAAGGGTAAGTTAGCTTAG 
-GTGCAGATTTAATGTCAGCCCTACCGATGTTACCAAGGATATTGTGCTTGGGAGGGTTGG -ATAGGCCTGTGGTAGCTCGGTGGTATTGACAGTAGGAAGATTGTTGCAGGAAATGCTGGA -ATCGAGATGTTTGCGATATTGACTACACATAACCTATGCAATCGTTTTCGCCGTTGAGTA -TAAGAGTGCGAGCTTGAGCCAATGAACAATAGGGTTTAGTCACTGAGTCAACCTTCGGAT -CTTCCAAGAAACAAGTCATTTTATAGAAGTGAGGCTGTAGGCACTGAAACTTGATAAAGA -GATATAAAGTAGAAACTGATCTCGAGATATTGATTGTTTGAAAATCAAGTTGCAGGTTCG -AAAGAATTCGGAGATGAAATTATGAGGCGATTGCGATGACCCCCAGACAGCTACCCCACC -ACCGaagctccaagctccaagctccaagctccaagctTCTTCCATTCCTCATGGCCTCTC -ACATTCCCAATTTAAACACTCTCCGAcgaggccgaggccgaggtcgaggcttaggtcgCA -CAGAAGAAACCGGACTCCCTTCTGGAAAAGATCGCATCGTTCAAGGCACAGACAATGACG -CAAGCGTGTCCCGCCTCAGCGCCGTCGAGCTAGGATACCTGGACGATGCGTACGCGGCAG -CATTGACACCTCCGGGGTCAGCGACGCGGAGGTTGCCAATTATTAACAGAGGTGATCCCA -ATTCTCCCAAATTACGCTACTTTAATTACAAGAGACCCTATAGCTGACTTGGAGACCTCC -CCGCGCACCAGGAACCTACGCCCGCACGACAGCCATTGACCAGCTTGTGGCGCGTTTTCT -CGGCCCATACTCACCTGGAAATACACACAAGAAACAGATCATATCTCTAGGCGCCGGGTC -GGACACGCGCGTCTTCCGGCTTCTCTCGTCGCGCCAAACCCCAGATTTCGTTTATCACGA -GCTCGATTTCGCCGTTAACACAGCGGAGAAGATTCGCACGATTAGATCGGCGCCTGTACT -GCAGCGCGCACTGGGGATTGACTCATCGGAGGTTTCGTCTGAAAAGCATACTCGCGTTGC -CGTGTCGGAGGCCGGGGACGCGCTCCATTCGCCCTCGTACCATATCCACCCAGTTGATCT -GCGATCGCTGTCGACGTGCAGTAACCCTGCCAGTGCACTTCCAGGCGTGGAGACGGGATT -ACCGACTTTGCTGATATCCGAGTGCTGTTTGGTCTATCTCTCGCCCATTGAAGCGGAGGA -GGTGGTTGCGTTCTTCACGCAACGCCTTTTTGGTCATGGGCGTGCAGTGCCTGGGCATGG -AGACAGCTCGCAGGAGGCGCAAGTAGGTGTCGCTCCACTCGGGTTGGTTCTTTATGAGCC -TATTAGACCGAATGATGCTTTCGGTCGCACGATGGTGTCGAATCTTGCGGCGCGCGGGAT -TCAACTCAAGACTCTCCCAAGATATGCGTCGCTTGGGGCGCAGCGTGACCGCTTCCGAGA -GCAAGGCTTTGGGAATGGCCAGGCTGTAGCTGATATTGAATTCATTTGGAAGCGGTGGGT -GAACGAAGACGAAAAAGAAAGAGTTGCTGGGCTAGAAATGCTCGATGAGATGGAGGAGTG -GCAGTTGCTTGCACGGCATTATTGCATTGCTTGGGGATGGAGAGACCGCGAAGATGTCTC -AGCTTTTGCTGGGTGGAAGGATTTAGAGGCCCAGCAAGGCGAATAACTTCATTTGTGGAT -ATTTGATGTCTGACGATACCACTCTATATATCACGAAAGGTCATGATGCATGTCTGAAAA -GACAATCTGAAATCAGTTTGGGCCGAGAAACCTGTCCAACACGATGCAGTATATACAGAG -CAAATTTATATTCATATCCTCCATACCTTTACTTTGGTCCACGCGCAGGTCTATGAGGCT 
-TACAGTCGTCAGAGGGGTTGATCATGGGGACATCACGGTGTGGCCGTGTAAATGCGACCT -GTCATATTTTGTCAAATGCATGGCAAAAACCAACTGGTAGGCCGAGTTGATAATCACCAT -GCGAGTAAAATAAACGGTCCTTACAGCAAAGTCAAAAGAGACATACTGTTCATAAAATTA -CGTTTAGCACCCGTTGCAGTAGGGTTTGGTCAGAGATGCAAGGATCGCTACCGCTTAGGG -CTATTGAGCTATCTGACAAATATCTCACCACAGATCAAAGGTTCAGTACCAAGGAGAGCC -ACTCAATATTTGTTCTTTGTTCTTGACGTAGTTCTGGAAGACCTGGCCTTGGAAAACTTG -ACCAACTCGACACAACTGGCTCAAGCCATCGGACACACCGACCTTTGACACCTGCATCGC -ATATCAAATCAGTTCCATGCTTCTTGCATTCATCCCCCTGGATACGATGCACTTTACAAT -TTGCGCAACTCCGGCGCGTCTTGTACAAGTCTCGATAGACCCATTTAATGCGGCTCTTTG -GTCACAGCGGCGATCCTGTTGATCTCTCGCTATGCTATCAAAACCCCCTATACGGAGTAG -TCTCCCAGGCCAAGCTTCCAACCAAGTCACGAGGCGAGATATCGGCAGTCGGGACCACAC -GCTCTAGGGCTTCGGATCACTTGTCCATGTGATGCCACGCCTATCAAAAAGAAGCGAGAA -TCCGAGATTTTTCTAGTTGTGGTTCTATTGGCCCCAGGAATGGAGATGTGCTGGGTTGTA -TGATGATCAACAGACCTAGACCGGTGCAGCGTGAGTTTGATTAGGATTGACGGGCTACCA -GGCTATAAGAGCTTCGGTGGAGTTTCTCTCCTATTTCATGTGCATATGGATTTGATATTC -CATGCAAGATTGCAAATTCAGATGAATGAGATGTCAGGTGATAAGATATCTCCAATGGTT -CCTTAACGTTGCGCCGAGCAACGTGTTCCTCCACCCTCCTCCACCCTCATTATTTGTTTC -ATGTTTTTTCTTTTGCCGTTGAAAGAGTTCGGGAGATAACTCCACAAAATCAGATCTCTC -AGCTTATTGGCGTTCGTCCTGCCAAGCAGGAACCATCGGGGTAATACAGTTCATCATTGG -CTGCAGGCTAAGACTGCCCCTGACGCTAAGTATCTAGTGGCTCCATATGGCTGTCCACTA -GCGGAGTTTGCTTCAGGGGGAATGCTTTTTGGGGGTCCAAGGCAGCCCGCATAATTCCAG -TTTGGAGGAGCAATCATGAGAATTATGGGCAATCACAGACGACCCGCCCGCAAAAGCTGG -GGCTGCCCTGCTCTGCACCGCAATTGGTGCCGGAATTACGCGCACATCATCCCAGATCTA -ACGCCTTTTCAGGCTAGCACACTCGTGGACCTGAGCTGGGACCCTCCCCCAAGGCCGTGT -CCATTTTCTTGGTGCTTTTCCCCGTTTCTTTCCTTTCTCTCGTCTTGACCTTCATTATCT -GTGAGCTCTGTGTCTGTCCCTGGCCATCGGCCGTATATCTTTTGTCACTCGTTTGACTTA -GCCATTCGCTTCATATATTCATTCATTTATTGATATTTATCAATTTGCCAAGTCTTTTCC -CCCTGACACAATGAAGTTCCAGATCACATCCTTCCGCGCAGCGGCGCTTGTTGCCCTCAT -TGCGAGCGCTGAAGCCGCCAAACATAGCCATGGCCACAGCCATCACCACGAGACTCGTGA -CGTGTCTCTGGAGAAGAGGGGTGAAAAGTGCAAGTTTCCTTCCGACGCTGGCCTGGTTGA -GATCACCCCACATGGGAAAAACGCCGGTTGGGCTATGAGCCCCGACGAGCCCTGCGAACC -TGGCAACTACTGCCCTTACGCTTGCCCTCCCGGCGAGGTGTCTATGCAGTGGGATCCCAA 
-GGCGACCTCCTACACTTATCCTATGTCCATGAGGGGTGGTCTGTACTGTGACGACGATGG -CAAGATCCAGAAACCCTTCCCCAACCGTGACTACTGTGAGTCGGCTGCTGGTGTCCTCAA -GGCCCGCAACAAGTGTGGCAAGGCTGTCTCTTTCTGCCAGACCGTGCTGCCTGGAAATGA -AGCCATGCTTATCCCCACCATGGTTGAGGACCTGGCCGACCTGGCCGTGCCTGGCATGTC -CTACTGGTGCTCCACTGCCGCCCAATACTACATTAATGCCCCTGGTGTCACCGCGGAGGA -GGGCTGCGTCTGGGGTAGCTCCCAAAATCCGGTCGGTAACTGGTCCCCGTACACCGCTGG -TGCCAACACCGATGGTGATGGCAACACCTTCCTGAAGATTGGCTGGAACCCTATCTACCT -GGAAACCACCACGCCTTTCCGCAAGGTCAAGCCTGACTTCGGTGTCGAGATTGAGTGTGA -GGGTAACGGCTGCCACGGCCTGCCCTGCAAGATTGACCCTGCTGTCAACGACGTCAACGA -GATGATCGGTAAACCCTTCGTTGGCGCTGGCGGCGCCACCGGTTGTGTCGTGACCGTCCC -CAAGGGAGAGACCGCGCACATTATCGTCTTCGAGAAGGAGAGCAATGGCAGCACCTCGTC -CGAGACCGTCGAGATTTCCACCAGCTCTCGTACATCTACTAGCAGTGCCAGCTCCAGCAC -CAGCACCTCGACTACTACTACTCCGACCAGCACTACTGAGAAACCAacctctacctctac -caccacctccagctctacttccacctccagctctactcgttcgtcaacttcaacctcgac -tcgtacctctacctcgagctctactgctagctcctcctccacGACTCACGTGTCGAGTGT -CACTAGCCCCGCCCACCTGCCCAAGTCGAGCCCGGCAGGCTATACCTACCAGCCGCACGT -GCTGACCGGTTCTGCTGATATCCAAGCGGCCTCTACGGCGACCGCCGCCGCCGCCGCGTC -CACATCTTCCACTGGCGATGCTACTAGCGCCACCGTCTCTATGTTGACTCTGGCCTTCGG -CGCCATTGCCGCCCTGGCTGCGAGCATCTAAATCAACTTCAACTGATTAGGCCATATCGA -CTGTACAATATTGCATATATGTACAGGGTGTGTACAACACCGACTTGGGATGCTTTTCCC -GCACTATATTTGCCTGCGGCAATCAACCTTGAAACTCCGCGGGCGTCTATTACCTTTTGA -TTCTTTCACATTCTTAATTCTTTTTCTACATAATCCCCCTTTTTTATGATTCTTCACATC -CTTGGGATGCCCTTTCTTCACGACGCTCAACGTTGACTTTTTTTTTTAACTCTGCTTCAT -GACATTTATATCCCTTGGCTCCTCAGCTTTGCTCTTCTTTCCTTTCACCTTGTTGATTAT -CCTTATTATTGCTTCTGGTATGCAGGGGAAATTGTTTTTTTGTTATATACGATGGTTGGA -GGTGAACCCAACCCAGTTTTTTTTTGGTTGGACATAGGAAAACAATATTTATATTTTATT -CGTCAAGCCTTGCCACTAGGATCTTTTAAGATCTAAGACAGACTCTTATAGAAGGTACAA -AACCCTGTCTCATTATTTACTCTCGCATTGATTGTTCAGGTCAAGCCCACTGTCTAAGAA -ATACTTGAGTAAATTGAATATAAACAAAGATCAAAAGAGTGTTCATTCCGATATATCATA -CGCCGTGTACATGCATCATTACTATCAGAGGCTCACATCTAATCTACAGGAAACTATAAC -GCTGAGAGAATGGCTGGGAACAACCAAGTCCTTGTCCTCAGTCCACATGCCGACACCCCC -ATCCCAAACGCCAGATTCAAGCAATTCATGCATCCAATTCCCTGATAGGAACAATCAAAT 
-CAGGCCTCCTTGCCTCAATAGCCAACATGTTCAAATTCCGCTGTCTAACCTCGCGCAAGT -ACTTTCGTTGCTCATGCGGGATGTCTTTGACAGTCTGTGAGTCGTCCAGGCCGTAGAACA -TAGTGTATACACTAAACCCGATGAGTCCGCCTACAATGCAGCCGGCGATGATGAGGAAGA -TGTAGAGTGGATATTTGTCGGGATTGGACATGCTGCTGCGCTTGAGCAGGGCCGTAGCGT -CATCCATTGTGCGTGGTTGTGTGATTGTTGACTCTATTGTGCGTGGGATGATAGGCATTG -TGTGTTGAAGAGGCTGCCGAAGAAAAAGGGTCTCGTTAGTTGGAGGACATAGGGAATCAT -GTGCGGCTGCATAGTCACATACAATGGCTGTAAGTAGGGCTCAATGTCAGATTCTTCCTG -GGGGATGCAATGTTGATTCGAAGTAGCTACTGCGAGGGGATGTGGTTTATAGAGCTGGAA -GATAGCAGATACTCTTAGTGCGGGAATTTTGACTCTATATATAAGCAGAACCTGCCACGC -TAAAGCCTTTTCTCTATCTGATTGTACAGCCCAGCAATGCTGAAACACGAAGGGAATTGG -TAAAAATCAACCATATCCTTGATGTTCCTGCCCAAATAGGCATAGTCCCAGCATATTCCC -AGACCAATGACATCAGGAACCGAGCGGACCAATCAATGGGATTCAGGGACTGGTGGGTCC -ACACGTGCCAACCAATCAAATCTGTTCTACATAGGCTAAATGTTCTAAATATGCATACGG -GACACTTTGACTAGAGTGTAAGCTTCTCTCTTCTCACGGACCTTTGGCCCCAAATTAGAT -CTAATTTGTCGTGGGCTGTCCACCATTTTATTGGTCTAAGTTTGTTGTGTGTTGTGCTTG -ATTGGCTCGACTTCTGTTCTCTCTGGGGCCCGCATCGGTAGCAATTCGAGCCCCAATTCG -ATTTGGCACCGCAATGATATTTGGTTTCTTACATGTCTCAGTTGACTCCATCTTTTCGGT -TGCTAATCTTAGGACTCGTCAACTGCGACTCGGAATTCCGCCGTTGCGTGAATACTTTCA -GCCCAGCCATACCCATCTGTGCTTCTTCTGCTACATCTTCAAAGATGGGTCTTTCCAGCC -AATTTGATTAGAATATTCTTCTTATCCACTACAACATAGTTTGTGATACACAATATGATC -CCTGAAGGAATTCTTGGGTTGTCAATCAGGTGGGATAACCATTTATAGGAACTCCATGTT -GTATCGGGTCGGATCATACGAGTTTCAATCAGGGTGGAAGAAAAAGCAAGCATTTATCTC -AGTCCAAAAATGTTTAGGTTTCTTGAAGGGCTGACTAGCCTCTCACTCGTACCTGAAGAT -CACCTTTGATATTCGCGGTGTGGAAATTCAAAATATACATATTTCGGGGCCTGTTCTGGT -TGTTTCTGATTTCTTTTCCCTCACATTCTCTTCTATTTCTCTCATCTTCGAGCAGAACAA -AGAAAAGAGACACAAAGCGTCAATTAAATCTGTGAAGCTTGAAACTGATGAACCACGTAT -CAAAAACCCAGATTGAACATCGCATTGTCGTGATGGCCGCAAAACCCCACCGTAGGCAGT -CCGCTCGGTCAGCAAATCCTCAACTAGTGGACCCGACGCCCGTACCCGAGGACCGGACTA -CGAATCTTATACCTTTCTCGCCACATTCCACATTCCCCATGCCACATCCAAATGGAGAGA -GTATGCGATCCATGCTGGACATGTCGCAACCGCCGCATCCAATGCGACCAGTCTGGAACG -CCATGCGCCAAGTGCGAGAAGGCTGGCATGGAATGCTTCGACAAAAGACCTATTAGGTGG -GTTAAAGGCGTGGCGATTCGCGGGAAGATGCAAGGCCGTACGCACGAGGGTATATCGAAT 
-GGCTCTCCTGTGCACAACTCTAGAATTACCAAGTCCGCGCCATCAACACCTAAGAATTCG -CATCGCGCGCTGGTCCAAGCTTCTGCTGGCAGTAGATGTTTACCGGTTACCTTGCAAGAC -CCGTCCATGTCAAACTTGGATCAAACCTCGAAATATTATATAGACTACTGTAAGGTTCGC -ATGCAAAGCTCTATAATCAGGGAAAACTGACACCGCATTCAGATAATGAACGCATATGCA -AATTGTTCATTGTATACGACAGCGACAGAAATCCGTTCCGGAGTTTGATCTCCTTTGGAC -TGAAAGATCCCGTCTTACAGAAAGGTATACTTGCTCTGGCGGCACGGCACCGTGCAAATA -CAGGCCAGTCGTTCCATCAGCTTCAAGCTCCGACATCCGATGGTCTCATTAATGCCAACC -GTGATGCGCTACTTTTCAAGCACCAAGCTATGGAGGCTCTCTCCCGGGTACTTGGCGACG -AAACAATAGTGAAAAGTGACACAACCGTTGCCAGCATATTTCTTCTTATTTTCTTGGACC -TCCTCGAGTTTGGAAGTGATGGTTGGAATTTCCACCTTGAGGGTGCTAAGGGACTGATTG -CATTACACCGGCCGCTGCTGGAGGCACAAGCTGGAGTTAATAATGGACCTGGCCAGACGG -TACAGGAGATATGGGGGTTCATCTCCAATCAGATTCATCTGTGAGTCGAACAAACCTAAG -GGGATAGTATAAATTCACGTGAGAAACTGATCGGTTACAAATTAGAATTGAAACGCTGGG -AGCAACATTTTTACGACCAAAAGTACTATCAGAATTTATGTTTAACGGCCAGGCATCTAC -GCAACCCCAAGAAGAAATTGAACAATCTTTCCTTGGATGTCCGGAGTTCCTGTTGTCAGC -GATACAATTTCTCTCCAATGAGAGGGACGCCATCGCAGGCAAGCTACCGGACGGTGCTGC -TCTCCAAACACACATACAAGATACCACCGCAATGCTGGAGCTCATCCAAAAGTTTGACTG -TTACGCCTGGACTTTGAGTCTTCAACGTTCGAAGCAGTCCTCGACAGATGAAATCAGTAA -TCTCTGCAAATTGTCGCAGACATTCAAAACCGGGGCTTTGATATATGGGAGACGCGTTCT -TGATGCACTCACAGAAACTATCACAGAGCAGGATAATTTGGTCTCCGAGCTGTTGGGTTT -GATTGCTAGTTTGAAAGACGATGATGCACTTTTCAAATGTGTTCTGTGGGCTATCTTTGT -GGCGGGCTTGGAGTGTCGTTCTCAGGCCCAGAAAGACTTTCTGGTTGAATCTTTAGAGAA -GTTCTGGACTGATACGAGCTGTTCGAATGTCATGACTGCAGCAAAGATCCTGAAAGATTA -CTGGGACCAGGAAGAAGGCTTGAAGACTTCGTCGCGGTGGATATTTGATATTGGCCGCTT -AGGTCGTGACTGTCTCCTGATTTAACATACTAAGCATAGAATATAAGCCAAATTTTCTCA -TATGATCTAGCACAAAACCATCATATTGAACTATCATTTCAAAAGCAACTTGAATGCCTG -CAGCAAATCATCCTTCAGATCCTCCCAGTTCTCTAGTCCAACGCTAACTCTCAGCAACTT -GCGGTCAATCTTGGAATCCGACAAGGCCCGCCACTCAATCAAAGACTCCACCCCGCCTAG -GCTAGTAGCATGCTGGAAGAAGTGAAGCTTGGTTGGCAAAATCTTCGCAAATTGCTCATT -GTGTAAGACCATGGAGAAGACGGGTCCAAACCCATTAGGCATCTGCTTCTGCAACCACGG -CTCATCCTGCAAACTAGCATGATAAATCCTCTGTACAACAGACTGAACAATCATATCCTC -ACTGCCCACAGCAGGGGACTTGGCTTGCATAGCACCATGAAGCCAGGAGAGCAACTTAGC 
-TGAATTCCGGCTTGCTCGCTGAACCCGCACCTCCAGCGTGCGCAGACTCCGGATCCCAAG -CCAGCTTTCCATATTACCCATCACACTCCCAAGCGCGATTCTATCCTCAAACAACCGCTT -CGTCCAATCAGCACGCTTCGTTGCAAGAACGCCACAGAGCAAATCGCTATGTCCACCGAA -ATACTTCGATCCGGAATGCATGACGATATCCGCGCCCCAGAGGAACGGATCCTGCAGCCC -AGGAGGTGCAAAAGTACTGTCGACAATGAGGAATGCGCCGCGGGAGTGCGCCTTACGTGC -GTATTCCTCGATACTGAATGCCGTGCCTAGCGGGTTGATGGGTGTCTCTAGTAAAATGAC -ATCGCCCGGACCTAGGCTTTCGGCCGGACAGTCGAGACCAAGTTTCTGTAGCCCGGACAG -ACGCGATATCACCGCGATGACTTCGTGGCTGCCGTGGTATCCCTCGCCAACGGAGATGTG -GCGTGGATTGAGCAGCGTGAGAGCAGCGTGTAGGGCAGCTAGACCTGTGGAGTAGCTCAC -TGCTTGCCCACCTATGAGGGATGAGAGCACTGCTTCGAAACGGGTTGCGTTTGGTGCGAA -TTCGCGGGAATAGACATAGTTCTTGCCGTTGAACTCATCCTGATGGGGCCGTTAGTTGAG -AATGTGGTTATTCGTGTGTTTGCTTGAGACGTACTACCGGATCCTCGGATGGGGCGAGTT -GGTCTGGATCATTGGGGTACCTGAAGATTGTAGATAGATGGATTGGGGGTGCGACGTCAG -TCACTAGATTAAGGGCATCGTCTGCATGCAATGCTTTGGTGGAAGGATGGGCGTTATGGA -AGGTCATTTTGCTTTGAGAATATTGGAAATTCTCAGAAAGAAAAGAAACTGGAGGGTTTT -TATATCTTCAAGGTTGACCTCTTCTATCAACTTGGGATCGAGACCCACTCCCATTAACTT -CCTAAAACGGAAGTCAACTGAAGTATAGAAAACCCTACAAAAAAATTGTACAGACTTCCG -GAATATTCCAGAGACATCACAGTTCTACATCTAAAATACATCAGAAAGTCGTCTGCATAC -CTCAAGCTTTTGCCGCCTTTTTCCTCCATCGAGCTACAAAACTGAGGGCGAGAAGATATC -CAAGGACATCTAGAGAAATGGAAGTAAAGACAGAGATAGCCATTACTGCCGTCAAGGTCA -CACTACATTTAGGATAAAAATCTCAAGGGTAAATTAGCATGGATGCTATATATTCTTCCC -ATTCCTCTATTCCACTTCTCCCTTCCATCTCTGTGCTCCATCTCCTCCTTCACCACATCC -CTTGACACCAGAACCTTCTTCACCATGGAGAAAGACCTTGTCCAGGGTGAGTCATTCTCT -GACTTCTGAGTACTTTCGGCCTAATAATATTCAGAGCGCCCCGCTGCGAGTGTTAAGACT -ACCTCGAACGAGTTCAATTTGTTGTTAGACGAGATCACAGCTCGCTGCAGCAAACAGACA -AAGGAGATCAACAAATGGCTTCAATGCACCAAGAACTTCAAAAAAACCATTGCCGACATG -CTGGAGCAGGAGAAACTGAAGAGACAATTTGCTGATACCGAAGCTATGAGATGGCCCAGG -ACCCATAGGACTCCGAGGATTCCTAGCACTTCCAGAATTTCCAGAACACCAAGAACACCC -AGAGCGTTGAGAACACCGAGAGCATTCCAATTACCGAATTAATATGCTATGATTTTATTG -TCTGAGTTTTGTGGAATTTCCAATTCGAATGTGAGAAGACTCAAGGGCCGATAAGCTGAA -CCCCAACATCTCTCGGCTCCGGCTTTTCCCCCTCACATCCTCTCTTCCAACAACTCACGC -CAAAATGCCCCCCTCGGCTCAAACGGTAAGCTATGATCTACTCCTCTGTTTCGCTGATTT 
-GCATACACATTTCATGATCGAACCACTTCCCCTTAAATATTGCTACCTAGAGCTCCCAAG -GCTACCCCGCACTATATAACATCTGCCCTGCAACTCGCATGCGCGTTGACCCAGAAACAA -AACTAACACAATGGAATCAAAGACCAGACTAGTCTCAAACCTCCGCCTGCTCATCCCCCG -TCTTCGCCTCCTGCAAAAGAAAGACACAGCCTCCTCAGTCGTCCAACGTCGCGAACTCTC -CCAACTCCTCAGCGAAGGCCGCGATGCATCAGCGCGCATCCGGGTCGAAAATGTTATCGC -AACTGATATCGCAGTGGAGGTAATGGAGATGGTCGAGCTTTACTGTGAGTTGCTGCTGGC -GCGCGCAAACGTGTTGGATCAGATGGCCTTCAGCGACCAGGGTACCCGCGCCAGACTTCG -CGCCAAGGAATTGCTCAAGAAGCGCACCCAGGAACAAGCTGGAGCTACCGCACCAACTGC -CGCAGGCGCCAAGGGCGCTGGGGAAAACACTGGTTCGATATTCGGGTTCTCTTGGTTGGG -AGGAGGAGCGCAAAAGAAGGAGGCTGAGCGGGCTGCGCCGATTACGGCTGGTGGTGCAGA -TGACTCTGGTGATGGACTTGAGAATGAAGATAACCCCTACATGGATACTGCGATCGATGA -GGCGGCTACTGTTGTTTTCTATGCGTGGCACCGCTTTCCGCACGATGTGCGCGAATTCAC -CATGCTTCGCACTATGCTGGGCGAGCGATATGGCAAGGAATTTATGACGCTGGCGCAAGA -TAACAAGGTCGATACTGTCAAGGTTCCTGATCGGCTTCTCAGGGGCTTGCGTGTGCGTCC -GCCAGGACAGGAGCTTGTTGAGAGTTACCTTCGAGAGATTGCGAAAGCATACGGCGTTGA -AGGTTATGAAGCCGAAGAAGAGCTGGGAAGTGCGCCAGAGTTTGTGGATGACCTTGGTGA -TGGTGATGGCGACGCAGAAGAACCCCAATTACCGCAAACTCCTAGCAAGCAGCAAGGAGA -CTCAGCGTCGAGGCCGAACCTCTCTCAAGCAAGACGTGCCTCGGAGACTAGTGAATTGAC -TAAAGCCACGCCTCCACGTGGACTCGCTGCGGGCCGAAGTCCTGTCAGCGTTGCACCGCC -AGCTCCTAGAACAGATAATCCTCATCCCAGAGTCAAGCTTCCCGGCGCAGAGGGGAAGTC -TGCTGCTGTCTCAAAGGAGAATGGCCCTAAGAACAAGAGTAACAACAGTGGGATACCCGA -GCTGGATGAACTGACACGGAGATTTGCGGATTTGACAAGGAGACCATGATTGAACAGCTG -GATAGATCAATATTGACCAGAATTCGGCCCTCGGCGTCTTTATTTTAGCCATTATTCATG -TTGGGTTTCTTTTCAGCGCCTTGGACAGGTCTTAGTTCGAATGTCTTGCTCAGTCCTGCA -AAAAGGCCTTCTCATTTTAAACGATCCACGTTGATTGCATTTGACTGGCTATCTGTGAGA -TGCCACATTTTAAATTAGGCCAATGGTCCAGTTCCCCAAATATGAGAGGTCACATTTAGT -GCGGAGTGCCGCAGTTTGGGGATACGGAGTACTCCGTACCTCCAGCGTGAAAGATAGAGG -AATCCGAGATAATGTTACATTGGAACTAGAACAGTGTTGCTTTCTTTATATATCAACAAT -ACATAACTCCAGAAAAATCTATGTCAATGCATTCGGAGATACTGATAATATAGCGCTGAA -GAATGTTGTAGCCAATCTACGAGATAGAAAGCCAACTAATAATTTGCCACGTGATAGAAG -CAATAGAAGCAAGGCACCGCCCTCCCCCCCCAGAACTGTGGCTTTCTAAGTTGTCTAAAA -TCTAAAGCAGGCTGATCCAAGCTCAAATATCGAATTTCACCCAATAGTCGACGCATTGAA 
-GCAGTACAAATCGGGAACCTCGTTGGAACTACAATATACGCACGGGACCCGCAAACCCCA -ATCGCGTCGATACACCTCCCGAAAAAAGGCGAGCCATGACACAGACTTAATATGAACTCC -GAACCCCCTGCGAATACGATCCTCCCTAGCTAGCAGTCGCTAGACACTGCGACAAATTCC -TCCCCTCGATCACCGAAATCACCCCCAAAGAAAAAGACCCAAATACCATGCCGGTCTATA -TGCTTCACGGCTTCCGATGGCCCCGCGCCGGGTTCACGGGGATCCGCGTGTACATCGTAC -TTCACAACCTGGAAGAAGCAGCAGCGGAATACATTCAGCAACCGCTGACGACAGAGCTCC -TCGCAGAATCCTTCCACAAAACACAAGCGGACCTCGTGAACCGTCTGCCGGAGCTGTCGT -TTATCGAGCAGTATGACCCCGCCGATGAGAGCAGTGGCACTGTCAGCCAGGATTATGCGT -ATATTTCAACGCGGGTTCTCGAAATTCCGGAGGATGAGAGTGGCGGTGGAGAGAATCTTG -AAAATTGCGTGGAAAAAGGATCCGGGTTGACGGATGACCAGATGGCGGCCCTGGAGCAAT -TGCGCGACCGTCTAGCTCCTGGCGAGAAGATCGGCTGGCATCTTGTTTATAATGGGGATC -CTGAACGGTGGTTTCCTGATTCTGAAGACGAGGATTACGAATCTGAGGAGGATTCTGAGG -AGGAAAGCCAGCCTCGGGGAGAGAGTCAGGTCCAGAGCCAGAGGGAGAGCAGGAGTAGTT -TTGCACAAACTGCTAGTCCACAGAGCTATACGGTATGTTAGTTCCTTTGCTTTCGTTTAC -TATTTTACTGGGGAGGCATTCTTGCAGAGTTTCCCTTGCTCTTACTGCTAGATGGTTGCT -AATGAAATCTTTCTAGACTCGTCTGACGAGATTTTGGAATCGGATGAGCTCGGGGTGAGA -GCTGTGGCTTGGTGCTATGGAAAATGGCAAATCAGATCGGTTTATTCCGTCACTGCGCAG -TGTCAGATTGCCTCGTTACATCTGTTACACTGGGCTGTGACGGGGGCTTTTAACGAATTC -TACGCTTTATGTCTGTTGATACTCTATGGTTTGCAAATATTGTTATAATAGTTCATCTAA -TGTATTGGATATCTTACGGAAATTAAATTGCTTGCGGATCATTAGTAGAATAATTTACTA -GAGTCCAATATTCGCTGCTATTTCACTTGCATCTCTATAGAACCTGATATAGAGCGGTGA -GTCGAAGCAATATGGGAGGGGAGCTTCGATAACGGGAACAGCGGCGAACATCCAAAGCTT -GTCGCAACACTCCCCTCGAGTGGTTTCATAAATGCTGACCAGGTAACATCCTGGCTTTTG -ATTGCAATGGTTCTCTCATTTATTCTGGTTCAGAATCGCCAGTGAGTGGCTTTTCGCCTC -GGTCATTCCCGATTTCCACGCTAATGACGGTATAGGGGGAAAACGCGCCTGGCGAAATGG -TATTCACCATACAGTGTGAGTGTTGCAAGAACCCAACCAGTGAATACAGAACTGGGAATT -AGAGCTGACACCGGCTTACAGGATGAAGAGAAAGTCAAATTGAAAGGCGAGGTTTGTTCA -TGCAATACCACCCGCACCTGCACTTGCTCCGAGCTTCCCATCTAACTAACCACCGACAAC -CGTCTAGGTCCATCGCCTCGTCGCTCCCCGAGATCAGAAATACCAGTCCAACTTTGTTGA -GTTCCGCCGGAGTACCAAGGTCATCTACCGAAGATATGCCGGCTTGTTCTTTTGCGTCTG -CGTCGATGCGAACGACAACGAGCTAGCTTATCTCGAGGCTATCCATTTCTTTGTTGAAGT -GCTGGACCAATTCTTTGGCAATGTGTGCGAGCTGGATTTAGTCTTCAATTTCTACAAGGT 
-ACGTGCAAGTTGCTGGGAAAGCAATGCTCGATGTGGTCCAGGACTGAAGTGGGATCGGAA -TTACATGAGTCGGGATACTGACCACATGTGCCTGATTTATAGGTCTACGCCATCCTGGAC -GAAGTCTTTCTCGCAGGCGAGATTCAAGAGACAAGCAAGCAGGTTGTTCTCACGCGATTG -GAACACCTGGATAAGTTGGAGTAGAAGTGAACCGTGATACTCGGATTCGAGAGCGATATG -AGATCCTTATCGATCACGGAGCCGACTGCATTGGAAAAATCAAACCAAAACAGTCATTTG -CATGGAGTTTCAGGTGTATCGGTGGCTTTCACACAATATAAGTGCTGTTCATGTTCATAT -TTGGGATTATTCTGGAGATATCAAAGCATCGGTACAGCTCATGCCATCACTTTGGGGTCA -GAATCGAGATCATAGATCGTGCGAGTATGCTGCATGTGTTTCTCCATCTAGTCTACCTGC -TCATTGCGGATGTATACGCTTTTTGGAGAAGCGTCCTTGCTTTCAAGGATTATATGGCAA -ACGGGATATGATCCGGGCAGGATATCGATTGTTTTCGACTCAGCTCACAGAAGCCCTGAT -GGGCTCTGGTGTCCTTCTCCAGCGCTGGGTGTATTTTCCAAGTTTCGTATTCGTACTGGT -AATCCCAGCCTACATGCAAATCCTGTTCCTTATGGCTTTGCTCGATATCTGATTCGATTC -TGTCCTAGGTCACACATGGAAAGGTCGCGTTCACAATCAAATCTTTAGCAGATCCTTTCa -aaaagagaaaaaaaaaagaaatgaaaaaaagaaaGGCATCTGAACAGAATATCTGAAAAG -ACCACTGAAGAGACATCACAAGGCCACATAAGCGGTCAGCATTTTTTGATTTTCTTCAAT -GGTCAGGACTCAGGATCGCTTCCTGCTCCTTGGGTGTGCTGTGATTGCCAGGCTTAAGAC -AAAGAAGCTGATATGCTGAATTGGACAAATCTGCCCATGTGGTGCCAGTGCCAAGCCTTT -CGTATTCTCTTTCCACTCAAGGGTTCAGAGTATAGCTATGTTGTCACTGCCGAAAAACAC -TGCCGTCGGTGGATGTCAGGCGGAGACAAAATGTGCAAACCTGGCTGGACCCAAGAGCTT -TAAGCTCAGATTAGATTGCAGCGTTGAGCTTTGTTTCCAGTTTGTGGAGGAACTTCCCAA -TTGGTAATGGCTTGGTGAGGTGATCGTAGCCATCGATGGCGAGCGTTAGTCCAAGGGCCT -TGGCACAACACATTTAAGGGAGCTCTGTCGCTGTTGACCCTTGAAAAGGGTTTTGCTCTG -CTGAGACATTCTTCGCTCGGGTACCCTTTCACTACATTTTAGGCCACAAGGACTATCCTG -CAACATACTCTGACTTCCGCATGCACGTTCCAGGCGAAATATCGTGATATTACTCAGCCG -AACAACATCACCCGTCATAGAGTGACTATGACATCGCACTAGCTGCCTATATTGTACACA -GCCTCACACATCTGCTCAGTCTTTTTGGACACGTGATAGAGCTCAGGTTGTCTGACTCCG -GGTCTATGAAGATAGTTTCCCCAAATGGAAAAAATCTCATCAACACGGGACGATCAGTTC -AACCGTGACAATACTGATGTGTGACAGCACTACGGAAATCCTCAACAATACACACCGGAG -GCTTATACCCCTGGCGATGAAAATCATGTGAAGCTTTGTATCTAACATGCCCAATTCCAA -CTCCCCCTGATCAATACTTGGGTGATAACGGCTTTATGTCCGCGACACATTGAGCGGATG -GTCTTTCCTGTGATAGGCAGCGTGACCAGACGATCCAGGAGAAGGGCCCCATCATGACAG -GCTGAAACTTTTAGACACTTTCCGCGGTGTTGAAGGTAAATTGGACCGGGCTTGGCATGT 
-GGGACTCGCAACTGCAGTCTCGAACGAGACAAATGCAATCAATCTGATGGAGCGGGGGCG -TCGGATTCTTGTACCCGGAGCGTTTTTCTTTTGGGCAAGACAGCTGCGAGATTGATCTGC -GTGGAGCATTGTGGGCCAATCTTGACAGACGCAATTTCAGTCTCACTGTTGCTGGATATG -AGATGGCATATAACACAGCATTGCAGGCTCATATGAGCAAAGGAATAGATGAGTCTACGA -TACTCCATGAGCTTTCTAGGTTTCTGACAACAGACATTCGGAAATATACCTCCACAAGCT -GAAGGTGTACGCGTTTCCATCCAAAGCTCCACCAATGACCGACACTGCATGGATCGCCAA -AATCATATGGATTGTTCGAATTTCCGAGCGTACGTGGATCAGAGTTCTTGGCAGGGCCGA -TGGAACAATTCGGTAATTTGGATACCGAAATACCCGGCATATGCAATACAATACAATACC -ATACACTCCACCAAAACGACCCCTGACAATCTTTATGTGCGTCAGCCTGTGACGTCGCAC -CTCTATACATCTGTATACGCAACGACTTTTCAAGGCATCGCTTTTAGCTATACGGAGAAC -GGAGTACAACTTGGGATGCACTCACGGGCCTCGGTTGGAGAACTAAAATAGGGCACTGGA -ATGAAGGAATTGCCAGTGTTGCCACTCGCGGGGATAATACTTCGGGAAAAGTGGTTCGAG -GCTCGTCATTGGCACACCATGAATGCCCACTCTACTTCACTTCTATCTCCCTTGGAGATC -AGTTCAGGCCTGGATTTTTTTTTAGCTGGTTCTGttatttatatattttatgtatttatt -CCCTTCCTCTTCCCCTTATCCCCATATCAGCCTCTTAAAAGCCCCCCTTCCTTCTAGACT -TTTTCTAGTCTTATCCTTTCTTTATCCAGCTCTACTTCCTTTCATTTATACCTACAATTC -CTCACTCTTTATAGAAGCCCCAACTATATTGAATTACAATTGAGCTTATAAACACCATGT -CTCTCATGCTCTCTGATAATCTGGCTCCCCAGCCCATGACCGACGTCTTCACCAATGATA -CAAATATTGACCGCCGAAAGTGCCACCGCACAGTGCCTATGAAGGTCCTTGCGCTGGGTG -TCGGTCGTACCGGAACTGCCTGTGAGTATGACATGCTGTGTTGCGTTGGACTTGGTGTAT -TGTATACACCGGGCTTGATGGGAACGAGAAACATAAACTAACTACTATTTGACAGCTCTC -CGCAAGGCCTTTGAGCGTCTGGGCTATGGCAAGTGTTACCACATGATGTGCGCAAGTGTT -GAGAATCCGCCGGATTGTCTTATGTGGCACGATGCCCTGAACGCGAAGTACAATGGAATC -GGCGAGTTCGGCCGCAAGGAATGGGACCAGCTTCTGGGTGACTGCCAGGCCGTCTGTGAC -TGGCCTGCCTGTGCATTTGCAAAGGAGCTTATCGAAGCCTACCCCAATGCGAAGGTCATT -CTGACCACCCGTGATGTCGACCCGTGGCACGCGTAAGTCACACTTACACATTATATATCT -ATATTTCTGTTCCAAGACACTAACAAACAAACAGCTCCGTCATGAAGACCGTCTTCTGGC -GTGTCTCAGACCCAGAGCACAAGTTTGTGTCGAACTTCAGCTGGGCTGCCGGCATGTACT -ACCCCATGCTGAACAAGTTTTTCGAGACATTCTTCCGCGGTGACTTCCCCGGAAAGGGCA -AGCAAGTCTACGAGGACCACGTCGCCCAGGTCCGTAGCTTGGTTCCCCCCGAGCGTCTGC -TCGAATATAACATCAATGATGGCTGGGCCCCGCTCTGCGAGTTCCTCGAGGAGGAGGTCC -CAGATACCCCCTTCCCCCGTGGCAACGACATGGCCGATTTCTACAAGCGCTGCAGCACTC 
-GCAACCGTCACCAGATGATGAACGCTGCCCTCCAAGCCGTCACTATTGGTGGTTCGCTGC -TTGCCGCTGGCTTTGCTGCTACTTTGGCTTTCAAGCGTTTCTCCCCTCGCTAAAACACTC -GTTCTTTTATCTTGTGCATTTCTTTTTTCCTTTTATCATATTCCCCACCTCTCACATTTT -TGCTGGCTGCTAGCGTGCGCAGGCGCCTCTAGCATGTTGCCATACTCCGTGGTCATTTTT -TGTCATTTTTTTGCCTGCTGTCCTTCCTGGAACAGACCCATGCATCTATCACGTCGGCTT -CTGATCTCCCTTGTTTTGGCATCTTCTTTCGGTCATAATTGGTTAAACGAGGGACTTATC -GAAGAATGATTTGGTAATAAAACACTATATATCATTATGTCTTTCATTTCTATCATCCTT -CCTTATGAATGCTCCGTACCAATGTAGGACAGGCACATTCAATAATATGCACTTTCTAAA -TCGAGGGATAAAGCACAAGGGAGCCGCTTCCCCAATGCTCATCATCATGCATTGCCCAAA -GGCAAGGGGCGCTTAAATCATGGCCAAGGAAAGCCTTACCGCCGACATCTCCAATCGATC -GGGGTCAATGCAAGGCGCACATGGCGAACGATTCATCAAAACAACCTAACCACTGACCGA -TTGATCGCTTGTACATGGACACGTAGGAATAGCTTCAACAAGTAAAAATAAACACTAACT -CCTAAATCCTGCCGTCTACTCCGGTCCGAGGACTTCCCAGCTTTCCACTATCGTACTCGT -AAAAAATACGAAAATAAGAGGAAACGTATCAGCCACAATTAATTGGATGAAGCGCCGTGG -AGATTTATTGATTGGTGACTTGGCATGCTGGGAAAGTGATCCACATTAATTAAGTGGACC -AAACGACATGTATGCGCATACAACGTACATAAAATACATACAGCAAGTTATGTGTATTGT -TATCTATTGTATTTCCTTGTAATAGAGAAATAAGTGATACGATTGCTTCTCTTTGTTCCG -ATACACGAAAATAAGCGGAGGACGGAAACTTCTTATTTCGCCCTAGACTCGGGGGTGGAT -CATCTTAAAATGCTTGAAGGCTTTGACGAAATACGAAAAAAAAAAAGGTGAGGTTGGGTT -GGTACACGTGGACTGGTGACACGTTAAGTGATCGGTTCCTTGATCACGAATTATGGTATC -TTGGGATTGAAGGGTTTGGCGGCGTATGCGAGCCTTGATCTTCGGGATTATTTGGGCATG -GGCTGGTGATTCAAGGTATTGTATTGTGTTGGGTAAAAGGTTTCTCCTGTAGCCATGGAT -ATCTGATAGGTCCTCTTGGCCTCCAGTGGATGCGCTGGGAGGAGCATAAGAGTTCGTATC -ATATGTATAACATACTATCTATGCTGTAGGTATTTGTGTGATACCTAGTGTTGAATGGTC -TATTTCGTATAGTGTATGATTCCACTCGGAAAACCTCCTATACGAGTTTGATTTCTTCAA -TGCATAAAGGATCAAGTGGTGTCCGCAGTGCGACAGTATCTACATCGTCTGGTATCTACC -TCTAAATTTTGGATACAAAATTCCATGCTGCAGATGCGGTAGGTTCAAATCTAATGCACG -TGTACTAGGATAGAGGTTACAACAAAGATAGCCTGCGTATGCTCTGACGACGCATTATGC -GGAAGAGCTGTATCCGAACCAATCCTCCACGTTCAATTCACCGGCGGGCGTACGTAACCG -TGATGATTCTCAGCCACATGCAAGTGGcttttgacttttagcctttggcttttagctttt -agctttaTCAGAGTGCCTGGACCTCGGGAACCTTTGAAGTTGGATCCATTGGTTCTACCC -TCCGAATAGGATAAGTCACCTGCGTAGGTTAGGGTTCCAGCTGTTAATCTGAGTTTTGCT 
-TACGGGTAACTGGGTGTTCATCCTTGGTCCCGTTCCCGAGCCACGTTACATGGTCGACGT -CTTCCCGAGGCTTTGATAGATCGAGGTCAATGAACGTACAGAACAGGCACGATCTCGGAG -TTATGATGCTCGGGTCTTGCTAGCGGCAGCTATTTTGCTTTAGTCTTGCGGCCATGACGT -GGCCCCTCCGAAATACTCGTACCATCACACTGTCTGGGACACGTATTGAGAATAAAAAGA -AATTATAGATCAAGGTTTTCCATAGCAATAGCCAAACGATCAAAATCTTCCGAGGGGGGC -TTGTGATTAGTTGACTCAGAGCTTTCTTGTATAGTATCGAATTCCCGAACACGCCCCCGA -CAACTTCGAGCAACTGAGACAACTTTTACATAGAAGGACCGAAATCTTGGCATTGGACGG -GTGTTGGTTTGATTTCCATTGACTAAGCGCTTTGTCGGAGCTTTAAATGGCTTGTAGCCA -ATGAGAATAGGCGATCAGCACAGTTTGTGACCCACTATGTAGGCAAAAGCATGCATTGAC -AGTTGGAGGGGATTCGTACACAATTTGCTTACCTTGAGCCTCTTTGGCCTTTTTGCTCTT -GGTTGAGTATTGATCTTGGCCTATACTTTTCTAAATTTCTCCTACATGATTTGCATCCAA -TACAACCTCACTTGCGTACATTCTTTTGGGTGATCTTCTTTTTGTCTGGCTCAAAAAACG -TTTATCTATATCTTGCATCGGAAAGTGATGCAAAGAGTCCATGAAGACCTCCATGTTGAC -CCTTTTAAGATCTGCACAGTCGCTCTTATCTCTTGGTATCGTCTGGTTATGCGCTATTGA -CACTGCGCTTGGGGAAGAAACCTCTTTTTATAAATCGGTAGGAGGATTGGCCTAATAAAC -CCAAGTGTACTTTCTGACTGAGGCTTTTATCGGTATAACAGAGACCCGACATATACCCAC -CCGTCTTCAACATCGAAAAGTCCGATCTAGGCAAGCTCAGCCCGGGCTACATTTTCATCA -CACCTTACGAACTTCAGAATCCCGGCCCGTACATCTACGATAACACAGGGGTAGGTCTTA -ATTCTTCGGATTGAGATGCCCCAATTACATGGAAGCTAATGGTGCACACGATCAATAGGA -GTTGATATGGAGTGGCTGGGGCGTCTCGGGGCCGGGCAACGCCCATGGCCTCCACGTGTG -TAAATACAACGGCTCCGATCACCTATGTTTCTTCCAGGGTAGCCAGCAAAAAGGGTATTG -CCGTGGACATGGTATTATCATGGATAAAAACTACCGAGTTGTGCGGTCAGTTCAGCCAGG -TGGAGGAATGGCATCGAGCGACATGCACGAATTCCTTCCCATCAATGATGGCAGAACCGC -TCTCATGACAATCTACCAACAACGGCAATTTGACATGACACCGTGGAACGTCAAAACTGG -TCTAGGGTGGCTCATGGAGAGTGTCTTCCAGGAAGTCGATGTGGAAACGAACAAGGTCCT -GTTTGAATGGCGATCTCTCGATCATGTTGATCCGTCATCGAGTTATACATGGCCATCTCA -TACGGATACTTCTGGAACAGGATTGAATGTACATGAGCCTTGGGATTACTTCCATATCAA -CTCCGTTGACAAAAATGATGCTGGTGATTATCTGATTTCTTCGCGGCATACTTCTGCCAT -CTACAAGATTTCCGGTCGAGACGGCTCGGTGATATGGAGGCTTCATGGCGCGCAGCCCTC -TTTCCAGAATATCAATTTTAACTTTTCGCAACAGCACGACGCCCGGTGGCTACATGAGAA -CAATACACATACAGTTCTGTCACTTTACAACAATGGCTACAACGGCTTCAACAAAACACA -TACCTATTCTGCGGGCATGATCATCATGATTGACCATGTGGAAAAAACAGCCATCCAGAT 
-TCGAGACTATGCACCAATTCAAAATGACCTAGTAAGCTCTAGCCAGGGAAATCTACAGGT -GCTCCCCAACCAAAATGCCTTCATTGGATGGGGAAATAACCCATTCGTATCCGAGCACGA -CGAGGCAGGCAATCTACTGTTCTGGGGATCTTTCGCCAAAGACACCGTGATGAACTACCG -CGCAATGAAGTTTGAATGGGACGGTGTACCAACAGACTCGCCAGCCCTATGGACATACTC -CCGAACTGCGGAACCATTTTCTTCTACGAGCTTCTATGTCAGCTGGAATGGCGCAACCCG -CGTTCATACATGGCGATTTTGGGGAGCGATGAACTTGACTGGACCTTGGATCTTGTTGGA -CGAGGTTCCTAGAACTGGATTTGAAACCGAATACACCCACGGTAGATTCTTCCTCTGGTC -TAAGGCCGAAGCAGTGGATCGCGAGGGCATCGTGCTGGGAAAGTCCGAGACCAAGTATAC -TTTCGTGCCTTCATCTGAGCTGCGCGAATTCTGCGCAACCTCTACCTGCTCGGATGCACA -GGGATATGGGTTCTCTGGCGAAGAGGCAGCCCGTCCTTTCATTCCGCCAGTAGGTGTCAA -CACTGTGCCGTGGATTGACCCCAACAATCCAGGCTCCAATATTTGGACAAATCCCTCGGG -TTATCCACACTCTTCTGGTAGTCCAAAGCATCCTACTGAGGGGAACAATGGCCGTGAGTC -TATCTCGGAAGAACTCTTTTCATCGAGCCAAAGAATCAAAGCTGACAATCCCACAGACAT -TCTTGCTTGGGGCACCTTTATCCTCGTGGTGATCCCCATACTCGCTGGAATCTTCCTTGC -ACATCGATATTATAGCAAGCAATCGATGAACCAACAGCGGGACCAGGAGTCATATGGGCC -TATGAACTGCATGGTTGAGCGGAAGCACCCTCCCATGCAGGCCACTGATCTGCCTTGGTG -GAATTGGCGCAGGTGGATCAGACAGGAAGAAGTACCCTCCTACTTCCTATTGGGTGAGCG -AAGCTCATACAGTCACAGGTGTGAACGAGAGCGGAACGAATAGTCAAACACTCATTCGCG -TTGCTTTGTATACATATTATGATAACATAAGATAAACCATTTATACAATCGCTATATACA -TAAACTCTTGCACCTACTGCTCAAATTGGCAAGGCTTTTTCGTCGAGATCATGTATGCTT -TTGACCTAAGAATAGGACAAATAGTCCACAATAGGCAAGACCAGAATAGGCAAAACCGCG -CGAGGAGGAACGGTGCGTGAACGTTTCCACCGTATGTAATACAATACACATACCCGAATA -ATACATAAAGCCGACACCCGCGCCCACACACTGGGTGTACGTACCTCCGGAGCGAGCGGA -TCTGTTCGATCCCTTAGCGCTGGGCATTCAGCCTTCGTGTTGACTGCTTTGTGCGTCGAA -CATGAATTATAGGACGTAAGATTCTTAATTGTTCTGTTGGTATAGATGTGGCATGGTACA -ACGCGCATATATGTACGGATTACTCCGTACAGGGTACATAATCTGATAAATTACGGATAT -GTACATGTTTGTCACCATGTCGATCAAAATATGCAAAGGAAAGGCTACGATGTGCTGTGC -CAGGACTCAGAGTGGCTCTTGTCGGCCTACGTGGAATGTGGCTGGAACGCTTGGACTTTG -TGCGCTAGCTGTTGCCGCCGGTGTTGGCTCTAGAACCTTATATATTAATTGACATTTGTG -TACGGAGTACTCTGTATTTGAATTCAAGGTTTCAATGTATCTAAGCTTGAAACACTTATG -AAACCAGCACTTAATACATTGGTAAACAAGTGCATATCTACAAACAATAACATCTGTGTG -TATTTATCTGATGGGACTTCAAAGTATTGTGAAGCTAAAATTACTTTCCCTCCACGGTAC 
-GTACCTGATATGCCAAGACCTCCGTCGTTAAAACAGTCCACGAGTCATGGACCACGAGAG -GAGGGCTTGGCAGGGAGATGTGGAGCAGTCGTCGCAAGCTGGGTGTCGTTGTCGATCTAT -GATACCAGAGAGTCGTTGTATGTGGCTATTTTTGCTGCTTACGATTTAGCGCCCTAAAAC -CCCGGCGGGCAACTACCCTCCTCAAGTAGTAAGATCAACATATCAAACTGCATAACCGCC -GACAACTAGGTAGTTTCTTACCCTTACCAGTATCAGCTATGGCATCCCCCACTTACGGCC -TTTCAATAAATATTAATGGGCGCGGTGACGCCAAACTCGGCGATGGGCCCTCCCACATGG -GAGTCGCACTCTACGAGGACGGTTCCTCAACTTGCAAAATGCACCATATACGCAACCCCA -CAGACGAATACTTCATCTATGACCCGCGGCCTCAGCCGCTAAAAGACCCGATTCTCAAAG -GTCGTTGCGAACTTGCCTCACTCTCGGCTGAGAAGAGCCGACAAGCTGCTAGCCTGATCT -CCGCGTTTGGTAACGATGAATCCAACATCCCCGAGTTCGGGGTCGGAAATTGCCAGGACT -GGGTTGCTGGGACTGTCGGAATGCTTGAGTATCCTGGCTTCATTTCTTCTGGCGAAGGGA -CATTCTGGAAGAGTATGATTAACAGAAGCTCGGAGGAAATGAAAAATGAGTGTCTTCGGG -GAGGGAAGAAATGGATAGATGTTCCGGAGTCGATGTATGAAGGCGAGCCTGATGCAAGGT -TCCTAGACAGAGAACGGGAGACAATGAAGCCCGTTGAAAAGTTGGCTCAGAACGACGTGT -TTCGGGCGCGAATACAATCGCTTCTGGGCTCCAAGGGAAATAGAGAGACTGATCATGCTG -AGGCGCCTGTAGAACCGCCGTTCTATGTGTCGAGTCCTTTCTTTAGTAAGTTGGATGAGA -GGAAGGATTGAATGCATTCCTAAAGCATTGCATCAAGATCGCTTCAAACAGCGTATGGAG -GTAACAGCTAGATTGGGAAGATGGAGCCCTAGCACAGGACTATATGAGCCACATGTGTAC -GGAGTGTAAAGCAACCATTTTACTACATAGCGTATGAGGTAAATTACCATACTTGAACAA -ATACTGTATTTCCGAAAATTATGTGTGTTATGAAAGATACCGTTCCTTTTTGTAATTTAC -GGCATTCCTGGTGGGTCCCGGCAATAAAAGTAAGAAAGGACGTCATCGACCAAAATCCCC -TGGGCCGAGATGATACAGCATATAGGAGAACATTGaaattaaagtaaaaaaaataaaaca -aaCACTCTTTCTCTTCCTCCAATTCTCTCATAGCCAATAATAGCTAAGCCACAAATCCAC -GCAAACTACGAAAAATACATGAAATCAAAGGAGCCTACGACTATTTCTGATCCTTACGGC -GCCTCGGCTTACACTAGTACATTCCTCAATGCCTGCCAGGACTGGAAATAATAAAGCGAA -TAATGCCCGCCATGCGCGTACGACATCTTGGTCGAGTGGCTTCCACTTCATCCAGAAGAC -CGGCGAATCCTCCGAAAACGCGCAAGGCCGCCGACGACGCGAGTCCAATACCTCCCTCGC -CAGTGTTGAAGAACGCTCCTCCTCCCAAGATTCGCTCGACTCGTTCGCCATCGATGATAA -CATGTTAAGAAATACCTCAATGGATCTGGGTGGTGACCGCAAAACCCAGCATGTGCGCGG -CCGCTCTATCAGCCATGGCCAGCCACAAGTACTGACCCTGCAGGGAGAGGAGCCGCCGAC -TGTGCGAGAGCCAGCTCGCCCCGCGCTTGTAACGTGGATGTCGCTGCCGCGGAAGGGCCA -ACTGGGGCTGCTGGGCCTCTGCCGTGTGTTTGATTTCTTGCAGATTGCTTCGCTACAGGC 
-CTACATGTTCTACCAGCTCAAGTCGTTTGATTCGAATCTCTCCGACTCCGATGTTGCGAC -TCAGGCGGGAATTCTGCAGGGTGCTTTTACTGCTGCTCAGTTTGCGACGGCGATCCCTTG -GGGTCGTGTTGCCGATGCGGAGTGGGGTGGTCGAAAGTTTGTTTTGCTTGTTGGTCTGGT -GGGCACTGCGGTGTCTTGTTTGGGAGTCGCATATTCGACGTCGTTCGCGCAGGCCGTGTT -CTGGCGCTCGTTTGGTGGCGCCATCAACGGTACCGTTGGTATCATTCGAACTATGATTGC -GGAGAATGTGAAAGAGAAGAAATATCACTCCCGAGCTTTCTTGATTCTGCCAATTGGCTT -CAACATAGCTTCGTTGTTTGGCCCTGGTGAGTACCTCGCTCTGTCAATGCTGCTCCGGCA -ATAGACAATCCGCTAATTAGTATTACAGTGATGGGTGGCATGTTGGCGGATCCCGTTAAG -AGTTATCCCCGTCTCTTTGGTCCCAATTCTTCCTTTGGAGGTGCGGATGGAGTGCAATGG -CTCGAGAAGTACCCTTACGCACTGCCGATGTTCGCAAACTTCTGTTTCCTCTCTGCAACG -GCAGCTTTGGTTGCGTATGGGTTAGAAGAGACCTTGGCATCTTGCAAGGGCAAGCCTGGC -CTGGGCGTATTTGCCAAGAGGTTGTTTGCGCGCGGCGTAAAGAAGGTTTTCCCCTCTTCG -TTGTCCTCGTACACAAAAATTCCGCTGCGCGACTATGAAGAGGATGGGCCTTTGCTAGGC -CGGCCCACGGACCACTCGGAAAGCTATGAACTTGAAGAAAAGGCGCACAAGCCTACCCGC -CTTCGACGCGTGTTACCTTTTCGAAGAATCTGGACCAAGAACGTGTTGTGCACACTCGGC -GCACAGGCTTTCTTCGACTTCCAAATGGGTGCTTTCAACAACCTCTGGCTACTCTTCTTG -TCTACTCCCCGCTATGATGCCAATGACCCCGCGAGCCCCACCCGAAGCTTACCGTTCGCT -TTCACCGGCGGACTAGGCATGCTGCCACAGAGCGTTGGTTTCGCAACCGCAATCTTGGGT -GTAATTGGCATGTTCCTCCAGTTTACCATCTACCCTACTATCAACGGCCGTCTGGGAACA -GCCAAGAGCTACCAATATTTCCTCTCCCTCTTCCCGATTGCATACTTCATTGCCCCTTAT -ATCTCGCTGGCTCCATCTACAATTCCCCCTCCTGGTCAAGCCAACGGCCCCTGGGTTTGG -TTCTGGATCATTGTCGTCCTCTTCTTCCAGGTCACTGCCCGAACCTTCACTCTTCCCACT -TCTATTATCCTGCTGAACAATTGCTCGCCGCATCCATCAGTTCTTGGCACAATCCACGGA -ATTGGTCAGTCAGTATCGTCAGCCTTCCGTACCATTGGACCTATCTTCTCAGGTGCGtgg -tatggatatggtctggatgttggtatggttggCTTCGCCTGGTGGTTGATTGCATTGGTA -TCCGTGTTTGGCTGCCTTGCAGCTGTCTTCGTATATGAAGGTTCTGGACATGAGATTACC -CTTCCTGGCGAGGAAGAAGAGATCTGATGTGATACGATATGAAAGTGTCTTTTTTCTCGA -GAGTTGATTGTACATTCCTTCAACATTTTGCATATTATAAGGTGTCCTTTGGTTGGGTGC -TTTTTGTCCTATTATGCTTTGAAATTGCTGTTCAGCTGGTGCTGGTCACCTGAATATCCC -CGACTGATATCTTTCCCATTTGCACTCATTGCTGCACCAGTGAGCAGTGTCGTTTCGCAT -ATTGAACTTTCTGATATTTTGCCTGCATCTGGTCTTTTCCCCCGCATTTGTTTCTTGGAT -ACCATCTTGCTTTGCATCTTTCCTTATACCGCAGCATTTTCATTTTGTTTCCTACGTTAG 
-AATAGAAAAAATCCAGACACTGCTCTGGTCAGTTGATCCGAGGGTCAGGAATGGAAACCA -ATATACCATACATATACTATCTTTCCAAGCTTGATCATTCCGAAAAAATTCAGCATATAT -ATTATAGCAAGTGATTCGGTCGTCATTTAGTAAAACTTATCATCTATTTTGACCTTGTTT -TCATATCTTTTCACACGAAGGCAACTCTCACAAGCTAGCCCCGTATCCCTTGGCATAGCT -AGCATCAAAGGTCTCCTTATTGACCGCATATTCACGAATATAATCCCTCTCAAGATCGTG -CGTCCTCCGGCCAAGAAGAACAAGACTGTGCAATGGAGCGCCCATCTCAACAGTAGACAA -CTCCTTAAGCGTACCAGAAACAAGTTGTTGATTAGCAGCGCCAACTCGCGCAGCACCAAC -AGCCAAACTGTCCGGTCCATAAATGCCCTCTTGGCGTTCTTCCTCGGTCTCCAGCATCTG -CGACGCACACTGCGCAACGGTCATGTAGCGCGGGGGCTCGAAAATGCGGCGGCCGCGCGC -CATGTTCTCTAGCGACTGCTCCTTGACCTTAATATCAAGCAAAACTAGTGTGTGTAGGCC -GATCTGAATGTTCTCGCGCACGCGGTCGTAGTACGACGACGGCTTCCAGGTCTCAGTGAA -GAAGACCATGCTGACTGTCTGTCCAAAGTTGTACAGCTGGAGTCCTGTGCAGCCAATTCC -GGACATGATGGAGGCGTTCGGGACGACTTTGGTCTCGATGCCAAGTTCGCGCGCACGGAG -GACGAGGTCGGTATGAGTTGTTGCGCTGTTGGTCGTATGGTGTCAGCTATGGATCTTGTA -GGGGAGACGACTAGAGCCCGTACCCGAATGGATCTCCCACCACTAGGAACACAATATCGA -CTTTGTCACCGCCAGCGAGGATGTCATCGCTGCCAGTCTCAACTAGTTCACGGTCTGCCT -CGATGACAGGGCGGCCATAGAACGCTTCCTACTGGTATTATGTTTAATCAGTCCATCTGC -GCAAGATATATGCAAGATTTCATTGCCATGTGGGTCCGGAGTAAAGGCTTGGAACCAGAC -TCACCAGCTTTTCCTTATCGACAAGAAGAATGCTCGTGTATGCCTCTAGATAGACTCTCT -CGGCCCGCTTCACGATCTCCAGGCCGCGGACAGTGATATCTGTCTCATCGGCAAGACCAA -GTCCAACTAGATGAAGCATCTTGACTGTAAAGTCTTAGATTGTGCAATTGAACAGAATGC -AATGGATCTCACTGTATAGCAAACAGTTCGAAAAAAACGAATGACCGTGCCCCGCGATGC -TGGCAGAATGCTCCGTGACCGCCGGATCAAAAATTATCCGGTTATCGCTTATCTCCCCCA -GATCGGCCTGTGCGCGTCCAGAGCTTCATCCTCCTTGCGACCATCTTCTTTGACTTTGCT -TTGAACCAACCCAATACCGCTCACTCTTCCTGGTTGAATACCAGTGTACCCTCTATCCAG -ATAGCTCAGGTTATTTCCCCTGGTAGGGTTACCGCCATCAATCATGAGCTCTGATCGGAA -TCCGAAGTATCGGCAGGAGATACAGCAGGTGAGTTCCTCCCCGATCCTTGGTTTTTTTTT -GCTTGTGGTTTTCGGTATATCGGTCTTTCTGGCTCTCGATCATTCCAATCATATGTCCTT -GTCAGGTCTGTCAGGACTTCTGAACACATTCTGCCTGTCGGGTGGCTTTGCGACACAACA -TGAGTCGCGCAAACTCATTGTCTTTGGGGACAGTGCCACTTTCACCTCGATGCCCCCGAG -TCTCACTGAAAGACAGCGCTATATCAAATTTGGAACTGCCTTGCAGGTCGGAGATATAAT -GTCCACGGATAGTTTTGGCTGGGCTTGCGACATATGACTTGCATTGCGAATGTACTCATT 
-TGCTGGGACAAGACTATCCCATCCTTGTCCTCTTCAAGTTCTCAAAGCGACAATCGATAT -TGGGTCCATATATATGTGTTCATGCTGTAGCTGTCCAGCTAGGCCTTTCAGTATTGCATT -GTTGGATTGCTGGCATGCGCCCATCTTGACGTCGAGCAACAAAACGGCAATCCTCAAAGC -GCGTGAAACCTCTCATCAACTGGTCCTGTCAACATCAAAGCTTTTTACATTGGGCCAGCG -AGGTGTATTGGGTTAAATACCAAGGTCAATTTCAAAATTGCTGGAGTTGAGAGCAATTGA -CAAGACGGTCGTACCTTCTCTTCTATATCTTGATGTTCGCAGGAGTCACGAGTGAGCTTC -TTATCGATATCGATATTATAATGGCTGCCGTTCACTCTATCGGCTTGCCAAGTGCCCGGG -GTTCTCATTTATCACTCATCCACGACCTTTCCTTCAGCTTTCCCTGCTACTTGTGCCTCA -GAAATCCCTCCACTGTATTGAATCCTTCATTTGTTTTTCTCGAGAATTGTTGATCATGGC -GCCAATCATGTACCCTCCCAGACTTGACGCCAGATCGATTGTCGAGCTCTGGAGACAGCG -CCAGGAAACAATTGCGCGAAATGAGGAATACATGCAAGGATATCCTCACGGGACCTTGAG -TGATCCTGATATGGCCTCGAGTAATCATGATATGGGCTTGGGTGATTCAGATGAGAACTT -GAGTAATTGGATTACCACCTCACCTGTCAATGCTACCTGTGCAGCTTCAGACTCGGGTTT -GGATATCCCTGACAATATTTCTATGCATTCTATCGAACACGTCTCTGGTGCGAATCAAGA -CGACCTGTCGGACGATGCCAAGTCGGATTTTTTTGCTATGGGAGAGTACTTCCATGATCT -TTATACCCACCATCATGATCCTACAAGGGAGTCACTCGTTCGTCTTCGACTTCACAGATT -GGGTACCCATATATTCAACTCGCTCAGCTTTCTGAGGCACATTGCCCCGACCAACTCCGA -GCCAGTGACTGACACAGACCCCTCTGAACCAATTATTGAGAGCCAGGAAGACCACGAGAT -AGAGAAAGAAAAAATATCTGGAGGGCCATTTACTAACTACAGTAACAAGATGATGTTTGT -CTCCGGAGAGACAGCTGAGCCGTCCCCTGAAACCACTACTTTGATTGAAGAGATCACCCG -CCAACAAGTGATCGAGATTGTGAGTACTAATGATTGTGTCTTGGAAGCTCTTGTCATTAA -CAATCAAATAGCTCAGCCGCAGCACCGCATTGGCGACTCGCCGCGGAGTTCGCTCGATCT -CTACTGACGATCTGATTTTCCTGATTCGCCACGACAAGGCCAAGGTCTCCCGTCTGCGAA -CCTTCCTGTCCTGGAAGGACGTTCGCAAGAATGTCAAGGACTCCGACGACAAGGGTGGTG -GTGACGCCGCTGATTTTGCCGCTGCCGACGATGCGGCTGGCGTGGTTGCAGGCCCCCAAG -ATGTTGCCCCCAAGCCCAAGAATAAGCGCGCCAAGGTCGGACTTGCGTGGGATGTCAACA -GTTTCTACTCAGTTCAGGTCCCTGAGCGTgaagatgaagaggacgaagaagaggaggaaC -AGAACTACGCGACTCTCCAGCGCCTTGCTGCCGCCGATGAACGCACCCGCCATATGACCC -GAGAGGAGTATGTCTTTTGGTCTGAGTGTCGCCAGGCTTCCTTCACCTACCGCAAGAGCA -AACGGTTCCGCGAGTGGGCAGGCTTCGGCATCGTGACCGAGTCTAAGCCAAACGATGACA -TTGTGGACATCCTCGGGTTCCTCACATTCGAGATCGTCCAGACCCTGACTGAACAGGCAC -TCAAAGTCAAGGAGCGCGAAGATAATGAGAAGCACCGCCGCGGTGGCGCTGAAGCCGGCG 
-AGAACCTCAAGAAGCGCAAGCGCGAGACCGGCCTGTTTGACCCCCCCGAGGAGGGACGCA -CTCCTATTGAGCCACGACACGTCCGCGAGGCCTACCGCAAGCTGCAGGCCACCCCCCAGA -AGGCGGTCGCTATGCTGTTGCACAACGGCCGTGTTCCTGCCCGCCTCCCTCTTGCTTTGG -TGAGTTTACATAAAATTTTACTGTTGTGTTCTTATGCTAACGAAGCCACCAGATTTAATT -GTTTGTCCACACAGGCATCAGCTTCAAATTGATGCTCTTTCATGAATTACGCGCATGGCA -GCGGGAGTTTGCGAACGATACCCAACCTTTTCTTGTTTGATTTGTGCATTCAGTGGCGAC -GGAGTGAATCCCTTAGTTGTTCTCTCTGATATCACCATTTTTGTTTGAATAGTTCACTAT -TGTACCTTTGATTTCAAATGCCTGTGCACTGTATTTTCGGCCTAATTATCCAACGAATTC -CTCTACGGCATGACGTTGGGCACAGGGTAAATAAATGTATTAGCCTGAACCATGAAGTTT -CATAATCGGAACTGCTCCCCGTGTAGAAGATGAGCCGACGGTGAGTGAGCTCGCAGAATT -ATCTACGTCTCATAGTTGTGTCAGCATTTACACAAAGATGCCCGAGATCAGAGATACAAT -TTTGATTGAATAGTGCTCAGAAATGAACATGAACACAACCATTAAGGTCTCAATTCAAGT -AGGTATCGGGTATTTGTTCCTCCAGTAAAAGCTCTCCCAAATGTTCATAGTATGCCAATC -CTTCCGTACTTCAGGGTGCGACTTGACAAAAGAGAAATATCGATGTATATCAGTAGATGG -CAGGTTCAGTGCATCAATATAGCGCAATATCTCGCGAAGAAATGTTTTGAAGTTTTTCTT -GGCCGCTAACTCAAATAGCGATAAATCAAAGTGATTTTCTGGAAGAGGATTTGCTCCTCG -ATCTAGAAGAAGCCGAGCATAACCAAGGCAATTTGACTCGCCCTTATAGGTTTGAACTCG -TTGGCGTAGCCCTGGAGACTTAATTCCCACAAGCCAGCCCGACCCGTAGGTGGGCTCTAG -ACCCGGAGTGAAAGCCCACTGAGCCCTTCATAGGAGCCTATCAGGGTTGGGCTGGGCCTT -GCAAATCACCAATAATTCTGGTGTAGATCTCAAGGCTAAAAGTGCTCCTCCGCTACAGTT -GTTTCTGGTCGTCTTTTCGTCGTCTTTTGTTTAGATGCTTTCTTCTCGTTTTCATTGTCA -CATAGCGGGAAATTGCATGAAATGTGAGGACGACTTTCTGTATAGACCCCGAAATAAACT -AGTAAGATGTCGTGGACATACCCAGTACATCTTCGAACCATTGATCATTTCCCAAATCCA -TAGACTGCATCCAGACCATCTGATCTAGATCCGGAATACGCCTATTGTGAGAGTCATTCT -GGCATGGCCTGGTATCACCATTTTCAATCTGAGAAGACATGTCCCACCGAGCTTCAGCCC -AAACCTGGCACCTGGATAAAAGCTTGGATAAAAACTTCAGTAAATGATCATTGCACTGAA -GTTCCGGCTCCTTGCTGCTTAGATCAAGCTTCCGATGCATAGATTCCATCGTCGATATTA -GGTGAACTGCGTTCCGCACCGCTTGACAATCCCATTCTGGGTCTTTAAGGGTTGAAAGAT -ACTTCAGGAGTGTGATATTCAGAATCATCTGTGACCAGAAGTGAAATGGTTGACCGGCGA -GTTTTGACGTAGGAATTTTGTAGAAATTCTCCAACCACGATTTGATATTCTCGACGGACT -GCCACAAGCAATCGAGACGTTCAAACTCTAATAGGAGCCCACCATCACCACTTCGTCCGG -GTAAATCTAGGGGCGCGTCCTGACTGATGGAATAGGCTAGCTGGTTGATGTATAACTCGA 
-CATATTGGGCGTGTGAACTGAGAATGTCTACGGTTCACCCCAGTGGAAGGTATTAGCTCC -TATAGCGGAAGTCAATTGCAATATACGTTAATCAAAAGACTACTCACCATGCTGTGGGAG -ATCTGAGCGAAATGAAGATCTTAATTGATGCAGTTCCTTCCGTAAGGTTTTCAGATATAG -TATGCGAGGAACTGAGGCTGTAGGAGCCATTCCTGTGCGAGCAAAATTTGTTTCGTCCTG -CTGTCGTACGTCCTCCGCTCTCTGCTTCAACCGTTGCAAGCGTACTTGAGCGACAAATAA -TTCATCGGCGGTGCATGACTTGTTTGTAGCGATGAGTTGAAGCGCCTCTTCCATCTGTGG -AGTCCATCGTATAGCATCTTGACGCCCAAGATGAGATGAAACACTACAGATATTCGTGTT -AGTATGGGTCTTTCCCCCTTAGATATAGGGTAATCAAGTTTGATAGAGAATGAGGAAGAC -ACTGCGCACTTGGAACTCAAAATGAAACATGCCAGTAGCGCCCGTCGCCTTTCAAATAAG -CAGTAGGCCGTTTCATTGTCGAGGGCGTGGTCATATTTGTCAGATTGCCCCTGAGTGATA -GCCATCATGACTTTTGCACCTGGCGAGCATGGCTTGAACAACCGCAAGTCATATACTAGT -GAGATTGCGAGCATCATGAGACGAGAGATTAGGTCTGCCCTGCCAAGGAAAGCGTCGGTA -CTCCATGCTAGATAAGTTAGTAATCCAAGCAGAAGGTCTATGTTTGATTCGACTTTGACC -AGAGCAGATGTGACGAGAAGACGCTTCAGTTCTTCAACTTGGATTAATCTTTCCTCTGTC -GAGAACGTGGTAACAGTGTAAATAGCTTGGAACAAGATTGGCCTGTTCTGGCGGAGAAAC -CAGCTTGTCATATCTGGGGTAAAGTCGAGGAAAGGGAAAGATGGCAGCATTCTTGACCGA -AAGAAAATAAGTCGCTCTTCTATTTGATGTGGGGATAGAGTAGAGGGAGATAGATGATAA -GGATTCGACAAAAGTGTCGGATTTGAACTCAACGATTCTGTGGCAGCCGACTCTATCGAG -TATGTAGGCCCCTCATTAGACCGCAGGTTAATGCTCGTAGGATTTATCAGAGTGCTATTT -GAGTAAGACGTGGTTGATGAAATGCTGTCCCCGTTGAGGGAATGGGGTTGTAGGATGTTG -GCCGGAGACCCAGAGCCACCTGTGGAACCGATGAAGCCTTGCATTGCAGATAACAGGCTC -TCCATCTTGTCCTCTAACCGAGCGATGCGCATATCCGACACATCGACTAGGGTTGTTTGA -AGGTTACGTTTGCGAGCAGACTCGGATGGCTCACACCTTTTCCCAAGGCGAAGGCATCTA -GGAAACAATTAGCTTAACGACGAGTATCAAAAGTACCATATCAATCACTTTTCGCAATTA -TCGCCATTTGCAGTACGCACACACCGGGACTTTGCCTTGTAGCAATTGGTGCAAGCCTGG -CCATAAGTGCTAGGTTGCCCGGGTGTCGGTTTCGATCGCTCCATAGTCATAGTGACTGGC -GGAATAGGCCGTCCTAGATGGGCAGGATACCCCAACGGAACGTAGTTCCCAGGCACATCT -CAGAGATACCACTTTGTTTATAGAGCGGAAGTAACGGAGGCAACTCCGTCGGGTTTATTA -ATGTTCCGTATTATATCATACATGTATGAGATGCCGTCGGATTTAGCCCGAAGTCCGTTG -ATGACATACCTTGGCAATATCGCTTTTGCCCTATCAAATATCCCTCTGAGATGCTGTTTG -CGCCATGACCTCTGGAAGAAAGTTATGGGAGTCGGCTAGTTATCTCGACACTAACAGGAC -CAAAAGACGCGATAGACCTGTACGAAAACAAATATTTATCAAGGCCTTTGAGCGAGAACG 
-GGCTATTTTGCCTGTAAAATTAGCGGGCAATCGATCTGCAGGTAGATAGACTCATCAAAT -CTTGAAGACAGTCGAAGCGTTTCTCTCCTCACTTCCGTGCCATCAATGGCAAGATTTCGT -TGCAGCTATCGTGTTCATCTAGGGCTAGAAGCGGATAATGCTGCTCAGGCCACAATGAAC -TATCCTAGTGTTTATGATGGATTTAAACAGAATCTCAATGAATGCGTTTATGCAATAGAC -CATATTGTAACTCAGAAATAGCTGAAACAACAGTTATACCCCCATTTCAGCGATTTACAT -GACGATACTAGCATGTTAAGATGATGTCCTTCCAGAAGGAGTCTTCTGGTAAGTGTCCTC -GAGGGTAGCTCTAGCCCTCGCTATGCATTAGTTTGTAGTACCGCAAAATGCGAGGAATTT -GAAAGTATCTGCAAGCTTGAATCTAGGACTTGAACCGAAGATGCGGGTTTCGATAGCAAA -CTCCGAGTCACAGTACACTTCGGCACAAATTTGCATAGATATGGATGAAAGAAGCCCCTG -ATCAGTTTCGGATTCCTGAAGCGAGGTCCGCTGTATTCTATGCCCGTGCGACCTAATATG -GGATCTAGGTGCAGTGAATTTCCAATCAACATCGGAATATAAATAACCTGATTCCTCTAC -ATATTGATCATAATCCAGTGTCAACACAGTCTCATGCAAGCGCCACAAGTCATTCACCCA -CAGCAATTGACTAGCCTTACTTTCTTACAATGGCCGAGCCGAGATCTCCAAGTTTGCATG -AGCCTCCTATAGACTCGCCCGTGACTTGGCAAAAACACTCAATTGGAAAGCAAGATGGCG -CCGCTAGCGACGATGGAGAGTTCAACCGTCAATATCCTTCAGGCCTGACAGTCACTTTCA -TCTTGATGTCCGTTACTCTGGCATATTTTCTACTTTTTCTTGATCTTGCTGTCATTTCCA -CGGCCACTCCGGCCATCACGTCCCAGTTTGACTCGTTGGTGGACGTTGGTTGGTAAGTAC -ACATTTACTTGGCTAGGCCACAACTGCGTGTCATCTTTCTACGAGGTACTGATCGATATT -TCTCAGGTATGGCGGAGCCTACCAGCTTGGAAGTGCAGCATTTCAGCCCTTAAGTGGCAA -GATTTACAGTCGTTTTTCCATCAAGGTGTGTCTTCGTCTACAACTCACCAGGAATTTGAT -TCTAAAGAGCCCCACATAGTGGACCTTTCTTGTCTTCTTTGTGATCTTCGAAGTTGGATC -TGCCATATGCGGAGCAGCTAATTCGTCGGCGATGTTCATCATCGGCCGTGCTATCGCGGG -TATTGGGTCTGCTGGAGTGTCTAACGGCGCAATGACAACCATTTCCGCCGTCCTCCCAAC -ACAGAAACAGGCTCTTTTTATGGGCCTCAACATGGGCATGGGCCAGTTGGGTCTTGCGAC -GGGTCCCATTATTGGAGGCGCGTTCACCACGAACGTGTCCTGGCGGTGGTGTAAGCTATT -CCATATCTTACATTGAGGATCTCTAAAATGCTAAGTTCTCTTTTGTTAGGTTTTTATATC -AACCTCCCTTTGGGCGTTATTATCGGCGGATTCCTTTTTTTCAACACAGTCCCAGAACCG -AAACGCAAAGACCCGCCTCTGCAGGTTCTTGGCTCTGCCATCAAGTCTCTAGATCTCCCA -GGATTCATGCTTATTTGCCCAGCTGCCCTCATGTTCCTCCTGGGGCTCCAGTTTGGTGGT -AATGAGTATGCCTGGGACAGTTCAGTTGTGATAGGCCTGCTAACTGGCGCTGGCGTGACC -TTTGTCGTCTTCCTTGTCTGGGAGTGGCACCAAGGAGACGATGCCATGGTGCCACTTGCC -ATGCTCAAACACCGAATTATTTGGTCGGCTGCAATGACAATGTTCTTCAGCCTGTCTAGT 
-GTTCTTGTGGCTGATTTCTATATTGCAATTTATCTCCAGGTAATTCACGACGATACGCCC -CTGATGAGTGGTGTTCACATGCTCCCAATCACCATCGGTATCGTTATATCTACTATGGTC -TCGGGCACCCTGAGTAAGTGCTTCCAGTCGTGGCCTATAGTCTCACGTGAATCACTCTTT -CCAGTGACCGTCGCTGATACTACAAAGTTTCCATTTTGGGCTACTATCTCCCATTTCTCC -TTGTGGGGGGTGCTATATCATCTATTGGCTACGGGTTATTGTCGACATTGAGTGTGACGA -CACCGGTTGCAAAATGGATTGGGTATCAAATTCTCTACGGTGCTGGAAGTGGTGGCACGA -CTGCGGCCGTAAGTTCGATTCCCGTCAAATAATGGCCGCAACAGCTGATATGTATCTTCT -ATTTAGCCCTATATCGCGGTACAAAACCTTGTGGCCCCAGAGCAGATTTCTCTTGCGATG -GCCATTATCATCTTCTGGCAAAATATAGGTGCTGCCATCTCTCTGATCGCTGCCAACGCC -ATCTTCAGCAACAGCCTTCGTTTCGAGCTTCAAAAACGCGCTGCAAAAATTAGCGTTTCT -TCAGACGCCATCATCGAAGCTGGAGTCCGCTCAATCCGTGACCTGGTGTCTGGCTCCGAG -CTGACTGCTGTGCTCGCCGCCTACGCTAAAAGCATCGACAAAGTTATGTACCTCGGTATT -GCTGTCAGCATTTCCGTCTTGGTGTTTTCTCCGGGACTTGGATGGAAGGATATTCGGAAG -GTCAAGGAGCTTCAGGTAATAACTGAAAAGTCTCCTGACAGCGATTATATGGATTCGGCA -GTGGTCGGCGAAAAGTCAATCGTTGGTTAATAACGAATAAGGGAAGTCTGTATTCATAGT -TGTCCCTTATACAGCATTCGATATTGCTCCTAGATATTTTTGGACTGCCGTTGGCAAAAT -AGATGAACTCGCTACCCATAATCAAACACTCTCAAACACGTGTATCCTTAGATCCAGAAA -ATCAAATGACTACATACTACCCATACTAACACTCATTGATACATTCAATCAAACTTTCTA -CCCCGTTGCCAGCGCTCCATACTTTTGCAATTCTCAAACCTGCCCTTCTCTCAATCAAGT -CGTACCACGCTGTGCTAGTTCGCTCTTCGGAAGAGAATAGAGTTAACATCAGCATATCAA -GTCCCGTAGTCTCCCAATGGGCACCGATGTGCGGCACAACGTTCTCGTTGATCAATAGTC -TGCTATACCCAGGCTTCATGGCCTCTTTCACTCGTGCAAGTATGCTTTCGCATACATCGT -CCGGCCAGTTATGCAGAGTTGAGTGTATATAATAGGCACGGGCACCTAAGGATAGTGTAA -GTGGCATCGTTCGAAATTGATAAAAAATCATTTAGACCAGGGCTCTGTATCCTCACCTTT -AACGGGCTGTTCTTGATGAAAGTCATACTCCATTCGAATTATAGCCGGATCTAGATCCTT -GATCTGGTGAATCATCATAGGTAAATCTTGAAGGATTAATTTTCCGGGGGCGTTCGGATA -GCGACTGTGAAATCTTGTCAAGTCATGGCCCATGTTGCCGCCAACGTCCACGAGGAATGG -TGCATCTGGGCTGGAATCGGCTCCATCGATAAGTATTTCCTTCACAGGATAGACGTCGGG -GTCCATCCAGCATGGGCGACCCAGGTTGTAACCTCCAAGGTAATCATTCAGGTGTTTTCC -ATAGCCCGAGTCATGTACCCAATCAAAAACGTTCTTTTCAGTACCATAGGCATACATCAG -AGATGTGTCATGAGAATTTGTCGGATTCACCCAGCCACGTTTACGCGAGAACTCGTGGAA -TTTCAAGGTGCCAGCAGCAGTGCATGTGGTGCTAGATAAGCGGCATGTTATATTAGTCGC 
-TTGTCACTTCATAAGGTTTGACTGAGGTTGAATGAGCACTCACACACCAAGATACCCGTG -CCCAATTTGAGGAAGGCTCAGCGCCTTGGTATAGTTTGTGGATCTGTACTCATCTTCGCC -GACCTCGATGAGGAGTCCCATCGCACAGACATGTCGCATCACGCGACCTTGAATAAATTT -AGCTATAGCTTTAGTGCGGGTATGGAAAGCCCACCGAGAAGCTTAGTATCGACCCCGAGA -CCCACGGCCAAATCGGTTACCTTCTGGGGCTTATCGCCATTCTGAGCCATCAATACCCAT -AATCCCGAATCCACGCCAAACCCTATCGCCGCTATTGCACCAATCTATATTATTACTTAG -TCTGTCTAGACGATGACAGTCAAAAGGACTCACCTGTCCCCAACAGTGGTCAACCATGGT -TTCCCGAGGAGTCTCTAGAGCTCTAGCCAAAGCTCGACACTTGATTACCATATCCTTTCG -GTCCTCGTCAGTTCCATGATTTAGGTTGTTAACACCTTTGATAACATCATTCAGCAGTTT -AGGGACTGCCTCAGCAATGTTTGAATGTGTGGCGATACTAAGATCATCAGGACACGAATG -ATACTCTGCGTTCTCGGCTCCCATGCTGTTGGATATAAGAACTCAAATTGAAAGACTTGC -TCCAGGGCGAAATGAATAAGATTGTGTATACTTATATTCTAACATTGAAGCTTCTACGAC -ATATTGTTTGCTGCCTCTATATAACAAGATTCAACATAGCGTGTAATAGCCTGAAGAGAG -TATGACGTACAAACTTGGGTCTTGGGTACTAGAAGCCTTGCAAGCTGGCAATATAACGTC -AAGTCTCGTATACCGAAAACCTTAGATGTTGATAATACAAACTCTCAATGAGTCTCGCAT -ACTAGAAACCACAGAAGCTGGTAGCACAAACTTGATTCTCACAAACAGGAAACATTGGAA -GCTAGTGATACAGGCGATAACCTGGTACGGCTCAAACCTTTGACGTGCCAAGGATTCTCT -AGATGTTAATAGCAATATCACACGAAATTCCGTCACCGGACCTCATTCACCACAATAATT -GGCATATGCGGCCAAGTGATTTCTCTTAGGTCTTTTCCGCGAATAGGGGATTCACCAGAT -CATGACTGGGTTTCTTACTAGCGGCCAGGTTTAATAGCCTTTAGCCATGACAGTAGCACT -GAAGTCCACCAAAAATCACTAGTAACTATTGATGTTAGAGCGACATTTTAGATTTTTGAC -ATGTTGACCTGGTGATCTGGCAGACAAAATGCCTTTTTTCAGTGGTCCTTTGAGATTCTA -TGTTTTTAACCATTTTTATTAAATGTTCTCTTGAGAAAACGGAGGGGCAAGGGACTGGAT -TTGTAAAAGCGACTCTTTCATGTAGTACCATATCCTGGGATTCAGCAATTACTAGCCCTC -TCTTTCCACTTTCTCAACCACCCCATGCCACTTGTGAAAAATGTTCTCTGGATCAAACTT -ATTTTTTGTCTTTTGCAATTTCTCCAAATTCCAGCCATATGCTTGAACTGGCGTCAACAA -GTCCGCTTCCATATTGAGATAACGTCCCACCGGCTCTTCTCCGAAGTTGTTCCGCCTATC -ATGCCCTACCCACTCGTGTCCGTTCTCATGGCGTATCTGTGCGCATAATTTGGAGTTAAA -CCGCCGAACAGTTTCATCTAGGGCGGGGTTGGTCCATCCAAACATAGTAACAGCATCGTA -GTACTCCCCACGATTGGCGTACGCGGTAGCACCTTGAGGCACTTGGCGCAACTTGTGCGT -TGAAAAAAGTTCAACTGCTAGGGAGCAGACCTTTACGTCAAGGTGCATATCGTGGAATGA -CCAGACGGAATCTGCTATTTGTTGAAAAGCGACTGCGTGGAGTGGCATCGTCACATTGCC 
-GGAGCCCTGGAGGCGGCGAATGCCGTATTCGCACATTGAGTTCTGTAGGGTGTTGATCTG -GGCAATGGACATCATGCTTGTGCATTCTACAATAGATCCAATACTAAGAAGGTTTGAAAA -GTACCTCTTCGCTTCCCACTCTGGTCCATGGTAAAGTGGAATAGCAGAAAGGATCCGGTC -TGCCCCATCCGGGGTGTACCCAACAGCTATAGCTAAGCAGTGGCCCTCTTGTTGATCCCG -ACTGTGGAATTCATTTGAAAATGCCACTAACGCACGCAGCTTGTCAGGTGCAAATGTCAG -AGTTCCACTCCAGACCAGACCCTGTTTAAAGACGCGGGACGTGAAACGCGTAACGACTCC -GATCTGAGCTCCTGCTCCACGGACGGCCCAGAAAAGGTCTGGCTGTTTTGACTCGGAGGC -TTCGAGGATAGATCCGTCCGCTGTGACTATTTGGACTGATATGAGGTTGTCAATTGCGAG -GCCATATTTTGCTGTCAGCCAGCCGTGGCCTCCTCCTAGTATAAGACCTCCTACACCAGT -GTGGTTCACTGCGCCGCCAACAGTAGCTAGTCCACAAGACTCTAGAGCACTATCTAGATC -ATCCCATAGGCATCCACCATCAAAAGACACGGACATCTTGGCTTGGTCGACTTCAACCTT -TCGCAGTTTCCCAAGATGAATGACCATACCTTCCGACGATGACATCCCGCTTGTCGAATG -ACCACCGCCACAAACAGCCAGTGGGAGCTTGTTTAAAACGGCAAATTTGATAGCCACAGA -GACCTCTTCGGCAGATGTCGGGCTAACAATCACAGCCTAGAAGAAAATCAGTTAGATGTG -ACTTGGACAGACCGGCGGGGTCAAACTGACCGATTTTATTTCAGCGGCCTCGCTCCATCT -TTTGAGGCGCGTGGCATATCCATCATCCCACGGCATGAGAACGTCACCACCTGTTATAGC -CCCGCGTAGCTCAGTCCAGTCGGGCATTTTGGTGATACTTTCAATTGAAGGTGGCACAAA -TCAATAGTATCATACTCCTTTGTGATAGACCATACAAGAAATGGCAGGTGATATAAGCAG -TCACTATGTAAAACTAGCTTTATAAACATTATAGGGACTACTATCCGTGCGATGATTGTG -TGATACTATGCAAAGGCAGCCTTCAAATCCTGATTTTATACAAAACTCTAATTGAAATCT -GATTAAAGTCTAATTGAAATCTGATAAAAGTTTGATAAAAGTTTGATAAAAGTCTGATGA -AAGTCTAATGAAAGTCTCATAAGTGCAAAAAACTCGTGCAATGGATAGACTAGGATATCG -GAAAGTACCAGAGCGTTCTGGAGTACTAGATTAGATGTAACCTGCATGCCAACCATCAGG -ATTGACATATATGCAGAGGCCTTTTCTGGAAGAATTTCCCACCAATCTAGTCAATGTGGG -AGATGCCATCATGACAGAGCAAAACAGACCCATTTCATATCTCTAAAATTCACGTATATA -TAGAGACCTCTGTTATAGTCATAAGGAAATGTCACACATCATCATATTTGACAAACATAC -ATCAGCACAACATCAAAGGCAATTTACCTAACCAAAAATGTCCCTGGTGGAGCAGAATGA -CATTGCAGATGTCAAATCTACTGATATCAGTACGGTGACACAGGTAGATCTTGAAAGGAT -TTGGACATGGAACAAGATATCCCCAGAACCGATTGAGCGATGCGTTCATGAGATGTTCGA -AGAGAAAGCTCAAACTCAGCCCAATGCATCTGCTATCTGTGCCTGGGATGGGGACTTCTC -TTATGCCGAACTGGATCAACTCGCTACAAAATTAGCAGACAAGCTTAGTAAAATGGGTAT -TGGGCACGGTCAGTTGATACCTCTCTGTTTCGAGAAGTCGATGTGGACGACAGTTGCTAT 
-GCTTGGAGTGCTTAAGGCCGGTGGAGGATTTGTCATGCTTGATACCTTGCTCCCAGAACA -TCATCTCCGTCTCATCGTCCAACAAGTCAAAGCAGATCTCATCTTGTCTTCTGTCTCAAA -TCAGGAATTCAGTTTGAGATTGGCTCGGAATGTGGTTCCTATAAGTCGAAGATTTTTCAT -CAACCTAGACGTTCAAGTAGACCGTCGCATTTCCCCCAGTTTTCCGTCTTCAATCGTGTA -CATTAATTTTACATCAGGCAGTATGGGAGCCCCGAAAGGCGTTCTTTTGAGCCATCGAAA -TTTTGCATCGGCTCTCTACCACCAGGCACAGCGTCTCGGATTCACAAGAGAGTCGAGGGT -CTTTGACTTTTCCTCATACAGCTTTGATGCATCAATTAGCCAAACTTTCACAACTCTCAC -TGCTGGAGGTTGCTTATGTGTACCGATGGAGCAAGACCGGATGAACAAGTTGGCTCAGAG -CATAGTGTCTTTGCGCGCAAACGTTGCTGCCCTGACACCCTCCGTTGCCCAACTACTGGA -CCCAAAGGATGCCACTACGCTACATACAATTATGTTTATCGCTGAAGCGTTACAAGTTAG -GGATATCAACCGTTGGTGGGGAAAAGTCCGAGTTTTCAATATATACGGGCCAAGTGAGTG -TACCCCCTACAGCGTCATCAATAGCCATGCTTCTTGCCCACAAGAAGCTACTCGAATAGG -CATTGGCGCTGGCCAAGTTACATGGGTCGTCGACCCGCATGACCACGATCGATTGCTTCC -ACTTGGCGACACCGGCGAGCTACTTCTGGAAGGGCCACTTGTAGGTGAAGGGTATTTGAA -TAACCCAGAGAAGACAGCAATGGCATTTATCCACGACCCACTGTGGTTGCGAAGAGGAAG -TTTGGGACAACCAGGAAGGTATGGACAACGCCTCTATAAAACAGGAGATCTTGTTCATTA -TAATGAGGACGGAAGCTTGACCTTTGTCGCACGCAAAGATACACAGGTTAAGATTCACGG -ACAGCGAGTCGAGCTTGGAGAAATTGAGCACTGCTTACAGGAGCACATGACCGAAGCAAA -GCAAGTTGTGGTAGATATGGTTGTGCGAGAGAACACAGGTCCAGCGTTGGCAGCCTTCAT -TCAAATCGATCAGATTGCGCCAAATCCCAAACAGCCAGAGTCTACCTGCACCGCCGAGAT -ACTTCACCTTTCTACTGAGACCGAAGACATACTAGCTCAGCTTCTGCCAGGCTTTATGGT -GCCCACCATGTTCTTTTCTGTGCGGGAGATCCCCATGACGGTGTCAGGGAAGCTGGACCG -GAGAACACTACGCAATATTGGCACTTTGTGTATTCAAAAATTCATGGAAAAACAGAGACC -AAAGCCAAAGCCAACATCACGCCTCGGGCTGGAGTTACAAAGGATATTGGGCAGGGTTCT -GACCCTTGACCCGGCATTGGTGGGAGCTGGAGATAATTTCTTTCGGCTAGGAGGAGACTC -GATTTCGGCGATGCGGGTCGTCTCCGAGGCTCAAAAGGCTGGGGTTGAGCTCACGGTATC -AGACATCTTCCGATATCCTACACTCACCAGCTTGGTCAACCGGTGCCACCATGTTGTAGC -CGAGAAGGCTCCGGAAATGATCCCACCTTTTGCCATTTGGAGGGACGTCTTTGACAAAGG -CTTGTTCCTCCGAGAAATTGCAGCCCAGTATCAGTTCGATCCAGAGACCGTAGAAGACGC -ATACCCGTGCACTCCTTTACAGGAAGGTCTCGTGTCCCTGTCTTTAAAGCACCCAGGGGA -ATACATGATTCAGCGCACTCTGGAACTGCATTCTACCATCGAAACAAGAAATTTTTGTCG -AGCATGGGAAGGAATGGTTCGAAATGTTTCAATCTTAAAGACAAGAATTGTGCAGTGCAG 
-CGGGGTGGGTCCCTTGCAATTGGTTTTGAACCAAGAAATCCAATGGACTCATGCAAACGG -ATTGAATGAGTATCTTAAAGATGACAAAAAACGGATATTGGAGCTTGGAACGCCTTTTGC -ACGCTACGCCATTGTCACAGATGATGCGGGCTTGCGTCGATGGTTCGTGTGGACACTTCA -CCATGCACTCTACGATGGCTGGTCACTGCCACTCATGATTGACATGGCTAATCGAGGATA -CCATGGTATCTTGGAAAAACCAAGACACGAGTTCAAAAGGTTTGTCAAGTACCTCGAGGA -CCAGAACAATGACAAGATAGCTGGCTACTGGCGTGATTCTCTTATAAATTGTGACTCTAC -CCCCTTTCCGGTACTTCCATCGTCGGTGCAGCGGCCAGTAGCAGACAGCGACATAACTCA -TGAAATACCATGGCTAATCACACAATCACGAGACATTACAGTCACAACACTTGTTCGTGC -CGCCTGGGCTCTGCTTGCCAGCAGCATGACAAACTCAGACAGTGTCGTGTTTGGTATAAC -TACATCAGGACGCAGTGCGCCCGTCAGTGGTATCAACGAGATGATAGGCCCGACAATTGC -AACTGTTCCATTGCAAGTCAGAGTTTTACGGTCCCAGAAAGTTCTAGACTATTTAGAGGC -AATGCAGCAGCAATCAACGGATATGATTCCTTTCGAGCAGTTCGGTTTACACCGAATTGC -TAGAACATGTCTTGGGGCCCAGCAGGCATGTATGTTCCAGACACTTCTTATTATCCAGTC -ACAAGAAAATAAACCCGACTGTACGCTTGGAGTGTGGGAAGAGAACAGTGAGCCGGAGTG -GGTCAATACATATGCACTAGCGCTGGAAGTGCAAATTGGCAGGAAGAGGGTCAATGCTAG -ATTTGACTCAAATGTGATTAAGCCGTGGATCGTGCAGTCGTTATTGGAAGGGCTCGAGTT -TGTAATGAAACAACTTGACTCCGCAGGGTCCCGACAGACCATGGCAGACATCGAGCTGGT -CACACCCCGGAGTTTGGAGAAAATATGGGGTTGGAATTGTACCGTCCCATCGCCAGTGAA -AGAATCTATTCATCAATTAATTGAAGAGCGGATGCACAATCAGCCGATGGCAAGAGCTAT -ATGTGCTTGGGATGGAGAATTCACATACGGCGAGCTAGATCGACTGTCCTCGGTGGTAGC -GGCTCAGGTCGTCAAACTTCGAGTCGATTCGCACCTACTAGGGCCGAATATGCTGGTGCC -ACTATGCTTTGAAAAGTCCAAGTGGACAATAGTATCCATGCTTGGTGTCATCAAGTCCGG -TGCAGGCTTCGTGTTGCTCGATCCTTCCCTCCCTCAAGCCCGACTACAATCAATGCTCCG -AAAAGTGGGCTCGAAGTTGTTGTTATCCTCCCAAGCCAACATGGATTTGAGCCGCAGATT -GTCAGAGATAGTGGTTCAAATTGGCCCAGACCTATCTCACATCTCGAATACTGTCTTGGA -TAATCCCTCGGATAGTCTTTCGAGTTACGCTACGAGTTACGCTACGAGTCCCGCACCACT -GTTCCAATCACCCTCGAGGGTATTTTATGCAGTTTTCACTTCCGGCAGCACTGGAGCGCC -CAAAGGGGTGTTGGTGTCGCACGTGAATTTCTGCTCTGCCGTACGCTATCAGTCAGACCT -TCTAGGATTCACCAGAAAGTCTAGAGTCCTAGATTTCGCATCATACGCTTTTGATGCCGC -GATCCACAATGTTATATTAACACTTGTTGCGGGAGGATGTCTGTGCATTCCTTCCGAAAA -AGACCGCGTCGATAACATTGGAGACATCATTTTCACCATGCGGCCGACAATTGTCAATTT -GACTCCAACAGTAGCACGTCTGCTAGACCCAGAAACGACACAAGGCCTTCAGACTTTGAT 
-TCTACTTGGCGAGCCAGTCACAACCGGGGATATTGAGCGATGGCAATCGCAGGAAATCCA -GATCATTAACGCCTACGGGCCCGCTGAATGCACGCCAATTAGCACAATTCACGCATTTGG -ATCCAACACAGACAGGATTATTAGGATTGGAAAGGGTGTGGGTTTAGTAACATGGATTGT -GGATCCAGAGGACCATAATCGCTTGTTACCACTAGGATGTACCGGTGAACTACTCCTTGA -AGGGCCGCTTGTTGGAATTGGCTATATGGGGGACCCGGAAAAGACCGCTGAAGCGTTTAT -CGAAGACCCTGAATGGCTGCTGAGGGGTTCAAATACTCGATCTGGGTGCCATGGCCGCCT -TTATAAGACAGGCGATCTGGTGCAGTACAACGAAGATGGAAGTCTGACGTTCATGGGCCG -CAAAGATACCCAAGTCAAGATCAGAGGGCAGCGCTTCGAGCTTGGAGAAGTCGAACACCA -CATTCAAAATTGCTTGCCAAAAGTAAGTCAGGTCATAGCCGAAGTTGTTGTGCTTGAGGG -TGAGACAAATCCTAGACCGGCGTTGGTGGTCTTTATCCAGGTGAGCAATGTGGGCATGAA -GATCAACAACAAGCCGACTCCTGAGGTGAAGACATATCCCATGTCTGCTGATATCAAGAA -GAAGCTGGCATGCCATCTGCCAACCTACATGGTGCCGACAGTAGTGTTCTCTCTGCAGGA -TATGCCGCTAACACCCACGGGGAAGACAAATCGCAGACGGCTTCGTGAGATTGGCCAGAA -ACTGCTCTTGAAAGATGGAAACAAAGAGTTCGACTCGTCTGAAAACTTTCTTGGGAATCC -ACACGATAGATTGATATTGGAGACTGAGCAGCCGGCATACACAATAGCCCAGAAAGTGTA -TTCCATACGTCCCTCGTGGACTCAGGACAATTCCTCATCTGGAGAGGACAGGTCACAGCA -CCGGCACATTGAATTAAACGACGTGCTTTTACATTCATCCGGTCTGGACTCTGTGGACAT -GATGGAACTCATGTCGTTTATCTCGCAGAAATTCCACGTTCAAGTTGGAATGCAGTTTTT -GATGCACAAAGCAACCAGTATCAGGATTTTGGCTCAGCATTTAGTCAGTTCACAGGCGTG -TGATGCTGATTATCAGCCATTTCCACGCTCTTCAACCCGTGCGCCGACCTCGGTTGATCT -CGTGGCTGAGATCGGGCGATACGACTCTAGGGTTCTGAGCGCACAACAGAGACCCACACG -CCATGATCACATAGCGAGTAATAGTCTGCCAATAGACAGAGATGATAAGTCTTTTACCGT -GCTCTTGACTGGCGCCACCGGCTTTGTCGGTACTCAAATCCTTCGCCAGCTCCTCGAACA -TCACCATGTTAGTCGTGTGATTGGGCTTGTACGTGGCGATACGGATGATGCAGCCAGAGA -GTATATCATTGATAAAGCTGTGAAAGCGCTCTGGTGGACTAATTGCCACGCGGATAAGCT -AGAGGTCTGGCGAGGCGATCTATCACTGCCAAAGCTGGGACTTAATCCAACGCGCTGGGA -TTCTCTAGCCAGTGGCCAAGCAGTCAATACGATAATCCACAGCGGCGCTACAGTGCACTG -GACAAAAAGCTACGAAGTCCTGGAGGCCGCCAATGTTGGGTCAACCATAGAGCTGTTACT -CCTCGCCGTAGGAGTCCCTCGAATGAGGTTCCTCTATGTCACGGGCGGTCGGCCTTGGAG -ATTTCACGAAGAGCTGGATGTTGCGAACGAGCTTTCAGTCGCGGATGCCGTCGCGTATAG -TCAGACTAAATTTGTCGCTGAAGCTGTAATCAAGCGTGCGGCCCGGCGCATCCCATCTGG -GACAAATCGGCTCGCCATTCTGAATCCTGGATGGGTTATTGGCACGCCAAGTGAGGGTTT 
-CTCCAACACTGGTGATTATATCTGGCGGCTGGTGGCGACATGCATCAAAGTAGGGGCGTA -CAACCAGGCTGAGGCGGACGGGTGGCTTTCCATGTCTGATGCTACCGCCACAGCGACGGC -GATTAATGACGCAGTACTGAGCAAGAAGATACAGATCGTGAGTGAAGAACAGCCCGTCAA -TGGCATTTTATGGAGGGAGTTTTGGGCCATTGTTGGGGGCCTGGGTTACAGGCTTGAGGC -CAAGAGCATGGTAGATTGGTTGGCTCTTGTTCGTGCTGACATTGGAGCTGTGAGGGAGAA -GCATCCACTGTGGCCACTGGCACACATAATCGGAGGCTTGCAGAACGATGAGCGAGTTGT -GGGTAGCTTGCGGCAGAAGCGTGGTAGCACGCCATTGCAATTGAAAGTCGCTGTAGGCAG -GTGCGTCGAGTTTCTGGTCAAAGTCGGCTTTTTACCTACACCTTCAGATCAAATCCAAGA -GACTATATAGGATGGACATAATGGATCAATTTGAAGAAGACAATATGATGCAACATTGCC -ATTTTGAGCGGGATAATTTCATTGCAGCATTATTACAGAACTTCTATCATCACTTCCAAG -TTGTATCAATTATTGTCCTCAACTGGGACAAATGCAGATATTTCATTTAGTCATACAGAA -TAGCTAAGGCTGGAAATCTCCAGTCCTTCATAGCGGAGTAAAATACACTTATGCCTGGTG -CGTCCGGCGCCCTGAAGAGATGAGTTAGCATTTGTGTTGCACATTCAAACGATCAAAGGG -AAAAAGAAAACATACACTCAGCCAATTCCTGCATACTCAACTCTCGAATGCCATTGACAT -GAGGAACGTTGCCCTGCACAGCACCCTTGACAGTCTCATAGGCACGGCCAAGCAGGCCCT -TGTGCGCAACAACATGCGTAGCTAATTTGGCCTCGAGCTTGATAACGCCAATCCGGGGAT -CCTTGGGTCCGCCGTCGTGTACACCATCGCCCATATCTCCAATCCATGCCTTGAGTGCGG -GCGAGTAGTACTTTTCCACGATAGCAGGGTCACCAATTACTGACGCAGTGCCTGAAATTG -ATGCCCAGGCTCCACTGATGGGATCGAGGAAGGACATGTTTGTTTCTTTTGGGTGCACGG -TAAGATCCATTGTTTTGCCGGAGAAGAGGTTGGTGTGGAAAATGAGGTCAATACCGCCGT -TCTCCTTTTGGGGGCAATGGATTAGCATTTTTCTTTTACTTTATTTAGCCCTGATCATAC -CTGCGCAGCTAGTGCCATACATCGCGAGGCGAGGTAGTCGCCTTCTGACTGCTTGGTCGT -CAACATGCCAAATTTAACGTGGGTGATAAATTCAACCAGGTCTTCAATCTTCTGAGCGAG -AGGCGGGTCTTCAAAATTCTGGGTCTTGTAGGGGTCCACGGAGTGGTTGCCCGTATTGGT -GTTGATTGAGGCATCTGTAGTAGACATTTTTGCTTGATGTTCGAAATAAAAAGATATATT -AAAGCAGACAGTTTCGAGAGAAGGGGACGACGAAGAAGTTATATAGCTATACATACTAGG -TCGTCAATGTGACGTCACCTACGGAGTACAAGATCAGGATTAAGGCTCAAATTCAACCTT -GGAAGTTCACCGCTTCACTGCATCACTGCACCACTGCATCAGATCGAGATCGGGGCTCCA -CAGCTATGTCTCTGTGATGCATATCCGACAGGTTTTGCCTCGACTCCGCATCGGATATTC -TGCGAACACCCATTTGATTACACCCATGCAGAGTGTCTCTCGAAAACGCCGTTCTGTAAA -TTGTCACATGAGTAATATTTTGTGGGCATGATTGCAAGGTCGTATCCCGAGTGACTCTCA -TTGCAATGAGATACTCGTTGATTTACCAGGGTGCGCAAGAGACTTCGCGATTGACCGTTC 
-CTACAGAGCGAAACATGTCGGCATTTATCAATCCACTTTCGTGCTCAGCCAGACAATTGA -TTTGAAGTATCACTCAATAAGCAGTCACAAAGTGTCCTCTCGATCTGCTATTGAGGTATC -AAGCCCTGATTACTTCAATCAGCGAGATCTTTCACCGAAACACAAGCCTGCACAACAATG -TGTCAATGTCTCACCTCTGAACAAAGTACCACAGCAAGTTAAGCAAAACAGCCATAATGC -CAATCAAAATGGCTATGCACCACGATGTCAGCCGATGTCTTTATTAGACGAGAAGAAACA -AAAATCGAAGAGGGAACAAACCTTCATATCCCCAACTCTTCGCCACCTTGACAGCTCTCC -TATTTCCCGGTGTTCTCACTTTAAACCAACCGGACATTGAATTTGCCTGGCAAGTGTGGC -CATCAACCAGCCGACAGCCATTCCTAGCCCGGCGACACTCAAGATGCCACCTCCGGCGCT -CCATAATGGCGCGCACACGCCCGTTGCAAAACGACTCGTCCGTCTTGAATGAGCACTTCT -CCATAGGGCACTCGAGCTTATGCTCAAATCGCTCAATGAACCAGTCAACTTTCAGGCGCA -CGCCTGTGTAGACTATCGAGTAGAAGTGGCGGCTGCTGTGTTTTAAAGCTAGGGCGTCTG -GGTAGGAAAGGTAGGTGCTTATTTGGATGTGGAGCTCTGAGGGGAGGGCCATTAGATTGG -TGGATTGAGAACTTGAAAAGGGCTGGGATAGCTCCATGTGTTTAGACATGTTTTGGGCCA -GATCTTTGGACAGCTTTTCAAGGCTATTGCTTGTAGCTCAATTACATTCGACGTGATGTT -GTTCATCTTATACGAGCTATTATACGAGCGGGGTCGAAAACTGTTGCCTGAAACCGTAGC -AATAGATAGCTTCTAGCTGGAGCTGCTTGTCCGGATGTATTAATCAAATGCTCGATTAAT -CCAAGACAACTGCCTGCCTTAAGCCTCCTAGAAGAAGAATTAAAGGGGTTGTGGTCATAA -GGTTCTACTTATTCTGATGCCTAATAAGCACCAAGTTCTGGATCTTAAATATTTTTCTCT -CCTAGAGTATCTTCCAGGCAATAGGAAAATCTACAATCCTTAGAGTAAAGGCAAGATGTG -CTAAGCTATGTTGTAGGAATTCCAACCTACCTTCCAAAGGTTAGAAATCCCGGGCGATCA -GTATCGCTGACATAATCTTCAATATCGGCCCGGCCTCAACCTGGAAGCTTCAACTTTTAG -AGATCGCCACTCACCAAATCAAACCTCCCACTACGTTTCCCCGAAACAAATTTCCACAGC -CACAAACATGTCAGGACCAAATGCCTACGGCACCCCGGTGTCGGACACAGCCTTCCGCAA -AACATGGGACCGCGAAGAATACGCCAAAAAAGCAGCCGACGATGAAGCAAAGCGTAAAGC -CGAAGGCAAAGCTCGCTACGAAGCGAAGCTACTGGGCAAGAAGTACCACGCACCAGTCGA -CTATAGTTCGCTCGAAGACACCACAGCCCGCGACAAGCGCCTAGATGTCGCATCCATGGT -GGGAAAAACGATGCTTGTGCCCGCAGGCGCGGGGATGGGCAAGCGCGGCCAGGGCGCTGG -ATTTTACTGCAAGGACTGCGATCTTACCTACAAGGATAACTTGCAGCTTGTGGAGCACTT -GAACAGCAAACAGCATTTGATTGCGACGGGACAGAGTGGCGAGGTGAAGAGGGCAAATGT -TGAGGACGTCCGCTTGCGGTTGAGGATGCTTGCGCATCGGAAGCGTCAGAGGGATGAGGA -GGAGCGCAAGGCTTGGCAACTTGATCTGGGCGTGCGACTGAAGGAGCGAGAAGAGATCGA -TGCCAAGGAGAGGGAGGAGAAAAGGCGCAAGCGCAGTGAAAAGCGGAAGAACAACAAGGT 
-TAAGCAGGAGGATGATAGTTGGGAGGGCCGGTTGGGAATCATTTCTTGAGTGTTGTTCTC -GGTATGCGCGTTCTATTCTTGATACCCATGGTGTTTTGGCATATTTTCCAGGCATAACTT -TTTAACTGATCATGTTCTTCGTGtgcaataatggcacttccaaaattcaaattgcaaatt -gcaaGCTAAAGATTTAACAAGGGTTTTCCCAACGCCTAACATAACGCCTAACATGCAAAT -CATATCATTTGAAAACAATGTCCAACCAATTTGATTTAGTCAATGCACTCGATAGCTTCG -AGCCGCTGCATTGCTCGGCCACCTTCTTTCTCGAATTGGCTGACAAGTCGCTCGCATTCT -CTCTTCAGTCGTTCTTCGCCATTCTTTGCGTTCCACGCGATCCATTCTTGGACCGTCATC -TTCTTCTCTGGGCTTGTCAGGCCACCCTTCACGCCTTTGAAAATTCCAGATAAATCTGCG -TTTTCCTTGTCGCTAGCCTCTCCAAACAGCACTTCATCAATATCCACTGGTGTCCATGGA -TGTCCGGAAGGTCCGAATCCTCCATTGGCGTTGCGCTTGGATGGTGACGGGGTACTAGCT -ACCAGAGGGGCTCGGAACTCTGGATGCTTGGGTGTTGATTGGACAGGTGGACGGGATGCG -GAGGGCCGCGAGGATGGGGGGCGGTTTTCAGCATCCGACGACTGGGGTGATAGCGAAGGA -GTGTTTTGGTGAGCTGACGGGAGTGGTGAAACTGCCCGGTTGGAACGTTTTGAACTCCTC -CGAGACTCATGCACTTCATGAGGGGAGGTCTGGCTTTGGGACAAGGTCTCTGCAAAATGC -TCTTCGTGAGGGATATCGCTGTACCGTTTCACGGTCTTTGGCGGTAGACTGGGCGTCTTC -CGATCAGATTGTGGCTCGGGAGACATTTCTGATGGAGAGGGAGAAAGCTCCACGGGAGGA -TTGACCATCTCAAGCTGGTCAGGCAAATCATCTGTAGTCTCGAATTCATCATCATGTTCC -TCGGCCGACGGTTCGAAGCGATGTGCCTCGACTGGGGGATCAAGAACAGGAGGTTGTGCC -GCTTTTTTCTCGGATTTCTTGGACTTTTTATCTTTCTCTGATGTGGCATCCTTCTTGGAA -ATCTTCTTTGATTCAGCCTTGAGCTTTGGTTCTTCGGATTCTGATGCATGCTCCGGCTCC -TTGCTGACAATCTCAACGGAAATGAAGGAGTCATGACGCTCAGAATCATCCCGAACATCG -GGAGCCCTTGACGCTAAGGGTTGCCGAGGCTCTGGCGACTGTGCTTTTGTCGTCTCCTCA -GTCTTGGTCTTCTTTGTCCCCTTTTTCTTAGCTCCCTTCGCCGGTTTAGTCTTTGGGGCT -GGCAACTCGGCGGCCTTCGGTTCATCATTGTCCGGTTTCACGTAAGGATCTGAACGGTCC -GCGTGATCACTGGGATCCTCTACTTCATCGCGCGTGATCAGTTCAGGGGATTTAGCCACA -GCCTTGGACTTTTTGTTCGATTTAGTCTTCTTCGATACCCGTGGCTCTGCGGGCTCTGCC -TCGATTTCAGGCTCGGGTTCGGCAGGGTTGGGGATATCTGCTTCGAGGCCTGCTTCAATT -TCCGCGTCGATCTCTGAGTCACGAGGGGCTCGCGCCTTTGACAGGACTTTGGGTGCAACA -GGCACATCAGACACCTTGCGAGCCTTTGACACTCTCTTTTTCGTGGGCTTGCGACCTCTA -TCTGTTTCTTCTTCCGAAGCAATGTCATCCACTGGCTCTGCATCTGCCATTGCGGAGTCA -CTGTTTTCGTAATTGTAGTTAAGAGCGATGCTGCTACTCCGTGTCTTTGTAGCGCGTTTC -TTCGCCGGGGGTTCTGCTGGCGGCTGCGATGGCTCTCTTTCCAGTATAGTAACAGGGTTT 
-GGTTCATCCTTGATATCCTCACTGGCCCTCTTCTTTCCTCTGCCTGTACGTTTGGGTTTG -ACAGACTCCGGTTGGGCGATTTCTTCAATTTCGGCCTCAATCTGACTTTCCACCTCGGCT -GATTCTTCCTTCTTGCTCTTGGTGCGCTTTGATTTGCTCTTGGTTGCCTTCTTGGTAGGT -TTGGATTTCGTGGTAGCCTGGGACATGATGCTCTGGTCCATTGAGTCATCCATCTCAATA -TCGGGCGCTTCAGATGTAGCTTGATCAACGGATTGCGTTGAAAGCCGCGAGGACGCCTTG -GAGACCCGAGGTTTCTTAGCACGAGTCGATTTGCTCTTTTTTCCTGGTGGCTGGGCAAAG -ACAAAGAATGAACAATCCGAGGAGCGGCGATAGTGTTCGTCACTGAACAAGTCAGCAAAC -GCCTAATCGGGAAACAGCGATGAATACTCACAATGGGTCGTCCTTGGGTTCCCAGCCATC -CAAAGACAACTTGCAGTAAGCACAACTGGCTAGATCATCGCTCTCGTCTGTAGGGCAGAA -GTACCATCCGCCTTCAACCATCTGTCGTTGTTGTGAGCCAAAAGAATACAAATATGGCGA -GCTCGCATTACCTTATCCGACTGGCATACCCAGCCCTTTTTTCCGTCGTGGGGCCATGCC -GTACCAAATGTTGACAGTCTCGCTTGTGTAATTCGGTCGCCCGTAGGGTCTTCAATCGAG -GCGGGGTTGGAGCTGCTTTGCTGGAGGTCCATCATGATGGCCCATCCGCAGTCGGGTGCA -TGCTTGAGATGCTCTGTAACGGGGTTGTCGTCCTCCTCCCAGCCGTCCAGAGCCCGGTGG -CACTCGAAACATGTTGTGTTATCGGGGTTTGTCTCGTATGGTTTGTAGTAGAATCCAGCG -TGCGCCAACTAAAATCACAACCAAGTCAATCACCTTGAAACGGGCGCGCGGAACCTCTCC -AAAAAAGCACATACCTCAGCTGGTGAAGGTCGCTGATGCGGCCATGTAATTGCTTTGGGT -GTTTTTGCGCCGGAAGATCTGCGCTTCTCCGGCTTCAGCACTATATCAAAGCTGGCTAGG -CGGGCCGCAAAGGTATCCATTTCTGAGCCCATGGCAGTGTATCAGAGCAGTAAGAGTGAA -TCGGTACAGGGATCCGACATGTAGAGACACCTTCAGTGGAGGAAAGAGGTGGATATTGTC -GAAAAAGTCGTTGTTGTTGACCCTCCGCGTGTGTTTACTTTTGGTTGTTATGCGTCGACA -CGCGTCCACGTGACCCTTACTCAGCACCCGAGGGAGCCCAGAGCAGGACGGGGAGTACGT -CAGTGCCGGAAATGTTGACGGAGCGCCCCAATTCGCTTCACTTCAATTCACTCATTTGCT -TTTCATGCGGGACATTTATGCTCTTTGATCTGACTCACTCATTGCGCTCCGCGCCTCGAT -TTCCTTTGTTCAAACCTCAATCATGCCGGGAGTAATAGTATGTCAGACCCCCCACTCTCC -CCGGTGATGTCGCTACAACTTATCTGCTAGATGGTTGCTTCTCGAGGAGCTAAACCCAGC -TCTCTCTGGCGCTCTCGCACCCCTATCTCGTCCCCACGCGTTGTGTCTTCTTCGGCTCGT -TGTCTGTCATTCAATTGTCATTCGAATACTCCCTACAATGGCCGATTCTCGGCAAATAAC -GGCGCTTCGACATCGCACCTGCTACGCAAGGTAACCACCAACAAGTCGTCTTGCTCCTCA -TTCTCGACATCTCCATCTCCGCTGCTGTCCGAATCGAACTGGGATGACAATCCGAACTTG -TCAATCACCAATTTCTCGGAACTTCCCTCAAAAGACTTCGGAGTCAACCAGCACATGATC -ATAAACCAAGATTTCAAGGAGGCATTGCGAATGATCCTTTGGCAGTTCAAAGCACCAATT 
-CGATACGCCTTTGCTTATGGATCGGGTGTCTTTCCACAGACTGGCAGCGGAGCTCCGTCC -AGTTCACTGCACCCCTCGGCACCCACTGCCATTCAAAATATGCAAAAGGGCTCCGGGAAG -ATGATTGACTTCATTTTTGGAGTGTCGTACTCCCAGCATTGGCATGACATCAATCTGAGC -CAACACCGTGATCATTATTCTGCATTGGGCTCAACGGGGTCGTACATGGTCTCCCAAGTA -CAGGACCGCTTCGGCGCGGGTGTCTACTTCCATCCCTATATCACTGTGAACGGGACAATG -ATCAAATATGGCGTGGTGAATCTTGATACGCTATGTCGGGATCTCACTCAGTGGGACACC -ATGTATCTCGCCGGTCGTCTGCAGAAGCCAGTGAAGATCTTGCGGGACCACCCGAAGGTG -CGATTGGCCAACCAGATGAACCTGTTGTCTGCTTTGCGAGTCGCACTACTGCTACTGCCA -GAAAGATTCAGTGAGTTTGAGCTGTATAGCACAATCGCAGGTATGAGCTACATGGGAGAT -CTGCGCATGGCCCTTCCAGCCGAGGACCCCAGCAAAGTGCGGAACATTGTCTCAGGACAA -ATGGCGCACTTCCGCCGGCTCTACGCGCCACTCATCTACAATTTGCCAAATGTCACTTTC -CAGGACCCTCGCTGCAACAACCCGGATTGGATTGATGACCCCAACGCCATCTTGGCCATG -GAGCAAGATATGGATCCAGTGAAGCGTGGTAACATGGTTCGCCGTCTACCTGAATCTTTC -CGGCAAAAGTTGTACTTCCAGTACCAGTCTCGCTTCGGGATTCCCCGGGCCGATTTCAAT -AAGATGGTGCAGGAGAGTAAGGACGCGGACGAGGTTCTTCGTCGGCCTCAAGGCGGCTCG -TTTGAGCGACGAATTGCCGGGGATGATCACCTCCAGGAGGAAGTCTCTAAGTCTATTGAG -AAGACAATTCGCTGGCCAAGTACTGTCCAAACGATCAAGGCGCCGTTCACGGCAGGTCTT -GCTAAGAGCTGGACTTACATGATGGAGAAACGTGAGAAGCACAAGAAATCTCAGATGGCG -GCCGCCTCAGCACTTGAAAGCTCCCCTGGGGTAAATAAGGTCACTCAGGCACCGGATTCA -TCGGAGTCCAAGGAAACAAAGAAAGAATAAGCAAGTATATTTTGGCAGACACATCTAGGT -TGCAGACATGCCGCATATGGAGGCGTTGTGGGTTTTTGTCACCCATGTATCAACATCATT -GATATATCTGGTATCAGCAGGAAATTTTTATTGGGCCCAATACATACGGACTATATAGAG -CCTCTCAATTGTTGAATAAACCATACAGTTCCTTTTTTTTTTTGGCTTTCTTACTCTCTG -CTTTCTGGCGTGCCATAAATATATGATTGGTGACTTGTGGAAAAGAATAGAAAGGGGCCT -TTTTTTTACACAGACCGGGTTTATTGAGGTATCATGAGATGAGGTAGAGAGGCTGTATAT -GCTCTTGAGATTGACAGAAACCCCCGCTGCAGCTGATCGAGAAAATATGCATAATGCAAG -GTGGACTACAAGGCCATTTAGAATATAACTTTTTCGAATGCTTCCTGTAGTCCTCTACTC -CCTAACCACGCCTCCAGCGCTTGCATATCACCTCGATTGGCCCGTTGGCCACCTCGCTCT -CCCAGCGAGTCCGTAATTTCCTCCATTCCGGAAAATACTCTCTCAACACCTTTTGTCTCT -CGAAACCTCTCGCTGACTTTCTTTTGGTGACTGGGGATATCACCCCAGGAATGTGTCTTG -ATGAAACTCATATAAGAGTCGATGTCAGGCGATGCACCCTCAAGACACAATACGCCCGGC -TTCCCCGCCAGCACGAAGCCCGTCAGACCATATCCCGGTGCGAGGTTGACCATATCATCC 
-CTCTTTTTACGGGTTGACAAGGACGGGAAATAGAACCAGACTCGCACAGTTGGACCTCTG -TCTGTGTCTTCTGGGGTGACTGTCTGAGACTGCTTGCTTTCAAGAAAGCGGCTGGTTTCT -GCTTGGATGTATTCGAATACCTCTAAAGCATCATCACCTTGTGGTATTGAAGCAGTAATT -CCAGCAACTTCGGCTCTGGACATCCACTCTGGTTGCCGAAGCGAATATCCCAACGGTGGT -GCTTCGTCAAGTATCTCTGGATTATCACATTGCAGTGGCACGGAGATGTTGACGTTTATT -GATTTGTCTCCATCTGCAATTGGTAGACAAACGGTAAGAGAGATACTTGAAGAGATCCTA -GGAGGTGTAGAGGTGGGATTCTCGCACCAGTGACGGAGTTTCTCGACACACTGCGTTGTG -GATTCGGGGATTTCCAGCTCGTCTGGAGACGGGAACATCGCCATCAGGAGGTCAATCGTG -GACAATTGGGACTCCATGATGTCCATTGGTAGAACGGATGGTTCTTCAAGGACCATACTT -TTCATTCTGAATGCAGGGGTCACCAAAATCGAAGAGCTTGCATATAACCAAGGGTAGTGG -TTTGCAGTGTTTTGAGATGATAAAATTAGAGATGTGTGGATTGAAGCAATTGTGATTTTG -GCTCGTAAGCTTGAAATGTGCTACAATGTGCGCAGTGATATAACGAATCAGATGATTGGT -TACTGAAGAATAACTGTCTTCTTTGAGCTCACTGAGTCGAGGTCTTGAGACTACCCATCT -AGGAAGCCCTTGCAATCAGGGCCGACCTCGGGGCACCAGGGCCGGCCCAGGCGGGAGATT -TATTATATAATCACTTATCAGATCCACCCGTCCATGGATCTAGGGCTGTATGCACACTCC -AAGTCGATCATTAGTGATTTGGCTAGAATAGATTGTATATCTTGTTGATGCCTTGGGAAA -TATTAGGACATGCATTATATTTCATGTAGTAGACTATGACTAACATCGCAGTAAGCGTCC -GTTTAGTATAGATTCGCCCCCGTTCATCTTCTTAGCAGCATGATAGCATTCCCAAGTGAT -ATGCGACGGTCAGGACTTGCATCTGTCAAGCGTCGTGCAACATCCATCAATTGATAACCA -AAAACGCTATCAGCGAGGTGTTCTGCCACACCCAAAAGTACTTTACATAAGCCCAAGAAT -CATTCCGAACTTGCATATCAAATGGCAAGGCATCGATTCCCTCTAAACGTTGGTTCAGCC -AATCCTCCACCGTTTCAGATGCAAGCCATTCCCACGTAAAGCCACAGCTGTCGCCAATAT -CAATGATAGCAGCGTGGCCTTTTGAATTCATGACTATGTTGGATGGCTTCAAGTCAAGGT -GAGACCGATGAGACTCATGCATATAGATCAGAGCTTCTCCGATCTGGACCATCCGGGCAC -TCCAATTGAAATCCTTGCTAATTTAGCCAGATTGAAGAACTTGCTCCAGTGAGCCTAGGT -CCCAGCATTCGATAAGAAATCCAGAAATAACCTTGGGACGCTCACTAGCCGGGTCCATTT -TGTAGGGGTAACTGCATATGACAACGCCAGCTAGCTGCACAATGCCAGAGCAACCCTGAA -ATTGCGTCAGCACCTCGAGTTCCTTCATTGTGTATTCGCTGTCGCACGGAATATACAGTG -GGCGATTGATCTCCTTGTAGGCGAACACTTAGTTTTCAAGCAACACCTTGGAGACACCTC -AAAGGATTTCACATGTTGACTGCAGCTGATTGGCATCGATTCTACGTAGTGAGGACGAGG -AGCAGACAGGATAGAAAACCCTCTTCGGGTCTTCTGTAATCTCATAATTCATTTTATGTG -CAATGGTTGCATAAGCATTGTCTTGGGACTTGCAGCCAGGTCGAAATTTGACAGAATTAT 
-GGGTCTCTTTGGTGATGGTCAACGAAACCTTGGTCACAGTACCATCTAGGGGAGGAAGAG -CGCGATAGTCGATAGATTGGGCAAACTGCGAGAATTCATCGCATCGGAGTCTTCTCAGTG -GTCGAAACTGAGTCAGTCTGTCAATAACTATGATCTTGGTCCACCATTGGCAGCCGCCAT -CTCGAAAGATCAAGGTCAGCCATCGACTTTGTGGCCCAAGATTTCTATTGGGTTTAGAAT -CATTGCTGGGCTCACTGTCGTAATTTTCGTCGTAGTTTTGGCACTTCAGCTGGAGATGTG -AGCAAATTTCCACGTGAGACATGATTGACCACATGAGCATGGAGGGACGGCGACTCGTCC -TGGGGCAATTCTCGGGTGACACCCCCTTGCAGATTATATATATCCAAACAAGCGCCAGAC -CTTTTCGGCGGTATATTAGGGGAAATCACTACGTAATCCAGATGGGACTGTGCAAACTAT -CTCGACTAATATGCTAGTGAGATTCTATTTGAAAAGACACCTAAAGCAAGACCGAGGATG -TCCCTACCATTGCCGAGCATCTTGCAAGCTTGCCCAAGGCCAACACCCAACGGCCACTTG -TAGCCTTGATCACACAGGGTTGTTCTCCTGCAATCTTTGCTATTTCTGGCACCCCTGGGT -ATAAACAGATTGTTGTCCGTAGCATCTCAGGGACTCATCTCAGGGACTGAGGTTATCGAC -CCCACCGGTGCTGGGTGAGTCAGAACAGCTTTCAATTGCATCGTCAGCTTCGAAAATGAA -GTAATGGGGATTACTGACCATGTCCGTGATAAGTGATGCATTTGCAGGAGCACTTCTCGC -TGGGATTGCAAAGGGACAACCATTGGAAGTTTCCATTGATATGGGACATCGGCTTGCTTA -CTTGGGAATCCAGCAAATGGGACCAAAGTACGTCTTACCGTTGCTCCAAGTATATTCTGG -GGGCTTGCGCAATCAATGCCCAATTGAGAAAGGCCAGAGTCGTGATATATGAGGGATTCT -GTAAGTGGCCCCTTCGAGCCAAAGCTCCGATGAGGCATATGTCCAGGCATCACTACATCT -GATAAATACACAGGTGCAAGATAAAAAGGCCGAAGAAGAACGGAAGACGTAGCTAATAGG -CAAAGAGATCCAACCTGCAAGTTTCAGGACCAGCATTACAGGAAAGGCAAATTTTTCATG -TTGACTTGATGTACTGCTCATGCCAAGGGATCTTCGTCGTCTTTTTGAGAGAATTGGCTG -AGATGTAGATAATTTATAGTGTTTTAGTGTAAAATTGATCTTAATCTACTGGAGTGTATA -TAGATCGGCGTTTGACTCATAGAGTTCCGATAAAATCCCCCCCCCCGGGCACCCCCACCA -ACTTTGCTCCAACACATTCCTCACCACCAATGGCTTCAACATCTCCCTTTAATTTGGCCA -CGTGTGCCCGTCCGAATATCCTGGCATTGCAGCCTTACCGATGCGCCAGAGAGTATGCTG -CCTTTTCCACATCACCGATACCGGCGAGCTAATGAGTCAGTGACTACAAAGATGATGGGA -CAAACGTCCTCCTCGACGCCAATGAAAATGCCTTCGGTCCTGGTCTTGCCCTGAATAGCG -AGGGAGCATTACAAACCTCCCAGGCTGGCAATGCCACGGGAGCCTCCAAGCCGGAGATTG -ACTTCCTCGGTCTGAACCGCTACCCCGATCCGTGAGTGGGATTCTACAGTCGCGTCTCGT -TCAACCCAATTTGACCAACAATGTGTCCCTCAGTCACCAGATTGAATTAAAGCAGCTCTT -CTGCAACCTCCGCAATACCCACCACCACACCCCTAAGACCCTCCTCCCCGAGCACATGTT -CTGCGGCGTTGGCTCAGACGAAGCCATTGACGCGCTCCTCCGCTGCTTCTGCGTTCCCGG 
-CAAAGACAAGATCCTCACATGTCCCCCGACCTACGGAATGTACAGCGTCAGCGCGCAGGT -GAATGACGTGGAGATTGTCAAGGTCCCACTAGACGCTACAAACGGCTTCCACCTGCAAGC -CGATAAAGTCAACGAGGCCCTATCAGCCGACCCCTCAATCAAGCTGGCCTACATCTGTTC -GCCGGGCAACCCGACCGCGAATCTGATCCGCAAAGAGGATATCCGCAAGGTACTCGAGCA -CCCGACCTGGAACGGCATTGTGGTGGTCGACGAGGCGTACATCGACTTTGCGCCCGAAGG -TTCCAGCCTGGCAGAGTGGGTGACGGAGTGGCCCAACTTGGTGGTCATGCAGACACTCAG -CAAGGCCTTCGGTCTGGCTGGTATCCGTCTGGGTGTGGCCTACGCCAGTGTGGAGGTTGC -CCGTCTACTCAACAGCCTCAAAGCCCCTTATAACATCTCCAGCCCCACAAGCGCGATTGC -CTCAGCGGCCCTTACACCGCCCAATCTGGCGGTGATGCACAACTTCCGTTCGCAGATCCT -CGCGCAACGCGATCGCCTGCTCCGTGAATTACCGGCGATCCCTGGTGTGGGTCAGTTCCT -TGGCGGCAGCGACGCGAACTTCTTGCTCGTGCAGATCTTGGACGCCGAGGGCCGACCCAA -TAACGTGACTGCCCTTGCTGCTTACGAGGCCATGGCGGAGAAGCGCGGTGTAGTGGTCCG -CTTCCGTGGCAAGGAACTTGGGTGTGAGGGCTGTCTTCGTATCACGGTCGGCACCGACTC -GGAAGTTACTAAATTCCTCCAGGAGTTACGGGCTGTTTTGGGTGGGTTGCGTGCTGGGGC -TGGAATCACGTCTGTTCAAAATGAGGAAAAGCGGGAGGAGTCGGCGGCTGCTGTGGTTGG -ATGAGAGTTGCGCGATTAATATGAGCGATCATAAGGTTTGGGAAAAGATAACATTGTTGC -ATTTGCACTTTAGGGACTCAAAGATAGAATATCATCCACAACTCCTCGCTTATCTTAGAC -ATATATAGCCCATAGATCATTTTCGATCTACGGAGGATCTAGATACAACCTCAGGAATGT -AGCACCAAATAGTCCACGTGGTGTGCAACACTCATGATAACCCTGACTCGAACATGGAGC -CAAGACTTCTTCTCCGTTCCTAACAAGGAACCTGGTATTTGAATGGCAATGTCGACCAGG -TGTTTGTGAATTCTTTGTTCACCGCATAAGTGAGACGCTAAGAGTAAATGTGATATGGAT -TGCCCAGATTAGCCAGTCGAAGCTCAGTGGTCTAATGATATAACGGCTTATCTGGACTTC -CCCTCTTCCTGTTATGAAACTACTAATTTCCCCTATATCTCCCGTATCTCTCCCAGGATC -TTCTCGCGATCTGATGCCCTCCTAGTCTCCGCAGCCCCATAACCGTTAACCCCCCCCCAC -TTTGGCACGTGTGAGCAATGTAAACACATCGTGTGAATCGCTTTCTGCgagagagagaga -aagaaagagagagagagagagaCTGCTGTTTTTCTTTTCAGCATGGCCTCCATGGCCTCC -GTGGCCATGACGCTGGTCACGCGTGGCATCTATCCCGACTCCCCACCAGCCATGCAGACC -AGAACCGATATCAATCCAACACTGATGATGAGCTGGTGGGCTACCGCATTCTCTTTGGTG -ATCATCGTCATCCGCCTATGTGGTCGTTATGTTCGTGTCGAACGCTTCTTCACCGAGGAC -AAGGTCATGATGATCAGTATCATTCCTCTGATGATCCGCATGGTCCTGGTACATTTCATC -CTCATATTGGGAACCAACAATACCATCACGACAGGCCTGACAGAGAAAGAAATCTCAAAT -CGAGAGCTGGGCAGTAAGCTAGTGCTTGCCGCGCGGATATGCTACGCCATCTTGTAAGAC 
-ACAACTCTTGTCTTCACCTTGGATCTACCAGTTCAAAATGACTCACAACTATAGCATCTG -GACCGCCAAAGTGACCGTCTGTGAATTCCTCAAGCGAGTCACAGGGCTGACCTGGCGTCG -ATCGACAACCTTCTTCCTCCGCTTCATGTCTGCCTTCCTCTTCTCAACCCTAATAGCCGT -GCTCATCGCAACCCTATCCGAATGTCAACCCTTCAACCACTACTGGCAGGTCATCCCAGA -TCCAGGCCCAACCTGCCGCACCGGCTACATCAACCTAATCACCATGGGCTCCTGCGATGT -GATCACCGATCTTTTACTCGTCGCCATCCCAGTCCCAATAATCCTCATATCCCAAATGCC -TCTGAAACGCAAACTCGCCCTATCAATCCTCTTCTGTCTCTCCCTGATCCTAGTCGCCAT -AACCTCCTACCGCGTCCCCTCCGTAATCAACCACAAAGGCTCCCAACAATACCGCTCCCT -CCTCGCCTCCTTCGAAATCCTCGCCGCAACTGCAGTCTCAAACGTCCTAGTCATAGGCTC -CTTCGTCCGAGACCGCGGCCCAAAGAAACTAAAATACAAACGAGCCCAAGGCTCAGCTTC -CGTCTCGGAAAGCATGGATAAATCATTCGTCCGCAGAAACACCGTCATGCAAAACCAATG -GGGCTCCGACTCCGAACTCGCAACCGGTCTATGTATCCGTCTAGACCCAGATATCTATAC -CATCCCAGGAACGTCAGACGGATCACATCCACCAAGACCAGCACCCGTCGCTCCACCAGC -TCACATCCCGTTAGCAGTCGCACGAACGGGAACCCTCGATCCAACCTGGTCATTCGCAAC -AACTCGTCGCTCAGAAGACGACCGTGCTTCCACAACAGATAGTCTCGAGCCTAAAGTCTC -GCCACGAGAGTACCTACGCACAAACCAGTCCCCGCGCGAGATTTCGCCCACATCAGAGAC -GCCGCGACGCGTCTCCTTCTCAGATGTCGGTGGTTTATTAACAAGAGACCTACCGGAGCC -AGGACCTGGTCATACACGCGCCCAGACATCCTTAACATCGCCTGTGGAGAGCGTGAACCG -TCGAAGAGGAGGCGGGAGTAGGGCTTTCTTGGAAGATTTGGGAATTGTTCCCCGTACGAC -GTTCGGACTTCGACCGCCGCTTTCTGGGCCTGCGGAATTGCCATTTCCTCCGACGGCGTT -TGGTTCTCGTTTGGCTTCGGTGTCTGGTTCTTCGGATTTGACACTCGATACTGATGTTCA -GCTTCATGATGTTGGGGGTTTGTTATCGCGACATGATCATTAATatgtgatttgatgtga -tgtgacatgatCTTACGACTTTCTATCTTTTTTTCTTTTTGTTTCTTGGATTGGGATGAG -ACTTATTGTCTCCTGTGGAATATTGGGTTGGAGTCTCTCGTTTTGCTTTGTGGGACTAAA -CGTCTCGTATCCCCCACTGTATCTTCCTTGTTTACTGTTTCCTGCTCTGTGTTATATTCC -CCCTCTTGCGATTATTAGCGTTGGTGTTCGAGGTGTTACCTTCTGAGATCCTTTGGATAC -TTCAAATCGCTCGGACATGACTCTGGCTAATAATGTACCCCTACATAACTATTATCTACA -TATGCAACATAACATGTAATGACCTTGAAAATTCATTCAATATCCCGATCACACTGTAGT -AGAATTGGACTTTCTCCTAGAGTTTTGTCTTAGAGGTAAAATAGTAAAATAAAGGCTTCG -ATTATATGAATTTTGGAAACAACCAGCTGTCCATTTATGAATCTTAGATATAATGCATAT -GCAAGAACCAAACGACAAAAAATAAATAATAAAAAAAAAGACGCAGAGAGAATGCTAGAC -AAACGCTAGACTTCATGTATAAGTTGACGCATTGAGCATATACGGATGTCATCCTCGAAT 
-CTATGCCTTCTCCTCAATCGAGGCCTTCTTGAGCTCATCCTCAACGTCGGCGACGGCATC -CTGCTCCTGAGCCAGCTCAGCCTCTGCTGAAGGAGCACCGTTCGCTTGGGCGGGCTTCTT -GGCAGAATCGGTGGCCCGGCCGTTGGTCTGAGGGGCGGCAGTGGCCTTAGTCTCCTCCTC -TTCGATGCGGGTGATCTCTTCCTGAGCCTTCTTAATGTTCTGCGGGGTAATTAGCAACTG -GTAACAGCCGGTAACCGACAACTCTCAAAGGGAAATGCTTACCTCTTCCGTCTTGCTGGC -CTGGTTCTTCTTCCACTCGGTGATCTTGGCAGCCAGCTTCTCCACGGCGGCGGGGACATC -CGCCTGGTTCATGGGAGGATCGATCTTGACCTTGGCAAAGTCCTCAATGATACCAACATT -CATGTTGAACTTGGCACTTTCAGCAGGGGCAGCACCCTTCTTACCGCCCTTCTTGCCCTT -CTTGCCACCGGAACCGGTGAAGTAGTCCTCCTCATCGTTCTTGCGCACGACCTTCATTCC -CTTGATCTCAGCGGCTTCAACAGTGCGGCCAACTCCGGCACGGAAGTTGCTGCCCTCGTC -TTTCTTCTCGCTCAGGCCTAAAGCGGCGAAGTCATAGGCGGGATCGAAGTGACGGATCAG -ACCCTGTGCAGTCAGAATCTCATCAGCGTAGGCAAGCTGCGAGGCCTCCTCGAGCTTCTT -ATCAGCGTTCTTCTTGCGCTTCTCGCGCAAGAAGGCCTCCTGCTCAGCCTTTTGGCGCTC -GCGGCGCACACGCCAGGCCTCATCCTCGTACTCCTTGTAGGCACGGCGGGCCTTGAAGTA -GGTATCCTTGATAGTGCGGATAGCAGTGTACTTCTCCTGCTGCTCGTTGCGCAGCTTGGT -GCGCTCATCGCGCAAAGCGTTCAGGTTCTTGAAGACAGCGTCCTGCTCGGCCTTGATGGC -ATCGAGCTCCTTCTGAATCTCGGCGTACTTGTCGCTCAGCGCCTTGGCCTCGGGGTTGTC -GAGGGTCTTGCGCAATTCGGAGATCTGTGTCTTGATGTCGTTGATACCCTTCTGCGCCTC -ATCCAAGCCGGCGAAGCTCTTGCGCTGCTTGCGCAGGTTGGAAACCTCGGTGAGAGCCTT -GCGCTCATCCACCAGGCGGAGAGTGCCAGAGTCAACCGACTTCTCGAGGCGGGCGATCTC -CTTGTCGATCTCCTCGACGCTCTTGAAAGACAGACGTCCGCGAGAGTTGTTCTGCTCAGT -AATACGGGCCTTGAGGTTGGCATCCAGGGCGTTGATCTTCTCCTGGGTGGAGGAGCGGCT -GGACTTGAAGCCGGACTGCTTCTGACGAATGGCTGAGAGTTCGGCGCGCAGCTCCTGCTG -CTTCTTAACGGTGGGTGAATCCTGGTTGTTCGGCTTGGCGTTGTCCAACTTGGCCTTGAT -TTGGTTCTGTTGCGCATATCAGTCATTGGCCCGAGGACATGGTATGCAAGTAAATAATGA -ACACGAGGGCAATTAACAATCAACAATCAGAGTGGGTCGGGATCATCCCGCAATCCAGAG -TCAGATGAGAGCTTGAGTGACAATTCCAAAAAAGAAAGGCTTCCATACCAATTTCTCCTG -AGCAGCAGTGTGCTCCTTCTCAGCCTCAGCGAGACTGGCCTTGTAAGCGGCCTCATCGGG -CTTGGTGGGCTTAACCCGCTCCGCAGCAGTGGCAGCCATTGCGACGAATTGAATGTAAAA -AGAGGTACAGAGAAGGAAGAATTGCCTTCTGAGGATCCTGAAGGGATTAAATAGACCAAA -ACCAAAGGAAACTAAAAAGGGGAAACTTGAGATAATTAAAATGGTTCGGTCGGTTTCTCC -GCTTAAGTGTCGCTGGCAGAAAGTTCCCGCCGATTTTTCGATTTCACCTTTATCTCTCCA 
-CCGGAGCTCTCTCTGCCCGTTTTTTTTGGTTCTCTTAACGGGATAACTTCTTATTTCTTT -TCACAATCTAGTCATCGACTACTAACTCCATTATGGATGGAGATTCAGCACACTCCATGG -AGGCCGAGCGCGAGATGACTCGGCTCTGGCGCACCTTCAGAACAGTCTATGAACTGCTCG -CTGACCGGGTAGGCACTTCAGATCGAGTCCCTGACTTCTCAGTGTCTCTCCCATGATACC -CATTTATACACGGCTAACCATCGGCGACACACCAGGGCTACGAAGTGTCTGATAACGAGC -TGGTTCTCCCGCTAGAAGACTTCCGCGCAAAATATGCTGACCCTCTGGGTTACCCTGAGT -ATGCGAAATCCTTGCCTCCGCCGTCCCATTGTCGTGAGCTAGTGAAAAACTGAGCGAAAC -AATGCCGGCACACATCATTGAGGAAGCACATATCACTAACAAACCGTTCCGCAACAACAG -TCGCACTAAGATGAAAATCCAAGCCCGCCCCACCGAAACCATGAAGTTGAAATACACAGC -GCTGCCCAGCAAAGCGAACCCAAACCCCCAGCCCGATTGCGGTACAATCTATGTCGATTT -CTGTCCCGACTCCTCAGGTGTTGGAACCAAGCAAGTGCGCGCCTTCAACCACCTTGTTGA -TGAGAACAACTTCCACACTGGTATCTTTATCACGCAGACACCCATCTCGCCCTCTGCCGT -GCGCCTGCTTTCCGGTGTTCCCGGCCGCATTTGCGAGCACTTCCAGGAACAGGATCTGCT -TGTGAACATCACTCGCCACGAGCTGGTCCCTAAGCACGTCCTGCTCAGCCCCGAGGAAAA -GGCCAAGCTCCTTGAGCGCTATCGTCTGAAGGAGTCCCAGCTTCCTCGCATACAGGTGTC -CGACCCTGTCGCTCGCTATCTTGGTTTGCGCCGTGGTCAGGTCGTCAAGATCATCCGCAA -GTCTGAAACTGCGGGTCGCTATGCCAGTTACCGCTGGGTCATTTAAGTTGCGAAATAAGG -TTAGTAAGAATGGAACAGGAAGATTGACACGCGACTCTTCATTGCCCGATGCTAGATCCT -TCTCTTCTTTTTTTTTTTCTTTTTCTCTCATTTGGGCGTTTTGCAAGCCCAAAGAGTCAA -AATCCGCTTCCTATGAACCTCAACCTCGCAAACTCGAATTGCATCGGGTCCACTTCACAG -TCTCCGGTATGACCGCTTTCCATACTATGCAGCTCTGCAGGAAATACCCAACTGGTAGTC -GAGAAAGAGCAGCTACAGAAGTGGACACTTGGGAGTTTTTTTTTTACTCATTGATTCACA -TTCAACAAACTCAAAAAAAGTTTTTGGTCTCCCCCTTGTATTATGGGCAGCATGGCGTAT -AGGCGCGAAGTTTCTCCTGCTTTGGGGGATTGCGATGGGGGTTGCCTGTGATTTTACATA -ATGGCCGTGTCTTTGTATGTCTAGATGAATCCAATCTATGAATGGCTGTCATTTCGAGTC -GGGTGATATAGTATACCTATGTCCCCAGTTATATATGGAATAGTCTTTCCGTCTGACCTA -GAGTGACGGGCATCTCAAAAGATATCTCAAACATCTATTATAGAGGCATGTCTATCTACC -GGAAGTATTCTATCTTTTCTTACTGACCCAAACCAAAAATGAGGCCGCGTAAGTATAAAA -TCTAGGAAGGAAAACCTTGGTTGACTAAGCCCCGAACTGCGCCACAGCAATTCTCTCTCG -ATTCCGGCGCTTCCTGTTCTATTTTTCTATCACCCCTCTCTCCTTACAGTTCGCGAGTCG -AGTCTACGAACCCTCCAGATAAATCAGCCTCCATACCCACATTATCAGCGCAATCCCCTC -AAATCGGCACTAAGCCTAAACATCTCCCGGGAACTCCACCTTCAATTTCCACCATGTCTT 
-ATAACAAACCCGTAAGTTTTCAAAAGCAATCAATCGCAAACAATCCTTAGACACAAATCC -TAAAATCTAACTTCTTCCAGACAAGTCCACCACCTTCCTACCCCGCCCAGGCCCACGACG -CCGGCCCCTACTACGCAAACTCCCCACCACCGCATGGCCAAAGCCCCGGCGCCGCGAGTG -ACTACTACGGCGGCGGCCAGCCCCAACAGCAGGGCTACTACCCACCCCACCCAGGCTATG -GCCCGCCACAGCAACAACAGCAGCCCTACTACCCCCAGGGCCAGCCGATGTACTACCCCC -CGCAGCAGCAGTATCCCCCTCAACAGCAACAGGGTTACTATGCCAATGACCGCGGGAATT -CCGGCGGTGGTGGTGGCATCTGCGCCGGTATCATGGCCGCCCTGGCTTGTTGCTGCTGTT -TGGATATCTTGTTCTAGACTTGCTTTTGGTCCGGATTTTTATTATGCGACTACGCTACTG -GAAATTGGTTTTTGGCTGGTGTTGGAATGCTTTGTCGACTCGCTATGCCTTGCACATGCT -GCACTCTTGAGAGACATATCGAGAGATAAAGGCTGGTGTTTTCCTGCGGCATTCCACTGG -GCATTTGGTGCTGGTTTGTAATCCTGGGTCAGGGTTTGAGTGTTTTTTTTTTGCTTCCTT -CGTATATATCCTTGACTTTATGACTTATCGCTGGCGCAAAACAGGACACGGCGTTCTTGG -ATCCTGGGGTTTGGCTACAATTGTACTACAGTTTTATTCTTATCCTCTCCTCGAATCGGT -TTACTATCTTCTACATCAGTTATCCATCTTTGTCCATCGCTATACATACAGCATAGTCCG -AAGCTAACGACTGTGTATAGAAGAGAATTTGAAGTTGTATAGATACTAGATTTGATAGAT -ATAACTTGAAGGAAATACATGCACATTATCATCCATCACAATATCCATCCCATAATCCCC -attatcaacaaaaatataaatcataacacataaaAGGGTATCGCATCCAAATCCTTACAA -ACACCATCTCCCAAATCACCCCCGACTACTATATTCACTTCCCCCACCGTAAGCAAGAGG -ATCAACATGTCTAACCCCACCTCTCCTAGACCTCCTCGGCGGACGCGGCGAAATATCAGA -AGGAACACTCGACtcctcttcctcaacctcaatgacatcctcatccATCTCACCAGTCGC -AGAAGTAGCCAAAGACTCCTCAACAACAACACGATCATGTCTCGACGACGGCCGACGTCT -ATAGCTATCACGGAAAGTCTCCCGCTCCTCAGCGACTTCAATCATGTCGCGGCGTCGTCG -TCCACCCGCCGATGATCCGCGtcgactgcgcacttcgactatctcttcgtcgtccattga -catagacatcgacatcgatattgaGGATGAGGTTGGTGCTGCGACGCCGCCTCGGGCGAT -GCCACCTGGGTTCACGGCTAGGAAGAGGACGTACATTAGCAGGACGAAGCCTGCTACGCT -GCCCAGGACGATGCCCACCACTGTGCCTGCGGCGGGACTGGTGTCGATTCGTCCGTAAGT -TGAAGGGATGGAGATTGTTGTGTAACGTTTTTGGAGGGAGGTGTCTGTGTGTAGGAGGTT -TGGGTTTAGATCTCTTGTGTTGAGGTTGGGGGTGGTTGGGTCTAGGTCGAGGTCGAGGCT -GAGGCCTTCGAGAGGATGGGTTGACATGATGTCTGATTGTTTGGTTGTTGGTTTTGAGGT -TTGTAAGGATAGGTAGATGTCGAGATAATTGTGTGGGGGCTTGAATATTTCGATCCGAGC -GAATGGATGATTTGAGGTCAGCTTGCGAGCTGAGATGCGGTGGGGTTTAGATGCATATAC -GTCCGAGTGGGTTTTATTGAAAATATGGAGTACAAGCAAGATATTATTAAAAAAGGAACT 
-GAAAAGCGCAACGAAAGAAAACTGTACTTCGTTGATGTCCAACGCAAAAGAAAACAAGAG -GTTGGGGTGGCTGGGATCTGCGGAGAAGTGGAGTTGGAAGCCCTACTTTATGACGGACCT -ATTGGACTATATTGGGAACATTATGTTCAAAACATATCTCTTTAATTTTGAATTACTCTC -TTATGTATACTCCCATCTGAAGCTATTTATTTACTTGCCATTGAACACCTAGTCTAGTAA -GCTCCCTTACGTTTCTTGTACTTAGGCTGGCCCACAGATTTAACCTCGTCCAGCGCCGTC -AGCACAGGAGTAGTATGAATCAAGTCGAAACTCTCCAATTTAACACCATCCTCAAGTGCC -CACACGCTCAAAGCCAAGTAGTTAGTACCCCGGTAATTGAGAATGCCCTCAGGAACAGGG -TAACTCGTCTGGGGACCAATGTTGCTCACAAACTTGCCGTACTGATACCCGTTGACATAG -AACTGGGCACGGTAAGCCGGCGATGAGGTGGAGTTCCCGAAGTTGAAGTACATAGGAACA -TCCCATCCTTTCTCCACATCCAGATCGAAGCTTGTGCTGTAGAATTTGATTCCAGGCTTC -GACAGACCGTCGAAGGGACTGCCGGATTTCCAGTTCTGGGTCGGTGGCTGGGGCTGGTGG -AAGCCCTGGCGCTCAGCGTAGATACCACCCTCATTCAGGGGACCGCGTACTTGGTCCAGA -TAATCCTCGCCGCCCAGGTTGCCGGTCAGCTTCCAGGTAATGTCACTTGCGGAGCGACTG -GACAGCTTGTAGTTCAGGATTCCGCGGGGGCCCTTTAATGTGTCTTCACCGATGACCCAG -TTTCCGTCTAGACCCATGTTGTCCACAACTACGGTGAAAACGTACGACTTACCCTTCTTC -AGGGTTGGTAGCGTGTAGGTTCCGTTGTGGTTGCCGTCAGCATCAATTCCGTTCCATGAG -CCTATGTGGTCCTCGTTCAGCCATACGGAGCTGCCGAATGCGCTGCCACCTTGGGTCTGC -AGGAAGAAAGTTTTCTCCTTGCCGGTTGCGACAAAGTGGCCCCGGAAGAGAAGTGTTCCG -GCGTTGAAGCCGTAGTCAGAGCCGTACAGCGAAGTTGGGGTCTTGAGTGGGTGGACGGTG -TTTTTGGTGGAGGGCTTTGCAGAAACCCATTCGGAATCATCGTATGAAGCCTGGATCTCG -GGGAGACTGTCAATAGACTTCCACTTCAGACTCTTCAGAGTCGGAAGCTTGATCTTCGGT -GCTGTGTAGGCGACGTTGCTAGACCAGATTCCGTTCTTGTCGACCTTGACATGTGTCTTC -TTGCCGTTGATGATCAAATTCTTGGCCTTGGACGGGGCGCCAATCACCTCAATCGGGGTA -GTAGCGTTGAAATCAGCAGTGATATGTAGGTCATTGCCCTTCACATAAGCAGTTCGCACG -AGGTAACCGGCCTTGACGATGAGAGATGAGGCGGCTGCCTTTTTGGTGCTATAGCCAGGG -CTCTCGCCCTTGGTTGGGAGCTGAGGGACCCAGTAGCTATAGGCAGAGTTCCGATCTGGT -AGTTTGGTCAGTTACGATGAAGCTAGGGCAATATGTGAGGAAGCACTTACCCAGAAGGAC -AATCTTCAAATCACCTACTTGCACGATACGGCGAGTGGTGGTGACATCCCAGCCAATAAC -CACAGCCTTGCCCACCTGCTTACTGGTAATGCTCGACGATGACCCTTCAACAACCTCGGC -TTTCGACTTGGAAGAGACAGCCAACTCGTGGTGCTCACCAGGGCCGCCATATAGTACAAG -GATCTTGGATTTACCAGACGTCGTCCATGTGAATACTTCAGCAGAGGAGTAGAGAATGTT -GGTGCCCGCAACATCATAGTCAACAACATGGACCTTGGAGTCACGACCGCTGAGAGTAAG 
-GCTTCCACCAAGCTGGGGGATAGTCACTTGACCCGCGCTAGTAGGAACCTTGAGCTTATA -GTCGACGGACGCCCAGCTTGAGTAGTTGGAGTGTCGAACGATGAAGAAGGAAGAAGCCGT -GTCATCGCCCCCGAGCAGGGGGGTCACAATCAGATCAGGCGAGGTAGTGTACTTGGTTGT -GGTTAACTCTCCCGGTGTTAATAGAAGGTAGTCCGGTGACACTTTGGCAAAGTTGCCAAT -CAGCTTCAGCTCGCTATACTTCTCGCGGGTGATGTTGCGGGATTCAGTAATCGGGGAACC -ATAATCATAGGATGAGTAGCCACCGGGGTGGCCTATGTTACCCCAATTGGTACCACCGAA -GATCTGTTAAAGATTAAGAGTTTGTTAAATGTGACGGGAGAGTTAGAACATACCATGTAC -AGGTTAAGAATTGAAACTTTGAAGCTGAGATTGTTCTTATATATCACTCTCTCGAATTCA -TGGTTCAGGAGGGCAGCACACTTGGCGAATCCAGCACCTCCCCAGGGATCAAAAGCACCA -CTTTGGAACTATACACCAGTTAGATTGATAATCCACACGGTAAATTTTCACATATCTTAC -CTCAACTAGAGCGTATGGGGTAGAAGGACTCTGCTCCATGTGTATTTTATAGAAATCAGT -GGACAGATCCGCTGGCCAAATCGATGGATTTGTACAGTCAAAGCCGAGAGGGTAGTTGTC -ATGACCGTAGATATCCGCTGCTCCCTCGCCAGTTCCAGGGGCATTATGGCCACCGTTCCA -GGCATCATTGTTTATGATAGGCACAACAACTCCAGCATCGCGAGCCTGGTCTTGCACATA -TTTCATATAAGCTCCGTCTGGGAACTTGTCTTCGTATCCACAGCACGCGCCACTGTACTC -GTTCTCGGTTTGGACAAGAATGATAGGTCCTCCGTTGGTGATTTGACCCTTGGCTATTGT -TGCGGAGACATGAGACATGTAGCTAGTTAATCGTGTCAGTATCCACGGTTCTTCCAGACT -TTTATTCTAACACGGTCACATACTTTTCCGTTGCCTTCAGGTAGGCCGCATCACCACTTC -GCAGAGTTCCATTCACGCGCTGAAGCCATCCCGGAAAACCACCACCCGATACTTCAGCAT -TAATGTAGGGACCAGGACGAGCCAGAAGATAGATGCCAGCCTCTTTGGCAGCGTCGAAAA -ATGGCTCTAGAGCAAACGCTCCCTCGGCAGTATAGTGACCTGGGTTACCTTCCAGCAAAG -CCCAATCGACGTAGAAAGATACACAGTTGAATCCCAGAGCCTTGACTTTTTGGAGCACAT -CAATCCATAAAGAGGGAACGGGCAGACTACAGAAATCTTGGTTAGCATACTTCTTTCTAA -AAAAGAAACATGCTATTTGATGCTTTTAACTTACCGGAATGGGTGAATTTCACCACTGAA -GAGCATGATTCTCTCACCATTGACAAACAGAGACTTGTCATCCCAAGTCACCTAGACAAT -TCACATGTCAATACTATCCAAATCTAAGACCTTGGCCCAACCTACATGTTTCTGGAGCAA -TTCCCGCTTAACTGGATCAGGGTGCTCGGTGATCGTAAAGCCATTCAATGAATGGCTAAT -GGCCGCACCAGCGGCCTGGGCTGCCAGATAGGCAATTGCCCAAGAGGACGAGAGCTTCAT -GTCTTGACAATTCACACAGAAGTGAAGGAGGGTGCTAGGGAGCGGGAGAAGCACTTGGAG -TCAAGTCCGATTTTATTGCAATGTAGGGGTTGCGCCACTCATATACGGAGACTCGGGACA -TCACTAACACAAATCAAGACACCACGCAATTGATGCATTGTAGCAATGACCCATTCTCCT -AGTGTCATTTCGTCATTTACCCCAGACAAGTTTAGGAGTGTCCGCCCGGATGCTCCGTGT 
-TGCATATGCGGAGAAGATCCACAACGGCATGTATTTGCGTATGTTGTATTCAAGATTTCA -GAGGAAGAGAGAGCTAGAGCTGATTAAAGCAGTATAGAGGATTGGAATGGTTCTAGTGAT -GCAGATCGTATAAGAGGGTTTAATAGGGTTCCAATGATAAGAGAGCAGACCAATGGTCCT -AACTGTGTAAATTGGACCTCAATGGCATCAACAGGTTGTGTCCAAATTCACTTCAGGCAA -TTCTGGATATATTGCATATAGCCAAAATTCCCCTTGCCTCACACGGTAACCGGTCCGAAG -ACCCCTTTGCGGGTCCTATTTTTCCACATATCAGATGGCAGATGTGTCTGTGTCTGTCCA -TGGCCCTCTTTTTCGGCTTGATCACAGTTCGGTTTGTAGAGCTGTAGTTTGTAGCCTACA -AACTAAAGTAATCTTTGAAATACCAAGCCCGATACTAATATTGGCCCCATTTTAGACTAA -CAGAAGTTCAACCTAAGCAATTCTGCGGGGGAAGTTGAGGTCTGAATTTAGCCGGTGCAC -GTCCTGGTGTTTTTCCGGGGTAATGGGGGAAATGAAAGGGACCCGAAAGAGGTAAATTCA -TGGCTTTTTGATACAGCATAGATCTCCTATGGAGGTTAAAAAAACAAACAAATCATTTAC -ACAGTGGTGTTGAAACGATCTTCCTCATCGCCATCCTTAGTGTATGTCTCACGCTCATCC -ACCTCGTCTTGCGCTTCCTGTCCAACCTCGAGATTCTCTTCCATCAATGCAACCTCCTCA -GCCTCAGCGGTCGGCTCGGTCTTGCCATCCTTTTTCTGTGTAGTGTTGATCAAAGGGGTG -GAATCGTTGTAGATACGCAGCAACATGGCACGGCGCTCTTCCTTGGGAAGCATAAGGTTC -AGAGAATGGAGAGATCGCCACAAGGGCAGAACCATGGCCTCACTGTAGATAACGTTAGTA -AGATGGGAATATCACTAGAAGATATTTGAGAGAAAAAACTTACGGAAGACCGTCTTCTTC -GCCGGTCGCTTGAACGCGAAGGATGCTGCACCCCATGGGGATATCCTTCACTTGTTCGCC -AATCTCGCCCAGGGCCTTCCAGCCATCGTCATTGACCTTGGGGAACATCTCCACAAGGAG -ACGACGCAGAGTCTCCCTCTTGGTCAGGATAACGGACCGTTCAGGACCCAGCCAAGGCTC -GACAATTTTCAGGCCATCGGTCTGAACACGCCATCTGCAGACGTTCTCACGCTGTGCATC -CTGCTTAACGAACATCTTGACACCGCAGTGGACGAACTTCATGCCCTGGCCTTCATTACA -GGTGAGAATGTCACGTCCCAAGGCACTAGTGTAGTAGATTGTGCGGGTTGGAAGTCCTTC -AGCATTGCGGACCATGAAGCGGTCACGGGGGAATCGTTCCGACAACTTGTAGAATTCATA -GATGGAAAGAAGCTCCTCGTTGTTGGGATCAAGGTACTTGAATGGCTCCTCTTGACGAGG -ACCCTTCTTCTTTAAAGATTGTGTAGTAGCCGCGGAGGTAGCAGGTGTGCTGTCGCCAGG -TGTGTCAATGCCAGTTCCCACTGCAGGCGCCGGGGCGTGGATGGGACGGTCACCGACAAG -CACTTCGCTACCATCGTCGAGCTTGGTCCTCTTAGCTGGGACTTCCTCTTCGAGGTCGAA -GTTGCCTGCATCCCGCTTCGAGAGGGGGACAGAGGTCGTCGATTCATCAAGAGTAGTAGA -TATCGGGAATTGGTGGGCGGCCTCGGCAACAGTAGCATTCTTTGCGAGCTCCTTCTCAGC -ATCTTGGTCGAGAGTAACCAATTCATCAAGAGACTCGAGCTTCTCAAGGGGCTTTCCATC -TACCTCATTCTTCTTGGAATCAAGCTCCGCGGCGAGAGCTGCGACGGAAGCCTTGGGGAT 
-AACATTGCTGCTCTCCGGCTTGGCTTTGATTTCGGAGGTCTTCTCAAGAACTGTGATGAA -GAAACCTCCAGTGTCTTGCTGGTGAGGGTAGACACGCATGCATCGCTCAAGGGGCAGGTC -GGCAGCCTCGCCTGTCGGGGCGAACATGCCCTCTGCAAGTCTAGCAAGACCATTGATGCC -CTCTTGGTTTCGGTGATCTTCAATCTCCTGCCAGTTGTTCCACAGGCGGCCCTCGCGGTC -CATCACTTTCCAGGTCTTCAGACCGGGAGATCTCTTGAGGCCGGGCAATTCCTGGCTGCA -ATCAATGATCTTAACGTTGGCAGCGCCACCGCATCGTTCAATTGCGGAAGCAATGACAGC -CTCGTTCTCCACAGGGTTCAGACTGCAGGTTGAGTAAACGACTCGTCCGCCGACCTTCAG -CATCTGCAGAGCGCGAACCAGGATCCGAGCCTGCGTGCTGTAAAGTCCCAGAGCATTGCC -AGGAGTCCAGTCCTTCCAGACACCAACATTCTTGCGTGCGGTGCCGTCGCCAGAGCAAGG -AACATCTGCCAGGATGCGGTCAAACTTCAAGTATCTGTTCCTGGGTTTGCTGCCATCAGT -AGTGGGCAGAGGAGGCAGTTTGATGGAAGGAAACATGGTGGCATCGTGGTTGGTAACGAT -CAGGTTGGGGGAGCTTAGGCGCTTCATCTGGTGAATAAGCATGTGGGCGCGCTTGTAGTC -CGCATCGTTGGCAATCAACAGGCCGGTGCTACGGCCGTCGTCGTTCAGTCCTTCAGGGCC -GAGGGGTTCGGGTCCAGCAGTGCCGTTCTTGATCTGTTCAGTGACTTGCGCAATGGCATC -TTCCTCGCCTGCATGAAGCAGCTCCATGAGCTGCGCAGATTTGCTGCCCGGCGCAGCGCA -CATATCCAACACAGTCATTCCAGGCCTAGCGTCGATGAGAAGAGGAGGGATCATGCTGAC -AACCTCCTGGCGACTGATGTTTCCGACGTCGGTCTCGGCGACAAGGAACTTTTGGAATGA -GGCAAAAGGCGCAAATCGACGGATGACATTCTTAGGGGTGGTCATGGACCAAGCGAGCTG -ATCTGGGTACCATGAAACTGGGCGCGGGGGCTCGACAAATTCACCCTCGTATTTGATAGA -GGTAATTTCGGGAATGTGATGGTCCTTAAGACGCTGCTGAACAGCTAGCGCGTGTCTAAC -AAATATCAAACAACAGTCAATATCAATTGAAACCTTATGAGGCGCGAATCGCAAAAAAAT -ATCCCAGTTGTGACATACCCTCGAGAACCGGTGAAACGGAAGCTGTTGGGGAGATCCTTG -CGCATGGTAGCCCAGAAGATATCCTTTTCCTCCTCCGAAAGGAATTCGGGTTCATTGTAG -TAGCGTTCGAACTTCTCGTTCTCTCTGGAGATATCTTGCCATTGAGCGCGCGTACGGCCT -GCGCCGCCACCGCCGCGGCCTTTACCACCCTGGAAGAAAGGAGAGACGGTCAGTACAACA -AACACGTCGTATAACACGTATGAAGAATGAAGTTACCTTTTTGCCGCCACGTTTTCCCAT -TGTTATGATGTGTAAAGAGTTATAGGAAAGTGTACTTCAAACAAAATCGCCTTCAAAAAA -TCTCCCATCAGATGCGGGATGGTTACGTAAGCACATGATCAACCACTGCAGGTGGGCAGA -GTCGAAACATGATCCTCAGGTCCAGAGTAGGAAATCCAACAAGATATTTTTTGGCGTCAG -CGCTGTGCCGTTGGTCTGGGTACCGACGGAGCAATGCAGTAACCTCCCGCGGGTTTGGGG -TTGGGAAAGGTGGACTAGTCCATTTGAAACGGATGTACTCTGCTGACATCTCCGCGAGCC -ACCTTTCGACTCGAAAGGTGAATCTTTCGGTGTCGATTGTGTCGAAGAACTTGACTGAGG 
-ATACTTCTTTCTTGGTGTTGGTCCAGAGCTGTCTGAATCGCTGGATAGCTGCCTTGTCAA -ATGGGTTGATCAGCTGTGTTGGCTCGTCGCCGTTGCCAAATAGGCCCAGCTCCCGAGCTT -GTTCGATGGTGGCTCGAATACAGACAGTGTGCAGGGAGATAATGCATGCATTTGCTGAGC -AAATTACTTCCCATACCAATGACCTTGGGAGATCAGACCTATTACGGAACCGCAGAAAAG -GGTGGACGATGTCCGCACTAATGGAGACCTTTTTGCACATAATAGGAAGATTCATCAGGT -CGAGCAGGCCATCTGACAGCCTGCATCTTTGTGCGGCACTGATGAGCTCGGGAGGGGCAT -TGAAATGAACAATGTCAGTACATTTCCGCCACCATCGAGCGTCTATAATGCGATCCGGCG -CAAGTGTAATCCCTCTGGGAAGCTTGGTTTTGGCTAAAGCAACTTGGCGGGATTCCCAAC -AGACATAAGCAATGGTTGGAGGCTTTTTGTTAATGATCCATGCTCGTCTGAGGTCTTCCT -CGGGTATGATATCTGGGTCGCAGGGTTCTCCCATCTCAACGACTCGCGCTGGTAGCGCAA -ACTCCCAAATCATCTGACGCAATTCAGCTGGCAGGGTGGAGAATTGCGCCATGATGAGAG -AACCTCAGTCACTGGAATGTCAACATTAGTCAGAAGAAAACAGTAAAGAAAGGTGGTAAT -GCAAAATGACCAACAAATGAGATGATCAAGGCTACATACAAGAGATAGGAATTGAATCGA -GTTTGAATAGATGTCAGTCGTCGAAGTAAAGCACTTAGAACGTAGGGTCAAAAGTCTGCA -AGCGAACGGCGAACGACCGATTTGAAGAAGTGTGAGATGGAGGAAGCACAAGGACGTGTG -GTTGTTTAGAAAACCACAAGCGATTCACCTCGGAGACAGATGAGAAGGGCGGCAAAGGCA -AGTACGATGGTAACGGCAATACACTAAGAAGGAAACAGCCACGAATAGAACAAACAAAGG -AGATAATTGGTATGTTTGCTGCAAAGGCAGCAAACTATATAGTCTACTATGGCAAACCAT -TAATTGCTATTTCACACACGAATTTGGGATCTAACAGCCCAAGGCAATGATTGATAGTAC -ATTCAAATCAAAGCCGCCAAAGACCCAGTTTTCCTGGAGCATTCAGCAGCTATCTTCACA -CGCGATCATACTCTAAAGCAACTTAAGAGATCATGCGGGACATGTCTCCACGGATGCGCT -GGTAGTCGAGGATACCCTCGATGCTGCCGACAGCGCTCATGGCGAGGTCCTGATCCCACA -ACTTGCGGGTAGCGAAGTCCATGACGTCCTTCTCGGTGATCTGGCTGACAGTACGCTCAA -TATCCTCGGCGCTCAGACGGCGGCCAGTGGTGATGATCTGGCGGCCGATGTCCTCAGCGA -CAGCGGTGGTGCCATCGAGAGACAGGAGGATGGAAGCCTTAAGCTGGGCCTTGGCACGCT -CAACCTCGGCGGAGGTGACGTTGTTGCAAAGGCGAGACCACTCGCGGAGAGTGAAGTGGA -TCAGGTCATCGAGCTGAGTAAGGTTCTCGCTGACAAGGTAGATACCCCAGAGACTAATAT -CATGTCAGCGTGTTTTCGATTAAACATAGAAATTATGAACAAAAAAACTCTTACCCAGTG -TCGCTGTAGCTGGTGGAGAAGCTCATGAAGCTGTTGGCGAGGTTGTGGTGGCTGACGTGG -GAGCTGAGCTTGCTGCCGAGGAAGGGCGACTGGCCCATGGCACGATCCCAGTTACCGACG -ATGGCCTGGGTGACTAGAGCGGTGAAGTAGTCATCATCCTTCCAGCTAACACCCTCGACG -GCGAGAGCGATGTGAGCGGAGGGGATGGTGTCGTCGCGGAGACGGACCTCGGAGCCGATG 
-AACTCGGGGGTGCGCTTCTGCTCGGCAGTGAGGGCCAGGGCAGCGGAGGTAGGGGCCTTG -CTGGGGAGGGAACCGAAGTGCTCCTCGGCGAGTCTCACGAGCTGCTCGTGGGGAATACCA -CCAGCACCGACCAGGACCATGCGGTCAGCAGTGTAGTTGGTCTTGATGTAGTCGGTCAGG -TTGTCCCGGGTGATGGTCTGGATGTTCTCCTTGGGGCCGAGGATGGTGCGGCCAAGGGGC -TGACCCTGGTAAGCGGTGGCGTGCAGGTGATCGAAGACGACCTCCTCGAGCTGTTTGTCA -ACCTCCTCCTGCTCGCGCAAAATCACATCACGCTCACGCTCAATGGCACCAGCCTCGAGC -TTGGAGTTCTGAAGGATATCGGCGAGGATGTCGACGGCCTTGGGGACATCGTTGTTGAAG -GCCTTAGCGTAGTAGACGGTGTTTTCCCTCTGTAGTCGCGATCAGTATTTGGCTTGGTAG -ACGTTGGGTGGTTTCCCGGTCGTACCGAAGTGTAGGCGTTCAGGTGAGCGCCCATGTTCT -CGATCTCAAGCTCCAATTGGTGCTGGGAGCGCTTGTTGGTGCCCTAGTTATATTCATCAG -ATAAAATCGCTTTGCAGTTTGGAGTTTGTGTGGTCGAACCTTGAAGGCAAGGTGCTCAAG -GAAGTGTGCAGTTCCGTTGGTCTTGTCAGTCTCTGCCCGGCTACCGGCATCGATCCACAC -GCCGACCGTCGAGGTTTGAGCCCATGGTGAGTGCTCAGTGGCGATCTAAAAGAGAGTTAG -CGAGATTGCGATGTCATTGAGATGAGTTGTATAGTCGTACGGTGAATCCGTTGGAAAGGG -TGGTGGACTGGGTGGTAGACGGCAGAGCAACCGGCGACGCAAAGCCACGCTTCACGGGCT -GGATGGACTTCAGGGCCGCACGAGTGCGGAGAGCCTGGTTCAGGTTGAAAGCAAGGCGCC -GAGAAGCCATTGATAACGACTTGTATAGGTGGAGGGAGGGGGAAAAGGGGAGAGATGGAG -AAAAGAGCTGATGGAGACACCCTGAGGTTGAGGTTCCGAAGTCTGAAATTGTTCGCCCAG -TTCAGCCAATTACATAACCCGCTTTTCCAAACCGCCCTTGACATCCCGAATACTTCCTTA -CTCTGCCTTAAATCAGTGACGTGGGCAAGAATACATTCTAAATGGGTTTAGATGTTGACT -ATATTGATCCTTACAATGTTCAAGTACGATTTGCTTGATGCAGAGGTATCCCGATACGCC -TCAATTTTCGATCATTCAATTTCGGTCATCCAATTTCGTGCCTATTGATTGGCTACATCC -GCTTCTCATACAAACCGGCTTGCAAATTGTTTAATATTCTCCTCCATCGACACATCCGCT -GAAAAGCCCAATTTGAAGGCGCGTGCAGTATCGAATCGAGGAGTCCATGATTGCACAATT -TTGTCGGTAGCAGCATCATACTTCTCCTCCACGAGATCCCGACGCTCTTTACCACCAACT -TCCAGCAAAGCATCCAACATCTGCTGAATACTGACCTTCAACCCAGGAAGGTTAACAGAT -CTTGACTCTCCGAATGACTCTTTGGGCACATCGCGTGCATGAATCAAGTTCTTGATAACA -GTATGCGGCGAGCAAATCCACATCTCTGTTTCCTTGGCCACAGGCAGAATAGCCTTCTTA -CCGTGGAAGGGCTCGCGGATGATATCACTGGCGAAACTACTGGCAGCCTGTGTGGGCCTG -CCAGCTCTCACTGTTACCGTGGGCAGACGCACACACCGGCCATCAAGGAACCCGCGCCGC -GAGTAGTCATTCAGCAGTGTCTCGATAATGAGTTTCTGTGATCCATATGAGGAGGATGGC -ATAGGTGGGAAGTTTGTTTCGTCAATGACGAACCCTGCTGGTGCAGGGCCGTAAACTGCC 
-AGCGATGATGTGAAGACCACCTTCGCTCCAGGCATGGTCGCGCGTAGCCGGTCCAGAATG -TATCGTGTTGCGTCGAGATTGACACGGATGCCCAGCTCAAAGTTGGCCTCGGAGCCGCCA -GACATGATACCATGGAGGATGTAGACTGTCTCGTAGGAATTGGATGGGCTGATCAGTTCG -TCGACCTCTTTGGTGGAGGTGAGATCTGCCTTGACACAGCGGAGGCGGCTTGCGTGCTGT -GCAGCTGACTCTGGGATCTCGGGCTCAACGATATCTGTGATTGTCACGGTCGTCGTGGCG -GGTGTTCTTTCGAGGAGGGAAGCCGCCAGTTCCTGGCCGACATAACCCCCTGCACCGGTG -ATGAGAATCGACATGATGGGTATACGGTGAAGTAATTTGGACAAGTAGGCCAAAGTCTGG -TAAGTGGCAAGAGGATGGTACAATCAAATTGGAGGATATTCAACTACGGAGAACGGAAAA -TGGACGTATGACCATTATATAGTAACACACCTTGCGGGGTTGAAGCGGAACTTGTCTCTC -CCCCCACGCTCGTTCAGTCATGGCAATCCCCCAATTAGCCAATGGTGTTAATATATCCCA -CGAAGATGATCACTTGGTCCAAGTCTCACGCATCAATTCCGAGCCCTTTTCTTGTTGCAA -GCCTCGGTGAGAAATCCGATTACCGGGAGGCCCGGCGCAAGTCCGAGAATGGGACTGGGA -CAGATGACCTGGGGAAATGTGCGAGACGGTTCTCCTTTTTAGAAGACAGTAATGCTTCGC -AAAATATTCAAGGGTCTTTGTTAGGGTGTTTTGGTATACTAAGTATCGGTCGACCACACT -TGCTTCGAACCCTCCAATGCTAGACGCTACATCTAGAACAATTTCTCACATGTTGTCGAC -TAAATGTTTACAAGTCTCAATCGCTTGTATTGGCCACTGATTGCTACCTAGGTACGGATA -TCATACGCTAACCAGAGCTATCTCATCGCCTCAGATCAAGATACTCAGTATGACACATTC -CAGCGTGTAAAATTCATCGAAAAGGAAAACTAAATTTTAAGACGCAGCCAAAAGTACATA -GGCGGAAATACCTGTCACAAGAAAGGTGGAAAACATTCGATACATCACAGAACACGAGAC -GCCGAGGGAACATCAAAAGGGAATGGACATGACCAGAGATTAAGACCTTTATGCGGTACG -CTTGTAGATACCAGCGACGATGCAGTGGTCACGCTCGAAAGGCTCGAGAGTCAGCTGCTC -CTTGGGCTTAATCTTCTCCTCGCGCATCTTCTGGACCTCCTTGGCGAACACAACCTCGGG -CTTGGCTGTGCTGTCAATACAGTTGGCCTTGATGGAGACAATGACACCACCCTCGTTCTT -GAGGAACATGTGAGCGTTCAATCCGACAATACGGGCCTGATCGGGCTGGGCAACATCGGC -GAAGATGACATCGACCATGGGCACGAGCATACGGTAACGGAGAGGGTGACGGGCATCCTC -AACGATGGGGATGACGTTGGTACGGTGAGTGGCCATGCCGATCAGATCACGACCGGAGCG -GTGAGAGAACTCGACGGCGTAGACGTTACCAGTGGGTCCGACAATATCAGCAACGTGAGA -AACGGAGGTACCGGAGGCACCACCGATGTAGAGAACCTTGGAGCCGGGCTTCATGTAGAT -GTCATCGAGACCACCCAACACACCAGCAGCCAACTAGAAGATGGGGAAAGACGTTAGAAA -AGAAGGACTAAGCAACAGGCGGTCATAGCTATCGGGGCGATATCAAGGAATTCCGAGAAA -AATTCGATAAAATCGACAACGATGCCATGTATCCTTAGCCCAGGAGAAGGGGATATTTAC -CTTAGAACGGAAGGGGTTCCAGACACGGTACTCGTTCTTAGTGACAGCACCATCCTCAGC 
-AGGGGCGCCCTCGACGGAGATACGCTTCTCGCCGTAGACAGCAGTTCCAGGTGTAAGGTT -CTTTGTCACAAGCATGTCCTCCTTACCGCCGCGAGCGACGAAGATACCGGCGTGACGGTG -AGGCTCCTGTTCGCAGGGAAAAAAAGATTCCGTCAGTTGCTGATATCCACATCCATGGGT -AATTTCAACCACTTACAATGACAACCTTGGCACCACCACGAGCACCGCCGCCACCGCGAG -GAGCACCACGACCACCACGGGGAGCACCACGGCCACCACGGGGAGCACCGCGACCACCTC -CACGGCCGCCACGGTCGCCAAATCCACCACGGCCACCTCCACGGCCGCCAAAGCCACCAC -GGCCACCACCGCGGCCACCACGGTCGCCGAAGCCACCTTTGTCAAGTCAGCGAGGTGAAC -TGTAATATCGCAACAAGAAAGAGAATCATCGCCATTGGTGCAATAGAATCACCAGCGAGA -TTTTTGGCCTCGTTGCTTCGAATGGACTGGACTGAACAAACCTCGACCACCACCACGGCC -ACCGCGGTCACCACCAAATCCACCACGTCCACGAGGAGCGAAAGACATTGTATATGTTTT -GAGAGTAAAAACTCGGTTGTAAAGGGAGTAAAATGGGGGGGAGGAAGGTTAAGCTGTCGT -TTCCGAGTGCTTAAACCGAGTAGTTGAAAATTGGAATAAAAGTTCCGAGAAAGTCGATAC -AGATGGAGAAAAAAGTGACAACGTATCAGAGATAGTTGGATCAAGCAGTTCAAACAGAAT -TGGCCGCTGACTATCTGTCGGATGCGGATCGGTACCGAAGCTGCTCGTTGATGCGGTCGA -GATTTTTTTTGCGCCTTTGGGATCTTTCAGAAAATCAGAACGGCGCTAGGTTGTTCCGGG -TTTAGCGCCTTTTTCGTGTGAGAGCGTCACGTGGATTGCCTCCATAATATGGGTGTCACG -TGGCTCGTCGAGCCCTGTATTGTTTGAGATGAAAGACTGCGATAGAAAATGTCAAACATT -GCCGTCGGTTCTTTGATGATGGTTTTCCAGGGCATTGATGTTTCGATGAAAGTATGTAAT -GGTCAACAAAAAGATTTGTCAGGTCTCGGCACTTGGTGTGCTCCTTCCTGTCAAACTAGC -CCTACAGTGGGCGGGAGGGAACGCTACGTACGACCGATGACGATAAAGAACATCAATTGA -TTCATTGGCCATACCGTATCATAGGCGGTCATGAAATAAAATAATAATGACAAAATAAAA -CAGAGCCCGCCCCGCATGTAGAAAATTCATGTCCTCCAAGCTTGCCTTCTGTCTACGGGT -ATATACAAGAATAACAAAACCGATCAACTACTCCCCAAAGAAGTAGTAGTTTGCGGCATA -TTGTATTTCAAATGCGGGTGGCATGCCCTTGCAGGTCTTTGGTGGTGACCCAGCAGCAGT -GTTGACTCGATAGACACTCTTGTAGTTGCCCACCGTGGCATCCGTAGTAGTGAGATAGAG -CCAAGGCACAGCGCCGTACTTTCCCGCCACTGAACCTGACGGTGCATCGATTGCGCCTGC -CTTCTTAGTCATGACAATTCCATCCTGCTTTTGGGGTGTGGTGTTCAGGTTGAACTCGGG -GGTTGTTGCATCAAAGAAGAAATGGTGACCCATCAGCTCGATGTTGGCAGGAGGAAACGA -GGCATATTCGTTGGTTGGTAGGGCGGTCCTGTAGGCAAGGTCTGGGAGCTTCCCCAGGAG -ATCGGGGTAGTTGGCTGCGATGCATGTGGCATTGTAGAGTCTTGCAACAGCCCCTACCGC -CGCGGGTGCGGATTTAGAAGTAGAGTCCGCACATGTATAATTCTAGTTTTCTGATGAGTG -GCGTAATTAACGGGATATTCTCAATAGTCTGTCATACCTGGGTACCCCGTCCAAGTGCAA 
-CATATCTCGGCTTTAGGCCTGACGGGCTGGCCATCCCTGAAGCATAGGAGGGCATAGCAA -TCTTGGAGACGTCGCAAGTCGATGCTCTAATGTTGTGCTTTGCGGAGTTGATATACTGGC -TCACTTTTCCATAAAATTCTGCCAGTTCATCGTTCCAGTCATATGCACTGTCAAGGAAGG -TTGTCGGGGCAGCAAGTGATATTGCTGCCAGGCAGATGATGAGAGGAGCAAAGCGCATTT -TCACTATGTGCTTATGCTAGCAGGCAGAGAAATAGAAGGGCGCGGCAAATGGCGGTGACG -TTTTTGATTGATCAAGGATTGTCCAGCAAGCCTTTGATGTAGGTCGGGTATAAAGAACAG -AGGGGAGTAGCGTTAAAGAAAGGAATGACAAGGGAGATATTGGTATAAGAAATTGGCGGC -CGGGCCGAGGGGCAGATATAGATGCGGTGGTAATTGCAATCATCCTCCAATTCATACATA -TATCAAAGATAGTAACACAGGTCCGACTCGAAACACTGCTGTGTCTATTAACAGAGCGAT -GGATGCAACCTGCAGGTACTAGCAGGGATTAAAGCTCGACCACGGGCTCTCATTGGTGGA -TGGACGCGCCCAGGTCAGACCCTCCCAGCCTCTTTCTCTTCTGATTTGTCGCCTGGTCTA -AATAACTTTGACTCGATGGAGATAGCAACGCGCTAAGCAAAACCACGGGGTATTCCCTTG -GCTGTTCAGTTGTAGAAGCTTGGACCGAGCCAATAATAATGTGATGAGATCTATAAATTC -CCCGAGTCTATAAATAAGTCTGACTGAAGAACAAGTGCCATGTGTGTTGAGCGGTAGGAT -ATGTAAAATCATTGCCGATATAGGCTAGTGTGCGATGCAACGGTGCCTGGGTCCTATCCT -AGGATTTGTGCAGAACTCGGGGAAATTGCAGGCATTACCTACATAAGGCCCCGGCACTGA -TCACAGATGTTTTTTTATATAATCTTCAATTTGTGCGATCCAACTGACAGAATGAATTGC -AGAGGTTGACATGGCGGGGTTTGGCTTTCCTTCCGTATTTTCCCCTTTCTGAGTTCACGT -TACAGTGGACTGGCGCCTGGATAACGCCATCTTCCACATAAGGTTCCTCTAAGGCTATTA -TTGTAGCTCCTATTAATATTTATGTGATTGCTGACTGCTTCACGATCAACTGTCGGCTTT -TTCGTCTGCGTTCGACTTGGGGCGATCCACCGGTGAGATTAATATATATCTATACACGGA -TAGATCCCGAGTCAGCGTCAATCCCACGGGTACGGAGTACCTACTTGATGTATCTAGATG -TAATCCTGAACGACTATCACCAAAAATAGAACCATAGAAACTATCCAGGCGAATGTTTTG -ATACCCTGAGCATACTAAATCTATACATAGTGTCTTGCAAACATGGTACCCCACAAGAAG -CCTACGTCCTATAAGCTTATTAGGTGAGGTCCCGCGTTCAATCTCGGCTCGGACCTCATT -TCTTATCTTTTGTGCTCATTCTGTGACTTTTTGTCTTTTTATATCTCCAATTGACCAATG -AAAGAGCTTTTAATATCTGACAGGTCAACCCAATGACGTGCCGGGGTCAGGGTGTTGATG -GATTAGTCGACTATGAAAAACAAGCTTGGAAAATTCACAACTTCGCTTAGAATAAGGTCT -AGGAAGATGCCTAGAGCCAAGTCTCATGGCATTGGGTAAACCTATGGCCCAGAGCTTCAC -CCGAACCCTATCTTGGAAAGGTCCGACATCTACGTTCATCTCACTGATATTGCAGACACA -TGATATTTGTCGGGAATCAAATTTACACAATGCAGATCCACATCCAAGAATAGTACAACT -GAAGAGACTTAACCCAAAACGCCAGGACTTATGATCAGTCGTGAGGAGAAAAGATAAAAA 
-TAAAACTCCGGAGCTCTTCGCTGTGTGAACATCAAAACTTGGAACATTAGTCGAAACCTT -CCTGCTGCCAGTCATCACCGTGCAGCTGGCCCCCACTCCCCGTGCTTCCTCCACTGCTAC -CGTAGGCAGCAGCGGCACTACCGACGGCGCCCATTGCGCTATGCTTCTCGATCTCTGTTG -TGCTCATCATGTAGAGCACAGTAAAAAGCAGAAGATGAAGTGCAAAGCAGTAACCAGCGA -ACAGGTTCCGACTCGTCCGATTGGCAAGAATGACTCGAGTGAGAGAGAATACAATTCGCT -CGGGTAGGCTCATGCGTTTGTATGCGCGCGTTGATTCCCGGCCCCGGAATGCGGCAAACG -GTGAGATTCGAGCCTCGTAGGCATTCTGATATCGGTCAAGGGACAGACCCGAAGGCGTGT -CTGCAGAGGTATAGACAGATGTTGAGCTTGGGGCATTTGCAAACGATGATGCAGATGTGG -AGGCACCCTGACCACGACTGTAGGTCGAGACATATCGTGTCTTTTCGTAAAGACTCAGGT -TGTCCTTCTGGAGCGATGCAACTTCCTGTCGGAGGGACTTGACAGTGGCATATAGCTTTG -ACAGCTCTTCTTCAAGCTCGGCATTCTTCTTCTTGAAGCGGTCTCGTTGGGCCTGGATCA -TAGGCAGGAGGCCGGAACCGCCGCCAACGGCTTCTCCTGCACGAATAGATTCCATGGTGT -TTGCCGACACGGCACTCTGATCAAAGCCAGAGATGATGGATGAGGTGGGCGATGATGCGC -CCCGACGGGCAGAGTGGGGGTACCTGGAGTTGTAGGTTCCTGCAATAGACATCGCCGAAG -ATGGGAATGTATTGGCGGCCTCCTCTTGCACGCGAAGGAGGTCGTTTTCAAGAGTAGTTG -ATAGGGTTCGCGACTTTTCTAGCTCTCCCTTCGAACTCGAAAGCTCATTTCGCAGTGTTT -CGAGTTGATCTTGAATATCGCGATGCGACACTCGCAGAAGGGTGAGGTCATCCGTCAATT -TCTTATTTCGGGCCATAAGGAGTTGCTCCAGGGAATTCTTGTCCTTAGATTTGACCTCGT -CACCATTGGTACCCGCCGCAGCACCGTCGCCAGCACCGTCCGCAGCCTCATCATCATCAC -CAGCTGCAAATTCGATCGACCTGATCATCTCCAGCTCCCGGCGGATCTCATCGTAGTCGG -CTACCTTTGCTAAACGCACACGTAACTCTTCCCTTTCCACCGCTGCCTTTGAAGCTGCTC -GTTCAGATTGAAGCAGTTTAGCTTCCCAGGTATGCCGTTCCGAGTCTTTGTCGAATCGTG -CAGCATCCAGCTTCCGAAGCAAGGAAGAGTTCTCTGACTGGAGTCGCAGATAATCAGGGT -CATCCTCAACAGAAACGGGACCAGATGCATGTGATGCCGCCTGAGCCAGCTCCAGCCGCA -TTTGTTCATTGCGTGCTTCCACATCAGCCAATCTCAAGCTTGTTTTCTCTAAATCGGCAG -AAACAAGCTCGAGCTCAGCTGCAGTAGCGCCACTTCGCGAACCTTCACCATCATCACCAT -GATTCTCACCCTCACCCAGGCGTTGGGAGACCTCATAACTTGCTTTGAGCTCCTTGATCA -ACCGATCCTGATTTTCCACTTTCTCTTCCAAAGTCTTCTCCTTTGCTGCCCAGTTATTTG -TCTTTTCGGTCAAGACAGCTTCCCACGACGCTTCGATCTCCTTGGCTCTTGATTCCTGGT -TTTCCTCTAGCTTCTTTCGTGCAGCGCGTTCCTCCTGCAGGCGCTTCTCGGTGTCCTCTT -GTTGCCTGGTGAGGCGATCCACTGACTTCTGCAATTGGTCTCGTTCTGAGGTAAGTTTGG -GAACTGTATCTTCGGACAGGACAAGGGAGTCGACAGAAGCTTCGAGGAGTGGGTATGGGT 
-CTGGAGCCTCGGACAGTGAAGAATATAGTTGTAAGAATGAAGACGATGATGTTTTGCCTT -GATTGGTCAGGAGATCGATGAAGGATTGATAGGATTTCAGCAGTGCCTTGTATTCTGAAA -GCTTAGATTCATCATCTAGCTTCTTGTAGTCTTTTGTTTTCTGCGCCAGCTCTTTTCTTT -GCACAAGTGCGACACGCTGTTGTTCAACGAGATCCGTAGCGGTTGTATCGAGATTCGCGA -TAGTATTGGCCAAATCAATTCCTATTACACGTCAGTAAGACCCGGCAACAATCAAGATAA -TAATTACCTCTCCAGGCTGCAATGGCGCGTTGGAACTTGTTGGTCTCTTCGGTCAATTTG -CTGGACATGAGTGATTCAGGCTGGTCATCATGTTGGTCCTGAGGAAGCAAACCCTCTGTA -ATGGCAAATGAATCCATTGTCGGTCCTAGCCCCAGGCGTCTGTACTATAACCCCACATCA -AATAGCAGTCCACCAAGGGGAAAGATGGAGATCTGTTATCGCAAGGAACAGGACACGCGG -CGTGCGGGCCGATATTGTTCCGACTGTATGTGTACAACACACGATCTGGAGTTGTGGTCG -TTGGATACAAGTTCGCGACAAATGTGAATGTTGAAGACGAGACAAGATCTAGTGAGCTTC -AACACGCAGAGTGTGGAATTATTGTATTGTTTCAAGATGATAGAAATTGGCCCGAGGCAC -CCGTTTACATTAGTACAATGTATAGCAGATGGCAGGAGATAACGACCCCTTTTAGATTGG -GGTGACCGCTTTACTGGCCGCCTTTTCTATCTTAGTGTGATGTCACGTTTAAAGCTCCGT -GTTGTAAATGGTAACTCCAAAAAAAATGAAAGAGATCAAGGCTCAGAGTAGTTTGAGCCC -CGGTAGTTCAGTCTTGATAATAATGTATAAAAGTCAGAATTGTACTTTGATAAGGTGTCT -GTAGATCATATCAGATTCTGCCAGTCAAATACCATTCATGCCGTCGTTGTCTTCCGCCAT -CAACCGCGGCCCCCTGAGCCAAGCTACTGACCGCCCTCACATCGTCACTCACCAAGCTTG -CGGGGAGATGCCTGAAACTACAGCCTCATGATGGCTTCCGACGATGGAATTGGTCCCCGC -AAGAGGCGCAAGATCAGTCCGCCAGGAACTGTACCTTATGTGTTACATTCTCTCTTCAAG -GATGTGCCACTAGCGACCGAGGATAGCTCCGACGTATACATTACCTGTGTTGAATACTGG -AGTAAGCTTTATCTGTCTTGGTCGTAGGGGCTGCCTGCTCGATGGCTCCCAGACTGACTA -ACATTGATGATGGACAGATGAGAATTTATATATCGGCACCTCTGCAGCCGAGATCTTGCA -CTTCGTATGCCTCCCGCCAGCTTCCCCAGACGAAGCCGCCGAGCCTACCTTTATATTGGC -ATCACGATTACCGATCCTATTCACCCAGAGCCCTCCAGCAGGAAGTGACCAAGAAGGTAT -TCGACAAATAGTTGTCTTATCCTCAGTAAACAAGGCATGTGTCCTTTGCAATGGCACTGT -TACGTTCTACATGCTGCCGGAGCTCAGCCCGGCTTTTGGAGCGACAAAGGTCAATCACTG -CCGCTGGATCGGTGGTTTGGATCTCAACTGGGATGCGGAGGAAGGGGAGGATCCGACGAT -CATGATTGCTGTGCAGAACAGGATCATGTTGGTCCAGATTGGGGACGAGGCTCGACGGAT -CAAGAAGATCGAGTTCCCCGGTTGCTTAACTGCGGCGCGACGAGGAACGATCGCATGCGC -TGCGGATGCACACTCATACTCATTACTTGAAGTGGCGCATCAACAAAAGATCCCGCTTTT -CCCAATATCCTCTTCGAATGAGGTCTTTGAGTCGGGACATGTTGAAGATATGAACCCAGC 
-GCCGCGCACTCCGCTCAAACGTTCACCTTCTTCTTCCTACCCTACCTCTCCACCGAGTGA -TGCTTCAGGACATGGCAGAAGCACTAGTTTAAACACCTTTGTGGGAATGCTCTCACCACA -TGCACAAGCGGCTCAGCTAGATAGATCGCCATCAGGGACACCAGATCCTTTCACATCTAC -TGGAGAACCAAGACGATCAAGCTCGGAGGAACGAGAGGGGACAAGCTCTCCCAAGCCACC -CAGTGACCAAGGGCCAGACAATACGTCTGCCACTAATGACGGCTTGAAGCCCTTGCCCCC -TCTGCCTCCGCCATCAAAGCAAGGCTCGAAGCGCCTCCAGCCTCATGTGGTATCTCCGAC -GCCCTCCGAGTTTTTACTTGTGACCGGAACAGAGGAAACAGAACCGGGCGTCGGAATGTT -CGTCGACATGGATGGTGAAGTTGTTCGGGGTACAATTAATTTTCACAGATTCCCCAAGTC -GGTTGTGATTGATACAAACGAGCATGATGAAATGTTTCAGACTAGTGACGATGCTAGAGA -GGAGTTTGTTCTCGCTGTTATCGAAGATGAAGACGAGGACGCTGGAAAGTGTCGAACAAG -ACTCGAAATTCAACGTTGGGACGATGACCCAGGGGAGATTGAGCGGACCAAGAGCTGGCT -AGAGATTCCATCTCCTGCAGAGACACAATATATTCAAGTTGGCCTCCGACATACCCTCAG -CCCTAGCAATCTTGAGCTCAATGAGATAGGACAGCTCTTGCGGATGGTCCGCCTCAGAAA -CCCTACGCTACCCCCGCATGTATCAATAACCGACTCAAGAACTCAGGCATCCATTGAGCA -CTCCCAAAAGGAGAAGGAGCTGTTCGATCCGCAGGAATTGACCGACTCGGATGGATCCAA -GAAGAGCGAAGATTCAGCAAGCCAGGGCTGGGAGGCTCAACGCGACGCAGAAGAAGCAAA -GTTTGCACATGGATTGGGCAAAGTCCAGAGCAGCCTCGTCGTGTGGTCTGGATCTCAAAT -ATGGCGCGTGCTGCAGAATCCATTGATAGTTCAGTTGGAGGCCAAATTGCAGAGCGCCCA -AGAGACGGACAAGGACGAGCACACGGTGCTGCATAGAGATACCATTGTGGATTTGCTGCT -CTCCATCCAGGATACCGAGCCAAAGACAGAGGCAGAATTCATTGGATTGAGCTATGTGAA -ACAAAAAGCCAGTCTAATGCTTTACGGAGATCTTCTTTCCATGCAGCCAGACGACCACAA -TACGGCAGCAATCGATGGCGCTGAAAAAGCGCTGCTGGCAGGTAACCTCGATCCACGTTT -GGCATTGCTTTTTATTCCTCTGTTGCGATGTGAGGTTTTGCAGGGGCCTCAGGGTATATG -GCTACACGCTGGGCTGGCAAGCATGGCGGACAAATATCTCGAACAGGTCGGAAAGATAGG -CGTTGAATCCTCTGGATTGGATGCTTCTCGTAGCCCTGTTCTCAATATGCTCAAACGGTT -CCTTCTCTCCTGGCAACAGAAGCGGGGGTATGGCAGTATAACCGACGAGACATATGTTTT -AGATAGCACCGATGCCGCGCTGTTGCACCTGACTTTGGAACAGGATGCCTACCTGACACG -AGATCAGCGTGTAGCGTCGCCAATTCGTCCTGAGTTGAATAGGCTGGTCGACAATTGGAA -AGGCAATTTCGATCGAGCAGTGATGCTACTGGAGACATACAAGCGACTCTATGTGCTAAG -CCGTTTGTATCAAAGCCAGAAGATGTCTCGGAATGTTCTCAAAACCTGGCGACGCATCGT -TGATGGGGAGACTGACGCTGGCGGAGAGGTTTCCGCGCATGGCGTGGAGACCCAGATGCG -CAGGTATTTGGTCAAGATTAAGGACGTGCAGCTGGTTGAAGAATATGGCTCTTGGCTAGC 
-TGAGCGAAACCCCAATCTGGGCATCCAGGTATTCGCAGACAATGCTAGCCGCGTTAGACT -GGAACCGGCGGATGTTGTTGCTTTACTCAAGGAGCGAGCCCCAAACGCGGTCCAAGTCTA -CCTAGAGCACCTGGTATTTGCAAAGAATGTGAGAGGACCTGTTGTCTTGGTTCATGAACA -TCTAACTAACTTGTCAATACAGCTCACGCAATATGCCGATGACCTGATCTCATACTACTT -GGACACAGTACTTTCCGTGCTCGAGTCATCTCCCGAAGCGCGAGCTTCTTTGGCCGAGTC -CTATTCGACCTATCGAGCCCTGCGTGCCCCCAAACCGACATACCTCAATTTTATAACCGA -GAATACCCCCGCAGAGCCCTGGTGGCAGTCCCGACTCCGATTACTTCAGCTACTGGGTGG -AATCTCCAGCTCCCAATTCTCGTCACAACCCCTCCCAACAGGGATCAGCTACTCAATCCC -CAATGTTCTCACCCGCATCGAGCCATTCCAGAATGAGCTAGTCTCGGAATGCATTATCCT -TGACGGCCTACAGGGCCACCACGGCCCGGCCCTCCGTCTCCTCACACACGGGCTAGGCGA -CTACGACTCTGCAATCCGCTACTGCCTCTTCGGTGGTCCGCGCAGCTCCTCTTCCTCTGC -AACAGGTAGTCCGCCAGAACTAGCAGACCATACCCTACAGTCAACGCTCTTCCGCCACCT -ACTCGACGAGTTCCTGCACATCGAGGACCTGTCAGACCGCATCGAGCGCACAAGCGATCT -CCTCGCGCGTTTCGCGGCTTGGTTTGATGTCCGCGAAGTCCTCGATATTGTTCCTGAGGA -ATGGAGCGTTGATATTCTCGGCGGTTTCTTCGTGCACGTTTTCCGCACCCTTGTGTCGCA -GAATCGCGAGACCCGCATTGAGCGTGCCCTTAGCGCAGGTCTGAATCTGCGCATTGGTGC -GGAGTATATCGATGGCATGGAGAAGGTTGGGCCTTGGGTTGAGGAGGCTGAGGGCGTAAG -AAGGTTGAAGGGTGCAGGGCCGCGGCAGGTTTCCTTGCCAACCGATGGCCCTGTTGATGA -TGCCTCTGATGACTCCAGTGAGTTTGGAGATACGGTGGGGCCTACATCTGGGGAAGTGGG -CCAGTAGATTGATGATCAGGGGGAGTGTACAGGATAGTATATGGATATGTTATGTTGGCT -TTTTGAAACTATGATACCTACGCATTTTGCAGTTTGCTTGGATGCCTTTCGTGTTGACTT -TGTTATTTTTTTCTGTTTATATCCTAGTTTTGGGTTTGGCGCTATGTATCATGAGCCGAT -CCCCAGATCCATCATCTCCTTCTATGGAAAGAAAAAGATAGAAGACAGAACTGCACAACA -GCAGCAGTCCTATACACATTCCAAGTCGACAGAAAAGAAAAAAAGCGAAACCATCATATT -GCGTTCGTTCCCTTCGTAGCTACTGATGAGCGTATGTTCCGGCGCACGCCGAAAGCCAGT -GGGACATTAAGGTGTGTAGTGTTTTAAAGCTCCAACCAGACCTCGGACGGCCATTTCCTG -AAATGACATCCAGACCCGGAATGGAACCCGGAAAGAAGATAATACGTCAGAAAGAGAGCT -TTTTGTTTACTCTTCTGCCTGCAAGAAAGGGTTGGGGATCTCCTCACTGCCGTCGGCGGG -GGAGATCAGTACGATCAAAGTGCCACGGGCGACGATCAGACCCATTGAGCGGGTGGTAGT -GTTACCCTCCTCATCTGTGGAAGTTAGTATATGAGGAGAGATAGAGAGATTAGATTCAAG -TAAGATACTTGAAACTCACCACGCATAGTCTCCTTGACGTCGTCGAGGACCAGGTTCATC -AGCTGGTCGTAACCCTTGAGGATACCGGTGACTGTTGTTATAGTTAGTATGTATAGTTCA 
-TAGATGGTAATGAGAATTTCCAGATCAAGAGCAAAAGATTTAAGAATAAAAATAGAAATG -TGAGAATAGATGAGAAAATATCAATCCAACATACCCTCACGACCGCCATTGAACTTCACT -TGGACCTCCTTGTCCATGTACTTGTTGAGATCCAAGATGTTTTCCTTTTTGGGCTTCTCC -TGTGCACCGCCACCGCCACTTTTCTGCTGGCCCTGGCCGCCGCGGCCACCACGACCGCGA -CCGCCTCGGAATGAGCCTCGTTCAGACATAGCCTGTGTGTAATTGAGGGATTGAAGAAAT -GGAAGAAAGGTATTAAAGGAAAGGGAGGTATTGTATAGACTGCGGACAGACTTCGGGGTA -TCTCTGTATCTCTGTACGGAGTATCTTGTAGGGAACAGCGGAAGGTACAAGCAAGGTGGG -CGCGTGTATGAGGGATTTTTAGTGGTTGGCCTTAGGCAGGGAAACTGGATCTCCGCCTTG -GTATGGAGATTACCAATTACAGCTGTATTATTGCATCTCACAGTCACGAGGGATCCCGCT -CGGAGGTATTTTACAAGTAGATTACGGGTATCTTCATCTCTCATCTAAACATTCAATGTC -GGAACATTTGACCTTCTCAATCACTTTGCACATTACCAGTTATTTATGTATATTTGTGTA -TATATTTGCAATTCTTTTCAAGTAATCCGATCCTAGACTAGATCGTTATCAGGATTTCCT -CCGAGTTCCCGGGGACAAAATGTTTCAGCCCGCTTGCCCAGGCATCAGCGAAAAAAAAGA -TATCCACAAGCCTCAAGGCTTGATTTTGTCGATCGACAGATATGAGCGTATGTTTTATAT -AGAATGATCTGCGACTATTGTTACGTAGACTTGCTTCTGCATAGATATGCAAGACTGTGT -AGGCAAGGCGTTACTGTTGTACACCCATTAACCAGCGTATCGTAGGCCAGCGGAACCCTC -CGGCGGGACTCCGCTGAGGGTTCCAAGGTGACCGGCTTTGCGGGAAGAGTCTCATATGTC -CAAGTTGCAATCGGAATTTCTAATGTAACACTGTGTGCTAATTATAAAGAGagaaaaaga -aaacaaaaagaccaagaaaaaaaaaacaaTCGAATTCTTCAAGTCCCCGGGACCCCTAAC -GGGGACACAGGTCACGTTTTTCCTCTCCACTCCCCGTGCGCTGTTCCGTCCACTCTGTAG -GCGCCCTACACTTTCCTCTTTTTTGCACCTTCATGGATGTTGCTTGAGTCTTATACAAGG -AGCGTGCCGGTTTCTCTCCGCGCAAGTCCGGTTCACCTCCGCGCAAGTTTCATACTCCGC -GCTCTTATCTGACTCCGCGACCTACTCCCGATTTCCTCCTCTCTTTCCACCCTAATATCC -CAAACTCCTCCAGGGTGTAAATAGGGGTTTATCCTATTCCTGCTCACAATGGAATCCCAC -GCTCAAACAGCGGAGTCGTCTCATAAACGGCCTCGGTCTCCCACCGCCGATCACGGCATG -TCGAAAGTGCAAAAGACGCACTCAAACCACCTGCAAATAAACTATCTCGCCCGGCAATAC -CCAGACAACCTACCGCTCGTCTCCGTCGATGATACCATGCCTGCGATCATCCATCTGCTT -GGTGAGTACGACGGCGTTCTACATCGCCACGAGAGCATCGCCGGAAATTTGGGCGCATGC -CCGCTCGGTCCGATCCTGATCAAGCGCTTCGAGCGTCTGTTCGATGGACCGCCGCAAGTG -CTGAAATCACATGGCAAAGACGGACCTACGGTGACTTGGCTCGATGTGGTGGAATTCGCG -AAGAATAAGCCCGAGCAGTTCAATCTGGAGAAGTCGCGCAACGGCGTGCGAGTCTGCCAG -TTCTATACCAAACAGTGTCGCGTGGAAATCAGTGAGGAGGATTTCGTTTTGATTGCTTCC 
-GGCATGCCACAGAAGATGATCCCGCCCCAGCCTATCATCGAGGATGAAGAAAAGGAGCTG -GGCGCGCTGGAGATTCTTGAAAAGAACTTGCATCATATCATTCAGATGGCCGATCAAGGT -AAGGGAACCACCATACTCATATGTCGACTCGGGAAGTGTTCACTAACAATATTTTTGTGT -TTAGCCTCTGCTCGAGCGAGACAGCTTAACCATCGATTGAAGAATCGCCGCAATGCTATT -GTCACCCGCCGAGAGAACGATGCTTCTCTACATGCGCAATCTCGAAATGTCACCGAAATC -TGGCGCGATGCCAACAACAACAGCGGCCCAGGAAACGGCCATGGATCACCTCACCCTTCT -CCGTCTGGTTTCGTCGCTGTCAACTCTCATCGGCCTGAAGGCGAGCACACCGAGGAGAAC -CCTCTATCCTCCCAATTCATGTTCTCACACACCAACACCGATAATGTCACCATGATCAAT -GGGCAATCGATCAAAGGTGCTTCCCCAACCACTCGTGCAGACTTGATGAAGAGGTTTTTC -ACAACGGCTGATCGTCATGCCCGCGGCTATGACGACGCAACAGCCCCAGTCAACCCCCAA -CCACTCCCTCGGCCTCGTGCCTCCGACCCAGCAGACTACAGTCTCTACAACCCCACGACG -GCAACTCCAGTTGCCATCCCAAGCACCCCATCGTCCCTCCTCCCGCCGCCAAAATCAACG -CACCAAGAAAAAGATGATGGAGGCCCCTTCAAGCTAGAAATGATAGCCCGCATGGAAGAG -CTTCAGCGCGGCGAGCGCGTAATCCCCCCTTGCGATCGCTGCCGCCGCCTCCACATGGAC -TGCCTCAAGAACCTCACAGCCTGCGTTGGTTGCACGAAGAAACACGCCAAGTGTTCCTGG -CGTGACGTGAAAGAAGATGAAGTCACCGCAATGCGAGCAGGGACCACGACCTCAACGGTA -TCCCACGAGCGCCTAGAAAATGACCGCTCTAGCGCAAACCTCACCCCAACCCAGTTCGGT -ATGGGTCCACTGCCCCCGATCTCCGCCGAGCGTGAACGTCCCCGAGAACCACCCTTTGAG -CCAGAGCACCGCGCCTACCACTATCAACATCAAAATCGACGCGAGTCGGCCCCCTCCGCC -CCGAACTTGGCTCCTGGCTCGGCGGTCCCGATGGAGCTTCCTTCAAATGATAACTCTCCT -CGACGGGCAATGAGTGAGACAGAGAATAATGGTCGCCCTCCTGTCCCTACTGGCCGTGTC -TTGGATCGTATTGAAGACGAGGATCCTGATGCCAACCAGCGACTCAGACAGGCTATTTTA -GATACCGTGGATCATCACACGCGTGTCGCTGCGGCTGTTCAGGAGAGGGAGCGTGGTGCG -GAGCGGGGTGCAGATCGTGCTGGGGAACACCCAGGTGTGGTTCTTCCTGCCCTCAACCCG -ACTCCGGCTTCTGGCGCCGGTCACgaccgagagcgtgaacgtgagcgggaggctgctcgt -gatcgtgatcgACGCATGGTACATGCGTAAAGCTTTCAGCCCATTGTATAGTTTTTGGTT -ACATCGGGGCGTGGGAGTACTGTTAGATATCCCTCTGATTAGCGTTTTTTTCCCTTTATC -TCAATCTCAATTCTTCCAATGTAATTATTTCCCCTATATTGTAGCGTATGTACCGGATAA -TGCCCGGCACCTAGGACATTTTCCTTTCCATGTAGGGCTCACCTCTTGTCGCTTTTTGAT -GAGTAGAACCGCGCACATCCTTCAGTGCTCCGTACCCTGATCCACTTGATCATTATCTGC -GTGCCCTGTTTTCCACTGTTTTGATGCCGGTTCATCTTCGGTCTTCACTATCACAGGTCT -TCGAGCCTTTGCAGCTCCGAAACCAAACTATTGTTTTTGTATTTGGGTGCCTGTGGGTCT 
-GTGTGGGTATGTGTCGTAATTGTTTTCAAGGCCGTCTACCATCTCTTGTGTAATTCCGCG -CTCTGGACACTCATTTGGGTCTAACCTGCAATACGCCCCCACTATAGCCCATAGAAACCT -TCCTACGGAGTACTTGATTCAGCCTTTCTTTTTGGGATTTTTTGTTCATATCTTCATCCA -CTCTGTGAATTCAGACAAGAGCTAGTAGAAAAGCGGAGCGCCGGATTGGTCTGTTGGAGG -ACGAGGACCACATCAAAATCGTGCTTACACTCTAGTTGGGGAATCCACTTTCTCTTCTAA -TGAGGGTTTATGATATGTATCAAATTGTGGTCTTTTTTTTAGCCAATAGCCTCGTCTAGT -AGTGCAGGGGACTCGGTTTCTTGAAACTCTATAGTCACTTTAGCTTGACACTAGGAAATG -AGTCGAGGATCGTGTTAACGACAAAGAAACGATCCGGATGGTGATGTGTGACGAACACGG -TTATCAATCCTTAGGTTGCCGCATAGACGCCGTCTTCTGATCAATTAAACGAAGCTAAGT -GAGAAAGTCATGACGTTTATAAAACACCGGTTAGGTCTGGGAGTCGTCTTCCCATCCCCA -CTGAAAGGGTTGGGCTGGCTGAGAAGACGATTATACAATAAAATAGGACACGGAGAGTGG -GCCCTGAGGTGGTTTTTGATAAACCTCACCCCAAAACCCTTTCACTTGGGTATAGTTGAA -TCCCTTGATGCAGTTTGACGGGTCTTTCCAGTGGACAATGGCGCGAAAACTCGGTCTTTG -CAATGCAAGCTGAAGATCTCCATGATACATCAGCAACAGGCTCACCGCATCTCCAGTTGC -GAAAGGATATGAGCATTTAACCTCTGACATGCACTCAAAGTGATATCAGATCAGGAAGGC -TCGGAGGAATGGACAAATAACGTAATCGTCAGGTAGATCTTTCTGGCCAAAGTTCAATAC -AATATATATCACTCGCTATTCCTCCCAGCATTGAACAATTGCGTTAATCATTTTCAACAT -TCCTTTTCTTTCCCTCCTTTATCTACAACGAGAGATAATCTACTATGTCTTGGCAAGGTC -TGTCCTCCGCTCCTCTACAGGTCCCAGGGACACTAACAATTGCTTATCCCTATAGACTAT -GTTGACACGATGTAAGAGCAAACTTCACACTACAACTGCTAGATGCTTGCCTGAGCACGG -AGAATACTAATAAGAGACACAGCCTTGTCGGCTCCGGCCATATTGACAAAGCTGCCATCA -TCAGTGCGGCGGGGGATAGTATATGGGCGGCCTCACCAGACTTTCAGGTATTCGCAACAA -ACTCACCTATAATAGCCGTGCTCAACGATGGCTGATATGTATTGACGGAATAGCTGAAAC -CCGAGGAGATGAAGGACTGCGCCTTGCTAGCCAGTGGTGACAGTTCCGCAAAGGATAAGG -CCATCAAAAGCGGACTGCGGATTGGTGGGATGTTTTATTTCATACTGCATATTGCGGACG -ACGGGCTTATATTGGCAAAATTTGTACGGTTCCCACCCCCAAGATGTCACTAATGTTGGA -AGTAAATGGTATTGCTAAGCCATTTTTCGATCATAGGGGAATTCAGGTGTCGCTGTTGCT -AAAGGCAAGCAGGCCGTTATTATCGGCCACCACGACGAGAGACAGAACCCCTCAGAGGCC -GTGTCGGTTGTCGCAAGACTTGCGGACTTTTTGAAATACAAAGACTTCTAGTTCGTACAA -GCGAGGCCTTGGCCGGAATTCTGACTGGATCTTCGGTCATTCGCTTCGGTAAATTCGCCT -GATGATGATTCAATAAACTTCAGCCCCTTGAAGTGCTGTTAAAATGAGAAAATCAATTGA -AATTTCTATGTTTCGAAATGGGGTTGCCAATTTCGAATCTAACATCATTCCTTCTGGCAA 
-GTCCTTGTTATTCTAACGTCTTATATCTTTCGTACGGGTATAATCAGATCGCTCGATATT -ATTACGTGATAAATACAAACCGGGGAATTTTAGCAACTGCCGTAATTAGGGTTAAAAAGA -GCATTACTATGTTTCTAACAAGGCGATATCAGGCGACTCTGTATCATCAGAGAGGATCGT -GCTCGCTGGGAAGCCTCGGTCTGTCCTACCTAGGGTGCGATTATCTTATCACGGGTCGCT -AACCCTAGGCGTTTGCTTCTACGCGGACGAAGTACCGTAGGAAGCAAGGCATCTCAGAAG -CAGGATATAGTGACAAATGTTGCAGACCATCAGGCGCTGGCTTAAAGAATGATAATCAAG -CCATAAGAGGCCGATGTCAGTAGGAAGTAGACCATGCTTCCGCCGAGACAAAGTGTCGTT -TGTCGCCAAGTACATGCAGCTCAGCACCTCTTCAACAAACCTGCTCATCGTCAAAGCCCC -ACGATTGGCCTTGTTCACTAGCATCACCATCAACTTTGTGAGGTTCGTCGGCCATATCTC -GCACCGAAACTGGCCAATTCGCCGAGTAGGCCGGAAGATGAATCATGAGCGCACTGCCAG -CTACTGTAGATGTCCTTCGCGTAGCAGACGATCGTTGAGGACGAGGTTTTGGGCGCCTCG -GACCAAGCTATAACGCTGGCAAGAGACAGCGTGACTGAGAAGGCACACTCGCTCATTATG -GCACCGAACACCCTCGGCAGACTGAGCTCAGTGGAAGTCGTGATCTGTTCAATGGGCTAT -TCCAGTTCATGCTCGAAGTTATCTAGCGGCTGGAGCGGGAAGACCCTAAGTTTGCCGCCC -AAGGTGCACATCTCTTGGGTCTTTACGGACACCCTTGGGAAACATGTTTCGCCAAGCAGT -TTCGCCAAGCACGTCGATATTCGGCGACTTGGAAACGCGAATGGGTGGGGGGAAGGGGGT -TAAGGACTGCTAACATCAATCTTTGCCAAGAGGGTCTGCAACAAGATGACCAGCTGGCTC -GGCTGTGTGCCTTCAACCGTTCGCTGGACAACTCTTAGAAGAGGATGCCCAAATTATTAA -ATTCTCGTAATCTCTATATGATAGCTAGATACCATATACAAAGCATCAGGTTACAAGGAA -AAGTTCATAATCACAGCTGTTGTTGCAATATCATGTAATTGATCGCTTTTACTACCTTTC -CACAACAACCCGGGCGAGAATTCCATACTTCAAAACATGGTATTCACACTACTGGCAGAA -AAAGCAAAGTGTCGTGCATGTGCATTAGGGGACCTTCTCGCCCGGCCTGTCGATGTGTTT -TGGGTTGCGCAGCTGGCTTGCACCCGCAGCTTACCGAATACGTGAGCATTTATCATAGGC -CACTATTCCCCTTCGCAGTTCACAGAAGAAAGAGTCTGATATCACTTGTGTCTTTCTCTT -TTCCAGAATGCAGGGGATTGCATCGCAGATAGGTGGTCCATTTTACAGAGTAGGCTGCTT -GAAGTCAGAATATGGCATGACTGTCTTTCTTTTTGGTTGTTTTACAACTTCCATAGCCGA -GAGATCAACAAAGGTCGAAGAGAACGGAATTGAAGCTTACTAATATCTTCAAGAAATATC -TATCCCAGTACGGAGTATATGCGCCAACTTTCCCACTGCATGCACTAGAGCAGACCGCCA -CGATGATCTTCACTTTTCGAAAAATGTGATACGTATACTCCGTATCCAAGATTTCATATC -TTGTTACTTCATAGAAAAGCAGGTATGTGCCCCTGCCTTCGCTGCTGTTCTTTGGATTGT -TGGCTTGCACTAGGACATACTACGACTACCTAGATATTCGTTTACATAATGACGATCGCT -GAAAAGAGCGAAGAGCAAAAACTTGCTGTTCAAAAAGATGTCGAGGATTTAGTTACCGCC 
-TCCCAAAGATATATGCAACACAATGATGTTGACGGCGAAGCCAAGCTGGATCTCCAACGG -ATGGCATCAAAACTGACGCAAACGCTGCGTGGTCCTATTCCATCGGCACTCTCACACTTT -GAGGACGTCAGTTTTCAACTCAATTGCTCGCGGATTCTATTCACTGACTTCCGGAGGACA -AAAGATCGTTCAAATGGCAGCCCTTAGAACATTGCTGGAAGCGGGTGTTTTTCACACGAT -TCCCAAAGGAGGAGCAAGTATGAGCGCCGATGAGATATCTGCCCAGACAAGTCTGGACAA -AAGTTTATTGGGTAAGGTTATCCCAGACCAGACAAAGTGACCACTTGCTAATTGAAGCCA -AGTACGACTGATGCGGGCTGTCACTCCTAGAGGTCCGTTCCGCGAGACCGGCGAAGAGAA -GTACGCCCACACGCCCTATTCGGAGGCCTATTTGACTTCAGATATACTGGGTTGCTTCCC -CGTAATGTAAGAGAAGCAAATAACACCATTTCAAATACAGGAAAGAGGTACTTATCTTGA -AGATATCTAGGTCCGACTTCATCTTTGGACCTATCTTGGGAATTTGCGAATTCCTTCGTC -AAAACGAATGGAAGAATACTATTACGACACGAAACAACCCATTTACCCTTGCCTTTGAGT -GTCCCGGCGAGACAATGTTTGAGTATCTATACAAAAACCCCCAACAAGTTTCTCGCGTCG -CCAAAGCCGAAGCAGCAGACCCGGAGCAAATCGCAATGGATGTTTTTCCATGGGAAGAGA -AATTGGGCGCTTTTGCAGATGACAAGGTGGCTATCGTGGACATAGGTGGTAGCCACGGCA -ATGCACTCCGACAAATCAAGAAAGATGCGCCTGGATTGAAAGGGCGCTTGATTCTGCAGG -ATCTGGAGCCAGTCATCCTTGAGCATGGCAAGACCTTGCGCGCCGATGGTTTCGAGACGA -TGGTCTATGACTTTTTCAAACAGACGCAGCCAGTTCAGGGTCTGTAACGGTTTTCTCCAA -GCGTATCAAAGCGTGCCACCGTGCTAACAGTGGGGTTCATTTAGGGGCTTTGATCTACTA -CTTCCGACGTATCTTCCACGACTGGCCCGATGCTCCGGAATCCAAGCAGATTCTACAGAA -TACAGCAGCATCTATGAGTCGTGATTCTAGAATCCTGATCCACGATATCATTGTTCCTGA -GGTTGGGGCCACTATGAGCCACGCATGGCATGATATTAGTTTGCTGGCAATTGGGGGTGT -TGAACGTACGGAAAAGGACTTTGTTCGTCTTTTGGACGATGCAGGTCTGGAGGTGGTTAA -GGTTTGGAGAAAGAGCGGAGATATGCTGGGTATTGTGGAGGCCTGTCTCAAGTGAGAGGT -CTCTGCAATGCGTATGAAGGTGAGCGTATACGAATGTTACTACATACTGAGTCAACGGAT -GGCCCAGTCAATTTCGGTCATGTCTATGCGTCTAAGTTTTTTTTTTTTTTATATTGGCGG -TTGAATACGTAGAATGCTTATAGGACCACTCGTAGAGTCTATAATTGAGAGGTTAGACAC -CAAGTATACCTATGTTGTAGAGATGAGACAATTATCCAATGCAAGGGGTATGAACAGGCA -GCACACTCGGTCGACCGTAACTATATGTGGGGCCGGATATACCCTCACCCTGAATATATC -CGTCGGCACTTGTCTACACTTTCAATCCCAACTACCGTCATAATCTGATCTCCAGTGGGA -CTGGCGGGGACTTGTGGGGTGATTCTATTGCGAAGCTTCTGATTGATTGAGAGAACTGTA -CAAAACAACACCCCGCCATCCACATATAAAATATATACTCCCCCGGATTCTATAGCCCTT -GACACTATAACTATTTACCTACCTAGGTATATTCCTCTCTATAATCATGGACCCCTCCAA 
-AGTGAAGATCCCCCCAATGAAAGACCAAACTGTCGACAACATCACCGACAATGTAATCAC -AATCAACAGTCTCTGCGAAGACGAACGCATGAAGTACATCCTCGAAAGACTGGTAACCCA -CCTTCACGACTTCGCCCGCGAAACCCGTCTCAGCAGCGAAGAATGGATGACAGGCCTCCG -CTTCCTAACGGAAGTCGGTCAGACCTGCACAGAAGTCCGACAGGAATATATCCTACTCTC -CGATGTCCTGGGTCTATCTATCCTAGTCGACTCGATCGATCACCCCAAACCAAAGGGATC -GACGGAGGGAACGGTGCTGGGCCCCTTCCACACGCATGATGCCGAGGAAATACCGGCCGG -TGATTCCCTGTCGCATGATCCCAAGGGCGAGCCATTGCTTGTTGTTTGCACACTACGGGA -TCTGAAAGGGAATCCGGTGTCAGATGTTAAGATTGATATCTGGGAGACGGATTCGACGGG -CCATTATGATGTTCAGTATGCAGGTAGGGAAGGGCCTGATGGGCGGTGTATCATGCGCTC -GGATAATGAGGGTGTGTTTTGGTTCAACGCGATTACGCCTGTGCCGTATCCTATTCCTCA -TGATGGACCTGTTGGGAGGTTGTTGAAGAAGTTGCATCGTCATCCGTATCGTCCTTCGCA -TATGCATTTCATGTTCGAGAAGGAGGGTTATGATCACTTGATTACGTATGTTTTTACTAT -TGTTTTCGAATTAGTGGATTTTTTATGCTAATTTGATCCCCAGTGCTCTCTACCTCCGCA -ACGACCCTTATGAGACATCCGATGCAGTCTTTGGCGTCAAGGATTCCTTGACTGTGGACA -TCGGCAAGGCTGACGCTGAGATCGCGAAGAAATATAACGTCCCGGAAGGTCATCCTCTCC -TCACTTATGACTTTGTGCTTGTGTCCGACGAAGAAACCAGCCAGTTACGTGCGCACAATT -CCAAGGTTGCGCTGGACAAGTTAGGTCGGAAAGTCAGGATTGTTAATGGGCTGCCTGTGC -CGGACCTGGATTGAATGCCGAGGAATATCATTTGAGTATATCCGTGCGGGCTCGGCTATA -AATCATCATCATTTCTTCCAATATTGCCCAGTTTTTGTTGCATAGCAAGAGAAGAAATAA -AATTACATGTGAATGCAAGATATCGACGAGGTAGTTTAACACCGGAACTGATACTGCCAA -TTATAACCCCATGTCTGTATTTGTTTGTCCCGGTCTTGTTGAGAATATGTAACATGATCT -CAGTGATGACTGGTGCTTTGATTAGCCCAAATTGTGCTTCTCGAGTCTGTGCACGGTACG -ACGACCGGACTAGTAAGGAATGAACAGTTTCCCTCCTGGCTATCCGACGTATTTTGCGCA -CTGGTACAACCAGAAATCAAGGCAGTTGAGGGGTATCCAACGCCCTTTGTATCTTGTCTA -TATAGACACATCGTCATGGCTTGATCGAGAATCTCTGCATTTGGCTCCGATCTCCTCCAT -ATCGGCACTATGCGTTCCACTTACCTTTTCTTATCTGTGGCTGCTCCGTTGGTCTCGGCC -TTTGTTGTTCCCGATGAGAGCATCCTTGCGGAGATAGTCTCTGAGACTCATCCAGCCGGG -TGTGATCAGGTGTCTAAAGTAGATGTTGATACGAGCCTACTGGGAATCTCGCCGGAAAAA -CATAGAGGCCCAGGCAGAGATAGGGATTGGCGTGGTGATGAGGAACATCCCAGGCACGGC -GATTGGCCTGGACATGGAGGCTGGGGAGGACACGGAGACTGGCCCCATGACGATGAAGGT -CCCGAGCACAGCGATTGGCCGGGCTACGACGAAGATCCGCACAGGAGATGGTCCGGTGAC -GACGAACGACCCGGGTATGGACGAAGACCCGGATACGGAGAATGGCCTGGACACCGCGAT 
-GATGGCGAAGACAGATACCGAAGAATTGACGCCTGCCCGGGCCCGCTCTGCCGCGCAGAT -AAAACAACGTGGGAGCTTATCAAAGAAAGCGAGCATACGTCCCACCTTGCCGAGCTCATC -GCTGGAAATAAAGACCTGATTGAGATCTTGAACAGCACGACGGCAAATCACACCTTCTTT -GTGTTGACGAACCGTGCCCTGGAGGGATTTCCACGAGGTGAAAATAGTCCTAGTCCCAAG -TTCATTACTAGCTTGCTACGGTATCATATCCTGCCTGATCAATTATCCATCCATCATATC -GCAGGCCATCAGACGCTACCGACAAAGCTGAAAGAACCAGCTCTTGAATCAAAACTGCCA -CAGAGGATTGTTGTCCGAGAGCACCGCGATGGAGTGGTCTTAAATAGAAGGAGTAGGGTT -GTGGGAGCTGATATGGTAAGTAGAACAATAGTCCTCCTGTGGACATATTATATTATGGGG -TATGGTATACTAACCTCACAAGAAAACCAAAAACGGCATAATCCATCTCATAACCAGTCC -CCTGCACACACCCCCCGAGACACACACATTGCTTCACCACGCACCAGCTCACTTCAGCAC -CTTCACGCTGGCGTTAGCACGCACGAAACTTGCTTACAGTCTTGACCCGGCTCAGCGACA -GGGCGGCACAACGTTCGCGCCCACAAATGCGGCGTTTAGACGGCTCGGGGAGCATGTTAA -TCGATTCCTTTTCTCGGAGAGGGGCGAGGGGTGTCTGCGTGCGCTGATGCAGTACCATAT -TGTCCCAAATCGAACGCTGTACTCGGATGTGTTGTATTGTCATAATGGGAAGGCTCACGG -GTTGTTTTCAGGCCCTGGCAATGCGCATGAGGGGGAGGGTAAGTGTGTGAGTGGACCAGA -GGAAGACTCTGTGAATTTGCGGCTGGTGACTTTGTTGGAGAAGGATTTGGGAGTTGATGT -TAAGAGAGACTTTGGTGAGGTGGATATGCGGGTTAATGGGTTTGGTAGGGCTGAGCGATT -GGATTTGTTAGCGAGCGATGGGGTGGTGCATGGTATAGATCGAGTGTTGGTTCCGTCGAG -GAAGATCCAAGATGAAGTTGTTTTTGAAGAACTGATGATTGAGGAGCTCGTGGAGAGATT -GGGTTGTGCTTATAGGGCTCATGAACTATGAGGCTGTCTATATGTCAGAATTGAGACGCT -AATTATGTAAGGTGAAGAAGCCCTTTAAGGCCGTTCAATGCAATTTTAAGTCCAGCGCTT -TAAAGGCCTTTTCTCTATTTTCTATACCCTTAGAGCCTCTATATTACTTGGTGTTTCGGT -ATACCCCATGCTATATATGCAATGGGACTTTCCTCTTTAGTCGCTCTAACTATTGTGCGC -TCTACTCCAGTGACAGCGAGATCACTTGAGATTATTTGTGTCTGGCCAGTTTCTAGTCAA -TATGGCCCGGGGACCCGATTCGTGTAGGGTTCCATACAAAGGGATTAGCAATACTATTAC -TTATAACTCAAGGCACTAAATCTTGCTTGCAGCCTGTGTGGTTGGGCCCCATGACTTTCC -TGAGGGGAAGGGATTTGGAGATTGATCATAGAAAGGTGTTTTGGTGAGGTTGATAATGCA -GGTTAATGGGTTTGGTAGAGTTAGACGATTGGTACTTGCGGTGAAACCTACGATCAAGGA -AATCAATTTGAAAGGTTTCATTCCTAAGTTTGTTCAATCACAGTGGTGAGCGAGATCGGT -GCTTGCAAAATAAAATAAAAAACCAGATGTAACCCTTAAGTTAAACCCCGATGCCTCTAG -TTGGCAATTTGCCTAAAACCAGAGATAGCATCAAATAGGTGGTCGAAACCAGGAAGCAAC -AAGCAAGCAGAAACCATAATAAGCACCACATGTAAGATGTGTGAAATCTTTCGACTAGTG 
-CATTCGTCTAGCCGGTTTCGATAGAGCCTGGAAGGAAGACTCAAGAGCTTTCTTGACCTC -CTTGTAGCTGACAATGAAGCAACTTTGCTCGTCGCGACTGATTAAATGAATCTTTTCTTC -ACTTCCCGCATCAAGTTTGTTCATGCAGGTGATAATATGAGCAAGGTCAGTCACTGCGTC -ACCTTGGTTATCCACCGAATGAAAGACGTAATCGCGGAAAAGCTTCAGGAAGTAGCGTTC -TCCACTCTCCGACCAGCGCTGGTCGAGTTCATATTCAGGGCGTTCATTGATGAGCCCGAG -TTTGGCAAACAGGCGCACGAGTCGTCCATTTTCCAGCTCGCGGGCGAGATCAGAGGTCAG -CTGATCGTCCAGGTGAAGTGCCGAATCAAAAGTCGACATCAGCTGGGATGAAATGCCAGT -GATAAAAGTGTCAATGGTTCGCTCCTGGTCTTTCTGTAGTCCATTCAACAGCCAGAAAAC -AGAGTTCTTCAATTGAGGGCTATAAGCGCGAGTGAAATGCTCCATGGCTTTGGTCGGGTT -GTGGACGACGCTGGGCGAGTTGGCCCCAAGGGCGACAATCATTTGGCCGAAGTTGACCAG -ATCTTGTCGCTGGAGGTCGGCCACAGTGCGCTGGGTATCGAATTGCACCACGTCAAGAAT -AGCACAGGCGTTGATGCGAATGCGGGTCTTAGCGGTGATCAAAACCTTGCTAGGTTCGAG -CACTCGGGCGGCAAGACCACTAGCGTGAATCGCTTTGAGTCCATTGGCAATCTGGGTCAT -ATAGCCCCAGAGAACTTGCTCAGGAATGTGAGCATTAGGACGGCTTGGGAAGCGCCCATT -TGCAGCGCTTAAATGCTGGTCTGCAAGTGTCTTGGACAGTGGGTGATAGTCAGTCACAAA -GATCAAAGAGCTGTCTTGGAAGCTGCGGCTGGTGAATGCGTCGTGAATAGTCACCACACT -GGCGTTGCAGACGCGCTTCCAGGCTTGCACAGAACGGATAGCTTTTTCATTGGTCAAGCG -GAAACCTATTAAGATTAGTTCGAAGCTATAAACAAAAATGCTGGGAGGTTGCCATACCCT -CGAGTCTGCGAAGAGCATAAAAGGTGCCATCTTTGCTAGATTGGGCCTTGTAGATCCAGC -TGGGATAGCCAAAGGTCGCAGCATTCTTCTGATGACTCAGATCCAGAGGGACAAGGGAGT -GGAAGTAGTCCACCTGGGCAGGAAGTTGGGTGTCTTAAATTTCGTCAGCCTGAGTACAAA -TTTTTGTCAATTGTGATTCAACATACTGGGGAGAGTTTGCAGAGTAGCTGCAGATTTTTT -CTGCAGTTCTTCGCGCAAGTCGTTAGGGATGAAAAGGTCATGCACGTTGCGCTGGTACCC -CAATGTGTTCTGGCTGTGAGGGCCAATTGGGGCATAAAGATGGTACTGAATCTACTGGGC -GGTCAGAGCTTGTTCAAAGCAACAAAGAGGGGGATGTATTGAACATCAACGTACCGGTTG -CTGAAATCCAGTTTGGTTTGCGAAGAAAGCCCCTCCCATTGCACCAGTGGGGTCGTGGGA -ATAAGGATTGGCAGAAACGGGGCCAACGGCACCAGGAGTCATGGGAGTCGGGGTGGTCAC -AAAAGGATCGAAGGCACCTGTGGGGGTGACTCCTCCGTTTCCGTTGCCCTGAATCGGTGT -CTGGATGATGAAGTTAGCACTATGGCTCGGACGCGAGCTGTAGTACTGGGGCTGAGCACG -CTCAGGCAGAGGATCGAAGTCCTCTGCCAGGGCAGACAAGTTCCCAGTATGACGTTTAAG -TCCACTTGGCACCATTATGATGACAGGGATGAGGATGATAATGATATGAACGACAGAGAG -AGAGTGGGATGGTACAAAAGACAAGGAGAAGAGGAGGAACGAGAACAACAACAAGAAGAA 
-ACATTGAGGGAAGTCGAACGTGAAATCAAAATAGGGCAGGGACGACTGACCACGTGCTCG -AAGCTCTGGGGAACAAATTCCTGCACATCGGCTACCGTCCAATCTTGCACCGCTTCTGAC -CGCGTTGTTGGTGTACTCGTGTTGGAACCTTTTTCATAGGTCAGTGGATGCCATTGTAAC -CTTCTTCCCTCCTTTCACAAAGCACATACGTGATGATGTTGTTCGAGGTTGAAATGGAGC -TGCACTGGCAGCTTTAGGAGATATAGCGGTCGACTTTTTCGGTGCTGATACACCGTTCCC -AGACAGCAATGATGGGGTAAAACTAGGTGAGTCGACATTGAACCGCTTCTTGCTATCAAA -CGAGACAGTCCTTGTCAGCTCGCCCAACTTTTCGTCTAAAGACCAAGTTTGCATCGCACC -TGTCCGACTGATTTGCATTGAGCTTCGACGGGTCATGATTAAAAGCACAACCTGGATACA -AATGAGTTTAAAATCAGCTTCACCACTCACACTGGCTATGATCAAGATTTGCTAATACCT -TTGTCTTCATAGCGGCATCGCCCATAGATGGTCACGTTGCGGCACAATGTGTCTTTCGCG -TTCTCTATCAAGCAAAACCATTGTTATTAGCAACTCTGTCCGTCTCAGGTTCGACATTTA -TCTCCAGCTTGCCCATCGCATGACTGAAATGAATGTGATGCCATTCAAATTATACTCACC -ACGACTCTTCATTTTCGGCGATCCTGTCGTCCGACGAGCATCCTCCAATGGAGGCTTTCC -GGCAGACGCCATGATATCTGGCTGATACCCACTTGATGGTGCGGATCAGCGCAAAAAGAT -CCCCATTGAAACTGGACTCGGTAGAGGACAAAGGATGCTATTGGATGATAGCTGCATTCC -CTTGCTTTGATCTTATATAAATTCAAGTCACCGTGATCCACGCGTTTGGCGATTGGCGGA -TCAATTCCGGGAGAGCCCACGGACGTCGGGCGGTACCTCAGCCGATAAGAATAGGCATTA -ATGGTATGTAAAGATAAAGCCAAAGCCCTCTCTCACCCGAAGGAATGACTGAGAGCGACA -GAACTGATGGAATTGCAAGACGCGGGGCGAGAAACGAACTCGAGTGAAGGGGGATATGCG -CACGGACCAGGCGGTCAGAGCATCATTGAGCTTCGCCTTTGAGGCCACTTGAGCTTCGAC -ATCGTTCTTACTATTCTACGCGGAATTTGTGCACGTGCTTAATGAACACTCTTATATGAT -CTTTCAATTCATAGCTATTCACTCTTAACTCTTTTGATATCACCCCCATCACTGAGGTCT -ATGGCCTACGGCCCTTCACCAGCAAGATGTCACAATCGGAGGTTGATCTGGACCAGCTTT -CCGGTAGCGAGAAGTCTGCGCTAGAAATGTACATGGCTGTTACTAGCCAGGAACCTTCTG -AGGCAATTCCCCTGCTACGCCGGTCACAATGGAACGTACAGGTTGGTTTACCTTTCCCAT -CTTCTGGCCAAGCGTACGGGTGTTTGACTGATCCCCGCATGATAGATTGCCATATCCAAG -TTCTTCGATGGCGAAGGCCCGGATCCCCTCGAAGAAGCTCGCGCTGCCATGGACCGCCCA -CCTCCACCTCAACCCAACCGCCGAACCCAAAATCTAATGACCGACGACTTGAACGAACAC -TTGTCGCAAGTTATTCGTGCCACAACTACAGATCTGGCTCCGCGAGTTGACACACAAGCA -GGAGACCAACATACCTACCGGCCTCCTTTCATTCTTAGTCTACTCCTCAGTCCATTCAAT -CTGGTCTATCGACTCCTCTGCAGCTCATTCCGCCTCTTTGGTGTCCTCTTCCCTTTTCTG -CCACGCTTGTTCAATAGGACGGCGAACCCTGCGCTACAAGGCGCTCGCCGGAATACTACT 
-GGCCGTCGATCTCTCGGTCCCAAGGACACTGCCGCTCGCTTTATCCGCGAGTTCGAGGAA -GAGTACGAATCTAACCCGATTCCATTCCTAGAGAACGGATACAATATGGCGCTGGAGAAA -GCCCATCGCGATTTGAAATACCTGCTGGTCATTCTCTTATCCCCAGAGCATGACGATACC -AACAGCTGGGTCCGTGATACCCTGCTGGCACCTGAGGTCGTGGAATTCATCAAAGACCCC -CAGAACAACGTCCTGGTGTGGGGCGGCAACGTGCGAGACTCCGAAGCATACCAAGTTGCC -AACTCATTGAAATGCACCAAATTTCCCTTCGCTGCGGTCGTCGTTCATACCCCGAATGTT -TCATCAACTGCCATGTCTGTCGTCGGACGGATTGCAGGCCTCACTACGCCATCTGAGGTG -GTGACCAAGCTCCGGGCGGTCGTGACAGCTAATAGCGAGCCCCTGGAGCGACTTCGCTCT -TCACGTGCGGAGCAACAAGCCTCGCGCAGCCTTCGCGAAGAACAAGACTCAGCCTACGAG -CGGTCGCTGGCGATCGACCGTGAACGGGCGCGCCAGCGCCGGGAAGCGGAAGTGGAACGC -CAACGCGAAGAACAGGAAGCCGCTGGCCGACAGGCGGCAGAGGAGCAGCGGCGCCGTAAT -CTTGCCCAATGGAAGCTTTGGCGCTCACAATCGCTTAGCGCGGAGCCCGGACCAGATGTT -AAAGATGCGGTCCGTATTAGCGTGCGGCTCCCATCCGGCGATCGTATCATGCGCAAGTTC -GCGCCCGATGCGGATATTGAAGAGATCTACGCGGTGGTCGAGTGCCACGAGGTTTTACAG -GAGCAGGATACAGAGCGGTCCGTGACGAGTGAACCAGAGGGATTTATTCACCAATACGGA -TTTCACCTTGTCTCACCCATGCCTCGGATGGTGTATCCTGTTGACGAGGGTGGGTCGATC -AGAGAGAAGATTGGCCGTGGTGGCAATCTGTTGGTGGAACTGATCGACGAAGATGAAGAC -GATGACAACGAAGCTGTAACCGATATGTCTTGAATATGAGCGATCTCCATTCAATTGTTG -CTTTTGTGGATTTTAGGGACACACCGCTCTATTTTTTTTCCCCGACAATATCCAATGTAC -AATGTAGATGATTATGATTCGATATCTCACTTTCTCGCTTATTTACCTACTCCGTAGTTG -TACATTCTAGGCGGTCAGATTCTGAATCGACTTAGGCGAACAAGCATATTCCATATTTGG -CATGGTGTGCTAATAATCCTTCCTGGTTAGGCTCCGAACCACACAGCTGCACATCAGCCC -CACgtacggattacggattacggagtacggagcacggagtaaggGACTGGAGGCCTTATG -GGCACCCAATAATAATCAGTGAGATTTCCAAGTAATTTCGATATCGGCTACGACTGTTAT -TGGATATTCGGACAAAATGTGTACTTGGTCCGTTAATAAGGTACGGAACAGATATACCTT -TCGTCCGTTGATACAACATCAGATCTAGATATCAGGGGTTAAGGGGTTTATAGATTGGGA -AGTCCGCTCTTTACCCACTCTCCCGAGGGGCTCAGATGAGTCAGCTCGGATAGACCTTAC -CCGGACATCCGAGGGGTCCAAGTGGACGGATGGTGAGAGTCCCGACCCTTCTGGAACCTT -TTTTATTTGATATGTATGTATCGTGTACACGTGGGTATGAAGGGCATGGCACAGATTTAC -TTGCATTCTCTTTTCTTTTTTTGTGTTGCATTAATGTCTCGAGGACATGGAGTGTACTCC -ATCCATGGATAGAATCACCATGGAGTCATCGTATTCGCCTTCGCTACCGCATACACCATC -CAAGGCCACTATTACCCAAATCTAGAGATATAGCCGTGCTGATGTTTCTTGGCAAGCCAC 
-TTTAAGGGCTAGGGGCATGAAAAATCTTAAACATCAAAATTTCAATCGGAAACTTACAGC -TCCGCCCCGTGTTTGTTTCAACAGGTGTCCCCGGTTTTTATTTTTCTCGGCCAAAGGATA -CACTGTGTTTCCGAGTCGGGATGGTTTAGATCCTGACCCTTTTTCTTTTTCTCTTGTATA -CTTAGTCCAAACTTCCATTCCTTTTAATTTCTAATATCTTTACATTTACATCTTATATGG -TGATCATCTCTTTGATTATTCTTTCTTAGGGATATCACCCCACTTAATCAGTATCCCCCT -ATCAATCAGACAGTGACCGTGTTGGAAGGTCATCATGTCGCTCCTAGGGACAATAAACTC -CAACGTCACCGGCACACAAGACGTGCAGGAATCACATGTCGCCCACATGGAGGCTCGCCG -CCATGACCCCACCGACAGCGTGAGCACCGATGACACCGCTAATGAGAAGGGCGAGATTGG -CAACGAGGAATATGGCAAGGTTGAGGTCACTCGCCTGGCCCGGCAACTCACCCGTCAATC -AACACGCTTCTCTATCTCAACCCACAATGCAGAGAATCCCTTCATTGAAGTTCACGAGGA -CTCAACTCTCAACCCCAGCAGTGAGAACTTCAAGGCCAGAGATTGGATGAAGAACCTTCT -TGCCATCCAATCCCGCGACCCAGAACGATACCCGAAGCGCCAGGCTGGACTGGCATTCAA -AAATCTCAGTGTGCACGGATTTGGCAGTCCTACAGATTATCAAAAGGATGTGGCCAACTC -CGTTCTGGAAATTGGGGCCTTGTTCCGCAAGATTGCTGGCACTGGCAAGCAGAAGATTCA -GATCCTCGACAACTTCGATGGTCTCATTAAAAGCGGCGAGATGCTGGTCGTTCTCGGCCG -TCCTGGATCTGGCTGTTCGACTTTCTTGAAGACCATTGCAGGAGAGATGAACGGTATCTT -CAAGGATGCCAACTCCCACATGAACTACCAAGGAATCTCCGATAAGCAGATGCGCAACCA -GTTCCGTGGTGAAGCCATCTACACTGCCGAAACCGATGTGCATTTCCCTCAGCTTTCCGT -CGGAGATACCTTGAAGTTTGCTGCTATGGCTCGTGCCCCACGCAACCGCTTGCCGGGCGT -CTCTCGCGATCAGTACGCAGAGCACATGCGAGACGTCGTCATGGCCATGCTTGGTCTGTC -GCACACTATCAATACTCAGGTCGGTAACGATTTCATCCGTGGTGTCTCTGGTGGTGAGCG -GAAGCGTGTGAGTATCGCCGAGGCCACCCTCTGCGGTAGCCCTCTCCAATGCTGGGATAA -CAGTACCCGTGGTTTGGACAGTGCCAATGCCCTGGAGTTCTGTAAGACATTGGGCTTGAT -GTCTAAGTACTCGGGTACCACCTGTGCGGTTGCTATCTACCAGGCGTCCCAAAGTGCCTA -CGATGTCTTCGACAAAGTGACTGTGCTATACGAGGGCAGACAGATCTACTTTGGCCGCAC -CACCGACGCCAAGGAATTCTTCACCACCATGGGCTTCGAGTGTCCCGAACGCCAGACTAC -CGCCGATTTCTTAACCTCTCTTACGAGCCCCGCTGAGCGAATCGTCAAGCCTGGTTTTGA -GAGTAGGGTTCCCCGTACCCCCGATGAGTTCGCCACTGCTTGGAAAAACAGTGCTGCATT -TAAGACGCTTCAAAATGAGATCGCCGAGTATGATCAGCAGTACCCTCTTGGTGGCGAGTC -CCTCGACAAGTTTATTCAGTCTCGCAAGGCAATGCAGTCTAAGGGTCAGCGTGTTAAGTC -TCCATACACCCTATCTGTTACCGAGCAGGTCCAACTCTGTGTCACACGTGGTTTCCAGCG -TCTAAAGGGCGACTCCAGTTTGAGTGTGTCCGCCTTAATTGGAAACACGATCATGGCATT 
-GATTATCGGTTCGGTTTTCTTCCAGTTGGATGACGACGTGACCAGTTTCTACTCTCGTGG -TGCCCTTCTCTTCTTCTCTGTTCTGCTCAACGCCTTCTCCAGTGCCCTGGAGATTCTAAC -TCTTTACGCGCAACGTCCTATTGTCGAAAAGCAAGCTCGCTACGCCATGTATCATCCGTT -CGCTGAAGCGATCTCGTCGATGCTATGCGACATGCCGTATAAGATCACCAACGCCATCAC -TTTCAACATCACACTCTACTTCATGACTGGTCTACGCCAAACCCCCGGCGCCTTCTTCAC -TTTCATTCTCTTCTCTTTCATGACTACCCTGACCATGTCCATGGTTTTCCGTACCATCGC -TTCTTACTCGCGTACTCTGTCTCAGGCTCTGGTGCCTGCCGCTATCCTGATCCTTGGTCT -GGTCATCTACACTGGTTTCACTATTCCAACCCGCAACATGCTTGGTTGGTCTCGCTGGAT -GAACTACATTGATCCAATTGCTTATGGCTTCGAGAGCTTGATCGTCAACGAGTTCCACGG -TCGCAATTTCCCCTGTCTGCCCAAATCTTTCATCCCGGTGGGACCCTCATACATGGATGT -CGACCCGCTTAGCAAGATCTGCTCAGCAAAAGGAGCCGTCGCTGGCCAGAACTTTATTAC -CGGCGAGGATTACTACACTGCAAGCTTTGAGTACTCCAACAGCCATAAGTGGAGGAACTT -GGGAATTATGTTTGCGTTCATGATCTTCTTCATGGCTACCTATCTCATCGGCACTGAGTA -CATCTCCGAGTCAAAGTCCAAGGGTGAGGTTCTGCTTTTCCGTCGCGGTCACGCCCCCAA -GCACTCTAGCAACTCCGAGGATGACGTCGAGCAGACTCAGTCCGTGTCCGCTGCCGAGAA -GAAGGATGGCGCTAGCTCCAATGGCGAAGAAACAACTGCCGCTATCCAGAGACAAACCGC -TATCTTCCAATGGCAAGACGTGTGTTACGATATCCACATCAAGAAAGAGGAACGCCGCAT -TCTCGACCACGTTGACGGCTGGGTTAAGCCTGGTACATGCACTGCTCTCATGGGAGTTTC -TGGAGCCGGTAAAACCACTCTTTTGGATGTGCTTGCCACGCGTGTGACGATGGGTGTTGT -TTCTGGTGAAATGTTGGTTGATGGTCGTCCTCGCGACCAGTCTTTCCAGCGAAAGACGGG -CTACGTCCAACAACAGGATCTACATCTCCATACTACCACCGTCCGCGAGGCTCTCCGATT -CAGTGCTGTTCTCCGTCAACCGCGTCACGTTCCACACCAGGAGAAACTCGACTACGTCGA -GGAAGTGATCAAACTGCTTGGAATGGAACACTATGCGGATGCCGTCGTCGGTGTTCCTGG -TGAAGGTCTCAACGTCGAACAGCGTAAGCGACTTACTATTGGTGTCGAGCTAGCTGCCAA -GCCGCAGCTGCTTCTCTTCCTAGACGAGCCCACTTCCGGTCTTGATAGTCAAACGTCTTG -GTCTATCCTGGATCTTATTGACACCTTGACCAAGCACGGCCAGGCTATTCTCTGCACTAT -TCACCAGCCCTCTGCCATGCTCTTCCAGCGGTTTGATCGTCTCCTGTTCCTCGCTAAGGG -CGGTAGAACCGTCTATTTCGGAGAAATCGGCGAAAAGTCCTCGACGCTTTCTAACTACTT -TGAACGAAACGGTGCACCTAAGCTTTCTTCCGACGCCAACCCTGCCGAGTGGATGCTTGA -GGTAATTGGAGCTGCCCCCGGAACCCACAGTGAGATTGACTGGCCTGCTGTGTGGCGCGA -TAGCCCTGAGCGCCAGGAGGTCCACAATCACCTTGCTGAGCTCAAGTCCAACCTCTCTCT -GAAGCCCGTTGCCACAAACGATAACGACCCAACCGGGTTTAACGAGTTTGCCGCACCTTT 
-CACTGTCCAACTCTGGGAATGTCTTATCCGTGTGTTCAGCCAGTACTGGCGAACTCCAGT -CTACATCTACTCTAAGATAGCGCTCTGCACCCTCACAGCGCTCTACGTCGGATTCTCTTT -CTTCCACGCCCAGAACAGTATGCAAGGACTCCAGAACCAAATGTTCAGTGTCTTCATGTT -GATGACAGTCTTTGGTAACTTGGTACAACAAATCATGCCGCACTTCGTCACCCAGCGCTC -CCTTTACGAAGTCCGCGAACGCCCCTCGAAATCATACTCTTGGCAAGCCTTCATGGCTGC -AAACATTATCGTCGAACTCCCCTGGAACGCCCTCATGTCCGTTCTCATTTTCGTCTGCTG -GTACTATCCCATCGGACTTCAACGTAACACCAGTCCCGATGACCTCCACGAGCGTGGCGC -TCTGATGTGGCTCCTCGTTTTGACCTTCATGCTCTTCACTTCCACCTTCTCCCACATGAT -GATTGCCGGCATCGAGCTCGCAGAAACAGGAGGCAATATCGCCAACCTGCTCTTCTCCCT -TTGTCTGATCTTCTGTGGTGTCCTCGCCACCCCCGGCACAATGCCCCGCTTCTGGATCTT -CATGTATCGTGTCTCGCCATTCACCTACCTAGTCTCTGCAATGCTGTCGACTGGCACATC -CGGCGCAAAAGTTATCTGCGAGGCCGTCGAGTTGCTCCACTTCGAGCCTCCCAAGGGCGA -GACCTGTGGCGCATATATGCGGGATTTCATCGGAACACCCACCCAGCCCGGACGTGGCGG -CTATCTCCTGGACTACAACGCGACGAGCGACTGCGCTTTCTGCAGTGTCGATAACACCGA -TACTTTCCTCCTGGGCGTGAGCAGTTCTTACAGCGATGCGTGGCGCAACTTCGGCCTCAT -GTGGGTGTTTATCATCTTCAACATCTGTGCTGCTGTCGGTATCTACTGGCTTGCTCGTGT -GCCCAAGGCACCTCGCAGCAAGAAGACCAAGACTGCTTGATTGATATATACCCTCTCctc -tttctttttctttttcctttccattccttctcttttACCCTCTGCTCAACTGAACAGGCT -TTCGAGCGCTACTGTCAAAACACTGCATTAGACCGCCATCTCTAATCCCTCCACTCTTCT -TTCTGCATATTTTTTTGATTTACATATTACACCTGGTCTTGCAGATAGGGATGTCCTTGA -TAATATATACACTAGCATAGAGCCTCTTGTTCAATTACATCTTGGTTGCTTCTAATTCAA -GTATCTCGTGCGCACCCAAGTAGACAAGACAATAAATCGCCTTACAAGATCCTCAATAAA -AGATTATCATTTATGTAGGGTAAGTCTGAAGATTGAAGTGATGATAATGATATGTATACA -GTACAGTTCAACATTATTAACCAACATGATATGCGCCCGCTAACCGCAAAGCAGAGAAGC -AAAAAGCAGAAAAAAAAAAGATTACATTGAACGAAAAAAAGAAAAACCCCCTTAGGGAGT -AAAAAGCAAGAAAAAACAATCACCAAGTAAAAGCCTCTTCGACAAAATCAATAACAGCCC -CTTTCCCAATCCTCTCCCTAGCGGCAGACCTAACCTCATCCGCAAAAGCCCCAGGTCCAC -AAACACTAACAAGCGTCGCCCCGGTTCTCCCGGGTAGAATCTCATCAAGAACAACATCAG -GTCTACACCGACCAGAATGCATAAGCACCGTCGCCGAAGGACTAACGATATCCGCCGCGC -GACGCGGCTTCGAGACGAAAAGCTTTACAACGAGGATATCCCGTCGGCCGGGTAATCGCA -GGATCTGGTCCATGTAGCTACTTACCCAGGCAAGGTGATCTGTCGAGCGGACAGACCAGA -TCAAATAGACTTTCCGAGTTGCGGCGGTGTTTTGGGCGGCTTGGGTGACGAGGGCGTGGG 
-TGTGGAGGAGGTGGTGGGTTATCCCTGCGCCGGCGCTGAAGAGGATTGTTGTGCCGTAGC -TGGAAGTATTTGTTGGGTCTGAGCCGTAGGGGCCCTCGATGAAGCCTGATAGAGTAAGGG -ATTTGGAGGGGGATGCGTTGGCGAGGTTGTAGAGGCGACGGGTCATTCCCTTTTGGGCGC -CGATTATGAGGGAGACTTGGGTTGTTGGTTTGGTGGGTTTGGTGGGGAGGTTGGATTGTT -TTTCTAGGTCTGTGTATTGGCCCTTTGTTGGGGTTGGGATTTGGGAGGGGATGGGTGTTG -CTGTGGTGTTTGTGCTGGGCTCTGCCCAGGCTATTGAGAATGGGTGAGACATCCACCATG -AGATGCTGGGGATGTAGGCGAAGACGTGGCAGCCTGGTTCGATTTGGACGAGTTTCGGGA -GGTGGAAGGTTACGCGGCAGGCTTCGCCTGGGAGGGCTTGGACGACCATTGTTGTTATGC -CGGAGGTTGAGATGTTAAGGTGGACAAGGCGGAAGAGGCGTGCGAGTCGGTCACCTAGCC -AGATTGCGGCGATTGCTTTCGCCCAGCTTTGCTGTGGGAGACTGTCTATCCGTAGATGCA -TGTATACGCCGAGGAAGGTGAGTACGGCGGCGAGCTGGTGCAGGTGTAGAAAGGTCTCGT -AGAAGGCGTGGCGGATGGGCGAGGGGGAGTGTAGGCAGAGGAACATCATTGCGACTGTGC -CTACTAGACCCCAGGTGAAGAAGGGCGTGCCACGTAGACGGGCGAGCATGCGAGTGAAGT -CTTCCTCGTCGACGGCGTTGACGGCCCATGCAATTGTGTGTACGACTGACTCGAGGACGA -CGATGCGGCCCAGCCAGCGGTGGAGGAGGTTGTATGTGTCGAAGCTGATGTGGAGGAGAG -GGATCAATGGGTTGTTTCGGCCAGCGAGGAGGAAGAGCGGGATCATGTTGAGGACGGCCA -GCGTCCCGGATCGGCCGCGCAGCTCGGCTACCAAGGCTGCCTTGTGGTTGGCACTATAGT -CCAGGAAGACGCAGTATGTGACCTGGCTGGCGAAGTATAGTGTGACCAGGATGGTCTGCA -GTCGGGAGGGTAAAGTGCCGACATTGACAGCGGAAGAGAGTTGCAATTCTCGGTTGTGTC -TCTTTCTTCCTAGTGGAGCGTAAAGGATGTGTTTCTTGAGGTTGGACCACCATGATGATT -GTTCCTGGGCCCAGAAGGTCTGCTGGCGCTGGTTTGCACCAGATGCGGTGATTCGGCGTA -GATAGGCATGGCTGATCTGGGCGACCCGGCCACAGAACAATGCGAATGCTACGGCGGCCA -GAGAAATGTAAATAATTCGAGTAATGATCACATCGCGGGGGACGTCCACACCACCCAACC -CTCTTGAGTACTCGTATATGTATGTGGCGTTCGACCCGTACCCTCGTGCCGCGATATGCG -GATGGGTTGTTGCCATTGAACGGGCGTAGATAGCGAAGATAGAAAGGAAGGTGTTGCTGT -CTGGACAGACGACATGCAAGGCTGTGCTTTCGACGTTGCACAACCCCTTGGTCGAAATGT -CAATCTGGACTCTGTTTCAAGGAGCCGTTTCAACAGCTGGGATCAAAATCTGAAGGTTTA -GATATCGACTGGATGTGGGCGTTTGCGCAAACGGAATCGAAGACGGTGGATCAGATGAGA -TATATTATAAACAGGGTTGGCGGAAGTGAGTCGATATAGATAGAGACCTCTTTGGTTGTA -TCAGACAAAGACAATGTGTAATAATGGGACCATTGTGGCAGGTTCGTTTGGAGGTTGACA -ATGAACAATGTGTATGTCTAGGTTGAAGTCAACAAAGCGGCCTAAAATAACCCTCTGATC -TTTGGAATTCGAGAGAAATTAGCACGTCACCAAGAATCGAAGGTTGGCAGAGGACTCACT 
-CCCAAAAGGGATCCTGGAATGCAAGGGATGGGCACGTTTTTCTGTCAGGAATGAAATCGG -ACGATGTCGAGGCAGTCCAGACCAGAATCAATGCCAAGCAAATAGTAAAGAGAATGTATT -CAATCGAGACCAGCCGACTATAAAAAGCTAAGAGAATGGAGGAAAAGAACGAAAGCTCGG -GCTGAAGACGGAGTTTTGAAGGGAAAAAAGGGTAAGATGGACATATCTTTCTGCATAACC -CCTGTCGATGGTCTGATTAGCATGTTCAAGGCGTTTAAATGGCCGAGAGCCTATCAGCAT -GATAAGTGAGTGACACTAAAGAACGCCGGCGTTTCCAAGAAGGCTAGTGGCCCCTTAATA -CAGGCGTGGTGATTTGCTGATGATCCTTGCAGATCCAAGTTCGATGCTCGGCGTGACGCC -TTTGGATGCGAGAGCCTGACTCTGAAACATGATGAATTGTAGACTTTGAGTTTTCCCGGT -GGATATCCGGTGAACACCCGTCTTATTGTCTCATCGTCAGACATTCGCTTTGTGGAAGTT -TCAGAGAAACGCGCACTGACCACTTTACTATACATCTGCGTTTGACCCTTATAGCAAGTA -CGAAGTAGTTATTATGTACAATCTCTATTCAAGGTATATATATTCTTCATTTACCGTCCC -ATCCATCCGCCATCAACGGTGAGAACCTCGCCAGACACATAGCTACTAGCCCGACTAGCC -AGGTACACCACGGCACCCTTGAAGTCCTCGGGGTTTCCCCATCGACCAGCGGGGATACGT -GCCATGATACCAGCGTTGCGAGAAGCATCATTGATAAGGGCGGTGTTCATATCAGTGGCA -ATATAACCGGGCGCGATCGCGTTGACATTGATTCCCTTGGCGACCCAGTCATTTGAGAGG -GCCTTGGTCAACTGCCCAATACCACCCTTAGATGCCGCGTAGGCGGGAACTGTAATTCCA -CCCTGGAAGGTGAGCAAAGAGGCGACATTAATGATCGAACCTCGACGACCCTGGGGGAAT -TCAGAGGCGTCACGGGCCAGTAAGTATGCGGCGAATTCTCGGCACATGATGAACACGGAG -GACAAGTTGATTTCCAGGACCTATACATCATTTCCACCAGTGAGTTATAGTGTTTCAAGG -GGAAATAGCATGAGGGGGTGTATGGAGCTTTTGACTCACCTCGTCCCAGTCCTCATCTGG -GAACTGTTCGGCCGGGTGTCGCCGTTGGATCCCTGCACAGTTCACCAGGATCTCGGGCTT -CAGTCCTTCGCTGACCAATGCGGGGATGATGCCTTTGACGGCTTGCCGATCTCCCAACTC -AGCGACATGGATCCAAGCCTTACGCCCAAGACGGTTGATGATCTCGTCCTTGGTTGATGT -GTTGGTGTTGTCTCTCTATGAAGATGTTAGCACGTGCCTTCTTCACTCATTTCGGGTAAC -TCACCTGAATCAAGATAATATCGGCGCCGGCTTCAGCCAGAGCAAAGGCCATGGCTTGGC -CAATGCCTCGTGTTCCACCTGTGACGATGGCGGTTTTCCCACTTAGGGAGAAGAGGGACA -TTACGTCTGCCATTGTGATATGAAGCAAAGATGGTTTGAAATCAAAAGAATAGAAAGAAT -AGAAAGAGTCCTCAATGGGAGGAGCTCCAAATAAACCAAACCCCCAAATGGGGAACGCGG -GGGTTCTCCGAACGGGCAATATCTATACCCGGCCATTAGCCGATCGGCAATTGCAGTCTT -TTTTCCCAAAAAAAATATATATGCTATTAAGTAGTTGTGAGACAATGTTATTATAAGTAA -TATTCTGATCTAGGTATGCCAGTCTCAATATGATTTCAATTCTAAAAGGAATATGTTAGG -CCAAAAGTAGACCAATTTACccccaaaggccccaaggccccaaggccccaaagtccccaa 
-aggcTTTCCAAGGTTAACGCATCAACCCCGTTCCCCCACAATACCAGAAGCATGGACACG -ATGAGTAAGGGTCTGAGGACTTCCTTGTGGAGACGCTCGACTTTGATTTTGTACTTTTCA -ATGTTTTCGCTATGGACGAAGAATCAGAAGCTTCACGGCGTGATATCGCCAAACGTGTGT -CTCGAGCTTGCTTGCATTGTAGACAGCGCAAGTCTCGATGTGACCTGTAAGACCATTTTT -CCCTTCTTCGGCTTGCATTCTTTCCCCGGATCTGCAGTATAGTTGGTTCCGTGCTGATCG -CATGCAGAGACTCCAACGGCAACCCGGGCAAACCACCATGTCAGCGCTGTGTACGCGAAA -ATCGCGAATGCATACTGGGTGGCTCAAATCGGGGAGGTCGTCGAATCCGAAAGAACAAAA -TCAAAAACTTCACGCCCGCCAATCACTCGCCAAGCAAGGATTCAGATGCATCAAGCCCAA -CCAACTCTGAAAACCATAGAGTGCAATCAGCATCCTATCCCGGGCCGGTAGTCTTTTTGC -CACCGAACCCTCCGGCACCAGCCTCAGCATCCGTTGAGGAAGATGATACCTCGATATCAT -CTGTCCCGCGTAATCCCTCAGACGCTTGGCAATGCTTAACCGGCATCGCTACGGAAGGCA -CGGGGGCGATTCCCGTGTCCCGCAACGACCATGTTCGGTCGGAGCAAACAACATTCCCAA -CGTACAACGGCCTGCACAATGGCGTTGTCCCAGACTTTAACCACCCACCCACCGGTATCA -AGGCCTATAGACTCGTTCAGTCGTGTGCTCTAGACCCTGAGACGGTGTGGCAATTAATTT -CTCGCTATGCGGATAACTTCCATCAGTATCTTCCTCTCGTGCCACGGAAGTATTTCACGC -GCAACGCCCTAGATGCCTTTGCCGTGAATGAAAAACACCTGCTTACCGCAGTCCTGACGA -TCGCCTCCAAGGACTTGGTCGATCAGCCCGAGATTCACGAGTACTGCTCGAAGTACATGC -ATGAGCTGATTTCAGGCATCGCCGCGGGGGCAGAGTGTGATGTTGAAGCGGTAGAGGCGC -TTCTGTTACTTGCTGAGTGGGAGCCGCAGGGACTGCGACCTCGCATCGAGCGCGTTGGAC -GTGGAGAGGAAGACCGCGCTGCGTGGATGCATGTCGGGCTTGCGTTACGCTCGGGGTATT -TCATCGGTATGGACCGTACTTCTTTCCGGGGGGATCCTTATGGTGATCAAGAGAGTGAGG -CTCGTCGGCGCCTGGCATGGGCGAGCTGCTACGTCTCTGATCGATTAATTTCTGTGCGCA -TTGGACGTGCTTTCTGGTCTCGTGGCCCAGGACCCATGACGGGTCTTGTCAGTCAGGACT -TTCCGTCGTTACAGCCAGTGAACGAGGGCGACGAGGACTATGCAAAGATCTTTCAGGCTA -TGCTGGATCTTACGCAGCTTTATGGAAATGTTCATGAGGTCTTGTATTCGGGGATGCGTA -CTAGTAACCAGATGATGTTGATGGGAGACTACGTTAAATATGTGGATGACTTCCGTCTGG -CGATTCTACGGTGGAAGTCACTTTGGGGGTCATTGGACTGTAAGTTGACCTGTTCTCGAT -TTTTTGACCTTTTACGCATGTGGTTGTTCTAACATATTGTTAGGCTCACCACCTATGCGA -GCAACACTGCAATTGTCGTACGAGTACTTACGTCTGTATACAACTGCTTTCGCGTTTCAA -GCTGCGATTTCGCAGTCGTTAGTCAAACCGAAAACAGACTTACAAGGCCAGCGAGAACAT -CTCCGATCGACTTTCAAGAATGTCGCTTCAATGCAGGACTCGCGGTTTATTTATGAATCT -GTTGATGCGGCCAAGTCGTACCTGACCATCCTTGTGGATCTGGTAGATCCTGAGAAGCAT 
-CTCCATTTCATGCCTCTCCGCTTCTACTTGTAAGTTCTTCCGCCGGTTCTGTCGAGGCAA -ACAGCTCATGCGATATAGATATGGCATCTACTCTGCCGTCTTTCTCTACAAGGTAAGGCC -CCTATTCAATTCCTCCTTTTGTACGTATCATCTGACCGATATCATAGGCTCGTTCTTTCG -GAATGATGGTCCCTTCAGAAGAAGCAAAAGTCCAGGGTCTAGTCAGTCGAACCACCGAGG -TTCTGAAGCAAGCAAGCGCCGGTGCTGATGACGTTGGGGCCCGCTATGCTCGTCTTCTAG -AGCTGCTGTGGAAGCCCAAATCGACATCCGTAGACACACAGCAAAACAGTGACTTCTCAC -TGCAAACAGCTCTCTCTAATCCGGTCGCAGATCCAGGATACATGCAGTTCAGCCCCGCAA -ATGACTTCTCATGGCTGGATCTAGAGGCCGTGGGCGATTATGTATCCGGGGACCAGATTT -CGAATGGTTTGTTGGGACTGGATGCGTTCCAGAATGGAGCTGATATGTACCAATCTGGAG -AGTCACGATCGCAGTCCTGGCAGCCATCTGCATGGATGGGAGATATGAGCAGCAATCTTC -TCTTTTAAATAGGCTGAGATCGGTGTACCCCCACACCCCAATCGGACGGCAAATGCGGTG -CGGGGGGACTATCGGAGATAGCCCCGAGGGTGGTAACCGAGGGGTTCTCTTTTCCTTTGC -CAGTTCAACCTTGGAGGTTTGAGTGTTTACTTCTGCATTTTACTGCATATCATCACACAC -ACATCAACTTTGTCGCATCATGTCATTCAATGAGATCCAGGGCCGACTCGCGCTTATTAC -AGGTGCATCGGGAGGGTAAGCTATTCCTCAGCACTCGGTACCCCGACATTGAGAATAAAT -AATTGACATTCTTTTTCTAGAATTGGTGCTGCCTGCGCACATCAGCTTGCAGAACATGGA -GTGCATCTCGCTCTGACGTATGCGACCAACTTGACCGCAATGAACGCGCTAGTGGCCGAT -CTACAAAGCAAATACGCCGATAAGAAGCTTCGTATCTCAACCCATAAAGTGGATGTTGGA -TCCGCTGACGACATCGAGACGATGTTCCAACAAATTGACACCGAACACGGCCACCGACCT -GATATCCTGATCTCGAACGCTGGACACGGAAAGCGGTTTCCCCAAATATGGGACTGCTCT -CTCGAAGAGTTCGATTACACTTTAACCGTCAACCTGCGTGCTTCGTTCATTCTCGTCAAG -GGAGTGGTCGAGCGCATGAAAAGCCAGAATTGGGGACGTATTGTGTTTATGTCCTCGATT -GCTGCGCAAGGCGGAGGAATCAATGGATGTCGTATGTACCTTGGTTTCTTTCTGGGGAAT -ATCGAACTGACCCTCTCTCTTTCTCTCTCTTTCAGATTACGCCGCCTCTAAAGGTGGTCT -TACAGGCATGATGAAAAACCTTTCTACCCGACTGGCTGAATTCAATATCAGTGTTAACGA -TGTGGCACCTGCCATGATTGGTGATACAGGTATGATCCCTAATGCAGCAGCGATCCCTGA -AGTAGCAGCTGGTATTCCGCTTGGTCGTCTCGGGACTCCTGAGGAAACAGCCAATGTTGT -GACTATGCTTGTGAAAACAGGATACATGACGGGTCAGAGCCTCTTGCTGGCTGGGGGGCT -GAAATGAGCTACCCGGGAATATTAGGAAGAATGTTATAAGAAGTAACTAGCCTTGCACGC -CTAGGAGTTATCATCAACATTTTGATTCAATTTTGTAGATCCAATTTGATAAATTTTCAT -CACCCATATATACATCTATCCTTTTCTATTGTCACAATCTGGGTATTTTCAAACACAATt -catcatcatcatcaccatcGCAACGTTCACAAAGAGCCACTAGAGCGTCGTCGGCTCTCA 
-ATAACAACCTTCACAACAGGGTTCCCCTCCTCATCAACACCGTTTCCAGCAAGTGTAAAC -GCCCGCTCGCCCTCCTCCAAGGTAAGCCTATGCGTAACAACCTTCTCCTCAACACTCTTC -ATCTTCCCACTAGCCAGCAGCGCAATAGCAGCAGGGTAAGCATTTCCATCATAGCGGAAC -ACACCAATGATATCCACCTCGCGCAGCGCCGCCGCACCAACAGGCAGCGTCTGGATAGGA -TTCCCCATACCAATCTGCACCAACACACCACCAGGTGCACTAGCATAGATACCAGCCTGC -ACACAAGCCGGTACACCAGTACAATCATAAACACGACTGAATCCAGGAACGATAAGTCCA -TTCTTGGCACCGACCGCCTCAACCAACGTAGCCGCAACCTTCTGCGCATTCTCCAGCGCA -TAAGCGGTCTGCTCAGCATGAGGCGCGTCCTTCGCAGGAGGCGGTGTCTCCGCTCTAGGA -ATAAGAGCAGTCTTAAGACCAAGTCCCATCTCCTCCGCAATCTTCAATCTCCGCGCGTCG -ATATCAGCAACGACAATTGTTGTGAAGTTCTCACTAGCAGCCAACGCCCCGGCAAGCAAG -AGACCAATTGCGCCGGCGCCGAAAACCAACGCAGCGGTCTCTTCCCCAGCTGCGCGGTTC -TGCGCTACTTCTTCCTTGCTAGGTGGGTGTGAGCGCCGCACAGCGTGCAGCGTGACAGCG -AGCGGCTCGACGAGCGCACCGCCTGCATCGGAGACAGTGTCTGGGAGGAGATGACACATA -TCTGCTGGGTGGTTTGTACGTTCCATCAGTGTACCGTCGAGGTGGGGGAAACTCTTTGCC -GAGCTGCGGAATTGCATGGCTTTGCAGATGTTGTAGCGGCCTTGTTTGCAGAGTGCACAG -GTGCGACATGGGAGACCGACTTCGAGGGCGACTCGGTCGCCGACTTTCAGGGTTGAGACG -GAGGTGCCGGTGGCTGTTACTGTGCCGGCAGATTCGTGGCCCAGACACATTGGTGAGCGG -ACGACGAAGTCGCCATTGCGGCCGTGGCTATAGTAGTGGAGGTCTGAGCCACAGATTCCG -GTTGCTCGGATGGCGACTTGGACTTCTGCGCTGGATGGGGTGGAAATGGGACGTTGTTCC -TGATTGGGTTAGGATTGGGTTGTATAGTATATGTTTTTGGGGTTTACTTACCAGGCGGAG -GTCTTTGGCGCCGTGGAGGACGAGTGCGGTGGTTGTGCTTGCCATGGTGTATGTAGTATG -TAGCTGTTCACAAGTTTCAAAAGAAAGAAGGGAAGTAAAATAGCGAAATAGCGAAGCCCG -ACTTTGTTCGGCCTTTGGAGTCAACTCCCGTGCACGTCAGGCCAAACACCCCACTTGGTC -ATCGTGGGGGGAAGACTTGGGCCGGAGGTGCTCCGCTGTCGGTGATTGGAAAGAACAGGA -AATTTCAATACCCGACGTGGGGGTATTTTTTTACATTTGAAGATTAAGAAGTCAGGAGAA -GTAATGAGTTAAGTTAATGAAGTGTAGATGCTACAAGATAGATGTAGAAATGTATGTGTT -TAGTACAGAGATATAAAGGTTGATCGATACGGGAATATACGATCTATATATGTTGTCCGT -TGGATATACAAACCGGCGGAGCCCCCGTTGCGTATGGCATGGGGGTACGATAGCGGGGAT -CTTGTCTCCAATCTTTTGATTCCCTGGAATCTTGGAATTTCTACTTTCTTTTCATTCTCA -AATCTTTGAATTGCTTCAATCCCTTCGTCTTCGACTATAGAAACAATACAAATGTGAGCA -ACCCCGCTGACATCATTTGTCGCATCTAACATTTGCAGAAACATGGCTCAATCAGCTCTC -CTCATTGGAGAAATCACTCACGCGCGCAAGGAGTGGGAGAGCATCTCATCTCTTGTGACA 
-TTGAAGGTAAGTAATTCCTAGTGCGCAGCGATCGAATCCCGCTAAAAACAAACCAGGAAT -TCCCCAGCGGAACCAGAGAAGAGTTCATTGCGAACTGCAAGGCTGGCAAGTATGACGATG -TAGTCGCGCTCTACCGGTCCAACAACTCCACCAAGGTAGGCATTGTTCCTCCTGCGCTCG -GGTATATTGTACTTACAGTGCATAGTACACCGGTCCCTTCAATGCAGAGATGCTCGAGGT -TCTGCCTAAATCTCTAAAGTACATCTGTCACAACGGCGCGGGATACGACAACATTGACAT -CACCGCCTGCACGGAGAAGAACATCGCTGTGTCTAGCACCCCAGTCGCTGTCAACGATGC -GACAGCCGATGTGGGCATTTTCCTGATGATCGGTGCTTTGCGCCAGGCTCAGATCCCCAT -CACTGCTCTCCGGGAAGGTAAGTGGCTCCGAACCATGCCGGAATAATCCGCCCCAGTAAC -CGGTAACTAATCATAATCCCATAACAACAGGAAAGTGGCAAGGCCCGATCACCGCAGGAG -GAAAGGAGTACAGAACCACCTTGGGCCACGACCCCAAGGGTAAAATCCTCGGAATTCTGG -GTATGGGAGGCATCGGACGGGTATGTAACACAGGCGGTAGTACACCACCCAACCCTAGCT -AACCACGAACAGGAAATGGCAATCCGCGCTAAGGCCTTCGGAATGAAGATTCAGTACCAC -AACCGGACCCGTCTATCAGCCGATCTGGAAGTCGGCGCGACATACGTCTCGTTTGATGAG -TTGCTCGCCAATTCCGACGTGCTCAGCTTGAACTTGGGCTTGAACGCATCCACCCGCCAC -ATCATTGGTGCGAACGAGTTCACCAAGATGAAGGATGGCGTGGTGATCGTTAACACGGCG -CGTGGTGCGCTGATTGATGAGAAGGCTTTGGTCGCGGCACTTGACTCTGGCAAGGTATGT -AATGCTGCGCATTAAACTTCAACATCCCGTCCATGATGGATATGCGGACAATCTCGACTG -ACTGTGGTTGTTCTTTAGGTGCGCTCTGCTGGCTTGGATGTGTATGAGTGCGAGCCGCAA -ATTGAACCCGGTCTTGTTAGCAACCCTAATGTTATGTTGACGCCGCATATTGGTACTGGC -ACTTATGAGACTCAGAAGGATATGGAGATTCTGGTGTTGGATAACCTGCGGTTGGCTATT -CAGAAGGGTGAGCTTATTACACAAGTACCAGAGCAGAAGAAATAGATATGGATATATAAA -GAATAGAATTCGTTGCGTTCAATCATCAAATATCAAGTAGTGTAGCTTCGAAAGGAAATG -TTCGCCTTAGTTGATTGTAACTTTAACTAGTTAGTTAGTTATTCCATGTTGACACCTGAG -ATGTGGGGCAGGAACTAGATGACTAAGGCATGACCTAACTCAAAGGCGGAGTCCACCCAA -AATGGCTAGCGTTAGGGCTCGCTTAGGGCCAGAGCGCCAAGAGCGAAAAAAACCGCACCA -AAGCTAGTGGCTCAAGCGAGCGCCGGACGACGACGAAAAGCGACAACAACTCATCACCGA -GGACCATCACACCCCCTCGCCAACTCCTTCAAGATGGTGAGTGCTGCATGCCCCTGTCAT -CGATAATGACCATTCGGAATCCCATTCTCTACAAAATGCCACCCGTGTGATGCACATTCA -ATGTCCGATCGCCGGGTCGACTTTCGCTGCCGACATCACCAACATCCAACGACGATCCCA -TTGCTAATGAGCGATGCTTTTCACAGGTCCGTTACGCCGCCCAGGAGATCTCGCAGGAAA -AGAGCGCCCGCGCGTGCGGCTCTTACCTGCGTGTCAGCTTCAAGAACACCCGTGAGACTG -CCCAGGCCGTCAACGGAATGAAGCTGTCCAAGGCTCTCACTTTCCTCGAGAACGTCACCA 
-CCAAGACCATGGCCGTTCCCATGCGCCGCTACGCTGGCTCCACCGGTCGCACCGCTCAGG -GTGAGTCGCCCTTTTTTTTGGAGCAAATTCCGGATCGATCGGATGGAATTTTTGAATGGA -TTGTGGGAGGACGAACAGCATCGCCGATCACACGACAGCCACTGGGATATGCAATGGAAT -GAGGAAAAATGAACTGAATGAGAGGATGGATTGGCTAATTGGCGATGCTGTGGATTCTAC -GATCTGGATGAAAATTTGTGAAGTCGAACATCGACTGACAATTTTTGGTTTATAGGCAAG -CAGTTCGGTGTCTCCAAGGCCCGCTGGCCCGTCAAGTCCGCTGAGCACATCATCGATCTG -CTCAAGAACGCTGAGGCCAACGCCGATGGCAAGGGTCTCGATACCGCCAACCTGATTGTC -AAGCGCATCCAGGTCAACCAGGCCCCCAAGGGTCGCCGCCGCACTTACCGTGCTCACGGT -CGTGTACGTTTTTCCTCCGATTTACGGTTAAGTCTGTATCTGCAAGAAATGCTGACTTTG -TCTCCAGATCAACCCCTACATGACCAACCCCTGCCACATCGAGCTCATCCTCACCGAGGC -CAGCGAGCAGGTCGAGAAGGCTTCCACCGACAAGCAGGTCCGTCTCTCTTCCCGCCAGCG -TGGTACCCAGATCCGCCGTGCTCTCATCGAGGCATAAACGGATTGCCATAAGAAGGTGTG -AGGGGACGTGTGGTTACGCAATGTGAAAAGATTTTTGCGGTGGACGGGAAGGATCGGAGT -TCTTTGCATCAGCTGTGTATGTTGGAACGGAATCGATACCGGATCATACAAGATTTTAAG -GTCATGGCGCCCAACACTCGGAGGAGAACCCAATAAAAAGTTTAAACAAAATGGCCTAGG -GTGATGGAAATAGGATTCTGTACTTCCCGATCCAATCATGATACAAACGTGGTTCTTTCT -CCGTGACTCTATCGAGTGTTTTTCCCCTTTGATAGAGAGTGTTTTCGGATCCTTCTAGCT -GCACACCTCTCTCGAGATTCCGATCTTTCGGTCATATAATATGATCTCTTTTTTTTCTGT -TCTAATATGCATCCCATTGTAGCTATGAGTGGATGATTCGATAGCTCAATAGCAAGTTAG -AATGCGGGCTAACCTCTATATACTGATATGACGGGAGTGTCTCAAGGCTGGCAATGGAAT -CACAAGGAAGCCCTGCCTAAACCACTTTTGGTCCCCGACAATCCTGATCAAGCAACTTCA -TAAAATATGCCAAGGAGAACGTCCACCTACACATAACAAGAGATCTACATGCGGGGGTAT -AAGTAGCCAGGAAAAGAATACAATTTTGGAGATTCATTCATCCAAACTACTCAATATATA -TATCTGTTGtatatatatatatatatatatatatatatatTCATCCGATCAGTCCATCAT -GACCAACCTCTCAGACTACCGTCTCCTCTGCTTCGACGTCTACGGCACCCTAATAGACTG -GGAAAGCGGCATCATCACTGCACTAGAACCAATCCTCGCGAAAAGTACCACCCAATTCAC -CCGTGAACACCTCCTAACAACCTACCACGACCTCGAAAGCACCCAGCAAAACACAACCCC -GGACCTACCATACTCAGACTTGCTATCAGCAATCCACCCCACCCTTGCCGCACGTCTAGG -CCTTGACCCGCCAACCATCGAGGAAAGCCACCAATTCGGCAACTCTGTCGGAACCTGGCC -AGCCTTCCCAGACACTGTCGATGCCCTGCGCCGGCTATCAAAGCACTACAAGCTAGTAGT -TCTGTCGAACGTAGACCGGACCTCATTCGCCAAGTCCAATGCCGGCAGCTTGCAGGGCGT -GCCGTTTGATATGATTCTTACCGCGCAGGATATTGGTAGTTATAAGCCTGATCCACGGAA 
-CTTTGAATACATGTTGAGTGCCGTGAAGCGCGAGTTTGGGGTTGAGCCTGGACAGGTTCT -ACAAACGGCGCAGAGCCAGTTTCATGATCATCAGCCTGCGAAAAAGGTTGGCATTAAGTC -GGTTTGGATTGAGAGGCCTGGGGCTTTGATGGGAAATCGGGGGGATCCGATCTTTGATTG -GCGGTTTGAAACATTGGGGGAAATGGCAGATGCTGTTGAAGCAGAGTAGAGAGGTACAGG -TTTGATATTTGAAAATGTGACGAGTTTGTCAGGTACTCGATAGTTCTCAATATATATTGA -ATGCCCTTTGAGCTTAGGTACTGTCTACCCCCACACGTGGCTGAGTCACCAAAATCAAAA -CATCCCATCCCTGGAGTCTCGTTGCCTGGAGCCTGGATGGCCTCGGCTTGGCAATGCAGA -ACACCTGGCTGCGTGCGTGGAATGTGGCTTTGACCGCGGGAGATATTGGCATTACATATC -TTTGACTGTCCCGTTGTCTTGACCCTTTAGCGAGGGCGCTTTTGATTAGTTCCCTGCAGT -GCTAGCTTTAGCCCGGCAGTTGGCCCCCGGCTAGAATTATTCTTTGACAATCTGAATCCT -TTTTTCCTCAAAATTCCCCCTGAAATTAAACAGATTGTGCGACCATGAAGCGCTTCCTGG -GATCTCTGAGCAGACGCTCAAGTGAGTGTCTCACCGTCTCCGCGTTTCCCCGGGCCCGCA -AATTGGCTGACTTGTACCTTGAAGGCTCCAGCGACAGTGTCGAATATCGCGAAGACTCAC -CAGAGGCCATTGTCCTGAAAGAATTGGTTTGTGCTGATCTCAGATGAAATGAAGAAGACA -CGCTGAGGTCTCGCTAGACTGCCTTCtgcgaatcaaatacaaatacaaatgcgaatgtga -atgGCTCTTCGCGCGACGCGGTATGTGCCCCGGAGGTGACAGCAGCCCCCTTTTCAAGAC -TGACAATCGCATTTTCACGACTCCAGCAGGGAAATGAATTCGTCCACCTCCCCCAAATCG -TTGAAGCCGCCGAGTCGAGCCCCAATGCCGCCAAGCAAGCGGCTCTGCGCATTCGAAAGT -ATCTCGCAGAACCCGCCGGCACCCCGAACCAGACCCAGTACAATGCCATCATGCTCATGC -GCATCTTGGTCGACAACCCAGGCCACACCTTCACCCGCAACTTCGATGCCAAATTCGTGA -CGACCATTAAAGAGCTGCTGCGCACAGGGCGCGACTGGCACGTGCAGAGCTATCTCCGCC -AGTATCTGGATCTCCTCGAGCAACAGCGGTTATGGGACGAAAATCTCAAATCGCTACTGC -AGATGTGGTCGAAGGAGAAGACAAAAGCCGTCCGCGGTTTGGTGAGCACAGCATGATATC -TTTGCGCGCCCACCATTCAAGACTCTAACATAGCCAGATCGACCCTTTTCCGATGAACAC -CATCGTCCCACCGCAAGTCCCGCCCAGATATTTTCAGCAGCAACCATACAGCCTGTCCGG -CGTGCCGGCAAATACCCTCCCCAACCCGGCAGAACTAGCGGCGCGCATCGAGGAAGCGCG -CAATTCGGCAAAGCTGCTCACGCAGTTCGTGCAGTCTACCCCACCAGCAGAGCTAGAAAA -CAACGATCTCATCAAGGAGTTCGTTGACCGCTGCAGGTCCAGTTCGCGCGGTATTCAAGG -GTATATTCACTCGACCAACCCAGCTCCGGACGAGGACACTCTGTTGACGCTCATTGAGAC -GAACGACGAGATCTCGGTAGCGTTGTCGCAGCAGCAGCGTGCCATGCTGAAGGCACGCAA -AATCAGAGGATCTTCGTCGCCGAGCTCGTCGAATGTGAATAGCCCCTCGCCGACATCACC -GCCTGTGGCATCAGGACCTACGTATTTCTCCCCTCCCACCGGTCTGCCACCTCAGACTGC 
-AACACGGAGCCCTGAATCCCCTGCTCCTGTCCCGACACAGCTGCCCTCGGTAGCCATGAC -TGGTAGCCGTGTGCCTGCTTCAAATCCTGCGCCATCTGCGACTGCCAGCCGATATGAATA -CAATGCCGAGGAGTTCCAGGTGCGGAATCCTTTCGCCGATGACTATGCTACAAATGACTC -GGATCAGGAGAGACATCGGGGGCATGGTGATTCACAGCCCCATAGTGATCGCGTGCGCTC -CCAGCCCACTGAGCAGGAACGCTAGGGTCATGCAGGGATCCATCTCACGTTTCTTTTGAT -CGTCTAGCGCCTGGACTCTTTCTTTTTTTTTTTTTGGTTATGTAAACCCCTGCTCGCGGT -TTTGTCTTTTTTTTTTGGCTCACTTTTTACTCACTTTTCCTGTCTTTTGCCTTCCATGAT -ACCTCGCACATCGCAACTGCACACATATTGATTGACCTTTTACTGCCAACGTCTTTAGCA -GCTCGTTTGATCTATAGGAGAGCGAGGGTGCGCCGTGTCTTCGTATCCTAGGCATATATA -CCCAGGTCAATACATACTTGCTACCTTATACCCTCTGCGGGTCATTGTTGTAGCGTCCGA -AGGACATCTGGTCATATACAAGGCCCGTACAAGCAAAAGATTGGATGCATTGAGGATGTC -ATAAACAGCTTGATAAAGTGCACAAAAGTACAACATGCTAACCCAATGCTTGTGGCTGTT -CCCATACATTGAAAACAAGCCCCCCTAAATACCAGACACCCTACAAGCTGAAAACGCCCG -CGCGATAAAAACTGCCTCCTTCATATCTTCGTGACCTCAAAAACAGCGGACAAGCGACCG -GCATCTTGACAACTGGAACTGGAGTATAGCACTACTCCTCAATCTTGAGAATAACCCTCT -CACTATCCGCCCGATCCCTCGCCGCAATCTCCTCAGGAAAAGGTTTCCTCGCCCACACAC -GTCTCCTGTATAAAGATCAGCCATCCACACACCCACCAAAGAGACCGAAGAACTCACGCG -CGCTCATAAATATGATACCGAGGGTTCTTAAGCTCATTCCTCAATTTCGCAACATAAACA -ATAACCTCATCCTGCGTCCAAGGCTCCGGAGCCCCGAATTTACAAAGCAACCACATAGCC -CATCCCTCCATCCCAGACATCCAATGATGATAATTGACAGTGCCGGCCTCTTTATATTTT -TGATCACGCGCCCACGGGCCAAGAGGCCATTTGTATTCTTTTTCACAGATGTCCACGAAC -CCGGCCTTGCGGAAGGCGTCTTGCATCATGTCGATTGTTCCTAGTGGACGGTTTGCACGC -TCGCCGCAGGCTGATAGTGCTGGCCCCCAGCTGCGGAGGATGTTGTCTTCTGGGAGGCTG -TCATCGTCGCATTCGACGTAGGGACTTGCCTCTAGTTGTTCGATCCAGCCACCTGGGCGG -AGATTGCTGTTTGGGTTTGGGGTTAGTTGGAGGAGGAGGATCGGAAGGGGTGATGGCTTA -CTCGTAGCATTGCTTGTAGAGACGGTCCCATTCGTTGTAGTCAAAGGCTCCGATCATAAT -GCGCATGTGGATTAGGTCTTGGGGTTCGTTCCAGGTCCATTCTTCGGCGATGTTGTCGAC -TTCTAGGATGCAGTTTGGGGGGACCCATGTTACCGGTGGTGGGAAGAGGTCGACTCCGCG -GATGGTGGCTGGATTTGGTTAATATGTTGATATGTGGGTGGATGCTGGGCTTACCTTCTG -GAAACATGTCGGCTACATCGCTAGAATAGGGTAAGTGAGGATTCTTTGCGAATATGAGTG -TTTTCTTTGCACATACATGGCCCAGTTACCCAGACCTGTTCCCAGATCGAGGATATGCTT -GGGATTTTTCACAGGCGCCCGGAACAGTGGGTTGTCGTAGTCTGAGTCCATAACGAGGGC 
-TACTAAGTGGCTGCAATCGTTAGTCTTGTGAGATGATTATGCATAAGATGCATACCCAGC -TTCATATGTCTCGAACTGTTTCTCGTCCGATGGAATCCTGATAGGAAGTCAGTAGGTATA -TCTTGAGGCAGATATCTATACGTACAGATATTCTTTCTGGCTCAATGTTTGATACCGTCG -ACCATTCTCCTCTAACCCCCGATAGAGTGAGGAGCCGATCGAAGTGGTTTCCCTGAGATT -GTATGAGAGATCGTGTCCATATAGGCACATAGGTAATATTTACGATTGCCAATCATCGCT -GTAAACTTTTGTGAGATGGATCGCTGGATCCTAATACCTGTTAGAACAATAGATATTGAA -AATCAAATCGACATACCACGGCGATTTTATCCTCTGGCTCATGTGTATTATCCATTGTCG -AAATTCAGATTTCCATGGAGGAAAGAAAAGGTGAAGAATGCGAAACACAGTGCGCCAGAG -AAAGGCATCCGCCCAGTTATAAAGTTAAGCGTCATGTCAACGGCCATATTAGGAATCGTA -ACAGAGAAACACCATAGGCTTGAGATGTGGACTTTTGATATGTGGGCTCTGGAAAAATCT -GCCGATCGATATGAATCCTGGGATTGGACATCAGTTTCGGTGAATTCGTCAAAATGCCAT -CTTCTACCAATAAGAAGTCTCTTGTGAAGGTTATGCTTCACTGAAATATTCTGCAGAGGC -CTAAACTCACTCCACTCCAACATCAAagctcagcaaagctcagcaaagctcagcaaagct -caTGGACCTGGACTTTCCACATGGAAAACTCGCATTAGATATATCTATTTTGATGTATTT -AATTCTTCCTGCACGGTCATAAATATGGCCACCCGCATGGGCTTCGACTTATGATTCGTA -GATATAACTTCCGACACGAGGTAAACGAGGAATACGCAGCTTCATGCAGGTGAGTCTTGT -TCGAAGCAAATGTACAACATGGGTATATTCTGATATTCATACAATCCTACATTGCCTAAC -TACCGTATACTCCGTAGCTGATCCGGGGAAATAGGCTATCAAATAGGCTACGCGGGAAGC -CGGTCGGATGACGTCTCGACACTCGAACTATATAACCACACTGCATTCATTACAACACAC -CATTCAACACATCTCATCTCTACAATCCTAAGCATGTCAAACCTTCAGAAAATGCGCTGC -ATTATCCAACCTTCCGCCTCCTCCAAAAACCTCATCCTAACAGACACCCCTCTCCCAACC -CCGAACCACGCACAGGGAGAACATCTCATCCAAATAAAAGCCTGCAGTCCTTGCGCCGGC -GAACTTCTCTGGATGGCAAATTTCCCCCCGCCCACCGAGCGCACTCTCATCCCATGTCCG -GATATGGCGGGAATCGTCATCTCAGCACCGGACGACTCGCCCTTCCAGCCAGGCGCTGAA -GTTTACGCGCGCACAAACTATGTCCGACCAGGAAATGGGCGGGAATATTCCATCGCCGTT -ACCGATGAATTGGCGCACAAACCTAAGGATCTAAGCTGGGTCCAGGCTGCGGCGGTTCCT -GTCTCCGCCGAGACCGCTTGGCAGGTTCTTTTTGTTCATGCTCTTCCTACCGGAGGAAAT -GGAGAAGAGATGGATTTGGAGACTGCCAAGACCGCTTGGGCTGGGAAAAGAATTCTTGTT -ACTGCTGCTTCTGGTGGGGTGGGTATCTGGCTTGTGCAGTTGGCTACCTTGCTAGGTGCG -GAGGTGGTTGCGACGTGTGGTCCCCGGAATGTGGAGCTTGTGAGGTCGCTTGGTGCGAAG -GAGGTGCTTGATTATCGGGTTGCAAATGTGAAGGAGTGGGCACAGGTGCCTTCGAATAAG -GTGGATGTTGTTGTGGATTGTATCGGGGGGAAGGCTCTTGAAGATGCGTGGTGGACTGTG 
-AAGGATGGTGGTGTTGTGCTGAGTATCTTCCGCCCACCTTTGCAGGCTCGTCCTGATAGC -TTTGAGGGTGCAGGTGTGAAAGATAAGTTTTTCATTATGCAGCCTGTGAGAAGGCAGTTG -GAGGAGATTTCGAAGTTGATTGAGAAGGGGGTGTGTCGGGGGTTGGTGGATAGTGTTTGG -CCTCTTGAGCAGTACGAAGAGGTATTCAAGAGACTGGATGGAGGACACGCGAGGGGGAAG -GTTGTCTTTGATTTCTCGCTGAATCACTAGCACTAGTGTTCCAGCAGTTTTCTCACGGGT -TGTTGCAACACACGAGATAGGAAAAAGGGAAGATCATGGTTGTTTCTAGAAGTTTGGTCC -CAGTACTGAGGAGTATATTGAATGATGACTAGCTGCCATTTCATCATTTGATGACTTATA -TTACACAGAATAAGAGACATCATAAGAGCCGATTATGTATCTAGACTCTGCAAATTAGGA -TCTATACAGTACCTCACAGTGTGTGCATCCTTGGTGTATGTGAGAAGGATAGTCTCATGC -TAGGAAAAGGCTGGACAGCAGGAGACAAAGCAACGAAAGAGTATCGTGCATGTGATATGT -TCTTTTCGGGACCAATTCCCATTTACAAGCACAGAATGTGAGCCAAGAAGAGGTATATGA -GATATCTGTACCCCTAGGCTCCAAATCCACGTGGTCGGGTGGTCTAAGACTAGTATCTGG -TACATTTGCATTGAAAGCTCTCGTATCGACGAGGCCACATTGGAATTATACAACCTGTGA -TGATGCTTCCCTCTAAAAGCCTTGAGGTGTATATGACTACTCGTAATGTTGAATTATATT -TGGTGACCTTAATGATACATGGTTCGATACTAAATTTGCATATGCTGTGTACCTCTACAC -ATAGTAGTTGATCAACTAAATCTATTTGGCTCCTAGTGCAGGATGTTCTTTTCTCATTAG -ACATCACGATGAAACAAAACCCTTGATAGGTGGAATTAAGAGGTACCTAGTCAGTTTCAT -CAGACTTAGTTTGATTTCCAAACCCAGTGGACCTAACAGGATCATCCTGGAATGAAATTA -GATCAACGTCCAACTACTGCCAAGTCGAGTTTATTTCTGATCTCCACATTGTGAAAGCAA -AGCGATCAAATGTCATGTCTCACCTGTTAGAGCTGTCCCAATCAACGTGAAACTCGCATC -CCTGTTCGGCGCGTGGAGCAAGGCTTGGCAACCCTAGCCTCAGGAACGTCGGGCTTGTCT -CGGGAGAGAAGATCGCCCGGCCTGGTTCAATATGCCTCCGAGGGCCATATTGTCCCCGAT -AGGATCCGATCTAGATTTACCGCCAGTCTCACCAGTCGCTTCCCGGTGGGGGAGCGCTGT -CCCTTGGGTATATCAGATGTGTTTCAGCATGATACGTGCAACCTTACGCGCCGTTCAATT -TGTCTCTGATAAGTGTCAATCTAGTGTTTGCGGTTGATTAATGACGCGATACAGCTCAAC -CAGAGCTTCTTTTGAATATGTCGACATCCTATATAAGCAGCGGCATGCCGGCGCGACACA -AGACCAAATGAGCATGCTGGTCAGTCTTTTTTGAATTTTCATTCACTTTCACTTTCGTTT -CCTCCGGTTTGAGACCGTGCAGACTTTCATTCAAGTATACATTCATTTCTCGGTCCGGTT -TACCGATTCCATTCACTTCCACGACTTCCCAACTTGCCAAAAATGAAGCATTTCGCCCCC -GCTCTGGCTTTCGCCAGCATCATCTCCTTGGTCAACGGCCATGGTTTTGTCAGCAGCCCA -AAAGCCCGACAGCCCGGCACGGCAATGGAGGCCGCCTGTGGCAAGCAAGTCGTGTCCAAC -CAAGGCTCTGACAAGTACGGCAACATCCAAGGCGAGCTGCAAGTTGCCTCTAGCCAAGAC 
-GACTACGAAGCCGCCGCATGCGATATCTGGATTTGCAAGGGCTACAAGTTCGCCGACAAC -AAGGATAACGTCTACTCGTACAAGGCAGGACAGAAGGTCGACTTCACCGTCGACATTCGC -GCTCCGCACACCGGTGTCGCTAATGTCTCTGTTGTCGACACATCGTCCAACACTGTTATC -GGTGCCCCGTTGATCTCCTGGTCTGTCTATGCATCTGTCGCCACCGGCGTCAAAGCCAAT -GAGACCAGCTTCAGCGTCACTATGCCCGATGATCTCGAAGACCAGTGTGTCACCGCTGGT -GACTGTGTGCTGCAGTGGTACTGGTACGCACAGTCAATCGACCAGACCTACGAGTCATGT -ATCGACTTCACTCTCGGtggatctggctctggcggtgtctcggctccggctcCGTCGGTG -AAGGCCTCGTCGATCGGCAGCTCTACAGATGTTGCTACCATTGCTACTGCTACTAATCCG -GCCACAACTGGCGCTGTTCAACCCACCACAACGGCTGACGCTAAACCCACCACCACAATT -GCTGCTCAGCCTACTACTACTTTCGCCACCAGTGCCCGTCAATCGGCTACTTCTGCCGAG -CTACCAGTCGCTACCAGCACCGCAGCTACTACCAGTGCTGCCATGGCTTTCCCCACCGAT -AGTGCTGCGGATGTCCTGGCCTGGATCCAGGCTATCTTCGGCAACTTGGCGGGTAACTAA -ATGCGGGCGCGAAGAATGGCCCACTTAAAAGTGGGCGACGTAATATCGGAAGCATATTGT -GTTCAACTTTGGACCTACCTGTACCTCCTGTGTCAGCTAGTAGCTAGCTATACTGGTAAT -TAAATATCAAATTGATCACGCCTCTCATTGTATTGTCCTGGATTCCAATGCAACGCTGTT -TATGGCGATATCGGAGAAATCGCCGAATATCCGACATCGCCACTTCCTCGATTCAGTTCA -CGAACATCAAAAATGGATATAAAACAAGTACTCCCTATACAAGTAAACATTTTCTAGTAA -TTCAAATCATCCAAGCAATGTACTGTCGCCTGAACTCTACAGCAACCAGACTGGTCAATC -CCTCTTCAAGAATCCCTATTCCACCAGTCTACAGACATTTCACCACAACAATGACCCAAC -AAAACAAAGACTGGAGCGCAAGCCAATACCTCAAATTCGAAAACGAACGCACCCAGCCCG -CCCGAGATCTCCTCTCGCACGTGCCCCTAACAGCCCCAAAAAGAATAATCGACTTAGGCT -GTGGCCCAGCCAACTCAACAACAGTCTTGGCAAACAAATACCCATCAGCAACAATCACAG -GACTAGACTCCTCGCCCGACATGATCGAGCGCGCAAAGAAAGTCCTCCCAGACCGGGAAT -TCTATGTCGCAGATCTAAGCACCTACACGCCAGACCCAGCCAAGCCAGTCGACCTCTTCT -TCTCGAATGCGGTGTTCCAGTGGCTGAAAGCTGATGATCGTATTGCGGTTATTAAGCGGC -TGCTAGAGCCACAGCAACAGGGCGCGGTATTTGCACTTCAAGTTCCACATAATCTGAATG -AGCCTTCGCATATCTTCATGGGCGAGACTGCTGCTGACGGCCCGTGGGCGGACAAACTAG -CTGGTATACAGAGAGATGGGTTTCAGTCACCGCAGGAACTTTATGATCTGGTTAAGCCAT -TTTGTAGTCAGGTTTCGGTTTTTGAGACCAGTTATTATCATTCGCTTGAGAGCCATGAGG -CTGTTATTGAGTGGGTTAAGGGGACTGGGTTGAGGCCGTTCTTGGATCCGTTGGAGGAGG -GCGAGAAAGAGGCTTTCTTGAAGGGTTACTTGAGACGCTTACAGTCGGTATATCCTGTTT -CGGTTGATGGGAGGGTGTTGTTGAAGTATCCGCGGTTGTTTATGGTTGCTGTGAAATGAG 
-TTGGCTGAGGGTAGTCGTTCTTTATGCCTTGTCGGTGGAGATTTACTTGTACATATGTAT -GTGTAACATTATGTTACTAGTCTTTTGCGTCACATAGTACATTGATAGCAAAAATATCTG -CTCTGGGACTTTTAGTGTGGATCCTATCTGACTTTATAGAAGTCTAGAGATGGAAAAAAA -GAGACTCAAGGATCTATGATCGGATTTACGTCCACGTTGCTACTTAGTAAGCTGTTTGGG -TACATGACAGCGCCATTTCCCAATGTTAGCTTAAACTGTCCCAGATGAAGCGATCTTAAT -GGTACCATCAGTCTTTCCGGACCGATCCAAGCTCTTGCTCTCCATCTTTTGGAGAATATC -CTGGCCAGAGACGACCTCTCCGAAGACAACGTGCTTGCCATCCAAGTGGGGGGTTTTCAC -GAACTGTTCAAGTCAGAAACCACGTCTTTCCTCTATCCTTTAGTATACTTCCGACTTACA -GTAATGAAGAACTGAGAACCATTGGTGTTTGGTCCCGCATTTGCCATGGAGAGTAAGCCG -CCTTTGGTATGCTTAATCTGGAAGTTCTCATCCGCAAACTTCTCGCCATAAATCGACTTG -CCGCCGGTTCCCTAGAGAGCAAGTAGAGATTAGCTAGGGCTCGAACTTCCCCTCACGCAG -CGACCGTGACTGACATTGCCGCGAGTAAAATCACCGCCCTGGGCCATGAACCCAGCAATT -ACGCGGTGGAAAGCAGAGCCTGCATAGCCGAAGCCTTTTTCGCCCGTAGCAAGGGCACGG -AAGTTGCCTGCTGTATAGGGGACGACGTCGTCGTATAGTTTGAACTCGATGCGACCCCAA -TTTTCCACTGCATGGAGTTAGTATCTTCCAGTGCGACAAAACACCGAACTTACCCTGGCC -GCTCGGCGAGGTGTATTCAATATCGAAATAGGGGTTGGCCATGATGAATTTGATTCTGAA -GTTGAGACAGAAACAGAAGGGAAGAAGGGGCGCGGGGTAATAAATGAGCGTATGCAGGAA -TGAGGTCATACCGCTGAATTGGCCAACGTATGAGAACGTAACACATAATAGAACATAGAC -TTTACATGCACTTAGGTACTGAGAATAGTGAACATAGATGTACTTCATAGGTAGTTGGAT -GTATTGGTTCCCCACTAGGGTTCGACTAAACCGGTGACCGCTACGAAGGAATTTTGTTAA -CTCAAAAGCGTACGCCATGGTTCATGGAAAGATAAGTCAAATTGGTGAAAGTTTCGTTGC -ATTGTGTCTTGAAGAAACAAGTCTCAGACAGTCACATGTTTGAAAAGATGCTACTGTCAA -AAAGACAAGTCTGAGAGGCCATATGTTGAGTGCAGGAGGTCTGAGACCAGAGCAACATAG -GATGCGAAAGTGCAAATGAACCAGATCGCGGGAAAGATCAAGAGAAAAGGCGGCCAAGCA -AGACTCTCACCTAGAGCCTACCATCCCCGCATGTGTGCCCTCTCATTTACTCAGATTCGT -TGAGTTCGCCGGTCTGGATATCCTTCTCGTTGTTCTCGTCGGTGGCATCAGCAGGGCCTG -AAGCGGTATTGGCGAGATAACTCTCCATCTTAATGTTCAGTCGAGAGTATTGTTTCTGGA -CGGTACCTGCCGGCTTGCCGGCATCTTTGGCAATGGCAGCCCAGTCAATCTGTGACATAA -GTCAGTTAGTATATGAAATGCTATCGTCACATTTGAGAGACACTTACCTTCATGCCAGTG -GACATCTTCACACATTTCCACAGGAACACCAGATCCCTGTCAGGAATAGTGCTTGCAGAA -CCCTTCACAGGGGAGCTCTTCACTGGGGTTCGCTTCACGACCCCTCCCTTTGCAGCCTTG -GTACCCTTGGACTTGGGGGTGCTTGGGTCTTTTGGTGCCATGTTGATGATAATGAAGTTG 
-AGTTAGTGGTAGAGTAGTCTGGAACGGAAGTGTGATTTGGAAAAAGTGGGTTGAACGGCA -GAGGAATAAAAAAGAAGAGACAAAAGGGAACGGTTTCGAGTTCTGGCAGGTGCTGACGTA -CTTAGGTTGGGAAGTCATGTCAATGAAGATTTTGTTGCTACGACAAACAAACCCTGAGCA -ACTGATTTACTGGCAATATTGCTGTTATATGACAAGTGAATATGCAATAGTTGGGACCGC -AGCCCTCCACTCATCTAGACAATTACCGTACCTTCATTGAGGAATAAACACAACTTACAT -AGTGATCTATGTGCGTCTGCACATTTAGGCCAAGCCTAGTGGGCTATACGTTTACTTGAG -TTCAATAAAATTGATATCTGGATGTTTCAAGTAGAATGATCCAAGTCTTTAGTGTAGTAT -ATAACACATCCAGCACATCCAGCACATCAATTCCCTTGTCCCTTCTGGCAAACAACTCTG -ACTTGCACAATGGGCTATGGATGTACCAATATTAGCCTCTCGTGGTATCGTAGAGGTACT -TTGCCGCAGGTTCTATGACGAAGATTAGCGGAAGGCCAAAGCTGCTACAGGGTAAGTTGA -TGCACTCCGGTGCTGTCCTAAAATGATATTGGGTCATTTCGAATACCATACTTCCTCCCT -GGGAATAAAATCTACTATTTGGGTTATTGTACATAGAAATGTTTTCTAGAGACAATGTGA -ATTAACTCATTGATCGCTGCATCATAGGTATGGCACCAAGCAAATTCACACACCTATTGA -CACCGAAAACATTCAAAATAATTAACTGAATATCTGGAGAGTCACGCTTAGAATCACAAA -GCAGCTACACTGCCTTTGCTGCGGTCCAAGACATACCGATTCCACCTCTGTGGGACCGCT -TCGTCATCGCCTCCTCAATTTGCTCCAGATCCCTCAGTCGATTCGTCTCTTCTTCAGATG -CAAAATTAGACTCAATCAAATAAGGACCATCAACCTGAGATGCCCGTTTATGTCCCGGAA -CGACAATAGCCGGTCCCAAGACTGCAATTTGATCAAGCGTTCCAAGCCAATTCTTCCGCT -TCTCCGCTGTCTTGGCTTCTGCCAGAAATTGAAAACAGTTACCGTAGAAGATATCACCAG -CTATCACTATATTAAGATAAGGTGCATGCCGGAATGATGAGGCATTTGTATCGAAATGGA -CGACGTCGATGCCGAAAAGGCTTGAGTTATCGATTGAGAGTTCCCCATGTGCGGGGAGAG -ACTGGTGAATAACTCGGCTCTCTACGAGGATCAGGCCGTTCGGAAACCATCCTTCCCACT -ATGGGACAGTGTTCGCTAGTGTTTCTTCCATAGCTGTAGCAACGGATGCTGTGGCGACGC -TGATGGCACCCGGAAACTGTTTTCTCATAACCGGGTTACCCAAGTAATGGTATCCTTGCG -CGTGAGGTGTGTCAATGTATTTGAGTTCCTTTCCCGGAGTCGTCTCTTTGATCCAGGATG -AGAGCTTCGGCGTGACGGGTGGATATTGGGGTGTCAACGAGCACGGCTGATGTAGGTCCA -TGTATCAAGGTACAGGAAAAAGGGGACCACCACTCCTTGGTTGTGTCTTGCGAACCTGTT -GTGGCCGCTATGGCCGGCGAGACATACACGTCAACTTGAAAATTGTGTGGCAGGATTGAG -AAAAGAAAGAAAAAATGGAAGAAGGGGAAGCGAGGAGAGAAGGAAGATAGTAACAGAAGA -TGATTACGAGACGTATTTCTGTTCATAGACTGTCACAATTATTCTCAATATACTAATAGC -TCCGGGGGTATAGTGTTCAAGGGTGTCGAAAATTAGAAATCATTCTGCAGATCCTAGATC -AAACGCTGCAACAACATTTTGTTTAATGATCTCGATCAGTCATGGGATAAGCTAATAAGC 
-TAACTTTGAATACTACGTTGTTCCACATTGAAAAGTTGAAGCATGTTCTATGATGAAGTA -AGGCATTCTAACATACAATGATCTAGGGTTCTATGACAATATTCTATAATACATAGAGCA -AAGCGGTAAACAAAAAGGAGCCAATGACCAACTTTACAAGTTTGCAGCTTTGGCAGGAAC -TCGCAAAATTGACTGTTGTGTCATCGAAGCCACCAAATTCCCTGCCTCATCCCACAGTTT -ACTCTCGCTATATGTGCGGCCATGGCCAGCACGGTGATTGATAATCTCTCTCAGGTGCCA -ATCATTCATTTTCGGCGGCGTGAAGACACGCAGAGCAAAGTCTAAGCTTGAACATGCACC -GGCGTCCTCAAAAGACAAGTGGTTGTGTGTCAACGGCAAAAATGACAAAACGCCATCCAT -AATGAACGCCAAGCTGGCCATTTGCTCTCCTTCAGTCTGTAGGGCGTGTTTGACTCGAAT -CCAGTCAGCCGATGACTTTTCCGTTGGAGGCAATGCCTCCTGGCTGGTTTCAACGGTCTT -GGCCATCCCCATGAGATTTTGAGTGGTGATTCCTTCCGGACAGGGGCGGCCTTCATAGAG -GCTTCGGGAGAGTCCAAACAGAGTGTTGAAGGCTTTGAATTGTGGCTCCGCTATTTTACC -AGCTTTAAGCCAATCTTCCATAAGACGGTCCCAAGGTACACAGTCCCGCCAGTGGGTGTA -ACTCCGTGTAGGCTGACCCGAGAATGTGAGCAGAGACGGCTCATCTCTGTGGAAGTCGGC -TAGGAGCTCCATGCATAGAAGGCGTTTGCCGGTGGACGGGTGCTTCTGCTCAACGGCAAC -GCGATAGCTGACGAAACTCTTCGATTGGCGAAGTTGGACAGGCGTGCATATCAATTTGGC -CTCAGTACTCACTGCTCGGAGGTAATGACCAAACGCCGAATGCAAGTGAAATCCACTCGG -AACGGTCTTACACGCAGCGTGGATTGCAAGAGCGATGGCGTAGCCCCCGTAGGCGATGGG -TGCTGCATTACCCATCCTCTGGGGAAGATGCGTGGAAACATAGTGATCATCAATTTTCTC -TTGCGATATATGATCGGCCAAAGTGGTCGCCATGATGGGTTGAAAGTATTTTGAAGCGTG -GAGGTAGGGGTAAATAAAATCCAGACTTATACAGACACTAACAGTTCCCTCGGCTGGCCG -AGGCCTTCTACGGTTATCAGTCCGAAAATCACAAGCACATGAAATCGAATTTTATTTGAT -GATCTTGGACCCACTGCATCCGGAATGTCGGGCATCATATGCCCCAAATAAAAACCTGGC -CTTTTCATCGGCTTATCTTGATAGAATTGTGATTCTAGACATCAACCCGAATATAGCCGG -AGGCTGTGGACAAAAAAGCTTTTTGCCTTTTCTCTTTTCTATGGCTAATCTCAAGTGATA -TATATGCAAGCCAATGACAAAATTCAAGGGAAATGTTGGAAATAATGAATTCTATCTGAG -TTTCTTCCTTTCAATGACGAAAAAGACACCGGGCTCACGTCGGCTATAAAAATTCAGTAA -TTGAAAATTCGAGTACTCCGTACAACGATAGGTTTTCCCACTCTCTACCAGCGGAAATCC -ACATCGTCCACTTTATACCCCACTTAGTCATACCCCCCAGATGGATTCGAGCCTGAACCG -AGGCTTCAAAATAGGATGTGAGTTCCTCGCGAGTCAAGATGCTGAGAAGGAACCGAAACC -TCGTTCGGAGATCCTCCAATCTGTTAGATGAATTACGCAAAAAAAGAATACGCGTGGTAA -GCACACGCTAGGCGAAGCTGGTGGGTATACTGTGATGACTCAACAACAAAGGGGGTAGGG -GTGTCGACTAGAAGCGGCGAGCCGGGACCCAAGGGCATAGGAGATTCACCGATCATGGAC 
-CATGGAGACGGGGTAAGCAATGTAATCGTCGGTTGTCGGTGAGATGAAGATAATAGAGCT -GGGGATGATGGAGAGGGCTGTTGCCGTTGTTGTTGTGGCATGGATAACGACTTTGAGCCT -CTTTCTGCGGTAGAATAGGTGATCTGCTCTTGCTTCGTGGTTAAACCGACTTCCTTTGAG -ACATCATTACATGTCTCCAAGGTATCGCGGACATGAGCATGGAGATCTGGGTATGTAGTC -AAGATTCCAGATTCTTAGAGTCCGTCCGAAGGAACTGATCGCTGCGCTCATCTTCTTAAT -GCTTGTTTCCAACTGAGCGTTTCGCGCCTCGTAATTCAAAAGGACGCCCTCTTGGCGGGT -ACGATAAGCACGTTGTGCCAGTCGTATTGGCGCCCGGCGTCGCTCCAACCCCCGCCAGCA -ATTAGCCACATATCGTGGAGAGAAATTGAGAGAGTATTAGTATCAAACAAACATCTGCGA -AAAGAGCTTCTGTTCGGCCATTGTCCTTTCACGGTCTTCGGCGTTTCTTGCGTGCGGGTC -CCTTGGCACTTTGCGAGCTGCGCGGAAGACTTTGCCCTTCTTTTTCACCAAGCTGTTCAC -ATAGACCAAGCCGACTAGTTTAGGTCCTGCCAGGATCGCTATTTGTGATTTCTGGGTTAA -GTATGTTATGCCTGCCTGGTGGGAGCCCTTCCCCGTAATTAAATGTTTGGAGGCTATGTG -ATCCCTGCGAAACGTAGGAAAGCGACGAATCCTCAGAGCAGAAGGTTGAATCTGCAAGTG -CGATATCACGCAACATTGTCCAGAGTGCTAGGAACATGGAGAATTGGGGATTCCGTGCCA -GGTCGGGGCCCCGGGGCCTCGGCGCCCACGACTCATGATTAACGCCGAGTTCTCGGAGTT -CCGACATTGGACATTAGACCCTTCTACGAAGAACTCTGGGAAACACTTCAATTGCTATAC -AAGAGTATTGAAATTTCAACAAATCGGGATTCTAGAGCATACAAATATAACGGTGCAGCA -AATATGAAAAGTATATGATCTCATAGATAGTGAATTCCAACACTGACTGTTTGATATGTA -ACTAGTTCAAGCGAGAGTCCAATGCTGCAGATGAATGATTGCTCCTTTGTATGCATGATC -GTCTCCAGTGAGTCCAGTGCATAGACGAAGATTTTTCATCCATTTTCGTGTTTTTCTCAG -TGTGTTGTATACAAGTGGAAACTTTTTCTATCAATGGCAAAGTTTCGTTCATATTTGATG -CGCCTTAGTAATGGTCAGCTGCTTCGATGGGTAGGATAAATTGATGGGTAGTTTACCCCT -GTCTACGGAGTACTCCGTACCCGTTTGATGGTAAGGAGCTGCCCGCCAAGAAAGGCCCGC -CCCGCCCCTTCCACTTTTCTCGGCAGATGACGAAACCCCGCCAAGTCAGACCTGGAAGTC -ATGACGAAGGCCAAGAGTCAAATATCGCGCTGCATCATGTTTTCACGAAGCTACTAGGCA -TTGGTCCAGGCAACTACATTGTTCCGTAGTCTGTCGCATTTAAGCAGACGTCTTCCCAAA -AGACAAATCACTACATCTTTTACCGAGACCATACCAACACCCACACAGCATGGATCAGAC -CTACAAGCCCAAAGAATCGGCGCCCGTCGTACGCATCATGGTCCTGGAGACCGACATACC -TCACCCAGACACACACTCGGCCCGCGGCTCCTTCGGCCAGATATTACACGAGCACTTTGC -CAAAGCCGGACGCGCCCATCATCCACCCGTAGGCGTTGAGACAGACCAAGTCTTCGTGGT -AACCGAAAAGGGCGGCCGGATACCAAGTGTGAATGAGTTCGACCGCTTCGACGGCCTCCT -CATCACGGGGAGTGTATACGATGCCCATGCGAACAACGAATGGATCCTGCAGCTGCTAGA 
-CCTCCTCAAAACCCTCTGGATCAAACGTCCCGACTTCCGCTTCGTGGGCATCTGCTTCGG -CCATCAGCTCCTTGCCCGCCTTCTCGGTGGCTCTGTTGCACCTGCGCCCTCGCAGGATTG -GGAACTCGGTCACTGTCGTATCTCGCTGACGCCCGTTGGCCAGCGTCTCTTCCGCGCGCG -CGACGACCACATTTATCTCCACCAGATGCATCAAGATCAAGTCGTCGCGCCACCTACTTC -CGCGTCGGCGGGAACTGACTTGCTCGCCCCGAACACGGAAGTTGCTTGCTGGGGCCGTAG -TGATCATACACCAGTTCAGGGCCTGTATATCCCGAACCGGCTATTTACTACGCAGGCGCA -TCTGGCCTTCGACGAGGATATGGTGAAGCGACAGATTCAGATTAGGGTGGATAGCGGCGG -CATCCAAGACCTCGAACATGCGGATCGGGCGGCTGAGACGGCGCATTTGGAGCATGATGG -TGTTGAAGTTGCTAAGGCTGTACTGCGAGTGTTTGCATCTGACAATGACGAATAAGCGGG -GGACTGCTGGAGTACCTGCACCTTTGACACAGCCATCTACTGTTCCCGCTTGTCTTTCAT -GCATGATATATATAGACGTCTTTAGTAGAGTGATGTTCATTTATTCTCATCATGTCAATG -TTAGTGTAAAGTATGAGAGCTGGCAAAGAATGCCCTAATGCATCGGCCATGTACATAAAA -ATCAACTAGGATGTGACCAGGTATGTATACAACATAGACATACGCATCTCAAATGCCGTG -TAATGACCTCATTGGCTTGTTATAGTACTTTCGCATACTAAATCTCTCGATCCATTTTGC -TGGGTACTAATTCTTAACCTTGGAAGGTACCTGATCCCAAATTCAACATGTGAGACAGCC -AATAGGATTCATCCGACTCGGACTTCTCAGTTACCATGGCAACGAGATTCCAAGATTGAC -CTTTGTAAGATTTCTGTTGCGGCTCCGCCAAGCCGGAGCTATATTTTCAATTACGCTCTG -TAATACATTTACTATGATGTACTTGATACCATATAAATACACCTATTTTCCATGGTCGAA -TCATCAACGTGGTTAATATCGTGGTAAATTCTCATGAAAACGATTGCTTGAACTTATTTG -CCAGATATCCCTAGGATTAGGCCTTCCTAGGTGATCAATCCGTTGTACTTGATGGTATCA -AAAACAGTCGATTACTAGCGCAATCTGATCGGCACGATCTACATGGACAGAATTCTTGCT -AGCTCAGTTCCCACAGCACTATTCCAAACATGACTCCTCCTCCCCACCATCGCCTTGAAG -TAACATTTTCTATATTGTATATGGAAGTCACTTCCTGACAACTGTATTCTTGCCTTGAAA -TCGAGTGACGGTGAAACGATCGCGGAACCAGTATGGCGAGTAAGACGCTGGAAGCGCGTT -TCAAACATTTGTCTGTTCATGATGAAAATGACGGCCACAATAAGATCAATTCATACCACA -AGTCCAAGGTAAGTCACAACATGCATTGATAGTACACTTTGTAACAAGGAACAGCTTGCA -ATTACAAGCACGCGTCCCTCGGACAAAACCCGACCAACAGAAAGCTCCAACTCTACCCTG -TATAAGCAAAAAGGCATAACGGCCGACACTGTCGCTTCCCAGGAGGCCCAAGAAGGACGT -GGAAATTCTCCCCGGCGAGCTCGCAAGCCCCTCTCTGTTATTTCGACGCAGAAGCCCAAT -GAAGTGGCCAAGGGGGAAGAACACCCCATTATTGATACCAAGCAACGAGATACACCCATT -AATCATGTCGTTTCCCAAATATCTCGAGTCAAAGACATACAAGCTTCTCCCCGGGCTCGA -CATCCCCTGTCTGTTGTTATGTTGCAGACATCAGATCCAGCGCCCGAGACAGCAAAACAC 
-CGTGTTCGTGACGACGAGCCACGAAGCACAGTCAATAATCATGTCGCTTCCAAGGTGGGT -CAAGAAATGGGCGAACGAACTGTTCCCACAGCTCGACGCAACCCCTCTGTTGCTGTATCG -AAAACATCTGGTGCAGCCCTGAAGGGAGAAGAACACTCAGTTCAGCTTATCAAACACCCT -GTCCCCAAGGAACTCCATCTAGGCATGTTCGAAGTTGGCCGACCGCTGGGAAAGGGGAAA -TTCGGACGAGTCTATCTTGCAAGAGAACGGGAGAGTGGCTTTGTTTGCGCGCTGAAGGTA -CTGCACAAAGACGAGATCCGTCAAGGGAGAGTCGAAAAACAAGTGGCTCGGGAAATCGAG -ATTCAGAGCAATTTGCGCCATCCTAATGTCCTACGACTATACGGTCACTTCCAGGATCGG -AAGCGAATAATTTTGATACTTGAGTATGCCGGAAAGGGGGAATTATTTAAGCATTTGCAG -AAAGAGAATCGGTTCCCAGAGTGGAAGGCAGCTCACTACATTGCACAAATGGCCAGTGCT -TTGATCTATCTACACCGCAAACATGTTATTCACCGGGATATCAAGCCTGAGAATATCCTA -GTCGGTCTTCACGGAGAGCTGAAGGTGTCTGATTTTGGTTGGAGTGTGCATGCGCCAAGT -GGTCGCAGGCTCACAATGTGTGGAACGTTGGACTACTTGCCTCCTGAGATGGTGGACCAG -AAGAGATCCACGAAGCCTTATGATGAGAAAGTGGATCTATGGGCCTTGGGGGTTTTGATG -TACGAGTTCCTGGTCGGTAGCGCGCCTTTTGAGGACACGCCAATTATGACAAAGAGACGA -ATCGCCAAGGGGGATATGACGATTCCCTCTTTTGTCAGCCATGAAGCTACAGATCTTATC -AAGAAGGTGAGTCCATTTCCGCAAACAATCTTAATCAATTACTGACAACAATTCAGCTCC -TCGTCCTTGATTCAGATCAGAGAATTTCCTTGGAGAAAGTCCGACAACACCCCTGGATCA -TCACACATTGTGAGAACATTACAAAACAAGACAAGTCTTGAGTGATGGCATGATGGGACA -ATAATGAATATATGATTGTGTTACGGTGTACATAATGTCAACCTAGCTTTCAAGATGAAC -GGTTCAATTGAATCGTCAGGAAGCTCCTACTCCGAGCCCCACGAGATAAACGTTATAAAC -ATTGACCCACTCGTTAAACAAACTAGAGTGAATAACTCTAATCAGCAGTTGGTGAATCAC -AAAAGAATCCATAACTTTCAGTTCGTCACAAAAATATGACTTAAGAACCCTCTATCTCCG -TCTGTTGTTTCTCCAGACGTCCCATAGTGATAATATATAAGTCCTTAACAAAGANNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNAACCTGCCGGTGCTCGCAGCGACGGGGCTTAAGCCCCGGGGGCGGGCA -CGGCCCCGGTTGTGCCTAGTAGCCATTTTCTGCCAGTCCAAATCGGGGCTTGGTTGGCGC -GATGGCTTCTTGAACCTGTCGTAGAGGAAGAATAATTGTAAAAGCTCCATCAATGTCCAA -GGGGTTCCACCTATGGCGTATTATCTACCCGGTACCTCAGTACGCCCTATTTTGATTGGA -CGTTCGTATAGTAGGCCGACCTCGGCCATGATGGCTAGGATTGAAACCAGGACTATATAT -CTAATCAGGCATATTTAAACTTTTGTCTTGCCCTTTCAGGAACTCTGTTTACCAACTATT -CTCTTCTCGCTTTGATAGGATCCTATCCATCATGTTTTTTGGCCCAAAAAACCACCCAGT 
-GATCTCACATAAAGACATACTCTCTTGGATATTTGATGAGCCTACTTATGATGTCGACAA -GCCTGTAAGTGCTGCGCCGGTGTTAGCGCCCTAAGTTAGATCCCACAAGGATAAGGAACT -GACACACTCCCCATTAGGTTTACTTCGATCCATCAAATTCCTCAAGATATATCTCCCACA -ACCAGGCGAAGACTTTGATTCGTAAACTTGTGGCAGGATTTAAAGAAGCAGGTCTAGAAA -GGGGCGATTGTGTCAATCTACATTCATACAATGATGTACTGGTATACTTACCTATAAAGA -CGAAACCATGCTTACACAGCAACTAGATTTACTACCCTATCCTTTTTCTTGCAGTCATCG -CCAGTGGGGGAGTTTTTGCAGGTTCAAACCCGTCGTACAAGACCTTTGAGCTGTCACATC -ATATGAAAATTGCGAAGGTCAAATTCATCATTTCTGAGCCCCAGATCGTCGAACCCGTTC -TCACAGCGGCCCAGCAAAATGGCATCCCGATGTCCAATGTCTGGGTATTCAATGAGTTTG -ACCCAAGTTTGCCCTCCAATATGAAGTCTTGGCGATCCCTGCTTGATCATGGAGAGAAAG -ATTGGGTAAGATTTGACGATCTACAAACCGCAAAGAATACACCTGCTGCACGGTTCTTCA -GTAGCGGTACTACAGGATTGCCCAAAGCCGCTTCATTGAGTCACTATAATCTAGTGGCTC -AGCATACTTTAGTTTACGGTACACACCCAAGAGGATACGAGGTTCGTTCATACCTCCTAC -CAGAAACATGAAAGGAATATCTACTCACCATGGAAAAGCCAAATCGCCTTATGTCTCTAC -CAAACTTCCATATAGCTGCAGCCCCGCCGACCCATATAACAGCTTTGAAGGATGGTACAA -CGACCTACATCATGCGAAGATTCAATTTGGAAAAATTCATCCATCACATCCAGATCCATG -CAATCACGGATATCTACCTTGTTCCTCCCATTGCAATCGCTATCATCAATTCTCCAATGA -TCCAACAATCCTGTTTCTCGACAATCAGATCTGCTACATGTGGAGCCGCGCCACTCGACA -AGTCACAGCAATCCATGCTCCGGAATAAACTGCCTGCAGGTGTTCCATGCACACAGACCT -ACGGATTGACAGAGATAAGTGGGGCGGGAACAACGTTGCCTTATCCTGAAGACGACGAGA -CTGGTTCGATCGGTAGATTGATTCCTGGCCTTGAAGCAAAGTGAGTCCCCCTCCCTCCCT -GTTTCTCCCTCACAGAATCTGAGCTAACAACCTATAAGACTTGTGGACGATGAAGGAAAA -GAGATCAGTGAATATGAGATACCGGGAGAACTCTGTTTCCGAGGCCCGACAATTATGAAT -GGTTACTTCGACAATCCAGTAGCCAATAAAGAGGCTTTTGACCAGGATGGTTGGTTCAAA -ACTGGTGATCTTGCTTACTGTGCCAAAGGGACAGAGTACTGGTACATTGTGGGTCGGAAG -AAGGAACTCATCAAAGTTCGAGGGTTTCAGGTTGCACCTAAGGAGATAGAGAGCGTGCTT -CTCTCTCATCCAGGGGTCATGGACGTTGCTGTCATTGGGGTTAAACTTACCAAGCAAGAT -GACGAGTTTCCCCGTGCCTACATTGTGAAGAGAGACGGAGTCAGCAGTTCAAGTCTGACA -GAAAATGATGTCAAGATCTTCGCTGCTTCAAGGCTCGCAAAGTACAAGGCATTGACAGGA -GGAGTTCGGTTTGTGGATGCACTACCGAGAAATGCCACTGGAAAGACTTTGAAAATTGCC -CTCAGGGAGAGAGCTGAAACGGAGGGTCGTCAAAAAGTACATCTGTAGTTGGCCATGCAT -GCTCATGCTCTTTTCTTTATCCTGATGTTTGCAGTAGCAGGCTATTCTAGCCTACTAGGG 
-TAGTTTCAACTCGTTAGAATAAGAATATACAATAATGAGTTTCAGCCTCAAATGAAATAC -ATCTAAATCAATACATTTGTCCTTTGTGTTTTCTGCGACGAAAGTGATGAAACCTGCAGA -CGAAACTTGGAATGCCAGCGCCAAACGATAGTTAGTCTTAACTGACCCTTGGATGAAAGT -TCCAAAAATTACCCAATGAAGGACATGATCTACAGGAACAGGGTTGCGGAATCACTATTT -CCACGCATGTCTGCCCAAGTAGATACCACAATTTATATAGTGTATGGCAGACACAGCTAG -CTAGGACTAATACCCTGATATGCTGCGATTTTCTGACAGCTAGTTTTGCTCCGATGTTAG -CTGAATAGCTATATTGACGGCAAACGACGCAAAGGAAATCATATTTGGGGCAGTGTGCGA -TCAGGGCATTGTTTTCAAAATCTCTGCCAGTCTATTGAGGATTCTTGGAATCGCATTGTT -CGTGGTCCAGCACAAAAAGACGGATTGCGTCTGGTTAGCTGAAGCAGTACGAACTGCATA -CTCTGATTGTCCAACCCTCCAACCCCATTGGCCAGAAGGCAGGCTTTGACTAACCACTTG -CCTGGGAGGATTAACACTGGCTCCAAAAACATTGGGTCGACTTTTAACGAAAGGCGAGCC -AAGGAGAGGAAGAATTTATCGATTTGGTAAATGAAGTCAAAGAGAGAGGTCTTCTTTCTG -GGGAACCAGGATTAAATCTTGATAAGCTTTGATGGAATACTGGCTTTGAGTCGAGTCAAT -CATGATGTCCAAAGACATACCAGTCGAGCAGCTACAGGAATAGCTCATCTTTGGTATCGC -GAATTAACCTATATCGGAATGCTGCATGAGACGTGATATTATATCTTGACTAATATAGTG -AGTTGACAATAGCCGCCCTGTGTAAGCTACTTAGGTAGCCCTGGAGGGTCCTGATGATCA -ATGTGTAGATCGCGTTGATGATGCAACAGCTGGGTAAGTAAAGGTCCGGGATGTGGTCAG -CTATATTGATTAAAAGTGCCAAGGGATGACCCCTGAATTGTCAAAGCATAAATACCAGGA -ATGATCCGGTACCTACATCATGTATATGTATGTTAGGATGTCGTACATACAACATGCAAA -ATGCTTAGATCCTATGCGGATGCCAAGAAGCTAAGGTGGGGCATGATCATCATGATAAGA -TAAGCACTGCACTGCCACAGGGGAATTTCACGGAATTTCTTTCTCTCCTTACATGAGGGA -CATCCTTTCATCCTATTCTACCAATTCACCCCATTCATCTCATTCATCATGGAGGAACAT -CTGCGCCACAGTCTTTCCAATCACCCCCATCCCTTCCAGAGCTTGCATCATCATAGCAGC -TCACCACACCGGACCGACAGGATGACTCCACTTTCTATCAATGAAGGTACCACAACCGCG -CTGGAACACGAGCTTACCACCATGCAACAACACCGTAGTGGCAGCTCCAGTGACGAAGAA -GGTCAACCTCCACCACGTTACACCCCTGAAAGTGATCCGTTCCAGCTTGCCTCAAAGCTG -AAGACCGAGGATGAGATTCGGCAGATGAAAGCGAATACCTCGCGGAAACGTGATTCCACT -ACAACCAAAAGCCGGAAAGTGGGATCCATCGTGAAAGACACCGCTCTTCTTGGAAAGCAG -GCTTTTGTCACCAAGAAGCTGCAGGGATTCTACGAGTCCCAAAATGAAAACATCGAGCGT -ATGCTGAAGCCTGTCGAGGAACACCGACGTGCCGCCCGCGAGTTGAGTGTCGACAACCGT -CTCAAGTACCGTATCGCTGTCTATGGTAGTTTTGCCGCAAACATCATCCTTTCGGTCATT -CAAGTGTACGGTGCTGTGTCCTCTGGATCACTCTCTCTGTTCACCACAATGGCTGACGCC 
-GTCTTTGACCCCATGTCCAACTTGACTCTCCTGCTGTGCAACAAGGCCGTCAATCGCGTT -GATCCACGCAAGTTTCCGGCTGGAAAGGCCCGTATTGAAACCGCGGGCAATATCTGCTTC -TGCTTTCTTATGACTGCTGTCTCTTTCATTATCATTGCCTTCTCTATCCGTGAACTGGTT -TCGGGCTCCGAGGAAGAGACCCAGTCTTTCCACCTCCCCTCGGTTATTGCCGTTGCCGTC -GCCTTCGCCACCAAATTTGGCCTTTTCCTGTACTGCTGGGCCCTCCGCAACCAGGTCTCG -CAGATTCGCATTCTGTGGGAGGATCACCGCAACGATCTGTTCATCAATGGGTTTGGTATT -CTGACCTCTGTCGGTGGTAGTAAGCTGCGTTGGTGGATTGATCCTATGGGTGCTATTATT -CTGTCCGTGCTAGTCAGTGTGCTGTGGCTGCACTCCGCCTATGGCGAGTTCCAGTTGCTC -GTCGGCGTTACTGCCGACACCAAGATGCAGCAGCTCATCACTTATATATGTATGTGGATT -TCATCTCGGTTTCTCTTAAATATTGATTGCTAACCTTCTTACCAGCCATGACCCACTCTC -CCGTCATCACTGCCATTGACACTGTCCGCGCATACACCTCTGGGCCTCGCCTGTTGGTCG -AGGTGGATGTCGTCATGGACGCGGAGGCGTCTCTACGTGCGACCCATGATGTTGCCGAGG -AGCTCCAGATTAAACTCGAGTCGCTCCCTGATGTTGAGCGTGCTTATGTGCATGTTGACT -ACGAGACCACTCACAAACCTGAGCATTCCCTGAAAAAGGAGCTGTAAGGACTTTGCTTGA -CACATTAATTTGGACCGGCTTTTTTTTTATTTTGGATAGCGTCGCCTCAGGGTTGGTCCA -TTGCTTCAATTTTCTTAGTTATTATTTTATTGAATATCATCACATACATTGTGTTTGCTA -ATGTCTTTTGCTCCCTTTTTACTCCTAAGAACTTTACCTAGGTAGAGTTGACCAATGTCG -AGTTGACTGTCTGAATCATACGACAGCTCAGTGGTCAACTTCCCTAATCTTGTCGAGTCT -AATATCCAGCCTTGAACCCAATGGCCTGCTGGCAAGGATAAGTGTGCTTTTGGGTTTCCC -ATTGTGATGTCACTGATGTTGGAAATGTTTAACTGGGATGTGCTTTCCCTATTCAACGCC -GGGTGCCTTCCCTATTGAGCACCGGAAGCCTTCTCTTCCCAGTGGTACCTTCCTATCAAG -TGACGACTGCCTTCCCTTTTAGGCACCGGATGCCTTCTGCCTTCCCTCATCACTCGTCGC -CTACCCTTCCCCCCATTACCTGGGGGTATAAATACCTGCCCTCGCCCActttctttcttc -tttcttccttcttCAACTCCCCTCATGGATCTTTGCCGCGGCCCAGGTTCGTCTGACACT -AGATGGGACTTCGGTCCTGTCTAAAGGCACGAAGCATGTTGTGGCCCGGACACAGTCCCC -CTACAACCTCTGTGGAAACTGACCGTACTTTCCTGGGTTCTCCATCGACCATCGGGAGTT -TCGTAAACCGTAGACCCCTTCCCCTTCCCCATCAGCCCCTCCGTATGCCTGTCCGTTCTT -GCGTTTGTTTGTTTTTTTGTGCTTCGCGTGGTTTGTTGTTccttccatttccccttcccc -cctgccttgccctacccAGTCGCACGGGTCCTGTGTGCGATACCTGAACAGGACTTGCAC -CTTTACGTGGTGTTCGGGATGAGTAGCCTTTACGGTGAAACGAAGCAAGTCCTCATTATT -TAGTGCAATATAGGATTCATCTACCATTCAAAACATACTTGTTCTCATGTTCCCTCGTAG -CGAATCACAGTAAACATTTAACAAGCCCTATCCACCTCATGTTTGACTTGAGAGTTGGAG 
-ATGATCACATCACACCGTGGGTCCTGAATATCGTCTCATGTATCACACTCCAGCGCAAAG -CTTTGATCAATATTGCATGCATCGTAATTCTAATCTGATATTCTCGTTCTAACCTAGCTA -TATAAGTACAGATACATGGTCACATGAAACTTCCTACCCCAGCGATGGCACCTGCGTACA -CACCACAGTCAGTGTATGCTGTAACTTAACGTCTGTAGCAAAGTTGTATTCGACCGTCCA -TCCAAATTTTTGTCACCCATCCTCTCAGCATCGTGTACTCGACGATAGAGACCCTGGCAG -ATTTGAGATAAAATCTACACCTGTGAGGACCCTGTAGGATAGTTACTCTCAGACTTGCTG -CTATTCAAATGATGTAGCCGCGAGCCTACGTCTCCGCGACTAATCGGCACAGTGTCTGAA -GTAGCTAGTCGATTGATTTCGGAATCTTATAGGGCTTCATGCCAGCCTGGAATTCTCAAA -AGAACAGATCCGAGAGTCAGGAGCATAGAAGGATCCTGTGCGCGAGATCTCCTAGTCCAA -TTTGAGAATCGAGAATCAGTATGTGACGGATCGAGCTATGCAGAATGGGTGTTTGACATG -CTAACGCAATACAAGGTTCGCGTTCATCTTCCGGGTCGTCGTGCACTTCGCTGCCTTCCT -GACTCGCGTTCTCATCGTCCATCTCTGTCTTGTTATAATCGGAACGCTCGTCTTCTCGGT -AGACCGACGGCAGAAAGGAATTGGGGTTGTCACGATCGGTCACTATGGGGTGTGACCCGA -CTGAGTTCGGCCAAATGAATCCCCCCATTGAATGCGAGGCTTCTATATCGTCGTTCGGTA -GTGATTGGAGGGGGGTGAGCTGGAGGAGATACTGTGACTCGAGATGAAGGATGGAGAATA -CAGGGGGTAAGCTTGTGGGAAGGTGGTTTACCCTTTTGCCTCGATCGTCCGTACGGGTAT -TTCAGGCCATGTTGAGCATAGCGAGAACAGCCGTGTTGCTCTTCGGTGCTGCACACCCTC -GATGCTGCACAGGGGTTGCATGATCTGTGAAATAGGAGTCGTTGCAGTTGAGTAATCCCC -CTCTAGGATGATGCTTCTGTCTCTGAATATCTGTGGAAGTCATAAGCATGATTTTGGTGA -AGATCAAAGCGAATGGAATACGCAGGTTGTTCTTTCTCTCCTTGGCCTCGGAATGCCCGC -TATGTGCCCCGTACATATATCTTCTATGAAATCGGGTCCCCTAACCGAAGGTCCGTGTAT -TATATGAAACATGCAACTATTTAATGTGCAACAAGAAACTACCATTAGAAGCACCACAAG -TTAGATTCTACAGCATTTTATATCATCCCATCAATTACCAAAACGAAGGGTCTATCATTG -ATCTCATTCATGAAAACGCCCGAATCCGGTGGTATCTCACAAAAGACCGTGTAACTGAAA -CTCCAGCACGATGACAATTCAGAAGAAAGAAGTGTCTAGGACGAACAAAGGGACAGAATC -TAGGGAAAGAGAAGAGAAACGAGGTCCCATACAATTGTGGGCCAAGTACAGAAGAGGTTG -AATGATATTGTTTACAACAATGAACGAGGACTAAGAAAAACAAAAATGATTCCAGAGGGA -AAAGAAATCTAAGCAGACTGTTCAAAGAGCGCGATGGTGGAGCAGTAGTTGCTCTTGTTC -GGCTTTCCAGGAACCAGACCCTTGCCATTGGACGAGGTAACCATGTTGACTGCGGCCTTG -GTGCTGAGACCGTACAGCTTAACATCCGAGCAGTCGACCTCGACCATGTTATCCTGGCAG -TCCTCAGAGTTGAGGCAGGATTGGGCGTAGTTCTCGAAGAAGCTGTAGAGACCGGCGCCG -TAGACAAAGACGTCGGAGGAACCGAGGACACGCAGACCCCAGGCCTTGCGGCAGGAGTTT 
-GTTTTGCAGTTGGAGAAGTCAGGGTCGTTCCAGGCGCGTTGCGGGGTGAAGGGAGTCAGC -GAAGTCGGATTGGACTGGTAGTAGGGGGTCTCAGTTTGGATCAGGCCCATGAAGACGTTC -TTAGCACTGGAGACCTGGTAGTTGTACAGCTGGTGATGCTCCGAGGAGGTACCCCACAGC -CAGACAGGGTTTTGCGACTCGATGAGCACACCACGGCCGGTGTAGACATTGATCTGGTTG -TGGTCCTTGAGATCCAGCTCGTGATCGGCGGTCCAGAACCAGGTGTTCTCGAGGTATGCG -CTACCCTTTTGAGTAATATGCAGGAGCATGAAGGCGGCCATACACTCCTCCTTAGGGGTA -GTCTTCTCCTTAGGGGTCTTAGGGCAAGTGTCGCTCTGCAGCTCAGTGCCAGCAGCGCCA -CCAATGCGGAAGTGAACGTCCCACATACCAGCAGAACCTTGGGAAGAGCCTGCAACGTTC -CACTCCATGAGAATAGCACCAGCGGCGGGACCCTTGGTGGTGATGATGAGATCGCTCATC -TCGACAGAGCCAGTATCACCCTTTTGGCCGACCTGGAAGACAGGAACGGGCTTATCTTGG -TTGGCAAACTTCGGGCCATGTGCCATGAGCATGGGCCAGATCTCACCAGTGATCTTGATA -TTCTTGGGGACCTTGATAGTGGAGGTAATCACATAGGCACCATGATCAAAGTAGATGACC -TGACCATCCTTATAGCTGTCAAAGATCTTCTGGATGGCCGCAGTATCGTCAGTGGTTCCA -TCACCCTTGGCGCCAGCCGACTTGACACTGATAAACGAGGAGGCTGGAAGTTTCTCGTAA -AGAGGCTTGGAGCGCTCAAAGATGGCACCCCCCTTGAGCAGGCTAGCGGGCTTGGGAGCG -GTCTTCATGGTGCTCTGTGTACGGGCAGAGGCAACAGCGTGCGAGGCGCCGCAAGTGCCA -CTACCAGACTGGGTTCCTTGGCTTACTTCAGAGCCGCTGTCAGTACTTTGAGCAGACGCA -GCAGTAGAGGTGACAGGTACAGCAGGCTGCTGTGCAGAGACCACACTGGAGGGGAAAGAA -TGACCAGGCACCGCGGAATTGACAGGGGCCTCATTAGAAGGAGTAGCTGCGGGAGTGGGT -TTAGCGACCTCGGCGGAAGAAGAAGGCTTGGGGGAGCTATCAGTATCGCTACTGGCACCA -GCACCAGGGGCAGGCGAAGGAGCGGCAGCGGATCCAGCTGAGGCAGTCTTTGCGGGAGCA -GGAGCCTCGCCGCCCTCCAGGCTAGCACTAGGAAGCCCTTGGGAAGCGGCACAAGCAAGG -ACTGTCTCTGTGACTGTCTCAACAACGGTGACAGTCTCAGGCTGAGCCTCGCTGGCACGC -TTGTTGATAGTCTTAGACGGGGTATAAGTGTTACCCTGTACCCAGTTGTCGACAACAGAG -CCACCTGCCAGGATGGTCTTGCCATCAACACCAGCCACAGCAACCTTGGAGCCAGAGAAG -TCCACGTTATCCAAAATGAGATCACCACCGCCAACGGGGATGCTCTTCTGGGTCCAGGCA -GTGACAATACCAGTGGGGGTGTTGGTCAGCTTGCTGTCCAGCATCAAAACAGAGCCAACG -GTCTGGTTGGAGGGCGAAGTAGACATATTCAGACCAACCTCGCAGTTGTTGATCTTCACT -GACTTGAAGGTCCAGGCCCAGTTCCAGTTCATGTAGATGGCAGTCTGGCAGTCATTGAAG -ATCAAGTTGCGGGTAGTGAACTGCTGGTTACCGAGGAACATGCCGTAGTTTCCGCCGTTG -AAAGTCAAGTCGGACATGAAACCACCAGACCCGTTGTCCATGAAGATACCCTGCTGCTTG -TTGGCAGGACCACCACCCTTGACCATCTCAAAGCGGATGTTTTGCAGACTCGTAGCCTGG 
-CCGACCTGCCAGTGGATACCAGCCCCAACGCTAGCGGGCATGGCTGTTAGGTCAATAACC -AAGTTTCGAATGGCACGGAAGAAATTGTTTTGGTTGGTAAACCAATTGTCGCCGTTGTCC -TCGTAAGGATCGGCGTCGATCACAGCCATACCCGCGAACTTGGGGGAGGCCTTGATGATA -GGTAATTCGACAGCGTCACCAACAATTTGGGTATAGTAGTACTGGGTGATAGGCTTTGAG -ACAACATAGGTACCAGGCGGGAAGTAGACGATAGCAGGGGTGGTGGTAGAGGAATCACAG -CCCTTGCCACAGCGGCCTCCAGAAGACATGGCCTTGTTGATAGCGGCCGTGTCGTCAGTT -GAGCCATCACCTATGATATAGTCAGCAGGTTGCCCGAGGGTATAGGGTTTGGACGAATGA -CTGACCTTTGGCACCGTAGTCCTTTACATTTCGGAAGATCTTGTAGTCAGGCGTTTTGGC -AAAGGGGATAACACCCTGGCGTTCAATATTGGCGACCCAGTAGCTTGAGGCGGCTCTTTT -TTCGGTTGTTGCCAACACGTTGTTGACTGGAGATACTGGAACGGCGACTGGCACGGCTGC -GACCAAAGAGGCTGGCAATCCGAGCAAAAGGATGAGATGGGCAACAAAGATCGCGATGGA -GGTATGTAGAACCGCCATGTTGGAAATGGGCAACGCTGGACCAGCAGCGTAGACAATGTC -TCAAATAGGACTAAAAATAAAAGATAATTCAGCAACCAGTGCAGAAATAGAAATAAGAAT -CGAGTCCAGCGCTTGACCAGCAGCGACACGCAGAGAAGATTTAAAGAGTCAATCTTCAGA -TGCACTTTCCCAGAGAAAGACAGAACAATAGCGCaaagaaaagaagagaatgaatagaaa -agaaagatcagacacaaaagaCACAACAGCGATGGGAATCCTACCTATAAGAAAGCCCTG -GAGGCGTTTGCACACTTCTCCTGCAGCCAATCACACACACCACAACCCTGAAGTCCACCA -ACCCACCAATCAGAAGCTCGAAGCTGGCGACAACAAAGCTTGAAACGTGATGGCCAGGAG -TTGAAACGCTGAAGGAGATCGGTGTTGCAGATTGACTCGTCTCAATAGCGCCGGACAGTG -TTTCGAATAGTGTGGCCGTGAATAGGCTATTTTCAATTCGGTGTAAACAGAAATGAGACG -GGTAAGGGTCTACGCGCTATATAACCAGATTCATCCCCAAACAGTGCTGGATCATCCATT -CCAATCATCTGAGCTGGGAGCATTGCAAGCGCGTATCACTATGCCCTTGACCCGGGTGGA -TCCCCCTTTGCAGTACGCATTACTCCGTATAGAATATTCGATAGTGCCAATGATAGTTCA -CGATCCCCTAGATTTGTCATATTTTGGAGATCAAACAGTTTCTTCTGTTTGAACCCCCAT -GTCTGTGTGATGTGCATAGCACCAACCAGACTCGACATATCGAGTCAATGTGTATATTCC -CATATTGTCAGGTCAAGTGTCCCAAGGTCAGGAGTGGCGATGTTATGGATGGACATTCCG -CAGAGCTTTGAAGCATTCCTAATGACGGAGGGGGAAAAAAGTCAAGTCATAACATGACCA -GGTCACTATGGAATTCCCCAGATGAAGGGTGAAAGCAGAGTAGGGGAAAAGTGTCAAAAG -CCAAGCTTTGTTTCCCCCTGAGGAAACCTGAAGGGACCGGAGACCTCGGGATGGGCTTGC -CCTTCTGTCTCGCGTCTTATTGCCCTCATGGCCCTGGTTGCCCTTGGAAAGGATTACACA -CCATTCAAACCCCCCGGGGCCCAGGTGGCGATGTTCGCTTTTTAACTACATATGTTTTGT -TCCAGATTCCAAAAATAACCCTTAGAAGCCTTAGAAGCGCCCATCCCAGACATAGTCTGT 
-GCAACACACCCGACGCAGAGCCTCATGTATAATTATAAAGTACAACACAACTTGTATAAG -CTGGCAAGGCTACGTTTAAATTTTCTACTTGAATTTTTCTACTCAAATTTTTATACGCCA -CGCGTCCATAGTGGACACGGTACGGACCGTGCTCTGGAACAGCTGGCGTGGCTATGATGT -GGCGTGTAAACCAACCCCATCAATACCTAACATCTCGGTACGAATGAGACGGTGGAGAAC -ACGCGCTAGTCTGCGCATTCCCTCGGTCAAGTGTCGTGGCTCTTCCCATGCGAAACACAA -ACGTAGATGATCCGCAAATCCCTCAGTCAAGTGATGGTCTGGCACCTGGAATAGGTCACC -CGCACACAAACGCAGATTTTCTTCGCTCTGCGCTCGCTTGACTACGTCGGCGGCGAGTAA -CGGCGCGGGTAGACCAATCCACAGGAAGTATCCTCCGGCGGCGGCGGCGGGTGCGGGGAC -TGTGACACCCAGCGGGACGAGATGCTCCTGGATTGCTGATAGCAGGGTGTGGTAGCGCTC -GGCGTATTTTGGTTGGAGGACGTCGCGGATGTGGTTATGGATGGTGCCTGTGGGGAACAG -CTGGTCAATGATAGCGGCGCATAGATGGGAAGGAGCGCCGCCCGATCGGGAAGAGCCTCT -AAGATATCGGGGTTAGTCTGAATATTCCGTATCCGAGAAAGAAGGGTGGTCAGTCAAGTC -ATCCCGAGCGGTGCCCCGTACGTTTGTGATAGTCCATAGGCAACCTTTTCCGACGCTTCC -GCCCAGCCGGTGCGAGCCCCGGGCCCGATCAGCTTGCTGAAACTGCCGTTGCTTAGTGCG -TGTCCCCATTCATCCTGTGGGCCACCGTCCAAATAACGGTCCACGTCGACAATGCGTGGA -AGACATGCGCGGTCGCCGAAATTCTTCTCTCCTTCTGGCTTGGGGGACCACTGAAGGAAA -TCGTATACATCATCCGTGATGATCAGCGCATCGTACTGACGGGCGAGTCGAACGAGAGCC -TCACGTCTACGCAATGACATAATTTTCCCGGATGGATTCGCAAAAGTCGGCACGGCGTAG -ATAACGTGCTTGTAGATTTTGCGCCAGGGCCTGGGTGGTTTCAATTTCTGCAGGGAAAGA -GGGGGGGGGGCAATGTCAGTCGGGGACTCAATAACCCATGGCACAGTGGCATGATACAGT -GACGGTCATGTCAGCTCAGAATAAGTCATGAGCCTAGGATTATCCTTACCGGCTCAGTAT -TACCAGCGCGCAGGGCGACATCTTCCGCAGCACGTAGCCCGCTCTCCAGCACTGCTATGT -CTACGCCCTCGTTATCTTCGGGTATTCCGCGTAGGCGCCCGGCGAAGCCGGCATCATCCA -TAACCCGCGCAGCCAGGTGATACGTTGGTGCGACCATCCATACATTGCGCGTGTAGACCG -GGTCTGTAAAGGTTTGCAGCACGCATGCTAGGTTCTGACTGGCCCCGCCGGTGATGCAGA -TGCGTTGCGATGAGACGGGCTCACGCGGCTGGTAGAAATCGCTCAGCCATGCGGCTATGT -TGATGCGCAATGGCCCATATCCTTCATCCGGGCCGTAGAGGAGGGCTGAGTTTGCGATCG -AGGGATTGGACAGTACGGCCGTGGCGGAGTGGCGGAGGTTGGGGGCGGGGAGGAGCGCGG -GGTTGGGCCAGCCGATTTGGAGGTCAATAACTTGCTCGGGGGAAATGGTAATGGTCATGA -TGAGGATATCAATAGTCCGGGAGATTGAATATGTAATGTGTACTTATGTACAGGTTAGAA -TTAGTCTACTGCATCAGAGCTTGGCTTAGGTTAAGATGTGGCGAGGTGGGCACCGGTGCA -TCCGAGATCGGGAAGTAAATGGTTCAGGGGGTCACCAGGTACAGGCACAGCATTGGCTAC 
-AAGAGATCTAGTATATACATTGTACAGACATAGGAGTTTTGTACCTGAAGTACATATACT -CCATATCAATTCCTGTAATGTCAGCCTCGGGatatataaatatatatatatTGCGCCATG -TCATCAACAGCTGCCTTGATACCAGGTATGCACTTGTATGCCCATTTGGGATAATCTGAT -CGTGTTTTATATATAAACTACTATTGATCGGAACCGTCTTGTTAGTTTCGACTTCCCATC -TTCTGTGCCATATAATATAACATCTAATATAGCAAATGCATCGTGTCTGATTTTTCTTTT -TTTCAATGATTTGTCCGTACTTCTATAAATGTGATTGCCTATATAAGATTGTATTTTATG -AGGGTCCCACATCTAAGACATGAGCTTACAAGGTGCTTACATGTAGTTCTATTATTGGCA -ATGTGCTCCTATTCTCTGTATTATACAAATGTATTGTAATATATTGCATATTATTTATTT -TTTTAACTTAACTTCTCTCTCTTCCCCCCCCCTAATTCTTCCAAATTTCTTTTGCTCAGC -TTAGCCTCATTCCCTGTGAGGTCAAGTTTCCCTCATTCTTCCAATCCCTCCTGCCGATCC -ACATTCCGCCCCCCTAATCCTCCCACCTGATCCCATGCGGGATTGACCTTGTCTACCGGG -TCTTCCCAACTTATCCTCACAACATATCTATCAACATGTTGTCAGCTCCCGTTAAGTCGG -CGAAGCGGATCTCGTCTCTGTTTTCACTGGGGTCCAAAGATCGGGATTCATCTTCAACCG -TGTCTTCGCCAGTGAACTCAGAAAACATCGACGGCCGACGCCGGAGTAGCTCAAAACCAA -CTCGTCATGTCTCGACTCCCAACCCTGTCTTTTCCTCGGAGCCCCGAACCATGCCCGATA -GCCTCGACATGGAGAGCCTACCTCCGCCACCTTCACTGCTGGCGGTGAACCAAGATTTGG -CCAACAGCGCTCCCAGTTCACCTGTTGGCCGTCCGCAGAGCAGAGGTCGTGACGTGAGCA -GGCCCTCCAGCTCCGCAGGGTTGGCCATCCCCGGATCGACGCCAGACTCCCGCCCTAGCA -CACCATCGAAGCGCAGAAGTTGGATGCCGGGTCGTTCGCGCGCCAGCTCGATTGATAAGC -GACCAACACCCATGCTCCCGGCTGCTTGGATCGCAGGGCTGGACCAGAAGGTTGTTTATG -ATCTGGGGCCATTGTCTAGAGGAGAGCAGGTATGCATACAGCCAAGTTCTGTGTTGAAGG -GCCTTTCAAATTAATACTTTCGTTACTTTCCAGATTCAAGAACTGTGGAATGAACAAGGT -GACACTTATGTGTACATGTTCCCCCAGAATACCGGACGCGCCCCATCCTTCAAAGTCGAT -TCCACCATATTTGCCGAATCCCCTTCCCTCACCTATCTAGCCCGCGGCACTGATGGCAAG -AGCGGCCTCGAGCAGCCAACCCGAAATTTATCCCTCAACCCAATCTCTCCGCCCATGTCG -CCGCTGGACCGTCAGCCCGAAACCGACAATGATAACGACAGTGCGGGAAGCCAAAGAATG -GCATTCGAAGATTCCCCCGAGGAAGTGCAGGAACTCCATCTTTATCTCCCCATTCCGCTC -AACTGCGATGTGTCCAACCCGCAAGCAAGGCTCACTCCAGATGACATAGAGACTCTTCTT -TTGTTCCGCAACCTTTTCGCATTCCTGCTCGGCCAGTCCTTGATTGCCTCCCCCAAATCA -TCCACTCTTTTTTCTATCTTCATGGACGTGGCAGTCCTTTTGGCACGTTTTGAGTTCTCC -AACTTGGACGCGTCCAACTTCGGAGAGACTGCTACCTCCAGTTTCAGCAACTACTGTGAA -GAACTTCGCCTTGCAGATGTCCGTCGAAGCAGGGAGAAGACGATCGAGGCCATAGTCTTG 
-GGTGAACGTCTGCGCTACTATCCGCTGTATGTCGAGGGCTTTGTCCATGGTGTTGGAAAG -CTCGATGAGCTCAAGCAATTGCGGAGCCCCAAATATACCTTCATTCACCCTATCACACAG -AAACGCATGGAGCGTGCATTCATTGATCTTGACACCCGTCTGCGCGGTCTCTACGGCAAG -CTCGAAGATTTCGATTTCCCTTCTGTTTTCTCGGGTATTGCCAACTCAACTACTTCGGAA -GAAAGCAAGGTGGTGCGCTTCAAGAACTGGAAGACAGGATTCCAAGAATTCCGCCGCTTT -ACGATCCAGTACTACCGACAGAAGTACGGCTCGTGGCCGCCCAAGGCTCGGTCAAAGAAG -AACGAGTTTGAGGAGAGCGGACTTAACCGCCTCCTCCTCTTGGATTTGTATCACGACTTT -ACCGATCTATACGACCTCATGGTCGACCGCACGTCCCTGACGACACGCACAATTGATATC -TCGGGTGCGGGCAATGACCCAGTAGATTCGTCTGATCCGAAGCAAATGACCGTTCGTGCT -TTGCGACAGGTGCTCTCCGAGTATGACCGCAGTACACCTCCAGTACTTCCTCCTATTCCA -TTCGATATTCCCCAGATGCCGTCGCTGCAGCCCTTGCATCGGAAACCGCTCGACGCTAAG -AAGGAGGCTAAGCAGAGTGCCAAGAAGCTCAAGAACTCCGACATCAACGCTGTCCTGATG -GGATCCTATACCAGGGAGGCCATGCGGCCCACTCCTTTCATTGAGAGCTTCATGCAATTC -GAGCGTCGCTCTGCGCACGGCAAGAATGTTAATGACATCACCGATCTCCGATGTGGCCAG -TGGATCTTCCTATACGCCGTGATTCAAGCCCTTCCCATGCTGGTGGTGGATGTCCCTGAT -GTTCACTTCAACGATGGCGTGGAGTATTTCCTCTGCATTGCCCCACGTGGTGGTGCTCCC -TGGATTCACAACGACACCAAGACTGCTCGCAGCTGGTTCGGAGTCGCAGGTGGTGCTGGT -GTTGTCAGCTTGCCATCCGATGTAGTCATCAACGGAGTGGAAGGTGTATACCGTCGCAGT -CATTGCTGGCAAGTCGCCGAGCAGTGGGCCGAGGCGGGCTCTCTTCTTGATCCACCCATG -ATGGAAGACGCTACATACGACGACGAGGAATCCTCTATATCATCACCTTACCCAGGACAT -CAATCCTCGGCTGGATCTTCTTCCGAACCTTACCCGAACTCTATGATGGTTCCCGGCGGG -CTCACCCCTCCCCCTCCAGCCATTCCACGAACCCGCTCGCCAGCTGCTGCCCAGCGATCT -GAGCATCGCCACTCATTCTACCCTGGACTGGAGGCCTTGCCCCTTCCTGCGGGTATTGCC -CCCATTGAGCCTCCCACACGCCCGATTAGTCGATTTAACCCGAACATGAGTTTCGACGAC -ATTCTCAAGGAAGTCCCGAAGAAGGACAACAAGAAGAAGCACTAAATAATCATTGTCTTG -TGTCTTTTCTTTATGATTAGATTCCAAGGCACTACGATACGGCTTGCATATTCAGAATAC -ACACACGTTTTGTGACTGACTTTTCATACTCTGGACTGTTTCATTTCTCTGTTGATAGGC -GCATGTGATGAAATACTGGGCTGGACCGGTGCACAGGGAAGCGAAAACGAAAAAGAGGTT -TTTAATTCATGGAGCATGGAGGAGGTACATCATAATTATCAGCATATCGGCATCAACATG -TCCGGAGGGGCTTTTCTTTGTTCACCTTGTCATTTTAATTTTTCTGCTTGGATTCCCCCC -TGTAGTTTTTGATAGAAATATAGATTCTAATATCTGTACATCTAGTTTCTTTGACAAagt -gtagtgtagtgtatttttcaatgtgtagtatggtatatatagtgtgtagtgtgtGCCCAG 
-GTGCCCATGTGTTTGTTTTGGATCCATGGTTCCAGCCACGAGTTACCATGTTCTTATTTA -AAGCCTCAAACCTCTAACCGCGAGACCTGGATAGATTTATCTGTTTGACTCTGTACTCCG -TAAATCAATGTTTCTTCAAAATACCCAATTGTGAAACCATGTATATGCCGAGGCCAAACC -CACTAAAGCAGGCAATACGAGAAGGATCAGCCACACAGCCGCTTCTTCCTTTATCGCCGT -CGGTTGCAGAATCCCCATGGAATTGTAAGCTAATGAAAGGACCGTCACGCAGATCTCTTC -CAGACCAGGGAAAGGGCCATCTGCTGTCACTTGAGCTTGATTCAAGTCCAGCTTTGTCCA -ATTTCTCCCAACTCTCCATCCCCGAGTCCTTTCTTCATCCTACCGGCGGAAAAGGAAAAA -AAAAGTCAAAGAACCTCGATAACTCAACAAAACCACCGAAAATGCATTAACGATACCTAA -TACACAACCATCCATACTCTAATCATCGCAATACTCTCATCGCCCCAACCTCTACCTACC -AACCATGCCTCCTTCCCGCTCCAGCGTAACGAGCGACAGCCATGAGCGTCTTGAACCACG -ACTCCTGCGCGCGGCAGAATCAAACGACGTCGACGCCCTCAAAAAAACAATAGCCCTCGC -ACAAGAATATAACCAATTAACTGACAACTTCCTTCGCATCGGATTGATGCGCAGCGCAGA -ACGTGCTTGCGTCGCCGCAACGGAATGTCTCCTCACCCTCGGCGCAAAAACAGATGTCCC -TTCAAATCGCGCCTCACCACTTCTCCGCGCTGTAGAGCGCGACCATTACGAAATAGTCCG -TCTCCTTCTAGACCACAATGCTAGTCCAGACTCTGCCGATAAAGAGGGTCGCACGGCCCT -CATGACTGCCGCGTGGAAGAATCATGCTGATATCCTGCATCTGCTCATTATGCGCGGTGC -GGATGTTAAAAAGCGTGATCTGCGCCGTCGGCATGCATTGCATAACCTCGCGGCTGATAA -GGCTTGTCGGTGGGGGTGGGGCGATGAGATTGTGAGGTTGTTGTTGAAGACAGAATGCCC -TCTCGATGGACTTGAGGGGCAGGATGAGCTTGGGCGGACGCCATTACATTGGGCTTGCGC -GTCGGGGAATTGGAGGTTAGCGGAAATGTTGCTGACGAGGACTGGTGAGGTGAATGGGGA -GGTGCAGATGCCTGCGAAGGTCGATGCTGTTGAGTTGCGTGGTAAGACCGCGTTGCATGT -TGCTACGGCGCATGACCGGGTAGATATCGTGCAGCTGCTGATTGCACACAAGGCGGCTGT -GAATGCTGCTAGTGATGGTGGATGGACGCCGCTTCATAATGCTTGTGATAAGGGTTGTGA -GGATATTGTGCGGATTTTGTTGAGCGAAGGCGCGCATATCAATGCGCAGCTCTTAAATGG -AATTACGCCGCTGCATCTTGCTGCCCAGGGGGGTCATAGTGAGGTTGTGAAATGTCTTCT -CGAGAGGCCGGATCTGAAGCGAAGGGTTAGGGATAATTTTGGCAGTACGCCATTTCTGCG -CGCTGCGCAATTCAAACGAAAGGATATTGTGTTAATGCTTGCGCCGTTTAACAATGTTGA -GAGTCTTTCTGATGATATTAAGGAGGCCTGTGCCGCGTTCGATGCTACTGTTGTTGATTT -TGGCAACTTTCATAATGAGAATCGGGTTAAGCGCATGTCAGTGTTTAATTTGCTTTATGG -GAGGGACCGGGAGAATCCTAGGAAACAGGCTGTTACCACTGTGCCAGCTGAGAGTCGTGC -GACTGATTTCCGGTGGATTCATTTGCCGGCGAATAATATGGCATGGGTGGAAGCCTTGCT -CACCAAGTCGTTTATTGAGGAGGGAGCACATGATGTTGATGGTTTTAAGGGGCTGGAGAA 
-GTCTTTTAACTATCAGCATCGGGGTCAAAGGACTCACTCGCACTTTATGCGGCCGTTATG -TCAGAATACACCGCGGACGATGCTACGGCGATATGAAGAAGAGACTTCCGAGGAGCCAGC -GTCTGATCAGGGACAGGCGAAAGCCATGGATAGCAACGCCCGCAAGCAGAAAGGCACAGG -AAGCAAGCCTGACAAGGCAGATTCTTATTTCCCTGACGAAGGGACAGGGCCCCATGGAAA -AAAGAGCAAAAACAATCAGAAGCGTGGCAAGTCAGGCAGCGCACCGGGAACTCCCAAGCA -GCCCGAGACGAAGGGAAAGTCACTGTGGGGACAGAGTGAGAAACAGGCGAGATCATCACC -TCTGTCATCATCAACCTTTTGTAAAGATCCCCATCCGTTGGTTTCTGCTTGTAATGTCTG -CGTATTCATGCCATATCTGCACTTCGAGACCGCGGAGCGCAGACAACAGATGCAGGATGC -AATTTCCCGAGCAGAAACGCTGGATTTTGTATCTAATGGGATGAAGCGAACGCGCACAAG -AGATGTGCTGCTCATTGACGCCCATCTATCATCCTCAACGACCTCCCTCCATGTCCGCCG -AACACTGGACCAGTTTTTCTACCCAAATATTGATACCAAGAGCCGCGATCAGGATCAAGT -GGTGTACCGGTACCAAACAAAAAGCCCCGGTATGGGGGTAGACCCCAAGATTTTCATGGT -TGATCAGCTGTGGATGTGGGTTTTAGGCACGAATTTAATCGTAACAGCCTTCCCACAGAG -ATGGGACCAGCCCAAGAATGACCCGCTCAACGTGCTAGATGGGATTATCGAGGATATCAA -TTCCAAAACTCGAGATCCTGTCAAATCTGTTTATGATTTAGCCATGATAATCACAAATCG -ATGCTCCGGGGTCTTTGATCGACACCGCCTGGGTGACGAGGAGTATCAGTTCCTAGATAT -GTTCGAGTCCTCTATCGGTATAGCCACGGACAAAGAGACTGTGCTCTTTAATCAATTCAA -CCACGCATCCGGACAGGCATCGGACTGGCTGAAAAGCCACCGCAAACTGAACGGCTCCTT -CAGCTCTCCGCCAAAGTCGGGTAAGGGGACAGCTGACGATCAAAGCAGCAACTACTGCGA -CGAGGACGGCCAGCCACTATTTGTCGATCGACTCCTGGACATCGGACAGGAAACTAATCT -CCTAGCCGAGACAAAGGACATTCGAGACGAGCTGAACATGATCAGGACGGTGCTGGAACA -CCAAAACAATGTACTGCTTGACTTCCAAGAAGTGATCTGCGAGACCTATCAAGGCCAGCA -CCGATCACAATTCGAGGTCAAGAAACGGTTCAAAGACCAGCAGCGCATGATTGACATGCA -CTTGAAGGACATTGACCGCATGGACAAACAGGCTGAGCGCATCTACCACTCTATCACGGA -TCTACTCGATCTCAAGCAAAAACACGCCAATGCCTTTGAGGCACGCTTTGCCCGTGATCA -GGCTGCCGGTACAGCCCGTCAGGGAAAGACGATCATGGTCTTCACTATTGTTACCATCAT -CTTCCTGCCGTTGTCGTTTATCGCCTCCATCTTCACGATCAACCTGAAAGAATTCGAGAA -CTTGAATCTCGGTTATGTGGCAAAGTATACCTTTGGTATCGGCTTTGCAATCTCCATCCC -ACTTGTCTGGCTCGCTCTCACGGTCGACGATATTGGTGGTTTCTTCCGCGTGGGCAGCCG -GCGCTGGCGTTCTAACCGGAAGAAGGTGCTTGCGAAGAGTGAATGTGGCGACGAAACGCT -CCAGGCCTTGGAGATGGAGAAAATGATAAGTTTGTCGCGCATGCGGAGAAGCATTGATGT -TGGATTCGGTGGTCATGTTTTACCTGTTTCATCACGCGGCACGGGGGAGTCGAGAAGGCC 
-TGAACTATATGTCGGTGGATTAGGCAATGGCACTGTGCGGAAAAGTTACGATCTGCGGTC -TAGTCAAGACTACAGATCACGATGATATTATTTACGGCATTCTAATGATTGGATATTTCG -CTTTGTCGATTATCATTTTGCCGTCATTTTGCGGTTACGATCTTTGGGCAAAAGTTGGAT -ATACCCGTCTCGATATAGCGCTGAAAGTGATCAGACTTCTGATAAGAATTTAAGAAGTTT -TGGAGGAGAATTGGAGGTCTTTCAATTAAGTAAATCGGCTAGCTCTAACCCACAGGACTT -GATATCTATTCAAGCTGCGTCGCACCCAAATACTCCTGATTCCTACCTTTCCCATTCCCA -AGGATCTCAGCGGGGTGGTACACATTCATAACGGCCATGGCAATAAGCATCAAGACTCCG -TCGAAGATGTACATGAAAACCTCATGCGTCATGGTGTACCCATCATAACCCTCTTTGTAT -TCCACCAAGCGGAACACAGACCGCACCAGAATCAGGAAGCTCACAATGTAAAGCCCAATA -AGGATGGTGAACCAATTGCGTTGCTTGAAGCCGTCACCGCGGGTTTGATCGCGATCTGAC -TGTGATCTGGAGGTTGGTTGACGCGAGATTCGGAAGTGAAATACCGTTGTAACAACCACG -AAGAAGCCGAAGAAGAGAAGCTGGACGACTAACCCGCCGATTGTTATGTTCGAGCCGGTT -TCACGCATCGAGCCTGCTGTTGCCATGAGGCCACCGCCTAAGATGGGATGTAGGTTAGAA -TTTTGTTCAGTCAATGGCTGGCCTGTGATCTGATTTGATGACATTGACTTCACATACCAG -AGCACTGTAACAGAAACGAGAACACATCTCCGCCCACGAAGATCTTTGTCATCCACTTTA -CTGGCACTAGACTCAGGTGCTCGGCGTGGAGATAGGTCACCGTTCGCCCGAGGATCATGT -AGATCGATGCCGCATACAGTGGCGGAGCGAGCAGGATGAATAGAGTCTGCATAGCATATA -AGGGGATTCCATAGAAGTTATCATGCGATGCGGCTCGACAAATGTATCCGATAACCTGGA -CTGTATGAATTGAGAGTTAGTGATGTTATTAACAAAGTCGAACAAACAGCTCGCATAGAG -AACATACAGATTCCTCCAATCACGAATGGAACGAAGTACCAGCATCGTGACTTTGTAAGT -TGATAGCCATGGTAAGCGGTGGTGACGCTGAAAAGAATGATAAAGATAATAGCAGCGGGA -AGGGAAGGGTTGTATCCATAGATATTATGTTTTTCAGCCATCTTGCGATGGGATTAAATG -AATGTATGAAGAGTTGAAATTCATTTGAGCTGATTGGGGTCAAGATGAGATTTATATCAA -ACTGGGCAACAACCATCGGCTGGACAACAAACACCGCTACCACAGCCCAATATAGTTTTG -TATCCGAAACAGATGTCCTCCGCGATCCACTACAAATTTAGGCGACCTTTGATGCAATAC -ATTGATTTGAGCGTTATTCTTATCATTTGGGGGAACGCCTTGGCTTTTGCGCAGCTTTCA -CGCGATGTGGGGACGAGGAGGCTTTTTCGGCGAATTcgtttccgtctccgtgtccgtatt -cgtttccCCGTATCCCCGTATCCGCACTCGAATAGCTTGAGTTCTCTAACCTATCCCTTT -GATAATGGGCCCAAGGAGAACACATCGGAAATCACGAAATGGCTGTCCGCAATGTAAGGC -ACGAAGACTAAAGGTGCGGCAAACATCCCCGGGTTGAGCTTTTTCTAACCAGCACAGTGC -GATGAGAAATATCCATGTACGAACTGTACCAAACACGCCATTCAATGTTCCTATGTTGAT -CGTGGCATATCGACTCCGGCTGAGAGTCCAGCTACTGGATCAGAATCATTGGCATCGTCT 
-TACGTGCCTTCGCTGTTAAACATAGAGTCATACCCGGCAACTCCTGCCGGGAACCTGGCA -GGTCTGGGTGATGCCAAGCCCAAGTCAGAGCGTAGGTCGGGTCTACTACAATTTCTGTCC -GACTCACCAGTTGAAACTCCGACTTTGAATGAAGATGACTGGGCGTCAGACCTTGAACTC -ATGCACCATTACTGCACTGCGACATGCAATACACTGACAATCCGTGAGGACTCACGACAT -GTTTGGCGCATTGTTCTCCCTATGGAGGCATATTCGAATAAGTACTTGATGCACGGACTA -TTGGCTCTATCAGCGCTGCATCGCGTGTCTCTCTTCCCAACCCAGAAAGCGAAGAATATC -AAGGCCTCCGCATACCACCAGGCGGCTGGGCTAAAAGAATTCCGAGAGCTCATTTCCTCT -CCAGTCGACCCTAGCAACTGGCAGCCTGTCTTCTGCTTTGCTTCCATGGTCATGGTACAT -GTCTGTGCTTCCCCGATCCGACTGGGCGAGGATCGTTGGCCAGCTCCCATATCGAATATG -GTGGAGCTCTTTTCCGTAGTCCAGGGATTGCAGACTCTCATGGAGCCATGGTTACGCTCT -CTTCGAAGAACCAGGCTTGCACCGCTTGTAAACTGCGTCTGGATTGAAAGTGAAATGCTC -ATTACAAGGTGGGCTTTTGAGTTCTTGCAATCCGACATCCCGGTAATTGTTGAACCGAAA -TAACTGACCTCACTTTAGCCCTGCGACCGTACACCAATCTCTACTTCCGCCCGACATCCA -GACTCGAATAGCACAACTCCATCGGTTTATTGATGATTATCCATTCCCTTATATACAACC -CCAGTCCGAGCAGGACTCGAGCGCCTCACATGAATTGCCATGCCCTGCCGAACACCGAAC -AGACTACAAGCGCTCCCTGCTGTTCTTTGAGAACTCAACTCGTCAAATAGAGCTCGCAGG -GCCCCACGTTGAGGTTGGAATGGTCTTAATGTGGGCGTACTCACTTTCTAAACAATTCCG -AGACGATTTGGAAGCATCTCATCCTGCAGCTCTGGTACTGCTGAGTCATTGGTGTGTGCT -GCTGCATACTGTTAATGATCTCTGGTTCATCAAAGGCACTTCTGTTCAGCTACTCGAAGA -TATCGAGATTAAGATCAATCCCGGCTTTAGGGAGTGGCTGGTATGGCCGAGAAGATGGGT -TTTTGGAAAATGACCTCAATCTACCCTATTCAATGAGTTTCAAACCCGACAAACGGATGG -ATTTTCGGTATCGGACTTTGGTCTACAAAAATACTACAGCGAAAGCGAGATTTGTATGTA -TTCCACCTGATGTTGAGTGGCGATTCATTCTTTCAATTCAAACGATTTCAAATATCAACT -aaacaatagaaaagaacaaaaaaatagaagagaagaaaaACCCATAACAAAAACTTCAAA -TAATCCAAAAAAAAAAAAGAAATCCGACTTGTTCTTATTGCACATGAACCTTAACATACT -CTCTCGCTTCACCATCCTCAATGTCACTTCCTTCAGATTCCACCGGAGTGAGGGTCCTTC -CATCCACAACCGTCGCGTCTTTCTCACTTTCCGAATCACCATCTGCCACGAAAACCTTCT -TCTTCCCAACCTCAGCATCCTGAACGATTTTGGGCAAAGTAACATTCAGGACACCATCCT -CCAGCTTTGCCGAGATGTCGTTGATAATAACAAAAGCCGGGGCGGCGCGACTTCCAAGAC -GGACCTCGCGCTCAAAGACACCAACCTGCGGACCACGCTCGTCCATAACAAGGGCGTGGT -GCAGATTCTCATTGACACCGGGGCGATAAACGACACCCGCCACATGCAGAACAGACTCAC -CAGCATCATAATCAATGCTGAGATCGCTCTTCTTGGCTCCGGGAAGTGAGACATGCACAA 
-TGTATTTGGCATGGGTGTCGAAGACGTCAACGGAGGGAGTGAAGTCAACGCCCTCTGCGG -TAGGCCCGTTCATTGGGAAACCAAGTCCTGCACCGAGCTGGCGTAGGAAATCGAACGGAC -CGCCGAAAGGAGGTCCACCATGGTGTCTGCCGTGGCCACCGGGACCACCCTTATGATGGC -GACCCTGTCCACGAGGTCCGTAACCGAAGGAACCGGCGTAGGGACGATATCCATGTCCGC -GTCCATTTAGATGTCTGCCGTTTTCATCGCGCCCAGGGCCCCCGAAGGCACCGCCCCCGT -GGTGGTGGTCATGGCCGTAACTGTGAAAGTCGTAAGGTCCAGCGTGACAGCCATGGCCTT -GTTTTCCCTTGCCACCCTTGCCACGGAAGGGGCCTGGTCCATGGTGGCCGGGACCTCTAT -GTGTCTTGTTCTTACACTTTCCGCTGTGGCCGCGGCCGCGGCGGTTACCAGGAGAGTCGG -AGTCCGATATCAAGGGCTCGATATTCTCTTTGGAGGCGAATGAGACGACGTTGGCGTCTG -AGAGCTCACCGACTGCTTGGGGTTCTTCCGAGTCCTTTGAGAATTCCTTGGAGGTGCTTT -GGTGACCACAGGGCGCAGCGTGAGGGCCATGTCCATGCCTCCCTTTGCCGCCTTTGCCAC -GGAATGGGCCTGGACCGTGGTGGCCGACGCCCCCACGCACATTGCCCTTGCCCTTACCAT -TTCCGCAGTGGTCACACCGATGGTAGTGGGTGTCTGGGGAGTCAGAGTCCGATACTGATG -TTTCGTCTTTCTCCATGTTGTTGTTGGGCTCAGTATTTTCACCTGCCTGAGCACCTTGGG -TGGTACTGTCATTTTGCTCGGGGTTGTCCCATCCACGACGACCACGACGTCCGTGGTGTC -CACGAGGAGCAAAGAACGGGTGATCCTCTAGATTGCCGTTGACAAAATCCCAAAACGGAT -GCTGAGATTGGTTGAGTTGGTAAATGGAAGTCATTTTGAAGTAGTAGATTGATATGCAAT -ATAAAACTTGTGAGCGAAAATGATATGTTGGAAGATATCAAAACCGAGAGCAGAAAAGCA -AAAAAAGCGAAAGACACTTGTTCCGATACTAATGCAAGAAGCAACAGCTTGCTCTTTATA -TACAAATGGTTACACACCCAGGCAGCCGCGTTGACCAACAACCTCCAAGTATATACGTAT -GATAACCCTGCGTGGCTATGTACATCCTCTTCACGCTAATTTGCCACAGGGCTCGGGGCC -GAGAATTAGTCCCAAAGTAGCAATATCTCGGTGGTAACGACACAATAGACAATTATCTCC -GAGCCTATGACAACCTAAAACTCGGAGCGGCGAATATACATCATATAGCGCCGAAAGTAT -ATGCAGTAGGTATGCATGATTGTAGGCTGCGCTCCAATAAATTTTGTCAGGATTACAGCA -CTGAGGGCGTTGAATGATGATGCCTGGGATATGGAAAGGAACTAGGCGAGAGATGGAGTC -TTTATCCGTAGCTCACACGTATTTTTAAGTAATAGGCATTCTACATATATTTCATGGTTG -ATATAATTGACATTTGTATATTTGGTGCCCTTGCAAGCTGTAAATGGTAAAGTCAGAACC -CCGAGAGGCGAAACTTGATCGTCTCAAATTTTCCCTTCTGGCACGCCCCGGAAAAAAAAA -AAGAGACAGATAGCCCAAAGCATCGATACCACAGGCGAACTTTTTAACGGAAAGAAAGCA -AGGGTCTATATATAGCGTCAGCCTCCTCAACACGATAGAACTCACTCCGAAGCATATTCG -CTGATGTGTATGCACAAGTCTAGGATTTGTAGATCAGGTCTTAATATCTAAAGCGATCAT -TATTGAGGGAAATTTGGATTGCGTACAGTCTAGCAACCTCTTTGGCATCTTCAGTCTCGT 
-TATACGGCCGAACACCCGCTGATATCTCATAGATCAGACATCCTAATGCAAACAAATCGG -GTTTAAAAGGACGGGGTGACCCTGGCGCCAAGCAATATCTGTCTTCTTCCTCCACTTACA -TATCTATGTCACCGATAATCGACCTAGCATAATCGCATGGTTTAGCAGATGGGTCATCGA -CCACGAAGAAATTCTGTGGACCAATATCAGCATGGATGACCCAATGGCTATGTTTAAAGG -CGACAGCATCGATAGCACTCATAATCCAACGATTTCGATCCACTGAGAGACCTTGTAACA -ACGGAGATGGATGCGAGTATCTCGAAATCGGTGATGTGGAATGCGGATGGTCTGTGATTT -CGAGGTAACCGACAATGTTCGGATGTTTAGGTAGAGTTTGGTAAATAGGCCTTTCGCGGG -TAGTATATAGGCCCGATAGCTTGTTGTACATTTGTGATAGTTGAATCTTTATCTCCTATG -GAATCGTGAGTGGAGGGAGTCTGGATGATTCCATGGCTATCTTCAAAAGCAGCCCATGCT -GCTCCTAGGGCGATGAACCGTGTCCCGCTCAATTGTAGCCAGAGGAGACGGTCATCCATA -TTAACCTGGAACGGAGATACATGAAGAAAAGGAATGAGATTGAACTGAAACTGAAAGGAT -GGACGGGAGGCCGAAATTGATCAACCAGGACTTACACTGAGTCAGCCGCATAGCTTCAAC -TAATTAGAAATATAATCAGAATTGCACGTGATGTTAGCCTCACTGAGAGATTGACCACCT -AACCGAGGTATAGAAAGCTGCAAAGGTATGTTGAAATGTAGAAAAGATTGGACAGAGGCC -TCATACAGAGTAATGCTAGTAGCCTCGTCGCGAAAAAGTCATCCCGATTAGGCAATATGG -CCCTAAATCCGATTGACTTCGACTGCTGCGCCACATCCGCCACTCATGAAATCATAGTAG -TTACCGCCGACTCCACCGCCTATGAAGTGAAGGCAAAATCTCGATGAACTTGATCATTGA -TGTGAACCAACGGGGACAAGTTCGCTTTAATGCCATTTATTTGCATAAACTCAAGACATT -TACAACCATGGTAGATTTGGATTCGGCCTCTTTTCGAAGTGCTGAAGAGCCTGCACCGTC -TGATCCTCGAACAGACGGCATCTCCTGCGCCCGGAAGACAAGGTGACATAGCAATCCGGA -CCCTAGTCTCAAAGCCTCAAGCCGCCAGAAATCTAGAAATCTAGAAATCCTGAATTCTTT -GAAACATCATTGGATCTTATTGTATTTGTGTAGTTCATGTGATTGCGCCAAGCCCTACCT -AAACCCTATCATACGATGTTCCCTCCTAAGACTTATTATATCTTTCTTCCCTTCTGCACA -ACCCCTGAACATAATTTACAGCTGATTGAAATCTGTCCAAACTCTCCAGTACCATGTGAA -CAGCATGTGCCTGAATCTCCCGGTCGTCTCCGACAGCACGTGCTATCCGATCTCACCGAT -TGCTCCGAAGGGTTCCGGGTGTCCGAAAGCCTGACAGCAATGGAGCAGTCACAATAGCTC -TCAGTTCCAACCACGGTCCCGGAAACACGAGTATGCAGCCCTTTATGGAGCCTAACAAGT -GGACCTGCCGCAAACTCCGTTCCGGATATCTCCTTTCCGCCGATGACGCGTGAAGTCGTC -AGTGTGTCAGCCAAAGTGTGGACCGTGAAACCTTCGTTATGAACGGCAATGCCGCATTTA -GATATGTAGCAGGTGTTGGTACTGACCTGGGGTTGATTTTGAATATTTCACTTTCGACCC -ACTGAACTGATCTGAATCAGTCCTGCATTGCATTGCCATGCAAACCCGGCCATTGAGCGC -GATGCTCCCTCTGCGGCGCCAGGCCCTACGAATACGGAATTCTTTCCCTTGCGTTGCCCC 
-CCCGGTCAGCTCAACGCACATATCTTCCTCTAGCATTGCGCTGGCAGTCAATGCACCTGG -CCGGCGATGCTTCGCAACCCAAGTTGAAGCGCCCGCCGCGGCTCAGCAGTCTGATGACAA -AAGAGAACGGGTCGTAATTCTTGGATCCGGCTGGGGTGGATATACCCTTTCGCGCAGGCT -TTCTCAAAAGAAATACGCACCAATGATTATATCACCACGGTCATACTTCGTCTTCACTCC -TCTCCTCACCAATACGGCCAGCGGATCCCTGGATTTCTCCAATATCGTCGAGCCCGTGCG -CGATCCACGCGCCAAAGTGGATTTTATCCAAGCAGCTGCACGCGCAGTCGACCTGAAAAA -GAAGACTATTCTATGCGAAGCCACAGTCGTCAAAAGCGGTGTCACCGAATCACCTCGTAC -GGCTGAAAATGAGCGACGAACCGAGGATGGCCCGCAGACCACGAATATGAAGCCGATGCA -AGAACAGCGTCAATGGGAGCAGGGTGAAACATTCGAAGTACCCTATGATAAGCTTGTCAT -CACTGTAGGTGCCGTCAGCCAGACCTTTGGAACACCAGGTGTGCGGGAGAATGCAATGTT -CTTCAAGGATATCGGGGACGCAAAAAGGGTCAAGCGGCGCGTGCGGGAATGCTTCGAGCT -AGCCGTGCTGCCATTCACTACTCCAGAGATGAAAAAGTGGCTGCTGAACTTTGCGATTGT -CGGGGCTGGCCCAACTGGCATTGAGCTTGCGGCATCGCTGCGCGATTTTATCTACTCCGA -TATGATGGCACTGTACCCGACTCTGAATACACTTCCGAAGATCACCTTGTACGATGTTGC -GCCTAAGGTGCTTTCGATGTTTGATGAGTCGCTTTCCCGTTATGCGATGGAGACCATGAA -GCGAGAAGGTATTGATATTAAGACCTCTCACCATGTCAAAAGCTTGCGCTGGGGACCACC -TGGCGCAGCTCCGCCGTACCATATGGATCCGAAGCGGTGCCTGACTCTTACAACCGAAGA -AAATGGTGAAATGGGCGTTGGCATGTGTGTTTGGGCCACAGGTAATGCCATGCCTAAATT -TATCACCGAGTCGCTTGATTCAGTCCCCGCATACCCCAAAGACTCCATCCATCAAATCGA -GAGCTCTTCAGCTGTAGCGATAGACGCCGAACACGCATCATGGAAATACAAGAAGGCTCC -CAAGAAAGGTCCCCTTCTCGTCGACGGACATCTGCGGGTACAGCTGCAGAATGAAACAGG -CCAGACAGCCGTTCTTCGGGACGTCTTCGCGTTGGGTGACAACGCCATGCCAGAGACTGG -TGCGCCTCCTGCAACGGCACAGGCCACCAGCCAAGAATCCAAGTGGCTGGCTGATCGTTT -GAACAAAGATGACCTGGCTCGGACCCCGCCATTCTCTTTCCGCGACCTGGGCACGATGGC -ATACATTGGTGACGAGCGGGCCTTGATGCAAATACCCCGTAAAGGTGACCGTAACTCAAA -GAGTCTCATGCCAGAAGGTATCAAGGGACGTACTGCCTCACTGATCTGGAAGACGGCGTA -CATCGGCATGAGCATCAGCTGGCGCAATAAATTGCGAGTGGCTTTCCGGTGGACGATGAA -CAAGTTCTTTGGGCGGGATGTTTCACGATTTTGAAGATCATGTAAACATATTAGAATTGC -ATAGAACATATGGAATTCAGGAAGATGCTCTACCTCATTCGACTACCCTAGAAGACTCGA -TGTAGGTGTACAAGGAAGGAGGACGTGACGAGACACGCCCCATGTGTTCTGTGGGACCTG -TTGACTGGCTACTGAAACCCCAAAGCTCAGCCAATGACTTTGTCAACGAACATCCACGAG -AAGGCTCAATTCGATTCAGGTGCGGACCTACATAGACCTCAAAATTTTTGTTGATATCCT 
-TATACGTCAGGTACGACCCTTTAAGCAATCTCATGTGGGCTACGGTGAGGTGGATGAGAG -AGGCGGGAGAGAAAGAAAAAGAAGGAAGATGAGCAAAGTGATTTTATACCCCCACGTACC -TTCCCAAAGTGGATAGTTCCCTTGCACCTTCCCAATCAGCCTCCCTGTAGTTCCTTCCAA -CTCTGGATGTAAACATACAACACCACTTCAATTTTAACTATCTGGACTACCGTACAAGTC -CAAGCCCAACCAACACAACACATTCTCAATACCCCGTGGATCTCTCACTCCTCACCTCCA -ATCCCAAGCTACATCAAAAGCTCAAGTGAATCTATCTCCCCGTCAGCATGACAGCCCATA -AACCAGAGAATCAGACACATACCTCATAACATAAGGGCAGATACTCTCCCCACCATATGA -AGTCCAAATAGCCTGCTCCCGAACCGGCGTACCAGTGCGAGCAGCATTGATCATCAACTT -ATCATTGATGAAGATACCAACATGTGCAACCTTGTTCTTGCAATCACCCCCAGTAGCCCA -GAACAAGAGATCACCAGCTTGAGCCTGCGCACGAGGATACCGCTTGCCCATACTAGAATC -ATACTGATCCTGCGCGACACGGGGAATTGTCTTCTTCAATGCCTGGCAGATAGCATACTG -CGTCAGACCGGAGCAGTCAAACCCGCCGCCAGAGGGACCCTTGCAGCCGCCACCTCCCCA -GACGTATGGGAGGCCCTTCTTCTTTTCGGCTGCAGCGATGATTGCCTTGCCTGTGCCGGT -GCTGCCACCACCACCGCCGCTACCGCTGCATTTGGTGGTCACGTAGCTGCTACTGCCAGT -TTTCACGTAGTAGTCGGCTACATAGCAGCCATCAGATGTTTTGTCCCAGATATTGTTGCC -GAAGACGGAGTCTCCAGCAGCTTGGCAGGTGACTTTGACGTCGTGCGACTTTTTGTATGA -TTTGACGACCGCATGGCTTGTTCCGGGACCGGAGCGGCAGTTGACGGTCTCGCCGGTGAT -GGGGTAGGCACTGACTGCTGTTATAGCGGTCGTGAGGGAAAAGGCAATGAGGGAAATGAG -ATGCATGGTGAACTGCTTGGTTGTTCGTCTTTGTAATAGAGAGTTGCTGTTTTCTGTGGT -GTAGAGGACCGAGTTGAAGAAGATGCATCTAAGTCGAGATGGAAATTGAAGTCTTTATAG -ACAGCTCGTCTACGCTATACCACGATAAAATCTATAGCTGCTTGTTATGTTGTTCGTGTT -CATCGGCCTGATCATCGATTCGTATTGTTCTTGGAGCAGAATGCAACCCTTCACTCGTAT -TCCACCCTCAGATTCGACACGATCACCGTGACAATTGCCTTATAGACACCAATTGATCGT -GATCTCCAAGATTGTTTACAAAAATAACAATTTCAAAGGATCTAGTAGACACTAACTGGA -GTTTCGGTTGATCTTTTCTCGTATGTTCGTGTTCCTGCATTCATACTTCCCTCGAGGCCA -CTAAGAACACAAGGAACACGAGAACTACTATTTATAGCAGATGCAACCACAATTTAGCCC -CAAATTAGATAGCCATATTTTAGCTCAAATTTCGTGCAGTTTGTAACTCGTAATTTCATT -TGCCCGCATCTTGAAATTAACAGCATACTTACCACTAAGCTGTGTGCATCCTTCTGTGTC -GACCGTTATTGAGAGATGCCAACGCCCGAGGCTGCATACGACGTCAACTTCCAAAGTCGA -CCCAAGAATGCAACAATCTGAGATGGTACTTCAGTATCCCTTCTATCCTATCAACATTTC -GTATCAACTGTTCCACCGACTGGTATTGTCTAGTATTTACAAGCATGTGGCCAAAAATGA -GGGTTAGAAGATACATGAAACATTAAACAATCCGAAGACTGGACATATTGGAAATATCAA 
-GATTGCCAGACAAGACCTTAAGCCCTCATATGAAAACATCAAGACATAGTATCTGTTTCC -ATTTTGTCTGAAGGGGTGATGTTCCGTGATCCCCTATTTGCTCCAAACAAAAAGCGGGGA -GACAGGTATTCAAGACTCTCACCTTGTCCAAGTCACTCTGATTTCCAGTTGATACCCAGC -TCCTTCTCAACTTCCAGTATCAACATGTCAATCTCCGAAATTTATCCAGTCTACAAAGTG -AAGTTGAAATTAGCCTGCCAAGACCCCGACATGCCATCACCCCGATACCACACCATACTC -TTTGTCCAAACAAACGCGCAAGGTCCCTGTAGCGGCATGAAACACCACGTCACTGGAGAT -ATCGTCACAGGCATGCACTACGAGCCCGCAACATACGACGATCCCGAGACCGACGAGAAC -TTCTTCTCAAAGGAGCTGATTGGACATACCAGAGCCCTCAACTATCCCAAAAACTGGCAT -GATGTTCTCAAGTCTATCCCAGCACCAGGGAAGCAGAAGGCTTTCAATAAAGTGACGATG -AAGACAGAGCCTATTAAGAGCTGGGATCCCGTGACATTTTATGAGCCCGGAGAGCCTCGT -CGGCCGTTGGTCAAATGTACTGAGTGGATTGAGGATCAGGCTATTCCGATCTTGATTCAT -GCGGGTCTGATTCAGTGGAGGCATGCGCGTATAGCTGGTGCTTAGGGTTCAATGGTATGT -CTTTGTTGAAGTCCTCCTTCTGATTAAGTCCATGCTCTCTTCTTCAGCAACAAAGCACGG -TGACTGACATTTTTCCAGTTTTCTGATTCATGCAAATGCCGCAAAACTGAATGCCGTGAG -TAGCGCTCTAGAAACTGGTTAAGGGCAGTCTGGCGAAATGATATACAATTTCCGGTGAAA -GTCAAAGTGATTCCCGGATGACAGAGAATATTTCCGAATTCGGATTGACGTACTGGCTGT -GTGACATACGAAACATGATAACGGATGGTGCTAGATCGACAAACTCGGAAAGAATACTCA -AGGGCCCCAACATGATCAGAAAGCGCCAATCAAATATTACTTTGTCAGCAGGGATTCAGT -AGACTACCTAGTAGAAGTATATGTTTCTTGGCCAATGAAACAATTCAGGGAGCCTAGGCT -GAGGTCTAGATCTCACTTTTTATATATCTCTCAAGTATGGTATATCGTACCATCGGACTA -GCTAAATTGGTAGGCCACAGAAGTAGGCTTTTGACCTGAATTAGATGTAAAAAATTCCAA -CTATCTACTTAGCTAGACGCTCAGTCTTTGGCCTTCTACCAACTATAAGCCCGACCCGGA -AGGGTTGCAAAGGTTTTGAGAAAGAAGAGTGTGAAGATTATAGTGGGTGGCTAGGTGAGT -GAAGAAATAGCGCTAAGAAAGTAAATGCGCCCCAACTGATGGATTAAGACATGTACGTAT -GTTGTACTATGGATCGGCATACTGGGTAGGACACATTTCTACGGGAAATCGTGCCGGTAC -AACCCCCATCTCTCTAATATTTAATCGATGTCCTCTTTCATAATGCATGACATTCATATT -CAGATGCACATAGTCACTTCAAACATGGATGCCTCTACAATTCTCAAGCAACTTGCCTGC -GCAGGAGATGCATTCAAGGAAAATGAACCAGGTTCACGAGAAGAGCTGATTGCACGAAGC -CGTGCGCTCATAGCCGCACTCGAGCTGCCAAGCGAGTTCATCCTGCAGACTCTCTGGGCA -GAGGTGAGAAGATCAGCCCCCATCCTAATTCCCCGGTAATGTGCAAGCAAGCTCACACAA -CCGCAGCCCGCCCGATCTGCAATAATCCGACTCGCTGTGGACGTGCATCTATTCCAACAT -ATCAAAGAAGCAGCTCCAGCAGGTCTCAGTCTGGCAGCTTTATCTCAGAAGACGGGCGTG 
-GAACTTGACCTCCTTACCCGTCTAATAAGGCACTTGGTCGCCGTGAACGTGCTTGTCTTC -CACCCAGGTGGGATCCACGGCACAGCACTGAGCGACAGTCTCGCCGATGAACGATATCAG -CGCACCATCTTATGCTGCTATGACGTCGTGTGGCGAGCGCTCGAAGCCATGCCGGCTTAC -TTTAAGAACAATGCATACCGCTCTCCAACTCCGGGAGGTACGGACGGACCGTTCCACCAA -GCGCATGGGACGAACCTGGGACTTTTTGACTGGATTATTGCCACGCCGTCTCAGTTGCAG -AATTTTGATGCACTGATGAAAACCTACCGTGCGGGGAAGGTGAACTGGTTTGCGGAGGGG -TTTTATCCAGTTTCCCAGCGGCTGGTCTGCGGGTTTGATTTGAAGATCGGTGACGCTTTC -CTAGTTGATGTTGGTGGTGGTGAGGGTCATGATGTTGCTGCTCTTGTGGGGACTTATGAA -AACCTGCCTGGGAAGGTTGTGCTACAGGATCGTAAGCCTGTTCTTGCGAGTATTTCGAAT -GGAGCCGAGGGAAGGGCGTTTGAGATACAGTCTTATGATTTCTTCACGCGGCAGCCTGTC -AAGGGCGCGAGAGCTTATTTCTTGCATTCTATCTTGCATGATTGGTGTGATGAAGATGGT -GTGCGGATTTTGGAGAATCTGGTTCCTGCTTTGGTCAAGGGATATTCGCGGGTTCTCTTG -TATGAGATTGTTGTGAGTGAGGAGAAACCGACGCTAGCGGCCACAACGATGGATCTGGTC -ATGATATGTCATTTTGGGACGAGGGAGAGGACAGAAGTGGAATGGAGGCGGATTCTGGGT -AGAGTTGGGTTGAGGGTTGTTGGGATTTATACTTATCCTGGTGTTGCGGAGAGTTTGATT -GAGGCTGAGTTAGCATAACTCATCTTGCGTTCTTTCAAAGGTAATCTTGATTTCTGGTTG -GATCATTATATATATATATATAGACGTGGAATTCCAGTGACAATCCAGAGTTACATTTCG -GGGAGACTTGGGATTCTTTATATAATTCATCAGGTCTACCCAGCCTTGGAAGTCTCCCTC -AGCTCACCATACCTGAGACTGAGAAGTAATAGAGTACATAATCGTATTTTATAATAACAA -TATTTTTAATCTTCTGTCCGACCTTGAATGATCCATCATGATGGTACTAATAATACACCA -GTATAATAAGGATGGTATTTTCTTCTATGGTATTTCGTAATCTCCTTATCCTCCATGAAG -TAAGTGCCATATAGATTAATTATTCGATCAAATGGCAGTAAGTACAATATATATTGTGGT -ATATAGATGTAGATTGCAGTTTAGTTTGGGTAGGTTTTAGGCCGAGAAAGCCGTGTATAT -ATCCATGGATTGTCTGATTGCCTAGACCAAATGAATGATGTCAAGAAATTCCGAGGGTCC -CGGAACTTTTCACAAGATGTCGACCTACAAGTATGGCTTCCCTAGGCCCTAAATAGGAAT -CTTGGGTTTTTTTTATAGTCTAGTGGTAAAATACGCTGTGGTTTGTTCAACCCAACGGTC -TCCCCGAGTTGGAACTCGACGCAGCCTGTTGAATGATATCCCTTCCCTTTATCAAATATG -AGCACTGTCGACGCAAATAAAGACTTGTATACTTACAAGAGAGATGCAAATGAATCTGAA -CGGTAAGAGTGAGAATCTTGAAAAGAATAATGTTGCTGAGGACATGAGATTAATGCGTTT -CCGAGATTGGACGACCAACATGTAATGTTCCTGAACTTATTAGGAGGGAACCTGATACAT -CCTCTGATTGATAGAAATAATCTTCGTGCAATTTCCGACATCGCCACCGGAACAGGGTAT -GTAATTGTCATGTGTGTCCTTGACAGGGAATATCTTTTTCAATTCAGTCTATCTTTCAGG 
-ATTTGGTTGCAGGATGTCCGCAACGCATTTGAAAACGCCCGTCCGTTTCCTCTCCTTCAG -GGATTTGATATCTCCAGTGCGCAGTTTCCAAGTGATCTGGATAAAGTAGGCTTCTCAGTG -CAAAATGTGCTGAATCCATTTCCTCCGGACGAACTCAATCGATATGATTTGGTCCATGTC -AGGTTCCTCCAACCTGGCTTGAAAGAGTCTGATTATCAGCTTGCCGTGGCAAATATGGTA -TCTATTATTAGTAAGTCCTATACTTTGCAATGAGATAGCTATTACGAGAAGGCCTCTAAC -AACTTTCATCTGTAGAACCGGGAGGTTATCTGCAGTGGGAAGAAGAAGACTGCAGTAGAT -CTTTCAAACTGATGACCACCATACAACCACAACTGCAAGAAATTGTGGATGATTATGTTG -GCTATGCAAAGTTGCACGGCTTCAGTCTTAATGCCCCTGAAGCCATTGTAAAGGCATGTA -TAAATGCAGACCTCAAAGTGATTAGCGAGACCTTTCAAAATACAGCGAATGCGCCGCCCG -AGCAGCTTTCTCATATTAGGTCTTTTTTTCGTCGATCGTTGGCAATGGTGTTGCCAGTTT -TCTACTTTGATCAAGGGCTTGTTCATGATGAAGACGAAGGGCAGACCATGTATCATAGGA -AGATTAGCAAGGTGAAGAGGCTTTTTGAGGAAGGGCTAGTGCTTGATGATATTTTTGCAT -GCATTCTCTGCCAGAAGACCTAGGTAGCCTTGAATAAAAGAAAATCAGGCTTCCAGAAGC -TGACAAGAATAATAGACCATTATCACTCTGTAGTGGTCATAGTCTGGGATTGAATGATAT -AATGAATCAATTGGAAATATAGAGAGAACCCAAAGCTAGATACCCGACGGGATACATATA -TGAAGAGAAGACAGGCTTATGTCAAGCCCCTGTTTCCTGGGGCTCTCTGATCATCAATGC -AAACCTCTGACTTCCCTCTCCACCACGCTCCGTGTAATCCAGCTCTATTTCCTCGACGAT -CTCCCACCCGTGCTTCAAATATGCTGGCACTCCTTCCATTGTAGCTTCCAGATATATCCG -GGCGCTACTGGTATCTGCTTTCTCGATACCCCATTTGAGAAAGGCACTTCCTATTCCCAT -TCTCTGGTACTCCGGCAAAACGCATAGTAGCTCAAGCACTAACAGTATTCATTTAGCAGT -CAATATCGTAAGAATCTAACGGAGAGTGACAAAATGAGGTGACGAACCAATATCAGTTTC -CTTGAGATGTGTGTTTCTACTACGCTTTATCATTTCTTCGTAGAACGTGTAGGTACCCTT -GTTCAAATTTGGGGCATACGCCCACATATTCCGCAATTGCGCTTGAGCATCCTTGCTCAG -ACTTATGTCCACAGCTCTCTCGTACTCGTAAATAGCTGGTATATGCCAACGACTATATGC -GATGATATCGCCGGCTTCGGAGTCCACACCCGCCAGAATATGGCATTCGGATTTGGCAAA -ATACTCGTGAGAAGAGACTGTTTTGAATGTGTGGACTGCCAATGAATCACATCCCTTATA -TGTGCTGCGCATATAATTGGACGAGGGAAAAGAGCGAATTTCAATGCCCACAAGGGCACT -TAGGTCAGATTCATCGGCGTACTTTATCTCTAAAGGCACAGCGAGATCTGGCGTCATCTT -CGATTGTAACTGCCGTGGGCATAGATGTTCAATCCCAGTGCAAGATTCAGAATTTGTACA -TTAATTAAGACATCCATTGACTGCATGACGCATCACTGCCCTGGGACATGAAGACGATGA -CTCACGGAGCGGTGAGACTGAAATAGGCTAGAGCTTCCCGGACCTTCTAAGAAGGGTTGA -GGTATCAACCTGCACGTTATGTCTTCAGATACAGAGATTGAAGGGGCCACTCCCGTTTTT 
-GTAGGAATGTAAGAACATGAATCTCGAGCGAGCTAGCCGTTCCAAGGTCTCACCGCTAGT -TCCCGGTGAAGCGAACACCGTTTCGGAGTAGGGCTGCTGTTAGTTGGAATAGATTGATGC -AGGATATCTTGTAAAATTACTGGTGAAGGCAAGTAAGAATCCTGGAGCTTAAAACTACAG -GTAGCGGAGGATGCTAAAACACATGGCTTTGTCTTTGAAGGTGCTAGATATTGAAATTTA -GAAGCGGTCACAATGCATTACGAGTCTATATTGGACATCTAGTTAATTTCTTGCCTCAAG -CCAGATATTTGGACAATACGTTTCCCCATTCTGCTTCATATGCTTGCAGCATCTAGAAGG -TCAGCCTCGAACGCACTCTACACACTCGGTACACTTGGTACATGGCTCAAGGGAGATATC -GCCCGAGCTACGCACACACGCGGGTTACAGGCCAAGGTGTACAATCCCATCGGCTCCTGG -CTCGAGTCACTCTGGAGGTAGTCACGACACGCAAACCCTTCGGCTTGCTAGTGGGAATCC -GGCCGTGTGTGTAGGGACCTTCGCAGCCTCCCGGGCCAGATCGGGCCGTGAAAGTTCTAT -AGGTTGGTGCAGATATAGTTCTTCATTCTGACAATCATTACGTGAGTCAGAATCTTAGTA -CAGTTTGATCTGAAGGACTAGGCTTGTCGCTTACCTCTTTCCCCCGCACTACATTTCTCC -CCCTGAGACAGCAACTTCCACAGTTCTGAGGCATATGACCTCCTATTTGCACCCGTCAGC -GCAGAGAACAAAGTTTTAGCTCGGTAATGTTTTCACATATCTCTGGTATGAGCAGGCTGC -GGATTTTGCCGGTAGCGCTCGGCAAAGCCCCAAAAATGGGCCGATCCCAGGCCCATAGCC -CACCAAGAGAATTGGGCTATGGGCTCAACCTTCGAAGCCTAAACCAACACAAAGCCCATT -CGTGGGGGCCTATAAGGCCCTAATTATCGCTTATCTAGAGGGCACAACTAGTATGTTGTG -ATCGCTATATTCCGACCTTGCTACTAAGTACATAGAGTTTTAATTCCCTGTCTATAGATG -TAATGATGGGCAGATGATAGCTGTCTTGAGTTTGCAGCCGTACTGAGAAGCTGCCTCTTG -CAATATCTGGCCGGCCAGCCGGTGCAGGGAGTCATCGGCAAGCCTTGTGGATATCATTGA -TATCGTCGATATGGGATTAAATCTAGGGCTGAAAATGGGGGTAAAGGCGGGCCAGGTAGA -CTCAATAATGATGATATAATTCCTGACCGCGGTGCCAGCGATACTTCGGCGTCATCGAAC -GTACCAAAGACTATGCAAACGCTGTGATAGAGCATGGCGAAATTCGCAGGGATGTTACGC -AGAAGCGTGGTGATCCTCTGTTCCGGATGGCGTTCAACCAAAGTGTCAACCAAAGTGCCA -CCAGCGTAGCCCGTATTGGCAGTGGAGGAGCCAGGTCAACAATGCAATGATCTTATTGAG -AGAGGCTCTTTGCCAAGTACCATGTAATTCCGCATGCTGACTCCCTGCAGTTTTGATTAG -GTCTAGAAACGACAGGCCTATCACTCGGAGAGATGCAATTTGGCACTACTGTAATGAGTT -GAACCTAAATATCAACAAAATCAGGTTTAAGTCCAGTCACCTATAGCACACACGAAGAAA -TCAAGCACCCTTGAGTAACCTGATCCTATTCTCTGGCCGCTATCTGGTTCCTTGCCACAT -TCAAAACTAGGTTTGGCTTGCAAAGTCTCATGTAAGTTGGTATGCTGGTAGACCGAGCTG -CCGCAATGGCAGATTGGCACCAATCTTGGTCTTACGGCTGTGATGAAAGCCCTACAAAAG -GCCGTGGTTAACACCATTTGCTGGAACGAAACGACGTTGAACCTAAAAGACTCGACATAT 
-AAATATACGAATTTATCCGACACGACTCTTCTTTCAACCATTCTACACCATGTCTGTCCT -GCTGAGAGCACCGCGTTCAAATATCTTCAACAGACTGTCTACTGTCAAACCGATACCACT -TCGGTTTCCACCTCGATTTTCAACTTTGACAAACCCTGCTGGAAAGATGCCAGTCACAAA -AAGCCAGAATGCAAACTCCATGGAACCTCCAAAATCATTCAAATGGAAAAGAATCAGCAA -CGGCCAGTATGTCAGGCCATTTGATCCTTTTGAACATATGTTTTCACTCTTTTGTCACGA -CCCAGCCAGACCAGAGCATCGGAGCCTTGACGTCATGGTTCATGTTGGCATGGAAACCTC -AAATCAAGACTCAAATAAAATGTTGGAGGCGATCAAAAAAGCATGGATCGGAATGCGTTA -TCTCCATCCTAGTCTTGCCACTGAAGTCTACCCGAAGGAATTCCGGTACATGCCTCTCAA -AAGTCAAGCCGATATTGATGCTTGGGTGGAAAAGACTTTCATTGTTAAAGAGTGGGATGC -GAGCACTGAAGTACAAGGATGTCAAGACCTCGGCATCGCACCTTCTGCCAAGTCGCCTCT -ACTATACTACTTCCCTGACAAGCAAAAGCTGATCCTCCGCATGAATCACATGCATGGGGA -TGGAAGAGCGTGTGTAGATCTGCTACAAGATCTCTTCAATGAGATTCAGCGTTTGAACTT -AGGTGGAGAAACTGTCAAGGAACCTTGGGGTGCGGAAGTACCCAAACTTGCTTCTGGTGC -TTTTGACACTGCGGGAATAACAGCCTTGGAAGATGACTGGTCTTCAAATCTTACGGCATC -GTTGCCCCAACCACAAGCTAATGGAGAGGCCTTTGAAATTCCTTCTATTGATAATACACA -CCTACCTGGACCGGGCAAGACACAATCGCTCGACTTTTCCGAAAGTGAAACATCTGAGCT -TCTAGATCAAACCAAGAAAATGAATTTGGGGATTACTGCTTTCGTACATGCAGTACTTCT -TCATGCAGGAAAGAGAATCAGTCCTAGCTCTGATAGTATGGTCCATTCGACCGTTCTTAT -TTTCAGTTTCCGAGAAAAATGTACCGGCTCACCTGCCAATGCTAATGCGAGAGCTGCTGC -TCTCCGAATTGGCTTCTGGCCCGTACAACTGTCGATGTCGGATGACTTCCAACGCACGGC -CTTTGGCATTAAACAGGAATACAAGGCTCTCGCTCGTCAGAAATCTGCTGCTATTGCCAC -GATGGTACCATATTTGCAGAAGTCTGTATCAGTACTATCCAAAGACTATTTCAAGGGAAT -TCTACCTAGCTTCATTGGAAATCTCTCGAATTACTTCCCAAAGACCTACGGCTCTTTCAA -GATCCGGGACCTCTGGATGTGTGCGGTGCCGACCGATGAACGAATTTATCTTGGTATTCA -GACCTTTGGTAATAAATTGTCTATCAGGGCTTGCTACAACGAAACATACCATACAGATGA -GCAAATAGCCGACTACCTTCTGCATATCAAGAAAGAGATGTATGCCGGTCTGACATCTGA -CGTCGAAAAACTCAATTTCCTGTCGATGCTGAACTTGCAGGGCTAGTGCTCAATCTGATT -GGATTACATGAAAGTGTGGCAAGTATATCGATTGGGCACAAAGCGATTCGATTGATCCGC -TTGGTACTACTTTTGGAAAGGTGCTTTGTTGAGTTGAAGATCAGTCAGTGTCTCTGTCAA -CAAATAATAGCACATGCATGATTTTCGGTACTATTACGCCTCTGTGAGACCTTCGAAGCT -TAAATAGATTCCTGAACTATCGACTGCGGGCATCGTTTTCGCTATACCTTCTAGAATAAA -TCCTTCATCATACGATATCAGATGCTTACCGATGTTTTCTCAATTTTACTTGCTGGTCCA 
-TTGCCCCTTTTTTGTGTAGTGTATGCTGTCAATTGTAGTACTGATTAGGACACTAGTGTG -ATAGAGCGCCTGACGGCAGTCTCTGGCCGCCAGTAGACCTTTTGCCTACGGTAAGATTGG -ACCATTGAGCCTACCGCCGGATTTGAATAACCCTCCCCTGGCTTTTATTATCCTCCTATA -TATCATCATCAACACCATTAGAGGTTATACAAAAGTCTCCCACATAAAGAGTGTTGGCAT -TTCCCGCCTTTTTCTACCGTCTACCTATCACAGTATCTTAATGTGATTCAAGAAGAATTT -CTAAGAAGCCGACAAACAGGACCACTGAAAGGACTACTCAAGAACCCCTTAAAATATTAC -AAGGCCGTATAAGTGACCGTCTGATCTTATGACATTTCACCATTTCAGTAGCAAATTGAA -GTAGATGACTAAAGAGAATTGGGAAATCGCAGAAAAGCAAGATTCGAGGTAGTCTACGTA -TTCTCAAGATCCAAAAGATTCATATATTCAGAACTCAATATTCGAGAATTAGAGCGATAT -TAGTATATTTGTATCAAGTACGTAATTGGCTAATAGTAACTGTACTGATGCCAAAAAGTC -TGCTGGTGCACTGGCGGCTGGTCCTTCCGGCTTGCTTTACAGCTAATAATATGCGGCAAG -GAACTTCTTAATTTGCACGCATGAAGAAGGGGACTTTGGAGAATTTTGATCGAAAATACG -AATTCGAATAGACAAACAACTACATACCACACTTTTGGAAGACCTCTCGCGTATAAGGTC -CGGTGAACGTGGTGAAATTATTCGGCATTGGATCGCGTATTGTATTTTGGTATGCGACAA -TCAACACTCGCCTCGTCGGATTTTGAACTAGCGGCTAATCAATTGACTCATCGCTGAAAG -CTTTGCTTGGTAAAATCAAGCGAGAAGGATACACTACCGAGATGAATGGTAGTCATGGCA -CGATATCCTGAATGATGGATCAGTATAGAGCACGATAAGAGGCTCGGAGATATGCAAGGG -ACCTAAAGAGATGACGGCGAATTTGCGGTGTACGATGTTGTCAATATGAAAAAGGGGATC -GCCCATATATGAACTCTACGTATATATGTTGTATGATGGACCCAAACCTAATTTTTACTT -GGCTGACGGTCTTCCAGTTTATTTCTCCTGTTTTTTCGAAGTGGCATACTGGCCTTGTAT -CACCATAAAACATGAAAAGGGTATCAAATAACCCACCCGGTGATAATCAGCTATGAAATG -GTTCCTCCTTATAGAGCCATATTGGCACAAGTAAAGGCAGGCGTAACAGATCCCGAGTAG -TCAGCAATGATCGCTTCAACAGCTGCATAAAGCCCAGGACCCAGCACAAACAGCCCAACC -ATCATGATAAGAACATTGAGCAGAGTGAAAGCAGTCTTTCCTAGACCGTTGAAAAGCGAT -CCCCGATTGAGCTGCCAGTACGCGACAGCGAAGTAAATGAAGCCGAAGAACGAATCAAAG -GCTGCGCTGAGCAGGGAGAGAAAATCACCCATGCTGGGAATGATCTCTGCGAAAACAAAC -CCGAGTATCCAGATCCCGACCATGATAATTCCCCACACACTCCATCCAATCACGGTGTGG -CTGTGCGAATGTCGGGACTTGCCGAGGATGCGGGTGTAGATGAATTTCCCCGTTACGTTG -GCGTAGATCACGCCAATGACGAGGGTGGGGACGATCACGAAGCCGAAGGATGCTTTCTTG -TACGAGACCACCCCGAGACTACCAAAGGCTGGGGCGGTAGCATATTGGCCGAGGTAGCGG -AACCCAATGGCTGAGGGCACGATAAATAGGATGGCAGAAATCGCAGCGAGAACAGCCAAT -GCTTTAGGGAAATCTTGCGGCCGTTCCATCTCCGAGATGAAAGTTGGGAACAGAATTTGG 
-GGGACCCAGAGGTAGGTGATATTCAGCACAGCGTTCATGCAGGCGACCCACGTGGTGCCT -GGTAGAGGGAAAGCATAGGTTTTCACGGGCCCGTCTGTGGGGTAGTTGCCCCGGTATCCG -AATAGAGGGGCTTCTTCGATTCCCGCAAACACGAGGAATAGAAGAATAGCGATACCCATA -CAGGCGGCTTTCGGAGGGTCAGCTTGGAGTAATTGGGAGACTTAGAAAAAGCAGCTACTC -ACATGAGAACATTGACATGAAGCTGATATGTTGCAAAGTCCGGGGCATGGACATGACGAT -TCCCACAACAGCCGTGATGATTGAAAAGACAGCAGTGCACAATGAATGATCTGAAAGTGT -ATTCAAGACCTTGGCGCCAGTCAAGACGTGAAATCCGATGAGAAGAATATTATTCGCAAG -CATCATGAATGCAGAGAATACATAGGCAACTTTGCTCTGTCCAAATACATGGTATCCAAA -GTCGCCTGCGGGTGATTGCAATGTTAGTTATCATTTCACGCAGAAATCCGGATACTTGGA -CTGCCGCCGACTCACAGATATCCATAATATGTGGGTGTTTCATGATATACTTGTGCATCG -TAATAGACGTGACCCAGAAGATGAGTCCACAAAGGACCATAGTAATGATTCCTGGAACCC -ATCCGAGGACACTGAAGGAAGGACTGTCAGCTTCATTAAAAGCCATATTCCAAGGAAATG -GGCACTCACGAGAGAGACCACGACTGAGCCATGATGGCTAGACAGACCTGCTCACCGGCA -AGCAAACAGGCCGTTTTCTGCCAGGACATTGTCCGAAGCTTGATATCATGTTCATGGTCT -GGACGAAAAGTGATGTCAGGTCAGCGTGACATATATCGCAGGGCAGTCGGTACCTGCGGC -GACCAGAGCAGTGATGGTATCAAAAGAGTCTGTTCCATCGGTCTCGGTGCCCAGGACGTA -GCCTGGCTCCTTCTTATCGTTGGACATGTTTCCAGATGAAGGGGTGTTGAAGAAGGTGAA -TACTGAAGGCAGGAACAGACTTTACATTATAAACAAATGAGACACAGATTACACGTGGAT -CGAAGtttttttttcttctttttttcttcttttttgcctctttcttTGCCTTGTTTGGGC -TTGGCGTTAACATGGTCAATTTGACGCCAATCCAGAACAAGGCAAAAAGGAAATGATGGC -GCCATAATTAAGAGCATCGCGATATTGGCTTTAGCTTGTCAGGGCTTAGCCGAACTTGCT -TATCGGTCGTTCCGTGGGTCTGGCCCCATTTGACTGTTGAAGGCTTTGGATTTTTAATCT -GATTGGTTGAGCTAGTGGAATTTCCCGTCTTAGCAAATCAAAAAAGTGCACCTGGGCAGA -ATTTACCCTTGGGCTATTTCTTGGCACGTTGCTTGTTATATATTTCTGGGAATTTTCGTC -TGCGTGTTTGACCCTATGGACAAACTCAAACTACAGCTTCGCACTAAGCGAGCCGCCCTG -CACAGCTCGATCAATAATCTCCATGGCCTGGAATAGCTGATAATCATGCTGTTTAGGAGT -GACAACCTGAACACTAAGAGGTGAACCGAGGTACACGCGACGGTCAACCATTTTCTCGTC -CCCTATGTATCGAAGCTAATTAGCGAGACCTCGCCGGTAGAATGTGAATATCGAAAACTT -ACACAACTGCCGATTAGCCTTATCCCAGCTTCCCAGTACAGGGGAATCCATTTCCCGTCC -AATCTCAAGGTCAGATTCTGTGAATGAACGGACAGGAACCGAGGCAGCCGGGTAGTCTAG -CAGCACAAAGGCTGAGGTGTACCCAACGGCGTTGTATCGGTCGAGCTCTGGGACCGGATG -TGGCGCCACAGGGCAAATTATGGCGTCAACACGTCGAGCAACAGAAGCCTGTGATGTCAA 
-TGTCCACATCTTCATCATCTCCTCTTCAATCTCAGCACGCTGTGCCTGCAGATTTGCCAG -TTGAAGCACTGTCAGGGCTTTGCCACGCCTCAGGCGGCCCTGCAGCCACGGAGTGAGTTC -TTCTCCCATGCTCTCGATAAGATCGAGCATGTGCGACCCATCATCGACGCCCATCAGCCG -GCCAGCGAGGCCTTGGCATTTCGGTAGAACAGATGGAACGGGAATATCAACGACTTCGAC -GCCGGGGGTACTGCGGAGCTTGTTGGCAACTTCGTTCAAAATACGGGTAATTGGGGGCAA -AGGTGTGACGAGCCCATCTGTCTTGAGGACACCGATGGTGAAGTTGCGTGGGGCGAGAGT -AATCGGTACTGACTGTGATAGCCAGGAGCCCGGAATGCAATCTGCGCCGAATAGCTCCGC -ACGAGGCACGATCTCTGCCATGACGGCGTTGATATCCGCAACAGAGCGAGCCAACGGACC -TGCTACTGCTTGCAACCCTATGCGACCCTTGCCTTCAATCTGGCCGCTCTGTTGGCCCCC -ATAAGGCACTCGTCCATTACTCGGCTTGAACCCATAGATACCCTGGCACATAGCTGGGAT -ACGAATGCTACCTCCGATATCCGTGCCAAAACCGACCATGGACCCGCGCATGGCGAGCAA -AGCGCCCTCTCCGCCAGTACTGCCACCGGCAGTCCATTGACGGTTGAGAGGGTTTAGTGT -GCGCCCGAAAAGATGATTGCAGCTGTCCAATGTTCCGAGTGTTTGTGGTATGTTTGTCTT -GGCGATCACCACCGCGCCCAGAGACTGCAGTAGGTCTACTAGTGGGGCATTTGCTTTGGC -AGGCTTGAATGCTAAGGCAGACAATCCAATGCTTGAGTCCACGCCTTTTACATTGAATGT -ATCTTTCACTGACACCGGGAGCCCATGTAAGGGGCCAATTGGGGCGCCCGTGCGCTGGAA -GTGATCGTCTAGCTCCCGGGCTCTTTGCAGGGCACGGTCGAAGAGTGGTTCGGTCAGACA -TCGGGTGAGTTGGTGAGTGACGGCCGCTCTCTGCAATTAGCCATCACTATTAGCGCCTTG -ATCAAAGATAATTAGCTGATCAGCAAGCACACCTTGCAAAACCCTCGCACCACATCCTCG -GCCTTCAATCGACCTTCTGTCATTTCCGCCACAAGGCCCCGCACGTCGTATTTCTCCGTG -ATTTCTAACTCCTTGGGTGTGATTATTCCGCAAGTTCGTGGAATATCCATCACCCCGACT -CGTTTGTAGCTTTTTGGACCAACCGTGATCGAATCGGAAATCGAGAGCATCCCATCAGGG -ATGAGGTGTGCGGGTAGCCGCCATTCGGCAGGGATAGCTGCAGCAAGCTCAGCTTGCTTG -CGTTGGGCAATTGCCTTATAATGAGGTTCCTGGGAGGCCATGGTGAAAATGTGAGAATCG -ACATCGAGGTGGAGAGGTGCTTTTTAATGCGGGGAAGTGTGGAGAACAACCTCGGCCCAG -AGGCCAAGGTAGGGATTGTCCGCGGGTACCCGATTAGGAATCATGTACATATAGTGATGG -TGTGCAACGTAGGAATTCTAGCTCCAATTTAATGGGGTTTTTGTATTGCTACCTCGACCA -GGTCAATATTTAAGCTTTGCCTATGTCAATAGTGAAGTCGTTCCATTGTTACCTACAAGA -TCAAATAAAAATTTACAGACTCCCCTTGAGCATAGCTCCGCCATCCGACGCAATAACTAC -TCCATTCAAATGACTTGAAGCCCTAGACCCGAGGAAGACAACCAAACCAGCGATATCCTC -AGCATTACCAAGCCGACCATTGGGCGAGGACGCCTCAATATCTTTCGTACCTCCTCTCGC -CTCAATAAACTTTTCAGTCATCTTCGAAGGGTAAAACCCAGGGGCAATAACATTGGTCAA 
-GATATGTCGCGGTCCAAGGGCAACTGCCAAATTTTTCGCAAGGTGGATAGAAGCCGCCTT -GGAAGCGGAGTATGAAAATGTTGCATTATCCCCAACTGCGCCGATCACAAGACCTGCCAC -GGAGGCAGTGATAATGACGCGGGATGGATCTGTCATGGTTGCCTTTGCTGTAAGTAGAGG -AGTGAATCTAGTTCAATTTTAAAATCGTGCCTCTCCCTAGTTTCGCGTAGGACAGTAACA -TACTTTTGGATGGTGTAGAACACGCTCTTTACATTCAAATCCATGACTTTCGAGAACATC -GGCTCAGGGTGAGTGTCAAATTTCTCGCCCCATGTTGCGCCGGCATTGGCGAAGAGGATA -TCAACATGGTCTGTGGTCTTGCCCACTTCAGTGACAAGTCGGTCTATCTCTGTCATATCT -GAGCTGTCGGCTGGGACGGAGATGGCTTGTGCGCCGGGCTGTTTGTTAGGGATGGCATTG -AGAGCTGCGACTGCTTCGTCGCATGCTGCCTTCTTCCGGGAGGTAATGTAGACTTTGGAG -CAGCCGGCCTGGAGAAGACTGCAGAGATGTCAGTAGATGTCATCGGGGTAATAGTTCTTG -TTTCGCTGGAGAGGTACATACCCTGAGGCTGCATGTAGTCCTAATCCTCGGGACCCACCT -GTTACGACAGCGACCTTGCCCTCAAGGGAGAAGATGCTGGTAAAATTGGAAAGTTTGACT -TGTTCGTCGACGGACATGATGGAACAAGTTGGAACTTGAATTAGAATGATGATTTTGAGG -AAAAGAAGTTTCTCCTAGCTATTTGTATCTGTACAAGCCGAAATGGCCGAGGTTTATTTG -AAATCTGCCCCGCTTGGAATCTTCCAAGATTGCAAGATCGGAAGCCTGGAGAAATCGATA -ATCTGCATATCATTCCATTTCCAGATTGTCCTGCTTTCATATGCTCATCGGTGATACGTT -GATGTCACAATGGTCTTGAGTGATCCAATTAGAGTAGCTTGTTTGTGGTTTAATAGAGCC -AAAAAAAAAAGGCAAATATACCGTGTGCTTCTAATGGGAATGGTTACGCTTTTTAAGGGC -AATCCAACTGACGTCTGGTTCTGGTTTCAGATAGAAAGTATTGAGCATTATCTTGAAGAA -TGGTTGCATTTCATGCCAGGTGACTATTCGAAATGACGATAAGAATCTACAGGGCATATT -CGCAGCTAGTTTCCCCCTTCCTCTCTCCTTGTGTGACGTATGATCGTTGTGTTCCTAAAA -GACCCCCGAAGagcccacaaagcccaaagcccacaaagcccaaagcccacCCTAACCAAG -CTCCCAAATGGGCTGGGCTTTAGGCACTATCTCGGGGCCCACCGGTGGGCTAAATATGGG -CCCAAGTTTTTTTataggcgcctcataggcgcctaatagggcttaatagggcttaatagg -cAGAATAATAAAAATATCATTAGTCGTAGCTGTATAAGTGATATAGAGTTTGCCAGTAGT -TCTGGGGATTCTGAATACTACTTCAGCCCCAAAATGTAGGTTGTACAATGGTGTTGGTGT -GAAAACACAGCAGATTACCACTAGGCCACAACAGAAACTAAGGTTCCTGTTTATAGCCTA -GGGGGCTTATACTGTGCTAACAGATGATCCGGGAAGGAAAGAGACACTAATCCCTAGACT -GTGGCAAAAGAGGGATCTTAGGTGGCTGATATCGCCGATAGCACGGTATACGGGTATGGG -CTGGTATATATAGACTGGGATCTACGTATAATCCTACTTATCAAACAGCTACTTCATATA -TACCATGGCCGTAGTTATGCAGAATAATGGAGGCTGATGGGCACCTTATAGGCCCCTATC -AATGACCTCTAGGCAGGGGTGGGCCTTGAAGGTCGGCCCCCTAGCACACTGCTGGTTGAG 
-CCTTGGGCCCGAGATATAGGACCATTTGTGGGCGTTGCGGAGGTCTAGTTGCTACACTAG -ACTGGTTTAGAGGATGGCGCAAAGAGACATTGGGTATGGCACCTAAGAATTAGATCGGAA -ACATTCAAGGCAAATAAAAGAGAGCAAATTAGAGAAAATTTAGAAATCTGCTCAATCCAA -CTCCACCTGTGGATATCTCTACCTTGGTGGCCGCAAGATCCGTGTTGTCACATCACTTCC -GACGAGCATTGAGTTGAGGGACCTTGGTGGTATATTTCTTGTCTACATTTCCACTCCTCC -TTGCCACTGCTCTCTGCCCGCTTACATCCACGATTTCACCCATGTCTTCTCAAACATACA -CTCTGCCTCCTCTCCCCTACGCCTACGATGTAAGTATCATTCAATCATAGATGCTAGGGT -TCCTTGTTTTGAAGCTAACAAGTAGCAGGCCTTGGAGCCCGTCATCTCCAAGGAGATAAT -GGAACTGCACCATCAGAGGCACCACCAGACATACATCAACAACCTGAACGCAGCATTCTC -CGCTCAAGTATCTGCAACTGCATTGAATAACGTCCTCACCTTGGTTTCATTGCAACAGAA -GATCCGTTTCCATGGCGGTGGCCATATCAATCACTCGCTGTTCTGGAAGAACCTGACTCC -CCCCAACACAACTGCGAATGATATCAGTGCCGCACCTACTCTGCGTGAAGCTATCATCAC -CCGCTGGGGCTCGCACGAAGCCTTTGTCAAAGCTTTCAACAACGAGCTTCTTGGTCTTCA -GGGGAGTGGCTGGGGATGGTTGGTGAGCAAGGGTGGTGTCAAGGGACGACTTGAGATTGT -TATAACTAAGGATCAGGACCCTGTCAATGAACCCGATGTGCCTGTCTTCGGTGTTGATAT -GTGGGAACATGCCTACTACCTCCAGGTGAGTTTACTTCTATCTTTTGCTCTTTGCTGTTT -GTATTTGTCTGCTAATCTGTGAATAGTACCTGAACAACAAGGCTGGCTATGTCGAAGGAA -TTTGGAAGATTCTCAACTGGGTAGAAGCAGAAAAGCGCTACACTGCTGGCGCGGAAAGCC -TTCTAAAGCTGTGAGAGATTGACTGACTCACAGTTCTCAAGTCTCATTTAGTTTCAATGA -TCAGGTTGTCTGCTAGGAGGGCTAGTATTTTAGATTCATGCAAGATAAGTGATGGTTGCA -ATTATGTCTATGATGTATGCTTGTAGATAAGTCTCCGTAGACACTCGTCACATTAATTGT -ATCCCACCACTCCAGTTCCTATGATTGGACCCCCTTGCTCTCTAACTGTCACTTGTCGGC -CAATGACGCCCTTCTCCGAGACGCCCAGATCCAGCCTCTCGGGCAGTTCGAACTCAAATT -CCCTCATATTCGTTGTGTTGGTGTTCTTGTCATTTTCCATACTCAGGTAGAACTGATGAA -GCGGGGAAGTGGGATGGGATGTAGCGGAGAGGTCGCCATATTTTCTAACATCAACTGTAA -ACCTTTGTCCGTTCCAATAGCTCGTGAGGTAGGATGAACTCACTCAATGCGTATATCGAC -ATTTCGACATCTTCTGAGTGTGCTATATGTCTCTGCATCTTCCGTTCGAAATATTGCTTT -CGTTTTATGGTCTGACGCATCTGTTTCGCGAAAGAAAAGAGAGTatgatgacaatgagcc -agttgacgatgatgatgtaaatgaggataccgattctggtgacgacgataatgatgCTTC -TGAGGGGCTTGAGTTTGGATGAGTCTCTCGAGGGAATATAACGGCAATCGTGCCTCCGAA -CGTTGGGCGGTTGGTTTCTCCACACATGATGATCTATAAAAGTGAAGGGGTTGAAATGAG -ATGTGATGAAAGAGGGCTGATATACAGATGACTATATTGGTTGTATGTAACAAAGAGAGA 
-AAAAGACAAAAAGACAAGGAAGACCGTCTAAATACCATTCACAACACCCCCAAGGTGCTC -AAGGTGAGAGTGATCGAGCACGAGTGAGCACATTAAAAGTGGATTTCCTCAAATCTGCTC -ATGTGGGGCCCGTTGGTGCCTTACTGCCCTGTGGCCTTGTTTCCCAGGCAGAATAAGTTG -CCTCAGGCAGACTTTTATGGTCTGCCCCTATTTATAAATAACCCTTGGTGTATACTTCGT -TTCACTATAAGCGCTTAACCTAGTGTCCATTGCGATCAAAATGGTTACATGAGAGTCTGC -GTTGGGCCGTGTTGGGTCATCATACCCCTCCTTCCTCTCCCCTACAACATACACAACCCA -CCATCAAAAAAATGGACCAGCCACCTCGCGCCGAGGACTACTATGACCTAGGTGACTATC -GTCGCAGTGTAACCACGACGAACGACAATGCCCAGCTTTGGTTTAGCCGGGGCTTGATCT -GGGCCTACGCCTTCAATCATGAAGAATCTGCTTGTTGCTTCGAAAAGGTCGTCAAATTCG -ACCCGACATGCGCCATGGGATACTGGGGTATTGCCCTTGCCCTGGGGCCCAACTATAATA -AGCCATGGCAGCTATTTGATGGAGAAGATCTCTCTTTTACCACGACTCGCGCCCATCGCG -CCATCTTAGAGGCGAAGAAACATGCTCACGCCGCGACGCCTTTGGAGAATGCTCTTATTA -ATGCACTGCAGCACCGATATCCCCAAGAACAGCCCATAGAGGACTGCACGAATTGGGACA -AGAACTACGCAGAGGCAATGACATCAGTGTACAATGATTACCCGGATGACCTTGACGTGA -CCGCGCTATACTGCGATGCGCTAATGAATTTGACTCCATGGGGCCTGTGGGACATCAAGA -CGGGCAAGGCCGCACCAGGAGCCAAGACATTCGAAGCAGAAAAAGCGCTAGACCGGGTTT -TGAAACGCGACGAAGCCCTCGACCATCCCGGCATCTTGCATCTTTACATCCATCTGATGG -AAATGTCCCCCACACCTGAAATTGCCCTACCGGTAGCGGAGTACCTTCGCGGCCTTGTAC -CAGATGCCGGTCATCTGGAGCATATGCCTTCCCACATTGATGTCTTATGTGGGGAGTACA -AACGCGCAGTAGCGGCTAATACCTCTGCAATTCGCGCAGACGAGAAGTTCCTCGCCAATC -AGCCCATGGGCCATTTCTATAACCTGTACCGCGCACACGACTATCACTTTCGAATGTATG -CTGCTATGCTAGGTGGCCGATCGAAGGTCGCACTTGAGTCTGGAGCTGAGCTCGCGCGGA -TCATCACCGAGCCTCTTCTGCGCATCGAGTCTCCGCCGATGGCAGACTGGCTGGAGGGAT -TTGTCGCTATGCAAATGCATGGACTTGTGCGCTTCGGTCGCTGGGAGGAGATCATTACTA -CAGAGCTTCCAAAAGATGCAGAACTTTACTGCGTTACCACTGCAATGATGCACTATGCAA -AAGGTATCGCATTTGCTGCTAGCAGCCGTGTCTTGGAAGCAGAGGAACAGCGAGAGCATT -TTCTGCGCGCAATGAAGCTTGTCCCGGCTTCCCGGACGATCTTCAATAATCGTTGCATCG -ACATTCTTCGGGTCGGTGAGGCAATGCTCGATGGCGAGATTGCCTATCGCCGTGGCGAAT -ATAACACTGCGTTTTCGTATCTCCGGGACGCTGTTAAGCGGGACGATGCGTTGCCGTATG -ATGAGCCATGGGGCTGGATGCAGCCAACGAGACACGCGCTAGGGGCGTTGTTACTCGAGC -AAGATCGAGTCGAGGAGGCTGCAGCTGTGTATTCTGCGGACCTCGGAATTGACACATCTC -TGCCTCGCGCTTTGAGACATCCGGGTAATGTATGGTCTTCGCACGGGTATCATGAGTGTC 
-TGGTCAAGATGGGCCGAAAACACGAGGCGCGTGCCATGTTGCCGCAACTGAGGATTAATT -TAGCGTGGGCGGATGTACCCATTAAATCATCGTGTTTCTGCCGACAGAGAACTGAGTGAT -TCTCTTTCATGTTTCAGAACTATCTGTTTGGCCCCTTGAATATTGCTTAAGGGGCATTTG -TTCGAGGTAGCCACAATATACGGAATTTGCATCAAGCACACGAGCGACTGGTTTCCATGA -CGGAGATGGATCAGGAACGTAGGAACCGGCGCAGTGAAAGTTTCTGGCCACCATGAATGC -TCCCATCAACCTATATTGTCTTCTGTTTGGGGTGAAATTGTTACCGAAGAGTAATGATTA -GGAGTAATTATAGAATGCGGGCTGTTAACTTATACGGCTGTACCATGGAGGTCGTACGTG -TGTTGGAATCCACAGGTTCTCTCTTGTTTACAAGCTTGAGCATTGTCCGCAGATATCTTC -ACACGTCCTACAATACGGCATATTATATGGAGACTTGAAACTGTTACAATCAACATGAAG -GATCTTTAGTCGCTATTTCATAACCTGACCAATAGATGCCCTAACTGGACGATTTGTCAA -AGCCGTCAGCAGGAGCCAGCGCTTCTTCCCTCTCTGTAACCAGCTGTACGTTTGAGCAGG -ATGGCCTTACGCTTCATGGCCCCAGAAGTTGTGCGTCAAATTGGCTCGATATCGAACCTC -ATTTGTAGGTTTCCGATAATAGTCCATGTATGAATGGCGTCAATGCGATGCGTGCCTCTT -GGCCAAGCCCCCAGAACCAGACGGGCGACACCCCTTCCCAGTTTCGTACCCAGGAGCTCC -TGGTATTCTCTAGTACCATATGTCCAACACTGCGGCTCAGGATCATCAAACCAGTGCAAT -CCATGACGAGGGTACAGATGTATCTCCACATAAGGCCAGGTGAAGAGATTCTGAACGTTA -AGAAAATAAACGTACCGGAGCGTGTCGATATGATTATCGAGCTGGTACTGCGCCTGAGCC -ACTTGGGACCAATAAGGCCCAATTCGTCTTGTAATATTGTCAACGAAGATAGCTCCGAGC -CCGGTACGGTGCTGATAAACAGTCCAGCCCTCTTGGACACCATCATAACGCCTCAAAAAA -ACATAACGCCAATCATCTCCTGTCGAGAGCCCGGTTGATTCGATGCTGCCCGCTTCCTCG -GGGGGTGATCTCAAATCATTTAGATAAACCTCGAAACTCTGGTCTTGGAGTGGGTGACTG -ACTAGATCTTCCCAAGTGACTCTGGATCGTCGGACTGGACATGCCGTTGTGTTTTGACCG -GCTATCCATGCTTGATATTGAGCCCCGATGTTTCGATAATTGATGACGGTATCATCCGAA -ATATATTCCTCCCAGGAGATGTCTGAGGAGGGGAGCAGTGGAGAGTGCGAAGAGCTCCCA -TCAGTAAGTCGTCTGGATGCTATTGCAATGGCGTCCGAAAGTTGCCATCTTAGGCTTCTT -TGTCGTAGCGGCGGCCGTATTGCTGGCGTTCGGGAAGACTGGGGCGGAGGCGACACCTGC -TCAGATGATTCATTATCGGAGCTGTGCCGTTGCGTAGGTCGGTGGGAACGCGTTTCATTC -CGGGTGCTGTGTCTCCGCCTGTCACGAGATGATGGGGTGAACCGGTGGCCTCCTGGAGCT -TCGCTGGATCCGTCTAGAGAATAGTGGCGCAGGTCAATCCTCTGGATGCTTGTGCTGGTG -ACAGGTGTTTGGAGTCTCGAGTGTGAAATCAGGGGTTGTTGTGGAACTGTCTGGTTAGCC -GCCATGCCAAAATTATCACGTTCAAATGTGCCTCACCTTCTGTGATCCTGTTTTGGCCAG -TCGCGTGTTGACCACTAGACTCACCATCAAGATGAAAGCCACTTGTACATATCCGTACTG 
-ACGAAAAGTATTGCCGTATCATTTTTGGGAGAGACGGAACGAATTGTCACTTGGGTTGTG -CGTTATCATCAGTCGGTGCAGAGATGTCAGTTTGAAATGAGCCTTTCACAGGTAATTGAC -AAAAGCAGGTACTAGACTATAGAACATGCACGGGATTTCCAGCCTGTCGTGGCATTTAAA -TACTCGCCCACATCCTCCTCTTGTTCGAATAGCTAGTGAAATTTTTCTACTTACCTTTGA -GACTTTTAACTAGCATTCACACAACCTCTCTTCTTTTCCGGCCTCCACTACGATTGGCAG -TGAATGAAAGGGATATAGAGAATTCAAGTAAATAGACTACAAACGACCCCTCAAAAGCCC -GCTTGGACTTACTAGATATAGCATATACAATAAGCCTAATCATCTGTTCCCACCTACTAG -TCTACTGTGCTTAATGAAAACCGCAATCTCCTTATTTTCTGTCCTATCACCTACTTGTAG -GGGAGACTATAAGAAGAAGAAGATGCTAGGAAACCGCCTAGGCAGGATTTCGCGCACAAT -AAAAGGGATTATATGAGGAGCCCCGTATCTGCTACCAGAAGGGAAATTACCCGTTGCTAG -AATAGGAAGTGATAACGTGGTTATCGAAGCGTAGTCATGTTCTCTCGGCAGATAACCCTG -CTATGCGATGTGGGAGACTGTATTGTCATATACGATCGCGGCGCGTAATTCAACTACAAA -AACACTAGGTTTCCATATGAAAGTGATAAGACCTTAGCTATTCGAGTTTGGAAAATGAAC -GGCGTGAAACGTGCATTGTCAAATATATTAGCCTTCTAGAGATGTCAAAGTTGATGCTGC -AGCTTATTCGCTGCCTCGTTGGAAAAGCAGCACCAGTCTTCGTGAATAATAACAATTGGT -TCTTCGGGCAAGACTGATCCCGCTTCTAGCCCACCCGCAGCGCGAATTGTGAATAATCAA -CGAGAAGGGAAGGAGATGGATTGGTCTACGCTAAGCGGAGAGAGTACTGCATTACCAGAT -GACTATTTGAAATACGAATATGTCACTGCTCAATATATTGCCTAACGTAAGTCTACTCCG -TGCTTTTCCTCCGGTATCTCAATAGCTTTAATATCAGTTCCTCCGTCGTAGTGATAATCG -AGCTTTACCACCCGTACAAATGGCCGGTCATCATTGTATCCAATCTGTAATATTGAGGTA -GAAGCTTTCCCCCATAATGTTCGCACAACAGATAAAGCAAGGCACACTTCAATCCCTAGG -GAAGAAGCCGATTATCTTGAATATTCTTCCACATATACCAAAGATCGACATTAACTTGTA -ATCCGAAGAATACCACGCCTGGCCAGAAGGCAATGCCACCGTTCCATGCCATAGGGCCTG -TGTGATGCATGTGGATACCGCTGGCGAGGGAATAAACAATTGGAGCAACAATGTTGATTA -AGCCCATGCTTCTTGGAAGTATAGGGTTAGGTCTCTTATCGCTGAAAATCGCCCATGCAA -CCGTCCACCCCTGCACTAAGAAGATTGGCCACACCAAGAACATGGCCATGTACAAAATAT -CATTCAGGAGCTGTGTCAACTCTGGTCCGTAATCACGGAAGTTCAATAGGCCCATGGCGA -TGGAAGCTACCATGAAGGCACTAAATGATGCGGCGGCACTCGCAAGTTGAAGGTCGGAAA -GTATAGGGTCGATTTCAGGAATTTTACGAATCTGACGCGTCAGAGCTGCGGCGTAGACGA -GGTACAAAGCCCCGGCGACCATGATAAAGATACTACCGGCCTGTGCACCCTTTATATGAT -GCCTGTAATGGGTCTCAACACGTCTAGCATCCCACCACGGTGCAGTAGGGGGAATATTGC -CAGATCCTGCCAATGCTATCACGAAGGATATGATGCAGAGGCTACCACAAGCAATGGCAA 
-TTGATTCCCACCGCTTAGTCGTAAGGGCATTTGCCAGACGCCGTGAGCTGGATTGAAGCC -CCATGGTAAAGTTGAGTGGAGAAATCAATGTGAATGGCAATTTCGTGATAATCCTGGGTA -ATTTATATACCTTATGGGTCTGGAGTCAAAATGCATTTCAAAAGCACATGAGAATCCGGA -AAGGATTTGCATTCTTCACCAGTGTAGATCTCATCATACAAACCTGGAAAGTTGCATAGA -ACGCTACTTGCACATGTTTCACGGCCGGGGGCAAAGACTGCGATCTTTACTCCGCCATCT -GAAATCAAGTGTAGATCTCGCAGATCCTGCCAGTTTTGATCTTCCGTACATATTCACTTG -CTCGCTATCTACACCAGTGTAACAACCGAATCCTCTCCACCTCTCGACTTTTCTTTGAAA -GCCAGCCCAAAGAAGGACGCTCTACCTCTGCAGCTACATGATCGTCGAATCTTCGGGGGG -ATTTAATGCCAACAGTCTGGGATATACCAAGATCAAATATAATATCTGACCTTCTTGGGT -GGGCCTGCTTGTTCTGCTAGATTGCCATGTTATGATCAGGGCGAACAAACTGAGGTAAAA -GTAGCACAAAAGACAAATCAGCATTTGCCCACTGATGCAAAAGGGCGAACGACATGTTCA -AATTCTTCAAATTTGCCAGGAATGAATGTGCCAAGTCCTTAGCGGTCTTTGTCCCGCAAG -ATATTGACTCTATCCTCGGAAGGTGCTGTATCCTTCCCCCTAAATTTCGTAAATCGAATT -CAGAAAGCTGTTGTAGAATCCAGCACTTTGCGGATACATTCCACCATCTTCACAGTGGTG -GACGGATGATACGGGATGGCATGGCCGCCATCATGAACAAGAACCTCCCTATTCCACGAA -ACACAGAAACCCTGTACTTGAGCCGCGTGAGAGCCGTAGCTATCATTGAGAGCATAAATG -TGTGCAGTAGGGACCTCTATGGGAACGGAAACTGCATGACCAGCGCCATATCGCTCCAGG -AACTGGGTCGTCTGATCCCATTTGTGAACATAAGCAGGATCTAGAAGCTCCGGGATCGTC -GCAGAGAGATTGACAGCGCAAGGCTCGCTAGCTTCAGGCAGTGGATAATAGCAAGTACCA -CTGCGATCGACAGAGAATGGTCGAGACTCTGAATCAAATGGGAGCGAGGCCGAGAAGAAT -ACGGCAGACCTAAATGGCTGCTCGGAAGGTAAGCTATGCCTTGCATGGTAGAGCATATAG -GACGCGGCTAATGCTGAGCCTTGGGAAAATGCAAAAACACCGTCAAATGGCCCCTCCTCG -GCGATAGCTTTGTCCAGGAGCTGGAATGCAGAACGTATCTGGGACGGTGTGCAAAGGTCA -AAATACGACAAGTATGGCCCCGCGAAATAGTCTTTCACTCCTAATAATACCAAAAGCAAA -CATAATCAGCATCAAATTCAGATATTCTATTTGATCAACAAACCTTTTTCTGGCTCAACT -TCAACCTCGCCTTGCAAGAAATGAAACTCGTGTTCAGCAAACTCCTTCAGTATGGCCGCC -AGCTGTGCCTCTAGAACTGCTGCGTTGGAACCAGCGCCATGAAGACATAAGAAACGCATC -TTGCACATAGACGGCTGTTTGTTCATGGAAGAAAACACAGGACCGATCTTCCGAAATTGA -GAGAGTCTCTATCCCTGCTTTACCTTGGCGAATCTAACTCAGATATAATCTGAATTACGT -CTGTCTATACCAGCACATGCAAGGCTGATACGTCATTCTGAGTGAGAAGAGCAGACAAGA -TAATGGGAACTTTCATGACAAGACCTAATATAGAAGTGGTGCGACTGTTGTCAGACCTTG -ATTACGTTGGATGAGGAGTTGGCTTTGCTACAAAAATTACTCGCAAGTATCTGGATACCA 
-TTGAAGAGACGATTGTGCTCCATGGGAAGATTCTTTTCTCTTCTGATTGGGTATTACACA -GCTTGATTCTATTATCCCGCGTTCTTTCGATCGTCGGCCATATCCATCAAGAGCTAATGT -GGCACAAGTTCAAAATTTCCATGCATGCTGAACCGCATTCATCGTAACCTACTCTATTGG -AATATAGGCGGAGCAACCTGTCATGGAACAGGTTATCTTCCGGTTTACCCTGAGCCAAAG -TCATTAAAGCCCACTCTAGCCTAAGCGCGATGATGTGTTATGGCATAGGTTCGATATCCC -GATGTATCCTAAATTGACTTTCATTGTAACCTATTCTACTACAGGTTCCATGTCTTTTCG -CGTCCTCAGTCCATCATAGCTTTTTTAGGATAGGCTCAATATTGCATCATGATCGCCTCT -AGCACATCCTCAAGATTCTTTCATTTCTGGATATAATTGATCATAGCCTACTCTAGCCGA -GGCGCAATGATGCGTAGTAGCACAGATTCAACATTCCCATGCATTGACTTGAAGGTGAAA -ACAAGATATCTTTGATACTCTGAGCCCTCAGGTCCTGGAATCAAATTGTATATTTAAGAT -TCATCCAGCAACTCTCTCGTACCTCAATTAACCGTTGACCGTTATTTGAATTTATTGCAC -TTTATTTTCAGCTCAATCTTCATACTATCGAAATAGCTATACAATTCGTCACGCTATGGA -GCCACAAACTGGAACGCAAGACATTGCCGTAATCGGAATGGCTTGTCGATTTCCCCAGAC -TGCAGAGGATGTCGAGAGCTTCTGGGGCACTCTGATTGCCGGGCGTTCTACGTCAACCGA -TTTCCCCCCTGACAAGATGAATGTCAATGCCCACTATCATCCAGATCCAGACCATGGAGG -CAGTGTAGGCCACCCAAAGCCCTGATTGAATATATGAATCAGCCACTAATGGGTATTCTT -GGTAGATGGCTTGTCGTGGTGGCCACTTTTTGTCCCAAAATGATCGATATTTCGACGCGC -CATTCTTCAGCATCACCAAAAACGAGGCTATGGCAATGGATCCCCAGGGCCGTATTCTAA -TGGAAAATGTGTATCATGCAATTGAGAATGGTATGTAAAGTTTCATGCGCAGGTGACTAA -ATTTCGTTATAAGCTAACCAGCCATCCTGGATAGCTGGGCTTACTATTGGAAACATTGCT -TCCAACGACGTCTCTGTCTTTGTCGGTGCGTCCAACAACGATTTCAAGCAGATCATGGGA -GCCGACGAGCAATCTCTATGGAAATATATGCCCCTGGGAACAGCTCCTTCTATTCTGTCC -AACCGCATTAGCTGGTTTTTCGATTTCAAAGGGAGTAGTATGACAGTTGATACAGCGTGT -TCGAGCAGTTTGGTCGCTTTCCATCTCGGTTGTCAGGACCTTATATCAGGAAGCTCTAAG -ATTGTCAGTGAAGTACCTTAATCTATGCGCATGTTTTTTTTTCTTCTTCTTTTCTGATAC -TGATACACCCGCTGGTGCTTTTAGGCCGTTGTCAGCGGCGTAAATCTGCTTGATGACCCT -GACATGATGTACCGAATGAGCCATGTGGGCTTTCTAAGCTCTGACAGTATCTGTCATAGC -TTTGATCATCGTGCCAATGGATATGCTCGAGGTGAGGGGGTCGGCACAGTGATTCTCAAA -CCCATTTTGGATGCGATTCGAGACGGTGACACCATCCGAGCAGTCGTTCGAGGAACTGGG -TCAAACCAGGATGGTCATACCCCTGGCATGTCGGTCCCTAGTTACCAACGCCAAGAGGAA -CTTATACGCAACGTGTATGCCAAGTCAGGACTGAAACTGGAGGAAACTAGTTATGTTGAG -GCTCATGGTACTGGGACGCCAGTCGGTGATCCCATCGAGGCTCGCGCTATGGGGACCGTT 
-TGGAAGTCACGGAAAGATTTGAATCTCCCTCCTTTGGTCATAGGGGCTGTCAAATCCGTG -ATCGGCCACCTAGAGGGAGCATCAGGTTTGGCTGGTCTGATTAAGACAGTTCTTGTTCTT -GAGCGAGGTATAATTCCACCGAACCTGAACTTTGAGAGAGTAAACCCAAAGATTCCGGTC -CAGGAATGGCATCTTCAGGTAAGGAGTCCGATTTTGTGGTCTTCAGTTTGAATAAGCAAT -GGCTGATAGGAAGGCAGTTTCCTCGAGTGGTCACCCCATGGCCGGGCCATGGTTTACGAC -GGGCTTCTGTCAGCTCATTCGGTGTCGGAGGAACGAATGCCCACGTCATTATGGAGGATG -CGTATCATTATCTGCAGTCTCACAACTTAGAGGGAACTCATTGCACCGTCAGCCCTGCTC -CTGACGTCGTGGCCAACGACCCAAACAGTTGTATGGAAGGGAGCGTAAAGGAGAGTGATT -CACCTGGCCAGCAAGAGAACTTATACAAGAGGTCCCAGAAGAAGGCTCAACCCCAGGAGC -ATAGCCTGCTCCAAGTGAGTATGAAGGTAAACAAGGAAAAATCTTTCCCGACAAGGTCCT -TCATCTGGTCATCCTCTGATGAGGATGGTCTAAACCGAATTTTCAACCAATTTCACTCTT -GGCTGCGAGATCGTTCGGACATTGAGAGTGAAGATCAGTTTCTTGCAAATTTAGCTTACA -CTCTGACCCAGAAAAGAACACAGTTCTCATGGCGTTCCTTTGTAAACGCCAGCTCTCTAG -CGGAACTCCTGGAGCGACTGGCTGAGGGCCCCAAGTTTGTTCGTCATATGAGACCAAGCA -AGACCCCCATGCTAGGATTTATCTTCACAGGGCAAGGGGCCCAGTGGGCTGGAATGGGGA -AGGAACTGCTCAGATTTCCAGTTTTCCGTGAGAGTTTTGATGATGCCAATCGCTATCTGC -GCAGCATAGGTAATACGTACGATATCAAGGGTAAGGATTTCAAAAGGCTTCTGATCAAAC -CAATCACTGACCAGTCTTCGCAGAGGAACTTCTACGAGATACCAAACAATCTCGGATCAA -CCAACCCGATCTCAGCCAAGTGGTCTCGACGGTGATTCAGATCAGCCTGGTTGACCTCCT -GCGACAGTGGAATGTAAGGCCAAACAGACTGGTTGGGCATTCTTCAGGGGAAATTGCCGC -TGCCTACTGTGTAGGAGGACTAAACCGTGAATCTGCCTGGAAAGTTGCCTTTTTCCGTGG -TATTGTCGCATCTACACTCGTGGATTCAGGGGGTCGTATGCTTGCCGTCGGCATGGGCGA -AGAAGCACTTCGTCCCCATTTGGAGGCAGTACATCGGGAACTTGACGGGGCATTGTCTAT -TGCATGCTTCAACAGCCCAACAAACCTCACTGTTGCGGGCGACGAAAAAATGGTAGACCT -TCTCTCCTTACGACTTGCGAGCGAAGGTGTGTTCGCACGTCAGCTCAGGGTCCAGGTCGC -ATATCATTCTGCGCACGTCAAAAGTGTCGCTGACGAGTATCGCCGGCTGATGGGTGAGCT -ATCTGTGGGAAATGAAATATGGCCTAGTTCCGAGCCTGTCATGGTGTCTTCCATCACGGG -AAGACCTGTCACTCCATCCTCTCTTGCGGATGCTGACTACTGGGTGCGAAATATGATATC -GCCTGTACAGTTTGTTTCCGCCATTGAAAGTCTTATCGACACCGTCGTCTCGAAAGGTAA -ACGAACAACCACAGTGGTGGGCGGAGAGCATAGCAACATCTTGAATATGGTTGAACTGGG -TCCTCATCCCGCGATGCAAAGTGCAGTAAAGGAAATTGTGGGAGCGAAGTACCCCGTCGC -AGGCCGAGTTTCATACTACTGGGCTGTCCAACGCAATACCTCAGCCATGACCCAGTCACT 
-TGACCTTGCTGGCAAGCTATTCTGTGAAGGCTATCCGATAGCCTTCTCCAAAGTTAATCA -AAACTCTAGTGGACGAATGATGGTTGATATTCCTCCGTATACCTTTAATCATACTCGCGA -ATATAATCCATTTGGCCGACTGAGCAAAAGTCTCCTCTATCGCCAGCGTCCTCGGCATGA -CCTCCTCGGGGCGCCAGTTCGAGACTGGAATGTAGAAGAGCCCCGGTGGAGAAACTTCCT -TCGAGTTCATGAAATTCCATGGGTTAAGGACCATGAGATCACCGGCCAATTGATCTATCC -AGGCACCGGCTATCTGGCCATGGCAATCGAAGCGGTCAAGATGCTGGCCGACCCCAAGTC -CACTGTCAGCGGGTTCCGCTTACGAGATGTGGTTTTCAAGGCTGTCTTGCGAATTCCAGA -TACAACGGAAGGCGTAGAAATCATAACCACCCTCCGTCGCAAACCAGAGTCAAGTTACCG -TACCTCATCTATCTGGTTCGAATTCAAGATCTCTTCATACCAAACCAAGAAGGAGGGCTG -GATTGACCATTGTACTGGTTTAGTTGCAGTGGAATATGGCAGCAATAAGAACAACCCAAT -TGTTAAGGATGACCCATCCGTTGCCATCAAGCAAAATTTTGAGGCTCTCAGAGATAAGGC -CAATGATGAATGCACTGTCCCCGTTGACGTTGAGAAAATGTGGGCAGATCTGAGTTCCAA -TGGCCTGGTGTTCGGCCCAACCATGCACGGCCTTTCTAGTGCCTCTCGAGGTAATGGAAC -AGGCCAGGCATTTGGAATTGTTCTCGTTCCAGACGTTCGCGCTAGCATGCCTTACGAGAC -AATGGAGCCGCATATCATGCATCCAGTGACCATGGACTCTTGTACACAACTGGCGTTCGT -GTCGATTAGCGATTTGATCAACCAAGAACCTTCTGCTGAGATAATGCTGCCACGGTCAAT -CAAAGAGCTCTGGTTGGCACCAAATATTGCTAACGAGCCTCGAGCAGAGATTAATTGCCA -TGTCACGTCCAAGAGAGTTTCACATAATTCGATTGACCATGACGTGACAGCATGGAGTGC -CAACGCTACAATGCCAGAATTCCGCATTTGGCAATATCAAACAGTGCTTTTAGAACGACC -CACATACAAAGGTCGCCACTACCAACAACCATGGACTTTGGTGTGGAAGCCAGATGTTAA -TTTGATGGAATCACAGGAGACGGTAGACTACCTCCGCCAGGCTTGTGAGAACAATGAAGC -CAGCGAGCGTGAGCAGAATGCAAAGGATTTCTCAGATGATTTAGATCAGGTGGTTGCAAT -CCTTGCTCAGAAAGCTCTATCCAGTATAATAGCACTCGGCTCCTCCTCCTTACCATCTCA -CCTCGAAAAGTACCTTACCTGGCTTCAAGAGATACAACACCGTCTGCCGATGGAATTAGA -CTCTTCGCAAAACCAAGACATGTTAATTCAACAAGCTGCTTCTACTTCCGTGGAAGGAGA -GCTGATTGCATGTGTCGCAAGCCATCTTGACTTAATCATAAATGGCCAAACTGATCTTTT -CAACCTTCTATCTCGGAATGGCCTGTTAGATCGCTTCTATGCGGAAAATCAGTCGTCCAA -GCGCATACAAAAGTTACTCCGCGCATACGGCGACATCTATGGACATCAGACAGGAGAACT -GAAGGTTCTCGAGGTTGGAGCCGGTGATGGTGCGAGTACTAAGGCCTTCCTCCCCGGTCT -AGCCACTAGCCTGTACACTTATACGGACATTTCGACAGCCTTCTTTGCCAAGGCACAAAA -GTCTTTTGGCGTATGGTCAGACTCAATTGAGTATCGCAAATTCAATATCGCGCAGGACTT -CTCGGAGCAAGAATTGAAGGAGAGTTCATTTGACGTCATTTTGGCTGTGGATGTAGTCCA 
-TGCGGCTCCAGACACCAGTTATGCCCTACAAAACCTCAGGTCATTGCTGAAAGAGTCAGG -AAAGCTGATTCTGGTCGAGATTTGCAAGATCGATGCTCTCCTCTACCCACTCGTCTTCGG -CCTAAATCCGGGATGGTGGTTGAGTAAGGAAGAAAGCCGTAAAGCTGGTCCCCTGCAGTC -TGCTGCTTGGTGGTCCGAGAAACTTAAAGTATCAGGCTTCTCTGAAATTGAGTTTTCTAT -TGGTGATTCCGCGAGTCCAGCAATTTCGGAGCGCACCTTGATGATCGCTGGTGTTCCGAG -CCTAGGCGCGCATATTAAGCAATCCTTGGAGATACAAGCGGCACCGATGATCGTGACACA -TCCTCAACCATCGTCTGAGGTCAGCAAAATTGCTGATATACTTAGTGAGAGTCTGGCCTC -GCCTGGGAGTAGATGTAATATCGTCGATATCCTCAGTGTCACACATCATACAGACCTGGG -AGGAAAGATATGCATCATCGTAGATACACAGGGGTGTCTTCTTTCTCGCATGAGTGATGA -TCTAATGGTCAATCTTCAACATCTTTTCAAGACCTGTGGCGGCATCCTATGGGTCCGCGG -AGACGAGCGATCAGATCCGAATGCTGCTCTTATCACGGGTCTCATCCGTACTGTGAGATG -GGAGCAGGACTTCCGTGAACAGAATTTCGTGACATTGGAAATTTGCCATGGACACAATGC -TCAGGAAGCCGTTCAACGTATCGAGAGGATCTACCAACATGAATTTGCATCAGAATTGAA -GTCCAGTGTCAAAAAGAATAACCGCAACGCGGAGTACAGGGCTTCCGTCCTCGGTCCCCT -CCAGACCAACCGCTTGGTTTTACAGAAATCTCTGGATCATTTCATGGCCAACAAATTTGC -TACGATGACTCCCGAAAGCATGCGGTTGGGCGATTCACCGAATCGAGTGTTATCTTTGAC -AACAGACAGCCCAGGTCTTTTGGATCGGTTATATTTCAAGGACTGCTGTGTTCACGCGAG -TCCGCTGAAAGACGACGAAGTCGAATACAAAGTCCTTGCAACCGGGCTCAATTTCCTTGA -TGTTATGTCCGCTATGGGTGAAGTGCCAACGACCAAGTTTGGAGGCGAGGCGTCCGGTAT -CGTCACGCGAGTTGGCCCCAATGTAAAGAGGCTGCGCCCTGGACAGCGTATAGCGGCAAT -CTCAGTCTGTACTGGCACTTTCCAAACAGTCGCTCGTACCGTGGAAAATGCAGCTATCGC -CATACCAGAGGCAATGAGCTTTCAGCATGCTGCAGGCTTCCCAATTGTCTATGCCACGGT -GTTCTGTTGCTTGGTCGAGATTGCGCGTTTGAAGAAAGGGGAGTCAATTTTGATCCATGC -GGCAGCAGGCGGCGTAGGGCAGGCGGCTCTGATGCTCGCCAACCACATCGGGGCTCGGGT -TTTCGCGACCGTTTCCTCTGAGTCCAAGAAGAAGATTGTGCTCGAATATGGTGTGCGTGA -GGAGCACATCTTTTACAGCCGCGACCTTGCTTTCAAGGAGATGGTCATGCAGCAAACCAA -TCAACGTGGGGTGGACGTGGTCTTAAACTCGTTGGCAGGCGAGGCTCTTCGGGCAACTTT -TGAATGCCTGGCCCCATGTGGTCGTTTCATCGAAATTGGTAAACGCGACTTCGTAACGAA -CGGACGCCTTGATATGGCACCCTTCCTCCGCAGTGTGACCTTTGCCGCCTGTGATCTCGA -CACCATCATACAACATGACCCCGTTCGGGGTAATAATCTCCTTCAAGGCGCAATGGAACT -CTGGCAACTTGGGGTCTTCCGACCAACATCTCCGTTCACAACGTTCAAGTACTCCCAGTT -GGAAATGGCGTTCCGTCAGCTCCAGTCTGGTAAACAGAGCGGCAAGGTAGTATTGACCAT 
-CAGTGATGATGATGTTGTCCAGGCTCTTCCATCGCTCCCCCCGCCCTATCAGTTCCCAGA -AAATGCGACGTATCTGCTCAGCGGTGGTCTTGGTGGTCTCGGTCGGAGTGCAGCGCGGTG -GATGGCGTCTCGTGGGGCAAAACACTTACTATTCCTCTCAAGATCTGGTGGCTCGAATGT -TGATGCACAGGTCCTACTCAACGAATTGATTGACGCTGGGTGCGATGCGCAGGTCCTCCG -GGTCGATGTTGGTGACTCGGTTGCCTTGGATCAAGCGATCAGAGCTCATACGAAAACAAT -GCCTCCTATCCGGGGTTGTATTCAAGGTGCGATGACATTGCTGGTAGGTTCCCGTGTCCT -ATACTGGTCTGCGTTTTACTGATGATGATTCTTGACAGGATTCTACGTTTGAAACGATGA -CATCGACCCTCTTTGAAGCGAGCGTTAGGACTAAGGCACGTGGATCGTGGAACCTCCACG -AGGTGCTTCCAAAGAATCTCGACTTTTTCATCCTTTTGTCTTCATGCAGTGGTGTTGTCG -GGAACCGTGGCCAGGGCAACTACAACGCCGGTAACACGTTCCAGGACGCCCTAGCTCACT -ACCGTCGATCCCAGGGCTTAAGTGGAATATCGCTCAACCTAGGCCATATGCTTGACATTG -GTGTAATCGCGGAACGAACAGATAATTTATTCACTACATCCCTCCGAGCCGCATTGGGGA -ACCAAGGCGTTTCACAGGACGAGTTCCATGCACTTCTAGAGTACCATTGCAATGCTCAAA -ACTTAGATGTGTGTCCACAGACTGTGGTGGGACTTTGTACTCGTGAACAATTCCTCGCAG -ACAAcctacctgaacctaccttcctatcctatccCCTATTCACCCATCTCTGGCGCTTCG -GTGGCTCCGGGAGAAGCGAGGGCAACAATTCTGTCGCTGCTAAATTATCAATCAAGGGGG -CATTAGACAAAGCCCGCCCAGAAAAAATACTTCAGATTGTGATTGAAGGCATCATTGAGA -AATTGTCCAACTTGCTGGCCATATCTGCCACGGAGATCGATAGCGACACGGCCCCTTCCA -ATTATGGTGTGGACTCCCTTGTGGCTATTGAGATTCGGAATTGGCTGTCAAAAGAGGTAG -GTGTGGAAGTTGGAGTGTTGGATATTGTGGGCAGTCAGTCGATTGTCCACTTAGGGGAGC -GGGTGGTCAAAGCTAAGGGTTAAGAATGAGAATGAGGGGATGGGGGGATGGGGAGATGAG -GGGGATGAGGGGGATGAGAAGGATCTCATGTAAGGAGCTATCAGATAGCCTGGAGTGGAA -TAAAGAGAAGAGTAGGAGTTTAGAATTGTTAGAATTGTAGATCTCGGTACATTTAGAGAG -AAACTGTATAGGGGGATAAGTATATATGGGGATAAGTGAACAGGTCACATGACCAACATA -TCCAGTCCTGGATACATACCCTCCTGTTCTCCCTATAATCGACATTATAAAATAAAATAA -ATTTCAAAGACATCAGAATACTGCAGAATGTAGGATTGTCAAATCCAGGAACCAACATTG -CCTTGCATTGTTACAGGGTTCTAGCTCCTAAAGGCCCGGAAACGTCTTAGATAACTTGCG -CTTTTATCCGCGCCCACTTATCTTACTCTCTCTCTCACCTCCTCTTTACACAGCACCTTC -CCCCCTCCCATCTCAAAATTTCAACTTCTCTTGAATTCTCTCGACTTCTTTTCCCGTACT -CTCACTTAACACATGAACGATGCGAGGTCCTGAGGCTTCGAGCCCCGATGCGCTCGTAGG -CCTGCCGTCCACCCTGAGAACGCTGATAGGCACTCCGACGACGTCACCGATTGGCTTTGT -CTCAACAGTACCATTACCAACAACCATACAAAGTAATCCATCACTTTCGCAACAACAAAT 
-TCAAGCAAATCTACGAAATGTTCTCTCCTCTTTGTCGACGGAATTCCGTCCCGAGCCTGT -ACTGGCAAGTGGCAACTCTACCGGGTCGACCGGCAAAGGCATCTTGATTGGGATCTTGTC -CGCGTTTGGCTCGGCTGCTGTTGCATTTATTGTTTTAGCGATCTTTTTCTTCTTCAAATA -TACCCGCCCGGGGCGGATTATGCTCGATCGCATTGGCAGGCCCGGTGAATATGATGACGA -GCAAGCGTTTTTACGGGAGGAAGAGGCGGCTCTAGAGTCAATGGATGACTTGGCGCGGTC -GGAATATTTGCGCGCAAAAGGTAAGATCAGAGATCGATCTGCTTGTGGTTCGCTCCGATC -GTTTTACTAATGATCGGTAGCTTTTGTCCAAGGCAACCCCCCCGAGTCCGTGCAAACCGA -TATCTCCCTTTCACAATTCCTCGCGATCCAAGAAAAAGGTGTATCTGCCTGGGAATTCCA -GCCAGAGCTGGAAATTGCAAACTGCTTTGTGGAAGGACGAACGGAAATTGAGTTCTTTGA -TTCAGAGTGTAGTGTACAGACCAACCTGCCAATCCCAAAGCAGAACGATGTCTATTACTG -GGAGGCGAAGATATACGATAAACCTGAATCGACCATGATTGGCATCGGTGTGACTACAAA -ACCATATCCCCTGTTTCGAATGCCTGGTATGTGTGGCAACGAAAATGTGTCTACCAGACC -CCCGCTAACAACCGCTAGGCTTCCACAAATCCTCTGTTGCCTACCAATCTACGGGACATC -GCCGACTCAACCAACCGTTTACTGCAACCCCATACGGACCTCCGCTCCTCCAGGGCGATG -TTGTTGGCGTAGGCTACCGTCCTCGATCTGGTACCGTCTTCTTCACACGCAACGGCAAGA -AATTGGAAGACGTGGCTCACAATTATCGCTCACAAAACCTCTTTCCCACCATTGGTGCTA -ATGGGCCTTGCACCATTCACGTGAACTTTGGCCAAATGGGATTCGTGTTCATTGAAGGCA -ATGTCAAAAAATGGGGCTTGGCTCCCATGACTGGCAGCCTCGCCCCGCCCCCACCCTATG -GCAGCGAACAAGGCAGCATTCTGCTTGAGTCAGGCCGCGAGAGTGCCGCTCAAATTTCTC -AGCGCGTTTACCAAAATGCGCACTACGCGGCCTCTGGCTCAAGTGTTAGGATCCCTCCAG -CGCCCAGCCCAGGTCCCTCTCGATCACCAACTGACATCTCCCTCGCTCAGTTGACCCACA -TCCCTTCCCATGAAGATGCTGGCGAGGGGTCCAGCCGTTTTGCTTTGGCCGATGCAGAAG -GCAATGATGTGGCAGCTCATCATGATCAGGAGCCAGAGGCGCCACCCCCGGAGTATTCTA -GTCCCGACGGCAGTCGTAGGGGCAGTGATGCCTCTCTTGAATCTCCGCCTAGTTTTAGGG -CTTCAGAGAATTTCAACTCTCCATCAAATGACTCTATTGATTCTGGTGGTCAATTCCTAC -CAAGTTATCAAACCGTGGTGGAGAGAGATTTGAATGAACATAACTCAGAGGCCAATGATC -AGTCCTGATGCCGCATTTATCCCTCCCCCTTTTTCGTTTGCTTTTGAACATACCTCATGG -CGCATGAGGGGCTTTGTCTAATCATTTCTTTGTTTTATTTAGCGGGTTCACGGAAGGCGT -TATAGCAATTGGCGAAGGCAGTTTCTTGGTTTTTATGAAATGCTTTCTGTCTGGATTGGA -ATAGTCTTTGTGTGTCTATTATCCCTTACCCTGGAGTCAATGAACTCACAAGGGCACGAT -CGCTCCCTTTTTGGCCCCCCTTGTTGAATATGTATATCCCAATTTTTTGAAATTAGGTAG -AATCTAATAATAGGTTCTCAATATAATATTTAAAAAAAAAAATGTATCTTTCATAAAATA 
-TAGACCGTAAAGGTCGTAGTGTGAACGTCATCAACTCCCAAGACATATCTCCACTGAGGA -TCTGATATCAGATCTGTCCCCGGATGCTCTCTCGCTCCACTAGATTCCCGGTCCACCTCT -CCGAACATCGAAATATCCAATCGGCACACTTTGATGTGATACGAGGGGCAACTCCCGAAC -TTTCATCTGCAAACTCCCGGCCAGCCTCCGGATTTCTGTTGTCTCTCCTCGGGAATCTCT -GCTTCGGGTCTATTCTGTGCATATGCAGAGAGCGGTCGACATCGATCTTCCAATAGTCCA -AATCCTAACCCTGTGGGCGCTATCGTGTCGCGACCTCCTTTCTTTTCTTTTTCCCTCCTC -AGTCTTTGTGAGCCGCCTCTCATATCCCACAGTTTGACCACCAATCCATTTGAGCCTGAA -ACAATCTATCGCGAAGGCCTGCCTCCTATTGGGGATCAGACGCGCTCGGGCAGCTTTACT -GGGTATTGGGCTGTGTCAAATCTAGCTTATTGTTCGTGTCTCACATGTCCATAGTGGGCG -AGATAAGTAAAACAATGGCTGAGAACGCAGCATTCCAAGTCATTGTCTTGGTAAGTTAGA -ACCCGATGATTCCTGTTCTGTTGCAGAACCCGTCACGTGCATGGCCATGTGACGAGGCTT -GATGGATATGGCGCATGTTTTATCAAACCTGATGGCGGGTTCTGGCAGATGGAACCCCTG -CAGCCCTTTTTGCTAAATTGCTGACCCCCCTCAGGGCCCGACTGGTGGTCCCCGCGAAGA -TAGCGTCACGGGCATTCTTGTCCGGTCAACGGCAACGAAATGGTCTCCAAATTCGGTGGT -AGCCGTTGATGCGGGTACCCTTCTTGCGGGCATCATTCGATTATTGGAACGCTACATCCC -CGAATGCACAGATGATCAGGGGATGATGACAAGTGGGCCCTTTAAGGGGCTTGAGCTGCC -CTGCAGAACACCACAGGCAAATGCAGCTCATGTATTTCGAGAAATCATTGGTGCTGTACT -CATCACGCACCCTCATCTAGATCATATCTCAGGGCTGGCCATTAATACCCCAGTTCTGGA -AGCGGGTAATGGACCAAAGCCTGTAGCTGCTTTACCATCTGTGTTGTCCGCTCTCAAAAA -CCATATGTTCAACGATGTCATATGGCCTAACCTGTCCGACGAGGATGGTGGGGCTGGTCT -GCTCACATATCAGCGCCTGGTGGACGGTGGAAATCCCCGCTTCGGTCGTGGAGATAGTAG -AGGCTACGTCAGAGCTTGCAATGGCCTATTAACAAAATGCTTGAGTGTTAGCCATGGCCG -ATGCAAGCAGCGATACCATCCAGAAACTGGGACGCACCATCGGGTCGGGAGTACCGTCTT -TTCTGATCATCAATTGATGATCCCTTCCAGAGCAATTTCAGTGGACCATACCGATGGCAG -GTAAGTTCCTGGGTCCGTAATCCCTACGTGAATCTTCCTAACTATCGCCAGTTTTTATTC -CCCCGCTCGCTCGCCACGTATACTTCCATCCAACCCGAAAGAGCCCATGATGGCGACGGT -TGAGAGTTCCGCATTTTTCTTACGTGACCATCATACCGGCAGCGAGATCATTGTCTTCGG -TGACGTCGAACCTGACTCTGTGTCTATGGGAACTCATAACAAACGAGTATGGGAAGCTGC -TGCGCCAAAAATTGCCACGGGCAAACTACGAGCGATCTTCATCGAATGCTCCTACAACGA -CAGCACCGACGATTCATATCTGTACGGCCACATGTGTCCACGACATCTAGTCTTTGAGCT -CAGCGTCTTGGCCAACAGAGTCCTGGAAATGCGGGATGGGAGCACAGGTGAGAAAAAGCG -CAAGCGTGAAACCATAGGTCCTGCCGAGAGCGGCAGCGAGCAAGTGAGCCCAAGAACCAA 
-ACGGGCTGTAAGCTCCTCTGTCGATTCAGGCAAGATATCCGAGTCGCTTATTGAGCCTCG -ATCGCACCCGGGCGAGTCATTTGATATTCCTCGGATACCCAGAGTGGACTTTGAAGAGGT -TCTAGGACATCCAGACCCTCAGACATGGATTGATACTACTCCTCTTCCTCTGGAGGGATT -GAAAGTCTACATCATTCATATCAAGGAAAATTTAACTGATGGGCCTCATCCTGGAGAACA -GATTCTCAAGGAACTCCAGCATCATGGCGAAGCAGCTCACCTGGGCTGTCAGTTCTTTCT -CCCAAATCCTCTTGAGGGAATATGGATTTGAAATGTACGAAATCACCAGGCGGCTTTTTT -GTTTTTGGGTTGATGCAGCGTCTTTTTGATCATTCAATGTATATTAGCCAGCATTCATTT -ATCAGCCAACGGTTACATTTTTAGTGCATCTTTAAGTTTACTCTAAATTCTTGTGCAATA -TATCTCGATCTAATCGATGAACAGTCGGAGATCGACATTTCTCTTACAACTCCGAATGTG -CATGACTTTGCCGCGTGCTCATGTCTGCCAGTTTGTATTTGCCCTAATGTATTTGCCCTA -ATGTATTTGCCCTAGAGGATCATTTTAAAAACGCAAGACAAATATATTCCCATTCCTCTA -TTAAAGATTAAATACTCCGTACCCACATCTCCATTGCATCTATTTTGGATTCATTCAGTG -AGTTGGTACTTGATACCGAGGCGCGGTTGAGTGCCAATAGCAGCTTCCACCAACATAAAG -CTCAATAGCCTCAAAGTACTGCCTAATTTCTTTTGAGGCGGGTTTTTTCAATCTTTCCTC -CTTGAGCTTTGCAGCGAGAGACTCTTGCTCAACGATGACCCCGAGGAGCATCACTTTGGC -CGCCTCCGCTGTTGTGTTGTGCTCCTTCATTAGAACTGCCACGCCGTTTCTCATTCGATC -CTCCTGTTGATCTTTCTCCATATTCCAACTAAAGTAGTCATTTGTCAGACCGAGAACATT -CCCCATTGCGGTGTCATGCTCCTCGATGCTAGCGTATTCTTCATCGCTGAGCATCATACC -CATCCCCCAGCGAACGAAGTAAGAGGACATCCTTGATGATAATTAGATTTTTGCCTTGAA -ACTGTCTTCAGAAAGGCTCAAAACGTACCAGTAGCCGCTGTTGGCGATACGATATGGCAT -ATAGTCCTCCATTCGTGTGAAGACCCCACCGATGCTGTCGAATGTTGCCAGATATTTTTT -TAACATCTGGATCATAGTCGCAGCTTGCTTGGGATCAATGTCGATCGCCTTGCGGAGCAC -AGCTGCCAGTGACTTTTGTCGCAAATTTCCTGACTTGAACATTGATTCCTCAATGTCTAG -TCGAAGAACTTTTGCCATTTCATCGTGCTCTCGGCATGCCTATAGACTGTTAGATGGTTG -TCCATGCATATCAACTTCCAGTGCTCTTACCGAAGCATGGTCCATCTCCTCTGTCATGTC -TGTAGATAGTTTGTCAGAGCAGCTCAGATGTGTTTGGACAGATTGGTGCCTTACCATCAT -GAATCCACAGCACCTCAGCCATGGTGGAGAACAGAACTAGTCTTTCGGGAAAGGCCGTGC -AGTGGCAAATGGTGAAGTAGCTACCGTATGGGTTGGCGTGTCGGACAATTGCTTGGCGCC -ATTCCCTTGTCCCTTCCTCGCAAGCTTGCTCGTCGATCTCTCGGAGGGCATATTCGACTT -CGCGGGCAACAATACTGATTCCGGCAGGGTAGCGCGAAAAGAAAATGGGGACTTTGTGGC -TTTTGACACTCTCGAATACCGAGGGGATGGTTGAGGCGGGCCTGGCGGAATATAGACGAG -CAGCCTGCATGTTGGAGGCTTGAAGCgatcttgaattgtttttgattttgattgattttg 
-attgttttgatGGGAGTGTATAGAAGAATGTCTAAGTTCGGGGGTGACATTGACCCTTTT -ATATCAGAAACTGACAGACTAGACAGCCTAGAGAGCATAGACAGTCTAGACACTCTAGAC -AACTCAGTACATTGCACGGAATACGGAGTAAATTGTGACTACCTAAACCTAGACATTCAA -AATAAAGCATTTGACGTGTATAATCGGGCATCTCAAATCGGGCGACAATCCGGATCACGA -GGATTTTGAACCCACCAAACGGAAGTCTTCACAAAGAGACTAACAGAGATGGATTTCTCT -CTTCTGCCAAGAATATTGGACAATGGCTCAATAAGAAGATAGGTAATATGCATGATATCA -CATTAGCGATCCATCCACTGCGCTAGTCGTGATAGCCGATGGATCCGTACGATCGGGACA -CTAGAAGGGGAAATGAATATCAGGGCTGAAGTTCTAAATCATATGTATCAACGTATATCG -GATGTATATATGTATTAATTATTTGATTTCATATGAGGCAGAAGTTATGCAAATAGTGCC -TGGTTATATGCACCGAGTATACACAACATATGGAGTCTCCTCTGTACAGACAACATATAT -ATCATGATCTACGTGAGTCAAAATGTCTTCAATGGTAGATAATGGCATAGATCTAGTGTT -AATTCCGATGAGTCTAGACCTGTAGATTCCCCCTATTGAACTAAAAACACAATCTGCGCA -AGAGAAAGATGAAGGAATTTGTTTGAATACGTTATGCAAGTTCTATGCAGGGAAGCAGGG -AACGGGGAATCCTGCATCGAGCACGCTTTTGCATCCGGGTAGTCGGGAGATCCAATTATT -CGGGTTGCATGTTGATCACTTGACAGATAGTCGGGTCTTATAGGAGGATACTATACACTG -GATAGAAATTATTCCACGATGCAAGATACATATCTATAACGCAGGTTTGCTTTTGTCCTG -TCCCATAAAATGTCTACGGCGGATGCACTCCGGCAGCAAGGGCTTTGCTGACGGCTTACA -ATAGAAGCATATCGCAGAAATTGATCTTTTTGGATCCTCTTCCGATTCGTTGCCTGTCAG -CCTCCCTGATAATAATAGTCATTCTTGACAGAGCGCGGTGGTCTCTAAGGAAATGGCTTC -TTTGGGTAGGTACTTAGGGGTCTTAATGGACTTGAAAGTCATTGACGAATCCCACGGAAT -TGATACATGATGTTCTTCTAGATCTTTCAATTGGTGATATGTATGCAGTCATCTGGAAAA -TCACCAGATAAATTCCCCTTGAGAAAATCATTATAGAATATATAAGCCTTGGGAACCTTG -TCAAACAAAGCCGTGAGATACAGAGAACCACACTGCCACTCAACATTGCTTTGAGAAAAG -GGTAAAAACTATCTCTGCTAACCACCGCGATCATTGCAGTGCTGGATTGGAGGTTACTGT -GAAGTGGTGCATGTAACGGAAAGGCTTCTCTCGTAACGTAGAGCTGAATACATGGGGATA -TagaaagaaaaaagaaaagaaagaccaaaagaaaAACCAAAAAATAGCCGAGAAAAAGCA -CACATACTCAAGCTATTAAACCTTCTTTTTAGTTCAACACATTCTGTTTGGCATGCGAGA -TAATATCTTCCCATACAGAGAAATCACCAGCAGGAGTTCGTAGGCCCTTACTTGCTGAGG -CAATTCCGTTTCCCTCGAGCAAAGGACTGGTCGGCAAATCTAAGATTGGCTCTCCATCTC -GGGCAAAGGCAATCAGCTCTTCCTCGCTACCCCATTCGCTCATCGCCATCACCCCAATAG -GCACACCGCCATCGGGGGTATAACCAGCAGGACAACTGATGGCAGGACAGCCAGTAAAGT -TGGCAAGCCAAACATATTCCATATTGCGCACAGAGGATTTGGTGTCCGACAAACCACGAG 
-ACAAGTCCGCCTGACCGCCGTCAATATGCCATCCAGGGATGGGCGTTGTGGGCGAGACGA -TGAGTAGCCCAGGGTGGGTCTTGAAGAGGTGCGCCAGATGTGTCATAAGAAGATTGCGCA -GCCGCTGGGAGGCGAGCAGGTCCTGTGCGGTGACCTGGTACATTCCCATAGAGACAAGTA -CCTTATTCGGGGCTGTGAGCTGCCGGATTTTGCTTGCGTCAACGCCAGATGCGATCTCCG -CCATGATGGTAAGGGCATGGGCACGCTGACTTTCCGGTAGGTAAGGGATTGTAATATCGA -TGATAGTGTAGCCTTGCTCGCGGTAGTAGTTGAGCGCGCGATCAAACACAGCTCGTACAG -GGCCCTCGGCGCGGTCAATCCAGTCGCGGACAATCCCAATCGTCTTGGTCCTCTCGGTGT -TAGGATCCAACGGAGCAGGATAGGGGAATTGCGATGAAATGGGGTCTTCTGAAGCTGGTG -CTGGAGTGGCCATGATACGATAAGCGAGGGCGAGGTCATCGATGCTGGCAGCCATAGGCC -CGATAACGCCAACAGTGGGCGCGAGGCTTTGGGATGGCGCAGCAGATACACGGCCATGTG -AAGGCTTTAGGCCCCAGATACCGCAGAATGATGACGGGATACGAATTGAGCCTCCCCCGT -CTGCGCCAAGCGCAATGGGTACAAGGCCCGCTCCGACGGCGTATCCAGAGCCACCGGAGG -AACCGCCACAATAATACTCCTCATTATGGGGGTTTCTAGGTGTTCCATAGTTGGGGTTGT -TATTGTTTGTGTCTGAAGACTGTTAGGTACTGCAAGTCGATGATGCTCAAATGAAACTCA -CCCAGTCCGAGCTCATGCATGGAAGTTTTTCCGATGATGATAGCTCCGGCATCCAGCCAT -TGCTTCACACACCACGAAGTCGCATCCGTGCCGTGTTTAAAATCCAGCTTCGAGCCCAGC -GTGCGCTTATACCCTGTGAGATGTACCTCATCCTTCACTGCGACTGGCACGCCATCGAGT -GGCCCCAATGACTGGCCCCTCTTGTGTCGCTCCGTAGATTCTTCCGCTGCTGCACGAATC -ACCTCGGCCTGGGACTCAAGAAACGCAGTTGAGTGTTTCCCGGGCGGCTGGGTATCACGT -CGGATCAGAGGCAATAGCGTCTCGATGACAGCTGTCGGGGTCAACTCCCCCGATGAGTAA -AGCGCGTGGTAGTCAGCGGAAGTATAATACCCCTGGTCTCCCTTTCTGCGCTCCACAATA -GCAGACGCGACTTTCGCTGCTGACTCGACCTGACTGGTATTGTCGACCGGGATGACCGTT -GGATCGTAGCGGCCGGGGTACTGGCCCAGGTGGGGAATTTCACGAATGACACCGAAGCCA -GCATTGCTCCAGAAATGTCTTTGGATGAATCCAATATTATGAATTCTGACGTGAAAAGCG -GAAAATTAGCACCCCTGAATGTTGTCGCTCAATGGAATTCCCCTATAGCTTACAATGTAG -CGCCGATAGCTAAGGGCGTGCCGCGGAAAATCGGTAGGACCTGGTCTTCATATTTATACG -GCACGTTGGGACCCTCCTGTGCTGGGGGATAATTCAAAAAAGATTGTTCATTTGAGGGCA -TGGTGTTGGTTGCAGAATGGTTGCAACACTCGGAGCTGGCTCATATGTTGCTCCGACCTC -AAAATGCCGCGGCTAAGTTGTTGAGACCGGTTGTGCCTCAGGTTCATGAGACGTTCCACA -CCTTGACCTTTCTCTCTCCTAGCTAGCTGGATGAAACTGAGAGTTTTTTTTTCTCGTCAA -TTTTACTCCAAACTCCGTGCAAAGTTTTACATCGGTCCTATATACATATATATCAAAATC -AACATGTTTGTTTATATGCATTGTTTCATGTTGGATAAATACAACATCTATAAATGTGAT 
-AGTATATCTACAAGCTCAACTTAATATTGTACTTGTCCATCCACAAAGCAGCGCCAGTTA -GCACCGCCGCGTACAGGATTAAAATACCCATGGCCTGCGGTGTATTGAGATCCAATGTTG -GGATGCTCAAATTCACCGCTCCAGTAAGTAGATTCGCGATCAGGAACAAGGCAAGACCTC -CGCGGTTAAAGGCATGCAGGATGCGACTGGTTGCGAAGGACGTTTGCTCTGCCTCACTTT -CCTTGCTAGTCGCTCGGTGGACCGCAGGGAAGAACACAGTCTCGGTCAGACAGAATAGGA -AAAGCTGGGCACTATTGAATGCTGCCACCCAAAGCACATATGGCATGTTAGCCAGCCGGC -GCGACACTGGAATGTTGGCACCATAGCCGAATGCGTAGGTGGAATTGAAGAAGAACATGG -TTGACCAGAACATTGTCTGCAGAATCAAGCGCACCAACACAGAGCTCCGCGCCTTCTGGG -GCGATTTCGTGGCCGAGGTACCACGGGGAATGATGCGGACTCCAACGCCCCGTCCAGCGA -GGAAGATCGCAAAATATCCAATGAAAGAAAAGACTCCCTCGCGGTTCTTGGAGAGCAGAT -CTGGCCCCCGAGGAGACACGAGGATGTAACCCTTGAGATCAGTCGACTCCAGAGCGACTT -GATAGAGCACTGCAATTCCAAGCGCAAGCACCTCATAGGAGGGGATCAGTGTGGTTAAAG -AATCGAATAGCTCCACGAAAGGAGGCAAAAATCCAAGCGTGAAGAAAAAGTTCCAGTGCA -CACCATATTCCGTCACATGTTCCGCATAGTCCAATCCCTTGACACTCCACAGTCGGATGA -GGCCAAGCACAAGCAGAGGAATGGAGTGTCGCGCCGATCCAATTAACCTCTTATGCAAAG -CTAATTTCGGGGATTTCGAGTCGCGGCCCTTCAGAACCGCGCGCGCAGAGACCACTCCAG -CGGAGAAAACAAAAGAACCTACACCCAAGTCCATTAGAGATGTTCCCCAGTTCTCCACCT -TGGCGAAACGACGTGGAAATGCGCGGAAGTCGACGGCAAGAATAGCTATGCAGGTAACAA -TCATCATGGCTGCGCGGTACGTAGTCAAGAAGGGGTGAATGGGCAAGGCCTGCTGTTCCT -CCTGGGGGCTCTCATTTTTGGCCTTGAATTTGCGGGGCGGTTTGGTTTTCTGGGGTGTGC -GCAGACTGTTTCGGGTGAGGAATAGAAGAATCGCGGGCGAAAGGAGGAATATGTTGAGGA -GAAGCGGTGCGGAGGAGTATGCAGTCGTTGCGAATAAGATGGCTAGTACATTGAGAATGA -AGTCGGTCGCGAGCGCCGCAGGACCGTAGGGTGTGAAGAAAGATAAGCGGGACTGGAGCG -CAGACCAGAGGAGGACAGATGTCTAGAGTGATAAAAGGTCAGAATGTATGAATTGCAAGG -TAAAAGTCTCGAGGTTCCTCACTGAAGCAACAAGTGTGACTGTATTGATCTCGAGGATTG -TGCTTCCCGCAAGATTAGACACAAAGGCCTCTTTACGAGCTTTGTAGCTCGGGTCCATAT -GGGGCAGTTATCAATATAGAAAGCTGATCACGCTGAAAAGATCAAATCTAAAGAATGACG -TGGTGGAATGGTGGAGGGTTGAAGCTCTAAATGTTGTCTTTACGCGTGATCACCTGACCG -TTCCACGGTCCTCAATCCCAACAACTGTCAAGATCTCAAAATGACACTGTGTCAATTACA -TCGAAAGTTAGTGGAGTAACATTTAAGTGGCTGCTCTTATACATGACTCAAAGGAATGGT -GAGTACTATAGCTAACCCTGAGGTTGGCTTTATGGCTTTACAGCTTTAGGATGTAGGCAA -CTGTTTGCAACCAACTTTGGTTTCGATTAATGAATTTTGAGTAGCGGGTCGTAAGTCTCT 
-AGTGTCTATGAATAAAAGCATTTTAGCACGCTGTGCTTGATATCAATATGCAAAAGAATC -ATGATCTCGGCTTTCCACGTCCACCACGACGTGCTCCACGGCCTCCGCGGCCTCCGCGGC -CTCCTCCACGCTTCTCGATACCGCCTCGGTCTCCTTGACCAGCCCCAGGGCTGAGTGCAT -CACGACTTTTGCCACCCCGGGAAGAGTTGCGAGGTCTAGAAGAACCGCTTCGGGAAAGCT -CATTCGGGTCTGCATAGTTGAAACCTGGAACGCCCTTCAAACCCATCTTGCCAATGACCT -TTTTGTCCATTGGGGGAGGCTCAGGGCATCCCATGCCCTTGACAGCCATTTCATTGGCGA -GTTGAACAAGGCCAGCCTTGTCAAGACGAAGTTGCCTCAGAAGACCCGATCCAGCGAAGA -AACCAATGAAAGACGAGTAGGCACGTTGCTTTGACTCCTCCTCAATAGTGTACATGGCCT -GTAAGACAGCTTTCGAGCATGAAGAAGCACCAGCATTGATCGCTTCGGTATCAGGGTGAG -GCTTGATTGGCAAATGGCGGTTTTGCTTCATGAAGAACGACTCTGCTTCCGTGAGGAGAA -TGATGGCACGACCATCAGCACCAGCACGGGCGGTACGACCGACACGGTGAACGTACTGCT -CGCCGTTTGATGGCAAACCGACCTGGATGACCAAGTCGACGTTGGGGAAATCCATACCAC -GACCAATAACATCTGAGGCAAACAAGATTCCAGATTCGGCCTCCTTGAACTGATTCGTGG -TTCTTGTGCGAGCACCCTGGTTGAGTCTGGAGTGAATCTCGAACACCTTCAAGTGAACCA -TGCCCTTGCAGAAGACGCCTGCGAGGAGAGCAACCATGTGTGCAGTCACGCCGAAGACGA -TGATCTTTGAGCTTTTCTTGATCTCAAGCTTGAGCAGGGAGGCAAGGGTAGTGAAGGTGT -CTGCCACAGACGGCATCAGCACATGGTACTGAGGGACTCTCTCATGGGTTGGTGATTCAT -TCTCATCAATGGTGGAAATACTTGTGTAGTCTCTCCTCAGTACAATATTGACCACATCCT -TGACTTTAGGTGGGACGGTGGCCGAGAAGCACATGCCTTGCCACCCAGTGCTCTTTTGTG -GGATGAGTTGAAGGATCCGCTTCACATCAGCAATGAAACCACTCTCAAGCATAGTGTCGG -CTTCATCCAAAACCAAAGTTTTGATGTTAGACAGCTTCTTAGCGGTGGGAGTTTCAGATA -GGTAGTCGCATAGGCGTCCCGGAGTGGCGACAAGAATGGATGGTGCCCCCTTCATGAAAC -GCGTATGAGCCGATGCTCGAGCCGTTCCACCAACTGCAACATGGCACTCAAGAGGAGTGG -GAAGTTGAGATGTTAGTTGATCACATGACTTTGCAATCTGCTGTGCAAGTTCTCTGGTTG -GAGTAATAATCAGAATGCCGACCTTGCCCTTGGGTGGCGCGGAATCACCTTGGAGCAGGC -AGTGTAATGCAGGCAACAAGAAAGCAAGGGTTTTTCCAGTTCCAGTTTTGGCTTGGACAA -GGCAGTCACTGCGCCAATTTGGAAGTTCGGTCAAAACACGCTGCTGGACAGGGGTCATAT -TGGTAAATCCCATCACATCAAGGGCTTTGAGGAGCTTAGCGTCAAGTTTCCCAGCCATGG -TTGAATACAGCTGAGTATCATTGCTGAGCTGAAGACCTTCCATGATGGTCAAGACTTTTA -TCAAGGAAACTGCAGTTAATTGTTTTGATAAGAATTCAAACTCGACGTGGCGAGTGAATG -ACTTTTAGTGTTGATCAGGCAAGACAAAGATAAGAATTGTAGTGAGTGTAATCTTCTTCG -TTCTCTCTAAGCAAAAGTCAGACTGATGCTTAATGATGCGAATCAATCCAACCTTGGAAG 
-TTTATATTTGACGAAGAAGAGATTTGTTGCCAAGCATTTAAGATTGTCGCAAAGTCCGGT -AAACAGTATATGATTGGCCGATATGGACCCATTATCCTCGCTTGATGTCACGTGCAATGT -GGGTTGGTTCCAGGAAAGTTGAGACTAACCGTATAAGGTAATTCGCCTCCAGCTTATTTT -TAAGTGAAAATATGCCGGAGGTCCAGGTTAGTGGTATATTTTGTCGATGTTAGATGAGTG -ACTTTGCTAGGTATTTGGTATTCTCCTTTGATGGGCATTTTTTTCATGTCCGTTTTTCCA -GATACGAAAGTGTCTAAATCACAGGCTGTTGTGATTATATATCTCCGATAAAAGGATGCA -GCGATCCACACAGTGTAATGATTTGAAAGAAATGATTTGAAAGAAGGTGTAGTCGCCGCG -TAGTAAGTACTTATATTTGAAAGAAATTCTTGCGAGCTATGTCTGATATCCTGTTGATAT -AATAGGCCCGGTCAAGTCGATCACTTCGATCGAGCGTAGCCCAACACATAGTACTTAAGT -TCTACTTCGTAGGTAGTACGAAATGGACAATCTCCGATCATGTTTTTAGCGAGAGTGTCA -AACTTCGCTTACGGGCGCCAAAGCCATAACACAACGTTTTCATACGGCATGGTGATCGTC -CCGATTCCCACCGGTTGTGGATGCGAGTCGGTTCTAATTGCGTCAAATAAATTGGCGGCA -TGTTTTTATTCCTACAGATTTGAATGGACTACATTTAGGGGCCGCGGTTCATAAAATCCA -AAGTGATTCCTCGGGTTCACGGTAAGCGACGGTTTACTTGAAACCCGGCATCAACATCCG -TAATGATCGGCCGGGCCCACGGACCGTGCCTAGAATGCGAACGTCCACATGCAGTGTGCC -GGTTAAACTCTTGGTTATGTCGTAGGGCCACCATGTTCGAGATAACCGTGCCCGATTCCC -CACTTGCAGGCGTCCACATGGCGCATGGCGATTACGGTGTACAGCGTCAAGGGCATTGAC -GTGTCGCCTGTGTACTTTTGAGAGGGCAACATTCATCTGACATCAGTAACGTCACGCTTA -ATATGAGGAGGTCACAGCATCATGGAAGCATCAGGTTGTTCTCAATGGATTCATATGTGC -CGCATAAAAAAGAGGAGAGATCAACAGGATCATTAGAGGTATTAAAATGTAGAGATCGTC -CTGCTTCCTCCAAGGCCGTCATAGCGGACATCAGAGCGGAGACCGCGCTGCTCAAATAAT -AGGCCCTTTGCAAGGATTTCCCCATTGAGGTCCCCTAGGATCATAAACATAAAGCCCTAA -TAATAGTAAAGTCAAGAAGTTCAAGTGCTACAAATAATCTTCTATGACATGGCTCTGTTC -ACCCCACATGCTTACCTGTCCAAATGATACCAGTGGATGCCGAGAGGAGCCTGATCCCCG -GTAACCATCGGCGTTTAGATGGGGGCCAATTCTAGTCCGCCTTTACTCCGTAGATGCATC -TCCACAAGAACGCGGATCATGACCGATGGGTTGTGTATGCCCAATGGCGCAGGGAATAAC -TCTGATTCTTTTCTTGTTGCCTCTTGGATGGACACTCCCCTTGTAGCCCTCAACATCTAA -AAATAGAAAAAGGACATGCATACTACGTAGTAAATGCTCATAGTTACGGCGCGCTTAGAA -CACAAGACCTTTAAGTGGCAGCACATTCATCTTGGCATTCGAAAAAAAAAGCGGACCTAT -GGGGCGGCTCACCGTAGTGGCTGGAAAGAGAACGCTATATCCATGGCCCGTGATTATCCA -CACAGTTCGTCTATTTCTCCACGCTTATTGGGCGCTTAGGGGTCGGGTTAAGGTCGCTTT -CGCCGCATCACCGGGAGATTTGCCTGGCTTGTTAATCGGTCTGGAGAGATACGAATTCCA 
-TGTTGGGGGGTTGACAATAGTGTGGTCCTAGCAAGGCCACTCCGTTGTTGAGGCCACAAT -CACCCCCTGAAAGCATGGCTATACCTCCCAAGACACCCTATTTTTCTCTACCCTGTATAG -TATTCCAAGCTAATCTAGCCCTTGATTCGTACGATCTGTCACGTTCTCCCTTAATCCCTG -CTCGGCTCGGAAATAGTCCACCCACACGCGATCTTTGCAAATCAGTAATCCACCACCTTC -CCCTCATGTTAGCTCCTCTTCTGCTGTTCAGTGCTAGGTCCATGGTCACCAGCGGGGGGT -TCCCCTTGAAAGGATCTGCGCAAATGAGAGAAGCTGGACTGCTAGCACACCAATAACAAT -GAGCTCCCAGGATGAACCGTCTGTGAAGAGACGGGAGTCTCGATCGGGGACAAGGAAAGT -CTCGACCCTGTCAGTGGAGCAACTAGAGCGGAAGCGTGCCAACGATCGAGAGGCTCAGCG -GTCCATTCGTCAACGCACTAAGGAGCATATAGAACAACTTGAAGCTCAGGTCGCCACGCT -CCAGTCACAGATTGCCGAGATGCGGCCCCAGAACGAGCGGTTCGATGAAGTTATGCAACA -CAATGCCTTTCTGGAGAATGAAGTGATCCGGTTGAAGCATCAAGTTGCTTCTTTGACTGG -CCGACCGGACTTTGCATCAAATAACGAACCGATGGTTCCATTTCAAAGTGGATGGTCTTT -GGAACAAGCCTCGAATAGTGCATTGCCGGGCAGCCCAACAGCAGGAGCATTATTACCGCC -ACACTTCACTGCAACATCCCACACCCAAAGACCCTCAGGTCTGTCAGCACCGCGTCGAGC -GTCTTCTCAGCATGATTGGCAGCAACCATACCTGTCCACAAGATCACCGTCGCTCGGCGC -AACCTCTAATCCAGAATTTCCTAATCGGATGGAGCCATATCCCATAGATGGTCAAATACA -TCAAGGACAACAAATCGGCCCACCCCAGCAACAGTCCGGATCTTCTTTCACCCAATTTAC -TTACAGCAACCGATCGGTTTCCATGTCGGGTGCCAGTCCGATTCCTCAGCCAACGCCCAC -CCCGATCTACCAAGCCTCCACCTCCCCATACTCGCAACCAATGCCTCAAGTGCAACAAGC -GGATCCGACATATGAGTATCCATGGGCACCTTCATCATGAGTAATGCATGTGCCAGGCAA -CCGGTACAATTTGAGGGCGCTGTATACCTGGTGGAAACACAATGGCTTGCGCGTTCGGGC -CGGAAGATGTAACCGTCGATCCACTCTCACACCATGTGAGTTTCACTTTCACGGAAGAAT -CTCAATATGAACATGCACACTGATTCATGCTATAGACCTCACAGCAATACTTGTTCTCGG -ACTCGAAGTCGTCGCGAAAAGCATGCATTGGTAATTCCCTTTACACAACCGTAATGAAGG -AGATAACAGCCGTTCCGAATTACGCAACACGTCGGAGAATCTTCGGTCCCACCTCCGGAA -TATTTACACCATGCAAGCTCACCACTCGAGGACAAGCTGCTTGGACTCCACGCCGGAACA -CACTCGGGGGATTTGGATATACAAATATGGCGGGGCAAATGGGCATGACTGGGGCCATCA -TATTCGCTGCTCATTTACTCTTTCAATATACACACTATGACATTATGCATCTCAGCGGAG -TGAATTTCAAGTGGTCACAGGAATGGCCCATGATGGCCCTTCATATTCTGTATGTCCCGA -GAGGGAACGATGAATACATACCCGAATAGAAACAAATAATACGCCGCAGTGTGCATGAAA -TCAAACTATTTATAAAGCAAGCCATCTAGGTGGGCATTCATTACCCCATCTCATTTAGTT -CGTGTATTGCTTCTGGAGAAGCTTGGCAATTGTCTGCAACTGGATGTTGCTGGTACCCTC 
-GTAGATAGCGCCAATCTTGCTATCACGGAACATCTTCTCAGCAATGCCCTCACGGACAAA -GCCCATACCGCCCATCCACTCAACAGCAGAGCTGGACACCCGGCCGGCGACCTGGGAGGC -GTACAGCTTAGCCATGGCGGCGTCCTGGACGAAGTCCTGGCCGGCCTCCTTCTTACGGGC -GGCGTTGTAGACGAGAGCGCGGGCAGCGGCAATCTCCGTGTAGGCCTGCGCAATCTGGTG -CTGCATGCCCTGGAAGTTTCCGATGAGGTCGCCGAATTGGCGACGGTCGTTCCAAACGTA -TCTTTGGTTGCGGTCAGCTGATCGGTCTCATGCGCGCGTGTATATCGTATAGACAAACTC -ACCGTGCAGCGTTCTCCCATGCACCAAGAGCCAGCCCGGTCATCTGAGCGGCGATACCGA -TACGGCCCTCGTTCAACACGCTGATGGCGTACTTGTATCCCTGTCCCTCCTCACCGAGGA -GGTTGCTCTTAGGGATGACGCAGTCGTCAAAGTTGAGCACACAAGTGCTGCTAGCGCGAA -TTCCAAGCTTCTTCTCCTTCTTGGCAATTGAGAAGCCGGGGGTATCCTTCTCAACAATGA -AAGCAGTGATACCCTTGTAGCCCTTGCTGGGGTCAATGTTGGCAAATACGATGAAGATAC -CAGCCTCCATTGAGTTGGTGATCCACATCTTAGAGCCATTGATCTTATACCCATCGGCGG -TCTTCTCGGCCTTACACTGGAGCGCAAAGGCATCGGAACCAGAGGCAGGCTCAGAGAGAC -AGAACGAGCCGACAGTGCCAGTGGTCAGCTTGGGCAGCCAGGTCCGCTGGGCCTGGGCAT -CACCGTACTTCATGATTGCCGTGTTGACCAGGGTGTTGTGAACATCTACCATCACACTCA -CACTGGGGTCGACGCGTGCCAGCTCCTCGATCGCGACGATCGCCGCAGTGAAGTTCATCC -CTGCGCCGCCAAACTCCTCTGGCACCTCAATGCCCATCAAGCCCTGCTCAAAGAGCTGCT -CCACGACCTTCTCATCCATGGTCTCGGCCTCGTCCATTTCCCGCACCTTCGGGCCGATTT -GCTCTAGTGCGAACTTAGAGACGGAGTCGGCCATCAGGGATTCGGTCTCGGACAAGAGGG -TGATCGGCGTCGGAGTCAACTCGGAGAGGTCAACCTCCCGTCGGCGGGGGGTGGTTGAAA -ATGCATAAGCGCTAAAAATGGGGAGGAATATGTTAGCAAACGAGTTCAGTAACATGTCCT -GGACAAGACGGTACCTCTGTACGGGCCGGCCGACGAGTCGTTTGCCCGCCAAGCGGACTG -AGGGGGTACGCGACAAAGGTCGCAATGCGCGGATAATAGAAGCCATTGTGAATAGGTGGG -AATTAAGCTAGGCGATGAAGAGGAGAGAAGGGAGAGAGGCAAGGAAAAGGGTCATCACGA -GCGAACCTCGGTTACCGGCGGCGTTGGGGGATCCGCGGGGAGTCCCCACATGTCTCCCGA -GGTCTCTTACCTAGGTCCATCAGGGACGAAGGGAACATGTATGAATATGTCTTAACATCA -TATTTTGAATCCATTCCCTTATCTATAACATTACTGAGTTAAACATTAACCTTTTTAGAC -ATCCCTCCGTTCCCGGATCAGATCCTCGGCAGAGATATTATTACCCCCTGGCGACGGACC -CTGACCCTCCAGATCCTCGAAACGAGTCTCAATATCAGTTCGGACTATAACATCCTTTTG -TCCATTGACAATATGCTCCTTGCTTCCATTTGTTCCACCACATGTGGTGAGCTCCATCCA -GCCGGGGTCATCGTCAAACGACTTTCTACCCCCATATACATTGGACTGAAAATGTGAGTG -ACTCGCCAAGTTCGACTTGTTCGTGCCATATGCTGCGCGCCCACTCGAGTTGCGGTACTC 
-GTAGGGAGTCGACGTTTTCTTTGAGGTGAAACCCAGTCGCTGCATCACCTTTTCCATAAG -GGGTCGGATGGGTGGTACGCAGCCAACGATGATGACGATCCAGGCTTCTGTCGAGGCCCA -GATGAGGAGGGAGGATTGAGCGTCTGTTGAAATTATGATAATTAGCAAAAATATTCTTTT -CCGAAAGGTGAAATGATGGATGGTCACTTACAGGTGATATCGGTTGTTTGGGTCAACGCC -TTAAGCTCATATGTCTTGACAGCGGAGCACACCGCTGCTCTGCATTTGAGAAAAATATTA -GCCCAGCCAAGCTGGAATGTTCGGCGACGGAATACTTACATCCAACCCAATCCCATGAGG -ATAGACAATGCGACCTTGATTCGCAGCTTCATTTGGAGCTTCCAAAAGAGATGAATAGGA -TACGATGCCAGCAAGGCGTCGCTAAATGCGTTGAAGCCTAATTTATTAGCAACGGCTCAA -CTCTTGGATATGTCGATGCAATGTGCAGACTCACTAGCCTGGAAGAACGAATAGTTCTGA -TTTAATTTGCGCCCCGGATCGCATGTTCCTTCCACGCTATCATCCCACAATTTGTGCATT -GGCTTGCATTGGCCCAGAATAGTCCCAAGAACGGCAATGTTAATTGCCCCAATAAGAGTA -CCTAGGGCAATTAAGAACCAAGGCTTCTTCTCGTGCCGACCACGAATTTGGATAAGGAAG -GCGAGAATCGCAAGCTTTCCCAGCATGCTGGCAGCTATTGCGAGTAACTGGAATACCCAT -GACCAGTGCAGCGCTGTGACTATTTGTAAGTCCGACAAGTCATCCAAGTGATGGCCCATT -CCAGCATTGACTGAAAACTGCAGGAACACTTGGGCAGTGGTCTCTGTTGCCTGTGAGAAG -CACACTCATTATTAGCATATGCCATGAGATCGAAAGATTTCATAACACCCACCACTGTCA -AACCAGCGACAGCAAAGTCCCATCTAAACCGCCCCACGAATTTCCAGTTGCTCAAGAACC -GAAGACTGAAAAGAAATATACCAATGCATGTTTCTGTCCAAGAAACTGCGAGGATGACCG -CAGAATTGTTTCCGGCCATCTCGGGCCTTCAGAGAGGGAATGCCTCCGCACGTTTTACAG -TAGGCAAAAGCAAAAGAAGGTGCAAAATGGCCAAGTCAGAACTGGATGCATGTGTTTGAC -ATTACCAGCATCTTGTATCGGCCACCCGGAGACCGGTAATCATCGCTCTACGATCCCGTC -AATGTCCCACGCGATGGACAGGGGTTAAATCATCGTATCGACGTTTGCTATTGGATTCCA -AATCAGGCCCCCAGGACACCTCGCAGTGCGGAAGGATTACGGAGTACGCTAAAAGGGTCT -GCGTCTACTTCGTATTGTCTGCTCAGAATAAATTAATTAGTCAATAACAAAGCGTCCGTA -TCGAGTGCCGAAGCTGGCACTTGAGTCATCAATTTCTAGCAATTCCTCCAAATTAATTGT -TCTAGCTAAGACGTAAGCTGCAAGAGAGAAGTATGGGTGATTGGCTTCCAACATGTAGGA -GTTTTTTTCCAGATCTTCAGGTCTGCAGTAGTGTGATGATTTGAGCATGAGTATGTGATG -GGAATGTGAATAGATTGGTCTTGTATTTTTGTCTCATATGAGTGTTGCCTAAGTGGTACC -ATAAGCGAGACAAAGCTGTATGAATTTCTCGAGTACGGAGCACAAAGGTTGCATAGAGAA -TAGCCAAAGTGCCAAGGAACAAGGTGCATATAGGATGTGGATCTTTCTAAATGAAATTAT -ATTATTCCGTACATATCAGGAGCCATTGAAACAAGAAAGCGACGATCGATTCACAAATCG -TAAATCCATCGAGATACAACGGGGACAACTCATGATCGGCCTTTTGGACGCATAGACGCA 
-TATTTTACTTGAACTAAAGCTTTCATGCACGAGCCCAGGTTCCCTGGTGGTTTTAATCTT -TCGCTTCAAGAAGGCACAAGTAAGGCTTAGCCTTGACGCTAGACACAAAACCGAAGGCTG -TGCCGTTGGACGGAGGAGCCTGTGTATATTTATTTTCCCCAGGGAGAAGGCCCGGGATCG -GCGGGCCCGCAGCCGGGAATGAAAAGACCCTGTTCCCATTTCATTAGAAGCCACAATATC -GAGCTGAGAACCGAGGATGAGATAAATGAAAATCTCTGGAGGCGTATATCAGGGCGTATT -CCTAAGCCGTGTGCGTAAATTATAGTTGACTCTACTTGATTTCGCCACAACTTCAATGGC -TTGTGTAGGATTTGAATTGTTTCATTCCTTTGGATCTGGAACTAAAAAGACGCGGGATCT -GAGTAAAAATAATGTACATTAAATATCTAAATAAGGAAGAAAGGCATAATCTCGCACATC -CACCTGGGCTCTAGAACGCTCCACCCATGCGAACTAGATGATACCGGCGGCCCGGAGGAA -ATCCTTCTGGTGCAACCAAATTCGAATATCTTTGCCCTTAAGATAGAAGGGAATATAGAC -CAAGTAGACCACGACATTGATAGCCGCAACTGTCATAAAGACACTCAAAACACCATTGGA -AGTGACATAGTCGTTGATAACCCACGTCCAGATCCACGCAATGCAACTCTTCAAAGCAGC -AATGGTGACCATAATCTCGACGGAAAGCTATAAGAAAAAGTGAAACAATTAGCAAATGTA -CGGCAAAGATGGGGTCGAACATGACAAGATAAAATCCCTACCTCTTTGTGACAATCCAGG -GCGTAGTTCATTCCTAGCGTAGTTCCCATTTGACTTCCTACCGACAGAATACCGGCCCCC -ACCACAGGCATTGCCCAGTGCATTTGGTGCTCATAGGAAAGTCCAAACCAAAGTGCACCA -AAGAATGTGAAAAATGCAGCCAGTAGACAGGTTGGGAGTCGCATCTCGGGCTCACGAATG -CCGTTATTGCGCTTGGTGAAGAAGTTGGCAATCCAGTCGCCTCCTGGGCCCGAGAAGAAG -ACGCCGAAAAGAGAGCCAATGAAGGGAGACAGGAAGAATAGACCCTGGCCATTGGAATTG -AATGTATATGGAGGAGGTGCAAAGAGCTGTGCCACCGATGCGCCGAGGATGACATTCCAG -CTAAGGGCCATGCCATAGACGCAGCATGACCAGAGGACAGTGGGATAAGCCAGAAGAACG -AACGGCCGCAGGAACACGTGGAGAAGATTCACATCTGGATCTCCGCTGTTCCAGAACTTG -AGTTCTTGCACAAAGGTCTTTTTGGGGATTGGGCTTGATTTCTGAACTAGCACCTGGTTT -TTATCCTCATCGACCCTGGTGGACTGGAAGGGATCAGGACGGGGTCCGTCGAACTTGGTC -TCTGGCATGAAGAAGAACATGGAGATCAGGCTGGCGCCAACAAACATGGTCACAATCCAA -AAGGCCCAGTCCATTCCGAGTGCTTGCACGATGAATCCAGAAAACATAGGTCCCAATTCA -TTGCCTCCTAGCACAGAGAGACCATATATGGCGATAAGCTCTCCGCGGTTGTGTAAGAAG -AACATATCAGTCACAGTTGATGGGACGATCGACTCAATGGGCGCCTGGAAAGCGCCTAGG -AAAGCACGCCCAACTAAAAAGACGGTGTAGGTTTTGTTGGATGCAACGGCTAGCCAGATG -CAGGCGATGGCCATGAGGAAGTTCGAGAAGAGGTAGATAGGACGGCGGCCATACTTGACG -GCAAACGGCATCCAGACACAGTTCCATACACCCTGATTGATCAAGCAAGCTCCGTTCGCA -TAAGTCAATTCCTGGAGGGGCTTATCAAAGGTAGCGGACAGTTGCATCATACCTGTACTA 
-GTCAGAATGATTCTCTATAGTCGAATACGATAGAGGATCAATACATACCAGCTGCCATGA -TAGGACCTGTAATAGCACCCATAAATGCAAACCAACAGCCATTGAAGAAAGTCAAGCTTT -TCTTTGTAGTGGACCAGTTTAGAGGATCATTTGGATCGTTAATCGAAGGTTGCGGGACCA -ATAGCATGCGACCATCGCCTGTTTCACGATGCTTCAGTTGGTGAAGCCGCTCGGAGATGC -TTCCATTGTCGCTACCATCGAACAGTATCTCAGTTCCAGGTGCTGCATGGAACTCGACCT -TCTTCTCGGGCTCGGGATGTGAGCTATTCGACATATTGCAGGTGCCCAAGGGGGGGCCAA -GAACTGAAGTAAAAAAATGAGAACGAGTGCCAGGAAGTGTGAACAGGGGGAAAGGGAAAC -ACTTTGAACACTACGTTGCATCGAATGAGGCGGAAAGACTCTTTTTAATACCCGGAATAT -AGTGCATAGTGACCATTTCCATCTTAGTCTGGAATAATATCGAGTAGATAGTACGGAGTA -GAGTACTCGGCATTAAAATTCAGCCAAAACGGCTTTGATACATTCCTTTTGGCGTCATGC -ACCGCATCTCCGTATTTTTTTAGGGCCAATCATAAGTCCAATATGAGAACTAGCATTGCG -CTATGCAGGGATGTATGCGGCTGTCGTAATAACGAGGCTAGTGCGTGTGGAAAAAACCAG -TTAGCTCCTCCTCATCCCGCCCGAATACGCTATACTACTATATTCCTGTGGATTATGTGG -CTTCCATGCTGAACTAGTCAGAATTTTCAAGTTTTGAATCCATTGAGGAAGAGAGGCAGC -TGTAAGCCCCTCATATATCCCCCTTTTTGTGATTGGTGATGAAGCATTGCATGTTCTGGC -CGACCTCGGGACTTGCCTTACAGCTACTCCCGCCATACATGTATCCAGCTTAAGTCGTGC -ATTTCAGAAGAAAAATAATAATCTTCAAATCCTTCTGGAAGAAAATAAACCGAGTTTCAT -TGGATGAAGATGACTCCGTCGCAGGATTTTGCATCTCACCCTAGCTCTGGCGGTTTACCA -AGTTCGGTGCCAGTGCTCATTGTGGGCGGAGGTCCGACTGGTTTATTGCAGGCTCTCCTT -CTCTCTCGACTTGGAGGTATAGATGCTTGCAGAGAAATTGACGGGAAGATTTTGTTGACA -TGAAATGTAGTTCAATCATTGATCATCGAGCGTTATCCCGAGCGTTTGGCTGCCCCGAAA -GCTCACGCGGTCAATCCCCGCTCCCTGGAAATCTTGCGTCAATTTGACTTGGGAGAGAAG -CTTGTGCGTCAGCTAGGCACTTCGCGAGATGACAGCTCCTCGGTCAATTTTCTCACCAAT -CTTTGCGAGGAAGCGATCGGCAGACTCCCGTATGAGCGCATGGATCCTGCGGTCTTGGAT -GACACGCCAGAGGTATTGTCTTTCATAGGGTGAGAAGCCTGGCAATGGCACTGACCAATT -GCAACAAATAGATGATTCACAATATACCCCAACCCGCCCTCGAGCAAGAATTGTCCAATT -TTATTGCAAAGGATCCTAACATCACTTTGGTTAAGGGATTCAGCATTCATGCCGTGGAAC -AAGTATGCGGCTCATTTTTCTCATTGATGAGACAACACGGCTAACCGAGGATCAGACTGA -AGATGAAGTCGTAGCGACGATTGAAGAACGGTCAACTGGACAACTACATCAAACAAAGTC -CCGTCACCTCATAGCTTGTGATGGTCGTAGAAGCAAAGTTCGTGAGCTCCTCGGTATAAA -GTCTGAGAGCGAAGACTCCGATCAGACAATGATGACCATTCATTTCAATGCCAATCTACG -ACCCGTCGTTGGAGACCGTGTCGGTATGCTGTACTGGATAATGGACCCTATAGCTGCGGG 
-CTTTATCATCGGCTATGATCTCGATGGTATCCAGGTACACATCAGCCAGGTAGACGTTGA -AGAGCATCCGGTGGAGTCGTGGACGGAAGATATGTGCAGGGCGAAGATTCGTGGTGCTAT -CGGTAAAGATGATGTTCCGTTCGATATCCTGAGCTATCGTCCTTGGGTTTTCCGCCGCCA -GGTTGCTTTCACGTTCCAGGAGGGGAATATTTTCCTGTGAGTTTTTGTATTGGGGTAATA -ACCCTATGGATCCCTCGCTGACGTTTCCTTTCTTTTAGGGCTGGAGATGCTGCCCACTCA -TTCCCCCCAACTGGTGGACTAGGCTTGAATTGTGGTCTTGCGGATGTTCACAACTTGGCC -TATAAAATTGCCTTGGTTCACCGGGGGGTGGCAACGCCATCTATACTTTCAACATACACT -GCCGAACGACGAGGTGTTGCTGATTCATATTCCAAGCAGAGTGTTAAGAATGGCAAGGAG -ATATTTGCATTGTTGCAGAGCCTCAAGACCGCCGGTGCTGAAGATATTGCACAAGCGCGA -AAGAATATGATGGCAGCATTGGCGGATCCAGTTCAACGCAAACGCGTTGAGGCAGGCATT -GAAGGACAAAGGGAGCATTTCGATAATGTAAGTGGTCTCCTCGTGAATTGCCGCTTTCTA -CCTAACACACCAACTTCCATAGCTGGAGCTACATATTGGATATGTGTATGGGGCTACCAA -GCCGCCTCCTCATGCGTCCTATTATTCTCCCAAATTTGTGCCCGGGGCGCGCTTGCCTCA -TGCTTGGATCAGCTTTCCAGATCGGCTCTCCCAGGAGACAGAAGCAGTCAAACGCTCTGC -ACTGCCTAGAGAGCCAGTCGATGTGTCATATATCAAAGAGTTCGATGCAGATCAGGTTCG -GGCTCGTCAGTGGAGTACCCTGGACCTCTGTGGCCCTGACACTTGGACCTTGATTTTGAG -GCAGGAGCAGCAAACCCCGCAGGTTACCCTTTTGCAGAAGCACTGTAAGATGATTGGAGT -ACCCCTTAATACCTGGTGTCTTGGAGTGGACTTTGAAATCGTCAGACAGGGTTGGTTTGC -TGATGAATTGGCCCACGGTGGAGGTGTTTTGGTTCGTCCTGATCAGCACATCTTAACGAA -GGTCACTGCTGAAGCATACGGAGAAGACATGATTGCGGAAGTGAACAAACATCTGGGGAT -TTAAGAGACATCGACTTTGTAGGAATGTGATAGAGGCACAGCATCAATAGATATCGATTT -TTTATTTATTTTGATAATAATAGGTCCACGGGAATTTTCAGAATATTAATTATTTGAGCC -TCTCACCGATCAAAGGGGAaaaaaaagaaagaaagaaaaaagcaagCATGAGCGATAAGT -TCAATAACCCCTTCTCGTTGGGAGGCTCGGGTGTGATACGACGCCTTCCGCTACAATCGC -TTCAACTCCCCTTGAATTCGCACTCCATGGAAACATCGGTTTTCTCCACGCGGTACTTTG -TACACATCTATTGCTCTGCGCATCATAACATGTACAACTAGATTTTCCATGGCGTCTTTT -CTATAGTTGACATTGGCATCGGATATATCAAAACCCAAGCAGTGCCTCGTCGACTTGATC -AGTCTCAAAGTCAAACCGAATCAGATCCTCTTTATAAGGAATAAAGTATGTGCCGGTGAC -CCTAATTAACATGAATTGTCATTAGTCACTAGTTAGTAGGGGATTGGGGTTTCAAGGATA -TCTGCTTACCTTGCGTGCTCACTGCTAGCTTGATACTCAGCCAATGCTGCTTGATCCTCG -TGGTAGCTCATTAGAGCGTAGTGAAATCCTTTGCTCTTCTCAATTGAACTTGTGATCGAG -GGGCCTCCAACAACAAGCTTTTGGCTTTTCACACAAGGCAAAGATTTGAGTGTCTTAATT 
-TGGGAAAGGAACTCCTGCTTGATTTCGTCTCCGATGTCCGCGCGGAATTTGAACAGAACT -AGTATTGAGGTCAATTGAGCACATTGTTATCGGGAATGAGCCATACCAATGTGAACAAGA -ACCATGTCGAATATTAATTGTAAAAAGTAAAATAAAATGGAGTTCATAAACTGGCTCAAA -TCAGAATGTAGTTCTCATTTAAATAAGGCGACGGATGCCATCCGTTGCTTGGCGGAAGTT -TCCCGCCTTCACCCAAACCCCGTCATTTTTAGTCTTCTTCTCTTGACATATTGAGCAGGT -TCATTCTGCTGAAAGACACTGTCCGCTCTGGGCTCCTTATATTAATAAACCAGCAAGGTG -AACGCTGACCAGGGACACTGTCAGCCCTGGAATGGACTCTCCACGGCGGTCTCGGAACCG -CGCCGCCTGTCGGAGATGTCAGCGACGGAAAATCCGCTGTGATGGAGATCTCCCCCGCTG -CGCATCGTGCAGAAAGGCCAATGTAGCCTGTGTAAACGATGGCAAACAAGAGGTTAATCG -AACGTATGTGTATCTGAGTCACATGGAACTTCTTTCCCCCAATCCTTTCCCATTTCTGAT -ACTAACAGCTAATTAGGTACATTGCAAATCTACAAAAGCGAGTGCAGTGGTTAGAGTCCT -TAGTCAAGGAACAAGATCCAACAGTCCAACTAGAGGATGGCCCGCAAGTAAATCTATCGG -ATAGCTCACAACTCGAACCTACAGGAGAAAGTACCCAAATCGAGTCTACTATCCCACAGG -GCGACCGAATTCGCCGGTCCATAACACCTCACCATGAAAAATCTGGGCCGCTGAACGCAC -AACCGTCTCGAGGATATCCAAGACCGGCGCATGAAATTGGCTTCGTCTCTCTTTCTTCGG -GAGAGCCGAGATACATTGGTCCTTCAAGCGGATATTTCCTAGCAAACCTCATATTTTCAA -GCGCTGGCCGACGAACAGGGCCATCGGGAAACCATGGCAATGTGAACAAGCCAATTTCGC -TTTCGTCCGAATTTTTCAACAGCCCAGCACCGCTCCCAAGTCGCAAAGAAGATGCCGTGG -AGCTTACCTCAAAGTACTTTGCTTCAGTCCACCTCATATATCCGTTTCTACACCATCCGT -CTCATATGACAAGACTCAGGAGAATTTATAGCGATGAAGCATCCCCTCATCCAGCCGATG -CCTTTCACGTCTTCATGGTGCTCGCAATAGCAGCCTCGGATCTCTCCAGACGGCTGAGAA -TTCCGCTTCCAGCCGAGGGCTATTATACAGCAGCTACCCAATATTTCGAACGTGCTTGTG -CCGACGGGTCTTTGGAAGGACTCCAGAGTTTGCTCTTGCTAATGGTATATGGAATTCATA -ATCCCTCGTGCGATATCAATGTCTGGAGCCTGAACTACCAATGCTTGGGCTCACTGATTG -ACTTGGGTCTTCAGCGTGATGTTCGAGCATCATCGACATTCCCCATCTCGTTCTTGGAGC -AGGAAATGCGCACTCGCATCTTCTGGGTTGTATACAGTTTTGATCGTACATTGGGCACGA -TGATGGGTCGGCCGATCGGGGTGAGAGATGAGGCGTGCGAGCTTCGGGTAAGTGCTTCTA -TGCGGCATGCTTCTCTAATAGAGCTTGGCTAATAGCACCTTCAGCTCCCTTCAGATGTCT -CTGATGACGATCTTCCTGAGTCAATTTGTGGCGAAAGCCCGAGAAATCGTACAACCTCTC -ACATGACATTCTCTATTCATCTCTTCAAGCTCGCCCGCATCAACTCTGAGATTAAGTATG -TCATGCACAGCATCTGCAGAGATCCTCCCCATTATGCCTACCCACCGGTACCCAACATCC -TTGTGTGGCAGAAAGAGATGATCGAACGCCTTCAGACATGGCTTGCAGACATCCCCGGTG 
-GAGGCGAAGGCGATGCTATCATGAAGATATGCGAAACCAAGTACCACGAAATGATGATCT -TGGTGCTTAGACCTAGTCCTGGGATTCCCGAACCATCGGAAGAGATGCTTGCGCGATGCT -TTCAGCATGCTGTAGACCTCCTCCGCGGGTTTGGAGAGCTTTACAGGCAGGAAGCTCTCT -TATACAGCCGACTAATCGTTCACTCTGTCTTCTTGAGCACGCTAATAATGCTGCATTGCT -TGTGGAGACTGCCCCATATCGCATCCGAGGTTCAGATTGATGAGGTGGTTGCTGACACGA -GTATCAGTTTGAATATACTAAGCAGTATTGGGGAGTACTGGGTCGAAGCTAAGCGCGCAA -GGGATTGCATCCACGAATTATCTGGTGCAACTGTCCAGAAACTTATCAGAATTCGAAGCT -TAGAAGCTCCTTCAAGCTCGACCAGACCAGGCAAAAATCAATCAACTTCCGGCACGGCAA -GGGCACAGAGGCTGCATACTGCCAACGAGGGAATGGAGCTGCCTTCTGCACTTCATGCTG -ACTGTGTCGTTGAAAACTTGCCGCTGAACTCCTCTATGGATGATTACGATTCAAACGTTG -ATTCTATGAGTTGGCTGCATGATTCCATGCCCGGGGGCTTTATGGACTTCAGCGGAGCCC -CGGACTTTGACACTCTCATGTGGGAGGTTTTTAATGCTAACTGAGGAAAGAAGATTGATA -TATAATAATTGGGATGCATCACATATTCAAGAGAGCTTCATCAATCAATTAGATTCAATG -CCCATGGTCAGCTAGCTATCGAGGACATAATCAAGTCTAACATCTCATATCTCATTCAGC -TCGAAAAGTGACACTGACATTACTTTCGACGTCAATATCCTGAGGCGTGAGGTCCAGCGG -CAGAGGGTCCTCCCCAGACTGGCTACCATAACTAGCAGCCATCATGTGTCTTTGCGCTCT -GTAGGGACTTGCCCCATTGTCGGCGATTTCCGCCACTGCAACATCACGTCCGATCACTTC -GGAGTAGTCTTCAGCTTGCTGAATCGCATCGCGCAGAGCCAATTTGCGCGCATCCGAAGC -CAATCGTCTCCCAGTTTCGTCAGTGAGAGTCCAATTCAGACCGTCGATCTCGACCTTGGG -ATAGGCCAGCAATTCTTCGATCGCATGATTCAACTTGGCGAAATCACGGAAGACGGCTTT -GAAGGATATACTGGCATGGTGTGGGTTTGACACGGGCTGGTCATGTCTATCTGTTGACTT -GGTCCAGGTTCTGATACTCGTAGATGAGAAATTTACAAGAGGGGGCTCGGAACCATTGGC -CAAAGGGTCAAAGGTTGATTTGATCCAATGTTGTAGGTTGATAGATGATGTTGCCACTTC -TTTGGCGACTTTTTCTTGCTCGTGGCCGGTTCCCTTCACCGAGAAGCTCAGAGTGCCACG -TTCTGGGGCATGGGTTATGGATGATTTACCCGTGACAATGATTTGCAGGGGAGCCATGAT -TAAGAGGGATGAAGGTGAGAAAATATTTTGTAGTCTACCGCATATGGAGTATTTATAGGC -CAAGTCACCTGAAGGCTACCTAATGCCAAGGCCACACAGCCCCATTGTTATTCATCTATC -ATCTTCATCTTGGAATATCTCAACCCCCCAGACATACTCTCTTCTTGATGGCATTTCAAC -TACCATTCGGCAAAAATTTTATTTATTGACAGCCTGCCAGATCTCGTTCATGTCCTCCCA -GCCTCTGTCCGAGTGGATCATAAACTCATCAGCCCCACGGCAGAGATCCAAATACTCTTG -AAAAAGGATTTGATGGTCAATAAACTGAACCACATCAACCAGTACCTGTGGCTGGCTGGC -CCACCGATGCCGCCCAAACCCCTGAATCATCAGATTGCAATCTCGCCCGAAATCGTCGTG 
-GACGAAAGAACCATGCATATATGGTCTGGGAGCACTCTCAACGTATTCATCTGAAGCCAC -TTCATGGTATCTCCTTGATCCTCAGTTTTGGGAGTCGCACATTATTTGTAAGGGGCGCTG -TTGTGACTCCACGCCAGGCGACCTAGTGATCCAAGAGCAAGCGTGTTCGCGAGACCTGTG -CAAATGCGCTCTCGGTTTGCTATTTTCATACATGGCTCTTGTGCAGTTTGAAAGTGATTT -CTTAATAGCACGGAATTACCATCTGTTGCCAGATACCATCACCTGGGAGATGTGGCTTAA -GCTCAATCAGCATTTATTCAGGAACGATGCGACCAGCTCCGCGAACATCAACTCGCGATA -TTTGTTTGTTGTTTAGAGAGCTGAGACTGTCGCGACTGAACAAAACCTATGCAATTCGCT -ATGCTAACGTCTGAAGCGGCCATCAGTTTACTTACCAAACATATAGTGAGCCGTTTTACA -ACTATCTGACCCCGCTGACTGCCGCGGCCATCTACGTTGCCCTGGTGTTAACGGTGATGC -AGGTCGGACTGGCGACCGATCGTTAGGCCTTCAATACACCATCCCTCGATGCTTCGTATG -GATTCACAGTATTTGCGATCCTCGGACCTCTGCTTGAAATTCTGTTGGTCGAGTTATTGG -TGGCGCTGCCCTCACTAATAACGTGATAGTTCCATGGAAGTTCAAGCGGCAAAAATTTGC -CATGTCGCAGAAGCGGAACCAATGACCTTGATTCTCACACAACAATACAGCCACAGAACA -GTACAGTCACAACACTCTTCTACAGGGAGATCAATGATCTAGAAACCGTGGGCCTACTTA -GGATGAGTATCGCATATTATGATAATTTACACAAATAGAGAAATATAAGTCGAATATCAT -TTGTGATTAAGGCTGTCATATGTGATAAAAATCAAGAACAAAAATACCGCTCTCTTTACA -TACATAATAGTACATAGCTTTTGTCTCTTGAAAGACACGACCTGATATATGGCATCGGAA -CACAATCACTCTTTCTCGCGAGGCCGGTAGATTACTCGATCAGCAAGCAAGCGAGGCGTG -TTGATGTCATGGCTAGGCAGGACAACAACATCAGTTTCCTTGGCAAATTGCTTGATAGTC -TTCAGAGTTTGGTAAGCACGCATAGGGTCGTCGTTGATCCCATCGGGCTGTTCTTTATCC -AAGAGGTCCAGGCCATATGTTGAATCCCCAGGGAGGAAGTATGTAACTTGATCCTGCCCA -TCTTCTCCCTCTCCATAAATCACCAGACTGATATGTCCGGGGACATGTCCCGATGTATCG -ACGATGACAATTTTGCCATCTTCCGTCACAGGATATGACTGTTTCCAGGGTCCAAGTGGT -CTGTCCTGCAGGTCCACTATCTTTGGTGAAAACTCCTTTGGCCAGTGATTTGGTGTTGCT -CCTTCCATGCTTGCAAAGCGAGGATGCTCTCCGAAAGCATTCCAGTGCTCACGGCTGACA -TAGATTGGTAGATCTGGTGCTTCTGCAACCAGGTCCTCTAATCCACCCGCGTGGTCATTG -TGAAGGTGACTCAGAATGACGGCTTTCAGGTCTGTTGCCTTGATTCCGAGCTTGTGAAGT -TGTTCAAGGACTCCGTCTTGCCGTTCAATGGTGAATTCACTGAGGAGGCCATTGAATAGA -GCGAGTCGTGGGAAATATCCGGCGTCGTTGCAGCATGGAGATTGCCCTGTGTCCATGAGG -AAAAGACCTTCAGGGTGACGAATTAGGAATACCCCAACTGGCAATGGCTCGGTCCATGTC -CTGTCTAAAAGTGATCGGACACGTCGAAAAATCGCGAATTTGCTCACTGGTTGGGAGCGC -ATTGATGGTCTAATGCGCACCGTTCCCGTTGTGAGAAATGTCAACTGGACACCTCCACCG 
-GTGTTGGAATTGGACGACATAGTATGTTTGTACGTGGTAATCAGTGTGTAAGAATAACTG -TTTGAGGAAAAATTATGATCAAAATTCGACGAAAAGTACTAAAATAAATGAACCTATACT -AGATAGCCGAAATTCGGCAAACTGATGTTTAACCGCCAACTTTGCCGAGATTCCAGATAT -CGAGTTCATCCGTCGGGGGAATTGATTGACCGTGTTTTTATTCGACCATGCGGTCACTTG -TCGCGGCATCTTGTATGGCTTAGCTCTATGAAAAGATTCATTGCAGAGAGCAAACGTGTT -ACTTGCTCCTTACCCATGGGTTTCTTTGTATGATCGCATCGCTCCCATAGATATACTATC -AACCACAGTCGGAGGCGGAAAAGAGATCCCGCAAAGATGAGATCGACATATTACGCCCCA -TTATGAGAGATACGCGAAGTGAACACGGTGGCGCTACAATGCTCCTGGCTAAAATCTAGC -CGCTGGAAAAAGACGACAAAGCTTCAACAGATCAATATCTTTTAGAAACAGACGTTATTG -GGCTATTTCGTGATGATATCCTACAGGATATGAGTGAATGTCTAGAGTGACTGTTACCAT -TTTCGTCCACTGCATATCCTCACATGGATCTTAGCAAGCAAAGTGGATAGGTACAACAAG -ACTTCATGTCTCAGGACGTTGTCACGATGAAATATGAGTGGGCGAGGTAGCTGGTCTCAG -AAGCTCATGGCTTTGTGGCCTTGTTTTGAATGTGCACTAATACGTATGAGATTATCGAAG -AATCAATGTTCAAATCATTATACAGATCAGAAACAGATGTAACGACCAGCTGGTTTCATA -CAGCACCATTGAGCTTCATGGTTTGTGACCTAGTTTCTCCAACTACTGAGCAATCTCTTT -CCCGGTAGTCTCTAAAAGTCTTCGTAGGCTTTCTCTAAAGAGGACTTCCGTTTGTTTTTG -GGTCCACACAACCTTCTTCTCTCTTTGCTCCCTAGCATCGTATTTTCTTGAAATTTCAGA -AGCTCATGGGACCGTCAAATAAAAAGAAAGACCAGTCAATCGGCCAATCATTGCTATCTG -TAGGCAAAGTAAGAAGCGCGTCCGGCAGTTGATTTTCGCTAGCCTCCGTTGTTCTTAGAA -GATTGAGCATAGGGCAAAAGCTCGAGGGAAATGGGAAGAGCTGAGCCAACTGCATTTGCC -CAGGAACCGGAAGAGCACCCCACGTATTATCAGCTTGAGCGGGAAAGGCATCTGTGCGAA -TATTATGCAGACCCTTGATTTTGTCGAGTACAAAAGCAGCCGAGGTATGCTTCTTACTCA -TGGTTTCCAGCACCTGTTTGCTGATTCTGAGCTCCTCGTCTCGATTTGGGTTCAATGTCT -CGCTGTCGCTAAAAGCTCGTGGGATTCCGGCGATAAGGTTAGCCCAAGAATGGATTGGTA -GCAAGTATCGAACATCCTCGTGATAGAGAATCTCCTCATATAGCCGAGAAATACAGGACG -AAGCAGTCATTGAGGCGTATTTGTATGCCGACGGGTTGTGCTGGCGGCCAAGGAAGCACG -TCAAGATAACAGTCACCAAGTAAAATATGTGGAGTTCAACAACAGGGCGGCTGTAAGGTT -GTCTAGATCCTGTGTTGAACAGTTGCATTTCGGGAGGCCACACTTGTCTCTGCAGTCCCA -GTGTCTCTAGAAGTAGATGCACTTCCTCTGAGGAGGTTTTATCAACTCGAGCCAGGTCCA -GGACCTGCCTAAGGCCTGTACAGAGATATGCCAACTGACAAAAAACACTTGATCCAAGAT -CAGGATCTTTGAAGTCACTGGGAACCGGCAGTGGTACAGACGTTTTCAAGGGGAACATTC -CAGGGCGCCCACAGCAAGCCGTCTGCATAGTATCGTTGATAAAGAGATACCACCACATTC 
-TGCGTGCTCTCTCTGGATGCTCCAATCGGCTATATGCCTCCTTTTCATGCAAATGTAACT -GGATCGCAAGTCGGATCGCCATACCAGTCCATTGCCACGGGTTATCTAATACTACCGCGT -CCGGGGAATATGGAAGCCAAGTGCTGAGAATGCAAAGTGCGCTGAGCATATTCAGTGGAG -CTGGATCGTGGTGCAGAAAGAGTAATGTTTTAACCCGGCCATAAATGGCAAATGTGGTCC -ACTCGGCTGGCCCTGCTGTTTGACGCATGGTACTTCCCGCGAAATACAGGCACTGCTGCA -ACAACGTAGAGGTTTGTATCTCAGATTGACCACGATCTAAAACTGGGACGAAGTAGAATA -GTTCACGGAAATAGAAATCCGTAAGTGCTTGGGCGACGACGGGAGGCGGAAGGCGACAAG -CTTCTAGAATGAGCTGACTTCCATCAATGGAATTCGGCTGTTGTGAACCAATGGAATTTC -CAGGCCCCCAACAGGTTATATTCCGATGACCAAGAGGACTCGAGACAGAGGGTGTGGCTT -CCTCTGCTCTACTTTCCTGTGTCGCGGCTGGGACAGCCCCACTTGGCGACAGGAGACTTG -TATCTGAATATTGAAGTGAAGAGCGAAGGGCTTTTCTTTGCTTCCGGGGTCCAATATGAG -GTCTGAGGTGTCAGCGTATAGGTTTTTAGGAACTTACAGCATGAGGTAGTCTTACTGACA -CATGTGGCCTCCATGCTCGCAACGGCTGCAGGCGCCACTTGGTCGACCTTCTAGGTCACA -ACGGATCTTCATCGCTATTAGTCTTCATATCAGACAGTCATACTATCAATCATACCTTAC -GACTGTGACATCTGGTGCACACAGCACGCGCCCTACGTTGCATGGTTAACGATTTTGATA -TTCCAACGGCAAAGGTTTCAAGAGGTGTTTGCCGAATACACGGCAATTGCCGCCCACAGC -CGAATATGATGTCATACGCCACCACCGCCACACAACACATCTGGGCAGAACGTTGAACAT -TGTTCCATTCAAGGTCAGAATCTCAAAGAGGTATCATCAATGTACACGAATCTGCAACTC -GATCATTCACATTAGCTACGGCTCGTATTATCTTTTATTTCCAGCACAATGAAAAGTACA -AAGTCCCAATTGTATCAAGACTCCTCCCTCGACAAGAATTAGAGCAACATCTTTGTCAAC -CTTGACTCCTGTCATGGCACTGTAGAGGCCAAATACCCGATGCACTACCGGTGCAAGATT -CCAATAATCCTTGCGTGTGCCGAAGTCAAACATGAGGCTTTATTGCCAACTCGTTCATTT -TCAATGAGAAATGCTAAATCACTGAATCTGAAAGTCTCGTGGCCGGCGACAACGGGGTCG -ACCAATACTTTGGCTATTCCGGCCAGGTACGTTGTTGTGTCAAGCAATTTGACTCGCATG -ATATGATCGGACAGTGGAAGAGACCTATCAACAAGTCGGTAATTGCCAGGGCAGCATAGT -ATAATTCGCAGAAAGCTATAACCACGAGGATCCGACTATTCAACCCTCAAAATGTCGCTC -GATCTTCGGCTTTCGGCACACTGCACTGAATCGATATAGACCGCCAATCTGATAACAGTA -AATGATCTATATCTAGGCAGAATATTTTTTTTTGGCGTAATGAATATACATATCAGACGA -TAGACCAACTCAGAATACTCCGAAATGGAACTACACATGCTCCGTATACTCCGTACTTGC -TCCATATGTGCGGCCCCACCCCGTCTTTCGCAACCCATCCGTCTATAAAGGAACCTCAAC -AGAACAGTTATCACAAACCACAACCCCAAATTTGCTAGTAAATTATCCAAAAAAAAAACA -ACCACACCCAACTCCAATATGGCAGACCAACCCAACCAAGCCCAAGAAATGCACAAGATC 
-AACTCCACAGATCTCCAAAACTTCGTCAAGGAAATCCTCATCGCAAACAACACATCACCG -GCACATGCCGAAATAGTCTCCAAATGTCTCGTATCTGCCGACCTGCGTGGCGTAGATACA -CACGGCAGTAACCGGATTCCCTCCTACATGGAACGGATCCGACAAAACGCCCTTGACCCA -ACCGCAGTCCCAACAACCCATCAAGTGACGCCAGCCGTCGCACAAGTCGACGCGCACAAT -GGGTTCGGTTTTGTAGCTGCACACGAGGGAATGAGTCTCGCTATCTCGATGGCTCAGACC -TACGGTATCGGGATGGTCTCCGTCAAGCACTCGAATCACTTCGGCATGTCCGCGTGGTTG -GTACAGCAGGCGCTGGAGGCTGATATGATGTCCCTGGTTTTTACGAATAGCTCACCTGCG -CTGCCGGTCTGGGGTGGCAAGGAGAAGCTAATGGGTGTGTCGCCGATTGCGTGTGGAGCG -CCGGCTGGAGAGGGAAGGCCGTTTATCTTGGATATGGCGCCTTCCATTGCGGCCCGCGGA -AAGATCTACAAGGCGCTTCGGCGCGGGGAGAAGATTCCGACGGATTGGGCGCTTGATAAA -GATGGTGCGCGGACGGATGATCCGGCTGAGGCCTTGCAAGGTGTGATGCTGCCTATGGGT -GGACCGAAGGGCTCTGCGCTGTCTGTTATGATGGATGTCTTCTCCGGTGTGCTTTCTGGT -TCTGCTTTTGCCGGGCATGTCACCAACCCATATGATCCGTCAAAGCCAGCTAATGTGGGG -CATTTTTTGGTGGCTATTAAGCCGGATTTGTTTATGACTATCGAGGAGTTCAAGGGGAGG -ATGGATTATCTGTATCAGCGGGTTATTGATTCAGAGAAGATGGCCGGGGTGGATCGAATC -TACTTCCCTGGAGAACTCGAGATGATAACTGAGGAGAAGCGGTTGGCAGACGGCATTCCT -TTTGCCGCAGCTGAAATCGAGAGCTTGAATCGAGAGGCTGACTTGGTAGAGGTCAAGCAT -TTGAAAATACAGACTACCCTCGAATAAGAGCAACTTCATGTTTCACAAAAATAGACGATC -ACTTATGCGGCCTTGGGTGTCTTTTTTGGTAGTCTTCTCGCTATTCTTATGTCTTTTTTT -TACACCAAGGAAGCGAATTTTCCATGTATGACCTGGGATATCGCGATGTCAGTCGCATAA -CAATACATGTCGAGTCCGAACTTCAACTTGGATATAGCCCTTCGCCTGGCAATGAGATAT -TATTACTACTTCAAGCCAAGTATCATAAATGTACTGACCACCTTACTGGTCTCAGAAaat -agaccgagaacaagcaacagaacaggaacgagaccagaacaACTACCCAGAGAGATCCCA -CCTGAAAGTAAAAGGCCACGCATTAAGATACATAGACGCAGGCGAAGAAGAAAATAATGC -ACAGAAGAATGTAAATAGTGTCACAATTGAATTATCGTTGACAAAGCTTCCCATAGAGGA -ATAAACAAAAGCTAAGAGGGGACAATGAAAAGCAAATAAAGGTTCGAGAGAATAATGAAA -TGCTCTGAATAACACGAATAAGGTCTCACTTTACAGCCAGAGATAATAGTAACAGGAGAA -GGGAATGCAAATCACAGATGGGGGTAAAAAACGGGAAAACAGAGAGAAAAGCAAAACATT -AGGAAGCAAGGAAAATGTAGTCAAGACAAAGGAAATAAGAGAAATGCTAGATAATTTATC -AAACACCCGACGGATGAAGAGGAATCCATGCACAAGAAGGGGAAAAAAAGCAGGGTATCA -CGCGGTATCCAAACAACCTCGCTGCTCTTTCAAGCAACTCAAAAGAAGCATCAGAATGAT -AGGTTGAATCTGCTTCTAAAATCAAAGCTCAAACTGCATGAGTGCTTGATCAATCCAATC 
-TCATTGCTCTGTTCTGAGATGGAATATTGCTTTTATGTATCAAATTGCGAATTAACGGTC -GTCGCCGGCGCGGGATTTGGCACGTCTTCGGAAAAAGTCCATGACTTTACGTGCTGCCCC -AGAGCTACCGGAATGGTGAGACCCATCGCGCTCATATTCGTAAGCGTCGGCGAATTTGCG -GTGATTTTTCTGCAGAACTCCCGGCCCACGTCCGGATCGACCAACCTGCATCGACTGCCT -AGGCACACTGTCATATGGACCATTGTAATCATCGTAGTAGTTTGGCTTAGACGAGTGGTT -GTGCTGGCCAGGGTGGTCTGCAGAGTTTTGGTAGACTTGCTCAGCTGAGGGAACTCCAAA -TGAACCACTCTGATGATCAAATTGAGACCCCAGATCTGCAAACTGTCGATCAATCTGGGC -CTCGTAGCTGAGCGGCTCATTTTGGGTGTAAACATCCTCGCCAGGGCCCTCGTCGACTAT -TCCCATGGCATCTTGGGTACCATAGCTTGTCGCCCGAGAACGACCCTGCGCCGGACCTGC -AGGGCCCGTAGCAGGACGTTGCGGGGCATGGGGATCGACGGGGCGAGGGATCTGCGAGGT -TTCCTCCGGAGTCTGGGCCCGGGTATTAGAGAAACCCCGGAATGAGAAGGATTGTGGGAG -TAATGAGAATCTACGCGGACGGGTTTCCGCGCCAGACTCGCTTGCCTTGCGATTATGGGG -GTACTGCATAGAGCGGCGGGAGTCCACAGACATTCTAGAGTTTTCTCTCGGTAGCGGCTC -CTTCATGCTGACAGGTGGGTAACGCTTGTCCTGCTTTGTACGAGTGGCGTTAGACTGCGA -ACGGCCACCCAATATTGAACCAGAACGACCGAAAAGTTTCTCCCCTAGGCTGGAAACAGT -GTTGGATCGCTTATGGCCCTTTGGCGGCTCTTGTCGAGGAGTCGTGTTGAAGGTTGAAGG -CAATTGCTGGTTATGGGTGCTTGGGCGACCAATGGAAGTGCTCGATACCTGCGCAGGATC -CTGTGCAAGGGCAAATTGCCTGCTCTTAGGCTGAGCTAGGCGGCCTTCTGTGTTGGTTGC -GGTAACAGTAGGAGCCACCGGCTGTCCATAGGAGCCGCGAGAAGGCAAACGACCCGTATT -TCCAAAGGAGTTCATGGATCCACCAGTAGCTGGCCGAGGCCCTTGAGACATCACAATTGC -CTGCGACCCAGTAAGTGCTGCGTTGTCCGAGGTCGAGCGTGGTAAGTGACTGGGAAGTGG -CTTGTTGAGGTTGAGGACTGGTGCATCTGGAGGCTGGTGCGTAGGCTCTGTCGGCTGCTG -TACAGACGCCGCGATTTGAGCCTCAGATGGAGCCGGGCCTCGGCTGCGTGATGCAGATGA -TTCCACCAGGGGTTCTCCTCGGGTCGTTTGTGATTGAGGAGCAACATATTCGACTTGTAC -CGTCCTCCGCTTGGCATCGCGAGGAGTCCTGGACTTTTCGGGCGGTGGGTCTTGAGAAAC -ATCTCCGGCATGATGATTCAAACCACCAACAGCAGGAGCACTGTTCTGGTGTGCCTTCGG -TGGTTCACGCACTGAGGCACTGCGGACAAGAGGCGGTGCTTCCTTTTGGGTTTCTGTAGA -TATGTGAGTAACTTGAAGCCTGAACACATGCAGAAAGTAAGTTCTTACCAGAGGGGATGG -TTGTGTTCGCAATATCCACAACATTTGTCGTGCTGCTTGTGATGTGAGACACAATGTTCG -AGAACTCGCTGAGCCAGCTGTGACGGGCAACCTCGAACAGGTCTGCACGTTTCCTGGGAT -CGGGAACTAGTATCCGTCTCAATAGATCGCGTGCATGTGGTGTGACATATTCCGGGAATG -TAAGAGGAGTCGTGACAATATATTTGTAAAGAAGGTTGATGTTATCGCCATCCGGGTTCG 
-CTGGGTCGTCGTCGAAGGGTAGGTATCCCGCCAACATGGCATACTAAGTTTCACCAACAA -GTCAGTTCAACGTCTGATTGTCATGGCTGGAGGGAAAACATACCAAAATTACACCACAGC -TCCACACATCCACCTTCCGGCCTGTGTACAAAGAATCGCTCACGACAAGCTCAGGGGCTG -CATAGCAAGGACTACCACAGCTGGTCTGCATAAGATCGCCTCGCCTTAGTCCATTTGGAC -CAATTCTGTCCAGTCGCTTCCGTTTCACATATTCTTTGTTGGTCAAGTTGTACTCGGTTT -CCTCGTCTAGGGGATCCACTGGGTCGAAAGTGTTGGCGAATCCGAAGTCAGTAATGATGA -TGTTGCGATTCCGATCGAGTAGCAAGTTCTCCAATTTGAGATCACGGTGCACAATACCCT -TCTTGTGAAGATAGCCAACTCCGGAAACCAGCTGAGCAAACAGTCGGCGTGCAGAATTGT -CCTTGAGATACCGATTATTTAAGATATAATCAAATAGTTCACCTCCTGAGGCATATTCCA -TGATAATGCCAATGTGACGGTCGGTTTCCACCATTTCGTGAAGGCGCACAATGTTAGGGT -GCGCCAGCTCCCGTAGGATGGCGATTTCACGGTAAATTTTGGGGAGTCGGCTGGGGTTGG -TCCCCAATGTGTCTCTTCGGATGAGCTTAATTGCCACCTGGATACTACCGTCACGCTTCC -ACCCCAGCTTGACCTTTCCAAACTCACCCTCCCCGAGGGTTTGCCCGAGGATGTACGAGC -CAAACGTAGTTTCCTTGCGTTTGATCACGTTTCCATTATTGTAATCGTGCCGACTTCGAG -GCCCACGAGCCTCCTCCGCACCGTCGGCGCTAGCTGCACCGAAACCAGATGAAGACGCAT -TTTCTGTACCGGCCTGTCGTATTTGTTCTCTCTCTTGGTCCAAAGCATGGTCGCTGGTCG -GCACAGAATTGACCTCGGCGCTTTGTTCTTTGACAAAGTCTGCCTGGCCAGAAGCACTAT -CTAGTTGCGACCCAGTGGGATTTGTGACTGGGGCTGTCCTTGATTGTTGTGCCTCCCGAG -GTGTCTCCTCGGACTGGTGCCTCCGTGGCCTACCATTTCGGTCCTCGCGGGTGCGATCGC -GGCTTTCTTTTCGATCAGACCTGCTCTTGCCTTGTCGGGAGCCACTTGTTTTTTCGGTAG -AGGTTCCATTTGGACTGCGGCGACTGTGGTGGGTAGATGAGGTCCTCGGGGGAGGAATTG -GCGGTGTACCTAACATGTCGGGAAAGGCGGTGCGCGCATTCTTCTGGGCGGGTGATGATG -TGCTGGTCGATATCGCTGGGGCATGAGCGGGAAGTGGCACGGCCTGCTGACGAGAAGTGC -TCATAGCCGAAAAAAGCTGTTCGGGACCGGCCTCAGAAACGGCTAATTAAAATTAGCAAG -CTGTTCAACAATGTAATGATGCAATGGTTTAGCGGAGGGAACGGCTACGCGCTGGGGGAC -AATTTTTATCGCGGCCGATAGCATACCTCTAGGAGATGCGCAAAGCCTTTCAAGACTTGC -TGTGACTGTCCGAGGGAGCTTTTGCAGACACTGACAAGGTTCGGGTTATCAATCGCGATT -GGTACAATTAGCGCGAGGGAGCACAGTGTCCTCCCGAGGGGGTTGTTTGGCAAAAAGTCG -GGGGCGACCGGTCGCACTCGCCAGATGACGATTCAAGGCGCAATCTATAGGCATTGAGAT -ATCGGTTAGCTTCAATTTTTTTCCCCAGAATCTTATCGATAATCGCCAAAAATGTGCCTA -TTTACTCAGGAATGGGGGGTTGGGCAACTGACAGCAGGAGATCGATCGGAGGAATGGGGG -GGGGAACTAGCTTACGACCAATCGCGCAAAAAACCTTTGGATGGAGGTTCAATAACGCCG 
-AGGACACCAGGTCGCAACCTTCTTAAAAGGTAAAGACGCGTCGAAAAACCACTTGTTTCT -CGAAAAACGAAAGCAAAGCTCAAGATACCTGCGCTGCAGTACTTCAGAGTCCCCCTCTGT -CCAGACCCACGATTTGGGGTTTAGGGTCGCACGTTGTCCAAGAGATCACGCAAACGCAGT -AGCAAGTGTCTCAGCCGCGATTGCGAACCCGATCAACAGAACTCTGAATCAATTGCGACA -ACAGTTGAACATTGTCGAAGATAGGAGGGGCGGACAGGGGGCGCGAGGGGAGGAAAAAGA -CGGGAAGCACCGGGATGGGAAAAAGGGGGAAGAAGATATTCCCCCAAAGAACAGAATGAA -AAAGACCAAAAGAAGAAAAAGAAAAGCCCAAAGAGTTAAAGGATGAGAAGGTGAAGAAGG -AAAAGAGGAGGTGAGATACGAGTGTTAGTCTGGGGGAGGCGGGTCTTGGGGCCTTTAGGA -TATGACCGCTTGGATACATGGAAATTCTATTTAATTACATATCAAGAGCTGTAGGAGTAC -GAAGTACGGAGTTCTTCCTTCAGTATATACTCTGTTAATGTGATCTTATTTATCTTTCAA -TCTTCATCTTATATCACTGGTATGATCCATGGCGACAAAGTCACCGTTGTGTGCGTATAC -TGTGCTCTGTATTAGGAGTGGATTCGATATTGACCATATATCGATCCCTTGCAGTATTAA -AAAAAAGGGCTCGAGTCCCCTCAAGAATAAAGGGAGATGGATTTGACAGAAATAATAAAT -AGGCGCCAAAGAAAGTGTCAGAGGGTTTCAAGAAAAGAAAAACTAAAATAGGGGGTTGTT -TCGAATGCTCAAGTGGACACCTTAGGATGATCATCATTAACCACCATAACAGCTCAGATC -TGGTTTTTTTTTTCTGGAATTATATTTGAAATTTGTATTGGATTTCGAAATTCAATAAGA -GTGACAACTCCCTATCTCAATACTCCGTACCAACCGTTTTGAATGAGAGTATTATACCTA -GTACTCTGTACGGAGCTTGAAATTCAATGTATATCTACTTAGGGGCTAGAACCAAAACGA -TTTACCCGCACGAACCCTGGAATCACAGCTCCAACCATGGTAACATCTAGCTACCCATGG -TTGAATTTCGATCTAAGCAAATCTTGTTTACTTTCAGTTTTAAATCTGCAATATATAGTA -CGCtgtatgtattatatgtataggtatgtattacatattttatgtataggtatgtattac -atattttatatgtatgagtaatgCCCTTCCCGAATAATCCGCCGAACCTTACTCGCTACA -TTTGCCCATATCGACAATACACCTACTCCGTATAATATATGTACAATATTGTAGAGCTTT -TCTACCTCACCAGAACAGTCTTTGAAAATGGCATTGCACCGTTGGACAAAGTGGGCCATG -GACAACTGTCGAGGCGATGCGTTGGCAGTTTTCGCCTCAACAATGTATACTATGACCCAA -TAGATCGTCCAGCGGCAACCGCATATGAGCAGCCCTGTCCAATGACTTCTCTTCGCCGTT -GCTTACCCACCCTCACCTCCTTAAATTTTGTCTGCTGATTGACCCCAACAAAGACGGTCT -ACTGTACAGAGCACTCCTAGCCAGAACCAAAAACCTGTCCAAGGGTTCCTCAAAAGTCCC -AAAGGATGTGAGATTTCTGCAAAGCATGTGATCGGGAACTCTATCATGGATATACAAGCT -CCACTTTCTACATATCGAAAAATAAAAATAAAGATAAGGTGAAATAGACAAAGCCTGGAG -AGCTTGGGAAACGGGAAATCATGCCGCCGTACGAAGGTCATCAAAAATGAAATAACTTAA -ACATCTCCCCTATACGAAATCTCGTTTCCCCATAGCACTGTAATGACCGTAGGCGCCAGC 
-CCCCAGATTTTTACCGCTTGCCCCCTGACTTGGTCGTCCCGTCGAGGGACATTGTTCCTG -ACGCACCATATGCAGGAGGCGGTGTGGGGTTAGGCGACGCACGACGCACAACAGGGTCTC -TGGCGCTCAAGCCAGAAGGGCCCGAAGCAGCCTGCGCGGCTTCTGCTGAAACTTGGCCGC -CGTCTCCACCACCGACGACAACCTGTTGCCCGTCGCGCATTCTTCCAACAACCATACCCT -TGGCAACGGATTTCAGAAGCGCACGGCGACTGACACCTGTGGCATCGATGTAAACCAATC -CAACGTTCTTGATGCCGCCGGTCAGGTTGGTTGCAACGGTGCCGGCCTCGTTACCGTAGC -GGTGGTCGATCATGGAGCTGGCGGCGTTGGCTCCAGAGAAGAGCATGTTCCGGGCGCTCT -GCTCGATGCCATCGGCCAGGGTTGAGAATGCGATCATTGATTTGTTCAATACGCCAGGTT -TGTATTCTGGTGGTGGGTTGCGTTTGTCAAAGCCTTTCTTGCCGGATTCCTGTTTGCGTG -CCAATGACGCACCTAGGTTCTGTGCAACCTTTCCGACCTGTCCGACTGTGCGGGCAGAAA -GATCTGCTGCTCCATGAGAGAGAGTACCGACCTTGCGGATGCGCGCGTGAGTAGCATCTG -AGAACTTCACAGGCTTGACATTCGGCTGCGTCCTTTTCTGGAAATTCTCCGCGCCACTCG -CAATAGCACTTGCTAGATATGTTGATCCAGTCACGATAAGACGAGATGCTGTAGCTGAAC -TCTGAACAAGAGTCGAGTTCTTATACGCTGGGTGTCGGGCCATCGCCAGGTATTCCTCGG -AGACATTGCTGACATTGACTTGGTTGCCTCCGCCTTCTGCAGGAAGTTCAATTCCCACCG -GTCCTGTTTGCGGTATATTAGTCGGTTGCTGGTTTGAAAAGGCTCGTAATTCAACAAACT -TACTCTTAGAACCAGGCTTGACGTCGGCATCCTCAACTACATTGTAGCCATCTCCCAGCT -CTCCAACGACACTGCCATTTTCTTCATCCACGAGTACAATTCTTCCATGTGTATCTGCCG -TCTTTGTATCTGAGGTTCCAACATAACCCTCGCCGGGTGCGTACTCGGCCGGGTTATAAG -CAGCATGATCCTGCGCCTTCGGGTGCGGAGTGCACTCGAGGAATGCAGTGCACTGAGCAA -GGATAGATTCAAAGGTATCGATATCATCCTGGGATACGTTTCTGGGGCCTGTGCCAATGC -TAGGGAACTGGATACGAATGAAAGCACCAGCCTCGGGACCCATGTCCCAGCGGGGAATCA -GATAGCTGTCAGGAGGTTGGTGGAAAATCTGTGTGGTCGCCGGTATCGCAATATCGAGCT -CAGGTGGAAGATATAAATGAAGATAGAAATCCTCTTCCGGCGAATCTTCAGGCCAATCTG -TCCCAGGACCAGCAGTGGCGGTATTGGTGGGGACCATTAACAATGACAGGGTTTGAGGAC -CAGACGGCGTGAGTTCTTGCTCCTCGCCATCTTGGATATGGAAAGCACGAATATTATTGA -CGCTATATAGCAGTCGGGGATCGTGGGCTGCGCTCGCCATGCTGGTTGCTGTCACAATTG -GGGAACTGCCCGCAAATTAAGGTGCTGATAACACGGTTATGACGTCGTCCACAGGTGGAA -CGATAGCTGATACTAAACTGGTTGAACGAGGCTCGGAAACACCTCAAAGGACAAGAGATA -ATGAAATCTTCAAGATTAGGAGGAGATAAATCTATTTTCAGACTTCGTAAAGGAGGGGCT -TTTTATGGAACTATGCCCGAGCGCTTAGATAACTGAGGGATGACCCGTCCACTCGACAGA -AGGAGGCTGTGTTACGCCACACTATGTTAGCACAGCCACACTATACCCTAGCCACACCTG 
-GTGGGGTACAAGGTTGTAATCGCATACCTATGATATGATTGACCAATGTTGTCCCGGGCA -TCCATCTATAGACGAAATTCAAAAGCAAAACTTGCCTCGTTCGTTGTGCCTGTCCAAATG -TTTACGTTCCAAATATTATCAGATTTACACCTGGAAAATCCTTCTGCGTATGATCTGTTC -AAAATTTCTCCCAAAGCGCCATATCTTGCTCTTCTGGGTGACATTGGAGTTGCCAAGGAC -GCAGGCTTCATTACATTCATTGAGACACAGTTGCGCCAATTTCAAATCGTTTTCTTTCTA -TTAGGAAACCATGAGCCCTACCACTCAACTTGGGAGGAGACCAAGCAGGCCCTCCATCGA -TTCTCGGCGTCTGTCGATCGTCGGCGTTCAACTACAGTACAAGATGGACATCCGGAAATC -ATCGGTTCTTTCGTCTTCTTGGATCAAACCCGCTACGATCTCTCATCAGACGTGACAGTT -CTGGGTTGCACACTGTTCTCGCGGGTGAGCGAGGCTCATAAGGAACACGTCAGCTATGGT -CTCAATGACTTTTACCACATTGAGGACTGGACAGTTGACGATCACACGGCTGCTCACGAA -GCAGACCTGGAGTGGTTAAATCGACAGGTCTCTCACATCGCTGCCTCTGAGCCTCATCGT -AAGATCGTGGTTTTTACGCATCATAGCCCAGTCACTCAGGACTGGCGGGCAGTTGATCCA -AGGCATGTAAATAGCTCGCTGTCGTCGGGCTTTGCCAGCGATCTCTCGAGTCAGGACATT -TGGAAGAGTCCGCAAGTACAATTGTGGGCATTCGGGCACACCCACTTCAACGCCAGTTAC -ATTGAGGAGGGTACCGAGAAGATGATCATCAATAATCAGCGCGGTTATTACTTTTCTCAG -GCACAGGGATTTGATGCAGAGCTAGTAGTTATTGTCTGATACAATGCATTCACAAGTTCT -GATGCTGTAGCCGTAGTGTGCGAGCATGCACATATTTTGACATAGATGTCACAGAGAACA -CATGGCTCATTTCACCCTTATGGCCCTGAAATAGCCGGTGGTCTTTGCCTCCTGGATTCT -AGAAGCCGTGGGTAAATCCGCCTTCGCGGACGAATTGATCATATTAAAGTAGAAGACCCG -CAGTTGATCCCACTGCCACTAAGTTAGCCACCTGCAGGTGGACTACCATCTTCGGCAGCT -TGTCAAAAGCTGCATTGTAGTAAGTGCGAATCTGTCATTCAAGCTGCCACCAGACATCGA -CTAGATAAATCAGGTCCGGTCAGATTCACCCTCACAAATGTGTGCTTTACTAATGATAAG -TCTATATACCTAGCTAGATCGCCTGAATTCCATGGTTGGCGGTCCGTGGGACATCCATGA -GTACGGCGCAGAAGTTGCCACCCTAATCTAGTATCTTTTGTCGTGTCTGATTTATGTATA -CTACCAATTCAACCTGCAGCTGGGTACTTCTTCCACAACCACTCCAACAATATGTGAAAA -GCAGAAAGGATAACTTTTACTTTTGTGCATCAGGTCTTGAGACTTCTCTGTTTGTCGGGG -CCTATTTTCAGCTAATCCTAACCCTTCAAACCATCGCAATAATTCTCAGCTCAACTTCGT -GTCTCAATATCATCTATCTCAAAGTCTATAAATAGAATTATACAATTGGGCTCGTAACTC -AGTGGTAGAGTGCTCGCTTTGCATGCGAGAAGTCCGGTGTTCGATCCACCGCCAGTCCAT -ATATTTTTGGCCGAAGTTGAGTCTTGACTCCCTTCAGGACCACTAATATGGCCTCTTGGA -TATTACCTAATTATGTTTATAAATTTAAAGATACAGCAAAATCGGTTCCTAGGACTTAAA -TATAGCGAATACCTGCGTTTTCCGATCGGATGCCGCTGCTTCGTCTCCACCATACGTAGC 
-AATAACGAATTTAACCCCTTACTACGATGCTCCTTTACCCTCAATCTCTCTTCATCTCCC -TAGGCCTCACCTACTACAATCTGTACTTGGATATAGTTCCGCCGGCTTTGCATTGTCTAC -AACATAGGTACAGTCATGTTTCATGAGTATTTGCACCCCAGTACTTCCTAATGCTTTCCA -TGTACTCTCTTCTACTGTGGCCTGCACGCTGGAGCTTCTACCATACATATATAAGCGTGT -ATGTGAGCCTTCTATACTGATTACTTAACGCGTTCAATAAAGTAACTAGTTAACAAGCAG -AAGGTGGCCTAAAACTTTTCGTAGTACTGGGGTGCAAATACTCATGAAACGCCGGTGTAC -CCACGGTCAATCGTCGTAAAATACAGTATTCCATGACTCCTTAAGAATGCCATTTTGGGC -TTAGGGCTTTAACATTAGCCATTCTAGATCACTGCAGGTGGTGTGACTAACTACAAGTCA -CTATGACTCCATGTTAGGAGTAGGCAAGATCTGGCGATGTAGCCTAGTTTGCATTACGCC -CCTGAATCGTCCCTTCCCGCCTTCGGAATTCAAGGCTTCGCTTCGCTGGCATATACCACC -TTGACTACTTCCCACTTCAGGTCTATCCGATTTTTTTTTTGGTTGAATCGAATACATCGG -AGACTTATCTCACGCAAGGCGAGGCGTTACAGTACTTTTTGAAACCAGGTTTCCCTTCAC -ACTCTTCGAAGTTGACTCCGGGAAAATGATCATAAGACGCTAATCTCCTTTAGATCACCG -CCGCGTTTGCTCGACAGAACGCTAAGGGTAATCACCGTGACCATGATCCTGCTGTCAGAG -TAACGGGGCTTTAACAACGATCATGTACAGTATCTCAATTATCTCGTAACCTAACGAACA -GTAGCTAAGGTTGAGGATCAAATCTTAATCTAATCAATGTGTAGCTAAGATATATTTTTA -GCTCATACAAATAGACAACTGAGAGCCAGCCCTGCATCATAGATTCTCTCACTCGTGGTG -TGAAAATACCAAAGCTCTGACCTCGACACTAATCCTTGGAGACGGAATTCCCGAGAAACC -CAAGGGGGTAAATGACGTGTGAGGAGCATCTGCAGTAAATTCTCATTAGACCAGTGTACT -CTGTACTCAGTATGGCTTCTCCAGTCCGTAACTCGGAAGGGCTTAAAATAGGGGTATAAG -TTGATAACACGAATGGCCGACCACTTACATTTGACACTGCCCTCCTTCGTATCAAAGCTT -TTAAACAGAAGCACGTCTTTCACGTCCTGTCCACTCATATAATACCATTGATATGTATCA -TTCGGCTTGTACTGTGGATAGAGAAGAGTGCCAGTATCATCACCAAGTATCATATCTGTT -TCGATCAGGTTACCCAAGTCCAAGGCTCGACCATCACACACAGCAATAGGGTGATCTTCA -ATCGGTCCACTTATTGGTCGCCAAAGACTATTCGTATCAATTAGGGCCAAGTATTCAATC -GGCTGTCTGCTGCCCCAGGCATTTGGAGGAAAGAGAGAGGGGGGGATAGACTGTTCTGGA -TCCTGGGACTCACTTGATTATTTGGACTCGTCCAGAAAGAAGGTGTTGAGATTCCTTGGG -AAGATGATTTCGAATACGCTCTATCATGGGAGTTCCAGCTTCGTCCCGAGTGAGAAGCGT -GTCTTGAACATTGTCAGCAATTATCGACTTTGATTCGATAGACTCGGTCATCTTGACGAA -AAAATTCTGTAACTCACCAATATGAACCTGCCGGGCAAAACCCTGCAAATTTGGATTTCT -CCGTCTTCGCTCCTTTGCGCTTTTTCGCTTCCTGATCTATCAAAATTGAAATATAGATCA -GTGGCCGTAGGCAAAAATAGATTCGTTTTACCTACTTTCCAATCGAAAATCAGAACCTCA 
-TCGGCTGCCTCAATCTCCTTTTTCAAAATAGCCTCGCATTCCGGCAAAAAGATACTCTTA -ATCTTTTCTTGGTCCGAATAATCATACGGCTTCGTAAGAGCCGGACCGTGGCATCTTTTA -TAAACGAAACCATTAGTGTCAAGATCAAACGGCGGCTCCTCCACCAAAGCCCTGATATCC -CTTATAGTCTCCGTTTGTTCTGCTGCACAGAATACTAGATTGGTCGTCTTTTCACGTTGT -TTTCCCGGATTTCTTCGCCTAGAAAATTGCCCAACTTGATAGGGGCGAGTATCAGCGTAG -CTGGGTTGCCATTGGAGATATGTCAATCTTGCTTCAATATCTGGCATACTGCACGCCTCC -TGGAATATGAAATCAAGTGAAGAGGAGGAAGTAGAATATACTTTGATGCAGCAAGTACGC -AGGCTAGCAGCTTCTTATATTTCAACATCTTTGCTGGCTAGCTAAGGACCTCATGTCATT -GCAAATCAGGTCAGACGGACTAAGCCGGAGGTGGTAGCTACCTACTATAGGGAGGTGAGT -CATTACGGAGAGAGGCCAAAAATACCACACGGGGTTAGGCGTTGACCGTATTGTAAAACG -TAAAAATCCAATCAATAATCGTGCCAAGCGCCCAACACACCGGTTTTGGCTCGCTAGCAT -CCTGCATCTAGGCTTTGGCCGCTATGTTCCTTCGCAATAGGCCCTCATATTCTAGTTGCA -ATCCGAGAGGGTAATTAGTCTTCGGAACGTAAGCCACAAGTCACCATGAACTTTCTGATT -GGGGAGGTCTATCTGATACAAATGCACATATTCATATCATCCTCTGATACGGCTCAATTT -GAATTTGCATGAGGAGTTGTGCTTTCGTGATGAGATGTCTAGCTATCTTGGAAGGATATG -TACATGTTGTAATGCCTGAGGCAATAGATGGGGCTGCGGGATTTGGGTTTGCGCTTAGCT -GTTATAGGAAAAGGGCCCGCAAACATACCGAGCCAATTGTAGCGAATTCAAACATTGCAA -ACATATTGCCTGGATGCGAAATTTTGACCGATCGTTCCTCACTGACAGCAATTGCGAAGC -TTAGTGAACAGCTCGCTGTGCCCACCGAAATATGATGGAACCCGAAACTCCAAAGGTCGA -GGTCTCGCAGGCCAGCGTTGCTGTGCCGAACATGGAGATCCCGAAGCGCGATGCCACAAA -TGATTCTGGCGCCGCGGGCGTGGACGAGCCTCCATCCAAGAAGGCACGATTGGATGAACC -CTCTCAACCGGCAAAGGTAGATATGCGCGATAGGGGAGTGGCACCGATCAAAGCAGAGTA -TGCCCGACTCTTCCCCTAGGGATCGCAAATGGCTAAATTTTGCTACTCAGGTACCGCATC -GAAATCACACCAAAGCCCCGCCCTGCAGATGAGACTAGTCCCGATGATGCCGCCGAGGCC -GCTCCCCGTGATGAGCGTGATGGTGGTAAGAAGGACAAAGGAAAAAAAAAGAAGAAGGGC -CAAAACACCAATCGCAGTTATGGTACATCCAAAGATGCGAAGGGCCTTTGCAGCACGAGA -ATGTTTGCCAACGAGTTCTCCGCCGAAGAGTGCCAATATGGCGAGAAATGTCGATTTGAG -CATGATTTGCGTGTTTATCTGAAAGACCATAAGCGCGAGGATCTTACAACCCTGAAGATG -TGCCCCGCCTACGAAACCCTAGGCAAATGTTATTCCGGCTGGAAGTGTCGCATGGTTGGC -TCGCACTCAACCGAGAGAGAAAACGAAGACGGCAAGAAGGAACTGATTTTGCTGGAGGAC -ACAGAACGTATGCAGAAGGTTCAGCATCGTGTGGCCAACGCAACGCCTGACGGAATTGTC -AACATCATCTCGAACGAAGATAAAATCGCGCTTATGAGAAGGCGGGAGGATACCCCTCGG 
-GCCGATTCCTATACCAAATGGGCCACAGAAGTGTCTGCAGAATTGGAGAAGGCGATCCAC -CAGCGATCCACCATCCGTGAGAAGGGTGAGCTTGTTGAACCGGACGAACAAGCGAAGATC -GACGTACAGGAGAACCGTGCTCAGTTCCTTGAGCCTCCCTTCATGCCATCAGAGAAGCGT -CGCATTTATTTCGGACCGGAGACACCCGTTTTGGCACCTCTCACGACCCAAGGCAACATG -CCATTCCGAAGACTCTGTGGAGATCTGGGCGCGCAGTTCACATACTCAGAGATGGCTATG -AGCATGCCTTTGATCCAAGGCTCGAGGTCCGAATGGACCCTATTGAAGGCGCACGACTCC -GAAATGGCACCACCAACCGTTATCCCGGGCGATAACATTGTTCAGGGCTACGATAACGCC -AAAGACTCCAGGTTCGGCGCCCAGATTGCTGCTAATAAGCCTTGGCAAGCTCTTAAGGCC -ACTGAGGTGTTGTCAAAGTTCACACCCAACTTGCGCGTCATTGACTTGAACTGCGGCTGC -CCTATTGAACTTGTATTCCGTGATGGAGCTGGCTCAGCTCTTCTTGACCATCACTCCAAA -TTGGAGAAGATGATTCGCGGCATGAATACTGTCTCGCAGGAGATTCCGATTACCGTCAAA -ATCCGCATGGGAACCAAGGACAACCAGCCTACCGCCCAGAAGCTGGTCGAGCGTATGGTC -CTCGGTGGGTATGAGTCTAGTGTTTTAGACCTCGGCCCCCCAGGTGCCGCGGCTATTACT -CTCCATGGACGTAGTAGACAGCAGCGCTACACCCGACAGGCAGACTGGGGATACATTTCA -GACTGTGCAGCTCTCATCAAGCGCCTGAACAAGAAGCAGGATGCTCTTAGCGACACTATC -CGTGAGCCGGATGCGCGCCACATGCCCAATGGTGGCAAGACTTATTTCATCGGAAACGGC -GACTGCTACTCGCATACTGACTACGACGACCACGTCAAAAATGCTGGCGTGGACTCTGTG -ATGGTGGCTCGTGGAGCTTTGATCAAGCCCTGGATCTTTGAGGAAATTCAGACCGGCCAA -TATTTGGACAAGTCTGCGACTGAGCGTCTGAGCTATATTGAGAAGTTCGTCAAGTACGGT -CTCATGGCCTGGGGCTCCGACGAGCATGGCGTTGGAACGACCCGTCGCTTCTTGCTAGAA -TGGCTGAGCTTTACCTATCGCTATGTTCCAATCGGACTGCTGGAATACCTTCCTCCGAAC -ATTCAAGACAGACCCCCTGCTTGGCGCGGCCGGAATGAGTTGGAAACGCTAATGGGCAGT -GGCAACTACAAAGACTGGATCAAGATCACGTAAGTCTGCTTTCACTGGCCTTTTTCTAAG -CTAATTGACTAACCATCCATCGATTCCATAGCGAAATGTTCCTGGGTCCTGCACCAGACA -CCTTCAAGTTCGAGCCCAAGCACAAGTCGAACTCCTACGAGGCTGAGGGTTAAAAAAAAG -ACCTATATATAAATCTCACATGTCCTAAAATTGTACATAGAATCATTACGAATGCATTGT -CCAGACATTTTAGCAAGGCGCTGAATTTTTTTTGAGAAATTCTTTAGGCGTCAAAAGGAG -TTCTCAATGAACTTTGTCGCATCAACATTATATGTGGCCACCGTCCATTGTCCACCGCGA -TAATCAGTGAGCTCACACTGGTATGACTTGCATAAAACCATGAAAGCCTAATCGACCGTC -AAGAGTTGCTCGTAGAAACCACCAAATCCGCGTGCGCGATCATGAAGATGAATTTCCAAA -ATCCAATGACGCTTGAACCCATCACGACCCGGCAGGCCCATTGAGTCATTATTGCCCTTA -GTGATATACAGTGAAACTCTATCCCGCGGAATACGTTCATGAGGGAATGACCCAACGAGA 
-TGGCCCGCAATATCTGCACCGAAATCGAAGCCTTCCTGTTTCGCAACCTGACAGGAGATG -TTGTATAGCTCGTCGCCCGACATATCTGGGTTCTCGCGATATTTTGCCTTGACTGTATTC -CAGATCGGCTCAAGGGCGTCCCGCAGCTTCGACTTATTTGGATCATTTCCTAGGACGAAA -GTGCGACCGAAGTCGGCTTCCCAAGCTTCAAAGACTGGCCCCAGGTCAACAACTAGGATA -TCATCAGGCTGGATGACACGATCCGGGGGGTTATCGTTGAACGGGCTCAAAGTGTTTGGT -CCACTGCGAATGACTCGTTTGTGCCAGTGGGTTTTGATACCATAGCGATCGAGTCCTAGT -TGGTAGATTTCTGCATTTAAAGTCTTTTCGGTGATTCCTGGTCGTATCAAATCGCGTTCG -ATCTCCTCGAAGAGAGCGACAGCTTTGTCTTGCGCACTTAGAAGCTGGGATGCTCTTTCA -ATTTCTTCGGTACTCAGATTATCGTGCGCCATAGCTTTGAGGGGAAATATAAAAACAGTC -AATGTAGAGTGTGAGGATCAAGAGTTGTAGATGTTGTATAGCTATATATATACAATCAGC -TGCTCCAATAGCCGAAACTTCCCCACTCGAGAGGTATCCTTTCCCGGTGACTCACCGAAC -ACGATGTACTTGTACGGAGTCCGGAGTATATAATTGGTGTGTCAATTAGTTCAACAGGGT -CGGGAACTATGGTAAAATGATAGTGGAATGATGACCTTGAATCCTGAGATTTAATATAAG -TTGCTCCGTAGATGTAGATCTTGTGATAACCACATATACTAGATTTTAGTGTTACATAGC -AATTTGGTGAATCATATTGGGGGAATTAAAGCATTCCAATTAAAGTAACGCGCCATATAT -GTTGTAAACATCCAAACGTCCCAGGAACGACAAGTCAATCGATCCTGTCCTAATTAGGGT -TGATGACAGCATCACGTGCAGATCCCCCGAAGCCATGACGTCTTCCTCCGAACTTGAAAA -ACTTCCATAAGACCACCTCGTCTCTTGATCTCCCCTTCAATTGCCTCAACTATTTGATCG -GATCTCCTCTTGCTATAATCCTTCTGTATCCGTGCTTGTTCACTACAGAATTTCTGTTTC -TCGCATTTGTTTACATTGCCCCCTAAGCGATCTTTGTGAAAGCTCTCGACACCCCGCCCC -TTTTGATTAGAATTTCGATCGAAGTGTCGCTGGATTGATAATACACGGCAAAATGACTCT -CTATTACTCCCTTGTAAGTTCGACAAATTCCTTCCTCGTTGCATGTGGTGGTGCTGCCCC -AGAATCGTTGCAGCCTGCAGCCTGAAACTGTCCACCCAACCGAAGCTGCCAACGTTATGT -CTCCAGTTCCTGCCCCTATATACTAACGCAACTTTCTGTTATAGGTGTTCCTCCTTTTGG -TGTTTGAGATGGCTGTATTCTTGGCCCTCGTCATACCCCTCCCTCACACCATCAAACGCA -AGCTGTTTGCGTTCATCTCGGAGAGCCCGATCATCGCGAAGCTGCAATATGGATTGAAGG -TAGATTGCCAATTAAGAAATCCGAATCTTCAGTCAGACAGCTAAACTCGCCCCCAGATTA -CATTTATTTTCATCCTCATTTTGTTCCTCGACTCCGTGAACCGTGTGTACCGTGTGCAGC -AGGAACTGTCCGCCTTCACTAAGGACGGCCCCGGCATGGGGTAAGTAGGCGATACTCAGG -TATTCCAGCAAAATTTCTAACCACTGTCATCAACAGGGCCGCCCACCTCGGCACCGACCG -CATGGAGGTCCAGGCCCGCAAGTTCTACTCGCAGCGCAACATGTATCTCTGTGGATTCAC -TCTGTTCCTCTCCCTGATCCTCAACCGCACCTACACCATGATTCTCGAGACTCTCCGCCT 
-TGAGGACCGCGTTCGTCTCCTCGAGGGCGACAAGAAGGCCGGTGGAAAGGACTCTGCCCG -TATCGCGGAGGCTGGTTCAGTTGGCGAGATCGGTCGTCTTAAGGAGCTTGTTGAGACCAA -GGACCGGGATATCGAGACCCTCAAGAAGCAGTGCGAGGGTCTGACCCGCGAGTACCACAG -TCTGGGTGACCAGATTTCCAACGAAAAGGGCGACAAGAACGAGAAGAAGGACAAGAAAGA -CCTGTAAAAGGGTTGCATTGTTCGATTCTGCTGGATTCAGACTGTCGAAGCCCAAACTTT -GGTAGACTATCCCTCGCACATCTTTATGTTCACTTCGAATCGAAGCCGTCTCCCTTGTTA -CGGGAGTTGAAACCCTTTTCGGCAACAAAATCACTCGACACATTCTGGATTAAGTGCTGT -GCATAACTTTCCTAACTGAGATTTGCAGCTGCTAAGAGTTCTCTCTCGCTGCATTTATTT -CCATGTACTAGCAGATATCTTTTAAGATTTTTCGGTAGGATCCATAGTGCTAATTTCATA -GTACTTGGCACATTCAATTCGATTTTACCCTTTCATATTCAATTGTCTTGTTGATCTCCA -TTGCTCTATTGTGAGGCTTGGAAGTCGCAAGCCTGTTAGAGTGCATTAAGAGCTAAGGAC -CAGACCAAAATAATCGTCACGTCTAAGGATCATAAGTTATTCGTAAATGTACATAGTATA -TGGATCGGTTGATCACGGCTGGACAGCAACAGGTCAACAAGTCAAACCAGGAATGTCCAA -AAGCTAGGATATACGAGTTCCTCTTCAAAGATCCTTCGAACCTGTGCGAGCACAAATCGC -TCAACCTTCCCAACATTCTTCAATACCTGCTTCCCATCTTCCTTACGACCAATCTCACTA -TCCATCCGAATCTCCTGTAGCAAACCACCTTGTCGCTTCAGATCCTTCTCCTTCTGATCG -CTCCCAGACCGAGGCCGACCATCATTACTAGGATTTTGGCTACCCTGGGACTGCTCAGAC -CCAAGCAGAGCATCTGCGTCCTCTGCTGAAAGGAAACAGAAATGCACTCGCTTGCGGATA -TACGCGACAACGGCGACTCCATCAAACGTGATGCCGGTAACATTCAACTTCAACGGCAGC -CCCACAAAGCTGGGCATCGGGTAATCAAGCAAGATTTCTGCGGTCAAGGACATGCGCACG -TCGCCAGCATATTTAGCATGGCACAGCACCTGGAAATCCTCAGGACGGCGCTCGCGCATG -CGCGGCGGGATGGGGAAAGATTGATCTCCGGCGGGGTCGTCTGGTATCTCTATCGAGGGC -TGTGGCGCGTCGATGGGGTCACTACTATGCGGATTTGACCCGCTGCTACCGGTGTGGCCA -AGAGAGGGATGGGAGGGAAGCGTGTTGGCGGTTGAGGGTCGGGACGCGGAATGAGTGGTA -TCCAGGTCTGGCTCTGCCAGGTGATGTGCGCCAGCAGGGCGGGCTGCTTGGGACCTAGGT -CCAGCGCCTATACCCGAATCCGACCATCCACTTGCAAATGAAGTGCCACCTGCTACGGCG -GCGAGCGGTGTCTGAGTGCCTGATAGACCGCCCAACGACCGCATGTGATATCCGAGTGTT -GATGTACCGCCAGGGATACCAGGTGTACTGGCCCGGGGCAGGAAATGTGGGTTGAGGTGG -TCGCCTAGTGGGGATCGAAGCGCAGATGGCTGGAAGCCTTCACCGGGGTATGGATGGTGA -CTGAAATTATGAAGGCTATGCGCCGTTGGGTTGAAGGTCATGTCGTCTTCGTATGGGGAT -TCGTGGAGTTCGGACATGAGCTCCTCTGACGCGACGGAGGTGTTGGCATCATCGTCGTCT -TCCTCGTAAAAATCCGCGAACGGTTCGCAGAAGTCTTTTATCTCGAGATCCGGGGCGATA 
-GTCCCGAAATCGAAGGAGTGGACCTGGACCGATCGAATAAAGCGCGGTAGGGTGATCTCT -TGAAATTTGTCATGTACGAACGACCGGATTCGCTCTGCCAGGGCTTCCCCGTCCGGGCCG -GAGGTGGCAGCTGCCCAGTCGACTTCAATTGACATTACATATGCACCACTCTAGCTCTTG -ATGTGGGAGAAATCGGAGGCTGAGGAGCGGAGTGACGGTATCACCACGTGATATAAATGA -TATTGACCAGTTGGCGCATCTATGTAGTATTTTATCAGATAAAATGTTTCGTTATGCACT -ATGTGCTGATTATGACCTTTCATAGCAACTGTGGACTCTGCATATTCAGCGACCCTGTCC -AATTCGATGCTCTCTCCGCTCGATATTTTGATGGGGATCCAAGTTAGATGAAAGTCAATG -ATCAAGGCTCGGTTGAGCTCAGGATTCCCAAGGACATTGGCACGCCGAAGATCCTGGTAA -AATGAATGGCTCTTAGCTCGCGAATATCTTTTTTCAACTACAGGAATTCCCGGCGGAGCA -CCGAGATCACTTCCATATTGACTATGCTCTCACCTCCCTAGCCCATGGGCCATGACTAGC -ATATGAGAAATGTTTTCCACCCCATGAAAGTAGATTAGTGTCAAGTTGATTGTACCGAGG -AAGACAGGTACAGCATATCCTTAGGCTCTCCGATGAATATGGTATATTTCTGGTTCGTGA -AAGGCTTCCTTCCAAAGTCCTGTAGTTGTTCCCTTTTCAACGGCCGTATACGGTATAACA -AAATACGGCATGTGAGGTTTGAATATCACATCATACAGTGCCGTATTTCCGAAGGCGCAC -ATTTGGTAGGTTGTGATCTTGTAACATCCGGAGACCATCTTTTGTAAGGCATACAGATAT -AGAAGATATAGGCGACGATGGTTGATAGCCTAAATGGGACTCCCAGCCCATATTCTCAGA -ATTGAAACAACTATCAGCGGATTGAGGAAGTTGCCGCAAATATTTCCTGACTTAGAATGA -CTATGGTCGAAGCTGGTTTTCCAAGGATGGAGCTTCTGCTCTGCCTCGTTTCTCCAAGTC -TGATCGCGCACAGGGGGTCGGAATGCCACCAGGAAGAAGGATAGTACACGGGGAATTGAG -GAGCTTGGACATTGAAAATTTGGTCCAGTCTCTCTACTAGCATCACAGAAAAAAGTATCA -CAACCTATTAGGCTTATCATATGGAACGCAGAGAAGAACACCCATAATTTCGTTCATCAC -ATAGGAATACTCAAGTCCTTTTTGAATCGCCATATGGTATCACTAAGCAATAGTAGAGTA -GACAAGTTTGCTCAGCGTTGTACCTAGCCTTTTTGTCCAATTCGGTAGGCATTTTGTTTG -AATGTACCACCTCTTCCAATATTCCCATTTTTTCATTGGTCGAAGCAAATGAAGAGTTTC -CGCCGAAAGCTCATGGGTCTTATATTCACCTGTGATGAGGATGTTGTAGTTGTCTGCAAT -GACCTGATGGATCGAGAGCTGATCGGGTCGGTGATTGCGTATGCTCGAGGACTGGGTTTG -TTCCGCCTCAGTAACTTGAATGTTCTTATGAGAGATAAATTCTACTCCATCACTGACCTT -TAGTTCGTCTCGAGCAAGAGGAGACTTGCATAATTCCGCGATTATATTATTGAAGAGATC -ATCAACGACACGCCGCCCTTGATGCTTTATTTCCTACTCATAGCTGATCAGCAAGCCGCC -AGTGGTACACTTATAACTCGATCAAAAGGGGGAACAACCGTGGCGTATCCTCTGACTGTA -GGTGCCCACACATAACGTCGTAGAACTCTTGTTATTGAGCTGGACAATCAGCCGAGAGTT -TCAGTAGCTTTGAGCCATAATTTCCAGCGAAAGTGGGTATTGCCCCTGCGGTCGACCTAG 
-ATATTTCCCCGACTCGACGATTGGGAGAGTAATGTGTGAGAAAAAAAACGTACCATTTTC -AGATTATTTTGATCTCTCGTCTAGGTGGAGAACTAGCGACTTGAGGTCTGGGCAGCTGTC -ACTGGCCATGTTGTCAAAGGGTCATTAAAAAGAGGTTAAATAGGGCTGAAGCTGGGGGAA -GAAGGAGGGTGGGGGGGGGCAGGCTTACGATTCCCAGGGGATGAGACATATCAGGGCTCA -ACTCGTAGCGAAAGCTACATATTTGATTGGTAGTGCACCCAAGTGGCACAGGCCTCCGAA -TTATCAGGGACTATATAGCGCCCTTTAGGGCACATAAAAGACCAATACTTCAGTCCCACT -GTATGCTGTACAAGCAGGAACGTCTCCCGATTAATTACATCCTTGCAAGGCTGTAAGCGA -CACGTGAGAGCTACTTTCTTTGGCATAGACGAGAACCTCGTGAACTAACAATGATTCATC -AAAACTAGCGTACAGTGCACTTTTGCAAGTACATAATTCAGAATTTAGCGTAAGAATGAT -CATTCAATTGGGTTTCTCGACCATCCTGTGTTGGCATAGGTATGAGTTTGAACAGTATTT -GTATAAGATGATGACATTGAGAGGGAGTCAGAGGGGCACACGGTATATACTGGGATAACC -CCTTGTATTCTAATGGATTTGGCCAATTACGGCGCCAAAAACAAAGGAAAAAAAAGCCAC -TTGGCCACGCAAGAAGCTTTGACGTGCCCGTGGGGCGAACTCCTGGGGATTCTTTCACTT -TTGGATGGCATCCTTCGGAATCTTCCTCGATCCAGAGCCATATGGTATATATTTGGCTTG -GAGGTATAATGACAGTCCTACGTATGTATCTACTCATTTATCAACGAAGTAGAATGCCGA -ATATATACGGCCTAGGTCAGGAAAACCGAGAACTATTAATAAACCAACAAACCCAGGGTT -TTGGATATTGGTCGATTTTCAACCCCGAAAATCAGCTGATATATATCATATGCGGAACTA -ATGCAACCCAGGTGTCTGTGCAGCCTATCACCTACCTTGTATCTTGGGCATCCTGGCCGC -GTCTAATGTTACCGGCATTCTCGACAACGTACTGTCCGAGTTTTGCCCGGGAAAGTCCTT -CAGATCTTGACCAGTCCGGGAGCGTTGTCCACGAACTTCCCTTTTTTGCGAACTGGAGTT -CACTTGACTTTGTGTCGTACGATGAGAGGATGGAACCAATTTCCAGGCGCCCATACCCTT -GTAACTCTGATGTGACAGGTTCCCGAGAGTGTTCTTTTCTACGATCTAGCGTCCGATCGA -CACCGCAGAATGCCAGAATGCTAGAATGCATGACTCGCGTATGCACAACGTGGCCAGAGC -ACATTCTCGCCAATAGGGAATTTACATACGTTCGGCCTTACCGACAGTAACGTCACATCC -TCGCTATATCACCTTAGCAGACTTCTCGTTTATGTGTATGTACTAGGTCCTACAATGCTT -TAAAAGGTAGACATACCGTACTGAAGGGAGTTTTTCGTCAATTCCAAAGATGCAGAGCGT -GTTAGACTGATTCATTTTTGTCCAGTCTACTTTACTACAAACAGATAAACATGGTGGAAT -ACGCGCCATCCCCAGTCGTTGGACACTATCTTGCACGCACTTTTCCCGGTTACCATAATC -TCGCAAGAGCATCTCAAGTATCCGGTATTATTAGTCATATGATCCAACATATCTACTTGC -CACCAACGATGGAGATATGCCATATCATTGGTTGTGTTGTGTTCGTGGAAATGCGGCATA -CCGAAGATCTTCTTGGCAGATCAATCGAATTTAGACTCGGACAACGATTGCTGTGATATA -ACACGTGGACACCGGATTCCATCTGCTGTGGCGTTGAGATAATTCGAACCCGCGAGAGCC 
-GCCAACGGAAGCACACGTCTTAAGCCACCCCTGGGTAGAGAGCAAGAGACGGCGGCCTCA -CGGACGACGGTGTGGTTAGTAGGCTATCATGGACCATGAGGCTATCGCATACACCACGCA -GGCCTGGTATCACTGTTGAATAGCGGAATACGTATGCAATCGGCCATAGAAAACAAGCAC -GAATGGCAAATGACAATATAAAAAGAAATAAAAATATCATAAAACCAAAATAATATCATT -CAGTTCGTATTAAAAAAAGCCAAGTTCTGTGGTCTCTCGACTTGTCCACGGGACAGTATT -GTCTAAGGAGAGAAAGATCCGAGCACGCGTAGATAGACAGGCCAATACAGAGAACCACCG -GCGAATAGTGACAGGAAGAATGCAAAAAAAAGAAAAAGTCAAAACGACAGCCTCAAATGC -CAGATACAGAAAAGTGTCCACCCAAATTCCAAAATGACCGACGCGAGGCAACAAATAAGA -TTATCACCAAATAGCAGATGTGATACAAGGGTATCTTCTTCTATCTTGAAGTAGACACAG -ACAGGGCGGCGAACCCAATTAATCGATGACCGTAGGACAAAGCGATAGATCGTTCTCCAG -TTTCCGAGGCATTGGGTGCCTATGATGTCTTATTAGCAAAACCCTATGAGAGCAATAGCT -GCCTCAATATAAAAGATACAACTCACATCCGGAATCAATTTAGCCACCGTGACGAGTGCA -ATTAACATCAAGTCGTTGACGACTTTTCTCGTGGACACCTAGACACTTAGGAGTGCCTTG -ATGGGCGCAGGGCACCACACCCTGTTCTTGCTGCTTGCAACCGCAGGCGTATTCAAAGAT -GTAGACGCTACACATATTGAGGGTTGAGTGAAGAGGTCAAGTTGCAGAGTATTCAGAAAC -AAGTACGGCGAATTGAAGATCACTGAGCTTGAAGGTCTCAAAAACTAGGTGTTGATCGAT -ATGGTGAGAAGCAGGAAGGGGATCTATGCCCGGCTTTATAGTATTCGATGACTACCTCGA -GAAAGTAAATTGACTGCGCAACTCAATAGAGAAAATAGCAAGAACCCATCACCGACATTC -GCAGAAACAGTTCAGTCCTTCCGTCCCTTCACAAGCGCTGTAACTCACCCATCCAGATGC -GCCATATGCAGGCAGTAACTCGGTGATCACTTGTTGCGCAGTCAAAGAGAAGGTGATGGA -GGACATAAAATGGTATTCACATGCGCCATATCCAGGGCATATGCTTTTACCAACAGAAAG -CAGAGTTTGTGGTAGGTGGACCGTTAGAGTGTGGTGTGCCTCCAGGAGGATTCAATTATT -GATATGGTGTTCGCGAGCAGCAGGGGTCGCCCGAAGGCGACCGAAGCCCTAAGCGTGACT -ACCGATGCAGTTTCCAAATCCCACAGGCTGCAGGAAGCACTTGACATCACTCTAAAAGCG -CCGTAGCCAGTGTTAGGGCTCATCCTCTGAGCGCCAACATTGACCCCTCTCTGGATAGCA -CCCCATAGGGCCAACGCTGCGGCCGAAGCGTGGATCAGGAACATGCTTTGCCCCATCTGA -CAAACCTCTATGATCCTGATACCTAGCCCTAAATGTATGGCTCATACTTTGGAGAAGGCA -CCTGCCACAGCTTTCTCATAGTCTTCACTGGCGGCGCAGGAGATGCGACTAATTGAATGA -GGCCCATGGGATTGTGATGACTCTTTTTGAAATTCAGGTCTCGGGATATCCAATGTAGTT -CCGAGACGTACGTGCGTTAAGACCCTGCATAGGTCATAAAGAGCCTTGGCTCCAGACCTC -CATGATGTTGTGCTGTAAGACTAGATAGGCAGACCGCTGAAAGTAGTATCGGGAATCAAG -CGAATTCACAGGAGGCTGCCCCCCAAGCCAAACTCCACCACAAATCATTCTCGATACGGT 
-AGGAGGTGACTTCATTGACTGTTTTGCAACGGGCCTCGACTTGTGTATGTAGGCGAATAA -GGCTCATTTCCAAGGCGCAGTTGGTGATAATCGCAAATTCTGCGCGATTAAAACGCGAAA -GACTGCATGATGACAAGGCATTTTTAATAAGTTTTAAAGGTCAAGCGAGTAAAGGATAAG -AACAATCCCGAAGCCAAGAAAATTCAAACACGGTGTAGTATAGTATCGGCTTTGGCCGAG -TGTGATATTAGCCAAGAGGCGTTCATTTTCTGATCTAGCGCGATCGTGCAGCAGGGCCAA -GGATACAAAGAAATCAATGATGGCCTATCAATTGATTCTAGAGCGCTAATGACTGGTGAC -AAGCACTAAGTTCCTCAGCCATGGCACAGTAACGTTCTAATGGGTTCGACTGAGGCTCTC -CCCTTGCTGAGCCGCCGGTCCACCGGAATATTAACTCCGGCCCACCGGGATATTAACTCC -GAGCGAGTACTGACGCGCCGGGCTAGGGGCGGCAAGGCACAGCGACGCAATCGACAGGTC -GAGACCAATTGCTGAGCAACCCGTTTTGCCGCTGTTGATTTGTTTTACCCCGTAGATATT -GCagagaaggaaaagaaagagaaaaagaaaCGCTAAAAACCAATGAAATAAACAAACCGA -CATTGTTTTGAATTCTACTGCATGAGGTTTGTACCACGTGATCCATGTGCTCTCAAGGTT -CAATTTGAAGGTTAAATGAACTTGAACCTTGGAAACGAAAtaattttaattttaatttta -actctactcttgattttgtttcgaACCCAGATCAATTTAAAACAAATTCTCACATGTTAG -GTCCTGTAGTGACAATTACCCGAAGGCAGTTCTCGGTTAACCAGATATCGATTCTCGATT -CTGGGGAAAAATTAGAATATGATGTCATTTTCGTGTCTTTCCCCCCCCCCCCCTCAGCGA -ATTGTGCGCTATCGGGTGCCCTGAGAGAAAGGCCGGGGAAATCATATACCCCAGCATAAC -CGGTGGTCCTAGCGGCCATGCAGCGGCCGGATTGATTGGTGAAAGAGAAGTATCGACGAA -ACAACTCGGAATGTGTAAATCTTTCCCACCCGAAACCATTATTAACACACATACTGCATA -GGTGTGGATACATACGGAAGCCTGTGAAAGCCATTAGTGTTCGAATCTTAGGCCCTCATC -CTAGTTGTTGATCTTCCGAGGTCCTTGAGCTTCGGCTGGCAACGGGTTTCTCTAGGCAAA -GCATATTGTGCAATGAGACAGGGCTTAATTCGCCAAGTCAACATATTTGCCATGGCTGCT -GGGAATGACTGTCAGCCGCCCAGATGCCCAGACTCTGGTGCGACCATGCCAGAGCCAAGA -GAGGCAAAGTCGAAGCATTCAGAAAAAGAAAGGGTGATAGCTGGTCGATAGTGAATTCTT -GGCTTTGATCTTCCCCCCTTTTTTCGGCCAAGACATGTGAAGCTATCTAGCATGTTTTGG -GATTCTCCTTTATGGGTTTTATGGGCGATAATCGCGGGGAGTGGCTTGTCTGCATGACCG -CGCATCCACTACCCCCCGGCCCAGAGATGATGTGCTGGATGCGCTGGATCGGCATGATTC -GGACCCATCATTGATACAGCATGCCCCTCATTCTCCAAGGTTCCTCTTGGCTTATCTCGG -TCTGGGCTATATAAGTGAGGTCGAAACTCCCTGGGAATCCTCGCTTCCATCCAAACTCTT -TCATCTACAGCTCTATTTTCCATACTCAGATCCCGTCTTTAACCGAGTTACCTCTTCCAC -ACACAAAGCCCTAAACGATTTTTTTTAAATCTCAGAATGTACACTTACACTCCCACCCAC -CGTTTCCTGGTCCTCTCTGCCACTGAGTCTGGCCCTTACAACCCTGCCAACAAGTTCCTT 
-GTCTTATCTCCCATTGAATCTGGTCCGGATCCCCGCGTCTGGGAAGAAGAGTCCCAGAAC -GAGACCAGCTCAATTGAGGAGAGCCCTGTGCTGGCTCCTGTCAGCCGTGCCCGCACCAAT -AGCTCTCTTTCTACCAACTCTTCTGCCTCCCTCAACGACCTTGCGGCCACCCTTCCTAGT -GGCTTCCTCTACCTTGGTCATGGAAAAAAGCAGCACTAGGCGCTCTATCACCGACCCGCT -TGGTCGCTTTGGTCTCCTTTTCTCATTCATATTTCACGATATATCACCACAACAGTAACC -GATGCATCATTGAGCTTATCATGAATTCTTTTGCTTTCTTTCACAATTATCAAATCAATG -GAGCGATTCCAATCTGGAAGTTTGGGCCACCCCATCATGGGGCAGAACAGGAAAATCAAT -AGGGGTGTTTTTTTTTGGCGCTTTCAAATTACACTATGTTTTTCATTCAAAATTTGTCTA -CATTACTACTACTTCTTTGCTTCTGGGAAGCACACGTATTGCCCCATTCGGCAGGCTAAC -AATCAATGAACTCTTCAAACTATCCGCGTGATTCTCTTACACTCCATTTTTGCCACCGAA -GTTTGCCTATTATGCAGCCCACCTGACCGAACTACAAAGGTTGTACGGAGTAGAGGGATC -TCCATCCCGCCGGACGCGAGGAGGGGTTCCTAAATGCTTGCCAAGTGCTGCCAAGATGAT -GATTTTCCGCCAAGCCTTGTGCAATCTGATACTCCCGCCTTGGCCGGGGGGACAGGGATG -GACATTTATCGCGGACCATGTTCCACGGCCTAGGCAACAGTGTCCATTGTACCCCGTACG -CCCGTGTATGTTGCATCCACCAAATAGCACCTATTTTTGGGGCATTGCCAACCTTGCCAA -GCCTCCCTGTTGCCAATCTTACTGCGACAGCTTATCAGTCCACAATTATTATTATTTTTT -ACATTCTCTCCTTACACGTTGATTTTGCTTCATGCATTAGATTGGGGTCGAAAACAACAT -GATTGCAGGGATGTTTTCTTTGTGTTTTCCCCCGGTGAAGAATCCCATCCCCCCTTTTAC -CCCTCAGCTTATTATTTACTCTTCCCCACACGTTTCTCCGGTGAAGAGGGCCCCTTTATA -GTGCAATTTTTGGCCAATTCGATAGCTCCGGATTACTCTAAATTGCCCGTGTTCAATTAG -ACAAGCCACATGACATATTTTATACGAGGTGGACTTGTAAAAAGGATAATTAGCCGAGTC -GGTTTTCGGATTCCGAGACCAGAACTAAGTACTACCTGTCTACTCCGTAATCCATACAGA -GCAGATACCTAAGGCTTAGACTAGCATTTAGGTTCTCACTGGCCGATTTCAATCTCACTG -GCTCTGTGTGTAATACGGAGTATGTTGTAGATCTGTGACACTATGTGCTTTGCTCGTAAT -AGCCTCCCCCCTTTCTAAATATGGTTTCGTCCATGAACCTCGACCGGACTCCACGTAGTT -CATTTTGAATACGCTTAGGGGGTGTCCTAGTTTGACTTGGGAAGATACTGTCTGTTCATA -TTATATATGGTTCCAGATCGCTGATTTTTGAGAAAGGGGCAATGCTTGTGCTCTGCCTTC -GGTGTCTCTAGATATAGTCCGAGGACATTGTGTACCTTTTTCGTATTTCTTTGCCAATCA -TACAATTATTAAATCAAGAGTAGCGAGAATAGTTGGCCGTTGATGTGGCCTTGTGAGGTA -TTTTCGGTGATGTTCTCAGATATTCTATTCAGATGCCTTATTTGAACATGTTTTCTCCCC -CTTCTCGGAATTAGAGATACTTGGATGGCAGATAGAAAGCGGTAGAAGGCATATTTTGTG -AAAGCGACTTTTCGATGTATAATCTTTGACTTCCCAATTGCCTTCCCTTGTCGTCACCTA 
-TTCTACGAGCTCTACAGTGGCATTTCATGAGTATTTGCACCCTAGTACTACGAAAATTTT -CAGGCCACCTTCCTTTCTGCTTGTTAATACTTCATTGAACGCGTTAAGTAATCAGTAGTA -TATAACTAGAAGGCTCACATACACGCTtatatatatatacatatatatGGTAGAACCTCC -AGCGTGCAGGCTACAATAGAAGAGAGTAAATGAAAAGCATTAGGAAGTACTGGTAGCTGG -CTCATGGCACCAAGCATAAGCATGATTTGACCTTTAATTTTGTGCTAACTGCAATATTCT -CATTGGAAGTTTGATTTCCACCAAAATACGCAACCGTCGAATATGGCGCATTCTTACCTA -GAGAAACTTCCGCCGACCATACCCGAAGAGCGAAGACAGGGACGGTATCCAAGCCATACT -TCTACATGAACATTATGTCCCTACGAGAGCTGTCAATCGCACATGCTACTAGTCCCAACA -TCCGGTGTAGCAGAAAAACTTGTCTCAAAGCATGCTTGTCCGTCGCATCGCGAAGCGCAA -GACTTCCGTCTTCTGGATGCCCCAGAAACAACGTCCCGACAACAGTAATCTACACTAGAT -CCCCAATACAAGAATTACCACTCGACAGAAGGAATCTGATAATCTAATTTGTATTACAAT -TTCAGGCGTCCATTTGCAAAAGAAATAGTCACATCAGTTCCCGTTTGTGGCAGATTTGCT -GGAACTAGTATGACGATGCGGTTCACGTATGTGATTTGCTGCCTATTGGCTATTAAGTGA -CAGGGAAGCCGAAAGCAAGGACAAAAAAATGCCGGGAGATGTCTTGCATCTATACTCCGA -CGTGCTCCCATCCATGAATAGCCCCTCCAACTAATAATAATATAGCATCCTCGAAAGACC -AGACTGTATCAACATATTTTGTATATCACAGGTCCTGTCATAATCAAAGAGCGAATCATT -TCCCCGTGCACAAAATTGCATTGTTGAGATAGTCCAGTGGGCTTGGCACGTGGGTCTGAT -GGTCACTTTGGGTGCATGCCTTGGGGTAGGTATCTTGGATAGTCTCTTGGATAATTTGCA -GTATTTGATGTGATTATTAATCTGAAGGGCGACTTTTAGGCCTCACGCTCTGATGACAGT -ATTTGCTAGTGCCCAGGGCAGAATGATCAAAATTATATATATTGAAGTATTTAACTCACT -ATGCTCATAATATCAAAGGAACATAAAGAGTCAATTCATATTTATATGTAAATCAAACCA -ACTAGATATAATAAAGTGATGTAAGACATATATATGTTATGTCGTAGGTCATCCCAATAT -CTACTGTCATTTATCTCGTGTCGCGCGCATCTTGGCACAAAACATGCTATTCTTAGCTAC -TACCTAGTGGGCGACCCCGCGATTCCCACCAAGCGAGACAGGGTGCTGTTGATATCGAAA -GAACATGGCGATACCAGCCTGAATCTGACTGCAGATCTGGAAACTACACCTACGACGTTG -TAATCGTGACTTAATAAAAGTGGTGTGGGGGGATTTATGATCCTTGACACGGGTTGGTTT -CAGCCTATGATAATCCTCAGGGTTATCGAATGAGAACTGAATGCAGGAAATGAGCCTCGA -TACTGCTTAGACACCCACCTTCGCTTGCGCAATGGGTGATGACCCGAGACAAAGTTATCA -CACTTGTACTTGTACCATTGCGAGACGCATACTATAATATTGCAGGCGCCGATGGACGAA -TTGCAAGTTCTCAGAATTGGGTATGAGACACGGCCTACAATAATAGTTTATGAATAGGAC -GGGGGAATAGGGAAGAATGTCTTGGGTAAATATGCCGTATTAGACTATAAGAATAGTTCA -CTGTTAATTACTTAGGTACGTTGGAAGCACGTGTCGTTAGTTCTTTAACAATAATACAGG 
-TTGATGGGGCGATGGAGCTCTGAAGTCATTGAACGATGATGGTATGATCTCGGACGATGA -TTATGGTGGCGACATTCTTGTTATAGACTGATGGACATGACCCGAAATAATCCCGATAGA -ATACTGTCTTGTAGCTCACACTAAAAAATGGCGTAGTTTCATGTTAGAACGGACATGACG -GACTCCGAGCCTCGGAAGTTCCATGCTCGGAATACGCGGGTCTGCAAATAATATATTATT -TTAGATTACCTTACTCACAGAGTAAAGTAATTCAACTATGTTGTATAGTGAATCATTATA -CGACATATAACATACAACATAGGTATTGTTACTTAGCTTTCCCAAATTTGAGAAAGGCGG -TGTACTCGACGCTAAAGAGATGGAAGTTCGGCCCAATTGCTTCTGCCGCGTTTCTAGCGC -CAAAGCCAAGACGTGTTCTGCGACTCGCCACTTTAGTCAGGTTTCCTGGAATTTGGCAGA -TCGACCAGAGAATAGATCTCATAATATATATGAAAGACTTGATTAGTCCAAATCCCGGAt -atatatctatatatctatatatctatagatctatagtatcAAAGAGGTGCAATGAAGTGT -TACAACATAGAAGGCATAGCCGCTCAGATCTCTTATCTGAGATCACCCCTTGCAGATGAC -CCCGGATTTTACAGGGTATCTGAGTTTATTGAGCGCCTCTCTTGATATCCGGATCTATGG -ACCTTCCCAGTGGAAGGGACTAGAAAGCGGAGAATCTGCTATTCACATGGGAGCAAAGCT -CGACATTGGCAGGACATGTTAGCTGCTTCACGGAGATCTCGAGACATCGTGACATTCTCG -GAGGAACGGTATAACCCATTGCGGTCAACATGATCCAGCTATACTACTTGTTCAAAGCAT -CAGGAGATATTATCCCCCAAGGATATCATCATCGATTCGGGATGTCGGCGACTTCCCTAG -AGCCCTGGAGCCCCGGGGCCGGTCTCAATTCAACCAAATTAGTCATAAGATACAGTACAC -TAGCCGTTTCAGGTTCGATACATTACCGATCATGACGTTCGCATCCTGCAAGCGGTGTGT -AGTAGTAAACATTCTTGATCGGCTTATACTTCAGTCTATATACGCTTTTAGGTCTCGATT -CAGTGATAGTCAGAGATCTACAAATAATGCCATACTATCGAAATATATTTCAAGGGCTCG -GGGCCAAGATCCGCTAAACGGTCACGTGGGTAAATACCGTAACAGCGCTTGTCCCAAACC -GGAAGTCTTGCCATCTCATACCCCGTCTTCATGAGCCCTTGCCCCTGCTGTTTCTCGACT -TGTCACTATTCGATTGTACAACTCTCCTTATTCTGAGCCCTTACCCCCTAGTCTTCAGAT -AAAATTTCAAAAAAAAGGGGAGCTGCACTCCTTCAAGTGACTCGACAACTTTCTGCCAAA -GCGGGACATCATCCGAGGGTGGACTCCTGCGTGTGAACCCGCCCAAAAACCGCCCATCCA -CTCAAAAGTCCACTCATCGAGGGGCACTGACTGTCTCAAAGTCGCATTCTTTATTCTCTT -CAGATTTTATCTGTGTGAGCTTCGCGCTTCAACGCCGGACTCGAAGAAACCCTGTCATTC -ATAAGCCCCTCGGGTTTGGCGTGACATCGCGCAAGACCAGACAGGGAACTAGGATATCGA -TTCGTCTTACCTCGACTTGGAATTCTCATTATTTTTGATCAAGGATTGATACTAGCAGTT -CGTCACCATGACTGGCTCTACGCCCATTGATATTTCCACTCGGCAGACGACCTCGGTGTC -GCCGCCGGGTCAGCAGGCGTCTAACCTGACCTCGGCGCTGCAAAGAGCGGGCAACGGCGA -GCGGACAGGGAGCTTTTCCCACCTTCCCGGAGGAGGACTCGGCGTATTCAAAGCCCCACC 
-ACCTCGTAAGGATTCGATCGGCGCAGCCACAGCACAATGGGGGAACGGGACGAAACCTAT -CTCCATGTCGGGATCTCACCGTGACAAGGGTCGCCGTGAGTCGCTCGCAGGAAGCTTAGT -GGGTGGTATGAGCTGGGGTGGTGTATCCGTCGGTAGTTGGATACGTGATGAGTGAGTGGC -CTGTTCGGACCTAGAAGCCGCGGGGGGAACCGCCCACCGTGTGTTGCTATCGTCGCACTG -CATCGAGAGCTGACCAAGGAAAATTTCCGCAGTATCATAATGACTGGCACCTCACCATTT -ACTTTCCAGTCACCATCTTTCCATTCCTCATCGTATCTCCCGAAGCTGGAAGCGAATTTT -ATGCGAGACTTTTCGTGCTGCGGTGTGACGTTACCGACCCTCCATGACCTATTACAACAC -TACGAAGAAGCCCATGCGACCAAGTCACCCAACCAAGGCCACCGGCCGAGCCAGGGTGAA -AATCGAGCTGCCTTGGCGGCTGCCGCAATCGCACAGCAACAGAACCAACAGCATGGCAGC -CAAGGACGTGGCCTGCAGCCCGACCGGACCCTGGACATGCAGCGCAAATTGGGCCAGAAT -CCATCGCCATTGCATCACGCAGATCTAGACACAATAGATGACATGGAGTTGGACGATGCC -CTGGGAGACGGAGATGGATCAGGCTCGCAGATGTTCTCTTCTCAGCTGCATGACGGTGGA -CAGGGCGGATATGGTAACTCAAACCAGCAGCAATTAAATCTGGGTATGCTTCCAAGCCAC -CAAGGATTCAGCACACCCTCGCAACCTGGTACACCGATCGGATCTGGACGACCTCTTTCA -TTGCAGAACAATCCAACCGTCTCTTCCGTCAACACCCCAACTTTGATGGCCAACCCGCTC -CAAAATTCGCAATTTCGAAATACGCCCGATTCATCTGGTCCAGGGACCCCTGCCGAAATC -GATGAAAGTATGATGGGTGGATTTGGTGATCTCAGCATGCAAAATAACACCATGGGCCAA -AATCAGAATCAGTTTGGCCGATTCAACGGGAACAATAATGATATGGTCGATCTCTGCATT -GACGAGCCGGCCAAGCGGCTGTTCAGCCCCAGTGGGGGCATGGGCTCGCCCAACGCCCAC -TTTAAGCTCAGTGGAGCTCAATATGGCCCGAATAGCGATATTGCACGACGGATCCGCGAA -CAACAGTTACTGGCCGGTGTCCCCGACACCACCGTTATTCTTCCCAACGAAGAACCCAAG -CCGTTCCGCTGCCCCGTTATTGGCTGCGAGAAGGCATACAAGAACCAAAACGGACTGAAA -TACCACAAAGCTGTAAGTTTCGACTTGATACTCCACGCATTCGTTTTAATTTTCTAACTC -TCTCCTTTCTTTCCAGCATGGCCATAACAACCAACAACTTCATGACAACGCCGATGGCAC -GTTCTCAATCGTCAACCCGGAGACGTCAGCGCCATACCCCGGCACGCTTGGCATGGAGAA -GGAAAAGCCCTACCGCTGCGAGGTGTGCGGCAAGCGATACAAGAATCTGAACGGACTCAA -GTACCATAAATCGCACTCACCACCTTGCAACCCGGACTTCCAGCTCGGAGGTCGCAACCT -CGCCCTCGGTGGTGGAGTCATGCAAGGGCAGAACATCAATGTTGCTGGAGCTGGCCTCCC -CGGAATTGGCGAAGAAGGCCTTATGTGAACCCGTGCACAGATATGTCATTATGTCCATAC -ATAATTCCCAGCTAATTGACGCGAATGATCTCGACGGCCTTCGGAGGTGAGAGCACTCTC -CTTCACACACGATGTCATGATTTTGAATTGGGTCCGGTGACTTTGGAGTTATTGCTTTTC -TTTTTCTCTCTTCGAAAATTGTTCTCCCTGGTTTACTTATCGGCTGGGTTTTGTGGTACC 
-TTTTTTGCTTTTCGTTCTGCTGTCTGGTAGATCCTACTGGCATGGTCATTTTACCTCCCC -ACTTGATATCCCTCATCGTTTCCAGATTCCATTTGCTTGGCCTGGTAATAGAGCAATTCC -TGAGCGATCAAGACCAAGATCAATACCCTCTCTCAGCAAAGAGCTGTCTAGAAGAAACTG -CTTGCACTTGCCTGCTTTCTCTTTCTCTTTTCATCATGTTTTTGCTTCTCCATACGGCCT -TCGCGTCTTCCATGAGAGAAAACCCAGGGGAATTGGATATTCCGAAACCTATGCCCGTCC -TGCTATTCTATTTTACTGCGTGCTAGCAAGGACTAGCTTTGTGTGCCTTGTTTCCGCGTT -CGCGCCCCCATTCACTGCGTTCAGTGCCCGCTCATATGCCCCTTCCAACGACGACAGTCG -CTCGCCCGCAAGTTCCGTAAAGCCCTGAGTTTCTGGAACTACCTCAAAAAAGTTGGCGAA -GTGGGATTTGGCCATGTTTGACTGGAATCATATCCGAGGCACGAGGTTCTGGGAGCGGTT -ACTCCTCTCCCCTCTTTCACACCCAGTCGGTGTTCGCTTTTCCTCCCTGCGTCTCATCTA -CACAAGGATTGGACAGGTACTGGATCTGGGATCTGGAAATATTAAAAGTGCGGTTTCTTG -GCGTCGGATCATTGTAGATATGGACCTTTTTTTTGATACGATTTGGCTCTTTTTGCCATT -GTCATTTATCTTCTTTTTTACCTCTTGTCTTGCTATCATTTATATTTCTTGATTTCAATT -TTGATGATTTTTGTTATTGTAGATCCGGGTTTTGGACTTGGACTTGGTAGACCAGTCGGC -TTTTTAGCACCTACTATCCCAATAAGTGGCATTCTGTTCAAGCCTCAAGCAGCAGAGGGT -TGATGATAATAAGTCCAGTGGAAGTCCACAGATTGTTCAATATCATATTTTCATTGATGC -TTATTTTGCAACTAGCAAGATCTCTGTCTTTCAGTCTAGTACAAAACCAACCTTGAAGGT -ATGGCAGGGGGCAAGGAAAAGCAAAGAAAAAAAAAAGGCAGAATCCCATCCTCACTTGTA -AGCGCCGTGAGCCACTGGAAGCTCAAAATCAAAGAAAAAGCAAAGCAAAGACCCGTTGCT -GACCATGTCGATATTTGGGGAGTGAGAATCATGAGAAATATTTCGCTTGTACAAACCGTT -TCGAGAGCGTGACTGAAAAGACGTAGGGATTGGTTTTCGTTTGTGTCATGAGAGAGAGAG -CACATGATGAGCAGAAAAAATGTTACAGAACGAGGAAAGAGATGTCGTCAATGGAAACCG -GAAGATGGATTGGGAAGAAGAGGAGTAATCGCGTCGTTTAGACTACCACTCAAATTCACC -TTGGCCCTTGTGGAAGACGTATCTAGTTGGGTTAGTATATGCATAGCATGGCAGATTGTG -ATAACTTACTCTTTGCCACCAGTGGTCAGGATGCCATCCTTCAAGGTGCACTTCCACTTG -TTCTTCACCCGCTGAACCTTGTCGTATGTGCATAGCATGACCTGGCCCTCCGACTCATCT -GCGTCATGATCATCTGCAACAAGGTCATCTGGATCATCCAGGTCCGAATTGATGGcgtct -tcgtcttcctctttgacgtcgtcttcTCCACCATCAAACTGGCCAGTGACGCGAGGGGCA -GATGAGGTTGACTCGGTCGTGTTTTGGCTGAGGGGTGTGGTGAGAGCCACACGGCGAGCG -ACCTTGGAAGAGGACTCATGCTCTTCCAAAGGACGAAGGAGACCTCCACCCTCATACTCA -GACATGCGCTGCTTGAGGTGCTCGCGTAGAAGTCGGTCACCTTCGCCACTCTCAGCAGCT -GCTCTGCGGCGTGCTACCTCTGCCTTCCAGTCCGCAATTGGGTCATCACCTCCACCATCG 
-GTTTGTCCAGGACCCATGTGTGGGTAGTTCTGTTCTTGTTTGATTGGTGGCATTTGCCCG -TGGGCATTGGCGTGCATCTGTGGATATCCTTGTCCACCTTGGGCCTGGGCTTGCATTTGG -TGGACGGAGTTAGCGGCTGCGGCCCCGTATTTCTGGTGAAGCGCATTTGCAGCACGGTCG -CGAGCCGACTGAGGATTGAGACCATGCGGGGGCATCGGGCTAGTCATGTGTGGCATTCCG -GGTTGGCCATTAGATCCTGGCTCGGTCTTGATGCGAGGTCCACCCATGGGCATGGCGTGC -GAGGGCGCAGCTTGAGGCATTGAAGGAGGGGGAACATATTGTGGCTGGGGGGGAGGTTGT -CGGGGGGCATTAGAGGGAACAGTAACCGCAGGAAGGGGAACAGAGTGCTGCTGAGGAGGC -TGAGGAGCGGGATCCCAGGGGAAATGAGCGACATTCACAGAAGAAAGCTTATGTTGCCAG -GTCTGTAGATATGAGAATTAGTCCAAAGTTGCATAATCACAGAATCTTGGCGGTCTAAAA -GCGCCTAAGCGGGTAACAGCGGCACATATAAAGGGAAGACACATAAGCCCCGTGTTCCCC -CATAAGCACCTCCCGCACCACCTTCATCCCCGCTTTGGGGCGAGATCCTCCAGCGACTTT -GGCAACACACAGGCATATAGATAGAAATATGGTACTAACCTCGCGCAGATCATGCAAAGT -TTGCTGATCAACACCACTTTCCTCAAAGTCGACCTGGGAGGCATCACAGACCTCCTGGAT -GACCTTCTCGAATACCGCACCCTATGAAGCCATATTTGTCAGCTCTCAGCTCGGATCAAA -GAAAATTTAACAGATCTCAAGAGATTCCAGGGGAAGGCGGTGATAAACGAAGACCAAAGA -CTCCGTGCGAGCGACGGCTCAGGATAGACGGGGTTGCCGGGGAAGAGAGTAAAGAGTTAG -GGAAATTTGGGACTTACCACTTGCTGGTTGGACATGATTGCTGAGCAAGAACACCTTGAT -GAGCTGGTGAATGATTATGTGAGAAGAACAGAGAGCAGGCAATACGGCGATTGAAGTGTA -CGGGGTGATGATAGCTTGACTAGTAAAGTGCTCCGTCGCAACTATCAAGTTGCCGAAGTG -CTCTCGATCACCACAAATGACGCAATATCAAGCAATTTGTGAACGTGTAGGTTTCACACA -AATAGAAAAGAGAAGAAATGCTTTGAAAGTCCAAGCAAGTGCAAATGCCAGAGGGAAATG -AGACGGTGAGTTTCCCGATGGGTTGGCGAAGGTGAAAGTCACGGCTTTGCGATCGAAATC -CCACTGTATCCCGGAGGCTTAGTCATCGGTCAACCCACCCGTTCGAAGTAGAAACTTAAG -GAACAAACCGAGAGCGGCAAAGATACAGATGAAAGAGAGGTTAGATGAGTGAATTAAATT -GATACAGCCGTAAGAATGGCATTTGGGACAATTTGTACAGCTACCTTACGGGAAAGTAAA -TACAGAGAGAACAACCACCAAAACGCCGAATTCATCACCAAAAAGAAAACAAAAACCGAC -CAACCATTGCACCGAAATTTATGTCCCAGCCCATCAATGCTTTCAATATTACCATGAACC -AGGGTATCCCGAACGAGAGAAAAGACAGAAAGGAGAATGTGCAGAAGTATCTAGCGGATA -GTCATAGACTTTGTCATCTGAGCCTTGCTGCCAACACCCCCCTCACGAGCGGAAGAGGCA -CTGCTACCAGCCTTGCGCTCAAAGATCTTCCAGAACTTCAAGCTTTCGTCAGCAGCAGTG -GTGGCCAGCATCTGTCCATCAGGGCTGATAGCACTGTGGAGGACACGAGTCTCGTGAGCA -GGGATCTCGACGTTGCGGACCAAAGTTGGGTAACTCCAAATGCTGAGGGAGTTGTCGGGG 
-AAGCCGCTAGAGCTGACAATCTCACGGTAGTGGTTGCTCCAACGCAGAGAAGTGACTTGG -GAGCCAGTGTCAATGCTGTTGGTACGGGCGCCGGTGGTGGTGTTCCAGAAGTGAATGTGA -CGGTCGTAGGAACCCCCGCCAGTGGCAAGGAGGTTGGATTGCCAAGGGCACCAGCTCAGA -GCTTTCACAGCGGCACGGTGGTTGGTCTTGGTGAACTTGGGAGCACTCAGGGAGCGAGCA -TCCCAAATGTTGACCAGATTATCGTTTCCACCAGTGGCGAGCTGAGCGCCATCAGAGCGC -CATTCGAGACCGCAGACCTCGGAGGTGTGGGAGACCAGCTCAGCGATCTTGTGTTCCGCG -ATACGAACATCGTGATTGTAGACTAGGCCACTGCGGGCCCCAGTGGATAGAGTGTGCTTA -TTCCAGCCCATAACACCGACGCGGGAGTCGTGGCCATACATGCTGCGAAGCTTGGAACCT -TCTTCAACATCCCAAATCTGAACCTCACCGGTTCCCAGGCCAACACCAACGTAGGCACCA -TCACCACTCCACTTCACACTGCTTACGTATGTGTCAGGGGAGGTTTCCAAAAGGCAGTTA -ACAGAGCCAGACTCAGCAGACCACACATAAACGTTCCGCTCGAGACCGATGGCGACCTGG -TTGCCAGAGCTCCAGTCGAGCAGATTAAGATAGTAGTCATCCAAAAGACCCGGGGCATCC -AAAACCCGTTCGGGTGCAGTTTGGACACGGCGACGGAACTGAGCAGCTGTGGATTTGGCG -GGGCGGAGGGGGCGGTTGTATTGAGCACGAAGATCAATAGGCTTCGATGACTCGGGAGGG -GGAGGCTTAAAGGCAAGAATGCGAGTGTTCATGTTTACTCCACACGCTTCAGCCAGGGAT -GATTTGTAGGCCACAGCATCAGGAGCTGGGCGAGTGTAGGTAGAAGGTTCATTGTCATCC -AGACCCAAGTTTTCCAAAGCAGCAGTAAGGTCATCATCAGACCCACGAGCACCAATGTCA -AAAGCGCTGGCTGCTGAGGCAAGGACCGATGATCCGTCAACTCCACCACTCTTGGGGCGT -TGTTTTTCCTCTGGCTTCGCTGCCCCGCTAGTGGCTAGACCCTCGCTTGCGGTCCGGTTG -GGAATAAAACGGTCACCAGCGTTGTGAGGAATGCGCACAGTAGTCTTGCTGGCACGAGCA -CGAGTAGTAATTTCCCGGCGAGTATGCTCCTTGGACGGAGTGTTCGCTGTTGTCGCGCTT -GTGCCGGTCAAAGTCCAGTCCGAGACACCTAGCTCTAAGTGCTTGGGTGACTTGCGGGTG -CGGGTGCTGGCGATATTCGACTTGTTCGTCTTGGGAGAGTTGTGATGGGTGGTACGGGCA -GCTTTCGAGATAGACTTGGCGAAGTGGGCGGAGAGGTTTCCACCGTATACTGACTTGGCT -TGACTGCGGCCAGCATCAGACTGACGAGGAGGAGTGAAGGGCGATGAATGACCGCTTGCG -CGGGGGTTAGGTGAAGGACTCAGAGGTATACGACCTCCAGCAGTCTTGGAGGAGAAGAGC -CCATGGTGGCTCTTCACAGGAGTCGATGCAGTAACCGAGGCCATAGTAATATATCAGAGT -AGATGTACTGGGACCAACCCCGATAGAAGATGCAATACGAATGCAGAGGCCGATGCAAGA -TTTGCCAACAGAAAAGTAGACAATTGAAGATAATGAGAAGAGGCAAAATTGAAGGAGAAG -GAAAGGGAGGGTTCAGGAGAAAACCGAATAAATAGAAGGTAGCTACTATGCCTCGTCGAA -AAGTATGAGGCTCTCCCCATAGCAACGGTTACCTCAGCTACATAGTACACTAATCCTCAT -TAGGGCTAGTGTCAACATTCGGCGCTTGCCTTCTTTTAGAATTGTCTTGGCATTAAAATC 
-TTAGGATTGTCGGATTCAGGAACAAAGAAGGTAAGATATTCTATACAGGATATCGTCTGT -CATTCACATTAAATGAGAAAAATGTCGTGATCTTCTACCGGTTGCGGAACTAATTCGGCC -TTTTTTTTCTCGCCACTGGAATGGCACATCCATCTCCTCTTTTCAAACCCCCCACCACGT -TCTCCTACCCAGTTCCACCGAGACCCACCACGACTTCTCCCTGCCTGGGGATCTTCGAGG -GTCTGTAAGTCGATATCCTGTTGTACTCCTGGCAATGCAGGACGCCTACCTGTTACCTGC -CAGTCAGGGTCGCCAAACCAGCGCGGATTATCGACCCAGCATAAAAAAGGCCCAGGGCCA -TGTCCCCGCGTGCCTGGTCAATGCCTCGGTGACGTACTGCAGCAATGATCGTATTTATGC -GTTCGGAGGTTTTGACCAGTTTACCGATGAAGGTCGACACCCTCCTTCGCCTATATTTCA -AAATGACACTGATAATTTTTAGTTTACAACCACGTTCTCAGACTCGACTTGAAGACTTTG -CAGTGGGAGCTCGTTGACAACTACGGCGATATCCCCGGAGTTCGCATGGGTACTCTCCCT -TCTCATTCGTCCATGGTTAATGATAAACACTAACGATAAATTTCATTTTTGATAGGACAC -ACAGCGACACTATATCAAGGCTCTAAATTGATTGTTTTTGGAGGCGAAAATGAACATCGC -GAATATCTCTCCGACGTCGTCATTCTTGATCTCAATACCTTCACCTGGACACTTCCCGAA -ATTTACGGATCGGTTCCGCGCGGTCGAGCCCGCCATGCTGCTGTCATTCACGATGACAAG -CTTTTTATTGTGGGTGGAGTGACAGGTGTAGGAGGCGTGACAGGGGAGAAAAATGTGATC -TTGGATGACTTGACCTATTTGGATCTACAAACATGGACCTGGTCCCGTACATGGAGCTTC -ACTGCTCGCTTCGATCATACGGCTTGGGTTTGGGGCACACGACTATGGATATTTGGTGGA -CTGGGTCCGAACATGGAACGAACAACTGATATCTGGTGGCTCGATCTCAAGGGATCTCCA -TCGTTGGCCGGAAACACTACTCCCCAAGGTAGCGTCAACTCCCTGGACGTCAATCTTTCC -ACGGCTCATCTTCCAGACTCTATCTCTAGCAGCACCCACCAAATGTCTCCCCGCTCCGGA -ATCTACGCTGCCAACTCAGGCAGTGTGCAGGTACGTAATTTGGGTCGGCGCAAGCCAGTT -GCGCCTGGTGCCATATCTTGTCTGAGGTTTGATTCTGGTCCTCATGTTCCCTCCCTCTTT -TCTGGCACTCATTTTCATTCATTTGCCTCCGGTGTCCTGCTTGACTTAATTACGCCTTCG -GAGACCGTACGGTCGCATGATTGTAACTTGTCAGCCCTAGACCTCAACTCGCTGCGCTGG -CAACGACTAGCTGATGGTCAAGAGATCTTCCGGCCCGGCTATCGATGGCACTACTGCACT -GTCAATGATACGGGGACCACAGCTTGGCTCTTGGGATGTAGCCTTGATGTCGCGAATATA -CCAGGTGGTAGTGACGAAAATCATATGAGTGAAGTCCTGACTATCGACCTGGAGAAGTAT -GGTTTACTAGGTAACGGTCTGTCGGCAGAACCTCTAGAACAAGGTGCACCGTGGCCATCT -GATCAAATGGGCCCGTCTCAGATGTCCGGGCTTGGAACCGATCTAGCCGCTGTTTTTGAC -CAACCCCCTGAGTCTGGAAGTGATACGGACTTTGTCATCACCGCAATCCGTGACGATCAT -GACGGATACGTATCGGAGAACATGGAGGATACGCCGTCCGTCTCCCCAACCCAGGCTCAG -CCGACCTTTGTGGAACAGAACCCCTGGACCTCAGGGCCAATCCATGTCCACCGTATCATT 
-CTGCAGCTTAGGTGGCCACATTTCAGGCGCTTGTACTCCGCCCAAATGGCGGAGTACCAC -ACTAAGCGGATGCACATCCCCGAGCCTTATTCTGTTGTGCGTGCGTTTCTCTATTACCTG -TACACAGACAGTATCACCGGCCACCCAGACTCTTGCTCGAGCGCCATCGATGTGGCGGGT -ATGCTTGTTATGGCCAATCTTTATGACATGCCGAAGCTTCGCCTGCTGTGCGTCAATAGG -CTAAGCCAAGAGCTTGATGTAGACAACGCGGCGATCGTCTGGGAGCGTGCCGGGCGGACC -AATGAACATTGGTTGAAGCGCCGAGCTGCACAGTTTTGCCTGACATACTGGGGTCGCGTG -GTCCGAACAGACGGTTTCAAATCCCTCAGCCACCAGAGTCTGATCCAGCTCTGCGAGGTG -GTGGATACAGAAGGTCGCATCTTCGCAGGACCTGAATTGGAGATGGTCACATTCGGAGCC -GATTGGCTTGACCGAGACAAAAGCTCCCGACTATTGGGATCTAGTACAGATGAGATGTCC -GAGGTTGATGGGGATGAAGGGATGGAGATGTCATGACACGAATGGGGCACGCATTATGAT -CCGGAGTTCTTGGCGAGGATTTTATTATTTATTTGCACATCTTTGTAACACGTTTGGAAT -GTACAGATTCTTTTAGAGATCGGCCCGTCTTACCGGGCCATATGAGCCCATTCGTCGCCA -GCCGAGGTGGAGCCTTTCGACGTTCTTGACGCGGACACAACTCCGGATCGCGGAGGTCAT -TCCTGTCACGATAAGGAAGCTCTTGACTGTTGTCTCACACTTTGGGGGTTTCAACAAAAT -TAATAAAAAATGTTGAAACTATTTAAATATCTATCGGTGCTCGAATTCAAAGTTTGGTTG -ATATACGGCTTCTACAGCTATTCCTTTGACACTTGATTTCACCATGGGATCTACCGAGCA -ATCCACTCTTCCTTCAGATTTCCTCTGGGGCTTCGCAACGGCCGCATACCAGATTGAGGG -AGGTGCCAACGAAGATGGTCGTGCCCCGTCAATTTGGGATACATTTTGCAAGACTCCTGG -CAAGATCGCGGGCGCGGGAACTGGAGATGTAGCTTGCGACTCTTATCACCGGACACACGA -GGATATCGCATTGTTGAAAGAATGCGGTGCTCAGGCATACCGATTCTCCCTGTCATGGTT -CGTCTTTCTGTGTTGCTTTGAAGCTTCGATCTTGAAGTTGCTAACCTCGGCATGCCTAAA -CGTATAGGTCACGCATTATCCCTCTCGGCGGTCGGAACGATCCAATCAACAAGAAAGGTC -TACAGTTCTACCAGAAGTTCGTGGATGACCTGATTGAGGCAGGCATTACTCCCATGATTA -CCCTCTTCCACTGGGATCTGCCCGAGGAGCTCCACAAGCGCTATGGTGGACCTCTCAACA -AGGAGGAATTCGTTGCTGATTTCGCGCACTATGCCCGCGTGGTGTTCGAGGCCTTTGGAT -CCAAGGTTAAGTACTGGATTACTTTCAATGAGCCTTGGTGCATCAGTGTTCTTGGATACA -ACAATGGTTCCTTCGCCCCGGGCCACACAAGTGATCGCACCAAGAGCCCGGTGGGAGATA -GCTCGACTGAGCCGTGGATTGTCAGCCACAGCTTGCTAGTTGCCCACGGAGCTGCCGTGA -AGATCTACCGCGAGGAATTCAAAGAACGGGACGGTGGTGAGATTGGCATTACGCTGAACG -GTATGTCTGATTTTGCCTCGCGCGCATGCATAGCCTTTACTAATCAAATTATCAAAACAG -GTGACTGGGCCGAGCCCTGGGATGCCGAAAATCCAGCCGACGTGGAAGCCTGTGACCGCA -AGATCGAGTTTGCCATCTCATGGTTCGCTGACCCGATCTACCACGGCAAATACCCAGACA 
-GTATGATCAAGCAACTTGGCGACCGCCTGCCAACCTGGTCACCGGAGGATATTGCCCTTG -TCAAGGGCAGCAATGACTTCTACGGCATGAACCACTACTGCGCGAACTTCATCCGTGCGA -AGACCGGAGAGGCGGATATCAATGATATCGCGGGTAACCTGGAGCTCTTGCTCGAAGACA -AGAACGGTGTCAGTGTAGGACCTCTCACACAATCGCCTTGGTTGCGACCATCTGCTATTG -GATTCCGGAAACTGCTCAAATGGCTCAGTGAGCGTTACGGATATCCCAAGATCTATGTCA -CCGAGAACGGTACTAGTGTCCTGGGCGAGAACGATATGCCCCTTGAGGAGTTGCTCAACG -ATGAGTTCAGAGTGCAGTATTTCCGGGATTATATTGGTGCTTTAGCAGATGCATATACCC -ACGACGGTGTCAACGTGCGGGCTTACATGGCTTGGAGTCTTATGGAGTATGTTTGGACCT -TATCCTTAATTCTAACATTACATTTGACTAACTTGTTTACAGCAACTTCGAATGGGCTGA -GGGCTACGAAACCCGCTTCGGTGTCACTTTTGTCGACTATGAGAACGACCAGAAGAGAAT -TCCAAAGAAGAGTGCCAAGGAGATTAGCCAAATCTTCGATCGTTTGATTGAGAAGGCATG -AGATGGATGGTGTTCCGATTGATGTTTATGCCGGTGGAATCCGGCGACTACTATGATACC -TGGACTCTGTACGAAGTTTTTGAATGCAAATGATATTTGATTTTTCTAGTTACGCCATGA -GCCGTGTAATCCAAACACATGTGGGGGTTGTGAGAGATGAAAAGATTCATGGATGAAGGT -AATGGATATCGAGTTGAGAGATCATGTGAGAAGAACGATATAGTCTACCTCAAAGTCTTG -ATTGAAGTATGACATCGGCATTCGTATGGCACACTCCTCTTCTCTTTGCTTTCATAAATT -GTTATGCTACTTGGCCATTTTGGAAATGGGCATATTTGCCTGCATTCAATTCACACTACA -ACCAAGTCATAACTAGATACCTACCAGTACACAAATAGAGCCGGCCCAGTCAGCTTGTCT -TCATTCGGCTTTCTGGTAGAAATTGGGGCTGGGTATGGTTGCAATACGAAGTTGTATGTA -AGCATTCAATCTGCTCTCGTTTAGTCCCTTTCAATGACTATTTACCCCCAAAATGCATCA -TTAGGGGCCCCTACCCAATAGATCGACTTGCTCAAAAGCTATGTAGTATTGTAGGAACTG -TCCAGACATGTTCCCGTATGGTCCTAAGTCCGGTCATTTCTGTTCGATCATGCAGCAGCT -CATTGAAGTTCGATATACTCTTCATTCTCTCGGCTAGATATAATGAACTTTCAACTACTT -TGTGTGGATTAAAAGCTAGATCACGAAGTGAATTCTGATACCCCGGGAGTTTAGTAAATC -CGCCTCCGAATGAGATATCCTCTTCATCCCTATAGCTCCGATATATTTACTCGAATCAAT -TTGTCAAGAACACTATGGATGTATCCCAAATCTCCAAGCGGCTTTTGCCGGAGCCCAAGA -GATTCCCAGACTGCTGCCTGGCTATTTCCAGCACTTTGATCACCTACCTTGCTTCGCTTC -TACCCCCAAAACCGGAATTCACCATCTCGGTCGGCAGTGGATCTGGTCTCCTGGAAGGTC -TCATCGTACAATACGACCAAAATCTGTCCATCGAGGGCGTTGAAGTGGATTCAACTATTA -ATCGGTACATTGCCGAAGAAGATATGAATGTGGTTGGCGGAGGCTGGGGCCTTTGGCCTG -CTGCCCAACAGGCTACAGTGTGGATGTTTGTCTACCCTCGCGATCCAAAACTCATCACCA -AATACATTGATACCTATGGTAACCAGAATGTCGATTTCATCGTCTGGCTCGGGCCTCGGG 
-TCGATTGGCCTGATTATGAGCCGCGTTTCTGTCAATCTCCATTCTCTGAGTTGACTTTTC -CAGATCCTATCGGACTAACACCATATGAGACTGTGGTTGTTGCTAAAAGAACGGGTTAAA -ATGAAGCCATTCTGCATCGTGGATTTGGAAGATTATGCTACACACATGGTGCATTCGAGG -ATTCTTCTATTTTGATAGAATCTGACTATATTGTAATGGTACGACAAAATCAATAATAAC -CAATTGTCTAATCAATAAATCTGGTAAAAAAAAGTAGACGAATTTAACTAAACAATCTAA -CTAACCAAATCAACCACTTTTATATAGACTTATCAAGACTGACGAAATTCGGAGTAAACC -ACCGAGTAGATGACCGAATGGCGTTTGTATTTGGCAGCTTGTCCAGCTGCTGGAGGAAAC -AGGAGACGGTCAAGGCCCTACAAGGCCGCGTTGCACAATTTTCCCAAGGATACCCTAGGG -CCACGGGCAAATTATAAAGAGCCAGAGCACGGGGCTAGACAGATTAAGGGATAACGAGGT -TCAATAGCGGCATAAGAAGGCTGTCCTGAAGCCAATGCTTTACATCCGTTTGCGCTGAGG -GCCGTTGCTCTTGTTACAACACTCTGTAGAGCTAACCAAAATCTGATCTATATAATGCAT -GAAAATTCAACTTGCGACTTGAAAATGTGATTTGAAGAGCAGGTCCTCTGTCTCAAAGAA -AACGCAAATTGTATCGTGATTGAGCAGTCTGCAGTCGGTTTATCTGAACTTGGCTTACAT -CTAATCTCATCTAAGTTCTAGAAATTGGGAGACTAGATCATGATTAGGAGGTACGAATAC -AGCGATTACAAGATACGAGTGGAACCCTTAGATGCATCTCCTACGGTAGCTACCTAGTAA -AATAAGGAGTAGGGGAGGGGTGTTTGGGAGAAGCCGCTCTCAAAAGACGCTACTCCTTGG -ATGTCACGTTGGTTGCAGTCAATACAGAGGTTCTTCGAAGATTACGAAACAAAACGGCTT -CGTACAGTAATTCATATATTAAAGCTAAATCGCAAGGTTAAGCTTAAAGCGCATACCTAA -AATGCTTGGACTCCATATCGGTCAGATCACTGAAGCCATCCAAGATAGCCTGAGAGACAA -CATCCTCGGGAGTCTCATTAGTATCAGCTGCAGCCATTGCAGCATCCCTCCTCGAATTCT -CCCATTTCATTGACACATATCCCGCACAAGCTAGAACAATGATACCCGCATAGCAAATGA -GCAGGCCTAGTAGACCCGAGTAATAGCGCGGGGACTCGTTAGCAAAGAAGATATTCGCTC -CGGCAACATTGCCGGCTGCATATAGCGAGAACCAGAGACCATTAGCGACCGCGCGCTTGG -TGTGGCCGGCAACATTTGTGGCAAGTAGATTCCACGAAATAATGACGGGAGCACCGACAA -GATACTGCAGCCAGGTACATCCAACTAAAGTCCACGGGTGATCGAGACTGGTGAAGCGGA -TACCCAGGAGACCGCCAAAGGGAATCAAACATACCACTGCAAGAACGATGCAGCGGGAAT -CCTTAACGTATGTAGCAATAATACCACAGATTGGAACAACCACCGCTTCGACCGCACCTG -TAGGCAGCTGAAGGAGTGTAGTGTAGATCTCGCTGAAGCCAAAGCCTTTGATCAAGGCTG -ATGAGAAATTCGATGAACCGCCGTTGATCACACCGCAGGCGATGCCGACTCCCGCGCAGC -AGAGGACTCTGATATCGTAGAGGATCTCCAGTGCCTGGCGTGGTTTGATATCTTTGGTCT -TCACGCCGATCATGTTTTCGGCGACACGCTGCACCGCGATCACGCGCTGTCGATGGGTCA -AGAAGCGCGCAGAGCTGGGCGAGTCAGGCATAAGCCAGAGGAAGAGACAGGCACAAAGGA 
-AGTTGAGTAATCCGATAGTTAAGAAAATCAACTGCCAACTGCGGAGCGCGGTGTTATGTG -AATGGCCGAGACCGAATCCTAGTAGTGCTCCTACCATGGTGGCCATACCATTTGCGCTAA -GCCAGACAGCCATGCGCAACGGTTGTTCAGATCGCTTGTAGAACATTGATGTCAGGTTCA -TCACTAGATAGAAAAAGAATGAATTAGTATTCTTCTTGTCTTTTGAGGTTGATATGGAAA -ACTCACTGCATGGACTGACTCCAGCTTCTGCCATGCCCAAGAGGAATCTCAACACCAGCA -TTCCTGGGTAGTTTTTTGCTCCAACATGTGCGATAACGAGTACACTCCAGATCAAAAGCA -TACCACCCATATATTTGGCGATTGGCAATCGTTGGATGAGGAGATTGGCTGGCAGAGCCC -AGAAGAGATAACCGAAATTGAAAATCGCGGCAACCCATGAGTATCGTTGACCTTCAAGAC -CGAGGTCTGGGATTAAGGAATATGCCGATGCGTAATTTAGTGATAGTTTGTCTGCAACAT -GTTAGTATCGAGCAATGGGGAGATACTTTGATTTTGTATACCGAGAAATTGAATCACATA -GGTCATATAGAGCTATAACATGTCAGTAACAACACTCAAGATATCACGATCACCCTGGAA -CTGACCATAGGCATGATCCTCCAGTCGATAATCCTCAGCACCTCGGCGCCCTCAGTCTCG -AGTTCCTCAGCAGTTAACTGCTCTGCTTCCAGATATAGATTGGCTGCGATATCATGAGTC -TCGCCGAGTGCAACATGTGTGACATCTTTCTCTGTAGCGGACTGATCCACTTTGAGGTTG -TCCATTATAGGCCGTCAAGTTGGGAATGAAGTCCATAGCGAAGTTCAGATAGTTATATAT -ATAAAGCTATCCTATCAACGCCAAGAAGATAAGCATCACTCCGTTTATCTGAAGTGATGT -CTTTGGTCAAAAGGTTGGAGATAGGCGGCCAAAAGTCCGAAAAGTGTGGGGGCGGACTAT -TGTCCGCTGTTCTCTTTCTATACGGAGTCCGGAGTATACGGAGCATACTATATATCAGTT -TATATTCGGACTCTCTTTTGACTACATTTTGACTACATTTTGACTACAATTACTTCTCCT -TACATTCATTTCAAATTCAAGATGACTCAGTCAGACTTCTCGCTCTTAGACGCGAGTATC -TCCGACCTACAATGTGCGCTTTCTACAGGATCGCTCTCTAGTGTCGGCTTAGTATCACGG -TACCTACGGCGAATCAGCGTCTACGACTTCAACACTCTAAAACTCACCGCTATCCCAGTC -CTCAATTCATTCGCCCTCGACGAAGCAGCCGCTTCCGATGCCCGGCGGGCAGCCGGCTTG -CCGCCGCGTCCATTGGAGGGTATCCCTTACCTGGCAAAAGACAGTATTAAAGTCAAGGGC -ATGACCGTTGCGAGCGGATCACCTGCATTCGAGACCCTCATTGCCAACGAGGATGCTGCA -TGTATCCAACTACTCCGTGAGGCCGGCGCCGTGCTTCTCGGCCGGACGAACATGCCCGCC -ATGGCGTACGGTGGTATGCAACGTGGGTGTTATGGGCGCGCCGAAAGTCCATATAATCCT -GCGTACCTGGCTGCGGCGTATGCTTCCGGTTCATCAAATGGATCGGCGGTTGCCACAGCG -GCCAACTTCTGTGCTTTTTCTCTAGGCAGCGAAACTGTTTCCTCGGGACGCTCGCCCGCG -TCAAATAATGCCATTGTTGCATACACGCCTTCAAAGGGCCTTCTTCCTCTTCGTGGTATC -TGGCCACTCTACTTGACCTGTGATGTCCTTGTGCCTCATACTAGGACTATGGCAGATTTG -TTCCAGGTCCTAGATGTTCTTGCTGTGTCCGATGAAGCCCCGAGAGGAGATTTCTACCAA 
-GAGCAAAAGCTTATCTCCCTCCCTTCAATCAAAACACTCCGACCGCAGTCCTTTGCTGAA -TTGCAAGATGGCACATCCTTGCACGGAAAGCGTATTGGGATACCGTCAATGTACATTGGG -GGCGATGACTCTTCTCTATCCCCGGCCAGCAGGGTCAATACCCGGCCCTCTATAATCAAG -CTATGGAACAACGCCAGAAAGGTCCTTGAGTCCTGCGGTGCGGAGGTAATAGAAACAGAC -TTTCCACTCGTGACAACTTACGAGTCAAACGCAGAAAAGGGCCAGCTCGTCACCGTCGCC -GGACTACCAGAAGGCTGGTCCGCATTGGAGAGGAGCGAGCTCGTCGCACATATCTGGGAT -GACTTCCTGATCAACAATGGACAAAAGAACCTGGAGACTCTTTCTCAGGTTGATCCGACG -ACTATATTCCCGCTTGCACCGGGGTCTCTGAAAGGTACGCCTGATGCAGCTAATGCACTG -CGATGGGACGAGATGGTCAAATACCCGCACCAAAGATCGGCGTCTATTTATGAGGTCCCC -GATATTCAGCAAGGAATTCAGGCTCTGGAGAATGCGCGCAAGGAGACTTTTGAGGACTGG -ATGGATGGGCTTGGGCTTGATGCTGTGGTTTTCCCGGCCAATGCTGATGTTGGCGCTGCT -GATGCGGACTGTGATGAAAGGGCGTCGCGGTTTGCGTGGAGCAATGGCGTCAAGTACTCC -AATGGCAACAGACCTATTCGTCATCTTGGTGTACCTACTATCTCTGTCCCGATGGGTGTC -ATGGAAGATACGAGAATGCCGGTGAATCTTACCTTTGCCGGGAAGGCATATGACGACAAC -AACCTGCTTAAGTACGGCTTTGCATTTGAAATGGCGATGCAGGGCCGTGTTCAGCCATCG -CTTGTGCCTGCTCTTGACTCTGATTTCATCAAAGGTGGCCAAGATCCTCGGCCATGGGAT -TCTCCGTCCACCGTTGACTTGGTGGTAGAGACTCAAGTAAAGAAGATTGACGGCTCGGTA -GTTATGATTCAGGTCCAGGGGTCAGTCTTTCCCAAGGACTCCCAGCTGGAAGGTTTGGAA -TGCCATGTCAACGGTCAGGTGGTCAAGCCGGTCATAGAGGGCGATAGATGGTTTATCCAA -GCTTCCTATCCCGTATCGGAACGCGACCAACCCTGGAAGAGGTGGTCGAGTCCTGCTTTA -AGACAGACAATCGTAGTTATCACAGCTCATACAAAATCGGGCTCGACAGCGGGCAAGTTG -ATCTCATTATGAAGACCTAACATCGCCAAATTCGATGCAATTCTTATTTCCACTACTAGA -TTATGTAGCGCATTCGTAAAATACAAAGCTTTAATATTTACAAGATCTCTTTCAAGTCTT -CCTCTAACAAGTCTTGATGCATACTCTCCCCATACATCATATCCTCAATCATTCCGGAGA -CAGCTCGGTGCAAATAATTCTTCCTCAGCTGCTCCAGGTGCAATTCAGCCTCTTCTGGTG -ATAACGGAGCACTAGAATGAGGTTCAAGTGTCCCATCATTCCACATAATCTTCTGTCGTA -TGAACTCGAGCACATTCATATATGGCATTGCATCCTTCCACCTGTCGACAAAGCCAGCCA -GATTGGCTGCGCATGTGGCAATTGCCTCTCTGATACGCCGATGGTTCCGAATATCGGAGA -TAGGAGCGGTAGCCCAGACAATGTATAACATCATGATTCCCGCTCGGAATTGAAATACAA -GCTAATGATGGGGGTTATTGTTAGTTCATTTCGAGAGTAACACGATAGAAGAAGGGTCCT -TGGCACATACTGCAGACCAGGTATAGCACATTACGTGTCCTTCTTCCTGCAAGAGCCGAA -AATTCAGACAGGCTTCCCCAATTGTCGCGAGAATTTCTTCCGTCCCATCAATCTCGAGGA 
-AATCACGTTTCTCTTCTATCAAAATCAATAGACTGTAAACATATAGCTTTCGCATCCATA -TAGGATGAAGATAGCCGTGCTGAGCATTGTCTGATTCATAATTCGGAATCTCTTGCCGCC -ACCTATCGAGAGCGGACTTGAATGAGGAGCTGTAGCCTTTCACATCCCTGGCGAGAGGAT -TGCGGGATCGGTTCTCCCGCGATCTTCTAATGCTGGCTTGGATCTTGCGTAGTCGGAACA -AGTGGCTTATGGCGGGGTTTGTGCTTAGGGTTGGTTGGAGGGTATCCCCCAGAGTGCACA -TCTAAGACGAGAATTAGTGACTGTGAGAAGCTATGTAGAAACATATTTTGGGATGGCTCA -CGTCATCATCAACAACTCTATAGGCAGCTGATACTGGGCGATCCCATCCAGAGACAATAA -TTTCGTTGAGCATATAGCAAGTAATGAACATATTCTCCGACAAAACTTCGCTAGATTCGG -TCATGGAACCATTGTCGCGGCATATCGATACGTGCTTTCGAATTTCTGTCATTGCAGCTG -TACAGAAAGGCAACACAGATGAGACGCTCGTTGTGATATCCTCAGAAGAAGGAGAATGCA -TAGCATAAAGTATCATGAACAGTGAAGCTTGGACTTGTAAGACCATGGATGAATGCCCAA -CAACATATGGATAATATCTCATAGCCGAGGCATAATACTCCTTTGACCGGACGGAGTGCG -AGGGGTCTCGTTGTAAGTTACTGCACAGGGATGCGCCAATAGCATATACCATCTGCAGAA -AGAAGGCTTTCCATTCATCCTGCCCATCCAGGATCGCTTCAGCATGCCATGATTTCCAAG -CTGCTTCATTTAAGAAGGGCCATTCCCGGTGAGTCACGGAGAAATACACATTTGTGTAGA -GCTCTCGCAGCCAAGGAGAGTCGTCCTCGGTCAAAGGATATGAGGGTGTCAAGTCAATCC -ATGGAACGTCGAAAGTCGTCTCTGGGAATAAAGAGAATGTGTCAGGGGAGAAAGATCGAG -ATGCTTGAGATGATCCTTGACTGCTGTGTTGCAAGTCTGACAGAAGTATGAGAGATCCAT -CATTATCTGCGCTACTATTCTGATCGGCCATTAGATAGTTTGAGTCATTGCGCTTGTGTT -CTAGGCCTTCAAAATCCGTCTGGACTCCAGCATCACATGTCTCTATTGTTTTTGTGATAC -TGGATTGACGATCCAGCTCAGAGACCCGTTTCTCGAGCTGTCTCAAATACTCCAGTGGGA -TTGTGATATTATCTCCCGTTTTCCGAGACCGAGCAGGCAAGCATTCAGCACCCTTGCGAC -GACATTCGCCACATGAAGGTTTCGCATGATCACATTTCCGCTTATGTTGGAAGCAGCGAG -CGCATGCTATATCTCTATGCTTCAAGGGGCGCGACATGCAGCAATTGGGCACATATAGGG -AGTGGTAGAGAAATGCGGGGAGACCTGCGATAACGGAGTTGATCCGCACCGTCCTTATTA -ATGGGCTATAATAGGAAGGTCTAGGCGATGTTGATACAACTTATTTCTATTCTTTATACT -ATACTAATTTCTGTTAAGATATATGTCTTAAGATATATGAAGTAAAAAAAAGATATTAGC -CTCGTCAAGTTGGGTTGGAGATAAAGTCCATCATAACTATCATACCTCGATCAAGCCCTA -TAAGGAACCCCGAATTAAGAAGTTGACAGCGACAGATCAGCTCTTATTCCATCTCTGAGC -CTTTGTATCATATCCAATCAATTTAGGATCAACATGTAGACTCTTGAGCATTAATTCCAT -CGTGAAATCCTAGGTAAACAGGTTGGCATATAGGTTCCACCGAGAACCATAGTAATATCA -CAATACTTACCTCATTCTGACTTTCCTCCTCGCCGTGATTCTTGCTCAGTAGATGGTGAG 
-AATAATCCTCAGTGTTGTGGTCATCAACATCTTCCTCGTTCTCGTCAATGTGCTCCCGCT -CACAACGCTCCTCATCAAGCTCCCACCGAATTGCATCACGCTTCGACTGCTCTTTCCAGG -CTGCCATTCCTCCCATGTGATTAACCCTTTCCTTGGCCCTAACAAACACCTCATCCGCTG -CATCCTGACTGGCGAGCCGCCATTTGGTAATGAGAGTTGGAATTTCGATGTATCTTGGGT -TGTTCTCAAGACTGTGGGCCTGCGTTGCGATTTCGAGTTTGGTTCTGAGCACATCAACCT -TTGACTGGATCTCCCGTTGTTGCTTTTGGAGTTCTATAATTTCTGGGTCGGAGGTTTGAA -ATGATTGTGAGTGGAACGGGGGAAAGAGTTGTAGGGTGCGTTTGCGCTTAGGGGCCACTG -GAATTGCTCTCGGACTTGATTGATTTGACATTGAGTCTGAATCTACAGGAATGGTAGGAG -TCGCTGAGGCTCTTGTCTCATCAGGGGTCCTGGATACGACAGACTTGGATGGTGTGTCGT -CAGTGTTTGTTGGTGTAGGTCTGCGTAGCGGGGACTTGAACGGTTTTGAAAGAGCGGTGG -CTGCATCGATGCGACGACGTTTAGTAGCTGGCTTGTCCATGATGTATCGGATCCAGACTT -CGAAAGTTCGATATGTGTTCAAAAAGGAAGAGAGATCAAAAGTTGGAGATACCCGAGTGC -GGAGATGCGACCTTATCGATAAGCAATTGAAATTGATCTACGCTGGAAATGCATACACAC -TACCTAGGTACCTACATATCTCAATGACCAACTCCTCCTGCTACCCATTGCCCAAACTCG -GGGTCACCAGGCTGAGGAAATACCCCACGGTCGttggcatttccattcccattatgggca -ttgccattgccattggcgttctcattCTGTCGGTtgcgctgctgctgctgctgACGGAAA -TAAACCAAAATCAATAGCGTAGCAGCCAGTCCGACAATGAGAAGACCCTCGAGCATCCCC -TCGTCAATGTCCAGGTCCAGATCGTCATAATAGCCATCCTCGTGGCCTTCAGACTCCAAG -CCACGATATTCATCATCCTCTCCTCGTTGTTGTTCGTAAAGGTCTTCGTAATAGCCCTCC -TCGTCGTTCTCGATGAAAGCCTTGACCCACTCCTTGAAAGTACGACGGGGCTTCGAGTCT -GAGGGGGGAAAATAGTAGGTTAGAGACTAGAACGACGCGATTGATTGCATGGGGTAAGCA -GGAAAGAAATGCGTACCTTCCTCGTCGCGAATAGGGTTAATATCTCCGTTGGTGATGCGA -TTCCAATATCCTCGAACTCGGAGTTTGATCAAGCTGAGCTTGACTGGCAGATAGGCTTCA -GGGCTGAGATCGAGAGCCAAGTCATAATACCGCTTGGCCATGTGGAAATCCTGATCTACT -GCCACGCCATTCTCATGCATCCAGCCCAGATTCCAGTAGCCCTGTGCACTATGATGGGCT -TCAGCAGCATTGTGGTAACAGGTCGAGGCCTTTTCAGCATCGACAGGTGTACCCGTGCCA -GACAGGTAGTAGTCGCCCATCTTGACCAAGGAGTCGATATTAGCTTGCTTCGAAGAGCGC -GTCCAGTAGATCAACGCGAGTCTCGCATTGCGCATCAAGGAGGAGCGAGCCTTCTCGGCC -CATGGAAGAATAGAACTGAGCGATAGCACAGAACGGTGCTCATCAAGCAGATAAGCCACA -TTTGCCTGTGCATTCTCATAGCCCTGTTCTGCTGCCATCATTGATGGGATGAGAGCGCCC -TCCTGGTCACCAGATTCGTAAGCAGCGTTTGCCTCCGCGAAGGCTGAGTGGACGATTTCT -GCCTTCTCGGCAACCATTTTGTAGTAGGCGGCGGCCACGCCACAGTGCCGCTGTCTACCA 
-ATACCGAAGTTGGCCATTTCCGCTAGGTAGTAGTATGCCTCCATCCACCCCCAGCGGGCT -GCAAGCTCGAAGTAACGTGTTGCGCTAGCCACATCTCCTTGATCCAAAAACAGAGCACCC -AATCGTGATTCTGAAGCTGGGAAATCCTGCTCTGCCGCTGCTTTGAAGTAGTGAGATGCC -TTGAAACCATCCTGTGGAACCCCATAGCCCTTCAGGTACATCAAACCTATCCAATGCTGG -CATAGAGAGTCGCCATTCGTAATGCCCCGCCGGAACCATGTGAGGGCAGTCGGGTAGTGT -TGCTCCACCCCCTCCCCGCGGAGGTACATCAGACCAATATGTCCCGCGGCCTTGGATGCC -AGTTTATCAAGTCCTGCGGGCGGGTTTGAGTTGACGGACCCATCCTTGTTCCAATATTTC -TTAGTGACAACCTTGAAATACCTCATGGCCCGTCGGAAATTGCGCGGCAGACCCCGGGCG -CCCTCGTAGTTCATCTTACCGAGGGTATACGTAGCTTTCAGTTCGCCCTTCCGGGACATG -AGGTCCAAGTACTCCAGGATATCTTCCACGCTGGCCTCGGGAGTGGCATTGGCAGAGTCC -CGCGCATTTTGACCAGAGCTCGACACACTGGCTCCTGGGCCGTAGACACCACCTTCTTCA -TCTGCCCAACGATAAGATTCTCGAACTAAAGCCCTGCCACCCGGAGGCCCTGAGCGGTAG -TACTCGACCGCCTTATCCGCAACCTTCTTATAGTAATGAATCGCGTGGTCACAATTCTTC -GGTGTTCCAATACCAGTGTGGTAGCGATACGCAAGGGTCATCTCCGAGCGGATGTTCCCG -GCTTCCGCTGCAAATTCATGGTACATCAGGGCCTTTGCCTGGTCGGGTTCGACCGCACCG -CCGATCCCCGTCGCATACATGAACCCGACCATGTTCTGTGCTGTGTTGTTTCCATCTAAC -CAAGCCAGCTCATGGTACCAGCGGAAGGCCTCTTTGAGGTCGCGGGGATAGGAGAAGTTG -CCATAGAAATTCATCTCTGCTAACAAGAACATCGCATCCGGATCTCGGTCTTCGTCTGCT -GCTACTCTTAGCTCCCCCACTGCCGCTGCGACATGTTTGTTCAGTTTGCGCTCGGGCTTT -TCAATGGAGGGGCCATTCAAGAAAAGCAGTCGAACCGTTTGGCGGCCATAGTATACAGCA -GTCCCCAGTAACCCTCTCGACTTGTCGGATTGAAGGACTGTGTTCTTTGTGGTGCGAAGT -GTGTCCCAAGCGTTCTCAACTCGTTCTATTATCAAACGATAGAGTTAGTCAGTATCAAGC -TTCAGCCGAAAAAACGCGAGCTGTCTCTACTTACGAGTCATGGGTCGGGGCTCTGGTTCC -CCTGTCTGGAACGTAAATTCACGATTTGCATCGTTGCCGATTGCTTCGGAGGTCTGATCA -GTAAGTGGCTGACCAGGCCCAGCTGCACATGTCTGGAGTAGCACTGAAAATGGGAGTCAG -CTTGGGTTGCGGTCCTGCAGGTGCGGGCAAATCTTACGCAGGGCCGAGAGAAAAAGTGAG -AGCGGCGTCCTCATAATGCCTCCAATATACTGATGTAATGTCGGAGAGTGAAGCACATTG -CGACTCTCAAAGCAATTTGAATCACAAAAGCGACAGAGCTGGATCGTGATATGATGTAGT -TGCCCAGAGTACGGGCCACTTGGCTTGACGGAGCTTCATTTTGCCGAGAGCCAATCACTT -GTAAATACCGATGCACCTACAAACGATATACCCATGCTTTCAACATCTGTTGAACATCAA -CTGGGTATATTTTCTGTATTTTTTGGTGCATAATGTTCTAAGATCTTTGTTGTCTGTCTC -CATCTACGAGTGCTTCAATTCGTACATAATACTTGGACAGGCAGGCATCTGGACCTCTTT 
-CAAACAACCACTAAATCCTAGGAATCTACATGAGGCATTCTTGTCTTCTTCTTTACAAAC -AGAAAAAGGAGTTGGAAATATCATAAACATTGTTGGAATCTACTCGTCCCCTCAGGAGGG -TGTGTACAAAACAGAAAGCAGCAGTCAAAATACATCCCCAGTGCGCTTTCCATGCAATAA -AAAGGATGTGGGCACAAATTCCCCAATGGCAACCAAAGTCCGCACTCTTGTCACAGCATC -CATGGAGTATACCCTCAGGTCCGATCAAAGCATGTGGGGGCTGTGCACAAAAGGGGAATA -GTGTGTCAGGAAAAAGCAAAGCAATCTCAGAACTATGGCGGCTGAAACCAAACGGGGGCA -CGCGTTGGTTAATTGGTTAAAAAAAAAAGACTAAACAGATACAATGAATGGGAAGCAATC -CCCGATATCGTGAAAAAAAAATTATCAATCATTTTCGAGATGTGTAGCGCGAAAAGATCT -TTTTTGTTTAGTGGTTGGGGTTCAATTCGATGATGGTACAGGTGTGGAGCACGATCCTGA -GATAGACCAGCCATCTTCGCACGATAATCACTGGCGCGGCGACGTAGGGCTATATGCCGG -AGATGTCGGGCTATATGCCGGGGAGGTAGGCGACGTCGGAGAACCAGAGAAATTGGGAGA -AGTCGGCGAGTAAGACTGGGGGCTCGTAGGAGACCAACCAGGTGAAGTTGGTGTGTACTT -GGGAGAGGTGGGAGATGTTGGAGACAGGTGCCGAGCACCGCCAATCGCAGGACTGGTGGG -ACTGTAGCTCGGCGAAGTCGGACTGAAGGCCGGCGAGGCCGGACTAAAGCTGGGGGAAGT -CGGGCTGTAGTTAGGCGACGTTGGCGAGAACCCAGGCGAAGTCGGGGAATATGAAGGGGA -GGTAGGTGACGCTTGGCCGTACGCAGGCGAGGTTGGCGAATAAGCCGGAGACGTCGGTTG -GAACGATGGGCTGGAAGGCGAGAATCCAGGAGAGGTCATCATTCGCGGGCTTGTCATGCC -AATGCCAGGAGATGTCGGTGAATAGCTCGAAGACGGAGAGTATCCAGGAGACGTCGGGCT -TGTATTGAAGGGACTTGTTGGAGAGTATCCACCGGGGCTGCGAGGCGCAGGACTGAACCC -ACCAAAGGTATTCGAGCTAGCCTGGTATTCGGTGAAACCTCCAGTAGGCTCCTGGCCAAT -CTGGTGCATGGGTGAGAACGTCGCATCAGGGTCGCTACCAGCCATGGAGGGGGAGCTGGC -CATAGGAGAACCGGTGTCGTACTGAGTAGCAGCACCATCCGAAATAATGGCATCCTTTGC -ACCGACTGCGCCCATAAGACCGTAGCGTGCATTATTGGATACGACAGTGTTGAGCAAGCT -CTGGTCCAGGTAAACGTCGAACTCACCCGTTCCGGCAGGTGCCATCTGTCCAAGGATCAA -GTTCTCTGACACACCGCGGCAATCATCAAGCTCTCCGAACGCAGCAGCCTCAAGCAGAAT -TTCAACTGTCTCTTCGAACGAACAACGCATGAGGGCACCATTGTCGGCACGATTGATACC -ATGACGAGTAACAGGAGTCAGATAACCACGAACAGTCATGACATCGACTAGCAGAGCCAA -GTGGCGGTGATTAACGTAAGAACCGTCAAATGCGAGCACCAGGGTCAATTCGCGAAGCAC -AGCTGTACGGGCAGCCTCAATACCGAAGACCTCGAAAATCTCAATAAACTGATTAGAGTA -GGTGCGCGAGGCATCGACGCCTGGAATTGAGAGAACAGCAGCCAAAGAAGATCCACTTGT -TTCCAGAACCCATTCTTTGCAAAGCGGATCAGACTTGCTGGTGTGGAGCGAACCATCTTC -GATAACTCGGACCTTGGACTTTTCGTTGATGAAAGCACGGTCAACACCGGGAACACCACG 
-AAGGGTCAAAGTGTCCAGCAGGTGCTGCTCAAGCTTCTTTAAGGTGAcgtcgtactcgac -atcttcgtcgtcgtcatccttgtagtcTTGGACCTGACGAATACGAATGACTTGCTCGTC -GGCATTGTTATCACTGAAAATGACTGCAATATCACGGGGATATGCCTGTTTGATCCTTGT -AGCAACGTCCTGCACCGTAAGACCCTTGTCAAGAAGAGAGCGGCGACTGAGAACAATACG -GAGCAACCACTTCGACTGCTGAGATGCGTCGTCGGCATTATCCTCAGGAATGATGAAGTA -TGATTCCACCATATCCCTATCGTTTTCGATGACGGTTGACTGGATATCGGGGTCGTAGTA -AATCTCCGTCGATTCCGTCACAGACCGCAGACTGGTATGCTCCACGAGACTTCGAAGAAG -TTTAGCGCCCTCCTTATCGTGAGTCATCATACCTTCTTGGTACACAGTCATGGATGGTGT -TTTGATGTTGGTGGCAACGTTAAGAATCTCCTTGAGACGTGGAACACCAAGCGTGACGTT -CTTGGAGGACACACCAGCAAAGTGGAAAGTGTTCAGTGTCATTTGAGTTGCAGGCTCACC -AATAGACTGAGCCGCAAGAACGCCGACCATCTCACCAGGGTTTGCTGCCGCACGGGCGAA -ACGACTTTCAACAGCACCCAACACATGCTGGAATGCTAGCTTGTTGAGCGAGTAGTCTGT -GACAAGTCGGCGGAAAGCGAGGCGACTGCGCAGTTGCGCCTTGAACAGCAAAGTCGCATT -AACCTGAGCCTCTTTCGAAATGGGATCATTACCACGGACGATCACCAAGCGATCCAAGAG -TTGCTGAACTTGAGGAATGACCTCAGCAGGGTGCAAATCACTGATTGTGCCCTCGCGAAT -ACGGAATGTAGTTCGAGCCATTTCCAGGATACGCTGGACATTAATGGGAAGCTGCATCAT -CTCCTCGTCCTCCTTCACCACCGTGCGCAGGAACTCGCGATCTTTTAGGATGGCCTCCCA -TTCTTCATCAAAGTATCTCTGTACCTCCATATCGCCGGCAATTTCGTTCGCTTGCTCCAG -AACGTCTGGGCCGAGAGTTCGCTCAGGGTCCATAACATCAACGCGGAATCTCTCGATGAG -CTTCGCATCGGAACACTTGATGTGATCGACACGCTGATTCTCAATGTGTGCACCATCGAG -ACCGTCTTCTCCGTAGATGAACTGAATGATATCTCCGAGGGAGTTGCGCACAGTACCATC -GTACTTGACCATGACCTCCTCCAGGGCCTTGACCAGCTTACGCTGGATGTAACCAGTTTC -AGCAGTTTTTACGGCAGTATCAATCAGACCTTCACGACCAGCCATGGCGTGGAAGAAGAA -CTCGGTGGGAGTCAAGCCACGGAGGTATGAATTCTCCACGAATCCACGCGACTCGGGGGA -GTAATCGTCCTTGGTGAAATGAGGGAGAGTACGATACTTGAAGCCGAATGGAATACGCTT -GCCCTCCACAGACTGTTGTCCAACAAGCGCAGTCATCTGTGAAATATTGATTGCTGAACC -CTTGGAACCAGACCGCGCCATTTGAATAGCGTTGTTCAAATCCTTGAGACTCTTTTCTGT -GGCATCACCAGCTTCGTCACGAGCATTGTTCAAAGCACGAGAGACCTTGCTCTCGAATGT -CTCACGCACATTCATACCAGGAAGGGCTTCCAGTGTGTTTTCAGTGGCACTAGCTGTAAT -CTCCTCGACTTCCTGTTTTCTGTTCCGAACGGCCTCCTCGATCTTGTTGATAGTATTTTG -GTCGGGAATGGTGTCACCAATACCGATACTGAAACCGTGGTGCAGCAACCAGTACCCAAC -AATCGTTTGGGCTCCAGTGAAGAATTTGACACAAGTGTCAGGACCGTACTCGTTGAAGAT 
-TGTGTGAATGACACCACCACCACTGGCACCGACTGTTTTCTTGGAGAGCATACCATACAT -CAGCTGTCCACCGTGGATCAAAAGACCTCCATCGTTGAGAGGACTGAACTTCTCGGCCAA -AGGCGAGCCGTCCTTGTCAACTCGCAGAAGGTTGAGACCTGAGGGGAACGCCATGCTGAT -AATCTGCTTTCCCGTCCAACGGGGGCGAGGCTTGATAATTGCGGGGGGTGGGATAGCACC -ATCCCAGTCAGGGACCCACAGCATGAGGTTCATAACTTGGTCCTTAGTGAGGAAGGTGTC -ACGACGGCAGATCTTGTAGATACCGCAGAGGGTATCCTGCACAATACCCATGAGAGGACC -GTTACGCTGCGGAGAAACAATGTTCTTCGGGACCAAAGTGAGCTCGGCGAGCTCAGCGCG -CGCCTCCTCGCCCTGGGGTACGTGCAAATTCATCTCATCACCATCAAAATCAGCATTGTA -GGGACTGGTAACCGACAAGTTCATTCGGAATGTGGAGAAGGGCATGACACGGACACGGTG -ACCCATCATGGACTCCTTGTGCAGAGACGGCTGACGGTTGAACAGAATATAATCGCCATT -GTCAAGATGGCGCTCAACCTTCCAGCCATACAGCAGTTGCTGAGCACCTGCGCGCTTGGC -GTGGCGAAGATCAATTCGTTCACCGTTGTCACGAACAATGTATCGCGCACCAGGGTGAAT -GTTGGGTCCATTTGCGACCAACTTCTGGAGCTTCTCAATATTGTAGGGTGTGACAACTTC -CGGGTAGGTGAGGATACGAGCGGTCGAGTACGGGACACCAACTTCGTCCAGCCTCAAATT -GGGGTCACCTGTGATAACGGTACGAGCAGAGAAGTCGACACGCTTTCCCATCAAGTTCTG -ACGCAAGCGACCCTCTTTACCCTTCAAACGACTGCGAAGAGCCTTCACGGGACGGTTTCC -CTTTTGCATTGCCGTGGGTTGACCAGCAATGTCATTGTCCATGTAAGTGGCGATGTGGTA -CTGAAGGAGAGACTCAAATTCACGAATAACGTGCTCGGGTGCTCCCTCCTGCTCACAACG -TTGAACATTTTGGTTCGCACGGACAATTTCTGCGAGCTTATAAGTCAGATCATCCTCGCC -GCGAGCGCCACTTGTGGCTCCCATTACAACACTAGGGCGAACAGTCGGGGGAGGAACAGG -CAAGACGGTAAGGATCATCCACTCCGGACGAGCATAGTCATTGCTGAGACCCATGATGCG -GACATCTTCCAGAGAAATGTTTCGGAAAATGTTCAGGGCCATTGTGGGAGTGATGACTTT -CTTCTCCGGTTGAGCCATCTCATCCTCCATCATGGCCTTGCTAGGCTTCCATGTTCCAAC -CAGACAGATGCCTTCTTTCCGAACGGTGGGTTGGGCGTTACCGCAGCCCCCATGGGAGCG -GCCCTTCTGGGAGTCCTTCGAAAATTCCTCATCATCAGCAGGCGCATCCGCTTCACATAC -CAGGACATCCTTGGAGAGTCGCCAGATAGAATCGAAACGTCGCTTGGGATCACGGAAACG -GAGGGCTTCGGTAAACTTTGAATCGGCCTGGTATCCATGTCAGCAAAGTGAACCACTCAA -GATTTGCCATCCCAATAGGGATGACTAACCGTGTTGGCCTTGATCTTTCCACAGTTGTGG -CAAACTGTCTCCAAAAGCTTCTTGATTTTAGTCAAAAAACCTAGAGGGGGGCCAGGAAGT -CAGCAGTGAAGACAAAAGGCAAGAGGAACCCGAGGCAAATATGTTACGCACCGATATGGA -ACACTGGAGACGCGAGCTCAATGTGACCAAAATGTCCAGGGCATTCTTTCGGTCCCTCTT -CGCAAGTCTCGCAGTTATATTGACGGTCAATGGTACCCAGGCGAGGGTCGTTGATGCCCT 
-TGGTCCGAGGCCGTTGGCGTTGATCGTCCTATTCCTCGCTTAGCCATCAAGTCGTCACCG -CGCATAGGAAAAGCAGGCCTTGTCCGAAAATGATACAGCGGGCAACAAATCAAAGATTTA -CTCACCAGCGTCTCGGGATATTCGATATGAACCACACTCATGCGCTTGATCTCTTCGGGG -GCGAAGAGACCAAACTGAATTTCCTTAATGGTGCGCAAAGGCGCCTTGGAGTAAGGGAAA -AACACGTTAGACATGATGGCAAAGTTGTGGTGACAAAAAAATAAGGATGAAAAAAGGCGG -AGATGACCGGTCACGTAGGCGTGTCGTAAAACCGGTGTAAACGACGGATTTCGGCTCGTA -AATATCGCAAAAAGAAAGTGACAGACTCGCTCGCAAGCTAGAAGTCAGATTTACACGTAA -CGGCGATGAAAAGAAGGTGATGTGGAAGCAGCTAGCCGGTTGAGGCCTTGGGCAGCCCTT -GCCACGAGGGCGCCGGCGACCTAGAGGTCCCGAACGTCACgaagaagaaaagaaggagaa -aaagagcgcagcaaagaagaTCGTCTTCGCGGTATCTTTGGAGAAATATCGCGATGGGAA -AGGCGCGATCCTCTTAGATTGCGGAGGCGGTGGGTGGATGGCGTGTCGGGGAGATGAAAG -GGTGATGTTCTGAGGTCGCCTCACTGTGATGGGGCTTGAGTTTTCCCTGCACGATGGTTG -CCGTCTAAACGGGATCTCACCGCTTTGGCAATTTGGTGGATACCTTCCCCAAAGTGCAGG -CACCGGCAGATTTTTATGTGCCGTTCACTATGGTCGATTCAGGCGGTGAGCTCTGCAGGC -ATGAATATGAGGCATCAACTTGGGTTCGTTTTAAATGGGCTTTGTCACCGCCTTGGTATC -TTCTAGATTCCAAGAATATCGACACCGGCAGAAGGAACTAAGATTATTGTCACTACGCAA -AAAGCTATCTCTGGAGTTCACACTGGATGCTTGGACACCAGTCATTATCCTGGCCTATCT -GATAGGCTGAGGATTCACTTTGAATTCAAATATCCCACCTCCATACCACTTAGCGCAATG -AATCTCTCACTCGTCGATCCATTTGTCCTGGCGCAGGAATACCCAGATACTTTGACAGAG -AAGCTGAGTATGTACTATGCCCATATTTGCTGAAAATGAAGATCACCAACCGAGCCAACG -CGACATAGGTAGCGGCCATGCGACATGTTTAAGCTTCAACCACAAAGGCGATTACCTTGC -CTCCGGACGAGTTCGTGAACCCAACACCGAAAAAAAAAAAAACACATGTCTCCATATAAT -GGTTTGAAGACTAAGATATTATCTAGGTGGATGGCACAGTGGTGATCTTCGATATTGAAA -CCAATGGCGTGGCACGCAAATTGCAAGGCCACACGCGACAGATCCAGTCTCTAAGGTACA -ATTACCCTCGACTAAACACACCGATCTACATCGAACCCCCGAAATACTGACTTTCATACT -AGCTGGTCTCGCGACGGCCGATACCTCCTCACATCTTCCCAAGACTGGAAATGCATCCTT -TGGGATCTAAAAGACGGCTCACGAATCCGCACAGTTCGATTCGAAGCGCCAGTATACATC -GCCGAGCTGCACCCATTCAACCAGTACAGCAACCACGATCTCTTCCGCTATCTGTCGAGT -CATATCAACTAATTTTTTTTTGTCCTGCAGCCTTCTTTTCGTCGCATCTCTCTTCGAGGA -CCAACCAGTACTGGTAGACGTCTCCAGCACAAAACCAATCAAACGAATCCTCCCATCTGC -CCCCCTCAGACCACAAGCTCCAAAAGACGAAGAAATTGATCCAGCCGTAGCGGCGAAACA -AGCAGCGCAAGATGCCAAGCACTCAACATGCGTAACCATCTTCACAGCCTTCGGCAACCA 
-CATCATCGCCGGCACCTCAAAAGGCTGGATCAACATAATTGAAACACAAACGTGTACCAC -AATTCATTCGATGCGTCTCTGCAATGGGGTAGTGATTCTCCTACGTCTAGCCAGCAACGG -GCGCGACCTCCTTGTCAACAGCTCGGACCGAGTAATCCGCACAGTGCTTATGCCAGACCT -ATCGCAGCTAGGGATCGATCTGGAACCATCCGCGATAAAATTGCATGTGGAGCATAAATT -CCAGGACGTGGTCAACCGACTCAGCTGGAACCACGTAACATTCTCATCGACCGGCGAATT -CGTCACCGCGACAACATTCATGAACCCTGATATCTATGTGTGGGAACGCAGCCACGGATC -GCTGGTCAAGATCCTGGAGGGACCGCGTGAGGAACTTGGCGTTGTGGAGTGGCACCCGAG -CAGGCCGATGGTTGTGGCCTGCGGGCTGGAGACCGGGTGCATATACACGTGGTCGATAGT -GTCGCCGCAGAAATGGTCGGCGCTGGCGCCAGATTTCGGCGAAGTTGAGGAGAATGTCGA -GTACATGGAGGCGGAGGATGAGTTCGATGTTCACCCAGCTGAGCAGGTGCATCAGCGACG -GCTAGATCAGGAGGATGAGGTGCCGGATGCTCTGACGTTTGACTCGGTCAAGGGAGATGG -GGATGCTGATGGCGTTGAGCCATTTAATCTGCCTGTCTTGCTGAATATTGAGGACAGTGA -TTCCGGTGAGGATGTTGTTGCTGTCGGGCCGGGGACTATGCGACGGAGGACGCCTGGGGC -TGGGAGGGAAGGTGTCAATGGGGATGCGGACAAGCCTGCGACTAACGGGGCTTCGAGGGG -TACAAGGAATCGGCGGCGATAGTAGGCATGCCTTTCTTTCTTTTTTTATTCTTGTGTGTT -GTACGTTTCTTTTTCTACCTGCATCTCTGTTGTTGGAGGGCTGTACGAGCATGTTTTGCA -TACGGTGGCGTTTATTTTTCAGGGGATGATTGGTCTGCAATCCGGCGTTGGTGGAAAAAG -CTTCTAGGGCTTTTTTTGATCTCATGTATTCATACAGATACAACGCTGACAGGTCGTGAT -GACCCAAGTGATCCAGGTGCTCCGAAGTTCAACAGAGTTGGATCTGTATGTGTAAGAATC -AAAAGATATTCATCGAATTGTTGACGGTAGAGGTTGCAAGAACCAATAGAGTCAATGGTT -TGTGATGGTAAACGAGGCTTACAGGTGCGATATTCCAGATACTGCCCCGTCCTAGCCATT -GCATTGATCGTGTAAGAGAGCGAAAGAAATAATTAACAATTATCAAGAGCAAAAGTTGAT -TCCAATGAGACATCTTGACTTAGCATTCAGGTAGCAAATGGAAGTGTCAGAAACTATAGG -AAGGTAACCTGGGTATATATAAGCCGAAATGATTCGGAATTCTTGAAAGCCCAAAACGCC -ACAAGATGCAAAGTCCAGCATGCAGATAGAACAAAATAAAAAGAGAAAGATAGCATAGAG -AGAGGGGGGGGGGAAAATCATGGCCCAAGAAGGGTCGTTGGCGCATACATCGTGTCAGCC -GTCAATCCCCCAAATATTTTTCAGTCATTCAACCATGTTCAGAGTCCCGTCGGCCGTTGG -GAACCTATAAGATCAACACTTTTTTGAAATAATATCTCAGTTCCCCATATATGTCTGGAT -CTTGAACAAAACAAAAAATCATAAAGCTGTAAAATCGTTGGCTCTGGAGCATGTAGCTGT -ACTTGCAGAAGCACATTCAACGCTAACGTGGCAAGTACACACTCTGGGCCAGCCCAAAAT -CTGTGAGACCCGGTCTATAATTCCAAGGTTTGAATACGATGATCAAATGGGTGTAAGGGG -CAATCAAGCGATTATTGGGATCACCTTTCAATTGAAGGAGCCACCTTTTGATCGATCGCC 
-ATGTGGGTATAATTCAGCTTTTATTCCTCGGGTTTTCGGGCTTGGTAAATGTGCAAAAGG -TTGTAAGTTTTGATCTTTTTGTCGAAGGCTTCCTGTTTGACGTCGAAGGCAATGCGGTCA -ATTTGCTCTCTAGACCAAGAAAGAACCCGACTGAAAGGAGCCAAACACAATGGCTGAACG -CTCTCAGATATGGCCAAATTATACCAACGGGCCACCTTCTGCTCGTGGGAATCGGGATGC -CATGGGTTCATAGGTAATCCCACAATCTGGTGGTCGATCTCGGTGAAGCCCGCTTCTTGC -AAATTCCGGATGGTGTCGGTCGAGCTGTGGGCCAGGGGCCGCATGGTAGCTTCGGTCGCA -TGCTTCAAGGTCGAGTACCAGTTAGCCATCGCGCGTCCCGGTTCGCCATCTTTGTCCTCG -ACGCGAGGCCGGAAATCGATTTCAACCTGCTCAAACCAAGCGCCCGGCCGGAGATGCTGA -AAGACACGTCGGTAGAGACTGGGCCAACTTGCGACACTACCACAGCCCATCTGCATGTGG -ATGATATCCCATGAATCCTCCCCCATGGTCCAGGGTGCCTCGAAGTCGAAGGGGGCATAG -AAATCGCAGTTCTTGGGGAAATTGGCCGGCTGTATTGGCGCAAGGTCCACCCCCACCACG -AAGGAATCGGGGTATTTGTTCGCAACTTCGATGCTCCAGATACCCGTTCCACACCCTAGG -TCAAGAATCCTGGACCCCGGGGGATGTGGTGCATAGATTAAGCCATCCTCGGCCCGTGCT -ACCGTGAACAATTTGTGAAAAATGTCGAGACGATCCTGCTCTTCCTCGTCACAGGGGAGC -GGGTAAATTCCTTTCCGATAGCCATGGTACAATCGACCATTCTCTTTGTGATGTGGATAT -CTCTCGGGTATTCTACATCGGACAGAGGTCAGTGATTTGAACTACCTGATTGAGCAAGGG -AGTGACACACTTGCGATCGGACATGATGATGGGAGATGATCGTGAGATTTGCGATGTATT -GGAGGTGTGGGATGCTTGTGTGGCTACCGATCCATTGCTCGTGAAGCGACCGTTCCTGCG -AGGATCTTTATCGACTGGATCTCGATCGATCAAGGTATGCAAGTCCATGGCCGAAGATGG -TTGTCGTGATCGGACTCGATCACTCGCCACGGAGTGCGAGTAGTATCGAGAGTGACTGTC -ACTCGTTGGGGTCGACTCGGACTTGTATCGTTTGGGTGGGGGAGGAGAAGTGATCGGAGG -TAAATGTCGCAGCTTGGTATCCCCGTTGAACATTTTCGGTAGCGAGGTTCGGTCCGACAT -GGGAAAGGAACCAGACTCTCTATACGACATGGTGATATCTGCCATCAAGTGAAAAGTCAA -GCTAGGAGACGCTCAAACGCAGGTGGAAAATCATCTAAGGAATTTCCCAAGATGGAGGAA -AGGGGGATTTTAAGGTGAAGCTATTTTTCCCCCATATAAGCCCGAGAAGAAACAAGGAGG -GGGAAGTAAAGATGTGCAGAAACACGCAGAAGTACAAACTGGGAAACGACCCGCGCTGCT -GGGTATTTAGTGTACTTTTGTTTTTAGGGATATTTCATCACATCTCTTGGTAATATGAAA -TCAGTCAAATAAATATATTGAtctatattctatattctatattcttgacattgtattctt -tatattattttctatgctatattttatatctgatttgatatcttattttatatGTAGAGA -GTCCCAAGTGGACATCGACCCACGCTCCGGGATGCGGACCGCCTGGGCCCTCAGGTTAAT -AAGCTCCTGAGACTATCCATGTATCCTCTACTACTGATACTACAGACCTTGTATACTACC -ATTTGCATCCGTTCGACTTCGTACATGTATAAAAATATCTTCACCGCAACCCAAATCTCC 
-GGGTTCGTGTGGGATATTCGAGCGAATCAAGTGAGATACCACATCTTGGCCTAGCTGCAT -TGACCTTTTCCTTTTTTGTGATATGTAAGAATGACCCTAACACCGGACCCATTGTGTTGG -GTCCAGTTATACGTGTCAACATTCCCTGGATTTTCCGGGATTCACTACGGAGTACTTCCA -AGGCCTCTTTCTCTTGGACTCGGGGATGACCTCGGGATCTACACGTTTTAGTAGTACGGA -GTACAGAGTCAGTAAATATGCCAAACCCCGAGATCTGTGTCTCCTTGCATCAGACATACT -TTGGACAATTGAAGATATCTGACCTCCTACGATCCGGCGCTTGCCGCGCTGAATTGCCCG -GGGTTGACCTACGTCAAGGTCGCCGAGATCTCTGTAAGTTCATGGAATTGGGTTTAGAGA -GTAATGTTGGGACTATCTTCCTTACATTGTACATTCGGTTTTTTTCCCGCTCGAACAACG -AATCAAGATTGATGATCAGGTTTGATAGTTGGCGGCGACTTAATAGGGTTCTTTTGATCT -ATATGTACTTCGTATCGAAGTACCCCGTCTGTACATACATAAACTGCAAAAATATAATTG -GGCCCCACTAGGAAATTTTCCTACGCCAATCCGAACCTCAAATGAAATAAAAAAAACTCA -TTCAAACATTAATCTCCTGACGCAGATCAACCGAAATAGAGGGTAATCGGATCTAGCACC -TTCCACTATGCACATACTGAAAGCAGTGGCATCATCAACTTTGTCCCTGCCGGCAGATAA -TTACATTTACTCCATCGTCCCTTCGGCCCCGCGGACCTTTGCCGCTATCTCATCAGATGA -TTCGCTGCGCGTATTTGACGCCGACCTCGATCGCGGGTCTGTGATCTCGAACGCTGCTCA -TAATGGTGTGACCGCCCTGCGGAGCTTCGCCATGGGCGAATCGCACCTCCTGGCTACGGG -TGGCCGTGATGGGAAGGTGAAGGTTTGGGATGCCCGGACTGAGAATGGAAGTCCAGTTGT -GGAGATGGCAACGGGTGAGTTTGTGTTGGTTTAAGCTTTTTTTTTTCTTATTTGGTGGGC -TTCATGTGTCGATTGTCTGTTAGGGTGTGTCGGGAACCTGCCGTGAAGGGTGGTTTTTGC -TGCATATCCGGCTTATTGCGGGTTTTCGGACAATTTAGCTAGACTCAAGGTCCTAGCTTG -AATTCCAGGTTGCTTTTAAAATGCATGTTGGCTAATACATTGGCTATGTGTTCAGCGAAA -CACTCACCTGTTCTGTCTGTGGCTTGTAATCCAGAGACCAACACTATTGTGGCGGGAACG -GAGCTTGTCTCTTCACAGGCGGTTGTTGCATTCTGGTAGGTTTCATCTTATACTTCGATT -TTAATTTGAACGAGCCCAGCAGTATCATTCAACATTTGTATTGGGATATTTCTTTCAGAC -AAAAAAACAACAACTATCTATTATCAAAACCCAGTCCCATTGACCAGAGAACAGGGATAT -CCGGTCCCCTCAGGCATTCCGCCTCCAATATGTGGAGAGTCACAATGACGATATCACCGA -GGTAAGTGTACCTTCTAACCCCGAACCACCCAACAGTCCCATCCAATACCCACATGCAGC -ACCAGCCAGACTAACCATGACATCGGCATCTTGCCTGTTACCGGTGGTACACAGCTCCAA -TATCACCCGACGCGCAGCAACATCTTACTCTCCGGCAGCACGGACGGACTAGTGAACATC -TACGATACGACCGTAACAGATGAAGACGAAGCTCTGGTGCAAGTGATCAACCACGGCTCC -GTACATCATGCCGGGTTCCTGTCGGAGCACAGGATCTTCGCCCTCAGTCATGACGAGCAC -TTTTCCGTGTATCCGGCCACAGACCCAGACGATGCTTCGCCGGAGCCCGAGCCCGTGCAT 
-TTCGGTGACGTGCGCGACCCGCTGGGCTGTGAATATGTTGCGCAGCTGTGTGTTGGTGCA -CAGGGACCGTATATCGCCGCTGGGAATAAGATGTAGGTTTCTTGATTGTCCTTATTTTTA -CGTGTTTCGGTACTAGGAAGAAGGCACGGCATATACACGTGCTGTTGTGCTTGATTCTCT -GGGGGGGGGTTTCGAGAGCTGGGCTCATGGGTATTTGGCTAATTCATGTCTTGCAGTGAC -AACCGCCTCGATCTTGTCCCACTTGTTTCCAGCCCGTCGTGGAAGCTGGACCGGGATAAT -CTGTGGCGCCTGCCCCGTGCTCATTGTGAGGAGGTTGTGCGCTCGGTGTATTTGGATGAA -CAGGTACGGATTTAGGGAATGCTTCTCGCTACATGACCTATCTAGTTCGCTGATTTCGAT -TTTTCTTTATGACTCTTGCTAATCCAAATTTCCCCTCAGAGTCAATCAGTTTTCACCTGC -GGTGAAGATGGCTTCGTGCGAGCATGGAGACCTACTGAAGGGAATGATGCTCCAGTCCAG -TCCGGGTCTGCAAAAGCCCGGCCGAAGGAGAAGAAGAAGGATAGGTTCAAGCCGTACTAG -TCTGTGTACTATGGGCGGGTGAACACATCTGTTGCCGATGGTCTGCTGTGTATCATTCGG -CATTTTCAGTGTGGGTAAAAATAGATAGTCCTGTACTTGAACCAAATGAAGTTATGTCAT -TGTTATCAATTGTCGTGGTGCGTAGGAGGAGTTGGTCGATGGATTGCAGCTATCTAAGTT -GGAAATATATTCTCAGACTATGACCTCCTAGGGCACATATAAGAGTTGAAACCACTTTTT -TATTCTGGACCGTACTAATTGAACCCAACTTACACATGTACTGTTTAAGATCAAATCATT -TCTCTAAAATACAAGAAATACACCAAAGCTGTTTGTGTAGGATTGAAGAGTAGAAAAAGA -AAGTAGTGGTAGCGTCCGGGATCGAACCAGGAGCCTTTCCGGATTGAAATACTAAGCCAA -TAGTGTAGCCTGATCCCTCAGGTGTCAAATCTTATCTCTATTATTTTTTTCCGTATCAAA -ACTTTTCTCCAGGAACAAGGGACCAACTTCCTGGAATCACATTCCCtccaattctacaat -acacatacccaaatcccagccgaaacatccaaaattcaaaaaccaaaCCTCATTGCGCAC -CCAACCCCTACACCTTCAAGGGCAAGCAAGCACATCCCACACCACAAAATGGACGCCCAA -GCCTACCTAATTAAGCACGGCTGGTCCGGCCCCGGCAACCCGCTGAACCCAAATAAACGA -CCCGGTGCACACAGTGGACTAGGCCTCACACGGCCGCTCCTGGTATCGCGCAAAGCCAAC -AACCACGGCGTGGGCAAGAAGACGACAAAAGACCCGACGAACCAATGGTGGCTGCGCGGG -TTCGAAGATGCCCTGAAGGGTGTCGGGAATGACAGTTTCGAGGCCACCTCTGCGCGTGAG -AACAATGCGCTGACAAGCGAGCTCTATCGGCACTTTGTGAGGGGAGATGGGCTGGCCGGT -ACGCTGGAGGGGTCGGAGAAGAAGAAGGATGAAAGCAGCTTTTCTATCTCCAAGTCGAAG -CGCAAGCGCGAGAATGAGGATGATGGTCTTGATCGCAAGGCTAGGAAGCTGGCAAAGGCT -GCGCGGAAGGTGGAGAAGGCGGAGAGGAAGGAGGCGAGACGTGTAAAGCGGGCTGCGAAG -GCTGAGAAGAAGGAGAAGAAAATGGCCGAGAAGCTTGCGAAGAAGGCTTTGAAGGAGAAA -AAGCGGACTGCATCCGAGGAAGATTATCCAACGCCTACATCGATTGATCAAGAATCGGAT -CAAACTGGGACGGAGACTACGGAGACAGACGAGGCTGTTCTGCGATTAGAGGAGAAGGCG 
-GCAAAAAAGGCAAAGAAGGAGAGCAAGAAGGCCAAGAGCACAGGAAGCGATATGGTGTAT -GAAGACCAAAAAGAGAGGTCTAAAAAGGAGAAGAAAGACAAAAAGGAAGCCAAAGCCTGA -TGTTACTCATTTCTAACCATTCTGATATTGGAGCGCTTCACATGCATGTCAGGACCATGT -ACAAAAATTATGTACAGACCTTTTTTCTCTTACGCCATTTAGAAACCAAGATACCCTTTT -TGTCTGGCGACATCATGCCTTCTGTAACCAAAGTAAATGTAAATGTCTATCTTCCTTTTT -ATGTGTTTGAGCTAAGCTACGCGGGTAATTTTCCAGGTCTGCCTGATAGAATCCACGGTC -TTTTCTCCCAGAACTCATCTTTAATGATATCGACATGATCAACCACGACTTGTGCTTTTC -GGCGCAGTTTGCGCATCTTGTCTTGCTGTGACCTTGACTGCTTAGGCTCGACCATTGGTG -ATTCTTCGCCTGCAGGTGATTTTGCAGAGTTAGGTTCCTCCAGCTCGTACTGTTGAGGGA -ATTGTCAGAACGGACCGAGACGAAAAGAACAAGATCATAGAACTTGGCGCATACCTGGCG -ATAAAGAACAGTGCCCTTTTTATAGATCTCCAGCTCGTTATTGTAATTGATTCCAAAACG -CTTGAAGAGAATCTCATTCTTGTCTGCAGACACAGTGCCCTTATATTAAGTCAATTGCCT -GTACTTCGAATTGCAAGAAGAATAACCCACCTTCAACTCTTGCTCGGCGTCTGTGTTACT -CATTCCACCCTGCAGGACCATCGTCCAGAAGGTAGTGTTGTAGAGATTGTTGATATGACC -TAGTCATCCTGAATTAGCCGGATCTGGCTTTGAATCAGGAGCATTGGCTCAAAGGCACCT -ACAATCGACTTGTCTCCAGCTCATGTAATCGCGGAGAATTCGGTTATTTGGGTAGATAAC -CGCACGCCCATCAAAAGATGGCAGATATGGTGGCAGAAGAGGTCTTTCAGGGAAGTACTC -GCTCCATTTGTAAATGTAGTGTGCTGTGAATGTAGAGACGATAGTGGTCACTAGTTTCCT -AAATGGAAGAGTTCCTGTCAACGGCCGCTCTTGCAAATCATATCGGGAGATAGAAGCTTA -TAGAAGATACTCACCCATTGCGCCGCTCAAAAAGTTGGCAGTTGGGATGGAAGACAAAAC -TGAGGGATTACTATTTGTGATTGCAAGGATGATGATGATACAAGTTGACCATACCTGAAT -TCGTCACTGACTCCATAGGCGATGCACAGGTCTGGAAGCTCTTTCATGACCTCAACAGCT -GCAGCATTCATCAGATCCAGTGCCCGGCGGTCATTGGGTTTCCTGAAGGCATAGTGATCA -GAGAGTCTATGTAAATGTCAGACTCTAGATATAGATATGGATGCATTGGACCGGAATAGC -AGTCACGGCTTGGTGAAAGCTATGCATAGAAAAACGGATATATAGCCGTCATCTTTTTTT -CTTCCCGCAGAAAAATCCCTCATGCATAAGGCGTCCTCATAAGAAAGAGACTGATACTCA -CTTGTGAAATCCTCGTCCATCAATGCGGACCACAATCCACGTATTGGGGAGCAGCACGTC -CGGCTGCTCGAAGGACTTGACGTACTCAAATCTGTGTCCGTTGAGAAGGCTGTCTGAATT -TCATTGTTTTGGATTCCTCGATTCTTACCTTGAGTTCGCCATTCGGTCTAGACTTTGATG -TCAAGTGATGAATAGCTCCGGAGAATGGAAGTGTCATTTTGTGCGATTTCTGCCCGCCCG -CATTTGCCCCAACTGTTCTTGATTTTGAGGACCTGGAAGTGATTTGGTGGACAAAAAACA -CCTAAGATTTCTGTGAAGACTACAGCCTAATTTACTATCCAAATATATCAAGGCAAGGCA 
-GAATCGAAAGTAAATAAAACTAGTATGGTGTTGAGATGCATATCAAAAACCCCATGGGAG -ATGCTTTAGAATCTAGGAAATACAAGGGAAAGGACAGAACCAATATGGTCTACAGATACA -TATGAGAATCATGATTTCTCAATCTCCTCAGAGATCTCTTCATCGCCTGTCTCACTGGCT -CGCTCGCTAGTCTCGCTGGCCTTGTCAGACTTGCGGGACTTGCGAGAAAAGAAACCCAGG -CTGCTGCGGGTAGAGCGGTCGGCAGATGTCACACTTGAGACGGTGCTATCGACACTCTTG -CCGAGCTGCGCTTCACTGGCCGCATCCTCGTCAATATCACCTTGCGAGGATTCGCCTTTC -TTAGAGAAAAGGCCCGAACGATCCTTCCATGAGCTGAACTTGCTGGAACTGCCCTTGCGC -GTGATCTTGCGAATGAACGACTCTTTCGAGGCATTGTCCATAGAAGTGCCGGATGGCATG -CGCTCCAACGATTCATATGAATCCCCAGTGAAAAGGGAGATGGAGCGAGATGTGCGAGAC -TCCGAGGGAGATCCATCGTACTCGTGAATTTCAAATGGGCTGTCGGGATCATTGCTCTTG -CCTGTGTCCATCTCCGGCTTCTCGTCCGTGAAAACCGCCGTGAACGAGGGAGCAGCAGGA -TTCAGCTTTGGGGTCAGTGGTCGATGAGAAGATGACGGACGAGTGCCGATCGGTGCCTGA -AGAGGGCGGTGATGGCGGTCAAATGTGTCCTCCACGAAATCAGGCTCGCCGGTTAGACCG -AGTGGAAGATGACCTGACGATCCATACTGGATTGAAGGTCGCCGAGAAGGAGCTCGAGAC -CAATTGGATGGCGCGGCCCAGTCGAATCCAAGCGGGCTGTTGCGGTGCCCAGCATTCTCA -GTTGATGGCCAACCGAAATGGCGGTTGTCTGTCGAGGGGTGTGGAAGCTGATTTGGGGTG -TCGAATGAGGTCGACGAAGGTCGAGGGGACAAAGAACCACGTCGAACGCCCCCGAGAAGC -GATGATGGATCAATAGGGTCATGTCGAGGACTGAATTGATTGTATCCACTGTTGAGATCA -GGGTTCCCACGGGTGCTAGCACCGCCGAAGCCAAACTTGCTGGATGAGAAGATATTCGAG -AAAGCAGCTCGCCGGGAGGGCGCATGATCAGAAGAGCTGTCTATGGTGACACCAGCCGTG -TTGCTTCGAGGGAACAAGGACATCGGGTTTGCCCAGTTGCCAGTATAGCTCGATCGTCGC -CGGCGGGCACCGATTGGATCCAGCTCATCAATATTGCGAGGGAAGGATTGACTCTGGCCA -GGCTTCAAACTACCCAGCATAGGAGACTCATCTGCCAGGGTTTTCCCACGAGGCCGGTGA -AAACCGAAAAGTCCGGAAAGTTTTCGCGATGCGCTTTTGGGGGCATCCCCGGCAGCAGAC -AAATGGACACCTTGGGATTCGTCCTGCCGATTTTGAGTCTCTAATGGGCTGGCAAAGATA -CTGCCAGCTCGACTGTCCGAAGAGACAGGCGACACTGGGCCGTGACCGAAGCTATCCAAC -GGAGAACTGGACGTCTCCATGGTTGCAAATCTTGGACGAGCTATAGACCGGGGGAATTCT -GGTGATTCCTCGTCACCAAGGAGATCAGATGGCAAGAGGGCATCCGCACGGGGACTCATT -TGCGGGTTATTGAAAGAAACTTCCGTATCTGAGCGCATTATCTCCGGGGGATCAGACAGG -CCTGGAAGGGCCATGCCGTTATTGATATTGAAAAACGCAGAGCTCGGGGCAAATGTGGGT -GAGTGACCGGACGGGTTGTAGTTGATGCTACTTGGAAATGTTTCGACACCCGGGAAGTTC -AGAGGGGAAGAGACATTGCTGTTGAGTGAGCTGCGATGGCGACGGGGCCGAATGGTGGCA 
-GTATTCGACATATCGAGATCCAAGGTCGGGAAGGCAAAGGGCCCTGAGCTGCCGGGTCGT -TGGGCTGTAAGCCACTTTAGGCGTTCTTGGGCTTCTTGATATTGTTGCTGGGCTTGCGTG -TGCAGGTTCACAAGTGTGGCATAACGTGCATGTAGGTTGCCCAGCTTGTGCTCCCACTGA -CGAGCTCGCTCGTTGTCGATTCGATCCAACTCTTTGCCATCTTCACTATCGCCTTCTTGT -AATCCCATTCTTTCCTCTTCTAGCTTCTTGACCCGCCCTCCTTTATCCTGGATCTCATCA -TCGATCGTCTTCATCTCCGCCTGCTCTTTCGCGATCTTGGCTCGGACCTCATCTGCGCGC -ACCGTCCCTTCTTCTTCAATCCTGGCTTTGTCATCCTTCGCCTGTAGAGCATCAGAGTTC -ATGCGTGAAATCTTCTCTTGCCATCGCAAGATGTCATTCTTGCGCTTCTTGCGCTCCGCC -TCTTTTTGTTGAAGAAGTCTCATGCGCTTGGACTTTTCTCCCTGTACGGTTCGGTTGACG -CTTTCTAATTTGTAGACATGTTTTTTCAAATCGCCACTTGCTTCGTCCTTCTCCCTAACG -CGCTTCCGCAATTCACTGCGTTGTTTTTCAAGGTCCTTCAGCACTGCAATATGCTCTTCC -TCCTCCTCTGCACCCTGCTTCTCCACGTTCTCGTTCTCGTGCTGCAACGACTTCAAACGC -TCTGCCAATTGTTCCAAAGTTTCATCTTTGGCAGTTTTTGTCCGGCTCCTGAATGTTTCT -TCGAAATTGCCATGTGAAACATCTGTGGTTCCGGTCGCGGGCGAAATTTTCCGCCCCGCC -GAGGGCCGTTTCCCCGGTAATTGGCCTGCACTGTGCTCGCGAGACATTATCGGGGCAGAT -GGCGTTGGAAGGCCTACTGTAGACCTGGGGATTGAGGCGCGTATAATTGGACCTCCAGCG -TTGCCATTTTGTTGGGCTTCGGACAGCGGAAGAGACTTTGTTCGGACATGGAGGATCGCA -CTAGGAGTCTGGAAGTTGGCGGCACTCACGGAAAGAACACAAATATGATATATGTTCCCA -GGTAATAGATTTGATATCTCCACCGCTGTTTCTGCTCGTTTAGATTCGCCGACTATAGAC -CGTTAGTGCTTTGCTTAATGGCAGAAGAGAACCAAGTAGCCCAAACTGTACCTTTCACAT -TATTAACCTGAATGATATGCTTATGTATTGAGTTATGGAAGTCGGGCTGTTTCCAAGCAA -GACGGATTTCACGGGCAGTGATCTCTTCGAGGGTGACCTCTGGCGGCGGAGGGATGTCGA -GCCCGAGCTTCTCCACTAATACCTCATTTGAAGTTTGACAGACTTGCCAGGCACGATAGA -GAAGCCTTCATACATTCGGTCAGCCGAGCGAAGGAAATGATGAGGTAGACGAGGGACTAA -CCAGATTAAAGCCCACAATATCGAGACCGCAAGGAAGAGGGCCATCACGCCTCGCACTCT -CACGCGAGGGACCACTGCTGGGTAAGCGTCAAATGCAAAGTATAGCGGCTCATACATCCC -AACGTCTGTAACCCATTGGACGCGGGTAGCGGGATACAGGAGAATTGCAAGGATCGCCTC -AATGCATTGGGCGGTGGGATGGGAACACGTGATCCAGAATGCTAGCCACCTAAAGCAGCT -TAGCGCCAAAATGGAGATCAGCCGCTACGGGACCTCGGCACCTGCATTCCCCAAGGCTGA -CGTAATCAGAACTGTTGGGCGTTGAATGTTGACGAAACCAACAAATGTTGGAAAGACGGA -GTAATTCGGGATCATCTAGATATATCTCCTGAATCAGGCTGACACTCCATAAACTCCTTT -CGGTCCCTGATATTGTGCTGATATTGTCCCGTGACGGTGATTATGTAGCCGCGTCCACTT 
-GTCTCCAAGTGTCACATGGGCCAAACGATTAACTTCCCGATTCTCCAAAAACCTGCCTCA -AGTCTTTTTTGACTCCTTCAGACCTTCTTTCTGCGCCTCTGGATTTCTAGAATTTGCTTT -GGATTGGCCTGAAACGTTATCCATAGCCGCTGTCAGAGGTGCTAGACACCACTCGGCGGG -ATGCAGCTCTATCGCTCTATAAGTGGTTCCGTCTCCAAGACATGGAATTCTATCAACCCG -GCCACCCTTAGCGGGGCTATTGATGTCATTGTTATAGAGCAAGAAGATGGTATTTATACT -TCTTCGAGATTTGGCGTTCACACAAGATACTGATCTGGCTTTTCTCTTTCAAGGTACCCT -TGCTTGTTCGCCCTTTCATGTCCGATTTGGCAAGTTTTCGCTCTTACGTCCATTCGAAAA -GAAGGTGGAGTTCAAAGTGAATGGTGTAAAGCAAGATTATGCCATGAAACTGGGAGAGGC -GGGCGAGGCATTCTTTGTCTTTGAAACGACCGATGACATCCCCGCATCCTTGCAGACATC -GCCGTTGGTATCTCCGACAGCCAGCCCGAGAGCCCAGAGCGAGCCGGATCTTCCCCCGTC -GCTGCAGGAGCCAGATTACCTTGACCTTGACAAAGGAAACGGGGATGCATTGTCAGAAGG -CGCAAAGACCCCTCCCCCGATGGCGATGCCATCGAACCAGCTACGAGCAAACACAGATTT -AGGTAGGGTCAGATTTTGAACTTTTAATTTCGATCTGTTTCCCTTGGTACTAACTATTTC -AACAGGTACGATAACACCTTTATCGCGGTCTCCCGACGAATCTGACGTGACTCGTCCACG -GCACGAGTCCTTGGGCGCTAAACCGCTGCTTGATCACACACTGTCGGACAATGTGCTTTC -GACCCGCCCTCCTATACTCTCTCTATCTAAAACCTCTGCAAATGGGAACGAAGAACCCCA -GGGTATTGAGGGCGAAACAGAGAACCGCTCTCGTAGCCCTCCTCTGTCTCCCCAGGAAGC -GGTGTCCCGTGCCATTTCGCTTTCAAAGAAGTTGTCTGGTTCCAATATCCCATCACATGT -CACCGAGACTGGAGATTTGATGCTGGATATGACAGGGTACAAAAGCAATGAAGAAGATGC -CCTTCGTGCAGAGGTTGTTGCGCGCAAGATTCTTGCGGAAGAGCTTGAAGGAAGCTATGA -TATCGGTGCGCTGATCGGAGCCGATGAGCACGGCAATCTATGGATATATAGCAGCGAGGA -GTCAAAGGAACTGGCTGATCGAAGAGCTACTCTTAATTCGATGCGGCCAAACATGGGAAT -GAACGATGATGCTCTCTCCGACCCGGGATACCACAGCGAAGGCGAACAGGCTGTCTCGGA -GCCATCATTGACTACGCGCCATCACCGAACTAAATCAGATGTCCAACCTGGTTTTCCCAC -CCCTCCTCACTCACCCCTGCATGATTATTTCCCGGTCGAAACCCGCAACTACGCTAAGAC -GCTGCGCTTGACAAGTGATCAACTTAAGGCCCTGCAACTGAAGCCTGGCGCTAACACTAT -GTCTTTCAGCGTCAACAGAGCAATTTGCACAGCAAATATGTATCTGTGGAATGGCAACAC -TCCGATCGTGATTTCTGATATTGATGGAACTATCACCAAGTCGGATGCATTAGGGCATGT -TCTTAATATGATCGGACGCGATTGGACACATGCAGGAGTCGCTAAGTTGTACACCGATAT -TGTCAACAATGGGTACAACATCATGTATCTTACCAGTCGGTCTGTCGGCCAGACAGATAC -CACACGAGCTTATCTTCATGGCATCTGCCAAGATGGATACAGATTACCAAAAGGTCCGGT -GATATGCAGCCCTGATCGCACGATGGCCGCTCTGCGGCGAGAGATCTACCTGCGAAAACC 
-CGAAGTTTTCAAGATGGCATGTTTGCGGGATATTCTCAACCTGTTTTGCGGTAAAGAGAA -CCCCTTCTATGCTGGCTTCGGAAATCGATTGACAGACGCATTGAGTTATCGTTCTGTGAA -CATTCCGTCTACTCGGATTTTCACAATCAATTCCAACGCTGAAGTGTCTCTGGATTTGCT -TAGTCTCAACAAATACAAGAGCAGTTATGTGACCATGCAGGAGCTGCTCGATCATTTCTT -TCCACCAACCAGCCTCCTCGTGCACGACGGTGGAGAGGAGTATACGGACTTCACATACTG -GCGAGACACACCGCATGAGCTCGAAGACTTCTCCACAACAGACAGCgaggatgaagatga -agacgaagacggcgaagatgccgatgaagatctggccgaagaagacgaagaagacgaaga -tgaagaatacgatgaagagaacgatgaagaGATTAGCGAGGGTGAAGGCAGCGAGTATCT -TGACGAGGTAGAGTTGGCCGAGGAAGATCTCGGGGCAAGCTACATGTCGCAGGACTCGAT -TGCCTTGTCTAACCCAGCAGGCTCGATAATCGAGTCGGTGGAAGGGGATCTTGTTGTCGA -AGAGGGGGCTCTCGGCGAAGAGGAACTTTCTCTCATCGCCGAGAAACCACAAGAATCAGC -CGAACCAAGCACAACCTCTCTTCCCATCCGGTCCAAAATGCCCCAGAATCTTTGAACATA -CACTCACTCATCGCTATCCCTCTTTGATAGGGAATCGATCCCTCGGCATTTCTTGCGTCA -TAGATACCCATCTTGCGAGTGATTTACTGAGTCAGATTCTGCGTTAGACTGCTCTGTCAG -CGATACATTCACGTTCAACACTATTGCATGCAGTCCTGGAGTTGGAGACTGCATTATACA -CGGCCATTTGTGCCCAACTCTTTCTAATTCATGGCATTCCTGGTGTTCATGGTGTTTCTG -GTTTTGTTTTCGTACATTCATGGCGCATATATTTTGAGGATCGAACAGGGTCATGAGATG -AATAGGTAATGAATGAACCAAGTGAATAGACAGGCGCAAACACATTGAATGAGTCTCTAA -AATATCTGGGAATACTGTAGAATACAATGTAACAAGGTCCAGCGCATGACTTCCGTGGAC -CTGCAGGCACCGCGATTACCTAATCGTGATATTGGCCGAAGCCCTTGGAGCTCATGCTTC -TTTTCGGGCTCATTTAAACATCACTTCCTCTCTCTACTCTCACACCGAACGCTATTAGAA -TATTTGGCTGTTTTTAAGCTTCTGAGCACTGTGCTGGGAATGCCCCCCTGATACCTTTTC -TAGGACTTCTAATCATCACTTAATTCAACATCTTGACCACTTTCGCAACACCGCAATTCG -CTCTTTGCCTCTTGCTTGATTCTTGATCAGCAATTCAGGACAAAATGGCATCCCCAGTCC -AAACACAGGGTCCAGCACCTACGGCACAGCCTCTCACCCCGCCCGCCGAATCCTCCGCCG -CGAACGGGGCCTCCGCGCCCGCTGCTACAAACTCGCAGACCCAACCCCAGGTGCCAGCAC -CAGGCCCTTCAACAACAGTGCCACAACCGCCGCCAGTGCCCGGCATATCAACCCAAGACA -GCGGGAAAACGCGACGACCACGGGATGTGCGGCTGGTCCACATGCTCCTCGCCTCACTCG -GGGTAACCTCCTACCAAGACCGGGTTCCTCTCCAATTGCTGGACTTCGCCTACCGCTATA -CAGCGAATGTGCTACAAGACTCCGTGCACCTCGCGACAGAAGGATACGCCGCGGCAACAG -ACGGTACACCGGGCGCAAAGGGCTCTGGCGAAGTGAATAGTGTTTCTCTGCCCGCATTAC -GGTTGGCCATTGCCTCCCGCTTGCACTTCCAGTTCCAGACAGGGCTGCCGAAAGAGTTTC 
-TTATGGAGGTCGCGTCAGAGAGGAACCGTATCGCTTTGCCCGGTGTGTCGCGGGGCTTTG -ACCCTGCTGCTGGCGCTAATGGCGCTGTTCCTGCGGCGAATCAGAGTGTTGTTATTGGTG -GTATGCGCTTACCTCCTGAGCGCTTCTGCCTTACTGGAATGGGGTGGAATATGAAGGATG -AGTGGGATAGTGAGGGCGAGGAAGACGATATGCCTGATGTGATGCAGCAGCCTGGGGCCG -GGAAGGGTGCTGGTGCCGACGGGACTGCAGAAAATGAAGACGAGGATGAAAATGATGGCA -AGATGGAGGATATCTTCGGTGAAGATACTACTATGGGCGAGGCGGGCGATGGAGACGAGG -ACCGGAACATGACTGATGTTTAAGTGAAGTACATTTACGTCCAGAGGTTCTTCTGCCTTT -GGCCTTTGGTTGGTGGTGTTTCTGGCGCTTGGTGTTGTCGTTCTCCTCTTTCTCTCTAAT -CTGTCATGGGAAGGGGTCCTGGTCTTTACTACCATGAAAAGCATCTATTTCATGACTTGT -GTCTACAAATTGGGGTCAAAGTCCCATCGCCTGATAATAAAGGCAAATAAAACGATAAAG -AAGGCATCCTTCTGGTCAATTAAACCAAGATATGCAATCGTCTGAGTGTATATTGTACAT -AATCGAGTTCATCCCATCGCTCCGCATTCGGGATGATCATGCAATGTAATAAAAAGATGC -CCAGCAAAACGCCTGCATTGTTTGCCTTTGTGCGCCATGAGGAGAGAGAGAAAGAGAGAG -CAAGAAAGAGGGAAATCGATATCAAGTCTGAGTCCGGGTTGGCCAAGAAAGTCCAGGGCC -AATCTACCCGGGAGGATCCAGTTTGATATCTGCCCGACAAACCAAGAGCCGGAAGGAGGC -CAGGCTAGACCGCCGGGGCTGAAAGATATGGCCGTAATACTGTCGGACATACAAGGCCCA -GCCAATAAGGTAGTCAGGTCAGAAGAGTCCAACAGCTTCCTTCCCCAAACACTTAGCCCA -AAGGCGAAAGTCCTCGCCCTCCGACCTTGGAGAAACGCAAGAAGCAGAATACTCAAGACT -CCGAAACCAATATAGACAAGAAATACTCACACATAAAATGAAGATAGCGAGAGAGGGAGA -GATATGTGGAAGAGAAAGTCTCCAATCATTTTCCAGCCGCTCGAGTCCGCGCTTCATCCT -CACGCATCATGCGAATAACTTCCGCACGCCGCTTCTCAACACGTGCAAGATCCTCCTCGC -GCACGCGTCGCCGTTCCTCATACCCTGCAAACCACTCCTCGCGTGCGCGGTCCTCTTCTT -CCGGCTTCGCGTGGGCGAGCATGCATGAATTCATTGCGAGACGTTGCTGGCGGCAGACCC -ACGTTGCAGTGACAGTACGGTTAACGGCGCACTCGGCGAATGCTATGTCATTATTGGGTT -AGTAGGTTGTTTCAAGCATCTTTGAGGTGTGGGGAATGGATTATGGGCATAGAGCCAGAC -TTGGTCCTCTGTTTGGAACACACAACCAGCTTTTGTAGATTGAAAGCTGTGCAGATGACC -CGATTGGCGTAGAATAAGTATAAGTTGCCTACCTTTAATCTCTGGGGCGCAATGGGCGCG -TACTCGCTTGTAGTATAGCAGTTTCACTTCTTGTTCTTGGGGGGCCGAGAGCGGAAGCGG -GTTGCGAAGATTATATTGTTTGGCTGTACTCTTAGGTACATCTGTTGAAGATGTATCGGG -AGATGTGGAAGATGTAGCCATTGAGACTGTAAGTCGGTCTCAAAGATCGAAGTTGGTCAG -GTTGAATGTATGGGATGGTTCGATCGTAGGGTTTGCTTTAGAGGAGGATTGGTGCCGGTC -GTTGAGGGACAAGATGTAAATATGGGCTGTTCCAAAGCTCAGGAAAGCAAGCCGATAGAG 
-GTAAGAATAATTCCAAGAGGGTTCAACTGAATGGCTTTCCAGAGAAGATCGAGACCAAGT -GGGAGTGTTGATTGCGAGTGAGCCGAGCTCAGGCGGAGAGAATGTTACTATCGGCAGAAT -CAGAAAAAAAAAAGTTCTCCGACTTTGCACCTCCAGACTACACTCTTCTGTTGGAGACTT -CTACGTTTTTCTTTTGGTTCTCTAATTTTTTTCTTCCGTCTATATCTTCTCATATTAGAA -TTAGTGTCTTGGGAATTAGCAATACCAAAACTCAGCATGTCATCCATGCGCAACGCCGTC -CAACGGCGGAACCACAAAGAACGTGGCCAAGTCCAGGGCCGCGAGAAATGGGGTCTCCTC -GAGAAGCACAAGGTCAGTCCCTGTGGAGGGAATTTTGGAATATTGAGCAATAATCCAAGC -TTTTTTCTATGCCCAATCTCGAGACAAGTGTGCTAATTTCAAAACAGGATTACTCGCTCC -GTGCCAAAGATTATAATGCCAAGAAGGCTAAGCTGAAGCGCCTCGAAGAGAAGGTGCGGG -ATCGCAACCCCGATGAGTTCGCCTTCGGTATGATGGGAGACAAGAACCGGACGCAGGGTC -GGCACGGACGAGGCAAAGGCACAGCACGAGACTCTGCCACAGCGCAGGGACTGAGCCACG -AGGCAATCAAGCTTCTCAAGACACAAGATAAAGGCTATCTTCGGACAGTTGGCGAGCGGA -TACGCCGCGAGATTGAGCGCCTGGAGCGCGAAGTGGAATTGCAAGATGGCATGAACAAGG -CTCTTGGCAAAAAGGATGGAAAAAATGCGGAGAGCGATGACGAGGATGACGGATTCGACG -ACGATTCCGACTTCGATTTCGGAGCTCCGGTGCAACCGAAACCAACCAGGATGGTGTTTG -CGGATGATCGCCAAGACCAGTTGGCAATGAAGAAGCAGAGGATACAAAAAGAAGAGCCTG -CTCCAGATTCAGAGGAAGAAGATCAGACGTCACGCACGACCGGCAAGACACCTAAGCAGC -TTCAGGCCGAGCGGCAAGCACTTGTGGATGCTCGCCGAGCTCGCAAGCTCCGGAAACGTA -CCATTGAGGCCCGCAATAACAAGCTGACTGCATTGCACAAGCAACACGCCGAAATCCGGA -CCGCAGAGCAAGAGCTTGACTGGCAGCGGGCCAAGATGGAGAATTCCGTGGGGGGCACCA -ACAAGGATGGAATCAAGTGGAAGATCCGTGAGCGCAAGCGCTAAGTGTTTTTTGTTTCGA -CGGCGTTAATGAGTGATGACTCGCTATCACCTTTTTTGTACATATTGATACCAACCACCA -TAGAGGCAGCCCGGCGCATACAGTAGCACCTAAGGAACCCACTTTAGTTCTGGCCATCGA -CCCCTCGAAAAAATCGGCAACATCAAGAGCTTTGCGTCATGTGATAGCTGGAAGACTGAT -TGATCTACCATGAAATTTGACCTTTGACCACCCAAAATACTGCGACGCCACCCCTGCACT -TGGATGTGGTTGAAATGATATCAAGGCTGGCCTCTCTTTTGTGATAAATACTATACACAA -TCCACAAACGGACCGATTCGTTCTGTACCCACAAACCCACAGCGTACAGAAATCCCTGGC -ATAAACGATATAGCTATGTCATTGCGGCCTAACCCAGCCGATGTAAAGCATGTTGCAATC -GTTTCTTGAGGTCGTTGCGGGCTTTGCATAACATCATATCGCCACCTGTTTCAATTCCAT -CCATTTTCAGCATCCCTATAAGCATCGTCTCGCTTAGTTCCTTGTGCTCGAGATCACGCT -TCTTAATATCGGCTGGTGGGTGGGCGGCGTGATCGTCACAAAGTGGGATGATATCATGCT -CGGACCAGACAGTCAAGGAGACAGTCAGGGCAAATGCTTGCTCCACCCCTGTGCGAAGTG 
-TGTTTATTTTCGGTAGAAACCGAAGTGGGTATGAAGTGGCCGGTGTATGTCGTGATGCCC -GAGAGGGCGATGGTGCGGGTTCAGCTCCATCATGAAGACCAGGAACATGTCCAGTCTGCT -TGTTTTTATTGACTATAATCTTGGATTGAGTCTGAGGCGGACGGCTTTTATAGTCTCTTT -CATAGTCCGAAGAGTCCCTCGGTGTACTCGGGTTTGTTTCGAAGATACATAGCATTTCTG -AGTGTAGCTTGATCCCATTTGGTTTGCACATTTGATTGTCAGCTTTTAGCAGCGCTCCCG -TGTACAGTGACCCAACATGGACCCCACCCCCCTTGATTTGTGGCGAGTGATCATTTGGCG -AGCGGCAGCCAACACAGTGCTTTCCCATTCGGCTTGCGTGTAAAACCCTCCAATCATTCC -CCATGCAGTCTATGTCACATCTCCTTTTCATCCTCGGTAGATTGATGGTTTGGTTCTGAA -GCCATACGTCCTTTGGAAGGTAGGGAGTGGATTGGATGCCTGGAGCCTATGCGGGGACAA -TGCAACATCTCCGCTCATTCAGAGCGAGACTCTGTTTCCCTTGGGCCTCGCTCTGGTCGA -ACTATCTTTATGCCAGATACTAGAGGCATTACGTATACTGGCAGATGATGATCGTGGCAT -GTGTTTAATCTAAAGACTGCAACTCGGCTCTTACAATTTTGGAGGAGCAAAGCAGTGGAG -AATATGAAAAGGTAACGGGGTGATGCTTCCTTTAGACTGGAACCAAAGAGCCTATATTGG -AAAGTGAGCAGATGCAAGGTTAGATATCTCAACTCATCGTCTCTCCATTAGTTGACAACC -TGAGGAGTTTTGAGGGTAGATCTTGATTTTTCAATATACAACAAAGTCAAAGAAGCCATC -CTAAACAGCTGCATAATATATTTACTAATACAACGATGTCTTTCCTTGCACTTCTGATTC -GGGGGTGAATGAACGTTCCACTGTCTACAAAACTGTCCACCTCACACATGAGGTAAAGTT -AGGGCTTCTTTCCGCCCTATTGCGGTTACGAACATATCcattcagtatcattcggtatca -ttcggtatcatccggtatcattcggCATGTTGGGTAATTTAACAGAGTCCTAAATCGAAC -ACGAGAGGGTATCAATGGCGGAAAAGACAAGAGGAAATCAAAGCTAGATCAAGCCCAATA -TCCCCCGGTGTAAGTGTAGGCGGGTGCGATTGCCTACTTGGGAAGAAGGGTGGGAAAAGG -GCCGATGGTAAGAGTCAAAGTGTGCGATTCACACGGAGTAAGATCAAACTCGATATATTA -ATACTGCTTATGTCTCTGGCATCAGTAGATGTACTTACGAATCTACAGGTACCTACCTAA -CTACCTAGGTAGCTCTGGAGGTATAAGAGTCTCAGTAATCACACCATCGCTGATAGGACA -ATTTCCCCAGACCTAATCGAGGCTTAATAATCATTTGCCACCCCTCCAAAAGGGAAGGtt -atcttctcttttcttctttcttttcGTACCTCACACATAACACAATGGCTCCTTCATTTA -TGACCATTGACACGAACGATATGGACGTGGATTTCTCCGTCCAGGCCAAGCGCACCCTCC -TCCTAGCACCCCCTTCCATCGCCACCCAAGAGGACAAACTCCGCGGCCTTTTCAGTACCT -TTGACCGCTCAACCACAGACCTACAAATGCTCGACCGGCTCTCCTCAAGAGTCGTCTCCC -TTCCAGCGGCAACTTACGATCTCGTACTCATCCTCACCGACACAGACGGCACACGGCGCT -CCGAGGCGCTGCAACTCCTCACCCGAGATGTATACGCCACTCTAGTCCCCGCCATGAAAC -CGGGTGCCAAGCTGCAAACCCAGGACTCAGCTCTAAATGCCTCTGATGCCATGGAAGCCA 
-TTCTGGCCGGCCTTGTGCAGTCCGATAACGGCTTCGAGAAACCAAATTTTGATCCCTCGG -CCGCTGTTCCCTTGAAGTTCGGCCTGAAGAAGAAGAATAAGTCTACCGCGCCAGCCACAG -TCCCCAGTATTCCCAAGGGATTCGAAGCACCGATGGGGATCGATGCGCCAACTAATCACG -ACCGCGACGATGATGATGAGCTCATTAATGAAGATACGCTTCTCTCCGAGGAGGATTTAA -CAAGACCTATAATGCCGCGTATGTCACCACCTCTATTTGAAATATCCTCCTTTCCACAAG -ATATACATTTTATAACACTATCTTCAGCCCCAGAGTGCCAACCAAAGACTGGTCGCCGGC -GACGCGCCTGCAAGGATTGCACATGTGGTCTAGCCGACAAACTCGAAGCAGAGGATAAGG -AGCGCCGTGCAAACGCAGATAAAGAATTGAACGTGATGAAGCTGGATACGGGGGACTTAG -CCGAGCTGGACTTCACCGTTGAAGGCAAGACTGGTTCTTGTGGAAGCTGTGCTCTGGGTG -ATGCCTTCCGCTGTGACGGTTGTCCTTACATGGGTCTTCCTGCCTTTAAGCCAGGTCAGG -AGGTTCAGATTCTGAATGATGTTGCGCAGCTTTGAGATACAAATACTTAACGACTGCTAC -TTCAATTTTTCATGACGGAAGTGGGGGGGTTATTGGGTCTTTGCATTGGGGTTTTCTGGG -CTTTTGGGATTTATTTCTGGGGGTTTCATACAAAATTATGCTAGGGGCGTCTGGCTCTAA -TTAATTAACTCGATTTACCTTGATTGTAAACTGAAATTTCGAGTGCAGCCCAACGTCATT -GAGCTTCGGATCAGATAAGTAGGAGGTTGATCTGCCGGAATCGATTTACTTTGCTTTCGA -AATATTCAACTGCACATATTCTATTGGTACCAAGTTGCTTTTATAAGTAGTAGTTATCAT -GGCACGTCCCTTCATAAAAAAAAAACCGAAAAGAGATGAGTATATGGTCTACTTGACCTA -TGACTGGTTCTTCTTCATGTGAGCAACACGAATGCTCACTGCACGCTTGTGTGCCTCCAG -GCCTTCCACAGCAGCGAGGGTCTCAACCGTCTTGGCGAGACCGAGAAGGCCATCGGCGGT -CAGGTTCGAGCTGGTGATGTGCTTCAGGAAGGAGCCGAGGTTGACACCCGAATACTGCTT -GGCATAGCCGTAGGTAGCTGGAAAGGAGGATGTTAGCAAGATACAACGAGGTATATGAGG -GAAGATACGTACGCAGGGAGTGGTTGACTCCGGCGGAGTAATCGCCAACACTCTCGGGGG -TCCAGGCACCGATGAAGACACTGCCAGCGTTCTGGACGTCTTTGACAAGAGACTCGGCGT -TCTGGACCTGCAGGATCAGGTGCTCGGGCGCGTAGTCGTTGCTGAGGGCCATGGCCTCTT -TGATGTCGCGCACAACGAAGGTGACGGAGTGCGCGAGGGAGCCGCGGACGATATCCATGC -GTGGCAGGGCTTTGGCCTGGGCATCGACCTCGTCTTCGATAGCACCTAGCTCGGCCTCGG -TCAGGTCGACGGCGATAAGGATTACTTGTGAGTCGACCCCGTGTTCAGCCTGGCTGAGAA -GGTCGGATGCAACAAAGGCCGGGTTGGCGGTCTTGTCGGCGATCACAAGGACTTCACTAG -GACCGGCGGGCATGTCGATGCTGACGCCTGCGGAGGTATCGTTGGACACAAACATCTTTG -CGGCCGTCACGAACTGGTTGCCTGGACCAAGGATCTTGTCGACCTTGGTGATACTTTCGG -TTCCGTAGGCCATGGCGGCCACAGCTTGGGCACCACCGGCCAAGACAATGCTCTCAGCAC -CGACCTTGTGCGCGACATAGACAATTTCAGGAGAGATGCTGCCATCGGAGCGGGGAGGCG 
-AGGCCAGGACGATCTTCTTGCAGCCTGCCACCATGGCGGGAACGCCCAGCATCATGGCGG -TGGAGGGCAGCACAGCGGTGCCACCCGGGATGTACAAACCGACACGCTCGATGGGGCGCG -AGAAGCGAGAGCAGACCACACCGGGCATGGTCTCCATCTCGAGGCTTTCGTTGCTTCCCA -TCTGTGCGGAGTGGAAGCGATCGATATTGCCGATGCTGATATCAATTGCCTCCTGGACAT -CAGGGGACAGCTTCATAAGTTCAGCGGGGAACGGCGCTTGGATGACGGGGGAGGTGAGTG -ATGTAGCATTCTCGAACTTGTGGGTGTATTTGAGCACTGCGGCATCGCCGCCATCACGGA -CATCCTGGATGATTGGCTTCACCAGATTGACAATGGCTTCGTTGGACTTCTGGGAAGGAC -GCTTAAGGTAGTCGGCGACCACCTGGGGAGTAGTAGAGGCAGTGATGACACGCTTCATCT -CAATTCGGGATGTGCGATCCTCGACCGGGGCGGGCGCGGGAACAGGCACAGGCTTTGACT -CGGGCTTGGCTAGGCCAGCTTTCTCAGCCCAAGGTCCCTTGGCGTCACCCTTTCTCCGCT -TCACCTTCAAGCTCTTGAGGTCAAGGTTCCGCTCGATGTCCTCCAGGCTGACACCGGCAG -CAGTGCATTTGGTCAGGGCAAAGTACAACAGGTCGGCAGCCTCGAAAGCGATTTCCTCCT -TAGTGTCCGCACGACACAACTCATCAGCCTCCTCCATGATCTTAGCATCGATGAGCTTAG -GCTCATTGAACAAACGCGCTGTGTATGATCCTGCAGGAGCATCCGCCTTGCGGGCGTCTA -AAGTCTTCTGGAGACGAGACAGACCAGTAGATGCGCCGAAGCAGGTTTCAGTACCAAGGT -GGCAGAATCCTAATGAGTCATCGTGGTCAGTATACAGACATCCTCCATCAATGTAAATAT -AGTTGGTTCATACCACGTCCGATTTGCTTGACAACAAAGACTAAACAGTCCGAATCACAA -TCGAAGCCAATACGGATCAATTTTTGCACATCCCCGCTCGACTGGCCTTTGTACCACAGA -CCCCGCTTCCGGCTCTGATAAACACCGGTGCCAGTACGCAAGGCCTCGACAATGCTCTCG -TCACTGCTCCAAACAAAGCCCAGACAAGATCCCCGCTCATCGGTGACAGAAGTGGCATAC -AGGCCATTGGCTTGGTCAGTAACAGCGGCCGATGCAATCAGCTTAGCGGCAGAGATTTGG -CCAGCTACATCCCGCTCCACGGTCAATGCCTGAGAGGGAATGATAGCAATCGCACCCTGC -TTTGTGACCTGTGCAATGGCATCCTCCGATACTGTCTGAGAATCAAAGGTGGTGAAGAGC -CGTGGGGAGTCCGAGCTAATCTTCAGTTTCTCGGCGGCGACGGGGACGGTAGAAGGTGCA -GTGCAGACACTAGCTTCGCTGCGCTCGGCGTTGACGGCAATCCATTGCTGGAAAGTATCC -AATTGACAGTCTGACAGGGCGTTGACGAGCAAACGGCAGGAGGGCACGGATTGTTCCTCG -GAGAGGGTGGTCAATTGGTCCAAGTTGATCAAGATCTTGGCTGCACCAGCGTTGAGGATA -TCGACGAGATCGCCAGCAGAGGAGATCTCAGTTGCATCGACAAATACATCGAGGGTACGA -AAGTTCTGGCGCAGGAATTGCTCTGCCTGGGCGAGACTTGAAACCTTGATCAGTACACGG -CCGAAGTAGGCGATTTGCTTGAGCGATAGTCCGGAGCCGGAGGTGGGGGAGGAGGGGTGG -GAGATGAGGAACGGTGTCGCCATGATGGNNNNNNNNNNNNNNNNNNNNNGGGGGACAGAG -GGAGCAGGGGGAGCAGTGGTGGTCAATGTTGGATCGTAGTATATAATGATTGAACAGGAG 
-AGTTATATCCCCTAAGACCCTTTTGAAGGTGAAATTGTGGAGGAGAGCGATGACTCCCCC -TTGCAACCTTTTTTTCCCGGGTTCGGCCCGCACGCTGAATCATCCGACTAGATCCCTGGC -TTTGTGCCCTCTGGGATATCTGATGTGATTCTACGTTGTCTATAAAATTGCATTATATCA -TCTGCATTTCTGATTTCTACATTTTTGCATTTTTGCATTTTCGTACTCGATATCAGATGC -TTCATTTCCCAGGTACCTAGACCTTCTATGCCCGTCTTAACTATGAACCGCCTCGACAAT -TTTATAGATCTTACTCTCGCAGTCCTGGAATCCTTAGAGGACTTGACCTGCCTTCGCTCA -GAACAAGGTCTGTCCCTTGAGCAAAAGTGGCATTGCCAACTGTCACCTCATACTGCACCA -GTACTGGTGTAGTGGTTGAGGGATGTTCAAACAGTCCACAAGGACCCTCATGCTGTTCCC -GCAGCAAGAGGAATGGTACTTGCTGCGAGATGCAGGGATCCCGAAAGGTGGTCTGTACAT -GTGCGATGGTGTTGACTGTATCCGAGTATCTTTGGTAAGTCGACTGTTTCAACTTGATCA -TGCCATTGCCACATGACTCATTGAACCACTTGATCGGTGCTTGGGATCGCTAGTTGACCC -ATAATTCGGGTCATCCTGGAACTCGGACGTGGAGAGAAGGCAACTGAGTTGAACATGATG -TACATTGTAGACATCCACATATGCATTGGTTCTGAGTATAGGCTGGAACGAGGCAACTTA -GGGAAGGCGTTACGTAGATATCAAGGGGGAACTGCAAGCTGGAGTTTTGTACAACATACT -GTGGAATATGGGATCTGAAGCCACAAGTTGCTTGAATGCCCAGAAACCATGCTAATGTTA -ATGTTCTCGTGTCGCGTATGCAACGCCCCGATCGAAAACCATAGGTGCACGGATGCCGAC -CAATGCCCCTAGAAGTATTTCAAGGGCCTGAGCAGTGAGAGAAACACAAGCTGGTCAACT -CCCCAAAGAACCAACCATTTCCCAAATGCCATTCCCACCCACCCCCACGGGCGAGAGCAG -CTTCCACTCTGGCAATTGCCATTGCGGTGCCGTGCGGTTCAACTTCACGTTATCCCCGCC -CCTACATGAATATCCCACCAATAGCTGCAACTGTTCCATCTGCACCAGAAACGGCTATTT -GCTCGTCTATCCATTCAACAAGGATTTTGTCATCGAAAAGGGCGATACACTCAAGGATTA -TCTCTTTGCCGGACACAAGTCGAAGCACCAATTCTGCGGGGTGTGCGGAAGTAGCTGCTT -CATCCGTCTTCTGGAAGAGGGAGCACCACCAATCTCTGCCGTCAATGTAAGTCCAATGAC -CAATATGATGCCCCAGCTACTGATCTGATTAGGTACGACTCCTCCAGGACATTGATATTG -AGAACCTGCATCTCAACAAGGTTGATGGAAGGTCGGTCGGGCCAGCATACACGCTTTGAA -GCAATCCTTTCCCACGTAGTTATATCATTTTCAAGGTCATAAACGATGACTCATAGCACT -ATCTACTCTGTACTGGGAGTTTAGAACCGGTGAATAATGTAGTATATGTTCTATCGCCGT -AAATGGTTCGGAAGTGATATATGACGGGTGTATGGTACATATTCAAATCCCATTATGTTC -TCCACCGGTATTCCGTACTCCGTACTTGGAGCCCAGCGGCTGGAGTTCAGATCCACAAGT -CATGGATCGCCATGAATATTCCAAGTCCAGCTCACAATGAAGATATGTTGGTCAGACACT -TCAGTCCGGTGACAAAAGTCCATCACATTGAGGGGTACGTTGCATGTCAAAGCACCCAGC -GTGACAGCCACGGTGCTTGGTAGCCACCGCTGTTGTATATCTTGCCCCGGACAAGAGCCG 
-AAGACGTATTTTCCTGTCAAGTGGTCGCAGCCGTGTATGGTTTGGAGACAGTTCGAGTTG -AAGGTCTACAACatatgtagaatatgagatgtagatatagatatatatatatatatatac -atataGAATACTCCGTATATTATTGATCTCAAGTTTCAACCATGTGAAACCCGAGAAAGA -TGTGCATTGTCGGATGCACGCCAATAGTGGCGCCCTTGCAGATCCCCCTGAATCTTTGTC -CGAGACCttttttcttttttttttttttGGCATCTACCGCACGCGATTGAATCGGAATGC -AAATGACATGGGAGAAAATAAACAAGTCGGCCCCCGAGGTCGCGTGTTCACCATGCCTTC -CTCTTTCGTTCTTGCAACCTCATTTAATTTCATATATAATTGCATTTGACCTGCCCTGCA -GTTCAGGCCTCTCGTTCTGCAATTGAAATTATTCCTACCTTTGCATCCTGCCTAAATAAA -CCTATATATCTATACAACCCACATTCATCGCGTAATCATGCGCATCTCCTCCCTGTCACT -CCCTTTGGCAGTGTTGAGCCTCGTTGGGCCTGCTACCGCCTATCCCTTCCCGGAACCAGA -GGTGGTCTTGCGCCGCGACTCGGACAAACAGGCCAAGGCCGATGCTGTCAAGGAAGCCTT -CCAACATGCATGGAATGGATATGTCGAATATGCGTTCCCTCATGACGAGTTGCATCCGGT -GAGCAATGGCTATGGTGACTCCAGGTGAGCATGTTTGGTTATACCGTTGTGGCTGAGAAT -TGACACGCGATAGGAATGGATGGGGAGCTTCCGCCGTCGACGCGCTCTCGACCGCCATTA -TCATGGGTAACCGGGATGCTGTCCAAAAGATCCTGGACCACATTGAAACGATTGACTTCT -CCAAGACCAAGGACGAGGTCAGCCTGTTTGAGACTACTATCCGATACCTGGGGGGCATGT -TGTCGGGCTATGATTTGCTCAAGGACTCTGCAGCGAACCTGGGCCTCAATTCCACTCAAG -TTGATACCCTGCTTATCCAGTCGAAGAAACTCGGCGATGTCTTGAAATTCGCCTTTGATA -CACCGTCCGGGGTTCCCTACAATAACCTGGACATCGCATCCAAAGGAAATGATGGCTCGA -GAACCAATGGCCTTGCAGTAACCGGAACTCTGGTGCTGGAGTGGACCCGACTGTCCGATT -TGACGGGCGATGAGGAGTATGCTCAGCTAAGCCAGAAGGCCCAGTCGTACCTCTTGAATC -CTCAGCCGTCAAGCGGAGAGCCATTCCCCGGCCTGGTGGGAAGCAACATCAACATCTCCA -ATGGTTATTTCACCGACAGTGCTGTCTCGTGGAACGGCGGCGATGACTCGTTCTACGAGT -ACCTGATTAAAATGTATGTGTACGACCCCAAGCGCTTCAAGTCATACAAGGACCGCTGGG -TCCTCGCAGCCGAATCAACAATCAAACACCTTCAGTCGCATCCCGCCCCTCGCCCGGAAA -TAACCTGGTTGGCCTCATACAACAACGGCCAATATGATCTTAGCTCACAGCACCTGACCT -GCTTCGACGGCGGCAGTTTCATCCTGGGCGGTACCGTGCTGGCCCGACAAGACTTTATCG -ATTTCGGCTTGCAGCTTGTGGATGGCTGTGAGGCTACTTACAATCAGACCCTGACAGGCA -TTGGACCAGACTCCTGGGGCTGGGATCCGAAGAAAGTGCCATCCGACCAAAAGGACTTCT -ACGAGAAGGCCGGATTTTATATCAATAGCGGTGCATATGTCTTGCGCCCTGAAGTGATCG -AGAGCTTCTATTACGCGTACCGCGTTACGGGCAATGAAATTGTACGTTCTCCTTTCTCCT -GATGGTTCAAATCATACTGATCCAGATATAGTACCGTGACTGGGTGTGGAACGGTTTCAA 
-GGCAATCAACTCTACCTGCCGCACTGGTTCGGGCTTTGCCGCTGTAAGCGACGTGAATAC -GGCTGGCGGGGGGTCCAAGTACGATAATGAGGAGAGCTTCCTCTTTGCCGAAGTCCTCAA -ATATGCGTACCTTGTCCATGCAGAGGGTACGTTGCGGGCCTTTCCAGCCACCTTTCTTCC -ATTGAACATTGCTAATACTAGATTTGGGTATAGATGCCCCCTGGCAAGTCCAGAAGGGTC -AGAAGAATGAGTTCGTGTTCAACACTGAGGCCCATCCCATGCGCGTGGCGCACACCTAGG -TGATAGGATTGGATTATCACTACACTATATAGTACATGCATCAAGCCTATATTATCTCTT -GTCATTGGTCTTGATGTAAAACAGACGAAAATTCTGCAGCCCTTTAAATTCTCTCTGCAC -TACGTACAGCCTGTACATCGAAATGCTAAGTTGTTTATTTGTAGCCTCAATCACCCAGGG -TGTCGCAAGGACGTATCCCACATCAGACGACTGCGCTGTGTTACCGTACTTCAGCTCTAT -TGGCCGTTTTAAAATATGAGAATGACAACGATATTGACGGATCGAGCTGGACAGCCTATA -CTGATTAACAACTGCATGCACGCTATGTGTTTTGGCAAGTCAAGAACAGTGTGAAAGAAC -ACGAAGTGGGACATACGAGGTCCGGCAAAAGATTTTAGCCATCGCACGCCATATCTAGAA -TCGGTGACACAATAACTTTCAGAAAATCCCACGCTGGATAAAAGTCCGTAAAATGAGCCA -TAAATTCACCTAGGGGACACATTATGATGCTATTTGCAAAGTGTATCACGAAAACGCTAA -CTTGTCGGATAAGAGCTCTATCCTTATGTATAAGAAGGCCATGCAATACCGTGGAATAAG -AACAGTGAAGTGATATTGCCAATTGACCTAAGCTATCAACCCAAATGGCTGTCTTAAAAC -AAACTCTCTTAGCAGCCCTAGCAACAACGTCCTTACCTCAACCAAGCAACAGCCTTTATC -GCTCACGACCCGACCTTGCACCACCTCGATTAAACATCACCATCCCCGCCACAAACCCCA -ATGGCTCCGAGTATGTCTTTATCGCCCCTTACTCTATGGGTGGAACCATCGAGCGGCCCG -GTCCATACATCTACCGCAAAGACGGCGACTTGGTGTGGGCCGGCACAGGGTACTACGCCG -GCTTCGTGGCTGATTTCCATCCAACTATGTACCACGGGGAGACTGTTCTCCAGGCATTTC -AGGGCAATATGGATAGTTCCCACGGTGAAGGGTTTGGTCAGCATGTTCTGCTGGATCAGA -GTTATCAGCACGTTGCTACCTCGACTGCCGGGAACCACCGGGTCTCATCGATACATGAGT -TCAACGTTATCCGGGGGGAGACGGCTTTGATTGAGGTCTTTTATACGACCCCGGTCAACT -TGTCGGCTTACGGTGGGAATGATTCGCAAACGTGGTTAGGAAATGGAATTTTTCAAGGTT -TGTTCGTCATGAGTACTGGAAAATGAGTTTGTTTGCTGATGAGGTATTTGACAGAAAACG -ATATTGCCACAGGAGAATTGATCTTTGAGTGGAATGCTTTAGAACATGTTGATCCATCCG -GTAAGAGAAATTGCTGTCATTCAAGCCCGACCATAGTTAACCTCCAACTAGAAAGCCTAG -TGACACTAGGCTCTACGAAGGCCAATAGCGGATTGTCCTCCGCCGAAGCATGGGATTTCT -TCCACCTCAACAGTCTCGATAAAAACGCAGAAGGAGACTACCTGGTCTCGTCCCGACACA -CTAGCACGATATTTAAGATCAACGGCACTGACGGCTCGATCATCTGGCGGCTCGGTGGCA -AGTACCCTAGCTTCTCACAAATCGGGAACTGGACCTTCGGATTCCAACACGATGCCCGCT 
-GGCAACCGCAACTCAACCAGCCGGGGACCGAAGTCATTTCGTTCTTTGACAACTCCGGCG -ATGGAACTATCACCTTTAACGAATTGTCTCGGGCATTGGTTGTCCAAATCAACTACACAG -ATAGCACGGCGACAGTTCTCCGCAAGGCAACAGCCCCTTATAATCTCCAAGCCCAGTCCC -AGGGAAATGCACAGTTACTTTCTGACGACAGGATATTCGTCAACTGGGGATCGGAGGGCG -CTTTCACGGAATTTGATGCTGATAATGAGATTATGTACCATGCTTTTATCCAGACAGGGT -CGGTCAGCTATCGTGGCTTTTTGGCGAACTGGACTGCTACGCCAAAGGAAATTCCAGCGC -TAGTTGCACTAAAAACCGCTTCTAACCTTGTTGAGTTTTATGTGTCTTGGAATGGGGACA -CGGAAACTTCGGCTTGGAGATTCTATTATGTGAACGAACGGAAGAAGACGCGGATTGGTG -AAATGGATAGGGACTCTTTCGAGACAGCCTTTACTTGGAAGCCGAAATTTGAACTGTCTT -CCTCTGCTAAATTTGTTGCTGAAGCTATTGGTGTGAATGGGGAGTCTCTTGCGCGGACAG -GTTTGACTGCTACAACTGCCGCAATCAAGTCGATGGCGTGATGAGCAAGATTGTGAAGCC -TAAGGTCATAATGTTCTGGAAACTATGTTATGTTTGATTACAGTTGTTCATCTTGCATGA -GTGTGGAGTAAAGCCAGGTCTGAAAGTGAAACGTTCATATAAACTAGTATTAGACTTTAG -TTGGAGAGGATAGTAAAATGGTGCGCCTCACAATAACCGCCAGTGCAATCCCTGCCAATG -GGCATCACGCGTCAACATGCCTCTTGGTCTCTAGCTGCTTTCCAGCCTTCCGTTGCCTAT -GACACGCGATAGAGGCGCGCACACAAAGAATGATGTTGAAGAGGCTGGATTTATCTTCTA -TTAGATATATCAAAGAGAATGAGGGACGGTTCACTCACATTCCAAGAAGAATCATAGAAC -ACGCCAAACATTCCGCCCCATACAATGCGTCAACATTATGGGTGCTGGAATAATCATAAC -CACCAAGATTCCAATAGGCAAGCTCGTGCAAATCGAAGCAGACTGTAATCAACTGGGCAA -TGCAAGCTATGCAGTCGAACGCAACGATTATACCAGGGTGGACCACACAGTTGCAAAAGA -CGACGATGAGAAGGATAAGGGACCAGAGGAATCCGTATGAGGCCTAAATATGAGAAATAT -ACTCAGCACAGCTTCTTCAAAACAGCTGATGAAATGATCATAGTGGGAACGCTTGTACTC -ACTGTGCCCAGGTTTATACTAGACCATGCTTCTCCTAGCCCATCGCTTGAAACAACTCCT -TCCTGATGTTGAACCATGGCCCAAGCAAAGCAGATTATTGCTACCATGGTGCACAGTACC -GCGCCGAATCGCAGAGACAAAAGCGACAGCCATTTTCTTTCTTTCCTTGGTACTGCTTCA -ATGCAAAAAGAAGAGGAACGTTGGTGGCCCAAGGCTATAGACATGGTTGTAAGTCTAACG -ACATATATTGAATACTGAATATCAAGGCAATTGCTGGGAGGTAAAAATGAGACGAGTAAG -ACATTTCCATGATTTAAGGATACATCTATATTTGTTCGCAACACAAAAATAGTTCACACT -ATATGCTAATCATTGCCTTTTATATGTTAACACAGCCCTCGAGATGCGGCTATGCCAAGA -GCCTAAACAAGACCCCGCAAATTGCAAATTCCGATCCAGTTGACTCGATGAGCATGGGTA -CAGTAATATGAATTTGTTGTATACGAGCATTGATGCATCCAATTCACACGGCCCCCAAGC -CGAGCCTCCCAGGTACCACCGCATTTAGCAGGTGCTTACACTGTTAAAGGAATTGGTCTC 
-GGCATATCCAAAATGCTGCCGAATTTCTCTTTGACGCCATGTGCCGTGGCTTTTGGCTCT -TTCGTGATTATATGATATTCCACTGAAGCCTTGAATAAGTCATGGGGTAACTTTGTCTCT -AATGCTCAATGTTAGAATTGTATTTATACATTGTTTAATCGCTGAATTGAGCTACATTAC -CTATTCGAAGTATGGACACTGATTCCCGGGCTGGGGTTCCAAATTTCGTATCATCAATGA -AGAATAGTCTCATGAAATTGTGTGTGTTATATTGAGCCTCCATCGATGACACAGATATCC -TCAAAAAAGACAGACACTTCCGAGGACTTCACTGTTTCGTCTCCCACAAATTCCATTTTT -AGTCTTATACCGTCGATTAACGATGAAATTGGCTTTCGCACGCGAATGTTTCGGTCGAAG -CATTATCCTTTTGTATTTCTGTTTGACTCTACAGCTACTCTCCCTGCCCCCTTCGAGAGA -TCCTCATAGCTCATTTGGATCTATCGCTCTGCTGTCTTATTATATTTAGGGAGCCAAGTA -CCGCGTGTCTGAATACAATTGATCAAACCTTATGTGATTAATCAGTGATGCTATACTGAT -AGAGTTGGTGGTCCCATCTCTCTGGTTTTTTTTAGTCTTGTCTTCCTGTTCAATGGTATG -ACCTTGAGGCTTCTTACAATGTCATGTTATCACGCCACTCTGCTCAACTCGCGTGTCAAA -CGATGCGAGAGGACGATTTGATAGGTAGGTAAACCCCGCGGAGACCAGAAACGGTTCTCT -GGGTCGGGTAGATCATTGGATACAGATGTCGGTCGTACCATTCGTGAACAACCTGCCGAC -GTCCGCGGGTAAAATGAATTACATCACCACCAGCATGGGATTCCGGGGTAATATATAAAG -AGAGATGACTTTTCTACATGGGACTTTTCACTTCTACTGTTTTCATCAATTATCACGATT -ACTTCACACGTTTGCAATATGGACAAGTACAATTCCCGGTGGGTCTCGTTCCACATTCGG -GACCACATCAACGATGGCGAGATCCTAGTGCAAAACACTGTCATTGAAGGGTATCTGTAA -TTCTCAGAATTTACTCGATCTCTACTAACACCTGGCAGTGGCGAGTTCATCGACCCTAGC -GATCGCCGCAAGTCACTCACCGAAGATGAGATCGACGAGATTAAGATCTCTCCTGACGGT -GTTGGCGAAATTGGCGCTAGCGGACTTCGCGGTAGTGAGGGCCGCCTGGACTTGTTTCAT -GACAATCAGAAGATTTGCGAGCTTCACTGGGAAAACCGTGGCGGAGACCACGAAAACCTC -GTGGAGGTGTTGGACAAGAGCGACAAGTACACTATCTGGCATGGAGGATGGAGTCATGAT -GTCGGTCCCCTGGGGCATGTCTTTGTGGATGTTGGGAGGAAGGAATAATTACGCTCCACA -GGCTGCACCAATGCCCTGGGCACTGAAAAGCGAAGAGCGCGCGGTCCGTGGAGCACGAAG -TGGCAACACTTGGGAACAATGGTATCTTTCATACCGTACAGAAACACAAAAATACCAAAA -ACAGTGCAAAATTTCAAAACATCACGAGTAGAATGTCGCTCGCATTCTTCATTCCATTCC -TACTCCAATATCAATGCTTATGGAATTGTAAAGAGCTGTCCACCATACTGCATGGTATCA -TCTAGGAAGATAACTCCATGCCAAATGCCACGTCTGATCTCGGTCAATATCATAGAATGG -CCTGATCACTCCGGTCGTCTTGCCACTGGAATAGACCCGGATATCCCATATCGACTACGG -AATTCGGAAGAACACTTCCCGATAGAGCGACGAGCCCGCACGAGAAAAGGAGGACTGCCA -CGGATCATTGGCTCGAGGATATGAAATTACTTTGCGACAGGCACCGAGGAGAAGAAATCA 
-ATGAAAGAATTAGCAGCGAAAGCAGCCGAGGAGTGAGTAGTTCAGCTTGGCCTGAGCTCT -GCATCCGCAACTGGCAGTTCTGGAATGACGATAAATCAATAAAAGCTCAAACTGTGAACA -GGCAACTAAGAGTGCTGTCCGCAAGCCAGGCCTTATCTTTCCTCCAGCCGTGGACAATTA -AATCTCTTCCCACCTTTACAACTTCCTCAATTTGCTTTTCAGTGCCCCATCCAGGAACTC -TACTGCCAAAGCAAATCAGATGCACACCAATGTGAATCACAGTGCGGAATACCATTTCAT -CGTCGAGGTGCCCATATCCCTTCAAGAACCCTTGTATCATCCACAGTCCACCGTCCAAAT -TCTTGAACAGCTTTGTTTCATACAGCTCAGCAATCATTTGGCCTAGGTCCAATGCGCGGC -TGCCAATCTGCGACATTTCCCAATCGGCAATGAACACAGTGGTCTCAGATTGATGAGTGG -GAGGTACGTTTGGAAGAAGAACGCTAGAAATCACGTCAGTAATCTGGCTTAGTCTATATC -AATGTTAGATCGAAGAGAGCATACTTTCCAGTCCAGAAGTCACCGTGAATAATTCCAAAT -TTATCATCATAGGTCTGTCGTTCAAGCTCTGCCGCGGCAAGATCACGAACCTTCTCAAAA -ATATCACGATCGTCCTCGAGAATATCAGGGAAATCTTCAATGGTGTTGATCAGCAGAGTG -TAATTCACCCAGAACTTCAAGTTTTTCATCTCTTCATTGGTTCCGAGCTCTTTTGTAAGC -TGCTTCTCGGCCGTCCAAAGGTGAAACGATCTCAGCCAAGATCCAAGGGCGCTGCCCAGG -TCTCGCCCTGAGGATTCGGATAAGTCATGAGAAACCTCTGAGAGTAGGAAGTTCTTCAGG -TCGGTTGAATTTGGAAGATCTTCCACCACCTGGGTGTCTGTGGCCCGATTGAAGTCTAAT -AGGCGGGGTGATTTCACAATTATCTTTCCAGTTGTATAAATTGCTAGGCCATCGAGAGCT -CGCAGAATGGCTGCCTCGAAGTGCTAATTGAACATCAGTATAGCTCAAAAGAGGATAAGT -CAATAAAGTGCAACTAACACATCTCGTGATATCGATTTTAAAAGTTAGGTTAGAAGCCGA -GTGATCTTTGGTGTGCTTGATAATGATAGATTGTCCACTCGAGGAAAGTATGCCTCGATA -GACAAAGTTAGCCGTACCTCCGGATAGGCGCGTAAGCGAGGAGCAAGCGAAGGGAGTAGG -GAGAAGTTGCTGCGCAATTTTCTCCCGGATTTGATCGTGGTCATTTGCAGTCATGTTGTA -TGGCTTGAGTGGAAGGTGTAAAACATTCAAGCTGTTCCCACGAGTGTTCCCCTTGCATGG -CTTGAATGCGCTAGTACATAAATGGTCCCCGCCTTCATAGCAGATATTTTGGGAAGACAT -GCCCCAAAGTACATAATCCTCAGACCAAATTATTACTTGCATTACATACGAGTCCTAATA -CGTCCGATGGCTGCTTGGATGGGGGGTGATTACCTAGTGACTGATAGTGCGCTGACTTAA -TAAAGTTAGCTATGCTCCTTTCTCTATTGCCGATACTATCAATTTCTGGGTGATTTCTAG -AAGCATGGAAAATATAGCTGTGACTAGCATGAGTAGAATTTTTATTTAACAGTTAGTGCA -CTTGTACTTATCATAAGATGTGGCCACAAAAGGAGAGTGAGATGATATGAGTGACCTTGT -ATAACACAAAGTAGTGTTGAAGTCTAACAAACAGTGAGAAACAGTCAAGACTCAGCAAAT -TGGCTCCATAAGCTTGGAAAAACTCCATATCTGCCAGTAGTACCACTAGATTCAAGCGAA -AGGTCCAACTGTTGTCGTACACTATAATGGATTGAAGTGAGTATATATAACCTTCTGGTT 
-GTCCTTGTATACGTGACATTAGACAAATTCACTGGGTGTGGCATCTATGCCGAAATAGAG -CCATACGTGTGCATTTGCCATCTACTGAAATTAAGGCATGCGTTCTTCTTGAGCTTGTTA -CTTCTGCCATCTGCCCACCAATCCGAAGACGCCTCTGAGATAGCACCGCAATCATCTAGC -AAGAAAAATGTTGCTCGCTGACGGTGGCCTCCATATTCCAAAACTCGGAAGGACTTCATC -GGGAATTATGTCGGTATTCTTGCAGGTGTTATCGCACAAATTGACTTCTTTGGCCTCGCC -CAGAGCAGTCAGTACGAGCAAAAAGCAATACTGGGCCCGAAATATCTCCCCGCCGACAGC -GAGTCCGACACGATGCCTATACTATCGGGTGGATATGTGCTTTTCCAAAAGAATAGACCA -CAGTCAGGGCCATGCTAGATGAAGAGCACGAACATCTTCGCCCCCTCGCGATGGTCATAA -TGTATATACCCTTGGCTCGGTATGCGGCCACAACGTCGTCATAGCCTGCCTACCTGATTT -AGGAACCTACCCAGCCGCGACCGTAGTAACGTCTATGATAAAAACGTTCCAGTCACTCAA -GTAGGAATTGGCAACGGAATTTTATCGAAGGTCAATCTGGGTGATGTGGTGGTGAGCGAA -CCTATTGCTGACTATCACGGCCTGGTACAATGGGATATAGAGAAGCTAGGGCAGGAGGGG -CGGTGTGTCCACACAGGCTCACTCAAAAAACCTCCGATCGCGCTACTTAATGCATCAAAC -CAGCTGAAAAGTAGCCATGAGATGTTTGGATCGAAGATCAATGAGTACCTGGATGACATG -GAACGAAGATTCCCGCGACTGGCGCCTAGATATACCAGGCGTGAGGAGAACGATCAATAT -TCAGAGAACGCAGGGGAGCTAGGAGCCCATATAAAATTCAGGGTGCACTACGGCCTGATC -GCATCCAGGAACAAAGTAGTTAAGGACGCCCAGTCTCGAGACTCCCTCGATAAAGCATTG -ACGGGCACGTGTTGTGTGTCGAGATGGAGGCAGCCGGGCTTATGAATGATTTTTCCTGTA -TTGTAATCCGAGGCATCTGTGACTATGCGGACTCGGAAAGAGACAAAAGTTGGCAAGGAT -ATGCTGCCACTATGGCCGCTGCATACGCAAAGGAACTTTTGGGATGTGTGGAGCTGAGTC -TTGTTGATGCCGAGGATTCAGCAGAATCCATATTTGAACACAGTGAGTACCTGCCTATCT -GAATAGCCCAGCTGCAGATGTCTGAATTACTAGCAAATACTCGAGACTGAATAATATCTG -AACTTGGAATACATGCTGACTGCTTATAATTCAACAAGAGATCGATTGCGTGCAGCAGAC -AACAGCCACAACGAAAGCTGTAATGAAGCTTAACGAAGACAACCCAGACCCACTGACGAG -CATGTACAGCCTAGCATCGATGTACTTGAAATAGGCCCGATGGGATGAGGCGGAGAAGCT -CTTGGTGCAAGTGACGGAGACTCGCAAGACGAAACTCGGCGCGGACCATCCTGATACGCT -GACGAGTATGGGCGACCTAGCGTCAATGTATAAGAGCCAGGGCCGGTTGGAAGAGGCCGA -AAAGCTTGACATGCAGGTGAGGAAGATTCGTTGGCAAATTCTAGGGTCTGGGCGTCTCAA -CGTTCTGGACGTTTCCTCAGAGCCTGTCGAATATGATTGACATGTGGAATTTGGGGATAC -CGATTCATCTTCATTCAGTGACAATGACTCCGTATTTTATGTTCTTGCGTCTATACCTAG -CACTAGTTTGTTTGACTCTGGAAGAGGGGAAATAAACTTGTTGTTGATTCACCAGTTTTC -AAACCTGCTGAATGAAGATGGAGATCTCCTTTCGCTGCTTTTGATCGGCGTATCAAAAGA 
-ACCGATCGGATTCGAGAGGATGCGAAACAATTTTCGGAGACTACTGAGACACTTTGCGAA -TGACCTCAAAAGGAGATATTCTCAGCGAATCGCACCGGGACCTAATGAGATTTGTAAGCT -CATCTTCAGTCATGATCACTCGCAAGCTCTTTGACACGGTATATTCTCGACGAGCAGCCA -AAGCTCAGTCCTGACGAAGACCTAGCTCACAGGAGGAAAGTGGAATTTTATCTTGTCTAC -ATATCACTACACCTCAGACAAGTGAAGTAATCGATGGACTTAATCCTGACGATGAGGACA -GTGACCAAGGATCCGTGGCAGAGGGAGGTAACGAGGATGAGCCATATGAGGGAACTTTGC -AAAACTTGGATTAAATGAAGCACTTTATTCTGCAAAGCGCCGCCTACCAAATCCTTCGTC -GTCGGCTCGAAGATTTTGTTTAACCATCACTAAACTCCCAGTTTCGGGATTTGGTGAGTA -GCTGGTCAAACCCAGAACATAAGAACCATGGTGATACTGCTCGGTATAAACTACGGAACC -TCGTCATAGATTTACAGCACGTTAGTCCCCTCGAGATCCGGTTTGAGCGTGATGAGGTCA -ATTCTCGGTTTCGTATGGCCGTCAGCTACTACCGACATTTAATTGAGCGCTGGACTGGAG -AGTCTTGGGGTTGGTGGCCATTACCTCGCTATCCGAGACCACTGGCTGAATCAGAGACAC -GGTTACGGTGGAAGTGTGTGAGTATTCAAAATCTAGACAGCTCTACACAATAATGATCGA -ACCCTCATCAGCGGGCCAGGTGTTTTGGCGAAGAGTGATGGGCAGAGGTGCCAAACACTC -TTGTCAAACGCCTCCAATCTATAATCAGAAGGCTTCCCGTTGACAGCTTGGTAACCCAAC -TTACGCCGCCTCCTCCTGAGGCACACACTTCTAACCCGAACCCCCGTAACGTTCAAAACA -TCGATCTTAGTTCCAATTGGACTTCCTATCAAGGCCATAGAGTCGAGCAACCGGACAGTT -AACCGTCTAATCCTTCACTTCGTCTTCGACCCTCGGGACCTAAGGTTAACAGCGCGGGCA -TAAGAATCACTAGGCAACACAACCTCTTTCTCGCAAATCAAGGCGGAGATTATAGACTCG -CAAAAATTTGCATAAGCAATATGACCTGTCATGGCTTCTTCACCACCATGAAAAAGGAGT -ATTTTCGGCTGAGAGGTGTCTTACAAAGCTGGTTCAGCATTTGGAGGTACTATCATTGTG -ATTCTTATAAAGTGAGTGCTATTACCTAGGACTCTGGGGTGTATGTTAAAGATATAAATC -GCAGTTCGAAAAGTTTGATGACCATGAGTTTACGCTCAAAATCAAAGATTCATTCCCTGA -CCATACGAACACCGATTAGACCGATGGACAATATTCCGCCTATCTCAGAGCACGAATTCA -TGAATCGTTTCTACGCTTGTCATAACCCATATCGTTGCAGATAATTCGATAATGGATATA -TCTCGTTATCCGCAATTAACGGATACGATAATCGATTATCTGCCGATAATTTGGATACTG -CCGCAGCCTACTCGGGACCTACCTAGGTGGCATCCATGAATGCTCCGAAAGACAGATCAA -AGTCATCTACACAAACGTTGTCTTGTTCGAACAAAAGCAGCGCGTATAAATATAAATGCA -AACTGACTTTCCGAGCGCACCCCCAGGTTGGCCCTTAATCCCATGGTCCAAATGCCAATG -GCATATCGAGAACATGATGCTAGTCAACTAGCCTAAGAGCTTCTACCGTTCTGTCGTCTC -AATTCAGGAGTATGGGAATATCGGGGGCTGCTAGTGTTGGAAAATCAGGTACGGTCCAGA -GATTACCTTTGTGGACTACCTAGAAGGAAATTGGCAAACGTAAGGGCCATCATCCCTAGG 
-ACATTTGTTAATATAATAGAAACTTGCGATCCTATTTTATAAACCCCTTGATGAGTTGTT -GAAATCTTGCTAAATTGAGTTCCTCTCGGGCAAGGAGCATCTGAAAGAGAGGCATAGCCT -GTATTGGAATTGACGAAATACCCCAAGTGTTCGATATCAATTTGGACTAGTCTTAGCTCT -GAATATCGTGTAAGATATCTAAGGGATTTGTCCGGATAATTTTAATTCAAGAATTTGTTT -ATGGCACGGTCGAAGTCAGAATATCCCTCCTCTTGAGTATGGCATCCCAAGATTTACTTA -TAACCGACCTTTTCATATTCCACTTCGTTTTGGGTAGTCCTTGTAGGAGAGGGGAGTATA -TGACAAATACTTTATTGGGTATGTCTTACCCATTATAATAAGCAGTTCCAACTGGTGTCT -TCCCAACGGCGCCAAAAGACATCATGTGGGCACCTAGATCTTCTAGAAGCTCGAGGGCTT -CGATATAATTGTCGAATATAGCAACATGCACAGCAGCAAGGCTATCCCTTCTAGTAGGAT -TGATGTCAGCAGAGGCATGCTTGGCAAGGAATTCAATTGTTTTGCTTTGTTTTGCCCGAT -TTAAACCCTTCGGTCAAAATCCACGTTGAGATTGGCCAACTAGCCTTGACTACGTACCTG -TACTAGTCTAGTGACCACCCCCAAGGTGGCTTTAGACAGCAGATGAGAAACTGTTACCAT -GTACCAGTGTCCATCTGTGTGCAAAATCCTGGGAAACAGTAGTCTAGGGTAGGACTGGGC -GAGTTTCCAAGTTCGTTTGTGGAAGGGACACATCCTACATGGAGATAAATGGTACATATC -TTTATATGTAGTATTGGTGGCCTGTACATTGATTGTTATGGGAGAAGTAATTAACATAAT -ATAGTGCAAAATATAGTGTAACCACAGATCAAGCCGGCCTAGGACGTCCTATGGCTGTCA -TCGGAGTACTGGCCACCATGTTCACCGGCATGCACATGCTCAACAGCTGCACCACTGCCT -CATGTCTGCCCTTTGCAGCCCACCATATGGGTCCCAGGTCTCTAGAGTCCTTAACATCTA -CATCCACCTGTCCAGTCTCAAGTAATAGCCTTACCACAGCCTCATGCCCACCCTGTGCGG -CCCACCACAATGGTGTTCGGCCACCAGAGTCCTTTACATCCAAATCCACCTGCCCAGTCT -CATGTAACAGTTTCACCACCGTTTCATGCCCCCCCTGTGCGGCACGGAGTAGAGGTGTGA -ACCCTTCGGAGTCTTTTGTCTCAACATCTATGTTCCCAGTCTCAAGTAACAGGCGTACCA -CCGCCTCATGCCCATACTGAGCAGCACGGGATAATGGCGTTAAGCCATCGTAGTCCTTGG -AATTGACATCCACCTGTCCGGTCTGAAAAAGCAGCTTTACCACCGCCTTATGTCCGCACT -GTGCAGCTCGGGATAATGGTGTCCGGCCTCCGGAGCTCTTTGCGTTTACATCCACCTGCC -CAGTGTCAAGCAACAACTTTACCACCGATTCGTGCCCATATTCTGCTGCCCACCATAATG -GTACCATGCCACAACCATCCGTCTTCCCAATTTCAGCCCCTGCTAGGATGGATTTCCGAG -CTGTATCTTCATTCCCATGACGAGCGGCCCACTGCAGTGCTGAATATCTGGAGTTTCGAG -CATTATTTCGATATAGGTATGGATTGACCACTTTGTGGAAATGTTTGTTGGTTTGAGATA -ATGAGTTGATATCTCTTTCTGCATCCAGGAATGTTGCTACTAATAGTAGTAACTCTGTCG -GTAAATTTAGGGGAAACGTTCTAAGAGATTGAGTGACTGTCCGGGATGCGGCTGCTCCCC -TGCCTCCTTGCATAATTGTGAAGGTAACGGGAAACTAAGCTGAGGTAAAGGCTTCAATGC 
-TTGGAAACGAGTGTTGCTGATATCATTCGGATGATAAAGAGTTTGGGGGTTTTCAGGCAG -AAACAGAAACATTTAGTGCTTACTTTTGTAGAATTTCCAACCATCTTGGATCTTATCCTT -CAGGATGCATTCTCAGCCATGAAACGAGATTAATTTACTGATGTCCGGTGCGGATGCAGA -TGTCAGATGTGGAGACTACAGCAGGGAATGCAGCAGGGAATGTCACGGGCCAGCGTGCGG -GAGCGGGAGCGGAGGCTAACCATCTTTGGATAGGCCATCGTGGGCATAAAAAGTCAAGCC -CTCAAAGCCACGGCCGTGGATTTTATGCCCACGATGGCCTATCCAAAGATGGTTAGCCTc -cgcctccactcccgctcccgcaccgcacccgcacGCTGGCCCGTGCCGTCCAGCCTTTCT -CTCCATCGGGGCTACGGAAATAAAATATTGAATGTACAGTAGTATTCGAAATCAGATTTA -CCTCCACACTCTTTGACTTTCATTCCCGAACCCTATCTTGAAACACACTAGTCTAATTTT -AGCCATTCCCCAGTAACCCCTGCTAGACTGTATTGTATTCCCCAGTAACGATGGATGGGT -CATTGTTATAATACCGTAATTTGCTTTGGAATTTAATAGAATCTTTTATGCAAACAGTAC -AAACAAAGGTGCAATCCTAACAATCTGGCAAGATCCCCCCCATCCTACTTCTTTGCTGTA -CGTCTTGATTGTCTCGTCACACGCAGTCTTCGCTTATACTTCTTTGACACCCAATAGAAG -GTTGCAGTGGCTTTATAGAGAACTAACCATGGTAACTTTCTCTTTCCCATAGCTCCTGCT -ATATACTACTATTTTCATTCGTTGCTGTTTTCGATCGAAAGCTGGTACCCCTAGGAAGCT -GACATGGACCTTCCAAGGACTGTTATATGTAGAGACGGTGCAGAGTCCAGATCATGACAA -CTTAATCTATCAGCCCGACGCGCCCGAACTATGGGAAAACGACTCGCAATACGCTCCGAC -TCCTAGATACTTTGGTAGTCCCTACGCAACGCGGTCCAACTACAGTCCTTCTCTGTCTGT -GCTGGAGAATCAGTCGCAACCTGGTACGCTTTGGCTACTCCGGTTGTCTGAATGGGAGGA -AGGAAGAGTATACGACCAGGATCCACCGACCTGTGTCCACTATCGTATTAAGTGGCGAGT -GATAGTCAATAACCGGGAGGTGGCAAAGGACACAGAGGATGACCTCGTTCTAGCCCTAAG -TGCCTTTTGGCAGCTGTTCTTGGAGAAGAAGCTTGAAAAGGTTCTGCGAAGAAAAGTTGC -CTGTAACCGTCGAGTGACAGCTGATGACACTGCTATCACCGTTTTAGTCAACGATCGTAC -CCAGCGCAATCTAACTAAACGATTTGATGATACCGATATCATTTAGACCGCTATCGAGAA -GCAGCTTCTGATGTGGTCAGGGCTTTTTTCTCGAGGTAAGGAACTCACGTTAAAGATTTG -TTTCAACTACGTAGACGAACGTCACTCTCCCCTAACCGCTGGTTGGAAAGGGGAGAAAAG -AGGAAAGTCCTCTGTCACGAAAAGGATACTTAAAGAGAGAGACGCATAGCTCGATACGAA -GTAAGACGCTTCTGGACAAAAACCTACCTAGCGTAGCGTCTATAATCTGATGCGATGCGA -TTCCACAACTTACCCCTTAGGCCCCTACTGCTGGGGGGATCTAGCGGGGAAAAAGCACTA -CCAACTGAAAACCCACCATCTGAAGCGCCTCGTCACCTATGTCGAAAAAGGTGGGGTGTT -AGACAGCCACAAAGATGTGCCGGAAACAGTACGTAAAGAATTATATATGGAAGAACAACA -GAGACTAGAGAAGAACAGTCGTAAGGGGGGATATATCATAGGAAACGGGATAGCGTACCC 
-TCCTATCAATATCAATGTCTATCCCTCACAATCTGCCGCCGCTGGGTCAGGCATTTCTGC -TACCCAAACGGACGCCGGCCCGAAAGGTCTAAGCTCGCTCGAGATTCCTAGGCCCAGGGA -TGAAGCTGTTAAGGAATACAGTGAGTGGCAGGTGTTTAATGTCACTAATGACACGCTCAA -GGCTGCATTTCGACAAGTCTGTGATATGATGCTCGAGAATGGCCTTCACCTTGAGCAGGT -CTATCAGGATCAAGACCCGGAGTTCTTCATTGACAAGGGCATAAAAATGGGTATTGCTCG -ACGGTTCGTACAAGACATTCAGCGCTGGGTCGAGAGCGTGAAGAAAGCGATCCCTATTGA -TGAAATCATTTAGCACGACCAAGGTGGGCGTTGGGGGGCGTGACTATTTCTTGTGTTTGG -TTAATAATATTAGAATATACGGCTCTCAGCTGCTTAAATTGAATTTATGTCATTATATCC -CTGATTTATTTTTATTTTATTTTTTGAGTTCTGCTATTCGACCCATTTCATTTTTGAGGG -CTCTTGCGCAAACAAAGCAACCCCAGCAAAGTTTAACAAAATTAAATAGCCAGCATCTTT -ATGTGTCTGGGCGAAAGGCGGGTATAAGGTCAACAATCAACCACCCACTTGGCCACCATC -CCCACCGCCACTGCCTCCACTCGTCTCAGTGCAAACGTTCAGAGTAACGATCAAAAGCTG -TTTACGCTACAGACGCGGTCGAAATCAGTCTCACCACTTCACTCTCTCCAGTTCTTTCTT -GAACTCTATCTTGTATAACGCTATGCTGTATCTAGCTAGTTTTAACATTACATGTATTCT -TGCTAATATCCCAGTGCTGTAAATTGCCTAAGCCGTAGCTCCCACTCCTGCTCCGGGAAG -TGAGGTGCTATTCTGCTTCTCCGGCTAAGATCGTACGGTAGACTTACTAGTATGGGGTTT -ACCCTAGAGGTTCCTCATCCCTAGTGGCACCCAGTTCAATTCACTGTGAGAGACCCTATG -CATTAGTTTAGACAGAAGTGTACACGTGGCCAAATCACAGAATCGGTTAACCTAATAATT -CATCACATACTAGCTGTCAGCTGATCTGAATTGGGTTAGAGTCGGGTGACAAGACTGTCA -TGAGATATTTCCACCATTATCCTGTTCTCCTGTTCAAAGCACTACCGCTATCTAAATAAT -CTTGCAAACGTTCCTTATGCTAATTAGTCTAGGCCATGCCGCAGCAACGAAAGAACAATG -AATACTACGCGCTCTTTACAGGGCGTATCAAGGAGCCCACCATATTTTCGTCGTGGTATG -TTTGTTTTCTCTCCTAGATAACAAAGTTGAGCTTGATCTGACCGAAATGGTAGGGGACAT -GCGCATCCTCGCATGACAGGGTGTTCATCATTAAACAAAAGCTACCCCACCCTTCAAGAA -GCACGCGACTATATGGAAAGAATGGGGGCGGCCAAACCCAAGGAGATCATAAAGAGCGAA -GCAGAAGATACAACTCCTTTGGATGGAAATGGATTCTATGCGGTTGCGAATGGGGCAAAT -CCTGGTATTTATCCATTTTACCAGTAGGTCTTGCCGAAGTGCCCGCCGTCGAGACGTTCA -TGAAAGCTTACAAGGTTTAGCGGCGAAAACGGCGCTGAGAGGGAAGTCACCGAGGAGGGG -GGTTCATGTCATAAACGATTTAAGACGATGGCTCAAGCCGAAGCTTTCATTGAAGATTGG -AAAGAATCCTATGCTGAAATTTGGTACGAATTGATAAAGGAAGCATTGGATCGAGGCTTC -AGAACTCGCGATATTGGAGCTTTTAGATCTCGCGAGATGGGGTTGATCATTGAAAAGTTC -TTATACAAGCCTATCAGAACCACCGAGATCGACGACATCGTGAAGAAGACAGAAAAGCAG 
-CTTTCGCTGCAAGATAGCAAGCAAAAGAAATAGGTATTCCTTGAATGAATATCGCGTTCA -AATGTACCATCTGAACCTTTCCATCTGGCCTCCACAACTCGAACAGGTTACCAGTGTAAC -CCCTATCAACCTGAACTAGATCAGAAATTCTTCTAATTACCTAGCATAACGATAAATGCG -CAAATCCAGCCCTTGTTATATGCATAGCCTCACGGTCGCATGGAGGGCTCGGTTTCAATC -ATATTAGTATTTCCAGTAAAGTACACAGCCACCAATAATGGTGACTTTTGCTTACAATAC -GAATCGAGGTTATATGGTGGGCTAAATCAAAGATAGACTATCGTGTTTCAAGATAAGAAA -AGAATCTACGTGTATGAGGTGAAACTGATTTCGAGCGCGCCTGTAGCTGTCAAACCCTTT -GCTGCGATACTGAGATTGCGAGTCGCGAGAATCACGTAGATGAAGCTAAGCTAACCTCAA -CGGCTTTTGATTGTTATCCTGAACGTTCGCACCGCGCCGAGTGGAGGAAGCAGTGGGGGC -AATAGTGGTAGCTATCGCGGTGGACGATAAACAGTTTCTCCTGTGGCCACCACAAATTGC -GTTCAAGTTTCGCAAACTTCTATCTTCAAGCCAGAGTGCCGAAAATGACCTCTAGCACTC -TCTGTCTTTGCACCAACCTTGTCAATACACCATGCGAGGAATACATCAAATCCCTGACCT -GGATCCACTTCANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNTAGGTAGTATCTGATATCGCTGGTCGCGATGGAACTGCGATACCTACAAAGGGGCC -ACGCCAGACGGTGACCAGCCTGTGCGCCTGAACCCGGCCCGCTTGTCACATCCCGATTCA -TCCACATAAACTAGATGGTACGACCGAAAATCCGACAAAATATGGAGATAAAGTTCTCGC -AGATCGGCATTACGCTCCTTTGTATGTTGCCGGGCCACCTTTTTAGACCAGCCTTTAGTG -ACAAGGGCTCTTCTGATGCTAGAAGTTGTAATCATCGTGTGAAACTCATCCCAGAGGAAG -ACAGCTATCTCATCAAGGTATATGCCCAGTTTTTCTGATAGGTGGTCGCAGAGCGCATCG -ATCATTAGTGGTGTTACGGTTAGTTTCCGACCTATGCGAGTTGGGGGTACGTGGACGCTG -CTAAACTGCCGCAAGTTTCTACGAATATTTATGATCGTGACCTTGCTGTATTCGGCTTCT -TCAGCTATTTGAGTTGTGGTTAGTGACTGGCTCTCTATCATATCACGGATAAGATGCAGC -TTTGAGGGGATAAGCCTCGGGCCCATATTGTAAGCCTAGTGTAGGGTGTCGGTGAAGAGT -CATTTACAGAAGAAGCCTGTTGCTTAGGGAAGAGAAGAGGGTGGAGTGGTATAGCAGTTT -CCTGTAATTTACAAAAGACATGGTCCCACCCGCGTTATGTAAAAAACAATTTCTATTTTG -GGAACAAGGGGATCGACGAATATACCATATCTAGGCTAGCGTGTTTCAAATTAGGGTTTG -GGAATTAATGTCGAAGAGTGTAGAGGTAAATCTGATTTCGAATACTACTGTACAATGGCT -CCGTGAAGTTGTATCCAAATCATACTAACGATGAGACGAATCGTCGTGTAAGTGGATCAA 
-AACACAAATTCAACTAAAAATTGCCATGACATGCAACCGATATTGATATTACTGACATCT -GTACTCTGCGATCAAGCTGCATCAACAATGCTGGGCAGCAACTCGTGTGTGTAGTTCCAC -AAGGGACCCGCGTGGTGCGAGAGGAAGGCTGCCCCCGTGTACCAAGTGTTCCGGTGACCC -TGCAATCCCCCGAGACTCTCATAGAACCCGTCGCGAATTGTATCAGCCGGAACAGTTAGT -TGGAAAGGAGTGTGGCTGTTACAGGCCAAGAAACGAGGAGTTTCAGTACTGGAGACGCTA -AACTCGTCGCGTAGACGGACGATGGATTGGAGAATATCGGATTTGGCATCCTCCATAGAG -ACGTCATTTGGCCCACCATACCATCCCGCGAAGATCCCGTCAACCGGAGTGGGGTTGAGG -TTATACATGCCGGGAAGAGGCGGGATCTTGAAGGTGCTGTCGCTGGAGGGCCGCACATTC -TGGAAGCTATAGCCGACAGGCAGACCGGTGTCGGTGACTACGAAGACGTACCAGGCGCTG -TTGTCAAACTGCTCAAAGATAGACCTCTCCTGCGTGTCCAGGCCCAAGGTTTGCATGTTC -TCGATCTTGGGCGGCATTGTCACAAGCACCTTGGAGGCGACGACTAGCTTCTTGCCTTTG -GGCGTTTGAATGACTAGCTTAACCTTCTCGCCCTCGTTGCGTTGCGCGGCACTCACGGTA -CTAGAGACCAGAGCATCTGAGCTTAGCTCCTCTAGTGCCTTCTCGTAGATCTCATGGTTG -TTCCGTCGAGCGGTAGTGAGAAGCCCCTGCTCCAGCCCTTCCACAAACGGTTGGTCCACG -GCTTTGAGCATGTACACGGTTGGCTGTCCCAAAATGTCACCACAGCCCCAGGAGTACTCC -CAGATGGTAAACGCAATGTCCTGCAGCGAGTACTTGGTGGTGAAATCCCGGAATGGTAGG -AGGAGATCCTCGGGGACCATTTCAGGCAAGGTCCAACTGGTGGGTAGATACGGGTATTTG -GCCAGCTGCGCACTATAGGCCGAGAAATCGAACGACGGCTTGAACTCAGGGAACTGCTCG -CCCGTTTGGAAATCCACAAAGGCCTGATATTTGGCACCAAAGGTGTACTTGCTCAACGGG -ATATCGAACCGGGCGAAGAAGTCGCGGACCACTGAGTAGTTGGAGTAGCTGTGCACGCCG -TAGTCGATCTTCTTTCCGGTTTCAGGATCTGTGTAGGTATTCGTGTGGCCGCCCAGAATC -GCTTCCTTCTCCACAACCACCACTGTTTGTCCGCGATCTTTGAGGTTGATGGTCGCGTAA -GTGCCCGTGGATCCTCCCCCAATGATACACACATCGCGGGTGATCACATCCTCCGGTGCA -TAGCTGCTGGCGTCGAATTGGTTGGCCATCGCGGCAGCCATCGCGGCAGCCACCTGGGTA -GCTACTATCCACAATGCCGGACCGGAAAACATTTTCACAGAAATCTATGGAATTAGAGAA -GATTCAATGGTCGAGGTGGTGATGAAACAAGACACCAAGGCAGGATTTATAGGTTCCAGG -TACACACCTTGAGTCGGTAAATGAGACGTTAATTTAATCATACTTCGCCCCCAAAATAGG -CAAATCGTATGTCCAATGCATGTCTTACGTGGCCCGACGAACGTGAGTCTGACGCGGGTC -CCTTGGCATCATCCAGACTTTTGACTCTGGATGGCCCGAGAGTTCTAGACCCGAGGCGAT -GGATATCTGAGGAAAGATGAAGTTCGAGCACGGAAGGAAATCAAGGCTTACCTTAGGCTT -TGTTACTGACGATCACTGCAGGCCTGATTTGTGCAACCCCCTATGTGATGTGTATAAAAG -GGCGCTTTTGACGATGTCAGATGGTCTTTTCTCCATCAATATTTTCTTTCCTCGATGGGG 
-CCTGACACTGCCACTCTTCATCTACCGCGCATTCTCTGCCTCCATGGGGGCGGATCCAAC -GGCGGAGTCTTCCGGTTCCAGTGTCGAGCCCTGATCGCCCAACTGCAATCGACATTTCGG -TTCTGTTTTGTTGATGCTCCCTTTCAGTCTGACCCGGGACCCGATGTGGTCCCCACCTAT -CAGAACCATGGTCCGTTTCGACGGTGGCTACGGTGGCACCCAGATCATCCCGAGGTGGAC -CATGACACAGCCGCCCGATTCATCCTCCACTCCATTACGACCGCAATGCAGGAAGACGAT -CGGAGCGGCGCGGAAGGTGAATGGGTCGGTCTGATGGGGTTTAGTCAGGGCGCAAAAATT -GCCGCCAGTCTGCTCCTCCAGCAACCTCTCCTAGAGTCTGACCAGAAATTGGGCTTTCGC -TTCGCGATTCTCCTGGCGGGGTCGGCGCCGTTGGTCTCCTTGGATCCGCGGGGACTATAC -AATCCATATCTAGCGGATGCATCCCAGATCTCATTGTCGCTGATGCCGGATGTCCCTAAT -GCGAAGGGGAGAGCGGAGCACCGGATACGATCGGCCACCGTGCATGTTCATGGACTAAAG -GACGCGGGTAGACTTCGTCATCAGCAATTATTGGAGCAGTATTGTCATGTGGGAAGTACG -AGGTTAGTAGAATGGGACGGAGGGCATCGCGTTCCTATCAAAAGGCAGGATGTGACGGCG -GTGGTGGAGACCATTCTGGATGTAGCGAGGGAAACAAACAGGGGGATGCTGATGACACCA -GATTACTCGGGTCCCAACCACGTAGCTACATATAGAAAATACAAATGAACCCATGAATAT -TCAAACATCTATTTTCTTTCTCTTTCAGTATCAATTAGAGAAACCATGCCTCCTCCCCGG -GGAACCCCGAATGTGCTAGAGGGCCCCGGCGACTACGATGTCACGCCCCTCATCCACAGC -GACACCTATGCAGCCATCGATCCGCACAACCTTGACCTAGCCGGCACCGCCGTTTTCATA -TCTGGAGGCTCCAAGGGCCTAGGTCGCGGGATGGTTCTCTCGTTCGCCAAAGCCGGAGCA -TCGTACATCGCGGTGGGAGCACGATCCGACATGTCCCAGCTGGCCGAGGAAGTTGCCTCA -GTGGCACTCTCCGCGCACCGACAACCGCCCCGATTTCTGCCCATCAAGATGGACGTAATG -GATCAGGACAGTGTGGAGGCCGCAGCCAAAGTGCTGAAGACTGAGTTCGGGCGTTGTGAC -ATTCTGATCAATAATGCCGGAGTTCTGGGAACTCTGACCCCAGTGGCAGACTCGGACCCC -GCGCACTGGTGGAACATCTTCGAGATTAATCTGCGGGGGTCTTACCTAGTCTCTCGCGCG -TTTCTCCCCCTGCTCCTAGCTAGTGAGGGGGGCCGGAAGACGGTCATTAATGTCAGTAGC -GTGGGGGCGCACCTAATCAATCCCGCCGCCAGTGCGTACCAGACTTCGAAACTGGCTCTT -CTCCGTCTGTCCCAGTTCTTGGATCGGGAATATGCAAACAAGGGCTTGGTGTCTATCTGT -ATTCACCCGGGCAATAGTCCCACGGATATCATTGGGGATCCGAAGAATGTTCCCGGTCAT -TTGCTAAATGGTAAGTCCCCCACAACGGGTGAACCCATGAGTCAAGGTGAAAATGTAACG -TTTAGCTGACCCATGCAGTTCTTACCGATACCCCGGAGCTTTGTGGCGACACTGTAGTCT -ATCTAGTATCTGAGAGGAGGCAGTGGCTGAGTGGACGCTATGTCAATGTGACTTGGGACA -TGCCGCAGCTTATGGCCAAGAGGGAGGAGATAGAACAGGGAGATAAGTTGAAGGTTAGAC -TTATCTACTAGTGTGATCTTGACGAAGCTTTTCCCTGGACGGAGGTATATCGACTGTGTT 
-CAACTATGTCGTTACTTTGGTTGAAATGAGAAATGAACAGTGAACAATCCAATGATCAGA -ATCCTATATTCATACTATATCTGAGATTATGGCCATGAATGACCACAGTAGTCCTAATGC -ATTTTACTTCTGCACCTCAACAATGCTGTAAATACTGCGCGTCTCAAATACCCGGATCAA -TCGAAGACCCGCCTTTTCAACCAATTCCCGGAACTGCTGAGGGGTTCTCTCAATCCCATT -GATGGTGCCCATTAGGCTCATGTCCCTCTGGTGGTGATACCGGCTGTAGTACCCATAATT -GGCCAGCAAGGGTGGCGGAGCATTGTCCATCTCGTCAGACCCATGAGTGGTGTTCATGAC -CTGATCGCAGATGAGCACACGAGACGTGTCTGACAGGGAATCTTTCAAGGCAACCAGAAT -CTGGACGCAGTATTCGTCGGACCAGTCGTGTCTATCCAGATTAGTTTTCTGAATTGGTTC -ACAGGATTGGTGGAGACTTACAGAATTGCACGCAGCCAGTAGACGTCAGCGTTTTTGACA -GGGTTGGGTTGAAAGAAGTCGTGCGTTTGGAAAGTCACGCTACCGGATTGTACAAGATGC -TGGCCATTCTGCGGCCAAAACGTCTCCTCTGCCTGTCGAATAACCTCATCCCGGTCTTGC -ACAACGCAGGTAAAATGCGGGTAGGAGGTGAGCAGTTGCAAGTCGAAACCGCCTATTGTA -ATCATTAGTAAGGAATCTTGAAGCCTGACTCGAAACCATCCATACCAACGCCACCCCCAA -CATCCACCACCGTTGCGCCCTCGGGGAGCTCATCCCAAGGATAATCTAAATGCATTAGTC -TTGACAGACAACTTGGGCGGAAAAAGAAGGTTACCAAAAGGAAGAGCTGCTCCACTGACT -TTCCCTCCACCAACCATCGCCAGGCCGAAATTATCCAGTTCCGGCCGCTTGATCAGGCCA -TCCGAGTCGGGTTGAAGGTGGCTCCAGTTCTGGGGATCCGCAACTCCCGAGTATCCTACA -CCGTCGTTGGTGATCTGGTCGGGTCTGACCCTCTCCGAGAGCCAGTCCCATCGCGATTTG -GGCGTACCCAGTGCTTCCTGCAGCGCCGTCTCGGCGACATCGTACGAGGCCCCCTTGGGA -CCAAGCAGGAATTTGGGGAACCTTTCAGACGCCGAGTAGACGTCCATGTGACTGCCATTC -CCATTCAAGTTAGCGGGAACAGGGGAATTGATAGCATAATGCCATACAAGATCAGAATAT -AGGCACGAAACTCCTCGTTGCCAACCAGCGGAGCCGTGATGCGGTTGTTGGCAAACCGGT -CGGTGCCTGTTTGACGATAGATGTGATTGGAACAGAGGCACCGCATAATACGGGCTTTAA -GATGGAGAAAAAGAGGTTTGAATTAGCAAGGAATATGACTGATGGGCTGTGAGGCCTACA -AAGCTTGTGAGGCTCAATCCCCGTTCTTTGAGCCAGAATTTGCACGTCCATGCCGGCATT -GCCCGCCTCATGCAGGATGTCCGGGATGCGTCGCTCCACCGTCACGCCGAGGGCGCGCGA -TTCCCAGAACTGCGCGGCAACCTCCTGGAGTCGGTTGAAGGGCTCTATAACCATCTCGTT -CAGGGCCCCCGAGACAGCGAGAATGGTGCGCTGCGCCTGATGAAGACGCTTGCTGGGAAG -GAAGCGGGCATTGTCGACACCAGAGGTCTGGCTCGTGTCGTACTGTTCGCGGGACCATTC -GTCCGAGACAACTTTGATGCTGTCGCTTAACAATGAGCTAAGTTGGGTGAGCTTAACAAC -ATCGTTAACGGTGGGGGGAAAGCGTTCCATGGTGACAAAGAGTGATTTCGTTGGCTTCAA -CTGGACTGGCCCGATGGAATCCCATCCATCCGTCCTTGTTGGCGCATTAAAAGAAAGACT 
-GCAATGCACTCAATGAATGATGTCGAAGATCGGAGTCAGAACCAGCTCTGATAGGAAAAC -CCCGAGTGAGCAGGTCCCATTTTCGTGGGGCCATTTTCTGACGACTGAGTTCTGGTAGAG -GGGATATCTTTGCCAATGGATTACTTGGGGGCCAAAATAAGCAAGCTTGGATCTTACGTA -TTAGGCTGCGGCACGTCGCAAAAGGATCAGTTGCAGATTTGGAGAGATGCCTAGTGTACT -TTTGTACCTAATCTCTAGATGCAACCTGGTCCGATCCAGCGTGGTTATAACTGTTGCAGC -TATTGGCTAGCCTTTCCGGATTATGACAAAAAGCGGGCCGGCCCTTGCAACATTGCACAC -GGAAAACTACGGCACTGGGCCATAACAAAAACTCCAGTTTAGTCTCGGGAGGGTCATATA -GTCTGACACGGGGCGATATCCGCCAAGAGGGTCACGCTAAACTGAATTTCCCAAGGGCAA -TATAGTGAGGTAGCACTTGGATAAATTACCTAAATCTTGGAAACAGGATCTCACATGAAC -TAGAATTGCCACTTGGGCGAACAGGCACACAGGACGAAGTAGAAGTTGATACATATTCTA -GAAATTAAAATCTAGGTGAATCTATCGGTCTGTCGCTGAAGTCGCATATCTAGCATCAAC -CCACATCCCATCAGCGAGTGCATCAGATCTCCGACCGAGGCTGGGTCGATATTATCTACT -ATCAGGGTGTAACTGGGCGAAGATCCTACGCTGGGAGAAGGTCGATTACACGAGGGAGGG -ATCGTGGTCGAAAGGGATGACTTGGAGCAAAACATATCCATGTCCACTCTCGGAGGCAAT -CCAGACTCGGGAGTCAAAGGTAAGAGGGACGGATCGACCATCTCGGGGTCTGAAGAGGGG -TACTTGGGATCGTTAGGAAAACTCTCCAGGTTCATCAGGCCATGGAGATCAGAAAGCTCA -TGGTGGTGTTGGACGAACGGCTGAATCCCCTCGAGGTTTGTAGAAGAACATTCGGGGCTG -TGAGGGGAAGGCTTCTGATTCATCAGGTGATCGTGACCAGGAAGCTCATTATCCTCGAAA -GTTGAAGAATACCCGAGATCGTGATGGGAAGCCCCCAGATTCATCAGTTCATCGAGATCA -GGAAATGCACGGTGGAGTTGGAACTGATCCTCCACGGTGTTCTGTAAGCGTCGGCAGACC -AACTCATACCTGGTCATTGATCAATTAGTAGCTCCTCAAATATGGTCTCACGGAGGTCCA -GATCGCACCGGTTTTTGAGGTCAGTCGGCGACCGAAATGGGAACTTGGTCGAGGCGACCA -GTCGCCAGGCCCGACCCAGTTCCAATACCGCACCCAGTAGGGCTTCGTCATCCTGGGCGG -TCCATGGGCTTCGGTCGATACTGGGATCGAGGGAGTGCTGCCAGTGCTTGGCGCATTCTG -TTTTGAATCCATTCATTAGATTTCCCGCCCATGAATATAAAAATAGTGTGCGTTCTTGAC -ACACGTACGATCCGCATTGCGGGTACCGACATGCTGGCTGACTTGATTCCACCTAGAATG -CATGAGCAATTTGGGCTCTTGATCCCAAGGGCACTCCTTGCCTTACCGGGTCCCGTATGT -ACTGATGGCTTGGAGTAGACTTTGATCTTCCTCCTCAGCCCATGGGCCTTTCCGATACGC -ATCGGCCATTTTATACCACCGTTTTCGACAATCTTTGTTGTTCCGACCGGGCAATTTCGT -TGATACCGCCGTCCATGGAATTGGGGATGAAGTATCTTTGGCAATCTTAGCCACTGCATA -TCTGAGAGTCCTAGATTGGCTCTTACTTTGATAGACTCCGACCTCGGATTGCAGAATTGA -ATCTTCCTTCCGGGTCCATTTGCGAGGACAACGAGAATCCATGGTTGTGGTGGATAGAGA 
-GGAGATGGCAAAGTACAGAGTGTCAGCCATGGAGATAAGTCCCGACATAGGGTTAGGGGT -GATGACGCCACCACGAAATACATACATACCTTGGCATTGGATACGAATCCTCAATTCGTG -ATCTCCGTCGAAACTATGAAGGCCAAGGATCTCTACACTTTAGGCGATGTAACTCCATAT -CTTAGCCCTTCTCTATTGCCACGCGTCTGACATGCAGGTTCCGTCGGTCCAGATTCGGTG -GAGCCGCATTTTATCCTTGACCTTGTTAGGACCCGAGTGATGGGTACTTTCCTGAAGGAT -GAGACATGCAGGGCTTTATCGTTTTACCCCCGTTACCATCGGAGTAGATCAAAAGGCCCA -ATGCATCACAGTGTGCATGCGGATAATATATATCAAGGTAGACCAGTAAGGGCCCATGAT -AAAGACTCCTATACAATAAACGCGACTCTAAAGTATAGTCAATATGACGCTTCAAAAAGA -CACACATATCTTTTTATTTGGACCGCAGACGCTCTCCGCCACCGAAACATCCTTCACGGA -GCTTCGTGAAACCATATTGGGCGATACTGACCTGCATTGGCTCCTGGACGTCATCAACAA -ACTTCCGCTTCACTTTGCCGCATTGGCAAAGGCCCTACCCACAGTGCATACCTTTCCTGG -GACAGAGGAATTGCAATGGCTGGAGGACTGGTTCAAAACGGGTTGCTCACGGTATACTGC -TGCCCCTTGGCCCAATATCTTGGTGACTCCGTTGGTCGTAACGGGGCACCTCGTTCAGTA -TCTTCGATCATTAGAGGACACTTCCAGCCTGAGGTCCGAACAGAGCGAGGCTGTGGGAAT -CTGCACTGGTCTGCTAAGCGCCCTGGCAGCTGCCAGCTCCTCCAGTCGATCCCAGATTCA -GCTTCATGGGGCAATTGCAATTCGCCTCGCAATGCTCGTGGGTGCCGTAGTAGATGCCCA -GAACTTTGCTGGTGGCCCCGATAATGAGACCGTCTCACTTTCAGTGGCCTGGGGGCCAGA -TAAAGCTGAATCACGCGTGAAAAAACTTCTGGAGCAGTTCCCTGAGGTGAGTCCCTTTCA -ATAGATAGTCACAGCCAGTGCTGACTCTCCCCACTACTAGGCCTATATATCCGTGGTTTA -TGATGAGAGGCGTGCGACGATAACGGTGTCCAAAAGCAAATCTTCGGAGCTGATTCAGCA -GCTAAGAGCGGCAGACATAGTTGCCGCTGAGATTGGACTGAGCGGGCGTTTTCACTGGCC -CTCTCATAAGTCTATAACCGACCAATGTATCCGGTTCTGCGCAACTACTCCTGCATTTCA -ATTCACGGGTGCATCAGAGCTCGCCTTCCCCGTGCGAGGAACAGTGAATGGCAAATATCT -TGCCGACGCAAACCTCCATGAAGTCGCCATTCGCTCAATGCTGGTGGATCGCCCAGATTG -GTATCACACCTTGCTGACAACCTGGTCAGAGGCGCTGACCAGTCCTGCGTCGTCAATTAC -CTTGTTTGGTCCCAAGAGCTGTGTGCCGCCGTCCCTGATGTCTCGCATTGGGCCCCATCT -CCGGCTCTCGCCCAACCTCGCTCGTCGAATTCATTCTCTCGATAAGGCAGGGGACCGTGA -CAGTGGCCGTGATAATCGAATCGCCATCATCGGCATGTCATGTCTTGTGCCCGGGGCCAG -TAGTCTGGAGGAGTTCTGGGATCTTCTCTGCCAGGGCAAGTCGCAACATATTCCCGTGCC -TCCGGAACGCTTTGGCATGCAGTCTACCTGGCGGGGCATGGATGAGAATCGAACATGGTA -TGGCAACTTCCTCCAAGACTATGATGCATTCGACCACAAATTCTTCAAGAAAAGTCCCCG -TGAAATGGCGTCTGCCGATCCACAGCACCGCATTATGCTGCAGACTGTGTACCAAGCGGT 
-TGAAAATTCCGGATACTTTGCCCCCACAACTCAGTCACATCGGACCACGGATTCCAGAGT -TGGGTGCTACATTGGCCTGGGTCTGGTCGATTATGAGAACAACATCGCCTGTCATCCTGC -CAACGCATACTCTGTTACAGGGAACTTGGGAAGCTTCGCGGCGGGAAAGGTCAGCCATTA -CTTTGGCTGGACCGGTCCCAGCATGGTCATTAACACGGCTTGTTCCTCCTCCGCGGTGGC -GATTCACACAGCATGTCAGGCTATCCTCAGTGGGGAATGCACTGCCGCCGTGGCCGCAGG -CGTCAACACGATGAGCTCCCCGGAATGGTTTCACAATCTGGCAGGCGCGTCCTTCTTGAG -CCCCACCGGACAATGCAAGCCGTTTGACGCTAAGGCTGATGGGTACTGCCGAGGCGAAGG -AGCGGGTGCCGTGTTCCTCAAACGGTATTGTGACGCTGTTCGGGATGGGGATCAAATCCA -TGGCGTTCTGGCTGCCACAGCCGTATATCAAAACCGCAACTGCACCCCGATAACCGTTCC -AGACTCTTCCTCACTAGCCGAGATGTTTTCCCACGTAACCCGGGCGGCTGGCTTGAGTCC -TGGCCAGGTATCAGTGGTGGAAGCGCATGGAACAGGCACTCCGGTGGGAGACCCAGCCGA -GTATGAAGCCATTCGTCGTGTATTCGGGGGTTCCGTCCGCTCAGATATCCTCTCTCTGGG -CTCTGTCAAGGGTCTGATTGGTCATACAGAATCTGCCTCAGGGATTGTGTCTTTGATCAA -AGCGCTTTTGATGATTTCCCGGGGGGTCATTCCTCCCCAGGCCAGTTTCGACGTTATCAA -TCCAGCAATCAAGTCCAATTCCCAAGACCGGATCCAAGTCGACACGCAATGTCGAACTTG -GGATGCGAGCTATCGCGCCGTGCTTATCAACAACTACGGTGCGTCAGGCACAAACGCCTC -CATGGTGGTCACCCAGCCACCCACGCAGAAGCAGCCCAAGTCTGCAACGATGCAATACCA -GGTTCCCAAGCATCCCTTTTGGCTAAGCGCGCTAGATGATCGAAGTATGAGGGAATATTG -CACCAAGCTACTGCATTCGATCCGTAGAGAGGGCAAGGATCAAGATCCACATCAACATCT -GGCGGATGTGGCGTTTCAGGTGTCACGACAGTCCAACCGCTCCCTCGCTCGGTCTTTGAT -ATTTCACGGCGTCTCCCTGGAGGGGTTGGAGAATACCCTGGTGGAATTTATCAGTGGGCA -AGGGACTATCCAATCAGTCGCCCTGCCTCCGCAAAGACCTGTCATTTTGTGCTTCGGGGG -CCAGGTCTCGACGTTTGTCGGTCTAGACCGTCAGGTATATGAGCATACAGCCACCCTACG -AAGACACCTCGATGAATGCCATCGGGTTTGCCAGCAGATTGGGGTAAGTGGCGGAATCTA -CCCCGAGATCTTTCAACGGGATACAATTGGCGATGTTGTCAAGTTGCAGACCGTGCTCTT -CGCGATGCAATACTCCTGCGCCAAGAGCTGGATTGACTGTGGCGTCACCGTTGCTGCCGT -CGTGGGCCACAGCTTTGGCGAGCTAGTAGCACTCTGTGTATCAGGTATCCTATCAGTCGA -AGATGCTCTTCGCCTGATCAAGGGTCGGGCCCAGTTGATCCAGCAAGGCTGGGGAGCCGA -GAAAGGCTCCATGATGGCAGTAGAAGCAGACCTGTCAGTGGTTGAGGATCTACTGGGCGC -AGCCAATGCAGCATGTGAGGATGTCGTGGTGATTGCATGCTACAACGGGCGCCGCAGCTT -CACCCTGGCTGGATCGACTAAAGCCATCGACACGGTGGAAATCCTGCGCTCAAACGATTC -GGGATTTTCTTCCTCGGTCAAAGCAAAGAGGCTCCAAGTCACCAATGCTTTCCACTCGAC 
-CCTGGTTGAGCCCCTCCTACCGGCGCTTAGGGAGATCGGAAAGGGGATCCACTTTAGAGA -TGCGAAAATTCCTTTCGAAAGAGCTACCCAGTCGATGCATGTCAGCCAATTGACGGCGGA -CTATGTGGCTGACCACATGCGCCAGCCAGTGTGGTTCAACGATGCCGTCCAGCGACTGTC -GCAGCAATATCCATCAGCCATCTGGCTGGAGGCCGGGTCTAATTCTACAATCACCGTTAT -GGCTAGTCGAGCGTTGGAATCTCCCAAGACTTCGCATTTCCAGCCCGTGAGCATCACTGG -TAAAGGGGGGTTAGAGACTCTGGCCGATGTCACGGTAACTCTGTGGAAGGCCGGCCTGAG -TACATTGTTTTGGCCTCATCACACGACCCAGACCACTCGCGATTTCCCCTTGCTCCTTCC -CCCTTATCAGTTCGAGAAGTCGAGGCACTGGCTAGACCTGAAGGGGCCGCCTAAGCAGCA -AACATTACCATCCACGGTGGAAGCCTCCAGCGACTTGTGGACGCTTTTGGGGTATCAAGA -CCTTGAAAGACGTCATGCCCGATTCTTAGTTAACACCGACTCGACAGCTTTTAAGCAGTA -TGTTACCGGTCACAAAGTGGCTGAAGCAGCACCCCTATGTCCTAGCATCCTCCAAATCAG -CATTGCGGTCGATGCAGTCTCAAGCCTCTGTCCAATACTCGTCCAGGAGGGGCTTCAGCC -CCAGCTAGAAAATATGGATAGTAGCGCTCCGATGCCACTCAATGAATCCCGGAAGGCCTG -GCTAGATGCAGTGGTGCTCGATGACGACCGCCAAATCTGGGAATGGAAGATGGTCAGTCA -GAGTACCGAAGCGGGATCCAAACCTATGGCTCATATTGCTGGACGGCTCACTTTGAGGTC -CAGGGATGATGAGACCTTCCAGAGCGAATTCAGCCGGTATAAACGTCTCGTGCGGGCGAG -TCGACCCCGCCATCTGCTCCAGTCCGGGATGGACGTAGACGCCTCGTTGCACGGGGATGA -TATCTACCGCTCCTTTGCAGATACTGTCGAGTACAGCGAGGTTTATAAGGGGGTTGAAAT -GATGGTCGCAAAGGATGAGGAGTCAGCTGGACGAGTGTCACTGTCCCAGGACCCACGGCG -GTGGGTTGAAGTTGGTCTCGCCGACTCTTTCTGTCAGGTAGCGGGGATCTTTGTAAACTG -CATGACTGGGCGATCTGATAAGGAGGTGTATATCTCGGATCGCATCGAGAAGTGGATGAA -GTCTCCCAAGAGTATCCCGCAAAGTGCTGCTTCCCAGAAGGCAGCGTGGGAGGTCTATGC -CTGTCACAACCGTCCATCTCCCAAGCAATTCCTCAGTGATGTCTTTGTGTTTGACTCGCT -CACCGGCGAGCTGGCTGAGGTAATTCTGGGAATTCGGTATCAACGCGTTTCTAAGATTGC -GCTTGGCAAGGTCATATCGGGTCTTTCTTCGTCCGATACAACACATTCCATCTCAGCGTC -TATATCCAACGGTGGAAATAACTCCCTTCTATCAGCACCTCAGTCGGCCCCGAAGAAAGA -TGCATCGCGCTCGATGAAGGTCGCAGCCACGGTGAAAGATATTCTAGTCAACCTGTCTGG -TTTAGAACCCGCCGAGATCAACGATGATTCCGACCTGATTGAACTCGGCATCGATTCTCT -CATGGGAATGGAACTTGCCAGAGAGATTGAACTGGCCTTCAAAATCAGTTTGGACATGGC -CGATCTTTTAGAGATTACCAAGTTACGGGACCTTGTCCAATGCGTGCAAAAGGCCCTGGG -GCTTGGGCAAGAGGAAGAGATGAATGGCATCAATGAGCCCACGGACCAAGTTATCCCAGT -CAATGGCCACGATGGCATCAACGGAGAATCTCTCCAGGAAGCCATCTCTACCAGGACCTC 
-GAACGGTACATCCAACGGGAAACCCTCTGCCCCAGCACCGGGCAATGGGACTTTGACAGA -GGCTATCCTGCAATGCTTTGGCCAAATTCGGGCGAATACTGATGACTTCATCGTACAAAG -TGGACTGGATGGATACATCGATAATATTCTTCCGCTGTCGGATGAACTCTGTGTGATGCA -CATTTTGGAAGCCTTCGAGCAGCTGGGCTGTCCTATCCGTAACACGCCCGCTGGTAAGAT -CCTGAATCCAGTGGAGCACCTGTCGCATCACACGCAATTTATACACTGTCTGTTTGGATA -TTTGGAACAAGCCGGTCTGGTTGAAATCGACGACATTCGGATCAGAAGGACGCCCCGCTT -GGCACCAGTGCGGTCGGCCAATGAGATCCTCGAGGAGCTTGTTGATCGCTCTCCGGTGTT -TGAGAGTGATCATCGATTGACCCATTTTGCAGGCGCCAGCTTGGCAGACTGTCTATCTGG -CAAGAAGGACGGTCTCCAGCTCATTTTCGGCTCTGCAGAGGGAAGGGGGCTACTGAGCGG -TTTTTACGGCCAGTCGCCCATCAACACAACATGGATTGCACAGATTAGGGAGCTTTTAAT -GCAGCTCATTCCCCAGCTATCCATAAATGAAGCACCGGTCAAGGTGCTGGAAGTGGGAGC -TGGAACAGGAGGGACCACTGCCCAAATGGTGGCACTCTTTGCCACCCTCAACGTGCCAGT -GGAATACACGGTGACGGATATCTCATCCTCACTGGTAGCAGGGGCTCGCAAGAAGTTCAA -AGAATACTCGTTTTTGAAGTTCCGCGTGCTTGATATCGAAAAGCAACCCCCCGCGGACCT -GCTGCACGGTCAACACATTGTTCTTTCAACCAATTGCATCCACGCCACCCACAACCTCGT -ACAATCGACCAGAAACATCCACGAGATACTACATCCTCGCGGTTGCCTGATAATGCTCGA -GATGACCCGGAGTATGGCCTGGATTGATATTATCTTTGGTCTGTTGGAAGGGTGGTGGCT -CTTTAATGACGGACGGCGTCATGCCTTGGCGGATATCGCTCGCTGGGAATCAGCGTTGCA -TTCGGCCGGATATGGACAAGTTCTTTGTACTGAAGGCAGGCATCCCGAAACAACTATTCA -ACGCGTCATTGTCGCCTTGGCCGAGAGGTATCATGTTTCCCCCCTGCAGAAACCTCGGGC -TGCTGACCTGGTGGCCCGTGAAACAGTTCTTGATGCATACGTCCAACGCTACACCGAGGA -CTTTCGAATTCCCAAGGGCCTTACAGCAGTGGCGCAACCATCCCAGGAGCAGCGGTCATG -CGTGGTCGTTACAGGCGCAACTGGTAGCCTTGGAAGTCAGCTCGTTGCCTGTCTGGCACA -AATCCCTGAAATCCACAAAATCTTCTGTCTCAACCGGGCCAGTAGTGTTGATGCGAGGCT -CCGTCAGCATAAGGCCCTGGAATCAAGGGGGATCAATCTGGAGCCAACCGCGTTGGCAAA -ATTGACGGTGCTGGAAACGGACCTGTCTAAGCCTCTGCTTGGCCTATCCACTGTAGACTA -CTACCGAGTGGTAAAGGAGGCAACACACATTGTGCACAATGCGTGGCCCATGAGTGTCAC -TCGGCCTATCAGTGCCTTTGAATCCCAATTCAGCGGTATGCGCAGCCTAATCCACCTCGC -CCGGGACATCGCTACCCACCTTCCGTCGTCGTTCAAGGTCTCATTCCAATTCATTTCTTC -CATTGCTACTGTCGGTTATTTCCCTCTGCTGAGCGGACACGCCCTTGTTCCAGAGCAACG -GATGACAGTAGACTCGGTGCTTGCCTCAGGATATGCGGAGGCCAAGCTCGTTTGCGAGCA -CATGCTGGAACAGACGCTTAATCAACACCCCGGCCTGTTCCGGGCCATGACTGTGCGAAT 
-AGGACAAATTGCTGGCTCCACCACCAGTGGATACTGGAACCCTACCGAGCATTTGTCATT -TTTAATTAAATCGTCGCAGACTATCCGGACGCTGCCTGCGTTCACAGGGGTAAGCTCTTG -TTGCCCTCTGATCCTTGTATTACACAAAAAGACATCTGCTAACAAGTGCCTCCTCTCAAT -TCATAGGAACTCTCCTGGTGTCCCTCTACGGATGTAGCCATCATACTGTCGGAACTGCTC -CTCGGGGATCACACCCCCTATCCCATCTACCACGTCGAGAACCCCTCGCGGCAGCCGTGG -GATGAAATGATAGGAATGCTGGGCGAGGCGCTGGGCATTCCGAACACGAACAGAGTTCCT -TACGCAGACTGGATAGCACTGGTGAAAGACTACCCTGGTCCTGTGGGGAGCGAGAATCCA -GCCGGTTTAATTATTGACTTCTTGGAGACGCATTTTGTGCGGATGTCCTGTGGGGGCCTT -GTTCTGGATACGACTCATGCCGCCGAGCACTCCCCAACTATGGCAAATCAGAGACCGGTG -GACTCCCATCTTGTTCATAATTATATTCGAAGTTGGAAGGGGCTCGGGGTATTAGAATGA -GATCAGTGTTCCTACGGAAATGCCAAATTCCCCCGGGAGATTTTTCTGAATATGGGAGCT -TGAAATAATAGTTTACAGATTACATGTCTTCTATGATGACTGAACTTTTGCCAAGTAACG -TTCTATCGGGGTTAGTATAATTCATGCATATGTAGATTGTAAAGTGCTCCACGTACTTGC -AGCCACCCATCTCATGCCCAGATAGCCCAGTAGGTCTTTCAAGGGCTGGATCAACGACTC -CGGATCGACACACCCAGATACCCTGGCATCGCCAAGACACACCATAGCGGCCTTGTATAT -GCAATGGGGGAGAAACACGGACAAGGTCATCATCTCAACCGGGGGAGAGATTTCCGCGCC -GCGTGCAATCTCGTGGACGAGGGACTTGAGAATATTAATTGATGTGCGGATACAATACTC -ATTGTCGGGGATATCCATGTGAGATCCAACCTCGAGGGTTTCGAGCATGGCACTAGTTAG -TAGTGTTGGGATTAGTCAATTTTTTCTGAACTTGTGCTGGGCAGCCTTACCTTCTACATA -GTGCCAATGCGCTATCTGACAGAGGATGCGGCGGATCCGAATTACCGCTTTGGAAGGTCA -CCAGAAACGAGGTGAGGGATTTGAGGATCTGGACCGCCTCGACGCTATCAAAAAGGGGAT -TCGATTGGGTCGTATGGATATGAACGAGTACCCGTTCCAAGAGCTGGGATGCGTGGGCTA -ACCTAGTGAATGAATCCACTTCTACCTTCGCCCAATCTTCAGATTCCTATTTTCGGAGTT -GTTTTCAGTTGAAGTTGAGTGGGAGCTTGGCAGCAGCCAGTCAATCAGTCACTTACCGTG -GCTTTAGATCGATCACTGTAAGTATTGGGTGCTTGGCTATTCTCCAGCGAGACATATCTA -CCACGGGCTCTTGTTTCAGTACGAATATCATTTGATGTGCATCCCTAGGGGCCAGATCTG -ACCTATCAGTGATGACAATGCCCTGCCAAGTCTGTCGTGCCTCCTCGACTTTCCGTGGGT -CCCGGAAGTCCTTGCGCAGGTTCTTGCAGGTGGTGTTGATCCCTAAGGAGACTGCGGCGC -GCATATTGGCTGCAGCAGAGATATATGCGGAGGGATACATGGCATGACCAATCTCGAAGA -CCGTTAAGAGAAGTCGGCTCTGCAGCATTTCCAACGAATTCGTCCCCATCGCCTCAAGAA -GAGCTACAAAGCTCTTGAGAGAAGCATACATCGATCGGGTCGAGGGGGGAAGCTCTCCCC -CCACGGGCTCCTTGCAAACAAGGGCCATACAGAGGGTCAATAGGCTTAAGTCAGATGGTG 
-CGCTGGCTGCCTGAACATGTGCGTTTAATGACCGGATATTATAAGAGGGCTCCGATATGA -TAGGGAACCAGGTATGTATAGTCCGGAAATAAAAGGCTGCGGCCGCCTGCACCTGCAGTC -TATCGCCCACAATTGATGAGACCAGAGCTTCAAGGGTATGACACATGTTGGAAGGGGTCA -GGTCGGCCCAGTCCGGCACTGGCAACAGACGAGGAACTTCAGCTGGAAGGAGATAGTCAC -AATGCTTATTCAATCTAAATTGATCAGCATGCACGCCTATAATTCTAGCGGCTACGCACC -TTTTACAAAGGGTACAGCAGGGAAGGACCTTGTCACATCTCCGTTTCTGTCTTCGGCAGC -GAACGCAGGCCTGATCAGCCGACATAGTACGCGGGTGCGGGTGCGGGTGCGGGGTGCGCA -GGTTACCTATGTAGCCACTTAAATAACCAGGCGCGGAGATATGTGGCATTTCCACATTCT -GCACCAAGCCTAATTTGGTTTAGATCCCGCATCCGCATCCGCACCCGCATCCCCACGACA -TGATCGTACCTACCTAGCTAGGTACATGTCTGCCTAGGTATGTAGAAGCACGTTGATCTT -TATATATTACCAGCTCTCCATAATATGTCGTAAAGGTAGCCCTTCCAGTATCTCCAGTAG -GTTTCCTATTCACAGCCATGGATGCGATTCGCAACGTTGCCATTGCAGGGGTATGTCAAA -TAATCATTCTCTTGTCGTACTCAGGACTTATAGACTATGTTTAGGCTTCCGGAGCCCTGG -GCTCTCCTATTCTTAACGCACTTCTCGAAAGCAAACTCTTCAGTGTGACAGTTTTGACAC -GTCAGTCGTCTCAAGCACAATTCCCGGCGTCTGTCAAGGTAGTCCGCGTGGATTACAACT -CGGTCCCAGACTTGACAGCGGCTCTCACCGGCCAGGATGCCGTCATCTCGGTCCTGACAA -CCAGTGCCATGGAGACACAGATCCCACTGATTGAGGCCGCAGTGAAAGCTGGAGTGAGGA -GGTTCCTTCCCTCCGAGTTCTGTGCCAACATTGGCAATCTCAAGGCGGCCAGTTTGCCTG -TCTACCACTCAAAGCTTGCGATACATGAGGTCATTCAACAACAGGCTCGGGACCATCCTC -ATTTTACATATACTCTACTTCGCAACGGGCCTTTCCTCGACTGGAGCATGGCCTATGGTT -TCTTTTTCAGCCTCGATGGAGGATCGACTCCCTTTTATGACGGCGGGGACCGTCCGTTCA -GTACAACCACCTTAGCCACGATTGGCCGGGCCGTGGTGGGGGTCCTGCATCACTCGGATG -AGACACGAAATAGAGCGGTCTTTGTTCAAGACCTCGTTACCACCCAGCGAAAGATGCTGG -CCATTGCCCAGAAGGTTGCACCGGATCGCAAATGGACCCCGAGCGAGGTGAGCACCGCCG -ACATGGAGACCATGGCGCGGGACAAATACGCCAATGGGACGATCGATCTGGCAGCCTCGA -TGGGATTCTTTTGTCGCTCGGTTTTTGGGGAGGGGTACGGGGGAGAGTTCCAGGAGGCGG -ACAATGAACTTCTCGGCATACCGTCGAAGACGGATGCTGATCTGGAGGACTTAATCCGGG -AAGTACTTTCTGGATAGAAATCTACTTTGTGAGCCTAACTCTAAGCGCCCTCGTAAAAGC -GGCAGCTAAGACAGGAAAGCCCCACCTGACAGAGGAAATCAAATACATATAACCACCCTA -GGTGTGACGTTGTTTATGTAAGGCCTCTACTCATCCATGGATATAAATAGCATAATAAAT -TCTTAGTACCTTCCGAAGTCTATCCCATGGGAGGGAGTAGAATGAGGCTAGGTTGCCTTG -ACAATATATATTTACTAGACTGCGACATCGCCCGTCCACGGTGGGCGGTGGGCTATTTTG 
-ATTTTCGAGTGCCTGTCCATAACGCCCAAGGCTTAGTTTCTTATATATTTAGATATTTTA -GTAAAATTTATAGATATTGTAGATTAAAACCGCGATTTTATATAGAGTTAGTAAAAAAGG -TATAAATAATAGTTAGTTATTTATTATACAAAGTTAGCAAGTTATACGTCTATTTTAGCT -GACTTTATGTAATAGTGGGTATAGTGGGTATACCCGGGATAGGTATACCTACAGGCCTTA -GGCGTTATAGACGGGCACTCGAAAATCCAAATAGCCCACCGCCCACCGTGGACGGGCGAT -GTTGCAGTCTACCTATCGCTCTAGGGTCCGAATCATAGGTAGATTGATAGGTGCATTTTT -CCGTCTACACCAAAAATCTTTCGATGTTGAAGATGGAAAGATTCGCGATATATATCTTTG -GATGTATTACCTATATCGATGGCTTGAATACCGGGATATCTTTCTCTTGGCGAAGGAGGG -TAGGTACTATACTAGGGACAGATGGCGTAGGGATACTTGTTAGTAGCTCGCTGCTTAGAT -TAAAAAAAAAAAGTCCTTCGCTAGATATAGGCGACCTGCTTGAAGAGTACGTTCGACTTG -GTCGTGCGTACAATAAATAGGCTGAAGCGTTAGGCGGACCTGGGATTTTACTTATGTTAC -CTGTAACGGTCGCCGAATACTAGTAAGTTATGTTATTTTCGTGTAACTTCAAACTTGATG -CTAAAATCATATTAAGATATACTAGCAGGGAATTCTCTTGGTATTTCTTATCTGCTGTTA -ACACTCTCCGTAAAAACGGCATTAACAAAATTGCGGCACTGAAGGGTCTGGACGTGCTAG -GAAAAAAGATATCTGATGAACTTTGGGCTAATTCCTAGGAGAAAGAAGGACTGCTTAAGC -GACCTCTCACTAAGTTAGCTCCaagaaaagaaaggaaaagaaaggcaaagaTGAACGGTG -AGATGATTAATTATCTATGCCTAGGTTAGGAATTCTAACGCTGAAAGAGAAAACAATCGT -GACGGTGAAGATAATCAACGCAAACGTCGGCAGAAGCTTGGAAACACTTAATCTGAGGGA -GATGATGAGGATACTCGAAATGCTGCAGTCCGCTTATCTTACTGCATTTATTTGCTCCCA -AGTTGAATCCTCCGACAGGGGCGGCGTCTTGCCTGAATAGCTAGCGGTTGATAAGGGGCT -TGCCATGGTCACTTAGGGCTACATTTTCACCTCCTAACATTGTCAACTTGCAAAAGTCGT -GGAGTCCAGCATCCACCGGTCGAGCTGCCCCACATCAAGTAGCTCCGCGGAACAATATAC -TCTCGACGAGGTGGAGGCCTATATCATGCTCCAACGAGAGATACATTACGAAATGTTATG -TGGTGAGCGAGTGGGTGCCATGGTTGTATGATTATGGGTGAATCCCCACCGGAGGTTGCC -AATAGAGCAGCCTGGGCTGGTTCACTACTGGAAAATTCGAAATAAGTCTGAAGACGGTCG -ACGCGGCTTCCCTACACTCCCTCGGGTCTTGGAATCTGGACCGCACAGCATGTGGTATAA -CACGACAGCTAAAAAATAGATATCTCGAAAGAATACTGAGCCATTAATGAGAGGGCTCGA -GAACTGACCCAGCTGATGAAAGAAACTGCAGCCACCTCGGGTGCAGCGGCGCTTCGAAGC -GCGAGAACATCAACGCGATGTGCTCGTAGGAGACCGGCCCGGACAGGGATTCACGGAAAC -TTTCGAACGACACTGGATCAAGTCCATTATCGTTAGCCTGTAGTACTAGAATGTTGTGAT -TATGTTATATTAGGGCGGGCTAGGAAAACTTACAAGTAAGCAGCCTCGCCTGCGTCCAGA -TGGTCCCAAAATGCTTCAGCTGCAGATGAAGCACGGTGATATCATGGTCGATACTCGAGC 
-GGAGACTGTCATCTAGCAGCGATCCCGATTTCCTGTGCAGATCGAGGAAGATGAGCGCTG -TGAAAATCGTGAACGTGATAGCCGGGTCTACCGTGAGGCAGAGTGCGCTATCCCACTGGC -CCGCCAGCGCGGCGATGTCCTGGCATGTCTCGAGAATTCGCTGCCAGCTTGATACCCAAT -CGTCTGCTCGCCGCTGGCCGCAATCCACGAGGGAGACAAGCAGCTGCGCCATTCGCAGAT -GAAATACGGTGATAAGCCTGGCATGGTGGTCGACCGGGGTCTCGTTTGAGAACGCGTTGC -GTCGCGGGTTGAGCCAGCCCAGTGGGAGGGCGACCCGGAGTGTAGTTAGCTGGCGCTCGA -CTTTGGCCAGTTGCGCTGGTATACCGGCTTGCCATTGCAGCATATGAATGCGGAGGACGC -TGCCGGCTTGACGCATGGTGGCGATTGTGATGTTATGAATGTTATTCAGCAAGGTCTCCG -GGTCTGATGTGATTACTGGAAGAAGTTCGGATAGACCGTCCAAGTTGGGAGGTAGGAGAA -TTCCCTGAGAGGCACTCTTCGTCTCACTGAAGGTAAAGAAGGTGTTGATGACGGTATCGT -CAATTTGATATGGTGTCCCTAAGGCGAGATTGGAGTATGTGTCGAGTCGGTAGATACACC -ACCATAGCGCCCGCCATTCCTGGATATCTTCATCGCTCACCATGCTCCAATCAGGGTAGA -GCAAGCGGATATGGTCAAGTCGGTCCAGGCCAATACGATATGCCATGCGCGTTACCCTAC -CAATGCGCAACCAGGAGTGGTGGCCTGGGAATTGATGAAACTCGTAAAAGGCCAGTATGC -AGGCTTTGCGGAACTGGTCAAGGGATAGGGCGTCTCCGACCAAGTCATCTTCTACCAGGC -TAGAGCCGAGGAGTGATTCTAGCCCTGCGTCTAAATCTGGTCCTCCATCACTCCCAGCAG -AGTGGGTCAGCTTTGTGGCAATGATGGCCATGGTCGTGATGAGATCCTGACTACATAGCG -AGTCCTGGTAGCGGCAAAGAAACCGCTCCTTCGAGAAAATAGGATTTGCATCCCTAGCAA -TATCGAAGAAGACCCTAACCCTATATTAGTACCAAAGCTTATTAGCTTAGCCCGGGAAAA -TGAGAAAAGGTACCTACAGGTATTCCTCCTTCTGCGAGGAGAAGTACAAGTAACTGGAGC -ACGAGGGGCTGGTGCTAGCGGATTGCGGAGGAACTCCATGGGCCGAGTCATGCGGGACTG -GATCCGCCGGGGTGGGGAGGAAGCCTGTTGGAGAAATTACTAGGTCATGCTCTGGAGGTG -TGGCACTACCGTCTGAGGCTGCTTGATAAGGAGCATTAGATTCTCAGCACCAACTGGAGG -AGGAATAGTCAAGCTACCAGACGGAGTGCGGGACTTGGATCGGCGAGTGGCATATCGCGA -ACCCCGAGGCGGACCAGGCTTTCGTTTTGCTAGTGTATAGGTGCATGGTATTCCCAGGTC -GAAGCAACGAGAGCAGGTCGGTCTCGCTAGGTTGCCTGCGGACGTTTAGTGAACGAGACT -CTCAGATTATTGGGNGGGGGCTGGCGCACACTTGACCTTCCTACTGCGCAGAGAGCTTAG -CGGGAGCACTTTACGGAGTTTCGGGGGAGAGTGGGTTACCGGATGCACCTATCGCACACC -CTCAACGGCATTGTTTTGGGCTCCTGACTCAGCGGACTAGGCGAGGACGGTCTAAGATGG -GAGTAGCTAGCGCTCGGAGGAAAGATAAACGAGCTTGATGAATGTGCAAAAACGATTGCC -GAAGCACAGGTTCAATTATGACATATTCTGCAAGTTCAATCACTCGGCGGCGGAGACAAC -GTGGGTTGGGGTCTGGGCCTAGTTGTCTCCGCGTCATCCAGTGGGCCAAAGTTCCTCCGC 
-GGCGTGTCTTCTCCACCAATCAGATCCCTTCGTAAAGCATCTCCCATTACTGGCCAATCG -ACTGCTTCAAAGACCCCAGCTTGAATTTGCGATACAGGAACACATTCTGCCCAGGTTCGT -CAGTGCCTTCAATAGTTCACACACGGGGAGACGACTCACCAGCAGCCGCGAAGGCAAAAG -CAAGAAGACCAGGCGAAGCATCCACGCCATTCCTCCCTCCCAGATCTCTGGTCCAGAGCA -TCGGCTCATCAGCTTGTCGACAATCGACGCCGCATACGCGTTCGCCGGCATCCCGTTCGC -ATTGCCCTGGTACTTGATCTTTTCAATCGTACCTTTAATTGGGAGATAAATCGAGTCCTC -TGGCGCGTAGAGGCCGTGGTGAAGGATGTTGCTCTGCACAAACCCTGTCACGATTTCGGT -CACCTCGATCCCTAGCGGTTTCACCTCCTGCACCTTAATTAGAACGACGCCTTCCAACTA -CGGGTCGTGTGCTTACCAGTCGAAGGGTCCTCGAATACTGACTGAGTGCAGCCTTGGACG -CATTGTATGCGCTCTGCCACACGACTGGGATGTTCCGCGTGACGCTTCCAATCTGCACAA -TGTGTCCTCGTGGTGACTGACGCAGAAGGGACAGGAAAATCTGGCATAAGCGCATGACAG -CAAAAACATTGACTTGAAAAAGCTCTACAACTTCGTCCATCTCCAAGTCCATTGCCATCC -TTGCGTAGTGCGTTCCCGCATTGTTGACCAGATAGTCCAGGCGCCCGCCTGTTTCTTTCA -CGACAGCATCTTTTAGCCGTGTAATGCTTTCTGGATCGCCCAGCTCAAGCGCCAGGGCTC -TAATGTTGCTAAATTTGCTTGTGAGCTCGTTGAGAGACTCGGTGCGGCGTGCTGTGGCGA -GAACAGTGACGCCCCTGCTTGCAAAGGCTATTGCAAGCTCTTTGCCGATTCCGCTGCTGC -ACCCGGTAATGAGCGCGAAGGAATCCCGGTCTGTCATGGTGTGGTTGATGTGTACAGAGA -GGCCCTGTTGTTGGGTCTTGATGGTCTTGGGAGTTGTCGAGTATAAGGCGTATATTTGTG -TGCGAGATACTTGATTGCCGGACACTAAAGATGTAAAATATATCCCTTAACCCAGGCTGA -GTTTCTCCGAGTAGAAGTCTTCCATGTGCATTACGGAGGAATCTTCCCTCGGCAGTAAAA -TAGTACAGTACTAACACCCGGTCATACCGAGGCAGAAACGAATTCTCGTTAAGGCATATA -CAAGTGGAGCGATATGATGCAGCGAGTCTGAAGCACACGGCCTGGAGTACTCCGTATACT -GGTGAACAATGTGATCCCCATGTCATTCGTCCAGTAGGCGGCAGTCCAGCTCCAACCACT -TGTCCCTCTGAACATGATATAACCCGCATATCTTCGACTAGAGTAGCGAGATAGCCAGTA -AGTGATGACGATTTCATGGTGGCTGCCGGCATATACCTTCACATGGTTCGACGCTAGGCT -CTCGCGGCCCCTTCAGGCTGTATTTGAGGTCAATCGTTCCTGGACAGGACCAGAATTGTT -TGTTGCCAACTGCTCTTGCCGAATCCTGATTCAAACAACAAATTTCCGAAGACAGCCATT -CTCTAGCGATGGATTTCCGCCTGATCTGGCTCGCTGGGCTCGCCCTCGCAGCACTGGTAG -CCCGATCAATCATATTGAAGTACAAACACTCCTCGCGGGCTCGGGCACTGATCTGCAAGG -CCCCACTTACGCTGTTTCACGCTGTTTCCTGGATTAACCGGCGCAGTAAAGATGATCAAA -GAAGTCAAAGCCGACAAGTTCACCGAGTATATTGCCTCCCTGCATGAAGCCCACGGGCCC -ACCTTCAGGGAAAAGATGATTGGGAACAATCTCATCTCGACCATCGATCCGGCAAATATT 
-AAAGCCCTGTTGGCTACTCAATTTAATGACATTGGATTGGGGATTCGCTACCGACAATTC -CATCCTCTCCTAGGGGATGGAATATTCACGCTTGATGGCAAGGGGTGGTCCTTTACGCGG -GCAATGATCAGACCTCAGTTCGCGCGAGAGCAGGTATGCAGAAGTCATTGTATCCAGGCC -AAAATCAGGTCTAATGAAGTGAATTGGACGGATAGATTGCAGACCTCGACCTTTTCGACC -GAAACGTCGCGCAAATGGTGAACGTCCTCCCCTGAGGCCACTCAGGCTTCGACATCCAGG -AGCTCTTCTACCTCCTCACCATCGACTTCGCAACCGAGTTCCTCTTTGGCGAATCCACGG -GATCCCTGCACGCATGCGGGAGCCCGTCCAATTTCCGCAACGCAAACGCCAAAGTCCACG -AAGTCGTCGACCACTACGTGCAGCTCGCGCTGCTCTCCAAGCACGCCGGTAAGACCCAGA -AAGTTGGCCATGGACATGGCCCGGGCCTCTATGTGTTCCTCAAAGCGCTACGTCGGGGAC -AGATGACCCCAAGCTCATCCGCGACAACCTGCTCAATGTACCTCTTGTGGGGCGGGACAC -AACAGCTAGCCTGCTCAGCTCTGTCGTCTTCTTCCTTGCGCGCAATGGCAGGTTTGGGAG -AAGCTGCGCAGGGAGATACTGGGCGAATTCGGCGACTGCCACAGACCGGGGAACGGAATT -ACTCATGCGAGGGTGAATGATCTTACTTATTTACGTTACGTTCTGAAAGAGAGTGAGTTT -GTCGTCCTCAATCCCAAATTCTGCTAGACTGACCCTGTTAGCGCCCCGACTGCTCCCACC -AGTCCCAGTGAACTTCCGGCAGGCCGTCAAAGACACATCCCTACCGGTTGGAGGGGGCCA -AGCTCGCAAAGCACCCATTTATATTAAGAAAGGTGACATAGTCGGGTCCAATGTCTACGC -AATCCACCGCAGAACCGACATCTGGGGTGCCGACGCCCACGTGTTTCGCCCTGAGCGGTG -GGCGGAAAACCCGCCACGGGGATGGGAGTACCTGCCATTCAATGGGGTTCCTCGAATTTG -CTTAGGACGTGAGTCTCCCTTCTCTGGATTATATCAGGTTCGTGATAATTGACTTGTCCA -ACAGAACAATACGCGCTGATAGAGGCTAGTTACATGATTATTCGGCTGCTACAATAATTC -AATATCTTGGAGAATGCAGAGGAGGATCAGTTTGCAGAGCCGAGGCTGCAGGCAAATCCG -GTGCTGGCTCATCGCGATGGAATCCATGTGAGGTTGTATTCTTCTACCACAGCCAAAGGG -GGAAAGGGCGGCTTGAACGCGGTGCTCGAAGAGCCGCTGTGAACTTTCGTAGCAGTATAG -GCTTTATATTCTCGGGAGATTATGCTGGATACGCCAAGTTATGCCGAGTAAGAATAACCG -ATTGTTTGAGGTAGTCATACTATGAGTAATCCGTCTTCACGGTAGTGGGGCTTACAGGAT -AAACCCAGCTGATCTTGCCCTTCATCCGCTCCTCTCCAGTTACATAGTCCAGCACTTGGC -CCTTGCGAAACTCCGGCGCCATAACATACTTCATAAACAAGACTTCAATCGTGTGCGGCA -CCGAAAGGTACCGCGAAAGCAGATAGTAGCAGGGGTTATGCCACGCATGCATCCTCGTCT -CAAGCCCAGACTTCGCAACATCCCCTTCAAGCGAGCACACAGAGCTCATCCGGACCGCTT -GATAATTCTCAAAGATCTCTGTCAGTACCTTAGCACGGGGCCCCTGACTCCCAGACGCTG -CTCGAACAGCCTTGCGCAGGCTATTGCATAACACCACGATATCCTGAATTCCATGATTAA -ATCCCAGACCCAGATGCGTGGTCATCTTGTGGCACGCATCACCGACAAGCACAATCCGCC 
-CGAAGCTCCAGTGCTTCACAATTCCCTCATCCAAATTCGTCAGTCCCGCCCCCAACATTC -GAGGCCATACATCCTTGACCTTGACCGTCTGCGTCAGCGGAAACTCGGCAAACTCCTCGG -CGACCGCGTCGACGTCCTTCTGCGTGTAAATCCTCCGCTCGCTCGTTGGCCTCGGTAGCC -GCTTATACAGGAAGAACCACCCGCGATCCGTCCCGCTAAAGTACATGATCGCCTTCCCCT -TGGATTGGATATCGTACCCCTGGCCCGCCGGTGACGGCGATGGGAATGACCCGAAGAGGA -GCTGGTATGTCGCCGTGTATGGGTGAACGGGGTCCCACGAGCGAGTCGCGTTCTCCTTCA -GGGCGAGGTCGCGCATTATCTGCCGGGTCTTGCTGTAAACGCCATCGGCGCCGATGATAA -CTGAGCCGTGGAAAACACTGCCGTCGGCGCAGGTTGCGGTGACGCCATCGTGCGTTGTTG -TTAAGTCGGTCAGTTTCTTGTCTGTCAGGATCTTTTCTTTAGCTCCTTCGGGGAGGCCAT -TGTACATGCTTTCGAGGAGCTCGGCGCGGTGGAATGCGACTGGGCCGTGGCCGTGGCTAG -AGAACGAGACAGATATTAGTCGCAATACCTAGGGGAGAATAGTAACCATGGTGCTTTTTG -TACGGAGTAATGAGAACTCACTATTCACGTATCAGGCCATAGCGATTCCCCTCGGCGAAG -ACGTGTCCGTCGGCCGTAAACGACAGGTGATGGTCTAGCTCCACGCCGCGCGGCAGCAAG -TCGTCGAGGATGCCAAACTGATGCAGGACGCGCAAGGTATGGGGATGAACGATGAGGCTG -GCGCCCCTGTCCTCGAAGATTTCCAGCCGGCGCTCAAGGACGACAAAGTCGATGCCGGCT -AGATGGAGGGCGTGTGCGGCCGTGAGCCCAATCGGGCCTCCGCCTACGACGATGACTTTG -AATTTGGTGCTTCGGCTGCTCATTTTGGGGCTTAGGTTGCTGACCAGCTATGCACTGTGT -TAGGAGCGATGTTCTTTTGTACCGTGCAGGTCTACACCGGGGGAGTCATACGGTCGGTTT -TTGCGCCGAGGGGTCACAGGATGGGCTATCTGAGGGAGAATAATCCATGGCAGGCCGAAT -GGTGCTATTATAAAAGTTTCTGGGAGCTCGTTCGGGCCCTTGTTCGAGTATCAGAGCGCT -CTCAGAGACTGTGAGGAATATGTAGCCAATGGGCGCTGCTTGTCTGGGGAAGATCCTCCG -AAGAAGTTGTACCGAGATGAGCCCAAGGCCTAAATTTCGAATTGAATGTTTCTCCGCGGG -AGGATCATCAAATGCTTCAATATGCTCCGCTGCAGACCTATAGCGCGAAAGGGGCCGCTT -ATCCTGTCCGTCAGTAATAAGACCTCGAAGCGTCCCCGAAGCATCCTCGAATCACCCTGA -ACGAACGTCACTAGCTCGCCCGTCGCCATGTCCAAACGCGTCTCTGTGATAGTCATCGGC -GGCTCGCACGCAGGCCTGGCGGTCAGCCAGAAACTGCTGCGCCAAACGCCCAAAGCCGCC -ATCACGCTTATCAGCCCCTCGGATGAGTACTACTTCAACATCGCCGCGCCCCGCTTCCTT -GTCAAGCCCAAATGCCTGCCCCCCAGCAAATACCTATACTCGATCCCGGACGCGTTCCGT -GATTATCCCGCGGGCTCTTTCACCTTCGTCAAGGGACTAGTGACCGAGATTGACTACACG -ACCAAGTCGGTTGCCGTTGCTCTGTCTGCAGGCTCCCTTGCTGCTGCTGCCGTGTCTTCG -TACAGTTTCGACTATCTCGTGATCGCGTCTGGCAGTTCGACGCCTGCAACGCTCGGACAG -GAGGGCGTGCGGCTTCCGTTCAAGGCGACGGCGTTCGAGGATATAAGGAAGGCCATTTAT 
-GAGGCGCAGGCGAAGCTGGCAGGCGCCCAGAGAATTGTAATCGGTGGTGCAGGGCCCCTG -GGCATCGAAATAGCCGGGGAACTGGCCGAAGCGCCAGGATCGAAGAAGATCACGCTGGTG -TCTCGGCCAAATGTCTTGCTAGAGGGGACGACTGCGGCTATGCAACGAACGGTGATGTCG -CTTCTGAAGTGGAAAAACGTCGATATCTTGACCGGGACCACGGTTGAGGAGTCCGTGTAC -GAACCCGACACCCAGACATGGAGGGTCAAGTTGTCGACTGGGAAAACATACACTGCTGAT -GCGTACATCGCGACCACGGGCACTGTTCCGAATAACGAGTTTATCCCAAAGGGCTGCCTC -AATTCCCAAGGCTGGGTCAACGTCGACGAACAGCTTAGAGTTGTCGAGAACGGCGTAAGT -CGCAATGACACGTATGCGGTCGGTGACATCACTTGCCATCCATATCGACTGCTCTCGCGA -GTCTCCCTTCAGGGACAGACGGTAGCATCGAATATTGCGGCGGCCATTGAGCGGAACTCC -CGGATCATGACGTACTCGACAGAAGCGCAGAAGAAGATGATGGTCGTCCCCGTGGGGCAG -TCGACGGGAACAGGGCACCTCGGTGGTTGGACACTGTTTGGATGCCTGGTTTGGTTCTTC -AAAGGCAAGGACTTTTTGACTTATGAGGCGCCCAAGTTCTTGCGGGGTCAGATGCGATGA -TTGTATGATGATTGTACTAGATAATTATTCTATGCTATTAGAAGAGTTCAACTGTGATCT -TATATTTCCGTAGTATATCTGCGTACATCTCTAATACTTTTCTCCTACTTGGCCTTATTA -TCCCCAAAGTCTCTGGGCAGCTCAGGCCCCCAAACAGACAACGGCCCAACTGGCTCTCTG -CGCGTCTGGTTGTTGCAGTTCACAACATCCCCATCGGCGTAATGCTCAATCTTGAATTTG -GAGCTGTCATACCAGTAATCGAAAATCTGGCTCCCCAGGATATGTCTTCCAACACCCCAA -ACGCTCTTCCACCCCTTCTGCGCCAGATAATGATGCCCCATCAGCTGCGTATCAAAATCG -GCCACCTCAAACGAGGTATGATGCACGTACGTCTTGCTCACAGTTGGCGAAGCCCGTTGT -AGGAAGAACGTATGGTGGTCTGTGTACTCCTCGCCCAGGTCAAGGTGCATAAAGGTCATA -ACATCCATCTGAGGGAACTGCGGGAGATTGAGGATATCCGAGTGCACAAAGTTGAAGTTG -TTCGTGTAGAACTCAAGCTCCGTCTCGAACTCCTTGCACACGTAGCCGTAATGCCCGAGC -TTGTGCACCAGCGCCGGCCCATCGCGGTAGCGCTGGAATTGTCCCTTGCGCGGTTTCTCA -AATGGCGTATTGAACGGACCCTGAAAGTCGTGTGTTGCTGTTGGTGTCGTCTTCGACTCG -ACATCCCGCTCCTTCTGCCCGTACACAACGTAGAGGAATGTCCCATTCGGCCTCGCAAAA -GTCACCATCTCGCCGGCCCCGGGTGCCTCGTCCAGCTTCCTGCGCACTGCGCCGGGCATC -CCCGCGGCTTTTTCAAAATTCTCCTCATCCTTGGCGACGTACGCAGCGCCCAGGAAGGTG -GATCTCCGCGTTGGGCTCTGTGACGCGACGTAGACGTAGGGGTCCCTTCCGTACCCGCGG -TAGTAGATTGCGTTGCCCTTGCGCGTGGCCTCGACGAAGCCAAAGTCCTCGGCGAACCTA -GTAAAGGCTTCAAGATCGGCGTGCTCGAAGTAGACGTGGGCGATTCGGCTGAGCTGGACT -TTTTGAGGCGAATTCTCGACGACGTGGATGGTAGATGAAGGGAGGATCCAGGTTTGCCAT -TTCGATGGGAGGAGGGTGATTATTTCGAGGAGCCAGAGCGACATTCTCCTGCGTTTGGTT 
-CTGTCGTCCTTCTTGGAGAGTGTATGTGGGTGAGAGAGAATGAGTGCGTCTGAATAGGAT -AATCACCAGAGTATGTATACAGTGATAGCACGGTATCTCTATCTCCACGAGATCTTTTAT -ACGTTCATGATGCTGAATAGGCACTGCATCTTGTGACGACCGAGGCTCTCTGGTCTAGAT -GATAGATTTCTGGGGGATCCGACTTGCCGCGGAGGAATAGAGAAGGGGCAGCAGCCCTAA -GAAGGAGGGGATTACGACTCAGGAACGCGCAACGAAAAGATCCTCCGAGGAGGAACTCCG -GAGTCCTGAGCTTGAGGAGCCAGAGCCTGAACTGGGACATACCCAACTTAAATCTCGGCC -TTCTTTACTCGTACTCCCTTGTCAGTGCTTTTCCCGATCCTTCCCCAGTTACCATCCCGC -ATTCGCACCCCAGACAATGAGTTCATTCCAGTATCTCTGTCGTTTCTCTACAGAGAACGG -CGAGGCCTTCTTCGCCAAATGCGCGTCGGTTAAGCCTACCATCGGAGCATTGGTAGACGC -ATATCCAACGTACAAGGACCTGGTGAATGGCAATAATGCGACCACAGCCACTATAGCCAA -GGTGAGCTGTCGCTTCTCTCCTGCTGTCATTGGATTGCATGGCTGACGGGACACGGTCTA -GCTCTTACCACCTCTGCCGCAAACAAGCAGCTCAATTTACTGCGTCGGGCTCAACTATAA -GAGCCACGCGAAAGAAGCAAGCGTACGCCGCCTCACATACGACTACATAGAGACGTCGCA -TCTAACGGTTAACAGCTCAACGTCCCCGCCAACCCACCCGTATGGACCAAGCCCCCCGCC -GCGCTCGCATCGCCAAACGAAACCATCCCTATCAGTCGTTTCTGCGCCTCGCATCTTCCA -GACTGGGAGGTGAGCCCGTCCACAATAATCCAGCCAGCGTAACTGTCACTCACAGAAGCA -GGGCGAACTCGTCTTCGTTACGTCGCGCGAGTGCCGCGACATCACCCCAGAGCAAGCCTC -CGAGTACATCCTCGGATACACTATCGGAAATGACCTCTCGTGCCGTTTCTTCCAGCTTCC -CGAGCAGTCCGGTGGGCAGTTCTTCTACGCAAAGGCGTTTGATAAGTTTGCGCCGGTCGG -CCCGGTCCTCGTGAGCCCGAGTGTCTTTGCGAAGGAGAGGGCATCCGCTACGCTCGTCAC -CCGGGTCAACGGGGAGGTCAAGCAGGATACGGTGATTGGGAAGGATATGATCTATTCGCC -GGAGAGAATCCTTAGCTGGATGTGTCAGAGTATGCTCTTTTTCTGTTTATTCTACTTTAT -TTCATTTTTCGTGAAGCCTTCTCCTACATCTCTAACACGAATCCGCATCCAGGTACAACT -ATCCCAGCTTACACGGTAGTCATGACGGGTACCCCGGCGGGGGTGGGGGTATTCCAGAAG -CCGCGGCAGCTATTGAAGGATGGGGACCAGGTCGAGGTTGAGATTTCAGGACTGGGGACG -CTGAAGAACGTGGTGCGATTTGACGAGGGGCAAGATTCCATCATGTAAGATTACACTTTA -GGACGATGAACACGGCGAACGCTCGCTTGTGTCAAGTTTGAGAATCGAATACTGATGGAC -ACTGCTTTGTTCGTTTCAACAGAAAACTTATATGGACGCTGGTGAAAGCAAAGCTCTGGT -ATAGGTGTAGTTCATGGCTGATATGAATGCAGTATCTGTACTTGGCGTTCCGGCTTTCGG -GCCATAGATTATGTGTCTGTCTGTGTTTCAACCCTCTGGCATGCAAATGCGTGTTGATCG -ACCACCAAAAAAGAATGCAACATCCCGGGATTTTACGGATTTGTATTCATGCCGACCCGT -ATGTAGGAATACACGACATATTCTATAAGACAGAAAATCGAAGCCGCTAGTAGGTTCCGT 
-CAAGCCTGATGCAGGCATTAAGCAGAATAGCTGATGTGCCGCTGGCAAGATCATCAATAT -CGTCTACATTCCATCGAGAAGGGTTTACTTTCTATCGGAGCAAAGTAAAGTTATGCTTTT -GGAGGAATTTGTCCCTGCAATCCCATGTCGTCTGCTTGTTTCCCACACTTTGTGAATTCC -ACTTGTTGGTATCCCGAATATCATTCTCATTGCCACCGACGCAGAATACCCCAACCGGAC -CCTCGATTGCAGGGAGAGATGTTTCTTATGCTTATGAGTCTGTGACGTTCATCACGGCTA -GATTGAGGTAGAAATCATCGCTTAGAGCGGCGGCAAAATTCAAGAGGGGAATATCCATCG -TGGTCGCAAGCCACTCTGGGGTCGTTCGTCCCCTGTAAATTGGAATCTGAACGTGGACTG -CCAGACTCTGACCGCACACACATCTGGAGAAGAGCAGCAAAAGCCAGTAGGTGGTCTGTT -CGAAGACGCCCTGCTTATTCGTCATCAATGGGGCGATCACGTTCACTCTCTGCGCGATCG -TGGCTATGTCGATATGCCGTGCCTGTCGCACGAACACATTGAGCCACAACGCCACTACTG -TCATGTCTGAATCATCGTCAAGCTCTTCTTCACCCTTATTACCCGGACCTCGGATGTGAT -TCCAGATGTTCCACTCATCGAAGGAGATCTTAAGTCTGCGGGGGGCTTTTGGTTTTGTCA -CAAAGTGTGGGAAGACTTCGTAGTCCATCTCGGTCCGGGCAAAATCACTTAGGGCAGCAG -TAATCTCGATAGCTTTTTCGGCAGCTTTGGGGCCAGTGACATTAGGATAATGTCTAGCTT -GGCACGAAAATAGAAGGAGGTAATTAACCTTTTTGTTCCAATGTGGAACCTGTGTTGGTG -CTCGGTAGACTTCCGTAcaacccactctaaccaggacccaacctaacctaacccaaccca -aAATGGGTTTTGGGTCCATTACTTCAACCTAACGATGGGGTTTGGGTTAACGTATTTCAC -AAGCGAGCCGCCATCACAAGCGAGCCGCCACCCCAACATAACCTAACAATTTTAAATGCA -TTTATCTCAACCAAAACGCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNTGCTTCCACCGGCTCAGTAATCAATTGAGAAGCTT -CTTGAACGGACAGGCCTTCTTCGGCGGGTATATGCTTCCGGGATCTAGTACGCTTTTACT -TCTGCTTCTCGTTGACAGCGCGTAATTCCCTATTTTCTTTGGTGAGTAAGATAGCACCTT -GCATTGTAAGACGGAAGCCTTTCACAAGCTGGTTTAATACACGATCTGACGGGCTAGGTG -GACTCCGAGATCTTGTACGAAGTAAAGCTTTGATCGATGAAGCCTATCGACGTAGTTCTT -TCTCTGTAAAGGGGGTTTTTGGGGTGAAATTCCGGCTTGATTCACTACCCCGGCTAGGTG -GGGGGGTCGGGGTTCGAAGCTGAATATTAAGCTTTGAAATCACTCTCTCCGGTGAAAAAG -GGACTAAACCAGCTGCTCCGAAGCTATTTTTGATAGTCTCCGATTTAAAAGCTTCAATTC -GTGCGAGGGGGTACGCTTCGAGGAAATCAAGTTTATCAATATGATTTATCCTAACCCGCA -TTTTTGATTCAACTAACCGGCCGTACGCACGTTTTAACACCGTAAAACAGCCGATATCGA -GAGGTTGTAAAAGGTGAGACGAATGAGGAGGTATATAGATTGGTATCACCTTATTTTCTT -CACAGATTTTGTCGAACCTCGGGGTTAAATGACTTCCGTGGCCATCGAGTATCAAAAGCC -GATACCCCCCCTTTATACGTAAAGAAGTAGTAGGAATAAAGAGCTTTTTAAGCTAGCGAA 
-GTCTAATTTCATTAGAGGTCCAGCCGTTAGGACTAACTTCAAAGCGCCAATCCTCCGGGA -GGCCGTTAAACTAGGATTTAATAAAGACTTTACCTTTAAAAATCACACACGGAGGAAGAG -CCTACCTAGAAGCATTTATACACTTAATCACCGTTACCCACTCACGATTTCCGGGCTGTA -AAAGAGCTCGCTGACTGTAGAATTCGCTTCTCGAAATCACCTTCGCGGTTACGGTTAATC -CTATTGCAAAACCGGTCTCGTTAAAGTTATAAACGTCGTCTGGGTCGATCCTAAATTGTA -AGATAGTTTTTTAAATAAGATTAAACCACTCTGTAATAATCTTCGGGTCTTTATATTTCG -CACGCTCGTAGTTATACCGTTTCGAAAACCGAGAGGATAGAAGAGGACGTCTTTTCACAT -AGTTCGTCACCCAGTTTTCGCCGACCAAAAGAACCGGGGTAGTTCCGCGCTTTTCGAGAA -GAAGATTCGCCATTTCTCGTACCATAGAAGGCCGGGGGGCGCCTCCACGTCGATCCATAG -AGAGAATCCATTTTTCAAGAGATTCTTCTTCTAATTCTGTCAATTTATGTGAATTTGCGC -GTGTAGTGCTTCGTTCGGTGTGGCCGTTAAGACGATCGCGTAGGGTAGAACGGGGTATAT -TGAAAGTACGGGCAGCGGCAGCAATGGTACGAATTTCTTACTTCTTAATAGCCTGGATAG -CTAATAAGATCCTACCCTCTTGCTCTATTAAATTTTTAGAGCTTCGGGAGCGAATTGGTG -GCATGGTGGGTGATTGAATTGACCAAGTGCTAATTCCTGGACGCGTTTTGGGTGAGGTGG -CGGCTCGCTTGTGGTGGCGGCTCGCTTGTGAAATACGTTA diff --git a/annotation/Tools/test/gff3_sp_fix_fusion/genome.fa b/annotation/Tools/test/gff3_sp_fix_fusion/genome.fa deleted file mode 120000 index b7b2d6853..000000000 --- a/annotation/Tools/test/gff3_sp_fix_fusion/genome.fa +++ /dev/null @@ -1 +0,0 @@ -../genome/pcoprophilum_scaf_1.fa \ No newline at end of file diff --git a/annotation/Tools/test/gff3_sp_fix_fusion/result-all.gff b/annotation/Tools/test/gff3_sp_fix_fusion/result-all.gff deleted file mode 100644 index 49c103207..000000000 --- a/annotation/Tools/test/gff3_sp_fix_fusion/result-all.gff +++ /dev/null @@ -1,33 +0,0 @@ -##gff-version 3 -Pcoprophilum_scaf_1 maker gene 94834 96376 . + . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1;Name=genemark-Pcoprophilum_scaf_1-processed-gene-1.1 -Pcoprophilum_scaf_1 maker mRNA 94834 96376 . + . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1;Name=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1;_AED=0.02;_QI=18|1|1|1|0.66|0.5|4|2137|248;_eAED=0.02 -Pcoprophilum_scaf_1 maker exon 94834 94884 . + . 
ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:3;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker exon 94935 94959 . + . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:4;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker exon 95013 96376 . + . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:5;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker CDS 94852 94884 . + 0 ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker CDS 94935 94959 . + 0 ID=IDmodified-cds-1;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker CDS 95013 95701 . + 2 ID=IDmodified-cds-2;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker five_prime_UTR 94834 94851 . + . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:3-utr5-1;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker three_prime_UTR 95702 96376 . + . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:5-utr3-1;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker gene 96315 97779 . - . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6;Name=genemark-Pcoprophilum_scaf_1-processed-gene-1.6 -Pcoprophilum_scaf_1 maker mRNA 96315 97779 . - . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6;Name=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1;_AED=0.09;_QI=0|0.5|0.33|1|1|1|3|62|405;_eAED=0.09 -Pcoprophilum_scaf_1 maker exon 96315 96739 . - . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1:exon:83;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1 -Pcoprophilum_scaf_1 maker exon 96810 97424 . - . 
ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1:exon:82;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1 -Pcoprophilum_scaf_1 maker exon 97540 97779 . - . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1:exon:81;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1 -Pcoprophilum_scaf_1 maker CDS 96377 96739 . - 0 ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1 -Pcoprophilum_scaf_1 maker CDS 96810 97424 . - 0 ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1 -Pcoprophilum_scaf_1 maker CDS 97540 97779 . - 0 ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1 -Pcoprophilum_scaf_1 maker three_prime_UTR 96315 96376 . - . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1:three_prime_utr;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1 -Pcoprophilum_scaf_1 maker mRNA 96377 96707 . - . ID=new2_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6 -Pcoprophilum_scaf_1 maker exon 96377 96707 . - . ID=new2_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:5;Parent=new2_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker CDS 96377 96691 . - 0 ID=new2_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:5-cds-1;Parent=new2_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker five_prime_UTR 96692 96707 . - . ID=new2_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:5-utr5-1;Parent=new2_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker mRNA 96708 97321 . - . ID=new1_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6 -Pcoprophilum_scaf_1 maker exon 96708 97321 . - . 
ID=new1_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:5;Parent=new1_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker CDS 96708 97283 . - 0 ID=new1_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:5-cds-1;Parent=new1_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker five_prime_UTR 97284 97321 . - . ID=new1_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:5-utr5-1;Parent=new1_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker mRNA 97399 97915 . - . ID=new4_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6 -Pcoprophilum_scaf_1 maker exon 97399 97915 . - . ID=new2_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:6;Parent=new4_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker CDS 97450 97854 . - 0 ID=new2_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:6-cds-1;Parent=new4_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker five_prime_UTR 97855 97915 . - . ID=new2_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:6-utr5-1;Parent=new4_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker three_prime_UTR 97399 97449 . - . ID=new2_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:6-utr3-1;Parent=new4_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 diff --git a/annotation/Tools/test/gff3_sp_fix_fusion/result-intact.gff b/annotation/Tools/test/gff3_sp_fix_fusion/result-intact.gff deleted file mode 100644 index f5b4dee31..000000000 --- a/annotation/Tools/test/gff3_sp_fix_fusion/result-intact.gff +++ /dev/null @@ -1,10 +0,0 @@ -##gff-version 3 -Pcoprophilum_scaf_1 maker gene 96315 97779 . - . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6;Name=genemark-Pcoprophilum_scaf_1-processed-gene-1.6 -Pcoprophilum_scaf_1 maker mRNA 96315 97779 . - . 
ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6;Name=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1;_AED=0.09;_QI=0|0.5|0.33|1|1|1|3|62|405;_eAED=0.09 -Pcoprophilum_scaf_1 maker exon 96315 96739 . - . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1:exon:83;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1 -Pcoprophilum_scaf_1 maker exon 96810 97424 . - . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1:exon:82;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1 -Pcoprophilum_scaf_1 maker exon 97540 97779 . - . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1:exon:81;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1 -Pcoprophilum_scaf_1 maker CDS 96377 96739 . - 0 ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1 -Pcoprophilum_scaf_1 maker CDS 96810 97424 . - 0 ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1 -Pcoprophilum_scaf_1 maker CDS 97540 97779 . - 0 ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1 -Pcoprophilum_scaf_1 maker three_prime_UTR 96315 96376 . - . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1:three_prime_utr;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1 diff --git a/annotation/Tools/test/gff3_sp_fix_fusion/result-only_modified.gff b/annotation/Tools/test/gff3_sp_fix_fusion/result-only_modified.gff deleted file mode 100644 index 85061fc39..000000000 --- a/annotation/Tools/test/gff3_sp_fix_fusion/result-only_modified.gff +++ /dev/null @@ -1,25 +0,0 @@ -##gff-version 3 -Pcoprophilum_scaf_1 maker gene 94834 96376 . + . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1;Name=genemark-Pcoprophilum_scaf_1-processed-gene-1.1 -Pcoprophilum_scaf_1 maker mRNA 94834 96376 . + . 
ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1;Name=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1;_AED=0.02;_QI=18|1|1|1|0.66|0.5|4|2137|248;_eAED=0.02 -Pcoprophilum_scaf_1 maker exon 94834 94884 . + . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:3;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker exon 94935 94959 . + . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:4;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker exon 95013 96376 . + . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:5;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker CDS 94852 94884 . + 0 ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker CDS 94935 94959 . + 0 ID=IDmodified-cds-1;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker CDS 95013 95701 . + 2 ID=IDmodified-cds-2;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker five_prime_UTR 94834 94851 . + . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:3-utr5-1;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker three_prime_UTR 95702 96376 . + . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:5-utr3-1;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker gene 96315 97779 . - . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6;Name=genemark-Pcoprophilum_scaf_1-processed-gene-1.6 -Pcoprophilum_scaf_1 maker mRNA 96377 96707 . - . ID=new2_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6 -Pcoprophilum_scaf_1 maker exon 96377 96707 . - . 
ID=new2_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:5;Parent=new2_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker CDS 96377 96691 . - 0 ID=new2_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:5-cds-1;Parent=new2_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker five_prime_UTR 96692 96707 . - . ID=new2_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:5-utr5-1;Parent=new2_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker mRNA 96708 97321 . - . ID=new1_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6 -Pcoprophilum_scaf_1 maker exon 96708 97321 . - . ID=new1_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:5;Parent=new1_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker CDS 96708 97283 . - 0 ID=new1_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:5-cds-1;Parent=new1_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker five_prime_UTR 97284 97321 . - . ID=new1_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:5-utr5-1;Parent=new1_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker mRNA 97399 97915 . - . ID=new4_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6 -Pcoprophilum_scaf_1 maker exon 97399 97915 . - . ID=new2_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:6;Parent=new4_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker CDS 97450 97854 . - 0 ID=new2_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:6-cds-1;Parent=new4_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker five_prime_UTR 97855 97915 . - . 
ID=new2_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:6-utr5-1;Parent=new4_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker three_prime_UTR 97399 97449 . - . ID=new2_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:6-utr3-1;Parent=new4_genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 diff --git a/annotation/Tools/test/gff3_sp_fix_fusion/result-report.txt b/annotation/Tools/test/gff3_sp_fix_fusion/result-report.txt deleted file mode 100644 index 11f63a77f..000000000 --- a/annotation/Tools/test/gff3_sp_fix_fusion/result-report.txt +++ /dev/null @@ -1,9 +0,0 @@ -usage: /Users/jacda119/git/NBIS/GAAS/annotation/Tools/bin/gff3_sp_fix_fusion.pl --gff test.gff -f genome.fa -o result -Results: -1 genes affected and 1 mRNA. - -/!\Remind: - L and M are AA are possible start codons. -Particular case: If we have a triplet as WTG, AYG, RTG, RTR or ATK it will be seen as a possible Methionine codon start (it's a X aa) -An arbitrary choisce has been done: The longer translate can begin by a L only if it's longer by 21 AA than the longer translate beginning by M. It's happened 0 times here. -Job done in 1 seconds diff --git a/annotation/Tools/test/gff3_sp_fix_fusion/test.gff b/annotation/Tools/test/gff3_sp_fix_fusion/test.gff deleted file mode 100644 index 090a19392..000000000 --- a/annotation/Tools/test/gff3_sp_fix_fusion/test.gff +++ /dev/null @@ -1,21 +0,0 @@ -Pcoprophilum_scaf_1 maker gene 94834 97915 . + . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1;Name=genemark-Pcoprophilum_scaf_1-processed-gene-1.1 -Pcoprophilum_scaf_1 maker mRNA 94834 97915 . + . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1;Name=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1;_AED=0.02;_eAED=0.02;_QI=18|1|1|1|0.66|0.5|4|2137|248 -Pcoprophilum_scaf_1 maker exon 94834 94884 . + . 
ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:3;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker exon 94935 94959 . + . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:4;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker exon 95013 97321 . + . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:5;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker exon 97399 97915 . + . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:exon:6;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker five_prime_UTR 94834 94851 . + . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:five_prime_utr;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker CDS 94852 94884 . + 0 ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker CDS 94935 94959 . + 0 ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker CDS 95013 95701 . + 2 ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker three_prime_UTR 95702 97321 . + . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:three_prime_utr;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker three_prime_UTR 97399 97915 . + . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1:three_prime_utr;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.1-mRNA-1 -Pcoprophilum_scaf_1 maker gene 96315 97779 . - . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6;Name=genemark-Pcoprophilum_scaf_1-processed-gene-1.6 -Pcoprophilum_scaf_1 maker mRNA 96315 97779 . - . 
ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6;Name=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1;_AED=0.09;_eAED=0.09;_QI=0|0.5|0.33|1|1|1|3|62|405 -Pcoprophilum_scaf_1 maker exon 96315 96739 . - . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1:exon:83;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1 -Pcoprophilum_scaf_1 maker exon 96810 97424 . - . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1:exon:82;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1 -Pcoprophilum_scaf_1 maker exon 97540 97779 . - . ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1:exon:81;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1 -Pcoprophilum_scaf_1 maker CDS 97540 97779 . - 0 ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1 -Pcoprophilum_scaf_1 maker CDS 96810 97424 . - 0 ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1 -Pcoprophilum_scaf_1 maker CDS 96377 96739 . - 0 ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1:cds;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1 -Pcoprophilum_scaf_1 maker three_prime_UTR 96315 96376 . - . 
ID=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1:three_prime_utr;Parent=genemark-Pcoprophilum_scaf_1-processed-gene-1.6-mRNA-1 diff --git a/annotation/Tools/test/gff3_sp_fix_longestORF/genome.fa b/annotation/Tools/test/gff3_sp_fix_longestORF/genome.fa deleted file mode 120000 index b7b2d6853..000000000 --- a/annotation/Tools/test/gff3_sp_fix_longestORF/genome.fa +++ /dev/null @@ -1 +0,0 @@ -../genome/pcoprophilum_scaf_1.fa \ No newline at end of file diff --git a/annotation/CheatSheet/Bash_cheat_ sheet_level1.pdf b/annotation/knowledge/Bash_cheat_ sheet_level1.pdf similarity index 100% rename from annotation/CheatSheet/Bash_cheat_ sheet_level1.pdf rename to annotation/knowledge/Bash_cheat_ sheet_level1.pdf diff --git a/annotation/CheatSheet/Bash_cheat_ sheet_level2.pdf b/annotation/knowledge/Bash_cheat_ sheet_level2.pdf similarity index 100% rename from annotation/CheatSheet/Bash_cheat_ sheet_level2.pdf rename to annotation/knowledge/Bash_cheat_ sheet_level2.pdf diff --git a/annotation/CheatSheet/Bash_cheat_ sheet_level3.pdf b/annotation/knowledge/Bash_cheat_ sheet_level3.pdf similarity index 100% rename from annotation/CheatSheet/Bash_cheat_ sheet_level3.pdf rename to annotation/knowledge/Bash_cheat_ sheet_level3.pdf diff --git a/annotation/CheatSheet/Bash_lecture.pdf b/annotation/knowledge/Bash_lecture.pdf similarity index 100% rename from annotation/CheatSheet/Bash_lecture.pdf rename to annotation/knowledge/Bash_lecture.pdf diff --git a/annotation/CheatSheet/README.md b/annotation/knowledge/README.md similarity index 100% rename from annotation/CheatSheet/README.md rename to annotation/knowledge/README.md diff --git a/annotation/CheatSheet/anchors-in-markdown.md b/annotation/knowledge/anchors-in-markdown.md similarity index 100% rename from annotation/CheatSheet/anchors-in-markdown.md rename to annotation/knowledge/anchors-in-markdown.md diff --git a/annotation/CheatSheet/annotation_tools.md b/annotation/knowledge/annotation_tools.md similarity index 
100% rename from annotation/CheatSheet/annotation_tools.md rename to annotation/knowledge/annotation_tools.md diff --git a/annotation/CheatSheet/annotation_tools_chloro.md b/annotation/knowledge/annotation_tools_chloro.md similarity index 100% rename from annotation/CheatSheet/annotation_tools_chloro.md rename to annotation/knowledge/annotation_tools_chloro.md diff --git a/annotation/CheatSheet/annotation_tools_mito.md b/annotation/knowledge/annotation_tools_mito.md similarity index 100% rename from annotation/CheatSheet/annotation_tools_mito.md rename to annotation/knowledge/annotation_tools_mito.md diff --git a/annotation/CheatSheet/annotation_tools_plasmid.md b/annotation/knowledge/annotation_tools_plasmid.md similarity index 100% rename from annotation/CheatSheet/annotation_tools_plasmid.md rename to annotation/knowledge/annotation_tools_plasmid.md diff --git a/annotation/CheatSheet/bash_other.md b/annotation/knowledge/bash_other.md similarity index 100% rename from annotation/CheatSheet/bash_other.md rename to annotation/knowledge/bash_other.md diff --git a/annotation/CheatSheet/bioconda.md b/annotation/knowledge/bioconda.md similarity index 100% rename from annotation/CheatSheet/bioconda.md rename to annotation/knowledge/bioconda.md diff --git a/annotation/CheatSheet/cigar.md b/annotation/knowledge/cigar.md similarity index 100% rename from annotation/CheatSheet/cigar.md rename to annotation/knowledge/cigar.md diff --git a/annotation/CheatSheet/git.md b/annotation/knowledge/git.md similarity index 100% rename from annotation/CheatSheet/git.md rename to annotation/knowledge/git.md diff --git a/annotation/CheatSheet/gxf.md b/annotation/knowledge/gxf.md similarity index 100% rename from annotation/CheatSheet/gxf.md rename to annotation/knowledge/gxf.md diff --git a/annotation/CheatSheet/perl_code_profiler.md b/annotation/knowledge/perl_code_profiler.md similarity index 100% rename from annotation/CheatSheet/perl_code_profiler.md rename to 
annotation/knowledge/perl_code_profiler.md diff --git a/annotation/CheatSheet/pictures/gff_history.jpg b/annotation/knowledge/pictures/gff_history.jpg similarity index 100% rename from annotation/CheatSheet/pictures/gff_history.jpg rename to annotation/knowledge/pictures/gff_history.jpg diff --git a/annotation/CheatSheet/pictures/git_file_states.png b/annotation/knowledge/pictures/git_file_states.png similarity index 100% rename from annotation/CheatSheet/pictures/git_file_states.png rename to annotation/knowledge/pictures/git_file_states.png diff --git a/annotation/CheatSheet/pictures/library_types.jpg b/annotation/knowledge/pictures/library_types.jpg similarity index 100% rename from annotation/CheatSheet/pictures/library_types.jpg rename to annotation/knowledge/pictures/library_types.jpg diff --git a/annotation/CheatSheet/pictures/scheduler_rosetta.pdf b/annotation/knowledge/pictures/scheduler_rosetta.pdf similarity index 100% rename from annotation/CheatSheet/pictures/scheduler_rosetta.pdf rename to annotation/knowledge/pictures/scheduler_rosetta.pdf diff --git a/annotation/CheatSheet/pypi.md b/annotation/knowledge/pypi.md similarity index 100% rename from annotation/CheatSheet/pypi.md rename to annotation/knowledge/pypi.md diff --git a/annotation/CheatSheet/rackham.md b/annotation/knowledge/rackham.md similarity index 100% rename from annotation/CheatSheet/rackham.md rename to annotation/knowledge/rackham.md diff --git a/annotation/CheatSheet/rnaseq_library_types.md b/annotation/knowledge/rnaseq_library_types.md similarity index 100% rename from annotation/CheatSheet/rnaseq_library_types.md rename to annotation/knowledge/rnaseq_library_types.md diff --git a/annotation/CheatSheet/screen.md b/annotation/knowledge/screen.md similarity index 100% rename from annotation/CheatSheet/screen.md rename to annotation/knowledge/screen.md diff --git a/annotation/CheatSheet/snapshots/GFF2_Spec_first_draft_03_feb_2000.html 
b/annotation/knowledge/snapshots/GFF2_Spec_first_draft_03_feb_2000.html similarity index 100% rename from annotation/CheatSheet/snapshots/GFF2_Spec_first_draft_03_feb_2000.html rename to annotation/knowledge/snapshots/GFF2_Spec_first_draft_03_feb_2000.html diff --git a/annotation/CheatSheet/snapshots/ensembl_cigar.md b/annotation/knowledge/snapshots/ensembl_cigar.md similarity index 100% rename from annotation/CheatSheet/snapshots/ensembl_cigar.md rename to annotation/knowledge/snapshots/ensembl_cigar.md diff --git a/annotation/CheatSheet/snapshots/sanger_gff2.md b/annotation/knowledge/snapshots/sanger_gff2.md similarity index 100% rename from annotation/CheatSheet/snapshots/sanger_gff2.md rename to annotation/knowledge/snapshots/sanger_gff2.md diff --git a/annotation/CheatSheet/snapshots/sanger_original_gff.md b/annotation/knowledge/snapshots/sanger_original_gff.md similarity index 100% rename from annotation/CheatSheet/snapshots/sanger_original_gff.md rename to annotation/knowledge/snapshots/sanger_original_gff.md diff --git a/annotation/CheatSheet/sugar.md b/annotation/knowledge/sugar.md similarity index 100% rename from annotation/CheatSheet/sugar.md rename to annotation/knowledge/sugar.md diff --git a/annotation/CheatSheet/taxonomic_classification.md b/annotation/knowledge/taxonomic_classification.md similarity index 100% rename from annotation/CheatSheet/taxonomic_classification.md rename to annotation/knowledge/taxonomic_classification.md diff --git a/annotation/CheatSheet/vulgar.md b/annotation/knowledge/vulgar.md similarity index 100% rename from annotation/CheatSheet/vulgar.md rename to annotation/knowledge/vulgar.md diff --git a/annotation/LsfTemplates/bsub.insilico_normalize.template b/annotation/lsf_templates/bsub.insilico_normalize.template similarity index 100% rename from annotation/LsfTemplates/bsub.insilico_normalize.template rename to annotation/lsf_templates/bsub.insilico_normalize.template diff --git 
a/annotation/LsfTemplates/bsub.maker.template b/annotation/lsf_templates/bsub.maker.template similarity index 100% rename from annotation/LsfTemplates/bsub.maker.template rename to annotation/lsf_templates/bsub.maker.template diff --git a/annotation/LsfTemplates/bsub.pasa.template b/annotation/lsf_templates/bsub.pasa.template similarity index 100% rename from annotation/LsfTemplates/bsub.pasa.template rename to annotation/lsf_templates/bsub.pasa.template diff --git a/annotation/LsfTemplates/bsub.trinity.template b/annotation/lsf_templates/bsub.trinity.template similarity index 100% rename from annotation/LsfTemplates/bsub.trinity.template rename to annotation/lsf_templates/bsub.trinity.template diff --git a/annotation/LsfTemplates/trinity.lsf_grid b/annotation/lsf_templates/trinity.lsf_grid similarity index 100% rename from annotation/LsfTemplates/trinity.lsf_grid rename to annotation/lsf_templates/trinity.lsf_grid diff --git a/annotation/Tools/Util/R/multiplotAll.R b/annotation/tools/R/multiplotAll.R similarity index 100% rename from annotation/Tools/Util/R/multiplotAll.R rename to annotation/tools/R/multiplotAll.R diff --git a/annotation/Tools/Util/R/sr_AllResu_AllIntervalPlotMean.R b/annotation/tools/R/sr_AllResu_AllIntervalPlotMean.R similarity index 100% rename from annotation/Tools/Util/R/sr_AllResu_AllIntervalPlotMean.R rename to annotation/tools/R/sr_AllResu_AllIntervalPlotMean.R diff --git a/annotation/Tools/Util/R/sr_MadeGraphAndRegByInterval.R b/annotation/tools/R/sr_MadeGraphAndRegByInterval.R similarity index 100% rename from annotation/Tools/Util/R/sr_MadeGraphAndRegByInterval.R rename to annotation/tools/R/sr_MadeGraphAndRegByInterval.R diff --git a/annotation/Tools/Util/R/sr_Mean2col_AllIntervalPlotMean.R b/annotation/tools/R/sr_Mean2col_AllIntervalPlotMean.R similarity index 100% rename from annotation/Tools/Util/R/sr_Mean2col_AllIntervalPlotMean.R rename to annotation/tools/R/sr_Mean2col_AllIntervalPlotMean.R diff --git 
a/annotation/Tools/Util/R/sr_Mean2col_PlotPoints.R b/annotation/tools/R/sr_Mean2col_PlotPoints.R similarity index 100% rename from annotation/Tools/Util/R/sr_Mean2col_PlotPoints.R rename to annotation/tools/R/sr_Mean2col_PlotPoints.R diff --git a/annotation/Tools/Util/R/sr_Mean2col_PlotPointsAndRegs.R b/annotation/tools/R/sr_Mean2col_PlotPointsAndRegs.R similarity index 100% rename from annotation/Tools/Util/R/sr_Mean2col_PlotPointsAndRegs.R rename to annotation/tools/R/sr_Mean2col_PlotPointsAndRegs.R diff --git a/annotation/Tools/Abinitio/Augustus/augustus_create_hints.rb b/annotation/tools/abinitio/augustus/augustus_create_hints.rb similarity index 100% rename from annotation/Tools/Abinitio/Augustus/augustus_create_hints.rb rename to annotation/tools/abinitio/augustus/augustus_create_hints.rb diff --git a/annotation/Tools/Abinitio/Augustus/junctions2hints.pl b/annotation/tools/abinitio/augustus/junctions2hints.pl similarity index 100% rename from annotation/Tools/Abinitio/Augustus/junctions2hints.pl rename to annotation/tools/abinitio/augustus/junctions2hints.pl diff --git a/annotation/Tools/Abinitio/Augustus/protein2hints.rb b/annotation/tools/abinitio/augustus/protein2hints.rb similarity index 100% rename from annotation/Tools/Abinitio/Augustus/protein2hints.rb rename to annotation/tools/abinitio/augustus/protein2hints.rb diff --git a/annotation/tools/abinitio/snap_train.sh b/annotation/tools/abinitio/snap_train.sh new file mode 100755 index 000000000..3e9c1ec27 --- /dev/null +++ b/annotation/tools/abinitio/snap_train.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +NAME=$1 + +if [ -z "$NAME" ] +then + echo "Must provide a name!" +else + fathom -categorize 1000 genome.ann genome.dna + fathom -export 1000 -plus uni.ann uni.dna + forge export.ann export.dna + hmm-assembler.pl $NAME . 
> $NAME.hmm +fi diff --git a/annotation/Tools/Util/bed_create_random_feature.pl b/annotation/tools/bed_create_random_feature.pl similarity index 71% rename from annotation/Tools/Util/bed_create_random_feature.pl rename to annotation/tools/bed_create_random_feature.pl index d91a7282b..693ef50b8 100755 --- a/annotation/Tools/Util/bed_create_random_feature.pl +++ b/annotation/tools/bed_create_random_feature.pl @@ -13,8 +13,10 @@ use Pod::Usage; use Carp; use Bio::FeatureIO; +use GAAS::GAAS; # PARAMETERS - OPTION +my $header = get_gaas_header(); my $opt_genome; my $opt_sizeGenome; my $opt_sizeGeneMAx; @@ -37,10 +39,11 @@ } if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 0 } ); + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); } - + if ( ! (defined($opt_genome)) and ! (defined($opt_sizeGenome)) ){ pod2usage( { -message => "\nAt least 1 parameter is mandatory:\nInput reference gff file (--f)\n\n". @@ -86,7 +89,7 @@ my $start=int(rand($opt_sizeGenome-$opt_sizeGeneMAx)); my $end=$start+$opt_sizeGeneMAx; - my $primary_tag="gene_invent".$i; + my $primary_tag="gene_invent".$i; my $random_strand = int(rand(2)); @@ -100,13 +103,16 @@ =head1 NAME -gffRepeat_analyzer.pl - +gaas_create_random_feature.pl + +=head1 DESCRIPTION + The script aims to create a fake bed file. =head1 SYNOPSIS - script.pl -g name -s 10000 -o - script.pl --help + gaas_create_random_feature.pl -g name -s 10000 -o + gaas_create_random_feature.pl --help =head1 OPTIONS @@ -118,7 +124,7 @@ =head1 OPTIONS =item B<-s>, B<--size> -INTEGER: Genome size. It define the range where features will be created. +INTEGER: Genome size. It define the range where features will be created. =item B<--nbg>, B<--number_gene> @@ -128,7 +134,7 @@ =head1 OPTIONS INTEGER: Size of genes. It define the size oft the gene features to be created. -=item B<-o> or B<--output> +=item B<-o> or B<--output> STRING: Output file. If no output file is specified, the output will be written to STDOUT. 
The result is in tabulate format. @@ -138,4 +144,30 @@ =head1 OPTIONS =back +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + =cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/ComparativeGenomic/orthomcl_analyzeOG.pl b/annotation/tools/comparative_genomic/orthomcl_analyzeOG.pl similarity index 94% rename from annotation/Tools/ComparativeGenomic/orthomcl_analyzeOG.pl rename to annotation/tools/comparative_genomic/orthomcl_analyzeOG.pl index 4ebdd760b..22c64750f 100755 --- a/annotation/Tools/ComparativeGenomic/orthomcl_analyzeOG.pl +++ b/annotation/tools/comparative_genomic/orthomcl_analyzeOG.pl @@ -11,14 +11,9 @@ use Getopt::Long; use IO::File; use Pod::Usage; +use GAAS::GAAS; -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. 
# -######################################################## -}; +my $header = get_gaas_header(); #VARIABLE DECLARATION my $orthoMCL_file; @@ -29,7 +24,7 @@ my $nbProt=0; my $speciesTreeString; my $species_opt; my $focusThisTaxid=""; -my $taxid_opt; my @TAXID_LIST; +my $taxid_opt; my @TAXID_LIST; my $message = "command line: orthomcl_analyzeOG.pl @ARGV\n\n"; @@ -49,11 +44,11 @@ # Print Help and exit if ($opt_help) { - pod2usage( { -message => "$header", - -verbose => 2, - -exitval => 2 } ); + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); } - + if ( ! (defined($orthoMCL_file) ) ){ pod2usage( { -message => "$header\nAt least 1 parameter is mandatory:\nInput reference KOG/COG file (--cog).\n\n", @@ -79,7 +74,7 @@ #### Manage others otpion: my $focusThisSpecies; if ( defined($species_opt) ){ - + $focusThisTaxid="$species_opt"; # get taxid if (! looks_like_number($focusThisTaxid)){ # try to retrive taxid from name @@ -139,7 +134,7 @@ @TAXID_LIST=@$taxidListFromOG; } else{ @TAXID_LIST=split(/[:,_\-\s\/]+/,$taxid_opt); } - + $message = "\nList of taxid: \n"; print $message; if ( $opt_output ){print $outReport $message;} @@ -183,7 +178,7 @@ push(@species_names, $spName); } $speciesTreeReady = $db->get_tree(@species_names); -# print $speciesTreeReady, "\n"; +# print $speciesTreeReady, "\n"; ## Clean Tree $speciesTreeReady->contract_linear_paths(); } @@ -195,23 +190,23 @@ print $screenDisplayTree->write_tree($speciesTreeReady), "\n"; if ( $opt_output ){ $outSpTree->write_tree($speciesTreeReady); - + #print species tree within the report file open F, "<$outSpTreeName" or die "Could not open file '$outSpTreeName' $!";; while () { print $outReport $_; - } + } close F; } -# create hash taxid => nodes (Leaves) +# create hash taxid => nodes (Leaves) $message = "\n\nClean Taxid List according to tree provide:\n"; print $message; if ( $opt_output ){print $outReport $message;} if (defined($opt_tree)){#Clean hashAbbTaxid to remove Taxid not existing in 
species Tree. Only useful when tree is no performed using taxid but comes as external file. my @copy_TAXID_LIST=@TAXID_LIST; foreach my $taxid (@copy_TAXID_LIST){ - my @nodes = $speciesTreeReady->find_node(-id => $taxid); + my @nodes = $speciesTreeReady->find_node(-id => $taxid); if ($#nodes == -1){ $message = "Species $taxid not present in Species Tree. We remove it from analyse.\n"; @@ -226,7 +221,7 @@ ## Create hast taxid/node whole Tree my %hashTaxidNode; foreach my $taxid (@TAXID_LIST){ - my @nodes = $speciesTreeReady->find_node(-id => $taxid); + my @nodes = $speciesTreeReady->find_node(-id => $taxid); $hashTaxidNode{$taxid}=$nodes[0]; } @@ -239,7 +234,7 @@ print $message; if ( $opt_output ){print $outReport $message;} my $hashOGsorted = sortOGforFlatDisplay($hashOGFiltered); -foreach my $key ( sort { $hashOGsorted->{$a} <=> $hashOGsorted->{$b}} keys %$hashOGsorted){ +foreach my $key ( sort { $hashOGsorted->{$a} <=> $hashOGsorted->{$b}} keys %$hashOGsorted){ $message = sizedPrint($key,100)."$hashOGsorted->{$key}\n"; print $message; if ( $opt_output ){print $outReport $message;} @@ -275,16 +270,16 @@ ## Create list of leaves my @original_ListLeavesId; my %original_hashNodeAllDescendant; my %original_hashTaxidAllDescendant; - + #print "\nAmong the $nbAppearance gene appeared at taxid $taxID ($sci_name) we have:\n"; - my @cladeNodes = $node[0]->get_all_Descendents(); ## I know all the descendent of Node of appearance + my @cladeNodes = $node[0]->get_all_Descendents(); ## I know all the descendent of Node of appearance foreach my $NodeFromDesc (@cladeNodes){ #get List leaves my $NodeFromDescTaxid = $NodeFromDesc->id(); $original_hashNodeAllDescendant{$NodeFromDesc}=$NodeFromDescTaxid; $original_hashTaxidAllDescendant{$NodeFromDescTaxid}=$NodeFromDesc; if( $NodeFromDesc->is_Leaf ){ push( @original_ListLeavesId, $NodeFromDescTaxid); - } + } } ## Create the corresponding hash of leaves my %original_hashLeavesId = map { $_ => 1 } @original_ListLeavesId; @@ -297,15 
+292,15 @@ #### loop each list of present taxid - my @ListsTaxidPresent = @{$hashAppearance->{$taxID}}; # all list of Leaves Present + my @ListsTaxidPresent = @{$hashAppearance->{$taxID}}; # all list of Leaves Present foreach my $oneList (@ListsTaxidPresent){ my @ListTaxidPresent=@$oneList; # List of taxid that have the gene my %hashTaxidPresent = map { $_ => 1 } @ListTaxidPresent; # Hash of taxid that have the gene # print "ListTaxidPresent $#ListTaxidPresent @ListTaxidPresent 5555555555 sizeList $#original_ListLeavesId @original_ListLeavesId\n"; - + ### Case No loss - if($#ListTaxidPresent == $#original_ListLeavesId){ + if($#ListTaxidPresent == $#original_ListLeavesId){ next; } @@ -320,14 +315,14 @@ } ### Case several Lost or only one ancestral lost - else{ + else{ ## Create list of Taxid Absent my @ListTaxidAbsent; - my %hashNodePresent; my %hashNodeAbsent; + my %hashNodePresent; my %hashNodeAbsent; foreach my $taxid (@original_ListLeavesId){ my @node = $speciesTreeReady->find_node(-id => $taxid); if(! exists($hashTaxidPresent{$taxid})){ - push(@ListTaxidAbsent, $taxid); + push(@ListTaxidAbsent, $taxid); $hashNodeAbsent{$node[0]}=$taxid; } else{$hashNodePresent{$node[0]}=$taxid;} @@ -352,14 +347,14 @@ # if(exists(clNodeTaxid)) # if(exists($hashNodePresent{$anc_clNode})){ - $presentGeneFound="yes"; - $LossId{$OneTaxid}++; + $presentGeneFound="yes"; + $LossId{$OneTaxid}++; # print "oui existe save child $OneTaxid\n"; last; } } if($presentGeneFound eq "no"){ - my $taxidFocused = $original_hashNodeAllDescendant{$parentNode}; + my $taxidFocused = $original_hashNodeAllDescendant{$parentNode}; if(! 
exists($hashTaxidAdded{$taxidFocused})) { push ( @copy_listTaxidAbsent, $taxidFocused); # Push new ancestrak taxid to test if absent if before $hashTaxidAdded{$taxidFocused}++; @@ -449,7 +444,7 @@ open F, "<$outTreeName" or die "Could not open file '$outSpTreeName' $!";; while () { print $outReport $_; - } + } close F; } @@ -463,7 +458,7 @@ ################################################################## FUNCTIONS ##################################### sub deduceAppearance{ my ($hashOGFiltered, $speciesTreeReady, $hashTaxidNode)=@_; - + my %hashAppearance; my $nbOGstudied=0; foreach my $key (keys %$hashOGFiltered){ @@ -475,7 +470,7 @@ sub deduceAppearance{ else{ my @nodesList; foreach my $taxid (@speciesList){ - push (@nodesList, $hashTaxidNode{$taxid}); + push (@nodesList, $hashTaxidNode{$taxid}); } my $nbNodes=scalar @nodesList; # print "nbNodes $nbNodes\n"; @@ -520,12 +515,12 @@ sub sizedPrint{ sub sortOGforFlatDisplay{ my ($hashOGref)=@_; -my %hashOGidSentence; - foreach my $OGkey (keys %$hashOGref){ +my %hashOGidSentence; + foreach my $OGkey (keys %$hashOGref){ my @ListSpeciesOG=@{$hashOGref->{$OGkey}}; my @ListSpeciesOGSorted = ( sort ({ $a <=> $b } @ListSpeciesOG)); my $IDsentence; my $cpt=0; - foreach my $key (@ListSpeciesOGSorted){ + foreach my $key (@ListSpeciesOGSorted){ if ($cpt == 0){$IDsentence.="$key";} else{$IDsentence.="_$key";} $cpt++; @@ -538,7 +533,7 @@ sub sortOGforFlatDisplay{ sub filterOGfileByTaxid{ my ($hashOGref, $ListTaxidToTest)=@_; my $OGnotKept=0; -my $OGKept; +my $OGKept; my %hashOGrefCleaned; my %taxidAnalyzed; @@ -584,7 +579,7 @@ sub readOGfile{ chomp($line) ; my @splitedLine = split(" ",$line); my %hashOGspecies; my %hashOGspeciesWithThisSpecies; my $OGcontainsThisSpecies="no"; - + my $OGname=shift @splitedLine; $OGname=~s/://g ; # remove ":" foreach my $prot (@splitedLine ){ @@ -658,7 +653,7 @@ sub get_taxon_efficiently{ =head1 NAME analyzeOG.pl - -The script computes some statistics of a COG/KOG file from OrthoMCL output - +The 
script computes some statistics of a COG/KOG file from OrthoMCL output - Statistics as : - number of OG by number of species - number of OG by number of species that includes a specifc species (if specified by -s option) - gene appearances @@ -681,16 +676,16 @@ =head1 OPTIONS =over 8 -=item B<--cog>, B<--og> or B<--kog> +=item B<--cog>, B<--og> or B<--kog> Orthomcl file containg Ortholog groups (COG) from OrthoMCL. -=item B<--taxid> +=item B<--taxid> -Taxid list. If provided the analyse will use only these species. If a tree is also provided, the taxid will be filtered according to the tree to keep only taxid present in the tree. +Taxid list. If provided the analyse will use only these species. If a tree is also provided, the taxid will be filtered according to the tree to keep only taxid present in the tree. If no taxid is provided, but a tree is, only species from the tree will be analyzed. If no tree and no taxid are provided, only taxid among OG will be use. -=item B<-t> or B<--tree> +=item B<-t> or B<--tree> Tree file in nhx format. If provided the analyse will focuse only on species present in the tree. When no tree is provided, a species tree will be created on the fly using the NCBI taxonomy database online according to the species present among the OG. 
diff --git a/annotation/Tools/ComparativeGenomic/prepare_matrice_by_window_v2.pl b/annotation/tools/comparative_genomic/prepare_matrice_by_window.pl similarity index 88% rename from annotation/Tools/ComparativeGenomic/prepare_matrice_by_window_v2.pl rename to annotation/tools/comparative_genomic/prepare_matrice_by_window.pl index ed0bbd052..e4c6fa33c 100755 --- a/annotation/Tools/ComparativeGenomic/prepare_matrice_by_window_v2.pl +++ b/annotation/tools/comparative_genomic/prepare_matrice_by_window.pl @@ -1,9 +1,5 @@ #!/usr/bin/env perl -########################## -# Jacques Dainat 11/2015 # -########################### - #libraries use File::Basename; use strict; @@ -19,7 +15,9 @@ use List::MoreUtils qw(uniq); use Storable 'dclone'; use Clone 'clone'; +use GAAS::GAAS; +my $header = get_gaas_header(); # END libraries # PARAMETERS - OPTION my $opt_position; @@ -53,12 +51,14 @@ -exitval => 1 } ); } +# Print Help and exit if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 0 } ); + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); } -if ((!$opt_position or !$opt_aliDir or !$opt_tsv)){ +if ((!$opt_position or !$opt_aliDir or !$opt_tsv)){ pod2usage( { -message => "\nIf you want to merge sequence by window, at least 3 parameter is mandatory: position file, tsv file, and directory containing alignments\n". "Many optional parameters are available. Look at the help documentation to know more.\n", @@ -89,7 +89,7 @@ my %consensListOk; if($opt_consList){ print "Moreover You decided to make consensus between species. 
Here is the detail:\n"; - # Mange consensus list given + # Mange consensus list given my @consensListPair; @consensListPair= split(/,/, $opt_consList); @@ -104,7 +104,7 @@ push(@{$consensListOk{$consensList[1]}}, $consensList[0]); } } - foreach my $key (keys %consensListOk){ + foreach my $key (keys %consensListOk){ print "You dicided to make a consensus of (inter)species <@{$consensListOk{$key}}> and call this new sequence <$key>\n"; } } @@ -197,9 +197,9 @@ if($count >=4){ # SequenceNamesList if ($el =~ "H_vulgare"){ # H_vulgare sequence => shoud contain EPlHVUG or MLOC that will be used to retrieve position information - + if($el =~ "EPlHVUG"){ - + $el =~ /.*_(EPlHVUG[^|]*)\|.*/; $hordeumName = $1; } @@ -208,7 +208,7 @@ $hordeumName = $1; last; } - + } } $count++; @@ -255,13 +255,13 @@ # need to know the highest value my $highvalue=get_highest_value(\%hash_chr2name, $chr); - + # Go through all the chromosome window by window my $stop=0; - while ($stop != 2){ + while ($stop != 2){ if( (1000000*$currentLimit) > $highvalue){$stop++}; - + my $nbPrintedVal=0; $currentLimit = $currentLimit + $opt_windowsSize; $currentCenter=$currentLimit-($opt_windowsSize/2); @@ -272,12 +272,12 @@ %$tmpAli = (); # empty the hash # we go through the chromosome positions by increasing order - foreach my $position (sort {$a<=>$b} keys $hash_chr2name{$chr}){ + foreach my $position (sort {$a<=>$b} keys %{$hash_chr2name{$chr} } ) { #print "study of $hash_chr2name{$chr}{$position} $position\n"; if($position > (1000000*($currentLimit-$opt_windowsSize)) and $position < (1000000*$currentLimit) ){ #kb to nucleotide : * 1000000 - if(exists ($hash_HordeumName2Aliname{$hash_chr2name{$chr}{$position}})){ - + if(exists ($hash_HordeumName2Aliname{$hash_chr2name{$chr}{$position} } ) ) { + if (fill_for_ali($tmpAli, $hash_HordeumName2Aliname{$hash_chr2name{$chr}{$position}}, $nbPrintedVal)){ # save sequence of the alignement to create supermatrice $nbPrintedVal++; } @@ -285,12 +285,12 @@ else{ #print 
"Hordeum Name: $hash_chr2name{$chr}{$position} doesn't exists among the Hordeum names of alignments we are working with.\n"; } - + } # We are over the chromosome size, consequently we stop elsif($position > (1000000*($currentLimit-$opt_windowsSize))){last;} } - + #report print $report_stream $currentCenter."Mb\t".$nbPrintedVal."\n"; @@ -315,14 +315,14 @@ $key =~ /(.*_[^_]*)_/; my $nameRough = $1; $hashNum{$nameRough}++; - push(@{$hashNames{$nameRough}}, $key); + push(@{$hashNames{$nameRough}}, $key); } # foreach my $nameRough (keys %hashNum ){ - if($hashNum{$nameRough} > 1){ + if($hashNum{$nameRough} > 1){ # create a small alignementof only same species - my $aln = new Bio::SimpleAlign(); + my $aln = new Bio::SimpleAlign(); foreach my $nameComplete (@{$hashNames{$nameRough}}){ my $seq = new Bio::LocatableSeq(-seq => $tmpAli->{$nameComplete} , -id => "$nameComplete"); $aln->add_seq($seq); @@ -338,7 +338,7 @@ if($opt_consList){ foreach my $ConsensName (keys %consensListOk ){ - + my $SuperAln = undef; foreach my $nameCons (@{$consensListOk{$ConsensName}} ){ #print "Does it start by ? $nameCons\n"; @@ -377,7 +377,7 @@ } print "We put $key in $ConsensName \n"; delete $listOfAli{$key}; #REMOVE THAT KEY-VALUE pair - } + } } } } @@ -388,7 +388,7 @@ } #create a consensus from each ali if asked - foreach my $consensName (keys %listOfAli){ + foreach my $consensName (keys %listOfAli){ my $aln=$listOfAli{$consensName}; my $consensus = $aln->consensus_string($opt_consThreshold); $consensus=~ s/\?/-/g; # quand il y a que des gap il remplace par ? .Donc ici on remplace ? par - @@ -424,7 +424,7 @@ print("Parsing Finished\n\n"); - ######################### + ######################### ######### END ########### ######################### ####################################################################################################################### @@ -437,12 +437,12 @@ ######## ###### #### - ## + ## sub fill_for_ali{ my ($hash,$name,$nbPrintedVal)=@_; - ### get file name !! 
+ ### get file name !! my @matches = grep { /$name/ } @listFile; if (@matches >= 2){ print "several file start with the name $name whithin the directory $opt_aliDir. We don't know which one take. We stop. \n"; @@ -454,7 +454,7 @@ sub fill_for_ali{ } else{ $name=$matches[0]; - + if(-f "$opt_aliDir/$name"){ #print "$opt_aliDir/$name\n"; @@ -477,10 +477,10 @@ sub fill_for_ali{ my @ids= split /_/,$id_original ; my $newID=$ids[0]."_".$ids[1]."_".$ids[2]; #print "newID $newID\n"; - + if(! exists($hash->{$newID})){ # If Id is a new one - if(! $nbPrintedVal == 0){ # if not the first alignment read we have to add empty seq in front - + if(! $nbPrintedVal == 0){ # if not the first alignment read we have to add empty seq in front + # If it's new but some other were already existing we add empty seq in front my $emptySeq = ""; $emptySeq =~ s/^(.*)/'-' x $sizeAlAli . $1/mge; # create a string of gap for the sequence to add in front of the ali @@ -523,7 +523,7 @@ sub get_highest_value{ my ($hash,$chr) = @_; my $high=0; - foreach my $position (keys $hash->{$chr}){ + foreach my $position (keys %{$hash->{$chr} } ) { if($position > $high){ $high=$position ; } @@ -541,19 +541,22 @@ sub is_folder_empty { =head1 NAME -script.pl - +gaas_prepare_matrice_by_window.pl + +=head1 DESCRIPTION + The script take a position file as input and a directory containing the alignments. It will concatenates files to create mini-superMatrices corresponding to the size of window size choosen. -The aim is to check if ther is a clear phylogeny pattern by portion of the chromosome. If hte pattern is different in different location of hte chromosome it could be a hint of recombination. +The aim is to check if ther is a clear phylogeny pattern by portion of the chromosome. If hte pattern is different in different location of the chromosome it could be a hint of recombination. 
=head1 SYNOPSIS - ./script.pl -p positionFile --ad directoryWithAlignments [ --output outfile ] - ./script.pl --help + gaas_prepare_matrice_by_window.pl -p positionFile --ad directoryWithAlignments [ --output outfile ] + gaas_prepare_matrice_by_window.pl --help - ./prepare_matrice_by_window.pl -p Hordeum_position.txt -tsv clusters_1L_7speIN_1speOUT_noDup_withHordeum6235.tsv -ad ALIGNMENTS -c -o out_consens + gaas_prepare_matrice_by_window.pl -p Hordeum_position.txt -tsv clusters_1L_7speIN_1speOUT_noDup_withHordeum6235.tsv -ad ALIGNMENTS -c -o out_consens example with -cl option - ./prepare_matrice_by_window.pl -p Hordeum_position.txt -tsv clusters_1L_7speIN_1speOUT_noDup_withHordeum6235.tsv -ad aliTest/ -c -cl Ae_longissima/D,Ae_sharonensis/D,Ae_bicornis/D,Ae_searsii/D,Ae_tauschii/D,Ae_comosa/D,Ae_uniaristata/D,Ae_umbellulata/D,Ae_caudata/D,T_urartu/A,T_boeoticum/A,Ae_speltoides/B,Ae_mutica/B + gaas_prepare_matrice_by_window.pl -p Hordeum_position.txt -tsv clusters_1L_7speIN_1speOUT_noDup_withHordeum6235.tsv -ad aliTest/ -c -cl Ae_longissima/D,Ae_sharonensis/D,Ae_bicornis/D,Ae_searsii/D,Ae_tauschii/D,Ae_comosa/D,Ae_uniaristata/D,Ae_umbellulata/D,Ae_caudata/D,T_urartu/A,T_boeoticum/A,Ae_speltoides/B,Ae_mutica/B =head1 OPTIONS @@ -566,7 +569,7 @@ =head1 OPTIONS =item B<--tsv> This option define the tsv file that contain information needed to link the position to the alignments. Format like this: -Nom marqueur (sequence untiliser pour construire le cluster) \t nombre sp \t nombre individu \t nombre sequences \t Toutes les entetes des sequences de l'alignement seaparer par une tabulation +Nom marqueur (sequence untiliser pour construire le cluster) \t nombre sp \t nombre individu \t nombre sequences \t Toutes les entetes des sequences de l'alignement seaparer par une tabulation =item B<--ad> @@ -594,7 +597,7 @@ =head1 OPTIONS Optional treshold ranging from 0 to 100. 
The consensus residue has to appear at least threshold % of the sequences at a given location, otherwise a '?' character will be placed at that location. Default value=0; -=item B<-o> or B<--output> +=item B<-o> or B<--output> Output name of the directory that will contain results @@ -604,4 +607,30 @@ =head1 OPTIONS =back -=cut \ No newline at end of file +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/ComparativeGenomic/synplot/synplot.R b/annotation/tools/comparative_genomic/synplot/synplot.R similarity index 100% rename from annotation/Tools/ComparativeGenomic/synplot/synplot.R rename to annotation/tools/comparative_genomic/synplot/synplot.R diff --git a/annotation/Tools/ComparativeGenomic/synplot/synplot.pl b/annotation/tools/comparative_genomic/synplot/synplot.pl similarity index 85% rename from annotation/Tools/ComparativeGenomic/synplot/synplot.pl rename to annotation/tools/comparative_genomic/synplot/synplot.pl index 603c2661d..89c41ec66 100755 --- a/annotation/Tools/ComparativeGenomic/synplot/synplot.pl +++ b/annotation/tools/comparative_genomic/synplot/synplot.pl @@ -1,76 +1,11 @@ -#!/usr/bin/perl +#!/usr/bin/env perl -=head1 NAME -synplot.pl - Make synteny plots for small genomes -=head1 SYNOPSIS - perl synplot.pl -f ,, \ - -g ,, \ - -o - perl synplot.pl --help -=head1 DESCRIPTION -Make synteny plots for a set of contigs or small genomes. Given a set of Fasta -files, each representing a single genome, and GFF3 feature tables with CDS -features for those Fasta files, perform Blastp between each adjacent pair of -genomes, and make synteny plots. -Requires the accompanying R script synplot.R in the same folder as the perl -script, and command Rscript in path. -=head1 ARGUMENTS -=over 8 -=item --fasta|-f ,, -List of file names separated by commas; nucleotide fasta files containing -contigs to be compared, in order that they will appear on synteny plot. -=item --gff|-g ,, -List of file names separated by commas; GFF files containing predicted CDS to -be compared by Blastp against each other. Must be in same order as the list of -Fasta files given to -f parameter (above). -=item --out|-o -Prefix for output file names. 
(Default: test) -=item --bidir -Flag: Take bidirectional reciprocal best blast hits only. (Default: Off) -=item --plotonly -Flag: Draw plot only. Requires precomputed intermediate files with same file -name prefix as supplied to -o parameter above. -=item --gencode -Genetic code for a.a. sequence translation. (Default: 4) -=item --help -This help message. -=item --cds|-c -How to depict CDS direction. Allowed values: color, arrow. No quotation marks. -(Default: color) -=item --color_id -Color corresponding to max value in color scale, used to show percentage ID for -two CDSs connected by a stripe in the synteny plot. -=item --color_cds_f -Color for forward-directed CDSs (if "--cds color" specified), or color of arrow -(if "--cds arrow" specified). -=item --color_cds_r -Color for reverse-directed CDSs (if "--cds color" specified, otherwise ignored) -=back -=head1 OUTPUT -All output files have output prefix as given to --out. -Synteny plot is in PDF format at .synteny.pdf. -Intermediate files: Blastp output in tabular format (outfmt 6), and tables -containing coordinates of features and connecting polygons for plotting (suffix -.tab). -=head1 COPYRIGHT AND LICENSE -Copyright 2016, Brandon Seah (kbseah@mpi-bremen.de) -LICENSE -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. -You should have received a copy of the GNU General Public License -along with this program. If not, see . 
-=cut - -use warnings; use strict; +use warnings; use Getopt::Long; +use File::Basename; use Bio::SeqIO; +use Cwd; use Bio::DB::Fasta; use FindBin qw($Bin $Script); use Pod::Usage; @@ -81,7 +16,7 @@ =head1 COPYRIGHT AND LICENSE my $outfix="test"; # Output prefix my $cdstype="color"; # How to indicate CDS direction my $colmax="red"; # Color for maximum %ID in color scale -my $colcds1="darkblue"; # Color for CDS +my $colcds1="darkblue"; # Color for CDS my $colcds2="darkgreen"; my @valid_cds = qw(color arrow); # List of valid CDS drawing types my %contig_length_hash; # Hash of hash of contig lengths @@ -90,25 +25,44 @@ =head1 COPYRIGHT AND LICENSE my %x0_hash; my $plotonly; # Flag - do not run Blastp, only call synplot.R to draw plot my $bidir; # Flag - Bidrectional best hits only +my $help; +my $man; +my $verbose = undef; + +if( !GetOptions( 'fasta|f=s' => \$input_fasta, # Input should be list of Fasta files, comma-separated + 'gff|g=s' => \$input_gff, # Input should be list of GFF files, comma-separated + 'plotonly|p'=>\$plotonly, # + 'out|o=s' =>\$outfix, # Output prefix + 'gencode=i' =>\$gencode, # Genetic code + 'cds|c=s' =>\$cdstype, # How to indicate CDS direction + 'color_id=s' =>\$colmax, # Color for maximum %id in synteny plot + 'color_cds_f=s' =>\$colcds1, + 'color_cds_r=s' =>\$colcds2, + 'bidir' => \$bidir, # Bidirectional best hits only + 'verbose|v!' => \$verbose, + 'help|h!' => \$help, + 'man|m!' => \$man ) ) +{ + pod2usage( { -message => "Failed to parse command line.", + -verbose => 1, + -exitval => 2 } ); +} +# Print Help and exit +if ($help) { + pod2usage( { -verbose => 99, + -exitval => 0 } ); +} -if (! 
@ARGV) { - pod2usage (-message=>"No input parameters supplied", -exitstatus=>2); +# Print Help and exit +if ($man) { + pod2usage( { -verbose => 2, + -exitval => 0 } ); } -GetOptions ( - 'fasta|f=s' => \$input_fasta, # Input should be list of Fasta files, comma-separated - 'gff|g=s' => \$input_gff, # Input should be list of GFF files, comma-separated - 'plotonly|p'=>\$plotonly, # - 'out|o=s' =>\$outfix, # Output prefix - 'gencode=i' =>\$gencode, # Genetic code - 'cds|c=s' =>\$cdstype, # How to indicate CDS direction - 'color_id=s' =>\$colmax, # Color for maximum %id in synteny plot - 'color_cds_f=s' =>\$colcds1, - 'color_cds_r=s' =>\$colcds2, - 'bidir' => \$bidir, # Bidirectional best hits only - 'help|h' => sub { pod2usage(-exitstatus=>2, verbose=>2); }, - 'man|m' => sub { pod2usage(-exitstatus=>0, verbose=>2); }, -); +my @tools = ("blastp"); +foreach my $exe (@tools) { + check_bin($exe) == 1 or die "Missing executable $exe in PATH"; +} ## MAIN ####################################################################### @@ -135,7 +89,7 @@ =head1 COPYRIGHT AND LICENSE open(OUTPUT3, ">", "$output_tab_3") or die ("$!\n"); print OUTPUT3 join ("\t", qw (genome1 gene1 genome2 gene2 g1x0 g1x1 g2x1 g2x0 g1x0 g1y0 g1y1 g2y1 g2y0 g1y0 pid))."\n"; close(OUTPUT3); - + # Run functions parse_fasta_gff(); if ($bidir) { @@ -145,9 +99,10 @@ =head1 COPYRIGHT AND LICENSE } } +print "Call Rscript\n" if ($verbose); # Call R script to generate plot system ("Rscript $Bin/synplot.R --args $output_tab_0 $output_tab_1 $output_tab_2 $output_tab_3 $outfix.synteny.pdf $cdstype $colmax"); - +print "Result written to $outfix.synteny.pdf\nBye Bye\n"; ## SUBROUTINES ################################################################ sub parse_fasta_gff { @@ -155,17 +110,16 @@ sub parse_fasta_gff { ## Record contig lengths for each Fasta file my %contig_zero_position; my %running_total; - + for my $i (0 .. 
scalar(@input_fasta_list)-1) { my $the_fasta = $input_fasta_list[$i]; - $the_fasta =~ /(.*)\.fasta/; - my $label = $1; + my ($label,$path,$ext) = fileparse($the_fasta,qr/\.[^.]*/); open(OUTPUT0, ">>", "$output_tab_0") or die ("$!\n");; print OUTPUT0 $the_fasta."\t".$i."\t".$label."\n"; close(OUTPUT0); $running_total{$the_fasta} = 0; ## Predict ORFs with Prodigal if not already supplied in GFF file - # system ("prodigal -m -c -g 4 -a $the_fasta.prodigal.pep -q -p single -f gff -o $the_fasta.prodigal.gff"); + # system ("prodigal -m -c -g 4 -a $the_fasta.prodigal.pep -q -p single -f gff -o $the_fasta.prodigal.gff"); my $the_fasta_object = Bio::SeqIO->new(-file => $the_fasta); my $seq_object; open(OUTPUT1, ">>", "$output_tab_1") or die ("$!\n"); @@ -182,16 +136,16 @@ sub parse_fasta_gff { $contig_zero_position{$the_fasta}{$seq_object->display_id}, $running_total{$the_fasta}, $i - )."\n"; + )."\n"; } - #print $the_fasta."\t".$total_length_hash{$the_fasta}."\n"; + print $the_fasta."\t".$total_length_hash{$the_fasta}."\n" if ($verbose); close (OUTPUT1); ## Index Fasta files and parse corresponding GFF files # Load fasta sequences to memory my $db = Bio::DB::Fasta->new($the_fasta); #my %CDS; # Output file for translated CDS sequences - my $outfile_pep = Bio::SeqIO->new(-format=>'fasta', -file=> ">$the_fasta.pep"); + my $outfile_pep = Bio::SeqIO->new(-format=>'fasta', -file=> ">$the_fasta.pep"); # Open GFF file open(GFF, "<", $input_gff_list[$i]) or die ("$!\n"); open (OUTPUT2, ">>", "$output_tab_2") or die ("$!\n"); @@ -204,9 +158,9 @@ sub parse_fasta_gff { my @array = split("\t",$_); # Split notes field into elements my @attrs = split(";",$array[8]); - $attrs[0] =~ s/ID=//; + $attrs[0] =~ s/ID=//; # Gene name parsed from ID field - my $gene_name = $attrs[0]; + my $gene_name = $attrs[0]; my $start; my $stop; # What type of feature @@ -216,7 +170,7 @@ sub parse_fasta_gff { my $gene_seq = $db->seq($array[0], $array[3], $array[4] - ); + ); #print 
$db->seq($array[0],$array[3],$array[4])."\n"; # Output sequence object my $output_gene = Bio::Seq->new( @@ -233,17 +187,17 @@ sub parse_fasta_gff { $output_gene=$output_gene->revcom(); $start = $array[4]; $stop=$array[3]; - } + } if ($type eq "CDS") { # If CDS, write translation to file # Translation table 4 (protozoan mitochondrial) - my $output_pep = $output_gene->translate(-codontable_id=>$gencode); + my $output_pep = $output_gene->translate(-codontable_id=>$gencode); $outfile_pep->write_seq($output_pep); } my $zerostart = $start + $contig_zero_position{$the_fasta}{$current_contig}; my $zerostop = $stop + $contig_zero_position{$the_fasta}{$current_contig}; # Define color for CDS depending on transcription direction - my $cds_color; + my $cds_color; if ($zerostart < $zerostop) { $cds_color=$colcds1; } @@ -268,16 +222,24 @@ sub parse_fasta_gff { } sub run_blast_pairs { + + print "run run_blast_pairs\n" if ($verbose); for my $i (0 .. scalar(@input_fasta_list)-2) { my $j = $i + 1; my $blastfile1 = "$input_fasta_list[$i].pep"; my $blastfile2 = "$input_fasta_list[$i+1].pep"; - system ("blastp -subject $blastfile1 -query $blastfile2 -evalue 1e-3 -outfmt 6 -max_target_seqs 1 -out $outfix.blastout.$i.out6"); + if (-f "$outfix.blastout.$i.out6"){ + print "run_blast_pairs output already exists for $outfix.blastout.$i.out6, skipping this step\n"; + } + else{ + system ("blastp -subject $blastfile1 -query $blastfile2 -evalue 1e-3 -outfmt 6 -max_target_seqs 1 -out $outfix.blastout.$i.out6"); + } open(OUTPUT3, ">>", "$output_tab_3") or die ("$!\n"); open(HITS, "<", "$outfix.blastout.$i.out6") or die ("$!\n"); while () { # Convert Blast hit results (pairs of genes with best hits to each other) to polygons for drawing synteny diagrams chomp; + print "$_\n"; my @splitline = split("\t",$_); my ($query,$subject,$pid) = ($splitline[0],$splitline[1],$splitline[2]); print OUTPUT3 $input_fasta_list[$i]."\t".$query."\t".$input_fasta_list[$i+1]."\t".$subject."\t"; @@ -344,3 +306,113 @@ 
sub run_blast_pairs_bidir { close(OUTPUT3); } } + +sub check_bin +{ + length(`which @_`) > 0 ? return 1 : return 0; +} + + +__END__ + +=head1 NAME + +synplot.pl - Make synteny plots for small genomes + +=head1 SYNOPSIS + + perl synplot.pl -f ,, \ + -g ,, \ + -o + perl synplot.pl --help + +=head1 DESCRIPTION + +Make synteny plots for a set of contigs or small genomes. Given a set of Fasta +files, each representing a single genome, and GFF3 feature tables with CDS +features for those Fasta files, perform Blastp between each adjacent pair of +genomes, and make synteny plots. +Requires the accompanying R script synplot.R in the same folder as the perl +script, and command Rscript in path. + +=head1 OPTIONS + +=over 8 + +=item --fasta|-f ,, + +List of file names separated by commas; nucleotide fasta files containing +contigs to be compared, in order that they will appear on synteny plot. + +=item --gff|-g ,, + +List of file names separated by commas; GFF files containing predicted CDS to +be compared by Blastp against each other. Must be in same order as the list of +Fasta files given to -f parameter (above). + +=item --out|-o + +Prefix for output file names. (Default: test) + +=item --bidir + +Flag: Take bidirectional reciprocal best blast hits only. (Default: Off) + +=item --plotonly + +Flag: Draw plot only. Requires precomputed intermediate files with same file +name prefix as supplied to -o parameter above. + +=item --gencode + +Genetic code for a.a. sequence translation. (Default: 4) + +=item --help + +This help message. + +=item --cds|-c + +How to depict CDS direction. Allowed values: color, arrow. No quotation marks. +(Default: color) + +=item --color_id + +Color corresponding to max value in color scale, used to show percentage ID for +two CDSs connected by a stripe in the synteny plot. + +=item --color_cds_f + +Color for forward-directed CDSs (if "--cds color" specified), or color of arrow +(if "--cds arrow" specified). 
+ +=item --color_cds_r + +Color for reverse-directed CDSs (if "--cds color" specified, otherwise ignored) + +=back + +=head1 OUTPUT + +All output files have output prefix as given to --out. +Synteny plot is in PDF format at .synteny.pdf. +Intermediate files: Blastp output in tabular format (outfmt 6), and tables +containing coordinates of features and connecting polygons for plotting (suffix +.tab). + +=head1 COPYRIGHT AND LICENSE + +Copyright 2016, Brandon Seah (kbseah@mpi-bremen.de) +LICENSE +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +=cut diff --git a/annotation/Tools/Util/AGP2chromosome.pl b/annotation/tools/converter/AGP2chromosome.pl similarity index 83% rename from annotation/Tools/Util/AGP2chromosome.pl rename to annotation/tools/converter/AGP2chromosome.pl index 5b39dfb13..1b697b735 100755 --- a/annotation/Tools/Util/AGP2chromosome.pl +++ b/annotation/tools/converter/AGP2chromosome.pl @@ -2,12 +2,13 @@ use strict; use Pod::Usage; -use Data::Dumper; use Getopt::Long; use Bio::SeqIO ; use Bio::DB::Fasta; use Bio::Tools::GFF; +use GAAS::GAAS; +my $header = get_gaas_header(); my $start_run = time(); my $opt_agpfile; @@ -15,14 +16,6 @@ my $opt_output; my $opt_help; -my $header = qq{ -######################################################## -# NBIS 2018 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. 
# -######################################################## -}; - # OPTION MANAGMENT my @copyARGV=@ARGV; if ( !GetOptions( 'a|agp=s' => \$opt_agpfile, @@ -37,11 +30,11 @@ # Print Help and exit if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, + pod2usage( { -verbose => 99, + -exitval => 0, -message => "$header \n" } ); } - + if ( (! (defined($opt_agpfile)) ) or (! (defined($opt_fastafile)) ) ){ pod2usage( { -message => "\nAt least 2 parametes are mandatory:\nInput agp file (-g); Input fasta file (-f)\n\n". @@ -73,9 +66,9 @@ if (open(my $fh, '<:encoding(UTF-8)', $opt_agpfile)) { while (my $row = <$fh>) { chomp $row; - my ($object, $object_beg, $object_end, $part_number, $component_type, $component_id_or_gap_length, + my ($object, $object_beg, $object_end, $part_number, $component_type, $component_id_or_gap_length, $component_beg_or_gap_type, $component_end_or_linkage, $orientation_or_linkage_evidence ) = split(/\t/, $row); - push (@{$hash_agp{$object}{$part_number}}, ($object, $object_beg, $object_end, $part_number, $component_type, $component_id_or_gap_length, + push (@{$hash_agp{$object}{$part_number}}, ($object, $object_beg, $object_end, $part_number, $component_type, $component_id_or_gap_length, $component_beg_or_gap_type, $component_end_or_linkage, $orientation_or_linkage_evidence)); } } else { @@ -146,7 +139,7 @@ ######## ###### #### - ## + ## #check if reference exists in hash. Deep infinite : hash{a} or hash{a}{b} or hash{a}{b}{c}, etc. # usage example: exists_keys($hash_omniscient,('level3','cds',$level2_ID) @@ -168,7 +161,7 @@ sub exists_keys { my $sequence=""; my $seq_id_correct = undef; if( exists_keys($allIDs,(lc($seq_id)) ) ){ - + $seq_id_correct = $allIDs{lc($seq_id)}; $sequence = $db->subseq($seq_id_correct, $start, $end); @@ -179,23 +172,27 @@ sub exists_keys { if(length($sequence) != ($end-$start+1)){ my $wholeSeq = $db->subseq($seq_id_correct); $wholeSeq = length($wholeSeq); - warn "Problem ! 
The size of the sequence extracted ".length($sequence)." is different than the specified span: ".($end-$start+1).".\nThat often occurs when the fasta file does not correspond to the annotation file. Or the index file comes from another fasta file which had the same name and haven't been removed.\n". + warn "Problem ! The size of the sequence extracted ".length($sequence)." is different than the specified span: ".($end-$start+1).".\nThat often occurs when the fasta file does not correspond to the annotation file. Or the index file comes from another fasta file which had the same name and haven't been removed.\n". "As last possibility your gff contains location errors (Already encountered for a Maker annotation)\nSupplement information: seq_id=$seq_id ; seq_id_correct=$seq_id_correct ; start=$start ; end=$end ; $seq_id sequence length: $wholeSeq )\n"; } } else{ warn "Problem ! ID $seq_id not found !\n"; - } + } return $sequence; -} +} __END__ =head1 NAME -AGP2chromosome.pl - -The script aims to combine contigs from the fasta file in chromosome as described into the AGP file. +AGP2chromosome.pl + + +=head1 DESCRIPTION + +The script aims to combine contigs from the fasta file in chromosome as described into the AGP file. AGP version 2 is expected. See https://www.ncbi.nlm.nih.gov/assembly/agp/AGP_Specification/ for specification of this format. If you are unsure about the AGP file you are using, you could check its sanity using the agp validator provided by the NCBI at this address: https://www.ncbi.nlm.nih.gov/projects/genome/assembly/agp/agp_validate.cgi The result is written to the specified output file, or to STDOUT. 
@@ -203,8 +200,8 @@ =head1 NAME =head1 SYNOPSIS - ./AGP2chromosome.pl -g=infile.gff -f=infile.fasta [ -o outfile ] - ./AGP2chromosome.pl --help + gaas_AGP2chromosome.pl -g=infile.gff -f=infile.fasta [ -o outfile ] + gaas_AGP2chromosome.pl --help =head1 OPTIONS @@ -214,7 +211,7 @@ =head1 OPTIONS Input AGP file -=item B<--fasta>, B<--fa> or B<-f> +=item B<--fasta>, B<--fa> or B<-f> Input fasta file. @@ -229,4 +226,30 @@ =head1 OPTIONS =back +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + =cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Converter/bam2bigwig.sh b/annotation/tools/converter/bam2bigwig.sh similarity index 100% rename from annotation/Tools/Converter/bam2bigwig.sh rename to annotation/tools/converter/bam2bigwig.sh diff --git a/annotation/Tools/Converter/bam_to_wiggle.py b/annotation/tools/converter/bam_to_wiggle.py similarity index 100% rename from annotation/Tools/Converter/bam_to_wiggle.py rename to annotation/tools/converter/bam_to_wiggle.py diff --git a/annotation/Tools/Converter/bed2wiggle.rb b/annotation/tools/converter/bed2wiggle.rb similarity index 100% rename from annotation/Tools/Converter/bed2wiggle.rb rename to annotation/tools/converter/bed2wiggle.rb diff --git a/annotation/Tools/Converter/cufflinks2hints.rb b/annotation/tools/converter/cufflinks2hints.rb similarity index 100% rename from annotation/Tools/Converter/cufflinks2hints.rb rename to annotation/tools/converter/cufflinks2hints.rb diff --git a/annotation/Tools/Converter/embl2gb.pl b/annotation/tools/converter/embl2gb.pl similarity index 52% rename from annotation/Tools/Converter/embl2gb.pl rename to annotation/tools/converter/embl2gb.pl index 4302860a4..ad7318e06 100755 --- a/annotation/Tools/Converter/embl2gb.pl +++ b/annotation/tools/converter/embl2gb.pl @@ -1,21 +1,13 @@ -#!/usr/local/bin/perl -w - -## NBIS 2015 -## jacques.dainat@nbis.se +#!/usr/bin/env perl use strict; +use warnings; use Pod::Usage; use Getopt::Long; use Bio::SeqIO; +use GAAS::GAAS; -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. 
# -######################################################## -}; - +my $header = get_gaas_header(); my $outfile = undef; my $embl = undef; my $help; @@ -25,15 +17,15 @@ "embl=s" => \$embl, "outfile|output|o|out|gb=s" => \$outfile)) { - pod2usage( { -message => "Failed to parse command line\n$header", + pod2usage( { -message => "$header\nFailed to parse command line", -verbose => 1, -exitval => 1 } ); } # Print Help and exit if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, + pod2usage( { -verbose => 99, + -exitval => 0, -message => "$header\n" } ); } @@ -54,7 +46,7 @@ $gb_out = Bio::SeqIO->new(-fh => \*STDOUT, -format => 'genbank'); } -### Read gb input file. +### Read gb input file. my $embl_in = Bio::SeqIO->new(-file => $embl, -format => 'embl'); @@ -68,12 +60,15 @@ =head1 NAME -gb2embl.pl - +gaas_embl2gb.pl + +=head1 DESCRIPTION + The script take a EMBL file as input, and will translate it in Genbank format. =head1 SYNOPSIS - ./embl2gb.pl --embl=infile.gff [ -o outfile ] + gaas_embl2gb.pl --embl=infile.embl [ -o outfile ] =head1 OPTIONS @@ -81,7 +76,7 @@ =head1 OPTIONS =item B<-embl> -Input EMBL file that will be read +Input EMBL file that will be read =item B<-o> , B<--output> , B<--out> , B<--outfile> or B<--gb> @@ -94,4 +89,30 @@ =head1 OPTIONS =back +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + =cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Converter/eugene2maker.rb b/annotation/tools/converter/eugene2maker.rb similarity index 100% rename from annotation/Tools/Converter/eugene2maker.rb rename to annotation/tools/converter/eugene2maker.rb diff --git a/annotation/Tools/Converter/gb2embl.pl b/annotation/tools/converter/gb2embl.pl similarity index 56% rename from annotation/Tools/Converter/gb2embl.pl rename to annotation/tools/converter/gb2embl.pl index 834c8fb2d..52b4d3d6d 100755 --- a/annotation/Tools/Converter/gb2embl.pl +++ b/annotation/tools/converter/gb2embl.pl @@ -4,19 +4,13 @@ ## jacques.dainat@nbis.se use strict; -use Data::Dumper; +use warnings; use Pod::Usage; use Getopt::Long; use Bio::SeqIO; +use GAAS::GAAS; -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - +my $header = get_gaas_header(); my $outfile = undef; my $gb = undef; my $help; @@ -33,8 +27,8 @@ # Print Help and exit if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, + pod2usage( { -verbose => 99, + -exitval => 0, -message => "$header\n" } ); } @@ -55,7 +49,7 @@ $embl_out = Bio::SeqIO->new(-fh => \*STDOUT, -format => 'embl'); } -### Read gb input file. +### Read gb input file. my $gb_in = Bio::SeqIO->new(-file => $gb, -format => 'genbank'); @@ -71,20 +65,23 @@ =head1 NAME -gb2embl.pl - +gaas_gb2embl.pl + +=head1 DESCRIPTION + The script take a Genebank file as input, and will translate it in EMBL format. 
=head1 SYNOPSIS - ./gb2embl.pl --gb=infile.gb [ -o outfile ] + gaas_gb2embl.pl --gb infile.gb [ -o outfile ] =head1 OPTIONS =over 8 -=item B<-gb> +=item B<--gb> -Input genebank file that will be read +Input genebank file that will be read =item B<-o> , B<--output> , B<--out> , B<--outfile> or B<--embl> @@ -97,4 +94,30 @@ =head1 OPTIONS =back +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + =cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Converter/rfam2apollo.rb b/annotation/tools/converter/rfam2apollo.rb similarity index 100% rename from annotation/Tools/Converter/rfam2apollo.rb rename to annotation/tools/converter/rfam2apollo.rb
diff --git a/annotation/tools/converter/scaffold2AGP.pl b/annotation/tools/converter/scaffold2AGP.pl
new file mode 100755
index 000000000..cf7969617
--- /dev/null
+++ b/annotation/tools/converter/scaffold2AGP.pl
@@ -0,0 +1,171 @@
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+use Pod::Usage;
+use Bio::Seq;
+use Bio::SeqIO ;
+use Getopt::Long;
+use File::Basename;
+use GAAS::GAAS;
+
+my $header = get_gaas_header();
+my $infile = undef;
+my $outfile = undef;
+my $contigcount=0;
+my $help = undef;
+
+if ( !GetOptions( 'i=s' => \$infile,
+                  'o|out|output=s' => \$outfile,
+                  'h|help!' => \$help ) )
+{
+    pod2usage( { -message => 'Failed to parse command line',
+                 -verbose => 1,
+                 -exitval => 1 } );
+}
+
+# Print Help and exit
+if ($help) {
+    pod2usage( { -verbose => 99,
+                 -exitval => 0,
+                 -message => "$header\n" } );
+}
+
+if ( ! defined( $infile) ) {
+    pod2usage( {
+           -message => "$header\nMust specify at least 1 parameters:\nInput fasta file (-i)\n",
+           -verbose => 0,
+           -exitval => 1 } );
+}
+
+
+# Deal with the output: the default file name is the input base name plus ".agp"
+if (! $outfile){
+    my ($file_in,$path,$ext) = fileparse($infile,qr/\.[^.]*/);
+    $outfile = $file_in.".agp";
+}
+# Three-argument open on a lexical handle; stop early if the file cannot be created
+open (my $agp_fh, '>', $outfile) or die "Cannot open output file $outfile: $!\n";
+
+# ===================
+
+my $inseq = Bio::SeqIO->new('-file' => "<$infile",
+                            '-format' => 'Fasta' );
+
+# Contigs cut out of the scaffolds are written to contigs.fasta
+my $outseq = Bio::SeqIO->new(
+    -file => ">contigs.fasta",
+    -format => 'fasta',
+    );
+
+# Read scaffolded FASTA-file
+while (my $seq_obj = $inseq->next_seq ) {
+    my $scaffold = $seq_obj->id;
+    my $sequence = $seq_obj->seq;
+    my $start=1;
+    my $oldsum;
+    my $newsum;
+    my $count=0;
+    my $rounded;
+
+    # Records whose id starts with "contig" (any case) are skipped
+    next if ($scaffold =~ /^contig/i);
+    # Runs of 20 or more N are gaps; the capture group keeps them in the list
+    foreach my $substring_sequence (split /(N{20,})/i, $sequence){
+        my $type;
+        my $substring_length = length($substring_sequence);
+        # split returns an empty leading field when the scaffold starts with a gap;
+        # skip it so that no empty contig and no zero-length AGP line are written
+        next if ($substring_length == 0);
+        $count++;
+        $oldsum=$start;
+        $newsum=$oldsum+$substring_length-1;
+
+        if ($substring_sequence !~ m/^N+$/i){
+            $type="W";
+            $contigcount++;
+            $rounded=sprintf("%05s", $contigcount);
+            my $contig_obj = Bio::Seq->new(-seq => "$substring_sequence",
+                                           -display_id => "contig$rounded",
+                                           -alphabet => "dna" );
+            $outseq->write_seq($contig_obj);
+        }
+        else {
+            $type="N";
+        }
+        $start += $substring_length;
+        if ($type eq "W"){
+            print {$agp_fh} "$scaffold\t$oldsum\t$newsum\t$count\t$type\tcontig$rounded\t1\t$substring_length\t+\n";
+        }
+        if ($type eq "N"){
+            print {$agp_fh} "$scaffold\t$oldsum\t$newsum\t$count\t$type\t$substring_length\tscaffold\tyes\tpaired-ends\n";
+        }
+    }
+}
+
+close ($agp_fh) or die "Cannot close output file $outfile: $!\n";
+
+__END__
+
+
+=head1 NAME
+
+gaas_scaffold2AGP.pl
+
+=head1 DESCRIPTION
+
+Creates an AGP file, needed by e.g. EMBL, for a scaffolded assembly.
+Each scaffold is split at runs of 20 or more N; the resulting contigs are
+written to the file contigs.fasta.
+
+=head1 SYNOPSIS
+
+  gaas_scaffold2AGP.pl -i scaffoldfile.fasta -o scaffoldfile.agp
+  gaas_scaffold2AGP.pl --help
+
+=head1 OPTIONS
+
+=over 8
+
+=item B<-i>
+
+Input fasta file.
+
+=item B<--out>, B<--output> or B<-o>
+
+Output agp file. If not specified, the base name of the input file with its
+extension replaced by .agp is used.
+
+=item B<--help> or B<-h>
+
+Display this helpful text.
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Did you find a bug?
+
+Do not hesitate to report bugs to help us keep track of the bugs and their
+resolution. Please use the GitHub issue tracking system available at this
+address:
+
+  https://github.com/NBISweden/GAAS/issues
+
+  Ensure that the bug was not already reported by searching under Issues.
+  If you're unable to find an (open) issue addressing the problem, open a new one.
+  Try as much as possible to include in the issue when relevant:
+  - a clear description,
+  - as much relevant information as possible,
+  - the command used,
+  - a data sample,
+  - an explanation of the expected behaviour that is not occurring.
+
+=head2 Do you want to contribute?
+
+You are very welcome, visit this address for the Contributing guidelines:
+https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md
+
+=cut
+
+AUTHOR - Jacques Dainat / Henrik Lantz
diff --git a/annotation/Tools/Util/fasta/fasta_bionano_filter_not_scaffolded_part.pl b/annotation/tools/fasta/fasta_bionano_filter_not_scaffolded_part.pl similarity index 79% rename from annotation/Tools/Util/fasta/fasta_bionano_filter_not_scaffolded_part.pl rename to annotation/tools/fasta/fasta_bionano_filter_not_scaffolded_part.pl index 505d922df..ed22ca967 100755 --- a/annotation/Tools/Util/fasta/fasta_bionano_filter_not_scaffolded_part.pl +++ b/annotation/tools/fasta/fasta_bionano_filter_not_scaffolded_part.pl @@ -1,6 +1,5 @@ #!/usr/bin/env perl - use Carp; use strict; use warnings; @@ -8,15 +7,9 @@ use Pod::Usage; use Bio::SeqIO; use IO::File; +use GAAS::GAAS; -my $header = qq{ -######################################################## -# NBIS 2018 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. 
# -######################################################## -}; - +my $header = get_gaas_header(); my $outfile = undef; my $file1 = undef; my $agp = undef; @@ -37,12 +30,11 @@ # Print Help and exit if ($help) { - pod2usage( { -message => "$header", - -verbose => 2, - -exitval => 2 - } ); + pod2usage( { -message => "$header\n", + -verbose => 99, + -exitval => 0 } ); } - + if ( ! ((defined($file1)) and (defined($agp)))){ pod2usage( { -message => "$header\nAt least 2 parameters are mandatory.\n", @@ -76,7 +68,7 @@ while (my $row = <$fh>) { if($row =~ /^#/){next;} chomp $row; - my ($object, $object_beg, $object_end, $part_number, $component_type, $component_id_or_gap_length, + my ($object, $object_beg, $object_end, $part_number, $component_type, $component_id_or_gap_length, $component_beg_or_gap_type, $component_end_or_linkage, $orientation_or_linkage_evidence ) = split(/\t/, $row); if($component_type ne "N" and $component_type ne "U"){ if ($object =~ /^Super-Scaffold/ ){ @@ -95,7 +87,7 @@ ######################### # primary contig header: >004069F|arrow|arrow_obj -# alternative contig header: >000019F-023-01|arrow|arrow_obj +# alternative contig header: >000019F-023-01|arrow|arrow_obj # If piece used not all contig: >000838F|arrow|arrow_subseq_300884:420004 => To keep @@ -124,7 +116,7 @@ print "subseq of alternative contigs $header has to be included into the final assembly. Indeed the primary version of this one has not been included\n"; $fastaout->write_seq($seq); } - } + } } } else{ #its from primary @@ -147,7 +139,7 @@ } else{ $fastaout->write_seq($seq); - } + } } else{ print "not match for <_obj> at the end of the string\n"; @@ -162,6 +154,10 @@ =head1 NAME +gaas_fasta_bionano_filter_not_scaffolded_part.pl + +=head1 DESCRIPTION + This script aims to filter the NOT_SCAFFOLDED.fasta file from bionano output in order to remove redundant part from secondary assembly. Indeed the NOT_SCAFFOLDED.fasta file is a mixup of the primary and the secondary assembly. 
Is not included in the output: @@ -171,8 +167,8 @@ =head1 NAME =head1 SYNOPSIS - perl my_script.pl --fasta1 file1 -a agp [--out outfile] - perl my_script.pl --help + gaas_fasta_bionano_filter_not_scaffolded_part.pl --fasta1 file1 -a agp [--out outfile] + gaas_fasta_bionano_filter_not_scaffolded_part.pl --help =head1 OPTIONS @@ -184,7 +180,7 @@ =head1 OPTIONS =item B<-a>, B<--agp> or B<-f2> -This is a file containing the headers of sequence to be removed. Only one ID per line. Header should be identical at 100% to be removed. +This is a file containing the headers of sequence to be removed. Only one ID per line. Header should be identical at 100% to be removed. =item B<-o> , B<--output> , B<--out> or B<--outfile> @@ -198,4 +194,30 @@ =head1 OPTIONS =back +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + =cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Util/fasta/fasta_checkProteins.pl b/annotation/tools/fasta/fasta_checkProteins.pl similarity index 100% rename from annotation/Tools/Util/fasta/fasta_checkProteins.pl rename to annotation/tools/fasta/fasta_checkProteins.pl diff --git a/annotation/Tools/Util/fasta/fasta_cleaner.pl b/annotation/tools/fasta/fasta_cleaner.pl similarity index 60% rename from annotation/Tools/Util/fasta/fasta_cleaner.pl rename to annotation/tools/fasta/fasta_cleaner.pl index 2cba0756f..bec0175f1 100755 --- a/annotation/Tools/Util/fasta/fasta_cleaner.pl +++ b/annotation/tools/fasta/fasta_cleaner.pl @@ -1,12 +1,5 @@ #!/usr/bin/env perl -# A filter for Uniprot and RefSeq fasta files that makes the fasta -# headers a bit more terse. Reads from STDIN, writes to STDOUT. -# -## Note: Will pass any other fasta file unchanged. -## Note: For Uniprot, will also change any 'O' in the protein sequence -## into 'K'. - use strict; use warnings; use Pod::Usage; @@ -18,14 +11,9 @@ my $outfile; my $help = 0; my $verbose = 0; -my $header = qq{ -######################################################## -# NBIS 2018 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. 
# -######################################################## -}; +use GAAS::GAAS; +my $header = get_gaas_header(); my @copyARGV=@ARGV; Getopt::Long::Configure ('bundling'); @@ -43,11 +31,11 @@ # Print Help and exit if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, + pod2usage( { -verbose => 99, + -exitval => 0, -message => "$header\n" } ); } - + if ( !(defined($file_fasta)) ){ pod2usage( { -message => "$header\nAt least 1 parameter is mandatory:\n Input fasta file (--fasta)\n\n", @@ -141,3 +129,75 @@ my $end_run = time(); my $run_time = $end_run - $start_run; print "Job done in $run_time seconds\n"; + +__END__ + + +=head1 NAME + +gaas_fasta_cleaner.pl + +=head1 DESCRIPTION + +A filter for Uniprot and RefSeq fasta files that makes the fasta +headers a bit more terse. Reads from STDIN, writes to STDOUT. + +Note: Will pass any other fasta file unchanged. +Note: For Uniprot, will also change any 'O' in the protein sequence + into 'K'. + +=head1 SYNOPSIS + + gaas_fasta_cleaner.pl -f infile.fasta [ -o outfile ] + gaas_fasta_cleaner.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<-f>, B<--fa> or B<--fasta> + +Input fasta file. + +=item B<-v> + +Add verbosity + +=item B<-o>, B<--output>, B<--outfile> or B<--out> + +Output fasta file. If no output file is specified, the output will be +written to STDOUT. + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. 
+ Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Util/fasta/fasta_domain_extractor.pl b/annotation/tools/fasta/fasta_domain_extractor.pl similarity index 88% rename from annotation/Tools/Util/fasta/fasta_domain_extractor.pl rename to annotation/tools/fasta/fasta_domain_extractor.pl index a19de7e75..b3c7e6136 100755 --- a/annotation/Tools/Util/fasta/fasta_domain_extractor.pl +++ b/annotation/tools/fasta/fasta_domain_extractor.pl @@ -1,25 +1,13 @@ #!/usr/bin/env perl -################################################### -# domainExtractor.pl - Jacques Dainat 01/2015 # -# Bioinformatics Infrastructure for Life Sciences # -# jacques.dainat@nbis.se # -################################################### - - use strict; use warnings; use Pod::Usage; use Getopt::Long; use IO::File; -my $header_nbis = qq{ -######################################################## -# NBIS 2018 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. 
# -######################################################## -}; +use GAAS::GAAS; +my $header_nbis = get_gaas_header(); my $inputFile; my $outputFile; my $nameSeq; @@ -115,7 +103,7 @@ if( ($nbSeq > 1) && (defined($nameSeq)) && ($headerFound eq "no") ){ print "The header you specified >$nameSeq< doesn't exist in this MultiFasta file.\nPlease check it.\n";exit; -} +} print "Name studied sequence: $header\n"; #print "sequence: $seq\n"; @@ -152,7 +140,7 @@ =head1 NAME Rule of coordinate system 1-based coordinate system = Numbers nucleotides directly 0-based coordinate system = Numbers between nucleotides - + =head1 SYNOPSIS fasta_domain_extractor.pl -i -s -e [-o -n ] @@ -166,19 +154,19 @@ =head1 OPTIONS Input fasta file that will be read. -=item B<-s> or B<--start> +=item B<-s> or B<--start> Start coordinate of the region that will be extract -=item B<-e> or B<--end> +=item B<-e> or B<--end> End coordinate of the region that will be extract -=item B<-n> or B<--name> +=item B<-n> or B<--name> In Multifasta file case, the name allows to specify which sequence you are interested in. -=item B<-o> or B<--output> +=item B<-o> or B<--output> Output file. If no output file is specified, the output will be written to STDOUT. 
diff --git a/annotation/Tools/Util/fasta/fasta_extractFaFromMultiFa.sh b/annotation/tools/fasta/fasta_extractFaFromMultiFa.sh similarity index 100% rename from annotation/Tools/Util/fasta/fasta_extractFaFromMultiFa.sh rename to annotation/tools/fasta/fasta_extractFaFromMultiFa.sh diff --git a/annotation/Tools/Util/fasta/fasta_extract_sequence_from_OG.pl b/annotation/tools/fasta/fasta_extract_sequence_from_OG.pl similarity index 76% rename from annotation/Tools/Util/fasta/fasta_extract_sequence_from_OG.pl rename to annotation/tools/fasta/fasta_extract_sequence_from_OG.pl index 283cc02d9..4d81e8af2 100755 --- a/annotation/Tools/Util/fasta/fasta_extract_sequence_from_OG.pl +++ b/annotation/tools/fasta/fasta_extract_sequence_from_OG.pl @@ -4,14 +4,15 @@ # Implement case insensitive ### use strict; +use warnings; use Pod::Usage; use Getopt::Long; use Bio::SeqIO ; use Bio::DB::Fasta; -use Data::Dumper; use IO::File; -use warnings; +use GAAS::GAAS; +my $header = get_gaas_header(); my $start_run = time(); my $opt_fastafile; my $opt_help = 0; @@ -23,14 +24,6 @@ my @tab_seqID; my $path; -my $header = qq{ -######################################################## -# NBIS 2018 - Sweden # -# lucile.soler\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - # OPTION MANAGMENT my @copyARGV=@ARGV; if ( !GetOptions( 'f|fa|fasta=s' => \$opt_fastafile, @@ -45,9 +38,9 @@ # Print Help and exit if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header \n" } ); + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); } if ( (! (defined($opt_OGfile)) ) or (! (defined($opt_fastafile)) ) ){ @@ -171,14 +164,17 @@ =head1 NAME -fasta_extract_sequence_from_OG.pl - -This script extract sequence in fasta format from a fasta file. You can extract one fasta sequence providing the name of a file created by the in-house orthoMCL pipeline. 
+gaas_fasta_extract_sequence_from_OG.pl + +=head1 DESCRIPTION + +This script extracts sequence in fasta format from a fasta file. You can extract one fasta sequence providing the name of a file created by the in-house orthoMCL pipeline. The OG file contains all the orthoMCL groups and the ID of the sequences in each group. =head1 SYNOPSIS - ./fasta_extract_sequence_from_OG.pl -f infile.fasta -og OGfile.txt - ./fasta_extract_sequence_from_OG.pl --help + gaas_fasta_extract_sequence_from_OG.pl -f infile.fasta -og OGfile.txt + gaas_fasta_extract_sequence_from_OG.pl --help =head1 OPTIONS @@ -208,4 +204,30 @@ =head1 OPTIONS =back +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + =cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Util/fasta/fasta_extract_sequence_from_id.pl b/annotation/tools/fasta/fasta_extract_sequence_from_id.pl similarity index 77% rename from annotation/Tools/Util/fasta/fasta_extract_sequence_from_id.pl rename to annotation/tools/fasta/fasta_extract_sequence_from_id.pl index 8b59571da..d96238813 100755 --- a/annotation/Tools/Util/fasta/fasta_extract_sequence_from_id.pl +++ b/annotation/tools/fasta/fasta_extract_sequence_from_id.pl @@ -4,12 +4,14 @@ # Implement case insensitive ### use strict; +use warnings; use Pod::Usage; use Getopt::Long; use Bio::SeqIO ; use Bio::DB::Fasta; -use Data::Dumper; +use GAAS::GAAS; +my $header = get_gaas_header(); my $start_run = time(); my $col = undef; @@ -18,22 +20,14 @@ my $opt_fastafile; my $opt_output; my $opt_help = 0; -my $opt_name = undef; - -my $header = qq{ -######################################################## -# NBIS 2018 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; +my $opt_name = undef; # OPTION MANAGMENT my @copyARGV=@ARGV; if ( !GetOptions( 'f|fa|fasta=s' => \$opt_fastafile, "line=i" => \$lineToAvoid, "col=i" => \$col, - "s=s" =>\$separator, + "s=s" =>\$separator, 'n|name=s' => \$opt_name, 'o|output=s' => \$opt_output, 'h|help!' => \$opt_help ) ) @@ -45,11 +39,11 @@ # Print Help and exit if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header \n" } ); + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); } - + if ( (! (defined($opt_name)) ) or (! (defined($opt_fastafile)) ) ){ pod2usage( { -message => "\nAt least 2 parametes are mandatory:\nInput reference gff file (-g); Input reference fasta file (-f)\n\n". 
@@ -114,7 +108,7 @@ } else{ @cols = split /$separator/, $_; - } + } my $id = $cols[$col]; $id =~ s/[^[:print:]]+//g; #print $id."\n"; @@ -159,7 +153,7 @@ } else{ foreach my $seq_obj (@list_seq_result){ - $ostream->write_seq($seq_obj); + $ostream->write_seq($seq_obj); } } @@ -179,7 +173,7 @@ ######## ###### #### - ## + ## @@ -188,19 +182,22 @@ =head1 NAME -fasta_extract_sequence_from_id.pl - +gaas_fasta_extract_sequence_from_id.pl + +=head1 DESCRIPTION + This script extract sequence in fasta format from a fasta file. You can extract one fasta sequence providing a sequence name or the name of a file containing a list of sequence name (one by line) =head1 SYNOPSIS - ./fasta_extract_sequence_from_id.pl -f=infile.fasta -n sequenceID [ -o outfile ] - ./fasta_extract_sequence_from_id.pl --help + gaas_fasta_extract_sequence_from_id.pl -f=infile.fasta -n sequenceID [ -o outfile ] + gaas_fasta_extract_sequence_from_id.pl --help =head1 OPTIONS =over 8 -=item B<-f> or B<--fasta> +=item B<-f> or B<--fasta> Input fasta file. @@ -231,4 +228,30 @@ =head1 OPTIONS =back +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + =cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Util/fasta/fasta_filter_by_size.pl b/annotation/tools/fasta/fasta_filter_by_size.pl similarity index 65% rename from annotation/Tools/Util/fasta/fasta_filter_by_size.pl rename to annotation/tools/fasta/fasta_filter_by_size.pl index e146e28f6..d571d0d09 100755 --- a/annotation/Tools/Util/fasta/fasta_filter_by_size.pl +++ b/annotation/tools/fasta/fasta_filter_by_size.pl @@ -4,24 +4,19 @@ # Implement case insensitive ### use strict; +use warnings; use Pod::Usage; use Getopt::Long; use Bio::SeqIO ; +use GAAS::GAAS; +my $header = get_gaas_header(); my $start_run = time(); my $opt_fastafile; my $opt_output; -my $opt_help = 0; -my $opt_size = 1000; - -my $header = qq{ -######################################################## -# NBIS 2018 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; +my $opt_help = undef; +my $opt_size = 1000; # OPTION MANAGMENT my @copyARGV=@ARGV; @@ -37,11 +32,11 @@ # Print Help and exit if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header \n" } ); -} - + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + if (! (defined($opt_fastafile)) ) { pod2usage( { -message => "\nAt least 1 parameter is mandatory:\nInput reference fasta file (-f)\n\n". @@ -93,7 +88,7 @@ ######## ###### #### - ## + ## @@ -102,20 +97,23 @@ =head1 NAME -fasta_filer_by_size.pl - -This script filter sequences by size. It will remove from the output all sequences under a certain size (1000 bp/aa by default) +gaas_fasta_filter_by_size.pl + +=head1 DESCRIPTION + +This script filters sequences by size. 
It will remove from the output all sequences under a certain size (1000 bp/aa by default) We keep all sequences >= --size =head1 SYNOPSIS - ./fasta_filer_by_size.pl -f=infile.fasta [ -o outfile ] - ./fasta_filer_by_size.pl --help + gaas_fasta_filter_by_size.pl -f infile.fasta [ -o outfile ] + gaas_fasta_filter_by_size.pl --help =head1 OPTIONS =over 8 -=item B<-f> or B<--fasta> +=item B<-f> or B<--fasta> Input fasta file. @@ -134,4 +132,30 @@ =head1 OPTIONS =back +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + =cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Util/fasta/fasta_get_longestORF.pl b/annotation/tools/fasta/fasta_get_longestORF.pl similarity index 82% rename from annotation/Tools/Util/fasta/fasta_get_longestORF.pl rename to annotation/tools/fasta/fasta_get_longestORF.pl index ac456b36b..1343fa5f1 100755 --- a/annotation/Tools/Util/fasta/fasta_get_longestORF.pl +++ b/annotation/tools/fasta/fasta_get_longestORF.pl @@ -2,23 +2,17 @@ use Carp; use strict; +use warnings; use POSIX qw(strftime); use Getopt::Long; -use NBIS::FASTA::Longest_orf; use Pod::Usage; use Bio::Seq; use Bio::SeqIO; +use GAAS::FASTA::Longest_orf; +use GAAS::GAAS; +my $header = get_gaas_header(); my $start_run = time(); - -my $header = qq{ -######################################################## -# NBIS 2017 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. 
# -######################################################## -}; - my $codonTableId=1; my $MIN_PROT_LENGTH = 100; my $force_start_codon = undef; @@ -50,11 +44,11 @@ # Print Help and exit if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, + pod2usage( { -verbose => 99, + -exitval => 0, -message => "$header\n" } ); } - + if ( !(defined($file_fasta)) ){ pod2usage( { -message => "$header\nAt least 1 parameter is mandatory:\n Input fasta file (--fasta)\n\n", @@ -103,22 +97,22 @@ @orf_structs = reverse sort {$a->{length}<=>$b->{length}} @orf_structs; my $acc = $seqObj->id(); print "looking at sequence $acc \n" if $verbose; - - my %candidates; + + my %candidates; while (@orf_structs) { my $orf = shift @orf_structs; - + my $start = $orf->{start}; my $stop = $orf->{stop}; - - my $length = int((abs($start-$stop)+1)/3); + + my $length = int((abs($start-$stop)+1)/3); my $orient = $orf->{orient}; - my $protein = $orf->{protein}; - + my $protein = $orf->{protein}; + ################################## # adjust for boundary conditions, since starts and stops run off the ends of the sequences at partial codons ################################# - + # adjust at 3' end if ($stop > length($seq)) { $stop -= 3; @@ -126,7 +120,7 @@ if ($start > length($seq)) { $start -= 3; } - + # adjust at 5' end if ($stop < 1) { $stop += 3; @@ -135,15 +129,15 @@ $start += 3; } - + if ($length < $MIN_PROT_LENGTH) { next; } if ($force_complete and (substr($orf->{protein},0,1) ne 'M' or substr($orf->{protein},-1) ne '*' ) ) {next;} if ($force_start_codon and substr($orf->{protein},0,1) ne 'M' ) {next;} - + print "Candidate (len $length): ".Dumper($orf) if $verbose; push (@{$canditates{$acc}}, $orf); - + } if($keep_all_orf){ @@ -157,7 +151,7 @@ $cpt++; } } - else{ # let's keep only the longest + else{ # let's keep only the longest my $orf = @{$canditates{$acc}}[0]; $seqObj->seq($orf->{protein}); #changing the DNA sequence by the corresponding AA sequence is enough $fasta_out->write_seq($seqObj); @@ 
-198,7 +192,10 @@ sub exists_keys { =head1 NAME -fasta_get_longestORF.pl - +gaas_fasta_get_longestORF.pl + +=head1 DESCRIPTION + The script take a nucleotide fasta file as input and will extract the longest ORF(s) and translate it(them) in AA. By default it extracts only the longest ORF even incomplete (missing start or/and stop codon) >= 100 AA. This script is an adpatation of the TransDecoder.LongestORF tool, adapted to use bioperl. @@ -206,8 +203,8 @@ =head1 NAME =head1 SYNOPSIS - ./fasta_get_longestORF.pl -f infile.fasta [ -o outfile ] - ./fasta_get_longestORF.pl -h + gaas_fasta_get_longestORF.pl -f infile.fasta [ -o outfile ] + gaas_fasta_get_longestORF.pl -h =head1 OPTIONS @@ -252,4 +249,30 @@ =head1 OPTIONS =back +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + =cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Util/fasta/fasta_removeFromFasta1_intersection_withFasta2ID.pl b/annotation/tools/fasta/fasta_removeFromFasta1_intersection_withFasta2ID.pl similarity index 71% rename from annotation/Tools/Util/fasta/fasta_removeFromFasta1_intersection_withFasta2ID.pl rename to annotation/tools/fasta/fasta_removeFromFasta1_intersection_withFasta2ID.pl index f79e1208b..e58beed8b 100755 --- a/annotation/Tools/Util/fasta/fasta_removeFromFasta1_intersection_withFasta2ID.pl +++ b/annotation/tools/fasta/fasta_removeFromFasta1_intersection_withFasta2ID.pl @@ -6,15 +6,9 @@ use Getopt::Long; use Pod::Usage; use Bio::SeqIO; +use GAAS::GAAS; -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - +my $header = get_gaas_header(); my $outfile = undef; my $file1 = undef; my $file2 = undef; @@ -34,12 +28,11 @@ # Print Help and exit if ($help) { - pod2usage( { -message => "$header", - -verbose => 2, - -exitval => 2 - } ); + pod2usage( { -message => "$header\n", + -verbose => 99, + -exitval => 0 } ); } - + if ( ! ((defined($file1)) and (defined($file2)))){ pod2usage( { -message => "$header\nAt least 2 parameters are mandatory.\n", @@ -114,7 +107,7 @@ =head1 OPTIONS =item B<--fasta2>, B<--file2> or B<-f2> -Fasta file 2. This is the "reference file" in which we will remove sequences already existing in file 1. +Fasta file 2. This is the "reference file" in which we will remove sequences already existing in file 1. =item B<-o> , B<--output> , B<--out> or B<--outfile> @@ -128,4 +121,30 @@ =head1 OPTIONS =back +=head1 FEEDBACK + +=head2 Did you find a bug? 
+ +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + =cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Util/fasta/fasta_removeSeqFromIDlist.pl b/annotation/tools/fasta/fasta_removeSeqFromIDlist.pl similarity index 71% rename from annotation/Tools/Util/fasta/fasta_removeSeqFromIDlist.pl rename to annotation/tools/fasta/fasta_removeSeqFromIDlist.pl index 38545ca45..82a4daa9d 100755 --- a/annotation/Tools/Util/fasta/fasta_removeSeqFromIDlist.pl +++ b/annotation/tools/fasta/fasta_removeSeqFromIDlist.pl @@ -3,19 +3,14 @@ use Carp; use strict; +use warnings; use Getopt::Long; use Pod::Usage; use Bio::SeqIO; use IO::File; +use GAAS::GAAS; -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - +my $header = get_gaas_header(); my $outfile = undef; my $file1 = undef; my $file2 = undef; @@ -35,12 +30,12 @@ # Print Help and exit if ($help) { - pod2usage( { -message => "$header", - -verbose => 2, - -exitval => 2 + pod2usage( { -message => "$header\n", + -verbose => 99, + -exitval => 0 } ); } - + if ( ! 
((defined($file1)) and (defined($file2)))){ pod2usage( { -message => "$header\nAt least 2 parameters are mandatory.\n", @@ -113,13 +108,17 @@ =head1 NAME +gaas_fasta_removeSeqFromIDlist.pl + +=head1 DESCRIPTION + Compare a fasta file to a list of ID in order to remove the matching name from file 1. The whole header must be identical to be consider as identic. =head1 SYNOPSIS - perl my_script.pl --fasta1 file1 --list file2 [--out outfile] - perl my_script.pl --help + gaas_fasta_removeSeqFromIDlist.pl --fasta1 file1 --list file2 [--out outfile] + gaas_fasta_removeSeqFromIDlist.pl --help =head1 OPTIONS @@ -131,7 +130,7 @@ =head1 OPTIONS =item B<--fasta2>, B<--file2> or B<-f2> -This is a file containing the headers of sequence to be removed. Only one ID per line. Header should be identical at 100% to be removed. +This is a file containing the headers of sequence to be removed. Only one ID per line. Header should be identical at 100% to be removed. =item B<-o> , B<--output> , B<--out> or B<--outfile> @@ -145,4 +144,30 @@ =head1 OPTIONS =back +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + =cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Util/fasta/fasta_splitMultFastaFile.sh b/annotation/tools/fasta/fasta_splitMultFastaFile.sh similarity index 100% rename from annotation/Tools/Util/fasta/fasta_splitMultFastaFile.sh rename to annotation/tools/fasta/fasta_splitMultFastaFile.sh diff --git a/annotation/Tools/Util/fasta/fasta_spliter_overlap.pl b/annotation/tools/fasta/fasta_spliter_overlap.pl similarity index 74% rename from annotation/Tools/Util/fasta/fasta_spliter_overlap.pl rename to annotation/tools/fasta/fasta_spliter_overlap.pl index c23465783..3e8f84566 100755 --- a/annotation/Tools/Util/fasta/fasta_spliter_overlap.pl +++ b/annotation/tools/fasta/fasta_spliter_overlap.pl @@ -7,22 +7,16 @@ use Pod::Usage; use Getopt::Long; use Bio::SeqIO ; +use GAAS::GAAS; +my $header = get_gaas_header(); my $start_run = time(); my $opt_fastafile; my $opt_output; my $opt_help = 0; my $opt_chunck_size = undef; -my $opt_overlap = 0; - -my $header = qq{ -######################################################## -# NBIS 2018 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; +my $opt_overlap = 0; # OPTION MANAGMENT my @copyARGV=@ARGV; @@ -39,11 +33,11 @@ # Print Help and exit if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, - -message => "$header \n" } ); -} - + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + if (! defined($opt_fastafile) or ! defined($opt_chunck_size) ) { pod2usage( { -message => "\nAt least 2 parameter is mandatory:\nInput reference fasta file (-f)\nChunck_size (-c)\n". 
@@ -78,15 +72,15 @@ while ( my $seq = $fasta1->next_seq() ) { my $start = 1; my $end = $opt_chunck_size; - + while ( $end < $seq->length() ) { my $sequence = undef; my $seqObj = undef; my $id_seq = undef; if($seq->length() > ($end+$opt_chunck_size) ){ - - + + $sequence = $seq->subseq($start, $end); $seqObj = Bio::Seq->new( '-format' => 'fasta' , -seq => $sequence); $id_seq = $seq->id."_".$start."_".$end; @@ -123,7 +117,7 @@ ######## ###### #### - ## + ## @@ -132,19 +126,22 @@ =head1 NAME -fasta_spliter_overlap.pl - +gaas_fasta_spliter_overlap.pl + +=head1 DESCRIPTION + This script split sequences by size with an overlaped part. =head1 SYNOPSIS - ./fasta_filer_by_size.pl -f=infile.fasta [ -o outfile ] - ./fasta_filer_by_size.pl --help + gaas_fasta_spliter_overlap.pl -f=infile.fasta [ -o outfile ] + gaas_fasta_spliter_overlap.pl --help =head1 OPTIONS =over 8 -=item B<-f> or B<--fasta> +=item B<-f> or B<--fasta> Input fasta file. @@ -163,4 +160,30 @@ =head1 OPTIONS =back +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + =cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Util/fasta/fasta_statisticsAndPlot.pl b/annotation/tools/fasta/fasta_statistics.pl similarity index 100% rename from annotation/Tools/Util/fasta/fasta_statisticsAndPlot.pl rename to annotation/tools/fasta/fasta_statistics.pl diff --git a/annotation/Tools/EnsEMBL/removeIsoforms.sh b/annotation/tools/fasta/removeIsoforms.sh similarity index 100% rename from annotation/Tools/EnsEMBL/removeIsoforms.sh rename to annotation/tools/fasta/removeIsoforms.sh diff --git a/annotation/Tools/Util/fastq/fastq_check_sync_pair1_pair2.pl b/annotation/tools/fastq/fastq_check_sync_pair1_pair2.pl similarity index 75% rename from annotation/Tools/Util/fastq/fastq_check_sync_pair1_pair2.pl rename to annotation/tools/fastq/fastq_check_sync_pair1_pair2.pl index 0bc52d813..42fbfc1c2 100755 --- a/annotation/Tools/Util/fastq/fastq_check_sync_pair1_pair2.pl +++ b/annotation/tools/fastq/fastq_check_sync_pair1_pair2.pl @@ -1,18 +1,14 @@ #!/usr/bin/env perl -################################################### -# Jacques Dainat 01/2018 # -# jacques.dainat@nbis.se # -################################################### - use strict; use warnings; use Pod::Usage; use Getopt::Long; use Data::Dumper; +use GAAS::GAAS; +my $header = get_gaas_header(); my $start_run = time(); - my @inputFile; my $check_complete; my $gzip_input; @@ -32,7 +28,7 @@ } if ($opt_help) { - pod2usage( { -verbose => 2, + pod2usage( { -verbose => 99, -exitval => 0 } ); } @@ -66,8 +62,8 @@ $count++; my $id1 = <$in1>; my $id2 = <$in2>; - # skip all line that are not header - next unless ($count % 4 == 1 ); + # skip all line that are not header + next unless ($count % 4 == 1 ); #extract header chomp $id1; @@ -78,11 +74,11 @@ if ($id1 =~ /^@\S+\/[12]$/) { # @ at the start of the line followed by non-whitespace, a /, a 1 or 2, the end of 
the line $header_type = 1; print STDOUT "Read Id looks like Casava 1.7 style\n"; # TESTING - } + } elsif ($id1 =~ /^@\S+\W[12]\S+$/) { # @ at the start of the line followed by non-whitspace, a space, a 1 or 2, non-whitespace $header_type = 2; - print STDOUT "Read Id looks like Casava 1.8 style\n"; - } + print STDOUT "Read Id looks like Casava 1.8 style\n"; + } else { print STDOUT "Unknwon id style (Not Casava 1.7 or 1.8): $id1\n"; exit 1; @@ -136,7 +132,7 @@ ######## ###### #### - ## + ## sub concat_list_from_left{ @@ -145,7 +141,7 @@ sub concat_list_from_left{ my $result=""; foreach my $element (@{$list}){ - $result = $result.$element; + $result = $result.$element; } return $result; @@ -165,27 +161,26 @@ sub split_keep_delimiter{ push @result, $element; } $cpt++; - } + } return \@result; } __END__ - 'c|complete!' => \$check_complete, - 'h|help!' => \$opt_help ) - =head1 NAME -fastq_check_sync_pair1_pair2.pl +gaas_fastq_check_sync_pair1_pair2.pl -=head1 SYNOPSIS +=head1 DESCRIPTION -The aim of this script is to check that paired reads from 2 fastq files are still synchronized. -Read1 and the read2, that come from a paired sequencing, are in the same position in the two fastq files. +The aim of this script is to check that paired reads from 2 fastq files are still synchronized. +Read1 and the read2, that come from a paired sequencing, are in the same position in the two fastq files. But the order of read in R1/R2 files can get out of sync if you e.g scan/trim the two files independently. So it is a good thing always to check. - fastq_check_sync_pair1_pair2.pl -i input_R1.fastq -i input_R2.fastq - fastq_check_sync_pair1_pair2.pl --help +=head1 SYNOPSIS + + gaas_fastq_check_sync_pair1_pair2.pl -i input_R1.fastq -i input_R2.fastq + gaas_fastq_check_sync_pair1_pair2.pl --help =head1 OPTIONS @@ -209,4 +204,30 @@ =head1 OPTIONS =back +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. 
Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + =cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Util/fastq/fastq_deinterleave_bash.pl b/annotation/tools/fastq/fastq_deinterleave_bash.pl similarity index 80% rename from annotation/Tools/Util/fastq/fastq_deinterleave_bash.pl rename to annotation/tools/fastq/fastq_deinterleave_bash.pl index 63edd8252..a19ba8ee1 100755 --- a/annotation/Tools/Util/fastq/fastq_deinterleave_bash.pl +++ b/annotation/tools/fastq/fastq_deinterleave_bash.pl @@ -1,16 +1,12 @@ #!/usr/bin/env perl -################################################### -# Jacques Dainat 01/2018 # -# jacques.dainat@nbis.se # -################################################### - use strict; use warnings; use Pod::Usage; use Getopt::Long; -use Data::Dumper; +use GAAS::GAAS; +my $header = get_gaas_header(); my $start_run = time(); my $inputFile; @@ -36,12 +32,12 @@ } if ($opt_help) { - pod2usage( { -verbose => 2, + pod2usage( { -verbose => 99, -exitval => 0 } ); } if (! 
$inputFile ){ - pod2usage( { -message => 'at least 1 input file is mandatory', + pod2usage( { -message => 'At least 1 input file is mandatory', -verbose => 1, -exitval => 1 } ); } @@ -56,11 +52,11 @@ if ($suffix eq ".gzip" or $suffix eq ".gz") { $gzip_input=1; $fq_ext = pop(@pieces); - $filename = concat_list_from_left(\@pieces); + $filename = concat_list_from_left(\@pieces); } else{ $fq_ext = $suffix; - $filename = concat_list_from_left(\@pieces); + $filename = concat_list_from_left(\@pieces); } if ($gzip_input) {#unzip input case @@ -72,7 +68,7 @@ } else{ print "command2\n"; - my $command = 'gzip -dc '.$inputFile.' | paste - - - - - - - - | tee >(cut -f 1-4 | tr "\t" "\n" > '.$filename."_".$output_suffix1.$fq_ext.') | cut -f 5-8 | tr "\t" "\n" > '.$filename."_".$output_suffix2.$fq_ext; + my $command = 'gzip -dc '.$inputFile.' | paste - - - - - - - - | tee >(cut -f 1-4 | tr "\t" "\n" > '.$filename."_".$output_suffix1.$fq_ext.') | cut -f 5-8 | tr "\t" "\n" > '.$filename."_".$output_suffix2.$fq_ext; print "Command launched:\n".$command."\n"; system ("/bin/bash -c '$command'"); } @@ -86,7 +82,7 @@ } else{ print "command4\n"; - my $command = 'cat '.$inputFile.' | paste - - - - - - - - | tee >(cut -f 1-4 | tr "\t" "\n" > '.$filename."_".$output_suffix1.$fq_ext.') | cut -f 5-8 | tr "\t" "\n" > '.$filename."_".$output_suffix2.$fq_ext; + my $command = 'cat '.$inputFile.' 
| paste - - - - - - - - | tee >(cut -f 1-4 | tr "\t" "\n" > '.$filename."_".$output_suffix1.$fq_ext.') | cut -f 5-8 | tr "\t" "\n" > '.$filename."_".$output_suffix2.$fq_ext; print "Command launched:\n".$command."\n"; system ("/bin/bash -c '$command'"); } @@ -107,7 +103,7 @@ ######## ###### #### - ## + ## sub concat_list_from_left{ @@ -116,7 +112,7 @@ sub concat_list_from_left{ my $result=""; foreach my $element (@{$list}){ - $result = $result.$element; + $result = $result.$element; } return $result; @@ -136,7 +132,7 @@ sub split_keep_delimiter{ push @result, $element; } $cpt++; - } + } return \@result; } @@ -153,7 +149,7 @@ =head1 SYNOPSIS Can deinterleave 100 million paired reads (200 million total reads; a 43Gbyte file), in memory (/dev/shm), in 4m15s (255s) - + Script inspired by a pure bash code from the nathanhaigh repository: https://gist.github.com/3521724 Also see the interleaving script: https://gist.github.com/4544979 @@ -183,7 +179,7 @@ =head1 OPTIONS STRING: Suffix to add to the output file 2. By default 2. -=item B<-gz> or B<--gzip> +=item B<-gz> or B<--gzip> Bolean: The output will be compressed using pigz. @@ -197,4 +193,30 @@ =head1 OPTIONS =back +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + =cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Util/fastq/fastq_guessMyFormat.pl b/annotation/tools/fastq/fastq_guessMyFormat.pl similarity index 100% rename from annotation/Tools/Util/fastq/fastq_guessMyFormat.pl rename to annotation/tools/fastq/fastq_guessMyFormat.pl diff --git a/annotation/Tools/Util/fastq/fastq_interleave.py b/annotation/tools/fastq/fastq_interleave.py similarity index 100% rename from annotation/Tools/Util/fastq/fastq_interleave.py rename to annotation/tools/fastq/fastq_interleave.py diff --git a/annotation/Tools/Util/fastq/fastq_pairfq_lite.pl b/annotation/tools/fastq/fastq_pairfq_lite.pl similarity index 91% rename from annotation/Tools/Util/fastq/fastq_pairfq_lite.pl rename to annotation/tools/fastq/fastq_pairfq_lite.pl index 94c9f64c0..4ece1e562 100755 --- a/annotation/Tools/Util/fastq/fastq_pairfq_lite.pl +++ b/annotation/tools/fastq/fastq_pairfq_lite.pl @@ -16,21 +16,21 @@ my $fpread; # file of paired forward reads for 'makepairs' method my $rpread; # file of paired reverse reads for 'makepairs' method my $fsread; # file of unpaired forward reads for 'makepairs' method -my $rsread; # file of unpaired reverse reads for 'makepairs' method +my $rsread; # file of unpaired reverse reads for 'makepairs' method my $pairnum; # for the 'addinfo' method my $uppercase; # for 'addinfo' method my $stats; # currently, for 'makepairs' option only -my $version; +my $version; my $help; my $man; my $script = basename($0, ()); $script = "pairfq_lite" if $script =~ /^-$|stdin/i; -GetOptions( +if ( !GetOptions( 'i|infile=s' => \$infile, 'o|outfile=s' => \$outfile, - 'p|pairnum=i' => \$pairnum, + 'p|pairnum=i' => \$pairnum, 'f|forward=s' => \$fread, 'r|reverse=s' => \$rread, 'fp|forw_paired=s' => \$fpread, @@ -40,15 +40,21 @@ 'uc|uppercase' => \$uppercase, 's|stats' => \$stats, 'version' => \$version, - 
'h|help' => \$help, - 'm|man' => \$man, - ) or pod2usage( "Try '$0 --man' for more information." ); + 'h|help' => \$help ) ) +{ + pod2usage( { -message => 'Failed to parse command line', + -verbose => 1, + -exitval => 1 } ); +} +# Print Help and exit +if ($help) { + pod2usage( { -verbose => 99, + -exitval => 0 } ); +} # # Check @ARGV # -usage($script) and exit(0) if $help; -pod2usage( -verbose => 2 ) if $man; print $VERSION and exit(0) if $version; my $method = shift; @@ -165,7 +171,7 @@ sub make_pairs_and_singles { "Please see https://github.com/sestaton/Pairfq or the README for supported formats. Exiting.\n\n"; exit(1); } - + if ($fname =~ /\|\|/) { my ($name, $comm); ($name, $comm) = mk_vec($fname); @@ -176,45 +182,45 @@ sub make_pairs_and_singles { } if (exists $rseqpairs->{$fname}) { - $fpct++; + $fpct++; $rpct++; if (defined $fqual) { my ($rread, $rqual) = mk_vec($rseqpairs->{$fname}); if ($fname =~ /\|\|/) { print $fp join "\n", "@".$forw_id, $fseq, "+", "$fqual\n"; print $rp join "\n", "@".$rev_id, $rread, "+", "$rqual\n"; - } + } else { print $fp join "\n", "@".$fname."/1", $fseq, "+", "$fqual\n"; print $rp join "\n", "@".$fname."/2", $rread, "+", "$rqual\n"; } - } + } else { if ($fname =~ /\|\|/) { print $fp join "\n", ">".$forw_id, "$fseq\n"; print $rp join "\n", ">".$rev_id, "$rseqpairs->{$fname}\n"; - } + } else { print $fp join "\n", ">".$fname."/1", "$fseq\n"; print $rp join "\n", ">".$fname."/2", "$rseqpairs->{$fname}\n"; } } delete $rseqpairs->{$fname}; - } + } else { $fsct++; if (defined $fqual) { if ($fname =~ /\|\|/) { print $fs join "\n", "@".$forw_id, $fseq, "+", "$fqual\n"; - } + } else { print $fs join "\n", "@".$fname."/1", $fseq, "+", "$fqual\n"; } - } + } else { if ($fname =~ /\|\|/) { print $fs join "\n", ">".$forw_id, "$fseq\n"; - } + } else { print $fs join "\n", ">".$fname."/1", "$fseq\n"; } @@ -236,13 +242,13 @@ sub make_pairs_and_singles { my ($rseq_up, $rqual_up) = mk_vec($rseq_up_unenc); my $rev_id_up .= $rname_up." 
2".$rcomm_up if defined $rcomm_up; - + if (defined $rcomm_up && defined $rqual_up) { print $rs join "\n", "@".$rev_id_up, $rseq_up, "+", "$rqual_up\n"; - } + } elsif (defined $rcomm_up && !defined $rqual_up) { print $rs join "\n", ">".$rev_id_up, "$rseq_up_unenc\n"; - } + } elsif (!defined $rcomm_up && defined $rqual_up) { print $rs join "\n", "@".$rname_up."/2", $rseq_up, "+", "$rqual_up\n"; } @@ -320,22 +326,22 @@ sub interleaved_to_pairs_and_singles { if ($fpairname eq $rpairname) { $fpct++; $rpct++; - say $fp join "\n", ">".$fname, $fseq + say $fp join "\n", ">".$fname, $fseq if !defined $fqual && !defined $fcomm; - say $fp join "\n", ">".$fname." ".$fcomm, $fseq + say $fp join "\n", ">".$fname." ".$fcomm, $fseq if !defined $fqual && defined $fcomm; - say $fp join "\n", "@".$fname, $fseq, "+", $fqual + say $fp join "\n", "@".$fname, $fseq, "+", $fqual if defined $fqual && !defined $fcomm; - say $fp join "\n", "@".$fname." ".$fcomm, $fseq, "+", $fqual + say $fp join "\n", "@".$fname." ".$fcomm, $fseq, "+", $fqual if defined $fqual && defined $fcomm; - - say $rp join "\n", ">".$rname, $rseq + + say $rp join "\n", ">".$rname, $rseq if !defined $rqual && !defined $rcomm; - say $rp join "\n", ">".$rname." ".$rcomm, $rseq + say $rp join "\n", ">".$rname." ".$rcomm, $rseq if !defined $rqual && defined $rcomm; - say $rp join "\n", "@".$rname, $rseq, "+", $rqual + say $rp join "\n", "@".$rname, $rseq, "+", $rqual if defined $rqual && !defined $rcomm; - say $rp join "\n", "@".$rname." ".$rcomm, $rseq, "+", $rqual + say $rp join "\n", "@".$rname." 
".$rcomm, $rseq, "+", $rqual if defined $rqual && defined $rcomm; delete $singles{$fname}; delete $singles{$rname}; @@ -357,13 +363,13 @@ sub interleaved_to_pairs_and_singles { $sfh = $rs; } - say $sfh join "\n", ">".$singles{$id}->{'name'}, $singles{$id}->{'seq'} + say $sfh join "\n", ">".$singles{$id}->{'name'}, $singles{$id}->{'seq'} if !defined $singles{$id}->{'qual'} && !defined $singles{$id}->{'comm'}; - say $sfh join "\n", ">".$singles{$id}->{'name'}." ".$singles{$id}->{'comm'}, $singles{$id}->{'seq'} + say $sfh join "\n", ">".$singles{$id}->{'name'}." ".$singles{$id}->{'comm'}, $singles{$id}->{'seq'} if !defined $singles{$id}->{'qual'} && defined $singles{$id}->{'comm'}; - say $sfh join "\n", "@".$singles{$id}->{'name'}, $singles{$id}->{'seq'}, "+", $singles{$id}->{'qual'} + say $sfh join "\n", "@".$singles{$id}->{'name'}, $singles{$id}->{'seq'}, "+", $singles{$id}->{'qual'} if defined $singles{$id}->{'qual'} && !defined $singles{$id}->{'comm'}; - say $sfh join "\n", "@".$singles{$id}->{'name'}." ".$singles{$id}->{'comm'}, $singles{$id}->{'seq'}, "+", $singles{$id}->{'qual'} + say $sfh join "\n", "@".$singles{$id}->{'name'}." 
".$singles{$id}->{'comm'}, $singles{$id}->{'seq'}, "+", $singles{$id}->{'qual'} if defined $singles{$id}->{'qual'} && defined $singles{$id}->{'comm'}; } close $fs; @@ -441,7 +447,7 @@ sub pairs_to_interleaved { } else { print $out join "\n", ">".$rname."/1", "$pairs->{$rname}\n"; - print $out join "\n", ">".$rname."/2", "$rseq\n"; + print $out join "\n", ">".$rname."/2", "$rseq\n"; } } } @@ -457,7 +463,7 @@ sub interleaved_to_pairs { my $fh = get_fh($infile); open my $f, '>', $forward or die "\nERROR: Could not open file: $forward\n"; - open my $r, '>', $reverse or die "\nERROR: Could not open file: $reverse\n"; + open my $r, '>', $reverse or die "\nERROR: Could not open file: $reverse\n"; my @aux = undef; my ($name, $comm, $seq, $qual); @@ -531,7 +537,7 @@ sub store_pair { my $rct = 0; my %rseqpairs; my $cwd = getcwd(); - + my @raux = undef; my ($rname, $rcomm, $rseq, $rqual); @@ -551,7 +557,7 @@ sub store_pair { "Please see https://github.com/sestaton/Pairfq or the README for supported formats. Exiting.\n\n"; exit(1); } - + $rseqpairs{$rname} = mk_key($rseq, $rqual) if defined $rqual; $rseqpairs{$rname} = $rseq if !defined $rqual; } @@ -578,7 +584,7 @@ sub readfq { } my ($name, $comm); defined $_ && do { - ($name, $comm) = /^.(\S+)(?:\s+)(\S+)/ ? ($1, $2) : + ($name, $comm) = /^.(\S+)(?:\s+)(\S+)/ ? ($1, $2) : /^.(\S+)/ ? ($1, '') : ('', ''); }; my $seq = ''; @@ -623,10 +629,10 @@ sub usage { Required: addinfo : Add the pair info back to the FASTA/Q header. - makepairs : Pair the forward and reverse reads and write singletons + makepairs : Pair the forward and reverse reads and write singletons for both forward and reverse reads to separate files. joinpairs : Interleave the paired forward and reverse files. - splitpairs : Split the interleaved file into separate files for the + splitpairs : Split the interleaved file into separate files for the forward and reverse reads. 
Options: @@ -715,11 +721,11 @@ sub joinpairs_usage { __END__ -=head1 NAME - +=head1 NAME + pairfq_lite.pl - Sync paired-end sequences from separate FASTA/Q files -=head1 SYNOPSIS +=head1 SYNOPSIS ## Add pair information back to the reads @@ -743,26 +749,43 @@ =head1 SYNOPSIS pairfq_lite.pl splitpairs -i s_1_interl.fq -f s_1_1_trim_p.fq -r s_1_2_trim_p.fq =head1 DESCRIPTION - + Re-pair paired-end sequences that may have been separated by quality trimming. -This script also writes the unpaired forward and reverse sequences to separate +This script also writes the unpaired forward and reverse sequences to separate files so that they may be used for assembly or mapping. The input may be FastA or FastQ format in either Illumina 1.3+ or Illumina 1.8 format. The input files may be compressed with gzip or bzip2. Optionally, the script can interleave paired -files, separate interleaved files into separate forward and reverse files, and -fix paired-end files which have lost the pair information. +files, separate interleaved files into separate forward and reverse files, and +fix paired-end files which have lost the pair information. =head1 DEPENDENCIES -There are no external dependencies with the 'pairfq_lite.pl' script. See below for +There are no external dependencies with the 'pairfq_lite.pl' script. See below for information on which Perls have been tested. =head1 LICENSE - -The MIT License should included with the project. If not, it can be found at: http://opensource.org/licenses/mit-license.php + +The MIT License can be found at: http://opensource.org/licenses/mit-license.php Copyright (C) 2013-2016 S. 
Evan Staton - +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + =head1 TESTED WITH: =over @@ -790,12 +813,12 @@ =head1 TESTED WITH: =back -=head1 AUTHOR +=head1 AUTHOR -S. Evan Staton +S. Evan Staton =head1 CONTACT - + statonse at gmail dot com =head1 REQUIRED ARGUMENTS @@ -840,15 +863,15 @@ =head1 OPTIONS =item -rp, --rev_paired - The output file to place the paired reverse reads. + The output file to place the paired reverse reads. =item -fs, --forw_unpaired - The output file to place the unpaired forward reads. + The output file to place the unpaired forward reads. =item -rs, --rev_unpaired - The output file to place the unpaired reverse reads. + The output file to place the unpaired reverse reads. =item -p, --pairnum @@ -871,11 +894,7 @@ =head1 OPTIONS =item -h, --help - Print a usage statement. - -=item -m, --man - - Print the full documentation. + Print a usage statement. 
=back diff --git a/annotation/tools/grid/augustus2grid.pl b/annotation/tools/grid/augustus2grid.pl new file mode 100755 index 000000000..a1c0dbdc7 --- /dev/null +++ b/annotation/tools/grid/augustus2grid.pl @@ -0,0 +1,260 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Pod::Usage; +use Getopt::Long; +use Scalar::Util qw(openhandle); +use Time::Piece; +use Time::Seconds; +use File::Basename; +use Bio::SeqIO; +use Cwd; +use Carp; +no strict qw(subs refs); +use GAAS::Grid::Bsub; +use GAAS::Grid::Sbatch; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $outdir = "augustus_output"; +my $genome = undef; +my $species = undef; +my $hints = undef; +my @cmds = (); # Stores the commands to send to farm +my $quiet; +my $grid="Slurm"; +my $queue=undef; +my $help; + +if ( ! GetOptions( + "hints=s" => \$hints, + "species=s" => \$species, + "genome|g=s" => \$genome, + "grid=s" => \$grid, + "quiet|q!" => \$quiet, + "queue=s" => \$queue, + "outdir=s" => \$outdir, + "help|h!" => \$help ) ) + +{ + pod2usage( { -message => "Failed to parse command line", + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if ( ! defined($genome) or ! defined($species)){ + pod2usage( { + -message => "$header\nAt least 2 parameters are mandatory:\nInput genome fasta file (--genome) and augustus species hmm profile (--species)\n\n", + -verbose => 0, + -exitval => 2 } ); +} + +# Validate the grid option (case-insensitive): only an exact match of a known scheduler name is accepted, so a prefix or empty value cannot slip through +my @grid_choice=('slurm','lsf','none'); +$grid=lc($grid); +if (! grep { $_ eq $grid } @grid_choice ) { + print "$grid is not a value accepted for grid parameter.";exit; +} +$grid= undef if lc($grid) eq 'none'; + +# .. Check that all binaries are available in $PATH + +my @tools = ( "augustus" ); # List of tools to check for!
+foreach my $exe (@tools) { check_bin($exe) == 1 or die "Missing executable $exe in PATH"; } + +my $augustus_config_pathj = $ENV{'AUGUSTUS_CONFIG_PATH'} or die "AUGUSTUS_CONFIG_PATH is not set, aborting." ; + +# .. Create output directory + +if (-d $outdir ) { + die "Output directory $outdir exists. Please remove and try again"; +} else { + msg("Creating output directory $outdir"); + runcmd("mkdir -p $outdir") +} + +# .. set up log file + +my $logfile = "$outdir/augustus.log"; +msg("Writing log to: $logfile"); +open LOG, '>', $logfile or err("Can't open logfile"); + +# .. Read genome fasta file. +my $inseq = Bio::SeqIO->new(-file => "<$genome", -format => 'fasta'); + +# .. and create chunks (one fasta file and one augustus command per input sequence) +msg("Creating chunks for GRID\n"); + +my $counter = 10000; +my $seq; + +while( $seq = $inseq->next_seq() ) { + $counter += 1; + my $outfile = $outdir . "/seq_" . $counter . ".fa"; + my $cmd=undef; + if($hints){ + $cmd = "augustus --species=$species --hintsfile=$hints --alternatives-from-evidence=true --gff3=on --extrinsicCfgFile=/references/software/augustus/config/extrinsic/extrinsic.E.cfg --uniqueGeneId=true $outfile > $outfile.augustus" ; + } + else{ + $cmd = "augustus --species=$species --alternatives-from-evidence=true --gff3=on --extrinsicCfgFile=/references/software/augustus/config/extrinsic/extrinsic.E.cfg --uniqueGeneId=true $outfile > $outfile.augustus" ; + } + push(@cmds,$cmd); + my $seq_out = Bio::SeqIO->new(-file => ">$outfile", -format => 'fasta'); + $seq_out->write_seq($seq); +} + +# Submit job chunks to grid +msg("submitting chunks\n"); + +if( $grid){ + msg("Sending ".scalar(@cmds)." jobs to the grid\n"); + chomp(@cmds); # Strip any trailing newline from each command + # Submit job chunks to grid + my $grid_runner; + if ( $grid eq 'lsf'){ + $grid_runner = Bsub->new( cmds_list => \@cmds); + } + elsif( $grid eq 'slurm'){ + $grid_runner = Sbatch->new( cmds_list => \@cmds); + } + if($queue){$grid_runner->queue($queue)} + $grid_runner->run(); +} +else{ + foreach my $command (@cmds){ + +
system($command); + + if ($? == -1) { + print "failed to execute: $!\n"; + } + elsif ($? & 127) { + printf "child died with signal %d, %s coredump\n", + ($? & 127), ($? & 128) ? 'with' : 'without'; + } + else { + printf "child exited with value %d\n", $? >> 8; + } + } +} + +# ..Postprocessing here, like merging of output files + +# -------------------- + +sub msg { # Write a timestamped line to LOG (if it is open) and to STDERR (unless $quiet) + my $t = localtime; + my $line = "[".$t->hms."] @_\n"; + print LOG $line if openhandle(\*LOG); + print STDERR $line unless $quiet; +} + +# -------------------- + +sub runcmd { # Log the command, run it with system(), and abort through err() on a non-zero status + msg("Running:", @_); + system(@_)==0 or err("Could not run command:", @_); +} + +# -------------------- + +sub check_bin { # Return 1 when `which` prints something for the given name, 0 otherwise + length(`which @_`) > 0 ? return 1 : return 0; +} + +#---------------------------------------------------------------------- + +sub err { # Force the message to be shown (clears $quiet), log it, and exit with status 2 + $quiet=0; + msg(@_); + exit(2); +} + +__END__ + +=head1 NAME + +gaas_augustus2grid.pl - chunk a genome and run augustus jobs in parallel on a grid + +=head1 DESCRIPTION + +Chunk input data to run multiple augustus jobs in parallel to grid + +=head1 SYNOPSIS + + gaas_augustus2grid.pl -g fasta_file --species species_name + gaas_augustus2grid.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<--genome> or B<-g> + +The name of the genome fasta file. + +=item B<--hints> + +Augustus hints file (e.g Intron) + +=item B<--species> + +Species name for the hmm profile to use within Augustus + +=item B<--queue> + +If you want to define a particular queue to run the jobs + +=item B<--grid> + +Define which grid to use, Slurm, Lsf or None. Default = Slurm. + +=item B<--quiet> or B<-q> + +Quiet mode + +=item B<--outdir> or B<-o> + +The name of the output directory. + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution.
Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/annotation/tools/grid/blastp2grid.pl b/annotation/tools/grid/blastp2grid.pl new file mode 100755 index 000000000..6edf150ef --- /dev/null +++ b/annotation/tools/grid/blastp2grid.pl @@ -0,0 +1,328 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Pod::Usage; +use Getopt::Long; +use Scalar::Util qw(openhandle); +use Time::Piece; +use Time::Seconds; +use File::Basename; +use Bio::SeqIO; +use Cwd; +use Carp; +no strict qw(subs refs); +use GAAS::Grid::Bsub; +use GAAS::Grid::Sbatch; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $outdir = "blastp_output"; +my $db = undef; +my $fasta = undef; +my $chunk_size = 500; # Partition size of fasta input (sequences per chunk) +my $eval = 1e-5; # E-value threshold handed to blastp -evalue +my $nb_seq = undef; # Optional database size handed to blastp -dbsize +my @chunks = (); # Holds chunks, partitioning the fasta input (so we + # don't send 50.000 jobs to the farm... +my @cmds = (); # Stores the commands to send to farm +my $quiet; +my $grid="Slurm"; +my $queue=undef; +my $help; + +if ( ! GetOptions( "h|help!" => \$help, + "fasta|f=s" => \$fasta, + "db=s" => \$db, + "chunk_size=i" => \$chunk_size, + "nb_seq=i" => \$nb_seq, + "eval=f" => \$eval, + "grid=s" => \$grid, + "quiet|q!"
=> \$quiet, + "queue=s" => \$queue, + "outdir=s" => \$outdir ) ) + +{ + pod2usage( { -message => "Failed to parse command line", + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if ( ! defined($db) or ! defined($fasta) ){ + pod2usage( { + -message => "$header\nAt least 2 parameters are mandatory:\nInput fasta file (--fasta) and a database (--db)\n\n", + -verbose => 0, + -exitval => 2 } ); +} + +# Validate the grid option (case-insensitive): only an exact match of a known scheduler name is accepted, so a prefix or empty value cannot slip through +my @grid_choice=('slurm','lsf','none'); +$grid=lc($grid); +if (! grep { $_ eq $grid } @grid_choice ) { + print "$grid is not a value accepted for grid parameter.";exit; +} +$grid= undef if lc($grid) eq 'none'; + +# .. Check that all binaries are available in $PATH + +my @tools = ("blastp"); +foreach my $exe (@tools) { + check_bin($exe) == 1 or die "Missing executable $exe in PATH"; +} + +# .. Create output directory + +if ( -d $outdir ) { + msg( "Be careful, we are using an existinf Output directory $outdir. " . + "If you do not want that, you have to stop the job" ); +} +else { + msg("Creating output directory $outdir"); + runcmd("mkdir -p $outdir"); +} + +# .. set up log file + +my $logfile = "$outdir/blastp2grid.log"; +msg("Writing log to: $logfile"); +open LOG, '>', $logfile or err ("Can't open logfile"); + +# .. Read protein fasta file. +my $inseq = Bio::SeqIO->new( -file => "<$fasta", -format => 'fasta' ); + +# .. and create chunks of $chunk_size sequences each +msg("Creating chunks for GRID\n"); + +my @seqarray = (); +my $counter = 0; +my $chunk_counter = 1; + +my $seq; + +while ( $seq = $inseq->next_seq() ) { + $counter += 1; + push( @seqarray, $seq ); + + if ( $counter == $chunk_size ) { + my $outfile = $outdir . "/chunk_" . $chunk_counter . ".fa"; + write_chunk( $outfile, @seqarray ); + @seqarray = (); + $chunk_counter += 1; + $counter = 0; + } +} +my $outfile = + $outdir . "/chunk_" . + $chunk_counter . ".fa"; # Clunky, the last chunk is <= chunk_size...
+write_chunk( $outfile, @seqarray ); + +# Push all jobs into the command list (-dbsize is only passed when --nb_seq was given) +if ( !defined($nb_seq) ) { + for ( my $i = 1; $i <= $chunk_counter; $i++ ) { + my $cmd = + "blastp -evalue $eval -num_alignments 100000 " . + "-seg yes -outfmt 6 -db $db -query $outdir/chunk_$i.fa " . + "-out $outdir/chunk_$i.tab"; + push( @cmds, $cmd ); + } +} +else { + for ( my $i = 1; $i <= $chunk_counter; $i++ ) { + my $cmd = + "blastp -dbsize $nb_seq -evalue $eval " . + "-num_alignments 100000 -seg yes -outfmt 6 -db $db " . + "-query $outdir/chunk_$i.fa -out $outdir/chunk_$i.tab"; + push( @cmds, $cmd ); + } +} + +# Submit job chunks to grid +msg("submitting chunks\n"); + +if( $grid){ + msg("Sending ".scalar(@cmds)." jobs to the grid\n"); + chomp(@cmds); # Strip any trailing newline from each command + # Submit job chunks to grid + my $grid_runner; + if ( $grid eq 'lsf'){ + $grid_runner = Bsub->new( cmds_list => \@cmds); + } + elsif( $grid eq 'slurm'){ + $grid_runner = Sbatch->new( cmds_list => \@cmds); + } + if($queue){$grid_runner->queue($queue)} + $grid_runner->run(); +} +else{ + foreach my $command (@cmds){ + + system($command); + + if ($? == -1) { + print "failed to execute: $!\n"; + } + elsif ($? & 127) { + printf "child died with signal %d, %s coredump\n", + ($? & 127), ($? & 128) ? 'with' : 'without'; + } + else { + printf "child exited with value %d\n", $? >> 8; + } + } +} + +# Merging the outputs +msg("Merging outputs from chunks"); + +my @files = <$outdir/*.tab>; + +foreach my $file (@files) { + system("cat $file >> $outdir/blastp.merged"); +} + +msg("Finished BLASTp grid run."); + +# -------------------- + +sub write_chunk +{ + my $outfile = shift; + my @seqs = @_; + my $seq_out = + Bio::SeqIO->new( -file => ">$outfile", -format => 'fasta' ); + foreach my $seq (@seqs) { $seq_out->write_seq($seq) } +} + +# -------------------- + +sub msg +{ + my $t = localtime; + my $line = "[" . $t->hms .
"] @_\n"; + print LOG $line if openhandle( \*LOG ); + print STDERR $line unless $quiet; +} + +# -------------------- + +sub runcmd +{ + msg( "Running:", @_ ); + system(@_) == 0 or err ( "Could not run command:", @_ ); +} + +# -------------------- + +sub check_bin +{ + length(`which @_`) > 0 ? return 1 : return 0; +} + +#---------------------------------------------------------------------- + +sub err +{ + $quiet = 0; + msg(@_); + exit(2); + +} + +__END__ + +=head1 NAME + +gaas_blastp2grid.pl - + +=head1 DESCRIPTION + +Chunk input data to run multiple blastp jobs in parallel to grid + +=head1 SYNOPSIS + + gaas_blastp2grid.pl -f fasta_file --db db_name + gaas_blastp2grid.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<--fasta> or B<-f> + +The name of the protein fasta file to use as query. + +=item B<--db> + +The name of the database use to blast + +=item B<--nb_seq> + +The number of proteins contained in the db. Useful to cheat on +the database size. (OrthoMCL aggregation as example). If not +provided, the current database size is used. + +=item B<--eval> + +The maximu evalue of the sequences kept in the result + +=item B<--chunk_size> + +The number of sequence by job. If not provided, default size +will be 500. + +=item B<--queue> + +If you want to define a particular queue to run the jobs + +=item B<--grid> + +Define which grid to use, Slurm, Lsf or None. Default = Slurm. + +=item B<--quiet> or B<-q> + +Quiet mode + +=item B<--outdir> or B<-o> + +The name of the output directory. + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. 
+ Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/annotation/tools/grid/blat2grid.pl b/annotation/tools/grid/blat2grid.pl new file mode 100755 index 000000000..2c2cf5460 --- /dev/null +++ b/annotation/tools/grid/blat2grid.pl @@ -0,0 +1,289 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Pod::Usage; +use Getopt::Long; +use Scalar::Util qw(openhandle); +use Time::Piece; +use Time::Seconds; +use File::Basename; +use Bio::SeqIO; +use Cwd; +use Carp; +no strict qw(subs refs); +use GAAS::Grid::Bsub; +use GAAS::Grid::Sbatch; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $outdir = "blat_output"; +my $db = undef; +my $fasta = undef; +my $chunk_size = 500; # Partition size of fasta input +my @chunks = (); # Holds chunks, partitioning the fasta input (so we don't send 50.000 jobs to the farm... +my @cmds = (); # Stores the commands to send to farm +my $quiet; +my $grid="Slurm"; +my $queue=undef; +my $help; + +if ( ! GetOptions( + "chunk_size=s" => \$chunk_size, + "f|fasta=s" => \$fasta, + "db=s" => \$db, + "grid=s" => \$grid, + "quiet|q!" => \$quiet, + "queue=s" => \$queue, + "outdir=s" => \$outdir, + "help|h!" => \$help ) ) + +{ + pod2usage( { -message => "Failed to parse command line", + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if ( ! defined($db) or ! 
defined($fasta)){ + pod2usage( { + -message => "$header\nAt least 2 parameters are mandatory:\nInput fasta file (--fasta) and a database (--db)\n\n", + -verbose => 0, + -exitval => 2 } ); +} + +# Validate the grid option (case-insensitive): only an exact match of a known scheduler name is accepted, so a prefix or empty value cannot slip through +my @grid_choice=('slurm','lsf','none'); +$grid=lc($grid); +if (! grep { $_ eq $grid } @grid_choice ) { + print "$grid is not a value accepted for grid parameter.";exit; +} +$grid= undef if lc($grid) eq 'none'; + +# .. Check that all binaries are available in $PATH + +my @tools = ("blat" ); +foreach my $exe (@tools) { check_bin($exe) == 1 or die "Missing executable $exe in PATH"; } + +# .. Create output directory + +if (-d $outdir ) { + die "Output directory $outdir exists. Please remove and try again"; +} else { + msg("Creating output directory $outdir"); + runcmd("mkdir -p $outdir") +} + +# .. set up log file + +my $logfile = "$outdir/blat2grid.log"; +msg("Writing log to: $logfile"); +open LOG, '>', $logfile or err("Can't open logfile"); + +# .. Read protein fasta file. +my $inseq = Bio::SeqIO->new(-file => "<$fasta", -format => 'fasta'); + +# .. and create chunks of $chunk_size sequences each +msg("Creating chunks for GRID\n"); + +my @seqarray = (); +my $counter = 0; +my $chunk_counter = 1; + +my $seq; + +while( $seq = $inseq->next_seq() ) { + $counter += 1; + push(@seqarray,$seq); + + if ($counter == $chunk_size) { + my $outfile = $outdir . "/chunk_" . $chunk_counter . ".fa"; + write_chunk($outfile,@seqarray); + @seqarray = (); + $chunk_counter += 1; + $counter = 0; + } +} +my $outfile = $outdir . "/chunk_" . $chunk_counter . ".fa"; # Clunky, the last chunk is <= chunk_size...
+write_chunk($outfile,@seqarray); + +# Push all jobs into the command list (one blat command per chunk file) + +for (my $i=1;$i<=$chunk_counter;$i++) { + my $cmd = "blat -minIdentity=98 -minScore=80 $db $outdir/chunk_$i.fa $outdir/chunk_$i.psl"; + push(@cmds,$cmd); +} + + +# Submit job chunks to grid +msg("submitting chunks\n"); + +if( $grid){ + msg("Sending ".scalar(@cmds)." jobs to the grid\n"); + chomp(@cmds); # Strip any trailing newline from each command + # Submit job chunks to grid + my $grid_runner; + if ( $grid eq 'lsf'){ + $grid_runner = Bsub->new( cmds_list => \@cmds); + } + elsif( $grid eq 'slurm'){ + $grid_runner = Sbatch->new( cmds_list => \@cmds); + } + if($queue){$grid_runner->queue($queue)} + $grid_runner->run(); +} +else{ + foreach my $command (@cmds){ + + system($command); + + if ($? == -1) { + print "failed to execute: $!\n"; + } + elsif ($? & 127) { + printf "child died with signal %d, %s coredump\n", + ($? & 127), ($? & 128) ? 'with' : 'without'; + } + else { + printf "child exited with value %d\n", $? >> 8; + } + } +} + +# Merging the outputs +msg("Merging outputs from chunks"); + +my @files = <$outdir/*.psl>; + +foreach my $file (@files) { + system("cat $file >> $outdir/blat.merged"); +} + +msg("Finished BLAT grid run."); + +# -------------------- + +sub write_chunk { + my $outfile = shift; + my @seqs = @_; + my $seq_out = Bio::SeqIO->new(-file => ">$outfile", -format => 'fasta'); + foreach my $seq (@seqs) {$seq_out->write_seq($seq)}; +} + +# -------------------- + +sub msg { + my $t = localtime; + my $line = "[".$t->hms."] @_\n"; + print LOG $line if openhandle(\*LOG); + print STDERR $line unless $quiet; +} + +# -------------------- + +sub runcmd { + msg("Running:", @_); + system(@_)==0 or err("Could not run command:", @_); +} + +# -------------------- + +sub check_bin { + length(`which @_`) > 0 ?
return 1 : return 0; +} + +#---------------------------------------------------------------------- + +sub err { + $quiet=0; + msg(@_); + exit(2); +} + +__END__ + +=head1 NAME + +gaas_blat2grid.pl + +=head1 DESCRIPTION + +Chunk input data to run multiple blat jobs in parallel to grid + +=head1 SYNOPSIS + + gaas_blat2grid.pl -f fasta_file --db db_name + gaas_blat2grid.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<--fasta> or B<-f> + +The name of the nucleotide/protein fasta file to use as query. + +=item B<--db> + +The name of the database use to blat + +=item B<--chunk_size> + +The number of sequence by job. If not provided, default size +will be 500. + +=item B<--queue> + +If you want to define a particular queue to run the jobs + +=item B<--grid> + +Define which grid to use, Slurm, Lsf or None. Default = Slurm. + +=item B<--quiet> or B<-q> + +Quiet mode + +=item B<--outdir> or B<-o> + +The name of the output directory. + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/annotation/tools/grid/exonerate2grid.pl b/annotation/tools/grid/exonerate2grid.pl new file mode 100755 index 000000000..f9257f154 --- /dev/null +++ b/annotation/tools/grid/exonerate2grid.pl @@ -0,0 +1,276 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Pod::Usage; +use Getopt::Long; +use Scalar::Util qw(openhandle); +use Time::Piece; +use Time::Seconds; +use File::Basename; +use Bio::SeqIO; +use Cwd; +use Carp; +use Bio::SeqFeature::Generic; +use Bio::Tools::GFF; +no strict qw(subs refs); +use GAAS::Grid::Bsub; +use GAAS::Grid::Sbatch; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $gff_formatter = Bio::Tools::GFF->new(-gff_version => 3); + +my $outdir = "exonerate_output"; +my $genome = undef; +my $proteins = undef; +my @cmds = (); # Stores the commands to send to farm +my $quiet; +my $grid="Slurm"; +my $queue=undef; +my @annotations = (); # Stores Rfama annotations as hashes +my $help; + +if ( !GetOptions( + "help|h!" => \$help, + "genome|g|fasta|f=s" => \$genome, + "protein|p=s" => \$proteins, + "grid=s" => \$grid, + "quiet|q!" => \$quiet, + "queue=s" => \$queue, + "outdir|o=s" => \$outdir)) + +{ + pod2usage( { -message => "Failed to parse command line", + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if ( ! defined( $genome ) or ! defined( $proteins ) ){ + pod2usage( { + -message => "$header\nAt least 2 parameters are mandatory:\nInput genome fasta file (--genome) and protein fasta file (--protein)\n\n", + -verbose => 0, + -exitval => 2 } ); +} + +# Validate the grid option (case-insensitive): only an exact match of a known scheduler name is accepted, so a prefix or empty value cannot slip through +my @grid_choice=('slurm','lsf','none'); +$grid=lc($grid); +if (! grep { $_ eq $grid } @grid_choice ) { + print "$grid is not a value accepted for grid parameter.";exit; +} +$grid= undef if lc($grid) eq 'none'; + +# ..
Check that all binaries are available in $PATH +my @tools = ( "exonerate" ); # List of tools to check for! +foreach my $exe (@tools) { check_bin($exe) == 1 or die "Missing executable $exe in PATH"; } + +# .. Create output directory + +if (-d $outdir ) { + die "Output directory $outdir exists. Please remove and try again"; +} else { + msg("Creating output directory $outdir"); + runcmd("mkdir -p $outdir") +} + +# .. set up log file + +my $logfile = "$outdir/exonerate_search.log"; +msg("Writing log to: $logfile"); +open LOG, '>', $logfile or err("Can't open logfile"); + +# .. Read genome fasta file. +my $inseq = Bio::SeqIO->new(-file => "<$genome", -format => 'fasta'); + +# .. and create chunks (one fasta file and one exonerate command per genome sequence) +msg("Creating chunks for grid\n"); + +my $seq; + +my $seq_counter = 0; + +while( $seq = $inseq->next_seq() ) { + $seq_counter += 1; + my $outfile = $outdir . "/" . $seq->display_id . ".fasta" ; # File is named after the display_id; NOTE: special characters in the id can cause trouble here + my $seq_out = Bio::SeqIO->new(-file => ">$outfile" , -format => 'fasta'); + $seq_out->write_seq($seq); + my $command = "exonerate --showtargetgff --refine region --model protein2genome --percent 60 $proteins $outfile > $outfile.exonerate 2> /dev/null" ; + push(@cmds,$command); +} + +# Submit job chunks to grid + +# Submit job chunks to grid +msg("submitting chunks\n"); + +if( $grid){ + msg("Sending ".scalar(@cmds)." jobs to the grid\n"); + chomp(@cmds); # Strip any trailing newline from each command + # Submit job chunks to grid + my $grid_runner; + if ( $grid eq 'lsf'){ + $grid_runner = Bsub->new( cmds_list => \@cmds); + } + elsif( $grid eq 'slurm'){ + $grid_runner = Sbatch->new( cmds_list => \@cmds); + } + if($queue){$grid_runner->queue($queue)} + $grid_runner->run(); +} +else{ + foreach my $command (@cmds){ + + system($command); + + if ($? == -1) { + print "failed to execute: $!\n"; + } + elsif ($? & 127) { + printf "child died with signal %d, %s coredump\n", + ($? & 127), ($? & 128) ?
'with' : 'without'; + } + else { + printf "child exited with value %d\n", $? >> 8; + } + } +} + +# ..Postprocessing here, merging of output and printing gff + +msg("Merging output and writing GFF file"); + +my @files = <$outdir/*.exonerate>; + +my $outfile = $outdir . "/exonerate_annotations.gff"; +open (my $OUT, '>', $outfile) or die "FATAL: Can't open file: $outfile for reading.\n$!\n"; + +foreach my $file (@files) { + + open (my $IN, '<', $file) or die "FATAL: Can't open file: $file for reading.\n$!\n"; + + while (<$IN>) { + chomp; + my $line = $_; + next if ($line =~ /^#.*/); # Skipping comment lines + + print $OUT $line , "\n"; + + } +} + +close($OUT); + +# -------------------- + +sub msg { + my $t = localtime; + my $line = "[".$t->hms."] @_\n"; + print LOG $line if openhandle(\*LOG); + print STDERR $line unless $quiet; +} + +# -------------------- + +sub runcmd { + msg("Running:", @_); + system(@_)==0 or err("Could not run command:", @_); +} + +# -------------------- + +sub check_bin { + length(`which @_`) > 0 ? return 1 : return 0; +} + +#---------------------------------------------------------------------- + +sub err { + $quiet=0; + msg(@_); + exit(2); +} + +__END__ + +=head1 NAME + +gaas_exonerate2grid.pl + +=head1 DESCRIPTION + +Chunk input data to run multiple exonerate jobs in parallel to grid + +=head1 SYNOPSIS + + gaas_exonerate2grid.pl -f genome.fasta -o outdir + gaas_exonerate2grid.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<--genome>, B<--fasta>, B<-f> or B<-g> + +The name of the genome fasta file to use as target. + +=item B<--protein> + +The name of the protein file to use as query. + +=item B<--queue> + +If you want to define a particular queue to run the jobs + +=item B<--grid> + +Define which grid to use, Slurm, Lsf or None. Default = Slurm. + +=item B<--quiet> or B<-q> + +Quiet mode + +=item B<--outdir> or B<-o> + +The name of the output directory. + +=item B<-h> or B<--help> + +Display this helpful text. 
+ +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/annotation/tools/grid/interpro2grid.pl b/annotation/tools/grid/interpro2grid.pl new file mode 100755 index 000000000..024aa34e3 --- /dev/null +++ b/annotation/tools/grid/interpro2grid.pl @@ -0,0 +1,295 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Pod::Usage; +use Getopt::Long; +use Scalar::Util qw(openhandle); +use Time::Piece; +use Time::Seconds; +use File::Basename; +use Bio::SeqIO; +use Cwd; +use Carp; +no strict qw(subs refs); +use GAAS::Grid::Bsub; +use GAAS::Grid::Sbatch; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $outdir = undef; +my $genome = undef; +my $fasta = undef; +my $chunk_size = 10; # Partition size of fasta input +my @chunks = (); # Holds chunks, partitioning the fasta input (so we don't send 50.000 jobs to the farm... +my @cmds = (); # Stores the commands to send to farm +my $quiet; +my $grid="Slurm"; +my $queue=undef; +my $help; + +if ( ! GetOptions( + "chunk_size=s" => \$chunk_size, + "f|fasta=s" => \$fasta, + "grid=s" => \$grid, + "quiet|q!" => \$quiet, + "queue=s" => \$queue, + "outdir=s" => \$outdir, + "help|h!" 
=> \$help ) ) + +{ + pod2usage( { -message => "Failed to parse command line", + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if (! defined($fasta)){ + pod2usage( { + -message => "$header\nAt least 1 parameter is mandatory:\nInput fasta file (--fasta)\n\n", + -verbose => 0, + -exitval => 2 } ); +} + +# set grid option properly +my @grid_choice=('slurm','lsf','none'); +$grid=lc($grid); +if (! grep( /^$grid/, @grid_choice ) ) { + print "$grid is not a value accepted for grid parameter.";exit; +} +$grid= undef if lc($grid) eq 'none'; + +# .. Check that all binaries are available in $PATH + +my $interproscan = "/sw/bioinfo/interproscan-5.3-46.0/interproscan.sh" ; + +if (-f $interproscan ) { + msg ("Found interproscan at $interproscan"); +} else { + die "Could not find Interproscan at the default location." ; +} + +# .. Create output directory + +if (-d $outdir ) { + die "Output directory $outdir exists. Please remove and try again"; +} else { + msg("Creating output directory $outdir"); + runcmd("mkdir -p $outdir") +} + +# .. set up log file + +my $logfile = "$outdir/generic.log"; +msg("Writing log to: $logfile"); +open LOG, '>', $logfile or err("Can't open logfile"); + +#### HERE YOU READ YOUR FILE TO BE CHUNKED AND RUN ON GRID! #### +#### Example below: Read a FASTA file, split into smaller sub-files +#### and analyse with e.g. blast or whatever via grid-submission + +# .. Read e.g. protein fasta file. +my $inseq = Bio::SeqIO->new(-file => "<$fasta", -format => 'fasta'); + +# .. and create chunks +msg("Creating chunks for GRID\n"); + +my @seqarray = (); # Stores entries for a given chunk to be printed later +my $counter = 0; +my $chunk_counter = 1; + +my $seq; + +while( $seq = $inseq->next_seq() ) { + $counter += 1; + push(@seqarray,$seq); + + if ($counter == $chunk_size) { + my $outfile = $outdir . "/chunk_" . $chunk_counter . 
".fa"; + write_chunk($outfile,@seqarray); + @seqarray = (); + $chunk_counter += 1; + $counter = 0; + } +} +my $outfile = $outdir . "/chunk_" . $chunk_counter . ".fa"; # Clunky, the last chunk is <= chunk_size... +write_chunk($outfile,@seqarray); + +# Push all jobs into the command list + +for (my $i=1;$i<=$chunk_counter;$i++) { + + my $command = $interproscan . " -i $outdir/chunk_$i.fa -d $outdir" ; + push(@cmds,$command); +} + + +# Submit job chunks to grid +msg("submitting chunks\n"); + +if( $grid){ + msg("Sending $#cmds jobs to the grid\n"); + chomp(@cmds); # Remove empty indices + # Submit job chunks to grid + my $grid_runner; + if ( $grid eq 'lsf'){ + $grid_runner = Bsub->new( cmds_list => \@cmds); + } + elsif( $grid eq 'slurm'){ + $grid_runner = Sbatch->new( cmds_list => \@cmds); + } + if($queue){$grid_runner->queue($queue)} + $grid_runner->run(); +} +else{ + foreach my $command (@cmds){ + + system($command); + + if ($? == -1) { + print "failed to execute: $!\n"; + } + elsif ($? & 127) { + printf "child died with signal %d, %s coredump\n", + ($? & 127), ($? & 128) ? 'with' : 'without'; + } + else { + printf "child exited with value %d\n", $? >> 8; + } + } +} + +# Merging the outputs +msg("Merging outputs from chunks"); + +my @files = <$outdir/*.tsv>; + +foreach my $file (@files) { + system("cat $file >> $outdir/interprosan.merged.tsv"); +} + +msg("Finished with InterProScan"); + + +# -------------------- + +sub write_chunk { + my $outfile = shift; + my @seqs = @_; + my $seq_out = Bio::SeqIO->new(-file => ">$outfile", -format => 'fasta'); + foreach my $seq (@seqs) {$seq_out->write_seq($seq)}; +} + +# -------------------- + +sub msg { + my $t = localtime; + my $line = "[".$t->hms."] @_\n"; + print LOG $line if openhandle(\*LOG); + print STDERR $line unless $quiet; +} + +# -------------------- + +sub runcmd { + msg("Running:", @_); + system(@_)==0 or err("Could not run command:", @_); +} + +# -------------------- + +sub check_bin { + length(`which @_`) > 0 ? 
return 1 : return 0; +} + +#---------------------------------------------------------------------- + +sub err { + $quiet=0; + msg(@_); + exit(2); +} + +__END__ + +=head1 NAME + +gaas_interpro2grid.pl - + +=head1 DESCRIPTION + +Chunk input data to run multiple interpro jobs in parallel to grid + +=head1 SYNOPSIS + + gaas_interpro2grid.pl -f fasta_file -o outdir + gaas_interpro2grid.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<--fasta> or B<-f> + +The name of the protein fasta file to use as query. + +=item B<--chunk_size> + +The number of sequence by job. If not provided, default size +will be 500. + +=item B<--queue> + +If you want to define a particular queue to run the jobs + +=item B<--grid> + +Define which grid to use, Slurm, Lsf or None. Default = Slurm. + +=item B<--quiet> or B<-q> + +Quiet mode + +=item B<--outdir> or B<-o> + +The name of the output directory. + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Grid/lastz2grid.pl b/annotation/tools/grid/lastz2grid.pl similarity index 57% rename from annotation/Tools/Grid/lastz2grid.pl rename to annotation/tools/grid/lastz2grid.pl index 568154e75..efa23a999 100755 --- a/annotation/Tools/Grid/lastz2grid.pl +++ b/annotation/tools/grid/lastz2grid.pl @@ -2,37 +2,20 @@ use strict; use warnings; +use Pod::Usage; use Getopt::Long; use Scalar::Util qw(openhandle); use Time::Piece; use Time::Seconds; -use FindBin; -use lib ("$FindBin::Bin/PerlLib", "$FindBin::Bin/PerlLibAdaptors"); use File::Basename; use Cwd; use Carp; no strict qw(subs refs); +use GAAS::Grid::Bsub; +use GAAS::Grid::Sbatch; +use GAAS::GAAS; - -my $usage = qq{ -perl my_script.pl - Getting help: - [--help] - - Input: - [--query filename] - The name of the query genome file. - [--target filename] - The name of the target genome file. - - Ouput: - [--outdir name] - The name of the output directory. - -}; - -my $grid_computing_module = "BilsGridRunner"; - +my $header = get_gaas_header(); my $outdir = undef; my $query = undef; my $target = undef; @@ -42,19 +25,46 @@ my @lav_files = (); my $job_limit = 500; # Maximum number of jobs to allow before aborting my $quiet; +my $grid="Slurm"; +my $queue=undef; my $help; -GetOptions( - "help" => \$help, - "query=s" => \$query, - "target=s" => \$target, - "outdir=s" => \$outdir); +if ( ! GetOptions( + "query=s" => \$query, + "target=s" => \$target, + "grid=s" => \$grid, + "quiet|q!" => \$quiet, + "queue=s" => \$queue, + "outdir=s" => \$outdir, + "help|h!" => \$help ) ) + +{ + pod2usage( { -message => "Failed to parse command line", + -verbose => 1, + -exitval => 1 } ); +} # Print Help and exit if ($help) { - print $usage; - exit(0); + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if ( ! 
defined($query) or ! defined($target)){ + pod2usage( { + -message => "$header\nAt least 2 parameters are mandatory:\n a query genome file (--query) and a target genome file (--target)\n\n", + -verbose => 0, + -exitval => 2 } ); +} + +# set grid option properly +my @grid_choice=('slurm','lsf','none'); +$grid=lc($grid); +if (! grep( /^$grid/, @grid_choice ) ) { + print "$grid is not a value accepted for grid parameter.";exit; } +$grid= undef if lc($grid) eq 'none'; # .. Check that all binaries are available in $PATH @@ -65,12 +75,12 @@ my $query_base = basename($query) ; my $target_base = basename($target) ; - + my $query_dir = $outdir . "/" . $query_base ; my $target_dir = $outdir . "/" . $target_base ; -# .. Create output directory +# .. Create output directory if (-d $outdir ) { die "Output directory $outdir exists. Please remove and try again"; @@ -90,18 +100,10 @@ msg("Writing log to: $logfile"); open LOG, '>', $logfile or err("Can't open logfile"); -# .. load grid module (courtesy of Brian Haas) - -my $perl_lib_repo = "$FindBin::Bin/../PerlLibAdaptors"; -msg("-importing module: $grid_computing_module\n"); -require "$grid_computing_module.pm" or die "Error, could not import perl module at run-time: $grid_computing_module"; - -my $grid_computing_method = $grid_computing_module . "::run_on_grid" or die "Failed to initialize GRID module\n"; - msg("Generating size indices for genomes"); my $query_size = $outdir . "/" . basename($query) . ".sizes" ; -my $target_size = $outdir . "/" . basename($target) . ".sizes" ; +my $target_size = $outdir . "/" . basename($target) . ".sizes" ; runcmd("faSize $query -detailed > $query_size"); runcmd("faSize $target -detailed > $target_size"); @@ -153,27 +155,53 @@ foreach my $target_seq(@target_seqs) { my $lav_file = $query_seq . "-" . $target_seq . 
".lav" ; - push @lav_files , $lav_file ; + push @lav_files , $lav_file ; push @cmds , "lastz $query_dir/$query_seq $target_dir/$target_seq > $outdir/lav/$lav_file" ; - } + } } - # Submit job chunks to grid - -msg("Sending jobs to LSF grid\n"); - -chomp(@cmds); # Remove empty indices - -&$grid_computing_method(@cmds); +msg("submitting chunks\n"); + +if( $grid){ + msg("Sending $#cmds jobs to the grid\n"); + chomp(@cmds); # Remove empty indices + # Submit job chunks to grid + my $grid_runner; + if ( $grid eq 'lsf'){ + $grid_runner = Bsub->new( cmds_list => \@cmds); + } + elsif( $grid eq 'slurm'){ + $grid_runner = Sbatch->new( cmds_list => \@cmds); + } + if($queue){$grid_runner->queue($queue)} + $grid_runner->run(); +} +else{ + foreach my $command (@cmds){ + + system($command); + + if ($? == -1) { + print "failed to execute: $!\n"; + } + elsif ($? & 127) { + printf "child died with signal %d, %s coredump\n", + ($? & 127), ($? & 128) ? 'with' : 'without'; + } + else { + printf "child exited with value %d\n", $? >> 8; + } + } +} msg("### Converting LAV files to PSL ###"); foreach my $lav_file (@lav_files) { my $psl_file = $lav_file ; - $psl_file =~ s/lav$/psl/ ; + $psl_file =~ s/lav$/psl/ ; runcmd("lavToPsl $outdir/lav/$lav_file $outdir/psl/$psl_file"); } @@ -238,3 +266,80 @@ sub err { msg(@_); exit(2); } + +__END__ + +=head1 NAME + +gaas_blat2grid.pl + +=head1 DESCRIPTION + +Chunk input data to run multiple blat jobs in parallel to grid + +=head1 SYNOPSIS + + gaas_blat2grid.pl -f fasta_file --db db_name + gaas_blat2grid.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<--query> + +The name of the query genome file. + +=item B<--target> + +The name of the target genome file. + +=item B<--queue> + +If you want to define a particular queue to run the jobs + +=item B<--grid> + +Define which grid to use, Slurm, Lsf or None. Default = Slurm. + +=item B<--quiet> or B<-q> + +Quiet mode + +=item B<--outdir> or B<-o> + +The name of the output directory. 
+ +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Grid/pfam2grid.pl b/annotation/tools/grid/pfam2grid.pl similarity index 54% rename from annotation/Tools/Grid/pfam2grid.pl rename to annotation/tools/grid/pfam2grid.pl index 1625d372b..9ea040ac6 100755 --- a/annotation/Tools/Grid/pfam2grid.pl +++ b/annotation/tools/grid/pfam2grid.pl @@ -7,8 +7,6 @@ use Scalar::Util qw(openhandle); use Time::Piece; use Time::Seconds; -use FindBin; -use lib ("$FindBin::Bin/PerlLib", "$FindBin::Bin/PerlLibAdaptors"); use File::Basename; use Bio::SeqIO; use Cwd; @@ -16,35 +14,32 @@ use Bio::SeqFeature::Generic; use Bio::Tools::GFF; no strict qw(subs refs); +use GAAS::Grid::Bsub; +use GAAS::Grid::Sbatch; +use GAAS::GAAS; -my $header = qq{ -######################################################## -# NBIS - Sweden # -# # -# Please cite NBIS (www.NBIS.se) when using this tool. 
# -######################################################## -}; - - -my $grid_computing_module = "BilsGridRunner"; +my $header = get_gaas_header(); my $pfam_hmm_file = "/projects/references/databases/pfam/31.0/Pfam-A.hmm"; -my $outdir = undef; +my $outdir = "pfam_output"; my $fasta = undef; my @cmds = (); # Stores the commands to send to farm my $quiet; my $help; -my $nogrid=undef; my $chunk_size = 500; +my $grid="Slurm"; +my $queue=undef; my @chunks = (); # Holds chunks, partitioning the fasta input (so we # don't send 50.000 jobs to the farm... if ( !GetOptions( - "help" => \$help, + "help|h!" => \$help, "fasta|f=s" => \$fasta, - "hmm=s" => \$pfam_hmm_file, + "hmm=s" => \$pfam_hmm_file, "chunk_size=i" => \$chunk_size, - "nogrid!" => \$nogrid, + "grid=s" => \$grid, + "quiet|q" => \$quiet, + "queue=s" => \$queue, "outdir|o=s" => \$outdir)) { @@ -55,18 +50,26 @@ # Print Help and exit if ($help) { - pod2usage( { -verbose => 1, - -exitval => 0, - -message => "$header \n" } ); + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); } -if ( ! (defined($fasta) and defined($outdir) ) ){ +if ( ! defined($fasta) ){ pod2usage( { - -message => "$header\nAt least 2 parameter are mandatory:\nInput fasta file and output directory \n\n", + -message => "$header\nAt least 1 parameter is mandatory:\nInput fasta file\n\n", -verbose => 0, -exitval => 2 } ); } +# set grid option properly +my @grid_choice=('slurm','lsf','none'); +$grid=lc($grid); +if (! grep( /^$grid/, @grid_choice ) ) { + print "$grid is not a value accepted for grid parameter.";exit; +} +$grid= undef if lc($grid) eq 'none'; + if (! -e $pfam_hmm_file){ print "The cm file ".$pfam_hmm_file." does not exist. Please define it using the cm option.\n";exit; } @@ -78,7 +81,7 @@ my @tools = ("hmmscan" ); # List of tools to check for! foreach my $exe (@tools) { check_bin($exe) == 1 or die "Missing executable $exe in PATH"; } -# .. Create output directory +# .. 
Create output directory if (-d $outdir ) { die "Output directory $outdir exists. Please remove and try again"; @@ -93,17 +96,6 @@ msg("Writing log to: $logfile"); open LOG, '>', $logfile or err("Can't open logfile"); -# .. load grid module (courtesy of Brian Haas) -my $grid_computing_method; -if(! $nogrid){ - - my $perl_lib_repo = "$FindBin::Bin/../PerlLibAdaptors"; - msg("-importing module: $grid_computing_module\n"); - require "$grid_computing_module.pm" or die "Error, could not import perl module at run-time: $grid_computing_module"; - - $grid_computing_method = $grid_computing_module . "::run_on_grid" or die "Failed to initialize GRID module\n"; -} - # .. Read genome fasta file. my $inseq = Bio::SeqIO->new(-file => "<$fasta", -format => 'fasta'); @@ -114,13 +106,13 @@ my $counter = 0; my $chunk_counter = 1; my $seq; - +my $outfile; while( $seq = $inseq->next_seq() ) { $counter += 1; push( @seqarray, $seq ); if ( $counter == $chunk_size ) { - my $outfile = $outdir . "/chunk_" . $chunk_counter . ".fa"; + $outfile = $outdir . "/chunk_" . $chunk_counter . ".fa"; write_chunk( $outfile, @seqarray ); @seqarray = (); $chunk_counter += 1; @@ -129,52 +121,61 @@ } -my $outfile = +$outfile = $outdir . "/chunk_" . $chunk_counter . ".fa"; # Clunky, the last chunk is <= chunk_size... write_chunk( $outfile, @seqarray ); # Push all jobs into the command list for ( my $i = 1; $i <= $chunk_counter; $i++ ) { - my $infile = $outdir . "/chunk_" . $i . ".fa"; + my $infile = $outdir . "/chunk_" . $i . ".fa"; my $outfile = $outdir . "/chunk_" . $i . ".pfam"; my $cmd = "hmmscan --cpu 1 --domtblout " . $outfile . " " . $pfam_hmm_file . " " . $infile . " > /dev/null" ; push( @cmds, $cmd ); } -msg("submitting chunks\n"); + # Submit job chunks to grid + msg("submitting chunks\n"); -if( ! 
$nogrid){ - # Submit job chunks to grid - msg("Sending $chunk_counter jobs to LSF grid\n"); - chomp(@cmds); # Remove empty indices - &$grid_computing_method(@cmds); +if( $grid ){ + msg("Sending $#cmds jobs to the grid\n"); + chomp(@cmds); # Remove empty indices + # Submit job chunks to grid + my $grid_runner; + if ( $grid eq 'lsf'){ + $grid_runner = Bsub->new( cmds_list => \@cmds); + } + elsif( $grid eq 'slurm'){ + $grid_runner = Sbatch->new( cmds_list => \@cmds); + } + if($queue){$grid_runner->queue($queue)} + $grid_runner->run(); } else{ - foreach my $command (@cmds){ + foreach my $command (@cmds){ - system($command); + system($command); - if ($? == -1) { - print "failed to execute: $!\n"; + if ($? == -1) { + print "failed to execute: $!\n"; + } + elsif ($? & 127) { + printf "child died with signal %d, %s coredump\n", + ($? & 127), ($? & 128) ? 'with' : 'without'; } - elsif ($? & 127) { - printf "child died with signal %d, %s coredump\n", - ($? & 127), ($? & 128) ? 'with' : 'without'; - } - else { - printf "child exited with value %d\n", $? >> 8; - } - } + else { + printf "child exited with value %d\n", $? 
>> 8; + } + } } # ..Postprocessing here, merging of output msg("Merging output and writing GFF file"); -my @files = <$outdir/*.pfam>; +my @files = <$outdir/*.pfam>; -my $outfile = "pfam.merged"; +$outfile = "pfam.merged"; open (my $OUT, '>', $outdir."/".$outfile) or die "FATAL: Can't open file: $outfile for reading.\n$!\n"; foreach my $file (@files) { @@ -182,12 +183,12 @@ open (my $IN, '<', $file) or die "FATAL: Can't open file: $file for reading.\n$!\n"; while (<$IN>) { - chomp; - my $line = $_; + chomp; + my $line = $_; next if ($line =~ /^#.*$/); # Skipping comment lines - - print $OUT $line; - } + + print $OUT $line; + } } close ($OUT); @@ -197,7 +198,7 @@ # -------------------- sub write_chunk -{ +{ my $outfile = shift; my @seqs = @_; my $seq_out = @@ -239,13 +240,17 @@ sub err { =head1 NAME -pfam2grid.pl - -We run hmmscan searches against a pfam.hmm +gaas_pfam2grid.pl + +=head1 DESCRIPTION + +Chunk input data to run multiple hmmscan searches in parallel +We run hmmscan searches against a pfam.hmm =head1 SYNOPSIS - ./pfam2grid.pl -f genome.fasta -o outdir - ./pfam2grid.pl --help + gaas_pfam2grid.pl -f genome.fasta -o outdir + gaas_pfam2grid.pl --help =head1 OPTIONS @@ -253,19 +258,31 @@ =head1 OPTIONS =item B<--fasta> or B<-f> -The name of the fasta file to read. +The name of the fasta file to read. + +=item B<--chunk_size> + +We create chunks with a maximum of $chunk_size sequences. By default 500. + +=item B<--hmm> -=item B<--hmm> +File containing the pfam hmm models -File containing the pfam hmm models +=item B<--queue> -=item B<--nogrid> +If you want to define a particular queue to run the jobs -Do not use the script in grid version. +=item B<--grid> + +Define which grid to use, Slurm, Lsf or None. Default = Slurm. + +=item B<--quiet> or B<-q> + +Quiet mode =item B<--outdir> or B<-o> -The name of the output directory. +The name of the output directory. 
=item B<-h> or B<--help> @@ -273,4 +290,30 @@ =head1 OPTIONS =back +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + =cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Grid/rfam2grid.pl b/annotation/tools/grid/rfam2grid.pl similarity index 82% rename from annotation/Tools/Grid/rfam2grid.pl rename to annotation/tools/grid/rfam2grid.pl index 121cff40b..075031dfe 100755 --- a/annotation/Tools/Grid/rfam2grid.pl +++ b/annotation/tools/grid/rfam2grid.pl @@ -2,34 +2,27 @@ use strict; use warnings; -use Data::Dumper; use Carp; use Getopt::Long; use Pod::Usage; use Scalar::Util qw(openhandle); use Time::Piece; use Time::Seconds; -use NBIS::Grid::Bsub; -use NBIS::Grid::Sbatch; use File::Basename; use Bio::SeqIO; use Cwd; use Bio::SeqFeature::Generic; use Bio::Tools::GFF; no strict qw(subs refs); +use GAAS::Grid::Bsub; +use GAAS::Grid::Sbatch; +use GAAS::GAAS; -my $header = qq{ -######################################################## -# NBIS - Sweden # -# # -# Please cite NBIS (www.NBIS.se) when using this tool. 
# -######################################################## -}; - +my $header = get_gaas_header(); my $rfam_cm_file = "/projects/references/databases/rfam/14.1/Rfam.cm"; #cm models to be annotated by tRNAscan my $gff_formatter = Bio::Tools::GFF->new(-gff_version => 3); - -my $outdir = undef; +my $queue=undef; +my $outdir = "rfam_output"; my $fasta = undef; my @cmds = (); # Stores the commands to send to farm my $quiet; @@ -38,10 +31,12 @@ my $grid="Slurm"; if ( !GetOptions( - "help" => \$help, + "h|help!" => \$help, "fasta|f=s" => \$fasta, "cm=s" => \$rfam_cm_file, "grid=s" => \$grid, + "queue=s" => \$queue, + "quiet|q!" => \$quiet, "outdir|o=s" => \$outdir)) { @@ -52,12 +47,12 @@ # Print Help and exit if ($help) { - pod2usage( { -verbose => 1, + pod2usage( { -verbose => 99, -exitval => 0, -message => "$header \n" } ); } -if ( ! (defined($fasta) and defined($outdir) ) ){ +if ( ! defined( $fasta ) ){ pod2usage( { -message => "$header\nAt least 2 parameter are mandatory:\nInput fasta file and output directory \n\n", -verbose => 0, @@ -105,7 +100,6 @@ msg("Creating chunks\n"); my $seq; - my $seq_counter = 0; while( $seq = $inseq->next_seq() ) { @@ -121,7 +115,7 @@ msg("submitting chunks\n"); if( $grid){ - msg("Sending $seq_counter jobs to the grid\n"); + msg("Sending $#cmds jobs to the grid\n"); chomp(@cmds); # Remove empty indices # Submit job chunks to grid my $grid_runner; @@ -131,6 +125,7 @@ elsif( $grid eq 'slurm'){ $grid_runner = Sbatch->new( cmds_list => \@cmds); } + if($queue){$grid_runner->queue($queue)} $grid_runner->run(); } else{ @@ -253,14 +248,18 @@ sub err { =head1 NAME -rfam2grid.pl - +gaas_rfam2grid.pl + +=head1 DESCRIPTION + +Chunk input data to run multiple rfam jobs in parallel We currently run infernal (cmsearch) searches directly on the contigs – rather than using the Rfam pipeline with it’s two-step search approach (blast to limit candidates, infernal to refine and verify). 
Infernal ("INFERence of RNA ALignment") is for searching DNA sequence databases for RNA structure and sequence similarities. It is an implementation of a special case of profile stochastic context-free grammars called covariance models (CMs). A CM is like a sequence profile, but it scores a combination of sequence consensus and RNA secondary structure consensus, so in many cases, it is more capable of identifying RNA homologs that conserve their secondary structure more than their primary sequence. =head1 SYNOPSIS - ./rfam2grid.pl -f genome.fasta -o outdir - ./rfam2grid.pl --help + gaas_rfam2grid.pl -f genome.fasta -o outdir + gaas_rfam2grid.pl --help =head1 OPTIONS @@ -278,6 +277,14 @@ =head1 OPTIONS Define which grid to use, Slurm, Lsf or None. Default = Slurm. +=item B<--queue> + +If you want to define a particular queue to run the jobs + +=item B<--quiet> or B<-q> + +Quiet mode + =item B<--outdir> or B<-o> The name of the output directory. @@ -288,4 +295,30 @@ =head1 OPTIONS =back +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + =cut + +AUTHOR - Jacques Dainat diff --git a/annotation/tools/grid/scipio2grid.pl b/annotation/tools/grid/scipio2grid.pl new file mode 100755 index 000000000..d3d8c426a --- /dev/null +++ b/annotation/tools/grid/scipio2grid.pl @@ -0,0 +1,283 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Pod::Usage; +use Getopt::Long; +use Scalar::Util qw(openhandle); +use Time::Piece; +use Time::Seconds; +use File::Basename; +use Bio::SeqIO; +use Cwd; +use Carp; +no strict qw(subs refs); +use GAAS::Grid::Bsub; +use GAAS::Grid::Sbatch; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $scipio_outfile = "scipio.merged.gff"; +my $outdir = undef; +my $genome = undef; +my $protein = undef; +my $queue=undef; +my $chunk_size = 10; # Partition size of fasta input +my @chunks = (); # Holds chunks, partitioning the fasta input (so we don't send 50.000 jobs to the farm... +my @cmds = (); # Stores the commands to send to farm +my $quiet; +my $help; +my $grid="Slurm"; + +if ( !GetOptions( + "h|help" => \$help, + "fasta|f=s" => \$genome, + "grid=s" => \$grid, + "chunk=i" => \$chunk_size, + "protein|p=s" => \$protein, + "queue=s" => \$queue, + "quiet|q!" => \$quiet, + "outdir|o=s" => \$outdir)) + +{ + pod2usage( { -message => "Failed to parse command line", + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if ( ! defined($genome) ){ + pod2usage( { + -message => "$header\nAt least 2 parameter are mandatory:\nInput fasta file and output directory \n\n", + -verbose => 0, + -exitval => 2 } ); +} + +# set grid option properly +my @grid_choice=('slurm','lsf','none'); +$grid=lc($grid); +if (! 
grep( /^$grid/, @grid_choice ) ) { + print "$grid is not a value accepted for grid parameter.";exit; +} +$grid= undef if lc($grid) eq 'none'; + +# .. Check that all binaries are available in $PATH + +my @tools = ( "scipio.pl" , "blat" ); +foreach my $exe (@tools) { check_bin($exe) == 1 or die "Missing executable $exe in PATH"; } + +# .. Create output directory + +if (-d $outdir ) { + die "Output directory $outdir exists. Please remove and try again"; +} else { + msg("Creating output directory $outdir"); + runcmd("mkdir -p $outdir") +} + +# .. set up log file + +my $logfile = "$outdir/scipio2grid.log"; +msg("Writing log to: $logfile"); +open LOG, '>', $logfile or err("Can't open logfile"); + +# .. Read protein fasta file. +my $inseq = Bio::SeqIO->new(-file => "<$genome", -format => 'fasta'); + +# .. and create chunks +msg("Creating chunks for GRID\n"); + +my @seqarray = (); +my $counter = 0; +my $chunk_counter = 1; + +my $seq; + +while( $seq = $inseq->next_seq() ) { + $counter += 1; + push(@seqarray,$seq); + + if ($counter == $chunk_size) { + my $outfile = $outdir . "/chunk_" . $chunk_counter . ".fa"; + write_chunk($outfile,@seqarray); + @seqarray = (); + $chunk_counter += 1; + $counter = 0; + } +} +my $outfile = $outdir . "/chunk_" . $chunk_counter . ".fa"; # Clunky, the last chunk is <= chunk_size... 
+write_chunk($outfile,@seqarray); + +# Push all jobs into the command list +for (my $i=1;$i<=$chunk_counter;$i++) { + my $scipio_cmd = "scipio.pl $outdir/chunk_$i.fa $protein > $outdir/chunk_$i.scipio"; + push(@cmds,$scipio_cmd); +} + +# SUBMISSION +msg("submitting chunks\n"); +if( $grid){ + msg("Sending $#cmds jobs to the grid\n"); + chomp(@cmds); # Remove empty indices + # Submit job chunks to grid + my $grid_runner; + if ( $grid eq 'lsf'){ + $grid_runner = Bsub->new( cmds_list => \@cmds); + } + elsif( $grid eq 'slurm'){ + $grid_runner = Sbatch->new( cmds_list => \@cmds); + } + $grid_runner->run(); +} +else{ + foreach my $command (@cmds){ + + system($command); + + if ($? == -1) { + print "failed to execute: $!\n"; + } + elsif ($? & 127) { + printf "child died with signal %d, %s coredump\n", + ($? & 127), ($? & 128) ? 'with' : 'without'; + } + else { + printf "child exited with value %d\n", $? >> 8; + } + } +} + +# Merging the outputs +msg("Merging outputs from chunks"); + +my @files = <$outdir/*.scipio>; + +foreach my $file (@files) { + system("cat $file >> $outdir/scipio.merged"); +} + +system("yaml2gff.1.4.pl $outdir/scipio.merged > $scipio_outfile 2> /dev/null"); + +msg("Finished scipio grid run."); + +# -------------------- + +sub write_chunk { + my $outfile = shift; + my @seqs = @_; + my $seq_out = Bio::SeqIO->new(-file => ">$outfile", -format => 'fasta'); + foreach my $seq (@seqs) {$seq_out->write_seq($seq)}; +} + +# -------------------- + +sub msg { + my $t = localtime; + my $line = "[".$t->hms."] @_\n"; + print LOG $line if openhandle(\*LOG); + print STDERR $line unless $quiet; +} + +# -------------------- + +sub runcmd { + msg("Running:", @_); + system(@_)==0 or err("Could not run command:", @_); +} + +# -------------------- + +sub check_bin { + length(`which @_`) > 0 ? 
return 1 : return 0; +} + +#---------------------------------------------------------------------- + +sub err { + $quiet=0; + msg(@_); + exit(0); +} + +__END__ + +=head1 NAME + +gaas_scipiogrid.pl + +=head1 DESCRIPTION + +Chunk input data to run multiple scipio jobs in parallel using Grid + +=head1 SYNOPSIS + + gaas_scipiogrid.pl --genome genome.fasta --protein protein.fasta -o outdir + gaas_scipiogrid.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<--genome> or B<-g> + +The name of the genome file to read. + +=item B<--protein> or B<-p> + +The name of the protein file to read. + +=item B<--chunk> + +By default 10. We slice the fasta input file in many chunk to distribute more efficiently small tasks to each cpu. + +=item B<--queue> + +If you want to define a particular queue to run the jobs + +=item B<--grid> + +Define which grid to use, Slurm, Lsf or None. Default = Slurm. + +=item B<--outdir> or B<-o> + +The name of the output directory. + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Grid/tblastn2grid.pl b/annotation/tools/grid/tblastn2grid.pl similarity index 68% rename from annotation/Tools/Grid/tblastn2grid.pl rename to annotation/tools/grid/tblastn2grid.pl index e78d88876..0d54d143f 100755 --- a/annotation/Tools/Grid/tblastn2grid.pl +++ b/annotation/tools/grid/tblastn2grid.pl @@ -7,14 +7,16 @@ use Scalar::Util qw(openhandle); use Time::Piece; use Time::Seconds; -use FindBin; -use lib ( "$FindBin::Bin/PerlLib", "$FindBin::Bin/PerlLibAdaptors" ); use File::Basename; use Bio::SeqIO; use Cwd; use Carp; no strict qw(subs refs); +use GAAS::Grid::Bsub; +use GAAS::Grid::Sbatch; +use GAAS::GAAS; +my $header = get_gaas_header(); my $outdir = undef; my $db = undef; my $fasta = undef; @@ -25,6 +27,8 @@ # don't send 50.000 jobs to the farm... my @cmds = (); # Stores the commands to send to farm my $quiet; +my $queue=undef; +my $grid="Slurm"; my $help; if ( !GetOptions( "help" => \$help, @@ -33,6 +37,9 @@ "chunk_size=i" => \$chunk_size, "nb_seq=i" => \$nb_seq, "eval" => \$eval, + "quiet!" => \$quiet, + "grid=s" => \$grid, + "queue=s" => \$queue, "outdir=s" => \$outdir, "h|help!" => \$help ) ) { @@ -63,6 +70,14 @@ -exitval => 2 } ); } +# set grid option properly +my @grid_choice=('slurm','lsf','none'); +$grid=lc($grid); +if (! grep( /^$grid/, @grid_choice ) ) { + print "$grid is not a value accepted for grid parameter.";exit; +} +$grid= undef if lc($grid) eq 'none'; + # .. Check that all binaries are available in $PATH my @tools = ("blastp"); @@ -87,18 +102,6 @@ msg("Writing log to: $logfile"); open LOG, '>', $logfile or err ("Can't open logfile"); -# .. 
load grid module (courtesy of Brian Haas) - -my $grid_computing_module = "BilsGridRunner"; -my $perl_lib_repo = "$FindBin::Bin/../PerlLibAdaptors"; -msg("-importing module: $grid_computing_module\n"); -require "$grid_computing_module.pm" or - die -"Error, could not import perl module at run-time: $grid_computing_module"; - -my $grid_computing_method = $grid_computing_module . "::run_on_grid" or - die "Failed to initialize GRID module\n"; - # .. Read protein fasta file. my $inseq = Bio::SeqIO->new( -file => "<$fasta", -format => 'fasta' ); @@ -148,9 +151,41 @@ } } + # Submit job chunks to grid -chomp(@cmds); # Remove empty indices -&$grid_computing_method(@cmds); +msg("submitting chunks\n"); + +if( $grid){ + msg("Sending $#cmds jobs to the grid\n"); + chomp(@cmds); # Remove empty indices + # Submit job chunks to grid + my $grid_runner; + if ( $grid eq 'lsf'){ + $grid_runner = Bsub->new( cmds_list => \@cmds); + } + elsif( $grid eq 'slurm'){ + $grid_runner = Sbatch->new( cmds_list => \@cmds); + } + if($queue){$grid_runner->queue($queue)} + $grid_runner->run(); +} +else{ + foreach my $command (@cmds){ + + system($command); + + if ($? == -1) { + print "failed to execute: $!\n"; + } + elsif ($? & 127) { + printf "child died with signal %d, %s coredump\n", + ($? & 127), ($? & 128) ? 'with' : 'without'; + } + else { + printf "child exited with value %d\n", $? 
>> 8; + } + } +} # Merging the outputs msg("Merging outputs from chunks"); @@ -212,12 +247,16 @@ sub err =head1 NAME -The script allows to generate a tblastn using Grid +gaas_tblastn2grid.pl + +=head1 DESCRIPTION + +Chunk input data to run multiple tblastn jobs in parallel using Grid =head1 SYNOPSIS - tblastn2grid.pl --chunck 100 --db genome.fa --eval 1e-6 --outdir blastouput --fasta db.fasta - tblastn2grid.pl --help + gaas_tblastn2grid.pl --chunck 100 --db genome.fa --eval 1e-6 --outdir blastouput --fasta db.fasta + gaas_tblastn2grid.pl --help =head1 OPTIONS @@ -245,6 +284,18 @@ =head1 OPTIONS The evalue of the sequences kept in the result +=item B<--queue> + +If you want to define a particular queue to run the jobs + +=item B<--grid> + +Define which grid to use, Slurm, Lsf or None. Default = Slurm. + +=item B<--quiet> or B<-q> + +Quiet mode + =item B<--outdir> The name of the output directory. @@ -255,5 +306,30 @@ =head1 OPTIONS =back +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + =cut +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Grid/transposonPSI2grid.pl b/annotation/tools/grid/transposonPSI2grid.pl similarity index 78% rename from annotation/Tools/Grid/transposonPSI2grid.pl rename to annotation/tools/grid/transposonPSI2grid.pl index 9c5a15af0..c8a995859 100755 --- a/annotation/Tools/Grid/transposonPSI2grid.pl +++ b/annotation/tools/grid/transposonPSI2grid.pl @@ -8,24 +8,18 @@ use Time::Piece; use POSIX; use Time::Seconds; -use NBIS::Grid::Bsub; -use NBIS::Grid::Sbatch; use File::Basename; use Bio::SeqIO; use Cwd; use Carp; use Bio::SeqFeature::Generic; no strict qw(subs refs); +use GAAS::Grid::Bsub; +use GAAS::Grid::Sbatch; +use GAAS::GAAS; -my $header = qq{ -######################################################## -# NBIS - Sweden # -# # -# Please cite NBIS (www.NBIS.se) when using this tool. # -######################################################## -}; - -my $outdir = transposonPSI_output; +my $header = get_gaas_header(); +my $outdir = "transposonPSI_output"; my $fastaFile; my @cmds = (); # Stores the commands to send to farm my $quiet; @@ -35,12 +29,13 @@ my $chunk=10; if ( !GetOptions( - "help" => \$help, + "h|help!" => \$help, "fasta|f=s" => \$fastaFile, "grid=s" => \$grid, + "quiet|q!" => \$quiet, "chunk=i" => \$chunk, "queue=s" => \$queue, - "outdir|o=s" => \$outdir)) + "outdir|o=s" => \$outdir ) ) { pod2usage( { -message => "Failed to parse command line", @@ -50,14 +45,14 @@ # Print Help and exit if ($help) { - pod2usage( { -verbose => 1, + pod2usage( { -verbose => 99, -exitval => 0, - -message => "$header \n" } ); + -message => "$header\n" } ); } if ( ! 
defined($fastaFile) ){ pod2usage( { - -message => "$header\nAt least 2 parameter are mandatory:\nInput fasta file and output directory \n\n", + -message => "$header\nAt least 1 parameter is mandatory:\nInput fasta file\n\n", -verbose => 0, -exitval => 2 } ); } @@ -128,7 +123,7 @@ msg("submitting chunks\n"); if( $grid){ - msg("Sending $seq_counter jobs to the grid\n"); + msg("Sending $#cmds jobs to the grid\n"); chomp(@cmds); # Remove empty indices # Submit job chunks to grid my $grid_runner; @@ -237,14 +232,16 @@ sub err { =head1 NAME -transposonPSI2grid.pl - +gaas_transposonPSI2grid.pl - -=head1 SYNOPSIS +=head1 DESCRIPTION -We transposonPSI against protein fasta sequences +Chunk input data to run multiple transposonPSI jobs in parallel + +=head1 SYNOPSIS - ./transposonPSI2grid.pl -f fasta_file -o outdir - ./transposonPSI2grid.pl --help + gaas_transposonPSI2grid.pl -f fasta_file -o outdir + gaas_transposonPSI2grid.pl --help =head1 OPTIONS @@ -266,6 +263,10 @@ =head1 OPTIONS Define which grid to use, Slurm, Lsf or None. Default = Slurm. +=item B<--quiet> or B<-q> + +Quiet mode + =item B<--outdir> or B<-o> The name of the output directory. @@ -276,4 +277,30 @@ =head1 OPTIONS =back +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + =cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Grid/trnascan2grid.pl b/annotation/tools/grid/trnascan2grid.pl similarity index 51% rename from annotation/Tools/Grid/trnascan2grid.pl rename to annotation/tools/grid/trnascan2grid.pl index 348b21b4c..8a69e5682 100755 --- a/annotation/Tools/Grid/trnascan2grid.pl +++ b/annotation/tools/grid/trnascan2grid.pl @@ -3,11 +3,11 @@ use strict; use warnings; use Getopt::Long; +use Pod::Usage; use Scalar::Util qw(openhandle); use Time::Piece; use Time::Seconds; use FindBin; -use lib ("$FindBin::Bin/PerlLib", "$FindBin::Bin/PerlLibAdaptors"); use File::Basename; use Bio::SeqIO; use Cwd; @@ -15,51 +15,63 @@ use Bio::SeqFeature::Generic; use Bio::Tools::GFF; no strict qw(subs refs); +use GAAS::Grid::Bsub; +use GAAS::Grid::Sbatch; +use GAAS::GAAS; - -my $usage = qq{ -perl my_script.pl - Getting help: - [--help] - - Input: - [--fasta filename] - The name of the genome file to read. - - Ouput: - [--outdir name] - The name of the output directory. - -}; - -my $grid_computing_module = "BilsGridRunner"; -my $rfam_cm_file = "/references/databases/rfam/11.0/Rfam.cm.1_1"; +my $header = get_gaas_header(); my $gff_formatter = Bio::Tools::GFF->new(-gff_version => 3); - my $outdir = undef; my $fasta = undef; my @cmds = (); # Stores the commands to send to farm my $quiet; +my $grid="Slurm"; my @annotations = (); # Stores Rfama annotations as hashes my $help; - -GetOptions( - "help" => \$help, - "fasta=s" => \$fasta, - "outdir=s" => \$outdir); +my $queue=undef; + +if ( !GetOptions( + "h|help!" => \$help, + "fasta|f=s" => \$fasta, + "grid=s" => \$grid, + "queue=s" => \$queue, + "quiet|q!" 
=> \$quiet, + "outdir|o=s" => \$outdir)) + +{ + pod2usage( { -message => "Failed to parse command line", + -verbose => 1, + -exitval => 1 } ); +} # Print Help and exit if ($help) { - print $usage; - exit(0); + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header \n" } ); } +if ( ! (defined($fasta) and defined($outdir) ) ){ + pod2usage( { + -message => "$header\nAt least 2 parameter are mandatory:\nInput fasta file and output directory \n\n", + -verbose => 0, + -exitval => 2 } ); +} + +# set grid option properly +my @grid_choice=('slurm','lsf','none'); +$grid=lc($grid); +if (! grep( /^$grid/, @grid_choice ) ) { + print "$grid is not a value accepted for grid parameter.";exit; +} +$grid= undef if lc($grid) eq 'none'; + # .. Check that all binaries are available in $PATH my @tools = ( "tRNAscan-SE" ); # List of tools to check for! foreach my $exe (@tools) { check_bin($exe) == 1 or die "Missing executable $exe in PATH"; } -# .. Create output directory +# .. Create output directory if (-d $outdir ) { die "Output directory $outdir exists. Please remove and try again"; @@ -74,14 +86,6 @@ msg("Writing log to: $logfile"); open LOG, '>', $logfile or err("Can't open logfile"); -# .. load grid module (courtesy of Brian Haas) - -my $perl_lib_repo = "$FindBin::Bin/../PerlLibAdaptors"; -msg("-importing module: $grid_computing_module\n"); -require "$grid_computing_module.pm" or die "Error, could not import perl module at run-time: $grid_computing_module"; - -my $grid_computing_method = $grid_computing_module . "::run_on_grid" or die "Failed to initialize GRID module\n"; - # .. Read genome fasta file. my $inseq = Bio::SeqIO->new(-file => "<$fasta", -format => 'fasta'); @@ -93,7 +97,7 @@ my $seq_counter = 0; while( $seq = $inseq->next_seq() ) { - $seq_counter += 1; + $seq_counter += 1; my $outfile = $outdir . "/seq_" . $seq_counter . 
".fasta" ; # We could also use the display_id, but this can cause trouble with special characters my $seq_out = Bio::SeqIO->new(-file => ">$outfile" , -format => 'fasta'); $seq_out->write_seq($seq); @@ -102,17 +106,45 @@ } # Submit job chunks to grid - -msg("Sending $seq_counter jobs to LSF grid\n"); - -chomp(@cmds); # Remove empty indices -&$grid_computing_method(@cmds); +msg("submitting chunks\n"); + +if( $grid){ + msg("Sending $#cmds jobs to the grid\n"); + chomp(@cmds); # Remove empty indices + # Submit job chunks to grid + my $grid_runner; + if ( $grid eq 'lsf'){ + $grid_runner = Bsub->new( cmds_list => \@cmds); + } + elsif( $grid eq 'slurm'){ + $grid_runner = Sbatch->new( cmds_list => \@cmds); + } + if($queue){$grid_runner->queue($queue)} + $grid_runner->run(); +} +else{ + foreach my $command (@cmds){ + + system($command); + + if ($? == -1) { + print "failed to execute: $!\n"; + } + elsif ($? & 127) { + printf "child died with signal %d, %s coredump\n", + ($? & 127), ($? & 128) ? 'with' : 'without'; + } + else { + printf "child exited with value %d\n", $? >> 8; + } + } +} # ..Postprocessing here, merging of output and printing gff msg("Merging output and writing GFF file"); -my @files = <$outdir/*.trna>; +my @files = <$outdir/*.trna>; foreach my $file (@files) { @@ -120,14 +152,14 @@ open (my $IN, '<', $file) or die "FATAL: Can't open file: $file for reading.\n$!\n"; while (<$IN>) { - chomp; - my $line = $_; + chomp; + my $line = $_; next if ($line =~ /^Sequence.*/ or $line =~ /^Name.*/ or $line =~ /^---.*/); # Skipping comment lines - + my $annotation = parse_line($line); push(@annotations,$annotation); - - } + + } } my $outfile = $outdir . "/trnascan_annotations.gff"; @@ -146,17 +178,17 @@ sub parse_line { # chomp; my $line = shift ; my ($seq,$trna_num,$tstart,$tend,$ttype,$tanti,$istart,$iend,$score) = split(/\s+/,$line); - + my %tags = ( 'tRNA-type' => $ttype, 'anti-codon' => ($tanti || 'unknown'), 'ID' => 'tRNA_' . $ttype . "_" . $seq . "_" . 
$tstart, 'Name' => 'tRNA_' . $ttype . "_" . $seq . "_" . $tstart, ); - + my($from,$to) = sort($tstart,$tend); - + my $strand = ( $tstart < $tend ? '1' : '-1' ); - + my $f = Bio::SeqFeature::Generic->new( -seq_id => $seq, -start => $from, -end => $to, @@ -167,7 +199,7 @@ sub parse_line { -score => $score, -tag => \%tags, ); - + return $f; } @@ -200,3 +232,80 @@ sub err { msg(@_); exit(2); } + +__END__ + +=head1 NAME + +gaas_trnascan2grid.pl + +=head1 DESCRIPTION + +Chunk input data to run multiple trnascan jobs in parallel using Grid + +=head1 SYNOPSIS + + gaas_trnascan2grid.pl -f fasta_file -o outdir + gaas_trnascan2grid.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<--fasta> or B<-f> + +The name of the protein fasta file to read. + +=item B<--queue> + +If you want to define a particular queue to run the jobs + +=item B<--grid> + +Define which grid to use, Slurm, Lsf or None. Default = Slurm. + +=item B<--queue> + +If you want to define a particular queue to run the jobs + +=item B<--quiet> or B<-q> + +Quiet mode + +=item B<--outdir> or B<-o> + +The name of the output directory. + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Maker/maker_AEDmeanInGffFile.sh b/annotation/tools/maker/maker_AEDmeanInGffFile.sh similarity index 100% rename from annotation/Tools/Maker/maker_AEDmeanInGffFile.sh rename to annotation/tools/maker/maker_AEDmeanInGffFile.sh diff --git a/annotation/Tools/Maker/maker_AEDplot.pl b/annotation/tools/maker/maker_AEDplot.pl similarity index 87% rename from annotation/Tools/Maker/maker_AEDplot.pl rename to annotation/tools/maker/maker_AEDplot.pl index 27a23aeb9..8a047e724 100755 --- a/annotation/Tools/Maker/maker_AEDplot.pl +++ b/annotation/tools/maker/maker_AEDplot.pl @@ -1,23 +1,19 @@ #!/usr/bin/env perl -############################################# -# Jacques Dainat 11/2014 # This version use the GFF.pm from Andreas code -############################################# - -#libraries +use strict; +use warnings; use Statistics::R; use File::Basename; use strict; -use warnings; -use Data::Dumper; use Carp; use Getopt::Long; use IO::File; use Pod::Usage; use List::MoreUtils qw(uniq); use Bio::Tools::GFF; -use NBIS::CheckModule qw(:Ok); -# END libraries +use GAAS::GAAS; + +my $header = get_gaas_header(); # PARAMETERS - OPTION my @opt_files; @@ -52,6 +48,16 @@ -exitval => 2 } ); } +# Check R is available. If not we try to load it through Module software + +if ( system("R --version 1>/dev/null 2>/dev/null") == 0 ) { + print "R is available. We can continue\n"; +} +else { + die "R no available. 
We cannot perform any plot\n"; +} + + ############################# ####### Manage options ####### ############################# @@ -69,27 +75,12 @@ if (-f $opt_output){ print "Cannot create a directory with the name $opt_output because a file with this name already exists.\n";exit(); } - $outputPDF=$opt_output."pdf"; + $outputPDF=$opt_output."pdf"; } else{ $outputPDF="outputPlot.pdf"; } -# Check R is available. If not we try to load it through Module software -if ( system("R --version 1>/dev/null 2>/dev/null") == 0 ) { - print "R is available. We can continue\n"; -} -else { - print "R is not loaded. We try to load it.\n"; - if(module_software_installed){ - module_load("R"); - } - else{ - print "Module tool doesn't exists. We cannot load R through it."; - } -} - - ####################### # MAIN # ####################### @@ -108,10 +99,10 @@ foreach my $file (@opt_files){ my $gffio = Bio::Tools::GFF->new(-file => $file, -gff_version => 3); - + # parse file name te remove extension my ($file1,$dir1,$ext1) = fileparse($file, qr/\.[^.]*/); - + #Parse GFF to get AED information for each mRNA my $listRef=parseGFF($gffio); @@ -136,7 +127,7 @@ $ostreamAED->open( $pathAED, 'w' ) or croak( sprintf( "Can not open '%s' for writing %s", $pathAED, $! 
) - ); + ); foreach my $AEDvalue (@{$hashOfList{$fileName}}){ print $ostreamAED "$AEDvalue\n"; } @@ -147,15 +138,15 @@ #R command $R->run(qq` - + listValues1=as.matrix(read.table("$pathAED", sep="\t", he=F)) - + # create a break point list formated correctly in purpose a<-seq(0,0.9999,$opt_breaks) a[length(a)+1]<-0.99999 a[length(a)+1]<-1 breakingPointList<-c(0,a) - + hist1<-hist(listValues1, breaks=breakingPointList, plot=F) plot(hist1\$mids,hist1\$counts) #par(new=TRUE) @@ -195,12 +186,12 @@ $R_command.=write_first_R_command($pathAED, $fileName, $highestYaxis, $nbFile, $outputPDF); } } - + #################################### # Surround main part of R command # #################################### - + #add header my $final_R_command='#create output pdf("'.$outputPDF.'") @@ -223,7 +214,7 @@ # remove temporary files unlink @listTmpFile; - ######################### + ######################### ######### END ########### ######################### ####################################################################################################################### @@ -236,23 +227,23 @@ ######## ###### #### - ## + ## sub write_first_R_command{ my ($pathIn1,$name1,$yAxisValue,$colorNb,$outputPDF)=@_; my $command=' - + listValues1=as.matrix(read.table("'.$pathIn1.'", sep="\t", he=F)) legendInfo=paste("'.$name1.'","(",length(listValues1),"mRNAs )") listlegend<-c(listlegend,legendInfo) - + # create a break point list formated correctly in purpose a<-seq(0,0.9999,'.$opt_breaks.') a[length(a)+1]<-0.99999 a[length(a)+1]<-1 breakingPointList<-c(0,a) - + hist1<-hist(listValues1, breaks=breakingPointList, plot=F) plot(hist1$mids,hist1$counts, type="l", ylim=c(0,'.$yAxisValue.'), col='.$colorNb.', main="", xlab="AED score", ylab="Number of mRNA") '; @@ -265,17 +256,17 @@ sub write_R_command{ my ($pathIn1,$name1,$yAxisValue,$colorNb,$outputPDF)=@_; my $command=' - + listValues1=as.matrix(read.table("'.$pathIn1.'", sep="\t", he=F)) 
legendInfo=paste("'.$name1.'","(",length(listValues1),"mRNAs )") listlegend<-c(listlegend,legendInfo) - + # create a break point list formated correctly in purpose a<-seq(0,0.9999,'.$opt_breaks.') a[length(a)+1]<-0.99999 a[length(a)+1]<-1 breakingPointList<-c(0,a) - + hist1<-hist(listValues1, breaks=breakingPointList, plot=F) plot(hist1$mids,hist1$counts, type="l", ylim=c(0,'.$yAxisValue.'), col='.$colorNb.', main="", yaxt="n", xaxt="n", xlab="", ylab="") '; @@ -314,9 +305,9 @@ sub parseGFF { print( "Reading features from $file_in...\n"); # read file and decompose it while (my $feature = $file_in->next_feature() ) { - + my $type = $feature->primary_tag(); - + if (lc($type) eq 'mrna'){ if(! $feature->has_tag('_AED')){ @@ -335,12 +326,15 @@ sub parseGFF { =head1 NAME -AEDplot.pl - +gaas_maker_AEDplot.pl + +=head1 DESCRIPTION + The script take one or several gff file(s) as input from Maker and create a Plot of their AED score (Attributes used: "_AED"). - =head1 SYNOPSIS - ./maker_AEDplot.pl -f infile1.gff[ --output outfile ] - ./maker_AEDplot.pl --help + gaas_maker_AEDplot.pl -f infile1.gff[ --output outfile ] + gaas_maker_AEDplot.pl --help =head1 OPTIONS @@ -363,4 +357,30 @@ =head1 OPTIONS =back +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + =cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Maker/maker_check_progress.sh b/annotation/tools/maker/maker_check_progress.sh similarity index 100% rename from annotation/Tools/Maker/maker_check_progress.sh rename to annotation/tools/maker/maker_check_progress.sh diff --git a/annotation/Tools/Maker/maker_check_progress_deeply.sh b/annotation/tools/maker/maker_check_progress_deeply.sh similarity index 100% rename from annotation/Tools/Maker/maker_check_progress_deeply.sh rename to annotation/tools/maker/maker_check_progress_deeply.sh diff --git a/annotation/Tools/Maker/maker_getRidOfContig_v2.pl b/annotation/tools/maker/maker_get_rid_of_contig.pl similarity index 66% rename from annotation/Tools/Maker/maker_getRidOfContig_v2.pl rename to annotation/tools/maker/maker_get_rid_of_contig.pl index b2a7dd93f..ba9a46d13 100755 --- a/annotation/Tools/Maker/maker_getRidOfContig_v2.pl +++ b/annotation/tools/maker/maker_get_rid_of_contig.pl @@ -3,12 +3,12 @@ #LIBRARIES use strict; use warnings; -use Data::Dumper; -use YAML::XS 'LoadFile'; use Getopt::Long; use Pod::Usage; use IO::File; +use GAAS::GAAS; +my $header = get_gaas_header(); #PARAMETERS my $input_listcontigWrong; my $input_datastorelog; @@ -17,16 +17,14 @@ my %contigs; my %datastores; - #get arg and PARAMETERS { my ($input_listcontigWrong, $input_datastorelog, $output_file, $delete_contig); # Define script options GetOptions( - 'help|h' => sub { pod2usage( -verbose => 2 ), -exitval => 0 }, - 'man' => sub { pod2usage( -verbose => 2 )}, - 'output-name|o=s' => \$output_file, + 'help|h' => sub { pod2usage( -verbose => 99 , -exitval => 0, -message => "$header\n" )}, + 'output|o=s' => \$output_file, 'contig-name|f=s' => \$input_listcontigWrong, 'datastore-name|d=s' => \$input_datastorelog, 'delete-contig|c=s' => \$delete_contig, @@ -105,51 +103,81 @@ sub get_list { 
=head1 NAME +gaas_maker_get_rid_of_contig.pl + +=head1 DESCRIPTION + Get rid of contigs not processed properly by maker in the log file and in the output folders of maker. Create a new log file that will need to be renamed as genome_master_datastore_index.log to replace the old one. Then maker can be rerun with this new log file. -=head1 AUTHOR - -Lucile SOLER NBIS 18/07/2016 - =head1 SYNOPSIS Get rid of contigs not processed properly by maker in the log file and in the output folders of maker. Create a new log file that will need to be renamed as genome_master_datastore_index.log to replace the old one. Then maker can be rerun with this new log file. -getRidOfContig.pl --help - -getRidOfContig.pl --contig-name|-f +gaas_maker_get_rid_of_contig.pl --help -getRidOfContig.pl --output-name|-o +gaas_maker_get_rid_of_contig.pl --contig-name|-f -getRidOfContig.pl --datastore-name|-d +gaas_maker_get_rid_of_contig.pl --output-name|-o -getRidOfContig.pl --delete-contig|-c log/all +gaas_maker_get_rid_of_contig.pl --datastore-name|-d - log option will only delete contigs in the log file - all option will delete contigs in the log file and contigs' folders +gaas_maker_get_rid_of_contig.pl --delete-contig|-c log/all =head1 OPTIONS -=over +=over 8 -=item B<--help> +=item B<--datastore-name> or B<-d> + +Input datastore log file + +=item B<--contig-name> or B<-f> + +Input file containing the list of wrong contig + +=item B<--delete-contig> or B<-c> + + option will only delete contigs in the log file + option will delete contigs in the log file and contigs' folders -Display a brief usage message. +=item B<--output> or B<-o> -=item B<--man> +File output name + +=item B<--help> Display the manual page. =back -=head1 DESCRIPTION +=head1 FEEDBACK -Get rid of contigs not processed properly by maker in the log file and in the output folders of maker. -Create a new log file that will need to be renamed as genome_master_datastore_index.log to replace the old one. 
-Then maker can be rerun with this new log file. +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md =cut + +AUTHOR - Lucile Soler / Jacques Dainat diff --git a/annotation/Tools/Maker/maker_get_rm_genome.pl b/annotation/tools/maker/maker_get_rm_genome.pl similarity index 71% rename from annotation/Tools/Maker/maker_get_rm_genome.pl rename to annotation/tools/maker/maker_get_rm_genome.pl index 3d0fef7de..9a2144fcf 100755 --- a/annotation/Tools/Maker/maker_get_rm_genome.pl +++ b/annotation/tools/maker/maker_get_rm_genome.pl @@ -1,6 +1,7 @@ #!/usr/bin/env perl use strict; +use warnings; use Getopt::Long; use Pod::Usage; use Scalar::Util qw(openhandle); @@ -8,15 +9,9 @@ use Time::Seconds; use File::Basename; use Cwd; +use GAAS::GAAS; -my $header = qq{ -######################################################## -# NBIS 2018 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. 
# -######################################################## -}; - +my $header = get_gaas_header(); my $dir = getcwd; my $outfile = "genome.rm.fa"; my $out_fh = undef; @@ -27,28 +22,25 @@ # OPTION MANAGMENT if ( !GetOptions( "help|h" => \$opt_help, - "i=s" => \$in, - "outfile|o=s" => \$outfile) ) + "i=s" => \$in, + "outfile|o=s" => \$outfile) ) { pod2usage( { -message => 'Failed to parse command line', -verbose => 1, -exitval => 1 } ); } if ($opt_help) { - pod2usage( { -verbose => 2, - -exitval => 2, + pod2usage( { -verbose => 99, + -exitval => 0, -message => "$header\n" } ); } - ####################### ### MANAGE OPTIONS #### ####################### - # MANAGE IN my @inDir; -my $dir = getcwd; if(! $in){ # Find the datastore index @@ -71,23 +63,23 @@ else{ push(@inDir, $in); } -} +} # MANAGE OUT if (-f $outfile) { die "The outfile $outfile already exists, exiting\n"; -} +} else{ open($out_fh, '>', "$outfile") or die "Could not open file 'outfile' $!"; } # MESSAGES -my $nbDir=$#inDir+1; -if ($nbDir == 0){die "There seems to be no maker output directory here, exiting...\n";} +my $nbDir=$#inDir+1; +if ($nbDir == 0){die "There seems to be no maker output directory here, exiting...\n";} print "We found $nbDir maker output directorie(s):\n"; foreach my $makerDir (@inDir){ print "\t+$makerDir\n"; -} +} if ($nbDir > 1 ){print "Results will be merged together !\n";} ##################### @@ -109,7 +101,7 @@ } else { die "Could not find datastore index ($datastore), exiting...\n"; } - + collect_recursive($datastore); #Close file_handler opened @@ -119,29 +111,29 @@ sub collect_recursive { my ($full_path) = @_; - + my ($name,$path,$suffix) = fileparse($full_path,qr/\.[^.]*/); if( ! 
-d $full_path ){ - + ################### # deal with fasta # if($name eq "query.masked" and $suffix eq ".fasta"){ - + #print open(my $fh, '<:encoding(UTF-8)', $full_path) or die "Could not open file '$full_path' $!"; while (<$fh>) { print $out_fh $_; } close $fh; - } + } return; } opendir my $dh, $full_path or die; while (my $sub = readdir $dh) { next if $sub eq '.' or $sub eq '..'; - + collect_recursive("$full_path/$sub"); } close $dh; @@ -149,47 +141,20 @@ sub collect_recursive { } - - __END__ - -# Streaming the file, line by line -while (<$IN>) { - chomp; # Trims the line, removes the line breaks - my $line = $_; # store the line in a variable - next unless ($line =~ /^.*FINISHED$/) ; # We only want finished contig annotations... - - my ($contig,$location,$status) = split("\t",$line); - - # If the contig includes a dot character, the output file will include a percent character... - if ($contig =~ /^.*\..*$/) { - $contig =~ s/\./\%2E/g ; - } - - my $void = $location . "theVoid." . $contig ; - - my $repeat_contig = $maker_dir . "/" . $void . "/" . "query.masked.fasta" ; - - if (-f $repeat_contig ) { - system("cat $repeat_contig >> $outfile"); - } - -} -# We should close the file to make sure that the transaction finishes cleanly. -close ($IN); - - =head1 NAME -maker_get_rm_genome.pl +gaas_maker_get_rm_genome.pl + +=head1 DESCRIPTION Must be executed in the folder from which Maker was run and will find the maker output on its own and create a concatenated masked assembly. =head1 SYNOPSIS - ./maker_get_rm_genome.pl + gaas_maker_get_rm_genome.pl -i maker_output_folder [-o GenomeMasked.fa] =head1 OPTIONS @@ -197,7 +162,7 @@ =head1 OPTIONS =item B<-i> -The path to the input directory. If none given, we assume that the script is launched where Maker was run. So, in that case the script will look for the folder +The path to the input directory. If none given, we assume that the script is launched where Maker was run. 
So, in that case the script will look for the folder *.maker.output. =item B<--outfile>, B<-o> @@ -210,4 +175,30 @@ =head1 OPTIONS =back +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + =cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Maker/maker_merge_outputs_from_datastore.pl b/annotation/tools/maker/maker_merge_outputs_from_datastore.pl similarity index 86% rename from annotation/Tools/Maker/maker_merge_outputs_from_datastore.pl rename to annotation/tools/maker/maker_merge_outputs_from_datastore.pl index 4629cf642..8753769bf 100755 --- a/annotation/Tools/Maker/maker_merge_outputs_from_datastore.pl +++ b/annotation/tools/maker/maker_merge_outputs_from_datastore.pl @@ -11,20 +11,13 @@ use Pod::Usage; use URI::Escape; use Getopt::Long qw(:config no_ignore_case bundling); -use NBIS::GFF3::Omniscient; use Bio::Tools::GFF; use IO::File; use File::Basename; use IPC::Cmd qw[can_run run]; +use GAAS::GAAS; -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. 
# -######################################################## -}; - +my $header = get_gaas_header(); my $output = undef; my $in = undef; my $help= 0; @@ -42,8 +35,8 @@ # Print Help and exit if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, + pod2usage( { -verbose => 99, + -exitval => 0, -message => "$header\n" } ); } @@ -201,9 +194,9 @@ } else{ print "Now performing the statistics of the annotation file $annotation...\n"; - my $full_path = can_run('gff3_sp_statistics.pl') or print "Cannot launch statistics. gff3_sp_statistics.pl script not available\n"; + my $full_path = can_run('agat_sp_statistics.pl') or print "Cannot launch statistics. agat_sp_statistics.pl script not available\n"; if ($full_path) { - system "gff3_sp_statistics.pl --gff $annotation -o $annotation_stat > $outfolder/maker_annotation_parsing.log"; + system "agat_sp_statistics.pl --gff $annotation -o $annotation_stat > $outfolder/maker_annotation_parsing.log"; } } print "All done!\n"; @@ -308,18 +301,19 @@ sub _exists_keys { } __END__ -# -------------- +=head1 NAME +gaas_maker_merge_outputs_from_datastore.pl -=head1 NAME +=head1 DESCRIPTION -maker_merge_outputs_from datastore.pl - The script will look over the datastore folder and subfolders to gather all outputs. +The script will look over the datastore folder and subfolders to gather all outputs. =head1 SYNOPSIS - ./maker_merge_outputs_from.pl - ./maker_merge_outputs_from.pl --help + gaas_maker_merge_outputs_from_datastore.pl + gaas_maker_merge_outputs_from_datastore.pl --help =head1 OPTIONS @@ -340,4 +334,30 @@ =head1 OPTIONS =back +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. 
+ Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + =cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Maker/maker_merge_outputs_from_index.pl b/annotation/tools/maker/maker_merge_outputs_from_index.pl similarity index 100% rename from annotation/Tools/Maker/maker_merge_outputs_from_index.pl rename to annotation/tools/maker/maker_merge_outputs_from_index.pl diff --git a/annotation/Tools/Maker/maker_moveResultsSafely.sh b/annotation/tools/maker/maker_moveResultsSafely.sh similarity index 100% rename from annotation/Tools/Maker/maker_moveResultsSafely.sh rename to annotation/tools/maker/maker_moveResultsSafely.sh diff --git a/annotation/Tools/Maker/rs_PlotDensityAEDFromMakerAnnotationFiles.R b/annotation/tools/maker/rs_PlotDensityAEDFromMakerAnnotationFiles.R similarity index 100% rename from annotation/Tools/Maker/rs_PlotDensityAEDFromMakerAnnotationFiles.R rename to annotation/tools/maker/rs_PlotDensityAEDFromMakerAnnotationFiles.R diff --git a/annotation/Tools/Maker/rs_PlotDensityOneRowPerFile.R b/annotation/tools/maker/rs_PlotDensityOneRowPerFile.R similarity index 100% rename from annotation/Tools/Maker/rs_PlotDensityOneRowPerFile.R rename to annotation/tools/maker/rs_PlotDensityOneRowPerFile.R diff --git a/annotation/Tools/Manager/create_annotation_project.pl b/annotation/tools/manager/create_annotation_project.pl similarity index 100% rename from annotation/Tools/Manager/create_annotation_project.pl rename to annotation/tools/manager/create_annotation_project.pl diff --git a/annotation/Tools/Manager/create_annotation_project.rb b/annotation/tools/manager/create_annotation_project.rb 
similarity index 100% rename from annotation/Tools/Manager/create_annotation_project.rb rename to annotation/tools/manager/create_annotation_project.rb diff --git a/annotation/Tools/Manager/create_delivery_dir.sh b/annotation/tools/manager/create_delivery_dir.sh similarity index 100% rename from annotation/Tools/Manager/create_delivery_dir.sh rename to annotation/tools/manager/create_delivery_dir.sh diff --git a/annotation/Tools/Manager/create_preautomated_report.pl b/annotation/tools/manager/create_preautomated_report.pl similarity index 100% rename from annotation/Tools/Manager/create_preautomated_report.pl rename to annotation/tools/manager/create_preautomated_report.pl diff --git a/annotation/Tools/NCBI/ncbi_get_genome_tree.pl b/annotation/tools/ncbi/ncbi_get_genome_tree.pl similarity index 89% rename from annotation/Tools/NCBI/ncbi_get_genome_tree.pl rename to annotation/tools/ncbi/ncbi_get_genome_tree.pl index 08077b5e1..4d8cae145 100755 --- a/annotation/Tools/NCBI/ncbi_get_genome_tree.pl +++ b/annotation/tools/ncbi/ncbi_get_genome_tree.pl @@ -2,10 +2,7 @@ use strict; use warnings; - use Getopt::Long; -use Bio::DB::EUtilities; -use Bio::SeqIO; use Scalar::Util qw(openhandle); use Time::Piece; use Time::Seconds; @@ -16,20 +13,15 @@ use LWP::UserAgent; use HTTP::Request::Common; use Try::Tiny; - use Bio::DB::Taxonomy; use Bio::TreeIO; use Bio::Tree::Tree; use Bio::Tree::TreeFunctionsI; +use Bio::DB::EUtilities; +use Bio::SeqIO; +use GAAS::GAAS; -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# Marc Hoeppner / Jacques Dainat # -# Please cite NBIS (www.nbis.se) when using this tool. 
# -######################################################## -}; - +my $header = get_gaas_header(); my $outfile = undef; my $quiet = undef; my $message=""; @@ -53,8 +45,8 @@ # Print Help and exit if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, + pod2usage( { -verbose => 99, + -exitval => 0, -message => "$header\n" } ); } @@ -304,17 +296,20 @@ sub key_exits{ } - +__END__ =head1 NAME -ncbi_get_genome_tree.pl - +gaas_ncbi_get_genome_tree.pl + +=head1 DESCRIPTION + The script creates a tree that covers only whole genomes from the genome NCBI database. The result is written to the specified output file, or to STDOUT. =head1 SYNOPSIS - ./ncbi_get_genome_tree.pl [ -o outfile ] - ./ncbi_get_genome_tree.pl --help + gaas_ncbi_get_genome_tree.pl [ -o outfile ] + gaas_ncbi_get_genome_tree.pl --help =head1 OPTIONS @@ -342,4 +337,30 @@ =head1 OPTIONS =back +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + =cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/NCBI/ncbi_get_reference_data.pl b/annotation/tools/ncbi/ncbi_get_reference_data.pl similarity index 93% rename from annotation/Tools/NCBI/ncbi_get_reference_data.pl rename to annotation/tools/ncbi/ncbi_get_reference_data.pl index afbd7ad21..53c7600c8 100755 --- a/annotation/Tools/NCBI/ncbi_get_reference_data.pl +++ b/annotation/tools/ncbi/ncbi_get_reference_data.pl @@ -8,16 +8,9 @@ use Time::Piece; use Time::Seconds; use Pod::Usage; +use GAAS::GAAS; - -my $header = qq{ -######################################################## -# NBIS 2015 - Sweden # -# Marc Hoeppner / Jacques Dainat # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; - +my $header = get_gaas_header(); my $outfile = undef; my $format = "fasta"; my $quiet; @@ -45,8 +38,8 @@ # Print Help and exit if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, + pod2usage( { -verbose => 99, + -exitval => 0, -message => "$header\n" } ); } diff --git a/annotation/Tools/NCBI/ncbi_get_sequence_from_list.pl b/annotation/tools/ncbi/ncbi_get_sequence_from_list.pl similarity index 88% rename from annotation/Tools/NCBI/ncbi_get_sequence_from_list.pl rename to annotation/tools/ncbi/ncbi_get_sequence_from_list.pl index 201a978fd..524846d0c 100755 --- a/annotation/Tools/NCBI/ncbi_get_sequence_from_list.pl +++ b/annotation/tools/ncbi/ncbi_get_sequence_from_list.pl @@ -2,7 +2,7 @@ # -# NCBI recommends that users post no more than three URL requests per second +# NCBI recommends that users post no more than three URL requests per second # use strict; @@ -16,18 +16,9 @@ use Time::Seconds; use Pod::Usage; use XML::LibXML; -use Data::Dumper; - -#use Bio::DB::Taxonomy; - -my $header = qq{ -######################################################## -# NBIS 
2017 - Sweden # -# Jacques Dainat # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; +use GAAS::GAAS; +my $header = get_gaas_header(); my $opt_output = undef; my $col = undef; my $message=""; @@ -38,12 +29,12 @@ my $quiet = undef; if ( !GetOptions( - "help|h" => \$help, - "list|l=s" => \$list, - "line=i" => \$lineToAvoid, - "col=i" => \$col, - "s=s" =>\$separator, - "q" => \$quiet, + "help|h" => \$help, + "list|l=s" => \$list, + "line=i" => \$lineToAvoid, + "col=i" => \$col, + "s=s" =>\$separator, + "q" => \$quiet, "o|output|outfile=s" => \$opt_output)) { pod2usage( { -message => 'Failed to parse command line', @@ -53,14 +44,14 @@ # Print Help and exit if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, + pod2usage( { -verbose => 99, + -exitval => 0, -message => "$header\n" } ); } if ( ! (defined($list)) ){ pod2usage( { - -message => "$header\nAt least 1 parameters is mandatory.\n", + -message => "$header\nAt least 1 parameters is mandatory (--list).\n", -verbose => 0, -exitval => 1 } ); } @@ -127,8 +118,8 @@ } else{ @cols = split /$separator/, $_; - } - + } + my $id = $cols[$col]; $id =~ s/[^[:print:]]+//g; print $id."\n"; @@ -157,7 +148,7 @@ -term => $ID); my $count = 0; $count = $factory->get_count; - + if ($count == 0){ # Skip if nothing was found if ($opt_output) { print $error "a - No identifier found for $ID\n"; ## => print to log error @@ -168,18 +159,18 @@ next; } else{ - #msg("We found $count ID for $ID in database 'protein db' \n"); + #msg("We found $count ID for $ID in database 'protein db' \n"); } - # Go trough the XML response to extract the ids + # Go trough the XML response to extract the ids my $xml_data; $factory->get_Response(-cb => sub { ($xml_data) = @_; } ); my $xmldoc = XML::LibXML->load_xml(string => $xml_data); my @nodes = $xmldoc->getElementsByLocalName('Id'); - - + + foreach my $node (@nodes){ $idcorrect = $node->textContent; last; @@ -227,7 +218,7 @@ ######## 
###### #### - ## + ## sub msg { my $t = localtime; @@ -243,7 +234,7 @@ sub runcmd { sub key_exits{ my ($resu_kingdom, %HASH)=@_; - + while ($resu_kingdom){ foreach my $key (keys %HASH){ if($resu_kingdom == $key){ @@ -260,15 +251,19 @@ sub key_exits{ =head1 NAME -ncbi_get_sequence_from_list.pl - -The script allow to retrieve the sequences from the NCBI ID list. +gaas_ncbi_get_sequence_from_list.pl + +=head1 DESCRIPTION + + +The script allow to retrieve the sequences from the NCBI ID list. The list should be a column in a file containing one ID per line. The result is written to the specified output file, or to STDOUT in fasta format. =head1 SYNOPSIS - ./ncbi_get_sequence_from_list.pl [ -o outfile ] - ./ncbi_get_sequence_from_list.pl --help + gaas_ncbi_get_sequence_from_list.pl --list file.txt [ -o outfile ] + gaas_ncbi_get_sequence_from_list.pl --help =head1 OPTIONS diff --git a/annotation/Tools/Util/pasa/pasa_create_chunks.rb b/annotation/tools/pasa/pasa_create_chunks.rb similarity index 100% rename from annotation/Tools/Util/pasa/pasa_create_chunks.rb rename to annotation/tools/pasa/pasa_create_chunks.rb diff --git a/annotation/Tools/Util/pasa/pasa_delete_db b/annotation/tools/pasa/pasa_delete_db similarity index 100% rename from annotation/Tools/Util/pasa/pasa_delete_db rename to annotation/tools/pasa/pasa_delete_db diff --git a/annotation/Tools/Util/pasa/pasa_find_duplicates.rb b/annotation/tools/pasa/pasa_find_duplicates.rb similarity index 100% rename from annotation/Tools/Util/pasa/pasa_find_duplicates.rb rename to annotation/tools/pasa/pasa_find_duplicates.rb diff --git a/annotation/Tools/Util/screen_mito_tblastn.pl b/annotation/tools/screen_mito_tblastn.pl similarity index 81% rename from annotation/Tools/Util/screen_mito_tblastn.pl rename to annotation/tools/screen_mito_tblastn.pl index 5df9df465..95723f4aa 100755 --- a/annotation/Tools/Util/screen_mito_tblastn.pl +++ b/annotation/tools/screen_mito_tblastn.pl @@ -7,7 +7,9 @@ use File::Basename; use 
Getopt::Long; use Pod::Usage; +use GAAS::GAAS; +my $header = get_gaas_header(); my @copyARGV=@ARGV; my $opt_output = undef; @@ -15,18 +17,11 @@ my $opt_genome= undef; my $help= undef; -my $header = qq{ -######################################################## -# NBIS 2018 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. # -######################################################## -}; if ( !GetOptions("tab=s" => \$tabfile, - "o|out=s" => \$opt_output, - "g|genome=s" => \$opt_genome, - "h|help" => \$help) ) + "o|out=s" => \$opt_output, + "g|genome=s" => \$opt_genome, + "h|help" => \$help) ) { pod2usage( { -message => 'Failed to parse command line', -verbose => 1, @@ -34,8 +29,8 @@ } if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, + pod2usage( { -verbose => 99, + -exitval => 0, -message => "$header\n" } ); } @@ -73,7 +68,7 @@ foreach my $id (@ids ){$allIDs{lc($id)}=$id;} } -##### Stream in 1 +##### Stream in 1 my $fh1; if ($tabfile) { open($fh1, '<', $tabfile) or die "Could not open file '$tabfile' $!"; @@ -101,11 +96,11 @@ else{ $start = $list[9]; $end = $list[8]; - } + } #print $start." 
".$end."\n"; push (@{$info{$ID}}, [$start, $end, $mito_gene]); } - + my %omni; my %nbMitoGeneByContig; @@ -124,7 +119,7 @@ $nbMitoGeneByContig{$contig}{$tuple->[2]}++; #print $tuple->[2];exit; - + $start = @$tuple[0]; $end = @$tuple[1]; @@ -132,15 +127,15 @@ #print "it overlaps\n"; if ($end > $prev_end){ $prev_end = $end; - } - } + } + } elsif($start > $prev_end){ if($prev_start != -1){ push (@uniq_list, [$prev_start,$prev_end]); #print "I push the tuple [$prev_start,$prev_end]\n"; } $prev_start = $start ; - $prev_end = $end ; + $prev_end = $end ; } } @@ -150,7 +145,7 @@ push (@{$omni{$contig}}, @uniq_list) } - + #calculate bp incremented non-overlaping hit size my %size; foreach my $contig (keys %omni){ @@ -163,20 +158,20 @@ print $ostream "SequenceID\tNumber_of_non_ovelaping_Hit\tNb_mito_gene\tTotal_hit_size\tSize_sequence\t%_Sequence_covered_by_hit\tGene_names\n"; # sort by number of non-overlaping hits foreach my $contig (sort { @{$omni{$a}} <=> @{$omni{$b}} } keys %omni){ - + #compute length of the contig my $seq_id_correct = $allIDs{lc($contig)}; my $seq = $db->get_Seq_by_id($seq_id_correct); my $length = $seq->length; - + #compute % of seq covered by mito hits my $goodxGenome=sprintf("%0.2f",($size{$contig}*100)/$length); - + my $nbMitoGene = keys %{$nbMitoGeneByContig{$contig}}; - + my @geneList=(); foreach my $key (sort keys %{$nbMitoGeneByContig{$contig}}){ - push @geneList, $key + push @geneList, $key } print $ostream $contig."\t".@{$omni{$contig}}."\t".$nbMitoGene."\t".$size{$contig}."\t".$length."\t".$goodxGenome."\t".join(",", @geneList)."\n"; } @@ -187,12 +182,12 @@ # sort by number of non-overlaping hits foreach my $contig (sort { @{$omni{$a}} <=> @{$omni{$b}} } keys %omni){ my $nbMitoGene = keys %{$nbMitoGeneByContig{$contig}}; - + my @geneList=(); foreach my $key (sort keys %{$nbMitoGeneByContig{$contig}}){ push @geneList, $key } - + print $ostream $contig."\t".@{$omni{$contig}}."\t".$nbMitoGene."\t".$size{$contig}."\t".join(",", @geneList)."\n"; 
} } @@ -201,17 +196,21 @@ =head1 NAME - -Based on a default blast tabulated output ( -outfmt 6 => qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore) the script aims to tell you -for each sequence of your assembly, how many non-overlaping mito hits have been found, the number of mito genes that have a hit and the total size in bp of those hits -(overlaping part counted only once). When the assembly is provided, 2 new columns are displayed, the sie of the Sequence and the % part covered by mito hits. + +screen_mito_tblastn.pl + +=head1 DESCRIPTION + +Based on a default blast tabulated output ( -outfmt 6 => qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore) the script aims to tell you +for each sequence of your assembly, how many non-overlapping mito hits have been found, the number of mito genes that have a hit and the total size in bp of those hits +(overlapping part counted only once). When the assembly is provided, 2 new columns are displayed, the size of the sequence and the % part covered by mito hits. The script aims to help determining the contigs from an assembly which are mitochondrial. An assembly graph could be helpful to check if the suspicious (those that might be mitochondrial) contigs sounds to be circular as expected for a mitochondrial genome. =head1 SYNOPSIS - ./screen_mito.pl --tab=infile -o=outFile - ./screen_mito.pl --help + screen_mito_tblastn.pl --tab=infile -o=outFile + screen_mito_tblastn.pl --help Mitochondrial genome size (from wikipedia) @@ -247,4 +246,30 @@ =head1 OPTIONS =back +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues.
+ If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + =cut + +AUTHOR - Jacques Dainat diff --git a/annotation/Tools/Util/sync_dat_and_embl.pl b/annotation/tools/sync_dat_and_embl.pl similarity index 91% rename from annotation/Tools/Util/sync_dat_and_embl.pl rename to annotation/tools/sync_dat_and_embl.pl index 498b97df8..9471bcf46 100755 --- a/annotation/Tools/Util/sync_dat_and_embl.pl +++ b/annotation/tools/sync_dat_and_embl.pl @@ -6,7 +6,9 @@ use File::Basename; use Getopt::Long; use Pod::Usage; +use GAAS::GAAS; +my $header = get_gaas_header(); my @copyARGV=@ARGV; my $opt_output = undef; @@ -15,15 +17,6 @@ my $bac = undef; #believe in AC my $help= undef; - -my $header = qq{ -######################################################## -# NBIS 2016 - Sweden # -# jacques.dainat\@nbis.se # -# Please cite NBIS (www.nbis.se) when using this tool. 
# -######################################################## -}; - if ( !GetOptions("dat=s" => \$datfile, "embl=s" => \$emblfile, "o|out=s" => \$opt_output, @@ -36,8 +29,8 @@ } if ($help) { - pod2usage( { -verbose => 2, - -exitval => 2, + pod2usage( { -verbose => 99, + -exitval => 0, -message => "$header\n" } ); } @@ -48,7 +41,7 @@ -exitval => 1 } ); } -##### Stream in 1 +##### Stream in 1 my $fh1; if ($datfile) { open($fh1, '<', $datfile) or die "Could not open file '$datfile' $!"; @@ -72,8 +65,8 @@ my $ID = undef; my $sourceSeen=undef; my $ACline=0; -while( my $line = <$fh1>) { - +while( my $line = <$fh1>) { + if( $line =~ m/^ID/){ # my @list = split(/\s/,$line); @@ -84,7 +77,7 @@ #print $ID."\n"; $sourceSeen=undef; } - + ################## Look for signal to stop save the information ############################# # #we keep all until source and the few next lines related to source. (Stop at another FT than source or XX if no other source available.) # @@ -106,10 +99,10 @@ $sourceSeen=1; } - if( $line =~ m/^FT [^source|^\s]/){ + if( $line =~ m/^FT [^source|^\s]/){ $ID=undef; } - + if( $sourceSeen){ if( $line =~ m/^XX/){ $ID=undef; @@ -142,13 +135,13 @@ # print $headers{$key}; #} -# print $fh2 but part (ID line until source and few next lines) are replaced by those saved from the dat file. (We do that if an comon identifier is found: here the size in bp fron the ID line is the ioentifier) +# print $fh2 but part (ID line until source and few next lines) are replaced by those saved from the dat file. (We do that if an comon identifier is found: here the size in bp fron the ID line is the ioentifier) my $printNext=1; my $nbIDfound=0; my $nbIDTotal=0; $ACline=0; my $saved_line=undef; -while( my $line = <$fh2>) { +while( my $line = <$fh2>) { my $skip_line=undef; if($saved_line){ @@ -164,7 +157,7 @@ $ID = $list[10]; #print "ID= $ID\n"; $sourceSeen=undef; - + if ( exists($headers{$ID}) and ! 
$bac){ $nbIDfound++; print $emblout $headers{$ID}; @@ -208,7 +201,7 @@ if( $line =~ m/^FT [^source|^\s]/){ #we keep all wat is related tosource as well and then we stop. $printNext=1; } - + if( $sourceSeen){ if( $line =~ m/^XX/){ $printNext=1; @@ -227,15 +220,15 @@ =head1 NAME - + sync_dat_and_embl.pl - This script allow to update the record "headers" of an EMBL file by those from a dat file provided by ENA. -It is useful when an assembly/annotation has been submitted using AGP file while the annotation has been done directly on the chromosomes. +It is useful when an assembly/annotation has been submitted using AGP file while the annotation has been done directly on the chromosomes. (Passing by an AGP file happens only if the chromosome and unplaced (not related at all to any chromosome) contigs are part of the same assembly). =head1 SYNOPSIS - ./sync_dat_and_embl.pl --dat=infile --embl=infile2 -o=outFile + ./sync_dat_and_embl.pl --dat=infile --embl=infile2 -o=outFile ./sync_dat_and_embl.pl --help =head1 OPTIONS diff --git a/annotation/Tools/System/cp_all_links.sh b/annotation/tools/system/cp_all_links.sh similarity index 100% rename from annotation/Tools/System/cp_all_links.sh rename to annotation/tools/system/cp_all_links.sh diff --git a/annotation/Tools/System/info_BioPerlVersion.sh b/annotation/tools/system/info_BioPerlVersion.sh similarity index 100% rename from annotation/Tools/System/info_BioPerlVersion.sh rename to annotation/tools/system/info_BioPerlVersion.sh diff --git a/annotation/Tools/System/info_find_perl_module.sh b/annotation/tools/system/info_find_perl_module.sh similarity index 100% rename from annotation/Tools/System/info_find_perl_module.sh rename to annotation/tools/system/info_find_perl_module.sh diff --git a/annotation/Tools/System/manage_backup.sh b/annotation/tools/system/manage_backup.sh similarity index 100% rename from annotation/Tools/System/manage_backup.sh rename to annotation/tools/system/manage_backup.sh diff --git 
a/annotation/Tools/System/reveal_hidden_characters.pl b/annotation/tools/system/reveal_hidden_characters.pl similarity index 100% rename from annotation/Tools/System/reveal_hidden_characters.pl rename to annotation/tools/system/reveal_hidden_characters.pl diff --git a/annotation/Tools/System/unlink_all.sh b/annotation/tools/system/unlink_all.sh similarity index 100% rename from annotation/Tools/System/unlink_all.sh rename to annotation/tools/system/unlink_all.sh diff --git a/annotation/WebApollo/apollo1/add_track.rb b/annotation/tools/webapollo/apollo1/add_track.rb similarity index 100% rename from annotation/WebApollo/apollo1/add_track.rb rename to annotation/tools/webapollo/apollo1/add_track.rb diff --git a/annotation/WebApollo/apollo1/apollo_track_helper.rb b/annotation/tools/webapollo/apollo1/apollo_track_helper.rb similarity index 100% rename from annotation/WebApollo/apollo1/apollo_track_helper.rb rename to annotation/tools/webapollo/apollo1/apollo_track_helper.rb diff --git a/annotation/WebApollo/apollo1/build_template_WA_1_0_X.rb b/annotation/tools/webapollo/apollo1/build_template_WA_1_0_X.rb similarity index 100% rename from annotation/WebApollo/apollo1/build_template_WA_1_0_X.rb rename to annotation/tools/webapollo/apollo1/build_template_WA_1_0_X.rb diff --git a/annotation/WebApollo/apollo1/new_species_WA_1_0_X.rb b/annotation/tools/webapollo/apollo1/new_species_WA_1_0_X.rb similarity index 100% rename from annotation/WebApollo/apollo1/new_species_WA_1_0_X.rb rename to annotation/tools/webapollo/apollo1/new_species_WA_1_0_X.rb diff --git a/annotation/WebApollo/apollo1/setup_rakefile.rb b/annotation/tools/webapollo/apollo1/setup_rakefile.rb similarity index 100% rename from annotation/WebApollo/apollo1/setup_rakefile.rb rename to annotation/tools/webapollo/apollo1/setup_rakefile.rb diff --git a/annotation/WebApollo/apollo1/sync_user_db.rb b/annotation/tools/webapollo/apollo1/sync_user_db.rb similarity index 100% rename from 
annotation/WebApollo/apollo1/sync_user_db.rb rename to annotation/tools/webapollo/apollo1/sync_user_db.rb diff --git a/annotation/WebApollo/apollo1/transplant.rb b/annotation/tools/webapollo/apollo1/transplant.rb similarity index 100% rename from annotation/WebApollo/apollo1/transplant.rb rename to annotation/tools/webapollo/apollo1/transplant.rb diff --git a/annotation/WebApollo/apollo2.3/apollo_track_helper.rb b/annotation/tools/webapollo/apollo2.3/apollo_track_helper.rb similarity index 100% rename from annotation/WebApollo/apollo2.3/apollo_track_helper.rb rename to annotation/tools/webapollo/apollo2.3/apollo_track_helper.rb diff --git a/annotation/WebApollo/apollo2.3/install_WA_2_0_X.rb b/annotation/tools/webapollo/apollo2.3/install_WA_2_0_X.rb similarity index 100% rename from annotation/WebApollo/apollo2.3/install_WA_2_0_X.rb rename to annotation/tools/webapollo/apollo2.3/install_WA_2_0_X.rb diff --git a/annotation/WebApollo/apollo2.3/manage_species.rb b/annotation/tools/webapollo/apollo2.3/manage_species.rb similarity index 100% rename from annotation/WebApollo/apollo2.3/manage_species.rb rename to annotation/tools/webapollo/apollo2.3/manage_species.rb diff --git a/annotation/WebApollo/apollo2/apollo_track_helper.rb b/annotation/tools/webapollo/apollo2/apollo_track_helper.rb similarity index 100% rename from annotation/WebApollo/apollo2/apollo_track_helper.rb rename to annotation/tools/webapollo/apollo2/apollo_track_helper.rb diff --git a/annotation/WebApollo/apollo2/install_WA_2_0_X.rb b/annotation/tools/webapollo/apollo2/install_WA_2_0_X.rb similarity index 100% rename from annotation/WebApollo/apollo2/install_WA_2_0_X.rb rename to annotation/tools/webapollo/apollo2/install_WA_2_0_X.rb diff --git a/annotation/WebApollo/apollo2/manage_species.rb b/annotation/tools/webapollo/apollo2/manage_species.rb similarity index 100% rename from annotation/WebApollo/apollo2/manage_species.rb rename to annotation/tools/webapollo/apollo2/manage_species.rb diff --git 
a/bin/gaas_AGP2chromosome.pl b/bin/gaas_AGP2chromosome.pl new file mode 100755 index 000000000..1b697b735 --- /dev/null +++ b/bin/gaas_AGP2chromosome.pl @@ -0,0 +1,255 @@
#!/usr/bin/env perl

# Assemble the objects (chromosomes/scaffolds) described by an AGP file from
# the component sequences stored in a FASTA file, and write them as FASTA to
# a file or to STDOUT. See the POD after __END__ for the user documentation.

use strict;
use warnings;
use Pod::Usage;
use Getopt::Long;
use Bio::SeqIO ;
use Bio::DB::Fasta;
use Bio::Tools::GFF;
use GAAS::GAAS;

my $header    = get_gaas_header();
my $start_run = time();

my $opt_agpfile;
my $opt_fastafile;
my $opt_output;
my $opt_help;

# OPTION MANAGEMENT
if ( !GetOptions( 'a|agp=s'      => \$opt_agpfile,
                  'f|fa|fasta=s' => \$opt_fastafile,
                  'o|output=s'   => \$opt_output,
                  'h|help!'      => \$opt_help ) )
{
    pod2usage( { -message => "$header\nFailed to parse command line",
                 -verbose => 1,
                 -exitval => 1 } );
}

# Print Help and exit
if ($opt_help) {
    pod2usage( { -verbose => 99,
                 -exitval => 0,
                 -message => "$header \n" } );
}

if ( !defined($opt_agpfile) or !defined($opt_fastafile) ) {
    pod2usage( {
        -message => "\nAt least 2 parameters are mandatory:\nInput agp file (-a); Input fasta file (-f)\n\n"
                  . "Output is optional. Look at the help documentation to know more.\n",
        -verbose => 0,
        -exitval => 2 } );
}

# Output stream. The sequences may be written to STDOUT, so every progress
# message below goes to STDERR to keep the FASTA output clean.
my $ostream;
if ($opt_output) {
    # Strip only a genuine trailing extension; ".fa" is appended again below.
    $opt_output =~ s/\.(?:fasta|fa)\z//;
    my $out_name = $opt_output . ".fa";
    open( my $out_fh, '>', $out_name ) or die "Could not open file '$out_name' $!";
    $ostream = Bio::SeqIO->new( -fh => $out_fh, -format => 'Fasta' );
}
else {
    $ostream = Bio::SeqIO->new( -fh => \*STDOUT, -format => 'Fasta' );
}

##################################################################
###########################     MAIN    ##########################
##################################################################

######################
### Parse AGP input  #
# %hash_agp: object name => part number => array ref holding the 9 AGP columns.
# @object_order remembers the order in which objects first appear so that the
# output is reproducible from one run to the next.
my %hash_agp;
my @object_order;
print STDERR "Reading file $opt_agpfile\n";
open( my $agp_fh, '<:encoding(UTF-8)', $opt_agpfile )
    or die "Could not open file '$opt_agpfile' $!";
while ( my $row = <$agp_fh> ) {
    $row =~ s/\r?\n\z//;                  # tolerate Windows line endings
    next if $row =~ /^\s*(?:#|$)/;        # skip comment and blank lines

    my @fields = split( /\t/, $row );
    if ( @fields < 6 ) {
        warn "Skipping malformed AGP line $. (fewer than 6 columns): $row\n";
        next;
    }

    my ( $object, $part_number ) = @fields[ 0, 3 ];
    push @object_order, $object if !exists $hash_agp{$object};
    if ( exists $hash_agp{$object}{$part_number} ) {
        warn "Part number $part_number is duplicated for object $object (line $.); keeping the first occurrence\n";
        next;
    }
    $hash_agp{$object}{$part_number} = [ @fields[ 0 .. 8 ] ];
}
close($agp_fh);

print STDERR "Parsing Finished\n";
### END Parse AGP input #
#########################


######################
### READ FASTA input #
my $db  = Bio::DB::Fasta->new($opt_fastafile);
my @ids = $db->get_all_primary_ids;
my %allIDs;    # lower-cased ID => ID, to look components up case-insensitively
foreach my $id (@ids) {
    $id =~ s/[^[:print:]]//g;    # FIX FOR THE CRAZY BUG ADDING NULL BILLION TIMES AT THE END OF MY IDS
    $allIDs{ lc($id) } = $id;
}
### END FASTA input  #
######################

foreach my $object (@object_order) {
    my $sequence = "";

    foreach my $part_number ( sort { $a <=> $b } keys %{ $hash_agp{$object} } ) {
        my @feature        = @{ $hash_agp{$object}{$part_number} };
        my $component_type = lc( $feature[4] );

        if ( $component_type eq "n" or $component_type eq "u" ) {    # GAP
            $sequence .= 'N' x $feature[5];
            next;
        }

        my $piece_sequence = get_sequence( $db, \%allIDs, $feature[5], $feature[6], $feature[7] );
        my $orientation    = defined $feature[8] ? $feature[8] : '';

        if ( $orientation eq "-" ) {
            my $rev_sequence = reverse $piece_sequence;
            $rev_sequence =~ tr/ATCGYRKMDHVBatcgyrkmdhvb/TAGCRYMKHDBVtagcrymkhdbv/;
            $sequence .= $rev_sequence;
        }
        elsif ( $orientation eq "+" or $orientation eq "?" or $orientation eq "0" or $orientation eq "na" ) {
            # The AGP specification treats unknown orientation (?, 0, na) as +.
            $sequence .= $piece_sequence;
        }
        else {
            warn "Problem with the strand !! Unexpected orientation '$orientation' for component $feature[5] "
               . "(object $object, part $part_number): component skipped\n";
        }
    }

    # create sequence object
    my $seq = Bio::Seq->new( -id => $object, -seq => $sequence );
    $ostream->write_seq($seq);
}
$ostream->close();

my $end_run  = time();
my $run_time = $end_run - $start_run;
print STDERR "Job done in $run_time seconds\n";

#######################################################################################################################
        ####################
         #     METHODS    #
          ################
           ##############
            ############
             ##########
              ########
               ######
                ####
                 ##

# Check whether a chain of keys exists in a nested hash, at any depth:
# hash{a}, hash{a}{b}, hash{a}{b}{c}, etc.
# usage example: exists_keys($hash_omniscient, ('level3', 'cds', $level2_ID))
# Returns 1 when every key of the chain exists, '' otherwise.
sub exists_keys {
    my ($hash, $key, @keys) = @_;

    if (ref $hash eq 'HASH' && exists $hash->{$key}) {
        if (@keys) {
            return exists_keys($hash->{$key}, @keys);
        }
        return 1;
    }
    return '';
}

# Extract the span $start..$end of component $seq_id from the indexed FASTA.
#   $db      Bio::DB::Fasta handle
#   $allIDs  hash ref: lower-cased ID => ID as stored in the index
# Returns the sub-sequence, or "" (with a warning) when the ID is unknown.
# Dies when the index returns nothing for a known ID.
sub get_sequence {
    my ($db, $allIDs, $seq_id, $start, $end) = @_;

    my $sequence = "";
    if ( !exists_keys( $allIDs, lc($seq_id) ) ) {
        warn "Problem ! ID $seq_id not found !\n";
        return $sequence;
    }

    # Use the hash that was passed in rather than the file-scoped one.
    my $seq_id_correct = $allIDs->{ lc($seq_id) };
    $sequence = $db->subseq( $seq_id_correct, $start, $end );

    if ( !defined($sequence) or $sequence eq "" ) {
        # die rather than "warn; exit" so that the failure is visible in the exit status
        die "Problem ! no sequence extracted for - $seq_id !\n";
    }

    my $expected = $end - $start + 1;
    if ( length($sequence) != $expected ) {
        my $whole_length = $db->length($seq_id_correct);
        warn "Problem ! The size of the sequence extracted " . length($sequence)
           . " is different than the specified span: $expected.\n"
           . "That often occurs when the fasta file does not correspond to the AGP file. "
           . "Or the index file comes from another fasta file which had the same name and has not been removed.\n"
           . "As last possibility your AGP file contains location errors.\n"
           . "Supplement information: seq_id=$seq_id ; seq_id_correct=$seq_id_correct ; start=$start ; end=$end ; "
           . "$seq_id sequence length: $whole_length )\n";
    }

    return $sequence;
}

__END__

=head1 NAME

gaas_AGP2chromosome.pl

=head1 DESCRIPTION

The script combines the contigs of the fasta file into chromosomes as described in the AGP file.
AGP version 2 is expected. See https://www.ncbi.nlm.nih.gov/assembly/agp/AGP_Specification/ for the specification of this format.
If you are unsure about the AGP file you are using, you can check its sanity using the AGP validator provided by the NCBI
at this address: https://www.ncbi.nlm.nih.gov/projects/genome/assembly/agp/agp_validate.cgi
The result is written to the specified output file, or to STDOUT. Progress messages are written to STDERR.

=head1 SYNOPSIS

    gaas_AGP2chromosome.pl -a infile.agp -f infile.fasta [ -o outfile ]
    gaas_AGP2chromosome.pl --help

=head1 OPTIONS

=over 8

=item B<--agp> or B<-a>

Input AGP file

=item B<--fasta>, B<--fa> or B<-f>

Input fasta file.

=item B<-o> or B<--output>

Output fasta file (the .fa extension is added). If no output file is specified, the output will be
written to STDOUT.

=item B<-h> or B<--help>

Display this helpful text.

=back

=head1 FEEDBACK

=head2 Did you find a bug?

Do not hesitate to report bugs to help us keep track of the bugs and their
resolution. Please use the GitHub issue tracking system available at this
address:

            https://github.com/NBISweden/GAAS/issues

 Ensure that the bug was not already reported by searching under Issues.
 If you're unable to find an (open) issue addressing the problem, open a new one.
 Try as much as possible to include in the issue when relevant:
 - a clear description,
 - as much relevant information as possible,
 - the command used,
 - a data sample,
 - an explanation of the expected behaviour that is not occurring.

=head2 Do you want to contribute?

You are very welcome, visit this address for the Contributing guidelines:
https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md

=cut

AUTHOR - Jacques Dainat
diff --git a/bin/gaas_add_track.rb b/bin/gaas_add_track.rb new file mode 100755 index 000000000..f992f2db2 --- /dev/null +++ b/bin/gaas_add_track.rb @@ -0,0 +1,146 @@ +#!/usr/bin/ruby +# == NAME +# add_track.rb +# +# == USAGE +# ./this_script.rb [ -h | --help ] +# [ -g | --gff ] |[ -s | --species ] | +# == DESCRIPTION +# A script to add a track to an existing WebApollo installation. 
+# +# == OPTIONS +# -h,--help:: Show help +# -s,--species=SPECIES:: Name of the species +# -g,--gff=GFF:: Annotation file to process (optional) +# +# == EXPERT OPTIONS +# +# == AUTHOR +# Marc Hoeppner, mphoeppner@gmail.com + +require 'optparse' +require 'ostruct' + +### Define modules and classes here + +### Get the script arguments and open relevant files +options = OpenStruct.new() +opts = OptionParser.new() +opts.on("-i","--infile", "=INFILE","Input") {|argument| options.infile = argument } +opts.on("-s","--species", "=SPECIES","Name of the species") {|argument| options.species = argument } +opts.on("-c","--category", "=CATEGORY","Track category") {|argument| options.category = argument } +opts.on("-l","--label", "=LABEL","Label") {|argument| options.label = argument } +opts.on("-o","--outfile", "=OUTFILE","Output") {|argument| options.outfile = argument } +opts.on("-h","--help","Display the usage information") { + puts opts + exit +} + +opts.parse! + +### Usernames, passwords and locations + +home = ENV['HOME'] + +config = { + :web_apollo_build => "/big/webapollo/build/#{options.species}", # The location where this WA project is to be build + :web_apollo_data => "/big/data/#{options.species}/data" # Location of the data store for this species +} + +### The workflow + +raise "This species (#{options.species}) does not have a build directory under #{config[:web_apollo_build]})" unless File.directory?(config[:web_apollo_build]) +raise "This species (#{options.species}) does not have a build directory under #{config[:web_apollo_data]})" unless File.directory?(config[:web_apollo_data]) + +tracks = { "protein" => "\"match_part\": \"blue-80pct\"" , + "est" => "\"match_part\": \"green-80pct\"" , + "tRNA" => "\"exon\": \"green-80pct\"", + "ncRNA" => "\"exon\": \"green-80pct\"", + "synteny" => "\"nucleotide_motif\": \"springgreen-80pct\"", + "rnaseq" => "\"match_part\": \"orange-80pct\"" , + "abinito" => "\"match_part\": \"springgreen-80pct\"", + "gene" => "\"wholeCDS\": 
null, \"CDS\": \"blueviolet-80pct\", \"UTR\": \"darkorange-60pct\", \"exon\" : \"container-100pct\"", + "lift-over" => "\"wholeCDS\": null, \"CDS\": \"green-80pct\", \"UTR\": \"darkorange-60pct\", \"exon\" : \"container-100pct\"" + } + +abort "No track provided" if options.infile.nil? +abort "No species provided" unless options.species +abort "Must provide a category for this track" unless options.category +abort "Must provide a label for this track" unless options.label + +## Some useful variables + +track = nil +track_number = {} +proceed = false +failure = 0 + +## + +# List the current information +puts "Your data:" +puts "----------" +puts "Track to load: #{options.infile}" +puts "WebApollo portal: #{config[:web_apollo_build]}" +puts "Track label: #{options.label}" +puts "Category: #{options.category}" +puts "Track type selection..." +puts + +# Ask about the type of data + + +while proceed == false + + abort "Can't make up your mind, eh? Aborting..." if failure > 3 + + puts "###############################################" + puts "Choose the track type from the following list:" + + number = 0 + + tracks.sort_by{|t,f| t}.each do |track_type,formatting| + number += 1 + track_number[number] = track_type + track = track_type + puts "\t#{number}\t#{track_type}" + end + + puts "##############################################" + puts "Enter the number corresponding to your choice:" + + selection = gets.chomp + + next unless track_number.has_key?(selection.to_i) + + puts "You selected: #{track_number[selection.to_i]} - is that correct? (Y/N)" + + answer = gets.chomp + + if answer.downcase == "y" + proceed = true + track = track_number[selection.to_i] + else + failure += 1 + end + + +end + +puts "Loading track as type #{track}, with the label #{options.label} into the category #{options.category} of webapollo directory #{config[:web_apollo_build]}" + +puts "Proceed? (Y/N)" + +answer = gets.chomp + +abort "Aborting..." 
unless answer.downcase == "y" + +if track == "gene" or track == "lift-over" + system "perl #{config[:web_apollo_build]}/bin/flatfile-to-json.pl --gff #{options.infile} --out #{config[:web_apollo_data]} --arrowheadClass trellis-arrowhead --getSubfeatures --subfeatureClasses '{#{tracks[track]}}' --cssClass container-16px --config '{ \"category\": \"#{options.category}\" }' --type mRNA --trackLabel #{options.label}" +elsif track == "tRNA" + system "perl #{config[:web_apollo_build]}/bin/flatfile-to-json.pl --gff #{options.infile} --out #{config[:web_apollo_data]} --arrowheadClass trellis-arrowhead --getSubfeatures --subfeatureClasses '{#{tracks[track]}}' --cssClass container-16px --config '{ \"category\": \"#{options.category}\" }' --type tRNA --trackLabel #{options.label}" +elsif track == "ncRNA" + system "perl #{config[:web_apollo_build]}/bin/flatfile-to-json.pl --gff #{options.infile} --out #{config[:web_apollo_data]} --arrowheadClass trellis-arrowhead --getSubfeatures --subfeatureClasses '{#{tracks[track]}}' --cssClass container-16px --config '{ \"category\": \"#{options.category}\" }' --type ncRNA --trackLabel #{options.label}" +else + system "perl #{config[:web_apollo_build]}/bin/flatfile-to-json.pl --gff #{options.infile} --out #{config[:web_apollo_data]} --arrowheadClass trellis-arrowhead --getSubfeatures --subfeatureClasses '{#{tracks[track]}}' --cssClass container-16px --config '{ \"category\": \"#{options.category}\" }' --trackLabel #{options.label}" +end diff --git a/bin/gaas_apollo_track_helper.rb b/bin/gaas_apollo_track_helper.rb new file mode 100755 index 000000000..d62886f9c --- /dev/null +++ b/bin/gaas_apollo_track_helper.rb @@ -0,0 +1,172 @@ +#!/usr/bin/ruby +# == NAME +# apollo_track_helper.rb +# +# == USAGE +# ./this_script.rb [ -h | --help ] +# [ -g | --gff ] |[ -s | --species ] | +# == DESCRIPTION +# A script to manage tracks for a WebApollo installation +# +# == OPTIONS +# -h,--help:: Show help +# -s,--species=SPECIES:: Name of the species +# 
-g,--gff=GFF:: Annotation file to process (optional) +# +# == EXPERT OPTIONS +# +# == AUTHOR +# Marc Hoeppner, mphoeppner@gmail.com + +require 'optparse' +require 'ostruct' + +### Define modules and classes here + +### Get the script arguments and open relevant files +options = OpenStruct.new() +opts = OptionParser.new() +opts.on("-i","--infile", "=INFILE","Input") {|argument| options.infile = argument } +opts.on("-r","--remove", "Remove track, requires species (-s) and label (-l)") {|argument| options.remove = true } +opts.on("-s","--species", "=SPECIES","Name of the species") {|argument| options.species = argument } +opts.on("-c","--category", "=CATEGORY","Track category") {|argument| options.category = argument } +opts.on("-l","--label", "=LABEL","Label") {|argument| options.label = argument } +opts.on("-o","--outfile", "=OUTFILE","Output") {|argument| options.outfile = argument } +opts.on("-w","--wa_installation", "=wa_installation_name","path to the WA installation") {|argument| options.wa_installation_name = argument } +opts.on("-d","--direct", "=DIRECT","Direct") {|argument| options.direct = argument } +opts.on("-h","--help","Display the usage information") { + puts opts + exit +} + +opts.parse! 
+ +### Usernames, passwords and locations + +home = ENV['HOME'] + +build_dir = ENV['APOLLO_BUILD_DIR'] or abort "Environment variable APOLLO_BUILD_DIR not set" +data_dir = ENV['APOLLO_DATA_DIR'] or abort "Environment vairable APOLLO_DATA_DIR not set" + +config = { + :web_apollo_build => "#{build_dir}/#{options.wa_installation_name}", # The location where this WA isntance is + :web_apollo_data => "#{data_dir}/#{options.species}" # Location of the data store for this species +} + +### The workflow + +tracks = { "protein" => "\"match_part\": \"blue-80pct\"" , + "est" => "\"match_part\": \"green-80pct\"" , + "repeat" => "\"match_part\": \"green-80pct\"" , + "tRNA" => "\"exon\": \"green-80pct\"", + "ncRNA" => "\"exon\": \"green-80pct\"", + "synteny" => "\"nucleotide_motif\": \"springgreen-80pct\"", + "rnaseq_match" => "\"match_part\": \"orange-80pct\"" , + "rnaseq" => "\"exon\" : \"green-80pct\"", + "abinito" => "\"match_part\": \"springgreen-80pct\"", + "gene" => "\"wholeCDS\": null, \"CDS\": \"blueviolet-80pct\", \"UTR\": \"darkorange-60pct\", \"exon\" : \"container-100pct\"", + "lift-over" => "\"wholeCDS\": null, \"CDS\": \"green-80pct\", \"UTR\": \"darkorange-60pct\", \"exon\" : \"container-100pct\"" + } + + +abort "No species provided" unless options.species +raise "This WA instance/installation (#{options.wa_installation_name}) does not have a build directory under #{config[:web_apollo_build]})" unless File.directory?(config[:web_apollo_build]) +raise "This species (#{options.species}) does not have a build directory under #{config[:web_apollo_data]})" unless File.directory?(config[:web_apollo_data]) +abort "No track label provided" unless options.label + +if options.remove + + system("perl #{config[:web_apollo_build]}/bin/remove-track.pl --dir #{config[:web_apollo_data]} --trackLabel #{options.label} -D") + +else + + abort "No track provided" if options.infile.nil? 
+ abort "Must provide a category for this track" unless options.category + abort "A track with the label (#{options.label}) already exits for the species (#{options.species})." if File.directory?("#{config[:web_apollo_data]}/tracks/#{options.label}") + + ## Some useful variables + + track = nil + track_number = {} + proceed = false + failure = 0 + + ## + + # List the current information + puts "Your data:" + puts "----------" + puts "Track to load: #{options.infile}" + puts "WebApollo portal: #{config[:web_apollo_build]}" + puts "Track label: #{options.label}" + puts "Category: #{options.category}" + puts "Track type selection..." + puts + + # Ask about the type of data + + + while proceed == false + + abort "Can't make up your mind, eh? Aborting..." if failure > 3 + + puts "###############################################" + puts "Choose the track type from the following list:" + + number = 0 + + tracks.sort_by{|t,f| t}.each do |track_type,formatting| + number += 1 + track_number[number] = track_type + track = track_type + puts "\t#{number}\t#{track_type}" + end + + puts "##############################################" + + + if options.direct + selection = options.direct + proceed = true + track = track_number[selection.to_i] + else + puts "Enter the number corresponding to your choice:" + selection = gets.chomp + + next unless track_number.has_key?(selection.to_i) + + puts "You selected: #{track_number[selection.to_i]} - is that correct? (Y/N)" + + answer = gets.chomp + + if answer.downcase == "y" + proceed = true + track = track_number[selection.to_i] + else + failure += 1 + end + end + + end + + puts "Loading track as type #{track}, with the label #{options.label} into the category #{options.category} of webapollo directory #{config[:web_apollo_build]}" + +unless options.direct + puts "Proceed? (Y/N)" + + answer = gets.chomp + + abort "Aborting..." 
unless answer.downcase == "y" +end + + if track == "gene" or track == "lift-over" or track == "rnaseq" + system "perl #{config[:web_apollo_build]}/bin/flatfile-to-json.pl --gff #{options.infile} --out #{config[:web_apollo_data]} --arrowheadClass trellis-arrowhead --getSubfeatures --subfeatureClasses '{#{tracks[track]}}' --cssClass container-16px --config '{ \"category\": \"#{options.category}\" }' --type mRNA --trackLabel #{options.label}" + elsif track == "tRNA" + system "perl #{config[:web_apollo_build]}/bin/flatfile-to-json.pl --gff #{options.infile} --out #{config[:web_apollo_data]} --arrowheadClass trellis-arrowhead --getSubfeatures --subfeatureClasses '{#{tracks[track]}}' --cssClass container-16px --config '{ \"category\": \"#{options.category}\" }' --type tRNA --trackLabel #{options.label}" + elsif track == "ncRNA" + system "perl #{config[:web_apollo_build]}/bin/flatfile-to-json.pl --gff #{options.infile} --out #{config[:web_apollo_data]} --arrowheadClass trellis-arrowhead --getSubfeatures --subfeatureClasses '{#{tracks[track]}}' --cssClass container-16px --config '{ \"category\": \"#{options.category}\" }' --type ncRNA --trackLabel #{options.label}" + else + system "perl #{config[:web_apollo_build]}/bin/flatfile-to-json.pl --gff #{options.infile} --out #{config[:web_apollo_data]} --arrowheadClass trellis-arrowhead --getSubfeatures --subfeatureClasses '{#{tracks[track]}}' --cssClass container-16px --config '{ \"category\": \"#{options.category}\" }' --trackLabel #{options.label}" + end + +end diff --git a/bin/gaas_augustus2grid.pl b/bin/gaas_augustus2grid.pl new file mode 100755 index 000000000..a1c0dbdc7 --- /dev/null +++ b/bin/gaas_augustus2grid.pl @@ -0,0 +1,260 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Pod::Usage; +use Getopt::Long; +use Scalar::Util qw(openhandle); +use Time::Piece; +use Time::Seconds; +use File::Basename; +use Bio::SeqIO; +use Cwd; +use Carp; +no strict qw(subs refs); +use GAAS::Grid::Bsub; +use GAAS::Grid::Sbatch; 
use GAAS::GAAS;
use File::Spec;

# Split a genome into one FASTA file per sequence, build one Augustus command
# per file and run the commands on a grid (Slurm/LSF) or locally.
# See the POD after __END__ for the user documentation.

my $header = get_gaas_header();
my $outdir = "augustus_output";
my $genome = undef;
my $species = undef;
my $hints = undef;
my @cmds = ();    # Stores the commands to send to farm
my $quiet;
my $grid = "Slurm";
my $queue = undef;
my $help;
my $log_fh;       # log file handle, opened once the output directory exists

if ( !GetOptions(
        "hints=s"    => \$hints,
        "species=s"  => \$species,
        "genome|g=s" => \$genome,
        "grid=s"     => \$grid,
        "quiet|q!"   => \$quiet,
        "queue=s"    => \$queue,
        "outdir=s"   => \$outdir,
        "help|h!"    => \$help ) )
{
    pod2usage( { -message => "Failed to parse command line",
                 -verbose => 1,
                 -exitval => 1 } );
}

# Print Help and exit
if ($help) {
    pod2usage( { -verbose => 99,
                 -exitval => 0,
                 -message => "$header\n" } );
}

if ( !defined($genome) or !defined($species) ) {
    pod2usage( {
        -message => "$header\nAt least 2 parameters are mandatory:\nInput genome fasta file (--genome) and augustus species hmm profile (--species)\n\n",
        -verbose => 0,
        -exitval => 2 } );
}

# set grid option properly
# The value must be exactly one of the accepted names: a prefix or an empty
# string would otherwise pass the check and leave no grid runner to call.
my @grid_choice = ( 'slurm', 'lsf', 'none' );
$grid = lc($grid);
if ( !grep { $_ eq $grid } @grid_choice ) {
    die "$grid is not a value accepted for grid parameter. Accepted values: @grid_choice\n";
}
$grid = undef if $grid eq 'none';

# .. Check that all binaries are available in $PATH

my @tools = ("augustus");    # List of tools to check for!
foreach my $exe (@tools) { check_bin($exe) == 1 or die "Missing executable $exe in PATH"; }

my $augustus_config_path = $ENV{'AUGUSTUS_CONFIG_PATH'} or die "AUGUSTUS_CONFIG_PATH is not set, aborting.";

# Extrinsic configuration handed to Augustus. The historical site-specific
# location is kept when it exists; otherwise the file is taken from the
# Augustus configuration directory that is required above.
my $extrinsic_cfg = "/references/software/augustus/config/extrinsic/extrinsic.E.cfg";
if ( !-e $extrinsic_cfg ) {
    $extrinsic_cfg = File::Spec->catfile( $augustus_config_path, "extrinsic", "extrinsic.E.cfg" );
}

# .. Create output directory

if ( -d $outdir ) {
    die "Output directory $outdir exists. Please remove and try again";
}
else {
    msg("Creating output directory $outdir");
    runcmd( "mkdir", "-p", $outdir );    # list form: the directory name never goes through a shell
}

# .. set up log file

my $logfile = "$outdir/augustus.log";
msg("Writing log to: $logfile");
open( $log_fh, '>', $logfile ) or err("Can't open logfile $logfile: $!");

# .. Read genome fasta file.
my $inseq = Bio::SeqIO->new( -file => "<$genome", -format => 'fasta' );

# .. and create chunks
msg("Creating chunks for GRID\n");

my $counter = 10000;
my $hints_option = $hints ? " --hintsfile=$hints" : "";

while ( my $seq = $inseq->next_seq() ) {
    $counter += 1;
    my $outfile = $outdir . "/seq_" . $counter . ".fa";

    # One command per chunk; the redirection requires a shell, so these stay strings.
    my $cmd = "augustus --species=$species$hints_option --alternatives-from-evidence=true --gff3=on"
            . " --extrinsicCfgFile=$extrinsic_cfg --uniqueGeneId=true $outfile > $outfile.augustus";
    push( @cmds, $cmd );

    my $seq_out = Bio::SeqIO->new( -file => ">$outfile", -format => 'fasta' );
    $seq_out->write_seq($seq);
}

# Submit job chunks to grid
msg("submitting chunks\n");

if ($grid) {
    msg( "Sending " . scalar(@cmds) . " jobs to the grid\n" );

    my $grid_runner;
    if ( $grid eq 'lsf' ) {
        $grid_runner = Bsub->new( cmds_list => \@cmds );
    }
    elsif ( $grid eq 'slurm' ) {
        $grid_runner = Sbatch->new( cmds_list => \@cmds );
    }
    if ($queue) { $grid_runner->queue($queue) }
    $grid_runner->run();
}
else {
    foreach my $command (@cmds) {

        system($command);

        if ( $? == -1 ) {
            print "failed to execute: $!\n";
        }
        elsif ( $? & 127 ) {
            printf "child died with signal %d, %s coredump\n",
                ( $? & 127 ), ( $? & 128 ) ? 'with' : 'without';
        }
        else {
            printf "child exited with value %d\n", $? >> 8;
        }
    }
}

# ..Postprocessing here, like merging of output files

# --------------------

# Write a time-stamped message to the log file (once it is open) and, unless
# --quiet was given, to STDERR.
sub msg {
    my $t    = localtime;
    my $line = "[" . $t->hms . "] @_\n";
    print {$log_fh} $line if defined($log_fh) && openhandle($log_fh);
    print STDERR $line unless $quiet;
}

# --------------------

# Log and run a command given as an argument list; stop the script with err()
# when the command cannot be run or returns a non-zero status.
sub runcmd {
    msg( "Running:", @_ );
    system(@_) == 0 or err( "Could not run command:", @_ );
}

# --------------------

# Return 1 when an executable file with the given name is found in one of the
# directories of PATH, 0 otherwise.
sub check_bin {
    my ($exe) = @_;
    foreach my $dir ( File::Spec->path() ) {
        my $candidate = File::Spec->catfile( $dir, $exe );
        return 1 if -f $candidate && -x $candidate;
    }
    return 0;
}

#----------------------------------------------------------------------

# Report a fatal error (always displayed, even in quiet mode) and exit with status 2.
sub err {
    $quiet = 0;
    msg(@_);
    exit(2);
}

__END__

=head1 NAME

gaas_augustus2grid.pl - run Augustus on a genome, one job per sequence

=head1 DESCRIPTION

Chunk input data to run multiple augustus jobs in parallel to grid

=head1 SYNOPSIS

    gaas_augustus2grid.pl --genome fasta_file --species species_name
    gaas_augustus2grid.pl --help

=head1 OPTIONS

=over 8

=item B<--genome> or B<-g>

The name of the genome fasta file.

=item B<--hints>

Augustus hints file (e.g Intron)

=item B<--species>

Species name for the hmm profile to use within Augustus

=item B<--queue>

If you want to define a particular queue to run the jobs

=item B<--grid>

Define which grid to use, Slurm, Lsf or None. Default = Slurm.

=item B<--quiet> or B<-q>

Quiet mode

=item B<--outdir> or B<-o>

The name of the output directory.

=item B<-h> or B<--help>

Display this helpful text.

=back

=head1 FEEDBACK

=head2 Did you find a bug?

Do not hesitate to report bugs to help us keep track of the bugs and their
resolution. Please use the GitHub issue tracking system available at this
address:

            https://github.com/NBISweden/GAAS/issues

 Ensure that the bug was not already reported by searching under Issues.
 If you're unable to find an (open) issue addressing the problem, open a new one.
 Try as much as possible to include in the issue when relevant:
 - a clear description,
 - as much relevant information as possible,
 - the command used,
 - a data sample,
 - an explanation of the expected behaviour that is not occurring.

=head2 Do you want to contribute?

You are very welcome, visit this address for the Contributing guidelines:
https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md

=cut

AUTHOR - Jacques Dainat
diff --git a/bin/gaas_augustus_create_hints.rb b/bin/gaas_augustus_create_hints.rb new file mode 100644 index 000000000..5ef103269 --- /dev/null +++ b/bin/gaas_augustus_create_hints.rb @@ -0,0 +1,158 @@ +#!/usr/bin/ruby +# == NAME +# gff2hints.rb +# +# == USAGE +# ./this_script.rb [ -h | --help ] +# [ -i | --infile ] |[ -o | --outfile ] | +# == DESCRIPTION +# Converts Maker-generated evidence alignments from GFF3 format to Augustus-compatbile hint format +# +# == OPTIONS +# -h,--help:: Show help +# -i,--infile=INFILE:: input file +# -o,--outfile=OUTFILE:: output file + +# +# == EXPERT OPTIONS +# +# == AUTHOR +# Marc Hoeppner, mphoeppner@gmail.com + +require 'optparse' +require 'ostruct' +require 'bio' + +### Define modules and classes here + + +def cufflinks2hints(entries) + + bin = entries.records.group_by{|r| r.attributes["transcript_id"].gsub(/\"/, '')} + + bin.each do |transcript_id,records| + + strand = records[0].strand + + strand == "+" ? first_exon = records[0] : first_exon = records[-1] + strand == "+" ? 
last_exon = records[-1] : last_exon = records[1] + + # Building exon part hints + records.each do |record| + + puts "#{record.seqname}\tc2h\tep\t#{record.start}\t#{record.end}\t.\t#{record.strand}\t.\tgrp=#{transcript_id};pri=4,src=E" + + # building exon hints + unless record == first_exon or record == last_exon + puts "#{record.seqname}\tc2h\texon\t#{record.start}\t#{record.end}\t.\t#{record.strand}\t.\tgrp=#{transcript_id};pri=4,src=E" + end + + # building TSS hints + if record == first_exon and records.length > 2 + if strand == "+" + puts "#{record.seqname}\tc2h\ttss\t#{record.start.to_i-20}\t#{record.start.to_i+20}\t.\t#{record.strand}\t.\tgrp=#{transcript_id};pri=4,src=E" + else + puts "#{record.seqname}\tc2h\ttss\t#{record.end.to_i-20}\t#{record.end.to_i+20}\t.\t#{record.strand}\t.\tgrp=#{transcript_id};pri=4,src=E" + end + end + + end + + # Building the intron hints + while records.length > 1 + upstream,downstream = records[0..1] + puts "#{upstream.seqname}\tc2h\tintron\t#{upstream.end.to_i+1}\t#{downstream.start.to_i-1}\t.\t#{upstream.strand}\t.\tgrp=#{transcript_id};pri=4,src=E" + records.shift + end + + + end + + +end + +def protein2hints(entries) + + bin = entries.records.select{|r| r.feature == "match_part" }.group_by{|r| r.attributes.find{|k,v| k == "Parent" } } + + bin.each do |protein_id,records| + + strand = records[0].strand + + strand == "+" ? first_exon = records[0] : first_exon = records[-1] + strand == "+" ? 
last_exon = records[-1] : last_exon = records[1] + + records.each do |record| + puts "#{record.seqname}\tp2h\tCDSpart\t#{record.start}\t#{record.end}\t.\t#{record.strand}\t.\tgrp=#{protein_id};pri=2,src=P" + + + if record == first_exon + + target = record.attributes.find{|k,v| k == "Target" }[1] + + # The protein aligns from position one, which we assume is the start codon + if target.start.to_i == 1 + + if strand == "+" + puts "#{record.seqname}\tp2h\tstart\t#{record.start.to_i-20}\t#{record.start.to_i+20}\t.\t#{record.strand}\t.\tgrp=#{protein_id};pri=2,src=P" + else + puts "#{record.seqname}\tp2h\tstart\t#{record.end.to_i-20}\t#{record.end.to_i+20}\t.\t#{record.strand}\t.\tgrp=#{protein_id};pri=2,src=P" + end + + end + + end + + end + + end + +end + +### Get the script arguments and open relevant files +options = OpenStruct.new() +opts = OptionParser.new() +opts.on("-i","--infile", "=INFILE","Input") {|argument| options.infile = argument } +opts.on("-o","--outfile", "=OUTFILE","Output") {|argument| options.outfile = argument } +opts.on("-s","--source", "=SOURCE","Source of the data") {|argument| options.source = argument } +opts.on("-h","--help","Display the usage information") { + puts opts + exit +} + +opts.parse! + +sources = [ "cufflinks" , "protein2genome" ] + +abort "Must specify a source (#{sources.join(',')})" unless options.source + +options.infile ? input_stream = File.open(options.infile) : input_stream = $stdin +options.outfile ? 
output_stream = File.new(options.outfile,'w') : output_stream = $stdout + +if options.infile.include?(".gff") or options.infile.include?(".gtf") + + + if options.source == "cufflinks" + + entries = Bio::GFF.new(input_stream) + cufflinks2hints(entries) + + elsif options.source == "protein2genome" + + entries = Bio::GFF::GFF3.new(input_stream) + protein2hints(entries) + + end + +elsif options.infile.include?(".bed") + + + +end + + + +output_stream.close + + + diff --git a/bin/gaas_bam2bigwig.sh b/bin/gaas_bam2bigwig.sh new file mode 100755 index 000000000..4d7f8b324 --- /dev/null +++ b/bin/gaas_bam2bigwig.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# Convert BAM to BigWig +# Requires genome.list (tab-delimited list of chromosomes and lengths) + +file=$1 + +# load bamToBed +module load bedtools/2.17.0 +# load bedGraphToBigWig +module load ucsc-tools + +command -v bamToBed >/dev/null 2>&1 || { echo >&2 "I require bamToBed but it's not in PATH. Aborting."; exit 1; } +command -v bedGraphToBigWig >/dev/null 2>&1 || { echo >&2 "I require bedGraphToBigWig but it's not in PATH. Aborting."; exit 1; } + +echo "Converting file to BED" +bamToBed -i $file > $(basename $file .bam).bed +echo "Creating coverage track" +genomeCoverageBed -i $(basename $file .bam).bed -bg -g genome.list > $(basename $file .bam).cov +echo "Writing BigWig file" +bedGraphToBigWig $(basename $file .bam).cov genome.list $(basename $file .bam).bw diff --git a/bin/gaas_bam_to_wiggle.py b/bin/gaas_bam_to_wiggle.py new file mode 100755 index 000000000..614f58c2b --- /dev/null +++ b/bin/gaas_bam_to_wiggle.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python +"""Convert BAM files to BigWig file format in a specified region. + +Usage: + bam_to_wiggle.py [] + [--outfile= + --chrom= + --start= + --end= + --normalize] + +chrom start and end are optional, in which case they default to everything. +The normalize flag adjusts counts to reads per million. 
+ +The config file is in YAML format and specifies the location of the wigToBigWig +program from UCSC: + +program: + ucsc_bigwig: wigToBigWig + +If not specified, these will be assumed to be present in the system path. + +The script requires: + pysam (http://code.google.com/p/pysam/) + wigToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/) +If a configuration file is used, then PyYAML is also required (http://pyyaml.org/) +""" +import os +import sys +import subprocess +import tempfile +from optparse import OptionParser +from contextlib import contextmanager, closing + +import pysam + +from bcbio.pipeline.config_utils import load_config, get_program + +def main(bam_file, config_file=None, chrom='all', start=0, end=None, + outfile=None, normalize=False, use_tempfile=False): + if config_file: + config = load_config(config_file) + else: + config = {"program": {"ucsc_bigwig" : "wigToBigWig"}} + if outfile is None: + outfile = "%s.bigwig" % os.path.splitext(bam_file)[0] + if start > 0: + start = int(start) - 1 + if end is not None: + end = int(end) + regions = [(chrom, start, end)] + if os.path.abspath(bam_file) == os.path.abspath(outfile): + sys.stderr.write("Bad arguments, input and output files are the same.\n") + sys.exit(1) + if not (os.path.exists(outfile) and os.path.getsize(outfile) > 0): + if use_tempfile: + #Use a temp file to avoid any possiblity of not having write permission + out_handle = tempfile.NamedTemporaryFile(delete=False) + wig_file = out_handle.name + else: + wig_file = "%s.wig" % os.path.splitext(outfile)[0] + out_handle = open(wig_file, "w") + with closing(out_handle): + chr_sizes, wig_valid = write_bam_track(bam_file, regions, config, out_handle, + normalize) + try: + if wig_valid: + convert_to_bigwig(wig_file, chr_sizes, config, outfile) + finally: + os.remove(wig_file) + +@contextmanager +def indexed_bam(bam_file, config): + if not os.path.exists(bam_file + ".bai"): + pysam.index(bam_file) + sam_reader = pysam.Samfile(bam_file, 
"rb") + yield sam_reader + sam_reader.close() + +def write_bam_track(bam_file, regions, config, out_handle, normalize): + out_handle.write("track %s\n" % " ".join(["type=wiggle_0", + "name=%s" % os.path.splitext(os.path.split(bam_file)[-1])[0], + "visibility=full", + ])) + normal_scale = 1e6 + is_valid = False + with indexed_bam(bam_file, config) as work_bam: + total = sum(1 for r in work_bam.fetch() if not r.is_unmapped) if normalize else None + sizes = zip(work_bam.references, work_bam.lengths) + if len(regions) == 1 and regions[0][0] == "all": + regions = [(name, 0, length) for name, length in sizes] + for chrom, start, end in regions: + if end is None and chrom in work_bam.references: + end = work_bam.lengths[work_bam.references.index(chrom)] + assert end is not None, "Could not find %s in header" % chrom + out_handle.write("variableStep chrom=%s\n" % chrom) + for col in work_bam.pileup(chrom, start, end): + if normalize: + n = float(col.n) / total * normal_scale + else: + n = col.n + out_handle.write("%s %.1f\n" % (col.pos+1, n)) + is_valid = True + return sizes, is_valid + +def convert_to_bigwig(wig_file, chr_sizes, config, bw_file=None): + if not bw_file: + bw_file = "%s.bigwig" % (os.path.splitext(wig_file)[0]) + size_file = "%s-sizes.txt" % (os.path.splitext(wig_file)[0]) + with open(size_file, "w") as out_handle: + for chrom, size in chr_sizes: + out_handle.write("%s\t%s\n" % (chrom, size)) + try: + cl = [get_program("ucsc_bigwig", config, default="wigToBigWig"), wig_file, size_file, bw_file] + subprocess.check_call(cl) + finally: + os.remove(size_file) + return bw_file + +if __name__ == "__main__": + parser = OptionParser() + parser.add_option("-o", "--outfile", dest="outfile") + parser.add_option("-c", "--chrom", dest="chrom") + parser.add_option("-s", "--start", dest="start") + parser.add_option("-e", "--end", dest="end") + parser.add_option("-n", "--normalize", dest="normalize", + action="store_true", default=False) + parser.add_option("-t", 
"--tempfile", dest="use_tempfile", + action="store_true", default=False) + (options, args) = parser.parse_args() + if len(args) not in [1, 2]: + print "Incorrect arguments" + print __doc__ + sys.exit() + kwargs = dict( + outfile=options.outfile, + chrom=options.chrom or 'all', + start=options.start or 0, + end=options.end, + normalize=options.normalize, + use_tempfile=options.use_tempfile) + main(*args, **kwargs) diff --git a/bin/gaas_bed2wiggle.rb b/bin/gaas_bed2wiggle.rb new file mode 100755 index 000000000..d5a00affe --- /dev/null +++ b/bin/gaas_bed2wiggle.rb @@ -0,0 +1,32 @@ +input = File.open(ARGV.shift,"r") + +header = "track type=WIG name=\"FST Wiggle file\" description=\"FST data\"" +puts header + +current_scaffold = "none" + +last_pos = 1 + +while (line =input.gets) + + next unless line.include?("scaffold") + + scaffold,start,stop,score = line.strip.split("\t") + + next if scaffold == current_scaffold && start.to_i == last_pos + + last_pos = stop.to_i + + if scaffold != current_scaffold + puts "variableStep chrom=#{scaffold}" + current_scaffold = scaffold + end + + + puts "#{start}\t#{score}" + #puts "#{stop}\t#{score}" + + +end + +input.close diff --git a/bin/gaas_bed_create_random_feature.pl b/bin/gaas_bed_create_random_feature.pl new file mode 100755 index 000000000..693ef50b8 --- /dev/null +++ b/bin/gaas_bed_create_random_feature.pl @@ -0,0 +1,173 @@ +#!/usr/bin/env perl + +############################################# +# Jacques Dainat 2018 +############################################# + + +#libraries +use strict; +use warnings; +use Getopt::Long; +use IO::File; +use Pod::Usage; +use Carp; +use Bio::FeatureIO; +use GAAS::GAAS; + +# PARAMETERS - OPTION +my $header = get_gaas_header(); +my $opt_genome; +my $opt_sizeGenome; +my $opt_sizeGeneMAx; +my $opt_nb; +my $opt_output; +my $opt_help; + + +# OPTION MANAGMENT +if ( !GetOptions( 'g|genome|fa=s' => \$opt_genome, + 's|size=i' => \$opt_sizeGenome, + 'nbg|number_gene=i' => \$opt_nb, + 'sg|size_gene=i' => 
\$opt_sizeGeneMAx, + 'o|output=s' => \$opt_output, + 'h|help!' => \$opt_help ) ) +{ + pod2usage( { -message => 'Failed to parse command line', + -verbose => 1, + -exitval => 1 } ); +} + +if ($opt_help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if ( ! defined($opt_sizeGenome) ){ # the genome size feeds rand() below; --genome is optional (defaults to chr_unknown) + pod2usage( { + -message => "\nAt least 1 parameter is mandatory:\nGenome size (--size)\n\n". + "Many optional parameters are available. Look at the help documentation to know more.\n", + -verbose => 0, + -exitval => 2 } ); +} + + + + +if ( ! (defined($opt_sizeGeneMAx))) { + print STDERR "you didnt define size of gene. We will use 1000 bp by default.\n"; # STDERR: STDOUT may carry the BED output + $opt_sizeGeneMAx=1000; +} +if ( ! (defined($opt_nb)) ){ + print STDERR "you didnt define number of gene. We will use 100 by default.\n"; + $opt_nb=100; +} + +my $output; +if ($opt_output) { + if (-f $opt_output){ + print "Cannot create a file with the name $opt_output because a file with this name already exists.\n";exit(); + } + open(my $fh, '>', $opt_output) or die "Could not open file '$opt_output' $!"; + $output= Bio::FeatureIO->new(-fh => $fh, -format => 'BED' ); +} +else{ + $output = Bio::FeatureIO->new(-fh => \*STDOUT, -format => 'BED' ); +} + + +my $seq_id; +if ( ! (defined($opt_genome))) { + $seq_id="chr_unknown"; +} +else{ + $seq_id=$opt_genome; +} + +for (my $i=0; $i < $opt_nb; $i++) { # exactly $opt_nb features (the former "<=" wrote one extra) + my $start=int(rand($opt_sizeGenome-$opt_sizeGeneMAx)); + my $end=$start+$opt_sizeGeneMAx; + + my $primary_tag="gene_invent".$i; + + my $random_strand = int(rand(2)); + + my $feature = Bio::SeqFeature::Annotated->new(-seq_id => $seq_id, -start => $start, -end => $end, -strand => $random_strand ) ; + $output->write_feature($feature); +} +print STDERR "FINISH !!\n"; # keep the status message out of the BED stream + + +__END__ + +=head1 NAME + +gaas_create_random_feature.pl + +=head1 DESCRIPTION + +The script aims to create a fake bed file. 
+ +=head1 SYNOPSIS + + gaas_create_random_feature.pl -g name -s 10000 -o + gaas_create_random_feature.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<-g>, B<--genome> or B<--fa> + +STRING: Name to write in the first column of the bed file. Default: chr_unknown. + +=item B<-s>, B<--size> + +INTEGER: Genome size. It defines the range within which features will be created. + +=item B<--nbg>, B<--number_gene> + +INTEGER: Number of genes. It defines the number of gene features to be created. + +=item B<--sg>, B<--size_gene> + +INTEGER: Size of genes. It defines the size of the gene features to be created. + +=item B<-o> or B<--output> + +STRING: Output file. If no output file is specified, the output will be written to STDOUT. The result is in BED format. + +=item B<--help> or B<-h> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_blastp2grid.pl b/bin/gaas_blastp2grid.pl new file mode 100755 index 000000000..6edf150ef --- /dev/null +++ b/bin/gaas_blastp2grid.pl @@ -0,0 +1,328 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Pod::Usage; +use Getopt::Long; +use Scalar::Util qw(openhandle); +use Time::Piece; +use Time::Seconds; +use File::Basename; +use Bio::SeqIO; +use Cwd; +use Carp; +no strict qw(subs refs); +use GAAS::Grid::Bsub; +use GAAS::Grid::Sbatch; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $outdir = "blastp_output"; +my $db = undef; +my $fasta = undef; +my $chunk_size = 500; # Partition size of fasta input (sequences per chunk) +my $eval = 1e-5; +my $nb_seq = undef; # Database size handed to blastp through -dbsize +my @chunks = (); # Holds chunks, partitioning the fasta input (so we + # don't send 50.000 jobs to the farm... +my @cmds = (); # Stores the commands to send to farm +my $quiet; +my $grid="Slurm"; +my $queue=undef; +my $help; + +if ( ! GetOptions( "h|help!" => \$help, + "fasta|f=s" => \$fasta, + "db=s" => \$db, + "chunk_size=i" => \$chunk_size, + "nb_seq=i" => \$nb_seq, + "eval=f" => \$eval, # needs a value type: a bare "eval" is a boolean flag and would set $eval to 1 + "grid=s" => \$grid, + "quiet|q!" => \$quiet, + "queue=s" => \$queue, + "outdir=s" => \$outdir ) ) + +{ + pod2usage( { -message => "Failed to parse command line", + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if ( ! defined($db) or ! defined($fasta) ){ + pod2usage( { + -message => "$header\nAt least 2 parameters are mandatory:\nInput fasta file (--fasta) and a database (--db)\n\n", + -verbose => 0, + -exitval => 2 } ); +} + +# set grid option properly +my @grid_choice=('slurm','lsf','none'); +$grid=lc($grid); +if (! 
grep( $_ eq $grid, @grid_choice ) ) { # exact match only: a prefix such as "s" used to pass and left $grid_runner undefined + print "$grid is not a value accepted for grid parameter.";exit; +} +$grid= undef if lc($grid) eq 'none'; + +# .. Check that all binaries are available in $PATH + +my @tools = ("blastp"); +foreach my $exe (@tools) { + check_bin($exe) == 1 or die "Missing executable $exe in PATH"; +} + +# .. Create output directory + +if ( -d $outdir ) { + msg( "Be careful, we are using an existinf Output directory $outdir. " . + "If you do not want that, you have to stop the job" ); +} +else { + msg("Creating output directory $outdir"); + runcmd("mkdir -p $outdir"); +} + +# .. set up log file + +my $logfile = "$outdir/blastp2grid.log"; +msg("Writing log to: $logfile"); +open LOG, '>', $logfile or err ("Can't open logfile"); + +# .. Read protein fasta file. +my $inseq = Bio::SeqIO->new( -file => "<$fasta", -format => 'fasta' ); + +# .. and create chunks +msg("Creating chunks for GRID\n"); + +my @seqarray = (); +my $counter = 0; +my $chunk_counter = 1; + +my $seq; + +while ( $seq = $inseq->next_seq() ) { + $counter += 1; + push( @seqarray, $seq ); + + if ( $counter == $chunk_size ) { + my $outfile = $outdir . "/chunk_" . $chunk_counter . ".fa"; + write_chunk( $outfile, @seqarray ); + @seqarray = (); + $chunk_counter += 1; + $counter = 0; + } +} +my $outfile = + $outdir . "/chunk_" . + $chunk_counter . ".fa"; # Clunky, the last chunk is <= chunk_size... +if (@seqarray) { write_chunk( $outfile, @seqarray ) } else { $chunk_counter-- } # no empty trailing chunk (and no job for it) when the input is a multiple of chunk_size + +# Push all jobs into the command list +if ( !defined($nb_seq) ) { + for ( my $i = 1; $i <= $chunk_counter; $i++ ) { + my $cmd = + "blastp -evalue $eval -num_alignments 100000 " . + "-seg yes -outfmt 6 -db $db -query $outdir/chunk_$i.fa " . + "-out $outdir/chunk_$i.tab"; + push( @cmds, $cmd ); + } +} +else { + for ( my $i = 1; $i <= $chunk_counter; $i++ ) { + my $cmd = + "blastp -dbsize $nb_seq -evalue $eval " . + "-num_alignments 100000 -seg yes -outfmt 6 -db $db " . 
+ "-query $outdir/chunk_$i.fa -out $outdir/chunk_$i.tab"; + push( @cmds, $cmd ); + } +} + +# Submit job chunks to grid +msg("submitting chunks\n"); + +if( $grid){ + msg("Sending " . scalar(@cmds) . " jobs to the grid\n"); # $#cmds is the last index, one less than the job count + chomp(@cmds); # Strip any trailing newline from each command + # Submit job chunks to grid + my $grid_runner; + if ( $grid eq 'lsf'){ + $grid_runner = Bsub->new( cmds_list => \@cmds); + } + elsif( $grid eq 'slurm'){ + $grid_runner = Sbatch->new( cmds_list => \@cmds); + } + if($queue){$grid_runner->queue($queue)} + $grid_runner->run(); +} +else{ + foreach my $command (@cmds){ + + system($command); + + if ($? == -1) { + print "failed to execute: $!\n"; + } + elsif ($? & 127) { + printf "child died with signal %d, %s coredump\n", + ($? & 127), ($? & 128) ? 'with' : 'without'; + } + else { + printf "child exited with value %d\n", $? >> 8; + } + } +} + +# Merging the outputs +msg("Merging outputs from chunks"); + +my @files = <$outdir/*.tab>; + +foreach my $file (@files) { + system("cat $file >> $outdir/blastp.merged"); +} + +msg("Finished BLASTp grid run."); + +# -------------------- + +sub write_chunk +{ + my $outfile = shift; + my @seqs = @_; + my $seq_out = + Bio::SeqIO->new( -file => ">$outfile", -format => 'fasta' ); + foreach my $seq (@seqs) { $seq_out->write_seq($seq) } +} + +# -------------------- + +sub msg +{ + my $t = localtime; + my $line = "[" . $t->hms . "] @_\n"; + print LOG $line if openhandle( \*LOG ); + print STDERR $line unless $quiet; +} + +# -------------------- + +sub runcmd +{ + msg( "Running:", @_ ); + system(@_) == 0 or err ( "Could not run command:", @_ ); +} + +# -------------------- + +sub check_bin +{ + length(`which @_`) > 0 ? 
return 1 : return 0; +} + +#---------------------------------------------------------------------- + +sub err +{ + $quiet = 0; + msg(@_); + exit(2); + +} + +__END__ + +=head1 NAME + +gaas_blastp2grid.pl - + +=head1 DESCRIPTION + +Chunk input data to run multiple blastp jobs in parallel on a grid + +=head1 SYNOPSIS + + gaas_blastp2grid.pl -f fasta_file --db db_name + gaas_blastp2grid.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<--fasta> or B<-f> + +The name of the protein fasta file to use as query. + +=item B<--db> + +The name of the database used for the blast search. + +=item B<--nb_seq> + +The number of proteins contained in the db. Useful to cheat on +the database size. (OrthoMCL aggregation as example). If not +provided, the current database size is used. + +=item B<--eval> + +The maximum e-value of the hits kept in the result. Default = 1e-5. + +=item B<--chunk_size> + +The number of sequences per job. If not provided, default size +will be 500. + +=item B<--queue> + +If you want to define a particular queue to run the jobs + +=item B<--grid> + +Define which grid to use, Slurm, Lsf or None. Default = Slurm. + +=item B<--quiet> or B<-q> + +Quiet mode + +=item B<--outdir> or B<-o> + +The name of the output directory. + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_blat2grid.pl b/bin/gaas_blat2grid.pl new file mode 100755 index 000000000..2c2cf5460 --- /dev/null +++ b/bin/gaas_blat2grid.pl @@ -0,0 +1,289 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Pod::Usage; +use Getopt::Long; +use Scalar::Util qw(openhandle); +use Time::Piece; +use Time::Seconds; +use File::Basename; +use Bio::SeqIO; +use Cwd; +use Carp; +no strict qw(subs refs); +use GAAS::Grid::Bsub; +use GAAS::Grid::Sbatch; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $outdir = "blat_output"; +my $db = undef; +my $fasta = undef; +my $chunk_size = 500; # Partition size of fasta input +my @chunks = (); # Holds chunks, partitioning the fasta input (so we don't send 50.000 jobs to the farm... +my @cmds = (); # Stores the commands to send to farm +my $quiet; +my $grid="Slurm"; +my $queue=undef; +my $help; + +if ( ! GetOptions( + "chunk_size=i" => \$chunk_size, # integer: it is compared numerically against the sequence counter + "f|fasta=s" => \$fasta, + "db=s" => \$db, + "grid=s" => \$grid, + "quiet|q!" => \$quiet, + "queue=s" => \$queue, + "outdir=s" => \$outdir, + "help|h!" => \$help ) ) + +{ + pod2usage( { -message => "Failed to parse command line", + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if ( ! defined($db) or ! defined($fasta)){ + pod2usage( { + -message => "$header\nAt least 2 parameters are mandatory:\nInput fasta file (--fasta) and a database (--db)\n\n", + -verbose => 0, + -exitval => 2 } ); +} + +# set grid option properly +my @grid_choice=('slurm','lsf','none'); +$grid=lc($grid); +if (! grep( $_ eq $grid, @grid_choice ) ) { # exact match only: a prefix such as "s" used to pass and left the grid runner undefined + print "$grid is not a value accepted for grid parameter.";exit; +} +$grid= undef if lc($grid) eq 'none'; + +# .. 
Check that all binaries are available in $PATH + +my @tools = ("blat" ); +foreach my $exe (@tools) { check_bin($exe) == 1 or die "Missing executable $exe in PATH"; } + +# .. Create output directory + +if (-d $outdir ) { + die "Output directory $outdir exists. Please remove and try again"; +} else { + msg("Creating output directory $outdir"); + runcmd("mkdir -p $outdir") +} + +# .. set up log file + +my $logfile = "$outdir/blat2grid.log"; +msg("Writing log to: $logfile"); +open LOG, '>', $logfile or err("Can't open logfile"); + +# .. Read protein fasta file. +my $inseq = Bio::SeqIO->new(-file => "<$fasta", -format => 'fasta'); + +# .. and create chunks +msg("Creating chunks for GRID\n"); + +my @seqarray = (); +my $counter = 0; +my $chunk_counter = 1; + +my $seq; + +while( $seq = $inseq->next_seq() ) { + $counter += 1; + push(@seqarray,$seq); + + if ($counter == $chunk_size) { + my $outfile = $outdir . "/chunk_" . $chunk_counter . ".fa"; + write_chunk($outfile,@seqarray); + @seqarray = (); + $chunk_counter += 1; + $counter = 0; + } +} +my $outfile = $outdir . "/chunk_" . $chunk_counter . ".fa"; # Clunky, the last chunk is <= chunk_size... +if (@seqarray) { write_chunk($outfile,@seqarray) } else { $chunk_counter-- } # no empty trailing chunk (and no job for it) when the input is a multiple of chunk_size + +# Push all jobs into the command list + +for (my $i=1;$i<=$chunk_counter;$i++) { + my $cmd = "blat -minIdentity=98 -minScore=80 $db $outdir/chunk_$i.fa $outdir/chunk_$i.psl"; + push(@cmds,$cmd); +} + + +# Submit job chunks to grid +msg("submitting chunks\n"); + +if( $grid){ + msg("Sending " . scalar(@cmds) . " jobs to the grid\n"); # $#cmds is the last index, one less than the job count + chomp(@cmds); # Strip any trailing newline from each command + # Submit job chunks to grid + my $grid_runner; + if ( $grid eq 'lsf'){ + $grid_runner = Bsub->new( cmds_list => \@cmds); + } + elsif( $grid eq 'slurm'){ + $grid_runner = Sbatch->new( cmds_list => \@cmds); + } + if($queue){$grid_runner->queue($queue)} + $grid_runner->run(); +} +else{ + foreach my $command (@cmds){ + + system($command); + + if ($? == -1) { + print "failed to execute: $!\n"; + } + elsif ($? 
& 127) { + printf "child died with signal %d, %s coredump\n", + ($? & 127), ($? & 128) ? 'with' : 'without'; + } + else { + printf "child exited with value %d\n", $? >> 8; + } + } +} + +# Merging the outputs +msg("Merging outputs from chunks"); + +my @files = <$outdir/*.psl>; + +foreach my $file (@files) { + system("cat $file >> $outdir/blat.merged"); +} + +msg("Finished BLAT grid run."); + +# -------------------- + +sub write_chunk { + my $outfile = shift; + my @seqs = @_; + my $seq_out = Bio::SeqIO->new(-file => ">$outfile", -format => 'fasta'); + foreach my $seq (@seqs) {$seq_out->write_seq($seq)}; +} + +# -------------------- + +sub msg { + my $t = localtime; + my $line = "[".$t->hms."] @_\n"; + print LOG $line if openhandle(\*LOG); + print STDERR $line unless $quiet; +} + +# -------------------- + +sub runcmd { + msg("Running:", @_); + system(@_)==0 or err("Could not run command:", @_); +} + +# -------------------- + +sub check_bin { + length(`which @_`) > 0 ? return 1 : return 0; +} + +#---------------------------------------------------------------------- + +sub err { + $quiet=0; + msg(@_); + exit(2); +} + +__END__ + +=head1 NAME + +gaas_blat2grid.pl + +=head1 DESCRIPTION + +Chunk input data to run multiple blat jobs in parallel to grid + +=head1 SYNOPSIS + + gaas_blat2grid.pl -f fasta_file --db db_name + gaas_blat2grid.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<--fasta> or B<-f> + +The name of the nucleotide/protein fasta file to use as query. + +=item B<--db> + +The name of the database use to blat + +=item B<--chunk_size> + +The number of sequence by job. If not provided, default size +will be 500. + +=item B<--queue> + +If you want to define a particular queue to run the jobs + +=item B<--grid> + +Define which grid to use, Slurm, Lsf or None. Default = Slurm. + +=item B<--quiet> or B<-q> + +Quiet mode + +=item B<--outdir> or B<-o> + +The name of the output directory. + +=item B<-h> or B<--help> + +Display this helpful text. 
+ +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_build_template_WA_1_0_X.rb b/bin/gaas_build_template_WA_1_0_X.rb new file mode 100755 index 000000000..ed4246760 --- /dev/null +++ b/bin/gaas_build_template_WA_1_0_X.rb @@ -0,0 +1,79 @@ +#!/usr/bin/ruby +# == NAME +# build.rb +# +# == USAGE +# ./this_script.rb [ -h | --help ] +# [ -i | --infile ] |[ -o | --outfile ] | +# == DESCRIPTION +# A script to build a new WebApollo template installation (version 1.0 and up) +# You only need to do this once - the new_species.rb script will then copy this template +# to create new build projects. +# +# == OPTIONS +# -h,--help:: Show help +# +# == EXPERT OPTIONS +# +# == AUTHOR +# Marc Hoeppner, mphoeppner@gmail.com + +require 'optparse' +require 'ostruct' + +### Define modules and classes here + +### Get the script arguments and open relevant files +options = OpenStruct.new() +opts = OptionParser.new() +options.clean = false +opts.on("-c","--[no]clean","Clean up project") { options.clean = true } +opts.on("-h","--help","Display the usage information") { + puts opts + exit +} + +opts.parse! 
+ +### Usernames, passwords and locations + +user = ENV['USER'] +home = ENV['HOME'] +build_dir = ENV['APOLLO_BUILD_DIR'] or abort "Environment variable APOLLO_BUILD_DIR not set" + + +config = { + :web_apollo_source => "/big/webapollo/release/live",# The source code of webapollo + :web_apollo_build => "#{build_dir}/template", # The location where this WA project is to be build +} + +### File targets + +config_files = [ "sample_config.properties" , "sample_config.xml" , "sample_blat_config.xml", "sample_hibernate.xml" ] + +### The workflow + +if options.clean == true + system("rm -Rf #{config[:web_apollo_build]}") + +else + + File.directory?(config[:web_apollo_source]) or abort "Could not find the reference git clone (expected: #{config[:web_apollo_source]})" + abort "Template already exist. Use flag '--clean' to remove!" if File.directory?(config[:web_apollo_build]) + + # Create the folder where the data is to be stored + system("mkdir -p #{config[:web_apollo_build]}") + # Create a copy of the webapollo code for this installation + system("mkdir -p #{config[:web_apollo_build]}") + system("cp -R #{config[:web_apollo_source]}/* #{config[:web_apollo_build]}") + # Copy the template config files into place + config_files.each do |file| + system("cp #{config[:web_apollo_build]}/#{file} #{config[:web_apollo_build]}/#{file.gsub(/sample_/, '')}") + end + # Build the webapollo installation + Dir.chdir(config[:web_apollo_build]) do + system("./apollo deploy") + end + +end + diff --git a/bin/gaas_cp_all_links.sh b/bin/gaas_cp_all_links.sh new file mode 100755 index 000000000..81b6ed864 --- /dev/null +++ b/bin/gaas_cp_all_links.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# JD 2014 +# +#This script copy all the links of a directory and create the same link in the selected directory +#### + + + +# Arguments and Paths +############################################################################ + +if (( $# != 2 )) ; then + echo -e "The script needs 2 parameters: \n(1)Directory where all 
the link will be copied" + echo -e "(2)Directory where all the link will be pasted" + exit +fi + +DirPath=${1%/*} +if [[ $DirPath =~ [\/]$ ]];then + DirPath=$(echo ${DirPath::-1}) +fi + +IFS=$'\n' +for i in $(ls -l $1);do + if [[ $i =~ ^[l] ]];then + paired=$(echo $i | awk '{print $9" "$11}') + to=$(echo $paired | cut -d' ' -f1) + from=$(echo $paired | cut -d' ' -f2) + if [[ $from =~ ^[\/] ]];then + ln -s ${from} $2/$to + else + ln -s ${DirPath}/${from} $2/$to + fi + fi +done diff --git a/bin/gaas_create_annotation_project.pl b/bin/gaas_create_annotation_project.pl new file mode 100755 index 000000000..afbd876c2 --- /dev/null +++ b/bin/gaas_create_annotation_project.pl @@ -0,0 +1,123 @@ +#!/usr/bin/env perl + +# +#This script creates directories needed for a new project: +# +#NBIS 2018 +#Nima Rafati +use strict; +use Getopt::Long; +use Pod::Usage; +use File::Path; + + +my $logname = $ENV{ LOGNAME }; +my $help; +my $fasta = undef; +my $version = undef; +my $species = undef; +#my $annotation_root = "/projects/annotation"; +my $annotation_root = "~/test_annotation/"; +my $usage = qq{ +perl $0 + Getting help: + [-help] + Input: + [-s species_name] + [-g genome.fa] + [-v assembly version] +}; + +GetOptions( +"help" => \$help, +"s=s" => \$species, +"g=s" => \$fasta, +"v=i" => \$version); + +# Print Help and exit +if ($help) { + print $usage; + exit(0); +} +#check all parameters +if ( !( defined($fasta))){ + pod2usage( { -message => "\nA fasta file for genome assembly must be provided (-g)\n$usage", +-verbose => 0, +-exitval => 2 } ); +} +if ( !( defined($species))){ + pod2usage( { -message => "\nA species name must be provided (-s)\n$usage", +-verbose => 0, +-exitval => 2 } ); +} +if ( !( defined($version))){ + pod2usage( { -message => "\nVersion of the genome assembly must be provided (-v)\n$usage", +-verbose => 0, +-exitval => 2 } ); +} + +my $project_path = "$annotation_root/$species/"; +print "This project \" $species v.$version\" was created by \$logname = $logname 
on ". localtime().".\n You can find the working directory here:$project_path\n"; + +#/refseqs +#/repeats +#/RNAseq +#/ab-initio +#/ASSEMBLY_VERSION +#/maker +# +#/evidence_build +#/gene_build_ +# +#/tophat +#/cufflinks +#/rfam +#/webapollo_tracks +#/customer_data + +##Resources +make_dir($project_path, "Refseqs"); +make_dir($project_path, "EST"); +make_dir($project_path, "RNAseq"); +make_dir($project_path, "Genome"); +make_dir($project_path, "Mito"); +make_dir($project_path, "Delivery"); + +##Prepare fasta file (after the Genome directory exists, otherwise mv has no directory to move into) +prepare_fasta($fasta); + +##Ab initio +make_dir($project_path, "ab-initio"); +make_dir("$project_path/ab-initio", "SNAP"); +make_dir("$project_path/ab-initio", "Augustus"); +make_dir("$project_path/ab-initio", "GeneMark_ET"); +make_dir("$project_path/ab-initio", "GeneMark_EP"); + +##Maker +make_dir($project_path, "Maker"); +make_dir("$project_path/Maker", "Evidence_build"); +make_dir("$project_path/Maker", "gene_build"); + +##Transcriptome assembly +make_dir($project_path, "RNAseq_alignment"); +make_dir($project_path, "Transcriptome_assembly"); +make_dir("$project_path/Transcriptome_assembly", "Genome_guided"); +make_dir("$project_path/Transcriptome_assembly", "Denovo"); + +sub prepare_fasta { my ($genome) = @_; (my $genome_name = $genome) =~ s{.*/}{}; # file name without its directory part, as it appears inside Genome/ after the move + system("mv $genome $project_path/Genome/"); + system("ln -s $project_path/Genome/$genome_name $project_path/Genome/genome.fa"); +} + +sub make_dir { +# my $directory = $_[0],"/",$_[1]; + my ($tmp_path, $tmp_dir) = @_; + my $directory = "$tmp_path/$tmp_dir"; +# print "$directory"; ; + system("mkdir -p $directory"); +# mkdir $directory; +# unless(mkdir $directory) { +# unless(-e $directory or mkdir($directory,775)) { +# "Unable to create $directory\n"; +# } +} diff --git a/bin/gaas_create_annotation_project.rb b/bin/gaas_create_annotation_project.rb new file mode 100755 index 000000000..be66d4f65 --- /dev/null +++ b/bin/gaas_create_annotation_project.rb @@ -0,0 +1,121 @@ +#!/usr/bin/ruby +# == NAME +# create_annotation_project +# +# == USAGE +# ./create_annotation_project [ 
-h | --help ] +# [ -s | --species ] |[ -g | --genome ] | [ -v | --version ] | [ -f, --force ] +# == DESCRIPTION +# +# +# == OPTIONS +# -h,--help:: Show help +# -s,--SPECIES=SPECIES:: Species name in snake_case (e.g. homo_sapiens) [Required] +# -g,--genome=GENOME:: Genome fasta file [Required] +# -v,--version=VERSION:: Assembly version (if not specified, 1.0 is assumed) +# -f,--force=FORCE:: Force the creation of folder structure if it already exists (everything will be wiped, careful!) +# +# == EXPERT OPTIONS +# +# == AUTHOR +# Marc Hoeppner, mphoeppner@gmail.com + +require 'rdoc/usage' +require 'optparse' +require 'ostruct' +require 'logger' +require 'fileutils' + +### Define modules and classes here + +### Get the script arguments and open relevant files +options = OpenStruct.new() +opts = OptionParser.new() +opts.on("-h","--help","Display the usage information") {RDoc::usage} +opts.on("-s","--species", "=SPECIES","Species") {|argument| options.species = argument } +opts.on("-g","--genome", "=GENOME","Genome sequence") {|argument| options.genome = argument } +opts.on("-v","--version", "=VERSION","Version of genome assembly") {|argument| options.version = argument } +opts.on("-f","--force", "=FORCE","Force project creation if exists") {|argument| options.force = true } + +opts.parse! rescue RDoc::usage('usage') + +# Check the sanity of the settings + +abort "Must specify a species name in snake_case (--species)" unless options.species or options.species.match(/[a-z]*_[a-z]*/) == false +abort "Must specify a genome sequence (--genome)" unless options.genome + +unless options.version + warn "No assembly version specified (--version), assuming 1.0" + options.version = "1.0" +end + +# Set useful, derived variables + +WORK_DIR = Dir.getwd +FASTA_EXPLODE = "/sw/bioinfo/exonerate/fastaexplode" +FASTA_FORMATTER = "/sw/bioinfo/fastx-0.0.13/fasta_formatter" + +# Stupid hack to check whether genome sequence is here or a path on the system... +options.genome.include?("/") ? 
genome_location = options.genome : genome_location = "#{WORK_DIR}/#{options.genome}" +genome_name = genome_location.split("/")[-1] + +ANNOTATION_ROOT = "/projects/annotation" + +PROJECT_PATH = "#{ANNOTATION_ROOT}/#{options.species}" + +# Define the folder structure +# Indentation is meaningless, the order matters + +folders = [ + "#{PROJECT_PATH}/RNAseq" , + "#{PROJECT_PATH}/trinity" , + "#{PROJECT_PATH}/RNAseq/trimmed" , + "#{PROJECT_PATH}/RNAseq/trimmed/normalized" , + "#{PROJECT_PATH}/RNAseq/trimmed/normalized/C100/" , + "#{PROJECT_PATH}/v#{options.version}" , + "#{PROJECT_PATH}/v#{options.version}/cegma" , + "#{PROJECT_PATH}/v#{options.version}/genome" , + "#{PROJECT_PATH}/v#{options.version}/contigs" , + "#{PROJECT_PATH}/ab-initio" , + "#{PROJECT_PATH}/ab-initio/snap" , + "#{PROJECT_PATH}/ab-initio/augustus" , + "#{PROJECT_PATH}/v#{options.version}/maker" , + "#{PROJECT_PATH}/v#{options.version}/cufflinks" , + "#{PROJECT_PATH}/v#{options.version}/tophat" , + "#{PROJECT_PATH}/v#{options.version}/trinity" , + "#{PROJECT_PATH}/v#{options.version}/pasa" , + "#{PROJECT_PATH}/v#{options.version}/trnascan" , + "#{PROJECT_PATH}/v#{options.version}/rfam" , + "#{PROJECT_PATH}/refseqs" +] + +# Create project path (or not...) + +if File.directory?(PROJECT_PATH) and options.force.nil? + abort "Can't create project folder - already exists and --force not specified." +elsif File.directory?(PROJECT_PATH) and options.force + warn "Forced to create project path under #{PROJECT_PATH}, everything will be wiped!" + #FileUtils.rm_r(PROJECT_PATH) + #FileUtils.mkdir_p(PROJECT_PATH) + abort "This function is currently not available. Please remove manually and try again..." 
+else + warn "Creating annotation project under #{PROJECT_PATH}" + FileUtils.mkdir_p(PROJECT_PATH) +end + +FileUtils.cd(PROJECT_PATH) + +folders.each do |folder| + warn "Creating: #{folder}" + FileUtils.mkdir_p(folder) + system("touch #{folder}/00README") +end + +FileUtils.cp genome_location , "#{PROJECT_PATH}/v#{options.version}/genome/#{genome_name}" + +# Clean up the genome sequence + +system("#{FASTA_FORMATTER} -i #{PROJECT_PATH}/v#{options.version}/genome/#{genome_name} -w 80 -o #{PROJECT_PATH}/v#{options.version}/genome/genome.reformatted.fa") +system("#{FASTA_EXPLODE} -f #{PROJECT_PATH}/v#{options.version}/genome/genome.reformatted.fa -d #{PROJECT_PATH}/v#{options.version}/contigs/") + + diff --git a/bin/gaas_create_delivery_dir.sh b/bin/gaas_create_delivery_dir.sh new file mode 100755 index 000000000..29a67947b --- /dev/null +++ b/bin/gaas_create_delivery_dir.sh @@ -0,0 +1,76 @@ +#!/bin/bash + +# JD 2015 +# +#This script will soft-mask (lowerCase) a genome using a gff file as input +#### + + + +# Arguments and Paths +############################################################################ +dirname="" + +if (( $# != 1 )) ; then + echo -e "No directory name given. 
By default we will call it " + dirname="delivery" +else + if [[ $1 =~ \-?[hH]{1}elp$ || $1 =~ \-?[hH]{1}$ ]];then + echo -e "This script prepre the tree of directories for genome annotation results as well as a Readme file.\nCommand:\n========\nscript.sh [dirName]\nBy default the directory name is called " + exit + else + dirname=$1 + fi +fi + +if [[ -d $dirname ]];then + echo "file <$dirname> already exists"; exit; +else + mkdir -p "$dirname/metadata" + mkdir -p "$dirname/summary" + mkdir -p "$dirname/gff/repeats" + mkdir -p "$dirname/gff/lift-over" + mkdir -p "$dirname/gff/ncRNA" + mkdir -p "$dirname/gff/gene-build" + mkdir -p "$dirname/gff/transcript" + mkdir -p "$dirname/fasta/transcript" + + #write Readme + echo -e "* NBIS Genome Annotation Platform team *" >> $dirname/ReadMe.txt + mydate=$(date) + echo -e "$mydate\nPlease find here an overview about data available.\n" >> $dirname/ReadMe.txt + echo -e " file => A complete report of work/process performed." >> $dirname/ReadMe.txt + echo -e "================================\n" >> $dirname/ReadMe.txt + echo -e " directory => It contains data in FASTA format." >> $dirname/ReadMe.txt + echo -e "================= file => It contains cds of annotated genes." >> $dirname/ReadMe.txt + echo -e " file => It contains the proteins produced by the annotated genes." >> $dirname/ReadMe.txt + echo -e " sub directory" >> $dirname/ReadMe.txt + echo -e " =========== is the output of our Trinity pipeline used as inout for Maker.\n" >> $dirname/ReadMe.txt + echo -e " directory => It contains data in gff3 format." >> $dirname/ReadMe.txt + echo -e "===============\n" >> $dirname/ReadMe.txt + echo -e " sub directory\n" >> $dirname/ReadMe.txt + echo -e " =========== Are the “release candidate” annotation for the species. 
The higher release candidate corresponds to the most successful achievement.\n" >> $dirname/ReadMe.txt + echo -e " sub directory (depending on customer demand)" >> $dirname/ReadMe.txt + echo -e " ============ It is the lift-over of a reference species genome annotation (from Ensembl) on the genome under investigation.\n" >> $dirname/ReadMe.txt + echo -e " sub directory" >> $dirname/ReadMe.txt + echo -e " ======= It contains annotation of non-coding element annotated using the Eucaryotes data from Rfam database." >> $dirname/ReadMe.txt + echo -e " <*_tRNA.gff> It contains transfert RNA annotated using tRNAscan.\n" >> $dirname/ReadMe.txt + echo -e " sub directory" >> $dirname/ReadMe.txt + echo -e " ======= It contains repeats annotated thanks to repeatmasker." >> $dirname/ReadMe.txt + echo -e " It contains repeats annotated thanks to repeatrunner.\n" >> $dirname/ReadMe.txt + echo -e " sub directory" >> $dirname/ReadMe.txt + echo -e " =========== contains output of our Cufflinks pipeline used as input for Maker. (Depending of RNAseq data types, and the most appropriate Transcriptome assembly method chosen.)" >> $dirname/ReadMe.txt + echo -e " is the gff output created by Maker using the Trinity_*_.fasta file as input. (Depending of RNAseq data types, and the most appropriate Transcriptome assembly method chosen.)" >> $dirname/ReadMe.txt + echo -e " <*> Depending of RNAseq data types, and the most appropriate Transcriptome assembly method chosen.\n" >> $dirname/ReadMe.txt + echo -e " directory" >> $dirname/ReadMe.txt + echo -e "========== Tab-delimited format file (2 rows) containing GO terms of functions retrieved for each transcript." >> $dirname/ReadMe.txt + echo -e " Tab-delimited format file (2 rows) containing pram terms of functions retrieved for each transcript." 
>> $dirname/ReadMe.txt + echo -e " Tab-delimited format file (2 rows) containing interpro terms of functions retrieved for each transcript.\n" >> $dirname/ReadMe.txt + echo -e " directory" >> $dirname/ReadMe.txt + echo -e "========= General information about annotated coding genes" >> $dirname/ReadMe.txt + echo -e " General information about annotated tRNA" >> $dirname/ReadMe.txt + + echo -e "Well done ! Directories and ReadMe have been created. Think to check The ReadMe..." + +fi + diff --git a/annotation/Tools/Manager/create_preautomated_report.pf b/bin/gaas_create_preautomated_report.pl similarity index 100% rename from annotation/Tools/Manager/create_preautomated_report.pf rename to bin/gaas_create_preautomated_report.pl diff --git a/annotation/Tools/Util/fasta/fasta_filter_by_accession.rb b/bin/gaas_cufflinks2hints.rb similarity index 56% rename from annotation/Tools/Util/fasta/fasta_filter_by_accession.rb rename to bin/gaas_cufflinks2hints.rb index 39a464b74..b1ae4d5c0 100755 --- a/annotation/Tools/Util/fasta/fasta_filter_by_accession.rb +++ b/bin/gaas_cufflinks2hints.rb @@ -1,17 +1,16 @@ #!/usr/bin/ruby # == NAME -# this_script.rb +# cufflinks2hints.rb # # == USAGE # ./this_script.rb [ -h | --help ] # [ -i | --infile ] |[ -o | --outfile ] | # == DESCRIPTION -# +# Converts a Cufflinks-formatted GTF file into Augustus compatible exon hints # # == OPTIONS # -h,--help:: Show help -# -f,--fasta=FASTA:: FASTA file to filter -# -k,--killlist=KILLLIST:: List of accession numbers to kill from file +# -i,--infile=INFILE:: input file # -o,--outfile=OUTFILE:: output file # @@ -20,38 +19,41 @@ # == AUTHOR # Marc Hoeppner, mphoeppner@gmail.com -require 'rubygems' -require 'bio' require 'rdoc/usage' require 'optparse' require 'ostruct' require 'logger' + ### Define modules and classes here ### Get the script arguments and open relevant files - options = OpenStruct.new() opts = OptionParser.new() opts.on("-h","--help","Display the usage information") {RDoc::usage} 
-opts.on("-k","--killlist", "=KILLLIST","Killlist") {|argument| options.kill = argument } -opts.on("-f","--fasta", "=FASTA","FASTA file") {|argument| options.fasta = argument } +opts.on("-i","--infile", "=INFILE","Input") {|argument| options.infile = argument } opts.on("-o","--outfile", "=OUTFILE","Output") {|argument| options.outfile = argument } opts.parse! rescue RDoc::usage('usage') options.outfile ? output_stream = File.new(options.outfile,'w') : output_stream = $stdout -list = IO.readlines(options.kill).collect{|e| e.strip } -ff = Bio::FastaFormat.open(options.fasta) - -ff.each_entry do |entry| +IO.readlines(options.infile).each do |line| + + next unless line.include?("Cufflinks") + + elements = line.strip.split("\t") + + seq_region,from,to,strand = elements[0],elements[3],elements[4],elements[6] + transcript_id = elements[-1].split(";").find{|e| e.include?("transcript_id")}.gsub(/\"/, '').split(" ")[1] + + output_stream.puts "#{seq_region}\tb2h\tep\t#{from}\t#{to}\t.\t#{strand}\t.\tgrp=#{transcript_id};pri=4,src=W" + + +end - next if list.include?(entry.definition.strip.split(" ")[0]) +output_stream.close - output_stream.puts entry.to_s -end -output_stream.close diff --git a/bin/gaas_embl2gb.pl b/bin/gaas_embl2gb.pl new file mode 100755 index 000000000..ad7318e06 --- /dev/null +++ b/bin/gaas_embl2gb.pl @@ -0,0 +1,118 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Pod::Usage; +use Getopt::Long; +use Bio::SeqIO; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $outfile = undef; +my $embl = undef; +my $help; + +if( !GetOptions( + "help" => \$help, + "embl=s" => \$embl, + "outfile|output|o|out|gb=s" => \$outfile)) +{ + pod2usage( { -message => "$header\nFailed to parse command line", + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if ( ! 
(defined($embl)) ){ + pod2usage( { + -message => "$header\nMissing the --embl argument", + -verbose => 0, + -exitval => 1 } ); +} + +## Manage output file +my $gb_out; +if ($outfile) { +open(my $fh, '>', $outfile) or die "Could not open file '$outfile' $!"; + $gb_out= Bio::SeqIO->new(-fh => $fh, -format => 'genbank'); +} +else{ + $gb_out = Bio::SeqIO->new(-fh => \*STDOUT, -format => 'genbank'); +} + +### Read gb input file. +my $embl_in = Bio::SeqIO->new(-file => $embl, -format => 'embl'); + + +### MAIN ### + +while( my $seq = $embl_in->next_seq) { + $gb_out->write_seq($seq) +} + +__END__ + +=head1 NAME + +gaas_embl2gb.pl + +=head1 DESCRIPTION + +The script take a EMBL file as input, and will translate it in Genbank format. + +=head1 SYNOPSIS + + gaas_embl2gb.pl --embl=infile.gff [ -o outfile ] + +=head1 OPTIONS + +=over 8 + +=item B<-embl> + +Input EMBL file that will be read + +=item B<-o> , B<--output> , B<--out> , B<--outfile> or B<--gb> + +Output Genbank file. If no output file is specified, the output will be +written to STDOUT. + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_eugene2maker.rb b/bin/gaas_eugene2maker.rb new file mode 100755 index 000000000..268ac0cfa --- /dev/null +++ b/bin/gaas_eugene2maker.rb @@ -0,0 +1,31 @@ + +tcounter = 10000 +ccounter = 10000 + +this_mrna = nil + +lines = IO.readlines(ARGV.shift) + +lines.each do |line| + + elements = line.strip.split("\t") + + if line.include?("EuGene") && (line.include?("mRNA") || line.include?("CDS")) # only EuGene lines mentioning mRNA/CDS are inspected + + elements = line.strip.split("\t") + features = {} + elements[-1].split(";").each{|e| features[e.split("=")[0]] = e.split("=")[1]} + + + if elements[2] == "mRNA" + tcounter += 1 + puts "#{elements[0]}\teugene\tmatch\t#{elements[3]}\t#{elements[4]}\t#{elements[5]}\t#{elements[6]}\t.\tID=#{features['ID']};Name=Transcript#{tcounter}" + this_mrna = features["ID"] + elsif elements[2] == "CDS" + ccounter += 1 + puts "#{elements[0]}\teugene\tmatch_part\t#{elements[3]}\t#{elements[4]}\t#{elements[5]}\t#{elements[6]}\t.\tID=#{features['ID']};Name=CDS#{ccounter};Parent=#{this_mrna}" # GFF3 attributes are ';'-separated + end + + end + +end diff --git a/bin/gaas_exonerate2grid.pl b/bin/gaas_exonerate2grid.pl new file mode 100755 index 000000000..f9257f154 --- /dev/null +++ b/bin/gaas_exonerate2grid.pl @@ -0,0 +1,276 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Pod::Usage; +use Getopt::Long; +use Scalar::Util qw(openhandle); +use Time::Piece; +use Time::Seconds; +use File::Basename; +use Bio::SeqIO; +use Cwd; +use Carp; +use Bio::SeqFeature::Generic; +use Bio::Tools::GFF; +no strict qw(subs refs); +use GAAS::Grid::Bsub; +use GAAS::Grid::Sbatch; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $gff_formatter = Bio::Tools::GFF->new(-gff_version => 3); + +my $outdir = "exonerate_output"; +my $genome = undef; +my $proteins = undef; +my @cmds = (); # Stores the commands to send to farm +my $quiet; +my $grid="Slurm"; +my 
$queue=undef; +my @annotations = (); # Stores Rfama annotations as hashes +my $help; + +if ( !GetOptions( + "help|h!" => \$help, + "genome|g|fasta|f=s" => \$genome, + "protein|p=s" => \$proteins, + "grid=s" => \$grid, + "quiet|q!" => \$quiet, + "queue=s" => \$queue, + "outdir|o=s" => \$outdir)) + +{ + pod2usage( { -message => "Failed to parse command line", + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if ( ! defined( $genome ) or ! defined( $proteins ) ){ # both are interpolated into every exonerate command + pod2usage( { + -message => "$header\nAt least 2 parameters are mandatory:\nInput genome fasta file (--genome) and protein fasta file (--protein)\n\n", + -verbose => 0, + -exitval => 2 } ); +} + +# set grid option properly +my @grid_choice=('slurm','lsf','none'); +$grid=lc($grid); +if (! grep { $_ eq $grid } @grid_choice ) { # exact match only: a prefix such as 's' would leave no grid runner selected below + print "$grid is not a value accepted for grid parameter.";exit; +} +$grid= undef if lc($grid) eq 'none'; + +# .. Check that all binaries are available in $PATH +my @tools = ( "exonerate" ); # List of tools to check for! +foreach my $exe (@tools) { check_bin($exe) == 1 or die "Missing executable $exe in PATH"; } + +# .. Create output directory + +if (-d $outdir ) { + die "Output directory $outdir exists. Please remove and try again"; +} else { + msg("Creating output directory $outdir"); + runcmd("mkdir -p $outdir") +} + +# .. set up log file + +my $logfile = "$outdir/exonerate_search.log"; +msg("Writing log to: $logfile"); +open LOG, '>', $logfile or err("Can't open logfile"); + +# .. Read genome fasta file. +my $inseq = Bio::SeqIO->new(-file => "<$genome", -format => 'fasta'); + +# .. and create chunks +msg("Creating chunks for grid\n"); + +my $seq; + +my $seq_counter = 0; + +while( $seq = $inseq->next_seq() ) { + $seq_counter += 1; + my $outfile = $outdir . "/" . $seq->display_id . 
".fasta" ; # One chunk file per sequence, named after its display_id (special characters in the id can cause trouble) + my $seq_out = Bio::SeqIO->new(-file => ">$outfile" , -format => 'fasta'); + $seq_out->write_seq($seq); + my $command = "exonerate --showtargetgff --refine region --model protein2genome --percent 60 $proteins $outfile > $outfile.exonerate 2> /dev/null" ; + push(@cmds,$command); +} + +# Submit job chunks to grid + +# Submit job chunks to grid +msg("submitting chunks\n"); + +if( $grid){ + msg("Sending " . scalar(@cmds) . " jobs to the grid\n"); + chomp(@cmds); # Strip any trailing newline from the commands + # Submit job chunks to grid + my $grid_runner; + if ( $grid eq 'lsf'){ + $grid_runner = Bsub->new( cmds_list => \@cmds); + } + elsif( $grid eq 'slurm'){ + $grid_runner = Sbatch->new( cmds_list => \@cmds); + } + if($queue){$grid_runner->queue($queue)} + $grid_runner->run(); +} +else{ + foreach my $command (@cmds){ + + system($command); + + if ($? == -1) { + print "failed to execute: $!\n"; + } + elsif ($? & 127) { + printf "child died with signal %d, %s coredump\n", + ($? & 127), ($? & 128) ? 'with' : 'without'; + } + else { + printf "child exited with value %d\n", $? >> 8; + } + } +} + +# ..Postprocessing here, merging of output and printing gff + +msg("Merging output and writing GFF file"); + +my @files = <$outdir/*.exonerate>; + +my $outfile = $outdir . 
"/exonerate_annotations.gff"; +open (my $OUT, '>', $outfile) or die "FATAL: Can't open file: $outfile for reading.\n$!\n"; + +foreach my $file (@files) { + + open (my $IN, '<', $file) or die "FATAL: Can't open file: $file for reading.\n$!\n"; + + while (<$IN>) { + chomp; + my $line = $_; + next if ($line =~ /^#.*/); # Skipping comment lines + + print $OUT $line , "\n"; + + } +} + +close($OUT); + +# -------------------- + +sub msg { + my $t = localtime; + my $line = "[".$t->hms."] @_\n"; + print LOG $line if openhandle(\*LOG); + print STDERR $line unless $quiet; +} + +# -------------------- + +sub runcmd { + msg("Running:", @_); + system(@_)==0 or err("Could not run command:", @_); +} + +# -------------------- + +sub check_bin { + length(`which @_`) > 0 ? return 1 : return 0; +} + +#---------------------------------------------------------------------- + +sub err { + $quiet=0; + msg(@_); + exit(2); +} + +__END__ + +=head1 NAME + +gaas_exonerate2grid.pl + +=head1 DESCRIPTION + +Chunk input data to run multiple exonerate jobs in parallel to grid + +=head1 SYNOPSIS + + gaas_exonerate2grid.pl -f genome.fasta -o outdir + gaas_exonerate2grid.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<--genome>, B<--fasta>, B<-f> or B<-g> + +The name of the genome fasta file to use as target. + +=item B<--protein> + +The name of the protein file to use as query. + +=item B<--queue> + +If you want to define a particular queue to run the jobs + +=item B<--grid> + +Define which grid to use, Slurm, Lsf or None. Default = Slurm. + +=item B<--quiet> or B<-q> + +Quiet mode + +=item B<--outdir> or B<-o> + +The name of the output directory. + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. 
Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_fasta_bionano_filter_not_scaffolded_part.pl b/bin/gaas_fasta_bionano_filter_not_scaffolded_part.pl new file mode 100755 index 000000000..ed22ca967 --- /dev/null +++ b/bin/gaas_fasta_bionano_filter_not_scaffolded_part.pl @@ -0,0 +1,223 @@ +#!/usr/bin/env perl + +use Carp; +use strict; +use warnings; +use Getopt::Long; +use Pod::Usage; +use Bio::SeqIO; +use IO::File; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $outfile = undef; +my $file1 = undef; +my $agp = undef; +my $verbose= undef; +my $help= 0; + +if ( !GetOptions( + "help|h" => \$help, + "fasta|file|f=s" => \$file1, + "a|agp=s" => \$agp, + "output|outfile|out|o=s" => \$outfile)) + +{ + pod2usage( { -message => "$header"."Failed to parse command line.", + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($help) { + pod2usage( { -message => "$header\n", + -verbose => 99, + -exitval => 0 } ); +} + +if ( ! 
((defined($file1)) and (defined($agp)))){ + pod2usage( { + -message => "$header\nAt least 2 parameters are mandatory.\n", + -verbose => 0, + -exitval => 1 } ); +} + +###################### +# Manage output file # +my $fastaout; +if ($outfile) { + $outfile=~ s/\.fasta$//; # strip only a trailing extension; ".fa" is appended below + $outfile=~ s/\.fa$//; +open(my $fh, '>', $outfile.".fa") or die "Could not open file '$outfile.fa' $!"; + $fastaout= Bio::SeqIO->new(-fh => $fh , -format => 'Fasta'); +} +else{ + $fastaout = Bio::SeqIO->new(-fh => \*STDOUT, -format => 'Fasta'); +} + + + ##################### + # MAIN # + ##################### + +###################### +### Parse AGP input # +my @list_contig_used; +print "Reading file $agp\n"; +if (open(my $fh, '<:encoding(UTF-8)', $agp)) { + while (my $row = <$fh>) { + if($row =~ /^#/){next;} + chomp $row; + my ($object, $object_beg, $object_end, $part_number, $component_type, $component_id_or_gap_length, + $component_beg_or_gap_type, $component_end_or_linkage, $orientation_or_linkage_evidence ) = split(/\t/, $row); + if($component_type ne "N" and $component_type ne "U"){ + if ($object =~ /^Super-Scaffold/ ){ + #print $component_id_or_gap_length."\n"; + push (@list_contig_used, $component_id_or_gap_length); + } + } + } +} +else { + die "Could not open file $agp $!"; # the AGP is mandatory: without it no contig would be filtered out +} + +print "Parsing Finished\n"; +### END Parse AGP input # +######################### + +# primary contig header: >004069F|arrow|arrow_obj +# alternative contig header: >000019F-023-01|arrow|arrow_obj +# If piece used not all contig: >000838F|arrow|arrow_subseq_300884:420004 => To keep + + +#Go all over fasta1 and skip sequence to exclude +my $fasta1 = Bio::SeqIO->new(-file => $file1 , -format => 'Fasta'); +my $nbRemoved=0; +while ( my $seq = $fasta1->next_seq() ) { + my $header = $seq->id; + if( $header =~ m/-/ ){ #its from alternative + #print "alternative: $header\n"; + if( $header =~ m/subseq/ ){ + #print "Subsequence we have to keep it\n"; + if ( 
grep( /^\Q$1\E\|/, @list_contig_used ) ) { + my @res = grep( /^\Q$1\E\|/, @list_contig_used ); + #print "match in AGP: @res\n"; + if(@res > 1 ){ + print "header used severeal times in the agp file!!! \n"; + } + else{ + print "The primary version of this contig (@res) has been already taken into the hybrid_Assembly.fasta output. No need to include it \n" if($verbose); + } + } + else{ + print "subseq of alternative contigs $header has to be included into the final assembly. Indeed the primary version of this one has not been included\n"; + $fastaout->write_seq($seq); + } + } + } + } + else{ #its from primary + if( $header =~ m/subseq/ ){ + print "Subsequence we have to keep it\n" if ($verbose); + $fastaout->write_seq($seq); + } + else{ + if ( $header =~ m/(.+?(?=_obj))/ ) { + #print "header= $header $1\n"; + if ( grep( /^\Q$1\E/, @list_contig_used ) ) { + my @res = grep( /^\Q$1\E/, @list_contig_used ); + print "match in AGP: @res\n"; + if(@res > 1 ){ + print "header used severeal times in the agp file!!! Need to implement how to deal with this case. A loop will be enough...\n"; + } + else{ + print "header used in AGP: @res \n"; + } + } + else{ + $fastaout->write_seq($seq); + } + } + else{ + print "not match for <_obj> at the end of the string\n"; + } + } + } +} + + + +__END__ + +=head1 NAME + +gaas_fasta_bionano_filter_not_scaffolded_part.pl + +=head1 DESCRIPTION + +This script aims to filter the NOT_SCAFFOLDED.fasta file from bionano output in order to remove redundant part from secondary assembly. Indeed the NOT_SCAFFOLDED.fasta file is a mixup of the primary and the secondary assembly. + +Is not included in the output: + - piece of the secondary assembly (they are cut in pieces when a piece is used into a scaffold), when the corresponding sequence from the primary assembly is already used. + - contig of the secondary assembly + - contig of the primary assembly if the counterpart of the secondary assembly is already used into a scaffold. 
+ +=head1 SYNOPSIS + + gaas_fasta_bionano_filter_not_scaffolded_part.pl --fasta file1 -a agp [--out outfile] + gaas_fasta_bionano_filter_not_scaffolded_part.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<--fasta>, B<--file> or B<-f> + +Input fasta file (the NOT_SCAFFOLDED.fasta file). The headers of its sequences are compared against the contigs listed in the AGP file. + +=item B<-a> or B<--agp> + +AGP file describing the scaffolds. It is parsed to list the contigs already used in the Super-Scaffold objects. + +=item B<-o> , B<--output> , B<--out> or B<--outfile> + +Output fasta file. If no output file is specified, the output will be +written to STDOUT. + +=item B<--help> or B<-h> + +Getting help. +Display the full information. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_fasta_checkProteins.pl b/bin/gaas_fasta_checkProteins.pl new file mode 100755 index 000000000..cba6685f0 --- /dev/null +++ b/bin/gaas_fasta_checkProteins.pl @@ -0,0 +1,167 @@ +#!/usr/bin/env perl + +############################################# +# checkProtein.pl - Jacques Dainat 12/2014 # +############################################# + +use strict; +use warnings; +use Carp; +use Getopt::Long; +use IO::File; +use Pod::Usage; + +#VARIABLE DECLARATION +my $opt_reffile; +my $opt_help; +my $opt_output=undef; +my $nbProt=0; + +# OPTION MANAGEMENT +if ( !GetOptions( 'f|ref|reffile=s' => \$opt_reffile, + 'o|output=s' => \$opt_output, + 'h|help!' => \$opt_help ) ) + +{ + pod2usage( { -message => 'Failed to parse command line', + -verbose => 1, + -exitval => 1 } ); +} + +if ($opt_help) { + pod2usage( { -verbose => 2, + -exitval => 0 } ); +} + +if ( ! (defined($opt_reffile)) ){ + pod2usage( { + -message => "\nAt least 1 parameter is mandatory:\nInput fasta file (--f)\n\n", + -verbose => 0, + -exitval => 2 } ); +} + +### MANAGE OUTPUT +my $ostream = IO::File->new(); +if ($opt_output){ + $ostream->open( $opt_output, 'w' ) or + croak( + sprintf( "Can not open '%s' for writing %s", $opt_output, $! ) + ); +} +else{ + + $ostream->fdopen( fileno(STDOUT), 'w' ) or + croak( sprintf( "Can not open STDOUT for writing: %s", $! ) ); + +} +my $output= $ostream ; + +### READ FILE +if (! 
(-f $opt_reffile)){ + print "File doesnot exist\n"; exit; +} + +open(my $fic, '<', $opt_reffile) or die "Couldn't open the file $opt_reffile\n"; + +my $header; my $sequence=""; my $firstHeaderRead="no"; +my $nbProtWithStop=0; my $nbProtWithoutStop=0; my $nbProtWithStart=0; my $nbProtWithoutStart=0; my $nbProtWithStopStart=0; my $nbProtWithoutStartStop=0; my $specialStart=0; +my $nbProtWithStartWithoutStop=0;my $nbProtWithoutStartWithStop=0; +while( my $line = <$fic> ) { + chomp($line) ; + if($line =~ m/^>/){ + $nbProt++; + if ($firstHeaderRead eq "yes"){ # A new header closes the previous record: classify the sequence collected so far + if ($sequence =~ m/^L/){ + $specialStart++; + } + + if ($sequence =~ m/^M/){ + $nbProtWithStart++; + if ( $sequence =~ m/[\.X\*]$/ ){ + $nbProtWithStop++; + $nbProtWithStopStart++; + } + else{$nbProtWithStartWithoutStop++;$nbProtWithoutStop++;} + + } + else{$nbProtWithoutStart++; + if ( $sequence =~ m/[\.X\*]$/) { + $nbProtWithStop++; + $nbProtWithoutStartWithStop++; + } + else{$nbProtWithoutStartStop++;$nbProtWithoutStop++;} + } + + $sequence=""; + } + $header=$line; + $firstHeaderRead="yes"; + } + elsif($firstHeaderRead eq "yes"){ + $sequence.=$line; + + } +} +# Check last protein read +if ($sequence =~ m/^L/){ + $specialStart++; +} +if ($sequence =~ m/^M/){ + $nbProtWithStart++; + if ( $sequence =~ m/[\.X\*]$/ ) { + $nbProtWithStop++; + $nbProtWithStopStart++; + } + else{$nbProtWithStartWithoutStop++;$nbProtWithoutStop++;} +} +else{$nbProtWithoutStart++; + if ( $sequence =~ m/[\.X\*]$/ ) { + $nbProtWithStop++; + $nbProtWithoutStartWithStop++; + } + else{$nbProtWithoutStartStop++;$nbProtWithoutStop++;} +} + +my $Result; +$Result = "\nWe checked $nbProt Proteins:\n"; +$Result .= "M....? 
We have $nbProtWithStart proteins with a start at the first position \n"; +$Result .= "?....X We have $nbProtWithStop proteins with a stop at the last position \n"; +$Result .= "M....X We have $nbProtWithStopStart proteins with a start at the first position and a stop at the last position (Correct Proteins !!)\n\n"; +$Result .= ".....? We have $nbProtWithoutStart proteins without a start at the first position \n"; +$Result .= "?..... We have $nbProtWithoutStop proteins without a stop at the last position \n"; +$Result .= "...... We have $nbProtWithoutStartStop proteins without a start at the first position and without stop at the last position\n\n"; +$Result .= "M..... start wihtout stop= $nbProtWithStartWithoutStop\n"; +$Result .= ".....X stop wihtout start= $nbProtWithoutStartWithStop\n"; +print "$Result" if($opt_output); +print $output $Result; + +my $prop = $nbProtWithoutStart ? ($specialStart*100)/$nbProtWithoutStart : 0; # avoid dividing by zero when every protein starts with M +print "special start (L) $specialStart corresponding to $prop% of sequence without start. If this value is close to 10% it should correspond to the number we can find randomly (Mean that prediction didn't take this potential start codon in account.)\n"; +__END__ + +=head1 NAME + +checkProteins.pl - +The script takes a fasta file as input. - +It will check the presence of Start (M in first position) and Stop (. or X or * at last position) of each sequence. + +=head1 SYNOPSIS + + ./checkProteins.pl -f=infile.fa [ -o outfile ] + ./checkProteins.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<-f>, B<--reffile> or B<-ref> + +Input fasta file that will be read. In general come from gffread output. + +=item B<-o> or B<--output> + +By default the result is written on screen on the fly. If you give an output it will write the report in this file. 
+ +=back + +=cut \ No newline at end of file diff --git a/bin/gaas_fasta_cleaner.pl b/bin/gaas_fasta_cleaner.pl new file mode 100755 index 000000000..bec0175f1 --- /dev/null +++ b/bin/gaas_fasta_cleaner.pl @@ -0,0 +1,203 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Pod::Usage; +use POSIX qw(strftime); +use Getopt::Long; + +my $start_run = time(); +my $file_fasta; +my $outfile; +my $help = 0; +my $verbose = 0; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my @copyARGV=@ARGV; +Getopt::Long::Configure ('bundling'); + +if ( !GetOptions( + "help|h" => \$help, + "fasta|fa|f=s" => \$file_fasta, + "v!" => \$verbose, + "output|outfile|out|o=s" => \$outfile)) + +{ + pod2usage( { -message => 'Failed to parse command line', + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if ( !(defined($file_fasta)) ){ + pod2usage( { + -message => "$header\nAt least 1 parameter is mandatory:\n Input fasta file (--fasta)\n\n", + -verbose => 0, + -exitval => 1 } ); +} + +open my $fh, $file_fasta or die "Could not open $file_fasta: $!"; + +#OUTPUT +my $fho; +if ($outfile) { + open($fho, '>', $outfile) or die "Could not open file '$outfile' $!"; + } +else{ + $fho = *STDOUT; +} + +# To follow progression + my $startP=time; + my $nbLine=`grep -c ">" $file_fasta`; + $nbLine =~ s/ //g; + chomp $nbLine; + print "$nbLine sequence to process...\n"; + my $line_cpt=0; + +######### +#MAIN + +my $first_line = <$fh>; +chomp($first_line); + +if ( $first_line !~ /^>/ ) { + die( sprintf( "This does not look like fasta formatted input:\n%s\n", + $first_line ) ); +} + +my %parsers = ( + 'null' => sub { + my ($line) = @_; + return $line; + }, + + 'uniprot' => sub { + my ($line) = @_; + + + if ( $line =~ /^>(?:sp|tr)\|([^|]+).*PE=(\d+) SV=(\d+)/ ) { + $line_cpt++; + return sprintf( ">%s.%d %d", $1, $3, $2 ); + } + else { + $line =~ tr/O/K/; + } + + return $line; + }, + 'refseq' => 
sub { + my ($line) = @_; + + if ( $line =~ /^>gi/ ) { + $line_cpt++; + return sprintf( ">%s", [ split( /\|/, $line ) ]->[3] ); + } + + return $line; + } ); + +my $parser = 'null'; + +if ( $first_line =~ /^>(?:sp|tr)/ ) { $parser = 'uniprot'; } +elsif ( $first_line =~ /^>gi/ ) { $parser = 'refseq'; } + +print $fho ( $parsers{$parser}($first_line), "\n" ); + +while ( my $line = <$fh> ) { + chomp($line); + print $fho $parsers{$parser}($line), "\n"; + + #Display progression + if ((30 - (time - $startP)) < 0) { + my $done = ($line_cpt*100)/$nbLine; + $done = sprintf ('%.0f', $done); + print "\rProgress : $done %"; + $startP= time; + } +} + +#END +print "usage: $0 @copyARGV\n"; +my $end_run = time(); +my $run_time = $end_run - $start_run; +print "Job done in $run_time seconds\n"; + +__END__ + + +=head1 NAME + +gaas_fasta_cleaner.pl + +=head1 DESCRIPTION + +A filter for Uniprot and RefSeq fasta files that makes the fasta +headers a bit more terse. Reads from STDIN, writes to STDOUT. + +Note: Will pass any other fasta file unchanged. +Note: For Uniprot, will also change any 'O' in the protein sequence + into 'K'. + +=head1 SYNOPSIS + + gaas_fasta_cleaner.pl -f infile.fasta [ -o outfile ] + gaas_fasta_cleaner.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<-f>, B<--fa> or B<--fasta> + +Input fasta file. + +=item B<-v> + +Add verbosity + +=item B<-o>, B<--output>, B<--outfile> or B<--out> + +Output fasta file. If no output file is specified, the output will be +written to STDOUT. + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. 
+ Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_fasta_domain_extractor.pl b/bin/gaas_fasta_domain_extractor.pl new file mode 100755 index 000000000..b3c7e6136 --- /dev/null +++ b/bin/gaas_fasta_domain_extractor.pl @@ -0,0 +1,179 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Carp; +use Pod::Usage; +use Getopt::Long; +use IO::File; +use GAAS::GAAS; + +my $header_nbis = get_gaas_header(); +my $inputFile; +my $outputFile; +my $nameSeq; +my $start; +my $end; +my $opt_help = 0; + +Getopt::Long::Configure ('bundling'); +if ( !GetOptions ( 'i|f|fasta|input_file=s' => \$inputFile, + 'n|name=s' => \$nameSeq, + 'o|output=s' => \$outputFile, + 's|start=i' => \$start, + 'e|end=i' => \$end, + 'h|help!' => \$opt_help ) ) +{ + pod2usage( { -message => 'Failed to parse command line', + -verbose => 1, + -exitval => 1 } ); +} + +if ($opt_help) { + pod2usage( { -verbose => 2, + -exitval => 0, + -message => "$header_nbis\n" } ); +} + +if ((!defined($inputFile)) || (!defined($start)) || (!defined($end)) ){ + pod2usage( { -message => "$header_nbis\nAt least 3 parameters are mandatory: -i -s and -e", + -verbose => 0, + -exitval => 1 } ); +} + +my $ostream = IO::File->new(); +my $ref_istream = IO::File->new(); + +# Manage input fasta file +$ref_istream->open( $inputFile, 'r' ) or + croak( + sprintf( "Can not open '%s' for reading: %s", $inputFile, $! ) ); + +# Manage Output +if(defined($outputFile)) +{ +$ostream->open( $outputFile, 'w' ) or + croak( + sprintf( "Can not open '%s' for writing: %s", $outputFile, $! 
) ); +} +else{ + $ostream->fdopen( fileno(STDOUT), 'w' ) or + croak( sprintf( "Can not open STDOUT for writing: %s", $! ) ); +} + +my $nbSeq=0; +my $seq=""; +my $headerCurrent=""; +my $header=""; +my $headerFound="no"; +while (my $line = readline(*$ref_istream)) { + $line=~ s/\s//g; + if($line =~ m/^>/){ + $nbSeq++; $header=$headerCurrent; $headerCurrent=$line; + if( ($nbSeq > 1) && (!defined($nameSeq)) ){ + print "The input file is an MultiFasta file.\nPlease specify the name of the sequence you are interested in (-n )\n";exit; + } + elsif( ($nbSeq > 1) && (defined($nameSeq)) ) { + if(($nameSeq eq $header) or (">".$nameSeq eq $header)){ + $headerFound="yes"; + last; + } + $seq=""; + } + } + else{ + if($nbSeq > 0){ #first sequence encountered. We can begin to collect sequence data + $seq.=$line; + } + } +} +$header=$headerCurrent; +if($headerFound eq "no"){ + if( ($nbSeq > 1) && (!defined($nameSeq)) ){ + print "The input file is an MultiFasta file.\nPlease specify the name of the sequence you are interested in (-n )\n";exit; + } + elsif( ($nbSeq > 1) && (defined($nameSeq)) ){ + if(($nameSeq eq $header) or (">".$nameSeq eq $header)){ + $headerFound="yes"; + } + } + elsif ($nbSeq == 1){ + $header=$headerCurrent; + } +} + +if( ($nbSeq > 1) && (defined($nameSeq)) && ($headerFound eq "no") ){ + print "The header you specified >$nameSeq< doesn't exist in this MultiFasta file.\nPlease check it.\n";exit; +} + +print "Name studied sequence: $header\n"; +#print "sequence: $seq\n"; +if($start<0 || $end <0){print "Start and End cannot be a negative value!\n"; exit;} +if(length($seq) < $start){print "Start position for extraction is over the sequence size !\n"; exit;} +if(length($seq) < $end){print "End position for extraction is over the sequence size !\n"; exit;} +#end is 1-based coordinate system and 0-based coordinate system +#start is 1-based coordinate system +# The extraction compute in 0-based coordinate system +# Lets change the 1-based coordinate system in 0-based 
coordinate system for the start +$start=$start-1; +my $lengtExtraction=$end-$start; #Length in 0-based coordinate (in 1-based coordinate we must add +1) +print "Length sequence extracted: $lengtExtraction\n"; +my $extractedPart=substr($seq, $start, $lengtExtraction); +if ($outputFile){ + print $ostream $extractedPart; +} +else{ + print "Sequence extracted: $extractedPart\n"; +} + +__END__ + +=head1 NAME + +domainExtractor_JD.pl - +The script allows to extract a part of a AA or nt sequence. +The script takes as input a (multi)fasta file and coordinates of part that you want extract. +If the Input file is a MultiFastaFile you have to specify to the script the header of the sequence you want to extract. +NOTE: The script expect the use of 1-based coordinate system. So, -s 1 -e 1 extract the first AA/nt +/!\ Some file formats are 1-based (GFF, SAM, VCF) and others are 0-based (BED, BAM) +/!\ Ensembl uses 1-based coordinate system when UCSC uses 0-based coordinate system +/!\ Be aware of what kind of coordinate you are using as input. +Rule of coordinate system + 1-based coordinate system = Numbers nucleotides directly + 0-based coordinate system = Numbers between nucleotides + +=head1 SYNOPSIS + + fasta_domain_extractor.pl -i -s -e [-o -n ] + fasta_domain_extractor.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<-i>, B<--file> or B<-ref> + +Input fasta file that will be read. + +=item B<-s> or B<--start> + +Start coordinate of the region that will be extract + +=item B<-e> or B<--end> + +End coordinate of the region that will be extract + +=item B<-n> or B<--name> + +In Multifasta file case, the name allows to specify which sequence you are interested in. + +=item B<-o> or B<--output> + +Output file. If no output file is specified, the output will be +written to STDOUT. 
+ + + +=back + +=cut diff --git a/bin/gaas_fasta_extractFaFromMultiFa.sh b/bin/gaas_fasta_extractFaFromMultiFa.sh new file mode 100755 index 000000000..aa38ec5e0 --- /dev/null +++ b/bin/gaas_fasta_extractFaFromMultiFa.sh @@ -0,0 +1,64 @@ +#!/bin/bash + +# 10/2014 +# Jacques Dainat +# jacques.dainat@nbis.se +# + +#ARGS in command line +if (( $# < 2 )); then + echo -e "This script allows to extract easily and efficiently the sequence(s) from multi-fasta-file\n" + echo -e "The script needs 2 parameters" + echo -e "[usage: script.sh multiFastaFile.fa [HeaderName or HeaderNameFile] [m f] ]" + echo -e "In the case of HeaderNameFile, this file must contains 1 header per line" + echo -e "m <= term match\nf <= line finish by this term" + exit +else + multiFasta=$1 + name=$2 + matchType=$3 +fi + +if [[ $matchType == "" ]];then + echo -e "By default we are looking for matching term" + matchType="m" +fi + +#PREPARE REGEX +regex="$name"; +if [[ $matchType == "m" ]];then + regex="$name" +elif [[ $matchType == "f" ]];then + regex="${name}\$" +fi + +if [[ ! -e $name ]];then + nbOcc=$(grep -c "$regex" $multiFasta) + if [[ $nbOcc == 0 ]];then + echo "No match found"; exit 1; + elif [[ $nbOcc > 1 ]];then + echo "We found $nbOcc match. Only one is mandatory"; exit 1; + fi + + line=$(grep -nr "$regex" $multiFasta) + lineNumber=$(echo $line | cut -d':' -f1) + sed -n "$lineNumber p" $multiFasta; + awk -v nb=$lineNumber 'NR > nb {if ($0 ~ ">") exit; else print $0 }' $multiFasta +## Case of header file +else + echo "Reading $name file that should contain one header name by line." + IFS=$'\n'; + for i in $(cat $name);do + line=$(grep -nr "${i}" $multiFasta); + if [[ ! 
-z $line ]];then + for j in $line;do + lineNumber=$(echo $j | cut -d':' -f1) + sed -n "$lineNumber p" $multiFasta + awk -v nb=$lineNumber 'NR > nb {if ($0 ~ ">") exit; else print $0 }' $multiFasta + done + fi + done +fi + +exit + diff --git a/bin/gaas_fasta_extract_sequence_from_OG.pl b/bin/gaas_fasta_extract_sequence_from_OG.pl new file mode 100755 index 000000000..4d81e8af2 --- /dev/null +++ b/bin/gaas_fasta_extract_sequence_from_OG.pl @@ -0,0 +1,233 @@ +#!/usr/bin/env perl + +### +# Implement case insensitive +### +use strict; +use warnings; +use Pod::Usage; +use Getopt::Long; +use Bio::SeqIO ; +use Bio::DB::Fasta; +use IO::File; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $start_run = time(); +my $opt_fastafile; +my $opt_help = 0; +my $opt_OGfile; +my $opt_dir; +my $name_OG; +my %OG_seqID=(); +my %OG_count=(); +my @tab_seqID; +my $path; + +# OPTION MANAGMENT +my @copyARGV=@ARGV; +if ( !GetOptions( 'f|fa|fasta=s' => \$opt_fastafile, + 'og|OG_file=s' => \$opt_OGfile, + 'd|dir=s' => \$opt_dir, + 'h|help!' => \$opt_help ) ) +{ + pod2usage( { -message => "$header\nFailed to parse command line", + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($opt_help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if ( (! (defined($opt_OGfile)) ) or (! (defined($opt_fastafile)) ) ){ + pod2usage( { + -message => "\nAt least 2 parametes are mandatory:\n Input reference fasta file (-f); orthoMCL group in text format obtained by the in-house orthoMCL pipeline (-og)\n\n". 
+ "Output will be created for you, one fasta file per OG group.\n", + -verbose => 0, + -exitval => 2 } ); +} + + +##### MAIN #### + +###################### +####create folder +if (defined($opt_dir)){ + $path = "$opt_dir/"; +}else { + $path = "OG_fasta/"; +} + + +my $createdir="mkdir -p $path"; +if (-d $path) + { print "$path already exists\n"; exit; } +else + { print "mkdir $path \n" unless system($createdir) } + + +#### read fasta file and save info in memory +###################### +my $db = Bio::DB::Fasta->new($opt_fastafile); +print ("Genome fasta parsed\n"); + +########################### +### open OG file + +my $file = IO::File->new(); +if ( defined $opt_OGfile ) { + $file->open($opt_OGfile, 'r') or croak (sprintf("Can not open '%s' for reading: %s", $opt_OGfile, $!)); +} + +my $output_stat = "$path"."stat_OG.txt"; + +my $ostream_stat = IO::File->new(); +$ostream_stat->open($output_stat, 'w' ) or +croak( + sprintf( "Can not open '%s' for reading: %s", $output_stat, $! ) ); + +############################## +#create the hash with OG ID and the sequence IDs in the same orthology group +while(my $line =<$file>) { + chomp $line; + #print $line."\n"; + @tab_seqID = split (" ",$line); + + $tab_seqID[0]=~/^(OG_\d+)\:/; + #print scalar @tab_seqID."\n"; + $name_OG=$1; + +#chomp the first element of the table that is the OG ID + @tab_seqID = @tab_seqID[ 1 .. 
$#tab_seqID ]; + + $OG_seqID{$name_OG}=[@tab_seqID]; + +###to count the number of sequence per OG and species + foreach (@tab_seqID) { + $_=~/(\d+)\|/; + $OG_count{$name_OG}{$1}++; + } + +} + + +########################## +#Now extract the sequences + +foreach my $OG_ID (sort keys %OG_seqID){ + + my $ostream_OG = "$path"."$OG_ID".".fasta"; + + + my $seqout_OG = Bio::SeqIO->new( -file => ">$ostream_OG", + -format => 'Fasta', + ); + ####print number of sequences per OG + + print $ostream_stat "\n".$OG_ID."\t number of sequences:".scalar(@{$OG_seqID{$OG_ID}})."\n"; + #print $OG_ID."\t number of sequences: ".scalar(@{$OG_seqID{$OG_ID}})."\n"; + + foreach my $count ( keys %{$OG_count{$OG_ID}}){ + ####print number of sequences per species and per OG + print $ostream_stat $OG_ID."\t species: ".$count."\tnumber of sequences: ".$OG_count{$OG_ID}{$count}."\n"; + # print $OG_ID."\t species: ".$count."\tnumber of sequences: ".$OG_count{$OG_ID}{$count}."\n"; + } + + foreach my $seq_name (@{$OG_seqID{$OG_ID}}) { + + if($db->seq($seq_name)){ + #create sequence object + my $seq_obj = $db->get_Seq_by_id($seq_name); + $seqout_OG->write_seq($seq_obj); + + }else{ + print $seq_name." not found into the $opt_fastafile fasta file !\n"; + } + + } + + +} + +#END +print "usage: $0 @copyARGV\n"; +my $end_run = time(); +my $run_time = $end_run - $start_run; +print "Job done in $run_time seconds\n"; + + +__END__ + +=head1 NAME + +gaas_fasta_extract_sequence_from_OG.pl + +=head1 DESCRIPTION + +This script extracts sequence in fasta format from a fasta file. You can extract one fasta sequence providing the name of a file created by the in-house orthoMCL pipeline. +The OG file contains all the orthoMCL groups and the ID of the sequences in each group. + +=head1 SYNOPSIS + + gaas_fasta_extract_sequence_from_OG.pl -f infile.fasta -og OGfile.txt + gaas_fasta_extract_sequence_from_OG.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<-f> or B<--fasta> + +Input fasta file. 
+ +=item B<-og>, B<--og> + +The OG file contains all the orthoMCL groups and the ID of the sequences in each group. + +eg : + +OG_1000: 5833|MAL13P1.2:pep 5833|PF10_0398:pep + +OG_1001: 5833|MAL13P1.1:pep 5833|PFE0005w:pep 5833|MAL8P1.220:pep 5833|PFF1595c:pep + +=item B<-d>, B<--dir> + +optional you can choose a name for the output folder, by default it will be called OG_fasta + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_fasta_extract_sequence_from_id.pl b/bin/gaas_fasta_extract_sequence_from_id.pl new file mode 100755 index 000000000..d96238813 --- /dev/null +++ b/bin/gaas_fasta_extract_sequence_from_id.pl @@ -0,0 +1,257 @@ +#!/usr/bin/env perl + +### +# Implement case insensitive +### +use strict; +use warnings; +use Pod::Usage; +use Getopt::Long; +use Bio::SeqIO ; +use Bio::DB::Fasta; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $start_run = time(); + +my $col = undef; +my $lineToAvoid=undef; +my $separator=undef; +my $opt_fastafile; +my $opt_output; +my $opt_help = 0; +my $opt_name = undef; + +# OPTION MANAGMENT +my @copyARGV=@ARGV; +if ( !GetOptions( 'f|fa|fasta=s' => \$opt_fastafile, + "line=i" => \$lineToAvoid, + "col=i" => \$col, + "s=s" =>\$separator, + 'n|name=s' => \$opt_name, + 'o|output=s' => \$opt_output, + 'h|help!' => \$opt_help ) ) +{ + pod2usage( { -message => "$header\nFailed to parse command line", + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($opt_help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if ( (! (defined($opt_name)) ) or (! (defined($opt_fastafile)) ) ){ + pod2usage( { + -message => "\nAt least 2 parametes are mandatory:\nInput reference gff file (-g); Input reference fasta file (-f)\n\n". + "Output is optional. 
Look at the help documentation to know more.\n", + -verbose => 0, + -exitval => 2 } ); +} + + +my $ostream; +if ($opt_output) { + $opt_output=~ s/\.fasta$//; # strip only a trailing ".fasta" extension + $opt_output=~ s/\.fa$//; # strip only a trailing ".fa" extension + open(my $fh, '>', $opt_output.".fa") or die "Could not open file '$opt_output' $!"; + $ostream= Bio::SeqIO->new(-fh => $fh, -format => 'Fasta' ); +} +else{ + $ostream = Bio::SeqIO->new(-fh => \*STDOUT, -format => 'Fasta'); +} + +##### MAIN #### +#### read fasta file and save info in memory +###################### +my $db = Bio::DB::Fasta->new($opt_fastafile); +print ("Genome fasta parsed\n"); + + +########################### +### Extract sequence ID(s) +my %list_of_ID; +my $nbID=0; +# Case it's a file +if (-f $opt_name){ + #Manage column with the ID + if (! defined $col){ + $col=0; + } + else{$col=$col -1 ;} + + #Manage line to avoid + if (! defined $lineToAvoid){ + $lineToAvoid=0; + } + + print "It's a file you gave me... I will read line by line to take in account the ID from the column ".($col+1)." of the file $opt_name\n"; + print "The first $lineToAvoid lines will be ignored.\n"; + open(my $ID_list, '<', $opt_name) or die "Could not open file '$opt_name' $!"; # checked 3-arg open, no dependency on IO::File + + + my $cpt_line=0; + while ( <$ID_list> ) { + $cpt_line++; + + if($cpt_line > $lineToAvoid){ + + chomp; + if($_ !~ /^\s*$/){ # skip blank lines; "! $_ =~" negated $_ before matching + + my @cols; + if (! 
$separator){ + @cols = split /\s/, $_; + } + else{ + @cols = split /$separator/, $_; + } + my $id = $cols[$col]; + $id =~ s/[^[:print:]]+//g; + #print $id."\n"; + if($id =~ m/^>/){ + print "I remove the chevron (\">\") !\n"; + $id=substr($id, 1 , length($id)); + } + $list_of_ID{$id}++; + $nbID++; + } + } + } + print "$nbID ID found.\n"; +} +else{ + if($opt_name =~ m/^>/){ + print "I remove the chevron (\">\") !\n"; + $opt_name=substr($opt_name, 1 , length($opt_name)); + print $opt_name."\n"; + } + $list_of_ID{$opt_name}++; +} + +########################## +#Now extract the sequences +my @list_seq_result=(); +foreach my $ID (keys %list_of_ID){ + + if($db->seq($ID)){ + #create sequence object + my $seq_obj = $db->get_Seq_by_id($ID); + + push @list_seq_result, $seq_obj; + } + else{ + print "<$ID> not found into the $opt_fastafile fasta file !\n"; + } +} + +if (! @list_seq_result){ + print "Nothing found !\n"; +} +else{ + foreach my $seq_obj (@list_seq_result){ + $ostream->write_seq($seq_obj); + } +} + +#END +print "usage: $0 @copyARGV\n"; +my $end_run = time(); +my $run_time = $end_run - $start_run; +print "Job done in $run_time seconds\n"; + +####################################################################################################################### + #################### + # METHODS # + ################ + ############## + ############ + ########## + ######## + ###### + #### + ## + + + + +__END__ + +=head1 NAME + +gaas_fasta_extract_sequence_from_id.pl + +=head1 DESCRIPTION + +This script extract sequence in fasta format from a fasta file. You can extract one fasta sequence providing a sequence name or the name of a file containing a list of sequence name (one by line) + +=head1 SYNOPSIS + + gaas_fasta_extract_sequence_from_id.pl -f=infile.fasta -n sequenceID [ -o outfile ] + gaas_fasta_extract_sequence_from_id.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<-f> or B<--fasta> + +Input fasta file. 
+ +=item B<-n>, B<--name> + +Could be a sequence name to retrieve in the fasta file, or a file containing a list of sequence names (one per line). + +=item B<--line> + +Integer, number of lines to skip at the top of the ID file. Allows header lines to be skipped. + +=item B<--col> + +Column containing the ID. By default the first column is considered. + +=item B<-s> + +Field separator. By default the line is split on whitespace (\s). You can define the one you want with this option. + +=item B<-o> or B<--output> + +Output fasta file. If no output file is specified, the output will be +written to STDOUT. + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_fasta_filter_by_size.pl b/bin/gaas_fasta_filter_by_size.pl new file mode 100755 index 000000000..d571d0d09 --- /dev/null +++ b/bin/gaas_fasta_filter_by_size.pl @@ -0,0 +1,161 @@ +#!/usr/bin/env perl + +### +# Implement case insensitive +### +use strict; +use warnings; +use Pod::Usage; +use Getopt::Long; +use Bio::SeqIO ; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $start_run = time(); + +my $opt_fastafile; +my $opt_output; +my $opt_help = undef; +my $opt_size = 1000; + +# OPTION MANAGMENT +my @copyARGV=@ARGV; +if ( !GetOptions( 'f|fa|fasta=s' => \$opt_fastafile, + 's|size=s' => \$opt_size, + 'o|output=s' => \$opt_output, + 'h|help!' => \$opt_help ) ) +{ + pod2usage( { -message => "$header\nFailed to parse command line", + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($opt_help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if (! (defined($opt_fastafile)) ) { + pod2usage( { + -message => "\nAt least 1 parameter is mandatory:\nInput reference fasta file (-f)\n\n". + "Output is optional. 
Look at the help documentation to know more.\n", + -verbose => 0, + -exitval => 2 } ); +} + + +my $ostream; +if ($opt_output) { + $opt_output=~ s/\.fasta$//; # strip only a trailing ".fasta" extension + $opt_output=~ s/\.fa$//; # strip only a trailing ".fa" extension + open(my $fh, '>', $opt_output.".fa") or die "Could not open file '$opt_output' $!"; + $ostream= Bio::SeqIO->new(-fh => $fh, -format => 'Fasta' ); +} +else{ + $ostream = Bio::SeqIO->new(-fh => \*STDOUT, -format => 'Fasta'); +} + +print "We will remove sequences < $opt_size bp.\n"; + + + +##### MAIN #### + +######### read fasta file ############# +my $fasta1 = Bio::SeqIO->new(-file => $opt_fastafile , -format => 'Fasta'); +while ( my $seq = $fasta1->next_seq() ) { + if($seq->length() >= $opt_size){ + $ostream->write_seq($seq); + } +} + + +#END +print "usage: $0 @copyARGV\n"; +my $end_run = time(); +my $run_time = $end_run - $start_run; +print "Job done in $run_time seconds\n"; + +####################################################################################################################### + #################### + # METHODS # + ################ + ############## + ############ + ########## + ######## + ###### + #### + ## + + + + +__END__ + +=head1 NAME + +gaas_fasta_filter_by_size.pl + +=head1 DESCRIPTION + +This script filters sequences by size. It will remove from the output all sequences under a certain size (1000 bp/aa by default) +We keep all sequences >= --size + +=head1 SYNOPSIS + + gaas_fasta_filter_by_size.pl -f infile.fasta [ -o outfile ] + gaas_fasta_filter_by_size.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<-f> or B<--fasta> + +Input fasta file. + +=item B<-s>, B<--size> + +Integer corresponding to a size in bp. Default value 1000. Sequence under the value will be discarded from the output. + +=item B<-o> or B<--output> + +Output fasta file. If no output file is specified, the output will be +written to STDOUT. + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? 
+ +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_fasta_get_longestORF.pl b/bin/gaas_fasta_get_longestORF.pl new file mode 100755 index 000000000..1343fa5f1 --- /dev/null +++ b/bin/gaas_fasta_get_longestORF.pl @@ -0,0 +1,278 @@ +#!/usr/bin/env perl + +use Carp; +use strict; +use warnings; +use POSIX qw(strftime); +use Getopt::Long; +use Pod::Usage; +use Bio::Seq; +use Bio::SeqIO; +use GAAS::FASTA::Longest_orf; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $start_run = time(); +my $codonTableId=1; +my $MIN_PROT_LENGTH = 100; +my $force_start_codon = undef; +my $force_complete = undef; +my $file_fasta=undef; +my $keep_all_orf=undef; +my $outfile = undef; +my $verbose = undef; +my $help= 0; + +my @copyARGV=@ARGV; +Getopt::Long::Configure ('bundling'); +if ( !GetOptions( + "help|h" => \$help, + "fasta|fa|f=s" => \$file_fasta, + "size_min|s=i" => \$MIN_PROT_LENGTH, + "force_start_codon!" => \$force_start_codon, + "force_complete!" => \$force_complete, + "table|codon|ct=i" => \$codonTableId, + "keep_all_orf!" => \$keep_all_orf, + "v!" 
=> \$verbose, + "output|outfile|out|o=s" => \$outfile)) + +{ + pod2usage( { -message => 'Failed to parse command line', + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if ( !(defined($file_fasta)) ){ + pod2usage( { + -message => "$header\nAt least 1 parameter is mandatory:\n Input fasta file (--fasta)\n\n", + -verbose => 0, + -exitval => 1 } ); +} + +if($codonTableId<0 or $codonTableId>25){ # range check: with "and" the condition could never be true + print "$codonTableId codon table is not a correct value. It should be between 0 and 25 (0,23 and 25 can be problematic !)\n"; +} + +###################### +# Manage output file # +my $fasta_out; +if ($outfile) { + $fasta_out = Bio::SeqIO->new(-file => ">$outfile" , -format => 'fasta'); +} +else{ + $fasta_out = Bio::SeqIO->new(-fh => \*STDOUT, -format => 'fasta'); +} + +# print usage performed +my $stringPrint = strftime "%m/%d/%Y at %Hh%Mm%Ss", localtime; +$stringPrint = "Launched the ".$stringPrint."\nusage: $0 @copyARGV\n"; +print $stringPrint; + + ##################### + # MAIN # + ##################### + +# .. Read genome fasta file. 
+my $inseq = Bio::SeqIO->new(-file => "<$file_fasta", -format => 'fasta'); + +my %canditates; +while( my $seqObj = $inseq->next_seq() ) { + + + my $longest_orf_finder = Longest_orf->new(); + $longest_orf_finder->allow_5prime_partials(); + $longest_orf_finder->allow_3prime_partials(); + + my $seq = $seqObj->seq(); + my @orf_structs = $longest_orf_finder->capture_all_ORFs($seq); + + #sorting ORF by size + @orf_structs = reverse sort {$a->{length}<=>$b->{length}} @orf_structs; + my $acc = $seqObj->id(); + print "looking at sequence $acc \n" if $verbose; + + my %candidates; + while (@orf_structs) { + my $orf = shift @orf_structs; + + my $start = $orf->{start}; + my $stop = $orf->{stop}; + + my $length = int((abs($start-$stop)+1)/3); + my $orient = $orf->{orient}; + my $protein = $orf->{protein}; + + ################################## + # adjust for boundary conditions, since starts and stops run off the ends of the sequences at partial codons + ################################# + + # adjust at 3' end + if ($stop > length($seq)) { + $stop -= 3; + } + if ($start > length($seq)) { + $start -= 3; + } + + # adjust at 5' end + if ($stop < 1) { + $stop += 3; + } + if ($start < 1) { + $start += 3; + } + + + if ($length < $MIN_PROT_LENGTH) { next; } + if ($force_complete and (substr($orf->{protein},0,1) ne 'M' or substr($orf->{protein},-1) ne '*' ) ) {next;} + if ($force_start_codon and substr($orf->{protein},0,1) ne 'M' ) {next;} + + + if ($verbose) { require Data::Dumper; print "Candidate (len $length): ".Data::Dumper::Dumper($orf); } # load Data::Dumper on demand: the script never imports it + push (@{$canditates{$acc}}, $orf); + + } + + if($keep_all_orf){ + my $cpt=1; + foreach my $orf (@{$canditates{$acc}}){ + #create a new sequence object + my $new_seqObj = Bio::Seq->new( '-format' => 'fasta' , -seq => $orf->{protein}); + $new_seqObj->id($acc.".".$cpt); + $new_seqObj->description($seqObj->description()); + $fasta_out->write_seq($new_seqObj); + $cpt++; + } + } + else{ # let's keep only the longest + my $orf = ( $canditates{$acc} || [] )->[0] or next; # no ORF passed the filters: skip this sequence instead of dereferencing undef + $seqObj->seq($orf->{protein}); 
#changing the DNA sequence by the corresponding AA sequence is enough + $fasta_out->write_seq($seqObj); + } +} + +# END +my $end_run = time(); +my $run_time = $end_run - $start_run; +print "Job done in $run_time seconds\n"; +####################################################################################################################### + #################### + # METHODS # + ################ + ############## + ############ + ########## + ######## + ###### + #### + ## + +#check if reference exists in hash. Deep infinite : hash{a} or hash{a}{b} or hash{a}{b}{c}, etc. +# usage example: exists_keys($hash_omniscient,('level3','cds',$level2_ID) +sub exists_keys { + my ($hash, $key, @keys) = @_; + + if (ref $hash eq 'HASH' && exists $hash->{$key}) { + if (@keys) { + return exists_keys($hash->{$key}, @keys); + } + return 1; + } + return ''; +} + +__END__ + +=head1 NAME + +gaas_fasta_get_longestORF.pl + +=head1 DESCRIPTION + +The script take a nucleotide fasta file as input and will extract the longest ORF(s) and translate it(them) in AA. +By default it extracts only the longest ORF even incomplete (missing start or/and stop codon) >= 100 AA. +This script is an adpatation of the TransDecoder.LongestORF tool, adapted to use bioperl. +/!\ Bolean parameter don't expect any value. + +=head1 SYNOPSIS + + gaas_fasta_get_longestORF.pl -f infile.fasta [ -o outfile ] + gaas_fasta_get_longestORF.pl -h + +=head1 OPTIONS + +=over 8 + +=item B<-f> or B<--fa> or B<--fasta> + +Nucleotide fasta file. + +=item B<-s> or B<--size_min> + +Minimum length of the ORF to be kept in AA (100 by default) + +=item B<--ct> or B<--table> or B<--codon> + +This option allows specifying the codon table to use - It expects an integer (1 by default = standard) + +=item B<--force_start_codon> + +This option force to keep the longest ORF that contains a start codon (M). Bolean + +=item B<--force_complete> + +This option force to keep the longest ORF that contains a start codon (M) and stop codon (*). 
Boolean + +=item B<--keep_all_orf> + +This option forces to keep all the ORFs that meet the criteria. Boolean + +=item B<-v> + +Verbose. Useful for debugging purpose. Boolean + +=item B<-o> or B<--out> or B<--output> or B<--outfile> + +Output fasta file. If no output file is specified, the output will be +written to STDOUT. + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_fasta_removeFromFasta1_intersection_withFasta2ID.pl b/bin/gaas_fasta_removeFromFasta1_intersection_withFasta2ID.pl new file mode 100755 index 000000000..e58beed8b --- /dev/null +++ b/bin/gaas_fasta_removeFromFasta1_intersection_withFasta2ID.pl @@ -0,0 +1,150 @@ +#!/usr/bin/env perl + + +use Carp; +use strict; +use Getopt::Long; +use Pod::Usage; +use Bio::SeqIO; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $outfile = undef; +my $file1 = undef; +my $file2 = undef; +my $help= 0; + +if ( !GetOptions( + "help|h" => \$help, + "fasta1|file1|f1=s" => \$file1, + "fasta2|file2|f2=s" => \$file2, + "output|outfile|out|o=s" => \$outfile)) + +{ + pod2usage( { -message => "$header"."Failed to parse command line.", + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($help) { + pod2usage( { -message => "$header\n", + -verbose => 99, + -exitval => 0 } ); +} + +if ( ! 
((defined($file1)) and (defined($file2)))){ + pod2usage( { + -message => "$header\nAt least 2 parameters are mandatory.\n", + -verbose => 0, + -exitval => 1 } ); +} + +###################### +# Manage output file # +my $fastaout; +if ($outfile) { + $outfile=~ s/\.gff$//; # strip only a trailing ".gff"; NOTE(review): the output is FASTA but is named ".gff" - confirm the intended extension +open(my $fh, '>', $outfile.".gff") or die "Could not open file '$outfile' $!"; + $fastaout= Bio::SeqIO->new(-fh => $fh , '-format' => 'Fasta'); +} +else{ + $fastaout = Bio::SeqIO->new(-fh => \*STDOUT, -format => 'Fasta'); +} + + + ##################### + # MAIN # + ##################### + + +######################## +### Manage INPUT FILES # +my $fasta1 = Bio::SeqIO->new(-file => $file1 , -format => 'Fasta'); +my $fasta2 = Bio::SeqIO->new(-file => $file2 , -format => 'Fasta'); + +############## +#### MAIN #### +my $nbToRemove = 0; # counters start at 0 so the summary never prints an empty field +my $nbRemoved = 0; +my $nbSeqPrint = 0; +my %id_fasta1; +while ( my $seq = $fasta1->next_seq() ) { + $id_fasta1{$seq->id}++; + $nbToRemove++; +} + +while ( my $seq = $fasta2->next_seq() ) { + if(! exists($id_fasta1{$seq->id})){ + $fastaout->write_seq($seq); + $nbSeqPrint++; + } + else{$nbRemoved++;} +} + +my $totalSeq=$nbRemoved+$nbSeqPrint; +print "On the $nbToRemove sequences in $file1, $nbRemoved sequences have been removed from $file2.\nSo, on the $totalSeq sequences of $file2, $nbSeqPrint have been printed.\n"; + +__END__ + +=head1 NAME + +Compare two fasta files in order to remove occurrences of fasta sequences from file 1 present in file 2. +The whole header must be identical to be considered identical. + +=head1 SYNOPSIS + + perl my_script.pl --fasta1 file1 --fasta2 file2 [--out outfile] + perl my_script.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<--fasta1>, B<--file1> or B<-f1> + +Fasta file 1. The headers of sequences of this file will be used to compare against those of file 2. + +=item B<--fasta2>, B<--file2> or B<-f2> + +Fasta file 2. This is the "reference file" in which we will remove sequences already existing in file 1. 
+ +=item B<-o> , B<--output> , B<--out> or B<--outfile> + +Output fasta file. If no output file is specified, the output will be +written to STDOUT. + +=item B<--help> or B<-h> + +Getting help. +Display the full information. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_fasta_removeSeqFromIDlist.pl b/bin/gaas_fasta_removeSeqFromIDlist.pl new file mode 100755 index 000000000..82a4daa9d --- /dev/null +++ b/bin/gaas_fasta_removeSeqFromIDlist.pl @@ -0,0 +1,173 @@ +#!/usr/bin/env perl + + +use Carp; +use strict; +use warnings; +use Getopt::Long; +use Pod::Usage; +use Bio::SeqIO; +use IO::File; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $outfile = undef; +my $file1 = undef; +my $file2 = undef; +my $help= 0; + +if ( !GetOptions( + "help|h" => \$help, + "fasta|file|f=s" => \$file1, + "list|l=s" => \$file2, + "output|outfile|out|o=s" => \$outfile)) + +{ + pod2usage( { -message => "$header"."Failed to parse command line.", + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($help) { + pod2usage( { -message => "$header\n", + -verbose => 99, + -exitval => 0 + } ); +} + +if ( ! 
((defined($file1)) and (defined($file2)))){ + pod2usage( { + -message => "$header\nAt least 2 parameters are mandatory.\n", + -verbose => 0, + -exitval => 1 } ); +} + +###################### +# Manage output file # +# A trailing .fasta/.fa extension is stripped (literal dot, end of the name only) +# so that the result is always written to <name>.fa. +my $fastaout; +if ($outfile) { + $outfile=~ s/\.fasta$//; + $outfile=~ s/\.fa$//; +open(my $fh, '>', $outfile.".fa") or die "Could not open file '$outfile.fa' $!"; + $fastaout= Bio::SeqIO->new(-fh => $fh , -format => 'Fasta'); +} +else{ + $fastaout = Bio::SeqIO->new(-fh => \*STDOUT, -format => 'Fasta'); +} + + + ##################### + # MAIN # + ##################### + + +######################## +### Manage INPUT FILES # +# The --list value is either a file of IDs (one per line) or, when no such file +# exists, a single sequence ID given directly on the command line. +my $nbToExclude = 0; +my %list_to_exclude; +if (-f $file2){ + my $ID_list = IO::File->new($file2, "r") or die "Could not open file '$file2' $!"; + + ############## + #### MAIN #### + + # create hash of ID to remove + while ( <$ID_list> ) { + chomp; + # skip blank lines; "!~" is needed because "! $_ =~ /re/" parses as "(!$_) =~ /re/" + if($_ !~ /^\s*$/){ + $list_to_exclude{$_}++; + $nbToExclude++; + } + } +} +else{ + $list_to_exclude{$file2}++; + $nbToExclude++; +} +print "You want to removed $nbToExclude sequence from $file1\n"; + +#Go all over fasta1 and skip sequence to exclude +my $fasta1 = Bio::SeqIO->new(-file => $file1 , -format => 'Fasta'); +my $nbRemoved=0; +while ( my $seq = $fasta1->next_seq() ) { +if(! exists($list_to_exclude{$seq->id})){ + $fastaout->write_seq($seq); + } + else{$nbRemoved++;} +} + +if($nbToExclude == $nbRemoved){ + print "Exclusion successful. All protein you wanted to exclude have not been kept in the output.\n"; +}else{ + print "WARNING only $nbRemoved sequences on the $nbToExclude you wanted to exclude have been excluded fron the output\n"; +} + + +__END__ + +=head1 NAME + +gaas_fasta_removeSeqFromIDlist + +=head1 DESCRIPTION + +Compare a fasta file to a list of IDs and remove the matching sequences from the fasta file. +A sequence is removed only when its ID is identical to an ID of the list. 
+ +=head1 SYNOPSIS + + gaas_fasta_removeSeqFromIDlist.pl --fasta file1 --list file2 [--out outfile] + gaas_fasta_removeSeqFromIDlist.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<--fasta>, B<--file> or B<-f> + +Input fasta file. The sequences whose ID is found in the list are removed from it. + +=item B<--list> or B<-l> + +File containing the IDs of the sequences to be removed, one ID per line. If the value is not an existing file, it is used as a single ID. An ID must be 100% identical to the ID of a sequence for that sequence to be removed. + +=item B<-o> , B<--output> , B<--out> or B<--outfile> + +Output fasta file. If no output file is specified, the output will be +written to STDOUT. + +=item B<--help> or B<-h> + +Getting help. +Display the full information. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_fasta_splitMultFastaFile.sh b/bin/gaas_fasta_splitMultFastaFile.sh new file mode 100755 index 000000000..1ca58b682 --- /dev/null +++ b/bin/gaas_fasta_splitMultFastaFile.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +############################################################################ +# JD 2014/03 +# Cut a multi FastaFile in one file by fasta +# use: script fileInput DirectoryOutpßut +############################################################################# + +#constants +java=$(which java) + +# Arguments and Paths +if (( $# !=2 )); then + echo -e "The script needs 2 parameters: \n(1)The Multi-fasta file as input" + echo -e "(2)The second corresponds to directory of fasta files output" + exit +fi + +pathDir=$(pwd) +echo "The directory path is $pathDir" + +resuDir=$2 +pathresuDir="$pathDir/$resuDir" +[[ $pathresuDir != */ ]] && pathresuDir="$pathresuDir"/ #test if a slash exist at the end +if [ ! -d "$pathresuDir" ]; then + mkdir $pathresuDir +else + echo "The directory $pathresuDir already exists !" 
+ rm -r $pathresuDir + mkdir $pathresuDir +fi + +fastaFile=$1 +pathFastaFile="$pathDir/$fastaFile" + +awk -v path=$pathresuDir '/^>/{head=gsub(">","");f=$head".fa"; header="true"} {if (header == "true") {print ">"$0 > path"/"f ; header="false" ;} else {print > path"/"f}}' $pathFastaFile diff --git a/bin/gaas_fasta_spliter_overlap.pl b/bin/gaas_fasta_spliter_overlap.pl new file mode 100755 index 000000000..3e8f84566 --- /dev/null +++ b/bin/gaas_fasta_spliter_overlap.pl @@ -0,0 +1,189 @@ +#!/usr/bin/env perl + +### +# Implement case insensitive +### +use strict; +use Pod::Usage; +use Getopt::Long; +use Bio::SeqIO ; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $start_run = time(); + +my $opt_fastafile; +my $opt_output; +my $opt_help = 0; +my $opt_chunck_size = undef; +my $opt_overlap = 0; + +# OPTION MANAGMENT +my @copyARGV=@ARGV; +if ( !GetOptions( 'f|fa|fasta=s' => \$opt_fastafile, + 'c|chunck_size=s' => \$opt_chunck_size, + 'l|overlap=s' => \$opt_overlap, + 'o|output=s' => \$opt_output, + 'h|help!' => \$opt_help ) ) +{ + pod2usage( { -message => "$header\nFailed to parse command line", + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($opt_help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if (! defined($opt_fastafile) or ! defined($opt_chunck_size) ) { + pod2usage( { + -message => "\nAt least 2 parameter is mandatory:\nInput reference fasta file (-f)\nChunck_size (-c)\n". + "Output is optional. 
Look at the help documentation to know more.\n", + -verbose => 0, + -exitval => 2 } ); +} + +# Both values are used in arithmetic below: reject anything that is not a plain integer. +if ($opt_chunck_size !~ /^\d+$/ or $opt_chunck_size == 0 or $opt_overlap !~ /^\d+$/) { + die "Chunck size (-c) must be a positive integer and overlap (-l) a non-negative integer.\n"; +} +# The window advances by (chunck size - overlap) at each step, so this must stay > 0. +# Stop here instead of only printing a message and then looping forever. +if($opt_overlap >= $opt_chunck_size){ + die "$opt_overlap cannot be >= to $opt_chunck_size otherwise we end up in an infinite loop.\n"; +} + + +#D OUTPUT +# A trailing .fasta/.fa extension is stripped (literal dot, end of the name only) +# so that the result is always written to <name>.fa. +my $ostream; +if ($opt_output) { + $opt_output=~ s/\.fasta$//; + $opt_output=~ s/\.fa$//; + open(my $fh, '>', $opt_output.".fa") or die "Could not open file '$opt_output.fa' $!"; + $ostream= Bio::SeqIO->new(-fh => $fh, -format => 'Fasta' ); +} +else{ + $ostream = Bio::SeqIO->new(-fh => \*STDOUT, -format => 'Fasta'); +} + +print "We will split fasta sequences by chunck of $opt_chunck_size and overlap of $opt_overlap.\n"; + + +##### MAIN #### + +######### read fasta file ############# +my $fasta1 = Bio::SeqIO->new(-file => $opt_fastafile , -format => 'Fasta'); +while ( my $seq = $fasta1->next_seq() ) { + my $start = 1; + my $end = $opt_chunck_size; + + # NOTE(review): a sequence whose length is <= chunck size never enters this loop + # and is therefore not written to the output - confirm this is intended. + while ( $end < $seq->length() ) { + + my $sequence = undef; + my $seqObj = undef; + my $id_seq = undef; + if($seq->length() > ($end+$opt_chunck_size) ){ + + + $sequence = $seq->subseq($start, $end); + $seqObj = Bio::Seq->new( '-format' => 'fasta' , -seq => $sequence); + $id_seq = $seq->id."_".$start."_".$end; + $seqObj->id($id_seq); + $ostream->write_seq($seqObj); + } + else{ + # less than one full chunck would remain after this one: take everything up to the end + $sequence = $seq->subseq($start, $seq->length()); + $seqObj = Bio::Seq->new( '-format' => 'fasta' , -seq => $sequence); + $id_seq = $seq->id."_".$start."_".$seq->length(); + $seqObj->id($id_seq); + $ostream->write_seq($seqObj); + last; + } + $start = $end - $opt_overlap; + $end = $start + $opt_chunck_size; + } +} + + +#END +print "usage: $0 @copyARGV\n"; +my $end_run = time(); +my $run_time = $end_run - $start_run; +print "Job done in $run_time seconds\n"; + +####################################################################################################################### + #################### + # METHODS # + ################ + ############## + ############ + ########## + ######## + ###### + #### + ## 
+ + + + +__END__ + +=head1 NAME + +gaas_fasta_spliter_overlap.pl + +=head1 DESCRIPTION + +This script splits each sequence into chunks of a given size, with an optional overlap between consecutive chunks. + +=head1 SYNOPSIS + + gaas_fasta_spliter_overlap.pl -f infile.fasta -c chunck_size [ -l overlap ] [ -o outfile ] + gaas_fasta_spliter_overlap.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<-f>, B<--fa> or B<--fasta> + +Input fasta file. Mandatory. + +=item B<-c> or B<--chunck_size> + +Integer corresponding to the size of a chunk in bp. Mandatory. Sequences that are not longer than this value are not written to the output. + +=item B<-l> or B<--overlap> + +Integer controlling the overlap between two consecutive chunks. Default value 0. It must be smaller than the chunk size. + +=item B<-o> or B<--output> + +Output fasta file. If no output file is specified, the output will be +written to STDOUT. + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_fasta_statisticsAndPlot.pl b/bin/gaas_fasta_statisticsAndPlot.pl new file mode 100755 index 000000000..833e02ae4 --- /dev/null +++ b/bin/gaas_fasta_statisticsAndPlot.pl @@ -0,0 +1,405 @@ +#!/usr/bin/env perl +# Takes a fasta-file (usually a genomic or transcriptomic assembly) and checks for +# potential problems as well as calculates a few basic statistics. 
+# +# NBIS 2018 +# jacques.dainat@nbis.se + +use warnings; +use strict; +use Statistics::R; +use POSIX qw(strftime); +use File::Basename; +use Pod::Usage; +use Getopt::Long; +use Try::Tiny; +use Carp; +use IO::File; +use Bio::SeqIO; + +my $nb_seq = 0; +my $problemcount=0; +my $nbseq_withLowerCase=0; +my $total_lowerCaseCount; +my $Ncount=0; +my $pureNseq=0; +my $totalcount=0; +my $totalcountOver1000=0; +my $totalcountOver10000=0; +my $gccount=0; +my $total_noNs=0; +my @sequencelength=(); +my @sequencelengthOver1000=(); +my @sequencelengthOver10000=(); + +my $opt_infile; +my $opt_dirRes; +my $opt_help = 0; + + +if ( !GetOptions( 'f|infile=s' => \$opt_infile, + 'o|out|output=s' => \$opt_dirRes, + 'h|help!' => \$opt_help ) ) +{ + pod2usage( { -message => 'Failed to parse command line', + -verbose => 1, + -exitval => 1 } ); +} + +if ($opt_help) { + pod2usage( { -verbose => 2, + -exitval => 0 } ); +} + +if ( !( defined($opt_infile) ) ) { + pod2usage( { + -message => "Must specify at leat one parameter:\nfasta_statisticsAndPlot.pl -f InputFastaFile [-o Ouput_directory] ", + -verbose => 0, + -exitval => 2 } ); +} + + + +my $outstream = IO::File->new();; +my $outstreamFix; +my $outstreamError; + +if ( defined($opt_dirRes) ) { + if (-d $opt_dirRes) { + print "$opt_dirRes output directory already exits !\n";exit; + } + mkdir $opt_dirRes; + + $outstream->open( "$opt_dirRes/fasta_report.txt", 'w' ) or + croak(sprintf( "Can not open '%s' for writing %s", "$opt_dirRes/fasta_report.txt", $! 
)); + + open(my $fh, '>', "$opt_dirRes/fix_sequences.fa") or die "Could not open file '$opt_dirRes/fix_sequences.fa' $!"; + $outstreamFix = Bio::SeqIO->new(-fh => $fh , -format => 'fasta'); + + open(my $fhe, '>', "$opt_dirRes/problem_sequences.fa") or die "Could not open file '$opt_dirRes/problem_sequences.fa' $!"; + $outstreamError = Bio::SeqIO->new(-fh => $fhe , -format => 'fasta'); +} +else { + $outstream->fdopen( fileno(STDOUT), 'w' ) or + croak( sprintf( "Can not open STDOUT for writing: %s", $! ) ); + $outstreamError = Bio::SeqIO->new(-fh => \*STDOUT, -format => 'fasta'); +} + +# INPUT FILE +my $inseq = Bio::SeqIO->new(-file => "<$opt_infile", -format => 'fasta'); + + +#Calculate the statistics from the entries in the hash +my $cp1kb=0; +my $cp10kb=0; +while( my $seqObj = $inseq->next_seq() ) { + + my $sequence = $seqObj->seq(); + $nb_seq++; + my $problemNstart=undef; + my $problemNend =undef; + my $problem_lowerCase = undef; + + #save sequence length for N50 calculation + push @sequencelength, length $sequence; + + #Check for Ns at the beginning or end of sequence + if ($sequence =~ /^N/){ + $problemNstart="yes"; + } + if ($sequence =~ /N$/){ + $problemNend = "yes"; + } + # Write and count a problem sequence once, even when both extremities are Ns. + # No "print" in front of write_seq: it would print the return value of the call on STDOUT. + if ($problemNstart or $problemNend){ + $outstreamError->write_seq( $seqObj ); + $problemcount++; + } + if (length($sequence) > 1000){ + $cp1kb++; + #Count total size over 1000 + $totalcountOver1000 += length $sequence; + #save sequence length for N50 calculation + push @sequencelengthOver1000, length $sequence; + + if (length($sequence) > 10000){ + $cp10kb++; + #Count total size over 10000 + $totalcountOver10000 += length $sequence; + #save sequence length for N50 calculation + push @sequencelengthOver10000, length $sequence; + } + } + + + #Count number of NNN regions + my $match=0; + $match++ while $sequence =~ /[ACGT]N+[ACGT]/g; + $Ncount += $match; + #Count GC + $gccount += ($sequence =~ tr/gGcC/gGcC/); + #Count size total + $totalcount += length $sequence; 
+ #Count size with No Ns + my $noNs=$sequence; + $noNs =~ s/N//g; + $total_noNs += length $noNs; + if(length $noNs == 0){ + $pureNseq++; + } + #Count lowercase outside Ns + my $lowerCaseCount = ($noNs =~ tr/atgc/atgc/); + $total_lowerCaseCount += $lowerCaseCount; + if($lowerCaseCount > 0){ + $nbseq_withLowerCase++; + $problem_lowerCase="yes"; + } + # Now we can print the fixed sequnece + if ($opt_dirRes){ + + #Fix the lowercase issue + if($problem_lowerCase){ + $sequence =~ tr/atgcn/ATGCN/; + $seqObj->seq($sequence); + } + + #Fix starting N + if($problemNstart or $problemNend){ + $sequence =~ s/^N+|N+$//g; + $seqObj->seq($sequence); + } + # no "print" in front of write_seq: it would print the return value of the call on STDOUT + $outstreamFix->write_seq( $seqObj ); + } +} + +#Calculate some statistics +# The totals are 0 for an empty input (or an input made of Ns only): avoid a division by zero. +my $GCpercentage = $totalcount ? ($gccount/$totalcount*100) : 0; +my $GCnoNs = $total_noNs ? ($gccount/$total_noNs*100) : 0; +my $totalNs =$totalcount-$total_noNs; + +################# +# Calculate N50 # +@sequencelength = reverse sort { $a <=> $b } @sequencelength; +my $N50=$totalcount/2; +my $sum=0; +my $entry; +# copy of sequencelength to keep it intactfor R calculation purpose later +my @sequencelengthForN50Calcul=@sequencelength; + +my $nbcontig=0; + +# lengths are taken from the longest to the shortest until half of the total size is reached +while ($sum < $N50){ + $entry = shift @sequencelengthForN50Calcul; + $sum += $entry; + $nbcontig+=1; +} + +################# +# Calculate N90 # +# copy of sequencelength to keep it intactfor R calculation purpose later +my @sequencelengthForN90Calcul=@sequencelength; +@sequencelengthForN90Calcul = reverse sort { $a <=> $b } @sequencelengthForN90Calcul; +my $NinetyPercGenomeSize=( 90*$totalcount / 100); +$sum=0; +my $N90; +while ($sum < $NinetyPercGenomeSize){ + $N90 = shift @sequencelengthForN90Calcul; + $sum += $N90; +} + + +########################### +# Calculate N50 over 1000 # +@sequencelengthOver1000 = reverse sort { $a <=> $b } @sequencelengthOver1000; +my $HalfGenomeSizeOver1000=$totalcountOver1000/2; +$sum=0; +my $N50over1000=0; +while ($sum < $HalfGenomeSizeOver1000){ + $N50over1000 = shift @sequencelengthOver1000; + $sum 
+= $N50over1000; +} + + +############################ +# Calculate N50 over 10000 # +@sequencelengthOver10000 = reverse sort { $a <=> $b } @sequencelengthOver10000; +my $HalfGenomeSizeOver10000=$totalcountOver10000/2; +$sum=0; +my $N50over10000=0; +while ($sum < $HalfGenomeSizeOver10000){ + $N50over10000 = shift @sequencelengthOver10000; + $sum += $N50over10000; +} + + +########################### +#print out the statistics # +my $date = strftime "%m/%d/%Y at %Hh%Mm%Ss", localtime; +my $StingToPrint; +$StingToPrint .= "\n========================================\n"; +$StingToPrint .= "Fasta-statistics\ launched the $date:\n"; +$StingToPrint .= "There are $nb_seq sequences\n"; +$StingToPrint .= "There are $cp10kb sequences > 10kb \n"; +$StingToPrint .= "There are $cp1kb sequences > 1kb \n"; +$StingToPrint .= "There are $totalcount nucleotides, of which $totalNs are Ns\n"; +$StingToPrint .= "There are $Ncount N-regions (possibly links between contigs)\n"; +$StingToPrint .= "There are $pureNseq pure (only) N sequences. Assembler doing that must be notified ! 
\n"; +$StingToPrint .= "There are $problemcount sequences that begin or end with Ns (see problem_sequences.txt)\n"; +$StingToPrint .= sprintf("The GC-content is %.1f",$GCpercentage); +$StingToPrint .= "\%"; +$StingToPrint .= sprintf(" (not counting Ns %.1f", $GCnoNs); +$StingToPrint .= "\%)\n"; +$StingToPrint .= "There are $nbseq_withLowerCase sequences with lowercase nucleotides (Ns not considered)\n"; +$StingToPrint .= "There are $total_lowerCaseCount lowercase nucleotides (Ns not considered)\n"; +$StingToPrint .= "The N50 is $entry\n"; +$StingToPrint .= "The N90 is $N90\n"; +$StingToPrint .= "The N50 for sequences over 1000bp is $N50over1000\n"; +$StingToPrint .= "The N50 for sequeces over 10000bp is $N50over10000\n"; +$StingToPrint .= "========================================\n"; +if($opt_dirRes){ + $StingToPrint .= "You will find corrected sequences in fix_sequences.txt (Extremities N trimed and lowercase nucleotides).\n"; +} +print $outstream "$StingToPrint"; + +####### +# +# Plot +# +###### +if($opt_dirRes){ + print $StingToPrint; + if($nb_seq > 1){ #If only 1 seq we get error like: density.default(listValues) : need at least 2 points to select a bandwidth automatically + # temporary file name + my $tempFile1="dump.tmp"; + + try { + print $StingToPrint; + print "This result was saved in the $opt_dirRes directory.\nThe plots are in format and available in the directory.\n"; + + # write the data in temporary file + open(FILE, ">$tempFile1") || die "Erreur E/S:$!\n"; + foreach my $size ( @sequencelength ) { + print FILE "$size\n"; + } + close(FILE); + + + + my $ouputName=basename($opt_infile); + my $ouputPlot=$opt_dirRes."/".$ouputName; + + + + # Calcul percentage of contig right and left to N50 + my $percentContigRightN50=($nbcontig*100)/($#sequencelength+1); + my $percentContigLeftN50=100-$percentContigRightN50; + $percentContigRightN50=sprintf ("%0.2f",$percentContigRightN50)."%"; + $percentContigLeftN50=sprintf ("%0.2f",$percentContigLeftN50)."%"; + # Name 
of different outputs + my $outputPlotLog=$ouputPlot."_PlotLog.pdf"; + my $outputPlotDensity=$ouputPlot."_PlotDensity.pdf"; + my $outputPlotHist=$ouputPlot."_PlotHisto.pdf"; + # calcul right and left position to write percentContigRightN50 and percentContigLeftN50 + my $biggestValue=shift @sequencelength; + my $positionright=(5*$biggestValue)/100+$entry; + my $positionleft=$entry-(5*$biggestValue)/100; + # Tab=as.matrix(read.table("$tempFile1", sep="\t", he=T)) + # R object Declaration + my $R = Statistics::R->new() or die "Problem with R : $!\n"; + + ## info ## + #myhist$breaks contient la valeur minimale de chaque intervalle + #myhist$mids contient la valeur au milieu de chaque intervalle + #myhist$counts contient le nombre de valeurs situées dans cet intervalle + #myhist$density contient la proportion de valeurs situées dans cet intervalle (autrement dit, tec.hist$counts / length (tec)). + + # R command + $R->send( + qq` + listValues=as.matrix(read.table("$tempFile1", sep="\t", he=F)) + myhist<-hist(listValues) + legendToDisplay=paste("Number of value used : ",length(listValues)) + + pdf("$outputPlotLog") + plot(x = log(myhist\$mids), y = log(myhist\$counts), xlab="log(Contig size)", ylab="log(Frequency)", main="Size distribution of contigs") + abline(v =log($entry), col=2) + axisValues=par("usr") + ymax=(axisValues[4]*90)/100 + ymax2=(axisValues[4]*85)/100 + shiftFivePercent=(5*(axisValues[2]-axisValues[1]))/100 + text(x = log($entry)+shiftFivePercent, y = ymax2, paste("$percentContigRightN50"), cex = 1, col = "red") + text(x = log($entry)-shiftFivePercent, y = ymax2, paste("$percentContigLeftN50"), cex = 1, col = "red") + text(x = log($entry), y = ymax, paste("N50"), cex = 1, col = "red") + legend("topright", col=(1), lty=1, c(legendToDisplay)) + dev.off() + + pdf("$outputPlotDensity") + plot(density(listValues), xlab="Contig size", main="Size distribution of contigs") + abline(v =$entry, col=2) + axisValues=par("usr") + ymax=(axisValues[4]*90)/100 + 
ymax2=(axisValues[4]*85)/100 + text(x = $positionright, y = ymax2, paste("$percentContigRightN50"), cex = 1, col = "red") + text(x = $positionleft, y = ymax2, paste("$percentContigLeftN50"), cex = 1, col = "red") + text(x = $entry, y = ymax, paste("N50"), cex = 1, col = "red") + legend("topright", col=(1), lty=1, c(legendToDisplay)) + dev.off() + + pdf("$outputPlotHist") + hist(listValues, xlab="Contig size",main="Size distribution of contigs") + abline(v =$entry, col=2) + axisValues=par("usr") + ymax=(axisValues[4]*90)/100 + ymax2=(axisValues[4]*85)/100 + text(x = $positionright, y = ymax2, paste("$percentContigRightN50"), cex = 1, col = "red") + text(x = $positionleft, y = ymax2, paste("$percentContigLeftN50"), cex = 1, col = "red") + text(x = $entry, y = ymax, paste("N50"), cex = 1, col = "red") + legend("topright", col=(1), lty=1, c(legendToDisplay)) + dev.off()` + ); + + # Close the bridge + $R->stopR(); + + # Delete temporary file + unlink $tempFile1; + unlink "Rplots.pdf"; #created by " myhist<-hist(listValues)". I do not know how do differently... + } + catch{ + warn "caught error: $_"; + unlink $tempFile1; + } + } +} + +__END__ + +=head1 NAME + +fasta_statisticsAndPlot.pl - get some basic statistics about a nucleotide fasta file. (Number of sequence, Number of nucleotide, N50, GC-content,etc). It will also create R plots about contig size distribution. +The R output plot will be perform only if an output is given. +This script is not yet designed for AA sequences or IUPAC Nucleotides. + +=head1 SYNOPSIS + + ./fasta_statisticsAndPlot.pl --f=infile [--output=Directory] + ./fasta_statisticsAndPlot.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<--f>, B<--infile> or B<-f> + +Input fasta file containing DNA sequences. + +=item B<--out>, B<--output> or B<-o> + +[OPTIONAL] Output directory where diffrent output files will be written. If no output is specified, the result will written to STDOUT. + +=item B<--help> or B<-h> + +Display this helpful text. 
+ +=back + +=cut diff --git a/bin/gaas_fastq_check_sync_pair1_pair2.pl b/bin/gaas_fastq_check_sync_pair1_pair2.pl new file mode 100755 index 000000000..42fbfc1c2 --- /dev/null +++ b/bin/gaas_fastq_check_sync_pair1_pair2.pl @@ -0,0 +1,233 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Pod::Usage; +use Getopt::Long; +use Data::Dumper; +use IO::File; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $start_run = time(); +my @inputFile; +my $check_complete; +my $gzip_input; +my $nb; +my $opt_help = 0; + +Getopt::Long::Configure ('bundling'); +if ( !GetOptions ( + 'i|file|input=s' => \@inputFile, + 'c|complete!' => \$check_complete, + 'nb=s' => \$nb, + 'h|help!' => \$opt_help ) ) +{ + pod2usage( { -message => 'Failed to parse command line', + -verbose => 1, + -exitval => 1 } ); +} + +if ($opt_help) { + pod2usage( { -verbose => 99, + -exitval => 0 } ); +} + +# -i must be given exactly twice: once per fastq file of the pair +if (! ($#inputFile == 1) ){ + pod2usage( { -message => 'Exactly 2 input files are mandatory (-i file1 -i file2)', + -verbose => 1, + -exitval => 1 } ); +} + +#Deal with extensions +#my $pieces_list = split_keep_delimiter($inputFile); +#my @pieces = @$pieces_list; +#my $suffix = pop(@pieces); + +#if ($suffix eq ".gzip" or $suffix eq ".gz") { +# $gzip_input=1; +#} + +# IO::File is loaded explicitly above: IO::File->new() must not rely on another module having loaded it +my $in1 = IO::File->new(); +my $in2 = IO::File->new(); +open($in1, '<', $inputFile[0]) or die "Could not open file '$inputFile[0]' $!"; +open($in2, '<', $inputFile[1]) or die "Could not open file '$inputFile[1]' $!"; + + +my $read_cpt = 0; +my $read_fail = 0; +my $header_type=undef; +my $count=0; + +# Both files are read in parallel, one line of each per iteration +while (!eof($in1) and !eof($in2)) { + $count++; + my $id1 = <$in1>; + my $id2 = <$in2>; + # skip all line that are not header + next unless ($count % 4 == 1 ); + + #extract header + chomp $id1; + chomp $id2; + + #check header type + if ($read_cpt == 0){ + if ($id1 =~ /^@\S+\/[12]$/) { # @ at the start of the line followed by non-whitespace, a /, a 1 or 2, the end of the line + $header_type = 1; + print STDOUT "Read Id looks like Casava 1.7 style\n"; # TESTING + } + elsif ($id1 
=~ /^@\S+\W[12]\S+$/) { # @ at the start of the line followed by non-whitspace, a space, a 1 or 2, non-whitespace + $header_type = 2; + print STDOUT "Read Id looks like Casava 1.8 style\n"; + } + else { + print STDOUT "Unknwon id style (Not Casava 1.7 or 1.8): $id1\n"; + exit 1; + } + } + + + if ($header_type == 1) { # 1 or 2 at end of id + chop $id1; # last char of the id (should be the "1" or "2") + chop $id2; + } else { + ($id1) = split ' ', $id1; + ($id2) = split ' ', $id2; + } + + if ($id1 ne $id2 ){ + if($check_complete) { + $read_fail++; + } + else{ + my $end_run = time(); + my $run_time = $end_run - $start_run; + print "ERROR dectected. Read1 and Read2 not synchronized at line $count.\nRead1 = $id1 -- Read2 = $id2\n"; + print "Runtime $run_time\n"; + exit 1; # a de-synchronization is a failure: do not exit with a success status + } + } + $read_cpt++; + + if($nb and $nb == $read_cpt){ + last; + } +} + +# The loop stops as soon as one file is exhausted: warn if the other one still has lines +# (not relevant when we stopped on purpose after --nb reads). +if ( !($nb and $nb == $read_cpt) and (eof($in1) xor eof($in2)) ){ + print STDERR "WARNING: the two files do not contain the same number of lines.\n"; +} + +# Percentage of de-synchronized read pairs (0 when no read was read, to avoid a division by zero) +my $percent = $read_cpt ? ($read_fail / $read_cpt) * 100 : 0; +$percent = sprintf("%.2f", $percent); + +my $end_run = time(); +my $run_time = $end_run - $start_run; + +# Failures are only counted in complete mode: report them instead of claiming success. +if ($read_fail){ + print "ERROR: $read_fail read pairs out of $read_cpt ($percent%) are not synchronized.\n"; + print "Runtime $run_time\n"; + exit 1; +} + +print "Check successfully passed in $run_time seconds. 
$read_cpt reads read.\n"; + +####################################################################################################################### + #################### + # METHODS # + ################ + ############## + ############ + ########## + ######## + ###### + #### + ## + + +# Concatenate, in order, all the elements of the list given by reference. +sub concat_list_from_left{ + my ($list)=@_; + return join("", @{$list}); +} + +# Split a string on "." and return a reference to the list of pieces; +# every piece except the first one keeps its leading ".". +sub split_keep_delimiter{ + my ($string)=@_; + + my ($first, @others) = split(/\./, $string); + my @result = defined($first) ? ($first) : (); + push @result, map { ".".$_ } @others; + return \@result; +} + +__END__ + +=head1 NAME + +gaas_fastq_check_sync_pair1_pair2.pl + +=head1 DESCRIPTION + +The aim of this script is to check that paired reads from 2 fastq files are still synchronized. +Read1 and the read2, that come from a paired sequencing, are in the same position in the two fastq files. +But the order of read in R1/R2 files can get out of sync if you e.g scan/trim the two files independently. So it is a good thing always to check. + +=head1 SYNOPSIS + + gaas_fastq_check_sync_pair1_pair2.pl -i input_R1.fastq -i input_R2.fastq + gaas_fastq_check_sync_pair1_pair2.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<-i>, B<--file> or B<--input> + +STRING: Input fastq file that will be read. + +=item B<--complete> or B<-c> + +BOLEAN - In complete mode, the script doesn't stop at the first synchronization problem, but will read the whole file and report the number of de-synchronization found. + +=item B<--nb> + +Integer - Allow to check just a subsample of the reads. So, define here the number of read to check. + +=item B<--help> or B<-h> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. 
Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_fastq_deinterleave_bash.pl b/bin/gaas_fastq_deinterleave_bash.pl new file mode 100755 index 000000000..a19ba8ee1 --- /dev/null +++ b/bin/gaas_fastq_deinterleave_bash.pl @@ -0,0 +1,222 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Pod::Usage; +use Getopt::Long; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $start_run = time(); + +my $inputFile; +my $output_suffix1=1; +my $output_suffix2=2; +my $gzip_output; +my $gzip_input; +my $pigz_compression_threads=1; +my $opt_help = 0; + +Getopt::Long::Configure ('bundling'); +if ( !GetOptions ( + 'i|file|input|gff=s' => \$inputFile, + 'os1|output_suffix1=s' => \$output_suffix1, + 'os2|output_suffix2=s' => \$output_suffix2, + 'thread=i' => \$pigz_compression_threads, + 'c|gzip!' => \$gzip_output, + 'h|help!' => \$opt_help ) ) +{ + pod2usage( { -message => 'Failed to parse command line', + -verbose => 1, + -exitval => 1 } ); +} + +if ($opt_help) { + pod2usage( { -verbose => 99, + -exitval => 0 } ); +} + +if (! 
$inputFile ){ + pod2usage( { -message => 'At least 1 input file is mandatory', + -verbose => 1, + -exitval => 1 } ); +} + +#Deal with extensions +# The input name is cut on "." to get: base name, fastq extension and optional .gz/.gzip suffix +my $pieces_list = split_keep_delimiter($inputFile); +my @pieces = @$pieces_list; +my $suffix = pop(@pieces); +my $fq_ext; +my $filename; + +if ($suffix eq ".gzip" or $suffix eq ".gz") { + $gzip_input=1; + $fq_ext = pop(@pieces); + $filename = concat_list_from_left(\@pieces); +} +else{ + $fq_ext = $suffix; + $filename = concat_list_from_left(\@pieces); +} +# A name without fastq extension (e.g. "reads" or "reads.gz") leaves nothing for the base name: +# use the whole remaining name as base name instead of creating files called "_1reads". +if ($filename eq "") { + $filename = $fq_ext; + $fq_ext = ""; +} + +# The command is given to bash as a single argument (list form of system): no extra shell +# quoting layer that a quote in a file name could break. A failing command stops the script. +# NOTE(review): file names are still interpolated unquoted inside the command itself. +if ($gzip_input) {#unzip input case + if ($gzip_output) { + print "command1\n"; + my $command = 'gzip -dc '.$inputFile.' | paste - - - - - - - - | tee >(cut -f 1-4 | tr "\t" "\n" | pigz --best --processes '.$pigz_compression_threads.' > '.$filename."_".$output_suffix1.$fq_ext.'.gz) | cut -f 5-8 | tr "\t" "\n" | pigz --best --processes '.$pigz_compression_threads.' > '.$filename."_".$output_suffix2.$fq_ext.".gz"; + print "Command launched:\n".$command."\n"; + system ("/bin/bash", "-c", $command) == 0 or die "Command failed (status $?)\n"; + } + else{ + print "command2\n"; + my $command = 'gzip -dc '.$inputFile.' | paste - - - - - - - - | tee >(cut -f 1-4 | tr "\t" "\n" > '.$filename."_".$output_suffix1.$fq_ext.') | cut -f 5-8 | tr "\t" "\n" > '.$filename."_".$output_suffix2.$fq_ext; + print "Command launched:\n".$command."\n"; + system ("/bin/bash", "-c", $command) == 0 or die "Command failed (status $?)\n"; + } +} +else{ + if ($gzip_output) { + print "command3\n"; + my $command = 'cat '.$inputFile.' | paste - - - - - - - - | tee >(cut -f 1-4 | tr "\t" "\n" | pigz --best --processes '.$pigz_compression_threads.' > '.$filename."_".$output_suffix1.$fq_ext.'.gz) | cut -f 5-8 | tr "\t" "\n" | pigz --best --processes '.$pigz_compression_threads.' > '.$filename."_".$output_suffix2.$fq_ext.".gz"; + print "Command launched:\n".$command."\n"; + system ("/bin/bash", "-c", $command) == 0 or die "Command failed (status $?)\n"; + } + else{ + print "command4\n"; + my $command = 'cat '.$inputFile.' 
| paste - - - - - - - - | tee >(cut -f 1-4 | tr "\t" "\n" > '.$filename."_".$output_suffix1.$fq_ext.') | cut -f 5-8 | tr "\t" "\n" > '.$filename."_".$output_suffix2.$fq_ext; + print "Command launched:\n".$command."\n"; + # list form of system: the command reaches bash as one argument; a failure stops the script + system ("/bin/bash", "-c", $command) == 0 or die "Command failed (status $?)\n"; + } +} + +my $end_run = time(); +my $run_time = $end_run - $start_run; +print "Job done in $run_time seconds\n"; + + +####################################################################################################################### + #################### + # METHODS # + ################ + ############## + ############ + ########## + ######## + ###### + #### + ## + + +# Concatenate, in order, all the elements of the list given by reference. +sub concat_list_from_left{ + + my ($list)=@_; + + my $result=""; + foreach my $element (@{$list}){ + $result = $result.$element; + } + + return $result; +} + +# Split a string on "." and return a reference to the list of pieces; +# every piece except the first one keeps its leading ".". +sub split_keep_delimiter{ + my ($string)=@_; + + my @result; + my @pieces = split(/\./, $string); + my $cpt=0; + foreach my $element (@pieces){ + if($cpt != 0){ + push @result, ".".$element; + } + else{ + push @result, $element; + } + $cpt++; + } + return \@result; +} + +__END__ + +=head1 NAME + +deinterleave_fastq.pl + +=head1 SYNOPSIS + +Deinterleaves a (compressed or not compressed) FASTQ file of paired reads into two FASTQ files. +Optionally GZip compresses the output FASTQ files using pigz. + +Can deinterleave 100 million paired reads (200 million total +reads; a 43Gbyte file), in memory (/dev/shm), in 4m15s (255s) + +Script inspired by a pure bash code from the nathanhaigh repository: https://gist.github.com/3521724 +Also see the interleaving script: https://gist.github.com/4544979 + +The nathanhaigh script was itself inspired by Torsten Seemann's blog post: +http://thegenomefactory.blogspot.com.au/2012/05/cool-use-of-unix-paste-with-ngs.html + + deinterleave_fastq.pl -i input.fastq + deinterleave_fastq.pl -i input.fastq --os1 R1 --os2 R2 + deinterleave_fastq.pl --help + +The first command will create input_1.fastq and input_2.fastq files. 
+The second command will create input_R1.fastq and input_R2.fastq files. + +=head1 OPTIONS + +=over 8 + +=item B<-i>, B<--file> or B<--input> + +STRING: Input fastq file that will be read. + +=item B<--os1>, B<--output_suffix1> + +STRING: Suffix to add to the output file 1. By default 1. + +=item B<--os2>, B<--output_suffix2> + +STRING: Suffix to add to the output file 2. By default 2. + +=item B<-gz> or B<--gzip> + +Boolean: The output will be compressed using pigz. + +=item B<--thread> + +Integer: The number of threads used when running pigz. + +=item B<--help> or B<-h> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_fastq_guessMyFormat.pl b/bin/gaas_fastq_guessMyFormat.pl new file mode 100755 index 000000000..15358d406 --- /dev/null +++ b/bin/gaas_fastq_guessMyFormat.pl @@ -0,0 +1,351 @@ +#!/usr/bin/env perl +# Author: Martin Dahlo / Jacques Dainat + + +use warnings; +use strict; +use Getopt::Std; +use Getopt::Long; +use Pod::Usage; + + +=pod + +Used to detect the format of a fastq file. It has 2 different modes, normal and advanced. 

In the normal mode, it can only differentiate between Sanger/Illumina1.8+ and Solexa/Illumina1.3+/Illumina1.5+.

In the advanced mode, it will try to pinpoint exactly which scoring system is used. It will look at all quality scores until either

* There is only one scoring system that matches
* It has been running for more than a specified time
* It reaches the end of the file without either finding a single matching scoring system, or running out of time before reaching the end of the file.


To run the program in normal mode, give it only the name of the fastq file:

perl scriptname.pl -i <fastq file>
Ex.
perl scriptname.pl -i myReads.fq


To run it in advanced mode without a practical time limit (the default), specify -a:

perl scriptname.pl -i <fastq file> -a
Ex.
perl scriptname.pl -i myReads.fq -a


To run it in advanced mode with a custom timeout, specify -a and -t:

perl scriptname.pl -i <fastq file> -a -t <seconds>
Ex.
perl scriptname.pl -i myReads.fq -a -t 600



The output from the program reports the interval in which qualities were observed (raw ascii numbers, not adjusting for any offsets or phred etc), and the scoring systems matching these values.

Ex.

Time limit reached, observed qualities in range [37,67].
Possible matches:
Illumina 1.8+;Sanger



Can easily be copy/pasted into any other script and altered to do other things than die when it has determined the format.

Pseudo code

* Open the fastq file
* Look at each quality ASCII char and convert it to a number
* Depending on if that number is above or below certain thresholds,
  determine the format.

=cut


# REMINDER
my $remain="\n/!\\ We remain that ASCII code used by the differents score system overlap each other. We differentiate them only looking the part non-overlaping. So we assume that the reads analyzed should statistically contains at least one value of the non-overlapping part.
Indeed, fastq file are enough large to contains each value possible of the quality score system.\n(fastqc do the same assumption and never reported any error)
";
# scoring system definitions, according to http://en.wikipedia.org/wiki/FASTQ_format#Encoding
# Feel free to add more on your own, following the system of the ones already in here.
# Each value is [lowest, highest] raw ASCII code accepted for that system.
my %systems = ( 'Sanger',        [33,126],
                'Solexa',        [59,126],
                'Illumina 1.3+', [64,126],
                'Illumina 1.5+', [66,126],
                'Illumina 1.8+', [35,126]);

# Message reported for each scoring system. The 'Sanger' entry is double-quoted
# like the others so that its "\n" sequences are real line breaks.
my %infoDisplay = ( 'Sanger' => "Phred+33 - It could be Sanger or Illumina 1.8+. Sorry this is the only case impossible to really differentiate !\nAnyway, you will be happy to learn that one or the other have exactly the same quality score system (Phred+33)".
                                "\nAs the character is not present we could assume that is Sanger !",
                    'Solexa' => 'Solexa+64 - It could be Solexa',
                    'Illumina 1.3+' => 'Phred+64 - It could be Illumina 1.3+',
                    'Illumina 1.5+' => 'Phred+64 - It could be Illumina 1.5+',
                    'Illumina 1.8+' => "Phred+33 - It could be Sanger or Illumina 1.8+. Sorry this is the only case impossible to really differentiate !\nAnyway, you will be happy to learn that one or the other have exactly the same quality score system (Phred+33)".
                                       "\nWe know that you really want to know exactly which Quality score it is... So, as the character is present we could assume that is Illumina 1.8+ !",
                    'last' => "Phred+33 - It could be Sanger or Illumina 1.8+. Sorry this is the only case impossible to really differentiate !\nAnyway, you will be happy to learn that one or the other have exactly the same quality score system (Phred+33)".
                              "\nWe know that you really want to know exactly which Quality score it is... but there is ASCII value over 41 we absolutly cannot differentiate them abinitio.");

my $inputFile=undef;
my $opt_help=undef;
my $adv = undef;
my $time = 999999999;   # maximum search time in seconds for the advanced mode

Getopt::Long::Configure ('bundling');
if ( !GetOptions ('i|fq|fastq=s' => \$inputFile,
                  'a!' => \$adv,
                  't=i' => \$time,
                  'h|help!' => \$opt_help ) )
{
    pod2usage( { -message => 'Failed to parse command line',
                 -verbose => 1,
                 -exitval => 1 } );
}

if ($opt_help) {
    pod2usage( { -verbose => 2,
                 -exitval => 0 } );
}

if ((!defined($inputFile)) ){
    pod2usage( { -message => 'at least 1 parameter is mandatory: -i',
                 -verbose => 1,
                 -exitval => 1 } );
}

# open the files
my $fh = _open_fastq($inputFile);

# in non advance mode
if(!$adv){

    # go through the file
    while(my $separator = <$fh>){

        # only the line before the quality line (the "+" line) is of interest
        next unless $separator =~ /^\+/;

        my $quality = <$fh>;            # get the quality line
        last unless defined($quality);  # truncated file: the "+" line was the last one
        chomp($quality);                # remove the newline

        foreach my $char (split(//, $quality)){ # for each char

            my $number = ord($char); # get the number represented by the ascii char

            # check if it is sanger or illumina/solexa, based on the ASCII image at http://en.wikipedia.org/wiki/FASTQ_format#Encoding
            if($number > 74){ # if solexa/illumina
                die "This file looks like Solexa/Illumina1.3+/Illumina1.5+ format. Launch the script in advanced mode in order to know more\n"; # print result to terminal and die
            }elsif($number < 59){ # if sanger
                die $infoDisplay{'last'}."\n"; # print result (Sanger/illumina1.8+) to terminal and die
            }
        }
    }

    die "Unconclusive, could be either Sanger/Illumina1.8+ or Solexa/Illumina1.3+/Illumina1.5+\n";
}






# if the user wants the advanced mode
if($adv){

    # initiate
    my $max = -100;
    my $min = 99999999;
    my $start = time();

    # Count the lines through the same opener as the analysis itself, so that
    # compressed input is counted after decompression and the file name never
    # goes through a shell.
    my $nb_line = _count_lines($inputFile);
    my $nb_read = $nb_line/4;
    my $startP=time;
    print "Your file contains $nb_read reads. The analysis could take a while.\n";
    my $nb_read_checked=0;

    # go through the file
    while(my $separator = <$fh>){

        #Display progression (at most once every 30 seconds)
        if ((30 - (time - $startP)) < 0) {
            my $done = $nb_read ? ($nb_read_checked*100)/$nb_read : 0; # no division by zero on an empty file
            $done = sprintf ('%.0f', $done);
            print "Progression : $done % processed.\n";
            $startP= time;

        }

        # only the line before the quality line (the "+" line) is of interest
        next unless $separator =~ /^\+/;

        $nb_read_checked++;
        my $quality = <$fh>;            # get the quality line
        last unless defined($quality);  # truncated file: the "+" line was the last one
        chomp($quality);                # remove the newline

        foreach my $char (split(//, $quality)){ # for each char

            my $number = ord($char); # get the number represented by the ascii char

            # check if the new number is larger or smaller than the previous records
            if($number < $min){

                # update min and check how many systems are matching
                $min = $number;
                check($min, $max, \%systems, \%infoDisplay);
            }
            if($number > $max){

                # update max and check how many systems are matching
                $max = $number;
                check($min, $max, \%systems, \%infoDisplay);
            }

            # terminate if time is up
            if((time() - $start) >= $time){

                # print message to screen
                die "Time limit reached, observed qualities in range [$min,$max].\nPossible matches:\n".join("\n", check($min, $max, \%systems, \%infoDisplay))."\n".$remain."\n";
            }
        }
    }

    # reached the end of the file without finding a definite answer, without running out of time
    die "Reached end of file, observed qualities in range [$min,$max].\nPossible matches:\n".join("\n", check($min, $max, \%systems, \%infoDisplay))."\n".$remain."\n";

}




###subroutines

# Open a FASTQ file for reading and return the file handle.
# Names ending in .gz or .gzip are read through "gunzip -c"; the list form of
# the pipe open passes the name straight to gunzip without any shell.
# Dies when the file or the pipe cannot be opened.
sub _open_fastq{

    my ($file) = @_;

    my $handle;
    if( $file =~ /\.gz$/ or $file =~ /\.gzip$/){
        open ($handle, "-|", "gunzip", "-c", $file) or die "gunzip $file $!";
    }
    else{
        open ($handle, "<", $file) or die $!;
    }

    return $handle;
}

# Return the number of lines of a (possibly gzip-compressed) FASTQ file.
sub _count_lines{

    my ($file) = @_;

    my $handle = _open_fastq($file);
    my $count = 0;
    while(defined(my $line = <$handle>)){
        $count++;
    }
    close($handle);

    return $count;
}

# check how many scoring systems are matching the current max min values
#   $min, $max   : lowest / highest raw ASCII code observed so far
#   $systems     : hash ref, system name => [lowest, highest] accepted code
#   $infoDisplay : hash ref, system name => message to report
# Dies with the message of the system when exactly one system matches.
# Otherwise returns the list of messages still considered possible: empty when
# nothing matches, one best-guess message when several systems match.
sub check{

    # get arguments
    my ($min, $max, $systems, $infoDisplay) = @_;

    # messages of every system whose range contains the observed interval
    my @matching = map  { $infoDisplay->{$_} }
                   grep { ($min >= $systems->{$_}[0]) && ($max <= $systems->{$_}[1]) }
                   keys %{$systems};

    # check if only one system matched
    if(scalar(@matching) == 1){

        # print message to screen
        die "Only one possible match, observed qualities in range [$min,$max]:\n$matching[0]\n";

    }

    # If still not determined, keep only the most restrictive candidate
    if(scalar(@matching) >= 2){
        @matching=();


        if($min >= $systems->{'Illumina 1.5+'}[0]){
            push(@matching, $infoDisplay->{'Illumina 1.5+'});
        }
        elsif($min >= $systems->{'Illumina 1.3+'}[0]){
            push(@matching, $infoDisplay->{'Illumina 1.3+'});
        }
        elsif($min >= $systems->{'Solexa'}[0]){
            push(@matching, $infoDisplay->{'Solexa'});
        }
        else{ #could be Illumina 1.8+ or Sanger
            # NOTE(review): with the %systems table defined in this script both
            # upper bounds are 126, so the elsif below can never be selected.
            # The intended rule cannot be told from the code; confirm with the
            # authors before changing it.
            if($max == $systems->{'Sanger'}[1]){
                push(@matching, $infoDisplay->{'Sanger'});
            }
            elsif($max == $systems->{'Illumina 1.8+'}[1]){
                push(@matching, $infoDisplay->{'Illumina 1.8+'});
            }
            else{
                push(@matching, $infoDisplay->{'last'});
            }
        }

    }

    # return all matching systems
    return @matching;
}

__END__


-a
-t

=head1 NAME

Used to detect the format of a fastq file.
Be aware that parsing a fastq file could be very long, so think to set a maximum time, no need to check all the file to guess the format.

=head1 SYNOPSIS

    fastq_FormatDetect.pl -i <fastq file> [-a -t <seconds>]
    fastq_FormatDetect.pl --help

=head1 OPTIONS

=over 8

=item B<-i>, B<--fq> or B<--fastq>

STRING: Input fastq file that will be read.

=item B<-a>

Advanced mode. Can be used to find exactly which scoring system it is.

=item B<-t>

Set the max search time in seconds to be used when using -a. Default is 999999999 (no practical limit).
+ +=item B<--help> or B<-h> + +Display this helpful text. + +=back + +=cut diff --git a/bin/gaas_fastq_interleave.py b/bin/gaas_fastq_interleave.py new file mode 100755 index 000000000..cf1cb128c --- /dev/null +++ b/bin/gaas_fastq_interleave.py @@ -0,0 +1,42 @@ +#!/usr/bin/python +# encoding:utf8 +# authors: Erik Garrison, Sébastien Boisvert +"""This script takes two fastq or fastq.gz files and interleaves them +Usage: + interleave-fasta fasta_file1 fasta_file2 +""" + +import sys + +def interleave(f1, f2): + """Interleaves two (open) fastq files. + """ + while True: + line = f1.readline() + if line.strip() == "": + break + print line.strip() + + for i in xrange(3): + print f1.readline().strip() + + for i in xrange(4): + print f2.readline().strip() + +if __name__ == '__main__': + try: + file1 = sys.argv[1] + file2 = sys.argv[2] + except: + print __doc__ + sys.exit(1) + + if file1[-2:] == "gz": + import gzip + with gzip.open(file1) as f1: + with gzip.open(file2) as f2: + interleave(f1, f2) + else: + with open(file1) as f1: + with open(file2) as f2: + interleave(f1, f2) diff --git a/bin/gaas_fastq_pairfq_lite.pl b/bin/gaas_fastq_pairfq_lite.pl new file mode 100755 index 000000000..4ece1e562 --- /dev/null +++ b/bin/gaas_fastq_pairfq_lite.pl @@ -0,0 +1,901 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Cwd qw(getcwd abs_path); +use File::Basename; +use Getopt::Long; +use Pod::Usage; + +our $VERSION = '0.17.0'; + +my $infile; # input file for 'addinfo', 'splitpairs' and 'makepairs' methods +my $outfile; # output file for 'addinfo' method +my $fread; # file of forward reads for 'splitpairs', 'makepairs' and 'joinpairs' methods +my $rread; # file of forward reads for 'splitpairs', 'makepairs' and 'joinpairs' methods +my $fpread; # file of paired forward reads for 'makepairs' method +my $rpread; # file of paired reverse reads for 'makepairs' method +my $fsread; # file of unpaired forward reads for 'makepairs' method +my $rsread; # file of unpaired reverse reads 
for 'makepairs' method +my $pairnum; # for the 'addinfo' method +my $uppercase; # for 'addinfo' method +my $stats; # currently, for 'makepairs' option only + +my $version; +my $help; +my $man; +my $script = basename($0, ()); +$script = "pairfq_lite" if $script =~ /^-$|stdin/i; + +if ( !GetOptions( + 'i|infile=s' => \$infile, + 'o|outfile=s' => \$outfile, + 'p|pairnum=i' => \$pairnum, + 'f|forward=s' => \$fread, + 'r|reverse=s' => \$rread, + 'fp|forw_paired=s' => \$fpread, + 'rp|rev_paired=s' => \$rpread, + 'fs|forw_unpaired=s' => \$fsread, + 'rs|rev_unpaired=s' => \$rsread, + 'uc|uppercase' => \$uppercase, + 's|stats' => \$stats, + 'version' => \$version, + 'h|help' => \$help ) ) +{ + pod2usage( { -message => 'Failed to parse command line', + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($help) { + pod2usage( { -verbose => 99, + -exitval => 0 } ); +} +# +# Check @ARGV +# +print $VERSION and exit(0) if $version; + +my $method = shift; +if (!defined $method) { + print "\nERROR: Command line not parsed correctly. Check input.\n\n"; + usage($script); + exit(1); +} + +if ($method eq 'addinfo') { + if (!$pairnum || !$infile || !$outfile) { + print "\nERROR: Command line not parsed correctly. Check input.\n\n"; + addinfo_usage($script); + exit(1); + } + add_pair_info($pairnum, $infile, $outfile, $uppercase); +} +elsif ($method eq 'makepairs') { + if ($infile && $fpread && $rpread && $fsread && $rsread) { + interleaved_to_pairs_and_singles($script, $infile, $fpread, $rpread, $fsread, $rsread, $stats); + } + elsif (!$infile && $fread && $rread && $fpread && $rpread && $fsread && $rsread) { + make_pairs_and_singles($script, $fread, $rread, $fpread, $rpread, $fsread, $rsread, $stats); + } + else { + print "\nERROR: Command line not parsed correctly. Check input.\n\n"; + makepairs_usage($script); + exit(1); + } +} +elsif ($method eq 'joinpairs') { + if (!$fread || !$rread || !$outfile) { + print "\nERROR: Command line not parsed correctly. 
Check input.\n\n"; + joinpairs_usage($script); + exit(1); + } + pairs_to_interleaved($fread, $rread, $outfile); +} +elsif ($method eq 'splitpairs') { + if (!$infile || !$fread || !$rread) { + print "\nERROR: Command line not parsed correctly. Check input.\n\n"; + splitpairs_usage($script); + exit(1); + } + interleaved_to_pairs($infile, $fread, $rread); +} +else { + print "\nERROR: '$method' is not recognized. See the manual by typing 'perl $script -m',". + " or see https://github.com/sestaton/Pairfq.\n\n"; + exit(1); +} + +exit; +# +# Methods +# +sub add_pair_info { + my ($pairnum, $infile, $outfile, $uppercase) = @_; + + my $pair; + if ($pairnum == 1) { + $pair = "/1"; + } + elsif ($pairnum == 2) { + $pair = "/2"; + } + else { + print "\nERROR: $pairnum is not correct. Must be 1 or 2. Exiting.\n"; + exit(1); + } + + my $fh = get_fh($infile); + my $out = get_outfh($outfile); + + my @aux = undef; + my ($name, $comm, $seq, $qual); + + while (($name, $comm, $seq, $qual) = readfq(\*$fh, \@aux)) { + $seq = uc($seq) if $uppercase; + print $out join "\n", "@".$name.$pair, $seq, "+", "$qual\n" if defined $qual; + print $out join "\n", ">".$name.$pair, "$seq\n" if !defined $qual; + } + close $fh; + close $out; + + exit; +} + +sub make_pairs_and_singles { + my ($script, $fread, $rread, $fpread, $rpread, $fsread, $rsread, $stats) = @_; + + my ($rseqpairs, $rct) = store_pair($rread); + + my @faux = undef; + my ($fname, $fcomm, $fseq, $fqual, $forw_id, $rev_id); + my ($fct, $fpct, $rpct, $pct, $fsct, $rsct, $sct) = (0, 0, 0, 0, 0, 0, 0); + + my $fh = get_fh($fread); + open my $fp, '>', $fpread or die "\nERROR: Could not open file: $fpread\n"; + open my $rp, '>', $rpread or die "\nERROR: Could not open file: $rpread\n"; + open my $fs, '>', $fsread or die "\nERROR: Could not open file: $fsread\n"; + + while (($fname, $fcomm, $fseq, $fqual) = readfq(\*$fh, \@faux)) { + $fct++; + if ($fname =~ /(\/\d)$/) { + $fname =~ s/$1//; + } + elsif (defined $fcomm && $fcomm =~ /^\d/) { + 
$fcomm =~ s/^\d//; + $fname = mk_key($fname, $fcomm); + } + else { + print "\nERROR: Could not determine FASTA/Q format. ". + "Please see https://github.com/sestaton/Pairfq or the README for supported formats. Exiting.\n\n"; + exit(1); + } + + if ($fname =~ /\|\|/) { + my ($name, $comm); + ($name, $comm) = mk_vec($fname); + $forw_id = $name." 1".$comm if defined $comm; + $forw_id = $name."/1" if !defined $comm; + $rev_id = $name." 2".$comm if defined $comm; + $rev_id = $name."/2" if !defined $comm; + } + + if (exists $rseqpairs->{$fname}) { + $fpct++; + $rpct++; + if (defined $fqual) { + my ($rread, $rqual) = mk_vec($rseqpairs->{$fname}); + if ($fname =~ /\|\|/) { + print $fp join "\n", "@".$forw_id, $fseq, "+", "$fqual\n"; + print $rp join "\n", "@".$rev_id, $rread, "+", "$rqual\n"; + } + else { + print $fp join "\n", "@".$fname."/1", $fseq, "+", "$fqual\n"; + print $rp join "\n", "@".$fname."/2", $rread, "+", "$rqual\n"; + } + } + else { + if ($fname =~ /\|\|/) { + print $fp join "\n", ">".$forw_id, "$fseq\n"; + print $rp join "\n", ">".$rev_id, "$rseqpairs->{$fname}\n"; + } + else { + print $fp join "\n", ">".$fname."/1", "$fseq\n"; + print $rp join "\n", ">".$fname."/2", "$rseqpairs->{$fname}\n"; + } + } + delete $rseqpairs->{$fname}; + } + else { + $fsct++; + if (defined $fqual) { + if ($fname =~ /\|\|/) { + print $fs join "\n", "@".$forw_id, $fseq, "+", "$fqual\n"; + } + else { + print $fs join "\n", "@".$fname."/1", $fseq, "+", "$fqual\n"; + } + } + else { + if ($fname =~ /\|\|/) { + print $fs join "\n", ">".$forw_id, "$fseq\n"; + } + else { + print $fs join "\n", ">".$fname."/1", "$fseq\n"; + } + } + } + undef $forw_id; + undef $rev_id; + } + close $fh; + close $fp; + close $rp; + close $fs; + + open my $rs, '>', $rsread or die "\nERROR: Could not open file: $rsread\n"; + + while (my ($rname_up_unenc, $rseq_up_unenc) = each %$rseqpairs) { + $rsct++; + my ($rname_up, $rcomm_up) = mk_vec($rname_up_unenc); + my ($rseq_up, $rqual_up) = mk_vec($rseq_up_unenc); + 
+ my $rev_id_up .= $rname_up." 2".$rcomm_up if defined $rcomm_up; + + if (defined $rcomm_up && defined $rqual_up) { + print $rs join "\n", "@".$rev_id_up, $rseq_up, "+", "$rqual_up\n"; + } + elsif (defined $rcomm_up && !defined $rqual_up) { + print $rs join "\n", ">".$rev_id_up, "$rseq_up_unenc\n"; + } + elsif (!defined $rcomm_up && defined $rqual_up) { + print $rs join "\n", "@".$rname_up."/2", $rseq_up, "+", "$rqual_up\n"; + } + else { + print $rs join "\n", ">".$rname_up."/2", "$rseq_up_unenc\n"; + } + } + close $rs; + + $pct = $fpct + $rpct; + $sct = $fsct + $rsct; + + if (defined $stats) { + my $maxfn = max(length($fread), length($rread), length($fpread), length($rpread), length($fsread), length($rsread)); + my $offset = $maxfn + 38; + my $date = qx(date); chomp $date; + print "========= $script version : $VERSION (completion time: $date)\n"; + printf "%-${offset}s %s %10d\n", "Total forward reads ($fread)", ":",$fct; + printf "%-${offset}s %s %10d\n", "Total reverse reads ($rread)", ":", $rct; + printf "%-${offset}s %s %10d\n", "Total forward paired reads ($fpread)", ":", $fpct; + printf "%-${offset}s %s %10d\n", "Total reverse paired reads ($rpread)", ":", $rpct; + printf "%-${offset}s %s %10d\n", "Total forward unpaired reads ($fsread)", ":", $fsct; + printf "%-${offset}s %s %10d\n\n", "Total reverse unpaired reads ($rsread)", ":", $rsct; + printf "%-${offset}s %s %10d\n", "Total paired reads", ":", $pct; + printf "%-${offset}s %s %10d\n", "Total unpaired reads", ":", $sct; + } + exit; +} + +sub interleaved_to_pairs_and_singles { + my ($script, $infile, $fpread, $rpread, $fsread, $rsread, $stats) = @_; + + my $fh = get_fh($infile); + open my $fp, '>', $fpread or die "\nERROR: Could not open file: $fpread\n"; + open my $rp, '>', $rpread or die "\nERROR: Could not open file: $rpread\n"; + open my $fs, '>', $fsread or die "\nERROR: Could not open file: $fsread\n"; + open my $rs, '>', $rsread or die "\nERROR: Could not open file: $rsread\n"; + + my @aux = 
undef; + my ($fct, $rct, $fpct, $rpct, $pct, $fsct, $rsct, $sct, $pair) = (0, 0, 0, 0, 0, 0, 0, 0, 0); + my %singles; + my ($fpairname, $rpairname); + my ($name, $comm, $seq, $qual); + my ($fname, $fcomm, $fseq, $fqual, $rname, $rcomm, $rseq, $rqual); + + while (($name, $comm, $seq, $qual) = readfq(\*$fh, \@aux)) { + if ($name =~ /\/1$/) { + $fct++; + $pair = 1; + ($fname, $fpairname, $fseq, $fqual) = ($name, $name, $seq, $qual); + $fpairname =~ s/\/1//; + $singles{$fname} = { name => $fname, seq => $fseq, qual => $fqual, pair => $pair }; + } + elsif (defined $comm && $comm =~ /^1/) { + $fct++; + $pair = 1; + ($fname, $fpairname, $fseq, $fcomm, $fqual) = ($name, $name, $seq, $comm, $qual); + $singles{$fname} = { name => $fname, seq => $fseq, comm => $fcomm, qual => $fqual, pair => $pair }; + } + elsif ($name =~ /\/2$/) { + $rct++; + $pair = 2; + ($rname, $rpairname, $rseq, $rqual) = ($name, $name, $seq, $qual); + $rpairname =~ s/\/2//; + $singles{$rname} = { name => $rname, seq => $rseq, qual => $rqual, pair => $pair }; + } + elsif (defined $comm && $comm =~ /^2/) { + $rct++; + $pair = 2; + ($rname, $rpairname, $rseq, $rcomm, $rqual) = ($name, $name, $seq, $comm, $qual); + $singles{$rname} = { name => $rname, seq => $rseq, comm => $rcomm, qual => $rqual, pair => $pair }; + } + + next unless defined $fpairname && defined $rpairname; + if ($fpairname eq $rpairname) { + $fpct++; + $rpct++; + say $fp join "\n", ">".$fname, $fseq + if !defined $fqual && !defined $fcomm; + say $fp join "\n", ">".$fname." ".$fcomm, $fseq + if !defined $fqual && defined $fcomm; + say $fp join "\n", "@".$fname, $fseq, "+", $fqual + if defined $fqual && !defined $fcomm; + say $fp join "\n", "@".$fname." ".$fcomm, $fseq, "+", $fqual + if defined $fqual && defined $fcomm; + + say $rp join "\n", ">".$rname, $rseq + if !defined $rqual && !defined $rcomm; + say $rp join "\n", ">".$rname." 
".$rcomm, $rseq + if !defined $rqual && defined $rcomm; + say $rp join "\n", "@".$rname, $rseq, "+", $rqual + if defined $rqual && !defined $rcomm; + say $rp join "\n", "@".$rname." ".$rcomm, $rseq, "+", $rqual + if defined $rqual && defined $rcomm; + delete $singles{$fname}; + delete $singles{$rname}; + } + $pair = 0; + } + close $fh; + close $fp; + close $rp; + + for my $id (keys %singles) { + my $sfh; + if ($singles{$id}->{'pair'} == 1) { + $fsct++; + $sfh = $fs; + } + else { + $rsct++; + $sfh = $rs; + } + + say $sfh join "\n", ">".$singles{$id}->{'name'}, $singles{$id}->{'seq'} + if !defined $singles{$id}->{'qual'} && !defined $singles{$id}->{'comm'}; + say $sfh join "\n", ">".$singles{$id}->{'name'}." ".$singles{$id}->{'comm'}, $singles{$id}->{'seq'} + if !defined $singles{$id}->{'qual'} && defined $singles{$id}->{'comm'}; + say $sfh join "\n", "@".$singles{$id}->{'name'}, $singles{$id}->{'seq'}, "+", $singles{$id}->{'qual'} + if defined $singles{$id}->{'qual'} && !defined $singles{$id}->{'comm'}; + say $sfh join "\n", "@".$singles{$id}->{'name'}." 
".$singles{$id}->{'comm'}, $singles{$id}->{'seq'}, "+", $singles{$id}->{'qual'} + if defined $singles{$id}->{'qual'} && defined $singles{$id}->{'comm'}; + } + close $fs; + close $rs; + + $pct = $fpct + $rpct; + $sct = $fsct + $rsct; + + if (defined $stats) { + my $maxfn = max(length($infile), length($fpread), length($rpread), length($fsread), length($rsread)); + my $offset = $maxfn + 38; + my $date = qx(date); chomp $date; + print "========= $script version : $VERSION (completion time: $date)\n"; + printf "%-${offset}s %s %10d\n", "Total forward reads ($infile)", ":",$fct; + printf "%-${offset}s %s %10d\n", "Total reverse reads ($infile)", ":", $rct; + printf "%-${offset}s %s %10d\n", "Total forward paired reads ($fpread)", ":", $fpct; + printf "%-${offset}s %s %10d\n", "Total reverse paired reads ($rpread)", ":", $rpct; + printf "%-${offset}s %s %10d\n", "Total forward unpaired reads ($fsread)", ":", $fsct; + printf "%-${offset}s %s %10d\n\n", "Total reverse unpaired reads ($rsread)", ":", $rsct; + printf "%-${offset}s %s %10d\n", "Total paired reads", ":", $pct; + printf "%-${offset}s %s %10d\n", "Total unpaired reads", ":", $sct; + } + exit; +} + +sub pairs_to_interleaved { + my ($forward, $reverse, $outfile) = @_; + + my ($pairs, $ct) = store_pair($forward); + + my $fh = get_fh($reverse); + my $out = get_outfh($outfile); + + my @raux = undef; + my ($rname, $rcomm, $rseq, $rqual, $forw_id, $rev_id, $rname_enc); + + while (($rname, $rcomm, $rseq, $rqual) = readfq(\*$fh, \@raux)) { + if ($rname =~ /(\/\d)$/) { + $rname =~ s/$1//; + } + elsif (defined $rcomm && $rcomm =~ /^\d/) { + $rcomm =~ s/^\d//; + $rname = mk_key($rname, $rcomm); + } + else { + print "\nERROR: Could not determine FastA/Q format. ". + "Please see https://github.com/sestaton/Pairfq or the README for supported formats. Exiting.\n\n"; + exit(1); + } + + if ($rname =~ /\|\|/) { + my ($name, $comm) = mk_vec($rname); + $forw_id = $name." 
1".$comm if defined $comm; + $forw_id = $name."/1" if !defined $comm; + $rev_id = $name." 2".$comm if defined $comm; + $rev_id = $name."/2" if !defined $comm; + } + + if (exists $pairs->{$rname}) { + if (defined $rqual) { + my ($seqf, $qualf) = mk_vec($pairs->{$rname}); + if ($rname =~ /\|\|/) { + print $out join "\n", "@".$forw_id, $seqf, "+", "$qualf\n"; + print $out join "\n", "@".$rev_id, $rseq, "+", "$rqual\n"; + } + else { + print $out join "\n", "@".$rname."/1", $seqf, "+", "$qualf\n"; + print $out join "\n", "@".$rname."/2", $rseq, "+", "$rqual\n"; + } + } + else { + if ($rname =~ /\|\|/) { + print $out join "\n", ">".$forw_id, "$pairs->{$rname}\n"; + print $out join "\n", ">".$rev_id, "$rseq\n"; + } + else { + print $out join "\n", ">".$rname."/1", "$pairs->{$rname}\n"; + print $out join "\n", ">".$rname."/2", "$rseq\n"; + } + } + } + } + close $fh; + close $out; + + exit; +} + +sub interleaved_to_pairs { + my ($infile, $forward, $reverse) = @_; + + my $fh = get_fh($infile); + open my $f, '>', $forward or die "\nERROR: Could not open file: $forward\n"; + open my $r, '>', $reverse or die "\nERROR: Could not open file: $reverse\n"; + + my @aux = undef; + my ($name, $comm, $seq, $qual); + + while (($name, $comm, $seq, $qual) = readfq(\*$fh, \@aux)) { + if (defined $comm && $comm =~ /^1/ || $name =~ /\/1$/) { + print $f join "\n", ">".$name, "$seq\n" if !defined $qual && !defined $comm; + print $f join "\n", ">".$name." ".$comm, "$seq\n" if !defined $qual && defined $comm; + print $f join "\n", "@".$name, $seq, "+", "$qual\n" if defined $qual && !defined $comm; + print $f join "\n", "@".$name." ".$comm, $seq, '+', "$qual\n" if defined $qual && defined $comm; + } + elsif (defined $comm && $comm =~ /^2/ || $name =~ /\/2$/) { + print $r join "\n", ">".$name, "$seq\n" if !defined $qual && !defined $comm; + print $r join "\n", ">".$name." 
".$comm, "$seq\n" if !defined $qual && defined $comm; + print $r join "\n", "@".$name, $seq, "+", "$qual\n" if defined $qual && !defined $comm; + print $r join "\n", "@".$name." ".$comm, $seq, "+", "$qual\n" if defined $qual && defined $comm; + } + } + close $fh; + close $f; + close $r; + + exit; +} + +sub get_fh { + my ($file) = @_; + + unless ($file =~ /^-$|STDIN/i) { + $file = abs_path($file); + } + + my $fh; + if ($file =~ /\.gz$/) { + open $fh, '-|', 'zcat', $file or die "\nERROR: Could not open file: $file\n"; + } + elsif ($file =~ /\.bz2$/) { + open $fh, '-|', 'bzcat', $file or die "\nERROR: Could not open file: $file\n"; + } + elsif ($file =~ /^-$|STDIN/i) { + open $fh, '<&', \*STDIN or die "\nERROR: Could not open STDIN\n"; + } + else { + open $fh, '<', $file or die "\nERROR: Could not open file: $file\n"; + } + + return $fh; +} + +sub get_outfh { + my ($file) = @_; + + unless ($file =~ /^-$|STDOUT/i) { + $file = abs_path($file); + } + + my $fh; + if ($file =~ /^-$|STDOUT/i) { + open $fh, '>&', \*STDOUT or die "\nERROR: Could not open STDOUT\n"; + } + else { + open $fh, '>', $file or die "\nERROR: Could not open file: $file\n"; + } + + return $fh; +} + +sub store_pair { + my ($file) = @_; + + my $rct = 0; + my %rseqpairs; + my $cwd = getcwd(); + + my @raux = undef; + my ($rname, $rcomm, $rseq, $rqual); + + my $fh = get_fh($file); + + while (($rname, $rcomm, $rseq, $rqual) = readfq(\*$fh, \@raux)) { + $rct++; + if ($rname =~ /(\/\d)$/) { + $rname =~ s/$1//; + } + elsif (defined $rcomm && $rcomm =~ /^\d/) { + $rcomm =~ s/^\d//; + $rname = mk_key($rname, $rcomm); + } + else { + print "\nERROR: Could not determine FASTA/Q format. ". + "Please see https://github.com/sestaton/Pairfq or the README for supported formats. 
Exiting.\n\n";
            exit(1);
        }

        $rseqpairs{$rname} = mk_key($rseq, $rqual) if defined $rqual;
        $rseqpairs{$rname} = $rseq if !defined $rqual;
    }
    close $fh;
    return (\%rseqpairs, $rct);
}

# Read the next FASTA/FASTQ record from $fh.
#   $fh  : file handle (or glob reference) to read from
#   $aux : array reference holding the parser state between calls:
#          [0] a header line already read ahead, [1] true once input is
#          exhausted. An empty array or a one-element (undef) array both
#          work as the initial state.
# Returns ($name, $comm, $seq, $qual) for a FASTQ record and
# ($name, $comm, $seq) for a FASTA record or for a FASTQ record whose quality
# string is cut short by the end of the file. $comm is '' when the header has
# no comment. Returns the empty list when no record is left.
sub readfq {
    my ($fh, $aux) = @_;
    local $_;                                  # do not clobber the caller's $_
    @$aux = (undef, 0) if (!@$aux);            # two slots, not one array reference
    return if ($aux->[1]);
    if (!defined($aux->[0])) {
        # skip forward to the next header line
        while (<$fh>) {
            chomp;
            if (substr($_, 0, 1) eq '>' || substr($_, 0, 1) eq '@') {
                $aux->[0] = $_;
                last;
            }
        }
        if (!defined($aux->[0])) {
            $aux->[1] = 1;
            return;
        }
    }
    # Parse the stored header itself rather than whatever $_ currently holds.
    my $header = $aux->[0];
    my ($name, $comm) = $header =~ /^.(\S+)(?:\s+)(\S+)/ ? ($1, $2) :
                        $header =~ /^.(\S+)/             ? ($1, '') : ('', '');
    my $seq = '';
    my $c;
    $aux->[0] = undef;
    while (<$fh>) {
        chomp;
        $c = substr($_, 0, 1);
        last if ($c eq '>' || $c eq '@' || $c eq '+');
        $seq .= $_;
    }
    $aux->[0] = $_;                            # next header, "+" line, or undef at EOF
    $aux->[1] = 1 if (!defined($aux->[0]));
    # $c is undefined when the header was the last line of the input
    return ($name, $comm, $seq) if (!defined($c) || $c ne '+');
    my $qual = '';
    while (<$fh>) {
        chomp;
        $qual .= $_;
        if (length($qual) >= length($seq)) {
            $aux->[0] = undef;
            return ($name, $comm, $seq, $qual);
        }
    }
    # Quality string cut short by EOF: report the record without quality, and
    # keep the sequence in the third slot where callers expect it.
    $aux->[1] = 1;
    return ($name, $comm, $seq);
}

# Join the given fields into one "||"-separated key.
sub mk_key { return join "||", @_ }

# Split a key built by mk_key back into at most two fields, cutting at the
# first "||" only, so that a second field (e.g. a quality string) that itself
# contains "||" is returned whole. Trailing empty fields are dropped, as a
# plain split does.
sub mk_vec {
    my ($key) = @_;
    my @fields = split /\|\|/, $key, 2;
    pop @fields while @fields && $fields[-1] eq '';
    return @fields;
}

# Return the numerically largest of the arguments (undef for no argument).
sub max {
    my ($largest, @others) = @_;
    for my $candidate (@others) {
        $largest = $candidate if $candidate > $largest;
    }
    return $largest;
}

sub usage {
    my ($script) = @_;
    print STDERR< \$help,
    "gb=s" => \$gb,
    "outfile|output|o|out|embl=s" => \$outfile))
{
    pod2usage( { -message => "Failed to parse command line\n$header\n",
                 -verbose => 1,
                 -exitval => 1 } );
}

# Print Help and exit
if ($help) {
    pod2usage( { -verbose => 99,
                 -exitval => 0,
                 -message => "$header\n" } );
}

if ( ! 
(defined($gb)) ){ + pod2usage( { + -message => "$header\nMissing the --gb argument\n", + -verbose => 0, + -exitval => 1 } ); +} + +## Manage output file +my $embl_out; +if ($outfile) { +open(my $fh, '>', $outfile) or die "Could not open file '$outfile' $!"; + $embl_out= Bio::SeqIO->new(-fh => $fh, -format => 'embl'); +} +else{ + $embl_out = Bio::SeqIO->new(-fh => \*STDOUT, -format => 'embl'); +} + +### Read gb input file. +my $gb_in = Bio::SeqIO->new(-file => $gb, -format => 'genbank'); + + +### MAIN ### + +while( my $seq = $gb_in->next_seq) { + + $embl_out->write_seq($seq); + +} + +__END__ + +=head1 NAME + +gaas_gb2embl.pl + +=head1 DESCRIPTION + +The script take a Genebank file as input, and will translate it in EMBL format. + +=head1 SYNOPSIS + + gaas_gb2embl.pl --gb infile.gb [ -o outfile ] + +=head1 OPTIONS + +=over 8 + +=item B<--gb> + +Input genebank file that will be read + +=item B<-o> , B<--output> , B<--out> , B<--outfile> or B<--embl> + +Output embl file. If no output file is specified, the output will be +written to STDOUT. + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_info_BioPerlVersion.sh b/bin/gaas_info_BioPerlVersion.sh new file mode 100644 index 000000000..2aad96366 --- /dev/null +++ b/bin/gaas_info_BioPerlVersion.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +perl -MBio::Root::Version -e 'print "$Bio::Root::Version::VERSION\n"' diff --git a/bin/gaas_info_find_perl_module.sh b/bin/gaas_info_find_perl_module.sh new file mode 100755 index 000000000..060e8924c --- /dev/null +++ b/bin/gaas_info_find_perl_module.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +if [[ $# != 1 ]];then + echo "usage example: ./script Bio::Seq" + exit +fi + +perl -M${1} -le "\$mname=\"${1}.pm\";\$mname=~s#::#/#g;print \"$1 INSTALLED AT \$INC{\$mname}\";" 2>/dev/null || echo "${1} NOT INSTALLED" diff --git a/bin/gaas_install_WA_2_0_X.rb b/bin/gaas_install_WA_2_0_X.rb new file mode 100755 index 000000000..980199d7e --- /dev/null +++ b/bin/gaas_install_WA_2_0_X.rb @@ -0,0 +1,182 @@ +#!/usr/bin/ruby +# == NAME +# build.rb +# +# == USAGE +# ./this_script.rb [ -h | --help ] +# [ -i | --infile ] |[ -o | --outfile ] | +# == DESCRIPTION +# A script to build a new WebApollo installation (version 2.0 and up) +# You only need to do this once - Managing species will then be done directly from web interface +# +# == OPTIONS +# -h,--help:: Show help +# +# == EXPERT OPTIONS +# +# == AUTHOR +# Jacques Dainat, jacques.dainat@nbis.se + +require 'optparse' +require 'ostruct' + +#################################### +### Define modules and classes here + +def tomcat_group + + user_groups = `id` + user_groups.include?("tomcat") + +end + +def parse_apollo_config(file,config) + parsed_content = [] + f = File.open(file,"r") + while (line = f.gets) + if line.include?("url =") + line.gsub!(/"jdbc:postgresql:\/\/localhost\/apollo.*"/, "\"jdbc:postgresql://localhost/#{config[:web_apollo_db]}\"") + elsif 
line.include?("username =") + line.gsub!(//, "#{config[:pguser]}") + elsif line.include?("password =") + line.gsub!(//, "#{config[:pgpass]}") + end + + parsed_content << line + + end + f.close + parsed_content.compact! + + o = File.new(file, "w+") + o.puts parsed_content + o.close + +end + +#################################################### +### Get the script arguments and open relevant files + +### Get the script arguments and open relevant files +options = OpenStruct.new() +opts = OptionParser.new() +options.clean = false +opts.on("-n","--name", "=NAME","name of the installation") {|argument| options.name = argument } +opts.on("-c","--[no]clean","Clean up the installation") { options.clean = true } +opts.on("-t","--table","=INTEGER","Codon table used. By default it is the table 1") { |argument| options.codonTable = Integer(argument) } +opts.on("-p","--path","=PATH","Define the path to the WA folder to be installed. By default we are looking at: /big/webapollo/release/live_WA_2") { |argument| options.path = argument } +opts.on("-h","--help","Display the usage information") { + puts opts + exit +} + +opts.parse! + +raise "You are not member of the tomcat group and thus cannot deploy WebApollo!" unless tomcat_group + +@name = options.name or abort 'No Webapollo installation name provided' + +################### +### Usernames, passwords and locations + +user = ENV['USER'] +home = ENV['HOME'] +build_dir = ENV['APOLLO_BUILD_DIR'] or abort "Environment variable APOLLO_BUILD_DIR not set" +user_db_admin = ENV['WA_DB_ADMIN'] or abort "Environment variable WA_PGUSER not set" +user_db_admin_pw = ENV['WA_DB_ADMIN_PW'] or abort "Environment variable WA_PGPASS not set" +tomcat_apps = "/var/lib/tomcat/webapps" # Deployment location + + + +#Define the codon table to use +unless defined?(options.codonTable).nil? 
# If define we give the default path + if (options.codonTable<1 || options.codonTable>25) + puts "The --table option expects an integer between 1 and 25" + exit + end + if (options.codonTable>1) #We have to add the information to the otpion file + @addCodonString=options.codonTable + end +end + +#Define path to the WA folder +if (defined?(options.path)).nil? # If not define we give the default path + path="/big/webapollo/release/live_WA_2" +else + path=options.path +end + +config = { + :web_apollo_source => "#{path}",# The source code of webapollo + :web_apollo_build => "#{build_dir}/#{@name}", # The location where this WA project is to be build + + :web_apollo_db => "web_apollo_#{@name}", # Name of the user database for this WA project + :pgpass => "#{user_db_admin_pw}", # PW of the SQL WA admin + :pguser => "#{user_db_admin}" # Username of the SQL WA admin +} + + +### The workflow +if options.clean == true + + puts "Are you really sure to erase the whole webapollo installation ( #{tomcat_apps}/#{@name}, #{config[:web_apollo_build]},DB:#{config[:web_apollo_db]} ) ? [y|n]:" + selection = gets.chomp + if(selection.downcase == "y") + puts "Lets remove everything" + + puts "Cleaning webapollo folder" + system("sudo rm -Rf #{config[:web_apollo_build]}") + puts "Cleaning database" +# system("psql -d template1 -U #{config[:pguser]} -c \"DROP DATABASE IF EXISTS #{config[:web_apollo_db]}\"") + system("sudo su - postgres -c \"dropdb #{config[:web_apollo_db]}\"") + puts "Cleaning tomcat folder" + system("rm -f #{tomcat_apps}/#{@name}.war") + system("sudo rm -Rf #{tomcat_apps}/#{@name}") + puts "Cleaning finished" + else + puts "Fine we let everything as it was." + end +else + + File.directory?(config[:web_apollo_source]) or abort "Could not find the reference folder (expected: #{config[:web_apollo_source]})" + abort "Installation already exist. Use flag '--clean' to remove!" 
if File.directory?(config[:web_apollo_build]) + + # Create the folder where the data is to be stored + puts "Create folders" + system("mkdir -p #{config[:web_apollo_build]}") + + # Create a copy of the webapollo code for this installation + puts "Create a copy of the webapollo source" + system("cp -R #{config[:web_apollo_source]}/* #{config[:web_apollo_build]}") + + # Copy the template config files into place and configurate it + puts "rename the config file and configure it properly" + system("cp #{config[:web_apollo_build]}/sample-postgres-apollo-config.groovy #{config[:web_apollo_build]}/apollo-config.groovy") + parse_apollo_config("#{config[:web_apollo_build]}/apollo-config.groovy",config) + if(defined?(addCodonString).nil?) + open("#{config[:web_apollo_build]}/apollo-config.groovy", 'a') { |f| + f << "// default apollo settings\n" + f << "apollo {\n" + f << "get_translation_code = #{@addCodonString}\n" + f << "}\n" + } + end + + #Create a new database for this installation + puts "Create the database #{config[:web_apollo_db]}" + system("sudo su - postgres -c \"createdb -E UTF-8 -O #{config[:pguser]} #{config[:web_apollo_db]}\"") + + + # Build the webapollo installation + Dir.chdir(config[:web_apollo_build]) do + system("./apollo deploy") + end + + # Copy the packaged WebApollo installation to the Tomcat folder + system("cp #{config[:web_apollo_build]}/target/apollo*.war #{tomcat_apps}/#{@name}.war") + + #Final message: + puts "Congratulation ! Installation done. 
You can retrieve your installation on the webpage http://annotation-prod.scilifelab.se:8080/#{@name}" + +end + diff --git a/bin/gaas_interpro2grid.pl b/bin/gaas_interpro2grid.pl new file mode 100755 index 000000000..024aa34e3 --- /dev/null +++ b/bin/gaas_interpro2grid.pl @@ -0,0 +1,295 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Pod::Usage; +use Getopt::Long; +use Scalar::Util qw(openhandle); +use Time::Piece; +use Time::Seconds; +use File::Basename; +use Bio::SeqIO; +use Cwd; +use Carp; +no strict qw(subs refs); +use GAAS::Grid::Bsub; +use GAAS::Grid::Sbatch; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $outdir = undef; +my $genome = undef; +my $fasta = undef; +my $chunk_size = 10; # Partition size of fasta input +my @chunks = (); # Holds chunks, partitioning the fasta input (so we don't send 50.000 jobs to the farm... +my @cmds = (); # Stores the commands to send to farm +my $quiet; +my $grid="Slurm"; +my $queue=undef; +my $help; + +if ( ! GetOptions( + "chunk_size=s" => \$chunk_size, + "f|fasta=s" => \$fasta, + "grid=s" => \$grid, + "quiet|q!" => \$quiet, + "queue=s" => \$queue, + "outdir=s" => \$outdir, + "help|h!" => \$help ) ) + +{ + pod2usage( { -message => "Failed to parse command line", + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if (! defined($fasta)){ + pod2usage( { + -message => "$header\nAt least 1 parameter is mandatory:\nInput fasta file (--fasta)\n\n", + -verbose => 0, + -exitval => 2 } ); +} + +# set grid option properly +my @grid_choice=('slurm','lsf','none'); +$grid=lc($grid); +if (! grep( /^$grid/, @grid_choice ) ) { + print "$grid is not a value accepted for grid parameter.";exit; +} +$grid= undef if lc($grid) eq 'none'; + +# .. 
Check that all binaries are available in $PATH + +my $interproscan = "/sw/bioinfo/interproscan-5.3-46.0/interproscan.sh" ; + +if (-f $interproscan ) { + msg ("Found interproscan at $interproscan"); +} else { + die "Could not find Interproscan at the default location." ; +} + +# .. Create output directory + +if (-d $outdir ) { + die "Output directory $outdir exists. Please remove and try again"; +} else { + msg("Creating output directory $outdir"); + runcmd("mkdir -p $outdir") +} + +# .. set up log file + +my $logfile = "$outdir/generic.log"; +msg("Writing log to: $logfile"); +open LOG, '>', $logfile or err("Can't open logfile"); + +#### HERE YOU READ YOUR FILE TO BE CHUNKED AND RUN ON GRID! #### +#### Example below: Read a FASTA file, split into smaller sub-files +#### and analyse with e.g. blast or whatever via grid-submission + +# .. Read e.g. protein fasta file. +my $inseq = Bio::SeqIO->new(-file => "<$fasta", -format => 'fasta'); + +# .. and create chunks +msg("Creating chunks for GRID\n"); + +my @seqarray = (); # Stores entries for a given chunk to be printed later +my $counter = 0; +my $chunk_counter = 1; + +my $seq; + +while( $seq = $inseq->next_seq() ) { + $counter += 1; + push(@seqarray,$seq); + + if ($counter == $chunk_size) { + my $outfile = $outdir . "/chunk_" . $chunk_counter . ".fa"; + write_chunk($outfile,@seqarray); + @seqarray = (); + $chunk_counter += 1; + $counter = 0; + } +} +my $outfile = $outdir . "/chunk_" . $chunk_counter . ".fa"; # Clunky, the last chunk is <= chunk_size... +write_chunk($outfile,@seqarray); + +# Push all jobs into the command list + +for (my $i=1;$i<=$chunk_counter;$i++) { + + my $command = $interproscan . 
" -i $outdir/chunk_$i.fa -d $outdir" ; + push(@cmds,$command); +} + + +# Submit job chunks to grid +msg("submitting chunks\n"); + +if( $grid){ + msg("Sending $#cmds jobs to the grid\n"); + chomp(@cmds); # Remove empty indices + # Submit job chunks to grid + my $grid_runner; + if ( $grid eq 'lsf'){ + $grid_runner = Bsub->new( cmds_list => \@cmds); + } + elsif( $grid eq 'slurm'){ + $grid_runner = Sbatch->new( cmds_list => \@cmds); + } + if($queue){$grid_runner->queue($queue)} + $grid_runner->run(); +} +else{ + foreach my $command (@cmds){ + + system($command); + + if ($? == -1) { + print "failed to execute: $!\n"; + } + elsif ($? & 127) { + printf "child died with signal %d, %s coredump\n", + ($? & 127), ($? & 128) ? 'with' : 'without'; + } + else { + printf "child exited with value %d\n", $? >> 8; + } + } +} + +# Merging the outputs +msg("Merging outputs from chunks"); + +my @files = <$outdir/*.tsv>; + +foreach my $file (@files) { + system("cat $file >> $outdir/interprosan.merged.tsv"); +} + +msg("Finished with InterProScan"); + + +# -------------------- + +sub write_chunk { + my $outfile = shift; + my @seqs = @_; + my $seq_out = Bio::SeqIO->new(-file => ">$outfile", -format => 'fasta'); + foreach my $seq (@seqs) {$seq_out->write_seq($seq)}; +} + +# -------------------- + +sub msg { + my $t = localtime; + my $line = "[".$t->hms."] @_\n"; + print LOG $line if openhandle(\*LOG); + print STDERR $line unless $quiet; +} + +# -------------------- + +sub runcmd { + msg("Running:", @_); + system(@_)==0 or err("Could not run command:", @_); +} + +# -------------------- + +sub check_bin { + length(`which @_`) > 0 ? 
return 1 : return 0; +} + +#---------------------------------------------------------------------- + +sub err { + $quiet=0; + msg(@_); + exit(2); +} + +__END__ + +=head1 NAME + +gaas_interpro2grid.pl - + +=head1 DESCRIPTION + +Chunk input data to run multiple InterProScan jobs in parallel on the grid + +=head1 SYNOPSIS + + gaas_interpro2grid.pl -f fasta_file -o outdir + gaas_interpro2grid.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<--fasta> or B<-f> + +The name of the protein fasta file to use as query. + +=item B<--chunk_size> + +The number of sequences per job. If not provided, the default size +is 10. + +=item B<--queue> + +If you want to define a particular queue to run the jobs + +=item B<--grid> + +Define which grid to use, Slurm, Lsf or None. Default = Slurm. + +=item B<--quiet> or B<-q> + +Quiet mode + +=item B<--outdir> or B<-o> + +The name of the output directory. + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_junctions2hints.pl b/bin/gaas_junctions2hints.pl new file mode 100755 index 000000000..a74a6ef14 --- /dev/null +++ b/bin/gaas_junctions2hints.pl @@ -0,0 +1,63 @@ +#!/usr/bin/env perl +# A script to convert a junction.bed output from Tophat to a gff format to be used as an intronhints file in Augustus +# If you have several junctions.bed files, concatenate these first +# usage: perl junctions2hints.pl --infile junctions.bed + + +use warnings; +use strict; +use Getopt::Long; + +my $usage = qq{ +Usage: perl junctions2hints.pl --infile junctions.bed +If you have several junctions.bed files, concatenate these first. +A file called intronhints.gff will be created with all your intron-hints, including multiplicities. + + Getting help: + [--help] + + Input file + [--infile] + Name of junctions.bed file + Output file + [--outfile] + Name of the hint file to write +}; + +my $help; +my $infile; +my $outfile; +my %junctions=(); + +GetOptions( + "help" => \$help, + "outfile=s" => \$outfile, + "infile=s" => \$infile); + +# Print Help and exit +if ($help) { + print $usage; + exit(0); +} +open INFILE, $infile or die "$usage"; +open HINTS,">$outfile"; + +while () { + chomp; + unless ($_ =~ /^track/){ + my @bed_line=split(/\t/, $_); + my ($startblock,$endblock)=split(/\,/, $bed_line[10]); + $bed_line[1]=$bed_line[1]+$startblock+1; + $bed_line[2]=$bed_line[2]-$endblock; + my $key = join (':',$bed_line[0],$bed_line[1],$bed_line[2]); + $junctions{$key} +=$bed_line[4]; + } +} +close INFILE; + +foreach my $key (sort keys %junctions){ + my ($scaffold, $start, $stop) = split (/\:/, $key); + print HINTS "$scaffold\ttophat\tintron\t$start\t$stop\t0\t\.\t\.\tmult=$junctions{$key}\;src=E\n"; +} + +close HINTS; diff --git a/bin/gaas_lastz2grid.pl b/bin/gaas_lastz2grid.pl new file mode 100755 index 
000000000..efa23a999 --- /dev/null +++ b/bin/gaas_lastz2grid.pl @@ -0,0 +1,345 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Pod::Usage; +use Getopt::Long; +use Scalar::Util qw(openhandle); +use Time::Piece; +use Time::Seconds; +use File::Basename; +use Cwd; +use Carp; +no strict qw(subs refs); +use GAAS::Grid::Bsub; +use GAAS::Grid::Sbatch; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $outdir = undef; +my $query = undef; +my $target = undef; +my @cmds = (); # Stores the commands to send to farm +my @query_seqs = (); # List of sequences from the query genome +my @target_seqs = (); # List of sequences from the target genome +my @lav_files = (); +my $job_limit = 500; # Maximum number of jobs to allow before aborting +my $quiet; +my $grid="Slurm"; +my $queue=undef; +my $help; + +if ( ! GetOptions( + "query=s" => \$query, + "target=s" => \$target, + "grid=s" => \$grid, + "quiet|q!" => \$quiet, + "queue=s" => \$queue, + "outdir=s" => \$outdir, + "help|h!" => \$help ) ) + +{ + pod2usage( { -message => "Failed to parse command line", + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if ( ! defined($query) or ! defined($target)){ + pod2usage( { + -message => "$header\nAt least 2 parameters are mandatory:\n a query genome file (--query) and a target genome file (--target)\n\n", + -verbose => 0, + -exitval => 2 } ); +} + +# set grid option properly +my @grid_choice=('slurm','lsf','none'); +$grid=lc($grid); +if (! grep( /^$grid/, @grid_choice ) ) { + print "$grid is not a value accepted for grid parameter.";exit; +} +$grid= undef if lc($grid) eq 'none'; + +# .. Check that all binaries are available in $PATH + +my @tools = ("faToNib" , "lastz" , "fastaexplode" ); # List of tools to check for! 
+foreach my $exe (@tools) { check_bin($exe) == 1 or die "Missing executable $exe in PATH"; } + +my $working_dir = getcwd; + +my $query_base = basename($query) ; +my $target_base = basename($target) ; + +my $query_dir = $outdir . "/" . $query_base ; +my $target_dir = $outdir . "/" . $target_base ; + + +# .. Create output directory + +if (-d $outdir ) { + die "Output directory $outdir exists. Please remove and try again"; +} else { + msg("Creating output directory $outdir"); + runcmd("mkdir -p $outdir"); + runcmd("mkdir -p $outdir/chain"); + runcmd("mkdir -p $outdir/lav"); + runcmd("mkdir -p $outdir/psl"); + runcmd("mkdir -p $query_dir"); + runcmd("mkdir -p $target_dir"); +} + +# .. set up log file + +my $logfile = "$outdir/lastz.log"; +msg("Writing log to: $logfile"); +open LOG, '>', $logfile or err("Can't open logfile"); + +msg("Generating size indices for genomes"); + +my $query_size = $outdir . "/" . basename($query) . ".sizes" ; +my $target_size = $outdir . "/" . basename($target) . ".sizes" ; + +runcmd("faSize $query -detailed > $query_size"); +runcmd("faSize $target -detailed > $target_size"); + +msg("Exploding genome sequences for recursive searches"); + +runcmd("fastaexplode -f $query -d $query_dir"); +runcmd("fastaexplode -f $target -d $target_dir"); + +msg("Converting and gathering sequences for LastZ recursive alignments..."); + +msg("Searching $query_dir"); + +opendir (DIR, $query_dir ."/" ) or die "Could not open directory for reading ($query_dir)" ; + +while (my $file = readdir(DIR)) { + next unless ($file =~ m/fa$/ ); + my $nib_file = $file; + $nib_file =~ s/fa$/nib/ ; + system("faToNib $query_dir/$file $query_dir/$nib_file"); + push @query_seqs, $nib_file ; + +} + +closedir(DIR); + +opendir(DIR, $target_dir) or die "Could not open directpry for reading ($target_dir)"; + +msg("Searching $target_dir"); + +while (my $file = readdir(DIR)) { + next unless ($file =~ m/fa$/ ); + my $nib_file = $file; + $nib_file =~ s/fa$/nib/ ; + system("faToNib 
$target_dir/$file $target_dir/$nib_file"); + push @target_seqs, $nib_file ; +} + +closedir(DIR); + +my $query_jobs = scalar @query_seqs ; +my $target_jobs = scalar @target_seqs ; + +die "Way too many jobs - can't submit that to the grid. Consider limiting your input data" if ($target_jobs * $query_jobs > $job_limit); + +msg("Building commands for GRID..."); + +foreach my $query_seq(@query_seqs) { + + foreach my $target_seq(@target_seqs) { + my $lav_file = $query_seq . "-" . $target_seq . ".lav" ; + push @lav_files , $lav_file ; + push @cmds , "lastz $query_dir/$query_seq $target_dir/$target_seq > $outdir/lav/$lav_file" ; + } + +} + +# Submit job chunks to grid +msg("submitting chunks\n"); + +if( $grid){ + msg("Sending $#cmds jobs to the grid\n"); + chomp(@cmds); # Remove empty indices + # Submit job chunks to grid + my $grid_runner; + if ( $grid eq 'lsf'){ + $grid_runner = Bsub->new( cmds_list => \@cmds); + } + elsif( $grid eq 'slurm'){ + $grid_runner = Sbatch->new( cmds_list => \@cmds); + } + if($queue){$grid_runner->queue($queue)} + $grid_runner->run(); +} +else{ + foreach my $command (@cmds){ + + system($command); + + if ($? == -1) { + print "failed to execute: $!\n"; + } + elsif ($? & 127) { + printf "child died with signal %d, %s coredump\n", + ($? & 127), ($? & 128) ? 'with' : 'without'; + } + else { + printf "child exited with value %d\n", $? 
>> 8; + } + } +} + +msg("### Converting LAV files to PSL ###"); + +foreach my $lav_file (@lav_files) { + + my $psl_file = $lav_file ; + $psl_file =~ s/lav$/psl/ ; + runcmd("lavToPsl $outdir/lav/$lav_file $outdir/psl/$psl_file"); +} + +opendir(DIR, $outdir."/psl") or die "Could not open directory for reading ($outdir)" ; + + +while (my $file = readdir(DIR)) { + + next unless ($file =~ m/psl$/ ); + + my $chainfile = $file; + $chainfile =~ s/psl$/chain/ ; + + runcmd("axtChain -linearGap=loose -psl $outdir/psl/$file $query_dir $target_dir $outdir/chain/$chainfile"); + +} + +closedir(DIR); + +# Merge Chains + +msg("### Merging chains ###"); + +runcmd("chainMergeSort $outdir/chain/*.chain > $outdir/all.chain"); + +runcmd("chainPreNet $outdir/all.chain $query_size $target_size $outdir/all.pre.chain"); + +# Netting chains + +msg("### Netting the chains ###"); + +runcmd("chainNet $outdir/all.pre.chain -minSpace=1 $query_size $target_size stdout /dev/null | netSyntenic stdin $outdir/lastz.net"); + + + +# -------------------- + +sub msg { + my $t = localtime; + my $line = "[".$t->hms."] @_\n"; + print LOG $line if openhandle(\*LOG); + print STDERR $line unless $quiet; +} + +# -------------------- + +sub runcmd { + msg("Running:", @_); + system(@_)==0 or err("Could not run command:", @_); +} + +# -------------------- + +sub check_bin { + length(`which @_`) > 0 ? return 1 : return 0; +} + +#---------------------------------------------------------------------- + +sub err { + $quiet=0; + msg(@_); + exit(2); +} + +__END__ + +=head1 NAME + +gaas_lastz2grid.pl + +=head1 DESCRIPTION + +Run pairwise LASTZ alignments between the sequences of a query and a target genome in parallel on the grid, then chain and net the results + +=head1 SYNOPSIS + + gaas_lastz2grid.pl --query query.fa --target target.fa --outdir outdir + gaas_lastz2grid.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<--query> + +The name of the query genome file. + +=item B<--target> + +The name of the target genome file. 
+ +=item B<--queue> + +If you want to define a particular queue to run the jobs + +=item B<--grid> + +Define which grid to use, Slurm, Lsf or None. Default = Slurm. + +=item B<--quiet> or B<-q> + +Quiet mode + +=item B<--outdir> or B<-o> + +The name of the output directory. + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_maker_AEDmeanInGffFile.sh b/bin/gaas_maker_AEDmeanInGffFile.sh new file mode 100755 index 000000000..2db5d4268 --- /dev/null +++ b/bin/gaas_maker_AEDmeanInGffFile.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +############################################################################ +# JD 2014/04 +# What it does: reads a GFF file and reports the number of genes and mRNAs and the mean AED +# use: script gffFile +############################################################################ + +# Arguments and Paths +if (( $# !=1 )); then + echo -e "You have to specify a a gff file" + exit +else + gffFile=$1 + if [[ ! -f $gffFile ]];then + echo "The file $gffFile doesn't exist." 
+ exit + fi +fi + +#file arg is required +total=0;nb=0; for i in $(awk '{if($3 == "mRNA") print $9}' $1 | cut -d';' -f4 | cut -d'=' -f2 );do resu=$total; total=$(echo $resu+$i | bc);((nb=nb+1));done; mean=$(echo "scale=4;$total / $nb" | bc) +nbGene=$(awk '{if($3 == "gene") print $0}' $1 | wc -l) +echo -e "\nThere is $nbGene genes for $nb mRNA." +echo -e "\nAED=> Between 1 and 0. The lowest the this value is, the better it is." +echo "AED Total of $nb mRNA is $total" +echo "The AED mean by mRNA is $mean" diff --git a/bin/gaas_maker_AEDplot.pl b/bin/gaas_maker_AEDplot.pl new file mode 100755 index 000000000..8a047e724 --- /dev/null +++ b/bin/gaas_maker_AEDplot.pl @@ -0,0 +1,386 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Statistics::R; +use File::Basename; +use strict; +use Carp; +use Getopt::Long; +use IO::File; +use Pod::Usage; +use List::MoreUtils qw(uniq); +use Bio::Tools::GFF; +use GAAS::GAAS; + +my $header = get_gaas_header(); + +# PARAMETERS - OPTION +my @opt_files; +my $opt_output; +my $opt_breaks; +my $opt_help = 0; +# END PARAMETERS - OPTION + + +# OPTION MANAGMENT +if ( !GetOptions( 'f=s' => \@opt_files, + 'w|window=i' => \$opt_breaks, + 'o|output=s' => \$opt_output, + 'h|help!' => \$opt_help ) ) +{ + pod2usage( { -message => 'Failed to parse command line', + -verbose => 1, + -exitval => 1 } ); +} + +if ($opt_help) { + pod2usage( { -verbose => 2, + -exitval => 0 } ); +} + +if ( ! ($#opt_files >= 0)){ + pod2usage( { + -message => "\nAt least 1 parameter is mandatory:\nInput gff file\n\n". + "You may add as many file you want like: -f file1 -f file2 -f file3\n". + "Many optional parameters are available. Look at the help documentation to know more.\n", + -verbose => 0, + -exitval => 2 } ); +} + +# Check R is available. If not we try to load it through Module software + +if ( system("R --version 1>/dev/null 2>/dev/null") == 0 ) { + print "R is available. We can continue\n"; +} +else { + die "R no available. 
We cannot perform any plot\n"; +} + + +############################# +####### Manage options ####### +############################# + +#Choose breaks value: +if(! $opt_breaks){ + $opt_breaks="0.05"; +} + +############################# +####### Manage output ####### +############################# +my $outputPDF; +if (defined($opt_output) ) { + if (-f $opt_output){ + print "Cannot create a directory with the name $opt_output because a file with this name already exists.\n";exit(); + } + $outputPDF=$opt_output."pdf"; +} +else{ + $outputPDF="outputPlot.pdf"; +} + +####################### +# MAIN # +####################### + +########## constant ############# +my $R_command; +my $nbFile=0; +my @listTmpFile; +#Choose a title: +my $title="AED distribution"; + +#PART 1 +################################### +# Read input gff3 files one by one and save value in hash of list +my %hashOfList; +foreach my $file (@opt_files){ + + my $gffio = Bio::Tools::GFF->new(-file => $file, -gff_version => 3); + + # parse file name te remove extension + my ($file1,$dir1,$ext1) = fileparse($file, qr/\.[^.]*/); + + #Parse GFF to get AED information for each mRNA + my $listRef=parseGFF($gffio); + + $hashOfList{$file1}=$listRef; + $gffio->close(); + + print("Parsing $file Finished\n\n"); +} + +#PART 2 +############################### +#print values in file and pre-plot to get highest Y axis. +my $highestYaxis=0; +foreach my $fileName (keys %hashOfList){ + + ######################### + # Write value in tmp files + my $pathAED="tmp_AED_".$fileName.".txt"; + push (@listTmpFile, $pathAED); + # Manage Output + my $ostreamAED = IO::File->new(); + $ostreamAED->open( $pathAED, 'w' ) or + croak( + sprintf( "Can not open '%s' for writing %s", $pathAED, $! 
) + ); + foreach my $AEDvalue (@{$hashOfList{$fileName}}){ + print $ostreamAED "$AEDvalue\n"; + } + $ostreamAED->close(); + + ## check using R + my $R = Statistics::R->new() or die "Problem with R : $!\n"; + + #R command + $R->run(qq` + + listValues1=as.matrix(read.table("$pathAED", sep="\t", he=F)) + + # create a break point list formated correctly in purpose + a<-seq(0,0.9999,$opt_breaks) + a[length(a)+1]<-0.99999 + a[length(a)+1]<-1 + breakingPointList<-c(0,a) + + hist1<-hist(listValues1, breaks=breakingPointList, plot=F) + plot(hist1\$mids,hist1\$counts) + #par(new=TRUE) + mylims <- par("usr")` + ); + + #retrieve R values in Perl + my $maxY = $R->get('mylims'); + + if($maxY->[$#$maxY] > $highestYaxis){ + $highestYaxis=$maxY->[$#$maxY]; + } + + # Close the bridge + $R->stopR(); +} + +#PART 3 +############################################ +## Read values from files and plot with correct Y axis +foreach my $fileName (keys %hashOfList){ + $nbFile++; + my $pathAED="tmp_AED_".$fileName.".txt"; # Need to be similar as in #PART 2 + + + ################################## + # create main part of R command # + ################################## + if($nbFile > 1){ # only one plot to do + $R_command.=' + par(new=TRUE)'; + # write it for each file + $R_command.=write_R_command($pathAED, $fileName, $highestYaxis, $nbFile, $outputPDF); + } + else{ + # write it for each file + $R_command.=write_first_R_command($pathAED, $fileName, $highestYaxis, $nbFile, $outputPDF); + } +} + + +#################################### +# Surround main part of R command # +#################################### + + #add header + my $final_R_command='#create output + pdf("'.$outputPDF.'") + # create an empty vector + listlegend <- c();'; + + #add heart + $final_R_command.=$R_command; + + #add footer + $final_R_command .='# Add Title + title(main="'.$title.'") + + #Add Legend + legend("topright", col=(1:'.$nbFile.'), lty=1, c(listlegend))'; + +# plot +plotR($final_R_command); + +# remove temporary files 
+unlink @listTmpFile; + + ######################### + ######### END ########### + ######################### +####################################################################################################################### + #################### + # methods # + ################ + ############## + ############ + ########## + ######## + ###### + #### + ## + +sub write_first_R_command{ + my ($pathIn1,$name1,$yAxisValue,$colorNb,$outputPDF)=@_; + + my $command=' + + listValues1=as.matrix(read.table("'.$pathIn1.'", sep="\t", he=F)) + legendInfo=paste("'.$name1.'","(",length(listValues1),"mRNAs )") + listlegend<-c(listlegend,legendInfo) + + # create a break point list formated correctly in purpose + a<-seq(0,0.9999,'.$opt_breaks.') + a[length(a)+1]<-0.99999 + a[length(a)+1]<-1 + breakingPointList<-c(0,a) + + hist1<-hist(listValues1, breaks=breakingPointList, plot=F) + plot(hist1$mids,hist1$counts, type="l", ylim=c(0,'.$yAxisValue.'), col='.$colorNb.', main="", xlab="AED score", ylab="Number of mRNA") + '; + + return $command +} + +#xlab="AED score" +sub write_R_command{ + my ($pathIn1,$name1,$yAxisValue,$colorNb,$outputPDF)=@_; + + my $command=' + + listValues1=as.matrix(read.table("'.$pathIn1.'", sep="\t", he=F)) + legendInfo=paste("'.$name1.'","(",length(listValues1),"mRNAs )") + listlegend<-c(listlegend,legendInfo) + + # create a break point list formated correctly in purpose + a<-seq(0,0.9999,'.$opt_breaks.') + a[length(a)+1]<-0.99999 + a[length(a)+1]<-1 + breakingPointList<-c(0,a) + + hist1<-hist(listValues1, breaks=breakingPointList, plot=F) + plot(hist1$mids,hist1$counts, type="l", ylim=c(0,'.$yAxisValue.'), col='.$colorNb.', main="", yaxt="n", xaxt="n", xlab="", ylab="") + '; + + return $command +} + +sub plotR { + my ($command)=@_; + + my $R = Statistics::R->new() or die "Problem with R : $!\n"; + +#R command +$R->send( + qq` + + $command + + dev.off()` + ); + +# Close the bridge +$R->stopR(); +} + + +# Delete temporary file +#unlink "$pathIn1"; +#unlink 
"$pathIn2"; + +# Parse a GFF3 stream and collect the AED values it contains. +# Only mRNA features are considered; the value of their _AED attribute is pushed to the returned list. +sub parseGFF { + my @list; + my($file_in) = @_; + print( "Reading features from $file_in...\n"); + # read file and decompose it + while (my $feature = $file_in->next_feature() ) { + + my $type = $feature->primary_tag(); + + if (lc($type) eq 'mrna'){ + + if(! $feature->has_tag('_AED')){ + print "AED of this feature not found for".$feature->_tag_value('ID')."\n"; + } + else{ + my $AED=$feature->_tag_value('_AED'); + push(@list,$AED); + } + } + } + return \@list; +} + +__END__ + +=head1 NAME + +gaas_maker_AEDplot.pl + +=head1 DESCRIPTION + +The script takes one or several GFF file(s) produced by Maker as input and creates a plot of their AED scores (attribute used: "_AED"). - +=head1 SYNOPSIS + + gaas_maker_AEDplot.pl -f infile1.gff [ --output outfile ] + gaas_maker_AEDplot.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<-f> + +Input GFF3 file(s) created with Maker (with mRNA features carrying the _AED attribute). To use several distinct files do: -f file1 -f file2 -f file3 + +=item B<-w>, B<--window> +The AED score is a value between 0 and 1. The window size sets the width of the bins used to compute the distribution, and therefore how fine-grained the plot is. The default value is 0.05. + +=item B<--output>, B<-o> + +Output name of the pdf file created. If none is provided, the default output is outputPlot.pdf + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. 
+ Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_maker_check_progress.sh b/bin/gaas_maker_check_progress.sh new file mode 100755 index 000000000..299267a49 --- /dev/null +++ b/bin/gaas_maker_check_progress.sh @@ -0,0 +1,151 @@ +#!/bin/bash + +############################################################################ +# JD 2014/04 +# What it does ? => script look the index log of maker to resume the contig analyzed and those not +# use: script logfile +############################################################################ + + +# Arguments and Paths +logFile="" +if (( $# !=1 )); then + echo -e "You can specify the maker log file path.\nNevetheless we will try the default path ..." + if [ ! 
-f genome.maker.output/genome_master_datastore_index.log ];then + echo -e "The default path do not work\nRetry giving a correct path to the log file" + exit + else + logFile=genome.maker.output/genome_master_datastore_index.log + fi +else + logFile=$1 +fi + + +# Light information +#small=$(awk ' { if($3 == "SKIPPED_SMALL") nb++} END {print nb}' $logFile) +small=$(awk ' { if($3 == "SKIPPED_SMALL") print $1}' $logFile | sort -n | uniq | wc -l) +if [[ $small == "" ]];then + small=0 +fi +echo -e "\n$small contigs are too small to be analyzed\n" + +#startedDoublon=$(awk ' { if($3 == "STARTED") nb++} END {print nb}' $logFile) +startedOne=$(awk ' { if($3 == "STARTED") print $1}' $logFile | sort -n | uniq -u | wc -l) +echo "$startedOne contigs has begin to be studied only one time" +startedDoublon=$(awk ' { if($3 == "STARTED") print $1}' $logFile | sort -n | uniq -d | wc -l) +echo "$startedDoublon contigs have been started several times" +((started=$startedOne+$startedDoublon)) +echo "$started contigs has begin to be studied in total" +startedAll=$(awk ' { if($3 == "STARTED") print $1}' $logFile | wc -l) +echo -e "$startedAll STARTED signal in total\n" + +#finished=$(awk ' { if($3 == "FINISHED") nb++} END {print nb}' $logFile) +finished=$(awk ' { if($3 == "FINISHED") print $1}' $logFile | sort -n | uniq -u | wc -l) +finishedDoublon=$(awk ' { if($3 == "FINISHED") print $1}' $logFile | sort -n | uniq -d | wc -l) +echo "$finished contigs have been finished" +echo -e "$finishedDoublon contigs have been finished several times\n" + +#skippedperm=$(awk ' { if($3 == "DIED_SKIPPED_PERMANENT") nb++} END {print nb}' $logFile) +skippedpermAll=$(awk ' { if( $3 ~ /.*DIED.*/ ) print $1}' $logFile | wc -l) +echo "$skippedpermAll DIED_SKIPPED_PERMANENT signal found. The number of retry attempts has been reached" +skippedperm=$(awk ' { if( $3 ~ /.*DIED.*/ ) print $1}' $logFile | sort -n | uniq | wc -l) +echo -e "$skippedperm contigs (doublon removed) have the signal DIED_SKIPPED_PERMANENT. 
The number of retry attempts has been reached\n" + +declare -A startList +declare -A finishList +declare -A diedList +declare -A allBug +for i in $(awk ' { if($3 == "STARTED") print $1}' $logFile | sort -n | uniq);do + startList["$i"]=1 +done + +for i in $(awk ' { if($3 == "FINISHED") print $1}' $logFile | sort -n | uniq);do + finishList["$i"]=1 +done + +for i in $(awk ' { if($3 ~ /.*DIED.*/) print $1}' $logFile | sort -n | uniq);do + diedList["$i"]=1 + allBug["$i"]=1 +done + +# Verif STARTED signal whithout FINISHED signal +cptNeverFinish=0 +for i in ${!startList[*]};do + if [[ ! ${finishList[$i]-X} == ${finishList[$i]} ]];then +# echo "$i never finished ..." +# grep "$i/" $logFile + ((cptNeverFinish=cptNeverFinish+1)) + allBug["$i"]=1 + fi +done +echo -e "$cptNeverFinish contig whithout FINISHED signal.\n" + + +# Verif FINISHED signal whithout STARTED signal +cptNeverStart=0 +for i in ${!finishList[*]};do + if [[ ! ${startList[$i]-X} == ${startList[$i]} ]];then +# echo "$i never started ..." +# grep "$i/" $logFile + ((cptNeverStart=cptNeverStart+1)) + fi +done +echo -e "$cptNeverStart contig whithout STARTED signal.\n" + +#About All errors detected +echo -e "\n###Finally, ${#allBug[@]} errors has been found in the log file:(See below)###" +num=1 +for i in ${!allBug[*]};do + echo -e "\n$num) $i:" + grep "$i/" $logFile + ((num=num+1)) +done + +# verify in directory the errors +bugcounter=${#allBug[@]} +echo -e "\n### Now, verification of errors found in the directory of these ananlysis ###" +for i in ${!allBug[*]};do + echo -ne "\n$i:" + IFS=$'\n' + for j in $(grep "$i/" $logFile);do #for each line containing the contig bugged + if [[ $j =~ [^[:alnum:]+[:blank:].+[:blank:].+] ]];then #if the line is correctly written. 
Mean should have, at least, three columns + pathdir=$(echo $j | awk '{print $2}') # get path in the second column + goodPath="$(dirname $logFile)/$pathdir/run.log" # get the root path and concatenate to the path took before + ok="no" + for k in $(grep "FINISHED" $goodPath);do #get all line with FINISHED term + if [[ $(echo $k | grep "final.section") != "" ]];then # verify if one of these line contain final.section + echo -ne "Finaly this analysis was well terminated !" + ok="yes" + ((bugcounter=bugcounter-1)) #We remove this case of the counter because it is not a bug + break + fi + done + if [[ $ok == "no" ]];then #if no line contains Finished or final.section +# Print Pretty + sizeString=$(echo $i | wc -m) + lengthBeforeWritte=25; + nbSpaceToWrite=$((lengthBeforeWritte-sizeString)) + startValue=1 + while [[ $startValue -le $nbSpaceToWrite ]];do + echo -ne "-";((startValue=startValue+1)) + done + echo -ne "It seems that this analysis has never finished" + if [[ $(grep "DIED" $goodPath) != "" ]];then + echo "Indeed the DIED signal is present (see below)" + grep "DIED" $goodPath + fi + + fi + break + fi + done +done + +if (( $bugcounter == 0 ));then + echo -e "\n\nThis job does not contains bug ! Congratulation." +else + echo -e "\n\nWe found $bugcounter errors. Good luck for next ..." +fi + +echo -e "\nBye" diff --git a/bin/gaas_maker_check_progress_deeply.sh b/bin/gaas_maker_check_progress_deeply.sh new file mode 100755 index 000000000..c87a13252 --- /dev/null +++ b/bin/gaas_maker_check_progress_deeply.sh @@ -0,0 +1,133 @@ +#!/bin/bash + +############################################################################ +# JD 2014/04 +# What it does ? 
##Function
# Classify one contig directory of the MAKER datastore from its run.log and
# update the global counters skippedSmall, analyzeOk and skippedDied.
#
# Argument:
#   $1 - path of the contig directory; run.log is appended directly to it, so
#        the path must end with "/".
#
# Outcomes:
#   - last line of run.log mentions "trnascan"          -> skippedSmall + 1
#   - a FINISHED line mentions "final.section"          -> analyzeOk + 1
#   - otherwise the contig is reported; if run.log contains DIED the matching
#     lines are printed and skippedDied + 1, else a manual-check warning is
#     printed and no counter changes.
#   - run.log missing: reported as an unfinished, manual-check case.
function testContigState {
    local pathContig=$1
    local goodPath="${pathContig}run.log"

    # Without a run.log nothing can be decided; say so explicitly instead of
    # letting tail/grep complain about the missing file.
    if [[ ! -f "$goodPath" ]]; then
        echo "It seems that this analysis has never finished $pathContig"
        echo "/!\\ Wierd case. You have to verify manually if this study was perform correctly."
        return 0
    fi

    if tail -n1 "$goodPath" | grep -q "trnascan"; then
        # nothing annotated in this one
        ((skippedSmall=skippedSmall+1))
        return 0
    fi

    # Line-based check: does any FINISHED line mention final.section?
    # Using a pipeline keeps the result independent of the caller's IFS.
    if grep "FINISHED" "$goodPath" | grep -q "final.section"; then
        ((analyzeOk=analyzeOk+1))
        return 0
    fi

    # No FINISHED line mentions final.section.
    echo "It seems that this analysis has never finished $pathContig"
    if grep -q "DIED" "$goodPath"; then
        echo "Indeed the DIED signal is present (see below)"
        grep "DIED" "$goodPath"
        ((skippedDied=skippedDied+1))
    else
        echo "/!\\ Wierd case. You have to verify manually if this study was perform correctly."
    fi
    return 0
}
# Open the output stream (the file given with --output, or STDOUT when no
# output file was requested) and hand over to get_list().
#
# Parameters:
#   $input_listcontigWrong - file listing the paths of the contigs to discard
#   $input_datastorelog    - MAKER master datastore index log
#   $output_file           - output file name, or undef for STDOUT
#   $delete_contig         - 'log' or 'all' (see get_list)
sub main {
    my ($input_listcontigWrong, $input_datastorelog, $output_file, $delete_contig) = @_;

    # Carp is not loaded at the top of this script; load it here so that the
    # croak() calls below really report the open failure.
    require Carp;

    my $ostream = IO::File->new();
    if (defined $output_file) {
        $ostream->open($output_file, 'w')
            or Carp::croak(sprintf("Can not open '%s' for writing: %s", $output_file, $!));
    }
    else {
        $ostream->fdopen(fileno(STDOUT), 'w')
            or Carp::croak(sprintf("Can not open STDOUT for writing: %s", $!));
    }

    get_list($input_listcontigWrong, $input_datastorelog, $ostream, $delete_contig);
    return;
}

# Record the contigs to discard, optionally delete their folders, and write a
# filtered copy of the datastore log.
#
# Parameters:
#   $input_listcontigWrong - file whose lines contain
#                            genome.maker.output/genome_datastore/.../ paths
#   $input_datastorelog    - datastore log to filter
#   $ostream               - opened handle receiving the filtered log
#   $delete_contig         - 'all': remove the contig folders and filter the log;
#                            'log': filter the log only;
#                            any other value: nothing is written.
#
# Side effects: fills the file-level %contigs hash (datastore path => line of
# the list file) and, with 'all', runs "rm -r" on every listed folder.
sub get_list {
    my ($input_listcontigWrong, $input_datastorelog, $ostream, $delete_contig) = @_;

    require Carp;

    my $listcontigWrong = IO::File->new();
    if (defined $input_listcontigWrong) {
        $listcontigWrong->open($input_listcontigWrong, 'r')
            or Carp::croak(sprintf("Can not open '%s' for reading: %s", $input_listcontigWrong, $!));
    }

    # Test the file name, not the handle object (which is always defined).
    my $datastorelog = IO::File->new();
    if (defined $input_datastorelog) {
        $datastorelog->open($input_datastorelog, 'r')
            or Carp::croak(sprintf("Can not open '%s' for reading: %s", $input_datastorelog, $!));
    }

    # get list of path where contigs are and delete them
    while (my $line = <$listcontigWrong>) {
        chomp $line;
        next unless $line =~ /(genome.maker.output\/)(genome_datastore\/\S+\/)/;

        # Copy the captures at once: $1/$2 do not survive the next match.
        my ($output_dir, $contig_dir) = ($1, $2);
        $contigs{$contig_dir} = $line;

        if ($delete_contig eq 'all') {
            # List form: the path comes from a text file and must never be
            # interpreted by a shell.
            system('rm', '-r', $output_dir.$contig_dir) == 0
                or warn "Could not remove $output_dir$contig_dir\n";
        }
    }
    $listcontigWrong->close() if $listcontigWrong->opened();

    # then recreate a file without lines corresponding to the contigs in
    # output of maker (genome_master_datastore_index.log)
    my $filter_log = ($delete_contig eq 'all' || $delete_contig eq 'log');
    while (my $lineB = <$datastorelog>) {
        chomp $lineB;
        next unless $filter_log;
        next unless $lineB =~ /(genome_datastore\/\S+\/)/;

        print {$ostream} $lineB."\n" unless exists $contigs{$1};
    }
    $datastorelog->close() if $datastorelog->opened();

    return;
}
+gaas_maker_get_rid_of_contig.pl --help + +gaas_maker_get_rid_of_contig.pl --contig-name|-f + +gaas_maker_get_rid_of_contig.pl --output|-o + +gaas_maker_get_rid_of_contig.pl --datastore-name|-d + +gaas_maker_get_rid_of_contig.pl --delete-contig|-c log/all + +=head1 OPTIONS + +=over 8 + +=item B<--datastore-name> or B<-d> + +Input datastore log file + +=item B<--contig-name> or B<-f> + +Input file containing the list of wrong contigs + +=item B<--delete-contig> or B<-c> + +B<log>: only delete the contigs from the log file. +B<all>: delete the contigs from the log file and remove the contigs' folders. + +=item B<--output> or B<-o> + +File output name + +=item B<--help> + +Display the manual page. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Lucile Soler / Jacques Dainat diff --git a/bin/gaas_maker_get_rm_genome.pl b/bin/gaas_maker_get_rm_genome.pl new file mode 100755 index 000000000..9a2144fcf --- /dev/null +++ b/bin/gaas_maker_get_rm_genome.pl @@ -0,0 +1,204 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Getopt::Long; +use Pod::Usage; +use Scalar::Util qw(openhandle); +use Time::Piece; +use Time::Seconds; +use File::Basename; +use Cwd; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $dir = getcwd; +my $outfile = "genome.rm.fa"; +my $out_fh = undef; +my $maker_dir = undef; +my $datastore = undef; +my $in = undef; +my $opt_help = 0; + +# OPTION MANAGMENT +if ( !GetOptions( "help|h" => \$opt_help, + "i=s" => \$in, + "outfile|o=s" => \$outfile) ) +{ + pod2usage( { -message => 'Failed to parse command line', + -verbose => 1, + -exitval => 1 } ); +} +if ($opt_help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +####################### +### MANAGE OPTIONS #### +####################### + +# MANAGE IN +my @inDir; + +if(! $in){ + # Find the datastore index + my $maker_dir = undef; + + opendir(DIR, $dir) or die "couldn't open $dir: $!\n"; + my @dirList = readdir DIR; + closedir DIR; + + my (@matchedDir) = grep $_ =~ /^.*\.maker\.output$/ , @dirList ; + + foreach my $makerDir (@matchedDir){ + push(@inDir, $makerDir); + } +} +else{ + if (! 
# Walk $full_path recursively and append every file named query.masked.fasta
# to the output handle $out_fh (opened outside this sub).
#
# Parameter: $full_path - file or directory to inspect.
# Dies when a directory or a matching fasta file cannot be opened.
sub collect_recursive {
    my ($full_path) = @_;

    if (-d $full_path) {
        opendir(my $dh, $full_path)
            or die "Could not open directory '$full_path' $!";
        my @entries = grep { $_ ne '.' && $_ ne '..' } readdir $dh;
        # closedir, not close: $dh is a directory handle.
        closedir $dh;

        foreach my $entry (@entries) {
            collect_recursive("$full_path/$entry");
        }
        return;
    }

    ###################
    # deal with fasta #
    my ($name, $path, $suffix) = fileparse($full_path, qr/\.[^.]*/);
    return unless $name eq "query.masked" and $suffix eq ".fasta";

    # Plain byte copy: decoding the input as UTF-8 while printing to a handle
    # without an encoding layer would alter non-ASCII characters.
    open(my $fh, '<', $full_path) or die "Could not open file '$full_path' $!";
    while (my $line = <$fh>) {
        print {$out_fh} $line;
    }
    close $fh;
    return;
}
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_maker_merge_outputs_from_datastore.pl b/bin/gaas_maker_merge_outputs_from_datastore.pl new file mode 100755 index 000000000..8753769bf --- /dev/null +++ b/bin/gaas_maker_merge_outputs_from_datastore.pl @@ -0,0 +1,363 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use File::Copy; +use Scalar::Util qw(openhandle); +use Time::Piece; +use Time::Seconds; +use Try::Tiny; +use Cwd; +use Pod::Usage; +use URI::Escape; +use Getopt::Long qw(:config no_ignore_case bundling); +use Bio::Tools::GFF; +use IO::File; +use File::Basename; +use IPC::Cmd qw[can_run run]; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $output = undef; +my $in = undef; +my $help= 0; + +if ( !GetOptions( + "help|h" => \$help, + "i=s" => \$in, + "output|out|o=s" => \$output)) + +{ + pod2usage( { -message => 'Failed to parse command line', + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +####################### +### MANAGE OPTIONS #### +####################### + +# MANAGE IN +my @inDir; +my $dir = getcwd; + +if(! $in){ + # Find the datastore index + my $maker_dir = undef; + + opendir(DIR, $dir) or die "couldn't open $dir: $!\n"; + my @dirList = readdir DIR; + closedir DIR; + + my (@matchedDir) = grep $_ =~ /^.*\.maker\.output$/ , @dirList ; + + foreach my $makerDir (@matchedDir){ + push(@inDir, $makerDir); + } +} +else{ + if (! 
-d "$in") { + die "The outdirectory $in doesn't exist.\n"; + } + else{ + push(@inDir, $in); + } +} + +# MESSAGES +my $nbDir=$#inDir+1; +if ($nbDir == 0){die "There seems to be no maker output directory here, exiting...\n";} +print "We found $nbDir maker output directorie(s):\n"; +foreach my $makerDir (@inDir){ + print "\t+$makerDir\n"; +} + +#CONSTANT +my $maker_annotation_prefix = "maker_annotation"; +my $maker_mix_prefix = "maker_mix"; + + ##################### + # MAIN # + ##################### + +############################# +# Read the genome_datastore # +############################# +foreach my $makerDir (@inDir){ + print "\nDealing with $makerDir:\n"; + my %file_hds; + my $genomeName = $makerDir; + $genomeName =~ s/\.maker\.output.*//; + my $maker_dir_path = $dir . "/" . $makerDir."/"; + my $datastore = $maker_dir_path.$genomeName."_datastore" ; + +# --------------- check presence datastore ---------------------- + if (-d $datastore ) { + print "Datastore folder found in $makerDir, merging annotations now...\n"; + } else { + die "Could not find datastore index ($datastore), exiting...\n"; + } +# --------------- check output folder ---------------------- + + my $outfolder = undef; + if ($output){ + if ($nbDir == 1){ + $outfolder = $output; + } + else{ + $outfolder = $output."_$genomeName"; + } + } + else{ $outfolder = "maker_output_processed_$genomeName";} + if (-d "$outfolder") { + print "The output directory <$outfolder> already exists, let's see if something is missing inside.\n"; + } + else{ + print "Creating the $outfolder folder\n"; + mkdir $outfolder; + } + +# --------------- GATHERING gff and fasta ---------------------- + if ( ( grep -f, glob "$outfolder/*.fasta") or ( grep -f, glob "$outfolder/*.gff") ){ + print "Output fasta/gff file already exists. 
We skip the gathering step.\n"; + } + else{ + print "Now collecting gff and fasta files...\n"; + collect_recursive(\%file_hds, $datastore, $outfolder, $genomeName); + + #Close all file_handler opened that are not gff (gff files created by awk) + foreach my $key (keys %file_hds){ + close $file_hds{$key}; + } + #add ##gff-version 3 header to all gff files + opendir(DIR, $outfolder); + my @gff_files = grep(/\.gff$/,readdir(DIR)); + closedir(DIR); + + foreach my $gff_file (@gff_files) { + if($^O =~ "linux"){ + system "sed -i '1s/^/##gff-version 3\\\n/' $outfolder/$gff_file"; + } + else{ + system "sed -i '' '1s/^/##gff-version 3\\\n/' $outfolder/$gff_file"; # Mac syntax + } + } + } + + #-------------------------------------------------Save maker option files------------------------------------------------- + print "Now save a copy of the Maker option files ...\n"; + if (-f "$outfolder/maker_opts.ctl") { + print "A copy of the Maker files already exists in $outfolder/maker_opts.ctl. We skip it.\n"; + } + else{ + if(! $in){ + copy("maker_opts.ctl","$outfolder/maker_opts.ctl") or print "Copy failed: $! $outfolder/maker_opts.ctl\n"; + copy("maker_exe.ctl","$outfolder/maker_exe.ctl") or print "Copy failed: $! $outfolder/maker_exe.ctl\n"; + copy("maker_evm.ctl","$outfolder/maker_evm.ctl") or print "Copy failed: $! $outfolder/maker_evm.ctl\n"; + copy("maker_bopts.ctl","$outfolder/maker_bopts.ctl") or print "Copy failed: $! $outfolder/maker_bopts.ctl\n"; + } + else{ + my ($name,$path,$suffix) = fileparse($in); + copy("$path/maker_opts.ctl","$outfolder/maker_opts.ctl") or print "Copy failed: $! $outfolder/maker_opts.ctl\n"; + copy("$path/maker_exe.ctl","$outfolder/maker_exe.ctl") or print "Copy failed: $! $outfolder/maker_exe.ctl\n"; + copy("$path/maker_evm.ctl","$outfolder/maker_evm.ctl") or print "Copy failed: $! $outfolder/maker_evm.ctl\n"; + copy("$path/maker_bopts.ctl","$outfolder/maker_bopts.ctl") or print "Copy failed: $! 
# Walk the MAKER datastore recursively and gather its results into $out.
#
# Parameters:
#   $file_hds   - hash ref: output fasta file name => opened write handle.
#                 Handles are created on demand and left open for the caller
#                 to close.
#   $full_path  - file or directory to inspect
#   $out        - output folder
#   $genomeName - prefix of the fasta files that do not belong to the final
#                 maker annotation
#
# Fasta files named <key>.transcripts / <key>.proteins / <key>.noncoding are
# appended to one output file per key and type. Gff files are split with awk
# into maker_mix.gff (lines with 9 tab-separated fields), maker_annotation.gff
# (source "maker") and one <source>.gff per other source.
# Directories whose name starts with "theVoid" are not visited.
# Reads $maker_annotation_prefix and $maker_mix_prefix, defined outside this
# section of the file.
sub collect_recursive {
    my ($file_hds, $full_path, $out, $genomeName) = @_;

    my ($name, $path, $suffix) = fileparse($full_path, qr/\.[^.]*/);

    if (! -d $full_path) {

        ###################
        # deal with fasta #
        if ($suffix eq ".fasta") {
            my $key  = undef;
            my $type = undef;
            # Checked in this order; a later match overrides an earlier one.
            foreach my $kind (qw(transcripts proteins noncoding)) {
                if ($name =~ /([^\.]+)\.$kind/) {
                    $key  = $1;
                    $type = $kind;
                }
            }

            if ($key) {
                # protein or transcript corresponding to the maker annotation,
                # or to one of the other sources
                my $fasta_out_name = ($key eq 'maker')
                    ? "$maker_annotation_prefix.$type.fasta"
                    : "$genomeName.all.maker.$key.$type.fasta";

                my $fasta_out_fh = undef;
                if (_exists_keys($file_hds, ($fasta_out_name))) {
                    $fasta_out_fh = $file_hds->{$fasta_out_name};
                }
                else {
                    open($fasta_out_fh, '>', "$out/$fasta_out_name")
                        or die "Could not open file '$out/$fasta_out_name' $!";
                    $file_hds->{$fasta_out_name} = $fasta_out_fh;
                }

                # Plain byte copy: decoding the input as UTF-8 while printing
                # to a handle without an encoding layer would alter non-ASCII
                # characters.
                open(my $fh, '<', $full_path) or die "Could not open file '$full_path' $!";
                while (my $line = <$fh>) {
                    print {$fasta_out_fh} $line;
                }
                close $fh;
            }
        }

        ################
        #deal with gff #
        if ($suffix eq ".gff") {
            # List-form system(): no shell is involved, so the datastore path
            # needs no quoting.
            # NOTE(review): the field separator is taken to be a tab, as GFF3
            # is tab-delimited - confirm against the original script.
            my $mix_prog = sprintf('NF==9 {print $0 >> "%s/%s.gff"}',
                $out, $maker_mix_prefix);
            system('awk', '-F', "\t", $mix_prog, $full_path);

            my $split_prog = sprintf(
                '{if($2 ~ /[a-zA-Z]+/) if($2=="maker") { print $0 >> "%s/%s.gff" } else { gsub(/:/, "_" ,$2); print $0 >> "%s/"$2".gff" } }',
                $out, $maker_annotation_prefix, $out);
            system('awk', $split_prog, $full_path);
        }

        return;
    }

    # In the void there is sub results already stored in the up folder. No
    # need to go such deep otherwise we will have duplicates.
    if ($name =~ /^theVoid/) {
        return;
    }

    opendir(my $dh, $full_path)
        or die "Could not open directory '$full_path' $!";
    my @entries = grep { $_ ne '.' && $_ ne '..' } readdir $dh;
    # closedir, not close: $dh is a directory handle.
    closedir $dh;

    foreach my $entry (@entries) {
        collect_recursive($file_hds, "$full_path/$entry", $out, $genomeName);
    }
    return;
}

# Tell whether a chain of keys exists in nested hashes.
#
# Parameters: $hash - hash reference (anything else yields false)
#             $key  - key looked up in $hash
#             @keys - optional further keys, looked up level by level
# Returns 1 when every key exists along the chain, '' otherwise.
sub _exists_keys {
    my ($hash, $key, @keys) = @_;

    if (ref $hash eq 'HASH' && exists $hash->{$key}) {
        if (@keys) {
            # Recurse into this very sub (the call used to target the
            # undefined exists_keys).
            return _exists_keys($hash->{$key}, @keys);
        }
        return 1;
    }
    return '';
}
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_maker_merge_outputs_from_index.pl b/bin/gaas_maker_merge_outputs_from_index.pl new file mode 100755 index 000000000..42fb49d2b --- /dev/null +++ b/bin/gaas_maker_merge_outputs_from_index.pl @@ -0,0 +1,159 @@ +#!/usr/bin/env perl + +use strict; +use Getopt::Long; +use Scalar::Util qw(openhandle); +use Time::Piece; +use Time::Seconds; +use Cwd; + +my $dir = getcwd; + +my $usage = qq{ +perl my_script.pl + Getting help: + [--help] + + Ouput: + [--outdir filename] + The name of the output directory. By default the file is named annotations.gff + + Usage: + Must be executed in the folder from which Maker was run and will find the maker output + on its own and create a concatenated annotation file. + +}; +my $outdir = "annotations"; +my $protein_file = "annotations.proteins.fa"; +my $annotations_file = "annotations.gff"; +my $maker_dir = undef; +my $datastore = undef; +my $quiet; +my $help; + +GetOptions( + "help" => \$help, + "outdir=s" => \$outdir); + +# Print Help and exit +if ($help) { + print $usage; + exit(0); +} + +if (-d "$outdir") { + die "The outdirectory $outdir already exists, exiting\n"; +} +else{ + mkdir $outdir; +} + +# Find the datastore index + +my $maker_dir = undef; + +opendir(DIR, $dir) or die "couldn't open $dir: $!\n"; +my @files = readdir DIR; +closedir DIR; + +my (@matchedDir) = grep $_ =~ /^.*\.maker\.output$/ , @files ; +my $nbDir=$#matchedDir+1; +print "We found $nbDir maker output directorie(s):\n"; +foreach my $j (@matchedDir){ +print "\t+$j\n";} +if ($nbDir == 0){die "There seems to be no maker output directory here, exiting...\n";} +elsif ($nbDir > 1 ){print "Results will be merged together !\n";} + + +foreach my $matched (@matchedDir){ + + $maker_dir = $dir . "/" . 
$matched ; + my $base = $matched; + $base =~ s/\.maker\.output.*//g ; + $datastore = $matched . "/" . $base . "_master_datastore_index.log" ; + + if (-f $dir . "/" . $datastore ) { + print "Found datastore in $matched, merging annotations now...\n"; + } else { + die "Could not find datastore index ($datastore), exiting...\n"; + } + + # This is one way to open a file... + open (my $IN, '<', $datastore) or die "FATAL: Can't open file: $datastore for reading.\n$!\n"; + + # Streaming the file, line by line + while (<$IN>) { + chomp; # Trims the line, removes the line breaks + my $line = $_; # store the line in a variable + next unless ($line =~ /^.*FINISHED$/) ; # We only want finished contig annotations... + + my ($contig,$location,$status) = split("\t",$line); + + #### + #NOTE: Not all special character will be in URI format. As example the underscore. + + # If the contig includes a dot character, the output file will include a percent character... + if ($contig =~ /^.*\..*$/) { + $contig =~ s/\./\%2E/g ; + } + + # If the contig includes a pipe character, the output file will include a percent character... + if ($contig =~ m/\|/) { + $contig =~ s/\|/\%7C/g ; + } + + my $gff_file = $maker_dir . "/" . $location . $contig . ".gff" ; + my $aa_file = $maker_dir . "/" . $location . $contig . ".maker.proteins.fasta" ; + + if (-f $gff_file ) { + system("cat $gff_file >> $outdir/$annotations_file"); + } + + if (-f $aa_file ) { + system("cat $aa_file >> $outdir/$protein_file"); + } + } + # We should close the file to make sure that the transaction finishes cleanly. 
+ close ($IN); +} + +# Now manage to split file by kind of data +print "Now split file by data type...\n"; +my $splitedData_dir= "$outdir/annotationByType"; +mkdir $splitedData_dir; + +#call split script +my $SplitScript="/projects/scripts/gmod/split_gff_by_source.pl"; +system("$SplitScript","--input","$outdir/$annotations_file","-d","$splitedData_dir"); + + +#convert the gff in gtf +if (-f "${splitedData_dir}/maker.gff"){ + print "Converting Maker file to GTF...\n"; + my $gffreadPath="/sw/bioinfo/cufflinks/cufflinks-2.1.1/gffread"; + system("$gffreadPath","-o","$splitedData_dir/maker.gtf","-T","-F","$splitedData_dir/maker.gff"); +} +else{print "No gff file to convert\n";} +print "All done!\n"; + +# -------------- + +sub msg { + my $t = localtime; + my $line = "[".$t->hms."] @_\n"; + print LOG $line if openhandle(\*LOG); + print STDERR $line unless $quiet; +} + +sub runcmd { + msg("Running:", @_); + system(@_)==0 or err("Could not run command:", @_); +} + +sub err { + $quiet=0; + msg(@_); + exit(2); +} + + diff --git a/bin/gaas_maker_moveResultsSafely.sh b/bin/gaas_maker_moveResultsSafely.sh new file mode 100755 index 000000000..a13434032 --- /dev/null +++ b/bin/gaas_maker_moveResultsSafely.sh @@ -0,0 +1,102 @@ +#!/bin/bash + +############################################################################ +# JD 2014/04 +# What it does ? clean move of maker results +# use: script Directory +############################################################################ + +# Arguments and Paths +logFile="" +if (( $# !=1 )); then + echo -e "You have to specify a new directory ..." + exit +else + dirRes=$1 + if [ ! -d $dirRes ];then + mkdir $dirRes + else + echo -e "The directory already exists !\nDo you Want Overwrite this directory ?\n (0) yes\n (1) no\n (2) Take only things absent in the target directory" + read + case $REPLY in + 0) echo "Let s go to delete the directory" + rm -r $dirRes + mkdir $dirRes + ;; + 1) echo "Ok we keep it ! See you then." 
+ exit 0 ;; # choice 1 = keep the existing directory untouched, so stop instead of moving files into it + 2) echo "I will move only directories/Files that are absent in the target directory" + moveifabsent="yes" + ;; + esac + + fi +fi + + +# Directory move +if [ ! -d genome.maker.output ];then + echo "/!\\ folder not found !" +else + if [[ $moveifabsent == "yes" ]];then # test the variable's value (the bare word moveifabsent never equals "yes") + if [ ! -d $dirRes/genome.maker.output ];then + mv genome.maker.output $dirRes/ + fi + else + mv genome.maker.output $dirRes/ + fi +fi + +if [ ! -d annotation ];then + echo "/!\\ folder not found !" + echo "Suggestion: Did you think to launch the script to perform this directory ?" +else + if [[ $moveifabsent == "yes" ]];then + if [ ! -d $dirRes/annotation ];then + mv annotation $dirRes/ + fi + else + mv annotation $dirRes/ + fi +fi + + + +#Files Move +if [ ! -f annotations.gff ];then + echo "/!\\ file not found !" + echo "Suggestion: Did you think to launch the script to perform this file ?" +else + if [[ $moveifabsent == "yes" ]];then + if [ ! -f $dirRes/annotations.gff ];then + mv annotations.gff $dirRes/ + fi + else + mv annotations.gff $dirRes/ + fi +fi + +if [ ! -f annotations.proteins.fa ];then + echo "/!\\ file not found !" +else + if [[ $moveifabsent == "yes" ]];then + if [ ! -f $dirRes/annotations.proteins.fa ];then + mv annotations.proteins.fa $dirRes/ + fi + else + mv annotations.proteins.fa $dirRes/ + fi +fi + +if [ ! -f maker_opts.ctl ];then + echo "/!\\ file not found !" +else + if [[ $moveifabsent == "yes" ]];then + if [ ! -f $dirRes/maker_opts.ctl ];then + cp maker_opts.ctl $dirRes/ + fi + else + cp maker_opts.ctl $dirRes/ + fi + # no unconditional copy here: it would overwrite the target copy even in move-if-absent mode +fi diff --git a/bin/gaas_manage_backup.sh b/bin/gaas_manage_backup.sh new file mode 100755 index 000000000..8cbd78ff2 --- /dev/null +++ b/bin/gaas_manage_backup.sh @@ -0,0 +1,147 @@ +#!/bin/bash + +## jacques.dainat@nbis.se - 2015 +# This script allows to make weekly/monthly/yearly compressed backup. +# It is not completely generalized but can be done easily with some modification. 
+# In the folder where you want to launch the backup you need a current_backup.log file (last modified by the last backup) and a folder called current_backup containing everything you want to backup + +now=$(date +"%Y-%m-%d") +gap=43200 #equivalent to 12 hours. We need it because rsync and backup script are not launched at the same time in order to check the current rsync made bedore is the one done in time. + +# Move to the working directory. Folder where all the backup process and save has to be done +cd /databases/backup/backup_prod/ || exit 1 # never create/remove backups from an unexpected directory + +############### +#### Manage directories needed +weeklyDir="weekly" +monthlyDir="monthly" +yearlyDir="yearly" + +if [ ! -d "$weeklyDir" ]; then + mkdir $weeklyDir +fi +if [ ! -d "$monthlyDir" ]; then + mkdir $monthlyDir +fi +if [ ! -d "$yearlyDir" ]; then + mkdir $yearlyDir +fi + +##### +# INFO +dayInterval=7 +nbWeeklyMax=4 +monthInterval=1 +nbMonthlyMax=11 +yearInterval=1 + +dateFileCurrentBackup=`date -r current_backup.log +"%s"` + + +######## +# METHOD + +prepare_backup_to_store(){ + + # copy backup + mkdir ${now}.backup + cd ${now}.backup + ln -s ../current_backup.log + ln -s ../current_backup + cd .. + #compress it + tar -chzf ${now}.tar.gz ${now}.backup; local tarStatus=$?; [[ $tarStatus == 0 ]] && rm -Rf ${now}.backup # remember tar's own status: $? is overwritten by every later command + if [[ $tarStatus != 0 ]];then date > READMEplease.log ; echo -e "The exit status code of the tar command is: $tarStatus \nPlease check why is not 0! It could be unsafe for the backup.\n" >> READMEplease.log;fi +} + +#################### +#lastBackup // Handle compare to weekly backup +if [ ! 
"$(ls -A $weeklyDir)" ]; then # directory empty + prepare_backup_to_store + # Move it in weekly directory + mv ${now}.tar.gz $weeklyDir/ +else # directory not empty + nameFileLastBackup=`ls -lrt $weeklyDir/* | awk '{print $NF}' | tail -n 1` + dateFileLastBackup=`date -r $nameFileLastBackup +"%s"` # date in (number of seconds since the epoch, when the seventies begun, UTC) + diffInSec=`expr $dateFileCurrentBackup - $dateFileLastBackup + $gap` + diffInDay=`expr $diffInSec / 86400` # gap is in seconds, so it is added before the conversion to days (same as the monthly and yearly sections) + + # If diff with last backup is over this determined we copy it + if (( $diffInDay >= $dayInterval ));then + prepare_backup_to_store + # Move it in weekly directory + mv ${now}.tar.gz $weeklyDir/ + + #Now check if we have to remove old backup + nbBackup=`ls -lrt $weeklyDir/* | wc -l` + if(($nbBackup > $nbWeeklyMax));then + nameFileOlderBackup=`ls -lrt $weeklyDir/* | awk '{print $NF}' | head -n 1` + rm $nameFileOlderBackup + fi + fi +fi + +################### +## Handle monthly backup +if [ ! "$(ls -A $monthlyDir)" ]; then # directory empty + if [[ -f $weeklyDir/${now}.tar.gz ]];then #Current backup already managed, we just copy it + cp $weeklyDir/${now}.tar.gz $monthlyDir/ + else # #Current backup not yet managed, we handle it + prepare_backup_to_store + # Move it in montly directory + mv ${now}.tar.gz $monthlyDir/ + fi +else # directory not empty + nameFileLastBackup=`ls -lrt $monthlyDir/* | awk '{print $NF}' | tail -n 1` + dateFileLastBackup=`date -r $nameFileLastBackup +"%s"` # date in (number of seconds since the epoch, when the seventies begun, UTC) + diffInSec=`expr $dateFileCurrentBackup - $dateFileLastBackup + $gap` + diffInMonth=`expr $diffInSec / 2629746` + + # If diff with last backup is over this determined we copy it + if (( $diffInMonth >= $monthInterval ));then + if [[ -f $weeklyDir/${now}.tar.gz ]];then #Current backup already managed, we just copy it + cp $weeklyDir/${now}.tar.gz $monthlyDir/ + else # #Current backup not yet managed, we handle it + prepare_backup_to_store 
+ # Move it in monthly directory + mv ${now}.tar.gz $monthlyDir/ + fi + + #Now check if we have to remove old backup + nbBackup=`ls -lrt $monthlyDir/* | wc -l` + if(($nbBackup > $nbMonthlyMax));then + nameFileOlderBackup=`ls -lrt $monthlyDir/* | awk '{print $NF}' | head -n 1` + rm $nameFileOlderBackup + fi + fi +fi + + +################### +## Handle yearly backup +if [ ! "$(ls -A $yearlyDir)" ]; then # directory empty + if [[ -f $weeklyDir/${now}.tar.gz ]];then #Current backup already managed, we just copy it + cp $weeklyDir/${now}.tar.gz $yearlyDir/ + else # #Current backup not yet managed, we handle it + prepare_backup_to_store + # Move it in montly directory + mv ${now}.tar.gz $yearlyDir/ + fi +else # directory not empty + nameFileLastBackup=`ls -lrt $yearlyDir/* | awk '{print $NF}' | tail -n 1` + dateFileLastBackup=`date -r $nameFileLastBackup +"%s"` # date in (number of seconds since the epoch, when the seventies begun, UTC) + diffInSec=`expr $dateFileCurrentBackup - $dateFileLastBackup + $gap` + diffInYear=`expr $diffInSec / 31556952` + + # If diff with last backup is over this determined we copy it + if (( $diffInYear >= $yearInterval ));then + if [[ -f $weeklyDir/${now}.tar.gz ]];then #Current backup already managed, we just copy it + cp $weeklyDir/${now}.tar.gz $yearlyDir/ + else # #Current backup not yet managed, we handle it + prepare_backup_to_store + # Move it in monthly directory + mv ${now}.tar.gz $yearlyDir/ + fi + fi +fi + diff --git a/bin/gaas_manage_species.rb b/bin/gaas_manage_species.rb new file mode 100755 index 000000000..b057a0ce0 --- /dev/null +++ b/bin/gaas_manage_species.rb @@ -0,0 +1,118 @@ +#!/usr/bin/ruby +# == NAME +# build.rb +# +# == USAGE +# ./this_script.rb [ -h | --help ] +# [ -i | --infile ] |[ -o | --outfile ] | +# == DESCRIPTION +# A script to build a new WebApollo installation based on pre-built template +# +# == OPTIONS +# -h,--help:: Show help +# -s,--species=SPECIES:: Name of the species +# -f,--fasta=FASTA:: Genome 
sequence +# +# == EXPERT OPTIONS +# +# == AUTHOR +# Marc Hoeppner, mphoeppner@gmail.com + +require 'optparse' +require 'ostruct' + +### Define modules and classes here + +def tomcat_group + + user_groups = `id` + user_groups.include?("tomcat") + +end + +### Get the script arguments and open relevant files +options = OpenStruct.new() +opts = OptionParser.new() +options.clean = false +opts.on("-s","--species", "=SPECIES","Species name") {|argument| options.species = argument } +opts.on("-f","--fasta", "=FASTA","Fasta file") {|argument| options.fasta = argument } +opts.on("-c","--[no]clean","Clean up project") { options.clean = true } +opts.on("-w","--wa_installation", "=wa_installation_name","path to the WA installation") {|argument| options.wa_installation_name = argument } +opts.on("-h","--help","Display the usage information") { + puts opts + exit +} + +opts.parse! + +raise "You are not member of the tomcat group and thus cannot deploy WebApollo!" unless tomcat_group + +@species = options.species or abort 'No species name provided' +@organism = options.species.split('_')[0].capitalize + ' ' + options.species.split('_')[-1] +@wa_installation_name = options.wa_installation_name or abort 'Name of the WebApollo installation not provided' +if options.clean != true + @fasta = options.fasta or abort 'No genome sequence provided' +end + +# Custom CSS styles needed for WA +CSS_STRING = ".plus-cigarM {\nbackground-color: green; /* color for plus matches */\n}\n\n.minus-cigarM {\nbackground-color: blue; /* color for minus matches */\n}\n" + +### Usernames, passwords and locations + +user = ENV['USER'] +home = ENV['HOME'] + +build_dir = ENV['APOLLO_BUILD_DIR'] or abort "Environment variable APOLLO_BUILD_DIR not set" +data_dir = ENV['APOLLO_DATA_DIR'] or abort "Environment vairable APOLLO_DATA_DIR not set" + +web_apollo_storage = "#{data_dir}/#{@species}" # Folder tree where data is stored + +config = { + :web_apollo_path => "#{build_dir}/#{@wa_installation_name}", # The 
location where this WA installation has been built + :tool_dir => "/opt/ucsc", # Location of blat and FaToNib +} + +### The workflow + +if options.clean == true + puts "Are you really sure to remove the species folder ? [y|n]:" + selection = STDIN.gets.chomp + if(selection.downcase == "y") + puts "Cleaning database" + system("#{config[:web_apollo_path]}/docs/web_services/examples/groovy/delete_annotations_from_organism.groovy -destinationurl http://localhost:8080/#{@wa_installation_name} -organismname #{@species}") + + puts "Cleaning webapollo folder" + system("rm -Rf #{web_apollo_storage}") + + puts "Cleaning finished" + else + puts "Fine we let everythong as it was." + end +else + #check if instalation already existing + if File.directory?("#{web_apollo_storage}") + puts "Instalation for species #{@species} already exits !" + exit + else + # Create the folder where the data is to be stored + puts "Create folders" + system("mkdir -p #{web_apollo_storage}") + system("chgrp -R tomcat #{web_apollo_storage}") # Must be owned by tomcat group + + # Load genome assembly + puts "Loading genome assembly" + system("#{config[:web_apollo_path]}/bin/prepare-refseqs.pl --fasta #{@fasta} --out #{web_apollo_storage}") + + + # Create custom CSS style sheet + f = File.new("#{web_apollo_storage}/custom.css","w") + f.puts CSS_STRING + f.close + + # Build Blat database + puts "Build Blat database" + system("#{config[:tool_dir]}/faToTwoBit #{@fasta} #{web_apollo_storage}/blat.2bit") + + end +end + diff --git a/bin/gaas_multiplotAll.R b/bin/gaas_multiplotAll.R new file mode 100644 index 000000000..0d52298a4 --- /dev/null +++ b/bin/gaas_multiplotAll.R @@ -0,0 +1,24 @@ +#!/usr/bin/Rscript + +tiff(filename="multiplotAll.tif", width = 800, height = 800, units = "px", pointsize = 26, compression="lzw") +tab=as.matrix(read.table("MatriceNbSeqAliLengthByAli.csv")) +y=as.numeric(tab[,2]) +x=as.numeric(tab[,3]) +yhist<-hist(y, breaks=seq(0,16400,200), plot=FALSE) +xhist<-hist(x, 
breaks=seq(4,50,1), plot=FALSE) +top <- max(c(xhist$counts, yhist$counts)) +xrange<- c(4,50) +yrange<- c(0,17000) +nf <- layout(matrix(c(2,0,1,3),2,2,byrow=TRUE), c(3,1), c(1,3), TRUE) +par(mar=c(4,4,1,1)) +plot(x, y, xlab="Sequence number", ylab="Alignment length", col=rgb(114, 46, 165, maxColorValue = 255)) +par(mar=c(0,4,1,1)) +barplot(xhist$counts, axes=TRUE, ylim=c(0, 800), space=0,col="light green") +par(mar=c(4,0,1,1)) +#title(main = "Alignment length VS sequence number",outer=TRUE, line = -1) +#title(main=paste("B) Alignment length VS sequence number","\n",sep=""),cex.main=1,outer=TRUE, line = -3) +#title(main=paste("\n","of UP1 clusters",sep=""),cex.main=1,outer=TRUE, line = -3) +title(main="A) UPall clusters",cex.main=1,outer=TRUE, line = -3) +barplot(yhist$counts, axes=TRUE, ylim=c(0, 82), xlim=c(0, 400), space=0, horiz=TRUE,col="light blue") +dev.off() + diff --git a/bin/gaas_ncbi_get_genome_tree.pl b/bin/gaas_ncbi_get_genome_tree.pl new file mode 100755 index 000000000..4d8cae145 --- /dev/null +++ b/bin/gaas_ncbi_get_genome_tree.pl @@ -0,0 +1,366 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Getopt::Long; +use Scalar::Util qw(openhandle); +use Time::Piece; +use Time::Seconds; +use Pod::Usage; +use XML::LibXML; +use Data::Dumper; +use LWP::Simple; +use LWP::UserAgent; +use HTTP::Request::Common; +use Try::Tiny; +use Bio::DB::Taxonomy; +use Bio::TreeIO; +use Bio::Tree::Tree; +use Bio::Tree::TreeFunctionsI; +use Bio::DB::EUtilities; +use Bio::SeqIO; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $outfile = undef; +my $quiet = undef; +my $message=""; +my $taxid=undef; +my $verbose=undef; +my $list; +my $help; + +if ( !GetOptions( + "help|h" => \$help, + "t|taxid=i" => \$taxid, + "q" => \$quiet, + "v" => \$verbose, +# "outdir=s" => \$outdir, + "o|output|outfile=s" => \$outfile)) +{ + pod2usage( { -message => 'Failed to parse command line', + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($help) { + pod2usage( { 
-verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + + +# .. Create output directory + +#runcmd("mkdir -p $outdir"); + +# .. set up log file + +#my $logfile = "$outdir/reference_sequences.log"; +#msg("Writing log to: $logfile"); +#open LOG, '>', $logfile or err("Can't open logfile"); + + +## +my %KINGDOM = (1 => 'Eukaryota', 2 => 'Bacteria', 3 => 'Archaea',4 => 'Viroids', 5 => 'Viruses'); + +my %GROUP = ('Eukaryota' => {1 => 'All', 2 => 'Animals', 3 => 'Fungi', 4 => 'Other', 5 => 'Plants', 6 => 'Protists'}, + 'Bacteria' => {1 => 'All', 2 => 'Acidobacteria', 3 => 'Aquificae', 4 => 'Caldiserica', 5 => 'Chrysiogenetes', 6 => 'Deferribacteres', 7 => 'Dictyoglomi', 8 => 'Elusimicrobia', 9 => 'FCB group', 10 => 'Fusobacteira', 11 => 'Nitrospinae/Tectomicrobia group', + 12 => 'Nitrospirae', 13 => 'PVC group', 14 => 'Proteobacteria', 15 => 'Rhodothermaeota', 16 => 'Spirochaetes', 17 => 'Synergistetes', 18 => 'Terrabacteria group', 19 => 'Thermodesulfobacteria', 20 => 'Thermotogae', 21 => 'environmental samples', 22 => 'unclassified Bacteria'}, + 'Archaea' => {1 => 'All'}, + 'Viroids' => {1 => 'All'}, + 'Viruses' => {1 => 'All'}); + +my %SUBGROUP = ('Animals' => {1 => 'All', 2 => 'Amphibians', 3 => 'Birds', 4 => 'Fishes', 5 => 'Flatworms', 6 => 'Insects', 7 => 'Mammals', 8 => 'Other Animals', 9 => 'Reptiles', 10 => 'Roundworms'}, + 'Fungi' => {1 => 'All', 2 => 'Ascomycetes', 3 => 'Basidiomycetes', 4 => 'Other Fungi'}, + 'Other' => {1 => 'All'}, + 'Plants' => {1 => 'All', 2 => 'Green Algae', 3 => 'Land Plants', 4 => 'Other Plants'}, + 'Protists' => {1 => 'All', 2 => 'Apicomplexans', 3 => 'Kinetoplasts', 4 => 'Other Protists'}); + + +############### +# MANAGE output + +my $log=undef; +if ($outfile) { + open($log, '>', $outfile."_report.txt") or die "Could not open file '$outfile' $!"; +} + + +my $log_tree=undef; +if ($outfile) { + open(my $tree, '>', $outfile."_tree.nhx") or die "Could not open file '$outfile' $!"; + $log_tree = Bio::TreeIO->new(-fh => $tree, 
-format => 'nhx'); +} +my $screenDisplayTree = Bio::TreeIO->new( + -format => 'nhx', + -fh => \*STDOUT, + ); + +############# + + + +############### +# CREATE QUERY +my $query=""; +if($taxid){ + $query="txid".$taxid."[orgn]"; +} +else{ + ### KINGDOM LEVEL ### + print "Please chose a Kingdom:\n"; + foreach my $key (sort{$a <=> $b} keys %KINGDOM){ + print "$key $KINGDOM{$key}\n"; + } + print "choice:"; + my $resu_kingdom = <STDIN>; # read the user's menu choice from the terminal + $resu_kingdom = key_exits($resu_kingdom, %KINGDOM); + + print "Please chose a Group within $KINGDOM{$resu_kingdom}:\n"; + foreach my $key (sort{$a <=> $b} keys %{$GROUP{$KINGDOM{$resu_kingdom} } }){ + print "$key $GROUP{$KINGDOM{$resu_kingdom}}{$key}\n"; + } + print "choice:"; + my $resu_group=<STDIN>; + $resu_group = key_exits($resu_group, %{$GROUP{$KINGDOM{$resu_kingdom} } }); + + #case all in kingdom + if($resu_group == "1"){ + $query=$KINGDOM{$resu_kingdom}."[Organism]"; + } + else{ # we continue + ### GROUP LEVEL ### + print "Please chose a Group within $GROUP{$KINGDOM{$resu_kingdom}}{$resu_group}:\n"; + foreach my $key (sort{$a <=> $b} keys %{$SUBGROUP{$GROUP{$KINGDOM{$resu_kingdom}}{$resu_group} } }){ + print "$key $SUBGROUP{$GROUP{$KINGDOM{$resu_kingdom}}{$resu_group}}{$key}\n"; + } + print "choice:"; + my $resu_subgroup=<STDIN>; + $resu_subgroup = key_exits($resu_subgroup, %{$SUBGROUP{$GROUP{$KINGDOM{$resu_kingdom}}{$resu_group} } }); + + if($resu_subgroup == "1"){ + $query=$KINGDOM{$resu_kingdom}."[Organism] AND ".$GROUP{$KINGDOM{$resu_kingdom}}{$resu_group}."[Organism]"; + } + else{ # we continue + ### SUBGROUP LEVEL ### + $query=$KINGDOM{$resu_kingdom}."[Organism] AND ".$GROUP{$KINGDOM{$resu_kingdom}}{$resu_group}."[Organism] AND ".$SUBGROUP{$GROUP{$KINGDOM{$resu_kingdom}}{$resu_group}}{$resu_subgroup}."[Organism]"; + } + } +} + +############################################ +# fecth the genome database using esearch +# It will give us a list of IDs +msg("### Now fetching ids into the genome database using esearch with the query: $query"); +my 
$esearch = Bio::DB::EUtilities->new(-eutil => 'esearch', + -email => 'me@nbis.se', + -db => 'genome', + -retmax => [100000], + -term => $query); +my $count = 0; +$count = $esearch->get_count; + +msg("Found " . $count . " hits for " . $query . " in database '" . 'genome db' . "'\n"); + +if ($count == 0){ # Skip if nothing was found + msg("Nothing found"); exit ; +} + +# Go trough the XML response to extract the ids +my $xml_data; +my @id_Genomes; +$esearch->get_Response(-cb => sub { ($xml_data) = @_; } ); + +my $xmldoc = XML::LibXML->load_xml(string => $xml_data); + +my @nodes = $xmldoc->getElementsByLocalName('Id'); +foreach my $node (@nodes){ + my $avalue = $node->textContent; + push @id_Genomes, $avalue; +} + + +############################################ +# link genome database with taxonomy database to fetch the taxid from the id using elink +# It will give us a list of IDs +msg("### Now translating ids into taxids using elink:"); +my @taxid_Genomes; +foreach my $id (@id_Genomes){ + select(undef, undef, undef, 0.5); #sleep half a second (the builtin sleep() only takes whole seconds, so sleep 0.5 would not wait at all). After december 2018 any site (IP address) posting more than 3 requests per second to the E-utilities without an API key will receive an error message. By including an API key, a site can post up to 10 requests per second by default... 
see https://www.ncbi.nlm.nih.gov/books/NBK25497/ + print "search for id = $id\n" if $verbose; + my $elink = Bio::DB::EUtilities->new(-eutil => 'elink', + -email => 'me@nbis.se', + -dbfrom => 'genome', + -db => 'taxonomy', + -retmax => [100], + -id => $id); + + my $xml_id = $elink->get_Response()->content(); + + try{ + my $xmldoc = XML::LibXML->load_xml(string => $xml_id); + my @nodes = $xmldoc->getElementsByLocalName('Link'); + my $taxid = undef; + + foreach my $node (@nodes){ + my @childnodes = $node->childNodes(); + + foreach my $childnode (@childnodes){ + my $name = $childnode->nodeName; + + if($name eq "Id"){ + $taxid = $childnode->textContent; + msg("id $id has been mapped to taxid $taxid"); + push @taxid_Genomes, $taxid; + last; + } + } + last if($taxid); + } + } + catch{ + warn "caught error: $_" if $verbose; + msg("No taxid found for id $id"); + } +} + +############### +# CREATING TREE +msg("### Now translating taxids into scientific_name using entrez:"); +my $db = Bio::DB::Taxonomy->new(-source => 'entrez'); +my @species_names; +my $speciesTreeReady; +foreach my $taxid (@taxid_Genomes){ + my $taxon = $db->get_taxon(-taxonid => "$taxid"); + my $spName=$taxon->scientific_name; + msg("taxid $taxid = $spName"); + push(@species_names, $spName); +} +msg("### Now creating tree"); +$speciesTreeReady = $db->get_tree(@species_names); +$speciesTreeReady->contract_linear_paths(); +msg( "This is the tree of species that have whole genome sequenced:" ); + +# PRINT THE TREE INTO A VARIABLE +my $tree; +do { + local *STDOUT; + open STDOUT, ">>", \$tree; + $screenDisplayTree->write_tree($speciesTreeReady); +}; +msg($tree); +# print in a file if asked +if($log_tree){ + $log_tree->write_tree($speciesTreeReady); +} + +####################################################################################################################### + #################### + # METHODS # + ################ + ############## + ############ + ########## + ######## + ###### + #### + ## + +sub msg 
{ + my $t = localtime; + my $line = "[".$t->hms."] @_\n"; + print $log $line if $log; + print STDERR $line unless $quiet; +} + +sub runcmd { + msg("Running:", @_); + system(@_)==0 or err("Could not run command:", @_); +} + +sub key_exits{ # returns the key of %HASH numerically equal to the user's answer, prompting again until one matches + my ($resu_kingdom, %HASH)=@_; + + while ($resu_kingdom){ + foreach my $key (keys %HASH){ + if($resu_kingdom == $key){ + return $key; + } + } + print "Wrong choice, please try again:"; + $resu_kingdom=<STDIN>; # read a new answer from the terminal + } + + +} + +__END__ +=head1 NAME + +gaas_ncbi_get_genome_tree.pl + +=head1 DESCRIPTION + +The script creates a tree that covers only whole genomes from the genome NCBI database. +The result is written to the specified output file, or to STDOUT. + +=head1 SYNOPSIS + + gaas_ncbi_get_genome_tree.pl [ -o outfile ] + gaas_ncbi_get_genome_tree.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<-t> or B<--taxid> + +To specify a specific taxid. Allow to focus on a specific part of the tree of life. + +=item B<-v> + +For debugging purpose. + +=item B<-q> + +Quiet to avoid printing the progress on STDOUT + +=item B<-o>, B<--output> or B<--outfile> + +The name of the output file. By default the output is the standard output + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_ncbi_get_reference_data.pl b/bin/gaas_ncbi_get_reference_data.pl new file mode 100755 index 000000000..53c7600c8 --- /dev/null +++ b/bin/gaas_ncbi_get_reference_data.pl @@ -0,0 +1,198 @@ +#!/usr/bin/env perl + +use strict; +use Getopt::Long; +use Bio::DB::EUtilities; +use Bio::SeqIO; +use Scalar::Util qw(openhandle); +use Time::Piece; +use Time::Seconds; +use Pod::Usage; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $outfile = undef; +my $format = "fasta"; +my $quiet; +my $organisms = undef; +my $dbs = undef; +my $outdir = "tmp"; +my @dbs = ( "nucest" , "protein" ); + +my $list; +my $help; + +if ( !GetOptions( + "help|h" => \$help, + "l|list" => \$list, + "outdir=s" => \$outdir, + "f|format=s" => \$format, + "organisms=s" => \$organisms, + "db|dbs=s" => \$dbs, + "o|output|outfile=s" => \$outfile)) +{ + pod2usage( { -message => 'Failed to parse command line', + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + + +if ($list) { + my $db_factory = Bio::DB::EUtilities->new(-eutil => 'einfo', -email => 'mymail@foo.bar',); + my $db_list = join("\n\t",$db_factory->get_available_databases); + print "\n", $db_list , "\n"; + exit(0); +} + +if (! defined($organisms) ){ + pod2usage( { + -message => "$header\nAt least 1 parameter is mandatory:\nInput organisms (-o species1:species2:species3).\n\n". + "Output is optional. Look at the help documentation to know more.\n", + -verbose => 0, + -exitval => 1 } ); +} + +# .. Create output directory + +runcmd("mkdir -p $outdir"); + +# .. 
set up log file + +my $logfile = "$outdir/reference_sequences.log"; +msg("Writing log to: $logfile"); +open LOG, '>', $logfile or err("Can't open logfile"); + +if ($dbs) { + @dbs = split(":",$dbs); +} + +# Iterate over all organisms (can be one..) +foreach my $organism (split(":", $organisms)) { + + my $query_term = join(" ",split("_",$organism)) . "[ORGN]"; + + foreach my $db (@dbs) { + + my $factory = Bio::DB::EUtilities->new(-eutil => 'esearch', + -email => 'me@foo.com', + -db => $db, + -retmax => [10], + -term => $query_term, + -usehistory => 'y'); + + my $count = $factory->get_count; + + msg("Found " . $factory->get_count . " hits for " . $organism . " in database '" . $db . "'\n"); + + next if ($count == 0); # Skip if nothing was found + + my $hist = $factory->next_History || die 'No history data returned'; + print "History returned\n"; + + # note db carries over from above + $factory->set_parameters(-eutil => 'efetch', + -rettype => $format, + -history => $hist); + + my $retry = 0; + my ($retmax, $retstart) = (500,0); + + open (my $out, '>', $organism . "." . $db . ".fa") || die "Can't open file:$!"; + + RETRIEVE_SEQS: + + while ($retstart < $count) { + $factory->set_parameters(-retmax => $retmax, + -rettype => $format, + -retstart => $retstart); + eval{ + $factory->get_Response(-cb => sub {my ($data) = @_; print $out $data} ); + }; + if ($@) { + die "Server error: $@. 
Try again later" if $retry == 5; + print STDERR "Server error, redo #$retry\n"; + $retry++; redo RETRIEVE_SEQS; # always retry the same batch; the former "$retry++ && redo" skipped the redo on the first failure because the post-increment yields 0 + } + print "Retrieved $retstart"; + $retstart += $retmax; + } + close $out; + print "\n"; + + + } # end databases + +} # end organism + +####################################################################################################################### + #################### + # METHODS # + ################ + ############## + ############ + ########## + ######## + ###### + #### + ## +sub msg { + my $t = localtime; + my $line = "[".$t->hms."] @_\n"; + print LOG $line if openhandle(\*LOG); + print STDERR $line unless $quiet; +} + +sub runcmd { + msg("Running:", @_); + system(@_)==0 or err("Could not run command:", @_); +} + +=head1 NAME + +ncbi_get_reference_data.pl - +The script allow to recovered information from NCBI databases. +The result is written to the specified output file, or to STDOUT. + +=head1 SYNOPSIS + + ./ncbi_get_reference_data.pl --organisms species1:species2:species3 [ -o outfile ] + ./ncbi_get_reference_data.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<-l> or B<--list> + +List of all available databases + +=item B<--organisms> + +The names of the species to query data from. Species name format: Genus_species (e.g. Gallus_gallus). When querying several organisms please follow this nomenclature: species1:species2:species3 + +=item B<--db> or B<--dbs> + +The names of the NCBI databases to query for data. Default: nucest, protein (see --list for options). When querying several databases please follow this nomenclature: db1:db2:db3 + +=item B<-f> or B<--format> + +The file format to produce. Not all databases can write all formats! Default: fasta + +=item B<-o>, B<--output> or B<--outfile> + +The name of the output file. By default the output is the standard output + +=item B<-h> or B<--help> + +Display this helpful text. 
+
+=back
+
+=cut
diff --git a/bin/gaas_ncbi_get_sequence_from_list.pl b/bin/gaas_ncbi_get_sequence_from_list.pl
new file mode 100755
index 000000000..524846d0c
--- /dev/null
+++ b/bin/gaas_ncbi_get_sequence_from_list.pl
@@ -0,0 +1,302 @@
+#!/usr/bin/env perl
+
+
+#
+# NCBI recommends that users post no more than three URL requests per second
+#
+
+use strict;
+use warnings;
+use Try::Tiny;
+use Getopt::Long;
+use Bio::DB::EUtilities;
+use Bio::DB::Fasta;
+use Bio::SeqIO;
+use Scalar::Util qw(openhandle);
+use Time::Piece;
+use Time::Seconds;
+use Pod::Usage;
+use XML::LibXML;
+use GAAS::GAAS;
+
+my $header = get_gaas_header();
+my $opt_output = undef;
+my $col = undef;
+my $message="";
+my $list=undef;
+my $separator=undef;
+my $lineToAvoid=undef;
+my $help;
+my $quiet = undef;
+
+if ( !GetOptions(
+    "help|h" => \$help,
+    "list|l=s" => \$list,
+    "line=i" => \$lineToAvoid,
+    "col=i" => \$col,
+    "s=s" =>\$separator,
+    "q" => \$quiet,
+    "o|output|outfile=s" => \$opt_output))
+{
+    pod2usage( { -message => 'Failed to parse command line',
+                 -verbose => 1,
+                 -exitval => 1 } );
+}
+
+# Print Help and exit
+if ($help) {
+    pod2usage( { -verbose => 99,
+                 -exitval => 0,
+                 -message => "$header\n" } );
+}
+
+if ( ! (defined($list)) ){
+    pod2usage( {
+           -message => "$header\nAt least 1 parameters is mandatory (--list).\n",
+           -verbose => 0,
+           -exitval => 1 } );
+}
+###############
+# MANAGE output
+
+# The log and error files are named after the output name exactly as given on
+# the command line (before any fasta extension is stripped below).
+my $log=undef;
+if ($opt_output) {
+    open($log, '>', $opt_output."_log.txt") or die "Could not open file '$opt_output'_log.txt $!";
+}
+my $error=undef;
+if ($opt_output) {
+    open($error, '>', $opt_output."_error.txt") or die "Could not open file '$opt_output'_error.txt $!";
+}
+
+my $outstream;
+if ($opt_output) {
+    # Strip a trailing ".fasta"/".fa" only. The previous s/.fasta//g and
+    # s/.fa//g used an unescaped dot and no anchor, so they also removed
+    # e.g. "xfa" from the middle of a name.
+    $opt_output =~ s/\.(?:fasta|fa)\z//;
+    open($outstream, '>', $opt_output.".fa") or die "Could not open file '$opt_output' $!";
+    # $ostream= Bio::SeqIO->new(-fh => $fh, -format => 'Fasta' );
+}
+else{
+    $outstream = \*STDOUT;
+}
+
+#Manage column with the ID
+if (! defined $col){
+    $col=0;
+}
+else{$col=$col -1 ;}
+
+#Manage line to avoid
+if (! defined $lineToAvoid){
+    $lineToAvoid=0;
+}
+
+# Informational text goes through msg() (STDERR / log file): STDOUT may be the
+# FASTA stream when no output file is given and must stay clean.
+msg("The ID will be retrieved into the column ".($col+1)." of the file $list");
+msg("The first $lineToAvoid lines will be ignored.");
+
+########################
+### Manage INPUT FILES #
+my %list_of_ID;
+my $nbID=0;
+if (! -f $list){
+    die "Could not find the list file '$list'\n";
+}
+open(my $ID_list, '<', $list) or die "Could not open file '$list' $!";
+
+##############
+#### MAIN ####
+
+# create hash of ID to retrieve
+my $cpt_line=0;
+while ( my $line = <$ID_list> ) {
+    $cpt_line++;
+    next if $cpt_line <= $lineToAvoid;   # skip header lines
+
+    chomp $line;
+    # Skip blank lines. The previous test `! $_ =~ /^\s*$/` was parsed as
+    # `(!$_) =~ /^\s*$/`, which dropped a line containing only "0" and let
+    # whitespace-only lines through.
+    next if $line =~ /^\s*$/;
+
+    my @cols;
+    if (! $separator){
+        @cols = split /\s/, $line;
+    }
+    else{
+        @cols = split /$separator/, $line;
+    }
+
+    my $id = $cols[$col];
+    if (! defined $id){
+        msg("No column ".($col+1)." at line $cpt_line of $list, line skipped");
+        next;
+    }
+    $id =~ s/[^[:print:]]+//g;
+    next if $id eq '';
+    msg($id);
+    $list_of_ID{$id}++;
+    $nbID++;
+}
+close $ID_list;
+
+msg( "There is $nbID identifiers, lets try to extract the corresponding sequences\n");
+
+##Fetch correct ID
+foreach my $ID (keys %list_of_ID){
+
+    ############################################
+    # fetch the protein database using esearch
+    # It will give us a list of IDs
+    msg("### Now fetching the correct identifier into the protein database using esearch with the query: $ID\n");
+    my $idcorrect=undef;
+    my $no_hit=0;   # set when esearch reports zero hits for this query
+    try{
+        my $factory = Bio::DB::EUtilities->new(-eutil => 'esearch',
+                                               -email => 'me@foo.com',
+                                               -db => 'protein',
+                                               -retmax => [100000],
+                                               -term => $ID);
+        my $count = $factory->get_count;
+
+        if (! $count){ # Skip if nothing was found
+            if ($error) {
+                print {$error} "a - No identifier found for $ID\n"; ## => print to log error
+            }
+            else {
+                msg("a - No identifier found for $ID\n");
+            }
+            # Leave the try block only; the loop-level `next` is issued after
+            # it, because calling `next` inside a Try::Tiny block exits a
+            # subroutine and an eval through loop control.
+            $no_hit=1;
+            return;
+        }
+
+        # Go through the XML response to extract the ids
+        my $xml_data;
+        $factory->get_Response(-cb => sub { ($xml_data) = @_; } );
+
+        my $xmldoc = XML::LibXML->load_xml(string => $xml_data);
+
+        my @nodes = $xmldoc->getElementsByLocalName('Id');
+
+        # Only the first <Id> element is used
+        $idcorrect = $nodes[0]->textContent if @nodes;
+        sleep(1);
+    }
+    catch {
+        warn "caught error: $_"; # not $@
+    };
+    next if $no_hit;
+
+    if(! $idcorrect){
+        msg("b - No identifier found for $ID\n");
+        # $error only exists when an output name was given
+        print {$error} "b - No identifier found for $ID\n" if $error; ## => print to log error
+    }
+    else{
+        ##Fetch sequence from correct ID
+
+        ############################################
+        # fetch the protein database using efetch
+        # It will give us the fasta record
+        msg("### Now fetching the sequence into the protein database using efetch with the query: $idcorrect\n");
+        # A failing request for one identifier is reported and the loop
+        # continues, in the same way as for the esearch step above.
+        try{
+            my $factory = Bio::DB::EUtilities->new(-eutil => 'efetch',
+                                                   -email => 'me@foo.com',
+                                                   -db => 'protein',
+                                                   -retmax => [100000],
+                                                   -rettype => 'fasta',
+                                                   -id => $idcorrect);
+
+            my $fasta = $factory->get_Response->content;
+            print {$outstream} $fasta;
+        }
+        catch {
+            warn "caught error: $_";
+            print {$error} "c - Could not fetch the sequence for $ID ($idcorrect)\n" if $error;
+        };
+    }
+}
+
+if ($opt_output) {
+    close $outstream or die "Could not close file '$opt_output' $!";
+}
+close $error if $error;
+close $log if $log;
+
+#######################################################################################################################
+        ####################
+        #     METHODS      #
+         ################
+          ##############
+           ############
+            ##########
+             ########
+              ######
+               ####
+                ##
+
+# msg(@parts)
+# Build a "[HH:MM:SS] <parts joined by spaces>" line (Time::Piece localtime),
+# write it to the log file when one was opened and to STDERR unless -q is set.
+sub msg {
+    my $t = localtime;
+    my $line = "[".$t->hms."] @_\n";
+    print {$log} $line if $log;
+    print STDERR $line unless $quiet;
+}
+
+# runcmd(@command)
+# Log the command, run it with system() and call err() on a non-zero status.
+# NOTE(review): err() is not defined in this script; presumably it is expected
+# from GAAS::GAAS - verify, otherwise a failing command dies with
+# "Undefined subroutine".
+sub runcmd {
+    msg("Running:", @_);
+    system(@_)==0 or err("Could not run command:", @_);
+}
+
+# key_exits($choice, %HASH)
+# Return the key of %HASH that is numerically equal to $choice. While no key
+# matches, ask again on STDIN; the loop ends without a value when the answer
+# is empty/false or STDIN is exhausted.
+sub key_exits{
+    my ($resu_kingdom, %HASH)=@_;
+
+    while ($resu_kingdom){
+        foreach my $key (keys %HASH){
+            if($resu_kingdom == $key){
+                return $key;
+            }
+        }
+        print "Wrong choice, please try again:";
+        $resu_kingdom=<STDIN>;
+    }
+
+
+}
+
+
+=head1 NAME
+
+gaas_ncbi_get_sequence_from_list.pl
+
+=head1 DESCRIPTION
+
+
+The script retrieves the sequences corresponding to a list of NCBI IDs.
+The list should be a column in a file containing one ID per line.
+The result is written to the specified output file, or to STDOUT in fasta format.
+
+=head1 SYNOPSIS
+
+    gaas_ncbi_get_sequence_from_list.pl --list file.txt [ -o outfile ]
+    gaas_ncbi_get_sequence_from_list.pl --help
+
+=head1 OPTIONS
+
+=over 8
+
+=item B<--list> or B<-l>
+
+File containing the IDs in one column.
+
+=item B<--line>
+
+Integer, number of leading lines to skip. Allows headers to be ignored.
+
+=item B<--col>
+
+Column containing the ID. By default the first column is considered.
+
+=item B<-s>
+
+Field separator. By default whitespace (\s) is used as separator. You can define the one you want with this option.
+
+=item B<-q>
+
+Quiet mode: do not print progress messages on STDERR.
+
+=item B<-o>, B<--output> or B<--outfile>
+
+The name of the output file. By default the output is the standard output.
+
+=item B<-h> or B<--help>
+
+Display this helpful text.
+
+=back
+
+=cut
diff --git a/bin/gaas_new_species_WA_1_0_X.rb b/bin/gaas_new_species_WA_1_0_X.rb new file mode 100755 index 000000000..7cdcdc3da --- /dev/null +++ b/bin/gaas_new_species_WA_1_0_X.rb @@ -0,0 +1,256 @@ +#!/usr/bin/ruby +# == NAME +# build.rb +# +# == USAGE +# ./this_script.rb [ -h | --help ] +# [ -i | --infile ] |[ -o | --outfile ] | +# == DESCRIPTION +# A script to build a new WebApollo installation based on pre-built template +# +# == OPTIONS +# -h,--help:: Show help +# -s,--species=SPECIES:: Name of the species +# -g,--gff=GFF:: Annotation file to process (optional) +# -f,--fasta=FASTA:: Genome sequence +# +# == EXPERT OPTIONS +# +# == AUTHOR +# Marc Hoeppner, mphoeppner@gmail.com + +require 'optparse' +require 'ostruct' + +### Define modules and classes here + +def tomcat_group + + user_groups = `id` + user_groups.include?("tomcat") + +end + +def parse_config_xml(file,config) + + parsed_content = [] + f = File.open(file,"r") + while (line = f.gets) + + if line.include?("ENTER_DATASTORE_DIRECTORY_HERE") + line.gsub!(/ENTER_DATASTORE_DIRECTORY_HERE/, config[:data_dir]) + elsif line.include?("jdbc:postgresql:web_apollo_users") + line.gsub!(/jdbc:postgresql:web_apollo_users/, 
"jdbc:postgresql://localhost/#{config[:web_apollo_user_db]}") + elsif line.include?("ENTER_USER_DATABASE_USERNAME") + line.gsub!(/ENTER_USER_DATABASE_USERNAME/, config[:web_apollo_admin]) + elsif line.include?("ENTER_USER_DATABASE_PASSWORD") + line.gsub!(/ENTER_USER_DATABASE_PASSWORD/, config[:web_apollo_admin_pw]) + elsif line.include?("ENTER_PATH_TO_REFSEQS_JSON_FILE") + line.gsub!(/ENTER_PATH_TO_REFSEQS_JSON_FILE/, "#{config[:data_dir]}/refSeqs.json") + elsif line.include?("ENTER_ORGANISM") + line.gsub!(/ENTER_ORGANISM/ , config[:organism] ) + elsif line.include?("/config/translation_tables") + line.gsub!(/ncbi_1_/, "ncbi_#{config[:translation_table]}_") + end + + parsed_content << line + end + f.close + + parsed_content.compact! + + o = File.new(file, "w+") + o.puts parsed_content.join("\n") + o.close + +end + +def parse_hibernate_xml(file,config) + + parsed_content = [] + f = File.open(file,"r") + while (line = f.gets) + if line.include?("ENTER_DATABASE_CONNECTION_URL") + line.gsub!(/ENTER_DATABASE_CONNECTION_URL/, "jdbc:postgresql://localhost/#{config[:chado_db]}") + elsif line.include?("ENTER_USERNAME") + line.gsub!(/ENTER_USERNAME/, "#{config[:chado_db_user]}") + elsif line.include?("ENTER_PASSWORD") + line.gsub!(/ENTER_PASSWORD/, "#{config[:chado_db_user_pw]}") + end + + parsed_content << line + + end + f.close + parsed_content.compact! 
+ + o = File.new(file, "w+") + o.puts parsed_content.join("\n") + o.close + +end + +def parse_blat_xml(file,config) + + + lines = IO.readlines(file) + parsed_lines = [] + + lines.each do |line| + if line.include?("ENTER_PATH_TO_BLAT_BINARY") + line.gsub!(/ENTER_PATH_TO_BLAT_BINARY/, "#{config[:tool_dir]}/blat") + elsif line.include?("ENTER_PATH_FOR_TEMPORARY_DATA") + line.gsub!(/ENTER_PATH_FOR_TEMPORARY_DATA/, '/tmp') + elsif line.include?("ENTER_PATH_TO_BLAT_DATABASE") + line.gsub!(/ENTER_PATH_TO_BLAT_DATABASE/, "#{config[:annotation_dir]}/blat.2bit") + elsif line.include?("ENTER_ANY_BLAT_OPTIONS") + line.gsub!(/ENTER_ANY_BLAT_OPTIONS/, '-minScore=100 -minIdentity=60') + end + + parsed_lines << line.strip + + end + + f = File.new(file,"w+") + f.puts parsed_lines.join("\n") + f.close + +end + +### Get the script arguments and open relevant files +options = OpenStruct.new() +opts = OptionParser.new() +options.clean = false +opts.on("-s","--species", "=SPECIES","Species name") {|argument| options.species = argument } +opts.on("-g","--gff", "=GFF","Annotation") {|argument| options.gff = argument } +opts.on("-f","--fasta", "=FASTA","Fasta file") {|argument| options.fasta = argument } +opts.on("-c","--[no]clean","Clean up project") { options.clean = true } +opts.on("-t","--translation_table","=TRANSLATION_TABLE","NCBI translation table to use") { |argument| options.translation_table = argument } +opts.on("-h","--help","Display the usage information") { + puts opts + exit +} + +opts.parse! + +raise "You are not member of the tomcat group and thus cannot deploy WebApollo!" unless tomcat_group + +@species = options.species or abort 'No species name provided' +@organism = options.species.split('_')[0].capitalize + ' ' + options.species.split('_')[-1] +@fasta = options.fasta or abort 'No genome sequence provided' +options.translation_table ? 
@translation_table = options.translation_table : @translation_table = "1" + +# Custom CSS styles needed for WA +CSS_STRING = ".plus-cigarM {\nbackground-color: green; /* color for plus matches */\n}\n\n.minus-cigarM {\nbackground-color: blue; /* color for minus matches */\n}\n" + +### Usernames, passwords and locations + +user = ENV['USER'] +home = ENV['HOME'] +user_db_admin = ENV['WA_USER_DB_ADMIN'] or abort "Environment variable WA_PGUSER not set" +user_db_admin_pw = ENV['WA_WA_USER_DB_ADMIN_PW'] or abort "Environment variable WA_PGPASS not set" +wa_website_username = ENV['WA_WEBSITE_USERNAME'] or abort "Environment variable WA_WEBSITE_USERNAME not set" +wa_website_password = ENV['WA_WEBSITE_PASSWORD'] or abort "Environment variable WA_WEBSITE_PASSWORD not set" + +build_dir = ENV['APOLLO_BUILD_DIR'] or abort "Environment variable APOLLO_BUILD_DIR not set" +data_dir = ENV['APOLLO_DATA_DIR'] or abort "Environment vairable APOLLO_DATA_DIR not set" + +web_apollo_storage = "#{data_dir}/#{@species}" # Folder tree where data is stored +tomcat_apps = "/var/lib/tomcat/webapps" # Deployment location + +config = { + + :pguser => "#{user_db_admin}", # Username of the SQL WA admin + :pgpass => "#{user_db_admin_pw}", # PW of the SQL WA admin + :web_apollo_admin => "#{wa_website_username}", # Username of admin for WA website + :web_apollo_admin_pw => "#{wa_website_password}", # Password of WA website admin + :web_apollo_user_db => "web_apollo_users_#{@species}", # Name of the user database for this WA project + :web_apollo_source => "#{build_dir}/template_march2015", # The pre-built template install + :web_apollo_build => "#{build_dir}/#{@species}", # The location where this WA project is to be build + :web_apollo_storage => "#{web_apollo_storage}", # Location where data is stored + :annotation_dir => "#{web_apollo_storage}/annotations", # Where annotation data is stored + :translation_table => @translation_table, # NCBI translation table to use + :data_dir => 
"#{web_apollo_storage}/data", # Where the Jbrowse data is stored + :tool_dir => "/opt/ucsc", # Location of blat and FaToNib + :organism => @organism, # Species name + :chado_db => "nbis_chado", # Name of the chado db to use for storing annotations (optional) + :chado_db_user => "nbis_chado_user", # User with write permissions in chado db + :chado_db_user_pw => "nbis_chado_user" # PW for chado user +} + +### File targets + +config_files = [ "sample_config.properties" , "sample_config.xml" , "sample_blat_config.xml", "sample_hibernate.xml" ] + +### The workflow + +if options.clean == true + system("rm -Rf #{config[:web_apollo_storage]}") + system("rm -Rf #{config[:web_apollo_build]}") + system("rm -R temp") + system("psql -d template1 -U #{config[:pguser]} -c \"DROP DATABASE IF EXISTS #{config[:web_apollo_user_db]}\"") + +else + File.directory?(config[:web_apollo_source]) or raise "No template installation found!" + + # Create the folder where the data is to be stored + system("mkdir -p #{config[:web_apollo_storage]}") + system("mkdir -p #{config[:annotation_dir]}") + system("mkdir -p #{config[:data_dir]}") + system("chgrp -R tomcat #{config[:web_apollo_storage]}") # Must be owned by tomcat group + system("mkdir -p #{config[:web_apollo_build]}") + # Create a temporary folder + system("mkdir -p temp") + # Create a copy of the webapollo template code for this installation + system("mkdir -p #{config[:web_apollo_build]}") + system("cp -R #{config[:web_apollo_source]}/* #{config[:web_apollo_build]}") + + # Create a new database for this genome + system("psql -d template1 -U #{config[:pguser]} -c \"DROP DATABASE IF EXISTS #{config[:web_apollo_user_db]}\"") + system("psql -d template1 -U #{config[:pguser]} -c \"CREATE DATABASE #{config[:web_apollo_user_db]}\"") + # Load the database schema + system("psql -d #{config[:web_apollo_user_db]} -U #{config[:pguser]} -f #{config[:web_apollo_source]}/tools/user/user_database_postgresql.sql") + # Add website admin user + 
system("#{config[:web_apollo_build]}/tools/user/add_user.pl -D #{config[:web_apollo_user_db]} -U #{config[:pguser]} -P #{config[:pgpass]} -u #{config[:web_apollo_admin]} -p #{config[:web_apollo_admin_pw]}") + # Load genome assembly + system("#{config[:web_apollo_build]}/tools/user/extract_seqids_from_fasta.pl -p Annotations- -i #{@fasta} -o temp/seqids.txt") + system("#{config[:web_apollo_build]}/tools/user/add_tracks.pl -D #{config[:web_apollo_user_db]} -U #{config[:pguser]} -P #{config[:pgpass]} -t temp/seqids.txt") + system("#{config[:web_apollo_build]}/tools/user/set_track_permissions.pl -D #{config[:web_apollo_user_db]} -U #{config[:pguser]} -P #{config[:pgpass]} -u #{config[:web_apollo_admin]} -t temp/seqids.txt -a") + system("#{config[:web_apollo_build]}/bin/prepare-refseqs.pl --fasta #{@fasta} --out #{config[:data_dir]}") + # LOADING ANNOTATIONS ? + + # Add webapollo plugin to the genome browser + system("#{config[:web_apollo_build]}/client/apollo/bin/add-webapollo-plugin.pl -i #{config[:data_dir]}/trackList.json") + + # Update the config files + system("echo jbrowse.data=#{config[:data_dir]} > #{config[:web_apollo_build]}/config.properties") + system("echo datastore.directory=#{config[:annotation_dir]} >> #{config[:web_apollo_build]}/config.properties") + system("echo database.url=jdbc:postgresql://localhost/#{config[:web_apollo_user_db]} >> #{config[:web_apollo_build]}/config.properties") + system("echo database.username=#{config[:pguser]} >> #{config[:web_apollo_build]}/config.properties") + system("echo database.password=#{config[:pgpass]} >> #{config[:web_apollo_build]}/config.properties") + system("echo organism=#{@organism} >> #{config[:web_apollo_build]}/config.properties") + system("echo tracks.refseqs=#{config[:data_dir]}/seq/refSeqs.json >> #{config[:web_apollo_build]}/config.properties ") + system("echo tracks.data=#{config[:data_dir]} >> #{config[:web_apollo_build]}/config.properties") + + 
parse_config_xml("#{config[:web_apollo_build]}/config.xml",config) + parse_hibernate_xml("#{config[:web_apollo_build]}/hibernate.xml",config) + parse_blat_xml("#{config[:web_apollo_build]}/blat_config.xml",config) + + # Create custom CSS style sheet + f = File.new("#{config[:data_dir]}/custom.css","w") + f.puts CSS_STRING + f.close + + # Build Blat database + system("#{config[:tool_dir]}/faToTwoBit #{@fasta} #{config[:annotation_dir]}/blat.2bit") + + # Build the webapollo war file + Dir.chdir(config[:web_apollo_build]) do + system("./apollo deploy") + end + + # Copy the packaged WebApollo installation to the Tomcat folder + system("cp #{config[:web_apollo_build]}/target/apollo*.war #{tomcat_apps}/#{@species}.war") + +end + diff --git a/bin/gaas_orthomcl_analyzeOG.pl b/bin/gaas_orthomcl_analyzeOG.pl new file mode 100755 index 000000000..22c64750f --- /dev/null +++ b/bin/gaas_orthomcl_analyzeOG.pl @@ -0,0 +1,699 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Scalar::Util qw(looks_like_number); +#use Data::Dumper; +use Bio::DB::Taxonomy; +use Bio::TreeIO; +use Bio::Tree::Tree; +use Bio::Tree::TreeFunctionsI; +use Getopt::Long; +use IO::File; +use Pod::Usage; +use GAAS::GAAS; + +my $header = get_gaas_header(); + +#VARIABLE DECLARATION +my $orthoMCL_file; +my $opt_help; +my $opt_output=undef; +my $outFile="output"; +my $opt_tree; +my $nbProt=0; +my $speciesTreeString; +my $species_opt; my $focusThisTaxid=""; +my $taxid_opt; my @TAXID_LIST; + +my $message = "command line: orthomcl_analyzeOG.pl @ARGV\n\n"; + +# OPTION MANAGMENT +if ( !GetOptions( 'cog|kog|og=s' => \$orthoMCL_file, + 'o|out|output=s' => \$opt_output, + 't|tree=s' => \$opt_tree, + 'taxid=s' => \$taxid_opt, + 's|species=s' => \$species_opt, + 'h|help!' 
=> \$opt_help ) ) + +{ + pod2usage( { -message => "$header".'Failed to parse command line', + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($opt_help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if ( ! (defined($orthoMCL_file) ) ){ + pod2usage( { + -message => "$header\nAt least 1 parameter is mandatory:\nInput reference KOG/COG file (--cog).\n\n", + -verbose => 0, + -exitval => 1 } ); +} + +my $outReport; +if ( $opt_output ){ + $outFile=$opt_output; + $outFile=~ s/.gff//g; + open($outReport, '>', $outFile."_report.txt") or die "Could not open file '$outFile' $!"; +} +# print remind of options used +$message="Script launch ".localtime()."\n$message"; +print $message; if ( $opt_output ){print $outReport $message;} +############################################ + +#### create connection to NCBI => useful to create tree / get taxid from species name +my $db = Bio::DB::Taxonomy->new(-source => 'entrez'); +my %taxonList; + +#### Manage others otpion: +my $focusThisSpecies; +if ( defined($species_opt) ){ + + $focusThisTaxid="$species_opt"; + # get taxid + if (! 
looks_like_number($focusThisTaxid)){ # try to retrive taxid from name + $focusThisSpecies = $focusThisTaxid; + my @results=$db->get_taxonids($focusThisTaxid); + $focusThisTaxid=$results[0]; + }# get scientific name + else{ my $taxon = get_taxon_efficiently($focusThisTaxid); + $focusThisSpecies = $taxon->scientific_name; } + + $message = "You decided to focuse on $focusThisSpecies (taxid: $focusThisTaxid)\n"; + print $message; if ( $opt_output ){print $outReport $message;} +} + +#### Manage output ###### +my $outTree; my $outTreeName=$outFile.'_gene_flux.nhx'; +my $outSpTree; my $outSpTreeName=$outFile.'_species_tree.nhx'; + +if($opt_output){ + $outTree = Bio::TreeIO->new( + -format => 'nhx', + -file => '>'.$outTreeName, + ); + + $outSpTree = Bio::TreeIO->new( + -format => 'nhx', + -file => '>'.$outSpTreeName, + ); +} + +my $screenDisplayTree = Bio::TreeIO->new( + -format => 'nhx', + -fh => \*STDOUT, + ); + +############## +#### MAIN #### +############## + +#### FIRST: PARSE GOs AND DEFINE TAXID LIST +$message = "Step: Parse GO File (Duplicate species by OG removed)\n"; +print $message; if ( $opt_output ){print $outReport $message;} + +my ($hashOG, $taxidListFromOG) = readOGfile($orthoMCL_file, $focusThisTaxid); + +$message = "Parsing finished.\n"; +print $message; if ( $opt_output ){print $outReport $message;} + + +######### get taxid from OG file if none given ########### +if ( ! 
(defined($taxid_opt) ) ){ + my $nbTaxid= @$taxidListFromOG; + + $message = "No species taxid list has been defined, we will use the $nbTaxid species present among the parsed OGs.\n"; + print $message; if ( $opt_output ){print $outReport $message;} + + @TAXID_LIST=@$taxidListFromOG; +} +else{ @TAXID_LIST=split(/[:,_\-\s\/]+/,$taxid_opt); } + +$message = "\nList of taxid: \n"; +print $message; if ( $opt_output ){print $outReport $message;} + +foreach my $taxid (@TAXID_LIST){ + my $taxon = get_taxon_efficiently($taxid); + my $sci_name = $taxon->scientific_name; + + $message = "taxid $taxid corresponding to species $sci_name\n"; + print $message; if ( $opt_output ){print $outReport $message;} +} + +$message = "\n"; +print $message; +if ( $opt_output ){print $outReport $message;} + +######### SPECIES TREE MANAGEMENT ########### +my $speciesTreeReady; + +$message = "Step: Species tree creation\n"; +print $message; if ( $opt_output ){print $outReport $message;} + +## Tree exist +if( defined ($opt_tree)){ + #Get Tree in File + open(FIC,$opt_tree) or die "Couldn't open the file $opt_tree\n"; + #while( my $line = ) { + # $line =~ s/\n//g; + # $speciesTreeString.=$line; + #} + my $test = Bio::TreeIO->new(-file => $opt_tree); + $speciesTreeReady=$test->next_tree; +} +## Tree must be created +else{ + my $treeFromNCBI; + my @species_names; + foreach my $taxid (@TAXID_LIST){ + my $taxon = $db->get_taxon(-taxonid => "$taxid"); + my $spName=$taxon->scientific_name; +# print "$taxid = $spName\n"; + push(@species_names, $spName); + } + $speciesTreeReady = $db->get_tree(@species_names); +# print $speciesTreeReady, "\n"; + ## Clean Tree + $speciesTreeReady->contract_linear_paths(); +} + +### Print Tree ### +$message = "This is the tree we will used:\n"; +print $message; if ( $opt_output ){print $outReport $message;} + +print $screenDisplayTree->write_tree($speciesTreeReady), "\n"; +if ( $opt_output ){ + $outSpTree->write_tree($speciesTreeReady); + + #print species tree within the 
report file + open F, "<$outSpTreeName" or die "Could not open file '$outSpTreeName' $!";; + while () { + print $outReport $_; + } + close F; +} + +# create hash taxid => nodes (Leaves) +$message = "\n\nClean Taxid List according to tree provide:\n"; +print $message; if ( $opt_output ){print $outReport $message;} + +if (defined($opt_tree)){#Clean hashAbbTaxid to remove Taxid not existing in species Tree. Only useful when tree is no performed using taxid but comes as external file. + my @copy_TAXID_LIST=@TAXID_LIST; + foreach my $taxid (@copy_TAXID_LIST){ + my @nodes = $speciesTreeReady->find_node(-id => $taxid); + if ($#nodes == -1){ + + $message = "Species $taxid not present in Species Tree. We remove it from analyse.\n"; + print $message; if ( $opt_output ){print $outReport $message;} + + removeValueFromList($taxid, \@TAXID_LIST); + } + elsif ($#nodes >= 1){print "ERROR nb node not expected >1";exit;} + } +} + +## Create hast taxid/node whole Tree +my %hashTaxidNode; +foreach my $taxid (@TAXID_LIST){ + my @nodes = $speciesTreeReady->find_node(-id => $taxid); + $hashTaxidNode{$taxid}=$nodes[0]; +} + +$message = "\nStep clean GO: keep only @TAXID_LIST\n"; +print $message; if ( $opt_output ){print $outReport $message;} + +my $hashOGFiltered = filterOGfileByTaxid($hashOG,\@TAXID_LIST); + +$message = "\nStep: sort GO by species present \n"; +print $message; if ( $opt_output ){print $outReport $message;} + +my $hashOGsorted = sortOGforFlatDisplay($hashOGFiltered); +foreach my $key ( sort { $hashOGsorted->{$a} <=> $hashOGsorted->{$b}} keys %$hashOGsorted){ + $message = sizedPrint($key,100)."$hashOGsorted->{$key}\n"; + print $message; + if ( $opt_output ){print $outReport $message;} +} + +### Deduce appearance +$message = "\nStep: Deduce gene apparence (in ancestors or leaves)"; +print $message; if ( $opt_output ){print $outReport $message;} + +my $hashAppearance=deduceAppearance($hashOGFiltered, $speciesTreeReady, \%hashTaxidNode); +foreach my $taxID (keys 
%$hashAppearance){ + my $nbAppearance=$#{$hashAppearance->{$taxID}}+1; + + my $taxon = get_taxon_efficiently($taxID); + my $sci_name = $taxon->scientific_name; + + $message = "$nbAppearance genes occured at $taxID node ($sci_name).\n"; + print $message; if ( $opt_output ){print $outReport $message;} + + my @node = $speciesTreeReady->find_node(-id => $taxID); + $node[0]->add_tag_value('geneAppearance', $nbAppearance); +} + +$message = "\nStep: Deduce gene losses\n"; +print $message; if ( $opt_output ){print $outReport $message;} + +### Deduce lost +my %LossIdByAppearance; +foreach my $taxID (keys %$hashAppearance){ + my @node = $speciesTreeReady->find_node(-id => $taxID); + if( ! ($node[0]->is_Leaf )){ # avoid to study appearence on Leaf + my %LossId; + + ## Create list of leaves + my @original_ListLeavesId; my %original_hashNodeAllDescendant; my %original_hashTaxidAllDescendant; + + #print "\nAmong the $nbAppearance gene appeared at taxid $taxID ($sci_name) we have:\n"; + my @cladeNodes = $node[0]->get_all_Descendents(); ## I know all the descendent of Node of appearance + foreach my $NodeFromDesc (@cladeNodes){ #get List leaves + my $NodeFromDescTaxid = $NodeFromDesc->id(); + $original_hashNodeAllDescendant{$NodeFromDesc}=$NodeFromDescTaxid; + $original_hashTaxidAllDescendant{$NodeFromDescTaxid}=$NodeFromDesc; + if( $NodeFromDesc->is_Leaf ){ + push( @original_ListLeavesId, $NodeFromDescTaxid); + } + } + ## Create the corresponding hash of leaves + my %original_hashLeavesId = map { $_ => 1 } @original_ListLeavesId; + ## Create the corresponding list of nodes + my %original_hashLeavesNodes; + foreach my $leafId (@original_ListLeavesId){ + my @node = $speciesTreeReady->find_node(-id => $leafId); + $original_hashLeavesNodes{$leafId}=$node[0]; + } + + + #### loop each list of present taxid + my @ListsTaxidPresent = @{$hashAppearance->{$taxID}}; # all list of Leaves Present + foreach my $oneList (@ListsTaxidPresent){ + my @ListTaxidPresent=@$oneList; # List of taxid 
that have the gene + my %hashTaxidPresent = map { $_ => 1 } @ListTaxidPresent; # Hash of taxid that have the gene +# print "ListTaxidPresent $#ListTaxidPresent @ListTaxidPresent 5555555555 sizeList $#original_ListLeavesId @original_ListLeavesId\n"; + + + ### Case No loss + if($#ListTaxidPresent == $#original_ListLeavesId){ + next; + } + + ### Case only one loss + elsif($#ListTaxidPresent == $#original_ListLeavesId-1){ + foreach my $taxid (@original_ListLeavesId){ + if(! exists($hashTaxidPresent{$taxid})){ # avoid to study lost on Leaf + $LossId{$taxid}++; + last; + } + } + } + + ### Case several Lost or only one ancestral lost + else{ + ## Create list of Taxid Absent + my @ListTaxidAbsent; + my %hashNodePresent; my %hashNodeAbsent; + foreach my $taxid (@original_ListLeavesId){ + my @node = $speciesTreeReady->find_node(-id => $taxid); + if(! exists($hashTaxidPresent{$taxid})){ + push(@ListTaxidAbsent, $taxid); + $hashNodeAbsent{$node[0]}=$taxid; + } + else{$hashNodePresent{$node[0]}=$taxid;} + } + my %hashTaxidAbsent = map { $_ => 1 } @ListTaxidAbsent; + + ## Analyze + my @listTaxidWithLoss; + my %hashTaxidAdded; + my @copy_listTaxidAbsent = @ListTaxidAbsent; + while ($#copy_listTaxidAbsent > -1){ + my $OneTaxid = shift @copy_listTaxidAbsent; + my $nodeOneTaxid = $original_hashTaxidAllDescendant{$OneTaxid}; +# print "OneTaxid $OneTaxid oneList List Absent: $@oneList = @ListTaxidAbsent\n"; + + ## Study ancestor + my $parentNode = $nodeOneTaxid->ancestor; + my @ancestorCladeNodes = $parentNode->get_all_Descendents(); + my $presentGeneFound="no"; + foreach my $anc_clNode (@ancestorCladeNodes){ + # my $clNodeTaxid = $clNode->id(); + # if(exists(clNodeTaxid)) + # + if(exists($hashNodePresent{$anc_clNode})){ + $presentGeneFound="yes"; + $LossId{$OneTaxid}++; +# print "oui existe save child $OneTaxid\n"; + last; + } + } + if($presentGeneFound eq "no"){ + my $taxidFocused = $original_hashNodeAllDescendant{$parentNode}; + if(! 
exists($hashTaxidAdded{$taxidFocused})) { + push ( @copy_listTaxidAbsent, $taxidFocused); # Push new ancestrak taxid to test if absent if before + $hashTaxidAdded{$taxidFocused}++; +# print "je push $taxidFocused \n"; + } + } + } + } + } + # save results + my $nbKey = keys %LossId; + if ($nbKey >= 1){ + foreach my $key (keys %LossId){ + + my $taxon = get_taxon_efficiently($key); + my $sci_name = $taxon->scientific_name; + #print "$LossId{$key} loss at $key ($sci_name)\n"; + $LossIdByAppearance{$taxID}{$key}=$LossId{$key}; + } + } + else{ + $LossIdByAppearance{$taxID}{'null'}++; + #print "No loss\n"; + } + } +} + +## Merge All Loss A only one appearance +my %lossMergedByTaxid; +foreach my $keyID (keys %LossIdByAppearance){ + + my $taxon = get_taxon_efficiently($keyID); + my $sci_name = $taxon->scientific_name; + my $nbAppearance=$#{$hashAppearance->{$keyID}}+1; + + my $message = "\nAmong the $nbAppearance genes appeared at taxid $keyID ($sci_name) we have:\n"; + print $message; if ( $opt_output ){ print $outReport $message; } + + if($LossIdByAppearance{$keyID}{'null'}){ + + my $message = "No loss\n"; + print $message; if ( $opt_output ){ print $outReport $message; } + } + else{ + foreach my $keyID2 (keys %{$LossIdByAppearance{$keyID}}){ + my $value=$LossIdByAppearance{$keyID}{$keyID2}; + + my $taxon = get_taxon_efficiently($keyID2); + my $sci_name = $taxon->scientific_name; + + my $message = "$value loss at $keyID2 ($sci_name)\n"; + print $message; if ( $opt_output ){ print $outReport $message; } + + if(exists ($lossMergedByTaxid{$keyID2})){ + $lossMergedByTaxid{$keyID2}=$lossMergedByTaxid{$keyID2}+$value; + } + else{ + $lossMergedByTaxid{$keyID2}=$value; + } + } + } +} + +$message = "\nFinal losses Resume (Total number of loss independent of their birth):\n"; +print $message; if ( $opt_output ){ print $outReport $message; } + +foreach my $keyID (keys %lossMergedByTaxid){ + + my $taxon = get_taxon_efficiently($keyID); + my $sci_name = $taxon->scientific_name; + + my 
$message = "Gene lost in taxid $keyID ($sci_name) => $lossMergedByTaxid{$keyID}\n"; + print $message; if ( $opt_output ){ print $outReport $message; } + + my @node = $speciesTreeReady->find_node(-id => $keyID); + $node[0]->add_tag_value('geneLoss', $lossMergedByTaxid{$keyID}); +} + +$message = "\n"; +print $message; if ( $opt_output ){ print $outReport $message; } + +print $screenDisplayTree->write_tree($speciesTreeReady), "\n"; +if ( $opt_output ){ + print $outTree->write_tree($speciesTreeReady); + + #print tree within the report file + open F, "<$outTreeName" or die "Could not open file '$outSpTreeName' $!";; + while () { + print $outReport $_; + } + close F; +} + +my $finalMessage = "\nThe results given by this programm allow to have an overview of gene flux between the different species studied. The gene appearences and losses should be cautiously interpreted.". +"/!\\ The study is based on a Dollo like parsimomy. We allow genes to appear only one time. As you should be aware, annotations are often incomplete. Moreover, method of gene clustering have limitation to define precisely the orthologous groups.". +" Consequently, the gene described as lost here are only potential losses. 
Verification should be performed.\nEND\n"; + +print $finalMessage; +if ( $opt_output ){ print $outReport $finalMessage; } + +################################################################## FUNCTIONS ##################################### +sub deduceAppearance{ + my ($hashOGFiltered, $speciesTreeReady, $hashTaxidNode)=@_; + + my %hashAppearance; + my $nbOGstudied=0; + foreach my $key (keys %$hashOGFiltered){ + my @speciesList=@{$hashOGFiltered->{$key}}; + if ($#speciesList == 0){ + $nbOGstudied++; + push (@{$hashAppearance{$speciesList[0]}}, [@speciesList]); + } + else{ + my @nodesList; + foreach my $taxid (@speciesList){ + push (@nodesList, $hashTaxidNode{$taxid}); + } + my $nbNodes=scalar @nodesList; + # print "nbNodes $nbNodes\n"; + if ( $nbNodes <= 1){ + # print "Not enough species kept for reconstruct lca. We skip this OG.\n"; + next; + } + my $lca=$speciesTreeReady->get_lca(-nodes => \@nodesList); + my $idLCA=$lca->id(); + $nbOGstudied++; + # print "resu $nbOGstudied $idLCA\n"; + push (@{$hashAppearance{$idLCA}}, [@speciesList]); + } + } + print "\nWe studied $nbOGstudied OGs\n"; + return (\%hashAppearance); +} + +sub removeValueFromList{ + my ($val, $array)=@_; + my $index = 0; + $index++ until $array->[$index] eq $val; + splice(@$array, $index, 1); +} + +sub sizedPrint{ + my ($term,$size) = @_; + my $result; my $sizeTerm=length($term); + if ($sizeTerm > $size ){ + $result=substr($term, 0,$size); + return $result; + } + else{ + my $nbBlanc=$size-$sizeTerm; + $result=$term; + for (my $i = 0; $i < $nbBlanc; $i++){ + $result.=" "; + } + return $result; + } +} + +sub sortOGforFlatDisplay{ +my ($hashOGref)=@_; +my %hashOGidSentence; + foreach my $OGkey (keys %$hashOGref){ + my @ListSpeciesOG=@{$hashOGref->{$OGkey}}; + my @ListSpeciesOGSorted = ( sort ({ $a <=> $b } @ListSpeciesOG)); + my $IDsentence; my $cpt=0; + foreach my $key (@ListSpeciesOGSorted){ + if ($cpt == 0){$IDsentence.="$key";} + else{$IDsentence.="_$key";} + $cpt++; + } + 
$hashOGidSentence{$IDsentence}++; + } + return \%hashOGidSentence; +} + +sub filterOGfileByTaxid{ +my ($hashOGref, $ListTaxidToTest)=@_; +my $OGnotKept=0; +my $OGKept; +my %hashOGrefCleaned; +my %taxidAnalyzed; + + foreach my $OGkey (keys %$hashOGref){ # foreach OG group + my @newOG; + my @ListSpeciesOG=@{$hashOGref->{$OGkey}}; + foreach my $taxidOG (@ListSpeciesOG){ # foreach species of the OG + foreach my $taxidToTest (@$ListTaxidToTest){ + if($taxidOG eq $taxidToTest){ + push(@newOG, $taxidOG); + $taxidAnalyzed{$taxidOG}++; + } + } + } + if ($#newOG == -1 ){ + $OGnotKept++; + } + else{ + @{$hashOGrefCleaned{$OGkey}}=@newOG; + $OGKept++; + } + } + print "$OGnotKept OG removed\n"; print "$OGKept OG kept containing at least one of these species: @$ListTaxidToTest\n"; + + ##check if Taxid in list given are not present in OG => Warning message + foreach my $taxidToTest (@$ListTaxidToTest){ + if ( ! exists ($taxidAnalyzed{$taxidToTest})){ print "///!\\\\\\ WARNING MESSAGE: Taxid $taxidToTest given as input is not present in the OG analyzed."} + } + + return \%hashOGrefCleaned; +} + +sub readOGfile{ +my ($OG_file, $TaxidThisSpecies)=@_; +my %allTaxidFromOG; my @allTaxid; +my %nbSpeciesByOG; +my %nbSpeciesByOGwithThisSpecies; +my %HashSpeciesByOG; + + open(FIC,$orthoMCL_file) or die "Couldn't open the file $orthoMCL_file\n"; + while( my $line = ) { + if (! 
($line =~ /^#/)){ + chomp($line) ; + my @splitedLine = split(" ",$line); + my %hashOGspecies; my %hashOGspeciesWithThisSpecies; my $OGcontainsThisSpecies="no"; + + my $OGname=shift @splitedLine; + $OGname=~s/://g ; # remove ":" + foreach my $prot (@splitedLine ){ + my @splitedProt = split("\\|",$prot); + $hashOGspecies{$splitedProt[0]}++; #hash of taxid + $allTaxidFromOG{$splitedProt[0]}++; + # check for species centered analysis + if ($splitedProt[0] eq $TaxidThisSpecies){ + $OGcontainsThisSpecies="yes"; + } + } + # species centered analysis + if ($OGcontainsThisSpecies eq "yes"){ + foreach my $prot (@splitedLine ){ + my @splitedProt = split("\\|",$prot); + $hashOGspeciesWithThisSpecies{$splitedProt[0]}++; #hash of taxid + } + my $nbSpecies= keys %hashOGspeciesWithThisSpecies; # count number of taxid + $nbSpeciesByOGwithThisSpecies{$nbSpecies}++; #General infornation about number of OG containing a number of species + } + + my $nbSpecies= keys %hashOGspecies; # count number of taxid + $nbSpeciesByOG{$nbSpecies}++; #General infornation about number of OG containing a number of species + # print "There is $nbSpecies species in the group $OGname\n"; + foreach my $key (keys %hashOGspecies){ + push(@{$HashSpeciesByOG{$OGname}}, $key); + } + } + } + + ### Manage list of all taxid present in OGs + foreach my $key (keys %allTaxidFromOG){ + push (@allTaxid, $key) + } + + my $nbOGanalyzed=keys %HashSpeciesByOG; + print "$nbOGanalyzed OGs analized:"; + + foreach my $nbKey (sort {$b <=> $a} keys %nbSpeciesByOG){ + print "There is $nbSpeciesByOG{$nbKey} OG containing $nbKey species\n"; + } + print "\n\n"; + if($TaxidThisSpecies ne ""){ + print "Analysis focusing on $TaxidThisSpecies\n"; + my $size = keys %nbSpeciesByOGwithThisSpecies; + if ($size == 0){print "This taxid has not been retrieved among GOs parsed. 
\n";} + else{ + foreach my $nbKey (sort {$b <=> $a} keys %nbSpeciesByOGwithThisSpecies){ + print "There is $nbSpeciesByOGwithThisSpecies{$nbKey} OG containing $nbKey species including taxid $TaxidThisSpecies\n"; + } + } + } + return \%HashSpeciesByOG, \@allTaxid ; +} + +sub get_taxon_efficiently{ + my ($taxid) = @_; + + my $taxon; + if(! exists($taxonList{$taxid})){ + if($taxid !~ /^\d+$/){warn "taxid is expected to be a number. We got <$taxid>. Please fix your txt file to have proper taxid.";exit;} + $taxon = $db->get_taxon(-taxonid => $taxid); + $taxonList{$taxid} = $taxon; + }else{ + $taxon = $taxonList{$taxid}; + } + +} +__END__ + +=head1 NAME + +analyzeOG.pl - +The script computes some statistics of a COG/KOG file from OrthoMCL output - +Statistics as : - number of OG by number of species + - number of OG by number of species that includes a specifc species (if specified by -s option) + - gene appearances + - gene losses + - Amount of gene at each node/leaf (Compute for ancestral nodes is Dallo like parsimony => on gene appear only one time. Moreover we do not consider potential HGTs). + - etc... +*OG = Ortholog Group + +Prerequisite: OrthoMCL output where taxid (http://www.ncbi.nlm.nih.gov/taxonomy/) has been used to label the sequences. As example: +OG00001: 10090|ENSMUSP0000001 9606|ENSP0000001 + +=> In that case The orthologous group "OG00001" contains 2 sequences, one comming from Mus musculus (taxid 10090) and the other from human (taxid 9606). + +=head1 SYNOPSIS + + ./analyzeOG.pl --cog infile [ -s TaxidSpeciesWhichFocusOn -t tree -taxid taxidA_taxidB_taxidC --output outfile ] + ./analyzeOG.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<--cog>, B<--og> or B<--kog> + +Orthomcl file containg Ortholog groups (COG) from OrthoMCL. + +=item B<--taxid> + +Taxid list. If provided the analyse will use only these species. If a tree is also provided, the taxid will be filtered according to the tree to keep only taxid present in the tree. 
+If no taxid is provided, but a tree is, only species from the tree will be analyzed. If no tree and no taxid are provided, only taxid among OG will be use. + +=item B<-t> or B<--tree> + +Tree file in nhx format. If provided the analyse will focuse only on species present in the tree. +When no tree is provided, a species tree will be created on the fly using the NCBI taxonomy database online according to the species present among the OG. + +=item B<-s>, B<--species> or B<-ref> + +taxid or scientific name (use underscore instead of spaces). It allows to focus the analysis only on OG containg the species defined. + +=back + +=cut diff --git a/bin/gaas_pasa_create_chunks.rb b/bin/gaas_pasa_create_chunks.rb new file mode 100755 index 000000000..c2417183c --- /dev/null +++ b/bin/gaas_pasa_create_chunks.rb @@ -0,0 +1,170 @@ +#!/usr/bin/ruby +# == NAME +# pasa_create_chunks.rb +# +# == USAGE +# ./this_script.rb [ -h | --help ] +# [ -g | --genome ] |[ -c | --cufflinks ] | [ -t | --trinity ] | [ -s | --chunks ] | [ -r, --ref] | [-a , --analysis ] +# == DESCRIPTION +# Creates several PASA chunks from input data to increase parallelization +# +# == OPTIONS +# -h,--help:: Show help +# -g,--genome=GENOME:: Genome file +# -c,--cufflinks=CUFFLINKS:: cufflinks file +# -t,--trinity=TRINITY:: Trinity file +# -s,--chunks=CHUNKS:: number of chunks to create +# -r,--ref=REF:: Reference annotation to polish +# -a,--analysis=ANALYSIS:: A name for this analysis (to later identify the SQL databases) + +# == EXPERT OPTIONS +# +# == AUTHOR +# Marc Hoeppner, mphoeppner@gmail.com + +require 'rdoc/usage' +require 'optparse' +require 'ostruct' +require 'logger' +require 'bio' +require 'fileutils' +require 'pathname' + +### Define modules and classes here + +def make_chunk_store(fasta_bin,chunks) + chunk_store = {} + chunk_counter = 0 + + chunks.times do + chunk_counter += 1 + chunk_store[chunk_counter] = [] + end + + fasta_bin = fasta_bin.sort_by{|k,v| v } + while fasta_bin.length > 0 + 
chunk_counter = 0 + chunks.to_i.times do + next if fasta_bin.empty? + chunk_counter += 1 + entry = fasta_bin.shift + chunk_store[chunk_counter] << entry[0] + end + end + return chunk_store +end + +def parse_lines(file,sequences) + + answer = [] + infile = File.open(file, "r") + while (line = infile.gets) + line.strip! + e = line.split("\t")[0] + next unless sequences.include?(e) + answer << line + end + infile.close + return answer + +end + +### Get the script arguments and open relevant files +options = OpenStruct.new() +opts = OptionParser.new() +opts.on("-h","--help","Display the usage information") {RDoc::usage} +opts.on("-g","--genome", "=GENOME","Genome sequence") {|argument| options.genome = argument } +opts.on("-c","--cufflinks", "=CUFFLINKS","Cufflinks GTF file") {|argument| options.cufflinks = argument } +opts.on("-t","--trinity", "=TRINITY","Trinity fasta file") {|argument| options.trinity = argument } +opts.on("-s","--chunks", "=CHUNKS","Number of chunks") {|argument| options.chunks = argument } +opts.on("-r","--ref", "=REF","Reference annotation") {|argument| options.ref = argument } +opts.on("-a","--analysis", "=ANALYSIS","Name for this analysis") {|argument| options.analysis = argument } + +opts.parse! rescue RDoc::usage('usage') + +options.chunks = 2 unless options.chunks +raise "No Genome sequence provided!" unless options.genome +raise "No Trinity assembly provided!" unless options.trinity +#raise "No reference annotation provided!" unless options.ref +#raise "No cufflinks annotation provuded!" unless options.cufflinks +raise "No analysis name defined!" unless options.analysis +pasa_dir = "/sw/bioinfo/pasa2/r20140414" +pasa_conf_template = "#{pasa_dir}/pasa_conf/pasa.alignAssembly.Template.txt" +wd = Dir.getwd +analysis = "crow27" + +genome_file = options.genome +cufflinks_file = options.cufflinks +trinity_file = options.trinity + +options.cufflinks ? 
cufflinks_flag = "--cufflinks_gtf cufflinks.gtf" : cufflinks_flag = "" +models_flag = "" + +#Container for fasta +fasta_bin = {} + +# Gather stats on fasta sequence lengths +Bio::FastaFormat.open(options.genome).each_entry do |entry| + fasta_bin[entry.definition] = entry.naseq.length +end + +chunk_store = make_chunk_store(fasta_bin,options.chunks.to_i) + +chunk_store.each do |chunk,sequences| + + # Create the folder for this chunk + system("mkdir -p chunk_#{chunk}") + + # Create a Pasa config file + system("sed 's/\<__MYSQLDB__\>/pasa2_#{options.analysis}_polish_chunk_#{chunk}/g' #{pasa_conf_template} > chunk_#{chunk}/pasa.conf") + + # Grep out the relevant lines from the cufflinks assembly and reference annotation + + if options.cufflinks + cufflinks_lines = parse_lines(options.cufflinks,sequences) + cufflinks_file = File.new("#{wd}/chunk_#{chunk}/cufflinks.gtf","w+") + cufflinks_lines.each{|l| cufflinks_file.puts l} + cufflinks_file.close + end + + if options.ref + models_flag = "-A -L --annots_gff3 models.gff" + model_lines = parse_lines(options.ref,sequences) + model_file = File.new("#{wd}/chunk_#{chunk}/models.gff", "w+") + model_lines.each{|l| model_file.puts l } + model_file.close + end + + + # Copy over all Trinity files + system("cp #{options.trinity}* chunk_#{chunk}/") + + outfile = File.new("#{wd}/chunk_#{chunk}/genome_chunk.fa", "w+") + + # Extract the contigs for this chunk + Bio::FastaFormat.open(options.genome).each_entry do |entry| + next unless sequences.include?(entry.definition) + outfile.puts entry.to_s + end + + outfile.close + + # Write a bsub file + bsub_file = File.new("#{wd}/chunk_#{chunk}/bsub.pasa_chunk", "w+") + + command_lines = [ + "#BSUB -n 16", + "#BSUB -R \"span[hosts=1]\"", + "#BSUB -e err.pasa", + "#BSUB -J PasaChunk-#{chunk}", + "Launch_PASA_pipeline.pl -c pasa.conf -C -R --ALIGNER blat,gmap -g genome_chunk.fa -t #{options.trinity}.clean -T -u #{options.trinity} --CPU 16 --TRANSDECODER --transcribed_is_aligned_orient 
#{cufflinks_flag} #{models_flag}" + ] + + command_lines.each do |cl| + bsub_file.puts cl + end + + bsub_file.close + +end + diff --git a/bin/gaas_pasa_find_duplicates.rb b/bin/gaas_pasa_find_duplicates.rb new file mode 100755 index 000000000..e24236b73 --- /dev/null +++ b/bin/gaas_pasa_find_duplicates.rb @@ -0,0 +1,31 @@ +#!/usr/bin/ruby + +seen = [] +this_seq = nil +counter = 0 + +file = File.open(ARGV.shift,"r") + +while (line = file.gets) + + line.strip! + next if line.length == 0 + + seq = line.split("\t")[0] + + this_seq = seq if this_seq.nil? + if this_seq != seq + seen = [] + this_seq = seq + end + + if seen.include?(line) + counter += 1 + else + seen << line + puts line + end + +end + +warn "#{counter} lines removed!" diff --git a/bin/gaas_pfam2grid.pl b/bin/gaas_pfam2grid.pl new file mode 100755 index 000000000..9ea040ac6 --- /dev/null +++ b/bin/gaas_pfam2grid.pl @@ -0,0 +1,319 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Getopt::Long; +use Pod::Usage; +use Scalar::Util qw(openhandle); +use Time::Piece; +use Time::Seconds; +use File::Basename; +use Bio::SeqIO; +use Cwd; +use Carp; +use Bio::SeqFeature::Generic; +use Bio::Tools::GFF; +no strict qw(subs refs); +use GAAS::Grid::Bsub; +use GAAS::Grid::Sbatch; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $pfam_hmm_file = "/projects/references/databases/pfam/31.0/Pfam-A.hmm"; + +my $outdir = "pfam_output"; +my $fasta = undef; +my @cmds = (); # Stores the commands to send to farm +my $quiet; +my $help; +my $chunk_size = 500; +my $grid="Slurm"; +my $queue=undef; +my @chunks = (); # Holds chunks, partitioning the fasta input (so we + # don't send 50.000 jobs to the farm... + +if ( !GetOptions( + "help|h!" 
=> \$help, + "fasta|f=s" => \$fasta, + "hmm=s" => \$pfam_hmm_file, + "chunk_size=i" => \$chunk_size, + "grid=s" => \$grid, + "quiet|q" => \$quiet, + "queue=s" => \$queue, + "outdir|o=s" => \$outdir)) + +{ + pod2usage( { -message => "Failed to parse command line", + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if ( ! defined($fasta) ){ + pod2usage( { + -message => "$header\nAt least 1 parameter is mandatory:\nInput fasta file\n\n", + -verbose => 0, + -exitval => 2 } ); +} + +# set grid option properly +my @grid_choice=('slurm','lsf','none'); +$grid=lc($grid); +if (! grep( /^$grid/, @grid_choice ) ) { + print "$grid is not a value accepted for grid parameter.";exit; +} +$grid= undef if lc($grid) eq 'none'; + +if (! -e $pfam_hmm_file){ + print "The cm file ".$pfam_hmm_file." does not exist. Please define it using the cm option.\n";exit; +} +if (! -e $fasta){ + print "The fasta file ".$fasta." does not exist.\n";exit; +} + +# .. Check that all binaries are available in $PATH +my @tools = ("hmmscan" ); # List of tools to check for! +foreach my $exe (@tools) { check_bin($exe) == 1 or die "Missing executable $exe in PATH"; } + +# .. Create output directory + +if (-d $outdir ) { + die "Output directory $outdir exists. Please remove and try again"; +} else { + msg("Creating output directory $outdir"); + runcmd("mkdir -p $outdir") +} + +# .. set up log file + +my $logfile = "$outdir/pfam_search.log"; +msg("Writing log to: $logfile"); +open LOG, '>', $logfile or err("Can't open logfile"); + +# .. Read genome fasta file. +my $inseq = Bio::SeqIO->new(-file => "<$fasta", -format => 'fasta'); + +# .. 
and create chunks +msg("Creating chunks\n"); + +my @seqarray = (); +my $counter = 0; +my $chunk_counter = 1; +my $seq; +my $outfile; +while( $seq = $inseq->next_seq() ) { + $counter += 1; + push( @seqarray, $seq ); + + if ( $counter == $chunk_size ) { + $outfile = $outdir . "/chunk_" . $chunk_counter . ".fa"; + write_chunk( $outfile, @seqarray ); + @seqarray = (); + $chunk_counter += 1; + $counter = 0; + } +} + + +$outfile = + $outdir . "/chunk_" . + $chunk_counter . ".fa"; # Clunky, the last chunk is <= chunk_size... +write_chunk( $outfile, @seqarray ); + +# Push all jobs into the command list +for ( my $i = 1; $i <= $chunk_counter; $i++ ) { + my $infile = $outdir . "/chunk_" . $i . ".fa"; + my $outfile = $outdir . "/chunk_" . $i . ".pfam"; + my $cmd = "hmmscan --cpu 1 --domtblout " . $outfile . " " . $pfam_hmm_file . " " . $infile . " > /dev/null" ; + push( @cmds, $cmd ); + } + + # Submit job chunks to grid + msg("submitting chunks\n"); + +if( $grid ){ + msg("Sending $#cmds jobs to the grid\n"); + chomp(@cmds); # Remove empty indices + # Submit job chunks to grid + my $grid_runner; + if ( $grid eq 'lsf'){ + $grid_runner = Bsub->new( cmds_list => \@cmds); + } + elsif( $grid eq 'slurm'){ + $grid_runner = Sbatch->new( cmds_list => \@cmds); + } + if($queue){$grid_runner->queue($queue)} + $grid_runner->run(); +} +else{ + foreach my $command (@cmds){ + + system($command); + + if ($? == -1) { + print "failed to execute: $!\n"; + } + elsif ($? & 127) { + printf "child died with signal %d, %s coredump\n", + ($? & 127), ($? & 128) ? 'with' : 'without'; + } + else { + printf "child exited with value %d\n", $? 
>> 8; + } + } +} + +# ..Postprocessing here, merging of output + +msg("Merging output and writing GFF file"); + +my @files = <$outdir/*.pfam>; + +$outfile = "pfam.merged"; +open (my $OUT, '>', $outdir."/".$outfile) or die "FATAL: Can't open file: $outfile for reading.\n$!\n"; + +foreach my $file (@files) { + + open (my $IN, '<', $file) or die "FATAL: Can't open file: $file for reading.\n$!\n"; + + while (<$IN>) { + chomp; + my $line = $_; + next if ($line =~ /^#.*$/); # Skipping comment lines + + print $OUT $line; + } +} + +close ($OUT); + +msg("Finished pfam grid run."); + +# -------------------- + +sub write_chunk +{ + my $outfile = shift; + my @seqs = @_; + my $seq_out = + Bio::SeqIO->new( -file => ">$outfile", -format => 'fasta' ); + foreach my $seq (@seqs) { $seq_out->write_seq($seq) } +} + +# -------------------- + +sub msg { + my $t = localtime; + my $line = "[".$t->hms."] @_\n"; + print LOG $line if openhandle(\*LOG); + print STDERR $line unless $quiet; +} + +# -------------------- + +sub runcmd { + msg("Running:", @_); + system(@_)==0 or err("Could not run command:", @_); +} + +# -------------------- + +sub check_bin { + length(`which @_`) > 0 ? return 1 : return 0; +} + +#---------------------------------------------------------------------- + +sub err { + $quiet=0; + msg(@_); + exit(2); +} + +__END__ + +=head1 NAME + +gaas_pfam2grid.pl + +=head1 DESCRIPTION + +Chunk input data to run multiple hmmscan searches in parallel +We run hmmscan searches against a pfam.hmm + +=head1 SYNOPSIS + + gaas_pfam2grid.pl -f genome.fasta -o outdir + gaas_pfam2grid.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<--fasta> or B<-f> + +The name of the fasta file to read. + +=item B<--chunk_size> + +We create chunks with a maximum of $chunk_size sequences. By default 500. + +=item B<--hmm> + +File containing the pfam hmm models + +=item B<--queue> + +If you want to define a particular queue to run the jobs + +=item B<--grid> + +Define which grid to use, Slurm, Lsf or None. 
Default = Slurm. + +=item B<--quiet> or B<-q> + +Quiet mode + +=item B<--outdir> or B<-o> + +The name of the output directory. + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_prep-contigs.sh b/bin/gaas_prep-contigs.sh new file mode 100755 index 000000000..88b6d96e9 --- /dev/null +++ b/bin/gaas_prep-contigs.sh @@ -0,0 +1,85 @@ +#!/bin/sh -e + +# References: +# https://support.10xgenomics.com/genome-exome/software/pipelines/latest/advanced/references +# https://en.wikipedia.org/wiki/Bin_packing_problem + +max_contigs=500 +padding=500 +out_width=70 + +order= + +while getopts 'n:p:rs:w:' opt; +do + case $opt in + n) max_contigs=$OPTARG ;; + p) padding=$OPTARG ;; + r) order=r ;; + w) out_width=$OPTARG ;; + *) + echo 'Error in command line parsing' >&2 + exit 1 + esac +done + +shift "$(( OPTIND - 1 ))" + +fasta_in="$1" +fasta_out="$(dirname "$fasta_in")/packed-$(basename "$fasta_in")" +fasta_map="$(dirname "$fasta_in")/$(basename "$fasta_in").map" + +if [ ! 
-f "$fasta_in" ]; then + printf 'Can not read from "%s"\n' "$fasta_in" >&2 + exit 1 +fi + +cat >&2 <&2 +else + echo ' sorting: (no -r) increasing' >&2 +fi + +padding_str=$( perl -e "print 'N' x $padding" ) + +samtools faidx "$fasta_in" + +contigs=0 + +sort -k "2,2n$order" <"$fasta_in.fai" | +awk -f "$(dirname "$0")"/fa-firstfit.awk \ + -v padding="$padding" \ + -v maxcontigs="$max_contigs" | +sort -k1,1 -k3,3n | +tee "$fasta_map" | +while read contig seq len; do + if [ "$contig" != "$prevcontig" ]; then + prevcontig=$contig + pad=0 + + contigs=$(( contigs + 1 )) + if [ "$contigs" -gt "$max_contigs" ]; then + echo 'Error: Maximum number of contigs exceeded!' >&2 + exit 1 + fi + else + pad=1 + fi + + if [ "$pad" -eq 1 ]; then + printf '%s\n' "$padding_str" + else + printf '>%s\n' "$contig" + fi + + samtools faidx "$fasta_in" "$seq" | sed 1d +done | +awk -f "$(dirname "$0")"/fa-reformat.awk -v maxlen="$out_width" >"$fasta_out" diff --git a/bin/gaas_prepare_matrice_by_window.pl b/bin/gaas_prepare_matrice_by_window.pl new file mode 100755 index 000000000..e4c6fa33c --- /dev/null +++ b/bin/gaas_prepare_matrice_by_window.pl @@ -0,0 +1,636 @@ +#!/usr/bin/env perl + +#libraries +use File::Basename; +use strict; +use warnings; +use Data::Dumper; +use Carp; +use Bio::SimpleAlign; +use Bio::LocatableSeq; +use Getopt::Long; +use IO::File; +use Pod::Usage; +use Bio::SeqIO; +use List::MoreUtils qw(uniq); +use Storable 'dclone'; +use Clone 'clone'; +use GAAS::GAAS; + +my $header = get_gaas_header(); +# END libraries +# PARAMETERS - OPTION +my $opt_position; +my $opt_output=undef; +my $opt_nbByWindow=undef; +my $opt_windowsSize=undef; +my $opt_aliDir=undef; +my $opt_tsv=undef; +my $opt_cons=undef; +my $opt_consList=undef; +my $opt_consThreshold=undef; + +my $opt_help = 0; +# END PARAMETERS - OPTION + + +# OPTION MANAGMENT +if ( !GetOptions( 'position|p=s' => \$opt_position, + 'tsv=s' => \$opt_tsv, + 'o|output=s' => \$opt_output, + 'v|value|window=i' => \$opt_windowsSize, + 'ad=s' 
=> \$opt_aliDir, + 'consensus|c' => \$opt_cons, + 'consensus_list|cl=s' => \$opt_consList, + 'consensus_threshold|ct=i' => \$opt_consThreshold, + 'nbbw=i' => \$opt_nbByWindow, + 'h|help!' => \$opt_help ) ) +{ + pod2usage( { -message => 'Failed to parse command line', + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($opt_help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if ((!$opt_position or !$opt_aliDir or !$opt_tsv)){ + pod2usage( { + -message => "\nIf you want to merge sequence by window, at least 3 parameter is mandatory: position file, tsv file, and directory containing alignments\n". + "Many optional parameters are available. Look at the help documentation to know more.\n", + -verbose => 0, + -exitval => 2 } ); +} +############################# +####### Manage options ####### +############################# + +#Choose y axis value: +if(! $opt_nbByWindow){ + $opt_nbByWindow="4"; + print "you didn't choose any value for the number of minimum gene that must be present to keep a window. We will take the default value.\n"; +} +print "Number minimum of gene by window : $opt_nbByWindow .\n"; +#Choose opt_windowsSize value: +if(! $opt_windowsSize){ + print "you didn't choose any value for the window. We will take the default value.\n"; + $opt_windowsSize=1; +} +print "Window size used : $opt_windowsSize Mb.\n"; + +if($opt_cons ){ + print "You decided to create consensus sequences in the alignements when several individual from the same species are present.\n"; +} + +my %consensListOk; +if($opt_consList){ + print "Moreover You decided to make consensus between species. 
Here is the detail:\n"; + # Mange consensus list given + my @consensListPair; + + @consensListPair= split(/,/, $opt_consList); + my $nbCons=$#consensListPair+1; + print "You defined $nbCons species for creating consensus sequences.\n"; + foreach my $consensusTuple (@consensListPair){ + my @consensList= split(/\//, $consensusTuple); + if($#consensList != 1){ # Attribute alone + print "Problem with this tuple $consensusTuple. You should have something like: Species/consensusName\n"; + } + else{ # Attribute we have to replace by a new name + push(@{$consensListOk{$consensList[1]}}, $consensList[0]); + } + } + foreach my $key (keys %consensListOk){ + print "You dicided to make a consensus of (inter)species <@{$consensListOk{$key}}> and call this new sequence <$key>\n"; + } +} + +if($opt_cons or $opt_consList){ + if(! $opt_consThreshold){ + $opt_consThreshold=0; + } + print "The consensus residue has to appear at least threshold $opt_consThreshold% of the sequences at a given location, otherwise a '?'\n"; +} + +############################# +####### Manage output ####### +############################# + +my $report_output; + +if( $opt_output){ + + $opt_output=~ s/\..*//g; + $report_output=$opt_output."_report"; +} +else{ + $opt_output="output_prepare_matrice_by_window"; + $report_output=$opt_output."_report";; +} + +if (-d $opt_output ){ + print "Directory $opt_output already exists.\n";exit; +} +else{ + mkdir $opt_output; + mkdir $report_output; + print "$opt_output dir created\n"; +} + + ####################### + # MAIN # + ####################### + +########## constant ############# + +my $nbFile=0; +my @listTmpFile; + +####################### +# Manage input files # +####################### + +### READ POSITION FILE AND STORE RESULTS +my $position_stream = IO::File->new(); +$position_stream->open( $opt_position, 'r' ) or +croak( + sprintf( "Can not open '%s' for reading: %s", $opt_position, $! 
) ); + +my %hash_chr2name; +my $count=0; + foreach my $line (<$position_stream>){ + chomp($line); + if($count >=1){ # avoid header + my @list=split /@/,$line; + #geneName@Chromosome@position@GC + + $hash_chr2name{$list[1]}{$list[2]}= $list[0]; + } + $count++; + } + +### READ TSV FILE AND STORE RESULTS +my $tsv_stream = IO::File->new(); +$tsv_stream->open( $opt_tsv, 'r' ) or +croak( + sprintf( "Can not open '%s' for reading: %s", $opt_tsv, $! ) ); + +my %hash_HordeumName2Aliname; + + foreach my $line (<$tsv_stream>){ + chomp($line); + my @list=split /\t/, $line; + #print @list."\n"; + #MarqueurName Nbspecies NbIndividu NbSequence SequenceNamesList(separated by tabulation) + my $count=0; + my $marqueurName; + my $hordeumName; + foreach my $el (@list){ + + if($count == 0){ # marqueurName + $marqueurName=$el; + } + if($count >=4){ # SequenceNamesList + + if ($el =~ "H_vulgare"){ # H_vulgare sequence => shoud contain EPlHVUG or MLOC that will be used to retrieve position information + + if($el =~ "EPlHVUG"){ + + $el =~ /.*_(EPlHVUG[^|]*)\|.*/; + $hordeumName = $1; + } + else{ + $el =~ /.*\|([^\.]*).*/; + $hordeumName = $1; + last; + } + + } + } + $count++; + } + + $hash_HordeumName2Aliname{$hordeumName}=$marqueurName; + } + + +### READ ALI FOLDER #### +my @listFile; +if( -d $opt_aliDir){ + opendir(DIR, $opt_aliDir) or die "cannot open dir $opt_aliDir: $!"; + @listFile= readdir DIR; + closedir DIR; +} +else{ + print "$opt_aliDir is not a directory or doesn't exists !\n";exit; +} + +########################## +# General results # +########################## + + +my $currentCenter; +my $path; +my %Ali;my $tmpAli=\%Ali; + + +#For each chromosome +foreach my $chr (keys %hash_chr2name){ + + print "Process chromosome".$chr."\n"; + my $nbWinOk=0; + my $dir=$opt_output."/".$chr; + mkdir $dir; + my $currentLimit = 0; + + #report + my $report_stream = IO::File->new(); + $report_stream->open( $report_output."/".$chr, 'w' ) or croak( sprintf( "Can not open '%s' for reading: %s", 
$report_output, $! ) ); + + # need to know the highest value + my $highvalue=get_highest_value(\%hash_chr2name, $chr); + + + # Go through all the chromosome window by window + my $stop=0; + while ($stop != 2){ + + if( (1000000*$currentLimit) > $highvalue){$stop++}; + + my $nbPrintedVal=0; + $currentLimit = $currentLimit + $opt_windowsSize; + $currentCenter=$currentLimit-($opt_windowsSize/2); + $currentCenter=sprintf "%.1f",$currentCenter; + $path=$dir."/".$currentCenter."Mb.aln"; + #print "study of chr $chr window ".1000000*($currentLimit-$opt_windowsSize)."-".1000000*$currentLimit."\n"; + + %$tmpAli = (); # empty the hash + + # we go through the chromosome positions by increasing order + foreach my $position (sort {$a<=>$b} keys %{$hash_chr2name{$chr} } ) { + #print "study of $hash_chr2name{$chr}{$position} $position\n"; + if($position > (1000000*($currentLimit-$opt_windowsSize)) and $position < (1000000*$currentLimit) ){ #kb to nucleotide : * 1000000 + + if(exists ($hash_HordeumName2Aliname{$hash_chr2name{$chr}{$position} } ) ) { + + if (fill_for_ali($tmpAli, $hash_HordeumName2Aliname{$hash_chr2name{$chr}{$position}}, $nbPrintedVal)){ # save sequence of the alignement to create supermatrice + $nbPrintedVal++; + } + } + else{ + #print "Hordeum Name: $hash_chr2name{$chr}{$position} doesn't exists among the Hordeum names of alignments we are working with.\n"; + } + + } + # We are over the chromosome size, consequently we stop + elsif($position > (1000000*($currentLimit-$opt_windowsSize))){last;} + } + + #report + print $report_stream $currentCenter."Mb\t".$nbPrintedVal."\n"; + + # remove files if not enough value ín the interval + if($nbPrintedVal < $opt_nbByWindow){ + #print "only $nbPrintedVal alignment ! 
We delete it \n"; + unlink $path; + } + else{ # enough value in the window, so we print the result + $nbWinOk++; + my $ostreamChrFile = Bio::SeqIO->new(-format => "fasta", -file => ">$path"); + + ################### + #Create consensus + my %listOfAli; + if($opt_cons){ + my %hashNum; + my %hashNames; + #create list name of the current Ali + foreach my $key (keys %$tmpAli ){ + #print "name = $key\n"; + $key =~ /(.*_[^_]*)_/; + my $nameRough = $1; + $hashNum{$nameRough}++; + push(@{$hashNames{$nameRough}}, $key); + } + + # + foreach my $nameRough (keys %hashNum ){ + if($hashNum{$nameRough} > 1){ + # create a small alignementof only same species + my $aln = new Bio::SimpleAlign(); + foreach my $nameComplete (@{$hashNames{$nameRough}}){ + my $seq = new Bio::LocatableSeq(-seq => $tmpAli->{$nameComplete} , -id => "$nameComplete"); + $aln->add_seq($seq); + print "We put $nameComplete in $nameRough \n"; + #remove the sequence from old ali no more usefull + delete $tmpAli->{$nameComplete}; #REMOVE THAT KEY-VALUE pair + } + #print "we keep name $nameRough -------\n"; + $listOfAli{$nameRough}=$aln; + } + } + } + + if($opt_consList){ + foreach my $ConsensName (keys %consensListOk ){ + + my $SuperAln = undef; + foreach my $nameCons (@{$consensListOk{$ConsensName}} ){ + #print "Does it start by ? $nameCons\n"; + #foreach my $na ( keys %$tmpAli){ + # print "$na \n"; + #} + # check in the original alignement + if (grep {/$nameCons.*/} keys %$tmpAli){ + + if(! $SuperAln){ $SuperAln = new Bio::SimpleAlign();} + + foreach my $name (keys %$tmpAli){ + if($name =~ /$nameCons.*/){ + my $seq = new Bio::LocatableSeq(-seq => $tmpAli->{$name} , -id => "$name"); + $SuperAln->add_seq($seq); + print "We put $name in $ConsensName \n"; + #remove the sequence from old ali no more usefull + delete $tmpAli->{$name}; #REMOVE THAT KEY-VALUE pair + } + } + } + + #check if alignement that can have been done by the option "c" + if (grep {/$nameCons.*/} keys %listOfAli){ + + if(! 
$SuperAln){ $SuperAln = new Bio::SimpleAlign();} + + my %deep_copy_listOfAli = %{ clone (\%listOfAli) }; + foreach my $key (keys %deep_copy_listOfAli){ # keys only: iterating the hash itself would also yield the alignment objects + if ($key =~ /$nameCons.*/){ + + my $currentAli=$listOfAli{$key}; + + foreach my $seq ($currentAli->each_seq) { + $SuperAln->add_seq($seq); + } + print "We put $key in $ConsensName \n"; + delete $listOfAli{$key}; #REMOVE THAT KEY-VALUE pair + } + } + } + } + if($SuperAln){ + $listOfAli{$ConsensName}=$SuperAln; + } + } + } + + #create a consensus from each ali if asked + foreach my $consensName (keys %listOfAli){ + my $aln=$listOfAli{$consensName}; + my $consensus = $aln->consensus_string($opt_consThreshold); + $consensus=~ s/\?/-/g; # when a column holds only gaps the consensus is "?", so "?" is replaced by "-" + + #now add the consensus sequence to + $tmpAli->{$consensName."_Cons"}=$consensus; + } + + # End create consensus + ###################### + + #print result alignment + foreach my $key (keys %$tmpAli ){ + + my $seq = Bio::Seq->new(-display_id => $key, -seq => $tmpAli->{$key}); + $ostreamChrFile->write_seq($seq); + } + + #print "$nbPrintedVal ali read\n"; + $position_stream->close(); + } + } + + # remove folder in no file + if (is_folder_empty($dir)) { + rmdir $dir + } + else{ + print "We found $nbWinOk window(s) of $opt_windowsSize Mb with more than $opt_nbByWindow alignments\n"; + } +} +$position_stream->close(); +print("Parsing Finished\n\n"); + + + ######################### + ######### END ########### + ######################### +####################################################################################################################### + #################### + # methods # + ################ + ############## + ############ + ########## + ######## + ###### + #### + ## + +# Append the sequences of the alignment file matching $name to the super-matrix held in $hash. +# $hash : hash ref, sequence id => concatenated sequence string (modified in place) +# $name : name used to select exactly one file among @listFile (matched literally) +# $nbPrintedVal : number of alignments already added for the current window (0 for the first one) +# Returns 1 when a file was read and appended, 0 when no single matching regular file was found. +sub fill_for_ali{ + my ($hash,$name,$nbPrintedVal)=@_; + + ### get file name !! + my @matches = grep { /\Q$name\E/ } @listFile; # \Q..\E: match the name literally, not as a regular expression + if (@matches >= 2){ + print "several file start with the name $name whithin the directory $opt_aliDir. 
We don't know which one take. We stop. \n"; + return 0; + } + if (@matches == 0){ + #print "No file with name starting by $name found whithin the directory $opt_aliDir. We don't know which one take. We stop. \n"; + return 0; + } + + $name=$matches[0]; + + if(! -f "$opt_aliDir/$name"){ + print "File $name not present within the folder $opt_aliDir...\n" ; + return 0; + } + + my $seqio = Bio::SeqIO->new(-file => "$opt_aliDir/$name", '-format' => 'Fasta'); + my %idPerformed; # ids already met while reading this file + my $string; # sequence string of the record read last + + # length of the sequences already stored (NOTE(review): assumes all entries of $hash have the same length - confirm) + my $sizeAlAli=0; + foreach my $key (keys %$hash){ + $sizeAlAli = length($hash->{$key}); + last; + } + + while(my $seq = $seqio->next_seq) { + + $string = $seq->seq; + my $id_original = $seq->primary_id; + my @ids= split /_/,$id_original ; + my $newID=$ids[0]."_".$ids[1]."_".$ids[2]; # keep the first three "_"-separated fields of the id + + if(! exists($hash->{$newID})){ # If Id is a new one + if($nbPrintedVal != 0){ # not the first alignment read: pad in front with gaps for what is already stored + $hash->{$newID}=('-' x $sizeAlAli).$string; + } + else{ #first ali file + $hash->{$newID}=$string; + } + } + elsif(! exists ($idPerformed{$newID})){ # Id already known and not yet seen in this file: append + $hash->{$newID}.=$string; + } + else{ # Id met twice in the same file: only the first occurrence is kept + print "Duplication removed !\n"; + } + $idPerformed{$newID}++; + } + + # sequences stored earlier that are absent from the current file get gaps instead + my $n = defined($string) ? length($string) : 0; # 0 when the file held no sequence + foreach my $key (keys %$hash){ + if (! exists ($idPerformed{$key})){ + 
$hash->{$key}.='-' x $n; + } + } + + return 1; +} + +# Return the highest position (numeric key) stored for chromosome $chr in $hash (hash ref: chromosome => { position => name }); 0 when there is none. +sub get_highest_value{ + + my ($hash,$chr) = @_; + my $high=0; + + foreach my $position (keys %{ $hash->{$chr} || {} } ) { # "|| {}" avoids creating an empty entry for an unknown chromosome + if($position > $high){ + $high=$position ; + } + } + return $high; +} + +# Return true when the directory $dirname holds no entry other than "." and ".."; die when it cannot be opened. +sub is_folder_empty { + my $dirname = shift; + opendir(my $dh, $dirname) or die "Can not open directory '$dirname': $!"; + my @entries = grep { $_ ne "." && $_ ne ".." } readdir($dh); + closedir($dh); + return @entries == 0; +} + +__END__ + +=head1 NAME + +gaas_prepare_matrice_by_window.pl + +=head1 DESCRIPTION + +The script takes a position file as input and a directory containing the alignments. It concatenates files to create mini-superMatrices corresponding to the chosen window size. +The aim is to check if there is a clear phylogeny pattern by portion of the chromosome. If the pattern is different in different locations of the chromosome it could be a hint of recombination. + +=head1 SYNOPSIS + + gaas_prepare_matrice_by_window.pl -p positionFile --ad directoryWithAlignments [ --output outfile ] + gaas_prepare_matrice_by_window.pl --help + + gaas_prepare_matrice_by_window.pl -p Hordeum_position.txt -tsv clusters_1L_7speIN_1speOUT_noDup_withHordeum6235.tsv -ad ALIGNMENTS -c -o out_consens + + example with -cl option + gaas_prepare_matrice_by_window.pl -p Hordeum_position.txt -tsv clusters_1L_7speIN_1speOUT_noDup_withHordeum6235.tsv -ad aliTest/ -c -cl Ae_longissima/D,Ae_sharonensis/D,Ae_bicornis/D,Ae_searsii/D,Ae_tauschii/D,Ae_comosa/D,Ae_uniaristata/D,Ae_umbellulata/D,Ae_caudata/D,T_urartu/A,T_boeoticum/A,Ae_speltoides/B,Ae_mutica/B + +=head1 OPTIONS + +=over 8 + +=item B<-p> or B<--position> + +Input file containing position information related to a reference in that format: +GeneName@ChromosomeName@PositionInNucleotide + +=item B<--tsv> +This option defines the tsv file that contains information needed to link the position to the alignments. 
Format like this: +Marker name (sequence used to build the cluster) \t number of species \t number of individuals \t number of sequences \t all the sequence headers of the alignment, separated by tabs + +=item B<--ad> + +Input directory containing alignment files in fasta format. File names should be the same as those found at "GeneName" in the position file. + +=item B<-v>, B<--value> or B<--window> + +Allows to define the sliding window size to use to clusterize the alignments (in megabase) (Default 1). + +=item B<--nbbw> + +Minimum number of sequences that must be present within a window to take it into account (Default 4). + +=item B<--consensus>, B<-c> + +This option, when activated, will create consensus sequences in the alignments when several individuals from the same species are present. +Ae_comosa_Tr272 => The first two words linked by an underscore are used to define the species name. Here Ae_comosa will be used. + +=item B<--consensus_list>, B<-cl> +This option, when activated, will create consensus sequences from the sequences whose names start with the names given, grouped under the group names chosen. Explanation below: +Example: Species1/A,Species2/A,Species3/B,Species4/B +In this example Species1 and Species2 will create a consensus called A, and Species3 and Species4 will create a consensus called B. + +=item B<--consensus_threshold>, B<-ct> +Optional threshold ranging from 0 to 100. The consensus residue has to appear in at least threshold % of the sequences at a given location, otherwise a '?' character will be placed at that location. +Default value=0; + +=item B<-o> or B<--output> + +Output name of the directory that will contain results + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. 
Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_protein2hints.rb b/bin/gaas_protein2hints.rb new file mode 100644 index 000000000..f3b66121e --- /dev/null +++ b/bin/gaas_protein2hints.rb @@ -0,0 +1,70 @@ +#!/usr/bin/ruby +# == NAME +# gff2hints.rb +# +# == USAGE +# ./this_script.rb [ -h | --help ] +# [ -i | --infile ] |[ -o | --outfile ] | +# == DESCRIPTION +# Converts Maker-generated evidence alignments from GFF3 format to Augustus-compatbile hint format +# +# == OPTIONS +# -h,--help:: Show help +# -i,--infile=INFILE:: input file +# -o,--outfile=OUTFILE:: output file + +# +# == EXPERT OPTIONS +# +# == AUTHOR +# Marc Hoeppner, mphoeppner@gmail.com + +require 'rdoc/usage' +require 'optparse' +require 'ostruct' +require 'logger' + + +### Define modules and classes here + +### Get the script arguments and open relevant files +options = OpenStruct.new() +opts = OptionParser.new() +opts.on("-h","--help","Display the usage information") {RDoc::usage} +opts.on("-i","--infile", "=INFILE","Input") {|argument| options.infile = argument } +opts.on("-o","--outfile", "=OUTFILE","Output") {|argument| options.outfile = argument } + +opts.parse! rescue RDoc::usage('usage') + +options.outfile ? 
output_stream = File.new(options.outfile,'w') : output_stream = $stdout + +lines = IO.readlines(options.infile) + +evidence = lines[0].split("\t")[2] +priority = 1 +type_of_evidence = nil + +if evidence == "expressed_sequence_match" + type_of_evidence = "E" + priority = 4 +elsif evidence == "protein_match" + type_of_evidence = "P" + priority = 2 +end + +abort "No evidence type identified" unless type_of_evidence + +lines.each do |line| + + next unless line.include?("match_part") + + seq_region,source,feature,start,stop,score,strand,phase,tag_values = line.strip.split("\t") + group = tag_values.split(";").find{|e| e.include?("Parent")}.split("=")[-1].strip.gsub(/\:/, '_').gsub(/\./, '-') + + + output_stream.puts "#{seq_region}\tb2h\tCDSpart\t#{start}\t#{stop}\t0\t.\t.\tgroup=#{group};src=#{type_of_evidence};pri=#{priority}" + +end + + + diff --git a/bin/gaas_removeIsoforms.sh b/bin/gaas_removeIsoforms.sh new file mode 100755 index 000000000..5c8cc6538 --- /dev/null +++ b/bin/gaas_removeIsoforms.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +# JD 2015 +# +#This script kept only one isoform per gene for proteomes coming form Ensembl +#### + +# Arguments and Paths +############################################################################ + +if (( $# != 2 )) ; then + echo -e "The script allows to filter proteome in fasta format from Ensembl with aims to keep the longest isoform per gene !" 
+ echo -e "The script needs 2 parameters: \n(1)The proteome input fasta file" + echo -e "(2)The cleaned proteome output in fasta format" + exit +fi + + + cat $1 | awk '/^>/ {if(N>0) printf("\n"); printf("%s\t",$1"\t"$4);N++;next;}{printf("%s",$0);} END {if(N>0) printf("\n");}' | awk -F '\t' '{printf("%s\t%d\n",$0,length($3));}' | sort -t ' ' -k2,2 -k4,4nr | sort -t ' ' -k2,2 -u -s | cut -f 1,2,3 | awk '{print $1"\t"$2"\n"$3}' | fold -w 60 > $2 + + + + + + + + + +# Explanation +############# +# cat test | awk '/^>/ {if(N>0) printf("\n"); printf("%s\t",$1"\t"$4);N++;next;}{printf("%s",$0);} END {if(N>0) printf("\n");}' |\ #linearize fasta and print col1 col4 and seq linearized +# awk -F '\t' '{printf("%s\t%d\n",$0,length($3));}' |\ #extact length on the 4th column +# sort -t ' ' -k2,2 -k4,4nr |\#sort on column2, inverse length +# sort -t ' ' -k2,2 -u -s |\# #sort on column 2, unique, stable sort (keep previous order) +# cut -f 1,2,3 |\ #cut 3column +# awk '{print $1"\t"$2"\n"$3}' |\ # print col1\tcol2 \n col3 +# fold -w 60 #pretty fasta diff --git a/bin/gaas_reveal_hidden_characters.pl b/bin/gaas_reveal_hidden_characters.pl new file mode 100755 index 000000000..3131bdc7f --- /dev/null +++ b/bin/gaas_reveal_hidden_characters.pl @@ -0,0 +1,415 @@ +#!/usr/bin/env perl + +use warnings; +use strict; +use POSIX qw(strftime); +use File::Basename; +use Pod::Usage; +use Getopt::Long; +use Try::Tiny; +use Carp; +use IO::File; + +my %column = ( + oct => 0, + hex => 1, + bin => 2, + symbol => 3, + html_number => 4, + html_name => 5, + description => 6, + ); + +my %ascii = ( +0 => ["000", "00", "00000000", "NUL", "�", " ", "Null char"], +1 => ["001", "01", "00000001", "SOH", "", " ", "Start of Heading"], +2 => ["002", "02", "00000010", "STX", "", " ", "Start of Text"], +3 => ["003", "03", "00000011", "ETX", "", " ", "End of Text"], +4 => ["004", "04", "00000100", "EOT", "", " ", "End of Transmission"], +5 => ["005", "05", "00000101", "ENQ", "", " ", "Enquiry"], +6 => ["006", 
"06", "00000110", "ACK", "", " ", "Acknowledgment"], +7 => ["007", "07", "00000111", "BEL", "", " ", "Bell"], +8 => ["010", "08", "00001000", "BS", "", " ", "Back Space"], +9 => ["011", "09", "00001001", "HT", " ", " ", "Horizontal Tab"], +10 => ["012", "0A", "00001010", "LF", " ", " ", "Line Feed"], +11 => ["013", "0B", "00001011", "VT", " ", " ", "Vertical Tab"], +12 => ["014", "0C", "00001100", "FF", " ", " ", "Form Feed"], +13 => ["015", "0D", "00001101", "CR", " ", " ", "Carriage Return"], +14 => ["016", "0E", "00001110", "SO", "", " ", "Shift Out / X-On"], +15 => ["017", "0F", "00001111", "SI", "", " ", "Shift In / X-Off"], +16 => ["020", "10", "00010000", "DLE", "", " ", "Data Line Escape"], +17 => ["021", "11", "00010001", "DC1", "", " ", "Device Control 1 (oft. XON)"], +18 => ["022", "12", "00010010", "DC2", "", " ", "Device Control 2"], +19 => ["023", "13", "00010011", "DC3", "", " ", "Device Control 3 (oft. XOFF)"], +20 => ["024", "14", "00010100", "DC4", "", " ", "Device Control 4"], +21 => ["025", "15", "00010101", "NAK", "", " ", "Negative Acknowledgement"], +22 => ["026", "16", "00010110", "SYN", "", " ", "Synchronous Idle"], +23 => ["027", "17", "00010111", "ETB", "", " ", "End of Transmit Block"], +24 => ["030", "18", "00011000", "CAN", "", " ", "Cancel"], +25 => ["031", "19", "00011001", "EM", "", " ", "End of Medium"], +26 => ["032", "1A", "00011010", "SUB", "", " ", "Substitute"], +27 => ["033", "1B", "00011011", "ESC", "", " ", "Escape"], +28 => ["034", "1C", "00011100", "FS", "", " ", "File Separator"], +29 => ["035", "1D", "00011101", "GS", "", " ", "Group Separator"], +30 => ["036", "1E", "00011110", "RS", "", " ", "Record Separator"], +31 => ["037", "1F", "00011111", "US", "", " ", "Unit Separator"], +32 => ["040", "20", "00100000", " ", " ", " ", "Space"], +33 => ["041", "21", "00100001", "!", "!", " ", "Exclamation mark"], +34 => ["042", "22", "00100010", '"', """, """, "Double quotes (or speech marks)"], +35 => ["043", "23", "00100011", 
"#", "#", " ", "Number"], +36 => ["044", "24", "00100100", '$', "$", " ", "Dollar"], +37 => ["045", "25", "00100101", "%", "%", " ", "Per cent sign"], +38 => ["046", "26", "00100110", "&", "&", "&", "Ampersand"], +39 => ["047", "27", "00100111", "'", "'", " ", "Single quote"], +40 => ["050", "28", "00101000", "(", "(", " ", "Open parenthesis (or open bracket)"], +41 => ["051", "29", "00101001", ")", ")", " ", "Close parenthesis (or close bracket)"], +42 => ["052", "2A", "00101010", "*", "*", " ", "Asterisk"], +43 => ["053", "2B", "00101011", "+", "+", " ", "Plus"], +44 => ["054", "2C", "00101100", ",", ",", " ", "Comma"], +45 => ["055", "2D", "00101101", "-", "-", " ", "Hyphen"], +46 => ["056", "2E", "00101110", ".", ".", " ", "Period, dot or full stop"], +47 => ["057", "2F", "00101111", "/", "/", " ", "Slash or divide"], +48 => ["060", "30", "00110000", "0", "0", " ", "Zero"], +49 => ["061", "31", "00110001", "1", "1", " ", "One"], +50 => ["062", "32", "00110010", "2", "2", " ", "Two"], +51 => ["063", "33", "00110011", "3", "3", " ", "Three"], +52 => ["064", "34", "00110100", "4", "4", " ", "Four"], +53 => ["065", "35", "00110101", "5", "5", " ", "Five"], +54 => ["066", "36", "00110110", "6", "6", " ", "Six"], +55 => ["067", "37", "00110111", "7", "7", " ", "Seven"], +56 => ["070", "38", "00111000", "8", "8", " ", "Eight"], +57 => ["071", "39", "00111001", "9", "9", " ", "Nine"], +58 => ["072", "3A", "00111010", ":", ":", " ", "Colon"], +59 => ["073", "3B", "00111011", ";", ";", " ", "Semicolon"], +60 => ["074", "3C", "00111100", "<", "<", "<", "Less than (or open angled bracket)"], +61 => ["075", "3D", "00111101", "=", "=", " ", "Equals"], +62 => ["076", "3E", "00111110", ">", ">", ">", "Greater than (or close angled bracket)"], +63 => ["077", "3F", "00111111", "?", "?", " ", "Question mark"], +64 => ["100", "40", "01000000", "@", "@", " ", "At symbol"], +65 => ["101", "41", "01000001", "A", "A", " ", "Uppercase A"], +66 => ["102", "42", "01000010", "B", "B", " 
", "Uppercase B"], +67 => ["103", "43", "01000011", "C", "C", " ", "Uppercase C"], +68 => ["104", "44", "01000100", "D", "D", " ", "Uppercase D"], +69 => ["105", "45", "01000101", "E", "E", " ", "Uppercase E"], +70 => ["106", "46", "01000110", "F", "F", " ", "Uppercase F"], +71 => ["107", "47", "01000111", "G", "G", " ", "Uppercase G"], +72 => ["110", "48", "01001000", "H", "H", " ", "Uppercase H"], +73 => ["111", "49", "01001001", "I", "I", " ", "Uppercase I"], +74 => ["112", "4A", "01001010", "J", "J", " ", "Uppercase J"], +75 => ["113", "4B", "01001011", "K", "K", " ", "Uppercase K"], +76 => ["114", "4C", "01001100", "L", "L", " ", "Uppercase L"], +77 => ["115", "4D", "01001101", "M", "M", " ", "Uppercase M"], +78 => ["116", "4E", "01001110", "N", "N", " ", "Uppercase N"], +79 => ["117", "4F", "01001111", "O", "O", " ", "Uppercase O"], +80 => ["120", "50", "01010000", "P", "P", " ", "Uppercase P"], +81 => ["121", "51", "01010001", "Q", "Q", " ", "Uppercase Q"], +82 => ["122", "52", "01010010", "R", "R", " ", "Uppercase R"], +83 => ["123", "53", "01010011", "S", "S", " ", "Uppercase S"], +84 => ["124", "54", "01010100", "T", "T", " ", "Uppercase T"], +85 => ["125", "55", "01010101", "U", "U", " ", "Uppercase U"], +86 => ["126", "56", "01010110", "V", "V", " ", "Uppercase V"], +87 => ["127", "57", "01010111", "W", "W", " ", "Uppercase W"], +88 => ["130", "58", "01011000", "X", "X", " ", "Uppercase X"], +89 => ["131", "59", "01011001", "Y", "Y", " ", "Uppercase Y"], +90 => ["132", "5A", "01011010", "Z", "Z", " ", "Uppercase Z"], +91 => ["133", "5B", "01011011", "[", "[", " ", "Opening bracket"], +92 => ["134", "5C", "01011100", '\\', "\", " ", "Backslash"], +93 => ["135", "5D", "01011101", "]", "]", " ", "Closing bracket"], +94 => ["136", "5E", "01011110", "^", "^", " ", "Caret - circumflex"], +95 => ["137", "5F", "01011111", "_", "_", " ", "Underscore"], +96 => ["140", "60", "01100000", "`", "`", " ", "Grave accent"], +97 => ["141", "61", "01100001", "a", "a", " 
", "Lowercase a"], +98 => ["142", "62", "01100010", "b", "b", " ", "Lowercase b"], +99 => ["143", "63", "01100011", "c", "c", " ", "Lowercase c"], +100 => ["144", "64", "01100100", "d", "d", " ", "Lowercase d"], +101 => ["145", "65", "01100101", "e", "e", " ", "Lowercase e"], +102 => ["146", "66", "01100110", "f", "f", " ", "Lowercase f"], +103 => ["147", "67", "01100111", "g", "g", " ", "Lowercase g"], +104 => ["150", "68", "01101000", "h", "h", " ", "Lowercase h"], +105 => ["151", "69", "01101001", "i", "i", " ", "Lowercase i"], +106 => ["152", "6A", "01101010", "j", "j", " ", "Lowercase j"], +107 => ["153", "6B", "01101011", "k", "k", " ", "Lowercase k"], +108 => ["154", "6C", "01101100", "l", "l", " ", "Lowercase l"], +109 => ["155", "6D", "01101101", "m", "m", " ", "Lowercase m"], +110 => ["156", "6E", "01101110", "n", "n", " ", "Lowercase n"], +111 => ["157", "6F", "01101111", "o", "o", " ", "Lowercase o"], +112 => ["160", "70", "01110000", "p", "p", " ", "Lowercase p"], +113 => ["161", "71", "01110001", "q", "q", " ", "Lowercase q"], +114 => ["162", "72", "01110010", "r", "r", " ", "Lowercase r"], +115 => ["163", "73", "01110011", "s", "s", " ", "Lowercase s"], +116 => ["164", "74", "01110100", "t", "t", " ", "Lowercase t"], +117 => ["165", "75", "01110101", "u", "u", " ", "Lowercase u"], +118 => ["166", "76", "01110110", "v", "v", " ", "Lowercase v"], +119 => ["167", "77", "01110111", "w", "w", " ", "Lowercase w"], +120 => ["170", "78", "01111000", "x", "x", " ", "Lowercase x"], +121 => ["171", "79", "01111001", "y", "y", " ", "Lowercase y"], +122 => ["172", "7A", "01111010", "z", "z", " ", "Lowercase z"], +123 => ["173", "7B", "01111011", "{", "{", " ", "Opening brace"], +124 => ["174", "7C", "01111100", "|", "|", " ", "Vertical bar"], +125 => ["175", "7D", "01111101", "}", "}", " ", "Closing brace"], +126 => ["176", "7E", "01111110", "~", "~", " ", "Equivalency sign - tilde"], +127 => ["177", "7F", "01111111", "", "", " ", "Delete"], +128 => ["200", "80", 
"10000000", "€", "€", "€", "Euro sign"], +129 => ["201", "81", "10000001", " ", " ", " ", " "], +130 => ["202", "82", "10000010", "‚", "‚", "‚", "Single low-9 quotation mark"], +131 => ["203", "83", "10000011", "ƒ", "ƒ", "ƒ", "Latin small letter f with hook"], +132 => ["204", "84", "10000100", "„", "„", "„", "Double low-9 quotation mark"], +133 => ["205", "85", "10000101", "…", "…", "…", "Horizontal ellipsis"], +134 => ["206", "86", "10000110", "†", "†", "†", "Dagger"], +135 => ["207", "87", "10000111", "‡", "‡", "‡", "Double dagger"], +136 => ["210", "88", "10001000", "ˆ", "ˆ", "ˆ", "Modifier letter circumflex accent"], +137 => ["211", "89", "10001001", "‰", "‰", "‰", "Per mille sign"], +138 => ["212", "8A", "10001010", "Š", "Š", "Š", "Latin capital letter S with caron"], +139 => ["213", "8B", "10001011", "‹", "‹", "‹", "Single left-pointing angle quotation"], +140 => ["214", "8C", "10001100", "Œ", "Œ", "Œ", "Latin capital ligature OE"], +141 => ["215", "8D", "10001101", " ", " ", " ", " "], +142 => ["216", "8E", "10001110", "Ž", "Ž", " ", "Latin capital letter Z with caron"], +143 => ["217", "8F", "10001111", " ", " ", " ", " "], +144 => ["220", "90", "10010000", " ", " ", " ", " "], +145 => ["221", "91", "10010001", "‘", "‘", "‘", "Left single quotation mark"], +146 => ["222", "92", "10010010", "’", "’", "’", "Right single quotation mark"], +147 => ["223", "93", "10010011", "“", "“", "“", "Left double quotation mark"], +148 => ["224", "94", "10010100", "”", "”", "”", "Right double quotation mark"], +149 => ["225", "95", "10010101", "•", "•", "•", "Bullet"], +150 => ["226", "96", "10010110", "–", "–", "–", "En dash"], +151 => ["227", "97", "10010111", "—", "—", "—", "Em dash"], +152 => ["230", "98", "10011000", "˜", "˜", "˜", "Small tilde"], +153 => ["231", "99", "10011001", "™", "™", "™", "Trade mark sign"], +154 => ["232", "9A", "10011010", "š", "š", "š", "Latin small letter S with caron"], +155 => ["233", "9B", "10011011", "›", "›", "›", "Single right-pointing 
angle quotation mark"], +156 => ["234", "9C", "10011100", "œ", "œ", "œ", "Latin small ligature oe"], +157 => ["235", "9D", "10011101", " ", " ", " ", " "], +158 => ["236", "9E", "10011110", "ž", "ž", " ", "Latin small letter z with caron"], +159 => ["237", "9F", "10011111", "Ÿ", "Ÿ", "Ÿ", "Latin capital letter Y with diaeresis"], +160 => ["240", "A0", "10100000", " ", " ", " ", "Non-breaking space"], +161 => ["241", "A1", "10100001", "¡", "¡", "¡", "Inverted exclamation mark"], +162 => ["242", "A2", "10100010", "¢", "¢", "¢", "Cent sign"], +163 => ["243", "A3", "10100011", "£", "£", "£", "Pound sign"], +164 => ["244", "A4", "10100100", "¤", "¤", "¤", "Currency sign"], +165 => ["245", "A5", "10100101", "¥", "¥", "¥", "Yen sign"], +166 => ["246", "A6", "10100110", "¦", "¦", "¦", "Pipe, Broken vertical bar"], +167 => ["247", "A7", "10100111", "§", "§", "§", "Section sign"], +168 => ["250", "A8", "10101000", "¨", "¨", "¨", "Spacing diaeresis - umlaut"], +169 => ["251", "A9", "10101001", "©", "©", "©", "Copyright sign"], +170 => ["252", "AA", "10101010", "ª", "ª", "ª", "Feminine ordinal indicator"], +171 => ["253", "AB", "10101011", "«", "«", "«", "Left double angle quotes"], +172 => ["254", "AC", "10101100", "¬", "¬", "¬", "Not sign"], +173 => ["255", "AD", "10101101", "", "­", "­", "Soft hyphen"], +174 => ["256", "AE", "10101110", "®", "®", "®", "Registered trade mark sign"], +175 => ["257", "AF", "10101111", "¯", "¯", "¯", "Spacing macron - overline"], +176 => ["260", "B0", "10110000", "°", "°", "°", "Degree sign"], +177 => ["261", "B1", "10110001", "±", "±", "±", "Plus-or-minus sign"], +178 => ["262", "B2", "10110010", "²", "²", "²", "Superscript two - squared"], +179 => ["263", "B3", "10110011", "³", "³", "³", "Superscript three - cubed"], +180 => ["264", "B4", "10110100", "´", "´", "´", "Acute accent - spacing acute"], +181 => ["265", "B5", "10110101", "µ", "µ", "µ", "Micro sign"], +182 => ["266", "B6", "10110110", "¶", "¶", "¶", "Pilcrow sign - paragraph sign"], 
+183 => ["267", "B7", "10110111", "·", "·", "·", "Middle dot - Georgian comma"], +184 => ["270", "B8", "10111000", "¸", "¸", "¸", "Spacing cedilla"], +185 => ["271", "B9", "10111001", "¹", "¹", "¹", "Superscript one"], +186 => ["272", "BA", "10111010", "º", "º", "º", "Masculine ordinal indicator"], +187 => ["273", "BB", "10111011", "»", "»", "»", "Right double angle quotes"], +188 => ["274", "BC", "10111100", "¼", "¼", "¼", "Fraction one quarter"], +189 => ["275", "BD", "10111101", "½", "½", "½", "Fraction one half"], +190 => ["276", "BE", "10111110", "¾", "¾", "¾", "Fraction three quarters"], +191 => ["277", "BF", "10111111", "¿", "¿", "¿", "Inverted question mark"], +192 => ["300", "C0", "11000000", "À", "À", "À", "Latin capital letter A with grave"], +193 => ["301", "C1", "11000001", "Á", "Á", "Á", "Latin capital letter A with acute"], +194 => ["302", "C2", "11000010", "Â", "Â", "Â", "Latin capital letter A with circumflex"], +195 => ["303", "C3", "11000011", "Ã", "Ã", "Ã", "Latin capital letter A with tilde"], +196 => ["304", "C4", "11000100", "Ä", "Ä", "Ä", "Latin capital letter A with diaeresis"], +197 => ["305", "C5", "11000101", "Å", "Å", "Å", "Latin capital letter A with ring above"], +198 => ["306", "C6", "11000110", "Æ", "Æ", "Æ", "Latin capital letter AE"], +199 => ["307", "C7", "11000111", "Ç", "Ç", "Ç", "Latin capital letter C with cedilla"], +200 => ["310", "C8", "11001000", "È", "È", "È", "Latin capital letter E with grave"], +201 => ["311", "C9", "11001001", "É", "É", "É", "Latin capital letter E with acute"], +202 => ["312", "CA", "11001010", "Ê", "Ê", "Ê", "Latin capital letter E with circumflex"], +203 => ["313", "CB", "11001011", "Ë", "Ë", "Ë", "Latin capital letter E with diaeresis"], +204 => ["314", "CC", "11001100", "Ì", "Ì", "Ì", "Latin capital letter I with grave"], +205 => ["315", "CD", "11001101", "Í", "Í", "Í", "Latin capital letter I with acute"], +206 => ["316", "CE", "11001110", "Î", "Î", "Î", "Latin capital letter I with 
circumflex"], +207 => ["317", "CF", "11001111", "Ï", "Ï", "Ï", "Latin capital letter I with diaeresis"], +208 => ["320", "D0", "11010000", "Ð", "Ð", "Ð", "Latin capital letter ETH"], +209 => ["321", "D1", "11010001", "Ñ", "Ñ", "Ñ", "Latin capital letter N with tilde"], +210 => ["322", "D2", "11010010", "Ò", "Ò", "Ò", "Latin capital letter O with grave"], +211 => ["323", "D3", "11010011", "Ó", "Ó", "Ó", "Latin capital letter O with acute"], +212 => ["324", "D4", "11010100", "Ô", "Ô", "Ô", "Latin capital letter O with circumflex"], +213 => ["325", "D5", "11010101", "Õ", "Õ", "Õ", "Latin capital letter O with tilde"], +214 => ["326", "D6", "11010110", "Ö", "Ö", "Ö", "Latin capital letter O with diaeresis"], +215 => ["327", "D7", "11010111", "×", "×", "×", "Multiplication sign"], +216 => ["330", "D8", "11011000", "Ø", "Ø", "Ø", "Latin capital letter O with slash"], +217 => ["331", "D9", "11011001", "Ù", "Ù", "Ù", "Latin capital letter U with grave"], +218 => ["332", "DA", "11011010", "Ú", "Ú", "Ú", "Latin capital letter U with acute"], +219 => ["333", "DB", "11011011", "Û", "Û", "Û", "Latin capital letter U with circumflex"], +220 => ["334", "DC", "11011100", "Ü", "Ü", "Ü", "Latin capital letter U with diaeresis"], +221 => ["335", "DD", "11011101", "Ý", "Ý", "Ý", "Latin capital letter Y with acute"], +222 => ["336", "DE", "11011110", "Þ", "Þ", "Þ", "Latin capital letter THORN"], +223 => ["337", "DF", "11011111", "ß", "ß", "ß", "Latin small letter sharp s - ess-zed"], +224 => ["340", "E0", "11100000", "à", "à", "à", "Latin small letter a with grave"], +225 => ["341", "E1", "11100001", "á", "á", "á", "Latin small letter a with acute"], +226 => ["342", "E2", "11100010", "â", "â", "â", "Latin small letter a with circumflex"], +227 => ["343", "E3", "11100011", "ã", "ã", "ã", "Latin small letter a with tilde"], +228 => ["344", "E4", "11100100", "ä", "ä", "ä", "Latin small letter a with diaeresis"], +229 => ["345", "E5", "11100101", "å", "å", "å", "Latin small letter a with 
ring above"], +230 => ["346", "E6", "11100110", "æ", "æ", "æ", "Latin small letter ae"], +231 => ["347", "E7", "11100111", "ç", "ç", "ç", "Latin small letter c with cedilla"], +232 => ["350", "E8", "11101000", "è", "è", "è", "Latin small letter e with grave"], +233 => ["351", "E9", "11101001", "é", "é", "é", "Latin small letter e with acute"], +234 => ["352", "EA", "11101010", "ê", "ê", "ê", "Latin small letter e with circumflex"], +235 => ["353", "EB", "11101011", "ë", "ë", "ë", "Latin small letter e with diaeresis"], +236 => ["354", "EC", "11101100", "ì", "ì", "ì", "Latin small letter i with grave"], +237 => ["355", "ED", "11101101", "í", "í", "í", "Latin small letter i with acute"], +238 => ["356", "EE", "11101110", "î", "î", "î", "Latin small letter i with circumflex"], +239 => ["357", "EF", "11101111", "ï", "ï", "ï", "Latin small letter i with diaeresis"], +240 => ["360", "F0", "11110000", "ð", "ð", "ð", "Latin small letter eth"], +241 => ["361", "F1", "11110001", "ñ", "ñ", "ñ", "Latin small letter n with tilde"], +242 => ["362", "F2", "11110010", "ò", "ò", "ò", "Latin small letter o with grave"], +243 => ["363", "F3", "11110011", "ó", "ó", "ó", "Latin small letter o with acute"], +244 => ["364", "F4", "11110100", "ô", "ô", "ô", "Latin small letter o with circumflex"], +245 => ["365", "F5", "11110101", "õ", "õ", "õ", "Latin small letter o with tilde"], +246 => ["366", "F6", "11110110", "ö", "ö", "ö", "Latin small letter o with diaeresis"], +247 => ["367", "F7", "11110111", "÷", "÷", "÷", "Division sign"], +248 => ["370", "F8", "11111000", "ø", "ø", "ø", "Latin small letter o with slash"], +249 => ["371", "F9", "11111001", "ù", "ù", "ù", "Latin small letter u with grave"], +250 => ["372", "FA", "11111010", "ú", "ú", "ú", "Latin small letter u with acute"], +251 => ["373", "FB", "11111011", "û", "û", "û", "Latin small letter u with circumflex"], +252 => ["374", "FC", "11111100", "ü", "ü", "ü", "Latin small letter u with diaeresis"], +253 => ["375", "FD", 
"11111101", "ý", "ý", "ý", "Latin small letter y with acute"], +254 => ["376", "FE", "11111110", "þ", "þ", "þ", "Latin small letter thorn"], +255 => ["377", "FF", "11111111", "ÿ", "ÿ", "ÿ", "Latin small letter y with diaeresis"], +); + + +my $opt_infile; +my $opt_column="description"; +my $opt_out; +my $opt_help = 0; + + +if ( !GetOptions( 'f|infile=s' => \$opt_infile, + 'o|out|output=s' => \$opt_out, + 'c|column=s' => \$opt_column, + 'h|help!' => \$opt_help ) ) +{ + pod2usage( { -message => 'Failed to parse command line', + -verbose => 1, + -exitval => 1 } ); +} + +if ($opt_help) { + pod2usage( { -verbose => 2, + -exitval => 0 } ); +} + +if ( !( defined($opt_infile) ) ) { + pod2usage( { + -message => "Must specify at leat one parameter:\nfasta_statisticsAndPlot.pl -f InputFastaFile [-o Ouput_directory] ", + -verbose => 0, + -exitval => 2 } ); +} + + +if ($opt_column) { + $opt_column = lc($opt_column); + $opt_column =~ tr/ /_/; + if( ! exists($column{$opt_column})){ + print "$opt_column does not exist. Choice must be among , , , , , , and . Whithout the <> characters (it is case insensitive).\n"; exit; + } +} + +my $outstream = IO::File->new(); + +if ( defined($opt_out) ) { + if (-f $opt_out) { + print "$opt_out output directory already exits !\n";exit; + } + + $outstream->open( $opt_out, 'w' ) or + croak( sprintf( "Can not open '%s' for writing %s", $opt_out, $! ) ); +} +else { + $outstream->fdopen( fileno(STDOUT), 'w' ) or + croak( sprintf( "Can not open STDOUT for writing: %s", $! 
# ---------------------------------------------------------------------------
# Decode the input file character by character.
# For every character one report line is written to $outstream (the -o file,
# or STDOUT when no output file was requested) giving: the character itself,
# its decimal code, and the requested column of the %ascii table.
# ---------------------------------------------------------------------------

# Read raw bytes: every "character" is then a single byte, so ord() stays
# within 0-255.
open( my $in_fh, '<:raw', $opt_infile )
    or die "Couldn't open $opt_infile: $!\n";

# Position of the requested information inside each %ascii entry.
my $column_index = $column{$opt_column};

# read line by line
while ( my $line = <$in_fh> ) {

    # read character by character
    foreach my $char ( split //, $line ) {

        my $ascii_dec = ord($char);

        # Look the entry up without autovivifying missing codes.
        my $entry = $ascii{$ascii_dec};
        my $desc  = defined $entry ? $entry->[$column_index] : undef;
        $desc = 'unknown' unless defined $desc;

        $outstream->printf(
            "character:<%s> | ascii decimal:%3d | %s:%s\n",
            $char, $ascii_dec, $opt_column, $desc
        );
    }
}

close $in_fh;
$outstream->close;
Character 127 represents the command DEL.

The extended ASCII codes (character code 128-255)
There are several different variations of the 8-bit ASCII table. The table below is according to Windows-1252 (CP-1252) which is a superset of ISO 8859-1, also called ISO Latin-1, in terms of printable characters, but differs from the IANA's ISO-8859-1 by using displayable characters rather than control characters in the 128 to 159 range. Characters that differ from ISO-8859-1 are marked by a light blue color.

DEC OCT HEX BIN Symbol HTML Number HTML Name Description


=head1 SYNOPSIS

    ./reveal_hidden_charracter.pl -f infile [--column=description] [--output=file]
    ./reveal_hidden_charracter.pl --help

=head1 OPTIONS

=over 8

=item B<--infile> or B<-f>

Input file to inspect.

=item B<--column> or B<-c>

Specify the extra information/description you want for each character. The choice must be one of the columns of the ASCII table described above (OCT, HEX, BIN, Symbol, HTML Number, HTML Name, Description); a space in a column name may be written as an underscore.
The option is case insensitive.
By default it is description.

=item B<--out>, B<--output> or B<-o>

[OPTIONAL] Output file. If no output is specified, the result is written to STDOUT.

=item B<--help> or B<-h>

Display this helpful text.
### Get the script arguments and open relevant files
options = OpenStruct.new()
opts = OptionParser.new()
opts.on("-h","--help","Display the usage information") {RDoc::usage}
opts.on("-i","--infile", "=INFILE","Input") {|argument| options.infile = argument }
opts.on("-o","--outfile", "=OUTFILE","Output") {|argument| options.outfile = argument }

opts.parse! rescue RDoc::usage('usage')

# The input file is mandatory: without it there is nothing to convert.
abort "Missing input file: use -i/--infile INFILE" if options.infile.nil?

# Write to the requested file, or to standard output when -o is not given.
output_stream = options.outfile ? File.new(options.outfile, 'w') : $stdout

# Counter used to build the RFAMT<n> / RFAME<n> identifiers.
id_counter = 100000

# Stream the input line by line instead of loading it entirely in memory.
File.foreach(options.infile) do |raw_line|

  line = raw_line.strip
  # Blank lines and GFF comment/directive lines carry no feature.
  next if line.empty? || line.start_with?("#")

  id_counter += 1

  e = line.split("\t")

  # Parse the attribute column (last column) into a key => value hash.
  # Split on the first '=' only, so values containing '=' are kept whole.
  rfam_features = {}
  e[-1].split(";").each do |f|
    key, value = f.split("=", 2)
    rfam_features[key] = value
  end

  # Each input feature becomes a gene / ncRNA / exon triplet.
  output_stream.puts "#{e[0]}\t#{e[1]}\tgene\t#{e[3]}\t#{e[4]}\t#{e[5]}\t#{e[6]}\t#{e[7]}\t#{e[8]}"
  output_stream.puts "#{e[0]}\t#{e[1]}\tncRNA\t#{e[3]}\t#{e[4]}\t#{e[5]}\t#{e[6]}\t#{e[7]}\tID=RFAMT#{id_counter};Parent=#{rfam_features['ID']};Name=#{rfam_features['ID']};description=#{rfam_features['rfam-acc']} (#{rfam_features['rfam-id']})"
  output_stream.puts "#{e[0]}\t#{e[1]}\texon\t#{e[3]}\t#{e[4]}\t#{e[5]}\t#{e[6]}\t#{e[7]}\tID=RFAME#{id_counter};Parent=RFAMT#{id_counter}"

end

# Only close what this script opened; leave $stdout alone.
output_stream.close unless output_stream.equal?($stdout)
# ---------------------------------------------------------------------------
# gaas_rfam2grid.pl - main program
#   1. parse and validate the command line
#   2. write one fasta file per input sequence and build one cmsearch command
#      per file
#   3. run the commands on the grid (Slurm / LSF) or locally
#   4. parse every *.rfam table and write the features to rfam.gff
# ---------------------------------------------------------------------------

my $header        = get_gaas_header();
my $rfam_cm_file  = "/projects/references/databases/rfam/14.1/Rfam.cm";    # covariance models given to cmsearch
my $gff_formatter = Bio::Tools::GFF->new( -gff_version => 3 );
my $queue         = undef;
my $outdir        = "rfam_output";
my $fasta         = undef;
my @cmds          = ();        # Stores the commands to send to farm
my $quiet;
my @annotations   = ();        # Stores Rfam annotations as feature objects
my $help;
my $grid          = "Slurm";

if ( !GetOptions(
        "h|help!"    => \$help,
        "fasta|f=s"  => \$fasta,
        "cm=s"       => \$rfam_cm_file,
        "grid=s"     => \$grid,
        "queue=s"    => \$queue,
        "quiet|q!"   => \$quiet,
        "outdir|o=s" => \$outdir ) )
{
    pod2usage( { -message => "Failed to parse command line",
                 -verbose => 1,
                 -exitval => 1 } );
}

# Print Help and exit
if ($help) {
    pod2usage( { -verbose => 99,
                 -exitval => 0,
                 -message => "$header \n" } );
}

# Only the fasta file is mandatory: the output directory has a default.
if ( !defined($fasta) ) {
    pod2usage( {
            -message => "$header\nAt least 1 parameter is mandatory:\nInput fasta file (--fasta)\n\n",
            -verbose => 0,
            -exitval => 2 } );
}

# set grid option properly
# The value must be exactly one of the accepted names: a prefix such as "s"
# would pass a pattern match but select no runner below.
my @grid_choice = ( 'slurm', 'lsf', 'none' );
$grid = lc($grid);
if ( !grep { $_ eq $grid } @grid_choice ) {
    print STDERR "$grid is not a value accepted for grid parameter.\n";
    exit 1;
}
$grid = undef if $grid eq 'none';

if ( !-e $rfam_cm_file ) {
    print STDERR "The cm file " . $rfam_cm_file . " does not exist. Please define it using the cm option.\n";
    exit 1;
}
if ( !-e $fasta ) {
    print STDERR "The fasta file " . $fasta . " does not exist.\n";
    exit 1;
}

# .. Check that all binaries are available in $PATH
my @tools = ("cmsearch");    # List of tools to check for!
foreach my $exe (@tools) {
    check_bin($exe) == 1 or die "Missing executable $exe in PATH";
}

# .. Create output directory
if ( -d $outdir ) {
    die "Output directory $outdir exists. Please remove and try again";
}
else {
    msg("Creating output directory $outdir");
    # List form: the directory name is never interpreted by a shell.
    runcmd( 'mkdir', '-p', $outdir );
}

# .. set up log file
# LOG stays a bareword handle on purpose: msg() writes to \*LOG.
my $logfile = "$outdir/rfam_search.log";
msg("Writing log to: $logfile");
open( LOG, '>', $logfile ) or err("Can't open logfile");

# .. Read genome fasta file.
my $inseq = Bio::SeqIO->new( -file => "<$fasta", -format => 'fasta' );

# .. and create chunks (one fasta file and one cmsearch command per sequence)
msg("Creating chunks\n");

my $seq_counter = 0;

while ( my $seq = $inseq->next_seq() ) {
    $seq_counter += 1;

    # We could also use the display_id, but this can cause trouble with special characters
    my $chunk_file = $outdir . "/seq_" . $seq_counter . ".fasta";
    my $seq_out = Bio::SeqIO->new( -file => ">$chunk_file", -format => 'fasta' );
    $seq_out->write_seq($seq);

    my $command = "cmsearch --cpu 1 --rfam --cut_tc --tblout " . $chunk_file . ".rfam " . $rfam_cm_file . " " . $chunk_file . " > /dev/null";
    push( @cmds, $command );
}

msg("submitting chunks\n");

if ($grid) {
    # scalar(@cmds) is the number of jobs; $#cmds would be one less.
    msg( "Sending " . scalar(@cmds) . " jobs to the grid\n" );

    # Submit job chunks to grid
    my $grid_runner;
    if ( $grid eq 'lsf' ) {
        $grid_runner = Bsub->new( cmds_list => \@cmds );
    }
    elsif ( $grid eq 'slurm' ) {
        $grid_runner = Sbatch->new( cmds_list => \@cmds );
    }
    if ($queue) { $grid_runner->queue($queue) }
    $grid_runner->run();
}
else {
    # No grid: run the commands one after the other and report their status.
    foreach my $command (@cmds) {

        system($command);

        if ( $? == -1 ) {
            print "failed to execute: $!\n";
        }
        elsif ( $? & 127 ) {
            printf "child died with signal %d, %s coredump\n",
                ( $? & 127 ), ( $? & 128 ) ? 'with' : 'without';
        }
        else {
            printf "child exited with value %d\n", $? >> 8;
        }
    }
}

# ..Postprocessing here, merging of output and printing gff

msg("Merging output and writing GFF file");

my @files = glob("$outdir/*.rfam");

foreach my $file (@files) {

    open( my $IN, '<', $file ) or die "FATAL: Can't open file: $file for reading.\n$!\n";

    while ( my $line = <$IN> ) {
        chomp $line;
        next if ( $line =~ /^#/ );      # Skipping comment lines
        next if ( $line !~ /\S/ );      # Skipping empty lines

        my $annotation = parse_line($line);
        push( @annotations, $annotation );
    }

    close($IN);
}

my $outfile = "rfam.gff";
open( my $OUT, '>', $outfile ) or die "FATAL: Can't open file: $outfile for writing.\n$!\n";

foreach my $feature (@annotations) {
    $feature->gff_format($gff_formatter);
    print $OUT $feature->gff_string, "\n";
}

close($OUT) or die "FATAL: Can't close file: $outfile.\n$!\n";
# --------------------

# parse_line($line)
# Turn one data line of a cmsearch --tblout table into a
# Bio::SeqFeature::Generic 'ncRNA' feature (source 'Rfam').
#   $line : one whitespace-separated line of the table, without newline.
# Returns the feature object.
sub parse_line {
    my $line = shift;

    my ( $tn, $tacc, $qn, $qacc, $mdl, $mdlf, $mdlt, $seqf, $seqt, $strand,
         $trunc, $pass, $gc, $bias, $score, $evalue, $inc, $desc )
        = split( /\s+/, $line );

    my %tags = ( 'rfam-id'     => $qn,
                 'rfam-acc'    => ( $qacc || 'unknown' ),
                 'model_start' => $mdlf,
                 'model_end'   => $mdlt,    # end of the model, not its start
                 'gc-content'  => $gc,
                 'ID'          => $qacc . "_" . $tn . "_" . $seqf,
                 'Name'        => $qacc . "_" . $tn . "_" . $seqf,
    );

    # cmsearch reports coordinates in orientation of annotation, not chromosome.
    # Need to sort from low to high for gff. The comparison must be numeric:
    # a plain sort() compares strings and would put 1000 before 999.
    my ( $from, $to ) = sort { $a <=> $b } ( $seqf, $seqt );

    if ( $evalue =~ /[0-9]/ ) {
        $tags{'evalue'} = $evalue;
    }

    my $f = Bio::SeqFeature::Generic->new(
        -seq_id      => $tn,
        -start       => $from,
        -end         => $to,
        -strand      => $strand,
        -frame       => 0,
        -primary_tag => 'ncRNA',    # may argue over whether this is an exon feature, but anything else will be ignored by Maker
        -source_tag  => 'Rfam',
        -score       => $score,
        -tag         => \%tags,
    );

    return $f;
}

# --------------------

# msg(@text)
# Print a time-stamped message to the log file (once LOG is open) and to
# STDERR (unless --quiet).
sub msg {
    my $t    = localtime;
    my $line = "[" . $t->hms . "] @_\n";
    print LOG $line if openhandle( \*LOG );
    print STDERR $line unless $quiet;
}

# --------------------

# runcmd(@command)
# Log and run a command through system(); stop the program via err() when
# the command does not return 0.
sub runcmd {
    msg( "Running:", @_ );
    system(@_) == 0 or err( "Could not run command:", @_ );
}

# --------------------

# check_bin($executable)
# Return 1 when `which` prints something for the executable, 0 otherwise.
sub check_bin {
    return length(`which @_`) > 0 ? 1 : 0;
}

#----------------------------------------------------------------------

# err(@text)
# Print the message even in quiet mode, then exit with status 2.
sub err {
    $quiet = 0;
    msg(@_);
    exit(2);
}
+ +=item B<--cm> + +File containing the covariance models (cm) used by rfam + +=item B<--grid> + +Define which grid to use, Slurm, Lsf or None. Default = Slurm. + +=item B<--queue> + +If you want to define a particular queue to run the jobs + +=item B<--quiet> or B<-q> + +Quiet mode + +=item B<--outdir> or B<-o> + +The name of the output directory. + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_rs_PlotDensityAEDFromMakerAnnotationFiles.R b/bin/gaas_rs_PlotDensityAEDFromMakerAnnotationFiles.R new file mode 100755 index 000000000..07bc5a3d3 --- /dev/null +++ b/bin/gaas_rs_PlotDensityAEDFromMakerAnnotationFiles.R @@ -0,0 +1,71 @@ +#!/sw/R/3.0.2/bin/Rscript + +# usage: script.R gtf file1.gtf +# You can use one or many gff or gtf files (should be well formatted) +# /!\ Do not forget to load R + +# Jacques Dainat 04/2014 - nbis.se + +# get arguments +args <- commandArgs(TRUE) +cat("You gave",length(args),"arguments:",paste(args,collapse=" - "),"\n"); + +if (length(args) < 1) { + cat("You have to give a least one file. 
# Plot the density of the AED scores found in one or several Maker annotation
# files (gtf or gff). All curves are drawn in outputPlot.pdf, one colour per
# file, with a legend giving the number of values used for each file.

# get arguments
args <- commandArgs(TRUE)
cat("You gave",length(args),"arguments:",paste(args,collapse=" - "),"\n");

if (length(args) < 1) {
  cat("You have to give a least one file. Do nont forget to load R module.","\n");
  quit(save="no", status=1);
}

# legend labels and colours of the files that were actually plotted
listlegend <- c();
legendCols <- c();

nbFile=length(args);
cat("Number file(s) =",nbFile,"\n");
pdf("outputPlot.pdf")

# becomes TRUE once a first curve has been drawn
plotStarted <- FALSE

for (i in seq_len(nbFile)) {

  cat("file studied =",args[i],"\n");

  #take only file name
  myfileName <- basename(args[i]);
  #get extension: the part after the LAST dot ("" when there is none), so
  #that names such as annot.v2.gff are recognized
  myExt <- tools::file_ext(myfileName);
  #remove extension (keep what precedes the first dot) for the legend
  myfileName <- sub("^([^.]*).*", "\\1", myfileName);

  # call bash command; the file name is quoted for the shell
  if(myExt == "gtf"){
    command=paste('awk \'!($12 in arr){cpt++;arr[$12]++;arrScore[cpt]=$18}END{for (x in arrScore) if(arrScore[x] != "") print arrScore[x]}\' ',shQuote(args[i]),' | sed s/[\\\",\\;]//g')
  }else if (myExt == "gff"){
    command=paste('awk \'{if($3=="mRNA") {split($9,a,";"); if(!(a[3] in arr)){ cpt++;arr[a[3]]++;arrScore[cpt]=a[4]}}} END {for (x in arrScore) if(arrScore[x] != "") print arrScore[x]}\' ',shQuote(args[i]),' | sed s/[\\\",\\;AED\\_\\=]//g')
  }
  else {
    cat("The extension ",myExt,"is not recognized by the program. Use only gtf or/and gff extension !\n");
    dev.off();
    quit(save="no", status=1);
  }
  myData <- system(command, intern=TRUE)

  # keep only the values that are real numbers: density() refuses NA
  myGoodData <- suppressWarnings(as.numeric(myData))
  myGoodData <- myGoodData[!is.na(myGoodData)]

  # a density estimate needs at least two values
  if (length(myGoodData) < 2) {
    cat("Skipping",args[i],": fewer than 2 AED values could be extracted\n");
    next;
  }

  legendInfo=paste(myfileName,"(",length(myGoodData),"mRNAs )")
  listlegend<-c(listlegend,legendInfo)
  legendCols<-c(legendCols,i)

  #If a curve is already drawn, overlay the next one on the same page
  if(plotStarted){
    par(new=TRUE);
  }

  #make plot
  if (nbFile == 1){
    plot(density(myGoodData),xlim=c(0,1), col=i, xlab="AED score", main="")
  }
  else{plot(density(myGoodData),xlim=c(0,1),ylim=c(0,18), col=i, xlab="AED score", main="")} # You can modify the value 18 according to the value in your graph
  plotStarted <- TRUE
}

# nothing could be plotted: close the device and report the failure
if (!plotStarted) {
  dev.off();
  quit(save="no", status=1);
}

# Add Title
title(main="AED distribution")

#Add Legend
legend("topright", col=legendCols, lty=1, listlegend)

#END
dev.off()
# Draw, in outputPlot.pdf, one density curve per input file. Each file is
# read with read.table() and is expected to hold the values to plot.
# The legend shows, for each file, its base name cut at the first dot.

args <- commandArgs(TRUE)
cat("You gave", length(args), "arguments:", paste(args, collapse=" - "), "\n")

# Nothing to plot without at least one file.
if (length(args) == 0) {
  cat("You have to give a least one file", "\n")
  quit()
}

cat("My file=", length(args), "\n")

# Legend labels: file name without directory and without extension.
legendLabels <- sub("^([^.]*).*", "\\1", basename(args))

pdf("outputPlot.pdf")

for (fileIdx in seq_along(args)) {

  # From the second file on, draw over the existing plot.
  if (fileIdx > 1) {
    par(new=TRUE)
  }

  fileValues <- as.matrix(read.table(args[fileIdx]))
  plot(density(fileValues), xlim=c(0,1), ylim=c(0,5), col=fileIdx, xlab="", ylab="", main="")
}

legend("topright", col=seq_along(args), lty=1, legendLabels)
dev.off()
# ---------------------------------------------------------------------------
# gaas_scaffold2AGP.pl - main program
# Cut every scaffold at runs of 20 or more N, write the resulting contigs to
# contigs.fasta and describe the scaffold layout (contigs and gaps) in an AGP
# file.
# ---------------------------------------------------------------------------

my $header      = get_gaas_header();
my $infile      = undef;
my $outfile     = undef;
my $contigcount = 0;
my $help        = undef;

if ( !GetOptions( 'i=s'            => \$infile,
                  'o|out|output=s' => \$outfile,
                  'h|help!'        => \$help ) )
{
    pod2usage( { -message => 'Failed to parse command line',
                 -verbose => 1,
                 -exitval => 1 } );
}

# Print Help and exit
if ($help) {
    pod2usage( { -verbose => 99,
                 -exitval => 0,
                 -message => "$header\n" } );
}

if ( !defined($infile) ) {
    pod2usage( {
            -message => "$header\nMust specify at least 1 parameters:\nInput fasta file (-i)\n",
            -verbose => 0,
            -exitval => 1 } );
}

# DEAL with output: create default output file based on the input file name
if ( !$outfile ) {
    my ( $file_in, $path, $ext ) = fileparse( $infile, qr/\.[^.]*/ );
    $outfile = $file_in . ".agp";
}
open( my $agp_fh, '>', $outfile )
    or die "Can not open '$outfile' for writing: $!\n";

# ===================

my $inseq = Bio::SeqIO->new( '-file'   => "<$infile",
                             '-format' => 'Fasta' );

my $outseq = Bio::SeqIO->new( -file   => ">contigs.fasta",
                              -format => 'fasta' );

# Read scaffolded FASTA-file
while ( my $seq_obj = $inseq->next_seq ) {
    my $scaffold = $seq_obj->id;
    my $sequence = $seq_obj->seq;
    my $start    = 1;    # position, on the scaffold, of the next part
    my $count    = 0;    # part number within the scaffold

    # Sequences already named contig* are not split
    next if ( $scaffold =~ /^contig/i );
    # Nothing to describe for an empty record
    next unless defined $sequence && length $sequence;

    # The capturing group makes split return the gaps as well, so contigs and
    # gaps alternate in the list.
    foreach my $substring_sequence ( split /(N{20,})/i, $sequence ) {
        my $substring_length = length($substring_sequence);

        # A scaffold starting with a gap yields an empty first element:
        # it is neither a contig nor a gap.
        next if $substring_length == 0;

        $count++;
        my $part_start = $start;
        my $part_end   = $start + $substring_length - 1;
        $start += $substring_length;

        if ( $substring_sequence =~ m/^N+$/i ) {
            # gap line
            print {$agp_fh} "$scaffold\t$part_start\t$part_end\t$count\tN\t$substring_length\tscaffold\tyes\tpaired-ends\n";
        }
        else {
            # contig line, and the contig itself in contigs.fasta
            $contigcount++;
            my $contig_name = sprintf( "contig%05d", $contigcount );
            my $contig_obj  = Bio::Seq->new( -seq        => "$substring_sequence",
                                             -display_id => $contig_name,
                                             -alphabet   => "dna" );
            $outseq->write_seq($contig_obj);
            print {$agp_fh} "$scaffold\t$part_start\t$part_end\t$count\tW\t$contig_name\t1\t$substring_length\t+\n";
        }
    }
}

close($agp_fh) or die "Can not close '$outfile': $!\n";
# ---------------------------------------------------------------------------
# gaas_scipio2grid.pl - main program and helpers
#   1. parse and validate the command line
#   2. split the genome fasta file into chunks of $chunk_size sequences
#   3. run one scipio.pl job per chunk, on the grid (Slurm / LSF) or locally
#   4. merge the job outputs and convert them to GFF with yaml2gff.1.4.pl
# ---------------------------------------------------------------------------

my $header         = get_gaas_header();
my $scipio_outfile = "scipio.merged.gff";
my $outdir         = undef;
my $genome         = undef;
my $protein        = undef;
my $queue          = undef;
my $chunk_size     = 10;    # Partition size of fasta input
my @cmds           = ();    # Stores the commands to send to farm
my $quiet;
my $help;
my $grid           = "Slurm";

# --genome / -g are accepted next to --fasta / -f because they are the names
# given in the documentation of this script.
if ( !GetOptions(
        "h|help"             => \$help,
        "fasta|f|genome|g=s" => \$genome,
        "grid=s"             => \$grid,
        "chunk=i"            => \$chunk_size,
        "protein|p=s"        => \$protein,
        "queue=s"            => \$queue,
        "quiet|q!"           => \$quiet,
        "outdir|o=s"         => \$outdir ) )
{
    pod2usage( { -message => "Failed to parse command line",
                 -verbose => 1,
                 -exitval => 1 } );
}

# Print Help and exit
if ($help) {
    pod2usage( { -verbose => 99,
                 -exitval => 0,
                 -message => "$header\n" } );
}

# The three values below are used unconditionally later on.
if ( !defined($genome) or !defined($protein) or !defined($outdir) ) {
    pod2usage( {
            -message => "$header\nAt least 3 parameters are mandatory:\nInput genome fasta file (--fasta), protein fasta file (--protein) and output directory (--outdir)\n\n",
            -verbose => 0,
            -exitval => 2 } );
}

# set grid option properly
# The value must be exactly one of the accepted names: a prefix such as "s"
# would pass a pattern match but select no runner below.
my @grid_choice = ( 'slurm', 'lsf', 'none' );
$grid = lc($grid);
if ( !grep { $_ eq $grid } @grid_choice ) {
    print STDERR "$grid is not a value accepted for grid parameter.\n";
    exit 1;
}
$grid = undef if $grid eq 'none';

# .. Check that all binaries are available in $PATH
my @tools = ( "scipio.pl", "blat" );
foreach my $exe (@tools) {
    check_bin($exe) == 1 or die "Missing executable $exe in PATH";
}

# .. Create output directory
if ( -d $outdir ) {
    die "Output directory $outdir exists. Please remove and try again";
}
else {
    msg("Creating output directory $outdir");
    # List form: the directory name is never interpreted by a shell.
    runcmd( 'mkdir', '-p', $outdir );
}

# .. set up log file
# LOG stays a bareword handle on purpose: msg() writes to \*LOG.
my $logfile = "$outdir/scipio2grid.log";
msg("Writing log to: $logfile");
open( LOG, '>', $logfile ) or err("Can't open logfile");

# .. Read genome fasta file.
my $inseq = Bio::SeqIO->new( -file => "<$genome", -format => 'fasta' );

# .. and create chunks
msg("Creating chunks for GRID\n");

my @seqarray      = ();
my $chunk_counter = 0;    # number of chunk files written so far

while ( my $seq = $inseq->next_seq() ) {
    push( @seqarray, $seq );

    if ( @seqarray == $chunk_size ) {
        $chunk_counter += 1;
        write_chunk( $outdir . "/chunk_" . $chunk_counter . ".fa", @seqarray );
        @seqarray = ();
    }
}

# The last chunk holds the remaining sequences (fewer than chunk_size).
# It is written only when something remains, so no empty file is created.
if (@seqarray) {
    $chunk_counter += 1;
    write_chunk( $outdir . "/chunk_" . $chunk_counter . ".fa", @seqarray );
}

err("No sequence found in $genome") if $chunk_counter == 0;

# Push all jobs into the command list
for my $i ( 1 .. $chunk_counter ) {
    my $scipio_cmd = "scipio.pl $outdir/chunk_$i.fa $protein > $outdir/chunk_$i.scipio";
    push( @cmds, $scipio_cmd );
}

# SUBMISSION
msg("submitting chunks\n");
if ($grid) {
    # scalar(@cmds) is the number of jobs; $#cmds would be one less.
    msg( "Sending " . scalar(@cmds) . " jobs to the grid\n" );

    # Submit job chunks to grid
    my $grid_runner;
    if ( $grid eq 'lsf' ) {
        $grid_runner = Bsub->new( cmds_list => \@cmds );
    }
    elsif ( $grid eq 'slurm' ) {
        $grid_runner = Sbatch->new( cmds_list => \@cmds );
    }
    # Forward --queue, as gaas_rfam2grid.pl does.
    if ($queue) { $grid_runner->queue($queue) }
    $grid_runner->run();
}
else {
    # No grid: run the commands one after the other and report their status.
    foreach my $command (@cmds) {

        system($command);

        if ( $? == -1 ) {
            print "failed to execute: $!\n";
        }
        elsif ( $? & 127 ) {
            printf "child died with signal %d, %s coredump\n",
                ( $? & 127 ), ( $? & 128 ) ? 'with' : 'without';
        }
        else {
            printf "child exited with value %d\n", $? >> 8;
        }
    }
}

# Merging the outputs
msg("Merging outputs from chunks");

my @files = glob("$outdir/*.scipio");

foreach my $file (@files) {
    system("cat $file >> $outdir/scipio.merged");
}

system("yaml2gff.1.4.pl $outdir/scipio.merged > $scipio_outfile 2> /dev/null");

msg("Finished scipio grid run.");

# --------------------

# write_chunk($outfile, @seqs)
# Write the given sequence objects to $outfile in fasta format.
sub write_chunk {
    my $outfile = shift;
    my @seqs    = @_;
    my $seq_out = Bio::SeqIO->new( -file => ">$outfile", -format => 'fasta' );
    foreach my $seq (@seqs) { $seq_out->write_seq($seq) }
}

# --------------------

# msg(@text)
# Print a time-stamped message to the log file (once LOG is open) and to
# STDERR (unless --quiet).
sub msg {
    my $t    = localtime;
    my $line = "[" . $t->hms . "] @_\n";
    print LOG $line if openhandle( \*LOG );
    print STDERR $line unless $quiet;
}

# --------------------

# runcmd(@command)
# Log and run a command through system(); stop the program via err() when
# the command does not return 0.
sub runcmd {
    msg( "Running:", @_ );
    system(@_) == 0 or err( "Could not run command:", @_ );
}

# --------------------

# check_bin($executable)
# Return 1 when `which` prints something for the executable, 0 otherwise.
sub check_bin {
    return length(`which @_`) > 0 ? 1 : 0;
}

#----------------------------------------------------------------------

# err(@text)
# Print the message even in quiet mode, then exit with a non-zero status so
# that the caller can detect the failure.
sub err {
    $quiet = 0;
    msg(@_);
    exit(2);
}
+ +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_screen_mito_tblastn.pl b/bin/gaas_screen_mito_tblastn.pl new file mode 100755 index 000000000..95723f4aa --- /dev/null +++ b/bin/gaas_screen_mito_tblastn.pl @@ -0,0 +1,275 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use IO::File; +use Bio::DB::Fasta; +use File::Basename; +use Getopt::Long; +use Pod::Usage; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my @copyARGV=@ARGV; + +my $opt_output = undef; +my $tabfile = undef; +my $opt_genome= undef; +my $help= undef; + + +if ( !GetOptions("tab=s" => \$tabfile, + "o|out=s" => \$opt_output, + "g|genome=s" => \$opt_genome, + "h|help" => \$help) ) +{ + pod2usage( { -message => 'Failed to parse command line', + -verbose => 1, + -exitval => 1 } ); +} + +if ($help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if ( ! defined( $tabfile) ) { + pod2usage( { + -message => "$header\nMust specify at least 1 file. 
\n", + -verbose => 0, + -exitval => 1 } ); +} + +### output +my $ostream = IO::File->new(); + +# Manage Output +if(defined($opt_output)) +{ +$ostream->open( $opt_output, 'w' ) or + croak( + sprintf( "Can not open '%s' for reading: %s", $opt_output, $! ) ); +} +else{ + $ostream->fdopen( fileno(STDOUT), 'w' ) or + croak( sprintf( "Can not open STDOUT for writing: %s", $! ) ); +} + + + +##### +my %allIDs; # save ID in lower case to avoid cast problems +my $db = undef; +if ($opt_genome){ + my $nbFastaSeq=0; + $db = Bio::DB::Fasta->new($opt_genome); + my @ids = $db->get_all_primary_ids; + foreach my $id (@ids ){$allIDs{lc($id)}=$id;} +} + +##### Stream in 1 +my $fh1; +if ($tabfile) { + open($fh1, '<', $tabfile) or die "Could not open file '$tabfile' $!"; +} + + +my %info; + +while( my $line = <$fh1>) { + + if( $line =~ m/^#/){ next; } + + my @list = split(/\s/,$line); + my $ID = $list[1]; + my $mito_gene = $list[0]; + $mito_gene =~ /\w+\|\w+\|([^_]+).*/; + $mito_gene = $1; + #print " my mytogene = $mito_gene \n"; + my $start=undef; + my $end= undef; + if($list[8]< $list[9]){ + $start = $list[8]; + $end = $list[9]; + } + else{ + $start = $list[9]; + $end = $list[8]; + } + #print $start." 
".$end."\n"; + push (@{$info{$ID}}, [$start, $end, $mito_gene]); +} + +my %omni; +my %nbMitoGeneByContig; + +foreach my $contig (keys %info){ + + my @uniq_list; + #print "contig: ".$contig."\n"; + + my $prev_start = -1; + my $prev_end = -1; + my $start = -1; + my $end = -1; + my $printed=undef; + + foreach my $tuple (sort {$a->[0] <=> $b->[0] } @{$info{$contig}}){ + + $nbMitoGeneByContig{$contig}{$tuple->[2]}++; + #print $tuple->[2];exit; + + $start = @$tuple[0]; + $end = @$tuple[1]; + + if ( ($prev_start <= $end) and ($prev_end >= $start) ){ #it overlaps or are consecutive + #print "it overlaps\n"; + if ($end > $prev_end){ + $prev_end = $end; + } + } + elsif($start > $prev_end){ + if($prev_start != -1){ + push (@uniq_list, [$prev_start,$prev_end]); + #print "I push the tuple [$prev_start,$prev_end]\n"; + } + $prev_start = $start ; + $prev_end = $end ; + } + + } + # Deal with the last round + push (@uniq_list, [$prev_start,$prev_end]); + #print "I push the last tuple [$prev_start,$prev_end]\n"; + + push (@{$omni{$contig}}, @uniq_list) +} + + #calculate bp incremented non-overlaping hit size + my %size; + foreach my $contig (keys %omni){ + foreach my $tuple ( @{$omni{$contig}} ){ + $size{$contig}+=(@$tuple[1]-@$tuple[0]+1); + } + } + + if ($opt_genome){ + print $ostream "SequenceID\tNumber_of_non_ovelaping_Hit\tNb_mito_gene\tTotal_hit_size\tSize_sequence\t%_Sequence_covered_by_hit\tGene_names\n"; + # sort by number of non-overlaping hits + foreach my $contig (sort { @{$omni{$a}} <=> @{$omni{$b}} } keys %omni){ + + #compute length of the contig + my $seq_id_correct = $allIDs{lc($contig)}; + my $seq = $db->get_Seq_by_id($seq_id_correct); + my $length = $seq->length; + + #compute % of seq covered by mito hits + my $goodxGenome=sprintf("%0.2f",($size{$contig}*100)/$length); + + my $nbMitoGene = keys %{$nbMitoGeneByContig{$contig}}; + + my @geneList=(); + foreach my $key (sort keys %{$nbMitoGeneByContig{$contig}}){ + push @geneList, $key + } + print $ostream 
$contig."\t".@{$omni{$contig}}."\t".$nbMitoGene."\t".$size{$contig}."\t".$length."\t".$goodxGenome."\t".join(",", @geneList)."\n"; + } + + } + else{ + print $ostream "SequenceID\tNumber_of_non_ovelaping_Hit\tNb_mito_gene\tTotal_hit_size\tGene_names\n"; + # sort by number of non-overlaping hits + foreach my $contig (sort { @{$omni{$a}} <=> @{$omni{$b}} } keys %omni){ + my $nbMitoGene = keys %{$nbMitoGeneByContig{$contig}}; + + my @geneList=(); + foreach my $key (sort keys %{$nbMitoGeneByContig{$contig}}){ + push @geneList, $key + } + + print $ostream $contig."\t".@{$omni{$contig}}."\t".$nbMitoGene."\t".$size{$contig}."\t".join(",", @geneList)."\n"; + } + } + +__END__ + + +=head1 NAME + +screen_mito_tblastn.pl + +=head1 DESCRIPTION + +Based on a default BLAST tabulated output ( -outfmt 6 => qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore), the script aims to tell you, +for each sequence of your assembly, how many non-overlapping mito hits have been found, the number of mito genes that have a hit and the total size in bp of those hits +(overlapping parts counted only once). When the assembly is provided, 2 new columns are displayed: the size of the sequence and the percentage of it covered by mito hits. +The script aims to help determine which contigs of an assembly are mitochondrial. An assembly graph could be helpful to check whether the suspicious contigs (those that might be mitochondrial) appear to be circular, +as expected for a mitochondrial genome. 
+ +=head1 SYNOPSIS + + screen_mito_tblastn.pl --tab=infile -o=outFile + screen_mito_tblastn.pl --help + +Mitochondrial genome size (from Wikipedia) + +Genome Type Kingdom Introns Size Shape Description +1 Animal No 11–28kbp Circular Single molecule +2 Fungi, Plant, Protista Yes 19–1000kbp Circular Single molecule +3 Fungi, Plant, Protista No 20–1000kbp Circular Large molecule and small plasmid-like structures +4 Protista No 1–200kbp Circular Heterogeneous group of molecules +5 Fungi, Plant, Protista No 1–200kbp Linear Homogeneous group of molecules +6 Protista No 1–200kbp Linear Heterogeneous group of molecules + + + +=head1 OPTIONS + +=over 8 + +=item B<--tab> + +Input BLAST file in tabular format (-outfmt 6). + +=item B<--out>, B<--output> or B<-o> + +Output file for the tab-separated summary table. If omitted, the table is written to STDOUT. + +=item B<--genome> or B<-g> + +Optional. Genome in FASTA format. Allows the size of each sequence and the percentage of it covered by hits to be reported. + +=item B<--help> or B<-h> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+ +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_setup_rakefile.rb b/bin/gaas_setup_rakefile.rb new file mode 100755 index 000000000..75c098acb --- /dev/null +++ b/bin/gaas_setup_rakefile.rb @@ -0,0 +1,348 @@ +#!/usr/bin/ruby + +require 'fileutils' + +# A rake file to set up a new WebApollo instance +GENOME = ENV['genome'].downcase +abort "Must provide a genome name or horrible things will happen" unless GENOME +SEQUENCE = ENV['sequence'] +abort "Must provide a genome sequence!" unless SEQUENCE +ANNOTATION = ENV['gff'] +abort "Must provide an annotation!" unless ANNOTATION + +WEB_APOLLO_INSTALL_DIR = ENV['install_dir'] || "/databases/webapollo/WebApollo-2014-04-03" # the directory with the installation files +WEB_APOLLO_DIR = "/usr/share/tomcat7/webapps/#{GENOME}" + + +# Determine where all the data will live (BAM files, annotations etc) +STORAGE_DIR = "/databases/data" + +BLAT_DIR = "/databases/tools/blat" + +WORK_DIR=Dir.getwd + +# Some key variables: + +# -- The user with admin rights to the user DB +user_db = "web_apollo_users_#{GENOME}" +user_db_admin = ENV['WA_USER_DB_ADMIN'] or abort "Environment variable WA_PGUSER not set" +user_db_admin_pw = ENV['WA_WA_USER_DB_ADMIN_PW'] or abort "Environment variable WA_PGPASS not set" + +# -- The user with admin rights on the website <= Default value during installation process +web_apollo_admin = "web_apollo_admin" +web_apollo_admin_pw = "web_apollo_admin" + +# -- The Chado DB stores modified annotations +chado_db = "nbis_chado" +chado_db_user = "chado_admin" +chado_db_user_pw = "chado_admin" + +# -- Custom CSS styles we wish to include (e.g. 
stranded RNAseq data etc) +CSS_STRING = ".plus-cigarM {\nbackground-color: green; /* color for plus matches */\n}\n\n.minus-cigarM {\nbackground-color: blue; /* color for minus matches */\n}\n" + +############################################### +#### HERE BE DRAGONS! ######################### +####=Things you don't want to mess with######## +############################################### + +directory 'log' +directory 'scratch' +directory 'split_gff' + +# rake targets (files that need to be generated) +# Only way to create targets for PSQL is to +# write log files. +user_db_create_log = "log/user_db_create.log" +user_db_populate_log = "log/user_db_populate.log" +user_db_add_web_user_log = "log/user_db_add_web_user.log" +user_db_add_sequence_id_log = "log/user_db_add_sequence_id.log" + +extract_sequence_id = "scratch/#{SEQUENCE}.ids.txt" +## + +desc 'Create the user database for this annotation project' +namespace :userdb do + + file user_db_create_log => 'log' do |f| + system("psql -d template1 -U #{user_db_admin} -c \"DROP DATABASE IF EXISTS #{user_db}\"") + system("psql -L #{f} -d template1 -U #{user_db_admin} -c \"CREATE DATABASE #{user_db}\"") + end + + file user_db_populate_log => user_db_create_log do |f| + system("psql -L #{f} -d #{user_db} -U #{user_db_admin} -f #{WEB_APOLLO_INSTALL_DIR}/tools/user/user_database_postgresql.sql") + end + + file user_db_add_web_user_log => user_db_populate_log do |f| + system("perl #{WEB_APOLLO_INSTALL_DIR}/tools/user/add_user.pl -D #{user_db} -U #{user_db_admin} -P #{user_db_admin_pw} -u #{web_apollo_admin} -p #{web_apollo_admin_pw} 2> #{f}") + end + + file extract_sequence_id => [ 'scratch', user_db_add_web_user_log] do |f| + system("perl #{WEB_APOLLO_INSTALL_DIR}/tools/user/extract_seqids_from_fasta.pl -p Annotations- -i #{SEQUENCE} -o #{f}") + end + + file user_db_add_sequence_id_log => extract_sequence_id do |f| + system("perl #{WEB_APOLLO_INSTALL_DIR}/tools/user/add_tracks.pl -D #{user_db} -U #{user_db_admin} -P 
#{user_db_admin_pw} -t #{extract_sequence_id} 2> #{f}") + system("perl #{WEB_APOLLO_INSTALL_DIR}/tools/user/set_track_permissions.pl -D #{user_db} -U #{user_db_admin} -P #{user_db_admin_pw} -u #{web_apollo_admin} -t #{extract_sequence_id} -a") + end + + task :create => user_db_add_sequence_id_log + +end + +desc 'Create the Tomcat folder' + +namespace :servlet do + + + task :make_folder => "userdb:create" do + system("mkdir -p #{WEB_APOLLO_DIR}") + FileUtils.cd("#{WEB_APOLLO_DIR}") { + system("jar -xvf #{WEB_APOLLO_INSTALL_DIR}/war/WebApollo.war 2> /dev/null ") + } + end + + task :create => :make_folder + +end + +## +config_copy = "scratch/config.xml" +hibernate_copy = "scratch/hibernate.xml" +## + +namespace :config do + + file config_copy => 'servlet:create' do |f| + parsed_content = [] + fil = File.open("#{WEB_APOLLO_DIR}/config/config.xml","r") + while (line = fil.gets) + + if line.include?("ENTER_DATASTORE_DIRECTORY_HERE") + line.gsub!(/ENTER_DATASTORE_DIRECTORY_HERE/, "#{STORAGE_DIR}/#{GENOME}/") + elsif line.include?("ENTER_USER_DATABASE_JDBC_URL") + line.gsub!(/ENTER_USER_DATABASE_JDBC_URL/, "jdbc:postgresql://localhost/web_apollo_users_#{GENOME}") + elsif line.include?("ENTER_USER_DATABASE_USERNAME") + line.gsub!(/ENTER_USER_DATABASE_USERNAME/, "#{user_db_admin}") + elsif line.include?("ENTER_USER_DATABASE_PASSWORD") + line.gsub!(/ENTER_USER_DATABASE_PASSWORD/, "#{user_db_admin_pw}") + elsif line.include?("ENTER_PATH_TO_REFSEQS_JSON_FILE") + line.gsub!(/ENTER_PATH_TO_REFSEQS_JSON_FILE/, "#{WEB_APOLLO_DIR}/jbrowse/data/seq/refSeqs.json") + elsif line.include?("ENTER_ORGANISM") + line.gsub!(/ENTER_ORGANISM/ , "#{GENOME.capitalize.split('_').join(' ')}") + elsif line.include?("ENTER_CVTERM_FOR_SEQUENCE") + line.gsub!(/ENTER_CVTERM_FOR_SEQUENCE/, 'sequence:contig') + end + + parsed_content << line + end + fil.close + + parsed_content.compact! 
+ + o = File.new("#{f}", "w+") + o.puts parsed_content.join("\n") + o.close + + system("cp #{f} #{WEB_APOLLO_DIR}/config/config.xml") + + end + + file hibernate_copy => config_copy do |f| + + parsed_content = [] + fil = File.open("#{WEB_APOLLO_DIR}/config/hibernate.xml","r") + while (line = fil.gets) + if line.include?("ENTER_DATABASE_CONNECTION_URL") + line.gsub!(/ENTER_DATABASE_CONNECTION_URL/, "jdbc:postgresql://localhost/#{chado_db}") + elsif line.include?("ENTER_USERNAME") + line.gsub!(/ENTER_USERNAME/, "#{chado_db_user}") + elsif line.include?("ENTER_PASSWORD") + line.gsub!(/ENTER_PASSWORD/, "#{chado_db_user_pw}") + end + + parsed_content << line + + end + fil.close + parsed_content.compact! + + o = File.new("#{f}", "w+") + o.puts parsed_content.join("\n") + o.close + + system("cp #{f} #{WEB_APOLLO_DIR}/config/hibernate.xml") + end + + task :parse => hibernate_copy + +end + +namespace :blat do + + blat_db = "#{STORAGE_DIR}/#{GENOME}/blat.2bit" + blat_config = "#{WEB_APOLLO_DIR}/config/blat_config.xml" + blat_config_copy = "#{WORK_DIR}/scratch/blat_config.xml" + + file blat_db => 'config:parse' do + system("mkdir -p #{STORAGE_DIR}/#{GENOME}") + FileUtils.cd("#{STORAGE_DIR}/#{GENOME}") { + system("#{BLAT_DIR}/faToTwoBit #{WORK_DIR}/#{SEQUENCE} #{blat_db}") + } + end + + file blat_config_copy => blat_db do |f| + + lines = IO.readlines(blat_config) + parsed_lines = [] + + file = File.new("#{WORK_DIR}/scratch/blat_config.xml","w+") + + + lines.each do |line| + if line.include?("ENTER_PATH_TO_BLAT_BINARY") + line.gsub!(/ENTER_PATH_TO_BLAT_BINARY/, "#{BLAT_DIR}/blat") + elsif line.include?("ENTER_PATH_FOR_TEMPORARY_DATA") + line.gsub!(/ENTER_PATH_FOR_TEMPORARY_DATA/, '/tmp') + elsif line.include?("ENTER_PATH_TO_BLAT_DATABASE") + line.gsub!(/ENTER_PATH_TO_BLAT_DATABASE/, "#{blat_db}") + elsif line.include?("ENTER_ANY_BLAT_OPTIONS") + line.gsub!(/ENTER_ANY_BLAT_OPTIONS/, '-minScore=100 -minIdentity=60') + end + + parsed_lines << line.strip + + end + + file.puts 
parsed_lines.join("\n") + + file.close + + system("cp #{f} #{blat_config}") + + end + + task :create_config => blat_config_copy + +end + +namespace :data do + + + task :data_dir => 'blat:create_config' do + system("mkdir -p #{STORAGE_DIR}/#{GENOME}") + end + + task :dna_track_setup => :data_dir do + system("perl #{WEB_APOLLO_DIR}/jbrowse/bin/prepare-refseqs.pl --fasta #{WORK_DIR}/#{SEQUENCE}") + end + + task :link_data_dir => :dna_track_setup do + system("ln -sf #{STORAGE_DIR}/#{GENOME} #{WEB_APOLLO_DIR}/jbrowse/data") + end + + task :copy_json => :link_data_dir do + system("cp -R #{WORK_DIR}/data/* #{STORAGE_DIR}/#{GENOME}") + end + + task :add_plugin => :copy_json do + system("chmod +x #{WEB_APOLLO_DIR}/jbrowse/bin/*.pl") + system("#{WEB_APOLLO_DIR}/jbrowse/bin/add-webapollo-plugin.pl -i #{WEB_APOLLO_DIR}/jbrowse/data/trackList.json") + end + + task :split_gff_file => ['split_gff', :add_plugin ] do + system("perl #{WEB_APOLLO_INSTALL_DIR}/tools/data/split_gff_by_source.pl -i #{ANNOTATION} -d split_gff") + end + +end + +namespace :gff do + + + task :parse => "data:split_gff_file" do + + files = Dir["split_gff/*.gff"] + + warn files.inspect + + maker = files.find{|f| f.include?("maker") } || nil + protein = files.find{|f| f.include?("protein_coding") } || nil + + system("chmod +x #{WEB_APOLLO_DIR}/jbrowse/bin/flatfile-to-json.pl") + + FileUtils.cd("#{WEB_APOLLO_DIR}/jbrowse") { + + + if maker + files.delete_if{|f| f.include?("maker") } + system("bin/flatfile-to-json.pl --gff #{WORK_DIR}/split_gff/maker.gff --arrowheadClass trellis-arrowhead --getSubfeatures --subfeatureClasses '{\"wholeCDS\": null, \"CDS\":\"brightgreen-80pct\", \"UTR\": \"darkgreen-60pct\", \"exon\":\"container-100pct\"}' --cssClass container-16px --type mRNA --trackLabel maker") + elsif protein + files.delete_if{|f| f.include?("protein_codin") } + system("bin/flatfile-to-json.pl --gff #{WORK_DIR}/split_gff/protein_coding.gff --arrowheadClass trellis-arrowhead --getSubfeatures --subfeatureClasses 
'{\"wholeCDS\": null, \"CDS\":\"brightgreen-80pct\", \"UTR\": \"darkgreen-60pct\", \"exon\":\"container-100pct\"}' --cssClass container-16px --type mRNA --trackLabel EnsEMBLProtein") + + end + + files.each do |gff_file| + feature = gff_file.split("/")[-1].split(".")[0] + system("bin/flatfile-to-json.pl --gff #{WORK_DIR}/#{gff_file} --arrowheadClass webapollo-arrowhead --getSubfeatures --subfeatureClasses '{\"match_part\": \"darkblue-80pct\"}' --cssClass container-10px --trackLabel #{feature}") + end + } + end + + task :add_names => :parse do + + FileUtils.cd("#{WEB_APOLLO_DIR}/jbrowse") { + system("bin/generate-names.pl") + } + end + +end + +css_file = "#{WEB_APOLLO_DIR}/jbrowse/data/custom.css" +json_file = "#{WEB_APOLLO_DIR}/jbrowse/data/trackList.json" +json_file_copy = "#{STORAGE_DIR}/#{GENOME}/trackList.json.copy" + +desc 'Creates the custom CSS file' +namespace :css do + + file css_file => 'gff:parse' do |f| + file = File.new(css_file,"w+") + file.puts CSS_STRING + file.close + end + + file json_file_copy => css_file do + + file = File.open(json_file,"r") + lines = IO.readlines(json_file) + lines[0] = "\"css\" : \"data/custom.css\",\n" + lines.unshift("{\n") + file.close + + copy = File.new(json_file_copy,"w+") + copy.puts lines.join + copy.close + + system("cp #{json_file_copy} #{json_file}") + end + + task :generate_custom_css => json_file_copy +end + + + +task :default => 'css:generate_custom_css' + +task :clean do + + system("rm -fR #{WEB_APOLLO_DIR}") + system("rm -fR #{WORK_DIR}/data") + system("rm -R log") + system("rm -Rf #{STORAGE_DIR}/#{GENOME}") + system("rm -R split_gff") + system("rm -R scratch") + system("psql -d template1 -U #{user_db_admin} -c \"DROP DATABASE IF EXISTS #{user_db}\"") + +end + + + diff --git a/bin/gaas_snap_train.sh b/bin/gaas_snap_train.sh new file mode 100755 index 000000000..3e9c1ec27 --- /dev/null +++ b/bin/gaas_snap_train.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +NAME=$1 + +if [ -z "$NAME" ] +then + echo "Must provide a name!" 
+else + fathom -categorize 1000 genome.ann genome.dna + fathom -export 1000 -plus uni.ann uni.dna + forge export.ann export.dna + hmm-assembler.pl $NAME . > $NAME.hmm +fi diff --git a/bin/gaas_sr_AllResu_AllIntervalPlotMean.R b/bin/gaas_sr_AllResu_AllIntervalPlotMean.R new file mode 100644 index 000000000..843ff687d --- /dev/null +++ b/bin/gaas_sr_AllResu_AllIntervalPlotMean.R @@ -0,0 +1,30 @@ +#!/usr/bin/Rscript + +args <- commandArgs(TRUE) +cat("Il y a",length(args),"arguments:",paste(args,collapse=" - "),"\n"); + +if (length(args) != 3) { +cat("Il faut 3 arguments: Tab1, resuname","\n"); +quit +} + +SUP2a=as.matrix(read.table(args[1], sep="\t", he=T)) +SSO2a=as.matrix(read.table(args[2], sep="\t", he=T)) +objp<-sapply(1:nrow(SUP2a),function(x) SUP2a[x,4]>0) +obju<-sapply(1:nrow(SSO2a),function(x) SSO2a[x,4]>0) +SUP2=SUP2a[objp,] +SSO2=SSO2a[obju,] + +tiff(filename=args[3], width = 800, height = 800, units = "px", pointsize = 26, compression="lzw") +#pdf(args[3]) +plot(SUP2, xlim=c(0,1),ylim=c(0,1.2), col=2, xlab="Number of synonymous mutation", ylab="W") +#regi<-lm(SUP2[,2]~SUP2[,1]) +#abline(regi, col=2) +par(new=TRUE) +plot(SSO2, xlim=c(0,1),ylim=c(0,1.2), col=3, xlab="", ylab="") +#rego<-lm(SSO2[,2]~SSO2[,1]) +#abline(rego, col=3) +title("mean W value according to \nthe total number of mutation") +legend("topright", col=(2:3), lty=1, c(args[1], args[2])) +dev.off() + diff --git a/bin/gaas_sr_MadeGraphAndRegByInterval.R b/bin/gaas_sr_MadeGraphAndRegByInterval.R new file mode 100644 index 000000000..25ed1af2f --- /dev/null +++ b/bin/gaas_sr_MadeGraphAndRegByInterval.R @@ -0,0 +1,82 @@ +#!/usr/bin/Rscript + +args <- commandArgs(TRUE) +cat("Il y a",length(args),"arguments:",paste(args,collapse=" - "),"\n"); + +if (length(args) != 5) { +cat("Il faut 5 arguments: Tab1, tab2, interval, fin, resuname","\n"); +quit +} + +count=as.numeric(args[3]) +maxi=as.numeric(args[4]) +interval=count +pdf(args[5]) +while ( count < maxi) { +cat("while") +if (count == interval) { 
+SUP2a=as.matrix(read.table(args[1])) +SSO2a=as.matrix(read.table(args[2])) +SUP2=SUP2a[SUP2a[,2]>0 & SUP2a[,1]>=0 & SUP2a[,1]<=count , ] +SSO2=SSO2a[SSO2a[,2]>0 & SSO2a[,1]>=0 & SSO2a[,1]<=count , ] +print(SUP2) +if(nrow(SUP2) != 0){ + cat("if1\n") + plot(SUP2, xlim=c(0,500),ylim=c(0,1.2), col=2) + cat("if1\n") + regi<-lm(SUP2[,2]~SUP2[,1]) + abline(regi, col=2) + if(nrow(SSO2) != 0){ + cat("if2\n") + par(new=TRUE) + plot(SSO2, xlim=c(0,500),ylim=c(0,1.2), col=3) + rego<-lm(SSO2[,2]~SSO2[,1]) + abline(rego, col=3) + } + } +else { + cat("else1\n") + if(nrow(SSO2) != 0){ + cat("if3\n") + plot(SSO2, xlim=c(0,500),ylim=c(0,1.2), col=3) + rego<-lm(SSO2[,2]~SSO2[,1]) + abline(rego, col=3) + } +} +count<-count+interval +cat("EndFirst1\n") +} +else { +cat("Else") +counttmp<-count+interval +SUP2new=SUP2a[SUP2a[,2]>0 & SUP2a[,1]>=count & SUP2a[,1]<=counttmp , ] +SSO2new=SSO2a[SSO2a[,2]>0 & SSO2a[,1]>=count & SSO2a[,1]<=counttmp , ] +if(nrow(SUP2new) != 0){ +# par(new=TRUE) + plot(SUP2new, xlim=c(0,500),ylim=c(0,1.2), col=2) + regi<-lm(SUP2new[,2]~SUP2new[,1]) + abline(regi, col=2) + + if(nrow(SSO2new) != 0){ + par(new=TRUE) + plot(SSO2new, xlim=c(0,500),ylim=c(0,1.2), col=3) + rego<-lm(SSO2new[,2]~SSO2new[,1]) + abline(rego, col=3) + } + } + +else{ + if(nrow(SSO2new) != 0){ + plot(SSO2new, xlim=c(0,500),ylim=c(0,1.2), col=3) + regu<-lm(SSO2new[,2]~SSO2new[,1]) + abline(regu, col=3) + } + } + +count<-count+interval +} +} +title("mean W value according to the total number of mutation") +legend("topright", col=(2:3), lty=1, c(args[1], args[2])) +dev.off() + diff --git a/bin/gaas_sr_Mean2col_AllIntervalPlotMean.R b/bin/gaas_sr_Mean2col_AllIntervalPlotMean.R new file mode 100644 index 000000000..1fe25967d --- /dev/null +++ b/bin/gaas_sr_Mean2col_AllIntervalPlotMean.R @@ -0,0 +1,30 @@ +#!/usr/bin/Rscript + +args <- commandArgs(TRUE) +cat("Il y a",length(args),"arguments:",paste(args,collapse=" - "),"\n"); + +if (length(args) != 3) { +cat("Il faut 3 arguments: Tab1, tab2, 
resuname","\n"); +quit +} + +SUP2a=as.matrix(read.table(args[1], sep="\t", he=T)) +SSO2a=as.matrix(read.table(args[2], sep="\t", he=T)) +objp<-sapply(1:nrow(SUP2a),function(x) SUP2a[x,2]>0) +obju<-sapply(1:nrow(SSO2a),function(x) SSO2a[x,2]>0) +SUP2=SUP2a[objp,] +SSO2=SSO2a[obju,] + +tiff(filename=args[3], width = 800, height = 800, units = "px", pointsize = 26, compression="lzw") +#pdf(args[3]) +plot(SUP2, xlim=c(0,1),ylim=c(0,1.2), col=2, xlab="Number of synonymous mutation", ylab="W") +#regi<-lm(SUP2[,2]~SUP2[,1]) +#abline(regi, col=2) +par(new=TRUE) +plot(SSO2, xlim=c(0,1),ylim=c(0,1.2), col=3, xlab="", ylab="") +#rego<-lm(SSO2[,2]~SSO2[,1]) +#abline(rego, col=3) +title("mean W value according to \nthe total number of mutation") +legend("topright", col=(2:3), lty=1, c(args[1], args[2])) +dev.off() + diff --git a/bin/gaas_sr_Mean2col_PlotPoints.R b/bin/gaas_sr_Mean2col_PlotPoints.R new file mode 100644 index 000000000..5868a5c87 --- /dev/null +++ b/bin/gaas_sr_Mean2col_PlotPoints.R @@ -0,0 +1,29 @@ +#!/usr/bin/Rscript + +args <- commandArgs(TRUE) +cat("Il y a",length(args),"arguments:",paste(args,collapse=" - "),"\n"); + +if (length(args) != 3) { +cat("Il faut 3 arguments: Tab1, tab2, resuname","\n"); +quit +} + +SUP2a=read.table(args[1], sep=" ") +SSO2a=read.table(args[2], sep=" ") +objp<-sapply(1:nrow(SUP2a),function(x) SUP2a[x,2]>0) +obju<-sapply(1:nrow(SSO2a),function(x) SSO2a[x,2]>0) +SUP2=SUP2a[objp,] +SSO2=SSO2a[obju,] + +tiff(filename=args[3], width = 800, height = 800, units = "px", pointsize = 26, compression="lzw") +par(lwd=1) +plot(SUP2[,2]~SUP2[,1], xlim=c(0,1),ylim=c(0,1), col=2, pch = 0, xlab="synonymous mutation rate", ylab="ω") +par(new=TRUE, lwd=1, pch = 2) +plot(SSO2[,2]~SSO2[,1], xlim=c(0,1),ylim=c(0,1), col=3, pch = 2, xlab="", ylab="") +abline(v = 170) + + +#title("mean W value according to \nthe total number of mutation") +legend("topright", col=(2:3), pch=(0:1), c("UPint", "SOall")) +dev.off() + diff --git 
a/bin/gaas_sr_Mean2col_PlotPointsAndRegs.R b/bin/gaas_sr_Mean2col_PlotPointsAndRegs.R new file mode 100644 index 000000000..7ec14f857 --- /dev/null +++ b/bin/gaas_sr_Mean2col_PlotPointsAndRegs.R @@ -0,0 +1,33 @@ +#!/usr/bin/Rscript + +args <- commandArgs(TRUE) +cat("Il y a",length(args),"arguments:",paste(args,collapse=" - "),"\n"); + +if (length(args) != 3) { +cat("Il faut 3 arguments: Tab1, tab2, resuname","\n"); +quit +} + +SUP2a=read.table(args[1], sep=" ") +SSO2a=read.table(args[2], sep=" ") +objp<-sapply(1:nrow(SUP2a),function(x) SUP2a[x,2]>0) +obju<-sapply(1:nrow(SSO2a),function(x) SSO2a[x,2]>0) +SUP2=SUP2a[objp,] +SSO2=SSO2a[obju,] + +tiff(filename=args[3], width = 800, height = 800, units = "px", pointsize = 26, compression="lzw") +par(lwd=1) +plot(SUP2[,2]~SUP2[,1], xlim=c(0,1),ylim=c(0,1), col=2, pch = 0, xlab="synonymous mutation rate", ylab="ω") +regi<-lm(SUP2[,2]~SUP2[,1]) +abline(regi, col=2) +par(new=TRUE, lwd=1, pch = 2) +plot(SSO2[,2]~SSO2[,1], xlim=c(0,1),ylim=c(0,1), col=3, pch = 2, xlab="", ylab="") +rego<-lm(SSO2[,2]~SSO2[,1]) +abline(rego, col=3) +abline(v = 170) + + +#title("mean W value according to \nthe total number of mutation") +legend("topright", col=(2:3), pch=(0:1), c("UPint", "SOall")) +dev.off() + diff --git a/bin/gaas_sync_dat_and_embl.pl b/bin/gaas_sync_dat_and_embl.pl new file mode 100755 index 000000000..9471bcf46 --- /dev/null +++ b/bin/gaas_sync_dat_and_embl.pl @@ -0,0 +1,261 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use IO::File; +use File::Basename; +use Getopt::Long; +use Pod::Usage; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my @copyARGV=@ARGV; + +my $opt_output = undef; +my $datfile = undef; +my $emblfile = undef; +my $bac = undef; #believe in AC +my $help= undef; + +if ( !GetOptions("dat=s" => \$datfile, + "embl=s" => \$emblfile, + "o|out=s" => \$opt_output, + "bac!" 
=> \$bac, + "h|help" => \$help) ) +{ + pod2usage( { -message => 'Failed to parse command line', + -verbose => 1, + -exitval => 1 } ); +} + +if ($help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if ( ! defined( $datfile) or ! defined( $emblfile)) { + pod2usage( { + -message => "$header\nMust specify at least 2 files. dat and embl\n", + -verbose => 0, + -exitval => 1 } ); +} + +##### Stream in 1 +my $fh1; +if ($datfile) { + open($fh1, '<', $datfile) or die "Could not open file '$datfile' $!"; +} + +##### Stream in 2 +my $fh2; +if ($emblfile) { + open($fh2, '<', $emblfile) or die "Could not open file '$emblfile' $!"; +} + + +##### Stream out +my $emblout; +if ($opt_output) { + open($emblout, '>', $opt_output) or die "Could not open file '$opt_output' $!"; +} + +my %sizes; +my %headers; +my $ID = undef; +my $sourceSeen=undef; +my $ACline=0; +while( my $line = <$fh1>) { + + if( $line =~ m/^ID/){ + # + my @list = split(/\s/,$line); + $ID = $list[11]; + #$line=$list[0]." ".$list[1]." ".$list[2]." ".$list[3]." ".$list[4]." ".$list[5]." ".$list[6]." ".$list[7]." ".$list[8]." STD; ".$list[10]." ".$list[11]." ".$list[12]."\n"; + #print $line."\n"; + $sizes{$ID}++; + #print $ID."\n"; + $sourceSeen=undef; + } + + ################## Look for signal to stop save the information ############################# + # #we keep all until source and the few next lines related to source. (Stop at another FT than source or XX if no other source available.) 
+ # + if($bac){ + if( $line =~ m/^AC/){ + $ACline++; + if($ACline % 2 == 0){ + my @list = split(/\s/,$line); + my $newID= $list[2]; + chomp($newID); + $sizes{$newID} = delete $sizes{$ID}; + $headers{$newID} = delete $headers{$ID}; + $ID=$newID; + #print "$ID\n"; + } + } + } + if( $line =~ m/^FT source/){ + $sourceSeen=1; + } + + if( $line =~ m/^FT [^source|^\s]/){ + $ID=undef; + } + + if( $sourceSeen){ + if( $line =~ m/^XX/){ + $ID=undef; + } + } + ############################################### + + + if($ID){ + $headers{$ID}.=$line; + } +} + +my $uniq= 1; +foreach my $key (keys %sizes){ + my $nb = $sizes{$key}; + if ($nb != 1){ + print "/!\ This size is not uniq:\n"; + print $key." ".$nb."\n"; + $uniq = undef; + } +} +if ($uniq){ + my $nbID = keys %sizes; + print "Fine, all contig size are uniq we can use them to map the two files information.\nThere is $nbID keys in the dat file.\n"; +} + +#everything needed from dat file is saved in headers now +#foreach my $key (keys %headers){ +# print $headers{$key}; +#} + +# print $fh2 but part (ID line until source and few next lines) are replaced by those saved from the dat file. (We do that if an comon identifier is found: here the size in bp fron the ID line is the ioentifier) +my $printNext=1; +my $nbIDfound=0; +my $nbIDTotal=0; +$ACline=0; +my $saved_line=undef; +while( my $line = <$fh2>) { + my $skip_line=undef; + + if($saved_line){ + $saved_line.=$line; + } + + if( $line =~ m/^ID/){ + if($bac){$printNext=undef;}; + $saved_line.=$line; + + # + my @list = split(/\s/,$line); + $ID = $list[10]; + #print "ID= $ID\n"; + $sourceSeen=undef; + + if ( exists($headers{$ID}) and ! 
$bac){ + $nbIDfound++; + print $emblout $headers{$ID}; + $printNext=undef; + } + } + + if($bac){ + if( $line =~ m/^AC/){ + $ACline++; + if($ACline % 2 == 0){ + $nbIDTotal++; + my @list = split(/\s/,$line); + $ID=$list[2]; + chomp($ID); + if ( exists($headers{$ID}) ){ + $nbIDfound++; + print "$ID\n"; + print $emblout $headers{$ID}; + $saved_line=""; + } + else{ + print $emblout $saved_line; + $printNext=1; + $saved_line=""; + $skip_line=1; + print $line; + } + } + } + } + + if($printNext and !$skip_line){ + print $emblout $line; + } + else{ + if( $line =~ m/^FT source/){ + $sourceSeen=1; + } + + if( $line =~ m/^FT [^source|^\s]/){ #we keep all wat is related tosource as well and then we stop. + $printNext=1; + } + + if( $sourceSeen){ + if( $line =~ m/^XX/){ + $printNext=1; + } + } + + if($printNext and !$skip_line){ + print $emblout $line; + } + } + +} + +print "On $nbIDTotal headers there are $nbIDfound that has been modified properly.\n"; +__END__ + + +=head1 NAME + +sync_dat_and_embl.pl - This script updates the record "headers" of an EMBL file with those from a dat file provided by ENA. +It is useful when an assembly/annotation has been submitted using an AGP file while the annotation has been done directly on the chromosomes. +(Going through an AGP file happens only if the chromosomes and the unplaced contigs (those not related to any chromosome) are part of the same assembly). + + +=head1 SYNOPSIS + + ./sync_dat_and_embl.pl --dat=infile --embl=infile2 -o=outFile + ./sync_dat_and_embl.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<--dat> + +Input dat file provided by ENA + +=item B<--embl> + +Input EMBL file + + +=item B<--bac> + +Boolean. Believe in the AC line. Instead of looking at the sequence size, look at the AC line (the second one of each record) as the common information between the two files. + +=item B<--out>, B<--output> or B<-o> + +The output will be the EMBL file with the record "headers" modified + +=item B<--help> or B<-h> + +Display this helpful text. 
+ +=back + +=cut diff --git a/bin/gaas_sync_user_db.rb b/bin/gaas_sync_user_db.rb new file mode 100755 index 000000000..aeb18ef3b --- /dev/null +++ b/bin/gaas_sync_user_db.rb @@ -0,0 +1,64 @@ +#!/usr/bin/ruby +# == NAME +# sync_user_db.rb +# +# == USAGE +# ./this_script.rb [ -h | --help ] +# [ -i | --infile ] [ -s | --species ] +# == DESCRIPTION +# Parses a WebApollo SQL file and updates the local user database +# +# == OPTIONS +# -h,--help:: Show help +# -i,--infile=INFILE:: input file (required) +# -s,--species=SPECIES:: species to use (required) + +# +# == EXPERT OPTIONS +# +# == AUTHOR +# Marc Hoeppner, mphoeppner@gmail.com + +require 'optparse' +require 'ostruct' + +### Define modules and classes here + +### Get the script arguments and open relevant files +options = OpenStruct.new() +opts = OptionParser.new() +opts.on("-i","--infile", "=INFILE","Input") {|argument| options.infile = argument } +opts.on("-s","--species", "=SPECIES","Species to use") {|argument| options.species = argument } +opts.on("-h","--help","Display the usage information") { + puts opts + exit +} + +opts.parse! + +abort "Must provide species name!" unless options.species +abort "Must provide an input file!" unless options.infile + +home = ENV['HOME'] + +WEB_APOLLO_SCRIPT="/big/webapollo/build/#{options.species}/tools/user/add_user.pl" +# "#{home}/webapollo/build/template/tools/user/add_user.pl" + +options.infile ? 
input_stream = File.open(options.infile) : input_stream = $stdin + +while (line = input_stream.gets) + + next unless line.match(/^[0-9]/) + + user_id,name,pass = line.strip.split("\t") + + next if user_id == "1" + + warn "Adding user #{name} with password #{pass} to database web_apollo_users_#{options.species}" + + system("perl #{WEB_APOLLO_SCRIPT} -D web_apollo_users_#{options.species} -U web_apollo_users_admin -P web_apollo_users_admin -u #{name} -p #{pass}") + +end + + + diff --git a/bin/gaas_synplot.R b/bin/gaas_synplot.R new file mode 100755 index 000000000..f4b336552 --- /dev/null +++ b/bin/gaas_synplot.R @@ -0,0 +1,128 @@ +#!/usr/bin/env Rscript +## synplot.R by Brandon Seah (kbseah@mpi-bremen.de) +## Use with synplot.pl + +# Rscript synplot.R --args tab0file tab1file tab2file tab3file outname cdstype colmax + +## Get arguments from command line +args <- commandArgs(trailingOnly=TRUE) +tab0file <- args[2] +tab1file <- args[3] +tab2file <- args[4] +tab3file <- args[5] +outname <- args[6] +cdstype <- args[7] +colmax <- args[8] + +# Read input tables + +tab0 <- read.table(file=tab0file,header=T) +tab1 <- read.table(file=tab1file,header=T) +tab2 <- read.table(file=tab2file,header=T) +tab3 <- read.table(file=tab3file,header=T) + +pdf(file=outname,width=10,height=7) + +par(mar=c(5,12,4,2)+0.1) +plot("0", + type="n", + xlim=c(0,max(tab1$stop)), + ylim=c(0,max(tab1$y)), + ylab="", + xlab="Position (bp)", + yaxt="n" + ) +axis(side=2, + at=tab0$y, + labels=tab0$label, + las=2) + +# Define color palette for ID values +colfunc <- colorRampPalette(c("white",colmax)) + +# Plot polygons corresponding to pairs of best Blast hits +for (i in 1:length(tab0$genome)) { + tab3.subset <- subset(tab3, + genome1 == as.character(tab0$genome[i]) + ) + xses <- tab3.subset[,5:9] + yses <- tab3.subset[,10:14] + for (j in 1:dim(xses)[1]) { + #polygon (xses[j,], yses[j,],col="pink",lty=0) + polygon(xses[j,], + yses[j,], + col=colfunc(100)[round(tab3$pid[j])], + lty=0) + } +} + +# Plot CDS 
regions +if (cdstype=="color") { + # Plot CDS regions on top so that the colors will show + for (i in 1:length(tab0$genome)) { + tab2.subset <- subset(tab2, + genome==as.character(tab0$genome[i]) + ) + segments (tab2.subset$cumulstart, + rep(tab0$y[i], + dim(tab2.subset)[1] + ), + tab2.subset$cumulstop, + rep(tab0$y[i], + dim(tab2.subset)[1] + ), + lwd=5, + lend="butt", # Prevent round caps + col=as.character(tab2.subset$color) + ) + } +} else if (cdstype=="arrow") { + # Plot CDS regions with arrows instead of line segments + for (i in 1:length(tab0$genome)) { + tab2.subset <- subset(tab2, + genome==as.character(tab0$genome[i]) + ) + arrows (tab2.subset$cumulstart, + rep(tab0$y[i], + dim(tab2.subset)[1] + ), + tab2.subset$cumulstop, + rep(tab0$y[i], + dim(tab2.subset)[1] + ), + lwd=5, + lend="butt", + col=as.character(levels(tab2.subset$color)[1]), + length=0.04 + ) + } +} + +# Draw tick marks indicating contig boundaries +for (i in 1:length(tab1$contig)) { + segments(x0=tab1$start[i], + x1=tab1$start[i], + y0=tab1$y[i]-0.2, + y1=tab1$y[i]+0.2, + lend="butt" + ) + segments(x0=tab1$stop[i], + x1=tab1$stop[i], + y0=tab1$y[i]-0.2, + y1=tab1$y[i]+0.2, + lend="butt" + ) +} + +#text(x=rep(0,length(tab0$genome)),y=tab0$y,pos=2,labels=tab0$genome) +legend(x="topright", + fill=colfunc(100)[seq(10,100,10)], + legend=seq(10,100,10), + title="Percent ID", + y.intersp=0.65, + cex=0.9, + border="white", + bty="n" + ) + +dev.off() diff --git a/bin/gaas_synplot.pl b/bin/gaas_synplot.pl new file mode 100755 index 000000000..89c41ec66 --- /dev/null +++ b/bin/gaas_synplot.pl @@ -0,0 +1,418 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Getopt::Long; +use File::Basename; +use Bio::SeqIO; +use Cwd; +use Bio::DB::Fasta; +use FindBin qw($Bin $Script); +use Pod::Usage; + +my $input_fasta; # List of Fasta files containing inputs +my $input_gff; # List of GFF files containing gene predictions +my $gencode=4; # Genetic code +my $outfix="test"; # Output prefix +my 
$cdstype="color"; # How to indicate CDS direction +my $colmax="red"; # Color for maximum %ID in color scale +my $colcds1="darkblue"; # Color for CDS +my $colcds2="darkgreen"; +my @valid_cds = qw(color arrow); # List of valid CDS drawing types +my %contig_length_hash; # Hash of hash of contig lengths +my %total_length_hash; # Hash of total Fasta file lengths +my %x1_hash; # Don't ask +my %x0_hash; +my $plotonly; # Flag - do not run Blastp, only call synplot.R to draw plot +my $bidir; # Flag - Bidrectional best hits only +my $help; +my $man; +my $verbose = undef; + +if( !GetOptions( 'fasta|f=s' => \$input_fasta, # Input should be list of Fasta files, comma-separated + 'gff|g=s' => \$input_gff, # Input should be list of GFF files, comma-separated + 'plotonly|p'=>\$plotonly, # + 'out|o=s' =>\$outfix, # Output prefix + 'gencode=i' =>\$gencode, # Genetic code + 'cds|c=s' =>\$cdstype, # How to indicate CDS direction + 'color_id=s' =>\$colmax, # Color for maximum %id in synteny plot + 'color_cds_f=s' =>\$colcds1, + 'color_cds_r=s' =>\$colcds2, + 'bidir' => \$bidir, # Bidirectional best hits only + 'verbose|v!' => \$verbose, + 'help|h!' => \$help, + 'man|m!' 
=> \$man ) ) +{ + pod2usage( { -message => "Failed to parse command line.", + -verbose => 1, + -exitval => 2 } ); +} +# Print Help and exit +if ($help) { + pod2usage( { -verbose => 99, + -exitval => 0 } ); +} + +# Print Help and exit +if ($man) { + pod2usage( { -verbose => 2, + -exitval => 0 } ); +} + +my @tools = ("blastp", "Rscript"); # Rscript is needed to draw the plot +foreach my $exe (@tools) { + check_bin($exe) == 1 or die "Missing executable $exe in PATH"; +} + +## MAIN ####################################################################### + +my @input_fasta_list = split /,/, $input_fasta; +my @input_gff_list = split /,/, $input_gff; + +# Names for output files +my $output_tab_0 = $outfix.".output_0.tab"; +my $output_tab_1 = $outfix.".output_1.tab"; # Table to store summary information for plotting +my $output_tab_2 = $outfix.".output_2.tab"; # Table of feature information for plotting +my $output_tab_3 = $outfix.".output_3.tab"; # Table of polygons for plotting + +if (!$plotonly) { + # Headers for output tables + open(OUTPUT0, ">", "$output_tab_0") or die ("$!\n"); + print OUTPUT0 join ("\t", qw(genome y label)). 
"\n"; + close(OUTPUT0); + open(OUTPUT1, ">", "$output_tab_1") or die ("$!\n"); + print OUTPUT1 join("\t", qw(genome contig length start stop y))."\n"; + close(OUTPUT1); + open(OUTPUT2, ">", "$output_tab_2") or die ("$!\n"); + print OUTPUT2 join ("\t", qw(genome gene start stop cumulstart cumulstop color))."\n"; + close(OUTPUT2); + open(OUTPUT3, ">", "$output_tab_3") or die ("$!\n"); + print OUTPUT3 join ("\t", qw (genome1 gene1 genome2 gene2 g1x0 g1x1 g2x1 g2x0 g1x0 g1y0 g1y1 g2y1 g2y0 g1y0 pid))."\n"; + close(OUTPUT3); + + # Run functions + parse_fasta_gff(); + if ($bidir) { + run_blast_pairs_bidir(); + } else { + run_blast_pairs(); + } +} + +print "Call Rscript\n" if ($verbose); +# Call R script to generate plot (shipped next to this script as gaas_synplot.R); stop if it fails +system("Rscript $Bin/gaas_synplot.R --args $output_tab_0 $output_tab_1 $output_tab_2 $output_tab_3 $outfix.synteny.pdf $cdstype $colmax") == 0 or die "Rscript call failed (exit status $?)\n"; +print "Result written to $outfix.synteny.pdf\nBye Bye\n"; +## SUBROUTINES ################################################################ + +sub parse_fasta_gff { + # Some ideas and code from here: https://www.biostars.org/p/46281/ + ## Record contig lengths for each Fasta file + my %contig_zero_position; + my %running_total; + + for my $i (0 .. 
scalar(@input_fasta_list)-1) { + my $the_fasta = $input_fasta_list[$i]; + my ($label,$path,$ext) = fileparse($the_fasta,qr/\.[^.]*/); + open(OUTPUT0, ">>", "$output_tab_0") or die ("$!\n");; + print OUTPUT0 $the_fasta."\t".$i."\t".$label."\n"; + close(OUTPUT0); + $running_total{$the_fasta} = 0; + ## Predict ORFs with Prodigal if not already supplied in GFF file + # system ("prodigal -m -c -g 4 -a $the_fasta.prodigal.pep -q -p single -f gff -o $the_fasta.prodigal.gff"); + my $the_fasta_object = Bio::SeqIO->new(-file => $the_fasta); + my $seq_object; + open(OUTPUT1, ">>", "$output_tab_1") or die ("$!\n"); + while ($seq_object = $the_fasta_object->next_seq) { + $contig_length_hash{$the_fasta}{$seq_object->display_id} = $seq_object->length; + $total_length_hash{$the_fasta} += $seq_object->length; + $contig_zero_position{$the_fasta}{$seq_object->display_id} = $running_total{$the_fasta}; + $running_total{$the_fasta} += $seq_object->length; # update running total + #Tabulate contig lengths and positions along concatenated plot of genome + print OUTPUT1 join ("\t", + $the_fasta, + $seq_object->display_id, + $seq_object->length, + $contig_zero_position{$the_fasta}{$seq_object->display_id}, + $running_total{$the_fasta}, + $i + )."\n"; + } + print $the_fasta."\t".$total_length_hash{$the_fasta}."\n" if ($verbose); + close (OUTPUT1); + ## Index Fasta files and parse corresponding GFF files + # Load fasta sequences to memory + my $db = Bio::DB::Fasta->new($the_fasta); + #my %CDS; + # Output file for translated CDS sequences + my $outfile_pep = Bio::SeqIO->new(-format=>'fasta', -file=> ">$the_fasta.pep"); + # Open GFF file + open(GFF, "<", $input_gff_list[$i]) or die ("$!\n"); + open (OUTPUT2, ">>", "$output_tab_2") or die ("$!\n"); + while (<GFF>) { + # For each feature (one GFF line per iteration, read into $_) + chomp; + if (!/^\#/) { + # If not a comment line + # Split GFF line into fields + my @array = split("\t",$_); + # Split notes field into elements + my @attrs = split(";",$array[8]); + $attrs[0] =~ s/ID=//; + # Gene 
name parsed from ID field + my $gene_name = $attrs[0]; + my $start; + my $stop; + # What type of feature + my $type = $array[2]; + my $current_contig = $array[0]; + # Extract gene sequence and ID from loaded Fasta file using GFF feature table + my $gene_seq = $db->seq($array[0], + $array[3], + $array[4] + ); + #print $db->seq($array[0],$array[3],$array[4])."\n"; + # Output sequence object + my $output_gene = Bio::Seq->new( + -seq => $gene_seq, + -id => $gene_name, + -display_id => $gene_name, + -alphabet => 'dna', + ); + if ($array[6] eq '+') { + $start = $array[3]; + $stop = $array[4]; + } elsif ($array[6] eq '-') { + # Reverse complement if feature is '-' + $output_gene=$output_gene->revcom(); + $start = $array[4]; + $stop=$array[3]; + } + if ($type eq "CDS") { + # If CDS, write translation to file + # Translation table 4 (protozoan mitochondrial) + my $output_pep = $output_gene->translate(-codontable_id=>$gencode); + $outfile_pep->write_seq($output_pep); + } + my $zerostart = $start + $contig_zero_position{$the_fasta}{$current_contig}; + my $zerostop = $stop + $contig_zero_position{$the_fasta}{$current_contig}; + # Define color for CDS depending on transcription direction + my $cds_color; + if ($zerostart < $zerostop) { + $cds_color=$colcds1; + } + elsif ($zerostart >= $zerostop) { + $cds_color=$colcds2; + } + print OUTPUT2 join ("\t", + $the_fasta, + $gene_name, + $start, + $stop, + $zerostart, + $zerostop, + $cds_color) . "\n"; + $x0_hash{$the_fasta}{$gene_name} = $zerostart; + $x1_hash{$the_fasta}{$gene_name} = $zerostop; + } + } + close (OUTPUT2); + close(GFF); + } +} + +sub run_blast_pairs { + + print "run run_blast_pairs\n" if ($verbose); + for my $i (0 .. 
scalar(@input_fasta_list)-2) { + my $j = $i + 1; + my $blastfile1 = "$input_fasta_list[$i].pep"; + my $blastfile2 = "$input_fasta_list[$i+1].pep"; + if (-f "$outfix.blastout.$i.out6"){ + print "run_blast_pairs output already exists for $outfix.blastout.$i.out6, skipping this step\n"; + } + else{ + system ("blastp -subject $blastfile1 -query $blastfile2 -evalue 1e-3 -outfmt 6 -max_target_seqs 1 -out $outfix.blastout.$i.out6"); + } + open(OUTPUT3, ">>", "$output_tab_3") or die ("$!\n"); + open(HITS, "<", "$outfix.blastout.$i.out6") or die ("$!\n"); + while (<HITS>) { + # Convert Blast hit results (pairs of genes with best hits to each other) to polygons for drawing synteny diagrams + chomp; + print "$_\n" if ($verbose); + my @splitline = split("\t",$_); + my ($query,$subject,$pid) = ($splitline[0],$splitline[1],$splitline[2]); + print OUTPUT3 $input_fasta_list[$i]."\t".$query."\t".$input_fasta_list[$i+1]."\t".$subject."\t"; + print OUTPUT3 join("\t", + $x0_hash{$input_fasta_list[$i+1]}{$query}, + $x1_hash{$input_fasta_list[$i+1]}{$query}, + $x1_hash{$input_fasta_list[$i]}{$subject}, + $x0_hash{$input_fasta_list[$i]}{$subject}, + $x0_hash{$input_fasta_list[$i+1]}{$query}, + $j, + $j, + $i, + $i, + $j, + $pid + )."\n"; + } + close(HITS); + close(OUTPUT3); + } +} + +sub run_blast_pairs_bidir { + # Take bidirectional best hits only + for my $i (0 .. 
scalar(@input_fasta_list)-2) { + my $j = $i + 1; + my $blastfile1 = "$input_fasta_list[$i].pep"; + my $blastfile2 = "$input_fasta_list[$i+1].pep"; + my %hit_ji; + system ("blastp -subject $blastfile1 -query $blastfile2 -evalue 1e-3 -outfmt 6 -max_target_seqs 1 -out $outfix.blastout.$i.out6"); + system ("blastp -subject $blastfile2 -query $blastfile1 -evalue 1e-3 -outfmt 6 -max_target_seqs 1 -out $outfix.blastout.$i.r.out6"); + open(HITJI, "<", "$outfix.blastout.$i.r.out6") or die ("$!\n"); + while (<HITJI>) { + chomp; + my @splitline = split "\t"; + $hit_ji{$splitline[0]} = $splitline[1]; + } + close(HITJI); + open(OUTPUT3, ">>", "$output_tab_3") or die ("$!\n"); + open(HITIJ, "<", "$outfix.blastout.$i.out6") or die ("$!\n"); + while (<HITIJ>) { + chomp; + my @splitline = split "\t"; + if (defined $hit_ji{$splitline[1]} && $hit_ji{$splitline[1]} eq $splitline[0]) { + # Only print hits which are bidirectional best hits + my ($query, $subject, $pid) = ($splitline[0], $splitline[1], $splitline[2]); + print OUTPUT3 $input_fasta_list[$i]."\t".$query."\t".$input_fasta_list[$i+1]."\t".$subject."\t"; + print OUTPUT3 join("\t", + $x0_hash{$input_fasta_list[$i+1]}{$query}, + $x1_hash{$input_fasta_list[$i+1]}{$query}, + $x1_hash{$input_fasta_list[$i]}{$subject}, + $x0_hash{$input_fasta_list[$i]}{$subject}, + $x0_hash{$input_fasta_list[$i+1]}{$query}, + $j, + $j, + $i, + $i, + $j, + $pid + )."\n"; + } + } + close(HITIJ); + close(OUTPUT3); + } +} + +sub check_bin +{ + length(`which @_`) > 0 ? return 1 : return 0; +} + + +__END__ + +=head1 NAME + +synplot.pl - Make synteny plots for small genomes + +=head1 SYNOPSIS + + perl synplot.pl -f <fasta1>,<fasta2>,<fasta3> \ + -g <gff1>,<gff2>,<gff3> \ + -o <prefix> + perl synplot.pl --help + +=head1 DESCRIPTION + +Make synteny plots for a set of contigs or small genomes. Given a set of Fasta +files, each representing a single genome, and GFF3 feature tables with CDS +features for those Fasta files, perform Blastp between each adjacent pair of +genomes, and make synteny plots. 
+Requires the accompanying R script synplot.R in the same folder as the perl +script, and command Rscript in path. + +=head1 OPTIONS + +=over 8 + +=item --fasta|-f <fasta1>,<fasta2>,<fasta3> + +List of file names separated by commas; nucleotide fasta files containing +contigs to be compared, in order that they will appear on synteny plot. + +=item --gff|-g <gff1>,<gff2>,<gff3> + +List of file names separated by commas; GFF files containing predicted CDS to +be compared by Blastp against each other. Must be in same order as the list of +Fasta files given to -f parameter (above). + +=item --out|-o <prefix> + +Prefix for output file names. (Default: test) + +=item --bidir + +Flag: Take bidirectional reciprocal best blast hits only. (Default: Off) + +=item --plotonly + +Flag: Draw plot only. Requires precomputed intermediate files with same file +name prefix as supplied to -o parameter above. + +=item --gencode + +Genetic code for a.a. sequence translation. (Default: 4) + +=item --help + +This help message. + +=item --cds|-c + +How to depict CDS direction. Allowed values: color, arrow. No quotation marks. +(Default: color) + +=item --color_id + +Color corresponding to max value in color scale, used to show percentage ID for +two CDSs connected by a stripe in the synteny plot. + +=item --color_cds_f + +Color for forward-directed CDSs (if "--cds color" specified), or color of arrow +(if "--cds arrow" specified). + +=item --color_cds_r + +Color for reverse-directed CDSs (if "--cds color" specified, otherwise ignored) + +=back + +=head1 OUTPUT + +All output files have output prefix as given to --out. +Synteny plot is in PDF format at <prefix>.synteny.pdf. +Intermediate files: Blastp output in tabular format (outfmt 6), and tables +containing coordinates of features and connecting polygons for plotting (suffix +.tab). 
+ +=head1 COPYRIGHT AND LICENSE + +Copyright 2016, Brandon Seah (kbseah@mpi-bremen.de) +LICENSE +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +=cut diff --git a/annotation/Tools/Grid/blastp2grid.pl b/bin/gaas_tblastn2grid.pl similarity index 53% rename from annotation/Tools/Grid/blastp2grid.pl rename to bin/gaas_tblastn2grid.pl index f3c8d9fcf..0d54d143f 100755 --- a/annotation/Tools/Grid/blastp2grid.pl +++ b/bin/gaas_tblastn2grid.pl @@ -7,39 +7,16 @@ use Scalar::Util qw(openhandle); use Time::Piece; use Time::Seconds; -use FindBin; -use lib ( "$FindBin::Bin/PerlLib", "$FindBin::Bin/PerlLibAdaptors" ); use File::Basename; use Bio::SeqIO; use Cwd; use Carp; no strict qw(subs refs); +use GAAS::Grid::Bsub; +use GAAS::Grid::Sbatch; +use GAAS::GAAS; -my $usage = qq{ -perl my_script.pl - Getting help: - [--help] - - Input: - [--fasta filename] - The name of the protein file to read. - [--db name] - The name of the database use to blast. - [--chunk_size] - The number of sequence by job. If not provided, default size - will be 500. - [--nb_seq] - The number of proteins contained in the db. Useful to cheat on - the database size. (OrthoMCL aggregation as example). If not - provided, the current database size is used. - [--eval] - The evalue of the sequences kept in the result -Ouput: - [--outdir name] - The name of the output directory. 
- -}; - +my $header = get_gaas_header(); my $outdir = undef; my $db = undef; my $fasta = undef; @@ -50,20 +27,31 @@ # don't send 50.000 jobs to the farm... my @cmds = (); # Stores the commands to send to farm my $quiet; +my $queue=undef; +my $grid="Slurm"; my $help; -GetOptions( "help" => \$help, +if ( !GetOptions( "help" => \$help, "fasta=s" => \$fasta, "db=s" => \$db, "chunk_size=i" => \$chunk_size, "nb_seq=i" => \$nb_seq, "eval" => \$eval, - "outdir=s" => \$outdir ); + "quiet!" => \$quiet, + "grid=s" => \$grid, + "queue=s" => \$queue, + "outdir=s" => \$outdir, + "h|help!" => \$help ) ) +{ + pod2usage( { -message => 'Failed to parse command line', + -verbose => 1, + -exitval => 1 } ); +} # Print Help and exit if ($help) { - print $usage; - exit(0); + pod2usage( { -verbose => 1, + -exitval => 0 } ); } #check all parameters if ( !( defined($db) ) ) { @@ -82,6 +70,14 @@ -exitval => 2 } ); } +# set grid option properly +my @grid_choice=('slurm','lsf','none'); +$grid=lc($grid); +if (! grep( /^$grid/, @grid_choice ) ) { + print "$grid is not a value accepted for grid parameter.";exit; +} +$grid= undef if lc($grid) eq 'none'; + # .. Check that all binaries are available in $PATH my @tools = ("blastp"); @@ -102,22 +98,10 @@ # .. set up log file -my $logfile = "$outdir/blastp2grid.log"; +my $logfile = "$outdir/tblastn2grid.log"; msg("Writing log to: $logfile"); open LOG, '>', $logfile or err ("Can't open logfile"); -# .. load grid module (courtesy of Brian Haas) - -my $grid_computing_module = "BilsGridRunner"; -my $perl_lib_repo = "$FindBin::Bin/../PerlLibAdaptors"; -msg("-importing module: $grid_computing_module\n"); -require "$grid_computing_module.pm" or - die -"Error, could not import perl module at run-time: $grid_computing_module"; - -my $grid_computing_method = $grid_computing_module . "::run_on_grid" or - die "Failed to initialize GRID module\n"; - # .. Read protein fasta file. 
my $inseq = Bio::SeqIO->new( -file => "<$fasta", -format => 'fasta' ); @@ -151,7 +135,7 @@ if ( !defined($nb_seq) ) { for ( my $i = 1; $i <= $chunk_counter; $i++ ) { my $cmd = - "blastp -evalue $eval -num_alignments 100000 " . + "tblastn -evalue $eval -num_alignments 100000 " . "-seg yes -outfmt 6 -db $db -query $outdir/chunk_$i.fa " . "-out $outdir/chunk_$i.tab"; push( @cmds, $cmd ); @@ -160,16 +144,48 @@ else { for ( my $i = 1; $i <= $chunk_counter; $i++ ) { my $cmd = - "blastp -dbsize $nb_seq -evalue $eval " . + "tblastn -dbsize $nb_seq -evalue $eval " . "-num_alignments 100000 -seg yes -outfmt 6 -db $db " . "-query $outdir/chunk_$i.fa -out $outdir/chunk_$i.tab"; push( @cmds, $cmd ); } } + # Submit job chunks to grid -chomp(@cmds); # Remove empty indices -&$grid_computing_method(@cmds); +msg("submitting chunks\n"); + +if( $grid){ + msg("Sending $#cmds jobs to the grid\n"); + chomp(@cmds); # Remove empty indices + # Submit job chunks to grid + my $grid_runner; + if ( $grid eq 'lsf'){ + $grid_runner = Bsub->new( cmds_list => \@cmds); + } + elsif( $grid eq 'slurm'){ + $grid_runner = Sbatch->new( cmds_list => \@cmds); + } + if($queue){$grid_runner->queue($queue)} + $grid_runner->run(); +} +else{ + foreach my $command (@cmds){ + + system($command); + + if ($? == -1) { + print "failed to execute: $!\n"; + } + elsif ($? & 127) { + printf "child died with signal %d, %s coredump\n", + ($? & 127), ($? & 128) ? 'with' : 'without'; + } + else { + printf "child exited with value %d\n", $? 
>> 8; + } + } +} # Merging the outputs msg("Merging outputs from chunks"); @@ -177,10 +193,10 @@ my @files = <$outdir/*.tab>; foreach my $file (@files) { - system("cat $file >> $outdir/blastp.merged"); + system("cat $file >> $outdir/tblastn.merged"); } -msg("Finished BLASTp grid run."); +msg("Finished tBLASTn grid run."); # -------------------- @@ -226,3 +242,94 @@ sub err msg(@_); exit(2); } + +__END__ + +=head1 NAME + +gaas_tblastn2grid.pl + +=head1 DESCRIPTION + +Chunk input data to run multiple tblastn jobs in parallel using Grid + +=head1 SYNOPSIS + + gaas_tblastn2grid.pl --chunck 100 --db genome.fa --eval 1e-6 --outdir blastouput --fasta db.fasta + gaas_tblastn2grid.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<--fasta> + +The name of the protein file to read. + +=item B<--db> + +The name of the database use to blast. + +=item B<--chunk_size> + +The number of sequence by job. If not provided, default size will be 500. + +=item B<--nb_seq> + +The number of proteins contained in the db. Useful to cheat on +the database size. (OrthoMCL aggregation as example). If not +provided, the current database size is used. + +=item B<--eval> + +The evalue of the sequences kept in the result + +=item B<--queue> + +If you want to define a particular queue to run the jobs + +=item B<--grid> + +Define which grid to use, Slurm, Lsf or None. Default = Slurm. + +=item B<--quiet> or B<-q> + +Quiet mode + +=item B<--outdir> + +The name of the output directory. + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. 
+ Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? + +You are very welcome, visit this address for the Contributing guidelines: +https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md + +=cut + +AUTHOR - Jacques Dainat diff --git a/bin/gaas_transplant.rb b/bin/gaas_transplant.rb new file mode 100755 index 000000000..86ff87fec --- /dev/null +++ b/bin/gaas_transplant.rb @@ -0,0 +1,72 @@ +#!/usr/bin/ruby +# == NAME +# transplant.rb +# +# == USAGE +# ./this_script.rb [ -h | --help ] +# [ -i | --infile ] |[ -o | --outfile ] | +# == DESCRIPTION +# Transplants a WebApollo installation +# +# == OPTIONS +# -h,--help:: Show help +# -i,--infile=INFILE:: input file +# -o,--outfile=OUTFILE:: output file + +# +# == EXPERT OPTIONS +# +# == AUTHOR +# Marc Hoeppner, mphoeppner@gmail.com + +require 'optparse' +require 'ostruct' + +### Define modules and classes here + +### Get the script arguments and open relevant files +options = OpenStruct.new() +opts = OptionParser.new() +opts.on("-s","--species", "=SPECIES","Name of species to transplant") {|argument| options.species = argument } +opts.on("-h","--help","Display the usage information") { + puts opts + exit +} + +opts.parse! 
+ +home = ENV['HOME'] + +SERVER = "nbis-web.imbim.uu.se" +USER = "root" +LOCAL_PROJECT = "/big/webapollo/projects/#{options.species}" +LOCAL_DATA = "/big/data/#{options.species}" +TRANSFER = "/big/transfer/#{options.species}" +REMOTE_PROJECT = "/databases/webapollo/#{options.species}" +REMOTE_DATA = "/databases/data/#{options.species}" + +# Create local folders +system("mkdir -p #{TRANSFER}") +system("mkdir -p #{LOCAL_PROJECT}") + +# Copy remote folders/files +# Genome sequence +system("scp #{USER}@#{SERVER}:#{REMOTE_PROJECT}/genome.fa #{LOCAL_PROJECT}/") +abort "No genome sequence has been transferred from remote host (expecting: genome.fa)" unless File.exists?("#{LOCAL_PROJECT}/genome.fa") + +# User db, if dumped +system("scp #{USER}@#{SERVER}:#{REMOTE_PROJECT}/userdb.sql #{LOCAL_PROJECT}/") + +# Data +system("scp -r #{USER}@#{SERVER}:#{REMOTE_DATA}/* #{TRANSFER}") + +# Deploy local installation +system("ruby #{home}/git/code/WebApollo/new_species.rb -s #{options.species} -f #{LOCAL_PROJECT}/genome.fa") + +# Copy the old data to new location +system("cp -R #{TRANSFER}/Annotations* #{LOCAL_DATA}/annotations/") +system("cp -R #{TRANSFER}/track* #{LOCAL_DATA}/data/") +system("cp -R #{TRANSFER}/blat* #{LOCAL_DATA}/annotations/") +system("cp -R #{TRANSFER}/bam #{LOCAL_DATA}/data/") + +system("chown -R #{ENV['USER']}:tomcat #{LOCAL_DATA}") diff --git a/bin/gaas_transposonPSI2grid.pl b/bin/gaas_transposonPSI2grid.pl new file mode 100755 index 000000000..c8a995859 --- /dev/null +++ b/bin/gaas_transposonPSI2grid.pl @@ -0,0 +1,306 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Getopt::Long; +use Pod::Usage; +use Scalar::Util qw(openhandle); +use Time::Piece; +use POSIX; +use Time::Seconds; +use File::Basename; +use Bio::SeqIO; +use Cwd; +use Carp; +use Bio::SeqFeature::Generic; +no strict qw(subs refs); +use GAAS::Grid::Bsub; +use GAAS::Grid::Sbatch; +use GAAS::GAAS; + +my $header = get_gaas_header(); +my $outdir = "transposonPSI_output"; +my $fastaFile; +my 
@cmds = (); # Stores the commands to send to farm +my $quiet; +my $help; +my $grid="Slurm"; +my $queue=undef; +my $chunk=10; + +if ( !GetOptions( + "h|help!" => \$help, + "fasta|f=s" => \$fastaFile, + "grid=s" => \$grid, + "quiet|q!" => \$quiet, + "chunk=i" => \$chunk, + "queue=s" => \$queue, + "outdir|o=s" => \$outdir ) ) + +{ + pod2usage( { -message => "Failed to parse command line", + -verbose => 1, + -exitval => 1 } ); +} + +# Print Help and exit +if ($help) { + pod2usage( { -verbose => 99, + -exitval => 0, + -message => "$header\n" } ); +} + +if ( ! defined($fastaFile) ){ + pod2usage( { + -message => "$header\nAt least 1 parameter is mandatory:\nInput fasta file\n\n", + -verbose => 0, + -exitval => 2 } ); +} + +if (! -e $fastaFile){ + print "The fasta file ".$fastaFile." does not exist.\n";exit; +} + +# set grid +my @grid_choice=('slurm','lsf','none'); +$grid=lc($grid); +if (! grep( /^$grid/, @grid_choice ) ) { + print "$grid is not a value accepted for grid parameter.";exit; +} +$grid= undef if lc($grid) eq 'none'; + +# .. Check that all binaries are available in $PATH +my @tools = ("transposonPSI.pl" ); # List of tools to check for! +foreach my $exe (@tools) { check_bin($exe) == 1 or die "Missing executable $exe in PATH"; } + +# .. Create output directory + +if (-d $outdir ) { + die "Output directory $outdir exists. Please remove and try again"; +} else { + msg("Creating output directory $outdir"); + runcmd("mkdir -p $outdir") +} + +# .. set up log file + +my $logfile = "$outdir/transposonPSI.log"; +msg("Writing log to: $logfile"); +open LOG, '>', $logfile or err("Can't open logfile"); + +# .. Read genome fasta file. +my $inseq = Bio::SeqIO->new(-file => "<$fastaFile", -format => 'fasta'); + +# .. 
and create chunks +msg("Creating chunks\n"); + +my $seq; + +my $seq_counter = 0; +my $nbLine=`grep -c ">" $fastaFile`; +my $chunk_size= ceil($nbLine/$chunk); +my $chunk_counter=0; +my $seq_out; +my $chunk_file ; + +while( $seq = $inseq->next_seq() ) { + if ($chunk_counter == 0){ + $seq_counter += 1; + $chunk_file = $outdir."/".basename($fastaFile). "_chunck" . $seq_counter . ".fasta" ; # basename: keep chunks directly in $outdir even when the input is given with a path + $seq_out = Bio::SeqIO->new(-file => ">$chunk_file" , -format => 'fasta'); + # Queue the job when the chunk file is opened, so a final, partially filled chunk is not dropped + push(@cmds, "transposonPSI.pl ".$chunk_file." prot > /dev/null"); + } + + $seq_out->write_seq($seq); + $chunk_counter++; + + if ($chunk_counter == $chunk_size){ + $chunk_counter = 0; + } +} + +msg("submitting chunks\n"); + +if( $grid){ + msg("Sending ".scalar(@cmds)." jobs to the grid\n"); + chomp(@cmds); # Remove empty indices + # Submit job chunks to grid + my $grid_runner; + if ( $grid eq 'lsf'){ + $grid_runner = Bsub->new( cmds_list => \@cmds); + } + elsif( $grid eq 'slurm'){ + $grid_runner = Sbatch->new( cmds_list => \@cmds); + } + if($queue){$grid_runner->queue($queue)} + $grid_runner->run(); +} +else{ + foreach my $command (@cmds){ + + system($command); + + if ($? == -1) { + print "failed to execute: $!\n"; + } + elsif ($? & 127) { + printf "child died with signal %d, %s coredump\n", + ($? & 127), ($? & 128) ? 'with' : 'without'; + } + else { + printf "child exited with value %d\n", $? 
>> 8; + } + } +} + +# ..Postprocessing here, merging of output and printing + +msg("Merging outputS"); + +# Deal with TPSI.allHits +my $outfile = $fastaFile.".all.TPSI.allHits"; +my @files_allHits = <*.TPSI.allHits>; +open (my $OUT, '>', $outfile) or die "FATAL: Can't open file: $outfile for reading.\n$!\n"; + +foreach my $file (@files_allHits) { + + open (my $IN, '<', $file) or die "FATAL: Can't open file: $file for reading.\n$!\n"; + + while (<$IN>) { + my $line = $_; + next if ($line =~ /^\/\/.*$/); # Skipping comment lines + print $OUT $line; + } +} +close($OUT); +my $command = "mv *.TPSI.allHits $outdir"; +system ("/bin/bash -c '$command'"); + +# Deal with TPSI.topHits +$outfile = $fastaFile.".all.TPSI.topHits"; +my @files_topHits = <*.TPSI.topHits>; +open ($OUT, '>', $outfile) or die "FATAL: Can't open file: $outfile for reading.\n$!\n"; + +foreach my $file (@files_topHits) { + + open (my $IN, '<', $file) or die "FATAL: Can't open file: $file for reading.\n$!\n"; + + while (<$IN>) { + my $line = $_; + next if ($line =~ /^\/\/.*$/); # Skipping comment lines + print $OUT $line; + } +} +close($OUT); +$command = "mv *.TPSI.topHits $outdir"; +system ("/bin/bash -c '$command'"); + +# -------------------- + +# -------------------- + +sub msg { + my $t = localtime; + my $line = "[".$t->hms."] @_\n"; + print LOG $line if openhandle(\*LOG); + print STDERR $line unless $quiet; +} + +# -------------------- + +sub runcmd { + msg("Running:", @_); + system(@_)==0 or err("Could not run command:", @_); +} + +# -------------------- + +sub check_bin { + length(`which @_`) > 0 ? 
return 1 : return 0; +} + +#---------------------------------------------------------------------- + +sub err { + $quiet=0; + msg(@_); + exit(2); +} + +__END__ + +=head1 NAME + +gaas_transposonPSI2grid.pl - + +=head1 DESCRIPTION + +Chunk input data to run multiple transposonPSI jobs in parallel + +=head1 SYNOPSIS + + gaas_transposonPSI2grid.pl -f fasta_file -o outdir + gaas_transposonPSI2grid.pl --help + +=head1 OPTIONS + +=over 8 + +=item B<--fasta> or B<-f> + +The name of the protein fasta file to read. + +=item B<--chunk> + +By default 10. We slice the fasta input file in many chunk to distribute more efficiently small tasks to each cpu. + +=item B<--queue> + +If you want to define a particular queue to run the jobs + +=item B<--grid> + +Define which grid to use, Slurm, Lsf or None. Default = Slurm. + +=item B<--quiet> or B<-q> + +Quiet mode + +=item B<--outdir> or B<-o> + +The name of the output directory. + +=item B<-h> or B<--help> + +Display this helpful text. + +=back + +=head1 FEEDBACK + +=head2 Did you find a bug? + +Do not hesitate to report bugs to help us keep track of the bugs and their +resolution. Please use the GitHub issue tracking system available at this +address: + + https://github.com/NBISweden/GAAS/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + +=head2 Do you want to contribute? 
+
+You are very welcome, visit this address for the Contributing guidelines:
+https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md
+
+=cut
+
+AUTHOR - Jacques Dainat
diff --git a/bin/gaas_trnascan2grid.pl b/bin/gaas_trnascan2grid.pl
new file mode 100755
index 000000000..8a69e5682
--- /dev/null
+++ b/bin/gaas_trnascan2grid.pl
@@ -0,0 +1,330 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+use Getopt::Long;
+use Pod::Usage;
+use Scalar::Util qw(openhandle);
+use Time::Piece;
+use Time::Seconds;
+use FindBin;
+use File::Basename;
+use File::Spec;
+use Bio::SeqIO;
+use Cwd;
+use Carp;
+use Bio::SeqFeature::Generic;
+use Bio::Tools::GFF;
+no strict qw(subs refs);
+use GAAS::Grid::Bsub;
+use GAAS::Grid::Sbatch;
+use GAAS::GAAS;
+
+my $header = get_gaas_header();
+my $gff_formatter = Bio::Tools::GFF->new(-gff_version => 3);
+my $outdir = undef;
+my $fasta = undef;
+my @cmds = (); # Stores the commands to send to farm
+my $quiet;
+my $grid="Slurm";
+my @annotations = (); # Stores tRNA annotations as Bio::SeqFeature::Generic objects
+my $help;
+my $queue=undef;
+
+if ( !GetOptions(
+    "h|help!" => \$help,
+    "fasta|f=s" => \$fasta,
+    "grid=s" => \$grid,
+    "queue=s" => \$queue,
+    "quiet|q!" => \$quiet,
+    "outdir|o=s" => \$outdir))
+
+{
+    pod2usage( { -message => "Failed to parse command line",
+                 -verbose => 1,
+                 -exitval => 1 } );
+}
+
+# Print Help and exit
+if ($help) {
+    pod2usage( { -verbose => 99,
+                 -exitval => 0,
+                 -message => "$header \n" } );
+}
+
+if ( ! (defined($fasta) and defined($outdir) ) ){
+    pod2usage( {
+           -message => "$header\nAt least 2 parameter are mandatory:\nInput fasta file and output directory \n\n",
+           -verbose => 0,
+           -exitval => 2 } );
+}
+
+# set grid option properly
+# The value must equal one of the accepted choices (case-insensitive): a prefix
+# or pattern match would let through values for which no grid runner exists.
+my @grid_choice=('slurm','lsf','none');
+$grid=lc($grid);
+if (! grep { $_ eq $grid } @grid_choice ) {
+    print STDERR "$grid is not a value accepted for grid parameter.\n";
+    exit(1);
+}
+$grid= undef if $grid eq 'none';
+
+# .. Check that all binaries are available in $PATH
+
+my @tools = ( "tRNAscan-SE" ); # List of tools to check for!
+foreach my $exe (@tools) { check_bin($exe) == 1 or die "Missing executable $exe in PATH"; }
+
+# .. Create output directory
+
+if (-d $outdir ) {
+    die "Output directory $outdir exists. Please remove and try again";
+} else {
+    msg("Creating output directory $outdir");
+    # List form: the directory name is never interpreted by a shell
+    runcmd("mkdir", "-p", $outdir);
+}
+
+# .. set up log file
+
+my $logfile = "$outdir/trna_search.log";
+msg("Writing log to: $logfile");
+open LOG, '>', $logfile or err("Can't open logfile");
+
+# .. Read genome fasta file.
+my $inseq = Bio::SeqIO->new(-file => "<$fasta", -format => 'fasta');
+
+# .. and create chunks
+msg("Creating chunks for grid\n");
+
+my $seq;
+
+my $seq_counter = 0;
+
+while( $seq = $inseq->next_seq() ) {
+    $seq_counter += 1;
+    my $outfile = $outdir . "/seq_" . $seq_counter . ".fasta" ; # We could also use the display_id, but this can cause trouble with special characters
+    my $seq_out = Bio::SeqIO->new(-file => ">$outfile" , -format => 'fasta');
+    $seq_out->write_seq($seq);
+    my $command = "tRNAscan-SE -o " . $outfile . ".trna -q $outfile > /dev/null" ;
+    push(@cmds,$command);
+}
+
+# Submit job chunks to grid
+msg("submitting chunks\n");
+
+if( $grid){
+    msg("Sending " . scalar(@cmds) . " jobs to the grid\n"); # job count ($#cmds is only the last index)
+    chomp(@cmds); # Strip any trailing newline from the commands
+    # Submit job chunks to grid
+    my $grid_runner;
+    if ( $grid eq 'lsf'){
+        $grid_runner = Bsub->new( cmds_list => \@cmds);
+    }
+    elsif( $grid eq 'slurm'){
+        $grid_runner = Sbatch->new( cmds_list => \@cmds);
+    }
+    if($queue){$grid_runner->queue($queue)}
+    $grid_runner->run();
+}
+else{
+    foreach my $command (@cmds){
+
+        system($command);
+
+        if ($? == -1) {
+            print "failed to execute: $!\n";
+        }
+        elsif ($? & 127) {
+            printf "child died with signal %d, %s coredump\n",
+                ($? & 127), ($? & 128) ? 'with' : 'without';
+        }
+        else {
+            printf "child exited with value %d\n", $? >> 8;
+        }
+    }
+}
+
+# ..Postprocessing here, merging of output and printing gff
+
+msg("Merging output and writing GFF file");
+
+my @files = <$outdir/*.trna>;
+
+
+foreach my $file (@files) {
+
+    open (my $IN, '<', $file) or die "FATAL: Can't open file: $file for reading.\n$!\n";
+
+    while (<$IN>) {
+        chomp;
+        my $line = $_;
+        next if ($line =~ /^Sequence.*/ or $line =~ /^Name.*/ or $line =~ /^---.*/); # Skipping comment lines
+        next if ($line =~ /^\s*$/); # Skipping empty lines, they carry no annotation
+
+        my $annotation = parse_line($line);
+        push(@annotations,$annotation);
+
+    }
+    close($IN);
+}
+
+my $outfile = $outdir . "/trnascan_annotations.gff";
+open (my $OUT, '>', $outfile) or die "FATAL: Can't open file: $outfile for writing.\n$!\n";
+
+foreach my $feature (@annotations) {
+    $feature->gff_format($gff_formatter);
+    print $OUT $feature->gff_string, "\n";
+}
+
+close($OUT) or die "FATAL: Can't close file: $outfile\n$!\n";
+
+# --------------------
+
+# Build a Bio::SeqFeature::Generic tRNA feature from one result line.
+# The line is split on whitespace into: sequence name, tRNA number, begin, end,
+# tRNA type, anticodon, intron begin, intron end, score.
+# begin > end is taken to mean the reverse strand; start/end are stored ordered.
+sub parse_line {
+    my $line = shift ;
+    my ($seq,$trna_num,$tstart,$tend,$ttype,$tanti,$istart,$iend,$score) = split(' ',$line); # ' ' also skips leading whitespace
+
+    my %tags = ( 'tRNA-type' => $ttype,
+                 'anti-codon' => ($tanti || 'unknown'),
+                 'ID' => 'tRNA_' . $ttype . "_" . $seq . "_" . $tstart,
+                 'Name' => 'tRNA_' . $ttype . "_" . $seq . "_" . $tstart,
+    );
+
+    # Numeric sort: the default string sort would put 1000 before 999
+    my($from,$to) = sort { $a <=> $b } ($tstart,$tend);
+
+    my $strand = ( $tstart < $tend ? '1' : '-1' );
+
+    my $f = Bio::SeqFeature::Generic->new( -seq_id => $seq,
+                                           -start => $from,
+                                           -end => $to,
+                                           -strand => $strand,
+                                           -frame => 0,
+                                           -primary_tag => 'tRNA',
+                                           -source_tag => 'tRNAscan',
+                                           -score => $score,
+                                           -tag => \%tags,
+    );
+
+    return $f;
+}
+
+# --------------------
+
+# Print a time-stamped message to the log file (once it is open) and,
+# unless quiet mode is set, to STDERR.
+sub msg {
+    my $t = localtime;
+    my $line = "[".$t->hms."] @_\n";
+    print LOG $line if openhandle(\*LOG);
+    print STDERR $line unless $quiet;
+}
+
+# --------------------
+
+# Log and run an external command; abort through err() if it fails.
+# Pass the command as a list (program, arguments) to bypass the shell.
+sub runcmd {
+    msg("Running:", @_);
+    system(@_)==0 or err("Could not run command:", @_);
+}
+
+# --------------------
+
+# Return 1 if an executable file with the given name is found in one of the
+# directories of $PATH, 0 otherwise.
+sub check_bin {
+    my ($exe) = @_;
+    foreach my $dir (File::Spec->path()) {
+        my $candidate = File::Spec->catfile($dir, $exe);
+        return 1 if (-f $candidate and -x _);
+    }
+    return 0;
+}
+
+#----------------------------------------------------------------------
+
+# Report a fatal error (even in quiet mode) and exit with status 2.
+sub err {
+    $quiet=0;
+    msg(@_);
+    exit(2);
+}
+
+__END__
+
+=head1 NAME
+
+gaas_trnascan2grid.pl
+
+=head1 DESCRIPTION
+
+Chunk input data to run multiple trnascan jobs in parallel using Grid
+
+=head1 SYNOPSIS
+
+    gaas_trnascan2grid.pl -f fasta_file -o outdir
+    gaas_trnascan2grid.pl --help
+
+=head1 OPTIONS
+
+=over 8
+
+=item B<--fasta> or B<-f>
+
+The name of the genome (nucleotide) fasta file to read.
+
+=item B<--grid>
+
+Define which grid to use, Slurm, Lsf or None. Default = Slurm.
+
+=item B<--queue>
+
+If you want to define a particular queue to run the jobs
+
+=item B<--quiet> or B<-q>
+
+Quiet mode
+
+=item B<--outdir> or B<-o>
+
+The name of the output directory.
+
+=item B<-h> or B<--help>
+
+Display this helpful text.
+
+=back
+
+=head1 FEEDBACK
+
+=head2 Did you find a bug?
+
+Do not hesitate to report bugs to help us keep track of the bugs and their
+resolution. Please use the GitHub issue tracking system available at this
+address:
+
+            https://github.com/NBISweden/GAAS/issues
+
+ Ensure that the bug was not already reported by searching under Issues.
+ If you're unable to find an (open) issue addressing the problem, open a new one.
+ Try as much as possible to include in the issue when relevant:
+ - a clear description,
+ - as much relevant information as possible,
+ - the command used,
+ - a data sample,
+ - an explanation of the expected behaviour that is not occurring.
+
+=head2 Do you want to contribute?
+
+You are very welcome, visit this address for the Contributing guidelines:
+https://github.com/NBISweden/GAAS/blob/master/CONTRIBUTING.md
+
+=cut
+
+AUTHOR - Jacques Dainat
diff --git a/bin/gaas_unlink_all.sh b/bin/gaas_unlink_all.sh
new file mode 100755
index 000000000..6301abcb0
--- /dev/null
+++ b/bin/gaas_unlink_all.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# JD 2014
+#
+# Unlink (remove) every symbolic link found directly inside a directory.
+# Regular files and sub-directories are left untouched.
+####
+
+
+
+# Arguments and Paths
+############################################################################
+
+if (( $# != 1 )) ; then
+    echo -e "The script needs 1 parameter: \n(1)Directory where all the link will be unlinked"
+    exit 1
+fi
+
+# Test each entry with -L instead of parsing "ls -l", so that names
+# containing spaces or other special characters are handled correctly.
+for entry in "$1"/*; do
+    if [[ -L "$entry" ]]; then
+        unlink "$entry"
+    fi
+done
+
diff --git a/gaas_environment.yml b/conda_environment_GAAS.yml
similarity index 57%
rename from gaas_environment.yml
rename to conda_environment_GAAS.yml
index a22622653..8150b3ec4 100644
--- a/gaas_environment.yml
+++ b/conda_environment_GAAS.yml
@@ -1,4 +1,4 @@
-# conda env create -f gaas_environment.yml
+# conda env create -f conda_environment_GAAS.yml
 name: gaas
 channels:
   - conda-forge
@@ -6,12 +6,16 @@ channels:
   - defaults
 
 dependencies:
+  - perl
+  - perl-bio-eutilities
   - perl-bioperl
   - perl-clone
-  - perl-moose
   - perl-graph
   - perl-lwp-simple
+  - perl-moose
   - perl-statistics-r
-  - perl-json
   - perl-sort-naturally
-  - perl-data-dumper
+  - perl-file-share
+  - perl-file-sharedir-install
+  - r-base
+  - libdb
diff --git a/gaas_refresh_bin.sh b/gaas_refresh_bin.sh
new file mode 100755
index 000000000..73168088e
--- /dev/null
+++ b/gaas_refresh_bin.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Rebuild the content of the bin folder: remove every entry it contains, then
+# hard-link into it, under the name gaas_<script>, each script found in the
+# rest of the repository.
+
+cd bin
+
+if [[ ! ${PWD##*/} == "bin" ]];then
+    echo "The script must be run in GAAS/bin folder"
+    echo "currently here `pwd`"
+    exit 1
+fi
+
+
+#remove scripts
+for j in *;do
+    # an empty folder leaves the pattern unexpanded: nothing to remove then
+    [[ -e "$j" || -L "$j" ]] || continue
+    unlink "$j"
+done
+
+#While looking at all script within the repo we skip all Deprecated folder and what it contains
+#(as well as the bin and blib folders). Patterns are quoted to keep the shell from expanding them,
+#and file names are read NUL-separated so that names containing spaces are kept whole.
+find ../ -not \( -path '*/Deprecated' -prune \) -not \( -path '*/bin' -prune \) -not \( -path '*/blib' -prune \) \( -name '*.pl' -o -name '*.sh' -o -name '*.py' -o -name '*.r' -o -name '*.R' -o -name '*.rb' \) -print0 | while IFS= read -r -d '' i;do
+
+    name=$(basename "$i")
+
+    # skip gaas_refresh_bin.sh because must not be in the bin to avoid to be distributed
+    if [ "$name" == "gaas_refresh_bin.sh" ] ; then
+        continue
+    fi
+
+    #Populate scripts using hard link
+    if [[ ! -f "gaas_${name}" ]];then
+        ln "$i" "gaas_${name}"
+    fi
+
+done
diff --git a/annotation/NBIS/FASTA/Longest_orf.pm b/lib/GAAS/FASTA/Longest_orf.pm
similarity index 100%
rename from annotation/NBIS/FASTA/Longest_orf.pm
rename to lib/GAAS/FASTA/Longest_orf.pm
diff --git a/lib/GAAS/GAAS.pm b/lib/GAAS/GAAS.pm
new file mode 100644
index 000000000..1cb406337
--- /dev/null
+++ b/lib/GAAS/GAAS.pm
@@ -0,0 +1,46 @@
+#!/usr/bin/perl -w
+
+package GAAS::GAAS;
+
+use strict;
+use warnings;
+use Exporter;
+
+our $VERSION = "v0.0.1";
+our @ISA = qw(Exporter);
+our @EXPORT = qw(get_gaas_header);
+sub import {
+    GAAS::GAAS->export_to_level(1, @_); # to be able to load the EXPORT functions when direct call; (normal case)
+}
+
+=head1 SYNOPSIS
+
+ Meta package for conveniency.
+
+=head1 DESCRIPTION
+
+ Exports get_gaas_header(), which returns the GAAS banner (name, version, URL).
+
+=head1 AUTHOR
+
+ Jacques Dainat - jacques.dainat@nbis.se
+
+=cut
+
+# Return the multi-line GAAS banner (tool name, version, project URL) as a string
+sub get_gaas_header{
+  # $verbose is read from the arguments but is not used by the code below
+  my ($verbose) = @_;
+
+  my $header = qq{
+ ------------------------------------------------------------------------------
+|   Genome Assembly Annotation Service (GAAS) - Version: $VERSION                |
+|   https://github.com/NBISweden/GAAS                                          |
+|   National Bioinformatics Infrastructure Sweden (NBIS) - www.nbis.se         |
+ ------------------------------------------------------------------------------
+  };
+
+return $header;
+
+}
+1;
diff --git a/annotation/NBIS/Grid/Bsub.pm b/lib/GAAS/Grid/Bsub.pm
similarity index 99%
rename from annotation/NBIS/Grid/Bsub.pm
rename to lib/GAAS/Grid/Bsub.pm
index 2da6a084e..af3c00b18 100644
--- a/annotation/NBIS/Grid/Bsub.pm
+++ b/lib/GAAS/Grid/Bsub.pm
@@ -5,7 +5,7 @@ use warnings;
 use File::Basename;
 use Carp;
 use Moose;
-use NBIS::Grid::GridRunner;
+use GAAS::Grid::GridRunner;
 
 extends 'GridRunner';
 
diff --git a/annotation/NBIS/Grid/GridRunner.pm b/lib/GAAS/Grid/GridRunner.pm
similarity index 100%
rename from annotation/NBIS/Grid/GridRunner.pm
rename to lib/GAAS/Grid/GridRunner.pm
diff --git a/annotation/NBIS/Grid/Sbatch.pm b/lib/GAAS/Grid/Sbatch.pm
similarity index 99%
rename from annotation/NBIS/Grid/Sbatch.pm
rename to lib/GAAS/Grid/Sbatch.pm
index 0df8ed8f0..17234a765 100644
--- a/annotation/NBIS/Grid/Sbatch.pm
+++ b/lib/GAAS/Grid/Sbatch.pm
@@ -6,7 +6,7 @@ use File::Basename;
 #use IPC::Cmd qw[can_run run];
 use Carp;
 use Moose;
-use NBIS::Grid::GridRunner;
+use GAAS::Grid::GridRunner;
 
 extends 'GridRunner';
 
diff --git a/annotation/NBIS/Handler/Genbankhandler.pm b/lib/GAAS/Handler/Genbankhandler.pm
similarity index 99%
rename from annotation/NBIS/Handler/Genbankhandler.pm
rename to lib/GAAS/Handler/Genbankhandler.pm
index 36712ff45..ef6c52b65 100644
--- a/annotation/NBIS/Handler/Genbankhandler.pm
+++ b/lib/GAAS/Handler/Genbankhandler.pm
@@ -1,6 +1,6 @@
 #!/usr/bin/perl -w
 
-package
NBIS::Handler::Genbankhandler ; +package GAAS::Handler::Genbankhandler ; use strict; use Clone 'clone'; diff --git a/annotation/NBIS/Plot/R.pm b/lib/GAAS/Plot/R.pm similarity index 99% rename from annotation/NBIS/Plot/R.pm rename to lib/GAAS/Plot/R.pm index 00ad362d0..387ec04c0 100644 --- a/annotation/NBIS/Plot/R.pm +++ b/lib/GAAS/Plot/R.pm @@ -1,6 +1,6 @@ #!/usr/bin/perl -w -package NBIS::Plot::R ; +package GAAS::Plot::R ; use strict; use Bio::Tools::GFF; diff --git a/annotation/NBIS/Tree/CleanTaxonomicTreeFromNCBI.pm b/lib/GAAS/Tree/CleanTaxonomicTreeFromNCBI.pm similarity index 98% rename from annotation/NBIS/Tree/CleanTaxonomicTreeFromNCBI.pm rename to lib/GAAS/Tree/CleanTaxonomicTreeFromNCBI.pm index a1ad25db1..add52b9a6 100644 --- a/annotation/NBIS/Tree/CleanTaxonomicTreeFromNCBI.pm +++ b/lib/GAAS/Tree/CleanTaxonomicTreeFromNCBI.pm @@ -1,6 +1,6 @@ #!/usr/bin/perl -w -package NBIS::Tree::CleanTaxonomicTreeFromNCBI; +package GAAS::Tree::CleanTaxonomicTreeFromNCBI; use Data::Dumper; @@ -18,7 +18,7 @@ our @EXPORT_OK = qw/ some_other /; =head1 DESCRIPTION A library to clean the taxonomic tree coming from NCBI. 
Indeed there is several ancestor in some unitary internal branches - Inherits from NBIS::Tree + Inherits from GAAS::Tree =cut diff --git a/annotation/NBIS/CheckModule.pm b/lib/GAAS/deprecated/CheckModule.pm similarity index 99% rename from annotation/NBIS/CheckModule.pm rename to lib/GAAS/deprecated/CheckModule.pm index 2e9f3755f..e299dac64 100644 --- a/annotation/NBIS/CheckModule.pm +++ b/lib/GAAS/deprecated/CheckModule.pm @@ -1,6 +1,6 @@ #!/usr/bin/perl -w -package NBIS::CheckModule; +package GAAS::CheckModule; use strict; use Exporter qw(import); diff --git a/profiles/README.md b/profiles/README.md deleted file mode 100644 index a900d9909..000000000 --- a/profiles/README.md +++ /dev/null @@ -1,34 +0,0 @@ -# Profiles - -These profiles are meant to make it easy to set up the environment needed for WebApollo without corrupting your own settings - -## NBIS production server environment - -To activate the NBIS production server environment use the profile `activate_nbis_env`. -```bash -# Change GAAS_HOME to where the GAAS repository is cloned -GAAS_HOME=$HOME/GAAS -source $GAAS_HOME/profiles/activate_nbis_env -``` -Your prompt should now indicate the environment is active. - -To restore your previous environment just type: -```bash -deactivate -``` - -## Rackham user environment - -To activate the Rackham user environment use the profile `activate_rackham_env`. -```bash -# Change GAAS_HOME to where the GAAS repository is cloned -GAAS_HOME=$HOME/GAAS -source $GAAS_HOME/profiles/activate_rackham_env -``` -Your prompt should now indicate the environment is active. - -To restore your previous environment just type: -```bash -deactivate -``` - diff --git a/profiles/activate_env b/profiles/activate_env deleted file mode 100644 index 0f721160a..000000000 --- a/profiles/activate_env +++ /dev/null @@ -1,72 +0,0 @@ -# This file must be used with "source bin/activate" *from bash* -# you cannot run it directly - -deactivate () { - - # reset old environment variables - # ! 
[ -z ${VAR+_} ] returns true if VAR is declared at all - if ! [ -z "${_OLD_VIRTUAL_PATH+_}" ] ; then - PATH="$_OLD_VIRTUAL_PATH" - export PATH - unset _OLD_VIRTUAL_PATH - fi - - # This should detect bash and zsh, which have a hash command that must - # be called to get it to forget past commands. Without forgetting - # past commands the $PATH changes we made may not be respected - if [ -n "${BASH-}" ] || [ -n "${ZSH_VERSION-}" ] ; then - hash -r 2>/dev/null - fi - - if ! [ -z "${_OLD_VIRTUAL_PS1+_}" ] ; then - PS1="$_OLD_VIRTUAL_PS1" - export PS1 - unset _OLD_VIRTUAL_PS1 - fi - - # Unset PERL5LIB - if ! [ -z "${_OLD_PERL5LIB+_}" ] ; then - PERL5LIB="$_OLD_PERL5LIB" - export PERL5LIB - unset _OLD_PERL5LIB - fi - - unset VIRTUAL_ENV - if [ ! "${1-}" = "nondestructive" ] ; then - # Self destruct! - unset -f deactivate - fi -} - -# unset irrelevant variables -deactivate nondestructive - -VIRTUAL_ENV="/home/student/annotation_course/GAAS" -export VIRTUAL_ENV - -_OLD_VIRTUAL_PATH="$PATH" -PATH="$VIRTUAL_ENV/annotation/Tools/bin:$PATH" -export PATH - -_OLD_PERL5LIB="$PERL5LIB" -PERL5LIB="$PERL5LIB:$VIRTUAL_ENV/annotation" -export PERL5LIB - -make -f "$VIRTUAL_ENV/Makefile" check - -if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT-}" ] ; then - _OLD_VIRTUAL_PS1="$PS1" - if [ "x" != x ] ; then - PS1="$PS1" - else - PS1="(`basename \"$VIRTUAL_ENV\"`) $PS1" - fi - export PS1 -fi - -# This should detect bash and zsh, which have a hash command that must -# be called to get it to forget past commands. 
Without forgetting -# past commands the $PATH changes we made may not be respected -if [ -n "${BASH-}" ] || [ -n "${ZSH_VERSION-}" ] ; then - hash -r 2>/dev/null -fi diff --git a/profiles/activate_nbis_env b/profiles/activate_nbis_env deleted file mode 100644 index 6addd7a28..000000000 --- a/profiles/activate_nbis_env +++ /dev/null @@ -1,89 +0,0 @@ -# This file must be used with "source bin/activate" *from bash* -# you cannot run it directly - -deactivate () { - - # reset old environment variables - # ! [ -z ${VAR+_} ] returns true if VAR is declared at all - if ! [ -z "${_OLD_VIRTUAL_PATH+_}" ] ; then - PATH="$_OLD_VIRTUAL_PATH" - export PATH - unset _OLD_VIRTUAL_PATH - fi - - # This should detect bash and zsh, which have a hash command that must - # be called to get it to forget past commands. Without forgetting - # past commands the $PATH changes we made may not be respected - if [ -n "${BASH-}" ] || [ -n "${ZSH_VERSION-}" ] ; then - hash -r 2>/dev/null - fi - - if ! [ -z "${_OLD_VIRTUAL_PS1+_}" ] ; then - PS1="$_OLD_VIRTUAL_PS1" - export PS1 - unset _OLD_VIRTUAL_PS1 - fi - - # Unset Admin variables - unset WA_USER_DB_ADMIN_PW - unset WA_WEBSITE_PASSWORD - unset WA_DB_ADMIN - unset WA_DB_ADMIN_PW - unset APOLLO_DATA_DIR - unset APOLLO_BUILD_DIR - - # Unset PERL5LIB - if ! [ -z "${_OLD_PERL5LIB+_}" ] ; then - PERL5LIB="$_OLD_PERL5LIB" - export PERL5LIB - unset _OLD_PERL5LIB - fi - - unset VIRTUAL_ENV - if [ ! "${1-}" = "nondestructive" ] ; then - # Self destruct! 
- unset -f deactivate - fi -} - -# unset irrelevant variables -deactivate nondestructive - -# Setup WebApollo settings -ADMIN_SETTINGS_PATH="/big/webapollo/webapollo_admin_settings" -if [ -r "$ADMIN_SETTINGS_PATH" ]; then - echo "Admin privileges granted" - source "$ADMIN_SETTINGS_PATH" -else - echo "Admin privileges not granted" -fi - -VIRTUAL_ENV="$HOME/GAAS" -export VIRTUAL_ENV - -_OLD_VIRTUAL_PATH="$PATH" -PATH="$VIRTUAL_ENV/annotation/Tools/bin:$PATH" -export PATH - -_OLD_PERL5LIB="$PERL5LIB" -PERL5LIB="$PERL5LIB:$VIRTUAL_ENV/annotation" -export PERL5LIB - -make -f "$VIRTUAL_ENV/Makefile" check - -if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT-}" ] ; then - _OLD_VIRTUAL_PS1="$PS1" - if [ "x" != x ] ; then - PS1="$PS1" - else - PS1="(`basename \"$VIRTUAL_ENV\"`) $PS1" - fi - export PS1 -fi - -# This should detect bash and zsh, which have a hash command that must -# be called to get it to forget past commands. Without forgetting -# past commands the $PATH changes we made may not be respected -if [ -n "${BASH-}" ] || [ -n "${ZSH_VERSION-}" ] ; then - hash -r 2>/dev/null -fi diff --git a/profiles/activate_rackham_env b/profiles/activate_rackham_env deleted file mode 100644 index 34f2ff9c2..000000000 --- a/profiles/activate_rackham_env +++ /dev/null @@ -1,79 +0,0 @@ -# This file must be used with "source pathToTheFile" *from bash* -# you cannot run it directly - -deactivate () { - - # reset old environment variables - # ! [ -z ${VAR+_} ] returns true if VAR is declared at all - if ! [ -z "${_OLD_VIRTUAL_PATH+_}" ] ; then - PATH="$_OLD_VIRTUAL_PATH" - export PATH - unset _OLD_VIRTUAL_PATH - fi - - # This should detect bash and zsh, which have a hash command that must - # be called to get it to forget past commands. Without forgetting - # past commands the $PATH changes we made may not be respected - if [ -n "${BASH-}" ] || [ -n "${ZSH_VERSION-}" ] ; then - hash -r 2>/dev/null - fi - - if ! 
[ -z "${_OLD_VIRTUAL_PS1+_}" ] ; then - PS1="$_OLD_VIRTUAL_PS1" - export PS1 - unset _OLD_VIRTUAL_PS1 - fi - - # Unset PERL5LIB - if ! [ -z "${_OLD_PERL5LIB+_}" ] ; then - PERL5LIB="$_OLD_PERL5LIB" - export PERL5LIB - unset _OLD_PERL5LIB - fi - - unset VIRTUAL_ENV - if [ ! "${1-}" = "nondestructive" ] ; then - # Self destruct! - unset -f deactivate - # restore modules - module restore temp - module disable temp - fi -} - -# unset irrelevant variables -deactivate nondestructive - -# Save the current module state -module --force save temp -module load bioinfo-tools BioPerl/1.6.924_Perl5.18.4 - -VIRTUAL_ENV="$HOME/GAAS" -export VIRTUAL_ENV - -_OLD_VIRTUAL_PATH="$PATH" -PATH="$VIRTUAL_ENV/annotation/Tools/bin:$PATH" -export PATH - -_OLD_PERL5LIB="$PERL5LIB" -PERL5LIB="$PERL5LIB:$VIRTUAL_ENV/annotation" -export PERL5LIB - -make -f "$VIRTUAL_ENV/Makefile" check - -if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT-}" ] ; then - _OLD_VIRTUAL_PS1="$PS1" - if [ "x" != x ] ; then - PS1="$PS1" - else - PS1="(`basename \"$VIRTUAL_ENV\"`) $PS1" - fi - export PS1 -fi - -# This should detect bash and zsh, which have a hash command that must -# be called to get it to forget past commands. Without forgetting -# past commands the $PATH changes we made may not be respected -if [ -n "${BASH-}" ] || [ -n "${ZSH_VERSION-}" ] ; then - hash -r 2>/dev/null -fi diff --git a/share/NBIS.png b/share/NBIS.png new file mode 100644 index 000000000..e62d215b4 Binary files /dev/null and b/share/NBIS.png differ diff --git a/t/scripts_compile.t b/t/scripts_compile.t new file mode 100644 index 000000000..d4838a1fb --- /dev/null +++ b/t/scripts_compile.t @@ -0,0 +1,51 @@ +#!/usr/bin/env perl + +use strict; +use warnings; + +=head1 DESCRIPTION + +Test to see if all script in the bin file can be compiled without error. 
+
+=cut
+
+
+################################################################################
+# set number of test according to number of scripts
+my $nb_test=0;
+my %perl_blacklist;
+
+BEGIN{
+  ## BLACK listed list of perl scripts
+  %perl_blacklist = ( "gaas_create_preautomated_report.pl" => 0
+                    );
+
+  opendir (DIR, "bin") or die $!;
+  while (my $file = readdir(DIR)) {
+    # Use a regular expression to ignore files beginning with a period
+    next if ($file =~ m/^\./);
+    next if ! ($file =~ /\.pl$/);
+    next if exists ($perl_blacklist{$file});
+    #add exe file
+    $nb_test++;
+  }
+  closedir(DIR);
+}
+#
+################################################################################
+
+use Test::More tests => $nb_test ;
+
+
+# foreach script in the bin, let run the test
+opendir (DIR, "bin") or die $!;
+while (my $file = readdir(DIR)) {
+  # Use a regular expression to ignore files beginning with a period
+  next if ($file =~ m/^\./);
+  next if ! ($file =~ /\.pl$/);
+  next if exists ($perl_blacklist{$file});
+  note("bin/$file -h 1>/dev/null");
+  #run test - check the script can run calling the help.
+  ok( system("bin/$file -h 1>/dev/null") == 0, "test $file");
+}
+closedir(DIR);
diff --git a/uninstall_GAAS b/uninstall_GAAS
new file mode 100644
index 000000000..1a437de59
--- /dev/null
+++ b/uninstall_GAAS
@@ -0,0 +1,28 @@
+#usage: perl uninstall_GAAS
+#Remove the files, the packlist and the (empty) directories installed for the GAAS module
+use 5.14.2;
+use warnings;
+use ExtUtils::Installed;
+use ExtUtils::Packlist;
+
+my $module = "GAAS";
+
+my $installed_modules = ExtUtils::Installed->new;
+
+# iterate through and try to delete every file associated with the module
+foreach my $file ($installed_modules->files($module)) {
+  unlink $file or warn "could not remove $file: $!\n";
+}
+
+# delete the module packfile
+my $packfile = $installed_modules->packlist($module)->packlist_file;
+print "removing $packfile\n";
+unlink $packfile or warn "could not remove $packfile: $!\n";
+
+# delete the module directories if they are empty
+# (reverse sorted so that a sub-directory is removed before its parent,
+# otherwise the parent is never empty when rmdir reaches it)
+foreach my $dir (reverse sort($installed_modules->directory_tree($module))) {
+  print("removing $dir\n");
+  rmdir $dir or warn "could not remove $dir: $!\n";
+}